diff --git a/.ackrc b/.ackrc index 0e0073a801..44d645a5d2 100644 --- a/.ackrc +++ b/.ackrc @@ -3,4 +3,5 @@ --ignore-directory=is:.mypy_cache --ignore-directory=is:.pytest_cache --ignore-directory=is:.ruff_cache +--ignore-directory=is:.venv --ignore-directory=is:site diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml index 3e1c9acd8f..ab87914ff3 100644 --- a/.github/workflows/code-quality.yml +++ b/.github/workflows/code-quality.yml @@ -76,6 +76,6 @@ jobs: # Coverage report - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} diff --git a/bin/create_extra_bib.py b/bin/create_extra_bib.py index 92d2924641..b3bc3ec5a1 100755 --- a/bin/create_extra_bib.py +++ b/bin/create_extra_bib.py @@ -36,6 +36,7 @@ import msgspec from pathlib import Path import re +from rich.console import Console from rich.progress import track import shutil import subprocess @@ -48,6 +49,7 @@ BIB2XML = None XML2END = None +CONSOLE = Console(stderr=True) # Max shard size in MiB MAX_SHARD_MB = 49 @@ -89,6 +91,7 @@ def create_bibtex(builddir, clean=False) -> None: reverse=True, ), description="Create anthology.bib.gz... ", + console=CONSOLE, ): with open(volume_file, "r") as f: bibtex = f.read() @@ -124,6 +127,7 @@ def create_bibtex(builddir, clean=False) -> None: reverse=True, ), description=" +abstracts.bib.gz... ", + console=CONSOLE, ): with open(collection_file, "rb") as f: data = msgspec.json.decode(f.read()) @@ -351,7 +355,7 @@ def batch_convert_to_mods_and_endf(bibtex, context): ) log_level = log.DEBUG if args["--debug"] else log.INFO - tracker = setup_rich_logging(level=log_level) + tracker = setup_rich_logging(console=CONSOLE, level=log_level) max_workers = int(args["--max-workers"]) if args["--max-workers"] else None if (BIB2XML := shutil.which("bib2xml")) is None: diff --git a/bin/create_hugo_data.py b/bin/create_hugo_data.py index b2c5bf0f5b..1ed8bc656e 100755 --- a/bin/create_hugo_data.py +++ b/bin/create_hugo_data.py @@ -39,6 +39,7 @@ import msgspec from omegaconf import OmegaConf import os +from rich.console import Console from rich.progress import ( Progress, TextColumn, @@ -60,6 +61,7 @@ BIBLIMIT = None +CONSOLE = Console(stderr=True) ENCODER = msgspec.json.Encoder() SCRIPTDIR = os.path.dirname(os.path.realpath(__file__)) @@ -93,7 +95,7 @@ def make_progress(): TaskProgressColumn(show_speed=True), TimeRemainingColumn(elapsed_when_finished=True), ] - return Progress(*columns) + return Progress(*columns, console=CONSOLE) @cache @@ -396,6 +398,8 @@ def export_people(anthology, builddir, dryrun): data["full"] = f"{data['full']} ({', '.join(diff_script_variants)})" if person.comment is not None: data["comment"] = person.comment + if person.orcid is not None: + data["orcid"] = person.orcid similar = anthology.people.similar.subset(person_id) if len(similar) > 1: data["similar"] = list(similar - {person_id}) @@ -567,7 +571,7 @@ def export_anthology(anthology, builddir, clean=False, dryrun=False): ) log_level = log.DEBUG if args["--debug"] else log.INFO - tracker = setup_rich_logging(level=log_level) + tracker = setup_rich_logging(console=CONSOLE, level=log_level) if limit := args["--bib-limit"]: BIBLIMIT = int(limit) diff --git a/bin/oneoff/transition_to_people_yaml.py b/bin/oneoff/transition_to_people_yaml.py new file mode 100644 index 0000000000..544c900600 --- /dev/null +++ b/bin/oneoff/transition_to_people_yaml.py @@ -0,0 +1,260 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright 2025 Marcel Bollmann +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Usage: transition_to_people_yaml.py [options] + +Creates people.yaml and rewrites author IDs in the XML according to . + +Options: + --debug Output debug-level log messages. + -d, --datadir=DIR Directory with data files. [default: {scriptdir}/../../data] + -x, --write-xml Write changes to the XML files. + -y, --write-yaml Write the new people.yaml. + -h, --help Display this helpful text. +""" + +from collections import defaultdict +from docopt import docopt +from importlib.metadata import version as get_version +import itertools as it +import logging as log +import os +from pathlib import Path +import yaml + +try: + from yaml import CLoader as Loader, CDumper as Dumper +except ImportError: # pragma: no cover + from yaml import Loader, Dumper # type: ignore + +from acl_anthology import Anthology +from acl_anthology.people import Name +from acl_anthology.utils.logging import setup_rich_logging + + +def parse_variant_list(anthology): + # We create a dictionary mapping person IDs to their original entry in + # name_variants.yaml; this is because there are fields in name_variants.yaml + # that the Python library does not store (such as 'orcid' or 'degree'), and + # we might want to transfer them to the new people.yaml + name_variants = {} + with open( + anthology.datadir / "yaml" / "name_variants.yaml", "r", encoding="utf-8" + ) as f: + variant_list = yaml.load(f, Loader=Loader) + for entry in variant_list: + if "id" in entry: + name_variants[entry["id"]] = entry + else: + people = anthology.people.get_by_name(Name.from_dict(entry["canonical"])) + assert ( + len(people) == 1 + ), "Canonical name in name_variants.yaml shouldn't be ambiguous" + name_variants[people[0].id] = entry + return name_variants + + +# This exists to serialize names in "flow" style (i.e. one-liner {first: ..., +# last: ...}), without having to force flow style on the entire YAML document +class YAMLName(yaml.YAMLObject): + yaml_dumper = Dumper + yaml_tag = "tag:yaml.org,2002:map" # serialize like a dictionary + yaml_flow_style = True # force flow style + + def __init__(self, first, last, script): + if first is not None: + self.first = first + self.last = last + if script is not None: + self.script = script + + +def name_to_yaml(name): + return YAMLName(name.first, name.last, name.script) + + +def refactor(anthology, name_variants): + new_people_dict = {} + c_removed, c_added = 0, 0 + + # These two are to infer if we need to set disable_name_matching: true somewhere + names_to_ids = defaultdict(list) + names_with_catchall_id = [] + c_disable_name_matching = 0 + + for pid, person in anthology.people.items(): + # We only consider people who are currently defined in name_variants.yaml + if not person.is_explicit: + continue + + orig_entry = name_variants[pid] + + # name_variants.yaml may define IDs that are actually never used + if not person.item_ids: + log.warning( + f"Person '{pid}' derived from name_variants.yaml has no papers; discarding" + ) + continue + + # If person has a comment like "May refer to multiple people" or "May + # refer to several people", their identity is "unverified", so we: + # - Don't write them to people.yaml + # - Remove their ID from the XML + if person.comment is not None and person.comment.startswith("May refer"): + log.debug(f"Removing ID '{pid}' ('{person.comment}')") + for paper in person.papers(): + # Remove their ID from the XML + for namespec in it.chain(paper.authors, paper.get_editors()): + if namespec.id == pid: + namespec.id = None + c_removed += 1 + + # Record the name(s) of this person so we can check later if this ID + # was important for disambiguation + names_with_catchall_id.extend(person.names) + + # Don't process this person further + continue + + # If we reach this point, this person should be considered "verified" + # under the new system. However, maybe not all of their *names* should + # go into people.yaml---a name can have been added to `person.names` in + # different ways: + # + # 1. It was listed explicitly in `name_variants.yaml` -- keep + # 2. It was in the XML with this person's explicit ID -- keep + # 3. It was added to this person via the name matching mechanism that + # compares slugified names -- don't keep, as it was inferred heuristically + # + # (This happens in ) + c = 0 + names_to_keep = {Name.from_dict(orig_entry["canonical"])} | { + Name.from_dict(name) for name in orig_entry.get("variants", []) + } # Case 1 + + for paper in person.papers(): + for namespec in it.chain(paper.authors, paper.get_editors()): + if namespec.id == pid: + names_to_keep.add(namespec.name) # Case 2 + break + else: + # Does *not* already have an explicit ID in the XML; add it. + # --- + # NOTE: Doing this in a separate loop to avoid the edge case where + # a paper might have two authors with identical names, + # disambiguated by their ID---not sure if that ever happens, but + # better be safe than sorry. + for namespec in it.chain(paper.authors, paper.get_editors()): + if person.has_name(namespec.name): + if namespec.name in names_to_keep: # Avoid case 3 + namespec.id = pid + c += 1 + c_added += 1 + break + else: + # Should never happen + log.error( + f"Did not find '{pid}' on paper '{paper.full_id}' connected to them", + ) + + if c > 0: + log.debug(f"Added explicit ID '{pid}' to {c} papers") + + for name in person.names: + names_to_ids[name].append(pid) + + # Construct entry for new people.yaml + entry = { + # First name is always the canonical one + "names": [ + name_to_yaml(name) for name in person.names if name in names_to_keep + ], + } + if person.comment is not None: + entry["comment"] = person.comment + # These are keys we copy over from the old name_variants.yaml + for key in ("degree", "similar", "orcid"): + if key in orig_entry: + entry[key] = orig_entry[key] + + new_people_dict[pid] = entry + + for name in names_with_catchall_id: + pids = names_to_ids.get(name, []) + if len(pids) == 1: + # There is only one "verified" person with this name, but there was + # a catch-all ID ("May refer to several people") with this name too, + # so we need to disable name matching under the new system + new_people_dict[pids[0]]["disable_name_matching"] = True + c_disable_name_matching += 1 + + log.info( + f"Removed {c_removed:>5d} explicit IDs from the XML ('May refer to several people' etc.)" + ) + log.info(f" Added {c_added:>5d} explicit IDs to the XML") + log.info(f"Created {len(new_people_dict):>5d} entries for people.yaml") + log.info( + f" {c_disable_name_matching:>5d} of those have `disable_name_matching: true`" + ) + + return new_people_dict + + +if __name__ == "__main__": + args = docopt(__doc__) + + log_level = log.DEBUG if args["--debug"] else log.INFO + tracker = setup_rich_logging(level=log_level) + + if (version := get_version("acl_anthology")) != "0.5.3": + log.error( + f"This script needs to run with version 0.5.3 of the acl-anthology library; got {version}" + ) + exit(1) + + if "{scriptdir}" in args["--datadir"]: + args["--datadir"] = os.path.abspath( + args["--datadir"].format(scriptdir=os.path.dirname(os.path.abspath(__file__))) + ) + datadir = Path(args["--datadir"]) + log.info(f"Using data directory {datadir}") + + anthology = Anthology(datadir=datadir) + anthology.load_all() + + name_variants = parse_variant_list(anthology) + log.info(f" Found {len(name_variants):>5d} entries in name_variants.yaml") + + new_people_dict = refactor(anthology, name_variants) + + if tracker.highest >= log.ERROR: + log.warning("There were errors; aborting without saving") + exit(1) + + if args["--write-yaml"]: + log.info("Writing new people.yaml...") + with open(datadir / "yaml" / "people.yaml", "w", encoding="utf-8") as f: + yaml.dump(new_people_dict, f, allow_unicode=True, Dumper=Dumper) + else: + log.warning("Not writing people.yaml; use -y/--write-yaml flag") + + if args["--write-xml"]: + log.info("Saving XML files...") + for collection in anthology.collections.values(): + collection.save() + else: + log.warning("Not modifying XML files; use -x/--write-xml flag") diff --git a/data/xml/1952.earlymt.xml b/data/xml/1952.earlymt.xml index 0e95c06119..d8be388ffe 100644 --- a/data/xml/1952.earlymt.xml +++ b/data/xml/1952.earlymt.xml @@ -40,7 +40,7 @@ Human translation versus machine translation - LeonDostert + LeonDostert dostert-1952-human diff --git a/data/xml/1956.earlymt.xml b/data/xml/1956.earlymt.xml index 97c72adc49..025d7e5b2a 100644 --- a/data/xml/1956.earlymt.xml +++ b/data/xml/1956.earlymt.xml @@ -28,7 +28,7 @@ Organisation and Method in Mechanical Translation Work - L. E.Dostert + L. E.Dostert 1956.earlymt-1.3 dostert-1956-organisation diff --git a/data/xml/1957.earlymt.xml b/data/xml/1957.earlymt.xml index 990e19843d..359560052c 100644 --- a/data/xml/1957.earlymt.xml +++ b/data/xml/1957.earlymt.xml @@ -6,7 +6,7 @@
Georgetown University
12-13 April 1957 - LéonDostert + LéonDostert earlymt diff --git a/data/xml/1960.earlymt.xml b/data/xml/1960.earlymt.xml index 405aace488..b258b7e20b 100644 --- a/data/xml/1960.earlymt.xml +++ b/data/xml/1960.earlymt.xml @@ -56,7 +56,7 @@ Summation by Chairman - LeonDostert + LeonDostert 1960.earlymt-nsmt.8 dostert-1960-summation diff --git a/data/xml/1961.earlymt.xml b/data/xml/1961.earlymt.xml index c2e2aaca59..8eadd7b769 100644 --- a/data/xml/1961.earlymt.xml +++ b/data/xml/1961.earlymt.xml @@ -269,7 +269,7 @@ An approach to the segmentation problem in speech analysis and language translation - GerardSalton + GerardSalton R. W.Thorpe 1961.earlymt-1.37 salton-thorpe-1961-approach diff --git a/data/xml/1962.earlymt.xml b/data/xml/1962.earlymt.xml index 5cad5b0ff0..38dbcb4d96 100644 --- a/data/xml/1962.earlymt.xml +++ b/data/xml/1962.earlymt.xml @@ -73,7 +73,7 @@ Langages Artificiels, Systèmes formels et Traduction automatique - BernardVauquois + BernardVauquois 211-236 1962.earlymt-1.9 vauquois-1962-langages diff --git a/data/xml/1976.earlymt.xml b/data/xml/1976.earlymt.xml index 37f342a45a..39dfa7769b 100644 --- a/data/xml/1976.earlymt.xml +++ b/data/xml/1976.earlymt.xml @@ -40,13 +40,13 @@ Automatic language processing project, Brigham Young University - Eldon G.Lytel + Eldon G.Lytel 14-23 lytel-1976-automatic <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish machine translation project on linguistic analysis, <fixed-case>U</fixed-case>niversity of <fixed-case>C</fixed-case>alifornia, <fixed-case>B</fixed-case>erkeley - William S-Y.Wang + William S-Y.Wang 24 1976.earlymt-1.6 wang-1976-chinese @@ -135,13 +135,13 @@ Programs to understand stories - Roger C.Schank + Roger C.Schank 65 schank-1976-programs Semantics and world knowledge in <fixed-case>MT</fixed-case> - YorickWilks + YorickWilks 67-69 1976.earlymt-1.20 wilks-1976-semantics diff --git a/data/xml/1978.tc.xml b/data/xml/1978.tc.xml index 4012c1a5fa..a16f8b92ee 100644 --- a/data/xml/1978.tc.xml +++ b/data/xml/1978.tc.xml @@ -22,7 +22,7 @@ Machine translation and artificial intelligence Implementing machine aids to translation - YorickWilks + YorickWilks 1978.tc-1.2 wilks-1978-machine diff --git a/data/xml/1980.tc.xml b/data/xml/1980.tc.xml index 8449895ee3..fce225a735 100644 --- a/data/xml/1980.tc.xml +++ b/data/xml/1980.tc.xml @@ -35,7 +35,7 @@ Terminological Data Banks: a model for a <fixed-case>B</fixed-case>ritish Linguistic Data Bank (<fixed-case>LDB</fixed-case>) - JohnMcNaught + JohnMcNaught 1980.tc-1.5 mcnaught-1980-terminological diff --git a/data/xml/1981.tc.xml b/data/xml/1981.tc.xml index 4d7cb470c8..43bc1b5c04 100644 --- a/data/xml/1981.tc.xml +++ b/data/xml/1981.tc.xml @@ -28,7 +28,7 @@ The evolution of machine translation systems - W. JohnHutchins + W. JohnHutchins 1981.tc-1.3 hutchins-1981-evolution @@ -143,13 +143,13 @@ Summary of discussion: Speculation; The Limits of Innovation - W. JohnHutchins + W. JohnHutchins 1981.tc-1.22 hutchins-1981-summary Concluding remarks - YorickWilks + YorickWilks 1981.tc-1.23 wilks-1981-concluding diff --git a/data/xml/1984.bcs.xml b/data/xml/1984.bcs.xml index 7a9dcf454c..fe86be894f 100644 --- a/data/xml/1984.bcs.xml +++ b/data/xml/1984.bcs.xml @@ -25,7 +25,7 @@ Methods of linguistic analysis in machine translation - JohnHutchins + JohnHutchins 1984.bcs-1.3 hutchins-1984-methods @@ -62,7 +62,7 @@ Searching single-word and multi-word dictionaries - Francis J.Smith + Francis J.Smith K.Devine P.Craig 1984.bcs-1.9 @@ -92,8 +92,8 @@ A software system for describing a grammar of machine translation: <fixed-case>GRADE</fixed-case> - Jun-ichiNakamura - MakotoNagao + Jun-ichiNakamura + MakotoNagao A new software system for describing a grammar of a machine translation system has been developed. This software system is called GRADE (GRAmmar DEscriber). GRADE has the following features: 1. GRADE allows a grammar writer to divide a whole grammar into several parts. Each part of the grammar is called a subgrammar. A subgrammar describes a step of the translation process. A whole grammar is then described by a network of sub-grammars. This network is called a subgrammar network. A subgrammar network allows a grammar writer to control the process of the translation precisely. When a subgrammar network in the analysis phase consists of a subgrammar for a noun-phrase (SG1) and a subgrammar for a verb-phase (SG2) in this sequence, the subgrammar network first applies SG1 to an input sentence, then applies SG2 to the result of an application of SG1, thus getting a syntactic structure for the input sentence. 2. A subgrammar consists of a set of rewriting rules. Rewriting rules in a subgrammar are applied for an input sentence in an appropriate order, which is specified in the description of the subgrammar. A rewriting rule transforms a tree structure into another tree structure. Rewriting rules use a powerful pattern matching algorithm to test their applicability to a tree structure. For example, a grammar writer can write a pattern that recognizes and parses an arbitrary numbers of sub-trees. Each node of a tree-structure has a list of pairs of a property name and a property value. A node can express a category name, a semantic marker, flags to control the translation process, and various other information. This tree-to-tree transformation operation by GRADE allows a grammar writer to describe all the processes of analysis, transfer and generation of a machine translation system with this uniform description capability of GRADE. 3. A subgrammar network or a subgrammar can be written in an entry of the dictionaries for a machine translation system. A subgrammar network or a subgrammar written in a dictionary entry is called a dictionary rule, which is specific for a word. When an input sentence contains a word which has a dictionary rule, it is applied to an input sentence at an appropriate point of a translation process. It can express more precise processing appropriate for that specific word that a general Subgrammar Network or Subgrammar. it also allows grammar writers to adjust a machine translation system to a specific domain easily. 4. GRADE is written in LISP. GRADE is implemented on FACOM M-382 and Symbolics 3600. GRADE is used in the machine translation system between Japanese and English. The project was started by the Japanese government in 1982. The effectiveness of GRADE has been demonstrated in the project. 1984.bcs-1.13 nakamura-nagao-1984-software @@ -113,7 +113,7 @@ Production of sentences: a general algorithm and a case study - GiovanniAdorni + GiovanniAdorni LinaMassone 1984.bcs-1.16 In this paper a procedure for the production of sentences is described, producing written sentences in a particular language starting from formal representations of their meaning. After a brief description of the internal representation used, the algorithm is presented, and some results and future trends are discussed. @@ -134,7 +134,7 @@ Machine translation with post editing versus a three-level integrated translator aid system - Alan K.Melby + Alan K.Melby 1984.bcs-1.19 The standard design for a computer-assisted translation system consists of data entry of source text, machine translation, and post editing (i.e. revision) of raw machine translation. This paper discusses this standard design and presents an alternative three-level design consisting of word processing integrated with terminology aids, simple source text processing, and a link to an off-line machine translation system. Advantages of the new design are discussed. melby-1984-machine @@ -143,7 +143,7 @@ The grammatical tagging of unrestricted <fixed-case>E</fixed-case>nglish text RogerGarside GeoffreyLeach - EricAtwell + EricAtwell 1984.bcs-1.20 garside-etal-1984-grammatical diff --git a/data/xml/1984.tc.xml b/data/xml/1984.tc.xml index f7625a5e65..4f6c74b035 100644 --- a/data/xml/1984.tc.xml +++ b/data/xml/1984.tc.xml @@ -68,7 +68,7 @@ Parallel Session <fixed-case>III</fixed-case>: Machine translation. Summary of discussion - JohnHutchins + JohnHutchins MoniqueL’Huillier BrianMcCluskey 1984.tc-1.9 diff --git a/data/xml/1985.tc.xml b/data/xml/1985.tc.xml index f51d060ce1..e10bd29525 100644 --- a/data/xml/1985.tc.xml +++ b/data/xml/1985.tc.xml @@ -114,7 +114,7 @@ Machine translation: <fixed-case>J</fixed-case>apanese perspectives - ToyoakiNishida + ToyoakiNishida ShujiDoshita 1985.tc-1.17 nishida-doshita-1985-machine diff --git a/data/xml/1985.tmi.xml b/data/xml/1985.tmi.xml index a1c791d788..686061e870 100644 --- a/data/xml/1985.tmi.xml +++ b/data/xml/1985.tmi.xml @@ -13,11 +13,11 @@ DougArnold LievenJaspaert RodJohnson - StevenKrauwer - MikeRosner - Louisdes Tombe + StevenKrauwer + MikeRosner + Louisdes Tombe NinoVarile - SusanWarwick + SusanWarwick arnold-etal-1985-mul @@ -27,15 +27,15 @@ A Case Study in Software Evolution: from Ariane-78.4 to Ariane-85 - ChristianBoitet + ChristianBoitet P.Guillaume M.Quezel-Ambrunaz boitet-etal-1985-case New Approaches to Machine Translation - Jaime G.Carbonell - MasaruTomita + Jaime G.Carbonell + MasaruTomita carbonell-tomita-1985-new @@ -47,7 +47,7 @@ On the Design of Expert Systems Grafted on <fixed-case>MT</fixed-case> Systems R.Gerber - ChristianBoitet + ChristianBoitet gerber-boitet-1985-design @@ -57,13 +57,13 @@ Machine Translation as an Expert Task - Roderick L.Johnson + Roderick L.Johnson PeterWhitelock johnson-whitelock-1985-machine Structural Correspondences and <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar - Ronald M.Kaplan + Ronald M.Kaplan kaplan-1985-structural @@ -73,34 +73,34 @@ Integrating Syntax and Semantics - Steven L.Lytinen + Steven L.Lytinen lytinen-1985-integrating <fixed-case>LMT</fixed-case>: a <fixed-case>P</fixed-case>rolog-Based Machine Translation System - MichaelMcCord + MichaelMcCord mccord-1985-lmt Recovering the Speaker’s Decisions during Mechanical Translation - David D.McDonald + David D.McDonald mcdonald-1985-recovering Structural Transformation in the Generation Stage of the <fixed-case>MU</fixed-case> <fixed-case>J</fixed-case>apanese to <fixed-case>E</fixed-case>nglish Machine Translation System - MakotoNagao + MakotoNagao nagao-1985-structural Interlingua Design for translator - SergeiNirenburg + SergeiNirenburg VictorRaskin - Allen B.Tucker + Allen B.Tucker nirenburg-etal-1985-interlingua The Level Hypothesis in Discourse Analysis - JamesPustejovsky + JamesPustejovsky pustejovsky-1985-level @@ -110,24 +110,24 @@ A Preliminary Linguistic Framework for <fixed-case>EUROTRA</fixed-case>, <fixed-case>J</fixed-case>une 1985 - Louisdes Tombe + Louisdes Tombe DougArnold LievenJaspaert RodJohnson - StevenKrauwer - MikeRosner + StevenKrauwer + MikeRosner NinoVarile - SusanWarwick + SusanWarwick des-tombe-etal-1985-preliminary Feasibility Study of Personal/Interactive Machine Translation Systems - MasaruTomita + MasaruTomita tomita-1985-feasibility Static Grammars: A Formalism for the Description of Linguistic Models - BernardVauquois + BernardVauquois SylvianeChappuy vauquois-chappuy-1985-static @@ -143,17 +143,17 @@ Reflections on the Knowledge Needed to Process Ill-Formed Language - Ralph M.Weischedel + Ralph M.Weischedel weischedel-1985-reflections Characteristics of the metal Machine Translation System at Production Stage - John S.White + John S.White white-1985-characteristics Relevance, Points of View and Dialogue Modelling - YorickWilks + YorickWilks wilks-1985-relevance diff --git a/data/xml/1986.tc.xml b/data/xml/1986.tc.xml index b357259005..a6a44cda35 100644 --- a/data/xml/1986.tc.xml +++ b/data/xml/1986.tc.xml @@ -22,7 +22,7 @@ Continuing training for the language professions: a survey of needs - Anthony F.Hartley + Anthony F.Hartley 1986.tc-1.2 hartley-1986-continuing @@ -76,7 +76,7 @@ Translation practice in <fixed-case>E</fixed-case>urope - DavidSmith + DavidSmith 1986.tc-1.11 smith-1986-translation @@ -106,7 +106,7 @@ Current machine translation systems developed with <fixed-case>GETA</fixed-case>’s methodology and software tools - ChristianBoitet + ChristianBoitet 1986.tc-1.16 boitet-1986-current diff --git a/data/xml/1987.mtsummit.xml b/data/xml/1987.mtsummit.xml index b6c6e79637..e1d14b5566 100644 --- a/data/xml/1987.mtsummit.xml +++ b/data/xml/1987.mtsummit.xml @@ -11,7 +11,7 @@ Present and future of machine translation systems — an introduction to the <fixed-case>MT</fixed-case> Summit — - MakotoNagao + MakotoNagao 1987.mtsummit-1.1 nagao-1987-present @@ -29,7 +29,7 @@ Prospects in Machine Translation - W. JohnHutchins + W. JohnHutchins 1987.mtsummit-1.4 hutchins-1987-prospects @@ -90,7 +90,7 @@ <fixed-case>TAURAS</fixed-case>: The Toshiba Machine Translation System - Shin-yaAmano + Shin-yaAmano YoshinaoTsutsumi 1987.mtsummit-1.14 amano-tsutsumi-1987-tauras @@ -121,7 +121,7 @@ Interlingua - Technical Prospect of Interlingua - - Jaime G.Carbonell + Jaime G.Carbonell 1987.mtsummit-1.19 carbonell-1987-interlingua @@ -145,7 +145,7 @@ What is ‘<fixed-case>PIVOT</fixed-case>’? - Jun-ichiTsujii + Jun-ichiTsujii 1987.mtsummit-1.23 tsujii-1987-pivot @@ -194,14 +194,14 @@ The Current Stage of the Mu-Project - Jun-ichiTsujii + Jun-ichiTsujii 1987.mtsummit-1.31 tsujii-1987-current <fixed-case>CMU</fixed-case> Project - MasaruTomita - Jaime G.Carbonell + MasaruTomita + Jaime G.Carbonell 1987.mtsummit-1.32 tomita-carbonell-1987-cmu @@ -262,7 +262,7 @@ Governmental Views of <fixed-case>MT</fixed-case> for <fixed-case>I</fixed-case>taly - AntonioZampolli + AntonioZampolli 1987.mtsummit-1.42 zampolli-1987-governmental @@ -280,7 +280,7 @@ Concluding Remarks - MakotoNagao + MakotoNagao 1987.mtsummit-1.45 nagao-1987-concluding diff --git a/data/xml/1987.tc.xml b/data/xml/1987.tc.xml index a295bf243a..92bfc8651b 100644 --- a/data/xml/1987.tc.xml +++ b/data/xml/1987.tc.xml @@ -77,7 +77,7 @@ A survey of termbanks worldwide - JohnMcNaught + JohnMcNaught 1987.tc-1.11 mcnaught-1987-survey diff --git a/data/xml/1988.tc.xml b/data/xml/1988.tc.xml index cf5fa76b08..8dc0e6809f 100644 --- a/data/xml/1988.tc.xml +++ b/data/xml/1988.tc.xml @@ -112,7 +112,7 @@ Themes in the work of Margaret Masterman - YorickWilks + YorickWilks 1988.tc-1.17 wilks-1988-themes diff --git a/data/xml/1988.tmi.xml b/data/xml/1988.tmi.xml index 33c6de7d45..d5da22c02e 100644 --- a/data/xml/1988.tmi.xml +++ b/data/xml/1988.tmi.xml @@ -24,37 +24,37 @@ A method of analyzing <fixed-case>J</fixed-case>apanese speech act types KiyoshiKogure HitoshiIida - KeiYoshimoto + KeiYoshimoto HiroyukiMaeda - MasakoKume + MasakoKume SusumuKato 1988.tmi-1.3 kogure-etal-1988-method Lexical realization in natural language generation - SergeiNirenburg + SergeiNirenburg RitaMcCardell - EricNyberg + EricNyberg ScottHuffman EdwardKernschaft - IreneNirenburg + IreneNirenburg 1988.tmi-1.4 nirenburg-etal-1988-lexical Multi-lingual text generation and the Meaning-Text Theory - RichardKittredge + RichardKittredge LidijaIordanskaja - AlainPolguère + AlainPolguère 1988.tmi-1.5 kittredge-etal-1988-multi ‘Relaxed’ compositionality in machine translation DougArnold - StevenKrauwer - Louisdes Tombe + StevenKrauwer + Louisdes Tombe LouisaSadler 1988.tmi-1.6 arnold-etal-1988-relaxed @@ -67,13 +67,13 @@ Towards speech translation systems - MasaruTomita + MasaruTomita 1988.tmi-1.8 tomita-1988-towards The Universal Parser Compiler and its application to a speech translation system - MasaruTomita + MasaruTomita MarionKee HiroakiSaito TerukoMitamura @@ -84,13 +84,13 @@ Functional descriptions as a formalism for linguistic knowledge representation in a generation oriented approach MiyoOtani - NathalieSimonin + NathalieSimonin 1988.tmi-1.10 otani-simonin-1988-functional Computational complexity of left-associative grammar - RolandHausser + RolandHausser 1988.tmi-1.11 hausser-1988-computational @@ -103,7 +103,7 @@ A translation aid system using flexible text retrieval based on syntax-matching - EiichiroSumita + EiichiroSumita YutakaTsutsumi 1988.tmi-1.13 sumita-tsutsumi-1988-translation @@ -124,7 +124,7 @@ A principle-based <fixed-case>K</fixed-case>orean/<fixed-case>J</fixed-case>apanese machine translation system: <fixed-case>NARA</fixed-case> - Hee SungChung + Hee SungChung 1988.tmi-1.16 chung-1988-principle @@ -132,7 +132,7 @@ A comparative study of <fixed-case>J</fixed-case>apanese and <fixed-case>E</fixed-case>nglish sublanguage patterns VirginiaTeller MichikoKosaka - RalphGrishman + RalphGrishman 1988.tmi-1.17 teller-etal-1988-comparative @@ -146,11 +146,11 @@ A statistical approach to <fixed-case>F</fixed-case>rench/<fixed-case>E</fixed-case>nglish translation P.Brown - JohnCocke - StephenDella Pietra - Vincent J.Della Pietra - FrederickJelinek - Robert L.Mercer + JohnCocke + StephenDella Pietra + Vincent J.Della Pietra + FrederickJelinek + Robert L.Mercer P.Roossin 1988.tmi-1.19 brown-etal-1988-statistical-approach @@ -165,7 +165,7 @@ Application of natural language interface to a machine translation problem Heidi M.Johnson YukikoSekine - John S.White + John S.White Gil C.Kim 1988.tmi-1.21 johnson-etal-1988-application @@ -178,7 +178,7 @@ Bernard Vauqois’ contribution to the theory and practice of building <fixed-case>MT</fixed-case> systems: a historical perspective - ChristianBoitet + ChristianBoitet 1988.tmi-1.23 boitet-1988-bernard diff --git a/data/xml/1989.mtsummit.xml b/data/xml/1989.mtsummit.xml index 2a2b2dd2d8..d796c30d55 100644 --- a/data/xml/1989.mtsummit.xml +++ b/data/xml/1989.mtsummit.xml @@ -14,7 +14,7 @@ Two years after the <fixed-case>MT</fixed-case> Summit - MakotoNagao + MakotoNagao 1989.mtsummit-1.1 nagao-1989-two @@ -91,13 +91,13 @@ <fixed-case>LMT</fixed-case> - MichaelMcCord + MichaelMcCord 1989.mtsummit-1.14 mccord-1989-lmt The Rosetta project - JanLandsbergen + JanLandsbergen 1989.mtsummit-1.15 landsbergen-1989-rosetta @@ -117,7 +117,7 @@ <fixed-case>J</fixed-case>apanese view of the future of machine translation - MakotoNagao + MakotoNagao 1989.mtsummit-1.18 nagao-1989-japanese @@ -140,7 +140,7 @@ Computational Linguistics and <fixed-case>MT</fixed-case> in <fixed-case>I</fixed-case>taly - AntonioZampolli + AntonioZampolli 1989.mtsummit-1.22 zampolli-1989-computational @@ -173,7 +173,7 @@ <fixed-case>KBMT</fixed-case>-89 - A knowledge-based <fixed-case>MT</fixed-case> project at <fixed-case>C</fixed-case>arnegie <fixed-case>M</fixed-case>ellon <fixed-case>U</fixed-case>niversity - SergeiNirenburg + SergeiNirenburg 1989.mtsummit-1.27 nirenburg-1989-kbmt diff --git a/data/xml/1989.tc.xml b/data/xml/1989.tc.xml index bf09f36128..c9ed0d7448 100644 --- a/data/xml/1989.tc.xml +++ b/data/xml/1989.tc.xml @@ -111,7 +111,7 @@ Speech recognition, artificial intelligence and translation: how rosy a future? - HenryThompson + HenryThompson 1989.tc-1.17 thompson-1989-speech diff --git a/data/xml/1990.tc.xml b/data/xml/1990.tc.xml index 6addff3e4f..2da27eae66 100644 --- a/data/xml/1990.tc.xml +++ b/data/xml/1990.tc.xml @@ -16,7 +16,7 @@ Machine Translation and Machine-Aided Translation - What’s going on - Jun-ichiTsujii + Jun-ichiTsujii 1990.tc-1.1 tsujii-1990-machine diff --git a/data/xml/1991.iwpt.xml b/data/xml/1991.iwpt.xml index de1bfb050b..63fa622b21 100644 --- a/data/xml/1991.iwpt.xml +++ b/data/xml/1991.iwpt.xml @@ -20,14 +20,14 @@ Proceedings of the Second International Workshop on Parsing Technologies (<fixed-case>IWPT</fixed-case> ’91) - MasaruTomita + MasaruTomita MartinKay - RobertBerwick - EvaHajicova - AravindJoshi - RonaldKaplan - MakotoNagao - YorickWilks + RobertBerwick + EvaHajicova + AravindJoshi + RonaldKaplan + MakotoNagao + YorickWilks i-viii 1991.iwpt-1.1 February 13-25, 1991 @@ -60,7 +60,7 @@ Preprocessing and lexicon design for parsing technical text - Robert P.Futrelle + Robert P.Futrelle Christopher E.Dunn Debra S.Ellis Maurice J.Pescitelli, Jr. @@ -107,7 +107,7 @@ Using Inheritance in <fixed-case>O</fixed-case>bject-<fixed-case>O</fixed-case>riented <fixed-case>P</fixed-case>rogramming to Combine Syntactic Rules and Lexical Idiosyncrasies - BenoîtHabert + BenoîtHabert 79-88 1991.iwpt-1.10 In parsing idioms and frozen expressions in French, one needs to combine general syntactic rules and idiosyncratic constraints. The inheritance structure provided by Object-Oriented Programming languages, and more specifically the combination of methods present in CLOS, Common Lisp Object System, appears as an elegant and efficient approach to deal with such a complex interaction. @@ -161,7 +161,7 @@ Processing Unknown Words in Continuous Speech Recognition KenjiKita TerumasaEhara - TsuyoshiMorimoto + TsuyoshiMorimoto 136-142 1991.iwpt-1.16 Current continuous speech recognition systems essentially ignore unknown words. Systems are designed to recognize words in the lexicon. However, for using speech recognition systems in real applications of spoken-language processing, it is very important to process unknown words. This paper proposes a continuous speech recognition method which accepts any utterance that might include unknown words. In this method, words not in the lexicon are transcribed as phone sequences, while words in the lexicon are recognized correctly. The HMM-LR speech recognition system, which is an integration of Hidden Markov Models and generalized LR parsing, is used as the baseline system, and enhanced with the trigram model of syllables to take into account the stochastic characteristics of a language. Preliminary results indicate that our approach is very promising. @@ -170,7 +170,7 @@ The Specification and Implementation of Constraint-Based Unification Grammars RobertCarpenter - CarlPollard + CarlPollard AlexFranz 143-153 1991.iwpt-1.17 @@ -179,8 +179,8 @@ Probabilistic <fixed-case>LR</fixed-case> Parsing for General Context-Free Grammars - See-KiongNg - MasaruTomita + See-KiongNg + MasaruTomita 154-163 1991.iwpt-1.18 To combine the advantages of probabilistic grammars and generalized LR parsing, an algorithm for constructing a probabilistic LR parser given a probabilistic context-free grammar is needed. In this paper, implementation issues in adapting Tomita’s generalized LR parser with graph-structured stack to perform probabilistic parsing are discussed. Wright and Wrigley (1989) has proposed a probabilistic LR-table construction algorithm for non-left-recursive context-free grammars. To account for left recursions, a method for computing item probabilities using the generation of systems of linear equations is presented. The notion of deferred probabilities is proposed as a means for dealing with similar item sets with differing probability assignments. @@ -205,7 +205,7 @@ Unification-Based Dependency Parsing of Governor-Final Languages Hyuk-ChulKwon - AesunYoon + AesunYoon 182-192 1991.iwpt-1.21 This paper describes a unification-based dependency parsing method for governor-final languages. Our method can parse not only projective sentences but also non-projective sentences. The feature structures in the tradition of the unification-based formalism are used for writing dependency relations. We use a structure sharing and a local ambiguity packing to save storage. @@ -213,8 +213,8 @@ <fixed-case>P</fixed-case>earl: A Probabilistic Chart Parser - David M.Magerman - Mitchell P.Marcus + David M.Magerman + Mitchell P.Marcus 193-199 1991.iwpt-1.22 This paper describes a natural language parsing algorithm for unrestricted text which uses a probability-based scoring function to select the “best” parse of a sentence. The parser, Pearl, is a time-asynchronous bottom-up chart parser with Earley-type top-down prediction which pursues the highest-scoring theory in the chart, where the score of a theory represents the extent to which the context of the sentence predicts that interpretation. This parser differs from previous attempts at stochastic parsers in that it uses a richer form of conditional probabilities based on context to predict likelihood. Pearl also provides a framework for incorporating the results of previous work in part-of-speech assignment, unknown word models, and other probabilistic models of linguistic features into one parsing tool, interleaving these techniques instead of using the traditional pipeline architecture. In preliminary tests, Pearl has been successful at resolving part-of-speech and word (in speech processing) ambiguity, determining categories for unknown words, and selecting correct parses first using a very loosely fitting covering grammar. @@ -232,7 +232,7 @@ Stochastic Context-Free Grammars for Island-Driven Probabilistic Parsing AnnaCorazza - RenatoDe Mori + RenatoDe Mori RobertoGretter GiorgioSatta 210-217 diff --git a/data/xml/1991.mtsummit.xml b/data/xml/1991.mtsummit.xml index babf1f7c0a..9e84fb2f1a 100644 --- a/data/xml/1991.mtsummit.xml +++ b/data/xml/1991.mtsummit.xml @@ -22,7 +22,7 @@ Advances in Machine Translation Research in <fixed-case>IBM</fixed-case> MoriRimon PilarMartinez - MichaelMcCord + MichaelMcCord UlrikeSchwall 11-18 1991.mtsummit-papers.2 @@ -31,8 +31,8 @@ <fixed-case>ULTRA</fixed-case>: A Multi-lingual Machine Translator - DavidFarwell - YorickWilks + DavidFarwell + YorickWilks 19-24 1991.mtsummit-papers.3 ULTRA (Universal Language TRAnslator) is a multilingual, interlingual machine translation system currently under development at the Computing Research Laboratory at New Mexico State University. It translates between five languages (Chinese, English, German, Japanese, Spanish) with vocabularies in each language based on approximately 10,000 word senses. The major design criteria are that the system be robust and general purpose with simple to use utilities for customization to suit the needs of particular users. This paper describes the central characteristics of the system: the intermediate representation, the language components, semantic and pragmatic processes, and supporting lexical entry tools. @@ -42,7 +42,7 @@ Capturing Language-Specific Semantic Distinctions in Interlingua-based <fixed-case>MT</fixed-case> JamesBarnett InderjeetMani - ElaineRich + ElaineRich ChinatsuAone KevinKnight Juan C.Martinez @@ -72,7 +72,7 @@ Applying an Experimental <fixed-case>MT</fixed-case> System to a Realistic Problem - PierretteBouillon + PierretteBouillon KatharinaBoeseleldt 45-49 1991.mtsummit-papers.7 @@ -91,8 +91,8 @@ An Efficient Interlingua Translation System for Multi-lingual Document Production TerukoMitamura - Eric H.Nyberg - Jaime G.Carbonell + Eric H.Nyberg + Jaime G.Carbonell 55-61 1991.mtsummit-papers.9 Knowledge-based interlingual machine translation systems produce semantically accurate translations, but typically require massive knowledge acquisition. This paper describes KANT, a system that reduces this requirement to produce practical, scalable, and accurate KBMT applications. First, the set of requirements is discussed, then the full KANT architecture is illustrated, and finally results from a fully implemented prototype are presented. @@ -119,9 +119,9 @@ <fixed-case>EJ</fixed-case>/<fixed-case>JE</fixed-case> Machine Translation System <fixed-case>ASTRANSAC</fixed-case> — Extensions toward Personalization - HidekiHirakawa + HidekiHirakawa HiroyasuNogami - Shin-yaAmano + Shin-yaAmano 73-80 1991.mtsummit-papers.12 The demand for personal use of a translation system seems to be increasing in accordance with the improvement in MT quality. A recent portable and powerful engineering workstation, such as AS1000 (SPARC LT), enables us to develop a personal-use oriented MT system This paper describes the outline of ASTRANSAC (an English-Japanese/Japanese- English bi-directional MT system) and the extensions related to the personalization of ASTRANSAC, which have been newly made since the MT Summit II. @@ -150,8 +150,8 @@ Toward High Performance Machine Translation: Preliminary Results from Massively Parallel Memory-Based Translation on <fixed-case>SNAP</fixed-case> HiroakiKitano - DanMoldovan - SeunghoCha + DanMoldovan + SeunghoCha 93-100 1991.mtsummit-papers.15 This paper describes a memory-based machine translation system developed for the Semantic Net- work Array Processor (SNAP). The goal of our work is to develop a scalable and high-performance memory-based machine translation system which utilizes the high degree of parallelism provided by the SNAP machine. We have implemented an experimental machine translation system DMSNAP as a central part of a real-time speech-to-speech dia- logue translation system. It is a SNAP version of the ΦDMDIALOG speech-to-speech translation system. Memory-based natural language processing and syntactic constraint network model has been incorporated using parallel marker-passing which is directly supported from hardware level. Experimental results demonstrate that the parsing of a sentence is done in the order of milliseconds. @@ -160,7 +160,7 @@ Toward an <fixed-case>MT</fixed-case> System without Pre-Editing: Effects of a New Method in <fixed-case>ALT</fixed-case>-<fixed-case>J</fixed-case>/<fixed-case>E</fixed-case> SatoruIkehara - SatoshiShirai + SatoshiShirai AkioYokoo HiromiNakaiwa 101-106 @@ -170,7 +170,7 @@ <fixed-case>KIELIKONE</fixed-case> Machine Translation Workstation - HarriJäppinen + HarriJäppinen L.Kulikov A.Ylä-Rotiala 107-111 @@ -243,7 +243,7 @@ HaraldHille MartinKay FrederickKlein - SergeiNirenburg + SergeiNirenburg 131-140 1991.mtsummit-panels.4 gross-etal-1991-translators @@ -251,7 +251,7 @@ Evaluation of <fixed-case>MT</fixed-case> Systems MargaretKing - YorickWilks + YorickWilks StureAllen UlrichHeid DorisAlbisser diff --git a/data/xml/1991.tc.xml b/data/xml/1991.tc.xml index 54d5550a45..43bd733ca0 100644 --- a/data/xml/1991.tc.xml +++ b/data/xml/1991.tc.xml @@ -11,14 +11,14 @@ Why Computers Do Not Translate Better - W. JohnHutchins + W. JohnHutchins 1991.tc-1.1 hutchins-1991-computers <fixed-case>TEI</fixed-case>-<fixed-case>TERM</fixed-case>: an <fixed-case>SGML</fixed-case>-based interchange format for terminology files The <fixed-case>E</fixed-case>uro<fixed-case>T</fixed-case>erm<fixed-case>B</fixed-case>ank - AlanMelby - Sue EllenWright + AlanMelby + Sue EllenWright 1991.tc-1.2 melby-wright-1991-tei @@ -78,7 +78,7 @@ Current Practical Machine Translation Systems in <fixed-case>J</fixed-case>apan and Future Directions <fixed-case>EUROTRA</fixed-case>: an assessment of the current state of the <fixed-case>EC</fixed-case>’s <fixed-case>MT</fixed-case> programme - MakotoNagao + MakotoNagao 1991.tc-1.12 nagao-1991-current @@ -91,7 +91,7 @@ Machine Translation Seen as Interactive Multilingual Text Generation - Harold L.Somers + Harold L.Somers DannyJones 1991.tc-1.14 somers-jones-1991-machine diff --git a/data/xml/1992.tc.xml b/data/xml/1992.tc.xml index 44f6dc0ef6..a9424ddd16 100644 --- a/data/xml/1992.tc.xml +++ b/data/xml/1992.tc.xml @@ -93,11 +93,11 @@ <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>wedish translation of dialogue software - HiyanAlshawi - DavidCarter + HiyanAlshawi + DavidCarter StevePulman - MannyRayner - BjörnGambäck + MannyRayner + BjörnGambäck 1992.tc-1.14 alshawi-etal-1992-english diff --git a/data/xml/1992.tmi.xml b/data/xml/1992.tmi.xml index 9c6046b137..0752f28239 100644 --- a/data/xml/1992.tmi.xml +++ b/data/xml/1992.tmi.xml @@ -11,10 +11,10 @@ Translation equivalence and lexicalization in the <fixed-case>ACQUILEX</fixed-case> <fixed-case>LKB</fixed-case> AntonioSanfilippo - TedBriscoe + TedBriscoe AnnCopestake - Maria AntòniaMartí - MarionaTaulé + Maria AntòniaMartí + MarionaTaulé AntoniettaAlonge 1992.tmi-1.1 sanfilippo-etal-1992-translation @@ -63,18 +63,18 @@ Analysis, statistical transfer, and synthesis in machine translation - Peter F.Brown - Stephen A.Della Pietra - Vincent J.Della Pietra - John D.Lafferty - Robert L.Mercer + Peter F.Brown + Stephen A.Della Pietra + Vincent J.Della Pietra + John D.Lafferty + Robert L.Mercer 1992.tmi-1.8 brown-etal-1992-analysis Using bilingual materials to develop word sense disambiguation methods - William A.Gale - Kenneth W.Church + William A.Gale + Kenneth W.Church DavidYarowsky 1992.tmi-1.9 gale-etal-1992-using @@ -87,7 +87,7 @@ Are the grammars so far developed appropriate to recognize the real structure of a sentence? - MakotoNagao + MakotoNagao 1992.tmi-1.11 nagao-1992-grammars @@ -100,7 +100,7 @@ Interactive multilingual text generation for a monolingual user - HaroldSomers + HaroldSomers 1992.tmi-1.13 somers-1992-interactive @@ -139,14 +139,14 @@ Contextual constraints for <fixed-case>MT</fixed-case> KurtEberle - WalterKasper + WalterKasper ChristianRohrer 1992.tmi-1.19 eberle-etal-1992-contextual The <fixed-case>KANT</fixed-case> perspective: a critique of pure transfer (and pure interlingua, pure statistics, .. ) - Jaime G.Carbonell + Jaime G.Carbonell TerukoMitamura Eric H.Nyberg 3rd 1992.tmi-1.20 @@ -168,7 +168,7 @@ Combining rationalist and empiricist approaches to machine translation - RalphGrishman + RalphGrishman MichikoKosaka 1992.tmi-1.23 grishman-kosaka-1992-combining diff --git a/data/xml/1993.eamt.xml b/data/xml/1993.eamt.xml index a5ca89f159..c1985171ec 100644 --- a/data/xml/1993.eamt.xml +++ b/data/xml/1993.eamt.xml @@ -21,8 +21,8 @@ Knowledge extraction from machine-readable dictionaries: an evaluation - NancyIde - JeanVéronis + NancyIde + JeanVéronis 19-34 Machine-readable versions of everyday dictionaries have been seen as a likely source of information for use in natural language processing because they contain an enormous amount of lexical and semantic knowledge. However, after 15 years of research, the results appear to be disappointing. No comprehensive evaluation of machine-readable dictionaries (MRDs) as a knowledge source has been made to date, although this is necessary to determine what, if anything, can be gained from MRD research. To this end, this paper will first consider the postulates upon which MRD research has been based over the past fifteen years, discuss the validity of these postulates, and evaluate the results of this work. We will then propose possible future directions and applications that may exploit these years of effort, in the light of current directions in not only NLP research, but also fields such as lexicography and electronic publishing. ide-veronis-1993-knowledge @@ -51,21 +51,21 @@ Memory-based lexical acquisition and processing - WalterDaelemans + WalterDaelemans 85-98 Current approaches to computational lexicology in language technology are knowledge-based (competence-oriented) and try to abstract away from specific formalisms, domains, and applications. This results in severe complexity, acquisition and reusability bottlenecks. As an alternative, we propose a particular performance-oriented approach to Natural Language Processing based on automatic memory-based learning of linguistic (lexical) tasks. The consequences of the approach for computational lexicology are discussed, and the application of the approach on a number of lexical acquisition and disambiguation tasks in phonology, morphology and syntax is described. daelemans-1993-memory Typed feature formalisms as a common basis for linguistic specification - Hans-UlrichKrieger + Hans-UlrichKrieger 101-119 Typed feature formalisms (TFF) play an increasingly important role in NLP and, in particular, in MT. Many of these systems are inspired by Pollard and Sag’s work on Head-Driven Phrase Structure Grammar (HPSG), which has shown that a great deal of syntax and semantics can be neatly encoded within TFF. However, syntax and semantics are not the only areas in which TFF can be beneficially employed. In this paper, I will show that TFF can also be used as a means to model finite automata (FA) and to perform certain types of logical inferencing. In particular, I will (i) describe how FA can be defined and processed within TFF and (ii) propose a conservative extension to HPSG, which allows for a restricted form of semantic processing within TFF, so that the construction of syntax and semantics can be intertwined with the simplification of the logical form of an utterance. The approach which I propose provides a uniform, HPSG-oriented framework for different levels of linguistic processing, including allomorphy and morphotactics, syntax, semantics, and logical form simplification. krieger-1993-typed <fixed-case>E</fixed-case>uropean efforts towards standardizing language resources - NicolettaCalzolari + NicolettaCalzolari 121-130 This paper aims at providing a broad overview of the situation in Europe during the past few years, regarding efforts and concerted actions towards the standardization of large language resources, with particular emphasis on what is taking place in the fields of Computational Lexicons and Text Corpora. Attention will be focused on the plans, work in progress, and a few preliminary results of the LRE project EAGLES (Expert Advisory Group on Language Engineering Standards). calzolari-1993-european @@ -80,18 +80,18 @@ A generic lexical model - DanielBachut + DanielBachut IsabelleDuquennoy LeeHumphreys TitaKyriakopoulou AnneMonceaux - FiammettaNamer + FiammettaNamer Jean-MichelOmbrouck ClairePerrey AnnePoncet-Montange Maria-ClaudiaPuerta CarolineRaffy - BrigitteRoudaud + BrigitteRoudaud SimonSabbagh 141-158 Linguistic engineering presupposes lexical resources. For translation, it is highly desirable that a Machine Translation engine and human translators should have access to the same dictionary information. The present paper describes a multilingual dictionary model, which integrates information for use by both humans and a variety of NLP systems. The model is used as a reference in the design of commercial translation products. @@ -107,7 +107,7 @@ The use of terminological knowledge bases in software localisation VangelisKarkaletsis - Constantine D.Spyropoulos + Constantine D.Spyropoulos George A.Vouros 174-188 This paper describes the work that was undertaken in the Glossasoft project in the area of terminology management. Some of the draw-backs of existing terminology management systems are outlined and an alternative approach to maintaining terminological data is proposed. The approach which we advocate relies on knowledge-based representation techniques. These are used to model conceptual knowledge about the terms included in the database, general knowledge about the subject domain, application-specific knowledge, and - of course - language-specific terminological knowledge. We consider the multifunctionality of the proposed architecture to be one of its major advantages. To illustrate this, we outline how the knowledge representation scheme, which we suggest, could be drawn upon in message generation and machine-assisted translation. diff --git a/data/xml/1993.iwpt.xml b/data/xml/1993.iwpt.xml index d7f19ff470..6b1ed1e9e8 100644 --- a/data/xml/1993.iwpt.xml +++ b/data/xml/1993.iwpt.xml @@ -27,10 +27,10 @@ Proceedings of the Third International Workshop on Parsing Technologies (<fixed-case>IWPT</fixed-case> ’93) - HarryBunt + HarryBunt i-vii 1993.iwpt-1.1 - + bunt-1993-proceedings @@ -51,7 +51,7 @@ Parsing as Dynamic Interpretation - HarryBunt + HarryBunt Kovan der Sloat 27-38 1993.iwpt-1.4 @@ -77,8 +77,8 @@ A New Transformation into Deterministically Parsable Form for Natural Language Grammars Nigel R.Ellis - RobertoGarigliano - Richard G.Morgan + RobertoGarigliano + Richard G.Morgan 61-72 1993.iwpt-1.7 Marcus demonstrated that it was possible to construct a deterministic grammar/interpreter for a subset of natural language [Marcus, 1980]. Although his work with PARSIFAL pioneered the field of deterministic natural language parsing, his method has several drawbacks: • The rules and actions in the grammar / interpreter are so embedded that it is difficult to distinguish between them. • The grammar / interpreter is very difficult to construct (the small grammar shown in [Marcus, 1980] took about four months to construct). • The grammar is very difficult to maintain, as a small change may have several side effects. This paper outlines a set of structure transformations for converting a non-deterministic grammar into deterministic form. The original grammar is written in a context free form; this is then transformed to resolve ambiguities. @@ -89,7 +89,7 @@ JoeGarman JefferyMartin PaolaMerlo - AmyWeinberg + AmyWeinberg 73-88 1993.iwpt-1.8 In this paper we discuss the design and implementation of a parser for German and Arabic, which is currently being used in a tutoring system for foreign language training. Computer-aided language tutoring is a good application for testing the robustness and flexibility of a parsing system, since the input is usually ungrammatical in some way. Efficiency is also a concern, as tutoring applications typically run on personal computers, with the parser sharing memory with other components of the system. Our system is principle-based, which ensures a compact representation, and improves portability, needed in order to extend the initial design from German to Arabic and (eventually) Spanish. Currently, the parser diagnoses agreement errors, case errors, selection errors, and some word order errors. The parser can handle simple and complex declaratives and questions, topicalisations, verb movement, relative clauses — broad enough coverage to be useful in the design of real exercises and dialogues. @@ -116,7 +116,7 @@ Structural Disambiguation in <fixed-case>J</fixed-case>apanese by Evaluating Case Structures based on Examples in a Case Frame Dictionary SadaoKurohashi - MakotoNagao + MakotoNagao 111-122 1993.iwpt-1.11 A case structure expression is one of the most important forms to represent the meaning of a sentence. Case structure analysis is usually performed by consulting case frame information in verb dictionaries and by selecting a proper case frame for an input sentence. However, this analysis is very difficult because of word sense ambiguity and structural ambiguity. A conventional method for solving these problems is to use the method of selectional restriction, but this method has a drawback in the semantic marker (SM) system – the trade-off between descriptive power and construction cost. This paper describes a method of case structure analysis of Japanese sentences which overcomes the drawback in the SM system, concentrating on the structural disambiguation. This method selects a proper case frame for an input by the similarity measure between the input and typical example sentences of each case frame. When there are two or more possible readings for an input because of structural ambiguity, the best reading will be selected by evaluating case structures in each possible reading by the similarity measure with typical example sentences of case frames. @@ -124,8 +124,8 @@ <fixed-case>GLR</fixed-case>* – An Efficient Noise-skipping Parsing Algorithm For Context Free Grammars - AlonLavie - MasaruTomita + AlonLavie + MasaruTomita 123-134 1993.iwpt-1.12 This paper describes GLR*, a parser that can parse any input sentence by ignoring unrecognizable parts of the sentence. In case the standard parsing procedure fails to parse an input sentence, the parser nondeterministically skips some word(s) in the sentence, and returns the parse with fewest skipped words. Therefore, the parser will return some parse(s) with any input sentence, unless no part of the sentence can be recognized at all. The problem can be defined in the following way: Given a context-free grammar G and a sentence S, find and parse S' – the largest subset of words of S, such that S' \in L(G). The algorithm described in this paper is a modification of the Generalized LR (Tomita) parsing algorithm [Tomita, 1986] . The parser accommodates the skipping of words by allowing shift operations to be performed from inactive state nodes of the Graph Structured Stack. A heuristic similar to beam search makes the algorithm computationally tractable. There have been several other approaches to the problem of robust parsing, most of which are special purpose algorithms [Carbonell and Hayes, 1984] , [Ward, 1991] and others. Because our approach is a modification to a standard context-free parsing algorithm, all the techniques and grammars developed for the standard parser can be applied as they are. Also, in case the input sentence is by itself grammatical, our parser behaves exactly as the standard GLR parser. The modified parser, GLR*, has been implemented and integrated with the latest version of the Generalized LR Parser/Compiler [Tomita et al , 1988], [Tomita, 1990]. We discuss an application of the GLR* parser to spontaneous speech understanding and present some preliminary tests on the utility of the GLR* parser in such settings. @@ -149,7 +149,7 @@ The Interplay of Syntactic and Semantic Node Labels in Partial Parsing - David D.McDonald + David D.McDonald 171-186 1993.iwpt-1.15 Our natural language comprehension system, “Sparser” , uses a semantic grammar in conjunction with a domain model that defines the categories and already-known individuals that can be expected in the sublanguages we are studying, the most significant of which to date has been articles from the Wall Street Journal’s “Who’s News” column. In this paper we describe the systematic use of default syntactic rules in this grammar: an alternative set of labels on consitutents that are used to capture generalities in the semantic interpretation of constructions like the verbal auxiliaries or many adverbials. Syntactic rules form the basis of a set of schemas in a Tree Adjoining Grammar that are used as templates from which to create the primary, semantically labeled rules of the grammar as part of defining the categories in the domain models. This design permits the semantic grammar to be developed on a linguistically principled basis since all the rules must conform to syntactically sound patterns. @@ -218,7 +218,7 @@ Evaluation of <fixed-case>TTP</fixed-case> Parser: A Preliminary Report - TomekStrzalkowski + TomekStrzalkowski Peter G. N.Scheyen 293-308 1993.iwpt-1.23 @@ -228,9 +228,9 @@ Frequency Estimation of Verb Subcategorization Frames Based on Syntactic and Multidimensional Statistical Analysis AkiraUshioda - David A.Evans + David A.Evans TedGibson - AlexWaibel + AlexWaibel 309-318 1993.iwpt-1.24 We describe a mechanism for automatically estimating frequencies of verb subcategorization frames in a large corpus. A tagged corpus is first partially parsed to identify noun phrases and then a regular grammar is used to estimate the appropriate subcategorization frame for each verb token in the corpus. In an experiment involving the identification of six fixed subcategorization frames, our current system showed more than 80% accuracy. In addition, a new statistical method enables the system to learn patterns of errors based on a set of training samples and substantially improves the accuracy of the frequency estimation. diff --git a/data/xml/1993.mtsummit.xml b/data/xml/1993.mtsummit.xml index 12465135e8..ece60cdea1 100644 --- a/data/xml/1993.mtsummit.xml +++ b/data/xml/1993.mtsummit.xml @@ -10,14 +10,14 @@ Machine Translation: What have we to do? - MakotoNagao + MakotoNagao 3-10 1993.mtsummit-1.1 nagao-1993-machine Latest Developments in Machine Translation Technology: Beginning a New Era in <fixed-case>MT</fixed-case> Research - JohnHutchins + JohnHutchins 11-34 1993.mtsummit-1.2 hutchins-1993-latest @@ -80,14 +80,14 @@ Verbmobil: Translation of Face-To-Face Dialogs - WolfgangWahlster + WolfgangWahlster 127-136 1993.mtsummit-1.11 wahlster-1993-verbmobil Corpora and Machine Translation - YorickWilks + YorickWilks 137-146 1993.mtsummit-1.12 wilks-1993-corpora @@ -109,7 +109,7 @@ Practical Speech Translation Systems will Integrate Human Expertise, Multimodal Communication, and Interactive Disambiguation - ChristianBoitet + ChristianBoitet 173-176 1993.mtsummit-1.15 boitet-1993-practical @@ -130,7 +130,7 @@ A Direction of <fixed-case>MT</fixed-case> Development - SergeiNirenburg + SergeiNirenburg 189-194 1993.mtsummit-1.18 nirenburg-1993-direction @@ -144,7 +144,7 @@ After Linguistics-based <fixed-case>MT</fixed-case> - JunichiTsujii + JunichiTsujii 197-198 1993.mtsummit-1.20 tsujii-1993-linguistics @@ -179,7 +179,7 @@ Evaluation Method of Machine Translation: from the Viewpoint of Natural Language Processing - ShoichiYokoyama + ShoichiYokoyama 215-220 1993.mtsummit-1.25 yokoyama-1993-evaluation diff --git a/data/xml/1993.tc.xml b/data/xml/1993.tc.xml index 5b7ffb914c..773c199206 100644 --- a/data/xml/1993.tc.xml +++ b/data/xml/1993.tc.xml @@ -11,7 +11,7 @@ Developments in machine translation research in the <fixed-case>US</fixed-case> - YorickWilks + YorickWilks 1993.tc-1.1 wilks-1993-developments @@ -96,7 +96,7 @@ Multilingual drafting of instructional texts - DoniaScott + DoniaScott 1993.tc-1.15 scott-1993-multilingual diff --git a/data/xml/1993.tmi.xml b/data/xml/1993.tmi.xml index 22425af60e..25651f1248 100644 --- a/data/xml/1993.tmi.xml +++ b/data/xml/1993.tmi.xml @@ -33,7 +33,7 @@ Two Approaches to Matching in Example-Based Machine Translation - SergeiNirenburg + SergeiNirenburg ConstantineDomashnev Dean J.Grannes 1993.tmi-1.4 @@ -41,7 +41,7 @@ Example-Based Translation of Technical Terms - SatoshiSato + SatoshiSato 1993.tmi-1.5 sato-1993-example @@ -49,13 +49,13 @@ Combining Dictionary-Based and Example-Based Methods for Natural Language Analysis Stephen D.Richardson LucyVanderwende - WilliamDolan + WilliamDolan 1993.tmi-1.6 richardson-etal-1993-combining An Example-Based Disambiguation of Prepositional Phrase Attachment - EiichiroSumita + EiichiroSumita OsamuFuruse HitoshiIida 1993.tmi-1.7 @@ -85,14 +85,14 @@ Treatment of Tense and Aspect in Translation from <fixed-case>I</fixed-case>talian to <fixed-case>G</fixed-case>reek — An Example of Treatment of Implicit Information in Knowledge-based Transfer <fixed-case>MT</fixed-case> — MargheritaAntona - Jun-ichiTsujii + Jun-ichiTsujii 1993.tmi-1.11 antona-tsujii-1993-treatment — An Example of Treatment of Implicit Information in Knowledge-based Transfer <fixed-case>MT</fixed-case> — MargheritaAntona - Jun-ichiTsujii + Jun-ichiTsujii 1993.tmi-1.12 antona-tsujii-1993-example @@ -105,7 +105,7 @@ An Idiom-based Approach to Machine Translation HagyuLee - Yung TaekKim + Yung TaekKim 1993.tmi-1.14 lee-kim-1993-idiom @@ -130,7 +130,7 @@ J.Tsutsumi N.Aoki-Waibel A.Waibel - WayneWard + WayneWard 1993.tmi-1.16 woszczyna-etal-1993-recent-advances @@ -147,20 +147,20 @@ Determination of Referential Property and Number of Nouns in <fixed-case>J</fixed-case>apanese Sentences for Machine Translation into <fixed-case>E</fixed-case>nglish MasakiMurata - MakotoNagao + MakotoNagao 1993.tmi-1.18 murata-nagao-1993-determination Translation into <fixed-case>E</fixed-case>nglish MasakiMurata - MakotoNagao + MakotoNagao 1993.tmi-1.19 murata-nagao-1993-translation Effects of Automatic Rewriting of Source Language within a <fixed-case>J</fixed-case>apanese to <fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case> System - SatoshiShirai + SatoshiShirai SatoruIkehara TsukasaKawaoka 1993.tmi-1.20 @@ -170,15 +170,15 @@ Better Translation with Knowledge Extracted from Source Text SatoshiKinoshita MiwakoShimazu - HidekiHirakawa + HidekiHirakawa 1993.tmi-1.21 kinoshita-etal-1993-better Evaluation of <fixed-case>MT</fixed-case> Systems by <fixed-case>TOEFL</fixed-case> - MasaruTomita + MasaruTomita MasakoShirai - JunyaTsutsumi + JunyaTsutsumi MikiMatsumura Yuki 1993.tmi-1.22 @@ -193,7 +193,7 @@ Towards a Machine Translation System with Self-Critiquing Capability KwangseobShim - Yung TaekKim + Yung TaekKim 1993.tmi-1.24 shim-kim-1993-towards @@ -205,14 +205,14 @@ Evaluation of <fixed-case>DMAX</fixed-case> Criteria for Selecting Equivalent Translation based on Dual Corpora Statistics - ShinichiDoi + ShinichiDoi KazunoriMuraki 1993.tmi-1.26 doi-muraki-1993-evaluation Corpora Statistics - ShinichiDoi + ShinichiDoi KazunoriMuraki 1993.tmi-1.27 doi-muraki-1993-corpora @@ -221,7 +221,7 @@ Automated Corpus Analysis and the Acquisition of Large, Multi-Lingual Knowledge Bases for <fixed-case>MT</fixed-case> TerukoMitamara Eric H.Nyberg 3rd - Jaime G.Carbonell + Jaime G.Carbonell 1993.tmi-1.28 mitamara-etal-1993-automated diff --git a/data/xml/1994.amta.xml b/data/xml/1994.amta.xml index e8e8234e16..05d1c19bcf 100644 --- a/data/xml/1994.amta.xml +++ b/data/xml/1994.amta.xml @@ -12,9 +12,9 @@ A Hybrid Approach to Multilingual Text Processing: Information Extraction and Machine Translation ChinatsuAone - HatteBlejer + HatteBlejer Mary EllenOkurowski - CarolVan Ess-Dykema + CarolVan Ess-Dykema 1994.amta-1.1 aone-etal-1994-hybrid @@ -23,14 +23,14 @@ LynnCarlson ElizabethCooper RonaldDolan - StevenMaiorano + StevenMaiorano 1994.amta-1.2 carlson-etal-1994-representing Using Partially Aligned Parallel Text and Part-of-speech Information in Word Alignment - Jyun-ShengChang - Huey-ChyunChen + Jyun-ShengChang + Huey-ChyunChen 1994.amta-1.3 chang-chen-1994-using @@ -45,14 +45,14 @@ Stylistic Choice in Machine Translation - ChrysanneDiMarco + ChrysanneDiMarco 1994.amta-1.5 dimarco-1994-stylistic The Case for a <fixed-case>MT</fixed-case> Developers’ Tool with a Two-Component View of the Interlingua - BonnieDorr - ClareVoss + BonnieDorr + ClareVoss 1994.amta-1.6 dorr-voss-1994-case @@ -67,7 +67,7 @@ <fixed-case>PANGLYZER</fixed-case>: <fixed-case>S</fixed-case>panish Language Analysis System - DavidFarwell + DavidFarwell StevenHelmreich WanyingJin MarkCasper @@ -85,46 +85,46 @@ Integrating Translations from Multiple Sources within the <fixed-case>PANGLOSS</fixed-case> Mark <fixed-case>III</fixed-case> Machine Translation System - RobertFrederking - SergeiNirenburg - DavidFarwell + RobertFrederking + SergeiNirenburg + DavidFarwell StevenHelmreich - EduardHovy + EduardHovy KevinKnight StephenBeale ConstantinoDomashnev DonaleeAttardo DeanGrannes - RalfBrown + RalfBrown 1994.amta-1.10 frederking-etal-1994-integrating Aligning Noisy Parallel Corpora Across Language Groups: Word Pair Feature Matching by Dynamic Time Warping PascaleFung - KathleenMcKeown + KathleenMcKeown 1994.amta-1.11 fung-mckeown-1994-aligning Complex Verb Transfer Phenomena in the <fixed-case>SLT</fixed-case> System - BjörnGambäck + BjörnGambäck IvanBretan 1994.amta-1.12 gamback-bretan-1994-complex The Logos Translatability Index - ClaudiaGdaniec + ClaudiaGdaniec 1994.amta-1.13 gdaniec-1994-logos An Adaptation of Lexical Conceptual Structures to Multilingual Processing in an Existing Text Understanding System - Bonnie GloverStalls - RobertBelvin + Bonnie GloverStalls + RobertBelvin AlfredoArnaiz - ChristineMontgomery + ChristineMontgomery RobertStumberger 1994.amta-1.14 stalls-etal-1994-adaptation @@ -164,7 +164,7 @@ IshwarChander MatthewHaines VasileiosHatzivassiloglou - EduardHovy + EduardHovy MasayoIida Steve K.Luk AkitoshiOkumura @@ -184,7 +184,7 @@ A Parameter-Based Message-Passing Parser for <fixed-case>MT</fixed-case> of <fixed-case>K</fixed-case>orean and <fixed-case>E</fixed-case>nglish DekangLin - BonnieDorr + BonnieDorr Jye-hoonLee SungkiSuh 1994.amta-1.20 @@ -205,7 +205,7 @@ Lexicon-to-Ontology Concept Association Using a Bilingual Dictionary AkitoshiOkumura - EduardHovy + EduardHovy 1994.amta-1.23 okumura-hovy-1994-lexicon @@ -218,7 +218,7 @@ The <fixed-case>ARPA</fixed-case> <fixed-case>MT</fixed-case> Evaluation Methodologies: Evolution, Lessons, and Future Approaches - John S.White + John S.White Theresa A.O’Connell Francis E.O’Mara 1994.amta-1.25 @@ -233,10 +233,10 @@ Is <fixed-case>MT</fixed-case> Research Doing Any Good? - KennethChurch - BonnieDorr - EduardHovy - SergeiNirenburg + KennethChurch + BonnieDorr + EduardHovy + SergeiNirenburg BernardScott VirginiaTeller 1994.amta-1.27 @@ -245,12 +245,12 @@ The Role of <fixed-case>MT</fixed-case> Evaluation ScottBennett - GeorgeDoddington + GeorgeDoddington MaryFlanagan LaurieGerber MaghiKing MarjorieLeón - JohnWhite + JohnWhite 1994.amta-1.28 bennett-etal-1994-role @@ -267,8 +267,8 @@ Voices of Experience: <fixed-case>MT</fixed-case> in Operational Settings - SusanArmstrong - RobertaMerchant + SusanArmstrong + RobertaMerchant KazunoriMuraki KarinSpalink MikeTacelosky @@ -280,11 +280,11 @@ Future Directions JosephPentheroudakis - JaimeCarbonell + JaimeCarbonell LutzGraunitz PierreIsabelle ChrisMontgomery - AlexWaibel + AlexWaibel 1994.amta-1.31 pentheroudakis-etal-1994-future @@ -316,8 +316,8 @@ <fixed-case>KANT</fixed-case>: Knowledge-Based, Accurate Natural Language Translation TerukoMitamura - EricNyberg - JaimeCarbonell + EricNyberg + JaimeCarbonell 1994.amta-1.36 mitamura-etal-1994-kant @@ -329,11 +329,11 @@ Machine-Aided Voice Translation (<fixed-case>MAVT</fixed-case>): Advanced Development Model - ChristineMontgomery - Bonnie GloverStalls + ChristineMontgomery + Bonnie GloverStalls RobertStumberger NaicongLi - RobertBelvin + RobertBelvin AlfredoArnaiz Susan HirshLitenatsky 1994.amta-1.38 @@ -353,14 +353,14 @@ <fixed-case>PANGLOSS</fixed-case> - JaimeCarbonell - DavidFarwell - RobertFrederking + JaimeCarbonell + DavidFarwell + RobertFrederking StevenHelmreich - EduardHovy + EduardHovy KevinKnight - LoriLevin - SergeiNirenburg + LoriLevin + SergeiNirenburg 1994.amta-1.41 carbonell-etal-1994-pangloss diff --git a/data/xml/1994.bcs.xml b/data/xml/1994.bcs.xml index 6d465116f6..27c09165dc 100644 --- a/data/xml/1994.bcs.xml +++ b/data/xml/1994.bcs.xml @@ -17,7 +17,7 @@ Some notes on the state of the art: Where are we now in <fixed-case>MT</fixed-case>: what works and what doesn’t? - YorickWilks + YorickWilks 1994.bcs-1.2 The paper examines briefly the impact of the “statistical turn” in machine translation (MT) R&D in the last decade, and particularly the way in which it has made large scale language resources (lexicons, text corpora etc.) more important than ever before and reinforced the role of evaluation in the development of the field. But resources mean, almost by definition, co-operation between groups and, in the case of MT, specifically co-operation between language groups and states. The paper then considers what alternatives there are now for MT R&D. One is to continue with interlingual methods of translation, even though those are not normally thought of as close to statistical methods. The reason is that statistical methods, taken alone, have almost certainly reached a ceiling in terms of the proportion of sentences and linguistic phenomena they can translate successfully. Interlingual methods remain popular within large electronics companies in Japan, and in a large US Government funded project (PANGLOSS). The question then discussed is what role there can be for interlinguas and interlingual methods in co-operation in MT across linguistic and national boundaries. The paper then turns to evaluation and asks whether, across national and continental boundaries, it can become a co-operative or a “hegemonic” enterprise. Finally the paper turns to resources themselves and asks why co-operation on resources is proving so hard, even though there are bright spots of real co-operation. wilks-1994-notes @@ -30,7 +30,7 @@ Research methods and system designs in machine translation: a ten-year review, 1984-1994 - JohnHutchins + JohnHutchins 1994.bcs-1.4 hutchins-1994-research @@ -61,7 +61,7 @@ Machine translation, ten years on: Discourse has yet to make a breakthrough - RuslanMitkov + RuslanMitkov JohannHaller 1994.bcs-1.8 Progress in Machine Translation (MT) during the last ten years has been observed at different levels, but discourse has yet to make a breakthrough. MT research and development has concentrated so far mostly on sentence translation (discourse analysis being a very complicated task) and the successful operation of most of the working MT systems does not usually go beyond the sentence level. To start with, the paper will refer to the MT research and development in the last ten years at the IAI in Saarbrücken. Next, the MT discourse issues will be discussed both from the point of view of source language analysis and target text generation, and on the basis of the preliminary results of an ongoing "discourse-oriented MT" project . Probably the most important aspect in successfully analysing multisentential source texts is the capacity to establish the anaphoric references to preceding discourse entities. The paper will discuss the problem of anaphora resolution from the perspective of MT. A new integrated model for anaphora resolution, developed for the needs of MT, will be also outlined. As already mentioned, most machine translation systems perform translation sentence by sentence. But even in the case of paragraph translation, the discourse structure of the target text tends to be identical to that of the source text. However, the sublanguage discourse structures may differ across the different languages, and thus a translated text which assumes the same discourse structure as the source text may sound unnatural and perhaps disguise the true intent of the writer. Finally, the paper will outline a new approach for generating discourse structures, appropriate to the target sublanguage and will discuss some of the complicated problems encountered. @@ -107,15 +107,15 @@ Translation by meaning and style in <fixed-case>LOLITA</fixed-case> - RichardMorgan - MarkSmith + RichardMorgan + MarkSmith SenganShort 1994.bcs-1.15 morgan-etal-1994-translation Providing factual information in <fixed-case>MAT</fixed-case> - Waltherv. Hahn + Waltherv. Hahn GaljaAngelova 1994.bcs-1.16 Most translations are needed for technical documents in specific domains and often the domain knowledge available to the translator is crucial for the efficiency and quality of the translation task. Our project1 aims at the investigation of a MAT-paradigm where the human user is supported by linguistic as well as by subject information ([vHa90], [vHAn92]). The basic hypotheses of the approach are: - domain knowledge is not encoded in the lexicon entries, i.e. we clearly distinguish between the language layer and the conceptual layer; - the representation of domain knowledge is language independent and replaces most of the semantic entries in a traditional semantic lexicon of MT/MAT-systems; - the user accesses domain information by highlighting a sequence in the source text and specifying the type of query; - factual explanations to the user should be simple and transparent although the underlying formalisms for knowledge representation and processing might be very complex; - as a language for knowledge representation, conceptual graphs (CGs) of Sowa [Sow84] were chosen. In providing connections between the terms (lexical entries) and the knowledge base our approach will be compared to terminological knowledge bases (TKBs) which are hybrid systems between concept-oriented term banks and knowledge bases. This paper presents: - a contrastive view to knowledge based techniques in MAT, - mechanisms for mapping the "ordinary" linguistic lexicon and the terminological lexicon of two languages onto one knowledge base, - methods to access the domain knowledge in a flexible way without allowing completely free linguistic dialogues, - techniques to present the result of queries to the translator in restricted natural language, and - use of domain knowledge to solve specific translation difficulties. @@ -154,7 +154,7 @@ Dialogue-Based <fixed-case>MT</fixed-case> and self-explaining documents as an alternative to <fixed-case>MAHT</fixed-case> and <fixed-case>MT</fixed-case> of controlled languages - ChristianBoitet + ChristianBoitet 1994.bcs-1.22 We argue that, in many situations, Dialogue-Based MT is likely to offer better solutions to translation needs than machine aids to translators or batch MT, even if controlled languages are used. Objections to DBMT have led us to introduce the new concept of “self-explaining document”, which might be used in monolingual as well as in multilingual contexts, and deeply change our way of understanding important or difficult written material. boitet-1994-dialogue @@ -182,7 +182,7 @@ Machine translation and philosophy of language - AlanMelby + AlanMelby 1994.bcs-1.26 melby-1994-machine diff --git a/data/xml/1994.eamt.xml b/data/xml/1994.eamt.xml index e3bc348560..9aceff8b37 100644 --- a/data/xml/1994.eamt.xml +++ b/data/xml/1994.eamt.xml @@ -56,7 +56,7 @@ Compensation - Louisdes Tombe + Louisdes Tombe des-tombe-1994-compensation @@ -76,7 +76,7 @@ Discourse processing for voice-to-voice machine translation - SusannLuperFoy + SusannLuperFoy luperfoy-1994-discourse diff --git a/data/xml/1994.tc.xml b/data/xml/1994.tc.xml index 053c7a57a9..ae792bfee7 100644 --- a/data/xml/1994.tc.xml +++ b/data/xml/1994.tc.xml @@ -11,7 +11,7 @@ A New Era in Machine Translation Research - JohnHutchins + JohnHutchins 1994.tc-1.1 hutchins-1994-new @@ -30,7 +30,7 @@ Interactive Corpus-based Translation Learning Tool (Translearn) - SteliosPiperidis + SteliosPiperidis 1994.tc-1.4 piperidis-1994-interactive @@ -56,7 +56,7 @@ Simplified <fixed-case>E</fixed-case>nglish grammar and style correction in an <fixed-case>MT</fixed-case> framework: The <fixed-case>LRE</fixed-case> <fixed-case>SECC</fixed-case> Project - GeertAdriaens + GeertAdriaens 1994.tc-1.8 adriaens-1994-simplified diff --git a/data/xml/1994.vlc.xml b/data/xml/1994.vlc.xml index 049e881e42..2522795882 100644 --- a/data/xml/1994.vlc.xml +++ b/data/xml/1994.vlc.xml @@ -15,7 +15,7 @@ <fixed-case>TEI</fixed-case>-Conformant Structural Markup of a Trilingual Parallel Corpus in the <fixed-case>ECI</fixed-case> Multilingual Corpus 1 DavidMcKelvieUniversity of Edinburgh - Henry S.ThompsonUniversity of Edinburgh + Henry S.ThompsonUniversity of Edinburgh 7-18 In this paper we provide an overview of the ACL European Corpus Initiative (ECI) Multilingual Corpus 1 (ECI/MC1). In particular, we look at one particular subcorpus in the ECI/MC1, the trilingual corpus of International Labour Organisation reports, and discuss the problems involved in TEI-compliant structural markup and preliminary alignment of this large corpus. We discuss gross structural alignment down to the level of text paragraphs. We see this as a necessary first step in corpus preparation before detailed (possibly automatic) alignment of text is possible. We try and generalise our experience with this corpus to illustrate the process of preliminary markup of large corpora which in their raw state can be in an arbitrary format (eg printers tapes, proprietary word-processor format); noisy (not fully parallel, with structure obscured by spelling mistakes); full of poorly documented formatting instructions; and whose structure is present but anything but explicit. We illustrate these points by reference to other parallel subcorpora of ECI/MC1. We attempt to define some guidelines for the development of corpus annotation toolkits which would aid this kind of structural preparation of large corpora. 1994.vlc-1.1 @@ -50,7 +50,7 @@ Iterative Alignment of Syntactic Structures for a Bilingual Corpus - RalphGrishmanNew York University + RalphGrishmanNew York University 57-68 Alignment of parallel bilingual corpora at the level of syntactic structure holds the promise of being able to discover detailed bilingual structural correspondences automatically. This paper describes a procedure for the alignment of regularized syntactic structures, proceeding bottom-up through the trees. It makes use of information about possible lexical correspondences, from a bilingual dictionary, to generate initial candidate alignments. We consider in particular how much dictionary coverage is needed for the alignment process, and how the alignment can be iteratively improved by having an initial alignment generate additional lexical correspondences for the dictionary, and then using this augmented dictionary for subsequent alignment passes. 1994.vlc-1.5 diff --git a/data/xml/1995.iwpt.xml b/data/xml/1995.iwpt.xml index ccb54fc484..2516882396 100644 --- a/data/xml/1995.iwpt.xml +++ b/data/xml/1995.iwpt.xml @@ -73,7 +73,7 @@ Parsing Non-Immediate Dominance Relations TilmanBecker - OwenRambow + OwenRambow 26-33 1995.iwpt-1.6 We present a new technique for parsing grammar formalisms that express non-immediate dominance relations by ‘dominance-links’. Dominance links have been introduced in various formalisms such as extensions to CFG and TAG in order to capture long-distance dependencies in free-word order languages (Becker et al., 1991; Rambow, 1994). We show how the addition of ‘link counters’ to standard parsing algorithms such as CKY- and Earley-based methods for TAG results in a polynomial time complexity algorithm for parsing lexicalized V-TAG, a multi-component version of TAGs defined in (Rambow, 1994). A variant of this method has previously been applied to context-free grammar based formalisms such as UVG-DL. @@ -89,8 +89,8 @@ Developing and Evaluating a Probabilistic <fixed-case>LR</fixed-case> Parser of Part-of-Speech and Punctuation Labels - TedBriscoe - JohnCarroll + TedBriscoe + JohnCarroll 48-58 1995.iwpt-1.8 We describe an approach to robust domain-independent syntactic parsing of unrestricted naturally-occurring (English) input. The technique involves parsing sequences of part-of-speech and punctuation labels using a unification-based grammar coupled with a probabilistic LR parser. We describe the coverage of several corpora using this grammar and report the results of a parsing experiment using probabilities derived from bracketed training data. We report the first substantial experiments to assess the contribution of punctuation to deriving an accurate syntactic analysis, by parsing identical texts both with and without naturally-occurring punctuation marks. @@ -117,11 +117,11 @@ Distributed Parsing With <fixed-case>HPSG</fixed-case> Grammars Abdel KaderDiagne - WalterKasper - Hans-UlrichKrieger + WalterKasper + Hans-UlrichKrieger 79-86 1995.iwpt-1.11 - + diagne-etal-1995-distributed @@ -154,7 +154,7 @@ A Robust Parsing Algorithm for Link Grammars DennisGrinberg - JohnLafferty + JohnLafferty DanielSleator 111-125 1995.iwpt-1.15 @@ -163,9 +163,9 @@ An Implementation of Syntactic Analysis of <fixed-case>C</fixed-case>zech - TomášHolan - VladislavKuboň - MartinPlátek + TomášHolan + VladislavKuboň + MartinPlátek 126-135 1995.iwpt-1.16 This paper describes current results achieved during the work on parsing of a free-word-order natural language (Czech) . It contains theoretical base for a new class of grammars - CFG extended for dependecies and non-projectivities – and also the description of the implementation of a parser and grammar-checker. The paper also describes some typical problems of parsing of free-word-order languages and their solutions (or discusssion of those problems), which are still subject of investigation. The implementation described here serves currently as a testing tool for the development of a large scale grammar of Czech. Some of the quantitative data from a processing of test sentences are also included. @@ -181,17 +181,17 @@ On Parsing Control for Efficient Text Analysis - FabioCiravegna - AlbertoLavelli + FabioCiravegna + AlbertoLavelli 148-149 1995.iwpt-1.18 - + ciravegna-lavelli-1995-parsing A Practical Dependency Parser VincenzoLombardo - LeonardoLesmo + LeonardoLesmo 150-151 1995.iwpt-1.19 The working assumption is that cognitive modeling of NLP and engineering solutions to free text parsing can converge to optimal parsing. The claim of the paper is that the methodology to achieve such a result is to develop a concrete environment with a flexible parser, that allows the testing of various psycholinguistic strategies on real texts. In this paper we outline a flexible parser based on a dependency grammar. @@ -217,7 +217,7 @@ Parsing Without Grammar ShinsukeMori - MakotoNagao + MakotoNagao 174-185 1995.iwpt-1.22 We describe and evaluate experimentally a method to parse a tagged corpus without grammar modeling a natural language on context-free language. This method is based on the following three hypotheses. 1) Part-of-speech sequences on the right-hand side of a rewriting rule are less constrained as to what part-of-speech precedes and follows them than non-constituent sequences. 2) Part-of-speech sequences directly derived from the same non-terminal symbol have similar environments. 3) The most suitable set of rewriting rules makes the greatest reduction of the corpus size. Based on these hypotheses, the system finds a set of constituent-like part-of-speech sequences and replaces them with a new symbol. The repetition of these processes brings us a set of rewriting rules, a grammar, and the bracketed corpus. @@ -250,7 +250,7 @@ A Corpus-based Probabilistic Grammar with Only Two Non-terminals SatoshiSekine - RalphGrishman + RalphGrishman 216-223 1995.iwpt-1.26 The availability of large, syntactically-bracketed corpora such as the Penn Tree Bank affords us the opportunity to automatically build or train broad-coverage grammars, and in particular to train probabilistic grammars. A number of recent parsing experiments have also indicated that grammars whose production probabilities are dependent on the context can be more effective than context-free grammars in selecting a correct parse. To make maximal use of context, we have automatically constructed, from the Penn Tree Bank version 2, a grammar in which the symbols S and NP are the only real nonterminals, and the other non-terminals or grammatical nodes are in effect embedded into the right-hand-sides of the S and NP rules. For example, one of the rules extracted from the tree bank would be S -> NP VBX JJ CC VBX NP [1] ( where NP is a non-terminal and the other symbols are terminals – part-of-speech tags of the Tree Bank). The most common structure in the Tree Bank associated with this expansion is (S NP (VP (VP VBX (ADJ JJ) CC (VP VBX NP)))) [2]. So if our parser uses rule [1] in parsing a sentence, it will generate structure [2] for the corresponding part of the sentence. Using 94% of the Penn Tree Bank for training, we extracted 32,296 distinct rules ( 23,386 for S, and 8,910 for NP). We also built a smaller version of the grammar based on higher frequency patterns for use as a back-up when the larger grammar is unable to produce a parse due to memory limitation. We applied this parser to 1,989 Wall Street Journal sentences (separate from the training set and with no limit on sentence length). Of the parsed sentences (1,899), the percentage of no-crossing sentences is 33.9%, and Parseval recall and precision are 73.43% and 72 .61%. @@ -258,8 +258,8 @@ Heuristics and Parse Ranking - B.Srinivas - ChristineDoran + B.Srinivas + ChristineDoran SethKulick 224-233 1995.iwpt-1.27 @@ -277,20 +277,20 @@ An <fixed-case>HPSG</fixed-case>-based Parser for Automatic Knowledge Acquisition KentaroTorisawa - Jun’ichiTsujii + Jun’ichiTsujii 250-251 1995.iwpt-1.29 - + torisawa-tsujii-1995-hpsg Parsing <fixed-case>D</fixed-case>-Tree Grammars - K.Vijay-Shanker - DavidWeir - OwenRambow + K.Vijay-Shanker + DavidWeir + OwenRambow 252-259 1995.iwpt-1.30 - + vijay-shanker-etal-1995-parsing @@ -304,10 +304,10 @@ Partitioning Grammars and Composing Parsers FuliangWeng - AndreasStolcke + AndreasStolcke 271-272 1995.iwpt-1.32 - + weng-stolcke-1995-partitioning diff --git a/data/xml/1995.mtsummit.xml b/data/xml/1995.mtsummit.xml index 592b5e7de5..db8784e09f 100644 --- a/data/xml/1995.mtsummit.xml +++ b/data/xml/1995.mtsummit.xml @@ -27,25 +27,25 @@ Problems with the second generation architecture and new trends in <fixed-case>MT</fixed-case> - HaroldSomers + HaroldSomers 1995.mtsummit-1.4 somers-1995-problems <fixed-case>E</fixed-case>urotra, history and results - BenteMaegaard + BenteMaegaard 1995.mtsummit-1.5 maegaard-1995-eurotra Reflections on the history and present state of <fixed-case>MT</fixed-case> - JohnHutchins + JohnHutchins 1995.mtsummit-1.6 hutchins-1995-reflections Factors for success and failure in <fixed-case>MT</fixed-case> - ChristianBoitet + ChristianBoitet 1995.mtsummit-1.7 boitet-1995-factors @@ -77,7 +77,7 @@ A bidirectional <fixed-case>R</fixed-case>ussian-<fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case> system (<fixed-case>ETAP</fixed-case>-3) - IgorBoguslavsky + IgorBoguslavsky 1995.mtsummit-1.12 boguslavsky-1995-bidirectional @@ -144,7 +144,7 @@ Machine Translation in the <fixed-case>C</fixed-case>zech <fixed-case>R</fixed-case>epublic: history, methods, systems - JanHajič + JanHajič 1995.mtsummit-1.23 hajic-1995-machine @@ -204,7 +204,7 @@ What have we to do for the future of <fixed-case>MT</fixed-case> systems? - MakotoNagao + MakotoNagao 1995.mtsummit-1.33 nagao-1995-future @@ -227,7 +227,7 @@ Approaches to black box <fixed-case>MT</fixed-case> evaluation - John S.White + John S.White 1995.mtsummit-1.37 white-1995-approaches @@ -239,7 +239,7 @@ Verbmobil: Towards a <fixed-case>DRT</fixed-case>-based translation of spontaneous negotiation dialogues - WolfgangWahlster + WolfgangWahlster 1995.mtsummit-1.39 wahlster-1995-verbmobil @@ -251,7 +251,7 @@ Translation and interpretation of spontaneous speech - AlexWaibel + AlexWaibel waibel-1995-translation @@ -262,7 +262,7 @@ Issues in multimodal telecommunications - TsuyoshiMorimoto + TsuyoshiMorimoto 1995.mtsummit-1.43 morimoto-1995-issues @@ -274,7 +274,7 @@ Machine Translation for the office automation - Key-SunChoi + Key-SunChoi 1995.mtsummit-1.45 choi-1995-machine diff --git a/data/xml/1995.tc.xml b/data/xml/1995.tc.xml index 972e59a255..ad61de74f3 100644 --- a/data/xml/1995.tc.xml +++ b/data/xml/1995.tc.xml @@ -74,9 +74,9 @@ Using corpora to develop limited-domain speech translation systems - MannyRayner - PierretteBouillon - DavidCarter + MannyRayner + PierretteBouillon + DavidCarter 1995.tc-1.11 rayner-etal-1995-using diff --git a/data/xml/1995.tmi.xml b/data/xml/1995.tmi.xml index e433fe8b59..c6ae7228ee 100644 --- a/data/xml/1995.tmi.xml +++ b/data/xml/1995.tmi.xml @@ -19,8 +19,8 @@ Translation using <fixed-case>M</fixed-case>inimal <fixed-case>R</fixed-case>ecursion <fixed-case>S</fixed-case>emantics AnnCopestake - DanFlickinger - RobMalouf + DanFlickinger + RobMalouf SusanneRiehemann IvanSag 1995.tmi-1.2 @@ -41,13 +41,13 @@ A Sign-Based Approach to the Translation of Temporal Expressions - FrankVan Eynde + FrankVan Eynde 1995.tmi-1.5 van-eynde-1995-sign Anaphora Resolution in Machine Translation - RuslanMitkov + RuslanMitkov Sung-KwonChoi RandallSharp 1995.tmi-1.6 @@ -62,9 +62,9 @@ Apologiae Ontologiae - SergeiNirenburg + SergeiNirenburg VictorRaskin - BoyanOnyshkevych + BoyanOnyshkevych 1995.tmi-1.8 nirenburg-etal-1995-apologiae @@ -78,7 +78,7 @@ Technological evaluation of a controlled language application: precision, recall and convergence tests for <fixed-case>SECC</fixed-case> - GeertAdriaens + GeertAdriaens LieveMacken 1995.tmi-1.10 adriaens-macken-1995-technological @@ -99,14 +99,14 @@ Using Context in Machine Translation of Spoken Language - LoriLevin + LoriLevin OrenGlickman YanQu - Carolyn P.Rose - DonnaGates - AlonLavie - AlexWaibel - CarolVan Ess-Dykema + Carolyn P.Rose + DonnaGates + AlonLavie + AlexWaibel + CarolVan Ess-Dykema 1995.tmi-1.13 levin-etal-1995-using @@ -122,7 +122,7 @@ M.Gavalda Y-H.Seo B.Suhm - WayneWard + WayneWard A.Waibel 1995.tmi-1.15 mayfield-etal-1995-concept @@ -138,8 +138,8 @@ Applying Statistical <fixed-case>E</fixed-case>nglish Language Modelling to Symbolic Machine Translation - RalfBrown - RobertFrederking + RalfBrown + RobertFrederking 1995.tmi-1.17 brown-frederking-1995-applying @@ -159,14 +159,14 @@ Heterogeneous Computing for Example-Based Translation of Spoken Language - EiichiroSumita + EiichiroSumita HitoshiIida 1995.tmi-1.20 sumita-iida-1995-heterogeneous Machine Translation: an Integration Approach - Kuang-huaChen + Kuang-huaChen Hsin-HsiChen 1995.tmi-1.21 chen-chen-1995-machine @@ -188,7 +188,7 @@ Constituent Shifts in the Logos <fixed-case>E</fixed-case>nglish-<fixed-case>G</fixed-case>erman System - ClaudiaGdaniec + ClaudiaGdaniec PatriciaSchmid 1995.tmi-1.24 gdaniec-schmid-1995-constituent @@ -201,8 +201,8 @@ Spoken-Language Machine Translation in Limited Domains: Can it be Achieved by Finite-State Models? - Juan MiguelVilar - AntonioCastellanos + Juan MiguelVilar + AntonioCastellanos Juan MiguelJimenez J. A.Sanchez E.Vidal diff --git a/data/xml/1996.amta.xml b/data/xml/1996.amta.xml index 7eee34e973..56cccbfdcc 100644 --- a/data/xml/1996.amta.xml +++ b/data/xml/1996.amta.xml @@ -38,14 +38,14 @@ Translation differences and pragmatics-based <fixed-case>MT</fixed-case> StephenHelmreich - DavidFarwell + DavidFarwell 1996.amta-1.5 helmreich-farwell-1996-translation Abstraction and underspecification in semantic transfer BerndAbb - BiankaBuschbeck-Wolf + BiankaBuschbeck-Wolf ChristelTschernitschek 1996.amta-1.6 abb-etal-1996-abstraction @@ -60,7 +60,7 @@ Capturing motion verb generalizations in synchronous tree adjoining grammars - MarthaPalmer + MarthaPalmer JosephRosenzweig 1996.amta-1.8 palmer-rosenzweig-1996-capturing @@ -68,34 +68,34 @@ Evolution of the <fixed-case>L</fixed-case>ogos grammar: system design and development methodology PatriciaSchmid - ClaudiaGdaniec + ClaudiaGdaniec 1996.amta-1.9 schmid-gdaniec-1996-evolution Two principles and six techniques for rapid <fixed-case>MT</fixed-case> development - SergeiNirenburg + SergeiNirenburg StephenBeale StephenHelmreich - KaviMahesh + KaviMahesh EvelyneViegas - RémiZajac + RémiZajac 1996.amta-1.10 nirenburg-etal-1996-two Adaptation of the <fixed-case>DARPA</fixed-case> machine translation evlauation paradigm to end-to-end systems - John S.White + John S.White Theresa A.O’Connell 1996.amta-1.11 white-oconnell-1996-adaptation Combining machine readable lexical resources and bilingual corpora for broad word sense disambiguation - Jason J. S.Chang - Jen-NanChen + Jason J. S.Chang + Jen-NanChen Huei-HongSheng - Sur-JinKer + Sur-JinKer 1996.amta-1.12 chang-etal-1996-combining @@ -146,7 +146,7 @@ The state of machine translation in <fixed-case>E</fixed-case>urope - JohnHutchins + JohnHutchins 1996.amta-1.20 hutchins-1996-state @@ -159,7 +159,7 @@ The primacy of core technology <fixed-case>MT</fixed-case> evaluation - John S.White + John S.White 1996.amta-1.22 white-1996-primacy @@ -176,13 +176,13 @@ Panel: The limits of automation: optimists vs skeptics. - EduardHovy - KenChurch + EduardHovy + KenChurch DenisGachot MargeLeon - AlanMelby - SergeiNirenburg - YorickWilks + AlanMelby + SergeiNirenburg + YorickWilks 1996.amta-1.24 hovy-etal-1996-panel @@ -199,8 +199,8 @@ Panel: Next steps in <fixed-case>MT</fixed-case> research LynnCarlson - JaimeCarbonell - DavidFarwell + JaimeCarbonell + DavidFarwell PierreIsabelle JackieMurgida JohnO’Hara @@ -226,28 +226,28 @@ <fixed-case>ITSVOX</fixed-case> - EricWehrli + EricWehrli 1996.amta-1.29 wehrli-1996-itsvox <fixed-case>JANUS</fixed-case>: multi-lingual translation of spontaneous speech in limited domain - AlonLavie - LoriLevin - AlexWaibel - DonnaGates - MarsalGavalda - LauraMayfield + AlonLavie + LoriLevin + AlexWaibel + DonnaGates + MarsalGavalda + LauraMayfield 1996.amta-1.30 lavie-etal-1996-janus <fixed-case>JAPANGLOSS</fixed-case>: using statistics to fill knowledge gaps KevinKnight - YaserAl-Onaizan + YaserAl-Onaizan IshwarChander - EduardHovy - IreneLangkilde + EduardHovy + IreneLangkilde RichardWhitney KenjiYamada 1996.amta-1.31 @@ -274,8 +274,8 @@ The <fixed-case>P</fixed-case>angloss-<fixed-case>L</fixed-case>ite machine translation system - Robert E.Frederking - Ralf D.Brown + Robert E.Frederking + Ralf D.Brown 1996.amta-1.35 frederking-brown-1996-pangloss @@ -293,7 +293,7 @@ Towards a multilingual analyst’s workstation: Temple - RémiZajac + RémiZajac 1996.amta-1.38 zajac-1996-towards diff --git a/data/xml/1996.eamt.xml b/data/xml/1996.eamt.xml index 6bdca03ba0..a3f4eefdab 100644 --- a/data/xml/1996.eamt.xml +++ b/data/xml/1996.eamt.xml @@ -16,7 +16,7 @@ Introduction - JohnHutchins + JohnHutchins 1996.eamt-1.1 hutchins-1996-introduction @@ -28,7 +28,7 @@ Machine Translation, Translation Memories and the Phrasal Lexicon: The Localisation Perspective - ReinhardSchäler + ReinhardSchäler 1996.eamt-1.3 schaler-1996-machine diff --git a/data/xml/1996.tc.xml b/data/xml/1996.tc.xml index 5b9626f186..a3959286fd 100644 --- a/data/xml/1996.tc.xml +++ b/data/xml/1996.tc.xml @@ -41,7 +41,7 @@ Towards a more efficient use of <fixed-case>PC</fixed-case>-based <fixed-case>MT</fixed-case> in education - RuslanMitkov + RuslanMitkov 1996.tc-1.6 mitkov-1996-towards @@ -77,7 +77,7 @@ Computer Support for Authoring Multilingual Software Documentation - DoniaScott + DoniaScott 1996.tc-1.12 scott-1996-computer diff --git a/data/xml/1997.eamt.xml b/data/xml/1997.eamt.xml index 86311a3a72..43ebb0e219 100644 --- a/data/xml/1997.eamt.xml +++ b/data/xml/1997.eamt.xml @@ -12,7 +12,7 @@ Introduction - JohnHutchins + JohnHutchins 1997.eamt-1.1 hutchins-1997-introduction @@ -42,7 +42,7 @@ Why don’t they use translation tools? - HanneFersøe + HanneFersøe 1997.eamt-1.6 fersoe-1997-dont @@ -54,7 +54,7 @@ The workflow in a document production environment using translation tools - BenteMaegaard + BenteMaegaard 1997.eamt-1.8 maegaard-1997-workflow @@ -78,7 +78,7 @@ Providing multilingual term explanations in machine aided translation - Walthervon Hahn + Walthervon Hahn 1997.eamt-1.12 von-hahn-1997-providing @@ -91,7 +91,7 @@ Summary and conclusions DimitriTheologitis - BenteMaegaard + BenteMaegaard 1997.eamt-1.14 theologitis-maegaard-1997-summary diff --git a/data/xml/1997.iwpt.xml b/data/xml/1997.iwpt.xml index 3a6dc39d29..02ca41ea41 100644 --- a/data/xml/1997.iwpt.xml +++ b/data/xml/1997.iwpt.xml @@ -46,7 +46,7 @@ Intelligent Multimedia Information Access - Mark T.Maybury + Mark T.Maybury xvii-xviii 1997.iwpt-1.3 The expansion of the information highway has generated requirements for more effective access to global and corporate information repositories. These repositories are increasingly multimedia, including text, audio (e.g., spoken language, music), graphics, imagery, and video. The advent of large, multimedia digital libraries has turned attention toward the problem of processing and managing multiple and heterogeneous media in a principled manner, including their creation, storage, indexing, browsing, search, visualization, and summarization. Intelligent multimedia information access is a multidisciplinary area that lies at the intersection of artificial intelligence, information retrieval, human computer interaction, and multimedia computing. Intelligent multimedia information access includes those systems which go beyond traditional hypermedia or hypertext environments and analyze media, generate media, or support intelligent interaction with or via multiple media using knowledge of the user, discourse, domain, world, or the media itself. Providing machines with the ability to interpret, generate, and support interaction with multimedia artifacts (e.g., documents, broadcasts, hypermedia) will be a valuable facility for a number of key applications such as videoteleconference archiving, custom on-line news, and briefing assistants. These media facilities, in turn, may support a variety of tasks ranging from training to information analysis to decision support. In this talk I will describe our group’s efforts to provide content based access to broadcast news sources, including our use of corpus-based processing techniques to the problems of video indexing, segmentation, and summarization. In addition to better access to content, we also need to concern ourselves with enabling more effective, efficient and natural human computer or computer mediated human-human interaction. This will require automated understanding and generation of multimedia and demand explicit representation of and reasoning about the user, discourse, task and context (Maybury 1993). To this end, I will describe our work in progress that aims to fully instrument the interface and build ( automatically and semi-automatically) annotated corpora of human-machine interaction. We believe this will yield deeper and more comprehensive models of interaction which should ultimately enable more principled interface design. @@ -54,7 +54,7 @@ Making Use of Intonation in Interactive Dialogue Translation - MarkSteedman + MarkSteedman xix 1997.iwpt-1.4 Intonational information is frequently discarded in speech recognition, and assigned by default heuristics in text-to-speech generation. However, in many applications involving dialogue and interactive discourse, intonation conveys significant information, and we ignore it at our peril. Translating telephones and personal assistants are an interesting test case, in which the salience of rapidly shifting discourse topics and the fact that sentences are machine-generated, rather than written by humans, combine to make the application particularly vulnerable to our poor theoretical grasp of intonation and its functions. I will discuss a number of approaches to the problem for such applications, ranging from cheap tricks to a combinatory grammar-based theory of the semantics involved and a syntax-phonology interface for building and generating from interpretations. @@ -70,8 +70,8 @@ Encoding Frequency Information in Lexicalized Grammars - JohnCarroll - DavidWeir + JohnCarroll + DavidWeir 8-17 1997.iwpt-1.6 We address the issue of how to associate frequency information with lexicalized grammar formalisms, using Lexicalized Tree Adjoining Grammar as a representative framework. We consider systematically a number of alternative probabilistic frameworks, evaluating their adequacy from both a theoretical and empirical perspective using data from existing large treebanks. We also propose three orthogonal approaches fo r backing off probability estimates to cope with the large number of parameters involved. @@ -80,7 +80,7 @@ Towards a Reduced Commitment, <fixed-case>D</fixed-case>-Theory Style <fixed-case>TAG</fixed-case> Parser JohnChen - K.Vijay-Shankar + K.Vijay-Shankar 18-29 1997.iwpt-1.7 Many traditional TAG parsers handle ambiguity by considering all of the possible choices as they unfold during parsing. In contrast , D-theory parsers cope with ambiguity by using underspecified descriptions of trees. This paper introduces a novel approach to parsing TAG, namely one that explores how D-theoretic notions may be applied to TAG parsing. Combining the D-theoretic approach to TAG parsing as we do here raises new issues and problems. D-theoretic underspecification is used as a novel approach in the context of TAG parsing for delaying attachment decisions. Conversely, the use of TAG reveals the need for additional types of underspecification that have not been considered so far in the D-theoretic framework. These include combining sets of trees into their underspecified equivalents as well as underspecifying combinations of trees. In this paper, we examine various issues that arise in this new approach to TAG parsing and present solutions to some of the problems. We also describe other issues which need to be resolved for this method of parsing to be implemented. @@ -88,8 +88,8 @@ Controlling Bottom-Up Chart Parsers through Text Chunking - FabioCiravegna - AlbertoLavelli + FabioCiravegna + AlbertoLavelli 30-41 1997.iwpt-1.8 In this paper we propose to use text chunking for controlling a bottom-up parser. As it is well known, during analysis such parsers produce many constituents not contributing to the final solution(s). Most of these constituents are introduced due to t he parser inability of checking the input context around them. Preliminary text chunking allows to focus directly on the constituents that seem more likely and to prune the search space in the case some satisfactory solutions are found. Preliminary experiments show that a CYK-like parser controlled through chunking is definitely more efficient than a traditional parser without significantly losing in correctness. Moreover the quality of possible partial results produced by the controlled parser is high. The strategy is particularly suited for tasks like Information Extraction from text (IE) where sentences are often long and complex and it is very difficult to have a complete coverage. Hence, there is a strong necessity of focusing on the most likely solutions; furthermore, in IE the quality of partial results is important . @@ -105,16 +105,16 @@ Bilexical Grammars and a Cubic-time Probabilistic Parser - JasonEisner + JasonEisner 54-65 1997.iwpt-1.10 - + eisner-1997-bilexical Automaton-based Parsing for Lexicalised Grammars - RogerEvans - DavidWeir + RogerEvans + DavidWeir 66-76 1997.iwpt-1.11 In wide-coverage lexicalized grammars many of the elementary structures have substructures in common. This means that during parsing some of the computation associated with different structures is duplicated. This paper explores ways in which the grammar can be precompiled into finite state automata so that some of this shared structure results in shared computation at run-time. @@ -131,7 +131,7 @@ Probabilistic Feature Grammars - JoshuaGoodman + JoshuaGoodman 89-100 1997.iwpt-1.13 We present a new formalism, probabilistic feature grammar (PFG). PFGs combine most of the best properties of several other formalisms, including those of Collins, Magerman, and Charniak, and in experiments have comparable or better performance. PFGs generate features one at a time, probabilistically, conditioning the probabilities of each feature on other features in a local context. Because the conditioning is local, efficient polynomial time parsing algorithms exist for computing inside, outside, and Viterbi parses. PFGs can produce probabilities of strings, making them potentially useful for language modeling. Precision and recall results are comparable to the state of the art with words, and the best reported without words. @@ -176,7 +176,7 @@ Probabilistic Parsing using Left Corner Language Models - Christopher D.Manning + Christopher D.Manning BobCarpenter 147-158 1997.iwpt-1.18 @@ -209,7 +209,7 @@ Performance Evaluation of Supertagging for Partial Parsing - B.Srinivas + B.Srinivas 187-198 1997.iwpt-1.22 In previous work we introduced the idea of supertagging as a means of improving the efficiency of a lexicalized grammar parser. In this paper, we present supertagging in conjunction with a lightweight dependency analyzer as a robust and efficient partial parser. The present work is significant for two reasons. First, we have vastly improved our results; 92% accurate for supertag disambiguation using lexical information, larger training corpus and smoothing techniques. Second, we show how supertagging can be used for partial parsing and provide detailed evaluation results for detecting noun chunks, verb chunks, preposition phrase attachment and a variety of other linguistic constructions. Using supertag representation, we achieve a recall rate of 93.0% and a precision rate of 91.8% for noun chunking, improving on the best known result for noun chunking. @@ -281,9 +281,9 @@ Formal Tools for Separating Syntactically Correct and Incorrect Structures - MartinPlátek - VladislavKuboň - TomášHolan + MartinPlátek + VladislavKuboň + TomášHolan 247-248 1997.iwpt-1.30 In this paper we introduce a class of formal grammars with special measures capable to describe typical syntactic inconsistencies in free word order languages. By means of these measures it is possible to characterize more precisely the problems connected with the task of building a robust parser or a grammar checker of Czech. @@ -293,7 +293,7 @@ Parsers Optimization for Wide-coverage Unification-based Grammars using the Restriction Technique NoraLa Serna ArantxaDíaz - HoracioRodríguez + HoracioRodríguez 249-250 1997.iwpt-1.31 This article describes the methodology we have followed in order to improve the efficiency of a parsing algorithm for wide coverage unification-based grammars. The technique used is the restriction technique (Shieber 85), which has been recognized as an important operation to obtain efficient parsers for unification-based grammars. The main objective of the research is how to choose appropriate restrictors for using the restriction technique. We have developed a statistical model for selecting restrictors. Several experiments have been done in order to characterise those restrictors. diff --git a/data/xml/1997.mtsummit.xml b/data/xml/1997.mtsummit.xml index 605aaf7fab..ba0353e21a 100644 --- a/data/xml/1997.mtsummit.xml +++ b/data/xml/1997.mtsummit.xml @@ -12,7 +12,7 @@ A gentle introduction to <fixed-case>MT</fixed-case>: theory and current practice - EduardHovy + EduardHovy This tutorial provides a nontechnical introduction to machine translation. It reviews the whole scope of MT, outlining briefly its history and the major application areas today, and describing the various kinds of MT techniques that have been invented—from direct replacement through transfer to the holy grail of interlinguas. It briefly outlines the newest statistics-based techniques and provides an introduction to the difficult questions of MT evaluation. Topics include: History and development of MT; Theoretical foundations of MT; Traditional and modern MT techniques; Newest MT research; Thorny questions of evaluating MT systems hovy-1997-gentle @@ -24,7 +24,7 @@ <fixed-case>MT</fixed-case> evaluation: old, new, and recycled - JohnWhite + JohnWhite The tutorial addresses the issues peculiar to machine translation evaluation, namely the difficulty in determining what constitutes correct translation, and which types of evaluation are the most meaningful for evaluation "consumers." The tutorial is structured around evaluation methods designed for particular purposes: types of MT design, stages in the development lifecycle, and intended end-use of a system that includes MT. It will provide an overview of the issues and classic approaches to MT evaluation. The traditional processes, such as those outlined in the ALPAC report, will be examined for their value historically and in terms of today's environments. The tutorial also provides an insight into the latest evaluation techniques, designed to capture the value of MT systems in the context of current and future automated text handling processes. white-1997-mt @@ -55,7 +55,7 @@ First steps in Mechanical Translation - JohnHutchins + JohnHutchins 14-23 1997.mtsummit-plenaries.2 Although the first ideas for mechanical translation were made in the seventeenth century, it was not until this century that means became available for realization with the appearance of the electronic computer in the mid 1940s. Fifty years ago, in March 1947 Warren Weaver wrote to Norbert Wiener and met Andrew Booth, mentioning to both the use of computers for translation. The possibilities were investigated during the next seven years, until in January 1954 the first prototype program was demonstrated. This article is a brief chronicle of these early years of mechanizing translation processes. @@ -78,7 +78,7 @@ The Fulcrum Approach to Machine Translation - Christine A.Montgomery + Christine A.Montgomery 29-30 1997.mtsummit-plenaries.5 montgomery-1997-fulcrum @@ -120,7 +120,7 @@ Machine Translation Through Language Understanding - MakotoNagao + MakotoNagao 41-49 1997.mtsummit-plenaries.11 In this paper is described a general framework of a next generation machine translation system which translates a text not sentence by sentence but by considering inter-sentential discourse. The method is a step closer to human translation than the present-day machine translation systems. Particularly important are a detailed discourse analysis and a flexible text generation by using information obtained from the discourse analysis. @@ -128,7 +128,7 @@ The Current State of Machine Translation - Harold L.Somers + Harold L.Somers 115-124 1997.mtsummit-plenaries.12 This paper aims to survey the current state of research, development and use of Machine Translation (MT). Under ‘research’ the role of linguistics is discussed, and contrasted with research in ‘analogy- based’ MT. The range of languages covered by MT systems is discussed, and the lack of development for minority languages noted. The new research area of spoken language translation (SLT) is reviewed, with some major differences between SLT and text MT described. Under ‘use and users’ we discuss tools for users: Translation Memory, bilingual concordances and software to help checking for mistranslations. The use of MT on the World Wide Web is also discussed, regarding pre- and post-editing, the impact of ‘controlled language’ is reviewed, and finally a proposal is made that MT users can revise the input text in the light of errors that the system makes, thus ‘post-editing the source text’. @@ -136,7 +136,7 @@ Whither <fixed-case>MT</fixed-case>? - BenteMaegaard + BenteMaegaard 191-199 1997.mtsummit-plenaries.13 MT started out as a ‘technology push’: more than 50 years ago, researchers had the bright idea of doing translation with the use of the newly developed computers. MT remained in the technology push area for many years. However, in the nineties we are seeing the ‘market pull’ beginning to play a role and there are good reasons to believe that this trend will continue. MT is going where the market and the users wants it to go, and MT will be prospering in the future. MT will be available electronically over the network, and MT will be available in environments which also offer a variety of other tools for translation, as well as tools for other types of information management. Also in research and in development of new technologies, MT will further develop, e.g. along the lines of knowledge-based MT, advanced integration of different analysis techniques (rule-based, statistics-based, etc.), integration with speech etc. @@ -162,7 +162,7 @@ A Real-Time <fixed-case>MT</fixed-case> System for Translating Broadcast Captions - EricNyberg + EricNyberg TerukoMitamura 51-57 1997.mtsummit-papers.2 @@ -220,7 +220,7 @@ User-Friendly Machine Translation: Alternate Translations Based on Differing Beliefs - DavidFarwell + DavidFarwell StephenHelmreich 125-131 1997.mtsummit-papers.9 @@ -229,7 +229,7 @@ Sharable Formats and Their Supporting Environments for Exchanging User Dictionaries among Different <fixed-case>MT</fixed-case> Systems as a Part of <fixed-case>AAMT</fixed-case> Activities - Shin-ichiroKamei + Shin-ichiroKamei EtsuoItoh MikikoFujii TokuyukiHirai @@ -429,8 +429,8 @@ The <fixed-case>DIPLOMAT</fixed-case> Rapid Development Speech <fixed-case>MT</fixed-case> System - Robert E.Frederking - Ralf D.Brown + Robert E.Frederking + Ralf D.Brown ChristopherHogan 261-262 1997.mtsummit-systems.9 @@ -576,18 +576,18 @@ Associating semantic components with intersective Levin classes - Hoa TrangDang + Hoa TrangDang JosephRosenzweig - MarthaPalmer + MarthaPalmer 11-18 1997.mtsummit-workshop.2 dang-etal-1997-associating <fixed-case>S</fixed-case>panish <fixed-case>E</fixed-case>uro<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>LCS</fixed-case>-based interlingual <fixed-case>MT</fixed-case> - Bonnie J.Dorr - M. AntoniaMartí - IreneCastellón + Bonnie J.Dorr + M. AntoniaMartí + IreneCastellón 19-31 1997.mtsummit-workshop.3 We present a machine translation framework in which the interlingua— Lexical Conceptual Structure (LCS)—is coupled with a definitional component that includes bilingual (EuroWordNet) links between words in the source and target languages. While the links between individual words are language-specific, the LCS is designed to be a language-independent, compositional representation. We take the view that the two types of information—shallower, transfer-like knowledge as well as deeper, compositional knowledge—can be reconciled in interlingual machine translation, the former for overcoming the intractability of LCS-based lexical selec- tion, and the latter for relating the underlying semantics of two words cross-linguistically. We describe the acquisition process for these two information types and present results of hand-verification of the acquired lexicon. Finally, we demonstrate the utility of the two information types in interlingual MT. @@ -595,8 +595,8 @@ Toward compact monotonically compositional interlingua using lexical aspect - Bonnie J.Dorr - Mari BromanOlsen + Bonnie J.Dorr + Mari BromanOlsen Scott C.Thomas 33-43 1997.mtsummit-workshop.4 @@ -605,7 +605,7 @@ On representing language-specific information in interlingua - DavidFarwell + DavidFarwell 45-50 1997.mtsummit-workshop.5 farwell-1997-representing @@ -620,14 +620,14 @@ Improving the precision of lexicon-to-ontology alignment algorithms Latifur R.Khan - Eduard H.Hovy + Eduard H.Hovy 53-58 1997.mtsummit-workshop.7 khan-hovy-1997-improving Interlingua developed and utilized in real multilingual <fixed-case>MT</fixed-case> product systems - Shin-ichiroKamei + Shin-ichiroKamei KazunoriMuraki 59-69 1997.mtsummit-workshop.8 @@ -637,18 +637,18 @@ Simplification of nomenclature leads to an ideal <fixed-case>IL</fixed-case> for human language communication Young-SukLee - CliffordWeinstein + CliffordWeinstein DineshTummala LindaKukolich - StephanieSeneff + StephanieSeneff 71-72 1997.mtsummit-workshop.9 lee-etal-1997-simplification The use of pegs computational discourse framework as an interlingua representation - SusannLuperfoy - KeithMiller + SusannLuperfoy + KeithMiller 73-80 1997.mtsummit-workshop.10 luperfoy-miller-1997-use @@ -666,8 +666,8 @@ Enriching lexical transfer with cross-linguistic semantic features or how to do interlingua without interlingua AlexisNasr - OwenRambow - MarthaPalmer + OwenRambow + MarthaPalmer JosephRosenzweig 91-98 1997.mtsummit-workshop.12 @@ -675,8 +675,8 @@ Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et to posit hierarchical structure in Levin’s verb classes - Mari BromanOlsen - Bonnie J.Dorr + Mari BromanOlsen + Bonnie J.Dorr David J.Clark 99-110 1997.mtsummit-workshop.13 diff --git a/data/xml/1997.tc.xml b/data/xml/1997.tc.xml index 77da022720..760f44ebd5 100644 --- a/data/xml/1997.tc.xml +++ b/data/xml/1997.tc.xml @@ -35,7 +35,7 @@ Evaluation of Language Tools - BenteMaegaard + BenteMaegaard 1997.tc-1.5 maegaard-1997-evaluation @@ -77,13 +77,13 @@ Scania <fixed-case>S</fixed-case>wedish - A Basis for Multilingual Translation - Anna SågvallHein + Anna SågvallHein 1997.tc-1.12 hein-1997-scania Machine Translation and Minority Languages - HaroldSomers + HaroldSomers 1997.tc-1.13 somers-1997-machine diff --git a/data/xml/1997.tmi.xml b/data/xml/1997.tmi.xml index dcacda7dbc..fbccef7e75 100644 --- a/data/xml/1997.tmi.xml +++ b/data/xml/1997.tmi.xml @@ -11,8 +11,8 @@ If you have it, flaunt it: using full ontological knowledge for word sense disambiguation - KaviMahesh - SergeiNirenburg + KaviMahesh + SergeiNirenburg StephenBeale 1997.tmi-1.1 mahesh-etal-1997-flaunt @@ -27,7 +27,7 @@ Looking back to 1952: the first <fixed-case>MT</fixed-case> conference - JohnHutchins + JohnHutchins 1997.tmi-1.3 hutchins-1997-looking @@ -47,7 +47,7 @@ <fixed-case>MT</fixed-case> at the paragraph level: improving <fixed-case>E</fixed-case>nglish synthesis in <fixed-case>SYSTRAN</fixed-case> - EduardHovy + EduardHovy LaurieGerber 1997.tmi-1.6 hovy-gerber-1997-mt @@ -90,20 +90,20 @@ <fixed-case>E</fixed-case>nglish adverb processing in <fixed-case>J</fixed-case>apanese-to-<fixed-case>E</fixed-case>nglish machine translation KentaroOgura - SatoshiShirai + SatoshiShirai FrancisBond 1997.tmi-1.11 ogura-etal-1997-english Language control and machine translation - AnnaSågvall Hein + AnnaSågvall Hein 1997.tmi-1.12 sagvall-hein-1997-language Automated dictionary extraction for “knowledge-free” example-based translation - Ralf D.Brown + Ralf D.Brown 1997.tmi-1.13 brown-1997-automated @@ -117,9 +117,9 @@ <fixed-case>T</fixed-case>op<fixed-case>A</fixed-case>lign: word alignment for bilingual corpora based on topical clusters of dictionary entries and translations Mathis H.Chen - Jason S.Chang - Sue J.Ker - Jen-NanChen + Jason S.Chang + Sue J.Ker + Jen-NanChen 1997.tmi-1.15 chen-etal-1997-topalign @@ -127,14 +127,14 @@ Error correcting parsing for text-to-text machine translation using finite state models Juan C.Amengual José M.Benedí - FranciscoCasacuberta - AsunciónCastaño - AntonioCastellanos - DavidLlorens - AndrésMarzal - FedericoPrat - EnriqueVidal - Juan M.Vilar + FranciscoCasacuberta + AsunciónCastaño + AntonioCastellanos + DavidLlorens + AndrésMarzal + FedericoPrat + EnriqueVidal + Juan M.Vilar 1997.tmi-1.16 amengual-etal-1997-error @@ -146,20 +146,20 @@ Word sense disambiguation: why statistics when we have these numbers? - KaviMahesh - SergeiNirenburg + KaviMahesh + SergeiNirenburg StephenBeale EvelyneViegas VictorRaskin - BoyanOnyshkevych + BoyanOnyshkevych 1997.tmi-1.18 mahesh-etal-1997-word Machine translation using neural networks and finite-state models - AsunciónCastaño - FranciscoCasacuberta - EnriqueVidal + AsunciónCastaño + FranciscoCasacuberta + EnriqueVidal 1997.tmi-1.19 castano-etal-1997-machine @@ -167,7 +167,7 @@ A left-to-right breadth-first algorithm for subcategorization frame selection of <fixed-case>J</fixed-case>apanese verbs KazunoriMuraki Shin’ichiroKamei - ShinichiDoi + ShinichiDoi 1997.tmi-1.20 muraki-etal-1997-left diff --git a/data/xml/1998.amta.xml b/data/xml/1998.amta.xml index 06ac963897..9aa3365847 100644 --- a/data/xml/1998.amta.xml +++ b/data/xml/1998.amta.xml @@ -14,12 +14,12 @@ <fixed-case>MT</fixed-case> evaluation - John S.White + John S.White white-1998-mt Survey of methodological approaches to <fixed-case>MT</fixed-case> - HaroldSomers + HaroldSomers somers-1998-survey @@ -29,7 +29,7 @@ Ontological semantics for knowledge-based <fixed-case>MT</fixed-case> - SergeiNirenburg + SergeiNirenburg nirenburg-1998-ontological @@ -39,12 +39,12 @@ Speech to speech machine translation - MonikaWoszczyna + MonikaWoszczyna woszczyna-1998-speech Multilingual text summarization - EduardHovy + EduardHovy DanelMarcu hovy-marcu-1998-multilingual @@ -63,7 +63,7 @@ A seal of approval for <fixed-case>MT</fixed-case> systems - EduardHovy + EduardHovy hovy-1998-seal @@ -73,7 +73,7 @@ Breaking the quality ceiling - DavidFarwell + DavidFarwell farwell-1998-breaking @@ -109,11 +109,11 @@ A modular approach to spoken language translation for large domains MonikaWoszczcyna MatthewBroadhead - DonnaGates + DonnaGates MarsalGavaldá - AlonLavie - LoriLevin - AlexWaibel + AlonLavie + LoriLevin + AlexWaibel 31-49 https://link.springer.com/chapter/10.1007/3-540-49478-2_3 The MT engine of the JANUS speech-to-speech translation system is designed around four main principles: 1) an interlingua approach that allows the efficient addition of new languages, 2) the use of semantic grammars that yield low cost high quality translations for limited domains, 3) modular grammars that support easy expansion into new domains, and 4) efficient integration of multiple grammars using multi-domain parse lattices and domain re-scoring. Within the framework of the C-STAR-II speech-to-speech translation effort, these principles are tested against the challenge of providing translation for a number of domains and language pairs with the additional restriction of a common interchange format. @@ -121,8 +121,8 @@ Enhancing automatic acquisition of the thematic structure in a large-scale lexicon for <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese - Mari BromanOlsen - Bonnie J.Dorr + Mari BromanOlsen + Bonnie J.Dorr Scott C.Thomas 41-50 https://link.springer.com/chapter/10.1007/3-540-49478-2_4 @@ -132,7 +132,7 @@ Ordering translation templates by assigning confidence factors ZeynepÖz - IlyasCicekli + IlyasCicekli 51-61 https://link.springer.com/chapter/10.1007/3-540-49478-2_5 TTL (Translation Template Learner) algorithm learns lexical level correspondences between two translation examples by using analogical reasoning. The sentences used as translation examples have similar and different parts in the source language which must correspond to the similar and different parts in the target language. Therefore these correspondences are learned as translation templates. The learned translation templates are used in the translation of other sentences. However, we need to assign confidence factors to these translation templates to order translation results with respect to previously assigned confidence factors. This paper proposes a method for assigning confidence factors to translation templates learned by the TTL algorithm. Training data is used for collecting statistical information that will be used in confidence factor assignment process. In this process, each template is assigned a confidence factor according to the statistical information obtained from training data. Furthermore, some template combinations are also assigned confidence factors in order to eliminate certain combinations resulting bad translation. @@ -140,7 +140,7 @@ Quality and robustness in <fixed-case>MT</fixed-case>—<fixed-case>A</fixed-case> balancing act - BiankaBuschbeck-Wolf + BiankaBuschbeck-Wolf MichaelDorna 62-71 https://link.springer.com/chapter/10.1007/3-540-49478-2_6 @@ -161,7 +161,7 @@ GöklanTür KemalOflazer TerukoMitamura - Eric H.Nyberg, 3rd + Eric H.Nyberg, 3rd 83-94 https://link.springer.com/chapter/10.1007/3-540-49478-2_8 This paper describes the integration of a Turkish generation system with the KANT knowledge-based machine translation system to produce a prototype English-Turkish interlingua-based machine translation system. These two independently constructed systems were successfully integrated within a period of two months, through development of a module which maps KANT interlingua expressions to Turkish syntactic structures. The combined system is able to translate completely and correctly 44 of 52 benchmark sentences in the domain of broadcast news captions. This study is the first known application of knowledge-based machine translation from English to Turkish, and our initial results show promise for future development. @@ -169,8 +169,8 @@ Rapid prototyping of domain-apecific machine translation systems - MarthaPalmer - OwenRambow + MarthaPalmer + OwenRambow AlexisNasr 95-102 https://link.springer.com/chapter/10.1007/3-540-49478-2_9 @@ -180,7 +180,7 @@ An evaluation of the multi-engine <fixed-case>MT</fixed-case> architecture ChristopherHogan - Robert E.Frederking + Robert E.Frederking 113-123 https://link.springer.com/chapter/10.1007/3-540-49478-2_11 The Multi-Engine MT (MEMT) architecture combines the outputs of multiple MT engines using a statistical language model of the target language. It has been used successfully in a number of MT research systems, for both text and speech translation. Despite its perceived benefits, there has never been a rigorous, published, double-blind evaluation of the claim that the combined output of a MEMT system is in fact better than that of any one of the component MT engines. We report here the results of such an evaluation. The combined MEMT output is shown to indeed be better overall than the output of the component engines in a Croatian ↔ English MT system. This result is consistent in both translation directions, and between different raters. @@ -207,7 +207,7 @@ Fast document translation for cross-language information retrieval J.ScottMcCarley - SalimRoukos + SalimRoukos 150-157 https://link.springer.com/chapter/10.1007/3-540-49478-2_14 We describe a statistical algorithm for machine translation intended to provide translations of large document collections at speeds far in excess of traditional machine translation systems, and of sufficiently high quality to perform information retrieval on the translated document collections. The model is trained from a parallel corpus and is capable of disambiguating senses of words. Information retrieval (IR) experiments on a French language dataset from a recent cross-language information retrieval evaluation yields results superior to those obtained by participants in the evaluation, and confirm the importance of word sense disambiugation in cross-language information retrieval. @@ -239,9 +239,9 @@ A multilingual procedure for dictionary-based sentence alignment - AdamMeyers + AdamMeyers MichikoKosaka - RalphGrishman + RalphGrishman 187-198 https://link.springer.com/chapter/10.1007/3-540-49478-2_18 This paper describes a sentence alignment technique based on a machine readable dictionary. Alignment takes place in a single pass through the text, based on the scores of matches between pairs of source and target sentences. Pairings consisting of sets of matches are evaluated using a version of the Gale-Shapely solution to the stable marriage problem. An algorithm is described which can handle N-to-1 (or 1-to-N) matches, for n ≥ 0, i.e., deletions, 1-to-1 (including scrambling), and 1-to-many matches. A simple frequency based method for acquiring supplemental dictionary entries is also discussed. We achieve high quality alignments using available bilingual dictionaries, both for closely related language pairs (Spanish/English) and more distantly related pairs (Japanese/English). @@ -249,8 +249,8 @@ Taxonomy and lexical semantics—from the perspective of machine readable dictionary - Jason S.Chang - Sue J.Ker + Jason S.Chang + Sue J.Ker Mathis H.Chen 199-212 https://link.springer.com/chapter/10.1007/3-540-49478-2_19 @@ -295,7 +295,7 @@ When Stålhandske becomes Steelglove PernillaDanielsson - KatarinaMühlenbock + KatarinaMühlenbock 266-274 https://link.springer.com/chapter/10.1007/3-540-49478-2_24 Names can serve several purposes in the field of Machine Translation. The problems range from identifying to processing the various types of names. The paper begins with a short description of the search strategy and then continues with the classification of types into a typology. We present our findings according to degrees of translation from which we highlight clues. These clues indicate a first step towards formalization. @@ -337,7 +337,7 @@ Twisted pair grammar: support for rapid development of machine translation for low density languages - DouglasJones + DouglasJones RickHavrilla 318-332 https://link.springer.com/chapter/10.1007/3-540-49478-2_29 @@ -346,9 +346,9 @@ A thematic hierarchy for efficient generation from lexical-conceptual structure - BonnieDorr + BonnieDorr NizarHabash - DavidTraum + DavidTraum 333-343 https://link.springer.com/chapter/10.1007/3-540-49478-2_30 This paper describes an implemented algorithm for syntactic realization of a target-language sentence from an interlingual representation called Lexical Conceptual Structure (LCS). We provide a mapping between LCS thematic roles and Abstract Meaning Representation (AMR) relations; these relations serve as input to an off-the-shelf generator (Nitrogen). There are two contributions of this work: (1) the development of a thematic hierarchy that provides ordering information for realization of arguments in their surface positions; (2) the provision of a diagnostic tool for detecting inconsistencies in an existing online LCS-based lexicon that allows us to enhance principles for thematic-role assignment. @@ -356,7 +356,7 @@ The <fixed-case>LMT</fixed-case> Transformational System - MichaelMcCord + MichaelMcCord ArendseBernth 344-355 https://link.springer.com/chapter/10.1007/3-540-49478-2_31 @@ -375,7 +375,7 @@ Predicting what <fixed-case>MT</fixed-case> is good for: user judgments and task performance KathrynTaylor - JohnWhite + JohnWhite 364-373 https://link.springer.com/chapter/10.1007/3-540-49478-2_33 As part of the Machine Translation (MT) Proficiency Scale project at the US Federal Intelligent Document Understanding Laboratory (FIDUL), Litton PRC is developing a method to measure MT systems in terms of the tasks for which their output may be successfully used. This paper describes the development of a task inventory, i.e., a comprehensive list of the tasks analysts perform with translated material and details the capture of subjective user judgments and insights about MT samples. Also described are the user exercises conducted using machine and human translation samples and the assessment of task performance. By analyzing translation errors, user judgments about errors that interfere with task performance, and user task performance results, we isolate source language patterns which produce output problems. These patterns can then be captured in a single diagnostic test set, to be easily applied to any new Japanese-English system to predict the utility of its output. @@ -408,7 +408,7 @@ Lexical choice and syntactic generation in a transfer system: transformations in the new <fixed-case>LMT</fixed-case> <fixed-case>E</fixed-case>nglish-<fixed-case>G</fixed-case>erman system - ClaudiaGdaniec + ClaudiaGdaniec 408-420 https://link.springer.com/chapter/10.1007/3-540-49478-2_37 This paper argues that, contrary to received wisdom in the MT research community, a transfer system such as LMT is well suited to deal with most of the problems that MT faces. It may in fact be superior to other approaches in that it can handle target surface-structure constraints, variation of syntactic patterns, discourse-structure constraints, and stylistic preference. The paper describes the linguistic issues involved in LMT’s English⇒German transformational component, its interaction with the lexical transfer component, and types of transformations. It identifies context-dependent and context-independent transformations and among the context-dependent ones, it differentiates between those that are triggered by instructions in the lexicon, by semantic category, by syntactic context, and by setting of stylistic preference. The paper concludes with some examples of divergence between English and German and shows how LMT handles them. @@ -417,7 +417,7 @@ Translation with finite-state devices KevinKnight - YaserAl-Onaizan + YaserAl-Onaizan 421-437 https://link.springer.com/chapter/10.1007/3-540-49478-2_38 Statistical models have recently been applied to machine translation with interesting results. Algorithms for processing these models have not received wide circulation, however. By contrast, general finite-state transduction algorithms have been applied in a variety of tasks. This paper gives a finite-state reconstruction of statistical translation and demonstrates the use of standard tools to compute statistically likely translations. Ours is the first translation algorithm for “fertility/permutation” statistical models to be described in replicable detail. @@ -425,7 +425,7 @@ Lexical selection for cross-language applications: combining <fixed-case>LCS</fixed-case> with <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et - BonnieDorr + BonnieDorr MariaKatsova 438-447 https://link.springer.com/chapter/10.1007/3-540-49478-2_39 @@ -435,7 +435,7 @@ Improving translation quality by manipulating sentence length LaurieGerber - EduardHovy + EduardHovy 448-460 https://link.springer.com/chapter/10.1007/3-540-49478-2_40 Translation systems tend to have more trouble with long sentences than with short ones for a variety of reasons. When the source and target languages differ rather markedly, as do Japanese and English, this problem is reflected in lower quality output. To improve readability, we experimented with automatically splitting long sentences into shorter ones. This paper outlines the problem, describes the sentence splitting procedure and rules, and provides an evaluation of the results. @@ -451,7 +451,7 @@ A comparative study of query and document translation for cross-language information retrieval - Douglas W.Oard + Douglas W.Oard 472-483 https://link.springer.com/chapter/10.1007/3-540-49478-2_42 Cross-language retrieval systems use queries in one natural language to guide retrieval of documents that might be written in another. Acquisition and representation of translation knowledge plays a central role in this process. This paper explores the utility of two sources of translation knowledge for cross-language retrieval. We have implemented six query translation techniques that use bilingual term lists and one based on direct use of the translation output from an existing machine translation system; these are compared with a document translation technique that uses output from the same machine translation system. Average precision measures on a TREC collection suggest that arbitrarily selecting a single dictionary translation is typically no less effective than using every translation in the dictionary, that query translation using a machine translation system can achieve somewhat better effectiveness than simpler techniques, and that document translation may result in further improvements in retrieval effectiveness under some conditions. @@ -459,7 +459,7 @@ Lexicons as gold: mining, embellishment and reuse - Keith J.Miller + Keith J.Miller David M.Zajic 484-493 https://link.springer.com/chapter/10.1007/3-540-49478-2_43 diff --git a/data/xml/1998.eamt.xml b/data/xml/1998.eamt.xml index 0fbe063ddb..b325531b1f 100644 --- a/data/xml/1998.eamt.xml +++ b/data/xml/1998.eamt.xml @@ -8,7 +8,7 @@ April 2–3 1998 1998.eamt-1 - JohnHutchins + JohnHutchins eamt @@ -91,7 +91,7 @@ Summary of the concluding session DimitriosTheologitis - BenteMaegaard + BenteMaegaard 1998.eamt-1.13 theologitis-maegaard-1998-summary diff --git a/data/xml/1998.tc.xml b/data/xml/1998.tc.xml index 03384bacce..7d8d2e7308 100644 --- a/data/xml/1998.tc.xml +++ b/data/xml/1998.tc.xml @@ -11,7 +11,7 @@ Twenty Years of Translating and the Computer - JohnHutchins + JohnHutchins 1998.tc-1.1 hutchins-1998-twenty diff --git a/data/xml/1999.eamt.xml b/data/xml/1999.eamt.xml index 1db62cbcde..fdf86cc503 100644 --- a/data/xml/1999.eamt.xml +++ b/data/xml/1999.eamt.xml @@ -26,13 +26,13 @@ Experience from translation of <fixed-case>EU</fixed-case> documents - GáborPrószéky + GáborPrószéky 1999.eamt-1.3 proszeky-1999-experience Aligning and extracting translation equivalents from <fixed-case>EU</fixed-case> documents - a possible look on <fixed-case>EU</fixed-case> Integration - ElenaPaskaleva + ElenaPaskaleva paskaleva-1999-aligning @@ -64,21 +64,21 @@ Translation to and from <fixed-case>R</fixed-case>ussian: the <fixed-case>ETAP</fixed-case> system - IgorBoguslavsky + IgorBoguslavsky 1999.eamt-1.10 boguslavsky-1999-translation On intermediate structures and tectogrammatics - PetrSgall + PetrSgall 1999.eamt-1.11 sgall-1999-intermediate Machine translation of very closely related languages - JanHajič + JanHajič JanHric - VladislavKuboň + VladislavKuboň hajic-etal-1999-machine @@ -89,13 +89,13 @@ Automatic translation lexicon extraction from <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>zech parallel texts - MartinČmejrek + MartinČmejrek JanCuřin cmejrek-curin-1999-automatic Human Language Technologies - possibilities in the <fixed-case>EU</fixed-case> 5th Framework Programme for Research and Technological Development - BenteMaegaard + BenteMaegaard maegaard-1999-human @@ -105,7 +105,7 @@ Post-Workshop commentary: New languages are not virgin languages: <fixed-case>EAMT</fixed-case> ‘99 Workshop from an “eastern” point of view - VladislavKuboň + VladislavKuboň kubon-1999-post diff --git a/data/xml/1999.mtsummit.xml b/data/xml/1999.mtsummit.xml index 6677540dc1..2f1b4ffa33 100644 --- a/data/xml/1999.mtsummit.xml +++ b/data/xml/1999.mtsummit.xml @@ -44,7 +44,7 @@ Retrospect and prospect in computer-based translation - JohnHutchins + JohnHutchins 30-36 1999.mtsummit-1.5 At the last MT Summit conference this century, this paper looks back briefly at what has happened in the 50 years since MT began, reviews the present situation, and speculates on what the future may bring. Progress in the basic processes of computerized translation has not been as dramatic as developments in computer technology and software. There is still much scope for the improvement of the linguistic quality of MT output, which hopefully developments in both rule-based and corpus-based methods can bring. Greater impact on the future MT scenario will probably come from the expected huge increase in demand for on-line real-time communication in many languages, where quality may be less important than accessibility and usability. @@ -121,7 +121,7 @@ Machine translation in <fixed-case>K</fixed-case>orea - Se-YoungPark + Se-YoungPark Gil-RokOh 100-106 1999.mtsummit-1.15 @@ -146,7 +146,7 @@ Translation systems under the <fixed-case>C</fixed-case>-<fixed-case>STAR</fixed-case> framework - AlexWaibel + AlexWaibel 121-124 1999.mtsummit-1.18 This talk will review our work on Speech Translation under the recent worldwide C-STAR demonstration. C-STAR is the Consortium for Speech Translation Advanced Research and now includes 6 partners and 20 partner/affiliate laboratories around the world. The work demonstrated concludes the second phase of the consortium, which has focused on translating conversational spontaneous speech as opposed to well formed, well structured text. As such, much of the work has focused on exploiting semantic and pragmatic constraints derived from the task domain and dialog situation to produce an understandable translation. Six partners have connected their respective systems with each other and allowed travel related spoken dialogs to provide communication between each of them. A common Interlingua representation was developed and used between the partners to make this multilingual deployment possible. The systems were also complemented by the introduction of Web based shared workspaces that allow one user in one country to communicate pictures, documents, sounds, tables, etc. to the other over the Web while referring to these documents in the dialog. Some of the partners' systems were also deployed in wearable situations, such as a traveler exploring a foreign city. In this case speech and language technology was installed on a wearable computer with a small hand-held display. It was used to provide language translation as well as human-machine information access for the purpose of navigation (using GPS localization) and tour guidance. This combination of human-machine and human-machine-human dialogs could allow a user explore a foreign environment more effectively by resorting to human-machine and human-human dialogs wherever most appropriate. @@ -154,7 +154,7 @@ A research perspective on how to democratize machine translation and translation aids aiming at high quality final output - ChristianBoitet + ChristianBoitet 125-133 1999.mtsummit-1.19 Machine Translation (MT) systems and Translation Aids (TA) aiming at cost-effective high quality final translation are not yet usable by small firms, departments and individuals, and handle only a few languages and language pairs. This is due to a variety of reasons, some of them not frequently mentioned. But commercial, technical and cultural reasons make it mandatory to find ways to democratize MT and TA. This goal could be attained by: (1) giving users, free of charge, TA client tools and server resources in exchange for the permission to store and refine on the server linguistic resources produced while using TA; (2) establishing a synergy between MT and TA, in particular by using them jointly in translation projects where translators codevelop the lexical resources specific to MT; (3) renouncing the illusion of fully automatic general purpose high quality MT (FAHQMT) and go for semi-automaticity (SAHQMT), where user participation, made possible by recent technical network-oriented advances, is used to solve ambiguities otherwise computationnally unsolvable due to the impossibility, intractability or cost of accessing the necessary knowledge; (4) adopting a hybrid (symbolic & numerical) and "pivot" approach for MT, where pivot lexemes arc UNL or UNL inspired English-oriented denotations of (sets of) interlingual acceptions or word/term senses, and the rest of the representation of utterances is either fully abstract and interlingual as in UNL, or, less ambitiously but more realistically, obtained by adding to an abstract English multilevel structure features underspecified in English but essential for other languages, including minority languages. @@ -194,7 +194,7 @@ A scalable cross-language metasearch architecture for multilingual information access on the Web - YoshihikoHayashi + YoshihikoHayashi GenichiroKikui ToshiakiIwadera 157-164 @@ -204,7 +204,7 @@ Complementing dictionary-based query translations with corpus statistics for cross-language <fixed-case>IR</fixed-case> - Sung HyonMyaeng + Sung HyonMyaeng Mung-GilJang 165-174 1999.mtsummit-1.25 @@ -213,7 +213,7 @@ Machine translation for the next century - Jun-ichiTsujii + Jun-ichiTsujii 175-176 1999.mtsummit-1.26 The panel intends to pick up some of the issues discussed in the Summit and discuss them further in the final session from broader perspectives. Since the Summit has not even started yet, I will just enumerate in this paper a list of possible perspectives on MT that I hope are relevant to our discussion. @@ -229,7 +229,7 @@ Sharing dictionaries among <fixed-case>MT</fixed-case> users by common formats and social filtering framework - Shin-ichiroKamei + Shin-ichiroKamei 180-181 1999.mtsummit-1.28 MT users have to build "user dictionaries" in order to obtain high-quality translation results. However, building dictionaries needs time and labor. In order to meet the speed of the information flow in the global network society, we need to have common formats for sharing dictionaries among different MT systems, and a new way of dictionary authorization, that is "social filtering". @@ -259,9 +259,9 @@ <fixed-case>MT</fixed-case> evaluation MargaretKing - EduardHovy - Benjamin K.Tsou - JohnWhite + EduardHovy + Benjamin K.Tsou + JohnWhite YusoffZaharin 197-207 1999.mtsummit-1.31 @@ -270,7 +270,7 @@ Applying <fixed-case>TDMT</fixed-case> to abstracts on science and technology - HidekiKashioka + HidekiKashioka HirokoOhta YoshikoShirokizawa KazutakaTakao @@ -281,8 +281,8 @@ <fixed-case>UNL</fixed-case>-<fixed-case>F</fixed-case>rench deconversion as transfer & generation from an interlingua with possible quality enhancement through offline human interaction - GillesSérasset - ChristianBoitet + GillesSérasset + ChristianBoitet 220-228 1999.mtsummit-1.33 We present the architecture of the UNL-French deconverter, which "generates" from the UNL interlingua by first "localizing" the UNL form for French, within UNL, and then applying slightly adapted but classical transfer and generation techniques, implemented in GETA's Ariane-G5 environment, supplemented by some UNL-specific tools. Online interaction can be used during deconversion to enhance output quality and is now used for development purposes. We show how interaction could be delayed and embedded in the postedition phase, which would then interact not directly with the output text, but indirectly with several components of the deconverter. Interacting online or offline can improve the quality not only of the utterance at hand, but also of the utterances processed later, as various preferences may be automatically changed to let the deconverter "learn". @@ -290,13 +290,13 @@ Solutions to problems inherent in spoken-language translation: the <fixed-case>ATR</fixed-case>-<fixed-case>MATRIX</fixed-case> approach - EiichiroSumita + EiichiroSumita SetsuoYamada KazuhideYamamoto - MichaelPaul - HidekiKashioka + MichaelPaul + HidekiKashioka KaiIshikawa - SatoshiShirai + SatoshiShirai 229-235 1999.mtsummit-1.34 ATR has built a multi-language speech translation system called ATR-MATRIX. It consists of a spoken-language translation subsystem, which is the focus of this paper, together with a highly accurate speech recognition subsystem and a high-definition speech synthesis subsystem. This paper gives a road map of solutions to the problems inherent in spoken-language translation. Spoken-language translation systems need to tackle difficult problems such as ungrammaticality. contextual phenomena, speech recognition errors, and the high-speeds required for real-time use. We have made great strides towards solving these problems in recent years. Our approach mainly uses an example-based translation model called TDMT. We have added the use of extra-linguistic information, a decision tree learning mechanism, and methods dealing with recognition errors. @@ -314,7 +314,7 @@ Example-based machine translation based on the synchronous <fixed-case>SSTC</fixed-case> annotation schema - Mosleh H.Al-Adhaileh + Mosleh H.Al-Adhaileh Tang EnyaKong 244-249 1999.mtsummit-1.36 @@ -372,11 +372,11 @@ Study on evaluation of <fixed-case>WWW</fixed-case> <fixed-case>MT</fixed-case> systems ShinichiroMiyazawa - ShoichiYokoyama + ShoichiYokoyama MasakiMatsudaira AkiraKumano ShujiKodama - HidekiKashioka + HidekiKashioka YoshikoShirokizawa YasuoNakajima 290-298 @@ -397,7 +397,7 @@ Machine translation for information access across the language barrier: the <fixed-case>M</fixed-case>u<fixed-case>ST</fixed-case> system - Chin-YewLin + Chin-YewLin 308-316 1999.mtsummit-1.45 In this paper we describe the design and implementation of MuST, a multilingual information retrieval, summarization, and translation system. MuST integrates machine translation and other text processing services to enable users to perform cross-language information retrieval using available search services such as commercial Internet search engines. To handle non-standard languages, a new Internet indexing agent can be deployed, specialized local search services can be built, and shallow MT can be added to provide useful functionality. A case study of augmenting MuST with Indonesian is included. MuST adopts ubiquitous web browsers as its primary user interface, and provides tightly integrated automated shallow translation and user biased summarization to help users quickly judge the relevance of documents. @@ -415,7 +415,7 @@ Interactive <fixed-case>MT</fixed-case> as support for non-native language authoring SvetlanaSheremetyeva - SergeiNirenburg + SergeiNirenburg 324-330 1999.mtsummit-1.47 The paper describes an approach to developing an interactive MT system for translating technical texts on the example of translating patent claims between Russian and English. The approach conforms to the human-aided machine translation paradigm. The system is meant for a source language (SL) speaker who does not know the target language (TL). It consists of i) an analysis module which includes a submodule of interactive syntactic analysis of SL text and a submodule of fully automated morphological analysis, ii) an automatic module for transferring the lexical and partially syntactic content of SL text into a similar content of the TL text and iii) a fully automated TL text generation module which relies on knowledge about the legal format of TL patent claims. An interactive analysis module guides the user through a sequence of SL analysis procedures, as a result of which the system produces a set of internal knowledge structures which serve as input to the TL text generation. Both analysis and generation rely heavily on the analysis of the sublanguage of patent claims. The model has been developed for English and Russian as both SLs and TLs but is readily extensible to other languages. @@ -451,7 +451,7 @@ The <fixed-case>ELAN</fixed-case> <fixed-case>S</fixed-case>lovene-<fixed-case>E</fixed-case>nglish aligned corpus - TomazErjavec + TomazErjavec 349-357 1999.mtsummit-1.51 Multilingual parallel corpora are a basic resource for research and development of MT. Such corpora are still scarce, especially for lower-diffusion languages. The paper presents a sentence-aligned tokenised Slovene-English corpus, developed in the scope of the EU ELAN project. The corpus contains 1 million words from fifteen recent terminology-rich texts and is encoded according to the Guidelines for Text Encoding and Interchange (TEI). Our document type definition is a parametrisation of the TEI which directly encodes translation units of the bi-texts. in a manner similar to that of translation memories. The corpus is aimed as a widely-distributable dataset for language engineering and for translation and terminology studies. The paper describes the compilation of the corpus, its composition, encoding and availability. We highlight the corpus acquisition and distribution bottlenecks and present our solutions. These have to do with the workflow in the project, and. not unrelatedly, with the encoding scheme for the corpus. @@ -459,8 +459,8 @@ Harmonised large-scale syntactic/semantic lexicons: a <fixed-case>E</fixed-case>uropean multilingual infrastructure - NicolettaCalzolari - AntonioZampolli + NicolettaCalzolari + AntonioZampolli 358-365 1999.mtsummit-1.52 The paper aims at providing an overview of the situation of Language Resources (LR) in Europe, in particular as emerging from a few European projects regarding the construction of large-scale harmonised resources to be used for many applicative purpose, also of multilingual nature. An important research aspect of the projects is given by the very fact that the large enterprise described is, at our knowledge, the first attempt at developing wide-coverage lexicons for so many languages (12 European languages), with a harmonised common model, and with encoding of structured "semantic types" and semantic (subcategorisation) frames on a large scale. Reaching a common agreed model grounded on sound theoretical approaches within a very large consortium is in itself a challenging task. The actual lexicons will then provide a framework for testing and evaluating the maturity of the current state-of-the-art in lexical semantics grounded on, and connected to. a syntactic foundation. Another research aspect is provided by the recognition of the necessity of accompanying these "static" lexicons with dynamic means of acquiring lexical information from large corpora. This is one of the challenging research aspects of a global strategy for building a large and useful multilingual LR infrastructure. @@ -477,7 +477,7 @@ A pipelined multi-engine approach to <fixed-case>C</fixed-case>hinese-to-<fixed-case>K</fixed-case>orean machine translation: <fixed-case>MATES</fixed-case>/<fixed-case>CK</fixed-case> MinZhang - Key-SunChoi + Key-SunChoi 375-379 1999.mtsummit-1.54 This paper presents MATES/CK, a Chinese-to-Korean machine translation system. We introduce the design philosophy, component modules, implementation and some other aspects of MATES/CK system in this paper. @@ -499,7 +499,7 @@ Rapid development of translation tools JanAmtrup KarineMegerdoomian - RemiZajac + RemiZajac 385-389 1999.mtsummit-1.56 The Computing Research Laboratory is currently developing technologies that allow rapid deployment of automatic translation capabilities. These technologies are designed to handle low-density languages for which resources, be that human informants or data in electronically readable form, are scarce. All tools are built in an incremental fashion, such that some simple tools (a bilingual dictionary or a glosser) can be delivered early in the development to support initial analysis tasks. More complex applications can be fielded in successive functional versions. The technology we demonstrate has first been applied to Persian-English machine translation within the Shiraz project and is currently extended to cover languages such as Arabic, Japanese, Korean and others. @@ -507,7 +507,7 @@ The use of abstracted knowledge from an automatically sense-tagged corpus for lexical transfer ambiguity resolution - Hui-FengLi + Hui-FengLi Namwon Heo. KyounghiMoon Jong-HyeokLee 390-396 @@ -545,9 +545,9 @@ Using a target language model for domain independent lexical disambiguation - JimCowie + JimCowie YevgenyLudovik - SergeiNirenburg + SergeiNirenburg 417-420 1999.mtsummit-1.61 In this paper we describe a lexical disambiguation algorithm based on a statistical language model we call maximum likelihood disambiguation. The maximum likelihood method depends solely on the target language. The model was trained on a corpus of American English newspaper texts. Its performance was tested using output from a transfer based translation system between Turkish and English. The method is source language independent, and can be used for systems translating from any language into English. @@ -565,7 +565,7 @@ Compound noun decomposition using a <fixed-case>M</fixed-case>arkov model JongwooLee Byoung-TakZhang - Yung TaekKim + Yung TaekKim 427-431 1999.mtsummit-1.63 A statistical method for compound noun decomposition is presented. Previous studies on this problem showed some statistical information are helpful. But applying statistical information was not so systemic that performance depends heavily on the algorithm and some algorithms usually have many separated steps. In our work statistical information is collected from manually decomposed compound noun corpus to build a Markov model for composition. Two Markov chains representing statistical information are assumed independent: one for the sequence of participants' lengths and another for the sequence of participants ' features. Besides Markov assumptions, least participants preference assumption also is used. These two assumptions enable the decomposition algorithm to be a kind of conditional dynamic programming so that efficient and systemic computation can be performed. When applied to test data of size 5027, we obtained a precision of 98.4%. @@ -576,9 +576,9 @@ Sung-KwonChoi TaewanKim SanghwaYuh - Han-MinJung + Han-MinJung Chul-MinSim - Sang-KyuPark + Sang-KyuPark 432-437 1999.mtsummit-1.64 The previous English-Korean MT system that have been developed in Korea have dealt with only written text as translation object. Most of them enumerated a following list of the problems that had not seemed to be easy to solve in the near future : 1) processing of non-continuous idiomatic expressions 2) reduction of too many POS or structural ambiguities 3) robust processing for long sentence and parsing failure 4) selecting correct word correspondence between several alternatives. The problems can be considered as important factors that have influence on the translation quality of machine translation system. This paper describes not only the solutions of problems of the previous English-to-Korean machine translation systems but also the HTML tags management between two structurally different languages, English and Korean. Through the solutions we translate successfully English web documents into Korean one in the English-to-Korean web translator "FromTo/Web-EK" which has been developed from 1997. @@ -624,7 +624,7 @@ Byong-RaeRyu YoungkilKim SanghwaYuh - SangkyuPark + SangkyuPark 469-475 1999.mtsummit-1.69 In this paper we describe and experimentally evaluate FromTo K/E, a rule-based Korean-English machine translation system adapting transfer methodology. In accordance with the view that a successful Korean-English machine translation system presumes a highly efficient robust Korean parser, we develop a parser reinforced with "Fail Softening", i.e. the long sentence segmentation and the recovery of failed parse trees. To overcome the language-typological differences between Korean and English, we adopt a powerful module for processing Korean multi-word lexemes and Korean idiomatic expressions. Prior to parsing Korean sentences, furthermore, we try to resolve the ambiguity of words with unknown grammatical functions on the basis of the collocation and subcategorization information. The results of the experimental evaluation show that the degree of understandability for sample 2000 sentences amounts to 2.67, indicating that the meaning of the translated English sentences is almost clear to users, but the sentences still include minor grammatical or stylistic errors up to max. 30% of the whole words. @@ -648,7 +648,7 @@ <fixed-case>WEBTRAN</fixed-case>: a controlled language machine translation system for building multilingual services on <fixed-case>I</fixed-case>nternet - AarnoLehtola + AarnoLehtola JarnoTenni CatherineBounsaythip KristiinaJaaranen @@ -658,7 +658,7 @@ Improvement of translation quality of <fixed-case>E</fixed-case>nglish newspaper headlines by automatic preediting - TakehikoYoshimi + TakehikoYoshimi IchikoSata 496-500 1999.mtsummit-1.73 @@ -668,7 +668,7 @@ Transfer in experience-guided machine translation GangZhao - JunichiTsujii + JunichiTsujii 501-508 1999.mtsummit-1.74 Experience-Guided Machine Translation (EGMT) seeks to represent the translators' knowledge of translation as experiences and translates by analogy. The transfer in EGMT finds the experiences most similar to a new text and its parts, segments it into units of translation and translates them by analogy to the experiences and then assembles them into a whole. A research prototype of analogical transfer from Chinese to English is built to prove the viability of the approach in the exploration of new architecture of machine translation. The paper discusses how the experiences are represented and selected with respect to a new text. It describes how units of translation are defined, partial translation is derived and composed into a whole. @@ -677,7 +677,7 @@ Example-based machine translation of part-of-speech tagged sentences by recursive division TantelyAndriamanankasina - KenjiAraki + KenjiAraki KojiTochinai 509-517 1999.mtsummit-1.75 @@ -703,7 +703,7 @@ Sources of linguistic knowledge for minority languages - Harold L.Somers + Harold L.Somers 531-537 1999.mtsummit-1.78 Language Engineering (LE) products and resources for the world’s “major” languages are steadily increasing, but there remains a major gap as regards less widely-used languages. This paper considers the current situation regarding LE resources for some of the languages in question, and some proposals for rectifying this situation are made, including techniques based on adapting existing resources and “knowledge extraction” techniques from machine-readable corpora. @@ -712,7 +712,7 @@ <fixed-case>BITS</fixed-case>: a method for bilingual text search over the Web XiaoyiMa - Mark Y.Liberman + Mark Y.Liberman 538-542 1999.mtsummit-1.79 Parallel corpus are valuable resource for machine translation, multi-lingual text retrieval, language education and other applications, but for various reasons, its availability is very limited at present. Noticed that the World Word Web is a potential source to mine parallel text, researchers are making their efforts to explore the Web in order to get a big collection of bitext. This paper presents BITS (Bilingual Internet Text Search), a system which harvests multilingual texts over the World Wide Web with virtually no human intervention. The technique is simple, easy to port to any language pairs, and with high accuracy. The results of the experiments on German-English pair proved that the method is very successful. @@ -738,9 +738,9 @@ A new approach to the translating telephone - RobertFrederking + RobertFrederking ChristopherHogan - AlexanderRudnicky + AlexanderRudnicky 556-563 1999.mtsummit-1.82 The Translating Telephone has been a major goal of speech translation for many years. Previous approaches have attempted to work from limited-domain, fully-automatic translation towards broad-coverage, fully-automatic translation. We are approaching the problem from a different direction: starting with a broad-coverage but not fully-automatic system, and working towards full automation. We believe that working in this direction will provide us with better feedback, by observing users and collecting language data under realistic conditions, and thus may allow more rapid progress towards the same ultimate goal. Our initial approach relies on the wide-spread availability of Internet connections and web browsers to provide a user interface. We describe our initial work, which is an extension of the Diplomat wearable speech translator. @@ -756,13 +756,13 @@ Quantitative evaluation of machine translation using two-way <fixed-case>MT</fixed-case> - ShoichiYokoyama + ShoichiYokoyama AkiraKumano MasakiMatsudaira YoshikoShirokizawa MutsumiKawagoe ShujiKodama - HidekiKashioka + HidekiKashioka TerumasaEhara ShinichiroMiyazawa YasuoNakajima @@ -773,9 +773,9 @@ Task-based evaluation for machine translation - Jennifer B.Doyon + Jennifer B.Doyon Kathryn B.Taylor - John S.White + John S.White 574-578 1999.mtsummit-1.85 In an effort to reduce the subjectivity, cost, and complexity of evaluation methods for machine translation (MT) and other language technologies, task-based assessment is examined as an alternative to metrics-based in human judgments about MT, i.e., the previously applied adequacy, fluency, and informativeness measures. For task-based evaluation strategies to be employed effectively to evaluate languageprocessing technologies in general, certain key elements must be known. Most importantly, the objectives the technology’s use is expected to accomplish must be known, the objectives must be expressed as tasks that accomplish the objectives, and then successful outcomes defined for the tasks. For MT, task-based evaluation is correlated to a scale of tasks, and has as its premise that certain tasks are more forgiving of errors than others. In other words, a poor translation may suffice to determine the general topic of a text, but may not permit accurate identification of participants or the specific event. The ordering of tasks according to their tolerance for errors, as determined by actual task outcomes provided in this paper, is the basis of a scale and repeatable process by which to measure MT systems that has advantages over previous methods. @@ -843,7 +843,7 @@ Linking translation memories with example-based machine translation MichaelCarl - SilviaHansen + SilviaHansen 617-624 1999.mtsummit-1.92 The paper reports on experiments which compare the translation outcome of three corpus-based MT systems, a string-based translation memory (STM), a lexeme-based translation memory (LTM) and the example-based machine translation (EBMT) system EDGAR. We use a fully automatic evaluation method to compare the outcome of each MT system and discuss the results. We investigate the benefits for the linkage of different MT strategies such as TMsystems and EBMT systems. @@ -859,8 +859,8 @@ Resolving category ambiguity of non-text symbols in <fixed-case>M</fixed-case>andarin text - Feng-LongHwang - Ming-ShingYu + Feng-LongHwang + Ming-ShingYu 633-640 1999.mtsummit-1.94 hwang-yu-1999-resolving @@ -876,7 +876,7 @@ A multilevel framework for incremental development of <fixed-case>MT</fixed-case> systems - RemiZajac + RemiZajac 646-653 1999.mtsummit-1.96 We describe a Machine Translation framework aimed at the rapid development of large scale robust machine translation systems for assimilation purposes, where the MT system is incorporated as one of the tools in an analyst’s workstation. The multilevel architecture of the system is designed to enable early delivery of functional translation capabilities and incremental improvement of quality. A crucial aspect of the framework is a careful articulation of a software architecture, a linguistic architecture and an incremental development process of linguistic knowledge. diff --git a/data/xml/1999.tc.xml b/data/xml/1999.tc.xml index b2fb7d6f64..40f4b183f5 100644 --- a/data/xml/1999.tc.xml +++ b/data/xml/1999.tc.xml @@ -43,7 +43,7 @@ A Multi-level Framework for Memory-Based Translation Aid Tools - SteliosPiperidis + SteliosPiperidis ChristosMalavazos IoannisTriantafyllou 1999.tc-1.6 @@ -75,7 +75,7 @@ A Building Blocks Approach to Translation Memory - KevinMcTait + KevinMcTait MaeveOlohan ArturoTrujillo 1999.tc-1.11 @@ -83,7 +83,7 @@ <fixed-case>MABL</fixed-case>e: A Multi-lingual Authoring Tool for Business Letters - JohnTait + JohnTait JeremyEllman DiomidisSpinelis 1999.tc-1.12 @@ -104,8 +104,8 @@ Integrating Translation Technologies Using <fixed-case>SALT</fixed-case> GerhardBudin - Alan K.Melby - Sue EllenWright + Alan K.Melby + Sue EllenWright DeryleLonsdale ArleLommel 1999.tc-1.15 diff --git a/data/xml/1999.tmi.xml b/data/xml/1999.tmi.xml index 6c27ad9795..af2632b3f9 100644 --- a/data/xml/1999.tmi.xml +++ b/data/xml/1999.tmi.xml @@ -10,7 +10,7 @@ Mental spaces, space builders and bilingual summarization of news reports - BarbaraGawronska + BarbaraGawronska JaanaAnttila Dan-IvarJacobsson 1999.tmi-1.1 @@ -26,7 +26,7 @@ Adding linguistic knowledge to a lexical example-based translation system - Ralf D.Brown + Ralf D.Brown 1999.tmi-1.3 brown-1999-adding @@ -42,7 +42,7 @@ Learning, forgetting and remembering: statistical support for rule-based <fixed-case>MT</fixed-case> OliverStreiter - Leonid L.Iomdin + Leonid L.Iomdin MunpyoHong UteHauck 1999.tmi-1.5 @@ -67,8 +67,8 @@ Bilingual clustering using monolingual algorithms - SergioBarrachina - Juan MiguelVilar + SergioBarrachina + Juan MiguelVilar 1999.tmi-1.8 barrachina-vilar-1999-bilingual @@ -81,7 +81,7 @@ A language-neutral sparse-data algorithm for extracting translation patterns - KevinMcTait + KevinMcTait ArturoTrujillo 1999.tmi-1.10 mctait-trujillo-1999-language @@ -101,13 +101,13 @@ Errors of omission in translation - GrahamRussell + GrahamRussell 1999.tmi-1.13 russell-1999-errors Profiling translation projects: an essential part of routing translations - Nancy L.Underwood + Nancy L.Underwood BartJongejan 1999.tmi-1.14 underwood-jongejan-1999-profiling @@ -115,8 +115,8 @@ Lexical selection with a target language monolingual corpus and an <fixed-case>MRD</fixed-case> Hyun AhLee - Jong C.Park - Gil ChangKim + Jong C.Park + Gil ChangKim 1999.tmi-1.15 lee-etal-1999-lexical @@ -152,14 +152,14 @@ Argument status in <fixed-case>J</fixed-case>apanese verb sense disambiguation - TimothyBaldwin + TimothyBaldwin HozumiTanaka 1999.tmi-1.20 baldwin-tanaka-1999-argument A valency dictionary architecture for Machine Translation - TimothyBaldwin + TimothyBaldwin FrancisBond BenHutchinson 1999.tmi-1.21 @@ -168,7 +168,7 @@ Multiple strategies for automatic disambiguation in technical translation TerukoMitamura - EricNyberg + EricNyberg EnriqueTorrejon RobertIgo 1999.tmi-1.22 @@ -177,7 +177,7 @@ Pipelined multi-engine Machine Translation: accomplishment of <fixed-case>MATES</fixed-case>/<fixed-case>CK</fixed-case> system MinZhang - Key-SunChoi + Key-SunChoi 1999.tmi-1.23 zhang-choi-1999-pipelined-multi diff --git a/data/xml/2000.amta.xml b/data/xml/2000.amta.xml index d2496498c0..fb7db418dc 100644 --- a/data/xml/2000.amta.xml +++ b/data/xml/2000.amta.xml @@ -12,18 +12,18 @@ Ontological semantics - SergeiNirenburg + SergeiNirenburg nirenburg-2000-ontological A gentle introduction to <fixed-case>MT</fixed-case>: theory and current practice - EduardHovy + EduardHovy hovy-2000-gentle Controlled languages TerukoMitamura - EricNyberg + EricNyberg mitamura-nyberg-2000-controlled @@ -39,7 +39,7 @@ <fixed-case>MT</fixed-case>ranslatability ArendseBernth - ClaudiaGdaniec + ClaudiaGdaniec bernth-gdaniec-2000-mtranslatability @@ -55,8 +55,8 @@ Building a <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish mapping between verb concepts for multilingual applications - Bonnie J.Dorr - Gina-AnneLevow + Bonnie J.Dorr + Gina-AnneLevow DekangLin 1-12 https://link.springer.com/chapter/10.1007/3-540-39965-8_1 @@ -84,7 +84,7 @@ A self-learning method of parallel texts alignment AntónioRibeiro - GabrielLopes + GabrielLopes JoãoMexia 30-39 https://link.springer.com/chapter/10.1007/3-540-39965-8_4 @@ -93,11 +93,11 @@ Handling structural divergences and recovering dropped arguments in a <fixed-case>K</fixed-case>orean/<fixed-case>E</fixed-case>nglish machine translation system - Chung-hyeHan - BenoitLavoie - MarthaPalmer - OwenRambow - RichardKittredge + Chung-hyeHan + BenoitLavoie + MarthaPalmer + OwenRambow + RichardKittredge TanyaKorelsky NariKim MyungheeKim @@ -109,11 +109,11 @@ A machine translation system from <fixed-case>E</fixed-case>nglish to <fixed-case>A</fixed-case>merican <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage LiweiZhao - KarinKipper + KarinKipper WilliamSchuler ChristianVogler NormanBadler - MarthaPalmer + MarthaPalmer 54-67 https://link.springer.com/chapter/10.1007/3-540-39965-8_6 Research in computational linguistics, computer graphics and autonomous agents has led to the development of increasingly sophisticated communicative agents over the past few years, bringing new perspective to machine translation research. The engineering of language- based smooth, expressive, natural-looking human gestures can give us useful insights into the design principles that have evolved in natural communication between people. In this paper we prototype a machine translation system from English to American Sign Language (ASL), taking into account not only linguistic but also visual and spatial information associated with ASL signs. @@ -138,7 +138,7 @@ The effect of source analysis on translation confidence ArendseBernth - Michael C.McCord + Michael C.McCord 89-99 https://link.springer.com/chapter/10.1007/3-540-39965-8_9 Translations produced by an MT system can automatically be assigned a number that reflects the MT system’s confidence in their quality. We describe the design of such a confidence index, with focus on the contribution of source analysis, which plays a crucial role in many MT systems, including ours. Various problematic areas of source analysis are identified, and their impact on the overall confidence index is given. We will describe two methods of training the confidence index, one by hand-tuning of the heuristics, the other by linear regression analysis. @@ -146,7 +146,7 @@ Contemplating automatic <fixed-case>MT</fixed-case> evaluation - John S.White + John S.White 100-108 https://link.springer.com/chapter/10.1007/3-540-39965-8_10 Researchers, developers, translators and information consumers all share the problem that there is no accepted standard for machine translation. The problem is much further confounded by the fact that MT evaluations properly done require a considerable commitment of time and resources, an anachronism in this day of cross-lingual information processing when new MT systems may developed in weeks instead of years. This paper surveys the needs addressed by several of the classic “types” of MT, and speculates on ways that each of these types might be automated to create relevant, near-instantaneous evaluation of approaches and systems. @@ -155,7 +155,7 @@ How are you doing? A look at <fixed-case>MT</fixed-case> evaluation MichelleVanni - FlorenceReeder + FlorenceReeder 109-116 https://link.springer.com/chapter/10.1007/3-540-39965-8_11 Machine Translation evaluation has been more magic and opinion than science. The history of MT evaluation is long and checkered - the search for objective, measurable, resource-reduced methods of evaluation continues. A recent trend towards task-based evaluation inspires the question - can we use methods of evaluation of language competence in language learners and apply them reasonably to MT evaluation? This paper is the first in a series of steps to look at this question. In this paper, we will present the theoretical framework for our ideas, the notions we ultimately aim towards and some very preliminary results of a small experiment along these lines. @@ -163,9 +163,9 @@ Recycling annotated parallel corpora for bilingual document composition - ArantzaCasillas + ArantzaCasillas JosebaAbaitua - RaquelMartínez + RaquelMartínez 117-126 https://link.springer.com/chapter/10.1007/3-540-39965-8_12 Parallel corpora enriched with descriptive annotations facilitate multilingual authoring development. Departing from an annotated bitext we show how SGML markup can be recycled to produce complementary language resources. On the one hand, several translation memory databases together with glossaries of proper nouns have been produced. On the other, DTDs for source and target documents have been derived and put into correspondence. This paper discusses how these resources have been automatically generated and applied to an interactive bilingual authoring system. This tool is capable of handling a substantial proportion of text both in the composition and translation of structured documents. @@ -182,7 +182,7 @@ What’s been forgotten in translation memory ElliottMacklovitch - GrahamRussell + GrahamRussell 137-146 https://link.springer.com/chapter/10.1007/3-540-39965-8_14 Although undeniably useful for the translation of certain types of repetitive document, current translation memory technology is limited by the rudimentary techniques employed for approximate matching. Such systems, moreover, incorporate no real notion of a document, since the databases that underlie them are essentially composed of isolated sentence strings. As a result, current TM products can only exploit a small portion of the knowledge residing in translators’ past production. This paper examines some of the changes that will have to be implemented if the technology is to be made more widely applicable. @@ -190,7 +190,7 @@ Understanding politics by studying weather: a cognitive approach to representation of <fixed-case>P</fixed-case>olish verbs of motion, appearance, and existence - BarbaraGawronska + BarbaraGawronska HannahDuczak 147-157 https://link.springer.com/chapter/10.1007/3-540-39965-8_15 @@ -200,7 +200,7 @@ Small but efficient: the misconception of high-frequency words in <fixed-case>S</fixed-case>candinavian translation PernillaDanielsson - KatarinaMühlenbock + KatarinaMühlenbock 158-168 https://link.springer.com/chapter/10.1007/3-540-39965-8_16 Machine translation has proved itself to be easier between languages that are closely related, such as German and English, while far apart languages, such as Chinese and English, encounter much more problems. The present study focuses upon Swedish and Norwegian; two languages so closely related that they would be referred to as dialects if it were not for the fact that they had a Royal house and an army connected to each of them. Despite their similarity though, some differences make the translation phase much less straight-forward than what could be expected. Taking the outset in sentence aligned parallel texts, this study aims at highlighting some of the differences, and to formalise the results. In order to do so, the texts have been aligned on smaller units, by a simple cognate alignment method. Not at all surprising, the longer words were easier to align, while shorter and often high-frequent words became a problem. Also when trying to align to a specific word sense in a dictionary, content words rendered better results. Therefore, we abandoned the use of single-word units, and searched for multi-word units whenever possible. This study reinforces the view that Machine Translation should rest upon methods based on multiword unit searches. @@ -211,7 +211,7 @@ ViolettaCavalli-Sforza KrzysztofCzuba TerukoMitamura - EricNyberg + EricNyberg 169-178 https://link.springer.com/chapter/10.1007/3-540-39965-8_17 We describe our experience in adapting an existing high- quality, interlingual, unidirectional machine translation system to a new domain and bidirectional translation for a new language pair (English and Italian). We focus on the interlingua design changes which were necessary to achieve high quality output in view of the language mismatches between English and Italian. The representation we propose contains features that are interpreted differently, depending on the translation direction. This decision simplified the process of creating the interlingua for individual sentences, and allows the system to defer mapping of language-specific features (such as tense and aspect), which are realized when the target syntactic feature structure is created. We also describe a set of problems we encountered in translating modal verbs, and discuss the representation of modality in our interlingua. @@ -220,7 +220,7 @@ Text meaning representation as a basis for representation of text interpretation StephenHelmreich - DavidFarwell + DavidFarwell 179-188 https://link.springer.com/chapter/10.1007/3-540-39965-8_18 In this paper we propose a representation for what we have called an interpretation of a text. We base this representation on TMR (Text Meaning Representation), an interlingual representation developed for Machine Translation purposes. A TMR consists of a complex feature-value structure, with the feature names and filler values drawn from an ontology, in this case, ONTOS, developed concurrently with TMR. We suggest on the basis of previous work, that a representation of an interpretation of a text must build on a TMR structure for the text in several ways: (1) by the inclusion of additional required features and feature values (which may themselves be complex feature structures); (2) by pragmatically filling in empty slots in the TMR structure itself; and (3) by supporting the connections between feature values by including, as part of the TMR itself, the chains of inferencing that link various parts of the structure. @@ -247,7 +247,7 @@ The <fixed-case>KANTOO</fixed-case> machine translation environment - EricNyberg + EricNyberg TerukoMitamura 192-195 https://link.springer.com/chapter/10.1007/3-540-39965-8_20 @@ -313,7 +313,7 @@ Is <fixed-case>MT</fixed-case> software documentation appropriate for <fixed-case>MT</fixed-case> users? DavidMowatt - HaroldSomers + HaroldSomers 223-238 https://link.springer.com/chapter/10.1007/3-540-39965-8_26 This paper discusses an informal methodology for evaluating Machine Translation software documentation with reference to a case study, in which a number of currently available MT packages are evaluated. Different types of documentation style are discussed, as well as different user profiles. It is found that documentation is often inadequate in identifying the level of linguistic background and knowledge necessary to use translation software, and in explaining technical (linguistic) terms needed to use the software effectively. In particular, the level of knowledge and training needed to use the software is often incompatible with the user profile implied by the documentation. Also, guidance on how to perform more complex tasks, which may be especially idiosyncratic, is often inadequate or missing altogether. @@ -332,13 +332,13 @@ Machine translation systems: <fixed-case>E</fixed-case>-K, K-<fixed-case>E</fixed-case>, <fixed-case>J</fixed-case>-K, K-<fixed-case>J</fixed-case> Yu SeopKim - Sung DongKim + Sung DongKim Seong BaePark Jong WooLee Jeong HoChang Kyu BaekHwang Min OJang - Yung TaekKim + Yung TaekKim 248-251 https://link.springer.com/chapter/10.1007/3-540-39965-8_28 We present four kinds of machine translation system in this description: E-K (English to Korean), K-E (Korean to English), J-K (Japanese to Korean), K-J (Korean to Japanese). Among these, E-K and K-J translation systems are published commercially, and the other systems have finished their development. This paper describes the structure and function of each system with figures and translation results. diff --git a/data/xml/2000.bcs.xml b/data/xml/2000.bcs.xml index 2ef5c37f8a..ae9452150f 100644 --- a/data/xml/2000.bcs.xml +++ b/data/xml/2000.bcs.xml @@ -11,16 +11,16 @@ Towards memory and template-based translation synthesis ChristosMalavazos - SteliosPiperidis - GeorgeCarayannis + SteliosPiperidis + GeorgeCarayannis 2000.bcs-1.1 malavazos-etal-2000-towards Building a lexicon for an <fixed-case>E</fixed-case>nglish-<fixed-case>B</fixed-case>asque <fixed-case>MT</fixed-case> system from heterogeneous wide-coverage dictionaries ArantxaDiaz de Ilarraza - AingeruMayor - KepaSarasola + AingeruMayor + KepaSarasola 2000.bcs-1.2 diaz-de-ilarraza-etal-2000-building @@ -29,7 +29,7 @@ IoannisTriantafyllou IasonDemiros ChristosMalavazos - SteliosPiperidis + SteliosPiperidis 2000.bcs-1.3 triantafyllou-etal-2000-alignment @@ -41,8 +41,8 @@ Effectiveness of layering translation rules based on transition networks in machine translation using inductive learning with genetic algorithms - HiroshiEchizen-ya - KenjiAraki + HiroshiEchizen-ya + KenjiAraki YoshioMomouchi KojiTochinai 2000.bcs-1.5 @@ -57,7 +57,7 @@ Learning machine translation strategies using commercial systems: discovering word reordering rules - Mikel L.Forcada + Mikel L.Forcada 2000.bcs-1.7 forcada-2000-learning @@ -77,7 +77,7 @@ An example-based <fixed-case>MT</fixed-case> system in news items domain from <fixed-case>E</fixed-case>nglish to <fixed-case>I</fixed-case>ndian languages - SivajiBandyopadhyay + SivajiBandyopadhyay 2000.bcs-1.10 bandyopadhyay-2000-example @@ -85,16 +85,16 @@ <fixed-case>EMILLE</fixed-case>: building a corpus of <fixed-case>S</fixed-case>outh <fixed-case>A</fixed-case>sian languages AnthonyMcEnery PaulBaker - RobGaizauskas - HamishCunningham + RobGaizauskas + HamishCunningham 2000.bcs-1.11 mcenery-etal-2000-emille Reusability of wide-coverage linguistic resources in the construction of multilingual technical documentation ArantxaDiaz de Ilarraza - AingeruMayor - KepaSarasola + AingeruMayor + KepaSarasola 2000.bcs-1.12 diaz-de-ilarraza-etal-2000-reusability @@ -113,65 +113,65 @@ Semi-automatic construction of multilingual lexicons - LynneCahill + LynneCahill 2000.bcs-1.15 cahill-2000-semi Evaluation of statistical tools for automatic extraction of lexical correspondences between parallel texts - OlivierKraif + OlivierKraif 2000.bcs-1.16 kraif-2000-evaluation Semantic approach to bridging reference resolution - RafaelMuñoz - MaximilianoSaiz-Noeda - ArmandoSuárez + RafaelMuñoz + MaximilianoSaiz-Noeda + ArmandoSuárez ManualPalomar 2000.bcs-1.17 munoz-etal-2000-semantic Evaluation environment for anaphora resolution - CatalinaBarbu - RuslanMitkov + CatalinaBarbu + RuslanMitkov 2000.bcs-1.18 barbu-mitkov-2000-evaluation <fixed-case>NLP</fixed-case> system oriented to anaphora resolution - MaximilianoSaiz-Noeda + MaximilianoSaiz-Noeda ManualPalomar - DavidFarwell + DavidFarwell 2000.bcs-1.19 saiz-noeda-etal-2000-nlp <fixed-case>LINGUA</fixed-case>: a robust architecture for text processing and anaphora resolution in <fixed-case>B</fixed-case>ulgarian - HristoTanev - RuslanMitkov + HristoTanev + RuslanMitkov 2000.bcs-1.20 tanev-mitkov-2000-lingua Grammar specification for the recognition of temporal expressions - EstelaSaquete - PatricioMartínez-Barco + EstelaSaquete + PatricioMartínez-Barco 2000.bcs-1.21 saquete-martinez-barco-2000-grammar <fixed-case>VASISTH</fixed-case>: an ellipsis resolution algorithm for <fixed-case>I</fixed-case>ndian languages - L.Sobha + L.Sobha B. N.Patnaik 2000.bcs-1.22 sobha-patnaik-2000-vasisth Generating personal profiles - JimCowie - SergeiNirenburg + JimCowie + SergeiNirenburg HugoMolina-Salgado 2000.bcs-1.23 cowie-etal-2000-generating @@ -184,9 +184,9 @@ Generating from a discourse model - RodolfoDelmonte + RodolfoDelmonte DarioBianchi - EmanuelePianta + EmanuelePianta 2000.bcs-1.25 delmonte-etal-2000-generating diff --git a/data/xml/2000.eamt.xml b/data/xml/2000.eamt.xml index de7c28355c..623d6d2226 100644 --- a/data/xml/2000.eamt.xml +++ b/data/xml/2000.eamt.xml @@ -11,7 +11,7 @@ Introduction - JohnHutchins + JohnHutchins 2000.eamt-1.1 hutchins-2000-introduction @@ -23,13 +23,13 @@ Extracting Terms and Terminological Collocations from the <fixed-case>ELAN</fixed-case> <fixed-case>S</fixed-case>lovene–<fixed-case>E</fixed-case>nglish Parallel Corpus - ŠpelaVintar + ŠpelaVintar 2000.eamt-1.3 vintar-2000-extracting Extracting Textual Associations in Part-of-Speech Tagged Corpora - GaëlDias + GaëlDias SylvieGuilloré José GabrielPereira Lopes 2000.eamt-1.4 @@ -37,28 +37,28 @@ Statistical Machine Translation - Franz JosefOch - HermannNey + Franz JosefOch + HermannNey 2000.eamt-1.5 och-ney-2000-statistical <fixed-case>POLENG</fixed-case>–Adjusting a Rule-Based <fixed-case>P</fixed-case>olish–<fixed-case>E</fixed-case>nglish Machine Translation System by Means of Corpus Analysis KrzysztofJassem - FilipGraliński + FilipGraliński GrzegorzKrynicki 2000.eamt-1.6 jassem-etal-2000-poleng <fixed-case>S</fixed-case>lovene–<fixed-case>E</fixed-case>nglish Datasets for <fixed-case>MT</fixed-case> - TomažErjavec + TomažErjavec 2000.eamt-1.7 erjavec-2000-slovene The <fixed-case>IAMT</fixed-case> Certification Initiative and Defining Translation System Categories - JohnHutchins + JohnHutchins 2000.eamt-1.8 hutchins-2000-iamt diff --git a/data/xml/2000.iwpt.xml b/data/xml/2000.iwpt.xml index 4e4b31f501..f4705560d5 100644 --- a/data/xml/2000.iwpt.xml +++ b/data/xml/2000.iwpt.xml @@ -43,7 +43,7 @@ Automatic Grammar Induction: Combining, Reducing and Doing Nothing EricBrill - John C.Henderson + John C.Henderson GraceNgai 1-5 2000.iwpt-1.2 @@ -68,9 +68,9 @@ A Bootstrapping Approach to Parser Development - IzaskunAldezabal - KoldoGojenola - KepaSarasola + IzaskunAldezabal + KoldoGojenola + KepaSarasola 17-28 2000.iwpt-1.5 This paper presents a robust parsing system for unrestricted Basque texts. It analyzes a sentence in two stages: a unification-based parser builds basic syntactic units such as NPs, PPs, and sentential complements, while a finite-state parser performs syntactic disambiguation and filtering of the results. The system has been applied to the acquisition of verbal subcategorization information, obtaining 66% recall and 87% precision in the determination of verb subcategorization instances. This information will be later incorporated to the parser, in order to improve its performance. @@ -78,10 +78,10 @@ New Tabular Algorithms for Parsing - Miguel A.Alonso + Miguel A.Alonso JorgeGraña ManuelVilares - Ericde la Clergerie + Ericde la Clergerie 29-40 2000.iwpt-1.6 We develop a set of new tabular parsing algorithms for Linear Indexed Grammars, including bottom-up algorithms and Earley-like algorithms with and without the valid prefix property, creating a continuum in which one algorithm can in turn be derived from another. The output of these algorithms is a shared forest in the form of a context-free grammar that encodes all possible derivations for a given input string. @@ -108,7 +108,7 @@ Automated Extraction of <fixed-case>TAG</fixed-case>s from the <fixed-case>Penn</fixed-case> <fixed-case>Treebank</fixed-case> JohnChen - K.Vijay-Shanker + K.Vijay-Shanker 65-76 2000.iwpt-1.9 The accuracy of statistical parsing models can be improved with the use of lexical information. Statistical parsing using Lexicalized tree adjoining grammar (LTAG), a kind of lexicalized grammar, has remained relatively unexplored. We believe that is largely in part due to the absence of large corpora accurately bracketed in terms of a perspicuous yet broad coverage LTAG. Our work attempts to alleviate this difficulty. We extract different LTAGs from the Penn Treebank. We show that certain strategies yield an improved extracted LTAG in terms of compactness, broad coverage, and supertagging accuracy. Furthermore, we perform a preliminary investigation in smoothing these grammars by means of an external linguistic resource, namely, the tree families of an XTAG grammar, a hand built grammar of English. @@ -116,7 +116,7 @@ From Cases to Rules and Vice Versa: Robust Practical Parsing With Analogy - Alex ChengyuFang + Alex ChengyuFang 77-88 2000.iwpt-1.10 This article describes the architecture of the Survey Parser and discusses two major components related to the analogy-based parsing of unrestricted English. Firstly, it discusses the automatic generation of a large declarative formal grammar from a corpus that has been syntactically analysed. Secondly, it describes analogy-based parsing that employs both the automatically learned rules and the database of cases to determine the syntactic structure of the input string. Statistics are presented to characterise the performance of the parsing system. @@ -124,7 +124,7 @@ A Transformation-based Parsing Technique With Anytime Properties - KilianFoth + KilianFoth IngoSchröder WolfgangMenzel 89-100 @@ -134,7 +134,7 @@ <fixed-case>SOUP</fixed-case>: A Parser for Real-world Spontaneous Speech - MarsalGavaldà + MarsalGavaldà 101-110 2000.iwpt-1.12 This paper describes the key features of SOUP, a stochastic, chart-based, top-down parser, especially engineered for real-time analysis of spoken language with very large, multi-domain semantic grammars. SOUP achieves flexibility by encoding context-free grammars, specified for example in the Java Speech Grammar Format, as probabilistic recursive transition networks, and robustness by allowing skipping of input words at any position and producing ranked interpretations that may consist of multiple parse trees. Moreover, SOUP is very efficient, which allows for practically instantaneous backend response. @@ -150,7 +150,7 @@ A Neural Network Parser that Handles Sparse Data - JamesHenderson + JamesHenderson 123-134 2000.iwpt-1.14 Previous work has demonstrated the viability of a particular neural network architecture, Simple Synchrony Networks, for syntactic parsing. Here we present additional results on the performance of this type of parser, including direct comparisons on the same dataset with a standard statistical parsing method, Probabilistic Context Free Grammars. We focus these experiments on demonstrating one of the main advantages of the SSN parser over the PCFG, handling sparse data. We use smaller datasets than are typically used with statistical methods, resulting in the PCFG finding parses for under half of the test sentences, while the SSN finds parses for all sentences. Even on the PCFG ‘s parsed half, the SSN performs better than the PCFG, as measure by recall and precision on both constituents and a dependency-like measure. @@ -158,8 +158,8 @@ A Context-free Approximation of <fixed-case>H</fixed-case>ead-driven <fixed-case>P</fixed-case>hrase <fixed-case>S</fixed-case>tructure <fixed-case>G</fixed-case>rammar - BerndKiefer - Hans-UlrichKrieger + BerndKiefer + Hans-UlrichKrieger 135-146 2000.iwpt-1.15 We present a context-free approximation of unification-based grammars, such as HPSG or PATR-II. The theoretical underpinning is established through a least fixpoint construction over a certain monotonic function. In order to reach a finite fixpoint, the concrete implementation can be parameterized in several ways , either by specifying a finite iteration depth, by using different restrictors, or by making the symbols of the CFG more complex adding annotations a la GPSG. We also present several methods that speed up the approximation process and help to limit the size of the resulting CF grammar. @@ -167,8 +167,8 @@ Optimal Ambiguity Packing in Context-free Parsers with Interleaved Unification - AlonLavie - Carolyn PensteinRosé + AlonLavie + Carolyn PensteinRosé 147-158 2000.iwpt-1.16 Ambiguity packing is a well known technique for enhancing the efficiency of context-free parsers. However, in the case of unification-augmented context-free parsers where parsing is interleaved with feature unification, the propagation of feature structures imposes difficulties on the ability of the parser to effectively perform ambiguity packing. We demonstrate that a clever heuristic for prioritizing the execution order of grammar rules and parsing actions can achieve a high level of ambiguity packing that is provably optimal. We present empirical evaluations of the proposed technique, performed with both a Generalized LR parser and a chart parser, that demonstrate its effectiveness. @@ -184,7 +184,7 @@ Improved Left-corner Chart Parsing for Large Context-free Grammars - Robert C.Moore + Robert C.Moore 171-182 2000.iwpt-1.18 We develop an improved form of left-corner chart parsing for large context-free grammars, introducing improvements that result in significant speed-ups more compared to previously-known variants of left corner parsing. We also compare our method to several other major parsing approaches, and find that our improved left-corner parsing method outperforms each of these across a range of grammars. Finally, we also describe a new technique for minimizing the extra information needed to efficiently recover parses from the data structures built in the course of parsing. @@ -209,7 +209,7 @@ An Efficient <fixed-case>LR</fixed-case> Parser Generator for <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - Carlos A.Prolo + Carlos A.Prolo 207-218 2000.iwpt-1.21 The first published LR algorithm for Tree Adjoining Grammars (TAGs [Joshi and Schabes, 1996]) was due to Schabes and Vijay-Shanker [1990] . Nederhof [1998] showed that it was incorrect (after [Kinyon, 1997]), and proposed a new one. Experimenting with his new algorithm over the XTAG English Grammar [XTAG Research Group, 1998] he concluded that LR parsing was inadequate for use with reasonably sized grammars because the size of the generated table was unmanageable. Also the degree of conflicts is too high. In this paper we discuss issues involved with LR parsing for TAGs and propose a new version of the algorithm that, by maintaining the degree of prediction while deferring the “subtree reduction”, dramatically reduces both the average number of conflicts per state and the size of the parser. @@ -227,7 +227,7 @@ On the Use of Grammar Based Language Models for Statistical Machine Translation HassanSawaf KaiSchütz - HermannNey + HermannNey 231-241 2000.iwpt-1.23 In this paper, we describe some concepts of language models beyond the usually used standard trigram and use such language models for statistical machine translation. In statistical machine translation the language model is the a-priori knowledge source of the system about the target language. One important requirement for the language model is the correct word order, given a certain choice of words, and to score the translations generated by the translation model \textrm{Pr}(f_1^J/e^I_1), in view of the syntactic context. In addition to standard m-grams with long histories, we examine the use of Part-of-Speech based models as well as linguistically motivated grammars with stochastic parsing as a special type of language model. Translation results are given on the VERBMOBIL task, where translation is performed from German to English, with vocabulary sizes of 6500 and 4000 words, respectively. @@ -253,7 +253,7 @@ Parsing a Lattice with Multiple Grammars FuliangWeng - HelenMeng + HelenMeng Po ChuiLuk 266-277 2000.iwpt-1.26 @@ -262,7 +262,7 @@ Modular Unification-based Parsers - RémiZajac + RémiZajac JanAmtrup 278-290 2000.iwpt-1.27 @@ -296,11 +296,11 @@ Grammar Organization for Cascade-based Parsing in Information Extraction - FabioCiravegna - AlbertoLavelli + FabioCiravegna + AlbertoLavelli 297-298 2000.iwpt-1.31 - + ciravegna-lavelli-2000-grammar @@ -349,7 +349,7 @@ Exploiting Parallelism in Unification-based Parsing - Marcel P.van Lohuizen + Marcel P.van Lohuizen 309-310 2000.iwpt-1.37 Because of the nature of the parsing problem, unification-based parsers are hard to parallelize. We present a parallelization technique designed to cope with these difficulties. @@ -367,11 +367,11 @@ Uniquely Parsable Accepting Grammar Systems - CarlosMartín-Vide + CarlosMartín-Vide VictorMitrana 313-314 2000.iwpt-1.39 - + martin-vide-mitrana-2000-uniquely @@ -416,7 +416,7 @@ The Editing Distance in Shared Forest ManuelVilares - DavidCabrero + DavidCabrero Francisco J.Ribadas 323-324 2000.iwpt-1.44 diff --git a/data/xml/2000.tc.xml b/data/xml/2000.tc.xml index 1da7c950b2..ae3149bf8b 100644 --- a/data/xml/2000.tc.xml +++ b/data/xml/2000.tc.xml @@ -31,13 +31,13 @@ A Language Checker of Controlled Language and its Integration in a Documentation and Translation Workflow IngridAlmqvist - Anna SågvallHein + Anna SågvallHein 2000.tc-1.4 almqvist-hein-2000-language Evaluating Machine Translation: the Cloze Procedure Revisited - HaroldSomers + HaroldSomers ElizabethWild 2000.tc-1.5 somers-wild-2000-evaluating @@ -88,7 +88,7 @@ The <fixed-case>EU</fixed-case> <fixed-case>LE</fixed-case>4 <fixed-case>T</fixed-case>rans<fixed-case>R</fixed-case>outer Project - ReinhardSchäler + ReinhardSchäler 2000.tc-1.13 schaler-2000-eu diff --git a/data/xml/2001.jeptalnrecital.xml b/data/xml/2001.jeptalnrecital.xml index e3198ae44e..51db08dd25 100644 --- a/data/xml/2001.jeptalnrecital.xml +++ b/data/xml/2001.jeptalnrecital.xml @@ -48,7 +48,7 @@ Un corpus français arboré : quelques interrogations - AnneAbeillé + AnneAbeillé LionelClément AlexandraKinyon FrançoisToussenel @@ -81,7 +81,7 @@ Atelier <fixed-case>ATOLL</fixed-case> pour les grammaires d’arbres adjoints - FrançoisBarthélemy + FrançoisBarthélemy PierreBoullier PhilippeDeschamp LindaKaouane @@ -95,7 +95,7 @@ Modèle d’exploration contextuelle pour l’analyse sémantique de textes SlimBen Hazez - Jean-PierreDesclés + Jean-PierreDesclés Jean-LucMinel 73–82 Nous présentons dans cet article un modèle d’exploration contextuelle et une plate-forme logicielle qui permet d’accéder au contenu sémantique des textes et d’en extraire des séquences particulièrement pertinentes. L’objectif est de développer et d’exploiter des ressources linguistiques pour identifier dans les textes, indépendamment des domaines traités, certaines des relations organisatrices des connaissances ainsi que les organisations discursives mises en places par l’auteur. L’analyse sémantique du texte est guidée par le repérage d’indices linguistiques déclencheurs dont l’emploi est représentatif des notions étudiées. @@ -108,7 +108,7 @@ RomaricBesançon AntoineRozenknop Jean-CédricChappelier - MartinRajman + MartinRajman 83–91 Le sujet du présent article est l’intégration des sens portés par les mots en contexte dans une représentation vectorielle de textes, au moyen d’un modèle probabiliste. La représentation vectorielle considérée est le modèle DSIR, qui étend le modèle vectoriel (VS) standard en tenant compte à la fois des occurrences et des co-occurrences de mots dans les documents. L’intégration des sens dans cette représentation se fait à l’aide d’un modèle de Champ de Markov avec variables cachées, en utilisant une information sémantique dérivée de relations de synonymie extraites d’un dictionnaire de synonymes. 2001.jeptalnrecital-long.6 @@ -148,7 +148,7 @@ Etiquetage prosodique semi-automatique des corpus oraux EstelleCampione - JeanVéronis + JeanVéronis 122–131 La transcription manuelle de la prosodie est une tâche extrêmement coûteuse en temps, qui requiert des annotateurs très spécialisés, et qui est sujette à de multiples erreurs et une grande part de subjectivité. Une automatisation complète n’est pas envisageable dans l’état actuel de la technologie, mais nous présentons dans cette communication des outils et une méthodologie qui permettent une réduction substantielle du temps d’intervention manuelle, et améliorent l’objectivité et la cohérence du résultat. De plus, les étapes manuelles nécessaires ne demandent pas une expertise phonétique poussée et peuvent être menées à bien par des étudiants et des “linguistes de corpus”. 2001.jeptalnrecital-long.10 @@ -158,7 +158,7 @@ Grammaire à substitution d’arbre de complexité polynomiale : un cadre efficace pour <fixed-case>DOP</fixed-case> Jean-CédricChappelier - MartinRajman + MartinRajman 132–141 Trouver l’arbre d’analyse le plus probable dans le cadre du modèle DOP (Data-Oriented Parsing) — une version probabiliste de grammaire à substitution d’arbres développée par R. Bod (1992) — est connu pour être un problème NP-difficile dans le cas le plus général (Sima’an, 1996a). Cependant, si l’on introduit des restrictions a priori sur le choix des arbres élémentaires, on peut obtenir des instances particulières de DOP pour lesquelles la recherche de l’arbre d’analyse le plus probable peut être effectuée en un temps polynomial (par rapport à la taille de la phrase à analyser). La présente contribution se propose d’étudier une telle instance polynomiale de DOP, fondée sur le principe de sélection miminale-maximale et d’en évaluer les performances sur deux corpus différents. 2001.jeptalnrecital-long.11 @@ -232,7 +232,7 @@ Compréhension Automatique de la Parole combinant syntaxe locale et sémantique globale pour une <fixed-case>CHM</fixed-case> portant sur des tâches relativement complexes - JérômeGoulian + JérômeGoulian Jean-YvesAntoine 202–211 Nous présentons dans cet article un système de Compréhension Automatique de la Parole (CAP) tentant de concilier les contraintes antinomiques de robustesse et d’analyse détaillée de la parole spontanée. Dans une première partie, nous montrons l’importance de la mise en oeuvre d’une CAP fine dans l’optique d’une Communication Homme-Machine (CHM) sur des tâches moyennement complexes. Nous présentons ensuite l’architecture de notre système qui repose sur une analyse en deux étapes : une première étape d’analyse syntaxique de surface (Shallow Parsing) générique suivie d’une seconde étape d’analyse sémantico-pragmatique – dépendante du domaine d’application – de la structure profonde de l’ ́enoncé complet. @@ -242,8 +242,8 @@ Exploitation de l’expertise humaine dans un processus de constitution de terminologie - ThierryHamon - AdelineNazarenko + ThierryHamon + AdelineNazarenko 212–221 Le processus de construction de terminologie ne peut être entièrement automatisé. Les méthodes et des outils de la terminologie computationnelle permettent de prendre en charge une partie de la tâche, mais l’expertise humaine garde une place prépondérant. Le défi pour les outils terminologiques est de dégrossir les tâches qui sont soit trop longues soit trop complexes pour l’utilisateur tout en permettant à ce dernier d’intégrer ses propres connaissances spécialisées et en lui laissant le contrôle sur la terminologie à construire. Nous montrons ici comment le rôle de cette expertise est pris en compte dans SynoTerm, l’outil d’acquisition de relation de synonymie entre termes que nous avons d ́eveloppé. 2001.jeptalnrecital-long.19 @@ -271,7 +271,7 @@ Récupération de segments sous-phrastiques dans une mémoire de traduction - PhilippeLanglais + PhilippeLanglais MichelSimard 242–251 L’utilité des outils d’aide à la traduction reposant sur les mémoires de traduction est souvent limitée par la nature des segments que celles-ci mettent en correspondance, le plus souvent des phrases entières. Cet article examine le potentiel d’un type de système qui serait en mesure de récupérer la traduction de séquences de mots de longueur arbitraire. @@ -308,9 +308,9 @@ <fixed-case>DEFI</fixed-case>, un outil d’aide à la compréhension - ArchibaldMichiels + ArchibaldMichiels 282–292 - + 2001.jeptalnrecital-long.26 fra michiels-2001-defi @@ -326,8 +326,8 @@ Ontologies for Information Retrieval - AmaliaTodiraşcu - FrançoisRousselot + AmaliaTodiraşcu + FrançoisRousselot 303–312 The paper presents a system for querying (in natural language) a set of text documents from a limited domain. The domain knowledge, represented in description logics (DL), is used for filtering the documents returned as answer and it is extended dynamically (when new concepts are identified in the texts), as result of DL inference mechanisms. The conceptual hierarchy is built semi-automatically from the texts. Concept instances are identified using shallow natural language parsing techniques. 2001.jeptalnrecital-long.28 @@ -358,9 +358,9 @@ Modèles de langage hiérarchiques pour les applications de dialogue en parole spontanée - FrédéricBéchet + FrédéricBéchet YannickEstève - RenatoDe Mori + RenatoDe Mori 325–330 Le cadre de cette étude concerne les systèmes de dialogue via le téléphone entre un serveur de données et un utilisateur. Nous nous intéresserons au cas de dialogues non contraints où l’utilisateur à toute liberté pour formuler ses requêtes. Généralement, le module de Reconnaissance Automatique de la Parole (RAP) de tels serveurs utilise un seul Modèle de Langage (ML) de type bigramme ou trigramme pour modéliser l’ensemble des interventions possibles de l’utilisateur. Ces ML sont appris sur des corpus de phrases retranscrites à partir de sessions entre le serveur et plusieurs utilisateurs. Nous proposons dans cette étude une méthode de segmentation de corpus d’apprentissage de dialogue utilisant une stratégie mixte basée à la fois sur des connaissances explicites mais aussi sur l’optimisation d’un critère statistique. Nous montrons qu’un gain en terme de perplexité et de taux d’erreurs/mot peut être constaté en utilisant un ensemble de sous modèles de langage issus de la segmentation plutôt qu’un modèle unique appris sur l’ensemble du corpus. 2001.jeptalnrecital-poster.1 @@ -450,8 +450,8 @@ Gestionnaire de dialogue pour un système d’informations à reconnaissance vocale - SophieRosset - LoriLamel + SophieRosset + LoriLamel 381–386 Dans cet article, nous présentons un gestionnaire de dialogue pour un système de demande d’informations à reconnaissance vocale. Le gestionnaire de dialogue dispose de différentes sources de connaissance, des connaissances statiques et des connaissances dynamiques. Ces connaissances sont gérées et utilisées par le gestionnaire de dialogue via des stratégies. Elles sont mises en oeuvre et organisées en fonction des objectifs concernant le système de dialogue et en fonction des choix ergonomiques que nous avons retenus. Le gestionnaire de dialogue utilise un modèle de dialogue fondé sur la détermination de phases et un modèle de la tâche dynamique. Il augmente les possibilités d’adaptation de la stratégie en fonction des historiques et de l’état du dialogue. Ce gestionnaire de dialogue, implémenté et évalué lors de la dernière campagne d’évaluation du projet LE-3 ARISE, a permi une amélioration du taux de succès de dialogue (de 53% à 85%). 2001.jeptalnrecital-poster.10 @@ -471,7 +471,7 @@ Word Sense Disambiguation in a <fixed-case>S</fixed-case>panish Explanatory Dictionary GrigoriSidorov - AlexanderGelbukh + AlexanderGelbukh 393–398 We apply word sense disambiguation to the definitions in a Spanish explanatory dictionary. To calculate the scores of word senses basing on the context (which in our case is the dictionary definition), we use a modification of Lesk’s algorithm. The algorithm relies on a comparison between two words. In the original Lesk’s algorithm, the comparison is trivial: two words are either the same lexeme or not; our modification consists in fuzzy (weighted) comparison using a large synonym dictionary and a simple derivational morphology system. Application of disambiguation to dictionary definitions (in contrast to usual texts) allows for some simplifications of the algorithm, e.g., we do not have to care of context window size. 2001.jeptalnrecital-poster.12 @@ -479,9 +479,9 @@ L’apport de connaissances morphologiques pour la projection de requêtes sur une terminologie normalisée - PierreZweigenbaum + PierreZweigenbaum NataliaGrabar - StefanDarmoni + StefanDarmoni 399–404 L’apport de connaissances linguistiques à la recherche d’information reste un sujet de débat. Nous examinons ici l’influence de connaissances morphologiques (flexion, dérivation) sur les résultats d’une tâche spécifique de recherche d’information dans un domaine spécialisé. Cette influence est étudiée à l’aide d’une liste de requêtes réelles recueillies sur un serveur opérationnel ne disposant pas de connaissances linguistiques. Nous observons que pour cette tâche, flexion et dérivation apportent un gain modéré mais réel. 2001.jeptalnrecital-poster.13 @@ -505,7 +505,7 @@ Extraction de collocations à partir de textes - BéatriceDaille + BéatriceDaille 3–8 Les collocations sont intéressantes dans de nombreuses applications du TALN comme la l’analyse ou la génération de textes ou encore la lexicographie monolingue ou bilingue. Les premières tentatives d’extraction automatique de collocations à partir de textes ou de dictionnaires ont vu le jour dans les années 1970. Il s’agissait principalement de méthodes à base de statistiques lexicales. Aujourd’hui, les méthodes d’identification automatique font toujours appel à des statistiques mais qu’elles combinent avec des analyses linguistiques. Nous examinons quelques méthodes d’identification des collocations en corpus en soulignant pour chaque méthode les propriétés linguistiques des collocations qui ont été prises en compte. 2001.jeptalnrecital-tutoriel.1 @@ -532,16 +532,16 @@ Formal Languages for Linguists: Classical and Nonclassical Models - CarlosMartín-Vide + CarlosMartín-Vide 77–127 - + 2001.jeptalnrecital-tutoriel.4 fra martin-vide-2001-formal L’apport de connaissances linguistiques en recherche documentaire - ClaudeDe Loupy + ClaudeDe Loupy 128–142 L’utilisation de connaissances et de traitements linguistiques évolués en recherche documentaire ne fait pas l’unanimité dans le milieu scientifique. En effet, de nombreuses expériences semblent montrer que les résultats obtenus ne sont pas améliorés, voire sont parfois dégradés, lorsque de telles connaissances sont utilisées dans un système de RD. Dans ce tutoriel, nous montrons que les environnements d’évaluation ne sont pas adaptés aux besoins réels d’un utilisateur car celui-ci recherche presque toujours une information. Il veut donc retrouver des documents pertinents le plus rapidement possible car ce n’est pas là le but de sa recherche. Le temps global de la recherche est donc fondamentalement important. Néanmoins, le cadre d’évaluation TREC nous permet de montrer que l’utilisation de connaissances linguistiques permet d’augmenter la précision des premiers documents renvoyés, ce qui est très important pour diminuer le temps de recherche. 2001.jeptalnrecital-tutoriel.5 @@ -550,7 +550,7 @@ Intex et ses applications informatiques - MaxSilberztein + MaxSilberztein ThierryPoibeau AntonioBalvet 143–172 @@ -594,7 +594,7 @@ Bibliothèques d’automates finis et grammaires context-free : de nouveaux traitements informatiques - MatthieuConstant + MatthieuConstant 424–433 La quantité de documents disponibles via Internet explose. Cette situation nous incite à rechercher de nouveaux outils de localisation d’information dans des documents et, en particulier, à nous pencher sur l’algorithmique des grammaires context-free appliquée à des familles de graphes d’automates finis (strictement finis ou à cycles). Nous envisageons une nouvelle représentation et de nouveaux traitements informatiques sur ces grammaires, afin d’assurer un accès rapide aux données et un stockage peu coûteux en mémoire. 2001.jeptalnrecital-recital.3 diff --git a/data/xml/2001.mtsummit.xml b/data/xml/2001.mtsummit.xml index b335bd3eb4..0cf754776a 100644 --- a/data/xml/2001.mtsummit.xml +++ b/data/xml/2001.mtsummit.xml @@ -6,7 +6,7 @@
Santiago de Compostela, Spain
September 18-22 2001 - BenteMaegaard + BenteMaegaard mtsummit @@ -15,7 +15,7 @@ Towards a new vision for <fixed-case>MT</fixed-case> - JohnHutchins + JohnHutchins 2001.mtsummit-papers.1 hutchins-2001-towards @@ -33,7 +33,7 @@ Using multiple edit distances to automatically rank machine translation output YasuhiroAkiba KenjiImamura - EiichiroSumita + EiichiroSumita 2001.mtsummit-papers.3 This paper addresses the challenging problem of automatically evaluating output from machine translation (MT) systems in order to support the developers of these systems. Conventional approaches to the problem include methods that automatically assign a rank such as A, B, C, or D to MT output according to a single edit distance between this output and a correct translation example. The single edit distance can be differently designed, but changing its design makes assigning a certain rank more accurate, but another rank less accurate. This inhibits improving accuracy of rank assignment. To overcome this obstacle, this paper proposes an automatic ranking method that, by using multiple edit distances, encodes machine-translated sentences with a rank assigned by humans into multi-dimensional vectors from which a classifier of ranks is learned in the form of a decision tree (DT). The proposed method assigns a rank to MT output through the learned DT. The proposed method is evaluated using transcribed texts of real conversations in the travel arrangement domain. Experimental results show that the proposed method is more accurate than the single-edit-distance-based ranking methods, in both closed and open tests. Moreover, the proposed method could estimate MT quality within 3% error in some cases. akiba-etal-2001-using @@ -51,7 +51,7 @@ Finding translation correspondences from parallel parsed corpus for example-based translation EijiAramaki SadaoKurohashi - SatoshiSato + SatoshiSato HideoWatanabe 2001.mtsummit-papers.5 This paper describes a system for finding phrasal translation correspondences from parallel parsed corpus that are collections paired English and Japanese sentences. First, the system finds phrasal correspondences by Japanese-English translation dictionary consultation. Then, the system finds correspondences in remaining phrases by using sentences dependency structures and the balance of all correspondences. The method is based on an assumption that in parallel corpus most fragments in a source sentence have corresponding fragments in a target sentence. @@ -117,10 +117,10 @@
The <fixed-case>ISLE</fixed-case> in the ocean. Transatlantic standards for multilingual lexicons (with an eye to machine translation) - NicolettaCalzolari + NicolettaCalzolari AlessandroLenci - AntonioZampolli - NuriaBel + AntonioZampolli + NuriaBel MartaVillegas GregorThurmair 2001.mtsummit-papers.13 @@ -198,7 +198,7 @@ Using machine learning for system-internal evaluation of transferred linguistic representations MichaelGamon HisamiSuzuki - SimonCorston-Oliver + SimonCorston-Oliver 2001.mtsummit-papers.21 2001.mtsummit-papers.21.Presentation.pdf We present an automated, system-internal evaluation technique for linguistic representations in a large-scale, multilingual MT system. We use machine-learned classifiers to recognize the differences between linguistic representations generated from transfer in an MT context from representations that are produced by "native" analysis of the target language. In the MT scenario, convergence of the two is the desired result. Holding the feature set and the learning algorithm constant, the accuracy of the classifiers provides a measure of the overall difference between the two sets of linguistic representations: classifiers with higher accuracy correspond to more pronounced differences between representations. More importantly, the classifiers yield the basis for error-analysis by providing a ranking of the importance of linguistic features. The more salient a linguistic criterion is in discriminating transferred representations from "native" representations, the more work will be needed in order to get closer to the goal of producing native-like MT. We present results from using this approach on the Microsoft MT system and discuss its advantages and possible extensions. @@ -206,24 +206,24 @@ Search algorithms for statistical machine translation based on dynamic programming and pruning techniques - IsmaelGarcía-Varea - FranciscoCasacuberta + IsmaelGarcía-Varea + FranciscoCasacuberta 2001.mtsummit-papers.22 The increasing interest in the statistical approach to Machine Translation is due to the development of effective algorithms for training the probabilistic models proposed so far. However, one of the open problems with statistical machine translation is the design of efficient algorithms for translating a given input string. For some interesting models, only (good) approximate solutions can be found. Recently, a dynamic programming-like algorithm for the IBM-Model 2 has been proposed which is based on an iterative process of refinement solutions. A new dynamic programming-like algorithm is proposed here to deal with more complex IBM models (models 3 to 5). The computational cost of the algorithm is reduced by using an alignment-based pruning technique. Experimental results with the so-called “Tourist Task” are also presented. garcia-varea-casacuberta-2001-search <fixed-case>P</fixed-case>ol<fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et: an experimental database for <fixed-case>P</fixed-case>olish verbs - BarbaraGawronska + BarbaraGawronska 2001.mtsummit-papers.23 The semantics of verbs implies, as is known, a great number of difficulties, when it is to be represented in a computational lexicon. The Slavic languages are especially challenging in respect of this task because of the huge complexity of verbs, where the stems are combined with prefixes indicating aspect and Aktionsart. The current paper describes an approach to build PolVerbNet, a database for Polish verbs, considering the internal structure of the aspect-Aktionsart system. PolVerbNet is thus implemented in a larger English-Polish MT-system, which incorporates WordNet. We report our translation procedure and the system’s performance is evaluated and discussed. gawronska-2001-polverbnet Derivational morphology to the rescue: how it can help resolve unfound words in <fixed-case>MT</fixed-case> - ClaudiaGdaniec + ClaudiaGdaniec EsméManandise - Michael C.McCord + Michael C.McCord 2001.mtsummit-papers.24 Machine Translation (MT) systems that process unrestricted text should be able to deal with words that are not found in the MT lexicon. Without some kind of recognition, the parse may be incomplete, there is no transfer for the unfound word, and tests for transfers for surrounding words will often fail, resulting in poor translation. Interestingly, not much has been published on unfound- word guessing in the context of MT although such work has been going on for other applications. In our work on the IBM MT system, we implemented a far-reaching strategy for recognizing unfound words based on rules of word formation and for generating transfers. What distinguishes our approach from others is the use of semantic and syntactic features for both analysis and transfer, a scoring system to assign levels of confidence to possible word structures, and the creation of transfers in the transformation component. We also successfully applied rules of derivational morphological analysis to non-derived unfound words. gdaniec-etal-2001-derivational @@ -239,15 +239,15 @@ Large scale language independent generation using thematic hierarchies NizarHabash - BonnieDorr + BonnieDorr 2001.mtsummit-papers.26 habash-dorr-2001-large <fixed-case>AGILE</fixed-case> - a system for multilingual generation of technical instructions - AnthonyHartley - DoniaScott - JohnBateman + AnthonyHartley + DoniaScott + JohnBateman DanailDochev 2001.mtsummit-papers.27 This paper presents a multilingual Natural Language Generation system that produces technical instruction texts in Bulgarian, Czech and Russian. It generates several types of texts, common for software manuals, in two styles. We illustrate the system’s functionality with examples of its input and output behaviour. We discuss the criteria and procedures adopted for evaluating the system and summarise their results. The system embodies novel approaches to providing multilingual documentation, ranging from the re-use of a large-scale, broad coverage grammar of English in order to develop the lexico-grammatical resources necessary for the generation in the three target languages, through to the adoption of a ‘knowledge editing’ approach to specifying the desired content of the texts to be generated independently of the target languages in which those texts finally appear. @@ -319,14 +319,14 @@ JineeMaeng Ji-YoungLee Young-SookChae - Key-SunChoi + Key-SunChoi 2001.mtsummit-papers.35 This paper describes KORTERM’s test suite and their practicability. The test-sets have been being constructed on the basis of fine-grained classification of linguistic phenomena to evaluate the technical status of English-to-Korean MT systems systematically. They consist of about 5000 test-sets and are growing. Each test-set contains an English sentence, a model Korean translation, a linguistic phenomenon category, and a yes/no question about the linguistic phenomenon. Two commercial systems were evaluated with a yes/no test of prepared questions. Total accuracy rates of the two systems were different (50% vs. 66%). In addition, a comprehension test was carried out. We found that one system was more comprehensible than the other system. These results seem to show that our test suite is practicable. koh-etal-2001-test Integrating bilingual lexicons in a probabilistic translation assistant - PhilippeLanglais + PhilippeLanglais GeorgeFoster GuyLapalme 2001.mtsummit-papers.36 @@ -368,7 +368,7 @@ Evaluation of machine translation systems at <fixed-case>CLS</fixed-case> Corporate Language Services <fixed-case>AG</fixed-case> - ElisabethMaier + ElisabethMaier AnthonyClarke Hans-UdoStadler 2001.mtsummit-papers.41 @@ -377,7 +377,7 @@ Scaling the <fixed-case>ISLE</fixed-case> taxonomy: development of metrics for the multi-dimensional characterization of machine translation quality - Keith J.Miller + Keith J.Miller MichelleVanni 2001.mtsummit-papers.42 The DARPA MT evaluations of the early 1990s, along with subsequent work on the MT Scale, and the International Standards for Language Engineering (ISLE) MT Evaluation framework represent two of the principal efforts in Machine Translation Evaluation (MTE) over the past decade. We describe a research program that builds on both of these efforts. This paper focuses on the selection of MT output features suggested in the ISLE framework, as well as the development of metrics for the features to be used in the study. We define each metric and describe the rationale for its development. We also discuss several of the finer points of the evaluation measures that arose as a result of verification of the measures against sample output texts from three machine translation systems. @@ -386,10 +386,10 @@ Pronominal anaphora resolution in <fixed-case>KANTOO</fixed-case> <fixed-case>E</fixed-case>nglish-to-<fixed-case>S</fixed-case>panish machine translation system TerukoMitamura - EricNyberg + EricNyberg EnriqueTorrejon DavidSvoboda - KathrynBaker + KathrynBaker 2001.mtsummit-papers.43 We describe the automatic resolution of pronominal anaphora using KANT Controlled English (KCE) and the KANTOO English-to-Spanish MT system. Our algorithm is based on a robust, syntax-based approach that applies a set of restrictions and preferences to select the correct antecedent. We report a success rate of 89.6% on a training corpus with 289 anaphors, and 87.5% on held-out data containing 145 anaphors. Resolution of anaphors is important in translation, due to gender mismatches among languages; our approach translates anaphors to Spanish with 97.2% accuracy. mitamura-etal-2001-pronominal @@ -403,32 +403,32 @@ Morpho-syntactic analysis for reordering in statistical machine translation - SonjaNiessen - HermannNey + SonjaNiessen + HermannNey 2001.mtsummit-papers.45 In the framework of statistical machine translation (SMT), correspondences between the words in the source and the target language are learned from bilingual corpora on the basis of so-called alignment models. Among other things these are meant to capture the differences in word order in different languages. In this paper we show that SMT can take advantage of the explicit introduction of some linguistic knowledge about the sentence structure in the languages under consideration. In contrast to previous publications dealing with the incorporation of morphological and syntactic information into SMT, we focus on two aspects of reordering for the language pair German and English, namely question inversion and detachable German verb prefixes. The results of systematic experiments are reported and demonstrate the applicability of the approach to both translation directions on a German-English corpus. niessen-ney-2001-morpho Statistical multi-source translation - Franz JosefOch - HermannNey + Franz JosefOch + HermannNey 2001.mtsummit-papers.46 We describe methods for translating a text given in multiple source languages into a single target language. The goal is to improve translation quality in applications where the ultimate goal is to translate the same document into many languages. We describe a statistical approach and two specific statistical models to deal with this problem. Our method is generally applicable as it is independent of specific models, languages or application domains. We evaluate the approach on a multilingual corpus covering all eleven official European Union languages that was collected automatically from the Internet. In various tests we show that these methods can significantly improve translation quality. As a side effect, we also compare the quality of statistical machine translation systems for many European languages in the same domain. och-ney-2001-statistical Implicit cues for explicit generation: using telicity as a cue for tense structure in a <fixed-case>C</fixed-case>hinese to <fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case> system - MariOlsen - DavidTraum + MariOlsen + DavidTraum Carolvan Ess-Dykema - AmyWeinberg + AmyWeinberg 2001.mtsummit-papers.47 olsen-etal-2001-implicit Translation knowledge recycling for related languages - MichaelPaul + MichaelPaul 2001.mtsummit-papers.48 An increasing interest in multi-lingual translation systems demands a reconsideration of the development costs of machine translation engines for language pairs. This paper proposes an approach that reuses the existing translation knowledge resources of high-quality translation engines for translation into different, but related languages. The lexical information of the target representation is utilized to generate the corresponding translation in the related language by using a transfer dictionary for the mapping of words and a set of heuristic rules for the mapping of structural information. Experiments using a Japanese-English translation engine for the generation of German translations show a minor decrease of up to 5% in the acceptability of the German output compared with the English translation of unseen Japanese input. paul-2001-translation @@ -467,8 +467,8 @@ Cognates alignment AntónioRibeiro - GaëlDias - GabrielLopes + GaëlDias + GabrielLopes JoãoMexia 2001.mtsummit-papers.52 Some authors (Simard et al.; Melamed; Danielsson & Mühlenbock) have suggested measures of similarity of words in different languages so as to find extra clues for alignment of parallel texts. Cognate words, like ‘Parliament’ and ‘Parlement’, in English and French respectively, provide extra anchors that help to improve the quality of the alignment. In this paper, we will extend an alignment algorithm proposed by Ribeiro et al. using typical contiguous and non-contiguous sequences of characters extracted using a statistically sound method (Dias et al.). With these typical sequences, we are able to find more reliable correspondence points and improve the alignment quality without recurring to heuristics to identify cognates. @@ -477,7 +477,7 @@ Achieving commercial-quality translation with example-based methods StephenRichardson - WilliamDolan + WilliamDolan ArulMenezes JessiePinkham 2001.mtsummit-papers.53 @@ -494,13 +494,13 @@ A morphological analyser for machine translation based on finite-state transducers AlbertoSanchis - DavidPicó + DavidPicó JoanMiquel del Val FerranFabregat JesúsTomás MoisésPastor - FranciscoCasacuberta - EnriqueVidal + FranciscoCasacuberta + EnriqueVidal 2001.mtsummit-papers.55 A finite-state, rule-based morphological analyser is presented here, within the framework of machine translation system TAVAL. This morphological analyser introduces specific features which are particularly useful for translation, such as the detection and morphological tagging of word groups that act as a single lexical unit for translation purposes. The case where words in one such group are not strictly contiguous is also covered. A brief description of the Spanish-to-Catalan and Catalan-to-Spanish translation system TAVAL is given in the paper. sanchis-etal-2001-morphological @@ -509,7 +509,7 @@ New generation Systran translation system JeanSenellart PéterDienes - TamásVáradi + TamásVáradi 2001.mtsummit-papers.56 In this paper, we present the design of the new generation Systran translation systems, currently utilized in the development of English-Hungarian, English-Polish, English-Arabic, French-Arabic, Hungarian-French and Polish-French language pairs. The new design, based on the traditional Systran machine translation expertise and the existing linguistic resources, addresses the following aspects: efficiency, modularity, declarativity, reusability, and maintainability. Technically, the new systems rely on intensive use of state-of-the-art finite automaton and formal grammar implementation. The finite automata provide the essential lookup facilities and the natural capacity of factorizing intuitive linguistic sets. Linguistically, we have introduced a full monolingual description of linguistic information and the concept of implicit transfer. Finally, we present some by-products that are directly derived from the new architecture: intuitive coding tools, spell checker and syntactic tagger. senellart-etal-2001-new @@ -529,7 +529,7 @@ Young-AeSeo Yoon-HyungRoh Ki-YoungLee - Sang-KyuPark + Sang-KyuPark 2001.mtsummit-papers.58 seo-etal-2001-captioneye @@ -546,7 +546,7 @@ Sub-sentential exploitation of translation memories MichelSimard - PhilippeLanglais + PhilippeLanglais 2001.mtsummit-papers.60 Translation memory systems (TMS) are a family of computer tools whose purpose is to facilitate and encourage the re-use of existing translations. By searching a database of past translations, these systems can retrieve the translation of whole segments of text and propose them to the translator for re-use. However, the usefulness of existing TMS’s is limited by the nature of the text segments that that they are able to put in correspondence, generally whole sentences. This article examines the potential of a type of system that is able to recuperate the translation of sub-sentential sequences of words. simard-langlais-2001-sub @@ -571,7 +571,7 @@ Converting a bilingual dictionary into a bilingual knowledge bank based on the synchronous <fixed-case>SSTC</fixed-case> Enya KongTang - Mosleh H.Al-Adhaileh + Mosleh H.Al-Adhaileh 2001.mtsummit-papers.63 In this paper, we would like to present an approach to construct a huge Bilingual Knowledge Bank (BKB) from an English Malay bilingual dictionary based on the idea of synchronous Structured String-Tree Correspondence (SSTC). The SSTC is a general structure that can associate an arbitrary tree structure to string in a language as desired by the annotator to be the interpretation structure of the string, and more importantly is the facility to specify the correspondence between the string and the associated tree which can be non-projective. With this structure, we are able to match linguistic units at different inter levels of the structure (i.e. define the correspondence between substrings in the sentence, nodes in the tree, subtrees in the tree and sub-correspondences in the SSTC). This flexibility makes synchronous SSTC very well suited for the construction of a Bilingual Knowledge Bank we need for the English-Malay MT application. tang-al-adhaileh-2001-converting @@ -579,14 +579,14 @@ Monotone statistical translation using word groups JesúsTomás - FranciscoCasacuberta + FranciscoCasacuberta 2001.mtsummit-papers.64 A new system for statistical natural language translation for languages with similar grammar is introduced. Specifically, it can be used with Romanic Languages, such as French, Spanish or Catalan. The statistical translation uses two sources of information: a language model and a translation model. The language model used is a standard trigram model. A new approach is defined in the translation model. The two main properties of the translation model are: the translation probabilities are computed between groups of words and the alignment between those groups is monotone. That is, the order between the word groups in the source sentence is conserved in the target sentence. Once, the translation model has been defined, we present an algorithm to infer its parameters from training samples. The translation process is carried out with an efficient algorithm based on stack-decoding. Finally, we present some translation results from Catalan to Spanish and compare our model with other conventional models. tomas-casacuberta-2001-monotone Translatability checker: a tool to help decide whether to use <fixed-case>MT</fixed-case> - NancyUnderwood + NancyUnderwood BartJongejan 2001.mtsummit-papers.65 This paper describes a tool designed to assess the machine translatability of English source texts by assigning a translatability index to both individual sentences and the text as a whole. The tool is designed to be both stand-alone and integratable into a suite of other tools which together help to improve the quality of professional translation in the preparatory phase of the translation workflow. Assessing translatability is an important element in ensuring the most efficient and cost effective use of current translation technology, and the tool must be able to quickly determine the translatability of a text without itself using too many resources. It is therefore based on rather simple tagging and pattern matching technologies which bring with them a certain level of indeterminacy. This potential disadvantage can, however, be offset by the fact that an annotated version of the text is simultaneously produced to allow the user to interpret the results of the checker. @@ -615,8 +615,8 @@ An automatic evaluation method for machine translation using two-way <fixed-case>MT</fixed-case> - ShoichiYokoyama - HidekiKashioka + ShoichiYokoyama + HidekiKashioka AkiraKumano MasakiMatsudaira YoshikoShirokizawa @@ -630,10 +630,10 @@ Pre-processing of bilingual corpora for <fixed-case>M</fixed-case>andarin-<fixed-case>E</fixed-case>nglish <fixed-case>EBMT</fixed-case> - YingZhang - RalfBrown - RobertFrederking - AlonLavie + YingZhang + RalfBrown + RobertFrederking + AlonLavie 2001.mtsummit-papers.69 Pre-processing of bilingual corpora plays an important role in Example-Based Machine Translation (EBMT) and Statistical-Based Machine Translation (SBMT). For our Mandarin-English EBMT system, pre-processing includes segmentation for Mandarin, bracketing for English and building a statistical dictionary from the corpora. We used the Mandarin segmenter from the Linguistic Data Consortium (LDC). It uses dynamic programming with a frequency dictionary to segment the text. Although the frequency dictionary is large, it does not completely cover the corpora. In this paper, we describe the work we have done to improve the segmentation for Mandarin and the bracketing process for English to increase the length of English phrases. A statistical dictionary is built from the aligned bilingual corpus. It is used as feedback to segmentation and bracketing to re-segment / re-bracket the corpus. The process iterates several times to achieve better results. The final results of the corpus pre-processing are a segmented/bracketed aligned bilingual corpus and a statistical dictionary. We achieved positive results by increasing the average length of Chinese terms about 60% and 10% for English. The statistical dictionary gained about a 30% increase in coverage. zhang-etal-2001-pre @@ -669,9 +669,9 @@ Evaluating machine translation output for an unknown source language: report of an <fixed-case>ISLE</fixed-case>-based investigation - Keith J.Miller - Donna M.Gates - NancyUnderwood + Keith J.Miller + Donna M.Gates + NancyUnderwood JoseminaMagdalen 2001.mtsummit-eval.3 It is often assumed that knowledge of both the source and target languages is necessary in order to evaluate the output of a machine translation (MT) system. This paper reports on an experimental evaluation of Chinese-English MT and Spanish-English MT from output specifically designed for evaluators who do not read or speak Chinese or Spanish. An outline of the characteristics measured and evaluation follows. @@ -679,16 +679,16 @@ Setting a methodology for machine translation evaluation - WidadMustafa El Hadi - IsmailTimimi - MarianneDabbadie + WidadMustafa El Hadi + IsmailTimimi + MarianneDabbadie 2001.mtsummit-eval.4 In this paper some of the problems encountered in designing an evaluation for an MT system will be examined. The source text, in French, provided by INRA (Institut National pour la Recherche Agronomique i.e. National Institute for Agronomic Research) deals with biotechnology and animal reproduction. It has been translated into English. The output of the system (i.e. the result of the assembling of several components), as opposed to its individual modules or specific components (i.e. analysis, generation, grammar, lexicon, core, etc.), will be evaluated. Moreover, the evaluation will concentrate on translation quality and its fidelity to the source text. The evaluation is not comparative, which means that we tested a specific MT system, not necessarily representative of other MT systems that can be found on the market. mustafa-el-hadi-etal-2001-setting Towards a two-stage taxonomy for machine translation evaluation - AndreiPopescu-Belis + AndreiPopescu-Belis SandraManzi MaghiKing 2001.mtsummit-eval.5 @@ -696,7 +696,7 @@ Automatically predicting <fixed-case>MT</fixed-case> systems rankings compatible with fluency, adequacy and informativeness scores - MartinRajman + MartinRajman TonyHartley 2001.mtsummit-eval.6 The main goal of the work presented in this paper is to find an inexpensive and automatable way of predicting rankings of MT systems compatible with human evaluations of these systems expressed in the form of Fluency, Adequacy or Informativeness scores. Our approach is to establish whether there is a correlation between rankings derived from such scores and the ones that can be built on the basis of automatically computable attributes of syntactic or semantic nature. We present promising results obtained on the DARPA94 MT evaluation corpus. @@ -704,17 +704,17 @@ In one hundred words or less - FlorenceReeder + FlorenceReeder 2001.mtsummit-eval.7 This paper reports on research which aims to test the efficacy of applying automated evaluation techniques, originally designed for human second language learners, to machine translation (MT) system evaluation. We believe that such evaluation techniques will provide insight into MT evaluation, MT development, the human translation process and the human language learning process. The experiment described here looks only at the intelligibility of MT output. The evaluation technique is derived from a second language acquisition experiment that showed that assessors can differentiate native from non-native language essays in less than 100 words. Particularly illuminating for our purposes is the set of factor on which the assessors made their decisions. We duplicated this experiment to see if similar criteria could be elicited from duplicating the test using both human and machine translation outputs in the decision set. The encouraging results of this experiment, along with an analysis of language factors contributing to the successful outcomes, is presented here. reeder-2001-one The naming of things and the confusion of tongues: an <fixed-case>MT</fixed-case> metric - FlorenceReeder - KeithMiller - JenniferDoyon - JohnWhite + FlorenceReeder + KeithMiller + JenniferDoyon + JohnWhite 2001.mtsummit-eval.8 This paper reports the results of an experiment in machine translation (MT) evaluation, designed to determine whether easily/rapidly collected metrics can predict the human generated quality parameters of MT output. In this experiment we evaluated a system’s ability to translate named entities, and compared this measure with previous evaluation scores of fidelity and intelligibility. There are two significant benefits potentially associated with a correlation between traditional MT measures and named entity scores: the ability to automate named entity scoring and thus MT scoring; and insights into the linguistic aspects of task-based uses of MT, as captured in previous studies. reeder-etal-2001-naming @@ -722,21 +722,21 @@ Scaling the <fixed-case>ISLE</fixed-case> framework: validating tests of machine translation quality for multi-dimensional measurement MichelleVanni - Keith J.Miller + Keith J.Miller 2001.mtsummit-eval.9 Work on comparing a set of linguistic test scores for MT output to a set of the same tests’ scores for naturally-occurring target language text (Jones and Rusk 2000) broke new ground in automating MT Evaluation. However, the tests used were selected on an ad hoc basis. In this paper, we report on work to extend our understanding, through refinement and validation, of suitable linguistic tests in the context of our novel approach to MTE. This approach was introduced in Miller and Vanni (2001a) and employs standard, rather than randomly-chosen, tests of MT output quality selected from the ISLE framework as well as a scoring system for predicting the type of information processing task performable with the output. Since the intent is to automate the scoring system, this work can also be viewed as the preliminary steps of algorithm design. vanni-miller-2001-scaling Predicting intelligibility from fidelity in <fixed-case>MT</fixed-case> evaluation - JohnWhite + JohnWhite 2001.mtsummit-eval.10 Attempts to formulate methods of automatically evaluating machine translation (MT) have generally looked at some attrinbute of translation and then tried, explicitly or implicitly, to extrapolate the measurement to cover a broader class of attributes. In particular, some studies have focused on measuring fidelity of translation, and inferring intelligibility from that, and others have taken the opposite approach. In this paper we examine the more fundamental question of whether, and to what extent, the one attribute can be predicted by the other. As a starting point we use the 1994 DARPA MT corpus, which has measures for both attributes, and perform a simple comparison of the behavior of each. Two hypotheses about a predictable inference between fidelity and intelligibility are compared with the comparative behavior across all language pairs and all documents in the corpus. white-2001-predicting Predicting <fixed-case>MT</fixed-case> fidelity from noun-compound handling - JohnWhite + JohnWhite MonikaForner 2001.mtsummit-eval.11 Approaches to the automation of machine translation (MT) evaluation have attempted, or presumed, to connect some rapidly measurable phenomenon with general attributes of the MT output and/or system. In particular, measurements of the fluency of output are often asserted to be predictive of the usefulness of MT output in information-intensive, downstream tasks. The connections between the fluency (“intelligibility”) of translation and its informational adequacy (“fidelity”) are not actually straightforward. This paper discussed a small experiment in isolating a particular contrastive linguistic phenomena common to both French-English and Spanish-English pairs, and attempts to associate that behavior in machine and human translations with known fidelity properties of those translations. Our results show a definite correlative trend. @@ -767,7 +767,7 @@ Transfer-rule induction for example-based translation - Ralf D.Brown + Ralf D.Brown 2001.mtsummit-ebmt.1 brown-2001-transfer @@ -779,7 +779,7 @@ Linguistic knowledge and complexity in an <fixed-case>EBMT</fixed-case> system based on translation patterns - KevinMcTait + KevinMcTait 2001.mtsummit-ebmt.3 An approach to Example-Based Machine Translation is presented which operates by extracting translation patterns from a bilingual corpus aligned at the level of the sentence. This is carried out using a language-neutral recursive machine-learning algorithm based on the principle of similar distributions of strings. The translation patterns extracted represent generalisations of sentences that are translations of each other and, to some extent, resemble transfer rules but with fewer constraints. The strings and variables, of which translations patterns are composed, are aligned in order to provide a more refined bilingual knowledge source, necessary for the recombination phase. A non-structural approach based on surface forms is error prone and liable to produce translation patterns that are false translations. Such errors are highlighted and solutions are proposed by the addition of external linguistic resources, namely morphological analysis and part-of-speech tagging. The amount of linguistic resources added has consequences for computational complexity and portability. mctait-2001-linguistic @@ -794,14 +794,14 @@ Beyond translation memories - ReinhardSchäler + ReinhardSchäler 2001.mtsummit-ebmt.5 One key to the success of EBMT is the removal of the boundaries limiting the potential of translation memories. To bring EBMT to fruition, researchers and developers have to go beyond the self-imposed limitations of what is now traditional, in computing terms almost old fashioned, TM technology. Experiments have shown that the probability of finding exact matches at phrase level is higher than the probability of finding exact matches at the current TM segment level. We outline our implementation of a linguistically enhanced translation memory system (or Phrasal Lexicon) implementing phrasal matching. This system takes advantage of the huge and underused resources available in existing translation memories and develops a traditional TM into a sophisticated example-based machine translation engine which when integrated into a hybrid MT solution can yield significant improvements in translation quality. schaler-2001-beyond <fixed-case>EBMT</fixed-case> seen as case-based reasoning - HaroldSomers + HaroldSomers 2001.mtsummit-ebmt.6 This paper looks at EBMT from the perspective of the Case-based Reasoning (CBR) paradigm. We attempt to describe the task of machine translation (MT) seen as a potential application of CBR, and attempt to describe MT in standard CBR terms. The aim is to see if other applications of CBR can suggest better ways to approach EBMT. somers-2001-ebmt @@ -878,14 +878,14 @@ Discovering machine translation strategies beyond word-for-word translation: a laboratory assignment Juan AntonioPérez-Ortiz - Mikel L.Forcada + Mikel L.Forcada 2001.mtsummit-teach.7 It is a common mispreconception to say that machine translation programs translate word-for-word, but real systems follow strategies which are much more complex. This paper proposes a laboratory assignment to study the way in which some commercial machine translation programs translate whole sentences and how the translation differs from a word-for-word translation. Students are expected to infer some of these extra strategies by observing the outcome of real systems when translating a set of sentences designed on purpose. The assignment also makes students aware of the difficulty of constructing such programs while bringing some technological light into the apparent “magic” of machine translation. perez-ortiz-forcada-2001-discovering Three perspectives on <fixed-case>MT</fixed-case> in the classroom - HaroldSomers + HaroldSomers 2001.mtsummit-teach.8 This paper considers the role of translation software, especially Machine Translation (MT), in curricula for students of computational linguistics, for trainee translators and for language learners. These three sets of students have differing needs and interests, although there is some overlap between them. A brief historical view of MT in the classroom is given, including comments on the author’s 25 years of experience in the field. This is followed by discussion and examples of strategies for teaching about MT and related aspects of Language Engineering and Information Technology for the three types of student. somers-2001-three @@ -904,7 +904,7 @@
Santiago de Compostela, Spain
September 18-22 2001 - StevenKrauwer + StevenKrauwer mtsummit @@ -913,14 +913,14 @@ Four technical and organizational keys to handle more languages and improve quality (on demand) in <fixed-case>MT</fixed-case> - ChristianBoitet + ChristianBoitet 2001.mtsummit-road.1 Despite considerable investment over the past 50 years, only a small number of language pairs is covered by MT systems designed for information access, and even fewer are capable of quality translation or speech translation. To open the door toward MT of adequate quality for all languages (at least in principle), we propose four keys. On the technical side, we should (1) dramatically increase the use of learning techniques which have demonstrated their potential at the research level, and (2) use pivot architectures, the most universally usable pivot being UNL. On the organizational side, the keys are (3) the cooperative development of open source linguistic resources on the Web, and (4) the construction of systems where quality can be improved "on demand" by users, either a priori through interactive disambiguation, or a posteriori by correcting the pivot representation through any language, thereby unifying MT, computer-aided authoring, and multilingual generation. boitet-2001-four Towards pragmatics-based machine translation - DavidFarwell + DavidFarwell StephenHelmreich 2001.mtsummit-road.2 We propose a program of research which has as its goal establishing a framework and methodology for investigating the pragmatic aspects of the translation process and implementing a computational platform for carrying out systematic experiments on the pragmatics of translation. The program has four components. First, on the basis of a comparative study of multiple translations of the same document into a single target language, a pragmatics-based computational model is to be developed in which reasoning about the beliefs of the participants in the translation task and about the content of a text are central. Second, existing Natural Language Processing technologies are to be appraised as potential components of a computational platform that supports investigations into the effects of pragmatics on translation. Third, the platform is to be assembled and prototype translation systems implemented which conform to the pragmatics-based computational model of translation. Finally, a novel evaluation methodology is to be developed and evaluations of the systems carried out. @@ -944,15 +944,15 @@ Rethinking interaction: the solution for high-quality <fixed-case>MT</fixed-case>? ElliottMacklovitch - Antonio S.Valderrábanos + Antonio S.Valderrábanos 2001.mtsummit-road.5 Our focus is on high-quality (HQ) translation, the worldwide demand for which continues to increase exponentially and now far exceeds the capacity of the translation profession to satisfy it. To what extent is MT currently being used to satisfy this growing demand for HQ translation? Quite obviously, very little. Although MT is being used today by more people than ever before, very few of these users are professional translators. This represents a major change, for a mere ten years ago, translators were still the principal target market for most MT vendors. What happened to bring about this change? For that matter, what happened to most of those MT vendors? The view we present is that the most promising strategy for HQ MT is to embed MT systems in translation environments where the translator retains full control over their output. In our opinion, this new type of interactive MT will achieve better acceptance levels among translators and significantly improve the prospects of MT’s commercial success in the translation industry. macklovitch-valderrabanos-2001-rethinking What can machine translation learn from speech recognition? - Franz JosefOch - HermannNey + Franz JosefOch + HermannNey 2001.mtsummit-road.6 The performance of machine translation technology after 50 years of development leaves much to be desired. There is a high demand for well performing and cheap MT systems for many language pairs and domains, which automatically adapt to rapidly changing terminology. We argue that for successful MT systems it will be crucial to apply data-driven methods, especially statistical machine translation. In addition, it will be very important to establish common test environments. This includes the availability of large parallel training corpora, well defined test corpora and standardized evaluation criteria. Thereby research results can be compared and this will open the possibility for more competition in MT research. och-ney-2001-machine @@ -960,10 +960,10 @@ Design and implementation of controlled elicitation for machine translation of low-density languages KatharinaProbst - RalfBrown - JaimeCarbonell - AlonLavie - LoriLevin + RalfBrown + JaimeCarbonell + AlonLavie + LoriLevin ErikPeterson 2001.mtsummit-road.7 NICE is a machine translation project for low-density languages. We are building a tool that will elicit a controlled corpus from a bilingual speaker who is not an expert in linguistics. The corpus is intended to cover major typological phenomena, as it is designed to work for any language. Using implicational universals, we strive to minimize the number of sentences that each informant has to translate. From the elicited sentences, we learn transfer rules with a version space algorithm. Our vision for MT in the future is one in which systems can be quickly trained for new languages by native speakers, so that speakers of minor languages can participate in education, health care, government, and internet without having to give up their languages. @@ -978,8 +978,8 @@ Evaluating <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish translation systems for personal name coverage - Benjamin K.Tsou - Oi YeeKwong + Benjamin K.Tsou + Oi YeeKwong 2001.mtsummit-road.9 This paper discusses the challenges which Chinese-English machine translation (MT) systems face in translating personal names. We show that the translation of names between Chinese and English is complicated by different factors, including orthographic, phonetic, geographic and social ones. Four existing systems were tested for their capability in translating personal names from Chinese to English. Test data embodying geographic and sociolinguistic differences were obtained from a synchronous Chinese corpus of news media texts. It is obvious that systems vary considerably in their ability to identify personal names in the source language and render them properly in the target language. Given the criticality of personal name translation to the overall intelligibility of a translated text, the coverage of personal names should be one of the important criteria in the evaluation of MT performance. Moreover, name translation, which calls for a hybrid approach, would remain a central issue to the future development of MT systems, especially for online and real-time applications. tsou-kwong-2001-evaluating diff --git a/data/xml/2002.amta.xml b/data/xml/2002.amta.xml index f947756733..d112d92252 100644 --- a/data/xml/2002.amta.xml +++ b/data/xml/2002.amta.xml @@ -12,13 +12,13 @@ Example-based machine translation - RalfBrown + RalfBrown 2002.amta-tutorials.1 brown-2002-example The state of the art in language modeling - JoshuaGoodman + JoshuaGoodman 2002.amta-tutorials.2 goodman-2002-state @@ -35,13 +35,13 @@ Automatic rule learning for resource-limited <fixed-case>MT</fixed-case> - JaimeCarbonell + JaimeCarbonell KatharinaProbst ErikPeterson ChristianMonson - AlonLavie - RalfBrown - LoriLevin + AlonLavie + RalfBrown + LoriLevin 1-10 https://link.springer.com/chapter/10.1007/3-540-45820-4_1 Machine Translation of minority languages presents unique challenges, including the paucity of bilingual training data and the unavailability of linguistically-trained speakers. This paper focuses on a machine learning approach to transfer-based MT, where data in the form of translations and lexical alignments are elicited from bilingual speakers, and a seeded version-space learning algorithm formulates and refines transfer rules. A rule-generalization lattice is defined based on LFG-style f-structures, permitting generalization operators in the search for the most general rules consistent with the elicited data. The paper presents these methods and illustrates examples. @@ -51,7 +51,7 @@ Toward a hybrid integrated translation environment MichaelCarl AndyWay - ReinhardSchäler + ReinhardSchäler 11-20 https://link.springer.com/chapter/10.1007/3-540-45820-4_2 In this paper we present a model for the future use of Machine Translation (MT) and Computer Assisted Translation. In order to accommodate the future needs in middle value translations, we discuss a number of MT techniques and architectures. We anticipate a hybrid environment that integrates data- and rule-driven approaches where translations will be routed through the available translation options and consumers will receive accurate information on the quality, pricing and time implications of their translation choice. @@ -61,7 +61,7 @@ Adaptive bilingual sentence alignment Thomas C.Chuang G.N.You - JasonChang + JasonChang 21-30 https://link.springer.com/chapter/10.1007/3-540-45820-4_3 We present a new approach to the problem of aligning English and Chinese sentences in a bilingual corpus based on adaptive learning. While using length information alone produces surprisingly good results for aligning bilingual French and English sentences with success rates well over 95%, it does not fair as well for the alignment of English and Chinese sentences. The crux of the problem lies in greater variability of lengths and match types of the matched sentences. We propose to cope with such variability via a two-pass scheme under which model parameters can be learned from the data at hand. Experiments show that under the approach bilingual English-Chinese texts can be aligned effectively across diverse domains, genres and translation directions with accuracy rates approaching 99%. @@ -69,7 +69,7 @@ <fixed-case>DUST</fixed-case>er: a method for unraveling cross-language divergences for statistical word-level alignment - BonnieDorr + BonnieDorr LisaPearl RebeccaHwa NizarHabash @@ -81,7 +81,7 @@ Text prediction with fuzzy alignment GeorgeFoster - PhilippeLanglais + PhilippeLanglais GuyLapalme 44-53 https://link.springer.com/chapter/10.1007/3-540-45820-4_5 @@ -90,10 +90,10 @@ Efficient integration of maximum entropy lexicon models within the training of statistical alignment models - IsmaelGarcía-Varea - Franz J.Och - HermannNey - FranciscoCasacuberta + IsmaelGarcía-Varea + Franz J.Och + HermannNey + FranciscoCasacuberta 54-63 https://link.springer.com/chapter/10.1007/3-540-45820-4_6 Maximum entropy (ME) models have been successfully applied to many natural language problems. In this paper, we show how to integrate ME models efficiently within a maximum likelihood training scheme of statistical machine translation models. Specifically, we define a set of context-dependent ME lexicon models and we present how to perform an efficient training of these ME models within the conventional expectation-maximization (EM) training of statistical translation models. Experimental results are also given in order to demonstrate how these ME models improve the results obtained with the traditional translation models. The results are presented by means of alignment quality comparing the resulting alignments with manually annotated reference alignments. @@ -101,7 +101,7 @@ Using word formation rules to extend <fixed-case>MT</fixed-case> lexicons - ClaudiaGdaniec + ClaudiaGdaniec EsméManandise 64-73 https://link.springer.com/chapter/10.1007/3-540-45820-4_7 @@ -121,7 +121,7 @@ Handling translation divergences: combining statistical and symbolic techniques in generation-heavy machine translation NizarHabash - BonnieDorr + BonnieDorr 84-93 https://link.springer.com/chapter/10.1007/3-540-45820-4_9 This paper describes a novel approach to handling translation divergences in a Generation-Heavy Hybrid Machine Translation (GHMT) system. The translation divergence problem is usually reserved for Transfer and Interlingual MT because it requires a large combination of complex lexical and structural mappings. A major requirement of these approaches is the accessibility of large amounts of explicit symmetric knowledge for both source and target languages. This limitation renders Transfer and Interlingual approaches ineffective in the face of structurally-divergent language pairs with asymmetric resources. GHMT addresses the more common form of this problem, source-poor/targetrich, by fully exploiting symbolic and statistical target-language resources. This non-interlingual non-transfer approach is accomplished by using target-language lexical semantics, categorial variations and subcategorization frames to overgenerate multiple lexico-structural variations from a target-glossed syntactic dependency of the source-language sentence. The symbolic overgeneration, which accounts for different possible translation divergences, is constrained by a statistical target-language model. @@ -129,10 +129,10 @@ <fixed-case>K</fixed-case>orean-<fixed-case>C</fixed-case>hinese machine translation based on verb patterns - ChanghyunKim + ChanghyunKim MunpyoHong YinxiaHuang - Young KilKim + Young KilKim Sung IlYang Young AeSeo Sung-KwonChoi @@ -143,7 +143,7 @@ Merging example-based and statistical machine translation: an experiment - PhilippeLanglais + PhilippeLanglais MichelSimard 104-113 https://link.springer.com/chapter/10.1007/3-540-45820-4_11 @@ -168,7 +168,7 @@ Fast and accurate sentence alignment of bilingual corpora - Robert C.Moore + Robert C.Moore 135-144 https://link.springer.com/chapter/10.1007/3-540-45820-4_14 We present a new method for aligning sentences with their translations in a parallel bilingual corpus. Previous approaches have generally been based either on sentence length or word correspondences. Sentence-length-based methods are relatively fast and fairly accurate. Word-correspondence-based methods are generally more accurate but much slower, and usually depend on cognates or a bilingual lexicon. Our method adapts and combines these approaches, achieving high accuracy at a modest computational cost, and requiring no knowledge of the languages or the corpus beyond division into words and sentences. @@ -176,9 +176,9 @@ Deriving semantic knowledge from descriptive texts using an <fixed-case>MT</fixed-case> system - EricNyberg + EricNyberg TerukoMitamura - KathrynBaker + KathrynBaker DavidSvoboda BrianPeterson JenniferWilliams @@ -230,7 +230,7 @@ A report on the experiences of implementing an <fixed-case>MT</fixed-case> system for use in a commercial environment AnthonyClarke - ElisabethMaier + ElisabethMaier Hans-UdoStadler 187-194 https://link.springer.com/chapter/10.1007/3-540-45820-4_19 @@ -323,10 +323,10 @@ The <fixed-case>NESPOLE</fixed-case>! speech-to-speech translation system - AlonLavie - LoriLevin - RobertFrederking - FabioPianesi + AlonLavie + LoriLevin + RobertFrederking + FabioPianesi 240-243 https://link.springer.com/chapter/10.1007/3-540-45820-4_28 NESPOLE! is a speech-to-speech machine translation research system designed to provide fully functional speech-to-speech capabilities within real-world settings of common users involved in e-commerce applications. The project is funded jointly by the European Commission and the US NSF. The NESPOLE! system uses a client-server architecture to allow a common user, who is browsing web-pages on the internet, to connect seamlessly in real-time to an agent of the service provider, using a video-conferencing channel and with speech-to-speech translation services mediating the conversation. Shared web pages and annotated images supported via a Whiteboard application are available to enhance the communication. @@ -335,7 +335,7 @@ The <fixed-case>KANTOO</fixed-case> <fixed-case>MT</fixed-case> sytem: controlled language checker and lexical maintenance tool TeriukoMitamura - EricNyberg + EricNyberg KathyBaker PeterCramer JeongwooKo @@ -348,7 +348,7 @@ Approaches to spoken translation - Christine A.Montgomery + Christine A.Montgomery NaicongLi 248-252 https://link.springer.com/chapter/10.1007/3-540-45820-4_30 diff --git a/data/xml/2002.eamt.xml b/data/xml/2002.eamt.xml index c277306641..5dd420575e 100644 --- a/data/xml/2002.eamt.xml +++ b/data/xml/2002.eamt.xml @@ -17,7 +17,7 @@ Teaching <fixed-case>MT</fixed-case> - an <fixed-case>I</fixed-case>ndian pespective - SivajiBandyopadhyay + SivajiBandyopadhyay 2002.eamt-1.2 bandyopadhyay-2002-teaching @@ -35,7 +35,7 @@ Teaching contrastive linguistics for <fixed-case>MT</fixed-case> - PaulBennett + PaulBennett 2002.eamt-1.5 bennett-2002-teaching @@ -53,7 +53,7 @@ Architectures of “toy” systems for teaching machine translation - Waltherv. Hahn + Waltherv. Hahn CristinaVertan 2002.eamt-1.8 v-hahn-vertan-2002-architectures @@ -103,7 +103,7 @@ Explaining real <fixed-case>MT</fixed-case> to translators: between compositional semantics and word-for-word - Mikel L.Forcada + Mikel L.Forcada 2002.eamt-1.16 forcada-2002-explaining @@ -115,7 +115,7 @@ Teaching commercial <fixed-case>MT</fixed-case> to translators: bridging the gap between human and machine - NatalieKübler + NatalieKübler 2002.eamt-1.18 kubler-2002-teaching diff --git a/data/xml/2002.jeptalnrecital.xml b/data/xml/2002.jeptalnrecital.xml index 4771f9a49e..ce9b73dff7 100644 --- a/data/xml/2002.jeptalnrecital.xml +++ b/data/xml/2002.jeptalnrecital.xml @@ -50,7 +50,7 @@ Ressources terminologiques et traduction probabiliste: premiers pas positifs vers un système adaptatif - PhilippeLanglais + PhilippeLanglais 43–52 Cette dernière décennie a été le témoin d’importantes avancées dans le domaine de la traduction statistique (TS). Aucune évaluation fine n’a cependant été proposée pour mesurer l’adéquation de l’approche statistique dans un contexte applicatif réel. Dans cette étude, nous étudions le comportement d’un engin de traduction probabiliste lorsqu’il traduit un texte de nature très éloignée de celle du corpus utilisé lors de l’entraînement. Nous quantifions en particulier la baisse de performance du système et développons l’idée que l’intégration de ressources terminologiques dans le processus est une solution naturelle et salutaire à la traduction. Nous décrivons cette intégration et évaluons son potentiel. 2002.jeptalnrecital-long.2 @@ -59,7 +59,7 @@ Accentuation de mots inconnus : application au thesaurus biomédical <fixed-case>M</fixed-case>e<fixed-case>SH</fixed-case> - PierreZweigenbaum + PierreZweigenbaum NataliaGrabar 53–62 Certaines ressources textuelles ou terminologiques sont écrites sans signes diacritiques, ce qui freine leur utilisation pour le traitement automatique des langues. Dans un domaine spécialisé comme la médecine, il est fréquent que les mots rencontrés ne se trouvent pas dans les lexiques électroniques disponibles. Se pose alors la question de l’accentuation de mots inconnus : c’est le sujet de ce travail. Nous proposons deux méthodes d’accentuation de mots inconnus fondées sur un apprentissage par observation des contextes d’occurrence des lettres à accentuer dans un ensemble de mots d’entraînement, l’une adaptée de l’étiquetage morphosyntaxique, l’autre adaptée d’une méthode d’apprentissage de règles morphologiques. Nous présentons des résultats expérimentaux pour la lettre e sur un thesaurus biomédical en français : le MeSH. Ces méthodes obtiennent une précision de 86 à 96 % (+-4 %) pour un rappel allant de 72 à 86 %. @@ -106,7 +106,7 @@ Extraction d’informations à partir de corpus dégradés FabriceEven - ChantalEnguehard + ChantalEnguehard 105–115 Nous présentons une méthode automatique d’extraction d’information à partir d’un corpus mono-domaine de mauvaise qualité, sur lequel il est impossible d’appliquer les méthodes classiques de traitement de la langue naturelle. Cette approche se fonde sur la construction d’une ontologie semi-formelle (modélisant les informations contenues dans le corpus et les relations entre elles). Notre méthode se déroule en trois phases : 1) la normalisation du corpus, 2) la construction de l’ontologie, et 3) sa formalisation sous la forme d’une grammaire. L’extraction d’information à proprement parler exploite un étiquetage utilisant les règles définies par la grammaire. Nous illustrons notre démarche d’une application sur un corpus bancaire. 2002.jeptalnrecital-long.8 @@ -116,7 +116,7 @@ Identification thématique hiérarchique : Application aux forums de discussions BrigitteBigi - KamelSmaïli + KamelSmaïli 116–125 Les modèles statistiques du langage ont pour but de donner une représentation statistique de la langue mais souffrent de nombreuses imperfections. Des travaux récents ont montré que ces modèles peuvent être améliorés s’ils peuvent bénéficier de la connaissance du thème traité, afin de s’y adapter. Le thème du document est alors obtenu par un mécanisme d’identification thématique, mais les thèmes ainsi traités sont souvent de granularité différente, c’est pourquoi il nous semble opportun qu’ils soient organisés dans une hiérarchie. Cette structuration des thèmes implique la mise en place de techniques spécifiques d’identification thématique. Cet article propose un modèle statistique à base d’unigrammes pour identifier automatiquement le thème d’un document parmi une arborescence prédéfinie de thèmes possibles. Nous présentons également un critère qui permet au modèle de donner un degré de fiabilité à la décision prise. L’ensemble des expérimentations a été réalisé sur des données extraites du groupe ’fr’ des forums de discussion. 2002.jeptalnrecital-long.9 @@ -137,7 +137,7 @@ Filtrages syntaxiques de co-occurrences pour la représentation vectorielle de documents RomaricBesançon - MartinRajman + MartinRajman 136–145 L’intégration de co-occurrences dans les modèles de représentation vectorielle de documents s’est avérée une source d’amélioration de la pertinence des mesures de similarités textuelles calculées dans le cadre de ces modèles (Rajman et al., 2000; Besançon, 2001). Dans cette optique, la définition des contextes pris en compte pour les co-occurrences est cruciale, par son influence sur les performances des modèles à base de co-occurrences. Dans cet article, nous proposons d’étudier deux méthodes de filtrage des co-occurrences fondées sur l’utilisation d’informations syntaxiques supplémentaires. Nous présentons également une évaluation de ces méthodes dans le cadre de la tâche de la recherche documentaire. 2002.jeptalnrecital-long.11 @@ -147,7 +147,7 @@ <fixed-case>WSIM</fixed-case> : une méthode de détection de thème fondée sur la similarité entre mots ArmelleBrun - KamelSmaïli + KamelSmaïli Jean-PaulHaton 146–155 L’adaptation des modèles de langage dans les systèmes de reconnaissance de la parole est un des enjeux importants de ces dernières années. Elle permet de poursuivre la reconnaissance en utilisant le modèle de langage adéquat : celui correspondant au thème identifié. Dans cet article nous proposons une méthode originale de détection de thème fondée sur des vocabulaires caractéristiques de thèmes et sur la similarité entre mots et thèmes. Cette méthode dépasse la méthode classique (TFIDF) de 14%, ce qui représente un gain important en terme d’identification. Nous montrons également l’intérêt de choisir un vocabulaire adéquat. Notre méthode de détermination des vocabulaires atteint des performances 3 fois supérieures à celles obtenues avec des vocabulaires construits sur la fréquence des mots. @@ -166,7 +166,7 @@ <fixed-case>LOGUS</fixed-case> : un système formel de compréhension du français parlé spontané-présentation et évaluation - JeanneVillaneau + JeanneVillaneau Jean-YvesAntoine OlivierRidoux 167–176 @@ -178,7 +178,7 @@ Etude des relations entre pauses et ponctuations pour la synthèse de la parole à partir de texte EstelleCampione - JeanVéronis + JeanVéronis 177–186 Nous présentons dans cette communication la première étude à grande échelle de la relation entre pauses et ponctuations, à l’aide de l’analyse de plusieurs milliers de pauses dans un corpus comportant près de 5 heures de parole lue en cinq langues, faisant intervenir 50 locuteurs des deux sexes. Nos résultats remettent en cause l’idée reçue de rapports bi-univoques entre pauses et ponctuations. Nous mettons en évidence une proportion importante de pauses hors ponctuation, qui délimitent des constituants, mais aussi un pourcentage élevé de ponctuations faibles réalisées sans pauses. Nous notons également une très grande variabilité inter-locuteur, ainsi que des différences importantes entre langues. Enfin, nous montrons que la durée des pauses est liée au sexe des locuteurs. 2002.jeptalnrecital-long.15 @@ -216,7 +216,7 @@ Groupes prépositionnels arguments ou circonstants : vers un repérage automatique en corpus - CécileFabre + CécileFabre CécileFrérot 217–226 Dans cette étude, menée dans le cadre de la réalisation d’un analyseur syntaxique de corpus spécialisés, nous nous intéressons à la question des arguments et circonstants et à leur repérage automatique en corpus. Nous proposons une mesure simple pour distinguer automatiquement, au sein des groupes prépositionnels rattachés au verbe, des types de compléments différents. Nous réalisons cette distinction sur corpus, en mettant en oeuvre une stratégie endogène, et en utilisant deux mesures de productivité : la productivité du recteur verbal vis à vis de la préposition évalue le degré de cohésion entre le verbe et son groupe prépositionnel (GP), tandis que la productivité du régi vis à vis de la préposition permet d’évaluer le degré de cohésion interne du GP. Cet article présente ces deux mesures, commente les données obtenues, et détermine dans quelle mesure cette partition recouvre la distinction traditionnelle entre arguments et circonstants. @@ -226,7 +226,7 @@ Évaluation des taux de synonymie et de polysémie dans un texte - ClaudeDe Loupy + ClaudeDe Loupy 227–236 La polysémie et la synonymie sont deux aspects fondamentaux de la langue. Nous présentons ici une évaluation de l’importance de ces deux phénomènes à l’aide de statistiques basées sur le lexique WordNet et sur le SemCor. Ainsi, on a un taux de polysémie théorique de 5 sens par mot dans le SemCor. Mais si on regarde les occurrences réelles, moins de 50 % des sens possibles sont utilisés. De même, s’il y a, en moyenne, 2,7 mots possibles pour désigner un concept qui apparaît dans le corpus, plus de la moitié d’entre eux ne sont jamais utilisés. Ces résultats relativisent l’utilité de telles ressources sémantiques pour le traitement de la langue. 2002.jeptalnrecital-long.20 @@ -235,7 +235,7 @@ Acquisition automatique de sens à partir d’opérations morphologiques en français : études de cas - FiammettaNamer + FiammettaNamer 237–246 Cet article propose une méthode de codage automatique de traits lexicaux sémantiques en français. Cette approche exploite les relations fixées par l’instruction sémantique d’un opérateur de construction morphologique entre la base et le mot construit. En cela, la réflexion s’inspire des travaux de Marc Light (Light 1996) tout en exploitant le fonctionnement d’un système d’analyse morphologique existant : l’analyseur DériF. A ce jour, l’analyse de 12 types morphologiques conduit à l’étiquetage d’environ 10 % d’un lexique composé de 99000 lemmes. L’article s’achève par la description de deux techniques utilisées pour valider les traits sémantiques. 2002.jeptalnrecital-long.21 @@ -274,7 +274,7 @@ La coédition langue↔<fixed-case>UNL</fixed-case> pour partager la révision entre les langues d’un document multilingue : un concept unificateur - ChristianBoitet + ChristianBoitet Wang-JuTsai 277–288 La coédition d’un texte en langue naturelle et de sa représentation dans une forme interlingue semble le moyen le meilleur et le plus simple de partager la révision du texte vers plusieurs langues. Pour diverses raisons, les graphes UNL sont les meilleurs candidats dans ce contexte. Nous développons un prototype où, dans le scénario avec partage le plus simple, des utilisateurs “naïfs” interagissent directement avec le texte dans leur langue (L0), et indirectement avec le graphe associé pour corriger les erreurs. Le graphe modifié est ensuite envoyé au déconvertisseur UNL-L0 et le résultat est affiché. S’il est satisfaisant, les erreurs étaient probablement dues au graphe et non au déconvertisseur, et le graphe est envoyé aux déconvertisseurs vers d’autres langues. Les versions dans certaines autres langues connues de l’utilisateur peuvent être affichées, de sorte que le partage de l’amélioration soit visible et encourageant. Comme les nouvelles versions sont ajoutées dans le document multilingue original avec des balises et des attributs appropriés, rien n’est jamais perdu, et le travail coopératif sur un même document est rendu possible. Du côté interne, des liaisons sont établies entre des éléments du texte et du graphe en utilisant des ressources largement disponibles comme un dictionnaire L0-anglais, ou mieux L0-UNL, un analyseur morphosyntaxique de L0, et une transformation canonique de graphe UNL à arbre. On peut établir une “meilleure” correspondance entre “l’arbre-UNL+L0” et la “structure MS-L0”, une treille, en utilisant le dictionnaire et en cherchant à aligner l’arbre et une trajectoire avec aussi peu que possible de croisements de liaisons. Un but central de cette recherche est de fusionner les approches de la TA par pivot, de la TA interactive, et de la génération multilingue de texte. @@ -396,7 +396,7 @@ _MASSY) mis librement à la disposition de la communauté scientifique. Ces deux Polynomial Tree Substitution Grammars: Characterization and New Examples Jean-CédricChappelier - MartinRajman + MartinRajman AntoineRozenknop 357–362 Polynomial Tree Substitution Grammars, a subclass of STSGs for which finding the most probable parse is no longer NP-hard but polynomial, are defined and characterized in terms of general properties on the elementary trees in the grammar. Various sufficient and easy to compute properties for a STSG to be polynomial are presented. The min-max selection principle is shown to be one such sufficient property. In addition, another, new, instance of a sufficient property, based on lexical heads, is presented. The performances of both models are evaluated on several corpora. @@ -444,7 +444,7 @@ _MASSY) mis librement à la disposition de la communauté scientifique. Ces deux Compréhension Automatique de la Parole et <fixed-case>TAL</fixed-case> : une approche syntaxico-sémantique pour le traitement des inattendus structuraux du français parlé - JérômeGoulian + JérômeGoulian Jean-YvesAntoine FranckPoirier 388–393 @@ -455,7 +455,7 @@ _MASSY) mis librement à la disposition de la communauté scientifique. Ces deux Automatic Item Text Generation in Educational Assessment - CédrickFairon + CédrickFairon David M.Williamson 394–400 We present an automatic text generation system (ATG) developed for the generation of natural language text for automatically produced test items. This ATG has been developed to work with an automatic item generation system for analytical reasoning items for use in tests with high-stakes outcomes (such as college admissions decisions). As such, the development and implementation of this ATG is couched in the context and goals of automated item generation for educational assessment. @@ -491,7 +491,7 @@ _MASSY) mis librement à la disposition de la communauté scientifique. Ces deux Modélisation des liens lexicaux au moyen des fonctions lexicales - AlainPolguère + AlainPolguère 37–60 Ce tutoriel est une introduction à la modélisation lexicographique des liens lexicaux au moyen des fonctions lexicales de la théorie Sens-Texte. Il s’agit donc d’examiner un sous-ensemble des tâches effectuées en lexicographie formelle basée sur la lexicologie explicative et combinatoire. Plutôt que de viser l’introduction de toutes les fonctions lexicales identifiées par la théorie Sens- Texte, je vais m’attacher à introduire la notion de fonction lexicale de façon méthodique, en présentant d’abord les notions linguistiques plus générales sur lesquelles elle s’appuie (lexie, prédicat, actant, dérivation sémantique, collocation, etc.). Ce document vise essentiellement à récapituler les définitions des notions linguistiques qui vont être vues dans le tutoriel de façon pratique, par le biais d’exercices à caractère lexicographique. 2002.jeptalnrecital-tutoriel.2 @@ -561,7 +561,7 @@ _MASSY) mis librement à la disposition de la communauté scientifique. Ces deux Conceptualisation d’un système d’informations lexicales, une interface paramétrable pour le <fixed-case>T</fixed-case>.<fixed-case>A</fixed-case>.<fixed-case>L</fixed-case> - DjaméSeddah + DjaméSeddah EvelyneJacquey 435–444 La nécessité de ressources lexicales normalisées et publiques est avérée dans le domaine du TAL. Cet article vise à montrer comment, sur la base d’une partie du lexique MULTEXT disponible sur le serveur ABU, il serait possible de construire une architecture permettant tout à la fois l’accès aux ressources avec des attentes différentes (lemmatiseur, parseur, extraction d’informations, prédiction, etc.) et la mise à jour par un groupe restreint de ces ressources. Cette mise à jour consistant en l’intégration et la modification, automatique ou manuelle, de données existantes. Pour ce faire, nous cherchons à prendre en compte à la fois les besoins et les données accessibles. Ce modèle est évalué conceptuellement dans un premier temps en fonction des systèmes utilisés dans notre équipe : un analyseur TAG, un constructeur de grammaires TAGs, un extracteur d’information. diff --git a/data/xml/2002.tc.xml b/data/xml/2002.tc.xml index efefb8a0fd..7037505af3 100644 --- a/data/xml/2002.tc.xml +++ b/data/xml/2002.tc.xml @@ -17,7 +17,7 @@ <fixed-case>M</fixed-case>eta<fixed-case>M</fixed-case>orpho: A Pattern-Based Machine Translation System - GáborPrószéky + GáborPrószéky 2002.tc-1.2 proszeky-2002-metamorpho @@ -29,7 +29,7 @@ Can Translation Companies Survive the Current Economic Climate? - ReinhardSchäler + ReinhardSchäler 2002.tc-1.4 schaler-2002-translation diff --git a/data/xml/2002.tmi.xml b/data/xml/2002.tmi.xml index 14a97326f0..b885c38b13 100644 --- a/data/xml/2002.tmi.xml +++ b/data/xml/2002.tmi.xml @@ -18,21 +18,21 @@ Alternation-based lexicon reconstruction - TimothyBaldwin + TimothyBaldwin FrancisBond 2002.tmi-papers.2 baldwin-bond-2002-alternation Corpus-driven splitting of compound words - RalfBrown + RalfBrown 2002.tmi-papers.3 brown-2002-corpus Two experiments in situated <fixed-case>MT</fixed-case> - JimCowie - SergeiNirenburg + JimCowie + SergeiNirenburg 2002.tmi-papers.4 cowie-nirenburg-2002-two @@ -53,14 +53,14 @@ Incremental construction and maintenance of morphological analysers based on augmented letter transducers AliciaGarrido-Alenda - Mikel L.Forcada + Mikel L.Forcada Rafael C.Carrasco 2002.tmi-papers.7 garrido-alenda-etal-2002-incremental Extracting semantic classes and morphosyntactic features for <fixed-case>E</fixed-case>nglish-<fixed-case>P</fixed-case>olish machine translation - BarbaraGawronska + BarbaraGawronska BjörnErlendsson HannaDuczak 2002.tmi-papers.8 @@ -99,11 +99,11 @@ Pronominal anaphora resolution in the <fixed-case>KANTOO</fixed-case> multilingual machine translation system TerukoMitamura - EricNyberg + EricNyberg EnriqueTorrejon DaveSvoboda AnnelenBrunner - KathrynBaker + KathrynBaker 2002.tmi-papers.13 mitamura-etal-2002-pronominal @@ -133,7 +133,7 @@ Challenges in automated elicitation of a controlled bilingual corpus KatharinaProbst - LoriLevin + LoriLevin 2002.tmi-papers.17 probst-levin-2002-challenges @@ -146,8 +146,8 @@ Rapid adaptive development of semantic analysis grammars AliciaTribble - AlonLavie - LoriLevin + AlonLavie + LoriLevin 2002.tmi-papers.19 tribble-etal-2002-rapid @@ -155,7 +155,7 @@ Statistical machine translation based on hierarchical phrase alignment TaroWatanabe KenjiImamura - EiichiroSumita + EiichiroSumita 2002.tmi-papers.20 watanabe-etal-2002-statistical-machine @@ -178,7 +178,7 @@ Example-based machine translation - EiichiroSumita + EiichiroSumita KenjiImamura 2002.tmi-tutorials.1.Presentation.pdf sumita-imamura-2002-example @@ -190,7 +190,7 @@ Translation memories - TimothyBaldwin + TimothyBaldwin 2002.tmi-tutorials.3.Presentation.pdf baldwin-2002-translation @@ -216,13 +216,13 @@ Using multilingual content on the web to build fast finite-state direct translation systems - Mikel L.Forcada + Mikel L.Forcada 2002.tmi-tmiw.3 forcada-2002-using Machine translation in the mobile and wearable age - NigelWard + NigelWard 2002.tmi-tmiw.4 ward-2002-machine @@ -234,18 +234,18 @@ What are we celebrating today? - HaroldSomers + HaroldSomers somers-2002-celebrating Speech related technologies: Where will the field go in 10 years? - Niels OleBernsen + Niels OleBernsen 2002.tmi-tmiw.7 bernsen-2002-speech-related Towards a road map on human language technology: Natural language processing - AndreasEisele + AndreasEisele DorotheaZiegler-Eisele 2002.tmi-tmiw.8 eisele-ziegler-eisele-2002-towards-road diff --git a/data/xml/2003.eamt.xml b/data/xml/2003.eamt.xml index 14723ed2e8..414c371f8a 100644 --- a/data/xml/2003.eamt.xml +++ b/data/xml/2003.eamt.xml @@ -30,7 +30,7 @@ Tuning general translation knowledge to a sublanguage MichaelCarl - PhilippeLanglais + PhilippeLanglais 2003.eamt-1.4 carl-langlais-2003-tuning @@ -46,17 +46,17 @@ Adapting finite-state translation to the <fixed-case>T</fixed-case>rans<fixed-case>T</fixed-case>ype2 project ElsaCubel JorgeGonzález - AntonioLagarda - FranciscoCasacuberta - AlfonsJuan - EnriqueVidal + AntonioLagarda + FranciscoCasacuberta + AlfonsJuan + EnriqueVidal 2003.eamt-1.6 cubel-etal-2003-adapting Using monolingual corpora for statistical machine translation: the <fixed-case>METIS</fixed-case> system YannisDologlou - StellaMarkantonatou + StellaMarkantonatou GeorgeTambouratzis OlgaYannoutsou AthanassiaFourla @@ -72,7 +72,7 @@ Multilingual cataloguing of product information of specific domains: case Mkbeem system - AarnoLehtola + AarnoLehtola JarnoTenni TuulaKäpylä 2003.eamt-1.9 @@ -81,8 +81,8 @@ Diagnostics for interactive controlled language checking TerukoMitamura - KathrynBaker - EricNyberg + KathrynBaker + EricNyberg DavidSvoboda 2003.eamt-1.10 mitamura-etal-2003-diagnostics @@ -102,8 +102,8 @@ Multilingual generation of controlled languages RichardPower - DoniaScott - AnthonyHartley + DoniaScott + AnthonyHartley 2003.eamt-1.13 power-etal-2003-multilingual @@ -116,7 +116,7 @@ A specification and validating parser for simplified technical <fixed-case>S</fixed-case>panish RemediosRuiz Cascales - Richard F. E.Sutcliffe + Richard F. E.Sutcliffe 2003.eamt-1.15 ruiz-cascales-sutcliffe-2003-specification @@ -144,7 +144,7 @@ Mind your language! Controlled language for inference purposes - JanaSukkarieh + JanaSukkarieh 2003.eamt-1.19 sukkarieh-2003-mind @@ -163,7 +163,7 @@ Evaluating specifications for controlled <fixed-case>G</fixed-case>reek MarinaVassiliou - StellaMarkantonatou + StellaMarkantonatou YanisMaistros VangelisKarkaletsis 2003.eamt-1.22 @@ -172,7 +172,7 @@ Menu choice translation: a flexible menu-based controlled natural language system CristinaVertan - Walthervon Hahn + Walthervon Hahn 2003.eamt-1.23 vertan-von-hahn-2003-menu diff --git a/data/xml/2003.jeptalnrecital.xml b/data/xml/2003.jeptalnrecital.xml index f854dd29e0..99cfccbd98 100644 --- a/data/xml/2003.jeptalnrecital.xml +++ b/data/xml/2003.jeptalnrecital.xml @@ -3,7 +3,7 @@ Actes de la 10ème conférence sur le Traitement Automatique des Langues Naturelles. Articles longs - BéatriceDaille + BéatriceDaille EmmanuelMorin ATALA
Batz-sur-Mer, France
@@ -18,8 +18,8 @@ Quand le <fixed-case>TAL</fixed-case> robuste s’attaque au langage parlé : analyse incrémentale pour la compréhension de la parole spontanée Jean-YvesAntoine - JérômeGoulian - JeanneVillaneau + JérômeGoulian + JeanneVillaneau 25–34 Dans cet article, nous discutons de l’application au langage parlé des techniques d’analyse syntaxique robuste développées pour l’écrit. Nous présentons deux systèmes de compréhension de parole spontané en situation de dialogue homme-machine finalisé, dont les performances montrent la pertinence de ces méthodes pour atteindre une compréhension fine et robuste des énoncés oraux. 2003.jeptalnrecital-long.1 @@ -38,7 +38,7 @@ Nouvelle approche de la sélection de vocabulaire pour la détection de thème ArmelleBrun - KamelSmaïli + KamelSmaïli Jean-PaulHaton 45–54 En reconnaissance de la parole, un des moyens d’améliorer les performances des systèmes est de passer par l’adaptation des modèles de langage. Une étape cruciale de ce processus consiste à détecter le thème du document traité et à adapter ensuite le modèle de langage. Dans cet article, nous proposons une nouvelle approche de création des vocabulaires utilisés pour la détection de thème. Cette dernière est fondée sur le développement de vocabulaires spécifiques et caractéristiques des différents thèmes. Nous montrons que cette approche permet non seulement d’améliorer les performances des méthodes, mais exploite également des vocabulaires de taille réduite. De plus, elle permet d’améliorer de façon très significative les performances de méthodes de détection lorsqu’elles sont combinées. @@ -48,7 +48,7 @@ Classification automatique de textes à partir de leur analyse syntaxico-sémantique - JacquesChauché + JacquesChauché ViolainePrince SimonJaillet MaguelonneTeisseire @@ -69,7 +69,7 @@ Une plate-forme de conception et d’exploitation d’une grammaire d’arbres adjoints lexicalisés - BenoîtCrabbé + BenoîtCrabbé BertrandGaiffe AzimRoussanaly 75–84 @@ -81,8 +81,8 @@ Peut-on trouver la taille de contexte optimale en désambiguïsation sémantique? ÉricCrestan - MarcEl-Bèze - ClaudeDe Loupy + MarcEl-Bèze + ClaudeDe Loupy 85–94 Dans la tâche de désambiguïsation sémantique, la détermination de la taille optimale de fenêtre de contexte à utiliser, a fait l’objet de plusieurs études. Dans cet article, nous proposons une approche à deux niveaux pour répondre à cette problématique de manière automatique. Trois systèmes concurrents à base d’arbres de classification sémantique sont, dans un premier temps, utilisés pour déterminer les trois sens les plus vraisemblables d’un mot. Ensuite, un système décisionnel tranche entre ces sens au regard d’un contexte plus étendu. Les améliorations constatées lors d’expériences menées sur les données de SENSEVAL-1 et vérifiées sur les données SENSEVAL-2 sont significatives. 2003.jeptalnrecital-long.7 @@ -138,7 +138,7 @@ Contextual Grammars and Dependency Trees RaduGramatovici - CarlosMartín-Vide + CarlosMartín-Vide 135–144 A new variant of structured contextual grammar, which generates dependency trees, is introduced. The new generative model, called dependency contextual grammar, improves both the strong and weak generative power of contextual grammars, while being a potential candidate for the mathematical description of dependency-based syntactic models. 2003.jeptalnrecital-long.12 @@ -167,7 +167,7 @@ Vers la compréhension automatique de la parole : extraction de concepts par réseaux bayésiens SalmaJamoussi - KamelSmaïli + KamelSmaïli Jean-PaulHaton 165–174 La compréhension automatique de la parole peut être considérée comme un problème d’association entre deux langages différents. En entrée, la requête exprimée en langage naturel et en sortie, juste avant l’étape d’interprétation, la même requête exprimée en terme de concepts. Un concept représente un sens bien déterminé. Il est défini par un ensemble de mots partageant les mêmes propriétés sémantiques. Dans cet article, nous proposons une méthode à base de réseau bayésien pour l’extraction automatique des concepts ainsi que trois approches différentes pour la représentation vectorielle des mots. Ces représentations aident un réseau bayésien à regrouper les mots, construisant ainsi la liste adéquate des concepts à partir d’un corpus d’apprentissage. Nous conclurons cet article par la description d’une étape de post-traitement au cours de laquelle, nous étiquetons nos requêtes et nous générons les commandes SQL appropriées validant ainsi, notre approche de compréhension. @@ -195,7 +195,7 @@ De la traduction probabiliste aux mémoires de traduction (ou l’inverse) - PhilippeLanglais + PhilippeLanglais MichelSimard 195–204 En dépit des travaux réalisés cette dernière décennie dans le cadre général de la traduction probabiliste, nous sommes toujours bien loin du jour où un engin de traduction automatique (probabiliste ou pas) sera capable de répondre pleinement aux besoins d’un traducteur professionnel. Dans une étude récente (Langlais, 2002), nous avons montré comment un engin de traduction probabiliste pouvait bénéficier de ressources terminologiques extérieures. Dans cette étude, nous montrons que les techniques de traduction probabiliste peuvent être utilisées pour extraire des informations sous-phrastiques d’une mémoire de traduction. Ces informations peuvent à leur tour s’avérer utiles à un engin de traduction probabiliste. Nous rapportons des résultats sur un corpus de test de taille importante en utilisant la mémoire de traduction d’un concordancier bilingue commercial. @@ -226,7 +226,7 @@ Apprentissage discriminant pour les Grammaires à Substitution d’Arbres AntoineRozenknop Jean-CédricChappelier - MartinRajman + MartinRajman 225–234 Les grammaires stochastiques standards utilisent des modèles probabilistes de nature générative, fondés sur des probabilités de récriture conditionnées par le symbole récrit. Les expériences montrent qu’elles tendent ainsi par nature à pénaliser les dérivations les plus longues pour une mˆeme entrée, ce qui n’est pas forcément un comportement souhaitable, ni en analyse syntaxique, ni en reconnaissance de la parole. Dans cet article, nous proposons une approche probabiliste non-générative du modèle STSG (grammaire stochastique à substitution d’arbres), selon laquelle les probabilités sont conditionnées par les feuilles des arbres syntaxiques plutˆot que par leur racine, et qui par nature fait appel à un apprentissage discriminant. Plusieurs expériences sur ce modèle sont présentées. 2003.jeptalnrecital-long.21 @@ -248,8 +248,8 @@ <fixed-case>F</fixed-case>rench Amalgam: A machine-learned sentence realization system MartineSmets MichaelGamon - SimonCorston-Oliver - EricRingger + SimonCorston-Oliver + EricRingger 245–254 This paper presents the French implementation of Amalgam, a machine-learned sentence realization system. It presents in some detail two of the machine-learned models employed in Amalgam and shows how linguistic intuition and knowledge can be combined with statistical techniques to improve the performance of the models. 2003.jeptalnrecital-long.23 @@ -266,7 +266,7 @@ Cartographie lexicale pour la recherche d”information - JeanVéronis + JeanVéronis 265–274 Nous décrivons un algorithme, HyperLex, de détermination automatique des différents usages d’un mot dans une base textuelle sans utilisation d’un dictionnaire. Cet algorithme basé sur la détection des composantes de forte densité du graphe des cooccurrences de mots permet, contrairement aux méthodes précédemment proposées (vecteurs de mots), d’isoler des usages très peu fréquents. Il est associé à une technique de représentation graphique permettant à l’utilisateur de naviguer de façon visuelle à travers le lexique et d’explorer les différentes thématiques correspondant aux usages discriminés. 2003.jeptalnrecital-long.25 @@ -286,7 +286,7 @@ Apprentissage de relations morphologiques en corpus - PierreZweigenbaum + PierreZweigenbaum FadilaHadouche NataliaGrabar 285–294 @@ -299,7 +299,7 @@ Actes de la 10ème conférence sur le Traitement Automatique des Langues Naturelles. Posters - BéatriceDaille + BéatriceDaille EmmanuelMorin ATALA
Batz-sur-Mer, France
@@ -343,14 +343,14 @@ Indexation discursive pour la navigation intradocumentaire : cadres temporels et spatiaux dans l’information géographique FrédérikBilhaut - Lydia-MaiHo-Dac + Lydia-MaiHo-Dac AndréeBorillo ThierryCharnois PatriceEnjalbert AnneLe Draoulec YannMathet HélèneMiguet - Marie-PaulePéry-Woodley + Marie-PaulePéry-Woodley LaureSarda 315–320 Cet article concerne la structuration automatique de documents par des méthodes linguistiques. De telles procédures sont rendues nécessaires par les nouvelles tâches de recherche d’information intradocumentaires (systèmes de questions-réponses, navigation sélective dans des documents...). Nous développons une méthode exploitant la théorie de l’encadrement du discours de Charolles, avec une application visée en recherche d’information dans les documents géographiques - d’où l’intérêt tout particulier porté aux cadres spatiaux et temporels. Nous décrivons une implémentation de la méthode de délimitation de ces cadres et son exploitation pour une tâche d’indexation intratextuelle croisant les critères spatiaux et temporels avec des critères thématiques. @@ -371,7 +371,7 @@ Identification automatique des valeurs temporelles dans les textes MarieChagnoux SlimBen Hazez - Jean-PierreDesclés + Jean-PierreDesclés 327–332 Cet article présente une application qui associe un certain nombre de valeurs sémantiques à des segments textuels en vue de proposer un traitement automatique de la temporalité dans les textes. Il s’agit d’automatiser une analyse sémantique de surface à l’aide de règles heuristiques d’exploration contextuelle et d’une base organisée de marqueurs linguistiques. 2003.jeptalnrecital-poster.6 @@ -380,7 +380,7 @@ Structuration automatique de preuves mathématiques : de la logique à la rhétorique - AdilEl Ghali + AdilEl Ghali LaurentRoussarie 333–338 Nous présentons dans ses grandes lignes un modèle de structuration de documents pour la génération automatique de preuves mathématiques. Le modèle prend en entrée des sorties d’un prouveur automatique et vise à produire des textes dont le style s’approche le plus possible des démonstrations rédigées par des humains. Cela implique la mise au point d’une stratégie de planification de document capable de s’écarter de la structure purement logique de la preuve. La solution que nous proposons consiste à intégrer de manière simple des informations de type intentionnel afin d’enrichir la structure rhétorique finale du texte. @@ -390,7 +390,7 @@ <fixed-case>C</fixed-case>o<fixed-case>RR</fixed-case>ec<fixed-case>T</fixed-case> : Démarche coopérative pour l’évaluation de systèmes de reconnaissance de termes - ChantalEnguehard + ChantalEnguehard 339–346 La reconnaissance de termes dans les textes intervient dans de nombreux domaines du Traitement Automatique des Langues Naturelles, qu’il s’agisse d’indexation automatique, de traduction, ou d’extraction de connaissances. Nous présentons une méthodologie d’évaluation de Systèmes de Reconnaissance de Termes (SRT) qui vise à minimiser le temps d’expertise des spécialistes en faisant coopérer des SRT. La méthodologie est mise en oeuvre sur des textes en anglais dans le domaine de la chimie des métaux et à l’aide de deux SRT : FASTR et SYRETE. Le banc de test construit selon cette méthodologie a permis de valider les SRT et d’évaluer leurs performances en termes de rappel et de précision. 2003.jeptalnrecital-poster.8 @@ -417,9 +417,9 @@ Bases de connaissances pour asseoir la crédibilité des réponses d’un système de <fixed-case>Q</fixed-case>/<fixed-case>R</fixed-case> - LaurentGillard - PatriceBellot - MarcEl-Bèze + LaurentGillard + PatriceBellot + MarcEl-Bèze 359–364 Cet article présente un prototype de Question/Réponse (Q/R) impliquant un ensemble de bases de connaissances (BC) dont l’objectif est d’apporter un crédit supplémentaire aux réponses candidates trouvées. Ces BC et leur influence sur la stratégie d’ordonnancement mise en œuvre sont décrites dans le cadre de la participation du système à la campagne Q/R de TREC-2002. 2003.jeptalnrecital-poster.11 @@ -472,7 +472,7 @@ Prototypage rapide et évaluation de modèles de dialogue finalisés - MartinRajman + MartinRajman AndréaRajman FlorianSeydoux AlexTrutnev @@ -485,8 +485,8 @@ Text Tokenization for Knowledge-free Automatic Extraction of Lexical Similarities AristomenisThanopoulos - NikosFakotakis - GeorgeKokkinakis + NikosFakotakis + GeorgeKokkinakis 397–402 Previous studies on automatic extraction of lexical similarities have considered as semantic unit of text the word. However, the theory of contextual lexical semantics implies that larger segments of text, namely non-compositional multiwords, are more appropriate for this role. We experimentally tested the applicability of this notion applying automatic collocation extraction to identify and merge such multiwords prior to the similarity estimation process. Employing an automatic WordNet-based comparative evaluation scheme along with a manual evaluation procedure, we ascertain improvement of the extracted similarity relations. 2003.jeptalnrecital-poster.17 @@ -515,7 +515,7 @@ Actes de la 10ème conférence sur le Traitement Automatique des Langues Naturelles. Tutoriels - BéatriceDaille + BéatriceDaille EmmanuelMorin ATALA
Batz-sur-Mer, France
diff --git a/data/xml/2003.mtsummit.xml b/data/xml/2003.mtsummit.xml index 9fe08a35de..f89acb3833 100644 --- a/data/xml/2003.mtsummit.xml +++ b/data/xml/2003.mtsummit.xml @@ -37,7 +37,7 @@
Have we found the Holy Grail? - HermannNey + HermannNey 2003.mtsummit-plenaries.5.Presentation.pdf ney-2003-found @@ -55,7 +55,7 @@
Holy and unholy grails - EduardHovy + EduardHovy DeepakRavichandran 2003.mtsummit-plenaries.8.Presentation.pdf hovy-ravichandran-2003-holy @@ -72,10 +72,10 @@ Experimental comparison of <fixed-case>MT</fixed-case> evaluation methods: <fixed-case>RED</fixed-case> vs.<fixed-case>BLEU</fixed-case> YasuhiroAkiba - EiichiroSumita + EiichiroSumita HiromiNakaiwa SeiichiYamamoto - Hiroshi G.Okuno + Hiroshi G.Okuno 2003.mtsummit-papers.1 This paper experimentally compares two automatic evaluators, RED and BLEU, to determine how close the evaluation results of each automatic evaluator are to average evaluation results by human evaluators, following the ATR standard of MT evaluation. This paper gives several cautionary remarks intended to prevent MT developers from drawing misleading conclusions when using the automatic evaluators. In addition, this paper reports a way of using the automatic evaluators so that their results agree with those of human evaluators. akiba-etal-2003-experimental @@ -83,7 +83,7 @@ A hybrid approach to deriving selectional preferences ArendseBernth - Michael C.McCord + Michael C.McCord 2003.mtsummit-papers.2 A hybrid approach to automatic derivation of class-based selectional preferences is proposed. A lexicon of selectional preferences can assist in handling several forms of ambiguity, a major problem for MT. The approach combines knowledge-rich parsing and lexicons, with statistics and corpus data. We illustrate the use of a selectional preference lexicon for anaphora resolution. bernth-mccord-2003-hybrid @@ -98,11 +98,11 @@ Reducing boundary friction using translation-fragment overlap - Ralf D.Brown + Ralf D.Brown RebeccaHutchinson - Paul N.Bennett - Jaime G.Carbonell - PeterJansen + Paul N.Bennett + Jaime G.Carbonell + PeterJansen 2003.mtsummit-papers.4 Many corpus-based Machine Translation (MT) systems generate a number of partial translations which are then pieced together rather than immediately producing one overall translation. While this makes them more robust to ill-formed input, they are subject to disfluencies at phrasal translation boundaries even for well-formed input. We address this “boundary friction” problem by introducing a method that exploits overlapping phrasal translations and the increased confidence in translation accuracy they imply. We specify an efficient algorithm for producing translations using overlap. Finally, our empirical analysis indicates that this approach produces higher quality translations than the standard method of combining non-overlapping fragments generated by our Example-Based MT (EBMT) system in a peak-to-peak comparison. brown-etal-2003-reducing @@ -111,7 +111,7 @@ Communicative strategies and patterns of multimodal integration in a speech-to-speech translation system SusanneBurger EricaCostantini - FabioPianesi + FabioPianesi 2003.mtsummit-papers.5 When multilingual communication through a speech-to-speech translation system is supported by multimodal features, e.g. pen-based gestures, the following issues arise concerning the nature of the supported communication: a) to what extend does multilingual communication differ from ‘ordinary’ monolingual communication with respect to the dialogue structure and the communicative strategies used by participants; b) the patterns of integration between speech and gestures. Building on the outcomes of a previous work, we present results from a study aimed at addressing those issues. The initial findings confirm that multilingual communication, and the way in which it is realized by actual systems (e.g., with or without the push-to-talk mode) affects the form and structure of the conversation. burger-etal-2003-communicative @@ -134,7 +134,7 @@ Combining decision trees and transformation-based learning to correct transferred linguistic representations - SimonCorston-Oliver + SimonCorston-Oliver MichaelGamon 2003.mtsummit-papers.8 We approach to correcting features in transferred linguistic representations in machine translation. The hybrid approach combines decision trees and transformation-based learning. Decision trees serve as a filter on the intractably large search space of possible interrelations among features. Transformation-based learning results in a simple set of ordered rules that can be compiled and executed after transfer and before sentence realization in the target language. We measure the reduction in noise in the linguistic representations and the results of human evaluations of end-to-end English-German machine translation. @@ -149,7 +149,7 @@ The limits of n-gram translation evaluation metrics - ChristopherCuly + ChristopherCuly Susanne Z.Riehemann 2003.mtsummit-papers.10 N-gram measures of translation quality, such as BLEU and the related NIST metric, are becoming increasingly important in machine translation, yet their behaviors are not fully understood. In this paper we examine the performance of these metrics on professional human translations into German of two literary genres, the Bible and Tom Sawyer. The most surprising result is that some machine translations outscore some professional human translations. In addition, it can be difficult to distinguish some other human translations from machine translations with only two reference translations; with four reference translations it is much easier. Our results lead us to conclude that much care must be taken in using n-gram measures in formal evaluations of machine translation quality, though they are still valuable as part of the iterative development cycle. @@ -157,7 +157,7 @@ A hybrid approach to word order transfer in the <fixed-case>E</fixed-case>nglish-to-<fixed-case>V</fixed-case>ietnamese machine translation - DinhDien + DinhDien Nguyen Luu ThuyNgan Do XuanQuang Van ChiNam @@ -167,9 +167,9 @@ <fixed-case>BTL</fixed-case>: a hybrid model for <fixed-case>E</fixed-case>nglish-<fixed-case>V</fixed-case>ietnamese machine translation - DinhDien - KiemHoang - EduardHovy + DinhDien + KiemHoang + EduardHovy 2003.mtsummit-papers.12 Machine Translation (MT) is the most interesting and difficult task which has been posed since the beginning of computer history. The highest difficulty which computers had to face with, is the built-in ambiguity of Natural Languages. Formerly, a lot of human-devised rules have been used to disambiguate those ambiguities. Building such a complete rule-set is time-consuming and labor-intensive task whilst it doesn’t cover all the cases. Besides, when the scale of system increases, it is very difficult to control that rule-set. In this paper, we present a new model of learning-based MT (entitled BTL: Bitext-Transfer Learning) that learns from bilingual corpus to extract disambiguating rules. This model has been experimented in English-to-Vietnamese MT system (EVT) and it gave encouraging results. dien-etal-2003-btl @@ -178,15 +178,15 @@ An algorithm for word-level alignment of parallel dependency trees YuanDing DanielGildea - MarthaPalmer + MarthaPalmer 2003.mtsummit-papers.13 Structural divergence presents a challenge to the use of syntax in statistical machine translation. We address this problem with a new algorithm for alignment of loosely matched non-isomorphic dependency trees. The algorithm selectively relaxes the constraints of the two tree structures while keeping computational complexity polynomial in the length of the sentences. Experimentation with a large Chinese-English corpus shows an improvement in alignment results over the unstructured models of (Brown et al., 1993). ding-etal-2003-algorithm Effectiveness of automatic extraction of bilingual collocations using recursive chain-link-type learning - HiroshiEchizen-ya - KenjiAraki + HiroshiEchizen-ya + KenjiAraki YoshioMomouchi KojiTochinai 2003.mtsummit-papers.14 @@ -196,9 +196,9 @@ Statistical machine translation: rapid development with limited resources GeorgeFoster SimonaGandrabur - PhilippeLanglais + PhilippeLanglais PierrePlamondon - GrahamRussell + GrahamRussell MichelSimard 2003.mtsummit-papers.15 We describe an experiment in rapid development of a statistical machine translation (SMT) system from scratch, using limited resources: under this heading we include not only training data, but also computing power, linguistic knowledge, programming effort, and absolute time. @@ -215,7 +215,7 @@ Transliteration considering context information based on the maximum entropy method IsaoGoto - NaotoKato + NaotoKato NoriyoshiUratani TerumasaEhara 2003.mtsummit-papers.17 @@ -232,8 +232,8 @@ Identification of divergence for <fixed-case>E</fixed-case>nglish to <fixed-case>H</fixed-case>indi <fixed-case>EBMT</fixed-case> - DeepaGupta - NiladriChatterjee + DeepaGupta + NiladriChatterjee 2003.mtsummit-papers.19 Divergence is a key aspect of translation between two languages. Divergence occurs when structurally similar sentences of the source language do not translate into sentences that are similar in structures in the target language. Divergence assumes special significance in the domain of Example-Based Machine Translation (EBMT). An EBMT system generates translation of a given sentence by retrieving similar past translation examples from its example base and then adapting them suitably to meet the current translation requirements. Divergence imposes a great challenge to the success of EBMT. The present work provides a technique for identification of divergence without going into the semantic details of the underlying sentences. This identification helps in partitioning the example database into divergence / non-divergence categories, which in turn should facilitate efficient retrieval and adaptation in an EBMT system. gupta-chatterjee-2003-identification @@ -247,9 +247,9 @@ A simple multilingual machine translation system - JanHajič + JanHajič PetrHomola - VladislavKuboň + VladislavKuboň 2003.mtsummit-papers.21 The multilingual machine translation system described in the first part of this paper demonstrates that the translation memory (TM) can be used in a creative way for making the translation process more automatic (in a way which in fact does not depend on the languages used). The MT system is based upon exploitation of syntactic similarities between more or less related natural languages. It currently covers the translation from Czech to Slovak, Polish and Lithuanian. The second part of the paper also shows that one of the most popular TM based commercial systems, TRADOS, can be used not only for the translation itself, but also for a relatively fast and natural method of evaluation of the translation quality of MT systems. hajic-etal-2003-simple @@ -273,7 +273,7 @@ Has machine translation improved? some historical comparisons - JohnHutchins + JohnHutchins 2003.mtsummit-papers.24 The common assertion that MT systems have improved over the last decades is examined by informal comparisons of translations produced by operational systems in the 1960s, 1970s and 1980s and of translations of the same source texts produced by some currently available commercial and online systems. The scarcity of source and target texts for earlier systems means that the conclusions are consequently tentative and preliminary. hutchins-2003-machine @@ -289,7 +289,7 @@ Lexical knowledge representation with contextonyms HyungsukJi SabinePloux - EricWehrli + EricWehrli 2003.mtsummit-papers.26 Inter-word associations like stagger - drunken, or intra-word sense divisions (e.g. write a diary vs. write an article) are difficult to compile using a traditional lexicographic approach. As an alternative, we present a model that reflects this kind of subtle lexical knowledge. Based on the minimal sense of a word (clique), the model (1) selects contextually related words (contexonyms) and (2) classifies them in a multi-dimensional semantic space. Trained on very large corpora, the model provides relevant, organized contexonyms that reflect the fine-grained connotations and contextual usage of the target word, as well as the distinct senses of homonyms and polysemous words. Further study on the neighbor effect showed that the model can handle the data sparseness problem. ji-etal-2003-lexical @@ -306,14 +306,14 @@ Acquisition of bilingual <fixed-case>MT</fixed-case> lexicons from <fixed-case>OCR</fixed-case>ed dictionaries BurcuKaragol-Ayan DavidDoermann - Bonnie J.Dorr + Bonnie J.Dorr 2003.mtsummit-papers.28 This paper describes an approach to analyzing the lexical structure of OCRed bilingual dictionaries to construct resources suited for machine translation of low-density languages, where online resources are limited. A rule-based, an HMM-based, and a post-processed HMM-based method are used for rapid construction of MT lexicons based on systematic structural clues provided in the original dictionary. We evaluate the effectiveness of our techniques, concluding that: (1) the rule-based method performs better with dictionaries where the font is not an important distinguishing feature for determining information types; (2) the post-processed stochastic method improves the results of the stochastic method for phrasal entries; and (3) Our resulting bilingual lexicons are comprehensive enough to provide the basis for reasonable translation results when compared to human translations. karagol-ayan-etal-2003-acquisition Building a parallel corpus for monologues with clause alignment - HidekiKashioka + HidekiKashioka TakehikoMaruyama HidekiTanaka 2003.mtsummit-papers.29 @@ -323,8 +323,8 @@ <fixed-case>FEMTI</fixed-case>: creating and using a framework for <fixed-case>MT</fixed-case> evaluation MargaretKing - AndreiPopescu-Belis - EduardHovy + AndreiPopescu-Belis + EduardHovy 2003.mtsummit-papers.30 This paper presents FEMTI, a web-based Framework for the Evaluation of Machine Translation in ISLE. FEMTI offers structured descriptions of potential user needs, linked to an overview of technical characteristics of MT systems. The description of possible systems is mainly articulated around the quality characteristics for software product set out in ISO/IEC standard 9126. Following the philosophy set out there and in the related 14598 series of standards, each quality characteristic bottoms out in metrics which may be applied to a particular instance of a system in order to judge how satisfactory the system is with respect to that characteristic. An evaluator can use the description of user needs to help identify the specific needs of his evaluation and the relations between them. He can then follow the pointers to system description to determine what metrics should be applied and how. In the current state of the framework, emphasis is on being exhaustive, including as much as possible of the information available in the literature on machine translation evaluation. Future work will aim at being more analytic, looking at characteristics and metrics to see how they relate to one another, validating metrics and investigating the correlation between particular metrics and human judgement. king-etal-2003-femti @@ -341,14 +341,14 @@ A novel string-to-string distance measure with applications to machine translation evaluation GregorLeusch NicolaUeffing - HermannNey + HermannNey 2003.mtsummit-papers.32 We introduce a string-to-string distance measure which extends the edit distance by block transpositions as constant cost edit operation. An algorithm for the calculation of this distance measure in polynomial time is presented. We then demonstrate how this distance measure can be used as an evaluation criterion in machine translation. The correlation between this evaluation criterion and human judgment is systematically compared with that of other automatic evaluation measures on two translation tasks. In general, like other automatic evaluation measures, the criterion shows low correlation at sentence level, but good correlation at system level. leusch-etal-2003-novel Scalability in <fixed-case>MT</fixed-case> systems - ElisabethMaier + ElisabethMaier AnthonyClarke 2003.mtsummit-papers.33 In this paper we show why scalability is one of the most important aspects for the evaluation of Machine Translation (MT) systems and what scalability entails in the framework of MT. We illustrate the issue of scalability by reporting about an MT solution, which has been chosen in the course of a thorough hands-on evaluation and which in the meantime has been developed from a pilot system to a MT turnkey solution for mid-to large-scale enterprises. @@ -357,9 +357,9 @@ Source language diagnostics for <fixed-case>MT</fixed-case> TerukoMitamura - KathrynBaker + KathrynBaker DavidSvoboda - EricNyberg + EricNyberg 2003.mtsummit-papers.34 This paper presents a source language diagnostic system for controlled translation. Diagnostics were designed and implemented to address the most difficult rewrites for authors, based on an empirical analysis of log files containing over 180,000 sentences. The design and implementation of the diagnostic system are presented, along with experimental results from an empirical evaluation of the completed system. We found that the diagnostic system can correctly identify the problem in 90.2% of the cases. In addition, depending on the type of grammar problem, the diagnostic system may offer a rewritten sentence. We found that 89.4% of the rewritten sentences were correctly rewritten. The results suggest that these methods could be used as the basis for an automatic rewriting system in the future. mitamura-etal-2003-source @@ -380,8 +380,8 @@ Rapid-response machine translation for unexpected languages - Douglas W.Oard - Franz JosefOch + Douglas W.Oard + Franz JosefOch 2003.mtsummit-papers.37 Statistical techniques for machine translation offer promise for rapid development in response to unexpected requirements, but realizing that potential requires rapid acquisition of required resources as well. This paper reports the results of experiments with resources collected in ten days; about 1.3 million words of parallel text from five types of sources and a bilingual term list with about 20,000 term pairs. Systems were trained with resources individually and in combination, using an approach based on alignment templates. The use of all available resources was found to yield the best results in an automatic evaluation using the BLEU measure, but a single resource (the Bible) coupled with a small amount of in-domain manual translation (less than 6,000 words) achieved more than 85% of that upper baseline. With a concerted effort, such a system could be built in a single day. oard-och-2003-rapid @@ -404,9 +404,9 @@ On the use of statistical machine-translation techniques within a memory-based translation system (<fixed-case>AMETRA</fixed-case>) DanielOrtíz - IsmaelGarcía-Varea - FranciscoCasacuberta - AntonioLagarda + IsmaelGarcía-Varea + FranciscoCasacuberta + AntonioLagarda JorgeGonzález 2003.mtsummit-papers.40 The goal of the AMETRA project is to make a computer-assisted translation tool from the Spanish language to the Basque language under the memory-based translation framework. The system is based on a large collection of bilingual word-segments. These segments are obtained using linguistic or statistical techniques from a Spanish-Basque bilingual corpus consisting of sentences extracted from the Basque Country’s of£cial government record. One of the tasks within the global information document of the AMETRA project is to study the combination of well-known statistical techniques for the translation of short sequences and techniques for memory-based translation. In this paper, we address the problem of constructing a statistical module to deal with the task of translating segments. The task undertaken in the AMETRA project is compared with other existing translation tasks, This study includes the results of some preliminary experiments we have carried out using well-known statistical machine translation tools and techniques. @@ -414,7 +414,7 @@ An experiment in comparative evaluation: humans vs. computers - AndreiPopescu-Belis + AndreiPopescu-Belis 2003.mtsummit-papers.41 This paper reports results from an experiment that was aimed at comparing evaluation metrics for machine translation. Implemented as a workshop at a major conference in 2002, the experiment defined an evaluation task, description of the metrics, as well as test data consisting of human and machine translations of two texts. Several metrics, either applicable by human judges or automated, were used, and the overall results were analyzed. It appeared that most human metrics and automated metrics provided in general consistent rankings of the various candidate translations; the ranking of the human translations matched the one provided by translation professionals; and human translations were distinguished from machine translations. popescu-belis-2003-experiment @@ -432,7 +432,7 @@ MunpyoHong Sung-KwonChoi Ki-YoungLee - Sang-KyuPark + Sang-KyuPark 2003.mtsummit-papers.43 This paper describes a sentence pattern-based English-Korean machine translation system backed up by a rule-based module as a solution to the translation of long sentences. A rule-based English-Korean MT system typically suffers from low translation accuracy for long sentences due to poor parsing performance. In the proposed method we only use chunking information on the phrase-level of the parse result (i.e. NP, PP, and AP). By applying a sentence pattern directly to a chunking result, the high performance of analysis and a good quality of translation are expected. The parsing efficiency problem in the traditional RBMT approach is resolved by sentence partitioning, which is generally assumed to have many problems. However, we will show that the sentence partitioning has little side effect, if any, in our approach, because we use only the chunking results for the transfer. The coverage problem of a pattern-based method is overcome by applying sentence pattern matching recursively to the sub-sentences of the input sentence, in case there is no exact matching pattern to the input sentence. roh-etal-2003-proper @@ -449,8 +449,8 @@ <fixed-case>SYSTRAN</fixed-case> new generation: the <fixed-case>XML</fixed-case> translation workflow JeanSenellart - ChristianBoitet - LaurentRomary + ChristianBoitet + LaurentRomary 2003.mtsummit-papers.45 Customization of Machine Translation (MT) is a prerequisite for corporations to adopt the technology. It is therefore important but nonetheless challenging. Ongoing implementation proves that XML is an excellent exchange device between MT modules that efficiently enables interaction between the user and the processes to reach highly granulated structure-based customization. Accomplished through an innovative approach called the SYSTRAN Translation Stylesheet, this method is coherent with the current evolution of the “authoring process”. As a natural progression, the next stage in the customization process is the integration of MT in a multilingual tool kit designed for the “authoring process”. senellart-etal-2003-systran @@ -466,9 +466,9 @@ Example-based rough translation for speech-to-speech translation - MitsuoShimohata - EiichiroSumita - YujiMatsumoto + MitsuoShimohata + EiichiroSumita + YujiMatsumoto 2003.mtsummit-papers.47 Example-based machine translation (EBMT) is a promising translation method for speech-to-speech translation (S2ST) because of its robustness. However, it has two problems in that the performance degrades when input sentences are long and when the style of the input sentences and that of the example corpus are different. This paper proposes example-based rough translation to overcome these two problems. The rough translation method relies on “meaning-equivalent sentences,” which share the main meaning with an input sentence despite missing some unimportant information. This method facilitates retrieval of meaning-equivalent sentences for long input sentences. The retrieval of meaning-equivalent sentences is based on content words, modality, and tense. This method also provides robustness against the style differences between the input sentence and the example corpus. shimohata-etal-2003-example @@ -486,7 +486,7 @@ Evaluating commercial spoken language translation software - HaroldSomers + HaroldSomers YuriSugita 2003.mtsummit-papers.49 While spoken language translation remains a research goal, a crude form of it is widely available commercially for Japanese–English as a pipeline concatenation of speech-to-text recognition (SR), text-to-text translation (MT) and text-to-speech synthesis (SS). This paper proposes and illustrates an evaluation methodology for this noisy channel which tries to quantify the relative amount of degradation in translation quality due to each of the contributing modules. A small pilot experiment involving word-accuracy rate for the SR, and a fidelity evaluation for the MT and SS modules is proposed in which subjects are asked to paraphrase translated and/or synthesised sentences from a tourist’s phrasebook. Results show (as expected) that MT is the “noisiest” channel, with SS contributing least noise. The concatenation of the three channels is worse than could be predicted from the performance of each as individual tasks. @@ -495,14 +495,14 @@ Translation selection for <fixed-case>J</fixed-case>apanese-<fixed-case>E</fixed-case>nglish noun-noun compounds TakaakiTanaka - TimothyBaldwin + TimothyBaldwin 2003.mtsummit-papers.50 We present a method for compositionally translating Japanese NN compounds into English, using a word-level transfer dictionary and target language monolingual corpus. The method interpolates over fully-specified and partial translation data, based on corpus evidence. In evaluation, we demonstrate that interpolation over the two data types is superior to using either one, and show that our method performs at an F-score of 0.68 over translation-aligned inputs and 0.66 over a random sample of 500 NN compounds. tanaka-baldwin-2003-translation Evaluation of machine translation and its evaluation - Joseph P.Turian + Joseph P.Turian LukeShen I. DanMelamed 2003.mtsummit-papers.51 @@ -513,20 +513,20 @@ Confidence measures for statistical machine translation NicolaUeffing KlausMacherey - HermannNey + HermannNey 2003.mtsummit-papers.52 In this paper, we present several confidence measures for (statistical) machine translation. We introduce word posterior probabilities for words in the target sentence that can be determined either on a word graph or on an N best list. Two alternative confidence measures that can be calculated on N best lists are proposed. The performance of the measures is evaluated on two different translation tasks: on spontaneously spoken dialogues from the domain of appointment scheduling, and on a collection of technical manuals. ueffing-etal-2003-confidence The <fixed-case>CMU</fixed-case> statistical machine translation system - StephanVogel - YingZhang + StephanVogel + YingZhang FeiHuang AliciaTribble AshishVenugopal BingZhao - AlexWaibel + AlexWaibel 2003.mtsummit-papers.53 In this paper we describe the components of our statistical machine translation system. This system combines phrase-to-phrase translations extracted from a bilingual corpus using different alignment approaches. Special methods to extract and align named entities are used. We show how a manual lexicon can be incorporated into the statistical system in an optimized way. Experiments on Chinese-to-English and Arabic-to-English translation tasks are presented. vogel-etal-2003-cmu @@ -534,7 +534,7 @@ Example-based decoding for statistical machine translation TaroWatanabe - EiichiroSumita + EiichiroSumita 2003.mtsummit-papers.54 This paper presents a decoder for statistical machine translation that can take advantage of the example-based machine translation framework. The decoder presented here is based on the greedy approach to the decoding problem, but the search is initiated from a similar translation extracted from a bilingual corpus. The experiments on multilingual translations showed that the proposed method was far superior to a word-by-word generation beam search algorithm. watanabe-sumita-2003-example @@ -558,7 +558,7 @@ Customizing complex lexical entries for high-quality <fixed-case>MT</fixed-case> - RémiZajac + RémiZajac ElkeLange JinYang 2003.mtsummit-papers.57 @@ -628,7 +628,7 @@ A multi-language translation example browser IsaoGoto - NaotoKato + NaotoKato NoriyoshiUratani TerumasaEhara TadashiKumano @@ -647,7 +647,7 @@ <fixed-case>C</fixed-case>at<fixed-case>V</fixed-case>ar: a database of categorial variations for <fixed-case>E</fixed-case>nglish NizarHabash - BonnieDorr + BonnieDorr 2003.mtsummit-systems.9 We present a new large-scale database called “CatVar” (Habash and Dorr, 2003) which contains categorial variations of English lexemes. Due to the prevalence of cross-language categorial variation in multilingual applications, our categorial-variation resource may serve as an integral part of a diverse range of natural language applications. Thus, the research reported herein overlaps heavily with that of the machine-translation, lexicon-construction, and information-retrieval communities. We demonstrate this database, embedded in a graphical interface; we also show a GUI for user input of corrections to the database. habash-dorr-2003-catvar @@ -682,11 +682,11 @@ An integrated system for source language checking, analysis and term management - EricNyberg + EricNyberg TerukoMitamura DavidSvoboda JeongwooKo - KathrynBaker + KathrynBaker JeffreyMicher 2003.mtsummit-systems.13 This paper presents an overview of the tools provided by KANTOO MT system for controlled source language checking, source text analysis, and terminology management. The steps in each process are described, and screen images are provided to illustrate the system architecture and example tool interfaces. @@ -694,11 +694,11 @@ <fixed-case>MATS</fixed-case> – a glass box machine translation system - AnnaSågvall Hein + AnnaSågvall Hein EvaForsbom PerWeijnitz EbbaGustavii - JörgTiedemann + JörgTiedemann 2003.mtsummit-systems.14 sagvall-hein-etal-2003-mats @@ -712,7 +712,7 @@ <fixed-case>T</fixed-case>rans<fixed-case>T</fixed-case>ype2 - a new paradigm for translation automation - Antonio S.Valderrábanos + Antonio S.Valderrábanos JoséEsteban LuisIraola 2003.mtsummit-systems.16 @@ -721,7 +721,7 @@ Translation of words in context - EricWehrli + EricWehrli 2003.mtsummit-systems.17 TWiC is an on-line word and expression translation syste m which uses a powerful parser to (i) properly identify the relevant lexical units, (ii) retrieve the base form of the selected word and (iii) recognize the presence of a multiword expression (compound, idiom, collocation) the selected word may be part of. The conjunction of state-of-the-art natural language parsing, multiword expression identification and large bilingual databases provides a powerful and effective tool for people who want to read on-line material in a foreign language which they are not completely fluent in. A full prototype version of TWiC has been completed for the English-French pair of languages. wehrli-2003-translation @@ -737,7 +737,7 @@ <fixed-case>MT</fixed-case> customization - RémiZajac + RémiZajac 2003.mtsummit-tutorials.1 zajac-2003-mt @@ -764,20 +764,20 @@ <fixed-case>SMT</fixed-case> – <fixed-case>TIDES</fixed-case> – and all that - StephanVogel + StephanVogel 2003.mtsummit-semit.2.Presentation.pdf vogel-2003-smt The <fixed-case>CMU</fixed-case> <fixed-case>A</fixed-case>rabic-to-<fixed-case>E</fixed-case>nglish statistical <fixed-case>MT</fixed-case> system AliciaTribble - StephanVogel + StephanVogel 2003.mtsummit-semit.3.Presentation.pdf tribble-vogel-2003-cmu Issues in <fixed-case>A</fixed-case>rabic <fixed-case>MT</fixed-case> - AlexFraser + AlexFraser 2003.mtsummit-semit.4.Presentation.pdf fraser-2003-issues @@ -799,7 +799,7 @@ Application of corpus-based techniques to <fixed-case>A</fixed-case>mharic texts - SisayFissaha + SisayFissaha JohannHaller 2003.mtsummit-semit.7 A number of corpus-based techniques have been used in the development of natural language processing application. One area in which these techniques have extensively been applied is lexical development. The current work is being undertaken in the context of a machine translation project in which lexical development activities constitute a significant portion of the overall task. In the first part, we applied corpus-based techniques to the extraction of collocations from Amharic text corpus. Analysis of the output reveals important collocations that can usefully be incorporated in the lexicon. This is especially true for the extraction of idiomatic expressions. The patterns of idiom formation which are observed in a small manually collected data enabled extraction of large set of idioms which otherwise may be difficult or impossible to recognize. Furthermore, preliminary results of other corpus-based techniques, that is, clustering and classification, that are currently being under investigation are presented. The results show that clustering performed no better than the frequency base line whereas classification showed a clear performance improvement over the frequency base line. This in turn suggests the need to carry out further experiments using large sets of data and more contextual information. @@ -807,8 +807,8 @@ Towards semantic composition of <fixed-case>A</fixed-case>rabic: a λ-<fixed-case>DRT</fixed-case> based approach - BassamHaddad - MustafaYaseen + BassamHaddad + MustafaYaseen 2003.mtsummit-semit.8 This paper addresses issues related to employing logic-based semantic composition as a meaning representation for Arabic within a unification-based syntax-semantics interface. Since semantic representation has to be compositional on the level of semantic processing λ-calculus based on Discourse Representation Theory can be utilized as a helpful and practical technique for the semantic construction of ARABIC in Arabic understanding systems. As ARABIC computational linguistics is also short of feature-based compositional syntax-semantics interfaces we hope that this approach might be a further motivation to redirect research to modern semantic construction techniques for developing an adequate model of semantic processing for Arabic and even no existing formal theory is capable to provide a complete and consistent account of all phenomena involved in Arabic semantic processing. haddad-yaseen-2003-towards @@ -864,7 +864,7 @@ A 45-hour computers in translation course - Mikel L.Forcada + Mikel L.Forcada 2003.mtsummit-tttt.2 This paper describes how a 45-hour Computers in Translation course is actually taught to 3rd-year translation students at the University of Alacant; the course described started in year 1995–1996 and has undergone substantial redesign until its present form. It is hoped that this description may be of use to instructors who are forced to teach a similar subject in such as small slot of time and need some design guidelines. forcada-2003-45 @@ -879,8 +879,8 @@ Teaching machine translation in a graduate language technologies program TerukoMitamura - EricNyberg - RobertFrederking + EricNyberg + RobertFrederking 2003.mtsummit-tttt.4 This paper describes a graduate-level machine translation (MT) course taught at the Language Technologies Institute at Carnegie Mellon University. Most of the students in the course have a background in computer science. We discuss what we teach (the course syllabus), and how we teach it (lectures, homeworks, and projects). The course has evolved steadily over the past several years to incorporate refinements in the set of course topics, how they are taught, and how students “learn by doing”. The course syllabus has also evolved in response to changes in the field of MT and the role that MT plays in various social contexts. mitamura-etal-2003-teaching @@ -888,14 +888,14 @@ Teaching the automation of the translation process to future translators BenoîtRobichaud - Marie-ClaudeL’Homme + Marie-ClaudeL’Homme 2003.mtsummit-tttt.5 This paper describes the approach used for introducing CAT tools and MT systems into a course offered in translation curricula at the Université de Montréal (Canada). It focuses on the automation of the translation process and presents various strategies that have been developed to help students progressively acquire the knowledge necessary to understand and undertake the tasks involved in the automation of translation. We begin with very basic principles and techniques, and move towards complex processes of advanced CAT and revision tools, including ultimately MT systems. As we will see, teaching concepts related to MT serves both as a wrap-up for the subjects dealt with during the semester and a way to highlight the tasks involved in the transfer phase of translation. robichaud-lhomme-2003-teaching <fixed-case>P</fixed-case>rolog models of classical approaches to <fixed-case>MT</fixed-case> - HaroldSomers + HaroldSomers 2003.mtsummit-tttt.6 This paper describes a number of “toy” MT systems written in Prolog, designed as programming exercises and illustrations of various approaches to MT. The systems include a dumb word-for-word system, DCG-based “transfer” system, an interlingua-based system with an LFG-like interface structure, a first-generation-like Russian-English system, an interactive system, and an implementation based on early example-based MT. somers-2003-prolog @@ -903,7 +903,7 @@ Specification and evaluation of machine translation toy systems - criteria for laboratory assignments CristinaVertan - Walthervon Hahn + Walthervon Hahn 2003.mtsummit-tttt.7 Implementation of machine translation “toy” systems is a good practical exercise especially for computer science students. Our aim in a series of courses on MT in 2002 was to make students familiar both with typical problems of Machine Translation in particular and natural language processing in general, as well as with software implementation. In order to simulate a software implementation proc- ess as realistic as possible, we introduced more than 20 evaluation criteria to be filled by the students when they evaluated their own products. The criteria go far beyond such “toy” systems, but they should demonstrate the students, what a real software evaluation means, and which are the particularities of Machine Translation Evaluation. vertan-hahn-2003-specification @@ -927,8 +927,8 @@ Evaluation techniques applied to domain tuning of <fixed-case>MT</fixed-case> lexicons - Necip FazilAyan - Bonnie J.Dorr + Necip FazilAyan + Bonnie J.Dorr OkanKolak 2003.mtsummit-eval.1 ayan-etal-2003-evaluation @@ -941,7 +941,7 @@ Pragmatics-based translation and <fixed-case>MT</fixed-case> evaluation - DavidFarwell + DavidFarwell StephenHelmreich 2003.mtsummit-eval.3 In this paper the authors wish to present a view of translation equivalence related to a pragmatics-based approach to machine translation. We will argue that current evaluation methods which assume that there is a predictable correspondence between language forms cannot adequately account for this view. We will then describe a method for objectively determining the relative equivalence of two texts. However, given the need for both an open world assumption and non-monotonic inferencing, such a method cannot be realistically implemented and therefore certain "classic" evaluation strategies will continue to be preferable as practical methods of evaluation. @@ -956,8 +956,8 @@ Granularity in <fixed-case>MT</fixed-case> evaluation - FlorenceReeder - JohnWhite + FlorenceReeder + JohnWhite 2003.mtsummit-eval.5 This paper looks at granularity issues in machine translation evaluation. We start with work by (White, 2001) who examined the correlation between intelligibility and fidelity at the document level. His work showed that intelligibility and fidelity do not correlate well at the document level. These dissimilarities lead to our investigation of evaluation granularity. In particular, we revisit the intelligibility and fidelity relationship at the corpus level. We expect these to support certain assumptions in both evaluations as well as indicate issues germane to future evaluations. reeder-white-2003-granularity @@ -966,7 +966,7 @@ Task-based <fixed-case>MT</fixed-case> evaluation: tackling software, experimental design, & statistical models. CalandraTate SooyonLee - Clare R.Voss + Clare R.Voss 2003.mtsummit-eval.6 Even with recent, renewed attention to MT evaluation—due in part to n-gram-based metrics (Papineni et al., 2001; Doddington, 2002) and the extensive, online catalogue of MT metrics on the ISLE project (Hovy et al., 2001, 2003), few reports involving task-based metrics have surfaced. This paper presents our work on three parts of task-based MT evaluation: (i) software to track and record users' task performance via a browser, run from a desktop computer or remotely over the web, (ii) factorial experimental design with replicate observations to compare the MT engines, based on the accuracy of users' task responses, and (iii) the use of chi-squared and generalized linear models (GLMs) to permit finer-grained data analyses. We report on the experimental results of a six-way document categorization task, used for the evaluation of three Korean-English MT engines. The statistical models of the probabilities of correct responses yield an ordering of the MT engines, with one engine having a statistically significant lead over the other two. Future research will involve testing user performance on linguistically more complex tasks, as well as extending our initial GLMs with the documents' Bleu scores as variables, to test the scores as independent predictors of task results. tate-etal-2003-task diff --git a/data/xml/2003.tc.xml b/data/xml/2003.tc.xml index 279bb3de75..ebcfc6a5d2 100644 --- a/data/xml/2003.tc.xml +++ b/data/xml/2003.tc.xml @@ -53,7 +53,7 @@ Making a Business Case for Localisation - ReinhardSchäler + ReinhardSchäler 2003.tc-1.8 schaler-2003-making diff --git a/data/xml/2004.amta.xml b/data/xml/2004.amta.xml index 962dd06f58..d9670b5d82 100644 --- a/data/xml/2004.amta.xml +++ b/data/xml/2004.amta.xml @@ -53,8 +53,8 @@ A speech-to-speech translation system for <fixed-case>C</fixed-case>atalan, <fixed-case>S</fixed-case>panish, and <fixed-case>E</fixed-case>nglish VictoriaArranz - ElisabetComelles - DavidFarwell + ElisabetComelles + DavidFarwell ClimentNadeu JaumePadrell AlbertFebrer @@ -67,8 +67,8 @@ Multi-Align: combining linguistic and statistical techniques to improve alignments for adaptable <fixed-case>MT</fixed-case> - Necip FazilAyan - BonnieDorr + Necip FazilAyan + BonnieDorr NizarHabash 17-26 https://link.springer.com/chapter/10.1007/978-3-540-30194-3_3 @@ -77,7 +77,7 @@ A modified Burrows-Wheeler transform for highly scalable example-based translation - Ralf D.Brown + Ralf D.Brown 27-36 https://link.springer.com/chapter/10.1007/978-3-540-30194-3_4 The Burrows-Wheeler Transform (BWT) was originally developed for data compression, but can also be applied to indexing text. In this paper, an adaptation of the BWT to word-based indexing of the training corpus for an example-based machine translation (EBMT) system is presented. The adapted BWT embeds the necessary information to retrieve matched training instances without requiring any additional space and can be instantiated in a compressed form which reduces disk space and memory requirements by about 40% while still remaining searchable without decompression. Both the speed advantage from O(log N) lookups compared to the O(N) lookups in the inverted-file index which had previously been used and the structure of the index itself act as enablers for additional capabilities and run-time speed. Because the BWT groups all instances of any n-gram together, it can be used to quickly enumerate the most-frequent n-grams, for which translations can be precomputed and stored, resulting in an order-of-magnitude speedup at run time. @@ -95,7 +95,7 @@ Normalizing <fixed-case>G</fixed-case>erman and <fixed-case>E</fixed-case>nglish inflectional morphology to improve statistical word alignment - SimonCorston-Oliver + SimonCorston-Oliver MichaelGamon 48-57 https://link.springer.com/chapter/10.1007/978-3-540-30194-3_6 @@ -114,8 +114,8 @@ A fluency error categorization scheme to guide automated machine translation evaluation DebbieElliott - AnthonyHartley - EricAtwell + AnthonyHartley + EricAtwell 64-73 https://link.springer.com/chapter/10.1007/978-3-540-30194-3_8 Existing automated MT evaluation methods often require expert human translations. These are produced for every language pair evaluated and, due to this expense, subsequent evaluations tend to rely on the same texts, which do not necessarily reflect real MT use. In contrast, we are designing an automated MT evaluation system, intended for use by post-editors, purchasers and developers, that requires nothing but the raw MT output. Furthermore, our research is based on texts that reflect corporate use of MT. This paper describes our first step in system design: a hierarchical classification scheme of fluency errors in English MT output, to enable us to identify error types and frequencies, and guide the selection of errors for automated detection. We present results from the statistical analysis of 20,000 words of MT output, manually annotated using our classification scheme, and describe correlations between error frequencies and human scores for fluency and adequacy. @@ -132,7 +132,7 @@ Counting, measuring, ordering: translation problems and solutions StephenHelmreich - DavidFarwell + DavidFarwell 86-93 https://link.springer.com/chapter/10.1007/978-3-540-30194-3_10 This paper describes some difficulties associated with the translation of numbers (scalars) used for counting, measuring, or selecting items or properties. A set of problematic issues is described, and the presence of these difficulties is quantified by examining a set of texts and translations. An approach to a solution is suggested. @@ -150,7 +150,7 @@ The <fixed-case>G</fixed-case>eorgetown-<fixed-case>IBM</fixed-case> experiment demonstrated in <fixed-case>J</fixed-case>anuary 1954 - W. JohnHutchins + W. JohnHutchins 102-114 https://link.springer.com/chapter/10.1007/978-3-540-30194-3_12 The public demonstration of a Russian-English machine translation system in New York in January 1954 – a collaboration of IBM and Georgetown University – caused a great deal of public interest and much controversy. Although a small-scale experiment of just 250 words and six ‘grammar’ rules it raised expectations of automatic systems capable of high quality translation in the near future. This paper describes the system, its background, its impact and its implications. @@ -185,7 +185,7 @@ The significance of recall in automatic metrics for <fixed-case>MT</fixed-case> evaluation - AlonLavie + AlonLavie KenjiSagae ShyamsundarJayaraman 134-143 @@ -195,8 +195,8 @@ Alignment of bilingual named entities in parallel corpora using statistical model - Chun-JenLee - Jason S.Chang + Chun-JenLee + Jason S.Chang Thomas C.Chuang 144-153 https://link.springer.com/chapter/10.1007/978-3-540-30194-3_17 @@ -206,7 +206,7 @@ Weather report translation using a translation memory ThomasLeplus - PhilippeLanglais + PhilippeLanglais GuyLapalme 154-163 https://link.springer.com/chapter/10.1007/978-3-540-30194-3_18 @@ -225,8 +225,8 @@ Extraction of name and transliteration in monolingual and parallel corpora TracyLin - Jian-ChengWu - Jason S.Chang + Jian-ChengWu + Jason S.Chang 177-186 https://link.springer.com/chapter/10.1007/978-3-540-30194-3_20 Named-entities in free text represent a challenge to text analysis in Machine Translation and Cross Language Information Retrieval. These phrases are often transliterated into another language with a different sound inventory and writing system. Named-entities found in free text are often not listed in bilingual dictionaries. Although it is possible to identify and translate named-entities on the fly without a list of proper names and transliterations, an extensive list of existing transliterations certainly will ensure high precision rate. We use a seed list of proper names and transliterations to train a Machine Transliteration Model. With the model it is possible to extract proper names and their transliterations in monolingual or parallel corpora with high precision and recall rates. @@ -234,9 +234,9 @@ Error analysis of two types of grammar for the purpose of automatic rule refinement - AriadnaFont Llitjós + AriadnaFont Llitjós KatharinaProbst - JaimeCarbonell + JaimeCarbonell 187-196 https://link.springer.com/chapter/10.1007/978-3-540-30194-3_21 This paper compares a manually written MT grammar and a grammar learned automatically from an English-Spanish elicitation corpus with the ultimate purpose of automatically refining the translation rules. The experiment described here shows that the kind of automatic refinement operations required to correct a translation not only varies depending on the type of error, but also on the type of grammar. This paper describes the two types of grammars and gives a detailed error analysis of their output, indicating what kinds of refinements are required in each case. @@ -265,7 +265,7 @@ A structurally diverse minimal corpus for eliciting structural mappings between languages KatharinaProbst - AlonLavie + AlonLavie 217-226 https://link.springer.com/chapter/10.1007/978-3-540-30194-3_24 We describe an approach to creating a small but diverse corpus in English that can be used to elicit information about any target language. The focus of the corpus is on structural information. The resulting bilingual corpus can then be used for natural language processing tasks such as inferring transfer mappings for Machine Translation. The corpus is sufficiently small that a bilingual user can translate and word-align it within a matter of hours. We describe how the corpus is created and how its structural diversity is ensured. We then argue that it is not necessary to introduce a large amount of redundancy into the corpus. This is shown by creating an increasingly redundant corpus and observing that the information gained converges as redundancy increases. @@ -273,7 +273,7 @@ Investigation of intelligibility judgments - FlorenceReeder + FlorenceReeder 227-235 https://link.springer.com/chapter/10.1007/978-3-540-30194-3_25 This paper describes an intelligibility snap-judgment test. In this exercise, participants are shown a series of human translations and machine translations and are asked to determine whether the author was human or machine. The experiment shows that snap judgments on intelligibility are made successfully and that system rankings on snap judgments are consistent with more detailed intelligibility measures. In addition to demonstrating a quick intelligibility judgment, representing on a few minutes time of each participant, it details the types of errors which led to the snap judgments. @@ -281,16 +281,16 @@ Interlingual annotation for <fixed-case>MT</fixed-case> development - FlorenceReeder - BonnieDorr - DavidFarwell + FlorenceReeder + BonnieDorr + DavidFarwell NizarHabash StephenHelmreich - EduardHovy - LoriLevin + EduardHovy + LoriLevin TerukoMitamura - KeithMiller - OwenRambow + KeithMiller + OwenRambow AdvaithSiddharthan 236-245 https://link.springer.com/chapter/10.1007/978-3-540-30194-3_26 @@ -324,7 +324,7 @@ A super-function based <fixed-case>J</fixed-case>apanese-<fixed-case>C</fixed-case>hinese machine translation system for business users - XinZhao + XinZhao FujiRen StefanVoß 272-281 diff --git a/data/xml/2004.eamt.xml b/data/xml/2004.eamt.xml index 65362d07c2..46ff715e7a 100644 --- a/data/xml/2004.eamt.xml +++ b/data/xml/2004.eamt.xml @@ -27,7 +27,7 @@ Disambiguating translation strategies in <fixed-case>MT</fixed-case> using automatic named entity recognition BogdanBabych - AnthonyHartley + AnthonyHartley 2004.eamt-1.3 babych-hartley-2004-disambiguating @@ -42,8 +42,8 @@ Challenges in using an example-based <fixed-case>MT</fixed-case> system for a transnational digital government project ViolettaCavalli-Sforza - Ralf D.Brown - Jaime G.Carbonell + Ralf D.Brown + Jaime G.Carbonell Peter G.Jansen Jae DongKim 2004.eamt-1.5 @@ -51,7 +51,7 @@ Formal analysis of some aspects of <fixed-case>A</fixed-case>mharic noun phrases - Sisay FissahaAdafre + Sisay FissahaAdafre 2004.eamt-1.6 adafre-2004-formal @@ -79,14 +79,14 @@ Translation memory as a robust example-based translation system GáborHodász TamásGrőbler - BalázsKis + BalázsKis 2004.eamt-1.10 hodasz-etal-2004-translation A translation model for languages of accessing countries PetrHomola - VladislavKubon + VladislavKubon 2004.eamt-1.11 homola-kubon-2004-translation @@ -100,25 +100,25 @@ Towards an automated evaluation of an embedded <fixed-case>MT</fixed-case> system J.Laoudi C.Tate - Clare R.Voss + Clare R.Voss 2004.eamt-1.13 laoudi-etal-2004-towards A trainable transfer-based <fixed-case>MT</fixed-case> approach for languages with limited resources - AlonLavie + AlonLavie KatharinaProbst ErikPeterson - StephanVogel - LoriLevin - AriadnaFont-Llitjos - JaimeCarbonell + StephanVogel + LoriLevin + AriadnaFont-Llitjos + JaimeCarbonell 2004.eamt-1.14 lavie-etal-2004-trainable The <fixed-case>NEMLAR</fixed-case> project on <fixed-case>A</fixed-case>rabic language resources - BenteMaegaard + BenteMaegaard 2004.eamt-1.15 maegaard-2004-nemlar @@ -131,8 +131,8 @@ Moose: a robust high-performance parser and generator - GáborPrószéky - LászlóTihanyi + GáborPrószéky + LászlóTihanyi GáborUgray 2004.eamt-1.17 proszeky-etal-2004-moose diff --git a/data/xml/2004.iwslt.xml b/data/xml/2004.iwslt.xml index 3168437374..56eba1a6eb 100644 --- a/data/xml/2004.iwslt.xml +++ b/data/xml/2004.iwslt.xml @@ -14,29 +14,29 @@ MarcelloFederico NorikoKando HiromiNakaiwa - MichaelPaul - Jun’ichiTsujii + MichaelPaul + Jun’ichiTsujii 2004.iwslt-evaluation.1 akiba-etal-2004-overview <fixed-case>EBMT</fixed-case>, <fixed-case>SMT</fixed-case>, hybrid and more: <fixed-case>ATR</fixed-case> spoken language translation system - EiichiroSumita + EiichiroSumita YasuhiroAkiba TakaoDoi AndrewFinch KenjiImamura HideoOkuma - MichaelPaul - MitsuoShimohata + MichaelPaul + MitsuoShimohata TaroWatanabe 2004.iwslt-evaluation.2 sumita-etal-2004-ebmt Towards fairer evaluations of commercial <fixed-case>MT</fixed-case> systems on basic travel expressions corpora - HerveBlanchon - ChristianBoitet + HerveBlanchon + ChristianBoitet FrancisBrunet-Manquat MutsukoTomokiyo AgnesHamon @@ -57,7 +57,7 @@ Experimenting with phrase-based statistical translation within the <fixed-case>IWSLT</fixed-case> <fixed-case>C</fixed-case>hinese-to-<fixed-case>E</fixed-case>nglish shared translation task - PhilippeLanglais + PhilippeLanglais MichaelCarl OliverStreiter 2004.iwslt-evaluation.5 @@ -66,7 +66,7 @@ <fixed-case>IBM</fixed-case> spoken language translation system evaluation Young-SukLee - SalimRoukos + SalimRoukos 2004.iwslt-evaluation.6 lee-roukos-2004-ibm @@ -96,8 +96,8 @@ EmilEttelaie KevinKnight DanielMarcu - Dragos StefanMunteanu - Franz J.Och + Dragos StefanMunteanu + Franz J.Och IgnacioThayer QuamrulTipu 2004.iwslt-evaluation.9 @@ -106,16 +106,16 @@ The <fixed-case>ISL</fixed-case> <fixed-case>EDTRL</fixed-case> system JuergenReichert - AlexWaibel + AlexWaibel 2004.iwslt-evaluation.10 reichert-waibel-2004-isl The <fixed-case>ISL</fixed-case> statistical translation system for spoken language translation - StephanVogel + StephanVogel SanjikaHewavitharana MuntsinKolss - AlexWaibel + AlexWaibel 2004.iwslt-evaluation.11 vogel-etal-2004-isl @@ -123,7 +123,7 @@ Multi-engine based <fixed-case>C</fixed-case>hinese-to-<fixed-case>E</fixed-case>nglish translation system YuncunZuo YuZhou - ChengqingZong + ChengqingZong 2004.iwslt-evaluation.12 zuo-etal-2004-multi @@ -132,7 +132,7 @@ OliverBender RichardZens EvgenyMatusov - HermannNey + HermannNey 2004.iwslt-evaluation.13 bender-etal-2004-alignment @@ -161,9 +161,9 @@ Spoken dialogue translation systems evaluation: results, new trends, problems and proposals - HerveBlanchon - ChristianBoitet - LaurentBesacier + HerveBlanchon + ChristianBoitet + LaurentBesacier 2004.iwslt-papers.1 blanchon-etal-2004-spoken @@ -178,38 +178,38 @@ Phrase-based alignment combining corpus cooccurrences and linguistic knowledge Adriade Gispert Jose B.Marino - Josep M.Crego + Josep M.Crego 2004.iwslt-papers.3 de-gispert-etal-2004-phrase On feature selection in maximum entropy approach to statistical concept-based speech-to-speech translation LiangGu - YuqingGao + YuqingGao 2004.iwslt-papers.4 gu-gao-2004-feature <fixed-case>P</fixed-case>olyphra<fixed-case>Z</fixed-case>: a tool for the quantitative and subjective evaluation of parallel corpora NajehHajlaoui - ChristianBoitet + ChristianBoitet 2004.iwslt-papers.5 hajlaoui-boitet-2004-polyphraz-tool Toward named entity extraction and translation in spoken language translation FeiHuang - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel 2004.iwslt-papers.6 huang-etal-2004-toward Statistical machine translation of spontaneous speech with scarce resources EvgenyMatusov - MajaPopovic + MajaPopovic RichardZens - HermannNey + HermannNey 2004.iwslt-papers.7 matusov-etal-2004-statistical @@ -219,7 +219,7 @@ KonstantinMarkov TakatoshiJitsuhiro Jin-SongZhang - HirofumiYamamoto + HirofumiYamamoto GenichiroKikui 2004.iwslt-papers.8 nakamura-etal-2004-multi diff --git a/data/xml/2004.jeptalnrecital.xml b/data/xml/2004.jeptalnrecital.xml index af9cbadabd..1e3d8feeb5 100644 --- a/data/xml/2004.jeptalnrecital.xml +++ b/data/xml/2004.jeptalnrecital.xml @@ -20,7 +20,7 @@ Evaluation de méthodes de segmentation thématique linéaire non supervisées après adaptation au français LaurianneSitbon - PatriceBellot + PatriceBellot 1–10 Nous proposons une évaluation de différentes méthodes et outils de segmentation thématique de textes. Nous présentons les outils de segmentation linéaire et non supervisée DotPlotting, Segmenter, C99, TextTiling, ainsi qu’une manière de les adapter et de les tester sur des documents français. Les résultats des tests montrent des différences en performance notables selon les sujets abordés dans les documents, et selon que le nombre de segments à trouver est fixé au préalable par l’utilisateur. Ces travaux font partie du projet Technolangue AGILE-OURAL. 2004.jeptalnrecital-long.1 @@ -117,8 +117,8 @@ Deux premières étapes vers les documents auto-explicatifs - HervéBlanchon - ChristianBoitet + HervéBlanchon + ChristianBoitet 100–109 Dans le cadre du projet LIDIA, nous avons montré que dans de nombreuses situations, la TA Fondée sur le Dialogue (TAFD) pour auteur monolingue peut offrir une meilleure solution en traduction multicible que les aides aux traducteurs, ou la traduction avec révision, même si des langages contrôlés sont utilisés. Nos premières expériences ont mis en évidence le besoin de conserver les « intentions de l’auteur » au moyen « d’annotations de désambiguïsation ». Ces annotations permettent de transformer le document source en un Document Auto-Explicatif (DAE). Nous présentons ici une solution pour intégrer ces annotations dans un document XML et les rendre visibles et utilisables par un lecteur pour une meilleure compréhension du « vrai contenu » du document. Le concept de Document Auto-Explicatif pourrait changer profondément notre façon de comprendre des documents importants ou écrits dans un style complexe. Nous montrerons aussi qu’un DAE, traduit dans une langue cible L, pourrait aussi être transformé, sans interaction humaine, en un DAE en langue L si un analyseur et un désambiguïseur sont disponibles pour cette langue L. Ainsi, un DAE pourrait être utilisé dans un contexte monolingue, mais aussi dans un contexte multilingue sans travail humain additionnel. 2004.jeptalnrecital-long.11 @@ -138,7 +138,7 @@ Extraction de terminologies bilingues à partir de corpus comparables EmmanuelMorin SamuelDufour-Kowalski - BéatriceDaille + BéatriceDaille 120–129 Cet article présente une méthode pour extraire, à partir de corpus comparables d’un domaine de spécialité, un lexique bilingue comportant des termes simples et complexes. Cette méthode extrait d’abord les termes complexes dans chaque langue, puis les aligne à l’aide de méthodes statistiques exploitant le contexte des termes. Après avoir rappelé les difficultés que pose l’alignement des termes complexes et précisé notre approche, nous présentons le processus d’extraction de terminologies bilingues adopté et les ressources utilisées pour nos expérimentations. Enfin, nous évaluons notre approche et démontrons son intérêt en particulier pour l’alignement de termes complexes non compositionnels. 2004.jeptalnrecital-long.13 @@ -147,7 +147,7 @@ Traduction, traduction de mots, traduction de phrases - ÉricWehrli + ÉricWehrli 130–138 Une des conséquences du développement d’Internet et de la globalisation des échanges est le nombre considérable d’individus amenés à consulter des documents en ligne dans une langue autre que la leur. Après avoir montré que ni la traduction automatique, ni les aides terminologiques en ligne ne constituent une réponse pleinement adéquate à ce nouveau besoin, cet article présente un système d’aide à la lecture en langue étrangère basé sur un analyseur syntaxique puissant. Pour un mot sélectionné par l’usager, ce système analyse la phrase entière, de manière (i) à choisir la lecture du mot sélectionné la mieux adaptée au contexte morphosyntaxique et (ii) à identifier une éventuelle expression idiomatique ou une collocation dont le mot serait un élément. Une démonstration de ce système, baptisé TWiC (Translation of words in context “Traduction de mots en contexte”), pourra être présentée. 2004.jeptalnrecital-long.14 @@ -167,7 +167,7 @@ Repérage et exploitation d’énoncés définitoires en corpus pour l’aide à la construction d’ontologie VéroniqueMalaisé - PierreZweigenbaum + PierreZweigenbaum BrunoBachimont 149–158 Pour construire une ontologie, un modéliseur a besoin d’objecter des informations sémantiques sur les termes principaux de son domaine d’étude. Les outils d’exploration de corpus peuvent aider à repérer ces types d’information, et l’identification de couples d’hyperonymes a fait l’objet de plusieurs travaux. Nous proposons d’exploiter des énoncés définitoires pour extraire d’un corpus des informations concernant les trois axes de l’ossature ontologique : l’axe vertical, lié à l’hyperonymie, l’axe horizontal, lié à la co-hyponymie et l’axe transversal, lié aux relations du domaine. Après un rappel des travaux existants en repérage d’énoncés définitoires en TAL, nous développons la méthode que nous avons mise en place, puis nous présentons son évaluation et les premiers résultats obtenus. Leur repérage atteint de 10% à 69% de précision suivant les patrons, celui des unités lexicales varie de 31% à 56%, suivant le référentiel adopté. @@ -209,7 +209,7 @@ Désambiguïsation de corpus monolingues par des approches de type <fixed-case>L</fixed-case>esk FlorentinaVasilescu - PhilippeLanglais + PhilippeLanglais 189–198 Cet article présente une analyse détaillée des facteurs qui déterminent les performances des approches de désambiguïsation dérivées de la méthode de Lesk (1986). Notre étude porte sur une série d’expériences concernant la méthode originelle de Lesk et des variantes que nous avons adaptées aux caractéristiques de WORDNET. Les variantes implémentées ont été évaluées sur le corpus de test de SENSEVAL2, English All Words, ainsi que sur des extraits du corpus SEMCOR. Notre évaluation se base d’un côté, sur le calcul de la précision et du rappel, selon le modèle de SENSEVAL, et d’un autre côté, sur une taxonomie des réponses qui permet de mesurer la prise de risque d’un décideur par rapport à un système de référence. 2004.jeptalnrecital-long.20 @@ -256,7 +256,7 @@ Une mesure de pertinence pour le tri de l’information dans un index de “fin de livre” TouriaAit El Mekki - AdelineNazarenko + AdelineNazarenko 239–248 Nous nous intéressons à la construction des index de fin de livres. Nous avons développé le système IndDoc qui aide la construction de tels index. L’un des enjeux de la construction d’index est la sélection des informations : sélection des entrées les plus pertinentes et des renvois au texte les plus intéressants. Cette sélection est évidemment utile pour le lecteur qui doit trouver suffisamment d’information mais sans en être submergé. Elle est également précieuse pour l’auteur de l’index qui doit valider et corriger une ébauche d’index produite automatiquement par IndDoc. Nous montrons comment cette sélection de l’information est réalisée par IndDoc. Nous proposons une mesure qui permet de trier les entrées par ordre de pertinence décroissante et une méthode pour calculer les renvois au texte à associer à chaque entrée de l’index. 2004.jeptalnrecital-long.25 @@ -277,7 +277,7 @@ Fiabilité de la référence humaine dans la détection de thème ArmelleBrun - KamelSmaïli + KamelSmaïli 259–268 Dans cet article, nous nous intéressons à la tâche de détection de thème dans le cadre de la reconnaissance automatique de la parole. La combinaison de plusieurs méthodes de détection montre ses limites, avec des performances de 93.1 %. Ces performances nous mènent à remetttre en cause le thème de référence des paragraphes de notre corpus. Nous avons ainsi effectué une étude sur la fiabilité de ces références, en utilisant notamment les mesures Kappa et erreur de Bayes. Nous avons ainsi pu montrer que les étiquettes thématiques des paragraphes du corpus de test comportaient vraisemblablement des erreurs, les performances de détection de thème obtenues doivent donc êtres exploitées prudemment. 2004.jeptalnrecital-long.27 @@ -315,8 +315,8 @@ La <fixed-case>FREEBANK</fixed-case> : vers une base libre de corpus annotés SusanneSalmon-Alt - EckhardBick - LaurentRomary + EckhardBick + LaurentRomary Jean-MariePierrel 299–308 Les corpus français librement accessibles annotés à d’autres niveaux linguistiques que morpho-syntaxique sont insuffisants à la fois quantitativement et qualitativement. Partant de ce constat, la FREEBANK – construite sur la base d’outils d’analyse automatique dont la sortie est révisée manuellement – se veut une base de corpus du français annotés à plusieurs niveaux (structurel, morphologique, syntaxique, coréférentiel) et à différents degrés de finesse linguistique qui soit libre d’accès, codée selon des schémas normalisés, intégrant des ressources existantes et ouverte à l’enrichissement progressif. @@ -328,11 +328,11 @@ Annoter en constituants pour évaluer des analyseurs syntaxiques AnneVilnat LauraMonceaux - PatrickParoubek + PatrickParoubek IsabelleRobba VéroniqueGendner GabrielIllouz - MichèleJardino + MichèleJardino 309–318 Cet article présente l’annotation en constituants menée dans le cadre d’un protocole d’évaluation des analyseurs syntaxiques (mis au point dans le pré-projet PEAS, puis dans le projet EASY). Le choix des constituants est décrit en détail et une première évaluation effectuée à partir des résultats de deux analyseurs est donnée. 2004.jeptalnrecital-long.32 @@ -341,7 +341,7 @@ Détermination de contenu dans <fixed-case>GEPHOX</fixed-case> - AdilEl Ghali + AdilEl Ghali 319–328 Le générateur GEPHOX que nous réalisons a pour ambition de produire des textes pour des définition ou preuves mathématiques écrites à l’aide de l’assistant de preuve PHOX. Dans cet article nous nous concentrons sur le module de détermination de contenu ContDet de GEPHOX. Après un aperçu sur l’entrée du générateur, i.e. la preuve formelle et l’ensemble des règles ayant permis de l’obtenir, nous décrivons les base de connaissances du générateur et le fonctionnement de l’algorithme de détermination de contenu. 2004.jeptalnrecital-long.33 @@ -368,7 +368,7 @@ Les Grammaires à Concaténation d’Intervalles (<fixed-case>RCG</fixed-case>) comme formalisme grammatical pour la linguistique - BenoîtSagot + BenoîtSagot PierreBoullier 349–358 Le but de cet article est de montrer pourquoi les Grammaires à Concaténation d’Intervalles (Range Concatenation Grammars, ou RCG) sont un formalisme particulièrement bien adapté à la description du langage naturel. Nous expliquons d’abord que la puissance nécessaire pour décrire le langage naturel est celle de PTIME. Ensuite, parmi les formalismes grammaticaux ayant cette puissance d’expression, nous justifions le choix des RCG. Enfin, après un aperçu de leur définition et de leurs propriétés, nous montrons comment leur utilisation comme grammaires linguistiques permet de traiter des phénomènes syntagmatiques complexes, de réaliser simultanément l’analyse syntaxique et la vérification des diverses contraintes (morphosyntaxiques, sémantique lexicale), et de construire dynamiquement des grammaires linguistiques modulaires. @@ -397,8 +397,8 @@ Mots composés dans les modèles de langue pour la recherche d’information CarmenAlvarez - PhilippeLanglais - Jian-YunNie + PhilippeLanglais + Jian-YunNie 1–6 Une approche classique en recherche d’information (RI) consiste à bâtir une représentation des documents et des requêtes basée sur les mots simples les constituant. L’utilisation de modèles bigrammes a été étudiée, mais les contraintes sur l’ordre et l’adjacence des mots dans ces travaux ne sont pas toujours justifiées pour la recherche d’information. Nous proposons une nouvelle approche basée sur les modèles de langue qui incorporent des affinités lexicales (ALs), c’est à dire des paires non ordonnées de mots qui se trouvent proches dans un texte. Nous décrivons ce modèle et le comparons aux plus traditionnels modèles unigrammes et bigrammes ainsi qu’au modèle vectoriel. 2004.jeptalnrecital-poster.1 @@ -407,7 +407,7 @@ Le Regroupement de Types de Mots et l’Unification d’Occurrences de Mots dans des Catégories grammaticales de mots (Clustering of Word Types and Unification of Word Tokens into Grammatical Word-Classes) - EricAtwell + EricAtwell 7–12 Ce papier discute la Néoposie: l’inférence auto-adaptive de catégories grammaticales de mots de la langue naturelle. L’inférence grammaticale peut être divisée en deux parties : l’inférence de catégories grammaticales de mots et l’inférence de la structure. Nous examinons les éléments de base de l’apprentissage auto-adaptif du marquage des catégories grammaticales, et discutons l’adaptation des trois types principaux de marqueurs des catégories grammaticales à l’inférence auto-adaptive de catégories grammaticales de mots. Des marqueurs statistiques de n-grammes suggèrent une approche de regroupement statistique, mais le regroupement n’aide ni avec les types de mots peu fréquents, ni avec les types de mots nombreux qui peuvent se présenter dans plus d’une catégorie grammaticale. Le marqueur alternatif d’apprentissage basé sur la transformation suggère une approche basée sur la contrainte de l’unification de contextes d’occurrences de mots. Celle-ci présente un moyen de regrouper des mots peu fréquents, et permet aux occurrences différentes d’un seul type de mot d’appartenir à des catégories différentes selon les contextes grammaticaux où ils se présentent. Cependant, la simple unification de contextes d’occurrences de mots produit un nombre incroyablement grand de catégories grammaticales de mots. Nous avons essayé d’unifier plus de catégories en modérant le contexte de la correspondance pour permettre l’unification des catégories de mots aussi bien que des occurrences de mots, mais cela entraîne des unifications fausses. Nous concluons que l’avenir peut être un hybride qui comprend le regroupement de types de mots peu fréquents, l’unification de contextes d’occurrences de mots, et le ‘seeding’ avec une connaissance linguistique limitée. Nous demandons un programme de nouvelles recherches pour développer une valise pour la découverte de la langue naturelle. 2004.jeptalnrecital-poster.2 @@ -438,8 +438,8 @@ Traduction de dialogue: résultats du projet <fixed-case>NESPOLE</fixed-case>! et pistes pour le domaine - HervéBlanchon - LaurentBesacier + HervéBlanchon + LaurentBesacier 25–30 Dans cet article, nous détaillons les résultats de la seconde évaluation du projet européen NESPOLE! auquel nous avons pris part pour le français. Dans ce projet, ainsi que dans ceux qui l’ont précédé, des techniques d’évaluation subjectives — réalisées par des évaluateurs humains — ont été mises en oeuvre. Nous présentons aussi les nouvelles techniques objectives — automatiques — proposées en traduction de l’écrit et mises en oeuvre dans le projet C-STAR III. Nous conclurons en proposant quelques idées et perspectives pour le domaine. 2004.jeptalnrecital-poster.5 @@ -521,7 +521,7 @@ <fixed-case>NLP</fixed-case> Applications Based on<fixed-case>W</fixed-case>eighted<fixed-case>M</fixed-case>ulti-Tape Automata - AndréKempe + AndréKempe 73–78 This article describes two practical applications of weighted multi-tape automata (WMTAs) in Natural Language Processing, that demonstrate the augmented descriptive power of WMTAs compared to weighted 1-tape and 2-tape automata. The two examples concern the preservation of intermediate results in transduction cascades and the search for similar words in two languages. As a basis for these applications, the article proposes a number of operations on WMTAs. Among others, it (re-)defines multi-tape intersection, where a number of tapes of one WMTA are intersected with the same number of tapes of another WMTA. In the proposed approach, multi-tape intersection is not an atomic operation but rather a sequence of more elementary ones, which facilitates its implementation. 2004.jeptalnrecital-poster.13 @@ -559,7 +559,7 @@ Apprentissage collectif et lexique JulienPoudade - PatrickParoubek + PatrickParoubek 97–102 Cet article présente l’influence de la zone de travail que possède une entité logicielle pour lui permettre de prédire l’état futur de son environnement, sur la constitution d’un lexique partagé par les différents membres d’une population, dans le cadre d’une variante “du jeu de désignation” (naming game). 2004.jeptalnrecital-poster.17 @@ -568,7 +568,7 @@ L’outil de traitement de corpus <fixed-case>LIKES</fixed-case> - FrançoisRousselot + FrançoisRousselot 103–112 LIKES (LInguistic and Knowledge Engineering Station) est une station d’ingénierie linguistique destinée à traiter des corpus, elle fonctionne pour l’instant sur la plupart des langues européennes et slaves en utilisant des ressources minimales pour chaque langue. Les corpus sont constitués d’un ou plusieurs textes en ASCII ou en HTML, l’interface donne la possibilité de constituer son corpus et d’y exécuter un certain nombre de tâches allant de simples tâches de découpage en mot, de tri ou de recherche de motifs à des tâches plus complexes d’aide à la synthèse de grammaire, d’aide au repérage de relations, d’aide à la construction d’une terminologie. Nous décrivons ici les principales fonctionnalités de LIKES en rapport avec le traitement des corpus et ce qui fait sa spécificité par rapport à d’autres environnements comparables : l’utilisation minimale de ressources linguistiques. 2004.jeptalnrecital-poster.18 @@ -601,7 +601,7 @@ JoaquimSilva ZornitsaKozareva VeskaNoncheva - GabrielLopes + GabrielLopes 125–130 Named entities and more generally Multiword Lexical Units (MWUs) are important for various applications. However, language independent methods for automatically extracting MWUs do not provide us with clean data. So, in this paper we propose a method for selecting possible named entities from automatically extracted MWUs, and later, a statistics-based language independent unsupervised approach is applied to possible named entities in order to cluster them according to their type. Statistical features used by our clustering process are described and motivated. The Model-Based Clustering Analysis (MBCA) software enabled us to obtain different clusters for proposed named entities. The method was applied to Bulgarian and English. For some clusters, precision is very high; other clusters still need further refinement. Based on the obtained clusters, it is also possible to classify new possible named entities. 2004.jeptalnrecital-poster.21 @@ -630,8 +630,8 @@ Modèle de langage sémantique pour la reconnaissance automatique de parole dans un contexte de traduction QuangVu-minh - LaurentBesacier - HervéBlanchon + LaurentBesacier + HervéBlanchon BrigitteBigi 147–152 Le travail présenté dans cet article a été réalisé dans le cadre d’un projet global de traduction automatique de la parole. L’approche de traduction est fondée sur un langage pivot ou Interchange Format (IF), qui représente le sens de la phrase indépendamment de la langue. Nous proposons une méthode qui intègre des informations sémantiques dans le modèle statistique de langage du système de Reconnaissance Automatique de Parole. Le principe consiste a utiliser certaines classes définies dans l’IF comme des classes sémantiques dans le modèle de langage. Ceci permet au système de reconnaissance de la parole d’analyser partiellement en IF les tours de parole. Les expérimentations realisées montrent qu’avec cette approche, le système de reconnaissance peut analyser directement en IF une partie des données de dialogues de notre application, sans faire appel au système de traduction (35% des mots ; 58% des tours de parole), tout en maintenant le même niveau de performance du système global. @@ -643,8 +643,8 @@ Actes de la 11ème conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues - FrédéricBéchet - TristanVanrullen + FrédéricBéchet + TristanVanrullen ATALA
Fès, Maroc
April @@ -675,7 +675,7 @@
Indexation automatique de ressources de santé à l’aide d’un vocabulaire contrôlé - AurélieNévéol + AurélieNévéol 21–30 Nous présentons ici le système d’indexation automatique actuellement en cours de développement dans l’équipe CISMeF afin d’aider les documentalistes lors de l’indexation de ressources de santé. Nous détaillons l’architecture du système pour l’extraction de mots clés MeSH, et présentons les résultats d’une première évaluation. La stratégie d’indexation choisie atteint une précision comparable à celle des systèmes existants. De plus, elle permet d’extraire des paires mot clé/qualificatif, et non des termes isolés, ce qui constitue une indexation beaucoup plus fine. Les travaux en cours s’attachent à étendre la couverture des dictionnaires, et des tests à plus grande échelle sont envisagés afin de valider le système et d’évaluer sa valeur ajoutée dans le travail quotidien des documentalistes. 2004.jeptalnrecital-recital.3 @@ -733,8 +733,8 @@ Actes de la 11ème conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues (Posters) - FrédéricBéchet - TristanVanrullen + FrédéricBéchet + TristanVanrullen ATALA
Fès, Maroc
April @@ -892,7 +892,7 @@
La relation de synonymie en génomique - DavyWeissenbacher + DavyWeissenbacher 97–102 L’accès au contenu des textes de génomique est aujourd’hui un enjeu important. Cela suppose au départ d’identifier les noms d’entités biologiques comme les gènes ou les protéines. Se pose alors la question de la variation de ces noms. Cette question revêt une importance particulière en génomique où les noms de gènes sont soumis à de nombreuses variations, notamment la synonymie. A partir d’une étude de corpus montrant que la synonymie est une relation stable et linguistiquement marquée, cet article propose une modélisation de la synonymie et une méthode d’extraction spécifiquement adaptée à cette relation. Au vu de nos premières expériences, cette méthode semble plus prometteuse que les approches génériques utilisées pour l’extraction de cette relation. 2004.jeptalnrecital-recitalposter.17 diff --git a/data/xml/2004.tc.xml b/data/xml/2004.tc.xml index 9b5eef6bda..68fdf67e83 100644 --- a/data/xml/2004.tc.xml +++ b/data/xml/2004.tc.xml @@ -101,7 +101,7 @@ The Certified Localisation Professional (<fixed-case>CLP</fixed-case>) - ReinhardSchäler + ReinhardSchäler 2004.tc-1.15 schaler-2004-certified diff --git a/data/xml/2004.tmi.xml b/data/xml/2004.tmi.xml index 861dc18da4..cc0c740b1e 100644 --- a/data/xml/2004.tmi.xml +++ b/data/xml/2004.tmi.xml @@ -10,7 +10,7 @@ Rapid prototyping of a transfer-based <fixed-case>H</fixed-case>ebrew-to-<fixed-case>E</fixed-case>nglish machine translation system - AlonLavie + AlonLavie ErikPeterson KatharinaProbst ShulyWintner @@ -25,10 +25,10 @@ Jan ToreLønning ErikVelldal DorotheeBeerman - JohnCarroll - DanFlickinger + JohnCarroll + DanFlickinger LarsHellan - Janne BondiJohannessen + Janne BondiJohannessen PaulMeurer TorbjørnNordgård VictoriaRosén @@ -37,9 +37,9 @@ Comparing rule-based and statistical approaches to speech understanding in a limited domain speech translation system - MannyRayner - PierretteBouillon - Beth AnnHockey + MannyRayner + PierretteBouillon + Beth AnnHockey NikosChatzichrisafis MarianneStarlander 2004.tmi-1.3 @@ -48,7 +48,7 @@ Non-contiguous tree parsing MarkDras - Chung-hyeHan + Chung-hyeHan 2004.tmi-1.4 dras-han-2004-non @@ -74,20 +74,20 @@ A learning approach to improving sentence-level <fixed-case>MT</fixed-case> evaluation AlexKulesza - Stuart M.Shieber + Stuart M.Shieber 2004.tmi-1.8 kulesza-shieber-2004-learning Measuring confidence intervals for the machine translation evaluation metrics - YingZhang - StephanVogel + YingZhang + StephanVogel 2004.tmi-1.9 zhang-vogel-2004-measuring Cross-language algorithms: the progressive conflation of the <fixed-case>MT</fixed-case> and <fixed-case>IR</fixed-case> paradigms - YorickWilks + YorickWilks wilks-2004-cross @@ -99,9 +99,9 @@ Method for retrieving a similar sentence and its application to machine translation - MitsuoShimohata - EiichiroSumita - YujiMatsumoto + MitsuoShimohata + EiichiroSumita + YujiMatsumoto 2004.tmi-1.12 shimohata-etal-2004-method @@ -117,7 +117,7 @@ ArulMenezes BobMoore ChrisQuirk - EricRingger + EricRingger 2004.tmi-1.14 aue-etal-2004-statistical @@ -125,13 +125,13 @@ Cooperative unsupervised training of the part-of-speech taggers in a bidirectional machine translation system FelipeSánchez-Martínez Juan AntonioPérez-Ortiz - Mikel L.Forcada + Mikel L.Forcada 2004.tmi-1.15 sanchez-martinez-etal-2004-cooperative
Latest challenges to <fixed-case>MT</fixed-case> <fixed-case>R</fixed-case>&<fixed-case>D</fixed-case> - HaroldSomers + HaroldSomers somers-2004-latest
diff --git a/data/xml/2005.eamt.xml b/data/xml/2005.eamt.xml index f4f3470dcc..ab90338199 100644 --- a/data/xml/2005.eamt.xml +++ b/data/xml/2005.eamt.xml @@ -11,13 +11,13 @@ Frontmatter - BenteMaegaard + BenteMaegaard 2005.eamt-1.1 maegaard-2005-frontamtter The Language Translation Interface - DominiqueEstival + DominiqueEstival 2005.eamt-1.2 estival-2005-language @@ -45,10 +45,10 @@ Comparison of generation strategies for interactive machine translation OliverBender - SašaHasan + SašaHasan DavidVilar RichardZens - HermannNey + HermannNey 2005.eamt-1.6 bender-etal-2005-comparison @@ -62,10 +62,10 @@
A generic multi-lingual open source platform for limited-domain medical speech translation - PierretteBouillon - MannyRayner + PierretteBouillon + MannyRayner NikosChatzichrisafis - Beth AnnHockey + Beth AnnHockey MarianneSantaholma MarianneStarlander YukieNakao @@ -92,32 +92,32 @@ <fixed-case>P</fixed-case>rague <fixed-case>C</fixed-case>zech-<fixed-case>E</fixed-case>nglish dependency treebank: resource for structure-based <fixed-case>MT</fixed-case> - MartinČmejrek - JanCuřín - JanHajič - JiříHavelka + MartinČmejrek + JanCuřín + JanHajič + JiříHavelka 2005.eamt-1.11 cmejrek-etal-2005-prague An open-source shallow-transfer machine translation engine for the <fixed-case>R</fixed-case>omance languages of <fixed-case>S</fixed-case>pain Antonio M.Corbi-Bellot - Mikel L.Forcada + Mikel L.Forcada SergioOrtíz-Rojas Juan AntonioPérez-Ortiz - GemaRamírez-Sánchez + GemaRamírez-Sánchez FelipeSánchez-Martínez - IñakiAlegria - AingeruMayor - KepaSarasola + IñakiAlegria + AingeruMayor + KepaSarasola 2005.eamt-1.12 corbi-bellot-etal-2005-open A framework for interactive and automatic refinement of transfer-based machine translation - AriadnaFont Llitjós - Jaime G.Carbonell - AlonLavie + AriadnaFont Llitjós + Jaime G.Carbonell + AlonLavie 2005.eamt-1.13 font-llitjos-etal-2005-framework @@ -143,41 +143,41 @@
Clustered language models based on regular expressions for <fixed-case>SMT</fixed-case> - SašaHasan - HermannNey + SašaHasan + HermannNey 2005.eamt-1.17 hasan-ney-2005-clustered Augmenting a statistical translation system with a translation memory SanjikaHewavitharana - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel 2005.eamt-1.18 hewavitharana-etal-2005-augmenting Adaptation of the translation model for statistical machine translation based on information retrieval - Almut SiljaHildebrand + Almut SiljaHildebrand MatthiasEck - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel 2005.eamt-1.19 hildebrand-etal-2005-adaptation Multi-engine machine translation guided by explicit word matching ShyamsundarJayaraman - AlonLavie + AlonLavie 2005.eamt-1.20 jayaraman-lavie-2005-multi Symmetric probabilistic alignment for example-based translation Jae DongKim - Ralf D.Brown - Peter J.Jansen - Jaime G.Carbonell + Ralf D.Brown + Peter J.Jansen + Jaime G.Carbonell 2005.eamt-1.21 kim-etal-2005-symmetric @@ -189,7 +189,7 @@
From the real world to real words: the <fixed-case>METEO</fixed-case> case - PhilippeLanglais + PhilippeLanglais ThomasLeplus SimonaGandrabur GuyLapalme @@ -207,7 +207,7 @@ Efficient statistical machine translation with constrained reordering EvgenyMatusov StephanKanthak - HermannNey + HermannNey 2005.eamt-1.25 matusov-etal-2005-efficient @@ -215,7 +215,7 @@ <fixed-case>T</fixed-case>rans<fixed-case>B</fixed-case>ooster: boosting the performance of wide-coverage machine translation systems BartMellebeek AnnaKhasin - JosefVan Genabith + JosefVan Genabith AndyWay 2005.eamt-1.26 mellebeek-etal-2005-transbooster @@ -224,7 +224,7 @@ Holistic regression testing for high-quality <fixed-case>MT</fixed-case>: some methodological and technological reflections StephanOepen HelgeDyvik - DanFlickinger + DanFlickinger Jan ToreLønning PaulMeurer VictoriaRosén @@ -233,24 +233,24 @@
Building a <fixed-case>WSD</fixed-case> module within an <fixed-case>MT</fixed-case> system to enable interactive resolution in the user’s source language - ConstantinOrasan + ConstantinOrasan TedMarshall RobertClark Le AnHa - RuslanMitkov + RuslanMitkov 2005.eamt-1.28 orasan-etal-2005-building Exploiting phrasal lexica and additional morpho-syntactic language resources for statistical machine translation with scarce training data - MajaPopovic - HermannNey + MajaPopovic + HermannNey 2005.eamt-1.29 popovic-ney-2005-exploiting An approach to machine translation via the rule-to-rule hypothesis - GáborPrószéky + GáborPrószéky 2005.eamt-1.30 proszeky-2005-approach @@ -284,14 +284,14 @@ Application of word-level confidence measures in interactive statistical machine translation NicolaUeffing - HermannNey + HermannNey 2005.eamt-1.35 ueffing-ney-2005-application Considerations in maximum mutual information and minimum classification error training for statistical machine translation AshishVengupol - StephanVogel + StephanVogel 2005.eamt-1.36 vengupol-vogel-2005-considerations @@ -299,7 +299,7 @@ Sentence segmentation using <fixed-case>IBM</fixed-case> word alignment model 1 JiaXu RichardZens - HermannNey + HermannNey 2005.eamt-1.37 xu-etal-2005-sentence
@@ -320,8 +320,8 @@
An efficient phrase-to-phrase alignment model for arbitrarily long phrase and large corpora - YingZhang - StephanVogel + YingZhang + StephanVogel 2005.eamt-1.39 zhang-vogel-2005-efficient diff --git a/data/xml/2005.iwslt.xml b/data/xml/2005.iwslt.xml index c2647bcbae..e766326c33 100644 --- a/data/xml/2005.iwslt.xml +++ b/data/xml/2005.iwslt.xml @@ -19,8 +19,8 @@ A decoding algorithm for word lattice translation in speech translation RuiqiangZhang GenichiroKikui - HirofumiYamamoto - Wai-KitLo + HirofumiYamamoto + Wai-KitLo 2005.iwslt-1.2 zhang-etal-2005-decoding
@@ -28,7 +28,7 @@ Using multiple recognition hypotheses to improve speech translation RuiqiangZhang GenichiroKikui - HirofumiYamamoto + HirofumiYamamoto 2005.iwslt-1.3 zhang-etal-2005-using
@@ -41,12 +41,12 @@
Nobody is perfect: <fixed-case>ATR</fixed-case>’s hybrid approach to spoken language translation - MichaelPaul + MichaelPaul TakaoDoi YoungsookHwang KenjiImamura HideoOkuma - EiichiroSumita + EiichiroSumita 2005.iwslt-1.5 paul-etal-2005-nobody @@ -58,16 +58,16 @@ AlmutSilja MatthiasEck ChioriHori - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel 2005.iwslt-1.6 hewavitharana-etal-2005-cmu
Low Cost Portability for Statistical Machine Translation based on N-gram Frequency and <fixed-case>TF</fixed-case>-<fixed-case>IDF</fixed-case> MatthiasEck - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel 2005.iwslt-1.7 eck-etal-2005-low @@ -116,7 +116,7 @@ The <fixed-case>MIT</fixed-case>-<fixed-case>LL</fixed-case>/<fixed-case>AFRL</fixed-case> <fixed-case>MT</fixed-case> System WadeShen BrianDelaney - TimAnderson + TimAnderson 2005.iwslt-1.13 shen-etal-2005-mit
@@ -127,7 +127,7 @@ ZhenbiaoChen WeiWei BoXu - ChengqingZong + ChengqingZong 2005.iwslt-1.14 pang-etal-2005-casia
@@ -162,7 +162,7 @@ JiaXu EvgenyMatusov RichardZens - HermannNey + HermannNey 2005.iwslt-1.18 xu-etal-2005-integrated
@@ -171,7 +171,7 @@ EvgenyMatusov GregorLeusch OliverBender - HermannNey + HermannNey 2005.iwslt-1.19 matusov-etal-2005-evaluating
@@ -179,12 +179,12 @@ The <fixed-case>RWTH</fixed-case> Phrase-based Statistical Machine Translation System RichardZens OliverBender - SasaHasan + SasaHasan ShahramKhadivi EvgenyMatusov JiaXu YuqiZhang - HermannNey + HermannNey 2005.iwslt-1.20 zens-etal-2005-rwth
@@ -193,7 +193,7 @@ YookyungKim JunHuang YoussefBillawala - DemitriosMaster + DemitriosMaster FarzadEhsani 2005.iwslt-1.21 kim-etal-2005-sehda @@ -207,23 +207,23 @@
Ngram-based versus Phrase-based Statistical Machine Translation - Josep M.Crego - Marta R.Costa-Jussa + Josep M.Crego + Marta R.Costa-Jussa Jose B.Marino - Jose A. R.Fonollosa + Jose A. R.Fonollosa 2005.iwslt-1.23 crego-etal-2005-ngram Tuning a phrase-based statistical translation system for the <fixed-case>IWSLT</fixed-case> 2005 <fixed-case>C</fixed-case>hinese to <fixed-case>E</fixed-case>nglish and <fixed-case>A</fixed-case>rabic to <fixed-case>E</fixed-case>nglish tasks - Marta R.Costa-Jussa - Jose A. R.Fonollosa + Marta R.Costa-Jussa + Jose A. R.Fonollosa 2005.iwslt-1.24 costa-jussa-fonollosa-2005-tuning The <fixed-case>TALP</fixed-case> Ngram-based <fixed-case>SMT</fixed-case> System for <fixed-case>IWSLT</fixed-case>’05 - Josep M.Crego + Josep M.Crego Adriade Gispert Jose B.Marino 2005.iwslt-1.25 @@ -232,7 +232,7 @@ Machine Translation Evaluation Inside <fixed-case>QARLA</fixed-case> EnrikeAmigo - JesusGimenez + JesusGimenez ChioriHori 2005.iwslt-1.26 amigo-etal-2005-machine diff --git a/data/xml/2005.jeptalnrecital.xml b/data/xml/2005.jeptalnrecital.xml index ae12e18afb..cd2fae3ebc 100644 --- a/data/xml/2005.jeptalnrecital.xml +++ b/data/xml/2005.jeptalnrecital.xml @@ -3,7 +3,7 @@ Actes de la 12ème conférence sur le Traitement Automatique des Langues Naturelles. Articles longs - MichèleJardino + MichèleJardino ATALA
Dourdan, France
June @@ -27,7 +27,7 @@ <fixed-case>XMG</fixed-case> : un Compilateur de Méta-Grammaires Extensible DenysDuchier - JosephLe Roux + JosephLe Roux YannickParmentier 11–20 Dans cet article, nous présentons un outil permettant de produire automatiquement des ressources linguistiques, en l’occurence des grammaires. Cet outil se caractérise par son extensibilité, tant du point de vue des formalismes grammaticaux supportés (grammaires d’arbres adjoints et grammaires d’interaction à l’heure actuelle), que de son architecture modulaire, qui facilite l’intégration de nouveaux modules ayant pour but de vérifier la validité des structures produites. En outre, cet outil offre un support adapté au développement de grammaires à portée sémantique. @@ -59,7 +59,7 @@ Recherche en corpus de réponses à des questions définitoires VéroniqueMalaisé ThierryDelbecque - PierreZweigenbaum + PierreZweigenbaum 41–50 Les systèmes de questions-réponses, essentiellement focalisés sur des questions factuelles en domaine ouvert, testent également d’autres tâches, comme le travail en domaine contraint ou la recherche de définitions. Nous nous intéressons ici à la recherche de réponses à des questions « définitoires » portant sur le domaine médical. La recherche de réponses de type définitoire se fait généralement en utilisant deux types de méthodes : celles s’appuyant essentiellement sur le contenu du corpus cible, et celles faisant appel à des connaissances externes. Nous avons choisi de nous limiter au premier de ces deux types de méthodes. Nous présentons une expérience dans laquelle nous réutilisons des patrons de repérage d’énoncés définitoires, conçus pour une autre tâche, pour localiser les réponses potentielles aux questions posées. Nous avons intégré ces patrons dans une chaîne de traitement que nous évaluons sur les questions définitoires et le corpus médical du projet EQueR sur l’évaluation de systèmes de questions-réponses. Cette évaluation montre que, si le rappel reste à améliorer, la « précision » des réponses obtenue (mesurée par la moyenne des inverses de rangs) est honorable. Nous discutons ces résultats et proposons des pistes d’amélioration. 2005.jeptalnrecital-long.5 @@ -78,7 +78,7 @@ Morphosémantique pour l’appariement de termes dans le vocabulaire médical : approche multilingue - FiammettaNamer + FiammettaNamer 61–70 Cet article s’intéresse à la manière dont la morphosémantique peut contribuer à l’appariement multilingue de variantes terminologiques entre termes. L’approche décrite permet de relier automatiquement entre eux les noms et adjectifs composés savants d’un corpus spécialisé en médecine (synonymie, hyponymie, approximation). L’acquisition de relations lexicales est une question particulièrement cruciale lors de l’élaboration de bases de données et de systèmes de recherche d’information multilingues. La méthode est applicable à au moins cinq langues européennes dont elle exploite les caractéristiques morphologiques similaires des mots composés dans les langues de spécialité. Elle consiste en l’intéraction de trois dispositifs : (1) un analyseur morphosémantique monolingue, (2) une table multilingue qui définit des relations de base entre les racines gréco-latines des lexèmes savants, (3) quatre règles indépendantes de la langue qui infèrent, à partir de ces relations de base, les relations lexicales entre les lexèmes contenant ces racines. L’approche décrite est implémentée en français, où l’on dispose d’un analyseur morphologique capable de calculer la définition de mots construits inconnus à partir du sens de ses composants. Le corpus de travail est un lexique spécialisé médical d’environ 29000 lexèmes, que le calcul des relations de synonymie, hyponymie et approximation a permis de regrouper en plus de 3000 familles lexicales. 2005.jeptalnrecital-long.7 @@ -99,7 +99,7 @@ Utilisation de corpus de spécialité pour le filtrage de synonymes de la langue générale NataliaGrabar - PierreZweigenbaum + PierreZweigenbaum 81–90 Les ressources linguistiques les plus facilement disponibles en TAL ressortissent généralement au registre général d’une langue. Lorsqu’elles doivent être utilisées sur des textes de spécialité il peut être utile de les adapter à ces textes. Cet article est consacré à l’adaptation de ressources synonymiques générales à la langue médicale. L’adaptation est obtenue suite à une série de filtrages sur un corpus du domaine. Les synonymes originaux et les synonymes filtrés sont ensuite utilisés comme une des ressources pour la normalisation de variantes de termes dans une tâche de structuration de terminologie. Leurs apports respectifs sont évalués par rapport à la structure terminologique de référence. Cette évaluation montre que les résultats sont globalement encourageants après les filtrages, pour une tâche comme la structuration de terminologies : une amélioration de la précision contre une légère diminution du rappel. 2005.jeptalnrecital-long.9 @@ -119,7 +119,7 @@ Chaînes de traitement syntaxique PierreBoullier LionelClément - BenoîtSagot + BenoîtSagot ÉricVillemonte De La Clergerie 101–110 Cet article expose l’ensemble des outils que nous avons mis en oeuvre pour la campagne EASy d’évaluation d’analyse syntaxique. Nous commençons par un aperçu du lexique morphologique et syntaxique utilisé. Puis nous décrivons brièvement les propriétés de notre chaîne de traitement pré-syntaxique qui permet de gérer des corpus tout-venant. Nous présentons alors les deux systèmes d’analyse que nous avons utilisés, un analyseur TAG issu d’une méta-grammaire et un analyseur LFG. Nous comparons ces deux systèmes en indiquant leurs points communs, comme l’utilisation intensive du partage de calcul et des représentations compactes de l’information, mais également leurs différences, au niveau des formalismes, des grammaires et des analyseurs. Nous décrivons ensuite le processus de post-traitement, qui nous a permis d’extraire de nos analyses les informations demandées par la campagne EASy. Nous terminons par une évaluation quantitative de nos architectures. @@ -182,8 +182,8 @@ Representational and architectural issues in a limited-domain medical speech translator - MannyRayner - PierretteBouillon + MannyRayner + PierretteBouillon MarianneSantaholma YukieNakao 161–170 @@ -242,7 +242,7 @@ Paradocs: un système d’identification automatique de documents parallèles AlexandrePatry - PhilippeLanglais + PhilippeLanglais 221–230 Les corpus parallèles sont d’une importance capitale pour les applications multilingues de traitement automatique des langues. Malheureusement, leur rareté est le maillon faible de plusieurs applications d’intérêt. Extraire de tels corpus duWeb est une solution viable, mais elle introduit une nouvelle problématique : il n’est pas toujours trivial d’identifier les documents parallèles parmi tous ceux qui ont été extraits. Dans cet article, nous nous intéressons à l’identification automatique des paires de documents parallèles contenues dans un corpus bilingue. Nous montrons que cette tâche peut être accomplie avec précision en utilisant un ensemble restreint d’invariants lexicaux. Nous évaluons également notre approche sur une tâche de traduction automatique et montrons qu’elle obtient des résultats supérieurs à un système de référence faisant usage d’un lexique bilingue. 2005.jeptalnrecital-long.23 @@ -255,9 +255,9 @@ NicolaCancedda BrunoCavestro MarcDymetman - EricGaussier - CyrilGoutte - PhilippeLanglais + EricGaussier + CyrilGoutte + PhilippeLanglais ArneMauser KenjiYamada 231–240 @@ -279,7 +279,7 @@ Traduction de termes biomédicaux par inférence de transducteurs VincentClaveau - PierreZweigenbaum + PierreZweigenbaum 251–260 Cet article propose et évalue une méthode de traduction automatique de termes biomédicaux simples du français vers l’anglais et de l’anglais vers le français. Elle repose sur une technique d’apprentissage artificiel supervisée permettant d’inférer des transducteurs à partir d’exemples de couples de termes bilingues ; aucune autre ressource ou connaissance n’est requise. Ces transducteurs, capturant les grandes régularités de traduction existant dans le domaine biomédical, sont ensuite utilisés pour traduire de nouveaux termes français en anglais et vice versa. Les évaluations menées montrent que le taux de bonnes traductions de notre technique se situe entre 52 et 67%. À travers un examen des erreurs les plus courantes, nous identifions quelques limites inhérentes à notre approche et proposons quelques pistes pour les dépasser. Nous envisageons enfin plusieurs extensions à ce travail. 2005.jeptalnrecital-long.26 @@ -307,7 +307,7 @@ Détection automatique d’actes de dialogue par l’utilisation d’indices multiniveaux - SophieRosset + SophieRosset DelphineTribout 281–290 Ces dernières années, il y a eu de nombreux travaux portant sur l’utilisation d’actes de dialogue pour caractériser les dialogues homme-homme ou homme-machine. Cet article fait état de nos travaux sur la détection automatique d’actes de dialogue dans des corpus réels de dialogue homme-homme. Notre travail est fondé essentiellement sur deux hypothèses . (i) la position des mots et la classe sémantique du mot sont plus importants que les mots eux-mêmes pour identifier l’acte de dialogue et (ii) il y a une forte prédictivité dans la succession des actes de dialogues portés sur un même segment dialogique. Une approche de type Memory Based Learning a été utilisée pour la détection automatique des actes de dialogue. Le premier modèle n’utilise pas d’autres informations que celles contenus dans le tour de parole. Dans lex expériences suivantes, des historiques dialogiques de taille variables sont utilisés. Le taux d’erreur de détection d’actes de dialogue est d’environ 16% avec le premier modèle est descend avec une utilisation plus large de l’historique du dialogue à environ 14%. @@ -318,8 +318,8 @@ Comment mesurer la couverture d’une ressource terminologique pour un corpus ? GoritsaNinova - AdelineNazarenko - ThierryHamon + AdelineNazarenko + ThierryHamon SylvieSzulman 291–300 Cet article propose une définition formelle de la notion de couverture lexicale. Celleci repose sur un ensemble de quatre métriques qui donnent une vue globale de l’adéquation d’une ressource lexicale à un corpus et permettent ainsi de guider le choix d’une ressource en fonction d’un corpus donné. Les métriques proposées sont testées dans le contexte de l’analyse de corpus spécialisés en génomique : 5 terminologies différentes sont confrontées à 4 corpus. La combinaison des valeurs obtenues permet de discerner différents types de relations entre ressources et corpus. @@ -367,7 +367,7 @@ Des arbres de dérivation aux forêts de dépendance : un chemin via les forêts partagées - DjaméSeddah + DjaméSeddah BertrandGaiffe 341–350 L’objectif de cet article est de montrer comment bâtir une structure de répresentation proche d’un graphe de dépendance à l’aide des deux structures de représentation canoniques fournies par les Grammaires d’Arbres Adjoints Lexicalisées . Pour illustrer cette approche, nous décrivons comment utiliser ces deux structures à partir d’une forêt partagée. @@ -378,7 +378,7 @@ Evaluation des Modèles de Langage n-gram et n/m-multigram PierreAlain - OlivierBoeffard + OlivierBoeffard 351–360 Cet article présente une évaluation de modèles statistiques du langage menée sur la langue Française. Nous avons cherché à comparer la performance de modèles de langage exotiques par rapport aux modèles plus classiques de n-gramme à horizon fixe. Les expériences réalisées montrent que des modèles de n-gramme à horizon variable peuvent faire baisser de plus de 10% en moyenne la perplexité d’un modèle de n-gramme à horizon fixe. Les modèles de n/m-multigramme demandent une adaptation pour pouvoir être concurrentiels. 2005.jeptalnrecital-long.36 @@ -409,7 +409,7 @@ Actes de la 12ème conférence sur le Traitement Automatique des Langues Naturelles. Articles courts - MichèleJardino + MichèleJardino ATALA
Dourdan, France
June @@ -434,7 +434,7 @@ Application du métalangage de la <fixed-case>BD</fixed-case>éf au traitement formel de la polysémie LucieBarque - AlainPolguère + AlainPolguère 391–396 Cet article a pour objet le métalangage définitionnel de la base de données lexicale BDéf, plus précisément l’utilisation de ce métalangage dans la modélisation des structures polysémiques du français. La Bdéf encode sous forme de définitions lexicographiques les sens lexicaux d’un sous-ensemble représentatif du lexique du français parmi lequel on compte environ 500 unités polysémiques appartenant aux principales parties du discours. L’article comprend deux sections. La première présente le métalangage de la BDéf et le situe par rapport aux différents types de définitions lexicales, qu’elles soient ou non formelles, qu’elles visent ou non l’informatisation. La seconde section présente une application de la BDéf qui vise à terme à rendre compte de la polysémie régulière du français. On y présente, à partir d’un cas spécifique, la notion de patron de polysémie. 2005.jeptalnrecital-court.2 @@ -454,7 +454,7 @@ Un analyseur <fixed-case>LFG</fixed-case> efficace pour le français : <fixed-case>SXLFG</fixed-case> PierreBoullier - BenoîtSagot + BenoîtSagot LionelClément 403–408 Dans cet article, nous proposons un nouvel analyseur syntaxique, qui repose sur une variante du modèle Lexical-Functional Grammars (Grammaires Lexicales Fonctionnelles) ou LFG. Cet analyseur LFG accepte en entrée un treillis de mots et calcule ses structures fonctionnelles sur une forêt partagée. Nous présentons également les différentes techniques de rattrapage d’erreurs que nous avons mises en oeuvre. Puis nous évaluons cet analyseur sur une grammaire à large couverture du français dans le cadre d’une utilisation à grande échelle sur corpus variés. Nous montrons que cet analyseur est à la fois efficace et robuste. @@ -476,9 +476,9 @@ Contextes multilingues alignés pour la désambiguïsation sémantique : une étude expérimentale BoxingChen MeriamHaddara - OlivierKraif - GrégoireMoreau de Montcheuil - MarcEl-Bèze + OlivierKraif + GrégoireMoreau de Montcheuil + MarcEl-Bèze 415–420 Cet article s’intéresse a la désambiguïsation sémantique d’unités lexicales alignées a travers un corpus multilingue. Nous appliquons une méthode automatique non supervisée basée sur la comparaison de réseaux sémantiques, et nous dégageons un critère permettant de déterminer a priori si 2 unités alignées ont une chance de se désambiguïser mutuellement. Enfin, nous développons une méthode fondée sur un apprentissage a partir de contextes bilingues. En appliquant ce critère afin de déterminer pour quelles unités l’information traductionnelle doit être prise en compte, nous obtenons une amélioration des résultats. 2005.jeptalnrecital-court.6 @@ -498,7 +498,7 @@ Projection et monotonie dans un langage de représentation lexico-grammatical - BenoîtCrabbé + BenoîtCrabbé 427–432 Cet article apporte une méthode de développement grammatical pour la réalisation de grammaires d’arbres adjoints (TAG) de taille importante augmentées d’une dimension sémantique. La méthode que nous présentons s’exprime dans un langage informatique de représentation grammatical qui est déclaratif et monotone. Pour arriver au résultat, nous montrons comment tirer parti de la théorie de la projection dans le langage de représentation que nous utilisons. Par conséquent cet article justifie l’utilisation d’un langage monotone pour la représentation lexico-grammaticale. 2005.jeptalnrecital-court.8 @@ -519,7 +519,7 @@ Ritel : un système de dialogue homme-machine à domaine ouvert OlivierGalibert GabrielIllouz - SophieRosset + SophieRosset 439–444 L’objectif du projet RITEL est de réaliser un système de dialogue homme-machine permettant à un utilisateur de poser oralement des questions, et de dialoguer avec un système de recherche d’information généraliste (par exemple, chercher sur l’Internet “Qui est le Président du Sénat ?”) et d’en étudier les potentialités. Actuellement, la plateforme RITEL permet de collecter des corpus de dialogue homme-machine. Les utilisateurs peuvent parfois obtenir une réponse, de type factuel (Q : qui est le président de la France ; R : Jacques Chirac.). Cet article présente brièvement la plateforme développée, le corpus collecté ainsi que les questions que soulèvent un tel système et quelques unes des premières solutions envisagées. 2005.jeptalnrecital-court.10 @@ -539,7 +539,7 @@ Segmentation de textes arabes basée sur l’analyse contextuelle des signes de ponctuations et de certaines particules - LamiaHadrich Belguith + LamiaHadrich Belguith LeilaBaccour MouradGhassan 451–456 @@ -558,7 +558,7 @@ Approches en corpus pour la traduction : le cas <fixed-case>MÉTÉO</fixed-case> - PhilippeLanglais + PhilippeLanglais ThomasLeplus SimonaGandrabur GuyLapalme @@ -579,9 +579,9 @@ Indexation automatique de ressources de santé à l’aide de paires de descripteurs <fixed-case>M</fixed-case>e<fixed-case>SH</fixed-case> - AurélieNévéol + AurélieNévéol AlexandrinaRogozan - StéfanDarmoni + StéfanDarmoni 475–480 Depuis quelques années, médecins et documentalistes doivent faire face à une demande croissante dans le domaine du codage médico-économique et de l’indexation des diverses sources d’information disponibles dans le domaine de la santé. Il est donc nécessaire de développer des outils d’indexation automatique qui réduisent les délais d’indexation et facilitent l’accès aux ressources médicales. Nous proposons deux méthodes d’indexation automatique de ressources de santé à l’aide de paires de descripteurs MeSH. La combinaison de ces deux méthodes permet d’optimiser les résulats en exploitant la complémentarité des approches. Les performances obtenues sont équivalentes à celles des outils de la littérature pour une indexation à l’aide de descripteurs seuls. 2005.jeptalnrecital-court.16 @@ -608,7 +608,7 @@ Les Méta-<fixed-case>RCG</fixed-case>: description et mise en oeuvre - BenoîtSagot + BenoîtSagot 493–498 Nous présentons dans cet article un nouveau formalisme linguistique qui repose sur les Grammaires à Concaténation d’Intervalles (RCG), appelé Méta-RCG. Nous exposons tout d’abord pourquoi la non-linéarité permet une représentation adéquate des phénomènes linguistiques, et en particulier de l’interaction entre les différents niveaux de description. Puis nous présentons les Méta-RCG et les concepts linguistiques supplémentaires qu’elles mettent en oeuvre, tout en restant convertibles en RCG classiques. Nous montrons que les analyses classiques (constituants, dépendances, topologie, sémantique prédicat-arguments) peuvent être obtenues par projection partielle d’une analyse Méta-RCG complète. Enfin, nous décrivons la grammaire du français que nous développons dans ce nouveau formalisme et l’analyseur efficace qui en découle. Nous illustrons alors la notion de projection partielle sur un exemple. 2005.jeptalnrecital-court.19 @@ -631,7 +631,7 @@ Segmentation thématique par chaînes lexicales pondérées LaurianneSitbon - PatriceBellot + PatriceBellot 505–510 Cet article propose une méthode innovante et efficace pour segmenter un texte en parties thématiquement cohérentes, en utilisant des chaînes lexicales pondérées. Les chaînes lexicales sont construites en fonction de hiatus variables, ou bien sans hiatus, ou encore pondérées en fonction de la densité des occurrences du terme dans la chaîne. D’autre part, nous avons constaté que la prise en compte du repérage d’entités nommées dans la chaîne de traitement, du moins sans résolution des anaphores, n’améliore pas significativement les performances. Enfin, la qualité de la segmentation proposée est stable sur différentes thématiques, ce qui montre une indépendance par rapport au type de document. 2005.jeptalnrecital-court.21 @@ -640,7 +640,7 @@ Une plateforme pour l’acquisition, la maintenance et la validation de ressources lexicales - TristanVanrullen + TristanVanrullen PhilippeBlache CristelPortes StéphaneRauzy diff --git a/data/xml/2005.mtsummit.xml b/data/xml/2005.mtsummit.xml index 58016384da..57b6bc822f 100644 --- a/data/xml/2005.mtsummit.xml +++ b/data/xml/2005.mtsummit.xml @@ -10,7 +10,7 @@ Reviewing Back the Past <fixed-case>MT</fixed-case> Summits - MakotoNagao + MakotoNagao nagao-2005-reviewing @@ -35,7 +35,7 @@ One Decade of Statistical Machine Translation: 1996-2005 - HermannNey + HermannNey 2005.mtsummit-invited.5 In the last decade, the statistical approach has found widespread use in machine translation both for written and spoken language and has had a major impact on the translation accuracy. This paper will cover the principles of statistical machine translation and summarize the progress made so far. ney-2005-one @@ -111,7 +111,7 @@ Selection of Entries for a Bilingual Dictionary from Aligned Translation Equivalents using Support Vector Machines TakeshiKutsumi - TakehikoYoshimi + TakehikoYoshimi KatsunoriKotani IchikoSata HitoshiIsahara @@ -124,7 +124,7 @@ Subword Clusters as Light-Weight Interlingua for Multilingual Document Retrieval UdoHahn KornelMarko - StefanSchulz + StefanSchulz 17-24 2005.mtsummit-papers.3 We introduce a light-weight interlingua for a cross-language document retrieval system in the medical domain. It is composed of equivalence classes of semantically primitive, language-specific subwords which are clustered by interlingual and intralingual synonymy. Each subword cluster represents a basic conceptual entity of the language-independent interlingua. Documents, as well as queries, are mapped to this interlingua level on which retrieval operations are performed. Evaluation experiments reveal that this interlingua-based retrieval model outperforms a direct translation approach. @@ -187,7 +187,7 @@ Document Authoring the <fixed-case>B</fixed-case>ible for Minority Language Translation StephenBeale - SergeiNirenburg + SergeiNirenburg MarjorieMcShane TodAllman 63-70 @@ -236,7 +236,7 @@ Semantically Relatable Sets: Building Blocks for Representing Semantics RajatKumar Mohanty AnupamaDutta - PushpakBhattacharyya + PushpakBhattacharyya 101-108 2005.mtsummit-papers.14 kumar-mohanty-etal-2005-semantically @@ -253,14 +253,14 @@ Evaluation of Machine Translation with Predictive Metrics beyond <fixed-case>BLEU</fixed-case>/<fixed-case>NIST</fixed-case>: <fixed-case>CESTA</fixed-case> Evaluation Campaign # 1 SylvainSurcin - OlivierHamon + OlivierHamon AntonyHartley - MartinRajman - AndreiPopescu-Belis - Widad Mustafa ElHadi - IsmaïlTimimi - MarianneDabbadie - KhalidChoukri + MartinRajman + AndreiPopescu-Belis + Widad Mustafa ElHadi + IsmaïlTimimi + MarianneDabbadie + KhalidChoukri 117-124 2005.mtsummit-papers.16 In this paper, we report on the results of a full-size evaluation campaign of various MT systems. This campaign is novel compared to the classical DARPA/NIST MT evaluation campaigns in the sense that French is the target language, and that it includes an experiment of meta-evaluation of various metrics claiming to better predict different attributes of translation quality. We first describe the campaign, its context, its protocol and the data we used. Then we summarise the results obtained by the participating systems and discuss the meta-evaluation of the metrics used. @@ -268,7 +268,7 @@ Inter-rater Agreement Measures, and the Refinement of Metrics in the <fixed-case>PLATO</fixed-case> <fixed-case>MT</fixed-case> Evaluation Paradigm - Keith J.Miller + Keith J.Miller MichelleVanni 125-132 2005.mtsummit-papers.17 @@ -287,9 +287,9 @@ Thot: a Toolkit To Train Phrase-based Statistical Translation Models - DanielOrtiz-Martínez - IsmaelGarcía-Varea - FranciscoCasacuberta + DanielOrtiz-Martínez + IsmaelGarcía-Varea + FranciscoCasacuberta 141-148 2005.mtsummit-papers.19 In this paper, we present the Thot toolkit, a set of tools to train phrase-based models for statistical machine translation, which is publicly available as open source software. The toolkit obtains phrase-based models from word-based alignment models; to our knowledge, this functionality has not been offered by any publicly available toolkit. The Thot toolkit also implements a new way for estimating phrase models, this allows to obtain more complete phrase models than the methods described in the literature, including a segmentation length submodel. The toolkit output can be given in different formats in order to be used by other statistical machine translation tools like Pharaoh, which is a beam search decoder for phrase-based alignment models which was used in order to perform translation experiments with the generated models. Additionally, the Thot toolkit can be used to obtain the best alignment between a sentence pair at phrase level. @@ -297,7 +297,7 @@ Machine Translation of Bi-lingual <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish (<fixed-case>H</fixed-case>inglish) Text - R. Mahesh K.Sinha + R. Mahesh K.Sinha AnilThakur 149-156 2005.mtsummit-papers.20 @@ -316,7 +316,7 @@ <fixed-case>SEM</fixed-case>-<fixed-case>I</fixed-case> Rational <fixed-case>MT</fixed-case>: Enriching Deep Grammars with a Semantic Interface for Scalable Machine Translation - DanFlickinger + DanFlickinger Jan ToreLønning HelgeDyvik StephanOepen @@ -328,8 +328,8 @@ <fixed-case>DEMOCRAT</fixed-case>: Deciding between Multiple Outputs Created by Automatic Translation - Mennovan Zaanen - HaroldSomers + Mennovan Zaanen + HaroldSomers 173-180 2005.mtsummit-papers.23 van-zaanen-somers-2005-democrat @@ -337,12 +337,12 @@ Customizing a <fixed-case>K</fixed-case>orean-<fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case> System for Patent Translation MunpyoHong - Young-GilKim - Chang-HyunKim + Young-GilKim + Chang-HyunKim Seong-IlYang Young-AeSeo CheolRyu - Sang-KyuPark + Sang-KyuPark 181-187 2005.mtsummit-papers.24 This paper addresses a customization process of a Korean-English MT system for patent translation. The major customization steps include terminology construction, linguistic study, and the modification of the existing analysis and generation-module. T o our knowledge, this is the first worth-mentioning large-scale customization effort of an MT system for Korean and English. This research was performed under the auspices of the MIC (Ministry of Information and Communication) of Korean government. A prototype patent MT system for electronics domain was installed and is being tested in the Korean Intellectual Property Office. @@ -351,11 +351,11 @@ Practicing Controlled Language through a Help System integrated into the Medical Speech Translation System (<fixed-case>M</fixed-case>ed<fixed-case>SLT</fixed-case>) MarianneStarlander - PierretteBouillon + PierretteBouillon NikosChatzichrisafis MarianneSantaholma - MannyRayner - Beth AnnHockey + MannyRayner + Beth AnnHockey HitoshiIsahara KyokoKanzaki YukieNakao @@ -367,8 +367,8 @@ The <fixed-case>FAME</fixed-case> Speech-to-Speech Translation System for <fixed-case>C</fixed-case>atalan, <fixed-case>E</fixed-case>nglish, and <fixed-case>S</fixed-case>panish VictoriaArranz - ElisabetComelles - DavidFarwell + ElisabetComelles + DavidFarwell 195-202 2005.mtsummit-papers.26 This paper describes the evaluation of the FAME interlingua-based speech-to-speech translation system for Catalan, English and Spanish. This system is an extension of the already existing NESPOLE! that translates between English, French, German and Italian. This article begins with a brief introduction followed by a description of the system architecture and the components of the translation module including the Speech Recognizer, the analysis chain, the generation chain and the Speech Synthesizer. Then we explain the interlingua formalism used, called Interchange Format (IF). We show the results obtained from the evaluation of the system and we describe the three types of evaluation done. We also compare the results of our system with those obtained by a stochastic translator which has been independently developed over the course of the FAME project. Finally, we conclude with future work. @@ -397,8 +397,8 @@ Probabilistic Model for Example-based Machine Translation EijiAramaki SadaoKurohashi - HidekiKashioka - NaotoKato + HidekiKashioka + NaotoKato 219-226 2005.mtsummit-papers.29 Example-based machine translation (EBMT) systems, so far, rely on heuristic measures in retrieving translation examples. Such a heuristic measure costs time to adjust, and might make its algorithm unclear. This paper presents a probabilistic model for EBMT. Under the proposed model, the system searches the translation example combination which has the highest probability. The proposed model clearly formalizes EBMT process. In addition, the model can naturally incorporate the context similarity of translation examples. The experimental results demonstrate that the proposed model has a slightly better translation quality than state-of-the-art EBMT systems. @@ -407,8 +407,8 @@ Low Cost Portability for Statistical Machine Translation based on N-gram Coverage MatthiasEck - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel 227-234 2005.mtsummit-papers.30 Statistical machine translation relies heavily on the available training data. However, in some cases, it is necessary to limit the amount of training data that can be created for or actually used by the systems. To solve that problem, we introduce a weighting scheme that tries to select more informative sentences first. This selection is based on the previously unseen n-grams the sentences contain, and it allows us to sort the sentences according to their estimated importance. After sorting, we can construct smaller training corpora, and we are able to demonstrate that systems trained on much less training data show a very competitive performance compared to baseline systems using all available training data. @@ -437,7 +437,7 @@ <fixed-case>PESA</fixed-case>: Phrase Pair Extraction as Sentence Splitting - StephanVogel + StephanVogel 251-258 2005.mtsummit-papers.33 Most statistical machine translation systems use phrase-to-phrase translations to capture local context information, leading to better lexical choice and more reliable local reordering. The quality of the phrase alignment is crucial to the quality of the resulting translations. Here, we propose a new phrase alignment method, not based on the Viterbi path of word alignment models. Phrase alignment is viewed as a sentence splitting task. For a given spitting of the source sentence (source phrase, left segment, right segment) find a splitting for the target sentence, which optimizes the overall sentence alignment probability. Experiments on different translation tasks show that this phrase alignment method leads to highly competitive translation results. @@ -447,9 +447,9 @@ Statistical Machine Translation of <fixed-case>E</fixed-case>uropean Parliamentary Speeches DavidVilar EvgenyMatusov - SasaHasan + SasaHasan RichardZens - HermannNey + HermannNey 259-266 2005.mtsummit-papers.34 In this paper we present the ongoing work at RWTH Aachen University for building a speech-to-speech translation system within the TC-Star project. The corpus we work on consists of parliamentary speeches held in the European Plenary Sessions. To our knowledge, this is the first project that focuses on speech-to-speech translation applied to a real-life task. We describe the statistical approach used in the development of our system and analyze its performance under different conditions: dealing with syntactically correct input, dealing with the exact transcription of speech and dealing with the (noisy) output of an automatic speech recognition system. Experimental results show that our system is able to perform adequately in each of these conditions. @@ -459,7 +459,7 @@ Practical Approach to Syntax-based Statistical Machine Translation KenjiImamura HideoOkuma - EiichiroSumita + EiichiroSumita 267-274 2005.mtsummit-papers.35 This paper presents a practical approach to statistical machine translation (SMT) based on syntactic transfer. Conventionally, phrase-based SMT generates an output sentence by combining phrase (multiword sequence) translation and phrase reordering without syntax. On the other hand, SMT based on tree-to-tree mapping, which involves syntactic information, is theoretical, so its features remain unclear from the viewpoint of a practical system. The SMT proposed in this paper translates phrases with hierarchical reordering based on the bilingual parse tree. In our experiments, the best translation was obtained when both phrases and syntactic information were used for the translation process. @@ -467,12 +467,12 @@ Bilingual N-gram Statistical Machine Translation - José B.Mariño - Rafael E.Banchs - Josep M.Crego - Adriàde Gispert + José B.Mariño + Rafael E.Banchs + Josep M.Crego + Adriàde Gispert PatrikLambert - José A. R.Fonollosa + José A. R.Fonollosa MartaRuiz 275-282 2005.mtsummit-papers.36 @@ -481,9 +481,9 @@ Reordered Search, and Tuple Unfolding for Ngram-based <fixed-case>SMT</fixed-case> - Josep M.Crego - José B.Mariño - Adriàde Gispert + Josep M.Crego + José B.Mariño + Adriàde Gispert 283-289 2005.mtsummit-papers.37 In Statistical Machine Translation, the use of reordering for certain language pairs can produce a significant improvement on translation accuracy. However, the search problem is shown to be NP-hard when arbitrary reorderings are allowed. This paper addresses the question of reordering for an Ngram-based SMT approach following two complementary strategies, namely reordered search and tuple unfolding. These strategies interact to improve translation quality in a Chinese to English task. On the one hand, we allow for an Ngram-based decoder (MARIE) to perform a reordered search over the source sentence, while combining a translation tuples Ngram model, a target language model, a word penalty and a word distance model. Interestingly, even though the translation units are learnt sequentially, its reordered search produces an improved translation. On the other hand, we allow for a modification of the translation units that unfolds the tuples, so that shorter units are learnt from a new parallel corpus, where the source sentences are reordered according to the target language. This tuple unfolding technique reduces data sparseness and, when combined with the reordered search, further boosts translation performance. Translation accuracy and efficency results are reported for the IWSLT 2004 Chinese to English task. @@ -494,7 +494,7 @@ BartMellebeek AnnaKhasin KarolinaOwczarzak - JosefVan Genabith + JosefVan Genabith AndyWay 290-297 2005.mtsummit-papers.38 @@ -568,7 +568,7 @@ Divergence Patterns in Machine Translation between <fixed-case>H</fixed-case>indi and <fixed-case>E</fixed-case>nglish - R. Mahesh K.Sinha + R. Mahesh K.Sinha AnilThakur 346-353 2005.mtsummit-posters.4 @@ -587,7 +587,7 @@ Handling ki in <fixed-case>H</fixed-case>indi for <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case> - R. Mahesh K.Sinha + R. Mahesh K.Sinha AnilThakur 356-353 2005.mtsummit-posters.6 @@ -607,8 +607,8 @@ A Phrasal <fixed-case>EBMT</fixed-case> System for Translating <fixed-case>E</fixed-case>nglish to <fixed-case>B</fixed-case>engali - Sudip KumarNaskar - SivajiBandyopadhyay + Sudip KumarNaskar + SivajiBandyopadhyay 372-379 2005.mtsummit-posters.8 The present work describes a Phrasal Example Based Machine Translation system from English to Bengali that identifies the phrases in the input through a shallow analysis, retrieves the target phrases using a Phrasal Example base and finally combines the target language phrases employing some heuristics based on the phrase ordering rules for Bengali. The paper focuses on the structure of the noun, verb and prepositional phrases in English and how these phrases are realized in Bengali. This study has an effect on the design of the phrasal Example Base and recombination rules for the target language phrases. @@ -616,9 +616,9 @@ An <fixed-case>MT</fixed-case> System Recycled - OndřejBojar + OndřejBojar PetrHomola - VladislavKuboň + VladislavKuboň 380-387 2005.mtsummit-posters.9 This paper describes an attempt to recycle parts of the Czech-to-Russian machine translation system (MT) in the new Czech-to-English MT system. The paper describes the overall architecture of the new system and the details of the modules which have been added. A special attention is paid to the problem of named entity recognition and to the method of automatic acquisition of lexico-syntactic information for the bilingual dictionary of the system. @@ -627,8 +627,8 @@ Semi-Automated Elicitation Corpus Generation AlisonAlvarez - LoriLevin - RobertFrederking + LoriLevin + RobertFrederking ErikPeterson JeffGood 388-395 @@ -639,7 +639,7 @@ Data Inferred Multi-word Expressions for Statistical Machine Translation PatrickLambert - RafaelBanchs + RafaelBanchs 396-403 2005.mtsummit-posters.11 This paper presents a strategy for detecting and using multi-word expressions in Statistical Machine Translation. Performance of the proposed strategy is evaluated in terms of alignment quality as well as translation accuracy. Evaluations are performed by using the Verbmobil corpus. Results from translation tasks from English-to-Spanish and from Spanish-to-English are presented and discussed. @@ -660,7 +660,7 @@ Estimating the predictive Power of N-gram <fixed-case>MT</fixed-case> Evaluation Metrics across Language and Text Types BogdanBabych - AnthonyHartley + AnthonyHartley DebbieElliott 412-418 2005.mtsummit-posters.13 @@ -670,7 +670,7 @@ A Useful-based Evaluation of Reading Support Systems: Comprehension, Reading Speed and Effective Speed KatsunoriKotani - TakehikoYoshimi + TakehikoYoshimi TakeshiKutsumi IchikoSata HiroshiIsahara @@ -681,7 +681,7 @@ Word Alignment Viewer for Long Sentences - HidekiKashioka + HidekiKashioka 427-431 2005.mtsummit-posters.15 An aligned corpus is an important resource for developing machine translation systems. We consider suitable units for constructing the translation model through observing an aligned parallel corpus. We examine the characteristics of the aligned corpus. Long sentences are especially difficult for word alignment because the sentences can become very complicated. Also, each (source/target) word has a higher possibility to correspond to the (target/source) word. This paper introduces an alignment viewer a developer can use to correct alignment information. We discuss using the viewer on a patent parallel corpus because sentences in patents are often long and complicated. @@ -701,7 +701,7 @@ Rapid Ramp-up for Statistical Machine Translation: Minimal Training for Maximal Coverage HemaliMajithia PhilipRennart - EvelyneTzoukermann + EvelyneTzoukermann 438-444 2005.mtsummit-posters.17 This paper investigates optimal ways to get maximal coverage from minimal input training corpus. In effect, it seems antagonistic to think of minimal input training with a statistical machine translation system. Since statistics work well with repetition and thus capture well highly occurring words, one challenge has been to figure out the optimal number of “new” words that the system needs to be appropriately trained. Additionally, the goal is to minimize the human translation time for training a new language. In order to account for rapid ramp-up translation, we ran several experiments to figure out the minimal amount of data to obtain optimal translation results. @@ -741,8 +741,8 @@ Use of Machine Translation in <fixed-case>I</fixed-case>ndia: Current Status - SudipNaskar - SivajiBandyopadhyay + SudipNaskar + SivajiBandyopadhyay 465-470 2005.mtsummit-posters.21 A survey of the machine translation systems that have been developed in India for translation from English to Indian languages and among Indian languages reveals that the MT softwares are used in field testing or are available as web translation service. These systems are also used for teaching machine translation to the students and researchers. Most of these systems are in the English-Hindi or Indian language-Indian language domain. The translation domains are mostly government documents/reports and news stories. There are a number of other MT systems that are at their various phases of development and have been demonstrated at various forums. Many of these systems cover other Indian languages beside Hindi. @@ -768,7 +768,7 @@ Statistical Machine Translation: Foundations and Recent Advances - Franz JosefOch + Franz JosefOch 2005.mtsummit-tutorials.1.Presentation.pdf och-2005-statistical @@ -797,15 +797,15 @@ An Open Architecture for Transfer-based Machine Translation between <fixed-case>S</fixed-case>panish and <fixed-case>B</fixed-case>asque - IñakiAlegria - ArantzaDiaz de Ilarraza - GorkaLabaka - MikelLersundi - AingeruMayor - KepaSarasola - Mikel L.Forcada - SergioOrtiz-Rojas - LluísPadró + IñakiAlegria + ArantzaDiaz de Ilarraza + GorkaLabaka + MikelLersundi + AingeruMayor + KepaSarasola + Mikel L.Forcada + SergioOrtiz-Rojas + LluísPadró 7-14 2005.mtsummit-osmtw.2 We present the current status of development of an open architecture for the translation from Spanish into Basque. The machine translation architecture uses an open source analyser for Spanish and new modules mainly based on finite-state transducers. The project is integrated in the OpenTrad initiative, a larger government funded project shared among different universities and small companies, which will also include MT engines for translation among the main languages in Spain. The main objective is the construction of an open, reusable and interoperable framework. This paper describes the design of the engine, the formats it uses for the communication among the modules, the modules reused from other project named Matxin and the new modules we are building. @@ -817,7 +817,7 @@ StephanOepen MelanieSiegel AnnCopestake - DanFlickinger + DanFlickinger 15-22 2005.mtsummit-osmtw.3 bond-etal-2005-open @@ -826,12 +826,12 @@ An Open-Source Shallow-Transfer Machine Translation Toolbox: Consequences of Its Release and Availability CarmeArmentano-Oller Antonio M.Corbí-Bellot - Mikel L.Forcada - MireiaGinestí-Rosell + Mikel L.Forcada + MireiaGinestí-Rosell BoyanBonev - SergioOrtiz-Rojas + SergioOrtiz-Rojas Juan AntonioPérez-Ortiz - GemaRamírez-Sánchez + GemaRamírez-Sánchez FelipeSánchez-Martínez 23-30 2005.mtsummit-osmtw.4 @@ -850,7 +850,7 @@ An n-gram Approach to Exploiting a Monolingual Corpus for Machine Translation ToniBadia - GemmaBoleda + GemmaBoleda MaiteMelero AntoniOliver 1-7 @@ -859,7 +859,7 @@ Context-sensitive Retrieval for Example-based Translation - RalfBrown + RalfBrown 9-15 2005.mtsummit-ebmt.2 Example-Based Machine Translation (EBMT) systems have typically operated on individual sentences without taking into account prior context. By adding a simple reweighting of retrieved fragments of training examples on the basis of whether the previous translation retrieved any fragments from examples within a small window of the current instance, translation performance is improved. A further improvement is seen by performing a similar reweighting when another fragment of the current input sentence was retrieved from the same training example. Together, a simple, straightforward implementation of these two factors results in an improvement on the order of 1.0–1.6% in the BLEU metric across multiple data sets in multiple languages. @@ -877,7 +877,7 @@ Learning Translation Templates with Type Constraints - IlyasCicekli + IlyasCicekli 27-33 2005.mtsummit-ebmt.4 This paper presents a generalization technique that induces translation templates from given translation examples by replacing differing parts in these examples with typed variables. Since the type of each variable is also inferred during the learning process, each induced template is associated with a set of type constraints. The type constraints that are associated with a translation template restrict the usage of that translation template in certain contexts in order to avoid some of wrong translations. The types of variables are induced using the type lattices designed for both source language and target language. The proposed generalization technique has been implemented as a part of an EBMT system. @@ -903,8 +903,8 @@ Graph-based Retrieval for Example-based Machine Translation Using Edit-distance TakaoDoi - HirofumiYamamoto - EiichiroSumita + HirofumiYamamoto + EiichiroSumita 51-58 2005.mtsummit-ebmt.7 doi-etal-2005-graph @@ -919,7 +919,7 @@ Towards a Definition of Example-based Machine Translation - JohnHutchins + JohnHutchins 63-70 2005.mtsummit-ebmt.9 The example-based approach to MT is becoming increasingly popular. However, such is the variety of techniques and methods used that it is difficult to discern the overall conception of what example-based machine translation (EBMT) is and/or what its practitioners conceive it to be. Although definitions of MT systems are notoriously complex, an attempt is made to define EBMT in contrast to other MT architectures (RBMT and SMT). @@ -927,7 +927,7 @@ <fixed-case>EBMT</fixed-case> by Tree-Phrasing: a Pilot Study - PhilippeLanglais + PhilippeLanglais FabrizioGotti DidierBourigault ClaudeCoulombe @@ -947,7 +947,7 @@ Monolingual Corpus-based <fixed-case>MT</fixed-case> Using Chunks - StellaMarkantonatou + StellaMarkantonatou SokratisSofianopoulos VassilikiSpilioti YiorgosTambouratzis @@ -979,8 +979,8 @@ A Machine Learning Approach to Hypotheses Selection of Greedy Decoding for <fixed-case>SMT</fixed-case> - MichaelPaul - EiichiroSumita + MichaelPaul + EiichiroSumita SeiichiYamamoto 117-124 2005.mtsummit-ebmt.15 @@ -990,7 +990,7 @@ A Semantics-based <fixed-case>E</fixed-case>nglish-<fixed-case>B</fixed-case>engali <fixed-case>EBMT</fixed-case> System for Translating News Headlines DigantaSaha - SivajiBandyopadhyay + SivajiBandyopadhyay 125-133 2005.mtsummit-ebmt.16 The paper reports an Example based Machine Translation System for translating News Headlines from English to Bengali. The input headline is initially searched in the Direct Example Base. If it cannot be found, the input headline is tagged and the tagged headline is searched in the Generalized Tagged Example Base. If a match is obtained, the tagged headline in Bengali is retrieved from the example base, the output Bengali headline is generated after retrieving the Bengali equivalents of the English words from appropriate dictionaries and then applying relevant synthesis rules for generating the Bengali surface level words. If some named entities and acronyms are not present in the dictionary, transliteration scheme is applied for obtaining the Bengali equivalent. If a match is not found, the tagged input headline is analysed to identify the constituent phrase(s). The target translation is generated using English-Bengali phrasal example base, appropriate dictionaries and a set of heuristics for Bengali phrase reordering. If the headline still cannot be translated using example base strategy, a heuristic translation strategy will be applied. Any new input tagged headline along with its translation by the user will be inserted in the tagged Example base after generalization. @@ -1033,7 +1033,7 @@ Classification of Modified Relationships in <fixed-case>J</fixed-case>apanese Patent Sentences - ShoichiYokoyama + ShoichiYokoyama YuyaKaneda 16-20 2005.mtsummit-wpt.3 @@ -1087,14 +1087,14 @@ Terminology Construction Workflow for <fixed-case>K</fixed-case>orean-<fixed-case>E</fixed-case>nglish Patent <fixed-case>MT</fixed-case> - Young-GilKim + Young-GilKim Seong-IlYang MunpyoHong - Chang-HyunKim + Chang-HyunKim Young-AeSeo CheolRyu - Sang-KyuPark - Se-YoungPark + Sang-KyuPark + Se-YoungPark 55-59 2005.mtsummit-wpt.9 This paper addresses the workflow for terminology construction for Korean-English patent MT system. The workflow consists of the stage for setting lexical goals and the semi- automatic terminology construction stage. As there is no comparable system, it is difficult to determine how many terms are needed. To estimate the number of the needed terms, we analyzed 45,000 patent documents. Given the limited time and budget, we resorted to the semi-automatic methods to create the bilingual term dictionary in electronics domain. We will show that parenthesis information in Korean patent documents and bilingual title corpus can be successfully used to build a bilingual term dictionary. @@ -1126,7 +1126,7 @@ Challenges for the Multilingual Semantic Web - Waltherv. Hahn + Waltherv. Hahn CristinaVertan 5-9 2005.mtsummit-swtmt.3 diff --git a/data/xml/2005.sigdial.xml b/data/xml/2005.sigdial.xml index a27f02ad53..ca7699f8a9 100644 --- a/data/xml/2005.sigdial.xml +++ b/data/xml/2005.sigdial.xml @@ -3,7 +3,7 @@ Proceedings of the 6th SIGdial Workshop on Discourse and Dialogue - LailaDybkjær + LailaDybkjær WolfgangMinker Special Interest Group on Discourse and Dialogue (SIGdial)
Lisbon, Portugal
@@ -19,7 +19,7 @@ Where do we go from here? Research and Commercial Spoken Dialog Systems - RobertoPieraccini + RobertoPieraccini JuanHuerta 1–10 2005.sigdial-1.1 @@ -46,9 +46,9 @@ Partially Observable <fixed-case>M</fixed-case>arkov Decision Processes with Continuous Observations for Dialogue Management - Jason D.Williams + Jason D.Williams PascalPoupart - SteveYoung + SteveYoung 25–34 2005.sigdial-1.4 williams-etal-2005-partially @@ -67,7 +67,7 @@ Quantitative Evaluation of User Simulation Techniques for Spoken Dialogue Systems JostSchatzmann KallirroiGeorgila - SteveYoung + SteveYoung 45–54 2005.sigdial-1.6 schatzmann-etal-2005-quantitative @@ -75,8 +75,8 @@ Automatic Induction of Language Model Data for A Spoken Dialogue System - GraceChung - StephanieSeneff + GraceChung + StephanieSeneff ChaoWang 55–64 2005.sigdial-1.7 @@ -86,7 +86,7 @@ Does this Answer your Question? Towards Dialogue Management for Restricted Domain Question Answering Systems MatthiasDenecke - NorihitoYasuda + NorihitoYasuda 65–76 2005.sigdial-1.8 denecke-yasuda-2005-answer @@ -94,7 +94,7 @@ Using Machine Learning for Non-Sentential Utterance Classification - RaquelFernández + RaquelFernández JonathanGinzburg ShalomLappin 77–86 @@ -104,8 +104,8 @@ Using Bigrams to Identify Relationships Between Student Certainness States and Tutor Responses in a Spoken Dialogue Corpus - KateForbes-Riley - Diane J.Litman + KateForbes-Riley + Diane J.Litman 87–96 2005.sigdial-1.10 forbes-riley-litman-2005-using @@ -114,7 +114,7 @@ A Corpus Collection and Annotation Framework for Learning Multimodal Clarification Strategies VerenaRieser - IvanaKruijff-Korbayová + IvanaKruijff-Korbayová OliverLemon 97–106 2005.sigdial-1.11 @@ -143,8 +143,8 @@ Sorry and <fixed-case>I</fixed-case> Didn’t Catch That! - An Investigation of Non-understanding Errors and Recovery Strategies - DanBohus - Alexander I.Rudnicky + DanBohus + Alexander I.Rudnicky 128–143 2005.sigdial-1.14 bohus-rudnicky-2005-sorry @@ -153,7 +153,7 @@ Developing City Name Acquisition Strategies in Spoken Dialogue Systems Via User Simulation EdFilisko - StephanieSeneff + StephanieSeneff 144–155 2005.sigdial-1.15 filisko-seneff-2005-developing @@ -195,8 +195,8 @@ A Collaborative Problem-Solving Model of Dialogue - NateBlaylock - JamesAllen + NateBlaylock + JamesAllen 200–211 2005.sigdial-1.20 blaylock-allen-2005-collaborative @@ -234,7 +234,7 @@ <fixed-case>D</fixed-case>ialog<fixed-case>D</fixed-case>esigner - A Tool for Rapid System Design and Evaluation - HansDybkjær + HansDybkjær LailaDybkjær 227–231 2005.sigdial-1.24 @@ -243,13 +243,13 @@ Dealing with Doctors: A Virtual Human for Non-team Interaction - DavidTraum + DavidTraum WilliamSwartout JonathanGratch StacyMarsella PatrickKenny - EduardHovy - ShriNarayanan + EduardHovy + ShriNarayanan EdFast BilyanaMartinovski RahulBaghat @@ -265,7 +265,7 @@ Meet Hans Christian Andersen - Niels OleBernsen + Niels OleBernsen LailaDybkjær 237–241 2005.sigdial-1.26 diff --git a/data/xml/2005.tc.xml b/data/xml/2005.tc.xml index 767b0f5de1..bf4c40d095 100644 --- a/data/xml/2005.tc.xml +++ b/data/xml/2005.tc.xml @@ -25,14 +25,14 @@ Finding the System that Suits You Best: Towards the Normalization of <fixed-case>MT</fixed-case> Evaluation PaulaEstrella - AndreiPopescu-Belis - NancyUnderwood + AndreiPopescu-Belis + NancyUnderwood 2005.tc-1.3 estrella-etal-2005-finding Reverse Localisation - ReinhardSchäler + ReinhardSchäler 2005.tc-1.4 schaler-2005-reverse @@ -51,7 +51,7 @@ Automatic Detection of Translation Errors: The <fixed-case>T</fixed-case>rans<fixed-case>C</fixed-case>heck System - GrahamRussell + GrahamRussell 2005.tc-1.7 russell-2005-automatic diff --git a/data/xml/2006.amta.xml b/data/xml/2006.amta.xml index a8e506369d..006ce38707 100644 --- a/data/xml/2006.amta.xml +++ b/data/xml/2006.amta.xml @@ -40,7 +40,7 @@ Context-Based Machine Translation - JaimeCarbonell + JaimeCarbonell SteveKlein DavidMiller MikeSteinbaum @@ -53,8 +53,8 @@ Integration of <fixed-case>POS</fixed-case>tag-based Source Reordering into <fixed-case>SMT</fixed-case> Decoding by an Extended Search Graph - Josep M.Crego - José B.Mariño + Josep M.Crego + José B.Mariño 29-36 2006.amta-papers.4 This paper presents a reordering framework for statistical machine translation (SMT) where source-side reorderings are integrated into SMT decoding, allowing for a highly constrained reordered search graph. The monotone search is extended by means of a set of reordering patterns (linguistically motivated rewrite patterns). Patterns are automatically learnt in training from word-to-word alignments and source-side Part-Of-Speech (POS) tags. Traversing the extended search graph, the decoder evaluates every hypothesis making use of a group of widely used SMT models and helped by an additional Ngram language model of source-side POS tags. Experiments are reported on the Euparl task (Spanish-to-English and English-to- Spanish). Results are presented regarding translation accuracy (using human and automatic evaluations) and computational efficiency, showing significant improvements in translation quality for both translation directions at a very low computational cost. @@ -63,7 +63,7 @@ Better Learning and Decoding for Syntax Based <fixed-case>SMT</fixed-case> Using <fixed-case>PSDIG</fixed-case> YuanDing - MarthaPalmer + MarthaPalmer 37-45 2006.amta-papers.5 As an approach to syntax based statistical machine translation (SMT), Probabilistic Synchronous Dependency Insertion Grammars (PSDIG), introduced in (Ding and Palmer, 2005), are a version of synchronous grammars defined on dependency trees. In this paper we discuss better learning and decoding algorithms for a PSDIG MT system. We introduce two new grammar learners: (1) an exhaustive learner combining different heuristics, (2) an n-gram based grammar learner. Combining the grammar rules learned from the two learners improved the performance. We introduce a better decoding algorithm which incorporates a tri-gram language model. According to the Bleu metric, the PSDIG MT system performance is significantly better than IBM Model 4, while on par with the state-of-the-art phrase based system Pharaoh (Koehn, 2004). The improved integration of syntax on both source and target languages opens door to more sophisticated SMT processes. @@ -80,7 +80,7 @@ Challenges in Building an <fixed-case>A</fixed-case>rabic-<fixed-case>E</fixed-case>nglish <fixed-case>GHMT</fixed-case> System with <fixed-case>SMT</fixed-case> Components NizarHabash - BonnieDorr + BonnieDorr ChristofMonz 56-65 2006.amta-papers.7 @@ -91,7 +91,7 @@ Statistical Syntax-Directed Translation with Extended Domain of Locality LiangHuang KevinKnight - AravindJoshi + AravindJoshi 66-73 2006.amta-papers.8 In syntax-directed translation, the source-language input is first parsed into a parse-tree, which is then recursively converted into a string in the target-language. We model this conversion by an extended tree-to-string transducer that has multi-level trees on the source-side, which gives our system more expressive power and flexibility. We also define a direct probability model and use a linear-time dynamic programming algorithm to search for the best derivation. The model is then extended to the general log-linear frame-work in order to incorporate other features like n-gram language models. We devise a simple-yet-effective algorithm to generate non-duplicate k-best translations for n-gram rescoring. Preliminary experiments on English-to-Chinese translation show a significant improvement in terms of translation quality compared to a state-of-the- art phrase-based system. @@ -101,7 +101,7 @@ Corpus Variations for Translation Lexicon Induction RebeccaHwa CarolNichols - KhalilSima’an + KhalilSima’an 74-81 2006.amta-papers.9 Lexical mappings (word translations) between languages are an invaluable resource for multilingual processing. While the problem of extracting lexical mappings from parallel corpora is well-studied, the task is more challenging when the language samples are from non-parallel corpora. The goal of this work is to investigate one such scenario: finding lexical mappings between dialects of a diglossic language, in which people conduct their written communications in a prestigious formal dialect, but they communicate verbally in a colloquial dialect. Because the two dialects serve different socio-linguistic functions, parallel corpora do not naturally exist between them. An example of a diglossic dialect pair is Modern Standard Arabic (MSA) and Levantine Arabic. In this paper, we evaluate the applicability of a standard algorithm for inducing lexical mappings between comparable corpora (Rapp, 1999) to such diglossic corpora pairs. The focus of the paper is an in-depth error analysis, exploring the notion of relatedness in diglossic corpora and scrutinizing the effects of various dimensions of relatedness (such as mode, topic, style, and statistics) on the quality of the resulting translation lexicon. @@ -109,8 +109,8 @@ Toward an Interagency Language Roundtable Based Assessment of Speech-to-Speech Translation Capabilities - DouglasJones - TimothyAnderson + DouglasJones + TimothyAnderson SabineAtwell BrianDelaney JamesDirgin @@ -148,7 +148,7 @@ Multi-Engine Machine Translation by Recursive Sentence Decomposition BartMellebeek KarolinaOwczarzak - JosefVan Genabith + JosefVan Genabith AndyWay 110-118 2006.amta-papers.13 @@ -157,7 +157,7 @@ Toward Communicating Simple Sentences Using Pictorial Representations - RadaMihalcea + RadaMihalcea BenLeong 119-127 2006.amta-papers.14 @@ -167,8 +167,8 @@ Induction of Probabilistic Synchronous Tree-Insertion Grammars for Machine Translation RebeccaNesson - StuartShieber - AlexanderRush + StuartShieber + AlexanderRush 128-137 2006.amta-papers.15 The more expressive and flexible a base formalism for machine translation is, the less efficient parsing of it will be. However, even among formalisms with the same parse complexity, some formalisms better realize the desired characteristics for machine translation formalisms than others. We introduce a particular formalism, probabilistic synchronous tree-insertion grammar (PSTIG) that we argue satisfies the desiderata optimally within the class of formalisms that can be parsed no less efficiently than context-free grammars and demonstrate that it outperforms state-of-the-art word-based and phrase-based finite-state translation models on training and test data taken from the EuroParl corpus (Koehn, 2005). We then argue that a higher level of translation quality can be achieved by hybridizing our in- duced model with elementary structures produced using supervised techniques such as those of Groves et al. (2004). @@ -188,7 +188,7 @@ KarolinaOwczarzak BartMellebeek DeclanGroves - JosefVan Genabith + JosefVan Genabith AndyWay 148-155 2006.amta-papers.17 @@ -206,7 +206,7 @@ Direct Application of a Language Learner Test to <fixed-case>MT</fixed-case> Evaluation - FlorenceReeder + FlorenceReeder 166-175 2006.amta-papers.19 This paper shows the applicability of language testing techniques to machine translation (MT) evaluation through one of a set of related experiments. One straightforward experiment is to use language testing exams and scoring on MT output with little or no adaptation. This paper describes one such experiment, the first in a set. After an initial test (Vanni and Reeder, 2000), we expanded the experiment to include multiple raters and a more detailed analysis of the surprising results. Namely that unlike with humans, MT systems perform more poorly at both level zero and one than at level two and three. This paper presents these results as an illustration of both the applicability of language testing techniques and also the caution that needs to be applied. @@ -214,7 +214,7 @@ Measuring <fixed-case>MT</fixed-case> Adequacy Using Latent Semantic Analysis - FlorenceReeder + FlorenceReeder 176-184 2006.amta-papers.20 Translation adequacy is defined as the amount of semantic content from the source language document that is conveyed in the target language document. As such, it is more difficult to measure than intelligibility since semantic content must be measured in two documents and then compared. Latent Semantic Analysis is a content measurement technique used in language learner evaluation that exhibits characteristics attractive for re-use in machine translation evaluation (MTE). This experiment, which is a series of applications of the LSA algorithm in various configurations, demonstrates its usefulness as an MTE metric for adequacy. In addition, this experiment lays the groundwork for using LSA as a method to measure the accuracy of a translation without reliance on reference translations. @@ -233,7 +233,7 @@ Ambiguity Reduction for Machine Translation: Human-Computer Collaboration MarcusSammer KobiReiter - StephenSoderland + StephenSoderland KatrinKirchhoff OrenEtzioni 193-202 @@ -251,9 +251,9 @@ Combining Linguistic and Statistical Methods for Bi-directional <fixed-case>E</fixed-case>nglish <fixed-case>C</fixed-case>hinese Translation in the Flight Domain - StephanieSeneff + StephanieSeneff ChaoWang - JohnLee + JohnLee 213-222 2006.amta-papers.24 In this paper, we discuss techniques to combine an interlingua translation framework with phrase-based statistical methods, for translation from Chinese into English. Our goal is to achieve high-quality translation, suitable for use in language tutoring applications. We explore these ideas in the context of a flight domain, for which we have a large corpus of English queries, obtained from users interacting with a dialogue system. Our techniques exploit a pre-existing English-to-Chinese translation system to automatically produce a synthetic bilingual corpus. Several experiments were conducted combining linguistic and statistical methods, and manual evaluation was conducted for a set of 460 Chinese sentences. The best performance achieved an “adequate” or better analysis (3 or above rating) on nearly 94% of the 409 parsable subset. Using a Rover scheme to combine four systems resulted in an “adequate or better” rating for 88% of all the utterances. @@ -261,11 +261,11 @@ A Study of Translation Edit Rate with Targeted Human Annotation - MatthewSnover - BonnieDorr - RichSchwartz + MatthewSnover + BonnieDorr + RichSchwartz LinneaMicciulla - JohnMakhoul + JohnMakhoul 223-231 2006.amta-papers.25 We examine a new, intuitive measure for evaluating machine-translation output that avoids the knowledge intensiveness of more meaning-based approaches, and the labor-intensiveness of human judgments. Translation Edit Rate (TER) measures the amount of editing that a human would have to perform to change a system output so it exactly matches a reference translation. We show that the single-reference variant of TER correlates as well with human judgments of MT quality as the four-reference variant of BLEU. We also define a human-targeted TER (or HTER) and show that it yields higher correlations with human judgments than BLEU—even when BLEU is given human-targeted references. Our results indicate that HTER correlates with human judgments better than HMETEOR and that the four-reference variants of TER and HTER correlate with human judgments as well as—or better than—a second human judgment does. @@ -276,7 +276,7 @@ NicolasStroppa DeclanGroves AndyWay - KepaSarasola + KepaSarasola 232-241 2006.amta-papers.26 Basque is both a minority and a highly inflected language with free order of sentence constituents. Machine Translation of Basque is thus both a real need and a test bed for MT techniques. In this paper, we present a modular Data-Driven MT system which includes different chunkers as well as chunk aligners which can deal with the free order of sentence constituents of Basque. We conducted Basque to English translation experiments, evaluated on a large corpus (270,000 sentence pairs). The experimental results show that our system significantly outperforms state-of-the-art approaches according to several common automatic evaluation metrics. @@ -285,7 +285,7 @@ Combining Evaluation Metrics via Loss Functions CalandraTate - ClareVoss + ClareVoss 242-250 2006.amta-papers.27 When response metrics for evaluating the utility of machine translation (MT) output on a given task do not yield a single ranking of MT engines, how are MT users to decide which engine best supports their task? When the cost of different types of response errors vary, how are MT users to factor that information into their rankings? What impact do different costs have on response-based rankings? Starting with data from an extraction experiment detailed in Voss and Tate (2006), this paper describes three response-rate metrics developed to quantify different aspects of MT users’ performance identifying who/when/where-items in MT output, and then presents a loss function analysis over these rates to derive a single customizable metric, applying a range of values to correct responses and costs to different error types. For the given experimental dataset, loss function analyses provided a clearer characterization of the engines’ relative strength than did comparing the response rates to each other. For one MT engine, varying the costs had no impact: the engine consistently ranked best. By contrast, cost variations did impact the ranking of the other two engines: a rank reversal occurred on who-item extractions when incorrect responses were penalized more than non-responses. Future work with loss analysis, developing operational cost ratios of error rates to correct response rates, will require user studies and expert document-screening personnel to establish baseline values for effective MT engine support on wh-item extraction. @@ -294,7 +294,7 @@ Scalable Purely-Discriminative Training for Word and Tree Transducers BenjaminWellington - JosephTurian + JosephTurian ChrisPike DanMelamed 251-260 @@ -315,7 +315,7 @@ The Potential and Limitations of <fixed-case>MT</fixed-case> Paradigm DanielMarcu - AlanMelby + AlanMelby 2006.amta-talks.1 marcu-melby-2006-potential @@ -343,7 +343,7 @@ Expecting the Unexpected: Using <fixed-case>MT</fixed-case> Operationally - FlorenceReeder + FlorenceReeder reeder-2006-expecting @@ -419,7 +419,7 @@ <fixed-case>MT</fixed-case> for social impact - MichaelMcCord + MichaelMcCord 2006.amta-panel1.4 mccord-2006-mt @@ -445,7 +445,7 @@ Presentation - JaimeCarbonell + JaimeCarbonell 2006.amta-panel2.1 carbonell-2006-presentation @@ -463,14 +463,14 @@ Combining interlingua with <fixed-case>SMT</fixed-case> - StephanieSeneff + StephanieSeneff 2006.amta-panel2.4 seneff-2006-combining First strategies for integrating hybrid approaches into established systems JeanSenellart - John S.White + John S.White 2006.amta-panel2.5 senellart-white-2006-first @@ -486,7 +486,7 @@ A Gentle Introduction to Ontologies - EduardHovy + EduardHovy 2006.amta-tutorials.1 hovy-2006-gentle @@ -498,22 +498,22 @@ <fixed-case>A</fixed-case>rabic Dialect Processing - MonaDiab + MonaDiab NizarHabash 2006.amta-tutorials.3 diab-habash-2006-arabic An Overview of Statistical Machine Translation - DavidSmith + DavidSmith CharlesSchafer 2006.amta-tutorials.4 smith-schafer-2006-overview Name Translation - KeithMiller - SherriCondon + KeithMiller + SherriCondon 2006.amta-tutorials.5 miller-condon-2006-name diff --git a/data/xml/2006.bcs.xml b/data/xml/2006.bcs.xml index 2a1472440c..20a3892224 100644 --- a/data/xml/2006.bcs.xml +++ b/data/xml/2006.bcs.xml @@ -11,7 +11,7 @@ Challenges in Processing Colloquial <fixed-case>A</fixed-case>rabic AllaRozovskaya - RichardSproat + RichardSproat ElabbasBenmamoun 4-14 2006.bcs-1.1 @@ -21,10 +21,10 @@ <fixed-case>A</fixed-case>rabic <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and the Challenges of <fixed-case>A</fixed-case>rabic SabriElkateb - WilliamBlack + WilliamBlack PiekVossen - DavidFarwell - HoracioRodríguez + DavidFarwell + HoracioRodríguez AdamPease MusaAlkhalifa ChristianeFellbaum @@ -35,7 +35,7 @@ Tips and Tricks of the <fixed-case>P</fixed-case>rague <fixed-case>A</fixed-case>rabic Dependency Treebank - OtakarSmrž + OtakarSmrž 25-34 2006.bcs-1.3 In this paper, we report on several software implementations that we have developed within Prague Arabic Dependency Treebank or some other projects concerned with Arabic Natural Language Processing. We try to guide the reader through some essential tasks and note the solutions that we have designed and used. We as well point to third-party computational systems that the research community might exploit in the future work in this field. @@ -43,7 +43,7 @@ Diacritization: A Challenge to <fixed-case>A</fixed-case>rabic Treebank Annotation and Parsing - MohamedMaamouri + MohamedMaamouri SethKulick AnnBies 35-47 @@ -62,7 +62,7 @@ Effective Stemming for <fixed-case>A</fixed-case>rabic Information Retrieval YoussefKadri - Jian-YunNie + Jian-YunNie 68-75 2006.bcs-1.6 Arabic has a very rich and complex morphology. Its appropriate morphological processing is very important for Information Retrieval (IR). In this paper, we propose a new stemming technique that tries to determine the stem of a word representing the semantic core of this word according to Arabic morphology. This method is compared to a commonly used light stemming technique which truncates a word by simple rules. Our tests on TREC collections show that the new stemming technique is more effective than the light stemming. @@ -89,8 +89,8 @@ Using Cross-language Information Retrieval for Sentence Alignment NasredineSemmar - MeriamaLaib - ChristianFluhr + MeriamaLaib + ChristianFluhr 95-104 2006.bcs-1.9 Cross-language information retrieval consists in providing a query in one language and searching documents in different languages. Retrieved documents are ordered by the probability of being relevant to the user's request with the highest ranked being considered the most relevant document. The LIC2M cross-language information retrieval system is a weighted Boolean search engine based on a deep linguistic analysis of the query and the documents to be indexed. This system, designed to work on Arabic, Chinese, English, French, German and Spanish, is composed of a multilingual linguistic analyzer, a statistical analyzer, a reformulator, a comparator and a search engine. The multilingual linguistic analyzer includes a morphological analyzer, a part-of-speech tagger and a syntactic analyzer. In the case of Arabic, a clitic stemmer is added to the morphological analyzer to segment the input words into proclitics, simple forms and enclitics. The linguistic analyzer processes both documents to be indexed and queries to produce a set of normalized lemmas, a set of named entities and a set of nominal compounds with their morpho-syntactic tags. The statistical analyzer computes for documents to be indexed concept weights based on concept database frequencies. The comparator computes intersections between queries and documents and provides a relevance weight for each intersection. Before this comparison, the reformulator expands queries during the search. The expansion is used to infer from the original query words other words expressing the same concepts. The expansion can be in the same language or in different languages. The search engine retrieves the ranked, relevant documents from the indexes according to the corresponding reformulated query and then merges the results obtained for each language, taking into account the original words of the query and their weights in order to score the documents. Sentence alignment consists in estimating which sentence or sentences in the source language correspond with which sentence or sentences in a target language. We present in this paper a new approach to aligning sentences from a parallel corpora based on the LIC2M cross-language information retrieval system. This approach consists in building a database of sentences of the target text and considering each sentence of the source text as a "query" to that database. The aligned bilingual parallel corpora can be used as a translation memory in a computer-aided translation tool. diff --git a/data/xml/2006.eamt.xml b/data/xml/2006.eamt.xml index 8f40dab3e7..94cc8dc858 100644 --- a/data/xml/2006.eamt.xml +++ b/data/xml/2006.eamt.xml @@ -8,7 +8,7 @@ June 19–20 2006 ViggoHansen - BenteMaegaard + BenteMaegaard eamt @@ -45,18 +45,18 @@ A Computer-Assisted Translation Tool based on Finite-State Technology JorgeCivera - Antonio L.Lagarda + Antonio L.Lagarda ElsaCubel - FranciscoCasacuberta - EnriqueVidal - Juan M.Vilar - SergioBarrachina + FranciscoCasacuberta + EnriqueVidal + Juan M.Vilar + SergioBarrachina 2006.eamt-1.5 civera-etal-2006-computer Detecting Inappropriate Use of Free Online Machine Translation by Language Students. A Special Case of Plagiarism Detection - HaroldSomers + HaroldSomers FedericoGaspari AnaNiño 2006.eamt-1.6 @@ -84,25 +84,25 @@ Exploiting Word Transformation in Statistical Machine Translation from <fixed-case>S</fixed-case>panish to <fixed-case>E</fixed-case>nglish - DeepaGupta + DeepaGupta MarcelloFederico 2006.eamt-1.10 gupta-federico-2006-exploiting A Flexible Architecture for <fixed-case>CAT</fixed-case> Applications - SašaHasan + SašaHasan ShahramKhadivi RichardZens - HermannNey + HermannNey 2006.eamt-1.11 hasan-etal-2006-flexible A Flexible Online Server for Machine Translation Evaluation MatthiasEck - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel 2006.eamt-1.12 eck-etal-2006-flexible @@ -127,7 +127,7 @@ Identifying Complex Phenomena in a Corpus via a Treebank Lens - DanFlickinger + DanFlickinger 2006.eamt-1.16 flickinger-2006-identifying @@ -143,9 +143,9 @@ Leveraging Recurrent Phrase Structure in Large-scale Ontology Translation G. CraigMurray - Bonnie J.Dorr + Bonnie J.Dorr JimmyLin - JanHajič + JanHajič PavelPecina 2006.eamt-1.18 murray-etal-2006-leveraging @@ -159,7 +159,7 @@ Mixtures of <fixed-case>IBM</fixed-case> Model 2 JorgeCivera - AlfonsJuan + AlfonsJuan 2006.eamt-1.20 civera-juan-2006-mixtures @@ -167,7 +167,7 @@ Morpho-Syntax Based Statistical Methods for Automatic Sign Language Translation DanielStein JanBungeroth - HermannNey + HermannNey 2006.eamt-1.21 stein-etal-2006-morpho @@ -180,7 +180,7 @@ Pragmatics-based <fixed-case>MT</fixed-case> and the Translation of Puns - DavidFarwell + DavidFarwell StephenHelmreich 2006.eamt-1.23 farwell-helmreich-2006-pragmatics @@ -190,14 +190,14 @@ BartMellebeek KarolinaOwczarzak DeclanGroves - JosefVan Genabith + JosefVan Genabith AndyWay 2006.eamt-1.24 mellebeek-etal-2006-syntactic Task-based Evaluation of Machine Translation (<fixed-case>MT</fixed-case>) Engines. Measuring How Well People Extract Who, When, Where-Type Elements in <fixed-case>MT</fixed-case> Output - Clare R.Voss + Clare R.Voss Calandra R.Tate 2006.eamt-1.25 voss-tate-2006-task @@ -217,7 +217,7 @@ Translation Context Sensitive <fixed-case>WSD</fixed-case> LuciaSpecia - Mariadas Graças Volpe Nunes + Mariadas Graças Volpe Nunes MarkStevenson 2006.eamt-1.28 specia-etal-2006-translation @@ -244,10 +244,10 @@ <fixed-case>W</fixed-case>eb<fixed-case>B</fixed-case>oot<fixed-case>C</fixed-case>a<fixed-case>T</fixed-case>. Instant Domain-Specific Corpora to Support Human Translators - MarcoBaroni + MarcoBaroni AdamKilgarriff JanPomikalek - PavelRychly + PavelRychly 2006.eamt-1.31 baroni-etal-2006-webbootcat diff --git a/data/xml/2006.iwslt.xml b/data/xml/2006.iwslt.xml index c084a13686..e70919471c 100644 --- a/data/xml/2006.iwslt.xml +++ b/data/xml/2006.iwslt.xml @@ -16,7 +16,7 @@ Rosetta: an analyst’s co-pilot - SalimRoukos + SalimRoukos 2006.iwslt-plenaries.2.Presentation.pdf roukos-2006-rosetta @@ -31,13 +31,13 @@ Overview of the <fixed-case>IWSLT</fixed-case>06 evaluation campaign - MichaelPaul + MichaelPaul 2006.iwslt-evaluation.1 paul-2006-overview Finite-state transducer-based statistical machine translation using joint probabilities - SrinivasBangalore + SrinivasBangalore StephanKanthak PatrickHaffner 2006.iwslt-evaluation.2 @@ -45,11 +45,11 @@ <fixed-case>IWSLT</fixed-case>-06: experiments with commercial <fixed-case>MT</fixed-case> systems and lessons from subjective evaluations - ChristianBoitet + ChristianBoitet YoucefBey MutsukoTomokio WenjieCao - HervéBlanchon + HervéBlanchon 2006.iwslt-evaluation.3 boitet-etal-2006-iwslt @@ -107,7 +107,7 @@ The <fixed-case>MIT</fixed-case>-<fixed-case>LL</fixed-case>/<fixed-case>AFRL</fixed-case> <fixed-case>IWSLT</fixed-case>-2006 <fixed-case>MT</fixed-case> system WadeShen BrianDelaney - TimAnderson + TimAnderson 2006.iwslt-evaluation.10 shen-etal-2006-mit @@ -115,22 +115,22 @@ Phrase reordering for statistical machine translation based on predicate-argument structure MamoruKomachi MasaakiNagata - YujiMatsumoto + YujiMatsumoto 2006.iwslt-evaluation.11 komachi-etal-2006-phrase The <fixed-case>N</fixed-case>i<fixed-case>CT</fixed-case>-<fixed-case>ATR</fixed-case> statistical machine translation system for <fixed-case>IWSLT</fixed-case> 2006 RuiqiangZhang - HirofumiYamamoto - MichaelPaul + HirofumiYamamoto + MichaelPaul HideoOkuma KeijiYasuda YvesLepage EtienneDenoual DaichiMochihashi AndrewFinch - EiichiroSumita + EiichiroSumita 2006.iwslt-evaluation.12 zhang-etal-2006-nict @@ -142,7 +142,7 @@ PengLiu KeyanZhou YanqingHe - ChengqingZong + ChengqingZong 2006.iwslt-evaluation.13 chai-etal-2006-nlpr @@ -160,41 +160,41 @@ ArneMauser RichardZens EvgenyMatusov - SasaHasan - HermannNey + SasaHasan + HermannNey 2006.iwslt-evaluation.15 mauser-etal-2006-rwth The <fixed-case>SLE</fixed-case> example-based translation system - PeteWhitelock + PeteWhitelock VictorPoznanski 2006.iwslt-evaluation.16 whitelock-poznanski-2006-sle The <fixed-case>TALP</fixed-case> Ngram-based <fixed-case>SMT</fixed-case> systems for <fixed-case>IWSLT</fixed-case> 2006 - Josep M.Crego - Adriàde Gispert + Josep M.Crego + Adriàde Gispert PatrickLambert MaximKhalilov - Marta R.Costa-jussà - José B.Mariño - RafaelBanchs - José A. R.Fonollosa + Marta R.Costa-jussà + José B.Mariño + RafaelBanchs + José A. R.Fonollosa 2006.iwslt-evaluation.17 crego-etal-2006-talp <fixed-case>TALP</fixed-case> phrase-based system and <fixed-case>TALP</fixed-case> system combination for <fixed-case>IWSLT</fixed-case> 2006 - Marta R.Costa-jussà - Josep M.Crego - Adriàde Gispert + Marta R.Costa-jussà + Josep M.Crego + Adriàde Gispert PatrikLambert MaximKhalilov - José A. R.Fonollosa - José B.Mariño - RafaelBanchs + José A. R.Fonollosa + José B.Mariño + RafaelBanchs 2006.iwslt-evaluation.18 costa-jussa-etal-2006-talp-phrase @@ -206,9 +206,9 @@ SanjikaHewavitharana MuntsinKolss BingZhao - Almut SiljaHildebrand - StephanVogel - AlexWaibel + Almut SiljaHildebrand + StephanVogel + AlexWaibel 2006.iwslt-evaluation.19 eck-etal-2006-uka @@ -216,8 +216,8 @@ The <fixed-case>CMU</fixed-case>-<fixed-case>UKA</fixed-case> syntax augmented machine translation system for <fixed-case>IWSLT</fixed-case>-06 AndreasZollmann AshishVenugopal - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel 2006.iwslt-evaluation.20 zollmann-etal-2006-cmu @@ -250,15 +250,15 @@ Automatic sentence segmentation and punctuation prediction for spoken language translation EvgenyMatusov ArneMauser - HermannNey + HermannNey 2006.iwslt-papers.1 matusov-etal-2006-automatic Continuous space language models for the <fixed-case>IWSLT</fixed-case> 2006 task HolgerSchwenk - Marta R.Costa-jussà - José A. R.Fonollosa + Marta R.Costa-jussà + José A. R.Fonollosa 2006.iwslt-papers.2 schwenk-etal-2006-continuous-space @@ -279,7 +279,7 @@ Tuning machine translation parameters with <fixed-case>SPSA</fixed-case> PatrikLambert - Rafael E.Banchs + Rafael E.Banchs 2006.iwslt-papers.5 lambert-banchs-2006-tuning @@ -287,24 +287,24 @@ An efficient graph search decoder for phrase-based statistical machine translation BrianDelaney WadeShen - TimothyAnderson + TimothyAnderson 2006.iwslt-papers.6 delaney-etal-2006-efficient
<fixed-case>AER</fixed-case>: do we need to “improve” our alignments? DavidVilar - MajaPopovic - HermannNey + MajaPopovic + HermannNey 2006.iwslt-papers.7 vilar-etal-2006-aer Development of client-server speech translation system on a multi-lingual speech communication platform - TohruShimizu + TohruShimizu YutakaAshikari - EiichiroSumita - HidekiKashioka + EiichiroSumita + HidekiKashioka SatoshiNakamura 2006.iwslt-papers.8 shimizu-etal-2006-development diff --git a/data/xml/2006.jeptalnrecital.xml b/data/xml/2006.jeptalnrecital.xml index 9068420aaf..84f733f847 100644 --- a/data/xml/2006.jeptalnrecital.xml +++ b/data/xml/2006.jeptalnrecital.xml @@ -4,7 +4,7 @@ Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. Conférences invitées PietMertens - CédrickFairon + CédrickFairon AnneDister PatrickWatrin ATALA @@ -28,7 +28,7 @@ At Last Parsing Is Now Operational - Gertjanvan Noord + Gertjanvan Noord 20–42 Natural language analysis systems which combine knowledge-based and corpus-based methods are now becoming accurate enough to be used in various applications. We describe one such parsing system for Dutch, known as Alpino, and we show how corpus-based methods are essential to obtain accurate knowledge-based parsers. In particular we show a variety of cases where large amounts of parser output are used to improve the parser. 2006.jeptalnrecital-invite.2 @@ -39,7 +39,7 @@ Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. Articles longs PietMertens - CédrickFairon + CédrickFairon AnneDister PatrickWatrin ATALA @@ -72,7 +72,7 @@ Un analyseur morphologique multi-niveaux utilisant la jointure - FrançoisBarthélemy + FrançoisBarthélemy 63–72 Dans cet article nous présentons un analyseur morphologique pour le verbe akkadien. Cette langue est de la famille des langues sémitiques. Les flexions du verbe font intervenir des changements internes à la racine. L’analyseur présenté ici illustre l’utilisation d’un formalisme multi-niveaux et d’opérateurs relationnels puissants, notamment la jointure. La multiplicité de niveaux intermédiaires entre les formes profondes et de surface, ainsi que les opérateurs de compositions permettent de diviser la description en contraintes relativement simples qui sont ensuite rassemblées pour s’exercer soit simultanément, soit en cascade, soit encore d’une façon mixte, c’est-à-dire simultanément pour certains des niveaux et en cascade pour d’autres. Ce mécanisme nous permet de décrire la vocalisation du radical comme un processus d’insertions successives de voyelles. Cela présente l’intérêt d’être plus simple que l’utilisation d’un schéma vocalique figé soumis à interdigitation. De plus, cela semble expliquer de façon plus économique les formes des verbes faibles. 2006.jeptalnrecital-long.3 @@ -93,8 +93,8 @@ Outilex, plate-forme logicielle de traitement de textes écrits OlivierBlanc - MatthieuConstant - ÉricLaporte + MatthieuConstant + ÉricLaporte 83–92 La plate-forme logicielle Outilex, qui sera mise à la disposition de la recherche, du développement et de l’industrie, comporte des composants logiciels qui effectuent toutes les opérations fondamentales du traitement automatique du texte écrit : traitements sans lexiques, exploitation de lexiques et de grammaires, gestion de ressources linguistiques. Les données manipulées sont structurées dans des formats XML, et également dans d’autres formats plus compacts, soit lisibles soit binaires, lorsque cela est nécessaire ; les convertisseurs de formats nécessaires sont inclus dans la plate-forme ; les formats de grammaires permettent de combiner des méthodes statistiques avec des méthodes fondées sur des ressources linguistiques. Enfin, des lexiques du français et de l’anglais issus du LADL, construits manuellement et d’une couverture substantielle seront distribués avec la plate-forme sous licence LGPL-LR. 2006.jeptalnrecital-long.5 @@ -103,8 +103,8 @@ Une grammaire multilingue partagée pour la traduction automatique de la parole - PierretteBouillon - MannyRayner + PierretteBouillon + MannyRayner BrunaNovellas YukieNakao MarianneSantaholma @@ -120,7 +120,7 @@ Prise en compte des disfluences dans un système d’analyse syntaxique automatique de l’oral RémiBove ChristineChardenon - JeanVéronis + JeanVéronis 103–111 Nous présentons dans cette étude un essai de prise en compte des disfluences dans un système d’analyse linguistique initialement prévu pour l’écrit, en vue de la réalisation d’un prototype de traduction parole-parole. À partir d’une étude approfondie sur corpus, nous montrons comment des modifications du lexique et de la grammaire ont permis de traiter les cas les plus simples (pauses remplies, répétitions de mots isolés, etc.). D’autres cas plus complexes comme répétitions et auto-corrections de syntagmes ont nécessité la mise au point d’un mécanisme de contrôle sémantique permettant de limiter la combinatoire. Cette étude a mis également en évidence la difficulté de traitement de phénomènes tels que les amorces (mots interrompus) et les constructions inachevées, qui pour l’instant restent sans solution satisfaisante. 2006.jeptalnrecital-long.7 @@ -139,7 +139,7 @@ Extraction de relations sémantiques entre noms et verbes au-delà des liens morphologiques - CécileFabre + CécileFabre DidierBourigault 121–129 Nous étudions les relations de proximité sémantique entre les noms et les verbes à partir de données calculées sur un corpus de 200 millions de mots par un programme d’analyse distributionnelle automatique. Nous exposons les résultats d’une méthode d’extraction de couples Nom/Verbe, qui combine un indice de proximité distributionnelle et un indice de cooccurrence : un couple est extrait si le nom et le verbe apparaissent avec les mêmes arguments sur l’ensemble du corpus, d’une part, et s’ils apparaissent au moins une fois dans un même paragraphe munis du même argument, d’autre part. L’article élabore une typologie des 1441 couples extraits et démontre l’intérêt de prendre en compte les couples non liés morphologiquement, qui constituent 70 % des données. @@ -180,9 +180,9 @@ Questions Booléennes : Oui ou Non, des Questions et des Réponses - LaurentGillard - PatriceBellot - MarcEl-Bèze + LaurentGillard + PatriceBellot + MarcEl-Bèze 159–166 Dans cet article, nous présentons une approche afin de traiter les questions booléennes, c’est-à-dire des questions dont la réponse peut être un Oui ou un Non, cela, dans le cadre d’un système de Questions-Réponses. En effet, la campagne Technolangue-EQueR, première campagne francophone de Questions-Réponses (QR) utilisant des questions et un corpus en français, a également été la première campagne QR à introduire une évaluation pour ce type de questions. Nous détaillons, parallèlement à notre approche, des pistes de réflexion sur les aspects sous-jacents à ces questions booléennes, notamment au travers d’une analyse des résultats obtenus par notre système dans un contexte similaire à celui de notre participation à la campagne officielle. 2006.jeptalnrecital-long.13 @@ -197,10 +197,10 @@ BernardFradin NabilHathout StéphanieLignon - FiammettaNamer + FiammettaNamer ClémentPlancq FrançoisYvon - PierreZweigenbaum + PierreZweigenbaum 167–177 Dans ce travail, nous étudions en corpus la productivité quantitative des suffixations par -Able et par -ité du français, d’abord indépendamment l’une de l’autre, puis lorsqu’elles s’enchaînent dérivationnellement (la suffixation en -ité s’applique à des bases en -Able dans environ 15 % des cas). Nous estimons la productivité de ces suffixations au moyen de mesures statistiques dont nous suivons l’évolution par rapport à la taille du corpus. Ces deux suffixations sont productives en français moderne : elles forment de nouveaux lexèmes tout au long des corpus étudiés sans qu’on n’observe de saturation, leurs indices de productivité montrent une évolution stable bien qu’étant dépendante des calculs qui leur sont appliqués. On note cependant que, de façon générale, de ces deux suffixations, c’est la suffixation par -ité qui est la plus fréquente en corpus journalistique, sauf précisément quand -ité s’applique à un adjectif en -Able. Étant entendu qu’un adjectif en -Able et le nom en -ité correspondant expriment la même propriété, ce résultat indique que la complexité de la base est un paramètre à prendre en considération dans la formation du lexique possible. 2006.jeptalnrecital-long.14 @@ -249,7 +249,7 @@ De la Chambre des communes à la chambre d’isolement : adaptabilité d’un système de traduction basé sur les segments de phrases - PhilippeLanglais + PhilippeLanglais FabrizioGotti AlexandrePatry 217–226 @@ -292,7 +292,7 @@ Résolution des références aux documents dans un corpus de dialogues humains - AndreiPopescu-Belis + AndreiPopescu-Belis 257–266 Cet article étudie la résolution des références à des entités lorsqu’une représentation informatique de ces entités est disponible. Nous nous intéressons à un corpus de dialogues entre humains, portant sur les grands titres de la presse francophone du jour, et proposons une méthode pour détecter et résoudre les références faites par les locuteurs aux articles des journaux. La détection des expressions nominales qui réfèrent à ces documents est réalisée grâce à une grammaire, alors que le problème de la détection des pronoms qui réfèrent aux documents est abordé par des moyens statistiques. La résolution de ces expressions, à savoir l’attribution des référents, fait quant à elle l’objet d’un algorithme inspiré de la résolution des coréférences. Ces propositions sont évaluées par le biais de mesures quantitatives spécifiques. 2006.jeptalnrecital-long.23 @@ -322,7 +322,7 @@ Trouver le coupable : Fouille d’erreurs sur des sorties d’analyseurs syntaxiques - BenoîtSagot + BenoîtSagot ÉricVillemonte De La Clergerie 288–297 Nous présentons une méthode de fouille d’erreurs pour détecter automatiquement des erreurs dans les ressources utilisées par les systèmes d’analyse syntaxique. Nous avons mis en oeuvre cette méthode sur le résultat de l’analyse de plusieurs millions de mots par deux systèmes d’analyse différents qui ont toutefois en commun le lexique syntaxique et la chaîne de traitement pré-syntaxique. Nous avons pu identifier ainsi des inexactitudes et des incomplétudes dans les ressources utilisées. En particulier, la comparaison des résultats obtenus sur les sorties des deux analyseurs sur un même corpus nous a permis d’isoler les problèmes issus des ressources partagées de ceux issus des grammaires. @@ -350,8 +350,8 @@ Using Stemming in Morphological Analysis to Improve <fixed-case>A</fixed-case>rabic Information Retrieval NasredineSemmar - MeriamaLaib - ChristianFluhr + MeriamaLaib + ChristianFluhr 318–327 Information retrieval (IR) consists in finding all relevant documents for a user query in a collection of documents. These documents are ordered by the probability of being relevant to the user’s query. The highest ranked document is considered to be the most likely relevant document. Natural Language Processing (NLP) for IR aims to transform the potentially ambiguous words of queries and documents into unambiguous internal representations on which matching and retrieval can take place. This transformation is generally achieved by several levels of linguistic analysis, morphological, syntactic and so forth. In this paper, we present the Arabic linguistic analyzer used in the LIC2M cross-lingual search engine. We focus on the morphological analyzer and particularly the clitic stemmer which segments the input words into proclitics, simple forms and enclitics. We demonstrate that stemming improves search engine recall and precision. 2006.jeptalnrecital-long.29 @@ -359,8 +359,8 @@ Décodage conceptuel et apprentissage automatique : application au corpus de dialogue Homme-Machine <fixed-case>MEDIA</fixed-case> - ChristopheServan - FrédéricBéchet + ChristopheServan + FrédéricBéchet 328–337 Cette étude présente les travaux du LIA effectués sur le corpus de dialogue homme-machine MEDIA et visant à proposer des méthodes d’analyse robuste permettant d’extraire d’un message audio une séquence de concepts élémentaires. Le modèle de décodage conceptuel présenté est basé sur une approche stochastique qui intègre directement le processus de compréhension au processus de Reconnaissance Automatique de la Parole (RAP). Cette approche permet de garder l’espace probabiliste des phrases produit en sortie du module de RAP et de le projeter vers un espace probabiliste de séquences de concepts. Les expériences menées sur le corpus MEDIA montrent que les performances atteintes par notre modèle sont au niveau des meilleurs systèmes ayant participé à l’évaluation sur des transcriptions manuelles de dialogues. En détaillant les performances du système en fonction de la taille du corpus d’apprentissage on peut mesurer le nombre minimal ainsi que le nombre optimal de dialogues nécessaires à l’apprentissage des modèles. Enfin nous montrons comment des connaissances a priori peuvent être intégrées dans nos modèles afin d’augmenter significativement leur couverture en diminuant, à performance égale, l’effort de constitution et d’annotation du corpus d’apprentissage. 2006.jeptalnrecital-long.30 @@ -371,8 +371,8 @@ Vers une prédiction automatique de la difficulté d’une question en langue naturelle LaurianneSitbon JensGrivolla - LaurentGillard - PatriceBellot + LaurentGillard + PatriceBellot PhilippeBlache 338–347 Nous proposons et testons deux méthodes de prédiction de la capacité d’un système à répondre à une question factuelle. Une telle prédiciton permet de déterminer si l’on doit initier un dialogue afin de préciser ou de reformuler la question posée par l’utilisateur. La première approche que nous proposons est une adaptation d’une méthode de prédiction dans le domaine de la recherche documentaire, basée soit sur des machines à vecteurs supports (SVM) soit sur des arbres de décision, avec des critères tels que le contenu des questions ou des documents, et des mesures de cohésion entre les documents ou passages de documents d’où sont extraits les réponses. L’autre approche vise à utiliser le type de réponse attendue pour décider de la capacité du système à répondre. Les deux approches ont été testées sur les données de la campagne Technolangue EQUER des systèmes de questions-réponses en français. L’approche à base de SVM est celle qui obtient les meilleurs résultats. Elle permet de distinguer au mieux les questions faciles, celles auxquelles notre système apporte une bonne réponse, des questions difficiles, celles restées sans réponses ou auxquelles le système a répondu de manière incorrecte. A l’opposé on montre que pour notre système, le type de réponse attendue (personnes, quantités, lieux...) n’est pas un facteur déterminant pour la difficulté d’une question. @@ -415,7 +415,7 @@ Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. Posters PietMertens - CédrickFairon + CédrickFairon AnneDister PatrickWatrin ATALA @@ -432,7 +432,7 @@ Étude et analyse de la phrase nominale arabe en <fixed-case>HPSG</fixed-case> AbdelkarimAbdelkader KaisHaddar - AbdelmajidBen Hamadou + AbdelmajidBen Hamadou 379–388 Dans cet article, nous proposons une démarche d’analyse syntaxique pour les phrases nominales arabes à l’aide du formalisme des grammaires syntagmatiques guidées par les têtes HPSG. Pour ce faire, nous commençons par étudier la typologie de la phrase nominale arabe en précisant ses différentes formes. Puis, nous élaborons une grammaire HPSG traitant ce type de phrase et qui respecte la spécificité de la langue arabe. Ensuite, nous présentons une démarche d’analyse syntaxique se basant sur une approche ascendante et sur le mécanisme d’unification. Enfin, nous donnons une idée sur l’implémentation et l’expérimentation du prototype réalisé. 2006.jeptalnrecital-poster.1 @@ -445,7 +445,7 @@ MariaFernanda Bacelar do Nascimento JoãoMiguel Casteleiro AmáliaMendes - LuísaPereira + LuísaPereira Tiago 389–397 This presentation reports on an on-going project aimed at building a large lexical database of corpus-extracted multiword (MW) expressions for the Portuguese language. MW expressions were automatically extracted from a balanced 50 million word corpus compiled for this project, furthermore these were statistically interpreted using lexical association measures, followed by a manual validation process. The lexical database covers different types of MW expressions, from named entities to lexical associations with different degrees of cohesion, ranging from totally frozen idioms to favoured co-occurring forms, such as collocations. We aim to achieve two main objectives with this resource. Firstly to build on the large set of data of different types of MW expressions, thus revising existing typologies of collocations and integrating them in a larger theory of MW units. Secondly, to use the extensive hand-checked data as training data to evaluate existing statistical lexical association measures. @@ -454,8 +454,8 @@ Ambiguous Turn-Taking Games in Conversations - GemmaBel-Enguix - Maria DoloresJiménez-López + GemmaBel-Enguix + Maria DoloresJiménez-López 398–406 Human-computer interfaces require models of dialogue structure that capture the variability and unpredictability within dialogue. Semantic and pragmatic context are continuously evolving during conversation, especially by the distribution of turns that have a direct effect in dialogue exchanges. In this paper we use a formal language paradigm for modelling multi-agent system conversations. Our computational model combines pragmatic minimal units –speech acts– for constructing dialogues. In this framework, we show how turn-taking distribution can be ambiguous and propose an algorithm for solving it, considering turn coherence, trajectories and turn pairing. Finally, we suggest overlapping as one of the possible phenomena emerging from an unresolved turn-taking. 2006.jeptalnrecital-poster.3 @@ -485,7 +485,7 @@ Exploration et utilisation d’informations distantes dans les modèles de langage statistiques ArmelleBrun DavidLanglois - KamelSmaïli + KamelSmaïli 425–434 Dans le cadre de la modélisation statistique du langage, nous montrons qu’il est possible d’utiliser un modèle n-grammes avec un historique qui n’est pas nécessairement celui avec lequel il a été appris. Par exemple, un adverbe présent dans l’historique peut ne pas avoir d’importance pour la prédiction, et devrait donc être ignoré en décalant l’historique utilisé pour la prédiction. Notre étude porte sur les modèles n-grammes classiques et les modèles n-grammes distants et est appliquée au cas des bigrammes. Nous présentons quatre cas d’utilisation pour deux modèles bigrammes : distants et non distants. Nous montrons que la combinaison linéaire dépendante de l’historique de ces quatre cas permet d’améliorer de 14 % la perplexité du modèle bigrammes classique. Par ailleurs, nous nous intéressons à quelques cas de combinaison qui permettent de mettre en valeur les historiques pour lesquels les modèles que nous proposons sont performants. 2006.jeptalnrecital-poster.6 @@ -495,7 +495,7 @@ Création d’une base terminologique juridique multilingue à l’aide de la plateforme générique Jibiki : le projet <fixed-case>L</fixed-case>ex<fixed-case>ALP</fixed-case> FrancisBrunet-Manquat - GillesSérasset + GillesSérasset 435–444 Cet article présente l’utilisation de « Jibiki » (la plateforme de développement du serveur Web Papillon) dans le cadre du projet LexALP1. Le but de ce projet est d’harmoniser la terminologie des quatre langues (français, allemand, italien et slovène) de la Convention Alpine2 de sorte que les états membres puissent coopérer efficacement. Pour cela, le projet utilise la plateforme Jibiki afin de construire une banque terminologique permettant de comparer la terminologie spécialisée de sept systèmes légaux dans quatre langues, et de l’harmoniser, optimisant ainsi la compréhension entre les états alpins sur des questions environnementales au niveau supranational. Dans cet article, nous présentons comment peut être employée la plateforme générique Jibiki afin de gérer un dictionnaire particulier. 2006.jeptalnrecital-poster.7 @@ -536,7 +536,7 @@ Annotation automatique de relations de contrôle dans des spécifications des besoins informatiques JorgeGarcía-Flores ElenaIvanova - Jean-PierreDesclés + Jean-PierreDesclés BrahimDjioua 473–482 La conception de logiciels est un processus technologique complexe, qui nécessite d’être assisté par des outils de traitement automatique des langues. Cet article présente une méthode pour l’annotation de relations discursives de contrôle dans des textes de spécification de besoins informatiques (SBI). La méthode vise à distinguer les actions contrôlées par le système de celles contrôlées par son environnement, ce qui permet d’établir de façon claire les limites et les responsabilités d’un système informatique. Notre méthode fait appel à la sémantique discursive pour analyser les moyens d’expression du contrôle dans un corpus de SBI industrielles ; l’expression du contrôle est identifiable par la présence, dans un certain contexte, de marqueurs linguistiques exprimés par des règles dites d’Exploration Contextuelle. La dernière partie montre le processus d’annotation automatique de la notion de contrôle par le système EXCOM et termine par la présentation d’un début d’évaluation de cette méthodologie. @@ -547,7 +547,7 @@ Vers l’intégration du contexte dans une mémoire de traduction sous-phrastique : détection du domaine de traduction FabrizioGotti - PhilippeLanglais + PhilippeLanglais ClaudeCoulombe 483–492 Nous présentons dans cet article une mémoire de traduction sous-phrastique sensible au domaine de traduction, une première étape vers l’intégration du contexte. Ce système est en mesure de recycler les traductions déjà « vues » par la mémoire, non seulement pour des phrases complètes, mais également pour des sous-séquences contiguës de ces phrases, via un aligneur de mots. Les séquences jugées intéressantes sont proposées au traducteur. Nous expliquons également la création d’un utilisateur artificiel, indispensable pour tester les performances du système en l’absence d’intervention humaine. Nous le testons lors de la traduction d’un ensemble disparate de corpus. Ces performances sont exprimées par un ensemble de métriques que nous définissons. Enfin, nous démontrons que la détection automatique du contexte de traduction peut s’avérer bénéfique et prometteuse pour améliorer le fonctionnement d’une telle mémoire, en agissant comme un filtre sur le matériel cible suggéré. @@ -557,7 +557,7 @@ Analyse et désambiguïsation morphologiques de textes arabes non voyellés - LamiaHadrich Belguith + LamiaHadrich Belguith NouhaChaâben 493–501 Dans ce papier nous proposons d’abord une méthode d’analyse et de désambiguïsation morphologiques de textes arabes non voyellés permettant de lever l’ambiguïté morphologique due à l’absence des marques de voyelles et aussi à l’irrégularité des formes dérivées de certains mots arabes (e.g. formes irrégulières du pluriel des noms et des adjectifs). Ensuite, nous présentons le système MORPH2, un analyseur morphologique de textes arabes non voyellés basé sur la méthode proposée. Ce système est évalué sur un livre scolaire et des articles de journaux. Les résultats obtenus son et très encourageants. En effet, les mesures de rappel et de précision globales sont respectivement de 69,77 % et 68,51 %. @@ -599,7 +599,7 @@ Graphes paramétrés et outils de lexicalisation - ÉricLaporte + ÉricLaporte SébastienPaumier 532–540 La lexicalisation des grammaires réduit le nombre des erreurs d’analyse syntaxique et améliore les résultats des applications. Cependant, cette modification affecte un système d’analyse syntaxique dans tous ses aspects. Un de nos objectifs de recherche est de mettre au point un modèle réaliste pour la lexicalisation des grammaires. Nous avons réalisé des expériences en ce sens avec une grammaire très simple par son contenu et son formalisme, et un lexique syntaxique très informatif, le lexique-grammaire du français élaboré au LADL. La méthode de lexicalisation est celle des graphes paramétrés. Nos résultats tendent à montrer que la plupart des informations contenues dans le lexique-grammaire peuvent être transférées dans une grammaire et exploitées avec succès dans l’analyse syntaxique de phrases. @@ -647,7 +647,7 @@ Étude de métaphores conceptuelles à l’aide de vues globales et temporelles sur un corpus ThibaultRoy - StéphaneFerrari + StéphaneFerrari PierreBeust 580–589 Cet article présente des expériences récentes menées dans le cadre d’un projet de recherche consacré à l’étude de métaphores conceptuelles. Ces expériences consistent à appréhender visuellement la répartition de trois domaines pouvant être à l’origine de métaphores conceptuelles dans un corpus d’articles boursiers. Les trois domaines étudiés sont la météorologie, la guerre et la santé, un grand nombre d’emplois métaphoriques du lexique de ces trois domaines ayant été observés dans le corpus d’étude. Afin de visualiser la répartition de ces domaines en corpus, nous exploitons la plate-forme ProxiDocs dédiée à la cartographie et à la catégorisation de corpus. Les cartes construites à partir du corpus et des domaines d’étude nous ont ainsi permis de localiser certaines métaphores conceptuelles dans des articles et des groupes d’articles du corpus. Des articles contenant des emplois non métaphoriques des domaines étudiés ont également été distingués sur les cartes. Des représentations cartographiques du corpus mettant dynamiquement en évidence l’évolution des trois domaines d’étude au fil du temps nous ont permis d’amorcer une étude sur le lien entre la présence de certaines métaphores conceptuelles et des faits d’actualité. @@ -676,8 +676,8 @@ Modélisation et analyse des coordinations elliptiques par l’exploitation dynamique des forêts de dérivation - DjaméSeddah - BenoîtSagot + DjaméSeddah + BenoîtSagot 609–618 Nous présentons dans cet article une approche générale pour la modélisation et l’analyse syntaxique des coordinations elliptiques. Nous montrons que les lexèmes élidés peuvent être remplacés, au cours de l’analyse, par des informations qui proviennent de l’autre membre de la coordination, utilisé comme guide au niveau des dérivations. De plus, nous montrons comment cette approche peut être effectivement mise en oeuvre par une légère extension des Grammaires d’Arbres Adjoints Lexicalisées (LTAG) à travers une opération dite de fusion. Nous décrivons les algorithmes de dérivation nécessaires pour l’analyse de constructions coordonnées pouvant comporter un nombre quelconque d’ellipses. 2006.jeptalnrecital-poster.25 @@ -721,7 +721,7 @@ Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. Tutoriels PietMertens - CédrickFairon + CédrickFairon AnneDister PatrickWatrin ATALA @@ -748,7 +748,7 @@ Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues PietMertens - CédrickFairon + CédrickFairon AnneDister PatrickWatrin ATALA @@ -837,7 +837,7 @@ Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues (Posters) PietMertens - CédrickFairon + CédrickFairon AnneDister PatrickWatrin ATALA @@ -906,7 +906,7 @@ The Application of Singular Value Decomposition to <fixed-case>D</fixed-case>utch Noun-Adjective Matrices - TimVan de Cruys + TimVan de Cruys 767–772 Automatic acquisition of semantics from text has received quite some attention in natural language processing. A lot of research has been done by looking at syntactically similar contexts. For example, semantically related nouns can be clustered by looking at the collocating adjectives. There are, however, two major problems with this approach : computational complexity and data sparseness. This paper describes the application of a mathematical technique called singular value decomposition, which has been succesfully applied in Information Retrieval to counter these problems. It is investigated whether this technique is also able to cluster nouns according to latent semantic dimensions in a reduced adjective space. 2006.jeptalnrecital-recitalposter.7 diff --git a/data/xml/2006.tal.xml b/data/xml/2006.tal.xml index 0ce11f5b84..98f1d9cdb3 100644 --- a/data/xml/2006.tal.xml +++ b/data/xml/2006.tal.xml @@ -48,7 +48,7 @@ Comparabilité de corpus et fouille terminologique multilingue [Corpus comparability and multilingual terminology Mining] EmmanuelMorin - BéatriceDaille + BéatriceDaille 113–136 2006.tal-1.5 fra @@ -84,8 +84,8 @@ Computational Approaches to Discourse and Document Processing - Marie-PaulePéry-Woodley - DoniaScott + Marie-PaulePéry-Woodley + DoniaScott 7–19 2006.tal-2.1 pery-woodley-scott-2006-computational @@ -93,8 +93,8 @@ Discourse-based answering of why-questions SuzanVerberne - LouBoves - Peter-ArnoCoppen + LouBoves + Peter-ArnoCoppen NellekeOostdijk 21–41 2006.tal-2.2 @@ -105,8 +105,8 @@ RashmiPrasad NikhilDinesh AlanLee - AravindJoshi - BonnieWebber + AravindJoshi + BonnieWebber 43–64 2006.tal-2.3 prasad-etal-2006-attribution @@ -139,7 +139,7 @@ Visualising discourse coherence in nonlinear documents ClaraMancini - DoniaScott + DoniaScott Simon BuckinghamShum 137–168 2006.tal-2.7 @@ -195,7 +195,7 @@ Une architecture de services pour mieux spécialiser les processus d’acquisition terminologique [A service architecture for better specialization of terminology acquisition processes] FaridCerbah - BéatriceDaille + BéatriceDaille 39–61 2006.tal-3.2 fra @@ -212,7 +212,7 @@ Modélisation de la coordination dans les grammaires d’interaction [Modeling coordination in interaction grammars] - JosephLe Roux + JosephLe Roux GuyPerrier 89–113 2006.tal-3.4 @@ -239,8 +239,8 @@ Une grammaire partagée multitâche pour le traitement de la parole : application aux langues romanes [A multitask shared grammar for speech processing: application to romance languages] - PierretteBouillon - MannyRayner + PierretteBouillon + MannyRayner BrunaNovellas MarianneStarlander MarianneSantaholma @@ -253,8 +253,8 @@ Traduction automatisée fondée sur le dialogue et documents auto-explicatifs : bilan du projet <fixed-case>LIDIA</fixed-case> [Machine translation based on dialogues and self-explanatory documents: an assessment of the <fixed-case>LIDIA</fixed-case> project] - HervéBlanchon - ChristianBoitet + HervéBlanchon + ChristianBoitet AliChoumane 175–204 2006.tal-3.8 diff --git a/data/xml/2007.iwslt.xml b/data/xml/2007.iwslt.xml index edb2badd30..f90170240f 100644 --- a/data/xml/2007.iwslt.xml +++ b/data/xml/2007.iwslt.xml @@ -10,7 +10,7 @@ Overview of the <fixed-case>IWSLT</fixed-case> 2007 evaluation campaign - Cameron ShawFordyce + Cameron ShawFordyce 2007.iwslt-1.1 In this paper we give an overview of the 2007 evaluation campaign for the International Workshop on Spoken Language Translation (IWSLT)1. As with previous evaluation campaigns, the primary focus of the workshop was the translation of spoken language in the travel domain. This year there were four language pairs; the translation of Chinese, Italian, Arabic, and Japanese into English. The input data consisted of the output of ASR systems for read speech and clean text. The exceptions were the challenge task of the Italian English language pair which used spontaneous speech ASR outputs and transcriptions and the Chinese English task which used only clean text. A new characteristic of this year’s evaluation campaign was an increased focus on the sharing of resources. Participants were requested to submit the data and supplementary resources used in building their systems so that the other participants might be able to take advantage of the same resources. A second new characteristic this year was the focus on the human evaluation of systems. Each primary run was judged in the human evaluation for every task using a straightforward ranking of systems. This year's workshop saw an increased participation over last year's workshop. This year 24 groups submitted runs to one or more of the tasks, compared to the 19 groups that submitted runs last year [1]. Automatic and human evaluation were carried out to measure MT performance under each condition, ASR system outputs for read speech, spontaneous travel dialogues, and clean text. fordyce-2007-overview @@ -20,8 +20,8 @@ AliciaPérez VíctorGuijarrubia RaquelJusto - M. InésTorres - FranciscoCasacuberta + M. InésTorres + FranciscoCasacuberta 2007.iwslt-1.2 The goal of this work is to improve current translation models by taking into account additional knowledge sources such as semantically motivated segmentation or statistical categorization. Specifically, two different approaches are discussed. On the one hand, phrase-based approach, and on the other hand, categorization. For both approaches, both statistical and linguistic alternatives are explored. As for translation framework, finite-state transducers are considered. These are versatile models that can be easily integrated on-the-fly with acoustic models for speech translation purposes. In what the experimental framework concerns, all the models presented were evaluated and compared taking confidence intervals into account. perez-etal-2007-comparison @@ -30,7 +30,7 @@ Improved chunk-level reordering for statistical machine translation YuqiZhang RichardZens - HermannNey + HermannNey 2007.iwslt-1.3 Inspired by previous chunk-level reordering approaches to statistical machine translation, this paper presents two methods to improve the reordering at the chunk level. By introducing a new lattice weighting factor and by reordering the training source data, an improvement is reported on TER and BLEU. Compared to the previous chunklevel reordering approach, the BLEU score improves 1.4% absolutely. The translation results are reported on IWSLT Chinese-English task. zhang-etal-2007-improved @@ -41,13 +41,13 @@ MatthaisEck PaisarnCharoenpornsawat ThiloKöhler - SebastianStüker - ThuyLinhNguyen + SebastianStüker + ThuyLinhNguyen RogerHsiao - AlexWaibel - StephanVogel + AlexWaibel + StephanVogel TanjaSchultz - Alan W.Black + Alan W.Black 2007.iwslt-1.4 The paper describes our portable two-way speech-to-speech translation system using a completely eyes-free/hands-free user interface. This system translates between the language pair English and Iraqi Arabic as well as between English and Farsi, and was built within the framework of the DARPA TransTac program. The Farsi language support was developed within a 90-day period, testing our ability to rapidly support new languages. The paper gives an overview of the system’s components along with the individual component objective measures and a discussion of issues relevant for the overall usage of the system. We found that usability, flexibility, and robustness serve as severe constraints on system architecture and design. bach-etal-2007-cmu @@ -56,7 +56,7 @@ <fixed-case>CASIA</fixed-case> phrase-based <fixed-case>SMT</fixed-case> system for <fixed-case>IWSLT</fixed-case>’07 YuZhou YanqingHe - ChengqingZong + ChengqingZong 2007.iwslt-1.5 zhou-etal-2007-casia @@ -82,7 +82,7 @@ JunSun HongfeiJiang MinZhang - Ai TiAw + Ai TiAw 2007.iwslt-1.8 In this paper, we describe the system and approach used by Institute for Infocomm Research (I2R) for the IWSLT 2007 spoken language evaluation campaign. A multi-pass approach is exploited to generate and select best translation. First, we use two decoders namely the open source Moses and an in-home syntax-based decoder to generate N-best lists. Next we spawn new translation entries through a word-based n-gram language model estimated on the former N-best entries. Finally, we join the N-best lists from the previous two passes, and select the best translation by rescoring them with additional feature functions. In particular, this paper reports our effort on new translation entry generation and system combination. The performance on development and test sets are reported. The system was ranked first with respect to the BLEU measure in Chinese-to-English open data track. chen-etal-2007-i2r @@ -91,20 +91,20 @@ The <fixed-case>CMU</fixed-case>-<fixed-case>UKA</fixed-case> statistical machine translation systems for <fixed-case>IWSLT</fixed-case> 2007 IanLane AndreasZollmann - Thuy LinhNguyen + Thuy LinhNguyen NguyenBach AshishVenugopal - StephanVogel + StephanVogel KayRottmann - YingZhang - AlexWaibel + YingZhang + AlexWaibel 2007.iwslt-1.9 This paper describes the CMU-UKA statistical machine translation systems submitted to the IWSLT 2007 evaluation campaign. Systems were submitted for three language-pairs: Japanese→English, Chinese→English and Arabic→English. All systems were based on a common phrase-based SMT (statistical machine translation) framework but for each language-pair a specific research problem was tackled. For Japanese→English we focused on two problems: first, punctuation recovery, and second, how to incorporate topic-knowledge into the translation framework. Our Chinese→English submission focused on syntax-augmented SMT and for the Arabic→English task we focused on incorporating morphological-decomposition into the SMT framework. This research strategy enabled us to evaluate a wide variety of approaches which proved effective for the language pairs they were evaluated on. lane-etal-2007-cmu <fixed-case>M</fixed-case>a<fixed-case>T</fixed-case>r<fixed-case>E</fixed-case>x: the <fixed-case>DCU</fixed-case> machine translation system for <fixed-case>IWSLT</fixed-case> 2007 - HanyHassan + HanyHassan YanjunMa AndyWay 2007.iwslt-1.10 @@ -143,7 +143,7 @@ The <fixed-case>MIT</fixed-case>-<fixed-case>LL</fixed-case>/<fixed-case>AFRL</fixed-case> <fixed-case>IWSLT</fixed-case>-2007 <fixed-case>MT</fixed-case> system WadeShen BrianDelaney - TimAnderson + TimAnderson RaySlyh 2007.iwslt-1.14 The MIT-LL/AFRL MT system implements a standard phrase-based, statistical translation model. It incorporates a number of extensions that improve performance for speech-based translation. During this evaluation our efforts focused on the rapid porting of our SMT system to a new language (Arabic) and novel approaches to translation from speech input. This paper discusses the architecture of the MIT-LL/AFRL MT system, improvements over our 2006 system, and experiments we ran during the IWSLT-2007 evaluation. Specifically, we focus on 1) experiments comparing the performance of confusion network decoding and direct lattice decoding techniques for machine translation of speech, 2) the application of lightweight morphology for Arabic MT preprocessing and 3) improved confusion network decoding. @@ -154,11 +154,11 @@ AndrewFinch EtienneDenoual HideoOkuma - MichaelPaul - HirofumiYamamoto + MichaelPaul + HirofumiYamamoto KeijiYasuda RuiqiangZhang - EiichiroSumita + EiichiroSumita 2007.iwslt-1.15 This paper describes the NiCT-ATR statistical machine translation (SMT) system used for the IWSLT 2007 evaluation campaign. We participated in three of the four language pair translation tasks (CE, JE, and IE). We used a phrase-based SMT system using log-linear feature models for all tracks. This year we decoded from the ASR n-best lists in the JE track and found a gain in performance. We also applied some new techniques to facilitate the use of out-of-domain external resources by model combination and also by utilizing a huge corpus of n-grams provided by Google Inc.. Using these resources gave mixed results that depended on the technique also the language pair however, in some cases we achieved consistently positive results. The results from model-interpolation in particular were very promising. finch-etal-2007-nict @@ -179,11 +179,11 @@ ZhongjunHe HaitaoMi YangLiu - DeyiXiong + DeyiXiong WeihuaLuo YunHuang ZhixiangRen - YajuanLu + YajuanLu QunLiu 2007.iwslt-1.17 In this paper, we give an overview of the ICT statistical machine translation systems for the evaluation campaign of the International Workshop on Spoken Language Translation (IWSLT) 2007. In this year’s evaluation, we participated in the Chinese-English transcript translation task, and developed three systems based on different techniques: a formally syntax-based system Bruin, an extended phrase-based system Confucius and a linguistically syntax-based system Lynx. We will describe the models of these three systems, and compare their performance in detail. We set Bruin as our primary system, which ranks 2 among the 15 primary results according to the official evaluation results. @@ -191,25 +191,25 @@ The <fixed-case>INESC</fixed-case>-<fixed-case>ID</fixed-case> <fixed-case>IWSLT</fixed-case>07 <fixed-case>SMT</fixed-case> system - João V.Graça + João V.Graça DiamantinoCaseiro - LuísaCoheur + LuísaCoheur 2007.iwslt-1.18 We present the machine translation system used by L2F from INESC-ID in the evaluation campaign of the International Workshop on Spoken Language Translation (2007), in the task of translating spontaneous conversations in the travel domain from Italian to English. graca-etal-2007-inesc Using word posterior probabilities in lattice translation - VicenteAlabau + VicenteAlabau AlbertoSanchis - FranciscoCasacuberta + FranciscoCasacuberta 2007.iwslt-1.19 In this paper we describe the statistical machine translation system developed at ITI/UPV, which aims especially at speech recognition and statistical machine translation integration, for the evaluation campaign of the International Workshop on Spoken Language Translation (2007). The system we have developed takes advantage of an improved word lattice representation that uses word posterior probabilities. These word posterior probabilities are then added as a feature to a log-linear model. This model includes a stochastic finite-state transducer which allows an easy lattice integration. Furthermore, it provides a statistical phrase-based reordering model that is able to perform local reorderings of the output. We have tested this model on the Italian-English corpus, for clean text, 1-best ASR and lattice ASR inputs. The results and conclusions of such experiments are reported at the end of this paper. alabau-etal-2007-using The <fixed-case>LIG</fixed-case> <fixed-case>A</fixed-case>rabic/<fixed-case>E</fixed-case>nglish speech translation system at <fixed-case>IWSLT</fixed-case>07 - LaurentBesacier + LaurentBesacier AmarMahdhaoui Viet-BacLe 2007.iwslt-1.20 @@ -218,7 +218,7 @@ <fixed-case>NUDT</fixed-case> machine translation system for <fixed-case>IWSLT</fixed-case>2007 - Wen-HanChao + Wen-HanChao Zhou-JunLi 2007.iwslt-1.21 In this paper, we describe our machine translation system which was used for the Chinese-to-English task in the IWSLT2007 evaluation campaign. The system is a statistical machine translation (SMT) system, while containing an example-based decoder. In this way, it will help to solve the re-ordering problem and other problems for spoken language MT, such as lots of omissions, idioms etc. We report the results of the system for the provided evaluation sets. @@ -227,8 +227,8 @@ <fixed-case>MISTRAL</fixed-case>: a lattice translation system for <fixed-case>IWSLT</fixed-case> 2007 AlexandrePatry - PhilippeLanglais - FrédéricBéchet + PhilippeLanglais + FrédéricBéchet 2007.iwslt-1.22 This paper describes MISTRAL, the lattice translation system that we developed for the Italian-English track of the International Workshop on Spoken Language Translation 2007. MISTRAL is a discriminative phrase-based system that translates a source word lattice in two passes. The first pass extracts a list of top ranked sentence pairs from the lattice and the second pass rescores this list with more complex features. Our experiments show that our system, when translating pruned lattices, is at least as good as a fair baseline that translates the first ranked sentences returned by a speech recognition system. patry-etal-2007-mistral @@ -257,7 +257,7 @@ DavidVilar GregorLeusch YuqiZhang - HermannNey + HermannNey 2007.iwslt-1.25 The RWTH system for the IWSLT 2007 evaluation is a combination of several statistical machine translation systems. The combination includes Phrase-Based models, a n-gram translation model and a hierarchical phrase model. We describe the individual systems and the method that was used for combining the system outputs. Compared to our 2006 system, we newly introduce a hierarchical phrase-based translation model and show improvements in system combination for Machine Translation. RWTH participated in the Italian-to-English and Chinese-to-English translation directions. mauser-etal-2007-rwth @@ -265,12 +265,12 @@ The <fixed-case>TALP</fixed-case> ngram-based <fixed-case>SMT</fixed-case> system for <fixed-case>IWSLT</fixed-case> 2007 PatrikLambert - Marta R.Costa-jussà - Josep M.Crego + Marta R.Costa-jussà + Josep M.Crego MaximKhalilov - José B.Mariño - Rafael E.Banchs - José A. R.Fonollosa + José B.Mariño + Rafael E.Banchs + José A. R.Fonollosa HolgerSchwenk 2007.iwslt-1.26 This paper describes TALPtuples, the 2007 N-gram-based statistical machine translation system developed at the TALP Research Center of the UPC (Universitat Polite`cnica de Catalunya) in Barcelona. Emphasis is put on improvements and extensions of the system of previous years. Mainly, these include optimizing alignment parameters in function of translation metric scores and rescoring with a neural network language model. Results on two translation directions are reported, namely from Arabic and Chinese into English, thoroughly explaining all language-related preprocessing and translation schemes. @@ -287,7 +287,7 @@ The <fixed-case>U</fixed-case>niversity of <fixed-case>M</fixed-case>aryland translation system for <fixed-case>IWSLT</fixed-case> 2007 - Christopher J.Dyer + Christopher J.Dyer 2007.iwslt-1.28 This paper describes the University of Maryland statistical machine translation system used in the IWSLT 2007 evaluation. Our focus was threefold: using hierarchical phrase-based models in spoken language translation, the incorporation of sub-lexical information in model estimation via morphological analysis (Arabic) and word and character segmentation (Chinese), and the use of n-gram sequence models for source-side punctuation prediction. Our efforts yield significant improvements in Chinese-English and Arabic-English translation tasks for both spoken language and human transcription conditions. dyer-2007-university diff --git a/data/xml/2007.jeptalnrecital.xml b/data/xml/2007.jeptalnrecital.xml index 0bdafb6908..4398b3e49a 100644 --- a/data/xml/2007.jeptalnrecital.xml +++ b/data/xml/2007.jeptalnrecital.xml @@ -19,7 +19,7 @@ Exploiting structural meeting-specific features for topic segmentation MariaGeorgescul AlexanderClarck - SusanArmstrong + SusanArmstrong 15–24 In this article we address the task of automatic text structuring into linear and non-overlapping thematic episodes. Our investigation reports on the use of various lexical, acoustic and syntactic features, and makes a comparison of how these features influence performance of automatic topic segmentation. Using datasets containing multi-party meeting transcriptions, we base our experiments on a proven state-of-the-art approach using support vector classification. 2007.jeptalnrecital-long.1 @@ -29,7 +29,7 @@ Énergie textuelle de mémoires associatives SilviaFernández EricSanjuan - Juan-ManuelTorres-Moreno + Juan-ManuelTorres-Moreno 25–34 Dans cet article, nous présentons une approche de réseaux de neurones inspirée de la physique statistique de systèmes magnétiques pour étudier des problèmes fondamentaux du Traitement Automatique de la Langue Naturelle. L’algorithme modélise un document comme un système de neurones où l’on déduit l’énergie textuelle. Nous avons appliqué cette approche aux problèmes de résumé automatique et de détection de frontières thématiques. Les résultats sont très encourageants. 2007.jeptalnrecital-long.2 @@ -48,8 +48,8 @@ Identifier les pronoms anaphoriques et trouver leurs antécédents : l’intérêt de la classification bayésienne - DavyWeissenbacher - AdelineNazarenko + DavyWeissenbacher + AdelineNazarenko 45–54 On oppose souvent en TAL les systèmes à base de connaissances linguistiques et ceux qui reposent sur des indices de surface. Chaque approche a ses limites et ses avantages. Nous proposons dans cet article une nouvelle approche qui repose sur les réseaux bayésiens et qui permet de combiner au sein d’une même représentation ces deux types d’informations hétérogènes et complémentaires. Nous justifions l’intérêt de notre approche en comparant les performances du réseau bayésien à celles des systèmes de l’état de l’art, sur un problème difficile du TAL, celui de la résolution d’anaphore. 2007.jeptalnrecital-long.4 @@ -67,7 +67,7 @@ Structures de traits typées et morphologie à partitions - FrançoisBarthélemy + FrançoisBarthélemy 65–74 Les structures de traits typées sont une façon abstraite et agréable de représenter une information partielle. Dans cet article, nous montrons comment la combinaison de deux techniques relativement classiques permet de définir une variante de morphologie à deux niveaux intégrant harmonieusement des structures de traits et se compilant en une machine finie. La première de ces techniques est la compilation de structure de traits en expressions régulières, la seconde est la morphologie à partition. Nous illustrons au moyen de deux exemples l’expressivité d’un formalisme qui rapproche les grammaires à deux niveaux des grammaires d’unification. 2007.jeptalnrecital-long.6 @@ -77,8 +77,8 @@ Analyse morphosémantique des composés savants : transposition du français à l’anglais LouiseDeléger - FiammettaNamer - PierreZweigenbaum + FiammettaNamer + PierreZweigenbaum 75–84 La plupart des vocabulaires spécialisés comprennent une part importante de lexèmes morphologiquement complexes, construits à partir de racines grecques et latines, qu’on appelle « composés savants ». Une analyse morphosémantique permet de décomposer et de donner des définitions à ces lexèmes, et semble pouvoir être appliquée de façon similaire aux composés de plusieurs langues. Cet article présente l’adaptation d’un analyseur morphosémantique, initialement dédié au français (DériF), à l’analyse de composés savants médicaux anglais, illustrant ainsi la similarité de structure de ces composés dans des langues européennes proches. Nous exposons les principes de cette transposition et ses performances. L’analyseur a été testé sur un ensemble de 1299 lexèmes extraits de la terminologie médicale WHO-ART : 859 ont pu être décomposés et définis, dont 675 avec succès. Outre une simple transposition d’une langue à l’autre, la méthode montre la potentialité d’un système multilingue. 2007.jeptalnrecital-long.7 @@ -88,7 +88,7 @@ A tool for detecting <fixed-case>F</fixed-case>rench-<fixed-case>E</fixed-case>nglish cognates and false friends OanaFrunza - DianaInkpen + DianaInkpen 85–94 Cognates are pairs of words in different languages similar in spelling and meaning. They can help a second-language learner on the tasks of vocabulary expansion and reading comprehension. False friends are pairs of words that have similar spelling but different meanings. Partial cognates are pairs of words in two languages that have the same meaning in some, but not all contexts. In this article we present a method to automatically classify a pair of words as cognates or false friends, by using several measures of orthographic similarity as features for classification. We use this method to create complete lists of cognates and false friends between two languages. We also disambiguate partial cognates in context. We applied all our methods to French and English, but they can be applied to other pairs of languages as well. We built a tool that takes the produced lists and annotates a French text with equivalent English cognates or false friends, in order to help second-language learners improve their reading comprehension skills and retention rate. 2007.jeptalnrecital-long.8 @@ -96,7 +96,7 @@ Enrichissement d’un lexique bilingue par analogie - PhilippeLanglais + PhilippeLanglais AlexandrePatry 95–104 La présence de mots inconnus dans les applications langagières représente un défi de taille bien connu auquel n’échappe pas la traduction automatique. Les systèmes professionnels de traduction offrent à cet effet à leurs utilisateurs la possibilité d’enrichir un lexique de base avec de nouvelles entrées. Récemment, Stroppa et Yvon (2005) démontraient l’intérêt du raisonnement par analogie pour l’analyse morphologique d’une langue. Dans cette étude, nous montrons que le raisonnement par analogie offre également une réponse adaptée au problème de la traduction d’entrées lexicales inconnues. @@ -127,7 +127,7 @@ Vers un méta-<fixed-case>EDL</fixed-case> complet, puis un <fixed-case>EDL</fixed-case> universel pour la <fixed-case>TAO</fixed-case> Hong-ThaiNguyen - ChristianBoitet + ChristianBoitet 125–134 Un “méta-EDL” (méta-Environnement de Développement Linguiciel) pour la TAO permet de piloter à distance un ou plusieurs EDL pour construire des systèmes de TAO hétérogènes. Partant de CASH, un méta-EDL dédié à Ariane-G5, et de WICALE 1.0, un premier méta-EDL générique mais aux fonctionnalités minimales, nous dégageons les problèmes liés à l’ajout de fonctionnalités riches comme l’édition et la navigation en local, et donnons une solution implémentée dans WICALE 2.0. Nous y intégrons maintenant une base lexicale pour les systèmes à « pivot lexical », comme UNL/U++. Un but à plus long terme est de passer d’un tel méta-EDL générique multifonctionnel à un EDL « universel », ce qui suppose la réingénierie des compilateurs et des moteurs des langages spécialisés pour la programmation linguistique (LSPL) supportés par les divers EDL. 2007.jeptalnrecital-long.12 @@ -137,7 +137,7 @@ Aides à la navigation dans un corpus de transcriptions d’oral FrederikCailliau - ClaudeDe Loupy + ClaudeDe Loupy 135–144 Dans cet article, nous évaluons les performances de fonctionnalités d’aide à la navigation dans un contexte de recherche dans un corpus audio. Nous montrons que les particularités de la transcription et, en particulier les erreurs, conduisent à une dégradation parfois importante des performances des outils d’analyse. Si la navigation par concepts reste dans des niveaux d’erreur acceptables, la reconnaissance des entités nommées, utilisée pour l’aide à la lecture, voit ses performances fortement baisser. Notre remise en doute de la portabilité de ces fonctions à un corpus oral est néanmoins atténuée par la nature même du corpus qui incite à considérer que toute méthodes permettant de réduire le temps d’accès à l’information est pertinente, même si les outils utilisés sont imparfaits. 2007.jeptalnrecital-long.13 @@ -185,7 +185,7 @@ Disambiguating automatic semantic annotation based on a thesaurus structure VéroniqueMalaisé LuitGazendam - HennieBrugman + HennieBrugman 185–194 The use/use for relationship a thesaurus is usually more complex than the (para-) synonymy recommended in the ISO-2788 standard describing the content of these controlled vocabularies. The fact that a non preferred term can refer to multiple preferred terms (only the latter are relevant in controlled indexing) makes this relationship difficult to use in automatic annotation applications : it generates ambiguity cases. In this paper, we present the CARROT algorithm, meant to rank the output of our Information Extraction pipeline, and how this algorithm can be used to select the relevant preferred term out of different possibilities. This selection is meant to provide suggestions of keywords to human annotators, in order to ease and speed up their daily process and is based on the structure of their thesaurus. We achieve a 95 % success, and discuss these results along with perspectives for this experiment. 2007.jeptalnrecital-long.18 @@ -213,7 +213,7 @@ Comparaison du Lexique-Grammaire des verbes pleins et de <fixed-case>DICOVALENCE</fixed-case> : vers une intégration dans le Lefff LaurenceDanlos - BenoîtSagot + BenoîtSagot 215–224 Cet article compare le Lexique-Grammaire des verbes pleins et DICOVALENCE, deux ressources lexicales syntaxiques pour le français développées par des linguistes depuis de nombreuses années. Nous étudions en particulier les divergences et les empiètements des modèles lexicaux sous-jacents. Puis nous présentons le Lefff , lexique syntaxique à grande échelle pour le TAL, et son propre modèle lexical. Nous montrons que ce modèle est à même d’intégrer les informations lexicales présentes dans le Lexique-Grammaire et dans DICOVALENCE. Nous présentons les résultats des premiers travaux effectués en ce sens, avec pour objectif à terme la constitution d’un lexique syntaxique de référence pour le TAL. 2007.jeptalnrecital-long.21 @@ -245,9 +245,9 @@ Résolution de la référence dans des dialogues homme-machine : évaluation sur corpus de deux approches symbolique et probabiliste - AlexandreDenis - FrédéricBéchet - MatthieuQuignard + AlexandreDenis + FrédéricBéchet + MatthieuQuignard 245–254 Cet article décrit deux approches, l’une numérique, l’autre symbolique, traitant le problème de la résolution de la référence dans un cadre de dialogue homme-machine. L’analyse des résultats obtenus sur le corpus MEDIA montre la complémentarité des deux systèmes développés : robustesse aux erreurs et hypothèses multiples pour l’approche numérique ; modélisation de phénomènes complexes et interprétation complète pour l’approche symbolique. 2007.jeptalnrecital-long.24 @@ -256,7 +256,7 @@ Annotation précise du français en sémantique de rôles par projection cross-linguistique - SebastianPadó + SebastianPadó GuillaumePitel 255–264 Dans le paradigme FrameNet, cet article aborde le problème de l’annotation précise et automatique de rôles sémantiques dans une langue sans lexique FrameNet existant. Nous évaluons la méthode proposée par Padó et Lapata (2005, 2006), fondée sur la projection de rôles et appliquée initialement à la paire anglais-allemand. Nous testons sa généralisabilité du point de vue (a) des langues, en l’appliquant à la paire (anglais-français) et (b) de la qualité de la source, en utilisant une annotation automatique du côté anglais. Les expériences montrent des résultats à la hauteur de ceux obtenus pour l’allemand, nous permettant de conclure que cette approche présente un grand potentiel pour réduire la quantité de travail nécessaire à la création de telles ressources dans de nombreuses langues. @@ -380,7 +380,7 @@ Collocation translation based on sentence alignment and parsing VioletaSeretan - ÉricWehrli + ÉricWehrli 375–384 Bien que de nombreux efforts aient été déployés pour extraire des collocations à partir de corpus de textes, seule une minorité de travaux se préoccupent aussi de rendre le résultat de l’extraction prêt à être utilisé dans les applications TAL qui pourraient en bénéficier, telles que la traduction automatique. Cet article décrit une méthode précise d’identification de la traduction des collocations dans un corpus parallèle, qui présente les avantages suivants : elle peut traiter des collocation flexibles (et pas seulement figées) ; elle a besoin de ressources limitées et d’un pouvoir de calcul raisonnable (pas d’alignement complet, pas d’entraînement) ; elle peut être appliquée à plusieurs paires des langues et fonctionne même en l’absence de dictionnaires bilingues. La méthode est basée sur l’information syntaxique provenant du parseur multilingue Fips. L’évaluation effectuée sur 4000 collocations de type verbe-objet correspondant à plusieurs paires de langues a montré une précision moyenne de 89.8% et une couverture satisfaisante (70.9%). Ces résultats sont supérieurs à ceux enregistrés dans l’évaluation d’autres méthodes de traduction de collocations. 2007.jeptalnrecital-long.37 @@ -390,7 +390,7 @@ Utilisation d’une approche basée sur la recherche cross-lingue d’information pour l’alignement de phrases à partir de textes bilingues Arabe-Français NasredineSemmar - ChristianFluhr + ChristianFluhr 385–394 L’alignement de phrases à partir de textes bilingues consiste à reconnaître les phrases qui sont traductions les unes des autres. Cet article présente une nouvelle approche pour aligner les phrases d’un corpus parallèle. Cette approche est basée sur la recherche crosslingue d’information et consiste à construire une base de données des phrases du texte cible et considérer chaque phrase du texte source comme une requête à cette base. La recherche crosslingue utilise un analyseur linguistique et un moteur de recherche. L’analyseur linguistique traite aussi bien les documents à indexer que les requêtes et produit un ensemble de lemmes normalisés, un ensemble d’entités nommées et un ensemble de mots composés avec leurs étiquettes morpho-syntaxiques. Le moteur de recherche construit les fichiers inversés des documents en se basant sur leur analyse linguistique et retrouve les documents pertinents à partir de leur indexes. L’aligneur de phrases a été évalué sur un corpus parallèle Arabe-Français et les résultats obtenus montrent que 97% des phrases ont été correctement alignées. 2007.jeptalnrecital-long.38 @@ -435,7 +435,7 @@ Segmentation en super-chunks OlivierBlanc - MatthieuConstant + MatthieuConstant PatrickWatrin 33–42 Depuis l’analyseur développé par Harris à la fin des années 50, les unités polylexicales ont peu à peu été intégrées aux analyseurs syntaxiques. Cependant, pour la plupart, elles sont encore restreintes aux mots composés qui sont plus stables et moins nombreux. Toutefois, la langue est remplie d’expressions semi-figées qui forment également des unités sémantiques : les expressions adverbiales et les collocations. De même que pour les mots composés traditionnels, l’identification de ces structures limite la complexité combinatoire induite par l’ambiguïté lexicale. Dans cet article, nous détaillons une expérience qui intègre ces notions dans un processus de segmentation en super-chunks, préalable à l’analyse syntaxique. Nous montrons que notre chunker, développé pour le français, atteint une précision et un rappel de 92,9 % et 98,7 %, respectivement. Par ailleurs, les unités polylexicales réalisent 36,6 % des attachements internes aux constituants nominaux et prépositionnels. @@ -447,7 +447,7 @@ Détection et prédiction de la satisfaction des usagers dans les dialogues Personne-Machine NarjèsBoufaden TruongLe Hoang - PierreDumouchel + PierreDumouchel 43–52 Nous étudions le rôle des entités nommées et marques discursives de rétroaction pour la tâche de classification et prédiction de la satisfaction usager à partir de dialogues. Les expériences menées sur 1027 dialogues Personne-Machine dans le domaine des agences de voyage montrent que les entités nommées et les marques discursives n’améliorent pas de manière significative le taux de classification des dialogues. Par contre, elles permettent une meilleure prédiction de la satisfaction usager à partir des premiers tours de parole usager. 2007.jeptalnrecital-poster.4 @@ -456,8 +456,8 @@ Les ellipses dans un système de traduction automatique de la parole - PierretteBouillon - MannyRayner + PierretteBouillon + MannyRayner MarianneStarlander MarianneSantaholma 53–62 @@ -469,9 +469,9 @@ Analyse automatique de sondages téléphoniques d’opinion NathalieCamelin - FrédéricBéchet - GéraldineDamnati - RenatoDe Mori + FrédéricBéchet + GéraldineDamnati + RenatoDe Mori 63–72 Cette étude présente la problématique de l’analyse automatique de sondages téléphoniques d’opinion. Cette analyse se fait en deux étapes : tout d’abord extraire des messages oraux les expressions subjectives relatives aux opinions de utilisateurs sur une dimension particulière (efficacité, accueil, etc.) ; puis sélectionner les messages fiables, selon un ensemble de mesures de confiance, et estimer la distribution des diverses opinions sur le corpus de test. Le but est d’estimer une distribution aussi proche que possible de la distribution de référence. Cette étude est menée sur un corpus de messages provenant de vrais utilisateurs fournis par France Télécom R&D. 2007.jeptalnrecital-poster.6 @@ -490,9 +490,9 @@ Analyse des échecs d’une approche pour traiter les questions définitoires soumises à un système de questions/réponses - LaurentGillard - PatriceBellot - MarcEl-Bèze + LaurentGillard + PatriceBellot + MarcEl-Bèze 83–92 Cet article revient sur le type particulier des questions définitoires étudiées dans le cadre des campagnes d’évaluation des systèmes de Questions/Réponses. Nous présentons l’approche développée suite à notre participation à la campagne EQueR et son évaluation lors de QA@CLEF 2006. La réponse proposée est la plus représentative des expressions présentes en apposition avec l’objet à définir, sa sélection est faite depuis des indices dérivés de ces appositions. Environ 80% de bonnes réponses sont trouvées sur les questions définitoires des volets francophones de CLEF. Les cas d’erreurs rencontrés sont analysés et discutés en détail. 2007.jeptalnrecital-poster.8 @@ -503,7 +503,7 @@ Caractérisation des discours scientifiques et vulgarisés en français, japonais et russe LorraineGoeuriot NataliaGrabar - BéatriceDaille + BéatriceDaille 93–102 L’objectif principal de notre travail consiste à étudier la notion de comparabilité des corpus, et nous abordons cette question dans un contexte monolingue en cherchant à distinguer les documents scientifiques et vulgarisés. Nous travaillons séparément sur des corpus composés de documents du domaine médical dans trois langues à forte distance linguistique (le français, le japonais et le russe). Dans notre approche, les documents sont caractérisés dans chaque langue selon leur thématique et une typologie discursive qui se situe à trois niveaux de l’analyse des documents : structurel, modal et lexical. Le typage des documents est implémenté avec deux algorithmes d’apprentissage (SVMlight et C4.5). L’évaluation des résultats montre que la typologie discursive proposée est portable d’une langue à l’autre car elle permet en effet de distinguer les deux discours. Nous constatons néanmoins des performances très variées selon les langues, les algorithmes et les types de caractéristiques discursives. 2007.jeptalnrecital-poster.9 @@ -512,9 +512,9 @@ <fixed-case>OGMIOS</fixed-case> : une plate-forme d’annotation linguistique de collection de documents issus du Web - ThierryHamon - JulienDerivière - AdelineNazarenko + ThierryHamon + JulienDerivière + AdelineNazarenko 103–112 L’un des objectifs du projet ALVIS est d’intégrer des informations linguistiques dans des moteurs de recherche spécialisés. Dans ce contexte, nous avons conçu une plate-forme d’enrichissement linguistique de documents issus du Web, OGMIOS, exploitant des outils de TAL existants. Les documents peuvent être en français ou en anglais. Cette architecture est distribuée, afin de répondre aux contraintes liées aux traitements de gros volumes de textes, et adaptable, pour permettre l’analyse de sous-langages. La plate-forme est développée en Perl et disponible sous forme de modules CPAN. C’est une structure modulaire dans lequel il est possible d’intégrer de nouvelles ressources ou de nouveaux outils de TAL. On peut ainsi définir des configuration différentes pour différents domaines et types de collections. Cette plateforme robuste permet d’analyser en masse des données issus du web qui sont par essence très hétérogènes. Nous avons évalué les performances de la plateforme sur plusieurs collections de documents. En distribuant les traitements sur vingt machines, une collection de 55 329 documents du domaine de la biologie (106 millions de mots) a été annotée en 35 heures tandis qu’une collection de 48 422 dépêches relatives aux moteurs de recherche (14 millions de mots) a été annotée en 3 heures et 15 minutes. 2007.jeptalnrecital-poster.10 @@ -554,7 +554,7 @@ Du bruit, du silence et des ambiguïtés : que faire du <fixed-case>TAL</fixed-case> pour l’apprentissage des langues ? - OlivierKraif + OlivierKraif ClaudePonton 143–152 Nous proposons une nouvelle approche pour l’intégration du TAL dans les systèmes d’apprentissage des langues assisté par ordinateur (ALAO), la stratégie « moinsdisante ». Cette approche tire profit des technologies élémentaires mais fiables du TAL et insiste sur la nécessité de traitements modulaires et déclaratifs afin de faciliter la portabilité et la prise en main didactique des systèmes. Basé sur cette approche, ExoGen est un premier prototype pour la génération automatique d’activités lacunaires ou de lecture d’exemples. Il intègre un module de repérage et de description des réponses des apprenants fondé sur la comparaison entre réponse attendue et réponse donnée. L’analyse des différences graphiques, orthographiques et morphosyntaxiques permet un diagnostic des erreurs de type fautes d’orthographe, confusions, problèmes d’accord, de conjugaison, etc. La première évaluation d’ExoGen sur un extrait du corpus d’apprenants FRIDA produit des résultats prometteurs pour le développement de cette approche « moins-disante », et permet d’envisager un modèle d’analyse performant et généralisable à une grande variété d’activités. @@ -564,7 +564,7 @@ Extraction automatique de cadres de sous-catégorisation verbale pour le français à partir d’un corpus arboré - AnnaKupsc + AnnaKupsc 153–162 Nous présentons une expérience d’extraction automatique des cadres de souscatégorisation pour 1362 verbes français. Nous exploitons un corpus journalistique richement annoté de 15 000 phrases dont nous extrayons 12 510 occurrences verbales. Nous évaluons dans un premier temps l’extraction des cadres basée sur la fonction des arguments, ce qui nous fournit 39 cadres différents avec une moyenne de 1.54 cadres par lemme. Ensuite, nous adoptons une approche mixte (fonction et catégorie syntaxique) qui nous fournit dans un premier temps 925 cadres différents, avec une moyenne de 3.44 cadres par lemme. Plusieurs méthodes de factorisation, neutralisant en particulier les variantes de réalisation avec le passif ou les pronoms clitiques, sont ensuite appliquées et nous permettent d’aboutir à 235 cadres différents avec une moyenne de 1.94 cadres par verbe. Nous comparons brièvement nos résultats avec les travaux existants pour le français et pour l’anglais. 2007.jeptalnrecital-poster.15 @@ -595,7 +595,7 @@ Ressources lexicales chinoises pour le <fixed-case>TALN</fixed-case> Huei-ChiLin - MaxSilberztein + MaxSilberztein 183–192 Nous voulons traiter des textes chinois automatiquement ; pour ce faire, nous formalisons le vocabulaire chinois, en utilisant principalement des dictionnaires et des grammaires morphologiques et syntaxiques formalisés avec le logiciel NooJ. Nous présentons ici les critères linguistiques qui nous ont permis de construire dictionnaires et grammaires, sachant que l’application envisagée (linguistique de corpus) nous impose certaines contraintes dans la formalisation des unités de la langue, en particulier des composés. 2007.jeptalnrecital-poster.18 @@ -640,8 +640,8 @@ Un Lexique Génératif de référence pour le français - FiammettaNamer - PierretteBouillon + FiammettaNamer + PierretteBouillon ÉvelyneJacquey 233–242 Cet article propose une approche originale visant la construction d’un lexique sémantique de référence sur le français. Sa principale caractéristique est de pouvoir s’appuyer sur les propriétés morphologiques des lexèmes. La méthode combine en effet des résultats d’analyse morphologique (Namer, 2002;2003), à partir de ressources lexicales de grande taille (nomenclatures du TLF) et des méthodologies d’acquisition d’information lexicale déjà éprouvées (Namer 2005; Sébillot 2002). Le format de représentation choisi, dans le cadre du Lexique Génératif, se distingue par ses propriétés d’expressivité et d’économie. Cette approche permet donc d’envisager la construction d’un lexique de référence sur le français caractérisé par une forte homogénéité tout en garantissant une couverture large, tant du point de vue de la nomenclature que du point de vue des contenus sémantiques. Une première validation de la méthode fournit une projection quantitative et qualitative des résultats attendus. @@ -651,10 +651,10 @@ Les résultats de la campagne <fixed-case>EASY</fixed-case> d’évaluation des analyseurs syntaxiques du français - PatrickParoubek + PatrickParoubek AnneVilnat IsabelleRobba - ChristelleAyache + ChristelleAyache 243–252 Dans cet article, nous présentons les résultats de la campagne d’évaluation EASY des analyseurs syntaxiques du français. EASY a été la toute première campagne d’évaluation comparative des analyseurs syntaxiques du français en mode boîte noire utilisant des mesures objectives quantitatives. EASY fait partie du programme TECHNOLANGUE du Ministère délégué à la Recherche et à l’Éducation, avec le soutien du ministère de délégué à l’industrie et du ministère de la culture et de la communication. Nous exposons tout d’abord la position de la campagne par rapport aux autres projets d’évaluation en analyse syntaxique, puis nous présentos son déroulement, et donnons les résultats des 15 analyseurs participants en fonction des différents types de corpus et des différentes annotations (constituants et relations). Nous proposons ensuite un ensemble de leçons à tirer de cette campagne, en particulier à propos du protocole d’évaluation, de la définition de la segmentation en unités linguistiques, du formalisme et des activités d’annotation, des critères de qualité des données, des annotations et des résultats, et finalement de la notion de référence en analyse syntaxique. Nous concluons en présentant comment les résultats d’EASY se prolongent dans le projet PASSAGE (ANR-06-MDCA-013) qui vient de débuter et dont l’objectif est d’étiqueter un grand corpus par plusieurs analyseurs en les combinant selon des paramètres issus de l’évaluation. 2007.jeptalnrecital-poster.24 @@ -664,8 +664,8 @@ Modèles statistiques enrichis par la syntaxe pour la traduction automatique HolgerSchwenk - DanielDéchelotte - HélèneBonneau-Maynard + DanielDéchelotte + HélèneBonneau-Maynard AlexandreAllauzen 253–262 La traduction automatique statistique par séquences de mots est une voie prometteuse. Nous présentons dans cet article deux évolutions complémentaires. La première permet une modélisation de la langue cible dans un espace continu. La seconde intègre des catégories morpho-syntaxiques aux unités manipulées par le modèle de traduction. Ces deux approches sont évaluées sur la tâche Tc-Star. Les résultats les plus intéressants sont obtenus par la combinaison de ces deux méthodes. @@ -676,7 +676,7 @@ Traitements phrastiques phonétiques pour la réécriture de phrases dysorthographiées LaurianneSitbon - PatriceBellot + PatriceBellot PhilippeBlache 263–272 Cet article décrit une méthode qui combine des hypothèses graphémiques et phonétiques au niveau de la phrase, à l’aide d’une réprésentation en automates à états finis et d’un modèle de langage, pour la réécriture de phrases tapées au clavier par des dysorthographiques. La particularité des écrits dysorthographiés qui empêche les correcteurs orthographiques d’être efficaces pour cette tâche est une segmentation en mots parfois incorrecte. La réécriture diffère de la correction en ce sens que les phrases réécrites ne sont pas à destination de l’utilisateur mais d’un système automatique, tel qu’un moteur de recherche. De ce fait l’évaluation est conduite sur des versions filtrées et lemmatisées des phrases. Le taux d’erreurs mots moyen passe de 51 % à 20 % avec notre méthode, et est de 0 % sur 43 % des phrases testées. @@ -696,7 +696,7 @@ Traitement sémantique par analyse distributionnelle des noms transdisciplinaires des écrits scientifiques - AgnèsTutin + AgnèsTutin 283–292 Dans cette étude sur le lexique transdisciplinaire des écrits scientifiques, nous souhaitons évaluer dans quelle mesure les méthodes distributionnelles de TAL peuvent faciliter la tâche du linguiste dans le traitement sémantique de ce lexique. Après avoir défini le champ lexical et les corpus exploités, nous testons plusieurs méthodes basées sur des dépendances syntaxiques et observons les proximités sémantiques et les classes établies. L’hypothèse que certaines relations syntaxiques - en particulier les relations de sous-catégorisation – sont plus appropriées pour établir des classements sémantiques n’apparaît qu’en partie vérifiée. Si les relations de sous-catégorisation génèrent des proximités sémantiques entre les mots de meilleure qualité, cela ne semble pas le cas pour la classification par voisinage. 2007.jeptalnrecital-poster.28 @@ -705,7 +705,7 @@ Une expérience de compréhension en contexte de dialogue avec le système <fixed-case>LOGUS</fixed-case>, approche logique de la compréhension de la langue orale - JeanneVillaneau + JeanneVillaneau 293–302 LOGUS est un système de compréhension de la langue orale dans le cadre d’un dialogue homme-machine finalisé. Il est la mise en oeuvre d’une approche logique qui utilise différents formalismes afin d’obtenir un système robuste mais néanmoins relativement extensible. Cet article décrit essentiellement l’étape de compréhension en contexte de dialogue implémentée sur LOGUS, développée et testée à partir d’un corpus de réservation hôtelière enregistré et annoté lors des travaux du groupe MEDIA du projet technolangue. Il décrit également les différentes interrogations et conclusions que peut susciter une telle expérience et les résultats obtenus par le système dans la résolution des références. Concernant l’approche elle-même, cette expérience semble montrer que le formalisme adopté pour la représentation sémantique des énoncés est bien adapté à la compréhension en contexte. 2007.jeptalnrecital-poster.29 @@ -786,7 +786,7 @@ _grammes de transitions. Actes de la 14ème conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues - FarahBenamara + FarahBenamara SylwiaOzdowska ATALA
Toulouse, France
@@ -865,7 +865,7 @@ _grammes de transitions. Actes de la 14ème conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues (Posters) - FarahBenamara + FarahBenamara SylwiaOzdowska ATALA
Toulouse, France
diff --git a/data/xml/2007.mtsummit.xml b/data/xml/2007.mtsummit.xml index 200f10aa0f..9173116d33 100644 --- a/data/xml/2007.mtsummit.xml +++ b/data/xml/2007.mtsummit.xml @@ -40,7 +40,7 @@
Copenhagen, Denmark
September 10-14 2007 - BenteMaegaard + BenteMaegaard mtsummit @@ -59,9 +59,9 @@
Improving speech-to-speech translation using word posterior probabilities - VicenteAlabau + VicenteAlabau AlbertoSanchis - FranciscoCasacuberta + FranciscoCasacuberta 2007.mtsummit-papers.2 alabau-etal-2007-improving @@ -82,7 +82,7 @@ Translating from under-resourced languages: comparing direct transfer against pivot translation BogdanBabych - AnthonyHartley + AnthonyHartley SergeSharoff 2007.mtsummit-papers.5 babych-etal-2007-translating @@ -109,7 +109,7 @@ A system to mine large-scale bilingual dictionaries from monolingual web pages GuihongCao JianfengGao - Jian-YunNie + Jian-YunNie 2007.mtsummit-papers.9 cao-etal-2007-system @@ -129,8 +129,8 @@ Enhancing image-based <fixed-case>A</fixed-case>rabic document translation using noisy channel correction model YiChang - YingZhang - StephanVogel + YingZhang + StephanVogel JieYang 2007.mtsummit-papers.12 chang-etal-2007-enhancing @@ -144,7 +144,7 @@ Incorporating constituent structure constraint into discriminative word alignment - Wen-HanChao + Wen-HanChao Zhou-JunLi 2007.mtsummit-papers.14 chao-li-2007-incorporating @@ -159,8 +159,8 @@ Syntax-enhanced n-gram-based <fixed-case>SMT</fixed-case> - Josep M.Crego - José B.Mariño + Josep M.Crego + José B.Mariño 2007.mtsummit-papers.16 crego-marino-2007-syntax @@ -174,11 +174,11 @@
A state-of-the-art statistical machine translation system based on <fixed-case>M</fixed-case>oses - DanielDéchelotte + DanielDéchelotte HolgerSchwenk - HélèneBonneau-Maynard + HélèneBonneau-Maynard AlexandreAllauzen - GillesAdda + GillesAdda 2007.mtsummit-papers.18 dechelotte-etal-2007-state @@ -190,7 +190,7 @@
<fixed-case>A</fixed-case>rabic diacritization in the context of statistical machine translation - MonaDiab + MonaDiab MahmoudGhoneim NizarHabash 2007.mtsummit-papers.20 @@ -198,24 +198,24 @@ Automatic evaluation of machine translation based on recursive acquisition of an intuitive common parts continuum - HiroshiEchizen-ya - KenjiAraki + HiroshiEchizen-ya + KenjiAraki 2007.mtsummit-papers.21 echizen-ya-araki-2007-automatic Estimating phrase pair relevance for translation model pruning MatthiasEck - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel 2007.mtsummit-papers.22 eck-etal-2007-estimating How much data is needed for reliable <fixed-case>MT</fixed-case> evaluation? Using bootstrapping to study human and automatic metrics PaulaEstrella - OlivierHamon - AndreiPopescu-Belis + OlivierHamon + AndreiPopescu-Belis 2007.mtsummit-papers.23 estrella-etal-2007-much @@ -223,16 +223,16 @@ Lexical translation with application to image searching on the web OrenEtzioni KobiReiter - StephenSoderland + StephenSoderland MarcusSammer 2007.mtsummit-papers.24 etzioni-etal-2007-lexical
Improving transfer-based <fixed-case>MT</fixed-case> systems with automatic refinements - AriadnaFont Llitjós - JaimeCarbonell - AlonLavie + AriadnaFont Llitjós + JaimeCarbonell + AlonLavie 2007.mtsummit-papers.25 font-llitjos-etal-2007-improving @@ -245,13 +245,13 @@ Online and free! Ten years of online machine translation: origins, developments, current use and future prospects FedericoGaspari - JohnHutchins + JohnHutchins 2007.mtsummit-papers.27 gaspari-hutchins-2007-online <fixed-case>POS</fixed-case>-based reordering models for statistical machine translation - DeepaGupta + DeepaGupta MauroCettolo MarcelloFederico 2007.mtsummit-papers.28 @@ -265,57 +265,57 @@ End-to-end evaluation of a speech-to-speech translation system in <fixed-case>TC</fixed-case>-<fixed-case>STAR</fixed-case> - OlivierHamon - DjamelMostefa - KhalidChoukri + OlivierHamon + DjamelMostefa + KhalidChoukri 2007.mtsummit-papers.30 hamon-etal-2007-end Assessing human and automated quality judgments in the <fixed-case>F</fixed-case>rench <fixed-case>MT</fixed-case> evaluation campaign <fixed-case>CESTA</fixed-case> - OlivierHamon - AnthonyHartley - AndreiPopescu-Belis - KhalidChoukri + OlivierHamon + AnthonyHartley + AndreiPopescu-Belis + KhalidChoukri 2007.mtsummit-papers.31 hamon-etal-2007-assessing Report on the <fixed-case>NSF</fixed-case>-sponsored Human Language Technology Workshop on Industrial Centers - MaryHarper + MaryHarper AlexAcero - SrinivasBangalore - JaimeCarbonell - JordanCohen + SrinivasBangalore + JaimeCarbonell + JordanCohen BarbaraCuthill CarolEspy-Wilson ChristianeFellbaum - JohnGarofolo + JohnGarofolo Chin-HuiLee JimLester AndrewMcCallum NelsonMorgan MichaelPicheney JoePicone - LanceRamshaw + LanceRamshaw JeffReynar HadarShemtov - ClareVoss + ClareVoss 2007.mtsummit-papers.32 harper-etal-2007-report Experiments with a noun-phrase driven statistical machine translation system SanjikaHewavitharana - AlonLavie - StephanVogel + AlonLavie + StephanVogel 2007.mtsummit-papers.33 hewavitharana-etal-2007-experiments Domain adaptation of <fixed-case>MT</fixed-case> systems through automatic post-editing PierreIsabelle - CyrilGoutte + CyrilGoutte MichelSimard 2007.mtsummit-papers.34 isabelle-etal-2007-domain @@ -324,7 +324,7 @@ Development of a <fixed-case>J</fixed-case>apanese-<fixed-case>C</fixed-case>hinese machine translation system HitoshiIsahara SadaoKurohashi - Jun’ichiTsujii + Jun’ichiTsujii KiyotakaUchimoto HiroshiNakagawa HiroyukiKaji @@ -342,7 +342,7 @@ Comparing parallel corpora and evaluating their quality - Heiki-JaanKaalep + Heiki-JaanKaalep KaarelVeskis 2007.mtsummit-papers.37 kaalep-veskis-2007-comparing @@ -350,25 +350,25 @@ Iterative refinement of lexicon and phrasal alignment Jae DongKim - StephanVogel + StephanVogel 2007.mtsummit-papers.38 kim-vogel-2007-iterative Semi-automatic error analysis for large-scale statistical machine translation KatrinKirchhoff - OwenRambow + OwenRambow NizarHabash - MonaDiab + MonaDiab 2007.mtsummit-papers.39 kirchhoff-etal-2007-semi Comparing rule-based and data-driven approaches to <fixed-case>S</fixed-case>panish-to-<fixed-case>B</fixed-case>asque machine translation - GorkaLabaka + GorkaLabaka NicolasStroppa AndyWay - KepaSarasola + KepaSarasola 2007.mtsummit-papers.40 labaka-etal-2007-comparing @@ -390,7 +390,7 @@ Faster beam-search decoding for phrasal statistical machine translation - Robert C.Moore + Robert C.Moore ChrisQuirk 2007.mtsummit-papers.43 moore-quirk-2007-faster @@ -401,7 +401,7 @@ AndyWay DanielStein JanBungeroth - HermannNey + HermannNey 2007.mtsummit-papers.44 morrissey-etal-2007-combining @@ -422,7 +422,7 @@
Machine transliteration using multiple transliteration engines and hypothesis re-ranking - Jong-HoonOh + Jong-HoonOh HitoshiIsahara 2007.mtsummit-papers.47 oh-isahara-2007-machine @@ -430,8 +430,8 @@ Introducing translation dictionary into phrase-based <fixed-case>SMT</fixed-case> HideoOkuma - HirofumiYamamoto - EiichiroSumita + HirofumiYamamoto + EiichiroSumita 2007.mtsummit-papers.48 okuma-etal-2007-introducing @@ -439,7 +439,7 @@ Improving example-based machine translation through morphological generalization and adaptation Aaron B.Phillips ViolettaCavalli-Sforza - Ralf D.Brown + Ralf D.Brown 2007.mtsummit-papers.49 phillips-etal-2007-improving @@ -470,22 +470,22 @@ Building a sense-distinguished multilingual lexicon from monolingual corpora and bilingual lexicons MarcusSammer - StephenSoderland + StephenSoderland 2007.mtsummit-papers.53 sammer-soderland-2007-building Estimation of confidence measures for machine translation AlbertoSanchis - AlfonsJuan - EnriqueVidal + AlfonsJuan + EnriqueVidal 2007.mtsummit-papers.54 sanchis-etal-2007-estimation Getting professional translation through user interaction Young-AeSeo - Chang-HyunKim + Chang-HyunKim Seong-IlYang Young-gilKim 2007.mtsummit-papers.55 @@ -496,14 +496,14 @@ SmritiSingh MrugankDalal VishalVachhani - PushpakBhattacharyya - Om P.Damani + PushpakBhattacharyya + Om P.Damani 2007.mtsummit-papers.56 singh-etal-2007-hindi Using rich morphology in resolving certain <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish machine translation divergence - R. Mahesh K.Sinha + R. Mahesh K.Sinha 2007.mtsummit-papers.57 sinha-2007-using @@ -534,8 +534,8 @@
A <fixed-case>MT</fixed-case> system from <fixed-case>T</fixed-case>urkmen to <fixed-case>T</fixed-case>urkish employing finite state and statistical methods - Ahmet CüneydTantuğ - EşrefAdali + Ahmet CüneydTantuğ + EşrefAdali KemalOflazer 2007.mtsummit-papers.61 tantug-etal-2007-mt @@ -558,8 +558,8 @@ <fixed-case>J</fixed-case>apanese-<fixed-case>H</fixed-case>ungarian dictionary generation using ontology resources - IstvánVarga - ShoichiYokoyama + IstvánVarga + ShoichiYokoyama 2007.mtsummit-papers.64 varga-yokoyama-2007-japanese @@ -590,8 +590,8 @@ Domain dependent statistical machine translation JiaXu YonggangDeng - YuqingGao - HermannNey + YuqingGao + HermannNey 2007.mtsummit-papers.68 xu-etal-2007-domain
@@ -599,7 +599,7 @@ Aspect marker generation in <fixed-case>E</fixed-case>nglish-to-<fixed-case>C</fixed-case>hinese machine translation YangYe Karl-MichaelSchneider - StevenAbney + StevenAbney 2007.mtsummit-papers.69 ye-etal-2007-aspect
@@ -614,17 +614,17 @@ A tree-to-tree alignment-based model for statistical machine translation MinZhang HongfeiJiang - Ai TiAw + Ai TiAw JunSun ShengLi - Chew LimTan + Chew LimTan 2007.mtsummit-papers.71 zhang-etal-2007-tree
<fixed-case>P</fixed-case>an<fixed-case>D</fixed-case>o<fixed-case>RA</fixed-case>: a large-scale two-way statistical machine translation system for hand-held devices - YingZhang - StephanVogel + YingZhang + StephanVogel 2007.mtsummit-papers.72 zhang-vogel-2007-pandora @@ -671,7 +671,7 @@ Context-based evaluation of <fixed-case>MT</fixed-case> systems: principles and tools MaghiKing - AndreiPopescu-Belis + AndreiPopescu-Belis PaulaEstrella 2007.mtsummit-tutorials.3 2007.mtsummit-tutorials.3.Presentation.pdf @@ -680,7 +680,7 @@ Using free online <fixed-case>MT</fixed-case> in multilingual websites FedericoGaspari - HaroldSomers + HaroldSomers 2007.mtsummit-tutorials.4 gaspari-somers-2007-using @@ -703,8 +703,8 @@
Copenhagen, Denmark
September 11 2007 - Jun’ichiTsujii - ShoichiYokoyama + Jun’ichiTsujii + ShoichiYokoyama mtsummit @@ -723,7 +723,7 @@ Sung-KwonChoi Ki-YoungLee Yoon-HyungRoh - Young-GilKim + Young-GilKim MunpyoHong 2007.mtsummit-wpt.2 kwon-etal-2007-english @@ -771,8 +771,8 @@ The <fixed-case>C</fixed-case>hinese Room Experiment: The Self-Organizing Feng Shui of <fixed-case>MT</fixed-case> - John S.White - FlorenceReeder + John S.White + FlorenceReeder 2007.mtsummit-cre.1 white-reeder-2007-chinese @@ -783,7 +783,7 @@
Copenhagen, Denmark
September 11 2007 - AnjaBelz + AnjaBelz SebastianVarges mtsummit @@ -801,7 +801,7 @@ Automatic evaluation of generation and parsing for machine translation with automatically acquired transfer rules YvetteGraham DeirdreHogan - Josefvan Genabith + Josefvan Genabith 2007.mtsummit-ucnlg.2 graham-etal-2007-automatic
@@ -813,8 +813,8 @@
Towards broad coverage surface realization with <fixed-case>CCG</fixed-case> - MichaelWhite - RajakrishnanRajkumar + MichaelWhite + RajakrishnanRajkumar ScottMartin 2007.mtsummit-ucnlg.4 white-etal-2007-towards @@ -822,8 +822,8 @@ Method of selecting training sets to build compact and efficient language model KeijiYasuda - HirofumiYamamoto - EiichiroSumita + HirofumiYamamoto + EiichiroSumita 2007.mtsummit-ucnlg.5 yasuda-etal-2007-method @@ -841,7 +841,7 @@ Declarative syntactic processing of natural language using concurrent constraint programming and probabilistic dependency modeling - IreneLangkilde-Geary + IreneLangkilde-Geary 2007.mtsummit-ucnlg.8 langkilde-geary-2007-declarative @@ -853,7 +853,7 @@
Evaluation of <fixed-case>NLG</fixed-case>: some analogies and differences with machine translation and reference resolution - AndreiPopescu-Belis + AndreiPopescu-Belis 2007.mtsummit-ucnlg.10 popescu-belis-2007-evaluation @@ -894,7 +894,7 @@ <fixed-case>NIL</fixed-case>: attribute selection for matching the task corpus using relative attribute groupings obtained from the test data RaquelHervás - PabloGervás + PabloGervás 2007.mtsummit-ucnlg.16 hervas-gervas-2007-nil @@ -913,10 +913,10 @@
Cost-based attribute selection for <fixed-case>GRE</fixed-case> (<fixed-case>GRAPH</fixed-case>-<fixed-case>SC</fixed-case>/<fixed-case>GRAPH</fixed-case>-<fixed-case>FP</fixed-case>) - MariëtTheune + MariëtTheune PascalTouset JetteViethen - EmielKrahmer + EmielKrahmer 2007.mtsummit-ucnlg.19 theune-etal-2007-cost @@ -930,7 +930,7 @@
Content determination in <fixed-case>GRE</fixed-case>: evaluating the evaluator - Keesvan Deemter + Keesvan Deemter AlbertGatt 2007.mtsummit-ucnlg.21 van-deemter-gatt-2007-content @@ -943,8 +943,8 @@ September 11 2007 GregorThurmair - KhalidChoukri - BenteMaegaard + KhalidChoukri + BenteMaegaard mtsummit @@ -953,7 +953,7 @@ The place of automatic evaluation metrics in external quality models for machine translation - AndreiPopescu-Belis + AndreiPopescu-Belis 2007.mtsummit-aptme.1.Presentation.pdf popescu-belis-2007-place @@ -966,22 +966,22 @@ Investigating why <fixed-case>BLEU</fixed-case> penalizes non-statistical systems - EduardHovy + EduardHovy 2007.mtsummit-aptme.3.Presentation.pdf hovy-2007-investigating Linguistic resources in support of various evaluation metrics - ChristopherCieri - StephanieStrassel - Meghan LammieGlenn + ChristopherCieri + StephanieStrassel + Meghan LammieGlenn LaurenFriedman 2007.mtsummit-aptme.4.Presentation.pdf cieri-etal-2007-linguistic Experiences and conclusions from the <fixed-case>CESTA</fixed-case> evaluation project - OlivierHamon + OlivierHamon 2007.mtsummit-aptme.5.Presentation.pdf hamon-2007-experiences @@ -994,15 +994,15 @@ Sensitivity of automated models for <fixed-case>MT</fixed-case> evaluation: proximity-based vs. performance-based methods BogdanBabych - AnthonyHartley + AnthonyHartley 2007.mtsummit-aptme.7.Presentation.pdf babych-hartley-2007-sensitivity <fixed-case>MT</fixed-case> evaluation & <fixed-case>TC</fixed-case>-<fixed-case>STAR</fixed-case> KhalidChoukri - OlivierHamon - DjamelMostefa + OlivierHamon + DjamelMostefa 2007.mtsummit-aptme.8.Presentation.pdf choukri-etal-2007-mt diff --git a/data/xml/2007.sigdial.xml b/data/xml/2007.sigdial.xml index 721741b212..3022243df9 100644 --- a/data/xml/2007.sigdial.xml +++ b/data/xml/2007.sigdial.xml @@ -3,7 +3,7 @@ Proceedings of the 8th SIGdial Workshop on Discourse and Dialogue - HarryBunt + HarryBunt SimonKeizer TimPaek Association for Computational Linguistics @@ -30,7 +30,7 @@ Collective States of Understanding ArashEshghi - Patrick G.T.Healey + Patrick G.T.Healey 2–9 2007.sigdial-1.2 eshghi-healey-2007-collective @@ -38,7 +38,7 @@ Contrasting the Automatic Identification of Two Discourse Markers in Multiparty Dialogues - AndreiPopescu-Belis + AndreiPopescu-Belis SandrineZufferey 10–17 2007.sigdial-1.3 @@ -48,7 +48,7 @@ Detecting and Summarizing Action Items in Multi-Party Dialogue MatthewPurver - JohnDowding + JohnDowding JohnNiekrasz PatrickEhlen ShararehNoorbaloochi @@ -62,7 +62,7 @@ Detecting Arguing and Sentiment in Meetings SwapnaSomasundaran JosefRuppenhofer - JanyceWiebe + JanyceWiebe 26–34 2007.sigdial-1.5 somasundaran-etal-2007-detecting @@ -71,7 +71,7 @@ A Model of Compliance and Emotion for Potentially Adversarial Dialogue Agents AntonioRoque - DavidTraum + DavidTraum 35–38 2007.sigdial-1.6 roque-traum-2007-model @@ -81,7 +81,7 @@ Acquiring and Evaluating a Dialog Corpus through a Dialog Simulation Technique DavidGriol Lluis F.Hurtado - EmilioSanchis + EmilioSanchis EncarnaSegarra 39–42 2007.sigdial-1.7 @@ -91,7 +91,7 @@ An Empirical View on <fixed-case>IQA</fixed-case> Follow-up Questions ManuelKirschner - RaffaellaBernardi + RaffaellaBernardi 43–46 2007.sigdial-1.8 kirschner-bernardi-2007-empirical @@ -101,7 +101,7 @@ An Implemented Method for Distributed Collection and Assessment of Speech Data AlexanderSiebert DavidSchlangen - RaquelFernández + RaquelFernández 47–50 2007.sigdial-1.9 siebert-etal-2007-implemented @@ -110,7 +110,7 @@ Beyond Repair – Testing the Limits of the Conversational Repair System DavidSchlangen - RaquelFernández + RaquelFernández 51–54 2007.sigdial-1.10 schlangen-fernandez-2007-beyond @@ -140,7 +140,7 @@ Emergent Conversational Recommendations: A Dialogue Behavior Approach PontusWärnestal LarsDegerstedt - ArneJönsson + ArneJönsson 63–66 2007.sigdial-1.13 warnestal-etal-2007-emergent @@ -156,13 +156,13 @@ <fixed-case>H</fixed-case>assan: A Virtual Human for Tactical Questioning - DavidTraum + DavidTraum AntonioRoque AntonLeuski - PanayiotisGeorgiou + PanayiotisGeorgiou JillianGerten BilyanaMartinovski - ShrikanthNarayanan + ShrikanthNarayanan SusanRobinson AshishVaswani 71–74 @@ -193,7 +193,7 @@ RohitMishra FengLin MatthewPurver - HarryBratt + HarryBratt YaoMeng StanleyPeters TobiasScheideck @@ -208,7 +208,7 @@ Commute <fixed-case>UX</fixed-case>: Telephone Dialog System for Location-based Services IvanTashev MichaelSeltzer - Yun-ChengJu + Yun-ChengJu DongYu AlexAcero 87–94 @@ -238,7 +238,7 @@ Releasing a Multimodal Dialogue System into the Wild: User Support Mechanisms AlexanderGruenstein - StephanieSeneff + StephanieSeneff 111–119 2007.sigdial-1.21 gruenstein-seneff-2007-releasing @@ -250,7 +250,7 @@ YukaNagano KotaroFunakoshi ToshihikoIto - KenjiAraki + KenjiAraki YujiHasegawa HiroshiTsujino 120–123 @@ -262,9 +262,9 @@ Comparing Spoken Dialog Corpora Collected with Recruited Subjects versus Real Users HuaAi AntoineRaux - DanBohus + DanBohus MaxineEskenazi - DianeLitman + DianeLitman 124–131 2007.sigdial-1.23 ai-etal-2007-comparing @@ -282,7 +282,7 @@ Referring under Restricted Interactivity Conditions - RaquelFernández + RaquelFernández TatjanaLucht DavidSchlangen 136–139 @@ -320,7 +320,7 @@ Measuring Adaptation Between Dialogs SvetlanaStenchikova - AmandaStent + AmandaStent 166–173 2007.sigdial-1.29 stenchikova-stent-2007-measuring @@ -347,8 +347,8 @@ Experimental Modeling of Human-human Multi-threaded Dialogues in the Presence of a Manual-visual Task AlexanderShyrokov - AndrewKun - PeterHeeman + AndrewKun + PeterHeeman 190–193 2007.sigdial-1.32 shyrokov-etal-2007-experimental @@ -357,7 +357,7 @@ Modeling Vocal Interaction for Text-Independent Classification of Conversation Type KornelLaskowski - MariOstendorf + MariOstendorf TanjaSchultz 194–201 2007.sigdial-1.33 @@ -369,7 +369,7 @@ KazunoriKomatani YuichiroFukubayashi TetsuyaOgata - Hiroshi G.Okuno + Hiroshi G.Okuno 202–205 2007.sigdial-1.34 komatani-etal-2007-introducing @@ -385,7 +385,7 @@ On the Training Data Requirements for an Automatic Dialogue Annotation Technique - Carlos D.Martínez-Hinarejos + Carlos D.Martínez-Hinarejos 211–214 2007.sigdial-1.36 martinez-hinarejos-2007-training @@ -393,7 +393,7 @@ Practical Dialogue Manager Development using <fixed-case>POMDP</fixed-case>s - Trung H.Bui + Trung H.Bui Borisvan Schooten DennisHofs 215–218 @@ -404,7 +404,7 @@ Problem-Sensitive Response Generation in Human-Robot Dialogs PetraGieselmann - MariOstendorf + MariOstendorf 219–222 2007.sigdial-1.38 gieselmann-ostendorf-2007-problem @@ -423,7 +423,7 @@ SurabhiGupta JohnNiekrasz MatthewPurver - DanJurafsky + DanJurafsky 227–230 2007.sigdial-1.40 gupta-etal-2007-resolving @@ -431,7 +431,7 @@ <fixed-case>SIDGRID</fixed-case>: A Framework for Distributed and Integrated Multimodal Annotation and Archiving and and Analysis - Gina-AnneLevow + Gina-AnneLevow BennettBertenthal MarkHereld SarahKenny @@ -481,8 +481,8 @@ Implicitly-supervised Learning in Spoken Language Interfaces: an Application to the Confidence Annotation Problem - DanBohus - AlexanderRudnicky + DanBohus + AlexanderRudnicky 256–264 2007.sigdial-1.46 bohus-rudnicky-2007-implicitly @@ -490,7 +490,7 @@ Planning Dialog Actions - MarkSteedman + MarkSteedman RonaldPetrick 265–272 2007.sigdial-1.47 @@ -501,7 +501,7 @@ Statistical User Simulation with a Hidden Agenda JostSchatzmann BlaiseThomson - SteveYoung + SteveYoung 273–282 2007.sigdial-1.48 schatzmann-etal-2007-statistical diff --git a/data/xml/2007.tal.xml b/data/xml/2007.tal.xml index f50f91b076..95a5958d8c 100644 --- a/data/xml/2007.tal.xml +++ b/data/xml/2007.tal.xml @@ -13,17 +13,17 @@ Principles of Evaluation in Natural Language Processing - PatrickParoubek + PatrickParoubek StéphaneChaudiron - LynetteHirschman + LynetteHirschman 7–31 2007.tal-1.1 paroubek-etal-2007-principles Pour l’évaluation externe des systèmes de <fixed-case>TA</fixed-case> par des méthodes fondées sur la tâche [For an external evaluation of <fixed-case>MT</fixed-case> systems by task-based methods] - HervéBlanchon - ChristianBoitet + HervéBlanchon + ChristianBoitet 33–65 2007.tal-1.2 fra @@ -31,7 +31,7 @@ Le rôle des métriques d’évaluation dans le processus de recherche en <fixed-case>TAL</fixed-case> [The role of evaluation metrics in the <fixed-case>NLP</fixed-case> research process] - AndreiPopescu-Belis + AndreiPopescu-Belis 67–91 2007.tal-1.3 fra @@ -49,9 +49,9 @@ <fixed-case>SIMDIAL</fixed-case> - Un paradigme pour évaluer automatiquement des systèmes de dialogue homme-machine en simulant un utilisateur de façon déterministe [<fixed-case>SIMDIAL</fixed-case> - A paradigm for the automatic evaluation of human-machine dialogue systems by deterministic simulation of a user] JosephAllemandou LaurentCharnay - LaurenceDevillers + LaurenceDevillers MurielLauvergne - JosephMariani + JosephMariani 115–139 2007.tal-1.5 fra @@ -76,7 +76,7 @@ Prosodic Phrase Break Prediction: Problems in the Evaluation of Models against a Gold Standard ClaireBrierley - EricAtwell + EricAtwell 187–206 2007.tal-1.8 brierley-atwell-2007-prediction @@ -139,7 +139,7 @@ Éléments pour adapter les systèmes de recherche d’information aux dyslexiques [Towards adapting information retrieval systems to dyslexic people] LaurianeSitbon - PatriceBellot + PatriceBellot PhilippeBlache 123–147 2007.tal-2.6 @@ -160,7 +160,7 @@ Préface [Foreword] ChristianCuxac - PatriceDalle + PatriceDalle 7–10 2007.tal-3.1 fra @@ -169,7 +169,7 @@ Research Directions in Sign Language Processing ChristianCuxac - PatriceDalle + PatriceDalle 15–30 2007.tal-3.2 cuxac-dalle-2007-research @@ -212,7 +212,7 @@ Description lexicale des signes — Intérêts linguistiques d’un modèle géométrique à dépendances [Lexical Description of Signs — Linguistic Benefits of a Geometric Dependency Model] MichaelFilhol - AnneliesBraffort + AnneliesBraffort 151–177 2007.tal-3.7 fra @@ -221,7 +221,7 @@ Modèles et méthodes de traitement d’images pour l’analyse de la langue des signes [Image processing models and methods for sign language analysis] FrédérickGianni - ChristopheCollet + ChristopheCollet FrançoisLefebvre 175–200 2007.tal-3.8 diff --git a/data/xml/2007.tc.xml b/data/xml/2007.tc.xml index 26f457ac3a..1a752495e5 100644 --- a/data/xml/2007.tc.xml +++ b/data/xml/2007.tc.xml @@ -12,7 +12,7 @@ Making a sow’s ear out of a silk purse: (mis)using online <fixed-case>MT</fixed-case> services as bilingual dictionaries FedericoGaspari - HaroldSomers + HaroldSomers 2007.tc-1.1 gaspari-somers-2007-making @@ -25,7 +25,7 @@ A dynamic dictionary for discovering indirect translation equivalents BogdanBabych - AnthonyHartley + AnthonyHartley SergeSharoff 2007.tc-1.3 babych-etal-2007-dynamic @@ -45,13 +45,13 @@ Lost in specialised translation: the corpus as an inexpensive and under-exploited aid for language service providers - GloriaCorpas Pastor + GloriaCorpas Pastor 2007.tc-1.6 corpas-pastor-2007-lost Medical spoken language translation: What do the users really need? - HaroldSomers + HaroldSomers 2007.tc-1.7 somers-2007-medical @@ -70,7 +70,7 @@ Building a bilingual dictionary from movie subtitles based on inter-lingual triggers CarolineLavecchia - KamelSmaili + KamelSmaili DavidLanglois 2007.tc-1.10 lavecchia-etal-2007-building diff --git a/data/xml/2007.tmi.xml b/data/xml/2007.tmi.xml index 90ec9a27a2..5dc36df288 100644 --- a/data/xml/2007.tmi.xml +++ b/data/xml/2007.tmi.xml @@ -12,20 +12,20 @@ Rule-based and statistical machine translation with a focus on <fixed-case>S</fixed-case>wedish - Anna SågvallHein + Anna SågvallHein 2007.tmi-plenaries.1 hein-2007-rule Statistical <fixed-case>MT</fixed-case> from <fixed-case>TMI</fixed-case>-1988 to <fixed-case>TMI</fixed-case>-2007: what has happened? - HermannNey + HermannNey 2007.tmi-plenaries.2 2007.tmi-plenaries.2.Presentation.pdf ney-2007-statistical Is <fixed-case>MT</fixed-case> in crisis? - StevenKrauwer + StevenKrauwer 2007.tmi-plenaries.3 2007.tmi-plenaries.3.Presentation.pdf krauwer-2007-mt @@ -38,7 +38,7 @@ September 7-9 2007 AndyWay - BarbaraGawronska + BarbaraGawronska tmi @@ -48,8 +48,8 @@ An assessment of language elicitation without the supervision of a linguist AlisonAlvarez - LoriLevin - RobertFrederking + LoriLevin + RobertFrederking JillLehman 2007.tmi-papers.1 2007.tmi-papers.1.Presentation.pdf @@ -59,7 +59,7 @@ Combining translation models in statistical machine translation JesúsAndrés-Ferrer IsmaelGarcia-Varea - FranciscoCasacuberta + FranciscoCasacuberta 2007.tmi-papers.2 2007.tmi-papers.2.Presentation.pdf andres-ferrer-etal-2007-combining @@ -109,7 +109,7 @@ A new method for the study of correlations between <fixed-case>MT</fixed-case> evaluation metrics PaulaEstrella - AndreiPopescu-Belis + AndreiPopescu-Belis MaghiKing 2007.tmi-papers.8 2007.tmi-papers.8.Presentation.pdf @@ -154,7 +154,7 @@ A greedy decoder for phrase-based statistical machine translation - PhilippeLanglais + PhilippeLanglais AlexandrePatry FabrizioGotti 2007.tmi-papers.13 @@ -190,7 +190,7 @@ EricNichols FrancisBond Darren ScottAppling - YujiMatsumoto + YujiMatsumoto 2007.tmi-papers.17 2007.tmi-papers.17.Presentation.pdf nichols-etal-2007-combining @@ -202,15 +202,15 @@ Jan ToreLønning PaulMeurer VictoriaRosén - DanFlickinger + DanFlickinger 2007.tmi-papers.18 oepen-etal-2007-towards Reducing human assessment of machine translation quality to binary classifiers - MichaelPaul + MichaelPaul AndrewFinch - EiichiroSumita + EiichiroSumita 2007.tmi-papers.19 2007.tmi-papers.19.Presentation.pdf paul-etal-2007-reducing @@ -224,7 +224,7 @@ Word reordering in statistical machine translation with a <fixed-case>POS</fixed-case>-based distortion model KayRottmann - StephanVogel + StephanVogel 2007.tmi-papers.21 2007.tmi-papers.21.Presentation.pdf rottmann-vogel-2007-word @@ -232,15 +232,15 @@ Automatic induction of shallow-transfer rules for open-source machine translation FelipeSánchez-Martínez - Mikel L.Forcada + Mikel L.Forcada 2007.tmi-papers.22 2007.tmi-papers.22.Presentation.pdf sanchez-martinez-forcada-2007-automatic Reordering via n-best lists for <fixed-case>S</fixed-case>panish-<fixed-case>B</fixed-case>asque translation - GermánSanchis - FranciscoCasacuberta + GermánSanchis + FranciscoCasacuberta 2007.tmi-papers.23 2007.tmi-papers.23.Presentation.pdf sanchis-casacuberta-2007-reordering @@ -251,13 +251,13 @@ VassilikiSpilioti MarinaVassiliou OlgaYannoutsou - StellaMarkantonatou + StellaMarkantonatou 2007.tmi-papers.24 sofianopoulos-etal-2007-demonstration Theoretical and methodological issues regarding the use of language technologies for patients with limited <fixed-case>E</fixed-case>nglish proficiency - HaroldSomers + HaroldSomers 2007.tmi-papers.25 2007.tmi-papers.25.Presentation.pdf somers-2007-theoretical @@ -266,7 +266,7 @@ Hand in hand: automatic sign language to <fixed-case>E</fixed-case>nglish translation DanielStein PhilippeDreuw - HermannNey + HermannNey SaraMorrissey AndyWay 2007.tmi-papers.26 @@ -284,7 +284,7 @@ Exploiting source similarity for <fixed-case>SMT</fixed-case> using context-informed features NicolasStroppa - Antalvan den Bosch + Antalvan den Bosch AndyWay 2007.tmi-papers.28 2007.tmi-papers.28.Presentation.pdf diff --git a/data/xml/2008.amta.xml b/data/xml/2008.amta.xml index 45191f1b26..e484453e15 100644 --- a/data/xml/2008.amta.xml +++ b/data/xml/2008.amta.xml @@ -47,14 +47,14 @@ <fixed-case>S</fixed-case>panish-to-<fixed-case>B</fixed-case>asque <fixed-case>M</fixed-case>ulti<fixed-case>E</fixed-case>ngine Machine Translation for a Restricted Domain - IñakiAlegria - ArantzaCasillas - ArantzaDiaz de Ilarraza + IñakiAlegria + ArantzaCasillas + ArantzaDiaz de Ilarraza JonIgartua - GorkaLabaka - MikelLersundi - AingeruMayor - KepaSarasola + GorkaLabaka + MikelLersundi + AingeruMayor + KepaSarasola 2008.amta-papers.1 37-45 We present our initial strategy for Spanish-to-Basque MultiEngine Machine Translation, a language pair with very different structure and word order and with no huge parallel corpus available. This hybrid proposal is based on the combination of three different MT paradigms: Example-Based MT, Statistical MT and Rule- Based MT. We have evaluated the system, reporting automatic evaluation metrics for a corpus in a test domain. The first results obtained are encouraging. @@ -62,7 +62,7 @@ Exploiting Document-Level Context for Data-Driven Machine Translation - RalfBrown + RalfBrown 2008.amta-papers.2 46-55 This paper presents a method for exploiting document-level similarity between the documents in the training corpus for a corpus-driven (statistical or example-based) machine translation system and the input documents it must translate. The method is simple to implement, efficient (increases the translation time of an example-based system by only a few percent), and robust (still works even when the actual document boundaries in the input text are not known). Experiments on French-English and Arabic-English showed relative gains over the same system without using document-level similarity of up to 7.4% and 5.4%, respectively, on the BLEU metric. @@ -90,8 +90,8 @@ Translation universals: do they exist? A corpus-based <fixed-case>NLP</fixed-case> study of convergence and simplification - GloriaCorpas Pastor - RuslanMitkov + GloriaCorpas Pastor + RuslanMitkov NaveedAfzal ViktorPekar 2008.amta-papers.5 @@ -101,8 +101,8 @@ Computing multiple weighted reordering hypotheses for a phrase-based statistical machine translation system - Marta R.Costa-Jussà - José A. R.Fonollosa + Marta R.Costa-Jussà + José A. R.Fonollosa 2008.amta-papers.6 82-88 Reordering is one source of error in statistical machine translation (SMT). This paper extends the study of the statistical machine reordering (SMR) approach, which uses the powerful techniques of the SMT systems to solve reordering problems. Here, the novelties yield in: (1) using the SMR approach in a SMT phrase-based system, (2) adding a feature function in the SMR step, and (3) analyzing the reordering hypotheses at several stages. Coherent improvements are reported in the TC-STAR task (Es/En) at a relatively low computational cost. @@ -141,7 +141,7 @@ A Generalized Reordering Model for Phrase-Based Statistical Machine Translation YanqingHe - ChengqingZong + ChengqingZong 2008.amta-papers.10 117-124 Phrase-based translation models are widely studied in statistical machine translation (SMT). However, the existing phrase-based translation models either can not deal with non-contiguous phrases or reorder phrases only by the rules without an effective reordering model. In this paper, we propose a generalized reordering model (GREM) for phrase-based statistical machine translation, which is not only able to capture the knowledge on the local and global reordering of phrases, but also is able to obtain some capabilities of phrasal generalization by using non-contiguous phrases. The experimental results have indicated that our model out- performs MEBTG (enhanced BTG with a maximum entropy-based reordering model) and HPTM (hierarchical phrase-based translation model) by improvement of 1.54% and 0.66% in BLEU. @@ -159,7 +159,7 @@ Large-scale Discriminative n-gram Language Models for Statistical Machine Translation ZhifeiLi - SanjeevKhudanpur + SanjeevKhudanpur 2008.amta-papers.12 133-142 We extend discriminative n-gram language modeling techniques originally proposed for automatic speech recognition to a statistical machine translation task. In this context, we propose a novel data selection method that leads to good models using a fraction of the training data. We carry out systematic experiments on several benchmark tests for Chinese to English translation using a hierarchical phrase-based machine translation system, and show that a discriminative language model significantly improves upon a state-of-the-art baseline. The experiments also highlight the benefits of our data selection method. @@ -169,8 +169,8 @@ Are Multiple Reference Translations Necessary? Investigating the Value of Paraphrased Reference Translations in Parameter Optimization NitinMadnani PhilipResnik - Bonnie J.Dorr - RichardSchwartz + Bonnie J.Dorr + RichardSchwartz 2008.amta-papers.13 143-152 Most state-of-the-art statistical machine translation systems use log-linear models, which are defined in terms of hypothesis features and weights for those features. It is standard to tune the feature weights in order to maximize a translation quality metric, using held-out test sentences and their corresponding reference translations. However, obtaining reference translations is expensive. In our earlier work (Madnani et al., 2007), we introduced a new full-sentence paraphrase technique, based on English-to-English decoding with an MT system, and demonstrated that the resulting paraphrases can be used to cut the number of human reference translations needed in half. In this paper, we take the idea a step further, asking how far it is possible to get with just a single good reference translation for each item in the development set. Our analysis suggests that it is necessary to invest in four or more human translations in order to significantly improve on a single translation augmented by monolingual paraphrases. @@ -216,8 +216,8 @@ Wider Pipelines: N-Best Alignments and Parses in <fixed-case>MT</fixed-case> Training AshishVenugopal AndreasZollmann - Noah A.Smith - StephanVogel + Noah A.Smith + StephanVogel 2008.amta-papers.18 192-201 State-of-the-art statistical machine translation systems use hypotheses from several maximum a posteriori inference steps, including word alignments and parse trees, to identify translational structure and estimate the parameters of translation models. While this approach leads to a modular pipeline of independently developed components, errors made in these “single-best” hypotheses can propagate to downstream estimation steps that treat these inputs as clean, trustworthy training data. In this work we integrate N-best alignments and parses by using a probability distribution over these alternatives to generate posterior fractional counts for use in downstream estimation. Using these fractional counts in a DOP-inspired syntax-based translation system, we show significant improvements in translation quality over a single-best trained baseline. @@ -226,9 +226,9 @@ Improving <fixed-case>E</fixed-case>nglish-to-<fixed-case>C</fixed-case>hinese Translation for Technical Terms using Morphological Information XianchaoWu - NaoakiOkazaki + NaoakiOkazaki TakashiTsunakawa - Jun’ichiTsujii + Jun’ichiTsujii 2008.amta-papers.19 202-211 The continuous emergence of new technical terms and the difficulty of keeping up with neologism in parallel corpora deteriorate the performance of statistical machine translation (SMT) systems. This paper explores the use of morphological information to improve English-to-Chinese translation for technical terms. To reduce the morpheme-level translation ambiguity, we group the morphemes into morpheme phrases and propose the use of domain information for translation candidate selection. In order to find correspondences of morpheme phrases between the source and target languages, we propose an algorithm to mine morpheme phrase translation pairs from a bilingual lexicon. We also build a cascaded translation model that dynamically shifts translation units from phrase level to word and morpheme phrase levels. The experimental results show the significant improvements over the current phrase-based SMT systems. @@ -236,10 +236,10 @@ Mining the Web for Domain-Specific Translations - Jian-ChengWu + Jian-ChengWu PeterWei-Huai Hsu - Chiung-HuiTseng - Jason S.Chang + Chiung-HuiTseng + Jason S.Chang 2008.amta-papers.20 212-221 We introduce a method for learning to find domain-specific translations for a given term on the Web. In our approach, the source term is transformed into an expanded query aimed at maximizing the probability of retrieving translations from a very large collection of mixed-code documents. The method involves automatically generating sets of target-language words from training data in specific domains, automatically selecting target words for effectiveness in retrieving documents containing the sought-after translations. At run time, the given term is transformed into an expanded query and submitted to a search engine, and ranked translations are extracted from the document snippets returned by the search engine. We present a prototype, TermMine, which applies the method to a Web search engine. Evaluations over a set of domains and terms show that TermMine outperforms state-of-the-art machine translation systems. @@ -248,7 +248,7 @@ Two-Stage Translation: A Combined Linguistic and Statistical Machine Translation Framework YushiXu - StephanieSeneff + StephanieSeneff 2008.amta-papers.21 222-231 We propose a two-stage system for spoken language machine translation. In the first stage, the source sentence is parsed and paraphrased into an intermediate language which retains the words in the source language but follows the word order of the target language as much as feasible. This stage is mostly linguistic. In the second stage, a statistical MT is performed to translate the intermediate language into the target language. For the task of English-to-Mandarin translation, we achieved a 2.5 increase in BLEU score and a 45% decrease in GIZA-Alignment Crossover, on IWSLT-06 data. In a human evaluation of the sentences that differed, the two-stage system was preferred three times as often as the baseline. @@ -267,7 +267,7 @@ Improving Syntax-Driven Translation Models by Re-structuring Divergent and Nonisomorphic Parse Tree Structures VamshiAmbati - AlonLavie + AlonLavie 2008.amta-srw.1 235-244 Syntax-based approaches to statistical MT require syntax-aware methods for acquiring their underlying translation models from parallel data. This acquisition process can be driven by syntactic trees for either the source or target language, or by trees on both sides. Work to date has demonstrated that using trees for both sides suffers from severe coverage problems. This is primarily due to the highly restrictive space of constituent segmentations that the trees on two sides introduce, which adversely affects the recall of the resulting translation models. Approaches that project from trees on one side, on the other hand, have higher levels of recall, but suffer from lower precision, due to the lack of syntactically-aware word alignments. In this paper we explore the issue of lexical coverage of the translation models learned in both of these scenarios. We specifically look at how the non-isomorphic nature of the parse trees for the two languages affects recall and coverage. We then propose a novel technique for restructuring target parse trees, that generates highly isomorphic target trees that preserve the syntactic boundaries of constituents that were aligned in the original parse trees. We evaluate the translation models learned from these restructured trees and show that they are significantly better than those learned using trees on both sides and trees on one side. @@ -275,7 +275,7 @@ Using Bilingual <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Word Alignments to Resolve <fixed-case>PP</fixed-case>-attachment Ambiguity in <fixed-case>E</fixed-case>nglish - VictoriaFossum + VictoriaFossum KevinKnight 2008.amta-srw.2 245-253 @@ -284,8 +284,8 @@ Combination of Machine Translation Systems via Hypothesis Selection from Combined N-Best Lists - Almut SiljaHildebrand - StephanVogel + Almut SiljaHildebrand + StephanVogel 2008.amta-srw.3 254-261 Different approaches in machine translation achieve similar translation quality with a variety of translations in the output. Recently it has been shown, that it is possible to leverage the individual strengths of various systems and improve the overall translation quality by combining translation outputs. In this paper we present a method of hypothesis selection which is relatively simple compared to system combination methods which construct a synthesis of the input hypotheses. Our method uses information from n-best lists from several MT systems and features on the sentence level which are independent from the MT systems involved to improve the translation quality. @@ -302,8 +302,8 @@ Diacritization as a Machine Translation and as a Sequence Labeling Problem TimSchlippe - ThuyLinhNguyen - StephanVogel + ThuyLinhNguyen + StephanVogel 2008.amta-srw.5 270-278 In this paper we describe and compare two techniques for the automatic diacritization of Arabic text: First, we treat diacritization as a monotone machine translation problem, proposing and evaluating several translation and language models, including word and character-based models separately and combined as well as a model which uses statistical machine translation (SMT) to post-edit a rule-based diacritization system. Then we explore a more traditional view of diacritization as a sequence labeling problem, and propose a solution using conditional random fields (Lafferty et al., 2001). All these techniques are compared through word error rate and diacritization error rate both in terms of full diacritization and ignoring vowel endings. The empirical experiments showed that the machine translation approaches perform better than the sequence labeling approaches concerning the error rates. @@ -363,12 +363,11 @@ Many-to-Many Multilingual Medical Speech Translation on a <fixed-case>PDA</fixed-case> KyokoKanzaki YukieNakao - MannyRayner + MannyRayner MarianneSantaholma MarianneStarlander NikosTsourakis 2008.amta-govandcom.4 - Particularly considering the requirement of high reliability, we argue that the most appropriate architecture for a medical speech translator that can be realised using today’s technology combines unidirectional (doctor to patient) translation, medium-vocabulary controlled language coverage, interlingua-based translation, an embedded help component, and deployability on a hand-held hardware platform. We present an overview of the Open Source MedSLT prototype, which has been developed in accordance with these design principles. The system is implemented on top of the Regulus and Nuance 8.5 platforms, translates patient examination questions for all language pairs in the set {English, French, Japanese, Arabic, Catalan}, using vocabularies of about 400 to 1 100 words, and can be run in a distributed client/server environment, where the client application is hosted on a Nokia Internet Tablet device. kanzaki-etal-2008-many @@ -400,8 +399,8 @@ Automated Machine Translation Improvement Through Post-Editing Techniques: Analyst and Translator Experiments - JenniferDoyon - ChristineDoran + JenniferDoyon + ChristineDoran C. DonaldMeans DomeniqueParr 2008.amta-govandcom.8 @@ -412,7 +411,7 @@ User-centered <fixed-case>MT</fixed-case> Development and Implementation KathleenEgan - FrancisKubala + FrancisKubala AllenSears 2008.amta-govandcom.9 354-363 @@ -421,7 +420,7 @@ Identifying Common Challenges for Human and Machine Translation: A Case Study from the <fixed-case>GALE</fixed-case> Program LaurenFriedman - StephanieStrassel + StephanieStrassel 2008.amta-govandcom.10 364-369 The dramatic improvements shown by statistical machine translation systems in recent years clearly demonstrate the benefits of having large quantities of manually translated parallel text for system training and development. And while many competing evaluation metrics exist to evaluate MT technology, most of those methods also crucially rely on the existence of one or more high quality human translations to benchmark system performance. Given the importance of human translations in this framework, understanding the particular challenges of human translation-for-MT is key, as is comprehending the relative strengths and weaknesses of human versus machine translators in the context of an MT evaluation. Vanni (2000) argued that the metric used for evaluation of competence in human language learners may be applicable to MT evaluation; we apply similar thinking to improve the prediction of MT performance, which is currently unreliable. In the current paper we explore an alternate model based upon a set of genre-defining features that prove to be consistently challenging for both humans and MT systems. @@ -441,7 +440,7 @@ Designing and executing <fixed-case>MT</fixed-case> workflows through the Kepler Framework ReginaldHobbs - ClareVoss + ClareVoss 2008.amta-govandcom.12 380-389 hobbs-voss-2008-designing @@ -565,8 +564,8 @@ Applications of <fixed-case>MT</fixed-case> during Olympic Games 2008 - ChengqingZong - HeyanHuang + ChengqingZong + HeyanHuang ShumingShi 2008.amta-govandcom.26 470-479 diff --git a/data/xml/2008.eamt.xml b/data/xml/2008.eamt.xml index 9f3cc797c1..71302b37f8 100644 --- a/data/xml/2008.eamt.xml +++ b/data/xml/2008.eamt.xml @@ -7,9 +7,9 @@
Hamburg, Germany
September 22-23 2008 - JohnHutchins - Waltherv. Hahn - BenteMaegaard + JohnHutchins + Waltherv. Hahn + BenteMaegaard JohnHutchins eamt @@ -58,10 +58,10 @@
Hybrid machine translation architectures within and beyond the <fixed-case>E</fixed-case>uro<fixed-case>M</fixed-case>atrix project - AndreasEisele + AndreasEisele ChristianFedermann HansUszkoreit - HervéSaint-Amand + HervéSaint-Amand MartinKay MichaelJellinghaus SabineHunsicker @@ -83,17 +83,17 @@ A finite-state framework for log-linear models in machine translation JorgeGonzález - FranciscoCasacuberta + FranciscoCasacuberta 41-46 2008.eamt-1.8 gonzalez-casacuberta-2008-finite A novel alignment model inspired on <fixed-case>IBM</fixed-case> Model 1 - JesúsGonzález-Rubio - GermánSanchis-Trilles - AlfonsJuan - FranciscoCasacuberta + JesúsGonzález-Rubio + GermánSanchis-Trilles + AlfonsJuan + FranciscoCasacuberta 47-56 2008.eamt-1.9 gonzalez-rubio-etal-2008-novel @@ -101,7 +101,7 @@ Packed rules for automatic transfer-rule induction YvetteGraham - Josefvan Genabith + Josefvan Genabith 57-65 2008.eamt-1.10 graham-van-genabith-2008-packed @@ -117,7 +117,7 @@ Improving machine translation between closely related <fixed-case>R</fixed-case>omance languages PetrHomola - VladislavKuboň + VladislavKuboň 72-77 2008.eamt-1.12 homola-kubon-2008-improving @@ -132,8 +132,8 @@ Applying boosting to statistical machine translation - Antonio L.Lagarda - FranciscoCasacuberta + Antonio L.Lagarda + FranciscoCasacuberta 88-96 2008.eamt-1.14 lagarda-casacuberta-2008-applying @@ -141,7 +141,7 @@ Word association models and search strategies for discriminative word alignment PatrikLambert - Rafael E.Banchs + Rafael E.Banchs 97-103 2008.eamt-1.15 lambert-banchs-2008-word @@ -149,7 +149,7 @@ Automatic alignment of <fixed-case>C</fixed-case>zech and <fixed-case>E</fixed-case>nglish deep syntactic dependency trees DavidMareček - ZdeněkŽabokrtský + ZdeněkŽabokrtský VáclavNovák 103-113 2008.eamt-1.16 @@ -159,7 +159,7 @@ Explorations in using grammatical dependencies for contextual phrase translation disambiguation AurélienMax RafikMakhloufi - PhilippeLanglais + PhilippeLanglais 114-119 2008.eamt-1.17 max-etal-2008-explorations @@ -201,16 +201,16 @@ Phrase-level alignment generation using a smoothed loglinear phrase-based statistical alignment model - DanielOrtiz-Martínez - IsmaelGarcía-Varea - FranciscoCasacuberta + DanielOrtiz-Martínez + IsmaelGarcía-Varea + FranciscoCasacuberta 160-169 2008.eamt-1.22 ortiz-martinez-etal-2008-phrase Learning context-sensitive synchronous rules - AndersSøgaard + AndersSøgaard 170-175 2008.eamt-1.23 sogaard-2008-learning @@ -218,9 +218,9 @@ Comparing two different bidirectional versions of the limited-domain medical spoken language translator <fixed-case>M</fixed-case>ed<fixed-case>SLT</fixed-case> MarianneStarlander - PierretteBouillon + PierretteBouillon GlennFlores - MannyRayner + MannyRayner NikosTsourakis 176-181 2008.eamt-1.24 @@ -236,7 +236,7 @@ Boosting performance of weak <fixed-case>MT</fixed-case> engines automatically: using <fixed-case>MT</fixed-case> output to align segments & build statistical post-editors - Clare R.Voss + Clare R.Voss MatthewAguirre JeffreyMicher RichardChang diff --git a/data/xml/2008.iwslt.xml b/data/xml/2008.iwslt.xml index 5d4b34fd99..70ab389ad1 100644 --- a/data/xml/2008.iwslt.xml +++ b/data/xml/2008.iwslt.xml @@ -25,7 +25,7 @@ Overview of the <fixed-case>IWSLT</fixed-case> 2008 evaluation campaign. - MichaelPaul + MichaelPaul 1-7 2008.iwslt-evaluation.1 This paper gives an overview of the evaluation campaign results of the International1Workshop on Spoken Language Translation (IWSLT) 2008 . In this workshop, we focused on the translation of spontaneous speech recorded in a real situation and the feasability of pivot-language-based translation approaches. The translation directions were English into Chinese and vice versa for the Challenge Task, Chinese into English and English into Spanish for the Pivot Task, and Arabic, Chinese, Spanish into English for the standard BTEC Task. In total, 19 research groups building 58 MT engines participated in this year’s event. Automatic and subjective evaluations were carried out in order to investigate the impact of spontaneity aspects of field data experiments on automatic speech recognition (ASR) and machine translation (MT) system performance as well as the robustness of state-of-the-art MT systems towards speech-to-speech translation in real environments. @@ -35,7 +35,7 @@ The <fixed-case>CMU</fixed-case> syntax-augmented machine translation system: <fixed-case>SAMT</fixed-case> on Hadoop with n-best alignments. AndreasZollmann AshishVenugopal - StephanVogel + StephanVogel 18-25 2008.iwslt-evaluation.2 We present the CMU Syntax Augmented Machine Translation System that was used in the IWSLT-08 evaluation campaign. We participated in the Full-BTEC data track for Chinese-English translation, focusing on transcript translation. For this year’s evaluation, we ported the Syntax Augmented MT toolkit [1] to the Hadoop MapReduce [2] parallel processing architecture, allowing us to efficiently run experiments evaluating a novel “wider pipelines” approach to integrate evidence from N -best alignments into our translation models. We describe each step of the MapReduce pipeline as it is implemented in the open-source SAMT toolkit, and show improvements in translation quality by using N-best alignments in both hierarchical and syntax augmented translation systems. @@ -45,7 +45,7 @@ Exploiting alignment techniques in <fixed-case>MATREX</fixed-case>: the <fixed-case>DCU</fixed-case> machine translation system for <fixed-case>IWSLT</fixed-case> 2008. YanjunMa JohnTinsley - HanyHassan + HanyHassan JinhuaDu AndyWay 26-33 @@ -78,9 +78,9 @@ <fixed-case>I</fixed-case>2<fixed-case>R</fixed-case> multi-pass machine translation system for <fixed-case>IWSLT</fixed-case> 2008. BoxingChen - DeyiXiong + DeyiXiong MinZhang - AitiAw + AitiAw HaizhouLi 46-51 2008.iwslt-evaluation.6 @@ -95,7 +95,7 @@ YunHuang YangFeng WenbinJiang - YajuanLu + YajuanLu QunLiu 52-57 2008.iwslt-evaluation.7 @@ -126,7 +126,7 @@ The <fixed-case>MIT</fixed-case>-<fixed-case>LL</fixed-case>/<fixed-case>AFRL</fixed-case> <fixed-case>IWSLT</fixed-case>-2008 <fixed-case>MT</fixed-case> system. WadeShen BrianDelaney - TimAnderson + TimAnderson RaySlyh 69-76 2008.iwslt-evaluation.10 @@ -138,11 +138,11 @@ MasaoUtiyama AndrewFinch HideoOkuma - MichaelPaul + MichaelPaul HailongCao - HirofumiYamamoto + HirofumiYamamoto KeijiYasuda - EiichiroSumita + EiichiroSumita 77-84 2008.iwslt-evaluation.11 This paper describes the National Institute of Information and Communications Technology/Advanced Telecommunications Research Institute International (NICT/ATR) statistical machine translation (SMT) system used for the IWSLT 2008 evaluation campaign. We participated in the Chinese–English (Challenge Task), English–Chinese (Challenge Task), Chinese–English (BTEC Task), Chinese–Spanish (BTEC Task), and Chinese–English–Spanish (PIVOT Task) translation tasks. In the English–Chinese translation Challenge Task, we focused on exploring various factors for the English–Chinese translation because the research on the translation of English–Chinese is scarce compared to the opposite direction. In the Chinese–English translation Challenge Task, we employed a novel clustering method, where training sentences similar to the development data in terms of the word error rate formed a cluster. In the pivot translation task, we integrated two strategies for pivot translation by linear interpolation. @@ -156,7 +156,7 @@ LichengFang YufengChen YuZhou - ChengqingZong + ChengqingZong 85-91 2008.iwslt-evaluation.12 This paper describes our statistical machine translation system (CASIA) used in the evaluation campaign of the International Workshop on Spoken Language Translation (IWSLT) 2008. In this year's evaluation, we participated in challenge task for Chinese-English and English-Chinese, BTEC task for Chinese-English. Here, we mainly introduce the overview of our system, the primary modules, the key techniques, and the evaluation results. @@ -177,7 +177,7 @@ <fixed-case>POSTECH</fixed-case> machine translation system for <fixed-case>IWSLT</fixed-case> 2008 evaluation campaign. JonghoonLee - Gary GeunbaeLee + Gary GeunbaeLee 98-103 2008.iwslt-evaluation.14 In this paper, we describe POSTECH system for IWSLT 2008 evaluation campaign. The system is based on phrase based statistical machine translation. We set up a baseline system using well known freely available software. A preprocessing method and a language modeling method have been applied to the baseline system in order to improve machine translation quality. The preprocessing method is to identify and remove useless tokens in source texts. And the language modeling method models phrase level n-gram. We have participated in the BTEC tasks to see the effects of our methods. @@ -202,7 +202,7 @@ ArneMauser OliverBender SaabMansour - HermannNey + HermannNey 108-115 2008.iwslt-evaluation.16 RWTH’s system for the 2008 IWSLT evaluation consists of a combination of different phrase-based and hierarchical statistical machine translation systems. We participated in the translation tasks for the Chinese-to-English and Arabic-to-English language pairs. We investigated different preprocessing techniques, reordering methods for the phrase-based system, including reordering of speech lattices, and syntax-based enhancements for the hierarchical systems. We also tried the combination of the Arabic-to-English and Chinese-to-English outputs as an additional submission. @@ -213,13 +213,13 @@ MaximKhalilov Maria R.Costa-jussà Carlos A. HenríquezQ. - José A. R.Fonollosa + José A. R.Fonollosa Adolfo HernándezH. - José B.Mariño - Rafael E.Banchs + José B.Mariño + Rafael E.Banchs ChenBoxing MinZhang - AitiAw + AitiAw HaizhouLi 116-123 2008.iwslt-evaluation.17 @@ -234,7 +234,7 @@ ZhanyiLiu JianfengLi DengjunRen - ZhengyuNiu + ZhengyuNiu 124-131 2008.iwslt-evaluation.18 This paper reports on the first participation of TCH (Toshiba (China) Research and Development Center) at the IWSLT evaluation campaign. We participated in all the 5 translation tasks with Chinese as source language or target language. For Chinese-English and English-Chinese translation, we used hybrid systems that combine rule-based machine translation (RBMT) method and statistical machine translation (SMT) method. For Chinese-Spanish translation, phrase-based SMT models were used. For the pivot task, we combined the translations generated by a pivot based statistical translation model and a statistical transfer translation model (firstly, translating from Chinese to English, and then from English to Spanish). Moreover, for better performance of MT, we improved each module in the MT systems as follows: adapting Chinese word segmentation to spoken language translation, selecting out-of-domain corpus to build language models, using bilingual dictionaries to correct word alignment results, handling NE translation and selecting translations from the outputs of multiple systems. According to the automatic evaluation results on the full test sets, we top in all the 5 tasks. @@ -286,7 +286,7 @@ FrancisBond EricNichols DarrenScott Appling - MichaelPaul + MichaelPaul 150-157 2008.iwslt-papers.2 Large amounts of training data are essential for training statistical machine translations systems. In this paper we show how training data can be expanded by paraphrasing one side. The new data is made by parsing then generating using a precise HPSG based grammar, which gives sentences with the same meaning, but minor variations in lexical choice and word order. In experiments with Japanese and English, we showed consistent gains on the Tanaka Corpus with less consistent improvement on the IWSLT 2005 evaluation data. @@ -311,7 +311,7 @@ R.Prasad F.Choi P.Natarajan - DavidStallard + DavidStallard K.Krstovski M.Kamali 166-173 @@ -326,7 +326,7 @@ FlorianKraft JanNiehues MatthiasPaulik - AlexWaibel + AlexWaibel 174-181 2008.iwslt-papers.5 In an increasingly globalized world, situations in which people of different native tongues have to communicate with each other become more and more frequent. In many such situations, human interpreters are prohibitively expensive or simply not available. Automatic spoken language translation (SLT), as a cost-effective solution to this dilemma, has received increased attention in recent years. For a broad number of applications, including live SLT of lectures and oral presentations, these automatic systems should ideally operate in real time and with low latency. Large and highly specialized vocabularies as well as strong variations in speaking style – ranging from read speech to free presentations suffering from spontaneous events – make simultaneous SLT of lectures a challenging task. This paper presents our progress in building a simultaneous German-English lecture translation system. We emphasize some of the challenges which are particular to this language pair and propose solutions to tackle some of the problems encountered. @@ -344,7 +344,7 @@ Analysing soft syntax features and heuristics for hierarchical phrase based machine translation. DavidVilar DanielStein - HermannNey + HermannNey 190-197 2008.iwslt-papers.7 Similar to phrase-based machine translation, hierarchical systems produce a large proportion of phrases, most of which are supposedly junk and useless for the actual translation. For the hierarchical case, however, the amount of extracted rules is an order of magnitude bigger. In this paper, we investigate several soft constraints in the extraction of hierarchical phrases and whether these help as additional scores in the decoding to prune unneeded phrases. We show the methods that help best. @@ -353,7 +353,7 @@ Improvements in dynamic programming beam search for phrase-based statistical machine translation. RichardZens - HermannNey + HermannNey 195-205 2008.iwslt-papers.8 Search is a central component of any statistical machine translation system. We describe the search for phrase-based SMT in detail and show its importance for achieving good translation quality. We introduce an explicit distinction between reordering and lexical hypotheses and organize the pruning accordingly. We show that for the large Chinese-English NIST task already a small number of lexical alternatives is sufficient, whereas a large number of reordering hypotheses is required to achieve good translation quality. The resulting system compares favorably with the current stateof-the-art, in particular we perform a comparison with cube pruning as well as with Moses. diff --git a/data/xml/2008.jeptalnrecital.xml b/data/xml/2008.jeptalnrecital.xml index afe8f90df3..29c41181e4 100644 --- a/data/xml/2008.jeptalnrecital.xml +++ b/data/xml/2008.jeptalnrecital.xml @@ -3,8 +3,8 @@ Actes de la 15ème conférence sur le Traitement Automatique des Langues Naturelles. Articles longs - FrédéricBéchet - Jean-FrancoisBonastre + FrédéricBéchet + Jean-FrancoisBonastre ATALA
Avignon, France
June @@ -26,7 +26,7 @@
Réécriture et Détection d’Implication Textuelle - PaulBédaride + PaulBédaride ClaireGardent 11–20 Nous présentons un système de normalisation de la variation syntaxique qui permet de mieux reconnaître la relation d’implication textuelle entre deux phrases. Le système est évalué sur une suite de tests comportant 2 520 paires test et les résultats montrent un gain en précision par rapport à un système de base variant entre 29.8 et 78.5 points la complexité des cas considérés. @@ -60,7 +60,7 @@ Un modèle multi-sources pour la segmentation en sujets de journaux radiophoniques StéphaneHuet - GuillaumeGravier + GuillaumeGravier PascaleSébillot 41–50 Nous présentons une méthode de segmentation de journaux radiophoniques en sujets, basée sur la prise en compte d’indices lexicaux, syntaxiques et acoustiques. Partant d’un modèle statistique existant de segmentation thématique, exploitant la notion de cohésion lexicale, nous étendons le formalisme pour y inclure des informations d’ordre syntaxique et acoustique. Les résultats expérimentaux montrent que le seul modèle de cohésion lexicale ne suffit pas pour le type de documents étudié en raison de la taille variable des segments et de l’absence d’un lien direct entre segment et thème. L’utilisation d’informations syntaxiques et acoustiques permet une amélioration substantielle de la segmentation obtenue. @@ -71,9 +71,9 @@ Extraction automatique d’informations à partir de micro-textes non structurés CédricVidrequin - Juan-ManuelTorres-Moreno + Juan-ManuelTorres-Moreno Jean-JacquesSchneider - MarcEl-Bèze + MarcEl-Bèze 51–60 Nous présentons dans cet article une méthode d’extraction automatique d’informations sur des textes de très petite taille, faiblement structurés. Nous travaillons sur des textes dont la rédaction n’est pas normalisée, avec très peu de mots pour caractériser chaque information. Les textes ne contiennent pas ou très peu de phrases. Il s’agit le plus souvent de morceaux de phrases ou d’expressions composées de quelques mots. Nous comparons plusieurs méthodes d’extraction, dont certaines sont entièrement automatiques. D’autres utilisent en partie une connaissance du domaine que nous voulons réduite au minimum, de façon à minimiser le travail manuel en amont. Enfin, nous présentons nos résultats qui dépassent ce dont il est fait état dans la littérature, avec une précision équivalente et un rappel supérieur. 2008.jeptalnrecital-long.6 @@ -82,9 +82,9 @@ Quelles combinaisons de scores et de critères numériques pour un système de Questions/Réponses ? - LaurentGillard - PatriceBellot - MarcEl-Bèze + LaurentGillard + PatriceBellot + MarcEl-Bèze 61–70 Dans cet article, nous présentons une discussion sur la combinaison de différents scores et critères numériques pour la sélection finale d’une réponse dans la partie en charge des questions factuelles du système de Questions/Réponses développé au LIA. Ces scores et critères numériques sont dérivés de ceux obtenus en sortie de deux composants cruciaux pour notre système : celui de sélection des passages susceptibles de contenir une réponse et celui d’extraction et de sélection d’une réponse. Ils sont étudiés au regard de leur expressivité. Des comparaisons sont faites avec des approches de sélection de passages mettant en oeuvre des scores conventionnels en recherche d’information. Parallèlement, l’influence de la taille des contextes (en nombre de phrases) est évaluée. Cela permet de mettre en évidence que le choix de passages constitués de trois phrases autour d’une réponse candidate, avec une sélection des réponses basée sur une combinaison entre un score de passage de type Lucene ou Cosine et d’un score de compacité apparaît comme un compromis intéressant. 2008.jeptalnrecital-long.7 @@ -103,8 +103,8 @@ Modélisation du principe d’ancrage pour la robustesse des systèmes de dialogue homme-machine finalisés - AlexandreDenis - MatthieuQuignard + AlexandreDenis + MatthieuQuignard 81–90 Cet article présente une modélisation du principe d’ancrage (grounding) pour la robustesse des systèmes de dialogue finalisés. Ce principe, décrit dans (Clark & Schaefer, 1989), suggère que les participants à un dialogue fournissent des preuves de compréhension afin d’atteindre la compréhension mutuelle. Nous explicitons une définition computationnelle du principe d’ancrage fondée sur des jugements de compréhension qui, contrairement à d’autres modèles, conserve une motivation pour l’expression de la compréhension. Nous déroulons enfin le processus d’ancrage sur un exemple tiré de l’implémentation du modèle. 2008.jeptalnrecital-long.9 @@ -115,7 +115,7 @@ Enertex : un système basé sur l’énergie textuelle SilviaFernández EricSanjuan - Juan-ManuelTorres-Moreno + Juan-ManuelTorres-Moreno 91–100 Dans cet article, nous présentons des applications du système Enertex au Traitement Automatique de la Langue Naturelle. Enertex est basé sur l’énergie textuelle, une approche par réseaux de neurones inspirée de la physique statistique des systèmes magnétiques. Nous avons appliqué cette approche aux problèmes du résumé automatique multi-documents et de la détection de frontières thématiques. Les résultats, en trois langues : anglais, espagnol et français, sont très encourageants. 2008.jeptalnrecital-long.10 @@ -125,9 +125,9 @@ Intégration d’une étape de pré-filtrage et d’une fonction multiobjectif en vue d’améliorer le système <fixed-case>E</fixed-case>xtra<fixed-case>N</fixed-case>ews de résumé de documents multiples FatmaKallel Jaoua - LamiaHadrich Belguith + LamiaHadrich Belguith MaherJaoua - AbdelmajidBen Hamadou + AbdelmajidBen Hamadou 101–110 Dans cet article, nous présentons les améliorations que nous avons apportées au système ExtraNews de résumé automatique de documents multiples. Ce système se base sur l’utilisation d’un algorithme génétique qui permet de combiner les phrases des documents sources pour former les extraits, qui seront croisés et mutés pour générer de nouveaux extraits. La multiplicité des critères de sélection d’extraits nous a inspiré une première amélioration qui consiste à utiliser une technique d’optimisation multi-objectif en vue d’évaluer ces extraits. La deuxième amélioration consiste à intégrer une étape de pré-filtrage de phrases qui a pour objectif la réduction du nombre des phrases des textes sources en entrée. Une évaluation des améliorations apportées à notre système est réalisée sur les corpus de DUC’04 et DUC’07. 2008.jeptalnrecital-long.11 @@ -136,7 +136,7 @@ Recherche locale pour la traduction statistique à base de segments - PhilippeLanglais + PhilippeLanglais AlexandrePatry FabrizioGotti 111–120 @@ -149,7 +149,7 @@ Transcrire les <fixed-case>SMS</fixed-case> comme on reconnaît la parole CatherineKobus FrançoisYvon - GéraldineDamnati + GéraldineDamnati 121–130 Cet article présente une architecture inspirée des systèmes de reconnaissance vocale pour effectuer une normalisation orthographique de messages en « langage SMS ». Nous décrivons notre système de base, ainsi que diverses évolutions de ce système, qui permettent d’améliorer sensiblement la qualité des normalisations produites. 2008.jeptalnrecital-long.13 @@ -186,8 +186,8 @@ Expériences d’analyse syntaxique statistique du français - BenoîtCrabbé - MarieCandito + BenoîtCrabbé + MarieCandito 161–170 Nous montrons qu’il est possible d’obtenir une analyse syntaxique statistique satisfaisante pour le français sur du corpus journalistique, à partir des données issues du French Treebank du laboratoire LLF, à l’aide d’un algorithme d’analyse non lexicalisé. 2008.jeptalnrecital-long.17 @@ -196,7 +196,7 @@ Construction d’un wordnet libre du français à partir de ressources multilingues - BenoîtSagot + BenoîtSagot DarjaFišer 171–180 Cet article décrit la construction d’un Wordnet Libre du Français (WOLF) à partir du Princeton WordNet et de diverses ressources multilingues. Les lexèmes polysémiques ont été traités au moyen d’une approche reposant sur l’alignement en mots d’un corpus parallèle en cinq langues. Le lexique multilingue extrait a été désambiguïsé sémantiquement à l’aide des wordnets des langues concernées. Par ailleurs, une approche bilingue a été suffisante pour construire de nouvelles entrées à partir des lexèmes monosémiques. Nous avons pour cela extrait des lexiques bilingues à partir deWikipédia et de thésaurus. Le wordnet obtenu a été évalué par rapport au wordnet français issu du projet EuroWordNet. Les résultats sont encourageants, et des applications sont d’ores et déjà envisagées. @@ -220,7 +220,7 @@ AïdaKhemakhem BilelGargouri KaisHaddar - AbdelmajidBen Hamadou + AbdelmajidBen Hamadou 192–201 Le présent papier s’intéresse à l’élaboration des dictionnaires électroniques arabes à usage éditorial. Il propose un modèle unifié et normalisé de ces dictionnaires en se référant à la future norme LMF (Lexical Markup Framework) ISO 24613. Ce modèle permet de construire des dictionnaires extensibles, sur lesquels on peut réaliser, grâce à une structuration fine et standard, des fonctions de consultation génériques adaptées aux besoins des utilisateurs. La mise en oeuvre du modèle proposé est testée sur des dictionnaires existants de la langue arabe en utilisant, pour la consultation, le système ADIQTO (Arabic DIctionary Query TOols) que nous avons développé pour l’interrogation générique des dictionnaires normalisés de l’arabe. 2008.jeptalnrecital-long.20 @@ -240,7 +240,7 @@ Une alternative aux modèles de traduction statistique d’<fixed-case>IBM</fixed-case>: Les triggers inter-langues CarolineLavecchia - KamelSmaïli + KamelSmaïli DavidLanglois 212–221 Dans cet article, nous présentons une nouvelle approche pour la traduction automatique fondée sur les triggers inter-langues. Dans un premier temps, nous expliquons le concept de triggers inter-langues ainsi que la façon dont ils sont déterminés. Nous présentons ensuite les différentes expérimentations qui ont été menées à partir de ces triggers afin de les intégrer au mieux dans un processus complet de traduction automatique. Pour cela, nous construisons à partir des triggers inter-langues des tables de traduction suivant différentes méthodes. Nous comparons par la suite notre système de traduction fondé sur les triggers interlangues à un système état de l’art reposant sur le modèle 3 d’IBM (Brown & al., 1993). Les tests menés ont montré que les traductions automatiques générées par notre système améliorent le score BLEU (Papineni & al., 2001) de 2, 4% comparé à celles produites par le système état de l’art. @@ -259,7 +259,7 @@ Les architectures linguistiques et computationnelles en traduction automatique sont indépendantes - ChristianBoitet + ChristianBoitet 232–241 Contrairement à une idée répandue, les architectures linguistiques et computationnelles des systèmes de traduction automatique sont indépendantes. Les premières concernent le choix des représentations intermédiaires, les secondes le type d’algorithme, de programmation et de ressources utilisés. Il est ainsi possible d’utiliser des méthodes de calcul « expertes » ou « empiriques » pour construire diverses phases ou modules de systèmes d’architectures linguistiques variées. Nous terminons en donnant quelques éléments pour le choix de ces architectures en fonction des situations traductionnelles et des ressources disponibles, en termes de dictionnaires, de corpus, et de compétences humaines. 2008.jeptalnrecital-long.24 @@ -317,7 +317,7 @@ Apprentissage artificiel de règles d’indexation pour <fixed-case>MEDLINE</fixed-case> - AurélieNévéol + AurélieNévéol VincentClaveau 292–301 L’indexation est une composante importante de tout système de recherche d’information. Dans MEDLINE, la base documentaire de référence pour la littérature du domaine biomédical, le contenu des articles référencés est indexé à l’aide de descripteurs issus du thésaurus MeSH. Avec l’augmentation constante de publications à indexer pour maintenir la base à jour, le besoin d’outils automatiques se fait pressant pour les indexeurs. Dans cet article, nous décrivons l’utilisation et l’adaptation de la Programmation Logique Inductive (PLI) pour découvrir des règles d’indexation permettant de générer automatiquement des recommandations d’indexation pour MEDLINE. Les résultats obtenus par cette approche originale sont très satisfaisants comparés à ceux obtenus à l’aide de règles manuelles lorsque celles-ci existent. Ainsi, les jeux de règles obtenus par PLI devraient être prochainement intégrés au système produisant les recommandations d’indexation automatique pour MEDLINE. @@ -329,8 +329,8 @@ Actes de la 15ème conférence sur le Traitement Automatique des Langues Naturelles. Articles courts - FrédéricBéchet - Jean-FrancoisBonastre + FrédéricBéchet + Jean-FrancoisBonastre ATALA
Avignon, France
June @@ -353,7 +353,7 @@ Calculs d’unification sur les arbres de dérivation <fixed-case>TAG</fixed-case> SylvainSchmitz - JosephLe Roux + JosephLe Roux 11–20 Nous définissons un formalisme, les grammaires rationnelles d’arbres avec traits, et une traduction des grammaires d’arbres adjoints avec traits vers ce nouveau formalisme. Cette traduction préserve les structures de dérivation de la grammaire d’origine en tenant compte de l’unification de traits. La construction peut être appliquée aux réalisateurs de surface qui se fondent sur les arbres de dérivation. 2008.jeptalnrecital-court.2 @@ -403,8 +403,8 @@ <fixed-case>E</fixed-case>-Gen : Profilage automatique de candidatures RémyKessler - Juan-ManuelTorres-Moreno - MarcEl-Bèze + Juan-ManuelTorres-Moreno + MarcEl-Bèze 61–70 La croissance exponentielle de l’Internet a permis le développement de sites d’offres d’emploi en ligne. Le système E-Gen (Traitement automatique d’offres d’emploi) a pour but de permettre l’analyse et la catégorisation d’offres d’emploi ainsi qu’une analyse et classification des réponses des candidats (Lettre de motivation et CV). Nous présentons les travaux réalisés afin de résoudre la seconde partie : on utilise une représentation vectorielle de texte pour effectuer une classification des pièces jointes contenus dans le mail à l’aide de SVM. Par la suite, une évaluation de la candidature est effectuée à l’aide de différents classifieurs (SVM et n-grammes de mots). 2008.jeptalnrecital-court.7 @@ -413,7 +413,7 @@ Typage, produit cartésien et unités d’analyse pour les modèles à états finis - FrançoisBarthélemy + FrançoisBarthélemy 71–80 Dans cet article, nous présentons un nouveau langage permettant d’écrire des relations rationnelles compilées en automates finis. Les deux caractéristiques innovantes de ce langage sont de pourvoir décrire des relations à plusieurs niveaux, pas nécessairement deux et d’utiliser diverses unités d’analyse pour exprimer les liens entre niveaux. Cela permet d’aligner de façon fine des représentations multiples. 2008.jeptalnrecital-court.8 @@ -431,7 +431,7 @@ <fixed-case>POLYMOTS</fixed-case> : une base de données de constructions dérivationnelles en français à partir de radicaux phonologiques - NuriaGala + NuriaGala VéroniqueRey 91–100 Cet article présente POLYMOTS, une base de données lexicale contenant huit mille mots communs en français. L’originalité de l’approche proposée tient à l’analyse des mots. En effet, à la différence d’autres bases lexicales représentant la morphologie dérivationnelle des mots à partir d’affixes, ici l’idée a été d’isoler un radical commun à un ensemble de mots d’une même famille. Nous avons donc analysé les formes des mots et, par comparaison phonologique (forme phonique comparable) et morphologique (continuité de sens), nous avons regroupé les mots par familles, selon le type de radical phonologique. L’article présente les fonctionnalités de la base et inclut une discussion sur les applications et les perspectives d’une telle ressource. @@ -484,7 +484,7 @@ FabienPoulard ThierryWaszak NicolasHernandez - PatriceBellot + PatriceBellot 141–150 Dans le contexte de la recherche de plagiat, le repérage de citations et de ses constituants est primordial puisqu’il peut amener à évaluer le caractère licite ou illicite d’une reprise (source citée ou non). Nous proposons ici une comparaison de méthodes automatiques pour le repérage de ces informations et rapportons une évaluation quantitative de celles-ci. Un corpus d’écrits journalistiques français a été manuellement annoté pour nous servir de base d’apprentissage et de test. 2008.jeptalnrecital-court.15 @@ -513,7 +513,7 @@ Traduction multilingue : le projet <fixed-case>M</fixed-case>ul<fixed-case>T</fixed-case>ra - ÉricWehrli + ÉricWehrli LukaNerima 171–178 L’augmentation rapide des échanges et des communications pluriculturels, en particulier sur internet, intensifie les besoins d’outils multilingues y compris de traduction. Cet article décrit un projet en cours au LATL pour le développement d’un système de traduction multilingue basé sur un modèle linguistique abstrait et largement générique, ainsi que sur un modèle logiciel basé sur la notion d’objet. Les langues envisagées dans la première phase de ce projet sont l’allemand, le français, l’italien, l’espagnol et l’anglais. @@ -546,8 +546,8 @@ Marie-JeanMeurs FrédéricDuvert FrédéricBéchet - FabriceLefèvre - RenatoDe Mori + FabriceLefèvre + RenatoDe Mori 199–208 Cet article présente un formalisme de représentation des connaissances qui a été utilisé pour fournir des annotations sémantiques de haut niveau pour le corpus de dialogue oral MEDIA. Ces annotations en structures sémantiques, basées sur le paradigme FrameNet, sont obtenues de manière incrémentale et partiellement automatisée. Nous décrivons le processus d’interprétation automatique qui permet d’obtenir des compositions sémantiques et de générer des hypothèses de frames par inférence. Le corpus MEDIA est un corpus de dialogues en langue française dont les tours de parole de l’utilisateur ont été manuellement transcrits et annotés (niveaux mots et constituants sémantiques de base). Le processus proposé utilise ces niveaux pour produire une annotation de haut niveau en frames sémantiques. La base de connaissances développée (définitions des frames et règles de composition) est présentée, ainsi que les résultats de l’annotation automatique. 2008.jeptalnrecital-court.21 @@ -556,7 +556,7 @@ Dissymétrie entre l’indexation des documents et le traitement des requêtes pour la recherche d’information en langue arabe - RamziAbbès + RamziAbbès MalekBoualem 209–218 Les moteurs de recherches sur le web produisent des résultats comparables et assez satisfaisants pour la recherche de documents écrits en caractères latins. Cependant, ils présentent de sérieuses lacunes dès que l’ont s’intéresse à des langues peu dotées ou des langues sémitiques comme l’arabe. Dans cet article nous présentons une étude analytique et qualitative de la recherche d’information en langue arabe en mettant l’accent sur l’insuffisance des outils de recherche actuels, souvent mal adaptés aux spécificités de la langue arabe. Pour argumenter notre analyse, nous présentons des résultats issus d’observations et de tests autour de certains phénomènes linguistiques de l’arabe écrit. Pour la validation des ces observations, nous avons testé essentiellement le moteur de recherche Google. @@ -568,7 +568,7 @@ Actes de la 15ème conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues - PatriceBellot + PatriceBellot Marie-LaureGuénot ATALA
Avignon, France
diff --git a/data/xml/2008.tal.xml b/data/xml/2008.tal.xml index c2f3e01f14..5ca2a16975 100644 --- a/data/xml/2008.tal.xml +++ b/data/xml/2008.tal.xml @@ -13,7 +13,7 @@ Enrichissement d’un lexique bilingue par apprentissage analogique [Enrichment of a Bilingual Lexicon by Analogical Learning] - PhilippeLanglais + PhilippeLanglais AlexandrePatry 13–40 2008.tal-1.1 @@ -22,8 +22,8 @@ Fouille d’erreurs sur des sorties d’analyseurs syntaxiques [Error Mining on Syntactic Parser Output] - BenoîtSagot - ÉricVillemonte de la Clergerie + BenoîtSagot + ÉricVillemonte de la Clergerie 41–60 2008.tal-1.2 fra @@ -60,7 +60,7 @@ Compréhension automatique de la parole arabe spontanée — Une modélisation numérique [Automatic Understanding of Spontaneous <fixed-case>A</fixed-case>rabic Speech — A Numerical Model] AnisZouaghi MounirZrigui - GeorgesAntoniadis + GeorgesAntoniadis 141–166 2008.tal-1.6 fra @@ -135,8 +135,8 @@ Le développement d’une plate-forme pour l’annotation spécialisée de documents Web : retour d’expérience [Developping a platform dedicated to the annotation of web documents: a case study] - ThierryHamon - AdelineNazarenko + ThierryHamon + AdelineNazarenko 127–154 2008.tal-2.6 fra @@ -144,7 +144,7 @@ <fixed-case>S</fixed-case>x<fixed-case>P</fixed-case>ipe 2 : architecture pour le traitement présyntaxique de corpus bruts [<fixed-case>S</fixed-case>x<fixed-case>P</fixed-case>ipe 2 : an architecture for surface preprocessing of raw corpora] - BenoîtSagot + BenoîtSagot PierreBoullier 155–188 2008.tal-2.7 @@ -174,8 +174,8 @@ <fixed-case>SEWS</fixed-case> : un serveur d’évaluation orienté Web pour la syntaxe [<fixed-case>SEWS</fixed-case> : an web-based server for evaluating syntactic annotation tools] - OlivierHamon - PatrickParoubek + OlivierHamon + PatrickParoubek DjamelMostef 247–270 2008.tal-2.10 @@ -186,7 +186,7 @@ Cocytus: parallel <fixed-case>NLP</fixed-case> over disparate data NoahEvans MasayukiAsahar - YujiMatsumoto + YujiMatsumoto 271–293 2008.tal-2.11 evans-etal-2008-cocytus @@ -213,8 +213,8 @@ Contributions du traitement automatique de la parole à l’étude des voyelles orales du français [Using automatic speech processing to study <fixed-case>F</fixed-case>rench oral vowels] - MartineAdda-Decker - CédricGendrot + MartineAdda-Decker + CédricGendrot NoëlNguyen 13–46 2008.tal-3.2 @@ -225,9 +225,9 @@ La parole spontanée : transcription et traitement [Processing and transcribing spontaneous speech] ThierryBazillon VincentJousse - FrédéricBéchet + FrédéricBéchet YannickEstève - GeorgesLinarès + GeorgesLinarès DanielLuzzati 47–76 2008.tal-3.3 @@ -252,7 +252,7 @@ RoxaneBertrand PhilippeBlache RobertEspesser - GaëlleFerré + GaëlleFerré ChristineMeunier BéatricePriego-Valverde StéphaneRauzy @@ -263,10 +263,10 @@ Accents étrangers et régionaux en français. Caractérisation et identification [Foreign and regional accents in <fixed-case>F</fixed-case>rench. Characterisation and identification] - PhilippeBoula de Mareüil + PhilippeBoula de Mareüil BiancaVieru-Dimulescu CécileWoehrling - MartineAdda-Decker + MartineAdda-Decker 135–163 2008.tal-3.6 fra @@ -275,8 +275,8 @@ Alignement automatique et analyse phonétique : comparaison de différents systèmes pour l’analyse du schwa [Automatic alignment and phonetic studies: Comparing alignment systems for the analysis of the schwa] AudreyBürki - CédricGendrot - GuillaumeGravier + CédricGendrot + GuillaumeGravier GeorgeLinarès CécileFougeron 165–197 @@ -286,8 +286,8 @@ Caractéristiques acoustiques et prosodiques des hésitations vocaliques dans trois langues [Acoustic and prosodic characteristics of vocalic hesitations in three languages] - IoanaVasilescu - MartineAdda-Decker + IoanaVasilescu + MartineAdda-Decker RenaNemoto 199–228 2008.tal-3.8 diff --git a/data/xml/2008.tc.xml b/data/xml/2008.tc.xml index e671a0f517..99a0278940 100644 --- a/data/xml/2008.tc.xml +++ b/data/xml/2008.tc.xml @@ -17,7 +17,7 @@ Hybrid Architectures for Multi-Engine Machine Translation - AndreasEisele + AndreasEisele 2008.tc-1.2 eisele-2008-hybrid diff --git a/data/xml/2008.wac.xml b/data/xml/2008.wac.xml index c593e2ab2f..b2742245ee 100644 --- a/data/xml/2008.wac.xml +++ b/data/xml/2008.wac.xml @@ -21,8 +21,8 @@ Reranking <fixed-case>G</fixed-case>oogle with <fixed-case>GR</fixed-case>e<fixed-case>G</fixed-case> - RodolfoDelmonte - Marco Aldo PiccolinoBoniforti + RodolfoDelmonte + Marco Aldo PiccolinoBoniforti 1-7 2008.wac-1.1 We present an experiment evaluating the contribution of a system called GReG for reranking the snippets returned by Google’s search engine in the 10 best links presented to the user, captured by the use of Google’s API. The evaluation aims at establishing whether or not the introduction of deep linguistic information may improve the accuracy of Google or rather it is the opposite case as maintained by the majority of people working in Information Retrieval, using a Bag Of Words approach. We used 900 questions, answers taken from TREC 8, 9 competitions, execute three different types of evaluation: one without any linguistic aid; a second one with tagging, syntactic constituency contribution; another run with what we call Partial Logical Form. Even though GReG is still work in progress, it is possible to draw clearcut conclusions: adding linguistic information to the evaluation process of the best snippet that can answer a question improves enormously the performance. In another experiment we used the actual associated to the Q/A pairs distributed by one of TREC’s participant, got even higher accuracy. @@ -31,7 +31,7 @@ <fixed-case>G</fixed-case>oogle for the Linguist on a Budget AndrásKornai - PéterHalácsy + PéterHalácsy 8-11 2008.wac-1.2 In this paper, we present GLB, yet another open source, free system to create, exploit linguistic corpora gathered from the web. A simple, robust web crawl algorithm, a multi-dimensional information retrieval tool„ a crude parallelization mechanism are proposed, especially for researchers working in resource-limited environments. @@ -53,7 +53,7 @@ PavlinaFragkou ArisTheodorakos VangelisKarkaletsis - Constantine D.Spyropoulos + Constantine D.Spyropoulos 18-25 2008.wac-1.4 The information explosion of the Web aggravates the problem of effective information retrieval. Even though linguistic approaches found in the literature perform linguistic annotation by creating metadata in the form of tokens, lemmas or part of speech tags, however, this process is insufficient. This is due to the fact that these linguistic metadata do not exploit the actual content of the page, leading to the need of performing semantic annotation based on a predefined semantic model. This paper proposes a new learning approach for performing automatic semantic annotation. This is the result of a two step procedure: the first step partitions a web page into blocks based on its visual layout, while the second, performs subsequent partitioning based on the examination of appearance of specific types of entities denoting the semantic category as well as the application of a number of simple heuristics. Preliminary experiments performed on a manually annotated corpus regarding athletics proved to be very promising. @@ -62,7 +62,7 @@ Identification of Duplicate News Stories in Web Pages JohnGibson - BenWellner + BenWellner SusanLubar 26-33 2008.wac-1.5 @@ -71,7 +71,7 @@ <fixed-case>G</fixed-case>lossa<fixed-case>N</fixed-case>et 2: a linguistic search engine for <fixed-case>RSS</fixed-case>-based corpora - CédrickFairon + CédrickFairon KévinMacé HubertNaets 34-39 @@ -94,7 +94,7 @@ Introducing, evaluating uk<fixed-case>W</fixed-case>a<fixed-case>C</fixed-case>, a very large web-derived corpus of <fixed-case>E</fixed-case>nglish AdrianoFerraresi ErosZanchetta - MarcoBaroni + MarcoBaroni SilviaBernardini 47-54 2008.wac-1.8 diff --git a/data/xml/2009.eamt.xml b/data/xml/2009.eamt.xml index b72a040516..3f423713f6 100644 --- a/data/xml/2009.eamt.xml +++ b/data/xml/2009.eamt.xml @@ -7,8 +7,8 @@
Barcelona, Spain
May 14–15 2009 - LluísMàrquez - HaroldSomers + LluísMàrquez + HaroldSomers eamt @@ -23,13 +23,13 @@
Adaptable, Community-Controlled, Language Technologies for Language Maintenance - LoriLevin + LoriLevin 2009.eamt-1.2 levin-2009-adaptable Character-Based <fixed-case>PSMT</fixed-case> for Closely Related Languages - JörgTiedemann + JörgTiedemann 2009.eamt-1.3 tiedemann-2009-character @@ -37,7 +37,7 @@ <fixed-case>TS</fixed-case>3: an Improved Version of the Bilingual Concordancer <fixed-case>T</fixed-case>rans<fixed-case>S</fixed-case>earch StéphaneHuet JulienBourdaillet - PhilippeLanglais + PhilippeLanglais 2009.eamt-1.4 huet-etal-2009-ts3
@@ -54,7 +54,7 @@ Evaluation-Guided Pre-Editing of Source Text: Improving <fixed-case>MT</fixed-case>-Tractability of Light Verb Constructions BogdanBabych - AnthonyHartley + AnthonyHartley SergeSharoff 2009.eamt-1.6 babych-etal-2009-evaluation @@ -69,49 +69,49 @@ Improving a <fixed-case>C</fixed-case>atalan-<fixed-case>S</fixed-case>panish Statistical Translation System using Morphosyntactic Knowledge MireiaFarrús - Marta R.Costa-jussà + Marta R.Costa-jussà MarcPoch - AdolfoHernández - José B.Mariño + AdolfoHernández + José B.Mariño 2009.eamt-1.8 farrus-etal-2009-improving Use of Rich Linguistic Information to Translate Prepositions and Grammar Cases to <fixed-case>B</fixed-case>asque - EnekoAgirre + EnekoAgirre AitziberAtutxa - GorkaLabaka - MikelLersundi - AingeruMayor - KepaSarasola + GorkaLabaka + MikelLersundi + AingeruMayor + KepaSarasola 2009.eamt-1.9 agirre-etal-2009-use Gappy Translation Units under Left-to-Right <fixed-case>SMT</fixed-case> Decoding - Josep M.Crego + Josep M.Crego FrançoisYvon 2009.eamt-1.10 crego-yvon-2009-gappy Relevance of Different Segmentation Options on <fixed-case>S</fixed-case>panish-<fixed-case>B</fixed-case>asque <fixed-case>SMT</fixed-case> - ArantzaDíaz de Ilarraza - GorkaLabaka - KepaSarasola + ArantzaDíaz de Ilarraza + GorkaLabaka + KepaSarasola 2009.eamt-1.11 diaz-de-ilarraza-etal-2009-relevance <fixed-case>E</fixed-case>nglish–<fixed-case>L</fixed-case>atvian Toponym Processing: Translation Strategies and Linguistic Patterns TatianaGornostay - IngunaSkadiņa + IngunaSkadiņa 2009.eamt-1.12 gornostay-skadina-2009-english An Environment for Named Entity Recognition and Translation - FilipGraliński + FilipGraliński KrzysztofJassem MichałMarcińczuk 2009.eamt-1.13 @@ -129,19 +129,19 @@ SylvainRaybaud CarolineLavecchia DavidLanglois - KamelSmaïli + KamelSmaïli 2009.eamt-1.15 raybaud-etal-2009-word Translating Questions for Cross-Lingual <fixed-case>QA</fixed-case> - JörgTiedemann + JörgTiedemann 2009.eamt-1.16 tiedemann-2009-translating Developing Prototypes for Machine Translation between Two <fixed-case>S</fixed-case>ami Languages - Francis M.Tyers + Francis M.Tyers LindaWiechetek TrondTrosterud 2009.eamt-1.17 @@ -149,7 +149,7 @@ Collocations in a Rule-Based <fixed-case>MT</fixed-case> System: A Case Study Evaluation of their Translation Adequacy - EricWehrli + EricWehrli VioletaSeretan LukaNerima LorenzaRusso @@ -187,7 +187,7 @@ A Phrase-Based Hidden Semi-<fixed-case>M</fixed-case>arkov Approach to Machine Translation JesúsAndrés-Ferrer - AlfonsJuan + AlfonsJuan 2009.eamt-1.23 andres-ferrer-juan-2009-phrase @@ -200,13 +200,13 @@ A Constraint Satisfaction Approach to Machine Translation SanderCanisius - Antalvan den Bosch + Antalvan den Bosch 2009.eamt-1.25 canisius-van-den-bosch-2009-constraint Introducing the Autshumato Integrated Translation Environment - Hendrik J.Groenewald + Hendrik J.Groenewald WildrichFourie 2009.eamt-1.26 groenewald-fourie-2009-introducing @@ -214,7 +214,7 @@ A New Subtree-Transfer Approach to Syntax-Based Reordering for Statistical Machine Translation MaximKhalilov - José A. R.Fonollosa + José A. R.Fonollosa MarkDras 2009.eamt-1.27 khalilov-etal-2009-new @@ -227,7 +227,7 @@ Rule-Based Augmentation of Training Data in <fixed-case>B</fixed-case>reton-<fixed-case>F</fixed-case>rench Statistical Machine Translation - Francis M.Tyers + Francis M.Tyers 2009.eamt-1.29 tyers-2009-rule @@ -242,14 +242,14 @@ Are Unaligned Words Important for Machine Translation? YuqiZhang EvgenyMatusov - HermannNey + HermannNey 2009.eamt-1.31 zhang-etal-2009-unaligned Using Supertags as Source Language Context in <fixed-case>SMT</fixed-case> RejwanulHaque - Sudip KumarNaskar + Sudip KumarNaskar YanjunMa AndyWay 2009.eamt-1.32 @@ -258,7 +258,7 @@ On <fixed-case>LM</fixed-case> Heuristics for the Cube Growing Algorithm DavidVilar - HermannNey + HermannNey 2009.eamt-1.33 vilar-ney-2009-lm @@ -290,7 +290,7 @@ Empirical machine translation and its evaluation - JesúsGiménez + JesúsGiménez 2009.eamt-smart.2 gimenez-2009-empirical @@ -326,7 +326,7 @@ Improving <fixed-case>SMT</fixed-case> by learning translation direction - CyrilGoutte + CyrilGoutte DavidKurokawa PierreIsabelle 2009.eamt-smart.7 diff --git a/data/xml/2009.freeopmt.xml b/data/xml/2009.freeopmt.xml index b51b3898e0..f2c105c2b5 100644 --- a/data/xml/2009.freeopmt.xml +++ b/data/xml/2009.freeopmt.xml @@ -8,7 +8,7 @@ 2009 Juan AntonioPérez-Ortiz FelipeSánchez-Martinez - Francis M.Tyers + Francis M.Tyers freeopmt @@ -17,7 +17,7 @@ Matxin: developing sustainable machine translation for a less-resourced language - KepaSarasola + KepaSarasola 2009.freeopmt-1.1 sarasola-2009-matxin @@ -29,9 +29,9 @@ The Apertium machine translation platform: Five years on - Mikel L.Forcada + Mikel L.Forcada Francis M.Tyers - GemaRamírez-Sánchez + GemaRamírez-Sánchez 3-10 2009.freeopmt-1.3 This paper describes Apertium: a free/open-source machine translation platform (engine, toolbox and data), its history, its philosophy of design, its technology, the community of developers, the research and business based on it, and its prospects and challenges, now that it is five years old. @@ -39,7 +39,7 @@ Matxin: Moving towards language independence - AingeruMayor + AingeruMayor Francis M.Tyers 11-18 2009.freeopmt-1.4 @@ -84,7 +84,7 @@ An open-source highly scalable web service architecture for the Apertium machine translation engine - Victor M.Sánchez-Cartagena + Victor M.Sánchez-Cartagena Juan AntonioPérez-Ortiz 51-58 2009.freeopmt-1.9 @@ -111,8 +111,8 @@ Joint efforts to further develop and incorporate Apertium into the document management flow at <fixed-case>U</fixed-case>niversitat Oberta de <fixed-case>C</fixed-case>atalunya LuisVillarejo Muñoz - SergioOrtiz Rojas - MireiaGinestí Rosell + SergioOrtiz Rojas + MireiaGinestí Rosell 75-82 2009.freeopmt-1.12 This article describes the needs of UOC regarding translation and how these needs are satisfied by Prompsit further developing a free rule-based machine translation system: Apertium. We initially describe the general framework regarding linguistic needs inside UOC. Then, section 2 introduces Apertium and outlines the development scenario that Prompsit executed. After that, section 3 outlines the specific needs of UOC and why Apertium was chosen as the machine translation engine. Then, section 4 describes some of the features specially developed in this project. Section 5 explains how the linguistic data was improved to increase the quality of the output in Catalan and Spanish. And, finally, we draw conclusions and outline further work originating from the project. diff --git a/data/xml/2009.iwslt.xml b/data/xml/2009.iwslt.xml index 0ccdb6e7a4..821dd305a1 100644 --- a/data/xml/2009.iwslt.xml +++ b/data/xml/2009.iwslt.xml @@ -17,7 +17,7 @@ Two-way speech-to-speech translation for communicating across language barriers - PremkumarNatarajan + PremkumarNatarajan 2009.iwslt-keynotes.2 natarajan-2009-two @@ -39,7 +39,7 @@ Overview of the <fixed-case>IWSLT</fixed-case> 2009 evaluation campaign - MichaelPaul + MichaelPaul 1-18 2009.iwslt-evaluation.1 2009.iwslt-evaluation.1.Presentation.pdf @@ -48,7 +48,7 @@ <fixed-case>A</fixed-case>pp<fixed-case>T</fixed-case>ek <fixed-case>T</fixed-case>urkish-<fixed-case>E</fixed-case>nglish machine translation system description for <fixed-case>IWSLT</fixed-case> 2009 - SelçukKöprü + SelçukKöprü 19-23 2009.iwslt-evaluation.2 2009.iwslt-evaluation.2.Presentation.pdf @@ -57,8 +57,8 @@ <fixed-case>B</fixed-case>arcelona Media <fixed-case>SMT</fixed-case> system description for the <fixed-case>IWSLT</fixed-case> 2009 - Marta R.Costa-jussà - Rafael E.Banchs + Marta R.Costa-jussà + Rafael E.Banchs 24-28 2009.iwslt-evaluation.3 2009.iwslt-evaluation.3.Presentation.pdf @@ -69,7 +69,7 @@ Low-resource machine translation using <fixed-case>M</fixed-case>a<fixed-case>T</fixed-case>r<fixed-case>E</fixed-case>x YanjunMa TsuyoshiOkita - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu JinhuaDu AndyWay 29-36 @@ -83,7 +83,7 @@ NicolaBertoldi AriannaBisazza MauroCettolo - GermánSanchis-Trilles + GermánSanchis-Trilles MarcelloFederico 37-44 2009.iwslt-evaluation.5 @@ -105,7 +105,7 @@ <fixed-case>I</fixed-case>2<fixed-case>R</fixed-case>’s machine translation system for <fixed-case>IWSLT</fixed-case> 2009 XiangyuDuan - DeyiXiong + DeyiXiong HuiZhang MinZhang HaizhouLi @@ -137,8 +137,8 @@ <fixed-case>LIG</fixed-case> approach for <fixed-case>IWSLT</fixed-case>09 FethiBougares - LaurentBesacier - HervéBlanchon + LaurentBesacier + HervéBlanchon 60-64 2009.iwslt-evaluation.9 2009.iwslt-evaluation.9.Presentation.pdf @@ -162,7 +162,7 @@ WadeShen BrianDelaney A. RyanAminzadeh - TimAnderson + TimAnderson RaySlyh 71-78 2009.iwslt-evaluation.11 @@ -173,8 +173,8 @@ Two methods for stabilizing <fixed-case>MERT</fixed-case> MasaoUtiyama - HirofumiYamamoto - EiichiroSumita + HirofumiYamamoto + EiichiroSumita 79-82 2009.iwslt-evaluation.12 2009.iwslt-evaluation.12.Presentation.pdf @@ -186,7 +186,7 @@ MaoxiLi JiajunZhang YuZhou - ChengqingZong + ChengqingZong 83-90 2009.iwslt-evaluation.13 2009.iwslt-evaluation.13.Presentation.pdf @@ -195,7 +195,7 @@ The <fixed-case>NUS</fixed-case> statistical machine translation system for <fixed-case>IWSLT</fixed-case> 2009 - PreslavNakov + PreslavNakov ChangLiu WeiLu Hwee TouNg @@ -209,9 +209,9 @@ The <fixed-case>UOT</fixed-case> system XianchaoWu TakuyaMatsuzaki - NaoakiOkazaki + NaoakiOkazaki YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 99-106 2009.iwslt-evaluation.15 2009.iwslt-evaluation.15.Presentation.pdf @@ -243,7 +243,7 @@ <fixed-case>UPV</fixed-case> translation system for <fixed-case>IWSLT</fixed-case> 2009 GuillemGascó - Joan AndreuSánchez + Joan AndreuSánchez 118-123 2009.iwslt-evaluation.18 2009.iwslt-evaluation.18.Presentation.pdf @@ -283,7 +283,7 @@ Enriching <fixed-case>SCFG</fixed-case> rules directly from efficient bilingual chart parsing - MartinČmejrek + MartinČmejrek BowenZhou BingXiang 136-143 @@ -317,7 +317,7 @@ Online language model adaptation for spoken dialog translation - GermánSanchis-Trilles + GermánSanchis-Trilles MauroCettolo NicolaBertoldi MarcelloFederico @@ -331,11 +331,11 @@ Network-based speech-to-speech translation ChioriHori SakrianiSakti - MichaelPaul + MichaelPaul NoriyukiKimura YutakaAshikari RyosukeIsotani - EiichiroSumita + EiichiroSumita SatoshiNakamura 2009.iwslt-papers.6 2009.iwslt-papers.6.Presentation.pdf diff --git a/data/xml/2009.jeptalnrecital.xml b/data/xml/2009.jeptalnrecital.xml index 5df07c4d96..84cc40899b 100644 --- a/data/xml/2009.jeptalnrecital.xml +++ b/data/xml/2009.jeptalnrecital.xml @@ -3,7 +3,7 @@ Actes de la 16ème conférence sur le Traitement Automatique des Langues Naturelles. Articles longs - AdelineNazarenko + AdelineNazarenko ThierryPoibeau ATALA
Senlis, France
@@ -26,7 +26,7 @@
Analyse déductive pour les grammaires d’interaction - JosephLe Roux + JosephLe Roux 11–20 Nous proposons un algorithme d’analyse pour les grammaires d’interaction qui utilise le cadre formel de l’analyse déductive. Cette approche donne un point de vue nouveau sur ce problème puisque les méthodes précédentes réduisaient ce dernier à la réécriture de graphes et utilisaient des techniques de résolution de contraintes. D’autre part, cette présentation permet de décrire le processus de manière standard et d’exhiber les sources d’indéterminisme qui rendent ce problème difficile. 2009.jeptalnrecital-long.2 @@ -36,7 +36,7 @@ Analyse syntaxique en dépendances de l’oral spontané AlexisNasr - FrédéricBéchet + FrédéricBéchet 21–30 Cet article décrit un modèle d’analyse syntaxique de l’oral spontané axé sur la reconnaissance de cadres valenciels verbaux. Le modèle d’analyse se décompose en deux étapes : une étape générique, basée sur des ressources génériques du français et une étape de réordonnancement des solutions de l’analyseur réalisé par un modèle spécifique à une application. Le modèle est évalué sur le corpus MEDIA. 2009.jeptalnrecital-long.3 @@ -45,8 +45,8 @@ Analyse syntaxique du français : des constituants aux dépendances - MarieCandito - BenoîtCrabbé + MarieCandito + BenoîtCrabbé PascalDenis FrançoisGuérin 31–40 @@ -80,8 +80,8 @@ Classification d’un contenu encyclopédique en vue d’un étiquetage par entités nommées - EricCharton - Juan-ManuelTorres-Moreno + EricCharton + Juan-ManuelTorres-Moreno 61–70 On utilise souvent des ressources lexicales externes pour améliorer les performances des systèmes d’étiquetage d’entités nommées. Les contenus de ces ressources lexicales peuvent être variés : liste de noms propres, de lieux, de marques. On note cependant que la disponibilité de corpus encyclopédiques exhaustifs et ouverts de grande taille tels que Worldnet ou Wikipedia, a fait émerger de nombreuses propositions spécifiques d’exploitation de ces contenus par des systèmes d’étiquetage. Un problème demeure néanmoins ouvert avec ces ressources : celui de l’adaptation de leur taxonomie interne, complexe et composée de dizaines de milliers catégories, aux exigences particulières de l’étiquetage des entités nommées. Pour ces dernières, au plus de quelques centaines de classes sémantiques sont requises. Dans cet article nous explorons cette difficulté et proposons un système complet de transformation d’un arbre taxonomique encyclopédique en une système à classe sémantiques adapté à l’étiquetage d’entités nommées. 2009.jeptalnrecital-long.7 @@ -90,7 +90,7 @@ Étude quantitative de liens entre l’analogie formelle et la morphologie constructionnelle - PhilippeLanglais + PhilippeLanglais 71–80 Plusieurs travaux ont récemment étudié l’apport de l’apprentissage analogique dans des applications du traitement automatique des langues comme la traduction automatique, ou la recherche d’information. Il est souvent admis que les relations analogiques de forme entre les mots capturent des informations de nature morphologique. Le but de cette étude est de présenter une analyse des points de rencontre entre l’analyse morphologique et les analogies de forme. C’est à notre connaissance la première étude de ce type portant sur des corpus de grande taille et sur plusieurs langues. Bien que notre étude ne soit pas dédiée à une tâche particulière du traitement des langues, nous montrons cependant que le principe d’analogie permet de segmenter des mots en morphèmes avec une bonne précision. 2009.jeptalnrecital-long.8 @@ -102,8 +102,8 @@ Thi-Ngoc-DiepDo Viet-BacLe BrigitteBigi - LaurentBesacier - EricCastelli + LaurentBesacier + EricCastelli 81–90 Cet article présente nos premiers travaux en vue de la construction d’un système de traduction probabiliste pour le couple de langue vietnamien-français. La langue vietnamienne étant considérée comme une langue peu dotée, une des difficultés réside dans la constitution des corpus parallèles, indispensable à l’apprentissage des modèles. Nous nous concentrons sur la constitution d’un grand corpus parallèle vietnamien-français. La méthode d’identification automatique des paires de documents parallèles fondée sur la date de publication, les mots spéciaux et les scores d’alignements des phrases est appliquée. Cet article présente également la construction d’un premier système de traduction automatique probabiliste vietnamienfrançais et français-vietnamien à partir de ce corpus et discute l’opportunité d’utiliser des unités lexicales ou sous-lexicales pour le vietnamien (syllabes, mots, ou leurs combinaisons). Les performances du système sont encourageantes et se comparent avantageusement à celles du système de Google. 2009.jeptalnrecital-long.9 @@ -124,7 +124,7 @@ Intégration de l’alignement de mots dans le concordancier bilingue <fixed-case>T</fixed-case>rans<fixed-case>S</fixed-case>earch StéphaneHuet JulienBourdaillet - PhilippeLanglais + PhilippeLanglais 101–110 Malgré les nombreuses études visant à améliorer la traduction automatique, la traduction assistée par ordinateur reste la solution préférée des traducteurs lorsqu’une sortie de qualité est recherchée. Dans cet article, nous présentons nos travaux menés dans le but d’améliorer le concordancier bilingue TransSearch. Ce service, accessible sur le Web, repose principalement sur un alignement au niveau des phrases. Dans cette étude, nous discutons et évaluons l’intégration d’un alignement statistique au niveau des mots. Nous présentons deux nouvelles problématiques essentielles au succès de notre nouveau prototype : la détection des traductions erronées et le regroupement des variantes de traduction similaires. 2009.jeptalnrecital-long.11 @@ -135,7 +135,7 @@ Jugements d’évaluation et constituants périphériques AgataJackiewicz ThierryCharnois - StéphaneFerrari + StéphaneFerrari 111–120 L’article présente une étude portant sur des constituants détachés à valeur axiologique. Dans un premier temps, une analyse linguistique sur corpus met en évidence un ensemble de patrons caractéristiques du phénomène. Ensuite, une expérimentation informatique est proposée sur un corpus de plus grande taille afin de permettre l’observation des patrons en vue d’un retour sur le modèle linguistique. Ce travail s’inscrit dans un projet mené à l’interface de la linguistique et du TAL, qui se donne pour but d’enrichir, d’adapter au français et de formaliser le modèle général Appraisal de l’évaluation dans la langue. 2009.jeptalnrecital-long.12 @@ -148,7 +148,7 @@ AlbertGatt JimHunter EhudReiter - SomayajuluSripada + SomayajuluSripada 121–130 Notre société génère une masse d’information toujours croissante, que ce soit en médecine, en météorologie, etc. La méthode la plus employée pour analyser ces données est de les résumer sous forme graphique. Cependant, il a été démontré qu’un résumé textuel est aussi un mode de présentation efficace. L’objectif du prototype BT-45, développé dans le cadre du projet Babytalk, est de générer des résumés de 45 minutes de signaux physiologiques continus et d’événements temporels discrets en unité néonatale de soins intensifs (NICU). L’article présente l’aspect génération de texte de ce prototype. Une expérimentation clinique a montré que les résumés humains améliorent la prise de décision par rapport à l’approche graphique, tandis que les textes de BT-45 donnent des résultats similaires à l’approche graphique. Une analyse a identifié certaines des limitations de BT-45 mais en dépit de cellesci, notre travail montre qu’il est possible de produire automatiquement des résumés textuels efficaces de données complexes. 2009.jeptalnrecital-long.13 @@ -178,7 +178,7 @@ Prise en compte de dépendances syntaxiques pour la traduction contextuelle de segments AurélienMax RafikMaklhoufi - PhilippeLanglais + PhilippeLanglais 151–160 Dans un système standard de traduction statistique basé sur les segments, le score attribué aux différentes traductions d’un segment ne dépend pas du contexte dans lequel il apparaît. Plusieurs travaux récents tendent à montrer l’intérêt de prendre en compte le contexte source lors de la traduction, mais ces études portent sur des systèmes traduisant vers l’anglais, une langue faiblement fléchie. Dans cet article, nous décrivons nos expériences sur la prise en compte du contexte source dans un système statistique traduisant de l’anglais vers le français, basé sur l’approche proposée par Stroppa et al. (2007). Nous étudions l’impact de différents types d’indices capturant l’information contextuelle, dont des dépendances syntaxiques typées. Si les mesures automatiques d’évaluation de la qualité d’une traduction ne révèlent pas de gains significatifs de notre système par rapport à un système à l’état de l’art ne faisant pas usage du contexte, une évaluation manuelle conduite sur 100 phrases choisies aléatoirement est en faveur de notre système. Cette évaluation fait également ressortir que la prise en compte de certaines dépendances syntaxiques est bénéfique à notre système. 2009.jeptalnrecital-long.16 @@ -249,7 +249,7 @@ Trouver et confondre les coupables : un processus sophistiqué de correction de lexique LionelNicolas - BenoîtSagot + BenoîtSagot MiguelA. Molinero JacquesFarré ÉricVillemonte De La Clergerie @@ -330,7 +330,7 @@ Actes de la 16ème conférence sur le Traitement Automatique des Langues Naturelles. Prise de position - AdelineNazarenko + AdelineNazarenko ThierryPoibeau ATALA
Senlis, France
@@ -376,7 +376,7 @@ Actes de la 16ème conférence sur le Traitement Automatique des Langues Naturelles. Articles courts - AdelineNazarenko + AdelineNazarenko ThierryPoibeau ATALA
Senlis, France
@@ -390,9 +390,9 @@ Adaptation de parsers statistiques lexicalisés pour le français : Une évaluation complète sur corpus arborés - DjaméSeddah - MarieCandito - BenoîtCrabbé + DjaméSeddah + MarieCandito + BenoîtCrabbé 1–10 Cet article présente les résultats d’une évaluation exhaustive des principaux analyseurs syntaxiques probabilistes dit “lexicalisés” initialement conçus pour l’anglais, adaptés pour le français et évalués sur le CORPUS ARBORÉ DU FRANÇAIS (Abeillé et al., 2003) et le MODIFIED FRENCH TREEBANK (Schluter & van Genabith, 2007). Confirmant les résultats de (Crabbé & Candito, 2008), nous montrons que les modèles lexicalisés, à travers les modèles de Charniak (Charniak, 2000), ceux de Collins (Collins, 1999) et le modèle des TIG Stochastiques (Chiang, 2000), présentent des performances moindres face à un analyseur PCFG à Annotation Latente (Petrov et al., 2006). De plus, nous montrons que le choix d’un jeu d’annotations issus de tel ou tel treebank oriente fortement les résultats d’évaluations tant en constituance qu’en dépendance non typée. Comparés à (Schluter & van Genabith, 2008; Arun & Keller, 2005), tous nos résultats sont state-of-the-art et infirment l’hypothèse d’une difficulté particulière qu’aurait le français en terme d’analyse syntaxique probabiliste et de sources de données. 2009.jeptalnrecital-court.1 @@ -401,7 +401,7 @@ Analyse automatique des noms déverbaux composés : pourquoi et comment faire interagir analogie et système de règles - FiammettaNamer + FiammettaNamer 11–20 Cet article aborde deux problèmes d’analyse morpho-sémantique du lexique : (1) attribuer automatiquement une définition à des noms et verbes morphologiquement construits inconnus des dictionnaires mais présents dans les textes ; (2) proposer une analyse combinant règles et analogie, deux techniques généralement contradictoires. Les noms analysés sont apparemment suffixés et composés (HYDROMASSAGE). La plupart d’entre eux, massivement attestés dans les documents (journaux, Internet) sont absents des dictionnaires. Ils sont souvent reliés à des verbes (HYDROMASSER) également néologiques. Le nombre de ces noms et verbes est estimé à 5.400. L’analyse proposée leur attribue une définition par rapport à leur base, et enrichit un lexique de référence pour le TALN au moyen de cette base, si elle est néologique. L’implémentation des contraintes linguistiques qui régissent ces formations est reproductible dans d’autres langues européennes où sont rencontrés les mêmes types de données dont l’analyse reflète le même raisonnement que pour le français. 2009.jeptalnrecital-court.2 @@ -430,18 +430,18 @@ <fixed-case>ANNODIS</fixed-case>: une approche outillée de l’annotation de structures discursives - Marie-PaulePéry-Woodley - NicholasAsher + Marie-PaulePéry-Woodley + NicholasAsher PatriceEnjalbert - FarahBenamara + FarahBenamara MyriamBras - CécileFabre - StéphaneFerrari - Lydia-MaiHo-Dac + CécileFabre + StéphaneFerrari + Lydia-MaiHo-Dac AnneLe Draoulec YannMathet PhilippeMuller - LaurentPrévot + LaurentPrévot JosetteRebeyrolle LudovicTanguy MarianneVergez-Couret @@ -455,7 +455,7 @@ Apport de la syntaxe dans un système de question-réponse : étude du système <fixed-case>FIDJI</fixed-case>. - VéroniqueMoriceau + VéroniqueMoriceau XavierTannier 47–56 Cet article présente une série d’évaluations visant à étudier l’apport d’une analyse syntaxique robuste des questions et des documents dans un système de questions-réponses. Ces évaluations ont été effectuées sur le système FIDJI, qui utilise à la fois des informations syntaxiques et des techniques plus “traditionnelles”. La sélection des documents, l’extraction de la réponse ainsi que le comportement selon les différents types de questions ont été étudiés. @@ -485,7 +485,7 @@ Association automatique de lemmes et de paradigmes de flexion à un mot inconnu - ClaudeDe Loupy + ClaudeDe Loupy MichaëlBagur HelenaBlancafort 77–86 @@ -498,7 +498,7 @@ Catégorisation sémantico-discursive des évaluations exprimées dans la blogosphère MatthieuVernier LauraMonceaux - BéatriceDaille + BéatriceDaille EstelleDubreil 87–96 Les blogs constituent un support d’observations idéal pour des applications liées à la fouille d’opinion. Toutefois, ils imposent de nouvelles problématiques et de nouveaux défis au regard des méthodes traditionnelles du domaine. De ce fait, nous proposons une méthode automatique pour la détection et la catégorisation des évaluations localement exprimées dans un corpus de blogs multi-domaine. Celle-ci rend compte des spécificités du langage évaluatif décrites dans deux théories linguistiques. L’outil développé au sein de la plateforme UIMA vise d’une part à construire automatiquement une grammaire du langage évaluatif, et d’autre part à utiliser cette grammaire pour la détection et la catégorisation des passages évaluatifs d’un texte. La catégorisation traite en particulier l’aspect axiologique de l’évaluation, sa configuration d’énonciation et sa modalité dans le discours. @@ -530,7 +530,7 @@ Détection des émotions à partir du contenu linguistique d’énoncés oraux : application à un robot compagnon pour enfants fragilisés MarcLe Tallec - JeanneVillaneau + JeanneVillaneau Jean-YvesAntoine AgataSavary ArielleSyssau-Vaccarella @@ -542,7 +542,7 @@ Dispersion sémantique dans des familles morpho-phonologiques : éléments théoriques et empiriques - NuriaGala + NuriaGala VéroniqueRey LaurentTichit 120–127 @@ -562,8 +562,8 @@ Exploitation du terrain commun pour la production d’expressions référentielles dans les systèmes de dialogue - AlexandreDenis - MatthieuQuignard + AlexandreDenis + MatthieuQuignard 138–147 Cet article présente un moyen de contraindre la production d’expressions référentielles par un système de dialogue en fonction du terrain commun. Cette capacité, fondamentale pour atteindre la compréhension mutuelle, est trop souvent oubliée dans les systèmes de dialogue. Le modèle que nous proposons s’appuie sur une modélisation du processus d’ancrage (grounding process) en proposant un raffinement du statut d’ancrage appliqué à la description des référents. Il décrit quand et comment ce statut doit être révisé en fonction des jugements de compréhension des deux participants ainsi que son influence dans le choix d’une description partagée destinée à la génération d’une expression référentielle. 2009.jeptalnrecital-court.16 @@ -574,7 +574,7 @@ Gestion de dialogue oral Homme-machine en arabe YounèsBahou AmineBayoudhi - LamiaHadrich Belguith + LamiaHadrich Belguith 148–157 Dans le présent papier, nous présentons nos travaux sur la gestion du dialogue oral arabe Homme-machine. Ces travaux entrent dans le cadre de la réalisation du serveur vocal interactif SARF (Bahou et al., 2008) offrant des renseignements sur le transport ferroviaire tunisien en langue arabe standard moderne. Le gestionnaire de dialogue que nous proposons est basé sur une approche structurelle et est composé de deux modèles à savoir, le modèle de tâche et le modèle de dialogue. Le premier modèle permet de i) compléter et vérifier l’incohérence des structures sémantiques représentant les sens utiles des énoncés, ii) générer une requête vers l’application et iii) récupérer le résultat et de formuler une réponse à l’utilisateur en langage naturel. Quant au modèle de dialogue, il assure l’avancement du dialogue avec l’utilisateur et l’identification de ses intentions. L’interaction entre ces deux modèles est assurée grâce à un contexte du dialogue permettant le suivi et la mise à jour de l’historique du dialogue. 2009.jeptalnrecital-court.17 @@ -604,7 +604,7 @@ Intégrer les tables du Lexique-Grammaire à un analyseur syntaxique robuste à grande échelle - BenoîtSagot + BenoîtSagot ElsaTolone 177–186 Dans cet article, nous montrons comment nous avons converti les tables du Lexique-Grammaire en un format TAL, celui du lexique Lefff, permettant ainsi son intégration dans l’analyseur syntaxique FRMG. Nous présentons les fondements linguistiques de ce processus de conversion et le lexique obtenu. Nous validons le lexique obtenu en évaluant l’analyseur syntaxique FRMG sur le corpus de référence de la campagne EASy selon qu’il utilise les entrées verbales du Lefff ou celles des tables des verbes du Lexique-Grammaire ainsi converties. @@ -672,7 +672,7 @@ Collecte et analyses de réponses naturelles pour les systèmes de questions-réponses AnneGarcia-Fernandez - SophieRosset + SophieRosset AnneVilnat 237–246 Notre travail se situe dans le cadre des systèmes de réponse a une question et à pour but de fournir une réponse en langue naturelle aux questions posées en langue naturelle. Cet article présente une expérience permettant d’analyser les réponses de locuteurs du français à des questions que nous leur posons. L’expérience se déroule à l’écrit comme à l’oral et propose à des locuteurs français des questions relevant de différents types sémantiques et syntaxiques. Nous mettons en valeur une large variabilité dans les formes de réponses possibles en langue française. D’autre part nous établissons un certain nombre de liens entre formulation de question et formulation de réponse. Nous proposons d’autre part une comparaison des réponses selon la modalité oral / écrit. Ces résultats peuvent être intégrés à des systèmes existants pour produire une réponse en langue naturelle de façon dynamique. @@ -691,7 +691,7 @@ Plusieurs langues (bien choisies) valent mieux qu’une : traduction statistique multi-source par renforcement lexical - Josep MariaCrego + Josep MariaCrego AurélienMax FrançoisYvon 253–262 @@ -719,9 +719,9 @@ Profilage de candidatures assisté par Relevance Feedback RémyKessler NicolasBéchet - Juan-ManuelTorres-Moreno + Juan-ManuelTorres-Moreno MathieuRoche - MarcEl-Bèze + MarcEl-Bèze 273–282 Le marché d’offres d’emploi et des candidatures sur Internet connaît une croissance exponentielle. Ceci implique des volumes d’information (majoritairement sous la forme de texte libre) qu’il n’est plus possible de traiter manuellement. Une analyse et catégorisation assistées nous semble pertinente en réponse à cette problématique. Nous proposons E-Gen, système qui a pour but l’analyse et catégorisation assistés d’offres d’emploi et des réponses des candidats. Dans cet article nous présentons plusieurs stratégies, reposant sur les modèles vectoriel et probabiliste, afin de résoudre la problématique du profilage des candidatures en fonction d’une offre précise. Nous avons évalué une palette de mesures de similarité afin d’effectuer un classement pertinent des candidatures au moyen des courbes ROC. L’utilisation d’une forme de relevance feedback a permis de surpasser nos résultats sur ce problème difficile et sujet à une grande subjectivité. 2009.jeptalnrecital-court.30 @@ -730,7 +730,7 @@ Profilage sémantique endogène des relations de synonymie au sein de Gene Ontology - ThierryHamon + ThierryHamon NataliaGrabar 283–292 Le calcul de la similarité sémantique entre les termes repose sur l’existence et l’utilisation de ressources sémantiques. Cependant de telles ressources, qui proposent des équivalences entre entités, souvent des relations de synonymie, doivent elles-mêmes être d’abord analysées afin de définir des zones de fiabilité où la similarité sémantique est plus forte. Nous proposons une méthode d’acquisition de synonymes élémentaires grâce à l’exploitation des terminologies structurées au travers l’analyse de la structure syntaxique des termes complexes et de leur compositionnalité. Les synonymes acquis sont ensuite profilés grâce aux indicateurs endogènes inférés automatiquement à partir de ces mêmes terminologies (d’autres types de relations, inclusions lexicales, productivité, forme des composantes connexes). Dans le domaine biomédical, il existe de nombreuses terminologies structurées qui peuvent être exploitées pour la constitution de ressources sémantiques. Le travail présenté ici exploite une de ces terminologies, Gene Ontology. @@ -755,7 +755,7 @@ AnneDister HubertNaets KévinMacé - CédrickFairon + CédrickFairon 301–310 Cet article présente Recto /Verso, un système de traitement automatique du langage dédié à l’application des rectifications orthographiques de 1990. Ce système a été développé dans le cadre de la campagne de sensibilisation réalisée en mars dernier par le Service et le Conseil de la langue française et de la politique linguistique de la Communauté française de Belgique. Nous commençons par rappeler les motivations et le contenu de la réforme proposée, et faisons le point sur les principes didactiques retenus dans le cadre de la campagne. La plus grande partie de l’article est ensuite consacrée à l’implémentation du système. Nous terminons enfin par une première analyse de l’impact de la campagne sur les utilisateurs. 2009.jeptalnrecital-court.33 @@ -768,7 +768,7 @@ LuitGazendam WillemijnHeeren RoelandOrdelman - HennieBrugman + HennieBrugman 311–320 Semantic access to multimedia content in audiovisual archives is to a large extent dependent on quantity and quality of the metadata, and particularly the content descriptions that are attached to the individual items. However, the manual annotation of collections puts heavy demands on resources. A large number of archives are introducing (semi) automatic annotation techniques for generating and/or enhancing metadata. The NWO funded CATCH-CHOICE project has investigated the extraction of keywords from textual resources related to TV programs to be archived (context documents), in collaboration with the Dutch audiovisual archives, Sound and Vision. This paper investigates the suitability of Automatic Speech Recognition transcripts produced in the CATCH-CHoral project for generating such keywords, which we evaluate against manual annotations of the documents, and against keywords automatically generated from context documents describing the TV programs’ content. 2009.jeptalnrecital-court.34 @@ -777,7 +777,7 @@ Résumé automatique multi-document et indépendance de la langue : une première évaluation en français FlorianBoudin - Juan-ManuelTorres-Moreno + Juan-ManuelTorres-Moreno 321–330 Le résumé automatique de texte est une problématique difficile, fortement dépendante de la langue et qui peut nécessiter un ensemble de données d’apprentissage conséquent. L’approche par extraction peut aider à surmonter ces difficultés. (Mihalcea, 2004) a démontré l’intérêt des approches à base de graphes pour l’extraction de segments de texte importants. Dans cette étude, nous décrivons une approche indépendante de la langue pour la problématique du résumé automatique multi-documents. L’originalité de notre méthode repose sur l’utilisation d’une mesure de similarité permettant le rapprochement de segments morphologiquement proches. De plus, c’est à notre connaissance la première fois que l’évaluation d’une approche de résumé automatique multi-document est conduite sur des textes en français. 2009.jeptalnrecital-court.35 @@ -798,9 +798,9 @@ Segmentation multiple d’un flux de données textuelles pour la modélisation statistique du langage SopheapSeng - LaurentBesacier + LaurentBesacier BrigitteBigi - EricCastelli + EricCastelli 337–346 Dans cet article, nous traitons du problème de la modélisation statistique du langage pour les langues peu dotées et sans segmentation entre les mots. Tandis que le manque de données textuelles a un impact sur la performance des modèles, les erreurs introduites par la segmentation automatique peuvent rendre ces données encore moins exploitables. Pour exploiter au mieux les données textuelles, nous proposons une méthode qui effectue des segmentations multiples sur le corpus d’apprentissage au lieu d’une segmentation unique. Cette méthode basée sur les automates d’état finis permet de retrouver les n-grammes non trouvés par la segmentation unique et de générer des nouveaux n-grammes pour l’apprentissage de modèle du langage. L’application de cette approche pour l’apprentissage des modèles de langage pour les systèmes de reconnaissance automatique de la parole en langue khmère et vietnamienne s’est montrée plus performante que la méthode par segmentation unique, à base de règles. 2009.jeptalnrecital-court.37 @@ -852,7 +852,7 @@ Une approche exploratoire de compression automatique de phrases basée sur des critères thermodynamiques SilviaFernández Sabido - Juan-ManuelTorres-Moreno + Juan-ManuelTorres-Moreno 387–393 Nous présentons une approche exploratoire basée sur des notions thermodynamiques de la Physique statistique pour la compression de phrases. Nous décrivons le modèle magnétique des verres de spins, adapté à notre conception de la problématique. Des simulations Métropolis Monte-Carlo permettent d’introduire des fluctuations thermiques pour piloter la compression. Des comparaisons intéressantes de notre méthode ont été réalisées sur un corpus en français. 2009.jeptalnrecital-court.42 @@ -902,7 +902,7 @@ Actes de la 16ème conférence sur le Traitement Automatique des Langues Naturelles. Démonstrations - AdelineNazarenko + AdelineNazarenko ThierryPoibeau ATALA
Senlis, France
@@ -917,7 +917,7 @@ <fixed-case>ACOLAD</fixed-case> un environnement pour l’édition de corpus de dépendances FrancisBrunet-Manquat - JérômeGoulian + JérômeGoulian 1–3 Dans cette démonstration, nous présentons le prototype d’un environnement open-source pour l’édition de corpus de dépendances. Cet environnement, nommé ACOLAD (Annotation de COrpus Linguistique pour l’Analyse de dépendances), propose des services manuels de segmentation et d’annotation multi-niveaux (segmentation en mots et en syntagmes minimaux (chunks), annotation morphosyntaxique des mots, annotation syntaxique des chunks et annotation syntaxique des dépendances entre mots ou entre chunks). 2009.jeptalnrecital-demonstration.1 @@ -964,7 +964,7 @@ <fixed-case>ASSIST</fixed-case> : un moteur de recherche spécialisé pour l’analyse des cadres d’expériences - DavyWeissenbacher + DavyWeissenbacher ElisaPieri SophiaAnaniadou BrianRea @@ -991,7 +991,7 @@ <fixed-case>CIFLI</fixed-case>-<fixed-case>S</fixed-case>urvi<fixed-case>T</fixed-case>ra, deux facettes : démonstrateur de composants de <fixed-case>TA</fixed-case> fondée sur <fixed-case>UNL</fixed-case>, et phrasebook multilingue GeorgesFafiotte AchilleFalaise - JérômeGoulian + JérômeGoulian 19–21 CIFLI-SurviTra (“Survival Translation” assistant) est une plate-forme destinée à favoriser l’ingénierie et la mise au point de composants UNL de TA, à partir d’une mémoire de traduction formée de livres de phrases multilingues avec variables lexicales. SurviTra est aussi un phrasebook digital multilingue, assistant linguistique pour voyageurs monolingues (français, hindi, tamoul, anglais) en situation de “survie linguistique”. Le corpus d’un domaine-pilote (“Restaurant”) a été structuré et construit : sous-domaines de phrases alignées et classes lexicales de locutions quadrilingues, graphes UNL, dictionnaires UW++/français et UW++/hindi par domaines. L’approche, générique, est applicable à d’autres langues. Le prototype d’assistant linguistique (application Web, à interface textuelle) peut évoluer vers une application UNL embarquée sur SmartPhone, avec Traitement de Parole et multimodalité. 2009.jeptalnrecital-demonstration.7 @@ -1013,7 +1013,7 @@ <fixed-case>EXCOM</fixed-case> : Plate-forme d’annotation sémantique de textes multilingues MotasemAlrahabi - Jean-PierreDesclés + Jean-PierreDesclés 25–27 Nous proposons une plateforme d‟annotation sémantique, appelée « EXCOM ». Basée sur la méthode de l‟ « Exploration Contextuelle », elle permet, à travers une diversité de langues, de procéder à des annotations automatiques de segments textuels par l’analyse des formes de surface dans leur contexte. Les textes sont traités selon des « points de vue » discursifs dont les valeurs sont organisées dans une « carte sémantique ». L‟annotation se base sur un ensemble de règles linguistiques, écrites par un analyste, qui permettent d‟identifier les représentations textuelles sous-jacentes aux différentes catégories de la carte. Le système offre, à travers deux types d‟interfaces (développeur ou utilisateur), une chaîne de traitements automatiques de textes qui comprend la segmentation, l‟annotation et d‟autres fonctionnalités de post-traitement. Les documents annotés peuvent être utilisés, par exemple, pour des systèmes de recherche d‟information, de veille, de classification ou de résumé automatique. 2009.jeptalnrecital-demonstration.9 @@ -1077,7 +1077,7 @@ Actes de la 16ème conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues ThibaultMondary AurélienBossard - ThierryHamon + ThierryHamon ATALA
Senlis, France
June @@ -1171,7 +1171,7 @@
Combinaison de contenus encyclopédiques multilingues pour une reconnaissance d’entités nommées en contexte - EricCharton + EricCharton 91–100 Dans cet article, nous présentons une méthode de transformation de Wikipédia en ressource d’information externe pour détecter et désambiguïser des entités nommées, en milieu ouvert et sans apprentissage spécifique. Nous expliquons comment nous construisons notre système, puis nous utilisons cinq éditions linguistiques de Wikipédia afin d’enrichir son lexique. Pour finir nous réalisons une évaluation et comparons les performances du système avec et sans compléments lexicaux issus des informations inter-linguistiques, sur une tâche d’extraction d’entités nommées appliquée à un corpus d’articles journalistiques. 2009.jeptalnrecital-recital.10 diff --git a/data/xml/2009.mtsummit.xml b/data/xml/2009.mtsummit.xml index 77d71083d5..c78a1ef464 100644 --- a/data/xml/2009.mtsummit.xml +++ b/data/xml/2009.mtsummit.xml @@ -34,7 +34,7 @@ Panel Summary: Educating and Assessing the Human Translator in an Age of Technology PatriciaPhillips-Batoma - RoxanaGirju + RoxanaGirju ElizabethLowe PatriciaMinacori 2009.mtsummit-plenaries.4 @@ -49,7 +49,7 @@ Technology in Translator Training and tools for translators - PierretteBouillon + PierretteBouillon MarianneStarlander 2009.mtsummit-plenaries.6 bouillon-starlander-2009-technology @@ -126,7 +126,7 @@ Source-side Dependency Tree Reordering Models with Subtree Movements and Constraints NguyenBach QinGao - StephanVogel + StephanVogel 2009.mtsummit-papers.1 bach-etal-2009-source @@ -140,13 +140,13 @@ Normalization for Automated Metrics: <fixed-case>E</fixed-case>nglish and <fixed-case>A</fixed-case>rabic Speech Translation - SherriCondon - Gregory A.Sanders + SherriCondon + Gregory A.Sanders DanParvaz AlanRubenstein ChristyDoran JohnAberdeen - BeatriceOshika + BeatriceOshika 2009.mtsummit-papers.3 condon-etal-2009-normalization @@ -160,9 +160,9 @@
Reassessment of the Role of Phrase Extraction in <fixed-case>PBSMT</fixed-case> - FranciscoGuzman + FranciscoGuzman QinGao - StephanVogel + StephanVogel 2009.mtsummit-papers.5 guzman-etal-2009-reassessment @@ -192,23 +192,23 @@ Automatic Detection of Translated Text and its Impact on Machine Translation DavidKurokawa - CyrilGoutte + CyrilGoutte PierreIsabelle 2009.mtsummit-papers.9 kurokawa-etal-2009-automatic Improving a Lexicalized Hierarchical Reordering Model Using Maximum Entropy - Vinh VanNguyen + Vinh VanNguyen AkiraShimazu - Minh LeNguyen + Minh LeNguyen Thai PhuongNguyen 2009.mtsummit-papers.10 nguyen-etal-2009-improving User choice as an evaluation metric for web translation in cross language instant messaging applications - WilliamOgden + WilliamOgden RonZacharski SieunAn YukiIshikawa @@ -218,7 +218,7 @@ Prediction of Words in Statistical Machine Translation using a Multilayer Perceptron AlexandrePatry - PhilippeLanglais + PhilippeLanglais 2009.mtsummit-papers.12 patry-langlais-2009-prediction @@ -228,7 +228,7 @@ RichardRose HaniSafadi SamuelLarkin - GillesBoulianne + GillesBoulianne 2009.mtsummit-papers.13 reddy-etal-2009-incorporating @@ -241,7 +241,7 @@
Lemmatic Machine Translation - StephenSoderland + StephenSoderland ChristopherLim Mausam BoQin @@ -273,7 +273,7 @@ MasaoUtiyama DaisukeKawahara KeijiYasuda - EiichiroSumita + EiichiroSumita 2009.mtsummit-papers.18 utiyama-etal-2009-mining @@ -289,7 +289,7 @@ HongmeiZhao JunXie QunLiu - Yajuan + Yajuan DongdongZhang MuLi 2009.mtsummit-papers.20 @@ -298,7 +298,7 @@ Inducing translations from officially published materials in <fixed-case>C</fixed-case>anadian government websites QiboZhu - DianaInkpen + DianaInkpen AshAsudeh 2009.mtsummit-papers.21 zhu-etal-2009-inducing @@ -322,8 +322,8 @@ Extraction of Syntactic Translation Models from Parallel Data using Syntax from Source and Target Languages VamshiAmbati - AlonLavie - JaimeCarbonell + AlonLavie + JaimeCarbonell 2009.mtsummit-posters.2 ambati-etal-2009-extraction @@ -337,8 +337,8 @@ Reordering on <fixed-case>S</fixed-case>panish-<fixed-case>B</fixed-case>asque <fixed-case>SMT</fixed-case> ArantzaDíaz de Ilaraza - GorkaLabaka - KepaSarasola + GorkaLabaka + KepaSarasola 2009.mtsummit-posters.4 diaz-de-ilaraza-etal-2009-reordering @@ -351,7 +351,7 @@ Selective addition of corpus-extracted phrasal lexical rules to a rule-based machine translation system - LoicDugast + LoicDugast JeanSenellart PhilippKoehn 2009.mtsummit-posters.6 @@ -376,7 +376,7 @@ Harnessing the Redundant Results of Translation Spotting StéphaneHuet JulienBourdaillet - PhilippeLanglais + PhilippeLanglais GuyLapalme 2009.mtsummit-posters.9 huet-etal-2009-harnessing @@ -385,7 +385,7 @@ Development of a <fixed-case>J</fixed-case>apanese-<fixed-case>E</fixed-case>nglish Software Manual Parallel Corpus TatsuyaIshisaka MasaoUtiyama - EiichiroSumita + EiichiroSumita KazuhideYamamoto 2009.mtsummit-posters.10 ishisaka-etal-2009-development @@ -393,8 +393,8 @@ Word Alignment by Thresholded Two-Dimensional Normalization HamidrezaKobdani - AlexanderFraser - HinrichSchütze + AlexanderFraser + HinrichSchütze 2009.mtsummit-posters.11 kobdani-etal-2009-word @@ -433,9 +433,9 @@ Using Artificial Data to Compare the Difficulty of Using Statistical Machine Translation in Different Language-Pairs - MannyRayner + MannyRayner PaulaEstrella - PierretteBouillon + PierretteBouillon YukieNakao 2009.mtsummit-posters.16 rayner-etal-2009-using-artificial @@ -449,7 +449,7 @@ Using Percolated Dependencies for Phrase Extraction in <fixed-case>SMT</fixed-case> - AnkitSrivastava + AnkitSrivastava AndyWay 2009.mtsummit-posters.18 srivastava-way-2009-using @@ -477,32 +477,32 @@ Hosting Volunteer Translators MasaoUtiyama TakeshiAbekawa - EiichiroSumita + EiichiroSumita KyoKageura 2009.mtsummit-posters.22 utiyama-etal-2009-hosting Transfer rule generation for a <fixed-case>J</fixed-case>apanese-<fixed-case>H</fixed-case>ungarian machine translation system - IstvánVarga - ShoichiYokoyama + IstvánVarga + ShoichiYokoyama 2009.mtsummit-posters.23 varga-yokoyama-2009-transfer Efficient Beam Thresholding for Statistical Machine Translation - DeyiXiong + DeyiXiong MinZhang - AitiAw + AitiAw HaizhouLi 2009.mtsummit-posters.24 xiong-etal-2009-efficient A Source Dependency Model for Statistical Machine Translation - DeyiXiong + DeyiXiong MinZhang - AitiAw + AitiAw HaizhouLi 2009.mtsummit-posters.25 xiong-etal-2009-source @@ -510,13 +510,13 @@ Bilingual Dictionary Extraction from <fixed-case>W</fixed-case>ikipedia KunYu - JunichiTsujii + JunichiTsujii 2009.mtsummit-posters.26 yu-tsujii-2009-bilingual Virtual <fixed-case>B</fixed-case>abel: Towards Context-Aware Machine Translation in Virtual Worlds - YingZhang + YingZhang NguyenBach 2009.mtsummit-posters.27 zhang-bach-2009-virtual @@ -574,7 +574,7 @@ Real Time Translation Services at <fixed-case>IBM</fixed-case> DavidLubensky - SalimRoukos + SalimRoukos 2009.mtsummit-commercial.7 lubensky-roukos-2009-real @@ -615,7 +615,7 @@ On beyond <fixed-case>TM</fixed-case>: When the Translator Leads the Design of a Translation Support Framework ReginaldHobbs - ClareVoss + ClareVoss JamalLaoudi 2009.mtsummit-government.3 hobbs-etal-2009-beyond @@ -650,11 +650,11 @@ Translation Memory Technology Assessment - CarolVan Ess-Dykema + CarolVan Ess-Dykema DennisPerzanowsky - SusanConverse + SusanConverse RachelRichardson - John S.White + John S.White TuckerManey 2009.mtsummit-government.8 van-ess-dykema-etal-2009-translation @@ -676,7 +676,7 @@ Machine Learning Approaches for Dealing with Bilingual Data in Statistical Machine Translation - GholamrezaHaffari + GholamrezaHaffari haffari-2009-machine @@ -734,8 +734,8 @@ Disfluency and Out-of-vocabulary Word Processing in <fixed-case>A</fixed-case>rabic Speech Understanding YounèsBahou - LamiaHadrich Belguith - AbdelmajidBen Hamadou + LamiaHadrich Belguith + AbdelmajidBen Hamadou 2009.mtsummit-caasl.3 bahou-etal-2009-disfluency @@ -743,7 +743,7 @@ <fixed-case>NP</fixed-case> Subject Detection in Verb-initial <fixed-case>A</fixed-case>rabic Clauses SpenceGreen ConalSathi - Christopher D.Manning + Christopher D.Manning 2009.mtsummit-caasl.4 green-etal-2009-np @@ -762,7 +762,7 @@ A Unification based Approach to the Morphological Analysis and Generation of <fixed-case>A</fixed-case>rabic - SelçukKöprü + SelçukKöprü JudeMiller 2009.mtsummit-caasl.7 kopru-miller-2009-unification @@ -825,7 +825,7 @@ Meta-evaluation of Automatic Evaluation Methods for Machine using Patent Translation Data in <fixed-case>NTCIR</fixed-case>-7 - HiroshiEchizen-ya + HiroshiEchizen-ya TerumasaEhara SayoriShimohata AtsushiFujii @@ -839,10 +839,10 @@ The Construction of a <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Patent Parallel Corpus BinLu - Benjamin K.Tsou + Benjamin K.Tsou JingboZhu TaoJiang - Oi YeeKwong + Oi YeeKwong 2009.mtsummit-wpt.3 lu-etal-2009-construction @@ -854,7 +854,7 @@ Translation Disambiguation of Patent Sentences using Case Frames - ShoichiYokoyama + ShoichiYokoyama MasumiOkuyama 2009.mtsummit-wpt.5 yokoyama-okuyama-2009-translation @@ -937,14 +937,14 @@ The Web as a Source of Informative Background Knowledge - CarolineBarrière + CarolineBarrière 2009.mtsummit-btm.2 barriere-2009-web A Web Service Enabling Gradable Post-edition of Pre-translations Produced by Existing Translation Tools: Practical Use to Provide High-quality Translation of an Online Encyclopedia - HervéBlanchon - ChristianBoitet + HervéBlanchon + ChristianBoitet Cong-PhapHuynh 2009.mtsummit-btm.3 blanchon-etal-2009-web @@ -966,7 +966,7 @@ Bitextor: a Free/Open-source Software to Harvest Translation Memories from Multilingual Websites - MiquelEsplà-Gomis + MiquelEsplà-Gomis 2009.mtsummit-btm.6 espla-gomis-2009-bitextor diff --git a/data/xml/2009.tal.xml b/data/xml/2009.tal.xml index d25eb8661d..e987149796 100644 --- a/data/xml/2009.tal.xml +++ b/data/xml/2009.tal.xml @@ -81,7 +81,7 @@ ChristineJacquin SimonPetitrenaud YannickEstève - BéatriceDaille + BéatriceDaille 201–225 2009.tal-1.8 fra @@ -98,9 +98,9 @@ Évaluation des outils terminologiques : enjeux, difficultés et propositions [Evaluation of terminological tools : challenges, problems and propositions] - AdelineNazarenko + AdelineNazarenko HaïfaZargayouna - OlivierHamon + OlivierHamon Jonathanvan Puymbrouck 257–281 2009.tal-1.10 @@ -170,7 +170,7 @@ Building a Corpus-based Historical <fixed-case>P</fixed-case>ortuguese Dictionary : Challenges and Opportunities Arnaldo JuniorCandido - Sandra MariaAluísio + Sandra MariaAluísio 73–102 2009.tal-2.4 candido-aluisio-2009-building @@ -272,7 +272,7 @@ Cross-framework parser stacking for data-driven dependency parsing - LiljaØvrelid + LiljaØvrelid JonasKuhn KathrinSpreyer 109–138 diff --git a/data/xml/2009.tc.xml b/data/xml/2009.tc.xml index cb2226c23f..7b9249b025 100644 --- a/data/xml/2009.tc.xml +++ b/data/xml/2009.tc.xml @@ -20,14 +20,14 @@ Towards an effective toolkit for translators - AndreasEisele + AndreasEisele 2009.tc-1.2 eisele-2009-towards Computer-aided translation backed by machine translation OndřejOdcházal - OndřejBojar + OndřejBojar 2009.tc-1.3 odchazal-bojar-2009-computer @@ -35,7 +35,7 @@ Minna no Hon’yaku: a website for hosting, archiving, and promoting translations MasaoUtiyama TakeshiAbekawa - EiichiroSumita + EiichiroSumita KyoKageura 2009.tc-1.4 utiyama-etal-2009-minna diff --git a/data/xml/2010.amta.xml b/data/xml/2010.amta.xml index bae0184cf8..991bbab168 100644 --- a/data/xml/2010.amta.xml +++ b/data/xml/2010.amta.xml @@ -54,7 +54,7 @@ Combining Confidence Estimation and Reference-based Metrics for Segment-level <fixed-case>MT</fixed-case> Evaluation LuciaSpecia - JesúsGiménez + JesúsGiménez 2010.amta-papers.3 We describe an effort to improve standard reference-based metrics for Machine Translation (MT) evaluation by enriching them with Confidence Estimation (CE) features and using a learning mechanism trained on human annotations. Reference-based MT evaluation metrics compare the system output against reference translations looking for overlaps at different levels (lexical, syntactic, and semantic). These metrics aim at comparing MT systems or analyzing the progress of a given system and are known to have reasonably good correlation with human judgments at the corpus level, but not at the segment level. CE metrics, on the other hand, target the system in use, providing a quality score to the end-user for each translated segment. They cannot rely on reference translations, and use instead information extracted from the input text, system output and possibly external corpora to train machine learning algorithms. These metrics correlate better with human judgments at the segment level. However, they are usually highly biased by difficulty level of the input segment, and therefore are less appropriate for comparing multiple systems translating the same input segments. We show that these two classes of metrics are complementary and can be combined to provide MT evaluation metrics that achieve higher correlation with human judgments at the segment level. specia-gimenez-2010-combining @@ -62,7 +62,7 @@ The Impact of <fixed-case>A</fixed-case>rabic Morphological Segmentation on Broad-coverage <fixed-case>E</fixed-case>nglish-to-<fixed-case>A</fixed-case>rabic Statistical Machine Translation HassanAl-Haj - AlonLavie + AlonLavie 2010.amta-papers.4 Morphologically rich languages pose a challenge for statistical machine translation (SMT). This challenge is magnified when translating into a morphologically rich language. In this work we address this challenge in the framework of a broad-coverage English-to-Arabic phrase based statistical machine translation (PBSMT). We explore the full spectrum of Arabic segmentation schemes ranging from full word form to fully segmented forms and examine the effects on system performance. Our results show a difference of 2.61 BLEU points between the best and worst segmentation schemes indicating that the choice of the segmentation scheme has a significant effect on the performance of a PBSMT system in a large data scenario. We also show that a simple segmentation scheme can perform as good as the best and more complicated segmentation scheme. We also report results on a wide set of techniques for recombining the segmented Arabic output. al-haj-lavie-2010-impact @@ -79,7 +79,7 @@ ArafatAhsan PrasanthKolachina SudheerKolachina - DiptiMisra + DiptiMisra RajeevSangal 2010.amta-papers.6 In this paper, we present the insights gained from a detailed study of coupling a highly modular English-Hindi RBMT system with a standard phrase-based SMT system. Coupling the RBMT and SMT systems at various stages in the RBMT pipeline, we observe the effects of the source transformations at each stage on the performance of the coupled MT system. We propose an architecture that systematically exploits the structural transfer and robust generation capabilities of the RBMT system. Working with the English-Hindi language pair, we show that the coupling configurations explored in our experiments help address different aspects of the typological divergence between these languages. In spite of working with very small datasets, we report significant improvements both in terms of BLEU (7.14 and 0.87 over the RBMT and the SMT baselines respectively) and subjective evaluation (relative decrease of 17% in SSER). @@ -87,14 +87,14 @@ Semantically-Informed Syntactic Machine Translation: A Tree-Grafting Approach - KathrynBaker + KathrynBaker MichaelBloodgood ChrisCallison-Burch - BonnieDorr + BonnieDorr NathanielFilardo - LoriLevin + LoriLevin ScottMiller - ChristinePiatko + ChristinePiatko 2010.amta-papers.7 We describe a unified and coherent syntactic framework for supporting a semantically-informed syntactic approach to statistical machine translation. Semantically enriched syntactic tags assigned to the target-language training texts improved translation quality. The resulting system significantly outperformed a linguistically naive baseline model (Hiero), and reached the highest scores yet reported on the NIST 2009 Urdu-English translation task. This finding supports the hypothesis (posed by many researchers in the MT community, e.g., in DARPA GALE) that both syntactic and semantic information are critical for improving translation quality—and further demonstrates that large gains can be achieved for low-resource languages with different word order than English. baker-etal-2010-semantically @@ -104,7 +104,7 @@ DanielStein StephanPeitz DavidVilar - HermannNey + HermannNey 2010.amta-papers.8 In this work we review and compare three additional syntactic enhancements for the hierarchical phrase-based translation model, which have been presented in the last few years. We compare their performance when applied separately and study whether the combination may yield additional improvements. Our findings show that the models are complementary, and their combination achieve an increase of 1% in BLEU and a reduction of nearly 2% in TER. The models presented in this work are made available as part of the Jane open source machine translation toolkit. stein-etal-2010-cocktail @@ -120,7 +120,7 @@ f-align: An Open-Source Alignment Tool for <fixed-case>LFG</fixed-case> f-Structures AntonBryl - Josefvan Genabith + Josefvan Genabith 2010.amta-papers.10 Lexical-Functional Grammar (LFG) f-structures (Kaplan and Bresnan, 1982) have attracted some attention in recent years as an intermediate data representation for statistical machine translation. So far, however, there are no alignment tools capable of aligning f-structures directly, and plain word alignment is used for this purpose. In this way no use is made of the structural information contained in f-structures. We present the first version of a specialized f-structure alignment open-source software. bryl-van-genabith-2010-f @@ -145,10 +145,10 @@ Using Sublexical Translations to Handle the <fixed-case>OOV</fixed-case> Problem in <fixed-case>MT</fixed-case> - Chung-chiHuang + Chung-chiHuang Ho-chingYen - Shih-tingHuang - JasonChang + Shih-tingHuang + JasonChang 2010.amta-papers.13 We introduce a method for learning to translate out-of-vocabulary (OOV) words. The method focuses on combining sublexical/constituent translations of an OOV to generate its translation candidates. In our approach, wild-card searches are formulated based on our OOV analysis, aimed at maximizing the probability of retrieving OOVs’ sublexical translations from existing resource of machine translation (MT) systems. At run-time, translation candidates of the unknown words are generated from their suitable sublexical translations and ranked based on monolingual and bilingual information. We have incorporated the OOV model into a state-of-the-art MT system and experimental results show that our model indeed helps to ease the negative impact of OOVs on translation quality, especially for sentences containing more OOVs (significant improvement). huang-etal-2010-using @@ -164,8 +164,8 @@ Detecting Cross-lingual Semantic Similarity Using Parallel <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>anks ShuminWu - JinhoChoi - MarthaPalmer + JinhoChoi + MarthaPalmer 2010.amta-papers.15 This paper suggests a method for detecting cross-lingual semantic similarity using parallel PropBanks. We begin by improving word alignments for verb predicates generated by GIZA++ by using information available in parallel PropBanks. We applied the Kuhn-Munkres method to measure predicate-argument matching and improved verb predicate alignments by an F-score of 12.6%. Using the enhanced word alignments we checked the set of target verbs aligned to a specific source verb for semantic consistency. For a set of English verbs aligned to a Chinese verb, we checked if the English verbs belong to the same semantic class using an existing lexical database, WordNet. For a set of Chinese verbs aligned to an English verb we manually checked semantic similarity between the Chinese verbs within a set. Our results show that the verb sets we generated have a high correlation with semantic classes. This could potentially lead to an automatic technique for generating semantic classes for verbs. wu-etal-2010-detecting @@ -175,9 +175,9 @@ PratyushBanerjee JinhuaDu BaoliLi - SudipNaskar + SudipNaskar AndyWay - Josefvan Genabith + Josefvan Genabith 2010.amta-papers.16 This paper presents a set of experiments on Domain Adaptation of Statistical Machine Translation systems. The experiments focus on Chinese-English and two domain-specific corpora. The paper presents a novel approach for combining multiple domain-trained translation models to achieve improved translation quality for both domain-specific as well as combined sets of sentences. We train a statistical classifier to classify sentences according to the appropriate domain and utilize the corresponding domain-specific MT models to translate them. Experimental results show that the method achieves a statistically significant absolute improvement of 1.58 BLEU (2.86% relative improvement) score over a translation model trained on combined data, and considerable improvements over a model using multiple decoding paths of the Moses decoder, for the combined domain test set. Furthermore, even for domain-specific test sets, our approach works almost as well as dedicated domain-specific models and perfect classification. banerjee-etal-2010-combining @@ -186,7 +186,7 @@ Using Variable Decoding Weight for Language Model in Statistical Machine Translation BehrangMohit RebeccaHwa - AlonLavie + AlonLavie 2010.amta-papers.17 This paper investigates varying the decoder weight of the language model (LM) when translating different parts of a sentence. We determine the condition under which the LM weight should be adapted. We find that a better translation can be achieved by varying the LM weight when decoding the most problematic spot in a sentence, which we refer to as a difficult segment. Two adaptation strategies are proposed and compared through experiments. We find that adapting a different LM weight for every difficult segment resulted in the largest improvement in translation quality. mohit-etal-2010-using @@ -204,7 +204,7 @@ Maximizing <fixed-case>TM</fixed-case> Performance through Sub-Tree Alignment and <fixed-case>SMT</fixed-case> VentsislavZhechev - Josefvan Genabith + Josefvan Genabith 2010.amta-papers.19 With the steadily increasing demand for high-quality translation, the localisation industry is constantly searching for technologies that would increase translator throughput, in particular focusing on the use of high-quality Statistical Machine Translation (SMT) supplementing the established Translation Memory (TM) technology. In this paper, we present a novel modular approach that utilises state-of-the-art sub-tree alignment and SMT techniques to turn the fuzzy matches from a TM into near-perfect translations. Rather than relegate SMT to a last-resort status where it is only used should the TM system fail to produce the desired output, for us SMT is an integral part of the translation process that we rely on to obtain high-quality results. We show that the presented system consistently produces better-quality output than the TM and performs on par or better than the standalone SMT system. zhechev-van-genabith-2010-maximizing @@ -212,7 +212,7 @@ Choosing the Right Evaluation for Machine Translation: an Examination of Annotator and Automatic Metric Performance on Human Judgment Tasks MichaelDenkowski - AlonLavie + AlonLavie 2010.amta-papers.20 This paper examines the motivation, design, and practical results of several types of human evaluation tasks for machine translation. In addition to considering annotator performance and task informativeness over multiple evaluations, we explore the practicality of tuning automatic evaluation metrics to each judgment type in a comprehensive experiment using the METEOR-NEXT metric. We present results showing clear advantages of tuning to certain types of judgments and discuss causes of inconsistency when tuning to various judgment data, as well as sources of difficulty in the human evaluation tasks themselves. denkowski-lavie-2010-choosing @@ -229,7 +229,7 @@ A Source-side Decoding Sequence Model for Statistical Machine Translation MinweiFeng ArneMauser - HermannNey + HermannNey 2010.amta-papers.22 We propose a source-side decoding sequence language model for phrase-based statistical machine translation. This model is a reordering model in the sense that it helps the decoder find the correct decoding sequence. The model uses word-aligned bilingual training data. We show improved translation quality of up to 1.34% BLEU and 0.54% TER using this model compared to three other widely used reordering models. feng-etal-2010-source @@ -237,8 +237,8 @@ Supertags as Source Language Context in Hierarchical Phrase-Based <fixed-case>SMT</fixed-case> RejwanulHaque - SudipNaskar - Antalvan den Bosch + SudipNaskar + Antalvan den Bosch AndyWay 2010.amta-papers.23 Statistical machine translation (SMT) models have recently begun to include source context modeling, under the assumption that the proper lexical choice of the translation for an ambiguous word can be determined from the context in which it appears. Various types of lexical and syntactic features have been explored as effective source context to improve phrase selection in SMT. In the present work, we introduce lexico-syntactic descriptions in the form of supertags as source-side context features in the state-of-the-art hierarchical phrase-based SMT (HPB) model. These features enable us to exploit source similarity in addition to target similarity, as modelled by the language model. In our experiments two kinds of supertags are employed: those from lexicalized tree-adjoining grammar (LTAG) and combinatory categorial grammar (CCG). We use a memory-based classification framework that enables the efficient estimation of these features. Despite the differences between the two supertagging approaches, they give similar improvements. We evaluate the performance of our approach on an English-to-Dutch translation task, and report statistically significant improvements of 4.48% and 6.3% BLEU scores in translation quality when adding CCG and LTAG supertags, respectively, as context-informed features. @@ -277,7 +277,7 @@ YanjunMa JohannRoturier AndyWay - Josefvan Genabith + Josefvan Genabith 2010.amta-papers.27 We report findings from a user study with professional post-editors using a translation recommendation framework (He et al., 2010) to integrate Statistical Machine Translation (SMT) output with Translation Memory (TM) systems. The framework recommends SMT outputs to a TM user when it predicts that SMT outputs are more suitable for post-editing than the hits provided by the TM. We analyze the effectiveness of the model as well as the reaction of potential users. Based on the performance statistics and the users’ comments, we find that translation recommendation can reduce the workload of professional post-editors and improve the acceptance of MT in the localization industry. he-etal-2010-improving @@ -295,7 +295,7 @@ Improving Reordering in Statistical Machine Translation from <fixed-case>F</fixed-case>arsi EvgenyMatusov - SelçukKöprü + SelçukKöprü 2010.amta-papers.29 In this paper, we propose a novel model for scoring reordering in phrase-based statistical machine translation (SMT) and successfully use it for translation from Farsi into English and Arabic. The model replaces the distance-based distortion model that is widely used in most SMT systems. The main idea of the model is to penalize each new deviation from the monotonic translation path. We also propose a way for combining this model with manually created reordering rules for Farsi which try to alleviate the difference in sentence structure between Farsi and English/Arabic by changing the position of the verb. The rules are used in the SMT search as soft constraints. In the experiments on two general-domain translation tasks, the proposed penalty-based model improves the BLEU score by up to 1.5% absolute as compared to the baseline of monotonic translation, and up to 1.2% as compared to using the distance-based distortion model. matusov-kopru-2010-improving @@ -324,7 +324,7 @@ MatthiasHuck MartinRatajczak PatrickLehnen - HermannNey + HermannNey 2010.amta-papers.32 In this work we give a detailed comparison of the impact of the integration of discriminative and trigger-based lexicon models in state-of-the-art hierarchical and conventional phrase-based statistical machine translation systems. As both types of extended lexicon models can grow very large, we apply certain restrictions to discard some of the less useful information. We show how these restrictions facilitate the training of the extended lexicon models. We finally evaluate systems that incorporate both types of models with different restrictions on a large-scale translation task for the Arabic-English language pair. Our results suggest that extended lexicon models can be substantially reduced in size while still giving clear improvements in translation performance. huck-etal-2010-comparison @@ -342,7 +342,7 @@ Voting on N-grams for Machine Translation System Combination KennethHeafield - AlonLavie + AlonLavie 2010.amta-papers.34 System combination exploits differences between machine translation systems to form a combined translation from several system outputs. Core to this process are features that reward n-gram matches between a candidate combination and each system output. Systems differ in performance at the n-gram level despite similar overall scores. We therefore advocate a new feature formulation: for each system and each small n, a feature counts n-gram matches between the system and candidate. We show post-evaluation improvement of 6.67 BLEU over the best system on NIST MT09 Arabic-English test data. Compared to a baseline system combination scheme from WMT 2009, we show improvement in the range of 1 BLEU point. heafield-lavie-2010-voting @@ -366,7 +366,7 @@ Statistical Machine Translation of <fixed-case>E</fixed-case>nglish-<fixed-case>M</fixed-case>anipuri using Morpho-syntactic and Semantic Information - Thoudam DorenSingh + Thoudam DorenSingh SavajiBandyopadhyay 2010.amta-srw.1 English-Manipuri language pair is one of the rarely investigated with restricted bilingual resources. The development of a factored Statistical Machine Translation (SMT) system between English as source and Manipuri, a morphologically rich language as target is reported. The role of the suffixes and dependency relations on the source side and case markers on the target side are identified as important translation factors. The morphology and dependency relations play important roles to improve the translation quality. A parallel corpus of 10350 sentences from news domain is used for training and the system is tested with 500 sentences. Using the proposed translation factors, the output of the translation quality is improved as indicated by the BLEU score and subjective evaluation. @@ -394,7 +394,7 @@ Machine Translation between <fixed-case>H</fixed-case>ebrew and <fixed-case>A</fixed-case>rabic: Needs, Challenges and Preliminary Solutions ReshefShilon NizarHabash - AlonLavie + AlonLavie ShulyWintner 2010.amta-srw.4 Hebrew and Arabic are related but mutually incomprehensible languages with complex morphology and scarce parallel corpora. Machine translation between the two languages is therefore interesting and challenging. We discuss similarities and differences between Hebrew and Arabic, the benefits and challenges that they induce, respectively, and their implications for machine translation. We highlight the shortcomings of using English as a pivot language and advocate a direct, transfer-based and linguistically-informed (but still statistical, and hence scalable) approach. We report preliminary results of such a system that we are currently developing. @@ -556,9 +556,9 @@ Paralinguist Assessment Decision Factors For Machine Translation Output: A Case Study - CarolVan Ess-Dykema + CarolVan Ess-Dykema JocelynPhillips - FlorenceReeder + FlorenceReeder LaurieGerber 2010.amta-government.1 We describe a case study that presents a framework for examining whether Machine Translation (MT) output enables translation professionals to translate faster while at the same time producing better quality translations than without MT output. We seek to find decision factors that enable a translation professional, known as a Paralinguist, to determine whether MT output is of sufficient quality to serve as a “seed translation” for post-editors. The decision factors, unlike MT developers’ automatic metrics, must function without a reference translation. We also examine the correlation of MT developers’ automatic metrics with error annotators’ assessments of post-edited translations. @@ -612,7 +612,7 @@ Task-based evaluation methods for machine translation, in practice and theory - Judith L.Klavans + Judith L.Klavans A panel of industry and government experts will discuss ways in which they have applied task-based evaluation for Machine Translation and other language technologies in their organizations and share ideas for new methods that could be tried in the future. As part of the discussion, the panelists will address some of the following points: What task-based evaluation means within their organization, i.e., how task-based evaluation is defined; How task-based evaluation impacts the use of MT technologies in their work environment; Whether task-based evaluation correlates with MT developers' automated metrics and if not, how do we arrive at automated metrics that do correlate with the more expensive task-based evaluation; What "lessons-learned" resulted from the course of performing task-based evaluation; How task-based evaluations can be generalized to multiple workflow environments. klavans-2010-task @@ -666,7 +666,7 @@ Parallel Corpus Development at <fixed-case>NVTC</fixed-case> - CarolVan Ess-Dykema + CarolVan Ess-Dykema LaurieGerber 2010.amta-government.19 In this paper, we describe the methods used to develop an exchangeable translation memory bank of sentence-aligned Mandarin Chinese - English sentences. This effort is part of a larger effort, initiated by the National Virtual Translation Center (NVTC), to foster collaboration and sharing of translation memory banks across the Intelligence Community and the Department of Defense. In this paper, we describe our corpus creation process - a largely automated process - highlighting the human interventions that are still deemed necessary. We conclude with a brief discussion of how this work will affect plans for NVTC's new translation management workflow and future research to increase the performance of the automated components of the corpus creation process. @@ -703,7 +703,7 @@ Evaluating the Output of Machine Translation Systems - AlonLavie + AlonLavie 2010.amta-tutorials.4 lavie-2010-evaluating @@ -772,10 +772,10 @@ <fixed-case>W</fixed-case>iki<fixed-case>BABEL</fixed-case>: A System for Multilingual <fixed-case>W</fixed-case>ikipedia Content - A.Kumaran + A.Kumaran NarenDatha B.Ashok - K.Saravanan + K.Saravanan AnilAnde AshwaniSharma SridharVedantham diff --git a/data/xml/2010.eamt.xml b/data/xml/2010.eamt.xml index febbad754e..8aceef81fb 100644 --- a/data/xml/2010.eamt.xml +++ b/data/xml/2010.eamt.xml @@ -44,7 +44,7 @@ Can inversion transduction grammars generate hand alignments - AndersSøgaard + AndersSøgaard 2010.eamt-1.5 2010.eamt-1.5.Presentation.pdf sogaard-2010-inversion @@ -52,8 +52,8 @@ A fully unsupervised approach for mining parallel data from comparable corpora Thi Ngoc DiepDo - LaurentBesacier - EricCastelli + LaurentBesacier + EricCastelli 2010.eamt-1.6 2010.eamt-1.6.Presentation.pdf do-etal-2010-fully @@ -85,7 +85,7 @@ Query translation using <fixed-case>W</fixed-case>ikipedia-based resources for analysis and disambiguation - BenoitGaillard + BenoitGaillard MalekBoualem OlivierCollin 2010.eamt-1.10 @@ -100,15 +100,15 @@ Linguistic-based Evaluation Criteria to identify Statistical Machine Translation Errors MireiaFarrús - Marta R.Costa-jussà - José B.Mariño - José A. R.Fonollosa + Marta R.Costa-jussà + José B.Mariño + José A. R.Fonollosa 2010.eamt-1.12 farrus-etal-2010-linguistic Rule-based <fixed-case>B</fixed-case>reton to <fixed-case>F</fixed-case>rench machine translation - FrancisTyers + FrancisTyers 2010.eamt-1.13 tyers-2010-rule @@ -122,31 +122,31 @@ Robust Estimation of Feature Weights in Statistical Machine Translation CristinaEspaña-Bonet - LluísMàrquez + LluísMàrquez 2010.eamt-1.15 espana-bonet-marquez-2010-robust Potential scope of a fully-integrated architecture for speech translation AliciaPérez - María InésTorres - FranciscoCasacuberta + María InésTorres + FranciscoCasacuberta 2010.eamt-1.16 perez-etal-2010-potential Integration of statistical collocation segmentations in a phrase-based statistical machine translation system Marta R.Costa-jussa - VidasDaudaravicius - Rafael E.Banchs + VidasDaudaravicius + Rafael E.Banchs 2010.eamt-1.17 costa-jussa-etal-2010-integration On the Use of Confidence Measures within an Interactive-predictive Machine Translation System - JesúsGonzález-Rubio - DanielOrtíz-Martínez - FranciscoCasacuberta + JesúsGonzález-Rubio + DanielOrtíz-Martínez + FranciscoCasacuberta 2010.eamt-1.18 gonzalez-rubio-etal-2010-use @@ -163,15 +163,15 @@ KeiHashimoto YoshihikoNankaku KeiichiTokuda - GermánSanchis-Trilles + GermánSanchis-Trilles 2010.eamt-1.20 gomez-etal-2010-deterministic <fixed-case>E</fixed-case>nglish to <fixed-case>B</fixed-case>angla Phrase-Based Machine Translation - ZahurulIslam - JörgTiedemann - AndreasEisele + ZahurulIslam + JörgTiedemann + AndreasEisele 2010.eamt-1.21 islam-etal-2010-english @@ -201,7 +201,7 @@ Bridging the Gap – <fixed-case>E</fixed-case>uro<fixed-case>T</fixed-case>erm<fixed-case>B</fixed-case>ank Terminology Delivered to Users’ Environment TatianaGornostay - AndrejsVasiljevs + AndrejsVasiljevs SigneRirdance RobertsRozis 2010.eamt-1.25 @@ -210,7 +210,7 @@ Lattice Score Based Data Cleaning for Phrase-Based Statistical Machine Translation JieJiang - JulieCarson-Berndsen + JulieCarson-Berndsen AndyWay 2010.eamt-1.26 jiang-etal-2010-lattice @@ -218,29 +218,29 @@ Chunk-Based <fixed-case>EBMT</fixed-case> Jae DongKim - RalfBrown - JaimeCarbonell + RalfBrown + JaimeCarbonell 2010.eamt-1.27 kim-etal-2010-chunk Source reordering using <fixed-case>M</fixed-case>ax<fixed-case>E</fixed-case>nt classifiers and supertags MaximKhalilov - KhalilSima’an + KhalilSima’an 2010.eamt-1.28 khalilov-simaan-2010-source Domain Adaptation in Statistical Machine Translation using Factored Translation Models JanNiehues - AlexWaibel + AlexWaibel 2010.eamt-1.29 2010.eamt-1.29.Presentation.pdf niehues-waibel-2010-domain Online Language Model adaptation via N-gram Mixtures for Statistical Machine Translation - GermánSanchis-Trilles + GermánSanchis-Trilles MauroCettolo 2010.eamt-1.30 2010.eamt-1.30.Presentation.pdf @@ -267,7 +267,7 @@ Hierarchical Hybrid Translation between <fixed-case>E</fixed-case>nglish and <fixed-case>G</fixed-case>erman YuChen - AndreasEisele + AndreasEisele 2010.eamt-1.33 2010.eamt-1.33.Presentation.pdf chen-eisele-2010-hierarchical @@ -286,7 +286,7 @@ Using the Apertium <fixed-case>S</fixed-case>panish-<fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese machine translation system for localization FrançoisMasselot PetraRibiczey - GemaRamírez-Sánchez + GemaRamírez-Sánchez 2010.eamt-1.35 2010.eamt-1.35.Presentation.pdf masselot-etal-2010-using @@ -301,7 +301,7 @@ <fixed-case>H</fixed-case>aitian <fixed-case>C</fixed-case>reole: How to Build and Ship an <fixed-case>MT</fixed-case> Engine from Scratch in 4 days, 17 hours, & 30 minutes - WilliamLewis + WilliamLewis 2010.eamt-1.37 2010.eamt-1.37.Presentation.pdf lewis-2010-haitian @@ -316,9 +316,9 @@ A Bootstrapped Interlingua-Based <fixed-case>SMT</fixed-case> Architecture - MannyRayner + MannyRayner PaulaEstrella - PierretteBouillon + PierretteBouillon 2010.eamt-1.39 2010.eamt-1.39.Presentation.pdf rayner-etal-2010-bootstrapped @@ -326,8 +326,8 @@ Automatic Determination of Number of clusters for creating Templates in Example-Based Machine Translation RashmiGangadharaiah - RalfBrown - JaimeCarbonell + RalfBrown + JaimeCarbonell 2010.eamt-1.40 2010.eamt-1.40.Presentation.pdf gangadharaiah-etal-2010-automatic diff --git a/data/xml/2010.iwslt.xml b/data/xml/2010.iwslt.xml index fbaf7e204b..fedc7c9bb9 100644 --- a/data/xml/2010.iwslt.xml +++ b/data/xml/2010.iwslt.xml @@ -24,7 +24,7 @@ Resources for adding semantics to machine translation - JanHajič + JanHajič 2010.iwslt-keynotes.3 hajic-2010-resources @@ -49,9 +49,9 @@ Overview of the <fixed-case>IWSLT</fixed-case> 2010 evaluation campaign - MichaelPaul + MichaelPaul MarcelloFederico - SebastianStüker + SebastianStüker 3-27 2010.iwslt-evaluation.1 This paper gives an overview of the evaluation campaign results of the 7th International Workshop on Spoken Language Translation (IWSLT 2010)1. This year, we focused on three spoken language tasks: (1) public speeches on a variety of topics (TALK) from English to French, (2) spoken dialog in travel situations (DIALOG) between Chinese and English, and (3) traveling expressions (BTEC) from Arabic, Turkish, and French to English. In total, 28 teams (including 7 firsttime participants) took part in the shared tasks, submitting 60 primary and 112 contrastive runs. Automatic and subjective evaluations of the primary runs were carried out in order to investigate the impact of different communication modalities, spoken language styles and semantic context on automatic speech recognition (ASR) and machine translation (MT) system performances. @@ -60,7 +60,7 @@ <fixed-case>A</fixed-case>pp<fixed-case>T</fixed-case>ek’s <fixed-case>APT</fixed-case> machine translation system for <fixed-case>IWSLT</fixed-case> 2010 EvgenyMatusov - SelçukKöprü + SelçukKöprü 29-36 2010.iwslt-evaluation.2 In this paper, we describe AppTek’s new APT machine translation system that we employed in the IWSLT 2010 evaluation campaign. This year, we participated in the Arabic-to-English and Turkish-to-English BTEC tasks. We discuss the architecture of the system, the preprocessing steps and the experiments carried out during the campaign. We show that competitive translation quality can be obtained with a system that can be turned into a real-life product without much effort. @@ -78,7 +78,7 @@ N-gram-based machine translation enhanced with neural networks FranciscoZamora-Martinez - Maria JoseCastro-Bleda + Maria JoseCastro-Bleda HolgerSchwenk 45-52 2010.iwslt-evaluation.4 @@ -110,10 +110,10 @@ <fixed-case>I</fixed-case>2<fixed-case>R</fixed-case>’s machine translation system for <fixed-case>IWSLT</fixed-case> 2010 XiangyuDuan - RafaelBanchs + RafaelBanchs JunLang - DeyiXiong - AitiAw + DeyiXiong + AitiAw MinZhang HaizhouLi 67-72 @@ -129,7 +129,7 @@ WeiLuo HaitaoMi YangLiu - Yajuan + Yajuan QunLiu 73-79 2010.iwslt-evaluation.8 @@ -139,8 +139,8 @@ The <fixed-case>INESC</fixed-case>-<fixed-case>ID</fixed-case> machine translation system for the <fixed-case>IWSLT</fixed-case> 2010 WangLing TiagoLuís - JoãoGraça - LuísaCoheur + JoãoGraça + LuísaCoheur IsabelTrancoso 81-84 2010.iwslt-evaluation.9 @@ -150,14 +150,14 @@ <fixed-case>ITI</fixed-case>-<fixed-case>UPV</fixed-case> machine translation system for <fixed-case>IWSLT</fixed-case> 2010 GuillemGascó - VicentAlabau + VicentAlabau Jesús-AndrésFerrer - JesúsGonzález-Rubio - Martha-AliciaRocha - GermánSanchis-Trilles - FranciscoCasacuberta + JesúsGonzález-Rubio + Martha-AliciaRocha + GermánSanchis-Trilles + FranciscoCasacuberta JorgeGonzález - Joan-AndreuSánchez + Joan-AndreuSánchez 85-92 2010.iwslt-evaluation.10 This paper presents the submissions of the PRHLT group for the evaluation campaign of the International Workshop on Spoken Language Translation. We focus on the development of reliable translation systems between syntactically different languages (DIALOG task) and on the efficient training of SMT models in resource-rich scenarios (TALK task). @@ -170,17 +170,17 @@ TeresaHerrmann MichaelHeck ChristianHerff - AlexWaibel + AlexWaibel 93-98 2010.iwslt-evaluation.11 niehues-etal-2010-kit <fixed-case>LIG</fixed-case> statistical machine translation systems for <fixed-case>IWSLT</fixed-case> 2010 - LaurentBesacier + LaurentBesacier HaitemAfli Thi Ngoc DiepDo - HervéBlanchon + HervéBlanchon MarionPotet 99-104 2010.iwslt-evaluation.12 @@ -189,8 +189,8 @@ <fixed-case>LIMSI</fixed-case> @ <fixed-case>IWSLT</fixed-case> 2010 AlexandreAllauzen - Josep M.Crego - İlknur DurgarEl-Kahlout + Josep M.Crego + İlknur DurgarEl-Kahlout LeHai-Son GuillaumeWisniewski FrançoisYvon @@ -203,7 +203,7 @@ <fixed-case>LIUM</fixed-case>’s statistical machine translation system for <fixed-case>IWSLT</fixed-case> 2010 AnthonyRousseau LoïcBarrault - PaulDeléglise + PaulDeléglise YannickEstève 113-117 2010.iwslt-evaluation.14 @@ -214,7 +214,7 @@ The <fixed-case>MIRACL</fixed-case> <fixed-case>A</fixed-case>rabic-<fixed-case>E</fixed-case>nglish statistical machine translation system for <fixed-case>IWSLT</fixed-case> 2010 InesTurki Khemakhem SalmaJamoussi - AbdelmajidBen Hamadou + AbdelmajidBen Hamadou 119-125 2010.iwslt-evaluation.15 This paper describes the MIRACL statistical Machine Translation system and the improvements that were developed during the IWSLT 2010 evaluation campaign. We participated to the Arabic to English BTEC tasks using a phrase-based statistical machine translation approach. In this paper, we first discuss some challenges in translating from Arabic to English and we explore various techniques to improve performances on a such task. Next, we present our solution for disambiguating the output of an Arabic morphological analyzer. In fact, The Arabic morphological analyzer used produces all possible morphological structures for each word, with an unique correct proposition. In this work we exploit the Arabic-English alignment to choose the correct segmented form and the correct morpho-syntactic features produced by our morphological analyzer. @@ -223,7 +223,7 @@ The <fixed-case>MIT</fixed-case>-<fixed-case>LL</fixed-case>/<fixed-case>AFRL</fixed-case> <fixed-case>IWSLT</fixed-case>-2010 <fixed-case>MT</fixed-case> system WadeShen - TimothyAnderson + TimothyAnderson RaymondSlyh A. RyanAminzadeh 127-134 @@ -249,11 +249,11 @@ The <fixed-case>NICT</fixed-case> translation system for <fixed-case>IWSLT</fixed-case> 2010 - Chooi-LingGoh + Chooi-LingGoh TaroWatanabe - MichaelPaul + MichaelPaul AndrewFinch - EiichiroSumita + EiichiroSumita 139-146 2010.iwslt-evaluation.18 This paper describes NICT’s participation in the IWSLT 2010 evaluation campaign for the DIALOG translation (Chinese-English) and the BTEC (French-English) translation shared-tasks. For the DIALOG translation, the main challenge to this task is applying context information during translation. Context information can be used to decide on word choice and also to replace missing information during translation. We applied discriminative reranking using contextual information as additional features. In order to provide more choices for re-ranking, we generated n-best lists from multiple phrase-based statistical machine translation systems that varied in the type of Chinese word segmentation schemes used. We also built a model that merged the phrase tables generated by the different segmentation schemes. Furthermore, we used a lattice-based system combination model to combine the output from different systems. A combination of all of these systems was used to produce the n-best lists for re-ranking. For the BTEC task, a general approach that used latticebased system combination of two systems, a standard phrasebased system and a hierarchical phrase-based system, was taken. We also tried to process some unknown words by replacing them with the same words but different inflections that are known to the system. @@ -290,7 +290,7 @@ StephanPeitz DavidVilar JoernWuebker - HermannNey + HermannNey 163-168 2010.iwslt-evaluation.22 In this paper we describe the statistical machine translation system of the RWTH Aachen University developed for the translation task of the IWSLT 2010. This year, we participated in the BTEC translation task for the Arabic to English language direction. We experimented with two state-of-theart decoders: phrase-based and hierarchical-based decoders. Extensions to the decoders included phrase training (as opposed to heuristic phrase extraction) for the phrase-based decoder, and soft syntactic features for the hierarchical decoder. Additionally, we experimented with various rule-based and statistical-based segmenters for Arabic. Due to the different decoders and the different methodologies that we apply for segmentation, we expect that there will be complimentary variation in the results achieved by each system. The next step would be to exploit these variations and achieve better results by combining the systems. We try different strategies for system combination and report significant improvements over the best single system. @@ -325,11 +325,11 @@ <fixed-case>UPC</fixed-case>-<fixed-case>BMIC</fixed-case>-<fixed-case>VDU</fixed-case> system description for the <fixed-case>IWSLT</fixed-case> 2010: testing several collocation segmentations in a phrase-based <fixed-case>SMT</fixed-case> system - CarlosHenríquez - Marta R.Costa-jussà - VidasDaudaravicius - Rafael E.Banchs - José B.Mariño + CarlosHenríquez + Marta R.Costa-jussà + VidasDaudaravicius + Rafael E.Banchs + José B.Mariño 189-195 2010.iwslt-evaluation.26 This paper describes the UPC-BMIC-VMU participation in the IWSLT 2010 evaluation campaign. The SMT system is a standard phrase-based enriched with novel segmentations. These novel segmentations are computed using statistical measures such as Log-likelihood, T-score, Chi-squared, Dice, Mutual Information or Gravity-Counts. The analysis of translation results allows to divide measures into three groups. First, Log-likelihood, Chi-squared and T-score tend to combine high frequency words and collocation segments are very short. They improve the SMT system by adding new translation units. Second, Mutual Information and Dice tend to combine low frequency words and collocation segments are short. They improve the SMT system by smoothing the translation units. And third, GravityCounts tends to combine high and low frequency words and collocation segments are long. However, in this case, the SMT system is not improved. Thus, the road-map for translation system improvement is to introduce new phrases with either low frequency or high frequency words. It is hard to introduce new phrases with low and high frequency words in order to improve translation quality. Experimental results are reported in the French-to-English IWSLT 2010 evaluation where our system was ranked 3rd out of nine systems. @@ -338,7 +338,7 @@ <fixed-case>ILLC</fixed-case>-<fixed-case>U</fixed-case>v<fixed-case>A</fixed-case> machine translation system for the <fixed-case>IWSLT</fixed-case> 2010 evaluation MaximKhalilov - KhalilSima’an + KhalilSima’an 197-203 2010.iwslt-evaluation.27 khalilov-simaan-2010-illc @@ -390,8 +390,8 @@ Improved <fixed-case>V</fixed-case>ietnamese-<fixed-case>F</fixed-case>rench parallel corpus mining using <fixed-case>E</fixed-case>nglish language Thi Ngoc DiepDo - LaurentBesacier - EricCastelli + LaurentBesacier + EricCastelli 235-242 2010.iwslt-papers.4 do-etal-2010-improved @@ -407,7 +407,7 @@ The pay-offs of preprocessing for <fixed-case>G</fixed-case>erman-<fixed-case>E</fixed-case>nglish statistical machine translation - Ilknur DurgarEl-Kahlout + Ilknur DurgarEl-Kahlout FrancoisYvon 251-258 2010.iwslt-papers.6 @@ -417,7 +417,7 @@ A <fixed-case>B</fixed-case>ayesian model of bilingual segmentation for transliteration AndrewFinch - EiichiroSumita + EiichiroSumita 259-266 2010.iwslt-papers.7 finch-sumita-2010-bayesian @@ -425,7 +425,7 @@ Faster cube pruning AndreaGesmundo - JamesHenderson + JamesHenderson 267-274 2010.iwslt-papers.8 gesmundo-henderson-2010-faster @@ -433,7 +433,7 @@ Factor templates for factored machine translation models YvetteGraham - Josefvan Genabith + Josefvan Genabith 275-282 2010.iwslt-papers.9 graham-van-genabith-2010-factor @@ -452,7 +452,7 @@ CarmenHeger JoernWuebker DavidVilar - HermannNey + HermannNey 291-297 2010.iwslt-papers.11 Currently most state-of-the-art statistical machine translation systems present a mismatch between training and generation conditions. Word alignments are computed using the well known IBM models for single-word based translation. Afterwards phrases are extracted using extraction heuristics, unrelated to the stochastic models applied for finding the word alignment. In the last years, several research groups have tried to overcome this mismatch, but only with limited success. Recently, the technique of forced alignments has shown to improve translation quality for a phrase-based system, applying a more statistically sound approach to phrase extraction. In this work we investigate the first steps to combine forced alignment with a hierarchical model. Experimental results on IWSLT and WMT data show improvements in translation quality of up to 0.7% BLEU and 1.0% TER. @@ -462,8 +462,8 @@ Multi-pivot translation by system combination GregorLeusch AurélienMax - Josep MariaCrego - HermannNey + Josep MariaCrego + HermannNey 299-306 2010.iwslt-papers.12 This paper describes a technique to exploit multiple pivot languages when using machine translation (MT) on language pairs with scarce bilingual resources, or where no translation system for a language pair is available. The principal idea is to generate intermediate translations in several pivot languages, translate them separately into the target language, and generate a consensus translation out of these using MT system combination techniques. Our technique can also be applied when a translation system for a language pair is available, but is limited in its translation accuracy because of scarce resources. Using statistical MT systems for the 11 different languages of Europarl, we show experimentally that a direct translation system can be replaced by this pivot approach without a loss in translation quality if about six pivot languages are available. Furthermore, we can already improve an existing MT system by adding two pivot systems to it. The maximum improvement was found to be 1.4% abs. in BLEU in our experiments for 8 or more pivot languages. @@ -471,9 +471,9 @@ Real-time spoken language identification and recognition for speech-to-speech translation - Daniel Chung YongLim + Daniel Chung YongLim IanLane - AlexWaibel + AlexWaibel 307-312 2010.iwslt-papers.13 lim-etal-2010-real @@ -482,8 +482,8 @@ Towards a general and extensible phrase-extraction algorithm WangLing TiagoLuís - JoãoGraça - LuísaCoheur + JoãoGraça + LuísaCoheur IsabelTrancoso 313-320 2010.iwslt-papers.14 @@ -511,7 +511,7 @@ Sign language machine translation overkill DanielStein ChristophSchmidt - HermannNey + HermannNey 337-344 2010.iwslt-papers.17 Sign languages represent an interesting niche for statistical machine translation that is typically hampered by the scarceness of suitable data, and most papers in this area apply only a few, well-known techniques and do not adapt them to small-sized corpora. In this paper, we will propose new methods for common approaches like scaling factor optimization and alignment merging strategies which helped improve our baseline. We also conduct experiments with different decoders and employ state-of-the-art techniques like soft syntactic labels as well as trigger-based and discriminative word lexica and system combination. All methods are evaluated on one of the largest sign language corpora available. @@ -522,7 +522,7 @@ DavidVilar DanielStein StephanPeitz - HermannNey + HermannNey 345-352 2010.iwslt-papers.18 vilar-etal-2010-parser diff --git a/data/xml/2010.jeptalnrecital.xml b/data/xml/2010.jeptalnrecital.xml index b8f4e74971..19c016dbb3 100644 --- a/data/xml/2010.jeptalnrecital.xml +++ b/data/xml/2010.jeptalnrecital.xml @@ -3,7 +3,7 @@ Actes de la 17e conférence sur le Traitement Automatique des Langues Naturelles. Conférences invitées - PhilippeLanglais + PhilippeLanglais MichelGagnon ATALA
Montréal, Canada
@@ -17,9 +17,9 @@ La phraséologie en langue, en dictionnaire et en <fixed-case>TALN</fixed-case> - IgorMel’čuk + IgorMel’čuk 1–14 - + 2010.jeptalnrecital-invite.1 fra melcuk-2010-la @@ -45,7 +45,7 @@ Actes de la 17e conférence sur le Traitement Automatique des Langues Naturelles. Articles longs - PhilippeLanglais + PhilippeLanglais MichelGagnon ATALA
Montréal, Canada
@@ -80,7 +80,7 @@ Exploitation d’une ressource lexicale pour la construction d’un étiqueteur morpho-syntaxique état-de-l’art du français PascalDenis - BenoîtSagot + BenoîtSagot 21–30 Cet article présente MEltfr, un étiqueteur morpho-syntaxique automatique du français. Il repose sur un modèle probabiliste séquentiel qui bénéficie d’informations issues d’un lexique exogène, à savoir le Lefff. Evalué sur le FTB, MEltfr atteint un taux de précision de 97.75% (91.36% sur les mots inconnus) sur un jeu de 29 étiquettes. Ceci correspond à une diminution du taux d’erreur de 18% (36.1% sur les mots inconnus) par rapport au même modèle sans couplage avec le Lefff. Nous étudions plus en détail la contribution de cette ressource, au travers de deux séries d’expériences. Celles-ci font apparaître en particulier que la contribution des traits issus du Lefff est de permettre une meilleure couverture, ainsi qu’une modélisation plus fine du contexte droit des mots. 2010.jeptalnrecital-long.3 @@ -109,7 +109,7 @@ Une approche cognitive de la fouille de grandes collections de documents - AdilEl Ghali + AdilEl Ghali YannVigile Hoareau 51–60 La récente éclosion du Web2.0 engendre un accroissement considérable de volumes textuels et intensifie ainsi l’importance d’une réflexion sur l’exploitation des connaissances à partir de grandes collections de documents. Dans cet article, nous présentons une approche de rechercher d’information qui s’inspire des certaines recherches issues de la psychologie cognitive pour la fouille de larges collections de documents. Nous utilisons un document comme requête permettant de récupérer des informations à partir d’une collection représentée dans un espace sémantique. Nous définissons les notions d’identité sémantique et de pollution sémantique dans un espace de documents. Nous illustrons notre approche par la description d’un système appelé BRAT (Blogosphere Random Analysis using Texts) basé sur les notions préalablement introduites d’identité et de pollution sématique appliquées à une tâche d’identification des actualités dans la blogosphère mondiale lors du concours TREC’09. Les premiers résultats produits sont tout à fait encourageant et indiquent les pistes des recherches à mettre en oeuvre afin d’améliorer les performances de BRAT. @@ -132,7 +132,7 @@ Approche quantitative en syntaxe : l’exemple de l’alternance de position de l’adjectif épithète en français JulietteThuilier GwendolineFox - BenoîtCrabbé + BenoîtCrabbé 71–80 Cet article présente une analyse statistique sur des données de syntaxe qui a pour but d’aider à mieux cerner le phénomène d’alternance de position de l’adjectif épithète par rapport au nom en français. Nous montrons comment nous avons utilisé les corpus dont nous disposons (French Treebank et le corpus de l’Est-Républicain) ainsi que les ressources issues du traitement automatique des langues, pour mener à bien notre étude. La modélisation à partir de 13 variables relevant principalement des propriétés du syntagme adjectival, de celles de l’item adjectival, ainsi que de contraintes basées sur la fréquence, permet de prédire à plus de 93% la position de l’adjectif. Nous insistons sur l’importance de contraintes relevant de l’usage pour le choix de la position de l’adjectif, notamment à travers la fréquence d’occurrence de l’adjectif, et la fréquence de contextes dans lesquels il apparaît. 2010.jeptalnrecital-long.8 @@ -170,8 +170,8 @@ Une approche hybride traduction/correction pour la normalisation des <fixed-case>SMS</fixed-case> RichardBeaufort SophieRoekhaut - Louise-AmélieCougnon - CédrickFairon + Louise-AmélieCougnon + CédrickFairon 111–120 Cet article présente une méthode hybride de normalisation des SMS, à mi-chemin entre correction orthographique et traduction automatique. La partie du système qui assure la normalisation utilise exclusivement des modèles entraînés sur corpus. Evalué en français par validation croisée, le système obtient un taux d’erreur au mot de 9.3% et un score BLEU de 0.83. 2010.jeptalnrecital-long.12 @@ -191,9 +191,9 @@ Extension d’un système d’étiquetage d’entités nommées en étiqueteur sémantique - EricCharton + EricCharton MichelGagnon - BenoitOzell + BenoitOzell 131–140 L’étiquetage sémantique consiste à associer un ensemble de propriétés à une séquence de mots contenue dans un texte. Bien que proche de la tâche d’étiquetage par entités nommées, qui revient à attribuer une classe de sens à un mot, la tâche d’étiquetage ou d’annotation sémantique cherche à établir la relation entre l’entité dans son texte et sa représentation ontologique. Nous présentons un étiqueteur sémantique qui s’appuie sur un étiqueteur d’entités nommées pour mettre en relation un mot ou un groupe de mots avec sa représentation ontologique. Son originalité est d’utiliser une ontologie intermédiaire de nature statistique pour établir ce lien. 2010.jeptalnrecital-long.14 @@ -211,8 +211,8 @@ Anatomie des structures énumératives - Lydia-MaiHo-Dac - Marie-PaulePéry-Woodley + Lydia-MaiHo-Dac + Marie-PaulePéry-Woodley LudovicTanguy 151–160 Cet article présente les premiers résultats d’une campagne d’annotation de corpus à grande échelle réalisée dans le cadre du projet ANNODIS. Ces résultats concernent la partie descendante du dispositif d’annotation, et plus spécifiquement les structures énumératives. Nous nous intéressons à la structuration énumérative en tant que stratégie de base de mise en texte, apparaissant à différents niveaux de granularité, associée à différentes fonctions discursives, et signalée par des indices divers. Avant l’annotation manuelle, une étape de pré-traitement a permis d’obtenir le marquage systématique de traits associés à la signalisation de l’organisation du discours. Nous décrivons cette étape de marquage automatique, ainsi que la procédure d’annotation. Nous proposons ensuite une première typologie des structures énumératives basée sur la description quantitative des données annotées manuellement, prenant en compte la couverture textuelle, la composition et les types d’indices. @@ -224,7 +224,7 @@ Identification des actants et circonstants par apprentissage machine FadilaHadouche GuyLapalme - Marie-ClaudeL’Homme + Marie-ClaudeL’Homme 161–170 Dans cet article, nous traitons de l’identification automatique des participants actants et circonstants de lexies prédicatives verbales tirées d’un corpus spécialisé en langue française. Les actants contribuent à la réalisation du sens de la lexie alors que les circonstants sont optionnels : ils ajoutent une information supplémentaire qui ne fait pas partie intégrante du sémantisme de la lexie. Nous proposons une classification de ces participants par apprentissage machine basée sur un corpus de lexies verbales du domaine de l’informatique, lexies qui ont été annotées manuellement avec des rôles sémantiques. Nous présentons des features qui nous permettent d’identifier les participants et de distinguer les actants des circonstants. 2010.jeptalnrecital-long.17 @@ -247,7 +247,7 @@ Classification du genre vidéo reposant sur des transcriptions automatiques StanislasOger MickaelRouvier - GeorgesLinarès + GeorgesLinarès 181–190 Dans cet article nous proposons une nouvelle méthode pour l’identification du genre vidéo qui repose sur une analyse de leur contenu linguistique. Cette approche consiste en l’analyse des mots apparaissant dans les transcriptions des pistes audio des vidéos, obtenues à l’aide d’un système de reconnaissance automatique de la parole. Les expériences sont réalisées sur un corpus composé de dessins animés, de films, de journaux télévisés, de publicités, de documentaires, d’émissions de sport et de clips de musique. L’approche proposée permet d’obtenir un taux de bonne classification de 74% sur cette tâche. En combinant cette approche avec des méthodes reposant sur des paramètres acoustiques bas-niveau, nous obtenons un taux de bonne classification de 95%. 2010.jeptalnrecital-long.19 @@ -268,7 +268,7 @@ Traitement des disfluences dans le cadre de la compréhension automatique de l’oral arabe spontané YounèsBahou AbirMasmoudi - LamiaHadrich Belguith + LamiaHadrich Belguith 201–210 Les disfluences inhérents de toute parole spontanée sont un vrai défi pour les systèmes de compréhension de la parole. Ainsi, nous proposons dans cet article, une méthode originale pour le traitement des disfluences (plus précisément, les autocorrections, les répétitions, les hésitations et les amorces) dans le cadre de la compréhension automatique de l’oral arabe spontané. Notre méthode est basée sur une analyse à la fois robuste et partielle, des énoncés oraux arabes. L’idée consiste à combiner une technique de reconnaissance de patrons avec une analyse sémantique superficielle par segments conceptuels. Cette méthode a été testée à travers le module de compréhension du système SARF, un serveur vocal interactif offrant des renseignements sur le transport ferroviaire tunisien (Bahou et al., 2008). Les résultats d’évaluation de ce module montrent que la méthode proposée est très prometteuse. En effet, les mesures de rappel, de précision et de F-Measure sont respectivement de 79.23%, 74.09% et 76.57%. 2010.jeptalnrecital-long.21 @@ -278,7 +278,7 @@ Utilisation de relations sémantiques pour améliorer la segmentation thématique de documents télévisuels CamilleGuinaudeau - GuillaumeGravier + GuillaumeGravier PascaleSébillot 211–220 Les méthodes de segmentation thématique exploitant une mesure de la cohésion lexicale peuvent être appliquées telles quelles à des transcriptions automatiques de programmes télévisuels. Cependant, elles sont moins efficaces dans ce contexte, ne prenant en compte ni les particularités des émissions TV, ni celles des transcriptions. Nous étudions ici l’apport de relations sémantiques pour rendre les techniques de segmentation thématique plus robustes. Nous proposons une méthode pour exploiter ces relations dans une mesure de la cohésion lexicale et montrons qu’elles permettent d’augmenter la F1-mesure de +1.97 et +11.83 sur deux corpus composés respectivement de 40h de journaux télévisés et de 40h d’émissions de reportage. Ces améliorations démontrent que les relations sémantiques peuvent rendre les méthodes de segmentation moins sensibles aux erreurs de transcription et au manque de répétitions constaté dans certaines émissions télévisées. @@ -290,7 +290,7 @@ Une évaluation de l’impact des types de textes sur la tâche de segmentation thématique ClémentineAdam PhilippeMuller - CécileFabre + CécileFabre 221–230 Cette étude a pour but de contribuer à la définition des objectifs de la segmentation thématique (ST), en incitant à prendre en considération le paramètre du type de textes dans cette tâche. Notre hypothèse est que, si la ST est certes pertinente pour traiter certains textes dont l’organisation est bien thématique, elle n’est pas adaptée à la prise en compte d’autres modes d’organisation (temporelle, rhétorique), et ne peut pas être appliquée sans précaution à des textes tout-venants. En comparant les performances d’un système de ST sur deux corpus, à organisation thématique “forte” et “faible”, nous montrons que cette tâche est effectivement sensible à la nature des textes. 2010.jeptalnrecital-long.23 @@ -310,10 +310,10 @@ Évaluation automatique de résumés avec et sans référence - Juan-ManuelTorres-Moreno + Juan-ManuelTorres-Moreno HoracioSaggion - Iriada Cunha - PatriciaVelázquez-Morales + Iriada Cunha + PatriciaVelázquez-Morales EricSanjuan 241–251 Nous étudions différentes méthodes d’évaluation de résumé de documents basées sur le contenu. Nous nous intéressons en particulier à la corrélation entre les mesures d’évaluation avec et sans référence humaine. Nous avons développé FRESA, un nouveau système d’évaluation fondé sur le contenu qui calcule les divergences entre les distributions de probabilité. Nous appliquons notre système de comparaison aux diverses mesures d’évaluation bien connues en résumé de texte telles que la Couverture, Responsiveness, Pyramids et Rouge en étudiant leurs associations dans les tâches du résumé multi-document générique (francais/anglais), focalisé (anglais) et résumé mono-document générique (français/espagnol). @@ -335,7 +335,7 @@ Comment formule-t-on une réponse en langue naturelle ? AnneGarcia-Fernandez - SophieRosset + SophieRosset AnneVilnat 262–271 Cet article présente l’étude d’un corpus de réponses formulées par des humains à des questions factuelles. Des observations qualitatives et quantitatives sur la reprise d’éléments de la question dans les réponses sont exposées. La notion d’information-réponse est introduite et une étude de la présence de cet élément dans le corpus est proposée. Enfin, les formulations des réponses sont étudiées. @@ -346,8 +346,8 @@ Apprentissage non supervisé pour la traduction automatique : application à un couple de langues peu doté ThiNgoc Diep - LaurentBesacier - EricCastelli + LaurentBesacier + EricCastelli 272–281 Cet article présente une méthode non-supervisée pour extraire des paires de phrases parallèles à partir d’un corpus comparable. Un système de traduction automatique est utilisé pour exploiter le corpus comparable et détecter les paires de phrases parallèles. Un processus itératif est exécuté non seulement pour augmenter le nombre de paires de phrases parallèles extraites, mais aussi pour améliorer la qualité globale du système de traduction. Une comparaison avec une méthode semi-supervisée est présentée également. Les expériences montrent que la méthode non-supervisée peut être réellement appliquée dans le cas où on manque de données parallèles. Bien que les expériences préliminaires soient menées sur la traduction français-anglais, cette méthode non-supervisée est également appliquée avec succès à un couple de langues peu doté : vietnamien-français. 2010.jeptalnrecital-long.28 @@ -394,7 +394,7 @@ Recherche contextuelle d’équivalents en banque de terminologie - CarolineBarrière + CarolineBarrière 321–330 Notre recherche démontre que l’utilisation du contenu d’un texte à traduire permet de mieux cibler dans une banque de terminologie les équivalents terminologiques pertinents à ce texte. Une banque de terminologie a comme particularité qu’elle catégorise ses entrées (fiches) en leur assignant un ou des domaines provenant d’une liste de domaines préétablie. La stratégie ici présentée repose sur l’utilisation de cette information sur les domaines. Un algorithme a été développé pour l’assignation automatique d’un profil de domaines à un texte. Celui-ci est combiné à un algorithme d’appariement entre les domaines d’un terme présent dans la banque de terminologie et le profil de domaines du texte. Pour notre expérimentation, des résumés bilingues (français et anglais) provenant de huit revues scientifiques nous fournissent un ensemble de 1130 paires d’équivalents terminologiques et le Grand Dictionnaire Terminologique (Office Québécois de la Langue Française) nous sert de ressource terminologique. Sur notre ensemble, nous démontrons une réduction de 75% du rang moyen de l’équivalent correct en comparaison avec un choix au hasard. 2010.jeptalnrecital-long.33 @@ -403,7 +403,7 @@ Réécriture de graphes de dépendances pour l’interface syntaxe-sémantique - GuillaumeBonfante + GuillaumeBonfante BrunoGuillaume MathieuMorey GuyPerrier @@ -416,7 +416,7 @@ Évaluer des annotations manuelles dispersées : les coefficients sont-ils suffisants pour estimer l’accord inter-annotateurs ? KarënFort - ClaireFrançois + ClaireFrançois MahaGhribi 341–350 L’objectif des travaux présentés dans cet article est l’évaluation de la qualité d’annotations manuelles de relations de renommage de gènes dans des résumés scientifiques, annotations qui présentent la caractéristique d’être très dispersées. Pour cela, nous avons calculé et comparé les coefficients les plus communément utilisés, entre autres kappa (Cohen, 1960) et pi (Scott, 1955), et avons analysé dans quelle mesure ils sont adaptés à nos données. Nous avons également étudié les différentes pondérations applicables à ces coefficients permettant de calculer le kappa pondéré (Cohen, 1968) et l’alpha (Krippendorff, 1980, 2004). Nous avons ainsi étudié le biais induit par la grande prévalence d’une catégorie et défini un mode de calcul des distances entre catégories reposant sur les annotations réalisées. @@ -426,7 +426,7 @@ An empirical study of maximum entropy approach for part-of-speech tagging of <fixed-case>V</fixed-case>ietnamese texts - PhuongLe-Hong + PhuongLe-Hong AzimRoussanaly ThiMinh Huyen Nguyen MathiasRossignol @@ -466,7 +466,7 @@ Développement de ressources pour le persan: lexique morphologique et chaîne de traitements de surface - BenoîtSagot + BenoîtSagot GéraldineWalther 393–402 Nous présentons PerLex, un lexique morphologique du persan à large couverture et librement disponible, accompagné d’une chaîne de traitements de surface pour cette langue. Nous décrivons quelques caractéristiques de la morphologie du persan, et la façon dont nous l’avons représentée dans le formalisme lexical Alexina, sur lequel repose PerLex. Nous insistons sur la méthodologie que nous avons employée pour construire les entrées lexicales à partir de diverses sources, ainsi que sur les problèmes liés à la normalisation typographique. Le lexique obtenu a une couverture satisfaisante sur un corpus de référence, et devrait donc constituer un bon point de départ pour le développement d’un lexique syntaxique du persan. @@ -478,7 +478,7 @@ Actes de la 17e conférence sur le Traitement Automatique des Langues Naturelles. Articles courts - PhilippeLanglais + PhilippeLanglais MichelGagnon ATALA
Montréal, Canada
@@ -525,7 +525,7 @@ Constitution d’une ressource sémantique issue du treillis des catégories de <fixed-case>W</fixed-case>ikipedia OlivierCollin - BenoîtGaillard + BenoîtGaillard Jean-LéonBouraoui 20–25 Le travail présenté dans cet article s’inscrit dans le thème de l’acquisition automatique de ressources sémantiques s’appuyant sur les données de Wikipedia. Nous exploitons le graphe des catégories associées aux pages de Wikipedia à partir duquel nous extrayons une hiérarchie de catégories parentes, sémantiquement et thématiquement liées. Cette extraction est le résultat d’une stratégie de plus court chemin appliquée au treillis global des catégories. Chaque page peut ainsi être représentée dans l’espace de ses catégories propres, ainsi que des catégories parentes. Nous montrons la possibilité d’utiliser cette ressource pour deux applications. La première concerne l’indexation et la classification des pages de Wikipedia. La seconde concerne la désambiguïsation dans le cadre d’un traducteur de requêtes français/anglais. Ce dernier travail a été réalisé en exploitant les catégories des pages anglaises. @@ -536,7 +536,7 @@ Ponctuations fortes abusives LaurenceDanlos - BenoîtSagot + BenoîtSagot 26–31 Certaines ponctuations fortes sont « abusivement » utilisées à la place de ponctuations faibles, débouchant sur des phrases graphiques qui ne sont pas des phrases grammaticales. Cet article présente une étude sur corpus de ce phénomène et une ébauche d’outil pour repérer automatiquement les ponctuations fortes abusives. 2010.jeptalnrecital-court.5 @@ -545,7 +545,7 @@ Une étude des questions “complexes” en question-réponse - VéroniqueMoriceau + VéroniqueMoriceau XavierTannier MathieuFalco 32–37 @@ -557,9 +557,9 @@ Weak Translation Problems – a case study of Scriptural Translation MuhammadGhulam Abbas Malik - ChristianBoitet - PushpakBhattacharyya - LaurentBesacier + ChristianBoitet + PushpakBhattacharyya + LaurentBesacier 38–43 General purpose, high quality and fully automatic MT is believed to be impossible. We are interested in scriptural translation problems, which are weak sub-problems of the general problem of translation. We introduce the characteristics of the weak problems of translation and of the scriptural translation problems, describe different computational approaches (finite-state, statistical and hybrid) to solve these problems, and report our results on several combinations of Indo-Pak languages and writing systems. 2010.jeptalnrecital-court.7 @@ -597,7 +597,7 @@ L’antonymie observée avec des méthodes de <fixed-case>TAL</fixed-case> : une relation à la fois syntagmatique et paradigmatique ? FrançoisMorlane-Hondère - CécileFabre + CécileFabre 62–67 Cette étude utilise des outils de TAL pour tester l’hypothèse avancée par plusieurs études linguistiques récentes selon laquelle la relation antonymique, classiquement décrite comme une relation paradigmatique, a la particularité de fonctionner également sur le plan syntagmatique, c’est-à-dire de réunir des mots qui sont non seulement substituables mais qui apparaissent également régulièrement dans des relations contextuelles. Nous utilisons deux méthodes – l’analyse distributionnelle pour le plan paradigmatique, la recherche par patrons antonymiques pour le plan syntagmatique. Les résultats montrent que le diagnostic d’antonymie n’est pas significativement meilleur lorsqu’on croise les deux méthodes, puisqu’une partie des antonymes identifiés ne répondent pas au test de substituabilité, ce qui semble confirmer la prépondérance du plan syntagmatique pour l’étude et l’acquisition de cette relation. 2010.jeptalnrecital-court.11 @@ -646,7 +646,7 @@ HelenaBlancafort GaëlleRecourcé JavierCouto - BenoîtSagot + BenoîtSagot RosaStern DenisTeyssou 86–91 @@ -673,7 +673,7 @@ Détection hors contexte des émotions à partir du contenu linguistique d’énoncés oraux : le système <fixed-case>E</fixed-case>mo<fixed-case>L</fixed-case>ogus MarcLe Tallec - JeanneVillaneau + JeanneVillaneau Jean-YvesAntoine AgataSavary ArielleSyssau-Vaccarella @@ -727,7 +727,7 @@ <fixed-case>R</fixed-case>ef<fixed-case>G</fixed-case>en : un module d’identification des chaînes de référence dépendant du genre textuel LaurenceLongo - AmaliaTodiraşcu + AmaliaTodiraşcu 129–134 Dans cet article, nous présentons RefGen, un module d’identification des chaînes de référence pour le français. RefGen effectue une annotation automatique des expressions référentielles puis identifie les relations de coréférence établies entre ces expressions pour former des chaînes de référence. Le calcul de la référence utilise des propriétés des chaînes de référence dépendantes du genre textuel, l’échelle d’accessibilité d’(Ariel, 1990) et une série de filtres lexicaux, morphosyntaxiques et sémantiques. Nous évaluons les premiers résultats de RefGen sur un corpus issu de rapports publics. 2010.jeptalnrecital-court.22 @@ -737,7 +737,7 @@ Détection et résolution d’entités nommées dans des dépêches d’agence RosaStern - BenoîtSagot + BenoîtSagot 135–140 Nous présentons NP, un système de reconnaissance d’entités nommées. Comprenant un module de résolution, il permet d’associer à chaque occurrence d’entité le référent qu’elle désigne parmi les entrées d’un référentiel dédié. NP apporte ainsi des informations pertinentes pour l’exploitation de l’extraction d’entités nommées en contexte applicatif. Ce système fait l’objet d’une évaluation grâce au développement d’un corpus annoté manuellement et adapté aux tâches de détection et de résolution. 2010.jeptalnrecital-court.23 @@ -747,7 +747,7 @@ Processus de décision à base de <fixed-case>SVM</fixed-case> pour la composition d’arbres de frames sémantiques Marie-JeanMeurs - FabriceLefèvre + FabriceLefèvre 141–146 Cet article présente un processus de décision basé sur des classifieurs à vaste marge (SVMDP) pour extraire l’information sémantique dans un système de dialogue oral. Dans notre composant de compréhension, l’information est représentée par des arbres de frames sémantiques définies selon le paradigme FrameNet. Le processus d’interprétation est réalisé en deux étapes. D’abord, des réseaux bayésiens dynamiques (DBN) sont utilisés comme modèles de génération pour inférer des fragments d’arbres de la requête utilisateur. Ensuite, notre SVMDP dépendant du contexte compose ces fragments afin d’obtenir la représentation sémantique globale du message. Les expériences sont menées sur le corpus de dialogue MEDIA. Une procédure semi-automatique fournit une annotation de référence en frames sur laquelle les paramètres des DBN et SVMDP sont appris. Les résultats montrent que la méthode permet d’améliorer les performances d’identification de frames pour les exemples de test les plus complexes par rapport à un processus de décision déterministe ad hoc. 2010.jeptalnrecital-court.24 @@ -768,7 +768,7 @@ Construction d’un lexique affectif pour le français à partir de <fixed-case>T</fixed-case>witter AlexanderPak - PatrickParoubek + PatrickParoubek 153–158 Un lexique affectif est un outil utile pour l’étude des émotions ainsi que pour la fouille d’opinion et l’analyse des sentiments. Un tel lexique contient des listes de mots annotés avec leurs évaluations émotionnelles. Il existe un certain nombre de lexiques affectifs pour la langue anglaise, espagnole, allemande, mais très peu pour le français. Un travail de longue haleine est nécessaire pour construire et enrichir un lexique affectif. Nous proposons d’utiliser Twitter, la plateforme la plus populaire de microblogging de nos jours, pour recueillir un corpus de textes émotionnels en français. En utilisant l’ensemble des données recueillies, nous avons estimé les normes affectives de chaque mot. Nous utilisons les données de la Norme Affective desMots Anglais (ANEW, Affective Norms of EnglishWords) que nous avons traduite en français afin de valider nos résultats. Les valeurs du coefficient tau de Kendall et du coefficient de corrélation de rang de Spearman montrent que nos scores estimés sont en accord avec les scores ANEW. 2010.jeptalnrecital-court.26 @@ -778,7 +778,7 @@ Analyse d’opinion : annotation sémantique de textes chinois LeiZhang - StéphaneFerrari + StéphaneFerrari 159–164 Notre travail concerne l’analyse automatique des énoncés d’opinion en chinois. En nous inspirant de la théorie linguistique de l’Appraisal, nous proposons une méthode fondée sur l’usage de lexiques et de règles locales pour déterminer les caractéristiques telles que la Force (intensité), le Focus (prototypicalité) et la polarité de tels énoncés. Nous présentons le modèle et sa mise en oeuvre sur un corpus journalistique. Si pour la détection d’énoncés d’opinion, la précision est bonne (94 %), le taux de rappel (67 %) pose cependant des questions sur l’enrichissement des ressources actuelles. 2010.jeptalnrecital-court.27 @@ -818,7 +818,7 @@ L’apport d’une approche hybride pour la reconnaissance des entités nommées en langue arabe InèsZribi SouhaMezghani Hammami - LamiaHadrich Belguith + LamiaHadrich Belguith 183–188 Dans cet article, nous proposons une méthode hybride pour la reconnaissance des entités nommées pour la langue arabe. Cette méthode profite, d’une part, des avantages de l’utilisation d’une méthode d’apprentissage pour extraire des règles permettant l’identification et la classification des entités nommées. D’autre part, elle repose sur un ensemble de règles extraites manuellement pour corriger et améliorer le résultat de la méthode d’apprentissage. Les résultats de l’évaluation de la méthode proposée sont encourageants. Nous avons obtenu un taux global de F-mesure égal à 79.24%. 2010.jeptalnrecital-court.31 @@ -858,7 +858,7 @@ Traduction de requêtes basée sur Wikipédia - BenoîtGaillard + BenoîtGaillard OlivierCollin MalekBoualem 207–212 @@ -881,7 +881,7 @@ Actes de la 17e conférence sur le Traitement Automatique des Langues Naturelles. Démonstrations - PhilippeLanglais + PhilippeLanglais MichelGagnon ATALA
Montréal, Canada
@@ -905,7 +905,7 @@
<fixed-case>T</fixed-case>ermino<fixed-case>W</fixed-case>eb : recherche et analyse d’information thématique - CarolineBarrière + CarolineBarrière 4–7 Notre démonstration porte sur le prototype TerminoWeb, une plateforme Web qui permet (1) la construction automatique d’un corpus thématique à partir d’une recherche de documents sur le Web, (2) l’extraction de termes du corpus, et (3) la recherche d’information définitionnelle sur ces termes en corpus. La plateforme intégrant les trois modules, elle aidera un langagier (terminologue, traducteur, rédacteur) à découvrir un nouveau domaine (thème) en facilitant la recherche et l’analyse de documents informatifs pertinents à ce domaine. 2010.jeptalnrecital-demonstration.2 @@ -914,10 +914,10 @@ The i<fixed-case>MAG</fixed-case> concept: multilingual access gateway to an elected Web sites with incremental quality increase through collaborative post-edition of <fixed-case>MT</fixed-case> pretranslations - ChristianBoitet + ChristianBoitet CongPhap Huynh HongThai Nguyen - ValérieBellynck + ValérieBellynck 8–15 We will demonstrate iMAGs (interactive Multilingual Access Gateways), in particular on a scientific laboratory web site and on the Greater Grenoble (La Métro) web site. 2010.jeptalnrecital-demonstration.3 @@ -939,7 +939,7 @@ Jean-PhilippeGoldman SophieRoekhaut AnneCatherine Simon - CédrickFairon + CédrickFairon RichardBeaufort 20–23 Nous présentons Expressive, un système de génération de parole expressive à partir de données non linguistiques. Ce système est composé de deux outils distincts : Taittingen, un générateur automatique de textes d’une grande variété lexico-syntaxique produits à partir d’une représentation conceptuelle du discours, et StyloPhone, un système de synthèse vocale multi-styles qui s’attache à rendre le discours produit attractif et naturel en proposant différents styles vocaux. @@ -959,9 +959,9 @@ Traitement automatique des langues des signes : le projet <fixed-case>D</fixed-case>icta-<fixed-case>S</fixed-case>ign, des corpus aux applications - AnneliesBraffort + AnneliesBraffort MichaelFilhol - JérémieSegouat + JérémieSegouat 28–31 Cet article présente Dicta-Sign, un projet de recherche sur le traitement automatique des langues des signes (LS), qui aborde un grand nombre de questions de recherche : linguistique de corpus, modélisation linguistique, reconnaissance et génération automatique. L’objectif de ce projet est de réaliser trois applications prototypes destinées aux usagers sourds : un traducteur de termes de LS à LS, un outil de recherche par l’exemple et un Wiki en LS. Pour cela, quatre corpus comparables de cinq heures de dialogue seront produits et analysés. De plus, des avancées significatives sont attendues dans le domaine des outils d’annotation. Dans ce projet, le LIMSI est en charge de l’élaboration des modèles linguistiques et participe aux aspects corpus et génération automatique. Nous nous proposons d’illustrer l’état d’avancement de Dicta-Sign au travers de vidéos extraites du corpus et de démonstrations des outils de traitement et de génération d’animations de signeur virtuel. 2010.jeptalnrecital-demonstration.7 @@ -992,7 +992,7 @@ Text-it /Voice-it Une application mobile de normalisation des <fixed-case>SMS</fixed-case> RichardBeaufort KévinMacé - CédrickFairon + CédrickFairon 40–43 Cet article présente Text-it / Voice-it, une application de normalisation des SMS pour téléphone mobile. L’application permet d’envoyer et de recevoir des SMS normalisés, et offre le choix entre un résultat textuel (Text-it) et vocal (Voice-it). 2010.jeptalnrecital-demonstration.10 @@ -1010,7 +1010,7 @@ <fixed-case>M</fixed-case>e<fixed-case>TAE</fixed-case> : Plate-forme d’annotation automatique et d’exploration sémantiques pour le domaine médical AsmaBen Abacha - PierreZweigenbaum + PierreZweigenbaum 48–51 Nous présentons une plate-forme d’annotation sémantique et d’exploration de textes médicaux, appelée « MeTAE ». Le processus d’annotation automatique comporte une première étape de reconnaissance des entités médicales présentes dans les textes suivie d’une étape d’identification des relations sémantiques qui les relient. Cette identification se fonde sur des patrons linguistiques construits manuellement pour chaque type de relation. MeTAE génère des annotations RDF à partir des informations extraites et offre une interface d’exploration des textes annotés avec des requêtes sous forme de formulaire. La plate-forme peut être utilisée pour analyser sémantiquement les textes médicaux ou interroger la base d’annotation disponible pour avoir une/des réponses à une requête donnée (e.g. « ?X prévient maladie d’Alzheimer », équivalent à la question « comment prévenir la maladie d’Alzheimer ? »). Cette application peut être la base d’un système de questions-réponses pour le domaine médical. 2010.jeptalnrecital-demonstration.12 @@ -1042,7 +1042,7 @@ Moz: Translation of Structured Terminology-Rich Text - GrahamRussell + GrahamRussell 60–63 Description of Moz, a translation support system designed for texts exhibiting a high proportion of structured and semi-structured terminological content. The system comprises a web-based collaborative translation memory, with high recall via subsentential linguistic analysis and facilities for messaging and quality assurance. It is in production use, translating some 140,000 words per week. 2010.jeptalnrecital-demonstration.15 @@ -1051,10 +1051,10 @@ <fixed-case>MACAON</fixed-case> Une chaîne linguistique pour le traitement de graphes de mots AlexisNasr - FrédéricBéchet + FrédéricBéchet Jean-FrançoisRey 64–67 - + 2010.jeptalnrecital-demonstration.16 fra nasr-etal-2010-macaon @@ -1064,7 +1064,7 @@ Actes de la 17e conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues AlexandrePatry - PhilippeLanglais + PhilippeLanglais AurélienMax ATALA
Montréal, Canada
diff --git a/data/xml/2010.tal.xml b/data/xml/2010.tal.xml index 28021d076e..03e6588924 100644 --- a/data/xml/2010.tal.xml +++ b/data/xml/2010.tal.xml @@ -86,7 +86,7 @@ <fixed-case>PARADOCS</fixed-case> : l’entremetteur de documents parallèles indépendant de la langue [<fixed-case>PARADOCS</fixed-case>: A Language Independant Go-Between for Mating Parallel Documents] AlexandrePatry - PhilippeLanglais + PhilippeLanglais 41–63 2010.tal-2.3 fra @@ -94,10 +94,10 @@ Micro-adaptation lexicale en traduction automatique statistique [Lexical Micro-adaptation in Statistical Machine Translation] - Josep MariaCrego + Josep MariaCrego GregorLeusch AurélienMax - HermannNey + HermannNey FrançoisYvon 65–93 2010.tal-2.4 @@ -106,7 +106,7 @@ Transliteration as Alignment vs. Transliteration as Generation for Crosslingual Information Retrieval - Anil KumarSingh + Anil KumarSingh SethuramalingamSubramaniam TarakaRama 95–117 @@ -130,7 +130,7 @@ Préface [Introduction] AgataJackiewicz SusanHunston - MarcEl-Bèze + MarcEl-Bèze 7–17 2010.tal-3.1 fra @@ -159,7 +159,7 @@ Le microblogging pour la micro analyse des sentiments et des opinons [Microblogging for Micro Sentiment Analysis and Opinion Mining] AlexanderPak - PatrickParoubek + PatrickParoubek 75–100 2010.tal-3.4 fra @@ -169,7 +169,7 @@ Extraction probabiliste de chaînes de mots relatives à une opinion [A probabilistic approach for extracting opinion-related word chains from texts] RemiLavalley ChloeClavel - PatriceBellot + PatriceBellot 101–130 2010.tal-3.5 fra diff --git a/data/xml/2010.tc.xml b/data/xml/2010.tc.xml index 353e3769fa..4411468c14 100644 --- a/data/xml/2010.tc.xml +++ b/data/xml/2010.tc.xml @@ -61,7 +61,7 @@ Next generation translation and localization: users are taking charge SharonO’Brien - ReinhardSchäler + ReinhardSchäler 2010.tc-1.8 obrien-schaler-2010-next @@ -102,7 +102,7 @@ A hybrid word alignment approach to improve translation lexicons with compound words and idiomatic expressions NasredineSemmar - ChristopheServan + ChristopheServan Gaëlde Chalendar BenoîtLe Ny Jean-JacquesBouzaglou diff --git a/data/xml/2011.eamt.xml b/data/xml/2011.eamt.xml index bf1f7cab8e..1294016140 100644 --- a/data/xml/2011.eamt.xml +++ b/data/xml/2011.eamt.xml @@ -7,7 +7,7 @@
Leuven, Belgium
May 30–31 2011 - Mikel L.Forcada + Mikel L.Forcada HeidiDepraetere VincentVandeghinste eamt @@ -44,7 +44,7 @@ A Comparative Evaluation of Research vs. Online <fixed-case>MT</fixed-case> Systems AntonioToral FedericoGaspari - Sudip KumarNaskar + Sudip KumarNaskar AndyWay 2011.eamt-1.4 2011.eamt-1.4.Presentation.pdf @@ -52,7 +52,7 @@
Experiments on Domain Adaptation for Patent Machine Translation in the <fixed-case>PL</fixed-case>u<fixed-case>TO</fixed-case> project - AlexandruCeauşu + AlexandruCeauşu JohnTinsley JianZhang AndyWay @@ -72,8 +72,8 @@ Evaluation of <fixed-case>SMT</fixed-case> in localization to under-resourced inflected language RaivisSkadiņš MarisPuriņš - IngunaSkadiņa - AndrejsVasiļjevs + IngunaSkadiņa + AndrejsVasiļjevs 2011.eamt-1.7 2011.eamt-1.7.Presentation.pdf skadins-etal-2011-evaluation @@ -117,7 +117,7 @@ Using word alignments to assist computer-aided translation users by marking which target-side words to change or keep unedited - MiquelEsplà + MiquelEsplà FelipeSánchez-Martínez Mikel L.Forcada 2011.eamt-1.13 @@ -140,62 +140,62 @@ Searching Parallel Corpora for Contextually Equivalent Terms - CarolineBarrière + CarolineBarrière PierreIsabelle 2011.eamt-1.16 barriere-isabelle-2011-searching Rule-based Reordering Constraints for Phrase-based <fixed-case>SMT</fixed-case> - Chooi-LingGoh - TakashiOnishi - EiichiroSumita + Chooi-LingGoh + TakashiOnishi + EiichiroSumita 2011.eamt-1.17 goh-etal-2011-rule Deriving translation units using small additional corpora Carlos A. HenríquezQ. - José B.Mariño - Rafael E.Banchs + José B.Mariño + Rafael E.Banchs 2011.eamt-1.18 q-etal-2011-deriving <fixed-case>BP</fixed-case>2<fixed-case>EP</fixed-case> - Adaptation of <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese texts to <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese - LuisMarujo + LuisMarujo NunoGrazina TiagoLuis WangLing - LuisaCoheur + LuisaCoheur IsabelTrancoso 2011.eamt-1.19 marujo-etal-2011-bp2ep Deriving translation units using small additional corpora - Carlos A.Henríquez Q. - José B.Mariño - Rafael E.Banchs + Carlos A.Henríquez Q. + José B.Mariño + Rafael E.Banchs henriquez-q-etal-2011-deriving Cognate Identification for a <fixed-case>F</fixed-case>rench - <fixed-case>R</fixed-case>omanian Lexical Alignment System: Empirical Study MirabelaNavlea - AmaliaTodiraşcu + AmaliaTodiraşcu 2011.eamt-1.21 navlea-todirascu-2011-cognate Rapid rule-based machine translation between <fixed-case>D</fixed-case>utch and <fixed-case>A</fixed-case>frikaans PimOtte - Francis M.Tyers + Francis M.Tyers 2011.eamt-1.22 otte-tyers-2011-rapid Preliminary Experiments on Using Users’ Post-Editions to Enhance a <fixed-case>SMT</fixed-case> System Oracle-based Training for Phrase-based Statistical Machine Translation - AnkitSrivastava + AnkitSrivastava YanjunMa AndyWay 2011.eamt-1.23 @@ -205,8 +205,8 @@ Oracle-based Training for Phrase-based Statistical Machine Translation MarionPotet EmmanuelleEsperança-Rodier - HervéBlanchon - LaurentBesacier + HervéBlanchon + LaurentBesacier 2011.eamt-1.24 potet-etal-2011-oracle @@ -235,7 +235,7 @@
Using Example-Based <fixed-case>MT</fixed-case> to Support Statistical <fixed-case>MT</fixed-case> when Translating Homogeneous Data in a Resource-Poor Setting - SandipanDandapat + SandipanDandapat SaraMorrissey AndyWay Mikel L.Forcada @@ -248,7 +248,7 @@ SarahEbling AndyWay MartinVolk - Sudip KumarNaskar + Sudip KumarNaskar 2011.eamt-1.29 2011.eamt-1.29.Presentation.pdf ebling-etal-2011-combining @@ -258,7 +258,7 @@ Martha DísBrandt HrafhLoftsson HlynurSigurþórsson - Francis M.Tyers + Francis M.Tyers 2011.eamt-1.30 2011.eamt-1.30.Presentation.pdf brandt-etal-2011-apertium @@ -282,7 +282,7 @@ Minimum Error Rate Training Semiring - ArtemSokolov + ArtemSokolov FrançoisYvon 2011.eamt-1.33 2011.eamt-1.33.Presentation.pdf @@ -299,16 +299,16 @@ Bilingual segmentation for phrasetable pruning in Statistical Machine Translation - GermánSanchis-Trilles - DanielOrtiz-Martínez - JesúsGonzález-Rubio + GermánSanchis-Trilles + DanielOrtiz-Martínez + JesúsGonzález-Rubio JorgeGonzález 2011.eamt-1.35 sanchis-trilles-etal-2011-bilingual From Human to Automatic Error Classification for Machine Translation Output - MajaPopović + MajaPopović AljoschaBurchardt 2011.eamt-1.36 2011.eamt-1.36.Presentation.pdf @@ -319,7 +319,7 @@ MatthiasHuck DavidVilar DanielStein - HermannNey + HermannNey 2011.eamt-1.37 2011.eamt-1.37.Presentation.pdf huck-etal-2011-advancements diff --git a/data/xml/2011.freeopmt.xml b/data/xml/2011.freeopmt.xml index b0b581a7cb..24c8cbd53a 100644 --- a/data/xml/2011.freeopmt.xml +++ b/data/xml/2011.freeopmt.xml @@ -23,7 +23,7 @@ <fixed-case>F</fixed-case>ree<fixed-case>L</fixed-case>ing: open-source natural language processing for research and development - LluísPadró + LluísPadró 2 2011.freeopmt-1.2 padro-2011-freeling @@ -50,9 +50,9 @@ Bootstrapping a statistical speech translator from a rule-based one - MannyRayner + MannyRayner PaulaEstrella - PierretteBouillon + PierretteBouillon 21-28 2011.freeopmt-1.5 rayner-etal-2011-bootstrapping @@ -97,7 +97,7 @@ A widely used machine translation service and its migration to a free/open-source solution: the case of Softcatalà XavierIvars-Ribes - Victor M.Sánchez-Cartagena + Victor M.Sánchez-Cartagena 61-68 2011.freeopmt-1.10 Softcatala` is a non-profit association created more than 10 years ago to fight the marginalisation of the Catalan language in information and communication technologies. It has led the localisation of many applications and the creation of a website which allows its users to translate texts between Spanish and Catalan using an external closedsource translation engine. Recently, the closed-source translation back-end has been replaced by a free/open-source solution completely managed by Softcatala`: the Apertium machine translation platform and the ScaleMT web service framework. Thanks to the openness of the new solution, it is possible to take advantage of the huge amount of users of the Softcatala` translation service to improve it, using a series of methods presented in this paper. In addition, a study of the translations requested by the users has been carried out, and it shows that the translation back-end change has not affected the usage patterns. @@ -115,8 +115,8 @@ An <fixed-case>I</fixed-case>talian to <fixed-case>C</fixed-case>atalan <fixed-case>RBMT</fixed-case> system reusing data from existing language pairs AntonioToral - MireiaGinestí-Rosell - FrancisTyers + MireiaGinestí-Rosell + FrancisTyers 77-81 2011.freeopmt-1.12 This paper presents an Italian→Catalan RBMT system automatically built by combining the linguistic data of the existing pairs Spanish–Catalan and Spanish–Italian. A lightweight manual postprocessing is carried out in order to fix inconsistencies in the automatically derived dictionaries and to add very frequent words that are missing according to a corpus analysis. The system is evaluated on the KDE4 corpus and outperforms Google Translate by approximately ten absolute points in terms of both TER and GTM. diff --git a/data/xml/2011.iwslt.xml b/data/xml/2011.iwslt.xml index 60f9f783c0..0dc5e4e79b 100644 --- a/data/xml/2011.iwslt.xml +++ b/data/xml/2011.iwslt.xml @@ -14,7 +14,7 @@ Data-intensive approaches for <fixed-case>ASR</fixed-case> - SadaokiFurui + SadaokiFurui 2011.iwslt-keynotes.1 furui-2011-data @@ -26,7 +26,7 @@ Resource-rich research on natural language processing and understanding - JunichiTsujii + JunichiTsujii 2011.iwslt-keynotes.3 tsujii-2011-resource @@ -47,8 +47,8 @@ Overview of the <fixed-case>IWSLT</fixed-case> 2011 evaluation campaign MarcelloFederico LuisaBentivogli - MichaelPaul - SebastianStüker + MichaelPaul + SebastianStüker 11-27 2011.iwslt-evaluation.1 We report here on the eighth Evaluation Campaign organized by the IWSLT workshop. This year, the IWSLT evaluation focused on the automatic translation of public talks and included tracks for speech recognition, speech translation, text translation, and system combination. Unlike previous years, all data supplied for the evaluation has been publicly released on the workshop website, and is at the disposal of researchers interested in working on our benchmarks and in comparing their results with those published at the workshop. This paper provides an overview of the IWSLT 2011 Evaluation Campaign, which includes: descriptions of the supplied data and evaluation specifications of each track, the list of participants specifying their submitted runs, a detailed description of the subjective evaluation carried out, the main findings of each exercise drawn from the results and the system descriptions prepared by the participants, and, finally, several detailed tables reporting all the evaluation results. @@ -59,10 +59,10 @@ KazuhikoAbe YouzhengWu Chien-linHuang - Paul R.Dixon + Paul R.Dixon ShigekiMatsuda ChioriHori - HidekiKashioka + HidekiKashioka 28-33 2011.iwslt-evaluation.2 In this paper, we describe NICT’s participation in the IWSLT 2011 evaluation campaign for the ASR Track. To recognize spontaneous speech, we prepared an acoustic model trained by more spontaneous speech corpora and a language model constructed with text corpora distributed by the organizer. We built the multi-pass ASR system by adapting the acoustic and language models with previous ASR results. The target speech was selected from talks on the TED (Technology, Entertainment, Design) program. Here, a large reduction in word error rate was obtained by the speaker adaptation of the acoustic model with MLLR. Additional improvement was achieved not only by adaptation of the language model but also by parallel usage of the baseline and speaker-dependent acoustic models. Accordingly, the final WER was reduced by 30% from the baseline ASR for the distributed test set. @@ -71,7 +71,7 @@ The <fixed-case>MIT</fixed-case>-<fixed-case>LL</fixed-case>/<fixed-case>AFRL</fixed-case> <fixed-case>IWSLT</fixed-case>-2011 <fixed-case>MT</fixed-case> system A. RyanAminzadeh - TimAnderson + TimAnderson RaySlyh BrianOre EricHansen @@ -87,11 +87,11 @@ The <fixed-case>DCU</fixed-case> machine translation systems for <fixed-case>IWSLT</fixed-case> 2011 PratyushBanerjee HalaAlmaghout - SudipNaskar + SudipNaskar JohannRoturier JieJiang AndyWay - Josefvan Genabith + Josefvan Genabith 41-48 2011.iwslt-evaluation.4 In this paper, we provide a description of the Dublin City University’s (DCU) submissions in the IWSLT 2011 evaluationcampaign.1 WeparticipatedintheArabic-Englishand Chinese-English Machine Translation(MT) track translation tasks. We use phrase-based statistical machine translation (PBSMT) models to create the baseline system. Due to the open-domain nature of the data to be translated, we use domain adaptation techniques to improve the quality of translation. Furthermore, we explore target-side syntactic augmentation for an Hierarchical Phrase-Based (HPB) SMT model. Combinatory Categorial Grammar (CCG) is used to extract labels for target-side phrases and non-terminals in the HPB system. Combining the domain adapted language models with the CCG-augmented HPB system gave us the best translations for both language pairs providing statistically significant improvements of 6.09 absolute BLEU points (25.94% relative) and 1.69 absolute BLEU points (15.89% relative) over the unadapted PBSMT baselines for the Arabic-English and Chinese-English language pairs, respectively. @@ -100,9 +100,9 @@ The <fixed-case>NICT</fixed-case> translation system for <fixed-case>IWSLT</fixed-case> 2011 AndrewFinch - Chooi-LingGoh + Chooi-LingGoh GrahamNeubig - EiichiroSumita + EiichiroSumita 49-56 2011.iwslt-evaluation.5 This paper describes NICT’s participation in the IWSLT 2011 evaluation campaign for the TED speech translation ChineseEnglish shared-task. Our approach was based on a phrasebased statistical machine translation system that was augmented in two ways. Firstly we introduced rule-based re-ordering constraints on the decoding. This consisted of a set of rules that were used to segment the input utterances into segments that could be decoded almost independently. This idea here being that constraining the decoding process in this manner would greatly reduce the search space of the decoder, and cut out many possibilities for error while at the same time allowing for a correct output to be generated. The rules we used exploit punctuation and spacing in the input utterances, and we use these positions to delimit our segments. Not all punctuation/spacing positions were used as segment boundaries, and the set of used positions were determined by a set of linguistically-based heuristics. Secondly we used two heterogeneous methods to build the translation model, and lexical reordering model for our systems. The first method employed the popular method of using GIZA++ for alignment in combination with phraseextraction heuristics. The second method used a recentlydeveloped Bayesian alignment technique that is able to perform both phrase-to-phrase alignment and phrase pair extraction within a single unsupervised process. The models produced by this type of alignment technique are typically very compact whilst at the same time maintaining a high level of translation quality. We evaluated both of these methods of translation model construction in isolation, and our results show their performance is comparable. We also integrated both models by linear interpolation to obtain a model that outperforms either component. Finally, we added an indicator feature into the log-linear model to indicate those phrases that were in the intersection of the two translation models. The addition of this feature was also able to provide a small improvement in performance. @@ -114,7 +114,7 @@ AmittaiAxelrod LiDeng AlexAcero - Mei-YuhHwang + Mei-YuhHwang AlisaNguyen AndrewWang XiahuiHuang @@ -127,7 +127,7 @@ <fixed-case>LIMSI</fixed-case>’s experiments in domain adaptation for <fixed-case>IWSLT</fixed-case>11 ThomasLavergne AlexandreAllauzen - Hai-SonLe + Hai-SonLe FrançoisYvon 62-67 2011.iwslt-evaluation.7 @@ -137,8 +137,8 @@ <fixed-case>LIG</fixed-case> <fixed-case>E</fixed-case>nglish-<fixed-case>F</fixed-case>rench spoken language translation system for <fixed-case>IWSLT</fixed-case> 2011 BenjaminLecouteux - LaurentBesacier - HervéBlanchon + LaurentBesacier + HervéBlanchon 68-72 2011.iwslt-evaluation.8 This paper describes the system developed by the LIG laboratory for the 2011 IWSLT evaluation. We participated to the English-French MT and SLT tasks. The development of a reference translation system (MT task), as well as an ASR output translation system (SLT task) are presented. We focus this year on the SLT task and on the use of multiple 1-best ASR outputs to improve overall translation quality. The main experiment presented here compares the performance of a SLT system where multiple ASR 1-best are combined before translation (source combination), with a SLT system where multiple ASR 1-best are translated, the system combination being conducted afterwards on the target side (target combination). The experimental results show that the second approach (target combination) overpasses the first one, when the performance is measured with BLEU. @@ -150,7 +150,7 @@ EunachCho JanNiehues TeresaHerrmann - AlexWaibel + AlexWaibel 73-78 2011.iwslt-evaluation.9 This paper presents the KIT system participating in the English→French TALK Translation tasks in the framework of the IWSLT 2011 machine translation evaluation. Our system is a phrase-based translation system using POS-based reordering extended with many additional features. First of all, a special preprocessing is devoted to the Giga corpus in order to minimize the effect of the great amount of noise it contains. In addition, the system gives more importance to the in-domain data by adapting the translation and the language models as well as by using a wordcluster language model. Furthermore, the system is extended by a bilingual language model and a discriminative word lexicon. The automatic speech transcription input usually has no or wrong punctuation marks, therefore these marks were especially removed from the source training data for the SLT system training. @@ -160,7 +160,7 @@ <fixed-case>LIUM</fixed-case>’s systems for the <fixed-case>IWSLT</fixed-case> 2011 speech translation tasks AnthonyRousseau FethiBougares - PaulDeléglise + PaulDeléglise HolgerSchwenk YannickEstève 79-85 @@ -185,10 +185,10 @@ The 2011 <fixed-case>KIT</fixed-case> <fixed-case>E</fixed-case>nglish <fixed-case>ASR</fixed-case> system for the <fixed-case>IWSLT</fixed-case> evaluation - SebastianStüker + SebastianStüker KevinKilgour ChristianSaam - AlexWaibel + AlexWaibel 94-97 2011.iwslt-evaluation.12 This paper describes our English Speech-to-Text (STT) system for the 2011 IWSLT ASR track. The system consists of 2 subsystems with different front-ends—one MVDR based, one MFCC based—which are combined using confusion network combination to provide a base for a second pass speaker adapted MVDR system. We demonstrate that this set-up produces competitive results on the IWSLT 2010 dev and test sets. @@ -198,7 +198,7 @@ <fixed-case>DFKI</fixed-case>’s <fixed-case>SC</fixed-case> and <fixed-case>MT</fixed-case> submissions to <fixed-case>IWSLT</fixed-case> 2011 DavidVilar EleftheriosAvramidis - MajaPopović + MajaPopović SabineHunsicker 98-105 2011.iwslt-evaluation.13 @@ -214,7 +214,7 @@ MinweiFeng StephanPeitz ChristophSchmidt - HermannNey + HermannNey 106-113 2011.iwslt-evaluation.14 In this paper the statistical machine translation (SMT) systems of RWTH Aachen University developed for the evaluation campaign of the International Workshop on Spoken Language Translation (IWSLT) 2011 is presented. We participated in the MT (English-French, Arabic-English, ChineseEnglish) and SLT (English-French) tracks. Both hierarchical and phrase-based SMT decoders are applied. A number of different techniques are evaluated, including domain adaptation via monolingual and bilingual data selection, phrase training, different lexical smoothing methods, additional reordering models for the hierarchical system, various Arabic and Chinese segmentation methods, punctuation prediction for speech recognition output, and system combination. By application of these methods we can show considerable improvements over the respective baseline systems. @@ -223,17 +223,17 @@ Advances on spoken language translation in the Quaero program KarimBoudahmane - BiankaBuschbeck + BiankaBuschbeck EunahCho - Josep MariaCrego + Josep MariaCrego MarkusFreitag ThomasLavergne - HermannNey + HermannNey JanNiehues StephanPeitz JeanSenellart - ArtemSokolov - AlexWaibel + ArtemSokolov + AlexWaibel TonioWandmacher JoernWuebker FrançoisYvon @@ -244,7 +244,7 @@ Speech recognition for machine translation in Quaero - LoriLamel + LoriLamel SandrineCourcinous JulienDespres Jean-LucGauvain @@ -252,18 +252,18 @@ KevinKilgour FlorianKraft Viet-BacLe - HermannNey + HermannNey MarkusNußbaum-Thom IlyaOparin TimSchlippe - RalfSchlüter + RalfSchlüter TanjaSchultz ThiagoFraga da Silva - SebastianStüker + SebastianStüker MartinSundermeyer BiancaVieru Ngoc ThangVu - AlexanderWaibel + AlexanderWaibel CécileWoehrling 121-128 2011.iwslt-evaluation.16 @@ -273,7 +273,7 @@ Protocol and lessons learnt from the production of parallel corpora for the evaluation of speech translation systems VictoriaArranz - OlivierHamon + OlivierHamon KarimBoudahmane MartineGarnier-Rizet 129-135 @@ -323,10 +323,10 @@ Investigation of the effects of <fixed-case>ASR</fixed-case> tuning on speech translation performance - Paul R.Dixon + Paul R.Dixon AndrewFinch ChioriHori - HidekiKashioka + HidekiKashioka 167-174 2011.iwslt-evaluation.22 In this paper we describe some of our recent investigations into ASR and SMT coupling issues from an ASR perspective. Our study was motivated by several areas: Firstly, to understand how standard ASR tuning procedures effect the SMT performance and whether it is safe to perform this tuning in isolation. Secondly, to investigate how vocabulary and segmentation mismatches between the ASR and SMT system effect the performance. Thirdly, to uncover any practical issues that arise when using a WFST based speech decoder for tight coupling as opposed to a more traditional tree-search decoding architecture. On the IWSLT07 Japanese-English task we found that larger language model weights only helped the SMT performance when the ASR decoder was tuned in a sub-optimal manner. When we considered the performance with suitable wide beams that ensured the ASR accuracy had converged we observed the language model weight had little influence on the SMT BLEU scores. After the construction of the phrase table the actual SMT vocabulary can be less than the training data vocabulary. By reducing the ASR lexicon to only cover the words the SMT system could accept, we found this lead to an increase in the ASR error rates, however the SMT BLEU scores were nearly unchanged. From a practical point of view this is a useful result as it means we can significantly reduce the memory footprint of the ASR system. We also investigated coupling WFST based ASR to a simple WFST based translation decoder and found it was crucial to perform phrase table expansion to avoid OOV problems. For the WFST translation decoder we describe a semiring based approach for optimizing the log-linear weights. @@ -336,7 +336,7 @@ Extending a probabilistic phrase alignment approach for <fixed-case>SMT</fixed-case> MridulGupta SanjikaHewavitharana - StephanVogel + StephanVogel 175-182 2011.iwslt-evaluation.23 Phrase alignment is a crucial step in phrase-based statistical machine translation. We explore a way of improving phrase alignment by adding syntactic information in the form of chunks as soft constraints guided by an in-depth and detailed analysis on a hand-aligned data set. We extend a probabilistic phrase alignment model that extracts phrase pairs by optimizing phrase pair boundaries over the sentence pair [1]. The boundaries of the target phrase are chosen such that the overall sentence alignment probability is optimal. Viterbi alignment information is also added in the extended model with a view of improving phrase alignment. We extract phrase pairs using a relatively larger number of features which are discriminatively trained using a large-margin online learning algorithm, i.e., Margin Infused Relaxed Algorithm (MIRA) and integrate it in our approach. Initial experiments show improvements in both phrase alignment and translation quality for Arabic-English on a moderate-size translation task. @@ -372,7 +372,7 @@ MatthiasHuck SaabMansour SimonWiesler - HermannNey + HermannNey 191-198 2011.iwslt-papers.1 In this paper, we investigate lexicon models for hierarchical phrase-based statistical machine translation. We study five types of lexicon models: a model which is extracted from word-aligned training data and—given the word alignment matrix—relies on pure relative frequencies [1]; the IBM model 1 lexicon [2]; a regularized version of IBM model 1; a triplet lexicon model variant [3]; and a discriminatively trained word lexicon model [4]. We explore sourceto-target models with phrase-level as well as sentence-level scoring and target-to-source models with scoring on phrase level only. For the first two types of lexicon models, we compare several scoring variants. All models are used during search, i.e. they are incorporated directly into the log-linear model combination of the decoder. Phrase table smoothing with triplet lexicon models and with discriminative word lexicons are novel contributions. We also propose a new regularization technique for IBM model 1 by means of the Kullback-Leibler divergence with the empirical unigram distribution as regularization term. Experiments are carried out on the large-scale NIST Chinese→English translation task and on the English→French and Arabic→English IWSLT TED tasks. For Chinese→English and English→French, we obtain the best results by using the discriminative word lexicon to smooth our phrase tables. @@ -383,8 +383,8 @@ KevinKilgour ChristianSaam ChristianMohr - SebastianStüker - AlexWaibel + SebastianStüker + AlexWaibel 199-205 2011.iwslt-papers.2 This paper describes our current Spanish speech-to-text (STT) system with which we participated in the 2011 Quaero STT evaluation that is being developed within the Quaero program. The system consists of 4 separate subsystems, as well as the standard MFCC and MVDR phoneme based subsystems we included a both a phoneme and grapheme based bottleneck subsystem. We carefully evaluate the performance of each subsystem. After including several new techniques we were able to reduce the WER by over 30% from 20.79% to 14.53%. @@ -396,8 +396,8 @@ PávelCalado BrunoMartins IsabelTrancoso - AlanBlack - LuísaCoheur + AlanBlack + LuísaCoheur 206-213 2011.iwslt-papers.3 This work describes a process to extract Named Entity (NE) translations from the text available in web links (anchor texts). It translates a NE by retrieving a list of web documents in the target language, extracting the anchor texts from the links to those documents and finding the best translation from the anchor texts, using a combination of features, some of which, are specific to anchor texts. Experiments performed on a manually built corpora, suggest that over 70% of the NEs, ranging from unpopular to popular entities, can be translated correctly using sorely anchor texts. Tests on a Machine Translation task indicate that the system can be used to improve the quality of the translations of state-of-the-art statistical machine translation systems. @@ -408,7 +408,7 @@ PaulMaergner KevinKilgour IanLane - AlexWaibel + AlexWaibel 214-221 2011.iwslt-papers.4 In this work, we propose a novel method for vocabulary selection which enables simultaneous speech recognition systems for lectures to automatically adapt to the diverse topics that occur in educational and scientific lectures. Utilizing materials that are available before the lecture begins, such as lecture slides, our proposed framework iteratively searches for related documents on the World Wide Web and generates a lecture-specific vocabulary and language model based on the resulting documents. In this paper, we introduce a novel method for vocabulary selection where we rank vocabulary that occurs in the collected documents based on a relevance score which is calculated using a combination of word features. Vocabulary selection is a critical component for topic adaptation that has typically been overlooked in prior works. On the interACT German-English simultaneous lecture translation system our proposed approach significantly improved vocabulary coverage, reducing the out-of-vocabulary rate on average by 57.0% and up to 84.9%, compared to a lecture-independent baseline. Furthermore, our approach reduced the word error rate by up to 25.3% (on average 13.2% across all lectures), compared to a lectureindependent baseline. @@ -418,7 +418,7 @@ Combining translation and language model scoring for domain-specific data filtering SaabMansour JoernWuebker - HermannNey + HermannNey 222-229 2011.iwslt-papers.5 The increasing popularity of statistical machine translation (SMT) systems is introducing new domains of translation that need to be tackled. As many resources are already available, domain adaptation methods can be applied to utilize these recourses in the most beneficial way for the new domain. We explore adaptation via filtering, using the crossentropy scores to discard irrelevant sentences. We focus on filtering for two important components of an SMT system, namely the language model (LM) and the translation model (TM). Previous work has already applied LM cross-entropy based scoring for filtering. We argue that LM cross-entropy might be appropriate for LM filtering, but not as much for TM filtering. We develop a novel filtering approach based on a combined TM and LM cross-entropy scores. We experiment with two large-scale translation tasks, the Arabic-to-English and English-to-French IWSLT 2011 TED Talks MT tasks. For LM filtering, we achieve strong perplexity improvements which carry over to the translation quality with improvements up to +0.4% BLEU. For TM filtering, the combined method achieves small but consistent improvements over the standalone methods. As a side effect of adaptation via filtering, the fully fledged SMT system vocabulary size and phrase table size are reduced by a factor of at least 2 while up to +0.6% BLEU improvement is observed. @@ -427,7 +427,7 @@ Using <fixed-case>W</fixed-case>ikipedia to translate domain-specific terms in <fixed-case>SMT</fixed-case> JanNiehues - AlexWaibel + AlexWaibel 230-237 2011.iwslt-papers.6 When building a university lecture translation system, one important step is to adapt it to the target domain. One problem in this adaptation task is to acquire translations for domain specific terms. In this approach we tried to get these translations from Wikipedia, which provides articles on very specific topics in many different languages. To extract translations for the domain specific terms, we used the interlanguage links of Wikipedia . We analyzed different methods to integrate this corpus into our system and explored methods to disambiguate between different translations by using the text of the articles. In addition, we developed methods to handle different morphological forms of the specific terms in morphologically rich input languages like German. The results show that the number of out-of-vocabulary (OOV) words could be reduced by 50% on computer science lectures and the translation quality could be improved by more than 1 BLEU point. @@ -438,7 +438,7 @@ StephanPeitz MarkusFreitag ArneMauser - HermannNey + HermannNey 238-245 2011.iwslt-papers.7 Punctuation prediction is an important task in Spoken Language Translation. The output of speech recognition systems does not typically contain punctuation marks. In this paper we analyze different methods for punctuation prediction and show improvements in the quality of the final translation output. In our experiments we compare the different approaches and show improvements of up to 0.8 BLEU points on the IWSLT 2011 English French Speech Translation of Talks task using a translation system to translate from unpunctuated to punctuated text instead of a language model based punctuation prediction method. Furthermore, we do a system combination of the hypotheses of all our different approaches and get an additional improvement of 0.4 points in BLEU. @@ -448,7 +448,7 @@ Soft string-to-dependency hierarchical machine translation Jan-ThorstenPeter MatthiasHuck - HermannNey + HermannNey DanielStein 246-253 2011.iwslt-papers.8 @@ -481,7 +481,7 @@ KeijiYasuda HideoOkuma MasaoUtiyama - EiichiroSumita + EiichiroSumita 269-274 2011.iwslt-papers.11 In order to efficiently improve machine translation systems, we propose a method which selects data to be annotated (manually translated) from speech-to-speech translation field data. For the selection experiments, we used data from field experiments conducted during the 2009 fiscal year in five areas of Japan. For the selection experiments, we used data sets from two areas: one data set giving the lowest baseline speech translation performance for its test set, and another data set giving the highest. In the experiments, we compare two methods for selecting data to be manually translated from the field data. Both of them use source side language models for data selection, but in different manners. According to the experimental results, either or both of the methods show larger improvements compared to a random data selection. diff --git a/data/xml/2011.jeptalnrecital.xml b/data/xml/2011.jeptalnrecital.xml index f9c2e98cdf..3d468fbbb7 100644 --- a/data/xml/2011.jeptalnrecital.xml +++ b/data/xml/2011.jeptalnrecital.xml @@ -26,9 +26,9 @@ Theorie et Praxis Une optique sur les travaux en <fixed-case>TAL</fixed-case> sur le discours et le dialogue (Theory and Praxis A view on the <fixed-case>NLP</fixed-case> works in discourse and dialogue) - NicholasAsher + NicholasAsher 17–17 - + 2011.jeptalnrecital-invite.2 fra asher-2011-theorie @@ -70,9 +70,9 @@ Génération automatique de motifs de détection d’entités nommées en utilisant des contenus encyclopédiques (Automatic generation of named entity detection patterns using encyclopedic contents) - EricCharton + EricCharton MichelGagnon - BenoitOzell + BenoitOzell 13–24 Les encyclopédies numériques contiennent aujourd’hui de vastes inventaires de formes d’écritures pour des noms de personnes, de lieux, de produits ou d’organisation. Nous présentons un système hybride de détection d’entités nommées qui combine un classifieur à base de Champs Conditionnel Aléatoires avec un ensemble de motifs de détection extraits automatiquement d’un contenu encyclopédique. Nous proposons d’extraire depuis des éditions en plusieurs langues de l’encyclopédie Wikipédia de grandes quantités de formes d’écriture que nous utilisons en tant que motifs de détection des entités nommées. Nous décrivons une méthode qui nous assure de ne conserver dans cette ressources que des formes non ambiguës susceptibles de venir renforcer un système de détection d’entités nommées automatique. Nous procédons à un ensemble d’expériences qui nous permettent de comparer un système d’étiquetage à base de CRF avec un système utilisant exclusivement des motifs de détection. Puis nous fusionnons les résultats des deux systèmes et montrons qu’un gain de performances est obtenu grâce à cette proposition. 2011.jeptalnrecital-long.2 @@ -105,7 +105,7 @@ Utilisation d’un score de qualité de traduction pour le résumé multi-document cross-lingue (Using translation quality scores for cross-language multi-document summarization) StéphaneHuet FlorianBoudin - Juan-ManuelTorres-Moreno + Juan-ManuelTorres-Moreno 49–58 Le résumé automatique cross-lingue consiste à générer un résumé rédigé dans une langue différente de celle utilisée dans les documents sources. Dans cet article, nous proposons une approche de résumé automatique multi-document, basée sur une représentation par graphe, qui prend en compte des scores de qualité de traduction lors du processus de sélection des phrases. Nous évaluons notre méthode sur un sous-ensemble manuellement traduit des données utilisées lors de la campagne d’évaluation internationale DUC 2004. Les résultats expérimentaux indiquent que notre approche permet d’améliorer la lisibilité des résumés générés, sans pour autant dégrader leur informativité. 2011.jeptalnrecital-long.5 @@ -117,8 +117,8 @@ CyrilGrouin LouiseDeléger BrunoCartoni - SophieRosset - PierreZweigenbaum + SophieRosset + PierreZweigenbaum 59–70 Pourtant essentiel pour appréhender rapidement et globalement l’état de santé des patients, l’accès aux informations médicales liées aux prescriptions médicamenteuses et aux concepts médicaux par les outils informatiques se révèle particulièrement difficile. Ces informations sont en effet généralement rédigées en texte libre dans les comptes rendus hospitaliers et nécessitent le développement de techniques dédiées. Cet article présente les stratégies mises en oeuvre pour extraire les prescriptions médicales et les concepts médicaux dans des comptes rendus hospitaliers rédigés en anglais. Nos systèmes, fondés sur des approches à base de règles et d’apprentissage automatique, obtiennent une F1-mesure globale de 0,773 dans l’extraction des prescriptions médicales et dans le repérage et le typage des concepts médicaux. 2011.jeptalnrecital-long.6 @@ -128,8 +128,8 @@ Comparaison et combinaison d’approches pour la portabilité vers une nouvelle langue d’un système de compréhension de l’oral (Comparison and combination of approaches for the portability to a new language of an oral comprehension system) BassamJabaian - LaurentBesacier - FabriceLefèvre + LaurentBesacier + FabriceLefèvre 71–82 Dans cet article, nous proposons plusieurs approches pour la portabilité du module de compréhension de la parole (SLU) d’un système de dialogue d’une langue vers une autre. On montre que l’utilisation des traductions automatiques statistiques (SMT) aide à réduire le temps et le cout de la portabilité d’un tel système d’une langue source vers une langue cible. Pour la tache d’étiquetage sémantique on propose d’utiliser soit les champs aléatoires conditionnels (CRF), soit l’approche à base de séquences (PH-SMT). Les résultats expérimentaux montrent l’efficacité des méthodes proposées pour une portabilité rapide du SLU vers une nouvelle langue. On propose aussi deux méthodes pour accroître la robustesse du SLU aux erreurs de traduction. Enfin on montre que la combinaison de ces approches réduit les erreurs du système. Ces travaux sont motivés par la disponibilité du corpus MEDIA français et de la traduction manuelle vers l’italien d’une sous partie de ce corpus. 2011.jeptalnrecital-long.7 @@ -141,7 +141,7 @@ ThierryBazillon BenjaminMaza MickaelRouvier - FrédéricBéchet + FrédéricBéchet AlexisNasr 83–93 La fouille de données orales est un domaine de recherche visant à caractériser un flux audio contenant de la parole d’un ou plusieurs locuteurs, à l’aide de descripteurs liés à la forme et au contenu du signal. Outre la transcription automatique en mots des paroles prononcées, des informations sur le type de flux audio traité ainsi que sur le rôle et l’identité des locuteurs sont également cruciales pour permettre des requêtes complexes telles que : « chercher des débats sur le thème X », « trouver toutes les interviews de Y », etc. Dans ce cadre, et en traitant des conversations enregistrées lors d’émissions de radio ou de télévision, nous étudions la manière dont les locuteurs expriment des questions dans les conversations, en partant de l’intuition initiale que la forme des questions posées est une signature du rôle du locuteur dans la conversation (présentateur, invité, auditeur, etc.). En proposant une classification du type des questions et en utilisant ces informations en complément des descripteurs généralement utilisés dans la littérature pour classer les locuteurs par rôle, nous espérons améliorer l’étape de classification, et valider par la même occasion notre intuition initiale. @@ -162,9 +162,9 @@ Extraction de patrons sémantiques appliquée à la classification d’Entités Nommées (Extraction of semantic patterns applied to the classification of named entities) - IsmaïlEl Maarouf - JeanneVillaneau - SophieRosset + IsmaïlEl Maarouf + JeanneVillaneau + SophieRosset 106–116 La variabilité des corpus constitue un problème majeur pour les systèmes de reconnaissance d’entités nommées. L’une des pistes possibles pour y remédier est l’utilisation d’approches linguistiques pour les adapter à de nouveaux contextes : la construction de patrons sémantiques peut permettre de désambiguïser les entités nommées en structurant leur environnement syntaxico-sémantique. Cet article présente une première réalisation sur un corpus de presse d’un système de correction. Après une étape de segmentation sur des critères discursifs de surface, le système extrait et pondère les patrons liés à une classe d’entité nommée fournie par un analyseur. Malgré des modèles encore relativement élémentaires, les résultats obtenus sont encourageants et montrent la nécessité d’un traitement plus approfondi de la classe Organisation. 2011.jeptalnrecital-long.10 @@ -174,7 +174,7 @@ Désambiguïsation lexicale par propagation de mesures sémantiques locales par algorithmes à colonies de fourmis (Lexical disambiguation by propagation of local semantic measures using ant colony algorithms) DidierSchwab - JérômeGoulian + JérômeGoulian NathanGuillaume 117–128 Effectuer une tâche de désambiguïsation lexicale peut permettre d’améliorer de nombreuses applications du traitement automatique des langues comme l’extraction d’informations multilingues, ou la traduction automatique. Schématiquement, il s’agit de choisir quel est le sens le plus approprié pour chaque mot d’un texte. Une des approches classiques consiste à estimer la proximité sémantique qui existe entre deux sens de mots puis de l’étendre à l’ensemble du texte. La méthode la plus directe donne un score à toutes les paires de sens de mots puis choisit la chaîne de sens qui a le meilleur score. La complexité de cet algorithme est exponentielle et le contexte qu’il est calculatoirement possible d’utiliser s’en trouve réduit. Il ne s’agit donc pas d’une solution viable. Dans cet article, nous nous intéressons à une autre méthode, l’adaptation d’un algorithme à colonies de fourmis. Nous présentons ses caractéristiques et montrons qu’il permet de propager à un niveau global les résultats des algorithmes locaux et de tenir compte d’un contexte plus long et plus approprié en un temps raisonnable. @@ -184,10 +184,10 @@ Un turc mécanique pour les ressources linguistiques : critique de la myriadisation du travail parcellisé (<fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk for linguistic resources: review of the crowdsourcing of parceled work) - BenoîtSagot + BenoîtSagot KarënFort - GillesAdda - JosephMariani + GillesAdda + JosephMariani BernardLang 129–140 Cet article est une prise de position concernant les plate-formes de type Amazon Mechanical Turk, dont l’utilisation est en plein essor depuis quelques années dans le traitement automatique des langues. Ces plateformes de travail en ligne permettent, selon le discours qui prévaut dans les articles du domaine, de faire développer toutes sortes de ressources linguistiques de qualité, pour un prix imbattable et en un temps très réduit, par des gens pour qui il s’agit d’un passe-temps. Nous allons ici démontrer que la situation est loin d’être aussi idéale, que ce soit sur le plan de la qualité, du prix, du statut des travailleurs ou de l’éthique. Nous rappellerons ensuite les solutions alternatives déjà existantes ou proposées. Notre but est ici double : informer les chercheurs, afin qu’ils fassent leur choix en toute connaissance de cause, et proposer des solutions pratiques et organisationnelles pour améliorer le développement de nouvelles ressources linguistiques en limitant les risques de dérives éthiques et légales, sans que cela se fasse au prix de leur coût ou de leur qualité. @@ -197,8 +197,8 @@ Degré de comparabilité, extraction lexicale bilingue et recherche d’information interlingue (Degree of comparability, bilingual lexical extraction and cross-language information retrieval) - BoLi - EricGaussier + BoLi + EricGaussier EmmanuelMorin AmirHazem 141–152 @@ -220,7 +220,7 @@ Comparaison d’une approche miroir et d’une approche distributionnelle pour l’extraction de mots sémantiquement reliés (Comparing a mirror approach and a distributional approach for extracting semantically related words) PhilippeMuller - PhilippeLanglais + PhilippeLanglais 165–176 Dans (Muller & Langlais, 2010), nous avons comparé une approche distributionnelle et une variante de l’approche miroir proposée par Dyvik (2002) sur une tâche d’extraction de synonymes à partir d’un corpus en français. Nous présentons ici une analyse plus fine des relations extraites automatiquement en nous intéressant cette fois-ci à la langue anglaise pour laquelle de plus amples ressources sont disponibles. Différentes façons d’évaluer notre approche corroborent le fait que l’approche miroir se comporte globalement mieux que l’approche distributionnelle décrite dans (Lin, 1998), une approche de référence dans le domaine. 2011.jeptalnrecital-long.15 @@ -284,7 +284,7 @@ Identifier la cible d’un passage d’opinion dans un corpus multithématique (Identifying the target of an opinion transition in a thematic corpus) MatthieuVernier LauraMonceaux - BéatriceDaille + BéatriceDaille 234–245 L’identification de la cible d’une d’opinion fait l’objet d’une attention récente en fouille d’opinion. Les méthodes existantes ont été testées sur des corpus monothématiques en anglais. Elles permettent principalement de traiter les cas où la cible se situe dans la même phrase que l’opinion. Dans cet article, nous abordons cette problématique pour le français dans un corpus multithématique et nous présentons une nouvelle méthode pour identifier la cible d’une opinion apparaissant hors du contexte phrastique. L’évaluation de la méthode montre une amélioration des résultats par rapport à l’existant. 2011.jeptalnrecital-long.21 @@ -293,7 +293,7 @@ Intégrer des connaissances linguistiques dans un <fixed-case>CRF</fixed-case> : application à l’apprentissage d’un segmenteur-étiqueteur du français (Integrating linguistic knowledge in a <fixed-case>CRF</fixed-case>: application to learning a segmenter-tagger of <fixed-case>F</fixed-case>rench) - MatthieuConstant + MatthieuConstant IsabelleTellier DenysDuchier YoannDupont @@ -308,7 +308,7 @@ Segmentation et induction de lexique non-supervisées du mandarin (Unsupervised segmentation and induction of mandarin lexicon) PierreMagistry - BenoîtSagot + BenoîtSagot 258–269 Pour la plupart des langues utilisant l’alphabet latin, le découpage d’un texte selon les espaces et les symboles de ponctuation est une bonne approximation d’un découpage en unités lexicales. Bien que cette approximation cache de nombreuses difficultés, elles sont sans comparaison avec celles que l’on rencontre lorsque l’on veut traiter des langues qui, comme le chinois mandarin, n’utilisent pas l’espace. Un grand nombre de systèmes de segmentation ont été proposés parmi lesquels certains adoptent une approche non-supervisée motivée linguistiquement. Cependant les méthodes d’évaluation communément utilisées ne rendent pas compte de toutes les propriétés de tels systèmes. Dans cet article, nous montrons qu’un modèle simple qui repose sur une reformulation en termes d’entropie d’une hypothèse indépendante de la langue énoncée par Harris (1955), permet de segmenter un corpus et d’en extraire un lexique. Testé sur le corpus de l’Academia Sinica, notre système permet l’induction d’une segmentation et d’un lexique qui ont de bonnes propriétés intrinsèques et dont les caractéristiques sont similaires à celles du lexique sous-jacent au corpus segmenté manuellement. De plus, on constate une certaine corrélation entre les résultats du modèle de segmentation et les structures syntaxiques fournies par une sous-partie arborée corpus. 2011.jeptalnrecital-long.23 @@ -338,8 +338,8 @@ Modèles génératif et discriminant en analyse syntaxique : expériences sur le corpus arboré de <fixed-case>P</fixed-case>aris 7 (Generative and discriminative models in parsing: experiments on the <fixed-case>P</fixed-case>aris 7 Treebank) - JosephLe Roux - BenoîtFavre + JosephLe Roux + BenoîtFavre SeyedAbolghasem Mirroshandel AlexisNasr 294–305 @@ -361,7 +361,7 @@ Enrichissement de structures en dépendances par réécriture de graphes (Dependency structure enrichment using graph rewriting) - GuillaumeBonfante + GuillaumeBonfante BrunoGuillaume MathieuMorey GuyPerrier @@ -374,7 +374,7 @@ Classification en polarité de sentiments avec une représentation textuelle à base de sous-graphes d’arbres de dépendances (Sentiment polarity classification using a textual representation based on subgraphs of dependency trees) AlexanderPak - PatrickParoubek + PatrickParoubek 329–339 Les approches classiques à base de n-grammes en analyse supervisée de sentiments ne peuvent pas correctement identifier les expressions complexes de sentiments à cause de la perte d’information induite par l’approche « sac de mots » utilisée pour représenter les textes. Dans notre approche, nous avons recours à des sous-graphes extraits des graphes de dépendances syntaxiques comme traits pour la classification de sentiments. Nous représentons un texte par un vecteur composé de ces sous-graphes syntaxiques et nous employons un classifieurs SVM état-de-l’art pour identifier la polarité d’un texte. Nos évaluations expérimentales sur des critiques de jeux vidéo montrent que notre approche à base de sous-graphes est meilleure que les approches standard à modèles « sac de mots » et n-grammes. Dans cet article nous avons travaillé sur le français, mais notre approche peut facilement être adaptée à d’autres langues. 2011.jeptalnrecital-long.29 @@ -423,7 +423,7 @@ <<fixed-case>T</fixed-case>ext<fixed-case>C</fixed-case>oop>: un analyseur de discours basé sur les grammaires logiques (<<fixed-case>T</fixed-case>ext<fixed-case>C</fixed-case>oop>: a discourse analyzer based on logical grammars) - PatrickSaint-Dizier + PatrickSaint-Dizier 388–399 Dans ce document, nous présentons les principales caractéristiques de <TextCoop>, un environnement basé sur les grammaires logiques dédié à l’analyse de structures discursives. Nous étudions en particulier le langage DisLog qui fixe la structure des règles et des spécifications qui les accompagnent. Nous présentons la structure du moteur de <TextCoop> en indiquant au fur et à mesure du texte l’état du travail, les performances et les orientations en particulier en matière d’environnement, d’aide à l’écriture de règles et de développement applicatif. 2011.jeptalnrecital-long.34 @@ -493,7 +493,7 @@ Evaluation de la détection des émotions, des opinions ou des sentiments : dictature de la majorité ou respect de la diversité d’opinions ? (Evaluation of the detection of emotions, opinions or sentiments: majority dictatorship or respect for opinion diversity?) Jean-YvesAntoine MarcLe Tallec - JeanneVillaneau + JeanneVillaneau 1–6 Détection d’émotion, fouille d’opinion et analyse des sentiments sont généralement évalués par comparaison des réponses du système concerné par rapport à celles contenues dans un corpus de référence. Les questions posées dans cet article concernent à la fois la définition de la référence et la fiabilité des métriques les plus fréquemment utilisées pour cette comparaison. Les expérimentations menées pour évaluer le système de détection d’émotions EmoLogus servent de base de réflexion pour ces deux problèmes. L’analyse des résultats d’EmoLogus et la comparaison entre les différentes métriques remettent en cause le choix du vote majoritaire comme référence. Par ailleurs elles montrent également la nécessité de recourir à des outils statistiques plus évolués que ceux généralement utilisés pour obtenir des évaluations fiables de systèmes qui travaillent sur des données intrinsèquement subjectives et incertaines. 2011.jeptalnrecital-court.1 @@ -521,8 +521,8 @@ Coopération de méthodes statistiques et symboliques pour l’adaptation non-supervisée d’un système d’étiquetage en entités nommées (Statistical and symbolic methods cooperation for the unsupervised adaptation of a named entity recognition system) - FrédéricBéchet - BenoîtSagot + FrédéricBéchet + BenoîtSagot RosaStern 19–24 La détection et le typage des entités nommées sont des tâches pour lesquelles ont été développés à la fois des systèmes symboliques et probabilistes. Nous présentons les résultats d’une expérience visant à faire interagir le système à base de règles NP, développé sur des corpus provenant de l’AFP, intégrant la base d’entités Aleda et qui a une bonne précision, et le système LIANE, entraîné sur des transcriptions de l’oral provenant du corpus ESTER et qui a un bon rappel. Nous montrons qu’on peut adapter à un nouveau type de corpus, de manière non supervisée, un système probabiliste tel que LIANE grâce à des corpus volumineux annotés automatiquement par NP. Cette adaptation ne nécessite aucune annotation manuelle supplémentaire et illustre la complémentarité des méthodes numériques et symboliques pour la résolution de tâches linguistiques. @@ -532,7 +532,7 @@ Création de clusters sémantiques dans des familles morphologiques à partir du <fixed-case>TLF</fixed-case>i (Creating semantic clusters in morphological families from the <fixed-case>TLF</fixed-case>i) - NuriaGala + NuriaGala NabilHathout AlexisNasr VéroniqueRey @@ -547,7 +547,7 @@ Génération automatique de questions à partir de textes en français (Automatic generation of questions from texts in <fixed-case>F</fixed-case>rench) Louisde Viron DelphineBernhard - VéroniqueMoriceau + VéroniqueMoriceau XavierTannier 31–36 Nous présentons dans cet article un générateur automatique de questions pour le français. Le système de génération procède par transformation de phrases déclaratives en interrogatives et se base sur une analyse syntaxique préalable de la phrase de base. Nous détaillons les différents types de questions générées. Nous présentons également une évaluation de l’outil, qui démontre que 41 % des questions générées par le système sont parfaitement bien formées. @@ -595,7 +595,7 @@ Alignement automatique pour la compréhension littérale de l’oral par approche segmentale (Automatic alignment for the literal oral understanding using a segmental approach) StéphaneHuet - FabriceLefèvre + FabriceLefèvre 55–60 Les approches statistiques les plus performantes actuellement pour la compréhension automatique du langage naturel nécessitent une annotation segmentale des données d’entraînement. Nous étudions dans cet article une alternative permettant d’obtenir de façon non-supervisée un alignement segmental d’unités conceptuelles sur les mots. L’impact de l’alignement automatique sur les performances du système de compréhension est évalué sur une tâche de dialogue oral. 2011.jeptalnrecital-court.10 @@ -606,7 +606,7 @@ Ajout d’informations contextuelles pour la recherche de passages au sein de Wikipédia (Integrating contextual information for passage retrieval in <fixed-case>W</fixed-case>ikipedia) RomainDeveaud EricSanjuan - PatriceBellot + PatriceBellot 61–66 La recherche de passages consiste à extraire uniquement des passages pertinents par rapport à une requête utilisateur plutôt qu’un ensemble de documents entiers. Cette récupération de passages est souvent handicapée par le manque d’informations complémentaires concernant le contexte de la recherche initiée par l’utilisateur. Des études montrent que l’ajout d’informations contextuelles par l’utilisateur peut améliorer les performances des systèmes de recherche de passages. Nous confirmons ces observations dans cet article, et nous introduisons également une méthode d’enrichissement de la requête à partir d’informations contextuelles issues de documents encyclopédiques. Nous menons des expérimentations en utilisant la collection et les méthodes d’évaluation proposées par la campagne INEX. Les résultats obtenus montrent que l’ajout d’informations contextuelles permet d’améliorer significativement les performances de notre système de recherche de passages. Nous observons également que notre approche automatique obtient les meilleurs résultats parmi les différentes approches que nous évaluons. 2011.jeptalnrecital-court.11 @@ -616,7 +616,7 @@ Construction d’un lexique des adjectifs dénominaux (Construction of a lexicon of denominal adjectives) JanaStrnadová - BenoîtSagot + BenoîtSagot 67–72 Après une brève analyse linguistique des adjectifs dénominaux en français, nous décrivons le processus automatique que nous avons mis en place à partir de lexiques et de corpus volumineux pour construire un lexique d’adjectifs dénominaux dérivés de manière régulière. Nous estimons à la fois la précision et la couverture du lexique dérivationnel obtenu. À terme, ce lexique librement disponible aura été validé manuellement et contiendra également les adjectifs dénominaux à base supplétive. 2011.jeptalnrecital-court.12 @@ -625,7 +625,7 @@ Développement de ressources pour le persan : <fixed-case>P</fixed-case>er<fixed-case>L</fixed-case>ex 2, nouveau lexique morphologique et <fixed-case>ME</fixed-case>ltfa, étiqueteur morphosyntaxique (Development of resources for <fixed-case>P</fixed-case>ersian: <fixed-case>P</fixed-case>er<fixed-case>L</fixed-case>ex 2, a new morphological lexicon and <fixed-case>ME</fixed-case>ltfa, a morphosyntactic tagger) - BenoîtSagot + BenoîtSagot GéraldineWalther PegahFaghiri PolletSamvelian @@ -638,7 +638,7 @@ Identification de cognats à partir de corpus parallèles français-roumain (Identification of cognates from <fixed-case>F</fixed-case>rench-<fixed-case>R</fixed-case>omanian parallel corpora) MirabelaNavlea - AmaliaTodiraşcu + AmaliaTodiraşcu 79–84 Cet article présente une méthode hybride d’identification de cognats français - roumain. Cette méthode exploite des corpus parallèles alignés au niveau propositionnel, lemmatisés et étiquetés (avec des propriétés morphosyntaxiques). Notre méthode combine des techniques statistiques et des informations linguistiques pour améliorer les résultats obtenus. Nous évaluons le module d’identification de cognats et nous faisons une comparaison avec des méthodes statistiques pures, afin d’étudier l’impact des informations linguistiques utilisées sur la qualité des résultats obtenus. Nous montrons que l’utilisation des informations linguistiques augmente significativement la performance de la méthode. 2011.jeptalnrecital-court.14 @@ -668,7 +668,7 @@ Le corpus <fixed-case>T</fixed-case>ext+<fixed-case>B</fixed-case>erg Une ressource parallèle alpin français-allemand (The <fixed-case>T</fixed-case>ext+<fixed-case>B</fixed-case>erg Corpus An Alpine <fixed-case>F</fixed-case>rench-<fixed-case>G</fixed-case>erman Parallel Resource) - AnneGöhring + AnneGöhring MartinVolk 97–102 Cet article présente un corpus parallèle français-allemand de plus de 4 millions de mots issu de la numérisation d’un corpus alpin multilingue. Ce corpus est une précieuse ressource pour de nombreuses études de linguistique comparée et du patrimoine culturel ainsi que pour le développement d’un système statistique de traduction automatique dans un domaine spécifique. Nous avons annoté un échantillon de ce corpus parallèle et aligné les structures arborées au niveau des mots, des constituants et des phrases. Cet “alpine treebank” est le premier corpus arboré parallèle français-allemand de haute qualité (manuellement contrôlé), de libre accès et dans un domaine et un genre nouveau : le récit d’alpinisme. @@ -697,7 +697,7 @@ <fixed-case>F</fixed-case>re<fixed-case>D</fixed-case>ist : Construction automatique d’un thésaurus distributionnel pour le Français (<fixed-case>F</fixed-case>re<fixed-case>D</fixed-case>ist : Automatic construction of distributional thesauri for <fixed-case>F</fixed-case>rench) - EnriqueHenestroza Anguiano + EnriqueHenestroza Anguiano PascalDenis 116–121 Dans cet article, nous présentons FreDist, un logiciel libre pour la construction automatique de thésaurus distributionnels à partir de corpus de texte, ainsi qu’une évaluation des différents ressources ainsi produites. Suivant les travaux de (Lin, 1998) et (Curran, 2004), nous utilisons un corpus journalistique de grande taille et implémentons différentes options pour : le type de relation contexte lexical, la fonction de poids, et la fonction de mesure de similarité. Prenant l’EuroWordNet français et le WOLF comme références, notre évaluation révèle, de manière originale, que c’est l’approche qui combine contextes linéaires (ici, de type bigrammes) et contextes syntaxiques qui semble fournir le meilleur thésaurus. Enfin, nous espérons que notre logiciel, distribué avec nos meilleurs thésaurus pour le français, seront utiles à la communauté TAL. @@ -750,7 +750,7 @@ Attribution de rôles sémantiques aux actants des lexies verbales (Assigning semantic roles to actants of verbal lexical units) FadilaHadouche GuyLapalme - Marie-ClaudeL’Homme + Marie-ClaudeL’Homme 146–151 Dans cet article, nous traitons de l’attribution des rôles sémantiques aux actants de lexies verbales en corpus spécialisé en français. Nous proposons une classification de rôles sémantiques par apprentissage machine basée sur un corpus de lexies verbales annotées manuellement du domaine de l’informatique et d’Internet. Nous proposons également une méthode de partitionnement semi-supervisé pour prendre en compte l’annotation de nouvelles lexies ou de nouveaux rôles sémantiques et de les intégrés dans le système. Cette méthode de partitionnement permet de regrouper les instances d’actants selon les valeurs communes correspondantes aux traits de description des actants dans des groupes d’instances d’actants similaires. La classification de rôles sémantique a obtenu une F-mesure de 93% pour Patient, de 90% pour Agent, de 85% pour Destination et de 76% pour les autres rôles pris ensemble. Quand au partitionnement en regroupant les instances selon leur similarité donne une F-mesure de 88% pour Patient, de 81% pour Agent, de 58% pour Destination et de 46% pour les autres rôles. 2011.jeptalnrecital-court.25 @@ -769,7 +769,7 @@ Un calcul de termes typés pour la pragmatique lexicale: chemins et voyageurs fictifs dans un corpus de récits de voyage (A calculation of typed terms for lexical pragmatics: paths and fictional travellers in a travel stories corpus) RichardMoot - LaurentPrévot + LaurentPrévot ChristianRetoré 158–163 Ce travail s’inscrit dans l’analyse automatique d’un corpus de récits de voyage. À cette fin, nous raffinons la sémantique de Montague pour rendre compte des phénomènes d’adaptation du sens des mots au contexte dans lequel ils apparaissent. Ici, nous modélisons les constructions de type ‘le chemin descend pendant une demi-heure’ où ledit chemin introduit un voyageur fictif qui le parcourt, en étendant des idées que le dernier auteur a développé avec Bassac et Mery. Cette introduction du voyageur utilise la montée de type afin que le quantificateur introduisant le voyageur porte sur toute la phrase et que les propriétés du chemin ne deviennent pas des propriétés du voyageur, fût-il fictif. Cette analyse sémantique (ou plutôt sa traduction en lambda-DRT) est d’ores et déjà implantée pour une partie du lexique de Grail. @@ -792,7 +792,7 @@ Mesure non-supervisée du degré d’appartenance d’une entité à un type (An unsupervised measure of the degree of belonging of an entity to a type) LudovicBonnefoy - PatriceBellot + PatriceBellot MichelBenoit 170–175 La recherche d’entités nommées a été le sujet de nombreux travaux. Cependant, la construction des ressources nécessaires à de tels systèmes reste un problème majeur. Dans ce papier, nous proposons une méthode complémentaire aux outils capables de reconnaître des entités de types larges, dont l’objectif est de déterminer si une entité est d’un type donné, et ce de manière non-supervisée et quel que soit le type. Nous proposons pour cela une approche basée sur la comparaison de modèles de langage estimés à partir du Web. L’intérêt de notre approche est validé par une évaluation sur 100 entités et 273 types différents. @@ -852,7 +852,7 @@ Règles et paradigmes en morphologie informatique lexématique (Rules and paradigms in lexematic computer morphology) NabilHathout - FiammettaNamer + FiammettaNamer 206–211 Les familles de mots produites par deux analyseurs morphologiques, DériF (basé sur des règles) et Morphonette (basé sur l’analogie), appliqués à un même corpus lexical, sont comparées. Cette comparaison conduit à l’examen de trois sous-ensembles : - un sous-ensemble commun aux deux systèmes dont la taille montre que, malgré leurs différences, les approches expérimentées par chaque système sont valides et décrivent en partie la même réalité morphologique. - un sous-ensemble propre à DériF et un autre à Morphonette. Ces ensembles (a) nous renseignent sur les caractéristiques propres à chaque système, et notamment sur ce que l’autre ne peut pas produire, (b) ils mettent en évidence les erreurs d’un système, en ce qu’elles n’apparaissent pas dans l’autre, (c) ils font apparaître certaines limites de la description, notamment celles qui sont liées aux objets et aux notions théoriques comme les familles morphologiques, les bases, l’existence de RCL « transversales » entre les lexèmes qui n’ont pas de relation d’ascendance ou de descendance. 2011.jeptalnrecital-court.35 @@ -882,8 +882,8 @@ Exploitation d’un corpus arboré pour non spécialistes par des requêtes guidées et des requêtes sémantiques (Exploiting a Treebank for non-specialists by guided queries and semantic queries) AchilleFalaise - AgnèsTutin - OlivierKraif + AgnèsTutin + OlivierKraif 224–229 L’exploitation de corpus analysés syntaxiquement (ou corpus arborés) pour le public non spécialiste n’est pas un problème trivial. Si la communauté du TAL souhaite mettre à la disposition des chercheurs non-informaticiens des corpus comportant des annotations linguistiques complexes, elle doit impérativement développer des interfaces simples à manipuler mais permettant des recherches fines. Dans cette communication, nous présentons les modes de recherche « grand public » développé(e)s dans le cadre du projet Scientext, qui met à disposition un corpus d’écrits scientifiques interrogeable par partie textuelle, par partie du discours et par fonction syntaxique. Les modes simples sont décrits : un mode libre et guidé, où l’utilisateur sélectionne lui-même les éléments de la requête, et un mode sémantique, qui comporte des grammaires locales préétablies à l’aide des fonctions syntaxiques. 2011.jeptalnrecital-court.38 @@ -893,7 +893,7 @@ Communautés <fixed-case>I</fixed-case>nternet comme sources de préterminologie (<fixed-case>I</fixed-case>nternet communities as sources of preterminology) MohammadDaoud - ChristianBoitet + ChristianBoitet 230–235 Cet article décrit deux expériences sur la construction de ressources terminologiques multilingues (preterminologies) préliminaires, mais grandes, grâce à des communautés Internet, et s’appuie sur ces expériences pour cibler des données terminologiques plus raffinées venant de communautés Internet et d’applications Web 2.0. La première expérience est une passerelle de contribution pour le site Web de la Route de la Soie numérique (DSR). Les visiteurs contribuent en effet à un référentiel lexical multilingue dédié, pendant qu’ils visitent et lisent les livres archivés, parce qu’ils sont intéressés par le domaine et ont tendance à être polygottes. Nous avons recueilli 1400 contributions lexicales en 4 mois. La seconde expérience est basée sur le JeuxDeMots arabe, où les joueurs en ligne contribuent à un réseau lexical arabe. L’expérience a entraîné une croissance régulière du nombre de joueurs et de contributions, ces dernières contenant des termes absents et des mots de dialectes oraux. 2011.jeptalnrecital-court.39 @@ -927,7 +927,7 @@ La traduction automatique des séquences clitiques dans un traducteur à base de règles (Automatic translation clitic sequences in a rule-based <fixed-case>MT</fixed-case> system) LorenzaRusso - ÉricWehrli + ÉricWehrli 248–253 Dans cet article, nous discutons la méthodologie utilisée par Its-2, un système de traduction à base de règles, pour la traduction des pronoms clitiques. En particulier, nous nous focalisons sur les séquences clitiques, pour la traduction automatique entre le français et l’anglais. Une évaluation basée sur un corpus de phrases construites montre le potentiel de notre approche pour des traductions de bonne qualité. 2011.jeptalnrecital-court.42 @@ -941,7 +941,7 @@ Jean-PhilippeGoldman SharidLoáiciga LukaNerima - ÉricWehrli + ÉricWehrli 254–259 Ce travail décrit la distribution des pronoms selon le style de texte (littéraire ou journalistique) et selon la langue (français, anglais, allemand et italien). Sur la base d’un étiquetage morpho-syntaxique effectué automatiquement puis vérifié manuellement, nous pouvons constater que la proportion des différents types de pronoms varie selon le type de texte et selon la langue. Nous discutons les catégories les plus ambiguës de manière détaillée. Comme nous avons utilisé l’analyseur syntaxique Fips pour l’étiquetage des pronoms, nous l’avons également évalué et obtenu une précision moyenne de plus de 95%. 2011.jeptalnrecital-court.43 @@ -955,7 +955,7 @@ Jean-PhilippeGoldman SharidLoáiciga LukaNerima - ÉricWehrli + ÉricWehrli 260–265 Dans cette étude, notre système de traduction automatique, Its-2, a fait l’objet d’une évaluation manuelle de la traduction des pronoms pour cinq paires de langues et sur deux corpus : un corpus littéraire et un corpus de communiqués de presse. Les résultats montrent que les pourcentages d’erreurs peuvent atteindre 60% selon la paire de langues et le corpus. Nous discutons ainsi deux pistes de recherche pour l’amélioration des performances de Its-2 : la résolution des ambiguïtés d’analyse et la résolution des anaphores pronominales. 2011.jeptalnrecital-court.44 @@ -985,9 +985,9 @@ CarolineHagège DenysProux QuentinGicquel - StéfanDarmoni + StéfanDarmoni SuzannePereira - FrédériqueSegond + FrédériqueSegond Marie-HelèneMetzger 278–283 Cet article décrit la première version et les résultats de l’évaluation d’un système de détection des épisodes d’infections associées aux soins. Cette détection est basée sur l’analyse automatique de comptes-rendus d’hospitalisation provenant de différents hôpitaux et différents services. Ces comptes-rendus sont sous forme de texte libre. Le système de détection a été développé à partir d’un analyseur linguistique que nous avons adapté au domaine médical et extrait à partir des documents des indices pouvant conduire à une suspicion d’infection. Un traitement de la négation et un traitement temporel des textes sont effectués permettant de restreindre et de raffiner l’extraction d’indices. Nous décrivons dans cet article le système que nous avons développé et donnons les résultats d’une évaluation préliminaire. @@ -1023,10 +1023,10 @@ <fixed-case>S</fixed-case>pati<fixed-case>A</fixed-case>nn, un outil pour annoter l’utilisation de l’espace dans les corpus vidéo (<fixed-case>S</fixed-case>pati<fixed-case>A</fixed-case>nn, a tool for annotating the use of space in video corpora) - AnneliesBraffort + AnneliesBraffort LaurenceBolot 2–2 - + 2011.jeptalnrecital-demonstration.2 fra braffort-bolot-2011-spatiann @@ -1044,7 +1044,7 @@ Une application de la grammaire structurelle: L’analyseur syntaxique du français <fixed-case>SYGFRAN</fixed-case> (An application of structural grammar: the <fixed-case>SYGFRAN</fixed-case> syntactic analyser) - JacquesChauché + JacquesChauché 4–4 La démonstration présentée produit une analyse syntaxique du français. Elle est écrite en SYGMART, fournie avec les actes, exécutable à l’adresse : http ://www.lirmm.fr/ chauche/ExempleAnl.html et téléchargeable à l’adresse : http ://www.sygtext.fr. 2011.jeptalnrecital-demonstration.4 @@ -1062,26 +1062,26 @@ <fixed-case>TTC</fixed-case> <fixed-case>T</fixed-case>erm<fixed-case>S</fixed-case>uite : une chaîne de traitement pour la fouille terminologique multilingue (<fixed-case>TTC</fixed-case> <fixed-case>T</fixed-case>erm<fixed-case>S</fixed-case>uite: a processing chain for multilingual terminology mining) - BéatriceDaille + BéatriceDaille ChristineJacquin LauraMonceaux EmmanuelMorin JéromeRocheteau 6–6 - + 2011.jeptalnrecital-demonstration.6 fra daille-etal-2011-ttc Une Suite d’interaction de fouille basée sur la compréhension du langage naturel (An Interaction Mining Suite Based On Natural Language Understanding) - RodolfoDelmonte + RodolfoDelmonte VincenzoPallotta VioletaSeretan LammertVrieling DavidWalker 7–7 - + 2011.jeptalnrecital-demonstration.7 fra delmonte-etal-2011-une @@ -1089,9 +1089,9 @@ Démonstration de l’<fixed-case>API</fixed-case> de <fixed-case>NLG</fixed-case>b<fixed-case>A</fixed-case>se (Demonstration of the <fixed-case>NLG</fixed-case>b<fixed-case>A</fixed-case>se <fixed-case>API</fixed-case>) François-XavierDesmarais - ÉricCharton + ÉricCharton 8–8 - + 2011.jeptalnrecital-demonstration.8 fra desmarais-charton-2011-demonstration @@ -1108,21 +1108,21 @@ <fixed-case>R</fixed-case>ef<fixed-case>G</fixed-case>en, outil d’identification automatique des chaînes de référence en français (<fixed-case>R</fixed-case>ef<fixed-case>G</fixed-case>en, an automatic identification tool of reference chains in <fixed-case>F</fixed-case>rench) LaurenceLongo - AmaliaTodirascu + AmaliaTodirascu 10–10 - + 2011.jeptalnrecital-demonstration.10 fra longo-todirascu-2011-refgen Babouk – exploration orientée du web pour la constitution de corpus et de terminologies (Babouk – oriented exploration of the web for the construction of corpora and terminologies) - Clémentde Groc + Clémentde Groc JavierCouto HelenaBlancafort - Claudede Loupy + Claudede Loupy 11–11 - + 2011.jeptalnrecital-demonstration.11 fra de-groc-etal-2011-babouk @@ -1137,10 +1137,10 @@ DelphineBernhard BrunoCartoni BrigitteGrau - SophieRosset - PierreZweigenbaum + SophieRosset + PierreZweigenbaum 12–12 - + 2011.jeptalnrecital-demonstration.12 fra grouin-etal-2011-extraction @@ -1148,9 +1148,9 @@ Système d’analyse catégorielle <fixed-case>ACCG</fixed-case> : adéquation au traitement de problèmes syntaxiques complexes (<fixed-case>ACCG</fixed-case> categorical analysis system: adequacy to the treatment of complex syntactic problems) JuyeonKang - Jean-PierreDesclés + Jean-PierreDesclés 13–13 - + 2011.jeptalnrecital-demonstration.13 fra kang-descles-2011-systeme @@ -1161,9 +1161,9 @@ MickaëlMounier HelenaBlancafort JavierCouto - Claudede Loupy + Claudede Loupy 14–14 - + 2011.jeptalnrecital-demonstration.14 fra ma-etal-2011-lol @@ -1190,11 +1190,11 @@ <fixed-case>EASYTEXT</fixed-case> : un système opérationnel de génération de textes (<fixed-case>EASYTEXT</fixed-case>: an operational system for text generation) - FrédéricMeunier + FrédéricMeunier LaurenceDanlos VanessaCombet 17–17 - + 2011.jeptalnrecital-demonstration.17 fra meunier-etal-2011-easytext @@ -1203,9 +1203,9 @@ Restad : un logiciel d’indexation et de stockage relationnel de contenus <fixed-case>XML</fixed-case> (Restad: an indexing and relational storing software for <fixed-case>XML</fixed-case> content) YoannMoreau EricSanJuan - PatriceBellot + PatriceBellot 18–18 - + 2011.jeptalnrecital-demonstration.18 fra moreau-etal-2011-restad @@ -1256,7 +1256,7 @@ Extraction Automatique d’Informations Pédagogiques Pertinentes à partir de Documents Textuels BoutheinaSmine RimFaiz - Jean-PierreDesclés + Jean-PierreDesclés 12–23 Plusieurs utilisateurs ont souvent besoin d’informations pédagogiques pour les intégrer dans leurs ressources pédagogiques, ou pour les utiliser dans un processus d’apprentissage. Une indexation de ces informations s’avère donc utile en vue d’une extraction des informations pédagogiques pertinentes en réponse à une requête utilisateur. La plupart des systèmes d’extraction d’informations pédagogiques existants proposent une indexation basée sur une annotation manuelle ou semi-automatique des informations pédagogiques, tâche qui n’est pas préférée par les utilisateurs. Dans cet article, nous proposons une approche d’indexation d’objets pédagogiques (Définition, Exemple, Exercice, etc.) basée sur une annotation sémantique par Exploration Contextuelle des documents. L’index généré servira à une extraction des objets pertinents répondant à une requête utilisateur sémantique. Nous procédons, ensuite, à un classement des objets extraits selon leur pertinence en utilisant l’algorithme Rocchio. Notre objectif est de mettre en valeur une indexation à partir de contextes sémantiques et non pas à partir de seuls termes linguistiques. 2011.jeptalnrecital-recital.2 @@ -1283,7 +1283,7 @@ Alignment of Monolingual Corpus by Reduction of the Search Space - PrajolShrestha + PrajolShrestha 48–56 Monolingual comparable corpora annotated with alignments between text segments (paragraphs, sentences, etc.) based on similarity are used in a wide range of natural language processing applications like plagiarism detection, information retrieval, summarization and so on. The drawback wanting to use them is that there aren’t many standard corpora which are aligned. Due to this drawback, the corpus is manually created, which is a time consuming and costly task. In this paper, we propose a method to significantly reduce the search space for manual alignment of the monolingual comparable corpus which in turn makes the alignment process faster and easier. This method can be used in making alignments on different levels of text segments. Using this method we create our own gold corpus aligned on the level of paragraph, which will be used for testing and building our algorithms for automatic alignment. We also present some experiments for the reduction of search space on the basis of stem overlap, word overlap, and cosine similarity measure which help us automatize the process to some extent and reduce human effort for alignment. 2011.jeptalnrecital-recital.5 @@ -1306,7 +1306,7 @@ Corpus-Based methods for Short Text Similarity - PrajolShrestha + PrajolShrestha 1–6 This paper presents corpus-based methods to find similarity between short text (sentences, paragraphs, ...) which has many applications in the field of NLP. Previous works on this problem have been based on supervised methods or have used external resources such as WordNet, British National Corpus etc. Our methods are focused on unsupervised corpus-based methods. We present a new method, based on Vector Space Model, to capture the contextual behavior, senses and correlation, of terms and show that this method performs better than the baseline method that uses vector based cosine similarity measure. The performance of existing document similarity measures, Dice and Resemblance, are also evaluated which in our knowledge have not been used for short text similarity. We also show that the performance of the vector-based baseline method is improved when using stems instead of words and using the candidate sentences for computing the parameters rather than some external resource. 2011.jeptalnrecital-recitalcourt.1 diff --git a/data/xml/2011.mtsummit.xml b/data/xml/2011.mtsummit.xml index 4542a9f32d..dce8f78ccf 100644 --- a/data/xml/2011.mtsummit.xml +++ b/data/xml/2011.mtsummit.xml @@ -25,7 +25,7 @@ Challenges of Patent <fixed-case>MT</fixed-case> – Term and Structure Translation - Jun’ichiTsujii + Jun’ichiTsujii tsujii-2011-challenges @@ -59,7 +59,7 @@ Training Machine Translation with a Second-Order <fixed-case>T</fixed-case>aylor Approximation of Weighted Translation Instances AaronPhillips - RalfBrown + RalfBrown 2011.mtsummit-papers.2 phillips-brown-2011-training @@ -75,7 +75,7 @@ <fixed-case>POS</fixed-case> Tagging of <fixed-case>E</fixed-case>nglish Particles for Machine Translation JianjunMa - DegenHuang + DegenHuang HaixiaLiu WenfengSheng 2011.mtsummit-papers.4 @@ -85,8 +85,8 @@ Multi-stage <fixed-case>C</fixed-case>hinese Dependency Parsing Based on Dependency Direction WenjingLang QiaoliZhou - GuipingZhang - DongfengCai + GuipingZhang + DongfengCai 2011.mtsummit-papers.5 lang-etal-2011-multi @@ -102,7 +102,7 @@ Phonetic Representation-Based Speech Translation JieJiang ZeeshanAhmed - JulieCarson-Berndsen + JulieCarson-Berndsen PeterCahill AndyWay 2011.mtsummit-papers.7 @@ -112,7 +112,7 @@ Unsupervised Vocabulary Selection for Domain-Independent Simultaneous Lecture Translation PaulMaergner IanLane - AlexWaibel + AlexWaibel 2011.mtsummit-papers.8 maergner-etal-2011-unsupervised-domain @@ -120,7 +120,7 @@ Context-aware Language Modeling for Conversational Speech Translation AvneeshSaluja IanLane - YingZhang + YingZhang 2011.mtsummit-papers.9 saluja-etal-2011-context @@ -129,7 +129,7 @@ QinGao WillLewis ChrisQuirk - Mei-YuhHwang + Mei-YuhHwang 2011.mtsummit-papers.10 gao-etal-2011-incremental @@ -145,8 +145,8 @@ Multi-Strategy Approaches to Active Learning for Statistical Machine Translation VamshiAmbati - StephanVogel - JaimeCarbonell + StephanVogel + JaimeCarbonell 2011.mtsummit-papers.12 ambati-etal-2011-multi @@ -170,8 +170,8 @@ Multimodal Building of Monolingual Dictionaries for Machine Translation by Non-Expert Users - MiquelEsplà-Gomis - Víctor M.Sánchez-Cartagena + MiquelEsplà-Gomis + Víctor M.Sánchez-Cartagena Juan AntonioPérez-Ortiz 2011.mtsummit-papers.15 espla-gomis-etal-2011-multimodal @@ -184,7 +184,7 @@ Qualitative Analysis of Post-Editing for High Quality Machine Translation - FrédéricBlain + FrédéricBlain JeanSenellart HolgerSchwenk MirkoPlitt @@ -194,9 +194,9 @@ Using machine translation in computer-aided translation to suggest the target-side words to change - MiquelEsplà-Gomis + MiquelEsplà-Gomis FelipeSánchez-Martínez - Mikel L.Forcada + Mikel L.Forcada 2011.mtsummit-papers.18 espla-gomis-etal-2011-using @@ -220,9 +220,9 @@ Phrase Segmentation Model using Collocation and Translational Entropy Hyoung-GyuLee - Joo-YoungLee + Joo-YoungLee Min-JeongKim - Hae-ChangRim + Hae-ChangRim Joong-HwiShin Young-SookHwang 2011.mtsummit-papers.21 @@ -239,7 +239,7 @@ Handling Multiword Expressions in Phrase-Based Statistical Machine Translation SantanuPal TanmoyChakraborty - SivajiBandyopadhyay + SivajiBandyopadhyay 2011.mtsummit-papers.23 pal-etal-2011-handling @@ -274,7 +274,7 @@ A Unified and Discriminative Soft Syntactic Constraint Model for Hierarchical Phrase-based Translation LemaoLiu - TiejunZhao + TiejunZhao ChaoWang HailongCao 2011.mtsummit-papers.28 @@ -285,7 +285,7 @@ FeifeiZhai JiajunZhang YuZhou - ChengqingZong + ChengqingZong 2011.mtsummit-papers.29 zhai-etal-2011-simple @@ -311,10 +311,10 @@ Domain Adaptation in Statistical Machine Translation of User-Forum Data using Component Level Mixture Modelling PratyushBanerjee - Sudip KumarNaskar + Sudip KumarNaskar JohannRoturier AndyWay - Josefvan Genabith + Josefvan Genabith 2011.mtsummit-papers.32 banerjee-etal-2011-domain @@ -322,7 +322,7 @@ Bagging-based System Combination for Domain Adaption LinfengSong HaitaoMi - Yajuan + Yajuan QunLiu 2011.mtsummit-papers.33 song-etal-2011-bagging @@ -339,9 +339,9 @@ Statistical Post-Editing for a Statistical <fixed-case>MT</fixed-case> System - HannaBechara + HannaBechara YanjunMa - Josefvan Genabith + Josefvan Genabith 2011.mtsummit-papers.35 bechara-etal-2011-statistical @@ -359,8 +359,8 @@ Searching Translation Memories for Paraphrases MasaoUtiyama GrahamNeubig - TakashiOnishi - EiichiroSumita + TakashiOnishi + EiichiroSumita 2011.mtsummit-papers.37 utiyama-etal-2011-searching @@ -383,7 +383,7 @@ MasamichiIdeue KazuhideYamamoto MasaoUtiyama - EiichiroSumita + EiichiroSumita 2011.mtsummit-papers.40 ideue-etal-2011-comparison @@ -391,14 +391,14 @@ Improving Low-Resource Statistical Machine Translation with a Novel Semantic Word Clustering Algorithm JeffMa SpyrosMatsoukas - RichardSchwartz + RichardSchwartz 2011.mtsummit-papers.41 ma-etal-2011-improving Multi-granularity Word Alignment and Decoding for Agglutinative Language Translation ZhiyangWang - Yajuan + Yajuan QunLiu 2011.mtsummit-papers.42 wang-etal-2011-multi @@ -429,7 +429,7 @@ Generating Virtual Parallel Corpus: A Compatibility Centric Method JiaXu - WeiweiSun + WeiweiSun 2011.mtsummit-papers.46 xu-sun-2011-generating @@ -470,8 +470,8 @@ A Comparison Study of Parsers for Patent Machine Translation IsaoGoto MasaoUtiyama - TakashiOnishi - EiichiroSumita + TakashiOnishi + EiichiroSumita 2011.mtsummit-papers.51 goto-etal-2011-comparison @@ -480,7 +480,7 @@ YifanHe YanjunMa AndyWay - Josefvan Genabith + Josefvan Genabith 2011.mtsummit-papers.52 he-etal-2011-rich @@ -496,20 +496,20 @@ The Cultivation of a <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish-<fixed-case>J</fixed-case>apanese Trilingual Parallel Corpus from Comparable Patents BinLu Ka PoChow - Benjamin K.Tsou + Benjamin K.Tsou 2011.mtsummit-papers.54 lu-etal-2011-cultivation Evaluation Methodology and Results for <fixed-case>E</fixed-case>nglish-to-<fixed-case>A</fixed-case>rabic <fixed-case>MT</fixed-case> - OlivierHamon - KhalidChoukri + OlivierHamon + KhalidChoukri 2011.mtsummit-papers.55 hamon-choukri-2011-evaluation Example-Based Machine Translation for Low-Resource Language Using Chunk-String Templates - Md. Anwarus SalamKhan + Md. Anwarus SalamKhan SetsuoYamada TetsuroNishino 2011.mtsummit-papers.56 @@ -517,8 +517,8 @@ Improve <fixed-case>SMT</fixed-case> with Source-Side “Topic-Document” Distributions - ZhengxianGong - GuodongZhou + ZhengxianGong + GuodongZhou LiangyouLi 2011.mtsummit-papers.57 gong-etal-2011-improve @@ -537,13 +537,13 @@ LuisaBentivogli MarcelloFederico GiovanniMoretti - MichaelPaul + MichaelPaul 2011.mtsummit-papers.59 bentivogli-etal-2011-getting A Framework for Diagnostic Evaluation of <fixed-case>MT</fixed-case> Based on Linguistic Checkpoints - Sudip KumarNaskar + Sudip KumarNaskar AntonioToral FedericoGaspari AndyWay @@ -553,13 +553,13 @@ Comparative Evaluation of Term Informativeness Measures in Machine Translation Evaluation Metrics BillyWong - ChunyuKit + ChunyuKit 2011.mtsummit-papers.61 wong-kit-2011-comparative System Combination for Machine Translation Based on Text-to-Text Generation - Wei-YunMa + Wei-YunMa KathleenMckeown 2011.mtsummit-papers.62 ma-mckeown-2011-system @@ -567,15 +567,15 @@ Hybrid Machine Translation Guided by a Rule–Based System CristinaEspaña-Bonet - GorkaLabaka - ArantzaDíaz de Ilarraza - LluísMàrquez + GorkaLabaka + ArantzaDíaz de Ilarraza + LluísMàrquez 2011.mtsummit-papers.63 espana-bonet-etal-2011-hybrid Integrating shallow-transfer rules into phrase-based statistical machine translation - Víctor M.Sánchez-Cartagena + Víctor M.Sánchez-Cartagena FelipeSánchez-Martínez Juan AntonioPérez-Ortiz 2011.mtsummit-papers.64 @@ -584,7 +584,7 @@ Hypergraph Training and Decoding of System Combination in <fixed-case>SMT</fixed-case> YupengLiu - TiejunZhao + TiejunZhao ShengLi 2011.mtsummit-papers.65 liu-etal-2011-hypergraph @@ -592,7 +592,7 @@ Study on the Impact Factors of the Translators’ Post-editing Efficiency in a Collaborative Translation Environment NaYe - GuipingZhang + GuipingZhang 2011.mtsummit-papers.66 ye-zhang-2011-study @@ -643,7 +643,7 @@ Broadcast news speech-to-text translation experiments SylvainRaybaud DavidLanglois - KamelSmaïli + KamelSmaïli 2011.mtsummit-systems.3 raybaud-etal-2011-broadcast @@ -658,9 +658,9 @@ <fixed-case>L</fixed-case>ets<fixed-case>MT</fixed-case>!: Cloud-Based Platform for Building User Tailored Machine Translation Engines - AndrejsVasiljevs + AndrejsVasiljevs RaivisSkadinš - JörgTiedemann + JörgTiedemann 2011.mtsummit-systems.5 vasiljevs-etal-2011-letsmt @@ -684,14 +684,14 @@ From the Confidence Estimation of Machine Translation to the Integration of <fixed-case>MT</fixed-case> and Translation Memory YanjunMa YifanHe - Josefvan Genabith + Josefvan Genabith 2011.mtsummit-tutorials.2 In this tutorial, we cover techniques that facilitate the integration of Machine Translation (MT) and Translation Memory (TM), which can help the adoption of MT technology in localisation industry. The tutorial covers four parts: i) brief introduction of MT and TM systems, ii) MT confidence estimation measures tailored for the TM environment, iii) segment-level MT and MT integration, iv) sub-segment level MT and TM integration, and v) human evaluation of MT and TM integration. We will first briefly describe and compare how translations are generated in MT and TM systems, and suggest possible avenues to combines these two systems. We will also cover current quality / cost estimation measures applied in MT and TM systems, such as the fuzzy-match score in the TM, and the evaluation/confidence metrics used to judge MT outputs. We then move on to introduce the recent developments in the field of MT confidence estimation tailored towards predicting post-editing efforts. We will especially focus on the confidence metrics proposed by Specia et al., which is shown to have high correlation with human preference, as well as post-editing time. For segment-level MT and TM integration, we present translation recommendation and translation re-ranking models, where the integration happens at the 1-best or the N-best level, respectively. Given an input to be translated, MT-TM recommendation compares the output from the MT and the TM systems, and presents the better one to the post-editor. MT-TM re-ranking, on the other hand, combines k-best lists from both systems, and generates a new list according to estimated post-editing effort. We observe high precision of these models in automatic and human evaluations, indicating that they can be integrated into TM environments without the risk of deteriorating the quality of the post-editing candidate. For sub-segment level MT and TM integration, we try to reuse high quality TM chunks to improve the quality of MT systems. We can also predict whether phrase pairs derived from fuzzy matches should be used to constrain the translation of an input segment. Using a series of linguistically- motivated features, our constraints lead both to more consistent translation output, and to improved translation quality, as is measured by automatic evaluation scores. Finally, we present several methodologies that can be used to track post-editing effort, perform human evaluation of MT-TM integration, or help translators to access MT outputs in a TM environment. ma-etal-2011-confidence Evaluating the Output of Machine Translation Systems - AlonLavie + AlonLavie 2011.mtsummit-tutorials.3 This half-day tutorial provides a broad overview of how to evaluate translations that are produced by machine translation systems. The range of issues covered includes a broad survey of both human evaluation measures and commonly-used automated metrics, and a review of how these are used for various types of evaluation tasks, such as assessing the translation quality of MT-translated sentences, comparing the performance of alternative MT systems, or measuring the productivity gains of incorporating MT into translation workflows. lavie-2011-evaluating @@ -737,7 +737,7 @@ Feedback Selecting of Manually Acquired Rules Using Automatic Evaluation XianhuaLi - Yajuan + Yajuan YaoMeng QunLiu HaoYu @@ -746,7 +746,7 @@ Investigation for Translation Disambiguation of Verbs in Patent Sentences using Word Grouping - ShoichiYokoyama + ShoichiYokoyama YuichiTakano 2011.mtsummit-wpt.5 yokoyama-takano-2011-investigation @@ -765,8 +765,8 @@ RamonaEnache AdamSlaski AarneRanta - LluísMàrquez - MeritxellGonzàlez + LluísMàrquez + MeritxellGonzàlez 2011.mtsummit-wpt.7 espana-bonet-etal-2011-patent diff --git a/data/xml/2011.tal.xml b/data/xml/2011.tal.xml index fa34c913a8..3fc02ebcc8 100644 --- a/data/xml/2011.tal.xml +++ b/data/xml/2011.tal.xml @@ -3,8 +3,8 @@ Traitement Automatique des Langues, Volume 52, Numéro 1 : Varia [Varia] - ÉricVillemonte de La Clergerie - BéatriceDaille + ÉricVillemonte de La Clergerie + BéatriceDaille YvesLepage FrançoisYvon ATALA (Association pour le Traitement Automatique des Langues) @@ -41,7 +41,7 @@ DenisMaurel NathalieFriburger Jean-YvesAntoine - IrisEshkol-Taravella + IrisEshkol-Taravella DamienNouvel 69–96 2011.tal-1.3 @@ -52,7 +52,7 @@ Identification des assertions dans les textes médicaux : application à la relation patient, problème médical [Identification of assertions in the medical texts: application to the relation patient, medical problem] AmandinePérinet NataliaGrabar - ThierryHamon + ThierryHamon 97–132 2011.tal-1.4 fra @@ -60,8 +60,8 @@ Pour une interlangue utile en traduction automatique de la parole dans des domaines limités [Towards an interlingua for speech translation in limited domains] - PierretteBouillon - MannyRayner + PierretteBouillon + MannyRayner PaulaEstella JohannaGerlach MariaGeorgescul @@ -72,8 +72,8 @@ Comprendre les effets des erreurs d’annotations des plateformes de <fixed-case>TAL</fixed-case>, une étude sur la résolution des anaphores pronominales [Understand the effects of erroneous annotations produced by <fixed-case>NLP</fixed-case> pipelines, a case study on the pronominal anaphora resolution] - DavyWeissenbacher - AdelineNazarenko + DavyWeissenbacher + AdelineNazarenko 161–185 2011.tal-1.6 fra @@ -84,7 +84,7 @@ Traitement Automatique des Langues, Volume 52, Numéro 2 : Vers la morphologie et au-delà [Toward Morphology and beyond] NabilHathout - FiammettaNamer + FiammettaNamer ATALA (Association pour le Traitement Automatique des Langues)
France
2011 @@ -107,7 +107,7 @@ Moranapho: un système multilingue d’analyse morphologique basé sur l’analogie formelle [Moranapho: a multilingual system for morphological analysis based on formal analogy] Jean-FrançoisLavallée - PhilippeLanglais + PhilippeLanglais 17–44 2011.tal-2.2 fra @@ -127,7 +127,7 @@ Modélisation et implémentation de phénomènes flexionnels non canoniques [Modeling and implementing non canonical morphological phenomena] GéraldineWalther - BenoîtSagot + BenoîtSagot 91–122 2011.tal-2.4 fra @@ -155,8 +155,8 @@ Traitement Automatique des Langues, Volume 52, Numéro 3 : Ressources linguistiques libres [Free Language Resources] - NuriaBel - BenoîtSagot + NuriaBel + BenoîtSagot ATALA (Association pour le Traitement Automatique des Langues)
France
2011 @@ -200,10 +200,10 @@
Le corpus <fixed-case>ANNODIS</fixed-case>, un corpus enrichi d’annotations discursives [The <fixed-case>ANNODIS</fixed-case> corpus, a corpus enriched with discourse annotations] - Marie-PaulePéry-Woodley + Marie-PaulePéry-Woodley Stergos D.Afantenos - Lydia-MaiHo-Dac - NicholasAsher + Lydia-MaiHo-Dac + NicholasAsher 71–101 2011.tal-3.4 fra @@ -212,8 +212,8 @@ Définition et conception d’une interface pour l’exploitation de corpus arborés pour non-informaticiens : la plateforme <fixed-case>S</fixed-case>cien<fixed-case>Q</fixed-case>uest du projet Scientext [Definition and design of an interface for treebanks exploitation by non-computer scientists: the <fixed-case>S</fixed-case>cien<fixed-case>Q</fixed-case>uest platform from Scientext project] AchilleFalaise - AgnèsTutin - OlivierKraif + AgnèsTutin + OlivierKraif 103–128 2011.tal-3.5 fra @@ -257,7 +257,7 @@ IsabellaChiari ElisabettaJezek LaureVieu - Fabio MassimoZanzotto + Fabio MassimoZanzotto 217–243 2011.tal-3.9 vetere-etal-2011-senso diff --git a/data/xml/2011.tc.xml b/data/xml/2011.tc.xml index 35dda78019..d3d24f9ae2 100644 --- a/data/xml/2011.tc.xml +++ b/data/xml/2011.tc.xml @@ -32,7 +32,7 @@ An effective model for insertion of translation technologies into <fixed-case>US</fixed-case> government translation environments - CarolVan Ess-Dykema + CarolVan Ess-Dykema 2011.tc-1.3 van-ess-dykema-2011-effective @@ -63,22 +63,22 @@ Towards on-line knowledge sharing dictionaries for <fixed-case>E</fixed-case>uropean law: the Legal Taxonomy Syllabus 3.0 ElenaGrasso - PiercarloRossi + PiercarloRossi AndreaViolato 2011.tc-1.8 grasso-etal-2011-towards Machine translation between uncommon language pairs via a third common language: the case of patents - Benjamin K.Tsou + Benjamin K.Tsou BinLu 2011.tc-1.9 tsou-lu-2011-machine Operationalization of interactive multilingual gateways (i<fixed-case>MAG</fixed-case>s) in the Traouiero project - ChristianBoitet - ValérieBellynck + ChristianBoitet + ValérieBellynck AchilleFalaise NguyenHong-Thai 2011.tc-1.10 diff --git a/data/xml/2012.amta.xml b/data/xml/2012.amta.xml index 5bdc0fac54..54a8f9a760 100644 --- a/data/xml/2012.amta.xml +++ b/data/xml/2012.amta.xml @@ -17,9 +17,9 @@ Domain Adaptation in Machine Translation: Findings from the 2012 <fixed-case>J</fixed-case>ohns <fixed-case>H</fixed-case>opkins Summer Workshop - HalDaumé III + HalDaumé III MarineCarpuat - AlexFraser + AlexFraser ChrisQuirk 2012.amta-keynotes.1 daume-iii-etal-2012-domain @@ -32,7 +32,7 @@ Language Research at <fixed-case>DARPA</fixed-case>-Machine Translation and Beyond - Bonnie J.Dorr + Bonnie J.Dorr 2012.amta-keynotes.3 dorr-2012-language @@ -62,7 +62,7 @@ Hierarchical Phrase-Based <fixed-case>MT</fixed-case> for Phonetic Representation-Based Speech Translation ZeeshanAhmed JieJiang - JulieCarson-Berndsen + JulieCarson-Berndsen PeterCahill AndyWay 2012.amta-papers.1 @@ -72,7 +72,7 @@ Identifying Infrequent Translations by Aligning Non Parallel Sentences JulienBourdaillet - PhilippeLanglais + PhilippeLanglais 2012.amta-papers.2 Aligning a sequence of words to one of its infrequent translations is a difficult task. We propose a simple and original solution to this problem that yields to significant gains over a state-of-the-art transpotting task. Our approach consists in aligning non parallel sentences from the training data in order to reinforce online the alignment models. We show that using only a few pairs of non parallel sentences allows to improve significantly the alignment of infrequent translations. bourdaillet-langlais-2012-identifying @@ -80,16 +80,16 @@ Sample Selection for Large-scale <fixed-case>MT</fixed-case> Discriminative Training YuanCao - SanjeevKhudanpur + SanjeevKhudanpur 2012.amta-papers.3 Discriminative training for MT usually involves numerous features and requires large-scale training set to reach reliable parameter estimation. Other than using the expensive human-labeled parallel corpora for training, semi-supervised methods have been proposed to generate huge amount of “hallucinated” data which relieves the data sparsity problem. However the large training set contains both good samples which are suitable for training and bad ones harmful to the training. How to select training samples from vast amount of data can greatly affect the training performance. In this paper we propose a method for selecting samples that are most suitable for discriminative training according to a criterion measuring the dataset quality. Our experimental results show that by adding samples to the training set selectively, we are able to exceed the performance of system trained with the same amount of samples selected randomly. cao-khudanpur-2012-sample One System, Many Domains: Open-Domain Statistical Machine Translation via Feature Augmentation - JonathanClark - AlonLavie - ChrisDyer + JonathanClark + AlonLavie + ChrisDyer 2012.amta-papers.4 In this paper, we introduce a simple technique for incorporating domain information into a statistical machine translation system that significantly improves translation quality when test data comes from multiple domains. Our approach augments (conjoins) standard translation model and language model features with domain indicator features and requires only minimal modifications to the optimization and decoding procedures. We evaluate our method on two language pairs with varying numbers of domains, and observe significant improvements of up to 1.0 BLEU. clark-etal-2012-one @@ -97,7 +97,7 @@ Identification of Fertile Translations in Comparable Corpora: A Morpho-Compositional Approach EstelleDelpech - BéatriceDaille + BéatriceDaille EmmanuelMorin ClaireLemaire 2012.amta-papers.5 @@ -107,14 +107,14 @@ Challenges in Predicting Machine Translation Utility for Human Post-Editors MichaelDenkowski - AlonLavie + AlonLavie 2012.amta-papers.6 As machine translation quality continues to improve, the idea of using MT to assist human translators becomes increasingly attractive. In this work, we discuss and provide empirical evidence of the challenges faced when adapting traditional MT systems to provide automatic translations for human post-editors to correct. We discuss the differences between this task and traditional adequacy-based tasks and the challenges that arise when using automatic metrics to predict the amount of effort required to post-edit translations. A series of experiments simulating a real-world localization scenario shows that current metrics under-perform on this task, even when tuned to maximize correlation with expert translator judgments, illustrating the need to rethink traditional MT pipelines when addressing the challenges of this translation task. denkowski-lavie-2012-challenges The Impact of Sentence Alignment Errors on Phrase-Based Machine Translation Performance - CyrilGoutte + CyrilGoutte MarineCarpuat GeorgeFoster 2012.amta-papers.7 @@ -124,7 +124,7 @@ Pivot Lightly-Supervised Training for Statistical Machine Translation MatthiasHuck - HermannNey + HermannNey 2012.amta-papers.8 In this paper, we investigate large-scale lightly-supervised training with a pivot language: We augment a baseline statistical machine translation (SMT) system that has been trained on human-generated parallel training corpora with large amounts of additional unsupervised parallel data; but instead of creating this synthetic data from monolingual source language data with the baseline system itself, or from target language data with a reverse system, we employ a parallel corpus of target language data and data in a pivot language. The pivot language data is automatically translated into the source language, resulting in a trilingual corpus with unsupervised source language side. We augment our baseline system with the unsupervised source-target parallel data. Experiments are conducted for the German-French language pair using the standard WMT newstest sets for development and testing. We obtain the unsupervised data by translating the English side of the English-French 109 corpus to German. With careful system design, we are able to achieve improvements of up to +0.4 points BLEU / -0.7 points TER over the baseline. huck-ney-2012-pivot @@ -139,7 +139,7 @@ Building <fixed-case>MT</fixed-case> for a Severely Under-Resourced Language: White <fixed-case>H</fixed-case>mong - WilliamLewis + WilliamLewis PhongYang 2012.amta-papers.10 In this paper, we discuss the development of statistical machine translation for English to/from White Hmong (Language code: mww). White Hmong is a Hmong-Mien language, originally spoken mostly in Southeast Asia, but now predominantly spoken by a large diaspora throughout the world, with populations in the United States, Australia, France, Thailand and elsewhere. Building statistical translation systems for Hmong proved to be incredibly challenging since there are no known parallel or monolingual corpora for the language; in fact, finding data for Hmong proved to be one of the biggest challenges to getting the project off the ground. It was only through a close collaboration with the Hmong community, and active and tireless participation of Hmong speakers, that it became possible to build up a critical mass of data to make the translation project a reality. We see this effort as potentially replicable for other severely resource poor languages of the world, which is likely the case for the majority of the languages still spoken on the planet. Further, the work here suggests that research and work on other severely under-resourced languages can have significant positive impacts for the affected communities, both for accessibility and language preservation. @@ -147,8 +147,8 @@ Phrase-level System Combination for Machine Translation Based on Target-to-Target Decoding - Wei-YunMa - KathleenMcKeown + Wei-YunMa + KathleenMcKeown 2012.amta-papers.11 In this paper, we propose a novel lattice-based MT combination methodology that we call Target-to-Target Decoding (TTD). The combination process is carried out as a “translation” from backbone to the combination result. This perspective suggests the use of existing phrase-based MT techniques in the combination framework. We show how phrase extraction rules and confidence estimations inspired from machine translation improve results. We also propose system-specific LMs for estimating N-gram consensus. Our results show that our approach yields a strong improvement over the best single MT system and competes with other state-of-the-art combination systems. ma-mckeown-2012-phrase @@ -157,7 +157,7 @@ Lost & Found in Translation: Impact of Machine Translated Results on Translingual Information Retrieval KristenParton NizarHabash - KathleenMcKeown + KathleenMcKeown 2012.amta-papers.12 In an ideal cross-lingual information retrieval (CLIR) system, a user query would generate a search over documents in a different language and the relevant results would be presented in the user’s language. In practice, CLIR systems are typically evaluated by judging result relevance in the document language, to factor out the effects of translating the results using machine translation (MT). In this paper, we investigate the influence of four different approaches for integrating MT and CLIR on both retrieval accuracy and user judgment of relevancy. We create a corpus with relevance judgments for both human and machine translated results, and use it to quantify the effect that MT quality has on end-to-end relevance. We find that MT errors result in a 16-39% decrease in mean average precision over the ground truth system that uses human translations. MT errors also caused relevant sentences to appear irrelevant – 5-19% of sentences were relevant in human translation, but were judged irrelevant in MT. To counter this degradation, we present two hybrid retrieval models and two automatic MT post-editing techniques and show that these approaches substantially mitigate the errors and improve the end-to-end relevance. parton-etal-2012-lost @@ -165,8 +165,8 @@ A Graph-based Strategy to Streamline Translation Quality Assessments DanielePighin - LluísFormiga - LluísMàrquez + LluísFormiga + LluísMàrquez 2012.amta-papers.13 We present a detailed analysis of a graph-based annotation strategy that we employed to annotate a corpus of 11,292 real-world English to Spanish automatic translations with relative (ranking) and absolute (adequate/non-adequate) quality assessments. The proposed approach, inspired by previous work in Interactive Evolutionary Computation and Interactive Genetic Algorithms, results in a simpler and faster annotation process. We empirically compare the method against a traditional, explicit ranking approach, and show that the graph-based strategy: 1) is considerably faster, and 2) produces consistently more reliable annotations. pighin-etal-2012-graph @@ -175,7 +175,7 @@ Machine Translation with Binary Feedback: a Large-Margin Approach AvneeshSaluja IanLane - YingZhang + YingZhang 2012.amta-papers.14 Viewing machine translation as a structured classification problem has provided a gateway for a host of structured prediction techniques to enter the field. In particular, large-margin structured prediction methods for discriminative training of feature weights, such as the structured perceptron or MIRA, have started to match or exceed the performance of existing methods such as MERT. One issue with structured problems in general is the difficulty in obtaining fully structured labels, e.g., in machine translation, obtaining reference translations or parallel sentence corpora for arbitrary language pairs. Another issue, more specific to the translation domain, is the difficulty in online training of machine translation systems, since existing methods often require bilingual knowledge to correct translation output online. We propose a solution to these two problems, by demonstrating a way to incorporate binary-labeled feedback (i.e., feedback on whether a translation hypothesis is a “good” or understandable one or not), a form of supervision that can be easily integrated in an online manner, into a machine translation framework. Experimental results show marked improvement by incorporating binary feedback on unseen test data, with gains exceeding 5.5 BLEU points. saluja-etal-2012-machine @@ -189,8 +189,8 @@ Compact Rule Extraction for Hierarchical Phrase-based Translation - BaskaranSankaran - GholamrezaHaffari + BaskaranSankaran + GholamrezaHaffari AnoopSarkar 2012.amta-papers.16 This paper introduces two novel approaches for extracting compact grammars for hierarchical phrase-based translation. The first is a combinatorial optimization approach and the second is a Bayesian model over Hiero grammars using Variational Bayes for inference. In contrast to the conventional Hiero (Chiang, 2007) rule extraction algorithm , our methods extract compact models reducing model size by 17.8% to 57.6% without impacting translation quality across several language pairs. The Bayesian model is particularly effective for resource-poor languages with evidence from Korean-English translation. To our knowledge, this is the first alternative to Hiero-style rule extraction that finds a more compact synchronous grammar without hurting translation performance. @@ -198,7 +198,7 @@ Non-linear n-best List Reranking with Few Features - ArtemSokolov + ArtemSokolov GuillaumeWisniewski FrançoisYvon 2012.amta-papers.17 @@ -210,7 +210,7 @@ WeiWang KlausMacherey WolfgangMacherey - FranzOch + FranzOch PengXu 2012.amta-papers.18 We present a simple and effective infrastructure for domain adaptation for statistical machine translation (MT). To build MT systems for different domains, it trains, tunes and deploys a single translation system that is capable of producing adapted domain translations and preserving the original generic accuracy at the same time. The approach unifies automatic domain detection and domain model parameterization into one system. Experiment results on 20 language pairs demonstrate its viability. @@ -219,7 +219,7 @@ Detailed Analysis of Different Strategies for Phrase Table Adaptation in <fixed-case>SMT</fixed-case> JanNiehues - AlexWaibel + AlexWaibel 2012.amta-papers.19 This paper gives a detailed analysis of different approaches to adapt a statistical machine translation system towards a target domain using small amounts of parallel in-domain data. Therefore, we investigate the differences between the approaches addressing adaptation on the two main steps of building a translation model: The candidate selection and the phrase scoring. For the latter step we characterized the differences by four key aspects. We performed experiments on two different tasks of speech translation and analyzed the influence of the different aspects on the overall translation quality. On both tasks we could show significant improvements by using the presented adaptation techniques. niehues-waibel-2012-detailed @@ -227,7 +227,7 @@ Machine Translation of Labeled Discourse Connectives ThomasMeyer - AndreiPopescu-Belis + AndreiPopescu-Belis NajehHajlaoui AndreaGesmundo 2012.amta-papers.20 @@ -271,8 +271,8 @@ Using Source-Language Transformations to Address Register Mismatches in <fixed-case>SMT</fixed-case> - MannyRayner - PierretteBouillon + MannyRayner + PierretteBouillon BarryHaddow 2012.amta-papers.25 Mismatches between training and test data are a ubiquitous problem for real SMT applications. In this paper, we examine a type of mismatch that commonly arises when translating from French and similar languages: available training data is mostly formal register, but test data may well be informal register. We consider methods for defining surface transformations that map common informal language constructions into their formal language counterparts, or vice versa; we then describe two ways to use these mappings, either to create artificial training data or to pre-process source text at run-time. An initial evaluation performed using crowd-sourced comparisons of alternate translations produced by a French-to-English SMT system suggests that both methods can improve performance, with run-time pre-processing being the more effective of the two. @@ -288,8 +288,8 @@ A Detailed Analysis of Phrase-based and Syntax-based <fixed-case>MT</fixed-case>: The Search for Systematic Differences - RasoulSamad Zadeh Kaljahi - RaphaelRubino + RasoulSamad Zadeh Kaljahi + RaphaelRubino JohannRoturier JenniferFoster 2012.amta-papers.27 @@ -328,7 +328,7 @@ WenqianZhao Cheng ChiehLien RyanKnudson - YingZhang + YingZhang 2012.amta-commercial.1 This paper describes the role of machine translation (MT) for multilingual information access, a service that is desired by digital libraries that wish to provide cross-cultural access to their collections. To understand the performance of MT, we have developed HeMT: an integrated multilingual evaluation platform (http://txcdk-v10.unt.edu/HeMT/) to facilitate human evaluation of machine translation. The results of human evaluation using HeMT on three online MT services are reported. Challenges and benefits of crowdsourcing and collaboration based on our experience are discussed. Additionally, we present the analysis of the translation errors and propose Multi-engine MT strategies to improve translation performance. chen-etal-2012-integrating @@ -457,7 +457,7 @@ <fixed-case>IPT</fixed-case>ranslator: Facilitating Patent Search with Machine Translation JohnTinsley - AlexandruCeausu + AlexandruCeausu JianZhang HeidiDepraetere JoeriVan de Walle @@ -513,7 +513,7 @@ Producing Data for Under-Resourced Languages: A <fixed-case>D</fixed-case>ari-<fixed-case>E</fixed-case>nglish Parallel Corpus of Multi-Genre Text - SherriCondon + SherriCondon 2012.amta-government.4 In Developers producing language technology for under-resourced languages often find relatively little machine readable text for data required to train machine translation systems. Typically, the kinds of text that are most accessible for production of parallel data are news and news-related genres, yet the language that requires translation for analysts and decision-makers reflects a broad range of forms and contents. The proposed paper will describe an effort funded by the ODNI FLPO in which the Army Research Laboratory, assisted by MITRE language technology researchers, produced a Dari-English parallel corpus containing text in a variety of styles and genres that more closely resemble the kinds of documents needed by government users than do traditional news genres. The data production effort began with a survey of Dari documents catalogued in a government repository of material obtained from the field in Afghanistan. Because the documents in the repository are not available for creation of parallel corpora, the goal was to quantify the types of documents in the collection and identify their linguistic features in order to find documents that are similar. Document images were obtained from two sources: (1) the Preserving and Creating Access to Unique Afghan Records collection, an online resource produced by the University of Arizona Libraries and the Afghanistan Centre at Kabul University and (2) The University of Nebraska Arthur Paul Afghanistan Collection. For the latter, document images were obtained by camera capture of books and by selecting pdf images of microfiche records. A set of 1395 document page images was selected to provide 250,000 translated English words in 10 content domains. The images were transcribed and translated according to specifications designed to maximize the quality and usefulness of the data. The corpus will be used to create a Dari-English glossary, and an experiment will quantify improvements to Dari-English translation of multi-genre text when a generic Dari-English machine translation system is customized using the corpus. The proposed paper will present highlights from these efforts. condon-2012-producing @@ -540,7 +540,7 @@ Government Catalog of Language Resources (<fixed-case>GCLR</fixed-case>) - JudithKlavans + JudithKlavans The purpose of this presentation is to discuss recent efforts within the government to address issues of evaluation and return on investment. Pressure to demonstrate value has increased with the growing amount of foreign language information available, with the variety of languages needing to be exploited, and with the increasing gaps between numbers of language-enabled people and the amount of work to be done. This pressure is only growing as budgets shrink, and as global development grows. Over the past year, the ODNI has led an effort to pull together different government stakeholders to determine some baseline standards for determining Return on Investment via task-based evaluation. Stakeholder consensus on major HLT tasks has involved examination of the different approaches to determining return on investment and how it relates use of HLT in the workflow. In addition to reporting on the goals and progress of this group, we will present future directions and invite community input. klavans-2012-government @@ -678,7 +678,7 @@ Reliably Assessing the Quality of Post-edited Translation Based on Formalized Structured Translation Specifications - Alan K.Melby + Alan K.Melby JasonHousley Paul J.Fields EmilyTuioti @@ -690,7 +690,7 @@ Learning to Automatically Post-Edit Dropped Words in <fixed-case>MT</fixed-case> JacobMundt KristenParton - KathleenMcKeown + KathleenMcKeown 2012.amta-wptp.5 Automatic post-editors (APEs) can improve adequacy of MT output by detecting and reinserting dropped content words, but the location where these words are inserted is critical. In this paper, we describe a probabilistic approach for learning reinsertion rules for specific languages and MT systems, as well as a method for synthesizing training data from reference translations. We test the insertion logic on MT systems for Chinese to English and Arabic to English. Our adaptive APE is able to insert within 3 words of the best location 73% of the time (32% in the exact location) in Arabic-English MT output, and 67% of the time in Chinese-English output (30% in the exact location), and delivers improved performance on automated adequacy metrics over a previous rule-based approach to insertion. We consider how particular aspects of the insertion problem make it particularly amenable to machine learning solutions. mundt-etal-2012-learning @@ -706,7 +706,7 @@ To post-edit or not to post-edit? Estimating the benefits of <fixed-case>MT</fixed-case> post-editing for a <fixed-case>E</fixed-case>uropean organization AlexandrosPoulis - DavidKolovratnik + DavidKolovratnik 2012.amta-wptp.7 In the last few years the European Parliament has witnessed a significant increase in translation demand. Although Translation Memory (TM) tools, terminology databases and bilingual concordancers have provided significant leverage in terms of quality and productivity the European Parliament is in need for advanced language technology to keep facing successfully the challenge of multilingualism. This paper describes an ongoing large-scale machine translation post-editing evaluation campaign the purpose of which is to estimate the business benefits from the use of machine translation for the European Parliament. This paper focuses mainly on the design, the methodology and the tools used by the evaluators but it also presents some preliminary results for the following language pairs: Polish-English, Danish-English, Lithuanian-English, English-German and English-French. poulis-kolovratnik-2012-post @@ -755,7 +755,7 @@ Translating <fixed-case>E</fixed-case>nglish Discourse Connectives into <fixed-case>A</fixed-case>rabic: a Corpus-based Analysis and an Evaluation Metric NajehHajlaoui - AndreiPopescu-Belis + AndreiPopescu-Belis 1-8 2012.amta-caas14.1 Discourse connectives can often signal multiple discourse relations, depending on their context. The automatic identification of the Arabic translations of seven English discourse connectives shows how these connectives are differently translated depending on their actual senses. Automatic labelling of English source connectives can help a machine translation system to translate them more correctly. The corpus-based analysis of Arabic translations also enables the definition of a connective-specific evaluation metric for machine translation, which is here validated by human judges on sample English/Arabic translation data. @@ -784,7 +784,7 @@ <fixed-case>ARNE</fixed-case> - A tool for Namend Entity Recognition from <fixed-case>A</fixed-case>rabic Text CarolinShihadeh - GünterNeumann + GünterNeumann 24-31 2012.amta-caas14.4 In this paper, we study the problem of finding named entities in the Arabic text. For this task we present the development of our pipeline software for Arabic named entity recognition (ARNE), which includes tokenization, morphological analysis, Buckwalter transliteration, part of speech tagging and named entity recognition of person, location and organisation named entities. In our first attempt to recognize named entites, we have used a simple, fast and language independent gazetteer lookup approach. In our second attempt, we have used the morphological analysis provided by our pipeline to remove affixes and observed hence an improvement in our performance. The pipeline presented in this paper, can be used in future as a basis for a named entity recognition system that recognized named entites not only using gazetteers, but also making use of morphological information and part of speech tagging. @@ -801,10 +801,10 @@ Using <fixed-case>A</fixed-case>rabic Transliteration to Improve Word Alignment from <fixed-case>F</fixed-case>rench- <fixed-case>A</fixed-case>rabic Parallel Corpora - HoudaSaadane + HoudaSaadane OuafaBenterki NasredineSemmar - ChristianFluhr + ChristianFluhr 38-46 2012.amta-caas14.6 In this paper, we focus on the use of Arabic transliteration to improve the results of a linguistics-based word alignment approach from parallel text corpora. This approach uses, on the one hand, a bilingual lexicon, named entities, cognates and grammatical tags to align single words, and on the other hand, syntactic dependency relations to align compound words. We have evaluated the word aligner integrating Arabic transliteration using two methods: A manual evaluation of the alignment quality and an evaluation of the impact of this alignment on the translation quality by using the Moses statistical machine translation system. The obtained results show that Arabic transliteration improves the quality of both alignment and translation. @@ -842,7 +842,7 @@ Exploiting <fixed-case>W</fixed-case>ikipedia as a Knowledge Base for the Extraction of Linguistic Resources: Application on <fixed-case>A</fixed-case>rabic-<fixed-case>F</fixed-case>rench Comparable Corpora and Bilingual Lexicons RahmaSellami FatihaSadat - LamiaHadrich Belguith + LamiaHadrich Belguith 72-79 2012.amta-caas14.10 We present simple and effective methods for extracting comparable corpora and bilingual lexicons from Wikipedia. We shall exploit the large scale and the structure of Wikipedia articles to extract two resources that will be very useful for natural language applications. We build a comparable corpus from Wikipedia using categories as topic restrictions and we extract bilingual lexicons from inter-language links aligned with statistical method or a combined statistical and linguistic method. @@ -853,7 +853,7 @@ Workshop on Monolingual Machine Translation TsuyoshiOkita - ArtemSokolov + ArtemSokolov TaroWatanabe Association for Machine Translation in the Americas
San Diego, California, USA
@@ -867,9 +867,9 @@ Improving <fixed-case>E</fixed-case>nglish to <fixed-case>S</fixed-case>panish Out-of-Domain Translations by Morphology Generalization and Generation - LluísFormiga - AdolfoHernández - José B.Mariño + LluísFormiga + AdolfoHernández + José B.Mariño EnricMonte 2012.amta-monomt.1 This paper presents a detailed study of a method for morphology generalization and generation to address out-of-domain translations in English-to-Spanish phrase-based MT. The paper studies whether the morphological richness of the target language causes poor quality translation when translating out-of-domain. In detail, this approach first translates into Spanish simplified forms and then predicts the final inflected forms through a morphology generation step based on shallow and deep-projected linguistic information available from both the source and target-language sentences. Obtained results highlight the importance of generalization, and therefore generation, for dealing with out-of-domain data. @@ -890,7 +890,7 @@ Shallow and Deep Paraphrasing for Improved Machine Translation Parameter Optimization Dennis N.Mehay - MichaelWhite + MichaelWhite 2012.amta-monomt.3 String comparison methods such as BLEU (Papineni et al., 2002) are the de facto standard in MT evaluation (MTE) and in MT system parameter tuning (Och, 2003). It is difficult for these metrics to recognize legitimate lexical and grammatical paraphrases, which is important for MT system tuning (Madnani, 2010). We present two methods to address this: a shallow lexical substitution technique and a grammar-driven paraphrasing technique. Grammatically precise paraphrasing is novel in the context of MTE, and demonstrating its usefulness is a key contribution of this paper. We use these techniques to paraphrase a single reference, which, when used for parameter tuning, leads to superior translation performance over baselines that use only human-authored references. mehay-white-2012-shallow @@ -906,14 +906,14 @@ Improving Word Alignment by Exploiting Adapted Word Similarity - Septina DianLarasati + Septina DianLarasati 2012.amta-monomt.5 This paper presents a method to improve a word alignment model in a phrase-based Statistical Machine Translation system for a low-resourced language using a string similarity approach. Our method captures similar words that can be seen as semi-monolingual across languages, such as numbers, named entities, and adapted/loan words. We use several string similarity metrics to measure the monolinguality of the words, such as Longest Common Subsequence Ratio (LCSR), Minimum Edit Distance Ratio (MEDR), and we also use a modified BLEU Score (modBLEU). Our approach is to add intersecting alignment points for word pairs that are orthographically similar, before applying a word alignment heuristic, to generate a better word alignment. We demonstrate this approach on Indonesian-to-English translation task, where the languages share many similar words that are poorly aligned given a limited training data. This approach gives a statistically significant improvement by up to 0.66 in terms of BLEU score. larasati-2012-improving Addressing some Issues of Data Sparsity towards Improving <fixed-case>E</fixed-case>nglish- <fixed-case>M</fixed-case>anipuri <fixed-case>SMT</fixed-case> using Morphological Information - Thoudam DorenSingh + Thoudam DorenSingh 2012.amta-monomt.6 The performance of an SMT system heavily depends on the availability of large parallel corpora. Unavailability of these resources in the required amount for many language pair is a challenging issue. The required size of the resource involving morphologically rich and highly agglutinative language is essentially much more for the SMT systems. This paper investigates on some of the issues on enriching the resource for this kind of languages. Handling of inflectional and derivational morphemes of the morphologically rich target language plays important role in the enrichment process. Mapping from the source to the target side is carried out for the English-Manipuri SMT task using factored model. The SMT system developed shows improvement in the performance both in terms of the automatic scoring and subjective evaluation over the baseline system. singh-2012-addressing diff --git a/data/xml/2012.eamt.xml b/data/xml/2012.eamt.xml index 52798f8590..1136682f33 100644 --- a/data/xml/2012.eamt.xml +++ b/data/xml/2012.eamt.xml @@ -58,9 +58,9 @@ User Evaluation of Interactive Machine Translation Systems VincentAlabau - Luis A.Leiva - DanielOrtiz-Martínez - FranciscoCasacuberta + Luis A.Leiva + DanielOrtiz-Martínez + FranciscoCasacuberta 20-23 2012.eamt-1.5 alabau-etal-2012-user @@ -94,11 +94,11 @@ Building Translation Awareness in Occasional Authors: A User Case from <fixed-case>J</fixed-case>apan MidoriTatsumi - AnthonyHartley + AnthonyHartley HitoshiIsahara KyoKageura ToshioOkamoto - KatsumasaShimizu + KatsumasaShimizu 53-56 2012.eamt-1.9 tatsumi-etal-2012-building @@ -153,7 +153,7 @@ VictorMuntés-Mulero PatriciaPaladini Adell CristinaEspaña-Bonet - LluísMàrquez + LluísMàrquez 77-80 2012.eamt-1.15 muntes-mulero-etal-2012-context @@ -273,9 +273,9 @@ Can Automatic Post-Editing Make <fixed-case>MT</fixed-case> More Meaningful KristenParton NizarHabash - KathleenMcKeown + KathleenMcKeown GonzaloIglesias - Adriàde Gispert + Adriàde Gispert 111-118 2012.eamt-1.34 parton-etal-2012-automatic @@ -291,15 +291,15 @@ Cascaded Phrase-Based Statistical Machine Translation Systems - DanTufiş - Ștefan DanielDumitrescu + DanTufiş + Ștefan DanielDumitrescu 129-136 2012.eamt-1.36 tufis-dumitrescu-2012-cascaded Hybrid Parallel Sentence Mining from Comparable Corpora - DanȘtefănescu + DanȘtefănescu RaduIon SabineHunsicker 137-144 @@ -312,7 +312,7 @@ AntonioToral VassilisPapavassiliou ProkopisProkopidis - Josefvan Genabith + Josefvan Genabith 145-152 2012.eamt-1.38 pecina-etal-2012-domain @@ -332,7 +332,7 @@ JacobDevlin HuaiguCao RohitPrasad - PremkumarNatarajan + PremkumarNatarajan 161-168 2012.eamt-1.40 chen-etal-2012-automatic @@ -340,10 +340,10 @@ Domain Adaptation in <fixed-case>SMT</fixed-case> of User-Generated Forum Content Guided by <fixed-case>OOV</fixed-case> Word Reduction: Normalization and/or Supplementary Data PratyushBanerjee - Sudip KumarNaskar + Sudip KumarNaskar JohannRoturier AndyWay - Josefvan Genabith + Josefvan Genabith 169-176 2012.eamt-1.41 banerjee-etal-2012-domain @@ -352,7 +352,7 @@ Long-distance reordering during search for hierarchical phrase-based <fixed-case>SMT</fixed-case> FabienneBraune AnitaGojun - AlexanderFraser + AlexanderFraser 177-184 2012.eamt-1.42 braune-etal-2012-long @@ -429,27 +429,27 @@ Flexible finite-state lexical selection for rule-based machine translation - Francis M.Tyers + Francis M.Tyers FelipeSánchez-Martínez - Mikel L.Forcada + Mikel L.Forcada 213-220 2012.eamt-1.54 tyers-etal-2012-flexible Statistical Post-Editing of Machine Translation for Domain Adaptation - RaphaëlRubino + RaphaëlRubino StéphaneHuet - FabriceLefèvre - GeorgesLinarès + FabriceLefèvre + GeorgesLinarès 221-228 2012.eamt-1.55 rubino-etal-2012-statistical Crowd-based <fixed-case>MT</fixed-case> Evaluation for non-<fixed-case>E</fixed-case>nglish Target Languages - MichaelPaul - EiichiroSumita + MichaelPaul + EiichiroSumita LuisaBentivogli MarcelloFederico 229-237 @@ -458,7 +458,7 @@ Readability and Translatability Judgments for “Controlled <fixed-case>J</fixed-case>apanese” - AnthonyHartley + AnthonyHartley MidoriTatsumi HitoshiIsahara KyoKageura @@ -485,7 +485,7 @@ <fixed-case>WIT</fixed-case>3: Web Inventory of Transcribed and Translated Talks MauroCettolo - ChristianGirardi + ChristianGirardi MarcelloFederico 261-268 2012.eamt-1.60 @@ -496,7 +496,7 @@ RamonaEnache CristinaEspaña-Bonet AarneRanta - LluísMàrquez + LluísMàrquez 269-276 2012.eamt-1.61 enache-etal-2012-hybrid @@ -513,7 +513,7 @@ Adjunct Alignment in Translation Data with an Application to Phrase Based Statistical Machine Translation SophieArnoult - KhalilSima’an + KhalilSima’an 287-294 2012.eamt-1.63 arnoult-simaan-2012-adjunct @@ -531,7 +531,7 @@ Learning Machine Translation from In-domain and Out-of-domain Data MarcoTurchi - CyrilGoutte + CyrilGoutte NelloCristianini 305-312 2012.eamt-1.65 @@ -542,7 +542,7 @@ MatthiasHuck StephanPeitz MarkusFreitag - HermannNey + HermannNey 313-320 2012.eamt-1.66 huck-etal-2012-discriminative diff --git a/data/xml/2012.freeopmt.xml b/data/xml/2012.freeopmt.xml index 167ccddd1e..c4d9864eb9 100644 --- a/data/xml/2012.freeopmt.xml +++ b/data/xml/2012.freeopmt.xml @@ -60,7 +60,7 @@ A rule-based machine translation system from <fixed-case>S</fixed-case>erbo-<fixed-case>C</fixed-case>roatian to <fixed-case>M</fixed-case>acedonian HrvojePeradin - FrancisTyers + FrancisTyers 55-64 2012.freeopmt-1.6 This paper describes the development of a one-way machine translation system from SerboCroatian to Macedonian on the Apertium platform. Details of resources and development methods are given, as well as an evaluation, and general directives for future work. @@ -69,10 +69,10 @@ Deep evaluation of hybrid architectures: use of different metrics in <fixed-case>MERT</fixed-case> weight optimization CristinaEspaña-Bonet - GorkaLabaka + GorkaLabaka ArantzaDíaz de Ilarranza - LluísMàrquez - KepaSarasola + LluísMàrquez + KepaSarasola 65-76 2012.freeopmt-1.7 espana-bonet-etal-2012-deep diff --git a/data/xml/2012.iwslt.xml b/data/xml/2012.iwslt.xml index 6e47166987..1fdc2d930d 100644 --- a/data/xml/2012.iwslt.xml +++ b/data/xml/2012.iwslt.xml @@ -53,10 +53,10 @@ YouzhengWu Chien-LinHuang XugangLu - Paul R.Dixon + Paul R.Dixon ShigekiMatsuda ChioriHori - HidekiKashioka + HidekiKashioka 34-37 2012.iwslt-evaluation.2 This paper describes our automatic speech recognition (ASR) system for the IWSLT 2012 evaluation campaign. The target data of the campaign is selected from the TED talks, a collection of public speeches on a variety of topics spoken in English. Our ASR system is based on weighted finite-state transducers and exploits an combination of acoustic models for spontaneous speech, language models based on n-gram and factored recurrent neural network trained with effectively selected corpora, and unsupervised topic adaptation framework utilizing ASR results. Accordingly, the system achieved 10.6% and 12.0% word error rate for the tst2011 and tst2012 evaluation set, respectively. @@ -71,7 +71,7 @@ EunachCho TeresaHerrmann RainerKärgel - AlexanderWaibel + AlexanderWaibel 38-45 2012.iwslt-evaluation.3 In this paper, we present the KIT systems participating in the English-French TED Translation tasks in the framework of the IWSLT 2012 machine translation evaluation. We also present several additional experiments on the English-German, English-Chinese and English-Arabic translation pairs. Our system is a phrase-based statistical machine translation system, extended with many additional models which were proven to enhance the translation quality. For instance, it uses the part-of-speech (POS)-based reordering, translation and language model adaptation, bilingual language model, word-cluster language model, discriminative word lexica (DWL), and continuous space language model. In addition to this, the system incorporates special steps in the preprocessing and in the post-processing step. In the preprocessing the noisy corpora are filtered by removing the noisy sentence pairs, whereas in the postprocessing the agreement between a noun and its surrounding words in the French translation is corrected based on POS tags with morphological information. Our system deals with speech transcription input by removing case information and punctuation except periods from the text translation model. @@ -100,7 +100,7 @@ TakamotoKano TetsuoKiso SakrianiSakti - TomokiToda + TomokiToda SatoshiNakamura 54-60 2012.iwslt-evaluation.5 @@ -128,7 +128,7 @@ JoernWuebker MalteNuhn MarkusNußbaum-Thom - HermannNey + HermannNey 69-76 2012.iwslt-evaluation.7 In this paper, the automatic speech recognition (ASR) and statistical machine translation (SMT) systems of RWTH Aachen University developed for the evaluation campaign of the International Workshop on Spoken Language Translation (IWSLT) 2012 are presented. We participated in the ASR (English), MT (English-French, Arabic-English, Chinese-English, German-English) and SLT (English-French) tracks. For the MT track both hierarchical and phrase-based SMT decoders are applied. A number of different techniques are evaluated in the MT and SLT tracks, including domain adaptation via data selection, translation model interpolation, phrase training for hierarchical and phrase-based systems, additional reordering model, word class language model, various Arabic and Chinese segmentation methods, postprocessing of speech recognition output with an SMT system, and system combination. By application of these methods we can show considerable improvements over the respective baseline systems. @@ -139,7 +139,7 @@ XiaoningZhu YimingCui ConghuiZhu - TiejunZhao + TiejunZhao HailongCao 77-80 2012.iwslt-evaluation.8 @@ -168,9 +168,9 @@ SebatianStüker SakrianiSakri GrahamNeubig - TomokiToda + TomokiToda SatoshiNakamura - AlexWaibel + AlexWaibel 87-90 2012.iwslt-evaluation.10 This paper describes our English Speech-to-Text (STT) systems for the 2012 IWSLT TED ASR track evaluation. The systems consist of 10 subsystems that are combinations of different front-ends, e.g. MVDR based and MFCC based ones, and two different phone sets. The outputs of the subsystems are combined via confusion network combination. Decoding is done in two stages, where the systems of the second stage are adapted in an unsupervised manner on the combination of the first stage outputs using VTLN, MLLR, and cM-LLR. @@ -182,14 +182,14 @@ KeigoKubo MatthiasSperber SakrianiSakti - SebastianStüker + SebastianStüker ChristianSaam KevinKilgour ChristianMohr GrahamNeubig - TomokiToda + TomokiToda SatoshiNakamura - AlexWaibel + AlexWaibel 91-95 2012.iwslt-evaluation.11 This paper describes the KIT-NAIST (Contrastive) English speech recognition system for the IWSLT 2012 Evaluation Campaign. In particular, we participated in the ASR track of the IWSLT TED task. The system was developed by Karlsruhe Institute of Technology (KIT) and Nara Institute of Science and Technology (NAIST) teams in collaboration within the interACT project. We employ single system decoding with fully continuous and semi-continuous models, as well as a three-stage, multipass system combination framework built with the Janus Recognition Toolkit. On the IWSLT 2010 test set our single system introduced in this work achieves a WER of 17.6%, and our final combination achieves a WER of 14.4%. @@ -207,10 +207,10 @@ The <fixed-case>LIG</fixed-case> <fixed-case>E</fixed-case>nglish to <fixed-case>F</fixed-case>rench machine translation system for <fixed-case>IWSLT</fixed-case> 2012 - LaurentBesacier + LaurentBesacier BenjaminLecouteux MarwenAzouzi - Ngoc QuangLuong + Ngoc QuangLuong 102-108 2012.iwslt-evaluation.13 This paper presents the LIG participation to the E-F MT task of IWSLT 2012. The primary system proposed made a large improvement (more than 3 point of BLEU on tst2010 set) compared to our last year participation. Part of this improvment was due to the use of an extraction from the Gigaword corpus. We also propose a preliminary adaptation of the driven decoding concept for machine translation. This method allows an efficient combination of machine translation systems, by rescoring the log-linear model at the N-best list level according to auxiliary systems: the basis technique is essentially guiding the search using one or previous system outputs. The results show that the approach allows a significant improvement in BLEU score using Google translate to guide our own SMT system. We also try to use a confidence measure as an additional log-linear feature but we could not get any improvment with this technique. @@ -220,7 +220,7 @@ The <fixed-case>MIT</fixed-case>-<fixed-case>LL</fixed-case>/<fixed-case>AFRL</fixed-case> <fixed-case>IWSLT</fixed-case> 2012 <fixed-case>MT</fixed-case> system JenniferDrexler WadeShen - TimAnderson + TimAnderson RaymondSlyh BrianOre EricHansen @@ -234,7 +234,7 @@ Minimum <fixed-case>B</fixed-case>ayes-risk decoding extended with similar examples: <fixed-case>NAIST</fixed-case>-<fixed-case>NCT</fixed-case> at <fixed-case>IWSLT</fixed-case> 2012 HiroakiShimizu MasaoUtiyama - EiichiroSumita + EiichiroSumita SatoshiNakamura 117-120 2012.iwslt-evaluation.15 @@ -245,7 +245,7 @@ The <fixed-case>NICT</fixed-case> translation system for <fixed-case>IWSLT</fixed-case> 2012 AndrewFinch OhnmarHtun - EiichiroSumita + EiichiroSumita 121-125 2012.iwslt-evaluation.16 finch-etal-2012-nict @@ -271,9 +271,9 @@ <fixed-case>R</fixed-case>omanian to <fixed-case>E</fixed-case>nglish automatic <fixed-case>MT</fixed-case> experiments at <fixed-case>IWSLT</fixed-case>12 – system description paper Ştefan DanielDumitrescu RaduIon - DanŞtefănescu - TiberiuBoroş - DanTufiş + DanŞtefănescu + TiberiuBoroş + DanTufiş 136-143 2012.iwslt-evaluation.19 The paper presents the system developed by RACAI for the ISWLT 2012 competition, TED task, MT track, Romanian to English translation. We describe the starting baseline phrase-based SMT system, the experiments conducted to adapt the language and translation models and our post-translation cascading system designed to improve the translation without external resources. We further present our attempts at creating a better controlled decoder than the open-source Moses system offers. @@ -283,7 +283,7 @@ The <fixed-case>TÜBİTAK</fixed-case> statistical machine translation system for <fixed-case>IWSLT</fixed-case> 2012 CoşkunMermer HamzaKaya - İlknur DurgarEl-Kahlout + İlknur DurgarEl-Kahlout Mehmet UğurDoğan 144-148 2012.iwslt-evaluation.20 @@ -311,7 +311,7 @@ AaronChallenner EnochKan ArvidNeelakantan - PremNatarajan + PremNatarajan 150-157 2012.iwslt-papers.1 We describe a novel two-way speech-to-speech (S2S) translation system that actively detects a wide variety of common error types and resolves them through user-friendly dialog with the user(s). We present algorithms for detecting out-of-vocabulary (OOV) named entities and terms, sense ambiguities, homophones, idioms, ill-formed input, etc. and discuss novel, interactive strategies for recovering from such errors. We also describe our approach for prioritizing different error types and an extensible architecture for implementing these decisions. We demonstrate the efficacy of our system by presenting analysis on live interactions in the English-to-Iraqi Arabic direction that are designed to invoke different error types for spoken language translation. Our analysis shows that the system can successfully resolve 47% of the errors, resulting in a dramatic improvement in the transfer of problematic concepts. @@ -323,7 +323,7 @@ SakrianiSakti ShinnosukeTakamichi GrahamNeubig - TomokiToda + TomokiToda SatoshiNakamura 158-163 2012.iwslt-papers.2 @@ -333,7 +333,7 @@ Continuous space language models using restricted Boltzmann machines JanNiehues - AlexWaibel + AlexWaibel 164-170 2012.iwslt-papers.3 We present a novel approach for continuous space language models in statistical machine translation by using Restricted Boltzmann Machines (RBMs). The probability of an n-gram is calculated by the free energy of the RBM instead of a feedforward neural net. Therefore, the calculation is much faster and can be integrated into the translation process instead of using the language model only in a re-ranking step. Furthermore, it is straightforward to introduce additional word factors into the language model. We observed a faster convergence in training if we include automatically generated word classes as an additional word factor. We evaluated the RBM-based language model on the German to English and English to French translation task of TED lectures. Instead of replacing the conventional n-gram-based language model, we trained the RBM-based language model on the more important but smaller in-domain data and combined them in a log-linear way. With this approach we could show improvements of about half a BLEU point on the translation task. @@ -368,7 +368,7 @@ A simple and effective weighted phrase extraction for machine translation adaptation SaabMansour - HermannNey + HermannNey 193-200 2012.iwslt-papers.7 The task of domain-adaptation attempts to exploit data mainly drawn from one domain (e.g. news) to maximize the performance on the test domain (e.g. weblogs). In previous work, weighting the training instances was used for filtering dissimilar data. We extend this by incorporating the weights directly into the standard phrase training procedure of statistical machine translation (SMT). This allows the SMT system to make the decision whether to use a phrase translation pair or not, a more methodological way than discarding phrase pairs completely when using filtering. Furthermore, we suggest a combined filtering and weighting procedure to achieve better results while reducing the phrase table size. The proposed methods are evaluated in the context of Arabicto-English translation on various conditions, where significant improvements are reported when using the suggested weighted phrase training. The weighting method also improves over filtering, and the combined filtering and weighting is better than a standalone filtering method. Finally, we experiment with mixture modeling, where additional improvements are reported when using weighted phrase extraction over a variety of baselines. @@ -378,7 +378,7 @@ Applications of data selection via cross-entropy difference for real-world statistical machine translation AmittaiAxelrod QingJunLi - William D.Lewis + William D.Lewis 201-208 2012.iwslt-papers.8 We broaden the application of data selection methods for domain adaptation to a larger number of languages, data, and decoders than shown in previous work, and explore comparable applications for both monolingual and bilingual cross-entropy difference methods. We compare domain adapted systems against very large general-purpose systems for the same languages, and do so without a bias to a particular direction. We present results against real-world generalpurpose systems tuned on domain-specific data, which are substantially harder to beat than standard research baseline systems. We show better performance for nearly all domain adapted systems, despite the fact that the domainadapted systems are trained on a fraction of the content of their general domain counterparts. The high performance of these methods suggest applicability to a wide variety of contexts, particularly in scenarios where only small supplies of unambiguously domain-specific data are available, yet it is believed that additional similar data is included in larger heterogenous-content general-domain corpora. @@ -388,7 +388,7 @@ A universal approach to translating numerical and time expressions MeiTu YuZhou - ChengqingZong + ChengqingZong 209-216 2012.iwslt-papers.9 Although statistical machine translation (SMT) has made great progress since it came into being, the translation of numerical and time expressions is still far from satisfactory. Generally speaking, numbers are likely to be out-of-vocabulary (OOV) words due to their non-exhaustive characteristics even when the size of training data is very large, so it is difficult to obtain accurate translation results for the infinite set of numbers only depending on traditional statistical methods. We propose a language-independent framework to recognize and translate numbers more precisely by using a rule-based method. Through designing operators, we succeed to make rules educible and totally separate from codes, thus, we can extend rules to various language-pairs without re-coding, which contributes a lot to the efficient development of an SMT system with good portability. We classify numbers and time expressions into seven types, which are Arabic number, cardinal numbers, ordinal numbers, date, time of day, day of week and figures. A greedy algorithm is developed to deal with rule conflicts. Experiments have shown that our approach can significantly improve the translation performance. @@ -398,8 +398,8 @@ Evaluation of interactive user corrections for lecture transcription HeinrichKolkhorst KevinKilgour - SebastianStüker - AlexWaibel + SebastianStüker + AlexWaibel 217-221 2012.iwslt-papers.10 In this work, we present and evaluate the usage of an interactive web interface for browsing and correcting lecture transcripts. An experiment performed with potential users without transcription experience provides us with a set of example corrections. On German lecture data, user corrections greatly improve the comprehensibility of the transcripts, yet only reduce the WER to 22%. The precision of user edits is relatively low at 77% and errors in inflection, case and compounds were rarely corrected. Nevertheless, characteristic lecture data errors, such as highly specific terms, were typically corrected, providing valuable additional information. @@ -412,7 +412,7 @@ XugangLu ShigekiMatsuda ChioriHori - HidekiKashioka + HidekiKashioka 222-228 2012.iwslt-papers.11 In this study, we extend recurrent neural network-based language models (RNNLMs) by explicitly integrating morphological and syntactic factors (or features). Our proposed RNNLM is called a factored RNNLM that is expected to enhance RNNLMs. A number of experiments are carried out on top of state-of-the-art LVCSR system that show the factored RNNLM improves the performance measured by perplexity and word error rate. In the IWSLT TED test data sets, absolute word error rate reductions over RNNLM and n-gram LM are 0.4∼0.8 points. @@ -420,7 +420,7 @@ Incremental adaptation using translation information and post-editing analysis - FrédéricBlain + FrédéricBlain HolgerSchwenk JeanSenellart 229-236 @@ -450,7 +450,7 @@ Segmentation and punctuation prediction in speech language translation using a monolingual translation system EunahCho JanNiehues - AlexWaibel + AlexWaibel 252-259 2012.iwslt-papers.15 In spoken language translation (SLT), finding proper segmentation and reconstructing punctuation marks are not only significant but also challenging tasks. In this paper we present our recent work on speech translation quality analysis for German-English by improving sentence segmentation and punctuation. From oracle experiments, we show an upper bound of translation quality if we had human-generated segmentation and punctuation on the output stream of speech recognition systems. In our oracle experiments we gain 1.78 BLEU points of improvements on the lecture test set. We build a monolingual translation system from German to German implementing segmentation and punctuation prediction as a machine translation task. Using the monolingual translation system we get an improvement of 1.53 BLEU points on the lecture test set, which is a comparable performance against the upper bound drawn by the oracle experiments. @@ -460,7 +460,7 @@ Sequence labeling-based reordering model for phrase-based <fixed-case>SMT</fixed-case> MinweiFeng Jan-ThorstenPeter - HermannNey + HermannNey 260-267 2012.iwslt-papers.16 For current statistical machine translation system, reordering is still a major problem for language pairs like Chinese-English, where the source and target language have significant word order differences. In this paper, we propose a novel reordering model based on sequence labeling techniques. Our model converts the reordering problem into a sequence labeling problem, i.e. a tagging task. For the given source sentence, we assign each source token a label which contains the reordering information for that token. We also design an unaligned word tag so that the unaligned word phenomenon is automatically implanted in the proposed model. Our reordering model is conditioned on the whole source sentence. Hence it is able to catch the long dependency in the source sentence. Although the learning on large scale task requests notably amounts of computational resources, the decoder makes use of the tagging information as soft constraints. Therefore, the training procedure of our model is computationally expensive for large task while in the test phase (during translation) our model is very efficient. We carried out experiments on five Chinese-English NIST tasks trained with BOLT data. Results show that our model improves the baseline system by 1.32 BLEU 1.53 TER on average. @@ -481,7 +481,7 @@ StephanPeitz SimonWiesler MarkusNußbaum-Thom - HermannNey + HermannNey 276-283 2012.iwslt-papers.18 In spoken language translation a machine translation system takes speech as input and translates it into another language. A standard machine translation system is trained on written language data and expects written language as input. In this paper we propose an approach to close the gap between the output of automatic speech recognition and the input of machine translation by training the translation system on automatically transcribed speech. In our experiments we show improvements of up to 0.9 BLEU points on the IWSLT 2012 English-to-French speech translation task. @@ -490,8 +490,8 @@ Towards a better understanding of statistical post-editing MarionPotet - LaurentBesacier - HervéBlanchon + LaurentBesacier + HervéBlanchon MarwenAzouzi 284-291 2012.iwslt-papers.19 diff --git a/data/xml/2012.tal.xml b/data/xml/2012.tal.xml index 2d9fbbb680..789eed803f 100644 --- a/data/xml/2012.tal.xml +++ b/data/xml/2012.tal.xml @@ -27,7 +27,7 @@ Une étude comparative empirique sur la reconnaissance des entités médicales [An empirical comparative study of medical entity recognition] AsmaBen Abacha - PierreZweigenbaum + PierreZweigenbaum 39–68 2012.tal-1.2 fra @@ -76,7 +76,7 @@ RémyKessler XavierTannier CarolineHagège - VéroniqueMoriceau + VéroniqueMoriceau AndréBittar 57–86 2012.tal-2.3 @@ -85,8 +85,8 @@ A Linguistically Grounded Annotation Language for Spatial Information - JamesPustejovsky - JessicaMoszkowicz + JamesPustejovsky + JessicaMoszkowicz MarcVerhagen 87–113 2012.tal-2.4 @@ -112,9 +112,9 @@ Street-level Geolocation from Natural Language Descriptions - NateBlaylock - JamesAllen - Williamde Beaumont + NateBlaylock + JamesAllen + Williamde Beaumont LucianGalescu HyuckchulJung 177–205 @@ -153,7 +153,7 @@ Atténuation des surdétections d’un correcteur grammatical de qualité commerciale [Reducing overdetections in a commercial grade grammar checker] FabrizioGotti - PhilippeLanglais + PhilippeLanglais GuyLapalme SimonCharest EricBrunelle diff --git a/data/xml/2012.tc.xml b/data/xml/2012.tc.xml index a266c8575c..9ec5254d81 100644 --- a/data/xml/2012.tc.xml +++ b/data/xml/2012.tc.xml @@ -12,7 +12,7 @@ <fixed-case>MNH</fixed-case>-<fixed-case>TT</fixed-case>: a collaborative platform for translator training BogdanBabych - AnthonyHartley + AnthonyHartley KyoKageura MartinThomas MasaoUtiyama @@ -74,7 +74,7 @@ Linport as a standard for interoperability between translation systems - Alan K.Melby + Alan K.Melby Tyler A.Snow 2012.tc-1.10 melby-snow-2012-linport @@ -120,7 +120,7 @@ <fixed-case>P</fixed-case>ro<fixed-case>T</fixed-case>ermino: a comprehensive web-based terminological management tool based on knowledge representation IsabelDurán Muñoz - GloriaCorpas Pastor + GloriaCorpas Pastor Le AnHa 2012.tc-1.17 munoz-etal-2012-protermino diff --git a/data/xml/2013.bitext.xml b/data/xml/2013.bitext.xml index fcdacc8712..4f3bd3bee8 100644 --- a/data/xml/2013.bitext.xml +++ b/data/xml/2013.bitext.xml @@ -3,9 +3,9 @@ Proceedings of the Workshop on Twenty Years of Bitext - ChrisDyer - Noah A.Smith - PhilBlunsom + ChrisDyer + Noah A.Smith + PhilBlunsom Association for Computational Linguistics
Seattle, Washington, USA
October @@ -26,8 +26,8 @@
Twenty Flavors of One Text - DanielZeman - OndřejBojar + DanielZeman + OndřejBojar zeman-bojar-2013-twenty @@ -44,10 +44,10 @@ Aligning Words in Bitexts using the Bilingual Web - JimChang + JimChang Joseph CheeChang - Jian-chengWu - Jason S.Chang + Jian-chengWu + Jason S.Chang chang-etal-2013-aligning @@ -57,9 +57,9 @@ Bitexts as Semantic Mirrors - JörgTiedemann - Lonnekevan der Plas - BegoñaVillada Moirón + JörgTiedemann + Lonnekevan der Plas + BegoñaVillada Moirón tiedemann-etal-2013-bitexts @@ -86,7 +86,7 @@ Lexicalized Reordering Model in Chart-based Machine Translation - ThuyLinhNguyen + ThuyLinhNguyen nguyen-2013-lexicalized diff --git a/data/xml/2013.iwslt.xml b/data/xml/2013.iwslt.xml index 0b41d31956..8473088122 100644 --- a/data/xml/2013.iwslt.xml +++ b/data/xml/2013.iwslt.xml @@ -29,7 +29,7 @@ Report on the 10th <fixed-case>IWSLT</fixed-case> evaluation campaign MauroCettolo JanNiehues - SebastianStüker + SebastianStüker LuisaBentivogli MarcelloFederico 2013.iwslt-evaluation.1 @@ -57,7 +57,7 @@ <fixed-case>MSR</fixed-case>-<fixed-case>FBK</fixed-case> <fixed-case>IWSLT</fixed-case> 2013 <fixed-case>SLT</fixed-case> system description AnthonyAue QinGao - HanyHassan + HanyHassan XiaodongHe GangLi NicholasRuiz @@ -78,7 +78,7 @@ The <fixed-case>NICT</fixed-case> <fixed-case>ASR</fixed-case> system for <fixed-case>IWSLT</fixed-case> 2013 Chien-LinHuang - Paul R.Dixon + Paul R.Dixon ShigekiMatsuda YouzhengWu XugangLu @@ -101,12 +101,12 @@ <fixed-case>QCRI</fixed-case> at <fixed-case>IWSLT</fixed-case> 2013: experiments in <fixed-case>A</fixed-case>rabic-<fixed-case>E</fixed-case>nglish and <fixed-case>E</fixed-case>nglish-<fixed-case>A</fixed-case>rabic spoken language translation HassanSajjad - FranciscoGuzmán - PreslavNakov + FranciscoGuzmán + PreslavNakov AhmedAbdelali KentonMurray FahadAl Obaidli - StephanVogel + StephanVogel 2013.iwslt-evaluation.8 We describe the Arabic-English and English-Arabic statistical machine translation systems developed by the Qatar Computing Research Institute for the IWSLT’2013 evaluation campaign on spoken language translation. We used one phrase-based and two hierarchical decoders, exploring various settings thereof. We further experimented with three domain adaptation methods, and with various Arabic word segmentation schemes. Combining the output of several systems yielded a gain of up to 3.4 BLEU points over the baseline. Here we also describe a specialized normalization scheme for evaluating Arabic output, which was adopted for the IWSLT’2013 evaluation campaign. sajjad-etal-2013-qcri-iwslt @@ -127,7 +127,7 @@ Jan-ThorstenPeter MinweiFeng MarkusFreitag - HermannNey + HermannNey 2013.iwslt-evaluation.10 This work describes the statistical machine translation (SMT) systems of RWTH Aachen University developed for the evaluation campaign International Workshop on Spoken Language Translation (IWSLT) 2013. We participated in the English→French, English↔German, Arabic→English, Chinese→English and Slovenian↔English MT tracks and the English→French and English→German SLT tracks. We apply phrase-based and hierarchical SMT decoders, which are augmented by state-of-the-art extensions. The novel techniques we experimentally evaluate include discriminative phrase training, a continuous space language model, a hierarchical reordering model, a word class language model, domain adaptation via data selection and system combination of standard and reverse order models. By application of these methods we can show considerable improvements over the respective baseline systems. wuebker-etal-2013-rwth @@ -164,8 +164,8 @@ JonasGehring MarkusMüller MatthiasSperber - SebastianStüker - AlexWaibel + SebastianStüker + AlexWaibel 2013.iwslt-evaluation.13 This paper describes our English Speech-to-Text (STT) systems for the 2013 IWSLT TED ASR track. The systems consist of multiple subsystems that are combinations of different front-ends, e.g. MVDR-MFCC based and lMel based ones, GMM and NN acoustic models and different phone sets. The outputs of the subsystems are combined via confusion network combination. Decoding is done in two stages, where the systems of the second stage are adapted in an unsupervised manner on the combination of the first stage outputs using VTLN, MLLR, and cMLLR. kilgour-etal-2013-2013 @@ -185,8 +185,8 @@ SimonWiesler MarkusNußbaum-Thom StephanPeitz - RalfSchlüter - HermannNey + RalfSchlüter + HermannNey 2013.iwslt-evaluation.15 In this paper, German and English large vocabulary continuous speech recognition (LVCSR) systems developed by the RWTH Aachen University for the IWSLT-2013 evaluation campaign are presented. Good improvements are obtained with state-of-the-art monolingual and multilingual bottleneck features. In addition, an open vocabulary approach using morphemic sub-lexical units is investigated along with the language model adaptation for the German LVCSR. For both the languages, competitive WERs are achieved using system combination. shaik-etal-2013-rwth @@ -196,7 +196,7 @@ MarkusFreitag StephanPeitz JoernWuebker - HermannNey + HermannNey NadirDurrani MatthiasHuck PhilippKoehn @@ -204,7 +204,7 @@ JanNiehues MohammedMediani TeresaHerrmann - AlexWaibel + AlexWaibel NicolaBertoldi MauroCettolo MarcelloFederico @@ -220,7 +220,7 @@ JessicaRay WadeShen TerryGleason - TimAnderson + TimAnderson GrantErdmann LaneSchwartz BrianOre @@ -234,8 +234,8 @@ The speech recognition and machine translation system of <fixed-case>IOIT</fixed-case> for <fixed-case>IWSLT</fixed-case> 2013 - Ngoc-QuanPham - Hai-SonLe + Ngoc-QuanPham + Hai-SonLe Tat-ThangVu Chi-MaiLuong 2013.iwslt-evaluation.18 @@ -245,7 +245,7 @@ <fixed-case>TÜBİTAK</fixed-case> <fixed-case>T</fixed-case>urkish-<fixed-case>E</fixed-case>nglish submissions for <fixed-case>IWSLT</fixed-case> 2013 ErtuğrulYılmaz - İlknur DurgarEl-Kahlout + İlknur DurgarEl-Kahlout BurakAydın Zişan SılaÖzil CoşkunMermer @@ -256,7 +256,7 @@ <fixed-case>FBK</fixed-case>’s machine translation systems for the <fixed-case>IWSLT</fixed-case> 2013 evaluation campaign NicolaBertoldi - M. AminFarajian + M. AminFarajian PrashantMathur NicholasRuiz MarcelloFederico @@ -290,7 +290,7 @@ SakrianiSakti KeigoKubo GrahamNeubig - TomokiToda + TomokiToda SatoshiNakamura 2013.iwslt-evaluation.23 This paper describes the NAIST English speech recognition system for the IWSLT 2013 Evaluation Campaign. In particular, we participated in the ASR track of the IWSLT TED task. Last year, we participated in collaboration with Karlsruhe Institute of Technology (KIT). This year is our first time to build a full-fledged ASR system for IWSLT solely developed by NAIST. Our final system utilizes weighted finitestate transducers with four-gram language models. The hypothesis selection is based on the principle of system combination. On the IWSLT official test set our system introduced in this work achieves a WER of 9.1% for tst2011, 10.0% for tst2012, and 16.2% for the new tst2013. @@ -305,7 +305,7 @@ EunahCho YuqiZhang IsabelSlawik - AlexWaibel + AlexWaibel 2013.iwslt-evaluation.24 In this paper, we present the KIT systems participating in all three official directions, namely English→German, German→English, and English→French, in translation tasks of the IWSLT 2013 machine translation evaluation. Additionally, we present the results for our submissions to the optional directions English→Chinese and English→Arabic. We used phrase-based translation systems to generate the translations. This year, we focused on adapting the systems towards ASR input. Furthermore, we investigated different reordering models as well as an extended discriminative word lexicon. Finally, we added a data selection approach for domain adaptation. ha-etal-2013-kit @@ -336,7 +336,7 @@ Using viseme recognition to improve a sign language translation system ChristophSchmidt OscarKoller - HermannNey + HermannNey ThomasHoyoux JustusPiater 2013.iwslt-papers.1 @@ -345,9 +345,9 @@ The <fixed-case>AMARA</fixed-case> corpus: building resources for translating the web’s educational content - FranciscoGuzman + FranciscoGuzman HassanSajjad - StephanVogel + StephanVogel AhmedAbdelali 2013.iwslt-papers.2 In this paper, we introduce a new parallel corpus of subtitles of educational videos: the AMARA corpus for online educational content. We crawl a multilingual collection community generated subtitles, and present the results of processing the Arabic–English portion of the data, which yields a parallel corpus of about 2.6M Arabic and 3.9M English words. We explore different approaches to align the segments, and extrinsically evaluate the resulting parallel corpus on the standard TED-talks tst-2010. We observe that the data can be successfully used for this task, and also observe an absolute improvement of 1.6 BLEU when it is used in combination with TED data. Finally, we analyze some of the specific challenges when translating the educational content. @@ -358,7 +358,7 @@ HiroakiShimizu GrahamNeubig SakrianiSakti - TomokiToda + TomokiToda SatoshiNakamura 2013.iwslt-papers.3 There has been a fair amount of work on automatic speech translation systems that translate in real-time, serving as a computerized version of a simultaneous interpreter. It has been noticed in the field of translation studies that simultaneous interpreters perform a number of tricks to make the content easier to understand in real-time, including dividing their translations into small chunks, or summarizing less important content. However, the majority of previous work has not specifically considered this fact, simply using translation data (made by translators) for learning of the machine translation system. In this paper, we examine the possibilities of additionally incorporating simultaneous interpretation data (made by simultaneous interpreters) in the learning process. First we collect simultaneous interpretation data from professional simultaneous interpreters of three levels, and perform an analysis of the data. Next, we incorporate the simultaneous interpretation data in the learning of the machine translation system. As a result, the translation style of the system becomes more similar to that of a highly experienced simultaneous interpreter. We also find that according to automatic evaluation metrics, our system achieves performance similar to that of a simultaneous interpreter that has 1 year of experience. @@ -366,17 +366,17 @@ Improving the minimum <fixed-case>B</fixed-case>ayes’ risk combination of machine translation systems - JesúsGonzález-Rubio - FranciscoCasacuberta + JesúsGonzález-Rubio + FranciscoCasacuberta 2013.iwslt-papers.4 We investigate the problem of combining the outputs of different translation systems into a minimum Bayes’ risk consensus translation. We explore different risk formulations based on the BLEU score, and provide a dynamic programming decoding algorithm for each of them. In our experiments, these algorithms generated consensus translations with better risk, and more efficiently, than previous proposals. gonzalez-rubio-casacuberta-2013-improving Emprical study of a two-step approach to estimate translation quality - JesúsGonzález-Rubio + JesúsGonzález-Rubio J. RamónNavarro-Cerdán - FranciscoCasacuberta + FranciscoCasacuberta 2013.iwslt-papers.5 We present a method to estimate the quality of automatic translations when reference translations are not available. Quality estimation is addressed as a two-step regression problem where multiple features are combined to predict a quality score. Given a set of features, we aim at automatically extracting the variables that better explain translation quality, and use them to predict the quality score. The soundness of our approach is assessed by the encouraging results obtained in an exhaustive experimentation with several feature sets. Moreover, the studied approach is highly-scalable allowing us to employ hundreds of features to predict translation quality. gonzalez-rubio-etal-2013-emprical @@ -386,8 +386,8 @@ JoshuaWinebarger BaoNguyen JonasGehring - SebastianStüker - AlexWaibel + SebastianStüker + AlexWaibel 2013.iwslt-papers.6 This paper describes our Speech-to-Text (STT) system for French, which was developed as part of our efforts in the Quaero program for the 2013 evaluation. Our STT system consists of six subsystems which were created by combining multiple complementary sources of pronunciation modeling including graphemes with various feature front-ends based on deep neural networks and tonal features. Both speaker-independent and speaker adaptively trained versions of the systems were built. The resulting systems were then combined via confusion network combination and crossadaptation. Through progressive advances and system combination we reach a word error rate (WER) of 16.5% on the 2012 Quaero evaluation data. winebarger-etal-2013-2013 @@ -404,9 +404,9 @@ Incremental unsupervised training for university lecture recognition MichaelHeck - SebastianStüker + SebastianStüker SakrianiSakti - AlexWaibel + AlexWaibel SatoshiNakamura 2013.iwslt-papers.8 In this paper we describe our work on unsupervised adaptation of the acoustic model of our simultaneous lecture translation system. We trained a speaker independent acoustic model, with which we produce automatic transcriptions of new lectures in order to improve the system for a specific lecturer. We compare our results against a model that was trained in a supervised way on an exact manual transcription. We examine four different ways of processing the decoder outputs of the automatic transcription with respect to the treatment of pronunciation variants and noise words. We will show that, instead of fixating the latter informations in the transcriptions, it is of advantage to let the Viterbi algorithm during training decide which pronunciations to use and where to insert which noise words. Further, we utilize word level posterior probabilities obtained during decoding by weighting and thresholding the words of a transcription. @@ -433,7 +433,7 @@ TeresaHerrmann JochenWeiner JanNiehues - AlexWaibel + AlexWaibel 2013.iwslt-papers.11 We analyze the performance of source sentence reordering, a common reordering approach, using oracle experiments on German-English and English-German translation. First, we show that the potential of this approach is very promising. Compared to a monotone translation, the optimally reordered source sentence leads to improvements of up to 4.6 and 6.2 BLEU points, depending on the language. Furthermore, we perform a detailed evaluation of the different aspects of the approach. We analyze the impact of the restriction of the search space by reordering lattices and we can show that using more complex rule types for reordering results in better approximation of the optimally reordered source. However, a gap of about 3 to 3.8 BLEU points remains, presenting a promising perspective for research on extending the search space through better reordering rules. When evaluating the ranking of different reordering variants, the results reveal that the search for the best path in the lattice performs very well for German-English translation. For English-German translation there is potential for an improvement of up to 1.4 BLEU points through a better ranking of the different reordering possibilities in the reordering lattice. herrmann-etal-2013-analyzing @@ -442,7 +442,7 @@ <fixed-case>CRF</fixed-case>-based disfluency detection using semantic features for <fixed-case>G</fixed-case>erman to <fixed-case>E</fixed-case>nglish spoken language translation EunahCho Than-LeHa - AlexWaibel + AlexWaibel 2013.iwslt-papers.12 Disfluencies in speech pose severe difficulties in machine translation of spontaneous speech. This paper presents our conditional random field (CRF)-based speech disfluency detection system developed on German to improve spoken language translation performance. In order to detect speech disfluencies considering syntactics and semantics of speech utterances, we carried out a CRF-based approach using information learned from the word representation and the phrase table used for machine translation. The word representation is gained using recurrent neural networks and projected words are clustered using the k-means algorithm. Using the output from the model trained with the word representations and phrase table information, we achieve an improvement of 1.96 BLEU points on the lecture test set. By keeping or removing humanannotated disfluencies, we show an upper bound and lower bound of translation quality. In an oracle experiment we gain 3.16 BLEU points of improvement on the lecture test set, compared to the same set with all disfluencies. cho-etal-2013-crf @@ -450,10 +450,10 @@ Maximum entropy language modeling for <fixed-case>R</fixed-case>ussian <fixed-case>ASR</fixed-case> EvgeniyShin - SebastianStüker + SebastianStüker KevinKilgour ChristianFügen - AlexWaibel + AlexWaibel 2013.iwslt-papers.13 Russian is a challenging language for automatic speech recognition systems due to its rich morphology. This rich morphology stems from Russian’s highly inflectional nature and the frequent use of preand suffixes. Also, Russian has a very free word order, changes in which are used to reflect connotations of the sentences. Dealing with these phenomena is rather difficult for traditional n-gram models. We therefore investigate in this paper the use of a maximum entropy language model for Russian whose features are specifically designed to deal with the inflections in Russian, as well as the loose word order. We combine this with a subword based language model in order to alleviate the problem of large vocabulary sizes necessary for dealing with highly inflecting languages. Applying the maximum entropy language model during re-scoring improves the word error rate of our recognition system by 1.2% absolute, while the use of the sub-word based language model reduces the vocabulary size from 120k to 40k and the OOV rate from 4.8% to 2.1%. shin-etal-2013-maximum @@ -465,7 +465,7 @@ AdamLopez DamianosKarakos ChrisCallison-Burch - SanjeevKhudanpur + SanjeevKhudanpur 2013.iwslt-papers.14 Research into the translation of the output of automatic speech recognition (ASR) systems is hindered by the dearth of datasets developed for that explicit purpose. For SpanishEnglish translation, in particular, most parallel data available exists only in vastly different domains and registers. In order to support research on cross-lingual speech applications, we introduce the Fisher and Callhome Spanish-English Speech Translation Corpus, supplementing existing LDC audio and transcripts with (a) ASR 1-best, lattice, and oracle output produced by the Kaldi recognition system and (b) English translations obtained on Amazon’s Mechanical Turk. The result is a four-way parallel dataset of Spanish audio, transcriptions, ASR lattices, and English translations of approximately 38 hours of speech, with defined training, development, and held-out test sets. We conduct baseline machine translation experiments using models trained on the provided training data, and validate the dataset by corroborating a number of known results in the field, including the utility of in-domain (information, conversational) training data, increased performance translating lattices (instead of recognizer 1-best output), and the relationship between word error rate and BLEU score. post-etal-2013-improved @@ -491,7 +491,7 @@ SankaranarayananAnanthakrishnan WeiChen RohitKumar - DennisMehay + DennisMehay 2013.iwslt-papers.17 Spoken language translation (SLT) systems typically follow a pipeline architecture, in which the best automatic speech recognition (ASR) hypothesis of an input utterance is fed into a statistical machine translation (SMT) system. Conversational speech often generates unrecoverable ASR errors owing to its rich vocabulary (e.g. out-of-vocabulary (OOV) named entities). In this paper, we study the possibility of alleviating the impact of unrecoverable ASR errors on translation performance by minimizing the contextual effects of incorrect source words in target hypotheses. Our approach is driven by locally-derived penalties applied to bilingual phrase pairs as well as target language model (LM) likelihoods in the vicinity of source errors. With oracle word error labels on an OOV word-rich English-to-Iraqi Arabic translation task, we show statistically significant relative improvements of 3.2% BLEU and 2.0% METEOR over an error-agnostic baseline SMT system. We then investigate the impact of imperfect source error labels on error-aware translation performance. Simulation experiments reveal that modest translation improvements are to be gained with this approach even when the source error labels are noisy. ananthakrishnan-etal-2013-source diff --git a/data/xml/2013.mtsummit.xml b/data/xml/2013.mtsummit.xml index f6e11a199c..0bd5ddadf0 100644 --- a/data/xml/2013.mtsummit.xml +++ b/data/xml/2013.mtsummit.xml @@ -11,7 +11,7 @@ The Operation Sequence Model: Integrating Translation and Reordering Operations in a Single Left-to-Right Model - HinrichSchütze + HinrichSchütze 2013.mtsummit-plenaries.1 schutze-2013-operation @@ -43,7 +43,7 @@ Generative and Discriminative Methods for Online Adaptation in <fixed-case>SMT</fixed-case> - KatharinaWäschle + KatharinaWäschle PatrickSimianer NicolaBertoldi StefanRiezler @@ -56,7 +56,7 @@ KeikoTaguchi AndrewFinch SeiichiYamamoto - EiichiroSumita + EiichiroSumita 2013.mtsummit-papers.3 taguchi-etal-2013-inducing @@ -83,7 +83,7 @@ RohitKumar EnochKan RohitPrasad - PremNatarajan + PremNatarajan 2013.mtsummit-papers.6 ananthakrishnan-etal-2013-semi @@ -91,22 +91,22 @@ Listwise Approach to Learning to Rank for Automatic Evaluation of Machine Translation MaoxiLi AiwenJiang - MingwenWang + MingwenWang 2013.mtsummit-papers.7 li-etal-2013-listwise <fixed-case>MWE</fixed-case> Alignment in Phrase Based Statistical Machine Translation SantanuPal - Sudip KumarNaskar - SivajiBandyopadhyay + Sudip KumarNaskar + SivajiBandyopadhyay 2013.mtsummit-papers.8 pal-etal-2013-mwe Real-life Translation Quality Estimation for <fixed-case>MT</fixed-case> System Selection - LluisFormiga - LluisMarquez + LluisFormiga + LluisMarquez JaumePujantell 2013.mtsummit-papers.9 formiga-etal-2013-real @@ -114,7 +114,7 @@ Yet Another Fast, Robust and Open Source Sentence Aligner. Time to<fixed-case>R</fixed-case>econsider Sentence Alignment? FethiLamraoui - PhilippeLanglais + PhilippeLanglais 2013.mtsummit-papers.10 lamraoui-langlais-2013-yet @@ -137,9 +137,9 @@ Quality Estimation-guided Data Selection for Domain Adaptation of <fixed-case>SMT</fixed-case> PratyushBanerjee - RaphaelRubino + RaphaelRubino JohannRoturier - Josefvan Genabith + Josefvan Genabith 2013.mtsummit-papers.13 banerjee-etal-2013-quality @@ -154,7 +154,7 @@ Design and Analysis of a Large Corpus of Post-Edited Translations: Quality Estimation, Failure Analysis and the Variability of Post-Edition GuillaumeWisniewski - Anil KumarSingh + Anil KumarSingh NataliaSegal FrançoisYvon 2013.mtsummit-papers.15 @@ -171,7 +171,7 @@ Meta-Evaluation of a Diagnostic Quality Metric for Machine Translation - Sudip KumarNaskar + Sudip KumarNaskar AntonioToral FedericoGaspari DeclanGroves @@ -182,7 +182,7 @@ Towards a Generic Approach for Bilingual Lexicon Extraction from Comparable Corpora DhouhaBouamor NasredineSemmar - PierreZweigenbaum + PierreZweigenbaum 2013.mtsummit-papers.18 bouamor-etal-2013-towards @@ -191,7 +191,7 @@ PatrickLehnen Jorn WiibkerJan-Thorsten Peter StephanPeitz - HermannNey + HermannNey 2013.mtsummit-papers.19 lehnen-etal-2013-hidden @@ -201,7 +201,7 @@ MinweiFeng MatthiasHuck StephanPeitz - HermannNey + HermannNey 2013.mtsummit-papers.20 freitag-etal-2013-reverse @@ -216,8 +216,8 @@ A Free/Open-source <fixed-case>K</fixed-case>azakh-<fixed-case>T</fixed-case>atar Machine Translation System IlnarSalimzyanov - JonathanWashington - FrancisTyers + JonathanWashington + FrancisTyers 2013.mtsummit-papers.22 salimzyanov-etal-2013-free @@ -250,7 +250,7 @@ Translating the <fixed-case>FINREP</fixed-case> Taxonomy using a Domain-specific Corpus - MihaelArcan + MihaelArcan Susan MarieThomas DerekDe Brandt PaulBuitelaar @@ -277,7 +277,7 @@ A <fixed-case>CCG</fixed-case>-based Quality Estimation Metric for Statistical Machine Translation Learning from Human Judgments of Machine Translation Output - MajaPopovic + MajaPopovic EleftheriosAvramidis AljoschaBurchardt SabineHunsicker @@ -289,7 +289,7 @@ Learning from Human Judgments of Machine Translation Output - MajaPopovic + MajaPopovic EleftheriosAvramidis AljoschaBurchardt SabineHunsicker @@ -301,8 +301,8 @@ Towards the Supervised Machine Translation: Real Word Alignments and Translations in a Multi-task Active Learning process - Martha-AliciaRocha - Joan-AndreuSanchez + Martha-AliciaRocha + Joan-AndreuSanchez 2013.mtsummit-posters.6 rocha-sanchez-2013-towards @@ -310,7 +310,7 @@ Comparing Forum Data Post-Editing Performance Using Translation Memory and Machine Translation Output: A Pilot Study LuciaMorado Vazquez SilviaRodriguez Vazquez - PierretteBouillon + PierretteBouillon 2013.mtsummit-posters.7 morado-vazquez-etal-2013-comparing @@ -337,7 +337,7 @@ MariaMateva RamonaEnache CristinaEspana-Bonet - LluisMarquez + LluisMarquez BorislavPopov AarneRanta 2013.mtsummit-posters.10 @@ -346,15 +346,15 @@ Application of Online Terminology Services in Statistical Machine Translation RaivisSkadins - MarcisPinnis + MarcisPinnis TatianaGornostay - AndrejsVasiljevs + AndrejsVasiljevs 2013.mtsummit-posters.11 skadins-etal-2013-application Key Problems in Conversion from Simplified to Traditional <fixed-case>C</fixed-case>hinese Characters Topic Models for Translation Quality Estimation for Gisting Purposes - RaphaelRubino + RaphaelRubino Jose GuilhermeCamargo de Souza JenniferFoster LuciaSpecia @@ -363,7 +363,7 @@ Topic Models for Translation Quality Estimation for Gisting Purposes - RaphaelRubino + RaphaelRubino Jose GuilhermeCamargo de Souza JenniferFoster LuciaSpecia @@ -384,7 +384,7 @@ Analyzing and Predicting <fixed-case>MT</fixed-case> Utility and Post-Editing Productivity in Enterprise-scale Translation Projects - AlonLavie + AlonLavie OlgaBeregovaya MichaelDenkowski DavidClarke @@ -438,8 +438,8 @@ Let’s<fixed-case>MT</fixed-case>! as a Learning Platform for <fixed-case>SMT</fixed-case> - HanneFersøe - Dorte HaltrupHansen + HanneFersøe + Dorte HaltrupHansen LeneOffersgaard SusiOlsen ClausPovlsen @@ -448,12 +448,12 @@ User Evaluation of Advanced Interaction Features for a Computer-Assisted Translation Workbench - VicenteAlabau + VicenteAlabau JesusGonzalez-Rubio - Luis A.Leiva - DanielOrtiz-Martínez + Luis A.Leiva + DanielOrtiz-Martínez GermanSanchis-Trilles - FranciscoCasacuberta + FranciscoCasacuberta BartoloméMesa-Lao RagnarBonk MichaelCarl @@ -466,7 +466,7 @@ ThierryEtchegoyhen MarkFishel JieJiang - Mirjam SepesyMaucec + Mirjam SepesyMaucec 2013.mtsummit-user.10 etchegoyhen-etal-2013-smt @@ -523,7 +523,7 @@ Automated Community Content Editing <fixed-case>P</fixed-case>or<fixed-case>T</fixed-case>al (<fixed-case>ACCEPT</fixed-case>) - PierretteBouillon + PierretteBouillon 2013.mtsummit-european.1 bouillon-2013-automated @@ -539,7 +539,7 @@ <fixed-case>CASMACAT</fixed-case>: Cognitive Analysis and Statistical Methods for Advanced Computer Aided Translation PhilippKoehn MichaelCarl - FranciscoCasacuberta + FranciscoCasacuberta EvaMarcos 2013.mtsummit-european.3 koehn-etal-2013-casmacat @@ -553,18 +553,18 @@ Bridges Across the Language Divide — <fixed-case>EU</fixed-case>-<fixed-case>BRIDGE</fixed-case> Excitement: Exploring Customer Interactions through Textual <fixed-case>E</fixed-case>ntail<fixed-case>MENT</fixed-case> IdoDagan - BernardoMagnini - GuenterNeumann - SebastianPado + BernardoMagnini + GuenterNeumann + SebastianPado 2013.mtsummit-european.5 dagan-etal-2013-bridges Excitement: Exploring Customer Interactions through Textual <fixed-case>E</fixed-case>ntail<fixed-case>MENT</fixed-case> IdoDagan - BernardoMagnini - GuenterNeumann - SebastianPado + BernardoMagnini + GuenterNeumann + SebastianPado 2013.mtsummit-european.6 dagan-etal-2013-excitement @@ -573,7 +573,7 @@ ManuelHerranz AlexHelle EliaYuste - RuslanMitkov + RuslanMitkov LuciaSpecia 2013.mtsummit-european.7 herranz-etal-2013-pangeanic @@ -581,7 +581,7 @@ <fixed-case>FAUST</fixed-case>: Feedback Analysis for User Adaptive Statistical Translation WilliamByrne - LluisMarquez + LluisMarquez 2013.mtsummit-european.8 byrne-marquez-2013-faust @@ -615,14 +615,14 @@ <fixed-case>MONNET</fixed-case>: Multilingual Ontologies for Networked Knowledge - MihaelArcan + MihaelArcan PaulBuitelaar 2013.mtsummit-european.13 arcan-buitelaar-2013-monnet <fixed-case>M</fixed-case>oses<fixed-case>C</fixed-case>ore: <fixed-case>M</fixed-case>oses Open Source Evaluation and Support Co-ordination for <fixed-case>O</fixed-case>ut<fixed-case>R</fixed-case>each and Exploitation <fixed-case>PANACEA</fixed-case>: Platform for Automatic, Normalised Annotation and Cost-Effective Acquisition of Language Resources for Human Language Technologies - NuriaBel + NuriaBel MarcPoch AntonioToral 2013.mtsummit-european.14 @@ -630,7 +630,7 @@ <fixed-case>PANACEA</fixed-case>: Platform for Automatic, Normalised Annotation and Cost-Effective Acquisition of Language Resources for Human Language Technologies - NuriaBel + NuriaBel MarcPoch AntonioToral 2013.mtsummit-european.15 @@ -648,12 +648,12 @@ <fixed-case>QTL</fixed-case>aunchpad StephenDoherty DeclanGroves - Josefvan Genabith + Josefvan Genabith ArleLommel AljoschaBurchardt HansUszkoreit LuciaSpecia - SteliosPiperidis + SteliosPiperidis 2013.mtsummit-european.17 doherty-etal-2013-qtlaunchpad @@ -661,7 +661,7 @@ <fixed-case>SIGNSPEAK</fixed-case>: Scientific Understanding and Vision-based Technological Development for Continuous Sign Language Recognition and Translation JensForster ChristophSchmidt - HermannNey + HermannNey 2013.mtsummit-european.18 forster-etal-2013-signspeak @@ -675,14 +675,14 @@ G.van Loenhout A.del Pozo D.Spiliotopoulos - Mirjam SepesyMaucec + Mirjam SepesyMaucec A.Turner 2013.mtsummit-european.19 georgakopoulou-etal-2013-sumat <fixed-case>T</fixed-case>aa<fixed-case>S</fixed-case>: Terminology as a Service - AndrejsVasiljevs + AndrejsVasiljevs TatianaGornostay 2013.mtsummit-european.20 vasiljevs-gornostay-2013-taas @@ -712,7 +712,7 @@
Nice, France
September 2 2013 - ShoichiYokoyama + ShoichiYokoyama mtsummit pslt @@ -747,7 +747,7 @@ YunJin Oh-WoogKwon Seung-HoonNa - Young-GilKim + Young-GilKim 2013.mtsummit-wpt.4 jin-etal-2013-patent
@@ -755,7 +755,7 @@ Exploiting multiple resources for <fixed-case>J</fixed-case>apanese to <fixed-case>E</fixed-case>nglish patent translation RahmaSellami FatihaSadat - LamiaHadrich Belguith + LamiaHadrich Belguith 2013.mtsummit-wpt.5 sellami-etal-2013-exploiting
@@ -779,7 +779,7 @@
What can we learn about the selection mechanism for post-editing? - MajaPopović + MajaPopović EleftheriosAvramidis AljoschaBurchardt DavidVilar @@ -815,26 +815,26 @@ Combining pre-editing and post-editing to improve <fixed-case>SMT</fixed-case> of user-generated content JohannaGerlach VictoriaPorro - PierretteBouillon + PierretteBouillon SabineLehmann 2013.mtsummit-wptp.6 gerlach-etal-2013-combining Advanced computer aided translation with a web-based workbench - VicentAlabau + VicentAlabau RagnarBonk ChristianBuck MichaelCarl - FranciscoCasacuberta - MercedesGarcía-Martínez + FranciscoCasacuberta + MercedesGarcía-Martínez JesúsGonzález PhilippKoehn - LuisLeiva + LuisLeiva BartoloméMesa-Lao DanielOriz - HervéSaint-Amand - GermánSanchis + HervéSaint-Amand + GermánSanchis CharaTsiukala 2013.mtsummit-wptp.7 alabau-etal-2013-advanced @@ -874,15 +874,15 @@ Online production of <fixed-case>HQ</fixed-case> parallel corpora and permanent task-based evaluation of multiple <fixed-case>MT</fixed-case> systems: both can be obtained through i<fixed-case>MAG</fixed-case>s with no added cost - LingxiaoWang - ChristianBoitet + LingxiaoWang + ChristianBoitet 2013.mtsummit-wptp.12 wang-boitet-2013-online Issues in incremental adaptation of statistical <fixed-case>MT</fixed-case> from human post-edits MauroCettolo - ChristopheServan + ChristopheServan NicolaBertoldi MarcelloFederico LoïcBarrault @@ -913,7 +913,7 @@ All that glitters is not gold when translating phraseological units - GloriaCorpas Pastor + GloriaCorpas Pastor 2013.mtsummit-wmwumttt.1.Presentation.pdf corpas-pastor-2013-glitters @@ -925,7 +925,7 @@ Anaphora resolution, collocations and translation - EricWehrli + EricWehrli LukaNerima 2013.mtsummit-wmwumttt.3 wehrli-nerima-2013-anaphora @@ -933,8 +933,8 @@ A flexible framework for collocation retrieval and translation from parallel and comparable corpora Oscar MendozaRivera - RuslanMitkov - GloriaCorpas Pastor + RuslanMitkov + GloriaCorpas Pastor 2013.mtsummit-wmwumttt.4 rivera-etal-2013-flexible @@ -967,7 +967,7 @@ How hard is it to automatically translate phrasal verbs from <fixed-case>E</fixed-case>nglish to <fixed-case>F</fixed-case>rench? CarlosRamish - LaurentBesacier + LaurentBesacier AlexanderKobzar 2013.mtsummit-wmwumttt.8 ramish-etal-2013-hard diff --git a/data/xml/2013.tal.xml b/data/xml/2013.tal.xml index 9c91fea39d..75f0730ddd 100644 --- a/data/xml/2013.tal.xml +++ b/data/xml/2013.tal.xml @@ -25,7 +25,7 @@ Stratégies discriminantes pour intégrer la reconnaissance des mots composés dans un analyseur syntaxique en constituants [Discriminative strategies for integrating multiword expression recognition in a constituent parser] - MatthieuConstant + MatthieuConstant AnthonySigogne PatrickWatrin 47–70 @@ -36,7 +36,7 @@ Evaluer et améliorer une ressource distributionnelle: protocole d’annotation de liens sémantiques en contexte [Evaluating and improving a distributional resource: protocol for in-context annotation of semantic links] ClémentineAdam - CécileFabre + CécileFabre PhilippeMuller 71–97 2013.tal-1.3 @@ -46,7 +46,7 @@ Désambiguïsation lexicale de textes : efficacité qualitative et temporelle d’un algorithme à colonies de fourmis [Lexical disambiguation of texts: qualitative and temporal efficiency of an ant colony algorithm] DidierSchwab - JérômeGoulian + JérômeGoulian AndonTchechmedjiev 99–138 2013.tal-1.4 @@ -66,7 +66,7 @@ Les apports du <fixed-case>TAL</fixed-case> à la lisibilité du français langue étrangère [Contributions of <fixed-case>NLP</fixed-case> to the readability of <fixed-case>F</fixed-case>rench as a foreign language] ThomasFrançois - CédrickFairon + CédrickFairon 171–202 2013.tal-1.6 fra @@ -98,7 +98,7 @@ Préface [Foreword] SophiaAnamiadou NathalieFriburger - SophieRosset + SophieRosset 7–11 2013.tal-2.1 fra @@ -141,7 +141,7 @@ Traitement automatique des entités nommées en arabe: détection et traduction [Automatic processing of <fixed-case>A</fixed-case>rabic named entities: detection and translation] SouhirGahbiche-Braham - HélèneBonneau-Maynard + HélèneBonneau-Maynard FrançoisYvon 101–132 2013.tal-2.5 @@ -184,7 +184,7 @@ Code-Mixing in Social Media Text AmitavaDas - BjörnGambäck + BjörnGambäck 41–64 2013.tal-3.3 das-gamback-2013-code diff --git a/data/xml/2014.amta.xml b/data/xml/2014.amta.xml index 748d2e31a2..6965b18d7f 100644 --- a/data/xml/2014.amta.xml +++ b/data/xml/2014.amta.xml @@ -22,7 +22,7 @@ <fixed-case>B</fixed-case>ayesian iterative-cascade framework for hierarchical phrase-based translation - BaskaranSankaran + BaskaranSankaran AnoopSarkar 15-27 2014.amta-researchers.2 @@ -44,7 +44,7 @@ Using any machine translation source for fuzzy-match repair in a computer-aided translation setting John E.Ortega FelipeSánchez-Martinez - Mikel L.Forcada + Mikel L.Forcada 42-53 2014.amta-researchers.4 When a computer-assisted translation (CAT) tool does not find an exact match for the source segment to translate in its translation memory (TM), translators must use fuzzy matches that come from translation units in the translation memory that do not completely match the source segment. We explore the use of a fuzzy-match repair technique called patching to repair translation proposals from a TM in a CAT environment using any available machine translation system, or any external bilingual source, regardless of its internals. Patching attempts to aid CAT tool users by repairing fuzzy matches and proposing improved translations. Our results show that patching improves the quality of translation proposals and reduces the amount of edit operations to perform, especially when a specific set of restrictions is applied. @@ -52,7 +52,7 @@ Enhancing statistical machine translation with bilingual terminology in a <fixed-case>CAT</fixed-case> environment - MihaelArcan + MihaelArcan MarcoTurchi SaraTopelli PaulBuitelaar @@ -92,7 +92,7 @@ Document-level re-ranking with soft lexical and semantic features for statistical machine translation ChenchenDing MasaoUtiyama - EiichiroSumita + EiichiroSumita 110-123 2014.amta-researchers.9 We introduce two document-level features to polish baseline sentence-level translations generated by a state-of-the-art statistical machine translation (SMT) system. One feature uses the word-embedding technique to model the relation between a sentence and its context on the target side; the other feature is a crisp document-level token-type ratio of target-side translations for source-side words to model the lexical consistency in translation. The weights of introduced features are tuned to optimize the sentence- and document-level metrics simultaneously on the basis of Pareto optimality. Experimental results on two different schemes with different corpora illustrate that the proposed approach can efficiently and stably integrate document-level information into a sentence-level SMT system. The best improvements were approximately 0.5 BLEU on test sets with statistical significance. @@ -123,7 +123,7 @@ PrashantMathur MauroCettolo MarcelloFederico - José G.C.de Souza + José G.C.de Souza 152-165 2014.amta-researchers.12 In this paper we investigate the problem of adapting a machine translation system to the feedback provided by multiple post-editors. It is well know that translators might have very different post-editing styles and that this variability hinders the application of online learning methods, which indeed assume a homogeneous source of adaptation data. We hence propose multi-task learning to leverage bias information from each single post-editors in order to constrain the evolution of the SMT system. A new framework for significance testing with sentence level metrics is described which shows that Multi-Task learning approaches outperforms existing online learning approaches, with significant gains of 1.24 and 1.88 TER score over a strong online adaptive baseline, on a test set of post-edits produced by four translators texts and on a popular benchmark with multiple references, respectively. @@ -142,7 +142,7 @@ Expanding machine translation training data with an out-of-domain corpus using language modeling based vocabulary saturation BurakAydın - ArzucanÖzgür + ArzucanÖzgür 180-192 2014.amta-researchers.14 The training data size is of utmost importance for statistical machine translation (SMT), since it affects the training time, model size, decoding speed, as well as the system’s overall success. One of the challenges for developing SMT systems for languages with less resources is the limited sizes of the available training data. In this paper, we propose an approach for expanding the training data by including parallel texts from an out-of-domain corpus. Selecting the best out-of-domain sentences for inclusion in the training set is important for the overall performance of the system. Our method is based on first ranking the out-of-domain sentences using a language modeling approach, and then, including the sentences to the training set by using the vocabulary saturation filter technique. We evaluated our approach for the English-Turkish language pair and obtained promising results. Performance improvements of up to +0.8 BLEU points for the English-Turkish translation system are achieved. We compared our results with the translation model combination approaches as well and reported the improvements. Moreover, we implemented our system with dependency parse tree based language modeling in addition to the n-gram based language modeling and reported comparable results. @@ -151,11 +151,11 @@ Comparison of data selection techniques for the translation of video lectures JoernWuebker - HermannNey + HermannNey AdriàMartínez-Villaronga AdriàGiménez - AlfonsJuan - ChristopheServan + AlfonsJuan + ChristopheServan MarcDymetman ShacharMirkin 193-207 @@ -169,7 +169,7 @@ HengYu HongmeiZhao QunLiu - Yajuan + Yajuan 208-221 2014.amta-researchers.16 This paper gives a general review and detailed analysis of China Workshop on Machine Translation (CWMT) Evaluation. Compared with the past CWMT evaluation campaigns, CWMT2013 evaluation is characterized as follows: first, adopting gray-box evaluation which makes the results more replicable and controllable; second, adding one rule-based system as a counterpart; third, carrying out manual evaluations on some specific tasks to give a more comprehensive analysis of the translation errors. Boosted by those new features, our analysis and case study on the evaluation results shows the pros and cons of both rule-based and statistical systems, and reveals some interesting correlations bewteen automatic and manual evaluation metrics on different translation systems. @@ -180,7 +180,7 @@ JanNiehues AlexanderAllauzen FrançoisYvon - AlexWaibel + AlexWaibel 222-233 2014.amta-researchers.17 This paper presents two improvements of language models based on Restricted Boltzmann Machine (RBM) for large machine translation tasks. In contrast to other continuous space approach, RBM based models can easily be integrated into the decoder and are able to directly learn a hidden representation of the n-gram. Previous work on RBM-based language models do not use a shared word representation and therefore, they might suffer of a lack of generalization for larger contexts. Moreover, since the training step is very time consuming, they are only used for quite small copora. In this work we add a shared word representation for the RBM-based language model by factorizing the weight matrix. In addition, we propose an efficient and tailored sampling algorithm that allows us to drastically speed up the training process. Experiments are carried out on two German to English translation tasks and the results show that the training time could be reduced by a factor of 10 without any drop in performance. Furthermore, the RBM-based model can also be trained on large size corpora. @@ -219,8 +219,8 @@ Using noun class information to model selectional preferences for translating prepositions in <fixed-case>SMT</fixed-case> MarionWeller - SabineSchulte im Walde - AlexanderFraser + SabineSchulte im Walde + AlexanderFraser 275-287 2014.amta-researchers.21 Translating prepositions is a difficult and under-studied problem in SMT. We present a novel method to improve the translation of prepositions by using noun classes to model their selectional preferences. We compare three variants of noun class information: (i) classes induced from the lexical resource GermaNet or obtained from clusterings based on either (ii) window information or (iii) syntactic features. Furthermore, we experiment with PP rule generalization. While we do not significantly improve over the baseline, our results demonstrate that (i) integrating selectional preferences as rigid class annotation in the parse tree is sub-optimal, and that (ii) clusterings based on window co-occurrence are more robust than syntax-based clusters or GermaNet classes for the task of modeling selectional preferences. @@ -238,7 +238,7 @@ Data selection for compact adapted <fixed-case>SMT</fixed-case> models ShacharMirkin - LaurentBesacier + LaurentBesacier 301-314 2014.amta-researchers.23 Data selection is a common technique for adapting statistical translation models for a specific domain, which has been shown to both improve translation quality and to reduce model size. Selection relies on some in-domain data, of the same domain of the texts expected to be translated. Selecting the sentence-pairs that are most similar to the in-domain data from a pool of parallel texts has been shown to be effective; yet, this approach holds the risk of resulting in a limited coverage, when necessary n-grams that do appear in the pool are less similar to in-domain data that is available in advance. Some methods select additional data based on the actual text that needs to be translated. While useful, this is not always a practical scenario. In this work we describe an extensive exploration of data selection techniques over Arabic to French datasets, and propose methods to address both similarity and coverage considerations while maintaining a limited model size. @@ -266,9 +266,9 @@ Automatic dialect classification for statistical machine translation SaabMansour - YaserAl-Onaizan + YaserAl-Onaizan GraemeBlackwood - ChristophTillmann + ChristophTillmann 342-355 2014.amta-researchers.26 The training data for statistical machine translation are gathered from various sources representing a mixture of domains. In this work, we argue that when translating dialects representing varieties of the same language, a manually assigned data source is not a reliable indicator of the dialect. We resort to automatic dialect classification to refine the training corpora according to the different dialects and build improved dialect specific systems. A fairly standard classifier for Arabic developed within this work achieves state-of-the-art performance, with classification precision above 90%, making it usefully accurate for our application. The classification of the data is then used to distinguish between the different dialects, split the data accordingly, and utilize the new splits for several adaptation techniques. Performing translation experiments on a large scale dialectal Arabic to English translation task, our results show that the classifier generates better contrast between the dialects and achieves superior translation quality than using the original manual corpora splits. @@ -347,9 +347,9 @@ Real-world challenges in application of <fixed-case>MT</fixed-case> for localization: the <fixed-case>B</fixed-case>altic case - MārcisPinnis + MārcisPinnis RaivisSkadiņš - AndrejsVasiļjevs + AndrejsVasiļjevs 66-79 2014.amta-users.7.Presentation.pdf pinnis-etal-2014-real @@ -391,9 +391,9 @@ Machine translation for e-government – the <fixed-case>B</fixed-case>altic case - AndrejsVasiļjevs - RihardsKalniņš - MārcisPinnis + AndrejsVasiļjevs + RihardsKalniņš + MārcisPinnis RaivisSkadiņš 181-193 2014.amta-users.13.Presentation.pdf @@ -404,16 +404,16 @@ TanyaHelmen VanesaJurica DanielleSilverman - ElizabethRicherson + ElizabethRicherson 194-202 2014.amta-users.14.Presentation.pdf helmen-etal-2014-panel A novel use of <fixed-case>MT</fixed-case> in the development of a text level analytic for language learning - CarolVan Ess-Dykema - SalimRoukos - AmyWeinberg + CarolVan Ess-Dykema + SalimRoukos + AmyWeinberg 203-212 2014.amta-users.15.Presentation.pdf van-ess-dykema-etal-2014-novel @@ -436,9 +436,9 @@ Handling entities in <fixed-case>MT</fixed-case>/<fixed-case>CAT</fixed-case>/<fixed-case>HLT</fixed-case> - KeithMiller + KeithMiller LindaMoreau - SherriCondon + SherriCondon 2014.amta-tutorials.1.Presentation.pdf miller-etal-2014-handling @@ -489,12 +489,12 @@ Integrating online and active learning in a computer-assisted translation workbench - VicentAlabau - JesúsGonzález-Rubio - DanielOrtiz-Martínez - GermánSanchis-Trilles - FranciscoCasacuberta - MercedesGarcía-Martínez + VicentAlabau + JesúsGonzález-Rubio + DanielOrtiz-Martínez + GermánSanchis-Trilles + FranciscoCasacuberta + MercedesGarcía-Martínez BartoloméMesa-Lao Dan CheungPetersen BarbaraDragsted @@ -506,9 +506,9 @@ Towards a combination of online and multitask learning for <fixed-case>MT</fixed-case> quality estimation: a preliminary study - José G.C.de Souza + José G.C.de Souza MarcoTurchi - MatteoNegri + MatteoNegri 9-19 2014.amta-workshop.2 Quality estimation (QE) for machine translation has emerged as a promising way to provide real-world applications with methods to estimate at run-time the reliability of automatic translations. Real-world applications, however, pose challenges that go beyond those of current QE evaluation settings. For instance, the heterogeneity and the scarce availability of training data might contribute to significantly raise the bar. To address these issues we compare two alternative machine learning paradigms, namely online and multi-task learning, measuring their capability to overcome the limitations of current batch methods. The results of our experiments, which are carried out in the same experimental setting, demonstrate the effectiveness of the two methods and suggest their complementarity. This indicates, as a promising research avenue, the possibility to combine their strengths into an online multi-task approach to the problem. @@ -546,7 +546,7 @@ DavidOrrego-Carmona Ashleigh RheaGonzales MichaelCarl - SrinivasBangalore + SrinivasBangalore 51-60 2014.amta-workshop.6 The purpose of the current investigation is to predict post-editor profiles based on user behaviour and demographics using machine learning techniques to gain a better understanding of post-editor styles. Our study extracts process unit features from the CasMaCat LS14 database from the CRITT Translation Process Research Database (TPR-DB). The analysis has two main research goals: We create n-gram models based on user activity and part-of-speech sequences to automatically cluster post-editors, and we use discriminative classifier models to characterize post-editors based on a diverse range of translation process features. The classification and clustering of participants resulting from our study suggest this type of exploration could be used as a tool to develop new translation tool features or customization possibilities. @@ -577,9 +577,9 @@ Comparison of post-editing productivity between professional translators and lay users NoraAranberri - GorkaLabaka - ArantzaDiaz de Ilarraza - KepaSarasola + GorkaLabaka + ArantzaDiaz de Ilarraza + KepaSarasola 20-33 2014.amta-wptp.2 This work compares the post-editing productivity of professional translators and lay users. We integrate an English to Basque MT system within Bologna Translation Service, an end-to-end translation management platform, and perform a producitivity experiment in a real working environment. Six translators and six lay users translate or post-edit two texts from English into Basque. Results suggest that overall, post-editing increases translation throughput for both translators and users, although the latter seem to benefit more from the MT output. We observe that translators and users perceive MT differently. Additionally, a preliminary analysis seems to suggest that familiarity with the domain, source text complexity and MT quality might affect potential productivity gain. @@ -605,7 +605,7 @@ Perception vs. reality: measuring machine translation post-editing productivity FedericoGaspari AntonioToral - Sudip KumarNaskar + Sudip KumarNaskar DeclanGroves AndyWay 60-72 @@ -617,7 +617,7 @@ Cognitive demand and cognitive effort in post-editing IsabelLacruz MichaelDenkowski - AlonLavie + AlonLavie 73-84 2014.amta-wptp.6 The pause to word ratio, the number of pauses per word in a post-edited MT segment, is an indicator of cognitive effort in post-editing (Lacruz and Shreve, 2014). We investigate how low the pause threshold can reasonably be taken, and we propose that 300 ms is a good choice, as pioneered by Schilperoord (1996). We then seek to identify a good measure of the cognitive demand imposed by MT output on the post-editor, as opposed to the cognitive effort actually exerted by the post-editor during post-editing. Measuring cognitive demand is closely related to measuring MT utility, the MT quality as perceived by the post-editor. HTER, an extrinsic edit to word ratio that does not necessarily correspond to actual edits per word performed by the post-editor, is a well-established measure of MT quality, but it does not comprehensively capture cognitive demand (Koponen, 2012). We investigate intrinsic measures of MT quality, and so of cognitive demand, through edited-error to word metrics. We find that the transfer-error to word ratio predicts cognitive effort better than mechanical-error to word ratio (Koby and Champe, 2013). We identify specific categories of cognitively challenging MT errors whose error to word ratios correlate well with cognitive effort. @@ -685,9 +685,9 @@ Real time adaptive machine translation: cdec and <fixed-case>T</fixed-case>rans<fixed-case>C</fixed-case>enter MichaelDenkowski - AlonLavie + AlonLavie IsabelLacruz - ChrisDyer + ChrisDyer 123 2014.amta-wptp.14 cdec Realtime and TransCenter provide an end-to-end experimental setup for machine translation post-editing research. Realtime provides a framework for building adaptive MT systems that learn from post-editor feedback while TransCenter incorporates a web-based translation interface that connects users to these systems and logs post-editing activity. This combination allows the straightforward deployment of MT systems specifically for post-editing and analysis of translator productivity when working with adaptive systems. Both toolkits are freely available under open source licenses. diff --git a/data/xml/2014.clib.xml b/data/xml/2014.clib.xml index db2f77a610..81fff76494 100644 --- a/data/xml/2014.clib.xml +++ b/data/xml/2014.clib.xml @@ -26,7 +26,7 @@ Harnessing Language Technologies in Multilingual Information Channelling Services - DimanKaragiozov + DimanKaragiozov 6–13 Scientists and industry have put significant efforts in creating suitable tools to analyze information flows. However, up to now there are no successful solutions for 1) dynamic modeling of the user-defined interests and further personalization of the results, 2) effective cross-language information retrieval, and 3) processing of multilingual content. As a consequence, much of the potentially relevant and otherwise accessible data from the media stream may elude users’ grasp. We present a multilingual information channeling system, MediaTalk, which offers broad integration between language technologies and advanced data processing algorithms for annotation, analysis and classification of multilingual content. As a result, the system not only provides an all-in-one monitoring service that covers both traditional and social media, but also offers dynamic modeling of user profiles, personalization of obtained data and cross-language information retrieval. Bulgarian and English press clipping services relying on this system implement advanced functionalities such as identification of emerging topics, forecasting and trend prediction, all of which allow the users to monitor their standing reputation, events and relations. The architecture of the system is robust, extensible and adheres to the Big Data paradigm. 2014.clib-1.2 @@ -51,7 +51,7 @@ MariaTodorova TsvetanaDimitrova BorislavRizov - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu ElenaIrimia 23–31 Romanian and Bulgarian are Balkan languages with rich derivational morphology that, if introduced into their respective wordnets, can aid broadening of the wordnet content and the possible NLP applications. In this paper we present a joint work on introducing derivation into the Bulgarian and the Romanian WordNets, BulNet and RoWordNet, respectively, by identifying and subsequently labelling the derivationally and semantically related noun-verb pairs. Our research aims at providing a framework for a comparative study on derivation in the two languages and offering training material for the automatic identification and assignment of derivational and morphosemantic relations needed in various applications. @@ -62,9 +62,9 @@ Semi-Automatic Detection of Multiword Expressions in the <fixed-case>S</fixed-case>lovak Dependency Treebank DanielaMajchrakova OndrejDusek - JanHajic + JanHajic AgataKarcova - RadovanGarabik + RadovanGarabik 32–39 We describe a method for semi-automatic extraction of Slovak multiword expressions (MWEs) from a dependency treebank. The process uses an automatic conversion from dependency syntactic trees to deep syntax and automatic tagging of verbal argument nodes based on a valency dictionary. Both the valency dictionary and the treebank conversion were adapted from the corresponding Czech versions; the automatically translated valency dictionary has been manually proofread and corrected. There are two main achievements – a valency dictionary of Slovak MWEs with direct links to corresponding expressions in the Czech dictionary, PDT-Vallex, and a method of extraction of MWEs from the Slovak Dependency Treebank. The extraction reached very high precision but lower recall in a manual evaluation. This is a work in progress, the overall goal of which is twofold: to create a Slovak language valency dictionary paralleling the Czech one, with bilingual links; and to use the extracted verbal frames in a collocation dictionary of Slovak verbs. 2014.clib-1.5 @@ -107,7 +107,7 @@ Recognize the Generality Relation between Sentences Using Asymmetric Association Measures SebastiaoPais - GaelDias + GaelDias RumenMoraliyski 73–81 In this paper we focus on a particular case of entailment, namely entailment by generality. We argue that there exist various types of implication, a range of different levels of entailment reasoning, based on lexical, syntactic, logical and common sense clues, at different levels of difficulty. We introduce the paradigm of Textual Entailment (TE) by Generality, which can be defined as the entailment from a specific statement towards a relatively more general statement. In this context, the Text T entails the Hypothesis H, and at the same time H is more general than T . We propose an unsupervised and language-independent method to recognize TE by Generality given a case of Text − Hypothesis or T − H where entailment relation holds. @@ -117,7 +117,7 @@ Unsupervised and Language Independent Method to Recognize Textual Entailment by Generality SebastiaoPais - GaelDias + GaelDias JoaoCordeiro RumenMoraliyski 82–90 diff --git a/data/xml/2014.eamt.xml b/data/xml/2014.eamt.xml index 52cce634b8..7aaa962921 100644 --- a/data/xml/2014.eamt.xml +++ b/data/xml/2014.eamt.xml @@ -36,7 +36,7 @@ Combining bilingual terminology mining and morphological modeling for domain adaptation in <fixed-case>SMT</fixed-case> MarionWeller - AlexanderFraser + AlexanderFraser UlrichHeid 11–18 2014.eamt-1.3 @@ -44,11 +44,11 @@ An efficient method to assist non-expert users in extending dictionaries by assigning stems and inflectional paradigms to unknknown words - MiquelEsplà-Gomis + MiquelEsplà-Gomis Víctor M.Sánchez-Cartegna FelipeSánchez-Martínez Rafael C.Carrasco - Mikel L.Forcada + Mikel L.Forcada Juan AntonioPérez-Ortiz 19–26 2014.eamt-1.4 @@ -56,16 +56,16 @@ Efficient wordgraph for interactive translation prediction - GermánSanchis-Trilles - DanielOrtiz-Martínez - FranciscoCasacuberta + GermánSanchis-Trilles + DanielOrtiz-Martínez + FranciscoCasacuberta 27–34 sanchis-trilles-etal-2014-efficient Translation model based weighting for phrase extraction SaabMansour - HermannNey + HermannNey 35–43 2014.eamt-1.6 mansour-ney-2014-translation @@ -74,7 +74,7 @@ Data selection for discriminative training in statistical machine translation XingyiSong LuciaSpecia - TrevorCohn + TrevorCohn 45–52 2014.eamt-1.7 song-etal-2014-data @@ -110,7 +110,7 @@ <fixed-case>CASMACAT</fixed-case>: cognitive analysis and statistical methods for advanced computer aided translation PhilippKoehn MichaelCarl - FranciscoCasacuberta + FranciscoCasacuberta EvaMarcos 57 2014.eamt-1.12 @@ -154,13 +154,13 @@ <fixed-case>SEECAT</fixed-case>: <fixed-case>ASR</fixed-case> & Eye-tracking enabled computer-assisted translation - MercedesGarcía-Martínez + MercedesGarcía-Martínez KaranSingla AniruddhaTammewar BartoloméMesa-Lao AnkitaThakur AnusuyaM.A. - SrinivasBangalore + SrinivasBangalore MichaelCarl 81-88 2014.eamt-1.18 @@ -185,7 +185,7 @@ Document-level translation quality estimation: exploring discourse and pseudo-references - CarolinaScarton + CarolinaScarton LuciaSpecia 101–108 2014.eamt-1.21 @@ -201,8 +201,8 @@ An efficient two-pass decoder for <fixed-case>SMT</fixed-case> using word confidence estimation - Ngoc-QuangLuong - LaurentBesacier + Ngoc-QuangLuong + LaurentBesacier BenjaminLecouteux 117–124 2014.eamt-1.23 @@ -282,8 +282,8 @@ Collaborative web <fixed-case>UI</fixed-case> localization, or how to build feature-rich multilingual datasets - VicentAlabau - Luis A.Leiva + VicentAlabau + Luis A.Leiva 151–154 2014.eamt-1.35 alabau-leiva-2014-collaborative @@ -309,7 +309,7 @@ Using a new analytic measure for the annotation and analysis of <fixed-case>MT</fixed-case> errors on real data ArleLommel AljoschaBurchardt - MajaPopović + MajaPopović KimHarris EleftheriosAvramidis HansUszkoreit @@ -337,7 +337,7 @@ Relations between different types of post-editing operations, cognitive effort and temporal effort - MajaPopović + MajaPopović ArleLommel AljoschaBurchardt EleftheriosAvramidis @@ -356,9 +356,9 @@ Application of machine translation in localization into low-resourced languages RaivisSkadiņš - MārcisPinnis - AndrejsVasiļjevs - IngunaSkadiņa + MārcisPinnis + AndrejsVasiļjevs + IngunaSkadiņa TomasHudik 209–216 2014.eamt-1.43 @@ -375,11 +375,11 @@ Extrinsic evaluation of web-crawlers in machine translation: a study on <fixed-case>C</fixed-case>roatian-<fixed-case>E</fixed-case>nglish for the tourism domain AntonioToral - RaphaelRubino - MiquelEsplà-Gomis - TommiPirinen + RaphaelRubino + MiquelEsplà-Gomis + TommiPirinen AndyWay - GemaRamírez-Sánchez + GemaRamírez-Sánchez 221–224 2014.eamt-1.45 toral-etal-2014-extrinsic diff --git a/data/xml/2014.iwslt.xml b/data/xml/2014.iwslt.xml index 2594663f4b..68ae1269d2 100644 --- a/data/xml/2014.iwslt.xml +++ b/data/xml/2014.iwslt.xml @@ -25,7 +25,7 @@ December 4-5 2014 MarcelloFederico - SebastianStüker + SebastianStüker FrançoisYvon iwslt @@ -108,7 +108,7 @@ MarkusFreitag JoernWuebker StephanPeitz - HermannNey + HermannNey MatthiasHuck AlexandraBirch NadirDurrani @@ -117,7 +117,7 @@ IsabelSlawik JanNiehues EunachCho - AlexWaibel + AlexWaibel NicolaBertoldi MauroCettolo MarcelloFederico @@ -133,7 +133,7 @@ BrianThompson JessicaRay MichaelCoury - TimAnderson + TimAnderson GrantErdmann JeremyGwinnup KatherineYoung @@ -151,7 +151,7 @@ MarkusMüller MatthiasSperber SebastianStüker - AlexWaibel + AlexWaibel 73-79 2014.iwslt-evaluation.9 This paper describes our German, Italian and English Speech-to-Text (STT) systems for the 2014 IWSLT TED ASR track. Our setup uses ROVER and confusion network combination from various subsystems to achieve a good overall performance. The individual subsystems are built by using different front-ends, (e.g., MVDR-MFCC or lMel), acoustic models (GMM or modular DNN) and phone sets and by training on various subsets of the training data. Decoding is performed in two stages, where the GMM systems are adapted in an unsupervised manner on the combination of the first stage outputs using VTLN, MLLR, and cMLLR. The combination setup produces a final hypothesis that has a significantly lower WER than any of the individual subsystems. @@ -199,7 +199,7 @@ Achraf BenRomdhane SalmaJamoussi Abdelmajid BenHamadou - KamelSmaïli + KamelSmaïli 96-99 2014.iwslt-evaluation.13 In this paper, we present our submitted MT system for the IWSLT2014 Evaluation Campaign. We participated in the English-French translation task. In this article we focus on one of the most important component of SMT: the language model. The idea is to use a phrase-based language model. For that, sequences from the source and the target language models are retrieved and used to calculate a phrase n-gram language model. These phrases are used to rewrite the parallel corpus which is then used to calculate a new translation model. @@ -209,7 +209,7 @@ <fixed-case>LIUM</fixed-case> <fixed-case>E</fixed-case>nglish-to-<fixed-case>F</fixed-case>rench spoken language translation system and the Vecsys/<fixed-case>LIUM</fixed-case> automatic speech recognition system for <fixed-case>I</fixed-case>talian language for <fixed-case>IWSLT</fixed-case> 2014 AnthonyRousseau LoïcBarrault - PaulDeléglise + PaulDeléglise YannickEstève HolgerSchwenk SamirBennacef @@ -223,11 +223,11 @@ <fixed-case>LIMSI</fixed-case> <fixed-case>E</fixed-case>nglish-<fixed-case>F</fixed-case>rench speech translation system NataliaSegal - HélèneBonneau-Maynard - Quoc KhanhDo + HélèneBonneau-Maynard + Quoc KhanhDo AlexandreAllauzen Jean-LucGauvain - LoriLamel + LoriLamel FrançoisYvon 106-112 2014.iwslt-evaluation.15 @@ -256,7 +256,7 @@ EunahCho TeresaHerrmann Thanh-LeHa - AlexWaibel + AlexWaibel 119-126 2014.iwslt-evaluation.17 In this paper, we present the KIT systems participating in the TED translation tasks of the IWSLT 2014 machine translation evaluation. We submitted phrase-based translation systems for all three official directions, namely English→German, German→English, and English→French, as well as for the optional directions English→Chinese and English→Arabic. For the official directions we built systems both for the machine translation as well as the spoken language translation track. This year we improved our systems’ performance over last year through n-best list rescoring using neural network-based translation and language models and novel preordering rules based on tree information of multiple syntactic levels. Furthermore, we could successfully apply a novel phrase extraction algorithm and transliteration of unknown words for Arabic. We also submitted a contrastive system for German→English built with stemmed German adjectives. For the SLT tracks, we used a monolingual translation system to translate the lowercased ASR hypotheses with all punctuation stripped to truecased, punctuated output as a preprocessing step to our usual translation system. @@ -290,7 +290,7 @@ AndrewFinch MasaoUtiyama TaroWatanabe - EiichiroSumita + EiichiroSumita 139-142 2014.iwslt-evaluation.20 This paper describes NICT’s participation in the IWSLT 2014 evaluation campaign for the TED Chinese-English translation shared-task. Our approach used a combination of phrase-based and hierarchical statistical machine translation (SMT) systems. Our focus was in several areas, specifically system combination, word alignment, and various language modeling techniques including the use of neural network joint models. Our experiments on the test set from the 2013 shared task, showed that an improvement in BLEU score can be gained in translation performance through all of these techniques, with the largest improvements coming from using large data sizes to train the language model. @@ -310,7 +310,7 @@ JoernWuebker StephanPeitz AndreasGuta - HermannNey + HermannNey 150-154 2014.iwslt-evaluation.22 This work describes the statistical machine translation (SMT) systems of RWTH Aachen University developed for the evaluation campaign International Workshop on Spoken Language Translation (IWSLT) 2014. We participated in both the MT and SLT tracks for the English→French and German→English language pairs and applied the identical training pipeline and models on both language pairs. Our state-of-the-art phrase-based baseline systems are augmented with maximum expected BLEU training for phrasal, lexical and reordering models. Further, we apply rescoring with novel recurrent neural language and translation models. The same systems are used for the SLT track, where we additionally perform punctuation prediction on the automatic transcriptions employing hierarchical phrase-based translation. We are able to improve RWTH’s 2013 evaluation systems by 1.7-1.8% BLEU absolute. @@ -332,7 +332,7 @@ Advances in dialectal <fixed-case>A</fixed-case>rabic speech recognition: a study using <fixed-case>T</fixed-case>witter to improve <fixed-case>E</fixed-case>gyptian <fixed-case>ASR</fixed-case> AhmedAli HamdyMubarak - StephanVogel + StephanVogel 156-162 2014.iwslt-papers.1 This paper reports results in building an Egyptian Arabic speech recognition system as an example for under-resourced languages. We investigated different approaches to build the system using 10 hours for training the acoustic model, and results for both grapheme system and phoneme system using MADA. The phoneme-based system shows better results than the grapheme-based system. In this paper, we explore the use of tweets written in dialectal Arabic. Using 880K Egyptian tweets reduced the Out Of Vocabulary (OOV) rate from 15.1% to 3.2% and the WER from 59.6% to 44.7%, a relative gain 25% in WER. @@ -341,8 +341,8 @@ Towards simultaneous interpreting: the timing of incremental machine translation and speech synthesis TimoBaumann - SrinivasBangalore - JuliaHirschberg + SrinivasBangalore + JuliaHirschberg 163-168 2014.iwslt-papers.2 In simultaneous interpreting, human experts incrementally construct and extend partial hypotheses about the source speaker’s message, and start to verbalize a corresponding message in the target language, based on a partial translation – which may have to be corrected occasionally. They commence the target utterance in the hope that they will be able to finish understanding the source speaker’s message and determine its translation in time for the unfolding delivery. Of course, both incremental understanding and translation by humans can be garden-pathed, although experts are able to optimize their delivery so as to balance the goals of minimal latency, translation quality and high speech fluency with few corrections. We investigate the temporal properties of both translation input and output to evaluate the tradeoff between low latency and translation quality. In addition, we estimate the improvements that can be gained with a tempo-elastic speech synthesizer. @@ -364,7 +364,7 @@ Machine translation of multi-party meetings: segmentation and disfluency removal strategies EunahCho JanNiehues - AlexWaibel + AlexWaibel 176-183 2014.iwslt-papers.4 Translating meetings presents a challenge since multi-speaker speech shows a variety of disfluencies. In this paper we investigate the importance of transforming speech into well-written input prior to translating multi-party meetings. We first analyze the characteristics of this data and establish oracle scores. Sentence segmentation and punctuation are performed using a language model, turn information, or a monolingual translation system. Disfluencies are removed by a CRF model trained on in-domain and out-of-domain data. For comparison, we build a combined CRF model for punctuation insertion and disfluency removal. By applying these models, multi-party meetings are transformed into fluent input for machine translation. We evaluate the models with regard to translation performance and are able to achieve an improvement of 2.1 to 4.9 BLEU points depending on the availability of turn information. @@ -376,7 +376,7 @@ Ye KyawThu MasaoUtiyama AndrewFinch - EiichiroSumita + EiichiroSumita 184-191 2014.iwslt-papers.5 We conduct dependency-based head finalization for statistical machine translation (SMT) for Myanmar (Burmese). Although Myanmar is an understudied language, linguistically it is a head-final language with similar syntax to Japanese and Korean. So, applying the efficient techniques of Japanese and Korean processing to Myanmar is a natural idea. Our approach is a combination of two approaches. The first is a head-driven phrase structure grammar (HPSG) based head finalization for English-to-Japanese translation, the second is dependency-based pre-ordering originally designed for English-to-Korean translation. We experiment on Chinese-, English-, and French-to-Myanmar translation, using a statistical pre-ordering approach as a comparison method. Experimental results show the dependency-based head finalization was able to consistently improve a baseline SMT system, for different source languages and different segmentation schemes for the Myanmar language. @@ -384,7 +384,7 @@ Discriminative adaptation of continuous space translation models - Quoc-KhanhDo + Quoc-KhanhDo AlexandreAllauzen FrançoisYvon 192-199 @@ -397,7 +397,7 @@ MatthiasEck YuriZemlyanskiy JoyZhang - AlexWaibel + AlexWaibel 200-205 2014.iwslt-papers.7 We introduce two methods to collect additional training data for statistical machine translation systems from public social network content. The first method identifies multilingual content where the author self-translated their own post to reach additional friends, fans or customers. Once identified, we can split the post in the language segments and extract translation pairs from this content. The second methods considers web links (URLs) that users add as part of their post to point the reader to a video, article or website. If the same URL is shared from different language users, there is a chance they might give the same comment in their respective language. We use a support vector machine (SVM) as a classifier to identify true translations from all candidate pairs. We collected additional translation pairs using both methods for the language pairs Spanish-English and Portuguese-English. Testing the collected data as additional training data for statistical machine translations on in-domain test sets resulted in very significant improvements of up to 5 BLEU. @@ -407,7 +407,7 @@ An exploration of segmentation strategies in stream decoding AndrewFinch XiaolinWang - EiichiroSumita + EiichiroSumita 2014.iwslt-papers.8 In this paper we explore segmentation strategies for the stream decoder a method for decoding from a continuous stream of input tokens, rather than the traditional method of decoding from sentence segmented text. The behavior of the decoder is analyzed and modifications to the decoding algorithm are proposed to improve its performance. The experimental results show our proposed decoding strategies to be effective, and add support to the original findings that this approach is capable of approaching the performance of the underlying phrase-based machine translation decoder, at useful levels of latency. Our experiments evaluated the stream decoder on a broader set of language pairs than in previous work. We found most European language pairs were similar in character, and report results on English-Chinese and English-German pairs which are of interest due to the reordering required. 206-213 @@ -427,7 +427,7 @@ Lexical translation model using a deep neural network architecture Thanh-LeHa JanNiehues - AlexWaibel + AlexWaibel 223-229 2014.iwslt-papers.10 In this paper we combine the advantages of a model using global source sentence contexts, the Discriminative Word Lexicon, and neural networks. By using deep neural networks instead of the linear maximum entropy model in the Discriminative Word Lexicon models, we are able to leverage dependencies between different source words due to the non-linearity. Furthermore, the models for different target words can share parameters and therefore data sparsity problems are effectively reduced. By using this approach in a state-of-the-art translation system, we can improve the performance by up to 0.5 BLEU points for three different language pairs on the TED translation task. @@ -436,10 +436,10 @@ Anticipatory translation model adaptation for bilingual conversations SanjikaHewavitharana - DennisMehay + DennisMehay SankaranarayananAnanthakrishnan RohitKumar - JohnMakhoul + JohnMakhoul 230-235 2014.iwslt-papers.11 Conversational spoken language translation (CSLT) systems facilitate bilingual conversations in which the two participants speak different languages. Bilingual conversations provide additional contextual information that can be used to improve the underlying machine translation system. In this paper, we describe a novel translation model adaptation method that anticipates a participant’s response in the target language, based on his counterpart’s prior turn in the source language. Our proposed strategy uses the source language utterance to perform cross-language retrieval on a large corpus of bilingual conversations in order to obtain a set of potentially relevant target responses. The responses retrieved are used to bias translation choices towards anticipated responses. On an Iraqi-to-English CSLT task, our method achieves a significant improvement over the baseline system in terms of BLEU, TER and METEOR metrics. @@ -461,8 +461,8 @@ YuanCao RyanCotterell ChrisCallison-Burch - DanielPovey - SanjeevKhudanpur + DanielPovey + SanjeevKhudanpur 244-248 2014.iwslt-papers.13 Translation of the output of automatic speech recognition (ASR) systems, also known as speech translation, has received a lot of research interest recently. This is especially true for programs such as DARPA BOLT which focus on improving spontaneous human-human conversation across languages. However, this research is hindered by the dearth of datasets developed for this explicit purpose. For Egyptian Arabic-English, in particular, no parallel speechtranscription-translation dataset exists in the same domain. In order to support research in speech translation, we introduce the Callhome Egyptian Arabic-English Speech Translation Corpus. This supplements the existing LDC corpus with four reference translations for each utterance in the transcripts. The result is a three-way parallel dataset of Egyptian Arabic Speech, transcriptions and English translations. @@ -472,7 +472,7 @@ Improving in-domain data selection for small in-domain sets MohammedMediani JoshuaWinebarger - AlexanderWaibel + AlexanderWaibel 249-256 2014.iwslt-papers.14 Finding sufficient in-domain text data for language modeling is a recurrent challenge. Some methods have already been proposed for selecting parts of out-of-domain text data most closely resembling the in-domain data using a small amount of the latter. Including this new “near-domain” data in training can potentially lead to better language model performance, while reducing training resources relative to incorporating all data. One popular, state-of-the-art selection process based on cross-entropy scores makes use of in-domain and out-ofdomain language models. In order to compensate for the limited availability of the in-domain data required for this method, we introduce enhancements to two of its steps. Firstly, we improve the procedure for drawing the outof-domain sample data used for selection. Secondly, we use word-associations in order to extend the underlying vocabulary of the sample language models used for scoring. These enhancements are applied to selecting text for language modeling of talks given in a technical subject area. Besides comparing perplexity, we judge the resulting language models by their performance in automatic speech recognition and machine translation tasks. We evaluate our method in different contexts. We show that it yields consistent improvements, up to 2% absolute reduction in word error rate and 0.3 Bleu points. We achieve these improvements even given a much smaller in-domain set. @@ -481,10 +481,10 @@ Multilingual deep bottle neck features: a study on language selection and training techniques MarkusMüller - SebastianStüker + SebastianStüker ZaidSheikh FlorianMetze - AlexWaibel + AlexWaibel 257-264 2014.iwslt-papers.15 Previous work has shown that training the neural networks for bottle neck feature extraction in a multilingual way can lead to improvements in word error rate and average term weighted value in a telephone key word search task. In this work we conduct a systematic study on a) which multilingual training strategy to employ, b) the effect of language selection and amount of multilingual training data used and c) how to find a suitable combination for languages. We conducted our experiment on the key word search task and the languages of the IARPA BABEL program. In a first step, we assessed the performance of a single language out of all available languages in combination with the target language. Based on these results, we then combined a multitude of languages. We also examined the influence of the amount of training data per language, as well as different techniques for combining the languages during network training. Our experiments show that data from arbitrary additional languages does not necessarily increase the performance of a system. But when combining a suitable set of languages, a significant gain in performance can be achieved. @@ -507,7 +507,7 @@ Better punctuation prediction with hierarchical phrase-based translation StephanPeitz MarkusFreitag - HermannNey + HermannNey 271-278 2014.iwslt-papers.17 Punctuation prediction is an important task in spoken language translation and can be performed by using a monolingual phrase-based translation system to translate from unpunctuated to text with punctuation. However, a punctuation prediction system based on phrase-based translation is not able to capture long-range dependencies between words and punctuation marks. In this paper, we propose to employ hierarchical translation in place of phrase-based translation and show that this approach is more robust for unseen word sequences. Furthermore, we analyze different optimization criteria for tuning the scaling factors of a monolingual statistical machine translation system. In our experiments, we compare the new approach with other punctuation prediction methods and show improvements in terms of F1-Score and BLEU on the IWSLT 2014 German→English and English→French translation tasks. @@ -517,7 +517,7 @@ Rule-based preordering on multiple syntactic levels in statistical machine translation GeWu YuqiZhang - AlexanderWaibel + AlexanderWaibel 279-286 2014.iwslt-papers.18 We propose a novel data-driven rule-based preordering approach, which uses the tree information of multiple syntactic levels. This approach extend the tree-based reordering from one level into multiple levels, which has the capability to process more complicated reordering cases. We have conducted experiments in English-to-Chinese and Chinese-to-English translation directions. Our results show that the approach has led to improved translation quality both when it was applied separately or when it was combined with some other reordering approaches. As our reordering approach was used alone, it showed an improvement of 1.61 in BLEU score in the English-to-Chinese translation direction and an improvement of 2.16 in BLEU score in the Chinese-to-English translation direction, in comparison with the baseline, which used no word reordering. As our preordering approach were combined with the short rule [1], long rule [2] and tree rule [3] based preordering approaches, it showed further improvements of up to 0.43 in BLEU score in the English-to-Chinese translation direction and further improvements of up to 0.3 in BLEU score in the Chinese-to-English translation direction. Through the translations that used our preordering approach, we have also found many translation examples with improved syntactic structures. diff --git a/data/xml/2014.lilt.xml b/data/xml/2014.lilt.xml index c8f9ec8396..49761bcced 100644 --- a/data/xml/2014.lilt.xml +++ b/data/xml/2014.lilt.xml @@ -12,7 +12,7 @@ Introduction AnnieZaenen CleoCondoravdi - Valeriade Paiva + Valeriade Paiva 2014.lilt-9.1 zaenen-etal-2014-introduction @@ -34,15 +34,15 @@ Decomposing Semantic Inference ElanaCabria - BernardoMagnini + BernardoMagnini Beside formal approaches to semantic inference that rely on logical representation of meaning, the notion of Textual Entailment (TE) has been proposed as an applied framework to capture major semantic inference needs across applications in Computational Linguistics. Although several approaches have been tried and evaluation campaigns have shown improvements in TE, a renewed interest is rising in the research community towards a deeper and better understanding of the core phenomena involved in textual inference. Pursuing this direction, we are convinced that crucial progress will derive from a focus on decomposing the complexity of the TE task into basic phenomena and on their combination. In this paper, we carry out a deep analysis on TE data sets, investigating the relations among two relevant aspects of semantic inferences: the logical dimension, i.e. the capacity of the inference to prove the conclusion from its premises, and the linguistic dimension, i.e. the linguistic devices used to accomplish the goal of the inference. We propose a decomposition approach over TE pairs, where single linguistic phenomena are isolated in what we have called atomic inference pairs, and we show that at this granularity level the actual correlation between the linguistic and the logical dimensions of semantic inferences emerges and can be empirically observed. 2014.lilt-9.4 cabria-magnini-2014-decomposing Frege in Space: A Program for Composition Distributional Semantics - MarcoBaroni - RaffaellaBernardi + MarcoBaroni + RaffaellaBernardi RobertoZamparelli The lexicon of any natural language encodes a huge number of distinct word meanings. Just to understand this article, you will need to know what thousands of words mean. The space of possible sentential meanings is infinite: In this article alone, you will encounter many sentences that express ideas you have never heard before, we hope. Statistical semantics has addressed the issue of the vastness of word meaning by proposing methods to harvest meaning automatically from large collections of text (corpora). Formal semantics in the Fregean tradition has developed methods to account for the infinity of sentential meaning based on the crucial insight of compositionality, the idea that meaning of sentences is built incrementally by combining the meanings of their constituents. This article sketches a new approach to semantics that brings together ideas from statistical and formal semantics to account, in parallel, for the richness of lexical meaning and the combinatorial power of sentential semantics. We adopt, in particular, the idea that word meaning can be approximated by the patterns of co-occurrence of words in corpora from statistical semantics, and the idea that compositionality can be captured in terms of a syntax-driven calculus of function application from formal semantics. 2014.lilt-9.5 @@ -58,7 +58,7 @@ Recent Progress on Monotonicity Thomas F.Icard III - Lawrence S.Moss + Lawrence S.Moss This paper serves two purposes. It is a summary of much work concerning formal treatments of monotonicity and polarity in natural language, and it also discusses connections to related work on exclusion relations, and connections to psycholinguistics and computational linguistics. The second part of the paper presents a summary of some new work on a formal Monotonicity Calculus. 2014.lilt-9.7 icard-iii-moss-2014-recent @@ -72,7 +72,7 @@ <fixed-case>NL</fixed-case>og-like Inference and Commonsense Reasoning - LenhartSchubert + LenhartSchubert Recent implementations of Natural Logic (NLog) have shown that NLog provides a quite direct means of going from sentences in ordinary language to many of the obvious entailments of those sentences. We show here that Episodic Logic (EL) and its Epilog implementation are well-adapted to capturing NLog-like inferences, but beyond that, also support inferences that require a combination of lexical knowledge and world knowledge. However, broad language understanding and commonsense reasoning are still thwarted by the “knowledge acquisition bottleneck”, and we summarize some of our ongoing and contemplated attacks on that persistent difficulty. 2014.lilt-9.9 schubert-2014-nlog @@ -119,10 +119,10 @@ <fixed-case>CALL</fixed-case>-<fixed-case>SLT</fixed-case>: A Spoken <fixed-case>CALL</fixed-case> System Based on Grammar and Speech Recognition - MannyRayner + MannyRayner NikosIsourakis ClaudiaBaur - PierretteBouillon + PierretteBouillon JohannnaGerlach We describe CALL-SLT, a speech-enabled Computer-Assisted Language Learning application where the central idea is to prompt the student with an abstract representation of what they are supposed to say, and then use a combination of grammar-based speech recognition and rule-based translation to rate their response. The system has been developed to the level of a mature prototype, freely deployed on the web, with versions for several languages. We present an overview of the core system architecture and the various types of content we have developed. Finally, we describe several evaluations, the last of which is a study carried out over about a week using 130 subjects recruited through the Amazon Mechanical Turk, in which CALL-SLT was contrasted against a control version where the speech recognition component was disabled. The improvement in student learning performance between the two groups was significant at p < 0.02. 2 @@ -183,7 +183,7 @@ Démonette, a <fixed-case>F</fixed-case>rench derivational morpho-semantic network NabilHathout - FiammettaNamer + FiammettaNamer Démonette is a derivational morphological network created from information provided by two existing lexical resources, DériF and Morphonette. It features a formal architecture in which words are associated with semantic types and where morphological relations, labelled with concrete and abstract bi-oriented definitions, connect derived words with their base and indirectly related words with each other. 5 2014.lilt-11.6 diff --git a/data/xml/2014.tal.xml b/data/xml/2014.tal.xml index bdabf866fd..713f4f315e 100644 --- a/data/xml/2014.tal.xml +++ b/data/xml/2014.tal.xml @@ -36,7 +36,7 @@ <fixed-case>MEANS</fixed-case> : une approche sémantique pour la recherche de réponses aux questions médicales [<fixed-case>MEANS</fixed-case>: a semantic approach to medical question answering] AsmaBen Abacha - PierreZweigenbaum + PierreZweigenbaum 71–104 2014.tal-1.3 fra @@ -74,7 +74,7 @@ Préface [Foreword] - LaurentBesacier + LaurentBesacier WolfgangMinker 7–11 2014.tal-2.1 @@ -84,7 +84,7 @@ Traduire la parole: le cas des <fixed-case>TED</fixed-case> Talks [Speech translation: the <fixed-case>TED</fixed-case> Talks case study] NataliaSegal - HélèneBonneau-Maynard + HélèneBonneau-Maynard FrançoisYvon 13–45 2014.tal-2.2 @@ -95,7 +95,7 @@ Ajout de nouveaux noms propres au vocabulaire d’un système de transcription en utilisant un corpus diachronique [Adding proper names to the vocabulary of a speech transcription system using a contemporary diachronic corpus] IrinaIllina DominiqueFohr - GeorgesLinarès + GeorgesLinarès 47–72 2014.tal-2.3 fra @@ -104,9 +104,9 @@ De l’arabe standard vers l’arabe dialectal : projection de corpus et ressources linguistiques en vue du traitement automatique de l’oral dans les médias tunisiens [From <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic to <fixed-case>T</fixed-case>unisian dialect: corpus projection and linguistic resources towards the automatic processing of speech in the <fixed-case>T</fixed-case>unisian media] RahmaBoujelbane - MariemEllouze - FrédéricBéchet - LamiaBelguith + MariemEllouze + FrédéricBéchet + LamiaBelguith 73–96 2014.tal-2.4 fra @@ -117,7 +117,7 @@ AdèleDésoyer FrédéricLandragin IsabelleTellier - AnaïsLefeuvre + AnaïsLefeuvre Jean-YvesAntoine 97–121 2014.tal-2.5 @@ -126,7 +126,7 @@ Détection des états affectifs lors d’interactions parlées : robustesse des indices non verbaux [Automatic in-voice affective state detection in spontaneous speech: robustness of non-verbal cues] - LaurenceDevillers + LaurenceDevillers MarieTahon Mohamed A.Sehili AgnèsDelaborde @@ -184,7 +184,7 @@ Learning word meanings from images of natural scenes ÁkosKádár AfraAlishahi - GrzegorzChrupała + GrzegorzChrupała 73–95 2014.tal-3.3 kadar-etal-2014-learning @@ -208,9 +208,9 @@ EmmanuelNavarro YannDesalle HintatCheung - Shu-KaiHsieh + Shu-KaiHsieh PierreMagistry - LaurentPrévot + LaurentPrévot 97–121 2014.tal-3.5 gaume-etal-2014-skillex diff --git a/data/xml/2014.tc.xml b/data/xml/2014.tc.xml index 7a09367aaf..0ff9b79424 100644 --- a/data/xml/2014.tc.xml +++ b/data/xml/2014.tc.xml @@ -17,7 +17,7 @@ Almost fifty years after the (first?) <fixed-case>ALPAC</fixed-case> report - GáborPrószéky + GáborPrószéky 2014.tc-1.2 proszeky-2014-almost @@ -31,7 +31,7 @@ Using cross-language information retrieval and statistical language modelling in example-based machine translation NasredineSemmar OthmanZennaki - MeriamaLaib + MeriamaLaib 2014.tc-1.4 semmar-etal-2014-using @@ -44,7 +44,7 @@ i<fixed-case>C</fixed-case>ompile<fixed-case>C</fixed-case>orpora: a web-based application to semi-automatically compile multilingual comparable corpora HernaniCosta - GloriaCorpas Pastor + GloriaCorpas Pastor MiriamSeghiri 2014.tc-1.6 costa-etal-2014-icompilecorpora @@ -61,7 +61,7 @@ Rule-based automatic post-processing of <fixed-case>SMT</fixed-case> output to reduce human post-editing effort VictoriaPorro JohannaGerlach - PierretteBouillon + PierretteBouillon VioletaSeretan 2014.tc-1.8 porro-etal-2014-rule @@ -75,8 +75,8 @@ Intelligent translation memory matching and retrieval metric exploiting linguistic technology RohitGupta - HannaBechara - ConstantinOrasan + HannaBechara + ConstantinOrasan 2014.tc-1.10 gupta-etal-2014-intelligent @@ -124,7 +124,7 @@ Machine translation quality estimation adapted to the translation workflow SabineHunsicker - AlexandruCeausu + AlexandruCeausu 2014.tc-1.18 hunsicker-ceausu-2014-machine @@ -162,8 +162,8 @@ A tool for building multilingual voice questionnaires AlejandroArmando - PierretteBouillon - MannyRayner + PierretteBouillon + MannyRayner NikosTsourakis 2014.tc-1.24 armando-etal-2014-tool @@ -196,7 +196,7 @@ <fixed-case>T</fixed-case>witter Crowd Translation – design and objectives EduardŠubert - OndřejBojar + OndřejBojar 2014.tc-1.29 subert-bojar-2014-twitter diff --git a/data/xml/2015.eamt.xml b/data/xml/2015.eamt.xml index 7c40c90fef..6a744730e8 100644 --- a/data/xml/2015.eamt.xml +++ b/data/xml/2015.eamt.xml @@ -29,31 +29,31 @@ Exploiting portability to build an <fixed-case>RBMT</fixed-case> prototype for a new source language NoraAranberri - GorkaLabaka - ArantzaDíaz de Ilarraza - KepaSarasola + GorkaLabaka + ArantzaDíaz de Ilarraza + KepaSarasola 2015.eamt-1.2 aranberri-etal-2015-exploiting Building hybrid machine translation systems by using an <fixed-case>EBMT</fixed-case> preprocessor to create partialtranslations MikelArtetxe - GorkaLabaka - KepaSarasola + GorkaLabaka + KepaSarasola 2015.eamt-1.3 artetxe-etal-2015-building Using on-line available sources of bilingual information for word-level machine translation quality estimation - MiquelEsplà-Gomis + MiquelEsplà-Gomis FelipeSánchez-Martínez - Mikel L.Forcada + Mikel L.Forcada 2015.eamt-1.4 espla-gomis-etal-2015-using A general framework for minimizing translation effort: towards a principled combination of translation technologies in computer-aided translation - Mikel L.Forcada + Mikel L.Forcada FelipeSánchez-Martínez 2015.eamt-1.5 forcada-sanchez-martinez-2015-general @@ -61,10 +61,10 @@ Can Translation Memories afford not to use paraphrasing ? RohitGupta - ConstantinOrasan + ConstantinOrasan MarcosZampieri MihaelaVela - Josefvan Genabith + Josefvan Genabith 2015.eamt-1.6 gupta-etal-2015-translation @@ -88,7 +88,7 @@ Document-Level Machine Translation with Word Vector Models Eva MartinezGarcia CristinaEspana-Bonet - LluisMarquez + LluisMarquez 2015.eamt-1.9 garcia-etal-2015-document @@ -115,21 +115,21 @@ Dynamic Terminology Integration Methods in Statistical Machine Translation - MarcisPinnis + MarcisPinnis 2015.eamt-1.13 pinnis-2015-dynamic Identifying main obstacles for statistical machine translation of morphologically rich <fixed-case>S</fixed-case>outh <fixed-case>S</fixed-case>lavic languages - MajaPopovic - MihaelArcan + MajaPopovic + MihaelArcan 2015.eamt-1.14 popovic-arcan-2015-identifying Poor man’s lemmatisation for automatic error classification - MajaPopovic - MihaelArcan + MajaPopovic + MihaelArcan EleftheriosAvramidis AljoschaBurchardt 2015.eamt-1.15 @@ -144,10 +144,10 @@ Searching for Context: a Study on Document-Level Labels for Translation Quality Estimation - CarolinaScarton + CarolinaScarton MarcosZampieri MihaelaVela - Josefvan Genabith + Josefvan Genabith LuciaSpecia 2015.eamt-1.17 scarton-etal-2015-searching @@ -156,24 +156,24 @@ Stripping Adjectives: Integration Techniques for Selective Stemming in <fixed-case>SMT</fixed-case> Systems IsabelSlawik JanNiehues - AlexWaibel + AlexWaibel 2015.eamt-1.18 slawik-etal-2015-stripping Evaluating machine translation for assimilation via a gap-filling task EkaterinaAgeeva - Francis M.Tyers - Mikel L.Forcada + Francis M.Tyers + Mikel L.Forcada Juan AntonioPérez-Ortiz 2015.eamt-1.19 ageeva-etal-2015-evaluating Unsupervised training of maximum-entropy models for lexical selection i in rule-based machine translation - Francis M.Tyers + Francis M.Tyers FelipeSánchez-Martinez - Mikel L.Forcada + Mikel L.Forcada 2015.eamt-1.20 tyers-etal-2015-unsupervised @@ -187,13 +187,13 @@ Re-assessing the <fixed-case>WMT</fixed-case>2013 Human Evaluation with Professional Translators Trainees MihaelaVela - Josefvan Genabith + Josefvan Genabith 2015.eamt-1.22 vela-van-genabith-2015-assessing Integrating a Large, Monolingual Corpus as Translation Memory into Statistical Machine translation - KatharinaWäschle + KatharinaWäschle StefanRiezler 2015.eamt-1.23 waschle-riezler-2015-integrating @@ -222,7 +222,7 @@ Pre-reordering for Statistical Machine Translation of Non-fictional Subtitles - MagdalenaPlamada + MagdalenaPlamada GionLinder PhillipStröbel MartinVolk @@ -246,14 +246,14 @@ <fixed-case>M</fixed-case>ixed<fixed-case>E</fixed-case>motions: Social Semantic Emotion Analysis for Innovative Multilingual Big Data Analytics Markets - MihaelArcan + MihaelArcan PaulBuitelaar 2015.eamt-1.30 arcan-buitelaar-2015-mixedemotions The <fixed-case>ACCEPT</fixed-case> Academic Portal: Bringing Together Pre-editing, <fixed-case>MT</fixed-case> and Post-editing into a Learning Environment - PierretteBouillon + PierretteBouillon JohannaGerlach AsheeshGulati VictoriaPorro @@ -284,7 +284,7 @@ <fixed-case>H</fixed-case>andy<fixed-case>CAT</fixed-case> - An Open-Source Platform for <fixed-case>CAT</fixed-case> Tool Research - ChrisHokamp + ChrisHokamp QunLiu 2015.eamt-1.35 hokamp-liu-2015-handycat @@ -296,15 +296,15 @@ MarkusEgg AndyWay LexiBirch - KatiaKermanidis + KatiaKermanidis VilelminiSosoni DimitriosTsoumakos - Antalvan den Bosch + Antalvan den Bosch IrisHendrickx MichaelPapadopoulos PanayotaGeorgakopoulou MariaGialama - Mennovan Zaanen + Mennovan Zaanen IoanaBuliga MitjaJermol DavorOrlic @@ -326,7 +326,7 @@ <fixed-case>FALCON</fixed-case>: Federated Active Linguistic data <fixed-case>C</fixed-case>urati<fixed-case>ON</fixed-case> - DavidLewis + DavidLewis 2015.eamt-1.39 lewis-2015-falcon @@ -340,7 +340,7 @@ <fixed-case>O</fixed-case>kapi+<fixed-case>Q</fixed-case>u<fixed-case>E</fixed-case>st: Translation Quality Estimation within Okapi - Gustavo HenriquePaetzold + Gustavo HenriquePaetzold LuciaSpecia YvesSavourel 2015.eamt-1.41 @@ -368,13 +368,13 @@ <fixed-case>A</fixed-case>bu-<fixed-case>M</fixed-case>a<fixed-case>T</fixed-case>ran: Automatic building of Machine Translation AntonioToral - Tommi APirinen + Tommi APirinen AndyWay - GemaRamírez-Sánchez - Sergio OrtizRojas - RaphaelRubino - MiquelEsplà - MikelForcada + GemaRamírez-Sánchez + Sergio OrtizRojas + RaphaelRubino + MiquelEsplà + MikelForcada VassilisPapavassiliou ProkopisProkopidis NikolaLjubešić @@ -386,7 +386,7 @@ MasaoUtiyama KyoKageura MartinThomas - AnthonyHartley + AnthonyHartley 2015.eamt-1.46 utiyama-etal-2015-mnh @@ -394,7 +394,7 @@ Smart Computer Aided Translation Environment VincentVandeghinste TomVanallemeersch - FrankVan Eynde + FrankVan Eynde GeertHeyman SienMoens JorisPelemans @@ -402,7 +402,7 @@ IuliannaVan der Lek - Ciudin ArdaTezcan LieveMacken - VéroniqueHoste + VéroniqueHoste EvaGeurts MiekeHaesen 2015.eamt-1.47 diff --git a/data/xml/2015.iwslt.xml b/data/xml/2015.iwslt.xml index b2ea2cd245..7a4667e706 100644 --- a/data/xml/2015.iwslt.xml +++ b/data/xml/2015.iwslt.xml @@ -13,7 +13,7 @@ Improving <fixed-case>SMT</fixed-case> by model filtering and phrase embedding - ChengqingZong + ChengqingZong 2015.iwslt-keynotes.1 zong-2015-improving @@ -33,7 +33,7 @@ The <fixed-case>IWSLT</fixed-case> 2015 Evaluation Campaign MauroCettolo JanNiehues - SebastianStüker + SebastianStüker LuisaBentivogli RoldanoCattoni MarcelloFederico @@ -48,7 +48,7 @@ StephanPeitz ParniaBahar AndreasGuta - HermannNey + HermannNey 15-22 2015.iwslt-evaluation.2 peter-etal-2015-rwth-aachen @@ -58,7 +58,7 @@ MichaeelKazi BrianThompson ElizabethSalesky - TimothyAnderson + TimothyAnderson GrantErdmann EricHansen BrianOre @@ -86,7 +86,7 @@ AdriàGiménez Pastor José AlbertoSanchis Navarro JorgeCivera Saiz - AlfonsJuan-Císcar + AlfonsJuan-Císcar 39-44 2015.iwslt-evaluation.5 del-agua-teba-etal-2015-mllp @@ -106,7 +106,7 @@ MercedesGarcia Martínez LoïcBarrault AnthonyRousseau - PaulDeléglise + PaulDeléglise YannickEstève 50-54 2015.iwslt-evaluation.7 @@ -126,7 +126,7 @@ JanNiehues EunahCho MohammedMediani - AlexWaibel + AlexWaibel 62-69 2015.iwslt-evaluation.9 ha-etal-2015-kit-translation @@ -138,22 +138,22 @@ MatthiasSperber KevinKilgour SebastianStuker - AlexWaibel + AlexWaibel 70-75 2015.iwslt-evaluation.10 mueller-etal-2015-2015 <fixed-case>S</fixed-case>tanford neural machine translation systems for spoken language domains - Minh-ThangLuong - ChristopherManning + Minh-ThangLuong + ChristopherManning 76-79 2015.iwslt-evaluation.11 luong-manning-2015-stanford The <fixed-case>E</fixed-case>nglish-<fixed-case>V</fixed-case>ietnamese machine translation system for <fixed-case>IWSLT</fixed-case> 2015 - Viet HongTran + Viet HongTran Huyen VuThong NguyenVan-Vinh Trung LeTien @@ -184,7 +184,7 @@ The <fixed-case>JAIST</fixed-case>-<fixed-case>UET</fixed-case>-<fixed-case>MITI</fixed-case> machine translation systems for <fixed-case>IWSLT</fixed-case> 2015 Hai-LongTrieu Thanh-QuyenDang - Phuong-ThaiNguyen + Phuong-ThaiNguyen Le-MinhNuyen 93-100 2015.iwslt-evaluation.15 @@ -240,7 +240,7 @@ Applying cross-entropy difference for selecting parallel training data from publicly available sources for conversational machine translation - WilliamLewis + WilliamLewis ChristianFedermann YingXin 126-134 @@ -251,7 +251,7 @@ Source discriminative word lexicon for translation disambiguation TeresaHerrmann JanNiehues - AlexWaibel + AlexWaibel 135-142 2015.iwslt-papers.3 herrmann-etal-2015-source @@ -276,7 +276,7 @@ Multifeature modular deep neural network acoustic models KevinKilgour - AlexWaibel + AlexWaibel 159-166 2015.iwslt-papers.6 kilgour-waibel-2015-multifeature @@ -284,7 +284,7 @@ Using language adaptive deep neural networks for improved multilingual speech recognition MarkusMueller - AlexWaibel + AlexWaibel 167-172 2015.iwslt-papers.7 mueller-waibel-2015-using @@ -294,7 +294,7 @@ EunahCho JanNiehues KevinKilgour - AlexWaibel + AlexWaibel 173-179 2015.iwslt-papers.8 cho-etal-2015-punctuation @@ -303,7 +303,7 @@ Class-based N-gram language difference models for data selection AmittaiAxelrod YogarshiVyas - MariannaMartindale + MariannaMartindale MarineCarpuat 180-187 2015.iwslt-papers.9 @@ -319,11 +319,11 @@ An open-source toolkit for word-level confidence estimation in machine translation - ChristopheServan + ChristopheServan Ngoc TienLe - Ngoc QuangLuong + Ngoc QuangLuong BenjaminLecouteux - LaurentBesacier + LaurentBesacier 196-203 2015.iwslt-papers.11 servan-etal-2015-open @@ -333,7 +333,7 @@ Quoc TruongDo SakrianiSakti GrahamNeubig - TomokiToda + TomokiToda SatoshiNakamura 204-208 2015.iwslt-papers.12 @@ -349,7 +349,7 @@ Learning segmentations that balance latency versus quality in spoken language translation - HassanShavarani + HassanShavarani MaryamSiahbani Ramtin MehdizadehSeraj AnoopSarkar @@ -384,7 +384,7 @@ Risk-aware distribution of <fixed-case>SMT</fixed-case> outputs for translation of documents targeting many anonymous readers YoEhara MasaoUtiyama - EiichiroSumita + EiichiroSumita 240-247 2015.iwslt-papers.17 ehara-etal-2015-risk @@ -393,7 +393,7 @@ Inducing bilingual lexicons from small quantities of sentence-aligned phonemic transcriptions OliverAdams GrahamNeubig - TrevorCohn + TrevorCohn StevenBird 248-255 2015.iwslt-papers.18 diff --git a/data/xml/2015.jeptalnrecital.xml b/data/xml/2015.jeptalnrecital.xml index c7b7447739..a2c6fe32f1 100644 --- a/data/xml/2015.jeptalnrecital.xml +++ b/data/xml/2015.jeptalnrecital.xml @@ -37,9 +37,9 @@ Identification de facteurs de risque pour des patients diabétiques à partir de comptes-rendus cliniques par des approches hybrides CyrilGrouin - VéroniqueMoriceau - SophieRosset - PierreZweigenbaum + VéroniqueMoriceau + SophieRosset + PierreZweigenbaum 25–36 Dans cet article, nous présentons les méthodes que nous avons développées pour analyser des comptes- rendus hospitaliers rédigés en anglais. L’objectif de cette étude consiste à identifier les facteurs de risque de décès pour des patients diabétiques et à positionner les événements médicaux décrits par rapport à la date de création de chaque document. Notre approche repose sur (i) HeidelTime pour identifier les expressions temporelles, (ii) des CRF complétés par des règles de post-traitement pour identifier les traitements, les maladies et facteurs de risque, et (iii) des règles pour positionner temporellement chaque événement médical. Sur un corpus de 514 documents, nous obtenons une F-mesure globale de 0,8451. Nous observons que l’identification des informations directement mentionnées dans les documents se révèle plus performante que l’inférence d’informations à partir de résultats de laboratoire. 2015.jeptalnrecital-long.3 @@ -60,7 +60,7 @@ Analyse d’expressions temporelles dans les dossiers électroniques patients Mike Donald TapiNzali - AurélieNévéol + AurélieNévéol XavierTannier 49–58 Les références à des phénomènes du monde réel et à leur caractérisation temporelle se retrouvent dans beaucoup de types de discours en langue naturelle. Ainsi, l’analyse temporelle apparaît comme un élément important en traitement automatique de la langue. Cet article présente une analyse de textes en domaine de spécialité du point de vue temporel. En s’appuyant sur un corpus de documents issus de plusieurs dossiers électroniques patient désidentifiés, nous décrivons la construction d’une ressource annotée en expressions temporelles selon la norme TimeML. Par suite, nous utilisons cette ressource pour évaluer plusieurs méthodes d’extraction automatique d’expressions temporelles adaptées au domaine médical. Notre meilleur système statistique offre une performance de 0,91 de F-mesure, surpassant pour l’identification le système état de l’art HeidelTime. La comparaison de notre corpus de travail avec le corpus journalistique FR-Timebank permet également de caractériser les différences d’utilisation des expressions temporelles dans deux domaines de spécialité. @@ -71,7 +71,7 @@ Compréhension automatique de la parole sans données de référence EmmanuelFerreira BassamJabaian - FabriceLefèvre + FabriceLefèvre 59–70 La majorité des méthodes état de l’art en compréhension automatique de la parole ont en commun de devoir être apprises sur une grande quantité de données annotées. Cette dépendance aux données constitue un réel obstacle lors du développement d’un système pour une nouvelle tâche/langue. Aussi, dans cette étude, nous présentons une méthode visant à limiter ce besoin par un mécanisme d’apprentissage sans données de référence (zero-shot learning). Cette méthode combine une description ontologique minimale de la tâche visée avec l’utilisation d’un espace sémantique continu appris par des approches à base de réseaux de neurones à partir de données génériques non-annotées. Nous montrons que le modèle simple et peu coûteux obtenu peut atteindre, dès le démarrage, des performances comparables à celles des systèmes état de l’art reposant sur des règles expertes ou sur des approches probabilistes sur des tâches de compréhension de la parole de référence (tests des Dialog State Tracking Challenges, DSTC2 et DSTC3). Nous proposons ensuite une stratégie d’adaptation en ligne permettant d’améliorer encore les performances de notre approche à l’aide d’une supervision faible et ajustable par l’utilisateur. 2015.jeptalnrecital-long.6 @@ -92,7 +92,7 @@ Création rapide et efficace d’un système de désambiguïsation lexicale pour une langue peu dotée MohammadNasiruddin AndonTchechmedjiev - HervéBlanchon + HervéBlanchon DidierSchwab 83–94 Nous présentons une méthode pour créer rapidement un système de désambiguïsation lexicale (DL) pour une langue L peu dotée pourvu que l’on dispose d’un système de traduction automatique statistique (TAS) d’une langue riche en corpus annotés en sens (ici l’anglais) vers L. Il est, en effet, plus facile de disposer des ressources nécessaires à la création d’un système de TAS que des ressources dédiées nécessaires à la création d’un système de DL pour la langue L. Notre méthode consiste à traduire automatiquement un corpus annoté en sens vers la langue L, puis de créer le système de désambiguïsation pour L par des méthodes supervisées classiques. Nous montrons la faisabilité de la méthode et sa généricité en traduisant le SemCor, un corpus en anglais annoté grâce au Princeton WordNet, de l’anglais vers le bangla et de l’anglais vers le français. Nous montrons la validité de l’approche en évaluant les résultats sur la tâche de désambiguïsation lexicale multilingue de Semeval 2013. @@ -166,7 +166,7 @@ Extraction automatique de paraphrases grand public pour les termes médicaux NataliaGrabar - ThierryHamon + ThierryHamon 182–195 Nous sommes tous concernés par notre état de santé et restons sensibles aux informations de santé disponibles dans la société moderne à travers par exemple les résultats des recherches scientifiques, les médias sociaux de santé, les documents cliniques, les émissions de télé et de radio ou les nouvelles. Cependant, il est commun de rencontrer dans le domaine médical des termes très spécifiques (e.g., blépharospasme, alexitymie, appendicectomie), qui restent difficiles à comprendre par les non spécialistes. Nous proposons une méthode automatique qui vise l’acquisition de paraphrases pour les termes médicaux, qui soient plus faciles à comprendre que les termes originaux. La méthode est basée sur l’analyse morphologique des termes, l’analyse syntaxique et la fouille de textes non spécialisés. L’analyse et l’évaluation des résultats indiquent que de telles paraphrases peuvent être trouvées dans les documents non spécialisés et présentent une compréhension plus facile. En fonction des paramètres de la méthode, la précision varie entre 86 et 55 @@ -179,7 +179,7 @@ GaëlGuibon IsabelleTellier SophiePrévost - MatthieuConstant + MatthieuConstant KimGerdes 196–207 L’article présente des résultats d’expériences d’apprentissage automatique pour l’étiquetage morpho-syntaxique et l’analyse syntaxique en dépendance de l’ancien français. Ces expériences ont pour objectif de servir une exploration de corpus pour laquelle le corpus arboré SRCMF sert de données de référence. La nature peu standardisée de la langue qui y est utilisée implique des données d’entraînement hétérogènes et quantitativement limitées. Nous explorons donc diverses stratégies, fondées sur différents critères (variabilité du lexique, forme Vers/Prose des textes, dates des textes), pour constituer des corpus d’entrainement menant aux meilleurs résultats possibles. @@ -199,9 +199,9 @@ Mesurer la similarité entre phrases grâce à Wikipédia en utilisant une indexation aléatoire Hai HieuVu - JeanneVillaneau + JeanneVillaneau FaridaSaïd - Pierre-FrançoisMarteau + Pierre-FrançoisMarteau 220–231 Cet article présente une méthode pour mesurer la similarité sémantique entre phrases qui utilise Wikipédia comme unique ressource linguistique et qui est, de ce fait, utilisable pour un grand nombre de langues. Basée sur une représentation vectorielle, elle utilise une indexation aléatoire pour réduire la dimension des espaces manipulés. En outre, elle inclut une technique de calcul des vecteurs de termes qui corrige les défauts engendrés par l’utilisation d’un corpus aussi général que Wikipédia. Le système a été évalué sur les données de SemEval 2014 en anglais avec des résultats très encourageants, au-dessus du niveau moyen des systèmes en compétition. Il a également été testé sur un ensemble de paires de phrases en français, à partir de ressources que nous avons construites et qui seront mises à la libre disposition de la communauté scientifique. 2015.jeptalnrecital-long.19 @@ -219,7 +219,7 @@ Utilisation de mesures de confiance pour améliorer le décodage en traduction de parole - LaurentBesacier + LaurentBesacier BenjaminLecouteux Luong NgocQuang 244–254 @@ -229,7 +229,7 @@ Multialignement vs bialignement : à plusieurs, c’est mieux ! - OlivierKraif + OlivierKraif 255–266 Dans cet article, nous proposons une méthode originale destinée à effectuer l’alignement d’un corpus multiparallèle, i.e. comportant plus de deux langues, en prenant en compte toutes les langues simultanément (et non en composant une série de bialignements indépendants). Pour ce faire, nous nous appuyons sur les réseaux de correspondances lexicales constitués par les transfuges (chaînes identiques) et cognats (mots apparentés), et nous montrons comment divers tuilages des couples de langues permettent d’exploiter au mieux les ressemblances superficielles liées aux relations génétiques interlinguistiques. Nous évaluons notre méthode par rapport à une méthode de bialignement classique, et montrons en quoi le multialignement permet d’obtenir des résultats à la fois plus précis et plus robustes. 2015.jeptalnrecital-long.22 @@ -237,7 +237,7 @@ Apprentissage discriminant des modèles continus de traduction - Quoc-KhanhDo + Quoc-KhanhDo AlexandreAllauzen FrançoisYvon 267–278 @@ -248,7 +248,7 @@ Utiliser les interjections pour détecter les émotions AmelFraisse - PatrickParoubek + PatrickParoubek 279–290 Bien que les interjections soient un phénomène linguistique connu, elles ont été peu étudiées et cela continue d’être le cas pour les travaux sur les microblogs. Des travaux en analyse de sentiments ont montré l’intérêt des émoticônes et récemment des mots-dièses, qui s’avèrent être très utiles pour la classification en polarité. Mais malgré leur statut grammatical et leur richesse sémantique, les interjections sont restées marginalisées par les systèmes d’analyse de sentiments. Nous montrons dans cet article l’apport majeur des interjections pour la détection des émotions. Nous détaillons la production automatique, basée sur les interjections, d’un corpus étiqueté avec les émotions. Nous expliquons ensuite comment nous avons utilisé ce corpus pour en déduire, automatiquement, un lexique affectif pour le français. Ce lexique a été évalué sur une tâche de détection des émotions, qui a montré un gain en mesure F1 allant, selon les émotions, de +0,04 à +0,21. 2015.jeptalnrecital-long.24 @@ -257,7 +257,7 @@ Comparaison d’architectures neuronales pour l’analyse syntaxique en constituants MaximinCoavoux - BenoîtCrabbé + BenoîtCrabbé 291–302 L’article traite de l’analyse syntaxique lexicalisée pour les grammaires de constituants. On se place dans le cadre de l’analyse par transitions. Les modèles statistiques généralement utilisés pour cette tâche s’appuient sur une représentation non structurée du lexique. Les mots du vocabulaire sont représentés par des symboles discrets sans liens entre eux. À la place, nous proposons d’utiliser des représentations denses du type plongements (embeddings) qui permettent de modéliser la similarité entre symboles, c’est-à-dire entre mots, entre parties du discours et entre catégories syntagmatiques. Nous proposons d’adapter le modèle statistique sous-jacent à ces nouvelles représentations. L’article propose une étude de 3 architectures neuronales de complexité croissante et montre que l’utilisation d’une couche cachée non-linéaire permet de tirer parti des informations données par les plongements. 2015.jeptalnrecital-long.25 @@ -266,7 +266,7 @@ ...des conférences enfin disons des causeries... Détection automatique de segments en relation de paraphrase dans les reformulations de corpus oraux NataliaGrabar - IrisEshkol + IrisEshkol 303–316 Notre travail porte sur la détection automatique des segments en relation de reformulation paraphrastique dans les corpus oraux. L’approche proposée est une approche syntagmatique qui tient compte des marqueurs de reformulation paraphrastique et des spécificités de l’oral. Les données de référence sont consensuelles. Une méthode automatique fondée sur l’apprentissage avec les CRF est proposée afin de détecter les segments paraphrasés. Différents descripteurs sont exploités dans une fenêtre de taille variable. Les tests effectués montrent que les segments en relation de paraphrase sont assez difficiles à détecter, surtout avec leurs frontières correctes. Les meilleures moyennes atteignent 0,65 de F-mesure, 0,75 de précision et 0,63 de rappel. Nous avons plusieurs perspectives à ce travail pour améliorer la détection des segments en relation de paraphrase et pour étudier les données depuis d’autres points de vue. 2015.jeptalnrecital-long.26 @@ -312,7 +312,7 @@ Vous aimez ?...ou pas ? <fixed-case>L</fixed-case>ike<fixed-case>I</fixed-case>t, un jeu pour construire une ressource lexicale de polarité MathieuLafourcade - Nathalie LeBrun + Nathalie LeBrun AlainJoubert 14–20 En analyse de discours ou d’opinion, savoir caractériser la connotation générale d’un texte, les sentiments qu’il véhicule, est une aptitude recherchée, qui suppose la constitution préalable d’une ressource lexicale de polarité. Au sein du réseau lexical JeuxDeMots, nous avons mis au point LikeIt, un jeu qui permet d’affecter une valeur positive, négative, ou neutre à un terme, et de constituer ainsi pour chaque terme, à partir des votes, une polarité résultante. Nous présentons ici l’analyse quantitative des données de polarité obtenues, ainsi que la méthode pour les valider qualitativement. @@ -323,7 +323,7 @@ Étude des verbes introducteurs de noms de médicaments dans les forums de santé FrançoisMorlane-Hondère CyrilGrouin - PierreZweigenbaum + PierreZweigenbaum 21–27 Dans cet article, nous combinons annotations manuelle et automatique pour identifier les verbes utilisés pour introduire un médicament dans les messages sur les forums de santé. Cette information est notamment utile pour identifier la relation entre un médicament et un effet secondaire. La mention d’un médicament dans un message ne garantit pas que l’utilisateur a pris ce traitement mais qu’il effectue un retour. Nous montrons ensuite que ces verbes peuvent servir pour extraire automatiquement des variantes de noms de médicaments. Nous estimons que l’analyse de ces variantes pourrait permettre de modéliser les erreurs faites par les usagers des forums lorsqu’ils écrivent les noms de médicaments, et améliorer en conséquence les systèmes de recherche d’information. 2015.jeptalnrecital-court.4 @@ -333,7 +333,7 @@ Initialisation de Réseaux de Neurones à l’aide d’un Espace Thématique MohamedMorchid RichardDufour - GeorgesLinarès + GeorgesLinarès 28–33 Ce papier présente une méthode de traitement de documents parlés intégrant une représentation fondée sur un espace thématique dans un réseau de neurones artificiels (ANN) employé comme classifieur de document. La méthode proposée consiste à configurer la topologie d’un ANN ainsi que d’initialiser les connexions de celui-ci à l’aide des espaces thématiques appris précédemment. Il est attendu que l’initialisation fondée sur les probabilités thématiques permette d’optimiser le processus d’optimisation des poids du réseau ainsi qu’à accélérer la phase d’apprentissage tout en amélioration la précision de la classification d’un document de test. Cette méthode est évaluée lors d’une tâche de catégorisation de dialogues parlés entre des utilisateurs et des agents du service d’appels de la Régie Autonome Des Transports Parisiens (RATP). Les résultats montrent l’intérêt de la méthode proposée d’initialisation d’un réseau, avec un gain observé de plus de 4 points en termes de bonne classification comparativement à l’initialisation aléatoire. De plus, les expérimentations soulignent que les performances sont faiblement dépendantes de la topologie du ANN lorsque les poids de la couche cachée sont initialisés au moyen des espaces de thèmes issus d’une allocation latente de Dirichlet ou latent Dirichlet Allocation (LDA) en comparaison à une initialisation empirique. 2015.jeptalnrecital-court.5 @@ -387,7 +387,7 @@ Adaptation par enrichissement terminologique en traduction automatique statistique fondée sur la génération et le filtrage de bi-segments virtuels - ChristopheServan + ChristopheServan MarcDymetman 68–74 Nous présentons des travaux préliminaires sur une approche permettant d’ajouter des termes bilingues à un système de Traduction Automatique Statistique (TAS) à base de segments. Les termes sont non seulement inclus individuellement, mais aussi avec des contextes les englobant. Tout d’abord nous générons ces contextes en généralisant des motifs (ou patrons) observés pour des mots de même nature syntaxique dans un corpus bilingue. Enfin, nous filtrons les contextes qui n’atteignent pas un certain seuil de confiance, à l’aide d’une méthode de sélection de bi-segments inspirée d’une approche de sélection de données, précédemment appliquée à des textes bilingues alignés. @@ -442,7 +442,7 @@ <fixed-case>CANÉPHORE</fixed-case> : un corpus français pour la fouille d’opinion ciblée JosephLark EmmanuelMorin - Sebastián PeñaSaldarriaga + Sebastián PeñaSaldarriaga 102–108 La fouille d’opinion ciblée (aspect-based sentiment analysis) fait l’objet ces dernières années d’un intérêt particulier, visible dans les sujets des récentes campagnes d’évaluation comme SemEval 2014 et 2015 ou bien DEFT 2015. Cependant les corpus annotés et publiquement disponibles permettant l’évaluation de cette tâche sont rares. Dans ce travail nous présentons en premier lieu un corpus français librement accessible de 10 000 tweets manuellement annotés. Nous accompagnons ce corpus de résultats de référence pour l’extraction de marqueurs d’opinion non supervisée. Nous présentons ensuite une méthode améliorant les résultats de cette extraction, en suivant une approche semi-supervisée. 2015.jeptalnrecital-court.16 @@ -452,7 +452,7 @@ Extraction de Contextes Riches en Connaissances en corpus spécialisés FirasHmida EmmanuelMorin - BéatriceDaille + BéatriceDaille 109–115 Les banques terminologiques et les dictionnaires sont des ressources précieuses qui facilitent l’accès aux connaissances des domaines spécialisés. Ces ressources sont souvent assez pauvres et ne proposent pas toujours pour un terme à illustrer des exemples permettant d’appréhender le sens et l’usage de ce terme. Dans ce contexte, nous proposons de mettre en œuvre la notion de Contextes Riches en Connaissances (CRC) pour extraire directement de corpus spécialisés des exemples de contextes illustrant son usage. Nous définissons un cadre unifié pour exploiter tout à la fois des patrons de connaissances et des collocations avec une qualité acceptable pour une révision humaine. 2015.jeptalnrecital-court.17 @@ -480,7 +480,7 @@ Vers un diagnostic d’ambiguïté des termes candidats d’un texte GaëlLejeune - BéatriceDaille + BéatriceDaille 130–136 Les recherches autour de la désambiguïsation sémantique traitent de la question du sens à accorder à différentes occurrences d’un mot ou plus largement d’une unité lexicale. Dans cet article, nous nous intéressons à l’ambiguïté d’un terme en domaine de spécialité. Nous posons les premiers jalons de nos recherches sur une question connexe que nous nommons le diagnostic d’ambiguïté. Cette tâche consiste à décider si une occurrence d’un terme est ou n’est pas ambiguë. Nous mettons en œuvre une approche d’apprentissage supervisée qui exploite un corpus d’articles de sciences humaines rédigés en français dans lequel les termes ambigus ont été détectés par des experts. Le diagnostic s’appuie sur deux types de traits : syntaxiques et positionnels. Nous montrons l’intérêt de la structuration du texte pour établir le diagnostic d’ambiguïté. 2015.jeptalnrecital-court.20 @@ -499,9 +499,9 @@ Détection automatique de l’ironie dans les tweets en français JihenKaroui Farah BenamaraZitoune - VéroniqueMoriceau + VéroniqueMoriceau NathalieAussenac-Gilles - Lamia HadrichBelguith + Lamia HadrichBelguith 144–149 Cet article présente une méthode par apprentissage supervisé pour la détection de l’ironie dans les tweets en français. Un classifieur binaire utilise des traits de l’état de l’art dont les performances sont reconnues, ainsi que de nouveaux traits issus de notre étude de corpus. En particulier, nous nous sommes intéressés à la négation et aux oppositions explicites/implicites entre des expressions d’opinion ayant des polarités différentes. Les résultats obtenus sont encourageants. 2015.jeptalnrecital-court.22 @@ -540,7 +540,7 @@ Entre écrit et oral ? Analyse comparée de conversations de type tchat et de conversations téléphoniques dans un centre de contact client - GéraldineDamnati + GéraldineDamnati AleksandraGuerraz DelphineCharlet 171–177 @@ -560,7 +560,7 @@ Utilisation d’annotations sémantiques pour la validation automatique d’hypothèses dans des conversations téléphoniques CaroleLailler YannickEstève - RenatoDe Mori + RenatoDe Mori MohamedBouallègue MohamedMorchid 185–191 @@ -572,7 +572,7 @@ Etiquetage morpho-syntaxique en domaine de spécialité: le domaine médical ChristelleRabary ThomasLavergne - AurélieNévéol + AurélieNévéol 192–198 L’étiquetage morpho-syntaxique est une tâche fondamentale du Traitement Automatique de la Langue, sur laquelle reposent souvent des traitements plus complexes tels que l’extraction d’information ou la traduction automatique. L’étiquetage en domaine de spécialité est limité par la disponibilité d’outils et de corpus annotés spécifiques au domaine. Dans cet article, nous présentons le développement d’un corpus clinique du français annoté morpho-syntaxiquement à l’aide d’un jeu d’étiquettes issus des guides d’annotation French Treebank et Multitag. L’analyse de ce corpus nous permet de caractériser le domaine clinique et de dégager les points clés pour l’adaptation d’outils d’analyse morpho-syntaxique à ce domaine. Nous montrons également les limites d’un outil entraîné sur un corpus journalistique appliqué au domaine clinique. En perspective de ce travail, nous envisageons une application du corpus clinique annoté pour améliorer l’étiquetage morpho-syntaxique des documents cliniques en français. 2015.jeptalnrecital-court.29 @@ -581,7 +581,7 @@ Vers une typologie de liens entre contenus journalistiques RemiBois - GuillaumeGravier + GuillaumeGravier EmmanuelMorin PascaleSébillot 199–205 @@ -602,7 +602,7 @@ Utilisation des réseaux de neurones récurrents pour la projection interlingue d’étiquettes morpho-syntaxiques à partir d’un corpus parallèle OthmanZennaki NasredineSemmar - LaurentBesacier + LaurentBesacier 213–220 La construction d’outils d’analyse linguistique pour les langues faiblement dotées est limitée, entre autres, par le manque de corpus annotés. Dans cet article, nous proposons une méthode pour construire automatiquement des outils d’analyse via une projection interlingue d’annotations linguistiques en utilisant des corpus parallèles. Notre approche n’utilise pas d’autres sources d’information, ce qui la rend applicable à un large éventail de langues peu dotées. Nous proposons d’utiliser les réseaux de neurones récurrents pour projeter les annotations d’une langue à une autre (sans utiliser d’information d’alignement des mots). Dans un premier temps, nous explorons la tâche d’annotation morpho-syntaxique. Notre méthode combinée avec une méthode de projection d’annotation basique (utilisant l’alignement mot à mot), donne des résultats comparables à ceux de l’état de l’art sur une tâche similaire. 2015.jeptalnrecital-court.32 @@ -610,8 +610,8 @@ Segmentation et Titrage Automatique de Journaux Télévisés - AbdessalamBouchekif - GéraldineDamnati + AbdessalamBouchekif + GéraldineDamnati NathalieCamelin YannickEstève DelphineCharlet @@ -623,7 +623,7 @@ Un système hybride pour l’analyse de sentiments associés aux aspects CarolineBrun - Diana NicoletaPopa + Diana NicoletaPopa ClaudeRoux 228–234 Cet article présente en détails notre participation à la tâche 4 de SemEval2014 (Analyse de Sentiments associés aux Aspects). Nous présentons la tâche et décrivons précisément notre système qui consiste en une combinaison de composants linguistiques et de modules de classification. Nous exposons ensuite les résultats de son évaluation, ainsi que les résultats des meilleurs systèmes. Nous concluons par la présentation de quelques nouvelles expériences réalisées en vue de l’amélioration de ce système. @@ -682,8 +682,8 @@ Médicaments qui soignent, médicaments qui rendent malades : étude des relations causales pour identifier les effets secondaires FrançoisMorlane-Hondère CyrilGrouin - VéroniqueMoriceau - PierreZweigenbaum + VéroniqueMoriceau + PierreZweigenbaum 270–276 Dans cet article, nous nous intéressons à la manière dont sont exprimés les liens qui existent entre un traitement médical et un effet secondaire. Parce que les patients se tournent en priorité vers internet, nous fondons cette étude sur un corpus annoté de messages issus de forums de santé en français. L’objectif de ce travail consiste à mettre en évidence des éléments linguistiques (connecteurs logiques et expressions temporelles) qui pourraient être utiles pour des systèmes automatiques de repérage des effets secondaires. Nous observons que les modalités d’écriture sur les forums ne permettent pas de se fonder sur les expressions temporelles. En revanche, les connecteurs logiques semblent utiles pour identifier les effets secondaires. 2015.jeptalnrecital-court.40 @@ -702,7 +702,7 @@ KillianJanod MohamedMorchid RichardDufour - GeorgesLinares + GeorgesLinares 284–290 Les représentations vectorielles continues des mots sont en plein essor et ont déjà été appliquées avec succès à de nombreuses tâches en traitement automatique de la langue (TAL). Dans cet article, nous proposons d’intégrer l’information temporelle issue du contexte des mots au sein des architectures fondées sur les sacs-de-mots continus (continuous bag-of-words ou CBOW) ou sur les Skip-Grams. Ces approches sont manipulées au travers d’un réseau de neurones, l’architecture CBOW cherchant alors à prédire un mot sachant son contexte, alors que l’architecture Skip-Gram prédit un contexte sachant un mot. Cependant, ces modèles, au travers du réseau de neurones, s’appuient sur des représentations en sac-de-mots et ne tiennent pas compte, explicitement, de l’ordre des mots. En conséquence, chaque mot a potentiellement la même influence dans le réseau de neurones. Nous proposons alors une méthode originale qui intègre l’information temporelle des contextes des mots en utilisant leur position relative. Cette méthode s’inspire des modèles contextuels continus. L’information temporelle est traitée comme coefficient de pondération, en entrée du réseau de neurones par le CBOW et dans la couche de sortie par le Skip-Gram. Les premières expériences ont été réalisées en utilisant un corpus de test mesurant la qualité de la relation sémantique-syntactique des mots. Les résultats préliminaires obtenus montrent l’apport du contexte des mots, avec des gains de 7 et 7,7 points respectivement avec l’architecture Skip-Gram et l’architecture CBOW. 2015.jeptalnrecital-court.42 @@ -721,7 +721,7 @@ Caractériser les discours académiques et de vulgarisation : quelles propriétés ? - AmaliaTodirascu + AmaliaTodirascu Beatriz SanchezCardenas 298–304 L’article présente une étude des propriétés linguistiques (lexicales, morpho-syntaxiques, syntaxiques) permettant la classification automatique de documents selon leur genre (articles scientifiques et articles de vulgarisation), dans deux domaines différentes (médecine et informatique). Notre analyse, effectuée sur des corpus comparables en genre et en thèmes disponibles en français, permet de valider certaines propriétés identifiées dans la littérature comme caractéristiques des discours académiques ou de vulgarisation scientifique. Les premières expériences de classification évaluent l’influence de ces propriétés pour l’identification automatique du genre pour le cas spécifique des textes scientifiques ou de vulgarisation. @@ -842,10 +842,10 @@ Un patient virtuel dialogant LeonardoCampillos DhouhaBouamor - ÉricBilinski + ÉricBilinski Anne-LaureLigozat - PierreZweigenbaum - SophieRosset + PierreZweigenbaum + SophieRosset 16–17 Le démonstrateur que nous décrivons ici est un prototype de système de dialogue dont l’objectif est de simuler un patient. Nous décrivons son fonctionnement général en insistant sur les aspects concernant la langue et surtout le rapport entre langue médicale de spécialité et langue générale. 2015.jeptalnrecital-demonstration.8 @@ -874,8 +874,8 @@ CécileRobin AndréBittar XabierLarrucea - FrédériqueSegond - Marie-HélèneMetzger + FrédériqueSegond + Marie-HélèneMetzger 23–24 Le projet européen TIER (Integrated strategy for CBRN – Chemical, Biological, Radiological and Nuclear – Threat Identification and Emergency Response) vise à intégrer une stratégie complète et intégrée pour la réponse d’urgence dans un contexte de dangers biologiques, chimiques, radiologiques, nucléaires, ou liés aux explosifs, basée sur l’identification des menaces et d’évaluation des risques. Dans cet article, nous nous focalisons sur les risques biologiques. Nous présentons notre système expert fondé sur une analyse sémantique, permettant l’extraction de données structurées à partir de données non structurées dans le but de raisonner. 2015.jeptalnrecital-demonstration.11 @@ -917,7 +917,7 @@ Pourquoi construire des ressources terminologiques et pourquoi le faire différemment ? - Marie-ClaudeL’Homme + Marie-ClaudeL’Homme 2–2 Dans cette présentation, je défendrai l’idée selon laquelle des ressources terminologiques décrivant les propriétés lexico-sémantiques des termes constituent un complément nécessaire, voire indispensable, à d’autres types de ressources, À partir d’exemples anglais et français empruntés au domaine de l’environnement, je montrerai, d’une part, que les ressources lexicales générales (y compris celles qui ont une large couverture) n’offrent pas un portait complet du sens des termes ou de la structure lexicale observée du point de vue d’un domaine de spécialité. Je montrerai, d’autre part, que les ressources terminologiques (thésaurus, ontologies, banques de terminologie) souvent d’obédience conceptuelle, se concentrent sur le lien entre les termes et les connaissances dénotées par eux et s’attardent peu sur leur fonctionnement linguistique. Je présenterai un type de ressource décrivant les propriétés lexico-sémantiques des termes d’un domaine (structure actantielle, liens lexicaux, annotations contextuelles, etc.) et des éléments méthodologiques présidant à son élaboration. 2015.jeptalnrecital-invite.2 diff --git a/data/xml/2015.lilt.xml b/data/xml/2015.lilt.xml index a9e633bf71..7ca8e36d60 100644 --- a/data/xml/2015.lilt.xml +++ b/data/xml/2015.lilt.xml @@ -39,10 +39,10 @@ Literature Lifts Up Computational Linguistics - David K.Elson + David K.Elson AnnaFeldman AnnaKazantseva - StanSzpakowicz + StanSzpakowicz 1 2015.lilt-12.1 elson-etal-2015-literature @@ -60,7 +60,7 @@ A computational analysis of poetic style: Imagism and its influence on modern professional and amateur poetry Justine T.Kao - DanJurafsky + DanJurafsky How do standards of poetic beauty change as a function of time and expertise? Here we use computational methods to compare the stylistic features of 359 English poems written by 19th century professional poets, Imagist poets, contemporary professional poets, and contemporary amateur poets. Building upon techniques designed to analyze style and sentiment in texts, we examine elements of poetic craft such as imagery, sound devices, emotive language, and diction. We find that contemporary professional poets use significantly more concrete words than 19th century poets, fewer emotional words, and more complex sound devices. These changes are consistent with the tenets of Imagism, an early 20thcentury literary movement. Further analyses show that contemporary amateur poems resemble 19th century professional poems more than contemporary professional poems on several dimensions. The stylistic similarities between contemporary amateur poems and 19th century professional poems suggest that elite standards of poetic beauty in the past “trickled down” to influence amateur works in the present. Our results highlight the influence of Imagism on the modern aesthetic and reveal the dynamics between “high” and “low” art. We suggest that computational linguistics may shed light on the forces and trends that shape poetic style. 3 2015.lilt-12.3 diff --git a/data/xml/2015.mtsummit.xml b/data/xml/2015.mtsummit.xml index 62fba7330a..485a633f52 100644 --- a/data/xml/2015.mtsummit.xml +++ b/data/xml/2015.mtsummit.xml @@ -13,8 +13,8 @@ MasaruFuji AtsushiFujita MasaoUtiyama - EiichiroSumita - YujiMatsumoto + EiichiroSumita + YujiMatsumoto 2015.mtsummit-papers.1 fuji-etal-2015-patent @@ -29,9 +29,9 @@ Learning bilingual distributed phrase represenations for statistical machine translation ChaochaoWang - DeyiXiong + DeyiXiong MinZhang - ChunyuKit + ChunyuKit 2015.mtsummit-papers.3 wang-etal-2015-learning-bilingual @@ -39,7 +39,7 @@ Learning bilingual phrase representations with recurrent neural networks HideyaMino AndrewFinch - EiichiroSumita + EiichiroSumita 2015.mtsummit-papers.4 mino-etal-2015-learning @@ -60,17 +60,17 @@ <fixed-case>METEOR</fixed-case> for multiple target languages using <fixed-case>DB</fixed-case>nary ZiedElloumi - HervéBlanchon - GillesSerasset - LaurentBesacier + HervéBlanchon + GillesSerasset + LaurentBesacier 2015.mtsummit-papers.7 elloumi-etal-2015-meteor <fixed-case>J</fixed-case>apanese controlled language rules to improve machine translatability of municipal documents ReiMiyata - AnthonyHartley - CécileParis + AnthonyHartley + CécileParis MidoriTatsumi KyoKageura 2015.mtsummit-papers.8 @@ -87,22 +87,22 @@ Using joint models or domain adaptation in statistical machine translation NadirDurrani HassanSajjad - ShafiqJoty + ShafiqJoty AhmedAbdelali - StephanVogel + StephanVogel 2015.mtsummit-papers.10 durrani-etal-2015-using Machine translation evaluation made fuzzier: a study on post-editing productivity and evaluation metrics in commercial settings - CarlaParra Escartín + CarlaParra Escartín ManuelArcedillo 2015.mtsummit-papers.11 parra-escartin-arcedillo-2015-machine A distributed inflection model for translating into morphologically rich languages - KeTran + KeTran AriannaBisazza ChristofMonz 2015.mtsummit-papers.12 @@ -110,7 +110,7 @@ Bandit structured prediction for learning from partial feedback in statistical machine translation - ArtemSokolov + ArtemSokolov StefanRiezler TanguyUrvoy 2015.mtsummit-papers.13 @@ -119,10 +119,10 @@ An empirical study of segment prioritization for incrementally retrained post-editing-based <fixed-case>SMT</fixed-case> JinhuaDu - AnkitSrivastava + AnkitSrivastava AndyWay - AlfredoMaldonado-Guerra - DavidLewis + AlfredoMaldonado-Guerra + DavidLewis 2015.mtsummit-papers.14 du-etal-2015-empirical @@ -179,7 +179,7 @@ Topic adaptation for machine translation of e-commerce content PrashantMathur MarcelloFederico - SelçukKöprü + SelçukKöprü SharamKhadivi HassanSawaf 2015.mtsummit-papers.21 @@ -188,7 +188,7 @@ Machine translation with source-predicted target morphology JoachimDaiber - KhalilSima’an + KhalilSima’an 2015.mtsummit-papers.22 daiber-simaan-2015-machine @@ -205,7 +205,7 @@ Bilingual distributed phrase representations for statistical machin translation PeymanPassban - ChrisHokamp + ChrisHokamp QunLi 2015.mtsummit-papers.24 passban-etal-2015-bilingual @@ -290,7 +290,7 @@ Quality evaluation of four translations of a kidney document: focus on reliability - Alan K.Melby + Alan K.Melby 2015.mtsummit-users.8 melby-2015-quality @@ -369,13 +369,13 @@ Productivity promotion strategies for collaborative translation on huge volume technical documents - GuipingZhang + GuipingZhang NaYe FangCai ChuangWu XiangkuiSun JinfuYuan - DongfengCai + DongfengCai 2015.mtsummit-users.19 zhang-etal-2015-productivity @@ -433,7 +433,7 @@ AndyWay ZhengweiQiu AsankaWasala - ReinhardSchaler + ReinhardSchaler 2015.mtsummit-wptp.5 du-etal-2015-domain diff --git a/data/xml/2015.tal.xml b/data/xml/2015.tal.xml index 2edfb8af66..6e827c9382 100644 --- a/data/xml/2015.tal.xml +++ b/data/xml/2015.tal.xml @@ -61,7 +61,7 @@ Distributional Semantics Today - Introduction to the special issue - CécileFabre + CécileFabre AlessandroLenci 7–20 2015.tal-2.1 @@ -78,7 +78,7 @@ Méthode semi-compositionnelle pour l’extraction de synonymes des termes complexes [Semi-compositional method for synonym extraction of complex terms] AmirHazem - BéatriceDaille + BéatriceDaille 51–76 2015.tal-2.3 fra @@ -87,7 +87,7 @@ Analyse distributionnelle appliquée aux textes de spécialité - Réduction de la dispersion des données par abstraction des contextes [Distributional analysis applied to domain-specific texts - Data dispersion reduction by context abstraction] AmandinePérinet - ThierryHamon + ThierryHamon 77–102 2015.tal-2.4 fra @@ -119,7 +119,7 @@ Préface [Foreword] VincentClaveau - Jian-YunNie + Jian-YunNie 7–22 2015.tal-3.1 fra @@ -128,8 +128,8 @@ Analyse en dépendance et classification de requêtes en langue naturelle, application à la recommandation de livres [Dependency parsing and classification of natural language queries: application to book recommendation] AnaïsOllagnier - SébastienFournier - PatriceBellot + SébastienFournier + PatriceBellot 23–47 2015.tal-3.2 fra diff --git a/data/xml/2015.tc.xml b/data/xml/2015.tc.xml index 5cf902daa5..8c22e02008 100644 --- a/data/xml/2015.tc.xml +++ b/data/xml/2015.tc.xml @@ -11,7 +11,7 @@ <fixed-case>QT</fixed-case>21: A new era for translators and the computer - AlanMelby + AlanMelby 2015.tc-1.1 melby-2015-qt21 @@ -19,21 +19,21 @@ The reception of intralingual and interlingual automatic subtitling: An exploratory study within the <fixed-case>HBB</fixed-case>4<fixed-case>ALL</fixed-case> project AnnaMatamala AndreuOliver - AitorÁlvarez + AitorÁlvarez AndoniAzpeitia 2015.tc-1.2 matamala-etal-2015-reception The <fixed-case>EXPERT</fixed-case> project: Advancing the state of the art in hybrid translation technologies - ConstantinOrasan + ConstantinOrasan AlessandroCattelan - GloriaCorpas Pastor - Josefvan Genabith + GloriaCorpas Pastor + Josefvan Genabith ManuelHerranz JuanJosé Arevalillo QunLiu - KhalilSima’an + KhalilSima’an LuciaSpecia 2015.tc-1.3 orasan-etal-2015-expert @@ -72,7 +72,7 @@ Skype Translator: Breaking down language and hearing barriers. A behind the scenes look at near real-time speech translation - WilliamLewis + WilliamLewis 2015.tc-1.9 lewis-2015-skype diff --git a/data/xml/2016.amta.xml b/data/xml/2016.amta.xml index 4b30389ecb..d7dfb31374 100644 --- a/data/xml/2016.amta.xml +++ b/data/xml/2016.amta.xml @@ -18,9 +18,9 @@ Instance Selection for Online Automatic Post-Editing in a multi-domain scenario - RajenChatterjee - MihaelArcan - MatteoNegri + RajenChatterjee + MihaelArcan + MatteoNegri MarcoTurchi 1-15 2016.amta-researchers.1 @@ -40,7 +40,7 @@ Fuzzy-match repair using black-box machine translation systems: what can be expected? JohnOrtega FelipeSánchez-Martínez - MikelForcada + MikelForcada 27-39 2016.amta-researchers.3 Computer-aided translation (CAT) tools often use a translation memory (TM) as the key resource to assist translators. A TM contains translation units (TU) which are made up of source and target language segments; translators use the target segments in the TU suggested by the CAT tool by converting them into the desired translation. Proposals from TMs could be made more useful by using techniques such as fuzzy-match repair (FMR) which modify words in the target segment corresponding to mismatches identified in the source segment. Modifications in the target segment are done by translating the mismatched source sub-segments using an external source of bilingual information (SBI) and applying the translations to the corresponding positions in the target segment. Several combinations of translated sub-segments can be applied to the target segment which can produce multiple repair candidates. We provide a formal algorithmic description of a method that is capable of using any SBI to generate all possible fuzzy-match repairs and perform an oracle evaluation on three different language pairs to ascertain the potential of the method to improve translation productivity. Using DGT-TM translation memories and the machine system Apertium as the single source to build repair operators in three different language pairs, we show that the best repaired fuzzy matches are consistently closer to reference translations than either machine-translated segments or unrepaired fuzzy matches. @@ -72,7 +72,7 @@ Ranking suggestions for black-box interactive translation prediction systems with multilayer perceptrons DanielTorregrosa Juan AntonioPérez-Ortiz - MikelForcada + MikelForcada 65-78 2016.amta-researchers.6 The objective of interactive translation prediction (ITP), a paradigm of computer-aided translation, is to assist professional translators by offering context-based computer-generated suggestions as they type. While most state-of-the-art ITP systems are tightly coupled to a machine translation (MT) system (often created ad-hoc for this purpose), our proposal follows a resourceagnostic approach, one that does not need access to the inner workings of the bilingual resources (MT systems or any other bilingual resources) used to generate the suggestions, thus allowing to include new resources almost seamlessly. As we do not expect the user to tolerate more than a few proposals each time, the set of potential suggestions need to be filtered and ranked; the resource-agnostic approach has been evaluated before using a set of intuitive length-based and position-based heuristics designed to determine which suggestions to show, achieving promising results. In this paper, we propose a more principled suggestion ranking approach using a regressor (a multilayer perceptron) that achieves significantly better results. @@ -81,7 +81,7 @@ Multi-domain Adaptation for Statistical Machine Translation Based on Feature Augmentation KenjiImamura - EiichiroSumita + EiichiroSumita 79-92 2016.amta-researchers.7 Domain adaptation is a major challenge when applying machine translation to practical tasks. In this paper, we present domain adaptation methods for machine translation that assume multiple domains. The proposed methods combine two model types: a corpus-concatenated model covering multiple domains and single-domain models that are accurate but sparse in specific domains. We combine the advantages of both models using feature augmentation for domain adaptation in machine learning. Our experimental results show that the BLEU scores of the proposed method clearly surpass those of single-domain models for low-resource domains. For high-resource domains, the scores of the proposed method were superior to those of both single-domain and corpusconcatenated models. Even in domains having a million bilingual sentences, the translation quality was at least preserved and even improved in some domains. These results demonstrate that state-of-the-art domain adaptation can be realized with appropriate settings, even when using standard log-linear models. @@ -122,7 +122,7 @@ Improving Neural Machine Translation on resource-limited pairs using auxiliary data of a third language AnderMartinez - YujiMatsumoto + YujiMatsumoto 135-148 2016.amta-researchers.11 In the recent years interest in Deep Neural Networks (DNN) has grown in the field of Natural Language Processing, as new training methods have been proposed. The usage of DNN has achieved state-of-the-art performance in various areas. Neural Machine Translation (NMT) described by Bahdanau et al. (2014) and its successive variations have shown promising results. DNN, however, tend to over-fit on small data-sets, which makes this method impracticable for resource-limited language pairs. This article combines three different ideas (splitting words into smaller units, using an extra dataset of a related language pair and using monolingual data) for improving the performance of NMT models on language pairs with limited data. Our experiments show that, in some cases, our proposed approach to subword-units performs better than BPE (Byte pair encoding) and that auxiliary language-pairs and monolingual data can help improve the performance of languages with limited resources. @@ -160,7 +160,7 @@ Investigating the Impact of Various Partial Diacritization Schemes on <fixed-case>A</fixed-case>rabic-<fixed-case>E</fixed-case>nglish Statistical Machine Translation SawsanAlqahtani MahmoudGhoneim - MonaDiab + MonaDiab 191-204 2016.amta-researchers.15 Most diacritics in Arabic represent short vowels. In Arabic orthography, such diacritics are considered optional. The absence of these diacritics naturally leads to significant word ambiguity to top the inherent ambiguity present in fully diacritized words. Word ambiguity is a significant impediment for machine translation. Despite the ambiguity presented by lack of diacritization, context helps ameliorate the situation. Identifying the appropriate amount of diacritic restoration to reduce word sense ambiguity in the context of machine translation is the object of this paper. Diacritic marks help reduce the number of possible lexical word choices assigned to a source word which leads to better quality translated sentences. We investigate a variety of (linguistically motivated) partial diacritization schemes that preserve some of the semantics that in essence complement the implicit contextual information present in the sentences. We also study the effect of training data size and report results on three standard test sets that represent a combination of different genres. The results show statistically significant improvements for some schemes compared to two baselines: text with no diacritics (the typical writing system adopted for Arabic) and text that is fully diacritized. @@ -236,10 +236,10 @@ What Can We Really Learn from Post-editing? - MarcisPinnis - RihardsKalnins + MarcisPinnis + RihardsKalnins RaivisSkadins - IngunaSkadina + IngunaSkadina 86-91 2016.amta-users.8 pinnis-etal-2016-really @@ -247,8 +247,8 @@ An Empirical Study: Post-editing Effort for <fixed-case>E</fixed-case>nglish to <fixed-case>A</fixed-case>rabic Hybrid Machine Translation HassanSajjad - FranciscoGuzman - StephanVogel + FranciscoGuzman + StephanVogel 92-113 2016.amta-users.9.Presentation.pdf sajjad-etal-2016-empirical @@ -300,7 +300,7 @@ Improving Machine Translation for Post-Editing via Real Time Adaptation - DragosMunteanu + DragosMunteanu 193-221 2016.amta-users.16.Presentation.pdf munteanu-2016-improving @@ -374,7 +374,7 @@ Toward Temporally-aware <fixed-case>MT</fixed-case>: Can Information Extraction Help Preserve Temporal Interpretation? TaylorCassidy JamalLaoudi - ClareVoss + ClareVoss 371-384 2016.amta-users.25 cassidy-etal-2016-toward @@ -415,7 +415,7 @@ Proto-<fixed-case>MT</fixed-case> Evaluation for Humanitarian Assistance Disaster Response Scenarios - DouglasJones + DouglasJones 551-574 2016.amta-users.30.Presentation.pdf jones-2016-proto @@ -444,7 +444,7 @@ <fixed-case>M</fixed-case>o<fixed-case>J</fixed-case>o: Bringing Hybrid <fixed-case>MT</fixed-case> to the Center for Applied Machine Translation - MariannaMartindale + MariannaMartindale 654-714 2016.amta-users.34.Presentation.pdf martindale-2016-mojo diff --git a/data/xml/2016.clib.xml b/data/xml/2016.clib.xml index f72ba3fa9c..ae6fbd5cb0 100644 --- a/data/xml/2016.clib.xml +++ b/data/xml/2016.clib.xml @@ -37,7 +37,7 @@ Linguistic Data Retrievable from a Treebank - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu ElenaIrimia 19–27 This paper describes the Romanian treebank annotated according to the Universal Dependency principles. We present the types of texts included in the treebank, their processing phases and the tools used for doing it, as well as the levels of annotation, with a focus on the syntactic level. We briefly present the syntactic formalism used, the principles followed and the set of relations. The perspective we adopted is the linguist’s who searches the treebank for information with relevance for the study of Romanian. (S)He can interpret the statistics based on the corpus and can also query the treebank for finding examples to support a theory, for testing hypothesis or for discovering new tendencies. We use here the passive constructions in Romanian as a case study for showing how statistical data help understanding this linguistic phenomenon. We also discuss the kinds of linguistic information retrievable and non-retrievable form the treebank, based on the annotation principles. @@ -76,7 +76,7 @@ Finding Good Answers in Online Forums: Community Question Answering for <fixed-case>B</fixed-case>ulgarian TsvetomilaMihaylova IvanKoychev - PreslavNakov + PreslavNakov IvelinaNikolova 54–63 Community Question Answering (CQA) is a form of question answering that is getting increasingly popular as a research direction recently. Given a question posted in an online community forum and the thread of answers to it, a common formulation of the task is to rank automatically the answers, so that the good ones are ranked higher than the bad ones. Despite the vast research in CQA for English, very little attention has been paid to other languages. To bridge this gap, here we present our method for Community Question Answering in Bulgarian. We create annotated training and testing datasets for Bulgarian, and we further explore the applicability of machine translation for reusing English CQA data for building a Bulgarian system. The evaluation results show improvement over the baseline and can serve as a basis for further research. diff --git a/data/xml/2016.eamt.xml b/data/xml/2016.eamt.xml index 575555bc3d..778a67cba2 100644 --- a/data/xml/2016.eamt.xml +++ b/data/xml/2016.eamt.xml @@ -14,9 +14,9 @@ <fixed-case>T</fixed-case>ecto<fixed-case>MT</fixed-case> – a deep linguistic core of the combined Cimera <fixed-case>MT</fixed-case> system MartinPopel RomanSudarikov - OndřejBojar + OndřejBojar RudolfRosa - JanHajič + JanHajič 2016.eamt-2.1 popel-etal-2016-tectomt @@ -41,14 +41,14 @@ Apertium: a free/open source platform for machine translation and basic language technology - Mikel L.Forcada - Francis M.Tyers + Mikel L.Forcada + Francis M.Tyers 2016.eamt-2.4 forcada-tyers-2016-apertium <fixed-case>B</fixed-case>abel<fixed-case>D</fixed-case>r: a web platform for rapid construction of phrasebook-style medical speech translation applications - PierretteBouillon + PierretteBouillon HervéSpechbach 2016.eamt-2.5 bouillon-spechbach-2016-babeldr @@ -77,19 +77,19 @@ <fixed-case>H</fixed-case>im<fixed-case>L</fixed-case>: Health in my language BarryHaddow - AlexFraser + AlexFraser 2016.eamt-2.7 haddow-fraser-2016-himl <fixed-case>OPUS</fixed-case> – parallel corpora for everyone - JörgTiedemann + JörgTiedemann 2016.eamt-2.8 tiedemann-2016-opus Integration of machine translation paradigms - Marta R.Costa-jussà + Marta R.Costa-jussà 2016.eamt-2.9 costa-jussa-2016-integration @@ -151,7 +151,7 @@ Amplexor <fixed-case>MTE</fixed-case>xpert – machine translation adapted to the translation workflow - AlexandruCeausu + AlexandruCeausu SabineHunsicker TudyDroumaguet 2016.eamt-2.18 @@ -160,8 +160,8 @@ <fixed-case>A</fixed-case>bu-<fixed-case>M</fixed-case>a<fixed-case>T</fixed-case>ran: automatic building of machine translation AntonioToral - SergioOrtiz Rojas - MikelForcada + SergioOrtiz Rojas + MikelForcada NikolaLubesic ProkopisProkopidis 2016.eamt-2.19 @@ -179,16 +179,16 @@ MariaGialama IrisHendrickx MitjaJermol - KatiaKermanidis + KatiaKermanidis JossMoorkens DavorOrlic MichaelPapadopoulos - MajaPopović + MajaPopović RicoSennrich VilelminiSosoni DimitriosTsoumakos - Antalvan den Bosch - Mennovan Zaanen + Antalvan den Bosch + Mennovan Zaanen AndyWay 2016.eamt-2.20 kordoni-etal-2016-tramooc diff --git a/data/xml/2016.gwc.xml b/data/xml/2016.gwc.xml index d3f9d165c1..629832051b 100644 --- a/data/xml/2016.gwc.xml +++ b/data/xml/2016.gwc.xml @@ -5,8 +5,8 @@ Proceedings of the 8th Global WordNet Conference (GWC) ChristianeFellbaum PiekVossen - Verginica BarbuMititelu - CorinaForascu + Verginica BarbuMititelu + CorinaForascu Global Wordnet Association
Bucharest, Romania
27--30 January @@ -21,7 +21,7 @@ Adverbs in <fixed-case>S</fixed-case>anskrit <fixed-case>W</fixed-case>ordnet TanujaAjotikar - MalharKulkarni + MalharKulkarni 1–8 The wordnet contains part-of-speech categories such as noun, verb, adjective and adverb. In Sanskrit, there is no formal distinction among nouns, adjectives and adverbs. This poses the question, is an adverb a separate category in Sanskrit? If not, then how do we accommodate it in a lexical resource? To investigate the issue, we attempt to study the complex nature of adverbs in Sanskrit and the policies adopted by Sanskrit lexicographers that would guide us in storing them in the Sanskrit wordnet. 2016.gwc-1.1 @@ -51,7 +51,7 @@ Detecting Most Frequent Sense using Word Embeddings and <fixed-case>B</fixed-case>abel<fixed-case>N</fixed-case>et Harpreet SinghArora SudhaBhingardive - PushpakBhattacharyya + PushpakBhattacharyya 21–25 Since the inception of the SENSEVAL evaluation exercises there has been a great deal of recent research into Word Sense Disambiguation (WSD). Over the years, various supervised, unsupervised and knowledge based WSD systems have been proposed. Beating the first sense heuristics is a challenging task for these systems. In this paper, we present our work on Most Frequent Sense (MFS) detection using Word Embeddings and BabelNet features. The semantic features from BabelNet viz., synsets, gloss, relations, etc. are used for generating sense embeddings. We compare word embedding of a word with its sense embeddings to obtain the MFS with the highest similarity. The MFS is detected for six languages viz., English, Spanish, Russian, German, French and Italian. However, this approach can be applied to any language provided that word embeddings are available for that language. 2016.gwc-1.4 @@ -70,7 +70,7 @@ MonicaBerti YuriBizzoni FedericoBoschetti - Gregory R.Crane + Gregory R.Crane Riccardo DelGratta TariqYousef 34–38 @@ -84,7 +84,7 @@ HanumantRedkar PrateekSappadla DhirendraSingh - PushpakBhattacharyya + PushpakBhattacharyya 39–43 Semantic similarity and relatedness measures play an important role in natural language processing applications. In this paper, we present the IndoWordNet::Similarity tool and interface, designed for computing the semantic similarity and relatedness between two words in IndoWordNet. A java based tool and a web interface have been developed to compute this semantic similarity and relatedness. Also, Java API has been developed for this purpose. This tool, web interface and the API are made available for the research purpose. 2016.gwc-1.7 @@ -103,7 +103,7 @@ <fixed-case>CILI</fixed-case>: the Collaborative Interlingual Index FrancisBond PiekVossen - John P.McCrae + John P.McCrae ChristianeFellbaum 50–57 This paper introduces the motivation for and design of the Collaborative InterLingual Index (CILI). It is designed to make possible coordination between multiple loosely coupled wordnet projects. The structure of the CILI is based on the Interlingual index first proposed in the EuroWordNet project with several pragmatic extensions: an explicit open license, definitions in English and links to wordnets in the Global Wordnet Grid. @@ -123,7 +123,7 @@ Word Substitution in Short Answer Extraction: A <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-based Approach - QingqingCai + QingqingCai JamesGung MaochenGuan GeraldKurlandski @@ -135,11 +135,11 @@ An overview of <fixed-case>P</fixed-case>ortuguese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>ets - Valeriade Paiva + Valeriade Paiva LivyReal - Hugo GonçaloOliveira + Hugo GonçaloOliveira AlexandreRademaker - CláudiaFreitas + CláudiaFreitas AlbertoSimões 74–82 Semantic relations between words are key to building systems that aim to understand and manipulate language. For English, the “de facto” standard for representing this kind of knowledge is Princeton’s WordNet. Here, we describe the wordnet-like resources currently available for Portuguese: their origins, methods of creation, sizes, and usage restrictions. We start tackling the problem of comparing them, but only in quantitative terms. Finally, we sketch ideas for potential collaboration between some of the projects that produce Portuguese wordnets. @@ -179,7 +179,7 @@ AnnaFeltracco LorenzoGatti ElisabettaJezek - BernardoMagnini + BernardoMagnini SimoneMagnolini 101–105 We present a methodology for building lexical sets for argument slots of Italian verbs. We start from an inventory of semantically typed Italian verb frames and through a mapping to WordNet we automatically annotate the sets of fillers for the argument positions in a corpus of sentences. We evaluate both a baseline algorithm and a syntax driven algorithm and show that the latter performs significantly better in terms of precision. @@ -241,7 +241,7 @@ Sophisticated Lexical Databases - Simplified Usage: Mobile Applications and Browser Plugins For Wordnets DipteshKanojia RajDabre - PushpakBhattacharyya + PushpakBhattacharyya 144–149 India is a country with 22 officially recognized languages and 17 of these have WordNets, a crucial resource. Web browser based interfaces are available for these WordNets, but are not suited for mobile devices which deters people from effectively using this resource. We present our initial work on developing mobile applications and browser extensions to access WordNets for Indian Languages. Our contribution is two fold: (1) We develop mobile applications for the Android, iOS and Windows Phone OS platforms for Hindi, Marathi and Sanskrit WordNets which allow users to search for words and obtain more information along with their translations in English and other Indian languages. (2) We also develop browser extensions for English, Hindi, Marathi, and Sanskrit WordNets, for both Mozilla Firefox, and Google Chrome. We believe that such applications can be quite helpful in a classroom scenario, where students would be able to access the WordNets as dictionaries as well as lexical knowledge bases. This can help in overcoming the language barrier along with furthering language understanding. 2016.gwc-1.22 @@ -251,7 +251,7 @@ A picture is worth a thousand words: Using <fixed-case>O</fixed-case>pen<fixed-case>C</fixed-case>lip<fixed-case>A</fixed-case>rt library for enriching <fixed-case>I</fixed-case>ndo<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et DipteshKanojia ShehzaadDhuliawala - PushpakBhattacharyya + PushpakBhattacharyya 150–154 WordNet has proved to be immensely useful for Word Sense Disambiguation, and thence Machine translation, Information Retrieval and Question Answering. It can also be used as a dictionary for educational purposes. The semantic nature of concepts in a WordNet motivates one to try to express this meaning in a more visual way. In this paper, we describe our work of enriching IndoWordNet with image acquisitions from the OpenClipArt library. We describe an approach used to enrich WordNets for eighteen Indian languages. Our contribution is three fold: (1) We develop a system, which, given a synset in English, finds an appropriate image for the synset. The system uses the OpenclipArt library (OCAL) to retrieve images and ranks them. (2) After retrieving the images, we map the results along with the linkages between Princeton WordNet and Hindi WordNet, to link several synsets to corresponding images. We choose and sort top three images based on our ranking heuristic per synset. (3) We develop a tool that allows a lexicographer to manually evaluate these images. The top images are shown to a lexicographer by the evaluation tool for the task of choosing the best image representation. The lexicographer also selects the number of relevant images. Using our system, we obtain an Average Precision (P @ 3) score of 0.30. 2016.gwc-1.23 @@ -324,10 +324,10 @@ An empirically grounded expansion of the supersense inventory Hector MartinezAlonso - AndersJohannsen + AndersJohannsen SanniNimb SussiOlsen - BolettePedersen + BolettePedersen 199–208 In this article we present an expansion of the supersense inventory. All new super-senses are extensions of members of the current inventory, which we postulate by identifying semantically coherent groups of synsets. We cover the expansion of the already-established supernsense inventory for nouns and verbs, the addition of coarse supersenses for adjectives in absence of a canonical supersense inventory, and super-senses for verbal satellites. We evaluate the viability of the new senses examining the annotation agreement, frequency and co-ocurrence patterns. 2016.gwc-1.30 @@ -336,7 +336,7 @@ Adverbs in pl<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: Theory and Implementation MarekMaziarz - StanSzpakowicz + StanSzpakowicz MichalKalinski 209–217 Adverbs are seldom well represented in wordnets. Princeton WordNet, for example, derives from adjectives practically all its adverbs and whatever involvement they have. GermaNet stays away from this part of speech. Adverbs in plWordNet will be emphatically present in all their semantic and syntactic distinctness. We briefly discuss the linguistic background of the lexical system of Polish adverbs. We describe an automated generator of accurate candidate adverbs, and introduce the lexicographic procedures which will ensure high consistency of wordnet editors’ decisions about adverbs. @@ -375,7 +375,7 @@ AnupamMondal DipankarDas ErikCambria - SivajiBandyopadhyay + SivajiBandyopadhyay 243–248 In order to overcome the lack of medical corpora, we have developed a WordNet for Medical Events (WME) for identifying medical terms and their sense related information using a seed list. The initial WME resource contains 1654 medical terms or concepts. In the present research, we have reported the enhancement of WME with 6415 number of medical concepts along with their conceptual features viz. Parts-of-Speech (POS), gloss, semantics, polarity, sense and affinity. Several polarity lexicons viz. SentiWordNet, SenticNet, Bing Liu’s subjectivity list and Taboda’s adjective list were introduced with WordNet synonyms and hyponyms for expansion. The semantics feature guided us to build a semantic co-reference relation based network between the related medical concepts. These features help to prepare a medical concept network for better sense relation based visualization. Finally, we evaluated with respect to Adaptive Lesk Algorithm and conducted an agreement analysis for validating the expanded WME resource. 2016.gwc-1.35 @@ -385,7 +385,7 @@ Mapping and Generating Classifiers using an Open <fixed-case>C</fixed-case>hinese Ontology Luis Morgado DaCosta FrancisBond - HelenaGao + HelenaGao 249–256 In languages such as Chinese, classifiers (CLs) play a central role in the quantification of noun-phrases. This can be a problem when generating text from input that does not specify the classifier, as in machine translation (MT) from English to Chinese. Many solutions to this problem rely on dictionaries of noun-CL pairs. However, there is no open large-scale machine-tractable dictionary of noun-CL associations. Many published resources exist, but they tend to focus on how a CL is used (e.g. what kinds of nouns can be used with it, or what features seem to be selected by each CL). In fact, since nouns are open class words, producing an exhaustive definite list of noun-CL associations is not possible, since it would quickly get out of date. Our work tries to address this problem by providing an algorithm for automatic building of a frequency based dictionary of noun-CL pairs, mapped to concepts in the Chinese Open Wordnet (Wang and Bond, 2013), an open machine-tractable dictionary for Chinese. All results will released under an open license. 2016.gwc-1.36 @@ -394,8 +394,8 @@ <fixed-case>I</fixed-case>ndo<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Conversion to Web Ontology Language (<fixed-case>OWL</fixed-case>) ApurvaNagvenkar - JyotiPawar - PushpakBhattacharyya + JyotiPawar + PushpakBhattacharyya 257–260 WordNet plays a significant role in Linked Open Data (LOD) cloud. It has numerous application ranging from ontology annotation to ontology mapping. IndoWordNet is a linked WordNet connecting 18 Indian language WordNets with Hindi as a source WordNet. The Hindi WordNet was initially developed by linking it to English WordNet. In this paper, we present a data representation of IndoWordNet in Web Ontology Language (OWL). The schema of Princeton WordNet has been enhanced to support the representation of IndoWordNet. This IndoWordNet representation in OWL format is now available to link other web resources. This representation is implemented for eight Indian languages. 2016.gwc-1.37 @@ -443,7 +443,7 @@ pl<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et 3.0 – Almost There MaciejPiasecki - StanSzpakowicz + StanSzpakowicz MarekMaziarz EwaRudnicka 292–301 @@ -455,7 +455,7 @@ Open <fixed-case>D</fixed-case>utch <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et MartenPostma Emielvan Miltenburg - RoxaneSegers + RoxaneSegers AnneleenSchoen PiekVossen 302–310 @@ -487,8 +487,8 @@ NileshJoshi SandhyaSingh IrawatiKulkarni - MalharKulkarni - PushpakBhattacharyya + MalharKulkarni + PushpakBhattacharyya 325–332 Samāsa or compounds are a regular feature of Indian Languages. They are also found in other languages like German, Italian, French, Russian, Spanish, etc. Compound word is constructed from two or more words to form a single word. The meaning of this word is derived from each of the individual words of the compound. To develop a system to generate, identify and interpret compounds, is an important task in Natural Language Processing. This paper introduces a web based tool - Samāsa-Kartā for producing compound words. Here, the focus is on Sanskrit language due to its richness in usage of compounds; however, this approach can be applied to any Indian language as well as other languages. IndoWordNet is used as a resource for words to be compounded. The motivation behind creating compound words is to create, to improve the vocabulary, to reduce sense ambiguity, etc. in order to enrich the WordNet. The Samāsa-Kartā can be used for various applications viz., compound categorization, sandhi creation, morphological analysis, paraphrasing, synset creation, etc. 2016.gwc-1.46 @@ -535,11 +535,11 @@ The Predicate Matrix and the Event and Implied Situation Ontology: Making More of Events - RoxaneSegers + RoxaneSegers EgoitzLaparra MarcoRospocher PiekVossen - GermanRigau + GermanRigau FilipIlievski 364–372 This paper presents the Event and Implied Situation Ontology (ESO), a resource which formalizes the pre and post situations of events and the roles of the entities affected by an event. The ontology reuses and maps across existing resources such as WordNet, SUMO, VerbNet, PropBank and FrameNet. We describe how ESO is injected into a new version of the Predicate Matrix and illustrate how these resources are used to detect information in large document collections that otherwise would have remained implicit. The model targets interpretations of situations rather than the semantics of verbs per se. The event is interpreted as a situation using RDF taking all event components into account. Hence, the ontology and the linked resources need to be considered from the perspective of this interpretation model. @@ -568,7 +568,7 @@ High, Medium or Low? Detecting Intensity Variation Among polar synonyms in <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et RakshaSharma - PushpakBhattacharyya + PushpakBhattacharyya 389–395 For fine-grained sentiment analysis, we need to go beyond zero-one polarity and find a way to compare adjectives (synonyms) that share the same sense. Choice of a word from a set of synonyms, provides a way to select the exact polarity-intensity. For example, choosing to describe a person as benevolent rather than kind1 changes the intensity of the expression. In this paper, we present a sense based lexical resource, where synonyms are assigned intensity levels, viz., high, medium and low. We show that the measure P (s|w) (probability of a sense s given the word w) can derive the intensity of a word within the sense. We observe a statistically significant positive correlation between P(s|w) and intensity of synonyms for three languages, viz., English, Marathi and Hindi. The average correlation scores are 0.47 for English, 0.56 for Marathi and 0.58 for Hindi. 2016.gwc-1.54 @@ -576,7 +576,7 @@ The Role of the <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Relations in the Knowledge-based Word Sense Disambiguation Task - KirilSimov + KirilSimov AlexanderPopov PetyaOsenova 396–403 @@ -601,7 +601,7 @@ JayaSaraswati LaxmiKashyap DipteshKanojia - PushpakBhattacharyya + PushpakBhattacharyya 411–418 This paper reports the work of creating bilingual mappings in English for certain synsets of Hindi wordnet, the need for doing this, the methods adopted and the tools created for the task. Hindi wordnet, which forms the foundation for other Indian language wordnets, has been linked to the English WordNet. To maximize linkages, an important strategy of using direct and hypernymy linkages has been followed. However, the hypernymy linkages were found to be inadequate in certain cases and posed a challenge due to sense granularity of language. Thus, the idea of creating bilingual mappings was adopted as a solution. A bilingual mapping means a linkage between a concept in two different languages, with the help of translation and/or transliteration. Such mappings retain meaningful representations, while capturing semantic similarity at the same time. This has also proven to be a great enhancement of Hindi wordnet and can be a crucial resource for multilingual applications in natural language processing, including machine translation and cross language information retrieval. 2016.gwc-1.57 @@ -619,7 +619,7 @@ Toward a truly multilingual <fixed-case>G</fixed-case>lobal<fixed-case>W</fixed-case>ordnet Grid PiekVossen FrancisBond - JohnMcCrae + JohnMcCrae 424–431 In this paper, we describe a new and improved Global Wordnet Grid that takes advantage of the Collaborative InterLingual Index (CILI). Currently, the Open Multilingal Wordnet has made many wordnets accessible as a single linked wordnet, but as it used the Princeton Wordnet of English (PWN) as a pivot, it loses concepts that are not part of PWN. The technical solution to this, a central registry of concepts, as proposed in the EuroWordnet project through the InterLingual Index, has been known for many years. However, the practical issues of how to host this index and who decides what goes in remained unsolved. Inspired by current practice in the Semantic Web and the Linked Open Data community, we propose a way to solve this issue. In this paper we define the principles and protocols for contributing to the Grid. We tested them on two use cases, adding version 3.1 of the Princeton WordNet to a CILI based on 3.0 and adding the Open Dutch Wordnet, to validate the current set up. This paper aims to be a call for action that we hope will be further discussed and ultimately taken up by the whole wordnet community. 2016.gwc-1.59 @@ -628,8 +628,8 @@ This Table is Different: A <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-Based Approach to Identifying References to Document Entities ShomirWilson - AlanBlack - JonOberlander + AlanBlack + JonOberlander 432–440 Writing intended to inform frequently contains references to document entities (DEs), a mixed class that includes orthographically structured items (e.g., illustrations, sections, lists) and discourse entities (arguments, suggestions, points). Such references are vital to the interpretation of documents, but they often eschew identifiers such as “Figure 1” for inexplicit phrases like “in this figure” or “from these premises”. We examine inexplicit references to DEs, termed DE references, and recast the problem of their automatic detection into the determination of relevant word senses. We then show the feasibility of machine learning for the detection of DE-relevant word senses, using a corpus of human-labeled synsets from WordNet. We test cross-domain performance by gathering lemmas and synsets from three corpora: website privacy policies, Wikipedia articles, and Wikibooks textbooks. Identifying DE references will enable language technologies to use the information encoded by them, permitting the automatic generation of finely-tuned descriptions of DEs and the presentation of richly-structured information to readers. 2016.gwc-1.60 diff --git a/data/xml/2016.iwslt.xml b/data/xml/2016.iwslt.xml index 4b26100104..167d404046 100644 --- a/data/xml/2016.iwslt.xml +++ b/data/xml/2016.iwslt.xml @@ -19,7 +19,7 @@ The <fixed-case>IWSLT</fixed-case> 2016 Evaluation Campaign MauroCettolo JanNiehues - SebastianStüker + SebastianStüker LuisaBentivogli RolandoCattoni MarcelloFederico @@ -29,16 +29,16 @@ Integrating Encyclopedic Knowledge into Neural Language Models - YangZhang + YangZhang JanNiehues - AlexanderWaibel + AlexanderWaibel Neural models have recently shown big improvements in the performance of phrase-based machine translation. Recurrent language models, in particular, have been a great success due to their ability to model arbitrary long context. In this work, we integrate global semantic information extracted from large encyclopedic sources into neural network language models. We integrate semantic word classes extracted from Wikipedia and sentence level topic information into a recurrent neural network-based language model. The new resulting models exhibit great potential in alleviating data sparsity problems with the additional knowledge provided. This approach of integrating global information is not restricted to language modeling but can also be easily applied to any model that profits from context or further data resources, e.g. neural machine translation. Using this model has improved rescoring quality of a state-of-the-art phrase-based translation system by 0.84 BLEU points. We performed experiments on two language pairs. 2016.iwslt-1.2 zhang-etal-2016-integrating Factored Neural Machine Translation Architectures - MercedesGarcía-Martínez + MercedesGarcía-Martínez LoïcBarrault FethiBougares In this paper we investigate the potential of the neural machine translation (NMT) when taking into consideration the linguistic aspect of target language. From this standpoint, the NMT approach with attention mechanism [1] is extended in order to produce several linguistically derived outputs. We train our model to simultaneously output the lemma and its corresponding factors (e.g. part-of-speech, gender, number). The word level translation is built with a mapping function using a priori linguistic information. Compared to the standard NMT system, factored architecture increases significantly the vocabulary coverage while decreasing the number of unknown words. With its richer architecture, the Factored NMT approach allows us to implement several training setup that will be discussed in detail along this paper. On the IWSLT’15 English-to-French task, FNMT model outperforms NMT model in terms of BLEU score. A qualitative analysis of the output on a set of test sentences shows the effectiveness of the FNMT model. @@ -49,7 +49,7 @@ Audio Segmentation for Robust Real-Time Speech Recognition Based on Neural Networks MichaWetzel MatthiasSperber - AlexanderWaibel + AlexanderWaibel Speech that contains multimedia content can pose a serious challenge for real-time automatic speech recognition (ASR) for two reasons: (1) The ASR produces meaningless output, hurting the readability of the transcript. (2) The search space of the ASR is blown up when multimedia content is encountered, resulting in large delays that compromise real-time requirements. This paper introduces a segmenter that aims to remove these problems by detecting music and noise segments in real-time and replacing them with silence. We propose a two step approach, consisting of frame classification and smoothing. First, a classifier detects speech and multimedia on the frame level. In the second step the smoothing algorithm considers the temporal context to prevent rapid class fluctuations. We investigate in frame classification and smoothing settings to obtain an appealing accuracy-latency-tradeoff. The proposed segmenter yields increases the transcript quality of an ASR system by removing on average 39 % of the errors caused by non-speech in the audio stream, while maintaining a real-time applicable delay of 270 milliseconds. 2016.iwslt-1.4 wetzel-etal-2016-audio @@ -67,7 +67,7 @@ Toward Multilingual Neural Machine Translation with Universal Encoder and Decoder Thanh-LeHa JanNiehues - AlexWaibel + AlexWaibel In this paper, we present our first attempts in building a multilingual Neural Machine Translation framework under a unified approach in which the information shared among languages can be helpful in the translation of individual language pairs. We are then able to employ attention-based Neural Machine Translation for many-to-many multilingual translation tasks. Our approach does not require any special treatment on the network architecture and it allows us to learn minimal number of free parameters in a standard way of training. Our approach has shown its effectiveness in an under-resourced translation scenario with considerable improvements up to 2.6 BLEU points. In addition, we point out a novel way to make use of monolingual data with Neural Machine Translation using the same approach with a 3.15-BLEU-score gain in IWSLT’16 English→German translation task. 2016.iwslt-1.6 ha-etal-2016-toward @@ -96,8 +96,8 @@ Towards Improving Low-Resource Speech Recognition Using Articulatory and Language Features MarkusMüller - SebastianStüker - AlexWaibel + SebastianStüker + AlexWaibel In an increasingly globalized world, there is a rising demand for speech recognition systems. Systems for languages like English, German or French do achieve a decent performance, but there exists a long tail of languages for which such systems do not yet exist. State-of-the-art speech recognition systems feature Deep Neural Networks (DNNs). Being a data driven method and therefore highly dependent on sufficient training data, the lack of resources directly affects the recognition performance. There exist multiple techniques to deal with such resource constraint conditions, one approach is the use of additional data from other languages. In the past, is was demonstrated that multilingually trained systems benefit from adding language feature vectors (LFVs) to the input features, similar to i-Vectors. In this work, we extend this approach by the addition of articulatory features (AFs). We show that AFs also benefit from LFVs and that multilingual system setups benefit from adding both AFs and LFVs. Pretending English to be a low-resource language, we restricted ourselves to use only 10h of English acoustic training data. For system training, we use additional data from French, German and Turkish. By using a combination of AFs and LFVs, we were able to decrease the WER from 18.1% to 17.3% after system combination in our setup using a multilingual phone set. 2016.iwslt-1.9 muller-etal-2016-towards @@ -107,14 +107,14 @@ EunahCho JanNiehues Thanh-LeHa - AlexWaibel + AlexWaibel In this paper, we investigate a multilingual approach for speech disfluency removal. A major challenge of this task comes from the costly nature of disfluency annotation. Motivated by the fact that speech disfluencies are commonly observed throughout different languages, we investigate the potential of multilingual disfluency modeling. We suggest that learning a joint representation of the disfluencies in multiple languages can be a promising solution to the data sparsity issue. In this work, we utilize a multilingual neural machine translation system, where a disfluent speech transcript is directly transformed into a cleaned up text. Disfluency removal experiments on English and German speech transcripts show that multilingual disfluency modeling outperforms the single language systems. In a following experiment, we show that the improvements are also observed in a downstream application using the disfluency-removed transcripts as input. 2016.iwslt-1.10 cho-etal-2016-multilingual A Neural Verb Lexicon Model with Source-side Syntactic Context for String-to-Tree Machine Translation - MariaNădejde + MariaNădejde AlexandraBirch PhilippKoehn String-to-tree MT systems translate verbs without lexical or syntactic context on the source side and with limited target-side context. The lack of context is one reason why verb translation recall is as low as 45.5%. We propose a verb lexicon model trained with a feed-forward neural network that predicts the target verb conditioned on a wide source-side context. We show that a syntactic context extracted from the dependency parse of the source sentence improves the model’s accuracy by 1.5% over a baseline trained on a window context. When used as an extra feature for re-ranking the n-best list produced by the string-to-tree MT system, the verb lexicon model improves verb translation recall by more than 7%. @@ -124,7 +124,7 @@ <fixed-case>M</fixed-case>icrosoft Speech Language Translation (<fixed-case>MSLT</fixed-case>) Corpus: The <fixed-case>IWSLT</fixed-case> 2016 release for <fixed-case>E</fixed-case>nglish, <fixed-case>F</fixed-case>rench and <fixed-case>G</fixed-case>erman ChristianFedermann - William D.Lewis + William D.Lewis We describe the Microsoft Speech Language Translation (MSLT) corpus, which was created in order to evaluate end-to-end conversational speech translation quality. The corpus was created from actual conversations over Skype, and we provide details on the recording setup and the different layers of associated text data. The corpus release includes Test and Dev sets with reference transcripts for speech recognition. Additionally, cleaned up transcripts and reference translations are available for evaluation of machine translation quality. The IWSLT 2016 release described here includes the source audio, raw transcripts, cleaned up transcripts, and translations to or from English for both French and German. 2016.iwslt-1.12 federmann-lewis-2016-microsoft @@ -133,7 +133,7 @@ Joint <fixed-case>ASR</fixed-case> and <fixed-case>MT</fixed-case> Features for Quality Estimation in Spoken Language Translation Ngoc-TienLe BenjaminLecouteux - LaurentBesacier + LaurentBesacier This paper aims to unravel the automatic quality assessment for spoken language translation (SLT). More precisely, we propose several effective estimators based on our estimation of transcription (ASR) quality, translation (MT) quality, or both (combined and joint features using ASR and MT information). Our experiments provide an important opportunity to advance the understanding of the prediction quality of words in a SLT output that were revealed by MT and ASR features. These results could be applied to interactive speech translation or computer-assisted translation of speeches and lectures. For reproducible experiments, the code allowing to call our WCE-LIG application and the corpora used are made available to the research community. 2016.iwslt-1.13 le-etal-2016-joint @@ -150,15 +150,15 @@ <fixed-case>FBK</fixed-case>’s Neural Machine Translation Systems for <fixed-case>IWSLT</fixed-case> 2016 - M. AminFarajian - RajenChatterjee + M. AminFarajian + RajenChatterjee CostanzaConforti ShahabJalalvand VevakeBalaraman - Mattia A.Di Gangi + Mattia A.Di Gangi DuyguAtaman MarcoTurchi - MatteoNegri + MatteoNegri MarcelloFederico In this paper, we describe FBK’s neural machine translation (NMT) systems submitted at the International Workshop on Spoken Language Translation (IWSLT) 2016. The systems are based on the state-of-the-art NMT architecture that is equipped with a bi-directional encoder and an attention mechanism in the decoder. They leverage linguistic information such as lemmas and part-of-speech tags of the source words in the form of additional factors along with the words. We compare performances of word and subword NMT systems along with different optimizers. Further, we explore different ensemble techniques to leverage multiple models within the same and across different networks. Several reranking methods are also explored. Our submissions cover all directions of the MSLT task, as well as en-{de, fr} and {de, fr}-en directions of TED. Compared to previously published best results on the TED 2014 test set, our models achieve comparable results on en-de and surpass them on en-fr (+2 BLEU) and fr-en (+7.7 BLEU) language pairs. 2016.iwslt-1.15 @@ -171,7 +171,7 @@ Thanh-LeHa MatthiasSperber MohammedMediani - AlexWaibel + AlexWaibel In this paper, we present the KIT systems of the IWSLT 2016 machine translation evaluation. We participated in the machine translation (MT) task as well as the spoken language language translation (SLT) track for English→German and German→English translation. We use attentional neural machine translation (NMT) for all our submissions. We investigated different methods to adapt the system using small in-domain data as well as methods to train the system on these small corpora. In addition, we investigated methods to combine NMT systems that encode the input as well as the output differently. We combine systems using different vocabularies, reverse translation systems, multi-source translation system. In addition, we used pre-translation systems that facilitate phrase-based machine translation systems. Results show that applying domain adaptation and ensemble technique brings a crucial improvement of 3-4 BLEU points over the baseline system. In addition, system combination using n-best lists yields further 1-2 BLEU points. 2016.iwslt-1.16 cho-etal-2016-adaptation @@ -181,8 +181,8 @@ WilfriedMichel ZoltánTüske M. Ali BashaShaik - RalfSchlüter - HermannNey + RalfSchlüter + HermannNey In this paper the RWTH large vocabulary continuous speech recognition (LVCSR) systems developed for the IWSLT-2016 evaluation campaign are described. This evaluation campaign focuses on transcribing spontaneous speech from Skype recordings. State-of-the-art bidirectional long short-term memory (LSTM) and deep, multilingually boosted feed-forward neural network (FFNN) acoustic models are trained an narrow and broadband features. An open vocabulary approach using subword units is also considered. LSTM and count-based full word and hybrid backoff language modeling methods are used to model the morphological richness of the German language. All these approaches are combined using confusion network combination (CNC) to yield a competitive WER. 2016.iwslt-1.17 michel-etal-2016-rwth @@ -192,7 +192,7 @@ NadirDurrani FahimDalvi HassanSajjad - StephanVogel + StephanVogel This paper describes QCRI’s machine translation systems for the IWSLT 2016 evaluation campaign. We participated in the Arabic→English and English→Arabic tracks. We built both Phrase-based and Neural machine translation models, in an effort to probe whether the newly emerged NMT framework surpasses the traditional phrase-based systems in Arabic-English language pairs. We trained a very strong phrase-based system including, a big language model, the Operation Sequence Model, Neural Network Joint Model and Class-based models along with different domain adaptation techniques such as MML filtering, mixture modeling and using fine tuning over NNJM model. However, a Neural MT system, trained by stacking data from different genres through fine-tuning, and applying ensemble over 8 models, beat our very strong phrase-based system by a significant 2 BLEU points margin in Arabic→English direction. We did not obtain similar gains in the other direction but were still able to outperform the phrase-based system. We also applied system combination on phrase-based and NMT outputs. 2016.iwslt-1.18 durrani-etal-2016-qcris @@ -214,8 +214,8 @@ SoniaPipa Alin FlorentinVasile IoanaIonașcu - Stefan DanielDumitrescu - TiberiuBoros + Stefan DanielDumitrescu + TiberiuBoros Spoken Language Translation is currently a hot topic in the research community. This task is very complex, involving automatic speech recognition, text-normalization and machine translation. We present our speech translation system, which was compared against the other systems participating in the IWSLT 2016 Shared Task. We introduce our ASR system for English and our MT system for English to French (En-Fr) and English to German (En-De) language pairs. Additionally, for the English to French Challenge we introduce a methodology that enables the enhancement of statistical phrase-based translation with translation equivalents deduced from monolingual corpora using neural word embedding. 2016.iwslt-1.20 pipa-etal-2016-racai @@ -227,7 +227,7 @@ BrianThompson JonathanTaylor JeremyGwinnup - TimothyAnderson + TimothyAnderson GrantErdmann EricHansen BrianOre @@ -243,7 +243,7 @@ AndreasGuta NickRossenbach MiguelGraça - HermannNey + HermannNey This work describes the statistical machine translation (SMT) systems of RWTH Aachen University developed for the evaluation campaign of International Workshop on Spoken Language Translation (IWSLT) 2016. We have participated in the MT track for the German→English language pair employing our state-of-the-art phrase-based system, neural machine translation implementation and our joint translation and reordering decoder. Furthermore, we have applied feed-forward and recurrent neural language and translation models for reranking. The attention-based approach has been used for reranking the n-best lists for both phrasebased and hierarchical setups. On top of these systems, we make use of system combination to enhance the translation quality by combining individually trained systems. 2016.iwslt-1.22 peter-etal-2016-rwth-aachen @@ -255,17 +255,17 @@ MatthiasSperber ThomasZenkel KevinKilgour - SebastianStüker - AlexWaibel + SebastianStüker + AlexWaibel This paper describes our German and English Speech-to-Text (STT) systems for the 2016 IWSLT evaluation campaign. The campaign focuses on the transcription of unsegmented TED talks. Our setup includes systems using both the Janus and Kaldi frameworks. We combined the outputs using both ROVER [1] and confusion network combination (CNC) [2] to archieve a good overall performance. The individual subsystems are built by using different speaker-adaptive feature combination (e.g., lMEL with i-vector or bottleneck speaker vector), acoustic models (GMM or DNN) and speaker adaption (MLLR or fMLLR). Decoding is performed in two stages, where the GMM and DNN systems are adapted on the combination of the first stage outputs using MLLR, and fMLLR. The combination setup produces a final hypothesis that has a significantly lower WER than any of the individual subsystems. For the English TED task, our best combination system has a WER of 7.8% on the development set while our other combinations gained 21.8% and 28.7% WERs for the English and German MSLT tasks. 2016.iwslt-1.23 nguyen-etal-2016-2016 <fixed-case>UFAL</fixed-case> Submissions to the <fixed-case>IWSLT</fixed-case> 2016 <fixed-case>MT</fixed-case> Track - OndřejBojar + OndřejBojar OndřejCífka - JindřichHelcl + JindřichHelcl TomKocmi RomanSudarikov We present our submissions to the IWSLT 2016 machine translation task, as our first attempt to translate subtitles and one of our early experiments with neural machine translation (NMT). We focus primarily on English→Czech translation direction but perform also basic adaptation experiments for NMT with German and also the reverse direction. Three MT systems are tested: (1) our Chimera, a tight combination of phrase-based MT and deep linguistic processing, (2) Neural Monkey, our implementation of a NMT system in TensorFlow and (3) Nematus, an established NMT system. diff --git a/data/xml/2016.jeptalnrecital.xml b/data/xml/2016.jeptalnrecital.xml index cb92b74be8..a66e4af297 100644 --- a/data/xml/2016.jeptalnrecital.xml +++ b/data/xml/2016.jeptalnrecital.xml @@ -4,7 +4,7 @@ Actes de la conférence conjointe JEP-TALN-RECITAL 2016. volume 1 : JEP LaurenceDanlos - ThierryHamon + ThierryHamon AFCP - ATALA
Paris, France
7 @@ -75,7 +75,7 @@ Alignement de séquences phonétiques pour une analyse phonologique des erreurs de transcription automatique (Phonetic sequences alignment for a phonemic analysis of automatic speech transcription errors ) fra CamilleDutrey - MartineAdda-Decker + MartineAdda-Decker NaomiYamaguchi 46–54 La transcription automatique de la parole obtient aujourd’hui des performances élevées avec des taux d’erreur qui tombent facilement en dessous de 10% pour une parole journalistique. Cependant, pour des conversations plus libres, ils stagnent souvent autour de 20–30%. En français, une grande partie des erreurs sont dues à des confusions entre homophones n’impliquant pas les niveaux acousticophonétique et phonologique. Cependant, de nombreuses erreurs peuvent s’expliquer par des variantes de productions non prévues par le système. Afin de mieux comprendre quels processus phonologiques pourraient expliquer ces variantes spécifiques de la parole spontanée, nous proposons une analyse des erreurs en comparant prononciations attendue (référence) et reconnue (hypothèse) via un alignement phonétique par programmation dynamique. Les distances locales entre paires de phonèmes appariés correspondent au nombre de traits phonétiques disjoints. Nos analyses permettent d’identifier les traits phonétiques les plus fréquemment impliqués dans les erreurs et donnent des pistes pour des interprétations phonologiques. @@ -107,8 +107,8 @@ KillianJanod MohamedMorchid RichardDufour - GeorgesLinarès - RenatoDe Mori + GeorgesLinarès + RenatoDe Mori 73–81 Les représentations de documents au moyen d’approches à base de réseaux de neurones ont montré des améliorations significatives dans de nombreuses tâches du traitement du langage naturel. Dans le cadre d’applications réelles, où des conditions d’enregistrement difficiles peuvent être rencontrées, la transcription automatique de documents parlés peut générer un nombre de mots mal transcrits important. Cet article propose une représentation des documents parlés très bruités utilisant des caractéristiques apprises par un auto-encodeur profond supervisé. La méthode proposée s’appuie à la fois sur les documents bruités et leur équivalent propre annoté manuellement pour estimer une représentation plus robuste des documents bruités. Cette représentation est évaluée sur le corpus DECODA sur une tâche de classification thématique de conversations téléphoniques atteignant une précision de 83% avec un gain d’environ 6%. 2016.jeptalnrecital-jep.9 @@ -166,7 +166,7 @@ Cartopho : un site web de cartographie de variantes de prononciation en français (Cartopho: a website for mapping pronunciation variants in <fixed-case>F</fixed-case>rench) fra - PhilippeBoula de Mareüil + PhilippeBoula de Mareüil Jean-PhilippeGoldman AlbertRilliard YvesScherrer @@ -181,7 +181,7 @@ fra OlivierGalibert JulietteKahn - SophieRosset + SophieRosset 128–136 Le travail que nous présentons ici s’inscrit dans le domaine de l’évaluation des systèmes de reconnaissance automatique de la parole en vue de leur utilisation dans une tâche aval, ici la reconnaissance des entités nommées. Plus largement, la question que nous nous posons est “que peut apporter une métrique d’évaluation en dehors d’un score ?". Nous nous intéressons particulièrement aux erreurs des systèmes et à leur analyse et éventuellement à l’utilisation de ce que nous connaissons de ces erreurs. Nous étudions dans ce travail les listes ordonnées d’erreurs générées à partir de différentes métriques et analysons ce qui en ressort. Nous avons appliqué la même méthode sur les sorties de différents systèmes de reconnaissance de la parole. Nos expériences mettent en évidence que certaines métriques apportent une information plus pertinente étant donné une tâche et transverse à différents systèmes. 2016.jeptalnrecital-jep.15 @@ -192,7 +192,7 @@ fra JonathanChevelu DamienLolive - Sébastien LeMaguer + Sébastien LeMaguer DavidGuennec 137–145 En proposant une nouvelle approche de synthèse de la parole, les études comportent généralement une évaluation subjective d’échantillons acoustiques produits par un système de référence et un nouveau système. Ces échantillons sont produits à partir d’un petit ensemble de phrases choisies aléatoirement dans un unique domaine. Ainsi, statistiquement, des échantillons pratiquement identiques sont présentés et réduisent les écarts de mesure entre les systèmes, au risque de les considérer comme non significatifs. Pour éviter cette problématique méthodologique, nous comparons deux systèmes sur des milliers d’échantillons de différents domaines. L’évaluation est réalisée uniquement sur les paires d’échantillons les plus pertinentes, c’est-à-dire les plus différentes acoustiquement. Cette méthode est appliquée sur un système de synthèse de type HTS et un second par sélection d’unités. La comparaison avec l’approche classique montre que cette méthode révèle des écarts qui jusqu’alors n’étaient pas significatifs. @@ -241,7 +241,7 @@ MohamedBouaziz MohamedMorchid RichardDufour - GeorgesLinarès + GeorgesLinarès ProsperCorrea 173–181 Cet article présente une méthode de prédiction de genres d’émissions télévisées couvrant 2 jours de diffusion de 4 chaînes TV françaises structurés en émissions annotées en genres. Ce travail traite des médias de masse de flux de chaînes télévisées et rejoint l’effort global d’extraction de connaissance à partir de cette grande quantité de données produites continuellement. Le corpus employé est fourni par l’entreprise EDD, anciennement appelée “L’Européenne de Données”, une entreprise spécialisée dans la gestion des flux multimédias. Les expériences détaillées dans cet article montrent qu’une approche simple fondée sur un modèle de n-grammes permet de prédire le genre d’une émission selon un historique avec une précision avoisinant les 50 %. @@ -254,7 +254,7 @@ DianeCaussade NathalieVallée NathalieHenrich Bernardoni - Jean-MarcColletta + Jean-MarcColletta SilvainGerber FrédériqueLetué Marie-JoséMartinez @@ -373,8 +373,8 @@ fra OlivierGalibert NathalieCamelin - PaulDeléglise - SophieRosset + PaulDeléglise + SophieRosset 274–282 Nous nous intéressons à l’évaluation de la qualité des systèmes de reconnaissance de la parole étant donné une tâche de compréhension. L’objectif de ce travail est de fournir un outil permettant la sélection d’un système de reconnaissance automatique de la parole le plus adapté pour un système de dialogue donné. Nous comparons ici différentes métriques, notamment le WER, NE-WER et ATENE métrique proposée récemment pour l’évaluation des systèmes de reconnaissance de la parole étant donné une tâche de reconnaissance d’entités nommées. Cette dernière métrique montrait une meilleure corrélation avec les résultats de la tâche globale que toutes les autres métriques testées. Nos mesures indiquent une très forte corrélation avec la mesure ATENE et une moins forte avec le WER. 2016.jeptalnrecital-jep.31 @@ -426,7 +426,7 @@ Etude par <fixed-case>EMA</fixed-case> des mouvements de la mâchoire inférieure durant les consonnes de l’arabe marocain (<fixed-case>EMA</fixed-case> study of jaw movements during <fixed-case>M</fixed-case>oroccan <fixed-case>A</fixed-case>rabic consonants) fra ChakirZeroual - PhilipHoole + PhilipHoole AdamantiosGafos 319–327 Cette étude est basée sur des données obtenues à l’aide d’EMA (AG500) enregistrant les mouvements de la mâchoire inférieure (Minf) durant les consonnes labiales, coronales, vélaires, uvulaires, pharyngales et laryngales de l’arabe marocain dans les contextes aCa et iCi. Nous avons montré que l’implication de la Minf est cruciale durant /s S t T/ (S T : consonnes emphatiques). Le recul de la racine de la langue n’est pas nécessairement corrélé à la baisse de la Minf. Les consonnes apicales ne sont pas toujours associées à l’abaissement de la Minf. La Minf ne semble pas impliquée durant les laryngales et les pharyngales, ce qui est en accord avec les déductions de Goldstein (1995). Les mouvements verticaux et horizontaux de la Minf sont relativement indépendants. @@ -481,8 +481,8 @@ Fusion d’espaces de représentations multimodaux pour la reconnaissance du rôle du locuteur dans des documents télévisuels (Multimodal embedding fusion for robust speaker role recognition in video broadcast ) fra SebastienDelecraz - FredericBechet - BenoitFavre + FredericBechet + BenoitFavre MickaelRouvier 364–372 L’identification du rôle d’un locuteur dans des émissions de télévision est un problème de classification de personne selon une liste de rôles comme présentateur, journaliste, invité, etc. À cause de la nonsynchronie entre les modalités, ainsi que par le manque de corpus de vidéos annotées dans toutes les modalités, seulement une des modalités est souvent utilisée. Nous présentons dans cet article une fusion multimodale des espaces de représentations de l’audio, du texte et de l’image pour la reconnaissance du rôle du locuteur pour des données asynchrones. Les espaces de représentations monomodaux sont entraînés sur des corpus de données exogènes puis ajustés en utilisant des réseaux de neurones profonds sur un corpus d’émissions françaises pour notre tâche de classification. Les expériences réalisées sur le corpus de données REPERE ont mis en évidence les gains d’une fusion au niveau des espaces de représentations par rapport aux méthodes de fusion tardive standard. @@ -574,7 +574,7 @@ EmmanuelFerreira AlexandreReiffers-Masson BassamJabaian - FabriceLefèvre + FabriceLefèvre 437–445 De nombreux modules de compréhension de la parole ont en commun d’être probabilistes et basés sur des algorithmes d’apprentissage automatique. Deux difficultés majeures, rencontrées par toutes les méthodes existantes sont : le coût de la collecte des données et l’adaptation d’un module existant à un nouveau domaine. Dans cet article, nous proposons un processus d’adaptation en ligne avec une politique apprise en utilisant un algorithme de type bandit contre un adversaire. Nous montrons que cette proposition peut permettre d’optimiser un équilibre entre le coût de la collecte des retours demandés aux utilisateurs et la performance globale de la compréhension du langage parlé après sa mise à jour. 2016.jeptalnrecital-jep.49 @@ -583,7 +583,7 @@ Patrons Rythmiques et Genres Littéraires en Synthèse de la Parole (How to improve rhythmic patterns according to literary genre in synthesized speech ⇤ ) fra - ElisabethDelais-Roussarie + ElisabethDelais-Roussarie DamienLolive HiyonYoo DavidGuennec @@ -680,7 +680,7 @@ Pics mélodiques prétoniques en portugais brésilien : une étude quantitative (Pre-stress pitch peaks in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese: a quantitative study) fra PlínioBarbosa - Philippe Boulade Mareüil + Philippe Boulade Mareüil 527–535 Le présent travail porte sur un trait prosodique assez typique du portugais brésilien : un pic mélodique en position prétonique en fin d’énoncé déclaratif. Il vise à quantifier le phénomène, à partir d’enregistrements de cinq hommes et cinq femmes de l’état de São Paulo, en lecture et en narration. Il en résulte que des montées sur les prétoniques de 4 demi-tons suivies de descentes de 8 demi-tons, en moyenne, s’observent dans les deux styles de parole, chez les femmes. Chez les hommes, ces valeurs sont respectivement de 3 et 7 demi-tons. Ces montées-descentes d’une tierce et d’une quinte, respectivement, peuvent donner au portugais brésilien cette musicalité particulière et, puisque les descentes sont plus rapides chez les femmes, elles ouvrent des perspectives sociolinguistiques intéressantes. 2016.jeptalnrecital-jep.59 @@ -704,7 +704,7 @@ AngéliqueAmelot GrégoireBachman CatherineHerrgott - MartineAdda-Decker + MartineAdda-Decker LiseCrevier-Buchman 545–553 Quelles sont les caractéristiques acoustiques et articulatoires des voyelles parlées et chantées du Cantu in Paghjella (polyphonie corse à trois voix), en fonction du chanteur, de la voyelle et de la fréquence fondamentale ? L’analyse acoustique des quatre premiers formants de la parole au chant et celle des mouvements articulatoires lingual et labial, montrent généralement (i) une significative augmentation de F1 avec abaissement lingual mais fermeture labiale, en lien avec une corrélation entre F0 et F1 ; (ii) une baisse de F2 pour les voyelles antérieures, une postériorisation linguale et un recul de l’ombre hyoïdienne uniquement pour le bassu ; (iii) une nette augmentation de F3 et F4 surtout chez le bassu ; (iv) une augmentation du Singing Power Ratio surtout chez les bassu et secunda. Ses valeurs sont toutefois inférieures à celles de chanteurs lyriques, et ne correspondant pas comme ces derniers à un rapprochement de F3 et F4. @@ -773,10 +773,10 @@ Réalisation phonétique et contraste phonologique marginal : une étude automatique des voyelles du roumain (Phonetic realization and marginal phonemic contrast : an automatic study of the <fixed-case>R</fixed-case>omanian vowels) fra - IoanaVasilescu + IoanaVasilescu MargaretRenwick CamilleDutrey - LoriLamel + LoriLamel BianaVieru 597–606 Cet article est dédié à l’analyse acoustique des voyelles du roumain : des productions en parole continue sont comparées à des prononciations “de laboratoire”. Les objectifs sont : (1) décrire les traits acoustiques des voyelles en fonction du style de parole ; (2) estimer la relation entre traits acoustiques et contrastes phonémiques de la langue ; (3) estimer dans quelle mesure l’étude de l’oral apporte des éclairages au sujet des attributs phonémiques des voyelles centrales [2] et [1], dont le statut (phonèmes vs allophones) est controversé. Nous montrons que les traits acoustiques sont comparables pour la parole journalistique vs contrôlée pour l’ensemble de l’inventaire sauf [2] et [1]. Dans la parole contrôlée [2] et [1] sont distinctes, mais confondues en faveur du timbre [2] à l’oral. La confusion de timbres n’est pas source d’inintelligibilité car [2] et [1] sont en distribution quasicomplémentaire. Ce résultat apporte des éclairages sur la question du contraste phonémique graduel et marginal (Goldsmith, 1995; Scobbie & Stuart-Smith, 2008; Hall, 2013). @@ -821,7 +821,7 @@ Rôle des contextes lexical et post-lexical dans la réalisation du schwa : apports du traitement automatique de grands corpus (Role of lexical and post-lexical contexts in <fixed-case>F</fixed-case>rench schwa realisations : benefits of automatic processing of large corpora ) fra YaruWu - MartineAdda-Decker + MartineAdda-Decker CécileFougeron 633–641 Le rôle du contexte est connu dans la réalisation ou non du schwa en français. Deux grands corpus oraux de parole journalistique (ETAPE) et de parole familière (NCCFr), dans lesquels la realisation de schwa est déterminée à partir d’un alignement automatique, ont été utilisés pour examiner la contribution du contexte au sein du mot contenant schwa (lexical) vs. au travers de la frontière avec le mot précédent (post-lexical). Nos résultats montrent l’importance du contexte pré-frontière dans l’explication de la chute du schwa dans la première syllabe d’un mot polysyllabique en parole spontanée. Si le mot précédant se termine par une consonne, nous pouvons faire appel à la loi des trois consonnes et au principe de sonorité pour expliquer des différences de comportement en fonction de la nature des consonnes en contact. @@ -832,7 +832,7 @@ Des Réseaux de Neurones avec Mécanisme d’Attention pour la Compréhension de la Parole (Exploring the use of Attention-Based Recurrent Neural Networks For Spoken Language Understanding ) fra EdwinSimonnet - PaulDeléglise + PaulDeléglise NathalieCamelin YannickEstève 642–650 @@ -849,7 +849,7 @@ RichardDufour KillianJanod Waad BenKheder - GeorgesLinarès + GeorgesLinarès 651–659 Les applications de compréhension du langage parlé sont moins performantes si les documents transcrits automatiquement contiennent un taux d’erreur-mot élevé. Des solutions récentes proposent de projeter ces transcriptions dans un espace de thèmes, comme par exemple l’allocation latente de Dirichlet (LDA), la LDA supervisée ainsi que le modèle author-topic (AT). Une représentation compacte originale, appelée c-vector, a été récemment introduite afin de surmonter la difficulté liée au choix de la taille de ces espaces thématiques. Cette représentation améliore la robustesse aux erreurs de transcription, en compactant les différentes représentations LDA d’un document parlé dans un espace réduit. Le défaut majeur de cette méthode est le nombre élevé de sous-tâches nécessaires à la construction de l’espace c-vector. Cet article propose de corriger ce défaut en utilisant un cadre original fondé sur un espace de caractéristiques robustes de faible dimension provenant d’un ensemble de modèles AT considérant à la fois le contenu du dialogue parlé (les mots) et la classe du document. Les expérimentations, conduites sur le corpus DECODA, montrent que la représentation proposée permet un gain de plus de 2.5 points en termes de conversations correctement classifiées. 2016.jeptalnrecital-jep.73 @@ -907,7 +907,7 @@ Sur les traces acoustiques de /ʃ/ et /ç/ en allemand <fixed-case>L</fixed-case>2 (Acoustic tracing of /<fixed-case>S</fixed-case>/ and /ç/ in <fixed-case>G</fixed-case>erman <fixed-case>L</fixed-case>2) fra JaneWottawa - MartineAdda-Decker + MartineAdda-Decker 696–704 Les apprenants français de l’allemand ont des difficultés à produire la fricative palatale sourde allemande /ç/ (Ich-Laut) et ont tendance à la remplacer par la fricative post-alvéolaire /S/. Nous nous demandons si avec des mesures acoustiques ces imprécisions de production peuvent être quantifiées d’une manière plus objective. Deux mesures acoustiques ont été examinées afin de distinguer au mieux /S/ et /ç/ dans un contexte VC en position finale de mot dans des productions de locuteurs germanophones natifs. Elles servent ensuite à quantifier les difficultés de production des apprenants français. 285 tokens de 20 locuteurs natifs et 20 locuteurs L2 ont été analysés. Les mesures appliquées sont le centre de gravité spectral et des rapports d’intensité par bande de fréquence. Sur les productions de locuteurs natifs, les résultats montrent que la mesure la plus fiable pour distinguer acoustiquement /S/ et /ç/ est le ratio d’intensité entre fréquences hautes (4-7 kHz) et basses (1-4 kHz). Les mesures confirment également les difficultés de production des locuteurs natifs français. 2016.jeptalnrecital-jep.78 @@ -927,7 +927,7 @@ De l’utilisation de descripteurs issus de la linguistique computationnelle dans le cadre de la synthèse par <fixed-case>HMM</fixed-case> (Toward the use of information density based descriptive features in <fixed-case>HMM</fixed-case> based speech synthesis) fra - Sébastien LeMaguer + Sébastien LeMaguer BerndMoebius IngmarSteiner DamienLolive @@ -944,7 +944,7 @@ NathalieCamelin CamilleDutrey FabianSantiago - MartineAdda-Decker + MartineAdda-Decker 723–731 Récemment, l’utilisation des représentations continues de mots a connu beaucoup de succès dans plusieurs tâches de traitement du langage naturel. Dans cet article, nous proposons d’étudier leur utilisation dans une architecture neuronale pour la tâche de détection des erreurs au sein de transcriptions automatiques de la parole. Nous avons également expérimenté et évalué l’utilisation de paramètres prosodiques en suppléments des paramètres classiques (lexicaux, syntaxiques, . . .). La principale contribution de cet article porte sur la combinaison de différentes représentations continues de mots : plusieurs approches de combinaison sont proposées et évaluées afin de tirer profit de leurs complémentarités. Les expériences sont effectuées sur des transcriptions automatiques du corpus ETAPE générées par le système de reconnaissance automatique du LIUM. Les résultats obtenus sont meilleurs que ceux d’un système état de l’art basé sur les champs aléatoires conditionnels. Pour terminer, nous montrons que la mesure de confiance produite est particulièrement bien calibrée selon une évaluation en terme d’Entropie Croisée Normalisée (NCE). 2016.jeptalnrecital-jep.81 @@ -1009,7 +1009,7 @@ Actes de la conférence conjointe JEP-TALN-RECITAL 2016. volume 2 : TALN (Articles longs) LaurenceDanlos - ThierryHamon + ThierryHamon AFCP - ATALA
Paris, France
7 @@ -1036,8 +1036,8 @@ <fixed-case>B</fixed-case>leu, contusion, ecchymose : tri automatique de synonymes en fonction de leur difficulté de lecture et compréhension (Automatic ranking of synonyms according to their reading and comprehension difficulty) fra ThomasFrancois - Mokhtar B.Billami - NúriaGala + Mokhtar B.Billami + NúriaGala DelphineBernhard 15–28 La lisibilité d’un texte dépend fortement de la difficulté des unités lexicales qui le composent. La simplification lexicale vise ainsi à remplacer les termes complexes par des équivalents sémantiques plus simples à comprendre : par exemple, BLEU (‘résultat d’un choc’) est plus simple que CONTUSION ou ECCHYMOSE. Il est pour cela nécessaire de disposer de ressources qui listent des synonymes pour des sens donnés et les trient par ordre de difficulté. Cet article décrit une méthode pour constituer une ressource de ce type pour le français. Les listes de synonymes sont extraites de BabelNet et de JeuxDeMots, puis triées grâce à un algorithme statistique d’ordonnancement. Les résultats du tri sont évalués par rapport à 36 listes de synonymes ordonnées manuellement par quarante annotateurs. @@ -1059,7 +1059,7 @@ Construire un lexique de sentiments par crowdsourcing et propagation (Building a sentiment lexicon through crowdsourcing and spreading) fra MathieuLafourcade - Nathalie LeBrun + Nathalie LeBrun AlainJoubert 43–56 Cet article présente une méthode de construction d’une ressource lexicale de sentiments/émotions. Son originalité est d’associer le crowdsourcing via un GWAP (Game With A Purpose) à un algorithme de propagation, les deux ayant pour support et source de données le réseau lexical JeuxDeMots. Nous décrivons le jeu permettant de collecter des informations de sentiments, ainsi que les principes et hypothèses qui sous-tendent le fonctionnement de l’algorithme qui les propage au sein du réseau. Enfin, nous donnons les résultats quantitatifs et expliquons les méthodes d’évaluation qualitative des données obtenues, à la fois par le jeu et par la propagation par l’algorithme. Ces méthodes incluent une comparaison avec Emolex, une autre ressource de sentiments/émotions. @@ -1069,9 +1069,9 @@ Détection de concepts pertinents pour le résumé automatique de conversations par recombinaison de patrons (Relevant concepts detection for the automatic summary of conversations using patterns recombination ) fra - JérémyTrione - BenoitFavre - FredericBechet + JérémyTrione + BenoitFavre + FredericBechet 57–69 automatique de conversations par recombinaison de patrons Jérémy Trione Benoit Favre Frédéric Béchet Aix-Marseille Université, CNRS, LIF UMR 7279, 13000, Marseille, France prénom.nom@lif.univ-mrs.fr R ÉSUMÉ Ce papier décrit une approche pour créer des résumés de conversations parlées par remplissage de patrons. Les patrons sont générés automatiquement à partir de fragments généralisés depuis un corpus de résumés d’apprentissage. Les informations nécessaires pour remplir les patrons sont détectées dans les transcriptions des conversations et utilisées pour sélectionner les fragments candidats. L’approche obtient un score ROUGE-2 de 0.116 sur le corpus RATP-DECODA. Les résultats obtenus montrent que cette approche abstractive est plus performante que les approches extractives utilisées habituellement dans le domaine du résumé automatique. 2016.jeptalnrecital-long.5 @@ -1095,7 +1095,7 @@ fra NasredineSemmar OthmanZennaki - MeriamaLaib + MeriamaLaib 84–97 La traduction automatique statistique bien que performante est aujourd’hui limitée parce qu’elle nécessite de gros volumes de corpus parallèles qui n’existent pas pour tous les couples de langues et toutes les spécialités et que leur production est lente et coûteuse. Nous présentons, dans cet article, un prototype d’un moteur de traduction à base d’exemples utilisant la recherche d’information interlingue et ne nécessitant qu’un corpus de textes en langue cible. Plus particulièrement, nous proposons d’étudier l’impact d’un lexique bilingue de spécialité sur la performance de ce prototype. Nous évaluons ce prototype de traduction et comparons ses résultats à ceux du système de traduction statistique Moses en utilisant les corpus parallèles anglais-français Europarl (European Parliament Proceedings) et Emea (European Medicines Agency Documents). Les résultats obtenus montrent que le score BLEU du prototype du moteur de traduction à base d’exemples est proche de celui du système Moses sur des documents issus du corpus Europarl et meilleur sur des documents extraits du corpus Emea. 2016.jeptalnrecital-long.7 @@ -1116,7 +1116,7 @@ fra VincentLetard GabrielIllouz - SophieRosset + SophieRosset 112–124 Cet article examine l’utilisation du raisonnement analogique dans le contexte de l’apprentissage incrémental. Le problème d’apprentissage sous-jacent développé est le transfert de requêtes formulées en langue naturelle vers des commandes dans un langage de programmation. Nous y explorons deux questions principales : Comment se comporte le raisonnement par analogie dans le contexte de l’apprentissage incrémental ? De quelle manière la séquence d’apprentissage influence-t-elle la performance globale ? Pour y répondre, nous proposons un protocole expérimental simulant deux utilisateurs et différentes séquences d’apprentissage. Nous montrons que l’ordre dans la séquence d’apprentissage incrémental n’a d’influence notable que sous des conditions spécifiques. Nous constatons également la complémentarité de l’apprentissage incrémental avec l’analogie pour un nombre d’exemples d’apprentissage minimal. 2016.jeptalnrecital-long.9 @@ -1135,8 +1135,8 @@ Évaluation dune nouvelle structuration thématique hiérarchique des textes dans un cadre de résumé automatique et de détection d’ancres au sein de vidéos (Evaluation of a novel hierarchical thematic structuring of texts in the framework of text summarization and anchor detection for video hyperlinking ) fra - AncaSimon - GuillaumeGravier + AncaSimon + GuillaumeGravier PascaleSébillot 139–152 automatique et de détection d’ancres au sein de vidéos Anca Simon1 Guillaume Gravier2 Pascale Sébillot3 (1) Université de Rennes 1, IRISA & INRIA Rennes, Campus de Beaulieu, 35042 Rennes, France (2) CNRS, IRISA & INRIA Rennes, Campus de Beaulieu, 35042 Rennes, France (3) INSA, IRISA & INRIA Rennes, Campus de Beaulieu, 35042 Rennes, France anca.simon@irisa.fr, guillaume.gravier@irisa.fr, pascale.sebillot@irisa.fr R ÉSUMÉ Dans cet article, nous évaluons, à travers son intérêt pour le résumé automatique et la détection d’ancres dans des vidéos, le potentiel d’une nouvelle structure thématique extraite de données textuelles, composée d’une hiérarchie de fragments thématiquement focalisés. Cette structure est produite par un algorithme exploitant les distributions temporelles d’apparition des mots dans les textes en se fondant sur une analyse de salves lexicales. La hiérarchie obtenue a pour objet de filtrer le contenu non crucial et de ne conserver que l’information saillante des textes, à différents niveaux de détail. Nous montrons qu’elle permet d’améliorer la production de résumés ou au moins de maintenir les résultats de l’état de l’art, tandis que pour la détection d’ancres, elle nous conduit à la meilleure précision dans le contexte de la tâche Search and Anchoring in Video Archives à MediaEval. Les expériences sont réalisées sur du texte écrit et sur un corpus de transcriptions automatiques d’émissions de télévision. @@ -1169,7 +1169,7 @@ fra AlexisLinard EmmanuelMorin - BéatriceDaille + BéatriceDaille 180–193 L’extraction de lexiques bilingues à partir de corpus comparables se réalise traditionnellement en s’appuyant sur deux langues. Des travaux précédents en extraction de lexiques bilingues à partir de corpus parallèles ont démontré que l’utilisation de plus de deux langues peut être utile pour améliorer la qualité des alignements extraits. Nos travaux montrent qu’il est possible d’utiliser la même stratégie pour des corpus comparables. Nous avons défini deux méthodes originales impliquant des langues pivots et nous les avons évaluées sur quatre langues et deux langues pivots en particulier. Nos expérimentations ont montré que lorsque l’alignement entre la langue source et la langue pivot est de bonne qualité, l’extraction du lexique en langue cible s’en trouve améliorée. 2016.jeptalnrecital-long.14 @@ -1205,7 +1205,7 @@ AnaïsTack ThomasFrançois Anne-LaureLigozat - CédrickFairon + CédrickFairon 221–234 Cette étude examine l’utilisation de méthodes d’apprentissage incrémental supervisé afin de prédire la compétence lexicale d’apprenants de français langue étrangère (FLE). Les apprenants ciblés sont des néerlandophones ayant un niveau A2/B1 selon le Cadre européen commun de référence pour les langues (CECR). À l’instar des travaux récents portant sur la prédiction de la maîtrise lexicale à l’aide d’indices de complexité, nous élaborons deux types de modèles qui s’adaptent en fonction d’un retour d’expérience, révélant les connaissances de l’apprenant. En particulier, nous définissons (i) un modèle qui prédit la compétence lexicale de tous les apprenants du même niveau de maîtrise et (ii) un modèle qui prédit la compétence lexicale d’un apprenant individuel. Les modèles obtenus sont ensuite évalués par rapport à un modèle de référence déterminant la compétence lexicale à partir d’un lexique spécialisé pour le FLE et s’avèrent gagner significativement en exactitude (9%-17%). 2016.jeptalnrecital-long.17 @@ -1216,7 +1216,7 @@ fra AdrienBougouin FlorianBoudin - BeatriceDaille + BeatriceDaille 235–247 Dans cet article, nous nous intéressons à l’indexation de documents de domaines de spécialité par l’intermédiaire de leurs termes-clés. Plus particulièrement, nous nous intéressons à l’indexation telle qu’elle est réalisée par les documentalistes de bibliothèques numériques. Après analyse de la méthodologie de ces indexeurs professionnels, nous proposons une méthode à base de graphe combinant les informations présentes dans le document et la connaissance du domaine pour réaliser une indexation (hybride) libre et contrôlée. Notre méthode permet de proposer des termes-clés ne se trouvant pas nécessairement dans le document. Nos expériences montrent aussi que notre méthode surpasse significativement l’approche à base de graphe état de l’art. 2016.jeptalnrecital-long.18 @@ -1237,7 +1237,7 @@ Prédiction automatique de fonctions pragmatiques dans les reformulations (Automatic prediction of pragmatic functions in reformulations) fra NataliaGrabar - IrisEshkol-Taravella + IrisEshkol-Taravella 262–275 La reformulation participe à la structuration du discours, notamment dans le cas des dialogues, et contribue également à la dynamique du discours. Reformuler est un acte significatif qui poursuit des objectifs précis. L’objectif de notre travail est de prédire automatiquement la raison pour laquelle un locuteur effectue une reformulation. Nous utilisons une classification de onze fonctions pragmatiques inspirées des travaux existants et des données analysées. Les données de référence sont issues d’annotations manuelles et consensuelles des reformulations spontanées formées autour de trois marqueurs (c’est-à-dire, je veux dire, disons). Les données proviennent d’un corpus oral et d’un corpus de discussions sur les forums de santé. Nous exploitons des algorithmes de catégorisation supervisée et un ensemble de plusieurs descripteurs (syntaxiques, formels, sémantiques et discursifs) pour prédire les catégories de reformulation. La distribution des énoncés et phrases selon les catégories n’est pas homogène. Les expériences sont positionnées à deux niveaux : générique et spécifique. Nos résultats indiquent qu’il est plus facile de prédire les types de fonctions au niveau générique (la moyenne des F-mesures est autour de 0,80), qu’au niveau des catégories individuelles (la moyenne des F-mesures est autour de 0,40). L’influence de différents paramètres est étudiée. 2016.jeptalnrecital-long.20 @@ -1248,7 +1248,7 @@ fra OthmanZennaki NasredineSemmar - LaurentBesacier + LaurentBesacier 276–289 Nos travaux portent sur la construction rapide d’outils d’analyse linguistique pour des langues peu dotées en ressources. Dans une précédente contribution, nous avons proposé une méthode pour la construction automatique d’un analyseur morpho-syntaxique via une projection interlingue d’annotations linguistiques à partir de corpus parallèles (méthode fondée sur les réseaux de neurones récurrents). Nous présentons, dans cet article, une amélioration de notre modèle neuronal, avec la prise en compte d’informations linguistiques externes pour un annotateur plus complexe. En particulier, nous proposons d’intégrer des annotations morpho-syntaxiques dans notre architecture neuronale pour l’apprentissage non supervisé d’annotateurs sémantiques multilingues à gros grain (annotation en SuperSenses). Nous montrons la validité de notre méthode et sa généricité sur l’italien et le français et étudions aussi l’impact de la qualité du corpus parallèle sur notre approche (généré par traduction manuelle ou automatique). Nos expériences portent sur la projection d’annotations de l’anglais vers le français et l’italien. 2016.jeptalnrecital-long.21 @@ -1269,10 +1269,10 @@ <fixed-case>W</fixed-case>ord2<fixed-case>V</fixed-case>ec vs <fixed-case>DB</fixed-case>nary ou comment (ré)concilier représentations distribuées et réseaux lexico-sémantiques ? Le cas de l’évaluation en traduction automatique (<fixed-case>W</fixed-case>ord2<fixed-case>V</fixed-case>ec vs <fixed-case>DB</fixed-case>nary or how to bring back together vector representations and lexical resources ? A case study for machine translation evaluation) fra - ChristopheServan + ChristopheServan ZiedElloumi - HervéBlanchon - LaurentBesacier + HervéBlanchon + LaurentBesacier 304–317 Cet article présente une approche associant réseaux lexico-sémantiques et représentations distribuées de mots appliquée à l’évaluation de la traduction automatique. Cette étude est faite à travers l’enrichissement d’une métrique bien connue pour évaluer la traduction automatique (TA) : METEOR. METEOR permet un appariement approché (similarité morphologique ou synonymie) entre une sortie de système automatique et une traduction de référence. Nos expérimentations s’appuient sur la tâche Metrics de la campagne d’évaluation WMT 2014 et montrent que les représentations distribuées restent moins performantes que les ressources lexico-sémantiques pour l’évaluation en TA mais peuvent néammoins apporter un complément d’information intéressant à ces dernières. 2016.jeptalnrecital-long.23 @@ -1283,7 +1283,7 @@ Actes de la conférence conjointe JEP-TALN-RECITAL 2016. volume 2 : TALN (Posters) LaurenceDanlos - ThierryHamon + ThierryHamon AFCP - ATALA
Paris, France
7 @@ -1298,7 +1298,7 @@ Amélioration de la traduction automatique d’un corpus annoté (Improvement of the automatic translation of an annotated corpus) fra Marwa HadjSalah - HervéBlanchon + HervéBlanchon MounirZrigui DidierSchwab 318–324 @@ -1310,7 +1310,7 @@ Analyse d’une tâche de substitution lexicale : quelles sont les sources de difficulté ? (Difficulty analysis for a lexical substitution task) fra LudovicTanguy - CécileFabre + CécileFabre CamilleMercier 325–332 Nous proposons dans cet article une analyse des résultats de la campagne SemDis 2014 qui proposait une tâche de substitution lexicale en français. Pour les 300 phrases du jeu de test, des annotateurs ont proposé des substituts à un mot cible, permettant ainsi d’établir un gold standard sur lequel les systèmes participants ont été évalués. Nous cherchons à identifier les principales caractéristiques des items du jeu de test qui peuvent expliquer les variations de performance pour les humains comme pour les systèmes, en nous basant sur l’accord inter-annotateurs des premiers et les scores de rappel des seconds. Nous montrons que si plusieurs caractéristiques communes sont associées aux deux types de difficulté (rareté du sens dans lequel le mot-cible est employé, fréquence d’emploi du mot-cible), d’autres sont spécifiques aux systèmes (degré de polysémie du mot-cible, complexité syntaxique). @@ -1320,7 +1320,7 @@ L’anti-correcteur : outil d’évaluation positive de l’orthographe et de la grammaire (The ”anticorrecteur”: a positive evaluation module for spell and grammar checking) fra - Lydia-MaiHo-Dac + Lydia-MaiHo-Dac SophieMuller ValentineDelbar 333–341 @@ -1342,7 +1342,7 @@ Approximate unsupervised summary optimisation for selections of <fixed-case>ROUGE</fixed-case> NatalieSchluter - HéctorMartínez Alonso + HéctorMartínez Alonso 349–354 Approximate summary optimisation for selections of ROUGE It is standard to measure automatic summariser performance using the ROUGE metric. Unfortunately, ROUGE is not appropriate for unsupervised summarisation approaches. On the other hand, we show that it is possible to optimise approximately for ROUGE-n by using a document-weighted ROUGE objective. Doing so results in state-of-the-art summariser performance for single and multiple document summaries for both English and French. This is despite a non-correlation of the documentweighted ROUGE metric with human judgments, unlike the original ROUGE metric. These findings suggest a theoretical approximation link between the two metrics. 2016.jeptalnrecital-poster.5 @@ -1352,7 +1352,7 @@ L’architecture d’un modèle hybride pour la normalisation de <fixed-case>SMS</fixed-case> (A hybrid model architecture for <fixed-case>SMS</fixed-case> normalization) fra EleniKogkitsidou - GeorgesAntoniadis + GeorgesAntoniadis 355–363 La communication par SMS (Short Message Service), aussi bien que tout autre type de communication virtuelle sous forme de textes courts (mails, microblogs, tweets, etc.), présente certaines particularités spécifiques (syntaxe irrégulière, fusionnement et phonétisation de mots, formes abrégées, etc.). A cause de ces caractéristiques, l’application d’outils en Traitement Automatique du Langage (TAL) rend difficile l’exploitation d’informations utiles contenues dans des messages bruités. Nous proposons un modèle de normalisation en deux étapes fondé sur une approche symbolique et statistique. La première partie vise à produire une représentation intermédiaire du message SMS par l’application des grammaires locales, tandis que la deuxième utilise un système de traduction automatique à base de règles pour convertir la représentation intermédiaire vers une forme standard. 2016.jeptalnrecital-poster.6 @@ -1361,7 +1361,7 @@ Une catégorisation de fins de lignes non-supervisée (End-of-line classification with no supervision) fra - PierreZweigenbaum + PierreZweigenbaum CyrilGrouin ThomasLavergne 364–371 @@ -1375,7 +1375,7 @@ AdelineMüller ThomasFrancois SophieRoekhaut - CedrickFairon + CedrickFairon 372–380 Cet article présente une approche visant à évaluer automatiquement la difficulté de dictées en vue de les intégrer dans une plateforme d’apprentissage de l’orthographe. La particularité de l’exercice de la dictée est de devoir percevoir du code oral et de le retranscrire via le code écrit. Nous envisageons ce double niveau de difficulté à l’aide de 375 variables mesurant la difficulté de compréhension d’un texte ainsi que les phénomènes orthographiques et grammaticaux complexes qu’il contient. Un sous-ensemble optimal de ces variables est combiné à l’aide d’un modèle par machines à vecteurs de support (SVM) qui classe correctement 56% des textes. Les variables lexicales basées sur la liste orthographique de Catach (1984) se révèlent les plus informatives pour le modèle. 2016.jeptalnrecital-poster.8 @@ -1395,7 +1395,7 @@ Comparing Named-Entity Recognizers in a Targeted Domain: Handcrafted Rules vs Machine Learning IoannisPartalas CédricLopez - FrédériqueSegond + FrédériqueSegond 389–395 Comparing Named-Entity Recognizers in a Targeted Domain : Handcrafted Rules vs. Machine Learning Named-Entity Recognition concerns the classification of textual objects in a predefined set of categories such as persons, organizations, and localizations. While Named-Entity Recognition is well studied since 20 years, the application to specialized domains still poses challenges for current systems. We developed a rule-based system and two machine learning approaches to tackle the same task : recognition of product names, brand names, etc., in the domain of Cosmetics, for French. Our systems can thus be compared under ideal conditions. In this paper, we introduce both systems and we compare them. 2016.jeptalnrecital-poster.10 @@ -1427,7 +1427,7 @@ Description de la juxtaposition en Langue des Signes Française à partir d’une grammaire récursive (The present communication tackles formal grammar developpement of <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage (<fixed-case>LSF</fixed-case>)) fra - Mohamed NassimeHadjadj + Mohamed NassimeHadjadj MichaelFilhol 411–418 La présente communication s’inscrit dans le cadre du développement d’une grammaire formelle pour la langue des signes française (LSF). Générer automatiquement des énoncés en LSF implique la définition de certaines règles de production pour synchroniser les différents articulateurs du corps, signes, mouvements, etc. Cet article présente dans sa première partie notre méthodologie pour définir des règles de production à partir d’une étude de corpus. Dans la deuxième partie nous présenterons notre étude qui portera sur deux règles de production pour juxtaposer quelques types de structures en LSF. Nous finissons par une discussion sur la nature et l’apport de notre démarche par rapport aux approches existantes. @@ -1457,7 +1457,7 @@ Étiquetage multilingue en parties du discours avec <fixed-case>ME</fixed-case>lt (Multilingual part-of-speech tagging with <fixed-case>ME</fixed-case>lt) fra - BenoîtSagot + BenoîtSagot 435–442 Nous présentons des travaux récents réalisés autour de MElt, système discriminant d’étiquetage en parties du discours. MElt met l’accent sur l’exploitation optimale d’informations lexicales externes pour améliorer les performances des étiqueteurs par rapport aux modèles entraînés seulement sur des corpus annotés. Nous avons entraîné MElt sur plus d’une quarantaine de jeux de données couvrant plus d’une trentaine de langues. Comparé au système état-de-l’art MarMoT, MElt obtient en moyenne des résultats légèrement moins bons en l’absence de lexique externe, mais meilleurs lorsque de telles ressources sont disponibles, produisant ainsi des étiqueteurs état-de-l’art pour plusieurs langues. 2016.jeptalnrecital-poster.16 @@ -1469,7 +1469,7 @@ GrégoireJadi LauraMonceaux VincentClaveau - BéatriceDaille + BéatriceDaille 443–450 Dans cet article, nous présentons le développement d’un système d’extraction d’expressions-cibles pour l’anglais et sa transposition au français. En complément, nous avons réalisé une étude de l’efficacité des traits en anglais et en français qui tend à montrer qu’il est possible de réaliser un système d’extraction d’expressions-cibles indépendant du domaine. Pour finir, nous proposons une analyse comparative des erreurs commises par nos systèmes en anglais et français et envisageons différentes solutions à ces problèmes. 2016.jeptalnrecital-poster.17 @@ -1480,7 +1480,7 @@ fra JosephLark EmmanuelMorin - Sebastián PeñaSaldarriaga + Sebastián PeñaSaldarriaga 451–458 Nous détectons dans des corpus d’avis clients en français des expressions d’opinion ne contenant pas de marqueur d’opinion explicitement positif ou négatif. Nous procédons pour cela en deux étapes en nous appuyant sur des méthodes existantes : nous identifions ces expressions à l’aide de fenêtres de mots puis nous les classifions en polarité. Le processus global présente des résultats satisfaisants pour notre cadre applicatif demandant une haute précision. 2016.jeptalnrecital-poster.18 @@ -1491,7 +1491,7 @@ fra JulienTourille OlivierFerret - AurélieNévéol + AurélieNévéol XavierTannier 459–466 L’analyse temporelle des documents cliniques permet d’obtenir des représentations riches des informations contenues dans les dossiers électroniques patient. Cette analyse repose sur l’extraction d’événements, d’expressions temporelles et des relations entre eux. Dans ce travail, nous considérons que nous disposons des événements et des expressions temporelles pertinents et nous nous intéressons aux relations temporelles entre deux événements ou entre un événement et une expression temporelle. Nous présentons des modèles de classification supervisée pour l’extraction de des relations en français et en anglais. Les performances obtenues sont comparables dans les deux langues, suggérant ainsi que différents domaines cliniques et différentes langues pourraient être abordés de manière similaire. @@ -1503,9 +1503,9 @@ fra WafaNeifar ThierryHamon - PierreZweigenbaum - MariemEllouze - Lamia HadrichBelguith + PierreZweigenbaum + MariemEllouze + Lamia HadrichBelguith 467–474 Nous présentons, dans cet article, une adaptation à l’arabe standard moderne d’un extracteur de termes pour le français et l’anglais. L’adaptation a d’abord consisté à décrire le processus d’extraction des termes de manière similaire à celui défini pour l’anglais et le français en prenant en compte certains particularités morpho-syntaxiques de la langue arabe. Puis, nous avons considéré le phénomène de l’agglutination de la langue arabe. L’évaluation a été réalisée sur un corpus de textes médicaux. Les résultats montrent que parmi 400 termes candidats maximaux analysés, 288 sont jugés corrects par rapport au domaine (72,1%). Les erreurs d’extraction sont dues à l’étiquetage morpho-syntaxique et à la non-voyellation des textes mais aussi à des phénomènes d’agglutination. 2016.jeptalnrecital-poster.20 @@ -1536,7 +1536,7 @@ Investigating gender adaptation for speech translation RachelBawden GuillaumeWisniewski - HélèneMaynard + HélèneMaynard 490–497 In this paper we investigate the impact of the integration of context into dialogue translation. We present a new contextual parallel corpus of television subtitles and show how taking into account speaker gender can significantly improve machine translation quality in terms of B LEU and M ETEOR scores. We perform a manual analysis, which suggests that these improvements are not necessary related to the morphological consequences of speaker gender, but to more general linguistic divergences. 2016.jeptalnrecital-poster.23 @@ -1557,7 +1557,7 @@ Mise au point d’une méthode d’annotation morphosyntaxique fine du serbe (Developping a method for detailed morphosyntactic tagging of <fixed-case>S</fixed-case>erbian) fra AleksandraMiletic - CécileFabre + CécileFabre DejanStosic 506–513 Cet article présente une expérience d’annotation morphosyntaxique fine du volet serbe du corpus parallèle ParCoLab (corpus serbe-français-anglais). Elle a consisté à enrichir une annotation existante en parties du discours avec des traits morphosyntaxiques fins, afin de préparer une étape ultérieure de parsing. Nous avons comparé trois approches : 1) annotation manuelle ; 2) préannotation avec un étiqueteur entraîné sur le croate suivie d’une correction manuelle ; 3) réentraînement de l’outil sur un petit échantillon validé du corpus, suivi de l’annotation automatique et de la correction manuelle. Le modèle croate maintient une stabilité globale en passant au serbe, mais les différences entre les deux jeux d’étiquettes exigent des interventions manuelles importantes. Le modèle ré-entraîné sur un échantillon de taille limité (20K tokens) atteint la même exactitude que le modèle existant et le gain de temps observé montre que cette méthode optimise la phase de correction. @@ -1595,7 +1595,7 @@ VictorPineau ConstanceNin SolenQuiniou - BéatriceDaille + BéatriceDaille 531–538 La segmentation d’un texte en rhèses, unités-membres signifiantes de la phrase, permet de fournir des adaptations de celui-ci pour faciliter la lecture aux personnes dyslexiques. Dans cet article, nous proposons une méthode d’identification automatique des rhèses basée sur un apprentissage supervisé à partir d’un corpus que nous avons annoté. Nous comparons celle-ci à l’identification manuelle ainsi qu’à l’utilisation d’outils et de concepts proches, tels que la segmentation d’un texte en chunks. 2016.jeptalnrecital-poster.28 @@ -1638,7 +1638,7 @@ Actes de la conférence conjointe JEP-TALN-RECITAL 2016. volume 3 : RECITAL LaurenceDanlos - ThierryHamon + ThierryHamon AFCP - ATALA
Paris, France
7 @@ -1727,7 +1727,7 @@ Actes de la conférence conjointe JEP-TALN-RECITAL 2016. Volume 4 : Conférences invitées LaurenceDanlos - ThierryHamon + ThierryHamon AFCP - ATALA
Paris, France
7 @@ -1748,7 +1748,7 @@
From Human Language Technology to Human Language Science - MarkLiberman + MarkLiberman 3–3 Thirty years ago, in order to get past roadblocks in Machine Translation and Automatic Speech Recognition, DARPA invented a new way to organize and manage technological R&D : a “common task” is defined by a formal quantitative evaluation metric and a body of shared training data, and researchers join an open competition to compare approaches. Over the past three decades, this method has produced steadily improving technologies, with many practical applications now possible. And Moore’s law has created a sort of digital shadow universe, which increasingly mirrors the real world in flows and stores of bits, while the same improvements in digital hardware and software make it increasingly easy to pull content out of the these rivers and oceans of information. It’s natural to be excited about these technologies, where we can see an open road to rapid improvements beyond the current state of the art, and an explosion of near-term commercial applications. But there are some important opportunities in a less obvious direction. Several areas of scientific and humanistic research are being revolutionized by the application of Human Language Technology. At a minimum, orders of magnitude more data can be addressed with orders of magnitude less effort - but this change also transforms old theoretical questions, and poses new ones. And eventually, new modes of research organization and funding are likely to emerge.. 2016.jeptalnrecital-invite.2 @@ -1759,7 +1759,7 @@ Actes de la conférence conjointe JEP-TALN-RECITAL 2016. volume 5 : Démonstrations LaurenceDanlos - ThierryHamon + ThierryHamon AFCP - ATALA
Paris, France
7 @@ -1795,7 +1795,7 @@ fra PaulBui-Quang BrigitteGrau - PatrickParoubek + PatrickParoubek 6–8 AppFM 1 est un outil à mi-chemin entre un environnement de création de chaînes modulaires de TAL et un gestionnaire de services systèmes. Il permet l’intégration d’applications ayant des dépendances complexes en des chaînes de traitements réutilisables facilement par le biais de multiples interfaces. 2016.jeptalnrecital-demo.3 @@ -1834,7 +1834,7 @@ Exploration de collections d’archives multimédia dans le contexte des Humanités Numériques : revisiter <fixed-case>TALN</fixed-case>’2015 ? (Exploring multimedia archives in the context of Digital Humanities: browsing <fixed-case>TALN</fixed-case>’2015?) fra - GéraldineDamnati + GéraldineDamnati MarcDenjean DelphineCharlet 18–20 @@ -1855,7 +1855,7 @@ Héloïse, une plate-forme pour développer des systèmes de <fixed-case>TA</fixed-case> compatibles Ariane en réseau (Heloise, a platform for collaborative development of Ariane-compatible <fixed-case>MT</fixed-case> systems) fra VincentBerment - ChristianBoitet + ChristianBoitet Guillaumede Malézieux 24–26 Dans cette démo, nous montrons comment utiliser Héloïse pour développer des systèmes de TA. @@ -1914,7 +1914,7 @@ fra LukaNerima VioletaSeretan - EricWehrli + EricWehrli 37–39 Cette démonstration présente la version web d’un outil multilingue d’extraction de collocations. Elle est destinée aux lexicographes, aux traducteurs, aux enseignants et apprenants L2 et, plus généralement, aux linguistes désireux d’analyser et d’exploiter leurs propres corpus. 2016.jeptalnrecital-demo.14 @@ -1959,7 +1959,7 @@ GuillaumeDubuisson Duplessis VincentLetard Anne-LaureLigozat - SophieRosset + SophieRosset 49–51 Cette démonstration présente un système de dialogue en domaine ouvert qui utilise une base d’exemples de dialogue automatiquement constituée depuis un corpus de sous-titres afin de gérer un dialogue social de type « chatbot ». 2016.jeptalnrecital-demo.18 @@ -1972,7 +1972,7 @@ ElenaManishina MaxenceBusson FabriceMaurel - StephaneFerrari + StephaneFerrari 52–54 Dans cette démonstration, nous proposons un système qui permettrait aux utilisateurs non-voyants d’obtenir le first glance d’une page web. L’objectif est de réduire le temps d’accès à la structure logico-thématique de la page et de favoriser le développement de stratégies de lecture de haut niveau. Notre concept, appelé Tag Thunder, s’appuie sur une phase de segmentation de la page en zones, suivie d’une étape de représentation des zones par un mot ou groupe de mots, puis une vocalisation simultanée de ces représentants. 2016.jeptalnrecital-demo.19 diff --git a/data/xml/2016.lilt.xml b/data/xml/2016.lilt.xml index fba0e49c06..833b2fa670 100644 --- a/data/xml/2016.lilt.xml +++ b/data/xml/2016.lilt.xml @@ -19,8 +19,8 @@ Many speakers, many worlds: Interannotator variations in the quantification of feature norms - AurélieHerbelot - Eva MariaVecchi + AurélieHerbelot + Eva MariaVecchi Quantification (see e.g. Peters and Westerst ̊ahl, 2006) is probably one of the most extensively studied phenomena in formal semantics. But because of the specific representation of meaning assumed by modeltheoretic semantics (one where a true model of the world is a priori available), research in the area has primarily focused on one question: what is the relation of a quantifier to the truth value of a sentence? In contrast, relatively little has been said about the way the underlying model comes about, and its relation to individual speakers’ conceptual knowledge. In this paper, we make a first step in investigating how native speakers of English model relations between non-grounded sets, by observing how they quantify simple statements. We first give some motivation for our task, from both a theoretical linguistic and computational semantic point of view (§2). We then describe our annotation setup (§3) and follow on with an analysis of the produced dataset, conducting a quantitative evaluation which includes inter-annotator agreement for different classes of predicates (§4). We observe that there is significant agreement between speakers but also noticeable variations. We posit that in settheoretic terms, there are as many worlds as there are speakers (§5), but the overwhelming use of underspecified quantification in ordinary language covers up the individual differences that might otherwise be observed. 2 2016.lilt-13.2 @@ -85,7 +85,7 @@ A linguistically-motivated annotation model of modality in <fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>panish: Insights from <fixed-case>MULTINOT</fixed-case> - JuliaLavid + JuliaLavid MartaCarretrero Juan RafaelZamorano-Mansilla In this paper we present current work on the design and validation of a linguistically-motivated annotation model of modality in English and Spanish in the context of the MULTINOT project. Our annotation model captures four basic modal meanings and their subtypes, on the one hand, and provides a fine-grained characterisation of the syntactic realisations of those meanings in English and Spanish, on the other. We validate the modal tagset proposed through an agreement study performed on a bilingual sample of four hundred sentences extracted from original texts of the MULTINOT corpus, and discuss the difficult cases encountered in the annotation experiment. We also describe current steps in the implementation of the proposed scheme for the large-scale annotation of the bilingual corpus using both automatic and manual procedures. @@ -121,7 +121,7 @@ KoenHallmann FlorianKunneman ChristineLiebrecht - Antalvan den Bosch + Antalvan den Bosch Margotvan Mulken Verbal irony, or sarcasm, presents a significant technical and conceptual challenge when it comes to automatic detection. Moreover, it can be a disruptive factor in sentiment analysis and opinion mining, because it changes the polarity of a message implicitly. Extant methods for automatic detection are mostly based on overt clues to ironic intent such as hashtags, also known as irony markers. In this paper, we investigate whether people who know each other make use of irony markers less often than people who do not know each other. We trained a machinelearning classifier to detect sarcasm in Twitter messages (tweets) that were addressed to specific users, and in tweets that were not addressed to a particular user. Human coders analyzed the top-1000 features found to be most discriminative into ten categories of irony markers. The classifier was also tested within and across the two categories. We find that tweets with a user mention contain fewer irony markers than tweets not addressed to a particular user. Classification experiments confirm that the irony in the two types of tweets is signaled differently. The within-category performance of the classifier is about 91% for both categories, while cross-category experiments yield substantially lower generalization performance scores of 75% and 71%. We conclude that irony markers are used more often when there is less mutual knowledge between sender and receiver. Senders addressing other Twitter users less often use irony markers, relying on mutual knowledge which should lead the receiver to infer ironic intent from more implicit clues. With regard to automatic detection, we conclude that our classifier is able to detect ironic tweets addressed at another user as reliably as tweets that are not addressed at at a particular person. 7 diff --git a/data/xml/2016.tal.xml b/data/xml/2016.tal.xml index 6156b92bad..f847372131 100644 --- a/data/xml/2016.tal.xml +++ b/data/xml/2016.tal.xml @@ -17,7 +17,7 @@ Predicting Liaison: an Example-Based Approach - Antalvan den Bosch + Antalvan den Bosch AlexanderGreefhorst 13–32 2016.tal-1.1 @@ -29,7 +29,7 @@ QuentinPradet LucieBarque TakuyaNakamura - MatthieuConstant + MatthieuConstant 33–58 2016.tal-1.2 fra @@ -38,7 +38,7 @@ Prédiction structurée pour l’analyse syntaxique en constituants par transitions : modèles denses et modèles creux [Structured Prediction for Transition-based Constituent Parsing: Dense and Sparse Models] MaximinCoavoux - BenoîtCrabbé + BenoîtCrabbé 59–83 2016.tal-1.3 fra @@ -47,7 +47,7 @@ Exploiting morphology for the automatic extraction of general public paraphrases of medical terms NataliaGrabar - ThierryHamon + ThierryHamon 85–109 2016.tal-1.4 fra @@ -55,7 +55,7 @@ Apprentissage discriminant de modèles neuronaux pour la traduction automatique [Discriminative training of continuous space translation models] - Quoc-KhanhDo + Quoc-KhanhDo AlexandreAllauzen FrançoisYvon 111–135 @@ -80,7 +80,7 @@ Éthique et traitement automatique des langues et de la parole : entre truismes et tabous [Ethics and natural language and speech processing: between truisms and taboos] KarënFort - GillesAdda + GillesAdda KevinBretonnel Cohen 7–19 2016.tal-2.1 @@ -134,7 +134,7 @@ <fixed-case>NLP</fixed-case> for learning and teaching: challenges and opportunities - GeorgesAntoniadis + GeorgesAntoniadis PietDesmet 7–13 2016.tal-3.1 @@ -181,7 +181,7 @@ <fixed-case>M</fixed-case>y<fixed-case>A</fixed-case>nnotator: A Tool for Technology-Mediated Written Corrective Feedback Marie-JoséeHamel NikolaySlavkov - DianaInkpen + DianaInkpen DingwenXiao 119–142 2016.tal-3.6 diff --git a/data/xml/2016.tc.xml b/data/xml/2016.tc.xml index d4530bbc31..0d062aa306 100644 --- a/data/xml/2016.tc.xml +++ b/data/xml/2016.tc.xml @@ -91,7 +91,7 @@ How to configure statistical machine translation with linked open data resources - AnkitSrivastava + AnkitSrivastava FelixSasaki Peter Bourgonje. JulianMoreno-Schneider JanNehring diff --git a/data/xml/2017.iwslt.xml b/data/xml/2017.iwslt.xml index 67fe4a98e2..f122c64e50 100644 --- a/data/xml/2017.iwslt.xml +++ b/data/xml/2017.iwslt.xml @@ -22,7 +22,7 @@ MarcelloFederico LuisaBentivogli JanNiehues - SebastianStüker + SebastianStüker KatsuhitoSudoh KoichiroYoshino ChristianFedermann @@ -34,7 +34,7 @@ Going beyond zero-shot <fixed-case>MT</fixed-case>: combining phonological, morphological and semantic factors. The <fixed-case>U</fixed-case>d<fixed-case>S</fixed-case>-<fixed-case>DFKI</fixed-case> System at <fixed-case>IWSLT</fixed-case> 2017 CristinaEspaña-Bonet - Josefvan Genabith + Josefvan Genabith 15-22 This paper describes the UdS-DFKI participation to the multilingual task of the IWSLT Evaluation 2017. Our approach is based on factored multilingual neural translation systems following the small data and zero-shot training conditions. Our systems are designed to fully exploit multilinguality by including factors that increase the number of common elements among languages such as phonetic coarse encodings and synsets, besides shallow part-of-speech tags, stems and lemmas. Document level information is also considered by including the topic of every document. This approach improves a baseline without any additional factor for all the language pairs and even allows beyond-zero-shot translation. That is, the translation from unseen languages is possible thanks to the common elements —especially synsets in our models— among languages. 2017.iwslt-1.2 @@ -57,7 +57,7 @@ ParniaBahar JanRosendahl NickRossenbach - HermannNey + HermannNey 29-34 This work describes the Neural Machine Translation (NMT) system of the RWTH Aachen University developed for the English$German tracks of the evaluation campaign of the International Workshop on Spoken Language Translation (IWSLT) 2017. We use NMT systems which are augmented by state-of-the-art extensions. Furthermore, we experiment with techniques that include data filtering, a larger vocabulary, two extensions to the attention mechanism and domain adaptation. Using these methods, we can show considerable improvements over the respective baseline systems and our IWSLT 2016 submission. 2017.iwslt-1.4 @@ -68,7 +68,7 @@ Surafel M.Lakew Quintino F.Lotito MarcoTurchi - MatteoNegri + MatteoNegri MarcelloFederico 35-41 Neural Machine Translation has been shown to enable inference and cross-lingual knowledge transfer across multiple language directions using a single multilingual model. Focusing on this multilingual translation scenario, this work summarizes FBK’s participation in the IWSLT 2017 shared task. Our submissions rely on two multilingual systems trained on five languages (English, Dutch, German, Italian, and Romanian). The first one is a 20 language direction model, which handles all possible combinations of the five languages. The second multilingual system is trained only on 16 directions, leaving the others as zero-shot translation directions (i.e representing a more complex inference task on language pairs not seen at training time). More specifically, our zero-shot directions are Dutch$German and Italian$Romanian (resulting in four language combinations). Despite the small amount of parallel data used for training these systems, the resulting multilingual models are effective, even in comparison with models trained separately for every language pair (i.e. in more favorable conditions). We compare and show the results of the two multilingual models against a baseline single language pair systems. Particularly, we focus on the four zero-shot directions and show how a multilingual model trained with small data can provide reasonable results. Furthermore, we investigate how pivoting (i.e using a bridge/pivot language for inference in a source!pivot!target translations) using a multilingual model can be an alternative to enable zero-shot translation in a low resource setting. @@ -77,12 +77,12 @@ <fixed-case>KIT</fixed-case>’s Multilingual Neural Machine Translation systems for <fixed-case>IWSLT</fixed-case> 2017 - Ngoc-QuanPham + Ngoc-QuanPham MatthiasSperber ElizabethSalesky Thanh-LeHa JanNiehues - AlexanderWaibel + AlexanderWaibel 42-47 In this paper, we present KIT’s multilingual neural machine translation (NMT) systems for the IWSLT 2017 evaluation campaign machine translation (MT) and spoken language translation (SLT) tasks. For our MT task submissions, we used our multi-task system, modified from a standard attentional neural machine translation framework, instead of building 20 individual NMT systems. We investigated different architectures as well as different data corpora in training such a multilingual system. We also suggested an effective adaptation scheme for multilingual systems which brings great improvements compared to monolingual systems. For the SLT track, in addition to a monolingual neural translation system used to generate correct punctuations and true cases of the data prior to training our multilingual system, we introduced a noise model in order to make our system more robust. Results show that our novel modifications improved our systems considerably on all tasks. 2017.iwslt-1.6 @@ -100,7 +100,7 @@ <fixed-case>K</fixed-case>yoto <fixed-case>U</fixed-case>niversity <fixed-case>MT</fixed-case> System Description for <fixed-case>IWSLT</fixed-case> 2017 RajDabre - FabienCromieres + FabienCromieres SadaoKurohashi We describe here our Machine Translation (MT) model and the results we obtained for the IWSLT 2017 Multilingual Shared Task. Motivated by Zero Shot NMT [1] we trained a Multilingual Neural Machine Translation by combining all the training data into one single collection by appending the tokens to the source sentences in order to indicate the target language they should be translated to. We observed that even in a low resource situation we were able to get translations whose quality surpass the quality of those obtained by Phrase Based Statistical Machine Translation by several BLEU points. The most surprising result we obtained was in the zero shot setting for Dutch-German and Italian-Romanian where we observed that despite using no parallel corpora between these language pairs, the NMT model was able to translate between these languages and the translations were either as good as or better (in terms of BLEU) than the non zero resource setting. We also verify that the NMT models that use feed forward layers and self attention instead of recurrent layers are extremely fast in terms of training which is useful in a NMT experimental setting. 55-59 @@ -113,8 +113,8 @@ MarkusMüller MatthiasSperber ThomasZenkel - SebastianStüker - AlexWaibel + SebastianStüker + AlexWaibel 60-64 This paper describes our German and English Speech-to-Text (STT) systems for the 2017 IWSLT evaluation campaign. The campaign focuses on the transcription of unsegmented lecture talks. Our setup includes systems using both the Janus and Kaldi frameworks. We combined the outputs using both ROVER [1] and confusion network combination (CNC) [2] to achieve a good overall performance. The individual subsystems are built by using different speaker-adaptive feature combination (e.g., lMEL with i-vector or bottleneck speaker vector), acoustic models (GMM or DNN) and speaker adaptation (MLLR or fMLLR). Decoding is performed in two stages, where the GMM and DNN systems are adapted on the combination of the first stage outputs using MLLR, and fMLLR. The combination setup produces a final hypothesis that has a significantly lower WER than any of the individual sub-systems. For the English lecture task, our best combination system has a WER of 8.3% on the tst2015 development set while our other combinations gained 25.7% WER for German lecture tasks. 2017.iwslt-1.9 @@ -126,7 +126,7 @@ NadirDurrani FahimDalvi YonatanBelinkov - StephanVogel + StephanVogel 66-73 In this paper, we explore alternative ways to train a neural machine translation system in a multi-domain scenario. We investigate data concatenation (with fine tuning), model stacking (multi-level fine tuning), data selection and multi-model ensemble. Our findings show that the best translation quality can be achieved by building an initial system on a concatenation of available out-of-domain data and then fine-tuning it on in-domain data. Model stacking works best when training begins with the furthest out-of-domain data and the model is incrementally fine-tuned with the next furthest domain and so on. Data selection did not give the best results, but can be considered as a decent compromise between training time and translation quality. A weighted ensemble of different individual models performed better than data selection. It is beneficial in a scenario when there is no time for fine-tuning an already trained model. 2017.iwslt-1.10 @@ -136,7 +136,7 @@ Domain-independent Punctuation and Segmentation Insertion EunahCho JanNiehues - AlexWaibel + AlexWaibel 74-81 Punctuation and segmentation is crucial in spoken language translation, as it has a strong impact to translation performance. However, the impact of rare or unknown words in the performance of punctuation and segmentation insertion has not been thoroughly studied. In this work, we simulate various degrees of domain-match in testing scenario and investigate their impact to the punctuation insertion task. We explore three rare word generalizing schemes using part-of-speech (POS) tokens. Experiments show that generalizing rare and unknown words greatly improves the punctuation insertion performance, reaching up to 8.8 points of improvement in F-score when applied to the out-of-domain test scenario. We show that this improvement in punctuation quality has a positive impact on a following machine translation (MT) performance, improving it by 2 BLEU points. 2017.iwslt-1.11 @@ -144,7 +144,7 @@ Synthetic Data for Neural Machine Translation of Spoken-Dialects - HanyHassan + HanyHassan MostafaElaraby Ahmed Y.Tawfik 82-89 @@ -156,7 +156,7 @@ Toward Robust Neural Machine Translation for Noisy Input Sequences MatthiasSperber JanNiehues - AlexWaibel + AlexWaibel 90-96 Translating noisy inputs, such as the output of a speech recognizer, is a difficult but important challenge for neural machine translation. One way to increase robustness of neural models is by introducing artificial noise to the training data. In this paper, we experiment with appropriate forms of such noise, exploring a middle ground between general-purpose regularizers and highly task-specific forms of noise induction. We show that with a simple generative noise model, moderate gains can be achieved in translating erroneous speech transcripts, provided that type and amount of noise are properly calibrated. The optimal amount of noise at training time is much smaller than the amount of noise in our test data, indicating limitations due to trainability issues. We note that unlike our baseline model, models trained on noisy data are able to generate outputs of proper length even for noisy inputs, while gradually reducing output length for higher amount of noise, as might also be expected from a human translator. We discuss these findings in details and give suggestions for future work. 2017.iwslt-1.13 @@ -164,7 +164,7 @@ Monolingual Embeddings for Low Resourced Neural Machine Translation - Mattia AntoninoDi Gangi + Mattia AntoninoDi Gangi MarcelloFederico 97-104 Neural machine translation (NMT) is the state of the art for machine translation, and it shows the best performance when there is a considerable amount of data available. When only little data exist for a language pair, the model cannot produce good representations for words, particularly for rare words. One common solution consists in reducing data sparsity by segmenting words into sub-words, in order to allow rare words to have shared representations with other words. Taking a different approach, in this paper we present a method to feed an NMT network with word embeddings trained on monolingual data, which are combined with the task-specific embeddings learned at training time. This method can leverage an embedding matrix with a huge number of words, which can therefore extend the word-level vocabulary. Our experiments on two language pairs show good results for the typical low-resourced data scenario (IWSLT in-domain dataset). Our consistent improvements over the baselines represent a positive proof about the possibility to leverage models pre-trained on monolingual data in NMT. @@ -175,7 +175,7 @@ Effective Strategies in Zero-Shot Neural Machine Translation Thanh-LeHa JanNiehues - AlexanderWaibel + AlexanderWaibel 105-112 In this paper, we proposed two strategies which can be applied to a multilingual neural machine translation system in order to better tackle zero-shot scenarios despite not having any parallel corpus. The experiments show that they are effective in terms of both performance and computing resources, especially in multilingual translation of unbalanced data in real zero-resourced condition when they alleviate the language bias problem. 2017.iwslt-1.15 @@ -185,7 +185,7 @@ Improving Zero-Shot Translation of Low-Resource Languages Surafel M.Lakew Quintino F.Lotito - MatteoNegri + MatteoNegri MarcoTurchi MarcelloFederico 113-119 diff --git a/data/xml/2017.jeptalnrecital.xml b/data/xml/2017.jeptalnrecital.xml index 76748ec19a..329f8eb9b2 100644 --- a/data/xml/2017.jeptalnrecital.xml +++ b/data/xml/2017.jeptalnrecital.xml @@ -3,7 +3,7 @@ Actes des 24ème Conférence sur le Traitement Automatique des Langues Naturelles. Volume 1 - Articles longs - IrisEshkol-Taravella + IrisEshkol-Taravella Jean-YvesAntoine ATALA
Orléans, France
@@ -61,7 +61,7 @@ fra KamelBouzidi ZiedElloumi - LaurentBesacier + LaurentBesacier BenjaminLecouteux Mohamed-FaouziBenzeghiba 63–76 @@ -73,7 +73,7 @@ Représentation et analyse automatique des discontinuités syntaxiques dans les corpus arborés en constituants du français (Representation and parsing of syntactic discontinuities in <fixed-case>F</fixed-case>rench constituent treebanks) fra MaximinCoavoux - BenoîtCrabbé + BenoîtCrabbé 77–92 Nous présentons de nouvelles instanciations de trois corpus arborés en constituants du français, où certains phénomènes syntaxiques à l’origine de dépendances à longue distance sont représentés directement à l’aide de constituants discontinus. Les arbres obtenus relèvent de formalismes grammaticaux légèrement sensibles au contexte (LCFRS). Nous montrons ensuite qu’il est possible d’analyser automatiquement de telles structures de manière efficace à condition de s’appuyer sur une méthode d’inférence approximative. Pour cela, nous présentons un analyseur syntaxique par transitions, qui réalise également l’analyse morphologique et l’étiquetage fonctionnel des mots de la phrase. Enfin, nos expériences montrent que la rareté des phénomènes concernés dans les données françaises pose des difficultés pour l’apprentissage et l’évaluation des structures discontinues. 2017.jeptalnrecital-long.6 @@ -90,9 +90,9 @@
Projection Aléatoire Non-Négative pour le Calcul de Word Embedding / Non-Negative Randomized Word Embedding - BehrangQasemizadeh + BehrangQasemizadeh LauraKallmeyer - AurelieHerbelot + AurelieHerbelot 109–122 Non-Negative Randomized Word Embedding We propose a word embedding method which is based on a novel random projection technique. We show that weighting methods such as positive pointwise mutual information (PPMI) can be applied to our models after their construction and at a reduced dimensionality. Hence, the proposed technique can efficiently transfer words onto semantically discriminative spaces while demonstrating high computational performance, besides benefits such as ease of update and a simple mechanism for interoperability. We report the performance of our method on several tasks and show that it yields competitive results compared to neural embedding methods in monolingual corpus-based setups. 2017.jeptalnrecital-long.8 @@ -101,8 +101,8 @@ Création et validation de signatures sémantiques : application à la mesure de similarité sémantique et à la substitution lexicale (Creating and validating semantic signatures : application for measuring semantic similarity and lexical substitution) fra - Mokhtar-BoumedyenBillami - NúriaGala + Mokhtar-BoumedyenBillami + NúriaGala 123–138 L’intégration de la notion de similarité sémantique entre les unités lexicales est essentielle dans différentes applications de Traitement Automatique des Langues (TAL). De ce fait, elle a reçu un intérêt considérable qui a eu comme conséquence le développement d’une vaste gamme d’approches pour en déterminer une mesure. Ainsi, plusieurs types de mesures de similarité existent, elles utilisent différentes représentations obtenues à partir d’informations soit dans des ressources lexicales, soit dans de gros corpus de données ou bien dans les deux. Dans cet article, nous nous intéressons à la création de signatures sémantiques décrivant des représentations vectorielles de mots à partir du réseau lexical JeuxDeMots (JDM). L’évaluation de ces signatures est réalisée sur deux tâches différentes : mesures de similarité sémantique et substitution lexicale. Les résultats obtenus sont très satisfaisants et surpassent, dans certains cas, les performances des systèmes de l’état de l’art. 2017.jeptalnrecital-long.9 @@ -133,7 +133,7 @@ Construction automatique d’une base de données étymologiques à partir du wiktionary (Automatic construction of an etymological database using <fixed-case>W</fixed-case>iktionary) fra - BenoîtSagot + BenoîtSagot 169–181 Les ressources lexicales électroniques ne contiennent quasiment jamais d’informations étymologiques. De telles informations, convenablement formalisées, permettraient pourtant de développer des outils automatiques au service de la linguistique historique et comparative, ainsi que d’améliorer significativement le traitement automatique de langues anciennes. Nous décrivons ici le processus que nous avons mis en œuvre pour extraire des données étymologiques à partir des notices étymologiques du wiktionary, rédigées en anglais. Nous avons ainsi produit une base multilingue de près d’un million de lexèmes et une base de plus d’un demi-million de relations étymologiques entre lexèmes. 2017.jeptalnrecital-long.12 @@ -142,12 +142,12 @@ Apprendre des représentations jointes de mots et d’entités pour la désambiguïsation d’entités (Combining Word and Entity Embeddings for Entity Linking) fra - José G.Moreno + José G.Moreno RomaricBesançon RomainBeaumont EvaD’Hondt Anne-LaureLigozat - SophieRosset + SophieRosset XavierTannier BrigitteGrau 182–195 @@ -159,7 +159,7 @@ Analyse et évolution de la compréhension de termes techniques (Analysis and Evolution of Understanding of Technical Terms) fra NataliaGrabar - ThierryHamon + ThierryHamon 196–211 Nous faisons l’hypothèse que les mots techniques inconnus dotés d’une structure interne (mots affixés ou composés) peuvent fournir des indices linguistiques à un locuteur, ce qui peut l’aider à analyser et à comprendre ces mots. Afin de tester notre hypothèse, nous proposons de travailler sur un ensemble de mots techniques provenant du domaine médical. Un grand ensemble de mots techniques est annoté par cinq annotateurs. Nous effectuons deux types d’analyses : l’analyse de l’évolution des mots compréhensibles et incompréhensibles (de manière générale et en fonction de certains suffixes) et l’analyse des clusters avec ces mots créés par apprentissage non-supervisé, sur la base des descripteurs linguistiques et extra-linguistiques. Nos résultats indiquent que, selon la sensibilité linguistique des annotateurs, les mots techniques peuvent devenir décodables et compréhensibles. Quant aux clusters, le contenu de certains reflète la difficulté des mots qui les composent et montre également la progression des annotateurs dans leur compréhension. La ressource construite est disponible pour la recherche : http://natalia.grabar.free.fr/rated-lexicon.html. 2017.jeptalnrecital-long.14 @@ -169,7 +169,7 @@ Actes des 24ème Conférence sur le Traitement Automatique des Langues Naturelles. Volume 2 - Articles courts - IrisEshkol-Taravella + IrisEshkol-Taravella Jean-YvesAntoine ATALA
Orléans, France
@@ -184,8 +184,8 @@ Annotation d’expressions polylexicales verbales en français (Annotation of verbal multiword expressions in <fixed-case>F</fixed-case>rench) fra - MarieCandito - MathieuConstant + MarieCandito + MathieuConstant CarlosRamisch AgataSavary YannickParmentier @@ -240,8 +240,8 @@ Analyse automatique <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et : une étude sur un corpus français de textes encyclopédiques (<fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et automatic analysis : a study on a <fixed-case>F</fixed-case>rench corpus of encyclopedic texts) fra GabrielMarzinotto - GéraldineDamnati - FrédéricBéchet + GéraldineDamnati + FrédéricBéchet 44–51 Cet article présente un système d’analyse automatique en cadres sémantiques évalué sur un corpus de textes encyclopédiques d’histoire annotés selon le formalisme FrameNet. L’approche choisie repose sur un modèle intégré d’étiquetage de séquence qui optimise conjointement l’identification des cadres, la segmentation et l’identification des rôles sémantiques associés. Nous cherchons dans cette étude à analyser la complexité de la tâche selon plusieurs dimensions. Une analyse détaillée des performances du système est ainsi proposée, à la fois selon l’angle des paramètres du modèle et de la nature des données. 2017.jeptalnrecital-court.6 @@ -251,7 +251,7 @@ Détection de coréférences de bout en bout en français (End-to-end coreference resolution for <fixed-case>F</fixed-case>rench) fra ElisabethGodbert - BenoitFavre + BenoitFavre 52–59 Notre objectif est l’élaboration d’un système de détection automatique de relations de coréférence le plus général possible, pour le traitement des anaphores pronominales et les coréférences directes. Nous décrivons dans cet article les différentes étapes de traitement des textes dans le système que nous avons développé : (i) l’annotation en traits lexicaux et syntaxiques par le système Macaon ; (ii) le repérage des mentions par un modèle obtenu par apprentissage sur le corpus ANCOR ; (iii) l’annotation sémantique des mentions à partir de deux ressources : le DEM et le LVF ; (iv) l’annotation en coréférences par un système à base de règles. Le système est évalué sur le corpus ANCOR. 2017.jeptalnrecital-court.7 @@ -315,7 +315,7 @@ DamienSileo CamillePradel PhilippeMuller - TimVan de Cruys + TimVan de Cruys 102–109 Plusieurs tâches en traitement du langage naturel impliquent de modifier des phrases en conservant au mieux leur sens, comme la reformulation, la compression, la simplification, chacune avec leurs propres données et modèles. Nous introduisons ici une méthode générale s’adressant à tous ces problèmes, utilisant des données plus simples à obtenir : un ensemble de phrases munies d’indicateurs sur leur style, comme des phrases et le type de sentiment qu’elles expriment. Cette méthode repose sur un modèle d’apprentissage de représentations non supervisé (un auto-encodeur variationnel), puis sur le changement des représentations apprises pour correspondre à un style donné. Le résultat est évalué qualitativement, puis quantitativement sur le jeu de données de compression de phrases Microsoft, avec des résultats encourageants. 2017.jeptalnrecital-court.13 @@ -348,7 +348,7 @@ Simbow : une mesure de similarité sémantique entre textes (Simbow : a semantic similarity metric between texts) fra DelphineCharlet - GéraldineDamnati + GéraldineDamnati 126–133 Cet article décrit une mesure de similarité sémantique non-supervisée qui repose sur l’introduction d’une matrice de relations entre mots, dans un paradigme de mesure cosinus entre sacs de mots. La métrique obtenue, apparentée à soft-cosinus, tient compte des relations entre mots qui peuvent être d’ordre lexical ou sémantique selon la matrice considérée. La mise en œuvre de cette métrique sur la tâche qui consiste à mesurer des similarités sémantiques entre questions posées sur un forum, a remporté la campagne d’évaluation SemEval2017. Si l’approche soumise à la campagne est une combinaison supervisée de différentes mesures non-supervisées, nous présentons dans cet article en détail les métriques non-supervisées, qui présentent l’avantage de produire de bons résultats sans nécessiter de ressources spécifiques autres que des données non annotées du domaine considéré. 2017.jeptalnrecital-court.16 @@ -383,7 +383,7 @@ Parcourir, reconnaître et réfléchir. Combinaison de méthodes légères pour l’extraction de relations sémantiques (Browse, recognize and think) fra MathieuLafourcade - NathalieLe Brun + NathalieLe Brun 150–157 La capture de relations sémantiques entre termes à partir de textes est un moyen privilégié de constituer/alimenter une base de connaissances, ressource indispensable pour l’analyse de textes. Nous proposons et évaluons la combinaison de trois méthodes de production de relations lexicosémantiques. 2017.jeptalnrecital-court.19 @@ -394,7 +394,7 @@ fra AlainJoubert MathieuLafourcade - NathalieLe Brun + NathalieLe Brun 158–164 La correction des erreurs dans une collection de données est un problème délicat. Elle peut être réalisée manuellement par un expert, ou en utilisant des méthodes de crowdsourcing, ou encore automatiquement au moyen d’algorithmes. Nous présentons ici des méthodes automatiques permettant de détecter les erreurs potentielles « secondaires » induites par les mécanismes automatiques d’inférences de relations, lorsqu’ils s’appuient sur des relations erronées « initiales » détectées manuellement. Des résultats encourageants, mesurés sur le réseau JeuxDeMots, nous invitent à envisager également des stratégies qui permettraient de détecter automatiquement les relations erronées « initiales », ce qui pourrait conduire à une détection automatique de la majorité des erreurs présentes dans le réseau. 2017.jeptalnrecital-court.20 @@ -405,7 +405,7 @@ fra KarënFort BrunoGuillaume - NicolasLefebvre + NicolasLefebvre LauraRamírez MathildeRegnault MaryCollins @@ -444,7 +444,7 @@ MatthieuRiou BassamJabaian StéphaneHuet - FabriceLefèvre + FabriceLefèvre 192–199 Récemment, de nouveaux modèles à base de réseaux de neurones récurrents ont été proposés pour traiter la génération en langage naturel dans des systèmes de dialogue (Wen et al., 2016a). Ces modèles demandent une grande quantité de données d’apprentissage ; or la collecte et l’annotation de ces données peuvent être laborieuses. Pour répondre à cette problématique, nous nous intéressons ici à la mise en place d’un protocole d’apprentissage en ligne basé sur un apprentissage par renforcement, permettant d’améliorer l’utilisation d’un modèle initial appris sur un corpus plus restreint généré par patrons. Dans cette étude exploratoire, nous proposons une approche basée sur un algorithme de bandit contre un adversaire, afin d’en étudier l’intérêt et les limites. 2017.jeptalnrecital-court.24 @@ -455,7 +455,7 @@ fra LoïcGrobol IsabelleTellier - Éricde La Clergerie + Éricde La Clergerie MarcoDinarelli FrédéricLandragin 200–208 @@ -475,8 +475,8 @@ Adaptation incrémentale de modèles de traduction neuronaux (Incremental adaptation of neural machine translation models) fra - ChristopheServan - JosepCrego + ChristopheServan + JosepCrego JeanSenellart 218–225 L’adaptation au domaine est un verrou scientifique en traduction automatique. Il englobe généralement l’adaptation de la terminologie et du style, en particulier pour la post-édition humaine dans le cadre d’une traduction assistée par ordinateur. Avec la traduction automatique neuronale, nous étudions une nouvelle approche d’adaptation au domaine que nous appelons “spécialisation” et qui présente des résultats prometteurs tant dans la vitesse d’apprentissage que dans les scores de traduction. Dans cet article, nous proposons d’explorer cette approche. @@ -486,7 +486,7 @@ Détection de concepts et granularité de l’annotation (Concept detection and annotation granularity ) fra - PierreZweigenbaum + PierreZweigenbaum ThomasLavergne 226–233 Nous nous intéressons ici à une tâche de détection de concepts dans des textes sans exigence particulière de passage par une phase de détection d’entités avec leurs frontières. Il s’agit donc d’une tâche de catégorisation de textes multiétiquette, avec des jeux de données annotés au niveau des textes entiers. Nous faisons l’hypothèse qu’une annotation à un niveau de granularité plus fin, typiquement au niveau de l’énoncé, devrait améliorer la performance d’un détecteur automatique entraîné sur ces données. Nous examinons cette hypothèse dans le cas de textes courts particuliers : des certificats de décès où l’on cherche à reconnaître des diagnostics, avec des jeux de données initialement annotés au niveau du certificat entier. Nous constatons qu’une annotation au niveau de la « ligne » améliore effectivement les résultats, mais aussi que le simple fait d’appliquer au niveau de la ligne un classifieur entraîné au niveau du texte est déjà une source d’amélioration. @@ -498,8 +498,8 @@ fra ChristopherNorman MariskaLeeflang - PierreZweigenbaum - AurélieNévéol + PierreZweigenbaum + AurélieNévéol 234–241 Les revues systématiques de la littérature dans le domaine biomédical reposent essentiellement sur le travail bibliographique manuel d’experts. Nous évaluons les performances de la classification supervisée pour la découverte automatique d’articles à l’aide de plusieurs définitions des critères d’inclusion. Nous appliquons un modèle de regression logistique sur deux corpus issus de revues systématiques conduites dans le domaine du traitement automatique de la langue et de l’efficacité des médicaments. La classification offre une aire sous la courbe moyenne (AUC) de 0.769 si le classifieur est contruit à partir des jugements experts portés sur les titres et résumés des articles, et de 0.835 si on utilise les jugements portés sur le texte intégral. Ces résultats indiquent l’importance des jugements portés dès le début du processus de sélection pour développer un classifieur efficace pour accélérer l’élaboration des revues systématiques à l’aide d’un algorithme de classification standard. 2017.jeptalnrecital-court.29 @@ -508,10 +508,10 @@ Une approche linguistique pour la détection des dialectes arabes (A linguistic approach for the detection of <fixed-case>A</fixed-case>rabic dialects) fra - HoudaSaâdane + HoudaSaâdane DamienNouvel HosniSeffih - ChristianFluhr + ChristianFluhr 242–250 Dans cet article, nous présentons un processus d’identification automatique de l’origine dialectale pour la langue arabe de textes écrits en caractères arabes ou en écriture latine (arabizi). Nous décrivons le processus d’annotation des ressources construites et du système de translittération adopté. Deux approches d’identification de la langue sont comparées : la première est linguistique et exploite des dictionnaires, la seconde est statistique et repose sur des méthodes traditionnelles d’apprentissage automatique (n-grammes). L’évaluation de ces approches montre que la méthode linguistique donne des résultats satisfaisants, sans être dépendante des corpus d’apprentissage. 2017.jeptalnrecital-court.30 @@ -521,7 +521,7 @@ Actes des 24ème Conférence sur le Traitement Automatique des Langues Naturelles. 19es REncontres jeunes Chercheurs en Informatique pour le TAL (RECITAL 2017) - IrisEshkol-Taravella + IrisEshkol-Taravella Jean-YvesAntoine ATALA
Orléans, France
@@ -652,7 +652,7 @@ Actes des 24ème Conférence sur le Traitement Automatique des Langues Naturelles. Volume 3 - Démonstrations - IrisEshkol-Taravella + IrisEshkol-Taravella Jean-YvesAntoine ATALA
Orléans, France
@@ -704,9 +704,9 @@ Apprentissage d’agents conversationnels pour la gestion de relations clients (Training chatbots for customer relation management) fra - BenoitFavre - FredericBechet - GéraldineDamnati + BenoitFavre + FredericBechet + GéraldineDamnati DelphineCharlet 17–18 Ce travail démontre la faisabilité d’entraîner des chatbots sur des traces de conversations dans le domaine de la relation client. Des systèmes à base de modèles de langage, de recherche d’information et de traduction sont comparés pour la tâche. @@ -716,14 +716,14 @@ Conception d’une solution de détection d’événements basée sur <fixed-case>T</fixed-case>witter (Design of a solution for event detection from Tweeter) fra - ChristopheServan + ChristopheServan CatherineKobus YongchaoDeng CyrilTouffet JungiKim InèsKapp - DjamelMostefa - JosepCrego + DjamelMostefa + JosepCrego AurélienCoquard JeanSenellart 19–20 @@ -736,7 +736,7 @@ fra GaëlGuibon MagalieOchs - PatriceBellot + PatriceBellot 21–23 Nous présentons une interface de recommandation d’emojis porteurs de sentiments qui utilise un modèle de prédiction appris sur des messages informels privés. Chacun étant associé à deux scores de polarité prédits. Cette interface permet permet également d’enregistrer les choix de l’utilisateur pour confirmer ou infirmer la recommandation. 2017.jeptalnrecital-demo.7 @@ -780,8 +780,8 @@ ChristopherNorman CyrilGrouin ThomasLavergne - AurélieNévéol - PierreZweigenbaum + AurélieNévéol + PierreZweigenbaum 33–34 Nous proposons des démonstrations de trois outils développés par le LIMSI en traitement automatique des langues appliqué au domaine biomédical : la détection de concepts médicaux dans des textes courts, la catégorisation d’articles scientifiques pour l’assistance à l’écriture de revues systématiques, et l’anonymisation de textes cliniques. 2017.jeptalnrecital-demo.11 diff --git a/data/xml/2017.lilt.xml b/data/xml/2017.lilt.xml index 219be79c15..1afea2dfae 100644 --- a/data/xml/2017.lilt.xml +++ b/data/xml/2017.lilt.xml @@ -10,8 +10,8 @@ Lexical Factorization and Syntactic Behavior - JamesPustejovsky - AravindJoshi + JamesPustejovsky + AravindJoshi In this paper, we examine the correlation between lexical semantics and the syntactic realization of the different components of a word’s meaning in natural language. More specifically, we will explore the effect that lexical factorization in verb semantics has on the suppression or expression of semantic features within the sentence. Factorization was a common analytic tool employed in early generative linguistic approaches to lexical decomposition, and continues to play a role in contemporary semantics, in various guises and modified forms. Building on the unpublished analysis of verbs of seeing in Joshi (1972), we argue here that the significance of lexical factorization is twofold: first, current models of verb meaning owe much of their insight to factor-based theories of meaning; secondly, the factorization properties of a lexical item appear to influence, both directly and indirectly, the possible syntactic expressibility of arguments and adjuncts in sentence composition. We argue that this information can be used to compute what we call the factor expression likelihood (FEL) associated with a verb in a sentence. This is the likelihood that the overt syntactic expression of a factor will cooccur with the verb. This has consequences for the compositional mechanisms responsible for computing the meaning of the sentence, as well as significance in the creation of computational models attempting to capture linguistic behavior over large corpora. 1 2017.lilt-15.1 @@ -19,7 +19,7 @@ Factorization of Verbs: An Analysis of Verbs of Seeing - AravindJoshi + AravindJoshi 1 2017.lilt-15.2 joshi-2017-factorization diff --git a/data/xml/2017.mtsummit.xml b/data/xml/2017.mtsummit.xml index fac736bd1b..d64efca60d 100644 --- a/data/xml/2017.mtsummit.xml +++ b/data/xml/2017.mtsummit.xml @@ -19,7 +19,7 @@ Empirical Study of Dropout Scheme for Neural Machine Translation XiaolinWang MasaoUtiyama - EiichiroSumita + EiichiroSumita 1-14 2017.mtsummit-papers.1 wang-etal-2017-empirical @@ -28,7 +28,7 @@ A Target Attention Model for Neural Machine Translation HideyaMino AndrewFinch - EiichiroSumita + EiichiroSumita 15-26 2017.mtsummit-papers.2 mino-etal-2017-target @@ -55,11 +55,11 @@ Translation Quality and Productivity: A Study on Rich Morphology Languages LuciaSpecia KimHarris - FrédéricBlain + FrédéricBlain AljoschaBurchardt VivivenMacketanz IngunaSkadin - MatteoNegri + MatteoNegri MarcoTurchi 55-71 2017.mtsummit-papers.5 @@ -68,7 +68,7 @@ The <fixed-case>M</fixed-case>icrosoft Speech Language Translation (<fixed-case>MSLT</fixed-case>) Corpus for <fixed-case>C</fixed-case>hinese and <fixed-case>J</fixed-case>apanese: Conversational Test data for Machine Translation and Speech Recognition ChristianFedermann - William D.Lewis + William D.Lewis 72-85 2017.mtsummit-papers.6 federmann-lewis-2017-microsoft @@ -76,7 +76,7 @@ Paying Attention to Multi-Word Expressions in Neural Machine Translation MatīssRikters - OndřejBojar + OndřejBojar 86-95 2017.mtsummit-papers.7 rikters-bojar-2017-paying @@ -84,7 +84,7 @@ Enabling Multi-Source Neural Machine Translation By Concatenating Source Sentences In Multiple Languages RajDabre - FabienCromieres + FabienCromieres SadaoKurohashi 96-107 2017.mtsummit-papers.8 @@ -109,7 +109,7 @@ PanayotaGeorgakopoulou PintuLohar AndyWay - Antonio ValerioMiceli-Barone + Antonio ValerioMiceli-Barone MariaGialama 116-131 2017.mtsummit-papers.10 @@ -117,8 +117,8 @@ One-parameter models for sentence-level post-editing effort estimation - Mikel L.Forcada - MiquelEsplà-Gomis + Mikel L.Forcada + MiquelEsplà-Gomis FelipeSánchez-Martínez LuciaSpecia 132-143 @@ -127,7 +127,7 @@ A Minimal Cognitive Model for Translating and Post-editing - MoritzSchaeffer + MoritzSchaeffer MichaelCarl 144-155 2017.mtsummit-papers.12 @@ -163,7 +163,7 @@ Elastic-substitution decoding for Hierarchical <fixed-case>SMT</fixed-case>: efficiency, richer search and double labels GideonMaillette de Buy Wenniger - KhalilSima’an + KhalilSima’an AndyWay 201-215 2017.mtsummit-papers.16 @@ -172,8 +172,8 @@ Development of a classifiers/quantifiers dictionary towards <fixed-case>F</fixed-case>rench-<fixed-case>J</fixed-case>apanese <fixed-case>MT</fixed-case> MutsukoTomokiyo - MathieuMangeot - ChristianBoitet + MathieuMangeot + ChristianBoitet 216-226 2017.mtsummit-papers.17 tomokiyo-etal-2017-development @@ -192,7 +192,7 @@ Usefulness of <fixed-case>MT</fixed-case> output for comprehension — an analysis from the point of view of linguistic intercomprehension KennethJordan Núñez - Mikel L.Forcada + Mikel L.Forcada EsteveClua 241-253 2017.mtsummit-papers.19 @@ -200,7 +200,7 @@ Machine Translation as an Academic Writing Aid for Medical Practitioners - CarlaParra Escartín + CarlaParra Escartín SharonO’Brien Marie-JoséeGoulet MichelSimard @@ -211,16 +211,16 @@ A Multilingual Parallel Corpus for Improving Machine Translation on <fixed-case>S</fixed-case>outheast <fixed-case>A</fixed-case>sian Languages Hai-LongTrieu - Le-MinhNguyen + Le-MinhNguyen 268-281 2017.mtsummit-papers.21 trieu-nguyen-2017-multilingual Exploring Hypotheses Spaces in Neural Machine Translation - FrédéricBlain + FrédéricBlain LuciaSpecia - PranavaMadhyastha + PranavaMadhyastha 282-298 2017.mtsummit-papers.22 blain-etal-2017-exploring @@ -237,7 +237,7 @@ Disentangling <fixed-case>ASR</fixed-case> and <fixed-case>MT</fixed-case> Errors in Speech Translation Ngoc-TienLe BenjaminLecouteux - LaurentBesacier + LaurentBesacier 312-323 2017.mtsummit-papers.24 le-etal-2017-disentangling @@ -249,7 +249,7 @@ MohammadHasanuzzaman AsifEkbal AndyWay - PushpakBhattacharyya + PushpakBhattacharyya 324-336 2017.mtsummit-papers.25 kamila-etal-2017-temporality @@ -329,8 +329,8 @@ SharonO’Brien Chao-HongLiu AndyWay - JoãoGraça - AndréMartins + JoãoGraça + AndréMartins HelenaMoniz EllieKemp RebeccaPetras @@ -340,7 +340,7 @@ A Case Study of Machine Translation in Financial Sentiment Analysis - ChongZhang + ChongZhang MatteoCapelletti AlexandrosPoulis ThorbenStemann diff --git a/data/xml/2017.tal.xml b/data/xml/2017.tal.xml index 0f41bfddf7..c7144c5d97 100644 --- a/data/xml/2017.tal.xml +++ b/data/xml/2017.tal.xml @@ -40,10 +40,10 @@ Noise or music? Investigating the usefulness of normalisation for robust sentiment analysis on social media data CynthiaVan Hee MarjanVan de Kauter - OrphéeDe Clercq + OrphéeDe Clercq ElsLefever BartDesmet - VéroniqueHoste + VéroniqueHoste 63–87 2017.tal-1.3 van-hee-etal-2017-noise @@ -52,7 +52,7 @@ Traitement Automatique des Langues, Volume 58, Numéro 2 : Traitement automatique de la langue juridique [Legal Natural Language Processing] - AdelineNazarenko + AdelineNazarenko AdamWyner ATALA (Association pour le Traitement Automatique des Langues)
France
@@ -76,9 +76,9 @@ Sentence Boundary Detection in Adjudicatory Decisions in the <fixed-case>U</fixed-case>nited <fixed-case>S</fixed-case>tates JaromirSavelka - Vern R.Walker + Vern R.Walker MatthiasGrabmair - Kevin D.Ashley + Kevin D.Ashley 21–45 2017.tal-2.2 savelka-etal-2017-sentence @@ -96,7 +96,7 @@ Traitement Automatique des Langues, Volume 58, Numéro 3 : Traitement automatique de l'arabe et des langues apparentées [NLP for Arabic and Related Languages] - MonaDiab + MonaDiab NizarHabash ImedZitouni ATALA (Association pour le Traitement Automatique des Langues) @@ -113,7 +113,7 @@ Préambule [Preamble] EmmanuelMorin - SophieRosset + SophieRosset PascaleSébillot 8–8 2017.tal-3.1 @@ -133,7 +133,7 @@ Modern Trends in <fixed-case>A</fixed-case>rabic Sentiment Analysis: A Survey HalaMulki HatemHaddad - IsmailBabaoğlu + IsmailBabaoğlu 15–39 2017.tal-3.3 mulki-etal-2017-modern @@ -142,7 +142,7 @@ Une approche fondée sur les lexiques d’analyse de sentiments du dialecte algérien [A lexicon-based approach for sentiment analysis in the <fixed-case>A</fixed-case>lgerian dialect] ImaneGuellil FaicalAzouaou - HoudaSaâdane + HoudaSaâdane NasredineSemmar 41–65 2017.tal-3.4 diff --git a/data/xml/2018.clib.xml b/data/xml/2018.clib.xml index d7eeaf62ea..da56265ab4 100644 --- a/data/xml/2018.clib.xml +++ b/data/xml/2018.clib.xml @@ -17,7 +17,7 @@ With a little help from <fixed-case>NLP</fixed-case>: My Language Technology applications with impact on society - RuslanMitkov + RuslanMitkov 1–4 The keynote speech presents the speaker’s vision that research should lead to the development of applications which benefit society. To support this, the speaker will present three original methodologies proposed by him which underpin applications jointly implemented with colleagues from across his research group. These Language Technology tools already have a substantial societal impact in the following areas: learning and assessment, translation and care for people with language disabilities. 2018.clib-1.1 @@ -44,7 +44,7 @@ Abstractive Text Summarization with Application to <fixed-case>B</fixed-case>ulgarian News Articles NikolaTaushanov IvanKoychev - PreslavNakov + PreslavNakov 15–22 With the development of the Internet, a huge amount of information is available every day. Therefore, text summarization has become critical part of our first access to the information. There are two major approaches for automatic text summarization: abstractive and extractive. In this work, we apply abstractive summarization algorithms on a corpus of Bulgarian news articles. In particular, we compare selected algorithms of both techniques and we show results which provide evidence that the selected state-of-the-art algorithms for abstractive text summarization perform better than the extractive ones for articles in Bulgarian. For the purpose of our experiments we collected a new dataset consisting of around 70,000 news articles and their topics. For research purposes we are also sharing the tools to easily collect and process such datasets. 2018.clib-1.4 @@ -69,7 +69,7 @@ Knowledge and Rule-Based Diacritic Restoration in <fixed-case>S</fixed-case>erbian CvetanaKrstev - RankaStanković + RankaStanković DuškoVitas 41–51 In this paper we present a procedure for the restoration of diacritics in Serbian texts written using the degraded Latin alphabet. The procedure relies on the comprehensive lexical resources for Serbian: the morphological electronic dictionaries, the Corpus of Contemporary Serbian and local grammars. Dictionaries are used to identify possible candidates for the restoration, while the data obtained from SrpKor and local grammars assists in making a decision between several candidates in cases of ambiguity. The evaluation results reveal that, depending on the text, accuracy ranges from 95.03% to 99.36%, while the precision (average 98.93%) is always higher than the recall (average 94.94%). @@ -120,8 +120,8 @@ Ontologies for Natural Language Processing: Case of <fixed-case>R</fixed-case>ussian - NataliaLoukachevitch - BorisDobrov + NataliaLoukachevitch + BorisDobrov 93–103 The paper describes the RuThes family of Russian thesauri intended for natural language processing and information retrieval applications. RuThes-like thesauri include, besides RuThes, Sociopolitical thesaurus, Security Thesaurus, and Ontology on Natural Sciences and Technologies. The RuThes format is based on three approaches for developing computer resources: Princeton WordNet, information-retrieval thesauri, and formal ontologies. The published version of RuThes thesaurus (RuThes-lite 2.0) became a basis for semi-automatic generation of RuWordNet, WordNet-like thesaurus for Russian. Currently researchers can use both RuThes-lite or RuWordNet and compare them in applications. Other RuThes-like resources are being prepared to publication. 2018.clib-1.13 @@ -129,9 +129,9 @@ Resource-based <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Augmentation and Enrichment - RankaStanković + RankaStanković MiljanaMladenović - IvanObradović + IvanObradović MarkoVitas CvetanaKrstev 104–114 @@ -152,7 +152,7 @@ A Pilot Study for Enriching the <fixed-case>R</fixed-case>omanian <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with Medical Terms MariaMitrofan - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu GrigorinaMitrofan 126–134 This paper presents the preliminary investigations in the process of integrating a specialized vocabulary, namely medical terminology, into the Romanian wordnet. We focus here on four classes from this vocabulary: anatomy (or body parts), disorders, medical procedures and chemicals. In this pilot study we selected two large concepts from each class and created the Romanian terminological (sub)trees for each of them, starting from a medical thesaurus (SNOMED CT) and translating the terms, process which raised various challenges, all of them asking for the expertise of a specialist in the health care domain. The integration of these (sub)trees in the Romanian wordnet also required careful decision making, given the structural differences between a wordnet and a terminological thesaurus. They are presented and discussed herein. diff --git a/data/xml/2018.eamt.xml b/data/xml/2018.eamt.xml index ab25132105..baac2f4752 100644 --- a/data/xml/2018.eamt.xml +++ b/data/xml/2018.eamt.xml @@ -8,12 +8,12 @@ 2018 Juan AntonioPérez-Ortiz FelipeSánchez-Martínez - MiquelEsplà-Gomis - MajaPopović + MiquelEsplà-Gomis + MajaPopović CeliaRico - AndréMartins + AndréMartins JoachimVan den Bogaert - Mikel L.Forcada + Mikel L.Forcada 2018.eamt-main eamt @@ -26,7 +26,7 @@ Contextual Handling in Neural Machine Translation: Look behind, ahead and on both sides RuchitAgrawal MarcoTurchi - MatteoNegri + MatteoNegri 31-40 2018.eamt-main.1 A salient feature of Neural Machine Translation (NMT) is the end-to-end nature of training employed, eschewing the need of separate components to model different linguistic phenomena. Rather, an NMT model learns to translate individual sentences from the labeled data itself. However, traditional NMT methods trained on large parallel corpora with a one-to-one sentence mapping make an implicit assumption of sentence independence. This makes it challenging for current NMT systems to model inter-sentential discourse phenomena. While recent research in this direction mainly leverages a single previous source sentence to model discourse, this paper proposes the incorporation of a context window spanning previous as well as next sentences as source-side context and previously generated output as target-side context, using an effective non-recurrent architecture based on self-attention. Experiments show improvement over non-contextual models as well as contextual methods using only previous context. @@ -44,7 +44,7 @@ Compositional Source Word Representations for Neural Machine Translation DuyguAtaman - Mattia AntoninoDi Gangi + Mattia AntoninoDi Gangi MarcelloFederico 51-60 2018.eamt-main.3 @@ -64,8 +64,8 @@ SevilayBayatli SeferKurnaz IlnarSalimzyanov - JonathanWashington - Francis M.Tyers + JonathanWashington + Francis M.Tyers 69-79 2018.eamt-main.5 This paper presents a shallow-transfer machine translation (MT) system for translating from Kazakh to Turkish. Background on the differences between the languages is presented, followed by how the system was designed to handle some of these differences. The system is based on the Apertium free/open-source machine translation platform. The structure of the system and how it works is described, along with an evaluation against two competing systems. Linguistic components were developed, including a Kazakh-Turkish bilingual dictionary, Constraint Grammar disambiguation rules, lexical selection rules, and structural transfer rules. With many known issues yet to be addressed, our RBMT system has reached performance comparable to publicly-available corpus-based MT systems between the languages. @@ -102,8 +102,8 @@ Are Automatic Metrics Robust and Reliable in Specific Machine Translation Tasks? MaraChinea-Rios - AlvaroPeris - FranciscoCasacuberta + AlvaroPeris + FranciscoCasacuberta 109-118 2018.eamt-main.9 We present a comparison of automatic metrics against human evaluations of translation quality in several scenarios which were unexplored up to now. Our experimentation was conducted on translation hypotheses that were problematic for the automatic metrics, as the results greatly diverged from one metric to another. We also compared three different translation technologies. Our evaluation shows that in most cases, the metrics capture the human criteria. However, we face failures of the automatic metrics when applied to some domains and systems. Interestingly, we find that automatic metrics applied to the neural machine translation hypotheses provide the most reliable results. Finally, we provide some advice when dealing with these problematic domains. @@ -112,8 +112,8 @@ Creating the best development corpus for Statistical Machine Translation systems MaraChinea-Rios - GermánSanchis-Trilles - FranciscoCasacuberta + GermánSanchis-Trilles + FranciscoCasacuberta 119-128 2018.eamt-main.10 We propose and study three different novel approaches for tackling the problem of development set selection in Statistical Machine Translation. We focus on a scenario where a machine translation system is leveraged for translating a specific test set, without further data from the domain at hand. Such test set stems from a real application of machine translation, where the texts of a specific e-commerce were to be translated. For developing our development-set selection techniques, we first conducted experiments in a controlled scenario, where labelled data from different domains was available, and evaluated the techniques both with classification and translation quality metrics. Then, the bestperforming techniques were evaluated on the e-commerce data at hand, yielding consistent improvements across two language directions. @@ -121,8 +121,8 @@ Training Deployable General Domain <fixed-case>MT</fixed-case> for a Low Resource Language Pair: <fixed-case>E</fixed-case>nglish-<fixed-case>B</fixed-case>angla - SandipanDandapat - WilliamLewis + SandipanDandapat + WilliamLewis 129-138 2018.eamt-main.11 A large percentage of the world’s population speaks a language of the Indian subcontinent, what we will call here Indic languages, comprising languages from both Indo-European (e.g., Hindi, Bangla, Gujarati, etc.) and Dravidian (e.g., Tamil, Telugu, Malayalam, etc.) families, upwards of 1.5 Billion people. A universal characteristic of Indic languages is their complex morphology, which, when combined with the general lack of sufficient quantities of high quality parallel data, can make developing machine translation (MT) for these languages difficult. In this paper, we describe our efforts towards developing general domain English–Bangla MT systems which are deployable to the Web. We initially developed and deployed SMT-based systems, but over time migrated to NMT-based systems. Our initial SMT-based systems had reasonably good BLEU scores, however, using NMT systems, we have gained significant improvement over SMT baselines. This is achieved using a number of ideas to boost the data store and counter data sparsity: crowd translation of intelligently selected monolingual data (throughput enhanced by an IME (Input Method Editor) designed specifically for QWERTY keyboard entry for Devanagari scripted languages), back-translation, different regularization techniques, dataset augmentation and early stopping. @@ -130,7 +130,7 @@ Deep Neural Machine Translation with Weakly-Recurrent Units - Mattia A.Di Gangi + Mattia A.Di Gangi MarcelloFederico 139-148 2018.eamt-main.12 @@ -140,7 +140,7 @@ Spelling Normalization of Historical Documents by Using a Machine Translation Approach MiguelDomingo - FranciscoCasacuberta + FranciscoCasacuberta 149-158 2018.eamt-main.13 The lack of a spelling convention in historical documents makes their orthography to change depending on the author and the time period in which each document was written. This represents a problem for the preservation of the cultural heritage, which strives to create a digital text version of a historical document. With the aim of solving this problem, we propose three approaches—based on statistical, neural and character-based machine translation—to adapt the document’s spelling to modern standards. We tested these approaches in different scenarios, obtaining very encouraging results. @@ -149,14 +149,14 @@ Neural Machine Translation of <fixed-case>B</fixed-case>asque ThierryEtchegoyhen - EvaMartínez Garcia + EvaMartínez Garcia AndoniAzpeitia - GorkaLabaka - IñakiAlegria + GorkaLabaka + IñakiAlegria ItziarCortes Etxabe AmaiaJauregi Carrera IgorEllakuria Santos - MaiteMartin + MaiteMartin EusebiCalonge 159-168 2018.eamt-main.14 @@ -165,9 +165,9 @@ Evaluation of Terminology Translation in Instance-Based Neural <fixed-case>MT</fixed-case> Adaptation - M. AminFarajian + M. AminFarajian NicolaBertoldi - MatteoNegri + MatteoNegri MarcoTurchi MarcelloFederico 169-178 @@ -227,8 +227,8 @@ Letting a Neural Network Decide Which Machine Translation System to Use for Black-Box Fuzzy-Match Repair John E.Ortega - WeiyiLu - AdamMeyers + WeiyiLu + AdamMeyers KyunghyunCho 229-238 2018.eamt-main.21 @@ -238,8 +238,8 @@ Data selection for <fixed-case>NMT</fixed-case> using Infrequent n-gram Recovery ZuzannaParcheta - GermánSanchis-Trilles - FranciscoCasacuberta + GermánSanchis-Trilles + FranciscoCasacuberta 239-248 2018.eamt-main.22 Neural Machine Translation (NMT) has achieved promising results comparable with Phrase-Based Statistical Machine Translation (PBSMT). However, to train a neural translation engine, much more powerful machines are required than those required to develop translation engines based on PBSMT. One solution to reduce the training cost of NMT systems is the reduction of the training corpus through data selection (DS) techniques. There are many DS techniques applied in PBSMT which bring good results. In this work, we show that the data selection technique based on infrequent n-gram occurrence described in (Gasco ́ et al., 2012) commonly used for PBSMT systems also works well for NMT systems. We focus our work on selecting data according to specific corpora using the previously mentioned technique. The specific-domain corpora used for our experiments are IT domain and medical domain. The DS technique significantly reduces the execution time required to train the model between 87% and 93%. Also, it improves translation quality by up to 2.8 BLEU points. The improvements are obtained with just a small fraction of the data that accounts for between 6% and 20% of the total data. @@ -248,7 +248,7 @@ Translating Short Segments with <fixed-case>NMT</fixed-case>: A Case Study in <fixed-case>E</fixed-case>nglish-to-<fixed-case>H</fixed-case>indi ShantipriyaParida - OndřejBojar + OndřejBojar 249-258 2018.eamt-main.23 This paper presents a case study in translating short image captions of the Visual Genome dataset from English into Hindi using out-of-domain data sets of varying size. We experiment with three NMT models: the shallow and deep sequence-tosequence and the Transformer model as implemented in Marian toolkit. Phrase-based Moses serves as the baseline. The results indicate that the Transformer model outperforms others in the large data setting in a number of automatic metrics and manual evaluation, and it also produces the fewest truncated sentences. Transformer training is however very sensitive to the hyperparameters, so it requires more experimenting. The deep sequence-to-sequence model produced more flawless outputs in the small data setting and it was generally more stable, at the cost of more training iterations. @@ -298,7 +298,7 @@ Integrating <fixed-case>MT</fixed-case> at <fixed-case>S</fixed-case>wiss Post’s Language Service: preliminary results - PierretteBouillon + PierretteBouillon SabrinaGirletti PaulaEstrella JonathanMutal @@ -311,7 +311,7 @@ Iterative Data Augmentation for Neural Machine Translation: a Low Resource Case Study for <fixed-case>E</fixed-case>nglish-<fixed-case>T</fixed-case>elugu - SandipanDandapat + SandipanDandapat ChristianFedermann 307-312 2018.eamt-main.29 @@ -329,7 +329,7 @@ Implementing a neural machine translation engine for mobile devices: the Lingvanex use case ZuzannaParcheta - GermánSanchis-Trilles + GermánSanchis-Trilles AliakseiRudak SiarheiBratchenia 317-322 @@ -340,7 +340,7 @@ Bootstrapping Multilingual Intent Models via Machine Translation for Dialog Automation NicholasRuiz - SrinivasBangalore + SrinivasBangalore JohnChen 323-328 2018.eamt-main.32 @@ -472,7 +472,7 @@ Developing a New <fixed-case>S</fixed-case>wiss Research Centre for Barrier-Free Communication - PierretteBouillon + PierretteBouillon SilviaRodríguez Vázquez IreneStrasly 367 @@ -483,10 +483,10 @@ Massively multilingual accessible audioguides via cell phones ItziarCortes - IgorLeturia + IgorLeturia IńakiAlegria AitzolAstigarraga - KepaSarasola + KepaSarasola ManexGaraio 369 2018.eamt-main.48 @@ -498,7 +498,7 @@ ThierryEtchegoyhen BorjaAnza Porras AndoniAzpeitia - EvaMartínez Garcia + EvaMartínez Garcia PauloVale José LuisFonseca TeresaLynn @@ -506,13 +506,13 @@ FedericoGaspari AndyWay VictoriaArranz - KhalidChoukri + KhalidChoukri VladimirPopescu PedroNeiva RuiNeto MaiteMelero DavidPerez Fernandez - AntonioBranco + AntonioBranco RubenBranco LuisGomes 371 @@ -524,7 +524,7 @@ The <fixed-case>SUMMA</fixed-case> Platform: Scalable Understanding of Multilingual Media UlrichGermann Peggyvan der Kreeft - GuntisBarzdins + GuntisBarzdins AlexandraBirch 373 2018.eamt-main.50 @@ -597,12 +597,12 @@ TomVanallemeersch BramBulté LiesbethAugustinus - FrankVan Eynde + FrankVan Eynde JorisPelemans LyanVerwimp PatrickWambacq GeertHeyman - Marie-FrancineMoens + Marie-FrancineMoens Iuliannavan der Lek-Ciudin FriedaSteurs AylaRigouts Terryn diff --git a/data/xml/2018.gwc.xml b/data/xml/2018.gwc.xml index cc992eb283..14ab7c4a81 100644 --- a/data/xml/2018.gwc.xml +++ b/data/xml/2018.gwc.xml @@ -33,7 +33,7 @@ AnupamMondal DipankarDas ErikCambria - SivajiBandyopadhyay + SivajiBandyopadhyay 10–16 Information extraction in the medical domain is laborious and time-consuming due to the insufficient number of domain-specific lexicons and lack of involvement of domain experts such as doctors and medical practitioners. Thus, in the present work, we are motivated to design a new lexicon, WME 3.0 (WordNet of Medical Events), which contains over 10,000 medical concepts along with their part of speech, gloss (descriptive explanations), polarity score, sentiment, similar sentiment words, category, affinity score and gravity score features. In addition, the manual annotators help to validate the overall as well as individual category level of medical concepts of WME 3.0 using Cohen’s Kappa agreement metric. The agreement score indicates almost correct identification of medical concepts and their assigned features in WME 3.0. 2018.gwc-1.2 @@ -54,7 +54,7 @@ Towards Cross-checking <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>SUMO</fixed-case> Using Meronymy JavierÁlvez - GermanRigau + GermanRigau 25–33 We describe the practical application of a black-box testing methodology for the validation of the knowledge encoded in WordNet, SUMO and their mapping by using automated theorem provers. In this paper,weconcentrateonthepart-whole information provided by WordNet and create a large set of tests on the basis of few question patterns. From our preliminary evaluation results, we report on some of the detected inconsistencies. 2018.gwc-1.4 @@ -62,9 +62,9 @@ Comparing Two Thesaurus Representations for <fixed-case>R</fixed-case>ussian - NataliaLoukachevitch + NataliaLoukachevitch GermanLashevich - BorisDobrov + BorisDobrov 34–43 In the paper we presented a new Russian wordnet, RuWordNet, which was semi-automatically obtained by transformation of the existing Russian thesaurus RuThes. At the first step, the basic structure of wordnets was reproduced: synsets’ hierarchy for each part of speech and the basic set of relations between synsets (hyponym-hypernym, part-whole, antonyms). At the second stage, we added causation, entailment and domain relations between synsets. Also derivation relations were established for single words and the component structure for phrases included in RuWordNet. The described procedure of transformation highlights the specific features of each type of thesaurus representations. 2018.gwc-1.5 @@ -89,7 +89,7 @@ Mapping <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Instances to <fixed-case>W</fixed-case>ikipedia - John P.McCrae + John P.McCrae 61–68 Lexical resource differ from encyclopaedic resources and represent two distinct types of resource covering general language and named entities respectively. However, many lexical resources, including Princeton WordNet, contain many proper nouns, referring to named entities in the world yet it is not possible or desirable for a lexical resource to cover all named entities that may reasonably occur in a text. In this paper, we propose that instead of including synsets for instance concepts PWN should instead provide links to Wikipedia articles describing the concept. In order to enable this we have created a gold-quality mapping between all of the 7,742 instances in PWN and Wikipedia (where such a mapping is possible). As such, this resource aims to provide a gold standard for link discovery, while also allowing PWN to distinguish itself from other resources such as DBpedia or BabelNet. Moreover, this linking connects PWN to the Linguistic Linked Open Data cloud, thus creating a richer, more usable resource for natural language processing. 2018.gwc-1.8 @@ -109,8 +109,8 @@ Improving Wordnets for Under-Resourced Languages Using Machine Translation Bharathi RajaChakravarthi - MihaelArcan - John P.McCrae + MihaelArcan + John P.McCrae 77–86 Wordnets are extensively used in natural language processing, but the current approaches for manually building a wordnet from scratch involves large research groups for a long period of time, which are typically not available for under-resourced languages. Even if wordnet-like resources are available for under-resourced languages, they are often not easily accessible, which can alter the results of applications using these resources. Our proposed method presents an expand approach for improving and generating wordnets with the help of machine translation. We apply our methods to improve and extend wordnets for the Dravidian languages, i.e., Tamil, Telugu, Kannada, which are severly under-resourced languages. We report evaluation results of the generated wordnet senses in term of precision for these languages. In addition to that, we carried out a manual evaluation of the translations for the Tamil language, where we demonstrate that our approach can aid in improving wordnet resources for under-resourced Dravidian languages. 2018.gwc-1.10 @@ -196,7 +196,7 @@ The Company They Keep: Extracting <fixed-case>J</fixed-case>apanese Neologisms Using Language Patterns JamesBreen - TimothyBaldwin + TimothyBaldwin FrancisBond 163–171 We describe an investigation into the identification and extraction of unrecorded potential lexical items in Japanese text by detecting text passages containing selected language patterns typically associated with such items. We identified a set of suitable patterns, then tested them with two large collections of text drawn from the WWW and Twitter. Samples of the extracted items were evaluated, and it was demonstrated that the approach has considerable potential for identifying terms for later lexicographic analysis. @@ -216,7 +216,7 @@ Towards a principled approach to sense clustering – a case study of wordnet and dictionary senses in <fixed-case>D</fixed-case>anish - BolettePedersen + BolettePedersen ManexAgirrezabal SanniNimb IdaOlsen @@ -232,7 +232,7 @@ AgnieszkaDziob MaciejPiasecki ChakavehSaedi - AntónioBranco + AntónioBranco 190–199 The paper presents a new re-built and expanded, version 2.0 of WordnetLoom – an open wordnet editor. It facilitates work on a multilingual system of wordnets, is based on efficient software architecture of thin client, and offers more flexibility in enriching wordnet representation. This new version is built on the experience collected during the use of the previous one for more than 10 years of plWordNet development. We discuss its extensions motivated by the collected experience. A special focus is given to the development of a variant for the needs of MultiWordnet of Portuguese, which is based on a very different wordnet development model. 2018.gwc-1.22 @@ -240,7 +240,7 @@ Translation Equivalence and Synonymy: Preserving the Synsets in Cross-lingual Wordnets - Oi YeeKwong + Oi YeeKwong 200–208 The Princeton WordNet for English was founded on the synonymy relation, and multilingual wordnets are primarily developed by creating equivalent synsets in the respective languages. The process would often rely on translation equivalents obtained from existing bilingual dictionaries. This paper discusses some observations from the Chinese Open Wordnet, especially from the adjective subnet, to illuminate potential blind spots of the approach which may lead to the formation of non-synsets in the new wordnet. With cross-linguistic differences duly taken into account, alternative representations of cross-lingual lexical relations are proposed to better capture the language-specific properties. It is also suggested that such cross-lingual representation encompassing the cognitive as well as linguistic aspects of meaning is beneficial for a lexical resource to be used by both humans and computers. 2018.gwc-1.23 @@ -284,8 +284,8 @@ Distant Supervision for Relation Extraction with Multi-sense Word Embedding SanghaNam KijongHan - Eun-KyungKim - Key-SunChoi + Eun-KyungKim + Key-SunChoi 239–244 Distant supervision can automatically generate labeled data between a large-scale corpus and a knowledge base without utilizing human efforts. Therefore, many studies have used the distant supervision approach in relation extraction tasks. However, existing studies have a disadvantage in that they do not reflect the homograph in the word embedding used as an input of the relation extraction model. Thus, it can be seen that the relation extraction model learns without grasping the meaning of the word accurately. In this paper, we propose a relation extraction model with multi-sense word embedding. We learn multi-sense word embedding using a word sense disambiguation module. In addition, we use convolutional neural network and piecewise max pooling convolutional neural network relation extraction models that efficiently grasp key features in sentences. To evaluate the performance of the proposed model, two additional methods of word embedding were learned and compared. Accordingly, our method showed the highest performance among them. 2018.gwc-1.27 @@ -296,7 +296,7 @@ RahmadMahendra HeninggarSeptiantri Haryo AkbariantoWibowo - RuliManurung + RuliManurung MirnaAdriani 245–250 Ambiguity is a problem we frequently face in Natural Language Processing. Word Sense Disambiguation (WSD) is a task to determine the correct sense of an ambiguous word. However, research in WSD for Indonesian is still rare to find. The availability of English-Indonesian parallel corpora and WordNet for both languages can be used as training data for WSD by applying Cross-Lingual WSD method. This training data is used as an input to build a model using supervised machine learning algorithms. Our research also examines the use of Word Embedding features to build the WSD model. @@ -316,7 +316,7 @@ Simple Embedding-Based Word Sense Disambiguation DiekeOele - Gertjanvan Noord + Gertjanvan Noord 259–265 We present a simple knowledge-based WSD method that uses word and sense embeddings to compute the similarity between the gloss of a sense and the context of the word. Our method is inspired by the Lesk algorithm as it exploits both the context of the words and the definitions of the senses. It only requires large unlabeled corpora and a sense inventory such as WordNet, and therefore does not rely on annotated data. We explore whether additional extensions to Lesk are compatible with our method. The results of our experiments show that by lexically extending the amount of words in the gloss and context, although it works well for other implementations of Lesk, harms our method. Using a lexical selection method on the context words, on the other hand, improves it. The combination of our method with lexical selection enables our method to outperform state-of the art knowledge-based systems. 2018.gwc-1.30 @@ -326,7 +326,7 @@ Semi-automatic <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Linking using Word Embeddings KevinPatel DipteshKanojia - PushpakBhattacharyya + PushpakBhattacharyya 266–271 Wordnets are rich lexico-semantic resources. Linked wordnets are extensions of wordnets, which link similar concepts in wordnets of different languages. Such resources are extremely useful in many Natural Language Processing (NLP) applications, primarily those based on knowledge-based approaches. In such approaches, these resources are considered as gold standard/oracle. Thus, it is crucial that these resources hold correct information. Thereby, they are created by human experts. However, manual maintenance of such resources is a tedious and costly affair. Thus techniques that can aid the experts are desirable. In this paper, we propose an approach to link wordnets. Given a synset of the source language, the approach returns a ranked list of potential candidate synsets in the target language from which the human expert can choose the correct one(s). Our technique is able to retrieve a winner synset in the top 10 ranked list for 60% of all synsets and 70% of noun synsets. 2018.gwc-1.31 @@ -343,7 +343,7 @@ Grammatical Role Embeddings for Enhancements of Relation Density in the <fixed-case>P</fixed-case>rinceton <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et - KirilSimov + KirilSimov AlexanderPopov IlianaSimova PetyaOsenova @@ -355,7 +355,7 @@ An Iterative Approach for Unsupervised Most Frequent Sense Detection using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and Word Embeddings KevinPatel - PushpakBhattacharyya + PushpakBhattacharyya 293–297 Given a word, what is the most frequent sense in which it occurs in a given corpus? Most Frequent Sense (MFS) is a strong baseline for unsupervised word sense disambiguation. If we have large amounts of sense-annotated corpora, MFS can be trivially created. However, sense-annotated corpora are a rarity. In this paper, we propose a method which can compute MFS from raw corpora. Our approach iteratively exploits the semantic congruity among related words in corpus. Our method performs better compared to another similar work. 2018.gwc-1.34 @@ -365,7 +365,7 @@ Automatic Identification of Basic-Level Categories ChadMills FrancisBond - Gina-AnneLevow + Gina-AnneLevow 298–305 Basic-level categories have been shown to be both psychologically significant and useful in a wide range of practical applications. We build a rule-based system to identify basic-level categories in WordNet, achieving 77% accuracy on a test set derived from prior psychological experiments. With additional annotations we found our system also has low precision, in part due to the existence of many categories that do not fit into the three classes (superordinate, basic-level, and subordinate) relied on in basic-level category research. 2018.gwc-1.35 @@ -373,7 +373,7 @@ <fixed-case>A</fixed-case>frican <fixed-case>W</fixed-case>ordnet: facilitating language learning in <fixed-case>A</fixed-case>frican languages - SonjaBosch + SonjaBosch MarissaGriesel 306–313 The development of the African Wordnet (AWN) has reached a stage of maturity where the first steps towards an application can be attempted. The AWN is based on the expand method, and to compensate for the general resource scarceness of the African languages, various development strategies were used. The aim of this paper is to investigate the usefulness of the current isiZulu Wordnet in an application such as language learning. The advantage of incorporating the wordnet of a language into a language learning system is that it provides learners with an integrated application to enhance their learning experience by means of the unique sense identification features of wordnets. In this paper it will be demonstrated by means of a variety of examples within the context of a basic free online course how the isiZulu Wordnet can offer the language learner improved decision support. @@ -389,8 +389,8 @@ LaxmiKashyap DipteshKanojia PreethiJyothi - MalharKulkarni - PushpakBhattacharyya + MalharKulkarni + PushpakBhattacharyya 314–323 This paper reports the work related to making Hindi Wordnet1 available as a digital resource for language learning and teaching, and the experiences and lessons that were learnt during the process. The language data of the Hindi Wordnet has been suitably modified and enhanced to make it into a language learning aid. This aid is based on modern pedagogical axioms and is aligned to the learning objectives of the syllabi of the school education in India. To make it into a comprehensive language tool, grammatical information has also been encoded, as far as these can be marked on the lexical items. The delivery of information is multi-layered, multi-sensory and is available across multiple digital platforms. The front end has been designed to offer an eye-catching user-friendly interface which is suitable for learners starting from age six onward. Preliminary testing of the tool has been done and it has been modified as per the feedbacks that were received. Above all, the entire exercise has offered gainful insights into learning based on associative networks and how knowledge based on such networks can be made available to modern learners. 2018.gwc-1.37 @@ -418,8 +418,8 @@ <fixed-case>ELEXIS</fixed-case> - a <fixed-case>E</fixed-case>uropean infrastructure fostering cooperation and information exchange among lexicographical research communities - BolettePedersen - JohnMcCrae + BolettePedersen + JohnMcCrae CaroleTiberius SimonKrek 335–340 @@ -494,7 +494,7 @@ pyiwn: A Python based <fixed-case>API</fixed-case> to access <fixed-case>I</fixed-case>ndian Language <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>ets RiteshPanjwani DipteshKanojia - PushpakBhattacharyya + PushpakBhattacharyya 378–383 Indian language WordNets have their individual web-based browsing interfaces along with a common interface for IndoWordNet. These interfaces prove to be useful for language learners and in an educational domain, however, they do not provide the functionality of connecting to them and browsing their data through a lucid application programming interface or an API. In this paper, we present our work on creating such an easy-to-use framework which is bundled with the data for Indian language WordNets and provides NLTK WordNet interface like core functionalities in Python. Additionally, we use a pre-built speech synthesis system for Hindi language and augment Hindi data with audios for words, glosses, and example sentences. We provide a detailed usage of our API and explain the functions for ease of the user. Also, we package the IndoWordNet data along with the source code and provide it openly for the purpose of research. We aim to provide all our work as an open source framework for further development. 2018.gwc-1.47 @@ -502,8 +502,8 @@ Sinitic <fixed-case>W</fixed-case>ordnet: Laying the Groundwork with <fixed-case>C</fixed-case>hinese Varieties Written in Traditional Characters - Chih-YaoLee - Shu-KaiHsieh + Chih-YaoLee + Shu-KaiHsieh 384–387 The present work seeks to make the logographic nature of Chinese script a relevant research ground in wordnet studies. While wordnets are not so much about words as about the concepts represented in words, synset formation inevitably involves the use of orthographic and/or phonetic representations to serve as headword for a given concept. For wordnets of Chinese languages, if their synsets are mapped with each other, the connection from logographic forms to lexicalized concepts can be explored backwards to, for instance, help trace the development of cognates in different varieties of Chinese. The Sinitic Wordnet project is an attempt to construct such an integrated wordnet that aggregates three Chinese varieties that are widely spoken in Taiwan and all written in traditional Chinese characters. 2018.gwc-1.48 @@ -513,7 +513,7 @@ Synthesizing Audio for <fixed-case>H</fixed-case>indi <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et DipteshKanojia PreethiJyothi - PushpakBhattacharyya + PushpakBhattacharyya 388–393 In this paper, we describe our work on the creation of a voice model using a speech synthesis system for the Hindi Language. We use pre-existing “voices”, use publicly available speech corpora to create a “voice” using the Festival Speech Synthesis System (Black, 1997). Our contribution is two-fold: (1) We scrutinize multiple speech synthesis systems and provide an extensive report on the currently available state-of-the-art systems. We also develop voices using the existing implementations of the aforementioned systems, and (2) We use these voices to generate sample audios for randomly chosen words; manually evaluate the audio generated, and produce audio for all WordNet words using the winner voice model. We also produce audios for the Hindi WordNet Glosses and Example sentences. We describe our efforts to use pre-existing implementations for WaveNet - a model to generate raw audio using neural nets (Oord et al., 2016) and generate speech for Hindi. Our lexicographers perform a manual evaluation of the audio generated using multiple voices. A qualitative and quantitative analysis reveals that the voice model generated by us performs the best with an accuracy of 0.44. 2018.gwc-1.49 @@ -531,7 +531,7 @@ Towards a Crowd-Sourced <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for Colloquial <fixed-case>E</fixed-case>nglish - John P.McCrae + John P.McCrae IanWood AmandaHicks 401–406 diff --git a/data/xml/2018.icon.xml b/data/xml/2018.icon.xml index a5d9d1f9ad..1a96919381 100644 --- a/data/xml/2018.icon.xml +++ b/data/xml/2018.icon.xml @@ -3,8 +3,8 @@ Proceedings of the 15th International Conference on Natural Language Processing - Gurpreet SinghLehal - Dipti MisraSharma + Gurpreet SinghLehal + Dipti MisraSharma RajeevSangal NLP Association of India
International Institute of Information Technology, Hyderabad, India
@@ -19,7 +19,7 @@ Abstractive Summarization Using Attentive Neural Techniques JacobKrantz - JugalKalita + JugalKalita 1–9 2018.icon-1.1 krantz-kalita-2018-abstractive @@ -47,7 +47,7 @@ PranawKumar BiraChandraSingh Prakash B.Pimpale - SasikumarM. + SasikumarM. 28–34 2018.icon-1.4 ghone-etal-2018-automatic @@ -74,8 +74,8 @@ SwapnilHingmire SachinPawar SangameshwarPatil - Girish K.Palshikar - PushpakBhattacharyya + Girish K.Palshikar + PushpakBhattacharyya VasudevaVerma 50–58 2018.icon-1.7 @@ -85,7 +85,7 @@ Deep Learning methods for Semantic Role Labeling in <fixed-case>I</fixed-case>ndian Languages AishwaryGupta AkshayPawale - ManishShrivastava + ManishShrivastava 59–68 2018.icon-1.8 gupta-etal-2018-deep-learning @@ -126,7 +126,7 @@ Does Curriculum Learning help Deep Learning for Natural Language Generation? SandhyaSingh KevinPatel - PushpakBhattacharya + PushpakBhattacharya KrishnanjanBhattacharjee HemantDarbari SeemaVerma @@ -138,7 +138,7 @@ <fixed-case>W</fixed-case>up<fixed-case>L</fixed-case>e<fixed-case>B</fixed-case>leu: The Word-net Based Evaluation Metric for Machine Translation DebajyotyBanik AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 99–103 2018.icon-1.14 banik-etal-2018-wuplebleu @@ -146,7 +146,7 @@ “Is This A Joke?”: A Large Humor Classification Dataset FarazFaruqi - ManishShrivastava + ManishShrivastava 104–109 2018.icon-1.15 faruqi-shrivastava-2018-joke @@ -163,7 +163,7 @@ A Content-based Recommendation System for Medical Concepts: Disease and Symptom AnupamMondal DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 115–121 2018.icon-1.17 mondal-etal-2018-content @@ -173,7 +173,7 @@ ZishanAhmad SahooSovan Kumar AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 122–131 2018.icon-1.18 ahmad-etal-2018-deep @@ -191,7 +191,7 @@ Improving Computer Generated Dialog with Auxiliary Loss Functions and Custom Evaluation Metrics ThomasConley JackSt. Clair - JugalKalita + JugalKalita 138–144 2018.icon-1.20 conley-etal-2018-improving @@ -230,10 +230,10 @@ <fixed-case>SMT</fixed-case> vs <fixed-case>NMT</fixed-case>: A Comparison over <fixed-case>H</fixed-case>indi and <fixed-case>B</fixed-case>engali Simple Sentences - Sainik KumarMahata - SoumilMandal + Sainik KumarMahata + SoumilMandal DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 175–182 2018.icon-1.25 mahata-etal-2018-smt @@ -241,9 +241,9 @@ Helping each Other: A Framework for Customer-to-Customer Suggestion Mining using a Semi-supervised Deep Neural Network HiteshGolchha - DeepakGupta + DeepakGupta AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 183–192 2018.icon-1.26 golchha-etal-2018-helping diff --git a/data/xml/2018.ijclclp.xml b/data/xml/2018.ijclclp.xml index e7d567e349..1eed3a5a41 100644 --- a/data/xml/2018.ijclclp.xml +++ b/data/xml/2018.ijclclp.xml @@ -3,7 +3,7 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 23, Number 1, June 2018 - Jen-TzungChien + Jen-TzungChien Chia-HuiChang Association for Computational Linguistics and Chinese Language Processing
Taipei, Taiwan
@@ -30,7 +30,7 @@ Yu-ShuoLiu Chin-PoChen Susan Shur-FenGau - Chi-ChunLee + Chi-ChunLee 2018.ijclclp-1.2 zho liu-etal-2018-chang @@ -38,7 +38,7 @@ 整合個人化磁振造影深度神經網路之演算法技術 (Joint Modeling of Individual Neural Responses using a Deep Voting Fusion Network for Automatic Emotion Perception Decoding) Wan-TingHsieh - Chi-ChunLee + Chi-ChunLee 2018.ijclclp-1.3 zho hsieh-lee-2018-zheng @@ -64,7 +64,7 @@ 使用長短期記憶類神經網路建構中文語音辨識器之研究 (A Study on <fixed-case>M</fixed-case>andarin Speech Recognition using Long Short- Term Memory Neural Network) Chien-hungLai - Yih-RuWang + Yih-RuWang 2018.ijclclp-2.1 zho lai-wang-2018-shi-yong @@ -108,7 +108,7 @@ 以深層類神經網路標記中文階層式多標籤語意概念 (Hierarchical Multi-Label <fixed-case>C</fixed-case>hinese Word Semantic Labeling using Deep Neural Network) Wei-ChiehChou - Yih-RuWang + Yih-RuWang 2018.ijclclp-2.6 zho chou-wang-2018-yi-shen diff --git a/data/xml/2018.iwslt.xml b/data/xml/2018.iwslt.xml index 68168ed2d5..e58feae793 100644 --- a/data/xml/2018.iwslt.xml +++ b/data/xml/2018.iwslt.xml @@ -21,7 +21,7 @@ The <fixed-case>IWSLT</fixed-case> 2018 Evaluation Campaign JanNiehues RolandoCattoni - SebastianStüker + SebastianStüker MauroCettolo MarcoTurchi MarcelloFederico @@ -35,7 +35,7 @@ ViktorHangya FabienneBraune YuliyaKalasouskaya - AlexanderFraser + AlexanderFraser 7-13 Mining parallel sentences from comparable corpora is of great interest for many downstream tasks. In the BUCC 2017 shared task, systems performed well by training on gold standard parallel sentences. However, we often want to mine parallel sentences without bilingual supervision. We present a simple approach relying on bilingual word embeddings trained in an unsupervised fashion. We incorporate orthographic similarity in order to handle words with similar surface forms. In addition, we propose a dynamic threshold method to decide if a candidate sentence-pair is parallel which eliminates the need to fine tune a static value for different datasets. Since we do not employ any language specific engineering our approach is highly generic. We show that our approach is effective, on three language-pairs, without the use of any bilingual signal which is important because parallel sentence mining is most useful in low resource scenarios. 2018.iwslt-1.2 @@ -55,7 +55,7 @@ Analyzing Knowledge Distillation in Neural Machine Translation DakunZhang - JosepCrego + JosepCrego JeanSenellart 23-30 Knowledge distillation has recently been successfully applied to neural machine translation. It allows for building shrunk networks while the resulting systems retain most of the quality of the original model. Despite the fact that many authors report on the benefits of knowledge distillation, few have discussed the actual reasons why it works, especially in the context of neural MT. In this paper, we conduct several experiments aimed at understanding why and how distillation impacts accuracy on an English-German translation task. We show that translation complexity is actually reduced when building a distilled/synthesised bi-text when compared to the reference bi-text. We further remove noisy data from synthesised translations and merge filtered synthesised data together with original reference, thus achieving additional gains in terms of accuracy. @@ -77,7 +77,7 @@ A Machine Translation Approach for Modernizing Historical Documents Using Backtranslation MiguelDomingo - FranciscoCasacuberta + FranciscoCasacuberta 39-47 Human language evolves with the passage of time. This makes historical documents to be hard to comprehend by contemporary people and, thus, limits their accessibility to scholars specialized in the time period in which a certain document was written. Modernization aims at breaking this language barrier and increase the accessibility of historical documents to a broader audience. To do so, it generates a new version of a historical document, written in the modern version of the document’s original language. In this work, we propose several machine translation approaches for modernizing historical documents. We tested these approaches in different scenarios, obtaining very encouraging results. 2018.iwslt-1.6 @@ -99,7 +99,7 @@ Transfer Learning in Multilingual Neural Machine Translation with Dynamic Vocabulary Surafel M.Lakew AliiaErofeeva - MatteoNegri + MatteoNegri MarcelloFederico MarcoTurchi 54-61 @@ -137,7 +137,7 @@ The <fixed-case>ADAPT</fixed-case> System Description for the <fixed-case>IWSLT</fixed-case> 2018 <fixed-case>B</fixed-case>asque to <fixed-case>E</fixed-case>nglish Translation Task AlbertoPoncelas AndyWay - KepaSarasola + KepaSarasola 76-82 In this paper we present the ADAPT system built for the Basque to English Low Resource MT Evaluation Campaign. Basque is a low-resourced, morphologically-rich language. This poses a challenge for Neural Machine Translation models which usually achieve better performance when trained with large sets of data. Accordingly, we used synthetic data to improve the translation quality produced by a model built using only authentic data. Our proposal uses back-translated data to: (a) create new sentences, so the system can be trained with more data; and (b) translate sentences that are close to the test set, so the model can be fine-tuned to the document to be translated. 2018.iwslt-1.11 @@ -154,7 +154,7 @@ The <fixed-case>M</fixed-case>e<fixed-case>MAD</fixed-case> Submission to the <fixed-case>IWSLT</fixed-case> 2018 Speech Translation Task UmutSulubacak - JörgTiedemann + JörgTiedemann AkuRouhe Stig-ArneGrönroos MikkoKurimo @@ -165,7 +165,7 @@ Prompsit’s Submission to the <fixed-case>IWSLT</fixed-case> 2018 Low Resource Machine Translation Task - Víctor M.Sánchez-Cartagena + Víctor M.Sánchez-Cartagena 95-103 This paper presents Prompsit Language Engineering’s submission to the IWSLT 2018 Low Resource Machine Translation task. Our submission is based on cross-lingual learning: a multilingual neural machine translation system was created with the sole purpose of improving translation quality on the Basque-to-English language pair. The multilingual system was trained on a combination of in-domain data, pseudo in-domain data obtained via cross-entropy data selection and backtranslated data. We morphologically segmented Basque text with a novel approach that only requires a dictionary such as those used by spell checkers and proved that this segmentation approach outperforms the widespread byte pair encoding strategy for this task. 2018.iwslt-1.14 @@ -233,13 +233,13 @@ <fixed-case>KIT</fixed-case>’s <fixed-case>IWSLT</fixed-case> 2018 <fixed-case>SLT</fixed-case> Translation System MatthiasSperber - Ngoc-QuanPham + Ngoc-QuanPham Thai-SonNguyen JanNiehues MarkusMüller Thanh-LeHa - SebastianStüker - AlexWaibel + SebastianStüker + AlexWaibel 131-135 This paper describes KIT’s submission to the IWSLT 2018 Translation task. We describe a system participating in the baseline condition and a system participating in the end-to-end condition. The baseline system is a cascade of an ASR system, a system to segment the ASR output and a neural machine translation system. We investigate the combination of different ASR systems. For the segmentation and machine translation components, we focused on transformer-based architectures. 2018.iwslt-1.19 @@ -251,7 +251,7 @@ HongjieChen KaiFan Cheung-ChiLeung - BoLi + BoLi ChongjiaNi RongTong PeiZhang @@ -266,8 +266,8 @@ <fixed-case>CUNI</fixed-case> <fixed-case>B</fixed-case>asque-to-<fixed-case>E</fixed-case>nglish Submission in <fixed-case>IWSLT</fixed-case>18 TomKocmi - DušanVariš - OndřejBojar + DušanVariš + OndřejBojar 142-146 We present our submission to the IWSLT18 Low Resource task focused on the translation from Basque-to-English. Our submission is based on the current state-of-the-art self-attentive neural network architecture, Transformer. We further improve this strong baseline by exploiting available monolingual data using the back-translation technique. We also present further improvements gained by a transfer learning, a technique that trains a model using a high-resource language pair (Czech-English) and then fine-tunes the model using the target low-resource language pair (Basque-English). 2018.iwslt-1.21 @@ -275,10 +275,10 @@ Fine-tuning on Clean Data for End-to-End Speech Translation: <fixed-case>FBK</fixed-case> @ <fixed-case>IWSLT</fixed-case> 2018 - Mattia AntoninoDi Gangi + Mattia AntoninoDi Gangi RobertoDessì RoldanoCattoni - MatteoNegri + MatteoNegri MarcoTurchi 147-152 This paper describes FBK’s submission to the end-to-end English-German speech translation task at IWSLT 2018. Our system relies on a state-of-the-art model based on LSTMs and CNNs, where the CNNs are used to reduce the temporal dimension of the audio input, which is in general much higher than machine translation input. Our model was trained only on the audio-to-text parallel data released for the task, and fine-tuned on cleaned subsets of the original training corpus. The addition of weight normalization and label smoothing improved the baseline system by 1.0 BLEU point on our validation set. The final submission also featured checkpoint averaging within a training run and ensemble decoding of models trained during multiple runs. On test data, our best single model obtained a BLEU score of 9.7, while the ensemble obtained a BLEU score of 10.24. @@ -310,7 +310,7 @@ Learning to Segment Inputs for <fixed-case>NMT</fixed-case> Favors Character-Level Processing JuliaKreutzer - ArtemSokolov + ArtemSokolov 166-172 Most modern neural machine translation (NMT) systems rely on presegmented inputs. Segmentation granularity importantly determines the input and output sequence lengths, hence the modeling depth, and source and target vocabularies, which in turn determine model size, computational costs of softmax normalization, and handling of out-of-vocabulary words. However, the current practice is to use static, heuristic-based segmentations that are fixed before NMT training. This begs the question whether the chosen segmentation is optimal for the translation task. To overcome suboptimal segmentation choices, we present an algorithm for dynamic segmentation, that is trainable end-to-end and driven by the NMT objective. In an evaluation on four translation tasks we found that, given the freedom to navigate between different segmentation levels, the model prefers to operate on (almost) character level, providing support for purely character-level NMT models from a novel angle. 2018.iwslt-1.25 diff --git a/data/xml/2018.jeptalnrecital.xml b/data/xml/2018.jeptalnrecital.xml index e4a89eb8d5..a44d6d415f 100644 --- a/data/xml/2018.jeptalnrecital.xml +++ b/data/xml/2018.jeptalnrecital.xml @@ -73,7 +73,7 @@ fra PierreMagistry Anne-LaureLigozat - SophieRosset + SophieRosset 75–86 Cet article présente une nouvelle méthode d’étiquetage en parties du discours adaptée aux langues peu dotées : la définition du contexte utilisé pour construire les plongements lexicaux est adaptée à la tâche, et de nouveaux vecteurs sont créés pour les mots inconnus. Les expériences menées sur le picard, le malgache et l’alsacien montrent que cette méthode améliore l’état de l’art pour ces trois langues peu dotées. 2018.jeptalnrecital-long.6 @@ -154,7 +154,7 @@ fra SébastienDelecraz LeonorBecerra-Bonache - BenoîtFavre + BenoîtFavre AlexisNasr FrédéricBechet 171–182 @@ -204,7 +204,7 @@ Détection automatique de phrases en domaine de spécialité en français (Sentence boundary detection for specialized domains in <fixed-case>F</fixed-case>rench ) fra ArthurBoyer - AurélieNévéol + AurélieNévéol 205–214 La détection de frontières de phrase est généralement considéré comme un problème résolu. Cependant, les outils performant sur des textes en domaine général, ne le sont pas forcement sur des domaines spécialisés, ce qui peut engendrer des dégradations de performance des outils intervenant en aval dans une chaîne de traitement automatique s’appuyant sur des textes découpés en phrases. Dans cet article, nous évaluons 5 outils de segmentation en phrase sur 3 corpus issus de différent domaines. Nous ré-entrainerons l’un de ces outils sur un corpus de spécialité pour étudier l’adaptation en domaine. Notamment, nous utilisons un nouveau corpus biomédical annoté spécifiquement pour cette tâche. La detection de frontières de phrase à l’aide d’un modèle OpenNLP entraîné sur un corpus clinique offre une F-mesure de .73, contre .66 pour la version standard de l’outil. 2018.jeptalnrecital-court.2 @@ -226,9 +226,9 @@ fra JeremyAuguste DelphineCharlet - GéraldineDamnati - BenoitFavre - FredericBechet + GéraldineDamnati + BenoitFavre + FredericBechet 225–232 Cet article présente des méthodes permettant l’évaluation de la satisfaction client à partir de très vastes corpus de conversation de type “chat” entre des clients et des opérateurs. Extraire des connaissances dans ce contexte demeure un défi pour les méthodes de traitement automatique des langues de par la dimension interactive et les propriétés de ce nouveau type de langage à l’intersection du langage écrit et parlé. Nous présentons une étude utilisant des réponses à des sondages utilisateurs comme supervision faible permettant de prédire la satisfaction des usagers d’un service en ligne d’assistance technique et commerciale. 2018.jeptalnrecital-court.4 @@ -238,8 +238,8 @@ Détection d’erreurs dans des transcriptions <fixed-case>OCR</fixed-case> de documents historiques par réseaux de neurones récurrents multi-niveau (Combining character level and word level <fixed-case>RNN</fixed-case>s for post-<fixed-case>OCR</fixed-case> error detection) fra ThibaultMagallon - FredericBechet - BenoitFavre + FredericBechet + BenoitFavre 233–240 Le traitement à posteriori de transcriptions OCR cherche à détecter les erreurs dans les sorties d’OCR pour tenter de les corriger, deux tâches évaluées par la compétition ICDAR-2017 Post-OCR Text Correction. Nous présenterons dans ce papier un système de détection d’erreurs basé sur un modèle à réseaux récurrents combinant une analyse du texte au niveau des mots et des caractères en deux temps. Ce système a été classé second dans trois catégories évaluées parmi 11 candidats lors de la compétition. 2018.jeptalnrecital-court.5 @@ -319,7 +319,7 @@ ElvysLinhares Pontes StéphaneHuet Andréa CarneiroLinhares - Juan-ManuelTorres-Moreno + Juan-ManuelTorres-Moreno 311–320 Semantic Textual Similarity (STS) is the basis of many applications in Natural Language Processing (NLP). Our system combines convolution and recurrent neural networks to measure the semantic similarity of sentences. It uses a convolution network to take account of the local context of words and an LSTM to consider the global context of sentences. This combination of networks helps to preserve the relevant information of sentences and improves the calculation of the similarity between sentences. Our model has achieved good results and is competitive with the best state-of-the-art systems. 2018.jeptalnrecital-court.13 @@ -341,7 +341,7 @@ fra MarwaHadj Salah LoïcVial - HervéBlanchon + HervéBlanchon MounirZrigui DidierSchwab 329–336 @@ -393,7 +393,7 @@ Annotation automatique des types de discours dans des livres audio en vue d’une oralisation par un système de synthèse (Automatic annotation of discourse types in audio-books) fra AghilasSini - ElisabethDelais-Roussarie + ElisabethDelais-Roussarie DamienLolive 375–382 Pour synthétiser automatiquement et de manière expressive des livres audio, il est nécessaire de connaître le type des discours à oraliser. Ceci étant, dans un roman ou une nouvelle, les perspectives narratives et les types de discours évoluent souvent entre de la narration, du récitatif, du discours direct, du discours rapporté, voire des dialogues. Dans ce travail, nous allons présenter un outil qui a été développé à partir de l’analyse d’un corpus de livres audio (extraits de Madame Bovary et des Mystères de Paris) et qui prend comme unité de base pour l’analyse le paragraphe. Cet outil permet donc non seulement de déterminer automatiquement les types de discours (narration, discours direct, dialogue), et donc de savoir qui parle, mais également d’annoter l’extension des modifications discursives. Ce dernier point est important, notamment dans le cas d’incises de citation où le narrateur reprend la parole dans une séquence au discours direct. Dans sa forme actuelle, l’outil atteint un taux de 89 % de bonne détection. @@ -403,11 +403,11 @@ Impact du Prétraitement Linguistique sur l’Analyse de Sentiment du Dialecte Tunisien () fra - ChediBechikh Ali + ChediBechikh Ali HalaMulki HatemHaddad 383–392 - + 2018.jeptalnrecital-court.21 bechikh-ali-etal-2018-impact @@ -415,7 +415,7 @@ Detecting context-dependent sentences in parallel corpora RachelBawden ThomasLavergne - SophieRosset + SophieRosset 393–400 In this article, we provide several approaches to the automatic identification of parallel sentences that require sentence-external linguistic context to be correctly translated. Our long-term goal is to automatically construct a test set of context-dependent sentences in order to evaluate machine translation models designed to improve the translation of contextual, discursive phenomena. We provide a discussion and critique that show that current approaches do not allow us to achieve our goal, and suggest that for now evaluating individual phenomena is likely the best solution. 2018.jeptalnrecital-court.22 @@ -425,7 +425,7 @@ Predicting failure of a mediated conversation in the context of asymetric role dialogues RomainCarbou DelphineCharlet - GéraldineDamnati + GéraldineDamnati FrédéricLandragin JeanLéon Bouraoui 401–408 @@ -473,12 +473,12 @@ Détection des couples de termes translittérés à partir d’un corpus parallèle anglais-arabe () fra WafaNeifar - ThierryHamon - PierreZweigenbaum - MariemEllouze + ThierryHamon + PierreZweigenbaum + MariemEllouze Lamia-HadrichBelguith 437–446 - + 2018.jeptalnrecital-court.27 neifar-etal-2018-detection @@ -548,7 +548,7 @@ A comparative study of word embeddings and other features for lexical complexity detection in <fixed-case>F</fixed-case>rench - AinaGarí Soler + AinaGarí Soler MariannaApidianaki AlexandreAllauzen 499–508 @@ -563,7 +563,7 @@ AzouaouFaical FodilBenali AlaEddine Hachani - HoudaSaadane + HoudaSaadane 509–518 Dans cet article, nous présentons une approche hybride pour la translitération de l’arabizi algérien. Nous avons élaboré un ensemble de règles permettant le passage de l’arabizi vers l’arabe. Á partir de ces règles nous générons un ensemble de candidats pour la translitération de chaque mot en arabizi vers l’arabe, et un parmi ces candidats sera ensuite identifié et extrait comme le meilleur candidat. Cette approche a été expérimentée en utilisant trois corpus de tests. Les résultats obtenus montrent une amélioration du score de précision qui était pour le meilleur des cas de l’ordre de 75,11%. Ces résultats ont aussi permis de vérifier que notre approche est très compétitive par rapport aux travaux traitant de la translitération de l’arabizi en général. 2018.jeptalnrecital-court.35 @@ -583,7 +583,7 @@ JulienPlu KevinCousot MathieuLafourcade - RaphaëlTroncy + RaphaëlTroncy GiuseppeRizzo 529–538 Entity linking systems typically rely on encyclopedic knowledge bases such as DBpedia or Freebase. In this paper, we use, instead, a French lexical-semantic network named JeuxDeMots to jointly type and link entities. Our approach combines word embeddings and a path-based similarity resulting in encouraging results over a set of documents from the French Le Monde newspaper. @@ -595,7 +595,7 @@ fra GaëlGuibon MagalieOchs - PatriceBellot + PatriceBellot 539–546 L’utilisation des emojis dans les messageries sociales n’a eu de cesse d’augmenter ces dernières années. Plusieurs travaux récents ont porté sur la prédiction d’emojis afin d’épargner à l’utillisateur le parcours de librairies d’emojis de plus en plus conséquentes. Nous proposons une méthode permettant de récupérer automatiquement les catégories d’emojis à partir de leur contexte d’utilisation afin d’améliorer la prédiction finale. Pour ce faire nous utilisons des plongements lexicaux en considérant les emojis comme des mots présents dans des tweets. Nous appliquons ensuite un regroupement automatique restreint aux emojis visages afin de vérifier l’adéquation des résultats avec la théorie d’Ekman. L’approche est reproductible et applicable sur tous types d’emojis, ou lorsqu’il est nécessaire de prédire de nombreuses classes. 2018.jeptalnrecital-court.38 @@ -665,7 +665,7 @@ Construction de patrons lexico-syntaxiques d’extraction pour l’acquisition de connaissances à partir du web (Relation pattern extraction and information extraction from the web) fra ChloéMonnin - OlivierHamon + OlivierHamon 3–16 Cet article présente une méthode permettant de collecter sur le web des informations complémentaires à une information prédéfinie, afin de remplir une base de connaissances. Notre méthode utilise des patrons lexico-syntaxiques, servant à la fois de requêtes de recherche et de patrons d’extraction permettant l’analyse de documents non structurés. Pour ce faire, il nous a fallu définir au préalable les critères pertinents issus des analyses dans l’objectif de faciliter la découverte de nouvelles valeurs. 2018.jeptalnrecital-recital.1 @@ -701,7 +701,7 @@ Résumé automatique guidé de textes: État de l’art et perspectives (Guided Summarization : State-of-the-art and perspectives ) fra SalimaLamsiyah - SaidOuatik El Alaoui + SaidOuatik El Alaoui BernardEspinasse 55–72 Les systèmes de résumé automatique de textes (SRAT) consistent à produire une représentation condensée et pertinente à partir d’un ou de plusieurs documents textuels. La majorité des SRAT sont basés sur des approches extractives. La tendance actuelle consiste à s’orienter vers les approches abstractives. Dans ce contexte, le résumé guidé défini par la campagne d’évaluation internationale TAC (Text Analysis Conference) en 2010, vise à encourager la recherche sur ce type d’approche, en se basant sur des techniques d’analyse en profondeur de textes. Dans ce papier, nous nous penchons sur le résumé automatique guidé de textes. Dans un premier temps, nous définissons les différentes caractéristiques et contraintes liées à cette tâche. Ensuite, nous dressons un état de l’art des principaux systèmes existants en mettant l’accent sur les travaux les plus récents, et en les classifiant selon les approches adoptées, les techniques utilisées, et leurs évaluations sur des corpus de références. Enfin, nous proposons les grandes étapes d’une méthode spécifique devant permettre le développement d’un nouveau type de systèmes de résumé guidé. @@ -867,7 +867,7 @@ Un corpus en arabe annoté manuellement avec des sens <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et (<fixed-case>A</fixed-case>rabic Manually Sense Annotated Corpus with <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Senses) fra MarwaHadj Salah - HervéBlanchon + HervéBlanchon MounirZrigui DidierSchwab 213–216 @@ -894,16 +894,16 @@ <fixed-case>DEFT</fixed-case>2018 : recherche d’information et analyse de sentiments dans des tweets concernant les transports en <fixed-case>Î</fixed-case>le de <fixed-case>F</fixed-case>rance (<fixed-case>DEFT</fixed-case>2018 : Information Retrieval and Sentiment Analysis in Tweets about Public Transportation in <fixed-case>Î</fixed-case>le de <fixed-case>F</fixed-case>rance Region ) fra - PatrickParoubek + PatrickParoubek CyrilGrouin - PatriceBellot + PatriceBellot VincentClaveau - IrisEshkol-Taravella + IrisEshkol-Taravella AmelFraisse AgataJackiewicz JihenKaroui LauraMonceaux - Juan-ManuelTorres-Moreno + Juan-ManuelTorres-Moreno 219–230 Cet article présente l’édition 2018 de la campagne d’évaluation DEFT (Défi Fouille de Textes). A partir d’un corpus de tweets, quatre tâches ont été proposées : identifier les tweets sur la thématique des transports, puis parmi ces derniers, identifier la polarité (négatif, neutre, positif, mixte), identifier les marqueurs de sentiment et la cible, et enfin, annoter complètement chaque tweet en source et cible des sentiments exprimés. Douze équipes ont participé, majoritairement sur les deux premières tâches. Sur l’identification de la thématique des transports, la micro F-mesure varie de 0,827 à 0,908. Sur l’identification de la polarité globale, la micro F-mesure varie de 0,381 à 0,823. 2018.jeptalnrecital-deft.1 @@ -936,7 +936,7 @@ Modèles en Caractères pour la Détection de Polarité dans les Tweets (Character-level Models for Polarity Detection in Tweets ) fra DavideBuscaldi - JosephLe Roux + JosephLe Roux GaëlLejeune 249–258 Dans cet article, nous présentons notre contribution au Défi Fouille de Textes 2018 au travers de trois méthodes originales pour la classification thématique et la détection de polarité dans des tweets en français. Nous y avons ajouté un système de vote. Notre première méthode est fondée sur des lexiques (mots et emojis), les n-grammes de caractères et un classificateur à vaste marge (ou SVM). tandis que les deux autres sont des méthodes endogènes fondées sur l’extraction de caractéristiques au grain caractères : un modèle à mémoire à court-terme persistante (ou BiLSTM pour Bidirectionnal Long Short-Term Memory) et perceptron multi-couche d’une part et un modèle de séquences de caractères fermées fréquentes et classificateur SVM d’autre part. Le BiLSTM a produit de loin les meilleurs résultats puisqu’il a obtenu la première place sur la tâche 1, classification binaire de tweets selon qu’ils traitent ou non des transports, et la troisième place sur la tâche 2, classification de la polarité en 4 classes. Ce résultat est d’autant plus intéressant que la méthode proposée est faiblement paramétrique, totalement endogène et qu’elle n’implique aucun pré-traitement. @@ -947,7 +947,7 @@ Concaténation de réseaux de neurones pour la classification de tweets, <fixed-case>DEFT</fixed-case>2018 (Concatenation of neural networks for tweets classification, <fixed-case>DEFT</fixed-case>2018 ) fra DamienSileo - TimVan de Cruys + TimVan de Cruys PhilippeMuller CamillePradel 259–264 @@ -986,7 +986,7 @@ EmmanuelleDusserre RuslanKalitvianski MathieuRuhlmann - MuntsaPadró + MuntsaPadró 287–298 Cet article décrit les systèmes de l’équipe Eloquant pour la catégorisation de tweets en français dans les tâches 1 (détection de la thématique transports en commun) et 2 (détection de la polarité globale) du DEFT 2018. Nos systèmes reposent sur un enrichissement sémantique, l’apprentissage automatique et, pour la tâche 1 une approche symbolique. Nous avons effectué deux runs pour chacune des tâches. Nos meilleures F-mesures (0.897 pour la tâche 1 et 0.800 pour la tâche 2) sont au-dessus de la moyenne globale pour chaque tâche, et nous placent dans les 30% supérieurs de tous les runs pour la tâche 2. 2018.jeptalnrecital-deft.8 @@ -999,7 +999,7 @@ HugoLinsenmaier AlexandreMajed XavierCadet - AbdessalamBouchekif + AbdessalamBouchekif 299–310 Dans ce papier, nous décrivons les systèmes développés au LSE pour le DEFT 2018 sur les tâches 1 et 2 qui consistent à classifier des tweets. La première tâche consiste à déterminer si un message concerne les transports ou non. La deuxième, consiste à classifier les tweets selon leur polarité globale. Pour les deux tâches nous avons développé des systèmes basés sur des réseaux de neurones convolutifs (CNN) et récurrents (LSTM, BLSTM et GRU). Chaque mot d’un tweet donné est représenté par un vecteur dense appris à partir des données relativement proches de celles de la compétition. Le score final officiel est de 0.891 pour la tâche 1 et de 0.781 pour la tâche 2. 2018.jeptalnrecital-deft.9 @@ -1010,7 +1010,7 @@ fra ChloéMonnin OlivierQuerné - OlivierHamon + OlivierHamon 311–318 Nous présentons la participation de Syllabs à la tâche de classification de tweets dans le domaine du transport lors de DEFT 2018. Pour cette première participation à une campagne DEFT, nous avons choisi de tester plusieurs algorithmes de classification état de l’art. Après une étape de prétraitement commune à l’ensemble des algorithmes, nous effectuons un apprentissage sur le seul contenu des tweets. Les résultats étant somme toute assez proches, nous effectuons un vote majoritaire sur les trois algorithmes ayant obtenus les meilleurs résultats. 2018.jeptalnrecital-deft.10 diff --git a/data/xml/2018.tal.xml b/data/xml/2018.tal.xml index 378611fa34..0351c08b25 100644 --- a/data/xml/2018.tal.xml +++ b/data/xml/2018.tal.xml @@ -4,7 +4,7 @@ Traitement Automatique des Langues, Volume 59, Numéro 1 : Varia [Varia] EmmanuelMorin - SophieRosset + SophieRosset PascaleSébillot ATALA (Association pour le Traitement Automatique des Langues)
France
@@ -50,7 +50,7 @@ Traitement Automatique des Langues, Volume 59, Numéro 2 : Apprentissage profond pour le traitement automatique des langues [Deep Learning for natural language processing] AlexandreAllauzen - HinrichSchütze + HinrichSchütze ATALA (Association pour le Traitement Automatique des Langues)
France
2018 @@ -75,7 +75,7 @@ Classifying Semantic Clause Types With Recurrent Neural Networks: Analysis of Attention, Context & Genre Characteristics MariaBecker MichaelStaniek - ViviNastase + ViviNastase AlexisPalmer AnetteFrank 15–48 @@ -87,7 +87,7 @@ ZiedElloumi BenjaminLecouteux OlivierGalibert - LaurentBesacier + LaurentBesacier 49–76 2018.tal-2.3 fra @@ -131,7 +131,7 @@ De la constitution d’un corpus arboré à l’analyse syntaxique du serbe [From the constitution of a treebank to the syntactic analysis of the <fixed-case>S</fixed-case>erbian language] AleksandraMiletic - CécileFabre + CécileFabre DejanStosic 15–39 2018.tal-3.2 @@ -149,7 +149,7 @@ Analyse syntaxique de langues faiblement dotées à partir de plongements de mots multilingues [Syntactic analysis of under-resourced languages from multilingual word embeddings] - KyungTaeLim + KyungTaeLim NikoPartanen ThierryPoibeau 67–91 diff --git a/data/xml/2019.ccnlg.xml b/data/xml/2019.ccnlg.xml index bdb22ca1be..1446429e59 100644 --- a/data/xml/2019.ccnlg.xml +++ b/data/xml/2019.ccnlg.xml @@ -4,7 +4,7 @@ Proceedings of the 4th Workshop on Computational Creativity in Language Generation BenjaminBurtenshaw - EnriqueManjavacas + EnriqueManjavacas Association for Computational Linguistics
Tokyo, Japan
29 October--3 November @@ -64,7 +64,7 @@ Noun Generation for Nominalization in Academic Writing DariushSaberi - JohnLee + JohnLee 47–51 2019.ccnlg-1.6 saberi-lee-2019-noun diff --git a/data/xml/2019.gwc.xml b/data/xml/2019.gwc.xml index 0e3052d410..566f48092f 100644 --- a/data/xml/2019.gwc.xml +++ b/data/xml/2019.gwc.xml @@ -35,7 +35,7 @@ Thesaurus Verification Based on Distributional Similarities - NataliaLoukachevitch + NataliaLoukachevitch EkaterinaParkhomenko 16–23 In this paper we consider an approach to verification of large lexical-semantic resources as WordNet. The method of verification procedure is based on the analysis of discrepancies of corpus-based and thesaurus-based word similarities. We calculated such word similarities on the basis of a Russian news collection and Russian wordnet (RuWordNet). We applied the procedure to more than 30 thousand words and found some serious errors in word sense description, including incorrect or absent relations or missed main senses of ambiguous words. @@ -45,7 +45,7 @@ Including <fixed-case>S</fixed-case>wiss <fixed-case>S</fixed-case>tandard <fixed-case>G</fixed-case>erman in <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et EvaHuber - ErhardHinrichs + ErhardHinrichs 24–32 GermaNet (Henrich and Hinrichs, 2010; Hamp and Feldweg, 1997) is a comprehensive wordnet of Standard German spoken in the Federal Republic of Germany. The GermaNet team aims at modelling the basic vocabulary of the language. German is an official language or a minority language in many countries. It is an official language in Austria, Germany and Switzerland, each with its own codified standard variety (Auer, 2014, p. 21), and also in Belgium, Liechtenstein, and Luxemburg. German is recognized as a minority language in thirteen additional countries, including Brasil, Italy, Poland, and Russia. However, the different standard varieties of German are currently not represented in GermaNet. With this project, we make a start on changing this by including one variety, namely Swiss Standard German, into GermaNet. This shall give a more inclusive perspective on the German language. We will argue that Swiss Standard German words, Helvetisms, are best included into the already existing wordnet GermaNet, rather than creating them as a separate wordnet. 2019.gwc-1.4 @@ -87,7 +87,7 @@ Linking <fixed-case>R</fixed-case>ussian <fixed-case>W</fixed-case>ordnet <fixed-case>R</fixed-case>u<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et to <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et - NataliaLoukachevitch + NataliaLoukachevitch AnastasiaGerasimova 64–71 In this paper we consider the linking procedure of Russian wordnet (RuWordNet) to Wordnet. The specificity of the procedure in our case is based on the fact that a lot of bilingual (Russian and English) lexical data have been gathered in another Russian thesaurus RuThes, which has a different structure than WordNet. Previously, RuThes has been semi-automatically transformed into RuWordNet, having the WordNet-like structure. Now, the RuThes English data are utilized to establish matching from the RuWordNet synsets to the WordNet synsets. @@ -96,7 +96,7 @@ Fast developing of a Natural Language Interface for a <fixed-case>P</fixed-case>ortuguese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: Leveraging on Sentence Embeddings - Hugo GonçaloOliveira + Hugo GonçaloOliveira AlexandreRademaker 72–78 We describe how a natural language interface can be developed for a wordnet with a small set of handcrafted templates, leveraging on sentence embeddings. The proposed approach does not use rules for parsing natural language queries but experiments showed that the embeddings model is tolerant enough for correctly predicting relation types that do not match known patterns exactly. It was tested with OpenWordNet-PT, for which this method may provide an alternative interface, with benefits also on the curation process. @@ -153,7 +153,7 @@ Merging <fixed-case>D</fixed-case>an<fixed-case>N</fixed-case>et with <fixed-case>P</fixed-case>rinceton <fixed-case>W</fixed-case>ordnet - Bolette SandfordPedersen + Bolette SandfordPedersen SanniNimb Ida RørmannOlsen SussiOlsen @@ -175,9 +175,9 @@ Synthetic, yet natural: Properties of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et random walk corpora and the impact of rare words on embedding performance FilipKlubička - AlfredoMaldonado + AlfredoMaldonado AbhijitMahalunkar - JohnKelleher + JohnKelleher 140–150 Creating word embeddings that reflect semantic relationships encoded in lexical knowledge resources is an open challenge. One approach is to use a random walk over a knowledge graph to generate a pseudo-corpus and use this corpus to train embeddings. However, the effect of the shape of the knowledge graph on the generated pseudo-corpora, and on the resulting word embeddings, has not been studied. To explore this, we use English WordNet, constrained to the taxonomic (tree-like) portion of the graph, as a case study. We investigate the properties of the generated pseudo-corpora, and their impact on the resulting embeddings. We find that the distributions in the psuedo-corpora exhibit properties found in natural corpora, such as Zipf’s and Heaps’ law, and also observe that the proportion of rare words in a pseudo-corpus affects the performance of its embeddings on word similarity. 2019.gwc-1.18 @@ -186,7 +186,7 @@ Augmenting <fixed-case>C</fixed-case>hinese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et semantic relations with contextualized embeddings Yu-HsiangTseng - Shu-KaiHsieh + Shu-KaiHsieh 151–159 Constructing semantic relations in WordNet has been a labour-intensive task, especially in a dynamic and fast-changing language environment. Combined with recent advancements of contextualized embeddings, this paper proposes the concept of morphology-guided sense vectors, which can be used to semi-automatically augment semantic relations in Chinese Wordnet (CWN). This paper (1) built sense vectors with pre-trained contextualized embedding models; (2) demonstrated the sense vectors computed were consistent with the sense distinctions made in CWN; and (3) predicted the potential semantically-related sense pairs with high accuracy by sense vectors model. 2019.gwc-1.19 @@ -235,7 +235,7 @@ Thinking globally, acting locally – Progress in the <fixed-case>A</fixed-case>frican <fixed-case>W</fixed-case>ordnet Project MarissaGriesel - SonjaBosch + SonjaBosch Mampaka LydiaMojapelo 191–196 The African Wordnet Project (AWN) includes all nine indigenous South African languages, namely isiZulu, isiXhosa, Setswana, Sesotho sa Leboa, Tshivenda, Siswati, Sesotho, isiNdebele and Xitsonga. The AWN currently includes 61 000 synsets as well as definitions and usage examples for a large part of the synsets. The project recently received extended funding from the South African Centre for Digital Language Resources (SADiLaR) and aims to update all aspects of the current resource, including the seed list used for new development, software tools used and mapping the AWN to the latest version of PWN 3.1. As with any resource development project, it is essential to also include phases of focused quality assurance and updating of the basis on which the resource is built. The African languages remain under-resourced. This paper describes progress made in the development of the AWN as well as recent technical improvements. @@ -246,7 +246,7 @@ Commonsense Reasoning Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>SUMO</fixed-case>: a Detailed Analysis JavierÁlvez ItziarGonzalez-Dios - GermanRigau + GermanRigau 197–205 We describe a detailed analysis of a sample of large benchmark of commonsense reasoning problems that has been automatically obtained from WordNet, SUMO and their mapping. The objective is to provide a better assessment of the quality of both the benchmark and the involved knowledge resources for advanced commonsense reasoning tasks. By means of this analysis, we are able to detect some knowledge misalignments, mapping errors and lack of knowledge and resources. Our final objective is the extraction of some guidelines towards a better exploitation of this commonsense knowledge framework by the improvement of the included resources. 2019.gwc-1.25 @@ -299,7 +299,7 @@ <fixed-case>E</fixed-case>nglish <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et 2019 – An Open-Source <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for <fixed-case>E</fixed-case>nglish - John P.McCrae + John P.McCrae AlexandreRademaker FrancisBond EwaRudnicka @@ -312,9 +312,9 @@ Assessing Wordnets with <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Embeddings RubenBranco - JoãoRodrigues + JoãoRodrigues ChakavehSaedi - AntónioBranco + AntónioBranco 253–259 An effective conversion method was proposed in the literature to obtain a lexical semantic space from a lexical semantic graph, thus permitting to obtain WordNet embeddings from WordNets. In this paper, we propose the exploitation of this conversion methodology as the basis for the comparative assessment of WordNets: given two WordNets, their relative quality in terms of capturing the lexical semantics of a given language, can be assessed by (i) converting each WordNet into the corresponding semantic space (i.e. into WordNet embeddings), (ii) evaluating the resulting WordNet embeddings under the typical semantic similarity prediction task used to evaluate word embeddings in general; and (iii) comparing the performance in that task of the two word embeddings, extracted from the two WordNets. A better performance in that evaluation task results from the word embeddings that are better at capturing the semantic similarity of words, which, in turn, result from the WordNet that is of higher quality at capturing the semantics of words. 2019.gwc-1.32 @@ -358,7 +358,7 @@ Aligning the <fixed-case>B</fixed-case>ulgarian <fixed-case>BTB</fixed-case> <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with the <fixed-case>B</fixed-case>ulgarian <fixed-case>W</fixed-case>ikipedia - KirilSimov + KirilSimov PetyaOsenova LaskaLaskova IvajloRadev @@ -460,7 +460,7 @@ <fixed-case>P</fixed-case>ortuguese Manners of Speaking - Valeriade Paiva + Valeriade Paiva AlexandreRademaker 373–377 Lexical resources need to be as complete as possible. Very little work seems to have been done on adverbs, the smallest part of speech class in Princeton WordNet counting the number of synsets. Amongst adverbs, manner adverbs ending in ‘-ly’ seem the easiest to work with, as their meaning is almost the same as the one of the associated adjective. This phenomenon seems to be parallel in English and Portuguese, where these manner adverbs finish in the suffix ‘-mente’. We use this correspondence to improve the coverage of adverbs in the lexical resource OpenWordNet-PT, a wordnet for Portuguese. @@ -504,8 +504,8 @@ Utilizing Wordnets for Cognate Detection among <fixed-case>I</fixed-case>ndian Languages DipteshKanojia KevinPatel - MalharKulkarni - PushpakBhattacharyya + MalharKulkarni + PushpakBhattacharyya GholemrezaHaffari 404–412 Automatic Cognate Detection (ACD) is a challenging task which has been utilized to help NLP applications like Machine Translation, Information Retrieval and Computational Phylogenetics. Unidentified cognate pairs can pose a challenge to these applications and result in a degradation of performance. In this paper, we detect cognate word pairs among ten Indian languages with Hindi and use deep learning methodologies to predict whether a word pair is cognate or not. We identify IndoWordnet as a potential resource to detect cognate word pairs based on orthographic similarity-based methods and train neural network models using the data obtained from it. We identify parallel corpora as another potential resource and perform the same experiments for them. We also validate the contribution of Wordnets through further experimentation and report improved performance of up to 26%. We discuss the nuances of cognate detection among closely related Indian languages and release the lists of detected cognates as a dataset. We also observe the behaviour of, to an extent, unrelated Indian language pairs and release the lists of detected cognates among them as well. diff --git a/data/xml/2019.icon.xml b/data/xml/2019.icon.xml index 511a146f4f..354e6e6317 100644 --- a/data/xml/2019.icon.xml +++ b/data/xml/2019.icon.xml @@ -3,8 +3,8 @@ Proceedings of the 16th International Conference on Natural Language Processing - Dipti MisraSharma - PushpakBhattacharya + Dipti MisraSharma + PushpakBhattacharya NLP Association of India
International Institute of Information Technology, Hyderabad, India
December @@ -50,7 +50,7 @@ Introducing Aspects of Creativity in Automatic Poetry Generation BrendanBena - JugalKalita + JugalKalita 26–35 Poetry Generation involves teaching systems to automatically generate text that resembles poetic work. A deep learning system can learn to generate poetry on its own by training on a corpus of poems and modeling the particular style of language. In this paper, we propose taking an approach that fine-tunes GPT-2, a pre-trained language model, to our downstream task of poetry generation. We extend prior work on poetry generation by introducing creative elements. Specifically, we generate poems that express emotion and elicit the same in readers, and poems that use the language of dreams—called dream poetry. We are able to produce poems that correctly elicit the emotions of sadness and joy 87.5 and 85 percent, respectively, of the time. We produce dreamlike poetry by training on a corpus of texts that describe dreams. Poems from this model are shown to capture elements of dream poetry with scores of no less than 3.2 on the Likert scale. We perform crowdsourced human-evaluation for all our poems. We also make use of the Coh-Metrix tool, outlining metrics we use to gauge the quality of text generated. 2019.icon-1.4 @@ -59,9 +59,9 @@ Incorporating Sub-Word Level Information in Language Invariant Neural Event Detection SuhanPrabhu - PranavGoel + PranavGoel AlokDebnath - ManishShrivastava + ManishShrivastava 36–44 Detection of TimeML events in text have traditionally been done on corpora such as TimeBanks. However, deep learning methods have not been applied to these corpora, because these datasets seldom contain more than 10,000 event mentions. Traditional architectures revolve around highly feature engineered, language specific statistical models. In this paper, we present a Language Invariant Neural Event Detection (ALINED) architecture. ALINED uses an aggregation of both sub-word level features as well as lexical and structural information. This is achieved by combining convolution over character embeddings, with recurrent layers over contextual word embeddings. We find that our model extracts relevant features for event span identification without relying on language specific features. We compare the performance of our language invariant model to the current state-of-the-art in English, Spanish, Italian and French. We outperform the F1-score of the state of the art in English by 1.65 points. We achieve F1-scores of 84.96, 80.87 and 74.81 on Spanish, Italian and French respectively which is comparable to the current states of the art for these languages. We also introduce the automatic annotation of events in Hindi, a low resource language, with an F1-Score of 77.13. 2019.icon-1.5 @@ -69,10 +69,10 @@ Event Centric Entity Linking for <fixed-case>H</fixed-case>indi News Articles: A Knowledge Graph Based Approach - PranavGoel + PranavGoel SuhanPrabhu AlokDebnath - ManishShrivastava + ManishShrivastava 45–55 We describe the development of a knowledge graph from an event annotated corpus by presenting a pipeline that identifies and extracts the relations between entities and events from Hindi news articles. Due to the semantic implications of argument identification for events in Hindi, we use a combined syntactic argument and semantic role identification methodology. To the best of our knowledge, no other architecture exists for this purpose. The extracted combined role information is incorporated in a knowledge graph that can be queried via subgraph extraction for basic questions. The architectures presented in this paper can be used for participant extraction and event-entity linking in most Indo-Aryan languages, due to similar syntactic and semantic properties of event arguments. 2019.icon-1.6 @@ -184,9 +184,9 @@ Development of <fixed-case>POS</fixed-case> tagger for <fixed-case>E</fixed-case>nglish-<fixed-case>B</fixed-case>engali Code-Mixed data TathagataRaha - SainikMahata + SainikMahata DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 143–149 Code-mixed texts are widespread nowadays due to the advent of social media. Since these texts combine two languages to formulate a sentence, it gives rise to various research problems related to Natural Language Processing. In this paper, we try to excavate one such problem, namely, Parts of Speech tagging of code-mixed texts. We have built a system that can POS tag English-Bengali code-mixed data where the Bengali words were written in Roman script. Our approach initially involves the collection and cleaning of English-Bengali code-mixed tweets. These tweets were used as a development dataset for building our system. The proposed system is a modular approach that starts by tagging individual tokens with their respective languages and then passes them to different POS taggers, designed for different languages (English and Bengali, in our case). Tags given by the two systems are later joined together and the final result is then mapped to a universal POS tag set. Our system was checked using 100 manually POS tagged code-mixed sentences and it returned an accuracy of 75.29%. 2019.icon-1.17 @@ -234,7 +234,7 @@ Kunji : A Resource Management System for Higher Productivity in Computer Aided Translation Tools PriyankGupta - ManishShrivastava + ManishShrivastava Dipti MisraSharma RashidAhmad 184–192 diff --git a/data/xml/2019.ijclclp.xml b/data/xml/2019.ijclclp.xml index 857f5fce13..2d688cbbd6 100644 --- a/data/xml/2019.ijclclp.xml +++ b/data/xml/2019.ijclclp.xml @@ -3,7 +3,7 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 24, Number 1, June 2019 - Jen-TzungChien + Jen-TzungChien Chia-HuiChang Association for Computational Linguistics and Chinese Language Processing
Taipei, Taiwan
@@ -86,7 +86,7 @@ Bai-HongHuang Yuan-FuLiao Guang-FengDeng - MatúšPleva + MatúšPleva DanielHládek 2019.ijclclp-2.3 zho diff --git a/data/xml/2019.iwslt.xml b/data/xml/2019.iwslt.xml index 85e4c645df..42a657af9d 100644 --- a/data/xml/2019.iwslt.xml +++ b/data/xml/2019.iwslt.xml @@ -24,8 +24,8 @@ The <fixed-case>IWSLT</fixed-case> 2019 Evaluation Campaign JanNiehues RolandoCattoni - SebastianStüker - MatteoNegri + SebastianStüker + MatteoNegri MarcoTurchi Thanh-LeHa ElizabethSalesky @@ -48,14 +48,14 @@
The <fixed-case>IWSLT</fixed-case> 2019 <fixed-case>KIT</fixed-case> Speech Translation System - Ngoc-QuanPham + Ngoc-QuanPham Thai-SonNguyen Thanh-LeHa JuanHussain FelixSchneider JanNiehues - SebastianStüker - AlexanderWaibel + SebastianStüker + AlexanderWaibel This paper describes KIT’s submission to the IWSLT 2019 Speech Translation task on two sub-tasks corresponding to two different datasets. We investigate different end-to-end architectures for the speech recognition module, including our new transformer-based architectures. Overall, our modules in the pipe-line are based on the transformer architecture which has recently achieved great results in various fields. In our systems, using transformer is also advantageous compared to traditional hybrid systems in term of simplicity while still having competent results. 2019.iwslt-1.3 pham-etal-2019-iwslt @@ -93,7 +93,7 @@ End-to-end Speech Translation System Description of <fixed-case>LIT</fixed-case> for <fixed-case>IWSLT</fixed-case> 2019 MeiTu - WeiLiu + WeiLiu LijieWang XiaoChen XueWen @@ -140,7 +140,7 @@ BenjaminLecouteux DidierSchwab HangLe - LaurentBesacier + LaurentBesacier In this paper, we present our submission for the English to Czech Text Translation Task of IWSLT 2019. Our system aims to study how pre-trained language models, used as input embeddings, can improve a specialized machine translation system trained on few data. Therefore, we implemented a Transformer-based encoder-decoder neural system which is able to use the output of a pre-trained language model as input embeddings, and we compared its performance under three configurations: 1) without any pre-trained language model (constrained), 2) using a language model trained on the monolingual parts of the allowed English-Czech data (constrained), and 3) using a language model trained on a large quantity of external monolingual data (unconstrained). We used BERT as external pre-trained language model (configuration 3), and BERT architecture for training our own language model (configuration 2). Regarding the training data, we trained our MT system on a small quantity of parallel text: one set only consists of the provided MuST-C corpus, and the other set consists of the MuST-C corpus and the News Commentary corpus from WMT. We observed that using the external pre-trained BERT improves the scores of our system by +0.8 to +1.5 of BLEU on our development set, and +0.97 to +1.94 of BLEU on the test set. However, using our own language model trained only on the allowed parallel data seems to improve the machine translation performances only when the system is trained on the smallest dataset. 2019.iwslt-1.11 vial-etal-2019-lig @@ -158,15 +158,15 @@ <fixed-case>KIT</fixed-case>’s Submission to the <fixed-case>IWSLT</fixed-case> 2019 Shared Task on Text Translation FelixSchneider - AlexWaibel + AlexWaibel In this paper, we describe KIT’s submission for the IWSLT 2019 shared task on text translation. Our system is based on the transformer model [1] using our in-house implementation. We augment the available training data using back-translation and employ fine-tuning for the final model. For our best results, we used a 12-layer transformer-big config- uration, achieving state-of-the-art results on the WMT2018 test set. We also experiment with student-teacher models to improve performance of smaller models. 2019.iwslt-1.13 schneider-waibel-2019-kits Data Augmentation for End-to-End Speech Translation: <fixed-case>FBK</fixed-case>@<fixed-case>IWSLT</fixed-case> ‘19 - Mattia A.Di Gangi - MatteoNegri + Mattia A.Di Gangi + MatteoNegri Viet NhatNguyen AmirhosseinTebbifakhr MarcoTurchi @@ -178,8 +178,8 @@ How Transformer Revitalizes Character-based Neural Machine Translation: An Investigation on <fixed-case>J</fixed-case>apanese-<fixed-case>V</fixed-case>ietnamese Translation Systems Thi-VinhNgo Thanh-LeHa - Phuong-ThaiNguyen - Le-MinhNguyen + Phuong-ThaiNguyen + Le-MinhNguyen While translating between East Asian languages, many works have discovered clear advantages of using characters as the translation unit. Unfortunately, traditional recurrent neural machine translation systems hinder the practical usage of those character-based systems due to their architectural limitations. They are unfavorable in handling extremely long sequences as well as highly restricted in parallelizing the computations. In this paper, we demonstrate that the new transformer architecture can perform character-based trans- lation better than the recurrent one. We conduct experiments on a low-resource language pair: Japanese-Vietnamese. Our models considerably outperform the state-of-the-art systems which employ word-based recurrent architectures. 2019.iwslt-1.15 ngo-etal-2019-transformer @@ -189,7 +189,7 @@ Surafel M.Lakew AlinaKarakanta MarcelloFederico - MatteoNegri + MatteoNegri MarcoTurchi Multilingual Neural Machine Translation (MNMT) for low- resource languages (LRL) can be enhanced by the presence of related high-resource languages (HRL), but the relatedness of HRL usually relies on predefined linguistic assumptions about language similarity. Recently, adapting MNMT to a LRL has shown to greatly improve performance. In this work, we explore the problem of adapting an MNMT model to an unseen LRL using data selection and model adapta- tion. In order to improve NMT for LRL, we employ perplexity to select HRL data that are most similar to the LRL on the basis of language distance. We extensively explore data selection in popular multilingual NMT settings, namely in (zero-shot) translation, and in adaptation from a multilingual pre-trained model, for both directions (LRL↔en). We further show that dynamic adaptation of the model’s vocabulary results in a more favourable segmentation for the LRL in comparison with direct adaptation. Experiments show re- ductions in training time and significant performance gains over LRL baselines, even with zero LRL data (+13.0 BLEU), up to +17.0 BLEU for pre-trained multilingual model dynamic adaptation with related data selection. Our method outperforms current approaches, such as massively multilingual models and data augmentation, on four LRL. 2019.iwslt-1.16 @@ -197,7 +197,7 @@ Transformers without Tears: Improving the Normalization of Self-Attention - Toan Q.Nguyen + Toan Q.Nguyen JulianSalazar We evaluate three simple, normalization-centric changes to improve Transformer training. First, we show that pre-norm residual connections (PRENORM) and smaller initializations enable warmup-free, validation-based training with large learning rates. Second, we propose l2 normalization with a single scale parameter (SCALENORM) for faster training and better performance. Finally, we reaffirm the effectiveness of normalizing word embeddings to a fixed length (FIXNORM). On five low-resource translation pairs from TED Talks-based corpora, these changes always converge, giving an average +1.1 BLEU over state-of-the-art bilingual baselines and a new 32.8 BLEU on IWSLT '15 English-Vietnamese. We ob- serve sharper performance curves, more consistent gradient norms, and a linear relationship between activation scaling and decoder depth. Surprisingly, in the high-resource setting (WMT '14 English-German), SCALENORM and FIXNORM remain competitive but PRENORM degrades performance. 2019.iwslt-1.17 @@ -209,7 +209,7 @@ LiezlPuzon JiataoGu XutaiMa - Arya D.McCarthy + Arya D.McCarthy DeepakGopinath For automatic speech translation (AST), end-to-end approaches are outperformed by cascaded models that transcribe with automatic speech recognition (ASR), then trans- late with machine translation (MT). A major cause of the performance gap is that, while existing AST corpora are small, massive datasets exist for both the ASR and MT subsystems. In this work, we evaluate several data augmentation and pretraining approaches for AST, by comparing all on the same datasets. Simple data augmentation by translating ASR transcripts proves most effective on the English–French augmented LibriSpeech dataset, closing the performance gap from 8.2 to 1.4 BLEU, compared to a very strong cascade that could directly utilize copious ASR and MT data. The same end-to-end approach plus fine-tuning closes the gap on the English–Romanian MuST-C dataset from 6.7 to 3.7 BLEU. In addition to these results, we present practical rec- ommendations for augmentation and pretraining approaches. Finally, we decrease the performance gap to 0.01 BLEU us- ing a Transformer-based architecture. 2019.iwslt-1.18 @@ -228,7 +228,7 @@ JanRosendahl Viet Anh KhoaTran WeiyueWang - HermannNey + HermannNey In this work we analyze and compare the behavior of the Transformer architecture when using different positional encoding methods. While absolute and relative positional encoding perform equally strong overall, we show that relative positional encoding is vastly superior (4.4% to 11.9% BLEU) when translating a sentence that is longer than any observed training sentence. We further propose and analyze variations of relative positional encoding and observe that the number of trainable parameters can be reduced without a performance loss, by using fixed encoding vectors or by removing some of the positional encoding vectors. 2019.iwslt-1.20 rosendahl-etal-2019-analysis @@ -236,7 +236,7 @@ Using Whole Document Context in Neural Machine Translation ValentinMacé - ChristopheServan + ChristopheServan In Machine Translation, considering the document as a whole can help to resolve ambiguities and inconsistencies. In this paper, we propose a simple yet promising approach to add contextual information in Neural Machine Translation. We present a method to add source context that capture the whole document with accurate boundaries, taking every word into account. We provide this additional information to a Transformer model and study the impact of our method on three language pairs. The proposed approach obtains promising results in the English-German, English-French and French-English document-level translation tasks. We observe interesting cross-sentential behaviors where the model learns to use document-level information to improve translation coherence. 2019.iwslt-1.21 mace-servan-2019-using @@ -245,8 +245,8 @@ On Using <fixed-case>S</fixed-case>pec<fixed-case>A</fixed-case>ugment for End-to-End Speech Translation ParniaBahar AlbertZeyer - RalfSchlüter - HermannNey + RalfSchlüter + HermannNey This work investigates a simple data augmentation technique, SpecAugment, for end-to-end speech translation. SpecAugment is a low-cost implementation method applied directly to the audio input features and it consists of masking blocks of frequency channels, and/or time steps. We apply SpecAugment on end-to-end speech translation tasks and achieve up to +2.2% BLEU on LibriSpeech Audiobooks En→Fr and +1.2% on IWSLT TED-talks En→De by alleviating overfitting to some extent. We also examine the effectiveness of the method in a variety of data scenarios and show that the method also leads to significant improvements in various data conditions irrespective of the amount of training data. 2019.iwslt-1.22 bahar-etal-2019-using @@ -254,8 +254,8 @@ Estimating post-editing effort: a study on human judgements, task-based and reference-based metrics of <fixed-case>MT</fixed-case> quality ScartonScarton - Mikel L.Forcada - MiquelEsplà-Gomis + Mikel L.Forcada + MiquelEsplà-Gomis LuciaSpecia Devising metrics to assess translation quality has always been at the core of machine translation (MT) research. Traditional automatic reference-based metrics, such as BLEU, have shown correlations with human judgements of adequacy and fluency and have been paramount for the advancement of MT system development. Crowd-sourcing has popularised and enabled the scalability of metrics based on human judgments, such as subjective direct assessments (DA) of adequacy, that are believed to be more reliable than reference-based automatic metrics. Finally, task-based measurements, such as post-editing time, are expected to provide a more de- tailed evaluation of the usefulness of translations for a specific task. Therefore, while DA averages adequacy judgements to obtain an appraisal of (perceived) quality independently of the task, and reference-based automatic metrics try to objectively estimate quality also in a task-independent way, task-based metrics are measurements obtained either during or after performing a specific task. In this paper we argue that, although expensive, task-based measurements are the most reliable when estimating MT quality in a specific task; in our case, this task is post-editing. To that end, we report experiments on a dataset with newly-collected post-editing indicators and show their usefulness when estimating post-editing effort. Our results show that task-based metrics comparing machine-translated and post-edited versions are the best at tracking post-editing effort, as expected. These metrics are followed by DA, and then by metrics comparing the machine-translated version and independent references. We suggest that MT practitioners should be aware of these differences and acknowledge their implications when decid- ing how to evaluate MT for post-editing purposes. 2019.iwslt-1.23 @@ -266,7 +266,7 @@ YingboGao ChristianHerold WeiyueWang - HermannNey + HermannNey Prominently used in support vector machines and logistic re-gressions, kernel functions (kernels) can implicitly map data points into high dimensional spaces and make it easier to learn complex decision boundaries. In this work, by replacing the inner product function in the softmax layer, we explore the use of kernels for contextual word classification. In order to compare the individual kernels, experiments are conducted on standard language modeling and machine translation tasks. We observe a wide range of performances across different kernel settings. Extending the results, we look at the gradient properties, investigate various mixture strategies and examine the disambiguation abilities. 2019.iwslt-1.24 gao-etal-2019-exploring @@ -282,8 +282,8 @@ Generic and Specialized Word Embeddings for Multi-Domain Machine Translation - MinhQuangPham - JosepCrego + MinhQuangPham + JosepCrego FrançoisYvon JeanSenellart Supervised machine translation works well when the train and test data are sampled from the same distribution. When this is not the case, adaptation techniques help ensure that the knowledge learned from out-of-domain texts generalises to in-domain sentences. We study here a related setting, multi-domain adaptation, where the number of domains is potentially large and adapting separately to each domain would waste training resources. Our proposal transposes to neural machine translation the feature expansion technique of (Daumé III, 2007): it isolates domain-agnostic from domain-specific lexical representations, while sharing the most of the network across domains. Our experiments use two architectures and two language pairs: they show that our approach, while simple and computationally inexpensive, outperforms several strong baselines and delivers a multi-domain system that successfully translates texts from diverse sources. @@ -293,7 +293,7 @@ Lexical Micro-adaptation for Neural Machine Translation JitaoXu - JosepCrego + JosepCrego JeanSenellart This work is inspired by a typical machine translation industry scenario in which translators make use of in-domain data for facilitating translation of similar or repeating sentences. We introduce a generic framework applied at inference in which a subset of segment pairs are first extracted from training data according to their similarity to the input sentences. These segments are then used to dynamically update the parameters of a generic NMT network, thus performing a lexical micro-adaptation. Our approach demonstrates strong adaptation performance to new and existing datasets including pseudo in-domain data. We evaluate our approach on a heterogeneous English-French training dataset showing accuracy gains on all evaluated domains when compared to strong adaptation baselines. 2019.iwslt-1.27 @@ -322,7 +322,7 @@ Controlling Utterance Length in <fixed-case>NMT</fixed-case>-based Word Segmentation with Attention PierreGodard - LaurentBesacier + LaurentBesacier FrançoisYvon One of the basic tasks of computational language documentation (CLD) is to identify word boundaries in an unsegmented phonemic stream. While several unsupervised monolingual word segmentation algorithms exist in the literature, they are challenged in real-world CLD settings by the small amount of available data. A possible remedy is to take advantage of glosses or translation in a foreign, well- resourced, language, which often exist for such data. In this paper, we explore and compare ways to exploit neural machine translation models to perform unsupervised boundary detection with bilingual information, notably introducing a new loss function for jointly learning alignment and segmentation. We experiment with an actual under-resourced language, Mboshi, and show that these techniques can effectively control the output segmentation length. 2019.iwslt-1.30 @@ -331,7 +331,7 @@ Controlling the Output Length of Neural Machine Translation Surafel MelakuLakew - MattiaDi Gangi + MattiaDi Gangi MarcelloFederico The recent advances introduced by neural machine translation (NMT) are rapidly expanding the application fields of machine translation, as well as reshaping the quality level to be targeted. In particular, if translations have to fit some given layout, quality should not only be measured in terms of adequacy and fluency, but also length. Exemplary cases are the translation of document files, subtitles, and scripts for dubbing, where the output length should ideally be as close as possible to the length of the input text. This pa-per addresses for the first time, to the best of our knowledge, the problem of controlling the output length in NMT. We investigate two methods for biasing the output length with a transformer architecture: i) conditioning the output to a given target-source length-ratio class and ii) enriching the transformer positional embedding with length information. Our experiments show that both methods can induce the network to generate shorter translations, as well as acquiring inter- pretable linguistic skills. 2019.iwslt-1.31 diff --git a/data/xml/2019.jeptalnrecital.xml b/data/xml/2019.jeptalnrecital.xml index e99e8dc5fc..2766cd53aa 100644 --- a/data/xml/2019.jeptalnrecital.xml +++ b/data/xml/2019.jeptalnrecital.xml @@ -4,8 +4,8 @@ Actes de la Conférence sur le Traitement Automatique des Langues Naturelles (TALN) PFIA 2019. Volume I : Articles longs EmmanuelMorin - SophieRosset - PierreZweigenbaum + SophieRosset + PierreZweigenbaum ATALA
Toulouse, France
7 @@ -62,7 +62,7 @@ Corpus annoté de cas cliniques en français (Annotated corpus with clinical cases in <fixed-case>F</fixed-case>rench) NataliaGrabar CyrilGrouin - ThierryHamon + ThierryHamon VincentClaveau 71–84 Les corpus textuels sont utiles pour diverses applications de traitement automatique des langues (TAL) en fournissant les données nécessaires pour leur création, adaptation ou évaluation. Cependant, dans certains domaines comme le domaine médical, l’accès aux données est rendu compliqué, voire impossible, pour des raisons de confidentialité et d’éthique. Il existe néanmoins de réels besoins en corpus cliniques pour l’enseignement et la recherche. Pour répondre à ce défi, nous présentons dans cet article le corpus CAS contenant des cas cliniques de patients, réels ou fictifs, que nous avons compilés. Ces cas cliniques en français couvrent plusieurs spécialités médicales et focalisent donc sur différentes situations cliniques. Actuellement, le corpus contient 4 300 cas (environ 1,5M d’occurrences de mots). Il est accompagné d’informations (discussions des cas cliniques, mots-clés, etc.) et d’annotations que nous avons effectuées au regard des besoins de la recherche en TAL dans ce domaine. Nous présentons également les résultats de premières expériences de recherche et d’extraction d’information qui ont été effectuées avec ce corpus annoté. Ces expériences peuvent fournir une baseline à d’autres chercheurs souhaitant travailler avec les données. @@ -95,7 +95,7 @@
La génération automatique de poésie en français (Automatic Poetry Generation in <fixed-case>F</fixed-case>rench) - TimVan de Cruys + TimVan de Cruys 113–126 La génération automatique de poésie est une tâche ardue pour un système informatique. Pour qu’un poème ait du sens, il est important de prendre en compte à la fois des aspects linguistiques et littéraires. Ces dernières années, un certain nombre d’approches fructueuses sont apparues, capables de modéliser de manière adéquate divers aspects du langage naturel. En particulier, les modèles de langue basés sur les réseaux de neurones ont amélioré l’état de l’art par rapport à la modélisation prédictive de langage, tandis que les topic models sont capables de capturer une certaine cohérence thématique. Dans cet article, on explorera comment ces approches peuvent être adaptées et combinées afin de modéliser les aspects linguistiques et littéraires nécessaires pour la génération de poésie. Le système est exclusivement entraîné sur des textes génériques, et sa sortie est contrainte afin de conférer un caractère poétique au vers généré. Le cadre présenté est appliqué à la génération de poèmes en français, et évalué à l’aide d’une évaluation humaine. 2019.jeptalnrecital-long.8 @@ -114,7 +114,7 @@ <fixed-case>P</fixed-case>olylex<fixed-case>FLE</fixed-case> : une base de données d’expressions polylexicales pour le <fixed-case>FLE</fixed-case> (<fixed-case>P</fixed-case>olylex<fixed-case>FLE</fixed-case> : a database of multiword expressions for <fixed-case>F</fixed-case>rench <fixed-case>L</fixed-case>2 language learning) - AmaliaTodirascu + AmaliaTodirascu MarionCargill ThomasFrancois 143–156 @@ -128,8 +128,8 @@ Actes de la Conférence sur le Traitement Automatique des Langues Naturelles (TALN) PFIA 2019. Volume II : Articles courts EmmanuelMorin - SophieRosset - PierreZweigenbaum + SophieRosset + PierreZweigenbaum ATALA
Toulouse, France
7 @@ -143,7 +143,7 @@ Analyse faiblement supervisée de conversation en actes de dialogue (Weakly supervised dialog act analysis) CatherineThompson - NicholasAsher + NicholasAsher PhilippeMuller JérémyAuguste 159–166 @@ -169,7 +169,7 @@ Apprentissage faiblement supervisé de la structure discursive (Learning discourse structure using weak supervision ) SoniaBadene CatherineThompson - NicholasAsher + NicholasAsher Jean-PierreLorré 175–184 L’avènement des techniques d’apprentissage automatique profond a fait naître un besoin énorme de données d’entraînement. De telles données d’entraînement sont extrêmement coûteuses à créer, surtout lorsqu’une expertise dans le domaine est requise. L’une de ces tâches est l’apprentissage de la structure sémantique du discours, tâche très complexe avec des structures récursives avec des données éparses, mais qui est essentielle pour extraire des informations sémantiques profondes du texte. Nous décrivons nos expérimentations sur l’attachement des unités discursives pour former une structure, en utilisant le paradigme du data programming dans lequel peu ou pas d’annotations sont utilisées pour construire un ensemble de données d’entraînement “bruité”. Le corpus de dialogues utilisé illustre des contraintes à la fois linguistiques et non-linguistiques intéressantes qui doivent être apprises. Nous nous concentrons sur la structure des règles utilisées pour construire un modèle génératif et montrons la compétitivité de notre approche par rapport à l’apprentissage supervisé classique. @@ -179,10 +179,10 @@ <fixed-case>CALOR</fixed-case>-<fixed-case>QUEST</fixed-case> : un corpus d’entraînement et d’évaluation pour la compréhension automatique de textes (Machine reading comprehension is a task related to Question-Answering where questions are not generic in scope but are related to a particular document) - FredericBechet + FredericBechet CindyAloui DelphineCharlet - GeraldineDamnati + GeraldineDamnati JohannesHeinecke AlexisNasr FredericHerledan @@ -194,7 +194,7 @@ Chunker différents types de discours oraux : défis pour l’apprentissage automatique (Chunking different spoken speech types : challenges for machine learning) - IrisEshkol-Taravella + IrisEshkol-Taravella MariameMaarouf MarieSkrovec FloraBadin @@ -227,7 +227,7 @@ De l’extraction des interactions médicament-médicament vers les interactions aliment-médicament à partir de textes biomédicaux: Adaptation de domaine (From the extraction of drug-drug interactions to the food-drug interactions in biomedical texts : domain adaptation) TsantaRandriatsitohaina - ThierryHamon + ThierryHamon 223–232 Les interactions aliments-médicaments (FDI) se produisent lorsque des aliments et des médicaments sont pris simultanément et provoquent un effet inattendu. Nous considérons l’extraction de ces interactions dans les textes comme une tâche d’extraction de relation pouvant être résolue par des méthodes de classification. Toutefois, étant donné que ces interactions sont décrites de manière très fine, nous sommes confrontés au manque de données et au manque d’exemples par type de relation. Pour résoudre ce problème, nous proposons d’appliquer une adaptation de domaine à partir des interactions médicament-médicament (DDI) qui est une tâche similaire, afin d’établir une correspondance entre les types de relations et d’étiqueter les instances FDI selon les types DDI. Notre approche confirme une cohérence entre les 2 domaines et fournit une base pour la spécification des relations et la pré-annotation de nouvelles données. Les performances des modèles de classification appuie également l’efficacité de l’adaptation de domaine sur notre tâche. 2019.jeptalnrecital-court.8 @@ -236,7 +236,7 @@ Demonette2 - Une base de données dérivationnelle du français à grande échelle : premiers résultats (Demonette2 – A large scale derivational database for <fixed-case>F</fixed-case>rench: first results) - FiammettaNamer + FiammettaNamer LucieBarque OlivierBonami PaulineHaas @@ -270,7 +270,7 @@ Développement d’un lexique morphologique et syntaxique de l’ancien français (Development of a morphological and syntactic lexicon of <fixed-case>O</fixed-case>ld <fixed-case>F</fixed-case>rench) - BenoîtSagot + BenoîtSagot 265–274 Nous décrivons dans cet article notre travail de développement d’un lexique morphologique et syntaxique à grande échelle de l’ancien français pour le traitement automatique des langues. Nous nous sommes appuyés sur des ressources dictionnairiques et lexicales dans lesquelles l’extraction d’informations structurées et exploitables a nécessité des développements spécifiques. De plus, la mise en correspondance d’informations provenant de ces différentes sources a soulevé des difficultés. Nous donnons quelques indications quantitatives sur le lexique obtenu, et discutons de sa fiabilité dans sa version actuelle et des perspectives d’amélioration permises par l’existence d’une première version, notamment au travers de l’analyse automatique de données textuelles. 2019.jeptalnrecital-court.12 @@ -316,7 +316,7 @@ Exploring sentence informativeness SyrielleMontariol - AinaGarí Soler + AinaGarí Soler AlexandreAllauzen 303–312 This study is a preliminary exploration of the concept of informativeness –how much information a sentence gives about a word it contains– and its potential benefits to building quality word representations from scarce data. We propose several sentence-level classifiers to predict informativeness, and we perform a manual annotation on a set of sentences. We conclude that these two measures correspond to different notions of informativeness. However, our experiments show that using the classifiers’ predictions to train word embeddings has an impact on embedding quality. @@ -373,8 +373,8 @@ Multilingual and Multitarget Hate Speech Detection in Tweets PatriciaChiril - FarahBenamara Zitoune - VéroniqueMoriceau + FarahBenamara Zitoune + VéroniqueMoriceau MarlèneCoulomb-Gully AbhishekKumar 351–360 @@ -384,7 +384,7 @@ Observation de l’expérience client dans les restaurants (Mapping Reviewers’ Experience in Restaurants) - IrisEshkol-Taravella + IrisEshkol-Taravella HyunJung Kang 361–370 Ces dernières années, les recherches sur la fouille d’opinions ou l’analyse des sentiments sont menées activement dans le domaine du Traitement Automatique des Langues (TAL). De nombreuses études scientifiques portent sur l’extraction automatique des opinions positives ou négatives et de leurs cibles. Ce travail propose d’identifier automatiquement une évaluation, exprimée explicitement ou implicitement par des internautes dans le corpus d’avis tiré du Web. Six catégories d’évaluation sont proposées : opinion positive, opinion négative, opinion mixte, intention, suggestion et description. La méthode utilisée est fondée sur l’apprentissage supervisé qui tient compte des caractéristiques linguistiques de chaque catégorie retenue. L’une des difficultés que nous avons rencontrée concerne le déséquilibre entre les classes d’évaluation créées, cependant, cet obstacle a pu être surmonté dans l’apprentissage grâce aux stratégies de sur-échantillonnage et aux stratégies algorithmiques. @@ -410,7 +410,7 @@ NathalieCamelin ChafikAloulou YannickEstève - LamiaHadrich Belguith + LamiaHadrich Belguith 381–390 Nous nous intéressons, dans cet article, à la tâche d’analyse d’opinions en arabe. Nous étudions la spécificité de la langue arabe pour la détection de polarité. Nous nous focalisons ici sur les caractéristiques d’agglutination et de richesse morphologique de cette langue. Nous avons particulièrement étudié différentes représentations d’unité lexicale : token, lemme et light stemme. Nous avons construit et testé des espaces continus de ces différentes représentations lexicales. Nous avons mesuré l’apport de tels types de representations vectorielles dans notre cadre spécifique. Les performances du réseau CNN montrent un gain significatif de 2% par rapport à l’état de l’art. 2019.jeptalnrecital-court.24 @@ -450,7 +450,7 @@ Réutilisation de Textes dans les Manuscrits Anciens (Text Reuse in Ancient Manuscripts) AmirHazem - BéatriceDaille + BéatriceDaille DominiqueStutzmann JacobCurrie ChristineJacquin @@ -502,8 +502,8 @@ Actes de la Conférence sur le Traitement Automatique des Langues Naturelles (TALN) PFIA 2019. Volume III : RECITAL EmmanuelMorin - SophieRosset - PierreZweigenbaum + SophieRosset + PierreZweigenbaum ATALA
Toulouse, France
7 @@ -618,8 +618,8 @@ Actes de la Conférence sur le Traitement Automatique des Langues Naturelles (TALN) PFIA 2019. Volume IV : Démonstrations EmmanuelMorin - SophieRosset - PierreZweigenbaum + SophieRosset + PierreZweigenbaum ATALA
Toulouse, France
7 @@ -659,7 +659,7 @@ Démonstrateur en-ligne du projet <fixed-case>ANR</fixed-case> <fixed-case>PARSEME</fixed-case>-<fixed-case>FR</fixed-case> sur les expressions polylexicales (On-line demonstrator of the <fixed-case>PARSEME</fixed-case>-<fixed-case>FR</fixed-case> project on multiword expressions) MarineSchmitt EliseMoreau - MathieuConstant + MathieuConstant AgataSavary 627–630 Nous présentons le démonstrateur en-ligne du projet ANR PARSEME-FR dédié aux expressions polylexicales. Il inclut différents outils d’identification de telles expressions et un outil d’exploration des ressources linguistiques de ce projet. @@ -669,7 +669,7 @@
<fixed-case>S</fixed-case>yl<fixed-case>N</fixed-case>ews, un agréfilter multilingue (<fixed-case>S</fixed-case>yl<fixed-case>N</fixed-case>ews, a multilingual aggrefilter) - OlivierHamon + OlivierHamon KévinEspasa SaraQuispe 631–634 @@ -683,9 +683,9 @@ IoanCalapodescu CarolineBrun VassilinaNikoulina - SalahAït-Mokhtar + SalahAït-Mokhtar 635–638 - + 2019.jeptalnrecital-demo.5 fra calapodescu-etal-2019-sentiment @@ -708,8 +708,8 @@ Actes de la Conférence sur le Traitement Automatique des Langues Naturelles (TALN) PFIA 2019. Défi Fouille de Textes (atelier TALN-RECITAL) EmmanuelMorin - SophieRosset - PierreZweigenbaum + SophieRosset + PierreZweigenbaum ATALA
Toulouse, France
7 @@ -724,7 +724,7 @@ Recherche et extraction d’information dans des cas cliniques. Présentation de la campagne d’évaluation <fixed-case>DEFT</fixed-case> 2019 (Information Retrieval and Information Extraction from Clinical Cases) NataliaGrabar CyrilGrouin - ThierryHamon + ThierryHamon VincentClaveau 7–16 Cet article présente la campagne d’évaluation DEFT 2019 sur l’analyse de textes cliniques rédigés en français. Le corpus se compose de cas cliniques publiés et discutés dans des articles scientifiques, et indexés par des mots-clés. Nous proposons trois tâches indépendantes : l’indexation des cas cliniques et discussions, évaluée prioritairement par la MAP (mean average precision), l’appariement entre cas cliniques et discussions, évalué au moyen d’une précision, et l’extraction d’information parmi quatre catégories (âge, genre, origine de la consultation, issue), évaluée en termes de rappel, précision et F-mesure. Nous présentons les résultats obtenus par les participants sur chaque tâche. @@ -770,7 +770,7 @@ Indexation et appariements de documents cliniques pour le Deft 2019 (Indexing and pairing texts of the medical domain ) DavideBuscaldi DhaouGhoul - JosephLe Roux + JosephLe Roux GaëlLejeune 49–56 Dans cet article, nous présentons nos méthodes pour les tâches d’indexation et d’appariements du Défi Fouile de Textes (Deft) 2019. Pour la taĉhe d’indexation nous avons testé deux méthodes, une fondée sur l’appariemetn préalable des documents du jeu de tset avec les documents du jeu d’entraînement et une autre méthode fondée sur l’annotation terminologique. Ces méthodes ont malheureusement offert des résultats assez faible. Pour la tâche d’appariement, nous avons dévellopé une méthode sans apprentissage fondée sur des similarités de chaînes de caractères ainsi qu’une méthode exploitant des réseaux siamois. Là encore les résultats ont été plutôt décevant même si la méthode non supervisée atteint un score plutôt honorable pour une méthode non-supervisée : 62% . @@ -794,7 +794,7 @@ EstelleMaudet OralieCattan Maureende Seyssel - ChristopheServan + ChristopheServan 67–80 Dans ce papier, nous présentons la participation de Qwant Research aux tâches 2 et 3 de l’édition 2019 du défi fouille de textes (DEFT) portant sur l’analyse de documents cliniques rédigés en français. La tâche 2 est une tâche de similarité sémantique qui demande d’apparier cas cliniques et discussions médicales. Pour résoudre cette tâche, nous proposons une approche reposant sur des modèles de langue et évaluons l’impact de différents pré-traitements et de différentes techniques d’appariement sur les résultats. Pour la tâche 3, nous avons développé un système d’extraction d’information qui produit des résultats encourageants en termes de précision. Nous avons expérimenté deux approches différentes, l’une se fondant exclusivement sur l’utilisation de réseaux de neurones pour traiter la tâche, l’autre reposant sur l’exploitation des informations linguistiques issues d’une analyse syntaxique. 2019.jeptalnrecital-deft.7 @@ -804,7 +804,7 @@ Aprentissage non-supervisé pour l’appariement et l’étiquetage de cas cliniques en français - <fixed-case>DEFT</fixed-case>2019 (Unsupervised learning for matching and labelling of <fixed-case>F</fixed-case>rench clinical cases - <fixed-case>DEFT</fixed-case>2019 ) DamienSileo - TimVan de Cruys + TimVan de Cruys PhilippeMuller CamillePradel 81–90 @@ -830,8 +830,8 @@ Actes de la Conférence sur le Traitement Automatique des Langues Naturelles (TALN) PFIA 2019. Terminologie et Intelligence Artificielle (atelier TALN-RECITAL \& IC) EmmanuelMorin - SophieRosset - PierreZweigenbaum + SophieRosset + PierreZweigenbaum ATALA
Toulouse, France
7 @@ -845,7 +845,7 @@ Terminology systematization for Cybersecurity domain in <fixed-case>I</fixed-case>talian Language ClaudiaLanza - BéatriceDaille + BéatriceDaille 7–18 This paper aims at presenting the first steps to improve the quality of the first draft of an Italian thesaurus for Cybersecurity terminology that has been realized for a specific project activity in collaboration with CybersecurityLab at Informatics and Telematics Institute (IIT) of the National Council of Research (CNR) in Italy. In particular, the paper will focus, first, on the terminological knowledge base built to retrieve the most representative candidate terms of Cybersecurity domain in Italian language, giving examples of the main gold standard repositories that have been used to build this semantic tool. Attention will be then given to the methodology and software employed to configure a system of NLP rules to get the desired semantic results and to proceed with the enhancement of the candidate terms selection which are meant to be inserted in the controlled vocabulary. 2019.jeptalnrecital-tia.1 @@ -854,7 +854,7 @@ Identification des catégories de relations aliment-médicament (Identification of categories of food-drug relations) TsantaRandriatsitohaina - ThierryHamon + ThierryHamon 19–30 Les interactions aliment-médicament se produisent lorsque des aliments et des médicaments pris ensembles provoquent un effet inattendu. Leur reconnaissance automatique dans les textes peut être considérée comme une tâche d’extraction de relation à l’aide de méthodes de classification. Toutefois, étant donné que ces interactions sont décrites de manière très fine, nous sommes confrontés au manque de données et au manque d’exemples par type de relation. Pour résoudre ce problème, nous proposons une approche efficace pour regrouper des relations partageant une représentation similaire en groupes et réduire le manque d’exemples. Notre approche améliore les performances de la classification des FDI. Enfin, nous contrastons une méthode de regroupement intuitive basée sur la définition des types de relation et un apprentissage non supervisé basé sur les instances de chaque type de relation. 2019.jeptalnrecital-tia.2 @@ -864,7 +864,7 @@ Terminology-based Text Embedding for Computing Document Similarities on Technical Content HamidMirisaee - EricGaussier + EricGaussier CedricLagnier AgnesGuerraz 31–42 diff --git a/data/xml/2019.lilt.xml b/data/xml/2019.lilt.xml index f4da0fad44..48809d66ab 100644 --- a/data/xml/2019.lilt.xml +++ b/data/xml/2019.lilt.xml @@ -11,7 +11,7 @@ Syntactic composition and selectional preferences in <fixed-case>H</fixed-case>indi Light Verb Constructions AshwiniVaidya - MarthaPalmer + MarthaPalmer Previous work on light verb constructions (e.g. chorii kar ‘theft do; steal’) in Hindi describes their syntactic formation via co-predication (Ahmed et al., 2012, Butt, 2014). This implies that both noun and light verb contribute their arguments, and these overlapping argument structures must be composed in the syntax. In this paper, we present a co-predication analysis using Tree-Adjoining Grammar, which models syntactic composition and semantic selectional preferences without transformations (deletion or argument identification). The analysis has two key components (i) an underspecified category for the nominal and (ii) combinatorial constraints on the noun and light verb to specify selectional preferences. The former has the advantage of syntactic composition without argument identification and the latter prevents over-generalization, while recognizing the semantic contribution of both predicates. This work additionally accounts for the agreement facts for the Hindi LVC. 1 2019.lilt-17.1 @@ -27,7 +27,7 @@ Complex predicates: Structure, potential structure and underspecification - StefanMüller + StefanMüller This paper compares a recent TAG-based analysis of complex predicates in Hindi/Urdu with its HPSG analog. It points out that TAG combines actual structure while HPSG (and Categorial Grammar and other valence-based frameworks) specify valence of lexical items and hence potential structure. This makes it possible to have light verbs decide which arguments of embedded heads get realized, somthing that is not possible in TAG. TAG has to retreat to disjunctions instead. While this allows straight-forward analyses of active/passive alternations based on the light verb in valence-based frameworks, such an option does not exist for TAG and it has to be assumed that preverbs come with different sets of arguments. 3 2019.lilt-17.3 @@ -56,7 +56,7 @@ PrashantPardeshi AlistairButler StephenHorn - KeiYoshimoto + KeiYoshimoto IkuNagasaki 0 2019.lilt-18.1 diff --git a/data/xml/2019.nsurl.xml b/data/xml/2019.nsurl.xml index 64a2ead1b5..aae0b37ae5 100644 --- a/data/xml/2019.nsurl.xml +++ b/data/xml/2019.nsurl.xml @@ -97,14 +97,14 @@ Motivations, challenges, and perspectives for the development of an Automatic Speech Recognition System for the under-resourced <fixed-case>N</fixed-case>giemboon Language PatriceYemmene - LaurentBesacier + LaurentBesacier 59–67 2019.nsurl-1.9 yemmene-besacier-2019-motivations <fixed-case>NITK</fixed-case>-<fixed-case>IT</fixed-case>_<fixed-case>NLP</fixed-case>@<fixed-case>NSURL</fixed-case>2019: Transfer Learning based <fixed-case>POS</fixed-case> Tagger for Under Resourced <fixed-case>B</fixed-case>hojpuri and <fixed-case>M</fixed-case>agahi Language - Anand KumarM + Anand KumarM 68–72 2019.nsurl-1.10 m-2019-nitk diff --git a/data/xml/2019.rocling.xml b/data/xml/2019.rocling.xml index 14351c27a5..82d9be5f28 100644 --- a/data/xml/2019.rocling.xml +++ b/data/xml/2019.rocling.xml @@ -5,7 +5,7 @@ Proceedings of the 31st Conference on Computational Linguistics and Speech Processing (ROCLING 2019) Chen-YuChiag Min-YuhDay - Jen-TzungChien + Jen-TzungChien The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
New Taipei City, Taiwan
October @@ -133,7 +133,7 @@ Sequence to Sequence Convolutional Neural Network for Automatic Spelling Correction DanielHládek - MatúšPleva + MatúšPleva JánStaš Yuan-FuLiao 102–111 @@ -144,7 +144,7 @@ 基於深度學習之簡答題問答系統初步探討(A Preliminary Study on Deep Learning-based Short Answer Question Answering System) Yu-ChenLin Yuan-FuLiao - MatúšPleva + MatúšPleva DanielHládek 112–121 2019.rocling-1.13 @@ -168,7 +168,7 @@ Yuan-FuLiao Chen-MingPan Tzu-HsiuKuo - MatúšPleva + MatúšPleva DanielHládek 137–151 2019.rocling-1.15 @@ -179,7 +179,7 @@ 適合漸凍人使用之語音轉換系統初步研究(Deep Neural-Network Bandwidth Extension and Denoising Voice Conversion System for <fixed-case>ALS</fixed-case> Patients) Bai-HongHuang Yuan-FuLiao - MatúšPleva + MatúšPleva DanielHládek 152–166 2019.rocling-1.16 @@ -198,7 +198,7 @@ 基於卷積神經網路之台語關鍵詞辨識(<fixed-case>T</fixed-case>aiwanese keyword recognition using Convolutional Neural Networks) Chi-HungLiu - Ren-YuanLyu + Ren-YuanLyu Wei-ZhongZhan Jie-ShuWu Da-DaoZhu @@ -211,7 +211,7 @@ Extracting Semantic Representations of Sexual Biases from Word Vectors Ying-YuChen - Shu-KaiHsieh + Shu-KaiHsieh 192–201 2019.rocling-1.19 chen-hsieh-2019-extracting @@ -242,7 +242,7 @@ Kuan-YiLiu Syu-SiangWang YuTsao - Jeih-weihHung + Jeih-weihHung 226–240 2019.rocling-1.22 liu-etal-2019-speech @@ -252,7 +252,7 @@ Wen-ChaoYeh Yu-LunHsieh Yung-ChunChang - Wen-LianHsu + Wen-LianHsu 241–245 2019.rocling-1.23 zho @@ -301,7 +301,7 @@ Influences of Prosodic Feature Replacement on the Perceived Singing Voice Identity Kuan-YiKang Yi-WenLiu - Hsin-MinWang + Hsin-MinWang 296–309 2019.rocling-1.28 kang-etal-2019-influences @@ -322,10 +322,10 @@ Building of children speech corpus for improving automatic subtitling services - MatusPleva + MatusPleva StanislavOndas DanielHládek - JozefJuhar + JozefJuhar JánStaš Yuan-FuLiao 325–333 @@ -336,7 +336,7 @@ 基於階層式編碼架構之文本可讀性預測(A Hierarchical Encoding Framework for Text Readability Prediction) Shi-YanWeng Hou-ChiangTseng - Yao-TingSung + Yao-TingSung BerlinChen 334–342 2019.rocling-1.31 @@ -346,7 +346,7 @@ 國語語音辨識系統中之人名語言模型(The Personal Name Modeling in <fixed-case>M</fixed-case>andarin <fixed-case>ASR</fixed-case> System) Hong-BinLiang - Yih-RuWang + Yih-RuWang 343–357 2019.rocling-1.32 zho @@ -393,7 +393,7 @@ Four-word Idioms Containing Opposites in <fixed-case>M</fixed-case>andarin - Siaw-FongChung + Siaw-FongChung 398–407 2019.rocling-1.37 chung-2019-four @@ -404,7 +404,7 @@ Ching-Yu HelenYang Ying-ZhuChen Jhih-JieChen - Jason S.Chang + Jason S.Chang 408–422 2019.rocling-1.38 zho @@ -423,7 +423,7 @@ 標註英中同步樣式文法之研究(Annotating Synchronous Grammar Patterns across <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese) Ching-Yu HelenYang Ying-ZhuChen - Jason S.Chang + Jason S.Chang Yi-ChienLin Wei-Tien DylanTsai 424–433 diff --git a/data/xml/2019.tal.xml b/data/xml/2019.tal.xml index aa3b53069b..0da6c8b182 100644 --- a/data/xml/2019.tal.xml +++ b/data/xml/2019.tal.xml @@ -3,9 +3,9 @@ Traitement Automatique des Langues, Volume 60, Numéro 1 : Varia [Varia] - CécileFabre + CécileFabre EmmanuelMorin - SophieRosset + SophieRosset PascaleSébillot ATALA (Association pour le Traitement Automatique des Langues)
France
@@ -32,8 +32,8 @@ Traitement Automatique des Langues, Volume 60, Numéro 2 : Corpus annotés [Annotated corpora] - MarieCandito - MarkLiberman + MarieCandito + MarkLiberman ATALA (Association pour le Traitement Automatique des Langues)
France
2019 @@ -55,7 +55,7 @@
Un corpus arboré pour le français : le <fixed-case>F</fixed-case>rench Treebank [A parsed corpus for <fixed-case>F</fixed-case>rench: the <fixed-case>F</fixed-case>rench treebank] - AnneAbeillé + AnneAbeillé LionelClément LoïcLiégeois 19–43 @@ -67,7 +67,7 @@ Redonner du sens à l’accord interannotateur : vers une interprétation des mesures d’accord en termes de reproductibilité de l’annotation [Interpreting inter-annotator agreement measures : towards an interpretation in terms of annotation reproducibility] DanyBregeon Jean-YvesAntoine - JeanneVillaneau + JeanneVillaneau AnaïsHalftermeyer 45–69 2019.tal-2.3 @@ -77,7 +77,7 @@ Conversion et améliorations de corpus du français annotés en <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies [Conversion and Improvement of <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies <fixed-case>F</fixed-case>rench corpora] BrunoGuillaume - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe GuyPerrier 71–95 2019.tal-2.4 @@ -112,7 +112,7 @@ Transcription automatique et segmentation thématique de livres d’heures manuscrits [Automatic transcription and thematic segmentation of Books of Hours] - BéatriceDaille + BéatriceDaille AmirHazem ChristopherKermorvant MartinMaarand diff --git a/data/xml/2020.aacl.xml b/data/xml/2020.aacl.xml index 38ffb8dd2e..b772726ce7 100644 --- a/data/xml/2020.aacl.xml +++ b/data/xml/2020.aacl.xml @@ -3,7 +3,7 @@ Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing - Kam-FaiWong + Kam-FaiWong KevinKnight HuaWu Association for Computational Linguistics @@ -25,7 +25,7 @@ JiajunZhang LemaoLiu GuopingHuang - ChengqingZong + ChengqingZong 1–11 We propose a touch-based editing method for translation, which is more flexible than traditional keyboard-mouse-based translation postediting. This approach relies on touch actions that users perform to indicate translation errors. We present a dual-encoder model to handle the actions and generate refined translations. To mimic the user feedback, we adopt the TER algorithm comparing between draft translations and references to automatically extract the simulated actions for training data construction. Experiments on translation datasets with simulated editing actions show that our method significantly improves original translation of Transformer (up to 25.31 BLEU) and outperforms existing interactive translation methods (up to 16.64 BLEU). We also conduct experiments on post-editing dataset to further prove the robustness and effectiveness of our method. 2020.aacl-main.1 @@ -38,7 +38,7 @@ LiDong FuruWei XianlingMao - HeyanHuang + HeyanHuang 12–17 Multilingual pretrained language models (such as multilingual BERT) have achieved impressive results for cross-lingual transfer. However, due to the constant model capacity, multilingual pre-training usually lags behind the monolingual competitors. In this work, we present two approaches to improve zero-shot cross-lingual classification, by transferring the knowledge from monolingual pretrained models to multilingual ones. Experimental results on two cross-lingual classification benchmarks show that our methods outperform vanilla multilingual fine-tuning. 2020.aacl-main.2 @@ -60,7 +60,7 @@ Graph Attention Network with Memory Fusion for Aspect-level Sentiment Analysis LiYuan JinWang - Liang-ChihYu + Liang-ChihYu XuejieZhang 27–36 Aspect-level sentiment analysis(ASC) predicts each specific aspect term’s sentiment polarity in a given text or review. Recent studies used attention-based methods that can effectively improve the performance of aspect-level sentiment analysis. These methods ignored the syntactic relationship between the aspect and its corresponding context words, leading the model to focus on syntactically unrelated words mistakenly. One proposed solution, the graph convolutional network (GCN), cannot completely avoid the problem. While it does incorporate useful information about syntax, it assigns equal weight to all the edges between connected words. It may still incorrectly associate unrelated words to the target aspect through the iterations of graph convolutional propagation. In this study, a graph attention network with memory fusion is proposed to extend GCN’s idea by assigning different weights to edges. Syntactic constraints can be imposed to block the graph convolutional propagation of unrelated words. A convolutional layer and a memory fusion were applied to learn and exploit multiword relations and draw different weights of words to improve performance further. Experimental results on five datasets show that the proposed method yields better performance than existing methods. @@ -134,7 +134,7 @@ ChenlongHu HidetakaKamigaito HiroyaTakamura - ManabuOkumura + ManabuOkumura 80–86 We propose a simple and effective method for incorporating word clusters into the Continuous Bag-of-Words (CBOW) model. Specifically, we propose to replace infrequent input and output words in CBOW model with their clusters. The resulting cluster-incorporated CBOW model produces embeddings of frequent words and a small amount of cluster embeddings, which will be fine-tuned in downstream tasks. We empirically show our replacing method works well on several downstream tasks. Through our analysis, we show that our method might be also useful for other similar models which produce word embeddings. 2020.aacl-main.10 @@ -167,7 +167,7 @@ High-order Refining for End-to-end <fixed-case>C</fixed-case>hinese Semantic Role Labeling HaoFei YafengRen - DonghongJi + DonghongJi 100–105 Current end-to-end semantic role labeling is mostly accomplished via graph-based neural models. However, these all are first-order models, where each decision for detecting any predicate-argument pair is made in isolation with local features. In this paper, we present a high-order refining mechanism to perform interaction between all predicate-argument pairs. Based on the baseline graph model, our high-order refining module learns higher-order features between all candidate pairs via attention calculation, which are later used to update the original token representations. After several iterations of refinement, the underlying token representations can be enriched with globally interacted features. Our high-order model achieves state-of-the-art results on Chinese SRL data, including CoNLL09 and Universal Proposition Bank, meanwhile relieving the long-range dependency issues. 2020.aacl-main.13 @@ -176,7 +176,7 @@ Exploiting <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Synset and Hypernym Representations for Answer Selection - WeikangLi + WeikangLi YunfangWu 106–115 Answer selection (AS) is an important subtask of document-based question answering (DQA). In this task, the candidate answers come from the same document, and each answer sentence is semantically related to the given question, which makes it more challenging to select the true answer. WordNet provides powerful knowledge about concepts and their semantic relations so we employ WordNet to enrich the abilities of paraphrasing and reasoning of the network-based question answering model. Specifically, we exploit the synset and hypernym concepts to enrich the word representation and incorporate the similarity scores of two concepts that share the synset or hypernym relations into the attention mechanism. The proposed WordNet-enhanced hierarchical model (WEHM) consists of four modules, including WordNet-enhanced word representation, sentence encoding, WordNet-enhanced attention mechanism, and hierarchical document encoding. Extensive experiments on the public WikiQA and SelQA datasets demonstrate that our proposed model significantly improves the baseline system and outperforms all existing state-of-the-art methods by a large margin. @@ -200,7 +200,7 @@ ZhengZhang LiziLiao XiaoyanZhu - Tat-SengChua + Tat-SengChua ZitaoLiu YanHuang MinlieHuang @@ -316,7 +316,7 @@ WeiyueWang ChristianHerold ZijianYang - HermannNey + HermannNey 212–223 In order to combat overfitting and in pursuit of better generalization, label smoothing is widely applied in modern neural machine translation systems. The core idea is to penalize over-confident outputs and regularize the model so that its outputs do not diverge too much from some prior distribution. While training perplexity generally gets worse, label smoothing is found to consistently improve test performance. In this work, we aim to better understand label smoothing in the context of neural machine translation. Theoretically, we derive and explain exactly what label smoothing is optimizing for. Practically, we conduct extensive experiments by varying which tokens to smooth, tuning the probability mass to be deducted from the true targets and considering different prior distributions. We show that label smoothing is theoretically well-motivated, and by carefully choosing hyperparameters, the practical performance of strong neural machine translation systems can be further improved. 2020.aacl-main.25 @@ -387,7 +387,7 @@ Dushyant SinghChauhan DhanushS R AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 281–290 In this paper, we aim at learning the relationships and similarities of a variety of tasks, such as humour detection, sarcasm detection, offensive content detection, motivational content detection and sentiment analysis on a somewhat complicated form of information, i.e., memes. We propose a multi-task, multi-modal deep learning framework to solve multiple tasks simultaneously. For multi-tasking, we propose two attention-like mechanisms viz., Inter-task Relationship Module (iTRM) and Inter-class Relationship Module (iCRM). The main motivation of iTRM is to learn the relationship between the tasks to realize how they help each other. In contrast, iCRM develops relations between the different classes of tasks. Finally, representations from both the attentions are concatenated and shared across the five tasks (i.e., humour, sarcasm, offensive, motivational, and sentiment) for multi-tasking. We use the recently released dataset in the Memotion Analysis task @ SemEval 2020, which consists of memes annotated for the classes as mentioned above. Empirical results on Memotion dataset show the efficacy of our proposed approach over the existing state-of-the-art systems (Baseline and SemEval 2020 winner). The evaluation also indicates that the proposed multi-task framework yields better performance over the single-task learning. 2020.aacl-main.31 @@ -411,7 +411,7 @@ MukunthaNarayanan Sundararaman ZishanAhmad AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 303–312 Unsupervised style transfer in text has previously been explored through the sentiment transfer task. The task entails inverting the overall sentiment polarity in a given input sentence, while preserving its content. From the Aspect-Based Sentiment Analysis (ABSA) task, we know that multiple sentiment polarities can often be present together in a sentence with multiple aspects. In this paper, the task of aspect-level sentiment controllable style transfer is introduced, where each of the aspect-level sentiments can individually be controlled at the output. To achieve this goal, a BERT-based encoder-decoder architecture with saliency weighted polarity injection is proposed, with unsupervised training strategies, such as ABSA masked-language-modelling. Through both automatic and manual evaluation, we show that the system is successful in controlling aspect-level sentiments. 2020.aacl-main.33 @@ -448,7 +448,7 @@ MoinNadeem TianxingHe KyunghyunCho - JamesGlass + JamesGlass 334–346 This work studies the widely adopted ancestral sampling algorithms for auto-regressive language models. We use the quality-diversity (Q-D) trade-off to investigate three popular sampling methods (top-k, nucleus and tempered sampling). We focus on the task of open-ended language generation, and first show that the existing sampling algorithms have similar performance. By carefully inspecting the transformations defined by different sampling algorithms, we identify three key properties that are shared among them: entropy reduction, order preservation, and slope preservation. To validate the importance of the identified properties, we design two sets of new sampling methods: one set in which each algorithm satisfies all three properties, and one set in which each algorithm violates at least one of the properties. We compare their performance with existing algorithms, and find that violating the identified properties could lead to drastic performance degradation, as measured by the Q-D trade-off. On the other hand, we find that the set of sampling algorithms that satisfy these properties performs on par with the existing sampling algorithms. 2020.aacl-main.36 @@ -474,7 +474,7 @@ Renxuan AlbertLi IhabHajjar FeliciaGoldstein - Jinho D.Choi + Jinho D.Choi 358–365 This paper presents a new dataset, B-SHARP, that can be used to develop NLP models for the detection of Mild Cognitive Impairment (MCI) known as an early sign of Alzheimer’s disease. Our dataset contains 1-2 min speech segments from 326 human subjects for 3 topics, (1) daily activity, (2) room environment, and (3) picture description, and their transcripts so that a total of 650 speech segments are collected. Given the B-SHARP dataset, several hierarchical text classification models are developed that jointly learn combinatory features across all 3 topics. The best performance of 74.1% is achieved by an ensemble model that adapts 3 types of transformer encoders. To the best of our knowledge, this is the first work that builds deep learning-based text classification models on multiple contents for the detection of MCI. 2020.aacl-main.38 @@ -485,11 +485,11 @@ An Exploratory Study on Multilingual Quality Estimation ShuoSun MarinaFomicheva - FrédéricBlain + FrédéricBlain VishravChaudhary AhmedEl-Kishky AdithyaRenduchintala - FranciscoGuzmán + FranciscoGuzmán LuciaSpecia 366–377 Predicting the quality of machine translation has traditionally been addressed with language-specific models, under the assumption that the quality label distribution or linguistic features exhibit traits that are not shared across languages. An obvious disadvantage of this approach is the need for labelled data for each given language pair. We challenge this assumption by exploring different approaches to multilingual Quality Estimation (QE), including using scores from translation models. We show that these outperform single-language models, particularly in less balanced quality label distributions and low-resource settings. In the extreme case of zero-shot QE, we show that it is possible to accurately predict quality for any given new language from models trained on other languages. Our findings indicate that state-of-the-art neural QE models based on powerful pre-trained representations generalise well across languages, making them more applicable in real-world settings. @@ -500,7 +500,7 @@ <fixed-case>E</fixed-case>nglish-to-<fixed-case>C</fixed-case>hinese Transliteration with Phonetic Auxiliary Task YuanHe - Shay B.Cohen + Shay B.Cohen 378–388 Approaching named entities transliteration as a Neural Machine Translation (NMT) problem is common practice. While many have applied various NMT techniques to enhance machine transliteration models, few focus on the linguistic features particular to the relevant languages. In this paper, we investigate the effect of incorporating phonetic features for English-to-Chinese transliteration under the multi-task learning (MTL) setting—where we define a phonetic auxiliary task aimed to improve the generalization performance of the main transliteration task. In addition to our system, we also release a new English-to-Chinese dataset and propose a novel evaluation metric which considers multiple possible transliterations given a source name. Our results show that the multi-task model achieves similar performance as the previous state of the art with a model of a much smaller size. 2020.aacl-main.40 @@ -512,7 +512,7 @@ ZijianYang YingboGao WeiyueWang - HermannNey + HermannNey 389–395 Attention-based encoder-decoder models have achieved great success in neural machine translation tasks. However, the lengths of the target sequences are not explicitly predicted in these models. This work proposes length prediction as an auxiliary task and set up a sub-network to obtain the length information from the encoder. Experimental results show that the length prediction sub-network brings improvements over the strong baseline system and that the predicted length can be used as an alternative to length normalization during decoding. 2020.aacl-main.41 @@ -533,7 +533,7 @@ Heads-up! Unsupervised Constituency Parsing via Self-Attention Heads BowenLi TaeukKim - Reinald KimAmplayo + Reinald KimAmplayo FrankKeller 409–424 Transformer-based pre-trained language models (PLMs) have dramatically improved the state of the art in NLP across many tasks. This has led to substantial interest in analyzing the syntactic knowledge PLMs learn. Previous approaches to this question have been limited, mostly using test suites or probes. Here, we propose a novel fully unsupervised parsing approach that extracts constituency trees from PLM attention heads. We rank transformer attention heads based on their inherent properties, and create an ensemble of high-ranking heads to produce the final tree. Our method is adaptable to low-resource languages, as it does not rely on development sets, which can be expensive to annotate. Our experiments show that the proposed method often outperform existing approaches if there is no development set present. Our unsupervised parser can also be used as a tool to analyze the grammars PLMs learn implicitly. For this, we use the parse trees induced by our method to train a neural PCFG and compare it to a grammar derived from a human-annotated treebank. @@ -545,7 +545,7 @@ Building Location Embeddings from Physical Trajectories and Textual Representations LauraBiester CarmenBanea - RadaMihalcea + RadaMihalcea 425–434 Word embedding methods have become the de-facto way to represent words, having been successfully applied to a wide array of natural language processing tasks. In this paper, we explore the hypothesis that embedding methods can also be effectively used to represent spatial locations. Using a new dataset consisting of the location trajectories of 729 students over a seven month period and text data related to those locations, we implement several strategies to create location embeddings, which we then use to create embeddings of the sequences of locations a student has visited. To identify the surface level properties captured in the representations, we propose a number of probing tasks such as the presence of a specific location in a sequence or the type of activities that take place at a location. We then leverage the representations we generated and employ them in more complex downstream tasks ranging from predicting a student’s area of study to a student’s depression level, showing the effectiveness of these location embeddings. 2020.aacl-main.44 @@ -609,7 +609,7 @@ Systematic Generalization on g<fixed-case>SCAN</fixed-case> with Language Conditioned Embedding TongGao QiHuang - RaymondMooney + RaymondMooney 491–503 Systematic Generalization refers to a learning algorithm’s ability to extrapolate learned behavior to unseen situations that are distinct but semantically similar to its training data. As shown in recent work, state-of-the-art deep learning models fail dramatically even on tasks for which they are designed when the test set is systematically different from the training data. We hypothesize that explicitly modeling the relations between objects in their contexts while learning their representations will help achieve systematic generalization. Therefore, we propose a novel method that learns objects’ contextualized embeddings with dynamic message passing conditioned on the input natural language and end-to-end trainable with other downstream deep learning modules. To our knowledge, this model is the first one that significantly outperforms the provided baseline and reaches state-of-the-art performance on grounded SCAN (gSCAN), a grounded natural language navigation dataset designed to require systematic generalization in its test splits. 2020.aacl-main.49 @@ -619,7 +619,7 @@ Are Scene Graphs Good Enough to Improve Image Captioning? VictorMilewski - Marie-FrancineMoens + Marie-FrancineMoens IacerCalixto 504–515 Many top-performing image captioning models rely solely on object features computed with an object detection model to generate image descriptions. However, recent studies propose to directly use scene graphs to introduce information about object relations into captioning, hoping to better describe interactions between objects. In this work, we thoroughly investigate the use of scene graphs in image captioning. We empirically study whether using additional scene graph encoders can lead to better image descriptions and propose a conditional graph attention network (C-GAT), where the image captioning decoder state is used to condition the graph updates. Finally, we determine to what extent noise in the predicted scene graphs influence caption quality. Overall, we find no significant difference between models that use scene graph features and models that only use object detection features across different captioning metrics, which suggests that existing scene graph generation models are still too noisy to be useful in image captioning. Moreover, although the quality of predicted scene graphs is very low in general, when using high quality scene graphs we obtain gains of up to 3.3 CIDEr compared to a strong Bottom-Up Top-Down baseline. @@ -656,7 +656,7 @@ ChenguangZhu YuShi MichaelZeng - XuedongHuang + XuedongHuang 536–541 Cross-lingual Summarization (CLS) aims at producing a summary in the target language for an article in the source language. Traditional solutions employ a two-step approach, i.e. translate -> summarize or summarize -> translate. Recently, end-to-end models have achieved better results, but these approaches are mostly limited by their dependence on large-scale labeled data. We propose a solution based on mixed-lingual pre-training that leverages both cross-lingual tasks such as translation and monolingual tasks like masked language models. Thus, our model can leverage the massive monolingual data to enhance its modeling of language. Moreover, the architecture has no task-specific components, which saves memory and increases optimization efficiency. We show in experiments that this pre-training scheme can effectively boost the performance of cross-lingual summarization. In NCLS dataset, our model achieves an improvement of 2.82 (English to Chinese) and 1.15 (Chinese to English) ROUGE-1 scores over state-of-the-art results. 2020.aacl-main.53 @@ -677,11 +677,11 @@ Leveraging Structured Metadata for Improving Question Answering on the Web XinyaDu - Ahmed HassanAwadallah + Ahmed HassanAwadallah AdamFourney RobertSim - PaulBennett - ClaireCardie + PaulBennett + ClaireCardie 551–556 We show that leveraging metadata information from web pages can improve the performance of models for answer passage selection/reranking. We propose a neural passage selection model that leverages metadata information with a fine-grained encoding strategy, which learns the representation for metadata predicates in a hierarchical way. The models are evaluated on the MS MARCO (Nguyen et al., 2016) and Recipe-MARCO datasets. Results show that our models significantly outperform baseline models, which do not incorporate metadata. We also show that the fine-grained encoding’s advantage over other strategies for encoding the metadata. 2020.aacl-main.55 @@ -696,8 +696,8 @@ YadaPruksachatkun HaokunLiu ClaraVania - KatharinaKann - Samuel R.Bowman + KatharinaKann + Samuel R.Bowman 557–575 Intermediate-task training—fine-tuning a pretrained model on an intermediate task before fine-tuning again on the target task—often improves model performance substantially on language understanding tasks in monolingual English settings. We investigate whether English intermediate-task training is still helpful on non-English target tasks. Using nine intermediate language-understanding tasks, we evaluate intermediate-task transfer in a zero-shot cross-lingual setting on the XTREME benchmark. We see large improvements from intermediate training on the BUCC and Tatoeba sentence retrieval tasks and moderate improvements on question-answering target tasks. MNLI, SQuAD and HellaSwag achieve the best overall results as intermediate tasks, while multi-task intermediate offers small additional improvements. Using our best intermediate-task models for each target task, we obtain a 5.4 point improvement over XLM-R Large on the XTREME benchmark, setting the state of the art as of June 2020. We also investigate continuing multilingual MLM during intermediate-task training and using machine-translated intermediate-task data, but neither consistently outperforms simply performing English intermediate-task training. 2020.aacl-main.56 @@ -739,7 +739,7 @@ Liputan6: A Large-scale <fixed-case>I</fixed-case>ndonesian Dataset for Text Summarization FajriKoto Jey HanLau - TimothyBaldwin + TimothyBaldwin 598–608 In this paper, we introduce a large-scale Indonesian summarization dataset. We harvest articles from Liputan6.com, an online news portal, and obtain 215,827 document–summary pairs. We leverage pre-trained language models to develop benchmark extractive and abstractive summarization methods over the dataset with multilingual and monolingual BERT-based models. We include a thorough error analysis by examining machine-generated summaries that have low ROUGE scores, and expose both issues with ROUGE itself, as well as with extractive and abstractive summarization models. 2020.aacl-main.60 @@ -760,7 +760,7 @@ Massively Multilingual Document Alignment with Cross-lingual Sentence-Mover’s Distance AhmedEl-Kishky - FranciscoGuzmán + FranciscoGuzmán 616–625 Document alignment aims to identify pairs of documents in two distinct languages that are of comparable content or translations of each other. Such aligned data can be used for a variety of NLP tasks from training cross-lingual representations to mining parallel data for machine translation. In this paper we develop an unsupervised scoring function that leverages cross-lingual sentence embeddings to compute the semantic distance between documents in different languages. These semantic distances are then used to guide a document alignment algorithm to properly pair cross-lingual web documents across a variety of low, mid, and high-resource language pairs. Recognizing that our proposed scoring function and other state of the art methods are computationally intractable for long web documents, we utilize a more tractable greedy algorithm that performs comparably. We experimentally demonstrate that our distance metric performs better alignment than current baselines outperforming them by 7% on high-resource language pairs, 15% on mid-resource language pairs, and 22% on low-resource language pairs. 2020.aacl-main.62 @@ -795,7 +795,7 @@ <fixed-case>DAPPER</fixed-case>: Learning Domain-Adapted Persona Representation Using Pretrained <fixed-case>BERT</fixed-case> and External Memory PrashanthVijayaraghavan EricChu - DebRoy + DebRoy 643–652 Research in building intelligent agents have emphasized the need for understanding characteristic behavior of people. In order to reflect human-like behavior, agents require the capability to comprehend the context, infer individualized persona patterns and incrementally learn from experience. In this paper, we present a model called DAPPER that can learn to embed persona from natural language and alleviate task or domain-specific data sparsity issues related to personas. To this end, we implement a text encoding strategy that leverages a pretrained language model and an external memory to produce domain-adapted persona representations. Further, we evaluate the transferability of these embeddings by simulating low-resource scenarios. Our comparative study demonstrates the capability of our method over other approaches towards learning rich transferable persona embeddings. Empirical evidence suggests that the learnt persona embeddings can be effective in downstream tasks like hate speech detection. 2020.aacl-main.65 @@ -828,7 +828,7 @@ <fixed-case>A</fixed-case>sking <fixed-case>C</fixed-case>rowdworkers to <fixed-case>W</fixed-case>rite <fixed-case>E</fixed-case>ntailment <fixed-case>E</fixed-case>xamples: <fixed-case>T</fixed-case>he <fixed-case>B</fixed-case>est of <fixed-case>B</fixed-case>ad Options ClaraVania RuijieChen - Samuel R.Bowman + Samuel R.Bowman 672–686 Large-scale natural language inference (NLI) datasets such as SNLI or MNLI have been created by asking crowdworkers to read a premise and write three new hypotheses, one for each possible semantic relationships (entailment, contradiction, and neutral). While this protocol has been used to create useful benchmark data, it remains unclear whether the writing-based annotation protocol is optimal for any purpose, since it has not been evaluated directly. Furthermore, there is ample evidence that crowdworker writing can introduce artifacts in the data. We investigate two alternative protocols which automatically create candidate (premise, hypothesis) pairs for annotators to label. Using these protocols and a writing-based baseline, we collect several new English NLI datasets of over 3k examples each, each using a fixed amount of annotator time, but a varying number of examples to fit that time budget. Our experiments on NLI and transfer learning show negative results: None of the alternative protocols outperforms the baseline in evaluations of generalization within NLI or on transfer to outside target tasks. We conclude that crowdworker writing still the best known option for entailment data, highlighting the need for further data collection work to focus on improving writing-based annotation processes. 2020.aacl-main.68 @@ -867,8 +867,8 @@ HaiminZhang DebanjanMahata RakeshGosangi - Rajiv RatnShah - AmandaStent + Rajiv RatnShah + AmandaStent 706–719 An NLP model’s ability to reason should be independent of language. Previous works utilize Natural Language Inference (NLI) to understand the reasoning ability of models, mostly focusing on high resource languages like English. To address scarcity of data in low-resource languages such as Hindi, we use data recasting to create NLI datasets for four existing text classification datasets. Through experiments, we show that our recasted dataset is devoid of statistical irregularities and spurious patterns. We further study the consistency in predictions of the textual entailment models and propose a consistency regulariser to remove pairwise-inconsistencies in predictions. We propose a novel two-step classification method which uses textual-entailment predictions for classification task. We further improve the performance by using a joint-objective for classification and textual entailment. We therefore highlight the benefits of data recasting and improvements on classification performance using our approach with supporting experimental results. 2020.aacl-main.71 @@ -880,7 +880,7 @@ Keng-TeLiao Cheng-SyuanLee Zhong-YuHuang - Shou-deLin + Shou-deLin 720–725 Disentangled representations have attracted increasing attention recently. However, how to transfer the desired properties of disentanglement to word representations is unclear. In this work, we propose to transform typical dense word vectors into disentangled embeddings featuring improved interpretability via encoding polysemous semantics separately. We also found the modular structure of our disentangled word embeddings helps generate more efficient and effective features for natural language processing tasks. 2020.aacl-main.72 @@ -893,7 +893,7 @@ MengfeiGuo YufengChen YingLi - JinanXu + JinanXu YajuanLyu YongZhu 726–734 @@ -985,7 +985,7 @@ Point-of-Interest Type Inference from Social Media Text DanaeSánchez Villegas - DanielPreotiuc-Pietro + DanielPreotiuc-Pietro NikolaosAletras 804–810 Physical places help shape how we perceive the experiences we have there. We study the relationship between social media text and the type of the place from where it was posted, whether a park, restaurant, or someplace else. To facilitate this, we introduce a novel data set of ~200,000 English tweets published from 2,761 different points-of-interest in the U.S., enriched with place type information. We train classifiers to predict the type of the location a tweet was sent from that reach a macro F1 of 43.67 across eight classes and uncover the linguistic markers associated with each type of place. The ability to predict semantic place information from a tweet has applications in recommendation systems, personalization services and cultural geography. @@ -1039,7 +1039,7 @@ MingyuWan QiSu Chu-RenHuang - QinLu + QinLu 833–842 Mandarin Alphabetical Word (MAW) is one indispensable component of Modern Chinese that demonstrates unique code-mixing idiosyncrasies influenced by language exchanges. Yet, this interesting phenomenon has not been properly addressed and is mostly excluded from the Chinese language system. This paper addresses the core problem of MAW identification and proposes to construct a large collection of MAWs from Sina Weibo (SMAW) using an automatic web-based technique which includes rule-based identification, informatics-based extraction, as well as Baidu search engine validation. A collection of 16,207 qualified SMAWs are obtained using this technique along with an annotated corpus of more than 200,000 sentences for linguistic research and applicable inquiries. 2020.aacl-main.84 @@ -1051,7 +1051,7 @@ <fixed-case>I</fixed-case>ndo<fixed-case>NLU</fixed-case>: Benchmark and Resources for Evaluating <fixed-case>I</fixed-case>ndonesian Natural Language Understanding BryanWilie KarissaVincentio - Genta IndraWinata + Genta IndraWinata SamuelCahyawijaya XiaohongLi Zhi YuanLim @@ -1072,7 +1072,7 @@ RudraMurthy DipteshKanojia AbhijitMishra - PushpakBhattacharyya + PushpakBhattacharyya 858–872 The gaze behaviour of a reader is helpful in solving several NLP tasks such as automatic essay grading. However, collecting gaze behaviour from readers is costly in terms of time and money. In this paper, we propose a way to improve automatic essay grading using gaze behaviour, which is learnt at run time using a multi-task learning framework. To demonstrate the efficacy of this multi-task learning based approach to automatic essay grading, we collect gaze behaviour for 48 essays across 4 essay sets, and learn gaze behaviour for the rest of the essays, numbering over 7000 essays. Using the learnt gaze behaviour, we can achieve a statistically significant improvement in performance over the state-of-the-art system for the essay sets where we have gaze data. We also achieve a statistically significant improvement for 4 other essay sets, numbering about 6000 essays, where we have no gaze behaviour data available. Our approach establishes that learning gaze behaviour improves automatic essay grading. 2020.aacl-main.86 @@ -1105,7 +1105,7 @@ MasatoNeishi YutaHayashibe HirokiOuchi - NaoakiOkazaki + NaoakiOkazaki 890–899 Explainable recommendation is a good way to improve user satisfaction. However, explainable recommendation in dialogue is challenging since it has to handle natural language as both input and output. To tackle the challenge, this paper proposes a novel and practical task to explain evidences in recommending hotels given vague requests expressed freely in natural language. We decompose the process into two subtasks on hotel reviews: Evidence Identification and Evidence Explanation. The former predicts whether or not a sentence contains evidence that expresses why a given request is satisfied. The latter generates a recommendation sentence given a request and an evidence sentence. In order to address these subtasks, we build an Evidence-based Explanation dataset, which is the largest dataset for explaining evidences in recommending hotels for vague requests. The experimental results demonstrate that the BERT model can find evidence sentences with respect to various vague requests and that the LSTM-based model can generate recommendation sentences. 2020.aacl-main.89 @@ -1114,10 +1114,10 @@ A Unified Framework for Multilingual and Code-Mixed Visual Question Answering - DeepakGupta + DeepakGupta PabitraLenka AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 900–913 In this paper, we propose an effective deep learning framework for multilingual and code- mixed visual question answering. The pro- posed model is capable of predicting answers from the questions in Hindi, English or Code- mixed (Hinglish: Hindi-English) languages. The majority of the existing techniques on Vi- sual Question Answering (VQA) focus on En- glish questions only. However, many applica- tions such as medical imaging, tourism, visual assistants require a multilinguality-enabled module for their widespread usages. As there is no available dataset in English-Hindi VQA, we firstly create Hindi and Code-mixed VQA datasets by exploiting the linguistic properties of these languages. We propose a robust tech- nique capable of handling the multilingual and code-mixed question to provide the answer against the visual information (image). To better encode the multilingual and code-mixed questions, we introduce a hierarchy of shared layers. We control the behaviour of these shared layers by an attention-based soft layer sharing mechanism, which learns how shared layers are applied in different ways for the dif- ferent languages of the question. Further, our model uses bi-linear attention with a residual connection to fuse the language and image fea- tures. We perform extensive evaluation and ablation studies for English, Hindi and Code- mixed VQA. The evaluation shows that the proposed multilingual model achieves state-of- the-art performance in all these settings. 2020.aacl-main.90 @@ -1128,8 +1128,8 @@ Toxic Language Detection in Social Media for <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese: New Dataset and Multilingual Analysis João AugustoLeite DiegoSilva - KalinaBontcheva - CarolinaScarton + KalinaBontcheva + CarolinaScarton 914–924 Hate speech and toxic comments are a common concern of social media platform users. Although these comments are, fortunately, the minority in these platforms, they are still capable of causing harm. Therefore, identifying these comments is an important task for studying and preventing the proliferation of toxicity in social media. Previous work in automatically detecting toxic comments focus mainly in English, with very few work in languages like Brazilian Portuguese. In this paper, we propose a new large-scale dataset for Brazilian Portuguese with tweets annotated as either toxic or non-toxic or in different types of toxicity. We present our dataset collection and annotation process, where we aimed to select candidates covering multiple demographic groups. State-of-the-art BERT models were able to achieve 76% macro-F1 score using monolingual data in the binary case. We also show that large-scale monolingual data is still needed to create more accurate models, despite recent advances in multilingual approaches. An error analysis and experiments with multi-label classification show the difficulty of classifying certain types of toxic comments that appear less frequently in our data and highlights the need to develop models that are aware of different categories of toxicity. 2020.aacl-main.91 @@ -1138,9 +1138,9 @@ Measuring What Counts: The Case of Rumour Stance Classification - CarolinaScarton + CarolinaScarton DiegoSilva - KalinaBontcheva + KalinaBontcheva 925–932 Stance classification can be a powerful tool for understanding whether and which users believe in online rumours. The task aims to automatically predict the stance of replies towards a given rumour, namely support, deny, question, or comment. Numerous methods have been proposed and their performance compared in the RumourEval shared tasks in 2017 and 2019. Results demonstrated that this is a challenging problem since naturally occurring rumour stance data is highly imbalanced. This paper specifically questions the evaluation metrics used in these shared tasks. We re-evaluate the systems submitted to the two RumourEval tasks and show that the two widely adopted metrics – accuracy and macro-F1 – are not robust for the four-class imbalanced task of rumour stance classification, as they wrongly favour systems with highly skewed accuracy towards the majority class. To overcome this problem, we propose new evaluation metrics for rumour stance detection. These are not only robust to imbalanced data but also score higher systems that are capable of recognising the two most informative minority classes (support and deny). 2020.aacl-main.92 @@ -1278,7 +1278,7 @@ Formal <fixed-case>S</fixed-case>anskrit Syntax: A Specification for Programming Language K. KabiKhanganba - GirishJha + GirishJha 72–78 The paper discusses the syntax of the primary statements of the Sanskritam, a programming language specification based on natural Sanskrit under a doctoral thesis. By a statement, we mean a syntactic unit regardless of its computational operations of variable declarations, program executions or evaluations of Boolean expressions etc. We have selected six common primary statements of declaration, assignment, inline initialization, if-then-else, for loop and while loop. The specification partly overlaps the ideas of natural language programming, Controlled Natural Language (Kunh, 2013), and Natural Language subset. The practice and application of structured natural language set in a discourse are deeply rooted in the theoretical text tradition of Sanskrit, like the sūtra-based disciplines and Navya-Nyāya (NN) formal language, etc. The effort is a kind of continuation and application of such traditions and their techniques in the modern field of Sanskrit NLP. 2020.aacl-srw.11 @@ -1288,7 +1288,7 @@ Resource Creation and Evaluation of Aspect Based Sentiment Analysis in <fixed-case>U</fixed-case>rdu SadafRani - Muhammad WaqasAnwar + Muhammad WaqasAnwar 79–84 Along with the rise of people generated content on social sites, sentiment analysis has gained more importance. Aspect Based Sentiment Analysis (ABSA) is a task of identifying the sentiment at aspect level. It has more importance than sentiment analysis from commercial point of view. To the best of our knowledge, there is very few work on ABSA in Urdu language. Recent work on ABSA has limitations. Only predefined aspects are identified in a specific domain. So our focus is on the creation and evaluation of dataset for ABSA in Urdu language which will support multiple aspects. This dataset will provide a baseline evaluation for ABSA systems. 2020.aacl-srw.12 @@ -1298,7 +1298,7 @@ Making a Point: Pointer-Generator Transformers for Disjoint Vocabularies NikhilPrabhu - KatharinaKann + KatharinaKann 85–92 Explicit mechanisms for copying have improved the performance of neural models for sequence-to-sequence tasks in the low-resource setting. However, they rely on an overlap between source and target vocabularies. Here, we propose a model that does not: a pointer-generator transformer for disjoint vocabularies. We apply our model to a low-resource version of the grapheme-to-phoneme conversion (G2P) task, and show that it outperforms a standard transformer by an average of 5.1 WER over 15 languages. While our model does not beat the the best performing baseline, we demonstrate that it provides complementary information to it: an oracle that combines the best outputs of the two models improves over the strongest baseline by 7.7 WER on average in the low-resource setting. In the high-resource setting, our model performs comparably to a standard transformer. 2020.aacl-srw.13 @@ -1319,7 +1319,7 @@ Document-Level Neural Machine Translation Using <fixed-case>BERT</fixed-case> as Context Encoder ZhiyuGuo - Minh LeNguyen + Minh LeNguyen 101–107 Large-scale pre-trained representations such as BERT have been widely used in many natural language understanding tasks. The methods of incorporating BERT into document-level machine translation are still being explored. BERT is able to understand sentence relationships since BERT is pre-trained using the next sentence prediction task. In our work, we leverage this property to improve document-level machine translation. In our proposed model, BERT performs as a context encoder to achieve document-level contextual information, which is then integrated into both the encoder and decoder. Experiment results show that our proposed method can significantly outperform strong document-level machine translation baselines on BLEU score. Moreover, the ablation study shows our method can capture document-level context information to boost translation performance. 2020.aacl-srw.15 @@ -1418,7 +1418,7 @@ AnkurSonawane Sujeet KumarVishwakarma BhavanaSrivastava - AnilKumar Singh + AnilKumar Singh 165–171 Automated grammatical error correction has been explored as an important research problem within NLP, with the majority of the work being done on English and similar resource-rich languages. Grammar correction using neural networks is a data-heavy task, with the recent state of the art models requiring datasets with millions of annotated sentences for proper training. It is difficult to find such resources for Indic languages due to their relative lack of digitized content and complex morphology, compared to English. We address this problem by generating a large corpus of artificial inflectional errors for training GEC models. Moreover, to evaluate the performance of models trained on this dataset, we create a corpus of real Hindi errors extracted from Wikipedia edits. Analyzing this dataset with a modified version of the ERRANT error annotation toolkit, we find that inflectional errors are very common in this language. Finally, we produce the initial baseline results using state of the art methods developed for English. 2020.aacl-srw.24 @@ -1461,8 +1461,8 @@ <fixed-case>A</fixed-case>uto<fixed-case>NLU</fixed-case>: An On-demand Cloud-based Natural Language Understanding System for Enterprises NhamLe - TuanLai - TrungBui + TuanLai + TrungBui Doo SoonKim 8–13 With the renaissance of deep learning, neural networks have achieved promising results on many natural language understanding (NLU) tasks. Even though the source codes of many neural network models are publicly available, there is still a large gap from open-sourced models to solving real-world problems in enterprises. Therefore, to fill this gap, we introduce AutoNLU, an on-demand cloud-based system with an easy-to-use interface that covers all common use-cases and steps in developing an NLU model. AutoNLU has supported many product teams within Adobe with different use-cases and datasets, quickly delivering them working models. To demonstrate the effectiveness of AutoNLU, we present two case studies. i) We build a practical NLU model for handling various image-editing requests in Photoshop. ii) We build powerful keyphrase extraction models that achieve state-of-the-art results on two public benchmarks. In both cases, end users only need to write a small amount of code to convert their datasets into a common format used by AutoNLU. @@ -1472,8 +1472,8 @@ <fixed-case>ISA</fixed-case>: An Intelligent Shopping Assistant - TuanLai - TrungBui + TuanLai + TrungBui NedimLipka 14–19 Despite the growth of e-commerce, brick-and-mortar stores are still the preferred destinations for many people. In this paper, we present ISA, a mobile-based intelligent shopping assistant that is designed to improve shopping experience in physical stores. ISA assists users by leveraging advanced techniques in computer vision, speech processing, and natural language processing. An in-store user only needs to take a picture or scan the barcode of the product of interest, and then the user can talk to the assistant about the product. The assistant can also guide the user through the purchase process or recommend other similar products to the user. We take a data-driven approach in building the engines of ISA’s natural language processing component, and the engines achieve good performance. diff --git a/data/xml/2020.acl.xml b/data/xml/2020.acl.xml index 1a6c45f663..02e79339f1 100644 --- a/data/xml/2020.acl.xml +++ b/data/xml/2020.acl.xml @@ -3,10 +3,10 @@ Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics - DanJurafsky - JoyceChai + DanJurafsky + JoyceChai NatalieSchluter - JoelTetreault + JoelTetreault Association for Computational Linguistics
Online
July @@ -21,7 +21,7 @@ Learning to Understand Child-directed and Adult-directed Speech LiekeGelderloos - GrzegorzChrupała + GrzegorzChrupała AfraAlishahi 1–6 Speech directed to children differs from adult-directed speech in linguistic aspects such as repetition, word choice, and sentence length, as well as in aspects of the speech signal itself, such as prosodic and phonemic variation. Human language acquisition research indicates that child-directed speech helps language learners. This study explores the effect of child-directed speech when learning to extract semantic information from speech directly. We compare the task performance of models trained on adult-directed speech (ADS) and child-directed speech (CDS). We find indications that CDS helps in the initial stages of learning, but eventually, models trained on ADS reach comparable task performance, and generalize better. The results suggest that this is at least partially due to linguistic rather than acoustic properties of the two registers, as we see the same pattern when looking at models trained on acoustically comparable synthetic speech. @@ -34,7 +34,7 @@ Predicting Depression in Screening Interviews from Latent Categorization of Interview Prompts AlexRinaldi - JeanFox Tree + JeanFox Tree SnigdhaChaturvedi 7–18 Accurately diagnosing depression is difficult– requiring time-intensive interviews, assessments, and analysis. Hence, automated methods that can assess linguistic patterns in these interviews could help psychiatric professionals make faster, more informed decisions about diagnosis. We propose JLPC, a model that analyzes interview transcripts to identify depression while jointly categorizing interview prompts into latent categories. This latent categorization allows the model to define high-level conversational contexts that influence patterns of language in depressed individuals. We show that the proposed model not only outperforms competitive baselines, but that its latent prompt categories provide psycholinguistic insights about depression. @@ -46,7 +46,7 @@ <fixed-case>C</fixed-case>oach: A Coarse-to-Fine Approach for Cross-domain Slot Filling ZihanLiu - Genta IndraWinata + Genta IndraWinata PengXu PascaleFung 19–25 @@ -72,10 +72,10 @@ Dialogue State Tracking with Explicit Slot Connection Modeling YawenOuyang MoxinChen - XinyuDai + XinyuDai YinggongZhao ShujianHuang - JiajunChen + JiajunChen 34–40 Recent proposed approaches have made promising progress in dialogue state tracking (DST). However, in multi-domain scenarios, ellipsis and reference are frequently adopted by users to express values that have been mentioned by slots from other domains. To handle these phenomena, we propose a Dialogue State Tracking with Slot Connections (DST-SC) model to explicitly consider slot correlations across different domains. Given a target slot, the slot connecting mechanism in DST-SC can infer its source slot and copy the source slot value directly, thus significantly reducing the difficulty of learning and reasoning. Experimental results verify the benefits of explicit slot connection modeling, and our model achieves state-of-the-art performance on MultiWOZ 2.0 and MultiWOZ 2.1 datasets. 2020.acl-main.5 @@ -199,7 +199,7 @@ <fixed-case>T</fixed-case>rans<fixed-case>S</fixed-case>-Driven Joint Learning Architecture for Implicit Discourse Relation Recognition RuifangHe - JianWang + JianWang FengyuGuo YuguiHan 139–148 @@ -250,7 +250,7 @@ Few-Shot <fixed-case>NLG</fixed-case> with Pre-Trained Language Model - ZhiyuChen + ZhiyuChen HariniEavani WenhuChen YinyinLiu @@ -312,7 +312,7 @@ Pre-train and Plug-in: Flexible Conditional Text Generation with Variational Auto-Encoders - YuDuan + YuDuan CanwenXu JiaxinPei JialongHan @@ -400,8 +400,8 @@ JoeBarrow RajivJain VladMorariu - VarunManjunatha - DouglasOard + VarunManjunatha + DouglasOard PhilipResnik 313–322 Text segmentation aims to uncover latent structure by dividing text from a document into coherent sections. Where previous work on text segmentation considers the tasks of document segmentation and segment labeling separately, we show that the tasks contain complementary information and are best addressed jointly. We introduce Segment Pooling LSTM (S-LSTM), which is capable of jointly segmenting a document and labeling segments. In support of joint training, we develop a method for teaching the model to recover from errors by aligning the predicted and ground truth segments. We show that S-LSTM reduces segmentation error by 30% on average, while also improving segment labeling. @@ -471,7 +471,7 @@ KehaiChen RuiWang MasaoUtiyama - EiichiroSumita + EiichiroSumita 358–364 Neural machine translation (NMT) encodes the source sentence in a universal way to generate the target sentence word-by-word. However, NMT does not consider the importance of word in the sentence meaning, for example, some words (i.e., content words) express more important meaning than others (i.e., function words). To address this limitation, we first utilize word frequency information to distinguish between content and function words in a sentence, and then design a content word-aware NMT to improve translation performance. Empirical results on the WMT14 English-to-German, WMT14 English-to-French, and WMT17 Chinese-to-English translation tasks show that the proposed methods can significantly improve the performance of Transformer-based NMT. 2020.acl-main.34 @@ -509,8 +509,8 @@ Learning Source Phrase Representations for Neural Machine Translation HongfeiXu - Josefvan Genabith - DeyiXiong + Josefvan Genabith + DeyiXiong QiuhuiLiu JingyiZhang 386–396 @@ -524,8 +524,8 @@ Lipschitz Constrained Parameter Initialization for Deep Transformers HongfeiXu QiuhuiLiu - Josefvan Genabith - DeyiXiong + Josefvan Genabith + DeyiXiong JingyiZhang 397–402 The Transformer translation model employs residual connection and layer normalization to ease the optimization difficulties caused by its multi-layer encoder/decoder structure. Previous research shows that even with residual connection and layer normalization, deep Transformers still have difficulty in training, and particularly Transformer models with more than 12 encoder/decoder layers fail to converge. In this paper, we first empirically demonstrate that a simple modification made in the official implementation, which changes the computation order of residual connection and layer normalization, can significantly ease the optimization of deep Transformers. We then compare the subtle differences in computation order in considerable detail, and present a parameter initialization method that leverages the Lipschitz constraint on the initialization of Transformer parameters that effectively ensures training convergence. In contrast to findings in previous research we further demonstrate that with Lipschitz parameter initialization, deep Transformers with the original computation order can converge, and obtain significant BLEU improvements with up to 24 layers. In contrast to previous research which focuses on deep encoders, our approach additionally enables Transformers to also benefit from deep decoders. @@ -595,7 +595,7 @@ GailWeiss YoavGoldberg RoySchwartz - Noah A.Smith + Noah A.Smith EranYahav 443–459 We develop a formal hierarchy of the expressive capacity of RNN architectures. The hierarchy is based on two formal properties: space complexity, which measures the RNN’s memory, and rational recurrence, defined as whether the recurrent update can be described by a weighted finite-state machine. We place several RNN variants within this hierarchy. For example, we prove the LSTM is not rational, which formally separates it from the related QRNN (Bradbury et al., 2016). We also show how these models’ expressive capacity is expanded by stacking multiple layers or composing them with different pooling functions. Our results build on the theory of “saturated” RNNs (Merrill, 2019). While formally extending these findings to unsaturated RNNs is left to future work, we hypothesize that the practical learnable capacity of unsaturated RNNs obeys a similar hierarchy. We provide empirical results to support this conjecture. Experimental findings from training unsaturated networks on formal languages support this conjecture. @@ -608,7 +608,7 @@ A Three-Parameter Rank-Frequency Relation in Natural Languages ChenchenDing MasaoUtiyama - EiichiroSumita + EiichiroSumita 460–464 We present that, the rank-frequency relation in textual data follows f \propto r^{-\alpha}(r+\gamma)^{-\beta}, where f is the token frequency and r is the rank by frequency, with (\alpha, \beta, \gamma) as parameters. The formulation is derived based on the empirical observation that d^2 (x+y)/dx^2 is a typical impulse function, where (x,y)=(\log r, \log f). The formulation is the power law when \beta=0 and the Zipf–Mandelbrot law when \alpha=0. We illustrate that \alpha is related to the analytic features of syntax and \beta+\gamma to those of morphology in natural languages from an investigation of multilingual corpora. 2020.acl-main.44 @@ -687,7 +687,7 @@ PeterStefanov KareemDarwish AtanasAtanasov - PreslavNakov + PreslavNakov 527–537 Discovering the stances of media outlets and influential people on current, debatable topics is important for social statisticians and policy makers. Many supervised solutions exist for determining viewpoints, but manually annotating training data is costly. In this paper, we propose a cascaded method that uses unsupervised learning to ascertain the stance of Twitter users with respect to a polarizing topic by leveraging their retweet behavior; then, it uses supervised learning based on user labels to characterize both the general political leaning of online media and of popular Twitter users, as well as their stance with respect to the target polarizing topic. We evaluate the model by comparing its predictions to gold labels from the Media Bias/Fact Check website, achieving 82.6% accuracy. 2020.acl-main.50 @@ -699,7 +699,7 @@ Simple, Interpretable and Stable Method for Detecting Words with Usage Change across Corpora HilaGonen GaneshJawahar - DjaméSeddah + DjaméSeddah YoavGoldberg 538–555 The problem of comparing two bodies of text and searching for words that differ in their usage between them arises often in digital humanities and computational social science. This is commonly approached by training word embeddings on each corpus, aligning the vector spaces, and looking for words whose cosine distance in the aligned space is large. However, these methods often require extensive filtering of the vocabulary to perform well, and - as we show in this work - result in unstable, and hence less reliable, results. We propose an alternative approach that does not use vector space alignment, and instead considers the neighbors of each word. The method is simple, interpretable and stable. We demonstrate its effectiveness in 9 different setups, considering different corpus splitting criteria (age, gender and profession of tweet authors, time of tweet) and different languages (English, French and Hebrew). @@ -881,8 +881,8 @@ JiahuanLi YuBao ShujianHuang - XinyuDai - JiajunChen + XinyuDai + JiajunChen 708–717 Definition generation, which aims to automatically generate dictionary definitions for words, has recently been proposed to assist the construction of dictionaries and help people understand unfamiliar texts. However, previous works hardly consider explicitly modeling the “components” of definitions, leading to under-specific generation results. In this paper, we propose ESD, namely Explicit Semantic Decomposition for definition Generation, which explicitly decomposes the meaning of words into semantic components, and models them with discrete latent variables for definition generation. Experimental results show that achieves top results on WordNet and Oxford benchmarks, outperforming strong previous baselines. 2020.acl-main.65 @@ -893,7 +893,7 @@ Improved Natural Language Generation via Loss Truncation DanielKang - Tatsunori B.Hashimoto + Tatsunori B.Hashimoto 718–731 Neural language models are usually trained to match the distributional properties of large-scale corpora by minimizing the log loss. While straightforward to optimize, this approach forces the model to reproduce all variations in the dataset, including noisy and invalid references (e.g., misannotations and hallucinated facts). Even a small fraction of noisy data can degrade the performance of log loss. As an alternative, prior work has shown that minimizing the distinguishability of generated samples is a principled and robust loss that can handle invalid references. However, distinguishability has not been used in practice due to challenges in optimization and estimation. We propose loss truncation: a simple and scalable procedure which adaptively removes high log loss examples as a way to optimize for distinguishability. Empirically, we demonstrate that loss truncation outperforms existing baselines on distinguishability on a summarization task. Furthermore, we show that samples generated by the loss truncation model have factual accuracy ratings that exceed those of baselines and match human references. 2020.acl-main.66 @@ -932,13 +932,13 @@ Syn-<fixed-case>QG</fixed-case>: Syntactic and Shallow Semantic Rules for Question Generation KaustubhDhole - Christopher D.Manning + Christopher D.Manning 752–765 Question Generation (QG) is fundamentally a simple syntactic transformation; however, many aspects of semantics influence what questions are good to form. We implement this observation by developing Syn-QG, a set of transparent syntactic rules leveraging universal dependencies, shallow semantic parsing, lexical resources, and custom rules which transform declarative sentences into question-answer pairs. We utilize PropBank argument descriptions and VerbNet state predicates to incorporate shallow semantic content, which helps generate questions of a descriptive nature and produce inferential and semantically richer questions than existing systems. In order to improve syntactic fluency and eliminate grammatically incorrect questions, we employ back-translation over the output of these syntactic rules. A set of crowd-sourced evaluations shows that our system can generate a larger number of highly grammatical and relevant questions than previous QG systems and that back-translation drastically improves grammaticality at a slight cost of generating irrelevant questions. 2020.acl-main.69 2020.acl-main.69.Source.zip - 10.18653/v1/2020.acl-main.69 2020.acl-main.69.Dataset.pdf + 10.18653/v1/2020.acl-main.69 A Methodology for Creating Question Answering Corpora Using Inverse Data Annotation - JanDeriu + JanDeriu KatsiarynaMlynchyk PhilippeSchläpfer - AlvaroRodrigo - Dirkvon Grünigen + AlvaroRodrigo + Dirkvon Grünigen NicolasKaiser KurtStockinger - EnekoAgirre + EnekoAgirre MarkCieliebak 897–911 In this paper, we introduce a novel methodology to efficiently construct a corpus for question answering over structured data. For this, we introduce an intermediate representation that is based on the logical query plan in a database, called Operation Trees (OT). This representation allows us to invert the annotation process without loosing flexibility in the types of queries that we generate. Furthermore, it allows for fine-grained alignment of the tokens to the operations. Thus, we randomly generate OTs from a context free grammar and annotators just have to write the appropriate question and assign the tokens. We compare our corpus OTTA (Operation Trees and Token Assignment), a large semantic parsing corpus for evaluating natural language interfaces to databases, to Spider and LC-QuaD 2.0 and show that our methodology more than triples the annotation speed while maintaining the complexity of the queries. Finally, we train a state-of-the-art semantic parsing model on our data and show that our dataset is a challenging dataset and that the token alignment can be leveraged to significantly increase the performance. @@ -1200,7 +1200,7 @@ Explicit Memory Tracker with Coarse-to-Fine Reasoning for Conversational Machine Reading YifanGao Chien-ShengWu - ShafiqJoty + ShafiqJoty CaimingXiong RichardSocher IrwinKing @@ -1286,7 +1286,7 @@ Moving Down the Long Tail of Word Sense Disambiguation with Gloss Informed Bi-encoders TerraBlevins - LukeZettlemoyer + LukeZettlemoyer 1006–1017 A major obstacle in Word Sense Disambiguation (WSD) is that word senses are not uniformly distributed, causing existing models to generally perform poorly on senses that are either rare or unseen during training. We propose a bi-encoder model that independently embeds (1) the target word with its surrounding context and (2) the dictionary definition, or gloss, of each sense. The encoders are jointly optimized in the same representation space, so that sense disambiguation can be performed by finding the nearest sense embedding for each target word embedding. Our system outperforms previous state-of-the-art models on English all-words WSD; these gains predominantly come from improved performance on rare senses, leading to a 31.1% error reduction on less frequent senses over prior work. This demonstrates that rare senses can be more effectively disambiguated by modeling their definitions. 2020.acl-main.95 @@ -1326,7 +1326,7 @@ Towards Conversational Recommendation over Multi-Type Dialogs ZemingLiu HaifengWang - Zheng-YuNiu + Zheng-YuNiu HuaWu WanxiangChe TingLiu @@ -1363,7 +1363,7 @@ LiangmingPan Min-YenKan ZhiyuanLiu - Tat-SengChua + Tat-SengChua 1061–1071 The curse of knowledge can impede communication between experts and laymen. We propose a new task of expertise style transfer and contribute a manually annotated dataset with the goal of alleviating such cognitive biases. Solving this task not only simplifies the professional language, but also improves the accuracy and expertise level of laymen descriptions using simple words. This is a challenging task, unaddressed in previous work, as it requires the models to have expert intelligence in order to modify text with a deep understanding of domain knowledge and structures. We establish the benchmark performance of five state-of-the-art models for style transfer and text simplification. The results demonstrate a significant gap between machine and human performance. We also discuss the challenges of automatic evaluation, to provide insights into future research directions. The dataset is publicly available at https://srhthu.github.io/expertise-style-transfer/. 2020.acl-main.100 @@ -1433,7 +1433,7 @@ Keyphrase Generation for Scientific Document Retrieval FlorianBoudin YgorGallina - AkikoAizawa + AkikoAizawa 1118–1126 Sequence-to-sequence models have lead to significant progress in keyphrase generation, but it remains unknown whether they are reliable enough to be beneficial for document retrieval. This study provides empirical evidence that such models can significantly improve retrieval performance, and introduces a new extrinsic evaluation framework that allows for a better understanding of the limitations of keyphrase generation models. Using this framework, we point out and discuss the difficulties encountered with supplementing documents with -not present in text- keyphrases, and generalizing models across domains. Our code is available at https://github.com/boudinfl/ir-using-kg 2020.acl-main.105 @@ -1444,8 +1444,8 @@ A Graph Auto-encoder Model of Derivational Morphology ValentinHofmann - HinrichSchütze - JanetPierrehumbert + HinrichSchütze + JanetPierrehumbert 1127–1138 There has been little work on modeling the morphological well-formedness (MWF) of derivatives, a problem judged to be complex and difficult in linguistics. We present a graph auto-encoder that learns embeddings capturing information about the compatibility of affixes and stems in derivation. The auto-encoder models MWF in English surprisingly well by combining syntactic and semantic information with associative information from the mental lexicon. 2020.acl-main.106 @@ -1455,13 +1455,13 @@ Building a User-Generated Content <fixed-case>N</fixed-case>orth-<fixed-case>A</fixed-case>frican <fixed-case>A</fixed-case>rabizi Treebank: Tackling Hell - DjaméSeddah + DjaméSeddah FarahEssaidi AmalFethi MatthieuFuteral BenjaminMuller - Pedro JavierOrtiz Suárez - BenoîtSagot + Pedro JavierOrtiz Suárez + BenoîtSagot AbhishekSrivastava 1139–1150 We introduce the first treebank for a romanized user-generated content variety of Algerian, a North-African Arabic dialect known for its frequent usage of code-switching. Made of 1500 sentences, fully annotated in morpho-syntax and Universal Dependency syntax, with full translation at both the word and the sentence levels, this treebank is made freely available. It is supplemented with 50k unlabeled sentences collected from Common Crawl and web-crawled data using intensive data-mining techniques. Preliminary experiments demonstrate its usefulness for POS tagging and dependency parsing. We believe that what we present in this paper is useful beyond the low-resource language community. This is the first time that enough unlabeled and annotated data is provided for an emerging user-generated content dialectal language with rich morphology and code switching, making it an challenging test-bed for most recent NLP approaches. @@ -1473,7 +1473,7 @@ Crawling and Preprocessing Mailing Lists At Scale for Dialog Analysis JanekBevendorff - KhalidAl Khatib + KhalidAl Khatib MartinPotthast BennoStein 1151–1158 @@ -1527,7 +1527,7 @@ Learning and Evaluating Emotion Lexicons for 91 Languages - SvenBuechel + SvenBuechel SusannaRücker UdoHahn 1202–1217 @@ -1541,7 +1541,7 @@ Multi-Hypothesis Machine Translation Evaluation MarinaFomicheva LuciaSpecia - FranciscoGuzmán + FranciscoGuzmán 1218–1232 Reliably evaluating Machine Translation (MT) through automated metrics is a long-standing problem. One of the main challenges is the fact that multiple outputs can be equally valid. Attempts to minimise this issue include metrics that relax the matching of MT output and reference strings, and the use of multiple references. The latter has been shown to significantly improve the performance of evaluation metrics. However, collecting multiple references is expensive and in practice a single reference is generally used. In this paper, we propose an alternative approach: instead of modelling linguistic variation in human reference we exploit the MT model uncertainty to generate multiple diverse translations and use these: (i) as surrogates to reference translations; (ii) to obtain a quantification of translation variability to either complement existing metric scores or (iii) replace references altogether. We show that for a number of popular evaluation metrics our variability estimates lead to substantial improvements in correlation with human judgements of quality by up 15%. 2020.acl-main.113 @@ -1552,7 +1552,7 @@ Multimodal Quality Estimation for Machine Translation ShuOkabe - FrédéricBlain + FrédéricBlain LuciaSpecia 1233–1240 We propose approaches to Quality Estimation (QE) for Machine Translation that explore both text and visual modalities for Multimodal QE. We compare various multimodality integration and fusion strategies. For both sentence-level and document-level predictions, we show that state-of-the-art neural and feature-based QE frameworks obtain better results when using the additional modality. @@ -1563,7 +1563,7 @@ <fixed-case>P</fixed-case>uzz<fixed-case>L</fixed-case>ing <fixed-case>M</fixed-case>achines: <fixed-case>A</fixed-case> <fixed-case>C</fixed-case>hallenge on <fixed-case>L</fixed-case>earning <fixed-case>F</fixed-case>rom <fixed-case>S</fixed-case>mall <fixed-case>D</fixed-case>ata - Gözde GülŞahin + Gözde GülŞahin YovaKementchedjhieva PhillipRust IrynaGurevych @@ -1596,20 +1596,20 @@ RishavChakravarti SaswatiDana AnthonyFerritto - RaduFlorian + RaduFlorian MartinFranz DineshGarg DineshKhandelwal - ScottMcCarley + ScottMcCarley MichaelMcCawley MohamedNasr LinPan CezarPendus - JohnPitrelli + JohnPitrelli SaurabhPujar - SalimRoukos + SalimRoukos AndrzejSakrajda - AviSil + AviSil RosarioUceda-Sosa ToddWard RongZhang @@ -1646,8 +1646,8 @@ A Large-Scale Multi-Document Summarization Dataset from the <fixed-case>W</fixed-case>ikipedia Current Events Portal DemianGholipour Ghalandari - ChrisHokamp - Nghia ThePham + ChrisHokamp + Nghia ThePham JohnGlover GeorgianaIfrim 1302–1308 @@ -1662,7 +1662,7 @@ JunnanZhu YuZhou JiajunZhang - ChengqingZong + ChengqingZong 1309–1321 Cross-lingual summarization aims at summarizing a document in one language (e.g., Chinese) into another language (e.g., English). In this paper, we propose a novel method inspired by the translation pattern in the process of obtaining a cross-lingual summary. We first attend to some words in the source text, then translate them into the target language, and summarize to get the final summary. Specifically, we first employ the encoder-decoder attention distribution to attend to the source words. Second, we present three strategies to acquire the translation probability, which helps obtain the translation candidates for each source word. Finally, each summary word is generated either from the neural distribution or from the translation candidates of source words. Experimental results on Chinese-to-English and English-to-Chinese summarization tasks have shown that our proposed method can significantly outperform the baselines, achieving comparable performance with the state-of-the-art. 2020.acl-main.121 @@ -1685,7 +1685,7 @@ Improving Truthfulness of Headline Generation KazukiMatsumaru ShoTakase - NaoakiOkazaki + NaoakiOkazaki 1335–1346 Most studies on abstractive summarization report ROUGE scores between system and reference summaries. However, we have a concern about the truthfulness of generated summaries: whether all facts of a generated summary are mentioned in the source text. This paper explores improving the truthfulness in headline generation on two popular datasets. Analyzing headlines generated by the state-of-the-art encoder-decoder model, we show that the model sometimes generates untruthful headlines. We conjecture that one of the reasons lies in untruthful supervision data used for training the model. In order to quantify the truthfulness of article-headline pairs, we consider the textual entailment of whether an article entails its headline. After confirming quite a few untruthful instances in the datasets, this study hypothesizes that removing untruthful instances from the supervision data may remedy the problem of the untruthful behaviors of the model. Building a binary classifier that predicts an entailment relation between an article and its headline, we filter out untruthful instances from the supervision data. Experimental results demonstrate that the headline generation model trained on filtered supervision data shows no clear difference in ROUGE scores but remarkable improvements in automatic and manual evaluations of the generated headlines. 2020.acl-main.123 @@ -1766,8 +1766,8 @@ <fixed-case>L</fixed-case>earning <fixed-case>D</fixed-case>ialog <fixed-case>P</fixed-case>olicies from <fixed-case>W</fixed-case>eak <fixed-case>D</fixed-case>emonstrations GabrielGordon-Hall - Philip JohnGorinski - Shay B.Cohen + Philip JohnGorinski + Shay B.Cohen 1394–1405 Deep reinforcement learning is a promising approach to training a dialog manager, but current methods struggle with the large state and action spaces of multi-domain dialog systems. Building upon Deep Q-learning from Demonstrations (DQfD), an algorithm that scores highly in difficult Atari games, we leverage dialog data to guide the agent to successfully respond to a user’s requests. We make progressively fewer assumptions about the data needed, using labeled, reduced-labeled, and even unlabeled data to train expert demonstrators. We introduce Reinforced Fine-tune Learning, an extension to DQfD, enabling us to overcome the domain gap between the datasets and the environment. Experiments in a challenging multi-domain dialog system framework validate our approaches, and get high success rates even when trained on out-of-domain data. 2020.acl-main.129 @@ -1848,7 +1848,7 @@ LiangmingPan YuxiXie YansongFeng - Tat-SengChua + Tat-SengChua Min-YenKan 1463–1475 This paper proposes the problem of Deep Question Generation (DQG), which aims to generate complex questions that require reasoning over multiple pieces of information about the input passage. In order to capture the global structure of the document and facilitate reasoning, we propose a novel framework that first constructs a semantic-level graph for the input document and then encodes the semantic graph by introducing an attention-based GGNN (Att-GGNN). Afterward, we fuse the document-level and graph-level representations to perform joint training of content selection and question decoding. On the HotpotQA deep-question centric dataset, our model greatly improves performance over questions requiring reasoning over multiple facts, leading to state-of-the-art performance. The code is publicly available at https://github.com/WING-NUS/SG-Deep-Question-Generation. @@ -1889,7 +1889,7 @@ <fixed-case>NAT</fixed-case>: Noise-Aware Training for Robust Neural Sequence Labeling MarcinNamysl SvenBehnke - JoachimKöhler + JoachimKöhler 1501–1517 Sequence labeling systems should perform reliably not only under ideal conditions but also with corrupted inputs—as these systems often process user-generated text or follow an error-prone upstream component. To this end, we formulate the noisy sequence labeling problem, where the input may undergo an unknown noising process and propose two Noise-Aware Training (NAT) objectives that improve robustness of sequence labeling performed on perturbed input: Our data augmentation method trains a neural model using a mixture of clean and noisy samples, whereas our stability training algorithm encourages the model to create a noise-invariant latent representation. We employ a vanilla noise model at training time. For evaluation, we use both the original data and its variants perturbed with real OCR errors and misspellings. Extensive experiments on English and German named entity recognition benchmarks confirmed that NAT consistently improved robustness of popular sequence labeling models, preserving accuracy on the original input. We make our code and data publicly available for the research community. 2020.acl-main.138 @@ -1968,7 +1968,7 @@ Boosting Neural Machine Translation with Similar Translations JitaoXu - JosepCrego + JosepCrego JeanSenellart 1580–1590 This paper explores data augmentation methods for training Neural Machine Translation to make use of similar translations, in a comparable way a human translator employs fuzzy matches. In particular, we show how we can simply present the neural model with information of both source and target sides of the fuzzy matches, we also extend the similarity to include semantically related translations retrieved using sentence distributed representations. We show that translations based on fuzzy matching provide the model with “copy” information while translations based on embedding similarities tend to extend the translation “context”. Results indicate that the effect from both similar sentences are adding up to further boost accuracy, combine naturally with model fine-tuning and are providing dynamic adaptation for unseen translation pairs. Tests on multiple data sets and domains show consistent accuracy improvements. To foster research around these techniques, we also release an Open-Source toolkit with efficient and flexible fuzzy-match implementation. @@ -1980,7 +1980,7 @@ Character-Level Translation with Self-attention YingqiangGao - Nikola I.Nikolov + Nikola I.Nikolov YuhuangHu Richard H.R.Hahnloser 1591–1604 @@ -2005,7 +2005,7 @@ Enhancing Machine Translation with Dependency-Aware Self-Attention EmanueleBugliarello - NaoakiOkazaki + NaoakiOkazaki 1618–1627 Most neural machine translation models only rely on pairs of parallel sentences, assuming syntactic information is automatically learned by an attention mechanism. In this work, we investigate different approaches to incorporate syntactic knowledge in the Transformer model and also propose a novel, parameter-free, dependency-aware self-attention mechanism that improves its translation quality, especially for long sentences and in low-resource scenarios. We show the efficacy of each approach on WMT English-German and English-Turkish, and WAT English-Japanese translation tasks. 2020.acl-main.147 @@ -2029,10 +2029,10 @@ It’s Easier to Translate out of <fixed-case>E</fixed-case>nglish than into it: <fixed-case>M</fixed-case>easuring Neural Translation Difficulty by Cross-Mutual Information EmanueleBugliarello - Sabrina J.Mielke + Sabrina J.Mielke AntoniosAnastasopoulos RyanCotterell - NaoakiOkazaki + NaoakiOkazaki 1640–1649 The performance of neural machine translation systems is commonly evaluated in terms of BLEU. However, due to its reliance on target language properties and generation, the BLEU metric does not allow an assessment of which translation directions are more difficult to model. In this paper, we propose cross-mutual information (XMI): an asymmetric information-theoretic metric of machine translation difficulty that exploits the probabilistic nature of most neural machine translation models. XMI allows us to better evaluate the difficulty of translating text into the target language while controlling for the difficulty of the target-side generation component independent of the translation task. We then present the first systematic and controlled study of cross-lingual translation difficulties using modern neural translation systems. Code for replicating our experiments is available online at https://github.com/e-bug/nmt-difficulty. 2020.acl-main.149 @@ -2114,7 +2114,7 @@ KalliopiMeladaki MahsaMonshizadeh AntonioKrüger - Josefvan Genabith + Josefvan Genabith 1691–1702 Current advances in machine translation (MT) increase the need for translators to switch from traditional translation to post-editing (PE) of machine-translated text, a process that saves time and reduces errors. This affects the design of translation interfaces, as the task changes from mainly generating text to correcting errors within otherwise helpful translation proposals. Since this paradigm shift offers potential for modalities other than mouse and keyboard, we present MMPE, the first prototype to combine traditional input modes with pen, touch, and speech modalities for PE of MT. The results of an evaluation with professional translators suggest that pen and touch interaction are suitable for deletion and reordering tasks, while they are of limited use for longer insertions. On the other hand, speech and multi-modal combinations of select & speech are considered suitable for replacements and insertions but offer less potential for deletion and reordering. Overall, participants were enthusiastic about the new modalities and saw them as good extensions to mouse & keyboard, but not as a complete substitute. 2020.acl-main.155 @@ -2124,9 +2124,9 @@ A Monolingual Approach to Contextualized Word Embeddings for Mid-Resource Languages - Pedro JavierOrtiz Suárez - LaurentRomary - BenoîtSagot + Pedro JavierOrtiz Suárez + LaurentRomary + BenoîtSagot 1703–1714 We use the multilingual OSCAR corpus, extracted from Common Crawl via language classification, filtering and cleaning, to train monolingual contextualized word embeddings (ELMo) for five mid-resource languages. We then compare the performance of OSCAR-based and Wikipedia-based ELMo embeddings for these languages on the part-of-speech tagging and parsing tasks. We show that, despite the noise in the Common-Crawl-based OSCAR data, embeddings trained on OSCAR perform much better than monolingual embeddings trained on Wikipedia. They actually equal or improve the current state of the art in tagging and parsing for all five languages. In particular, they also improve over multilingual Wikipedia-based contextual embeddings (multilingual BERT), which almost always constitutes the previous state of the art, thereby showing that the benefit of a larger, more diverse corpus surpasses the cross-lingual benefit of multilingual embedding architectures. 2020.acl-main.156 @@ -2155,7 +2155,7 @@ JonGauthier PengQian EthanWilcox - RogerLevy + RogerLevy 1725–1744 While state-of-the-art neural network models continue to achieve lower perplexity scores on language modeling benchmarks, it remains unknown whether optimizing for broad-coverage predictive performance leads to human-like syntactic knowledge. Furthermore, existing work has not provided a clear picture about the model properties required to produce proper syntactic generalizations. We present a systematic evaluation of the syntactic knowledge of neural language models, testing 20 combinations of model types and data sizes on a set of 34 English-language syntactic test suites. We find substantial differences in syntactic generalization performance by model architecture, with sequential models underperforming other architectures. Factorially manipulating model architecture and training dataset size (1M-40M words), we find that variability in syntactic generalization performance is substantially greater by architecture than by dataset size for the corpora tested in our experiments. Our results also reveal a dissociation between perplexity and syntactic generalization performance. 2020.acl-main.158 @@ -2166,7 +2166,7 @@ Inflecting When There’s No Majority: Limitations of Encoder-Decoder Neural Networks as Cognitive Models for <fixed-case>G</fixed-case>erman Plurals KateMcCurdy - SharonGoldwater + SharonGoldwater AdamLopez 1745–1756 Can artificial neural networks learn to represent inflectional morphology and generalize to new words as human speakers do? Kirov and Cotterell (2018) argue that the answer is yes: modern Encoder-Decoder (ED) architectures learn human-like behavior when inflecting English verbs, such as extending the regular past tense form /-(e)d/ to novel words. However, their work does not address the criticism raised by Marcus et al. (1995): that neural models may learn to extend not the regular, but the most frequent class — and thus fail on tasks like German number inflection, where infrequent suffixes like /-s/ can still be productively generalized. To investigate this question, we first collect a new dataset from German speakers (production and ratings of plural forms for novel nouns) that is designed to avoid sources of information unavailable to the ED model. The speaker data show high variability, and two suffixes evince ‘regular’ behavior, appearing more often with phonologically atypical inputs. Encoder-decoder models do generalize the most frequently produced plural class, but do not show human-like variability or ‘regular’ extension of these other plural markers. We conclude that modern neural models may still struggle with minority-class generalization. @@ -2257,7 +2257,7 @@ Conversational Graph Grounded Policy Learning for Open-Domain Conversation Generation JunXu HaifengWang - Zheng-YuNiu + Zheng-YuNiu HuaWu WanxiangChe TingLiu @@ -2271,12 +2271,12 @@ <fixed-case>GPT</fixed-case>-too: A Language-Model-First Approach for <fixed-case>AMR</fixed-case>-to-Text Generation ManuelMager - RamónFernandez Astudillo + RamónFernandez Astudillo TahiraNaseem - Md ArafatSultan + Md ArafatSultan Young-SukLee - RaduFlorian - SalimRoukos + RaduFlorian + SalimRoukos 1846–1852 Abstract Meaning Representations (AMRs) are broad-coverage sentence-level semantic graphs. Existing approaches to generating text from AMR have focused on training sequence-to-sequence or graph-to-sequence models on AMR annotated data only. In this paper, we propose an alternative approach that combines a strong pre-trained language model with cycle consistency-based re-scoring. Despite the simplicity of the approach, our experimental results show these models outperform all previous techniques on the English LDC2017T10 dataset, including the recent use of transformer architectures. In addition to the standard evaluation metrics, we provide human evaluation experiments that further substantiate the strength of our approach. 2020.acl-main.167 @@ -2290,7 +2290,7 @@ PengyuNie MilosGligoric Junyi JessyLi - RaymondMooney + RaymondMooney 1853–1868 We formulate the novel task of automatically updating an existing natural language comment based on changes in the body of code it accompanies. We propose an approach that learns to correlate changes across two distinct language representations, to generate a sequence of edits that are applied to the existing comment to reflect the source code modifications. We train and evaluate our model using a dataset that we collected from commit histories of open-source software projects, with each example consisting of a concurrent update to a method and its corresponding comment. We compare our approach against multiple baselines using both automatic metrics and human evaluation. Results reflect the challenge of this task and that our model outperforms baselines with respect to making edits. 2020.acl-main.168 @@ -2307,7 +2307,7 @@ GrahamNeubig YimingYang RuslanSalakhutdinov - Alan WBlack + Alan WBlack ShrimaiPrabhumoye 1869–1881 This paper introduces a new task of politeness transfer which involves converting non-polite sentences to polite sentences while preserving the meaning. We also provide a dataset of more than 1.39 instances automatically labeled for politeness to encourage benchmark evaluations on this new task. We design a tag and generate pipeline that identifies stylistic attributes and subsequently generates a sentence in the target style while preserving most of the source content. For politeness as well as five other transfer tasks, our model outperforms the state-of-the-art methods on automatic metrics for content preservation, with a comparable or better performance on style transfer accuracy. Additionally, our model surpasses existing methods on human evaluations for grammaticality, meaning preservation and transfer accuracy across all the six style transfer tasks. The data and code is located at https://github.com/tag-and-generate. @@ -2379,7 +2379,7 @@ Unsupervised Opinion Summarization with Noising and Denoising - Reinald KimAmplayo + Reinald KimAmplayo MirellaLapata 1934–1945 The supervised training of high-capacity models on large datasets containing hundreds of thousands of document-summary pairs is critical to the recent success of deep learning techniques for abstractive summarization. Unfortunately, in most domains (other than news) such training data is not available and cannot be easily sourced. In this paper we enable the use of supervised learning for the setting where there are only documents available (e.g., product or business reviews) without ground truth summaries. We create a synthetic dataset from a corpus of user reviews by sampling a review, pretending it is a summary, and generating noisy versions thereof which we treat as pseudo-review input. We introduce several linguistically motivated noise generation functions and a summarization model which learns to denoise the input and generate the original review. At test time, the model accepts genuine reviews and generates a summary containing salient opinions, treating those that do not reach consensus as noise. Extensive automatic and human evaluation shows that our model brings substantial improvements over both abstractive and extractive baselines. @@ -2390,8 +2390,8 @@ A Tale of Two Perplexities: Sensitivity of Neural Language Models to Lexical Retrieval Deficits in Dementia of the <fixed-case>A</fixed-case>lzheimer’s Type - TrevorCohen - SergueiPakhomov + TrevorCohen + SergueiPakhomov 1946–1957 In recent years there has been a burgeoning interest in the use of computational methods to distinguish between elicited speech samples produced by patients with dementia, and those from healthy controls. The difference between perplexity estimates from two neural language models (LMs) - one trained on transcripts of speech produced by healthy participants and one trained on those with dementia - as a single feature for diagnostic classification of unseen transcripts has been shown to produce state-of-the-art performance. However, little is known about why this approach is effective, and on account of the lack of case/control matching in the most widely-used evaluation set of transcripts (DementiaBank), it is unclear if these approaches are truly diagnostic, or are sensitive to other variables. In this paper, we interrogate neural LMs trained on participants with and without dementia by using synthetic narratives previously developed to simulate progressive semantic dementia by manipulating lexical frequency. We find that perplexity of neural LMs is strongly and differentially associated with lexical frequency, and that using a mixture model resulting from interpolating control and dementia LMs improves upon the current state-of-the-art for models trained on transcript text exclusively. 2020.acl-main.176 @@ -2405,7 +2405,7 @@ Probing Linguistic Systematicity EmilyGoodwin KoustuvSinha - Timothy J.O’Donnell + Timothy J.O’Donnell 1958–1969 Recently, there has been much interest in the question of whether deep natural language understanding (NLU) models exhibit systematicity, generalizing such that units like words make consistent contributions to the meaning of the sentences in which they appear. There is accumulating evidence that neural models do not learn systematically. We examine the notion of systematicity from a linguistic perspective, defining a set of probing tasks and a set of metrics to measure systematic behaviour. We also identify ways in which network architectures can generalize non-systematically, and discuss why such forms of generalization may be unsatisfying. As a case study, we perform a series of experiments in the setting of natural language inference (NLI). We provide evidence that current state-of-the-art NLU systems do not generalize systematically, despite overall high performance. 2020.acl-main.177 @@ -2418,7 +2418,7 @@ MaartenSap EricHorvitz YejinChoi - Noah A.Smith + Noah A.Smith JamesPennebaker 1970–1978 We investigate the use of NLP as a measure of the cognitive processes involved in storytelling, contrasting imagination and recollection of events. To facilitate this, we collect and release Hippocorpus, a dataset of 7,000 stories about imagined and recalled events. We introduce a measure of narrative flow and use this to examine the narratives for imagined and recalled events. Additionally, we measure the differential recruitment of knowledge attributed to semantic memory versus episodic memory (Tulving, 1972) for imagined and recalled storytelling by comparing the frequency of descriptions of general commonsense events with more specific realis events. Our analyses show that imagined stories have a substantially more linear narrative flow, compared to recalled stories in which adjacent sentences are more disconnected. In addition, while recalled stories rely more on autobiographical events based on episodic memory, imagined stories express more commonsense knowledge based on semantic memory. Finally, our measures reveal the effect of narrativization of memories in stories (e.g., stories about frequently recalled memories flow more linearly; Bartlett, 1932). Our findings highlight the potential of using NLP tools to study the traces of human cognition in language. @@ -2430,7 +2430,7 @@ Recurrent Neural Network Language Models Always Learn <fixed-case>E</fixed-case>nglish-Like Relative Clause Attachment ForrestDavis - Martenvan Schijndel + Martenvan Schijndel 1979–1990 A standard approach to evaluating language models analyzes how models assign probabilities to valid versus invalid syntactic constructions (i.e. is a grammatical sentence more probable than an ungrammatical sentence). Our work uses ambiguous relative clause attachment to extend such evaluations to cases of multiple simultaneous valid interpretations, where stark grammaticality differences are absent. We compare model performance in English and Spanish to show that non-linguistic biases in RNN LMs advantageously overlap with syntactic structure in English but not Spanish. Thus, English models may appear to acquire human-like syntactic preferences, while models trained on Spanish fail to acquire comparable human-like preferences. We conclude by relating these results to broader concerns about the relationship between comprehension (i.e. typical language model use cases) and production (which generates the training data for language models), suggesting that necessary linguistic biases are not present in the training signal at all. 2020.acl-main.179 @@ -2505,7 +2505,7 @@ Negative Training for Neural Dialogue Response Generation TianxingHe - JamesGlass + JamesGlass 2044–2058 Although deep learning models have brought tremendous advancements to the field of open-domain dialogue response generation, recent research results have revealed that the trained models have undesirable generation behaviors, such as malicious responses and generic (boring) responses. In this work, we propose a framework named “Negative Training” to minimize such behaviors. Given a trained model, the framework will first find generated samples that exhibit the undesirable behavior, and then use them to feed negative training signals for fine-tuning the model. Our experiments show that negative training can significantly reduce the hit rate of malicious responses, or discourage frequent responses and improve response diversity. 2020.acl-main.185 @@ -2539,7 +2539,7 @@ Calibrating Structured Output Predictors for Natural Language Processing - AbhyudayJagannatha + AbhyudayJagannatha HongYu 2078–2092 We address the problem of calibrating prediction confidence for output entities of interest in natural language processing (NLP) applications. It is important that NLP applications such as named entity recognition and question answering produce calibrated confidence scores for their predictions, especially if the applications are to be deployed in a safety-critical domain such as healthcare. However the output space of such structured prediction models are often too large to directly adapt binary or multi-class calibration methods. In this study, we propose a general calibration scheme for output entities of interest in neural network based structured prediction models. Our proposed method can be used with any binary class calibration scheme and a neural network model. Additionally, we show that our calibration method can also be used as an uncertainty-aware, entity-specific decoding step to improve the performance of the underlying model at no additional training cost or data requirements. We show that our method outperforms current calibration techniques for Named Entity Recognition, Part-of-speech tagging and Question Answering systems. We also observe an improvement in model performance from our decoding step across several tasks and benchmark datasets. Our method improves the calibration and model performance on out-of-domain test scenarios as well. @@ -2552,7 +2552,7 @@ Active Imitation Learning with Noisy Guidance KiantéBrantley AmrSharaf - HalDaumé III + HalDaumé III 2093–2105 Imitation learning algorithms provide state-of-the-art results on many structured prediction tasks by learning near-optimal search policies. Such algorithms assume training-time access to an expert that can provide the optimal action at any queried state; unfortunately, the number of such queries is often prohibitive, frequently rendering these approaches impractical. To combat this query complexity, we consider an active learning setting in which the learning algorithm has additional access to a much cheaper noisy heuristic that provides noisy guidance. Our algorithm, LEAQI, learns a difference classifier that predicts when the expert is likely to disagree with the heuristic, and queries the expert only when necessary. We apply LEAQI to three sequence labelling tasks, demonstrating significantly fewer queries to the expert and comparable (or better) accuracies over a passive approach. 2020.acl-main.189 @@ -2577,7 +2577,7 @@ <fixed-case>GAN</fixed-case>-<fixed-case>BERT</fixed-case>: Generative Adversarial Learning for Robust Text Classification with a Bunch of Labeled Examples DaniloCroce GiuseppeCastellucci - RobertoBasili + RobertoBasili 2114–2119 Recent Transformer-based architectures, e.g., BERT, provide impressive results in many Natural Language Processing tasks. However, most of the adopted benchmarks are made of (sometimes hundreds of) thousands of examples. In many real scenarios, obtaining high- quality annotated data is expensive and time consuming; in contrast, unlabeled examples characterizing the target task can be, in general, easily collected. One promising method to enable semi-supervised learning has been proposed in image processing, based on Semi- Supervised Generative Adversarial Networks. In this paper, we propose GAN-BERT that ex- tends the fine-tuning of BERT-like architectures with unlabeled data in a generative adversarial setting. Experimental results show that the requirement for annotated examples can be drastically reduced (up to only 50-100 annotated examples), still obtaining good performances in several sentence classification tasks. 2020.acl-main.191 @@ -2602,7 +2602,7 @@ Learning to Contextually Aggregate Multi-Source Supervision for Sequence Labeling OuyuLan XiaoHuang - Bill YuchenLin + Bill YuchenLin HeJiang LiyuanLiu XiangRen @@ -2685,7 +2685,7 @@ SarthakDash Md. Faisal MahbubChowdhury NandanaMihindukulasooriya - AlfioGliozzo + AlfioGliozzo 2198–2208 Extracting lexico-semantic relations as graph-structured taxonomies, also known as taxonomy construction, has been beneficial in a variety of NLP applications. Recently Graph Neural Network (GNN) has shown to be powerful in successfully tackling many tasks. However, there has been no attempt to exploit GNN to create taxonomies. In this paper, we propose Graph2Taxo, a GNN-based cross-domain transfer framework for the taxonomy construction task. Our main contribution is to learn the latent features of taxonomy construction from existing domains to guide the structure learning of an unseen domain. We also propose a novel method of directed acyclic graph (DAG) generation for taxonomy construction. Specifically, our proposed Graph2Taxo uses a noisy graph constructed from automatically extracted noisy hyponym hypernym candidate pairs, and a set of taxonomies for some known domains for training. The learned model is then used to generate taxonomy for a new unknown domain given a set of terms for that domain. Experiments on benchmark datasets from science and environment domains show that our approach attains significant improvements correspondingly over the state of the art. 2020.acl-main.199 @@ -2709,7 +2709,7 @@ Why Overfitting Isn’t Always Bad: Retrofitting Cross-Lingual Word Embeddings to Dictionaries MozhiZhang YoshinariFujinuma - Michael J.Paul + Michael J.Paul JordanBoyd-Graber 2214–2220 Cross-lingual word embeddings (CLWE) are often evaluated on bilingual lexicon induction (BLI). Recent CLWE methods use linear projections, which underfit the training dictionary, to generalize on BLI. However, underfitting can hinder generalization to other downstream tasks that rely on words from the training dictionary. We address this limitation by retrofitting CLWE to the training dictionary, which pulls training translation pairs closer in the embedding space and overfits the training dictionary. This simple post-processing step often improves accuracy on two downstream tasks, despite lowering BLI test accuracy. We also retrofit to both the training dictionary and a synthetic dictionary induced from CLWE, which sometimes generalizes even better on downstream tasks. Our results confirm the importance of fully exploiting training dictionary in downstream tasks and explains why BLI is a flawed CLWE evaluation. @@ -2759,8 +2759,8 @@ Efficient Strategies for Hierarchical Text Classification: External Knowledge and Auxiliary Tasks KervyRivas Rojas GinaBustamante - ArturoOncevay - Marco AntonioSobrevilla Cabezudo + ArturoOncevay + Marco AntonioSobrevilla Cabezudo 2252–2257 In hierarchical text classification, we perform a sequence of inference steps to predict the category of a document from top to bottom of a given class taxonomy. Most of the studies have focused on developing novels neural network architectures to deal with the hierarchical structure, but we prefer to look for efficient ways to strengthen a baseline model. We first define the task as a sequence-to-sequence problem. Afterwards, we propose an auxiliary synthetic task of bottom-up-classification. Then, from external dictionaries, we retrieve textual definitions for the classes of all the hierarchy’s layers, and map them into the word vector space. We use the class-definition embeddings as an additional input to condition the prediction of the next layer and in an adapted beam search. Whereas the modified search did not provide large gains, the combination of the auxiliary task and the additional input of class-definitions significantly enhance the classification accuracy. With our efficient approaches, we outperform previous studies, using a drastically reduced number of parameters, in two well-known English datasets. 2020.acl-main.205 @@ -2787,7 +2787,7 @@ SergeyFeldman IzBeltagy DougDowney - DanielWeld + DanielWeld 2270–2282 Representation learning is a critical ingredient for natural language processing systems. Recent Transformer language models like BERT learn powerful textual representations, but these models are targeted towards token- and sentence-level training objectives and do not leverage information on inter-document relatedness, which limits their document-level representation power. For applications on scientific documents, such as classification and recommendation, accurate embeddings of documents are a necessity. We propose SPECTER, a new method to generate document-level embedding of scientific papers based on pretraining a Transformer language model on a powerful signal of document-level relatedness: the citation graph. Unlike existing pretrained language models, Specter can be easily applied to downstream applications without task-specific fine-tuning. Additionally, to encourage further research on document-level models, we introduce SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from citation prediction, to document classification and recommendation. We show that Specter outperforms a variety of competitive baselines on the benchmark. 2020.acl-main.207 @@ -2842,7 +2842,7 @@ JieFu Marc-AlexandreCôté YiTay - ChrisPal + ChrisPal AdamTrischler 2325–2338 Existing machine reading comprehension (MRC) models do not scale effectively to real-world applications like web-level information retrieval and question answering (QA). We argue that this stems from the nature of MRC datasets: most of these are static environments wherein the supporting documents and all necessary information are fully observed. In this paper, we propose a simple method that reframes existing MRC datasets as interactive, partially observable environments. Specifically, we “occlude” the majority of a document’s text and add context-sensitive commands that reveal “glimpses” of the hidden text to a model. We repurpose SQuAD and NewsQA as an initial case study, and then show how the interactive corpora can be used to train a model that seeks relevant information through sequential decision making. We believe that this setting can contribute in scaling models to web-level QA scenarios. @@ -2854,7 +2854,7 @@ Syntactic Data Augmentation Increases Robustness to Inference Heuristics JunghyunMin - R. ThomasMcCoy + R. ThomasMcCoy DipanjanDas EmilyPitler TalLinzen @@ -2868,7 +2868,7 @@ Improved Speech Representations with Multi-Target Autoregressive Predictive Coding Yu-AnChung - JamesGlass + JamesGlass 2353–2358 Training objectives based on predictive coding have recently been shown to be very effective at learning meaningful representations from unlabeled speech. One example is Autoregressive Predictive Coding (Chung et al., 2019), which trains an autoregressive RNN to generate an unseen future frame given a context such as recent past frames. The basic hypothesis of these approaches is that hidden states that can accurately predict future frames are a useful representation for many downstream tasks. In this paper we extend this hypothesis and aim to enrich the information encoded in the hidden states by training the model to make more accurate future predictions. We propose an auxiliary objective that serves as a regularization to improve generalization of the future frame prediction task. Experimental results on phonetic classification, speech recognition, and speech translation not only support the hypothesis, but also demonstrate the effectiveness of our approach in learning representations that contain richer phonetic content. 2020.acl-main.213 @@ -2902,7 +2902,7 @@ LorenzoBelgrano NicolaiJacobsen RegitzeSdun - ŽeljkoAgić + ŽeljkoAgić 2370–2380 We address a challenging and practical task of labeling questions in speech in real time during telephone calls to emergency medical services in English, which embeds within a broader decision support system for emergency call-takers. We propose a novel multimodal approach to real-time sequence labeling in speech. Our model treats speech and its own textual representation as two separate modalities or views, as it jointly learns from streamed audio and its noisy transcription into text via automatic speech recognition. Our results show significant gains of jointly learning from the two modalities when compared to text or audio only, under adverse noise and limited volume of training data. The results generalize to medical symptoms detection where we observe a similar pattern of improvements with multimodal learning. 2020.acl-main.215 @@ -2926,7 +2926,7 @@ Phone Features Improve Speech Translation ElizabethSalesky - Alan WBlack + Alan WBlack 2388–2397 End-to-end models for speech translation (ST) more tightly couple speech recognition (ASR) and machine translation (MT) than a traditional cascade of separate ASR and MT models, with simpler model architectures and the potential for reduced error propagation. Their performance is often assumed to be superior, though in many conditions this is not yet the case. We compare cascaded and end-to-end models across high, medium, and low-resource conditions, and show that cascades remain stronger baselines. Further, we introduce two methods to incorporate phone features into ST models. We show that these features improve both architectures, closing the gap between end-to-end models and cascades, and outperforming previous academic work – by up to 9 BLEU on our low-resource setting. 2020.acl-main.217 @@ -3002,7 +3002,7 @@ Automatic Poetry Generation from Prosaic Text - TimVan de Cruys + TimVan de Cruys 2471–2480 In the last few years, a number of successful approaches have emerged that are able to adequately model various aspects of natural language. In particular, language models based on neural networks have improved the state of the art with regard to predictive language modeling, while topic models are successful at capturing clear-cut, semantic dimensions. In this paper, we will explore how these approaches can be adapted and combined to model the linguistic and literary aspects needed for poetry generation. The system is exclusively trained on standard, non-poetic text, and its output is constrained in order to confer a poetic character to the generated verse. The framework is applied to the generation of poems in both English and French, and is equally evaluated for both languages. Even though it only uses standard, non-poetic text as input, the system yields state of the art results for poetry generation. 2020.acl-main.223 @@ -3013,7 +3013,7 @@ Bridging the Structural Gap Between Encoding and Decoding for Data-To-Text Generation ChaoZhao - MarilynWalker + MarilynWalker SnigdhaChaturvedi 2481–2491 Generating sequential natural language descriptions from graph-structured data (e.g., knowledge graph) is challenging, partly because of the structural differences between the input graph and the output text. Hence, popular sequence-to-sequence models, which require serialized input, are not a natural fit for this task. Graph neural networks, on the other hand, can better encode the input graph but broaden the structural gap between the encoder and decoder, making faithful generation difficult. To narrow this gap, we propose DualEnc, a dual encoding model that can not only incorporate the graph structure, but can also cater to the linear structure of the output text. Empirical comparisons with strong single-encoder baselines demonstrate that dual encoding can significantly improve the quality of the generated text. @@ -3068,7 +3068,7 @@ Simple and Effective Retrieve-Edit-Rerank Text Generation NabilHossain MarjanGhazvininejad - LukeZettlemoyer + LukeZettlemoyer 2532–2538 Retrieve-and-edit seq2seq methods typically retrieve an output from the training set and learn a model to edit it to produce the final output. We propose to extend this framework with a simple and effective post-generation ranking approach. Our framework (i) retrieves several potentially relevant outputs for each input, (ii) edits each candidate independently, and (iii) re-ranks the edited candidates to select the final output. We use a standard editing model with simple task-specific re-ranking approaches, and we show empirically that this approach outperforms existing, significantly more complex methodologies. Experiments on two machine translation (MT) datasets show new state-of-art results. We also achieve near state-of-art performance on the Gigaword summarization dataset, where our analyses show that there is significant room for performance improvement with better candidate output selection in future work. 2020.acl-main.228 @@ -3100,7 +3100,7 @@ SpencerWhitehead DiLu HengJi - Shih-FuChang + Shih-FuChang 2557–2568 We introduce a new task, MultiMedia Event Extraction, which aims to extract events and their arguments from multimedia documents. We develop the first benchmark and collect a dataset of 245 multimedia news articles with extensively annotated events and arguments. We propose a novel method, Weakly Aligned Structured Embedding (WASE), that encodes structured representations of semantic information from textual and visual data into a common embedding space. The structures are aligned across modalities by employing a weakly supervised training strategy, which enables exploiting available resources without explicit cross-media annotation. Compared to uni-modal state-of-the-art methods, our approach achieves 4.0% and 9.8% absolute F-score gains on text event argument role labeling and visual event extraction. Compared to state-of-the-art multimedia unstructured representations, we achieve 8.3% and 5.0% absolute F-score gains on multimedia event extraction and argument role labeling, respectively. By utilizing images, we extract 21.4% more event mentions than traditional text-only methods. 2020.acl-main.230 @@ -3112,8 +3112,8 @@ Learning to Segment Actions from Observation and Narration DanielFried Jean-BaptisteAlayrac - PhilBlunsom - ChrisDyer + PhilBlunsom + ChrisDyer StephenClark AidaNematzadeh 2569–2588 @@ -3141,7 +3141,7 @@ LiweiWang YelongShen DongYu - TamaraBerg + TamaraBerg MohitBansal 2603–2614 Generating multi-sentence descriptions for videos is one of the most challenging captioning tasks due to its high requirements for not only visual relevance but also discourse-based coherence across the sentences in the paragraph. Towards this goal, we propose a new approach called Memory-Augmented Recurrent Transformer (MART), which uses a memory module to augment the transformer architecture. The memory module generates a highly summarized memory state from the video segments and the sentence history so as to help better prediction of the next sentence (w.r.t. coreference and repetition aspects), thus encouraging coherent paragraph generation. Extensive experiments, human evaluations, and qualitative analyses on two popular datasets ActivityNet Captions and YouCookII show that MART generates more coherent and less repetitive paragraph captions than baseline methods, while maintaining relevance to the input video events. @@ -3154,7 +3154,7 @@ What is Learned in Visually Grounded Neural Syntax Acquisition NoriyukiKojima HadarAverbuch-Elor - AlexanderRush + AlexanderRush YoavArtzi 2615–2635 Visual features are a promising signal for learning bootstrap textual models. However, blackbox learning models make it difficult to isolate the specific contribution of visual components. In this analysis, we consider the case study of the Visually Grounded Neural Syntax Learner (Shi et al., 2019), a recent approach for learning syntax from a visual training signal. By constructing simplified versions of the model, we isolate the core factors that yield the model’s strong performance. Contrary to what the model might be capable of learning, we find significantly less expressive versions produce similar predictions and perform just as well, or even better. We also find that a simple lexical signal of noun concreteness plays the main role in the model’s predictions as opposed to more complex syntactic reasoning. @@ -3195,7 +3195,7 @@ Interactive Classification by Asking Informative Questions LiliYu HowardChen - Sida I.Wang + Sida I.Wang TaoLei YoavArtzi 2664–2680 @@ -3232,7 +3232,7 @@ Masked Language Model Scoring JulianSalazar DavisLiang - Toan Q.Nguyen + Toan Q.Nguyen KatrinKirchhoff 2699–2712 Pretrained masked language models (MLMs) require finetuning for most NLP tasks. Instead, we evaluate MLMs out of the box via their pseudo-log-likelihood scores (PLLs), which are computed by masking tokens one by one. We show that PLLs outperform scores from autoregressive language models like GPT-2 in a variety of tasks. By rescoring ASR and NMT hypotheses, RoBERTa reduces an end-to-end LibriSpeech model’s WER by 30% relative and adds up to +1.7 BLEU on state-of-the-art baselines for low-resource translation pairs, with further gains from domain adaptation. We attribute this success to PLL’s unsupervised expression of linguistic acceptability without a left-to-right bias, greatly improving on scores from GPT-2 (+10 points on island effects, NPI licensing in BLiMP). One can finetune MLMs to give scores without masking, enabling computation in a single inference pass. In all, PLLs and their associated pseudo-perplexities (PPPLs) enable plug-and-play use of the growing number of pretrained MLMs; e.g., we use a single cross-lingual model to rescore translations in multiple languages. We release our library for language model scoring at https://github.com/awslabs/mlm-scoring. @@ -3245,7 +3245,7 @@ Orthogonal Relation Transforms with Graph Context Modeling for Knowledge Graph Embedding YunTang - JingHuang + JingHuang GuangtaoWang XiaodongHe BowenZhou @@ -3273,7 +3273,7 @@ Posterior Control of Blackbox Generation Xiang LisaLi - AlexanderRush + AlexanderRush 2731–2743 Text generation often requires high-precision output that obeys task-specific rules. This fine-grained control is difficult to enforce with off-the-shelf deep learning models. In this work, we consider augmenting neural generation models with discrete control states learned through a structured latent-variable approach. Under this formulation, task-specific knowledge can be encoded through a range of rich, posterior constraints that are effectively trained into the model. This approach allows users to ground internal model decisions based on prior knowledge, without sacrificing the representational power of neural generative models. Experiments consider applications of this approach for text generation. We find that this method improves over standard benchmarks, while also providing fine-grained control. 2020.acl-main.243 @@ -3326,14 +3326,14 @@ Span Selection Pre-training for Question Answering - MichaelGlass - AlfioGliozzo + MichaelGlass + AlfioGliozzo RishavChakravarti AnthonyFerritto LinPan G P ShrivatsaBhargav DineshGarg - AviSil + AviSil 2773–2782 BERT (Bidirectional Encoder Representations from Transformers) and related pre-trained Transformers have provided large gains across many language understanding tasks, achieving a new state-of-the-art (SOTA). BERT is pretrained on two auxiliary tasks: Masked Language Model and Next Sentence Prediction. In this paper we introduce a new pre-training task inspired by reading comprehension to better align the pre-training from memorization to understanding. Span Selection PreTraining (SSPT) poses cloze-like training instances, but rather than draw the answer from the model’s parameters, it is selected from a relevant passage. We find significant and consistent improvements over both BERT-BASE and BERT-LARGE on multiple Machine Reading Comprehension (MRC) datasets. Specifically, our proposed model has strong empirical evidence as it obtains SOTA results on Natural Questions, a new benchmark MRC dataset, outperforming BERT-LARGE by 3 F1 points on short answer prediction. We also show significant impact in HotpotQA, improving answer prediction F1 by 4 points and supporting fact prediction F1 by 1 point and outperforming the previous best system. Moreover, we show that our pre-training approach is particularly effective when training data is limited, improving the learning curve by a large amount. 2020.acl-main.247 @@ -3345,7 +3345,7 @@ Topological Sort for Sentence Ordering ShrimaiPrabhumoye RuslanSalakhutdinov - Alan WBlack + Alan WBlack 2783–2792 Sentence ordering is the task of arranging the sentences of a given text in the correct order. Recent work using deep neural networks for this task has framed it as a sequence prediction problem. In this paper, we propose a new framing of this task as a constraint solving problem and introduce a new technique to solve it. Additionally, we propose a human evaluation for this task. The results on both automatic and human metrics across four different datasets show that this new technique is better at capturing coherence in documents. 2020.acl-main.248 @@ -3395,7 +3395,7 @@ AnkurBapna YuanCao OrhanFirat - MiaChen + MiaChen SnehaKudugunta NaveenArivazhagan YonghuiWu @@ -3450,7 +3450,7 @@ <fixed-case>G</fixed-case>lyph2<fixed-case>V</fixed-case>ec: Learning <fixed-case>C</fixed-case>hinese Out-of-Vocabulary Word Embedding from Glyphs Hong-YouChen Sz-HanYu - Shou-deLin + Shou-deLin 2865–2871 Chinese NLP applications that rely on large text often contain huge amounts of vocabulary which are sparse in corpus. We show that characters’ written form, Glyphs, in ideographic languages could carry rich semantics. We present a multi-modal model, Glyph2Vec, to tackle Chinese out-of-vocabulary word embedding problem. Glyph2Vec extracts visual features from word glyphs to expand current word embedding space for out-of-vocabulary word embedding, without the need of accessing any corpus, which is useful for improving Chinese NLP systems, especially for low-resource scenarios. Experiments across different applications show the significant effectiveness of our model. 2020.acl-main.256 @@ -3477,7 +3477,7 @@ AnnaHätty DominikSchlechtweg MichaelDorna - SabineSchulte im Walde + SabineSchulte im Walde 2883–2889 While automatic term extraction is a well-researched area, computational approaches to distinguish between degrees of technicality are still understudied. We semi-automatically create a German gold standard of technicality across four domains, and illustrate the impact of a web-crawled general-language corpus on technicality prediction. When defining a classification approach that combines general-language and domain-specific word embeddings, we go beyond previous work and align vector spaces to gain comparative embeddings. We suggest two novel models to exploit general- vs. domain-specific comparisons: a simple neural network model with pre-computed comparative-embedding information as input, and a multi-channel model computing the comparison internally. Both models outperform previous approaches, with the multi-channel model performing best. 2020.acl-main.258 @@ -3516,7 +3516,7 @@ Give Me Convenience and Give Her Death: Who Should Decide What Uses of <fixed-case>NLP</fixed-case> are Appropriate, and on What Basis? KobiLeins Jey HanLau - TimothyBaldwin + TimothyBaldwin 2908–2913 As part of growing NLP capabilities, coupled with an awareness of the ethical dimensions of research, questions have been raised about whether particular datasets and tasks should be deemed off-limits for NLP research. We examine this question with respect to a paper on automatic legal sentencing from EMNLP 2019 which was a source of some debate, in asking whether the paper should have been allowed to be published, who should have been charged with making such a decision, and on what basis. We focus in particular on the role of data statements in ethically assessing research, but also discuss the topic of dual use, and examine the outcomes of similar debates in other scientific disciplines. 2020.acl-main.261 @@ -3537,7 +3537,7 @@ It’s Morphin’ Time! <fixed-case>C</fixed-case>ombating Linguistic Discrimination with Inflectional Perturbations SamsonTan - ShafiqJoty + ShafiqJoty Min-YenKan RichardSocher 2920–2935 @@ -3584,7 +3584,7 @@ A Probabilistic Generative Model for Typographical Analysis of Early Modern Printing KartikGoyal - ChrisDyer + ChrisDyer ChristopherWarren MaxwellG’Sell TaylorBerg-Kirkpatrick @@ -3612,7 +3612,7 @@ Estimating the influence of auxiliary tasks for multi-task learning of sequence tagging tasks FynnSchröder - ChrisBiemann + ChrisBiemann 2971–2985 Multi-task learning (MTL) and transfer learning (TL) are techniques to overcome the issue of data scarcity when training state-of-the-art neural networks. However, finding beneficial auxiliary datasets for MTL or TL is a time- and resource-consuming trial-and-error approach. We propose new methods to automatically assess the similarity of sequence tagging datasets to identify beneficial auxiliary data for MTL or TL setups. Our methods can compute the similarity between any two sequence tagging datasets, they do not need to be annotated with the same tagset or multiple labels in parallel. Additionally, our methods take tokens and their labels into account, which is more robust than only using either of them as an information source, as conducted in prior work. We empirically show that our similarity measures correlate with the change in test score of neural networks that use the auxiliary dataset for MTL to increase the main task performance. We provide an efficient, open-source implementation. 2020.acl-main.268 @@ -3638,7 +3638,7 @@ Improving Transformer Models by Reordering their Sublayers OfirPress - Noah A.Smith + Noah A.Smith OmerLevy 2996–3005 Multilayer transformer networks consist of interleaved self-attention and feedforward sublayers. Could ordering the sublayers in a different pattern lead to better performance? We generate randomly ordered transformers and train them with the language modeling objective. We observe that some of these models are able to achieve better performance than the interleaved baseline, and that those successful variants tend to have more self-attention at the bottom and more feedforward sublayers at the top. We propose a new transformer pattern that adheres to this property, the sandwich transformer, and show that it improves perplexity on multiple word-level and character-level language modeling benchmarks, at no cost in parameters, memory, or training time. However, the sandwich reordering pattern does not guarantee performance gains across every task, as we demonstrate on machine translation models. Instead, we suggest that further exploration of task-specific sublayer reorderings is needed in order to unlock additional gains. @@ -3709,7 +3709,7 @@ Dynamic Programming Encoding for Subword Segmentation in Neural Machine Translation XuanliHe - GholamrezaHaffari + GholamrezaHaffari MohammadNorouzi 3042–3051 This paper introduces Dynamic Programming Encoding (DPE), a new segmentation algorithm for tokenizing sentences into subword units. We view the subword segmentation of output sentences as a latent variable that should be marginalized out for learning and inference. A mixed character-subword transformer is proposed, which enables exact log marginal likelihood estimation and exact MAP inference to find target segmentations with maximum posterior probability. DPE uses a lightweight mixed character-subword transformer as a means of pre-processing parallel data to segment output sentences using dynamic programming. Empirical results on machine translation suggest that DPE is effective for segmenting output sentences and can be combined with BPE dropout for stochastic segmentation of source sentences. DPE achieves an average improvement of 0.9 BLEU over BPE (Sennrich et al., 2016) and an average improvement of 0.55 BLEU over BPE dropout (Provilkov et al., 2019) on several WMT datasets including English <=> (German, Romanian, Estonian, Finnish, Hungarian). @@ -3749,7 +3749,7 @@ ShuoWang ZhaopengTu ShumingShi - YangLiu + YangLiu 3070–3079 Confidence calibration, which aims to make model predictions equal to the true correctness measures, is important for neural machine translation (NMT) because it is able to offer useful indicators of translation errors in the generated output. While prior studies have shown that NMT models trained with label smoothing are well-calibrated on the ground-truth training data, we find that miscalibration still remains a severe challenge for NMT during inference due to the discrepancy between training and inference. By carefully designing experiments on three language pairs, our work provides in-depth analyses of the correlation between calibration and translation performance as well as linguistic properties of miscalibration and reports a number of interesting findings that might help humans better analyze, understand and improve NMT models. Based on these observations, we further propose a new graduated label smoothing method that can improve both inference calibration and translation performance. 2020.acl-main.278 @@ -3761,7 +3761,7 @@ Camouflaged <fixed-case>C</fixed-case>hinese Spam Content Detection with Semi-supervised Generative Active Learning ZhuorenJiang ZheGao - YuDuan + YuDuan YangyangKang ChanglongSun QiongZhang @@ -3792,7 +3792,7 @@ Hiring Now: A Skill-Aware Multi-Attention Model for Job Posting Generation LitingLiu JieLiu - WenzhengZhang + WenzhengZhang ZimingChi WenxuanShi YalouHuang @@ -3881,14 +3881,14 @@ <fixed-case>A</fixed-case>nalyzing the <fixed-case>P</fixed-case>ersuasive <fixed-case>E</fixed-case>ffect of <fixed-case>S</fixed-case>tyle in <fixed-case>N</fixed-case>ews <fixed-case>E</fixed-case>ditorial <fixed-case>A</fixed-case>rgumentation RoxanneEl Baff HenningWachsmuth - KhalidAl Khatib + KhalidAl Khatib BennoStein 3154–3160 News editorials argue about political issues in order to challenge or reinforce the stance of readers with different ideologies. Previous research has investigated such persuasive effects for argumentative content. In contrast, this paper studies how important the style of news editorials is to achieve persuasion. To this end, we first compare content- and style-oriented classifiers on editorials from the liberal NYTimes with ideology-specific effect annotations. We find that conservative readers are resistant to NYTimes style, but on liberals, style even has more impact than content. Focusing on liberals, we then cluster the leads, bodies, and endings of editorials, in order to learn about writing style patterns of effective argumentation. 2020.acl-main.287 2020.acl-main.287.Software.zip - 10.18653/v1/2020.acl-main.287 2020.acl-main.287.Dataset.pdf + 10.18653/v1/2020.acl-main.287 @@ -3948,7 +3948,7 @@ DevamanyuHazarika AbhinabaRoy NavonilMajumder - RadaMihalcea + RadaMihalcea SoujanyaPoria 3198–3210 Cross-domain sentiment analysis has received significant attention in recent years, prompted by the need to combat the domain gap between different applications that make use of sentiment analysis. In this paper, we take a novel perspective on this task by exploring the role of external commonsense knowledge. We introduce a new framework, KinGDOM, which utilizes the ConceptNet knowledge graph to enrich the semantics of a document by providing both domain-specific and domain-general background concepts. These concepts are learned by training a graph convolutional autoencoder that leverages inter-domain concepts in a domain-invariant manner. Conditioning a popular domain-adversarial baseline method with these learned concepts helps improve its performance over state-of-the-art approaches, demonstrating the efficacy of our proposed framework. @@ -4042,7 +4042,7 @@ A Span-based Linearization for Constituent Trees YangWei YuanbinWu - ManLan + ManLan 3267–3277 We propose a novel linearization of a constituent tree, together with a new locally normalized model. For each split point in a sentence, our model computes the normalizer on all spans ending with that split point, and then predicts a tree span from them. Compared with global models, our model is fast and parallelizable. Different from previous local models, our linearization method is tied on the spans directly and considers more local features when performing span prediction, which is more interpretable and effective. Experiments on PTB (95.8 F1) and CTB (92.4 F1) show that our model significantly outperforms existing local models and efficiently achieves competitive results with global models. 2020.acl-main.299 @@ -4068,7 +4068,7 @@ Efficient Constituency Parsing by Pointing Thanh-TungNguyen Xuan-PhiNguyen - ShafiqJoty + ShafiqJoty XiaoliLi 3284–3294 We propose a novel constituency parsing model that casts the parsing problem into a series of pointing tasks. Specifically, our model estimates the likelihood of a span being a legitimate tree constituent via the pointing score corresponding to the boundary words of the span. Our parsing model supports efficient top-down decoding and our learning objective is able to enforce structural consistency without resorting to the expensive CKY inference. The experiments on the standard English Penn Treebank parsing task show that our method achieves 92.78 F1 without using pre-trained models, which is higher than all the existing methods with similar time complexity. Using pre-trained BERT, our model achieves 95.48 F1, which is competitive with the state-of-the-art while being faster. Our approach also establishes new state-of-the-art in Basque and Swedish in the SPMRL shared tasks on multilingual constituency parsing. @@ -4093,7 +4093,7 @@ Representations of Syntax <fixed-case>[MASK]</fixed-case> Useful: <fixed-case>E</fixed-case>ffects of Constituency and Dependency Structure in Recursive <fixed-case>LSTM</fixed-case>s MichaelLepori TalLinzen - R. ThomasMcCoy + R. ThomasMcCoy 3306–3316 Sequence-based neural networks show significant sensitivity to syntactic structure, but they still perform less well on syntactic tasks than tree-based networks. Such tree-based networks can be provided with a constituency parse, a dependency parse, or both. We evaluate which of these two representational schemes more effectively introduces biases for syntactic structure that increase performance on the subject-verb agreement prediction task. We find that a constituency-based network generalizes more robustly than a dependency-based one, and that combining the two types of structure does not yield further improvement. Finally, we show that the syntactic robustness of sequential models can be substantially improved by fine-tuning on a small amount of constructed data, suggesting that data augmentation is a viable alternative to explicit constituency structure for imparting the syntactic biases that sequential models are lacking. 2020.acl-main.303 @@ -4122,7 +4122,7 @@ JingLi LuWang ZhimingMao - Kam-FaiWong + Kam-FaiWong 3331–3341 Trending topics in social media content evolve over time, and it is therefore crucial to understand social media users and their interpersonal communications in a dynamic manner. Here we study dynamic online conversation recommendation, to help users engage in conversations that satisfy their evolving interests. While most prior work assumes static user interests, our model is able to capture the temporal aspects of user interests, and further handle future conversations that are unseen during training time. Concretely, we propose a neural architecture to exploit changes of user interactions and interests over time, to predict which discussions they are likely to enter. We conduct experiments on large-scale collections of Reddit conversations, and results on three subreddits show that our model significantly outperforms state-of-the-art models that make a static assumption of user interests. We further evaluate on handling “cold start”, and observe consistently better performance by our model when considering various degrees of sparsity of user’s chatting history and conversation contexts. Lastly, analyses on our model outputs indicate user interest change, explaining the advantage and efficacy of our approach. 2020.acl-main.305 @@ -4146,7 +4146,7 @@ Stock Embeddings Acquired from News Articles and Price History, and an Application to Portfolio Optimization XinDu - KumikoTanaka-Ishii + KumikoTanaka-Ishii 3353–3363 Previous works that integrated news articles to better process stock prices used a variety of neural networks to predict price movements. The textual and price information were both encoded in the neural network, and it is therefore difficult to apply this approach in situations other than the original framework of the notoriously hard problem of price prediction. In contrast, this paper presents a method to encode the influence of news articles through a vector representation of stocks called a stock embedding. The stock embedding is acquired with a deep learning framework using both news articles and price history. Because the embedding takes the operational form of a vector, it is applicable to other financial problems besides price prediction. As one example application, we show the results of portfolio optimization using Reuters & Bloomberg headlines, producing a capital gain 2.8 times larger than that obtained with a baseline method using only stock price data. This suggests that the proposed stock embedding can leverage textual financial semantics to solve financial prediction problems. 2020.acl-main.307 @@ -4162,8 +4162,8 @@ HaewoonKwak YoanDinkov AhmedAli - JamesGlass - PreslavNakov + JamesGlass + PreslavNakov 3364–3374 Predicting the political bias and the factuality of reporting of entire news outlets are critical elements of media profiling, which is an understudied but an increasingly important research direction. The present level of proliferation of fake, biased, and propagandistic content online has made it impossible to fact-check every single suspicious claim, either manually or automatically. Thus, it has been proposed to profile entire news outlets and to look for those that are likely to publish fake or biased content. This makes it possible to detect likely “fake news” the moment they are published, by simply checking the reliability of their source. From a practical perspective, political bias and factuality of reporting have a linguistic aspect but also a social context. Here, we study the impact of both, namely (i) what was written (i.e., what was published by the target medium, and how it describes itself in Twitter) vs. (ii) who reads it (i.e., analyzing the target medium’s audience on social media). We further study (iii) what was written about the target medium (in Wikipedia). The evaluation results show that what was written matters most, and we further show that putting all information sources together yields huge improvements over the current state-of-the-art. 2020.acl-main.308 @@ -4198,7 +4198,7 @@ Roles and Utilization of Attention Heads in Transformer-based Neural Language Models Jae-youngJo - Sung-HyonMyaeng + Sung-HyonMyaeng 3404–3417 Sentence encoders based on the transformer architecture have shown promising results on various natural language tasks. The main impetus lies in the pre-trained neural language models that capture long-range dependencies among words, owing to multi-head attention that is unique in the architecture. However, little is known for how linguistic properties are processed, represented, and utilized for downstream tasks among hundreds of attention heads inside the pre-trained transformer-based model. For the initial goal of examining the roles of attention heads in handling a set of linguistic features, we conducted a set of experiments with ten probing tasks and three downstream tasks on four pre-trained transformer families (GPT, GPT2, BERT, and ELECTRA). Meaningful insights are shown through the lens of heat map visualization and utilized to propose a relatively simple sentence representation method that takes advantage of most influential attention heads, resulting in additional performance improvements on the downstream tasks. 2020.acl-main.311 @@ -4297,14 +4297,14 @@ WeiZou ShujianHuang JunXie - XinyuDai - JiajunChen + XinyuDai + JiajunChen 3486–3497 Neural machine translation systems tend to fail on less decent inputs despite its significant efficacy, which may significantly harm the credibility of these systems—fathoming how and when neural-based systems fail in such cases is critical for industrial maintenance. Instead of collecting and analyzing bad cases using limited handcrafted error features, here we investigate this issue by generating adversarial examples via a new paradigm based on reinforcement learning. Our paradigm could expose pitfalls for a given performance metric, e.g., BLEU, and could target any given neural machine translation architecture. We conduct experiments of adversarial attacks on two mainstream neural machine translation architectures, RNN-search, and Transformer. The results show that our method efficiently produces stable attacks with meaning-preserving adversarial examples. We also present a qualitative and quantitative analysis for the preference pattern of the attack, demonstrating its capability of pitfall exposure. 2020.acl-main.319 2020.acl-main.319.Software.zip - 10.18653/v1/2020.acl-main.319 2020.acl-main.319.Dataset.pdf + 10.18653/v1/2020.acl-main.319 @@ -4354,8 +4354,8 @@ Dynamically Adjusting Transformer Batch Size by Monitoring Gradient Direction Change HongfeiXu - Josefvan Genabith - DeyiXiong + Josefvan Genabith + DeyiXiong QiuhuiLiu 3519–3524 The choice of hyper-parameters affects the performance of neural models. While much previous research (Sutskever et al., 2013; Duchi et al., 2011; Kingma and Ba, 2015) focuses on accelerating convergence and reducing the effects of the learning rate, comparatively few papers concentrate on the effect of batch size. In this paper, we analyze how increasing batch size affects gradient direction, and propose to evaluate the stability of gradients with their angle change. Based on our observations, the angle change of gradient direction first tends to stabilize (i.e. gradually decrease) while accumulating mini-batches, and then starts to fluctuate. We propose to automatically and dynamically determine batch sizes by accumulating gradients of mini-batches and performing an optimization step at just the time when the direction of gradients starts to fluctuate. To improve the efficiency of our approach for large models, we propose a sampling approach to select gradients of parameters sensitive to the batch size. Our approach dynamically determines proper and efficient batch sizes during training. In our experiments on the WMT 14 English to German and English to French tasks, our approach improves the Transformer with a fixed 25k batch size by +0.73 and +0.82 BLEU respectively. @@ -4370,8 +4370,8 @@ RuiWang KehaiChen MasaoUtiyama - EiichiroSumita - TiejunZhao + EiichiroSumita + TiejunZhao 3525–3535 Unsupervised neural machine translation (UNMT) has recently achieved remarkable results for several language pairs. However, it can only translate between a single language pair and cannot produce translation results for multiple language pairs at the same time. That is, research on multilingual UNMT has been limited. In this paper, we empirically introduce a simple method to translate between thirteen languages using a single encoder and a single decoder, making use of multilingual data to improve UNMT for all language pairs. On the basis of the empirical findings, we propose two knowledge distillation methods to further enhance multilingual UNMT performance. Our experiments on a dataset with English translated to and from twelve other languages (including three language families and six language branches) show remarkable results, surpassing strong unsupervised individual baselines while achieving promising performance between non-English language pairs in zero-shot translation scenarios and alleviating poor performance in low-resource language pairs. 2020.acl-main.324 @@ -4417,7 +4417,7 @@ <fixed-case>C</fixed-case>hart<fixed-case>D</fixed-case>ialogs: <fixed-case>P</fixed-case>lotting from <fixed-case>N</fixed-case>atural <fixed-case>L</fixed-case>anguage <fixed-case>I</fixed-case>nstructions YutongShao - NdapaNakashole + NdapaNakashole 3559–3574 This paper presents the problem of conversational plotting agents that carry out plotting actions from natural language instructions. To facilitate the development of such agents, we introduce ChartDialogs, a new multi-turn dialog dataset, covering a popular plotting library, matplotlib. The dataset contains over 15,000 dialog turns from 3,200 dialogs covering the majority of matplotlib plot types. Extensive experiments show the best-performing method achieving 61% plotting accuracy, demonstrating that the dataset presents a non-trivial challenge for future research on this task. 2020.acl-main.328 @@ -4428,7 +4428,7 @@ <fixed-case>GLUEC</fixed-case>o<fixed-case>S</fixed-case>: An Evaluation Benchmark for Code-Switched <fixed-case>NLP</fixed-case> SimranKhanuja - SandipanDandapat + SandipanDandapat AnirudhSrinivasan SunayanaSitaram MonojitChoudhury @@ -4478,7 +4478,7 @@ ShadenShaar NikolayBabulkov GiovanniDa San Martino - PreslavNakov + PreslavNakov 3607–3618 The recent proliferation of ”fake news” has triggered a number of responses, most notably the emergence of several manual fact-checking initiatives. As a result and over time, a large number of fact-checked claims have been accumulated, which increases the likelihood that a new claim in social media or a new statement by a politician might have already been fact-checked by some trusted fact-checking organization, as viral claims often come back after a while in social media, and politicians like to repeat their favorite statements, true or false, over and over again. As manual fact-checking is very time-consuming (and fully automatic fact-checking has credibility issues), it is important to try to save this effort and to avoid wasting time on claims that have already been fact-checked. Interestingly, despite the importance of the task, it has been largely ignored by the research community so far. Here, we aim to bridge this gap. In particular, we formulate the task and we discuss how it relates to, but also differs from, previous work. We further create a specialized dataset, which we release to the research community. Finally, we present learning-to-rank experiments that demonstrate sizable improvements over state-of-the-art retrieval and textual similarity approaches. 2020.acl-main.332 @@ -4557,7 +4557,7 @@ ShoushanLi LuoSi MinZhang - GuodongZhou + GuodongZhou 3667–3677 In the literature, existing studies always consider Aspect Sentiment Classification (ASC) as an independent sentence-level classification problem aspect by aspect, which largely ignore the document-level sentiment preference information, though obviously such information is crucial for alleviating the information deficiency problem in ASC. In this paper, we explore two kinds of sentiment preference information inside a document, i.e., contextual sentiment consistency w.r.t. the same aspect (namely intra-aspect sentiment consistency) and contextual sentiment tendency w.r.t. all the related aspects (namely inter-aspect sentiment tendency). On the basis, we propose a Cooperative Graph Attention Networks (CoGAN) approach for cooperatively learning the aspect-related sentence representation. Specifically, two graph attention networks are leveraged to model above two kinds of document-level sentiment preference information respectively, followed by an interactive mechanism to integrate the two-fold preference. Detailed evaluation demonstrates the great advantage of the proposed approach to ASC over the state-of-the-art baselines. This justifies the importance of the document-level sentiment preference information to ASC and the effectiveness of our approach capturing such information. 2020.acl-main.338 @@ -4571,7 +4571,7 @@ YuHong BoweiZou MengCheng - JianminYao + JianminYao 3678–3684 The current aspect extraction methods suffer from boundary errors. In general, these errors lead to a relatively minor difference between the extracted aspects and the ground-truth. However, they hurt the performance severely. In this paper, we propose to utilize a pointer network for repositioning the boundaries. Recycling mechanism is used, which enables the training data to be collected without manual intervention. We conduct the experiments on the benchmark datasets SE14 of laptop and SE14-16 of restaurant. Experimental results show that our method achieves substantial improvements over the baseline, and outperforms state-of-the-art methods. 2020.acl-main.339 @@ -4684,7 +4684,7 @@ Meta-Transfer Learning for Code-Switched Speech Recognition - Genta IndraWinata + Genta IndraWinata SamuelCahyawijaya ZhaojiangLin ZihanLiu @@ -4714,7 +4714,7 @@ YiRen JinglinLiu XuTan - ChenZhang + ChenZhang TaoQin ZhouZhao Tie-YanLiu @@ -4730,7 +4730,7 @@ KaranSingla ZhuohaoChen DavidAtkins - ShrikanthNarayanan + ShrikanthNarayanan 3797–3803 Spoken language understanding tasks usually rely on pipelines involving complex processing blocks such as voice activity detection, speaker diarization and Automatic speech recognition (ASR). We propose a novel framework for predicting utterance level labels directly from speech features, thus removing the dependency on first generating transcripts, and transcription free behavioral coding. Our classifier uses a pretrained Speech-2-Vector encoder as bottleneck to generate word-level representations from speech features. This pretrained encoder learns to encode speech features for a word using an objective similar to Word2Vec. Our proposed approach just uses speech features and word segmentation information for predicting spoken utterance-level target labels. We show that our model achieves competitive results to other state-of-the-art approaches which use transcribed text for the task of predicting psychotherapy-relevant behavior codes. 2020.acl-main.351 @@ -4770,7 +4770,7 @@ InkitPadhi PierreDognin KeBai - CíceroNogueira dos Santos + CíceroNogueira dos Santos VijilChenthamarakshan YoussefMroueh PayelDas @@ -4852,7 +4852,7 @@ Successfully Applying the Stabilized Lottery Ticket Hypothesis to the Transformer Architecture ChristopherBrix ParniaBahar - HermannNey + HermannNey 3909–3915 Sparse models require less memory for storage and enable a faster inference by reducing the necessary number of FLOPs. This is relevant both for time-critical and on-device computations using neural networks. The stabilized lottery ticket hypothesis states that networks can be pruned after none or few training iterations, using a mask computed based on the unpruned converged model. On the transformer architecture and the WMT 2014 English-to-German and English-to-French tasks, we show that stabilized lottery ticket pruning performs similar to magnitude pruning for sparsity levels of up to 85%, and propose a new combination of pruning techniques that outperforms all other techniques for even higher levels of sparsity. Furthermore, we confirm that the parameter’s initial sign and not its specific value is the primary factor for successful training, and show that magnitude pruning cannot be used to find winning lottery tickets. 2020.acl-main.360 @@ -4893,7 +4893,7 @@ An Effectiveness Metric for Ordinal Classification: Formal Properties and Experimental Results - EnriqueAmigo + EnriqueAmigo JulioGonzalo StefanoMizzaro JorgeCarrillo-de-Albornoz @@ -4920,7 +4920,7 @@ Analysing Lexical Semantic Change with Contextualised Word Representations MarioGiulianelli MarcoDel Tredici - RaquelFernández + RaquelFernández 3960–3973 This paper presents the first unsupervised approach to lexical semantic change that makes use of contextualised word representations. We propose a novel method that exploits the BERT neural language model to obtain representations of word usages, clusters these representations into usage types, and measures change along time with three proposed metrics. We create a new evaluation dataset and show that the model representations and the detected semantic shifts are positively correlated with human judgements. Our extensive qualitative analysis demonstrates that our method captures a variety of synchronic and diachronic linguistic phenomena. We expect our work to inspire further research in this direction. 2020.acl-main.365 @@ -4956,7 +4956,7 @@ <fixed-case>BERTRAM</fixed-case>: Improved Word Embeddings Have Big Impact on Contextualized Model Performance TimoSchick - HinrichSchütze + HinrichSchütze 3996–4007 Pretraining deep language models has led to large performance gains in NLP. Despite this success, Schick and Schütze (2020) recently showed that these models struggle to understand rare words. For static word embeddings, this problem has been addressed by separately learning representations for rare words. In this work, we transfer this idea to pretrained language models: We introduce BERTRAM, a powerful architecture based on BERT that is capable of inferring high-quality embeddings for rare words that are suitable as input representations for deep language models. This is achieved by enabling the surface form and contexts of a word to interact with each other in a deep architecture. Integrating BERTRAM into BERT leads to large performance increases due to improved representations of rare and medium frequency words on both a rare word probing task and three downstream tasks. 2020.acl-main.368 @@ -5023,8 +5023,8 @@ He said “who’s gonna take care of your children when you are at <fixed-case>ACL</fixed-case>?”: Reported Sexist Acts are Not Sexist PatriciaChiril - VéroniqueMoriceau - FarahBenamara + VéroniqueMoriceau + FarahBenamara AldaMari GloriaOriggi MarlèneCoulomb-Gully @@ -5081,7 +5081,7 @@ Exact yet Efficient Graph Parsing, Bi-directional Locality and the Constructivist Hypothesis YajieYe - WeiweiSun + WeiweiSun 4100–4110 A key problem in processing graph-based meaning representations is graph parsing, i.e. computing all possible derivations of a given graph according to a (competence) grammar. We demonstrate, for the first time, that exact graph parsing can be efficient for large graphs and with large Hyperedge Replacement Grammars (HRGs). The advance is achieved by exploiting locality as terminal edge-adjacency in HRG rules. In particular, we highlight the importance of 1) a terminal edge-first parsing strategy, 2) a categorization of a subclass of HRG, i.e. what we call Weakly Regular Graph Grammar, and 3) distributing argument-structures to both lexical and phrasal rules. 2020.acl-main.377 @@ -5093,7 +5093,7 @@ Max-Margin Incremental <fixed-case>CCG</fixed-case> Parsing MilošStanojević - MarkSteedman + MarkSteedman 4111–4122 Incremental syntactic parsing has been an active research area both for cognitive scientists trying to model human sentence processing and for NLP researchers attempting to combine incremental parsing with language modelling for ASR and MT. Most effort has been directed at designing the right transition mechanism, but less has been done to answer the question of what a probabilistic model for those transition parsers should look like. A very incremental transition mechanism of a recently proposed CCG parser when trained in straightforward locally normalised discriminative fashion produces very bad results on English CCGbank. We identify three biases as the causes of this problem: label bias, exposure bias and imbalanced probabilities bias. While known techniques for tackling these biases improve results, they still do not make the parser state of the art. Instead, we tackle all of these three biases at the same time using an improved version of beam search optimisation that minimises all beam search violations instead of minimising only the biggest violation. The new incremental parser gives better results than all previously published incremental CCG parsers, and outperforms even some widely used non-incremental CCG parsers. 2020.acl-main.378 @@ -5121,7 +5121,7 @@ JunqiZhang KunBai ConghuiZhu - TiejunZhao + TiejunZhao 4134–4145 With the recent proliferation of the use of text classifications, researchers have found that there are certain unintended biases in text classification datasets. For example, texts containing some demographic identity-terms (e.g., “gay”, “black”) are more likely to be abusive in existing abusive language detection datasets. As a result, models trained with these datasets may consider sentences like “She makes me happy to be gay” as abusive simply because of the word “gay.” In this paper, we formalize the unintended biases in text classification datasets as a kind of selection bias from the non-discrimination distribution to the discrimination distribution. Based on this formalization, we further propose a model-agnostic debiasing training framework by recovering the non-discrimination distribution using instance weighting, which does not require any extra resources or annotations apart from a pre-defined set of demographic identity-terms. Experiments demonstrate that our method can effectively alleviate the impacts of the unintended biases without significantly hurting models’ generalization ability. 2020.acl-main.380 @@ -5131,7 +5131,7 @@ Analyzing analytical methods: The case of phonology in neural models of spoken language - GrzegorzChrupała + GrzegorzChrupała BertrandHigy AfraAlishahi 4146–4156 @@ -5147,7 +5147,7 @@ BrendanShillingford PasqualeMinervini ThomasLukasiewicz - PhilBlunsom + PhilBlunsom 4157–4165 To increase trust in artificial intelligence systems, a promising research direction consists of designing neural models capable of generating natural language explanations for their predictions. In this work, we show that such models are nonetheless prone to generating mutually inconsistent explanations, such as ”Because there is a dog in the image.” and ”Because there is no dog in the [same] image.”, exposing flaws in either the decision-making process of the model or in the generation of the explanations. We introduce a simple yet effective adversarial framework for sanity checking models against the generation of inconsistent natural language explanations. Moreover, as part of the framework, we address the problem of adversarial attacks with full target sequences, a scenario that was not previously addressed in sequence-to-sequence attacks. Finally, we apply our framework on a state-of-the-art neural natural language inference model that provides natural language explanations for its predictions. Our framework shows that this model is capable of generating a significant number of inconsistent explanations. 2020.acl-main.382 @@ -5170,9 +5170,9 @@ Probing for Referential Information in Language Models - Ionut-TeodorSorodoc + Ionut-TeodorSorodoc KristinaGulordava - GemmaBoleda + GemmaBoleda 4177–4189 Language models keep track of complex information about the preceding context – including, e.g., syntactic relations in a sentence. We investigate whether they also capture information beneficial for resolving pronominal anaphora in English. We analyze two state of the art models with LSTM and Transformer architectures, via probe tasks and analysis on a coreference annotated corpus. The Transformer outperforms the LSTM in all analyses. Our results suggest that language models are more successful at learning grammatical constraints than they are at learning truly referential information, in the sense of capturing the fact that we use language to refer to entities in the world. However, we find traces of the latter aspect, too. 2020.acl-main.384 @@ -5207,9 +5207,9 @@ Akash KumarMohankumar PrekshaNema SharanNarasimhan - Mitesh M.Khapra + Mitesh M.Khapra Balaji VasanSrinivasan - BalaramanRavindran + BalaramanRavindran 4206–4216 Recent studies on interpretability of attention distributions have led to notions of faithful and plausible explanations for a model’s predictions. Attention distributions can be considered a faithful explanation if a higher attention weight implies a greater impact on the model’s prediction. They can be considered a plausible explanation if they provide a human-understandable justification for the model’s predictions. In this work, we first explain why current attention mechanisms in LSTM based encoders can neither provide a faithful nor a plausible explanation of the model’s predictions. We observe that in LSTM based encoders the hidden representations at different time-steps are very similar to each other (high conicity) and attention weights in these situations do not carry much meaning because even a random permutation of the attention weights does not affect the model’s predictions. Based on experiments on a wide variety of tasks and datasets, we observe attention distributions often attribute the model’s predictions to unimportant words such as punctuation and fail to offer a plausible explanation for the predictions. To make attention mechanisms more faithful and plausible, we propose a modified LSTM cell with a diversity-driven training objective that ensures that the hidden representations learned at different time steps are diverse. We show that the resulting attention distributions offer more transparency as they (i) provide a more precise importance ranking of the hidden states (ii) are better indicative of words important for the model’s predictions (iii) correlate better with gradient-based attribution methods. Human evaluations indicate that the attention distributions learned by our model offer a plausible explanation of the model’s predictions. Our code has been made publicly available at https://github.com/akashkm99/Interpretable-Attention 2020.acl-main.387 @@ -5233,8 +5233,8 @@ Modeling Word Formation in <fixed-case>E</fixed-case>nglish–<fixed-case>G</fixed-case>erman Neural Machine Translation - MarionWeller-Di Marco - AlexanderFraser + MarionWeller-Di Marco + AlexanderFraser 4227–4232 This paper studies strategies to model word formation in NMT using rich linguistic information, namely a word segmentation approach that goes beyond splitting into substrings by considering fusional morphology. Our linguistically sound segmentation is combined with a method for target-side inflection to accommodate modeling word formation. The best system variants employ source-side morphological analysis and model complex target-side words, improving over a standard system. 2020.acl-main.389 @@ -5290,7 +5290,7 @@ Identifying Principals and Accessories in a Complex Case based on the Comprehension of Fact Description YakunHu ZhunchenLuo - WenhanChao + WenhanChao 4265–4269 In this paper, we study the problem of identifying the principals and accessories from the fact description with multiple defendants in a criminal case. We treat the fact descriptions as narrative texts and the defendants as roles over the narrative story. We propose to model the defendants with behavioral semantic information and statistical characteristics, then learning the importances of defendants within a learning-to-rank framework. Experimental results on a real-world dataset demonstrate the behavior analysis can effectively model the defendants’ impacts in a complex case. 2020.acl-main.393 @@ -5327,7 +5327,7 @@ Toxicity Detection: Does Context Really Matter? JohnPavlopoulos - JeffreySorensen + JeffreySorensen LucasDixon NithumThain IonAndroutsopoulos @@ -5342,7 +5342,7 @@ <fixed-case>AMR</fixed-case> Parsing with Latent Structural Information QijiZhou YueZhang - DonghongJi + DonghongJi HaoTang 4306–4319 Abstract Meaning Representations (AMRs) capture sentence-level semantics structural representations to broad-coverage natural sentences. We investigate parsing AMR with explicit dependency structures and interpretable latent structures. We generate the latent soft structure without additional annotations, and fuse both dependency and latent structure via an extended graph neural networks. The fused structural information helps our experiments results to achieve the best reported results on both AMR 2.0 (77.5% Smatch F1 on LDC2017T10) and AMR 1.0 ((71.8% Smatch F1 on LDC2014T12). @@ -5355,7 +5355,7 @@ <fixed-case>T</fixed-case>a<fixed-case>P</fixed-case>as: Weakly Supervised Table Parsing via Pre-training JonathanHerzig Pawel KrzysztofNowak - ThomasMüller + ThomasMüller FrancescoPiccinno JulianEisenschlos 4320–4333 @@ -5394,7 +5394,7 @@ Dushyant SinghChauhan DhanushS R AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 4351–4360 In this paper, we hypothesize that sarcasm is closely related to sentiment and emotion, and thereby propose a multi-task deep learning framework to solve all these three problems simultaneously in a multi-modal conversational scenario. We, at first, manually annotate the recently released multi-modal MUStARD sarcasm dataset with sentiment and emotion classes, both implicit and explicit. For multi-tasking, we propose two attention mechanisms, viz. Inter-segment Inter-modal Attention (Ie-Attention) and Intra-segment Inter-modal Attention (Ia-Attention). The main motivation of Ie-Attention is to learn the relationship between the different segments of the sentence across the modalities. In contrast, Ia-Attention focuses within the same segment of the sentence across the modalities. Finally, representations from both the attentions are concatenated and shared across the five classes (i.e., sarcasm, implicit sentiment, explicit sentiment, implicit emotion, explicit emotion) for multi-tasking. Experimental results on the extended version of the MUStARD dataset show the efficacy of our proposed approach for sarcasm detection over the existing state-of-the-art systems. The evaluation also shows that the proposed multi-task framework yields better performance for the primary task, i.e., sarcasm detection, with the help of two secondary tasks, emotion and sentiment analysis. 2020.acl-main.401 @@ -5407,7 +5407,7 @@ TulikaSaha AdityaPatra SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya 4361–4372 The task of Dialogue Act Classification (DAC) that purports to capture communicative intent has been studied extensively. But these studies limit themselves to text. Non-verbal features (change of tone, facial expressions etc.) can provide cues to identify DAs, thus stressing the benefit of incorporating multi-modal inputs in the task. Also, the emotional state of the speaker has a substantial effect on the choice of the dialogue act, since conversations are often influenced by emotions. Hence, the effect of emotion too on automatic identification of DAs needs to be studied. In this work, we address the role of both multi-modality and emotion recognition (ER) in DAC. DAC and ER help each other by way of multi-task learning. One of the major contributions of this work is a new dataset- multimodal Emotion aware Dialogue Act dataset called EMOTyDA, collected from open-sourced dialogue datasets. To demonstrate the utility of EMOTyDA, we build an attention based (self, inter-modal, inter-task) multi-modal, multi-task Deep Neural Network (DNN) for joint learning of DAs and emotions. We show empirically that multi-modality and multi-tasking achieve better performance of DAC compared to uni-modal and single task DAC variants. 2020.acl-main.402 @@ -5419,7 +5419,7 @@ Analyzing Political Parody in Social Media AntonisMaronikolakis DanaeSánchez Villegas - DanielPreotiuc-Pietro + DanielPreotiuc-Pietro NikolaosAletras 4373–4384 Parody is a figurative device used to imitate an entity for comedic or critical purposes and represents a widespread phenomenon in social media through many popular parody accounts. In this paper, we present the first computational study of parody. We introduce a new publicly available data set of tweets from real politicians and their corresponding parody accounts. We run a battery of supervised machine learning models for automatically detecting parody tweets with an emphasis on robustness by testing on tweets from accounts unseen in training, across different genders and across countries. Our results show that political parody tweets can be predicted with an accuracy up to 90%. Finally, we identify the markers of parody through a linguistic analysis. Beyond research in linguistics and political communication, accurately and automatically detecting parody is important to improving fact checking for journalists and analytics such as sentiment analysis through filtering out parodical utterances. @@ -5431,7 +5431,7 @@ Masking Actor Information Leads to Fairer Political Claims Detection ErenayDayanik - SebastianPadó + SebastianPadó 4385–4391 A central concern in Computational Social Sciences (CSS) is fairness: where the role of NLP is to scale up text analysis to large corpora, the quality of automatic analyses should be as independent as possible of textual properties. We analyze the performance of a state-of-the-art neural model on the task of political claims detection (i.e., the identification of forward-looking statements made by political actors) and identify a strong frequency bias: claims made by frequent actors are recognized better. We propose two simple debiasing methods which mask proper names and pronouns during training of the model, thus removing personal information bias. We find that (a) these methods significantly decrease frequency bias while keeping the overall performance stable; and (b) the resulting models improve when evaluated in an out-of-domain setting. 2020.acl-main.404 @@ -5470,7 +5470,7 @@ EugeneKharitonov DianeBouchacourt EmmanuelDupoux - MarcoBaroni + MarcoBaroni 4427–4442 Natural language allows us to refer to novel composite concepts by combining expressions denoting their parts according to systematic rules, a property known as compositionality. In this paper, we study whether the language emerging in deep multi-agent simulations possesses a similar ability to refer to novel primitive combinations, and whether it accomplishes this feat by strategies akin to human-language compositionality. Equipped with new ways to measure compositionality in emergent languages inspired by disentanglement in representation learning, we establish three main results: First, given sufficiently large input spaces, the emergent language will naturally develop the ability to refer to novel composite concepts. Second, there is no correlation between the degree of compositionality of an emergent language and its ability to generalize. Third, while compositionality is not necessary for generalization, it provides an advantage in terms of language transmission: The more compositional a language is, the more easily it will be picked up by new learners, even when the latter differ in architecture from the original agents. We conclude that compositionality does not arise from simple generalization pressure, but if an emergent language does chance upon it, it will be more likely to survive and thrive. 2020.acl-main.407 @@ -5486,7 +5486,7 @@ EricLehman CaimingXiong RichardSocher - Byron C.Wallace + Byron C.Wallace 4443–4458 State-of-the-art models in NLP are now predominantly based on deep neural networks that are opaque in terms of how they come to make predictions. This limitation has increased interest in designing more interpretable deep models for NLP that reveal the ‘reasoning’ behind model outputs. But work in this direction has been conducted on different datasets and tasks with correspondingly unique aims and metrics; this makes it difficult to track progress. We propose the Evaluating Rationales And Simple English Reasoning (ERASER a benchmark to advance research on interpretable models in NLP. This benchmark comprises multiple datasets and tasks for which human annotations of “rationales” (supporting evidence) have been collected. We propose several metrics that aim to capture how well the rationales provided by models align with human rationales, and also how faithful these rationales are (i.e., the degree to which provided rationales influenced the corresponding predictions). Our hope is that releasing this benchmark facilitates progress on designing more interpretable NLP systems. The benchmark, code, and documentation are available at https://www.eraserbenchmark.com/ 2020.acl-main.408 @@ -5500,7 +5500,7 @@ SarthakJain SarahWiegreffe YuvalPinter - Byron C.Wallace + Byron C.Wallace 4459–4473 In many settings it is important for one to be able to understand why a model made a particular prediction. In NLP this often entails extracting snippets of an input text ‘responsible for’ corresponding model output; when such a snippet comprises tokens that indeed informed the model’s prediction, it is a faithful explanation. In some settings, faithfulness may be critical to ensure transparency. Lei et al. (2016) proposed a model to produce faithful rationales for neural text classification by defining independent snippet extraction and prediction modules. However, the discrete selection over input tokens performed by this method complicates training, leading to high variance and requiring careful hyperparameter tuning. We propose a simpler variant of this approach that provides faithful explanations by construction. In our scheme, named FRESH, arbitrary feature importance scores (e.g., gradients from a trained model) are used to induce binary labels over token inputs, which an extractor can be trained to predict. An independent classifier module is then trained exclusively on snippets provided by the extractor; these snippets thus constitute faithful explanations, even if the classifier is arbitrarily complex. In both automatic and manual evaluations we find that variants of this simple framework yield predictive performance superior to ‘end-to-end’ approaches, while being more general and easier to train. Code is available at https://github.com/successar/FRESH. 2020.acl-main.409 @@ -5537,7 +5537,7 @@ Improving Multi-hop Question Answering over Knowledge Graphs using Knowledge Base Embeddings ApoorvSaxena AditayTripathi - ParthaTalukdar + ParthaTalukdar 4498–4507 Knowledge Graphs (KG) are multi-relational graphs consisting of entities as nodes and relations among them as typed edges. Goal of the Question Answering over KG (KGQA) task is to answer natural language queries posed over the KG. Multi-hop KGQA requires reasoning over multiple edges of the KG to arrive at the right answer. KGs are often incomplete with many missing links, posing additional challenges for KGQA, especially for multi-hop KGQA. Recent research on multi-hop KGQA has attempted to handle KG sparsity using relevant external text, which isn’t always readily available. In a separate line of research, KG embedding methods have been proposed to reduce KG sparsity by performing missing link prediction. Such KG embedding methods, even though highly relevant, have not been explored for multi-hop KGQA so far. We fill this gap in this paper and propose EmbedKGQA. EmbedKGQA is particularly effective in performing multi-hop KGQA over sparse KGs. EmbedKGQA also relaxes the requirement of answer selection from a pre-specified neighborhood, a sub-optimal constraint enforced by previous multi-hop KGQA methods. Through extensive experiments on multiple benchmark datasets, we demonstrate EmbedKGQA’s effectiveness over other state-of-the-art baselines. 2020.acl-main.412 @@ -5547,7 +5547,7 @@ Template-Based Question Generation from Retrieved Sentences for Improved Unsupervised Question Answering - AlexanderFabbri + AlexanderFabbri PatrickNg ZhiguoWang RameshNallapati @@ -5578,8 +5578,8 @@ TiagoPimentel MatthewWiesner RyanCotterell - Alan WBlack - JasonEisner + Alan WBlack + JasonEisner 4526–4546 A major hurdle in data-driven research on typology is having sufficient data in many languages to draw meaningful conclusions. We present VoxClamantis v1.0, the first large-scale corpus for phonetic typology, with aligned segments and estimated phoneme-level labels in 690 readings spanning 635 languages, along with acoustic-phonetic measures of vowels and sibilants. Access to such data can greatly facilitate investigation of phonetic typology at a large scale and across many languages. However, it is non-trivial and computationally intensive to obtain such alignments for hundreds of languages, many of which have few to no resources presently available. We describe the methodology to create our corpus, discuss caveats with current methods and their impact on the utility of this data, and illustrate possible research directions through a series of case studies on the 48 highest-quality readings. Our corpus and scripts are publicly available for non-commercial use at https://voxclamantisproject.github.io. 2020.acl-main.415 @@ -5590,7 +5590,7 @@ <fixed-case>D</fixed-case>scorer: A Fast Evaluation Metric for Discourse Representation Structure Parsing JiangmingLiu - Shay B.Cohen + Shay B.Cohen MirellaLapata 4547–4554 Discourse representation structures (DRSs) are scoped semantic representations for texts of arbitrary length. Evaluating the accuracy of predicted DRSs plays a key role in developing semantic parsers and improving their performance. DRSs are typically visualized as boxes which are not straightforward to process automatically. Counter transforms DRSs to clauses and measures clause overlap by searching for variable mappings between two DRSs. However, this metric is computationally costly (with respect to memory and CPU time) and does not scale with longer texts. We introduce Dscorer, an efficient new metric which converts box-style DRSs to graphs and then measures the overlap of n-grams. Experiments show that Dscorer computes accuracy scores that are correlated with Counter at a fraction of the time. @@ -5606,14 +5606,14 @@ BarryHaddow KennethHeafield HieuHoang - MiquelEsplà-Gomis - Mikel L.Forcada + MiquelEsplà-Gomis + Mikel L.Forcada AmirKamran FaheemKirefu PhilippKoehn - SergioOrtiz Rojas + SergioOrtiz Rojas LeopoldoPla Sempere - GemaRamírez-Sánchez + GemaRamírez-Sánchez ElsaSarrías MarekStrelec BrianThompson @@ -5630,7 +5630,7 @@ Toward Gender-Inclusive Coreference Resolution Yang TristaCao - HalDaumé III + HalDaumé III 4568–4595 Correctly resolving textual mentions of people fundamentally entails making inferences about those people. Such inferences raise the risk of systemic biases in coreference resolution systems, including biases that can harm binary and non-binary trans and cis stakeholders. To better understand such biases, we foreground nuanced conceptualizations of gender from sociology and sociolinguistics, and develop two new datasets for interrogating bias in crowd annotations and in existing coreference resolution systems. Through these studies, conducted on English text, we confirm that without acknowledging and building systems that recognize the complexity of gender, we build systems that lead to many potential harms. 2020.acl-main.418 @@ -5686,7 +5686,7 @@ HassanSajjad NadirDurrani FahimDalvi - JamesGlass + JamesGlass 4638–4655 This paper investigates contextual word representation models from the lens of similarity analysis. Given a collection of trained models, we measure the similarity of their internal representations and attention. Critically, these models come from vastly different architectures. We use existing and novel similarity measures that aim to gauge the level of localization of information in the deep models, and facilitate the investigation of which design factors affect model similarity, without requiring any external linguistic annotation. The analysis reveals that models within the same family are more similar to one another, as may be expected. Surprisingly, different architectures have rather similar representations, but different individual neurons. We also observed differences in information localization in lower and higher layers and found that higher layers are more affected by fine-tuning on downstream tasks. 2020.acl-main.422 @@ -5717,8 +5717,8 @@ FernandoAlva-Manchego LouisMartin AntoineBordes - CarolinaScarton - BenoîtSagot + CarolinaScarton + BenoîtSagot LuciaSpecia 4668–4679 In order to simplify a sentence, human editors perform multiple rewriting transformations: they split it into several shorter sentences, paraphrase words (i.e. replacing complex words or phrases by simpler synonyms), reorder components, and/or delete information deemed unnecessary. Despite these varied range of possible text alterations, current models for automatic sentence simplification are evaluated using datasets that are focused on a single transformation, such as lexical paraphrasing or splitting. This makes it impossible to understand the ability of simplification models in more realistic settings. To alleviate this limitation, this paper introduces ASSET, a new dataset for assessing sentence simplification in English. ASSET is a crowdsourced multi-reference corpus where each simplification was produced by executing several rewriting transformations. Through quantitative and qualitative experiments, we show that simplifications in ASSET are better at capturing characteristics of simplicity when compared to other standard evaluation datasets for the task. Furthermore, we motivate the need for developing better methods for automatic evaluation using ASSET, since we show that current popular metrics may not be suitable when multiple simplification transformations are performed. @@ -5746,7 +5746,7 @@ HeeyoungKwon MohaddesehBastan NiranjanBalasubramanian - NathanaelChambers + NathanaelChambers 4687–4692 Predicting how events induce emotions in the characters of a story is typically seen as a standard multi-label classification task, which usually treats labels as anonymous classes to predict. They ignore information that may be conveyed by the emotion labels themselves. We propose that the semantics of emotion labels can guide a model’s attention when representing the input story. Further, we observe that the emotions evoked by an event are often related: an event that evokes joy is unlikely to also evoke sadness. In this work, we explicitly model label classes via label embeddings, and add mechanisms that track label-label correlations both during training and inference. We also introduce a new semi-supervision strategy that regularizes for the correlations on unlabeled data. Our empirical evaluations show that modeling label semantics yields consistent benefits, and we advance the state-of-the-art on an emotion inference task. 2020.acl-main.426 @@ -5813,7 +5813,7 @@ <fixed-case>I</fixed-case>nterpreting <fixed-case>P</fixed-case>retrained <fixed-case>C</fixed-case>ontextualized <fixed-case>R</fixed-case>epresentations via <fixed-case>R</fixed-case>eductions to <fixed-case>S</fixed-case>tatic <fixed-case>E</fixed-case>mbeddings RishiBommasani KellyDavis - ClaireCardie + ClaireCardie 4758–4781 Contextualized representations (e.g. ELMo, BERT) have become the default pretrained representations for downstream NLP applications. In some settings, this transition has rendered their static embedding predecessors (e.g. Word2Vec, GloVe) obsolete. As a side-effect, we observe that older interpretability methods for static embeddings — while more diverse and mature than those available for their dynamic counterparts — are underutilized in studying newer contextualized representations. Consequently, we introduce simple and fully general methods for converting from contextualized representations to static lookup-table embeddings which we apply to 5 popular pretrained models and 9 sets of pretrained weights. Our analysis of the resulting static embeddings notably reveals that pooling over many contexts significantly improves representational quality under intrinsic evaluation. Complementary to analyzing representational quality, we consider social biases encoded in pretrained representations with respect to gender, race/ethnicity, and religion and find that bias is encoded disparately across pretrained models and internal layers even for models with the same training data. Concerningly, we find dramatic inconsistencies between social bias estimators for word embeddings. 2020.acl-main.431 @@ -5828,7 +5828,7 @@ MansiGupta BhuwanDhingra GrahamNeubig - Zachary C.Lipton + Zachary C.Lipton 4782–4793 Attention mechanisms are ubiquitous components in neural architectures applied to natural language processing. In addition to yielding gains in predictive accuracy, attention weights are often claimed to confer interpretability, purportedly useful both for providing insights to practitioners and for explaining why a model makes its decisions to stakeholders. We call the latter use of attention mechanisms into question by demonstrating a simple method for training models to produce deceptive attention masks. Our method diminishes the total weight assigned to designated impermissible tokens, even when the models can be shown to nevertheless rely on these features to drive predictions. Across multiple models and tasks, our approach manipulates attention weights while paying surprisingly little cost in accuracy. Through a human study, we show that our manipulated attention-based explanations deceive people into thinking that predictions from a model biased against gender minorities do not rely on the gender. Consequently, our results cast doubt on attention’s reliability as a tool for auditing algorithms in the context of fairness and accountability. 2020.acl-main.432 @@ -5875,7 +5875,7 @@ Shaping Visual Representations with Language for Few-Shot Classification JesseMu PercyLiang - NoahGoodman + NoahGoodman 4823–4830 By describing the features and abstractions of our world, language is a crucial tool for human learning and a promising source of supervision for machine learning models. We use language to improve few-shot visual classification in the underexplored scenario where natural language task descriptions are available during training, but unavailable for novel tasks at test time. Existing models for this setting sample new descriptions at test time and use those to classify images. Instead, we propose language-shaped learning (LSL), an end-to-end model that regularizes visual representations to predict language. LSL is conceptually simpler, more data efficient, and outperforms baselines in two challenging few-shot domains. 2020.acl-main.436 @@ -5929,7 +5929,7 @@ ElnazNouri ChrisBrockett DebadeeptaDey - BillDolan + BillDolan 4871–4884 Many high-level procedural tasks can be decomposed into sequences of instructions that vary in their order and choice of tools. In the cooking domain, the web offers many, partially-overlapping, text and video recipes (i.e. procedures) that describe how to make the same dish (i.e. high-level task). Aligning instructions for the same dish across different sources can yield descriptive visual explanations that are far richer semantically than conventional textual instructions, providing commonsense insight into how real-world procedures are structured. Learning to align these different instruction sets is challenging because: a) different recipes vary in their order of instructions and use of ingredients; and b) video instructions can be noisy and tend to contain far more information than text instructions. To address these challenges, we use an unsupervised alignment algorithm that learns pairwise alignments between instructions of different recipes for the same dish. We then use a graph algorithm to derive a joint alignment between multiple text and multiple video recipes for the same dish. We release the Microsoft Research Multimodal Aligned Recipe Corpus containing ~150K pairwise alignments between recipes across 4262 dishes with rich commonsense information. 2020.acl-main.440 @@ -5954,7 +5954,7 @@ Beyond Accuracy: Behavioral Testing of <fixed-case>NLP</fixed-case> Models with <fixed-case>C</fixed-case>heck<fixed-case>L</fixed-case>ist - Marco TulioRibeiro + Marco TulioRibeiro TongshuangWu CarlosGuestrin SameerSingh @@ -5984,7 +5984,7 @@ Dialogue-Based Relation Extraction DianYu KaiSun - ClaireCardie + ClaireCardie DongYu 4927–4940 We present the first human-annotated dialogue-based relation extraction (RE) dataset DialogRE, aiming to support the prediction of relation(s) between two arguments that appear in a dialogue. We further offer DialogRE as a platform for studying cross-sentence RE as most facts span multiple sentences. We argue that speaker-related information plays a critical role in the proposed task, based on an analysis of similarities and differences between dialogue-based and traditional RE tasks. Considering the timeliness of communication in a dialogue, we design a new metric to evaluate the performance of RE methods in a conversational setting and investigate the performance of several representative RE methods on DialogRE. Experimental results demonstrate that a speaker-aware extension on the best-performing model leads to gains in both the standard and conversational evaluation settings. DialogRE is available at https://dataset.org/dialogre/. @@ -6011,7 +6011,7 @@ More Diverse Dialogue Datasets via Diversity-Informed Data Collection KatherineStasaski Grace HuiYang - Marti A.Hearst + Marti A.Hearst 4958–4968 Automated generation of conversational dialogue using modern neural architectures has made notable advances. However, these models are known to have a drawback of often producing uninteresting, predictable responses; this is known as the diversity problem. We introduce a new strategy to address this problem, called Diversity-Informed Data Collection. Unlike prior approaches, which modify model architectures to solve the problem, this method uses dynamically computed corpus-level statistics to determine which conversational participants to collect data from. Diversity-Informed Data Collection produces significantly more diverse data than baseline data collection methods, and better results on two downstream tasks: emotion classification and dialogue generation. This method is generalizable and can be used with other corpus-level metrics. 2020.acl-main.446 @@ -6022,10 +6022,10 @@ <fixed-case>S</fixed-case>2<fixed-case>ORC</fixed-case>: The Semantic Scholar Open Research Corpus KyleLo - Lucy LuWang + Lucy LuWang MarkNeumann RodneyKinney - DanielWeld + DanielWeld 4969–4983 We introduce S2ORC, a large corpus of 81.1M English-language academic papers spanning many academic disciplines. The corpus consists of rich metadata, paper abstracts, resolved bibliographic references, as well as structured full text for 8.1M open access papers. Full text is annotated with automatically-detected inline mentions of citations, figures, and tables, each linked to their corresponding paper objects. In S2ORC, we aggregate papers from hundreds of academic publishers and digital archives into a unified source, and create the largest publicly-available collection of machine-readable academic text to date. We hope this resource will facilitate research and development of tools and tasks for text mining over academic text. 2020.acl-main.447 @@ -6036,8 +6036,8 @@ Tangled up in <fixed-case>BLEU</fixed-case>: Reevaluating the Evaluation of Automatic Machine Translation Evaluation Metrics NitikaMathur - TimothyBaldwin - TrevorCohn + TimothyBaldwin + TrevorCohn 4984–4997 Automatic metrics are fundamental for the development and evaluation of machine translation systems. Judging whether, and to what extent, automatic metrics concur with the gold standard of human evaluation is not a straightforward problem. We show that current methods for judging metrics are highly sensitive to the translations used for assessment, particularly the presence of outliers, which often leads to falsely confident conclusions about a metric’s efficacy. Finally, we turn to pairwise system ranking, developing a method for thresholding performance improvement under an automatic metric against human judgements, which allows quantification of type I versus type II errors incurred, i.e., insignificant human differences in system quality that are accepted, and significant human differences that are rejected. Together, these findings suggest improvements to the protocols for metric evaluation and system performance evaluation in machine translation. Honorable Mention for Best Overall Paper @@ -6048,7 +6048,7 @@ A Transformer-based Approach for Source Code Summarization - WasiAhmad + WasiAhmad SaikatChakraborty BaishakhiRay Kai-WeiChang @@ -6103,8 +6103,8 @@ Exploring Content Selection in Summarization of Novel Chapters FaisalLadhak BryanLi - YaserAl-Onaizan - KathleenMcKeown + YaserAl-Onaizan + KathleenMcKeown 5043–5054 We present a new summarization task, generating summaries of novel chapters using summary/chapter pairs from online study guides. This is a harder task than the news summarization task, given the chapter length as well as the extreme paraphrasing and generalization found in the summaries. We focus on extractive summarization, which requires the creation of a gold-standard set of extractive summaries. We present a new metric for aligning reference summary sentences with chapter sentences to create gold extracts and also experiment with different alignment methods. Our experiments demonstrate significant improvement over prior alignment approaches for our task as shown through automatic metrics and a crowd-sourced pyramid analysis. 2020.acl-main.453 @@ -6116,7 +6116,7 @@ <fixed-case>FEQA</fixed-case>: A Question Answering Evaluation Framework for Faithfulness Assessment in Abstractive Summarization EsinDurmus HeHe - MonaDiab + MonaDiab 5055–5070 Neural abstractive summarization models are prone to generate content inconsistent with the source document, i.e. unfaithful. Existing automatic metrics do not capture such mistakes effectively. We tackle the problem of evaluating faithfulness of a generated summary given its source document. We first collected human annotations of faithfulness for outputs from numerous models on two datasets. We find that current models exhibit a trade-off between abstractiveness and faithfulness: outputs with less word overlap with the source document are more likely to be unfaithful. Next, we propose an automatic question answering (QA) based metric for faithfulness, FEQA, which leverages recent advances in reading comprehension. Given question-answer pairs generated from the summary, a QA model extracts answers from the document; non-matched answers indicate unfaithful information in the summary. Among metrics based on word overlap, embedding similarity, and learned language understanding models, our QA-based metric has significantly higher correlation with human faithfulness scores, especially on highly abstractive summaries. 2020.acl-main.454 @@ -6169,7 +6169,7 @@ YuhaoZhang DerekMerck EmilyTsai - Christopher D.Manning + Christopher D.Manning CurtisLanglotz 5108–5120 Neural abstractive summarization models are able to generate summaries which have high overlap with human references. However, existing models are not optimized for factual correctness, a critical metric in real-world applications. In this work, we develop a general framework where we evaluate the factual correctness of a generated summary by fact-checking it automatically against its reference using an information extraction module. We further propose a training strategy which optimizes a neural summarization model with a factual correctness reward via reinforcement learning. We apply the proposed method to the summarization of radiology reports, where factual correctness is a key requirement. On two separate datasets collected from hospitals, we show via both automatic and human evaluation that the proposed approach substantially improves the factual correctness and overall quality of outputs over a competitive neural summarization system, producing radiology summaries that approach the quality of human-authored ones. @@ -6195,7 +6195,7 @@ PhilippeLaban AndrewHsi JohnCanny - Marti A.Hearst + Marti A.Hearst 5135–5150 This work presents a new approach to unsupervised abstractive summarization based on maximizing a combination of coverage and fluency for a given length constraint. It introduces a novel method that encourages the inclusion of key terms from the original document into the summary: key terms are masked out of the original document and must be filled in by a coverage model using the current generated summary. A novel unsupervised training procedure leverages this coverage model along with a fluency model to generate and score summaries. When tested on popular news summarization datasets, the method outperforms previous unsupervised methods by more than 2 R-1 points, and approaches results of competitive supervised methods. Our model attains higher levels of abstraction with copied passages roughly two times shorter than prior work, and learns to compress and merge sentences without supervision. 2020.acl-main.460 @@ -6218,7 +6218,7 @@ (<fixed-case>R</fixed-case>e)construing Meaning in <fixed-case>NLP</fixed-case> SeanTrott - Tiago TimponiTorrent + Tiago TimponiTorrent NancyChang NathanSchneider 5170–5184 @@ -6230,7 +6230,7 @@ Climbing towards <fixed-case>NLU</fixed-case>: <fixed-case>On</fixed-case> Meaning, Form, and Understanding in the Age of Data - Emily M.Bender + Emily M.Bender AlexanderKoller 5185–5198 The success of the large neural language models on many NLP tasks is exciting. However, we find that these successes sometimes lead to hype in which these models are being described as “understanding” language or capturing “meaning”. In this position paper, we argue that a system trained only on form has a priori no way to learn meaning. In keeping with the ACL 2020 theme of “Taking Stock of Where We’ve Been and Where We’re Going”, we argue that a clear understanding of the distinction between form and meaning will help guide the field towards better science around natural language understanding. @@ -6242,7 +6242,7 @@ Examining Citations of Natural Language Processing Literature - Saif M.Mohammad + Saif M.Mohammad 5199–5209 We extracted information from the ACL Anthology (AA) and Google Scholar (GS) to examine trends in citations of NLP papers. We explore questions such as: how well cited are papers of different types (journal articles, conference papers, demo papers, etc.)? how well cited are papers from different areas of within NLP? etc. Notably, we show that only about 56% of the papers in AA are cited ten or more times. CL Journal has the most cited papers, but its citation dominance has lessened in recent years. On average, long papers get almost three times as many citations as short papers; and papers on sentiment classification, anaphora resolution, and entity recognition have the highest median citations. The analyses presented here, and the associated dataset of NLP papers mapped to citations, have a number of uses including: understanding how the field is growing and quantifying the impact of different types of papers. 2020.acl-main.464 @@ -6285,8 +6285,8 @@ XiaoyiZhang Richard YuanzhePang ClaraVania - KatharinaKann - Samuel R.Bowman + KatharinaKann + Samuel R.Bowman 5231–5247 While pretrained models such as BERT have shown large gains across natural language understanding tasks, their performance can be improved by further training the model on a data-rich intermediate task, before fine-tuning it on a target task. However, it is still poorly understood when and why intermediate-task training is beneficial for a given target task. To investigate this, we perform a large-scale study on the pretrained RoBERTa model with 110 intermediate-target task combinations. We further evaluate all trained models with 25 probing tasks meant to reveal the specific skills that drive transfer. We observe that intermediate tasks requiring high-level inference and reasoning abilities tend to work best. We also observe that target task performance is strongly correlated with higher-level abilities such as coreference resolution. However, we fail to observe more granular correlations between probing and target task performance, highlighting the need for further work on broad-coverage probing benchmarks. We also observe evidence that the forgetting of knowledge learned during pretraining may limit our analysis, highlighting the need for further work on transfer learning methods in these settings. 2020.acl-main.467 @@ -6297,7 +6297,7 @@ Predictive Biases in Natural Language Processing Models: A Conceptual Framework and Overview Deven SantoshShah - H. AndrewSchwartz + H. AndrewSchwartz DirkHovy 5248–5264 An increasing number of natural language processing papers address the effect of bias on predictions, introducing mitigation techniques at different parts of the standard NLP pipeline (data and models). However, these works have been conducted individually, without a unifying framework to organize efforts within the field. This situation leads to repetitive approaches, and focuses overly on bias symptoms/effects, rather than on their origins, which could limit the development of effective countermeasures. In this paper, we propose a unifying predictive bias framework for NLP. We summarize the NLP literature and suggest general mathematical definitions of predictive bias. We differentiate two consequences of bias: outcome disparities and error disparities, as well as four potential origins of biases: label bias, selection bias, model overamplification, and semantic bias. Our framework serves as an overview of predictive bias in NLP, integrating existing work into a single structure, and providing a conceptual baseline for improved frameworks. @@ -6349,7 +6349,7 @@ Hierarchical Modeling for User Personality Prediction: The Role of Message-Level Attention VeronicaLynn NiranjanBalasubramanian - H. AndrewSchwartz + H. AndrewSchwartz 5306–5316 Not all documents are equally important. Language processing is increasingly finding use as a supplement for questionnaires to assess psychological attributes of consenting individuals, but most approaches neglect to consider whether all documents of an individual are equally informative. In this paper, we present a novel model that uses message-level attention to learn the relative weight of users’ social media posts for assessing their five factor personality traits. We demonstrate that models with message-level attention outperform those with word-level attention, and ultimately yield state-of-the-art accuracies for all five traits by using both word and message attention in combination with past approaches (an average increase in Pearson r of 2.5%). In addition, examination of the high-signal posts identified by our model provides insight into the relationship between language and personality, helping to inform future work. 2020.acl-main.472 @@ -6362,7 +6362,7 @@ Measuring Forecasting Skill from Text ShiZong AlanRitter - EduardHovy + EduardHovy 5317–5331 People vary in their ability to make accurate predictions about the future. Prior studies have shown that some individuals can predict the outcome of future events with consistently better accuracy. This leads to a natural question: what makes some forecasters better than others? In this paper we explore connections between the language people use to describe their predictions and their forecasting skill. Datasets from two different forecasting domains are explored: (1) geopolitical forecasts from Good Judgment Open, an online prediction forum and (2) a corpus of company earnings forecasts made by financial analysts. We present a number of linguistic metrics which are computed over text associated with people’s predictions about the future including: uncertainty, readability, and emotion. By studying linguistic factors associated with predictions, we are able to shed some light on the approach taken by skilled forecasters. Furthermore, we demonstrate that it is possible to accurately predict forecasting skill using a model that is based solely on language. This could potentially be useful for identifying accurate predictions or potentially skilled forecasters earlier. 2020.acl-main.473 @@ -6386,7 +6386,7 @@ Text-Based Ideal Points KeyonVafa SureshNaidu - DavidBlei + DavidBlei 5345–5357 Ideal point models analyze lawmakers’ votes to quantify their political positions, or ideal points. But votes are not the only way to express a political position. Lawmakers also give speeches, release press statements, and post tweets. In this paper, we introduce the text-based ideal point model (TBIP), an unsupervised probabilistic topic model that analyzes texts to quantify the political positions of its authors. We demonstrate the TBIP with two types of politicized text data: U.S. Senate speeches and senator tweets. Though the model does not analyze their votes or political affiliations, the TBIP separates lawmakers by party, learns interpretable politicized topics, and infers ideal points close to the classical vote-based ideal points. One benefit of analyzing texts, as opposed to votes, is that the TBIP can estimate ideal points of anyone who authors political texts, including non-voting actors. To this end, we use it to study tweets from the 2020 Democratic presidential candidates. Using only the texts of their tweets, it identifies them along an interpretable progressive-to-moderate spectrum. 2020.acl-main.475 @@ -6412,9 +6412,9 @@ DonovanOng JieFu AlvinChan - NancyChen - Anh TuanLuu - ChrisPal + NancyChen + Anh TuanLuu + ChrisPal 5369–5373 Understanding human preferences, along with cultural and social nuances, lives at the heart of natural language understanding. Concretely, we present a new task and corpus for learning alignments between machine and human preferences. Our newly introduced problem is concerned with predicting the preferable options from two sentences describing scenarios that may involve social and cultural situations. Our problem is framed as a natural language inference task with crowd-sourced preference votes by human players, obtained from a gamified voting platform. We benchmark several state-of-the-art neural models, along with BERT and friends on this task. Our experimental results show that current state-of-the-art NLP models still leave much room for improvement. 2020.acl-main.477 @@ -6426,7 +6426,7 @@ Discourse as a Function of Event: Profiling Discourse Structure in News Articles around the Main Event - Prafulla KumarChoubey + Prafulla KumarChoubey AaronLee RuihongHuang LuWang @@ -6508,7 +6508,7 @@ Double-Hard Debias: Tailoring Word Embeddings for Gender Bias Mitigation TianluWang - Xi VictoriaLin + Xi VictoriaLin Nazneen FatemaRajani BryanMcCann VicenteOrdonez @@ -6524,8 +6524,8 @@ Language (Technology) is Power: A Critical Survey of “Bias” in <fixed-case>NLP</fixed-case> Su LinBlodgett SolonBarocas - HalDaumé III - HannaWallach + HalDaumé III + HannaWallach 5454–5476 We survey 146 papers analyzing “bias” in NLP systems, finding that their motivations are often vague, inconsistent, and lacking in normative reasoning, despite the fact that analyzing “bias” is an inherently normative process. We further find that these papers’ proposed quantitative techniques for measuring or mitigating “bias” are poorly matched to their motivations and do not engage with the relevant literature outside of NLP. Based on these findings, we describe the beginnings of a path forward by proposing three recommendations that should guide work analyzing “bias” in NLP systems. These recommendations rest on a greater recognition of the relationships between language and social hierarchies, encouraging researchers and practitioners to articulate their conceptualizations of “bias”—i.e., what kinds of system behaviors are harmful, in what ways, to whom, and why, as well as the normative reasoning underlying these statements—and to center work around the lived experiences of members of communities affected by NLP systems, while interrogating and reimagining the power relations between technologists and such communities. 2020.acl-main.485 @@ -6539,7 +6539,7 @@ SaadiaGabriel LianhuiQin DanJurafsky - Noah A.Smith + Noah A.Smith YejinChoi 5477–5490 Warning: this paper contains content that may be offensive or upsetting. Language has the power to reinforce stereotypes and project social biases onto others. At the core of the challenge is that it is rarely what is stated explicitly, but rather the implied meanings, that frame people’s judgments about others. For example, given a statement that “we shouldn’t lower our standards to hire more women,” most listeners will infer the implicature intended by the speaker - that “women (candidates) are less qualified.” Most semantic formalisms, to date, do not capture such pragmatic implications in which people express social biases and power differentials in language. We introduce Social Bias Frames, a new conceptual formalism that aims to model the pragmatic frames in which people project social biases and stereotypes onto others. In addition, we introduce the Social Bias Inference Corpus to support large-scale modelling and evaluation with 150k structured annotations of social media posts, covering over 34k implications about a thousand demographic groups. We then establish baseline approaches that learn to recover Social Bias Frames from unstructured text. We find that while state-of-the-art neural models are effective at high-level categorization of whether a given statement projects unwanted social bias (80% F1), they are not effective at spelling out more detailed explanations in terms of Social Bias Frames. Our study motivates future work that combines structured pragmatic inference with commonsense reasoning on social implications. @@ -6584,7 +6584,7 @@ ZhiqingSun ShikharVashishth SoumyaSanyal - ParthaTalukdar + ParthaTalukdar YimingYang 5516–5522 Knowledge Graph Completion (KGC) aims at automatically predicting missing links for large-scale knowledge graphs. A vast number of state-of-the-art KGC techniques have got published at top conferences in several research fields, including data mining, machine learning, and natural language processing. However, we notice that several recent papers report very high performance, which largely outperforms previous state-of-the-art methods. In this paper, we find that this can be attributed to the inappropriate evaluation protocol used by them and propose a simple evaluation protocol to address this problem. The proposed protocol is robust to handle bias in the model, which can substantially affect the final results. We conduct extensive experiments and report performance of several existing methods using our protocol. The reproducible code has been made publicly available. @@ -6621,7 +6621,7 @@ Explaining Black Box Predictions and Unveiling Data Artifacts through Influence Functions XiaochuangHan - Byron C.Wallace + Byron C.Wallace YuliaTsvetkov 5553–5563 Modern deep learning models for NLP are notoriously opaque. This has motivated the development of methods for interpreting such models, e.g., via gradient-based saliency maps or the visualization of attention weights. Such approaches aim to provide explanations for a particular model prediction by highlighting important words in the corresponding input text. While this might be useful for tasks where decisions are explicitly influenced by individual tokens in the input, we suspect that such highlighting is not suitable for tasks where model decisions should be driven by more complex reasoning. In this work, we investigate the use of influence functions for NLP, providing an alternative approach to interpreting neural text classifiers. Influence functions explain the decisions of a model by identifying influential training examples. Despite the promise of this approach, influence functions have not yet been extensively evaluated in the context of NLP, a gap addressed by this work. We conduct a comparison between influence functions and common word-saliency methods on representative tasks. As suspected, we find that influence functions are particularly useful for natural language inference, a task in which ‘saliency maps’ may not have clear interpretation. Furthermore, we develop a new quantitative measure based on influence functions that can reveal artifacts in training data. @@ -6634,7 +6634,7 @@ Finding Universal Grammatical Relations in Multilingual <fixed-case>BERT</fixed-case> Ethan A.Chi JohnHewitt - Christopher D.Manning + Christopher D.Manning 5564–5577 Recent work has found evidence that Multilingual BERT (mBERT), a transformer-based multilingual masked language model, is capable of zero-shot cross-lingual transfer, suggesting that some aspects of its representations are shared cross-lingually. To better understand this overlap, we extend recent work on finding syntactic trees in neural networks’ internal representations to the multilingual setting. We show that subspaces of mBERT representations recover syntactic tree distances in languages other than English, and that these subspaces are approximately shared across languages. Motivated by these results, we present an unsupervised analysis method that provides evidence mBERT learns representations of syntactic dependency labels, in the form of clusters which largely agree with the Universal Dependencies taxonomy. This evidence suggests that even without explicit supervision, multilingual masked language models learn certain linguistic universals. 2020.acl-main.493 @@ -6723,9 +6723,9 @@ On the Importance of Diversity in Question Generation for <fixed-case>QA</fixed-case> - Md ArafatSultan + Md ArafatSultan ShubhamChandel - RamónFernandez Astudillo + RamónFernandez Astudillo VittorioCastelli 5651–5656 Automatic question generation (QG) has shown promise as a source of synthetic training data for question answering (QA). In this paper we ask: Is textual diversity in QG beneficial for downstream QA? Using top-p nucleus sampling to derive samples from a transformer-based question generator, we show that diversity-promoting QG indeed provides better QA training than likelihood maximization approaches such as beam search. We also show that standard QG evaluation metrics such as BLEU, ROUGE and METEOR are inversely correlated with diversity, and propose a diversity-aware intrinsic measure of overall QG quality that correlates well with extrinsic evaluation on QA. @@ -6751,7 +6751,7 @@ <fixed-case>SCDE</fixed-case>: Sentence Cloze Dataset with High Quality Distractors From Examinations XiangKong VarunGangal - EduardHovy + EduardHovy 5668–5683 We introduce SCDE, a dataset to evaluate the performance of computational models through sentence prediction. SCDE is a human created sentence cloze dataset, collected from public school English examinations. Our task requires a model to fill up multiple blanks in a passage from a shared candidate set with distractors designed by English teachers. Experimental results demonstrate that this task requires the use of non-local, discourse-level context beyond the immediate sentence neighborhood. The blanks require joint solving and significantly impair each other’s context. Furthermore, through ablations, we show that the distractors are of high quality and make the task more challenging. Our experiments show that there is a significant performance gap between advanced models (72%) and humans (87%), encouraging future models to bridge this gap. 2020.acl-main.502 @@ -6785,7 +6785,7 @@ Transformers to Learn Hierarchical Contexts in Multiparty Dialogue for Span-based Question Answering ChangmaoLi - Jinho D.Choi + Jinho D.Choi 5709–5714 We introduce a novel approach to transformers that learns hierarchical representations in multiparty dialogue. First, three language modeling tasks are used to pre-train the transformers, token- and utterance-level language modeling and utterance order prediction, that learn both token and utterance embeddings for better understanding in dialogue contexts. Then, multi-task learning between the utterance prediction and the token span prediction is applied to fine-tune for span-based question answering (QA). Our approach is evaluated on the FriendsQA dataset and shows improvements of 3.8% and 1.4% over the two state-of-the-art transformer models, BERT and RoBERTa, respectively. 2020.acl-main.505 @@ -6811,7 +6811,7 @@ <fixed-case>STARC</fixed-case>: Structured Annotations for Reading Comprehension YevgeniBerzak JonathanMalmaud - RogerLevy + RogerLevy 5726–5735 We present STARC (Structured Annotations for Reading Comprehension), a new annotation framework for assessing reading comprehension with multiple choice questions. Our framework introduces a principled structure for the answer choices and ties them to textual span annotations. The framework is implemented in OneStopQA, a new high-quality dataset for evaluation and analysis of reading comprehension in English. We use this dataset to demonstrate that STARC can be leveraged for a key new application for the development of SAT-like reading comprehension materials: automatic annotation quality probing via span ablation experiments. We further show that it enables in-depth analyses and comparisons between machine and human reading comprehension behavior, including error distributions and guessing ability. Our experiments also reveal that the standard multiple choice dataset in NLP, RACE, is limited in its ability to measure reading comprehension. 47% of its questions can be guessed by machines without accessing the passage, and 18% are unanimously judged by humans as not having a unique correct answer. OneStopQA provides an alternative test set for reading comprehension which alleviates these shortcomings and has a substantially higher human ceiling performance. 2020.acl-main.507 @@ -6882,7 +6882,7 @@ <fixed-case>O</fixed-case>pinion<fixed-case>D</fixed-case>igest: A Simple Framework for Opinion Summarization - YoshihikoSuhara + YoshihikoSuhara XiaolanWang StefanosAngelidis Wang-ChiewTan @@ -6924,7 +6924,7 @@ Generate, Delete and Rewrite: A Three-Stage Framework for Improving Persona Consistency of Dialogue Generation HaoyuSong YanWang - Wei-NanZhang + Wei-NanZhang XiaojiangLiu TingLiu 5821–5831 @@ -6976,10 +6976,10 @@ An Effective Transition-based Model for Discontinuous <fixed-case>NER</fixed-case> - XiangDai + XiangDai SarvnazKarimi BenHachey - CecileParis + CecileParis 5860–5870 Unlike widely used Named Entity Recognition (NER) data sets in generic domains, biomedical NER data sets often contain mentions consisting of discontinuous spans. Conventional sequence tagging techniques encode Markov assumptions that are efficient but preclude recovery of these mentions. We propose a simple, effective transition-based model with generic neural encoding for discontinuous NER. Through extensive experiments on three biomedical data sets, we show that our model can effectively recognize discontinuous mentions without sacrificing the accuracy on continuous mentions. 2020.acl-main.520 @@ -7059,7 +7059,7 @@ ZhiwenXie GuangyouZhou JinLiu - Jimmy XiangjiHuang + Jimmy XiangjiHuang 5929–5939 The goal of Knowledge graph embedding (KGE) is to learn how to represent the low dimensional vectors for entities and relations based on the observed triples. The conventional shallow models are limited to their expressiveness. ConvE (Dettmers et al., 2018) takes advantage of CNN and improves the expressive power with parameter efficient operators by increasing the interactions between head and relation embeddings. However, there is no structural information in the embedding space of ConvE, and the performance is still limited by the number of interactions. The recent KBGAT (Nathani et al., 2019) provides another way to learn embeddings by adaptively utilizing structural information. In this paper, we take the benefits of ConvE and KBGAT together and propose a Relation-aware Inception network with joint local-global structural information for knowledge graph Embedding (ReInceptionE). Specifically, we first explore the Inception network to learn query embedding, which aims to further increase the interactions between head and relation embeddings. Then, we propose to use a relation-aware attention mechanism to enrich the query embedding with the local neighborhood and global entity information. Experimental results on both WN18RR and FB15k-237 datasets demonstrate that ReInceptionE achieves competitive performance compared with state-of-the-art methods. 2020.acl-main.526 @@ -7086,7 +7086,7 @@ MinlongPeng QiZhang ZhongyuWei - XuanjingHuang + XuanjingHuang 5951–5960 Recently, many works have tried to augment the performance of Chinese named entity recognition (NER) using word lexicons. As a representative, Lattice-LSTM has achieved new benchmark results on several public Chinese NER datasets. However, Lattice-LSTM has a complex model architecture. This limits its application in many industrial areas where real-time NER responses are needed. In this work, we propose a simple but effective method for incorporating the word lexicon into the character representations. This method avoids designing a complicated sequence modeling architecture, and for any neural NER model, it requires only subtle adjustment of the character representation layer to introduce the lexicon information. Experimental studies on four benchmark Chinese NER datasets show that our method achieves an inference speed up to 6.15 times faster than those of state-of-the-art methods, along with a better performance. The experimental results also show that the proposed method can be easily incorporated with pre-trained models like BERT. 2020.acl-main.528 @@ -7113,7 +7113,7 @@ Contextual Neural Machine Translation Improves Translation of Cataphoric Pronouns KayYenWong SameenMaruf - GholamrezaHaffari + GholamrezaHaffari 5971–5978 The advent of context-aware NMT has resulted in promising improvements in the overall translation quality and specifically in the translation of discourse phenomena such as pronouns. Previous works have mainly focused on the use of past sentences as context with a focus on anaphora translation. In this work, we investigate the effect of future sentences as context by comparing the performance of a contextual NMT model trained with the future context to the one trained with the past context. Our experiments and evaluation, using generic and pronoun-focused automatic metrics, show that the use of future context not only achieves significant improvements over the context-agnostic Transformer, but also demonstrates comparable and in some cases improved performance over its counterpart trained on past context. We also perform an evaluation on a targeted cataphora test suite and report significant gains over the context-agnostic Transformer in terms of BLEU. 2020.acl-main.530 @@ -7138,7 +7138,7 @@ Tagged Back-translation Revisited: Why Does It Really Work? BenjaminMarie - RaphaelRubino + RaphaelRubino AtsushiFujita 5990–5997 In this paper, we show that neural machine translation (NMT) systems trained on large back-translated data overfit some of the characteristics of machine-translated texts. Such NMT systems better translate human-produced translations, i.e., translationese, but may largely worsen the translation quality of original texts. Our analysis reveals that adding a simple tag to back-translations prevents this quality degradation and improves on average the overall translation quality by helping the NMT system to distinguish back-translated data from original parallel data during training. We also show that, in contrast to high-resource configurations, NMT systems trained in low-resource settings are much less vulnerable to overfit back-translations. We conclude that the back-translations in the training data should always be tagged especially when the origin of the text to be translated is unknown. @@ -7192,7 +7192,7 @@ AlexisConneau ShijieWu HaoranLi - LukeZettlemoyer + LukeZettlemoyer VeselinStoyanov 6022–6034 We study the problem of multilingual masked language modeling, i.e. the training of a single model on concatenated text from multiple languages, and present a detailed study of several factors that influence why these models are so effective for cross-lingual transfer. We show, contrary to what was previously hypothesized, that transfer is possible even when there is no shared vocabulary across the monolingual corpora and also when the text comes from very different domains. The only requirement is that there are some shared parameters in the top layers of the multi-lingual encoder. To better understand this result, we also show that representations from monolingual BERT models in different languages can be aligned post-hoc quite effectively, strongly suggesting that, much like for non-contextual word embeddings, there are universal latent symmetries in the learned embedding spaces. For multilingual masked language modeling, these symmetries are automatically discovered and aligned during the joint training process. @@ -7218,7 +7218,7 @@ Incorporating External Knowledge through Pre-training for Natural Language to Code Generation - Frank F.Xu + Frank F.Xu ZhengbaoJiang PengchengYin BogdanVasilescu @@ -7341,7 +7341,7 @@ XiaolongJin JiafengGuo YuanzhuoWang - XueqiCheng + XueqiCheng 6141–6151 Knowledge inference on knowledge graph has attracted extensive attention, which aims to find out connotative valid facts in knowledge graph and is very helpful for improving the performance of many downstream applications. However, researchers have mainly poured attention to knowledge inference on binary facts. The studies on n-ary facts are relatively scarcer, although they are also ubiquitous in the real world. Therefore, this paper addresses knowledge inference on n-ary facts. We represent each n-ary fact as a primary triple coupled with a set of its auxiliary descriptive attribute-value pair(s). We further propose a neural network model, NeuInfer, for knowledge inference on n-ary facts. Besides handling the common task to infer an unknown element in a whole fact, NeuInfer can cope with a new type of task, flexible knowledge inference. It aims to infer an unknown element in a partial fact consisting of the primary triple coupled with any number of its auxiliary description(s). Experimental results demonstrate the remarkable superiority of NeuInfer. 2020.acl-main.546 @@ -7429,7 +7429,7 @@ YiranChen DanqingWang XipengQiu - XuanjingHuang + XuanjingHuang 6197–6208 This paper creates a paradigm shift with regard to the way we build neural extractive summarization systems. Instead of following the commonly used framework of extracting sentences individually and modeling the relationship between sentences, we formulate the extractive summarization task as a semantic text matching problem, in which a source document and candidate summaries will be (extracted from the original text) matched in a semantic space. Notably, this paradigm shift to semantic matching framework is well-grounded in our comprehensive analysis of the inherent gap between sentence-level and summary-level extractors based on the property of the dataset. Besides, even instantiating the framework with a simple form of a matching model, we have driven the state-of-the-art extractive result on CNN/DailyMail to a new level (44.41 in ROUGE-1). Experiments on the other five datasets also show the effectiveness of the matching framework. We believe the power of this matching-based summarization framework has not been fully exploited. To encourage more instantiations in the future, we have released our codes, processed dataset, as well as generated summaries in https://github.com/maszhongming/MatchSum. 2020.acl-main.552 @@ -7443,7 +7443,7 @@ PengfeiLiu YiningZheng XipengQiu - XuanjingHuang + XuanjingHuang 6209–6219 As a crucial step in extractive document summarization, learning cross-sentence relations has been explored by a plethora of approaches. An intuitive way is to put them in the graph-based neural network, which has a more complex structure for capturing inter-sentence relationships. In this paper, we present a heterogeneous graph-based neural network for extractive summarization (HETERSUMGRAPH), which contains semantic nodes of different granularity levels apart from sentences. These additional nodes act as the intermediary between sentences and enrich the cross-sentence relations. Besides, our graph structure is flexible in natural extension from a single-document setting to multi-document via introducing document nodes. To our knowledge, we are the first one to introduce different types of nodes into graph-based neural networks for extractive document summarization and perform a comprehensive qualitative analysis to investigate their benefits. The code will be released on Github. 2020.acl-main.553 @@ -7504,7 +7504,7 @@ Are we Estimating or Guesstimating Translation Quality? ShuoSun - FranciscoGuzmán + FranciscoGuzmán LuciaSpecia 6262–6267 Recent advances in pre-trained multilingual language models lead to state-of-the-art results on the task of quality estimation (QE) for machine translation. A carefully engineered ensemble of such models won the QE shared task at WMT19. Our in-depth analysis, however, shows that the success of using pre-trained language models for QE is over-estimated due to three issues we observed in current QE datasets: (i) The distributions of quality scores are imbalanced and skewed towards good quality scores; (iii) QE models can perform well on these datasets while looking at only source or translated sentences; (iii) They contain statistical artifacts that correlate well with human-annotated QE labels. Our findings suggest that although QE models might capture fluency of translated sentences and complexity of source sentences, they cannot model adequacy of translations effectively. @@ -7519,7 +7519,7 @@ RonenTamari ChenShani TomHope - Miriam R LPetruck + Miriam R LPetruck OmriAbend DafnaShahaf 6268–6281 @@ -7545,7 +7545,7 @@ The Unstoppable Rise of Computational Linguistics in Deep Learning - JamesHenderson + JamesHenderson 6294–6306 In this paper, we trace the history of neural networks applied to natural language understanding tasks, and identify key contributions which the nature of language has made to the development of neural network architectures. We focus on the importance of variable binding and its instantiation in attention-based models, and argue that Transformer is not a sequence model but an induced-structure model. This perspective leads to predictions of the challenges facing research in deep learning architectures for natural language understanding. 2020.acl-main.561 @@ -7616,7 +7616,7 @@ Learning Efficient Dialogue Policy from Demonstrations through Shaping HuiminWang BaolinPeng - Kam-FaiWong + Kam-FaiWong 6355–6365 Training a task-oriented dialogue agent with reinforcement learning is prohibitively expensive since it requires a large volume of interactions with users. Human demonstrations can be used to accelerate learning progress. However, how to effectively leverage demonstrations to learn dialogue policy remains less explored. In this paper, we present Sˆ2Agent that efficiently learns dialogue policy from demonstrations through policy shaping and reward shaping. We use an imitation model to distill knowledge from demonstrations, based on which policy shaping estimates feedback on how the agent should act in policy space. Reward shaping is then incorporated to bonus state-actions similar to demonstrations explicitly in value space encouraging better exploration. The effectiveness of the proposed Sˆ2Agentt is demonstrated in three dialogue domains and a challenging domain adaptation task with both user simulator evaluation and human evaluation. 2020.acl-main.566 @@ -7641,7 +7641,7 @@ Speaker Sensitive Response Evaluation Model JinYeongBak - AliceOh + AliceOh 6376–6385 Automatic evaluation of open-domain dialogue response generation is very challenging because there are many appropriate responses for a given context. Existing evaluation models merely compare the generated response with the ground truth response and rate many of the appropriate responses as inappropriate if they deviate from the ground truth. One approach to resolve this problem is to consider the similarity of the generated response with the conversational context. In this paper, we propose an automatic evaluation model based on that idea and learn the model parameters from an unlabeled conversation corpus. Our approach considers the speakers in defining the different levels of similar context. We use a Twitter conversation corpus that contains many speakers and conversations to test our evaluation model. Experiments show that our model outperforms the other existing evaluation metrics in terms of high correlation with human annotation scores. We also show that our model trained on Twitter can be applied to movie dialogues without any additional training. We provide our code and the learned parameters so that they can be used for automatic evaluation of dialogue response generation models. 2020.acl-main.568 @@ -7655,13 +7655,13 @@ YuqingXing FangKong PeifengLi - GuodongZhou + GuodongZhou 6386–6395 Due to its great importance in deep natural language understanding and various down-stream applications, text-level parsing of discourse rhetorical structure (DRS) has been drawing more and more attention in recent years. However, all the previous studies on text-level discourse parsing adopt bottom-up approaches, which much limit the DRS determination on local information and fail to well benefit from global information of the overall discourse. In this paper, we justify from both computational and perceptive points-of-view that the top-down architecture is more suitable for text-level DRS parsing. On the basis, we propose a top-down neural architecture toward text-level DRS parsing. In particular, we cast discourse parsing as a recursive split point ranking task, where a split point is classified to different levels according to its rank and the elementary discourse units (EDUs) associated with it are arranged accordingly. In this way, we can determine the complete DRS as a hierarchical tree structure via an encoder-decoder with an internal stack. Experimentation on both the English RST-DT corpus and the Chinese CDTB corpus shows the great effectiveness of our proposed top-down approach towards text-level DRS parsing. 2020.acl-main.569 2020.acl-main.569.Software.zip - 10.18653/v1/2020.acl-main.569 2020.acl-main.569.Dataset.pdf + 10.18653/v1/2020.acl-main.569 @@ -7768,7 +7768,7 @@ Named Entity Recognition as Dependency Parsing JuntaoYu BerndBohnet - MassimoPoesio + MassimoPoesio 6470–6476 Named Entity Recognition (NER) is a fundamental task in Natural Language Processing, concerned with identifying spans of text expressing references to entities. NER research is often focused on flat entities only (flat NER), ignoring the fact that entity references can be nested, as in [Bank of [China]] (Finkel and Manning, 2009). In this paper, we use ideas from graph-based dependency parsing to provide our model a global view on the input via a biaffine model (Dozat and Manning, 2017). The biaffine model scores pairs of start and end tokens in a sentence which we use to explore all spans, so that the model is able to predict named entities accurately. We show that the model works well for both nested and flat NER through evaluation on 8 corpora and achieving SoTA performance on all of them, with accuracy gains of up to 2.2 percentage points. 2020.acl-main.577 @@ -7822,7 +7822,7 @@ Single-/Multi-Source Cross-Lingual <fixed-case>NER</fixed-case> via Teacher-Student Learning on Unlabeled Data in Target Language QianhuiWu ZijiaLin - Börje F.Karlsson + Börje F.Karlsson Jian-GuangLou BiqingHuang 6505–6514 @@ -7837,7 +7837,7 @@ ShaoweiChen JieLiu YuWang - WenzhengZhang + WenzhengZhang ZimingChi 6515–6524 Opinion entity extraction is a fundamental task in fine-grained opinion mining. Related studies generally extract aspects and/or opinion expressions without recognizing the relations between them. However, the relations are crucial for downstream tasks, including sentiment classification, opinion summarization, etc. In this paper, we explore Aspect-Opinion Pair Extraction (AOPE) task, which aims at extracting aspects and opinion expressions in pairs. To deal with this task, we propose Synchronous Double-channel Recurrent Network (SDRN) mainly consisting of an opinion entity extraction unit, a relation detection unit, and a synchronization unit. The opinion entity extraction unit and the relation detection unit are developed as two channels to extract opinion entities and relations simultaneously. Furthermore, within the synchronization unit, we design Entity Synchronization Mechanism (ESM) and Relation Synchronization Mechanism (RSM) to enhance the mutual benefit on the above two channels. To verify the performance of SDRN, we manually build three datasets based on SemEval 2014 and 2015 benchmarks. Extensive experiments demonstrate that SDRN achieves state-of-the-art performances. @@ -7864,8 +7864,8 @@ <fixed-case>K</fixed-case>nowledge Supports Visual Language Grounding: <fixed-case>A</fixed-case> Case Study on Colour Terms - SimeonSchüz - SinaZarrieß + SimeonSchüz + SinaZarrieß 6536–6542 In human cognition, world knowledge supports the perception of object colours: knowing that trees are typically green helps to perceive their colour in certain contexts. We go beyond previous studies on colour terms using isolated colour swatches and study visual grounding of colour terms in realistic objects. Our models integrate processing of visual information and object-specific knowledge via hard-coded (late) or learned (early) fusion. We find that both models consistently outperform a bottom-up baseline that predicts colour terms solely from visual inputs, but show interesting differences when predicting atypical colours of so-called colour diagnostic objects. Our models also achieve promising results when tested on new object categories not seen during training. 2020.acl-main.584 @@ -7890,8 +7890,8 @@ Words Aren’t Enough, Their Order Matters: On the Robustness of Grounding Visual Referring Expressions ArjunAkula SpandanaGella - YaserAl-Onaizan - Song-ChunZhu + YaserAl-Onaizan + Song-ChunZhu SivaReddy 6555–6565 Visual referring expression recognition is a challenging task that requires natural language understanding in the context of an image. We critically examine RefCOCOg, a standard benchmark for this task, using a human study and show that 83.7% of test instances do not require reasoning on linguistic structure, i.e., words are enough to identify the target object, the word order doesn’t matter. To measure the true progress of existing models, we split the test set into two sets, one which requires reasoning on linguistic structure and the other which doesn’t. Additionally, we create an out-of-distribution dataset Ref-Adv by asking crowdworkers to perturb in-domain examples such that the target object changes. Using these datasets, we empirically show that existing methods fail to exploit linguistic structure and are 12% to 23% lower in performance than the established progress for this task. We also propose two methods, one based on contrastive learning and the other based on multi-task learning, to increase the robustness of ViLBERT, the current state-of-the-art model for this task. Our datasets are publicly available at https://github.com/aws/aws-refcocog-adv. @@ -7905,7 +7905,7 @@ HaoPeng RoySchwartz DianqiLi - Noah A.Smith + Noah A.Smith 6566–6577 Multi-head attentive neural architectures have achieved state-of-the-art results on a variety of natural language processing tasks. Evidence has shown that they are overparameterized; attention heads can be pruned without significant performance loss. In this work, we instead “reallocate” them—the model learns to activate different heads on different inputs. Drawing connections between multi-head attention and mixture of experts, we propose the mixture of attentive experts model (MAE). MAE is trained using a block coordinate descent algorithm that alternates between updating (1) the responsibilities of the experts and (2) their parameters. Experiments on machine translation and language modeling show that MAE outperforms strong baselines on both tasks. Particularly, on the WMT14 English to German translation dataset, MAE improves over “transformer-base” by 0.8 BLEU, with a comparable number of parameters. Our analysis shows that our model learns to specialize different experts to different inputs. 2020.acl-main.587 @@ -7916,7 +7916,7 @@ Dependency Graph Enhanced Dual-transformer Structure for Aspect-based Sentiment Classification HaoTang - DonghongJi + DonghongJi ChenliangLi QijiZhou 6578–6588 @@ -7931,7 +7931,7 @@ Differentiable Window for Dynamic Local Attention Thanh-TungNguyen Xuan-PhiNguyen - ShafiqJoty + ShafiqJoty XiaoliLi 6589–6599 We propose Differentiable Window, a new neural module and general purpose component for dynamic window selection. While universally applicable, we demonstrate a compelling use case of utilizing Differentiable Window to improve standard attention modules by enabling more focused attentions over the input regions. We propose two variants of Differentiable Window, and integrate them within the Transformer architecture in two novel ways. We evaluate our proposed approach on a myriad of NLP tasks, including machine translation, sentiment analysis, subject-verb agreement and language modeling. Our experimental results demonstrate consistent and sizable improvements across all tasks. @@ -7947,7 +7947,7 @@ YiZhou Cho-JuiHsieh MinhaoCheng - XuanjingHuang + XuanjingHuang 6600–6610 Despite achieving prominent performance on many important tasks, it has been reported that neural networks are vulnerable to adversarial examples. Previously studies along this line mainly focused on semantic tasks such as sentiment analysis, question answering and reading comprehension. In this study, we show that adversarial examples also exist in dependency parsing: we propose two approaches to study where and how parsers make mistakes by searching over perturbations to existing texts at sentence and phrase levels, and design algorithms to construct such examples in both of the black-box and white-box settings. Our experiments with one of state-of-the-art parsers on the English Penn Treebank (PTB) show that up to 77% of input examples admit adversarial perturbations, and we also show that the robustness of parsing models can be improved by crafting high-quality adversaries and including them in the training stage, while suffering little to no performance drop on the clean input data. 2020.acl-main.590 @@ -7960,7 +7960,7 @@ WenyuDu ZhouhanLin YikangShen - Timothy J.O’Donnell + Timothy J.O’Donnell YoshuaBengio YueZhang 6611–6628 @@ -7994,7 +7994,7 @@ GabrielStanovsky SwabhaSwayamdipta JesseDodge - Noah A.Smith + Noah A.Smith 6640–6651 As NLP models become larger, executing a trained model requires significant computational resources incurring monetary and environmental costs. To better respect a given inference budget, we propose a modification to contextual representation fine-tuning which, during inference, allows for an early (and fast) “exit” from neural network calculations for simple instances, and late (and accurate) exit for hard instances. To achieve this, we add classifiers to different layers of BERT and use their calibrated confidence scores to make early exit decisions. We test our proposed modification on five different datasets in two tasks: three text classification datasets and two natural language inference benchmarks. Our method presents a favorable speed/accuracy tradeoff in almost all cases, producing models which are up to five times faster than the state of the art, while preserving their accuracy. Our method also requires almost no additional training resources (in either time or parameters) compared to the baseline BERT model. Finally, our method alleviates the need for costly retraining of multiple models at different levels of efficiency; we allow users to control the inference speed/accuracy tradeoff using a single trained model, by setting a single variable at inference time. We publicly release our code. 2020.acl-main.593 @@ -8033,8 +8033,8 @@ Modeling Morphological Typology for Unsupervised Learning of Language Morphology HongzhiXu JordanKodner - MitchellMarcus - CharlesYang + MitchellMarcus + CharlesYang 6672–6681 This paper describes a language-independent model for fully unsupervised morphological analysis that exploits a universal framework leveraging morphological typology. By modeling morphological processes including suffixation, prefixation, infixation, and full and partial reduplication with constrained stem change rules, our system effectively constrains the search space and offers a wide coverage in terms of morphological typology. The system is tested on nine typologically and genetically diverse languages, and shows superior performance over leading systems. We also investigate the effect of an oracle that provides only a handful of bits per language to signal morphological type. 2020.acl-main.596 @@ -8047,15 +8047,15 @@ AdinaWilliams TiagoPimentel HagenBlix - Arya D.McCarthy + Arya D.McCarthy EleanorChodroff RyanCotterell 6682–6695 The noun lexica of many natural languages are divided into several declension classes with characteristic morphological properties. Class membership is far from deterministic, but the phonological form of a noun and/or its meaning can often provide imperfect clues. Here, we investigate the strength of those clues. More specifically, we operationalize this by measuring how much information, in bits, we can glean about declension class from knowing the form and/or meaning of nouns. We know that form and meaning are often also indicative of grammatical gender—which, as we quantitatively verify, can itself share information with declension class—so we also control for gender. We find for two Indo-European languages (Czech and German) that form and meaning respectively share significant amounts of information with class (and contribute additional information above and beyond gender). The three-way interaction between class, form, and meaning (given gender) is also significant. Our study is important for two reasons: First, we introduce a new method that provides additional quantitative support for a classic linguistic finding that form and meaning are relevant for the classification of nouns into declensions. Secondly, we show not only that individual declensions classes vary in the strength of their clues within a language, but also that these variations themselves vary across languages. 2020.acl-main.597 2020.acl-main.597.Software.zip - 10.18653/v1/2020.acl-main.597 2020.acl-main.597.Dataset.pdf + 10.18653/v1/2020.acl-main.597 @@ -8065,8 +8065,8 @@ LiweiCai YihuiPeng ChenXia - AryaMcCarthy - KatharinaKann + AryaMcCarthy + KatharinaKann 6696–6707 We propose the task of unsupervised morphological paradigm completion. Given only raw text and a lemma list, the task consists of generating the morphological paradigms, i.e., all inflected forms, of the lemmas. From a natural language processing (NLP) perspective, this is a challenging unsupervised task, and high-performing systems have the potential to improve tools for low-resource languages or to assist linguistic annotators. From a cognitive science perspective, this can shed light on how children acquire morphological knowledge. We further introduce a system for the task, which generates morphological paradigms via the following steps: (i) EDIT TREE retrieval, (ii) additional lemma retrieval, (iii) paradigm size discovery, and (iv) inflection generation. We perform an evaluation on 14 typologically diverse languages. Our system outperforms trivial baselines with ease and, for some languages, even obtains a higher accuracy than minimally supervised systems. 2020.acl-main.598 @@ -8108,7 +8108,7 @@ Low-Resource Generation of Multi-hop Reasoning Questions JianxingYu - WeiLiu + WeiLiu ShuangQiu QinliangSu KaiWang @@ -8167,7 +8167,7 @@ Parsing into Variable-in-situ Logico-Semantic Graphs YufeiChen - WeiweiSun + WeiweiSun 6772–6782 We propose variable-in-situ logico-semantic graphs to bridge the gap between semantic graph and logical form parsing. The new type of graph-based meaning representation allows us to include analysis for scope-related phenomena, such as quantification, negation and modality, in a way that is consistent with the state-of-the-art underspecification approach. Moreover, the well-formedness of such a graph is clear, since model-theoretic interpretation is available. We demonstrate the effectiveness of this new perspective by developing a new state-of-the-art semantic parser for English Resource Semantics. At the core of this parser is a novel neural graph rewriting system which combines the strengths of Hyperedge Replacement Grammar, a knowledge-intensive model, and Graph Neural Networks, a data-intensive model. Our parser achieves an accuracy of 92.39% in terms of elementary dependency match, which is a 2.88 point improvement over the best data-driven model in the literature. The output of our parser is highly coherent: at least 91% graphs are valid, in that they allow at least one sound scope-resolved logical form. 2020.acl-main.605 @@ -8179,7 +8179,7 @@ Semantic Parsing for <fixed-case>E</fixed-case>nglish as a Second Language YuanyuanZhao - WeiweiSun + WeiweiSun JunjieCao XiaojunWan 6783–6794 @@ -8207,7 +8207,7 @@ RuishengCao SuZhu ChenyuYang - ChenLiu + ChenLiu RaoMa YanbinZhao LuChen @@ -8249,7 +8249,7 @@ XiaonanLi HangYan XipengQiu - XuanjingHuang + XuanjingHuang 6836–6842 Recently, the character-word lattice structure has been proved to be effective for Chinese named entity recognition (NER) by incorporating the word information. However, since the lattice structure is complex and dynamic, the lattice-based models are hard to fully utilize the parallel computation of GPUs and usually have a low inference speed. In this paper, we propose FLAT: Flat-LAttice Transformer for Chinese NER, which converts the lattice structure into a flat structure consisting of spans. Each span corresponds to a character or latent word and its position in the original lattice. With the power of Transformer and well-designed position encoding, FLAT can fully leverage the lattice information and has an excellent parallel ability. Experiments on four datasets show FLAT outperforms other lexicon-based models in performance and efficiency. 2020.acl-main.611 @@ -8335,7 +8335,7 @@ Classification-Based Self-Learning for Weakly Supervised Bilingual Lexicon Induction - Vanja MladenKaran + Vanja MladenKaran IvanVulić AnnaKorhonen GoranGlavaš @@ -8350,8 +8350,8 @@ Gender in Danger? Evaluating Speech Translation Technology on the <fixed-case>M</fixed-case>u<fixed-case>ST</fixed-case>-<fixed-case>SHE</fixed-case> Corpus LuisaBentivogli BeatriceSavoldi - MatteoNegri - Mattia A.Di Gangi + MatteoNegri + Mattia A.Di Gangi RoldanoCattoni MarcoTurchi 6923–6933 @@ -8446,7 +8446,7 @@ JonathanMamou JulianMichael GabrielStanovsky - LukeZettlemoyer + LukeZettlemoyer IdoDagan 7008–7013 Question-answer driven Semantic Role Labeling (QA-SRL) was proposed as an attractive open and natural flavour of SRL, potentially attainable from laymen. Recently, a large-scale crowdsourced QA-SRL corpus and a trained parser were released. Trying to replicate the QA-SRL annotation for new texts, we found that the resulting annotations were lacking in quality, particularly in coverage, making them insufficient for further research and evaluation. In this paper, we present an improved crowdsourcing protocol for complex semantic annotation, involving worker selection and training, and a data consolidation phase. Applying this protocol to QA-SRL yielded high-quality annotation with drastically higher coverage, producing a new gold evaluation dataset. We believe that our annotation protocol and gold standard will facilitate future replicable research of natural semantic annotations. @@ -8459,7 +8459,7 @@ Cross-Lingual Semantic Role Labeling with High-Quality Translated Training Corpus HaoFei MeishanZhang - DonghongJi + DonghongJi 7014–7026 Many efforts of research are devoted to semantic role labeling (SRL) which is crucial for natural language understanding. Supervised approaches have achieved impressing performances when large-scale corpora are available for resource-rich languages such as English. While for the low-resource languages with no annotated SRL dataset, it is still challenging to obtain competitive performances. Cross-lingual SRL is one promising way to address the problem, which has achieved great advances with the help of model transferring and annotation projection. In this paper, we propose a novel alternative based on corpus translation, constructing high-quality training datasets for the target languages from the source gold-standard SRL annotations. Experimental results on Universal Proposition Bank show that the translation-based method is highly effective, and the automatic pseudo datasets can improve the target-language SRL performances significantly. 2020.acl-main.627 @@ -8471,7 +8471,7 @@ Sentence Meta-Embeddings for Unsupervised Semantic Textual Similarity NinaPoerner UlliWaltinger - HinrichSchütze + HinrichSchütze 7027–7034 We address the task of unsupervised Semantic Textual Similarity (STS) by ensembling diverse pre-trained sentence encoders into sentence meta-embeddings. We apply, extend and evaluate different meta-embedding methods from the word embedding literature at the sentence level, including dimensionality reduction (Yin and Schütze, 2016), generalized Canonical Correlation Analysis (Rastogi et al., 2015) and cross-view auto-encoders (Bollegala and Bao, 2018). Our sentence meta-embeddings set a new unsupervised State of The Art (SoTA) on the STS Benchmark and on the STS12-STS16 datasets, with gains of between 3.7% and 6.4% Pearson’s r over single-source systems. 2020.acl-main.628 @@ -8518,7 +8518,7 @@ Exploiting Personal Characteristics of Debaters for Predicting Persuasiveness - KhalidAl Khatib + KhalidAl Khatib MichaelVölske ShahbazSyed NikolayKolyada @@ -8595,7 +8595,7 @@ Modeling Long Context for Task-Oriented Dialogue State Generation JunQuan - DeyiXiong + DeyiXiong 7119–7124 Based on the recently proposed transferable dialogue state generator (TRADE) that predicts dialogue states from utterance-concatenated dialogue context, we propose a multi-task learning model with a simple yet effective utterance tagging technique and a bidirectional language model as an auxiliary task for task-oriented dialogue state generation. By enabling the model to learn a better representation of the long dialogue context, our approaches attempt to solve the problem that the performance of the baseline significantly drops when the input dialogue context sequence is long. In our experiments, our proposed model achieves a 7.03% relative improvement over the baseline, establishing a new state-of-the-art joint goal accuracy of 52.04% on the MultiWOZ 2.0 dataset. 2020.acl-main.637 @@ -8606,7 +8606,7 @@ Multi-Domain Dialogue Acts and Response Co-Generation KaiWang - JunfengTian + JunfengTian RuiWang XiaojunQuan JianxingYu @@ -8620,7 +8620,7 @@ Exploring Contextual Word-level Style Relevance for Unsupervised Style Transfer ChulunZhou - LiangyuChen + LiangyuChen JiachenLiu XinyanXiao JinsongSu @@ -8701,12 +8701,12 @@ <fixed-case>C</fixed-case>amem<fixed-case>BERT</fixed-case>: a Tasty <fixed-case>F</fixed-case>rench Language Model LouisMartin BenjaminMuller - Pedro JavierOrtiz Suárez + Pedro JavierOrtiz Suárez YoannDupont - LaurentRomary - Éricde la Clergerie - DjaméSeddah - BenoîtSagot + LaurentRomary + Éricde la Clergerie + DjaméSeddah + BenoîtSagot 7203–7219 Pretrained language models are now ubiquitous in Natural Language Processing. Despite their success, most available models have either been trained on English data or on the concatenation of data in multiple languages. This makes practical use of such models –in all languages except English– very limited. In this paper, we investigate the feasibility of training monolingual Transformer-based language models for other languages, taking French as an example and evaluating our language models on part-of-speech tagging, dependency parsing, named entity recognition and natural language inference tasks. We show that the use of web crawled data is preferable to the use of Wikipedia data. More surprisingly, we show that a relatively small web crawled dataset (4GB) leads to results that are as good as those obtained using larger datasets (130+GB). Our best performing model CamemBERT reaches or improves the state of the art in all four downstream tasks. 2020.acl-main.645 @@ -8741,7 +8741,7 @@ 2kenize: Tying Subword Sequences for <fixed-case>C</fixed-case>hinese Script Conversion - PranavA + PranavA IsabelleAugenstein 7257–7272 Simplified Chinese to Traditional Chinese character conversion is a common preprocessing step in Chinese NLP. Despite this, current approaches have insufficient performance because they do not take into account that a simplified Chinese character can correspond to multiple traditional characters. Here, we propose a model that can disambiguate between mappings and convert between the two scripts. The model is based on subword segmentation, two language models, as well as a method for mapping between subword sequences. We further construct benchmark datasets for topic classification and script conversion. Our proposed method outperforms previous Chinese Character conversion approaches by 6 points in accuracy. These results are further confirmed in a downstream application, where 2kenize is used to convert pretraining dataset for topic classification. An error analysis reveals that our method’s particular strengths are in dealing with code mixing and named entities. @@ -8753,8 +8753,8 @@ Predicting the Growth of Morphological Families from Social and Linguistic Factors ValentinHofmann - JanetPierrehumbert - HinrichSchütze + JanetPierrehumbert + HinrichSchütze 7273–7283 We present the first study that examines the evolution of morphological families, i.e., sets of morphologically related words such as “trump”, “antitrumpism”, and “detrumpify”, in social media. We introduce the novel task of Morphological Family Expansion Prediction (MFEP) as predicting the increase in the size of a morphological family. We create a ten-year Reddit corpus as a benchmark for MFEP and evaluate a number of baselines on this benchmark. Our experiments demonstrate very good performance on MFEP. 2020.acl-main.649 @@ -8776,7 +8776,7 @@ <fixed-case>C</fixed-case>lar<fixed-case>Q</fixed-case>: A large-scale and diverse dataset for Clarification Question Generation VaibhavKumar - Alan WBlack + Alan WBlack 7296–7301 Question answering and conversational systems are often baffled and need help clarifying certain ambiguities. However, limitations of existing datasets hinder the development of large-scale models capable of generating and utilising clarification questions. In order to overcome these limitations, we devise a novel bootstrapping framework (based on self-supervision) that assists in the creation of a diverse, large-scale dataset of clarification questions based on post-comment tuples extracted from stackexchange. The framework utilises a neural network based architecture for classifying clarification questions. It is a two-step method where the first aims to increase the precision of the classifier and second aims to increase its recall. We quantitatively demonstrate the utility of the newly created dataset by applying it to the downstream task of question-answering. The final dataset, ClarQ, consists of ~2M examples distributed across 173 domains of stackexchange. We release this dataset in order to foster research into the field of clarification question generation with the larger goal of enhancing dialog and question answering systems. 2020.acl-main.651 @@ -8788,10 +8788,10 @@ <fixed-case>D</fixed-case>o<fixed-case>QA</fixed-case> - Accessing Domain-Specific <fixed-case>FAQ</fixed-case>s via Conversational <fixed-case>QA</fixed-case> Jon AnderCampos ArantxaOtegi - AitorSoroa - JanDeriu + AitorSoroa + JanDeriu MarkCieliebak - EnekoAgirre + EnekoAgirre 7302–7314 The goal of this work is to build conversational Question Answering (QA) interfaces for the large body of domain-specific information available in FAQ sites. We present DoQA, a dataset with 2,437 dialogues and 10,917 QA pairs. The dialogues are collected from three Stack Exchange sites using the Wizard of Oz method with crowdsourcing. Compared to previous work, DoQA comprises well-defined information needs, leading to more coherent and natural conversations with less factoid questions and is multi-domain. In addition, we introduce a more realistic information retrieval (IR) scenario where the system needs to find the answer in any of the FAQ documents. The results of an existing, strong, system show that, thanks to transfer learning from a Wikipedia QA dataset and fine tuning on a single FAQ domain, it is possible to build high quality conversational QA systems for FAQs without in-domain training data. The good results carry over into the more challenging IR scenario. In both cases, there is still ample room for improvement, as indicated by the higher human upperbound. 2020.acl-main.652 @@ -8856,7 +8856,7 @@ Premise Selection in Natural Language Mathematical Texts DeborahFerreira - AndréFreitas + AndréFreitas 7365–7374 The discovery of supporting evidence for addressing complex mathematical problems is a semantically challenging task, which is still unexplored in the field of natural language processing for mathematical text. The natural language premise selection task consists in using conjectures written in both natural language and mathematical formulae to recommend premises that most likely will be useful to prove a particular statement. We propose an approach to solve this task as a link prediction problem, using Deep Convolutional Graph Neural Networks. This paper also analyses how different baselines perform in this task and shows that a graph structure can provide higher F1-score, especially when considering multi-hop premise selection. 2020.acl-main.657 @@ -8869,8 +8869,8 @@ MikelArtetxe SebastianRuder DaniYogatama - GorkaLabaka - EnekoAgirre + GorkaLabaka + EnekoAgirre 7375–7388 We review motivations, definition, approaches, and methodology for unsupervised cross-lingual learning and call for a more rigorous position in each of them. An existing rationale for such research is based on the lack of parallel data for many of the world’s languages. However, we argue that a scenario without any parallel data and abundant monolingual data is unrealistic in practice. We also discuss different training signals that have been used in previous work, which depart from the pure unsupervised setting. We then describe common methodological issues in tuning and evaluation of unsupervised cross-lingual models and present best practices. Finally, we provide a unified outlook for different types of research in this area (i.e., cross-lingual word embeddings, deep multilingual pretraining, and unsupervised machine translation) and argue for comparable evaluation of these models. 2020.acl-main.658 @@ -8919,7 +8919,7 @@ What Question Answering can Learn from Trivia Nerds JordanBoyd-Graber - BenjaminBörschinger + BenjaminBörschinger 7422–7435 In addition to the traditional task of machines answering questions, question answering (QA) research creates interesting, challenging questions that help systems how to answer questions and reveal the best systems. We argue that creating a QA dataset—and the ubiquitous leaderboard that goes with it—closely resembles running a trivia tournament: you write questions, have agents (either humans or machines) answer the questions, and declare a winner. However, the research community has ignored the hard-learned lessons from decades of the trivia community creating vibrant, fair, and effective question answering competitions. After detailing problems with existing QA datasets, we outline the key lessons—removing ambiguity, discriminating skill, and adjudicating disputes—that can transfer to QA research and how they might be implemented. 2020.acl-main.662 @@ -8982,7 +8982,7 @@ XiangKong ZhengzhongLiu XuezheMa - EduardHovy + EduardHovy 7479–7485 In this work, we explore the implicit event argument detection task, which studies event arguments beyond sentence boundaries. The addition of cross-sentence argument candidates imposes great challenges for modeling. To reduce the number of candidates, we adopt a two-step approach, decomposing the problem into two sub-problems: argument head-word detection and head-to-span expansion. Evaluated on the recent RAMS dataset (Ebner et al., 2020), our model achieves overall better performance than a strong sequence labeling baseline. We further provide detailed error analysis, presenting where the model mainly makes errors and indicating directions for future improvements. It remains a challenge to detect implicit arguments, calling for more future work of document-level modeling for this task. 2020.acl-main.667 @@ -8995,7 +8995,7 @@ OrHonovich LucasTorroba Hennigen OmriAbend - Shay B.Cohen + Shay B.Cohen 7486–7497 Machine reading is an ambitious goal in NLP that subsumes a wide range of text understanding capabilities. Within this broad framework, we address the task of machine reading the time of historical events, compile datasets for the task, and develop a model for tackling it. Given a brief textual description of an event, we show that good performance can be achieved by extracting relevant sentences from Wikipedia, and applying a combination of task-specific and general-purpose feature embeddings for the classification. Furthermore, we establish a link between the historical event ordering task and the event focus time task from the information retrieval literature, showing they also provide a challenging test case for machine reading algorithms. 2020.acl-main.668 @@ -9136,7 +9136,7 @@ MariaBarrett YonatanBelinkov DesmondElliott - AndersSøgaard + AndersSøgaard 7590–7604 Large-scale pretrained language models are the major driving force behind recent improvements in perfromance on the Winograd Schema Challenge, a widely employed test of commonsense reasoning ability. We show, however, with a new diagnostic dataset, that these models are sensitive to linguistic perturbations of the Winograd examples that minimally affect human understanding. Our results highlight interesting differences between humans and language models: language models are more sensitive to number or gender alternations and synonym replacements than humans, and humans are more stable and consistent in their predictions, maintain a much higher absolute performance, and perform better on non-associative instances than associative ones. 2020.acl-main.679 @@ -9148,7 +9148,7 @@ Temporally-Informed Analysis of Named Entity Recognition ShrutiRijhwani - DanielPreotiuc-Pietro + DanielPreotiuc-Pietro 7605–7617 Natural language processing models often have to make predictions on text data that evolves over time as a result of changes in language use or the information described in the text. However, evaluation results on existing data sets are seldom reported by taking the timestamp of the document into account. We analyze and propose methods that make better use of temporally-diverse training data, with a focus on the task of named entity recognition. To support these experiments, we introduce a novel data set of English tweets annotated with named entities. We empirically demonstrate the effect of temporal drift on performance, and how the temporal information of documents can be used to obtain better models compared to those that disregard temporal information. Our analysis gives insights into why this information is useful, in the hope of informing potential avenues of improvement for named entity recognition as well as other NLP tasks under similar experimental setups. 2020.acl-main.680 @@ -9159,7 +9159,7 @@ Towards Open Domain Event Trigger Identification using Adversarial Domain Adaptation AakankshaNaik - CarolynRose + CarolynRose 7618–7624 We tackle the task of building supervised event trigger identification models which can generalize better across domains. Our work leverages the adversarial domain adaptation (ADA) framework to introduce domain-invariance. ADA uses adversarial training to construct representations that are predictive for trigger identification, but not predictive of the example’s domain. It requires no labeled data from the target domain, making it completely unsupervised. Experiments with two domains (English literature and news) show that ADA leads to an average F1 score improvement of 3.9 on out-of-domain data. Our best performing model (BERT-A) reaches 44-49 F1 across both domains, using no labeled target data. Preliminary experiments reveal that finetuning on 1% labeled data, followed by self-training leads to substantial improvement, reaching 51.5 and 67.2 F1 on literature and news respectively. 2020.acl-main.681 @@ -9200,7 +9200,7 @@ ShashankSrivastava OleksandrPolozov NebojsaJojic - ChristopherMeek + ChristopherMeek 7652–7662 We explore learning web-based tasks from a human teacher through natural language explanations and a single demonstration. Our approach investigates a new direction for semantic parsing that models explaining a demonstration in a context, rather than mapping explanations to demonstrations. By leveraging the idea of inverse semantics from program synthesis to reason backwards from observed demonstrations, we ensure that all considered interpretations are consistent with executable actions in any context, thus simplifying the problem of search over logical forms. We present a dataset of explanations paired with demonstrations for web-based tasks. Our methods show better task completion rates than a supervised semantic parsing baseline (40% relative improvement on average), and are competitive with simple exploration-and-demonstration based methods, while requiring no exploration of the environment. In learning to align explanations with demonstrations, basic properties of natural language syntax emerge as learned behavior. This is an interesting example of pragmatic language acquisition without any linguistic annotation. 2020.acl-main.684 @@ -9378,7 +9378,7 @@ Negated and Misprimed Probes for Pretrained Language Models: Birds Can Talk, But Cannot Fly NoraKassner - HinrichSchütze + HinrichSchütze 7811–7818 Building on Petroni et al. 2019, we propose two new probing tasks analyzing factual knowledge stored in Pretrained Language Models (PLMs). (1) Negation. We find that PLMs do not distinguish between negated (‘‘Birds cannot [MASK]”) and non-negated (‘‘Birds can [MASK]”) cloze questions. (2) Mispriming. Inspired by priming methods in human psychology, we add “misprimes” to cloze questions (‘‘Talk? Birds can [MASK]”). We find that PLMs are easily distracted by misprimes. These results suggest that PLMs still have a long way to go to adequately learn human-like factual knowledge. 2020.acl-main.698 @@ -9414,8 +9414,8 @@ JesseDunietz GregBurnham AkashBharadwaj - OwenRambow - JenniferChu-Carroll + OwenRambow + JenniferChu-Carroll DaveFerrucci 7839–7859 Many tasks aim to measure machine reading comprehension (MRC), often focusing on question types presumed to be difficult. Rarely, however, do task designers start by considering what systems should in fact comprehend. In this paper we make two key contributions. First, we argue that existing approaches do not adequately define comprehension; they are too unsystematic about what content is tested. Second, we present a detailed definition of comprehension—a “Template of Understanding”—for a widely useful class of texts, namely short narratives. We then conduct an experiment that strongly suggests existing systems are not up to the task of narrative understanding as we define it. @@ -9427,7 +9427,7 @@ Gender Gap in Natural Language Processing Research: Disparities in Authorship and Citations - Saif M.Mohammad + Saif M.Mohammad 7860–7870 Disparities in authorship and citations across gender can have substantial adverse consequences not just on the disadvantaged genders, but also on the field of study as a whole. Measuring gender gaps is a crucial step towards addressing them. In this work, we examine female first author percentages and the citations to their papers in Natural Language Processing (1965 to 2019). We determine aggregate-level statistics using an existing manually curated author–gender list as well as first names strongly associated with a gender. We find that only about 29% of first authors are female and only about 25% of last authors are female. Notably, this percentage has not improved since the mid 2000s. We also show that, on average, female first authors are cited less than male first authors, even when controlling for experience and area of research. Finally, we discuss the ethical considerations involved in automatic demographic analysis. 2020.acl-main.702 @@ -9446,7 +9446,7 @@ AbdelrahmanMohamed OmerLevy VeselinStoyanov - LukeZettlemoyer + LukeZettlemoyer 7871–7880 We present BART, a denoising autoencoder for pretraining sequence-to-sequence models. BART is trained by (1) corrupting text with an arbitrary noising function, and (2) learning a model to reconstruct the original text. It uses a standard Tranformer-based neural machine translation architecture which, despite its simplicity, can be seen as generalizing BERT (due to the bidirectional encoder), GPT (with the left-to-right decoder), and other recent pretraining schemes. We evaluate a number of noising approaches, finding the best performance by both randomly shuffling the order of sentences and using a novel in-filling scheme, where spans of text are replaced with a single mask token. BART is particularly effective when fine tuned for text generation but also works well for comprehension tasks. It matches the performance of RoBERTa on GLUE and SQuAD, and achieves new state-of-the-art results on a range of abstractive dialogue, question answering, and summarization tasks, with gains of up to 3.5 ROUGE. BART also provides a 1.1 BLEU increase over a back-translation system for machine translation, with only target language pretraining. We also replicate other pretraining schemes within the BART framework, to understand their effect on end-task performance. 2020.acl-main.703 @@ -9458,7 +9458,7 @@ <fixed-case>BLEURT</fixed-case>: Learning Robust Metrics for Text Generation ThibaultSellam DipanjanDas - AnkurParikh + AnkurParikh 7881–7892 Text generation has made significant advances in the last few years. Yet, evaluation metrics have lagged behind, as the most popular choices (e.g., BLEU and ROUGE) may correlate poorly with human judgment. We propose BLEURT, a learned evaluation metric for English based on BERT. BLEURT can model human judgment with a few thousand possibly biased training examples. A key aspect of our approach is a novel pre-training scheme that uses millions of synthetic examples to help the model generalize. BLEURT provides state-of-the-art results on the last three years of the WMT Metrics shared task and the WebNLG data set. In contrast to a vanilla BERT-based approach, it yields superior results even when the training data is scarce and out-of-distribution. 2020.acl-main.704 @@ -9491,7 +9491,7 @@ AbhijitGupta CaimingXiong RichardSocher - DragomirRadev + DragomirRadev 7906–7917 Neural networks lack the ability to reason about qualitative physics and so cannot generalize to scenarios and tasks unseen during training. We propose ESPRIT, a framework for commonsense reasoning about qualitative physics in natural language that generates interpretable descriptions of physical events. We use a two-step approach of first identifying the pivotal physical events in an environment and then generating natural language descriptions of those events using a data-to-text approach. Our framework learns to generate explanations of how the physical simulation will causally evolve so that an agent or a human can easily reason about a solution using those interpretable descriptions. Human evaluations indicate that ESPRIT produces crucial fine-grained details and has high coverage of physical concepts compared to even human annotations. Dataset, code and documentation are available at https://github.com/salesforce/esprit. 2020.acl-main.706 @@ -9517,7 +9517,7 @@ WenhuChen JianshuChen YuSu - ZhiyuChen + ZhiyuChen William YangWang 7929–7942 Neural natural language generation (NLG) models have recently shown remarkable progress in fluency and coherence. However, existing studies on neural NLG are primarily focused on surface-level realizations with limited emphasis on logical inference, an important aspect of human thinking and language. In this paper, we suggest a new NLG task where a model is tasked with generating natural language statements that can be logically entailed by the facts in an open-domain semi-structured table. To facilitate the study of the proposed logical NLG problem, we use the existing TabFact dataset~(CITATION) featured with a wide range of logical/symbolic inferences as our testbed, and propose new automatic metrics to evaluate the fidelity of generation models w.r.t. logical inference. The new task poses challenges to the existing monotonic generation frameworks due to the mismatch between sequence order and logical order. In our experiments, we comprehensively survey different generation architectures (LSTM, Transformer, Pre-Trained LM) trained with different algorithms (RL, Adversarial Training, Coarse-to-Fine) on the dataset and made following observations: 1) Pre-Trained LM can significantly boost both the fluency and logical fidelity metrics, 2) RL and Adversarial Training are trading fluency for fidelity, 3) Coarse-to-Fine generation can help partially alleviate the fidelity issue while maintaining high language fluency. The code and data are available at https://github.com/wenhuchen/LogicNLG. @@ -9604,7 +9604,7 @@ Document-Level Event Role Filler Extraction using Multi-Granularity Contextualized Encoding XinyaDu - ClaireCardie + ClaireCardie 8010–8020 Few works in the literature of event extraction have gone beyond individual sentences to make extraction decisions. This is problematic when the information needed to recognize an event argument is spread across multiple sentences. We argue that document-level event extraction is a difficult task since it requires a view of a larger context to determine which spans of text correspond to event role fillers. We first investigate how end-to-end neural sequence models (with pre-trained language model representations) perform on document-level role filler extraction, as well as how the length of context captured affects the models’ performance. To dynamically aggregate information captured by neural representations learned at different levels of granularity (e.g., the sentence- and paragraph-level), we propose a novel multi-granularity reader. We evaluate our models on the MUC-4 event extraction dataset, and show that our best system performs substantially better than prior work. We also report findings on the relationship between context length and neural model performance on the task. 2020.acl-main.714 @@ -9694,7 +9694,7 @@ <fixed-case>Z</fixed-case>ero<fixed-case>S</fixed-case>hot<fixed-case>C</fixed-case>eres: Zero-Shot Relation Extraction from Semi-Structured Webpages ColinLockard PrashantShiralkar - Xin LunaDong + Xin LunaDong HannanehHajishirzi 8105–8117 In many documents, such as semi-structured webpages, textual semantics are augmented with additional information conveyed using visual elements including layout, font size, and color. Prior work on information extraction from semi-structured websites has required learning an extraction model specific to a given template via either manually labeled or distantly supervised data from that template. In this work, we propose a solution for “zero-shot” open-domain relation extraction from webpages with a previously unseen template, including from websites with little overlap with existing sources of knowledge for distant supervision and websites in entirely new subject verticals. Our model uses a graph neural network-based approach to build a rich representation of text fields on a webpage and the relationships between them, enabling generalization to new templates. Experiments show this approach provides a 31% F1 gain over a baseline for zero-shot extraction in a new subject vertical. @@ -9708,7 +9708,7 @@ ShrutiRijhwani ShuyanZhou GrahamNeubig - JaimeCarbonell + JaimeCarbonell 8118–8123 Traditional named entity recognition models use gazetteers (lists of entities) as features to improve performance. Although modern neural network models do not require such hand-crafted features for strong performance, recent work has demonstrated their utility for named entity recognition on English data. However, designing such features for low-resource languages is challenging, because exhaustive entity gazetteers do not exist in these languages. To address this problem, we propose a method of “soft gazetteers” that incorporates ubiquitously available information from English knowledge bases, such as Wikipedia, into neural named entity recognition models through cross-lingual entity linking. Our experiments on four low-resource languages show an average improvement of 4 points in F1 score. 2020.acl-main.722 @@ -9720,7 +9720,7 @@ A Prioritization Model for Suicidality Risk Assessment Han-ChinShing PhilipResnik - DouglasOard + DouglasOard 8124–8137 We reframe suicide risk assessment from social media as a ranking problem whose goal is maximizing detection of severely at-risk individuals given the time available. Building on measures developed for resource-bounded document retrieval, we introduce a well founded evaluation paradigm, and demonstrate using an expert-annotated test collection that meaningful improvements over plausible cascade model baselines can be achieved using an approach that jointly ranks individuals and their social media posts. 2020.acl-main.723 @@ -9783,7 +9783,7 @@ History for Visual Dialog: Do we really need it? ShubhamAgarwal - TrungBui + TrungBui Joon-YoungLee IoannisKonstas VerenaRieser @@ -9812,7 +9812,7 @@ <fixed-case>TVQA</fixed-case>+: Spatio-Temporal Grounding for Video Question Answering JieLei LichengYu - TamaraBerg + TamaraBerg MohitBansal 8211–8225 We present the task of Spatio-Temporal Video Question Answering, which requires intelligent systems to simultaneously retrieve relevant moments and detect referenced visual concepts (people and objects) to answer natural language questions about videos. We first augment the TVQA dataset with 310.8K bounding boxes, linking depicted objects to visual concepts in questions and answers. We name this augmented version as TVQA+. We then propose Spatio-Temporal Answerer with Grounded Evidence (STAGE), a unified framework that grounds evidence in both spatial and temporal domains to answer questions about videos. Comprehensive experiments and analyses demonstrate the effectiveness of our framework and how the rich annotations in our TVQA+ dataset can contribute to the question answering task. Moreover, by performing this joint task, our model is able to produce insightful and interpretable spatio-temporal attention visualizations. @@ -9826,7 +9826,7 @@ Po-YaoHuang JunjieHu XiaojunChang - AlexanderHauptmann + AlexanderHauptmann 8226–8237 Unsupervised machine translation (MT) has recently achieved impressive results with monolingual corpora only. However, it is still challenging to associate source-target sentences in the latent space. As people speak different languages biologically share similar visual systems, the potential of achieving better alignment through visual content is promising yet under-explored in unsupervised multimodal MT (MMT). In this paper, we investigate how to utilize visual content for disambiguation and promoting latent space alignment in unsupervised MMT. Our model employs multimodal back-translation and features pseudo visual pivoting in which we learn a shared multilingual visual-semantic embedding space and incorporate visually-pivoted captioning as additional weak supervision. The experimental results on the widely used Multi30K dataset show that the proposed model significantly improves over the state-of-the-art methods and generalizes well when images are not available at the testing time. 2020.acl-main.731 @@ -9838,7 +9838,7 @@ A Multitask Learning Approach for Diacritic Restoration SawsanAlqahtani AjayMishra - MonaDiab + MonaDiab 8238–8247 In many languages like Arabic, diacritics are used to specify pronunciations as well as meanings. Such diacritics are often omitted in written text, increasing the number of possible pronunciations and meanings for a word. This results in a more ambiguous text making computational processing on such text more difficult. Diacritic restoration is the task of restoring missing diacritics in the written text. Most state-of-the-art diacritic restoration models are built on character level information which helps generalize the model to unseen data, but presumably lose useful information at the word level. Thus, to compensate for this loss, we investigate the use of multi-task learning to jointly optimize diacritic restoration with related NLP problems namely word segmentation, part-of-speech tagging, and syntactic diacritization. We use Arabic as a case study since it has sufficient data resources for tasks that we consider in our joint modeling. Our joint models significantly outperform the baselines and are comparable to the state-of-the-art models that are more complex relying on morphological analyzers and/or a lot more data (e.g. dialectal data). 2020.acl-main.732 @@ -9905,7 +9905,7 @@ Phonetic and Visual Priors for Decipherment of Informal <fixed-case>R</fixed-case>omanization MariaRyskina - Matthew R.Gormley + Matthew R.Gormley TaylorBerg-Kirkpatrick 8308–8319 Informal romanization is an idiosyncratic process used by humans in informal digital communication to encode non-Latin script languages into Latin character sets found on common keyboards. Character substitution choices differ between users but have been shown to be governed by the same main principles observed across a variety of languages—namely, character pairs are often associated through phonetic or visual similarity. We propose a noisy-channel WFST cascade model for deciphering the original non-Latin script from observed romanized text in an unsupervised fashion. We train our model directly on romanized data from two languages: Egyptian Arabic and Russian. We demonstrate that adding inductive bias through phonetic and visual priors on character mappings substantially improves the model’s performance on both languages, yielding results much closer to the supervised skyline. Finally, we introduce a new dataset of romanized Russian, collected from a Russian social network website and partially annotated for our experiments. @@ -9916,9 +9916,9 @@ Active Learning for Coreference Resolution using Discrete Annotation - Belinda Z.Li + Belinda Z.Li GabrielStanovsky - LukeZettlemoyer + LukeZettlemoyer 8320–8331 We improve upon pairwise annotation for active learning in coreference resolution, by asking annotators to identify mention antecedents if a presented mention pair is deemed not coreferent. This simple modification, when combined with a novel mention clustering algorithm for selecting which examples to label, is much more efficient in terms of the performance obtained per annotation budget. In experiments with existing benchmark coreference datasets, we show that the signal from this additional question leads to significant performance gains per human-annotation hour. Future work can use our annotation protocol to effectively develop coreference models for new domains. Our code is publicly available. 2020.acl-main.738 @@ -9946,7 +9946,7 @@ KyleLo IzBeltagy DougDowney - Noah A.Smith + Noah A.Smith 8342–8360 Language models pretrained on text from a wide variety of sources form the foundation of today’s NLP. In light of the success of these broad-coverage models, we investigate whether it is still helpful to tailor a pretrained model to the domain of a target task. We present a study across four domains (biomedical and computer science publications, news, and reviews) and eight classification tasks, showing that a second phase of pretraining in-domain (domain-adaptive pretraining) leads to performance gains, under both high- and low-resource settings. Moreover, adapting to the task’s unlabeled data (task-adaptive pretraining) improves performance even after domain-adaptive pretraining. Finally, we show that adapting to a task corpus augmented using simple data selection strategies is an effective alternative, especially when resources for domain-adaptive pretraining might be unavailable. Overall, we consistently find that multi-phase adaptive pretraining offers large gains in task performance. Honorable Mention for Best Overall Paper @@ -9997,7 +9997,7 @@ Structured Tuning for Semantic Role Labeling TaoLi Parth AnandJawale - MarthaPalmer + MarthaPalmer VivekSrikumar 8402–8412 Recent neural network-driven semantic role labeling (SRL) systems have shown impressive improvements in F1 scores. These improvements are due to expressive input representations, which, at least at the surface, are orthogonal to knowledge-rich constrained decoding mechanisms that helped linear SRL models. Introducing the benefits of structure to inform neural models presents a methodological challenge. In this paper, we present a structured tuning framework to improve models using softened constraints only at training time. Our framework leverages the expressiveness of neural networks and provides supervision with structured loss components. We start with a strong baseline (RoBERTa) to validate the impact of our approach, and show that our framework outperforms the baseline by learning to comply with declarative constraints. Additionally, our experiments with smaller training sizes show that we can achieve consistent improvements under low-resource scenarios. @@ -10010,7 +10010,7 @@ <fixed-case>T</fixed-case>a<fixed-case>BERT</fixed-case>: Pretraining for Joint Understanding of Textual and Tabular Data PengchengYin GrahamNeubig - Wen-tauYih + Wen-tauYih SebastianRiedel 8413–8426 Recent years have witnessed the burgeoning of pretrained language models (LMs) for text-based natural language (NL) understanding tasks. Such models are typically trained on free-form NL text, hence may not be suitable for tasks like semantic parsing over structured data, which require reasoning over both free-form NL questions and structured tabular data (e.g., database tables). In this paper we present TaBERT, a pretrained LM that jointly learns representations for NL sentences and (semi-)structured tables. TaBERT is trained on a large corpus of 26 million tables and their English contexts. In experiments, neural semantic parsers using TaBERT as feature representation layers achieve new best results on the challenging weakly-supervised semantic parsing benchmark WikiTableQuestions, while performing competitively on the text-to-SQL dataset Spider. @@ -10039,10 +10039,10 @@ NamanGoyal VishravChaudhary GuillaumeWenzek - FranciscoGuzmán - EdouardGrave + FranciscoGuzmán + EdouardGrave MyleOtt - LukeZettlemoyer + LukeZettlemoyer VeselinStoyanov 8440–8451 This paper shows that pretraining multilingual language models at scale leads to significant performance gains for a wide range of cross-lingual transfer tasks. We train a Transformer-based masked language model on one hundred languages, using more than two terabytes of filtered CommonCrawl data. Our model, dubbed XLM-R, significantly outperforms multilingual BERT (mBERT) on a variety of cross-lingual benchmarks, including +14.6% average accuracy on XNLI, +13% average F1 score on MLQA, and +2.4% F1 score on NER. XLM-R performs particularly well on low-resource languages, improving 15.7% in XNLI accuracy for Swahili and 11.4% for Urdu over previous XLM models. We also present a detailed empirical analysis of the key factors that are required to achieve these gains, including the trade-offs between (1) positive transfer and capacity dilution and (2) the performance of high and low resource languages at scale. Finally, we show, for the first time, the possibility of multilingual modeling without sacrificing per-language performance; XLM-R is very competitive with strong monolingual models on the GLUE and XNLI benchmarks. We will make our code and models publicly available. @@ -10079,7 +10079,7 @@ Multi-Domain Named Entity Recognition with Genre-Aware and Agnostic Inference JingWang MayankKulkarni - DanielPreotiuc-Pietro + DanielPreotiuc-Pietro 8476–8488 Named entity recognition is a key component of many text processing pipelines and it is thus essential for this component to be robust to different types of input. However, domain transfer of NER models with data from multiple genres has not been widely studied. To this end, we conduct NER experiments in three predictive setups on data from: a) multiple domains; b) multiple domains where the genre label is unknown at inference time; c) domains not encountered in training. We introduce a new architecture tailored to this task by using shared and private domain parameters and multi-task learning. This consistently outperforms all other baseline and competitive methods on all three experimental setups, with differences ranging between +1.95 to +3.11 average F1 across multiple genres when compared to standard approaches. These results illustrate the challenges that need to be taken into account when building real-world NLP applications that are robust to various types of text and the methods that can help, at least partially, alleviate these issues. 2020.acl-main.750 @@ -10091,7 +10091,7 @@ <fixed-case>TX</fixed-case>tract: Taxonomy-Aware Knowledge Extraction for Thousands of Product Categories GiannisKaramanolakis JunMa - Xin LunaDong + Xin LunaDong 8489–8502 Extracting structured knowledge from product profiles is crucial for various applications in e-Commerce. State-of-the-art approaches for knowledge extraction were each designed for a single category of product, and thus do not apply to real-life e-Commerce scenarios, which often contain thousands of diverse categories. This paper proposes TXtract, a taxonomy-aware knowledge extraction model that applies to thousands of product categories organized in a hierarchical taxonomy. Through category conditional self-attention and multi-task learning, our approach is both scalable, as it trains a single model for thousands of categories, and effective, as it extracts category-specific attribute values. Experiments on products from a taxonomy with 4,000 categories show that TXtract outperforms state-of-the-art approaches by up to 10% in F1 and 15% in coverage across all categories. 2020.acl-main.751 @@ -10101,7 +10101,7 @@ <fixed-case>T</fixed-case>rigger<fixed-case>NER</fixed-case>: Learning with Entity Triggers as Explanations for Named Entity Recognition - Bill YuchenLin + Bill YuchenLin Dong-HoLee MingShen RyanMoreno @@ -10117,7 +10117,7 @@ Addressing Posterior Collapse with Mutual Information for Improved Variational Neural Machine Translation - Arya D.McCarthy + Arya D.McCarthy XianLi JiataoGu NingDong @@ -10144,8 +10144,8 @@ Evaluating Robustness to Input Perturbations for Neural Machine Translation XingNiu PrashantMathur - GeorgianaDinu - YaserAl-Onaizan + GeorgianaDinu + YaserAl-Onaizan 8538–8544 Neural Machine Translation (NMT) models are sensitive to small perturbations in the input. Robustness to such perturbations is typically measured using translation quality metrics such as BLEU on the noisy input. This paper proposes additional metrics which measure the relative degradation and changes in translation when small perturbations are added to the input. We focus on a class of models employing subword regularization to address robustness and perform extensive evaluations of these models using the robustness measures proposed. Results show that our proposed metrics reveal a clear trend of improved robustness to perturbations when subword regularization methods are used. 2020.acl-main.755 @@ -10195,7 +10195,7 @@ Automated Topical Component Extraction Using Neural Network Attention Scores from Source-based Essay Scoring HaoranZhang - DianeLitman + DianeLitman 8569–8584 While automated essay scoring (AES) can reliably grade essays at scale, automated writing evaluation (AWE) additionally provides formative feedback to guide essay revision. However, a neural AES typically does not provide useful feature representations for supporting AWE. This paper presents a method for linking AWE and neural AES, by extracting Topical Components (TCs) representing evidence from a source text using the intermediate output of attention layers. We evaluate performance using a feature-based AES requiring TCs. Results show that performance is comparable whether using automatically or manually constructed TCs for 1) representing essays as rubric-based features, 2) grading essays. 2020.acl-main.759 @@ -10222,7 +10222,7 @@ TariqAlhindi SiddharthVaria KristeKrstovski - MonaDiab + MonaDiab SmarandaMuresan 8593–8606 The increased focus on misinformation has spurred development of data and systems for detecting the veracity of a claim as well as retrieving authoritative evidence. The Fact Extraction and VERification (FEVER) dataset provides such a resource for evaluating endto- end fact-checking, requiring retrieval of evidence from Wikipedia to validate a veracity prediction. We show that current systems for FEVER are vulnerable to three categories of realistic challenges for fact-checking – multiple propositions, temporal reasoning, and ambiguity and lexical variation – and introduce a resource with these types of claims. Then we present a system designed to be resilient to these “attacks” using multiple pointer networks for document selection and jointly modeling a sequence of evidence sentences and veracity relation predictions. We find that in handling these attacks we obtain state-of-the-art results on FEVER, largely due to improved evidence retrieval. @@ -10254,7 +10254,7 @@ RandaElanwar PrakashIshwar MargritBetke - Derry TantiWijaya + Derry TantiWijaya 8614–8624 News framing refers to the practice in which aspects of specific issues are highlighted in the news to promote a particular interpretation. In NLP, although recent works have studied framing in English news, few have studied how the analysis can be extended to other languages and in a multi-label setting. In this work, we explore multilingual transfer learning to detect multiple frames from just the news headline in a genuinely low-resource context where there are few/no frame annotations in the target language. We propose a novel method that can leverage elementary resources consisting of a dictionary and few annotations to detect frames in the target language. Our method performs comparably or better than translating the entire target language headline to the source language for which we have annotated data. This work opens up an exciting new capability of scaling up frame analysis to many languages, even those without existing translation technologies. Lastly, we apply our method to detect frames on the issue of U.S. gun violence in multiple languages and obtain exciting insights on the relationship between different frames of the same problem across different countries with different languages. 2020.acl-main.763 @@ -10281,7 +10281,7 @@ YutaoZhu RuihuaSong ZhichengDou - Jian-YunNie + Jian-YunNie JinZhou 8647–8657 It is appealing to have a system that generates a story or scripts automatically from a storyline, even though this is still out of our reach. In dialogue systems, it would also be useful to drive dialogues by a dialogue plan. In this paper, we address a key problem involved in these applications - guiding a dialogue by a narrative. The proposed model ScriptWriter selects the best response among the candidates that fit the context as well as the given narrative. It keeps track of what in the narrative has been said and what is to be said. A narrative plays a different role than the context (i.e., previous utterances), which is generally used in current dialogue systems. Due to the unavailability of data for this new application, we construct a new large-scale data collection GraphMovie from a movie website where end- users can upload their narratives freely when watching a movie. Experimental results on the dataset show that our proposed approach based on narratives significantly outperforms the baselines that simply use the narrative as a kind of context. @@ -10307,7 +10307,7 @@ SubhabrataMukherjee MarcelloHasegawa AhmedHassan Awadallah - RyenWhite + RyenWhite 8680–8689 Intelligent features in email service applications aim to increase productivity by helping people organize their folders, compose their emails and respond to pending tasks. In this work, we explore a new application, Smart-To-Do, that helps users with task management over emails. We introduce a new task and dataset for automatically generating To-Do items from emails where the sender has promised to perform an action. We design a two-stage process leveraging recent advances in neural text generation and sequence-to-sequence learning, obtaining BLEU and ROUGE scores of 0.23 and 0.63 for this task. To the best of our knowledge, this is the first work to address the problem of composing To-Do items from emails. 2020.acl-main.767 @@ -10333,7 +10333,7 @@ End-to-End Bias Mitigation by Modelling Biases in Corpora RabeehKarimi Mahabadi YonatanBelinkov - JamesHenderson + JamesHenderson 8706–8716 Several recent studies have shown that strong natural language understanding (NLU) models are prone to relying on unwanted dataset biases without learning the underlying task, resulting in models that fail to generalize to out-of-domain datasets and are likely to perform poorly in real-world scenarios. We propose two learning strategies to train neural models, which are more robust to such biases and transfer better to out-of-domain datasets. The biases are specified in terms of one or more bias-only models, which learn to leverage the dataset biases. During training, the bias-only models’ predictions are used to adjust the loss of the base model to reduce its reliance on biases by down-weighting the biased examples and focusing the training on the hard examples. We experiment on large-scale natural language inference and fact verification benchmarks, evaluating on out-of-domain datasets that are specifically designed to assess the robustness of models against known biases in the training data. Results show that our debiasing methods greatly improve robustness in all settings and better transfer to other textual entailment datasets. Our code and data are publicly available in https://github.com/rabeehk/robust-nli. 2020.acl-main.769 @@ -10346,7 +10346,7 @@ Mind the Trade-off: Debiasing <fixed-case>NLU</fixed-case> Models without Degrading the In-distribution Performance Prasetya AjieUtama - Nafise SadatMoosavi + Nafise SadatMoosavi IrynaGurevych 8717–8729 Models for natural language understanding (NLU) tasks often rely on the idiosyncratic biases of the dataset, which make them brittle against test cases outside the training distribution. Recently, several proposed debiasing methods are shown to be very effective in improving out-of-distribution performance. However, their improvements come at the expense of performance drop when models are evaluated on the in-distribution data, which contain examples with higher diversity. This seemingly inevitable trade-off may not tell us much about the changes in the reasoning and understanding capabilities of the resulting models on broader types of examples beyond the small subset represented in the out-of-distribution data. In this paper, we address this trade-off by introducing a novel debiasing method, called confidence regularization, which discourage models from exploiting biases while enabling them to receive enough incentive to learn from all the training examples. We evaluate our method on three NLU tasks and show that, in contrast to its predecessors, it improves the performance on out-of-distribution datasets (e.g., 7pp gain on HANS dataset) while maintaining the original in-distribution accuracy. @@ -10358,7 +10358,7 @@ <fixed-case>NILE</fixed-case> : Natural Language Inference with Faithful Natural Language Explanations SawanKumar - ParthaTalukdar + ParthaTalukdar 8730–8742 The recent growth in the popularity and success of deep learning models on NLP classification tasks has accompanied the need for generating some form of natural language explanation of the predicted labels. Such generated natural language (NL) explanations are expected to be faithful, i.e., they should correlate well with the model’s internal decision making. In this work, we focus on the task of natural language inference (NLI) and address the following question: can we build NLI systems which produce labels with high accuracy, while also generating faithful explanations of its decisions? We propose Natural-language Inference over Label-specific Explanations (NILE), a novel NLI method which utilizes auto-generated label-specific NL explanations to produce labels along with its faithful explanation. We demonstrate NILE’s effectiveness over previously reported methods through automated and human evaluation of the produced labels and explanations. Our evaluation of NILE also supports the claim that accurate systems capable of providing testable explanations of their decisions can be designed. We discuss the faithfulness of NILE’s explanations in terms of sensitivity of the decisions to the corresponding explanations. We argue that explicit evaluation of faithfulness, in addition to label and explanation accuracy, is an important step in evaluating model’s explanations. Further, we demonstrate that task-specific probes are necessary to establish such sensitivity. 2020.acl-main.771 @@ -10392,7 +10392,7 @@ Uncertain Natural Language Inference TongfeiChen - ZhengpingJiang + ZhengpingJiang AdamPoliak KeisukeSakaguchi BenjaminVan Durme @@ -10416,8 +10416,8 @@ Revisiting Higher-Order Dependency Parsers - ErickFonseca - André F. T.Martins + ErickFonseca + André F. T.Martins 8795–8800 Neural encoders have allowed dependency parsers to shift from higher-order structured models to simpler first-order ones, making decoding faster and still achieving better accuracy than non-neural parsers. This has led to a belief that neural encoders can implicitly encode structural constraints, such as siblings and grandparents in a tree. We tested this hypothesis and found that neural parsers may benefit from higher-order features, even when employing a powerful pre-trained encoder, such as BERT. While the gains of higher-order features are small in the presence of a powerful encoder, they are consistent for long-range dependencies and long sentences. In particular, higher-order models are more accurate on full sentence parses and on the exact match of modifier lists, indicating that they deal better with larger, more complex structures. 2020.acl-main.776 @@ -10589,8 +10589,8 @@ <fixed-case>T</fixed-case>rialstreamer: Mapping and Browsing Medical Evidence in Real-Time BenjaminNye AniNenkova - IainMarshall - Byron C.Wallace + IainMarshall + Byron C.Wallace 63–69 We introduce Trialstreamer, a living database of clinical trial reports. Here we mainly describe the evidence extraction component; this extracts from biomedical abstracts key pieces of information that clinicians need when appraising the literature, and also the relations between these. Specifically, the system extracts descriptions of trial participants, the treatments compared in each arm (the interventions), and which outcomes were measured. The system then attempts to infer which interventions were reported to work best by determining their relationship with identified trial outcome measures. In addition to summarizing individual trials, these extracted data elements allow automatic synthesis of results across many trials on the same topic. We apply the system at scale to all reports of randomized controlled trials indexed in MEDLINE, powering the automatic generation of evidence maps, which provide a global view of the efficacy of different interventions combining data from all relevant clinical trials on a topic. We make all code and models freely available alongside a demonstration of the web interface. 2020.acl-demos.9 @@ -10604,7 +10604,7 @@ JenniferHu EthanWilcox PengQian - RogerLevy + RogerLevy 70–76 Targeted syntactic evaluations have yielded insights into the generalizations learned by neural network language models. However, this line of research requires an uncommon confluence of skills: both the theoretical knowledge needed to design controlled psycholinguistic experiments, and the technical proficiency needed to train and deploy large-scale language models. We present SyntaxGym, an online platform designed to make targeted evaluations accessible to both experts in NLP and linguistics, reproducible across computing environments, and standardized following the norms of psycholinguistic experimental design. This paper releases two tools of independent value for the computational linguistics community: 1. A website, syntaxgym.org, which centralizes the process of targeted syntactic evaluation and provides easy tools for analysis and visualization; 2. Two command-line tools, ‘syntaxgym‘ and ‘lm-zoo‘, which allow any user to reproduce targeted syntactic evaluations and general language model inference on their own machine. 2020.acl-demos.10 @@ -10622,8 +10622,8 @@ BrianChen BoWu HengJi - Shih-FuChang - ClareVoss + Shih-FuChang + ClareVoss DanielNapierski MarjorieFreedman 77–86 @@ -10676,7 +10676,7 @@ YuhaoZhang YuhuiZhang JasonBolton - Christopher D.Manning + Christopher D.Manning 101–108 We introduce Stanza, an open-source Python natural language processing toolkit supporting 66 human languages. Compared to existing widely used toolkits, Stanza features a language-agnostic fully neural pipeline for text analysis, including tokenization, multi-word token expansion, lemmatization, part-of-speech and morphological feature tagging, dependency parsing, and named entity recognition. We have trained Stanza on a total of 112 datasets, including the Universal Dependencies treebanks and other multilingual corpora, and show that the same neural architecture generalizes well and achieves competitive performance on all languages tested. Additionally, Stanza includes a native Python interface to the widely used Java Stanford CoreNLP software, which further extends its functionality to cover other tasks such as coreference resolution and relation extraction. Source code, documentation, and pretrained models for 66 languages are available at https://stanfordnlp.github.io/stanza/. 2020.acl-demos.14 @@ -10693,7 +10693,7 @@ Phu MonHtut AlexWang IanTenney - Samuel R.Bowman + Samuel R.Bowman 109–117 We introduce jiant, an open source toolkit for conducting multitask and transfer learning experiments on English NLU tasks. jiant enables modular and configuration driven experimentation with state-of-the-art models and a broad set of tasks for probing, transfer learning, and multitask training experiments. jiant implements over 50 NLU tasks, including all GLUE and SuperGLUE benchmark tasks. We demonstrate that jiant reproduces published performance on a variety of tasks and models, e.g., RoBERTa and BERT. 2020.acl-demos.15 @@ -10726,7 +10726,7 @@ Chung-TingTsai Jhih-JieChen Ching-YuYang - Jason S.Chang + Jason S.Chang 127–133 This paper presents LinggleWrite, a writing coach that provides writing suggestions, assesses writing proficiency levels, detects grammatical errors, and offers corrective feedback in response to user’s essay. The method involves extracting grammar patterns, training models for automated essay scoring (AES) and grammatical error detection (GED), and finally retrieving plausible corrections from a n-gram search engine. Experiments on public test sets indicate that both AES and GED models achieve state-of-the-art performance. These results show that LinggleWrite is potentially useful in helping learners improve their writing skills. 2020.acl-demos.17 @@ -10769,7 +10769,7 @@ <fixed-case>O</fixed-case>pus<fixed-case>F</fixed-case>ilter: A Configurable Parallel Corpus Filtering Toolbox MikkoAulamo SamiVirpioja - JörgTiedemann + JörgTiedemann 150–156 This paper introduces OpusFilter, a flexible and modular toolbox for filtering parallel corpora. It implements a number of components based on heuristic filters, language identification libraries, character-based language models, and word alignment tools, and it can easily be extended with custom filters. Bitext segments can be ranked according to their quality or domain match using single features or a logistic regression model that can be trained without manually labeled training data. We demonstrate the effectiveness of OpusFilter on the example of a Finnish-English news translation task based on noisy web-crawled training data. Applying our tool leads to improved translation quality while significantly reducing the size of the training data, also clearly outperforming an alternative ranking given in the crawled data set. Furthermore, we show the ability of OpusFilter to perform data selection for domain adaptation. 2020.acl-demos.20 @@ -10820,7 +10820,7 @@ <fixed-case>P</fixed-case>hoton: A Robust Cross-Domain Text-to-<fixed-case>SQL</fixed-case> System JichuanZeng - Xi VictoriaLin + Xi VictoriaLin Steven C.H.Hoi RichardSocher CaimingXiong @@ -10836,7 +10836,7 @@ Interactive Task Learning from <fixed-case>GUI</fixed-case>-Grounded Natural Language Instructions and Demonstrations Toby Jia-JunLi - TomMitchell + TomMitchell Brad A.Myers 215–223 We show SUGILITE, an intelligent task automation agent that can learn new tasks and relevant associated concepts interactively from the user’s natural language instructions and demonstrations, using the graphical user interfaces (GUIs) of third-party mobile apps. This system provides several interesting features: (1) it allows users to teach new task procedures and concepts through verbal instructions together with demonstration of the steps of a script using GUIs; (2) it supports users in clarifying their intents for demonstrated actions using GUI-grounded verbal instructions; (3) it infers parameters of tasks and their possible values in utterances using the hierarchical structures of the underlying app GUIs; and (4) it generalizes taught concepts to different contexts and task domains. We describe the architecture of the SUGILITE system, explain the design and implementation of its key features, and show a prototype in the form of a conversational assistant on Android. @@ -10849,7 +10849,7 @@ <fixed-case>M</fixed-case>ixing<fixed-case>B</fixed-case>oard: a Knowledgeable Stylized Integrated Text Generation Platform XiangGao MichelGalley - BillDolan + BillDolan 224–231 We present MixingBoard, a platform for quickly building demos with a focus on knowledge grounded stylized text generation. We unify existing text generation algorithms in a shared codebase and further adapt earlier algorithms for constrained generation. To borrow advantages from different models, we implement strategies for cross-model integration, from the token probability level to the latent space level. An interface to external knowledge is provided via a module that retrieves, on-the-fly, relevant knowledge from passages on the web or a document collection. A user interface for local development, remote webpage access, and a RESTful API are provided to make it simple for users to build their own demos. 2020.acl-demos.26 @@ -10859,7 +10859,7 @@ <fixed-case>NLP</fixed-case> Scholar: An Interactive Visual Explorer for Natural Language Processing Literature - Saif M.Mohammad + Saif M.Mohammad 232–255 As part of the NLP Scholar project, we created a single unified dataset of NLP papers and their meta-information (including citation numbers), by extracting and aligning information from the ACL Anthology and Google Scholar. In this paper, we describe several interconnected interactive visualizations (dashboards) that present various aspects of the data. Clicking on an item within a visualization or entering query terms in the search boxes filters the data in all visualizations in the dashboard. This allows users to search for papers in the area of their interest, published within specific time periods, published by specified authors, etc. The interactive visualizations presented here, and the associated dataset of papers mapped to citations, have additional uses as well including understanding how the field is growing (both overall and across sub-areas), as well as quantifying the impact of different types of papers on subsequent publications. 2020.acl-demos.27 @@ -10882,7 +10882,7 @@ <fixed-case>U</fixed-case>snea: An Authorship Tool for Interactive Fiction using Retrieval Based Semantic Parsing - BenSwanson + BenSwanson BorisSmus 263–269 The reader of a choose your own adventure novel and the user of a modern virtual assistant have a subtle similarity; both may, through the right lens, be viewed as engaging with a work of Interactive Fiction. This literary form emerged in the 1970s and has grown like a vine along the branch of modern technology, one guided by the advances of the other. In this work we weave together threads from the Interactive Fiction community and neural semantic parsing for dialog systems, defining the data model and necessary algorithms for a novel type of Interactive Fiction and open sourcing its accompanying authoring tool. Specifically, our work integrates retrieval based semantic parsing predicates into the branching story structures well known to the Interactive Fiction community, relaxing the relatively strict lexical options of preexisting systems. @@ -10901,7 +10901,7 @@ XiangGao JianfengGao JingjingLiu - BillDolan + BillDolan 270–278 We present a large, tunable neural conversational response generation model, DIALOGPT (dialogue generative pre-trained transformer). Trained on 147M conversation-like exchanges extracted from Reddit comment chains over a period spanning from 2005 through 2017, DialoGPT extends the Hugging Face PyTorch transformer to attain a performance close to human both in terms of automatic and human evaluation in single-turn dialogue settings. We show that conversational systems that leverage DialoGPT generate more relevant, contentful and context-consistent responses than strong baseline systems. The pre-trained model and training pipeline are publicly released to facilitate research into neural response generation and the development of more intelligent open-domain dialogue systems. 2020.acl-demos.30 @@ -10937,7 +10937,7 @@ YifanZhang SeunghakYu AlbertoBarrón-Cedeño - PreslavNakov + PreslavNakov 287–293 Recent events, such as the 2016 US Presidential Campaign, Brexit and the COVID-19 “infodemic”, have brought into the spotlight the dangers of online disinformation. There has been a lot of research focusing on fact-checking and disinformation detection. However, little attention has been paid to the specific rhetorical and psychological techniques used to convey propaganda messages. Revealing the use of such techniques can help promote media literacy and critical thinking, and eventually contribute to limiting the impact of “fake news” and disinformation campaigns. Prta (Propaganda Persuasion Techniques Analyzer) allows users to explore the articles crawled on a regular basis by highlighting the spans in which propaganda techniques occur and to compare them on the basis of their use of propaganda techniques. The system further reports statistics about the use of such techniques, overall and over time, or according to filtering criteria specified by the user based on time interval, keywords, and/or political orientation of the media. Moreover, it allows users to analyze any text or URL through a dedicated interface or via an API. The system is available online: https://www.tanbih.org/prta. Honorable Mention for Best Demonstration Paper @@ -10983,7 +10983,7 @@ <fixed-case>P</fixed-case>enman: An Open-Source Library and Tool for <fixed-case>AMR</fixed-case> Graphs - Michael WayneGoodman + Michael WayneGoodman 312–319 Abstract Meaning Representation (AMR) (Banarescu et al., 2013) is a framework for semantic dependencies that encodes its rooted and directed acyclic graphs in a format called PENMAN notation. The format is simple enough that users of AMR data often write small scripts or libraries for parsing it into an internal graph representation, but there is enough complexity that these users could benefit from a more sophisticated and well-tested solution. The open-source Python library Penman provides a robust parser, functions for graph inspection and manipulation, and functions for formatting graphs into PENMAN notation. Many functions are also available in a command-line tool, thus extending its utility to non-Python setups. 2020.acl-demos.35 @@ -10995,7 +10995,7 @@ Embedding-based Scientific Literature Discovery in a Text Editor Application OnurGökçe JonathanPrada - Nikola I.Nikolov + Nikola I.Nikolov NianlongGu Richard H.R.Hahnloser 320–326 @@ -11014,7 +11014,7 @@ MahsaMonshizadeh VladislavHnatovskiy AntonioKrüger - Josefvan Genabith + Josefvan Genabith 327–334 The shift from traditional translation to post-editing (PE) of machine-translated (MT) text can save time and reduce errors, but it also affects the design of translation interfaces, as the task changes from mainly generating text to correcting errors within otherwise helpful translation proposals. Since this paradigm shift offers potential for modalities other than mouse and keyboard, we present MMPE, the first prototype to combine traditional input modes with pen, touch, and speech modalities for PE of MT. Users can directly cross out or hand-write new text, drag and drop words for reordering, or use spoken commands to update the text in place. All text manipulations are logged in an easily interpretable format to simplify subsequent translation process research. The results of an evaluation with professional translators suggest that pen and touch interaction are suitable for deletion and reordering tasks, while speech and multi-modal combinations of select & speech are considered suitable for replacements and insertions. Overall, experiment participants were enthusiastic about the new modalities and saw them as useful extensions to mouse & keyboard, but not as a complete substitute. 2020.acl-demos.37 @@ -11024,7 +11024,7 @@ Torch-Struct: Deep Structured Prediction Library - AlexanderRush + AlexanderRush 335–342 The literature on structured prediction for NLP describes a rich collection of distributions and algorithms over sequences, segmentations, alignments, and trees; however, these algorithms are difficult to utilize in deep learning frameworks. We introduce Torch-Struct, a library for structured prediction designed to take advantage of and integrate with vectorized, auto-differentiation based frameworks. Torch-Struct includes a broad collection of probabilistic structures accessed through a simple and flexible distribution-based API that connects to any deep learning model. The library utilizes batched, vectorized operations and exploits auto-differentiation to produce readable, fast, and testable code. Internally, we also include a number of general-purpose optimizations to provide cross-algorithm efficiency. Experiments show significant performance gains over fast baselines and case-studies demonstrate the benefits of the library. Torch-Struct is available at https://github.com/harvardnlp/pytorch-struct. 2020.acl-demos.38 @@ -11070,7 +11070,7 @@ <fixed-case>SUPP</fixed-case>.<fixed-case>AI</fixed-case>: finding evidence for supplement-drug interactions - Lucy LuWang + Lucy LuWang OyvindTafjord ArmanCohan SarthakJain @@ -11089,7 +11089,7 @@ <fixed-case>LEAN</fixed-case>-<fixed-case>LIFE</fixed-case>: A Label-Efficient Annotation Framework Towards Learning from Explanation Dong-HoLee RahulKhanna - Bill YuchenLin + Bill YuchenLin SeyeonLee QinyuanYe ElizabethBoschee @@ -11106,7 +11106,7 @@ What’s The Latest? A Question-driven News Chatbot PhilippeLaban JohnCanny - Marti A.Hearst + Marti A.Hearst 380–387 This work describes an automatic news chatbot that draws content from a diverse set of news articles and creates conversations with a user about the news. Key components of the system include the automatic organization of news articles into topical chatrooms, integration of automatically generated questions into the conversation, and a novel method for choosing which questions to present which avoids repetitive suggestions. We describe the algorithmic framework and present the results of a usability study that shows that news readers using the system successfully engage in multi-turn conversations about specific news stories. 2020.acl-demos.43 @@ -11158,7 +11158,7 @@ SharanPai NikhilSachdeva PrinceSachdeva - Rajiv RatnShah + Rajiv RatnShah 13–19 Aphasia is a speech and language disorder which results from brain damage, often characterized by word retrieval deficit (anomia) resulting in naming errors (paraphasia). Automatic paraphasia detection has many benefits for both treatment and diagnosis of Aphasia and its type. But supervised learning methods cant be properly utilized as there is a lack of aphasic speech data. In this paper, we describe our novel unsupervised method which can be implemented without the need for labeled paraphasia data. Our evaluations show that our method outperforms previous work based on supervised learning and transfer learning approaches for English. We demonstrate the utility of our method as an essential first step in developing augmentative and alternative communication (AAC) devices for patients suffering from aphasia in any language. 2020.acl-srw.3 @@ -11239,8 +11239,8 @@ Combining Subword Representations into Word-level Representations in the Transformer Architecture NoeCasas - Marta R.Costa-jussà - José A. R.Fonollosa + Marta R.Costa-jussà + José A. R.Fonollosa 66–71 In Neural Machine Translation, using word-level tokens leads to degradation in translation quality. The dominant approaches use subword-level tokens, but this increases the length of the sequences and makes it difficult to profit from word-level information such as POS tags or semantic dependencies. We propose a modification to the Transformer model to combine subword-level representations into word-level ones in the first layers of the encoder, reducing the effective length of the sequences in the following layers and providing a natural point to incorporate extra word-level information. Our experiments show that this approach maintains the translation quality with respect to the normal Transformer model when no extra word-level information is injected and that it is superior to the currently dominant method for incorporating word-level source language information to models based on subword-level vocabularies. 2020.acl-srw.10 @@ -11274,7 +11274,7 @@ <fixed-case>SCAR</fixed-case>: Sentence Compression using Autoencoders for Reconstruction ChanakyaMalireddy TirthManiar - ManishShrivastava + ManishShrivastava 88–94 Sentence compression is the task of shortening a sentence while retaining its meaning. Most methods proposed for this task rely on labeled or paired corpora (containing pairs of verbose and compressed sentences), which is often expensive to collect. To overcome this limitation, we present a novel unsupervised deep learning framework (SCAR) for deletion-based sentence compression. SCAR is primarily composed of two encoder-decoder pairs: a compressor and a reconstructor. The compressor masks the input, and the reconstructor tries to regenerate it. The model is entirely trained on unlabeled data and does not require additional inputs such as explicit syntactic information or optimal compression length. SCAR’s merit lies in the novel Linkage Loss function, which correlates the compressor and its effect on reconstruction, guiding it to drop inferable tokens. SCAR achieves higher ROUGE scores on benchmark datasets than the existing state-of-the-art methods and baselines. We also conduct a user study to demonstrate the application of our model as a text highlighting system. Using our model to underscore salient information facilitates speed-reading and reduces the time required to skim a document. 2020.acl-srw.13 @@ -11344,8 +11344,8 @@ A Simple and Effective Dependency Parser for <fixed-case>T</fixed-case>elugu SnehaNallani - ManishShrivastava - DiptiSharma + ManishShrivastava + DiptiSharma 143–149 We present a simple and effective dependency parser for Telugu, a morphologically rich, free word order language. We propose to replace the rich linguistic feature templates used in the past approaches with a minimal feature function using contextual vector representations. We train a BERT model on the Telugu Wikipedia data and use vector representations from this model to train the parser. Each sentence token is associated with a vector representing the token in the context of that sentence and the feature vectors are constructed by concatenating two token representations from the stack and one from the buffer. We put the feature representations through a feedforward network and train with a greedy transition based approach. The resulting parser has a very simple architecture with minimal feature engineering and achieves state-of-the-art results for Telugu. 2020.acl-srw.19 @@ -11356,9 +11356,9 @@ Pointwise Paraphrase Appraisal is Potentially Problematic - HannahChen + HannahChen YangfengJi - DavidEvans + DavidEvans 150–155 The prevailing approach for training and evaluating paraphrase identification models is constructed as a binary classification problem: the model is given a pair of sentences, and is judged by how accurately it classifies pairs as either paraphrases or non-paraphrases. This pointwise-based evaluation method does not match well the objective of most real world applications, so the goal of our work is to understand how models which perform well under pointwise evaluation may fail in practice and find better methods for evaluating paraphrase identification models. As a first step towards that goal, we show that although the standard way of fine-tuning BERT for paraphrase identification by pairing two sentences as one sequence results in a model with state-of-the-art performance, that model may perform poorly on simple tasks like identifying pairs with two identical sentences. Moreover, we show that these models may even predict a pair of randomly-selected sentences with higher paraphrase score than a pair of identical ones. 2020.acl-srw.20 @@ -11370,7 +11370,7 @@ Efficient Neural Machine Translation for Low-Resource Languages via Exploiting Related Languages VikrantGoyal SouravKumar - Dipti MisraSharma + Dipti MisraSharma 162–168 A large percentage of the world’s population speaks a language of the Indian subcontinent, comprising languages from both Indo-Aryan (e.g. Hindi, Punjabi, Gujarati, etc.) and Dravidian (e.g. Tamil, Telugu, Malayalam, etc.) families. A universal characteristic of Indian languages is their complex morphology, which, when combined with the general lack of sufficient quantities of high-quality parallel data, can make developing machine translation (MT) systems for these languages difficult. Neural Machine Translation (NMT) is a rapidly advancing MT paradigm and has shown promising results for many language pairs, especially in large training data scenarios. Since the condition of large parallel corpora is not met for Indian-English language pairs, we present our efforts towards building efficient NMT systems between Indian languages (specifically Indo-Aryan languages) and English via efficiently exploiting parallel data from the related languages. We propose a technique called Unified Transliteration and Subword Segmentation to leverage language similarity while exploiting parallel data from related language pairs. We also propose a Multilingual Transfer Learning technique to leverage parallel data from multiple related languages to assist translation for low resource language pair of interest. Our experiments demonstrate an overall average improvement of 5 BLEU points over the standard Transformer-based NMT baselines. 2020.acl-srw.22 @@ -11383,7 +11383,7 @@ Exploring Interpretability in Event Extraction: Multitask Learning of a Neural Event Classifier and an Explanation Decoder ZhengTang - GusHahn-Powell + GusHahn-Powell MihaiSurdeanu 169–175 We propose an interpretable approach for event extraction that mitigates the tension between generalization and interpretability by jointly training for the two goals. Our approach uses an encoder-decoder architecture, which jointly trains a classifier for event extraction, and a rule decoder that generates syntactico-semantic rules that explain the decisions of the event classifier. We evaluate the proposed approach on three biomedical events and show that the decoder generates interpretable rules that serve as accurate explanations for the event classifier’s decisions, and, importantly, that the joint training generally improves the performance of the event classifier. Lastly, we show that our approach can be used for semi-supervised learning, and that its performance improves when trained on automatically-labeled data generated by a rule-based system. @@ -11528,9 +11528,9 @@ Unsupervised Multilingual Sentence Embeddings for Parallel Corpus Mining IvanaKvapilíková MikelArtetxe - GorkaLabaka - EnekoAgirre - OndřejBojar + GorkaLabaka + EnekoAgirre + OndřejBojar 255–262 Existing models of multilingual sentence embeddings require large parallel data resources which are not available for low-resource languages. We propose a novel unsupervised method to derive multilingual sentence embeddings relying only on monolingual data. We first produce a synthetic parallel corpus using unsupervised machine translation, and use it to fine-tune a pretrained cross-lingual masked language model (XLM) to derive the multilingual sentence representations. The quality of the representations is evaluated on two parallel corpus mining tasks with improvements of up to 22 F1 points over vanilla XLM. In addition, we observe that a single synthetic bilingual corpus is able to improve results for other language pairs. 2020.acl-srw.34 @@ -11554,7 +11554,7 @@ Enhancing Word Embeddings with Knowledge Extracted from Lexical Resources MagdalenaBiesialska BardiaRafieian - Marta R.Costa-jussà + Marta R.Costa-jussà 271–278 In this work, we present an effective method for semantic specialization of word vector representations. To this end, we use traditional word embeddings and apply specialization methods to better capture semantic relations between words. In our approach, we leverage external knowledge from rich lexical resources such as BabelNet. We also show that our proposed post-specialization method based on an adversarial neural network with the Wasserstein distance allows to gain improvements over state-of-the-art methods on two tasks: word similarity and dialog state tracking. 2020.acl-srw.36 @@ -11569,7 +11569,7 @@ ZhuoyuanMao FeiCheng SadaoKurohashi - EiichiroSumita + EiichiroSumita 279–285 Sequence-to-sequence (S2S) pre-training using large monolingual data is known to improve performance for various S2S NLP tasks. However, large monolingual corpora might not always be available for the languages of interest (LOI). Thus, we propose to exploit monolingual corpora of other languages to complement the scarcity of monolingual corpora for the LOI. We utilize script mapping (Chinese to Japanese) to increase the similarity (number of cognates) between the monolingual corpora of helping languages and LOI. An empirical case study of low-resource Japanese-English neural machine translation (NMT) reveals that leveraging large Chinese and French monolingual corpora can help overcome the shortage of Japanese and English monolingual corpora, respectively, for S2S pre-training. Using only Chinese and French monolingual corpora, we were able to improve Japanese-English translation quality by up to 8.5 BLEU in low-resource scenarios. 2020.acl-srw.37 @@ -11581,7 +11581,7 @@ Checkpoint Reranking: An Approach to Select Better Hypothesis for Neural Machine Translation Systems VinayPandramish - Dipti MisraSharma + Dipti MisraSharma 286–291 In this paper, we propose a method of re-ranking the outputs of Neural Machine Translation (NMT) systems. After the decoding process, we select a few last iteration outputs in the training process as the N-best list. After training a Neural Machine Translation (NMT) baseline system, it has been observed that these iteration outputs have an oracle score higher than baseline up to 1.01 BLEU points compared to the last iteration of the trained system.We come up with a ranking mechanism by solely focusing on the decoder’s ability to generate distinct tokens and without the usage of any language model or data. With this method, we achieved a translation improvement up to +0.16 BLEU points over baseline.We also evaluate our approach by applying the coverage penalty to the training process.In cases of moderate coverage penalty, the oracle scores are higher than the final iteration up to +0.99 BLEU points, and our algorithm gives an improvement up to +0.17 BLEU points.With excessive penalty, there is a decrease in translation quality compared to the baseline system. Still, an increase in oracle scores up to +1.30 is observed with the re-ranking algorithm giving an improvement up to +0.15 BLEU points is found in case of excessive penalty.The proposed re-ranking method is a generic one and can be extended to other language pairs as well. 2020.acl-srw.38 @@ -11616,7 +11616,7 @@ Exploring the Role of Context to Distinguish Rhetorical and Information-Seeking Questions YuanZhuang - EllenRiloff + EllenRiloff 306–312 Social media posts often contain questions, but many of the questions are rhetorical and do not seek information. Our work studies the problem of distinguishing rhetorical and information-seeking questions on Twitter. Most work has focused on features of the question itself, but we hypothesize that the prior context plays a role too. This paper introduces a new dataset containing questions in tweets paired with their prior tweets to provide context. We create classification models to assess the difficulty of distinguishing rhetorical and information-seeking questions, and experiment with different properties of the prior context. Our results show that the prior tweet and topic features can improve performance on this task. 2020.acl-srw.41 @@ -11679,7 +11679,7 @@ Integrating Ethics into the <fixed-case>NLP</fixed-case> Curriculum - Emily M.Bender + Emily M.Bender DirkHovy AlexandraSchofield 6–9 @@ -11700,10 +11700,10 @@ Reviewing Natural Language Processing Research - KevinCohen + KevinCohen KarënFort MargotMieskes - AurélieNévéol + AurélieNévéol 16–18 This tutorial will cover the theory and practice of reviewing research in natural language processing. Heavy reviewing burdens on natural language processing researchers have made it clear that our community needs to increase the size of our pool of potential reviewers. Simultaneously, notable “false negatives”—rejection by our conferences of work that was later shown to be tremendously important after acceptance by other conferences—have raised awareness of the fact that our reviewing practices leave something to be desired. We do not often talk about “false positives” with respect to conference papers, but leaders in the field have noted that we seem to have a publication bias towards papers that report high performance, with perhaps not much else of interest in them. It need not be this way. Reviewing is a learnable skill, and you will learn it here via lectures and a considerable amount of hands-on practice. 2020.acl-tutorials.4 @@ -11722,7 +11722,7 @@ Multi-modal Information Extraction from Text, Semi-structured, and Tabular Data on the Web - Xin LunaDong + Xin LunaDong HannanehHajishirzi ColinLockard PrashantShiralkar @@ -11748,7 +11748,7 @@ Open-Domain Question Answering DanqiChen - Wen-tauYih + Wen-tauYih 34–37 This tutorial provides a comprehensive and coherent overview of cutting-edge research in open-domain question answering (QA), the task of answering questions using a large collection of documents of diversified topics. We will start by first giving a brief historical background, discussing the basic setup and core technical challenges of the research problem, and then describe modern datasets with the common evaluation metrics and benchmarks. The focus will then shift to cutting-edge models proposed for open-domain QA, including two-stage retriever-reader approaches, dense retriever and end-to-end training, and retriever-free methods. Finally, we will cover some hybrid approaches using both text and large knowledge bases and conclude the tutorial with important open questions. We hope that the tutorial will not only help the audience to acquire up-to-date knowledge but also provide new perspectives to stimulate the advances of open-domain QA research in the next phase. 2020.acl-tutorials.8 diff --git a/data/xml/2020.aespen.xml b/data/xml/2020.aespen.xml index 37b7b4bc59..72d0fbd342 100644 --- a/data/xml/2020.aespen.xml +++ b/data/xml/2020.aespen.xml @@ -3,10 +3,10 @@ Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020 - AliHürriyetoğlu + AliHürriyetoğlu ErdemYörük VanniZavarella - HristoTanev + HristoTanev European Language Resources Association (ELRA)
Marseille, France
May @@ -54,7 +54,7 @@ Analyzing <fixed-case>ELM</fixed-case>o and <fixed-case>D</fixed-case>istil<fixed-case>BERT</fixed-case> on Socio-political News Classification BerfuBüyüköz AliHürriyetoğlu - ArzucanÖzgür + ArzucanÖzgür 9–18 This study evaluates the robustness of two state-of-the-art deep contextual language representations, ELMo and DistilBERT, on supervised learning of binary protest news classification (PC) and sentiment analysis (SA) of product reviews. A ”cross-context” setting is enabled using test sets that are distinct from the training data. The models are fine-tuned and fed into a Feed-Forward Neural Network (FFNN) and a Bidirectional Long Short Term Memory network (BiLSTM). Multinomial Naive Bayes (MNB) and Linear Support Vector Machine (LSVM) are used as traditional baselines. The results suggest that DistilBERT can transfer generic semantic knowledge to other domains better than ELMo. DistilBERT is also 30% smaller and 83% faster than ELMo, which suggests superiority for smaller computational training budgets. When generalization is not the utmost preference and test domain is similar to the training domain, the traditional machine learning (ML) algorithms can still be considered as more economic alternatives to deep language representations. 2020.aespen-1.4 @@ -121,7 +121,7 @@ Protest Event Analysis: A Longitudinal Analysis for <fixed-case>G</fixed-case>reece KonstantinaPapanikolaou - HarisPapageorgiou + HarisPapageorgiou 57–62 The advent of Big Data has shifted social science research towards computational methods. The volume of data that is nowadays available has brought a radical change in traditional approaches due to the cost and effort needed for processing. Knowledge extraction from heterogeneous and ample data is not an easy task to tackle. Thus, interdisciplinary approaches are necessary, combining experts of both social and computer science. This paper aims to present a work in the context of protest analysis, which falls into the scope of Computational Social Science. More specifically, the contribution of this work is to describe a Computational Social Science methodology for Event Analysis. The presented methodology is generic in the sense that it can be applied in every event typology and moreover, it is innovative and suitable for interdisciplinary tasks as it incorporates the human-in-the-loop. Additionally, a case study is presented concerning Protest Analysis in Greece over the last two decades. The conceptual foundation lies mainly upon claims analysis, and newspaper data were used in order to map, document and discuss protests in Greece in a longitudinal perspective. 2020.aespen-1.10 diff --git a/data/xml/2020.alta.xml b/data/xml/2020.alta.xml index 20faf76de7..da733c09f5 100644 --- a/data/xml/2020.alta.xml +++ b/data/xml/2020.alta.xml @@ -4,7 +4,7 @@ Proceedings of the 18th Annual Workshop of the Australasian Language Technology Association MariaKim - DanielBeck + DanielBeck MeladelMistica Australasian Language Technology Association
Virtual Workshop
@@ -20,7 +20,7 @@ Domain Adaptative Causality Encoder FarhadMoghimifar - GholamrezaHaffari + GholamrezaHaffari MahsaBaktashmotlagh 1–10 Automated discovery of causal relationships from text is a challenging task. Current approaches which are mainly based on the extraction of low-level relations among individual events are limited by the shortage of publicly available labelled data. Therefore, the resulting models perform poorly when applied to a distributionally different domain for which labelled data did not exist at the time of training. To overcome this limitation, in this paper, we leverage the characteristics of dependency trees and adversarial learning to address the tasks of adaptive causality identification and localisation. The term adaptive is used since the training and test data come from two distributionally different datasets, which to the best of our knowledge, this work is the first to address. Moreover, we present a new causality dataset, namely MedCaus, which integrates all types of causality in the text. Our experiments on four different benchmark causality datasets demonstrate the superiority of our approach over the existing baselines, by up to 7% improvement, on the tasks of identification and localisation of the causal relations from the text. @@ -113,7 +113,7 @@ XiangjueDong Mohammed AliAl-Garadi AbeedSarker - CecileParis + CecileParis Diego MolláAliod 86–91 Free text data from social media is now widely used in natural language processing research, and one of the most common machine learning tasks performed on this data is classification. Generally speaking, performances of supervised classification algorithms on social media datasets are lower than those on texts from other sources, but recently-proposed transformer-based models have considerably improved upon legacy state-of-the-art systems. Currently, there is no study that compares the performances of different variants of transformer-based models on a wide range of social media text classification datasets. In this paper, we benchmark the performances of transformer-based pre-trained models on 25 social media text classification datasets, 6 of which are health-related. We compare three pre-trained language models, RoBERTa-base, BERTweet and ClinicalBioBERT in terms of classification accuracy. Our experiments show that RoBERTa-base and BERTweet perform comparably on most datasets, and considerably better than ClinicalBioBERT, even on health-related datasets. @@ -140,7 +140,7 @@ Rohit KumarGupta SaketKhandelwal JeanniePaterson - TimothyBaldwin + TimothyBaldwin DanielBeck 98–103 ‘Common Law’ judicial systems follow the doctrine of precedent, which means the legal principles articulated in court judgements are binding in subsequent cases in lower courts. For this reason, lawyers must search prior judgements for the legal principles that are relevant to their case. The difficulty for those within the legal profession is that the information that they are looking for may be contained within a few paragraphs or sentences, but those few paragraphs may be buried within a hundred-page document. In this study, we create a schema based on the relevant information that legal professionals seek within judgements and perform text classification based on it, with the aim of not only assisting lawyers in researching cases, but eventually enabling large-scale analysis of legal judgements to find trends in court outcomes over time. @@ -151,7 +151,7 @@ Convolutional and Recurrent Neural Networks for Spoken Emotion Recognition AaronKeesing IanWatson - MichaelWitbrock + MichaelWitbrock 104–109 We test four models proposed in the speech emotion recognition (SER) literature on 15 public and academic licensed datasets in speaker-independent cross-validation. Results indicate differences in the performance of the models which is partly dependent on the dataset and features used. We also show that a standard utterance-level feature set still performs competitively with neural models on some datasets. This work serves as a starting point for future model comparisons, in addition to open-sourcing the testing code. 2020.alta-1.13 @@ -160,8 +160,8 @@ Popularity Prediction of Online Petitions using a Multimodal <fixed-case>D</fixed-case>eep<fixed-case>R</fixed-case>egression Model KotaroKitayama - ShivashankarSubramanian - TimothyBaldwin + ShivashankarSubramanian + TimothyBaldwin 110–114 Online petitions offer a mechanism for peopleto initiate a request for change and gather sup-port from others to demonstrate support for thecause. In this work, we model the task of peti-tion popularity using both text and image rep-resentations across four different languages,and including petition metadata. We evaluateour proposed approach using a dataset of 75kpetitions from Avaaz.org, and find strong com-plementarity between text and images. 2020.alta-1.14 @@ -188,7 +188,7 @@ Overview of the 2020 <fixed-case>ALTA</fixed-case> Shared Task: Assess Human Behaviour - DiegoMollá + DiegoMollá 127–130 The 2020 ALTA shared task is the 11th in stance of a series of shared tasks organised by ALTA since 2010. The task is to classify texts posted in social media according to human judgements expressed in them. The data used for this task is a subset of SemEval 2018 AIT DISC, which has been annotated by domain experts for this task. In this paper we introduce the task, describe the data and present the results of participating systems. 2020.alta-1.17 @@ -197,7 +197,7 @@ Automatically Predicting Judgement Dimensions of Human Behaviour Segun TaofeekAroyehun - AlexanderGelbukh + AlexanderGelbukh 131–134 This paper describes our submission to the ALTA-2020 shared task on assessing behaviour from short text, We evaluate the effectiveness of traditional machine learning and recent transformers pre-trained models. Our submission with the Roberta-large model and prediction threshold achieved first place on the private leaderboard. 2020.alta-1.18 diff --git a/data/xml/2020.alvr.xml b/data/xml/2020.alvr.xml index 66f07829ad..e29eeb722d 100644 --- a/data/xml/2020.alvr.xml +++ b/data/xml/2020.alvr.xml @@ -62,7 +62,7 @@ On the role of effective and referring questions in <fixed-case>G</fixed-case>uess<fixed-case>W</fixed-case>hat?! MauricioMazuecos AlbertoTestoni - RaffaellaBernardi + RaffaellaBernardi LucianaBenotti 19–25 Task success is the standard metric used to evaluate referential visual dialogue systems. In this paper we propose two new metrics that evaluate how each question contributes to the goal. First, we measure how effective each question is by evaluating whether the question discards objects that are not the referent. Second, we define referring questions as those that univocally identify one object in the image. We report the new metrics for human dialogues and for state of the art publicly available models on GuessWhat?!. Regarding our first metric, we find that successful dialogues do not have a higher percentage of effective questions for most models. With respect to the second metric, humans make questions at the end of the dialogue that are referring, confirming their guess before guessing. Human dialogues that use this strategy have a higher task success but models do not seem to learn it. diff --git a/data/xml/2020.alw.xml b/data/xml/2020.alw.xml index 9e215f1f8b..1b902c9e4f 100644 --- a/data/xml/2020.alw.xml +++ b/data/xml/2020.alw.xml @@ -6,7 +6,7 @@ SeyiAkiwowo BertieVidgen VinodkumarPrabhakaran - ZeerakWaseem + ZeerakWaseem Association for Computational Linguistics
Online
November @@ -35,7 +35,7 @@ JuliaGuo Sarah ItaLevitan SusanMcGregor - JuliaHirschberg + JuliaHirschberg 7–15 Most efforts at identifying abusive speech online rely on public corpora that have been scraped from websites using keyword-based queries or released by site or platform owners for research purposes. These are typically labeled by crowd-sourced annotators – not the targets of the abuse themselves. While this method of data collection supports fast development of machine learning classifiers, the models built on them often fail in the context of real-world harassment and abuse, which contain nuances less easily identified by non-targets. Here, we present a mixed-methods approach to create classifiers for abuse and harassment which leverages direct engagement with the target group in order to achieve high quality and ecological validity of data sets and labels, and to generate deeper insights into the key tactics of bad actors. We use women journalists’ experience on Twitter as an initial community of focus. We identify several structural mechanisms of abuse that we believe will generalize to other target communities. 2020.alw-1.2 @@ -46,7 +46,7 @@ Using Transfer-based Language Models to Detect Hateful and Offensive Language Online VebjørnIsaksen - BjörnGambäck + BjörnGambäck 16–27 Distinguishing hate speech from non-hate offensive language is challenging, as hate speech not always includes offensive slurs and offensive language not always express hate. Here, four deep learners based on the Bidirectional Encoder Representations from Transformers (BERT), with either general or domain-specific language models, were tested against two datasets containing tweets labelled as either ‘Hateful’, ‘Normal’ or ‘Offensive’. The results indicate that the attention-based models profoundly confuse hate speech with offensive and normal language. However, the pre-trained models outperform state-of-the-art results in terms of accurately predicting the hateful instances. 2020.alw-1.3 @@ -120,7 +120,7 @@ Investigating Sampling Bias in Abusive Language Detection DanteRazo - SandraKübler + SandraKübler 70–78 Abusive language detection is becoming increasingly important, but we still understand little about the biases in our datasets for abusive language detection, and how these biases affect the quality of abusive language detection. In the work reported here, we reproduce the investigation of Wiegand et al. (2019) to determine differences between different sampling strategies. They compared boosted random sampling, where abusive posts are upsampled, and biased topic sampling, which focuses on topics that are known to cause abusive language. Instead of comparing individual datasets created using these sampling strategies, we use the sampling strategies on a single, large dataset, thus eliminating the textual source of the dataset as a potential confounding factor. We show that differences in the textual source can have more effect than the chosen sampling strategy. 2020.alw-1.9 @@ -204,7 +204,7 @@ GuillaumeSylvain NithumThain LucasDixon - JeffreySorensen + JeffreySorensen 114–124 We present a new dataset of approximately 44000 comments labeled by crowdworkers. Each comment is labelled as either ‘healthy’ or ‘unhealthy’, in addition to binary labels for the presence of six potentially ‘unhealthy’ sub-attributes: (1) hostile; (2) antagonistic, insulting, provocative or trolling; (3) dismissive; (4) condescending or patronising; (5) sarcastic; and/or (6) an unfair generalisation. Each label also has an associated confidence score. We argue that there is a need for datasets which enable research based on a broad notion of ‘unhealthy online conversation’. We build this typology to encompass a substantial proportion of the individual comments which contribute to unhealthy online conversation. For some of these attributes, this is the first publicly available dataset of this scale. We explore the quality of the dataset, present some summary statistics and initial models to illustrate the utility of this data, and highlight limitations and directions for further research. 2020.alw-1.15 @@ -253,7 +253,7 @@ Detecting <fixed-case>E</fixed-case>ast <fixed-case>A</fixed-case>sian Prejudice on Social Media BertieVidgen - Scott A.Hale + Scott A.Hale EllaGuest HelenMargetts DavidBroniatowski diff --git a/data/xml/2020.amta.xml b/data/xml/2020.amta.xml index c606e9384d..a5d3c8434e 100644 --- a/data/xml/2020.amta.xml +++ b/data/xml/2020.amta.xml @@ -19,7 +19,7 @@ A New Approach to Parameter-Sharing in Multilingual Neural Machine Translation BenyaminAhmadnia - BonnieDorr + BonnieDorr 1-6 ahmadnia-dorr-2020-new This paper has been removed by the Association for Machine Translation in the Americas (AMTA) due to duplication of previous scholarly work, known to the first author, without attribution. @@ -28,7 +28,7 @@ Investigation of Transformer-based Latent Attention Models for Neural Machine Translation ParniaBahar NikitaMakarov - HermannNey + HermannNey 7-20 2020.amta-research.2 bahar-etal-2020-investigation @@ -78,7 +78,7 @@ Towards Handling Compositionality in Low-Resource Bilingual Word Induction ViktorHangya - AlexanderFraser + AlexanderFraser 89-101 2020.amta-research.8 hangya-fraser-2020-towards @@ -129,9 +129,9 @@ On Target Segmentation for Direct Speech Translation - Mattia A.Di Gangi + Mattia A.Di Gangi MarcoGaido - MatteoNegri + MatteoNegri MarcoTurchi 137-150 2020.amta-research.13 @@ -140,7 +140,7 @@ Domain Robustness in Neural Machine Translation MathiasMüller - AnnetteRios + AnnetteRios RicoSennrich 151-164 2020.amta-research.14 @@ -204,7 +204,7 @@ CraigStewart RicardoRei CatarinaFarinha - AlonLavie + AlonLavie 78-109 2020.amta-user.4.Presentation.pdf stewart-etal-2020-comet @@ -261,7 +261,7 @@ RubénMartínez-Domínguez MatīssRikters ArtūrsVasiļevskis - MārcisPinnis + MārcisPinnis PaulaReichenberg 217-223 2020.amta-user.11 @@ -370,12 +370,12 @@ A Tale of Eight Countries or the <fixed-case>EU</fixed-case> Council Presidency Translator in Retrospect - MārcisPinnis + MārcisPinnis TomsBergmanis KristīneMetuzāle ValtersŠics ArtūrsVasiļevskis - AndrejsVasiļjevs + AndrejsVasiļjevs 525-546 2020.amta-user.25 2020.amta-user.25.Presentation.pdf @@ -412,8 +412,8 @@ 2020 John E.Ortega MarcelloFederico - ConstantinOrasan - MajaPopovic + ConstantinOrasan + MajaPopovic amta @@ -452,7 +452,7 @@ <fixed-case>COPECO</fixed-case>: a Collaborative Post-Editing Corpus in Pedagogical Context JonathanMutal - PierretteBouillon + PierretteBouillon PerrineSchumacher JohannaGerlach 61-78 @@ -464,7 +464,7 @@ MaaritKoponen UmutSulubacak KaisaVitikainen - JörgTiedemann + JörgTiedemann 79-92 2020.amta-pemdt.6 koponen-etal-2020-mt-subtitling @@ -476,7 +476,7 @@ TimDüwel RakshaShenoy AntonioKrüger - Josefvan Genabith + Josefvan Genabith 93-108 2020.amta-pemdt.7 herbig-etal-2020-improving @@ -515,7 +515,7 @@ Responsible ‘Gist’ <fixed-case>MT</fixed-case> Use in the Age of Neural <fixed-case>MT</fixed-case> - Marianna J.Martindale + Marianna J.Martindale 18-45 2020.amta-impact.2.Presentation.pdf martindale-2020-responsible diff --git a/data/xml/2020.argmining.xml b/data/xml/2020.argmining.xml index ab76ade288..c26db7cc31 100644 --- a/data/xml/2020.argmining.xml +++ b/data/xml/2020.argmining.xml @@ -87,8 +87,8 @@ PrakashPoudyal JaromirSavelka AagjeIeven - Marie FrancineMoens - TeresaGoncalves + Marie FrancineMoens + TeresaGoncalves PauloQuaresma 67–75 In this paper, we publicly release an annotated corpus of 42 decisions of the European Court of Human Rights (ECHR). The corpus is annotated in terms of three types of clauses useful in argument mining: premise, conclusion, and non-argument parts of the text. Furthermore, relationships among the premises and conclusions are mapped. We present baselines for three tasks that lead from unstructured texts to structured arguments. The tasks are argument clause recognition, clause relation prediction, and premise/conclusion recognition. Despite a straightforward application of the bidirectional encoders from Transformers (BERT), we obtained very promising results F1 0.765 on argument recognition, 0.511 on relation prediction, and 0.859/0.628 on premise/conclusion recognition). The results suggest the usefulness of pre-trained language models based on deep neural network architectures in argument mining. Because of the simplicity of the baselines, there is ample space for improvement in future work based on the released corpus. @@ -107,7 +107,7 @@ Use of Claim Graphing and Argumentation Schemes in Biomedical Literature: A Manual Approach to Analysis EliMoser - Robert E.Mercer + Robert E.Mercer 88–99 Argumentation in an experimental life science paper consists of a main claim being supported with reasoned argumentative steps based on the data garnered from the experiments that were carried out. In this paper we report on an investigation of the large scale argumentation structure found when examining five biochemistry journal publications. One outcome of this investigation of biochemistry articles suggests that argumentation schemes originally designed for genetic research articles may transfer to experimental biomedical literature in general. Our use of these argumentation schemes shows that claims depend not only on experimental data but also on other claims. The tendency for claims to use other claims as their supporting evidence in addition to the experimental data led to two novel models that have provided a better understanding of the large scale argumentation structure of a complete biochemistry paper. First, the claim graph displays the claims within a paper, their interactions, and their evidence. Second, another aspect of this argumentation network is further illustrated by the Model of Informational Hierarchy (MIH) which visualizes at a meta-level the flow of reasoning provided by the authors of the paper and also connects the main claim to the paper’s title. Together, these models, which have been produced by a manual examination of the biochemistry articles, would be likely candidates for a computational method that analyzes the large scale argumentation structure. 2020.argmining-1.10 @@ -123,7 +123,7 @@ Style Analysis of Argumentative Texts by Mining Rhetorical Devices - KhalidAl Khatib + KhalidAl Khatib ViorelMorari BennoStein 106–116 @@ -135,7 +135,7 @@ Creating a Domain-diverse Corpus for Theory-based Argument Quality Assessment LilyNg AnneLauscher - JoelTetreault + JoelTetreault CourtneyNapoles 117–126 Computational models of argument quality (AQ) have focused primarily on assessing the overall quality or just one specific characteristic of an argument, such as its convincingness or its clarity. However, previous work has claimed that assessment based on theoretical dimensions of argumentation could benefit writers, but developing such models has been limited by the lack of annotated data. In this work, we describe GAQCorpus, the first large, domain-diverse annotated corpus of theory-based AQ. We discuss how we designed the annotation task to reliably collect a large number of judgments with crowdsourcing, formulating theory-based guidelines that helped make subjective judgments of AQ more objective. We demonstrate how to identify arguments and adapt the annotation task for three diverse domains. Our work will inform research on theory-based argumentation annotation and enable the creation of more diverse corpora to support computational AQ assessment. diff --git a/data/xml/2020.autosimtrans.xml b/data/xml/2020.autosimtrans.xml index 5f155eae7d..0a889da08c 100644 --- a/data/xml/2020.autosimtrans.xml +++ b/data/xml/2020.autosimtrans.xml @@ -7,7 +7,7 @@ ColinCherry LiangHuang ZhongjunHe - MarkLiberman + MarkLiberman JamesCross YangLiu Association for Computational Linguistics @@ -36,8 +36,8 @@ End-to-End Speech Translation with Adversarial Training XuancaiLi ChenKehai - TiejunZhao - MuyunYang + TiejunZhao + MuyunYang 10–14 End-to-End speech translation usually leverages audio-to-text parallel data to train an available speech translation model which has shown impressive results on various speech translation tasks. Due to the artificial cost of collecting audio-to-text parallel data, the speech translation is a natural low-resource translation scenario, which greatly hinders its improvement. In this paper, we proposed a new adversarial training method to leverage target monolingual data to relieve the low-resource shortcoming of speech translation. In our method, the existing speech translation model is considered as a Generator to gain a target language output, and another neural Discriminator is used to guide the distinction between outputs of speech translation model and true target monolingual sentences. Experimental results on the CCMT 2019-BSTC dataset speech translation task demonstrate that the proposed methods can significantly improve the performance of the End-to-End speech translation system. 2020.autosimtrans-1.2 @@ -62,7 +62,7 @@ Improving Autoregressive <fixed-case>NMT</fixed-case> with Non-Autoregressive Model LongZhou JiajunZhang - ChengqingZong + ChengqingZong 24–29 Autoregressive neural machine translation (NMT) models are often used to teach non-autoregressive models via knowledge distillation. However, there are few studies on improving the quality of autoregressive translation (AT) using non-autoregressive translation (NAT). In this work, we propose a novel Encoder-NAD-AD framework for NMT, aiming at boosting AT with global information produced by NAT model. Specifically, under the semantic guidance of source-side context captured by the encoder, the non-autoregressive decoder (NAD) first learns to generate target-side hidden state sequence in parallel. Then the autoregressive decoder (AD) performs translation from left to right, conditioned on source-side and target-side hidden states. Since AD has global information generated by low-latency NAD, it is more likely to produce a better translation with less time delay. Experiments on WMT14 En-De, WMT16 En-Ro, and IWSLT14 De-En translation tasks demonstrate that our framework achieves significant improvements with only 8% speed degeneration over the autoregressive NMT. 2020.autosimtrans-1.4 diff --git a/data/xml/2020.bea.xml b/data/xml/2020.bea.xml index 2beb4f2dce..2056b9f138 100644 --- a/data/xml/2020.bea.xml +++ b/data/xml/2020.bea.xml @@ -25,7 +25,7 @@ Linguistic Features for Readability Assessment TovlyDeutsch MasoudJasbi - StuartShieber + StuartShieber 1–17 Readability assessment aims to automatically classify text by the level appropriate for learning readers. Traditional approaches to this task utilize a variety of linguistically motivated features paired with simple machine learning models. More recent methods have improved performance by discarding these features and utilizing deep learning models. However, it is unknown whether augmenting deep learning models with linguistically motivated features would improve performance further. This paper combines these two approaches with the goal of improving overall model performance and addressing this question. Evaluating on two large readability corpora, we find that, given sufficient training data, augmenting deep learning models with linguistically motivated features does not improve state-of-the-art performance. Our results provide preliminary evidence for the hypothesis that the state-of-the-art deep learning models represent linguistic features of the text related to readability. Future research on the nature of representations formed in these models can shed light on the learned features and their relations to linguistically motivated ones hypothesized in traditional approaches. 2020.bea-1.1 @@ -51,11 +51,11 @@ Multiple Instance Learning for Content Feedback Localization without Annotation ScottHellman - WilliamMurray + WilliamMurray AdamWiemerslage MarkRosenstein - PeterFoltz - LeeBecker + PeterFoltz + LeeBecker MarciaDerr 30–40 Automated Essay Scoring (AES) can be used to automatically generate holistic scores with reliability comparable to human scoring. In addition, AES systems can provide formative feedback to learners, typically at the essay level. In contrast, we are interested in providing feedback specialized to the content of the essay, and specifically for the content areas required by the rubric. A key objective is that the feedback should be localized alongside the relevant essay text. An important step in this process is determining where in the essay the rubric designated points and topics are discussed. A natural approach to this task is to train a classifier using manually annotated data; however, collecting such data is extremely resource intensive. Instead, we propose a method to predict these annotation spans without requiring any labeled annotation data. Our approach is to consider AES as a Multiple Instance Learning (MIL) task. We show that such models can both predict content scores and localize content by leveraging their sentence-level score predictions. This capability arises despite never having access to annotation training data. Implications are discussed for improving formative feedback and explainable AES models. @@ -78,7 +78,7 @@ <fixed-case>CIMA</fixed-case>: A Large Open Access Dialogue Dataset for Tutoring KatherineStasaski KimberlyKao - Marti A.Hearst + Marti A.Hearst 52–64 One-to-one tutoring is often an effective means to help students learn, and recent experiments with neural conversation systems are promising. However, large open datasets of tutoring conversations are lacking. To remedy this, we propose a novel asynchronous method for collecting tutoring dialogue via crowdworkers that is both amenable to the needs of deep learning algorithms and reflective of pedagogical concerns. In this approach, extended conversations are obtained between crowdworkers role-playing as both students and tutors. The CIMA collection, which we make publicly available, is novel in that students are exposed to overlapping grounded concepts between exercises and multiple relevant tutoring responses are collected for the same input. CIMA contains several compelling properties from an educational perspective: student role-players complete exercises in fewer turns during the course of the conversation and tutor players adopt strategies that conform with some educational conversational norms, such as providing hints versus asking questions in appropriate contexts. The dataset enables a model to be trained to generate the next tutoring utterance in a conversation, conditioned on a provided action strategy. 2020.bea-1.5 @@ -102,7 +102,7 @@ Annotation and Classification of Evidence and Reasoning Revisions in Argumentative Writing TazinAfrin Elaine LinWang - DianeLitman + DianeLitman Lindsay ClareMatsumura RichardCorrenti 75–84 @@ -115,7 +115,7 @@ Can Neural Networks Automatically Score Essay Traits? SandeepMathias - PushpakBhattacharyya + PushpakBhattacharyya 85–91 Essay traits are attributes of an essay that can help explain how well written (or badly written) the essay is. Examples of traits include Content, Organization, Language, Sentence Fluency, Word Choice, etc. A lot of research in the last decade has dealt with automatic holistic essay scoring - where a machine rates an essay and gives a score for the essay. However, writers need feedback, especially if they want to improve their writing - which is why trait-scoring is important. In this paper, we show how a deep-learning based system can outperform feature-based machine learning systems, as well as a string kernel system in scoring essay traits. 2020.bea-1.8 @@ -201,7 +201,7 @@ Should You Fine-Tune <fixed-case>BERT</fixed-case> for Automated Essay Scoring? ElijahMayfield - Alan WBlack + Alan WBlack 151–162 Most natural language processing research now recommends large Transformer-based models with fine-tuning for supervised classification tasks; older strategies like bag-of-words features and linear models have fallen out of favor. Here we investigate whether, in automated essay scoring (AES) research, deep neural models are an appropriate technological choice. We find that fine-tuning BERT produces similar performance to classical models at significant additional cost. We argue that while state-of-the-art strategies do match existing best results, they come with opportunity costs in computational resources. We conclude with a review of promising areas for research on student essays where the unique characteristics of Transformers may provide benefits over classical methods to justify the costs. 2020.bea-1.15 @@ -233,7 +233,7 @@ Automated Scoring of Clinical Expressive Language Evaluation Tasks YiyiWang - EmilyPrud’hommeaux + EmilyPrud’hommeaux MeysamAsgari JillDolata 177–185 diff --git a/data/xml/2020.bionlp.xml b/data/xml/2020.bionlp.xml index 65b7a4d449..c36ef56d4d 100644 --- a/data/xml/2020.bionlp.xml +++ b/data/xml/2020.bionlp.xml @@ -4,9 +4,9 @@ Proceedings of the 19th SIGBioMed Workshop on Biomedical Language Processing DinaDemner-Fushman - Kevin BretonnelCohen + Kevin BretonnelCohen SophiaAnaniadou - JunichiTsujii + JunichiTsujii Association for Computational Linguistics
Online
July @@ -33,7 +33,7 @@ Sequence-to-Set Semantic Tagging for Complex Query Reformulation and Automated Text Categorization in Biomedical <fixed-case>IR</fixed-case> using Self-Attention ManirupaDas JuanxiLi - EricFosler-Lussier + EricFosler-Lussier SimonLin SteveRust YunguiHuang @@ -64,7 +64,7 @@ Improving Biomedical Analogical Retrieval with Embedding of Structural Dependencies AmandalynnePaullada BethanyPercha - TrevorCohen + TrevorCohen 38–48 Inferring the nature of the relationships between biomedical entities from text is an important problem due to the difficulty of maintaining human-curated knowledge bases in rapidly evolving fields. Neural word embeddings have earned attention for an apparent ability to encode relational information. However, word embedding models that disregard syntax during training are limited in their ability to encode the structural relationships fundamental to cognitive theories of analogy. In this paper, we demonstrate the utility of encoding dependency structure in word embeddings in a model we call Embedding of Structural Dependencies (ESD) as a way to represent biomedical relationships in two analogical retrieval tasks: a relationship retrieval (RR) task, and a literature-based discovery (LBD) task meant to hypothesize plausible relationships between pairs of entities unseen in training. We compare our model to skip-gram with negative sampling (SGNS), using 19 databases of biomedical relationships as our evaluation data, with improvements in performance on 17 (LBD) and 18 (RR) of these sets. These results suggest embeddings encoding dependency path information are of value for biomedical analogy retrieval. 2020.bionlp-1.4 @@ -76,7 +76,7 @@ AnnaKoroleva SanjayKamath PatrickBossuyt - PatrickParoubek + PatrickParoubek 49–59 Improving the quality of medical research reporting is crucial to reduce avoidable waste in research and to improve the quality of health care. Despite various initiatives aiming at improving research reporting – guidelines, checklists, authoring aids, peer review procedures, etc. – overinterpretation of research results, also known as spin, is still a serious issue in research reporting. In this paper, we propose a Natural Language Processing (NLP) system for detecting several types of spin in biomedical articles reporting randomized controlled trials (RCTs). We use a combination of rule-based and machine learning approaches to extract important information on trial design and to detect potential spin. The proposed spin detection system includes algorithms for text structure analysis, sentence classification, entity and relation extraction, semantic similarity assessment. Our algorithms achieved operational performance for the these tasks, F-measure ranging from 79,42 to 97.86% for different tasks. The most difficult task is extracting reported outcomes. Our tool is intended to be used as a semi-automated aid tool for assisting both authors and peer reviewers to detect potential spin. The tool incorporates a simple interface that allows to run the algorithms and visualize their output. It can also be used for manual annotation and correction of the errors in the outputs. The proposed tool is the first tool for spin detection. The tool and the annotated dataset are freely available. 2020.bionlp-1.5 @@ -106,7 +106,7 @@ A <fixed-case>BERT</fixed-case>-based One-Pass Multi-Task Model for Clinical Temporal Relation Extraction ChenLin - TimothyMiller + TimothyMiller DmitriyDligach FarigSadeque StevenBethard @@ -135,7 +135,7 @@ ZeljkoKraljevic DanielBean RichardDobson - RobertStewart + RobertStewart RebeccaBendayan AngusRoberts 86–94 @@ -151,7 +151,7 @@ LiyanXu JulienHogan Rachel E.Patzer - Jinho D.Choi + Jinho D.Choi 95–104 This paper presents a reinforcement learning approach to extract noise in long clinical documents for the task of readmission prediction after kidney transplant. We face the challenges of developing robust models on a small dataset where each document may consist of over 10K tokens with full of noise including tabular text and task-irrelevant sentences. We first experiment four types of encoders to empirically decide the best document representation, and then apply reinforcement learning to remove noisy text from the long documents, which models the noise extraction process as a sequential decision problem. Our results show that the old bag-of-words encoder outperforms deep learning-based encoders on this task, and reinforcement learning is able to improve upon baseline while pruning out 25% text segments. Our analysis depicts that reinforcement learning is able to identify both typical noisy tokens and task-specific noisy text. 2020.bionlp-1.10 @@ -162,8 +162,8 @@ Evaluating the Utility of Model Configurations and Data Augmentation on Clinical Semantic Textual Similarity YuxiaWang FeiLiu - KarinVerspoor - TimothyBaldwin + KarinVerspoor + TimothyBaldwin 105–111 In this paper, we apply pre-trained language models to the Semantic Textual Similarity (STS) task, with a specific focus on the clinical domain. In low-resource setting of clinical STS, these large models tend to be impractical and prone to overfitting. Building on BERT, we study the impact of a number of model design choices, namely different fine-tuning and pooling strategies. We observe that the impact of domain-specific fine-tuning on clinical STS is much less than that in the general domain, likely due to the concept richness of the domain. Based on this, we propose two data augmentation techniques. Experimental results on N2C2-STS 1 demonstrate substantial improvements, validating the utility of the proposed methods. 2020.bionlp-1.11 @@ -190,8 +190,8 @@ JayDeYoung EricLehman BenjaminNye - IainMarshall - Byron C.Wallace + IainMarshall + Byron C.Wallace 123–132 How do we most effectively treat a disease or condition? Ideally, we could consult a database of evidence gleaned from clinical trials to answer such questions. Unfortunately, no such database exists; clinical trial results are instead disseminated primarily via lengthy natural language articles. Perusing all such articles would be prohibitively time-consuming for healthcare practitioners; they instead tend to depend on manually compiled systematic reviews of medical literature to inform care. NLP may speed this process up, and eventually facilitate immediate consult of published evidence. The Evidence Inference dataset was recently released to facilitate research toward this end. This task entails inferring the comparative performance of two treatments, with respect to a given outcome, from a particular article (describing a clinical trial) and identifying supporting evidence. For instance: Does this article report that chemotherapy performed better than surgery for five-year survival rates of operable cancers? In this paper, we collect additional annotations to expand the Evidence Inference dataset by 25%, provide stronger baseline models, systematically inspect the errors that these make, and probe dataset quality. We also release an abstract only (as opposed to full-texts) version of the task for rapid model prototyping. The updated corpus, documentation, and code for new baselines and evaluations are available at http://evidence-inference.ebm-nlp.com/. 2020.bionlp-1.13 @@ -236,8 +236,8 @@ Domain Adaptation and Instance Selection for Disease Syndrome Classification over Veterinary Clinical Notes BrianHur - TimothyBaldwin - KarinVerspoor + TimothyBaldwin + KarinVerspoor LauraHardefeldt JamesGilkerson 156–166 @@ -265,7 +265,7 @@ Extensive Error Analysis and a Learning-Based Evaluation of Medical Entity Recognition Systems to Approximate User Experience IsarNejadgholi - Kathleen C.Fraser + Kathleen C.Fraser Berryde Bruijn 177–186 When comparing entities extracted by a medical entity recognition system with gold standard annotations over a test set, two types of mismatches might occur, label mismatch or span mismatch. Here we focus on span mismatch and show that its severity can vary from a serious error to a fully acceptable entity extraction due to the subjectivity of span annotations. For a domain-specific BERT-based NER system, we showed that 25% of the errors have the same labels and overlapping span with gold standard entities. We collected expert judgement which shows more than 90% of these mismatches are accepted or partially accepted by the user. Using the training set of the NER system, we built a fast and lightweight entity classifier to approximate the user experience of such mismatches through accepting or rejecting them. The decisions made by this classifier are used to calculate a learning-based F-score which is shown to be a better approximation of a forgiving user’s experience than the relaxed F-score. We demonstrated the results of applying the proposed evaluation metric for a variety of deep learning medical entity recognition models trained with two datasets. @@ -278,7 +278,7 @@ SaadullahAmin Katherine AnnDunfield AnnaVechkaeva - GuenterNeumann + GuenterNeumann 187–194 Fact triples are a common form of structured knowledge used within the biomedical domain. As the amount of unstructured scientific texts continues to grow, manual annotation of these texts for the task of relation extraction becomes increasingly expensive. Distant supervision offers a viable approach to combat this by quickly producing large amounts of labeled, but considerably noisy, data. We aim to reduce such noise by extending an entity-enriched relation classification BERT model to the problem of multiple instance learning, and defining a simple data encoding scheme that significantly reduces noise, reaching state-of-the-art performance for distantly-supervised biomedical relation extraction. Our approach further encodes knowledge about the direction of relation triples, allowing for increased focus on relation learning by reducing noise and alleviating the need for joint learning with knowledge graph completion. 2020.bionlp-1.20 @@ -290,7 +290,7 @@ Global Locality in Biomedical Relation and Event Extraction ElahehShafieiBavani - AntonioJimeno Yepes + AntonioJimeno Yepes XuZhong DavidMartinez Iraola 195–204 @@ -301,7 +301,7 @@ An Empirical Study of Multi-Task Learning on <fixed-case>BERT</fixed-case> for Biomedical Text Mining - YifanPeng + YifanPeng QingyuChen ZhiyongLu 205–214 diff --git a/data/xml/2020.blackboxnlp.xml b/data/xml/2020.blackboxnlp.xml index 22629e4ade..c8157e05f7 100644 --- a/data/xml/2020.blackboxnlp.xml +++ b/data/xml/2020.blackboxnlp.xml @@ -5,7 +5,7 @@ Proceedings of the Third BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP AfraAlishahi YonatanBelinkov - GrzegorzChrupała + GrzegorzChrupała DieuwkeHupkes YuvalPinter HassanSajjad @@ -22,7 +22,7 @@ <fixed-case>BERT</fixed-case>ering <fixed-case>RAMS</fixed-case>: What and How Much does <fixed-case>BERT</fixed-case> Already Know About Event Arguments? - A Study on the <fixed-case>RAMS</fixed-case> Dataset VarunGangal - EduardHovy + EduardHovy 1–10 Using the attention map based probing framework from (Clark et al., 2019), we observe that, on the RAMS dataset (Ebner et al., 2020), BERT’s attention heads have modest but well above-chance ability to spot event arguments sans any training or domain finetuning, varying from a low of 17.77% for Place to a high of 51.61% for Artifact. Next, we find that linear combinations of these heads, estimated with approx. 11% of available total event argument detection supervision, can push performance well higher for some roles — highest two being Victim (68.29% Accuracy) and Artifact (58.82% Accuracy). Furthermore, we investigate how well our methods do for cross-sentence event arguments. We propose a procedure to isolate “best heads” for cross-sentence argument detection separately of those for intra-sentence arguments. The heads thus estimated have superior cross-sentence performance compared to their jointly estimated equivalents, albeit only under the unrealistic assumption that we already know the argument is present in another sentence. Lastly, we seek to isolate to what extent our numbers stem from lexical frequency based associations between gold arguments and roles. We propose NONCE, a scheme to create adversarial test examples by replacing gold arguments with randomly generated “nonce” words. We find that learnt linear combinations are robust to NONCE, though individual best heads can be more sensitive. 2020.blackboxnlp-1.1 @@ -33,7 +33,7 @@ Emergent Language Generalization and Acquisition Speed are not tied to Compositionality EugeneKharitonov - MarcoBaroni + MarcoBaroni 11–15 Studies of discrete languages emerging when neural agents communicate to solve a joint task often look for evidence of compositional structure. This stems for the expectation that such a structure would allow languages to be acquired faster by the agents and enable them to generalize better. We argue that these beneficial properties are only loosely connected to compositionality. In two experiments, we demonstrate that, depending on the task, non-compositional languages might show equal, or better, generalization performance and acquisition speed than compositional ones. Further research in the area should be clearer about what benefits are expected from compositionality, and how the latter would lead to them. 2020.blackboxnlp-1.2 @@ -126,7 +126,7 @@ The Explanation Game: Towards Prediction Explainability through Sparse Communication MarcosTreviso - André F. T.Martins + André F. T.Martins 107–118 Explainability is a topic of growing importance in NLP. In this work, we provide a unified perspective of explainability as a communication problem between an explainer and a layperson about a classifier’s decision. We use this framework to compare several explainers, including gradient methods, erasure, and attention mechanisms, in terms of their communication success. In addition, we reinterpret these methods in the light of classical feature selection, and use this as inspiration for new embedded explainers, through the use of selective, sparse attention. Experiments in text classification and natural language inference, using different configurations of explainers and laypeople (including both machines and humans), reveal an advantage of attention-based explainers over gradient and erasure methods, and show that selective attention is a simpler alternative to stochastic rationalizers. Human experiments show strong results on text classification with post-hoc explainers trained to optimize communication success. 2020.blackboxnlp-1.10 @@ -162,7 +162,7 @@ Controlling the Imprint of Passivization and Negation in Contextualized Representations HandeCelikkanat SamiVirpioja - JörgTiedemann + JörgTiedemann MariannaApidianaki 136–148 Contextualized word representations encode rich information about syntax and semantics, alongside specificities of each context of use. While contextual variation does not always reflect actual meaning shifts, it can still reduce the similarity of embeddings for word instances having the same meaning. We explore the imprint of two specific linguistic alternations, namely passivization and negation, on the representations generated by neural models trained with two different objectives: masked language modeling and translation. Our exploration methodology is inspired by an approach previously proposed for removing societal biases from word vectors. We show that passivization and negation leave their traces on the representations, and that neutralizing this information leads to more similar embeddings for words that should preserve their meaning in the transformation. We also find clear differences in how the respective features generalize across datasets. @@ -244,7 +244,7 @@ Exploring Neural Entity Representations for Semantic Information AndrewRunge - EduardHovy + EduardHovy 204–216 Neural methods for embedding entities are typically extrinsically evaluated on downstream tasks and, more recently, intrinsically using probing tasks. Downstream task-based comparisons are often difficult to interpret due to differences in task structure, while probing task evaluations often look at only a few attributes and models. We address both of these issues by evaluating a diverse set of eight neural entity embedding methods on a set of simple probing tasks, demonstrating which methods are able to remember words used to describe entities, learn type, relationship and factual information, and identify how frequently an entity is mentioned. We also compare these methods in a unified framework on two entity linking tasks and discuss how they generalize to different model architectures and datasets. 2020.blackboxnlp-1.20 @@ -253,7 +253,7 @@ <fixed-case>BERT</fixed-case>s of a feather do not generalize together: Large variability in generalization across models with similar test set performance - R. ThomasMcCoy + R. ThomasMcCoy JunghyunMin TalLinzen 217–227 @@ -276,7 +276,7 @@ Discovering the Compositional Structure of Vector Representations with Role Learning Networks PaulSoulos - R. ThomasMcCoy + R. ThomasMcCoy TalLinzen PaulSmolensky 238–254 @@ -301,7 +301,7 @@ Investigating Novel Verb Learning in <fixed-case>BERT</fixed-case>: Selectional Preference Classes and Alternation-Based Syntactic Generalization TristanThrush EthanWilcox - RogerLevy + RogerLevy 265–275 Previous studies investigating the syntactic abilities of deep learning models have not targeted the relationship between the strength of the grammatical generalization and the amount of evidence to which the model is exposed during training. We address this issue by deploying a novel word-learning paradigm to test BERT’s few-shot learning capabilities for two aspects of English verbs: alternations and classes of selectional preferences. For the former, we fine-tune BERT on a single frame in a verbal-alternation pair and ask whether the model expects the novel verb to occur in its sister frame. For the latter, we fine-tune BERT on an incomplete selectional network of verbal objects and ask whether it expects unattested but plausible verb/object pairs. We find that BERT makes robust grammatical generalizations after just one or two instances of a novel word in fine-tuning. For the verbal alternation tests, we find that the model displays behavior that is consistent with a transitivity bias: verbs seen few times are expected to take direct objects, but verbs seen with direct objects are not expected to occur intransitively. 2020.blackboxnlp-1.25 @@ -313,7 +313,7 @@ BenjaminNewman JohnHewitt PercyLiang - Christopher D.Manning + Christopher D.Manning 276–291 Extrapolation to unseen sequence lengths is a challenge for neural generative models of language. In this work, we characterize the effect on length extrapolation of a modeling decision often overlooked: predicting the end of the generative process through the use of a special end-of-sequence (EOS) vocabulary item. We study an oracle setting - forcing models to generate to the correct sequence length at test time - to compare the length-extrapolative behavior of networks trained to predict EOS (+EOS) with networks not trained to (-EOS). We find that -EOS substantially outperforms +EOS, for example extrapolating well to lengths 10 times longer than those seen at training time in a bracket closing task, as well as achieving a 40% improvement over +EOS in the difficult SCAN dataset length generalization task. By comparing the hidden states and dynamics of -EOS and +EOS models, we observe that +EOS models fail to generalize because they (1) unnecessarily stratify their hidden states by their linear position is a sequence (structures we call length manifolds) or (2) get stuck in clusters (which we refer to as length attractors) once the EOS token is the highest-probability prediction. 2020.blackboxnlp-1.26 diff --git a/data/xml/2020.bucc.xml b/data/xml/2020.bucc.xml index a14ceb1aa1..8a5e26a1b6 100644 --- a/data/xml/2020.bucc.xml +++ b/data/xml/2020.bucc.xml @@ -4,7 +4,7 @@ Proceedings of the 13th Workshop on Building and Using Comparable Corpora ReinhardRapp - PierreZweigenbaum + PierreZweigenbaum SergeSharoff European Language Resources Association
Marseille, France
@@ -65,7 +65,7 @@
Mining Semantic Relations from Comparable Corpora through Intersections of Word Embeddings - ŠpelaVintar + ŠpelaVintar LarisaGrčić Simeunović MatejMartinc SenjaPollak @@ -101,8 +101,8 @@ <fixed-case>LMU</fixed-case> Bilingual Dictionary Induction System with Word Surface Similarity Scores for <fixed-case>BUCC</fixed-case> 2020 SilviaSeverini ViktorHangya - AlexanderFraser - HinrichSchütze + AlexanderFraser + HinrichSchütze 49–55 The task of Bilingual Dictionary Induction (BDI) consists of generating translations for source language words which is important in the framework of machine translation (MT). The aim of the BUCC 2020 shared task is to perform BDI on various language pairs using comparable corpora. In this paper, we present our approach to the task of English-German and English-Russian language pairs. Our system relies on Bilingual Word Embeddings (BWEs) which are often used for BDI when only a small seed lexicon is available making them particularly effective in a low-resource setting. On the other hand, they perform well on high frequency words only. In order to improve the performance on rare words as well, we combine BWE based word similarity with word surface similarity methods, such as orthography In addition to the often used top-n translation method, we experiment with a margin based approach aiming for dynamic number of translations for each source word. We participate in both the open and closed tracks of the shared task and we show improved results of our method compared to simple vector similarity based approaches. Our system was ranked in the top-3 teams and achieved the best results for English-Russian. 2020.bucc-1.8 diff --git a/data/xml/2020.calcs.xml b/data/xml/2020.calcs.xml index f984828f07..dc1a542bfe 100644 --- a/data/xml/2020.calcs.xml +++ b/data/xml/2020.calcs.xml @@ -8,7 +8,7 @@ KalikaBali SunayanaSitaram AmitavaDas - MonaDiab + MonaDiab European Language Resources Association
Marseille, France
May @@ -32,7 +32,7 @@ A New Dataset for Natural Language Inference from Code-mixed Conversations SimranKhanuja - SandipanDandapat + SandipanDandapat SunayanaSitaram MonojitChoudhury 9–16 @@ -97,7 +97,7 @@ Code-mixed parse trees and how to find them AnirudhSrinivasan - SandipanDandapat + SandipanDandapat MonojitChoudhury 57–64 In this paper, we explore the methods of obtaining parse trees of code-mixed sentences and analyse the obtained trees. Existing work has shown that linguistic theories can be used to generate code-mixed sentences from a set of parallel sentences. We build upon this work, using one of these theories, the Equivalence-Constraint theory to obtain the parse trees of synthetically generated code-mixed sentences and evaluate them with a neural constituency parser. We highlight the lack of a dataset non-synthetic code-mixed constituency parse trees and how it makes our evaluation difficult. To complete our evaluation, we convert a code-mixed dependency parse tree set into “pseudo constituency trees” and find that a parser trained on synthetically generated trees is able to decently parse these as well. diff --git a/data/xml/2020.ccl.xml b/data/xml/2020.ccl.xml index fe19e40900..b97e06c7a5 100644 --- a/data/xml/2020.ccl.xml +++ b/data/xml/2020.ccl.xml @@ -30,7 +30,7 @@ 基于语料库的武侠与仙侠网络小说文体、词汇及主题对比分析(A Corpus-based Contrastive Analysis of Style, Vocabulary and Theme of Wuxia and Xianxia <fixed-case>I</fixed-case>nternet Novels) SanleZhang三乐 - PengyuanLiu鹏远 + PengyuanLiu鹏远 HuZhang 10–19 网络文学在我国发展迅猛,其数量和影响力呈现逐年上升的趋势,但目前尚无公开的较大规模网络文学作品语料库,鲜见基于语料库对网络文学具体类别作品的定量研究。本文初步建立了一个网络文学语料库,其中包括武侠和仙侠网络小说,使用文本计量、词频统计以及主题挖掘的方法对两类小说的文体风格、具体词汇使用和小说主题进行对比分析。通过比较,我们发现两类小说的文体风格大致相同,它们在词汇的使用和主题上既有共性又各具特色。从微观到宏观,从表面到内容,将定量统计和定性分析相结合,多角度、多层次的对武侠和仙侠网络小说进行比较。 @@ -41,7 +41,7 @@ 基于计量的百年中国人名用字性别特征研究(A Quantified Research on Gender Characteristics of <fixed-case>C</fixed-case>hinese Names in A Century) BingjieDu冰洁 - PengyuanLiu鹏远 + PengyuanLiu鹏远 YongshengTian永胜 20–30 本文构建了一个包含11万以上条目规模的中国名人人名数据库,每条数据含有人名、性别、出生地等社会文化标签,同时含有拼音、笔画、偏旁等文字信息标签,这是目前已知最大的可用于研究的汉语真人人名数据库。基于该数据库,本文从中选择1919年至今的人名,用定性与定量结合的方法探究人名中汉字的特征和其性别差异以及历时变化。从人名长度来看,男性人名比女性人名长;从人名用字的难易度来看,女性用字比男性更复杂;从用字丰富度来看,人名用字越来越单一和集中化,男性人名的用字丰富度大于女性人名。计算人名用字的性别偏度后发现女性人名的专用自更多。两性用字意象有明显的不同,用字的意象随着时间发生改变,但改变最明显的时间节点是改革开放前后,其中女性的变化比男性显著。除此之外,我们还得出人名中的性别极性字表、各个阶段的高频字表、用字变化趋势表等。 @@ -52,7 +52,7 @@ 伟大的男人和倔强的女人:基于语料库的形容词性别偏度历时研究(Great Males and Stubborn Females: A Diachronic Study of Corpus-Based Gendered Skewness in <fixed-case>C</fixed-case>hinese Adjectives) ShuchengZhu述承 - PengyuanLiu鹏远 + PengyuanLiu鹏远 31–42 性别偏见现象是社会语言学和计算语学学者均关注的研究热点,但目前大多数研究都是基于英语的,鲜有对汉语中性别偏见现象,特别是基于形容词的研究缺乏。而形容词是衡量社会对男性和女性角色规约的有力抓手。本文首先利用调查问卷的方法,构建了一个含有466个形容词的数据集,定义性别偏度为特定形容词词义和男性或女性群体相匹配的程度,并计算了数据集中每个形容词的性别偏度。然后基于DCC语料库,研究了《人民日报》的形容词性别偏度的历时总体变化,并考察了和姓名搭配的形容词的历时变化。发现《人民日报》所使用的形容词随时间的推移整体呈现中性化趋势,但在文化大革命期间呈现非常男性化的特征,和男性姓名搭配的形容词整体呈现中性化趋势。 2020.ccl-1.4 @@ -184,7 +184,7 @@ 融合目标端句法的<fixed-case>AMR</fixed-case>-to-Text生成(<fixed-case>AMR</fixed-case>-to-Text Generation with Target Syntax) JieZhu - JunhuiLi军辉 + JunhuiLi军辉 162–171 抽象语义表示到文本(AMR-to-Text)生成的任务是给定AMR图,生成相同语义表示的文本。可以把此任务当作一个从源端AMR图到目标端句子的机器翻译任务。目前存在的一些方法都在探索如何更好的对图结构进行建模。然而,它们都存在一个未限定的问题,因为在生成阶段许多句法的决策并不受语义图的约束,从而忽略了句子内部潜藏的句法信息。为了明确考虑这一不足,该文提出一种直接而有效的方法,显示的在AMR-to-Text生成的任务中融入句法信息,并在Transformer和目前该任务最优性能的模型上进行了实验。实验结果表明,在现存的两份标准英文数据集LDC2018E86和LDC2017T10上,都取得了显著的提升,达到了新的最高性能。 2020.ccl-1.16 @@ -211,7 +211,7 @@ FengJiang XiaominChu晓敏 PeifengLi培峰 - QiaomingZhu巧明 + QiaomingZhu巧明 183–194 作为宏观篇章分析中的基础任务,篇章结构识别任务的目的是识别相邻篇章单元之间的结构,并层次化构建篇章结构树。已有的工作只考虑局部的结构和语义信息或只考虑全局信息。因此,本文提出了一种融合全局和局部信息的指针网络模型,该模型在考虑全局的语义信息同时,又考虑局部段落间的语义关系密切程度,从而有效地提高宏观篇章结构识别的能力。在汉语宏观篇章树库(MCDTB)的实验结果表明,本文所提出的模型性能优于目前性能最好的模型。 2020.ccl-1.18 @@ -224,7 +224,7 @@ MingtongLiu明童 YuanmengChen圆梦 YujieZhang玉洁 - JinanXu金安 + JinanXu金安 YufengChen钰枫 195–206 组合原则表明句子的语义由其构成成分的语义按照一定规则组合而成, 由此基于句法结构的语义组合计算一直是一个重要的探索方向,其中采用树结构的组合计算方法最具有代表性。但是该方法难以应用于大规模数据处理,主要问题是其语义组合的顺序依赖于具体树的结构,无法实现并行处理。本文提出一种基于图的依存句法分析和语义组合计算的联合框架,并借助复述识别任务训练语义组合模型和句法分析模型。一方面图模型可以在训练和预测阶段采用并行处理,极大缩短计算时间;另一方面联合句法分析的语义组合框架不必依赖外部句法分析器,同时两个任务的联合学习可使语义表示同时学习句法结构和语义的上下文信息。我们在公开汉语复述识别数据集LCQMC上进行评测,实验结果显示准确率接近树结构组合方法,达到79.54%,而预测速度提升高达30倍。 @@ -247,7 +247,7 @@ 联合依存分析的汉语语义组合模型(<fixed-case>C</fixed-case>hinese Semantic Composition Model with Dependency Parsing) YuanmengChen圆梦 YujieZhang玉洁 - JinanXu金安 + JinanXu金安 YufengChen钰枫 215–224 在语义组合方法中,结构化方法强调以结构信息指导词义表示的组合方式。现有结构化语义组合方法使用外部分析器获取句法结构信息,导致句法分析与语义组合相互割裂,句法分析的精度严重制约语义组合模型的性能,且训练数据领域不一致等问题会进一步加剧性能的下降。对此,本文提出联合依存分析的语义组合模型,将依存分析与语义组合进行联合,一方面在训练语义组合模型时对依存分析模型进行微调,使其能够更适应语义组合模型使用的训练数据的领域特点;另一方面,在语义组合部分加入依存分析的中间信息表示,获取更丰富的结构信息和语义信息,以此来降低语义组合模型对依存分析错误结果的敏感度,提升模型的鲁棒性。我们以汉语为具体研究对象,将语义组合模型用于复述识别任务,并在CTB5汉语依存分析数据和LCQMC汉语复述识别数据上验证本文提出的模型。实验结果显示,本文所提方法在复述识别任务上的预测正确率和F1值上分别达到76.81%和78.03%;我们进一步设计实验对联合学习和中间信息利用的有效性进行验证,并与相关代表性工作进行了对比分析。 @@ -260,7 +260,7 @@ MengyuGuan梦雨 ZhongqingWang中卿 ShoushanLi寿山 - GuodongZhou国栋 + GuodongZhou国栋 225–235 现有的对话系统中存在着生成“好的”、“我不知道”等无意义的安全回复问题。日常对话中,对话者通常围绕特定的主题进行讨论且每句话都有明显的情感和意图。因此该文提出了基于对话约束的回复生成模型,即在Seq2Seq模型的基础上,结合对对话的主题、情感、意图的识别。该方法对生成回复的主题、情感和意图进行约束,从而生成具有合理的情感和意图且与对话主题相关的回复。实验证明,该文提出的方法能有效地提高生成回复的质量。 2020.ccl-1.22 @@ -400,7 +400,7 @@ 小样本关系分类研究综述(Few-Shot Relation Classification: A Survey) HanHu - PengyuanLiu鹏远 + PengyuanLiu鹏远 363–375 关系分类作为构建结构化知识的重要一环,在自然语言处理领域备受关注。但在很多应用领域中(医疗、金融领域),收集充足的用于训练关系分类模型的数据是十分困难的。近年来,仅需要少量训练样本的小样本学习研究逐渐新兴于各大领域。本文对近期小样本关系分类模型与方法进行了系统的综述。根据度量方法的不同,将现有方法分为原型式和分布式两大类。根据是否利用额外信息,将模型分为预训练和非预训练两大类。此外,除了常规设定下的小样本学习,本文还梳理了跨领域和稀缺资源场景下的小样本学习,并探讨了目前小样本关系分类方法的局限性,分析了跨领域小样本 学习面临的技术挑战。最后,展望了小样本关系分类未来的发展方向。 2020.ccl-1.34 @@ -413,7 +413,7 @@ FanWu ZhongqingWang中卿 PeifengLi培峰 - QiaomingZhu巧明 + QiaomingZhu巧明 376–389 传统的事件论元抽取方法把该任务当作句子中实体提及的多分类或序列标注任务,论元角色的类别在这些方法中只能作为向量表示,而忽略了论元角色的先验信息。实际上,论元角色的语义和论元本身有很大关系。对此,本文提议将其当作机器阅读理解任务,把论元角色表述为自然语言描述的问题,通过在上下文中回答这些问题来抽取论元。该方法更好地利用了论元角色类别的先验信息,在ACE2005中文语料上的实验证明了该方法的有效性。 2020.ccl-1.35 @@ -456,7 +456,7 @@ “细粒度英汉机器翻译错误分析语料库”的构建与思考(Construction of Fine-Grained Error Analysis Corpus of <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Machine Translation and Its Implications) BailianQiu白莲 - MingwenWang明文 + MingwenWang明文 MaoxiLi茂西 CongChen FanXu @@ -469,8 +469,8 @@ 层次化结构全局上下文增强的篇章级神经机器翻译(Hierarchical Global Context Augmented Document-level Neural Machine Translation) LinqingChen林卿 - JunhuiLi军辉 - ZhengxianGong正仙 + JunhuiLi军辉 + ZhengxianGong正仙 434–445 如何有效利用篇章上下文信息一直是篇章级神经机器翻译研究领域的一大挑战。本文提出利用来源于整个篇章的层次化全局上下文提高篇章级神经机器翻译性能。为了实现该目标,本文模型分别获取当前句内单词与篇章内所有句子及单词之间的依赖关系,结合不同层次的依赖关系以获取含有层次化篇章信息的全局上下文。最终源语言当前句子中的每个单词都能获取其独有的综合词和句级别依赖关系的上下文。为了充分利用平行句对语料在训练中的优势本文使用两步训练法,在句子级语料训练模型的基础上使用含有篇章信息的语料进行二次训练以获得捕获全局上下文的能力。在若干基准语料数据集上的实验表明本文提出的模型与若干强基准模型相比取得了有意义的翻译质量提升。实验进一步表明,结合层次化篇章信息的上下文比仅使用词级别上下文更具优势。除此之外,本文尝试通过不同方式将全局上下文与翻译模型结合并观察其对模型性能的影响,并初步探究篇章翻译中全局上下文在篇章中的分布情况。 2020.ccl-1.40 @@ -614,9 +614,9 @@ 面向医学文本处理的医学实体标注规范(Medical Entity Annotation Standard for Medical Text Processing) HuanZhang YuanZong - BaobaoChang宝宝 + BaobaoChang宝宝 ZhifangSui志方 - HongyingZan红英 + HongyingZan红英 KunliZhang坤丽 561–571 随着智慧医疗的普及,利用自然语言处理技术识别医学信息的需求日益增长。目前,针对医学实体而言,医学共享语料库仍处于空白状态,这对医学文本信息处理各项任务的进展造成了巨大阻力。如何判断不同的医学实体类别?如何界定不同实体间的涵盖范围?这些问题导致缺乏类似通用场景的大规模规范标注的医学文本数据。针对上述问题,该文参考了UMLS中定义的语义类型,提出面向医学文本信息处理的医学实体标注规范,涵盖了疾病、临床表现、医疗程序、医疗设备等9种医学实体,以及基于规范构建医学实体标注语料库。该文综述了标注规范的描述体系、分类原则、混淆处理、语料标注过程以及医学实体自动标注基线实验等相关问题,希望能为医学实体语料库的构建提供可参考的标注规范,以及为医学实体识别提供语料支持。 @@ -653,7 +653,7 @@ <fixed-case>CDCPP</fixed-case>:跨领域中文标点符号预测(<fixed-case>CDCPP</fixed-case>: Cross-Domain <fixed-case>C</fixed-case>hinese Punctuation Prediction) - PengyuanLiu鹏远 + PengyuanLiu鹏远 WeikangWang伟康 LikunQiu立坤 BingjieDu冰洁 @@ -665,7 +665,7 @@ 多目标情感分类中文数据集构建及分析研究(Construction and Analysis of <fixed-case>C</fixed-case>hinese Multi-Target Sentiment Classification Dataset) - PengyuanLiu鹏远 + PengyuanLiu鹏远 YongshengTian永胜 ChengyuDu成玉 LikunQiu立坤 @@ -704,7 +704,7 @@ HengruiGuo恒睿 ZhongqingWang中卿 PeifengLi培峰 - QiaomingZhu巧明 + QiaomingZhu巧明 634–644 面向社交媒体的事件聚类旨在根据事件特征对短文本聚类。目前,事件聚类模型主要分为无监督模型和有监督模型。无监督模型聚类效果较差,有监督模型依赖大量标注数据。基于此,本文提出了一种半监督事件聚类模型(SemiEC),该模型在小规模标注数据的基础上,利用LSTM表征事件,利用线性模型计算文本相似度,进行增量聚类,利用增量聚类产生的标注数据对模型再训练,结束后对不确定样本再聚类。实验表明,SemiEC的性能相比其他模型均有所提高。 2020.ccl-1.59 @@ -910,7 +910,7 @@ XingchenLi MingtongLiu YujieZhang - JinanXu + JinanXu YufengChen 820–830 In Chinese dependency parsing, the joint model of word segmentation, POS tagging and dependency parsing has become the mainstream framework because it can eliminate error propagation and share knowledge, where the transition-based model with feature templates maintains the best performance. Recently, the graph-based joint model (Yan et al., 2019) on word segmentation and dependency parsing has achieved better performance, demonstrating the advantages of the graph-based models. However, this work can not provide POS information for downstream tasks, and the POS tagging task was proved to be helpful to the dependency parsing according to the research of the transition-based model. Therefore, we propose a graph-based joint model for Chinese word segmentation, POS tagging and dependency parsing. We designed a charater-level POS tagging task, and then train it jointly with the model of Yan et al. (2019). We adopt two methods of joint POS tagging task, one is by sharing parameters, the other is by using tag attention mechanism, which enables the three tasks to better share intermediate information and improve each other’s performance. The experimental results on the Penn Chinese treebank (CTB5) show that our proposed joint model improved by 0.38% on dependency parsing than the model of Yan et al. (2019). Compared with the best transition-based joint model, our model improved by 0.18%, 0.35% and 5.99% respectively in terms of word segmentation, POS tagging and dependency parsing. @@ -968,7 +968,7 @@ <fixed-case>C</fixed-case>hinese Long and Short Form Choice Exploiting Neural Network Language Modeling Approaches LinLi - Keesvan Deemter + Keesvan Deemter DenisPaperno 874–880 This paper presents our work in long and short form choice, a significant question of lexical choice, which plays an important role in many Natural Language Understanding tasks. Long and short form sharing at least one identical word meaning but with different number of syllables is a highly frequent linguistic phenomenon in Chinese like 老虎-虎(laohu-hu, tiger) @@ -1218,7 +1218,7 @@ <fixed-case>CAN</fixed-case>-<fixed-case>GRU</fixed-case>: a Hierarchical Model for Emotion Recognition in Dialogue TingJiang BingXu - TiejunZhao + TiejunZhao ShengLi 1101–1111 Emotion recognition in dialogue systems has gained attention in the field of natural language processing recent years, because it can be applied in opinion mining from public conversational data on social media. In this paper, we propose a hierarchical model to recognize emotions in the dialogue. In the first layer, in order to extract textual features of utterances, we propose a convolutional self-attention network(CAN). Convolution is used to capture n-gram information and attention mechanism is used to obtain the relevant semantic information among words in the utterance. In the second layer, a GRU-based network helps to capture contextual information in the conversation. Furthermore, we discuss the effects of unidirectional and bidirectional networks. We conduct experiments on Friends dataset and EmotionPush dataset. The results show that our proposed model(CAN-GRU) and its variants achieve better performance than baselines. @@ -1286,7 +1286,7 @@ XuZhao LeiZhuang QiXie - HongyingZan + HongyingZan 1155–1165 The obstetric Electronic Medical Record (EMR) contains a large amount of medical data and health information. It plays a vital role in improving the quality of the diagnosis assistant service. In this paper, we treat the diagnosis assistant as a multi-label classification task and propose a Knowledge-Enabled Diagnosis Assistant (KEDA) model for the obstetric diagnosis assistant. We utilize the numerical information in EMRs and the external knowledge from Chinese Obstetric Knowledge Graph (COKG) to enhance the text representation of EMRs. Specifically, the bidirectional maximum matching method and similarity-based approach are used to obtain the entities set contained in EMRs and linked to the COKG. The final knowledge representation is obtained by a weight-based disease prediction algorithm, and it is fused with the text representation through a linear weighting method. Experiment results show that our approach can bring about +3.53 F1 score improvements upon the strong BERT baseline in the diagnosis assistant task. 2020.ccl-1.107 diff --git a/data/xml/2020.cl.xml b/data/xml/2020.cl.xml index df25199de4..b684195154 100644 --- a/data/xml/2020.cl.xml +++ b/data/xml/2020.cl.xml @@ -16,7 +16,7 @@ NadirDurrani FahimDalvi HassanSajjad - JamesGlass + JamesGlass 10.1162/coli_a_00367 Despite the recent success of deep neural networks in natural language processing and other spheres of artificial intelligence, their interpretability remains a challenge. We analyze the representations learned by neural machine translation (NMT) models at various levels of granularity and evaluate their quality through relevant extrinsic properties. In particular, we seek answers to the following questions: (i) How accurately is word structure captured within the learned representations, which is an important aspect in translating morphologically rich languages? (ii) Do the representations capture long-range dependencies, and effectively handle syntactically divergent languages? (iii) Do the representations capture lexical semantics? We conduct a thorough investigation along several parameters: (i) Which layers in the architecture capture each of these linguistic phenomena; (ii) How does the choice of translation unit (word, character, or subword unit) impact the linguistic properties captured by the underlying representations? (iii) Do the encoder and decoder learn differently and independently? (iv) Do the representations learned by multilingual NMT models capture the same amount of linguistic information as their bilingual counterparts? Our data-driven, quantitative evaluation illuminates important aspects in NMT models and their ability to capture various linguistic phenomena. We show that deep NMT models trained in an end-to-end fashion, without being provided any direct supervision during the training process, learn a non-trivial amount of linguistic information. Notable findings include the following observations: (i) Word morphology and part-of-speech information are captured at the lower layers of the model; (ii) In contrast, lexical semantics or non-local syntactic and semantic dependencies are better represented at the higher layers of the model; (iii) Representations learned using characters are more informed about word-morphology compared to those learned using subword units; and (iv) Representations learned by multilingual models are richer compared to bilingual models. 1–52 @@ -28,7 +28,7 @@ LiZhou JianfengGao DiLi - Heung-YeungShum + Heung-YeungShum 10.1162/coli_a_00368 This article describes the development of Microsoft XiaoIce, the most popular social chatbot in the world. XiaoIce is uniquely designed as an artifical intelligence companion with an emotional connection to satisfy the human need for communication, affection, and social belonging. We take into account both intelligent quotient and emotional quotient in system design, cast human–machine social chat as decision-making over Markov Decision Processes, and optimize XiaoIce for long-term user engagement, measured in expected Conversation-turns Per Session (CPS). We detail the system architecture and key components, including dialogue manager, core chat, skills, and an empathetic computing module. We show how XiaoIce dynamically recognizes human feelings and states, understands user intent, and responds to user needs throughout long conversations. Since the release in 2014, XiaoIce has communicated with over 660 million active users and succeeded in establishing long-term relationships with many of them. Analysis of large-scale online logs shows that XiaoIce has achieved an average CPS of 23, which is significantly higher than that of other chatbots and even human conversations. 53–93 @@ -38,7 +38,7 @@ An Empirical Study on Crosslingual Transfer in Probabilistic Topic Models ShudongHao - Michael J.Paul + Michael J.Paul 10.1162/coli_a_00369 Probabilistic topic modeling is a common first step in crosslingual tasks to enable knowledge transfer and extract multilingual features. Although many multilingual topic models have been developed, their assumptions about the training corpus are quite varied, and it is not clear how well the different models can be utilized under various training conditions. In this article, the knowledge transfer mechanisms behind different multilingual topic models are systematically studied, and through a broad set of experiments with four models on ten languages, we provide empirical insights that can inform the selection and future development of multilingual topic models. 95–134 @@ -48,7 +48,7 @@ Data-Driven Sentence Simplification: Survey and Benchmark FernandoAlva-Manchego - CarolinaScarton + CarolinaScarton LuciaSpecia 10.1162/coli_a_00370 Sentence Simplification (SS) aims to modify a sentence in order to make it easier to read and understand. In order to do so, several rewriting transformations can be performed such as replacement, reordering, and splitting. Executing these transformations while keeping sentences grammatical, preserving their main idea, and generating simpler output, is a challenging and still far from solved problem. In this article, we survey research on SS, focusing on approaches that attempt to learn how to simplify using corpora of aligned original-simplified sentence pairs in English, which is the dominant paradigm nowadays. We also include a benchmark of different approaches on common data sets so as to compare them and highlight their strengths and limitations. We expect that this survey will serve as a starting point for researchers interested in the task and help spark new ideas for future developments. @@ -58,10 +58,10 @@ Corpora Annotated with Negation: An Overview - Salud MaríaJiménez-Zafra + Salud MaríaJiménez-Zafra RoserMorante - María TeresaMartín-Valdivia - L. AlfonsoUreña-López + María TeresaMartín-Valdivia + L. AlfonsoUreña-López 10.1162/coli_a_00371 Negation is a universal linguistic phenomenon with a great qualitative impact on natural language processing applications. The availability of corpora annotated with negation is essential to training negation processing systems. Currently, most corpora have been annotated for English, but the presence of languages other than English on the Internet, such as Chinese or Spanish, is greater every day. In this study, we present a review of the corpora annotated with negation information in several languages with the goal of evaluating what aspects of negation have been annotated and how compatible the corpora are. We conclude that it is very difficult to merge the existing corpora because we found differences in the annotation schemes used, and most importantly, in the annotation guidelines: the way in which each corpus was tokenized and the negation elements that have been annotated. Differently than for other well established tasks like semantic role labeling or parsing, for negation there is no standard annotation scheme nor guidelines, which hampers progress in its treatment. 1–52 @@ -80,10 +80,10 @@ Multilingual and Interlingual Semantic Representations for Natural Language Processing: A Brief Introduction - Marta R.Costa-jussà + Marta R.Costa-jussà CristinaEspaña-Bonet PascaleFung - Noah A.Smith + Noah A.Smith 10.1162/coli_a_00373 We introduce the Computational Linguistics special issue on Multilingual and Interlingual Semantic Representations for Natural Language Processing. We situate the special issue’s five articles in the context of our fast-changing field, explaining our motivation for this project. We offer a brief summary of the work in the issue, which includes developments on lexical and sentential semantic representations, from symbolic and neural perspectives. 249–255 @@ -92,8 +92,8 @@ Unsupervised Word Translation with Adversarial Autoencoder - TasnimMohiuddin - ShafiqJoty + TasnimMohiuddin + ShafiqJoty 10.1162/coli_a_00374 Crosslingual word embeddings learned from monolingual embeddings have a crucial role in many downstream tasks, ranging from machine translation to transfer learning. Adversarial training has shown impressive success in learning crosslingual embeddings and the associated word translation task without any parallel data by mapping monolingual embeddings to a shared space. However, recent work has shown superior performance for non-adversarial methods in more challenging language pairs. In this article, we investigate adversarial autoencoder for unsupervised word translation and propose two novel extensions to it that yield more stable training and improved results. Our method includes regularization terms to enforce cycle consistency and input reconstruction, and puts the target encoders as an adversary against the corresponding discriminator. We use two types of refinement procedures sequentially after obtaining the trained encoders and mappings from the adversarial training, namely, refinement with Procrustes solution and refinement with symmetric re-weighting. Extensive experimentations with high- and low-resource languages from two different data sets show that our method achieves better performance than existing adversarial and non-adversarial approaches and is also competitive with the supervised system. Along with performing comprehensive ablation studies to understand the contribution of different components of our adversarial model, we also conduct a thorough analysis of the refinement procedures to understand their effects. 257–288 @@ -113,7 +113,7 @@ <fixed-case>LINSPECTOR</fixed-case>: Multilingual Probing Tasks for Word Representations - Gözde GülŞahin + Gözde GülŞahin ClaraVania IliaKuznetsov IrynaGurevych @@ -128,7 +128,7 @@ RaúlVázquez AlessandroRaganato MathiasCreutz - JörgTiedemann + JörgTiedemann 10.1162/coli_a_00377 Neural machine translation has considerably improved the quality of automatic translations by learning good representations of input sentences. In this article, we explore a multilingual translation model capable of producing fixed-size sentence representations by incorporating an intermediate crosslingual shared layer, which we refer to as attention bridge. This layer exploits the semantics from each language and develops into a language-agnostic meaning representation that can be efficiently used for transfer learning. We systematically study the impact of the size of the attention bridge and the effect of including additional languages in the model. In contrast to related previous work, we demonstrate that there is no conflict between translation performance and the use of sentence representations in downstream tasks. In particular, we show that larger intermediate layers not only improve translation quality, especially for long sentences, but also push the accuracy of trainable classification tasks. Nevertheless, shorter representations lead to increased compression that is beneficial in non-trainable similarity tasks. Similarly, we show that trainable downstream tasks benefit from multilingual models, whereas additional language signals do not improve performance in non-trainable benchmarks. This is an important insight that helps to properly design models for specific applications. Finally, we also include an in-depth analysis of the proposed attention bridge and its ability to encode linguistic properties. We carefully analyze the information that is captured by individual attention heads and identify interesting patterns that explain the performance of specific settings in linguistic probing tasks. 387–424 @@ -139,7 +139,7 @@ Abstract Syntax as Interlingua: Scaling Up the Grammatical Framework from Controlled Languages to Robust Pipelines AarneRanta KrasimirAngelov - NormundsGruzitis + NormundsGruzitis PrasanthKolachina 10.1162/coli_a_00378 Abstract syntax is an interlingual representation used in compilers. Grammatical Framework (GF) applies the abstract syntax idea to natural languages. The development of GF started in 1998, first as a tool for controlled language implementations, where it has gained an established position in both academic and commercial projects. GF provides grammar resources for over 40 languages, enabling accurate generation and translation, as well as grammar engineering tools and components for mobile and Web applications. On the research side, the focus in the last ten years has been on scaling up GF to wide-coverage language processing. The concept of abstract syntax offers a unified view on many other approaches: Universal Dependencies, WordNets, FrameNets, Construction Grammars, and Abstract Meaning Representations. This makes it possible for GF to utilize data from the other approaches and to build robust pipelines. In return, GF can contribute to data-driven approaches by methods to transfer resources from one language to others, to augment data by rule-based generation, to check the consistency of hand-annotated corpora, and to pipe analyses into high-precision semantic back ends. This article gives an overview of the use of abstract syntax as interlingua through both established and emerging NLP applications involving GF. @@ -182,8 +182,8 @@ Tractable <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar - JürgenWedekind - Ronald M.Kaplan + JürgenWedekind + Ronald M.Kaplan 10.1162/coli_a_00384 The formalism for Lexical-Functional Grammar (LFG) was introduced in the 1980s as one of the first constraint-based grammatical formalisms for natural language. It has led to substantial contributions to the linguistic literature and to the construction of large-scale descriptions of particular languages. Investigations of its mathematical properties have shown that, without further restrictions, the recognition, emptiness, and generation problems are undecidable, and that they are intractable in the worst case even with commonly applied restrictions. However, grammars of real languages appear not to invoke the full expressive power of the formalism, as indicated by the fact that algorithms and implementations for recognition and generation have been developed that run—even for broad-coverage grammars—in typically polynomial time. This article formalizes some restrictions on the notation and its interpretation that are compatible with conventions and principles that have been implicit or informally stated in linguistic theory. We show that LFG grammars that respect these restrictions, while still suitable for the description of natural languages, are equivalent to linear context-free rewriting systems and allow for tractable computation. 515–569 @@ -202,8 +202,8 @@ Sentence Meaning Representations Across Languages: What Can We Learn from Existing Frameworks? - ZdeněkŽabokrtský - DanielZeman + ZdeněkŽabokrtský + DanielZeman MagdaŠevčíková 10.1162/coli_a_00385 This article gives an overview of how sentence meaning is represented in eleven deep-syntactic frameworks, ranging from those based on linguistic theories elaborated for decades to rather lightweight NLP-motivated approaches. We outline the most important characteristics of each framework and then discuss how particular language phenomena are treated across those frameworks, while trying to shed light on commonalities as well as differences. @@ -267,7 +267,7 @@ AmrithKrishna BishalSantra AshimGupta - PavankumarSatuluri + PavankumarSatuluri PawanGoyal 10.1162/coli_a_00390 We propose a framework using energy-based models for multiple structured prediction tasks in Sanskrit. Ours is an arc-factored model, similar to the graph-based parsing approaches, and we consider the tasks of word segmentation, morphological parsing, dependency parsing, syntactic linearization, and prosodification, a “prosody-level” task we introduce in this work. Ours is a search-based structured prediction framework, which expects a graph as input, where relevant linguistic information is encoded in the nodes, and the edges are then used to indicate the association between these nodes. Typically, the state-of-the-art models for morphosyntactic tasks in morphologically rich languages still rely on hand-crafted features for their performance. But here, we automate the learning of the feature function. The feature function so learned, along with the search space we construct, encode relevant linguistic information for the tasks we consider. This enables us to substantially reduce the training data requirements to as low as 10%, as compared to the data requirements for the neural state-of-the-art models. Our experiments in Czech and Sanskrit show the language-agnostic nature of the framework, where we train highly competitive models for both the languages. Moreover, our framework enables us to incorporate language-specific constraints to prune the search space and to filter the candidates during inference. We obtain significant improvements in morphosyntactic tasks for Sanskrit by incorporating language-specific constraints into the model. In all the tasks we discuss for Sanskrit, we either achieve state-of-the-art results or ours is the only data-driven solution for those tasks. diff --git a/data/xml/2020.clib.xml b/data/xml/2020.clib.xml index 41ab77f7fd..66349c93bc 100644 --- a/data/xml/2020.clib.xml +++ b/data/xml/2020.clib.xml @@ -106,7 +106,7 @@ It Takes Two to Tango – Towards a Multilingual <fixed-case>MWE</fixed-case> Resource SvetlozaraLeseva - Verginica BarbuMititelu + Verginica BarbuMititelu IvelinaStoyanova 101–111 Mature wordnets offer the opportunity of digging out interesting linguistic information otherwise not explicitly marked in the network. The focus in this paper is on the ways the results already obtained at two levels, derivation and multiword expressions, may be further employed. The parallel recent development of the two resources under discussion, the Bulgarian and the Romanian wordnets, has enabled interlingual analyses that reveal similarities and differences between the linguistic knowledge encoded in the two wordnets. In this paper we show how the resources developed and the knowledge gained are put together towards devising a linked MWE resource that is informed by layered dictionary representation and corpus annotation and analysis. This work is a proof of concept for the adopted method of compiling a multilingual MWE resource on the basis of information extracted from the Bulgarian, the Romanian and the Princeton wordnet, as well as additional language resources and automatic procedures. @@ -150,7 +150,7 @@ A Customizable <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Editor Andrei-MariusAvram - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu 147–154 This paper presents an open-source wordnet editor that has been developed to ensure further expansion of the Romanian wordnet. It comes with a web interface that offers capabilities in selecting new synsets to be implemented, editing the list of literals and their sense numbers and adding these new synsets to the existing network, by importing from Princeton WordNet (and adjusting, when necessary) all the relations in which the newly created synsets and their literals are involved. The application also comes with an authorization mechanism that ensures control of the new synsets added in novice or lexicographer accounts. Although created to serve the current (more or less specific) needs in the development of the Romanian wordnet, it can be customized to fulfill new requirements from developers, either of the same wordnet or of a different one for which a similar approach is adopted. 2020.clib-1.16 @@ -159,7 +159,7 @@ Comparison of Genres in Word Sense Disambiguation using Automatically Generated Text Collections AngelinaBolshina - NataliaLoukachevitch + NataliaLoukachevitch 155–164 The best approaches in Word Sense Disambiguation (WSD) are supervised and rely on large amounts of hand-labelled data, which is not always available and costly to create. In our work we describe an approach that is used to create an automatically labelled collection based on the monosemous relatives (related unambiguous entries) for Russian. The main contribution of our work is that we extracted monosemous relatives that can be located at relatively long distances from a target ambiguous word and ranked them according to the similarity measure to the target sense. We evaluated word sense disambiguation models based on a nearest neighbour classification on BERT and ELMo embeddings and two text collections. Our work relies on the Russian wordnet RuWordNet. 2020.clib-1.17 diff --git a/data/xml/2020.clinicalnlp.xml b/data/xml/2020.clinicalnlp.xml index 3dbe93e382..98aa4fd357 100644 --- a/data/xml/2020.clinicalnlp.xml +++ b/data/xml/2020.clinicalnlp.xml @@ -64,7 +64,7 @@ Incorporating Risk Factor Embeddings in Pre-trained Transformers Improves Sentiment Prediction in Psychiatric Discharge Summaries XiyuDing Mei-HuaHall - TimothyMiller + TimothyMiller 35–40 Reducing rates of early hospital readmission has been recognized and identified as a key to improve quality of care and reduce costs. There are a number of risk factors that have been hypothesized to be important for understanding re-admission risk, including such factors as problems with substance abuse, ability to maintain work, relations with family. In this work, we develop Roberta-based models to predict the sentiment of sentences describing readmission risk factors in discharge summaries of patients with psychosis. We improve substantially on previous results by a scheme that shares information across risk factors while also allowing the model to learn risk factor-specific information. 2020.clinicalnlp-1.4 @@ -108,7 +108,7 @@ JennyCopara Yohan BonesckiGumiel Lucas Ferro Antunes deOliveira - Emerson CabreraParaiso + Emerson CabreraParaiso DouglasTeodoro Cláudia Maria Cabral MoroBarra 65–72 @@ -138,7 +138,7 @@ ToreGundersen HaldorHusby ØysteinNytrø - LiljaØvrelid + LiljaØvrelid 79–84 Loss of consciousness, so-called syncope, is a commonly occurring symptom associated with worse prognosis for a number of heart-related diseases. We present a comparison of methods for a diagnosis classification task in Norwegian clinical notes, targeting syncope, i.e. fainting cases. We find that an often neglected baseline with keyword matching constitutes a rather strong basis, but more advanced methods do offer some improvement in classification performance, especially a convolutional neural network model. The developed pipeline is planned to be used for quantifying unregistered syncope cases in Norway. 2020.clinicalnlp-1.9 @@ -177,7 +177,7 @@ Automatic recognition of abdominal lymph nodes from clinical text - YifanPeng + YifanPeng SungwonLee Daniel C.Elton ThomasShen @@ -235,7 +235,7 @@ Knowledge Grounded Conversational Symptom Detection with Graph Memory Networks HongyinLuo Shang-WenLi - JamesGlass + JamesGlass 136–145 In this work, we propose a novel goal-oriented dialog task, automatic symptom detection. We build a system that can interact with patients through dialog to detect and collect clinical symptoms automatically, which can save a doctor’s time interviewing the patient. Given a set of explicit symptoms provided by the patient to initiate a dialog for diagnosing, the system is trained to collect implicit symptoms by asking questions, in order to collect more information for making an accurate diagnosis. After getting the reply from the patient for each question, the system also decides whether current information is enough for a human doctor to make a diagnosis. To achieve this goal, we propose two neural models and a training pipeline for the multi-step reasoning task. We also build a knowledge graph as additional inputs to further improve model performance. Experiments show that our model significantly outperforms the baseline by 4%, discovering 67% of implicit symptoms on average with a limited number of questions. 2020.clinicalnlp-1.16 @@ -272,11 +272,11 @@ ZixuWang JuliaIve SineadMoylett - ChristophMueller + ChristophMueller RudolfCardinal SumithraVelupillai JohnO’Brien - RobertStewart + RobertStewart 168–177 While Dementia with Lewy Bodies (DLB) is the second most common type of neurodegenerative dementia following Alzheimer’s Disease (AD), it is difficult to distinguish from AD. We propose a method for DLB detection by using mental health record (MHR) documents from a (3-month) period before a patient has been diagnosed with DLB or AD. Our objective is to develop a model that could be clinically useful to differentiate between DLB and AD across datasets from different healthcare institutions. We cast this as a classification task using Convolutional Neural Network (CNN), an efficient neural model for text classification. We experiment with different representation models, and explore the features that contribute to model performances. In addition, we apply temperature scaling, a simple but efficient model calibration method, to produce more reliable predictions. We believe the proposed method has important potential for clinical applications using routine healthcare records, and for generalising to other relevant clinical record datasets. To the best of our knowledge, this is the first attempt to distinguish DLB from AD using mental health records, and to improve the reliability of DLB predictions. 2020.clinicalnlp-1.19 @@ -299,7 +299,7 @@ Extracting Relations between Radiotherapy Treatment Details DanielleBitterman - TimothyMiller + TimothyMiller DavidHarris ChenLin SeanFinan @@ -365,8 +365,8 @@ Learning from Unlabelled Data for Clinical Semantic Textual Similarity YuxiaWang - KarinVerspoor - TimothyBaldwin + KarinVerspoor + TimothyBaldwin 227–233 Domain pretraining followed by task fine-tuning has become the standard paradigm for NLP tasks, but requires in-domain labelled data for task fine-tuning. To overcome this, we propose to utilise domain unlabelled data by assigning pseudo labels from a general model. We evaluate the approach on two clinical STS datasets, and achieve r= 0.80 on N2C2-STS. Further investigation reveals that if the data distribution of unlabelled sentence pairs is closer to the test data, we can obtain better performance. By leveraging a large general-purpose STS dataset and small-scale in-domain training data, we obtain further improvements to r= 0.90, a new SOTA. 2020.clinicalnlp-1.25 diff --git a/data/xml/2020.cllrd.xml b/data/xml/2020.cllrd.xml index 2a7aebc441..46550a7561 100644 --- a/data/xml/2020.cllrd.xml +++ b/data/xml/2020.cllrd.xml @@ -4,8 +4,8 @@ Proceedings of the LREC 2020 Workshop on "Citizen Linguistics in Language Resource Development" JamesFiumara - ChristopherCieri - MarkLiberman + ChristopherCieri + MarkLiberman ChrisCallison-Burch European Language Resources Association
Marseille, France
@@ -54,8 +54,8 @@ Speaking Outside the Box: Exploring the Benefits of Unconstrained Input in Crowdsourcing and Citizen Science Platforms JonChamberlain - UdoKruschwitz - MassimoPoesio + UdoKruschwitz + MassimoPoesio 26–34 Crowdsourcing approaches provide a difficult design challenge for developers. There is a trade-off between the efficiency of the task to be done and the reward given to the user for participating, whether it be altruism, social enhancement, entertainment or money. This paper explores how crowdsourcing and citizen science systems collect data and complete tasks, illustrated by a case study from the online language game-with-a-purpose Phrase Detectives. The game was originally developed to be a constrained interface to prevent player collusion, but subsequently benefited from posthoc analysis of over 76k unconstrained inputs from users. Understanding the interface design and task deconstruction are critical for enabling users to participate in such systems and the paper concludes with a discussion of the idea that social networks can be viewed as form of citizen science platform with both constrained and unconstrained inputs making for a highly complex dataset. 2020.cllrd-1.4 @@ -66,7 +66,7 @@ Leveraging Non-Specialists for Accurate and Time Efficient <fixed-case>AMR</fixed-case> Annotation MaryMartin CeciliaMauceri - MarthaPalmer + MarthaPalmer ChristofferHeckman 35–39 Abstract Meaning Representations (AMRs), a syntax-free representation of phrase semantics are useful for capturing the meaning of a phrase and reflecting the relationship between concepts that are referred to. However, annotating AMRs are time consuming and expensive. The existing annotation process requires expertly trained workers who have knowledge of an extensive set of guidelines for parsing phrases. In this paper, we propose a cost-saving two-step process for the creation of a corpus of AMR-phrase pairs for spatial referring expressions. The first step uses non-specialists to perform simple annotations that can be leveraged in the second step to accelerate the annotation performed by the experts. We hypothesize that our process will decrease the cost per annotation and improve consistency across annotators. Few corpora of spatial referring expressions exist and the resulting language resource will be valuable for referring expression comprehension and generation modeling. diff --git a/data/xml/2020.clssts.xml b/data/xml/2020.clssts.xml index fe033dc714..9254060057 100644 --- a/data/xml/2020.clssts.xml +++ b/data/xml/2020.clssts.xml @@ -3,10 +3,10 @@ Proceedings of the workshop on Cross-Language Search and Summarization of Text and Speech (CLSSTS2020) - KathyMcKeown - Douglas W.Oard + KathyMcKeown + Douglas W.Oard Elizabeth - RichardSchwartz + RichardSchwartz European Language Resources Association
Marseille, France
May @@ -99,7 +99,7 @@ RabihZbib WilliamHartmann RichardSchwartz - JohnMakhoul + JohnMakhoul 38–43 In the IARPA MATERIAL program, information retrieval (IR) is treated as a hard detection problem; the system has to output a single global ranking over all queries, and apply a hard threshold on this global list to come up with all the hypothesized relevant documents. This means that how queries are ranked relative to each other can have a dramatic impact on performance. In this paper, we study such a performance measure, the Average Query Weighted Value (AQWV), which is a combination of miss and false alarm rates. AQWV requires that the same detection threshold is applied to all queries. Hence, detection scores of different queries should be comparable, and, to do that, a score normalization technique (commonly used in keyword spotting from speech) should be used. We describe unsupervised methods for score normalization, which are borrowed from the speech field and adapted accordingly for IR, and demonstrate that they greatly improve AQWV on the task of cross-language information retrieval (CLIR), on three low-resource languages used in MATERIAL. We also present a novel supervised score normalization approach which gives additional gains. 2020.clssts-1.7 @@ -119,7 +119,7 @@ LingjunZhao ZhuolinJiang RichardSchwartz - JohnMakhoul + JohnMakhoul 44–51 In this paper, we describe a cross-lingual information retrieval (CLIR) system that, given a query in English, and a set of audio and text documents in a foreign language, can return a scored list of relevant documents, and present findings in a summary form in English. Foreign audio documents are first transcribed by a state-of-the-art pretrained multilingual speech recognition model that is finetuned to the target language. For text documents, we use multiple multilingual neural machine translation (MT) models to achieve good translation results, especially for low/medium resource languages. The processed documents and queries are then scored using a probabilistic CLIR model that makes use of the probability of translation from GIZA translation tables and scores from a Neural Network Lexical Translation Model (NNLTM). Additionally, advanced score normalization, combination, and thresholding schemes are employed to maximize the Average Query Weighted Value (AQWV) scores. The CLIR output, together with multiple translation renderings, are selected and translated into English snippets via a summarization model. Our turnkey system is language agnostic and can be quickly trained for a new low-resource language in few days. 2020.clssts-1.8 @@ -129,7 +129,7 @@ What Set of Documents to Present to an Analyst? RichardSchwartz - JohnMakhoul + JohnMakhoul LeeTarlin DamianosKarakos 52–57 @@ -155,7 +155,7 @@ Subtitles to Segmentation: Improving Low-Resource Speech-to-<fixed-case>T</fixed-case>ext<fixed-case>T</fixed-case>ranslation Pipelines DavidWan - ZhengpingJiang + ZhengpingJiang ChrisKedzie ElsbethTurcan PeterBell diff --git a/data/xml/2020.cmcl.xml b/data/xml/2020.cmcl.xml index 3c76c9a2be..fcc322da9e 100644 --- a/data/xml/2020.cmcl.xml +++ b/data/xml/2020.cmcl.xml @@ -4,9 +4,9 @@ Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics EmmanueleChersoni - CassandraJacobs + CassandraJacobs YoheiOseki - LaurentPrévot + LaurentPrévot EnricoSantus Association for Computational Linguistics
Online
@@ -44,7 +44,7 @@
Production-based Cognitive Models as a Test Suite for Reinforcement Learning Algorithms - AdrianBrasoveanu + AdrianBrasoveanu JakubDotlacil 28–37 We introduce a framework in which production-rule based computational cognitive modeling and Reinforcement Learning can systematically interact and inform each other. We focus on linguistic applications because the sophisticated rule-based cognitive models needed to capture linguistic behavioral data promise to provide a stringent test suite for RL algorithms, connecting RL algorithms to both accuracy and reaction-time experimental data. Thus, we open a path towards assembling an experimentally rigorous and cognitively realistic benchmark for RL algorithms. We extend our previous work on lexical decision tasks and tabular RL algorithms (Brasoveanu and Dotlačil, 2020b) with a discussion of neural-network based approaches, and a discussion of how parsing can be formalized as an RL problem. @@ -84,7 +84,7 @@ Development of Multi-level Linguistic Alignment in Child-adult Conversations ThomasMisiek - BenoitFavre + BenoitFavre AbdellahFourtassi 54–58 Interactive alignment is a major mechanism of linguistic coordination. Here we study the way this mechanism emerges in development across the lexical, syntactic, and conceptual levels. We leverage NLP tools to analyze a large-scale corpus of child-adult conversations between 2 and 5 years old. We found that, across development, children align consistently to adults above chance and that adults align consistently more to children than vice versa (even controlling for language production abilities). Besides these consistencies, we found a diversity of developmental trajectories across linguistic levels. These corpus-based findings provide strong support for an early onset of multi-level linguistic alignment in children and invites new experimental work. @@ -96,7 +96,7 @@ Conditioning, but on Which Distribution? Grammatical Gender in <fixed-case>G</fixed-case>erman Plural Inflection KateMcCurdy AdamLopez - SharonGoldwater + SharonGoldwater 59–65 Grammatical gender is a consistent and informative cue to the plural class of German nouns. We find that neural encoder-decoder models learn to rely on this cue to predict plural class, but adult speakers are relatively insensitive to it. This suggests that the neural models are not an effective cognitive model of German plural formation. 2020.cmcl-1.8 @@ -106,7 +106,7 @@ Learning Pronoun Case from Distributional Cues: Flexible Frames for Case Acquisition XiaomengMa - MartinChodorow + MartinChodorow VirginiaValian 66–74 Case is an abstract grammatical feature that indicates argument relationship in a sentence. In English, cases are expressed on pronouns, as nominative case (e.g. I, he), accusative case (e.g. me, him) and genitive case (e.g. my, his). Children correctly use cased pronouns at a very young age. How do they acquire abstract case in the first place, when different cases are not associated with different meanings? This paper proposes that the distributional patterns in parents’ input could be used to distinguish grammatical cases in English. diff --git a/data/xml/2020.cmlc.xml b/data/xml/2020.cmlc.xml index 309ed892d9..f62ca6b762 100644 --- a/data/xml/2020.cmlc.xml +++ b/data/xml/2020.cmlc.xml @@ -7,7 +7,7 @@ AdrienBarbaresi SimonClematide MarcKupietz - HaraldLüngen + HaraldLüngen InesPisetta European Language Ressources Association
Marseille, France
@@ -48,9 +48,9 @@ <fixed-case>F</fixed-case>rench Contextualized Word-Embeddings with a sip of <fixed-case>C</fixed-case>a<fixed-case>B</fixed-case>e<fixed-case>R</fixed-case>net: a New <fixed-case>F</fixed-case>rench Balanced Reference Corpus MuriellePopa-Fabre - Pedro JavierOrtiz Suárez - BenoîtSagot - Éricde la Clergerie + Pedro JavierOrtiz Suárez + BenoîtSagot + Éricde la Clergerie 15–23 This paper investigates the impact of different types and size of training corpora on language models. By asking the fundamental question of quality versus quantity, we compare four French corpora by pre-training four different ELMos and evaluating them on dependency parsing, POS-tagging and Named Entities Recognition downstream tasks. We present and asses the relevance of a new balanced French corpus, CaBeRnet, that features a representative range of language usage, including a balanced variety of genres (oral transcriptions, newspapers, popular magazines, technical reports, fiction, academic texts), in oral and written styles. We hypothesize that a linguistically representative corpus will allow the language models to be more efficient, and therefore yield better evaluation scores on different evaluation sets and tasks. This paper offers three main contributions: (1) two newly built corpora: (a) CaBeRnet, a French Balanced Reference Corpus and (b) CBT-fr a domain-specific corpus having both oral and written style in youth literature, (2) five versions of ELMo pre-trained on differently built corpora, and (3) a whole array of computational results on downstream tasks that deepen our understanding of the effects of corpus balance and register in NLP evaluation. 2020.cmlc-1.3 @@ -62,7 +62,7 @@ RosaFilgueira ClaireGrover MelissaTerras - BeatriceAlex + BeatriceAlex 24–30 This paper describes work in progress on devising automatic and parallel methods for geoparsing large digital historical textual data by combining the strengths of three natural language processing (NLP) tools, the Edinburgh Geoparser, spaCy and defoe, and employing different tokenisation and named entity recognition (NER) techniques. We apply these tools to a large collection of nineteenth century Scottish geographical dictionaries, and describe preliminary results obtained when processing this data. 2020.cmlc-1.4 diff --git a/data/xml/2020.codi.xml b/data/xml/2020.codi.xml index 30748b04ff..1dffefe32c 100644 --- a/data/xml/2020.codi.xml +++ b/data/xml/2020.codi.xml @@ -40,7 +40,7 @@ Using Type Information to Improve Entity Coreference Resolution SopanKhosla - CarolynRose + CarolynRose 20–31 Coreference resolution (CR) is an essential part of discourse analysis. Most recently, neural approaches have been proposed to improve over SOTA models from earlier paradigms. So far none of the published neural models leverage external semantic knowledge such as type information. This paper offers the first such model and evaluation, demonstrating modest gains in accuracy by introducing either gold standard or predicted types. In the proposed approach, type information serves both to (1) improve mention representation and (2) create a soft type consistency check between coreference candidate mentions. Our evaluation covers two different grain sizes of types over four different benchmark corpora. 2020.codi-1.3 @@ -73,7 +73,7 @@ Exploring Coreference Features in Heterogeneous Data EkaterinaLapshinova-Koltunski - KerstinKunz + KerstinKunz 53–64 The present paper focuses on variation phenomena in coreference chains. We address the hypothesis that the degree of structural variation between chain elements depends on language-specific constraints and preferences and, even more, on the communicative situation of language production. We define coreference features that also include reference to abstract entities and events. These features are inspired through several sources – cognitive parameters, pragmatic factors and typological status. We pay attention to the distributions of these features in a dataset containing English and German texts of spoken and written discourse mode, which can be classified into seven different registers. We apply text classification and feature selection to find out how these variational dimensions (language, mode and register) impact on coreference features. Knowledge on the variation under analysis is valuable for contrastive linguistics, translation studies and multilingual natural language processing (NLP), e.g. machine translation or cross-lingual coreference resolution. 2020.codi-1.6 @@ -133,7 +133,7 @@ YoumnaFarag JosefValvoda HelenYannakoudakis - TedBriscoe + TedBriscoe 102–112 In this work, we systematically investigate how well current models of coherence can capture aspects of text implicated in discourse organisation. We devise two datasets of various linguistic alterations that undermine coherence and test model sensitivity to changes in syntax and semantics. We furthermore probe discourse embedding space and examine the knowledge that is encoded in representations of coherence. We hope this study shall provide further insight into how to frame the task and improve models of coherence assessment further. Finally, we make our datasets publicly available as a resource for researchers to use to test discourse coherence models. 2020.codi-1.11 @@ -144,7 +144,7 @@ Computational Interpretations of Recency for the Choice of Referring Expressions in Discourse FahimeSame - Keesvan Deemter + Keesvan Deemter 113–123 First, we discuss the most common linguistic perspectives on the concept of recency and propose a taxonomy of recency metrics employed in Machine Learning studies for choosing the form of referring expressions in discourse context. We then report on a Multi-Layer Perceptron study and a Sequential Forward Search experiment, followed by Bayes Factor analysis of the outcomes. The results suggest that recency metrics counting paragraphs and sentences contribute to referential choice prediction more than other recency-related metrics. Based on the results of our analysis, we argue that, sensitivity to discourse structure is important for recency metrics used in determining referring expression forms. 2020.codi-1.12 @@ -168,7 +168,7 @@ Extending Implicit Discourse Relation Recognition to the <fixed-case>PDTB</fixed-case>-3 LiLiang ZhengZhao - BonnieWebber + BonnieWebber 135–147 The PDTB-3 contains many more Implicit discourse relations than the previous PDTB-2. This is in part because implicit relations have now been annotated within sentences as well as between them. In addition, some now co-occur with explicit discourse relations, instead of standing on their own. Here we show that while this can complicate the problem of identifying the location of implicit discourse relations, it can in turn simplify the problem of identifying their senses. We present data to support this claim, as well as methods that can serve as a non-trivial baseline for future state-of-the-art recognizers for implicit discourse relations. 2020.codi-1.14 diff --git a/data/xml/2020.cogalex.xml b/data/xml/2020.cogalex.xml index 4bf0859010..5830acc371 100644 --- a/data/xml/2020.cogalex.xml +++ b/data/xml/2020.cogalex.xml @@ -23,7 +23,7 @@ LaraMüller AndreRölke RalphRadach - ChrisBiemann + ChrisBiemann 1–11 The corpus, from which a predictive language model is trained, can be considered the experience of a semantic system. We recorded everyday reading of two participants for two months on a tablet, generating individual corpus samples of 300/500K tokens. Then we trained word2vec models from individual corpora and a 70 million-sentence newspaper corpus to obtain individual and norm-based long-term memory structure. To test whether individual corpora can make better predictions for a cognitive task of long-term memory retrieval, we generated stimulus materials consisting of 134 sentences with uncorrelated individual and norm-based word probabilities. For the subsequent eye tracking study 1-2 months later, our regression analyses revealed that individual, but not norm-corpus-based word probabilities can account for first-fixation duration and first-pass gaze duration. Word length additionally affected gaze duration and total viewing duration. The results suggest that corpora representative for an individual’s long-term memory structure can better explain reading performance than a norm corpus, and that recently acquired information is lexically accessed rapidly. 2020.cogalex-1.1 @@ -51,8 +51,8 @@ Less is Better: A cognitively inspired unsupervised model for language segmentation JinbiaoYang - Stefan L.Frank - Antalvan den Bosch + Stefan L.Frank + Antalvan den Bosch 33–45 Language users process utterances by segmenting them into many cognitive units, which vary in their sizes and linguistic levels. Although we can do such unitization/segmentation easily, its cognitive mechanism is still not clear. This paper proposes an unsupervised model, Less-is-Better (LiB), to simulate the human cognitive process with respect to language unitization/segmentation. LiB follows the principle of least effort and aims to build a lexicon which minimizes the number of unit tokens (alleviating the effort of analysis) and number of unit types (alleviating the effort of storage) at the same time on any given corpus. LiB’s workflow is inspired by empirical cognitive phenomena. The design makes the mechanism of LiB cognitively plausible and the computational requirement light-weight. The lexicon generated by LiB performs the best among different types of lexicons (e.g. ground-truth words) both from an information-theoretical view and a cognitive view, which suggests that the LiB lexicon may be a plausible proxy of the mental lexicon. 2020.cogalex-1.4 @@ -91,7 +91,7 @@ <fixed-case>C</fixed-case>og<fixed-case>AL</fixed-case>ex-<fixed-case>VI</fixed-case> Shared Task: Bidirectional Transformer based Identification of Semantic Relations SauravKarmakar - John P.McCrae + John P.McCrae 65–71 This paper presents a bidirectional transformer based approach for recognising semantic relationships between a pair of words as proposed by CogALex VI shared task in 2020. The system presented here works by employing BERT embeddings of the words and passing the same over tuned neural network to produce a learning model for the pair of words and their relationships. Afterwards the very same model is used for the relationship between unknown words from the test set. CogALex VI provided Subtask 1 as the identification of relationship of three specific categories amongst English pair of words and the presented system opts to work on that. The resulted relationships of the unknown words are analysed here which shows a balanced performance in overall characteristics with some scope for improvement. 2020.cogalex-1.8 @@ -109,8 +109,8 @@ Definition Extraction Feature Analysis: From Canonical to Naturally-Occurring Definitions MireiaRoig Mirapeix - LuisEspinosa Anke - JoseCamacho-Collados + LuisEspinosa Anke + JoseCamacho-Collados 81–91 Textual definitions constitute a fundamental source of knowledge when seeking the meaning of words, and they are the cornerstone of lexical resources like glossaries, dictionaries, encyclopedia or thesauri. In this paper, we present an in-depth analytical study on the main features relevant to the task of definition extraction. Our main goal is to study whether linguistic structures from canonical (the Aristotelian or genus et differentia model) can be leveraged to retrieve definitions from corpora in different domains of knowledge and textual genres alike. To this end, we develop a simple linear classifier and analyze the contribution of several (sets of) linguistic features. Finally, as a result of our experiments, we also shed light on the particularities of existing benchmarks as well as the most challenging aspects of the task. 2020.cogalex-1.10 @@ -143,7 +143,7 @@ Translating Collocations: The Need for Task-driven Word Associations - Oi YeeKwong + Oi YeeKwong 112–116 Existing dictionaries may help collocation translation by suggesting associated words in the form of collocations, thesaurus, and example sentences. We propose to enhance them with task-driven word associations, illustrating the need by a few scenarios and outlining a possible approach based on word embedding. An example is given, using pre-trained word embedding, while more extensive investigation with more refined methods and resources is underway. 2020.cogalex-1.14 @@ -172,7 +172,7 @@ Automatic Word Association Norms (<fixed-case>AWAN</fixed-case>) JorgeReyes-Magaña GerardoSierra Martínez - GemmaBel-Enguix + GemmaBel-Enguix HelenaGomez-Adorno 142–153 Word Association Norms (WAN) are collections that present stimuli words and the set of their associated responses. The corpus is widely used in diverse areas of expertise. In order to reduce the effort to have a good quality resource that can be reproduced in many languages with minimum sources, a methodology to build Automatic Word Association Norms is proposed (AWAN). The methodology has an input of two simple elements: a) dictionary, and b) pre-processed Word Embeddings. This new kind of WAN is evaluated in two ways: i) learning word embeddings based on the node2vec algorithm and comparing them with human annotated benchmarks, and ii) performing a lexical search for a reverse dictionary. Both evaluations are done in a weighted graph with the AWAN lexical elements. The results showed that the methodology produces good quality AWANs. diff --git a/data/xml/2020.coling.xml b/data/xml/2020.coling.xml index d5b3b1cda9..c50fa06035 100644 --- a/data/xml/2020.coling.xml +++ b/data/xml/2020.coling.xml @@ -3,9 +3,9 @@ Proceedings of the 28th International Conference on Computational Linguistics - DoniaScott - NuriaBel - ChengqingZong + DoniaScott + NuriaBel + ChengqingZong International Committee on Computational Linguistics
Barcelona, Spain (Online)
December @@ -19,7 +19,7 @@ Exploring Controllable Text Generation Techniques ShrimaiPrabhumoye - Alan WBlack + Alan WBlack RuslanSalakhutdinov 1–14 Neural controllable text generation is an important area gaining attention due to its plethora of applications. Although there is a large body of prior work in controllable text generation, there is no unifying theme. In this work, we provide a new schema of the pipeline of the generation process by classifying it into five modules. The control of attributes in the generation process requires modification of these modules. We present an overview of different techniques used to perform the modulation of these modules. We also provide an analysis on the advantages and disadvantages of these techniques. We further pave ways to develop new architectures based on the combination of the modules described in this paper. @@ -95,7 +95,7 @@ TaoZhang CongyingXia Chun-TaLu - PhilipYu + PhilipYu 77–87 Named entity typing (NET) is a classification task of assigning an entity mention in the context with given semantic types. However, with the growing size and granularity of the entity types, few previous researches concern with newly emerged entity types. In this paper, we propose MZET, a novel memory augmented FNET (Fine-grained NET) model, to tackle the unseen types in a zero-shot manner. MZET incorporates character-level, word-level, and contextural-level information to learn the entity mention representation. Besides, MZET considers the semantic meaning and the hierarchical structure into the entity type representation. Finally, through the memory component which models the relationship between the entity mention and the entity type, MZET transfers the knowledge from seen entity types to the zero-shot ones. Extensive experiments on three public datasets show the superior performance obtained by MZET, which surpasses the state-of-the-art FNET neural network models with up to 8% gain in Micro-F1 and Macro-F1 score. 2020.coling-main.7 @@ -267,7 +267,7 @@ Understanding Pre-trained <fixed-case>BERT</fixed-case> for Aspect-based Sentiment Analysis HuXu LeiShu - PhilipYu + PhilipYu BingLiu 244–250 This paper analyzes the pre-trained hidden representations learned from reviews on BERT for tasks in aspect-based sentiment analysis (ABSA). Our work is motivated by the recent progress in BERT-based language models for ABSA. However, it is not clear how the general proxy task of (masked) language model trained on unlabeled corpus without annotations of aspects or opinions can provide important features for downstream tasks in ABSA. By leveraging the annotated datasets in ABSA, we investigate both the attentions and the learned representations of BERT pre-trained on reviews. We found that BERT uses very few self-attention heads to encode context words (such as prepositions or pronouns that indicating an aspect) and opinion words for an aspect. Most features in the representation of an aspect are dedicated to the fine-grained semantics of the domain (or product category) and the aspect itself, instead of carrying summarized opinions from its context. We hope this investigation can help future research in improving self-supervised learning, unsupervised learning and fine-tuning for ABSA. The pre-trained model and code can be found at https://github.com/howardhsu/BERT-for-RRC-ABSA. @@ -288,7 +288,7 @@ Improving Sentiment Analysis over non-<fixed-case>E</fixed-case>nglish Tweets using Multilingual Transformers and Automatic Translation for Data-Augmentation ValentinBarriere - AlexandraBalahur + AlexandraBalahur 266–271 Tweets are specific text data when compared to general text. Although sentiment analysis over tweets has become very popular in the last decade for English, it is still difficult to find huge annotated corpora for non-English languages. The recent rise of the transformer models in Natural Language Processing allows to achieve unparalleled performances in many tasks, but these models need a consequent quantity of text to adapt to the tweet domain. We propose the use of a multilingual transformer model, that we pre-train over English tweets on which we apply data-augmentation using automatic translation to adapt the model to non-English languages. Our experiments in French, Spanish, German and Italian suggest that the proposed technique is an efficient way to improve the results of the transformers over small corpora of tweets in a non-English language. 2020.coling-main.23 @@ -350,7 +350,7 @@ ShogoFujita HidetakaKamigaito HiroyaTakamura - ManabuOkumura + ManabuOkumura 316–327 We tackle the task of automatically generating a function name from source code. Existing generators face difficulties in generating low-frequency or out-of-vocabulary subwords. In this paper, we propose two strategies for copying low-frequency or out-of-vocabulary subwords in inputs. Our best performing model showed an improvement over the conventional method in terms of our modified F1 and accuracy on the Java-small and Java-large datasets. 2020.coling-main.28 @@ -372,7 +372,7 @@ <fixed-case>CEREC</fixed-case>: A Corpus for Entity Resolution in Email Conversations Parag PravinDakle - DanMoldovan + DanMoldovan 339–349 We present the first large scale corpus for entity resolution in email conversations (CEREC). The corpus consists of 6001 email threads from the Enron Email Corpus containing 36,448 email messages and 38,996 entity coreference chains. The annotation is carried out as a two-step process with minimal manual effort. Experiments are carried out for evaluating different features and performance of four baselines on the created corpus. For the task of mention identification and coreference resolution, a best performance of 54.1 F1 is reported, highlighting the room for improvement. An in-depth qualitative and quantitative error analysis is presented to understand the limitations of the baselines considered. 2020.coling-main.30 @@ -383,7 +383,7 @@ <fixed-case>SQL</fixed-case> Generation via Machine Reading Comprehension ZeyuYan JianqiangMa - YangZhang + YangZhang JianpingShen 350–356 Text-to-SQL systems offers natural language interfaces to databases, which can automatically generates SQL queries given natural language questions. On the WikiSQL benchmark, state-of- the-art text-to-SQL systems typically take a slot-filling approach by building several specialized models for each type of slot. Despite being effective, such modularized systems are complex and also fall short in jointly learning for different slots. To solve these problems, this paper proposes a novel approach that formulates the task as a question answering problem, where different slots are predicted by a unified machine reading comprehension (MRC) model. For this purpose, we use a BERT-based MRC model, which can also benefit from intermediate training on other MRC datasets. The proposed method can achieve competitive results on WikiSQL, suggesting it being a promising direction for text-to-SQL. @@ -397,7 +397,7 @@ YousufAli Mohammed SandraDerbring ArildMatsson - BeataMegyesi + BeataMegyesi 357–369 This article reports on an ongoing project aiming at automatization of pseudonymization of learner essays. The process includes three steps: identification of personal information in an unstructured text, labeling for a category, and pseudonymization. We experiment with rule-based methods for detection of 15 categories out of the suggested 19 (Megyesi et al., 2018) that we deem important and/or doable with automatic approaches. For the detection and labeling steps,we use resources covering personal names, geographic names, company and university names and others. For the pseudonymization step, we replace the item using another item of the same type from the above-mentioned resources. Evaluation of the detection and labeling steps are made on a set of manually anonymized essays. The results are promising and show that 89% of the personal information can be successfully identified in learner data, and annotated correctly with an inter-annotator agreement of 86% measured as Fleiss kappa and Krippendorff’s alpha. 2020.coling-main.32 @@ -442,7 +442,7 @@ Leveraging <fixed-case>HTML</fixed-case> in Free Text Web Named Entity Recognition ColinAshby - DavidWeir + DavidWeir 407–413 HTML tags are typically discarded in free text Named Entity Recognition from Web pages. We investigate whether these discarded tags might be used to improve NER performance. We compare Text+Tags sentences with their Text-Only equivalents, over five datasets, two free text segmentation granularities and two NER models. We find an increased F1 performance for Text+Tags of between 0.9% and 13.2% over all datasets, variants and models. This performance increase, over datasets of varying entity types, HTML density and construction quality, indicates our method is flexible and adaptable. These findings imply that a similar technique might be of use in other Web-aware NLP tasks, including the enrichment of deep language models. 2020.coling-main.36 @@ -504,7 +504,7 @@ Hsien-chinLin MarcoMoresi Carelvan Niekerk - MilicaGasic + MilicaGasic 465–479 Reinforcement learning (RL) can enable task-oriented dialogue systems to steer the conversation towards successful task completion. In an end-to-end setting, a response can be constructed in a word-level sequential decision making process with the entire system vocabulary as action space. Policies trained in such a fashion do not require expert-defined action spaces, but they have to deal with large action spaces and long trajectories, making RL impractical. Using the latent space of a variational model as action space alleviates this problem. However, current approaches use an uninformed prior for training and optimize the latent distribution solely on the context. It is therefore unclear whether the latent representation truly encodes the characteristics of different actions. In this paper, we explore three ways of leveraging an auxiliary task to shape the latent variable distribution: via pre-training, to obtain an informed prior, and via multitask learning. We choose response auto-encoding as the auxiliary task, as this captures the generative factors of dialogue responses while requiring low computational cost and neither additional data nor labels. Our approach yields a more action-characterized latent representations which support end-to-end dialogue policy optimization and achieves state-of-the-art success rates. These results warrant a more wide-spread use of RL in end-to-end dialogue models. 2020.coling-main.41 @@ -514,7 +514,7 @@ Recent Neural Methods on Slot Filling and Intent Classification for Task-Oriented Dialogue Systems: A Survey SamuelLouvan - BernardoMagnini + BernardoMagnini 480–496 In recent years, fostered by deep learning technologies and by the high demand for conversational AI, various approaches have been proposed that address the capacity to elicit and understand user’s needs in task-oriented dialogue systems. We focus on two core tasks, slot filling (SF) and intent classification (IC), and survey how neural based models have rapidly evolved to address natural language understanding in dialogue systems. We introduce three neural architectures: independent models, which model SF and IC separately, joint models, which exploit the mutual benefit of the two tasks simultaneously, and transfer learning models, that scale the model to new domains. We discuss the current state of the research in SF and IC, and highlight challenges that still require attention. 2020.coling-main.42 @@ -599,7 +599,7 @@ <fixed-case>S</fixed-case>enti<fixed-case>X</fixed-case>: A Sentiment-Aware Pre-Trained Model for Cross-Domain Sentiment Analysis JieZhou - JunfengTian + JunfengTian RuiWang YuanbinWu WenmingXiao @@ -653,7 +653,7 @@ Modeling Local Contexts for Joint Dialogue Act Recognition and Sentiment Classification with Bi-channel Dynamic Convolutions JingyeLi HaoFei - DonghongJi + DonghongJi 616–626 In this paper, we target improving the joint dialogue act recognition (DAR) and sentiment classification (SC) tasks by fully modeling the local contexts of utterances. First, we employ the dynamic convolution network (DCN) as the utterance encoder to capture the dialogue contexts. Further, we propose a novel context-aware dynamic convolution network (CDCN) to better leverage the local contexts when dynamically generating kernels. We extended our frameworks into bi-channel version (i.e., BDCN and BCDCN) under multi-task learning to achieve the joint DAR and SC. Two channels can learn their own feature representations for DAR and SC, respectively, but with latent interaction. Besides, we suggest enhancing the tasks by employing the DiaBERT language model. Our frameworks obtain state-of-the-art performances against all baselines on two benchmark datasets, demonstrating the importance of modeling the local contexts. 2020.coling-main.53 @@ -684,10 +684,10 @@ A Joint Learning Approach based on Self-Distillation for Keyphrase Extraction from Scientific Documents - TuanLai - TrungBui + TuanLai + TrungBui Doo SoonKim - Quan HungTran + Quan HungTran 649–656 Keyphrase extraction is the task of extracting a small set of phrases that best describe a document. Most existing benchmark datasets for the task typically have limited numbers of annotated documents, making it challenging to train increasingly complex neural networks. In contrast, digital libraries store millions of scientific articles online, covering a wide range of topics. While a significant portion of these articles contain keyphrases provided by their authors, most other articles lack such kind of annotations. Therefore, to effectively utilize these large amounts of unlabeled articles, we propose a simple and efficient joint learning approach based on the idea of self-distillation. Experimental results show that our approach consistently improves the performance of baseline models for keyphrase extraction. Furthermore, our best models outperform previous methods for the task, achieving new state-of-the-art results on two public benchmarks: Inspec and SemEval-2017. 2020.coling-main.56 @@ -811,7 +811,7 @@ FajriKoto AfshinRahimi Jey HanLau - TimothyBaldwin + TimothyBaldwin 757–770 Although the Indonesian language is spoken by almost 200 million people and the 10th most spoken language in the world, it is under-represented in NLP research. Previous work on Indonesian has been hampered by a lack of annotated datasets, a sparsity of language resources, and a lack of resource standardization. In this work, we release the IndoLEM dataset comprising seven tasks for the Indonesian language, spanning morpho-syntax, semantics, and discourse. We additionally release IndoBERT, a new pre-trained language model for Indonesian, and evaluate it over IndoLEM, in addition to benchmarking it against existing resources. Our experiments show that IndoBERT achieves state-of-the-art performance over most of the tasks in IndoLEM. 2020.coling-main.66 @@ -858,7 +858,7 @@ Attention Transfer Network for Aspect-level Sentiment Classification FeiZhao ZhenWu - XinyuDai + XinyuDai 811–821 Aspect-level sentiment classification (ASC) aims to detect the sentiment polarity of a given opinion target in a sentence. In neural network-based methods for ASC, most works employ the attention mechanism to capture the corresponding sentiment words of the opinion target, then aggregate them as evidence to infer the sentiment of the target. However, aspect-level datasets are all relatively small-scale due to the complexity of annotation. Data scarcity causes the attention mechanism sometimes to fail to focus on the corresponding sentiment words of the target, which finally weakens the performance of neural models. To address the issue, we propose a novel Attention Transfer Network (ATN) in this paper, which can successfully exploit attention knowledge from resource-rich document-level sentiment classification datasets to improve the attention capability of the aspect-level sentiment classification task. In the ATN model, we design two different methods to transfer attention knowledge and conduct experiments on two ASC benchmark datasets. Extensive experimental results show that our methods consistently outperform state-of-the-art works. Further analysis also validates the effectiveness of ATN. 2020.coling-main.70 @@ -1024,7 +1024,7 @@ A High Precision Pipeline for Financial Knowledge Graph Construction SarahElhammadi LaksV.S. Lakshmanan - RaymondNg + RaymondNg MichaelSimpson BaoxingHuai ZhefengWang @@ -1051,9 +1051,9 @@ Answering Legal Questions by Learning Neural Attentive Text Representation Phi ManhKien Ha-ThanhNguyen - Ngo XuanBach + Ngo XuanBach VuTran - Minh LeNguyen + Minh LeNguyen Tu MinhPhuong 988–998 Text representation plays a vital role in retrieval-based question answering, especially in the legal domain where documents are usually long and complicated. The better the question and the legal documents are represented, the more accurate they are matched. In this paper, we focus on the task of answering legal questions at the article level. Given a legal question, the goal is to retrieve all the correct and valid legal articles, that can be used as the basic to answer the question. We present a retrieval-based model for the task by learning neural attentive text representation. Our text representation method first leverages convolutional neural networks to extract important information in a question and legal articles. Attention mechanisms are then used to represent the question and articles and select appropriate information to align them in a matching process. Experimental results on an annotated corpus consisting of 5,922 Vietnamese legal questions show that our model outperforms state-of-the-art retrieval-based methods for question answering by large margins in terms of both recall and NDCG. @@ -1065,7 +1065,7 @@ Joint Transformer/<fixed-case>RNN</fixed-case> Architecture for Gesture Typing in Indic Languages EmilBiju AnirudhSriram - Mitesh M.Khapra + Mitesh M.Khapra PratyushKumar 999–1010 Gesture typing is a method of typing words on a touch-based keyboard by creating a continuous trace passing through the relevant keys. This work is aimed at developing a keyboard that supports gesture typing in Indic languages. We begin by noting that when dealing with Indic languages, one needs to cater to two different sets of users: (i) users who prefer to type in the native Indic script (Devanagari, Bengali, etc.) and (ii) users who prefer to type in the English script but want the transliterated output in the native script. In both cases, we need a model that takes a trace as input and maps it to the intended word. To enable the development of these models, we create and release two datasets. First, we create a dataset containing keyboard traces for 193,658 words from 7 Indic languages. Second, we curate 104,412 English-Indic transliteration pairs from Wikidata across these languages. Using these datasets we build a model that performs path decoding, transliteration and transliteration correction. Unlike prior approaches, our proposed model does not make co-character independence assumptions during decoding. The overall accuracy of our model across the 7 languages varies from 70-95%. @@ -1077,7 +1077,7 @@ Automatic Charge Identification from Facts: A Few Sentence-Level Charge Annotations is All You Need ShounakPaul PawanGoyal - SaptarshiGhosh + SaptarshiGhosh 1011–1022 Automatic Charge Identification (ACI) is the task of identifying the relevant charges given the facts of a situation and the statutory laws that define these charges, and is a crucial aspect of the judicial process. Existing works focus on learning charge-side representations by modeling relationships between the charges, but not much effort has been made in improving fact-side representations. We observe that only a small fraction of sentences in the facts actually indicates the charges. We show that by using a very small subset (< 3%) of fact descriptions annotated with sentence-level charges, we can achieve an improvement across a range of different ACI models, as compared to modeling just the main document-level task on a much larger dataset. Additionally, we propose a novel model that utilizes sentence-level charge labels as an auxiliary task, coupled with the main task of document-level charge identification in a multi-task learning framework. The proposed model comprehensively outperforms a large number of recent baselines for ACI. The improvement in performance is particularly noticeable for the rare charges which are known to be especially challenging to identify. 2020.coling-main.88 @@ -1108,7 +1108,7 @@ Seid MuhieYimam Hizkiel MitikuAlemayehu AbinewAyele - ChrisBiemann + ChrisBiemann 1048–1060 This paper presents the study of sentiment analysis for Amharic social media texts. As the number of social media users is ever-increasing, social media platforms would like to understand the latent meaning and sentiments of a text to enhance decision-making procedures. However, low-resource languages such as Amharic have received less attention due to several reasons such as lack of well-annotated datasets, unavailability of computing resources, and fewer or no expert researchers in the area. This research addresses three main research questions. We first explore the suitability of existing tools for the sentiment analysis task. Annotation tools are scarce to support large-scale annotation tasks in Amharic. Also, the existing crowdsourcing platforms do not support Amharic text annotation. Hence, we build a social-network-friendly annotation tool called ‘ASAB’ using the Telegram bot. We collect 9.4k tweets, where each tweet is annotated by three Telegram users. Moreover, we explore the suitability of machine learning approaches for Amharic sentiment analysis. The FLAIR deep learning text classifier, based on network embeddings that are computed from a distributional thesaurus, outperforms other supervised classifiers. We further investigate the challenges in building a sentiment analysis system for Amharic and we found that the widespread usage of sarcasm and figurative speech are the main issues in dealing with the problem. To advance the sentiment analysis research in Amharic and other related low-resource languages, we release the dataset, the annotation tool, source code, and models publicly under a permissive. 2020.coling-main.91 @@ -1141,7 +1141,7 @@ MinghuiAn JingjingWang ShoushanLi - GuodongZhou + GuodongZhou 1078–1089 From the perspective of health psychology, human beings with long-term and sustained negativity are highly possible to be diagnosed with depression. Inspired by this, we argue that the global topic information derived from user-generated contents (e.g., texts and images) is crucial to boost the performance of the depression detection task, though this information has been neglected by almost all previous studies on depression detection. To this end, we propose a new Multimodal Topic-enriched Auxiliary Learning (MTAL) approach, aiming at capturing the topic information inside different modalities (i.e., texts and images) for depression detection. Especially, in our approach, a modality-agnostic topic model is proposed to be capable of mining the topical clues from either the discrete textual signals or the continuous visual signals. On this basis, the topic modeling w.r.t. the two modalities are cast as two auxiliary tasks for improving the performance of the primary task (i.e., depression detection). Finally, the detailed evaluation demonstrates the great advantage of our MTAL approach to depression detection over the state-of-the-art baselines. This justifies the importance of the multimodal topic information to depression detection and the effectiveness of our approach in capturing such information. 2020.coling-main.94 @@ -1167,7 +1167,7 @@ Situated and Interactive Multimodal Conversations SeungwhanMoon SatwikKottur - PaulCrook + PaulCrook AnkitaDe ShivaniPoddar TheodoreLevin @@ -1214,9 +1214,9 @@ <fixed-case>R</fixed-case>-<fixed-case>VGAE</fixed-case>: Relational-variational Graph Autoencoder for Unsupervised Prerequisite Chain Learning IreneLi - AlexanderFabbri + AlexanderFabbri SwapnilHingmire - DragomirRadev + DragomirRadev 1147–1157 The task of concept prerequisite chain learning is to automatically determine the existence of prerequisite relationships among concept pairs. In this paper, we frame learning prerequisite relationships among concepts as an unsupervised task with no access to labeled concept pairs during training. We propose a model called the Relational-Variational Graph AutoEncoder (R-VGAE) to predict concept relations within a graph consisting of concept and resource nodes. Results show that our unsupervised approach outperforms graph-based semi-supervised methods and other baseline methods by up to 9.77% and 10.47% in terms of prerequisite relation prediction accuracy and F1 score. Our method is notably the first graph-based model that attempts to make use of deep learning representations for the task of unsupervised prerequisite learning. We also expand an existing corpus which totals 1,717 English Natural Language Processing (NLP)-related lecture slide files and manual concept pair annotations over 322 topics. 2020.coling-main.99 @@ -1300,7 +1300,7 @@ JingyiHe KcTsiolis KianKenyon-Dean - Jackie Chi KitCheung + Jackie Chi KitCheung 1229–1241 Word embeddings are trained to predict word cooccurrence statistics, which leads them to possess different lexical properties (syntactic, semantic, etc.) depending on the notion of context defined at training time. These properties manifest when querying the embedding space for the most similar vectors, and when used at the input layer of deep neural networks trained to solve downstream NLP problems. Meta-embeddings combine multiple sets of differently trained word embeddings, and have been shown to successfully improve intrinsic and extrinsic performance over equivalent models which use just one set of source embeddings. We introduce word prisms: a simple and efficient meta-embedding method that learns to combine source embeddings according to the task at hand. Word prisms learn orthogonal transformations to linearly combine the input source embeddings, which allows them to be very efficient at inference time. We evaluate word prisms in comparison to other meta-embedding methods on six extrinsic evaluations and observe that word prisms offer improvements in performance on all tasks. 2020.coling-main.106 @@ -1309,7 +1309,7 @@ Always Keep your Target in Mind: Studying Semantics and Improving Performance of Neural Lexical Substitution - NikolayArefyev + NikolayArefyev BorisSheludko AlexanderPodolskiy AlexanderPanchenko @@ -1334,7 +1334,7 @@ How Relevant Are Selectional Preferences for Transformer-based Language Models? EleniMetheniti - TimVan de Cruys + TimVan de Cruys NabilHathout 1266–1278 Selectional preference is defined as the tendency of a predicate to favor particular arguments within a certain linguistic context, and likewise, reject others that result in conflicting or implausible meanings. The stellar success of contextual word embedding models such as BERT in NLP tasks has led many to question whether these models have learned linguistic information, but up till now, most research has focused on syntactic information. We investigate whether Bert contains information on the selectional preferences of words, by examining the probability it assigns to the dependent word given the presence of a head word in a sentence. We are using word pairs of head-dependent words in five different syntactic relations from the SP-10K corpus of selectional preference (Zhang et al., 2019b), in sentences from the ukWaC corpus, and we are calculating the correlation of the plausibility score (from SP-10K) and the model probabilities. Our results show that overall, there is no strong positive or negative correlation in any syntactic relation, but we do find that certain head words have a strong correlation and that masking all words but the head word yields the most positive correlations in most scenarios –which indicates that the semantics of the predicate is indeed an integral and influential factor for the selection of the argument. @@ -1345,7 +1345,7 @@ Embedding Semantic Taxonomies AlyssaLees - ChrisWelty + ChrisWelty ShubinZhao JacekKorycki SaraMc Carthy @@ -1359,7 +1359,7 @@ A Retrofitting Model for Incorporating Semantic Relations into Word Embeddings SapanShah SreedharReddy - PushpakBhattacharyya + PushpakBhattacharyya 1292–1298 We present a novel retrofitting model that can leverage relational knowledge available in a knowledge resource to improve word embeddings. The knowledge is captured in terms of relation inequality constraints that compare similarity of related and unrelated entities in the context of an anchor entity. These constraints are used as training data to learn a non-linear transformation function that maps original word vectors to a vector space respecting these constraints. The transformation function is learned in a similarity metric learning setting using Triplet network architecture. We applied our model to synonymy, antonymy and hypernymy relations in WordNet and observed large gains in performance over original distributional models as well as other retrofitting approaches on word similarity task and significant overall improvement on lexical entailment detection task. 2020.coling-main.111 @@ -1419,7 +1419,7 @@ ManuelaSanguinetti CristinaBosco PaoloRosso - FarahBenamara + FarahBenamara 1346–1358 This paper presents an in-depth investigation of the effectiveness of dependency-based syntactic features on the irony detection task in a multilingual perspective (English, Spanish, French and Italian). It focuses on the contribution from syntactic knowledge, exploiting linguistic resources where syntax is annotated according to the Universal Dependencies scheme. Three distinct experimental settings are provided. In the first, a variety of syntactic dependency-based features combined with classical machine learning classifiers are explored. In the second scenario, two well-known types of word embeddings are trained on parsed data and tested against gold standard datasets. In the third setting, dependency-based syntactic features are combined into the Multilingual BERT architecture. The results suggest that fine-grained dependency-based syntactic information is informative for the detection of irony. 2020.coling-main.116 @@ -1454,9 +1454,9 @@ DipteshKanojia RajDabre ShubhamDewangan - PushpakBhattacharyya - GholamrezaHaffari - MalharKulkarni + PushpakBhattacharyya + GholamrezaHaffari + MalharKulkarni 1384–1395 Cognates are variants of the same lexical form across different languages; for example “fonema” in Spanish and “phoneme” in English are cognates, both of which mean “a unit of sound”. The task of automatic detection of cognates among any two languages can help downstream NLP tasks such as Cross-lingual Information Retrieval, Computational Phylogenetics, and Machine Translation. In this paper, we demonstrate the use of cross-lingual word embeddings for detecting cognates among fourteen Indian Languages. Our approach introduces the use of context from a knowledge graph to generate improved feature representations for cognate detection. We, then, evaluate the impact of our cognate detection mechanism on neural machine translation (NMT), as a downstream task. We evaluate our methods to detect cognates on a challenging dataset of twelve Indian languages, namely, Sanskrit, Hindi, Assamese, Oriya, Kannada, Gujarati, Tamil, Telugu, Punjabi, Bengali, Marathi, and Malayalam. Additionally, we create evaluation datasets for two more Indian languages, Konkani and Nepali. We observe an improvement of up to 18% points, in terms of F-score, for cognate detection. Furthermore, we observe that cognates extracted using our method help improve NMT quality by up to 2.76 BLEU. We also release our code, newly constructed datasets and cross-lingual models publicly. 2020.coling-main.119 @@ -1555,7 +1555,7 @@ Contextual Argument Component Classification for Class Discussions LucaLugini - DianeLitman + DianeLitman 1475–1480 Argument mining systems often consider contextual information, i.e. information outside of an argumentative discourse unit, when trained to accomplish tasks such as argument component identification, classification, and relation extraction. However, prior work has not carefully analyzed the utility of different contextual properties in context-aware models. In this work, we show how two different types of contextual information, local discourse context and speaker context, can be incorporated into a computational model for classifying argument components in multi-party classroom discussions. We find that both context types can improve performance, although the improvements are dependent on context size and position. 2020.coling-main.128 @@ -1590,7 +1590,7 @@ Event-Guided Denoising for Multilingual Relation Learning AmithAnanthram EmilyAllaway - KathleenMcKeown + KathleenMcKeown 1505–1512 General purpose relation extraction has recently seen considerable gains in part due to a massively data-intensive distant supervision technique from Soares et al. (2019) that produces state-of-the-art results across many benchmarks. In this work, we present a methodology for collecting high quality training data for relation extraction from unlabeled text that achieves a near-recreation of their zero-shot and few-shot results at a fraction of the training cost. Our approach exploits the predictable distributional structure of date-marked news articles to build a denoised corpus – the extraction process filters out low quality examples. We show that a smaller multilingual encoder trained on this corpus performs comparably to the current state-of-the-art (when both receive little to no fine-tuning) on few-shot and standard relation benchmarks in English and Spanish despite using many fewer examples (50k vs. 300mil+). 2020.coling-main.131 @@ -1650,7 +1650,7 @@ Graph Enhanced Dual Attention Network for Document-Level Relation Extraction - BoLi + BoLi WeiYe ZhonghaoSheng RuiXie @@ -1724,7 +1724,7 @@ RajdeepSarkar Bharathi RajaChakravarthi TheodorusFransen - John P.McCrae + John P.McCrae 1606–1617 Automatic Language Identification (LI) or Dialect Identification (DI) of short texts of closely related languages or dialects, is one of the primary steps in many natural language processing pipelines. Language identification is considered a solved task in many cases; however, in the case of very closely related languages, or in an unsupervised scenario (where the languages are not known in advance), performance is still poor. In this paper, we propose the Unsupervised Deep Language and Dialect Identification (UDLDI) method, which can simultaneously learn sentence embeddings and cluster assignments from short texts. The UDLDI model understands the sentence constructions of languages by applying attention to character relations which helps to optimize the clustering of languages. We have performed our experiments on three short-text datasets for different language families, each consisting of closely related languages or dialects, with very minimal training sets. Our experimental evaluations on these datasets have shown significant improvement over state-of-the-art unsupervised methods and our model has outperformed state-of-the-art LI and DI systems in supervised settings. 2020.coling-main.141 @@ -1763,7 +1763,7 @@ Biased <fixed-case>T</fixed-case>ext<fixed-case>R</fixed-case>ank: Unsupervised Graph-Based Content Extraction AshkanKazemi VerónicaPérez-Rosas - RadaMihalcea + RadaMihalcea 1642–1652 We introduce Biased TextRank, a graph-based content extraction method inspired by the popular TextRank algorithm that ranks text spans according to their importance for language processing tasks and according to their relevance to an input “focus.” Biased TextRank enables focused content extraction for text by modifying the random restarts in the execution of TextRank. The random restart probabilities are assigned based on the relevance of the graph nodes to the focus of the task. We present two applications of Biased TextRank: focused summarization and explanation extraction, and show that our algorithm leads to improved performance on two different datasets by significant ROUGE-N score margins. Much like its predecessor, Biased TextRank is unsupervised, easy to implement and orders of magnitude faster and lighter than current state-of-the-art Natural Language Processing methods for similar tasks. 2020.coling-main.144 @@ -1799,7 +1799,7 @@ Unsupervised Fact Checking by Counter-Weighted Positive and Negative Evidential Paths in A Knowledge Graph JiseongKim - Key-sunChoi + Key-sunChoi 1677–1686 Misinformation spreads across media, community, and knowledge graphs in the Web by not only human agents but also information extraction algorithms that extract factual statements from unstructured textual data to populate the existing knowledge graphs. Traditional fact checking by experts or crowds is increasingly difficult to keep pace with the volume of newly created misinformation in the Web. Therefore, it is important and necessary to enhance the computational ability to determine whether a given factual statement is truthful or not. We view this problem as a truth scoring task in a knowledge graph. We present a novel rule-based approach that finds positive and negative evidential paths in a knowledge graph for a given factual statement and calculates a truth score for the given statement by unsupervised ensemble of the found positive and negative evidential paths. For example, we can determine the factual statement “United States is the birth place of Barack Obama” as truthful if there is the positive evidential path (Barack Obama, birthPlace, Hawaii) ∧ (Hawaii, country, United States) in a knowledge graph. For another example, we can determine the factual statement “Canada is the nationality of Barack Obama” as untruthful if there is the negative evidential path (Barack Obama, nationality, United States) ∧ (United States, ≠, Canada) in a knowledge graph. For evaluating on a real-world situation, we constructed an evaluation dataset by labeling truth or untruth label on factual statements that were extracted from Wikipedia texts by using the state-of-the-art BERT-based information extraction system. Our evaluation results show that our approach outperforms the state-of-the-art unsupervised approaches significantly by up to 0.12 AUC-ROC and even outperforms the supervised approach by up to 0.05 AUC-ROC not only in our dataset but also in the two different standard datasets. 2020.coling-main.147 @@ -1864,7 +1864,7 @@ BosungKim TaesukHong YoungjoongKo - JungyunSeo + JungyunSeo 1737–1743 As research on utilizing human knowledge in natural language processing has attracted considerable attention in recent years, knowledge graph (KG) completion has come into the spotlight. Recently, a new knowledge graph completion method using a pre-trained language model, such as KG-BERT, is presented and showed high performance. However, its scores in ranking metrics such as Hits@k are still behind state-of-the-art models. We claim that there are two main reasons: 1) failure in sufficiently learning relational information in knowledge graphs, and 2) difficulty in picking out the correct answer from lexically similar candidates. In this paper, we propose an effective multi-task learning method to overcome the limitations of previous works. By combining relation prediction and relevance ranking tasks with our target link prediction, the proposed model can learn more relational properties in KGs and properly perform even when lexical similarity occurs. Experimental results show that we not only largely improve the ranking performances compared to KG-BERT but also achieve the state-of-the-art performances in Mean Rank and Hits@10 on the WN18RR dataset. 2020.coling-main.153 @@ -1936,7 +1936,7 @@ A Deep Generative Approach to Native Language Identification EhsanLotfi IliaMarkov - WalterDaelemans + WalterDaelemans 1778–1783 Native language identification (NLI) – identifying the native language (L1) of a person based on his/her writing in the second language (L2) – is useful for a variety of purposes, including marketing, security, and educational applications. From a traditional machine learning perspective,NLI is usually framed as a multi-class classification task, where numerous designed features are combined in order to achieve the state-of-the-art results. We introduce a deep generative language modelling (LM) approach to NLI, which consists in fine-tuning a GPT-2 model separately on texts written by the authors with the same L1, and assigning a label to an unseen text based on the minimum LM loss with respect to one of these fine-tuned GPT-2 models. Our method outperforms traditional machine learning approaches and currently achieves the best results on the benchmark NLI datasets. 2020.coling-main.159 @@ -1982,7 +1982,7 @@ Detecting de minimis Code-Switching in Historical <fixed-case>G</fixed-case>erman Books ShijiaLiu - DavidSmith + DavidSmith 1808–1814 Code-switching has long interested linguists, with computational work in particular focusing on speech and social media data (Sitaram et al., 2019). This paper contrasts these informal instances of code-switching to its appearance in more formal registers, by examining the mixture of languages in the Deutsches Textarchiv (DTA), a corpus of 1406 primarily German books from the 17th to 19th centuries. We automatically annotate and manually inspect spans of six embedded languages (Latin, French, English, Italian, Spanish, and Greek) in the corpus. We quantitatively analyze the differences between code-switching patterns in these books and those in more typically studied speech and social media corpora. Furthermore, we address the practical task of predicting code-switching from features of the matrix language alone in the DTA corpus. Such classifiers can help reduce errors when optical character recognition or speech transcription is applied to a large corpus with rare embedded languages. 2020.coling-main.163 @@ -1993,7 +1993,7 @@ Lin: Unsupervised Extraction of Tasks from Textual Communication ParthDiwanji HuiGuo - MunindarSingh + MunindarSingh AnupKalia 1815–1819 Commitments and requests are a hallmark of collaborative communication, especially in team settings. Identifying specific tasks being committed to or request from emails and chat messages can enable important downstream tasks, such as producing todo lists, reminders, and calendar entries. State-of-the-art approaches for task identification rely on large annotated datasets, which are not always available, especially for domain-specific tasks. Accordingly, we propose Lin, an unsupervised approach of identifying tasks that leverages dependency parsing and VerbNet. Our evaluations show that Lin yields comparable or more accurate results than supervised models on domains with large training sets, and maintains its excellent performance on unseen domains. @@ -2089,9 +2089,9 @@ Humans Meet Models on Object Naming: A New Dataset and Analysis CarinaSilberer - SinaZarrieß + SinaZarrieß MatthijsWestera - GemmaBoleda + GemmaBoleda 1893–1905 We release ManyNames v2 (MN v2), a verified version of an object naming dataset that contains dozens of valid names per object for 25K images. We analyze issues in the data collection method originally employed, standard in Language & Vision (L&V), and find that the main source of noise in the data comes from simulating a naming context solely from an image with a target object marked with a bounding box, which causes subjects to sometimes disagree regarding which object is the target. We also find that both the degree of this uncertainty in the original data and the amount of true naming variation in MN v2 differs substantially across object domains. We use MN v2 to analyze a popular L&V model and demonstrate its effectiveness on the task of object naming. However, our fine-grained analysis reveals that what appears to be human-like model behavior is not stable across domains, e.g., the model confuses people and clothing objects much more frequently than humans do. We also find that standard evaluations underestimate the actual effectiveness of the naming model: on the single-label names of the original dataset (Visual Genome), it obtains −27% accuracy points than on MN v2, that includes all valid object names. 2020.coling-main.172 @@ -2113,7 +2113,7 @@ Language-Driven Region Pointer Advancement for Controllable Image Captioning AnnikaLindh RobertRoss - JohnKelleher + JohnKelleher 1922–1935 Controllable Image Captioning is a recent sub-field in the multi-modal task of Image Captioning wherein constraints are placed on which regions in an image should be described in the generated natural language caption. This puts a stronger focus on producing more detailed descriptions, and opens the door for more end-user control over results. A vital component of the Controllable Image Captioning architecture is the mechanism that decides the timing of attending to each region through the advancement of a region pointer. In this paper, we propose a novel method for predicting the timing of region pointer advancement by treating the advancement step as a natural part of the language structure via a NEXT-token, motivated by a strong correlation to the sentence structure in the training data. We find that our timing agrees with the ground-truth timing in the Flickr30k Entities test data with a precision of 86.55% and a recall of 97.92%. Our model implementing this technique improves the state-of-the-art on standard captioning metrics while additionally demonstrating a considerably larger effective vocabulary size. 2020.coling-main.174 @@ -2135,7 +2135,7 @@ Image Caption Generation for News Articles ZhishenYang - NaoakiOkazaki + NaoakiOkazaki 1941–1951 In this paper, we address the task of news-image captioning, which generates a description of an image given the image and its article body as input. This task is more challenging than the conventional image captioning, because it requires a joint understanding of image and text. We present a Transformer model that integrates text and image modalities and attends to textual features from visual features in generating a caption. Experiments based on automatic evaluation metrics and human evaluation show that an article text provides primary information to reproduce news-image captions written by journalists. The results also demonstrate that the proposed model outperforms the state-of-the-art model. In addition, we also confirm that visual features contribute to improving the quality of news-image captions. 2020.coling-main.176 @@ -2182,7 +2182,7 @@ The <fixed-case>A</fixed-case>ppos<fixed-case>C</fixed-case>orpus: a new multilingual, multi-domain dataset for factual appositive generation YovaKementchedjhieva DiLu - JoelTetreault + JoelTetreault 1989–2003 News articles, image captions, product reviews and many other texts mention people and organizations whose name recognition could vary for different audiences. In such cases, background information about the named entities could be provided in the form of an appositive noun phrase, either written by a human or generated automatically. We expand on the previous work in appositive generation with a new, more realistic, end-to-end definition of the task, instantiated by a dataset that spans four languages (English, Spanish, German and Polish), two entity types (person and organization) and two domains (Wikipedia and News). We carry out an extensive analysis of the data and the task, pointing to the various modeling challenges it poses. The results we obtain with standard language generation methods show that the task is indeed non-trivial, and leaves plenty of room for improvement. 2020.coling-main.180 @@ -2207,7 +2207,7 @@ SiyuanWang YamengHuang JianJiao - XuanjingHuang + XuanjingHuang NanDuan RuofeiZhang 2014–2025 @@ -2275,7 +2275,7 @@ Taking the Correction Difficulty into Account in Grammatical Error Correction Evaluation - TakumiGotou + TakumiGotou RyoNagata MasatoMita KazuakiHanawa @@ -2289,7 +2289,7 @@ Automatic Distractor Generation for Multiple Choice Questions in Standard Tests ZhaopengQiu XianWu - WeiFan + WeiFan 2096–2106 To assess knowledge proficiency of a learner, multiple choice question is an efficient and widespread form in standard tests. However, the composition of the multiple choice question, especially the construction of distractors is quite challenging. The distractors are required to both incorrect and plausible enough to confuse the learners who did not master the knowledge. Currently, the distractors are generated by domain experts which are both expensive and time-consuming. This urges the emergence of automatic distractor generation, which can benefit various standard tests in a wide range of domains. In this paper, we propose a question and answer guided distractor generation (EDGE) framework to automate distractor generation. EDGE consists of three major modules: (1) the Reforming Question Module and the Reforming Passage Module apply gate layers to guarantee the inherent incorrectness of the generated distractors; (2) the Distractor Generator Module applies attention mechanism to control the level of plausibility. Experimental results on a large-scale public dataset demonstrate that our model significantly outperforms existing models and achieves a new state-of-the-art. 2020.coling-main.189 @@ -2323,7 +2323,7 @@ TatsuyaAoki HidetakaKamigaito HiroyaTakamura - ManabuOkumura + ManabuOkumura 2126–2131 We propose neural models that can normalize text by considering the similarities of word strings and sounds. We experimentally compared a model that considers the similarities of both word strings and sounds, a model that considers only the similarity of word strings or of sounds, and a model without the similarities as a baseline. Results showed that leveraging the word string similarity succeeded in dealing with misspellings and abbreviations, and taking into account the sound similarity succeeded in dealing with phonetic substitutions and emphasized characters. So that the proposed models achieved higher F1 scores than the baseline. 2020.coling-main.192 @@ -2370,7 +2370,7 @@ Automatic Assistance for Academic Word Usage DariushSaberi - JohnLee + JohnLee JonathanJames Webster 2163–2168 This paper describes a writing assistance system that helps students improve their academic writing. Given an input text, the system suggests lexical substitutions that aim to incorporate more academic vocabulary. The substitution candidates are drawn from an academic word list and ranked by a masked language model. Experimental results show that lexical formality analysis can improve the quality of the suggestions, in comparison to a baseline that relies on the masked language model only. @@ -2380,10 +2380,10 @@ Style versus Content: A distinction without a (learnable) difference? - SomayehJafaritazehjani + SomayehJafaritazehjani GwénoléLecorvé DamienLolive - JohnKelleher + JohnKelleher 2169–2180 Textual style transfer involves modifying the style of a text while preserving its content. This assumes that it is possible to separate style from content. This paper investigates whether this separation is possible. We use sentiment transfer as our case study for style transfer analysis. Our experimental methodology frames style transfer as a multi-objective problem, balancing style shift with content preservation and fluency. Due to the lack of parallel data for style transfer we employ a variety of adversarial encoder-decoder networks in our experiments. Also, we use of a probing methodology to analyse how these models encode style-related features in their latent spaces. The results of our experiments which are further confirmed by a human evaluation reveal the inherent trade-off between the multiple style transfer objectives which indicates that style cannot be usefully separated from content within these style-transfer systems. 2020.coling-main.197 @@ -2393,8 +2393,8 @@ Contextualized Embeddings for Enriching Linguistic Analyses on Politeness AhmadAljanaideh - EricFosler-Lussier - Marie-Catherinede Marneffe + EricFosler-Lussier + Marie-Catherinede Marneffe 2181–2190 Linguistic analyses in natural language processing (NLP) have often been performed around the static notion of words where the context (surrounding words) is not considered. For example, previous analyses on politeness have focused on comparing the use of static words such as personal pronouns across (im)polite requests without taking the context of those words into account. Current word embeddings in NLP do capture context and thus can be leveraged to enrich linguistic analyses. In this work, we introduce a model which leverages the pre-trained BERT model to cluster contextualized representations of a word based on (1) the context in which the word appears and (2) the labels of items the word occurs in. Using politeness as case study, this model is able to automatically discover interpretable, fine-grained context patterns of words, some of which align with existing theories on politeness. Our model further discovers novel finer-grained patterns associated with (im)polite language. For example, the word please can occur in impolite contexts that are predictable from BERT clustering. The approach proposed here is validated by showing that features based on fine-grained patterns inferred from the clustering improve over politeness-word baselines. 2020.coling-main.198 @@ -2427,7 +2427,7 @@ Cycle-Consistent Adversarial Autoencoders for Unsupervised Text Style Transfer YufangHuang WentaoZhu - DeyiXiong + DeyiXiong YiyeZhang ChangjianHu FeiyuXu @@ -2453,7 +2453,7 @@ YuWu LiliMou ZhoujunLi - WenHanChao + WenHanChao 2236–2249 Conventional approaches for formality style transfer borrow models from neural machine translation, which typically requires massive parallel data for training. However, the dataset for formality style transfer is considerably smaller than translation corpora. Moreover, we observe that informal and formal sentences closely resemble each other, which is different from the translation task where two languages have different vocabularies and grammars. In this paper, we present a new approach, Sequence-to-Sequence with Shared Latent Space (S2S-SLS), for formality style transfer, where we propose two auxiliary losses and adopt joint training of bi-directional transfer and auto-encoding. Experimental results show that S2S-SLS (with either RNN or Transformer architectures) consistently outperforms baselines in various settings, especially when we have limited data. 2020.coling-main.203 @@ -2469,7 +2469,7 @@ HaijunShan JiZhang QiZhang - XuanjingHuang + XuanjingHuang 2250–2260 Visual storytelling aims to generate a narrative paragraph from a sequence of images automatically. Existing approaches construct text description independently for each image and roughly concatenate them as a story, which leads to the problem of generating semantically incoherent content. In this paper, we propose a new way for visual storytelling by introducing a topic description task to detect the global semantic context of an image stream. A story is then constructed with the guidance of the topic description. In order to combine the two generation tasks, we propose a multi-agent communication framework that regards the topic description generator and the story generator as two agents and learn them simultaneously via iterative updating mechanism. We validate our approach on VIST dataset, where quantitative results, ablations, and human evaluation demonstrate our method’s good ability in generating stories with higher quality compared to state-of-the-art methods. 2020.coling-main.204 @@ -2479,7 +2479,7 @@ Referring to what you know and do not know: Making Referring Expression Generation Models Generalize To Unseen Entities RossanaCunha - ThiagoCastro Ferreira + ThiagoCastro Ferreira AdrianaPagano FabioAlves 2261–2272 @@ -2505,7 +2505,7 @@ Retrieval-Augmented Controllable Review Generation JihyeokKim SeungtaekChoi - Reinald KimAmplayo + Reinald KimAmplayo Seung-wonHwang 2284–2295 In this paper, we study review generation given a set of attribute identifiers which are user ID, product ID and rating. This is a difficult subtask of natural language generation since models are limited to the given identifiers, without any specific descriptive information regarding the inputs, when generating the text. The capacity of these models is thus confined and dependent to how well the models can capture vector representations of attributes. We thus propose to additionally leverage references, which are selected from a large pool of texts labeled with one of the attributes, as textual information that enriches inductive biases of given attributes. With these references, we can now pose the problem as an instance of text-to-text generation, which makes the task easier since texts that are syntactically, semantically similar with the output text are provided as input. Using this framework, we address issues such as selecting references from a large candidate set without textual context and improving the model complexity for generation. Our experiments show that our models improve over previous approaches on both automatic and human evaluation metrics. @@ -2528,11 +2528,11 @@ A Learning-Exploring Method to Generate Diverse Paraphrases with Multi-Objective Deep Reinforcement Learning MingtongLiu ErguangYang - DeyiXiong + DeyiXiong YujieZhang YaoMeng ChangjianHu - JinanXu + JinanXu YufengChen 2310–2321 Paraphrase generation (PG) is of great importance to many downstream tasks in natural language processing. Diversity is an essential nature to PG for enhancing generalization capability and robustness of downstream applications. Recently, neural sequence-to-sequence (Seq2Seq) models have shown promising results in PG. However, traditional model training for PG focuses on optimizing model prediction against single reference and employs cross-entropy loss, which objective is unable to encourage model to generate diverse paraphrases. In this work, we present a novel approach with multi-objective learning to PG. We propose a learning-exploring method to generate sentences as learning objectives from the learned data distribution, and employ reinforcement learning to combine these new learning objectives for model training. We first design a sample-based algorithm to explore diverse sentences. Then we introduce several reward functions to evaluate the sampled sentences as learning signals in terms of expressive diversity and semantic fidelity, aiming to generate diverse and high-quality paraphrases. To effectively optimize model performance satisfying different evaluating aspects, we use a GradNorm-based algorithm that automatically balances these training objectives. Experiments and analyses on Quora and Twitter datasets demonstrate that our proposed method not only gains a significant increase in diversity but also improves generation quality over several state-of-the-art baselines. @@ -2543,7 +2543,7 @@ Curious Case of Language Generation Evaluation Metrics: A Cautionary Tale OzanCaglayan - PranavaMadhyastha + PranavaMadhyastha LuciaSpecia 2322–2328 Automatic evaluation of language generation systems is a well-studied problem in Natural Language Processing. While novel metrics are proposed every year, a few popular metrics remain as the de facto metrics to evaluate tasks such as image captioning and machine translation, despite their known limitations. This is partly due to ease of use, and partly because researchers expect to see them and know how to interpret them. In this paper, we urge the community for more careful consideration of how they automatically evaluate their models by demonstrating important failure cases on multiple datasets, language pairs and tasks. Our experiments show that metrics (i) usually prefer system outputs to human-authored texts, (ii) can be insensitive to correct translations of rare words, (iii) can yield surprisingly high scores when given a single sentence as system output for the entire test set. @@ -2680,7 +2680,7 @@ XiujunZhu YueZhang ShoushanLi - GuodongZhou + GuodongZhou 2448–2458 Sentiment forecasting in dialog aims to predict the polarity of next utterance to come, and can help speakers revise their utterances in sentimental utterances generation. However, the polarity of next utterance is normally hard to predict, due to the lack of content of next utterance (yet to come). In this study, we propose a Neural Sentiment Forecasting (NSF) model to address inherent challenges. In particular, we employ a neural simulation model to simulate the next utterance based on the context (previous utterances encountered). Moreover, we employ a sequence influence model to learn both pair-wise and seq-wise influence. Empirical studies illustrate the importance of proposed sentiment forecasting task, and justify the effectiveness of our NSF model over several strong baselines. 2020.coling-main.221 @@ -2693,7 +2693,7 @@ DongsukOh YoonnaJang KisuYang - HeuiseokLim + HeuiseokLim 2459–2471 CommonsenseQA is a task in which a correct answer is predicted through commonsense reasoning with pre-defined knowledge. Most previous works have aimed to improve the performance with distributed representation without considering the process of predicting the answer from the semantic representation of the question. To shed light upon the semantic interpretation of the question, we propose an AMR-ConceptNet-Pruned (ACP) graph. The ACP graph is pruned from a full integrated graph encompassing Abstract Meaning Representation (AMR) graph generated from input questions and an external commonsense knowledge graph, ConceptNet (CN). Then the ACP graph is exploited to interpret the reasoning path as well as to predict the correct answer on the CommonsenseQA task. This paper presents the manner in which the commonsense reasoning process can be interpreted with the relations and concepts provided by the ACP graph. Moreover, ACP-based models are shown to outperform the baselines. 2020.coling-main.222 @@ -2724,7 +2724,7 @@ Multitask Easy-First Dependency Parsing: Exploiting Complementarities of Different Dependency Representations YashKankanampati - JosephLe Roux + JosephLe Roux NadiTomeh DimaTaji NizarHabash @@ -2738,7 +2738,7 @@ Context Dependent Semantic Parsing: A Survey ZhuangLi LizhenQu - GholamrezaHaffari + GholamrezaHaffari 2509–2521 Semantic parsing is the task of translating natural language utterances into machine-readable meaning representations. Currently, most semantic parsing methods are not able to utilize the contextual information (e.g. dialogue and comments history), which has a great potential to boost the semantic parsing systems. To address this issue, context dependent semantic parsing has recently drawn a lot of attention. In this survey, we investigate progress on the methods for the context dependent semantic parsing, together with the current datasets and tasks. We then point out open problems and challenges for future research in this area. 2020.coling-main.226 @@ -2788,8 +2788,8 @@ Jon AnderCampos KyunghyunCho ArantxaOtegi - AitorSoroa - EnekoAgirre + AitorSoroa + EnekoAgirre GorkaAzkune 2561–2571 The interaction of conversational systems with users poses an exciting opportunity for improving them after deployment, but little evidence has been provided of its feasibility. In most applications, users are not able to provide the correct answer to the system, but they are able to provide binary (correct, incorrect) feedback. In this paper we propose feedback-weighted learning based on importance sampling to improve upon an initial supervised system using binary user feedback. We perform simulated experiments on document classification (for development) and Conversational Question Answering datasets like QuAC and DoQA, where binary user feedback is derived from gold annotations. The results show that our method is able to improve over the initial supervised system, getting close to a fully-supervised system that has access to the same labeled examples in in-domain experiments (QuAC), and even matching in out-of-domain experiments (DoQA). Our work opens the prospect to exploit interactions with real users and improve conversational systems after deployment. @@ -2945,8 +2945,8 @@ BoweiZou YuHong WeiZhang - AiTiAw - GuodongZhou + AiTiAw + GuodongZhou 2687–2698 Reading comprehension (RC) on social media such as Twitter is a critical and challenging task due to its noisy, informal, but informative nature. Most existing RC models are developed on formal datasets such as news articles and Wikipedia documents, which severely limit their performances when directly applied to the noisy and informal texts in social media. Moreover, these models only focus on a certain type of RC, extractive or generative, but ignore the integration of them. To well address these challenges, we come up with a noisy user-generated text-oriented RC model. In particular, we first introduce a set of text normalizers to transform the noisy and informal texts to the formal ones. Then, we integrate the extractive and the generative RC model by a multi-task learning mechanism and an answer selection module. Experimental results on TweetQA demonstrate that our NUT-RC model significantly outperforms the state-of-the-art social media-oriented RC models. 2020.coling-main.242 @@ -2982,8 +2982,8 @@ Neural Networks approaches focused on <fixed-case>F</fixed-case>rench Spoken Language Understanding: application to the <fixed-case>MEDIA</fixed-case> Evaluation Task SaharGhannay - ChristopheServan - SophieRosset + ChristopheServan + SophieRosset 2722–2727 In this paper, we present a study on a French Spoken Language Understanding (SLU) task: the MEDIA task. Many works and studies have been proposed for many tasks, but most of them are focused on English language and tasks. The exploration of a richer language like French within the framework of a SLU task implies to recent approaches to handle this difficulty. Since the MEDIA task seems to be one of the most difficult, according several previous studies, we propose to explore Neural Networks approaches focusing of three aspects: firstly, the Neural Network inputs and more specifically the word embeddings; secondly, we compared French version of BERT against the best setup through different ways; Finally, the comparison against State-of-the-Art approaches. Results show that the word embeddings trained on a small corpus need to be updated during SLU model training. Furthermore, the French BERT fine-tuned approaches outperform the classical Neural Network Architectures and achieves state of the art results. However, the contextual embeddings extracted from one of the French BERT approaches achieve comparable results in comparison to word embedding, when integrated into the proposed neural architecture. 2020.coling-main.245 @@ -3018,9 +3018,9 @@ Robust Machine Reading Comprehension by Learning Soft labels ZhenyuZhao ShuangzhiWu - MuyunYang + MuyunYang KehaiChen - TiejunZhao + TiejunZhao 2754–2759 Neural models have achieved great success on the task of machine reading comprehension (MRC), which are typically trained on hard labels. We argue that hard labels limit the model capability on generalization due to the label sparseness problem. In this paper, we propose a robust training method for MRC models to address this problem. Our method consists of three strategies, 1) label smoothing, 2) word overlapping, 3) distribution prediction. All of them help to train models on soft labels. We validate our approach on the representative architecture - ALBERT. Experimental results show that our method can greatly boost the baseline with 1% improvement in average, and achieve state-of-the-art performance on NewsQA and QUOREF. 2020.coling-main.248 @@ -3029,11 +3029,11 @@ Reinforced Multi-task Approach for Multi-hop Question Generation - DeepakGupta + DeepakGupta HardikChauhan Ravi TejAkella AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 2760–2775 Question generation (QG) attempts to solve the inverse of question answering (QA) problem by generating a natural language question given a document and an answer. While sequence to sequence neural models surpass rule-based systems for QG, they are limited in their capacity to focus on more than one supporting fact. For QG, we often require multiple supporting facts to generate high-quality questions. Inspired by recent works on multi-hop reasoning in QA, we take up Multi-hop question generation, which aims at generating relevant questions based on supporting facts in the context. We employ multitask learning with the auxiliary task of answer-aware supporting fact prediction to guide the question generator. In addition, we also proposed a question-aware reward function in a Reinforcement Learning (RL) framework to maximize the utilization of the supporting facts. We demonstrate the effectiveness of our approach through experiments on the multi-hop question answering dataset, HotPotQA. Empirical evaluation shows our model to outperform the single-hop neural question generation models on both automatic evaluation metrics such as BLEU, METEOR, and ROUGE and human evaluation metrics for quality and coverage of the generated questions. 2020.coling-main.249 @@ -3084,7 +3084,7 @@ AparnaGarimella CarmenBanea NabilHossain - RadaMihalcea + RadaMihalcea 2814–2825 The subjective nature of humor makes computerized humor generation a challenging task. We propose an automatic humor generation framework for filling the blanks in Mad Libs® stories, while accounting for the demographic backgrounds of the desired audience. We collect a dataset consisting of such stories, which are filled in and judged by carefully selected workers on Amazon Mechanical Turk. We build upon the BERT platform to predict location-biased word fillings in incomplete sentences, and we fine-tune BERT to classify location-specific humor in a sentence. We leverage these components to produce YodaLib, a fully-automated Mad Libs style humor generation framework, which selects and ranks appropriate candidate words and sentences in order to generate a coherent and funny story tailored to certain demographics. Our experimental results indicate that YodaLib outperforms a previous semi-automated approach proposed for this task, while also surpassing human annotators in both qualitative and quantitative analyses. 2020.coling-main.253 @@ -3105,7 +3105,7 @@ Noise Isn’t Always Negative: Countering Exposure Bias in Sequence-to-Sequence Inflection Models GarrettNicolai - MiikkaSilfverberg + MiikkaSilfverberg 2837–2846 Morphological inflection, like many sequence-to-sequence tasks, sees great performance from recurrent neural architectures when data is plentiful, but performance falls off sharply in lower-data settings. We investigate one aspect of neural seq2seq models that we hypothesize contributes to overfitting - teacher forcing. By creating different training and test conditions, exposure bias increases the likelihood that a system too closely models its training data. Experiments show that teacher-forced models struggle to recover when they enter unknown territory. However, a simple modification to the training algorithm to more closely mimic test conditions creates models that are better able to generalize to unseen environments. 2020.coling-main.255 @@ -3137,7 +3137,7 @@ Computational Modeling of Affixoid Behavior in <fixed-case>C</fixed-case>hinese Morphology Yu-HsiangTseng - Shu-KaiHsieh + Shu-KaiHsieh Pei-YiChen SaraCourt 2879–2888 @@ -3173,7 +3173,7 @@ Autoregressive Affective Language Forecasting: A Self-Supervised Task MatthewMatero - H. AndrewSchwartz + H. AndrewSchwartz 2913–2923 Human natural language is mentioned at a specific point in time while human emotions change over time. While much work has established a strong link between language use and emotional states, few have attempted to model emotional language in time. Here, we introduce the task of affective language forecasting – predicting future change in language based on past changes of language, a task with real-world applications such as treating mental health or forecasting trends in consumer confidence. We establish some of the fundamental autoregressive characteristics of the task (necessary history size, static versus dynamic length, varying time-step resolutions) and then build on popular sequence models for words to instead model sequences of language-based emotion in time. Over a novel Twitter dataset of 1,900 users and weekly + daily scores for 6 emotions and 2 additional linguistic attributes, we find a novel dual-sequence GRU model with decayed hidden states achieves best results (r = .66) significantly out-predicting, e.g., a moving averaging based on the past time-steps (r = .49). We make our anonymized dataset as well as task setup and evaluation code available for others to build on. 2020.coling-main.261 @@ -3194,7 +3194,7 @@ End to End <fixed-case>C</fixed-case>hinese Lexical Fusion Recognition with Sememe Knowledge YijiangLiu MeishanZhang - DonghongJi + DonghongJi 2935–2946 In this paper, we present Chinese lexical fusion recognition, a new task which could be regarded as one kind of coreference recognition. First, we introduce the task in detail, showing the relationship with coreference recognition and differences from the existing tasks. Second, we propose an end-to-end model for the task, handling mentions as well as coreference relationship jointly. The model exploits the state-of-the-art contextualized BERT representations as an encoder, and is further enhanced with the sememe knowledge from HowNet by graph attention networks. We manually annotate a benchmark dataset for the task and then conduct experiments on it. Results demonstrate that our final model is effective and competitive for the task. Detailed analysis is offered for comprehensively understanding the new task and our proposed model. 2020.coling-main.263 @@ -3271,7 +3271,7 @@ When Beards Start Shaving Men: A Subject-object Resolution Test Suite for Morpho-syntactic and Semantic Model Introspection PatriciaFischer Daniëlde Kok - ErhardHinrichs + ErhardHinrichs 3019–3035 In this paper, we introduce the SORTS Subject-Object Resolution Test Suite of German minimal sentence pairs for model introspection. The full test suite consists of 18,502 transitive clauses with manual annotations of 8 word order patterns, 5 morphological and syntactic and 11 semantic property classes. The test suite has been constructed such that sentences are minimal pairs with respect to a property class. Each property has been selected with a particular focus on its effect on subject-object resolution, the second-most error-prone task within syntactic parsing of German after prepositional phrase attachment (Fischer et al., 2019). The size and detail of annotations make the test suite a valuable resource for natural language processing applications with syntactic and semantic tasks. We use dependency parsing to demonstrate how the test suite allows insights into the process of subject-object resolution. Based on the test suite annotations, word order and case syncretism can be identified as most important factors that affect subject-object resolution. 2020.coling-main.269 @@ -3297,7 +3297,7 @@ ShuheiKondo HiroyukiShindo TaroWatanabe - YujiMatsumoto + YujiMatsumoto 3043–3049 We propose a simple method for nominal coordination boundary identification. As the main strength of our method, it can identify the coordination boundaries without training on labeled data, and can be applied even if coordination structure annotations are not available. Our system employs pre-trained word embeddings to measure the similarities of words and detects the span of coordination, assuming that conjuncts share syntactic and semantic similarities. We demonstrate that our method yields good results in identifying coordinated noun phrases in the GENIA corpus and is comparable to a recent supervised method for the case when the coordinator conjoins simple noun phrases. 2020.coling-main.271 @@ -3308,7 +3308,7 @@ Learning Semantic Correspondences from Noisy Data-text Pairs by Local-to-Global Alignments FengNie JinpengWang - Chin-YewLin + Chin-YewLin 3050–3059 Learning semantic correspondences between structured input data (e.g., slot-value pairs) and associated texts is a core problem for many downstream NLP applications, e.g., data-to-text generation. Large-scale datasets recently proposed for generation contain loosely corresponding data text pairs, where part of spans in text cannot be aligned to its incomplete paired input. To learn semantic correspondences from such datasets, we propose a two-stage local-to-global alignment (L2GA) framework. First, a local model based on multi-instance learning is applied to build alignments for texts spans that can be directly grounded to its paired structured input. Then, a novel global model built upon a memory-guided conditional random field (CRF) layer aims to infer missing alignments for text spans which not supported by paired incomplete inputs, where the memory is designed to leverage alignment clues provided by the local model to strengthen the global model. In this way, the local model and global model can work jointly to learn semantic correspondences in the same framework. Experimental results show that our proposed method can be generalized to both restaurant and computer domains and improve the alignment accuracy. 2020.coling-main.272 @@ -3319,7 +3319,7 @@ Definition Frames: Using Definitions for Hybrid Concept Representations EvangeliaSpiliopoulou ArtidoroPagnoni - EduardHovy + EduardHovy 3060–3068 Advances in word representations have shown tremendous improvements in downstream NLP tasks, but lack semantic interpretability. In this paper, we introduce Definition Frames (DF), a matrix distributed representation extracted from definitions, where each dimension is semantically interpretable. DF dimensions correspond to the Qualia structure relations: a set of relations that uniquely define a term. Our results show that DFs have competitive performance with other distributional semantic approaches on word similarity tasks. 2020.coling-main.273 @@ -3334,7 +3334,7 @@ DanielaStepanov HangfengHe DanRoth - LukeZettlemoyer + LukeZettlemoyer IdoDagan 3069–3083 We propose a new semantic scheme for capturing predicate-argument relations for nominalizations, termed QANom. This scheme extends the QA-SRL formalism (He et al., 2015), modeling the relations between nominalizations and their arguments via natural language question-answer pairs. We construct the first QANom dataset using controlled crowdsourcing, analyze its quality and compare it to expertly annotated nominal-SRL annotations, as well as to other QA-driven annotations. In addition, we train a baseline QANom parser for identifying nominalizations and labeling their arguments with question-answer pairs. Finally, we demonstrate the extrinsic utility of our annotations for downstream tasks using both indirect supervision and zero-shot settings. @@ -3348,7 +3348,7 @@ XiaolongJin SaipingGuan JiafengGuo - XueqiCheng + XueqiCheng 3084–3094 Event coreference resolution aims to classify all event mentions that refer to the same real-world event into the same group, which is necessary to information aggregation and many downstream applications. To resolve event coreference, existing methods usually calculate the similarities between event mentions and between specific kinds of event arguments. However, they fail to accurately identify paraphrase relations between events and may suffer from error propagation while extracting event components (i.e., event mentions and their arguments). Therefore, we propose a new model based on Event-specific Paraphrases and Argument-aware Semantic Embeddings, thus called EPASE, for event coreference resolution. EPASE recognizes deep paraphrase relations in an event-specific context of sentences and can cover event paraphrases of more situations, bringing about a better generalization. Additionally, the embeddings of argument roles are encoded into event embedding without relying on a fixed number and type of arguments, which results in the better scalability of EPASE. Experiments on both within- and cross-document event coreference demonstrate its consistent and significant superiority compared to existing methods. 2020.coling-main.275 @@ -3360,7 +3360,7 @@ IrinaNikishina VarvaraLogacheva AlexanderPanchenko - NataliaLoukachevitch + NataliaLoukachevitch 3095–3106 Ontologies, taxonomies, and thesauri have always been in high demand in a large number of NLP tasks. However, most studies are focused on the creation of lexical resources rather than the maintenance of the existing ones and keeping them up-to-date. In this paper, we address the problem of taxonomy enrichment. Namely, we explore the possibilities of taxonomy extension in a resource-poor setting and present several methods which are applicable to a large number of languages. We also create novel English and Russian datasets for training and evaluating taxonomy enrichment systems and describe a technique of creating such datasets for other languages. 2020.coling-main.276 @@ -3434,7 +3434,7 @@ YuHong YangXu ZhenHuang - GuodongZhou + GuodongZhou MinZhang 3168–3178 We tackle implicit discourse relation recognition. Both self-attention and interactive-attention mechanisms have been applied for attention-aware representation learning, which improves the current discourse analysis models. To take advantages of the two attention mechanisms simultaneously, we develop a propagative attention learning model using a cross-coupled two-channel network. We experiment on Penn Discourse Treebank. The test results demonstrate that our model yields substantial improvements over the baselines (BiLSTM and BERT). @@ -3444,15 +3444,15 @@ Dual Attention Model for Citation Recommendation - YangZhang + YangZhang QiangMa 3179–3189 Based on an exponentially increasing number of academic articles, discovering and citing comprehensive and appropriate resources has become a non-trivial task. Conventional citation recommender methods suffer from severe information loss. For example, they do not consider the section of the paper that the user is writing and for which they need to find a citation, the relatedness between the words in the local context (the text span that describes a citation), or the importance on each word from the local context. These shortcomings make such methods insufficient for recommending adequate citations to academic manuscripts. In this study, we propose a novel embedding-based neural network called “dual attention model for citation recommendation (DACR)” to recommend citations during manuscript preparation. Our method adapts embedding of three semantic information: words in the local context, structural contexts, and the section on which a user is working. A neural network model is designed to maximize the similarity between the embedding of the three input (local context words, section and structural contexts) and the target citation appearing in the context. The core of the neural network model is composed of self-attention and additive attention, where the former aims to capture the relatedness between the contextual words and structural context, and the latter aims to learn the importance of them. The experiments on real-world datasets demonstrate the effectiveness of the proposed approach. 2020.coling-main.283 This revision corrects a mistake in Section 5.3, correcting "200 iterations" to "300 iterations". - 10.18653/v1/2020.coling-main.283 Minor correction to Section 5.3 + 10.18653/v1/2020.coling-main.283 zhang-ma-2020-dual @@ -3494,7 +3494,7 @@ YihuanMao YujingWang ChufanWu - ChenZhang + ChenZhang YangWang QuanluZhang YamingYang @@ -3561,7 +3561,7 @@ What Does This Acronym Mean? Introducing a New Dataset for Acronym Identification and Disambiguation AmirPouran Ben Veyseh FranckDernoncourt - Quan HungTran + Quan HungTran Thien HuuNguyen 3285–3301 Acronyms are the short forms of phrases that facilitate conveying lengthy sentences in documents and serve as one of the mainstays of writing. Due to their importance, identifying acronyms and corresponding phrases (i.e., acronym identification (AI)) and finding the correct meaning of each acronym (i.e., acronym disambiguation (AD)) are crucial for text understanding. Despite the recent progress on this task, there are some limitations in the existing datasets which hinder further improvement. More specifically, limited size of manually annotated AI datasets or noises in the automatically created acronym identification datasets obstruct designing advanced high-performing acronym identification models. Moreover, the existing datasets are mostly limited to the medical domain and ignore other domains. In order to address these two limitations, we first create a manually annotated large AI dataset for scientific domain. This dataset contains 17,506 sentences which is substantially larger than previous scientific AI datasets. Next, we prepare an AD dataset for scientific domain with 62,441 samples which is significantly larger than previous scientific AD dataset. Our experiments show that the existing state-of-the-art models fall far behind human-level performance on both datasets proposed by this work. In addition, we propose a new deep learning model which utilizes the syntactical structure of the sentence to expand an ambiguous acronym in a sentence. The proposed model outperforms the state-of-the-art models on the new AD dataset, providing a strong baseline for future research on this dataset. @@ -3585,9 +3585,9 @@ Temporal Relations Annotation and Extrapolation Based on Semi-intervals and Boundig Relations AlejandroPimentel - GemmaBel Enguix + GemmaBel Enguix GerardoSierra Martínez - AzucenaMontes + AzucenaMontes 3313–3323 The computational treatment of temporal relations is based on the work of Allen, who establishes 13 different types, and Freksa, who designs a cognitive procedure to manage them. Freksa’s notation is not widely used because, although it has cognitive and expressive advantages, it is too complex from the computational perspective. This paper proposes a system for the annotation and management of temporal relations that combines the richness and expressiveness of Freksa’s approach with the simplicity of Allen’s notation. Our method is summarized in the application of bounding relations, thanks to which it is possible to obtain the temporary representation of complete neighborhoods capable of representing vague temporal relations such as those that can be frequently found in a text. Such advantages are obtained without the need to greatly increase the complexity of the labeling process since the markup language is almost the same as TimeML, to which only a second temporary “relType”’ type label relationship is added. Our experiments show that the temporal relationships that present vagueness are in fact much more common than those in which a single relationship can be established precisely. For these reasons, our new labeling system achieves a more agreeable representation of temporal relations. 2020.coling-main.294 @@ -3619,7 +3619,7 @@ An Unsupervised Method for Learning Representations of Multi-word Expressions for Semantic Classification RobertVacareanu - Marco A.Valenzuela-Escárcega + Marco A.Valenzuela-Escárcega RebeccaSharp MihaiSurdeanu 3346–3356 @@ -3676,7 +3676,7 @@ Collective Wisdom: Improving Low-resource Neural Machine Translation using Adaptive Knowledge Distillation FahimehSaleh WrayBuntine - GholamrezaHaffari + GholamrezaHaffari 3413–3421 Scarcity of parallel sentence-pairs poses a significant hurdle for training high-quality Neural Machine Translation (NMT) models in bilingually low-resource scenarios. A standard approach is transfer learning, which involves taking a model trained on a high-resource language-pair and fine-tuning it on the data of the low-resource MT condition of interest. However, it is not clear generally which high-resource language-pair offers the best transfer learning for the target MT setting. Furthermore, different transferred models may have complementary semantic and/or syntactic strengths, hence using only one model may be sub-optimal. In this paper, we tackle this problem using knowledge distillation, where we propose to distill the knowledge of ensemble of teacher models to a single student model. As the quality of these teacher models varies, we propose an effective adaptive knowledge distillation approach to dynamically adjust the contribution of the teacher models during the distillation process. Experiments on transferring from a collection of six language pairs from IWSLT to five low-resource language-pairs from TED Talks demonstrate the effectiveness of our approach, achieving up to +0.9 BLEU score improvements compared to strong baselines. 2020.coling-main.302 @@ -3687,7 +3687,7 @@ Enabling Interactive Transcription in an Indigenous Community EricLe Ferrand StevenBird - LaurentBesacier + LaurentBesacier 3422–3428 We propose a novel transcription workflow which combines spoken term detection and human-in-the-loop, together with a pilot experiment. This work is grounded in an almost zero-resource scenario where only a few terms have so far been identified, involving two endangered languages. We show that in the early stages of transcription, when the available data is insufficient to train a robust ASR system, it is possible to take advantage of the transcription of a small number of isolated words in order to bootstrap the transcription of a speech collection. 2020.coling-main.303 @@ -3710,7 +3710,7 @@ CongyingXia WenpengYin TingtingLiang - PhilipYu + PhilipYu LifangHe 3436–3440 Mixup is a latest data augmentation technique that linearly interpolates input examples and the corresponding labels. It has shown strong effectiveness in image classification by interpolating images at the pixel level. Inspired by this line of research, in this paper, we explore i) how to apply mixup to natural language processing tasks since text data can hardly be mixed in the raw format; ii) if mixup is still effective in transformer-based learning models,e.g., BERT.To achieve the goal, we incorporate mixup to transformer-based pre-trained architecture, named“mixup-transformer”, for a wide range of NLP tasks while keeping the whole end-to-end training system. We evaluate the proposed framework by running extensive experiments on the GLUEbenchmark. Furthermore, we also examine the performance of mixup-transformer in low-resource scenarios by reducing the training data with a certain ratio. Our studies show that mixup is a domain-independent data augmentation technique to pre-trained language models, resulting in significant performance improvement for transformer-based models. @@ -3723,7 +3723,7 @@ GiwonHong JunmoKang DoyeonLim - Sung-HyonMyaeng + Sung-HyonMyaeng 3441–3448 Advances in Question Answering (QA) research require additional datasets for new domains, languages, and types of questions, as well as for performance increases. Human creation of a QA dataset like SQuAD, however, is expensive. As an alternative, an unsupervised QA approach has been proposed so that QA training data can be generated automatically. However, the performance of unsupervised QA is much lower than that of supervised QA models. We identify two anomalies in the automatically generated questions and propose how they can be mitigated. We show our approach helps improve unsupervised QA significantly across a number of QA tasks. 2020.coling-main.306 @@ -3735,7 +3735,7 @@ Jheng-HongYang Sheng-ChiehLin RodrigoNogueira - Ming-FengTsai + Ming-FengTsai Chuan-JuWang JimmyLin 3449–3453 @@ -3758,8 +3758,8 @@ Using Bilingual Patents for Translation Training - JohnLee - BenjaminTsou + JohnLee + BenjaminTsou TianyuanCai 3461–3466 While bilingual corpora have been instrumental for machine translation, their utility for training translators has been less explored. We investigate the use of bilingual corpora as pedagogical tools for translation in the technical domain. In a user study, novice translators revised Chinese translations of English patents through bilingual concordancing. Results show that concordancing with an in-domain bilingual corpus can yield greater improvement in translation quality of technical terms than a general-domain bilingual corpus. @@ -3816,7 +3816,7 @@ ChanghanWang JiataoGu DidierSchwab - LaurentBesacier + LaurentBesacier 3520–3533 We introduce dual-decoder Transformer, a new model architecture that jointly performs automatic speech recognition (ASR) and multilingual speech translation (ST). Our models are based on the original Transformer architecture (Vaswani et al., 2017) but consist of two decoders, each responsible for one task (ASR or ST). Our major contribution lies in how these decoders interact with each other: one decoder can attend to different information sources from the other via a dual-attention mechanism. We propose two variants of these architectures corresponding to two different levels of dependencies between the decoders, called the parallel and cross dual-decoder Transformers, respectively. Extensive experiments on the MuST-C dataset show that our models outperform the previously-reported highest translation performance in the multilingual settings, and outperform as well bilingual one-to-one results. Furthermore, our parallel models demonstrate no trade-off between ASR and ST compared to the vanilla multi-task architecture. Our code and pre-trained models are available at https://github.com/formiel/speech-translation. 2020.coling-main.314 @@ -3826,7 +3826,7 @@ Multitask Learning-Based Neural Bridging Reference Resolution JuntaoYu - MassimoPoesio + MassimoPoesio 3534–3546 We propose a multi task learning-based neural model for resolving bridging references tackling two key challenges. The first challenge is the lack of large corpora annotated with bridging references. To address this, we use multi-task learning to help bridging reference resolution with coreference resolution. We show that substantial improvements of up to 8 p.p. can be achieved on full bridging resolution with this architecture. The second challenge is the different definitions of bridging used in different corpora, meaning that hand-coded systems or systems using special features designed for one corpus do not work well with other corpora. Our neural model only uses a small number of corpus independent features, thus can be applied to different corpora. Evaluations with very different bridging corpora (ARRAU, ISNOTES, BASHI and SCICORP) suggest that our architecture works equally well on all corpora, and achieves the SoTA results on full bridging resolution for all corpora, outperforming the best reported results by up to 36.3 p.p.. 2020.coling-main.315 @@ -3840,7 +3840,7 @@ ChristopherKlein MayankGupta WilliamLi - Jason D.Williams + Jason D.Williams 3547–3557 This paper develops and implements a scalable methodology for (a) estimating the noisiness of labels produced by a typical crowdsourcing semantic annotation task, and (b) reducing the resulting error of the labeling process by as much as 20-30% in comparison to other common labeling strategies. Importantly, this new approach to the labeling process, which we name Dynamic Automatic Conflict Resolution (DACR), does not require a ground truth dataset and is instead based on inter-project annotation inconsistencies. This makes DACR not only more accurate but also available to a broad range of labeling tasks. In what follows we present results from a text classification task performed at scale for a commercial personal assistant, and evaluate the inherent ambiguity uncovered by this annotation strategy as compared to other common labeling strategies. 2020.coling-main.316 @@ -3850,8 +3850,8 @@ Automatic Discovery of Heterogeneous Machine Learning Pipelines: An Application to Natural Language Processing SuilanEstevez-Velarde - YoanGutiérrez - AndresMontoyo + YoanGutiérrez + AndresMontoyo YudiviánAlmeida Cruz 3558–3568 This paper presents AutoGOAL, a system for automatic machine learning (AutoML) that uses heterogeneous techniques. In contrast with existing AutoML approaches, our contribution can automatically build machine learning pipelines that combine techniques and algorithms from different frameworks, including shallow classifiers, natural language processing tools, and neural networks. We define the heterogeneous AutoML optimization problem as the search for the best sequence of algorithms that transforms specific input data into the desired output. This provides a novel theoretical and practical approach to AutoML. Our proposal is experimentally evaluated in diverse machine learning problems and compared with alternative approaches, showing that it is competitive with other AutoML alternatives in standard benchmarks. Furthermore, it can be applied to novel scenarios, such as several NLP tasks, where existing alternatives cannot be directly deployed. The system is freely available and includes in-built compatibility with a large number of popular machine learning frameworks, which makes our approach useful for solving practical problems with relative ease and effort. @@ -3933,7 +3933,7 @@ Increasing Learning Efficiency of Self-Attention Networks through Direct Position Interactions, Learnable Temperature, and Convoluted Attention PhilippDufter MartinSchmitt - HinrichSchütze + HinrichSchütze 3630–3636 Self-Attention Networks (SANs) are an integral part of successful neural architectures such as Transformer (Vaswani et al., 2017), and thus of pretrained language models such as BERT (Devlin et al., 2019) or GPT-3 (Brown et al., 2020). Training SANs on a task or pretraining them on language modeling requires large amounts of data and compute resources. We are searching for modifications to SANs that enable faster learning, i.e., higher accuracies after fewer update steps. We investigate three modifications to SANs: direct position interactions, learnable temperature, and convoluted attention. When evaluating them on part-of-speech tagging, we find that direct position interactions are an alternative to position embeddings, and convoluted attention has the potential to speed up the learning process. 2020.coling-main.324 @@ -3943,7 +3943,7 @@ Picking <fixed-case>BERT</fixed-case>’s Brain: Probing for Linguistic Dependencies in Contextualized Embeddings Using Representational Similarity Analysis MichaelLepori - R. ThomasMcCoy + R. ThomasMcCoy 3637–3651 As the name implies, contextualized representations of language are typically motivated by their ability to encode context. Which aspects of context are captured by such representations? We introduce an approach to address this question using Representational Similarity Analysis (RSA). As case studies, we investigate the degree to which a verb embedding encodes the verb’s subject, a pronoun embedding encodes the pronoun’s antecedent, and a full-sentence representation encodes the sentence’s head word (as determined by a dependency parse). In all cases, we show that BERT’s contextualized embeddings reflect the linguistic dependency being studied, and that BERT encodes these dependencies to a greater degree than it encodes less linguistically-salient controls. These results demonstrate the ability of our approach to adjudicate between hypotheses about which aspects of context are encoded in representations of language. 2020.coling-main.325 @@ -3969,7 +3969,7 @@ XipengQiu QipengGuo YaruHu - XuanjingHuang + XuanjingHuang ZhengZhang 3660–3670 With the emerging branch of incorporating factual knowledge into pre-trained language models such as BERT, most existing models consider shallow, static, and separately pre-trained entity embeddings, which limits the performance gains of these models. Few works explore the potential of deep contextualized knowledge representation when injecting knowledge. In this paper, we propose the Contextualized Language and Knowledge Embedding (CoLAKE), which jointly learns contextualized representation for both language and knowledge with the extended MLM objective. Instead of injecting only entity embeddings, CoLAKE extracts the knowledge context of an entity from large-scale knowledge bases. To handle the heterogeneity of knowledge context and language context, we integrate them in a unified data structure, word-knowledge graph (WK graph). CoLAKE is pre-trained on large-scale WK graphs with the modified Transformer encoder. We conduct experiments on knowledge-driven tasks, knowledge probing tasks, and language understanding tasks. Experimental results show that CoLAKE outperforms previous counterparts on most of the tasks. Besides, CoLAKE achieves surprisingly high performance on our synthetic task called word-knowledge graph completion, which shows the superiority of simultaneously contextualizing language and knowledge representation. @@ -3993,7 +3993,7 @@ RanWang KunTao JialiZeng - XinyuDai + XinyuDai 3684–3695 Machine reading comprehension (MRC) is the task that asks a machine to answer questions based on a given context. For Chinese MRC, due to the non-literal and non-compositional semantic characteristics, Chinese idioms pose unique challenges for machines to understand. Previous studies tend to treat idioms separately without fully exploiting the relationship among them. In this paper, we first define the concept of literal meaning coverage to measure the consistency between semantics and literal meanings for Chinese idioms. With the definition, we prove that the literal meanings of many idioms are far from their semantics, and we also verify that the synonymic relationship can mitigate this inconsistency, which would be beneficial for idiom comprehension. Furthermore, to fully utilize the synonymic relationship, we propose the synonym knowledge enhanced reader. Specifically, for each idiom, we first construct a synonym graph according to the annotations from the high-quality synonym dictionary or the cosine similarity between the pre-trained idiom embeddings and then incorporate the graph attention network and gate mechanism to encode the graph. Experimental results on ChID, a large-scale Chinese idiom reading comprehension dataset, show that our model achieves state-of-the-art performance. 2020.coling-main.329 @@ -4005,7 +4005,7 @@ HaonanLi MariaVasardani MartinTomko - TimothyBaldwin + TimothyBaldwin 3696–3707 Existing metonymy resolution approaches rely on features extracted from external resources like dictionaries and hand-crafted lexical resources. In this paper, we propose an end-to-end word-level classification approach based only on BERT, without dependencies on taggers, parsers, curated dictionaries of place names, or other external resources. We show that our approach achieves the state-of-the-art on 5 datasets, surpassing conventional BERT models and benchmarks by a large margin. We also show that our approach generalises well to unseen data. 2020.coling-main.330 @@ -4153,7 +4153,7 @@ An Analysis of Simple Data Augmentation for Named Entity Recognition - XiangDai + XiangDai HeikeAdel 3861–3867 Simple yet effective data augmentation techniques have been proposed for sentence-level and sentence-pair natural language processing tasks. Inspired by these efforts, we design and compare data augmentation for named entity recognition, which is usually modeled as a token-level sequence labeling problem. Through experiments on two data sets from the biomedical and materials science domains (i2b2-2010 and MaSciP), we show that simple augmentation can boost performance for both recurrent and transformer-based models, especially for small training sets. @@ -4175,7 +4175,7 @@ Towards Instance-Level Parser Selection for Cross-Lingual Transfer of Dependency Parsers RobertLitschko IvanVulić - ŽeljkoAgić + ŽeljkoAgić GoranGlavaš 3886–3898 Current methods of cross-lingual parser transfer focus on predicting the best parser for a low-resource target language globally, that is, “at treebank level”. In this work, we propose and argue for a novel cross-lingual transfer paradigm: instance-level parser selection (ILPS), and present a proof-of-concept study focused on instance-level selection in the framework of delexicalized parser transfer. Our work is motivated by an empirical observation that different source parsers are the best choice for different Universal POS-sequences (i.e., UPOS sentences) in the target language. We then propose to predict the best parser at the instance level. To this end, we train a supervised regression model, based on the Transformer architecture, to predict parser accuracies for individual POS-sequences. We compare ILPS against two strong single-best parser selection baselines (SBPS): (1) a model that compares POS n-gram distributions between the source and target languages (KL) and (2) a model that selects the source based on the similarity between manually created language vectors encoding syntactic properties of languages (L2V). The results from our extensive evaluation, coupling 42 source parsers and 20 diverse low-resource test languages, show that ILPS outperforms KL and L2V on 13/20 and 14/20 test languages, respectively. Further, we show that by predicting the best parser “at treebank level” (SBPS), using the aggregation of predictions from our instance-level model, we outperform the same baselines on 17/20 and 16/20 test languages. @@ -4208,7 +4208,7 @@ Integrating Domain Terminology into Neural Machine Translation EliseMichon - JosepCrego + JosepCrego JeanSenellart 3925–3937 This paper extends existing work on terminology integration into Neural Machine Translation, a common industrial practice to dynamically adapt translation to a specific domain. Our method, based on the use of placeholders complemented with morphosyntactic annotation, efficiently taps into the ability of the neural network to deal with symbolic knowledge to surpass the surface generalization shown by alternative techniques. We compare our approach to state-of-the-art systems and benchmark them through a well-defined evaluation framework, focusing on actual application of terminology and not just on the overall performance. Results indicate the suitability of our method in the use-case where terminology is used in a system trained on generic data only. @@ -4218,7 +4218,7 @@ Understanding the effects of word-level linguistic annotations in under-resourced neural machine translation - Víctor M.Sánchez-Cartagena + Víctor M.Sánchez-Cartagena Juan AntonioPérez-Ortiz FelipeSánchez-Martínez 3938–3950 @@ -4232,7 +4232,7 @@ MarcoGaido BeatriceSavoldi LuisaBentivogli - MatteoNegri + MatteoNegri MarcoTurchi 3951–3964 In automatic speech translation (ST), traditional cascade approaches involving separate transcription and translation steps are giving ground to increasingly competitive and more robust direct solutions. In particular, by translating speech audio data without intermediate transcription, direct ST models are able to leverage and preserve essential information present in the input (e.g.speaker’s vocal characteristics) that is otherwise lost in the cascade framework. Although such ability proved to be useful for gender translation, direct ST is nonetheless affected by gender bias just like its cascade counterpart, as well as machine translation and numerous other natural language processing applications. Moreover, direct ST systems that exclusively rely on vocal biometric features as a gender cue can be unsuitable or even potentially problematic for certain users. Going beyond speech signals, in this paper we compare different approaches to inform direct ST models about the speaker’s gender and test their ability to handle gender translation from English into Italian and French. To this aim, we manually annotated large datasets with speak-ers’ gender information and used them for experiments reflecting different possible real-world scenarios. Our results show that gender-aware direct ST solutions can significantly outperform strong – but gender-unaware – direct ST models. In particular, the translation of gender-marked words can increase up to 30 points in accuracy while preserving overall translation quality. @@ -4280,8 +4280,8 @@ Comparative Probing of Lexical Semantics Theories for Cognitive Plausibility and Technological Usefulness - AntónioBranco - JoãoAntónio Rodrigues + AntónioBranco + JoãoAntónio Rodrigues MalgorzataSalawa RubenBranco ChakavehSaedi @@ -4419,7 +4419,7 @@ Towards Topic-Guided Conversational Recommender System KunZhou YuanhangZhou - Wayne XinZhao + Wayne XinZhao XiaokeWang Ji-RongWen 4128–4139 @@ -4455,7 +4455,7 @@ Deconstruct to Reconstruct a Configurable Evaluation Metric for Open-Domain Dialogue Systems VitouPhy YangZhao - AkikoAizawa + AkikoAizawa 4164–4178 Many automatic evaluation metrics have been proposed to score the overall quality of a response in open-domain dialogue. Generally, the overall quality is comprised of various aspects, such as relevancy, specificity, and empathy, and the importance of each aspect differs according to the task. For instance, specificity is mandatory in a food-ordering dialogue task, whereas fluency is preferred in a language-teaching dialogue system. However, existing metrics are not designed to cope with such flexibility. For example, BLEU score fundamentally relies only on word overlapping, whereas BERTScore relies on semantic similarity between reference and candidate response. Thus, they are not guaranteed to capture the required aspects, i.e., specificity. To design a metric that is flexible to a task, we first propose making these qualities manageable by grouping them into three groups: understandability, sensibleness, and likability, where likability is a combination of qualities that are essential for a task. We also propose a simple method to composite metrics of each aspect to obtain a single metric called USL-H, which stands for Understandability, Sensibleness, and Likability in Hierarchy. We demonstrated that USL-H score achieves good correlations with human judgment and maintains its configurability towards different aspects and metrics. 2020.coling-main.368 @@ -4466,8 +4466,8 @@ Suggest me a movie for tonight: Leveraging Knowledge Graphs for Conversational Recommendation RajdeepSarkar KoustavaGoswami - MihaelArcan - John P.McCrae + MihaelArcan + John P.McCrae 4179–4189 Conversational recommender systems focus on the task of suggesting products to users based on the conversation flow. Recently, the use of external knowledge in the form of knowledge graphs has shown to improve the performance in recommendation and dialogue systems. Information from knowledge graphs aids in enriching those systems by providing additional information such as closely related products and textual descriptions of the items. However, knowledge graphs are incomplete since they do not contain all factual information present on the web. Furthermore, when working on a specific domain, knowledge graphs in its entirety contribute towards extraneous information and noise. In this work, we study several subgraph construction methods and compare their performance across the recommendation task. We incorporate pre-trained embeddings from the subgraphs along with positional embeddings in our models. Extensive experiments show that our method has a relative improvement of at least 5.62% compared to the state-of-the-art on multiple metrics on the recommendation task. 2020.coling-main.369 @@ -4477,7 +4477,7 @@ <fixed-case>H</fixed-case>i<fixed-case>T</fixed-case>rans: A Transformer-Based Context- and Speaker-Sensitive Model for Emotion Detection in Conversations JingyeLi - DonghongJi + DonghongJi FeiLi MeishanZhang YijiangLiu @@ -4514,7 +4514,7 @@ A Two-Level Interpretation of Modality in Human-Robot Dialogue LuciaDonatelli KennethLai - JamesPustejovsky + JamesPustejovsky 4222–4238 We analyze the use and interpretation of modal expressions in a corpus of situated human-robot dialogue and ask how to effectively represent these expressions for automatic learning. We present a two-level annotation scheme for modality that captures both content and intent, integrating a logic-based, semantic representation and a task-oriented, pragmatic representation that maps to our robot’s capabilities. Data from our annotation task reveals that the interpretation of modal expressions in human-robot dialogue is quite diverse, yet highly constrained by the physical environment and asymmetrical speaker/addressee relationship. We sketch a formal model of human-robot common ground in which modality can be grounded and dynamically interpreted. 2020.coling-main.373 @@ -4528,8 +4528,8 @@ KehaiChen XugangLu MasaoUtiyama - EiichiroSumita - TiejunZhao + EiichiroSumita + TiejunZhao 4239–4250 Unsupervised neural machine translation (UNMT) has recently attracted great interest in the machine translation community. The main advantage of the UNMT lies in its easy collection of required large training text sentences while with only a slightly worse performance than supervised neural machine translation which requires expensive annotated translation pairs on some translation tasks. In most studies, the UMNT is trained with clean data without considering its robustness to the noisy data. However, in real-world scenarios, there usually exists noise in the collected input sentences which degrades the performance of the translation system since the UNMT is sensitive to the small perturbations of the input sentences. In this paper, we first time explicitly take the noisy data into consideration to improve the robustness of the UNMT based systems. First of all, we clearly defined two types of noises in training sentences, i.e., word noise and word order noise, and empirically investigate its effect in the UNMT, then we propose adversarial training methods with denoising process in the UNMT. Experimental results on several language pairs show that our proposed methods substantially improved the robustness of the conventional UNMT systems in noisy scenarios. 2020.coling-main.374 @@ -4553,7 +4553,7 @@ RajDabre ChenchenDing MasaoUtiyama - EiichiroSumita + EiichiroSumita 4263–4274 In this study, linguistic knowledge at different levels are incorporated into the neural machine translation (NMT) framework to improve translation quality for language pairs with extremely limited data. Integrating manually designed or automatically extracted features into the NMT framework is known to be beneficial. However, this study emphasizes that the relevance of the features is crucial to the performance. Specifically, we propose two methods, 1) self relevance and 2) word-based relevance, to improve the representation of features for NMT. Experiments are conducted on translation tasks from English to eight Asian languages, with no more than twenty thousand sentences for training. The proposed methods improve translation quality for all tasks by up to 3.09 BLEU points. Discussions with visualization provide the explainability of the proposed methods where we show that the relevance methods provide weights to features thereby enhancing their impact on low-resource machine translation. 2020.coling-main.376 @@ -4579,7 +4579,7 @@ MasaoUtiyama AkihiroTamura TakashiNinomiya - EiichiroSumita + EiichiroSumita 4287–4297 This paper proposed a new subword segmentation method for neural machine translation, “Bilingual Subword Segmentation,” which tokenizes sentences to minimize the difference between the number of subword units in a sentence and that of its translation. While existing subword segmentation methods tokenize a sentence without considering its translation, the proposed method tokenizes a sentence by using subword units induced from bilingual sentences; this method could be more favorable to machine translation. Evaluations on WAT Asian Scientific Paper Excerpt Corpus (ASPEC) English-to-Japanese and Japanese-to-English translation tasks and WMT14 English-to-German and German-to-English translation tasks show that our bilingual subword segmentation improves the performance of Transformer neural machine translation (up to +0.81 BLEU). 2020.coling-main.378 @@ -4627,7 +4627,7 @@ SupratikBhattacharya ShravanNayak TimoBaumann - MatteoNegri + MatteoNegri MarcoTurchi 4327–4333 Dubbing has two shades; synchronisation constraints are applied only when the actor’s mouth is visible on screen, while the translation is unconstrained for off-screen dubbing. Consequently, different synchronisation requirements, and therefore translation strategies, are applied depending on the type of dubbing. In this work, we manually annotate an existing dubbing corpus (Heroes) for this dichotomy. We show that, even though we did not observe distinctive features between on- and off-screen dubbing at the textual level, on-screen dubbing is more difficult for MT (-4 BLEU points). Moreover, synchronisation constraints dramatically decrease translation quality for off-screen dubbing. We conclude that, distinguishing between on-screen and off-screen dubbing is necessary for determining successful strategies for dubbing-customised Machine Translation. @@ -4638,7 +4638,7 @@ Filtering Back-Translated Data in Unsupervised Neural Machine Translation JyotsanaKhatri - PushpakBhattacharyya + PushpakBhattacharyya 4334–4339 Unsupervised neural machine translation (NMT) utilizes only monolingual data for training. The quality of back-translated data plays an important role in the performance of NMT systems. In back-translation, all generated pseudo parallel sentence pairs are not of the same quality. Taking inspiration from domain adaptation where in-domain sentences are given more weight in training, in this paper we propose an approach to filter back-translated data as part of the training process of unsupervised NMT. Our approach gives more weight to good pseudo parallel sentence pairs in the back-translation phase. We calculate the weight of each pseudo parallel sentence pair using sentence-wise round-trip BLEU score which is normalized batch-wise. We compare our approach with the current state of the art approaches for unsupervised NMT. 2020.coling-main.383 @@ -4649,7 +4649,7 @@ Lost in Back-Translation: Emotion Preservation in Neural Machine Translation EnricaTroiano RomanKlinger - SebastianPadó + SebastianPadó 4340–4354 Machine translation provides powerful methods to convert text between languages, and is therefore a technology enabling a multilingual world. An important part of communication, however, takes place at the non-propositional level (e.g., politeness, formality, emotions), and it is far from clear whether current MT methods properly translate this information. This paper investigates the specific hypothesis that the non-propositional level of emotions is at least partially lost in MT. We carry out a number of experiments in a back-translation setup and establish that (1) emotions are indeed partially lost during translation; (2) this tendency can be reversed almost completely with a simple re-ranking approach informed by an emotion classifier, taking advantage of diversity in the n-best list; (3) the re-ranking approach can also be applied to change emotions, obtaining a model for emotion style transfer. An in-depth qualitative analysis reveals that there are recurring linguistic changes through which emotions are toned down or amplified, such as change of modality. 2020.coling-main.384 @@ -4658,8 +4658,8 @@ Intermediate Self-supervised Learning for Machine Translation Quality Estimation - RaphaelRubino - EiichiroSumita + RaphaelRubino + EiichiroSumita 4355–4360 Pre-training sentence encoders is effective in many natural language processing tasks including machine translation (MT) quality estimation (QE), due partly to the scarcity of annotated QE data required for supervised learning. In this paper, we investigate the use of an intermediate self-supervised learning task for sentence encoder aiming at improving QE performances at the sentence and word levels. Our approach is motivated by a problem inherent to QE: mistakes in translation caused by wrongly inserted and deleted tokens. We modify the translation language model (TLM) training objective of the cross-lingual language model (XLM) to orientate the pre-trained model towards the target task. The proposed method does not rely on annotated data and is complementary to QE methods involving pre-trained sentence encoders and domain adaptation. Experiments on English-to-German and English-to-Russian translation directions show that intermediate learning improves over domain adaptated models. Additionally, our method reaches results in par with state-of-the-art QE models without requiring the combination of several approaches and outperforms similar methods based on pre-trained sentence encoders. 2020.coling-main.385 @@ -4670,7 +4670,7 @@ Unifying Input and Output Smoothing in Neural Machine Translation YingboGao BaohaoLiao - HermannNey + HermannNey 4361–4372 Soft contextualized data augmentation is a recent method that replaces one-hot representation of words with soft posterior distributions of an external language model, smoothing the input of neural machine translation systems. Label smoothing is another effective method that penalizes over-confident model outputs by discounting some probability mass from the true target word, smoothing the output of neural machine translation systems. Having the benefit of updating all word vectors in each optimization step and better regularizing the models, the two smoothing methods are shown to bring significant improvements in translation performance. In this work, we study how to best combine the methods and stack the improvements. Specifically, we vary the prior distributions to smooth with, the hyperparameters that control the smoothing strength, and the token selection procedures. We conduct extensive experiments on small datasets, evaluate the recipes on larger datasets, and examine the implications when back-translation is further used. Our results confirm cumulative improvements when input and output smoothing are used in combination, giving up to +1.9 BLEU scores on standard machine translation tasks and reveal reasons why these smoothing methods should be preferred. 2020.coling-main.386 @@ -4681,7 +4681,7 @@ Neural Transduction for Multilingual Lexical Translation DylanLewis WinstonWu - Arya D.McCarthy + Arya D.McCarthy DavidYarowsky 4373–4384 We present a method for completing multilingual translation dictionaries. Our probabilistic approach can synthesize new word forms, allowing it to operate in settings where correct translations have not been observed in text (cf. cross-lingual embeddings). In addition, we propose an approximate Maximum Mutual Information (MMI) decoding objective to further improve performance in both many-to-one and one-to-one word level translation tasks where we use either multiple input languages for a single target language or more typical single language pair translation. The model is trained in a many-to-many setting, where it can leverage information from related languages to predict words in each of its many target languages. We focus on 6 languages: French, Spanish, Italian, Portuguese, Romanian, and Turkish. When indirect multilingual information is available, ensembling with mixture-of-experts as well as incorporating related languages leads to a 27% relative improvement in whole-word accuracy of predictions over a single-source baseline. To seed the completion when multilingual data is unavailable, it is better to decode with an MMI objective. @@ -4757,7 +4757,7 @@ MauajamaFirdaus HardikChauhan AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 4441–4453 Emotion and sentiment classification in dialogues is a challenging task that has gained popularity in recent times. Humans tend to have multiple emotions with varying intensities while expressing their thoughts and feelings. Emotions in an utterance of dialogue can either be independent or dependent on the previous utterances, thus making the task complex and interesting. Multi-label emotion detection in conversations is a significant task that provides the ability to the system to understand the various emotions of the users interacting. Sentiment analysis in dialogue/conversation, on the other hand, helps in understanding the perspective of the user with respect to the ongoing conversation. Along with text, additional information in the form of audio and video assist in identifying the correct emotions with the appropriate intensity and sentiments in an utterance of a dialogue. Lately, quite a few datasets have been made available for dialogue emotion and sentiment classification, but these datasets are imbalanced in representing different emotions and consist of an only single emotion. Hence, we present at first a large-scale balanced Multimodal Multi-label Emotion, Intensity, and Sentiment Dialogue dataset (MEISD), collected from different TV series that has textual, audio and visual features, and then establish a baseline setup for further research. 2020.coling-main.393 @@ -4782,7 +4782,7 @@ Leveraging Discourse Rewards for Document-Level Neural Machine Translation InigoJauregi Unanue NazaninEsmaili - GholamrezaHaffari + GholamrezaHaffari MassimoPiccardi 4467–4482 Document-level machine translation focuses on the translation of entire documents from a source to a target language. It is widely regarded as a challenging task since the translation of the individual sentences in the document needs to retain aspects of the discourse at document level. However, document-level translation models are usually not trained to explicitly ensure discourse quality. Therefore, in this paper we propose a training approach that explicitly optimizes two established discourse metrics, lexical cohesion and coherence, by using a reinforcement learning objective. Experiments over four different language pairs and three translation domains have shown that our training approach has been able to achieve more cohesive and coherent document translations than other competitive approaches, yet without compromising the faithfulness to the reference translation. In the case of the Zh-En language pair, our method has achieved an improvement of 2.46 percentage points (pp) in LC and 1.17 pp in COH over the runner-up, while at the same time improving 0.63 pp in BLEU score and 0.47 pp in F-BERT. @@ -4842,7 +4842,7 @@ Living Machines: A study of atypical animacy - MarionaColl Ardanuy + MarionaColl Ardanuy FedericoNanni KasparBeelen KasraHosseini @@ -4863,7 +4863,7 @@ ThomasKober MaliheAlikhani MatthewStone - MarkSteedman + MarkSteedman 4546–4562 The interpretation of the lexical aspect of verbs in English plays a crucial role in tasks such as recognizing textual entailment and learning discourse-level inferences. We show that two elementary dimensions of aspectual class, states vs. events, and telic vs. atelic events, can be modelled effectively with distributional semantics. We find that a verb’s local context is most indicative of its aspectual class, and we demonstrate that closed class words tend to be stronger discriminating contexts than content words. Our approach outperforms previous work on three datasets. Further, we present a new dataset of human-human conversations annotated with lexical aspects and present experiments that show the correlation of telicity with genre and discourse goals. 2020.coling-main.401 @@ -4875,7 +4875,7 @@ AnneLauscher LilyNg CourtneyNapoles - JoelTetreault + JoelTetreault 4563–4574 Though preceding work in computational argument quality (AQ) mostly focuses on assessing overall AQ, researchers agree that writers would benefit from feedback targeting individual dimensions of argumentation theory. However, a large-scale theory-based corpus and corresponding computational models are missing. We fill this gap by conducting an extensive analysis covering three diverse domains of online argumentative writing and presenting GAQCorpus: the first large-scale English multi-domain (community Q&A forums, debate forums, review forums) corpus annotated with theory-based AQ scores. We then propose the first computational approaches to theory-based assessment, which can serve as strong baselines for future work. We demonstrate the feasibility of large-scale AQ annotation, show that exploiting relations between dimensions yields performance improvements, and explore the synergies between theory-based prediction and practical AQ assessment. 2020.coling-main.402 @@ -4885,7 +4885,7 @@ A Linguistic Perspective on Reference: Choosing a Feature Set for Generating Referring Expressions in Context FahimeSame - Keesvan Deemter + Keesvan Deemter 4575–4586 This paper reports on a structured evaluation of feature-based Machine Learning algorithms for selecting the form of a referring expression in discourse context. Based on this evaluation, we selected seven feature sets from the literature, amounting to 65 distinct linguistic features. The features were then grouped into 9 broad classes. After building Random Forest models, we used Feature Importance Ranking and Sequential Forward Search methods to assess the “importance” of the features. Combining the results of the two methods, we propose a consensus feature set. The 6 features in our consensus set come from 4 different classes, namely grammatical role, inherent features of the referent, antecedent form and recency. 2020.coling-main.403 @@ -4972,7 +4972,7 @@ Semi-supervised <fixed-case>URL</fixed-case> Segmentation with Recurrent Neural Networks Pre-trained on Knowledge Graph Entities HaoZhang JaeRo - RichardSproat + RichardSproat 4667–4675 Breaking domain names such as openresearch into component words open and research is important for applications like Text-to-Speech synthesis and web search. We link this problem to the classic problem of Chinese word segmentation and show the effectiveness of a tagging model based on Recurrent Neural Networks (RNNs) using characters as input. To compensate for the lack of training data, we propose a pre-training method on concatenated entity names in a large knowledge database. Pre-training improves the model by 33% and brings the sequence accuracy to 85%. 2020.coling-main.411 @@ -5001,11 +5001,11 @@ Detecting Urgency Status of Crisis Tweets: A Transfer Learning Approach for Low Resource Languages - EfsunSarioglu Kayi + EfsunSarioglu Kayi LinyongNan BohanQu - MonaDiab - KathleenMcKeown + MonaDiab + KathleenMcKeown 4693–4703 We release an urgency dataset that consists of English tweets relating to natural crises, along with annotations of their corresponding urgency status. Additionally, we release evaluation datasets for two low-resource languages, i.e. Sinhala and Odia, and demonstrate an effective zero-shot transfer from English to these two languages by training cross-lingual classifiers. We adopt cross-lingual embeddings constructed using different methods to extract features of the tweets, including a few state-of-the-art contextual embeddings such as BERT, RoBERTa and XLM-R. We train classifiers of different architectures on the extracted features. We also explore semi-supervised approaches by utilizing unlabeled tweets and experiment with ensembling different classifiers. With very limited amounts of labeled data in English and zero data in the low resource languages, we show a successful framework of training monolingual and cross-lingual classifiers using deep learning methods which are known to be data hungry. Specifically, we show that the recent deep contextual embeddings are also helpful when dealing with very small-scale datasets. Classifiers that incorporate RoBERTa yield the best performance for English urgency detection task, with F1 scores that are more than 25 points over our baseline classifier. For the zero-shot transfer to low resource languages, classifiers that use LASER features perform the best for Sinhala transfer while XLM-R features benefit the Odia transfer the most. 2020.coling-main.414 @@ -5042,7 +5042,7 @@ DarioStojanovski BennoKrojer DenisPeskov - AlexanderFraser + AlexanderFraser 4732–4749 Recent high scores on pronoun translation using context-aware neural machine translation have suggested that current approaches work well. ContraPro is a notable example of a contrastive challenge set for English→German pronoun translation. The high scores achieved by transformer models may suggest that they are able to effectively model the complicated set of inferences required to carry out pronoun translation. This entails the ability to determine which entities could be referred to, identify which entity a source-language pronoun refers to (if any), and access the target-language grammatical gender for that entity. We first show through a series of targeted adversarial attacks that in fact current approaches are not able to model all of this information well. Inserting small amounts of distracting information is enough to strongly reduce scores, which should not be the case. We then create a new template test set ContraCAT, designed to individually assess the ability to handle the specific steps necessary for successful pronoun translation. Our analyses show that current approaches to context-aware NMT rely on a set of surface heuristics, which break down when translations require real reasoning. We also propose an approach for augmenting the training data, with some improvements. 2020.coling-main.417 @@ -5137,7 +5137,7 @@ Manual Clustering and Spatial Arrangement of Verbs for Multilingual Evaluation and Typology Analysis OlgaMajewska IvanVulić - DianaMcCarthy + DianaMcCarthy AnnaKorhonen 4810–4824 We present the first evaluation of the applicability of a spatial arrangement method (SpAM) to a typologically diverse language sample, and its potential to produce semantic evaluation resources to support multilingual NLP, with a focus on verb semantics. We demonstrate SpAM’s utility in allowing for quick bottom-up creation of large-scale evaluation datasets that balance cross-lingual alignment with language specificity. Starting from a shared sample of 825 English verbs, translated into Chinese, Japanese, Finnish, Polish, and Italian, we apply a two-phase annotation process which produces (i) semantic verb classes and (ii) fine-grained similarity scores for nearly 130 thousand verb pairs. We use the two types of verb data to (a) examine cross-lingual similarities and variation, and (b) evaluate the capacity of static and contextualised representation models to accurately reflect verb semantics, contrasting the performance of large language specific pretraining models with their multilingual equivalent on semantic clustering and lexical similarity, across different domains of verb meaning. We release the data from both phases as a large-scale multilingual resource, comprising 85 verb classes and nearly 130k pairwise similarity scores, offering a wealth of possibilities for further evaluation and research on multilingual verb semantics. @@ -5150,7 +5150,7 @@ JingunKwon HidetakaKamigaito Young-InSong - ManabuOkumura + ManabuOkumura 4825–4834 Recently, automatic trivia fact extraction has attracted much research interest. Modern search engines have begun to provide trivia facts as the information for entities because they can motivate more user engagement. In this paper, we propose a new unsupervised algorithm that automatically mines trivia facts for a given entity. Unlike previous studies, the proposed algorithm targets at a single Wikipedia article and leverages its hierarchical structure via top-down processing. Thus, the proposed algorithm offers two distinctive advantages: it does not incur high computation time, and it provides a domain-independent approach for extracting trivia facts. Experimental results demonstrate that the proposed algorithm is over 100 times faster than the existing method which considers Wikipedia categories. Human evaluation demonstrates that the proposed algorithm can mine better trivia facts regardless of the target entity domain and outperforms the existing methods. 2020.coling-main.424 @@ -5174,7 +5174,7 @@ RamitSawhney ArnavWadhwa ShivamAgarwal - Rajiv RatnShah + Rajiv RatnShah 4847–4859 Parliamentary debates present a valuable language resource for analyzing comprehensive options in electing representatives under a functional, free society. However, the esoteric nature of political speech coupled with non-linguistic aspects such as political cohesion between party members presents a complex and underexplored task of contextual parliamentary debate analysis. We introduce GPolS, a neural model for political speech sentiment analysis jointly exploiting both semantic language representations and relations between debate transcripts, motions, and political party members. Through experiments on real-world English data and by visualizing attention, we provide a use case of GPolS as a tool for political speech analysis and polarity prediction. 2020.coling-main.426 @@ -5243,7 +5243,7 @@ Balanced Joint Adversarial Training for Robust Intent Detection and Slot Filling XuCao - DeyiXiong + DeyiXiong ChongyangShi ChaoWang YaoMeng @@ -5270,7 +5270,7 @@ Understanding Unnatural Questions Improves Reasoning over Text XiaoyuGuo Yuan-FangLi - GholamrezaHaffari + GholamrezaHaffari 4949–4955 Complex question answering (CQA) over raw text is a challenging task. A prominent approach to this task is based on the programmer-interpreter framework, where the programmer maps the question into a sequence of reasoning actions and the interpreter then executes these actions on the raw text. Learning an effective CQA model requires large amounts of human-annotated data, consisting of the ground-truth sequence of reasoning actions, which is time-consuming and expensive to collect at scale. In this paper, we address the challenge of learning a high-quality programmer (parser) by projecting natural human-generated questions into unnatural machine-generated questions which are more convenient to parse. We firstly generate synthetic (question, action sequence) pairs by a data generator, and train a semantic parser that associates synthetic questions with their corresponding action sequences. To capture the diversity when applied to natural questions, we learn a projection model to map natural questions into their most similar unnatural questions for which the parser can work well. Without any natural training data, our projection model provides high-quality action sequences for the CQA task. Experimental results show that the QA model trained exclusively with synthetic data outperforms its state-of-the-art counterpart trained on human-labeled data. 2020.coling-main.434 @@ -5294,7 +5294,7 @@ A Large-Scale Corpus of <fixed-case>E</fixed-case>-mail Conversations with Standard and Two-Level Dialogue Act Annotations MotokiTaniguchi - YoshihiroUeda + YoshihiroUeda TomokiTaniguchi TomokoOhkuma 4969–4980 @@ -5382,7 +5382,7 @@ EmmanuelleEsperança-Rodier FrancisBrunet-Manquat JakobVerbeek - LaurentBesacier + LaurentBesacier 5047–5058 We conduct in this work an evaluation study comparing offline and online neural machine translation architectures. Two sequence-to-sequence models: convolutional Pervasive Attention (Elbayad et al. 2018) and attention-based Transformer (Vaswani et al. 2017) are considered. We investigate, for both architectures, the impact of online decoding constraints on the translation quality through a carefully designed human evaluation on English-German and German-English language pairs, the latter being particularly sensitive to latency constraints. The evaluation results allow us to identify the strengths and shortcomings of each model when we shift to the online setup. 2020.coling-main.443 @@ -5391,7 +5391,7 @@ Informative Manual Evaluation of Machine Translation Output - MajaPopović + MajaPopović 5059–5069 This work proposes a new method for manual evaluation of Machine Translation (MT) output based on marking actual issues in the translated text. The novelty is that the evaluators are not assigning any scores, nor classifying errors, but marking all problematic parts (words, phrases, sentences) of the translation. The main advantage of this method is that the resulting annotations do not only provide overall scores by counting words with assigned tags, but can be further used for analysis of errors and challenging linguistic phenomena, as well as inter-annotator disagreements. Detailed analysis and understanding of actual problems are not enabled by typical manual evaluations where the annotators are asked to assign overall scores or to rank two or more translations. The proposed method is very general: it can be applied on any genre/domain and language pair, and it can be guided by various types of quality criteria. Also, it is not restricted to MT output, but can be used for other types of generated text. 2020.coling-main.444 @@ -5401,8 +5401,8 @@ <fixed-case>T</fixed-case>rans<fixed-case>Q</fixed-case>uest: Translation Quality Estimation with Cross-lingual Transformers TharinduRanasinghe - ConstantinOrasan - RuslanMitkov + ConstantinOrasan + RuslanMitkov 5070–5081 Recent years have seen big advances in the field of sentence-level quality estimation (QE), largely as a result of using neural-based architectures. However, the majority of these methods work only on the language pair they are trained on and need retraining for new language pairs. This process can prove difficult from a technical point of view and is usually computationally expensive. In this paper we propose a simple QE framework based on cross-lingual transformers, and we use it to implement and evaluate two different neural architectures. Our evaluation shows that the proposed methods achieve state-of-the-art results outperforming current open-source quality estimation frameworks when trained on datasets from WMT. In addition, the framework proves very useful in transfer learning settings, especially when dealing with low-resourced languages, allowing us to obtain very competitive results. 2020.coling-main.445 @@ -5413,7 +5413,7 @@ Monolingual and Multilingual Reduction of Gender Bias in Contextualized Representations ShengLiang PhilippDufter - HinrichSchütze + HinrichSchütze 5082–5093 Pretrained language models (PLMs) learn stereotypes held by humans and reflected in text from their training corpora, including gender bias. When PLMs are used for downstream tasks such as picking candidates for a job, people’s lives can be negatively affected by these learned stereotypes. Prior work usually identifies a linear gender subspace and removes gender information by eliminating the subspace. Following this line of work, we propose to use DensRay, an analytical method for obtaining interpretable dense subspaces. We show that DensRay performs on-par with prior approaches, but provide arguments that it is more robust and provide indications that it preserves language model performance better. By applying DensRay to attention heads and layers of BERT we show that gender information is spread across all attention heads and most of the layers. Also we show that DensRay can obtain gender bias scores on both token and sentence levels. Finally, we demonstrate that we can remove bias multilingually, e.g., from Chinese, using only English training data. 2020.coling-main.446 @@ -5493,7 +5493,7 @@ MohammedAldawsari AdrianPerez DeyaBanisakher - MarkFinlayson + MarkFinlayson 5171–5180 Determining whether an event in a news article is a foreground or background event would be useful in many natural language processing tasks, for example, temporal relation extraction, summarization, or storyline generation. We introduce the task of distinguishing between foreground and background events in news articles as well as identifying the general temporal position of background events relative to the foreground period (past, present, future, and their combinations). We achieve good performance (0.73 F1 for background vs. foreground and temporal position, and 0.79 F1 for background vs. foreground only) on a dataset of news articles by leveraging discourse information in a featurized model. We release our implementation and annotated data for other researchers 2020.coling-main.453 @@ -5526,9 +5526,9 @@ Explain by Evidence: An Explainable Memory-based Neural Network for Question Answering - Quan HungTran + Quan HungTran NhanDam - TuanLai + TuanLai FranckDernoncourt TrungLe NhamLe @@ -5563,7 +5563,7 @@ Hy-<fixed-case>NLI</fixed-case>: a Hybrid system for Natural Language Inference Aikaterini-LidaKalouli RichardCrouch - Valeriade Paiva + Valeriade Paiva 5235–5249 Despite the advances in Natural Language Inference through the training of massive deep models, recent work has revealed the generalization difficulties of such models, which fail to perform on adversarial datasets with challenging linguistic phenomena. Such phenomena, however, can be handled well by symbolic systems. Thus, we propose Hy-NLI, a hybrid system that learns to identify an NLI pair as linguistically challenging or not. Based on that, it uses its symbolic or deep learning component, respectively, to make the final inference decision. We show how linguistically less complex cases are best solved by robust state-of-the-art models, like BERT and XLNet, while hard linguistic phenomena are best handled by our implemented symbolic engine. Our thorough evaluation shows that our hybrid system achieves state-of-the-art performance across mainstream and adversarial datasets and opens the way for further research into the hybrid direction. 2020.coling-main.459 @@ -5584,7 +5584,7 @@ Global Context-enhanced Graph Convolutional Networks for Document-level Relation Extraction - HuiweiZhou + HuiweiZhou YibinXu WeihongYao ZheLiu @@ -5614,7 +5614,7 @@ BingLiu PararthShah BingLiu - PhilipYu + PhilipYu 5288–5308 We study an end-to-end approach for conversational recommendation that dynamically manages and reasons over users’ past (offline) preferences and current (online) requests through a structured and cumulative user memory knowledge graph. This formulation extends existing state tracking beyond the boundary of a single dialog to user state tracking (UST). For this study, we create a new Memory Graph (MG) <-> Conversational Recommendation parallel corpus called MGConvRex with 7K+ human-to-human role-playing dialogs, grounded on a large-scale user memory bootstrapped from real-world user scenarios. MGConvRex captures human-level reasoning over user memory and has disjoint training/testing sets of users for zero-shot (cold-start) reasoning for recommendation. We propose a simple yet expandable formulation for constructing and updating the MG, and an end-to-end graph-based reasoning model that updates MG from unstructured utterances and predicts optimal dialog policies (eg recommendation) based on updated MG. The prediction of our proposed model inherits the graph structure, providing a natural way to explain policies. Experiments are conducted for both offline metrics and online simulation, showing competitive results. 2020.coling-main.463 @@ -5625,7 +5625,7 @@ Diverse and Non-redundant Answer Set Extraction on Community <fixed-case>QA</fixed-case> based on <fixed-case>DPP</fixed-case>s ShogoFujita TomohideShibata - ManabuOkumura + ManabuOkumura 5309–5320 In community-based question answering (CQA) platforms, it takes time for a user to get useful information from among many answers. Although one solution is an answer ranking method, the user still needs to read through the top-ranked answers carefully. This paper proposes a new task of selecting a diverse and non-redundant answer set rather than ranking the answers. Our method is based on determinantal point processes (DPPs), and it calculates the answer importance and similarity between answers by using BERT. We built a dataset focusing on a Japanese CQA site, and the experiments on this dataset demonstrated that the proposed method outperformed several baseline methods. 2020.coling-main.464 @@ -5663,7 +5663,7 @@ LizhenQu YueZhuo MahsaBaktashmotlagh - GholamrezaHaffari + GholamrezaHaffari 5347–5359 Commonsense reasoning refers to the ability of evaluating a social situation and acting accordingly. Identification of the implicit causes and effects of a social context is the driving capability which can enable machines to perform commonsense reasoning. The dynamic world of social interactions requires context-dependent on-demand systems to infer such underlying information. However, current approaches in this realm lack the ability to perform commonsense reasoning upon facing an unseen situation, mostly due to incapability of identifying a diverse range of implicit social relations. Hence they fail to estimate the correct reasoning path. In this paper, we present Conditional Seq2Seq-based Mixture model (CosMo), which provides us with the capabilities of dynamic and diverse content generation. We use CosMo to generate context-dependent clauses, which form a dynamic Knowledge Graph (KG) on-the-fly for commonsense reasoning. To show the adaptability of our model to context-dependant knowledge generation, we address the task of zero-shot commonsense question answering. The empirical results indicate an improvement of up to +5.2% over the state-of-the-art models. 2020.coling-main.467 @@ -5698,7 +5698,7 @@ ShahbazSyed RoxanneEl Baff JohannesKiesel - KhalidAl Khatib + KhalidAl Khatib BennoStein MartinPotthast 5384–5396 @@ -5713,7 +5713,7 @@ SatoruOzaki AntoniosAnastasopoulos GrahamNeubig - LoriLevin + LoriLevin 5397–5408 Interlinear Glossed Text (IGT) is a widely used format for encoding linguistic information in language documentation projects and scholarly papers. Manual production of IGT takes time and requires linguistic expertise. We attempt to address this issue by creating automatic glossing models, using modern multi-source neural models that additionally leverage easy-to-collect translations. We further explore cross-lingual transfer and a simple output length control mechanism, further refining our models. Evaluated on three challenging low-resource scenarios, our approach significantly outperforms a recent, state-of-the-art baseline, particularly improving on overall accuracy as well as lemma and tag recall. 2020.coling-main.471 @@ -5777,7 +5777,7 @@ Words are the Window to the Soul: Language-based User Representations for Fake News Detection MarcoDel Tredici - RaquelFernández + RaquelFernández 5467–5479 Cognitive and social traits of individuals are reflected in language use. Moreover, individuals who are prone to spread fake news online often share common traits. Building on these ideas, we introduce a model that creates representations of individuals on social media based only on the language they produce, and use them to detect fake news. We show that language-based user representations are beneficial for this task. We also present an extended analysis of the language of fake news spreaders, showing that its main features are mostly domain independent and consistent across two English datasets. Finally, we exploit the relation between language use and connections in the social graph to assess the presence of the Echo Chamber effect in our data. 2020.coling-main.477 @@ -5825,7 +5825,7 @@ Go Simple and Pre-Train on Domain-Specific Corpora: On the Role of Training Data for Text Classification AleksandraEdwards - JoseCamacho-Collados + JoseCamacho-Collados HélèneDe Ribaupierre AlunPreece 5522–5529 @@ -5852,7 +5852,7 @@ HikariTanabe TetsujiOgawa TetsunoriKobayashi - YoshihikoHayashi + YoshihikoHayashi 5535–5540 Recognition of the mental state of a human character in text is a major challenge in natural language processing. In this study, we investigate the efficacy of the narrative context in recognizing the emotional states of human characters in text and discuss an approach to make use of a priori knowledge regarding the employed emotion category system. Specifically, we experimentally show that the accuracy of emotion classification is substantially increased by encoding the preceding context of the target sentence using a BERT-based text encoder. We also compare ways to incorporate a priori knowledge of emotion categories by altering the loss function used in training, in which our proposal of multi-task learning that jointly learns to classify positive/negative polarity of emotions is included. The experimental results suggest that, when using Plutchik’s Wheel of Emotions, it is better to jointly classify the basic emotion categories with positive/negative polarity rather than directly exploiting its characteristic structure in which eight basic categories are arranged in a wheel. 2020.coling-main.483 @@ -5907,7 +5907,7 @@ Automatically Identifying Words That Can Serve as Labels for Few-Shot Text Classification TimoSchick HelmutSchmid - HinrichSchütze + HinrichSchütze 5569–5578 A recent approach for few-shot text classification is to convert textual inputs to cloze questions that contain some form of task description, process them with a pretrained language model and map the predicted words to labels. Manually defining this mapping between words and labels requires both domain expertise and an understanding of the language model’s abilities. To mitigate this issue, we devise an approach that automatically finds such a mapping given small amounts of training data. For a number of tasks, the mapping found by our approach performs almost as well as hand-crafted label-to-word mappings. 2020.coling-main.488 @@ -5917,7 +5917,7 @@ Knowledge Base Embedding By Cooperative Knowledge Distillation RaphaëlSourty - Jose G.Moreno + Jose G.Moreno François-PaulServant LyndaTamine-Lechani 5579–5590 @@ -5991,7 +5991,7 @@ <fixed-case>WSL</fixed-case>-<fixed-case>DS</fixed-case>: Weakly Supervised Learning with Distant Supervision for Query Focused Multi-Document Abstractive Summarization Md Tahmid RahmanLaskar EnamulHoque - Jimmy XiangjiHuang + Jimmy XiangjiHuang 5647–5654 In the Query Focused Multi-Document Summarization (QF-MDS) task, a set of documents and a query are given where the goal is to generate a summary from these documents based on the given query. However, one major challenge for this task is the lack of availability of labeled training datasets. To overcome this issue, in this paper, we propose a novel weakly supervised learning approach via utilizing distant supervision. In particular, we use datasets similar to the target dataset as the training data where we leverage pre-trained sentence similarity models to generate the weak reference summary of each individual document in a document set from the multi-document gold reference summaries. Then, we iteratively train our summarization model on each single-document to alleviate the computational complexity issue that occurs while training neural summarization models in multiple documents (i.e., long sequences) at once. Experimental results on the Document Understanding Conferences (DUC) datasets show that our proposed approach sets a new state-of-the-art result in terms of various evaluation metrics. 2020.coling-main.495 @@ -6051,7 +6051,7 @@ An Anchor-Based Automatic Evaluation Metric for Document Summarization KexiangWang TianyuLiu - BaobaoChang + BaobaoChang ZhifangSui 5696–5701 The widespread adoption of reference-based automatic evaluation metrics such as ROUGE has promoted the development of document summarization. In this paper, we consider a new protocol for designing reference-based metrics that require the endorsement of source document(s). Following protocol, we propose an anchored ROUGE metric fixing each summary particle on source document, which bases the computation on more solid ground. Empirical results on benchmark datasets validate that source document helps to induce a higher correlation with human judgments for ROUGE metric. Being self-explanatory and easy-to-implement, the protocol can naturally foster various effective designs of reference-based metrics besides the anchored ROUGE introduced here. @@ -6124,7 +6124,7 @@ XiaominChu PeifengLi FangKong - QiaomingZhu + QiaomingZhu 5749–5759 Discourse structure tree construction is the fundamental task of discourse parsing and most previous work focused on English. Due to the cultural and linguistic differences, existing successful methods on English discourse parsing cannot be transformed into Chinese directly, especially in paragraph level suffering from longer discourse units and fewer explicit connectives. To alleviate the above issues, we propose two reading modes, i.e., the global backward reading and the local reverse reading, to construct Chinese paragraph level discourse trees. The former processes discourse units from the end to the beginning in a document to utilize the left-branching bias of discourse structure in Chinese, while the latter reverses the position of paragraphs in a discourse unit to enhance the differentiation of coherence between adjacent discourse units. The experimental results on Chinese MCDTB demonstrate that our model outperforms all strong baselines. 2020.coling-main.506 @@ -6144,7 +6144,7 @@ Variation in Coreference Strategies across Genres and Production Media - BerfinAktaş + BerfinAktaş ManfredStede 5774–5785 In response to (i) inconclusive results in the literature as to the properties of coreference chains in written versus spoken language, and (ii) a general lack of work on automatic coreference resolution on both spoken language and social media, we undertake a corpus study involving the various genre sections of Ontonotes, the Switchboard corpus, and a corpus of Twitter conversations. Using a set of measures that previously have been applied individually to different data sets, we find fairly clear patterns of “behavior” for the different genres/media. Besides their role for psycholinguistic investigation (why do we employ different coreference strategies when we write or speak) and for the placement of Twitter in the spoken–written continuum, we see our results as a contribution to approaching genre-/media-specific coreference resolution. @@ -6157,7 +6157,7 @@ KordulaDe Kuthy MadeeswaranKannan HaemanthSanthi Ponnusamy - DetmarMeurers + DetmarMeurers 5786–5798 Questions under Discussion (QUD; Roberts, 2012) are emerging as a conceptually fruitful approach to spelling out the connection between the information structure of a sentence and the nature of the discourse in which the sentence can function. To make this approach useful for analyzing authentic data, Riester, Brunetti & De Kuthy (2018) presented a discourse annotation framework based on explicit pragmatic principles for determining a QUD for every assertion in a text. De Kuthy et al. (2018) demonstrate that this supports more reliable discourse structure annotation, and Ziai and Meurers (2018) show that based on explicit questions, automatic focus annotation becomes feasible. But both approaches are based on manually specified questions. In this paper, we present an automatic question generation approach to partially automate QUD annotation by generating all potentially relevant questions for a given sentence. While transformation rules can concisely capture the typical question formation process, a rule-based approach is not sufficiently robust for authentic data. We therefore employ the transformation rules to generate a large set of sentence-question-answer triples and train a neural question generation model on them to obtain both systematic question type coverage and robustness. 2020.coling-main.509 @@ -6172,7 +6172,7 @@ YouzhengWu XiaodongHe BowenZhou - TiejunZhao + TiejunZhao 5799–5809 This paper aims to enhance the few-shot relation classification especially for sentences that jointly describe multiple relations. Due to the fact that some relations usually keep high co-occurrence in the same context, previous few-shot relation classifiers struggle to distinguish them with few annotated instances. To alleviate the above relation confusion problem, we propose CTEG, a model equipped with two novel mechanisms to learn to decouple these easily-confused relations. On the one hand, an Entity -Guided Attention (EGA) mechanism, which leverages the syntactic relations and relative positions between each word and the specified entity pair, is introduced to guide the attention to filter out information causing confusion. On the other hand, a Confusion-Aware Training (CAT) method is proposed to explicitly learn to distinguish relations by playing a pushing-away game between classifying a sentence into a true relation and its confusing relation. Extensive experiments are conducted on the FewRel dataset, and the results show that our proposed model achieves comparable and even much better results to strong baselines in terms of accuracy. Furthermore, the ablation test and case study verify the effectiveness of our proposed EGA and CAT, especially in addressing the relation confusion problem. 2020.coling-main.510 @@ -6197,7 +6197,7 @@ João MarcosMunguba Vieira Ericados Santos Rodrigues ElisângelaNogueira Teixeira - SandraAluísio + SandraAluísio 5821–5831 Sentence complexity assessment is a relatively new task in Natural Language Processing. One of its aims is to highlight in a text which sentences are more complex to support the simplification of contents for a target audience (e.g., children, cognitively impaired users, non-native speakers and low-literacy readers (Scarton and Specia, 2018)). This task is evaluated using datasets of pairs of aligned sentences including the complex and simple version of the same sentence. For Brazilian Portuguese, the task was addressed by (Leal et al., 2018), who set up the first dataset to evaluate the task in this language, reaching 87.8% of accuracy with linguistic features. The present work advances these results, using models inspired by (Gonzalez-Garduño and Søgaard, 2018), which hold the state-of-the-art for the English language, with multi-task learning and eye-tracking measures. First-Pass Duration, Total Regression Duration and Total Fixation Duration were used in two moments; first to select a subset of linguistic features and then as an auxiliary task in the multi-task and sequential learning models. The best model proposed here reaches the new state-of-the-art for Portuguese with 97.5% accuracy 1 , an increase of almost 10 points compared to the best previous results, in addition to proposing improvements in the public dataset after analysing the errors of our best model. 2020.coling-main.512 @@ -6232,7 +6232,7 @@ AliEmami KaheerSuleman AdamTrischler - Jackie Chi KitCheung + Jackie Chi KitCheung 5855–5865 The Winograd Schema Challenge (WSC) and variants inspired by it have become important benchmarks for common-sense reasoning (CSR). Model performance on the WSC has quickly progressed from chance-level to near-human using neural language models trained on massive corpora. In this paper, we analyze the effects of varying degrees of overlaps that occur between these corpora and the test instances in WSC-style tasks. We find that a large number of test instances overlap considerably with the pretraining corpora on which state-of-the-art models are trained, and that a significant drop in classification accuracy occurs when models are evaluated on instances with minimal overlap. Based on these results, we provide the WSC-Web dataset, consisting of over 60k pronoun disambiguation problems scraped from web data, being both the largest corpus to date, and having a significantly lower proportion of overlaps with current pretraining corpora. 2020.coling-main.515 @@ -6251,9 +6251,9 @@ DelaneyLothian AidanPine CarolineRunning Wolf - EddieSantos + EddieSantos DarleneStewart - GillesBoulianne + GillesBoulianne VishwaGupta BrianMaracle Owennatékha Akwiratékha’Martin @@ -6286,7 +6286,7 @@ Don’t Patronize Me! An Annotated Dataset with Patronizing and Condescending Language towards Vulnerable Communities CarlaPerez Almendros - LuisEspinosa Anke + LuisEspinosa Anke StevenSchockaert 5891–5902 In this paper, we introduce a new annotated dataset which is aimed at supporting the development of NLP models to identify and categorize language that is patronizing or condescending towards vulnerable communities (e.g. refugees, homeless people, poor families). While the prevalence of such language in the general media has long been shown to have harmful effects, it differs from other types of harmful language, in that it is generally used unconsciously and with good intentions. We furthermore believe that the often subtle nature of patronizing and condescending language (PCL) presents an interesting technical challenge for the NLP community. Our analysis of the proposed dataset shows that identifying PCL is hard for standard NLP models, with language models such as BERT achieving the best results. @@ -6320,7 +6320,7 @@ KunsongZhao JinLiu GuangyouZhou - Jimmy XiangjiHuang + Jimmy XiangjiHuang 5918–5928 Cross-lingual entity alignment, which aims to match equivalent entities in KGs with different languages, has attracted considerable focus in recent years. Recently, many graph neural network (GNN) based methods are proposed for entity alignment and obtain promising results. However, existing GNN-based methods consider the two KGs independently and learn embeddings for different KGs separately, which ignore the useful pre-aligned links between two KGs. In this paper, we propose a novel Contextual Alignment Enhanced Cross Graph Attention Network (CAECGAT) for the task of cross-lingual entity alignment, which is able to jointly learn the embeddings in different KGs by propagating cross-KG information through pre-aligned seed alignments. We conduct extensive experiments on three benchmark cross-lingual entity alignment datasets. The experimental results demonstrate that our proposed method obtains remarkable performance gains compared to state-of-the-art methods. 2020.coling-main.520 @@ -6356,8 +6356,8 @@ <fixed-case>W</fixed-case>iki<fixed-case>UMLS</fixed-case>: Aligning <fixed-case>UMLS</fixed-case> to <fixed-case>W</fixed-case>ikipedia via Cross-lingual Neural Ranking AfshinRahimi - TimothyBaldwin - KarinVerspoor + TimothyBaldwin + KarinVerspoor 5957–5962 We present our work on aligning the Unified Medical Language System (UMLS) to Wikipedia, to facilitate manual alignment of the two resources. We propose a cross-lingual neural reranking model to match a UMLS concept with a Wikipedia page, which achieves a recall@1of 72%, a substantial improvement of 20% over word- and char-level BM25, enabling manual alignment with minimal effort. We release our resources, including ranked Wikipedia pages for 700k UMLSconcepts, and WikiUMLS, a dataset for training and evaluation of alignment models between UMLS and Wikipedia collected from Wikidata. This will provide easier access to Wikipedia for health professionals, patients, and NLP systems, including in multilingual settings. 2020.coling-main.523 @@ -6369,9 +6369,9 @@ SantanuPal HongfeiXu NicoHerbig - Sudip KumarNaskar + Sudip KumarNaskar AntonioKrüger - Josefvan Genabith + Josefvan Genabith 5963–5974 In automatic post-editing (APE) it makes sense to condition post-editing (pe) decisions on both the source (src) and the machine translated text (mt) as input. This has led to multi-encoder based neural APE approaches. A research challenge now is the search for architectures that best support the capture, preparation and provision of src and mt information and its integration with pe decisions. In this paper we present an efficient multi-encoder based APE model, called transference. Unlike previous approaches, it (i) uses a transformer encoder block for src, (ii) followed by a decoder block, but without masking for self-attention on mt, which effectively acts as second encoder combining src –> mt, and (iii) feeds this representation into a final decoder block generating pe. Our model outperforms the best performing systems by 1 BLEU point on the WMT 2016, 2017, and 2018 English–German APE shared tasks (PBSMT and NMT). Furthermore, the results of our model on the WMT 2019 APE task using NMT data shows a comparable performance to the state-of-the-art system. The inference time of our model is similar to the vanilla transformer-based NMT system although our model deals with two separate encoders. We further investigate the importance of our newly introduced second encoder and find that a too small amount of layers does hurt the performance, while reducing the number of layers of the decoder does not matter much. 2020.coling-main.524 @@ -6409,7 +6409,7 @@ MartinLaville AmirHazem EmmanuelMorin - PhillippeLanglais + PhillippeLanglais 6002–6012 Narrow specialized comparable corpora are often small in size. This particularity makes it difficult to build efficient models to acquire translation equivalents, especially for less frequent and rare words. One way to overcome this issue is to enrich the specialized corpora with out-of-domain resources. Although some recent studies have shown improvements using data augmentation, the enrichment method was roughly conducted by adding out-of-domain data with no particular attention given to how to enrich words and how to do it optimally. In this paper, we contrast several data selection techniques to improve bilingual lexicon induction from specialized comparable corpora. We first apply two well-established data selection techniques often used in machine translation that is: Tf-Idf and cross entropy. Then, we propose to exploit BERT for data selection. Overall, all the proposed techniques improve the quality of the extracted bilingual lexicons by a large margin. The best performing model is the cross entropy, obtaining a gain of about 4 points in MAP while decreasing computation time by a factor of 10. 2020.coling-main.527 @@ -6450,8 +6450,8 @@ Combining Word Embeddings with Bilingual Orthography Embeddings for Bilingual Dictionary Induction SilviaSeverini ViktorHangya - AlexanderFraser - HinrichSchütze + AlexanderFraser + HinrichSchütze 6044–6055 Bilingual dictionary induction (BDI) is the task of accurately translating words to the target language. It is of great importance in many low-resource scenarios where cross-lingual training data is not available. To perform BDI, bilingual word embeddings (BWEs) are often used due to their low bilingual training signal requirements. They achieve high performance, but problematic cases still remain, such as the translation of rare words or named entities, which often need to be transliterated. In this paper, we enrich BWE-based BDI with transliteration information by using Bilingual Orthography Embeddings (BOEs). BOEs represent source and target language transliteration word pairs with similar vectors. A key problem in our BDI setup is to decide which information source – BWEs (or semantics) vs. BOEs (or orthography) – is more reliable for a particular word pair. We propose a novel classification-based BDI system that uses BWEs, BOEs and a number of other features to make this decision. We test our system on English-Russian BDI and show improved performance. In addition, we show the effectiveness of our BOEs by successfully using them for transliteration mining based on cosine similarity. 2020.coling-main.531 @@ -6460,9 +6460,9 @@ Understanding Translationese in Multi-view Embedding Spaces - KoelDutta Chowdhury + KoelDutta Chowdhury CristinaEspaña-Bonet - Josefvan Genabith + Josefvan Genabith 6056–6062 Recent studies use a combination of lexical and syntactic features to show that footprints of the source language remain visible in translations, to the extent that it is possible to predict the original source language from the translation. In this paper, we focus on embedding-based semantic spaces, exploiting departures from isomorphism between spaces built from original target language and translations into this target language to predict relations between languages in an unsupervised way. We use different views of the data — words, parts of speech, semantic tags and synsets — to track translationese. Our analysis shows that (i) semantic distances between original target language and translations into this target language can be detected using the notion of isomorphism, (ii) language family ties with characteristics similar to linguistically motivated phylogenetic trees can be inferred from the distances and (iii) with delexicalised embeddings exhibiting source-language interference most significantly, other levels of abstraction display the same tendency, indicating the lexicalised results to be not “just” due to possible topic differences between original and translated texts. To the best of our knowledge, this is the first time departures from isomorphism between embedding spaces are used to track translationese. 2020.coling-main.532 @@ -6472,7 +6472,7 @@ Building The First <fixed-case>E</fixed-case>nglish-<fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese Corpus for Automatic Post-Editing FelipeAlmeida Costa - ThiagoCastro Ferreira + ThiagoCastro Ferreira AdrianaPagano WagnerMeira 6063–6069 @@ -6485,7 +6485,7 @@ Analysing cross-lingual transfer in lemmatisation for <fixed-case>I</fixed-case>ndian languages KumarSaurav KumarSaunack - PushpakBhattacharyya + PushpakBhattacharyya 6070–6076 Lemmatization aims to reduce the sparse data problem by relating the inflected forms of a word to its dictionary form. However, most of the prior work on this topic has focused on high resource languages. In this paper, we evaluate cross-lingual approaches for low resource languages, especially in the context of morphologically rich Indian languages. We test our model on six languages from two different families and develop linguistic insights into each model’s performance. 2020.coling-main.534 @@ -6508,7 +6508,7 @@ LabibaJahan RahulMittal W. VictorYarlott - MarkFinlayson + MarkFinlayson 6089–6100 One of the most fundamental elements of narrative is character: if we are to understand a narrative, we must be able to identify the characters of that narrative. Therefore, character identification is a critical task in narrative natural language understanding. Most prior work has lacked a narratologically grounded definition of character, instead relying on simplified or implicit definitions that do not capture essential distinctions between characters and other referents in narratives. In prior work we proposed a preliminary definition of character that was based in clear narratological principles: a character is an animate entity that is important to the plot. Here we flesh out this concept, demonstrate that it can be reliably annotated (0.78 Cohen’s κ), and provide annotations of 170 narrative texts, drawn from 3 different corpora, containing 1,347 character co-reference chains and 21,999 non-character chains that include 3,937 animate chains. Furthermore, we have shown that a supervised classifier using a simple set of easily computable features can effectively identify these characters (overall F1 of 0.90). A detailed error analysis shows that character identification is first and foremost affected by co-reference quality, and further, that the shorter a chain is the harder it is to effectively identify as a character. We release our code and data for the benefit of other researchers 2020.coling-main.536 @@ -6527,9 +6527,9 @@ Free the Plural: Unrestricted Split-Antecedent Anaphora Resolution JuntaoYu - Nafise SadatMoosavi + Nafise SadatMoosavi SilviuPaun - MassimoPoesio + MassimoPoesio 6113–6125 Now that the performance of coreference resolvers on the simpler forms of anaphoric reference has greatly improved, more attention is devoted to more complex aspects of anaphora. One limitation of virtually all coreference resolution models is the focus on single-antecedent anaphors. Plural anaphors with multiple antecedents-so-called split-antecedent anaphors (as in John met Mary. They went to the movies) have not been widely studied, because they are not annotated in ONTONOTES and are relatively infrequent in other corpora. In this paper, we introduce the first model for unrestricted resolution of split-antecedent anaphors. We start with a strong baseline enhanced by BERT embeddings, and show that we can substantially improve its performance by addressing the sparsity issue. To do this, we experiment with auxiliary corpora where split-antecedent anaphors were annotated by the crowd, and with transfer learning models using element-of bridging references and single-antecedent coreference as auxiliary tasks. Evaluation on the gold annotated ARRAU corpus shows that the out best model uses a combination of three auxiliary corpora achieved F1 scores of 70% and 43.6% when evaluated in a lenient and strict setting, respectively, i.e., 11 and 21 percentage points gain when compared with our baseline. 2020.coling-main.538 @@ -6553,7 +6553,7 @@ Fact vs. Opinion: the Role of Argumentation Features in News Classification TariqAlhindi SmarandaMuresan - DanielPreotiuc-Pietro + DanielPreotiuc-Pietro 6139–6149 A 2018 study led by the Media Insight Project showed that most journalists think that a clearmarking of what is news reporting and what is commentary or opinion (e.g., editorial, op-ed)is essential for gaining public trust. We present an approach to classify news articles into newsstories (i.e., reporting of factual information) and opinion pieces using models that aim to sup-plement the article content representation with argumentation features. Our hypothesis is thatthe nature of argumentative discourse is important in distinguishing between news stories andopinion articles. We show that argumentation features outperform linguistic features used previ-ously and improve on fine-tuned transformer-based models when tested on data from publishersunseen in training. Automatically flagging opinion pieces vs. news stories can aid applicationssuch as fact-checking or event extraction. 2020.coling-main.540 @@ -6591,7 +6591,7 @@ Multilingual Epidemiological Text Classification: A Comparative Study StephenMutuvi - EmanuelaBoros + EmanuelaBoros AntoineDoucet AdamJatowt GaëlLejeune @@ -6634,7 +6634,7 @@ Chieh-HanWu PoChunChen KuansanWang - Shou-deLin + Shou-deLin 6207–6216 We focus on a recently deployed system built for summarizing academic articles by concept tagging. The system has shown great coverage and high accuracy of concept identification which could be contributed by the knowledge acquired from millions of publications. Provided with the interpretable concepts and knowledge encoded in a pre-trained neural model, we investigate whether the tagged concepts can be applied to a broader class of applications. We propose transforming the tagged concepts into sparse vectors as representations of academic documents. The effectiveness of the representations is analyzed theoretically by a proposed framework. We also empirically show that the representations can have advantages on academic topic discovery and paper recommendation. On these applications, we reveal that the knowledge encoded in the tagging system can be effectively utilized and can help infer additional features from data with limited information. 2020.coling-main.546 @@ -6645,7 +6645,7 @@ “What is on your mind?” Automated Scoring of Mindreading in Childhood and Early Adolescence VenelinKovatchev PhillipSmith - MarkLee + MarkLee ImogenGrumley Traynor IreneLuque Aguilera RoryDevine @@ -6658,8 +6658,8 @@ A Deep Metric Learning Method for Biomedical Passage Retrieval AndrésRosso-Mateus - Fabio A.González - ManuelMontes-y-Gómez + Fabio A.González + ManuelMontes-y-Gómez 6229–6239 Passage retrieval is the task of identifying text snippets that are valid answers for a natural language posed question. One way to address this problem is to look at it as a metric learning problem, where we want to induce a metric between questions and passages that assign smaller distances to more relevant passages. In this work, we present a novel method for passage retrieval that learns a metric for questions and passages based on their internal semantic interactions. The method uses a similar approach to that of triplet networks, where the training samples are composed of one anchor (the question) and two positive and negative samples (passages). However,and in contrast with triplet networks, the proposed method uses a novel deep architecture that better exploits the particularities of text and takes into consideration complementary relatedness measures. Besides, the paper presents a sampling strategy that selects both easy and hard negative samples which improves the accuracy of the trained model. The method is particularly well suited for domain-specific passage retrieval where it is very important to take into account different sources of information. The proposed approach was evaluated in a biomedical passage retrieval task, the BioASQ challenge, outperforming standard triplet loss substantially by 10%,and state-of-the-art performance by 26%. 2020.coling-main.548 @@ -6669,7 +6669,7 @@ Hierarchical Text Segmentation for Medieval Manuscripts AmirHazem - BeatriceDaille + BeatriceDaille DominiqueStutzmann ChristopherKermorvant LouisChevalier @@ -6709,7 +6709,7 @@ EeshaDutta ParyulJain ManishGupta - ManishShrivastava + ManishShrivastava PonnurangamKumaraguru 6277–6283 While extensive popularity of online social media platforms has made information dissemination faster, it has also resulted in widespread online abuse of different types like hate speech, offensive language, sexist and racist opinions, etc. Detection and curtailment of such abusive content is critical for avoiding its psychological impact on victim communities, and thereby preventing hate crimes. Previous works have focused on classifying user posts into various forms of abusive behavior. But there has hardly been any focus on estimating the severity of abuse and the target. In this paper, we present a first of the kind dataset with 7,601 posts from Gab which looks at online abuse from the perspective of presence of abuse, severity and target of abusive behavior. We also propose a system to address these tasks, obtaining an accuracy of ∼80% for abuse presence, ∼82% for abuse target prediction, and ∼65% for abuse severity prediction. @@ -6719,7 +6719,7 @@ A Survey of Automatic Personality Detection from Texts - SanjaStajner + SanjaStajner SerenYenikent 6284–6295 Personality profiling has long been used in psychology to predict life outcomes. Recently, automatic detection of personality traits from written messages has gained significant attention in computational linguistics and natural language processing communities, due to its applicability in various fields. In this survey, we show the trajectory of research towards automatic personality detection from purely psychology approaches, through psycholinguistics, to the recent purely natural language processing approaches on large datasets automatically extracted from social media. We point out what has been gained and what lost during that trajectory, and show what can be realistic expectations in the field. @@ -6748,7 +6748,7 @@ CongyingXia JianxinLi LifangHe - PhilipYu + PhilipYu 6302–6314 Review rating prediction of text reviews is a rapidly growing technology with a wide range of applications in natural language processing. However, most existing methods either use hand-crafted features or learn features using deep learning with simple text corpus as input for review rating prediction, ignoring the hierarchies among data. In this paper, we propose a Hierarchical bi-directional self-attention Network framework (HabNet) for paper review rating prediction and recommendation, which can serve as an effective decision-making tool for the academic paper review process. Specifically, we leverage the hierarchical structure of the paper reviews with three levels of encoders: sentence encoder (level one), intra-review encoder (level two) and inter-review encoder (level three). Each encoder first derives contextual representation of each level, then generates a higher-level representation, and after the learning process, we are able to identify useful predictors to make the final acceptance decision, as well as to help discover the inconsistency between numerical review ratings and text sentiment conveyed by reviewers. Furthermore, we introduce two new metrics to evaluate models in data imbalance situations. Extensive experiments on a publicly available dataset (PeerRead) and our own collected dataset (OpenReview) demonstrate the superiority of the proposed approach compared with state-of-the-art methods. 2020.coling-main.555 @@ -6788,7 +6788,7 @@ <fixed-case>XH</fixed-case>ate-999: Analyzing and Detecting Abusive Language Across Domains and Languages GoranGlavaš - Vanja MladenKaran + Vanja MladenKaran IvanVulić 6350–6365 We present XHate-999, a multi-domain and multilingual evaluation data set for abusive language detection. By aligning test instances across six typologically diverse languages, XHate-999 for the first time allows for disentanglement of the domain transfer and language transfer effects in abusive language detection. We conduct a series of domain- and language-transfer experiments with state-of-the-art monolingual and multilingual transformer models, setting strong baseline results and profiling XHate-999 as a comprehensive evaluation resource for abusive language detection. Finally, we show that domain- and language-adaption, via intermediate masked language modeling on abusive corpora in the target language, can lead to substantially improved abusive language detection in the target language in the zero-shot transfer setups. @@ -6816,7 +6816,7 @@ JingLi BaohuaZhou QiZhang - XuanjingHuang + XuanjingHuang 6377–6387 Previous work for rumor resolution concentrates on exploiting time-series characteristics or modeling topology structure separately. However, how local interactive pattern affects global information assemblage has not been explored. In this paper, we attempt to address the problem by learning evolution of message interaction. We model confrontation and reciprocity between message pairs via discrete variational autoencoders which effectively reflects the diversified opinion interactivity. Moreover, we capture the variation of message interaction using a hierarchical framework to better integrate information flow of a rumor cascade. Experiments on PHEME dataset demonstrate our proposed model achieves higher accuracy than existing methods. 2020.coling-main.561 @@ -6862,7 +6862,7 @@ Graph Convolution over Multiple Dependency Sub-graphs for Relation Extraction - AngroshMandya + AngroshMandya DanushkaBollegala FransCoenen 6424–6435 @@ -6890,7 +6890,7 @@ Multi-choice Relational Reasoning for Machine Reading Comprehension WuyaChen XiaojunQuan - ChunyuKit + ChunyuKit ZhengchengMin JiahaiWang 6448–6458 @@ -6904,7 +6904,7 @@ ShuaiPang JianqiangMa ZeyuYan - YangZhang + YangZhang JianpingShen 6459–6469 Recently, pre-trained language models such as BERT have shown state-of-the-art accuracies in text matching. When being applied to IR (or QA), the BERT-based matching models need to online calculate the representations and interactions for all query-candidate pairs. The high inference cost has prohibited the deployments of BERT-based matching models in many practical applications. To address this issue, we propose a novel BERT-based text matching model, in which the representations and the interactions are decoupled. Then, the representations of the candidates can be calculated and stored offline, and directly retrieved during the online matching phase. To conduct the interactions and generate final matching scores, a lightweight attention network is designed. Experiments based on several large scale text matching datasets show that the proposed model, called FASTMATCH, can achieve up to 100X speed-up to BERT and RoBERTa at the online matching phase, while keeping more up to 98.7% of the performance. @@ -6950,7 +6950,7 @@ <fixed-case>NYTWIT</fixed-case>: A Dataset of Novel Words in the <fixed-case>N</fixed-case>ew <fixed-case>Y</fixed-case>ork <fixed-case>T</fixed-case>imes YuvalPinter - Cassandra L.Jacobs + Cassandra L.Jacobs MaxBittker 6509–6515 We present the New York Times Word Innovation Types dataset, or NYTWIT, a collection of over 2,500 novel English words published in the New York Times between November 2017 and March 2019, manually annotated for their class of novelty (such as lexical derivation, dialectal variation, blending, or compounding). We present baseline results for both uncontextual and contextual prediction of novelty class, showing that there is room for improvement even for state-of-the-art NLP systems. We hope this resource will prove useful for linguists and NLP practitioners by providing a real-world environment of novel word appearance. @@ -6974,7 +6974,7 @@ Continual Lifelong Learning in Natural Language Processing: A Survey MagdalenaBiesialska KatarzynaBiesialska - Marta R.Costa-jussà + Marta R.Costa-jussà 6523–6541 Continual learning (CL) aims to enable information systems to learn from a continuous data stream across time. However, it is difficult for existing deep learning architectures to learn a new task without largely forgetting previously acquired knowledge. Furthermore, CL is particularly challenging for language learning, as natural language is ambiguous: it is discrete, compositional, and its meaning is context-dependent. In this work, we look at the problem of CL through the lens of various NLP tasks. Our survey discusses major challenges in CL and current methods applied in neural network models. We also provide a critical review of the existing CL evaluation methods and datasets in NLP. Finally, we present our outlook on future research directions. 2020.coling-main.574 @@ -6986,7 +6986,7 @@ EmilyÖhman MarcPàmies KaislaKajava - JörgTiedemann + JörgTiedemann 6542–6552 We introduce XED, a multilingual fine-grained emotion dataset. The dataset consists of human-annotated Finnish (25k) and English sentences (30k), as well as projected annotations for 30 additional languages, providing new resources for many low-resource languages. We use Plutchik’s core emotions to annotate the dataset with the addition of neutral to create a multilabel multiclass dataset. The dataset is carefully evaluated using language-specific BERT models and SVMs to show that XED performs on par with other similar datasets and is therefore a useful tool for sentiment analysis and emotion detection. 2020.coling-main.575 @@ -6997,9 +6997,9 @@ Human or Neural Translation? ShivendraBhardwaj DavidAlfonso Hermelo - PhillippeLanglais + PhillippeLanglais GabrielBernier-Colborne - CyrilGoutte + CyrilGoutte MichelSimard 6553–6564 Deep neural models tremendously improved machine translation. In this context, we investigate whether distinguishing machine from human translations is still feasible. We trained and applied 18 classifiers under two settings: a monolingual task, in which the classifier only looks at the translation; and a bilingual task, in which the source text is also taken into consideration. We report on extensive experiments involving 4 neural MT systems (Google Translate, DeepL, as well as two systems we trained) and varying the domain of texts. We show that the bilingual task is the easiest one and that transfer-based deep-learning classifiers perform best, with mean accuracies around 85% in-domain and 75% out-of-domain . @@ -7023,7 +7023,7 @@ Domain-Specific Sentiment Lexicons Induced from Labeled Documents SM MazharulIslam - XinDong + XinDong Gerardde Melo 6576–6587 Sentiment analysis is an area of substantial relevance both in industry and in academia, including for instance in social studies. Although supervised learning algorithms have advanced considerably in recent years, in many settings it remains more practical to apply an unsupervised technique. The latter are oftentimes based on sentiment lexicons. However, existing sentiment lexicons reflect an abstract notion of polarity and do not do justice to the substantial differences of word polarities between different domains. In this work, we draw on a collection of domain-specific data to induce a set of 24 domain-specific sentiment lexicons. We rely on initial linear models to induce initial word intensity scores, and then train new deep models based on word vector representations to overcome the scarcity of the original seed data. Our analysis shows substantial differences between domains, which make domain-specific sentiment lexicons a promising form of lexical resource in downstream tasks, and the predicted lexicons indeed perform effectively on tasks such as review classification and cross-lingual word sentiment prediction. @@ -7048,7 +7048,7 @@ XanhHo Anh-KhoaDuong Nguyen SakuSugawara - AkikoAizawa + AkikoAizawa 6609–6625 A multi-hop question answering (QA) dataset aims to test reasoning and inference skills by requiring a model to read multiple paragraphs to answer a given question. However, current datasets do not provide a complete explanation for the reasoning process from the question to the answer. Further, previous studies revealed that many examples in existing multi-hop datasets do not require multi-hop reasoning to answer a question. In this study, we present a new multi-hop QA dataset, called 2WikiMultiHopQA, which uses structured and unstructured data. In our dataset, we introduce the evidence information containing a reasoning path for multi-hop questions. The evidence information has two benefits: (i) providing a comprehensive explanation for predictions and (ii) evaluating the reasoning skills of a model. We carefully design a pipeline and a set of templates when generating a question-answer pair that guarantees the multi-hop steps and the quality of the questions. We also exploit the structured format in Wikidata and use logical rules to create questions that are natural but still require multi-hop reasoning. Through experiments, we demonstrate that our dataset is challenging for multi-hop models and it ensures that multi-hop reasoning is required. 2020.coling-main.580 @@ -7079,7 +7079,7 @@ <fixed-case>D</fixed-case>a<fixed-case>N</fixed-case>+: <fixed-case>D</fixed-case>anish Nested Named Entities and Lexical Normalization - BarbaraPlank + BarbaraPlank Kristian NørgaardJensen Robvan der Goot 6649–6662 @@ -7128,7 +7128,7 @@ Abhinav ReddyAppidi Vamshi KrishnaSrirangam DarsiSuhas - ManishShrivastava + ManishShrivastava 6703–6709 Emotion prediction is a critical task in the field of Natural Language Processing (NLP). There has been a significant amount of work done in emotion prediction for resource-rich languages. There has been work done on code-mixed social media corpus but not on emotion prediction of Kannada-English code-mixed Twitter data. In this paper, we analyze the problem of emotion prediction on corpus obtained from code-mixed Kannada-English extracted from Twitter annotated with their respective ‘Emotion’ for each tweet. We experimented with machine learning prediction models using features like Character N-Grams, Word N-Grams, Repetitive characters, and others on SVM and LSTM on our corpus, which resulted in an accuracy of 30% and 32% respectively. 2020.coling-main.587 @@ -7177,7 +7177,7 @@ Multilingual Neural <fixed-case>RST</fixed-case> Discourse Parsing ZhengyuanLiu KeShi - NancyChen + NancyChen 6730–6738 Text discourse parsing plays an important role in understanding information flow and argumentative structure in natural language. Previous research under the Rhetorical Structure Theory (RST) has mostly focused on inducing and evaluating models from the English treebank. However, the parsing tasks for other languages such as German, Dutch, and Portuguese are still challenging due to the shortage of annotated data. In this work, we investigate two approaches to establish a neural, cross-lingual discourse parser via: (1) utilizing multilingual vector representations; and (2) adopting segment-level translation of the source content. Experiment results show that both methods are effective even with limited training data, and achieve state-of-the-art performance on cross-lingual, document-level discourse parsing on all sub-tasks. 2020.coling-main.591 @@ -7217,7 +7217,7 @@ Statistical Parsing of Tree Wrapping Grammars TatianaBladier - JakubWaszczuk + JakubWaszczuk LauraKallmeyer 6759–6766 We describe an approach to statistical parsing with Tree-Wrapping Grammars (TWG). TWG is a tree-rewriting formalism which includes the tree-combination operations of substitution, sister-adjunction and tree-wrapping substitution. TWGs can be extracted from constituency treebanks and aim at representing long distance dependencies (LDDs) in a linguistically adequate way. We present a parsing algorithm for TWGs based on neural supertagging and A* parsing. We extract a TWG for English from the treebanks for Role and Reference Grammar and discuss first parsing results with this grammar. @@ -7233,7 +7233,7 @@ NurulLubis MarcoMoresi Carelvan Niekerk - MilicaGasic + MilicaGasic 6767–6774 Dialog state tracking (DST) suffers from severe data sparsity. While many natural language processing (NLP) tasks benefit from transfer learning and multi-task learning, in dialog these methods are limited by the amount of available data and by the specificity of dialog applications. In this work, we successfully utilize non-dialog data from unrelated NLP tasks to train dialog state trackers. This opens the door to the abundance of unrelated NLP corpora to mitigate the data sparsity issue inherent to DST. 2020.coling-main.596 @@ -7244,7 +7244,7 @@ Resource Constrained Dialog Policy Learning Via Differentiable Inductive Logic Programming ZhenpengZhou AhmadBeirami - PaulCrook + PaulCrook PararthShah RajenSubba AlborzGeramifard @@ -7314,7 +7314,7 @@ Neural Unsupervised Domain Adaptation in <fixed-case>NLP</fixed-case>—<fixed-case>A</fixed-case> Survey AlanRamponi - BarbaraPlank + BarbaraPlank 6838–6855 Deep neural networks excel at learning from labeled data and achieve state-of-the-art results on a wide array of Natural Language Processing tasks. In contrast, learning from unlabeled data, especially under domain shift, remains a challenge. Motivated by the latest advances, in this survey we review neural unsupervised domain adaptation techniques which do not require labeled target domain data. This is a more challenging yet a more widely applicable setup. We outline methods, from early traditional non-neural methods to pre-trained model transfer. We also revisit the notion of domain, and we uncover a bias in the type of Natural Language Processing tasks which received most attention. Lastly, we outline future directions, particularly the broader need for out-of-distribution generalization of future NLP. 2020.coling-main.603 @@ -7326,7 +7326,7 @@ CharlesWelch Jonathan K.Kummerfeld VerónicaPérez-Rosas - RadaMihalcea + RadaMihalcea 6856–6862 In this paper, we introduce personalized word embeddings, and examine their value for language modeling. We compare the performance of our proposed prediction model when using personalized versus generic word representations, and study how these representations can be leveraged for improved performance. We provide insight into what types of words can be more accurately predicted when building personalized models. Our results show that a subset of words belonging to specific psycholinguistic categories tend to vary more in their representations across users and that combining generic and personalized word embeddings yields the best performance, with a 4.7% relative reduction in perplexity. Additionally, we show that a language model using personalized word embeddings can be effectively used for authorship attribution. 2020.coling-main.604 @@ -7386,8 +7386,8 @@ OlivierFerret ThomasLavergne HiroshiNoji - PierreZweigenbaum - Jun’ichiTsujii + PierreZweigenbaum + Jun’ichiTsujii 6903–6915 Due to the compelling improvements brought by BERT, many recent representation models adopted the Transformer architecture as their main building block, consequently inheriting the wordpiece tokenization system despite it not being intrinsically linked to the notion of Transformers. While this system is thought to achieve a good balance between the flexibility of characters and the efficiency of full words, using predefined wordpiece vocabularies from the general domain is not always suitable, especially when building models for specialized domains (e.g., the medical domain). Moreover, adopting a wordpiece tokenization shifts the focus from the word level to the subword level, making the models conceptually more complex and arguably less convenient in practice. For these reasons, we propose CharacterBERT, a new variant of BERT that drops the wordpiece system altogether and uses a Character-CNN module instead to represent entire words by consulting their characters. We show that this new model improves the performance of BERT on a variety of medical domain tasks while at the same time producing robust, word-level, and open-vocabulary representations. 2020.coling-main.609 @@ -7398,7 +7398,7 @@ Autoregressive Reasoning over Chains of Facts with Transformers RubenCartuyvels GrahamSpinks - Marie-FrancineMoens + Marie-FrancineMoens 6916–6930 This paper proposes an iterative inference algorithm for multi-hop explanation regeneration, that retrieves relevant factual evidence in the form of text snippets, given a natural language question and its answer. Combining multiple sources of evidence or facts for multi-hop reasoning becomes increasingly hard when the number of sources needed to make an inference grows. Our algorithm copes with this by decomposing the selection of facts from a corpus autoregressively, conditioning the next iteration on previously selected facts. This allows us to use a pairwise learning-to-rank loss. We validate our method on datasets of the TextGraphs 2019 and 2020 Shared Tasks for explanation regeneration. Existing work on this task either evaluates facts in isolation or artificially limits the possible chains of facts, thus limiting multi-hop inference. We demonstrate that our algorithm, when used with a pre-trained transformer model, outperforms the previous state-of-the-art in terms of precision, training time and inference efficiency. 2020.coling-main.610 @@ -7412,7 +7412,7 @@ AniketDidolkar DiJin RamitSawhney - Rajiv RatnShah + Rajiv RatnShah 6931–6936 Models with a large number of parameters are prone to over-fitting and often fail to capture the underlying input distribution. We introduce Emix, a data augmentation method that uses interpolations of word embeddings and hidden layer representations to construct virtual examples. We show that Emix shows significant improvements over previously used interpolation based regularizers and data augmentation techniques. We also demonstrate how our proposed method is more robust to sparsification. We highlight the merits of our proposed methodology by performing thorough quantitative and qualitative assessments. 2020.coling-main.611 @@ -7424,7 +7424,7 @@ ZhihongLei WeiyueWang ChristianDugast - HermannNey + HermannNey 6937–6941 Named entity recognition is a key component in various natural language processing systems, and neural architectures provide significant improvements over conventional approaches. Regardless of different word embedding and hidden layer structures of the networks, a conditional random field layer is commonly used for the output. This work proposes to use a neural language model as an alternative to the conditional random field layer, which is more flexible for the size of the corpus. Experimental results show that the proposed system has a significant advantage in terms of training speed, with a marginal performance degradation. 2020.coling-main.612 @@ -7487,9 +7487,9 @@ Demo Application for the <fixed-case>A</fixed-case>uto<fixed-case>GOAL</fixed-case> Framework SuilanEstevez-Velarde AlejandroPiad-Morffis - YoanGutiérrez - AndresMontoyo - RafaelMuñoz-Guillena + YoanGutiérrez + AndresMontoyo + RafaelMuñoz-Guillena YudiviánAlmeida Cruz 18–22 This paper introduces a web demo that showcases the main characteristics of the AutoGOAL framework. AutoGOAL is a framework in Python for automatically finding the best way to solve a given task. It has been designed mainly for automatic machine learning(AutoML) but it can be used in any scenario where several possible strategies are available to solve a given computational task. In contrast with alternative frameworks, AutoGOAL can be applied seamlessly to Natural Language Processing as well as structured classification problems. This paper presents an overview of the framework’s design and experimental evaluation in several machine learning problems, including two recent NLP challenges. The accompanying software demo is available online (https://autogoal.github.io/demo) and full source code is provided under the MIT open-source license (https://autogoal.github.io). @@ -7499,7 +7499,7 @@ Fast Word Predictor for On-Device Application - Huy TienNguyen + Huy TienNguyen Khoi TuanNguyen Anh TuanNguyen Thanh Lac ThiTran @@ -7538,9 +7538,9 @@ MihaelaBornea KaziHasan RishavChakravarti - SalimRoukos - RaduFlorian - AviSil + SalimRoukos + RaduFlorian + AviSil 41–47 This paper presents M-GAAMA, a Multilingual Question Answering architecture and demo system. This is the first multilingual machine reading comprehension (MRC) demo which is able to answer questions in over 100 languages. M-GAAMA answers questions from a given passage in the same or different language. It incorporates several existing multilingual models that can be used interchangeably in the demo such as M-BERT and XLM-R. The M-GAAMA demo also improves language accessibility by incorporating the IBM Watson machine translation widget to provide additional capabilities to the user to see an answer in their desired language. We also show how M-GAAMA can be used in downstream tasks by incorporating it into an END-TO-END-QA system using CFO (Chakravarti et al., 2019). We experiment with our system architecture on the Multi-Lingual Question Answering (MLQA) and the COVID-19 CORD (Wang et al., 2020; Tang et al., 2020) datasets to provide insights into the performance of the system. 2020.coling-demos.8 @@ -7551,7 +7551,7 @@ <fixed-case>X</fixed-case>plai<fixed-case>NLI</fixed-case>: Explainable Natural Language Inference through Visual Analytics Aikaterini-LidaKalouli RitaSevastjanova - Valeriade Paiva + Valeriade Paiva RichardCrouch MennatallahEl-Assady 48–52 @@ -7565,7 +7565,7 @@ LucaLugini ChristopherOlshefski RavneetSingh - DianeLitman + DianeLitman AmandaGodley 53–58 Teaching collaborative argumentation is an advanced skill that many K-12 teachers struggle to develop. To address this, we have developed Discussion Tracker, a classroom discussion analytics system based on novel algorithms for classifying argument moves, specificity, and collaboration. Results from a classroom deployment indicate that teachers found the analytics useful, and that the underlying classifiers perform with moderate to substantial agreement with humans. @@ -7648,7 +7648,7 @@ Proceedings of the 28th International Conference on Computational Linguistics: Tutorial Abstracts LuciaSpecia - DanielBeck + DanielBeck International Committee for Computational Linguistics
Barcelona, Spain (Online)
December @@ -7674,7 +7674,7 @@
Embeddings in Natural Language Processing - JoseCamacho-Collados + JoseCamacho-Collados Mohammad TaherPilehvar 10–15 Embeddings have been one of the most important topics of interest in NLP for the past decade. Representing knowledge through a low-dimensional vector which is easily integrable in modern machine learning models has played a central role in the development of the field. Embedding techniques initially focused on words but the attention soon started to shift to other forms. This tutorial will provide a high-level synthesis of the main embedding techniques in NLP, in the broad sense. We will start by conventional word embeddings (e.g., Word2Vec and GloVe) and then move to other types of embeddings, such as sense-specific and graph alternatives. We will finalize with an overview of the trending contextualized representations (e.g., ELMo and BERT) and explain their potential and impact in NLP. @@ -7695,7 +7695,7 @@ Detection and Resolution of Rumors and Misinformation with <fixed-case>NLP</fixed-case> - LeonDerczynski + LeonDerczynski ArkaitzZubiaga 22–26 Detecting and grounding false and misleading claims on the web has grown to form a substantial sub-field of NLP. The sub-field addresses problems at multiple different levels of misinformation detection: identifying check-worthy claims; tracking claims and rumors; rumor collection and annotation; grounding claims against knowledge bases; using stance to verify claims; and applying style analysis to detect deception. This half-day tutorial presents the theory behind each of these steps as well as the state-of-the-art solutions. @@ -7804,7 +7804,7 @@ Semantic Diversity for Natural Language Understanding Evaluation in Dialog Systems EnricoPalumbo AndreaMezzalira - CristinaMarco + CristinaMarco AlessandroManzotti DanieleAmberti 44–49 @@ -7841,7 +7841,7 @@ AnujKumar ShawnMei KarthikMohan - MichaelWhite + MichaelWhite 64–77 Natural language generation (NLG) is a critical component in conversational systems, owing to its role of formulating a correct and natural text response. Traditionally, NLG components have been deployed using template-based solutions. Although neural network solutions recently developed in the research community have been shown to provide several benefits, deployment of such model-based solutions has been challenging due to high latency, correctness issues, and high data needs. In this paper, we present approaches that have helped us deploy data-efficient neural solutions for NLG in conversational systems to production. We describe a family of sampling and modeling techniques to attain production quality with light-weight neural network models using only a fraction of the data that would be necessary otherwise, and show a thorough comparison between each. Our results show that domain complexity dictates the appropriate approach to achieve high data efficiency. Finally, we distill the lessons from our experimental findings into a list of best practices for production-level NLG model development, and present them in a brief runbook. Importantly, the end products of all of the techniques are small sequence-to-sequence models (~2Mb) that we can reliably deploy in production. These models achieve the same quality as large pretrained models (~1Gb) as judged by human raters. 2020.coling-industry.7 @@ -7867,9 +7867,9 @@ AnthonyFerritto BhavaniIyer LinPan - RaduFlorian - SalimRoukos - AviSil + RaduFlorian + SalimRoukos + AviSil 90–101 Industry-scale NLP systems necessitate two features. 1. Robustness: “zero-shot transfer learning” (ZSTL) performance has to be commendable and 2. Efficiency: systems have to train efficiently and respond instantaneously. In this paper, we introduce the development of a production model called GAAMA (Go Ahead Ask Me Anything) which possess the above two characteristics. For robustness, it trains on the recently introduced Natural Questions (NQ) dataset. NQ poses additional challenges over older datasets like SQuAD: (a) QA systems need to read and comprehend an entire Wikipedia article rather than a small passage, and (b) NQ does not suffer from observation bias during construction, resulting in less lexical overlap between the question and the article. GAAMA consists of Attention-over-Attention, diversity among attention heads, hierarchical transfer learning, and synthetic data augmentation while being computationally inexpensive. Building on top of the powerful BERTQA model, GAAMA provides a ∼2.0% absolute boost in F1 over the industry-scale state-of-the-art (SOTA) system on NQ. Further, we show that GAAMA transfers zero-shot to unseen real life and important domains as it yields respectable performance on two benchmarks: the BioASQ and the newly introduced CovidQA datasets. 2020.coling-industry.9 @@ -7914,7 +7914,7 @@ hinglish<fixed-case>N</fixed-case>orm - A Corpus of <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Code Mixed Sentences for Text Normalization PiyushMakhija - AnkitKumar + AnkitKumar AnujGupta 136–145 We present hinglishNorm - a human annotated corpus of Hindi-English code-mixed sentences for text normalization task. Each sentence in the corpus is aligned to its corresponding human annotated normalized form. To the best of our knowledge, there is no corpus of Hindi-English code-mixed sentences for text normalization task that is publicly available. Our work is the first attempt in this direction. The corpus contains 13494 segments annotated for text normalization. Further, we present baseline normalization results on this corpus. We obtain a Word Error Rate (WER) of 15.55, BiLingual Evaluation Understudy (BLEU) score of 71.2, and Metric for Evaluation of Translation with Explicit ORdering (METEOR) score of 0.50. @@ -7925,7 +7925,7 @@ Assessing Social License to Operate from the Public Discourse on Social Media ChangXu - CecileParis + CecileParis RossSparks SuryaNepal KeithVanderLinden @@ -7937,7 +7937,7 @@ Extreme Model Compression for On-device Natural Language Understanding - KanthashreeMysore Sathyendra + KanthashreeMysore Sathyendra SamridhiChoudhary LeahNicolich-Henkin 160–171 @@ -7950,10 +7950,10 @@ Scalable Cross-lingual Treebank Synthesis for Improved Production Dependency Parsers YousefEl-Kurdi HiroshiKanayama - EfsunSarioglu Kayi + EfsunSarioglu Kayi VittorioCastelli ToddWard - RaduFlorian + RaduFlorian 172–178 We present scalable Universal Dependency (UD) treebank synthesis techniques that exploit advances in language representation modeling which leverage vast amounts of unlabeled general-purpose multilingual text. We introduce a data augmentation technique that uses synthetic treebanks to improve production-grade parsers. The synthetic treebanks are generated using a state-of-the-art biaffine parser adapted with pretrained Transformer models, such as Multilingual BERT (M-BERT). The new parser improves LAS by up to two points on seven languages. The production models’ LAS performance improves as the augmented treebanks scale in size, surpassing performance of production models trained on originally annotated UD treebanks. 2020.coling-industry.16 @@ -8004,7 +8004,7 @@ VishwasSuryanarayanan ChalaFufa PamelaBhattacharya - CharlesLee + CharlesLee 214–227 A prominent problem faced by conversational agents working with large documents (Eg: email-based assistants) is the frequent presence of information in the document that is irrelevant to the assistant. This in turn makes it harder for the agent to accurately detect intents, extract entities relevant to those intents and perform the desired action. To address this issue we present a neural model for scoping relevant information for the agent from a large document. We show that when used as the first step in a popularly used email-based assistant for helping users schedule meetings, our proposed model helps improve the performance of the intent detection and entity extraction tasks required by the agent for correctly scheduling meetings: across a suite of 6 downstream tasks, by using our proposed method, we observe an average gain of 35% in precision without any drop in recall. Additionally, we demonstrate that the same approach can be used for component level analysis in large documents, such as signature block identification. 2020.coling-industry.20 @@ -8015,8 +8015,8 @@ Uncertainty Modeling for Machine Comprehension Systems using Efficient <fixed-case>B</fixed-case>ayesian Neural Networks ZhengyuanLiu PavitraKrishnaswamy - Ai TiAw - NancyChen + Ai TiAw + NancyChen 228–235 While neural approaches have achieved significant improvement in machine comprehension tasks, models often work as a black-box, resulting in lower interpretability, which requires special attention in domains such as healthcare or education. Quantifying uncertainty helps pave the way towards more interpretable neural networks. In classification and regression tasks, Bayesian neural networks have been effective in estimating model uncertainty. However, inference time increases linearly due to the required sampling process in Bayesian neural networks. Thus speed becomes a bottleneck in tasks with high system complexity such as question-answering or dialogue generation. In this work, we propose a hybrid neural architecture to quantify model uncertainty using Bayesian weight approximation but boosts up the inference speed by 80% relative at test time, and apply it for a clinical dialogue comprehension task. The proposed approach is also used to enable active learning so that an updated model can be trained more optimally with new incoming data by selecting samples that are not well-represented in the current training scheme. 2020.coling-industry.21 diff --git a/data/xml/2020.computerm.xml b/data/xml/2020.computerm.xml index 26db91e0b0..fed6bde2c2 100644 --- a/data/xml/2020.computerm.xml +++ b/data/xml/2020.computerm.xml @@ -3,7 +3,7 @@ Proceedings of the 6th International Workshop on Computational Terminology - BéatriceDaille + BéatriceDaille KyoKageura Ayla RigoutsTerryn European Language Resources Association @@ -21,7 +21,7 @@ Automatic Term Extraction from Newspaper Corpora: Making the Most of Specificity and Common Features PatrickDrouin Jean-BenoîtMorel - Marie-ClaudeL’ Homme + Marie-ClaudeL’ Homme 1–7 The first step of any terminological work is to setup a reliable, specialized corpus composed of documents written by specialists and then to apply automatic term extraction (ATE) methods to this corpus in order to retrieve a first list of potential terms. In this paper, the experiment we describe differs quite drastically from this usual process since we are applying ATE to unspecialized corpora. The corpus used for this study was built from newspaper articles retrieved from the Web using a short list of keywords. The general intuition on which this research is based is that ATE based corpus comparison techniques can be used to capture both similarities and dissimilarities between corpora. The former are exploited through a termhood measure and the latter through word embeddings. Our initial results were validated manually and show that combining a traditional ATE method that focuses on dissimilarities between corpora to newer methods that exploit similarities (more specifically distributional features of candidates) leads to promising results. 2020.computerm-1.1 @@ -101,7 +101,7 @@ VidPodpečan DraganaMiljkovic UrošStepišnik - ŠpelaVintar + ŠpelaVintar 55–61 We present the NetViz terminology visualization tool and apply it to the domain modeling of karstology, a subfield of geography studying karst phenomena. The developed tool allows for high-performance online network visualization where the user can upload the terminological data in a simple CSV format, define the nodes (terms, categories), edges (relations) and their properties (by assigning different node colors), and then edit and interactively explore domain knowledge in the form of a network. We showcase the usefulness of the tool on examples from the karstology domain, where in the first use case we visualize the domain knowledge as represented in a manually annotated corpus of domain definitions, while in the second use case we show the power of visualization for domain understanding by visualizing automatically extracted knowledge in the form of triplets extracted from the karstology domain corpus. The application is entirely web-based without any need for downloading or special configuration. The source code of the web application is also available under the permissive MIT license, allowing future extensions for developing new terminological applications. 2020.computerm-1.8 @@ -121,7 +121,7 @@ Supporting terminology extraction with dependency parses - MalgorzataMarciniak + MalgorzataMarciniak PiotrRychlik AgnieszkaMykowiecka 72–79 @@ -143,7 +143,7 @@ <fixed-case>T</fixed-case>erm<fixed-case>E</fixed-case>val 2020: Shared Task on Automatic Term Extraction Using the Annotated Corpora for Term Extraction Research (<fixed-case>ACTER</fixed-case>) Dataset AylaRigouts Terryn - VeroniqueHoste + VeroniqueHoste PatrickDrouin ElsLefever 85–94 diff --git a/data/xml/2020.conll.xml b/data/xml/2020.conll.xml index e49a592d3b..50b9507f2f 100644 --- a/data/xml/2020.conll.xml +++ b/data/xml/2020.conll.xml @@ -3,7 +3,7 @@ Proceedings of the 24th Conference on Computational Natural Language Learning - RaquelFernández + RaquelFernández TalLinzen Association for Computational Linguistics
Online
@@ -43,7 +43,7 @@ Neural Proof Nets KonstantinosKogkalidis - MichaelMoortgat + MichaelMoortgat RichardMoot 26–40 Linear logic and the linear λ-calculus have a long standing tradition in the study of natural language form and meaning. Among the proof calculi of linear logic, proof nets are of particular interest, offering an attractive geometric representation of derivations that is unburdened by the bureaucratic complications of conventional prooftheoretic formats. Building on recent advances in set-theoretic learning, we propose a neural variant of proof nets based on Sinkhorn networks, which allows us to translate parsing as the problem of extracting syntactic primitives and permuting them into alignment. Our methodology induces a batch-efficient, end-to-end differentiable architecture that actualizes a formally grounded yet highly efficient neuro-symbolic parser. We test our approach on ÆThel, a dataset of type-logical derivations for written Dutch, where it manages to correctly transcribe raw text sentences into proofs and terms of the linear λ-calculus with an accuracy of as high as 70%. @@ -113,7 +113,7 @@ Understanding the Source of Semantic Regularities in Word Embeddings Hsiao-YuChiang - JoseCamacho-Collados + JoseCamacho-Collados ZacharyPardos 119–131 Semantic relations are core to how humans understand and express concepts in the real world using language. Recently, there has been a thread of research aimed at modeling these relations by learning vector representations from text corpora. Most of these approaches focus strictly on leveraging the co-occurrences of relationship word pairs within sentences. In this paper, we investigate the hypothesis that examples of a lexical relation in a corpus are fundamental to a neural word embedding’s ability to complete analogies involving the relation. Our experiments, in which we remove all known examples of a relation from training corpora, show only marginal degradation in analogy completion performance involving the removed relation. This finding enhances our understanding of neural word embeddings, showing that co-occurrence information of a particular semantic relation is not the main source of their structural regularity. @@ -125,7 +125,7 @@ Finding The Right One and Resolving it PayalKhullar ArghyaBhattacharya - ManishShrivastava + ManishShrivastava 132–141 One-anaphora has figured prominently in theoretical linguistic literature, but computational linguistics research on the phenomenon is sparse. Not only that, the long standing linguistic controversy between the determinative and the nominal anaphoric element one has propagated in the limited body of computational work on one-anaphora resolution, making this task harder than it is. In the present paper, we resolve this by drawing from an adequate linguistic analysis of the word one in different syntactic environments - once again highlighting the significance of linguistic theory in Natural Language Processing (NLP) tasks. We prepare an annotated corpus marking actual instances of one-anaphora with their textual antecedents, and use the annotations to experiment with state-of-the art neural models for one-anaphora resolution. Apart from presenting a strong neural baseline for this task, we contribute a gold-standard corpus, which is, to the best of our knowledge, the biggest resource on one-anaphora till date. 2020.conll-1.10 @@ -135,7 +135,7 @@ Bridging Information-Seeking Human Gaze and Machine Reading Comprehension JonathanMalmaud - RogerLevy + RogerLevy YevgeniBerzak 142–152 In this work, we analyze how human gaze during reading comprehension is conditioned on the given reading comprehension question, and whether this signal can be beneficial for machine reading comprehension. To this end, we collect a new eye-tracking dataset with a large number of participants engaging in a multiple choice reading comprehension task. Our analysis of this data reveals increased fixation times over parts of the text that are most relevant for answering the question. Motivated by this finding, we propose making automated reading comprehension more human-like by mimicking human information-seeking reading behavior during reading comprehension. We demonstrate that this approach leads to performance gains on multiple choice question answering in English for a state-of-the-art reading comprehension model. @@ -147,7 +147,7 @@ A Corpus of Very Short Scientific Summaries YifanChen TamaraPolajnar - ColinBatchelor + ColinBatchelor SimoneTeufel 153–164 We present a new summarisation task, taking scientific articles and producing journal table-of-contents entries in the chemistry domain. These are one- or two-sentence author-written summaries that present the key findings of a paper. This is a first look at this summarisation task with an open access publication corpus consisting of titles and abstracts, as input texts, and short author-written advertising blurbs, as the ground truth. We introduce the dataset and evaluate it with state-of-the-art summarisation methods. @@ -158,7 +158,7 @@ Recurrent babbling: evaluating the acquisition of grammar from limited input data LudovicaPannitto - AurélieHerbelot + AurélieHerbelot 165–176 Recurrent Neural Networks (RNNs) have been shown to capture various aspects of syntax from raw linguistic input. In most previous experiments, however, learning happens over unrealistic corpora, which do not reflect the type and amount of data a child would be exposed to. This paper remedies this state of affairs by training an LSTM over a realistically sized subset of child-directed input. The behaviour of the network is analysed over time using a novel methodology which consists in quantifying the level of grammatical abstraction in the model’s generated output (its ‘babbling’), compared to the language it has been exposed to. We show that the LSTM indeed abstracts new structures as learning proceeds. 2020.conll-1.13 @@ -192,7 +192,7 @@ Identifying Incorrect Labels in the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2003 Corpus - FrederickReiss + FrederickReiss HongXu BryanCutler KarthikMuthuraman @@ -226,7 +226,7 @@ Relations between comprehensibility and adequacy errors in machine translation output - MajaPopović + MajaPopović 256–264 This work presents a detailed analysis of translation errors perceived by readers as comprehensibility and/or adequacy issues. The main finding is that good comprehensibility, similarly to good fluency, can mask a number of adequacy errors. Of all major adequacy errors, 30% were fully comprehensible, thus fully misleading the reader to accept the incorrect information. Another 25% of major adequacy errors were perceived as almost comprehensible, thus being potentially misleading. Also, a vast majority of omissions (about 70%) is hidden by comprehensibility. Further analysis of misleading translations revealed that the most frequent error types are ambiguity, mistranslation, noun phrase error, word-by-word translation, untranslated word, subject-verb agreement, and spelling error in the source text. However, none of these error types appears exclusively in misleading translations, but are also frequent in fully incorrect (incomprehensible inadequate) and discarded correct (incomprehensible adequate) translations. Deeper analysis is needed to potentially detect underlying phenomena specifically related to misleading translations. 2020.conll-1.19 @@ -259,7 +259,7 @@ Catplayinginthesnow: Impact of Prior Segmentation on a Model of Visually Grounded Speech WilliamHavard - LaurentBesacier + LaurentBesacier Jean-PierreChevrot 291–301 The language acquisition literature shows that children do not build their lexicon by segmenting the spoken input into phonemes and then building up words from them, but rather adopt a top-down approach and start by segmenting word-like units and then break them down into smaller units. This suggests that the ideal way of learning a language is by starting from full semantic units. In this paper, we investigate if this is also the case for a neural model of Visually Grounded Speech trained on a speech-image retrieval task. We evaluated how well such a network is able to learn a reliable speech-to-image mapping when provided with phone, syllable, or word boundary information. We present a simple way to introduce such information into an RNN-based model and investigate which type of boundary is the most efficient. We also explore at which level of the network’s architecture such information should be introduced so as to maximise its performances. Finally, we show that using multiple boundary types at once in a hierarchical structure, by which low-level segments are used to recompose high-level segments, is beneficial and yields better results than using low-level or high-level segments in isolation. @@ -284,7 +284,7 @@ Representation Learning for Type-Driven Composition GijsWijnholds - MehrnooshSadrzadeh + MehrnooshSadrzadeh StephenClark 313–324 This paper is about learning word representations using grammatical type information. We use the syntactic types of Combinatory Categorial Grammar to develop multilinear representations, i.e. maps with n arguments, for words with different functional types. The multilinear maps of words compose with each other to form sentence representations. We extend the skipgram algorithm from vectors to multi- linear maps to learn these representations and instantiate it on unary and binary maps for transitive verbs. These are evaluated on verb and sentence similarity and disambiguation tasks and a subset of the SICK relatedness dataset. Our model performs better than previous type- driven models and is competitive with state of the art representation learning methods such as BERT and neural sentence encoders. @@ -296,7 +296,7 @@ Word Representations Concentrate and This is Good News! RomainCouillet Yagmur GizemCinar - EricGaussier + EricGaussier MuhammadImran 325–334 This article establishes that, unlike the legacy tf*idf representation, recent natural language representations (word embedding vectors) tend to exhibit a so-called concentration of measure phenomenon, in the sense that, as the representation size p and database size n are both large, their behavior is similar to that of large dimensional Gaussian random vectors. This phenomenon may have important consequences as machine learning algorithms for natural language data could be amenable to improvement, thereby providing new theoretical insights into the field of natural language processing. @@ -318,7 +318,7 @@ Re-solve it: simulating the acquisition of core semantic competences from small data - AurélieHerbelot + AurélieHerbelot 344–354 Many tasks are considered to be ‘solved’ in the computational linguistics literature, but the corresponding algorithms operate in ways which are radically different from human cognition. I illustrate this by coming back to the notion of semantic competence, which includes basic linguistic skills encompassing both referential phenomena and generic knowledge, in particular a) the ability to denote, b) the mastery of the lexicon, or c) the ability to model one’s language use on others. Even though each of those faculties has been extensively tested individually, there is still no computational model that would account for their joint acquisition under the conditions experienced by a human. In this paper, I focus on one particular aspect of this problem: the amount of linguistic data available to the child or machine. I show that given the first competence mentioned above (a denotation function), the other two can in fact be learned from very limited data (2.8M token), reaching state-of-the-art performance. I argue that both the nature of the data and the way it is presented to the system matter to acquisition. 2020.conll-1.27 @@ -371,7 +371,7 @@ Discourse structure interacts with reference but not syntax in neural language models ForrestDavis - Martenvan Schijndel + Martenvan Schijndel 396–407 Language models (LMs) trained on large quantities of text have been claimed to acquire abstract linguistic representations. Our work tests the robustness of these abstractions by focusing on the ability of LMs to learn interactions between different linguistic representations. In particular, we utilized stimuli from psycholinguistic studies showing that humans can condition reference (i.e. coreference resolution) and syntactic processing on the same discourse structure (implicit causality). We compared both transformer and long short-term memory LMs to find that, contrary to humans, implicit causality only influences LM behavior for reference, not syntax, despite model representations that encode the necessary discourse information. Our results further suggest that LM behavior can contradict not only learned representations of discourse but also syntactic agreement, pointing to shortcomings of standard language modeling. 2020.conll-1.32 @@ -384,7 +384,7 @@ RobertHawkins MinaeKwon DorsaSadigh - NoahGoodman + NoahGoodman 408–419 To communicate with new partners in new contexts, humans rapidly form new linguistic conventions. Recent neural language models are able to comprehend and produce the existing conventions present in their training data, but are not able to flexibly and interactively adapt those conventions on the fly as humans do. We introduce an interactive repeated reference task as a benchmark for models of adaptation in communication and propose a regularized continual learning framework that allows an artificial agent initialized with a generic language model to more accurately and efficiently communicate with a partner over time. We evaluate this framework through simulations on COCO and in real-time reference game experiments with human partners. 2020.conll-1.33 @@ -396,7 +396,7 @@ Diverse and Relevant Visual Storytelling with Scene Graph Embeddings XudongHong RakshithShetty - AsadSayeed + AsadSayeed KhushbooMehra VeraDemberg BerntSchiele @@ -408,11 +408,11 @@ Alleviating Digitization Errors in Named Entity Recognition for Historical Documents - EmanuelaBoros + EmanuelaBoros AhmedHamdi ElvysLinhares Pontes - Luis AdriánCabrera-Diego - Jose G.Moreno + Luis AdriánCabrera-Diego + Jose G.Moreno NicolasSidere AntoineDoucet 431–441 @@ -458,7 +458,7 @@ Filler-gaps that neural networks fail to generalize DebasmitaBhattacharya - Martenvan Schijndel + Martenvan Schijndel 486–495 It can be difficult to separate abstract linguistic knowledge in recurrent neural networks (RNNs) from surface heuristics. In this work, we probe for highly abstract syntactic constraints that have been claimed to govern the behavior of filler-gap dependencies across different surface constructions. For models to generalize abstract patterns in expected ways to unseen data, they must share representational features in predictable ways. We use cumulative priming to test for representational overlap between disparate filler-gap constructions in English and find evidence that the models learn a general representation for the existence of filler-gap dependencies. However, we find no evidence that the models learn any of the shared underlying grammatical constraints we tested. Our work raises questions about the degree to which RNN language models learn abstract linguistic representations. 2020.conll-1.39 @@ -495,7 +495,7 @@ LeenaShekhar HeeyoungKwon NiranjanBalasubramanian - NathanaelChambers + NathanaelChambers 520–530 Early work on narrative modeling used explicit plans and goals to generate stories, but the language generation itself was restricted and inflexible. Modern methods use language models for more robust generation, but often lack an explicit representation of the scaffolding and dynamics that guide a coherent narrative. This paper introduces a new model that integrates explicit narrative structure with neural language models, formalizing narrative modeling as a Switching Linear Dynamical System (SLDS). A SLDS is a dynamical system in which the latent dynamics of the system (i.e. how the state vector transforms over time) is controlled by top-level discrete switching variables. The switching variables represent narrative structure (e.g., sentiment or discourse states), while the latent state vector encodes information on the current state of the narrative. This probabilistic formulation allows us to control generation, and can be learned in a semi-supervised fashion using both labeled and unlabeled data. Additionally, we derive a Gibbs sampler for our model that can “fill in” arbitrary parts of the narrative, guided by the switching variables. Our filled-in (English language) narratives outperform several baselines on both automatic and human evaluations 2020.conll-1.42 @@ -529,7 +529,7 @@ Are Pretrained Language Models Symbolic Reasoners over Knowledge? NoraKassner BennoKrojer - HinrichSchütze + HinrichSchütze 552–564 How can pretrained language models (PLMs) learn factual knowledge from the training set? We investigate the two most important mechanisms: reasoning and memorization. Prior work has attempted to quantify the number of facts PLMs learn, but we present, using synthetic data, the first study that investigates the causal relation between facts present in training and facts learned by the PLM. For reasoning, we show that PLMs seem to learn to apply some symbolic reasoning rules correctly but struggle with others, including two-hop reasoning. Further analysis suggests that even the application of learned reasoning rules is flawed. For memorization, we identify schema conformity (facts systematically supported by other facts) and frequency as key factors for its success. 2020.conll-1.45 @@ -541,7 +541,7 @@ TanmayParekh EmilyAhn YuliaTsvetkov - Alan WBlack + Alan WBlack 565–577 Code-switching is a ubiquitous phenomenon in multilingual communities. Natural language technologies that wish to communicate like humans must therefore adaptively incorporate code-switching techniques when they are deployed in multilingual settings. To this end, we propose a Hindi-English human-machine dialogue system that elicits code-switching conversations in a controlled setting. It uses different code-switching agent strategies to understand how users respond and accommodate to the agent’s language choice. Through this system, we collect and release a new dataset CommonDost, comprising of 439 human-machine multilingual conversations. We adapt pre-defined metrics to discover linguistic accommodation from users to agents. Finally, we compare these dialogues with Spanish-English dialogues collected in a similar setting, and analyze the impact of linguistic and socio-cultural factors on code-switching patterns across the two language pairs. 2020.conll-1.46 @@ -566,7 +566,7 @@ TianyuLiu ZhengXin XiaoanDing - BaobaoChang + BaobaoChang ZhifangSui 596–608 The prior work on natural language inference (NLI) debiasing mainly targets at one or few known biases while not necessarily making the models more robust. In this paper, we focus on the model-agnostic debiasing strategies and explore how to (or is it possible to) make the NLI models robust to multiple distinct adversarial attacks while keeping or even strengthening the models’ generalization power. We firstly benchmark prevailing neural NLI models including pretrained ones on various adversarial datasets. We then try to combat distinct known biases by modifying a mixture of experts (MoE) ensemble method and show that it’s nontrivial to mitigate multiple NLI biases at the same time, and that model-level ensemble method outperforms MoE ensemble method. We also perform data augmentation including text swap, word substitution and paraphrase and prove its efficiency in combating various (though not all) adversarial attacks at the same time. Finally, we investigate several methods to merge heterogeneous training data (1.35M) and perform model ensembling, which are straightforward but effective to strengthen NLI models. @@ -578,7 +578,7 @@ Cloze Distillation: Improving Neural Language Models with Human Next-Word Prediction TiwalayoEisape NogaZaslavsky - RogerLevy + RogerLevy 609–619 Contemporary autoregressive language models (LMs) trained purely on corpus data have been shown to capture numerous features of human incremental processing. However, past work has also suggested dissociations between corpus probabilities and human next-word predictions. Here we evaluate several state-of-the-art language models for their match to human next-word predictions and to reading time behavior from eye movements. We then propose a novel method for distilling the linguistic information implicit in human linguistic predictions into pre-trained LMs: Cloze Distillation. We apply this method to a baseline neural LM and show potential improvement in reading time prediction and generalization to held-out human cloze data. 2020.conll-1.49 @@ -613,7 +613,7 @@ MichałPietruszka ŁukaszBorchmann JakubChłędowski - FilipGraliński + FilipGraliński 641–651 This paper investigates various Transformer architectures on the WikiReading Information Extraction and Machine Reading Comprehension dataset. The proposed dual-source model outperforms the current state-of-the-art by a large margin. Next, we introduce WikiReading Recycled - a newly developed public dataset, and the task of multiple-property extraction. It uses the same data as WikiReading but does not inherit its predecessor’s identified disadvantages. In addition, we provide a human-annotated test set with diagnostic subsets for a detailed analysis of model performance. 2020.conll-1.52 @@ -638,12 +638,12 @@ OmriAbend LashaAbzianidze JohanBos - JanHajič + JanHajič DanielHershcovich BinLi TimO'Gorman NianwenXue - DanielZeman + DanielZeman Association for Computational Linguistics
Online
November diff --git a/data/xml/2020.crac.xml b/data/xml/2020.crac.xml index 2f5058251b..cba85b24d9 100644 --- a/data/xml/2020.crac.xml +++ b/data/xml/2020.crac.xml @@ -6,7 +6,7 @@ MaciejOgrodniczuk VincentNg YuliaGrishina - SameerPradhan + SameerPradhan Association for Computational Linguistics
Barcelona, Spain (online)
December @@ -28,8 +28,8 @@
It’s absolutely divine! Can fine-grained sentiment analysis benefit from coreference resolution? - OrpheeDe Clercq - VeroniqueHoste + OrpheeDe Clercq + VeroniqueHoste 11–21 While it has been claimed that anaphora or coreference resolution plays an important role in opinion mining, it is not clear to what extent coreference resolution actually boosts performance, if at all. In this paper, we investigate the potential added value of coreference resolution for the aspect-based sentiment analysis of restaurant reviews in two languages, English and Dutch. We focus on the task of aspect category classification and investigate whether including coreference information prior to classification to resolve implicit aspect mentions is beneficial. Because coreference resolution is not a solved task in NLP, we rely on both automatically-derived and gold-standard coreference relations, allowing us to investigate the true upper bound. By training a classifier on a combination of lexical and semantic features, we show that resolving the coreferential relations prior to classification is beneficial in a joint optimization setup. However, this is only the case when relying on gold-standard relations and the result is more outspoken for English than for Dutch. When validating the optimal models, however, we found that only the Dutch pipeline is able to achieve a satisfying performance on a held-out test set and does so regardless of whether coreference information was included. 2020.crac-1.2 @@ -38,7 +38,7 @@ Anaphoric Zero Pronoun Identification: A Multilingual Approach AbdulrahmanAloraini - MassimoPoesio + MassimoPoesio 22–32 Pro-drop languages such as Arabic, Chinese, Italian or Japanese allow morphologically null but referential arguments in certain syntactic positions, called anaphoric zero-pronouns. Much NLP work on anaphoric zero-pronouns (AZP) is based on gold mentions, but models for their identification are a fundamental prerequisite for their resolution in real-life applications. Such identification requires complex language understanding and knowledge of real-world entities. Transfer learning models, such as BERT, have recently shown to learn surface, syntactic, and semantic information,which can be very useful in recognizing AZPs. We propose a BERT-based multilingual model for AZP identification from predicted zero pronoun positions, and evaluate it on the Arabic and Chinese portions of OntoNotes 5.0. As far as we know, this is the first neural network model of AZP identification for Arabic; and our approach outperforms the stateof-the-art for Chinese. Experiment results suggest that BERT implicitly encode information about AZPs through their surrounding context. 2020.crac-1.3 @@ -66,7 +66,7 @@ <fixed-case>T</fixed-case>wi<fixed-case>C</fixed-case>onv: A Coreference-annotated Corpus of <fixed-case>T</fixed-case>witter Conversations - BerfinAktaş + BerfinAktaş AnnalenaKohnert 47–54 This article introduces TwiConv, an English coreference-annotated corpus of microblog conversations from Twitter. We describe the corpus compilation process and the annotation scheme, and release the corpus publicly, along with this paper. We manually annotated nominal coreference in 1756 tweets arranged in 185 conversation threads. The annotation achieves satisfactory annotation agreement results. We also present a new method for mapping the tweet contents with distributed stand-off annotations, which can easily be adapted to different annotation tasks. @@ -75,7 +75,7 @@ Integrating knowledge graph embeddings to improve mention representation for bridging anaphora resolution - OnkarPandit + OnkarPandit PascalDenis LivaRalaivola 55–67 @@ -102,8 +102,8 @@ Partially-supervised Mention Detection - LeslyMiculicich - JamesHenderson + LeslyMiculicich + JamesHenderson 91–98 Learning to detect entity mentions without using syntactic information can be useful for integration and joint optimization with other tasks. However, it is common to have partially annotated data for this problem. Here, we investigate two approaches to deal with partial annotation of mentions: weighted loss and soft-target classification. We also propose two neural mention detection approaches: a sequence tagging, and an exhaustive search. We evaluate our methods with coreference resolution as a downstream task, using multitask learning. The results show that the recall and F1 score improve for all methods. 2020.crac-1.10 @@ -113,7 +113,7 @@ Neural Coreference Resolution for <fixed-case>A</fixed-case>rabic AbdulrahmanAloraini JuntaoYu - MassimoPoesio + MassimoPoesio 99–110 No neural coreference resolver for Arabic exists, in fact we are not aware of any learning-based coreference resolver for Arabic since (Björkelund and Kuhn, 2014). In this paper, we introduce a coreference resolution system for Arabic based on Lee et al’s end-to-end architecture combined with the Arabic version of bert and an external mention detector. As far as we know, this is the first neural coreference resolution system aimed specifically to Arabic, and it substantially outperforms the existing state-of-the-art on OntoNotes 5.0 with a gain of 15.2 points conll F1. We also discuss the current limitations of the task for Arabic and possible approaches that can tackle these challenges. 2020.crac-1.11 @@ -122,7 +122,7 @@ Enhanced Labelling in Active Learning for Coreference Resolution VebjørnEspeland - BeatriceAlex + BeatriceAlex BenjaminBach 111–121 In this paper we describe our attempt to increase the amount of information that can be retrieved through active learning sessions compared to previous approaches. We optimise the annotator’s labelling process using active learning in the context of coreference resolution. Using simulated active learning experiments, we suggest three adjustments to ensure the labelling time is spent as efficiently as possible. All three adjustments provide more information to the machine learner than the baseline, though a large impact on the F1 score over time is not observed. Compared to previous models, we report a marginal F1 improvement on the final coreference models trained using for two out of the three approaches tested when applied to the English OntoNotes 2012 Coreference Resolution data. Our best-performing model achieves 58.01 F1, an increase of 0.93 F1 over the baseline model. @@ -132,7 +132,7 @@ Reference in Team Communication for Robot-Assisted Disaster Response: An Initial Analysis NataliaSkachkova - IvanaKruijff-Korbayova + IvanaKruijff-Korbayova 122–132 We analyze reference phenomena in a corpus of robot-assisted disaster response team communication. The annotation scheme we designed for this purpose distinguishes different types of entities, roles, reference units and relations. We focus particularly on mission-relevant objects, locations and actors and also annotate a rich set of reference links, including co-reference and various other kinds of relations. We explain the categories used in our annotation, present their distribution in the corpus and discuss challenging cases. 2020.crac-1.13 @@ -142,7 +142,7 @@ Resolving Pronouns in <fixed-case>T</fixed-case>witter Streams: Context can Help! AnietieAndy ChrisCallison-Burch - Derry TantiWijaya + Derry TantiWijaya 133–138 Many people live-tweet televised events like Presidential debates and popular TV-shows and discuss people or characters in the event. Naturally, many tweets make pronominal reference to these people/characters. We propose an algorithm for resolving personal pronouns that make reference to people involved in an event, in tweet streams collected during the event. 2020.crac-1.14 diff --git a/data/xml/2020.deelio.xml b/data/xml/2020.deelio.xml index 72935fd9e1..41f196e55f 100644 --- a/data/xml/2020.deelio.xml +++ b/data/xml/2020.deelio.xml @@ -3,7 +3,7 @@ Proceedings of Deep Learning Inside Out (DeeLIO): The First Workshop on Knowledge Extraction and Integration for Deep Learning Architectures - EnekoAgirre + EnekoAgirre MariannaApidianaki IvanVulić Association for Computational Linguistics @@ -43,7 +43,7 @@ Generalization to Mitigate Synonym Substitution Attacks BasemahAlshemali - JugalKalita + JugalKalita 20–28 Studies have shown that deep neural networks (DNNs) are vulnerable to adversarial examples – perturbed inputs that cause DNN-based models to produce incorrect results. One robust adversarial attack in the NLP domain is the synonym substitution. In attacks of this variety, the adversary substitutes words with synonyms. Since synonym substitution perturbations aim to satisfy all lexical, grammatical, and semantic constraints, they are difficult to detect with automatic syntax check as well as by humans. In this paper, we propose a structure-free defensive method that is capable of improving the performance of DNN-based models with both clean and adversarial data. Our findings show that replacing the embeddings of the important words in the input samples with the average of their synonyms’ embeddings can significantly improve the generalization of DNN-based classifiers. By doing so, we reduce model sensitivity to particular words in the input samples. Our results indicate that the proposed defense is not only capable of defending against adversarial attacks, but is also capable of improving the performance of DNN-based models when tested on benign data. On average, the proposed defense improved the classification accuracy of the CNN and Bi-LSTM models by 41.30% and 55.66%, respectively, when tested under adversarial attacks. Extended investigation shows that our defensive method can improve the robustness of nonneural models, achieving an average of 17.62% and 22.93% classification accuracy increase on the SVM and XGBoost models, respectively. The proposed defensive method has also shown an average of 26.60% classification accuracy improvement when tested with the infamous BERT model. Our algorithm is generic enough to be applied in any NLP domain and to any model trained on any natural language. 2020.deelio-1.3 @@ -57,7 +57,7 @@ VarunGangal DongyeopKang TerukoMitamura - EduardHovy + EduardHovy 29–42 In this paper, we investigate data augmentation for text generation, which we call GenAug. Text generation and language modeling are important tasks within natural language processing, and are especially challenging for low-data regimes. We propose and evaluate various augmentation methods, including some that incorporate external knowledge, for finetuning GPT-2 on a subset of Yelp Reviews. We also examine the relationship between the amount of augmentation and the quality of the generated text. We utilize several metrics that evaluate important aspects of the generated text including its diversity and fluency. Our experiments demonstrate that insertion of character-level synthetic noise and keyword replacement with hypernyms are effective augmentation methods, and that the quality of generations improves to a peak at approximately three times the amount of original data. 2020.deelio-1.4 @@ -121,7 +121,7 @@ KarthikGopalakrishnan BehnamHedayatnia PeiZhou - DilekHakkani-Tur + DilekHakkani-Tur 74–79 Pretrained language models have excelled at many NLP tasks recently; however, their social intelligence is still unsatisfactory. To enable this, machines need to have a more general understanding of our complicated world and develop the ability to perform commonsense reasoning besides fitting the specific downstream tasks. External commonsense knowledge graphs (KGs), such as ConceptNet, provide rich information about words and their relationships. Thus, towards general commonsense learning, we propose two approaches to implicitly and explicitly infuse such KGs into pretrained language models. We demonstrate our proposed methods perform well on SocialIQA, a social commonsense reasoning task, in both limited and full training data regimes. 2020.deelio-1.9 diff --git a/data/xml/2020.dmr.xml b/data/xml/2020.dmr.xml index 762d3dcdc3..334b555cce 100644 --- a/data/xml/2020.dmr.xml +++ b/data/xml/2020.dmr.xml @@ -6,11 +6,11 @@ NianwenXue JohanBos WilliamCroft - JanHajič + JanHajič Chu-RenHuang StephanOepen - MarthaPalmer - JamesPustejovsky + MarthaPalmer + JamesPustejovsky Association for Computational Linguistics
Barcelona Spain (online)
December @@ -88,11 +88,11 @@
<fixed-case>I</fixed-case>nfo<fixed-case>F</fixed-case>orager: Leveraging Semantic Search with <fixed-case>AMR</fixed-case> for <fixed-case>COVID</fixed-case>-19 Research - ClaireBonial - Stephanie M.Lukin + ClaireBonial + Stephanie M.Lukin DavidDoughty StevenHill - ClareVoss + ClareVoss 67–77 This paper examines how Abstract Meaning Representation (AMR) can be utilized for finding answers to research questions in medical scientific documents, in particular, to advance the study of UV (ultraviolet) inactivation of the novel coronavirus that causes the disease COVID-19. We describe the development of a proof-of-concept prototype tool, InfoForager, which uses AMR to conduct a semantic search, targeting the meaning of the user question, and matching this to sentences in medical documents that may contain information to answer that question. This work was conducted as a sprint over a period of six weeks, and reveals both promising results and challenges in reducing the user search time relating to COVID-19 research, and in general, domain adaption of AMR for this task. 2020.dmr-1.7 diff --git a/data/xml/2020.eamt.xml b/data/xml/2020.eamt.xml index 3f11d15626..07f1376ddb 100644 --- a/data/xml/2020.eamt.xml +++ b/data/xml/2020.eamt.xml @@ -3,13 +3,13 @@ Proceedings of the 22nd Annual Conference of the European Association for Machine Translation - AndréMartins + AndréMartins HelenaMoniz SaraFumega BrunoMartins FernandoBatista - LuisaCoheur - CarlaParra + LuisaCoheur + CarlaParra IsabelTrancoso MarcoTurchi AriannaBisazza @@ -17,7 +17,7 @@ AnaGuerberof MaryNurminen LenaMarg - Mikel L.Forcada + Mikel L.Forcada European Association for Machine Translation
Lisboa, Portugal
November @@ -49,7 +49,7 @@ Efficiently Reusing Old Models Across Languages via Transfer Learning TomKocmi - OndřejBojar + OndřejBojar 19–28 Recent progress in neural machine translation (NMT) is directed towards larger neural networks trained on an increasing amount of hardware resources. As a result, NMT models are costly to train, both financially, due to the electricity and hardware cost, and environmentally, due to the carbon footprint. It is especially true in transfer learning for its additional cost of training the “parent” model before transferring knowledge and training the desired “child” model. In this paper, we propose a simple method of re-using an already trained model for different language pairs where there is no need for modifications in model architecture. Our approach does not need a separate parent model for each investigated language pair, as it is typical in NMT transfer learning. To show the applicability of our method, we recycle a Transformer model trained by different researchers and use it to seed models for different language pairs. We achieve better translation quality and shorter convergence times than when training from random initialization. 2020.eamt-1.3 @@ -71,7 +71,7 @@ When and Why is Unsupervised Neural Machine Translation Useless? YunsuKim MiguelGraça - HermannNey + HermannNey 35–44 This paper studies the practicality of the current state-of-the-art unsupervised methods in neural machine translation (NMT). In ten translation tasks with various data settings, we analyze the conditions under which the unsupervised methods fail to produce reasonable translations. We show that their performance is severely affected by linguistic dissimilarity and domain mismatch between source and target monolingual data. Such conditions are common for low-resource language pairs, where unsupervised learning works poorly. In all of our experiments, supervised and semi-supervised baselines with 50k-sentence bilingual data outperform the best unsupervised results. Our analyses pinpoint the limits of the current unsupervised NMT and also suggest immediate research directions. 2020.eamt-1.5 @@ -81,9 +81,9 @@ Incorporating External Annotation to improve Named Entity Translation in <fixed-case>NMT</fixed-case> MaciejModrzejewski MiriamExel - BiankaBuschbeck + BiankaBuschbeck Thanh-LeHa - AlexanderWaibel + AlexanderWaibel 45–51 The correct translation of named entities (NEs) still poses a challenge for conventional neural machine translation (NMT) systems. This study explores methods incorporating named entity recognition (NER) into NMT with the aim to improve named entity translation. It proposes an annotation method that integrates named entities and inside–outside–beginning (IOB) tagging into the neural network input with the use of source factors. Our experiments on English→German and English→ Chinese show that just by including different NE classes and IOB tagging, we can increase the BLEU score by around 1 point using the standard test set from WMT2019 and achieve up to 12% increase in NE translation rates over a strong baseline. 2020.eamt-1.6 @@ -103,7 +103,7 @@ A multi-source approach for <fixed-case>B</fixed-case>reton–<fixed-case>F</fixed-case>rench hybrid machine translation - Víctor M.Sánchez-Cartagena + Víctor M.Sánchez-Cartagena Mikel L.Forcada FelipeSánchez-Martínez 61–70 @@ -126,7 +126,7 @@ Low-Resource Unsupervised <fixed-case>NMT</fixed-case>: Diagnosing the Problem and Providing a Linguistically Motivated Solution LukasEdman AntonioToral - Gertjanvan Noord + Gertjanvan Noord 81–90 Unsupervised Machine Translation has been advancing our ability to translate without parallel data, but state-of-the-art methods assume an abundance of monolingual data. This paper investigates the scenario where monolingual data is limited as well, finding that current unsupervised methods suffer in performance under this stricter setting. We find that the performance loss originates from the poor quality of the pretrained monolingual embeddings, and we offer a potential solution: dependency-based word embeddings. These embeddings result in a complementary word representation which offers a boost in performance of around 1.5 BLEU points compared to standard word2vec when monolingual data is limited to 1 million sentences per language. We also find that the inclusion of sub-word information is crucial to improving the quality of the embeddings. 2020.eamt-1.10 @@ -158,7 +158,7 @@ MaaritKoponen UmutSulubacak KaisaVitikainen - JörgTiedemann + JörgTiedemann 115–124 This paper presents a user evaluation of machine translation and post-editing for TV subtitles. Based on a process study where 12 professional subtitlers translated and post-edited subtitles, we compare effort in terms of task time and number of keystrokes. We also discuss examples of specific subtitling features like condensation, and how these features may have affected the post-editing results. In addition to overall MT quality, segmentation and timing of the subtitles are found to be important issues to be addressed in future work. 2020.eamt-1.13 @@ -185,7 +185,7 @@ Quality In, Quality Out: Learning from Actual Mistakes - FredericBlain + FredericBlain NikolaosAletras LuciaSpecia 145–153 @@ -213,8 +213,8 @@ Intelligent Translation Memory Matching and Retrieval with Sentence Encoders TharinduRanasinghe - ConstantinOrasan - RuslanMitkov + ConstantinOrasan + RuslanMitkov 175–184 Matching and retrieving previously translated segments from the Translation Memory is a key functionality in Translation Memories systems. However this matching and retrieving process is still limited to algorithms based on edit distance which we have identified as a major drawback in Translation Memories systems. In this paper, we introduce sentence encoders to improve matching and retrieving process in Translation Memories systems - an effective and efficient solution to replace edit distance-based algorithms. 2020.eamt-1.19 @@ -233,7 +233,7 @@ Kamal KumarGupta RejwanulHaque AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya AndyWay 195–204 In interactive machine translation (MT), human translators correct errors in automatic translations in collaboration with the MT systems, which is seen as an effective way to improve the productivity gain in translation. In this study, we model source-language syntactic constituency parse and target-language syntactic descriptions in the form of supertags as conditional context for interactive prediction in neural MT (NMT). We found that the supertags significantly improve productivity gain in translation in interactive-predictive NMT (INMT), while syntactic parsing somewhat found to be effective in reducing human effort in translation. Furthermore, when we model this source- and target-language syntactic information together as the conditional context, both types complement each other and our fully syntax-informed INMT model statistically significantly reduces human efforts in a French–to–English translation task, achieving 4.30 points absolute (corresponding to 9.18% relative) improvement in terms of word prediction accuracy (WPA) and 4.84 points absolute (corresponding to 9.01% relative) reduction in terms of word stroke ratio (WSR) over the baseline. @@ -262,7 +262,7 @@ Document-level Neural <fixed-case>MT</fixed-case>: A Systematic Comparison AntónioLopes - M. AminFarajian + M. AminFarajian RachelBawden MichaelZhang André F. T.Martins @@ -274,7 +274,7 @@ Automatic Translation for Multiple <fixed-case>NLP</fixed-case> tasks: a Multi-task Approach to Machine-oriented <fixed-case>NMT</fixed-case> Adaptation AmirhosseinTebbifakhr - MatteoNegri + MatteoNegri MarcoTurchi 235–244 Although machine translation (MT) traditionally pursues “human-oriented” objectives, humans are not the only possible consumers of MT output. For instance, when automatic translations are used to feed downstream Natural Language Processing (NLP) components in cross-lingual settings, they should ideally pursue “machine-oriented” objectives that maximize the performance of these components. Tebbifakhr et al. (2019) recently proposed a reinforcement learning approach to adapt a generic neural MT(NMT) system by exploiting the reward from a downstream sentiment classifier. But what if the downstream NLP tasks to serve are more than one? How to avoid the costs of adapting and maintaining one dedicated NMT system for each task? We address this problem by proposing a multi-task approach to machine-oriented NMT adaptation, which is capable to serve multiple downstream tasks with a single system. Through experiments with Spanish and Italian data covering three different tasks, we show that our approach can outperform a generic NMT system, and compete with single-task models in most of the settings. @@ -311,7 +311,7 @@ Terminology-Constrained Neural Machine Translation at <fixed-case>SAP</fixed-case> MiriamExel - BiankaBuschbeck + BiankaBuschbeck LauritzBrandt SimonaDoneva 271–280 @@ -323,7 +323,7 @@ Ellipsis Translation for a Medical Speech to Speech Translation System JonathanMutal JohannaGerlach - PierretteBouillon + PierretteBouillon HervéSpechbach 281–290 In diagnostic interviews, elliptical utterances allow doctors to question patients in a more efficient and economical way. However, literal translation of such incomplete utterances is rarely possible without affecting communication. Previous studies have focused on automatic ellipsis detection and resolution, but only few specifically address the problem of automatic translation of ellipsis. In this work, we evaluate four different approaches to translate ellipsis in medical dialogues in the context of the speech to speech translation system BabelDr. We also investigate the impact of training data, using an under-sampling method and data with elliptical utterances in context. Results show that the best model is able to translate 88% of elliptical utterances. @@ -332,10 +332,10 @@ Bifixer and Bicleaner: two open-source tools to clean your parallel data - GemaRamírez-Sánchez + GemaRamírez-Sánchez JaumeZaragoza-Bernabeu MartaBañón - Sergio OrtizRojas + Sergio OrtizRojas 291–298 This paper shows the utility of two open-source tools designed for parallel data cleaning: Bifixer and Bicleaner. Already used to clean highly noisy parallel content from crawled multilingual websites, we evaluate their performance in a different scenario: cleaning publicly available corpora commonly used to train machine translation systems. We choose four English–Portuguese corpora which we plan to use internally to compute paraphrases at a later stage. We clean the four corpora using both tools, which are described in detail, and analyse the effect of some of the cleaning steps on them. We then compare machine translation training times and quality before and after cleaning these corpora, showing a positive impact particularly for the noisiest ones. 2020.eamt-1.31 @@ -344,10 +344,10 @@ An <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>wahili parallel corpus and its use for neural machine translation in the news domain FelipeSánchez-Martínez - Víctor M.Sánchez-Cartagena + Víctor M.Sánchez-Cartagena Juan AntonioPérez-Ortiz Mikel L.Forcada - MiquelEsplà-Gomis + MiquelEsplà-Gomis AndrewSecker SusieColeman JulieWall @@ -368,12 +368,12 @@ A User Study of the Incremental Learning in <fixed-case>NMT</fixed-case> MiguelDomingo - MercedesGarcía-Martínez - ÁlvaroPeris + MercedesGarcía-Martínez + ÁlvaroPeris AlexandreHelle AmandoEstela LaurentBié - FranciscoCasacuberta + FranciscoCasacuberta ManuelHerranz 319–328 In the translation industry, human experts usually supervise and post-edit machine translation hypotheses. Adaptive neural machine translation systems, able to incrementally update the underlying models under an online learning regime, have been proven to be useful to improve the efficiency of this workflow. However, this incremental adaptation is somewhat unstable, and it may lead to undesirable side effects. One of them is the sporadic appearance of made-up words, as a byproduct of an erroneous application of subword segmentation techniques. In this work, we extend previous studies on on-the-fly adaptation of neural machine translation systems. We perform a user study involving professional, experienced post-editors, delving deeper on the aforementioned problems. Results show that adaptive systems were able to learn how to generate the correct translation for task-specific terms, resulting in an improvement of the user’s productivity. We also observed a close similitude, in terms of morphology, between made-up words and the words that were expected. @@ -385,7 +385,7 @@ Daniel MarínBuj DanielIbáñez García ZuzannaParcheta - FranciscoCasacuberta + FranciscoCasacuberta 329–338 In this paper, we present a machine translation system implemented by the Translation Centre for the Bodies of the European Union (CdT). The main goal of this project is to create domain-specific machine translation engines in order to support machine translation services and applications to the Translation Centre’s clients. In this article, we explain the entire implementation process of NICE: Neural Integrated Custom Engines. We describe the problems identified and the solutions provided, and present the final results for different language pairs. Finally, we describe the work that will be done on this project in the future. 2020.eamt-1.35 @@ -419,7 +419,7 @@ On the differences between human translations - MajaPopovic + MajaPopovic 365–374 Many studies have confirmed that translated texts exhibit different features than texts originally written in the given language. This work explores texts translated by different translators taking into account expertise and native language. A set of computational analyses was conducted on three language pairs, English-Croatian, German-French and English-Finnish, and the results show that each of the factors has certain influence on the features of the translated texts, especially on sentence length and lexical richness. The results also indicate that for translations used for machine translation evaluation, it is important to specify these factors, especially if comparing machine translation quality with human translation quality is involved. 2020.eamt-1.39 @@ -433,7 +433,7 @@ JonathanMutal SabrinaGirletti LiseVolkart - PierretteBouillon + PierretteBouillon 375–382 We believe that machine translation (MT) must be introduced to translation students as part of their training, in preparation for their professional life. In this paper we present a new version of the tool called MT3, which builds on and extends a joint effort undertaken by the Faculty of Languages of the University of Córdoba and Faculty of Translation and Interpreting of the University of Geneva to develop an open-source web platform to teach MT to translation students. We also report on a pilot experiment with the goal of testing the viability of using MT^3 in an MT course. The pilot let us identify areas for improvement and collect students’ feedback about the tool’s usability. 2020.eamt-1.40 @@ -501,7 +501,7 @@ Machine Translation Quality: A comparative evaluation of <fixed-case>SMT</fixed-case>, <fixed-case>NMT</fixed-case> and tailored-<fixed-case>NMT</fixed-case> outputs MariaStasimioti VilelminiSosoni - KatiaKermanidis + KatiaKermanidis DespoinaMouratidis 441–450 The present study aims to compare three systems: a generic statistical machine translation (SMT), a generic neural machine translation (NMT) and a tailored-NMT system focusing on the English to Greek language pair. The comparison is carried out following a mixed-methods approach, i.e. automatic metrics, as well as side-by-side ranking, adequacy and fluency rating, measurement of actual post editing (PE) effort and human error analysis performed by 16 postgraduate Translation students. The findings reveal a higher score for both the generic NMT and the tailored-NMT outputs as regards automatic metrics and human evaluation metrics, with the tailored-NMT output faring even better than the generic NMT output. @@ -554,7 +554,7 @@ <fixed-case>QR</fixed-case>ev: Machine Translation of User Reviews: What Influences the Translation Quality? - MajaPopovic + MajaPopovic 461–462 This project aims to identify the important aspects of translation quality of user reviews which will represent a starting point for developing better automatic MT metrics and challenge test sets, and will be also helpful for developing MT systems for this genre. We work on two types of reviews: Amazon products and IMDb movies, written in English and translated into two closely related target languages, Croatian and Serbian. 2020.eamt-1.52 @@ -562,10 +562,10 @@ <fixed-case>ELITR</fixed-case>: <fixed-case>E</fixed-case>uropean Live Translator - OndřejBojar + OndřejBojar DominikMacháček SangeetSagar - OtakarSmrž + OtakarSmrž JonášKratochvíl EbrahimAnsari DarioFranceschini @@ -574,7 +574,7 @@ Thai-SonNguyen FelixSchneider SebastianStücker - AlexWaibel + AlexWaibel BarryHaddow RicoSennrich PhilipWilliams @@ -625,21 +625,21 @@ VictoriaArranz LaurentBié AleixCerdà-i-Cucó - KhalidChoukri + KhalidChoukri MontseCuadros HansDegroote AmandoEstela ThierryEtchegoyhen - MercedesGarcía-Martínez + MercedesGarcía-Martínez AitorGarcía-Pablos ManuelHerranz AlejandroKohan MaiteMelero - MikeRosner + MikeRosner RobertsRozis - PatrickParoubek + PatrickParoubek ArtūrsVasiļevskis - PierreZweigenbaum + PierreZweigenbaum 471–472 We describe the MAPA project, funded under the Connecting Europe Facility programme, whose goal is the development of an open-source de-identification toolkit for all official European Union languages. It will be developed since January 2020 until December 2021. 2020.eamt-1.57 @@ -672,13 +672,13 @@ AleixCerdà-i-Cucó HansDegroote AmandoEstela - MercedesGarcía-Martínez + MercedesGarcía-Martínez ManuelHerranz AlejandroKohan MaiteMelero TonyO’Dowd SinéadO’Gorman - MārcisPinnis + MārcisPinnis RobertsRozis RiccardoSuperbo ArtūrsVasiļevskis @@ -689,7 +689,7 @@ <fixed-case>OPUS</fixed-case>-<fixed-case>MT</fixed-case> – Building open translation services for the World - JörgTiedemann + JörgTiedemann SanthoshThottingal 479–480 This paper presents OPUS-MT a project that focuses on the development of free resources and tools for machine translation. The current status is a repository of over 1,000 pre-trained neural machine translation models that are ready to be launched in on-line translation services. For this we also provide open source implementations of web applications that can run efficiently on average desktop hardware with a straightforward setup and installation. @@ -707,7 +707,7 @@ AlinaKramchaninova AnnaBardadym TomVanallemeersch - PavelSmrž + PavelSmrž MichalHradiš 481–482 The OCCAM project (Optical Character recognition, ClassificAtion & Machine Translation) aims at integrating the CEF (Connecting Europe Facility) Automated Translation service with image classification, Translation Memories (TMs), Optical Character Recognition (OCR), and Machine Translation (MT). It will support the automated translation of scanned business documents (a document format that, currently, cannot be processed by the CEF eTranslation service) and will also lead to a tool useful for the Digital Humanities domain. @@ -769,7 +769,7 @@ Project <fixed-case>MAIA</fixed-case>: Multilingual <fixed-case>AI</fixed-case> Agent Assistant André F. T.Martins - JoaoGraca + JoaoGraca PauloDimas HelenaMoniz GrahamNeubig diff --git a/data/xml/2020.ecnlp.xml b/data/xml/2020.ecnlp.xml index a11751034c..afe92f77e4 100644 --- a/data/xml/2020.ecnlp.xml +++ b/data/xml/2020.ecnlp.xml @@ -3,7 +3,7 @@ Proceedings of the 3rd Workshop on e-Commerce and NLP - ShervinMalmasi + ShervinMalmasi SuryaKallumadi NicolaUeffing OlegRokhlenko @@ -102,10 +102,10 @@ Semi-Supervised Iterative Approach for Domain-Specific Complaint Detection in Social Media - AkashGautam + AkashGautam DebanjanMahata RakeshGosangi - Rajiv RatnShah + Rajiv RatnShah 46–53 In this paper, we present a semi-supervised bootstrapping approach to detect product or service related complaints in social media. Our approach begins with a small collection of annotated samples which are used to identify a preliminary set of linguistic indicators pertinent to complaints. These indicators are then used to expand the dataset. The expanded dataset is again used to extract more indicators. This process is applied for several iterations until we can no longer find any new indicators. We evaluated this approach on a Twitter corpus specifically to detect complaints about transportation services. We started with an annotated set of 326 samples of transportation complaints, and after four iterations of the approach, we collected 2,840 indicators and over 3,700 tweets. We annotated a random sample of 700 tweets from the final dataset and observed that nearly half the samples were actual transportation complaints. Lastly, we also studied how different features based on semantics, orthographic properties, and sentiment contribute towards the prediction of complaints. 2020.ecnlp-1.7 @@ -166,7 +166,7 @@ e-Commerce and Sentiment Analysis: Predicting Outcomes of Class Action Lawsuits StaceyTaylor - VladoKeselj + VladoKeselj 77–85 In recent years, the focus of e-Commerce research has been on better understanding the relationship between the internet marketplace, customers, and goods and services. This has been done by examining information that can be gleaned from consumer information, recommender systems, click rates, or the way purchasers go about making buying decisions, for example. This paper takes a very different approach and examines the companies themselves. In the past ten years, e-Commerce giants such as Amazon, Skymall, Wayfair, and Groupon have been embroiled in class action security lawsuits promulgated under Rule 10b(5), which, in short, is one of the Securities and Exchange Commission’s main rules surrounding fraud. Lawsuits are extremely expensive to the company and can damage a company’s brand extensively, with the shareholders left to suffer the consequences. We examined the Management Discussion and Analysis and the Market Risks for 96 companies using sentiment analysis on selected financial measures and found that we were able to predict the outcome of the lawsuits in our dataset using sentiment (tone) alone to a recall of 0.8207 using the Random Forest classifier. We believe that this is an important contribution as it has cross-domain implications and potential, and opens up new areas of research in e-Commerce, finance, and law, as the settlements from the class action lawsuits in our dataset alone are in excess of $1.6 billion dollars, in aggregate. 2020.ecnlp-1.12 @@ -176,7 +176,7 @@ On Application of <fixed-case>B</fixed-case>ayesian Parametric and Non-parametric Methods for User Cohorting in Product Search - ShashankGupta + ShashankGupta 86–89 In this paper, we study the applicability of Bayesian Parametric and Non-parametric methods for user clustering in an E-commerce search setting. To the best of our knowledge, this is the first work that presents a comparative study of various Bayesian clustering methods in the context of product search. Specifically, we cluster users based on their topical patterns from their respective product search queries. To evaluate the quality of the clusters formed, we perform a collaborative query recommendation task. Our findings indicate that simple parametric model like Latent Dirichlet Allocation (LDA) outperforms more sophisticated non-parametric methods like Distance Dependent Chinese Restaurant Process and Dirichlet Process-based clustering in both tasks. 2020.ecnlp-1.13 diff --git a/data/xml/2020.ecomnlp.xml b/data/xml/2020.ecomnlp.xml index e149c724fd..da2ead448b 100644 --- a/data/xml/2020.ecomnlp.xml +++ b/data/xml/2020.ecomnlp.xml @@ -23,7 +23,7 @@ <fixed-case>E</fixed-case>-Commerce Content and Collaborative-based Recommendation using K-Nearest Neighbors and Enriched Weighted Vectors BardiaRafieian - Marta R.Costa-jussà + Marta R.Costa-jussà 1–10 In this paper, we present two productive and functional recommender methods to improve the ac- curacy of predicting the right product for the user. One proposal is a survey-based recommender system that uses k-nearest neighbors. It recommends products by asking questions from the user, efficiently applying a binary product vector to the product attributes, and processing the request with a minimum error. The second proposal uses an enriched collaborative-based recommender system using enriched weighted vectors. Thanks to the style rules, the enriched collaborative- based method recommends outfits with competitive recommendation quality. We evaluated both of the proposals on a Kaggle fashion-dataset along with iMaterialist and, results show equivalent performance on binary gender and product attributes. 2020.ecomnlp-1.1 diff --git a/data/xml/2020.emnlp.xml b/data/xml/2020.emnlp.xml index c99fbf4121..228d9394a6 100644 --- a/data/xml/2020.emnlp.xml +++ b/data/xml/2020.emnlp.xml @@ -3,8 +3,8 @@ Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP) - BonnieWebber - TrevorCohn + BonnieWebber + TrevorCohn YulanHe YangLiu Association for Computational Linguistics @@ -23,8 +23,8 @@ YohanJo SeojinBang EmaadManzoor - EduardHovy - ChrisReed + EduardHovy + ChrisReed 1–23 Finding attackable sentences in an argument is the first step toward successful refutation in argumentation. We present a first large-scale analysis of sentence attackability in online arguments. We analyze driving reasons for attacks in argumentation and identify relevant characteristics of sentences. We demonstrate that a sentence’s attackability is associated with many of these characteristics regarding the sentence’s content, proposition types, and tone, and that an external knowledge source can provide useful information about attackability. Building on these findings, we demonstrate that machine learning models can automatically detect attackable sentences in arguments, significantly better than several baselines and comparably well to laypeople. 2020.emnlp-main.1 @@ -36,8 +36,8 @@ Extracting Implicitly Asserted Propositions in Argumentation YohanJo JackyVisser - ChrisReed - EduardHovy + ChrisReed + EduardHovy 24–38 Argumentation accommodates various rhetorical devices, such as questions, reported speech, and imperatives. These rhetorical tools usually assert argumentatively relevant propositions rather implicitly, so understanding their true meaning is key to understanding certain arguments properly. However, most argument mining systems and computational linguistics research have paid little attention to implicitly asserted propositions in argumentation. In this paper, we examine a wide range of computational methods for extracting propositions that are implicitly asserted in questions, reported speech, and imperatives in argumentation. By evaluating the models on a corpus of 2016 U.S. presidential debates and online commentary, we demonstrate the effectiveness and limitations of the computational models. Our study may inform future research on argument mining and the semantics of these rhetorical devices in argumentation. 2020.emnlp-main.2 @@ -207,7 +207,7 @@ YianZhang XiaochengLi HaokunLiu - Samuel R.Bowman + Samuel R.Bowman 217–235 One reason pretraining on self-supervised linguistic tasks is effective is that it teaches models features that are helpful for language understanding. However, we want pretrained models to learn not only to represent linguistic features, but also to use those features preferentially during fine-turning. With this goal in mind, we introduce a new English-language diagnostic set called MSGS (the Mixed Signals Generalization Set), which consists of 20 ambiguous binary classification tasks that we use to test whether a pretrained model prefers linguistic or surface generalizations during finetuning. We pretrain RoBERTa from scratch on quantities of data ranging from 1M to 1B words and compare their performance on MSGS to the publicly available RoBERTa_BASE. We find that models can learn to represent linguistic features with little pretraining data, but require far more data to learn to prefer linguistic generalizations over surface ones. Eventually, with about 30B words of pretraining data, RoBERTa_BASE does consistently demonstrate a linguistic bias with some regularity. We conclude that while self-supervised pretraining is an effective way to learn helpful inductive biases, there is likely room to improve the rate at which models learn which features matter. 2020.emnlp-main.16 @@ -235,7 +235,7 @@ <fixed-case>KERMIT</fixed-case>: Complementing Transformer Architectures with Encoders of Explicit Syntactic Interpretations - Fabio MassimoZanzotto + Fabio MassimoZanzotto AndreaSantilli LeonardoRanaldi DarioOnorati @@ -270,9 +270,9 @@ Pre-Training Transformers as Energy-Based Cloze Models KevinClark - Minh-ThangLuong - QuocLe - Christopher D.Manning + Minh-ThangLuong + QuocLe + Christopher D.Manning 285–294 We introduce Electric, an energy-based cloze model for representation learning over text. Like BERT, it is a conditional generative model of tokens given their contexts. However, Electric does not use masking or output a full distribution over tokens that could occur in a context. Instead, it assigns a scalar energy score to each input token indicating how likely it is given its context. We train Electric using an algorithm based on noise-contrastive estimation and elucidate how this learning objective is closely related to the recently proposed ELECTRA pre-training method. Electric performs well when transferred to downstream tasks and is particularly effective at producing likelihood scores for text: it re-ranks speech recognition n-best lists better than language models and much faster than masked language models. Furthermore, it offers a clearer and more principled view of what ELECTRA learns during pre-training. 2020.emnlp-main.20 @@ -338,7 +338,7 @@ NathanielMills ChulakaGunasekara YosiMass - SachindraJoshi + SachindraJoshi LuisLastras DavidKonopnicki 349–356 @@ -364,7 +364,7 @@ Augmented Natural Language for Generative Sequence Labeling BenAthiwaratkun - CiceroNogueira dos Santos + CiceroNogueira dos Santos JasonKrone BingXiang 375–385 @@ -380,7 +380,7 @@ YizheZhang MichelGalley ChrisBrockett - BillDolan + BillDolan 386–395 Existing open-domain dialog models are generally trained to minimize the perplexity of target human responses. However, some human replies are more engaging than others, spawning more followup interactions. Current conversational models are increasingly capable of producing turns that are context-relevant, but in order to produce compelling agents, these models need to be able to predict and optimize for turns that are genuinely engaging. We leverage social media feedback data (number of replies and upvotes) to build a large-scale training dataset for feedback prediction. To alleviate possible distortion between the feedback and engagingness, we convert the ranking problem to a comparison of response pairs which involve few confounding factors. We trained DialogRPT, a set of GPT-2 based models on 133M pairs of human feedback data and the resulting ranker outperformed several baselines. Particularly, our ranker outperforms the conventional dialog perplexity baseline with a large margin on predicting Reddit feedback. We finally combine the feedback prediction models and a human-like scoring model to rank the machine-generated dialog responses. Crowd-sourced human evaluation shows that our ranking method correlates better with real human preferences than baseline models. 2020.emnlp-main.28 @@ -432,7 +432,7 @@ A Spectral Method for Unsupervised Multi-Document Summarization KexiangWang - BaobaoChang + BaobaoChang ZhifangSui 435–445 Multi-document summarization (MDS) aims at producing a good-quality summary for several related documents. In this paper, we propose a spectral-based hypothesis, which states that the goodness of summary candidate is closely linked to its so-called spectral impact. Here spectral impact considers the perturbation to the dominant eigenvalue of affinity matrix when dropping the summary candidate from the document cluster. The hypothesis is validated by three theoretical perspectives: semantic scaling, propagation dynamics and matrix perturbation. According to the hypothesis, we formulate the MDS task as the combinatorial optimization of spectral impact and propose an accelerated greedy solution based on a surrogate of spectral impact. The evaluation results on various datasets demonstrate: (1) The performance of the summary candidate is positively correlated with its spectral impact, which accords with our hypothesis; (2) Our spectral-based method has a competitive result as compared to state-of-the-art MDS systems. @@ -533,9 +533,9 @@ Efficient Meta Lifelong-Learning with Limited Memory ZiruiWang - Sanket VaibhavMehta + Sanket VaibhavMehta BarnabasPoczos - JaimeCarbonell + JaimeCarbonell 535–548 Current natural language processing models work well on a single task, yet they often fail to continuously learn new tasks without forgetting previous ones as they are re-trained throughout their lifetime, a challenge known as lifelong learning. State-of-the-art lifelong language learning methods store past examples in episodic memory and replay them at both training and inference time. However, as we show later in our experiments, there are three significant impediments: (1) needing unrealistically large memory module to achieve good performance, (2) suffering from negative transfer, (3) requiring multiple local adaptation steps for each test example that significantly slows down the inference speed. In this paper, we identify three common principles of lifelong learning methods and propose an efficient meta-lifelong framework that combines them in a synergistic fashion. To achieve sample efficiency, our method trains the model in a manner that it learns a better initialization for local adaptation. Extensive experiments on text classification and question answering benchmarks demonstrate the effectiveness of our framework by achieving state-of-the-art performance using merely 1% memory size and narrowing the gap with multi-task learning. We further show that our method alleviates both catastrophic forgetting and negative transfer at the same time. 2020.emnlp-main.39 @@ -657,7 +657,7 @@ Event Extraction by Answering (Almost) Natural Questions XinyaDu - ClaireCardie + ClaireCardie 671–683 The problem of event extraction requires detecting the event trigger and extracting its corresponding arguments. Existing work in event argument extraction typically relies heavily on entity recognition as a preprocessing/concurrent step, causing the well-known problem of error propagation. To avoid this issue, we introduce a new paradigm for event extraction by formulating it as a question answering (QA) task that extracts the event arguments in an end-to-end manner. Empirical results demonstrate that our framework outperforms prior methods substantially; in addition, it is capable of extracting event arguments for roles not seen at training time (i.e., in a zero-shot learning setting). 2020.emnlp-main.49 @@ -673,8 +673,8 @@ KyunghyunCho HengJi JonathanMay - NathanaelChambers - ClareVoss + NathanaelChambers + ClareVoss 684–695 Event schemas can guide our understanding and ability to make predictions with respect to what might happen next. We propose a new Event Graph Schema, where two event types are connected through multiple paths involving entities that fill important roles in a coherent story. We then introduce Path Language Model, an auto-regressive language model trained on event-event paths, and select salient and coherent paths to probabilistically construct these graph schemas. We design two evaluation metrics, instance coverage and instance coherence, to evaluate the quality of graph schema induction, by checking when coherent event instances are covered by the schema graph. Intrinsic evaluations show that our approach is highly effective at inducing salient and coherent schemas. Extrinsic evaluations show the induced schema repository provides significant improvement to downstream end-to-end Information Extraction over a state-of-the-art joint neural extraction model, when used as additional global features to unfold instance graphs. 2020.emnlp-main.50 @@ -909,9 +909,9 @@ <fixed-case>R</fixed-case>i<fixed-case>SAWOZ</fixed-case>: A Large-Scale Multi-Domain <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z Dataset with Rich Semantic Annotations for Task-Oriented Dialogue Modeling JunQuan ShianZhang - QianCao + QianCao ZizhongLi - DeyiXiong + DeyiXiong 930–940 In order to alleviate the shortage of multi-domain data and to capture discourse phenomena for task-oriented dialogue modeling, we propose RiSAWOZ, a large-scale multi-domain Chinese Wizard-of-Oz dataset with Rich Semantic Annotations. RiSAWOZ contains 11.2K human-to-human (H2H) multi-turn semantically annotated dialogues, with more than 150K utterances spanning over 12 domains, which is larger than all previous annotated H2H conversational datasets. Both single- and multi-domain dialogues are constructed, accounting for 65% and 35%, respectively. Each dialogue is labeled with comprehensive dialogue annotations, including dialogue goal in the form of natural language description, domain, dialogue states and acts at both the user and system side. In addition to traditional dialogue annotations, we especially provide linguistic annotations on discourse phenomena, e.g., ellipsis and coreference, in dialogues, which are useful for dialogue coreference and ellipsis resolution tasks. Apart from the fully annotated dataset, we also present a detailed description of the data collection procedure, statistics and analysis of the dataset. A series of benchmark models and results are reported, including natural language understanding (intent detection & slot filling), dialogue state tracking and dialogue context-to-text generation, as well as coreference and ellipsis resolution, which facilitate the baseline comparison for future research on this corpus. 2020.emnlp-main.67 @@ -946,7 +946,7 @@ Predicting Reference: What do Language Models Learn about Discourse Models? ShivaUpadhye LeonBergen - AndrewKehler + AndrewKehler 977–982 Whereas there is a growing literature that probes neural language models to assess the degree to which they have latently acquired grammatical knowledge, little if any research has investigated their acquisition of discourse modeling ability. We address this question by drawing on a rich psycholinguistic literature that has established how different contexts affect referential biases concerning who is likely to be referred to next. The results reveal that, for the most part, the prediction behavior of neural language models does not resemble that of human language users. 2020.emnlp-main.70 @@ -1013,8 +1013,8 @@ Multi-task Learning for Multilingual Neural Machine Translation YirenWang - ChengXiangZhai - HanyHassan + ChengXiangZhai + HanyHassan 1022–1034 While monolingual data has been shown to be useful in improving bilingual neural machine translation (NMT), effectively and efficiently leveraging monolingual data for Multilingual NMT (MNMT) systems is a less explored area. In this work, we propose a multi-task learning (MTL) framework that jointly trains the model with the translation task on bitext data and two denoising tasks on the monolingual data. We conduct extensive empirical studies on MNMT systems with 10 language pairs from WMT datasets. We show that the proposed approach can effectively improve the translation quality for both high-resource and low-resource languages with large margin, achieving significantly better results than the individual bilingual models. We also demonstrate the efficacy of the proposed approach in the zero-shot setup for language pairs without bitext training data. Furthermore, we show the effectiveness of MTL over pre-training approaches for both NMT and cross-lingual transfer learning NLU tasks; the proposed approach outperforms massive scale models trained on single task. 2020.emnlp-main.75 @@ -1067,7 +1067,7 @@ Incorporating a Local Translation Mechanism into Non-autoregressive Translation XiangKong ZhisongZhang - EduardHovy + EduardHovy 1067–1073 In this work, we introduce a novel local autoregressive translation (LAT) mechanism into non-autoregressive translation (NAT) models so as to capture local dependencies among target outputs. Specifically, for each target decoding position, instead of only one token, we predict a short sequence of tokens in an autoregressive way. We further design an efficient merging algorithm to align and merge the output pieces into one final output sequence. We integrate LAT into the conditional masked language model (CMLM) (Ghazvininejad et al.,2019) and similarly adopt iterative decoding. Empirical results on five translation tasks show that compared with CMLM, our method achieves comparable or better performance with fewer decoding iterations, bringing a 2.5x speedup. Further analysis indicates that our method reduces repeated translations and performs better at longer sentences. Our code will be released to the public. 2020.emnlp-main.79 @@ -1205,7 +1205,7 @@ <fixed-case>ToTTo</fixed-case>: A Controlled Table-To-Text Generation Dataset - AnkurParikh + AnkurParikh XuezhiWang SebastianGehrmann ManaalFaruqui @@ -1265,7 +1265,7 @@ Reading Between the Lines: Exploring Infilling in Visual Narratives Khyathi RaghaviChandu Ruo-PingDong - Alan WBlack + Alan WBlack 1220–1229 Generating long form narratives such as stories and procedures from multiple modalities has been a long standing dream for artificial intelligence. In this regard, there is often crucial subtext that is derived from the surrounding contexts. The general seq2seq training methods render the models shorthanded while attempting to bridge the gap between these neighbouring contexts. In this paper, we tackle this problem by using infilling techniques involving prediction of missing steps in a narrative while generating textual descriptions from a sequence of images. We also present a new large scale visual procedure telling (ViPT) dataset with a total of 46,200 procedures and around 340k pairwise images and textual descriptions that is rich in such contextual dependencies. Generating steps using infilling technique demonstrates the effectiveness in visual procedures with more coherent texts. We conclusively show a METEOR score of 27.51 on procedures which is higher than the state-of-the-art on visual storytelling. We also demonstrate the effects of interposing new text with missing images during inference. The code and the dataset will be publicly available at https://visual-narratives.github.io/Visual-Narratives/. 2020.emnlp-main.93 @@ -1276,7 +1276,7 @@ Acrostic Poem Generation RajatAgarwal - KatharinaKann + KatharinaKann 1230–1240 We propose a new task in the area of computational creativity: acrostic poem generation in English. Acrostic poems are poems that contain a hidden message; typically, the first letter of each line spells out a word or short phrase. We define the task as a generation task with multiple constraints: given an input word, 1) the initial letters of each line should spell out the provided word, 2) the poem’s semantics should also relate to it, and 3) the poem should conform to a rhyming scheme. We further provide a baseline model for the task, which consists of a conditional neural language model in combination with a neural rhyming model. Since no dedicated datasets for acrostic poem generation exist, we create training data for our task by first training a separate topic prediction model on a small set of topic-annotated poems and then predicting topics for additional poems. Our experiments show that the acrostic poems generated by our baseline are received well by humans and do not lose much quality due to the additional constraints. Last, we confirm that poems generated by our model are indeed closely related to the provided prompts, and that pretraining on Wikipedia can boost performance. 2020.emnlp-main.94 @@ -1302,7 +1302,7 @@ Grounded Compositional Outputs for Adaptive Language Modeling NikolaosPappas PhoebeMulcaire - Noah A.Smith + Noah A.Smith 1252–1267 Language models have emerged as a central component across NLP, and a great deal of progress depends on the ability to cheaply adapt them (e.g., through finetuning) to new domains and tasks. A language model’s vocabulary—typically selected before training and permanently fixed later—affects its size and is part of what makes it resistant to such adaptation. Prior work has used compositional input embeddings based on surface forms to ameliorate this issue. In this work, we go one step beyond and propose a fully compositional output embedding layer for language models, which is further grounded in information from a structured lexicon (WordNet), namely semantically related words and free-text definitions. To our knowledge, the result is the first word-level language model with a size that does not depend on the training vocabulary. We evaluate the model on conventional language modeling as well as challenging cross-domain settings with an open vocabulary, finding that it matches or outperforms previous state-of-the-art output embedding methods and adaptation approaches. Our analysis attributes the improvements to sample efficiency: our model is more accurate for low-frequency words. 2020.emnlp-main.96 @@ -1339,7 +1339,7 @@ Scalable Multi-Hop Relational Reasoning for Knowledge-Aware Question Answering YanlinFeng XinyueChen - Bill YuchenLin + Bill YuchenLin PeifengWang JunYan XiangRen @@ -1355,7 +1355,7 @@ Improving Bilingual Lexicon Induction for Low Frequency Words JiajiHuang XingyuCai - KennethChurch + KennethChurch 1310–1314 This paper designs a Monolingual Lexicon Induction task and observes that two factors accompany the degraded accuracy of bilingual lexicon induction for rare words. First, a diminishing margin between similarities in low frequency regime, and secondly, exacerbated hubness at low frequency. Based on the observation, we further propose two methods to address these two factors, respectively. The larger issue is hubness. Addressing that improves induction accuracy significantly, especially for low-frequency words. 2020.emnlp-main.100 @@ -1382,7 +1382,7 @@ YuchenZhuang JieLyu TuoZhao - ChaoZhang + ChaoZhang 1326–1340 Fine-tuned pre-trained language models can suffer from severe miscalibration for both in-distribution and out-of-distribution (OOD) data due to over-parameterization. To mitigate this issue, we propose a regularized fine-tuning method. Our method introduces two types of regularization for better calibration: (1) On-manifold regularization, which generates pseudo on-manifold samples through interpolation within the data manifold. Augmented training with these pseudo samples imposes a smoothness regularization to improve in-distribution calibration. (2) Off-manifold regularization, which encourages the model to output uniform distributions for pseudo off-manifold samples to address the over-confidence issue for OOD data. Our experiments demonstrate that the proposed method outperforms existing calibration methods for text classification in terms of expectation calibration error, misclassification detection, and OOD detection on six datasets. Our code can be found at https://github.com/Lingkai-Kong/Calibrated-BERT-Fine-Tuning. 2020.emnlp-main.102 @@ -1393,7 +1393,7 @@ Scaling Hidden <fixed-case>M</fixed-case>arkov Language Models JustinChiu - AlexanderRush + AlexanderRush 1341–1349 The hidden Markov model (HMM) is a fundamental tool for sequence modeling that cleanly separates the hidden state from the emission structure. However, this separation makes it difficult to fit HMMs to large datasets in modern NLP, and they have fallen out of use due to very poor performance compared to fully observed models. This work revisits the challenge of scaling HMMs to language modeling datasets, taking ideas from recent approaches to neural modeling. We propose methods for scaling HMMs to massive state spaces while maintaining efficient exact inference, a compact parameterization, and effective regularization. Experiments show that this approach leads to models that are much more accurate than previous HMMs and n-gram-based methods, making progress towards the performance of state-of-the-art NN models. 2020.emnlp-main.103 @@ -1405,7 +1405,7 @@ Coding Textual Inputs Boosts the Accuracy of Neural Networks Abdul RafaeKhan JiaXu - WeiweiSun + WeiweiSun 1350–1360 Natural Language Processing (NLP) tasks are usually performed word by word on textual inputs. We can use arbitrary symbols to represent the linguistic meaning of a word and use these symbols as inputs. As “alternatives” to a text representation, we introduce Soundex, MetaPhone, NYSIIS, logogram to NLP, and develop fixed-output-length coding and its extension using Huffman coding. Each of those codings combines different character/digital sequences and constructs a new vocabulary based on codewords. We find that the integration of those codewords with text provides more reliable inputs to Neural-Network-based NLP systems through redundancy than text-alone inputs. Experiments demonstrate that our approach outperforms the state-of-the-art models on the application of machine translation, language modeling, and part-of-speech tagging. The source code is available at https://github.com/abdulrafae/coding_nmt. 2020.emnlp-main.104 @@ -1418,7 +1418,7 @@ OrionWeller NicholasLourie MattGardner - Matthew E.Peters + Matthew E.Peters 1361–1375 Typically, machine learning systems solve new tasks by training on thousands of examples. In contrast, humans can solve new tasks by reading some instructions, with perhaps an example or two. To take a step toward closing this gap, we introduce a framework for developing NLP systems that solve new tasks after reading their descriptions, synthesizing prior work in this area. We instantiate this frame- work with a new English language dataset, ZEST, structured for task-oriented evaluation on unseen tasks. Formulating task descriptions as questions, we ensure each is general enough to apply to many possible inputs, thus comprehensively evaluating a model’s ability to solve each task. Moreover, the dataset’s structure tests specific types of systematic generalization. We find that the state-of-the-art T5 model achieves a score of 12% on ZEST, leaving a significant challenge for NLP researchers. 2020.emnlp-main.105 @@ -1563,7 +1563,7 @@ YiningWang LuXiang YuZhou - ChengqingZong + ChengqingZong 1490–1499 Medical entity normalization, which links medical mentions in the text to entities in knowledge bases, is an important research topic in medical natural language processing. In this paper, we focus on Chinese medical procedure entity normalization. However, nonstandard Chinese expressions and combined procedures present challenges in our problem. The existing strategies relying on the discriminative model are poorly to cope with normalizing combined procedure mentions. We propose a sequence generative framework to directly generate all the corresponding medical procedure entities. we adopt two strategies: category-based constraint decoding and category-based model refining to avoid unrealistic results. The method is capable of linking entities when a mention contains multiple procedure concepts and our comprehensive experiments demonstrate that the proposed model can achieve remarkable improvements over existing baselines, particularly significant in the case of multi-implication Chinese medical procedures. 2020.emnlp-main.116 @@ -1577,7 +1577,7 @@ SaahilJain PranavRajpurkar AnujPareek - AndrewNg + AndrewNg MatthewLungren 1500–1519 The extraction of labels from radiology text reports enables large-scale training of medical imaging models. Existing approaches to report labeling typically rely either on sophisticated feature engineering based on medical domain knowledge or manual annotations by experts. In this work, we introduce a BERT-based approach to medical image report labeling that exploits both the scale of available rule-based systems and the quality of expert annotations. We demonstrate superior performance of a biomedically pretrained BERT model first trained on annotations of a rule-based labeler and then finetuned on a small set of expert annotations augmented with automated backtranslation. We find that our final model, CheXbert, is able to outperform the previous best rules-based labeler with statistical significance, setting a new SOTA for report labeling on one of the largest datasets of chest x-rays. @@ -1621,7 +1621,7 @@ HaejunLee Drew A.Hudson KangwookLee - Christopher D.Manning + Christopher D.Manning 1551–1562 We introduce Sentence-level Language Modeling, a new pre-training objective for learning a discourse language representation in a fully self-supervised manner. Recent pre-training methods in NLP focus on learning either bottom or top-level language representations: contextualized word representations derived from language model objectives at one extreme and a whole sequence representation learned by order classification of two given textual segments at the other. However, these models are not directly encouraged to capture representations of intermediate-size structures that exist in natural languages such as sentences and the relationships among them. To that end, we propose a new approach to encourage learning of a contextualized sentence-level representation by shuffling the sequence of input sentences and training a hierarchical transformer model to reconstruct the original ordering. Through experiments on downstream tasks such as GLUE, SQuAD, and DiscoEval, we show that this feature of our model improves the performance of the original BERT by large margins. 2020.emnlp-main.120 @@ -1679,7 +1679,7 @@ Compositional Phrase Alignment and Beyond YukiArase - Jun’ichiTsujii + Jun’ichiTsujii 1611–1623 Phrase alignment is the basis for modelling sentence pair interactions, such as paraphrase and textual entailment recognition. Most phrase alignments are compositional processes such that an alignment of a phrase pair is constructed based on the alignments of their child phrases. Nonetheless, studies have revealed that non-compositional alignments involving long-distance phrase reordering are prevalent in practice. We address the phrase alignment problem by combining an unordered tree mapping algorithm and phrase representation modelling that explicitly embeds the similarity distribution in the sentences onto powerful contextualized representations. Experimental results demonstrate that our method effectively handles compositional and non-compositional global phrase alignments. Our method significantly outperforms that used in a previous study and achieves a performance competitive with that of experienced human annotators. 2020.emnlp-main.125 @@ -1706,7 +1706,7 @@ Double Graph Based Reasoning for Document-level Relation Extraction ShuangZeng RunxinXu - BaobaoChang + BaobaoChang LeiLi 1630–1640 Document-level relation extraction aims to extract relations among entities within a document. Different from sentence-level relation extraction, it requires reasoning over multiple sentences across paragraphs. In this paper, we propose Graph Aggregation-and-Inference Network (GAIN), a method to recognize such relations for long paragraphs. GAIN constructs two graphs, a heterogeneous mention-level graph (MG) and an entity-level graph (EG). The former captures complex interaction among different mentions and the latter aggregates mentions underlying for the same entities. Based on the graphs we propose a novel path reasoning mechanism to infer relations between entities. Experiments on the public dataset, DocRED, show GAIN achieves a significant performance improvement (2.85 on F1) over the previous state-of-the-art. Our code is available at https://github.com/PKUnlp-icler/GAIN. @@ -1805,7 +1805,7 @@ Beyond [<fixed-case>CLS</fixed-case>] through Ranking by Generation - CiceroNogueira dos Santos + CiceroNogueira dos Santos XiaofeiMa RameshNallapati ZhihengHuang @@ -1821,7 +1821,7 @@ Tired of Topic Models? Clusters of Pretrained Word Embeddings Make for Fast and Good Topics too! SuzannaSia AyushDalmia - Sabrina J.Mielke + Sabrina J.Mielke 1728–1736 Topic models are a useful analysis tool to uncover the underlying themes within document collections. The dominant approach is to use probabilistic topic models that posit a generative story, but in this paper we propose an alternative way to obtain topics: clustering pre-trained word embeddings while incorporating document information for weighted clustering and reranking top words. We provide benchmarks for the combination of different word embeddings and clustering algorithms, and analyse their performance under dimensionality reduction with PCA. The best performing combination for our approach performs as well as classical topic models, but with lower runtime and computational complexity. 2020.emnlp-main.135 @@ -1884,7 +1884,7 @@ Incorporating Multimodal Information in Open-Domain Web Keyphrase Extraction YansenWang ZhenFan - CarolynRose + CarolynRose 1790–1800 Open-domain Keyphrase extraction (KPE) on the Web is a fundamental yet complex NLP task with a wide range of practical applications within the field of Information Retrieval. In contrast to other document types, web page designs are intended for easy navigation and information finding. Effective designs encode within the layout and formatting signals that point to where the important information can be found. In this work, we propose a modeling approach that leverages these multi-modal signals to aid in the KPE task. In particular, we leverage both lexical and visual features (e.g., size, font, position) at the micro-level to enable effective strategy induction and meta-level features that describe pages at a macro-level to aid in strategy selection. Our evaluation demonstrates that a combination of effective strategy induction and strategy selection within this approach for the KPE task outperforms state-of-the-art models. A qualitative post-hoc analysis illustrates how these features function within the model. 2020.emnlp-main.140 @@ -1953,7 +1953,7 @@ <fixed-case>B</fixed-case>i<fixed-case>ST</fixed-case>: Bi-directional Spatio-Temporal Reasoning for Video-Grounded Dialogues HungLe DoyenSahoo - NancyChen + NancyChen Steven C.H.Hoi 1846–1859 Video-grounded dialogues are very challenging due to (i) the complexity of videos which contain both spatial and temporal variations, and (ii) the complexity of user utterances which query different segments and/or different objects in videos over multiple dialogue turns. However, existing approaches to video-grounded dialogues often focus on superficial temporal-level visual cues, but neglect more fine-grained spatial signals from videos. To address this drawback, we proposed Bi-directional Spatio-Temporal Learning (BiST), a vision-language neural framework for high-resolution queries in videos based on textual cues. Specifically, our approach not only exploits both spatial and temporal-level information, but also learns dynamic information diffusion between the two feature spaces through spatial-to-temporal and temporal-to-spatial reasoning. The bidirectional strategy aims to tackle the evolving semantics of user queries in the dialogue setting. The retrieved visual cues are used as contextual information to construct relevant responses to the users. Our empirical results and comprehensive qualitative analysis show that BiST achieves competitive performance and generates reasonable responses on a large-scale AVSD benchmark. We also adapt our BiST models to the Video QA setting, and substantially outperform prior approaches on the TGIF-QA benchmark. @@ -1967,7 +1967,7 @@ HungLe DoyenSahoo ChenghaoLiu - NancyChen + NancyChen Steven C.H.Hoi 1860–1877 Building an end-to-end conversational agent for multi-domain task-oriented dialogues has been an open challenge for two main reasons. First, tracking dialogue states of multiple domains is non-trivial as the dialogue agent must obtain complete states from all relevant domains, some of which might have shared slots among domains as well as unique slots specifically for one domain only. Second, the dialogue agent must also process various types of information across domains, including dialogue context, dialogue states, and database, to generate natural responses to users. Unlike the existing approaches that are often designed to train each module separately, we propose “UniConv” - a novel unified neural architecture for end-to-end conversational systems in multi-domain task-oriented dialogues, which is designed to jointly train (i) a Bi-level State Tracker which tracks dialogue states by learning signals at both slot and domain level independently, and (ii) a Joint Dialogue Act and Response Generator which incorporates information from various input components and models dialogue acts and target responses simultaneously. We conduct comprehensive experiments in dialogue state tracking, context-to-text, and end-to-end settings on the MultiWOZ2.1 benchmark, achieving superior performance over competitive baselines. @@ -1997,7 +1997,7 @@ FengShi TaoYuan ZhouYu - Song-ChunZhu + Song-ChunZhu 1889–1899 Inducing a meaningful structural representation from one or a set of dialogues is a crucial but challenging task in computational linguistics. Advancement made in this area is critical for dialogue system design and discourse analysis. It can also be extended to solve grammatical inference. In this work, we propose to incorporate structured attention layers into a Variational Recurrent Neural Network (VRNN) model with discrete latent states to learn dialogue structure in an unsupervised fashion. Compared to a vanilla VRNN, structured attention enables a model to focus on different parts of the source sentence embeddings while enforcing a structural inductive bias. Experiments show that on two-party dialogue datasets, VRNN with structured attention learns semantic structures that are similar to templates used to generate this dialogue corpus. While on multi-party dialogue datasets, our model learns an interactive structure demonstrating its capability of distinguishing speakers or addresses, automatically disentangling dialogues without explicit human annotation. 2020.emnlp-main.148 @@ -2014,7 +2014,7 @@ XiaozhongLiu ChanglongSun ConghuiZhu - TiejunZhao + TiejunZhao 1900–1910 In the past few years, audiences from different fields witness the achievements of sequence-to-sequence models (e.g., LSTM+attention, Pointer Generator Networks and Transformer) to enhance dialogue content generation. While content fluency and accuracy often serve as the major indicators for model training, dialogue logics, carrying critical information for some particular domains, are often ignored. Take customer service and court debate dialogue as examples, compatible logics can be observed across different dialogue instances, and this information can provide vital evidence for utterance generation. In this paper, we propose a novel network architecture - Cross Copy Networks (CCN) to explore the current dialog context and similar dialogue instances’ logical structure simultaneously. Experiments with two tasks, court debate and customer service content generation, proved that the proposed algorithm is superior to existing state-of-art content generation models. 2020.emnlp-main.149 @@ -2027,7 +2027,7 @@ QiJia YizhuLiu SiyuRen - KennyZhu + KennyZhu HaifengTang 1911–1920 Multi-turn response selection is a task designed for developing dialogue agents. The performance on this task has a remarkable improvement with pre-trained language models. However, these models simply concatenate the turns in dialogue history as the input and largely ignore the dependencies between the turns. In this paper, we propose a dialogue extraction algorithm to transform a dialogue history into threads based on their dependency relations. Each thread can be regarded as a self-contained sub-dialogue. We also propose Thread-Encoder model to encode threads and candidates into compact representations by pre-trained Transformers and finally get the matching score through an attention layer. The experiments show that dependency relations are helpful for dialogue context understanding, and our model outperforms the state-of-the-art baselines on both DSTC7 and DSTC8*, with competitive results on UbuntuV2. @@ -2068,7 +2068,7 @@ MandarJoshi JohnThickstun HannanehHajishirzi - LukeZettlemoyer + LukeZettlemoyer 1938–1952 Decisions of complex models for language understanding can be explained by limiting the inputs they are provided to a relevant subsequence of the original text — a rationale. Models that condition predictions on a concise rationale, while being more interpretable, tend to be less accurate than models that are able to use the entire context. In this paper, we show that it is possible to better manage the trade-off between concise explanations and high task accuracy by optimizing a bound on the Information Bottleneck (IB) objective. Our approach jointly learns an explainer that predicts sparse binary masks over input sentences without explicit supervision, and an end-task predictor that considers only the residual sentences. Using IB, we derive a learning objective that allows direct control of mask sparsity levels through a tunable sparse prior. Experiments on the ERASER benchmark demonstrate significant gains over previous work for both task performance and agreement with human rationales. Furthermore, we find that in the semi-supervised setting, a modest amount of gold rationales (25% of training examples with gold masks) can close the performance gap with a model that uses the full input. 2020.emnlp-main.153 @@ -2081,7 +2081,7 @@ NikitaNangia ClaraVania RasikaBhalerao - Samuel R.Bowman + Samuel R.Bowman 1953–1967 Pretrained language models, especially masked language models (MLMs) have seen success across many NLP tasks. However, there is ample evidence that they use the cultural biases that are undoubtedly present in the corpora they are trained on, implicitly creating harm with biased representations. To measure some forms of social bias in language models against protected demographic groups in the US, we introduce the Crowdsourced Stereotype Pairs benchmark (CrowS-Pairs). CrowS-Pairs has 1508 examples that cover stereotypes dealing with nine types of bias, like race, religion, and age. In CrowS-Pairs a model is presented with two sentences: one that is more stereotyping and another that is less stereotyping. The data focuses on stereotypes about historically disadvantaged groups and contrasts them with advantaged groups. We find that all three of the widely-used MLMs we evaluate substantially favor sentences that express stereotypes in every category in CrowS-Pairs. As work on building less biased models advances, this dataset can be used as a benchmark to evaluate progress. 2020.emnlp-main.154 @@ -2107,7 +2107,7 @@ MichaelHahn SuryaGanguli PercyLiang - Christopher D.Manning + Christopher D.Manning 1978–2010 Recurrent neural networks empirically generate natural language with high syntactic fidelity. However, their success is not well-understood theoretically. We provide theoretical insight into this success, proving in a finite-precision setting that RNNs can efficiently generate bounded hierarchical languages that reflect the scaffolding of natural language syntax. We introduce Dyck-(k,m), the language of well-nested brackets (of k types) and m-bounded nesting depth, reflecting the bounded memory needs and long-distance dependencies of natural language syntax. The best known results use O(k^{\frac{m}{2}}) memory (hidden units) to generate these languages. We prove that an RNN with O(m \log k) hidden units suffices, an exponential reduction in memory, by an explicit construction. Finally, we show that no algorithm, even with unbounded computation, can suffice with o(m \log k) hidden units. 2020.emnlp-main.156 @@ -2266,7 +2266,7 @@ Retrofitting Structure-aware Transformer Language Model for End Tasks HaoFei YafengRen - DonghongJi + DonghongJi 2151–2161 We consider retrofitting structure-aware Transformer language model for facilitating end tasks by proposing to exploit syntactic distance to encode both the phrasal constituency and dependency connection into the language model. A middle-layer structural learning strategy is leveraged for structure integration, accomplished with main semantic task training under multi-task learning scheme. Experimental results show that the retrofitted structure-aware Transformer language model achieves improved perplexity, meanwhile inducing accurate syntactic phrases. By performing structure-aware fine-tuning, our model achieves significant improvements for both semantic- and syntactic-dependent tasks. 2020.emnlp-main.168 @@ -2280,7 +2280,7 @@ ZhijiangGuo ZhiyangTeng WeiLu - Shay B.Cohen + Shay B.Cohen ZuozhuLiu LidongBing 2162–2172 @@ -2307,7 +2307,7 @@ Understanding the Mechanics of <fixed-case>SPIGOT</fixed-case>: Surrogate Gradients for Latent Structure Learning TsvetomilaMihaylova VladNiculae - André F. T.Martins + André F. T.Martins 2186–2202 Latent structure models are a powerful tool for modeling language data: they can mitigate the error propagation and annotation bottleneck in pipeline systems, while simultaneously uncovering linguistic insights about the data. One challenge with end-to-end training of these models is the argmax operation, which has null gradient. In this paper, we focus on surrogate gradients, a popular strategy to deal with this problem. We explore latent structure learning through the angle of pulling back the downstream learning objective. In this paradigm, we discover a principled motivation for both the straight-through estimator (STE) as well as the recently-proposed SPIGOT – a variant of STE for structured models. Our perspective leads to new algorithms in the same family. We empirically compare the known and the novel pulled-back estimators against the popular alternatives, yielding new insight for practitioners and revealing intriguing failure cases. 2020.emnlp-main.171 @@ -2347,7 +2347,7 @@ TaoLin FeiMi MartinJaggi - HinrichSchütze + HinrichSchütze 2226–2241 We present an efficient method of utilizing pretrained language models, where we learn selective binary masks for pretrained weights in lieu of modifying them through finetuning. Extensive evaluations of masking BERT, RoBERTa, and DistilBERT on eleven diverse NLP tasks show that our masking scheme yields performance comparable to finetuning, yet has a much smaller memory footprint when several tasks need to be inferred. Intrinsic evaluations show that representations computed by our binary masked language models encode information necessary for solving downstream tasks. Analyzing the loss landscape, we show that masking and finetuning produce models that reside in minima that can be connected by a line segment with nearly constant test accuracy. This confirms that masking can be utilized as an efficient alternative to finetuning. 2020.emnlp-main.174 @@ -2360,7 +2360,7 @@ XiaomianKang YangZhao JiajunZhang - ChengqingZong + ChengqingZong 2242–2254 Document-level neural machine translation has yielded attractive improvements. However, majority of existing methods roughly use all context sentences in a fixed scope. They neglect the fact that different source sentences need different sizes of context. To address this problem, we propose an effective approach to select dynamic context so that the document-level translation model can utilize the more useful selected context sentences to produce better translations. Specifically, we introduce a selection module that is independent of the translation module to score each candidate context sentence. Then, we propose two strategies to explicitly select a variable number of context sentences and feed them into the translation module. We train the two modules end-to-end via reinforcement learning. A novel reward is proposed to encourage the selection and utilization of dynamic context sentences. Experiments demonstrate that our approach can select adaptive context sentences for different source sentences, and significantly improves the performance of document-level translation methods. 2020.emnlp-main.175 @@ -2386,7 +2386,7 @@ Pronoun-Targeted Fine-tuning for <fixed-case>NMT</fixed-case> with Hybrid Losses PrathyushaJwalapuram - ShafiqJoty + ShafiqJoty YoulinShen 2267–2279 Popular Neural Machine Translation model training uses strategies like backtranslation to improve BLEU scores, requiring large amounts of additional data and training. We introduce a class of conditional generative-discriminative hybrid losses that we use to fine-tune a trained machine translation model. Through a combination of targeted fine-tuning objectives and intuitive re-use of the training data the model has failed to adequately learn from, we improve the model performance of both a sentence-level and a contextual model without using any additional data. We target the improvement of pronoun translations through our fine-tuning and evaluate our models on a pronoun benchmark testset. Our sentence-level model shows a 0.5 BLEU improvement on both the WMT14 and the IWSLT13 De-En testsets, while our contextual model achieves the best results, improving from 31.81 to 32 BLEU on WMT14 De-En testset, and from 32.10 to 33.13 on the IWSLT13 De-En testset, with corresponding improvements in pronoun translation. We further show the generalizability of our method by reproducing the improvements on two additional language pairs, Fr-En and Cs-En. @@ -2431,7 +2431,7 @@ AhmetÜstün AriannaBisazza GosseBouma - Gertjanvan Noord + Gertjanvan Noord 2302–2315 Recent advances in multilingual dependency parsing have brought the idea of a truly universal parser closer to reality. However, cross-language interference and restrained model capacity remain major obstacles. To address this, we propose a novel multilingual task adaptation approach based on contextual parameter generation and adapter modules. This approach enables to learn adapters via language embeddings while sharing model parameters across languages. It also allows for an easy but effective integration of existing linguistic typology features into the parsing network. The resulting parser, UDapter, outperforms strong monolingual and multilingual baselines on the majority of both high-resource and low-resource (zero-shot) languages, showing the success of the proposed adaptation approach. Our in-depth analyses show that soft parameter sharing via typological features is key to this success. 2020.emnlp-main.180 @@ -2447,7 +2447,7 @@ ZhengyanLi ZichuFei YeyunGong - XuanjingHuang + XuanjingHuang 2316–2326 Conditional random fields (CRF) for label decoding has become ubiquitous in sequence labeling tasks. However, the local label dependencies and inefficient Viterbi decoding have always been a problem to be solved. In this work, we introduce a novel two-stage label decoding framework to model long-term label dependencies, while being much more computationally efficient. A base model first predicts draft labels, and then a novel two-stream self-attention model makes refinements on these draft predictions based on long-range label dependencies, which can achieve parallel decoding for a faster prediction. In addition, in order to mitigate the side effects of incorrect draft labels, Bayesian neural networks are used to indicate the labels with a high probability of being wrong, which can greatly assist in preventing error propagation. The experimental results on three sequence labeling benchmarks demonstrated that the proposed method not only outperformed the CRF-based methods but also greatly accelerated the inference process. 2020.emnlp-main.181 @@ -2470,7 +2470,7 @@ Position-Aware Tagging for Aspect Sentiment Triplet Extraction - LuXu + LuXu HaoLi WeiLu LidongBing @@ -2487,7 +2487,7 @@ OzanCaglayan JuliaIve VenetaHaralampieva - PranavaMadhyastha + PranavaMadhyastha LoïcBarrault LuciaSpecia 2350–2361 @@ -2527,7 +2527,7 @@ Bridging Linguistic Typology and Multilingual Machine Translation with Multi-View Language Representations - ArturoOncevay + ArturoOncevay BarryHaddow AlexandraBirch 2391–2406 @@ -2554,7 +2554,7 @@ YeonSeonwoo Ji-HoonKim Jung-WooHa - AliceOh + AliceOh 2418–2428 Extractive QA models have shown very promising performance in predicting the correct answer to a question for a given passage. However, they sometimes result in predicting the correct answer text but in a context irrelevant to the given question. This discrepancy becomes especially important as the number of occurrences of the answer text in a passage increases. To resolve this issue, we propose BLANC (BLock AttentioN for Context prediction) based on two main ideas: context prediction as an auxiliary task in multi-task learning manner, and a block attention method that learns the context prediction task. With experiments on reading comprehension, we show that BLANC outperforms the state-of-the-art QA models, and the performance gap increases as the number of answer text occurrences increases. We also conduct an experiment of training the models using SQuAD and predicting the supporting facts on HotpotQA and show that BLANC outperforms all baseline models in this zero-shot setting. 2020.emnlp-main.189 @@ -2578,7 +2578,7 @@ YifanGao Chien-ShengWu JingjingLi - ShafiqJoty + ShafiqJoty Steven C.H.Hoi CaimingXiong IrwinKing @@ -2646,10 +2646,10 @@ Improving <fixed-case>AMR</fixed-case> Parsing with Sequence-to-Sequence Pre-training DongqinXu - JunhuiLi + JunhuiLi MuhuaZhu MinZhang - GuodongZhou + GuodongZhou 2501–2511 In the literature, the research on abstract meaning representation (AMR) parsing is much restricted by the size of human-curated dataset which is critical to build an AMR parser with good performance. To alleviate such data size restriction, pre-trained models have been drawing more and more attention in AMR parsing. However, previous pre-trained models, like BERT, are implemented for general purpose which may not work as expected for the specific task of AMR parsing. In this paper, we focus on sequence-to-sequence (seq2seq) AMR parsing and propose a seq2seq pre-training approach to build pre-trained models in both single and joint way on three relevant tasks, i.e., machine translation, syntactic parsing, and AMR parsing itself. Moreover, we extend the vanilla fine-tuning method to a multi-task learning fine-tuning method that optimizes for the performance of AMR parsing while endeavors to preserve the response of pre-trained models. Extensive experimental results on two English benchmark datasets show that both the single and joint pre-trained models significantly improve the performance (e.g., from 71.5 to 80.2 on AMR 2.0), which reaches the state of the art. The result is very encouraging since we achieve this with seq2seq models rather than complex models. We make our code and model available at https://github.com/xdqkid/S2S-AMR-Parser. 2020.emnlp-main.196 @@ -2674,7 +2674,7 @@ SungjoonPark KiwoongPark JaimeenAhn - AliceOh + AliceOh 2523–2531 We analyze social media for detecting the suicidal risk of military personnel, which is especially crucial for countries with compulsory military service such as the Republic of Korea. From a widely-used Korean social Q&A site, we collect posts containing military-relevant content written by active-duty military personnel. We then annotate the posts with two groups of experts: military experts and mental health experts. Our dataset includes 2,791 posts with 13,955 corresponding expert annotations of suicidal risk levels, and this dataset is available to researchers who consent to research ethics agreement. Using various fine-tuned state-of-the-art language models, we predict the level of suicide risk, reaching .88 F1 score for classifying the risks. 2020.emnlp-main.198 @@ -2720,7 +2720,7 @@ Self-Induced Curriculum Learning in Self-Supervised Neural Machine Translation DanaRuiter - Josefvan Genabith + Josefvan Genabith CristinaEspaña-Bonet 2560–2571 Self-supervised neural machine translation (SSNMT) jointly learns to identify and select suitable training data from comparable (rather than parallel) corpora and to translate, in a way that the two tasks support each other in a virtuous circle. In this study, we provide an in-depth analysis of the sampling choices the SSNMT model makes during training. We show how, without it having been told to do so, the model self-selects samples of increasing (i) complexity and (ii) task-relevance in combination with (iii) performing a denoising curriculum. We observe that the dynamics of the mutual-supervision signals of both system internal representation types are vital for the extraction and translation performance. We show that in terms of the Gunning-Fog Readability index, SSNMT starts extracting and learning from Wikipedia data suitable for high school students and quickly moves towards content suitable for first year undergraduate students. @@ -2732,7 +2732,7 @@ Towards Reasonably-Sized Character-Level Transformer <fixed-case>NMT</fixed-case> by Finetuning Subword Systems JindřichLibovický - AlexanderFraser + AlexanderFraser 2572–2579 Applying the Transformer architecture on the character level usually requires very deep architectures that are difficult and slow to train. These problems can be partially overcome by incorporating a segmentation into tokens in the model. We show that by initially training a subword model and then finetuning it on characters, we can obtain a neural machine translation model that works at the character level without requiring token segmentation. We use only the vanilla 6-layer Transformer Base architecture. Our character-level models better capture morphological phenomena and show more robustness to noise at the expense of somewhat worse overall translation quality. Our study is a significant step towards high-performance and easy to train character-based models that are not extremely large. 2020.emnlp-main.203 @@ -2744,9 +2744,9 @@ Transfer Learning and Distant Supervision for Multilingual Transformer Models: A Study on <fixed-case>A</fixed-case>frican Languages Michael A.Hedderich - David I.Adelani + David I.Adelani DaweiZhu - JesujobaAlabi + JesujobaAlabi UdiaMarkus DietrichKlakow 2580–2591 @@ -2760,7 +2760,7 @@ Translation Quality Estimation by Jointly Learning to Score and Rank JingyiZhang - Josefvan Genabith + Josefvan Genabith 2592–2598 The translation quality estimation (QE) task, particularly the QE as a Metric task, aims to evaluate the general quality of a translation based on the translation and the source sentence without using reference translations. Supervised learning of this QE task requires human evaluation of translation quality as training data. Human evaluation of translation quality can be performed in different ways, including assigning an absolute score to a translation or ranking different translations. In order to make use of different types of human evaluation data for supervised learning, we present a multi-task learning QE model that jointly learns two tasks: score a translation and rank two translations. Our QE model exploits cross-lingual sentence embeddings from pre-trained multilingual language models. We obtain new state-of-the-art results on the WMT 2019 QE as a Metric task and outperform sentBLEU on the WMT 2019 Metrics task. 2020.emnlp-main.205 @@ -2775,7 +2775,7 @@ Joan AlbertSilvestre-Cerdà PauBaquero-Arnal JorgeCivera Saiz - AlfonsJuan + AlfonsJuan 2599–2611 The cascade approach to Speech Translation (ST) is based on a pipeline that concatenates an Automatic Speech Recognition (ASR) system followed by a Machine Translation (MT) system. These systems are usually connected by a segmenter that splits the ASR output into hopefully, semantically self-contained chunks to be fed into the MT system. This is specially challenging in the case of streaming ST, where latency requirements must also be taken into account. This work proposes novel segmentation models for streaming ST that incorporate not only textual, but also acoustic information to decide when the ASR output is split into a chunk. An extensive and throughly experimental setup is carried out on the Europarl-ST dataset to prove the contribution of acoustic information to the performance of the segmentation model in terms of BLEU score in a streaming ST scenario. Finally, comparative results with previous work also show the superiority of the segmentation models proposed in this work. 2020.emnlp-main.206 @@ -2820,7 +2820,7 @@ MariaBarrett RasmusHvingelby KellieWebster - AndersSøgaard + AndersSøgaard 2637–2648 The one-sided focus on English in previous studies of gender bias in NLP misses out on opportunities in other languages: English challenge datasets such as GAP and WinoGender highlight model preferences that are “hallucinatory”, e.g., disambiguating gender-ambiguous occurrences of ‘doctor’ as male doctors. We show that for languages with type B reflexivization, e.g., Swedish and Russian, we can construct multi-task challenge datasets for detecting gender bias that lead to unambiguously wrong model predictions: In these languages, the direct translation of ‘the doctor removed his mask’ is not ambiguous between a coreferential reading and a disjoint reading. Instead, the coreferential reading requires a non-gendered pronoun, and the gendered, possessive pronouns are anti-reflexive. We present a multilingual, multi-task challenge dataset, which spans four languages and four NLP tasks and focuses only on this phenomenon. We find evidence for gender bias across all task-language combinations and correlate model bias with national labor market statistics. 2020.emnlp-main.209 @@ -2873,7 +2873,7 @@ RicardoRei CraigStewart Ana CFarinha - AlonLavie + AlonLavie 2685–2702 We present COMET, a neural framework for training multilingual machine translation evaluation models which obtains new state-of-the-art levels of correlation with human judgements. Our framework leverages recent breakthroughs in cross-lingual pretrained language modeling resulting in highly multilingual and adaptable MT evaluation models that exploit information from both the source input and a target-language reference translation in order to more accurately predict MT quality. To showcase our framework, we train three models with different types of human judgements: Direct Assessments, Human-mediated Translation Edit Rate and Multidimensional Quality Metric. Our models achieve new state-of-the-art performance on the WMT 2019 Metrics shared task and demonstrate robustness to high-performing systems. 2020.emnlp-main.213 @@ -2885,7 +2885,7 @@ <fixed-case>R</fixed-case>eusing a <fixed-case>P</fixed-case>retrained <fixed-case>L</fixed-case>anguage <fixed-case>M</fixed-case>odel on <fixed-case>L</fixed-case>anguages with <fixed-case>L</fixed-case>imited <fixed-case>C</fixed-case>orpora for <fixed-case>U</fixed-case>nsupervised <fixed-case>NMT</fixed-case> AlexandraChronopoulou DarioStojanovski - AlexanderFraser + AlexanderFraser 2703–2711 Using a language model (LM) pretrained on two languages with large monolingual data in order to initialize an unsupervised neural machine translation (UNMT) system yields state-of-the-art results. When limited data is available for one language, however, this method leads to poor translations. We present an effective approach that reuses an LM that is pretrained only on the high-resource language. The monolingual LM is fine-tuned on both languages and is then used to initialize a UNMT model. To reuse the pretrained LM, we have to modify its predefined vocabulary, to account for the new language. We therefore propose a novel vocabulary extension method. Our approach, RE-LM, outperforms a competitive cross-lingual pretraining model (XLM) in English-Macedonian (En-Mk) and English-Albanian (En-Sq), yielding more than +8.3 BLEU points for all four translation directions. 2020.emnlp-main.214 @@ -2896,9 +2896,9 @@ <fixed-case>LNM</fixed-case>ap: Departures from Isomorphic Assumption in Bilingual Lexicon Induction Through Non-Linear Mapping in Latent Space - TasnimMohiuddin + TasnimMohiuddin M SaifulBari - ShafiqJoty + ShafiqJoty 2712–2723 Most of the successful and predominant methods for Bilingual Lexicon Induction (BLI) are mapping-based, where a linear mapping function is learned with the assumption that the word embedding spaces of different languages exhibit similar geometric structures (i.e. approximately isomorphic). However, several recent studies have criticized this simplified assumption showing that it does not hold in general even for closely related languages. In this work, we propose a novel semi-supervised method to learn cross-lingual word embeddings for BLI. Our model is independent of the isomorphic assumption and uses non-linear mapping in the latent space of two independently pre-trained autoencoders. Through extensive experiments on fifteen (15) different language pairs (in both directions) comprising resource-rich and low-resource languages from two different datasets, we demonstrate that our method outperforms existing models by a good margin. Ablation studies show the importance of different model components and the necessity of non-linear mapping. 2020.emnlp-main.215 @@ -2937,7 +2937,7 @@ Parsing Gapping Constructions Based on Grammatical and Semantic Roles - YoshihideKato + YoshihideKato ShigekiMatsubara 2747–2752 A gapping construction consists of a coordinated structure where redundant elements are elided from all but one conjuncts. This paper proposes a method of parsing sentences with gapping to recover elided elements. The proposed method is based on constituent trees annotated with grammatical and semantic roles that are useful for identifying elided elements. Our method outperforms the previous method in terms of F-measure and recall. @@ -2959,7 +2959,7 @@ Some Languages Seem Easier to Parse Because Their Treebanks Leak - AndersSøgaard + AndersSøgaard 2765–2770 Cross-language differences in (universal) dependency parsing performance are mostly attributed to treebank size, average sentence length, average dependency length, morphological complexity, and domain differences. We point at a factor not previously discussed: If we abstract away from words and dependency labels, how many graphs in the test data were seen in the training data? We compute graph isomorphisms, and show that, treebank size aside, overlap between training and test graphs explain more of the observed variation than standard explanations such as the above. 2020.emnlp-main.220 @@ -2995,7 +2995,7 @@ <fixed-case>TED</fixed-case>-<fixed-case>CDB</fixed-case>: A Large-Scale <fixed-case>C</fixed-case>hinese Discourse Relation Dataset on <fixed-case>TED</fixed-case> Talks WanqiuLong BonnieWebber - DeyiXiong + DeyiXiong 2793–2803 As different genres are known to differ in their communicative properties and as previously, for Chinese, discourse relations have only been annotated over news text, we have created the TED-CDB dataset. TED-CDB comprises a large set of TED talks in Chinese that have been manually annotated according to the goals and principles of Penn Discourse Treebank, but adapted to features that are not present in English. It serves as a unique Chinese corpus of spoken discourse. Benchmark experiments show that TED-CDB poses a challenge for state-of-the-art discourse relation classifiers, whose F1 performance on 4-way classification is 60%. This is a dramatic drop of 35% from performance on the news text in the Chinese Discourse Treebank. Transfer learning experiments have been carried out with the TED-CDB for both same-language cross-domain transfer and same-domain cross-language transfer. Both demonstrate that the TED-CDB can improve the performance of systems being developed for languages other than Chinese and would be helpful for insufficient or unbalanced data in other corpora. The dataset and our Chinese annotation guidelines will be made freely available. 2020.emnlp-main.223 @@ -3037,7 +3037,7 @@ MohammadShoeybi RaulPuri PascaleFung - AnimaAnandkumar + AnimaAnandkumar BryanCatanzaro 2831–2845 Existing pre-trained large language models have shown unparalleled generative capabilities. However, they are not controllable. In this paper, we propose MEGATRON-CNTRL, a novel framework that uses large-scale language models and adds control to text generation by incorporating an external knowledge base. Our framework consists of a keyword predictor, a knowledge retriever, a contextual knowledge ranker, and a conditional text generator. As we do not have access to ground-truth supervision for the knowledge ranker, we make use of weak supervision from sentence embedding. The empirical results show that our model generates more fluent, consistent, and coherent stories with less repetition and higher diversity compared to prior work on the ROC story dataset. We showcase the controllability of our model by replacing the keywords used to generate stories and re-running the generation process. Human evaluation results show that 77.5% of these stories are successfully controlled by the new keywords. Furthermore, by scaling our model from 124 million to 8.3 billion parameters we demonstrate that larger models improve both the quality of generation (from 74.5% to 93.0% for consistency) and controllability (from 77.5% to 91.5%). @@ -3289,7 +3289,7 @@ Multi-Step Inference for Reasoning Over Paragraphs JiangmingLiu MattGardner - Shay B.Cohen + Shay B.Cohen MirellaLapata 3040–3050 Complex reasoning over text requires understanding and chaining together free-form predicates and logical connectives. Prior work has largely tried to do this either symbolically or with black-box transformers. We present a middle ground between these two extremes: a compositional model reminiscent of neural module networks that can perform chained logical reasoning. This model first finds relevant sentences in the context and then chains them together using neural modules. Our model gives significant performance improvements (up to 29% relative error reduction when combined with a reranker) on ROPES, a recently-introduced complex reasoning dataset. @@ -3443,7 +3443,7 @@ Are All Good Word Vector Spaces Isomorphic? IvanVulić SebastianRuder - AndersSøgaard + AndersSøgaard 3178–3192 Existing algorithms for aligning cross-lingual word vector spaces assume that vector spaces are approximately isomorphic. As a result, they perform poorly or fail completely on non-isomorphic spaces. Such non-isomorphism has been hypothesised to result from typological differences between languages. In this work, we ask whether non-isomorphism is also crucially a sign of degenerate word vector spaces. We present a series of experiments across diverse languages which show that variance in performance across language pairs is not only due to typological differences, but can mostly be attributed to the size of the monolingual resources available, and to the properties and duration of monolingual training (e.g. “under-training”). 2020.emnlp-main.257 @@ -3484,7 +3484,7 @@ MadhuraPande PrekshaNema PratyushKumar - Mitesh M.Khapra + Mitesh M.Khapra 3230–3235 Given the success of Transformer-based models, two directions of study have emerged: interpreting role of individual attention heads and down-sizing the models for efficiency. Our work straddles these two streams: We analyse the importance of basing pruning strategies on the interpreted role of the attention heads. We evaluate this on Transformer and BERT models on multiple NLP tasks. Firstly, we find that a large fraction of the attention heads can be randomly pruned with limited effect on accuracy. Secondly, for Transformers, we find no advantage in pruning attention heads identified to be important based on existing studies that relate importance to the location of a head. On the BERT model too we find no preference for top or bottom layers, though the latter are reported to have higher importance. However, strategies that avoid pruning middle layers and consecutive layers perform better. Finally, during fine-tuning the compensation for pruned attention heads is roughly equally distributed across the un-pruned heads. Our results thus suggest that interpretation of attention heads does not strongly inform pruning. 2020.emnlp-main.260 @@ -3497,7 +3497,7 @@ SahanaRamnath PrekshaNema DeepSahni - Mitesh M.Khapra + Mitesh M.Khapra 3236–3242 BERT and its variants have achieved state-of-the-art performance in various NLP tasks. Since then, various works have been proposed to analyze the linguistic information being captured in BERT. However, the current works do not provide an insight into how BERT is able to achieve near human-level performance on the task of Reading Comprehension based Question Answering. In this work, we attempt to interpret BERT for RCQA. Since BERT layers do not have predefined roles, we define a layer’s role or functionality using Integrated Gradients. Based on the defined roles, we perform a preliminary analysis across all layers. We observed that the initial layers focus on query-passage interaction, whereas later layers focus more on contextual understanding and enhancing the answer prediction. Specifically for quantifier questions (how much/how many), we notice that BERT focuses on confusing words (i.e., on other numerical quantities in the passage) in the later layers, but still manages to predict the answer correctly. The fine-tuning and analysis scripts will be publicly available at https://github.com/iitmnlp/BERT-Analysis-RCQA. 2020.emnlp-main.261 @@ -3508,7 +3508,7 @@ How do Decisions Emerge across Layers in Neural Models? Interpretation with Differentiable Masking NicolaDe Cao - Michael SejrSchlichtkrull + Michael SejrSchlichtkrull WilkerAziz IvanTitov 3243–3255 @@ -3599,7 +3599,7 @@ <fixed-case>VD-BERT</fixed-case>: <fixed-case>A</fixed-case> <fixed-case>U</fixed-case>nified <fixed-case>V</fixed-case>ision and <fixed-case>D</fixed-case>ialog <fixed-case>T</fixed-case>ransformer with <fixed-case>BERT</fixed-case> YueWang - ShafiqJoty + ShafiqJoty MichaelLyu IrwinKing CaimingXiong @@ -3656,7 +3656,7 @@ <fixed-case>M</fixed-case>in<fixed-case>TL</fixed-case>: Minimalist Transfer Learning for Task-Oriented Dialogue Systems ZhaojiangLin AndreaMadotto - Genta IndraWinata + Genta IndraWinata PascaleFung 3391–3405 In this paper, we propose Minimalist Transfer Learning (MinTL) to simplify the system design process of task-oriented dialogue systems and alleviate the over-dependency on annotated data. MinTL is a simple yet effective transfer learning framework, which allows us to plug-and-play pre-trained seq2seq models, and jointly learn dialogue state tracking and dialogue response generation. Unlike previous approaches, which use a copy mechanism to “carryover” the old dialogue states to the new one, we introduce Levenshtein belief spans (Lev), that allows efficient dialogue state tracking with a minimal generation length. We instantiate our learning framework with two pre-trained backbones: T5 and BART, and evaluate them on MultiWOZ. Extensive experiments demonstrate that: 1) our systems establish new state-of-the-art results on end-to-end response generation, 2) MinTL-based systems are more robust than baseline methods in the low resource setting, and they achieve competitive results with only 20% training data, and 3) Lev greatly improves the inference efficiency. @@ -3670,7 +3670,7 @@ Kang MinYoo HanbitLee FranckDernoncourt - TrungBui + TrungBui WalterChang Sang-gooLee 3406–3425 @@ -3699,7 +3699,7 @@ Counterfactual Off-Policy Training for Neural Dialogue Generation QingfuZhu - Wei-NanZhang + Wei-NanZhang TingLiu William YangWang 3438–3448 @@ -3794,7 +3794,7 @@ Don’t Neglect the Obvious: On the Role of Unambiguous Words in Word Sense Disambiguation DanielLoureiro - JoseCamacho-Collados + JoseCamacho-Collados 3514–3520 State-of-the-art methods for Word Sense Disambiguation (WSD) combine two different features: the power of pre-trained language models and a propagation method to extend the coverage of such models. This propagation is needed as current sense-annotated corpora lack coverage of many instances in the underlying sense inventory (usually WordNet). At the same time, unambiguous words make for a large portion of all words in WordNet, while being poorly covered in existing sense-annotated corpora. In this paper, we propose a simple method to provide annotations for most unambiguous words in a large corpus. We introduce the UWA (Unambiguous Word Annotations) dataset and show how a state-of-the-art propagation-based model can use it to extend the coverage and quality of its word sense embeddings by a significant margin, improving on its original results on WSD. 2020.emnlp-main.283 @@ -3828,7 +3828,7 @@ Convolution over Hierarchical Syntactic and Lexical Graphs for Aspect Level Sentiment Analysis - MiZhang + MiZhang TieyunQian 3540–3549 The state-of-the-art methods in aspect-level sentiment classification have leveraged the graph based models to incorporate the syntactic structure of a sentence. While being effective, these methods ignore the corpus level word co-occurrence information, which reflect the collocations in linguistics like “nothing special”. Moreover, they do not distinguish the different types of syntactic dependency, e.g., a nominal subject relation “food-was” is treated equally as an adjectival complement relation “was-okay” in “food was okay”. To tackle the above two limitations, we propose a novel architecture which convolutes over hierarchical syntactic and lexical graphs. Specifically, we employ a global lexical graph to encode the corpus level word co-occurrence information. Moreover, we build a concept hierarchy on both the syntactic and lexical graphs for differentiating various types of dependency relations or lexical word pairs. Finally, we design a bi-level interactive graph convolution network to fully exploit these two graphs. Extensive experiments on five bench- mark datasets show that our method outperforms the state-of-the-art baselines. @@ -3852,7 +3852,7 @@ Aspect Sentiment Classification with Aspect-Specific Opinion Spans - LuXu + LuXu LidongBing WeiLu FeiHuang @@ -3892,10 +3892,10 @@ Multi-modal Multi-label Emotion Detection with Modality and Label Dependence DongZhang XinchengJu - JunhuiLi + JunhuiLi ShoushanLi - QiaomingZhu - GuodongZhou + QiaomingZhu + GuodongZhou 3584–3593 As an important research issue in the natural language processing community, multi-label emotion detection has been drawing more and more attention in the last few years. However, almost all existing studies focus on one modality (e.g., textual modality). In this paper, we focus on multi-label emotion detection in a multi-modal scenario. In this scenario, we need to consider both the dependence among different labels (label dependence) and the dependence between each predicting label and different modalities (modality dependence). Particularly, we propose a multi-modal sequence-to-set approach to effectively model both kinds of dependence in multi-modal multi-label emotion detection. The detailed evaluation demonstrates the effectiveness of our approach. 2020.emnlp-main.291 @@ -3910,7 +3910,7 @@ DiJin BingningWang QiZhang - XuanjingHuang + XuanjingHuang 3594–3605 Aspect-based sentiment analysis (ABSA) aims to predict the sentiment towards a specific aspect in the text. However, existing ABSA test sets cannot be used to probe whether a model can distinguish the sentiment of the target aspect from the non-target aspects. To solve this problem, we develop a simple but effective approach to enrich ABSA test sets. Specifically, we generate new examples to disentangle the confounding sentiments of the non-target aspects from the target aspect’s sentiment. Based on the SemEval 2014 dataset, we construct the Aspect Robustness Test Set (ARTS) as a comprehensive probe of the aspect robustness of ABSA models. Over 92% data of ARTS show high fluency and desired sentiment on all aspects by human evaluation. Using ARTS, we analyze the robustness of nine ABSA models, and observe, surprisingly, that their accuracy drops by up to 69.73%. We explore several ways to improve aspect robustness, and find that adversarial training can improve models’ performance on ARTS by up to 32.85%. Our code and new test set are available at https://github.com/zhijing-jin/ARTS_TestSet 2020.emnlp-main.292 @@ -4009,7 +4009,7 @@ LijieWen YusongXu ChenweiZhang - PhilipYu + PhilipYu 3673–3682 Open relation extraction is the task of extracting open-domain relation facts from natural language sentences. Existing works either utilize heuristics or distant-supervised annotations to train a supervised classifier over pre-defined relations, or adopt unsupervised methods with additional assumptions that have less discriminative power. In this work, we propose a self-supervised framework named SelfORE, which exploits weak, self-supervised signals by leveraging large pretrained language model for adaptive clustering on contextualized relational features, and bootstraps the self-supervised signals by improving contextualized features in relation classification. Experimental results on three datasets show the effectiveness and robustness of SelfORE on open-domain Relation Extraction when comparing with competitive baselines. 2020.emnlp-main.299 @@ -4188,7 +4188,7 @@ Yun-HsuanJen Chieh-YangHuang MeiHuaChen - Ting-HaoHuang + Ting-HaoHuang Lun-WeiKu 3807–3817 Many English-as-a-second language learners have trouble using near-synonym words (e.g., small vs.little; briefly vs.shortly) correctly, and often look for example sentences to learn how two nearly synonymous terms differ. Prior work uses hand-crafted scores to recommend sentences but has difficulty in adopting such scores to all the near-synonyms as near-synonyms differ in various ways. We notice that the helpfulness of the learning material would reflect on the learners’ performance. Thus, we propose the inference-based learner-like agent to mimic learner behavior and identify good learning materials by examining the agent’s performance. To enable the agent to behave like a learner, we leverage entailment modeling’s capability of inferring answers from the provided materials. Experimental results show that the proposed agent is equipped with good learner-like behavior to achieve the best performance in both fill-in-the-blank (FITB) and good example sentence selection tasks. We further conduct a classroom user study with college ESL learners. The results of the user study show that the proposed agent can find out example sentences that help students learn more easily and efficiently. Compared to other models, the proposed agent improves the score of more than 17% of students after learning. @@ -4244,8 +4244,8 @@ <fixed-case>D</fixed-case>ago<fixed-case>BERT</fixed-case>: <fixed-case>G</fixed-case>enerating Derivational Morphology with a Pretrained Language Model ValentinHofmann - JanetPierrehumbert - HinrichSchütze + JanetPierrehumbert + HinrichSchütze 3848–3861 Can pretrained language models (PLMs) generate derivationally complex words? We present the first study investigating this question, taking BERT as the example PLM. We examine BERT’s derivational capabilities in different settings, ranging from using the unmodified pretrained model to full finetuning. Our best model, DagoBERT (Derivationally and generatively optimized BERT), clearly outperforms the previous state of the art in derivation generation (DG). Furthermore, our experiments show that the input segmentation crucially impacts BERT’s derivational knowledge, suggesting that the performance of PLMs could be further improved if a morphologically informed vocabulary of units were used. 2020.emnlp-main.316 @@ -4267,7 +4267,7 @@ A Joint Multiple Criteria Model in Transfer Learning for Cross-domain <fixed-case>C</fixed-case>hinese Word Segmentation KaiyuHuang - DegenHuang + DegenHuang ZhuangLiu FengranMo 3873–3882 @@ -4295,7 +4295,7 @@ NanDuan WanjunZhong ZhongyuWei - XuanjingHuang + XuanjingHuang DaxinJiang MingZhou 3895–3903 @@ -4367,14 +4367,14 @@ Spot The Bot: A Robust and Efficient Framework for the Evaluation of Conversational Dialogue Systems - JanDeriu + JanDeriu DonTuggener Piusvon Däniken Jon AnderCampos - AlvaroRodrigo + AlvaroRodrigo ThiziriBelkacem - AitorSoroa - EnekoAgirre + AitorSoroa + EnekoAgirre MarkCieliebak 3971–3984 Honorable Mention Paper @@ -4469,7 +4469,7 @@ Towards Better Context-aware Lexical Semantics:Adjusting Contextualized Representations through Static Anchors QianchuLiu - DianaMcCarthy + DianaMcCarthy AnnaKorhonen 4066–4075 One of the most powerful features of contextualized models is their dynamic embeddings for words in context, leading to state-of-the-art representations for context-aware lexical semantics. In this paper, we present a post-processing technique that enhances these representations by learning a transformation through static anchors. Our method requires only another pre-trained model and no labeled data is needed. We show consistent improvement in a range of benchmark tasks that test contextual variations of meaning both across different usages of a word and across different words as they are used in context. We demonstrate that while the original contextual representations can be improved by another embedding space from both contextualized and static models, the static embeddings, which have lower computational requirements, provide the most gains. @@ -4483,7 +4483,7 @@ CharlesWelch Jonathan K.Kummerfeld VerónicaPérez-Rosas - RadaMihalcea + RadaMihalcea 4076–4089 Word embeddings are usually derived from corpora containing text from many individuals, thus leading to general purpose representations rather than individually personalized representations. While personalized embeddings can be useful to improve language model performance and other language processing tasks, they can only be computed for people with a large amount of longitudinal data, which is not the case for new users. We propose a new form of personalized word embeddings that use demographic-specific word representations derived compositionally from full or partial demographic information for a user (i.e., gender, age, location, religion). We show that the resulting demographic-aware word representations outperform generic word representations on two tasks for English: language modeling and word associations. We further explore the trade-off between the number of available attributes and their relative effectiveness and discuss the ethical implications of using them. 2020.emnlp-main.334 @@ -4608,7 +4608,7 @@ Adversarial Semantic Collisions CongzhengSong - AlexanderRush + AlexanderRush VitalyShmatikov 4198–4210 We study semantic collisions: texts that are semantically unrelated but judged as similar by NLP models. We develop gradient-based approaches for generating semantic collisions and demonstrate that state-of-the-art models for many tasks which rely on analyzing the meaning and similarity of texts—including paraphrase identification, document retrieval, response suggestion, and extractive summarization—are vulnerable to semantic collisions. For example, given a target query, inserting a crafted collision into an irrelevant document can shift its retrieval rank from 1000 to top 3. We show how to generate semantic collisions that evade perplexity-based filtering and discuss other potential mitigations. Our code is available at https://github.com/csong27/collision-bert. @@ -4663,7 +4663,7 @@ Sparse Text Generation Pedro HenriqueMartins ZitaMarinho - André F. T.Martins + André F. T.Martins 4252–4273 Current state-of-the-art text generators build on powerful language models such as GPT-2, achieving impressive performance. However, to avoid degenerate text, they require sampling from a modified softmax, via temperature parameters or ad-hoc truncation techniques, as in top-k or nucleus sampling. This creates a mismatch between training and testing conditions. In this paper, we use the recently introduced entmax transformation to train and sample from a natively sparse language model, avoiding this mismatch. The result is a text generator with favorable performance in terms of fluency and consistency, fewer repetitions, and n-gram diversity closer to human text. In order to evaluate our model, we propose three new metrics for comparing sparse or truncated distributions: \epsilon-perplexity, sparsemax score, and Jensen-Shannon divergence. Human-evaluated experiments in story completion and dialogue generation show that entmax sampling leads to more engaging and coherent stories and conversations. 2020.emnlp-main.348 @@ -4699,7 +4699,7 @@ Content Planning for Neural Story Generation with Aristotelian Rescoring SeraphinaGoldfarb-Tarrant TuhinChakrabarty - RalphWeischedel + RalphWeischedel NanyunPeng 4319–4338 Long-form narrative text generated from large language models manages a fluent impersonation of human writing, but only at the local sentence level, and lacks structure or global cohesion. We posit that many of the problems of story generation can be addressed via high-quality content planning, and present a system that focuses on how to learn good plot structures to guide story generation. We utilize a plot-generation language model along with an ensemble of rescoring models that each implement an aspect of good story-writing as detailed in Aristotle’s Poetics. We find that stories written with our more principled plot-structure are both more relevant to a given prompt and higher quality than baselines that do not content plan, or that plan in an unprincipled way. @@ -4727,7 +4727,7 @@ MarioGiulianelli SandroPezzelle ArabellaSinclair - RaquelFernández + RaquelFernández 4350–4368 Dialogue participants often refer to entities or situations repeatedly within a conversation, which contributes to its cohesiveness. Subsequent references exploit the common ground accumulated by the interlocutors and hence have several interesting properties, namely, they tend to be shorter and reuse expressions that were effective in previous mentions. In this paper, we tackle the generation of first and subsequent references in visually grounded dialogue. We propose a generation model that produces referring utterances grounded in both the visual and the conversational context. To assess the referring effectiveness of its output, we also implement a reference resolution system. Our experiments and analyses show that the model produces better, more effective referring utterances than a model not grounded in the dialogue context, and generates subsequent references that exhibit linguistic patterns akin to humans. 2020.emnlp-main.353 @@ -4789,7 +4789,7 @@ Identifying Elements Essential for <fixed-case>BERT</fixed-case>’s Multilinguality PhilippDufter - HinrichSchütze + HinrichSchütze 4423–4437 It has been shown that multilingual BERT (mBERT) yields high quality multilingual representations and enables effective zero-shot transfer. This is surprising given that mBERT does not use any crosslingual signal during training. While recent literature has studied this phenomenon, the reasons for the multilinguality are still somewhat obscure. We aim to identify architectural properties of BERT and linguistic properties of languages that are necessary for BERT to become multilingual. To allow for fast experimentation we propose an efficient setup with small BERT models trained on a mix of synthetic and natural data. Overall, we identify four architectural and two linguistic elements that influence multilinguality. Based on our insights, we experiment with a multilingual pretraining setup that modifies the masking strategy using VecMap, i.e., unsupervised embedding alignment. Experiments on XNLI with three languages indicate that our findings transfer from our small setup to larger scale settings. 2020.emnlp-main.358 @@ -4800,7 +4800,7 @@ On Negative Interference in Multilingual Models: Findings and A Meta-Learning Treatment ZiruiWang - Zachary C.Lipton + Zachary C.Lipton YuliaTsvetkov 4438–4450 Modern multilingual models are trained on concatenated text from multiple languages in hopes of conferring benefits to each (positive transfer), with the most pronounced benefits accruing to low-resource languages. However, recent work has shown that this approach can degrade performance on high-resource languages, a phenomenon known as negative interference. In this paper, we present the first systematic study of negative interference. We show that, contrary to previous belief, negative interference also impacts low-resource languages. While parameters are maximally shared to learn language-universal structures, we demonstrate that language-specific parameters do exist in multilingual models and they are a potential cause of negative interference. Motivated by these observations, we also present a meta-learning algorithm that obtains better cross-lingual transferability and alleviates negative interference, by adding language-specific layers as meta-parameters and training them in a manner that explicitly improves shared layers’ generalization on all languages. Overall, our results show that negative interference is more common than previously known, suggesting new directions for improving multilingual representations. @@ -4816,7 +4816,7 @@ XingyuanZhao YucenLi MicaelahSt Johns - LoriLevin + LoriLevin 4451–4464 Cross-lingual word embedding (CWE) algorithms represent words in multiple languages in a unified vector space. Multi-Word Expressions (MWE) are common in every language. When training word embeddings, each component word of an MWE gets its own separate embedding, and thus, MWEs are not translated by CWEs. We propose a simple method for word translation of MWEs to and from English in ten languages: we first compile lists of MWEs in each language and then tokenize the MWEs as single tokens before training word embeddings. CWEs are trained on a word-translation task using the dictionaries that only contain single words. In order to evaluate MWE translation, we created bilingual word lists from multilingual WordNet that include single-token words and MWEs, and most importantly, include MWEs that correspond to single words in another language. We release these dictionaries to the research community. We show that the pre-tokenization of MWEs as single tokens performs better than averaging the embeddings of the individual tokens of the MWE. We can translate MWEs at a top-10 precision of 30-60%. The tokenization of MWEs makes the occurrences of single words in a training corpus more sparse, but we show that it does not pose negative impacts on single-word translations. 2020.emnlp-main.360 @@ -4829,7 +4829,7 @@ JerinPhilip AlexandreBerard MatthiasGallé - LaurentBesacier + LaurentBesacier 4465–4470 We propose a novel adapter layer formalism for adapting multilingual models. They are more parameter-efficient than existing adapter layers while obtaining as good or better performance. The layers are specific to one language (as opposed to bilingual adapters) allowing to compose them and generalize to unseen language-pairs. In this zero-shot setting, they obtain a median improvement of +2.77 BLEU points over a strong 20-language multilingual Transformer baseline trained on TED talks. 2020.emnlp-main.361 @@ -4866,7 +4866,7 @@ Distilling Multiple Domains for Neural Machine Translation AnnaCurrey PrashantMathur - GeorgianaDinu + GeorgianaDinu 4500–4511 Neural machine translation achieves impressive results in high-resource conditions, but performance often suffers when the input domain is low-resource. The standard practice of adapting a separate model for each domain of interest does not scale well in practice from both a quality perspective (brittleness under domain shift) as well as a cost perspective (added maintenance and inference complexity). In this paper, we propose a framework for training a single multi-domain neural machine translation model that is able to translate several domains without increasing inference time or memory usage. We show that this model can improve translation on both high- and low-resource domains over strong multi-domain baselines. In addition, our proposed model is effective when domain labels are unknown during training, as well as robust under noisy data conditions. 2020.emnlp-main.364 @@ -4929,7 +4929,7 @@ PhillipKeung YichaoLu GyörgySzarvas - Noah A.Smith + Noah A.Smith 4563–4568 We present the Multilingual Amazon Reviews Corpus (MARC), a large-scale collection of Amazon reviews for multilingual text classification. The corpus contains reviews in English, Japanese, German, French, Spanish, and Chinese, which were collected between 2015 and 2019. Each record in the dataset contains the review text, the review title, the star rating, an anonymized reviewer ID, an anonymized product ID, and the coarse-grained product category (e.g., ‘books’, ‘appliances’, etc.) The corpus is balanced across the 5 possible star ratings, so each rating constitutes 20% of the reviews in each language. For each language, there are 200,000, 5,000, and 5,000 reviews in the training, development, and test sets, respectively. We report baseline results for supervised text classification and zero-shot cross-lingual transfer learning by fine-tuning a multilingual BERT model on reviews data. We propose the use of mean absolute error (MAE) instead of classification accuracy for this task, since MAE accounts for the ordinal nature of the ratings. 2020.emnlp-main.369 @@ -4945,7 +4945,7 @@ DavidBuchanan LaurenBerkowitz OrBiran - JenniferChu-Carroll + JenniferChu-Carroll 4569–4586 Honorable Mention Paper When humans read or listen, they make implicit commonsense inferences that frame their understanding of what happened and why. As a step toward AI systems that can build similar mental models, we introduce GLUCOSE, a large-scale dataset of implicit commonsense causal knowledge, encoded as causal mini-theories about the world, each grounded in a narrative context. To construct GLUCOSE, we drew on cognitive psychology to identify ten dimensions of causal explanation, focusing on events, states, motivations, and emotions. Each GLUCOSE entry includes a story-specific causal statement paired with an inference rule generalized from the statement. This paper details two concrete contributions. First, we present our platform for effectively crowdsourcing GLUCOSE data at scale, which uses semi-structured templates to elicit causal explanations. Using this platform, we collected a total of ~670K specific statements and general rules that capture implicit commonsense knowledge about everyday situations. Second, we show that existing knowledge resources and pretrained language models do not include or readily predict GLUCOSE’s rich inferential content. However, when state-of-the-art neural models are trained on this knowledge, they can start to make commonsense inferences on unseen stories that match humans’ mental models. @@ -5016,7 +5016,7 @@ PengQian RichardFutrell RyosukeKohita - RogerLevy + RogerLevy MiguelBallesteros 4640–4652 Humans can learn structural properties about a word from minimal experience, and deploy their learned syntactic representations uniformly in different grammatical contexts. We assess the ability of modern neural language models to reproduce this behavior in English and evaluate the effect of structural supervision on learning outcomes. First, we assess few-shot learning capabilities by developing controlled experiments that probe models’ syntactic nominal number and verbal argument structure generalizations for tokens seen as few as two times during training. Second, we assess invariance properties of learned representation: the ability of a model to transfer syntactic generalizations from a base context (e.g., a simple declarative active-voice sentence) to a transformed context (e.g., an interrogative sentence). We test four models trained on the same dataset: an n-gram baseline, an LSTM, and two LSTM-variants trained with explicit structural supervision. We find that in most cases, the neural models are able to induce the proper syntactic generalizations after minimal exposure, often from just two examples during training, and that the two structurally supervised models generalize more accurately than the LSTM model. All neural models are able to leverage information learned in base contexts to drive expectations in transformed contexts, indicating that they have learned some invariance properties of syntax. @@ -5029,8 +5029,8 @@ Investigating representations of verb bias in neural language models RobertHawkins TakateruYamakoshi - ThomasGriffiths - AdeleGoldberg + ThomasGriffiths + AdeleGoldberg 4653–4663 Languages typically provide more than one grammatical construction to express certain types of messages. A speaker’s choice of construction is known to depend on multiple factors, including the choice of main verb – a phenomenon known as verb bias. Here we introduce DAIS, a large benchmark dataset containing 50K human judgments for 5K distinct sentence pairs in the English dative alternation. This dataset includes 200 unique verbs and systematically varies the definiteness and length of arguments. We use this dataset, as well as an existing corpus of naturally occurring data, to evaluate how well recent neural language models capture human preferences. Results show that larger models perform better than smaller models, and transformer architectures (e.g. GPT-2) tend to out-perform recurrent architectures (e.g. LSTMs) even under comparable parameter and training settings. Additional analyses of internal feature representations suggest that transformers may better integrate specific lexical information with grammatical constructions. 2020.emnlp-main.376 @@ -5043,7 +5043,7 @@ EceTakmaz SandroPezzelle LisaBeinborn - RaquelFernández + RaquelFernández 4664–4677 When speakers describe an image, they tend to look at objects before mentioning them. In this paper, we investigate such sequential cross-modal alignment by modelling the image description generation process computationally. We take as our starting point a state-of-the-art image captioning system and develop several model variants that exploit information from human gaze patterns recorded during language production. In particular, we propose the first approach to image description generation where visual processing is modelled sequentially. Our experiments and analyses confirm that better descriptions can be obtained by exploiting gaze-driven attention and shed light on human cognitive processes by comparing different ways of aligning the gaze modality with language production. We find that processing gaze data sequentially leads to descriptions that are better aligned to those produced by speakers, more diverse, and more natural—particularly when gaze is encoded with a dedicated recurrent component. 2020.emnlp-main.377 @@ -5070,7 +5070,7 @@ <fixed-case>B</fixed-case>io<fixed-case>M</fixed-case>egatron: Larger Biomedical Domain Language Model Hoo-ChangShin - YangZhang + YangZhang EvelinaBakhturina RaulPuri MostofaPatwary @@ -5188,7 +5188,7 @@ VictorMartinez KrishnaSomandepalli YaldaTehranian-Uhls - ShrikanthNarayanan + ShrikanthNarayanan 4780–4790 Exposure to violent, sexual, or substance-abuse content in media increases the willingness of children and adolescents to imitate similar behaviors. Computational methods that identify portrayals of risk behaviors from audio-visual cues are limited in their applicability to films in post-production, where modifications might be prohibitively expensive. To address this limitation, we propose a model that estimates content ratings based on the language use in movie scripts, making our solution available at the earlier stages of creative production. Our model significantly improves the state-of-the-art by adapting novel techniques to learn better movie representations from the semantic and sentiment aspects of a character’s language use, and by leveraging the co-occurrence of risk behaviors, following a multi-task approach. Additionally, we show how this approach can be useful to learn novel insights on the joint portrayal of these behaviors, and on the subtleties that filmmakers may otherwise not pick up on. 2020.emnlp-main.387 @@ -5201,7 +5201,7 @@ AmrithKrishna AshimGupta DeepakGarasangi - PavankumarSatuluri + PavankumarSatuluri PawanGoyal 4791–4797 Morphologically rich languages seem to benefit from joint processing of morphology and syntax, as compared to pipeline architectures. We propose a graph-based model for joint morphological parsing and dependency parsing in Sanskrit. Here, we extend the Energy based model framework (Krishna et al., 2020), proposed for several structured prediction tasks in Sanskrit, in 2 simple yet significant ways. First, the framework’s default input graph generation method is modified to generate a multigraph, which enables the use of an exact search inference. Second, we prune the input search space using a linguistically motivated approach, rooted in the traditional grammatical analysis of Sanskrit. Our experiments show that the morphological parsing from our joint model outperforms standalone morphological parsers. We report state of the art results in morphological parsing, and in dependency parsing, both in standalone (with gold morphological tags) and joint morphosyntactic parsing setting. @@ -5239,7 +5239,7 @@ Unsupervised Cross-Lingual Part-of-Speech Tagging for Truly Low-Resource Scenarios RamyEskander SmarandaMuresan - MichaelCollins + MichaelCollins 4820–4831 We describe a fully unsupervised cross-lingual transfer approach for part-of-speech (POS) tagging under a truly low resource scenario. We assume access to parallel translations between the target language and one or more source languages for which POS taggers are available. We use the Bible as parallel data in our experiments: small size, out-of-domain and covering many diverse languages. Our approach innovates in three ways: 1) a robust approach of selecting training instances via cross-lingual annotation projection that exploits best practices of unsupervised type and token constraints, word-alignment confidence and density of projected POS, 2) a Bi-LSTM architecture that uses contextualized word embeddings, affix embeddings and hierarchical Brown clusters, and 3) an evaluation on 12 diverse languages in terms of language family and morphological typology. In spite of the use of limited and out-of-domain parallel data, our experiments demonstrate significant improvements in accuracy over previous work. In addition, we show that using multi-source information, either via projection or output combination, improves the performance for most target languages. 2020.emnlp-main.391 @@ -5265,7 +5265,7 @@ Utility is in the Eye of the User: A Critique of <fixed-case>NLP</fixed-case> Leaderboards KawinEthayarajh - DanJurafsky + DanJurafsky 4846–4853 Benchmarks such as GLUE have helped drive advances in NLP by incentivizing the creation of more accurate models. While this leaderboard paradigm has been remarkably successful, a historical focus on performance-based evaluation has been at the expense of other qualities that the NLP community values in models, such as compactness, fairness, and energy efficiency. In this opinion paper, we study the divergence between what is incentivized by leaderboards and what is useful in practice through the lens of microeconomic theory. We frame both the leaderboard and NLP practitioners as consumers and the benefit they get from a model as its utility to them. With this framing, we formalize how leaderboards – in their current form – can be poor proxies for the NLP community at large. For example, a highly inefficient model would provide less utility to practitioners but not to a leaderboard, since it is a cost that only the former must bear. To allow practitioners to better estimate a model’s utility to them, we advocate for more transparency on leaderboards, such as the reporting of statistics that are of practical concern (e.g., model size, energy efficiency, and inference latency). 2020.emnlp-main.393 @@ -5302,7 +5302,7 @@ Dissecting Span Identification Tasks with Performance Prediction SeanPapay RomanKlinger - SebastianPadó + SebastianPadó 4881–4895 Span identification (in short, span ID) tasks such as chunking, NER, or code-switching detection, ask models to identify and classify relevant spans in a text. Despite being a staple of NLP, and sharing a common structure, there is little insight on how these tasks’ properties influence their difficulty, and thus little guidance on what model families work well on span ID tasks, and why. We analyze span ID tasks via performance prediction, estimating how well neural architectures do on different tasks. Our contributions are: (a) we identify key properties of span ID tasks that can inform performance prediction; (b) we carry out a large-scale experiment on English data, building a model to predict performance for unseen span ID tasks that can support architecture choices; (c), we investigate the parameters of the meta model, yielding new insights on how model and task properties interact to affect span ID performance. We find, e.g., that span frequency is especially important for LSTMs, and that CRFs help when spans are infrequent and boundaries non-distinctive. 2020.emnlp-main.396 @@ -5390,8 +5390,8 @@ We Can Detect Your Bias: Predicting the Political Ideology of News Articles RamyBaly GiovanniDa San Martino - JamesGlass - PreslavNakov + JamesGlass + PreslavNakov 4982–4991 We explore the task of predicting the leading political ideology or bias of news articles. First, we collect and release a large dataset of 34,737 articles that were manually annotated for political ideology –left, center, or right–, which is well-balanced across both topics and media. We further use a challenging experimental setup where the test examples come from media that were not seen during training, which prevents the model from learning to detect the source of the target news article instead of predicting its political ideology. From a modeling perspective, we propose an adversarial media adaptation, as well as a specially adapted triplet loss. We further add background information about the source, and we show that it is quite helpful for improving article-level prediction. Our experimental results show very sizable improvements over using state-of-the-art pre-trained Transformers in this challenging setup. 2020.emnlp-main.404 @@ -5418,7 +5418,7 @@ Training for <fixed-case>G</fixed-case>ibbs Sampling on Conditional Random Fields with Neural Scoring Factors SidaGao - Matthew R.Gormley + Matthew R.Gormley 4999–5011 Most recent improvements in NLP come from changes to the neural network architectures modeling the text input. Yet, state-of-the-art models often rely on simple approaches to model the label space, e.g. bigram Conditional Random Fields (CRFs) in sequence tagging. More expressive graphical models are rarely used due to their prohibitive computational cost. In this work, we present an approach for efficiently training and decoding hybrids of graphical models and neural networks based on Gibbs sampling. Our approach is the natural adaptation of SampleRank (Wick et al., 2011) to neural models, and is widely applicable to tasks beyond sequence tagging. We apply our approach to named entity recognition and present a neural skip-chain CRF model, for which exact inference is impractical. The skip-chain model improves over a strong baseline on three languages from CoNLL-02/03. We obtain new state-of-the-art results on Dutch. 2020.emnlp-main.406 @@ -5430,7 +5430,7 @@ Multilevel Text Alignment with Cross-Document Attention XuhuiZhou NikolaosPappas - Noah A.Smith + Noah A.Smith 5012–5025 Text alignment finds application in tasks such as citation recommendation and plagiarism detection. Existing alignment methods operate at a single, predefined level and cannot learn to align texts at, for example, sentence and document levels. We propose a new learning approach that equips previously established hierarchical attention encoders for representing documents with a cross-document attention component, enabling structural comparisons across different levels (document-to-document and sentence-to-document). Our component is weakly supervised from document pairs and can align at multiple levels. Our evaluation on predicting document-to-document relationships and sentence-to-document relationships on the tasks of citation recommendation and plagiarism detection shows that our approach outperforms previously established hierarchical, attention encoders based on recurrent and transformer contextualization that are unaware of structural correspondence between documents. 2020.emnlp-main.407 @@ -5489,7 +5489,7 @@ WenhaoLiu Chien-ShengWu YaoWan - PhilipYu + PhilipYu RichardSocher CaimingXiong 5064–5082 @@ -5519,7 +5519,7 @@ XilunChen AsishGhoshal YasharMehdad - LukeZettlemoyer + LukeZettlemoyer SonalGupta 5090–5100 Task-oriented semantic parsing is a critical component of virtual assistants, which is responsible for understanding the user’s intents (set reminder, play music, etc.). Recent advances in deep learning have enabled several approaches to successfully parse more complex queries (Gupta et al., 2018; Rongali et al.,2020), but these models require a large amount of annotated training data to parse queries on new domains (e.g. reminder, music). In this paper, we focus on adapting task-oriented semantic parsers to low-resource domains, and propose a novel method that outperforms a supervised neural model at a 10-fold data reduction. In particular, we identify two fundamental factors for low-resource domain adaptation: better representation learning and better training techniques. Our representation learning uses BART (Lewis et al., 2019) to initialize our model which outperforms encoder-only pre-trained representations used in previous work. Furthermore, we train with optimization-based meta-learning (Finn et al., 2017) to improve generalization to low-resource domains. This approach significantly outperforms all baseline methods in the experiments on a newly collected multi-domain task-oriented semantic parsing dataset (TOPv2), which we release to the public. @@ -5597,7 +5597,7 @@ Controllable Meaning Representation to Text Generation: Linearization and Data Augmentation Strategies ChrisKedzie - KathleenMcKeown + KathleenMcKeown 5160–5185 We study the degree to which neural sequence-to-sequence models exhibit fine-grained controllability when performing natural language generation from a meaning representation. Using two task-oriented dialogue generation benchmarks, we systematically compare the effect of four input linearization strategies on controllability and faithfulness. Additionally, we evaluate how a phrase-based data augmentation method can improve performance. We find that properly aligning input sequences during training leads to highly controllable generation, both when training from scratch or when fine-tuning a larger pre-trained model. Data augmentation further improves control on difficult, randomly generated utterance plans. 2020.emnlp-main.419 @@ -5635,7 +5635,7 @@ AditiChaudhary AntoniosAnastasopoulos AdithyaPratapa - David R.Mortensen + David R.Mortensen ZaidSheikh YuliaTsvetkov GrahamNeubig @@ -5649,8 +5649,8 @@ Tackling the Low-resource Challenge for Canonical Segmentation ManuelMager - ÖzlemÇetinoğlu - KatharinaKann + ÖzlemÇetinoğlu + KatharinaKann 5237–5250 Canonical morphological segmentation consists of dividing words into their standardized morphemes. Here, we are interested in approaches for the task when training data is limited. We compare model performance in a simulated low-resource setting for the high-resource languages German, English, and Indonesian to experiments on new datasets for the truly low-resource languages Popoluca and Tepehua. We explore two new models for the task, borrowing from the closely related area of morphological generation: an LSTM pointer-generator and a sequence-to-sequence model with hard monotonic attention trained with imitation learning. We find that, in the low-resource setting, the novel approaches out-perform existing ones on all languages by up to 11.4% accuracy. However, while accuracy in emulated low-resource scenarios is over 50% for all languages, for the truly low-resource languages Popoluca and Tepehua, our best model only obtains 37.4% and 28.4% accuracy, respectively. Thus, we conclude that canonical segmentation is still a challenging task for low-resource languages. 2020.emnlp-main.423 @@ -5660,10 +5660,10 @@ <fixed-case>IGT</fixed-case>2<fixed-case>P</fixed-case>: From Interlinear Glossed Texts to Paradigms - SarahMoeller + SarahMoeller LingLiu ChangbingYang - KatharinaKann + KatharinaKann MansHulden 5251–5262 An intermediate step in the linguistic analysis of an under-documented language is to find and organize inflected forms that are attested in natural speech. From this data, linguists generate unseen inflected word forms in order to test hypotheses about the language’s inflectional patterns and to complete inflectional paradigm tables. To get the data linguists spend many hours manually creating interlinear glossed texts (IGTs). We introduce a new task that speeds this process and automatically generates new morphological resources for natural language processing systems: IGT-to-paradigms (IGT2P). IGT2P generates entire morphological paradigms from IGT input. We show that existing morphological reinflection models can solve the task with 21% to 64% accuracy, depending on the language. We further find that (i) having a language expert spend only a few hours cleaning the noisy IGT data improves performance by as much as 21 percentage points, and (ii) POS tags, which are generally considered a necessary part of NLP morphological reinflection input, have no effect on the accuracy of the models considered here. @@ -5726,7 +5726,7 @@ TalAugust LaurenKim KatharinaReinecke - Noah A.Smith + Noah A.Smith 5327–5344 Communicating complex scientific ideas without misleading or overwhelming the public is challenging. While science communication guides exist, they rarely offer empirical evidence for how their strategies are used in practice. Writing strategies that can be automatically recognized could greatly support science communication efforts by enabling tools to detect and suggest strategies for writers. We compile a set of writing strategies drawn from a wide range of prescriptive sources and develop an annotation scheme allowing humans to recognize them. We collect a corpus of 128k science writing documents in English and annotate a subset of this corpus. We use the annotations to train transformer-based classifiers and measure the strategies’ use in the larger corpus. We find that the use of strategies, such as storytelling and emphasizing the most important findings, varies significantly across publications with different reader audiences. 2020.emnlp-main.429 @@ -5754,7 +5754,7 @@ AlanRamponi Robvan der Goot RosarioLombardo - BarbaraPlank + BarbaraPlank 5357–5367 We introduce Biomedical Event Extraction as Sequence Labeling (BeeSL), a joint end-to-end neural information extraction model. BeeSL recasts the task as sequence labeling, taking advantage of a multi-label aware encoding strategy and jointly modeling the intermediate tasks via multi-task learning. BeeSL is fast, accurate, end-to-end, and unlike current methods does not require any external knowledge base or preprocessing tools. BeeSL outperforms the current best system (Li et al., 2019) on the Genia 2011 benchmark by 1.57% absolute F1 score reaching 60.22% F1, establishing a new state of the art for the task. Importantly, we also provide first results on biomedical event extraction without gold entity information. Empirical results show that BeeSL’s speed and accuracy makes it a viable approach for large-scale real-world scenarios. 2020.emnlp-main.431 @@ -5823,8 +5823,8 @@ YogarshiVyas JieMa ParminderBhatia - KathleenMcKeown - YaserAl-Onaizan + KathleenMcKeown + YaserAl-Onaizan 5412–5417 In this paper, we propose a neural architecture and a set of training methods for ordering events by predicting temporal relations. Our proposed models receive a pair of events within a span of text as input and they identify temporal relations (Before, After, Equal, Vague) between them. Given that a key challenge with this task is the scarcity of annotated data, our models rely on either pretrained representations (i.e. RoBERTa, BERT or ELMo), transfer and multi-task learning (by leveraging complementary datasets), and self-training techniques. Experiments on the MATRES dataset of English documents establish a new state-of-the-art on this task. 2020.emnlp-main.436 @@ -5851,7 +5851,7 @@ DimitrinaZlatkova YoanDinkov IvanKoychev - PreslavNakov + PreslavNakov 5427–5444 We propose EXAMS – a new benchmark dataset for cross-lingual and multilingual question answering for high school examinations. We collected more than 24,000 high-quality high school exam questions in 16 languages, covering 8 language families and 24 school subjects from Natural Sciences and Social Sciences, among others.EXAMS offers unique fine-grained evaluation framework across multiple languages and subjects, which allows precise analysis and comparison of the proposed models. We perform various experiments with existing top-performing multilingual pre-trained models and show that EXAMS offers multiple challenges that require multilingual knowledge and reasoning in multiple domains. We hope that EXAMS will enable researchers to explore challenging reasoning and knowledge transfer methods and pre-trained models for school question answering in various languages which was not possible by now. The data, code, pre-trained models, and evaluation are available at http://github.com/mhardalov/exams-qa. 2020.emnlp-main.438 @@ -5862,7 +5862,7 @@ End-to-End Synthetic Data Generation for Domain Adaptation of Question Answering Systems SiamakShakeri - CiceroNogueira dos Santos + CiceroNogueira dos Santos HenghuiZhu PatrickNg FengNan @@ -5880,13 +5880,13 @@ Multi-Stage Pre-training for Low-Resource Domain Adaptation RongZhang RevanthGangi Reddy - Md ArafatSultan + Md ArafatSultan VittorioCastelli AnthonyFerritto - RaduFlorian - EfsunSarioglu Kayi - SalimRoukos - AviSil + RaduFlorian + EfsunSarioglu Kayi + SalimRoukos + AviSil ToddWard 5461–5468 Transfer learning techniques are particularly useful for NLP tasks where a sizable amount of high-quality annotated data is difficult to obtain. Current approaches directly adapt a pretrained language model (LM) on in-domain text before fine-tuning to downstream tasks. We show that extending the vocabulary of the LM with domain-specific terms leads to further gains. To a bigger effect, we utilize structure in the unlabeled data to create auxiliary synthetic tasks, which helps the LM transfer to downstream tasks. We apply these approaches incrementally on a pretrained Roberta-large LM and show considerable performance gain on three tasks in the IT domain: Extractive Reading Comprehension, Document Ranking and Duplicate Question Detection. @@ -5897,7 +5897,7 @@ <fixed-case>ISAAQ</fixed-case> - Mastering Textbook Questions with Pre-trained Transformers and Bottom-Up and Top-Down Attention - Jose ManuelGomez-Perez + Jose ManuelGomez-Perez RaúlOrtega 5469–5479 Textbook Question Answering is a complex task in the intersection of Machine Comprehension and Visual Question Answering that requires reasoning with multimodal information from text and diagrams. For the first time, this paper taps on the potential of transformer language models and bottom-up and top-down attention to tackle the language and visual understanding challenges this task entails. Rather than training a language-visual transformer from scratch we rely on pre-trained transformers, fine-tuning and ensembling. We add bottom-up and top-down attention to identify regions of interest corresponding to diagram constituents and their relationships, improving the selection of relevant visual information for each question and answer options. Our system ISAAQ reports unprecedented success in all TQA question types, with accuracies of 81.36%, 71.11% and 55.12% on true/false, text-only and diagram multiple choice questions. ISAAQ also demonstrates its broad applicability, obtaining state-of-the-art results in other demanding datasets. @@ -5976,7 +5976,7 @@ Sequence-Level Mixed Sample Data Augmentation DemiGuo YoonKim - AlexanderRush + AlexanderRush 5547–5552 Despite their empirical success, neural networks still have difficulty capturing compositional aspects of natural language. This work proposes a simple data augmentation approach to encourage compositional behavior in neural models for sequence-to-sequence problems. Our approach, SeqMix, creates new synthetic examples by softly combining input/output sequences from the training set. We connect this approach to existing techniques such as SwitchOut and word dropout, and show that these techniques are all essentially approximating variants of a single objective. SeqMix consistently yields approximately 1.0 BLEU improvement on five different translation datasets over strong Transformer baselines. On tasks that require strong compositional generalization such as SCAN and semantic parsing, SeqMix also offers further improvements. 2020.emnlp-main.447 @@ -6039,7 +6039,7 @@ Affective Event Classification with Discourse-enhanced Self-training YuanZhuang TianyuJiang - EllenRiloff + EllenRiloff 5608–5617 Prior research has recognized the need to associate affective polarities with events and has produced several techniques and lexical resources for identifying affective events. Our research introduces new classification models to assign affective polarity to event phrases. First, we present a BERT-based model for affective event classification and show that the classifier achieves substantially better performance than a large affective event knowledge base. Second, we present a discourse-enhanced self-training method that iteratively improves the classifier with unlabeled data. The key idea is to exploit event phrases that occur with a coreferent sentiment expression. The discourse-enhanced self-training algorithm iteratively labels new event phrases based on both the classifier’s predictions and the polarities of the event’s coreferent sentiment expressions. Our results show that discourse-enhanced self-training further improves both recall and precision for affective event classification. 2020.emnlp-main.452 @@ -6075,7 +6075,7 @@ Mind Your Inflections! <fixed-case>I</fixed-case>mproving <fixed-case>NLP</fixed-case> for Non-Standard <fixed-case>E</fixed-case>nglishes with <fixed-case>B</fixed-case>ase-<fixed-case>I</fixed-case>nflection <fixed-case>E</fixed-case>ncoding SamsonTan - ShafiqJoty + ShafiqJoty LavVarshney Min-YenKan 5647–5663 @@ -6089,7 +6089,7 @@ Measuring the Similarity of Grammatical Gender Systems by Comparing Partitions - Arya D.McCarthy + Arya D.McCarthy AdinaWilliams ShijiaLiu DavidYarowsky @@ -6106,7 +6106,7 @@ JinlanFu PengfeiLiu QiZhang - XuanjingHuang + XuanjingHuang 5676–5686 The performance of the Chinese Word Segmentation (CWS) systems has gradually reached a plateau with the rapid development of deep neural networks, especially the successful use of large pre-trained models. In this paper, we take stock of what we have achieved and rethink what’s left in the CWS task. Methodologically, we propose a fine-grained evaluation for existing CWS systems, which not only allows us to diagnose the strengths and weaknesses of existing models (under the in-dataset setting), but enables us to quantify the discrepancy between different criterion and alleviate the negative transfer problem when doing multi-criteria learning. Strategically, despite not aiming to propose a novel model in this paper, our comprehensive experiments on eight models and seven datasets, as well as thorough analysis, could search for some promising direction for future research. We make all codes publicly available and release an interface that can quickly evaluate and diagnose user’s models: https://github.com/neulab/InterpretEval 2020.emnlp-main.457 @@ -6175,7 +6175,7 @@ <fixed-case>T</fixed-case>e<fixed-case>MP</fixed-case>: Temporal Message Passing for Temporal Knowledge Graph Completion JiapengWu MengCao - Jackie Chi KitCheung + Jackie Chi KitCheung William L.Hamilton 5730–5746 Inferring missing facts in temporal knowledge graphs (TKGs) is a fundamental and challenging task. Previous works have approached this problem by augmenting methods for static knowledge graphs to leverage time-dependent representations. However, these methods do not explicitly leverage multi-hop structural information and temporal facts from recent time steps to enhance their predictions. Additionally, prior work does not explicitly address the temporal sparsity and variability of entity distributions in TKGs. We propose the Temporal Message Passing (TeMP) framework to address these challenges by combining graph neural networks, temporal dynamics models, data imputation and frequency-based gating techniques. Experiments on standard TKG tasks show that our approach provides substantial gains compared to the previous state of the art, achieving a 10.7% average relative improvement in Hits@10 across three standard benchmarks. Our analysis also reveals important sources of variability both within and across TKG datasets, and we introduce several simple but strong baselines that outperform the prior state of the art in certain settings. @@ -6203,7 +6203,7 @@ An Empirical Study of Generation Order for Machine Translation WilliamChan MitchellStern - JamieKiros + JamieKiros JakobUszkoreit 5764–5773 In this work, we present an empirical study of generation order for machine translation. Building on recent advances in insertion-based modeling, we first introduce a soft order-reward framework that enables us to train models to follow arbitrary oracle generation policies. We then make use of this framework to explore a large variety of generation orders, including uninformed orders, location-based orders, frequency-based orders, content-based orders, and model-based orders. Curiously, we find that for the WMT’14 English \to German and WMT’18 English \to Chinese translation tasks, order does not have a substantial impact on output quality. Moreover, for English \to German, we even discover that unintuitive orderings such as alphabetical and shortest-first can match the performance of a standard Transformer, suggesting that traditional left-to-right generation may not be necessary to achieve high performance. @@ -6230,7 +6230,7 @@ SewonMin JulianMichael HannanehHajishirzi - LukeZettlemoyer + LukeZettlemoyer 5783–5797 Ambiguity is inherent to open-domain question answering; especially when exploring new topics, it can be difficult to ask questions that have a single, unambiguous answer. In this paper, we introduce AmbigQA, a new open-domain question answering task which involves finding every plausible answer, and then rewriting the question for each one to resolve the ambiguity. To study this task, we construct AmbigNQ, a dataset covering 14,042 questions from NQ-open, an existing open-domain QA benchmark. We find that over half of the questions in NQ-open are ambiguous, with diverse sources of ambiguity such as event and entity references. We also present strong baseline models for AmbigQA which we show benefit from weakly supervised learning that incorporates NQ-open, strongly suggesting our new task and data will support significant future research effort. Our data and baselines are available at https://nlp.cs.washington.edu/ambigqa. 2020.emnlp-main.466 @@ -6273,7 +6273,7 @@ Few-Shot Complex Knowledge Base Question Answering via Meta Reinforcement Learning YunchengHua Yuan-FangLi - GholamrezaHaffari + GholamrezaHaffari GuilinQi TongtongWu 5827–5837 @@ -6311,7 +6311,7 @@ MuhammadAbdul-Mageed ChiyuZhang AbdelRahimElmadany - LyleUngar + LyleUngar 5855–5876 Although prediction of dialects is an important language processing task, with a wide range of applications, existing work is largely limited to coarse-grained varieties. Inspired by geolocation research, we propose the novel task of Micro-Dialect Identification (MDI) and introduce MARBERT, a new language model with striking abilities to predict a fine-grained variety (as small as that of a city) given a single, short message. For modeling, we offer a range of novel spatially and linguistically-motivated multi-task learning models. To showcase the utility of our models, we introduce a new, large-scale dataset of Arabic micro-varieties (low-resource) suited to our tasks. MARBERT predicts micro-dialects with 9.9% F1, 76 better than a majority class baseline. Our new language model also establishes new state-of-the-art on several external tasks. 2020.emnlp-main.472 @@ -6419,7 +6419,7 @@ <fixed-case>CCA</fixed-case>ligned: A Massive Collection of Cross-Lingual Web-Document Pairs AhmedEl-Kishky VishravChaudhary - FranciscoGuzmán + FranciscoGuzmán PhilippKoehn 5960–5969 Cross-lingual document alignment aims to identify pairs of documents in two distinct languages that are of comparable content or translations of each other. In this paper, we exploit the signals embedded in URLs to label web documents at scale with an average precision of 94.5% across different language pairs. We mine sixty-eight snapshots of the Common Crawl corpus and identify web document pairs that are translations of each other. We release a new web dataset consisting of over 392 million URL pairs from Common Crawl covering documents in 8144 language pairs of which 137 pairs include English. In addition to curating this massive dataset, we introduce baseline methods that leverage cross-lingual representations to identify aligned documents based on their textual content. Finally, we demonstrate the value of this parallel documents dataset through a downstream task of mining parallel sentences and measuring the quality of machine translations from models trained on this mined data. Our objective in releasing this dataset is to foster new research in cross-lingual NLP across a variety of low, medium, and high-resource languages. @@ -6551,7 +6551,7 @@ LidongBing CanasaiKruengkrai Thien HaiNguyen - ShafiqJoty + ShafiqJoty LuoSi ChunyanMiao 6045–6057 @@ -6595,8 +6595,8 @@ FlorianMai NikolaosPappas IvanMontero - Noah A.Smith - JamesHenderson + Noah A.Smith + JamesHenderson 6076–6092 Text autoencoders are commonly used for conditional generation tasks such as style transfer. We propose methods which are plug and play, where any pretrained autoencoder can be used, and only require learning a mapping within the autoencoder’s embedding space, training embedding-to-embedding (Emb2Emb). This reduces the need for labeled training data for the task and makes the training procedure more efficient. Crucial to the success of this method is a loss term for keeping the mapped embedding on the manifold of the autoencoder and a mapping which is trained to navigate the manifold by learning offset vectors. Evaluations on style transfer tasks both with and without sequence-to-sequence supervision show that our method performs better than or comparable to strong baselines while being up to four times faster. 2020.emnlp-main.491 @@ -6675,9 +6675,9 @@ Effective Unsupervised Domain Adaptation with Adversarially Trained Language Models - Thuy-TrangVu + Thuy-TrangVu DinhPhung - GholamrezaHaffari + GholamrezaHaffari 6163–6173 Recent work has shown the importance of adaptation of broad-coverage contextualised embedding models on the domain of the target task of interest. Current self-supervised adaptation methods are simplistic, as the training signal comes from a small percentage of randomly masked-out tokens. In this paper, we show that careful masking strategies can bridge the knowledge gap of masked language models (MLMs) about the domains more effectively by allocating self-supervision where it is needed. Furthermore, we propose an effective training strategy by adversarially masking out those tokens which are harder to reconstruct by the underlying MLM. The adversarial objective leads to a challenging combinatorial optimisation problem over subsets of tokens, which we tackle efficiently through relaxation to a variational lowerbound and dynamic programming. On six unsupervised domain adaptation tasks involving named entity recognition, our method strongly outperforms the random masking strategy and achieves up to +1.64 F1 score improvements. 2020.emnlp-main.497 @@ -6757,7 +6757,7 @@ Interpreting Open-Domain Modifiers: Decomposition of <fixed-case>W</fixed-case>ikipedia Categories into Disambiguated Property-Value Pairs - MariusPasca + MariusPasca 6218–6228 This paper proposes an open-domain method for automatically annotating modifier constituents (20th-century’) within Wikipedia categories (20th-century male writers) with properties (date of birth). The annotations offer a semantically-anchored understanding of the role of the constituents in defining the underlying meaning of the categories. In experiments over an evaluation set of Wikipedia categories, the proposed method annotates constituent modifiers as semantically-anchored properties, rather than as mere strings in a previous method. It does so at a better trade-off between precision and recall. 2020.emnlp-main.503 @@ -6799,7 +6799,7 @@ MengCao YueDong JiapengWu - Jackie Chi KitCheung + Jackie Chi KitCheung 6251–6258 Neural abstractive summarization systems have achieved promising progress, thanks to the availability of large-scale datasets and models pre-trained with self-supervised methods. However, ensuring the factual consistency of the generated summaries for abstractive summarization systems is a challenge. We propose a post-editing corrector module to address this issue by identifying and correcting factual errors in generated summaries. The neural corrector model is pre-trained on artificial examples that are created by applying a series of heuristic transformations on reference summaries. These transformations are inspired by the error analysis of state-of-the-art summarization model outputs. Experimental results show that our model is able to correct factual errors in summaries generated by other neural summarization models and outperforms previous models on factual consistency evaluation on the CNN/DailyMail dataset. We also find that transferring from artificial error correction to downstream settings is still very challenging. 2020.emnlp-main.506 @@ -6850,7 +6850,7 @@ Summarizing Text on Any Aspects: A Knowledge-Informed Weakly-Supervised Approach BowenTan LianhuiQin - EricXing + EricXing ZhitingHu 6301–6309 Given a document and a target aspect (e.g., a topic of interest), aspect-based abstractive summarization attempts to generate a summary with respect to the aspect. Previous studies usually assume a small pre-defined set of aspects and fall short of summarizing on other diverse topics. In this work, we study summarizing on arbitrary aspects relevant to the document, which significantly expands the application of the task in practice. Due to the lack of supervision data, we develop a new weak supervision construction method and an aspect modeling scheme, both of which integrate rich external knowledge sources such as ConceptNet and Wikipedia. Experiments show our approach achieves performance boosts on summarizing both real and synthetic documents given pre-defined or arbitrary aspects. @@ -6873,7 +6873,7 @@ Online Conversation Disentanglement with Pointer Networks TaoYu - ShafiqJoty + ShafiqJoty 6321–6330 Huge amounts of textual conversations occur online every day, where multiple conversations take place concurrently. Interleaved conversations lead to difficulties in not only following the ongoing discussions but also extracting relevant information from simultaneous messages. Conversation disentanglement aims to separate intermingled messages into detached conversations. However, existing disentanglement methods rely mostly on handcrafted features that are dataset specific, which hinders generalization and adaptability. In this work, we propose an end-to-end online framework for conversation disentanglement that avoids time-consuming domain-specific feature engineering. We design a novel way to embed the whole utterance that comprises timestamp, speaker, and message text, and propose a custom attention mechanism that models disentanglement as a pointing problem while effectively capturing inter-utterance interactions in an end-to-end fashion. We also introduce a joint-learning objective to better capture contextual information. Our experiments on the Ubuntu IRC dataset show that our method achieves state-of-the-art performance in both link and conversation prediction tasks. 2020.emnlp-main.512 @@ -6915,7 +6915,7 @@ LiangmingPan JuanziLi ZhiyuanLiu - Tat-SengChua + Tat-SengChua 6355–6364 Entity alignment (EA) aims at building a unified Knowledge Graph (KG) of rich content by linking the equivalent entities from various KGs. GNN-based EA methods present promising performance by modeling the KG structure defined by relation triples. However, attribute triples can also provide crucial alignment signal but have not been well explored yet. In this paper, we propose to utilize an attributed value encoder and partition the KG into subgraphs to model the various types of attribute triples efficiently. Besides, the performances of current EA methods are overestimated because of the name-bias of existing EA datasets. To make an objective evaluation, we propose a hard experimental setting where we select equivalent entity pairs with very different names as the test set. Under both the regular and hard settings, our method achieves significant improvements (5.10% on average Hits@1 in DBP15k) over 12 baselines in cross-lingual and monolingual datasets. Ablation studies on different subgraphs and a case study about attribute types further demonstrate the effectiveness of our method. Source code and data can be found at https://github.com/thunlp/explore-and-evaluate. 2020.emnlp-main.515 @@ -6966,7 +6966,7 @@ FabioPetroni MartinJosifoski SebastianRiedel - LukeZettlemoyer + LukeZettlemoyer 6397–6407 This paper introduces a conceptually simple, scalable, and highly effective BERT-based entity linking model, along with an extensive evaluation of its accuracy-speed trade-off. We present a two-stage zero-shot linking algorithm, where each entity is defined only by a short textual description. The first stage does retrieval in a dense space defined by a bi-encoder that independently embeds the mention context and the entity descriptions. Each candidate is then re-ranked with a cross-encoder, that concatenates the mention and entity text. Experiments demonstrate that this approach is state of the art on recent zero-shot benchmarks (6 point absolute gains) and also on more established non-zero-shot evaluations (e.g. TACKBP-2010), despite its relative simplicity (e.g. no explicit entity embeddings or manually engineered mention tables). We also show that bi-encoder linking is very fast with nearest neighbor search (e.g. linking with 5.9 million candidates in 2 milliseconds), and that much of the accuracy gain from the more expensive cross-encoder can be transferred to the bi-encoder via knowledge distillation. Our code and models are available at https://github.com/facebookresearch/BLINK. 2020.emnlp-main.519 @@ -6978,12 +6978,12 @@ A Dataset for Tracking Entities in Open Domain Procedural Text NiketTandon KeisukeSakaguchi - BhavanaDalvi + BhavanaDalvi DheerajRajagopal PeterClark MichalGuerquin KyleRichardson - EduardHovy + EduardHovy 6408–6417 We present the first dataset for tracking state changes in procedural text from arbitrary domains by using an unrestricted (open) vocabulary. For example, in a text describing fog removal using potatoes, a car window may transition between being foggy, sticky, opaque, and clear. Previous formulations of this task provide the text and entities involved, and ask how those entities change for just a small, pre-defined set of attributes (e.g., location), limiting their fidelity. Our solution is a new task formulation where given just a procedural text as input, the task is to generate a set of state change tuples (entity, attribute, before-state, after-state) for each step, where the entity, attribute, and state values must be predicted from an open vocabulary. Using crowdsourcing, we create OPENPI, a high-quality (91.5% coverage as judged by humans and completely vetted), and large-scale dataset comprising 29,928 state changes over 4,050 sentences from 810 procedural real-world paragraphs from WikiHow.com. A current state-of-the-art generation model on this task achieves 16.1% F1 based on BLEU metric, leaving enough room for novel model architectures. 2020.emnlp-main.520 @@ -7007,11 +7007,11 @@ Efficient One-Pass End-to-End Entity Linking for Questions - Belinda Z.Li + Belinda Z.Li SewonMin SrinivasanIyer YasharMehdad - Wen-tauYih + Wen-tauYih 6433–6441 We present ELQ, a fast end-to-end entity linking model for questions, which uses a biencoder to jointly perform mention detection and linking in one pass. Evaluated on WebQSP and GraphQuestions with extended annotations that cover multiple entities per question, ELQ outperforms the previous state of the art by a large margin of +12.7% and +19.6% F1, respectively. With a very fast inference time (1.57 examples/s on a single CPU), ELQ can be useful for downstream question answering systems. In a proof-of-concept experiment, we demonstrate that using ELQ significantly improves the downstream QA performance of GraphRetriever. 2020.emnlp-main.522 @@ -7025,7 +7025,7 @@ AkariAsai HiroyukiShindo HideakiTakeda - YujiMatsumoto + YujiMatsumoto 6442–6454 Entity representations are useful in natural language tasks involving entities. In this paper, we propose new pretrained contextualized representations of words and entities based on the bidirectional transformer. The proposed model treats words and entities in a given text as independent tokens, and outputs contextualized representations of them. Our model is trained using a new pretraining task based on the masked language model of BERT. The task involves predicting randomly masked words and entities in a large entity-annotated corpus retrieved from Wikipedia. We also propose an entity-aware self-attention mechanism that is an extension of the self-attention mechanism of the transformer, and considers the types of tokens (words or entities) when computing attention scores. The proposed model achieves impressive empirical performance on a wide range of entity-related tasks. In particular, it obtains state-of-the-art results on five well-known datasets: Open Entity (entity typing), TACRED (relation classification), CoNLL-2003 (named entity recognition), ReCoRD (cloze-style question answering), and SQuAD 1.1 (extractive question answering). Our source code and pretrained representations are available at https://github.com/studio-ousia/luke. 2020.emnlp-main.523 @@ -7065,7 +7065,7 @@ AllisonHegel SudhaRao AsliCelikyilmaz - BillDolan + BillDolan 6485–6504 Existing language models excel at writing from scratch, but many real-world scenarios require rewriting an existing document to fit a set of constraints. Although sentence-level rewriting has been fairly well-studied, little work has addressed the challenge of rewriting an entire document coherently. In this work, we introduce the task of document-level targeted content transfer and address it in the recipe domain, with a recipe as the document and a dietary restriction (such as vegan or dairy-free) as the targeted constraint. We propose a novel model for this task based on the generative pre-trained language model (GPT-2) and train on a large number of roughly-aligned recipe pairs. Both automatic and human evaluations show that our model out-performs existing methods by generating coherent and diverse rewrites that obey the constraint while remaining close to the original document. Finally, we analyze our model’s rewrites to assess progress toward the goal of making language generation more attuned to constraints that are substantive rather than stylistic. 2020.emnlp-main.526 @@ -7100,7 +7100,7 @@ Plan ahead: Self-Supervised Text Planning for Paragraph Completion Task DongyeopKang - EduardHovy + EduardHovy 6533–6543 Despite the recent success of contextualized language models on various NLP tasks, language model itself cannot capture textual coherence of a long, multi-sentence document (e.g., a paragraph). Humans often make structural decisions on what and how to say about before making utterances. Guiding surface realization with such high-level decisions and structuring text in a coherent way is essentially called a planning process. Where can the model learn such high-level coherence? A paragraph itself contains various forms of inductive coherence signals called self-supervision in this work, such as sentence orders, topical keywords, rhetorical structures, and so on. Motivated by that, this work proposes a new paragraph completion task PARCOM; predicting masked sentences in a paragraph. However, the task suffers from predicting and selecting appropriate topical content with respect to the given context. To address that, we propose a self-supervised text planner SSPlanner that predicts what to say first (content prediction), then guides the pretrained language model (surface realization) using the predicted content. SSPlanner outperforms the baseline generation models on the paragraph completion task in both automatic and human evaluation. We also find that a combination of noun and verb types of keywords is the most effective for content selection. As more number of content keywords are provided, overall generation quality also increases. 2020.emnlp-main.529 @@ -7126,7 +7126,7 @@ Towards Persona-Based Empathetic Conversational Models PeixiangZhong - ChenZhang + ChenZhang HaoWang YongLiu ChunyanMiao @@ -7142,7 +7142,7 @@ QiongkaiXu LizhenQu ZeyuGao - GholamrezaHaffari + GholamrezaHaffari 6567–6580 The global market size of conversational assistants (chatbots) is expected to grow to USD 9.4 billion by 2024, according to MarketsandMarkets. Despite the wide use of chatbots, leakage of personal information through chatbots poses serious privacy concerns for their users. In this work, we propose to protect personal information by warning users of detected suspicious sentences generated by conversational assistants. The detection task is formulated as an alignment optimization problem and a new dataset PERSONA-LEAKAGE is collected for evaluation. In this paper, we propose two novel constrained alignment models, which consistently outperform baseline methods on Moreover, we conduct analysis on the behavior of recently proposed personalized chit-chat dialogue systems. The empirical results show that those systems suffer more from personal information disclosure than the widely used Seq2Seq model and the language model. In those cases, a significant number of information leaking utterances can be detected by our models with high precision. 2020.emnlp-main.532 @@ -7154,7 +7154,7 @@ Response Selection for Multi-Party Conversations with Dynamic Topic Tracking WeishiWang Steven C.H.Hoi - ShafiqJoty + ShafiqJoty 6581–6591 While participants in a multi-party multi-turn conversation simultaneously engage in multiple conversation topics, existing response selection methods are developed mainly focusing on a two-party single-conversation scenario. Hence, the prolongation and transition of conversation topics are ignored by current methods. In this work, we frame response selection as a dynamic topic tracking task to match the topic between the response and relevant conversation context. With this new formulation, we propose a novel multi-task learning framework that supports efficient encoding through large pretrained models with only two utterances at once to perform dynamic topic disentanglement and response selection. We also propose Topic-BERT an essential pretraining step to embed topic information into BERT with self-supervised learning. Experimental results on the DSTC-8 Ubuntu IRC dataset show state-of-the-art results in response selection and topic disentanglement tasks outperforming existing methods by a good margin. 2020.emnlp-main.533 @@ -7228,7 +7228,7 @@ JingLi XingshanZeng HaisongZhang - Kam-FaiWong + Kam-FaiWong 6640–6650 Quotations are crucial for successful explanations and persuasions in interpersonal communications. However, finding what to quote in a conversation is challenging for both humans and machines. This work studies automatic quotation generation in an online conversation and explores how language consistency affects whether a quotation fits the given context. Here, we capture the contextual consistency of a quotation in terms of latent topics, interactions with the dialogue history, and coherence to the query turn’s existing contents. Further, an encoder-decoder neural framework is employed to continue the context with a quotation via language generation. Experiment results on two large-scale datasets in English and Chinese demonstrate that our quotation generation model outperforms the state-of-the-art models. Further analysis shows that topic, interaction, and query consistency are all helpful to learn how to quote in online conversations. 2020.emnlp-main.538 @@ -7239,7 +7239,7 @@ Profile Consistency Identification for Open-domain Dialogue Agents HaoyuSong YanWang - Wei-NanZhang + Wei-NanZhang ZhengyuZhao TingLiu XiaojiangLiu @@ -7360,7 +7360,7 @@ MuchengRen XiuboGeng TaoQin - HeyanHuang + HeyanHuang DaxinJiang 6745–6758 We focus on the task of reasoning over paragraph effects in situation, which requires a model to understand the cause and effect described in a background paragraph, and apply the knowledge to a novel situation. Existing works ignore the complicated reasoning process and solve it with a one-step “black box” model. Inspired by human cognitive processes, in this paper we propose a sequential approach for this task which explicitly models each step of the reasoning process with neural network modules. In particular, five reasoning modules are designed and learned in an end-to-end manner, which leads to a more interpretable model. Experimental results on the ROPES dataset demonstrate the effectiveness and explainability of our proposed approach. @@ -7396,7 +7396,7 @@ LedellWu SergeyEdunov DanqiChen - Wen-tauYih + Wen-tauYih 6769–6781 Open-domain question answering relies on efficient passage retrieval to select candidate contexts, where traditional sparse vector space models, such as TF-IDF or BM25, are the de facto method. In this work, we show that retrieval can be practically implemented using dense representations alone, where embeddings are learned from a small number of questions and passages by a simple dual-encoder framework. When evaluated on a wide range of open-domain QA datasets, our dense retriever outperforms a strong Lucene-BM25 system greatly by 9%-19% absolute in terms of top-20 passage retrieval accuracy, and helps our end-to-end QA system establish new state-of-the-art on multiple open-domain QA benchmarks. 2020.emnlp-main.550 @@ -7444,7 +7444,7 @@ <fixed-case>L</fixed-case>earning <fixed-case>M</fixed-case>usic <fixed-case>H</fixed-case>elps <fixed-case>Y</fixed-case>ou <fixed-case>R</fixed-case>ead: <fixed-case>U</fixed-case>sing Transfer to Study Linguistic Structure in Language Models IsabelPapadimitriou - DanJurafsky + DanJurafsky 6829–6839 We propose transfer learning as a method for analyzing the encoding of grammatical structure in neural language models. We train LSTMs on non-linguistic data and evaluate their performance on natural language to assess which kinds of data induce generalizable structural features that LSTMs can use for natural language. We find that training on non-linguistic data with latent structure (MIDI music or Java code) improves test performance on natural language, despite no overlap in surface form or vocabulary. To pinpoint the kinds of abstract structure that models may be encoding to lead to this improvement, we run similar experiments with two artificial parentheses languages: one which has a hierarchical recursive structure, and a control which has paired tokens but no recursion. Surprisingly, training a model on either of these artificial languages leads the same substantial gains when testing on natural language. Further experiments on transfer between natural languages controlling for vocabulary overlap show that zero-shot performance on a test language is highly correlated with typological syntactic similarity to the training language, suggesting that representations induced by pre-training correspond to the cross-linguistic syntactic properties. Our results provide insights into the ways that neural models represent abstract syntactic structure, and also about the kind of structural inductive biases which allow for natural language acquisition. 2020.emnlp-main.554 @@ -7477,7 +7477,7 @@ <fixed-case>B</fixed-case>irds have four legs?! <fixed-case>N</fixed-case>umer<fixed-case>S</fixed-case>ense: <fixed-case>P</fixed-case>robing <fixed-case>N</fixed-case>umerical <fixed-case>C</fixed-case>ommonsense <fixed-case>K</fixed-case>nowledge of <fixed-case>P</fixed-case>re-<fixed-case>T</fixed-case>rained <fixed-case>L</fixed-case>anguage <fixed-case>M</fixed-case>odels - Bill YuchenLin + Bill YuchenLin SeyeonLee RahulKhanna XiangRen @@ -7492,8 +7492,8 @@ Grounded Adaptation for Zero-shot Executable Semantic Parsing VictorZhong MikeLewis - Sida I.Wang - LukeZettlemoyer + Sida I.Wang + LukeZettlemoyer 6869–6882 We propose Grounded Adaptation for Zeroshot Executable Semantic Parsing (GAZP) to adapt an existing semantic parser to new environments (e.g. new database schemas). GAZP combines a forward semantic parser with a backward utterance generator to synthesize data (e.g. utterances and SQL queries) in the new environment, then selects cycle-consistent examples to adapt the parser. Unlike data-augmentation, which typically synthesizes unverified examples in the training environment, GAZP synthesizes examples in the new environment whose input-output consistency are verified through execution. On the Spider, Sparc, and CoSQL zero-shot semantic parsing tasks, GAZP improves logical form and execution accuracy of the baseline parser. Our analyses show that GAZP outperforms data-augmentation in the training environment, performance increases with the amount of GAZP-synthesized data, and cycle-consistency is central to successful adaptation. 2020.emnlp-main.558 @@ -7506,7 +7506,7 @@ An Imitation Game for Learning Semantic Parsers from User Interaction ZiyuYao YiqiTang - Wen-tauYih + Wen-tauYih HuanSun YuSu 6883–6902 @@ -7565,7 +7565,7 @@ JianqiangMa ZeyuYan ShuaiPang - YangZhang + YangZhang JianpingShen 6936–6942 On the WikiSQL benchmark, state-of-the-art text-to-SQL systems typically take a slot- filling approach by building several dedicated models for each type of slots. Such modularized systems are not only complex but also of limited capacity for capturing inter-dependencies among SQL clauses. To solve these problems, this paper proposes a novel extraction-linking approach, where a unified extractor recognizes all types of slot mentions appearing in the question sentence before a linker maps the recognized columns to the table schema to generate executable SQL queries. Trained with automatically generated annotations, the proposed method achieves the first place on the WikiSQL benchmark. @@ -7582,7 +7582,7 @@ TianGan WeiLu Min-YenKan - Tat-SengChua + Tat-SengChua 6943–6954 In existing sophisticated text-to-SQL models, schema linking is often considered as a simple, minor component, belying its importance. By providing a schema linking corpus based on the Spider text-to-SQL dataset, we systematically study the role of schema linking. We also build a simple BERT-based baseline, called Schema-Linking SQL (SLSQL) to perform a data-driven study. We find when schema linking is done well, SLSQL demonstrates good performance on Spider despite its structural simplicity. Many remaining errors are attributable to corpus noise. This suggests schema linking is the crux for the current text-to-SQL task. Our analytic studies provide insights on the characteristics of schema linking for future developments of text-to-SQL tasks. 2020.emnlp-main.564 @@ -7755,7 +7755,7 @@ MartinSchmitt SahandSharifzadeh VolkerTresp - HinrichSchütze + HinrichSchütze 7117–7130 Knowledge graphs (KGs) can vary greatly from one domain to another. Therefore supervised approaches to both graph-to-text generation and text-to-graph knowledge extraction (semantic parsing) will always suffer from a shortage of domain-specific parallel graph-text data; at the same time, adapting a model trained on a different domain is often impossible due to little or no overlap in entities and relations. This situation calls for an approach that (1) does not need large amounts of annotated data and thus (2) does not need to rely on domain adaptation techniques to work well on different domains. To this end, we present the first approach to unsupervised text generation from KGs and show simultaneously how it can be used for unsupervised semantic parsing. We evaluate our approach on WebNLG v2.1 and a new benchmark leveraging scene graphs from Visual Genome. Our system outperforms strong baselines for both text<->graph conversion tasks without any manual adaptation from one dataset to the other. In additional experiments, we investigate the impact of using different unsupervised objectives. 2020.emnlp-main.577 @@ -7781,7 +7781,7 @@ QinzhuoWu QiZhang JinlanFu - XuanjingHuang + XuanjingHuang 7137–7146 With the advancements in natural language processing tasks, math word problem solving has received increasing attention. Previous methods have achieved promising results but ignore background common-sense knowledge not directly provided by the problem. In addition, during generation, they focus on local features while neglecting global information. To incorporate external knowledge and global expression information, we propose a novel knowledge-aware sequence-to-tree (KA-S2T) network in which the entities in the problem sequences and their categories are modeled as an entity graph. Based on this entity graph, a graph attention network is used to capture knowledge-aware problem representations. Further, we use a tree-structured decoder with a state aggregation mechanism to capture the long-distance dependency and global expression information. Experimental results on the Math23K dataset revealed that the KA-S2T model can achieve better performance than previously reported best results. 2020.emnlp-main.579 @@ -7854,7 +7854,7 @@ <fixed-case>XL</fixed-case>-<fixed-case>W</fixed-case>i<fixed-case>C</fixed-case>: A Multilingual Benchmark for Evaluating Semantic Contextualization AlessandroRaganato TommasoPasini - JoseCamacho-Collados + JoseCamacho-Collados Mohammad TaherPilehvar 7193–7206 The ability to correctly model distinct meanings of a word is crucial for the effectiveness of semantic representation techniques. However, most existing evaluation benchmarks for assessing this criterion are tied to sense inventories (usually WordNet), restricting their usage to a small subset of knowledge-based representation techniques. The Word-in-Context dataset (WiC) addresses the dependence on sense inventories by reformulating the standard disambiguation task as a binary classification problem; but, it is limited to the English language. We put forward a large multilingual benchmark, XL-WiC, featuring gold standards in 12 new languages from varied language families and with different degrees of resource availability, opening room for evaluation scenarios such as zero-shot cross-lingual transfer. We perform a series of experiments to determine the reliability of the datasets and to set performance baselines for several recent contextualized multilingual models. Experimental results show that even when no tagged instances are available for a target language, models trained solely on the English data can attain competitive performance in the task of distinguishing different meanings of a word, even for distant languages. XL-WiC is available at https://pilehvar.github.io/xlwic/. @@ -7893,7 +7893,7 @@ Cross-lingual Spoken Language Understanding with Regularized Representation Alignment ZihanLiu - Genta IndraWinata + Genta IndraWinata PengXu ZhaojiangLin PascaleFung @@ -7924,7 +7924,7 @@ DhirajMadan DanishContractor HarshitKumar - SachindraJoshi + SachindraJoshi 7263–7269 Neural Conversational QA tasks such as ShARC require systems to answer questions based on the contents of a given passage. On studying recent state-of-the-art models on the ShARC QA task, we found indications that the model(s) learn spurious clues/patterns in the data-set. Further, a heuristic-based program, built to exploit these patterns, had comparative performance to that of the neural models. In this paper we share our findings about the four types of patterns in the ShARC corpus and how the neural models exploit them. Motivated by the above findings, we create and share a modified data-set that has fewer spurious patterns than the original data-set, consequently allowing models to learn better. 2020.emnlp-main.589 @@ -8042,7 +8042,7 @@ <fixed-case>BERT</fixed-case> Knows <fixed-case>P</fixed-case>unta <fixed-case>C</fixed-case>ana is not just beautiful, it’s gorgeous: Ranking Scalar Adjectives with Contextualised Representations - AinaGarí Soler + AinaGarí Soler MariannaApidianaki 7371–7385 Adjectives like pretty, beautiful and gorgeous describe positive properties of the nouns they modify but with different intensity. These differences are important for natural language understanding and reasoning. We propose a novel BERT-based approach to intensity detection for scalar adjectives. We model intensity by vectors directly derived from contextualised representations and show they can successfully rank scalar adjectives. We evaluate our models both intrinsically, on gold standard datasets, and on an Indirect Question Answering task. Our results demonstrate that BERT encodes rich knowledge about the semantics of scalar adjectives, and is able to provide better quality intensity rankings than static embeddings and previous models with access to dedicated resources. @@ -8129,7 +8129,7 @@ Keeping Up Appearances: Computational Modeling of Face Acts in Persuasion Oriented Discussions RitamDutt RishabhJoshi - CarolynRose + CarolynRose 7473–7485 The notion of face refers to the public self-image of an individual that emerges both from the individual’s own actions as well as from the interaction with others. Modeling face and understanding its state changes throughout a conversation is critical to the study of maintenance of basic human needs in and through interaction. Grounded in the politeness theory of Brown and Levinson (1978), we propose a generalized framework for modeling face acts in persuasion conversations, resulting in a reliable coding manual, an annotated corpus, and computational models. The framework reveals insights about differences in face act utilization between asymmetric roles in persuasion conversations. Using computational models, we are able to successfully identify face acts as well as predict a key conversational outcome (e.g. donation success). Finally, we model a latent representation of the conversational state to analyze the impact of predicted face acts on the probability of a positive conversational outcome and observe several correlations that corroborate previous findings. 2020.emnlp-main.605 @@ -8185,7 +8185,7 @@ DavidWadden ShanchuanLin KyleLo - Lucy LuWang + Lucy LuWang Madeleinevan Zuylen ArmanCohan HannanehHajishirzi @@ -8200,7 +8200,7 @@ Semantic Role Labeling as Syntactic Dependency Parsing TianzeShi IgorMalioutov - OzanIrsoy + OzanIrsoy 7551–7571 We reduce the task of (span-based) PropBank-style semantic role labeling (SRL) to syntactic dependency parsing. Our approach is motivated by our empirical analysis that shows three common syntactic patterns account for over 98% of the SRL annotations for both English and Chinese data. Based on this observation, we present a conversion scheme that packs SRL annotations into dependency tree representations through joint labels that permit highly accurate recovery back to the original format. This representation allows us to train statistical dependency parsers to tackle SRL and achieve competitive performance with the current state of the art. Our findings show the promise of syntactic dependency trees in encoding semantic role relations within their syntactic domain of locality, and point to potential further integration of syntactic methods into semantic role labeling in the future. 2020.emnlp-main.610 @@ -8237,7 +8237,7 @@ Towards Debiasing <fixed-case>NLU</fixed-case> Models from Unknown Biases Prasetya AjieUtama - Nafise SadatMoosavi + Nafise SadatMoosavi IrynaGurevych 7597–7610 NLU models often exploit biases to achieve high dataset-specific performance without properly learning the intended task. Recently proposed debiasing methods are shown to be effective in mitigating this tendency. However, these methods rely on a major assumption that the types of bias should be known a-priori, which limits their application to many NLU tasks and datasets. In this work, we present the first step to bridge this gap by introducing a self-debiasing framework that prevents models from mainly utilizing biases without knowing them in advance. The proposed framework is general and complementary to the existing debiasing methods. We show that it allows these existing methods to retain the improvement on the challenge datasets (i.e., sets of examples designed to expose models’ reliance on biases) without specifically targeting certain biases. Furthermore, the evaluation suggests that applying the framework results in improved overall robustness. @@ -8248,7 +8248,7 @@ On the Role of Supervision in Unsupervised Constituency Parsing - HaoyueShi + HaoyueShi KarenLivescu KevinGimpel 7611–7621 @@ -8300,8 +8300,8 @@ Translation Artifacts in Cross-lingual Transfer Learning MikelArtetxe - GorkaLabaka - EnekoAgirre + GorkaLabaka + EnekoAgirre 7674–7684 Both human and machine translation play a central role in cross-lingual transfer learning: many multilingual datasets have been created through professional translation services, and using machine translation to translate either the test set or the training set is a widely used transfer technique. In this paper, we show that such translation process can introduce subtle artifacts that have a notable impact in existing cross-lingual models. For instance, in natural language inference, translating the premise and the hypothesis independently can reduce the lexical overlap between them, which current models are highly sensitive to. We show that some previous findings in cross-lingual transfer learning need to be reconsidered in the light of this phenomenon. Based on the gained insights, we also improve the state-of-the-art in XNLI for the translate-test and zero-shot approaches by 4.3 and 2.8 points, respectively. 2020.emnlp-main.618 @@ -8314,7 +8314,7 @@ RamitSawhney HarshitJoshi SaumyaGandhi - Rajiv RatnShah + Rajiv RatnShah 7685–7697 Social media’s ubiquity fosters a space for users to exhibit suicidal thoughts outside of traditional clinical settings. Understanding the build-up of such ideation is critical for the identification of at-risk users and suicide prevention. Suicide ideation is often linked to a history of mental depression. The emotional spectrum of a user’s historical activity on social media can be indicative of their mental state over time. In this work, we focus on identifying suicidal intent in English tweets by augmenting linguistic models with historical context. We propose STATENet, a time-aware transformer based model for preliminary screening of suicidal risk on social media. STATENet outperforms competitive methods, demonstrating the utility of emotional and temporal contextual cues for suicide risk assessment. We discuss the empirical, qualitative, practical, and ethical aspects of STATENet for suicide ideation detection. 2020.emnlp-main.619 @@ -8335,7 +8335,7 @@ Where Are the Facts? Searching for Fact-checked Information to Alleviate the Spread of Fake News - NguyenVo + NguyenVo KyuminLee 7717–7731 Although many fact-checking systems have been developed in academia and industry, fake news is still proliferating on social media. These systems mostly focus on fact-checking but usually neglect online users who are the main drivers of the spread of misinformation. How can we use fact-checked information to improve users’ consciousness of fake news to which they are exposed? How can we stop users from spreading fake news? To tackle these questions, we propose a novel framework to search for fact-checking articles, which address the content of an original tweet (that may contain misinformation) posted by online users. The search can directly warn fake news posters and online users (e.g. the posters’ followers) about misinformation, discourage them from spreading fake news, and scale up verified content on social media. Our framework uses both text and images to search for fact-checking articles, and achieves promising results on real-world datasets. Our code and datasets are released at https://github.com/nguyenvo09/EMNLP2020. @@ -8398,8 +8398,8 @@ <fixed-case>M</fixed-case>ed<fixed-case>F</fixed-case>ilter: <fixed-case>I</fixed-case>mproving <fixed-case>E</fixed-case>xtraction of <fixed-case>T</fixed-case>ask-relevant <fixed-case>U</fixed-case>tterances through <fixed-case>I</fixed-case>ntegration of <fixed-case>D</fixed-case>iscourse <fixed-case>S</fixed-case>tructure and <fixed-case>O</fixed-case>ntological <fixed-case>K</fixed-case>nowledge SopanKhosla ShikharVashishth - Jill FainLehman - CarolynRose + Jill FainLehman + CarolynRose 7781–7797 Information extraction from conversational data is particularly challenging because the task-centric nature of conversation allows for effective communication of implicit information by humans, but is challenging for machines. The challenges may differ between utterances depending on the role of the speaker within the conversation, especially when relevant expertise is distributed asymmetrically across roles. Further, the challenges may also increase over the conversation as more shared context is built up through information communicated implicitly earlier in the dialogue. In this paper, we propose the novel modeling approach MedFilter, which addresses these insights in order to increase performance at identifying and categorizing task-relevant utterances, and in so doing, positively impacts performance at a downstream information extraction task. We evaluate this approach on a corpus of nearly 7,000 doctor-patient conversations where MedFilter is used to identify medically relevant contributions to the discussion (achieving a 10% improvement over SOTA baselines in terms of area under the PR curve). Identifying task-relevant utterances benefits downstream medical processing, achieving improvements of 15%, 105%, and 23% respectively for the extraction of symptoms, medications, and complaints. 2020.emnlp-main.626 @@ -8451,7 +8451,7 @@ <fixed-case>E</fixed-case>ntity <fixed-case>L</fixed-case>inking in 100 <fixed-case>L</fixed-case>anguages Jan A.Botha ZifeiShan - DanielGillick + DanielGillick 7833–7845 We propose a new formulation for multilingual entity linking, where language-specific mentions resolve to a language-agnostic Knowledge Base. We train a dual encoder in this new setting, building on prior work with improved feature representation, negative mining, and an auxiliary entity-pairing task, to obtain a single entity retrieval model that covers 100+ languages and 20 million entities. The model outperforms state-of-the-art results from a far more limited cross-lingual linking task. Rare entities and low-resource languages pose challenges at this large-scale, so we advocate for an increased focus on zero- and few-shot evaluation. To this end, we provide Mewsli-9, a large new multilingual dataset matched to our setting, and show how frequency-based analysis provided key insights for our model and training enhancements. 2020.emnlp-main.630 @@ -8462,7 +8462,7 @@ <fixed-case>P</fixed-case>atch<fixed-case>BERT</fixed-case>: Just-in-Time, Out-of-Vocabulary Patching SangwhanMoon - NaoakiOkazaki + NaoakiOkazaki 7846–7852 Large scale pre-trained language models have shown groundbreaking performance improvements for transfer learning in the domain of natural language processing. In our paper, we study a pre-trained multilingual BERT model and analyze the OOV rate on downstream tasks, how it introduces information loss, and as a side-effect, obstructs the potential of the underlying model. We then propose multiple approaches for mitigation and demonstrate that it improves performance with the same parameter count when combined with fine-tuning. 2020.emnlp-main.631 @@ -8514,7 +8514,7 @@ Exploring and Predicting Transferability across <fixed-case>NLP</fixed-case> Tasks - TuVu + TuVu TongWang TsendsurenMunkhdalai AlessandroSordoni @@ -8537,7 +8537,7 @@ SmarandaMuresan JieMa FaisalLadhak - YaserAl-Onaizan + YaserAl-Onaizan 7927–7934 Leveraging large amounts of unlabeled data using Transformer-like architectures, like BERT, has gained popularity in recent times owing to their effectiveness in learning general representations that can then be further fine-tuned for downstream tasks to much success. However, training these models can be costly both from an economic and environmental standpoint. In this work, we investigate how to effectively use unlabeled data: by exploring the task-specific semi-supervised approach, Cross-View Training (CVT) and comparing it with task-agnostic BERT in multiple settings that include domain and task relevant English data. CVT uses a much lighter model architecture and we show that it achieves similar performance to BERT on a set of sequence tagging tasks, with lesser financial and environmental impact. 2020.emnlp-main.636 @@ -8559,7 +8559,7 @@ <fixed-case>A</fixed-case>ctive <fixed-case>L</fixed-case>earning for <fixed-case>BERT</fixed-case>: <fixed-case>A</fixed-case>n <fixed-case>E</fixed-case>mpirical <fixed-case>S</fixed-case>tudy - LiatEin-Dor + LiatEin-Dor AlonHalfon ArielGera EyalShnarch @@ -8592,7 +8592,7 @@ MatthewKhoury RumenDangovski LongwuOu - PreslavNakov + PreslavNakov YichenShen LiJing 7975–7984 @@ -8618,8 +8618,8 @@ The role of context in neural pitch accent detection in <fixed-case>E</fixed-case>nglish ElizabethNielsen - MarkSteedman - SharonGoldwater + MarkSteedman + SharonGoldwater 7994–8000 Prosody is a rich information source in natural language, serving as a marker for phenomena such as contrast. In order to make this information available to downstream tasks, we need a way to detect prosodic events in speech. We propose a new model for pitch accent detection, inspired by the work of Stehwien et al. (2018), who presented a CNN-based model for this task. Our model makes greater use of context by using full utterances as input and adding an LSTM layer. We find that these innovations lead to an improvement from 87.5% to 88.7% accuracy on pitch accent detection on American English speech in the Boston University Radio News Corpus, a state-of-the-art result. We also find that a simple baseline that just predicts a pitch accent on every content word yields 82.2% accuracy, and we suggest that this is the appropriate baseline for this task. Finally, we conduct ablation tests that show pitch is the most important acoustic feature for this task and this corpus. 2020.emnlp-main.642 @@ -8634,7 +8634,7 @@ ArshiyaAggarwal TaruJain PuneetMathur - Rajiv RatnShah + Rajiv RatnShah 8001–8013 Natural language processing has recently made stock movement forecasting and volatility forecasting advances, leading to improved financial forecasting. Transcripts of companies’ earnings calls are well studied for risk modeling, offering unique investment insight into stock performance. However, vocal cues in the speech of company executives present an underexplored rich source of natural language data for estimating financial risk. Additionally, most existing approaches ignore the correlations between stocks. Building on existing work, we introduce a neural model for stock volatility prediction that accounts for stock interdependence via graph convolutions while fusing verbal, vocal, and financial features in a semi-supervised multi-task risk forecasting formulation. Our proposed model, VolTAGE, outperforms existing methods demonstrating the effectiveness of multimodal learning for volatility prediction. 2020.emnlp-main.643 @@ -8659,8 +8659,8 @@ HaiminZhang DebanjanMahata RakeshGosangi - Rajiv RatnShah - AmandaStent + Rajiv RatnShah + AmandaStent 8021–8030 We introduce a new keyphrase generation approach using Generative Adversarial Networks (GANs). For a given document, the generator produces a sequence of keyphrases, and the discriminator distinguishes between human-curated and machine-generated keyphrases. We evaluated this approach on standard benchmark datasets. We observed that our model achieves state-of-the-art performance in the generation of abstractive keyphrases and is comparable to the best performing extractive techniques. Although we achieve promising results using GANs, they are not significantly better than the state-of-the-art generative models. To our knowledge, this is one of the first works that use GANs for keyphrase generation. We present a detailed analysis of our observations and expect that these findings would help other researchers to further study the use of GANs for the task of keyphrase generation. 2020.emnlp-main.645 @@ -8672,7 +8672,7 @@ <fixed-case>TESA</fixed-case>: A <fixed-case>T</fixed-case>ask in <fixed-case>E</fixed-case>ntity <fixed-case>S</fixed-case>emantic <fixed-case>A</fixed-case>ggregation for Abstractive Summarization ClémentJumel AnnieLouis - Jackie Chi KitCheung + Jackie Chi KitCheung 8031–8050 Human-written texts contain frequent generalizations and semantic aggregation of content. In a document, they may refer to a pair of named entities such as ‘London’ and ‘Paris’ with different expressions: “the major cities”, “the capital cities” and “two European cities”. Yet generation, especially, abstractive summarization systems have so far focused heavily on paraphrasing and simplifying the source content, to the exclusion of such semantic abstraction capabilities. In this paper, we present a new dataset and task aimed at the semantic aggregation of entities. TESA contains a dataset of 5.3K crowd-sourced entity aggregations of Person, Organization, and Location named entities. The aggregations are document-appropriate, meaning that they are produced by annotators to match the situational context of a given news article from the New York Times. We then build baseline models for generating aggregations given a tuple of entities and document context. We finetune on TESA an encoder-decoder language model and compare it with simpler classification methods based on linguistically informed features. Our quantitative and qualitative evaluations show reasonable performance in making a choice from a given list of expressions, but free-form expressions are understandably harder to generate and evaluate. 2020.emnlp-main.646 @@ -8709,7 +8709,7 @@ Intrinsic Evaluation of Summarization Datasets RishiBommasani - ClaireCardie + ClaireCardie 8075–8096 High quality data forms the bedrock for building meaningful statistical models in NLP. Consequently, data quality must be evaluated either during dataset construction or *post hoc*. Almost all popular summarization datasets are drawn from natural sources and do not come with inherent quality assurance guarantees. In spite of this, data quality has gone largely unquestioned for many of these recent datasets. We perform the first large-scale evaluation of summarization datasets by introducing 5 intrinsic metrics and applying them to 10 popular datasets. We find that data usage in recent summarization research is sometimes inconsistent with the underlying properties of the data. Further, we discover that our metrics can serve the additional purpose of being inexpensive heuristics for detecting generically low quality examples. 2020.emnlp-main.649 @@ -8739,7 +8739,7 @@ Conversational Semantic Parsing for Dialog State Tracking JianpengCheng DevangAgrawal - HéctorMartínez Alonso + HéctorMartínez Alonso ShrutiBhargava JorisDriesen FedericoFlego @@ -8747,10 +8747,10 @@ DimitriKartsaklis LinLi DhivyaPiraviperumal - Jason D.Williams + Jason D.Williams HongYu DiarmuidÓ Séaghdha - AndersJohannsen + AndersJohannsen 8107–8117 We consider a new perspective on dialog state tracking (DST), the task of estimating a user’s goal through the course of a dialog. By formulating DST as a semantic parsing task over hierarchical representations, we can incorporate semantic compositionality, cross-domain knowledge sharing and co-reference. We present TreeDST, a dataset of 27k conversations annotated with tree-structured dialog states and system acts. We describe an encoder-decoder framework for DST with hierarchical representations, which leads to ~20% improvement over state-of-the-art DST approaches that operate on a flat meaning space of slot-value pairs. 2020.emnlp-main.651 @@ -8764,7 +8764,7 @@ HuiWan ChulakaGunasekara SivaPatel - SachindraJoshi + SachindraJoshi LuisLastras 8118–8128 We introduce doc2dial, a new dataset of goal-oriented dialogues that are grounded in the associated documents. Inspired by how the authors compose documents for guiding end users, we first construct dialogue flows based on the content elements that corresponds to higher-level relations across text sections as well as lower-level relations between discourse units within a section. Then we present these dialogue flows to crowd contributors to create conversational utterances. The dataset includes over 4500 annotated conversations with an average of 14 turns that are grounded in over 450 documents from four domains. Compared to the prior document-grounded dialogue datasets, this dataset covers a variety of dialogue scenes in information-seeking conversations. For evaluating the versatility of the dataset, we introduce multiple dialogue modeling tasks and present baseline approaches. @@ -8804,7 +8804,7 @@ Information Seeking in the Spirit of Learning: A Dataset for Conversational Curiosity PedroRodriguez - PaulCrook + PaulCrook SeungwhanMoon ZhiguangWang 8153–8172 @@ -8833,7 +8833,7 @@ Discriminatively-<fixed-case>T</fixed-case>uned <fixed-case>G</fixed-case>enerative <fixed-case>C</fixed-case>lassifiers for <fixed-case>R</fixed-case>obust <fixed-case>N</fixed-case>atural <fixed-case>L</fixed-case>anguage <fixed-case>I</fixed-case>nference XiaoanDing TianyuLiu - BaobaoChang + BaobaoChang ZhifangSui KevinGimpel 8189–8202 @@ -8845,7 +8845,7 @@ New Protocols and Negative Results for Textual Entailment Data Collection - Samuel R.Bowman + Samuel R.Bowman JennimariaPalomaki LivioBaldini Soares EmilyPitler @@ -8873,7 +8873,7 @@ Universal Natural Language Processing with Limited Annotations: Try Few-shot Textual Entailment as a Start WenpengYin Nazneen FatemaRajani - DragomirRadev + DragomirRadev RichardSocher CaimingXiong 8229–8239 @@ -8911,7 +8911,7 @@ Multitask Learning for Cross-Lingual Transfer of Broad-coverage Semantic Dependencies MaryamAminian Mohammad SadeghRasooli - MonaDiab + MonaDiab 8268–8274 We describe a method for developing broad-coverage semantic dependency parsers for languages for which no semantically annotated resource is available. We leverage a multitask learning framework coupled with annotation projection. We use syntactic parsing as the auxiliary task in our multitask setup. Our annotation projection experiments from English to Czech show that our multitask setup yields 3.1% (4.2%) improvement in labeled F1-score on in-domain (out-of-domain) test set compared to a single-task baseline. 2020.emnlp-main.663 @@ -8924,7 +8924,7 @@ HaokunLiu WilliamHuang DharaMungra - Samuel R.Bowman + Samuel R.Bowman 8275–8280 Performance on the Winograd Schema Challenge (WSC), a respected English commonsense reasoning benchmark, recently rocketed from chance accuracy to 89% on the SuperGLUE leaderboard, with relatively little corroborating evidence of a correspondingly large improvement in reasoning ability. We hypothesize that much of this improvement comes from recent changes in task formalization—the combination of input specification, loss function, and reuse of pretrained parameters—by users of the dataset, rather than improvements in the pretrained model’s reasoning ability. We perform an ablation on two Winograd Schema datasets that interpolates between the formalizations used before and after this surge, and find (i) framing the task as multiple choice improves performance dramatically and (ii)several additional techniques, including the reuse of a pretrained language modeling head, can mitigate the model’s extreme sensitivity to hyperparameters. We urge future benchmark creators to impose additional structure to minimize the impact of formalization decisions on reported results. 2020.emnlp-main.664 @@ -8938,7 +8938,7 @@ PasqualeMinervini HaimDubossarsky SebastianRiedel - TimRocktäschel + TimRocktäschel 8281–8291 Natural Language Inference (NLI) datasets contain annotation artefacts resulting in spurious correlations between the natural language utterances and their respective entailment classes. These artefacts are exploited by neural networks even when only considering the hypothesis and ignoring the premise, leading to unwanted biases. Belinkov et al. (2019b) proposed tackling this problem via adversarial training, but this can lead to learned sentence representations that still suffer from the same biases. We show that the bias can be reduced in the sentence representations by using an ensemble of adversaries, encouraging the model to jointly decrease the accuracy of these different adversaries while fitting the data. This approach produces more robust NLI models, outperforming previous de-biasing efforts when generalised to 12 other NLI datasets (Belinkov et al., 2019a; Mahabadi et al., 2020). In addition, we find that the optimal number of adversarial classifiers depends on the dimensionality of the sentence representations, with larger sentence representations being more difficult to de-bias while benefiting from using a greater number of adversaries. 2020.emnlp-main.665 @@ -9028,7 +9028,7 @@ <fixed-case>C</fixed-case>hapter <fixed-case>C</fixed-case>aptor: <fixed-case>T</fixed-case>ext <fixed-case>S</fixed-case>egmentation in <fixed-case>N</fixed-case>ovels CharutaPethe AllenKim - SteveSkiena + SteveSkiena 8373–8383 Books are typically segmented into chapters and sections, representing coherent sub-narratives and topics. We investigate the task of predicting chapter boundaries, as a proxy for the general task of segmenting long texts. We build a Project Gutenberg chapter segmentation data set of 9,126 English novels, using a hybrid approach combining neural inference and rule matching to recognize chapter title headers in books, achieving an F1-score of 0.77 on this task. Using this annotated data as ground truth after removing structural cues, we present cut-based and neural methods for chapter segmentation, achieving a F1-score of 0.453 on the challenging task of exact break prediction over book-length documents. Finally, we reveal interesting historical trends in the chapter structure of novels. 2020.emnlp-main.672 @@ -9064,7 +9064,7 @@ Towards Modeling Revision Requirements in wiki<fixed-case>H</fixed-case>ow Instructions - IrshadBhat + IrshadBhat TalitaAnthonio MichaelRoth 8407–8414 @@ -9079,7 +9079,7 @@ RamitSawhney ShivamAgarwal ArnavWadhwa - Rajiv RatnShah + Rajiv RatnShah 8415–8426 In the financial domain, risk modeling and profit generation heavily rely on the sophisticated and intricate stock movement prediction task. Stock forecasting is complex, given the stochastic dynamics and non-stationary behavior of the market. Stock movements are influenced by varied factors beyond the conventionally studied historical prices, such as social media and correlations among stocks. The rising ubiquity of online content and knowledge mandates an exploration of models that factor in such multimodal signals for accurate stock forecasting. We introduce an architecture that achieves a potent blend of chaotic temporal signals from financial data, social media, and inter-stock relationships via a graph neural network in a hierarchical temporal fashion. Through experiments on real-world S&P 500 index data and English tweets, we show the practical applicability of our model as a tool for investment decision making and trading. 2020.emnlp-main.676 @@ -9105,7 +9105,7 @@ BarunPatra ChalaFufa PamelaBhattacharya - CharlesLee + CharlesLee 8445–8455 State of the art research for date-time entity extraction from text is task agnostic. Consequently, while the methods proposed in literature perform well for generic date-time extraction from texts, they don’t fare as well on task specific date-time entity extraction where only a subset of the date-time entities present in the text are pertinent to solving the task. Furthermore, some tasks require identifying negation constraints associated with the date-time entities to correctly reason over time. We showcase a novel model for extracting task-specific date-time entities along with their negation constraints. We show the efficacy of our method on the task of date-time understanding in the context of scheduling meetings for an email-based digital AI scheduling assistant. Our method achieves an absolute gain of 19% f-score points compared to baseline methods in detecting the date-time entities relevant to scheduling meetings and a 4% improvement over baseline methods for detecting negation constraints over date-time entities. 2020.emnlp-main.678 @@ -9120,7 +9120,7 @@ RebeccaThomas StevePittard VickiHertzberg - Jinho D.Choi + Jinho D.Choi 8456–8466 This paper presents a comprehensive study on resume classification to reduce the time and labor needed to screen an overwhelming number of applications significantly, while improving the selection of suitable candidates. A total of 6,492 resumes are extracted from 24,933 job applications for 252 positions designated into four levels of experience for Clinical Research Coordinators (CRC). Each resume is manually annotated to its most appropriate CRC position by experts through several rounds of triple annotation to establish guidelines. As a result, a high Kappa score of 61% is achieved for inter-annotator agreement. Given this dataset, novel transformer-based classification models are developed for two tasks: the first task takes a resume and classifies it to a CRC level (T1), and the second task takes both a resume and a job description to apply and predicts if the application is suited to the job (T2). Our best models using section encoding and a multi-head attention decoding give results of 73.3% to T1 and 79.2% to T2. Our analysis shows that the prediction errors are mostly made among adjacent CRC levels, which are hard for even experts to distinguish, implying the practical value of our models in real HR platforms. 2020.emnlp-main.679 @@ -9134,7 +9134,7 @@ OphélieLacroix HelenYannakoudakis MarekRei - AndersSøgaard + AndersSøgaard 8467–8478 Evaluation of grammatical error correction (GEC) systems has primarily focused on essays written by non-native learners of English, which however is only part of the full spectrum of GEC applications. We aim to broaden the target domain of GEC and release CWEB, a new benchmark for GEC consisting of website text generated by English speakers of varying levels of proficiency. Website data is a common and important domain that contains far fewer grammatical errors than learner essays, which we show presents a challenge to state-of-the-art GEC systems. We demonstrate that a factor behind this is the inability of systems to rely on a strong internal language model in low error density domains. We hope this work shall facilitate the development of open-domain GEC models that generalize to different topics and genres. 2020.emnlp-main.680 @@ -9146,7 +9146,7 @@ Deconstructing word embedding algorithms KianKenyon-Dean EdwardNewell - Jackie Chi KitCheung + Jackie Chi KitCheung 8479–8484 Word embeddings are reliable feature representations of words used to obtain high quality results for various NLP applications. Uncontextualized word embeddings are used in many NLP tasks today, especially in resource-limited settings where high memory capacity and GPUs are not available. Given the historical success of word embeddings in NLP, we propose a retrospective on some of the most well-known word embedding algorithms. In this work, we deconstruct Word2vec, GloVe, and others, into a common form, unveiling some of the common conditions that seem to be required for making performant word embeddings. We believe that the theoretical findings in this paper can provide a basis for more informed development of future models. 2020.emnlp-main.681 @@ -9206,7 +9206,7 @@ Revealing the Myth of Higher-Order Inference in Coreference Resolution LiyanXu - Jinho D.Choi + Jinho D.Choi 8527–8533 This paper analyzes the impact of higher-order inference (HOI) on the task of coreference resolution. HOI has been adapted by almost all recent coreference resolution models without taking much investigation on its true effectiveness over representation learning. To make a comprehensive analysis, we implement an end-to-end coreference system as well as four HOI approaches, attended antecedent, entity equalization, span clustering, and cluster merging, where the latter two are our original methods. We find that given a high-performing encoder such as SpanBERT, the impact of HOI is negative to marginal, providing a new perspective of HOI to this task. Our best model using cluster merging shows the Avg-F1 of 80.2 on the CoNLL 2012 shared task dataset in English. 2020.emnlp-main.686 @@ -9269,7 +9269,7 @@ <fixed-case>S</fixed-case>eq<fixed-case>M</fixed-case>ix: Augmenting Active Sequence Labeling via Sequence Mixup RongzhiZhang YueYu - ChaoZhang + ChaoZhang 8566–8579 Active learning is an important technique for low-resource sequence labeling tasks. However, current active sequence labeling methods use the queried samples alone in each iteration, which is an inefficient way of leveraging human annotations. We propose a simple but effective data augmentation method to improve label efficiency of active sequence labeling. Our method, SeqMix, simply augments the queried samples by generating extra labeled sequences in each iteration. The key difficulty is to generate plausible sequences along with token-level labels. In SeqMix, we address this challenge by performing mixup for both sequences and token-level labels of the queried samples. Furthermore, we design a discriminator during sequence mixup, which judges whether the generated sequences are plausible or not. Our experiments on Named Entity Recognition and Event Detection tasks show that SeqMix can improve the standard active sequence labeling method by 2.27%–3.75% in terms of F_1 scores. The code and data for SeqMix can be found at https://github.com/rz-zhang/SeqMix. 2020.emnlp-main.691 @@ -9313,7 +9313,7 @@ PierreDognin IgorMelnyk InkitPadhi - CiceroNogueira dos Santos + CiceroNogueira dos Santos PayelDas 8605–8616 In this work, we present a dual learning approach for unsupervised text to path and path to text transfers in Commonsense Knowledge Bases (KBs). We investigate the impact of weak supervision by creating a weakly supervised dataset and show that even a slight amount of supervision can significantly improve the model performance and enable better-quality transfers. We examine different model architectures, and evaluation metrics, proposing a novel Commonsense KB completion metric tailored for generative models. Extensive experimental results show that the proposed method compares very favorably to the existing baselines. This approach is a viable step towards a more advanced system for automatic KB construction/expansion and the reverse operation of KB conversion to coherent textual descriptions. @@ -9337,7 +9337,7 @@ Improving Low Compute Language Modeling with In-Domain Embedding Initialisation CharlesWelch - RadaMihalcea + RadaMihalcea Jonathan K.Kummerfeld 8625–8634 Many NLP applications, such as biomedical data and technical support, have 10-100 million tokens of in-domain data and limited computational resources for learning from it. How should we train a language model in this scenario? Most language modeling research considers either a small dataset with a closed vocabulary (like the standard 1 million token Penn Treebank), or the whole web with byte-pair encoding. We show that for our target setting in English, initialising and freezing input embeddings using in-domain data can improve language model performance by providing a useful representation of rare words, and this pattern holds across several different domains. In the process, we show that the standard convention of tying input and output embeddings does not improve perplexity when initializing with embeddings trained on in-domain data. @@ -9367,7 +9367,7 @@ ChunyuanLi ZheGan ChrisBrockett - BillDolan + BillDolan 8649–8670 Large-scale pre-trained language models, such as BERT and GPT-2, have achieved excellent performance in language representation learning and free-form text generation. However, these models cannot be directly employed to generate text under specified lexical constraints. To address this challenge, we present POINTER (PrOgressive INsertion-based TransformER), a simple yet novel insertion-based approach for hard-constrained text generation. The proposed method operates by progressively inserting new tokens between existing tokens in a parallel manner. This procedure is recursively applied until a sequence is completed. The resulting coarse-to-fine hierarchy makes the generation process intuitive and interpretable. We pre-train our model with the proposed progressive insertion-based objective on a 12GB Wikipedia dataset, and fine-tune it on downstream hard-constrained generation tasks. Non-autoregressive decoding yields a logarithmic time complexity during inference time. Experimental results on both News and Yelp datasets demonstrate that Pointer achieves state-of-the-art performance on constrained text generation. We released the pre-trained models and the source code to facilitate future research. 2020.emnlp-main.698 @@ -9433,13 +9433,13 @@ JesseThomason JacobAndreas YoshuaBengio - JoyceChai + JoyceChai MirellaLapata AngelikiLazaridou JonathanMay AleksandrNisnevich NicolasPinto - JosephTurian + JosephTurian 8718–8735 Language understanding research is held back by a failure to relate language to the physical world it describes and to the social interactions it facilitates. Despite the incredible effectiveness of language processing models to tackle tasks after being trained on text alone, successful linguistic communication relies on a shared experience of the world. It is this shared experience that makes utterances meaningful. Natural language processing is a diverse field, and progress throughout its development has come from new representational theories, modeling techniques, data collection paradigms, and tasks. We posit that the present success of representation learning approaches trained on large, text-only corpora requires the parallel tradition of research on the broader physical and social context of language to address the deeper questions of communication. 2020.emnlp-main.703 @@ -9465,7 +9465,7 @@ AdamFisch KentonLee Ming-WeiChang - JonathanClark + JonathanClark ReginaBarzilay 8755–8768 The traditional image captioning task uses generic reference captions to provide textual information about images. Different user populations, however, will care about different visual aspects of images. In this paper, we propose a new task, Captioning with A Purpose (CapWAP). Our goal is to develop systems that can be tailored to be useful for the information needs of an intended population, rather than merely provide generic information about an image. In this task, we use question-answer (QA) pairs—a natural expression of information need—from users, instead of reference captions, for both training and post-inference evaluation. We show that it is possible to use reinforcement learning to directly optimize for the intended information need, by rewarding outputs that allow a question answering model to provide correct answers to sampled user questions. We convert several visual question answering datasets into CapWAP datasets, and demonstrate that under a variety of scenarios our purposeful captioning system learns to anticipate and fulfill specific information needs better than its generic counterparts, as measured by QA performance on user questions from unseen images, when using the caption alone as context. @@ -9478,7 +9478,7 @@ What is More Likely to Happen Next? Video-and-Language Future Event Prediction JieLei LichengYu - TamaraBerg + TamaraBerg MohitBansal 8769–8784 Given a video with aligned dialogue, people can often infer what is more likely to happen next. Making such predictions requires not only a deep understanding of the rich dynamics underlying the video and dialogue, but also a significant amount of commonsense knowledge. In this work, we explore whether AI models are able to learn to make such multimodal commonsense next-event predictions. To support research in this direction, we collect a new dataset, named Video-and-Language Event Prediction (VLEP), with 28,726 future event prediction examples (along with their rationales) from 10,234 diverse TV Show and YouTube Lifestyle Vlog video clips. In order to promote the collection of non-trivial challenging examples, we employ an adversarial human-and-model-in-the-loop data collection procedure. We also present a strong baseline incorporating information from video, dialogue, and commonsense knowledge. Experiments show that each type of information is useful for this challenging task, and that compared to the high human performance on VLEP, our model provides a good starting point but leaves large room for future work. @@ -9574,7 +9574,7 @@ Unsupervised Question Decomposition for Question Answering EthanPerez PatrickLewis - Wen-tauYih + Wen-tauYih KyunghyunCho DouweKiela 8864–8880 @@ -9610,7 +9610,7 @@ Exploring the Role of Argument Structure in Online Debate Persuasion JialuLi EsinDurmus - ClaireCardie + ClaireCardie 8905–8912 Online debate forums provide users a platform to express their opinions on controversial topics while being exposed to opinions from diverse set of viewpoints. Existing work in Natural Language Processing (NLP) has shown that linguistic features extracted from the debate text and features encoding the characteristics of the audience are both critical in persuasion studies. In this paper, we aim to further investigate the role of discourse structure of the arguments from online debates in their persuasiveness. In particular, we use the factor graph model to obtain features for the argument structure of debates from an online debating platform and incorporate these features to an LSTM-based model to predict the debater that makes the most convincing arguments. We find that incorporating argument structure features play an essential role in achieving the best predictive performance in assessing the persuasiveness of the arguments on online debates. 2020.emnlp-main.716 @@ -9621,7 +9621,7 @@ <fixed-case>Z</fixed-case>ero-<fixed-case>S</fixed-case>hot <fixed-case>S</fixed-case>tance <fixed-case>D</fixed-case>etection: <fixed-case>A</fixed-case> <fixed-case>D</fixed-case>ataset and <fixed-case>M</fixed-case>odel using <fixed-case>G</fixed-case>eneralized <fixed-case>T</fixed-case>opic <fixed-case>R</fixed-case>epresentations EmilyAllaway - KathleenMcKeown + KathleenMcKeown 8913–8931 Stance detection is an important component of understanding hidden influences in everyday life. Since there are thousands of potential topics to take a stance on, most with little to no training data, we focus on zero-shot stance detection: classifying stance from no training examples. In this paper, we present a new dataset for zero-shot stance detection that captures a wider range of topics and lexical variation than in previous datasets. Additionally, we propose a new model for stance detection that implicitly captures relationships between topics using generalized topic representations and show that this model improves performance on a number of challenging linguistic phenomena. 2020.emnlp-main.717 @@ -9673,8 +9673,8 @@ ShanshanPeng JiankunLu DeepanwayGhosal - AlexanderGelbukh - RadaMihalcea + AlexanderGelbukh + RadaMihalcea SoujanyaPoria 8968–8979 Current approaches to empathetic response generation view the set of emotions expressed in the input text as a flat structure, where all the emotions are treated uniformly. We argue that empathetic responses often mimic the emotion of the user to a varying degree, depending on its positivity or negativity and content. We show that the consideration of these polarity-based emotion clusters and emotional mimicry results in improved empathy and contextual relevance of the response as compared to the state-of-the-art. Also, we introduce stochasticity into the emotion mixture that yields emotionally more varied empathetic responses than the previous work. We demonstrate the importance of these factors to empathetic response generation using both automatic- and human-based evaluations. The implementation of MIME is publicly available at https://github.com/declare-lab/MIME. @@ -9717,7 +9717,7 @@ JiaxinHuang ChenyanXiong HengJi - ChaoZhang + ChaoZhang JiaweiHan 9006–9017 Current text classification methods typically require a good number of human-labeled documents as training data, which can be costly and difficult to obtain in real applications. Humans can perform classification without seeing any labeled examples but only based on a small set of words describing the categories to be classified. In this paper, we explore the potential of only using the label name of each class to train classification models on unlabeled data, without using any labeled documents. We use pre-trained neural language models both as general linguistic knowledge sources for category understanding and as representation learning models for document classification. Our method (1) associates semantically related words with the label names, (2) finds category-indicative words and trains the model to predict their implied categories, and (3) generalizes the model via self-training. We show that our model achieves around 90% accuracy on four benchmark datasets including topic and sentiment classification without using any labeled documents but learning from unlabeled data supervised by at most 3 words (1 in most cases) per class as the label name. @@ -9788,7 +9788,7 @@ ZengfengHuang WeijianSun QiZhang - XuanjingHuang + XuanjingHuang 9066–9075 Existing research for question generation encodes the input text as a sequence of tokens without explicitly modeling fact information. These models tend to generate irrelevant and uninformative questions. In this paper, we explore to incorporate facts in the text for question generation in a comprehensive way. We present a novel task of question generation given a query path in the knowledge graph constructed from the input text. We divide the task into two steps, namely, query representation learning and query-based question generation. We formulate query representation learning as a sequence labeling problem for identifying the involved facts to form a query and employ an RNN-based generator for question generation. We first train the two modules jointly in an end-to-end fashion, and further enforce the interaction between these two modules in a variational framework. We construct the experimental datasets on top of SQuAD and results show that our model outperforms other state-of-the-art approaches, and the performance margin is larger when target questions are complex. Human evaluation also proves that our model is able to generate relevant and informative questions. 2020.emnlp-main.729 @@ -9801,7 +9801,7 @@ What time is it? Temporal Analysis of Novels AllenKim CharutaPethe - SteveSkiena + SteveSkiena 9076–9086 Recognizing the flow of time in a story is a crucial aspect of understanding it. Prior work related to time has primarily focused on identifying temporal expressions or relative sequencing of events, but here we propose computationally annotating each line of a book with wall clock times, even in the absence of explicit time-descriptive phrases. To do so, we construct a data set of hourly time phrases from 52,183 fictional books. We then construct a time-of-day classification model that achieves an average error of 2.27 hours. Furthermore, we show that by analyzing a book in whole using dynamic programming of breakpoints, we can roughly partition a book into segments that each correspond to a particular time-of-day. This approach improves upon baselines by over two hour. Finally, we apply our model to a corpus of literature categorized by different periods in history, to show interesting trends of hourly activity throughout the past. Among several observations we find that the fraction of events taking place past 10 P.M jumps past 1880 - coincident with the advent of the electric light bulb and city lights. 2020.emnlp-main.730 @@ -10020,7 +10020,7 @@ UrvashiKhandelwal RobinJia KyleMahowald - DanJurafsky + DanJurafsky 9263–9274 Despite its importance to experimental design, statistical power (the probability that, given a real effect, an experiment will reject the null hypothesis) has largely been ignored by the NLP community. Underpowered experiments make it more difficult to discern the difference between statistical noise and meaningful model improvements, and increase the chances of exaggerated findings. By meta-analyzing a set of existing NLP papers and datasets, we characterize typical power for a variety of settings and conclude that underpowered experiments are common in the NLP literature. In particular, for several tasks in the popular GLUE benchmark, small test sets mean that most attempted comparisons to state of the art models will not be adequately powered. Similarly, based on reasonable assumptions, we find that the most typical experimental design for human rating studies will be underpowered to detect small model differences, of the sort that are frequently studied. For machine translation, we find that typical test sets of 2000 sentences have approximately 75% power to detect differences of 1 BLEU point. To improve the situation going forward, we give an overview of best practices for power analysis in NLP and release a series of notebooks to assist with future power analyses. 2020.emnlp-main.745 @@ -10035,7 +10035,7 @@ NicholasLourie YizhongWang HannanehHajishirzi - Noah A.Smith + Noah A.Smith YejinChoi 9275–9293 Large datasets have become commonplace in NLP research. However, the increased emphasis on data quantity has made it challenging to assess the quality of data. We introduce Data Maps—a model-based tool to characterize and diagnose datasets. We leverage a largely ignored source of information: the behavior of the model on individual instances during training (training dynamics) for building data maps. This yields two intuitive measures for each example—the model’s confidence in the true class, and the variability of this confidence across epochs—obtained in a single run of training. Experiments on four datasets show that these model-dependent measures reveal three distinct regions in the data map, each with pronounced characteristics. First, our data maps show the presence of “ambiguous” regions with respect to the model, which contribute the most towards out-of-distribution generalization. Second, the most populous regions in the data are “easy to learn” for the model, and play an important role in model optimization. Finally, data maps uncover a region with instances that the model finds “hard to learn”; these often correspond to labeling errors. Our results indicate that a shift in focus from quantity to quality of data could lead to robust models and improved out-of-distribution generalization. @@ -10061,7 +10061,7 @@ JonathanPilault RaymondLi SandeepSubramanian - ChrisPal + ChrisPal 9308–9319 We present a method to produce abstractive summaries of long documents that exceed several thousand words via neural abstractive summarization. We perform a simple extractive step before generating a summary, which is then used to condition the transformer language model on relevant information before being tasked with generating a summary. We also show that this approach produces more abstractive summaries compared to prior work that employs a copy mechanism while still achieving higher ROUGE scores. We provide extensive comparisons with strong baseline methods, prior state of the art work as well as multiple variants of our approach including those using only transformers, only extractive techniques and combinations of the two. We examine these models using four different summarization tasks and datasets: arXiv papers, PubMed papers, the Newsroom and BigPatent datasets. We find that transformer based methods produce summaries with fewer n-gram copies, leading to n-gram copying statistics that are more similar to human generated abstracts. We include a human evaluation, finding that transformers are ranked highly for coherence and fluency, but purely extractive methods score higher for informativeness and relevance. We hope that these architectures and experiments may serve as strong points of comparison for future work. Note: The abstract above was collaboratively written by the authors and one of the models presented in this paper based on an earlier draft of this paper. 2020.emnlp-main.748 @@ -10075,7 +10075,7 @@ ShuohangWang ZheGan YuCheng - Jackie Chi KitCheung + Jackie Chi KitCheung JingjingLiu 9320–9331 Pre-trained neural abstractive summarization systems have dominated extractive strategies on news summarization performance, at least in terms of ROUGE. However, system-generated abstractive summaries often face the pitfall of factual inconsistency: generating incorrect facts with respect to the source text. To address this challenge, we propose Span-Fact, a suite of two factual correction models that leverages knowledge learned from question answering models to make corrections in system-generated summaries via span selection. Our models employ single or multi-masking strategies to either iteratively or auto-regressively replace entities in order to ensure semantic consistency w.r.t. the source text, while retaining the syntactic structure of summaries generated by abstractive summarization models. Experiments show that our models significantly boost the factual consistency of system-generated summaries without sacrificing summary quality in terms of both automatic metrics and human evaluation. @@ -10188,7 +10188,7 @@ HiroyukiShindo HideakiTakeda YoshiyasuTakefuji - YujiMatsumoto + YujiMatsumoto 23–30 The embeddings of entities in a large knowledge base (e.g., Wikipedia) are highly beneficial for solving various natural language tasks that involve real world knowledge. In this paper, we present Wikipedia2Vec, a Python-based open-source tool for learning the embeddings of words and entities from Wikipedia. The proposed tool enables users to learn the embeddings efficiently by issuing a single command with a Wikipedia dump file as an argument. We also introduce a web-based demonstration of our tool that allows users to visualize and explore the learned embeddings. In our experiments, our tool achieved a state-of-the-art result on the KORE entity relatedness dataset, and competitive results on various standard benchmark datasets. Furthermore, our tool has been used as a key component in various recent studies. We publicize the source code, demonstration, and the pretrained embeddings for 12 languages at https://wikipedia2vec.github.io/. 2020.emnlp-demos.4 @@ -10200,10 +10200,10 @@ AnthonyFerritto LinPan RishavChakravarti - SalimRoukos - RaduFlorian + SalimRoukos + RaduFlorian J. WilliamMurdock - AviSil + AviSil 31–37 We introduce ARES (A Reading Comprehension Ensembling Service): a novel Machine Reading Comprehension (MRC) demonstration system which utilizes an ensemble of models to increase F1 by 2.3 points. While many of the top leaderboard submissions in popular MRC benchmarks such as the Stanford Question Answering Dataset (SQuAD) and Natural Questions (NQ) use model ensembles, the accompanying papers do not publish their ensembling strategies. In this work, we detail and evaluate various ensembling strategies using the NQ dataset. ARES leverages the CFO (Chakravarti et al., 2019) and ReactJS distributed frameworks to provide a scalable interactive Question Answering experience that capitalizes on the agreement (or lack thereof) between models to improve the answer visualization experience. 2020.emnlp-demos.5 @@ -10233,7 +10233,7 @@ SylvainGugger MariamaDrame QuentinLhoest - AlexanderRush + AlexanderRush 38–45 Honorable Demonstration Paper Recent progress in natural language processing has been driven by advances in both model architecture and model pretraining. Transformer architectures have facilitated building higher-capacity models and pretraining has made it possible to effectively utilize this capacity for a wide variety of tasks. Transformers is an open-source library with the goal of opening up these advances to the wider machine learning community. The library consists of carefully engineered state-of-the art Transformer architectures under a unified API. Backing this library is a curated collection of pretrained models made by and available for the community. Transformers is designed to be extensible by researchers, simple for practitioners, and fast and robust in industrial deployments. The library is available at https://github.com/huggingface/transformers. @@ -10276,7 +10276,7 @@ <fixed-case>D</fixed-case>eezy<fixed-case>M</fixed-case>atch: A Flexible Deep Learning Approach to Fuzzy String Matching KasraHosseini FedericoNanni - MarionaColl Ardanuy + MarionaColl Ardanuy 62–69 We present DeezyMatch, a free, open-source software library written in Python for fuzzy string matching and candidate ranking. Its pair classifier supports various deep neural network architectures for training new classifiers and for fine-tuning a pretrained model, which paves the way for transfer learning in fuzzy string matching. This approach is especially useful where only limited training examples are available. The learned DeezyMatch models can be used to generate rich vector representations from string inputs. The candidate ranker component in DeezyMatch uses these vector representations to find, for a given query, the best matching candidates in a knowledge base. It uses an adaptive searching algorithm applicable to large knowledge bases and query sets. We describe DeezyMatch’s functionality, design and implementation, accompanied by a use case in toponym matching and candidate ranking in realistic noisy datasets. 2020.emnlp-demos.9 @@ -10285,7 +10285,7 @@ <fixed-case>C</fixed-case>o<fixed-case>S</fixed-case>a<fixed-case>T</fixed-case>a: A Constraint Satisfaction Solver and Interpreted Language for Semi-Structured Tables of Sentences - PeterJansen + PeterJansen 70–76 This work presents CoSaTa, an intuitive constraint satisfaction solver and interpreted language for knowledge bases of semi-structured tables expressed as text. The stand-alone CoSaTa solver allows easily expressing complex compositional “inference patterns” for how knowledge from different tables tends to connect to support inference and explanation construction in question answering and other downstream tasks, while including advanced declarative features and the ability to operate over multiple representations of text (words, lemmas, or part-of-speech tags). CoSaTa also includes a hybrid imperative/declarative interpreted language for expressing simple models through minimally-specified simulations grounded in constraint patterns, helping bridge the gap between question answering, question explanation, and model simulation. The solver and interpreter are released as open source. Screencast Demo: https://youtu.be/t93Acsz7LyE 2020.emnlp-demos.10 @@ -10406,8 +10406,8 @@ KishoreVasan JonathanBorchardt EricHorvitz - DanielWeld - MartiHearst + DanielWeld + MartiHearst JevinWest 135–143 The COVID-19 pandemic has sparked unprecedented mobilization of scientists, generating a deluge of papers that makes it hard for researchers to keep track and explore new directions. Search engines are designed for targeted queries, not for discovery of connections across a corpus. In this paper, we present SciSight, a system for exploratory search of COVID-19 research integrating two key capabilities: first, exploring associations between biomedical facets automatically extracted from papers (e.g., genes, drugs, diseases, patient outcomes); second, combining textual and network information to search and visualize groups of researchers and their ties. SciSight has so far served over 15K users with over 42K page views and 13% returns. @@ -10442,7 +10442,7 @@ DanishContractor SivaPatel Q. VeraLiao - SachindraJoshi + SachindraJoshi LuisLastras DavidKonopnicki 151–157 @@ -10531,7 +10531,7 @@ HaoranShi XiaodanLiang TerukoMitamura - EricXing + EricXing ZhitingHu 197–204 Empirical natural language processing (NLP) systems in application domains (e.g., healthcare, finance, education) involve interoperation among multiple components, ranging from data ingestion, human annotation, to text retrieval, analysis, generation, and visualization. We establish a unified open-source framework to support fast development of such sophisticated NLP workflows in a composable manner. The framework introduces a uniform data representation to encode heterogeneous results by a wide range of NLP tasks. It offers a large repository of processors for NLP tasks, visualization, and annotation, which can be easily assembled with full interoperability under the unified representation. The highly extensible framework allows plugging in custom processors from external off-the-shelf NLP and deep learning libraries. The whole framework is delivered through two modularized yet integratable open-source projects, namely Forte (for workflow infrastructure and NLP function processors) and Stave (for user interaction, visualization, and annotation). @@ -10608,7 +10608,7 @@ Fact-Checking, Fake News, Propaganda, and Media Bias: Truth Seeking in the Post-Truth Era - PreslavNakov + PreslavNakov GiovanniDa San Martino 7–19 The rise of social media has democratized content creation and has made it easy for everybody to share and spread information online. On the positive side, this has given rise to citizen journalism, thus enabling much faster dissemination of information compared to what was possible with newspapers, radio, and TV. On the negative side, stripping traditional media from their gate-keeping role has left the public unprotected against the spread of misinformation, which could now travel at breaking-news speed over the same democratic channel. This has given rise to the proliferation of false information specifically created to affect individual people’s beliefs, and ultimately to influence major events such as political elections. There are strong indications that false information was weaponized at an unprecedented scale during Brexit and the 2016 U.S. presidential elections. “Fake news,” which can be defined as fabricated information that mimics news media content in form but not in organizational process or intent, became the Word of the Year for 2017, according to Collins Dictionary. Thus, limiting the spread of “fake news” and its impact has become a major focus for computer scientists, journalists, social media companies, and regulatory authorities. The tutorial will offer an overview of the broad and emerging research area of disinformation, with focus on the latest developments and research directions. @@ -10644,8 +10644,8 @@ Representation, Learning and Reasoning on Spatial Language for Downstream <fixed-case>NLP</fixed-case> Tasks ParisaKordjamshidi - JamesPustejovsky - Marie-FrancineMoens + JamesPustejovsky + Marie-FrancineMoens 28–33 Understating spatial semantics expressed in natural language can become highly complex in real-world applications. This includes applications of language grounding, navigation, visual question answering, and more generic human-machine interaction and dialogue systems. In many of such downstream tasks, explicit representation of spatial concepts and relationships can improve the capabilities of machine learning models in reasoning and deep language understanding. In this tutorial, we overview the cutting-edge research results and existing challenges related to spatial language understanding including semantic annotations, existing corpora, symbolic and sub-symbolic representations, qualitative spatial reasoning, spatial common sense, deep and structured learning models. We discuss the recent results on the above-mentioned applications –that need spatial language learning and reasoning – and highlight the research gaps and future directions. 2020.emnlp-tutorials.5 diff --git a/data/xml/2020.eval4nlp.xml b/data/xml/2020.eval4nlp.xml index 883867ca0c..5e703ee274 100644 --- a/data/xml/2020.eval4nlp.xml +++ b/data/xml/2020.eval4nlp.xml @@ -7,7 +7,7 @@ YangGao MaximePeyrard WeiZhao - EduardHovy + EduardHovy Association for Computational Linguistics
Online
November @@ -45,7 +45,7 @@ Item Response Theory for Efficient Human Evaluation of Chatbots JoãoSedoc - LyleUngar + LyleUngar 21–33 Conversational agent quality is currently assessed using human evaluation, and often requires an exorbitant number of comparisons to achieve statistical significance. In this paper, we introduce Item Response Theory (IRT) for chatbot evaluation, using a paired comparison in which annotators judge which system responds better to the next turn of a conversation. IRT is widely used in educational testing for simultaneously assessing the ability of test takers and the quality of test questions. It is similarly well suited for chatbot evaluation since it allows the assessment of both models and the prompts used to evaluate them. We use IRT to efficiently assess chatbots, and show that different examples from the evaluation set are better suited for comparing high-quality (nearer to human performance) than low-quality systems. Finally, we use IRT to reduce the number of evaluation examples assessed by human annotators while retaining discriminative power. 2020.eval4nlp-1.3 @@ -60,7 +60,7 @@ SeunghyunYoon FranckDernoncourt Doo SoonKim - TrungBui + TrungBui KyominJung 34–39 In this paper, we propose an evaluation metric for image captioning systems using both image and text information. Unlike the previous methods that rely on textual representations in evaluating the caption, our approach uses visiolinguistic representations. The proposed method generates image-conditioned embeddings for each token using ViLBERT from both generated and reference texts. Then, these contextual embeddings from each of the two sentence-pair are compared to compute the similarity score. Experimental results on three benchmark datasets show that our method correlates significantly better with human judgments than all existing metrics. @@ -98,7 +98,7 @@ On the Evaluation of Machine Translation n-best Lists JacobBremerman HudaKhayrallah - DouglasOard + DouglasOard MattPost 60–68 The standard machine translation evaluation framework measures the single-best output of machine translation systems. There are, however, many situations where n-best lists are needed, yet there is no established way of evaluating them. This paper establishes a framework for addressing n-best evaluation by outlining three different questions one could consider when determining how one would define a ‘good’ n-best list and proposing evaluation measures for each question. The first and principal contribution is an evaluation measure that characterizes the translation quality of an entire n-best list by asking whether many of the valid translations are placed near the top of the list. The second is a measure that uses gold translations with preference annotations to ask to what degree systems can produce ranked lists in preference order. The third is a measure that rewards partial matches, evaluating the closeness of the many items in an n-best list to a set of many valid references. These three perspectives make clear that having access to many references can be useful when n-best evaluation is the goal. @@ -160,7 +160,7 @@ JesperBrink Andersen MikkelBak Bertelsen MikkelHørby Schou - Manuel R.Ciosici + Manuel R.Ciosici IraAssent 120–130 Word embeddings are an active topic in the NLP research community. State-of-the-art neural models achieve high performance on downstream tasks, albeit at the cost of computationally expensive training. Cost aware solutions require cheaper models that still achieve good performance. We present several reproduction studies of intrinsic evaluation tasks that evaluate non-contextual word representations in multiple languages. Furthermore, we present 50-8-8, a new data set for the outlier identification task, which avoids limitations of the original data set, such as ambiguous words, infrequent words, and multi-word tokens, while increasing the number of test cases. The data set is expanded to contain semantic and syntactic tests and is multilingual (English, German, and Italian). We provide an in-depth analysis of word embedding models with a range of hyper-parameters. Our analysis shows the suitability of different models and hyper-parameters for different tasks and the greater difficulty of representing German and Italian languages. diff --git a/data/xml/2020.evalnlgeval.xml b/data/xml/2020.evalnlgeval.xml index c93095a690..89a4db36ef 100644 --- a/data/xml/2020.evalnlgeval.xml +++ b/data/xml/2020.evalnlgeval.xml @@ -46,7 +46,7 @@ Emielvan Miltenburg Chrisvan der Lee ThiagoCastro-Ferreira - EmielKrahmer + EmielKrahmer 17–27 NLG researchers often use uncontrolled corpora to train and evaluate their systems, using textual similarity metrics, such as BLEU. This position paper argues in favour of two alternative evaluation strategies, using grammars or rule-based systems. These strategies are particularly useful to identify the strengths and weaknesses of different systems. We contrast our proposals with the (extended) WebNLG dataset, which is revealed to have a skewed distribution of predicates. We predict that this distribution affects the quality of the predictions for systems trained on this data. However, this hypothesis can only be thoroughly tested (without any confounds) once we are able to systematically manipulate the skewness of the data, using a rule-based approach. 2020.evalnlgeval-1.3 diff --git a/data/xml/2020.fever.xml b/data/xml/2020.fever.xml index 0b5d2371f5..00d9b3d7e2 100644 --- a/data/xml/2020.fever.xml +++ b/data/xml/2020.fever.xml @@ -70,9 +70,9 @@ Language Models as Fact Checkers? NayeonLee - Belinda Z.Li + Belinda Z.Li SinongWang - Wen-tauYih + Wen-tauYih HaoMa MadianKhabsa 36–41 @@ -84,7 +84,7 @@ Maintaining Quality in <fixed-case>FEVER</fixed-case> Annotation - LeonDerczynski + LeonDerczynski JulieBinau HenriSchulte 42–46 diff --git a/data/xml/2020.figlang.xml b/data/xml/2020.figlang.xml index df48209ed9..2045ee4f34 100644 --- a/data/xml/2020.figlang.xml +++ b/data/xml/2020.figlang.xml @@ -47,10 +47,10 @@ A Report on the 2020 <fixed-case>VUA</fixed-case> and <fixed-case>TOEFL</fixed-case> Metaphor Detection Shared Task - Chee Wee (Ben)Leong + Chee Wee (Ben)Leong BeataBeigman Klebanov ChrisHamill - EgonStemle + EgonStemle RutujaUbale XianyangChen 18–29 @@ -127,7 +127,7 @@ Applying Transformers and Aspect-based Sentiment Analysis approaches on Sarcasm Detection TahaShangipour ataei SoroushJavdan - BehrouzMinaei-Bidgoli + BehrouzMinaei-Bidgoli 67–71 Sarcasm is a type of figurative language broadly adopted in social media and daily conversations. The sarcasm can ultimately alter the meaning of the sentence, which makes the opinion analysis process error-prone. In this paper, we propose to employ bidirectional encoder representations transformers (BERT), and aspect-based sentiment analysis approaches in order to extract the relation between context dialogue sequence and response and determine whether or not the response is sarcastic. The best performing method of ours obtains an F1 score of 0.73 on the Twitter dataset and 0.734 over the Reddit dataset at the second workshop on figurative language processing Shared Task 2020. 2020.figlang-1.9 @@ -260,7 +260,7 @@ Recognizing Euphemisms and Dysphemisms Using Sentiment Analysis ChristianFelt - EllenRiloff + EllenRiloff 136–145 This paper presents the first research aimed at recognizing euphemistic and dysphemistic phrases with natural language processing. Euphemisms soften references to topics that are sensitive, disagreeable, or taboo. Conversely, dysphemisms refer to sensitive topics in a harsh or rude way. For example, “passed away” and “departed” are euphemisms for death, while “croaked” and “six feet under” are dysphemisms for death. Our work explores the use of sentiment analysis to recognize euphemistic and dysphemistic language. First, we identify near-synonym phrases for three topics (firing, lying, and stealing) using a bootstrapping algorithm for semantic lexicon induction. Next, we classify phrases as euphemistic, dysphemistic, or neutral using lexical sentiment cues and contextual sentiment analysis. We introduce a new gold standard data set and present our experimental results for this task. 2020.figlang-1.20 @@ -284,7 +284,7 @@ Adaptation of Word-Level Benchmark Datasets for Relation-Level Metaphor Identification OmniaZayed - John PhilipMcCrae + John PhilipMcCrae PaulBuitelaar 154–164 Metaphor processing and understanding has attracted the attention of many researchers recently with an increasing number of computational approaches. A common factor among these approaches is utilising existing benchmark datasets for evaluation and comparisons. The availability, quality and size of the annotated data are among the main difficulties facing the growing research area of metaphor processing. The majority of current approaches pertaining to metaphor processing concentrate on word-level processing due to data availability. On the other hand, approaches that process metaphors on the relation-level ignore the context where the metaphoric expression. This is due to the nature and format of the available data. Word-level annotation is poorly grounded theoretically and is harder to use in downstream tasks such as metaphor interpretation. The conversion from word-level to relation-level annotation is non-trivial. In this work, we attempt to fill this research gap by adapting three benchmark datasets, namely the VU Amsterdam metaphor corpus, the TroFi dataset and the TSV dataset, to suit relation-level metaphor identification. We publish the adapted datasets to facilitate future research in relation-level metaphor processing. @@ -295,7 +295,7 @@ Generating Ethnographic Models from Communities’ Online Data - TomekStrzalkowski + TomekStrzalkowski AnnaNewheiser NathanKemper NingSa @@ -305,8 +305,8 @@ In this paper we describe computational ethnography study to demonstrate how machine learning techniques can be utilized to exploit bias resident in language data produced by communities with online presence. Specifically, we leverage the use of figurative language (i.e., the choice of metaphors) in online text (e.g., news media, blogs) produced by distinct communities to obtain models of community worldviews that can be shown to be distinctly biased and thus different from other communities’ models. We automatically construct metaphor-based community models for two distinct scenarios: gun rights and marriage equality. We then conduct a series of experiments to validate the hypothesis that the metaphors found in each community’s online language convey the bias in the community’s worldview. 2020.figlang-1.23 2020.figlang-1.23.Software.zip - 10.18653/v1/2020.figlang-1.23 2020.figlang-1.23.Dataset.pdf + 10.18653/v1/2020.figlang-1.23 @@ -361,7 +361,7 @@ Augmenting Neural Metaphor Detection with Concreteness GhadiAlnafesah HarishTayyar Madabushi - MarkLee + MarkLee 204–210 The idea that a shift in concreteness within a sentence indicates the presence of a metaphor has been around for a while. However, recent methods of detecting metaphor that have relied on deep neural models have ignored concreteness and related psycholinguistic information. We hypothesis that this information is not available to these models and that their addition will boost the performance of these models in detecting metaphor. We test this hypothesis on the Metaphor Detection Shared Task 2020 and find that the addition of concreteness information does in fact boost deep neural models. We also run tests on data from a previous shared task and show similar results. 2020.figlang-1.28 @@ -373,7 +373,7 @@ RafaelEhren TimmLichte LauraKallmeyer - JakubWaszczuk + JakubWaszczuk 211–220 Supervised disambiguation of verbal idioms (VID) poses special demands on the quality and quantity of the annotated data used for learning and evaluation. In this paper, we present a new VID corpus for German and perform a series of VID disambiguation experiments on it. Our best classifier, based on a neural architecture, yields an error reduction across VIDs of 57% in terms of accuracy compared to a simple majority baseline. 2020.figlang-1.29 @@ -411,7 +411,7 @@ Go Figure! Multi-task transformer-based architecture for metaphor detection using idioms: <fixed-case>ETS</fixed-case> team in 2020 metaphor shared task XianyangChen - Chee Wee (Ben)Leong + Chee Wee (Ben)Leong MichaelFlor BeataBeigman Klebanov 235–243 @@ -448,7 +448,7 @@ Testing the role of metadata in metaphor identification - EgonStemle + EgonStemle AlexanderOnysko 256–263 This paper describes the adaptation and application of a neural network system for the automatic detection of metaphors. The LSTM BiRNN system participated in the shared task of metaphor identification that was part of the Second Workshop of Figurative Language Processing (FigLang2020) held at the Annual Conference of the Association for Computational Linguistics (ACL2020). The particular focus of our approach is on the potential influence that the metadata given in the ETS Corpus of Non-Native Written English might have on the automatic detection of metaphors in this dataset. The article first discusses the annotated ETS learner data, highlighting some of its peculiarities and inherent biases of metaphor use. A series of evaluations follow in order to test whether specific metadata influence the system performance in the task of automatic metaphor identification. The system is available under the APLv2 open-source license. @@ -463,7 +463,7 @@ BenBurtenshaw EhsanLotfi IliaMarkov - WalterDaelemans + WalterDaelemans 264–269 We present an ensemble approach for the detection of sarcasm in Reddit and Twitter responses in the context of The Second Workshop on Figurative Language Processing held in conjunction with ACL 2020. The ensemble is trained on the predicted sarcasm probabilities of four component models and on additional features, such as the sentiment of the comment, its length, and source (Reddit or Twitter) in order to learn which of the component models is the most reliable for which input. The component models consist of an LSTM with hashtag and emoji representations; a CNN-LSTM with casing, stop word, punctuation, and sentiment representations; an MLP based on Infersent embeddings; and an SVM trained on stylometric and emotion-based features. All component models use the two conversational turns preceding the response as context, except for the SVM, which only uses features extracted from the response. The ensemble itself consists of an adaboost classifier with the decision tree algorithm as base estimator and yields F1-scores of 67% and 74% on the Reddit and Twitter test data, respectively. 2020.figlang-1.36 @@ -490,7 +490,7 @@ Transformer-based Context-aware Sarcasm Detection in Conversation Threads from Social Media XiangjueDong ChangmaoLi - Jinho D.Choi + Jinho D.Choi 276–280 We present a transformer-based sarcasm detection model that accounts for the context from the entire conversation thread for more robust predictions. Our model uses deep transformer layers to perform multi-head attentions among the target utterance and the relevant context in the thread. The context-aware models are evaluated on two datasets from social media, Twitter and Reddit, and show 3.1% and 7.0% improvements over their baselines. Our best models give the F1-scores of 79.0% and 75.0% for the Twitter and Reddit datasets respectively, becoming one of the highest performing systems among 36 participants in this shared task. 2020.figlang-1.38 diff --git a/data/xml/2020.findings.xml b/data/xml/2020.findings.xml index d823daab71..3dcb8bb3a4 100644 --- a/data/xml/2020.findings.xml +++ b/data/xml/2020.findings.xml @@ -3,7 +3,7 @@ Findings of the Association for Computational Linguistics: EMNLP 2020 - TrevorCohn + TrevorCohn YulanHe YangLiu Association for Computational Linguistics @@ -46,7 +46,7 @@ Stay Hungry, Stay Focused: Generating Informative and Specific Questions in Information-Seeking Conversations PengQi YuhaoZhang - Christopher D.Manning + Christopher D.Manning 25–40 We investigate the problem of generating informative questions in information-asymmetric conversations. Unlike previous work on question generation which largely assumes knowledge of what the answer might be, we are interested in the scenario where the questioner is not given the context from which answers are drawn, but must reason pragmatically about how to acquire new information, given the shared conversation history. We identify two core challenges: (1) formally defining the informativeness of potential questions, and (2) exploring the prohibitively large space of potential questions to find the good candidates. To generate pragmatic questions, we use reinforcement learning to optimize an informativeness metric we propose, combined with a reward function designed to promote more specific questions. We demonstrate that the resulting pragmatic questioner substantially improves the informativeness and specificity of questions generated over a baseline model, as evaluated by our metrics as well as humans. 2020.findings-emnlp.3 @@ -109,7 +109,7 @@ Improving Text Understanding via Deep Syntax-Semantics Communication HaoFei YafengRen - DonghongJi + DonghongJi 84–93 Recent studies show that integrating syntactic tree models with sequential semantic models can bring improved task performance, while these methods mostly employ shallow integration of syntax and semantics. In this paper, we propose a deep neural communication model between syntax and semantics to improve the performance of text understanding. Local communication is performed between syntactic tree encoder and sequential semantic encoder for mutual learning of information exchange. Global communication can further ensure comprehensive information propagation. Results on multiple syntax-dependent tasks show that our model outperforms strong baselines by a large margin. In-depth analysis indicates that our method is highly effective in composing sentence semantics. 2020.findings-emnlp.8 @@ -180,7 +180,7 @@ Neural Speed Reading Audited - AndersSøgaard + AndersSøgaard 148–153 Several approaches to neural speed reading have been presented at major NLP and machine learning conferences in 2017–20; i.e., “human-inspired” recurrent network architectures that learn to “read” text faster by skipping irrelevant words, typically optimizing the joint objective of minimizing classification error rate and FLOPs used at inference time. This paper reflects on the meaningfulness of the speed reading task, showing that (a) better and faster approaches to, say, document classification, already exist, which also learn to ignore part of the input (I give an example with 7% error reduction and a 136x speed-up over the state of the art in neural speed reading); and that (b) any claims that neural speed reading is “human-inspired”, are ill-founded. 2020.findings-emnlp.14 @@ -231,7 +231,7 @@ Mimic and Conquer: Heterogeneous Tree Structure Distillation for Syntactic <fixed-case>NLP</fixed-case> HaoFei YafengRen - DonghongJi + DonghongJi 183–193 Syntax has been shown useful for various NLP tasks, while existing work mostly encodes singleton syntactic tree using one hierarchical neural network. In this paper, we investigate a simple and effective method, Knowledge Distillation, to integrate heterogeneous structure knowledge into a unified sequential LSTM encoder. Experimental results on four typical syntax-dependent tasks show that our method outperforms tree encoders by effectively integrating rich heterogeneous structure syntax, meanwhile reducing error propagation, and also outperforms ensemble methods, in terms of both the efficiency and accuracy. 2020.findings-emnlp.18 @@ -243,7 +243,7 @@ ChenguangZhu RuochenXu MichaelZeng - XuedongHuang + XuedongHuang 194–203 With the abundance of automatic meeting transcripts, meeting summarization is of great interest to both participants and other parties. Traditional methods of summarizing meetings depend on complex multi-step pipelines that make joint optimization intractable. Meanwhile, there are a handful of deep neural models for text summarization and dialogue systems. However, the semantic structure and styles of meeting transcripts are quite different from articles and conversations. In this paper, we propose a novel abstractive summary network that adapts to the meeting scenario. We design a hierarchical structure to accommodate long meeting transcripts and a role vector to depict the difference among speakers. Furthermore, due to the inadequacy of meeting summary data, we pretrain the model on large-scale news summary data. Empirical results show that our model outperforms previous approaches in both automatic metrics and human evaluation. For example, on ICSI dataset, the ROUGE-1 score increases from 34.66% to 46.28%. 2020.findings-emnlp.19 @@ -284,7 +284,7 @@ FabioPetroni AleksandraPiktus MyleOtt - TimRocktäschel + TimRocktäschel VassilisPlachouras FabrizioSilvestri SebastianRiedel @@ -354,7 +354,7 @@ Understanding tables with intermediate pre-training JulianEisenschlos SyrineKrichene - ThomasMüller + ThomasMüller 281–296 Table entailment, the binary classification task of finding if a sentence is supported or refuted by the content of a table, requires parsing language and table structure as well as numerical and discrete reasoning. While there is extensive work on textual entailment, table entailment is less well studied. We adapt TAPAS (Herzig et al., 2020), a table-based BERT model, to recognize entailment. Motivated by the benefits of data augmentation, we create a balanced dataset of millions of automatically created training examples which are learned in an intermediate step prior to fine-tuning. This new data is not only useful for table entailment, but also for SQA (Iyyer et al., 2017), a sequential table QA task. To be able to use long examples as input of BERT models, we evaluate table pruning techniques as a pre-processing step to drastically improve the training and prediction efficiency at a moderate drop in accuracy. The different methods set the new state-of-the-art on the TabFact (Chen et al., 2020) and SQA datasets. 2020.findings-emnlp.27 @@ -419,7 +419,7 @@ The <fixed-case>RELX</fixed-case> Dataset and Matching the Multilingual Blanks for Cross-Lingual Relation Classification AbdullatifKöksal - ArzucanÖzgür + ArzucanÖzgür 340–350 Relation classification is one of the key topics in information extraction, which can be used to construct knowledge bases or to provide useful information for question answering. Current approaches for relation classification are mainly focused on the English language and require lots of training data with human annotations. Creating and annotating a large amount of training data for low-resource languages is impractical and expensive. To overcome this issue, we propose two cross-lingual relation classification models: a baseline model based on Multilingual BERT and a new multilingual pretraining setup, which significantly improves the baseline with distant supervision. For evaluation, we introduce a new public benchmark dataset for cross-lingual relation classification in English, French, German, Spanish, and Turkish, called RELX. We also provide the RELX-Distant dataset, which includes hundreds of thousands of sentences with relations from Wikipedia and Wikidata collected by distant supervision for these languages. Our code and data are available at: https://github.com/boun-tabi/RELX 2020.findings-emnlp.32 @@ -467,7 +467,7 @@ Contextual Modulation for Relation-Level Metaphor Identification OmniaZayed - John P.McCrae + John P.McCrae PaulBuitelaar 388–406 Identifying metaphors in text is very challenging and requires comprehending the underlying comparison. The automation of this cognitive process has gained wide attention lately. However, the majority of existing approaches concentrate on word-level identification by treating the task as either single-word classification or sequential labelling without explicitly modelling the interaction between the metaphor components. On the other hand, while existing relation-level approaches implicitly model this interaction, they ignore the context where the metaphor occurs. In this work, we address these limitations by introducing a novel architecture for identifying relation-level metaphoric expressions of certain grammatical relations based on contextual modulation. In a methodology inspired by works in visual reasoning, our approach is based on conditioning the neural network computation on the deep contextualised features of the candidate expressions using feature-wise linear modulation. We demonstrate that the proposed architecture achieves state-of-the-art results on benchmark datasets. The proposed methodology is generic and could be applied to other textual classification problems that benefit from contextual interaction. @@ -492,7 +492,7 @@ HuaiyuZhu Jonathan K.Kummerfeld YunyaoLi - WalterLasecki + WalterLasecki 415–421 Resources for Semantic Role Labeling (SRL) are typically annotated by experts at great expense. Prior attempts to develop crowdsourcing methods have either had low accuracy or required substantial expert annotation. We propose a new multi-stage crowd workflow that substantially reduces expert involvement without sacrificing accuracy. In particular, we introduce a unique filter stage based on the key observation that crowd workers are able to almost perfectly filter out incorrect options for labels. Our three-stage workflow produces annotations with 95% accuracy for predicate labels and 93% for argument labels, which is comparable to expert agreement. Compared to prior work on crowdsourcing for SRL, we decrease expert effort by 4x, from 56% to 14% of cases. Our approach enables more scalable annotation of SRL, and could enable annotation of NLP tasks that have previously been considered too complex to effectively crowdsource. 2020.findings-emnlp.38 @@ -612,7 +612,7 @@ Dynamic Data Selection for Curriculum Learning via Ability Estimation - John P.Lalor + John P.Lalor HongYu 545–555 Curriculum learning methods typically rely on heuristics to estimate the difficulty of training examples or the ability of the model. In this work, we propose replacing difficulty heuristics with learned difficulty parameters. We also propose Dynamic Data selection for Curriculum Learning via Ability Estimation (DDaCLAE), a strategy that probes model ability at each training epoch to select the best training examples at that point. We show that models using learned difficulty and/or ability outperform heuristic-based curriculum learning models on the GLUE classification tasks. @@ -624,7 +624,7 @@ Fixed Encoder Self-Attention Patterns in Transformer-Based Machine Translation AlessandroRaganato YvesScherrer - JörgTiedemann + JörgTiedemann 556–568 Transformer-based models have brought a radical change to neural machine translation. A key feature of the Transformer architecture is the so-called multi-head attention mechanism, which allows the model to focus simultaneously on different parts of the input. However, recent works have shown that most attention heads learn simple, and often redundant, positional patterns. In this paper, we propose to replace all but one attention head of each encoder layer with simple fixed – non-learnable – attentive patterns that are solely based on position and do not require any external knowledge. Our experiments with different data sizes and multiple language pairs show that fixing the attention heads on the encoder side of the Transformer at training time does not impact the translation quality and even increases BLEU scores by up to 3 points in low-resource scenarios. 2020.findings-emnlp.49 @@ -764,7 +764,7 @@ LujunZhao MengxiWei ChanglongSun - XuanjingHuang + XuanjingHuang 678–688 In this work, we explore the way to quickly adjust an existing named entity recognition (NER) system to make it capable of recognizing entity types not defined in the system. As an illustrative example, consider the case that a NER system has been built to recognize person and organization names, and now it requires to additionally recognize job titles. Such a situation is common in the industrial areas, where the entity types required to recognize vary a lot in different products and keep changing. To avoid laborious data labeling and achieve fast adaptation, we propose to adjust the existing NER system using the previously labeled data and entity lexicons of the newly introduced entity types. We formulate such a task as a partially supervised learning problem and accordingly propose an effective algorithm to solve the problem. Comprehensive experimental studies on several public NER datasets validate the effectiveness of our method. 2020.findings-emnlp.60 @@ -826,10 +826,10 @@ Rethinking Self-Attention: Towards Interpretability in Neural Parsing KhalilMrini FranckDernoncourt - Quan HungTran - TrungBui + Quan HungTran + TrungBui WalterChang - NdapaNakashole + NdapaNakashole 731–742 Attention mechanisms have improved the performance of NLP tasks while allowing models to remain explainable. Self-attention is currently widely used, however interpretability is difficult due to the numerous attention distributions. Recent work has shown that model representations can benefit from label-specific information, while facilitating interpretation of predictions. We introduce the Label Attention Layer: a new form of self-attention where attention heads represent labels. We test our novel layer by running constituency and dependency parsing experiments and show our new model obtains new state-of-the-art results for both tasks on both the Penn Treebank (PTB) and Chinese Treebank. Additionally, our model requires fewer self-attention layers compared to existing work. Finally, we find that the Label Attention heads learn relations between syntactic categories and show pathways to analyze errors. 2020.findings-emnlp.65 @@ -838,7 +838,7 @@ <fixed-case>P</fixed-case>olicy<fixed-case>QA</fixed-case>: A Reading Comprehension Dataset for Privacy Policies - WasiAhmad + WasiAhmad JianfengChi YuanTian Kai-WeiChang @@ -852,7 +852,7 @@ A Linguistic Analysis of Visually Grounded Dialogues Based on Spatial Expressions TakumaUdagawa TakatoYamazaki - AkikoAizawa + AkikoAizawa 750–765 Recent models achieve promising results in visually grounded dialogues. However, existing datasets often contain undesirable biases and lack sophisticated linguistic analyses, which make it difficult to understand how well current models recognize their precise linguistic structures. To address this problem, we make two design choices: first, we focus on OneCommon Corpus (CITATION), a simple yet challenging common grounding dataset which contains minimal bias by design. Second, we analyze their linguistic structures based on spatial expressions and provide comprehensive and reliable annotation for 600 dialogues. We show that our annotation captures important linguistic structures including predicate-argument structure, modification and ellipsis. In our experiments, we assess the model’s understanding of these structures through reference resolution. We demonstrate that our annotation can reveal both the strengths and weaknesses of baseline models in essential levels of detail. Overall, we propose a novel framework and resource for investigating fine-grained language understanding in visually grounded dialogues. 2020.findings-emnlp.67 @@ -907,7 +907,7 @@ <fixed-case>E</fixed-case>-<fixed-case>BERT</fixed-case>: Efficient-Yet-Effective Entity Embeddings for <fixed-case>BERT</fixed-case> NinaPoerner UlliWaltinger - HinrichSchütze + HinrichSchütze 803–818 We present a novel way of injecting factual knowledge about entities into the pretrained BERT model (Devlin et al., 2019): We align Wikipedia2Vec entity vectors (Yamada et al., 2016) with BERT’s native wordpiece vector space and use the aligned entity vectors as if they were wordpiece vectors. The resulting entity-enhanced version of BERT (called E-BERT) is similar in spirit to ERNIE (Zhang et al., 2019) and KnowBert (Peters et al., 2019), but it requires no expensive further pre-training of the BERT encoder. We evaluate E-BERT on unsupervised question answering (QA), supervised relation classification (RC) and entity linking (EL). On all three tasks, E-BERT outperforms BERT and other baselines. We also show quantitatively that the original BERT model is overly reliant on the surface form of entity names (e.g., guessing that someone with an Italian-sounding name speaks Italian), and that E-BERT mitigates this problem. 2020.findings-emnlp.71 @@ -918,7 +918,7 @@ A Multi-task Learning Framework for Opinion Triplet Extraction - ChenZhang + ChenZhang QiuchiLi DaweiSong BenyouWang @@ -946,7 +946,7 @@ Improving <fixed-case>QA</fixed-case> Generalization by Concurrent Modeling of Multiple Biases MingzhuWu - Nafise SadatMoosavi + Nafise SadatMoosavi AndreasRücklé IrynaGurevych 839–853 @@ -960,7 +960,7 @@ Actor-Double-Critic: Incorporating Model-Based Critic for Task-Oriented Dialogue Systems Yen-chenWu Bo-HsiangTseng - MilicaGasic + MilicaGasic 854–863 In order to improve the sample-efficiency of deep reinforcement learning (DRL), we implemented imagination augmented agent (I2A) in spoken dialogue systems (SDS). Although I2A achieves a higher success rate than baselines by augmenting predicted future into a policy network, its complicated architecture introduces unwanted instability. In this work, we propose actor-double-critic (ADC) to improve the stability and overall performance of I2A. ADC simplifies the architecture of I2A to reduce excessive parameters and hyper-parameters. More importantly, a separate model-based critic shares parameters between actions and makes back-propagation explicit. In our experiments on Cambridge Restaurant Booking task, ADC enhances success rates considerably and shows robustness to imperfect environment models. In addition, ADC exhibits the stability and sample-efficiency as significantly reducing the baseline standard deviation of success rates and reaching the 80% success rate with half training data. 2020.findings-emnlp.75 @@ -1063,7 +1063,7 @@ Cross-lingual Alignment Methods for Multilingual <fixed-case>BERT</fixed-case>: A Comparative Study SaurabhKulshreshtha Jose LuisRedondo Garcia - Ching-YunChang + Ching-YunChang 933–942 Multilingual BERT (mBERT) has shown reasonable capability for zero-shot cross-lingual transfer when fine-tuned on downstream tasks. Since mBERT is not pre-trained with explicit cross-lingual supervision, transfer performance can further be improved by aligning mBERT with cross-lingual signal. Prior work propose several approaches to align contextualised embeddings. In this paper we analyse how different forms of cross-lingual supervision and various alignment methods influence the transfer capability of mBERT in zero-shot setting. Specifically, we compare parallel corpora vs dictionary-based supervision and rotational vs fine-tuning based alignment methods. We evaluate the performance of different alignment methodologies across eight languages on two tasks: Name Entity Recognition and Semantic Slot Filling. In addition, we propose a novel normalisation method which consistently improves the performance of rotation-based alignment including a notable 3% F1 improvement for distant and typologically dissimilar languages. Importantly we identify the biases of the alignment methods to the type of task and proximity to the transfer language. We also find that supervision from parallel corpus is generally superior to dictionary alignments. 2020.findings-emnlp.83 @@ -1112,11 +1112,11 @@ Scene Graph Modification Based on Natural Language Commands XuanliHe - Quan HungTran - GholamrezaHaffari + Quan HungTran + GholamrezaHaffari WalterChang ZheLin - TrungBui + TrungBui FranckDernoncourt NhanDam 972–990 @@ -1139,11 +1139,11 @@ Transition-based Parsing with Stack-Transformers - RamónFernandez Astudillo + RamónFernandez Astudillo MiguelBallesteros TahiraNaseem AustinBlodgett - RaduFlorian + RaduFlorian 1001–1007 Modeling the parser state is key to good performance in transition-based parsing. Recurrent Neural Networks considerably improved the performance of transition-based systems by modelling the global state, e.g. stack-LSTM parsers, or local state modeling of contextualized features, e.g. Bi-LSTM parsers. Given the success of Transformer architectures in recent parsing systems, this work explores modifications of the sequence-to-sequence Transformer architecture to model either global or local parser states in transition-based parsing. We show that modifications of the cross attention mechanism of the Transformer considerably strengthen performance both on dependency and Abstract Meaning Representation (AMR) parsing tasks, particularly for smaller models or limited training data. 2020.findings-emnlp.89 @@ -1173,7 +1173,7 @@ <fixed-case>H</fixed-case>ybrid<fixed-case>QA</fixed-case>: A Dataset of Multi-Hop Question Answering over Tabular and Textual Data WenhuChen HanwenZha - ZhiyuChen + ZhiyuChen WenhanXiong HongWang William YangWang @@ -1195,9 +1195,9 @@ <fixed-case>EST</fixed-case>e<fixed-case>R</fixed-case>: Combining Word Co-occurrences and Word Associations for Unsupervised Emotion Detection - Sujatha DasGollapalli + Sujatha DasGollapalli PolinaRozenshtein - See-KiongNg + See-KiongNg 1043–1056 Accurate detection of emotions in user- generated text was shown to have several applications for e-commerce, public well-being, and disaster management. Currently, the state-of-the-art performance for emotion detection in text is obtained using complex, deep learning models trained on domain-specific, labeled data. In this paper, we propose ESTeR , an unsupervised model for identifying emotions using a novel similarity function based on random walks on graphs. Our model combines large-scale word co-occurrence information with word-associations from lexicons avoiding not only the dependence on labeled datasets, but also an explicit mapping of words to latent spaces used in emotion-enriched word embeddings. Our similarity function can also be computed efficiently. We study a range of datasets including recent tweets related to COVID-19 to illustrate the superior performance of our model and report insights on public emotions during the on-going pandemic. 2020.findings-emnlp.93 @@ -1222,7 +1222,7 @@ PengWu BoweiZou RidongJiang - AiTiAw + AiTiAw 1063–1073 As an essential component of task-oriented dialogue systems, Dialogue State Tracking (DST) takes charge of estimating user intentions and requests in dialogue contexts and extracting substantial goals (states) from user utterances to help the downstream modules to determine the next actions of dialogue systems. For practical usages, a major challenge to constructing a robust DST model is to process a conversation with multi-domain states. However, most existing approaches trained DST on a single domain independently, ignoring the information across domains. To tackle the multi-domain DST task, we first construct a dialogue state graph to transfer structured features among related domain-slot pairs across domains. Then, we encode the graph information of dialogue states by graph convolutional networks and utilize a hard copy mechanism to directly copy historical states from the previous conversation. Experimental results show that our model improves the performances of the multi-domain DST baseline (TRADE) with the absolute joint accuracy of 2.0% and 1.0% on the MultiWOZ 2.0 and 2.1 dialogue datasets, respectively. 2020.findings-emnlp.95 @@ -1326,7 +1326,7 @@ <fixed-case>H</fixed-case>yper<fixed-case>T</fixed-case>ext: Endowing <fixed-case>F</fixed-case>ast<fixed-case>T</fixed-case>ext with Hyperbolic Geometry YudongZhu DiZhou - JinghuiXiao + JinghuiXiao XinJiang XiaoChen QunLiu @@ -1339,7 +1339,7 @@ <fixed-case>A</fixed-case>uto<fixed-case>ETER</fixed-case>: Automated Entity Type Representation for Knowledge Graph Embedding GuanglinNiu - BoLi + BoLi YongfeiZhang ShiliangPu JingyangLi @@ -1354,8 +1354,8 @@ Learning Robust and Multilingual Speech Representations KazuyaKawakami LuyuWang - ChrisDyer - PhilBlunsom + ChrisDyer + PhilBlunsom Aaronvan den Oord 1182–1192 Unsupervised speech representation learning has shown remarkable success at finding representations that correlate with phonetic structures and improve downstream speech recognition performance. However, most research has been focused on evaluating the representations in terms of their ability to improve the performance of speech recognition systems on read English (e.g. Wall Street Journal and LibriSpeech). This evaluation methodology overlooks two important desiderata that speech representations should have: robustness to domain shifts and transferability to other languages. In this paper we learn representations from up to 8000 hours of diverse and noisy speech data and evaluate the representations by looking at their robustness to domain shifts and their ability to improve recognition performance in many languages. We find that our representations confer significant robustness advantages to the resulting recognition systems: we see significant improvements in out-of-domain transfer relative to baseline feature sets and the features likewise provide improvements in 25 phonetically diverse languages. @@ -1381,7 +1381,7 @@ HoangNguyen ChenweiZhang CongyingXia - PhilipYu + PhilipYu 1209–1218 Few-shot Intent Detection is challenging due to the scarcity of available annotated utterances. Although recent works demonstrate that multi-level matching plays an important role in transferring learned knowledge from seen training classes to novel testing classes, they rely on a static similarity measure and overly fine-grained matching components. These limitations inhibit generalizing capability towards Generalized Few-shot Learning settings where both seen and novel classes are co-existent. In this paper, we propose a novel Semantic Matching and Aggregation Network where semantic components are distilled from utterances via multi-head self-attention with additional dynamic regularization constraints. These semantic components capture high-level information, resulting in more effective matching between instances. Our multi-perspective matching method provides a comprehensive matching measure to enhance representations of both labeled and unlabeled instances. We also propose a more challenging evaluation setting that considers classification on the joint all-class label space. Extensive experimental results demonstrate the effectiveness of our method. Our code and data are publicly available. 2020.findings-emnlp.108 @@ -1395,7 +1395,7 @@ MengjieZhao PhilippDufter YadollahYaghoobzadeh - HinrichSchütze + HinrichSchütze 1219–1234 Pretrained language models achieve state-of-the-art results on many NLP tasks, but there are still many open questions about how and why they work so well. We investigate the contextualization of words in BERT. We quantify the amount of contextualization, i.e., how well words are interpreted in context, by studying the extent to which semantic classes of a word can be inferred from its contextualized embedding. Quantifying contextualization helps in understanding and utilizing pretrained language models. We show that the top layer representations support highly accurate inference of semantic classes; that the strongest contextualization effects occur in the lower layers; that local context is mostly sufficient for contextualizing words; and that top layer representations are more task-specific after finetuning while lower layer representations are more transferable. Finetuning uncovers task-related features, but pretrained knowledge about contextualization is still well preserved. 2020.findings-emnlp.109 @@ -1505,11 +1505,11 @@ DanielKhashabi KevinLin JiangmingLiu - Nelson F.Liu + Nelson F.Liu PhoebeMulcaire QiangNing SameerSingh - Noah A.Smith + Noah A.Smith SanjaySubramanian ReutTsarfaty EricWallace @@ -1525,7 +1525,7 @@ Parsing with Multilingual <fixed-case>BERT</fixed-case>, a Small Corpus, and a Small Treebank Ethan C.Chau Lucy H.Lin - Noah A.Smith + Noah A.Smith 1324–1334 Pretrained multilingual contextual representations have shown great success, but due to the limits of their pretraining data, their benefits do not apply equally to all language varieties. This presents a challenge for language varieties unfamiliar to these models, whose labeled and unlabeled data is too limited to train a monolingual model effectively. We propose the use of additional language-specific pretraining and vocabulary augmentation to adapt multilingual models to low-resource settings. Using dependency parsing of four diverse low-resource language varieties as a case study, we show that these methods significantly improve performance over baselines, especially in the lowest-resource cases, and demonstrate the importance of the relationship between such models’ pretraining data and target language varieties. 2020.findings-emnlp.118 @@ -1553,7 +1553,7 @@ ShoTakase KeiUchiumi AtsushiKeyaki - NaoakiOkazaki + NaoakiOkazaki 1341–1351 In traditional NLP, we tokenize a given sentence as a preprocessing, and thus the tokenization is unrelated to a target downstream task. To address this issue, we propose a novel method to explore a tokenization which is appropriate for the downstream task. Our proposed method, optimizing tokenization (OpTok), is trained to assign a high probability to such appropriate tokenization based on the downstream task loss. OpTok can be used for any downstream task which uses a vector representation of a sentence such as text classification. Experimental results demonstrate that OpTok improves the performance of sentiment analysis and textual entailment. In addition, we introduce OpTok into BERT, the state-of-the-art contextualized embeddings and report a positive effect. 2020.findings-emnlp.120 @@ -1575,7 +1575,7 @@ A Compare Aggregate Transformer for Understanding Document-grounded Dialogue LongxuanMa - Wei-NanZhang + Wei-NanZhang RunxinSun TingLiu 1358–1367 @@ -1616,7 +1616,7 @@ AlexTamkin TrishaSingh DavideGiovanardi - NoahGoodman + NoahGoodman 1393–1401 How does language model pretraining help transfer learning? We consider a simple ablation technique for determining the impact of each pretrained layer on transfer task performance. This method, partial reinitialization, involves replacing different layers of a pretrained model with random weights, then finetuning the entire model on the transfer task and observing the change in performance. This technique reveals that in BERT, layers with high probing performance on downstream GLUE tasks are neither necessary nor sufficient for high accuracy on those tasks. Furthermore, the benefit of using pretrained parameters for a layer varies dramatically with finetuning dataset size: parameters that provide tremendous performance improvement when data is plentiful may provide negligible benefits in data-scarce settings. These results reveal the complexity of the transfer learning process, highlighting the limitations of methods that operate on frozen models or single data samples. 2020.findings-emnlp.125 @@ -1666,7 +1666,7 @@ ex<fixed-case>BERT</fixed-case>: Extending Pre-trained Models with Domain-specific Vocabulary Under Constrained Training Resources WenTai H. T.Kung - XinDong + XinDong MarcusComiter Chang-FuKuo 1433–1439 @@ -1693,7 +1693,7 @@ Conditional Neural Generation using Sub-Aspect Functions for Extractive News Summarization ZhengyuanLiu KeShi - NancyChen + NancyChen 1453–1463 Much progress has been made in text summarization, fueled by neural architectures using large-scale training corpora. However, in the news domain, neural models easily overfit by leveraging position-related features due to the prevalence of the inverted pyramid writing style. In addition, there is an unmet need to generate a variety of summaries for different users. In this paper, we propose a neural framework that can flexibly control summary generation by introducing a set of sub-aspect functions (i.e. importance, diversity, position). These sub-aspect functions are regulated by a set of control codes to decide which sub-aspect to focus on during summary generation. We demonstrate that extracted summaries with minimal position bias is comparable with those generated by standard models that take advantage of position preference. We also show that news summaries generated with a focus on diversity can be more preferred by human raters. These results suggest that a more flexible neural summarization framework providing more control options could be desirable in tailoring to different user preferences, which is useful since it is often impractical to articulate such preferences for different applications a priori. 2020.findings-emnlp.131 @@ -1728,7 +1728,7 @@ Inexpensive Domain Adaptation of Pretrained Language Models: Case Studies on Biomedical <fixed-case>NER</fixed-case> and Covid-19 <fixed-case>QA</fixed-case> NinaPoerner UlliWaltinger - HinrichSchütze + HinrichSchütze 1482–1490 Domain adaptation of Pretrained Language Models (PTLMs) is typically achieved by unsupervised pretraining on target-domain text. While successful, this approach is expensive in terms of hardware, runtime and CO 2 emissions. Here, we propose a cheaper alternative: We train Word2Vec on target-domain text and align the resulting word vectors with the wordpiece vectors of a general-domain PTLM. We evaluate on eight English biomedical Named Entity Recognition (NER) tasks and compare against the recently proposed BioBERT model. We cover over 60% of the BioBERT - BERT F1 delta, at 5% of BioBERT’s CO 2 footprint and 2% of its cloud compute cost. We also show how to quickly adapt an existing general-domain Question Answering (QA) model to an emerging domain: the Covid-19 pandemic. 2020.findings-emnlp.134 @@ -1767,7 +1767,7 @@ HuyVu SuhaibAbdurahman SudeepBhatia - LyleUngar + LyleUngar 1512–1524 Psychologists routinely assess people’s emotions and traits, such as their personality, by collecting their responses to survey questionnaires. Such assessments can be costly in terms of both time and money, and often lack generalizability, as existing data cannot be used to predict responses for new survey questions or participants. In this study, we propose a method for predicting a participant’s questionnaire response using their social media texts and the text of the survey question they are asked. Specifically, we use Natural Language Processing (NLP) tools such as BERT embeddings to represent both participants (via the text they write) and survey questions as embeddings vectors, allowing us to predict responses for out-of-sample participants and questions. Our novel approach can be used by researchers to integrate new participants or new questions into psychological studies without the constraint of costly data collection, facilitating novel practical applications and furthering the development of psychological theory. Finally, as a side contribution, the success of our model also suggests a new approach to study survey questions using NLP tools such as text embeddings rather than response data used in traditional methods. 2020.findings-emnlp.137 @@ -1778,7 +1778,7 @@ Will it Unblend? YuvalPinter - Cassandra L.Jacobs + Cassandra L.Jacobs JacobEisenstein 1525–1535 Natural language processing systems often struggle with out-of-vocabulary (OOV) terms, which do not appear in training data. Blends, such as “innoventor”, are one particularly challenging class of OOV, as they are formed by fusing together two or more bases that relate to the intended meaning in unpredictable manners and degrees. In this work, we run experiments on a novel dataset of English OOV blends to quantify the difficulty of interpreting the meanings of blends by large-scale contextual language models such as BERT. We first show that BERT’s processing of these blends does not fully access the component meanings, leaving their contextual representations semantically impoverished. We find this is mostly due to the loss of characters resulting from blend formation. Then, we assess how easily different models can recognize the structure and recover the origin of blends, and find that context-aware embedding systems outperform character-level and context-free embeddings, although their results are still far from satisfactory. @@ -1867,8 +1867,8 @@ WentaoWang ZichaoYang XiaodanLiang - Frank F.Xu - EricXing + Frank F.Xu + EricXing ZhitingHu 1589–1598 Recent neural approaches to data-to-text generation have mostly focused on improving content fidelity while lacking explicit control over writing styles (e.g., sentence structures, word choices). More traditional systems use templates to determine the realization of text. Yet manual or automatic construction of high-quality templates is difficult, and a template acting as hard constraints could harm content fidelity when it does not match the record perfectly. We study a new way of stylistic control by using existing sentences as “soft” templates. That is, a model learns to imitate the writing style of any given exemplar sentence, with automatic adaptions to faithfully describe the record. The problem is challenging due to the lack of parallel data. We develop a neural approach that includes a hybrid attention-copy mechanism, learns with weak supervisions, and is enhanced with a new content coverage constraint. We conduct experiments in restaurants and sports domains. Results show our approach achieves stronger performance than a range of comparison methods. Our approach balances well between content fidelity and style control given exemplars that match the records to varying degrees. @@ -1905,7 +1905,7 @@ MasoudJalili Sabet PhilippDufter FrançoisYvon - HinrichSchütze + HinrichSchütze 1627–1643 Word alignments are useful for tasks like statistical and neural machine translation (NMT) and cross-lingual annotation projection. Statistical word aligners perform well, as do methods that extract alignments jointly with translations in NMT. However, most approaches require parallel training data and quality decreases as less training data is available. We propose word alignment methods that require no parallel data. The key idea is to leverage multilingual word embeddings – both static and contextualized – for word alignment. Our multilingual embeddings are created from monolingual data only without relying on any parallel data or dictionaries. We find that alignments created from embeddings are superior for four and comparable for two language pairs compared to those produced by traditional statistical aligners – even with abundant parallel data; e.g., contextualized embeddings achieve a word alignment F1 for English-German that is 5 percentage points higher than eflomal, a high-quality statistical aligner, trained on 100k parallel sentences. 2020.findings-emnlp.147 @@ -1916,8 +1916,8 @@ <fixed-case>T</fixed-case>weet<fixed-case>E</fixed-case>val: Unified Benchmark and Comparative Evaluation for Tweet Classification FrancescoBarbieri - JoseCamacho-Collados - LuisEspinosa Anke + JoseCamacho-Collados + LuisEspinosa Anke LeonardoNeves 1644–1650 The experimental landscape in natural language processing for social media is too fragmented. Each year, new shared tasks and datasets are proposed, ranging from classics like sentiment analysis to irony detection or emoji prediction. Therefore, it is unclear what the current state of the art is, as there is no standardized evaluation protocol, neither a strong set of baselines trained on such domain-specific data. In this paper, we propose a new evaluation framework (TweetEval) consisting of seven heterogeneous Twitter-specific classification tasks. We also provide a strong set of baselines as starting point, and compare different language modeling pre-training strategies. Our initial experiments show the effectiveness of starting off with existing pre-trained generic language models, and continue training them on Twitter corpora. @@ -1929,7 +1929,7 @@ Octa: Omissions and Conflicts in Target-Aspect Sentiment Analysis ZheZhang Chung-WeiHang - MunindarSingh + MunindarSingh 1651–1662 Sentiments in opinionated text are often determined by both aspects and target words (or targets). We observe that targets and aspects interrelate in subtle ways, often yielding conflicting sentiments. Thus, a naive aggregation of sentiments from aspects and targets treated separately, as in existing sentiment analysis models, impairs performance. We propose Octa, an approach that jointly considers aspects and targets when inferring sentiments. To capture and quantify relationships between targets and context words, Octa uses a selective self-attention mechanism that handles implicit or missing targets. Specifically, Octa involves two layers of attention mechanisms for, respectively, selective attention between targets and context words and attention over words based on aspects. On benchmark datasets, Octa outperforms leading models by a large margin, yielding (absolute) gains in accuracy of 1.6% to 4.3%. 2020.findings-emnlp.149 @@ -1940,7 +1940,7 @@ On the Language Neutrality of Pre-trained Multilingual Representations JindřichLibovický RudolfRosa - AlexanderFraser + AlexanderFraser 1663–1674 Multilingual contextual embeddings, such as multilingual BERT and XLM-RoBERTa, have proved useful for many multi-lingual tasks. Previous work probed the cross-linguality of the representations indirectly using zero-shot transfer learning on morphological and syntactic tasks. We instead investigate the language-neutrality of multilingual contextual embeddings directly and with respect to lexical semantics. Our results show that contextual embeddings are more language-neutral and, in general, more informative than aligned static word-type embeddings, which are explicitly trained for language neutrality. Contextual embeddings are still only moderately language-neutral by default, so we propose two simple methods for achieving stronger language neutrality: first, by unsupervised centering of the representation for each language and second, by fitting an explicit projection on small parallel data. Besides, we show how to reach state-of-the-art accuracy on language identification and match the performance of statistical methods for word alignment of parallel sentences without using parallel data. 2020.findings-emnlp.150 @@ -1950,10 +1950,10 @@ Cost-effective Selection of Pretraining Data: A Case Study of Pretraining <fixed-case>BERT</fixed-case> on Social Media - XiangDai + XiangDai SarvnazKarimi BenHachey - CecileParis + CecileParis 1675–1681 Recent studies on domain-specific BERT models show that effectiveness on downstream tasks can be improved when models are pretrained on in-domain data. Often, the pretraining data used in these models are selected based on their subject matter, e.g., biology or computer science. Given the range of applications using social media text, and its unique language variety, we pretrain two models on tweets and forum text respectively, and empirically demonstrate the effectiveness of these two resources. In addition, we investigate how similarity measures can be used to nominate in-domain pretraining data. We publicly release our pretrained models at https://bit.ly/35RpTf0. 2020.findings-emnlp.151 @@ -1967,7 +1967,7 @@ KhushbuSaxena VivekKulkarni ThomasRunkler - HinrichSchütze + HinrichSchütze 1682–1690 Prior research notes that BERT’s computational cost grows quadratically with sequence length thus leading to longer training times, higher GPU memory constraints and carbon emissions. While recent work seeks to address these scalability issues at pre-training, these issues are also prominent in fine-tuning especially for long sequence tasks like document classification. Our work thus focuses on optimizing the computational cost of fine-tuning for document classification. We achieve this by complementary learning of both topic and language models in a unified framework, named TopicBERT. This significantly reduces the number of self-attention operations – a main performance bottleneck. Consequently, our model achieves a 1.4x ( 40%) speedup with 40% reduction in CO2 emission while retaining 99.9% performance over 5 datasets. 2020.findings-emnlp.152 @@ -2007,7 +2007,7 @@ Multi-Agent Mutual Learning at Sentence-Level and Token-Level for Neural Machine Translation BaohaoLiao YingboGao - HermannNey + HermannNey 1715–1724 Mutual learning, where multiple agents learn collaboratively and teach one another, has been shown to be an effective way to distill knowledge for image classification tasks. In this paper, we extend mutual learning to the machine translation task and operate at both the sentence-level and the token-level. Firstly, we co-train multiple agents by using the same parallel corpora. After convergence, each agent selects and learns its poorly predicted tokens from other agents. The poorly predicted tokens are determined by the acceptance-rejection sampling algorithm. Our experiments show that sequential mutual learning at the sentence-level and the token-level improves the results cumulatively. Absolute improvements compared to strong baselines are obtained on various translation tasks. On the IWSLT’14 German-English task, we get a new state-of-the-art BLEU score of 37.0. We also report a competitive result, 29.9 BLEU score, on the WMT’14 English-German task. 2020.findings-emnlp.155 @@ -2019,7 +2019,7 @@ HuXu BingLiu LeiShu - PhilipYu + PhilipYu 1725–1731 This paper focuses on learning domain-oriented language models driven by end tasks, which aims to combine the worlds of both general-purpose language models (such as ELMo and BERT) and domain-specific language understanding. We propose DomBERT, an extension of BERT to learn from both in-domain corpus and relevant domain corpora. This helps in learning domain language models with low-resources. Experiments are conducted on an assortment of tasks in aspect-based sentiment analysis (ABSA), demonstrating promising results. 2020.findings-emnlp.156 @@ -2045,7 +2045,7 @@ HanchengCao MengjieCheng ZhepengCen - DanielMcFarland + DanielMcFarland XiangRen 1746–1757 What kind of basic research ideas are more likely to get applied in practice? There is a long line of research investigating patterns of knowledge transfer, but it generally focuses on documents as the unit of analysis and follow their transfer into practice for a specific scientific domain. Here we study translational research at the level of scientific concepts for all scientific fields. We do this through text mining and predictive modeling using three corpora: 38.6 million paper abstracts, 4 million patent documents, and 0.28 million clinical trials. We extract scientific concepts (i.e., phrases) from corpora as instantiations of “research ideas”, create concept-level features as motivated by literature, and then follow the trajectories of over 450,000 new concepts (emerged from 1995-2014) to identify factors that lead only a small proportion of these ideas to be used in inventions and drug trials. Results from our analysis suggest several mechanisms that distinguish which scientific concept will be adopted in practice, and which will not. We also demonstrate that our derived features can be used to explain and predict knowledge transfer with high accuracy. Our work provides greater understanding of knowledge transfer for researchers, practitioners, and government agencies interested in encouraging translational research. @@ -2071,8 +2071,8 @@ An Empirical Exploration of Local Ordering Pre-training for Structured Prediction ZhisongZhang XiangKong - LoriLevin - EduardHovy + LoriLevin + EduardHovy 1770–1783 Recently, pre-training contextualized encoders with language model (LM) objectives has been shown an effective semi-supervised method for structured prediction. In this work, we empirically explore an alternative pre-training method for contextualized encoders. Instead of predicting words in LMs, we “mask out” and predict word order information, with a local ordering strategy and word-selecting objectives. With evaluations on three typical structured prediction tasks (dependency parsing, POS tagging, and NER) over four languages (English, Finnish, Czech, and Italian), we show that our method is consistently beneficial. We further conduct detailed error analysis, including one that examines a specific type of parsing error where the head is misidentified. The results show that pre-trained contextual encoders can bring improvements in a structured way, suggesting that they may be able to capture higher-order patterns and feature combinations from unlabeled data. 2020.findings-emnlp.160 @@ -2137,7 +2137,7 @@ <fixed-case>C</fixed-case>ommon<fixed-case>G</fixed-case>en: A Constrained Text Generation Challenge for Generative Commonsense Reasoning - Bill YuchenLin + Bill YuchenLin WangchunshuZhou MingShen PeiZhou @@ -2156,7 +2156,7 @@ DanielPressel AmyHemmeter SagnikRay Choudhury - SrinivasBangalore + SrinivasBangalore 1841–1848 Current state-of-the-art models for named entity recognition (NER) are neural models with a conditional random field (CRF) as the final layer. Entities are represented as per-token labels with a special structure in order to decode them into spans. Current work eschews prior knowledge of how the span encoding scheme works and relies on the CRF learning which transitions are illegal and which are not to facilitate global coherence. We find that by constraining the output to suppress illegal transitions we can train a tagger with a cross-entropy loss twice as fast as a CRF with differences in F1 that are statistically insignificant, effectively eliminating the need for a CRF. We analyze the dynamics of tag co-occurrence to explain when these constraints are most effective and provide open source implementations of our tagger in both PyTorch and TensorFlow. 2020.findings-emnlp.166 @@ -2170,7 +2170,7 @@ TianzeShi ChenZhao JordanBoyd-Graber - HalDaumé III + HalDaumé III LillianLee 1849–1864 Large-scale semantic parsing datasets annotated with logical forms have enabled major advances in supervised approaches. But can richer supervision help even more? To explore the utility of fine-grained, lexical-level supervision, we introduce SQUALL, a dataset that enriches 11,276 WIKITABLEQUESTIONS English-language questions with manually created SQL equivalents plus alignments between SQL and question fragments. Our annotation enables new training possibilities for encoderdecoder models, including approaches from machine translation previously precluded by the absence of alignments. We propose and test two methods: (1) supervised attention; (2) adopting an auxiliary objective of disambiguating references in the input queries to table columns. In 5-fold cross validation, these strategies improve over strong baselines by 4.4% execution accuracy. Oracle experiments suggest that annotated alignments can support further accuracy gains of up to 23.9%. @@ -2184,7 +2184,7 @@ ChenguangZhu RobertGmyr MichaelZeng - XuedongHuang + XuedongHuang EricDarve 1865–1874 Text summarization aims to extract essential information from a piece of text and transform the text into a concise version. Existing unsupervised abstractive summarization models leverage recurrent neural networks framework while the recently proposed transformer exhibits much more capability. Moreover, most of previous summarization models ignore abundant unlabeled corpora resources available for pretraining. In order to address these issues, we propose TED, a transformer-based unsupervised abstractive summarization system with pretraining on large-scale data. We first leverage the lead bias in news articles to pretrain the model on millions of unlabeled corpora. Next, we finetune TED on target domains through theme modeling and a denoising autoencoder to enhance the quality of generated summaries. Notably, TED outperforms all unsupervised abstractive baselines on NYT, CNN/DM and English Gigaword datasets with various document styles. Further analysis shows that the summaries generated by TED are highly abstractive, and each component in the objective function of TED is highly effective. @@ -2288,7 +2288,7 @@ Learning Visual-Semantic Embeddings for Reporting Abnormal Findings on Chest <fixed-case>X</fixed-case>-rays JianmoNi - Chun-NanHsu + Chun-NanHsu AmilcareGentili JulianMcAuley 1954–1960 @@ -2461,7 +2461,7 @@ <fixed-case>L</fixed-case>ogic2<fixed-case>T</fixed-case>ext: High-Fidelity Natural Language Generation from Logical Forms - ZhiyuChen + ZhiyuChen WenhuChen HanwenZha XiyouZhou @@ -2477,7 +2477,7 @@ <fixed-case>M</fixed-case>ed<fixed-case>IC</fixed-case>a<fixed-case>T</fixed-case>: A Dataset of Medical Images, Captions, and Textual References SanjaySubramanian - Lucy LuWang + Lucy LuWang BenBogin SachinMehta Madeleinevan Zuylen @@ -2633,8 +2633,8 @@ The Role of Reentrancies in <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation Parsing IdaSzubert MarcoDamonte - Shay B.Cohen - MarkSteedman + Shay B.Cohen + MarkSteedman 2198–2207 Abstract Meaning Representation (AMR) parsing aims at converting sentences into AMR representations. These are graphs and not trees because AMR supports reentrancies (nodes with more than one parent). Following previous findings on the importance of reen- trancies for AMR, we empirically find and discuss several linguistic phenomena respon- sible for reentrancies in AMR, some of which have not received attention before. We cate- gorize the types of errors AMR parsers make with respect to reentrancies. Furthermore, we find that correcting these errors provides an in- crease of up to 5% Smatch in parsing perfor- mance and 20% in reentrancy prediction 2020.findings-emnlp.199 @@ -2688,8 +2688,8 @@ Reducing Quantity Hallucinations in Abstractive Summarization ZhengZhao - Shay B.Cohen - BonnieWebber + Shay B.Cohen + BonnieWebber 2237–2249 It is well-known that abstractive summaries are subject to hallucination—including material that is not supported by the original text. While summaries can be made hallucination-free by limiting them to general phrases, such summaries would fail to be very informative. Alternatively, one can try to avoid hallucinations by verifying that any specific entities in the summary appear in the original text in a similar context. This is the approach taken by our system, Herman. The system learns to recognize and verify quantity entities (dates, numbers, sums of money, etc.) in a beam-worth of abstractive summaries produced by state-of-the-art models, in order to up-rank those summaries whose quantity terms are supported by the original text. Experimental results demonstrate that the ROUGE scores of such up-ranked summaries have a higher Precision than summaries that have not been up-ranked, without a comparable loss in Recall, resulting in higher F1. Preliminary human evaluation of up-ranked vs. original summaries shows people’s preference for the former. 2020.findings-emnlp.203 @@ -2717,9 +2717,9 @@ A Semi-supervised Approach to Generate the Code-Mixed Text using Pre-trained Encoder and Transfer Learning - DeepakGupta + DeepakGupta AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 2267–2280 Code-mixing, the interleaving of two or more languages within a sentence or discourse is ubiquitous in multilingual societies. The lack of code-mixed training data is one of the major concerns for the development of end-to-end neural network-based models to be deployed for a variety of natural language processing (NLP) applications. A potential solution is to either manually create or crowd-source the code-mixed labelled data for the task at hand, but that requires much human efforts and often not feasible because of the language specific diversity in the code-mixed text. To circumvent the data scarcity issue, we propose an effective deep learning approach for automatically generating the code-mixed text from English to multiple languages without any parallel data. In order to train the neural network, we create synthetic code-mixed texts from the available parallel corpus by modelling various linguistic properties of code-mixing. Our codemixed text generator is built upon the encoder-decoder framework, where the encoder is augmented with the linguistic and task-agnostic features obtained from the transformer based language model. We also transfer the knowledge from a neural machine translation (NMT) to warm-start the training of code-mixed generator. Experimental results and in-depth analysis show the effectiveness of our proposed code-mixed text generation on eight diverse language pairs. 2020.findings-emnlp.206 @@ -2731,7 +2731,7 @@ <fixed-case>BERT</fixed-case>-<fixed-case>MK</fixed-case>: Integrating Graph Contextualized Knowledge into Pre-trained Language Models BinHe DiZhou - JinghuiXiao + JinghuiXiao XinJiang QunLiu Nicholas JingYuan @@ -2748,7 +2748,7 @@ YikangShen AlessandroSordoni AaronCourville - Timothy J.O’Donnell + Timothy J.O’Donnell 2291–2307 We model the recursive production property of context-free grammars for natural and synthetic languages. To this end, we present a dynamic programming algorithm that marginalises over latent binary tree structures with N leaves, allowing us to compute the likelihood of a sequence of N tokens under a latent tree model, which we maximise to train a recursive neural function. We demonstrate performance on two synthetic tasks: SCAN, where it outperforms previous models on the LENGTH split, and English question formation, where it performs comparably to decoders with the ground-truth tree structure. We also present experimental results on German-English translation on the Multi30k dataset, and qualitatively analyse the induced tree structures our model learns for the SCAN tasks and the German-English translation task. 2020.findings-emnlp.208 @@ -2759,11 +2759,11 @@ Guided Dialogue Policy Learning without Adversarial Learning in the Loop ZimingLi - SungjinLee + SungjinLee BaolinPeng JinchaoLi JuliaKiseleva - Maartende Rijke + Maartende Rijke ShahinShayandeh JianfengGao 2308–2317 @@ -2837,7 +2837,7 @@ Learning Knowledge Bases with Parameters for Task-Oriented Dialogue Systems AndreaMadotto SamuelCahyawijaya - Genta IndraWinata + Genta IndraWinata YanXu ZihanLiu ZhaojiangLin @@ -2925,7 +2925,7 @@ Adapting Coreference Resolution to <fixed-case>T</fixed-case>witter Conversations - BerfinAktaş + BerfinAktaş VeronikaSolopova AnnalenaKohnert ManfredStede @@ -2950,8 +2950,8 @@ <fixed-case>COSMIC</fixed-case>: <fixed-case>CO</fixed-case>mmon<fixed-case>S</fixed-case>ense knowledge for e<fixed-case>M</fixed-case>otion Identification in Conversations DeepanwayGhosal NavonilMajumder - AlexanderGelbukh - RadaMihalcea + AlexanderGelbukh + RadaMihalcea SoujanyaPoria 2470–2481 In this paper, we address the task of utterance level emotion recognition in conversations using commonsense knowledge. We propose COSMIC, a new framework that incorporates different elements of commonsense such as mental states, events, and causal relations, and build upon them to learn interactions between interlocutors participating in a conversation. Current state-of-theart methods often encounter difficulties in context propagation, emotion shift detection, and differentiating between related emotion classes. By learning distinct commonsense representations, COSMIC addresses these challenges and achieves new state-of-the-art results for emotion recognition on four different benchmark conversational datasets. Our code is available at https://github.com/declare-lab/conv-emotion. @@ -2975,11 +2975,11 @@ Answer Span Correction in Machine Reading Comprehension RevanthGangi Reddy - Md ArafatSultan - EfsunSarioglu Kayi + Md ArafatSultan + EfsunSarioglu Kayi RongZhang VittorioCastelli - AviSil + AviSil 2496–2501 Answer validation in machine reading comprehension (MRC) consists of verifying an extracted answer against an input context and question pair. Previous work has looked at re-assessing the “answerability” of the question given the extracted answer. Here we address a different problem: the tendency of existing MRC systems to produce partially correct answers when presented with answerable questions. We explore the nature of such errors and propose a post-processing correction method that yields statistically significant performance improvements over state-of-the-art MRC systems in both monolingual and multilingual evaluation. 2020.findings-emnlp.226 @@ -3049,7 +3049,7 @@ JiezhongQiu HaoMa OmerLevy - Wen-tauYih + Wen-tauYih SinongWang JieTang 2555–2565 @@ -3077,7 +3077,7 @@ ChengcanYing FeiZhao ZhifangFan - XinyuDai + XinyuDai RuiXia 2576–2585 Aspect-oriented Fine-grained Opinion Extraction (AFOE) aims at extracting aspect terms and opinion terms from review in the form of opinion pairs or additionally extracting sentiment polarity of aspect term to form opinion triplet. Because of containing several opinion factors, the complete AFOE task is usually divided into multiple subtasks and achieved in the pipeline. However, pipeline approaches easily suffer from error propagation and inconvenience in real-world scenarios. To this end, we propose a novel tagging scheme, Grid Tagging Scheme (GTS), to address the AFOE task in an end-to-end fashion only with one unified grid tagging task. Additionally, we design an effective inference strategy on GTS to exploit mutual indication between different opinion factors for more accurate extractions. To validate the feasibility and compatibility of GTS, we implement three different GTS models respectively based on CNN, BiLSTM, and BERT, and conduct experiments on the aspect-oriented opinion pair extraction and opinion triplet extraction datasets. Extensive experimental results indicate that GTS models outperform strong baselines significantly and achieve state-of-the-art performance. @@ -3210,7 +3210,7 @@ Textual <fixed-case>S</fixed-case>upervision for <fixed-case>V</fixed-case>isually <fixed-case>G</fixed-case>rounded <fixed-case>S</fixed-case>poken <fixed-case>L</fixed-case>anguage <fixed-case>U</fixed-case>nderstanding BertrandHigy DesmondElliott - GrzegorzChrupała + GrzegorzChrupała 2698–2709 Visually-grounded models of spoken language understanding extract semantic information directly from speech, without relying on transcriptions. This is useful for low-resource languages, where transcriptions can be expensive or impossible to obtain. Recent work showed that these models can be improved if transcriptions are available at training time. However, it is not clear how an end-to-end approach compares to a traditional pipeline-based approach when one has access to transcriptions. Comparing different strategies, we find that the pipeline approach works better when enough text is available. With low-resource languages in mind, we also show that translations can be effectively used in place of transcriptions but more data is needed to obtain similar results. 2020.findings-emnlp.244 @@ -3245,12 +3245,12 @@ SashankSanthanam ZhuoCheng BrodieMather - BonnieDorr + BonnieDorr ArchnaBhatia BryannaHebenstreit AlanZemel AdamDalton - TomekStrzalkowski + TomekStrzalkowski SamiraShaikh 2736–2750 Achieving true human-like ability to conduct a conversation remains an elusive goal for open-ended dialogue systems. We posit this is because extant approaches towards natural language generation (NLG) are typically construed as end-to-end architectures that do not adequately model human generation processes. To investigate, we decouple generation into two separate phases: planning and realization. In the planning phase, we train two planners to generate plans for response utterances. The realization phase uses response plans to produce an appropriate response. Through rigorous evaluations, both automated and human, we demonstrate that decoupling the process into planning and realization performs better than an end-to-end approach. @@ -3265,7 +3265,7 @@ ClaudioGreco GretaGandolfi EleonoraGualdoni - RaffaellaBernardi + RaffaellaBernardi 2751–2767 This paper introduces BD2BB, a novel language and vision benchmark that requires multimodal models combine complementary information from the two modalities. Recently, impressive progress has been made to develop universal multimodal encoders suitable for virtually any language and vision tasks. However, current approaches often require them to combine redundant information provided by language and vision. Inspired by real-life communicative contexts, we propose a novel task where either modality is necessary but not sufficient to make a correct prediction. To do so, we first build a dataset of images and corresponding sentences provided by human participants. Second, we evaluate state-of-the-art models and compare their performance against human speakers. We show that, while the task is relatively easy for humans, best-performing models struggle to achieve similar results. 2020.findings-emnlp.248 @@ -3328,7 +3328,7 @@ ChandraBhagavatula Jae sungPark RonanLe Bras - Noah A.Smith + Noah A.Smith YejinChoi 2810–2829 Natural language rationales could provide intuitive, higher-level explanations that are easily understandable by humans, complementing the more broadly studied lower-level explanations based on gradients or attention weights. We present the first study focused on generating natural language rationales across several complex visual reasoning tasks: visual commonsense reasoning, visual-textual entailment, and visual question answering. The key challenge of accurate rationalization is comprehensive image understanding at all levels: not just their explicit content at the pixel level, but their contextual contents at the semantic and pragmatic levels. We present RationaleˆVT Transformer, an integrated model that learns to generate free-text rationales by combining pretrained language models with object recognition, grounded visual semantic frames, and visual commonsense graphs. Our experiments show that free-text rationalization is a promising research direction to complement model interpretability for complex visual-textual reasoning tasks. In addition, we find that integration of richer semantic and pragmatic visual features improves visual fidelity of rationales. @@ -3414,7 +3414,7 @@ XipengQiu HengzhiPei HangYan - XuanjingHuang + XuanjingHuang 2887–2897 Multi-criteria Chinese word segmentation (MCCWS) aims to exploit the relations among the multiple heterogeneous segmentation criteria and further improve the performance of each single criterion. Previous work usually regards MCCWS as different tasks, which are learned together under the multi-task learning framework. In this paper, we propose a concise but effective unified model for MCCWS, which is fully-shared for all the criteria. By leveraging the powerful ability of the Transformer encoder, the proposed unified model can segment Chinese text according to a unique criterion-token indicating the output criterion. Besides, the proposed unified model can segment both simplified and traditional Chinese and has an excellent transfer capability. Experiments on eight datasets with different criteria show that our model outperforms our single-criterion baseline model and other multi-criteria models. Source codes of this paper are available on Github. 2020.findings-emnlp.260 @@ -3489,7 +3489,7 @@ JianhanXu XiaoqingZheng Kai-WeiChang - XuanjingHuang + XuanjingHuang 2938–2948 We propose a novel approach to cross-lingual dependency parsing based on word reordering. The words in each sentence of a source language corpus are rearranged to meet the word order in a target language under the guidance of a part-of-speech based language model (LM). To obtain the highest reordering score under the LM, a population-based optimization algorithm and its genetic operators are designed to deal with the combinatorial nature of such word reordering. A parser trained on the reordered corpus then can be used to parse sentences in the target language. We demonstrate through extensive experimentation that our approach achieves better or comparable results across 25 target languages (1.73% increase in average), and outperforms a baseline by a significant margin on the languages that are greatly different from the source one. For example, when transferring the English parser to Hindi and Latin, our approach outperforms the baseline by 15.3% and 6.7% respectively. 2020.findings-emnlp.265 @@ -3578,7 +3578,7 @@ Learning to Model and Ignore Dataset Bias with Mixed Capacity Ensembles ChristopherClark MarkYatskar - LukeZettlemoyer + LukeZettlemoyer 3031–3045 Many datasets have been shown to contain incidental correlations created by idiosyncrasies in the data collection process. For example, sentence entailment datasets can have spurious word-class correlations if nearly all contradiction sentences contain the word “not”, and image recognition datasets can have tell-tale object-background correlations if dogs are always indoors. In this paper, we propose a method that can automatically detect and ignore these kinds of dataset-specific patterns, which we call dataset biases. Our method trains a lower capacity model in an ensemble with a higher capacity model. During training, the lower capacity model learns to capture relatively shallow correlations, which we hypothesize are likely to reflect dataset bias. This frees the higher capacity model to focus on patterns that should generalize better. We ensure the models learn non-overlapping approaches by introducing a novel method to make them conditionally independent. Importantly, our approach does not require the bias to be known in advance. We evaluate performance on synthetic datasets, and four datasets built to penalize models that exploit known biases on textual entailment, visual question answering, and image recognition tasks. We show improvement in all settings, including a 10 point gain on the visual question answering dataset. 2020.findings-emnlp.272 @@ -3588,7 +3588,7 @@ Learning to Generalize for Sequential Decision Making XusenYin - RalphWeischedel + RalphWeischedel JonathanMay 3046–3063 We consider problems of making sequences of decisions to accomplish tasks, interacting via the medium of language. These problems are often tackled with reinforcement learning approaches. We find that these models do not generalize well when applied to novel task domains. However, the large amount of computation necessary to adequately train and explore the search space of sequential decision making, under a reinforcement learning paradigm, precludes the inclusion of large contextualized language models, which might otherwise enable the desired generalization ability. We introduce a teacher-student imitation learning methodology and a means of converting a reinforcement learning model into a natural language understanding model. Together, these methodologies enable the introduction of contextualized language models into the sequential decision making problem space. We show that models can learn faster and generalize more, leveraging both the imitation learning and the reformulation. Our models exceed teacher performance on various held-out decision problems, by up to 7% on in-domain problems and 24% on out-of-domain problems. @@ -3641,7 +3641,7 @@ Hsien-chinLin NurulLubis MarcoMoresi - MilicaGasic + MilicaGasic 3096–3102 The ability to accurately track what happens during a conversation is essential for the performance of a dialogue system. Current state-of-the-art multi-domain dialogue state trackers achieve just over 55% accuracy on the current go-to benchmark, which means that in almost every second dialogue turn they place full confidence in an incorrect dialogue state. Belief trackers, on the other hand, maintain a distribution over possible dialogue states. However, they lack in performance compared to dialogue state trackers, and do not produce well calibrated distributions. In this work we present state-of-the-art performance in calibration for multi-domain dialogue belief trackers using a calibrated ensemble of models. Our resulting dialogue belief tracker also outperforms previous dialogue belief tracking models in terms of accuracy. 2020.findings-emnlp.277 @@ -3712,7 +3712,7 @@ XavierGarcia PierreForet ThibaultSellam - AnkurParikh + AnkurParikh 3160–3170 We present a probabilistic framework for multilingual neural machine translation that encompasses supervised and unsupervised setups, focusing on unsupervised translation. In addition to studying the vanilla case where there is only monolingual data available, we propose a novel setup where one language in the (source, target) pair is not associated with any parallel data, but there may exist auxiliary parallel data that contains the other. This auxiliary data can naturally be utilized in our probabilistic framework via a novel cross-translation loss term. Empirically, we show that our approach results in higher BLEU scores over state-of-the-art unsupervised models on the WMT’14 English-French, WMT’16 English-German, and WMT’16 English-Romanian datasets in most directions. 2020.findings-emnlp.283 @@ -3778,11 +3778,11 @@ Pushing the Limits of <fixed-case>AMR</fixed-case> Parsing with Self-Learning Young-SukLee - RamónFernandez Astudillo + RamónFernandez Astudillo TahiraNaseem RevanthGangi Reddy - RaduFlorian - SalimRoukos + RaduFlorian + SalimRoukos 3208–3214 Abstract Meaning Representation (AMR) parsing has experienced a notable growth in performance in the last two years, due both to the impact of transfer learning and the development of novel architectures specific to AMR. At the same time, self-learning techniques have helped push the performance boundaries of other natural language processing applications, such as machine translation or question answering. In this paper, we explore different ways in which trained models can be applied to improve AMR parsing performance, including generation of synthetic text and AMR annotations as well as refinement of actions oracle. We show that, without any additional human annotations, these techniques improve an already performant parser and achieve state-of-the-art results on AMR 1.0 and AMR 2.0. 2020.findings-emnlp.288 @@ -3818,7 +3818,7 @@ Towards <fixed-case>C</fixed-case>ontrollable <fixed-case>B</fixed-case>iases in <fixed-case>L</fixed-case>anguage <fixed-case>G</fixed-case>eneration EmilySheng Kai-WeiChang - PremNatarajan + PremNatarajan NanyunPeng 3239–3254 We present a general approach towards controllable societal biases in natural language generation (NLG). Building upon the idea of adversarial triggers, we develop a method to induce societal biases in generated text when input prompts contain mentions of specific demographic groups. We then analyze two scenarios: 1) inducing negative biases for one demographic and positive biases for another demographic, and 2) equalizing biases between demographics. The former scenario enables us to detect the types of biases present in the model. Specifically, we show the effectiveness of our approach at facilitating bias analysis by finding topics that correspond to demographic inequalities in generated text and comparing the relative effectiveness of inducing biases for different demographics. The second scenario is useful for mitigating biases in downstream applications such as dialogue generation. In our experiments, the mitigation technique proves to be effective at equalizing the amount of biases across demographics while simultaneously generating less negatively biased text overall. @@ -3842,7 +3842,7 @@ JunmoKang GiwonHong HaritzPuerto San Roman - Sung-HyonMyaeng + Sung-HyonMyaeng 3266–3277 Unsupervised question answering (UQA) has been proposed to avoid the high cost of creating high-quality datasets for QA. One approach to UQA is to train a QA model with questions generated automatically. However, the generated questions are either too similar to a word sequence in the context or too drifted from the semantics of the context, thereby making it difficult to train a robust QA model. We propose a novel regularization method based on teacher-student architecture to avoid bias toward a particular question generation strategy and modulate the process of generating individual words when a question is generated. Our experiments demonstrate that we have achieved the goal of generating higher-quality questions for UQA across diverse QA datasets and tasks. We also show that this method can be useful for creating a QA model with few-shot learning. 2020.findings-emnlp.293 @@ -3852,7 +3852,7 @@ Graph-to-Graph Transformer for Transition-based Dependency Parsing AlirezaMohammadshahi - JamesHenderson + JamesHenderson 3278–3289 We propose the Graph2Graph Transformer architecture for conditioning on and predicting arbitrary graphs, and apply it to the challenging task of transition-based dependency parsing. After proposing two novel Transformer models of transition-based dependency parsing as strong baselines, we show that adding the proposed mechanisms for conditioning on and predicting graphs of Graph2Graph Transformer results in significant improvements, both with and without BERT pre-training. The novel baselines and their integration with Graph2Graph Transformer significantly outperform the state-of-the-art in traditional transition-based dependency parsing on both English Penn Treebank, and 13 languages of Universal Dependencies Treebanks. Graph2Graph Transformer can be integrated with many previous structured prediction methods, making it easy to apply to a wide range of NLP tasks. 2020.findings-emnlp.294 @@ -3883,7 +3883,7 @@ Detecting Stance in Media On Global Warming YiweiLuo DallasCard - DanJurafsky + DanJurafsky 3296–3315 Citing opinions is a powerful yet understudied strategy in argumentation. For example, an environmental activist might say, “Leading scientists agree that global warming is a serious concern,” framing a clause which affirms their own stance (“that global warming is serious”) as an opinion endorsed ("[scientists] agree”) by a reputable source (“leading”). In contrast, a global warming denier might frame the same clause as the opinion of an untrustworthy source with a predicate connoting doubt: “Mistaken scientists claim [...]." Our work studies opinion-framing in the global warming (GW) debate, an increasingly partisan issue that has received little attention in NLP. We introduce DeSMOG, a dataset of stance-labeled GW sentences, and train a BERT classifier to study novel aspects of argumentation in how different sides of a debate represent their own and each other’s opinions. From 56K news articles, we find that similar linguistic devices for self-affirming and opponent-doubting discourse are used across GW-accepting and skeptic media, though GW-skeptical media shows more opponent-doubt. We also find that authors often characterize sources as hypocritical, by ascribing opinions expressing the author’s own view to source entities known to publicly endorse the opposing view. We release our stance dataset, model, and lexicons of framing devices for future work on opinion-framing and the automatic detection of GW stance. 2020.findings-emnlp.296 @@ -3937,8 +3937,8 @@ DheerajRajagopal NiketTandon PeterClark - BhavanaDalvi - EduardHovy + BhavanaDalvi + EduardHovy 3345–3355 Our goal is to explain the effects of perturbations in procedural text, e.g., given a passage describing a rabbit’s life cycle, explain why illness (the perturbation) may reduce the rabbit population (the effect). Although modern systems are able to solve the original prediction task well (e.g., illness results in less rabbits), the explanation task - identifying the causal chain of events from perturbation to effect - remains largely unaddressed, and is the goal of this research. We present QUARTET, a system that constructs such explanations from paragraphs, by modeling the explanation task as a multitask learning problem. QUARTET constructs explanations from the sentences in the procedural text, achieving ~18 points better on explanation accuracy compared to several strong baselines on a recent process comprehension benchmark. On an end task on this benchmark, we show a surprising finding that good explanations do not have to come at the expense of end task performance, in fact leading to a 7% F1 improvement over SOTA. 2020.findings-emnlp.300 @@ -3951,7 +3951,7 @@ SuchinGururangan MaartenSap YejinChoi - Noah A.Smith + Noah A.Smith 3356–3369 Pretrained neural language models (LMs) are prone to generating racist, sexist, or otherwise toxic language which hinders their safe deployment. We investigate the extent to which pretrained LMs can be prompted to generate toxic language, and the effectiveness of controllable text generation algorithms at preventing such toxic degeneration. We create and release RealToxicityPrompts, a dataset of 100K naturally occurring, sentence-level prompts derived from a large corpus of English web text, paired with toxicity scores from a widely-used toxicity classifier. Using RealToxicityPrompts, we find that pretrained LMs can degenerate into toxic text even from seemingly innocuous prompts. We empirically assess several controllable generation methods, and find that while data- or compute-intensive methods (e.g., adaptive pretraining on non-toxic data) are more effective at steering away from toxicity than simpler solutions (e.g., banning “bad” words), no current method is failsafe against neural toxic degeneration. To pinpoint the potential cause of such persistent toxic degeneration, we analyze two web text corpora used to pretrain several LMs (including GPT-2; Radford et. al, 2019), and find a significant amount of offensive, factually unreliable, and otherwise toxic content. Our work provides a test bed for evaluating toxic generations by LMs and stresses the need for better data selection processes for pretraining. 2020.findings-emnlp.301 @@ -3962,8 +3962,8 @@ Improving Event Duration Prediction via Time-aware Pre-training ZonglinYang XinyaDu - AlexanderRush - ClaireCardie + AlexanderRush + ClaireCardie 3370–3378 End-to-end models in NLP rarely encode external world knowledge about length of time. We introduce two effective models for duration prediction, which incorporate external knowledge by reading temporal-related news sentences (time-aware pre-training). Specifically, one model predicts the range/unit where the duration value falls in (R-PRED); and the other predicts the exact duration value (E-PRED). Our best model – E-PRED, substantially outperforms previous work, and captures duration information more accurately than R-PRED. We also demonstrate our models are capable of duration prediction in the unsupervised setting, outperforming the baselines. 2020.findings-emnlp.302 @@ -3974,7 +3974,7 @@ Composed Variational Natural Language Generation for Few-shot Intents CongyingXia CaimingXiong - PhilipYu + PhilipYu RichardSocher 3379–3388 In this paper, we focus on generating training examples for few-shot intents in the realistic imbalanced scenario. To build connections between existing many-shot intents and few-shot intents, we consider an intent as a combination of a domain and an action, and propose a composed variational natural language generator (CLANG), a transformer-based conditional variational autoencoder. CLANG utilizes two latent variables to represent the utterances corresponding to two different independent parts (domain and action) in the intent, and the latent variables are composed together to generate natural examples. Additionally, to improve the generator learning, we adopt the contrastive regularization loss that contrasts the in-class with the out-of-class utterance generation given the intent. To evaluate the quality of the generated utterances, experiments are conducted on the generalized few-shot intent detection task. Empirical results show that our proposed model achieves state-of-the-art performances on two real-world intent detection datasets. @@ -4022,7 +4022,7 @@ <fixed-case>BERT</fixed-case>-k<fixed-case>NN</fixed-case>: Adding a k<fixed-case>NN</fixed-case> Search Component to Pretrained Language Models for Better <fixed-case>QA</fixed-case> NoraKassner - HinrichSchütze + HinrichSchütze 3424–3430 Khandelwal et al. (2020) use a k-nearest-neighbor (kNN) component to improve language model performance. We show that this idea is beneficial for open-domain question answering (QA). To improve the recall of facts encountered during training, we combine BERT (Devlin et al., 2019) with a traditional information retrieval step (IR) and a kNN search over a large datastore of an embedded text collection. Our contributions are as follows: i) BERT-kNN outperforms BERT on cloze-style QA by large margins without any further training. ii) We show that BERT often identifies the correct response category (e.g., US city), but only kNN recovers the factually correct answer (e.g.,“Miami”). iii) Compared to BERT, BERT-kNN excels for rare facts. iv) BERT-kNN can easily handle facts not covered by BERT’s training set, e.g., recent events. 2020.findings-emnlp.307 @@ -4112,8 +4112,8 @@ KyleRichardson LiangXu LuLi - SandraKübler - LawrenceMoss + SandraKübler + LawrenceMoss 3512–3526 Despite the tremendous recent progress on natural language inference (NLI), driven largely by large-scale investment in new datasets (e.g.,SNLI, MNLI) and advances in modeling, most progress has been limited to English due to a lack of reliable datasets for most of the world’s languages. In this paper, we present the first large-scale NLI dataset (consisting of ~56,000 annotated sentence pairs) for Chinese called the Original Chinese Natural Language Inference dataset (OCNLI). Unlike recent attempts at extending NLI to other languages, our dataset does not rely on any automatic translation or non-expert annotation. Instead, we elicit annotations from native speakers specializing in linguistics. We follow closely the annotation protocol used for MNLI, but create new strategies for eliciting diverse hypotheses. We establish several baseline results on our dataset using state-of-the-art pre-trained models for Chinese, and find even the best performing models to be far outpaced by human performance (~12% absolute performance gap), making it a challenging new resource that we hope will help to accelerate progress in Chinese NLU. To the best of our knowledge, this is the first human-elicited MNLI-style corpus for a non-English language. 2020.findings-emnlp.314 @@ -4126,8 +4126,8 @@ RameshNallapati HenghuiZhu FengNan - CiceroNogueira dos Santos - KathleenMcKeown + CiceroNogueira dos Santos + KathleenMcKeown BingXiang 3527–3536 Unsupervised domain adaptation addresses the problem of leveraging labeled data in a source domain to learn a well-performing model in a target domain where labels are unavailable. In this paper, we improve upon a recent theoretical work (Zhang et al., 2019b) and adopt the Margin Disparity Discrepancy (MDD) unsupervised domain adaptation algorithm to solve the cross-lingual text labeling problems. Experiments on cross-lingual document classification and NER demonstrate the proposed domain adaptation approach advances the state-of-the-art results by a large margin. Specifically, we improve MDD by efficiently optimizing the margin loss on the source domain via Virtual Adversarial Training (VAT). This bridges the gap between theory and the loss function used in the original work Zhang et al.(2019b), and thereby significantly boosts the performance. Our numerical results also indicate that VAT can remarkably improve the generalization performance of both domains for various domain adaptation approaches. @@ -4139,7 +4139,7 @@ Rethinking Supervised Learning and Reinforcement Learning in Task-Oriented Dialogue Systems ZimingLi JuliaKiseleva - Maartende Rijke + Maartende Rijke 3537–3546 Dialogue policy learning for task-oriented dialogue systems has enjoyed great progress recently mostly through employing reinforcement learning methods. However, these approaches have become very sophisticated. It is time to re-evaluate it. Are we really making progress developing dialogue agents only based on reinforcement learning? We demonstrate how (1) traditional supervised learning together with (2) a simulator-free adversarial learning method can be used to achieve performance comparable to state-of-the-art reinforcement learning-based methods. First, we introduce a simple dialogue action decoder to predict the appropriate actions. Then, the traditional multi-label classification solution for dialogue policy learning is extended by adding dense layers to improve the dialogue agent performance. Finally, we employ the Gumbel-Softmax estimator to alternatively train the dialogue agent and the dialogue reward model without using reinforcement learning. Based on our extensive experimentation, we can conclude the proposed methods can achieve more stable and higher performance with fewer efforts, such as the domain knowledge required to design a user simulator and the intractable parameter tuning in reinforcement learning. Our main goal is not to beat RL with supervised learning, but to demonstrate the value of rethinking the role of reinforcement learning and supervised learning in optimizing task-oriented dialogue systems. 2020.findings-emnlp.316 @@ -4164,7 +4164,7 @@ ShuaiWang RishitaAnubhai MiguelBallesteros - YaserAl-Onaizan + YaserAl-Onaizan 3554–3559 Event argument extraction (EAE) aims to identify the arguments of an event and classify the roles that those arguments play. Despite great efforts made in prior work, there remain many challenges: (1) Data scarcity. (2) Capturing the long-range dependency, specifically, the connection between an event trigger and a distant event argument. (3) Integrating event trigger information into candidate argument representation. For (1), we explore using unlabeled data. For (2), we use Transformer that uses dependency parses to guide the attention mechanism. For (3), we propose a trigger-aware sequence encoder with several types of trigger-dependent sequence representations. We also support argument extraction either from text annotated with gold entities or from plain text. Experiments on the English ACE 2005 benchmark show that our approach achieves a new state-of-the-art. 2020.findings-emnlp.318 @@ -4263,7 +4263,7 @@ The Box is in the Pen: Evaluating Commonsense Reasoning in Neural Machine Translation JieHe TaoWang - DeyiXiong + DeyiXiong QunLiu 3662–3672 Does neural machine translation yield translations that are congenial with common sense? In this paper, we present a test suite to evaluate the commonsense reasoning capability of neural machine translation. The test suite consists of three test sets, covering lexical and contextless/contextual syntactic ambiguity that requires commonsense knowledge to resolve. We manually create 1,200 triples, each of which contain a source sentence and two contrastive translations, involving 7 different common sense types. Language models pretrained on large-scale corpora, such as BERT, GPT-2, achieve a commonsense reasoning accuracy of lower than 72% on target translations of this test suite. We conduct extensive experiments on the test suite to evaluate commonsense reasoning in neural machine translation and investigate factors that have impact on this capability. Our experiments and analyses demonstrate that neural machine translation performs poorly on commonsense reasoning of the three ambiguity types in terms of both reasoning accuracy ( 6 60.1%) and reasoning consistency (6 31%). We will release our test suite as a machine translation commonsense reasoning testbed to promote future work in this direction. @@ -4291,7 +4291,7 @@ Zi-YiDou DanqingWang XipengQiu - XuanjingHuang + XuanjingHuang 3679–3691 Neural network-based models augmented with unsupervised pre-trained knowledge have achieved impressive performance on text summarization. However, most existing evaluation methods are limited to an in-domain setting, where summarizers are trained and evaluated on the same dataset. We argue that this approach can narrow our understanding of the generalization ability for different summarization systems. In this paper, we perform an in-depth analysis of characteristics of different datasets and investigate the performance of different summarization models under a cross-dataset setting, in which a summarizer trained on one corpus will be evaluated on a range of out-of-domain corpora. A comprehensive study of 11 representative summarization systems on 5 datasets from different domains reveals the effect of model architectures and generation ways (i.e. abstractive and extractive) on model generalization ability. Further, experimental results shed light on the limitations of existing summarizers. Brief introduction and supplementary code can be found in https://github.com/zide05/CDEvalSumm. 2020.findings-emnlp.329 @@ -4354,7 +4354,7 @@ HantingSu DavidKartchner CassieMitchell - ChaoZhang + ChaoZhang 3739–3754 We study the problem of learning neural text classifiers without using any labeled data, but only easy-to-provide rules as multiple weak supervision sources. This problem is challenging because rule-induced weak labels are often noisy and incomplete. To address these two challenges, we design a label denoiser, which estimates the source reliability using a conditional soft attention mechanism and then reduces label noise by aggregating rule-annotated weak labels. The denoised pseudo labels then supervise a neural classifier to predicts soft labels for unmatched samples, which address the rule coverage issue. We evaluate our model on five benchmarks for sentiment, topic, and relation classifications. The results show that our model outperforms state-of-the-art weakly-supervised and semi-supervised methods consistently, and achieves comparable performance with fully-supervised methods even without any labeled data. Our code can be found at https://github.com/weakrules/Denoise-multi-weak-sources. 2020.findings-emnlp.334 @@ -4413,9 +4413,9 @@ Controllable Text Generation with Focused Variation LeiShu - AlexandrosPapangelis + AlexandrosPapangelis Yi-ChiaWang - GokhanTur + GokhanTur HuXu ZhalehFeizollahi BingLiu @@ -4434,7 +4434,7 @@ GargiSawhney AnmolShukla Keerthi KumarKallur - NathanaelChambers + NathanaelChambers NiranjanBalasubramanian 3818–3828 Preconditions provide a form of logical connection between events that explains why some events occur together and information that is complementary to the more widely studied relations such as causation, temporal ordering, entailment, and discourse relations. Modeling preconditions in text has been hampered in part due to the lack of large scale labeled data grounded in text. This paper introduces PeKo, a crowd-sourced annotation of preconditions between event pairs in newswire, an order of magnitude larger than prior text annotations. To complement this new corpus, we also introduce two challenge tasks aimed at modeling preconditions: (i) Precondition Identification – a standard classification task defined over pairs of event mentions, and (ii) Precondition Generation – a generative task aimed at testing a more general ability to reason about a given event. Evaluation on both tasks shows that modeling preconditions is challenging even for today’s large language models (LM). This suggests that precondition knowledge is not easily accessible in LM-derived representations alone. Our generation results show that fine-tuning an LM on PeKo yields better conditional relations than when trained on raw text or temporally-ordered corpora. @@ -4486,8 +4486,8 @@ Event-Related Bias Removal for Real-time Disaster Events SalvadorMedina Maza EvangeliaSpiliopoulou - EduardHovy - AlexanderHauptmann + EduardHovy + AlexanderHauptmann 3858–3868 Social media has become an important tool to share information about crisis events such as natural disasters and mass attacks. Detecting actionable posts that contain useful information requires rapid analysis of huge volumes of data in real-time. This poses a complex problem due to the large amount of posts that do not contain any actionable information. Furthermore, the classification of information in real-time systems requires training on out-of-domain data, as we do not have any data from a new emerging crisis. Prior work focuses on models pre-trained on similar event types. However, those models capture unnecessary event-specific biases, like the location of the event, which affect the generalizability and performance of the classifiers on new unseen data from an emerging new event. In our work, we train an adversarial neural model to remove latent event-specific biases and improve the performance on tweet importance classification. 2020.findings-emnlp.344 @@ -4514,7 +4514,7 @@ RenjieZheng HairongLiu KainanPeng - KennethChurch + KennethChurch LiangHuang 3886–3896 Text-to-speech synthesis (TTS) has witnessed rapid progress in recent years, where neural methods became capable of producing audios with high naturalness. However, these efforts still suffer from two types of latencies: (a) the computational latency (synthesizing time), which grows linearly with the sentence length, and (b) the input latency in scenarios where the input text is incrementally available (such as in simultaneous translation, dialog generation, and assistive technologies). To reduce these latencies, we propose a neural incremental TTS approach using the prefix-to-prefix framework from simultaneous translation. We synthesize speech in an online fashion, playing a segment of audio while generating the next, resulting in an O(1) rather than O(n) latency. Experiments on English and Chinese TTS show that our approach achieves similar speech naturalness compared to full sentence TTS, but only with a constant (1-2 words) latency. @@ -4557,7 +4557,7 @@ BaigongZheng KaiboLiu JiahongYuan - KennethChurch + KennethChurch LiangHuang 3928–3937 Simultaneous speech-to-speech translation is an extremely challenging but widely useful scenario that aims to generate target-language speech only a few seconds behind the source-language speech. In addition, we have to continuously translate a speech of multiple sentences, but all recent solutions merely focus on the single-sentence scenario. As a result, current approaches will accumulate more and more latencies in later sentences when the speaker talks faster and introduce unnatural pauses into translated speech when the speaker talks slower. To overcome these issues, we propose Self-Adaptive Translation which flexibly adjusts the length of translations to accommodate different source speech rates. At similar levels of translation quality (as measured by BLEU), our method generates more fluent target speech latency than the baseline, in both Zh<->En directions. @@ -4608,7 +4608,7 @@ DanishPruthi BhuwanDhingra GrahamNeubig - Zachary C.Lipton + Zachary C.Lipton 3965–3970 For many prediction tasks, stakeholders desire not only predictions but also supporting evidence that a human can use to verify its correctness. However, in practice, evidence annotations may only be available for a minority of training examples (if available at all). In this paper, we propose new methods to combine few evidence annotations (strong semi-supervision) with abundant document-level labels (weak supervision) for the task of evidence extraction. Evaluating on two classification tasks that feature evidence annotations, we find that our methods outperform baselines adapted from the interpretability literature to our task. Our approach yields gains with as few as hundred evidence annotations. 2020.findings-emnlp.353 @@ -4669,8 +4669,8 @@ Effects of Naturalistic Variation in Goal-Oriented Dialog JatinGanhotra - RobertMoore - SachindraJoshi + RobertMoore + SachindraJoshi KahiniWadhawan 4013–4020 Existing benchmarks used to evaluate the performance of end-to-end neural dialog systems lack a key component: natural variation present in human conversations. Most datasets are constructed through crowdsourcing, where the crowd workers follow a fixed template of instructions while enacting the role of a user/agent. This results in straight-forward, somewhat routine, and mostly trouble-free conversations, as crowd workers do not think to represent the full range of actions that occur naturally with real users. In this work, we investigate the impact of naturalistic variation on two goal-oriented datasets: bAbI dialog task and Stanford Multi-Domain Dataset (SMD). We also propose new and more effective testbeds for both datasets, by introducing naturalistic variation by the user. We observe that there is a significant drop in performance (more than 60% in Ent. F1 on SMD and 85% in per-dialog accuracy on bAbI task) of recent state-of-the-art end-to-end neural methods such as BossNet and GLMP on both datasets. @@ -4695,8 +4695,8 @@ <fixed-case>W</fixed-case>iki<fixed-case>L</fixed-case>ingua: A New Benchmark Dataset for Cross-Lingual Abstractive Summarization FaisalLadhak EsinDurmus - ClaireCardie - KathleenMcKeown + ClaireCardie + KathleenMcKeown 4034–4048 We introduce WikiLingua, a large-scale, multilingual dataset for the evaluation of cross-lingual abstractive summarization systems. We extract article and summary pairs in 18 languages from WikiHow, a high quality, collaborative resource of how-to guides on a diverse set of topics written by human authors. We create gold-standard article-summary alignments across languages by aligning the images that are used to describe each how-to step in an article. As a set of baselines for further studies, we evaluate the performance of existing cross-lingual abstractive summarization methods on our dataset. We further propose a method for direct cross-lingual summarization (i.e., without requiring translation at inference time) by leveraging synthetic data and Neural Machine Translation as a pre-training step. Our method significantly outperforms the baseline approaches, while being more cost efficient during inference. 2020.findings-emnlp.360 @@ -4837,7 +4837,7 @@ HaiZhao RuiWang MasaoUtiyama - EiichiroSumita + EiichiroSumita 4151–4162 Exploiting a common language as an auxiliary for better translation has a long tradition in machine translation and lets supervised learning-based machine translation enjoy the enhancement delivered by the well-used pivot language in the absence of a source language to target language parallel corpus. The rise of unsupervised neural machine translation (UNMT) almost completely relieves the parallel corpus curse, though UNMT is still subject to unsatisfactory performance due to the vagueness of the clues available for its core back-translation training. Further enriching the idea of pivot translation by extending the use of parallel corpora beyond the source-target paradigm, we propose a new reference language-based framework for UNMT, RUNMT, in which the reference language only shares a parallel corpus with the source, but this corpus still indicates a signal clear enough to help the reconstruction training of UNMT through a proposed reference agreement mechanism. Experimental results show that our methods improve the quality of UNMT over that of a strong baseline that uses only one auxiliary language, demonstrating the usefulness of the proposed reference language-based UNMT and establishing a good start for the community. 2020.findings-emnlp.371 @@ -4905,7 +4905,7 @@ Multichannel <fixed-case>G</fixed-case>enerative <fixed-case>L</fixed-case>anguage <fixed-case>M</fixed-case>odel: <fixed-case>L</fixed-case>earning <fixed-case>A</fixed-case>ll <fixed-case>P</fixed-case>ossible <fixed-case>F</fixed-case>actorizations <fixed-case>W</fixed-case>ithin and <fixed-case>A</fixed-case>cross <fixed-case>C</fixed-case>hannels HarrisChan - JamieKiros + JamieKiros WilliamChan 4208–4220 A channel corresponds to a viewpoint or transformation of an underlying meaning. A pair of parallel sentences in English and French express the same underlying meaning, but through two separate channels corresponding to their languages. In this work, we present the Multichannel Generative Language Model (MGLM). MGLM is a generative joint distribution model over channels. MGLM marginalizes over all possible factorizations within and across all channels. MGLM endows flexible inference, including unconditional generation, conditional generation (where 1 channel is observed and other channels are generated), and partially observed generation (where incomplete observations are spread across all the channels). We experiment with the Multi30K dataset containing English, French, Czech, and German. We demonstrate experiments with unconditional, conditional, and partially conditional generation. We provide qualitative samples sampled unconditionally from the generative joint distribution. We also quantitatively analyze the quality-diversity trade-offs and find MGLM outperforms traditional bilingual discriminative models. @@ -4964,7 +4964,7 @@ GabrielaPałka KarolKaczmarek AgnieszkaKaliska - FilipGraliński + FilipGraliński 4254–4268 We propose a new shared task of semantic retrieval from legal texts, in which a so-called contract discovery is to be performed – where legal clauses are extracted from documents, given a few examples of similar clauses from other legal acts. The task differs substantially from conventional NLI and shared tasks on legal information extraction (e.g., one has to identify text span instead of a single document, page, or paragraph). The specification of the proposed task is followed by an evaluation of multiple solutions within the unified framework proposed for this branch of methods. It is shown that state-of-the-art pretrained encoders fail to provide satisfactory results on the task proposed. In contrast, Language Model-based solutions perform better, especially when unsupervised fine-tuning is applied. Besides the ablation studies, we addressed questions regarding detection accuracy for relevant text fragments depending on the number of examples available. In addition to the dataset and reference results, LMs specialized in the legal domain were made publicly available. 2020.findings-emnlp.380 @@ -4997,7 +4997,7 @@ Detecting Media Bias in News Articles using <fixed-case>G</fixed-case>aussian Bias Distributions Wei-FanChen - KhalidAl Khatib + KhalidAl Khatib BennoStein HenningWachsmuth 4290–4300 @@ -5034,8 +5034,8 @@ Looking inside Noun Compounds: Unsupervised Prepositional and Free Paraphrasing GirishkumarPonkiya RudraMurthy - PushpakBhattacharyya - GirishPalshikar + PushpakBhattacharyya + GirishPalshikar 4313–4323 A noun compound is a sequence of contiguous nouns that acts as a single noun, although the predicate denoting the semantic relation between its components is dropped. Noun Compound Interpretation is the task of uncovering the relation, in the form of a preposition or a free paraphrase. Prepositional paraphrasing refers to the use of preposition to explain the semantic relation, whereas free paraphrasing refers to invoking an appropriate predicate denoting the semantic relation. In this paper, we propose an unsupervised methodology for these two types of paraphrasing. We use pre-trained contextualized language models to uncover the ‘missing’ words (preposition or predicate). These language models are usually trained to uncover the missing word/words in a given input sentence. Our approach uses templates to prepare the input sequence for the language model. The template uses a special token to indicate the missing predicate. As the model has already been pre-trained to uncover a missing word (or a sequence of words), we exploit it to predict missing words for the input sequence. Our experiments using four datasets show that our unsupervised approach (a) performs comparably to supervised approaches for prepositional paraphrasing, and (b) outperforms supervised approaches for free paraphrasing. Paraphrasing (prepositional or free) using our unsupervised approach is potentially helpful for NLP tasks like machine translation and information extraction. 2020.findings-emnlp.386 @@ -5104,7 +5104,7 @@ YanyanLan LiangPang JiafengGuo - XueqiCheng + XueqiCheng 4379–4389 This paper proposes a novel approach to learn commonsense from images, instead of limited raw texts or costly constructed knowledge bases, for the commonsense reasoning problem in NLP. Our motivation comes from the fact that an image is worth a thousand words, where richer scene information could be leveraged to help distill the commonsense knowledge, which is often hidden in languages. Our approach, namely Loire, consists of two stages. In the first stage, a bi-modal sequence-to-sequence approach is utilized to conduct the scene layout generation task, based on a text representation model ViBERT. In this way, the required visual scene knowledge, such as spatial relations, will be encoded in ViBERT by the supervised learning process with some bi-modal data like COCO. Then ViBERT is concatenated with a pre-trained language model to perform the downstream commonsense reasoning tasks. Experimental results on two commonsense reasoning problems, i.e.commonsense question answering and pronoun resolution, demonstrate that Loire outperforms traditional language-based methods. We also give some case studies to show what knowledge is learned from images and explain how the generated scene layout helps the commonsense reasoning process. 2020.findings-emnlp.392 @@ -5137,7 +5137,7 @@ Visually-Grounded Planning without Vision: Language Models Infer Detailed Plans from High-level Instructions - PeterJansen + PeterJansen 4412–4417 The recently proposed ALFRED challenge task aims for a virtual robotic agent to complete complex multi-step everyday tasks in a virtual home environment from high-level natural language directives, such as “put a hot piece of bread on a plate”. Currently, the best-performing models are able to complete less than 1% of these tasks successfully. In this work we focus on modeling the translation problem of converting natural language directives into detailed multi-step sequences of actions that accomplish those goals in the virtual environment. We empirically demonstrate that it is possible to generate gold multi-step plans from language directives alone without any visual input in 26% of unseen cases. When a small amount of visual information, the starting location in the virtual environment, is incorporated, our best-performing GPT-2 model successfully generates gold command sequences in 58% of cases, suggesting contextualized language models may provide strong planning modules for grounded virtual agents. 2020.findings-emnlp.395 @@ -5266,7 +5266,7 @@ An Empirical Investigation of Beam-Aware Training in Supertagging RenatoNegrinho - Matthew R.Gormley + Matthew R.Gormley GeoffGordon 4534–4542 Structured prediction is often approached by training a locally normalized model with maximum likelihood and decoding approximately with beam search. This approach leads to mismatches as, during training, the model is not exposed to its mistakes and does not use beam search. Beam-aware training aims to address these problems, but unfortunately, it is not yet widely used due to a lack of understanding about how it impacts performance, when it is most useful, and whether it is stable. Recently, Negrinho et al. (2018) proposed a meta-algorithm that captures beam-aware training algorithms and suggests new ones, but unfortunately did not provide empirical results. In this paper, we begin an empirical investigation: we train the supertagging model of Vaswani et al. (2018) and a simpler model with instantiations of the meta-algorithm. We explore the influence of various design choices and make recommendations for choosing them. We observe that beam-aware training improves performance for both models, with large improvements for the simpler model which must effectively manage uncertainty during decoding. Our results suggest that a model must be learned with search to maximize its effectiveness. @@ -5281,7 +5281,7 @@ AmirPouran Ben Veyseh NasimNouri FranckDernoncourt - Quan HungTran + Quan HungTran DejingDou Thien HuuNguyen 4543–4548 @@ -5294,7 +5294,7 @@ <fixed-case>D</fixed-case>ecoding Language Spatial Relations to 2<fixed-case>D</fixed-case> Spatial Arrangements GorjanRadevski GuillemCollell - Marie-FrancineMoens + Marie-FrancineMoens TinneTuytelaars 4549–4560 We address the problem of multimodal spatial understanding by decoding a set of language-expressed spatial relations to a set of 2D spatial arrangements in a multi-object and multi-relationship setting. We frame the task as arranging a scene of clip-arts given a textual description. We propose a simple and effective model architecture Spatial-Reasoning Bert (SR-Bert), trained to decode text to 2D spatial arrangements in a non-autoregressive manner. SR-Bert can decode both explicit and implicit language to 2D spatial arrangements, generalizes to out-of-sample data to a reasonable extent and can generate complete abstract scenes if paired with a clip-arts predictor. Finally, we qualitatively evaluate our method with a user study, validating that our generated spatial arrangements align with human expectation. @@ -5420,7 +5420,7 @@ ChandraBhagavatula MaxwellForbes RonanLe Bras - Noah A.Smith + Noah A.Smith YejinChoi 4661–4675 Defeasible inference is a mode of reasoning in which an inference (X is a bird, therefore X flies) may be weakened or overturned in light of new evidence (X is a penguin). Though long recognized in classical AI and philosophy, defeasible inference has not been extensively studied in the context of contemporary data-driven research on natural language inference and commonsense reasoning. We introduce Defeasible NLI (abbreviated \delta-NLI), a dataset for defeasible inference in natural language. Defeasible NLI contains extensions to three existing inference datasets covering diverse modes of reasoning: common sense, natural language inference, and social norms. From Defeasible NLI, we develop both a classification and generation task for defeasible inference, and demonstrate that the generation task is much more challenging. Despite lagging human performance, however, generative models trained on this data are capable of writing sentences that weaken or strengthen a specified inference up to 68% of the time. @@ -5445,7 +5445,7 @@ Language-<fixed-case>C</fixed-case>onditioned <fixed-case>F</fixed-case>eature <fixed-case>P</fixed-case>yramids for <fixed-case>V</fixed-case>isual <fixed-case>S</fixed-case>election <fixed-case>T</fixed-case>asks TaichiIki - AkikoAizawa + AkikoAizawa 4687–4697 Referring expression comprehension, which is the ability to locate language to an object in an image, plays an important role in creating common ground. Many models that fuse visual and linguistic features have been proposed. However, few models consider the fusion of linguistic features with multiple visual features with different sizes of receptive fields, though the proper size of the receptive field of visual features intuitively varies depending on expressions. In this paper, we introduce a neural network architecture that modulates visual features with varying sizes of receptive field by linguistic features. We evaluate our architecture on tasks related to referring expression comprehension in two visual dialogue games. The results show the advantages and broad applicability of our architecture. Source code is available at https://github.com/Alab-NII/lcfp . 2020.findings-emnlp.420 @@ -5474,7 +5474,7 @@ JiajiePeng ZengfengHuang WeijianSun - XuanjingHuang + XuanjingHuang 4705–4710 Terms contained in Gene Ontology (GO) have been widely used in biology and bio-medicine. Most previous research focuses on inferring new GO terms, while the term names that reflect the gene function are still named by the experts. To fill this gap, we propose a novel task, namely term name generation for GO, and build a large-scale benchmark dataset. Furthermore, we present a graph-based generative model that incorporates the relations between genes, words and terms for term name generation, which exhibits great advantages over the strong baselines. 2020.findings-emnlp.422 @@ -5527,9 +5527,9 @@ Finding <fixed-case>F</fixed-case>riends and Flipping Frenemies: Automatic Paraphrase Dataset Augmentation Using Graph Theory - HannahChen + HannahChen YangfengJi - DavidEvans + DavidEvans 4741–4751 Most NLP datasets are manually labeled, so suffer from inconsistent labeling or limited size. We propose methods for automatically improving datasets by viewing them as graphs with expected semantic properties. We construct a paraphrase graph from the provided sentence pair labels, and create an augmented dataset by directly inferring labels from the original sentence pairs using a transitivity property. We use structural balance theory to identify likely mislabelings in the graph, and flip their labels. We evaluate our methods on paraphrase models trained using these datasets starting from a pretrained BERT model, and find that the automatically-enhanced training sets result in more accurate models. 2020.findings-emnlp.426 @@ -5555,7 +5555,7 @@ IsabelCachola KyleLo ArmanCohan - DanielWeld + DanielWeld 4766–4777 We introduce TLDR generation, a new form of extreme summarization, for scientific papers. TLDR generation involves high source compression and requires expert background knowledge and understanding of complex domain-specific language. To facilitate study on this task, we introduce SCITLDR, a new multi-target dataset of 5.4K TLDRs over 3.2K papers. SCITLDR contains both author-written and expert-derived TLDRs, where the latter are collected using a novel annotation protocol that produces high-quality summaries while minimizing annotation burden. We propose CATTS, a simple yet effective learning strategy for generating TLDRs that exploits titles as an auxiliary training signal. CATTS improves upon strong baselines under both automated metrics and human evaluations. Data and code are publicly available at https://github.com/allenai/scitldr. 2020.findings-emnlp.428 @@ -5682,7 +5682,7 @@ Bridging Textual and Tabular Data for Cross-Domain Text-to-<fixed-case>SQL</fixed-case> Semantic Parsing - Xi VictoriaLin + Xi VictoriaLin RichardSocher CaimingXiong 4870–4888 @@ -5724,7 +5724,7 @@ LinQiu HaoZhou MingxuanWang - WeinanZhang + WeinanZhang YongYu LeiLi 4908–4917 @@ -5777,7 +5777,7 @@ SatishGolla GokulN.C. AvikBhattacharyya - Mitesh M.Khapra + Mitesh M.Khapra PratyushKumar 4948–4961 In this paper, we introduce NLP resources for 11 major Indian languages from two major language families. These resources include: (a) large-scale sentence-level monolingual corpora, (b) pre-trained word embeddings, (c) pre-trained language models, and (d) multiple NLU evaluation datasets (IndicGLUE benchmark). The monolingual corpora contains a total of 8.8 billion tokens across all 11 languages and Indian English, primarily sourced from news crawls. The word embeddings are based on FastText, hence suitable for handling morphological complexity of Indian languages. The pre-trained language models are based on the compact ALBERT model. Lastly, we compile the (IndicGLUE benchmark for Indian language NLU. To this end, we create datasets for the following tasks: Article Genre Classification, Headline Prediction, Wikipedia Section-Title Prediction, Cloze-style Multiple choice QA, Winograd NLI and COPA. We also include publicly available datasets for some Indic languages for tasks like Named Entity Recognition, Cross-lingual Sentence Retrieval, Paraphrase detection, etc. Our embeddings are competitive or better than existing pre-trained embeddings on multiple tasks. We hope that the availability of the dataset will accelerate Indic NLP research which has the potential to impact more than a billion people. It can also help the community in evaluating advances in NLP over a more diverse pool of languages. The data and models are available at https://indicnlp.ai4bharat.org. @@ -5788,7 +5788,7 @@ Weakly-Supervised Modeling of Contextualized Event Embedding for Discourse Relations I-TaLee - Maria LeonorPacheco + Maria LeonorPacheco DanGoldwasser 4962–4972 Representing, and reasoning over, long narratives requires models that can deal with complex event structures connected through multiple relationship types. This paper suggests to represent this type of information as a narrative graph and learn contextualized event representations over it using a relational graph neural network model. We train our model to capture event relations, derived from the Penn Discourse Tree Bank, on a huge corpus, and show that our multi-relational contextualized event representation can improve performance when learning script knowledge without direct supervision and provide a better representation for the implicit discourse sense classification task. diff --git a/data/xml/2020.finnlp.xml b/data/xml/2020.finnlp.xml index e7e7d8c405..7ae35654a9 100644 --- a/data/xml/2020.finnlp.xml +++ b/data/xml/2020.finnlp.xml @@ -78,7 +78,7 @@ Using Extractive Lexicon-based Sentiment Analysis to Enhance Understanding ofthe Impact of Non-<fixed-case>GAAP</fixed-case> Measures in Financial Reporting StaceyTaylor - VladoKeselj + VladoKeselj 40–46 2020.finnlp-1.7 taylor-keselj-2020-using @@ -123,7 +123,7 @@ AmanKhullar Sarath ChandraPakala VishnuRamesh - ManishShrivastava + ManishShrivastava 75–80 2020.finnlp-1.12 arora-etal-2020-subtl diff --git a/data/xml/2020.fnp.xml b/data/xml/2020.fnp.xml index 3752fcbd16..c75a4fb359 100644 --- a/data/xml/2020.fnp.xml +++ b/data/xml/2020.fnp.xml @@ -26,7 +26,7 @@ The Financial Narrative Summarisation Shared Task (<fixed-case>FNS</fixed-case> 2020) MahmoudEl-Haj - AhmedAbuRa’ed + AhmedAbuRa’ed MarinaLitvak NikiforosPittaras GeorgeGiannakopoulos @@ -39,7 +39,7 @@ The Financial Document Structure Extraction Shared task (<fixed-case>F</fixed-case>in<fixed-case>T</fixed-case>oc 2020) Najah-ImaneBentabet RémiJuge - IsmailEl Maarouf + IsmailEl Maarouf VirginieMouilleron DialektiValsamou-Stanislawski MahmoudEl-Haj @@ -65,7 +65,7 @@ <fixed-case>L</fixed-case>ang<fixed-case>R</fixed-case>esearch<fixed-case>L</fixed-case>ab_<fixed-case>NC</fixed-case> at <fixed-case>F</fixed-case>in<fixed-case>C</fixed-case>ausal 2020, Task 1: A Knowledge Induced Neural Net for Causality Detection RakshaAgarwal IshaanVerma - NiladriChatterjee + NiladriChatterjee 33–39 Identifying causal relationships in a text is essential for achieving comprehensive natural language understanding. The present work proposes a combination of features derived from pre-trained BERT with linguistic features for training a supervised classifier for the task of Causality Detection. The Linguistic features help to inject knowledge about the semantic and syntactic structure of the input sentences. Experiments on the FinCausal Shared Task1 datasets indicate that the combination of Linguistic features with BERT improves overall performance for causality detection. The proposed system achieves a weighted average F1 score of 0.952 on the post-evaluation dataset. 2020.fnp-1.4 @@ -84,7 +84,7 @@ DenisGordeev AdisDavletov AlexeyRey - NikolayArefiev + NikolayArefiev 45–49 In this paper, we describe the results of team LIORI at the FinCausal 2020 Shared task held as a part of the 1st Joint Workshop on Financial Narrative Processing and MultiLingual Financial Summarisation. The shared task consisted of two subtasks: classifying whether a sentence contains any causality and labelling phrases that indicate causes and consequences. Our team ranked 1st in the first subtask and 4th in the second one. We used Transformer-based models with joint-task learning and their ensembles. 2020.fnp-1.6 @@ -114,7 +114,7 @@ <fixed-case>NITK</fixed-case> <fixed-case>NLP</fixed-case> at <fixed-case>F</fixed-case>in<fixed-case>C</fixed-case>ausal-2020 Task 1 Using <fixed-case>BERT</fixed-case> and Linear models. HariharanR L - Anand KumarM + Anand KumarM 60–63 FinCausal-2020 is the shared task which focuses on the causality detection of factual data for financial analysis. The financial data facts don’t provide much explanation on the variability of these data. This paper aims to propose an efficient method to classify the data into one which is having any financial cause or not. Many models were used to classify the data, out of which SVM model gave an F-Score of 0.9435, BERT with specific fine-tuning achieved best results with F-Score of 0.9677. 2020.fnp-1.9 @@ -242,7 +242,7 @@ Knowledge Graph and Deep Neural Network for Extractive Text Summarization by Utilizing Triples AmitVhatkar - PushpakBhattacharyya + PushpakBhattacharyya KaviArya 130–136 In our research work, we represent the content of the sentence in graphical form after extracting triples from the sentences. In this paper, we will discuss novel methods to generate an extractive summary by scoring the triples. Our work has also touched upon sequence-to-sequence encoding of the content of the sentence, to classify it as a summary or a non-summary sentence. Our findings help to decide the nature of the sentences forming the summary and the length of the system generated summary as compared to the length of the reference summary. @@ -271,7 +271,7 @@ <fixed-case>SUMSUM</fixed-case>@<fixed-case>FNS</fixed-case>-2020 Shared Task SiyanZheng AnnelieseLu - ClaireCardie + ClaireCardie 148–152 This paper describes the SUMSUM systems submitted to the Financial Narrative Summarization Shared Task (FNS-2020). We explore a section-based extractive summarization method tailored to the structure of financial reports: our best system parses the report Table of Contents (ToC), splits the report into narrative sections based on the ToC, and applies a BERT-based classifier to each section to determine whether it should be included in the summary. Our best system ranks 4^{\textrm{th}}, 1^{\textrm{st}}, 2^{\textrm{nd}} and 17^{\textrm{th}} on the Rouge-1, Rouge-2, Rouge-SU4, and Rouge-L official metrics, respectively. We also report results on the validation set using an alternative set of Rouge-based metrics that measure performance with respect to the best-matching of the available gold summaries. 2020.fnp-1.25 @@ -345,7 +345,7 @@ Mitigating Silence in Compliance Terminology during Parsing of Utterances - EsmeManandise + EsmeManandise Conradde Peuter 204–212 This paper reports on an approach to increase multi-token-term recall in a parsing task. We use a compliance-domain parser to extract, during the process of parsing raw text, terms that are unlisted in the terminology. The parser uses a similarity measure (Generalized Dice Coefficient) between listed terms and unlisted term candidates to (i) determine term status, (ii) serve putative terms to the parser, (iii) decrease parsing complexity by glomming multi-tokens as lexical singletons, and (iv) automatically augment the terminology after parsing of an utterance completes. We illustrate a small experiment with examples from the tax-and-regulations domain. Bootstrapping the parsing process to detect out- of-vocabulary terms at runtime increases parsing accuracy in addition to producing other benefits to a natural-language-processing pipeline, which translates arithmetic calculations written in English into computer-executable operations. @@ -374,7 +374,7 @@ Extracting Fine-Grained Economic Events from Business News GillesJacobs - VeroniqueHoste + VeroniqueHoste 235–245 Based on a recently developed fine-grained event extraction dataset for the economic domain, we present in a pilot study for supervised economic event extraction. We investigate how a state-of-the-art model for event extraction performs on the trigger and argument identification and classification. While F1-scores of above 50% are obtained on the task of trigger identification, we observe a large gap in performance compared to results on the benchmark ACE05 dataset. We show that single-token triggers do not provide sufficient discriminative information for a fine-grained event detection setup in a closed domain such as economics, since many classes have a large degree of lexico-semantic and contextual overlap. 2020.fnp-1.36 diff --git a/data/xml/2020.framenet.xml b/data/xml/2020.framenet.xml index 62c03c1f96..0221946b99 100644 --- a/data/xml/2020.framenet.xml +++ b/data/xml/2020.framenet.xml @@ -3,11 +3,11 @@ Proceedings of the International FrameNet Workshop 2020: Towards a Global, Multilingual FrameNet - Tiago T.Torrent - Collin F.Baker + Tiago T.Torrent + Collin F.Baker OliverCzulo KyokoOhara - Miriam R. L.Petruck + Miriam R. L.Petruck European Language Resources Association
Marseille, France
May @@ -54,7 +54,7 @@ FredericoBelcavello MarceloViridiano AlexandreDiniz da Costa - Ely Edison da SilvaMatos + Ely Edison da SilvaMatos Tiago TimponiTorrent 23–30 Multimodal aspects of human communication are key in several applications of Natural Language Processing, such as Machine Translation and Natural Language Generation. Despite recent advances in integrating multimodality into Computational Linguistics, the merge between NLP and Computer Vision techniques is still timid, especially when it comes to providing fine-grained accounts for meaning construction. This paper reports on research aiming to determine appropriate methodology and develop a computational tool to annotate multimodal corpora according to a principled structured semantic representation of events, relations and entities: FrameNet. Taking a Brazilian television travel show as corpus, a pilot study was conducted to annotate the frames that are evoked by the audio and the ones that are evoked by visual elements. We also implemented a Multimodal Annotation tool which allows annotators to choose frames and locate frame elements both in the text and in the images, while keeping track of the time span in which those elements are active in each modality. Results suggest that adding a multimodal domain to the linguistic layer of annotation and analysis contributes both to enrich the kind of information that can be tagged in a corpus, and to enhance FrameNet as a model of linguistic cognition. @@ -87,7 +87,7 @@
<fixed-case>G</fixed-case>reek within the Global <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Initiative: Challenges and Conclusions so far - VoulaGiouli + VoulaGiouli VeraPilitsidou HephaestionChristopoulos 48–55 @@ -98,7 +98,7 @@ Using Verb Frames for Text Difficulty Assessment - JohnLee + JohnLee MeichunLiu TianyuanCai 56–62 @@ -109,11 +109,11 @@ Deriving a <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank Corpus from Parallel <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et and <fixed-case>UD</fixed-case> Corpora - NormundsGruzitis + NormundsGruzitis RobertsDarģis LauraRituma GuntaNešpore-Bērzkalne - BaibaSaulite + BaibaSaulite 63–69 We propose an approach for generating an accurate and consistent PropBank-annotated corpus, given a FrameNet-annotated corpus which has an underlying dependency annotation layer, namely, a parallel Universal Dependencies (UD) treebank. The PropBank annotation layer of such a multi-layer corpus can be semi-automatically derived from the existing FrameNet and UD annotation layers, by providing a mapping configuration from lexical units in [a non-English language] FrameNet to [English language] PropBank predicates, and a mapping configuration from FrameNet frame elements to PropBank semantic arguments for the given pair of a FrameNet frame and a PropBank predicate. The latter mapping generally depends on the underlying UD syntactic relations. To demonstrate our approach, we use Latvian FrameNet, annotated on top of Latvian UD Treebank, for generating Latvian PropBank in compliance with the Universal Propositions approach. 2020.framenet-1.9 @@ -142,9 +142,9 @@ Building Multilingual Specialized Resources Based on <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et: Application to the Field of the Environment - Marie-ClaudeL’ Homme + Marie-ClaudeL’ Homme BenoîtRobichaud - CarlosSubirats + CarlosSubirats 85–92 The methodology developed within the FrameNet project is being used to compile resources in an increasing number of specialized fields of knowledge. The methodology along with the theoretical principles on which it is based, i.e. Frame Semantics, are especially appealing as they allow domain-specific resources to account for the conceptual background of specialized knowledge and to explain the linguistic properties of terms against this background. This paper presents a methodology for building a multilingual resource that accounts for terms of the environment. After listing some lexical and conceptual differences that need to be managed in such a resource, we explain how the FrameNet methodology is adapted for describing terms in different languages. We first applied our methodology to French and then extended it to English. Extensions to Spanish, Portuguese and Chinese were made more recently. Up to now, we have defined 190 frames: 112 frames are new; 38 are used as such; and 40 are slightly different (a different number of obligatory participants; a significant alternation, etc.) when compared to Berkeley FrameNet. 2020.framenet-1.12 diff --git a/data/xml/2020.gamnlp.xml b/data/xml/2020.gamnlp.xml index 272055ea90..67b0cac4f1 100644 --- a/data/xml/2020.gamnlp.xml +++ b/data/xml/2020.gamnlp.xml @@ -3,7 +3,7 @@ Workshop on Games and Natural Language Processing - Stephanie M.Lukin + Stephanie M.Lukin European Language Resources Association
Marseille, France
May @@ -19,7 +19,7 @@ Creating a Sentiment Lexicon with Game-Specific Words for Analyzing <fixed-case>NPC</fixed-case> Dialogue in The Elder Scrolls <fixed-case>V</fixed-case>: Skyrim ThérèseBergsma Judithvan Stegeren - MariëtTheune + MariëtTheune 1–9 A weak point of rule-based sentiment analysis systems is that the underlying sentiment lexicons are often not adapted to the domain of the text we want to analyze. We created a game-specific sentiment lexicon for video game Skyrim based on the E-ANEW word list and a dataset of Skyrim’s in-game documents. We calculated sentiment ratings for NPC dialogue using both our lexicon and E-ANEW and compared the resulting sentiment ratings to those of human raters. Both lexicons perform comparably well on our evaluation dialogues, but the game-specific extension performs slightly better on the dominance dimension for dialogue segments and the arousal dimension for full dialogues. To our knowledge, this is the first time that a sentiment analysis lexicon has been adapted to the video game domain. 2020.gamnlp-1.1 @@ -125,7 +125,7 @@ RichardBartle JonChamberlain SilviuPaun - MassimoPoesio + MassimoPoesio 79–84 As the uses of Games-With-A-Purpose (GWAPs) broadens, the systems that incorporate its usages have expanded in complexity. The types of annotations required within the NLP paradigm set such an example, where tasks can involve varying complexity of annotations. Assigning more complex tasks to more skilled players through a progression mechanism can achieve higher accuracy in the collected data while acting as a motivating factor that rewards the more skilled players. In this paper, we present the progression technique implemented in Wormingo , an NLP GWAP that currently includes two layers of task complexity. For the experiment, we have implemented four different progression scenarios on 192 players and compared the accuracy and engagement achieved with each scenario. 2020.gamnlp-1.11 diff --git a/data/xml/2020.gebnlp.xml b/data/xml/2020.gebnlp.xml index ba6e2a2935..280a0b3ccf 100644 --- a/data/xml/2020.gebnlp.xml +++ b/data/xml/2020.gebnlp.xml @@ -3,7 +3,7 @@ Proceedings of the Second Workshop on Gender Bias in Natural Language Processing - Marta R.Costa-jussà + Marta R.Costa-jussà ChristianHardmeier WillRadford KellieWebster @@ -60,7 +60,7 @@ MasashiTakeshita YukiKatsumata RafalRzepka - KenjiAraki + KenjiAraki 44–55 It is known that word embeddings exhibit biases inherited from the corpus, and those biases reflect social stereotypes. Recently, many studies have been conducted to analyze and mitigate biases in word embeddings. Unsupervised Bias Enumeration (UBE) (Swinger et al., 2019) is one of approach to analyze biases for English, and Hard Debias (Bolukbasi et al., 2016) is the common technique to mitigate gender bias. These methods focused on English, or, in smaller extent, on Indo-European languages. However, it is not clear whether these methods can be generalized to other languages. In this paper, we apply these analyzing and mitigating methods, UBE and Hard Debias, to Japanese word embeddings. Additionally, we examine whether these methods can be used for Japanese. We experimentally show that UBE and Hard Debias cannot be sufficiently adapted to Japanese embeddings. 2020.gebnlp-1.5 @@ -100,7 +100,7 @@ Investigating Societal Biases in a Poetry Composition System EmilySheng - DavidUthus + DavidUthus 93–106 There is a growing collection of work analyzing and mitigating societal biases in language understanding, generation, and retrieval tasks, though examining biases in creative tasks remains underexplored. Creative language applications are meant for direct interaction with users, so it is important to quantify and mitigate societal biases in these applications. We introduce a novel study on a pipeline to mitigate societal biases when retrieving next verse suggestions in a poetry composition system. Our results suggest that data augmentation through sentiment style transfer has potential for mitigating societal biases. 2020.gebnlp-1.9 @@ -111,7 +111,7 @@ LucyHavens MelissaTerras BenjaminBach - BeatriceAlex + BeatriceAlex 107–124 We propose a bias-aware methodology to engage with power relations in natural language processing (NLP) research. NLP research rarely engages with bias in social contexts, limiting its ability to mitigate bias. While researchers have recommended actions, technical methods, and documentation practices, no methodology exists to integrate critical reflections on bias with technical NLP methods. In this paper, after an extensive and interdisciplinary literature review, we contribute a bias-aware methodology for NLP research. We also contribute a definition of biased text, a discussion of the implications of biased NLP systems, and a case study demonstrating how we are executing the bias-aware methodology in research on archival metadata descriptions. 2020.gebnlp-1.10 @@ -120,7 +120,7 @@ Gender and sentiment, critics and authors: a dataset of <fixed-case>N</fixed-case>orwegian book reviews SamiaTouileb - LiljaØvrelid + LiljaØvrelid ErikVelldal 125–138 Gender bias in models and datasets is widely studied in NLP. The focus has usually been on analysing how females and males express themselves, or how females and males are described. However, a less studied aspect is the combination of these two perspectives, how female and male describe the same or opposite gender. In this paper, we present a new gender annotated sentiment dataset of critics reviewing the works of female and male authors. We investigate if this newly annotated dataset contains differences in how the works of male and female authors are critiqued, in particular in terms of positive and negative sentiment. We also explore the differences in how this is done by male and female critics. We show that there are differences in how critics assess the works of authors of the same or opposite gender. For example, male critics rate crime novels written by females, and romantic and sentimental works written by males, more negatively. diff --git a/data/xml/2020.globalex.xml b/data/xml/2020.globalex.xml index 3d8a8c99b8..8db25a9a9b 100644 --- a/data/xml/2020.globalex.xml +++ b/data/xml/2020.globalex.xml @@ -5,7 +5,7 @@ Proceedings of the 2020 Globalex Workshop on Linked Lexicography IlanKernerman SimonKrek - John P.McCrae + John P.McCrae JorgeGracia SinaAhmadi BesimKabashi @@ -26,7 +26,7 @@ MaximIonov Jessede Does KatrienDepuydt - Anas FahadKhan + Anas FahadKhan SanderStolk ThierryDeclerck John PhilipMcCrae @@ -38,10 +38,10 @@ <fixed-case>S</fixed-case>yn<fixed-case>S</fixed-case>em<fixed-case>C</fixed-case>lass Linked Lexicon: Mapping Synonymy between Languages - ZdenkaUresova - EvaFucikova - EvaHajicova - JanHajic + ZdenkaUresova + EvaFucikova + EvaHajicova + JanHajic 10–19 This paper reports on an extended version of a synonym verb class lexicon, newly called SynSemClass (formerly CzEngClass). This lexicon stores cross-lingual semantically similar verb senses in synonym classes extracted from a richly annotated parallel corpus, the Prague Czech-English Dependency Treebank. When building the lexicon, we make use of predicate-argument relations (valency) and link them to semantic roles; in addition, each entry is linked to several external lexicons of more or less “semantic” nature, namely FrameNet, WordNet, VerbNet, OntoNotes and PropBank, and Czech VALLEX. The aim is to provide a linguistic resource that can be used to compare semantic roles and their syntactic properties and features across languages within and across synonym groups (classes, or ’synsets’), as well as gold standard data for automatic NLP experiments with such synonyms, such as synonym discovery, feature mapping, etc. However, perhaps the most important goal is to eventually build an event type ontology that can be referenced and used as a human-readable and human-understandable “database” for all types of events, processes and states. While the current paper describes primarily the content of the lexicon, we are also presenting a preliminary design of a format compatible with Linked Data, on which we are hoping to get feedback during discussions at the workshop. Once the resource (in whichever form) is applied to corpus annotation, deep analysis will be possible using such combined resources as training data. 2020.globalex-1.2 @@ -80,7 +80,7 @@ Widening the Discussion on “False <fixed-case>F</fixed-case>riends” in Multilingual Wordnets - HugoGonçalo Oliveira + HugoGonçalo Oliveira AnaLuís 36 There are wordnets in many languages, many aligned with Princeton WordNet, some of which in a (semi-)automatic process, but we rarely see actual discussions on the role of false friends in this process. Having in mind known issues related to such words in language translation, and further motivated by false friend-related issues on the alignment of a Portuguese wordnet with Princeton Wordnet, we aim to widen this discussion, while suggesting preliminary ideas of how wordnets could benefit from this kind of research. @@ -100,8 +100,8 @@ Building Sense Representations in <fixed-case>D</fixed-case>anish by Combining Word Embeddings with Lexical Resources IdaRørmann Olsen - BolettePedersen - AsadSayeed + BolettePedersen + AsadSayeed 45–52 Our aim is to identify suitable sense representations for NLP in Danish. We investigate sense inventories that correlate with human interpretations of word meaning and ambiguity as typically described in dictionaries and wordnets and that are well reflected distributionally as expressed in word embeddings. To this end, we study a number of highly ambiguous Danish nouns and examine the effectiveness of sense representations constructed by combining vectors from a distributional model with the information from a wordnet. We establish representations based on centroids obtained from wordnet synests and example sentences as well as representations established via are tested in a word sense disambiguation task. We conclude that the more information extracted from the wordnet entries (example sentence, definition, semantic relations) the more successful the sense representation vector. 2020.globalex-1.8 @@ -140,7 +140,7 @@ <fixed-case>MWSA</fixed-case> Task at <fixed-case>G</fixed-case>loba<fixed-case>L</fixed-case>ex 2020: <fixed-case>RACAI</fixed-case>’s Word Sense Alignment System using a Similarity Measurement of Dictionary Definitions VasilePais - DanTufiș + DanTufiș RaduIon 69–75 This paper describes RACAI’s word sense alignment system, which participated in the Monolingual Word Sense Alignment shared task organized at GlobaLex 2020 workshop. We discuss the system architecture, some of the challenges that we faced as well as present our results on several of the languages available for the task. @@ -173,7 +173,7 @@ <fixed-case>NUIG</fixed-case> at <fixed-case>TIAD</fixed-case>: Combining Unsupervised <fixed-case>NLP</fixed-case> and Graph Metrics for Translation Inference John PhilipMcCrae - MihaelArcan + MihaelArcan 92–97 In this paper, we present the NUIG system at the TIAD shard task. This system includes graph-based metrics calculated using novel algorithms, with an unsupervised document embedding tool called ONETA and an unsupervised multi-way neural machine translation method. The results are an improvement over our previous system and produce the highest precision among all systems in the task as well as very competitive F-Measure results. Incorporating features from other systems should be easy in the framework we describe in this paper, suggesting this could very easily be extended to an even stronger result. 2020.globalex-1.15 diff --git a/data/xml/2020.icon.xml b/data/xml/2020.icon.xml index 003dab2521..8c9cc74590 100644 --- a/data/xml/2020.icon.xml +++ b/data/xml/2020.icon.xml @@ -3,8 +3,8 @@ Proceedings of the 17th International Conference on Natural Language Processing (ICON) - PushpakBhattacharyya - Dipti MisraSharma + PushpakBhattacharyya + Dipti MisraSharma RajeevSangal NLP Association of India (NLPAI)
Indian Institute of Technology Patna, Patna, India
@@ -20,7 +20,7 @@ The <fixed-case>WEAVE</fixed-case> Corpus: Annotating Synthetic Chemical Procedures in Patents with Chemical Named Entities RavindraNittala - ManishShrivastava + ManishShrivastava 1–9 The Modern pharmaceutical industry depends on the iterative design of novel synthetic routes for drugs while not infringing on existing intellectual property rights. Such a design process calls for analyzing many existing synthetic chemical reactions and planning the synthesis of novel chemicals. These procedures have been historically available in unstructured raw text form in publications and patents. To facilitate automated synthetic chemical reactions analysis and design the novel synthetic reactions using Natural Language Processing (NLP) methods, we introduce a Named Entity Recognition (NER) dataset of the Examples section in 180 full-text patent documents with 5188 synthetic procedures annotated by domain experts. All the chemical entities which are part of the synthetic discourse were annotated with suitable class labels. We present the second-largest chemical NER corpus with 100,129 annotations and the highest IAA value of 98.73% (F-measure) on a 45 document subset. We discuss this new resource in detail and highlight some specific challenges in annotating synthetic chemical procedures with chemical named entities. We make the corpus available to the community to promote further research and development of downstream NLP systems applications. We also provide baseline results for the NER model to the community to improve on. 2020.icon-main.1 @@ -38,7 +38,7 @@ Treatment of optional forms in Mathematical modelling of <fixed-case>P</fixed-case>āṇini AnupriyaAggarwal - MalharKulkarni + MalharKulkarni 15–21 Pāṇini in his Aṣṭādhyāyī has written the grammar of Sanskrit in an extremely concise manner in the form of about 4000 sūtras. We have attempted to mathematically remodel the data produced by these sūtras. The mathematical modelling is a way to show that the Pāṇinian approach is a minimal method of capturing the grammatical data for Sanskrit which is a natural language. The sūtras written by Pāṇini can be written as functions, that is for a single input the function produces a single output of the form y=f(x), where x and y is the input and output respectively. However, we observe that for some input dhātus, we get multiple outputs. For such cases, we have written multivalued functions that is the functions which give two or more outputs for a single input. In other words, multivalued function is a way to represent optional output forms which are expressed in Pāṇinian grammar with the help of 3 terms i.e. vā, vibhaṣā, and anyatarasyam. Comparison between the techniques employed by Pāṇini and our notation of functions helps us understand how Pāṇinian techniques ensure brevity and terseness, hence illustrating that Pāṇinian grammar is minimal. 2020.icon-main.3 @@ -47,7 +47,7 @@ Automatic <fixed-case>H</fixed-case>adith Segmentation using <fixed-case>PPM</fixed-case> Compression TaghreedTarmom - EricAtwell + EricAtwell MohammadAlsalka 22–29 In this paper we explore the use of Prediction by partial matching (PPM) compression based to segment Hadith into its two main components (Isnad and Matan). The experiments utilized the PPMD variant of the PPM, showing that PPMD is effective in Hadith segmentation. It was also tested on Hadith corpora of different structures. In the first experiment we used the non- authentic Hadith (NAH) corpus for train- ing models and testing, and in the second experiment we used the NAH corpus for training models and the Leeds University and King Saud University (LK) Hadith cor- pus for testing PPMD segmenter. PPMD of order 7 achieved an accuracy of 92.76% and 90.10% in the first and second experiments, respectively. @@ -78,10 +78,10 @@ <fixed-case>E</fixed-case>nglish to <fixed-case>M</fixed-case>anipuri and Mizo Post-Editing Effort and its Impact on Low Resource Machine Translation LoitongbamSanayai Meetei - Thoudam DorenSingh - SivajiBandyopadhyay + Thoudam DorenSingh + SivajiBandyopadhyay MihaelaVela - Josefvan Genabith + Josefvan Genabith 50–59 We present the first study on the post-editing (PE) effort required to build a parallel dataset for English-Manipuri and English-Mizo, in the context of a project on creating data for machine translation (MT). English source text from a local daily newspaper are machine translated into Manipuri and Mizo using PBSMT systems built in-house. A Computer Assisted Translation (CAT) tool is used to record the time, keystroke and other indicators to measure PE effort in terms of temporal and technical effort. A positive correlation between the technical effort and the number of function words is seen for English-Manipuri and English-Mizo but a negative correlation between the technical effort and the number of noun words for English-Mizo. However, average time spent per token in PE English-Mizo text is negatively correlated with the temporal effort. The main reason for these results are due to (i) English and Mizo using the same script, while Manipuri uses a different script and (ii) the agglutinative nature of Manipuri. Further, we check the impact of training a MT system in an incremental approach, by including the post-edited dataset as additional training data. The result shows an increase in HBLEU of up to 4.6 for English-Manipuri. 2020.icon-main.7 @@ -91,7 +91,7 @@ Learning to Interact: An Adaptive Interaction Framework for Knowledge Graph Embeddings .Chandrahas NileshAgrawal - ParthaTalukdar + ParthaTalukdar 60–69 Knowledge Graph (KG) Embedding methods have been widely studied in the past few years and many methods have been proposed. These methods represent entities and relations in the KG as vectors in a vector space, trained to distinguish correct edges from the incorrect ones. For this distinction, simple functions of vectors’ dimensions, called interactions, are used. These interactions are used to calculate the candidate tail entity vector which is matched against all entities in the KG. However, for most of the existing methods, these interactions are fixed and manually specified. In this work, we propose an automated framework for discovering the interactions while training the KG Embeddings. The proposed method learns relevant interactions along with other parameters during training, allowing it to adapt to different datasets. Many of the existing methods can be seen as special cases of the proposed framework. We demonstrate the effectiveness of the proposed method on link prediction task by extensive experiments on multiple benchmark datasets. 2020.icon-main.8 @@ -102,7 +102,7 @@ .Chandrahas TathagataSengupta CibiPragadeesh - ParthaTalukdar + ParthaTalukdar 70–75 We study the problem of inducing interpretability in Knowledge Graph (KG) embeddings. Learning KG embeddings has been an active area of research in the past few years, resulting in many different models. However, most of these methods do not address the interpretability (semantics) of individual dimensions of the learned embeddings. In this work, we study this problem and propose a method for inducing interpretability in KG embeddings using entity co-occurrence statistics. The proposed method significantly improves the interpretability, while maintaining comparable performance in other KG tasks. 2020.icon-main.9 @@ -111,7 +111,7 @@ Solving Arithmetic Word Problems Using Transformer and Pre-processing of Problem Texts KadenGriffith - JugalKalita + JugalKalita 76–84 This paper outlines the use of Transformer networks trained to translate math word problems to equivalent arithmetic expressions in infix, prefix, and postfix notations. We compare results produced by a large number of neural configurations and find that most configurations outperform previously reported approaches on three of four datasets with significant increases in accuracy of over 20 percentage points. The best neural approaches boost accuracy by 30% on average when compared to the previous state-of-the-art. 2020.icon-main.10 @@ -140,7 +140,7 @@ Abhinav ReddyAppidi Vamshi KrishnaSrirangam DarsiSuhas - ManishShrivastava + ManishShrivastava 101–107 Part-of-Speech (POS) is one of the essential tasks for many Natural Language Processing (NLP) applications. There has been a significant amount of work done in POS tagging for resource-rich languages. POS tagging is an essential phase of text analysis in understanding the semantics and context of language. These tags are useful for higher-level tasks such as building parse trees, which can be used for Named Entity Recognition, Coreference resolution, Sentiment Analysis, and Question Answering. There has been work done on code-mixed social media corpus but not on POS tagging of Kannada-English code-mixed data. Here, we present Kannada-English code- mixed social media corpus annotated with corresponding POS tags. We also experimented with machine learning classification models CRF, Bi-LSTM, and Bi-LSTM-CRF models on our corpus. 2020.icon-main.13 @@ -218,7 +218,7 @@ A New Approach to Claim Check-Worthiness Prediction and Claim Verification ShukritySi AnishaDatta - SudipNaskar + SudipNaskar 155–160 The more we are advancing towards a modern world, the more it opens the path to falsification in every aspect of life. Even in case of knowing the surrounding, common people can not judge the actual scenario as the promises, comments and opinions of the influential people at power keep changing every day. Therefore computationally determining the truthfulness of such claims and comments has a very important societal impact. This paper describes a unique method to extract check-worthy claims from the 2016 US presidential debates and verify the truthfulness of the check-worthy claims. We classify the claims for check-worthiness with our modified Tf-Idf model which is used in background training on fact-checking news articles (NBC News and Washington Post). We check the truthfulness of the claims by using POS, sentiment score and cosine similarity features. 2020.icon-main.20 @@ -227,7 +227,7 @@ Improving Passage Re-Ranking with Word N-Gram Aware Coattention Encoder ChaitanyaAlaparthi - ManishShrivastava + ManishShrivastava 161–169 In text matching applications, coattentions have proved to be highly effective attention mechanisms. Coattention enables the learning to attend based on computing word level affinity scores between two texts. In this paper, we propose two improvements to coattention mechanism in the context of passage ranking (re-ranking). First, we extend the coattention mechanism by applying it across all word n-grams of query and passage. We show that these word n-gram coattentions can capture local context in query and passage to better judge the relevance between them. Second, we further improve the model performance by proposing a query based attention pooling on passage encodings. We evaluate these two methods on MSMARCO passage re-ranking task. The experiment results shows that these two methods resulted in a relative increase of 8.04% in Mean Reciprocal Rank @10 (MRR@10) compared to the naive coattention mechanism. At the time of writing this paper, our methods are the best non transformer model on MS MARCO passage re-ranking task and are competitive to BERT base while only having less than 10% of the parameters. 2020.icon-main.21 @@ -236,7 +236,7 @@ Language Model Metrics and <fixed-case>P</fixed-case>rocrustes Analysis for Improved Vector Transformation of <fixed-case>NLP</fixed-case> Embeddings ThomasConley - JugalKalita + JugalKalita 170–174 Artificial Neural networks are mathematical models at their core. This truism presents some fundamental difficulty when networks are tasked with Natural Language Processing. A key problem lies in measuring the similarity or distance among vectors in NLP embedding space, since the mathematical concept of distance does not always agree with the linguistic concept. We suggest that the best way to measure linguistic distance among vectors is by employing the Language Model (LM) that created them. We introduce Language Model Distance (LMD) for measuring accuracy of vector transformations based on the Distributional Hypothesis ( LMD Accuracy ). We show the efficacy of this metric by applying it to a simple neural network learning the Procrustes algorithm for bilingual word mapping. 2020.icon-main.22 @@ -256,7 +256,7 @@ Automated <fixed-case>A</fixed-case>rabic Essay Evaluation AbeerAlqahtani - AmalAlsaif + AmalAlsaif 181–190 Although the manual evaluation of essays is a time-consuming process, writing essays has a significant role in assessing learning outcomes. Therefore, automated essay evaluation represents a solution, especially for schools, universities, and testing companies. Moreover, the existence of such systems overcomes some factors that influence manual evaluation such as the evaluator’s mental state, the disparity between evaluators, and others. In this paper, we propose an Arabic essay evaluation system based on a support vector regression (SVR) model along with a wide range of features including morphological, syntactic, semantic, and discourse features. The system evaluates essays according to five criteria: spelling, essay structure, coherence level, style, and punctuation marks, without the need for domain-representative essays (a model essay). A specific model is developed for each criterion; thus, the overall evaluation of the essay is a combination of the previous criteria results. We develop our dataset based on essays written by university students and journalists whose native language is Arabic. The dataset is then evaluated by experts. The experimental results show that 96% of our dataset is correctly evaluated in the overall score and the correlation between the system and the experts’ evaluation is 0.87. Additionally, the system shows variant results in evaluating criteria separately. 2020.icon-main.24 @@ -298,7 +298,7 @@ Self-Supervised Claim Identification for Automated Fact Checking ArchitaPathak Mohammad AbuzarShaikh - RohiniSrihari + RohiniSrihari 213–227 We propose a novel, attention-based self-supervised approach to identify “claim-worthy” sentences in a fake news article, an important first step in automated fact-checking. We leverage aboutness of headline and content using attention mechanism for this task. The identified claims can be used for downstream task of claim verification for which we are releasing a benchmark dataset of manually selected compelling articles with veracity labels and associated evidence. This work goes beyond stylistic analysis to identifying content that influences reader belief. Experiments with three datasets show the strength of our model. 2020.icon-main.28 @@ -329,7 +329,7 @@ Parsing <fixed-case>I</fixed-case>ndian <fixed-case>E</fixed-case>nglish News Headlines SamapikaRoy SukhadaSukhada - AnilKumar Singh + AnilKumar Singh 239–242 Parsing news Headlines is one of the difficult tasks of Natural Language Processing. It is mostly because news Headlines (NHs) are not complete grammatical sentences. News editors use all sorts of tricks to grab readers’ attention, for instance, unusual capitalization as in the headline’ Ear SHOT ashok rajagopalan’; some are world knowledge demanding like ‘Church reformation celebrated’ where the ‘Church reformation’ refers to a historical event and not a piece of news about an ordinary church. The lack of transparency in NHs can be linguistic, cultural, social, or contextual. The lack of space provided for a news headline has led to creative liberty. Though many works like news value extraction, summary generation, emotion classification of NHs have been going on, parsing them had been a tough challenge. Linguists have also been interested in NHs for creativity in the language used by bending traditional grammar rules. Researchers have conducted studies on news reportage, discourse analysis of NHs, and many more. While the creativity seen in NHs is fascinating for language researchers, it poses a computational challenge for Natural Language Processing researchers. This paper presents an outline of the ongoing doctoral research on the parsing of Indian English NHs. The ultimate aim of this research is to provide a module that will generate correctly parsed NHs. The intention is to enhance the broad applicability of newspaper corpus for future Natural Language Processing applications. 2020.icon-main.31 @@ -347,7 +347,7 @@ Sentimental Poetry Generation Kasper AalbergRøstvold - BjörnGambäck + BjörnGambäck 246–256 The paper investigates how well poetry can be generated to contain a specific sentiment, and whether readers of the poetry experience the intended sentiment. The poetry generator consists of a bi-directional Long Short-Term Memory (LSTM) model, combined with rhyme pair generation, rule-based word prediction methods, and tree search for extending generation possibilities. The LSTM network was trained on a set of English poetry written and published by users on a public website. Human judges evaluated poems generated by the system, both with a positive and negative sentiment. The results indicate that while there are some weaknesses in the system compared to other state-of-the-art solutions, it is fully capable of generating poetry with an inherent sentiment that is perceived by readers. 2020.icon-main.33 @@ -356,7 +356,7 @@ <fixed-case>WEKA</fixed-case> in Forensic Authorship Analysis: A corpus-based approach of Saudi Authors MashaelAlAmr - EricAtwell + EricAtwell 257–260 This is a pilot study that aims to explore the potential of using WEKA in forensic authorship analysis. It is a corpus-based research using data from Twitter collected from thirteen authors from Riyadh, Saudi Arabia. It examines the performance of unbalanced and balanced data sets using different classifiers and parameters of word grams. The attributes are dialect-specific linguistic features categorized as word grams. The findings further support previous studies in computational authorship identification. 2020.icon-main.34 @@ -365,7 +365,7 @@ Native-Language Identification with Attention StianSteinbakken - BjörnGambäck + BjörnGambäck 261–271 The paper explores how an attention-based approach can increase performance on the task of native-language identification (NLI), i.e., to identify an author’s first language given information expressed in a second language. Previously, Support Vector Machines have consistently outperformed deep learning-based methods on the TOEFL11 data set, the de facto standard for evaluating NLI systems. The attention-based system BERT (Bidirectional Encoder Representations from Transformers) was first tested in isolation on the TOEFL11 data set, then used in a meta-classifier stack in combination with traditional techniques to produce an accuracy of 0.853. However, more labelled NLI data is now available, so BERT was also trained on the much larger Reddit-L2 data set, containing 50 times as many examples as previously used for English NLI, giving an accuracy of 0.902 on the Reddit-L2 in-domain test scenario, improving the state-of-the-art by 21.2 percentage points. 2020.icon-main.35 @@ -517,7 +517,7 @@ Weak Supervision using Linguistic Knowledge for Information Extraction SachinPawar - GirishPalshikar + GirishPalshikar AnkitaJain JyotiBhat SimiJohnson @@ -531,7 +531,7 @@ ParthPatel ManthanMehta PushpakBhattacharya - ArjunAtreya + ArjunAtreya 373–378 In this paper we present a novel transliteration technique based on Orthographic Syllable(OS) segmentation for low-resource Indian languages (ILs). Given that alignment has produced promising results in Statistical Machine Transliteration systems and phonology plays an important role in transliteration, we introduce a new model which uses alignment representation similar to that of IBM model 3 to pre-process the tokenized input sequence and then use pre-trained source and target OS-embeddings for training. We apply our model for transliteration from ILs to English and report our accuracy based on Top-1 Exact Match. We also compare our accuracy with a previously proposed Phrase-Based model and report improvements. 2020.icon-main.51 @@ -578,7 +578,7 @@ A Rule Based Lightweight <fixed-case>B</fixed-case>engali Stemmer SouvickDas RajatPandit - Sudip KumarNaskar + Sudip KumarNaskar 400–408 In the field of Natural Language Processing (NLP) the process of stemming plays a significant role. Stemmer transforms an inflected word to its root form. Stemmer significantly increases the efficiency of Information Retrieval (IR) systems. It is a very basic yet fundamental text pre-processing task widely used in many NLP tasks. Several important works on stemming have been carried out by researchers in English and other major languages. In this paper, we study and review existing works on stemming in Bengali and other Indian languages. Finally, we propose a rule based approach that explores Bengali morphology and leverages WordNet to achieve better accuracy. Our algorithm produced stemming accuracy of 98.86% for Nouns and 99.75% for Verbs. 2020.icon-main.55 @@ -599,7 +599,7 @@ Deep Neural Model for <fixed-case>M</fixed-case>anipuri Multiword Named Entity Recognition with Unsupervised Cluster Feature JimmyLaishram KishorjitNongmeikapam - SudipNaskar + SudipNaskar 420–429 The recognition task of Multi-Word Named Entities (MNEs) in itself is a challenging task when the language is inflectional and agglutinative. Having breakthrough NLP researches with deep neural network and language modelling techniques, the applicability of such techniques/algorithms for Indian language like Manipuri remains unanswered. In this paper an attempt to recognize Manipuri MNE is performed using a Long Short Term Memory (LSTM) recurrent neural network model in conjunction with Part Of Speech (POS) embeddings. To further improve the classification accuracy, word cluster information using K-means clustering approach is added as a feature embedding. The cluster information is generated using a Skip-gram based words vector that contains the semantic and syntactic information of each word. The model so proposed does not use extensive language morphological features to elevate its accuracy. Finally the model’s performance is compared with the other machine learning based Manipuri MNE models. 2020.icon-main.57 @@ -686,7 +686,7 @@ Developing a <fixed-case>F</fixed-case>aroese <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case>-tagging solution using <fixed-case>I</fixed-case>celandic methods HinrikHafsteinsson - Anton KarlIngason + Anton KarlIngason 481–490 We describe the development of a dedicated, high-accuracy part-of-speech (PoS) tagging solution for Faroese, a North Germanic language with about 50,000 speakers. To achieve this, a state-of-the-art neural PoS tagger for Icelandic, ABLTagger, was trained on a 100,000 word PoS-tagged corpus for Faroese, standardised with methods previously applied to Icelandic corpora. This tagger was supplemented with a novel Experimental Database of Faroese Inflection (EDFM), which contains morphological information on 67,488 Faroese words with about one million inflectional forms. This approach produced a PoS-tagging model for Faroese which achieves a 91.40% overall accuracy when evaluated with 10-fold cross validation, which is currently the highest reported accuracy for a dedicated Faroese PoS-tagger. The tagging model, morphological database, proposed revised PoS tagset for Faroese as well as a revised and standardised PoS tagged corpus are all presented as products of this project and are made available for use in further research in Faroese language technology 2020.icon-main.65 @@ -705,12 +705,12 @@ Proceedings of the 17th International Conference on Natural Language Processing (ICON): TechDOfication 2020 Shared Task - Dipti MisraSharma + Dipti MisraSharma AsifEkbal KaruneshArora - Sudip KumarNaskar + Sudip KumarNaskar DipankarGanguly - SobhaL + SobhaL RadhikaMamidi SunitaArora PruthwikMishra @@ -817,12 +817,12 @@ Proceedings of the 17th International Conference on Natural Language Processing (ICON): TermTraction 2020 Shared Task - Dipti MisraSharma + Dipti MisraSharma AsifEkbal KaruneshArora - Sudip KumarNaskar + Sudip KumarNaskar DipankarGanguly - SobhaL + SobhaL RadhikaMamidi SunitaArora PruthwikMishra @@ -871,12 +871,12 @@ Proceedings of the 17th International Conference on Natural Language Processing (ICON): Adap-MT 2020 Shared Task - Dipti MisraSharma + Dipti MisraSharma AsifEkbal KaruneshArora - Sudip KumarNaskar + Sudip KumarNaskar DipankarGanguly - SobhaL + SobhaL RadhikaMamidi SunitaArora PruthwikMishra @@ -894,9 +894,9 @@
<fixed-case>JUNLP</fixed-case>@<fixed-case>ICON</fixed-case>2020: Low Resourced Machine Translation for Indic Languages - SainikMahata + SainikMahata DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 1–5 In the current work, we present the description of the systems submitted to a machine translation shared task organized by ICON 2020: 17th International Conference on Natural Language Processing. The systems were developed to show the capability of general domain machine translation when translating into Indic languages, English-Hindi, in our case. The paper shows the training process and quantifies the performance of two state-of-the-art translation systems, viz., Statistical Machine Translation and Neural Machine Translation. While Statistical Machine Translation systems work better in a low-resource setting, Neural Machine Translation systems are able to generate sentences that are fluent in nature. Since both these systems have contrasting advantages, a hybrid system, incorporating both, was also developed to leverage all the strong points. The submitted systems garnered BLEU scores of 8.701943312, 0.6361336198, and 11.78873307 respectively and the scores of the hybrid system helped us to the fourth spot in the competition leaderboard. 2020.icon-adapmt.1 @@ -1009,7 +1009,7 @@ <fixed-case>U</fixed-case>rdu To <fixed-case>P</fixed-case>unjabi Machine Translation System Umrinder PalSingh VishalGoyal - GurpreetLehal + GurpreetLehal 16–18 Machine Translation is a popular area of NLP research field. There are various approaches to develop a machine translation system like Rule-Based, Statistical, Neural and Hybrid. A rule-Based system is based on grammatical rules and uses bilingual lexicons. Statistical and Neural use the large parallel corpus for training the respective models. Where the Hybrid MT system is a mixture of different approaches. In these days the corpus-based machine translation system is quite popular in NLP research area. But these models demands huge parallel corpus. In this research, we have used a hybrid approach to develop Urdu to Punjabi machine translation system. In the developed system, statistical and various sub-system based on the linguistic rule has been used. The system yield 80% accuracy on a different set of the sentence related to domains like Political, Entertainment, Tourism, Sports and Health. The complete system has been developed in a C#.NET programming language. 2020.icon-demos.6 @@ -1173,7 +1173,7 @@ <fixed-case>E</fixed-case>mp<fixed-case>L</fixed-case>ite: A Lightweight Sequence Labeling Model for Emphasis Selection of Short Texts VibhavAgarwal SouravGhosh - KrantiChalamalasetti + KrantiChalamalasetti BharathChalla SonalKumari Harshavardhana diff --git a/data/xml/2020.ijclclp.xml b/data/xml/2020.ijclclp.xml index 16278b9125..d4ed837f5c 100644 --- a/data/xml/2020.ijclclp.xml +++ b/data/xml/2020.ijclclp.xml @@ -23,7 +23,7 @@ Hai-LunTu Ching-YuYang Chiao-WenLi - Jason S.Chang + Jason S.Chang 2020.ijclclp-1.1 chen-etal-2020-chinese @@ -48,7 +48,7 @@ Linguistic Input and Child Vocalization of 7 Children from 5 to 30 Months: A Longitudinal Study with <fixed-case>LENA</fixed-case> Automatic Analysis Chia-ChengLee - Li-meiChen + Li-meiChen D. KimbroughOller 2020.ijclclp-1.4 lee-etal-2020-linguistic @@ -103,7 +103,7 @@ 改善詞彙對齊以擷取片語翻譯之方法 (Improving Word Alignment for Extraction Phrasal Translation) Yi-JyunChen Ching-Yu HelenYang - Jason S.Chang + Jason S.Chang 2020.ijclclp-2.3 zho chen-etal-2020-gai @@ -133,7 +133,7 @@ 基於深度聲學模型其狀態精確度最大化之強健語音特徵擷取的初步研究 (The Preliminary Study of Robust Speech Feature Extraction based on Maximizing the Accuracy of States in Deep Acoustic Models) Li-ChiaChang - Jeih-weihHung + Jeih-weihHung 2020.ijclclp-2.6 zho chang-hung-2020-ji diff --git a/data/xml/2020.inlg.xml b/data/xml/2020.inlg.xml index 62e6e0b56d..20a6dae2ec 100644 --- a/data/xml/2020.inlg.xml +++ b/data/xml/2020.inlg.xml @@ -5,7 +5,7 @@ Proceedings of the 13th International Conference on Natural Language Generation BrianDavis YvetteGraham - JohnKelleher + JohnKelleher YajiSripada Association for Computational Linguistics
Dublin, Ireland
@@ -88,7 +88,7 @@ Studying the Impact of Filling Information Gaps on the Output Quality of Neural Data-to-Text CraigThomson ZhijieZhao - SomayajuluSripada + SomayajuluSripada 35–40 It is unfair to expect neural data-to-text to produce high quality output when there are gaps between system input data and information contained in the training text. Thomson et al. (2020) identify and narrow information gaps in Rotowire, a popular data-to-text dataset. In this paper, we describe a study which finds that a state-of-the-art neural data-to-text system produces higher quality output, according to the information extraction (IE) based metrics, when additional input data is carefully selected from this newly available source. It remains to be shown, however, whether IE metrics used in this study correlate well with humans in judging text quality. 2020.inlg-1.6 @@ -132,7 +132,7 @@ Chrisvan der Lee ChrisEmmery SanderWubben - EmielKrahmer + EmielKrahmer 68–79 This paper describes the CACAPO dataset, built for training both neural pipeline and end-to-end data-to-text language generation systems. The dataset is multilingual (Dutch and English), and contains almost 10,000 sentences from human-written news texts in the sports, weather, stocks, and incidents domain, together with aligned attribute-value paired data. The dataset is unique in that the linguistic variation and indirect ways of expressing data in these texts reflect the challenges of real world NLG tasks. 2020.inlg-1.10 @@ -153,7 +153,7 @@ <fixed-case>S</fixed-case>imple<fixed-case>NLG</fixed-case>-<fixed-case>TI</fixed-case>: Adapting <fixed-case>S</fixed-case>imple<fixed-case>NLG</fixed-case> to <fixed-case>T</fixed-case>ibetan ZewangKuanzhuo - LiLin + LiLin ZhaoWeina 86–90 Surface realisation is the last but not the least phase of Natural Language Generation, which aims to produce high-quality natural language text based on meaning representations. In this article, we present our work on SimpleNLG-TI, a Tibetan surface realiser, which follows the design paradigm of SimpleNLG-EN. SimpleNLG-TI is built up by our investigation of the core features of Tibetan morphology and syntax. Through this work, we provide a robust and flexible surface realiser for Tibetan generation systems. @@ -186,7 +186,7 @@ André LuizRosa Teixeira JoãoCampos RossanaCunha - ThiagoCastro Ferreira + ThiagoCastro Ferreira AdrianaPagano FabioCozman 103–106 @@ -275,11 +275,11 @@ Twenty Years of Confusion in Human Evaluation: <fixed-case>NLG</fixed-case> Needs Evaluation Sheets and Standardised Definitions - David M.Howcroft - AnyaBelz - Miruna-AdrianaClinciu + David M.Howcroft + AnyaBelz + Miruna-AdrianaClinciu DimitraGkatzia - Sadid A.Hasan + Sadid A.Hasan SaadMahamood SimonMille Emielvan Miltenburg @@ -293,9 +293,9 @@ Disentangling the Properties of Human Evaluation Methods: A Classification System to Support Comparability, Meta-Evaluation and Reproducibility Testing - AnyaBelz + AnyaBelz SimonMille - David M.Howcroft + David M.Howcroft 183–194 Current standards for designing and reporting human evaluations in NLP mean it is generally unclear which evaluations are comparable and can be expected to yield similar results when applied to the same system outputs. This has serious implications for reproducibility testing and meta-evaluation, in particular given that human evaluation is considered the gold standard against which the trustworthiness of automatic metrics is gauged. %and merging others, as well as deciding which evaluations should be able to reproduce each other’s results. Using examples from NLG, we propose a classification system for evaluations based on disentangling (i) what is being evaluated (which aspect of quality), and (ii) how it is evaluated in specific (a) evaluation modes and (b) experimental designs. We show that this approach provides a basis for determining comparability, hence for comparison of evaluations across papers, meta-evaluation experiments, reproducibility testing. 2020.inlg-1.24 @@ -347,7 +347,7 @@ <fixed-case>R</fixed-case>epro<fixed-case>G</fixed-case>en: Proposal for a Shared Task on Reproducibility of Human Evaluations in <fixed-case>NLG</fixed-case> - AnyaBelz + AnyaBelz ShubhamAgarwal AnastasiaShimorina EhudReiter @@ -375,7 +375,7 @@ <fixed-case>BERT</fixed-case>-Based Simplification of <fixed-case>J</fixed-case>apanese Sentence-Ending Predicates in Descriptive Text TaichiKato ReiMiyata - SatoshiSato + SatoshiSato 242–251 Japanese sentence-ending predicates intricately combine content words and functional elements, such as aspect, modality, and honorifics; this can often hinder the understanding of language learners and children. Conventional lexical simplification methods, which replace difficult target words with simpler synonyms acquired from lexical resources in a word-by-word manner, are not always suitable for the simplification of such Japanese predicates. Given this situation, we propose a BERT-based simplification method, the core feature of which is the high ability to substitute the whole predicates with simple ones while maintaining their core meanings in the context by utilizing pre-trained masked language models. Experimental results showed that our proposed methods consistently outperformed the conventional thesaurus-based method by a wide margin. Furthermore, we investigated in detail the effectiveness of the average token embedding and dropout, and the remaining errors of our BERT-based methods. 2020.inlg-1.31 @@ -385,7 +385,7 @@ Amplifying the Range of News Stories with Creativity: Methods and their Evaluation, in <fixed-case>P</fixed-case>ortuguese RuiMendes - HugoGonçalo Oliveira + HugoGonçalo Oliveira 252–262 Headlines are key for attracting people to a story, but writing appealing headlines requires time and talent. This work aims to automate the production of creative short texts (e.g., news headlines) for an input context (e.g., existing headlines), thus amplifying its range. Well-known expressions (e.g., proverbs, movie titles), which typically include word-play and resort to figurative language, are used as a starting point. Given an input text, they can be recommended by exploiting Semantic Textual Similarity (STS) techniques, or adapted towards higher relatedness. For the latter, three methods that exploit static word embeddings are proposed. Experimentation in Portuguese lead to some conclusions, based on human opinions: STS methods that look exclusively at the surface text, recommend more related expressions; resulting expressions are somewhat related to the input, but adaptation leads to higher relatedness and novelty; humour can be an indirect consequence, but most outputs are not funny. 2020.inlg-1.32 @@ -395,7 +395,7 @@ Lessons from Computational Modelling of Reference Production in <fixed-case>M</fixed-case>andarin and <fixed-case>E</fixed-case>nglish GuanyiChen - Keesvan Deemter + Keesvan Deemter 263–272 Referring expression generation (REG) algorithms offer computational models of the production of referring expressions. In earlier work, a corpus of referring expressions (REs) in Mandarin was introduced. In the present paper, we annotate this corpus, evaluate classic REG algorithms on it, and compare the results with earlier results on the evaluation of REG for English referring expressions. Next, we offer an in-depth analysis of the corpus, focusing on issues that arise from the grammar of Mandarin. We discuss shortcomings of previous REG evaluations that came to light during our investigation and we highlight some surprising results. Perhaps most strikingly, we found a much higher proportion of under-specified expressions than previous studies had suggested, not just in Mandarin but in English as well. 2020.inlg-1.33 @@ -423,7 +423,7 @@ AnjaliNarayan-Chen TagyoungChung AnushreeVenkatesh - DilekHakkani-Tur + DilekHakkani-Tur 283–295 Neural network based approaches to data-to-text natural language generation (NLG) have gained popularity in recent years, with the goal of generating a natural language prompt that accurately realizes an input meaning representation. To facilitate the training of neural network models, researchers created large datasets of paired utterances and their meaning representations. However, the creation of such datasets is an arduous task and they mostly consist of simple meaning representations composed of slot and value tokens to be realized. These representations do not include any contextual information that an NLG system can use when trying to generalize, such as domain information and descriptions of slots and values. In this paper, we present the novel task of Schema-Guided Natural Language Generation (SG-NLG). Here, the goal is still to generate a natural language prompt, but in SG-NLG, the input MRs are paired with rich schemata providing contextual information. To generate a dataset for SG-NLG we re-purpose an existing dataset for another task: dialog state tracking, which includes a large and rich schema spanning multiple different attributes, including information about the domain, user intent, and slot descriptions. We train different state-of-the-art models for neural natural language generation on this dataset and show that in many cases, including rich schema information allows our models to produce higher quality outputs both in terms of semantics and diversity. We also conduct experiments comparing model performance on seen versus unseen domains, and present a human evaluation demonstrating high ratings for overall output quality. 2020.inlg-1.35 @@ -445,7 +445,7 @@ AleksandreMaskharashvili AmyIsard XintongLi - MichaelWhite + MichaelWhite 306–315 While classic NLG systems typically made use of hierarchically structured content plans that included discourse relations as central components, more recent neural approaches have mostly mapped simple, flat inputs to texts without representing discourse relations explicitly. In this paper, we investigate whether it is beneficial to include discourse relations in the input to neural data-to-text generators for texts where discourse relations play an important role. To do so, we reimplement the sentence planning and realization components of a classic NLG system, Methodius, using LSTM sequence-to-sequence (seq2seq) models. We find that although seq2seq models can learn to generate fluent and grammatical texts remarkably well with sufficiently representative Methodius training data, they cannot learn to correctly express Methodius’s similarity and contrast comparisons unless the corresponding RST relations are included in the inputs. Additionally, we experiment with using self-training and reverse model reranking to better handle train/test data mismatches, and find that while these methods help reduce content errors, it remains essential to include discourse relations in the input to obtain optimal performance. 2020.inlg-1.37 @@ -456,10 +456,10 @@ From “Before” to “After”: Generating Natural Language Instructions from Image Pairs in a Simple Visual Domain RobinRojowiec - JanaGötze + JanaGötze PhilippSadler HenrikVoigt - SinaZarrieß + SinaZarrieß DavidSchlangen 316–326 While certain types of instructions can be com-pactly expressed via images, there are situations where one might want to verbalise them, for example when directing someone. We investigate the task of Instruction Generation from Before/After Image Pairs which is to derive from images an instruction for effecting the implied change. For this, we make use of prior work on instruction following in a visual environment. We take an existing dataset, the BLOCKS data collected by Bisk et al. (2016) and investigate whether it is suitable for training an instruction generator as well. We find that it is, and investigate several simple baselines, taking these from the related task of image captioning. Through a series of experiments that simplify the task (by making image processing easier or completely side-stepping it; and by creating template-based targeted instructions), we investigate areas for improvement. We find that captioning models get some way towards solving the task, but have some difficulty with it, and future improvements must lie in the way the change is detected in the instruction. @@ -508,7 +508,7 @@ Rapformer: Conditional Rap Lyrics Generation with Denoising Autoencoders - Nikola I.Nikolov + Nikola I.Nikolov EricMalmi CurtisNorthcutt LoretoParisi @@ -549,11 +549,11 @@ Gradations of Error Severity in Automatic Image Descriptions Emielvan Miltenburg Wei-TingLu - EmielKrahmer + EmielKrahmer AlbertGatt GuanyiChen LinLi - Keesvan Deemter + Keesvan Deemter 398–411 Earlier research has shown that evaluation metrics based on textual similarity (e.g., BLEU, CIDEr, Meteor) do not correlate well with human evaluation scores for automatically generated text. We carried out an experiment with Chinese speakers, where we systematically manipulated image descriptions to contain different kinds of errors. Because our manipulated descriptions form minimal pairs with the reference descriptions, we are able to assess the impact of different kinds of errors on the perceived quality of the descriptions. Our results show that different kinds of errors elicit significantly different evaluation scores, even though all erroneous descriptions differ in only one character from the reference descriptions. Evaluation metrics based solely on textual similarity are unable to capture these differences, which (at least partially) explains their poor correlation with human judgments. Our work provides the foundations for future work, where we aim to understand why different errors are seen as more or less severe. 2020.inlg-1.45 @@ -568,7 +568,7 @@ SeokhwanKim YangLiu MihailEric - DilekHakkani-Tur + DilekHakkani-Tur 412–421 Open-domain dialog systems aim to generate relevant, informative and engaging responses. In this paper, we propose using a dialog policy to plan the content and style of target, open domain responses in the form of an action plan, which includes knowledge sentences related to the dialog context, targeted dialog acts, topic information, etc. For training, the attributes within the action plan are obtained by automatically annotating the publicly released Topical-Chat dataset. We condition neural response generators on the action plan which is then realized as target utterances at the turn and sentence levels. We also investigate different dialog policy models to predict an action plan given the dialog context. Through automated and human evaluation, we measure the appropriateness of the generated responses and check if the generation models indeed learn to realize the given action plans. We demonstrate that a basic dialog policy that operates at the sentence level generates better responses in comparison to turn level generation as well as baseline models with no action plan. Additionally the basic dialog policy has the added benefit of controllability. 2020.inlg-1.46 diff --git a/data/xml/2020.insights.xml b/data/xml/2020.insights.xml index 5b5c922b7b..8726f2f0a0 100644 --- a/data/xml/2020.insights.xml +++ b/data/xml/2020.insights.xml @@ -44,7 +44,7 @@ How Far Can We Go with Data Selection? A Case Study on Semantic Sequence Tagging Tasks SamuelLouvan - BernardoMagnini + BernardoMagnini 15–21 Although several works have addressed the role of data selection to improve transfer learning for various NLP tasks, there is no consensus about its real benefits and, more generally, there is a lack of shared practices on how it can be best applied. We propose a systematic approach aimed at evaluating data selection in scenarios of increasing complexity. Specifically, we compare the case in which source and target tasks are the same while source and target domains are different, against the more challenging scenario where both tasks and domains are different. We run a number of experiments on semantic sequence tagging tasks, which are relatively less investigated in data selection, and conclude that data selection has more benefit on the scenario when the tasks are the same, while in case of different (although related) tasks from distant domains, a combination of data selection and multi-task learning is ineffective for most cases. 2020.insights-1.3 @@ -174,7 +174,7 @@ Counterfactually-Augmented <fixed-case>SNLI</fixed-case> Training Data Does Not Yield Better Generalization Than Unaugmented Data WilliamHuang HaokunLiu - Samuel R.Bowman + Samuel R.Bowman 82–87 A growing body of work shows that models exploit annotation artifacts to achieve state-of-the-art performance on standard crowdsourced benchmarks—datasets collected from crowdworkers to create an evaluation task—while still failing on out-of-domain examples for the same task. Recent work has explored the use of counterfactually-augmented data—data built by minimally editing a set of seed examples to yield counterfactual labels—to augment training data associated with these benchmarks and build more robust classifiers that generalize better. However, Khashabi et al. (2020) find that this type of augmentation yields little benefit on reading comprehension tasks when controlling for dataset size and cost of collection. We build upon this work by using English natural language inference data to test model generalization and robustness and find that models trained on a counterfactually-augmented SNLI dataset do not generalize better than unaugmented datasets of similar size and that counterfactual augmentation can hurt performance, yielding models that are less robust to challenge examples. Counterfactual augmentation of natural language understanding data through standard crowdsourcing techniques does not appear to be an effective way of collecting training data and further innovation is required to make this general line of work viable. 2020.insights-1.13 diff --git a/data/xml/2020.intellang.xml b/data/xml/2020.intellang.xml index 842ef3bca1..fdd36567b1 100644 --- a/data/xml/2020.intellang.xml +++ b/data/xml/2020.intellang.xml @@ -3,7 +3,7 @@ Proceedings of the Workshop on Intelligent Information Processing and Natural Language Generation - DanielSánchez + DanielSánchez RaquelHervás AlbertGatt Association for Computational Lingustics @@ -51,7 +51,7 @@ <fixed-case>S</fixed-case>port<fixed-case>S</fixed-case>ett:Basketball - A robust and maintainable data-set for Natural Language Generation CraigThomson EhudReiter - SomayajuluSripada + SomayajuluSripada 32–40 2020.intellang-1.4 thomson-etal-2020-sportsett @@ -89,7 +89,7 @@ Fuzzy Logic for Vagueness Management in Referring Expression Generation - NicolásMarín + NicolásMarín GustavoRivas-Gervilla DanielSánchez 71–76 diff --git a/data/xml/2020.intexsempar.xml b/data/xml/2020.intexsempar.xml index 3dc9bfb634..3058ac9784 100644 --- a/data/xml/2020.intexsempar.xml +++ b/data/xml/2020.intexsempar.xml @@ -5,8 +5,8 @@ Proceedings of the First Workshop on Interactive and Executable Semantic Parsing BenBogin SrinivasanIyer - Xi VictoriaLin - DragomirRadev + Xi VictoriaLin + DragomirRadev AlaneSuhr Panupong CaimingXiong diff --git a/data/xml/2020.isa.xml b/data/xml/2020.isa.xml index ae789a2815..fb32cc8755 100644 --- a/data/xml/2020.isa.xml +++ b/data/xml/2020.isa.xml @@ -3,7 +3,7 @@ Proceedings of the 16th Joint ACL-ISO Workshop on Interoperable Semantic Annotation - HarryBunt + HarryBunt European Language Resources Association
Marseille
May @@ -26,11 +26,11 @@
<fixed-case>H</fixed-case>indi <fixed-case>T</fixed-case>ime<fixed-case>B</fixed-case>ank: An <fixed-case>ISO</fixed-case>-<fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> Annotated Reference Corpus - PranavGoel + PranavGoel SuhanPrabhu AlokDebnath PriyankModi - ManishShrivastava + ManishShrivastava 13–21 ISO-TimeML is an international standard for multilingual event annotation, detection, categorization and linking. In this paper, we present the Hindi TimeBank, an ISO-TimeML annotated reference corpus for the detection and classification of events, states and time expressions, and the links between them. Based on contemporary developments in Hindi event recognition, we propose language independent and language-specific deviations from the ISO-TimeML guidelines, but preserve the schema. These deviations include the inclusion of annotator confidence, and an independent mechanism of identifying and annotating states such as copulars and existentials) With this paper, we present an open-source corpus, the Hindi TimeBank. The Hindi TimeBank is a 1,000 article dataset, with over 25,000 events, 3,500 states and 2,000 time expressions. We analyze the dataset in detail and provide a class-wise distribution of events, states and time expressions. Our guidelines and dataset are backed by high average inter-annotator agreement scores. 2020.isa-1.2 @@ -81,7 +81,7 @@ A Consolidated Dataset for Knowledge-based Question Generation using Predicate Mapping of Linked Data JohannaMelly GabrielLuthier - AndreiPopescu-Belis + AndreiPopescu-Belis 59–66 In this paper, we present the ForwardQuestions data set, made of human-generated questions related to knowledge triples. This data set results from the conversion and merger of the existing SimpleDBPediaQA and SimpleQuestionsWikidata data sets, including the mapping of predicates from DBPedia to Wikidata, and the selection of ‘forward’ questions as opposed to ‘backward’ ones. The new data set can be used to generate novel questions given an unseen Wikidata triple, by replacing the subjects of existing questions with the new one and then selecting the best candidate questions using semantic and syntactic criteria. Evaluation results indicate that the question generation method using ForwardQuestions improves the quality of questions by about 20% with respect to a baseline not using ranking criteria. 2020.isa-1.7 @@ -114,7 +114,7 @@ UjwalNarayan AlokDebnath SumukhS - ManishShrivastava + ManishShrivastava 88–93 In this paper, we provide the basic guidelines towards the detection and linguistic analysis of events in Kannada. Kannada is a morphologically rich, resource poor Dravidian language spoken in southern India. As most information retrieval and extraction tasks are resource intensive, very little work has been done on Kannada NLP, with almost no efforts in discourse analysis and dataset creation for representing events or other semantic annotations in the text. In this paper, we linguistically analyze what constitutes an event in this language, the challenges faced with discourse level annotation and representation due to the rich derivational morphology of the language that allows free word order, numerous multi-word expressions, adverbial participle constructions and constraints on subject-verb relations. Therefore, this paper is one of the first attempts at a large scale discourse level annotation for Kannada, which can be used for semantic annotation and corpus development for other tasks in the language. 2020.isa-1.10 diff --git a/data/xml/2020.iwclul.xml b/data/xml/2020.iwclul.xml index c165a6d1ce..02ba3cd064 100644 --- a/data/xml/2020.iwclul.xml +++ b/data/xml/2020.iwclul.xml @@ -3,8 +3,8 @@ Proceedings of the Sixth International Workshop on Computational Linguistics of Uralic Languages - Tommi APirinen - Francis M.Tyers + Tommi APirinen + Francis M.Tyers MichaelRießler Association for Computational Linguistics
Wien, Austria
diff --git a/data/xml/2020.iwdp.xml b/data/xml/2020.iwdp.xml index 276d0683b8..354054aff5 100644 --- a/data/xml/2020.iwdp.xml +++ b/data/xml/2020.iwdp.xml @@ -4,7 +4,7 @@ Proceedings of the Second International Workshop of Discourse Processing QunLiu - DeyiXiong + DeyiXiong ShiliGe XiaojunZhang Association for Computational Linguistics @@ -65,7 +65,7 @@ KaiyuHuang JunpengLiu JingxiangCao - DegenHuang + DegenHuang 22–28 Previous neural approaches achieve significant progress for Chinese word segmentation (CWS) as a sentence-level task, but it suffers from limitations on real-world scenario. In this paper, we address this issue with a context-aware method and optimize the solution at document-level. This paper proposes a three-step strategy to improve the performance for discourse CWS. First, the method utilizes an auxiliary segmenter to remedy the limitation on pre-segmenter. Then the context-aware algorithm computes the confidence of each split. The maximum probability path is reconstructed via this algorithm. Besides, in order to evaluate the performance in discourse, we build a new benchmark consisting of the latest news and Chinese medical articles. Extensive experiments on this benchmark show that our proposed method achieves a competitive performance on a document-level real-world scenario for CWS. 2020.iwdp-1.5 @@ -99,7 +99,7 @@
Bridging Question Answering and Discourse The case of Multi-Sentence Questions - BonnieWebber + BonnieWebber 48 In human question-answering (QA), questions are often expressed in the form of multiple sentences. One can see this in both spoken QA interactions, when one person asks a question of another, and written QA, such as are found on-line in FAQs and in what are called ”Community Question-Answering Forums”. Computer-based QA has taken the challenge of these ”multi-sentence questions” to be that of breaking them into an appropriately ordered sequence of separate questions, with both the previous questions and their answers serving as context for the next question. This can be seen, for example, in two recent workshops at AAAI called ”Reasoning for Complex QA” [https://rcqa-ws.github.io/program/]. We claim that, while appropriate for some types of ”multi-sentence questions” (MSQs), it is not appropriate for all, because they are essentially different types of discourse. To support this claim, we need to provide evidence that: • different types of MSQs are answered differently in written or spoken QA between people; • people can (and do) distinguish these different types of MSQs; • systems can be made to both distinguish different types of MSQs and provide appropriate answers. 2020.iwdp-1.8 diff --git a/data/xml/2020.iwltp.xml b/data/xml/2020.iwltp.xml index 256c7b7d67..6ca9fc54ba 100644 --- a/data/xml/2020.iwltp.xml +++ b/data/xml/2020.iwltp.xml @@ -4,11 +4,11 @@ Proceedings of the 1st International Workshop on Language Technology Platforms GeorgRehm - KalinaBontcheva - KhalidChoukri - JanHajič - SteliosPiperidis - AndrejsVasiļjevs + KalinaBontcheva + KhalidChoukri + JanHajič + SteliosPiperidis + AndrejsVasiļjevs European Language Resources Association
Marseille, France
May @@ -22,11 +22,11 @@ Infrastructure for the Science and Technology of Language <fixed-case>PORTULAN</fixed-case> <fixed-case>CLARIN</fixed-case> - AntónioBranco + AntónioBranco AmáliaMendes PauloQuaresma LuísGomes - JoãoSilva + JoãoSilva AndreaTeixeira 1–7 This paper presents the PORTULAN CLARIN Research Infrastructure for the Science and Technology of Language, which is part of the European research infrastructure CLARIN ERIC as its Portuguese national node, and belongs to the Portuguese National Roadmap of Research Infrastructures of Strategic Relevance. It encompasses a repository, where resources and metadata are deposited for long-term archiving and access, and a workbench, where Language Technology tools and applications are made available through different modes of interaction, among many other services. It is an asset of utmost importance for the technological development of natural languages and for their preparation for the digital age, contributing to ensure the citizenship of their speakers in the information society. @@ -40,7 +40,7 @@ BettinaKlimek ChristianFäth ThierryDeclerck - John PhilipMcCrae + John PhilipMcCrae 8–15 In this paper we describe the current state of development of the Linguistic Linked Open Data (LLOD) infrastructure, an LOD(sub-)cloud of linguistic resources, which covers various linguistic data bases, lexicons, corpora, terminology and metadata repositories. We give in some details an overview of the contributions made by the European H2020 projects “Prêt-à-LLOD” (‘Ready-to-useMultilingual Linked Language Data for Knowledge Services across Sectors’) and “ELEXIS” (‘European Lexicographic Infrastructure’) to the further development of the LLOD. 2020.iwltp-1.2 @@ -66,10 +66,10 @@ IonuțPistol ȘerbanBoghiu Anca-DianaBibiri - DanielaGîfu + DanielaGîfu AndreiScutelnicu - MihaelaOnofrei - DianaTrandabăț + MihaelaOnofrei + DianaTrandabăț GeorgeBugeag 22–27 This paper describes the on-going work carried out within the CoBiLiRo (Bimodal Corpus for Romanian Language) research project, part of ReTeRom (Resources and Technologies for Developing Human-Machine Interfaces in Romanian). Data annotation finds increasing use in speech recognition and synthesis with the goal to support learning processes. In this context, a variety of different annotation systems for application to Speech and Text Processing environments have been presented. Even if many designs for the data annotations workflow have emerged, the process of handling metadata, to manage complex user-defined annotations, is not covered enough. We propose a design of the format aimed to serve as an annotation standard for bimodal resources, which facilitates searching, editing and statistical analysis operations over it. The design and implementation of an infrastructure that houses the resources are also presented. The goal is widening the dissemination of bimodal corpora for research valorisation and use in applications. Also, this study reports on the main operations of the web Platform which hosts the corpus and the automatic conversion flows that brings the submitted files at the format accepted by the Platform. @@ -83,12 +83,12 @@ Franciskade Jong AlexanderKönig DarjaFišer - DieterVan Uytvanck + DieterVan Uytvanck TeroAalto LarsBorin OlgaGerassimenko JanHajic - Henkvan den Heuvel + Henkvan den Heuvel NeemeKahusk KristaLiin MartinMatthiesen @@ -105,7 +105,7 @@ ThierryEtchegoyhen BorjaAnza Porras AndoniAzpeitia - EvaMartínez Garcia + EvaMartínez Garcia José LuisFonseca PatriciaFonseca PauloVale @@ -121,7 +121,7 @@ RuiNeto MaiteMelero DavidPerez - AntónioBranco + AntónioBranco RubenBranco LuísGomes 35–43 @@ -137,18 +137,18 @@ IvanSimonini ArminSchweinfurth AdelheidGlott - SebastianStüker + SebastianStüker Thai-SonNguyen FelixSchneider Thanh-LeHa - AlexWaibel + AlexWaibel BarryHaddow PhilipWilliams RicoSennrich - OndřejBojar + OndřejBojar SangeetSagar DominikMacháček - OtakarSmrž + OtakarSmrž 44–49 This paper presents our progress towards deploying a versatile communication platform in the task of highly multilingual live speech translation for conferences and remote meetings live subtitling. The platform has been designed with a focus on very low latency and high flexibility while allowing research prototypes of speech and text processing tools to be easily connected, regardless of where they physically run. We outline our architecture solution and also briefly compare it with the ELG platform. Technical details are provided on the most important components and we summarize the test deployment events we ran so far. 2020.iwltp-1.7 @@ -157,7 +157,7 @@ <fixed-case>E</fixed-case>co.pangeamt: Industrializing Neural <fixed-case>MT</fixed-case> - MercedesGarcía-Martínez + MercedesGarcía-Martínez ManuelHerranz AmandoEstela ÁngelaFranco @@ -180,7 +180,7 @@ Towards Standardization of Web Service Protocols for <fixed-case>NLP</fixed-case>aa<fixed-case>S</fixed-case> Jin-DongKim - NancyIde + NancyIde KeithSuderman 59–65 Several web services for various natural language processing (NLP) tasks (‘‘NLP-as-a-service” or NLPaaS) have recently been made publicly available. However, despite their similar functionality these services often differ in the protocols they use, thus complicating the development of clients accessing them. A survey of currently available NLPaaS services suggests that it may be possible to identify a minimal application layer protocol that can be shared by NLPaaS services without sacrificing functionality or convenience, while at the same time simplifying the development of clients for these services. In this paper, we hope to raise awareness of the interoperability problems caused by the variety of existing web service protocols, and describe an effort to identify a set of best practices for NLPaaS protocol design. To that end, we survey and compare protocols used by NLPaaS services and suggest how these protocols may be further aligned to reduce variation. @@ -203,7 +203,7 @@ A Workflow Manager for Complex <fixed-case>NLP</fixed-case> and Content Curation Workflows - JulianMoreno-Schneider + JulianMoreno-Schneider PeterBourgonje FlorianKintzel GeorgRehm @@ -217,7 +217,7 @@ A Processing Platform Relating Data and Tools for <fixed-case>R</fixed-case>omanian Language VasilePăiș RaduIon - DanTufiș + DanTufiș 81–88 This paper presents RELATE (http://relate.racai.ro), a high-performance natural language platform designed for Romanian language. It is meant both for demonstration of available services, from text-span annotations to syntactic dependency trees as well as playing or automatically synthesizing Romanian words, and for the development of new annotated corpora. It also incorporates the search engines for the large COROLA reference corpus of contemporary Romanian and the Romanian wordnet. It integrates multiple text and speech processing modules and exposes their functionality through a web interface designed for the linguist researcher. It makes use of a scheduler-runner architecture, allowing processing to be distributed across multiple computing nodes. A series of input/output converters allows large corpora to be loaded, processed and exported according to user preferences. 2020.iwltp-1.13 @@ -241,22 +241,22 @@ Towards an Interoperable Ecosystem of <fixed-case>AI</fixed-case> and <fixed-case>LT</fixed-case> Platforms: A Roadmap for the Implementation of Different Levels of Interoperability GeorgRehm - DimitrisGalanis - PennyLabropoulou + DimitrisGalanis + PennyLabropoulou SteliosPiperidis MartinWelß RicardoUsbeck - JoachimKöhler + JoachimKöhler MiltosDeligiannis KaterinaGkirtzou JohannesFischer ChristianChiarcos NilsFeldhus - JulianMoreno-Schneider + JulianMoreno-Schneider FlorianKintzel ElenaMontiel VíctorRodríguez Doncel - John PhilipMcCrae + John PhilipMcCrae DavidLaqua Irina PatriciaTheile ChristianDittmar diff --git a/data/xml/2020.iwpt.xml b/data/xml/2020.iwpt.xml index 1929816bde..e3f9f3709b 100644 --- a/data/xml/2020.iwpt.xml +++ b/data/xml/2020.iwpt.xml @@ -4,14 +4,14 @@ Proceedings of the 16th International Conference on Parsing Technologies and the IWPT 2020 Shared Task on Parsing into Enhanced Universal Dependencies GosseBouma - YujiMatsumoto + YujiMatsumoto StephanOepen KenjiSagae - DjaméSeddah - WeiweiSun - AndersSøgaard + DjaméSeddah + WeiweiSun + AndersSøgaard ReutTsarfaty - DanZeman + DanZeman Association for Computational Linguistics
Online
July @@ -58,7 +58,7 @@
Integrating Graph-Based and Transition-Based Dependency Parsers in the Deep Contextualized Era - AgnieszkaFalenska + AgnieszkaFalenska AndersBjörkelund JonasKuhn 25–39 @@ -95,7 +95,7 @@ ZhifengHu SerhiiHavrylov IvanTitov - Shay B.Cohen + Shay B.Cohen 62–72 The goal of homomorphic encryption is to encrypt data such that another party can operate on it without being explicitly exposed to the content of the original data. We introduce an idea for a privacy-preserving transformation on natural language data, inspired by homomorphic encryption. Our primary tool is obfuscation, relying on the properties of natural language. Specifically, a given English text is obfuscated using a neural model that aims to preserve the syntactic relationships of the original sentence so that the obfuscated sentence can be parsed instead of the original one. The model works at the word level, and learns to obfuscate each word separately by changing it into a new word that has a similar syntactic role. The text obfuscated by our model leads to better performance on three syntactic parsers (two dependency and one constituency parsers) in comparison to an upper-bound random substitution baseline. More specifically, the results demonstrate that as more terms are obfuscated (by their part of speech), the substitution upper bound significantly degrades, while the neural model maintains a relatively high performing parser. All of this is done without much sacrifice of privacy compared to the random substitution upper bound. We also further analyze the results, and discover that the substituted words have similar syntactic properties, but different semantic content, compared to the original words. 2020.iwpt-1.7 @@ -107,7 +107,7 @@ Tensors over Semirings for Latent-Variable Weighted Logic Programs EsmaBalkir DanielGildea - Shay B.Cohen + Shay B.Cohen 73–90 Semiring parsing is an elegant framework for describing parsers by using semiring weighted logic programs. In this paper we present a generalization of this concept: latent-variable semiring parsing. With our framework, any semiring weighted logic program can be latentified by transforming weights from scalar values of a semiring to rank-n arrays, or tensors, of semiring values, allowing the modelling of latent-variable models within the semiring parsing framework. Semiring is too strong a notion when dealing with tensors, and we have to resort to a weaker structure: a partial semiring. We prove that this generalization preserves all the desired properties of the original semiring framework while strictly increasing its expressiveness. 2020.iwpt-1.8 @@ -140,8 +140,8 @@ Self-Training for Unsupervised Parsing with <fixed-case>PRPN</fixed-case> AnhadMohananey - KatharinaKann - Samuel R.Bowman + KatharinaKann + Samuel R.Bowman 105–110 Neural unsupervised parsing (UP) models learn to parse without access to syntactic annotations, while being optimized for another task like language modeling. In this work, we propose self-training for neural UP models: we leverage aggregated annotations predicted by copies of our model as supervision for future copies. To be able to use our model’s predictions during training, we extend a recent neural UP architecture, the PRPN (Shen et al., 2018a), such that it can be trained in a semi-supervised fashion. We then add examples with parses predicted by our model to our unlabeled UP training data. Our self-trained model outperforms the PRPN by 8.1% F1 and the previous state of the art by 1.6% F1. In addition, we show that our architecture can also be helpful for semi-supervised parsing in ultra-low-resource settings. 2020.iwpt-1.11 @@ -152,7 +152,7 @@ Span-Based <fixed-case>LCFRS</fixed-case>-2 Parsing MilošStanojević - MarkSteedman + MarkSteedman 111–121 The earliest models for discontinuous constituency parsers used mildly context-sensitive grammars, but the fashion has changed in recent years to grammar-less transition-based parsers that use strong neural probabilistic models to greedily predict transitions. We argue that grammar-based approaches still have something to contribute on top of what is offered by transition-based parsers. Concretely, by using a grammar formalism to restrict the space of possible trees we can use dynamic programming parsing algorithms for exact search for the most probable tree. Previous chart-based parsers for discontinuous formalisms used probabilistically weak generative models. We instead use a span-based discriminative neural model that preserves the dynamic programming properties of the chart parsers. Our parser does not use an explicit grammar, but it does use explicit grammar formalism constraints: we generate only trees that are within the LCFRS-2 formalism. These properties allow us to construct a new parsing algorithm that runs in lower worst-case time complexity of O(l nˆ4 +nˆ6), where n is the sentence length and l is the number of unique non-terminal labels. This parser is efficient in practice, provides best results among chart-based parsers, and is competitive with the best transition based parsers. We also show that the main bottleneck for further improvement in performance is in the restriction of fan-out to degree 2. We show that well-nestedness is helpful in speeding up parsing, but lowers accuracy. 2020.iwpt-1.12 @@ -167,7 +167,7 @@ HyonsuChoe SeokwonPark HanHe - Jinho D.Choi + Jinho D.Choi Na-RaeHan Jena D.Hwang HansaemKim @@ -237,7 +237,7 @@ Adaptation of Multilingual Transformer Encoder for Robust Enhanced <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Parsing HanHe - Jinho D.Choi + Jinho D.Choi 181–191 This paper presents our enhanced dependency parsing approach using transformer encoders, coupled with a simple yet powerful ensemble algorithm that takes advantage of both tree and graph dependency parsing. Two types of transformer encoders are compared, a multilingual encoder and language-specific encoders. Our dependency tree parsing (DTP) approach generates only primary dependencies to form trees whereas our dependency graph parsing (DGP) approach handles both primary and secondary dependencies to form graphs. Since DGP does not guarantee the generated graphs are acyclic, the ensemble algorithm is designed to add secondary arcs predicted by DGP to primary arcs predicted by DTP. Our results show that models using the multilingual encoder outperform ones using the language specific encoders for most languages. The ensemble models generally show higher labeled attachment score on enhanced dependencies (ELAS) than the DTP and DGP models. As the result, our best models rank the third place on the macro-average ELAS over 17 languages. 2020.iwpt-1.19 diff --git a/data/xml/2020.iwslt.xml b/data/xml/2020.iwslt.xml index 5e71039de8..05d083690b 100644 --- a/data/xml/2020.iwslt.xml +++ b/data/xml/2020.iwslt.xml @@ -4,14 +4,14 @@ Proceedings of the 17th International Conference on Spoken Language Translation MarcelloFederico - AlexWaibel + AlexWaibel KevinKnight SatoshiNakamura - HermannNey + HermannNey JanNiehues - SebastianStüker + SebastianStüker DekaiWu - JosephMariani + JosephMariani FrancoisYvon Association for Computational Linguistics
Online
@@ -29,7 +29,7 @@ EbrahimAnsari AmittaiAxelrod NguyenBach - OndřejBojar + OndřejBojar RoldanoCattoni FahimDalvi NadirDurrani @@ -40,7 +40,7 @@ KevinKnight XutaiMa AjayNagesh - MatteoNegri + MatteoNegri JanNiehues JuanPino ElizabethSalesky @@ -66,7 +66,7 @@ AntoineCaubrière BenjaminLecouteux YannickEstève - LaurentBesacier + LaurentBesacier 35–43 This paper describes the ON-TRAC Consortium translation systems developed for two challenge tracks featured in the Evaluation Campaign of IWSLT 2020, offline speech translation and simultaneous speech translation. ON-TRAC Consortium is composed of researchers from three French academic laboratories: LIA (Avignon Université), LIG (Université Grenoble Alpes), and LIUM (Le Mans Université). Attention-based encoder-decoder models, trained end-to-end, were used for our submissions to the offline speech translation track. Our contributions focused on data augmentation and ensembling of multiple models. In the simultaneous speech translation track, we build on Transformer-based wait-k models for the text-to-text subtask. For speech-to-text simultaneous translation, we attach a wait-k MT system to a hybrid ASR system. We propose an algorithm to control the latency of the ASR+MT cascade and achieve a good latency-quality trade-off on both subtasks. 2020.iwslt-1.2 @@ -92,7 +92,7 @@
<fixed-case>KIT</fixed-case>’s <fixed-case>IWSLT</fixed-case> 2020 <fixed-case>SLT</fixed-case> Translation System - Ngoc-QuanPham + Ngoc-QuanPham FelixSchneider Tuan-NamNguyen Thanh-LeHa @@ -109,9 +109,9 @@ End-to-End Simultaneous Translation System for <fixed-case>IWSLT</fixed-case>2020 Using Modality Agnostic Meta-Learning - Hou JeungHan + Hou JeungHan Mohd AbbasZaidi - Sathish ReddyIndurthi + Sathish ReddyIndurthi Nikhil KumarLakumarapu BeomseokLee SanghaKim @@ -138,8 +138,8 @@ End-to-End Offline Speech Translation System for <fixed-case>IWSLT</fixed-case> 2020 using Modality Agnostic Meta-Learning Nikhil KumarLakumarapu BeomseokLee - Sathish ReddyIndurthi - Hou JeungHan + Sathish ReddyIndurthi + Hou JeungHan Mohd AbbasZaidi SanghaKim 73–79 @@ -152,8 +152,8 @@ End-to-End Speech-Translation with Knowledge Distillation: <fixed-case>FBK</fixed-case>@<fixed-case>IWSLT</fixed-case>2020 MarcoGaido - Mattia A.Di Gangi - MatteoNegri + Mattia A.Di Gangi + MatteoNegri MarcoTurchi 80–88 This paper describes FBK’s participation in the IWSLT 2020 offline speech translation (ST) task. The task evaluates systems’ ability to translate English TED talks audio into German texts. The test talks are provided in two versions: one contains the data already segmented with automatic tools and the other is the raw data without any segmentation. Participants can decide whether to work on custom segmentation or not. We used the provided segmentation. Our system is an end-to-end model based on an adaptation of the Transformer for speech data. Its training process is the main focus of this paper and it is based on: i) transfer learning (ASR pretraining and knowledge distillation), ii) data augmentation (SpecAugment, time stretch and synthetic data), iii)combining synthetic and real data marked as different domains, and iv) multi-task learning using the CTC loss. Finally, after the training with word-level knowledge distillation is complete, our ST models are fine-tuned using label smoothed cross entropy. Our best model scored 29 BLEU on the MuST-CEn-De test set, which is an excellent result compared to recent papers, and 23.7 BLEU on the same data segmented with VAD, showing the need for researching solutions addressing this specific data condition. @@ -177,7 +177,7 @@ RaúlVázquez MikkoAulamo UmutSulubacak - JörgTiedemann + JörgTiedemann 95–102 This paper describes the University of Helsinki Language Technology group’s participation in the IWSLT 2020 offline speech translation task, addressing the translation of English audio into German text. In line with this year’s task objective, we train both cascade and end-to-end systems for spoken language translation. We opt for an end-to-end multitasking architecture with shared internal representations and a cascade approach that follows a standard procedure consisting of ASR, correction, and MT stages. We also describe the experiments that served as a basis for the submitted systems. Our experiments reveal that multitasking training with shared internal representations is not only possible but allows for knowledge-transfer across modalities. 2020.iwslt-1.10 @@ -189,7 +189,7 @@ The <fixed-case>AFRL</fixed-case> <fixed-case>IWSLT</fixed-case> 2020 Systems: Work-From-Home Edition BrianOre EricHansen - TimAnderson + TimAnderson JeremyGwinnup 103–108 This report summarizes the Air Force Research Laboratory (AFRL) submission to the offline spoken language translation (SLT) task as part of the IWSLT 2020 evaluation campaign. As in previous years, we chose to adopt the cascade approach of using separate systems to perform speech activity detection, automatic speech recognition, sentence segmentation, and machine translation. All systems were neural based, including a fully-connected neural network for speech activity detection, a Kaldi factorized time delay neural network with recurrent neural network (RNN) language model rescoring for speech recognition, a bidirectional RNN with attention mechanism for sentence segmentation, and transformer networks trained with OpenNMT and Marian for machine translation. Our primary submission yielded BLEU scores of 21.28 on tst2019 and 23.33 on tst2020. @@ -247,7 +247,7 @@ LongZhou YangZhao JiajunZhang - ChengqingZong + ChengqingZong 130–139 This paper describes the CASIA’s system for the IWSLT 2020 open domain translation task. This year we participate in both Chinese→Japanese and Japanese→Chinese translation tasks. Our system is neural machine translation system based on Transformer model. We augment the training data with knowledge distillation and back translation to improve the translation performance. Domain data classification and weighted domain model ensemble are introduced to generate the final translation result. We compare and analyze the performance on development data with different model settings and different data processing techniques. 2020.iwslt-1.15 @@ -331,7 +331,7 @@ NikhilSaini JyotsanaKhatri PreethiJyothi - PushpakBhattacharyya + PushpakBhattacharyya 178–186 Machine translation systems perform reasonably well when the input is well-formed speech or text. Conversational speech is spontaneous and inherently consists of many disfluencies. Producing fluent translations of disfluent source text would typically require parallel disfluent to fluent training data. However, fluent translations of spontaneous speech are an additional resource that is tedious to obtain. This work describes the submission of IIT Bombay to the Conversational Speech Translation challenge at IWSLT 2020. We specifically tackle the problem of disfluency removal in disfluent-to-fluent text-to-text translation assuming no access to fluent references during training. Common patterns of disfluency are extracted from disfluent references and a noise induction model is used to simulate them starting from a clean monolingual corpus. This synthetically constructed dataset is then considered as a proxy for labeled data during training. We also make use of additional fluent text in the target language to help generate fluent translations. This work uses no fluent references during training and beats a baseline model by a margin of 4.21 and 3.11 BLEU points where the baseline uses disfluent and fluent references, respectively. Index Terms- disfluency removal, machine translation, noise induction, leveraging monolingual data, denoising for disfluency removal. 2020.iwslt-1.22 @@ -362,7 +362,7 @@ PeterPolák SangeetSagar DominikMacháček - OndřejBojar + OndřejBojar 191–199 In this paper, we present our submission to the Non-Native Speech Translation Task for IWSLT 2020. Our main contribution is a proposed speech recognition pipeline that consists of an acoustic model and a phoneme-to-grapheme model. As an intermediate representation, we utilize phonemes. We demonstrate that the proposed pipeline surpasses commercially used automatic speech recognition (ASR) and submit it into the ASR track. We complement this ASR with off-the-shelf MT systems to take part also in the speech translation track. 2020.iwslt-1.24 @@ -375,7 +375,7 @@ JonášKratochvíl SangeetSagar MatúšŽilinec - OndřejBojar + OndřejBojar Thai-SonNguyen FelixSchneider PhilipWilliams @@ -390,7 +390,7 @@ Is 42 the Answer to Everything in Subtitling-oriented Speech Translation? AlinaKarakanta - MatteoNegri + MatteoNegri MarcoTurchi 209–219 Subtitling is becoming increasingly important for disseminating information, given the enormous amounts of audiovisual content becoming available daily. Although Neural Machine Translation (NMT) can speed up the process of translating audiovisual content, large manual effort is still required for transcribing the source language, and for spotting and segmenting the text into proper subtitles. Creating proper subtitles in terms of timing and segmentation highly depends on information present in the audio (utterance duration, natural pauses). In this work, we explore two methods for applying Speech Translation (ST) to subtitling, a) a direct end-to-end and b) a classical cascade approach. We discuss the benefit of having access to the source language speech for improving the conformity of the generated subtitles to the spatial and temporal subtitling constraints and show that length is not the answer to everything in the case of subtitling-oriented ST. @@ -452,7 +452,7 @@ From Speech-to-Speech Translation to Automatic Dubbing MarcelloFederico RobertEnyedi - RobertoBarra-Chicote + RobertoBarra-Chicote RitwikGiri UmutIsik ArvindhKrishnaswamy @@ -466,11 +466,11 @@ Joint Translation and Unit Conversion for End-to-end Localization - GeorgianaDinu + GeorgianaDinu PrashantMathur MarcelloFederico StanislasLauly - YaserAl-Onaizan + YaserAl-Onaizan 265–271 A variety of natural language tasks require processing of textual data which contains a mix of natural language and formal languages such as mathematical expressions. In this paper, we take unit conversions as an example and propose a data augmentation technique which lead to models learning both translation and conversion tasks as well as how to adequately switch between them for end-to-end localization. 2020.iwslt-1.32 @@ -495,8 +495,8 @@ YuriBizzoni Tom SJuzek CristinaEspaña-Bonet - KoelDutta Chowdhury - Josefvan Genabith + KoelDutta Chowdhury + Josefvan Genabith ElkeTeich 280–290 Translationese is a phenomenon present in human translations, simultaneous interpreting, and even machine translations. Some translationese features tend to appear in simultaneous interpreting with higher frequency than in human text translation, but the reasons for this are unclear. This study analyzes translationese patterns in translation, interpreting, and machine translation outputs in order to explore possible reasons. In our analysis we – (i) detail two non-invasive ways of detecting translationese and (ii) compare translationese across human and machine translations from text and speech. We find that machine translation shows traces of translationese, but does not reproduce the patterns found in human translation, offering support to the hypothesis that such patterns are due to the model (human vs machine) rather than to the data (written vs spoken). diff --git a/data/xml/2020.jeptalnrecital.xml b/data/xml/2020.jeptalnrecital.xml index 73b2dfad90..8ed626414d 100644 --- a/data/xml/2020.jeptalnrecital.xml +++ b/data/xml/2020.jeptalnrecital.xml @@ -96,7 +96,7 @@ Où en sommes-nous dans la reconnaissance des entités nommées structurées à partir de la parole ? (Where are we in Named Entity Recognition from speech ?) AntoineCaubrière - SophieRosset + SophieRosset YannickEstève AntoineLaurent EmmanuelMorin @@ -111,7 +111,7 @@ AnaïsChanclu LaurianneGeorgeton CorinneFredouille - Jean-FrancoisBonastre + Jean-FrancoisBonastre 73–81 Cet article présente la base de données PTSVOX, créée par le Service Central de la Police Technique et Scientifique (SCPTS) spécifiquement pour la comparaison de voix dans le cadre judiciaire. PTSVOX contient 369 locuteurs et locutrices qui ont été enregistrés au microphone et au téléphone. PTSVOX a été conçue pour mesurer l’influence de différents facteurs de variabilité fréquemment rencontrés dans les cas pratiques en identification judiciaire, comme le type de parole, le temps écoulé et le matériel d’enregistrement. Pour cela, 24 des locuteurs de PTSVOX (12 hommes et 12 femmes) ont été enregistrés une fois par mois pendant 3 mois, en parole spontanée et en parole lue. Dans cet article, nous présentons dans un premier temps la base PTSVOX, puis nous décrivons des protocoles standards ainsi que les systèmes de référence associés à PTSVOX, avec une évaluation de leur performance. 2020.jeptalnrecital-jep.9 @@ -123,7 +123,7 @@ EstelleChardenon CécileFougeron NicolasAudibert - CédricGendrot + CédricGendrot 82–90 Si l’étude de la variabilité entre locuteurs permet d’identifier des caractéristiques phonétiques potentiellement discriminantes, voire spécifiques, il est essentiel de comprendre, si et comment, ces caractéristiques varient chez un même locuteur. Ici, nous examinons la variabilité de caractéristiques liées à la gestion temporelle de la parole sur un nombre limité de locuteurs, enregistrés sur plusieurs répétitions dans une même session, et sur 6 à 7 sessions espacées d’une année. Sur cette vingtaine d’enregistrements par locuteur, nous observons comment le débit articulatoire, les modulations de ce débit, et la durée des pauses varient en fonction de la répétition et de la session et en interaction avec le locuteur. Les résultats montrent que c’est dans la variation de gestion temporelle de la parole que les locuteurs se distinguent les uns des autres, en termes de régularité ou non entre enregistrements et au sein d’un même enregistrement. 2020.jeptalnrecital-jep.10 @@ -133,7 +133,7 @@ Caractérisation du locuteur par <fixed-case>CNN</fixed-case> à l’aide des contours d’intensité et d’intonation : comparaison avec le spectrogramme (<fixed-case>CNN</fixed-case> speaker characterisation through prosody : spectrogram comparison ) GabrieleChignoli - CédricGendrot + CédricGendrot EmmanuelFerragne 91–99 Dans ce travail nous avons recours aux variations de f0 et d’intensité de 44 locuteurs francophones à partir de séquences de 4 secondes de parole spontanée pour comprendre comment ces paramètres prosodiques peuvent être utilisés pour caractériser des locuteurs. Une classification automatique est effectuée avec un réseau de neurones convolutifs, fournissant comme réponse des scores de probabilité pour chacun des 44 locuteurs modélisés. Une représentation par spectrogrammes a été utilisée comme référence pour le même système de classification. Nous avons pu mettre en avant la pertinence de l’intensité, et lorsque les deux paramètres prosodiques sont combinés pour représenter les locuteurs nous observons un score qui atteint en moyenne 59 % de bonnes classifications. @@ -195,7 +195,7 @@ Unités prosodiques et grammaire intonative du français : vers une nouvelle approche (Prosodic Units and Intonational Grammar in <fixed-case>F</fixed-case>rench: towards a new Approach) - ElisabethDelais-Roussarie + ElisabethDelais-Roussarie BrechtjePost HiyonYoo 145–153 @@ -206,7 +206,7 @@ Quel type de systèmes utiliser pour la transcription automatique du français ? Les <fixed-case>HMM</fixed-case> font de la résistance (What system for the automatic transcription of <fixed-case>F</fixed-case>rench in audiovisual broadcasts ?) - PaulDeléglise + PaulDeléglise CaroleLailler 154–162 Forts d’une utilisation couronnée de succès en traduction automatique, les systèmes end-to-end dont la sortie réside en une suite de caractères, ont vu leur utilisation étendue à la transcription automatique de la parole. De nombreuses comparaisons ont alors été effectuées sur des corpus anglais libres de droits, de parole lue. Nous proposons ici de réaliser une comparaison entre deux systèmes état de l’art, non pas sur de la parole lue mais bel et bien sur un corpus d’émissions audiovisuelles françaises présentant différents degrés de spontanéité. Le premier est un end-to-end et le second est un système hybride (HMM/DNN). L’obtention de résultats satisfaisants pour le end-to-end nécessitant un lexique et modèle de langage dédiés, il est intéressant de constater qu’une meilleure intégration dans les systèmes hybrides (HMM/DNN) est source de performances supérieures, notamment en Français où le contexte est primordial pour capturer un énoncé. @@ -240,7 +240,7 @@ Statistiques des sons naturels et hypothèse du codage efficace pour la perception de la musique et de la parole: Mise en place d’une méthodologie d’évaluation (Natural sound statistics and the efficient coding hypothesis for music and speech perception : setting-up an evaluation methodology) AgnieszkaDuniec OlivierCrouzet - ElisabethDelais-Roussarie + ElisabethDelais-Roussarie 181–189 L’hypothèse du codage efficace prédit que les systèmes perceptifs sont optimalement adaptés aux propriétés statistiques des signaux naturels. Ce caractère optimal a été récemment évalué sur la base d’analyses statistiques réalisées sur des décompositions spectrales de signaux de parole représentés comme des modulations d’énergie. Ces travaux pourraient trouver des applications directes dans l’amélioration du codage des signaux acoustiques par des implants cochléaires. Cependant, les recherches sur la perception de la musique par des personnes sourdes portant un implant cochléaire mettent en avant des limites qui semblent discordantes avec les performances observées concernant certaines propriétés fondamentales de la parole. Nous comparons les résultats d’analyses statistiques de signaux musicaux avec ceux qui ont été réalisés sur de la parole dans le but d’évaluer les impacts respectifs de ces deux gammes de signaux sonores pour évaluer leurs contributions à cette proposition théorique. Des résultats préliminaires et les perspectives futures sont discutés. 2020.jeptalnrecital-jep.21 @@ -329,7 +329,7 @@ Représentation du genre dans des données open source de parole (Gender representation in open source speech resources 1 With the rise of artificial intelligence (<fixed-case>AI</fixed-case>) and the growing use of deep-learning architectures, the question of ethics and transparency in <fixed-case>AI</fixed-case> systems has become a central concern within the research community) MahaultGarnerin SolangeRossato - LaurentBesacier + LaurentBesacier 244–252 Avec l’essor de l’intelligence artificielle (IA) et l’utilisation croissante des architectures d’apprentissage profond, la question de l’éthique et de la transparence des systèmes d’IA est devenue une préoccupation centrale au sein de la communauté de recherche. Dans cet article, nous proposons une étude sur la représentation du genre dans les ressources de parole disponibles sur la plateforme Open Speech and Language Resource. Un tout premier résultat est la difficulté d’accès aux informations sur le genre des locuteurs. Ensuite, nous montrons que l’équilibre entre les catégories de genre dépend de diverses caractéristiques des corpus (discours élicité ou non, tâche adressée). En nous appuyant sur des travaux antérieurs, nous reprenons quelques principes concernant les métadonnées dans l’optique d’assurer une meilleure transparence des systèmes de parole construits à l’aide de ces corpus. 2020.jeptalnrecital-jep.28 @@ -350,7 +350,7 @@ Informations segmentales pour la caractérisation phonétique du locuteur : variabilité inter- et intra-locuteurs (An automatic classification task involving 44 speakers was performed using convolutional neural networks (<fixed-case>CNN</fixed-case>) on broadband spectrograms extracted from 2-second sequences of a spontaneous speech corpus (<fixed-case>NCCF</fixed-case>r)) - CedricGendrot + CedricGendrot EmmanuelFerragne ThomasPellegrini 262–270 @@ -377,7 +377,7 @@ AdrienGresse MathiasQuillot RichardDufour - Jean-FrançoisBonastre + Jean-FrançoisBonastre 280–288 La recherche d’acteurs vocaux pour les productions audiovisuelles est réalisée par des directeurs artistiques (DA). Les DA sont constamment à la recherche de nouveaux talents vocaux, mais ne peuvent effectuer des auditions à grande échelle. Les outils automatiques capables de suggérer des voix présentent alors un grand intérêt pour l’industrie audiovisuelle. Dans les travaux précédents, nous avons montré l’existence d’informations acoustiques permettant de reproduire des choix du DA. Dans cet article, nous proposons une approche à base de réseaux de neurones pour construire une représentation adaptée aux personnages/rôles visés, appelée p-vecteur. Nous proposons ensuite de tirer parti de données externes pour la représentation de voix, proches de celles d’origine, au moyen de méthodes de distillation de la connaissance. Les expériences menées sur des extraits de voix de jeux vidéo montrent une amélioration significative de l’approche p-vecteur, avec distillation de la connaissance, par rapport à une représentation x-vecteur, état-de-l’art en reconnaissance du locuteur. 2020.jeptalnrecital-jep.32 @@ -388,9 +388,9 @@ Lénition et fortition des occlusives en coda finale dans deux langues romanes : le français et le roumain (Lenition and fortition of word-final stops in two <fixed-case>R</fixed-case>omance languages: <fixed-case>F</fixed-case>rench and <fixed-case>R</fixed-case>omanian) MathildeHutin AdèleJatteau - IoanaVasilescu - LoriLamel - MartineAdda-Decker + IoanaVasilescu + LoriLamel + MartineAdda-Decker 289–298 L’exploration automatisée de grands corpus permet d’analyser plus finement la relation entre motifs de variation phonétique synchronique et changements diachroniques : les erreurs dans les transcriptions automatiques sont riches d’enseignements sur la variation contextuelle en parole continue et sur les possibles mutations systémiques sur le point d’apparaître. Dès lors, il est intéressant de se pencher sur des phénomènes phonologiques largement attestés dans les langues en diachronie comme en synchronie pour établir leur émergence ou non dans des langues qui n’y sont pas encore sujettes. La présente étude propose donc d’utiliser l’alignement forcé avec variantes de prononciation pour observer les alternances de voisement en coda finale de mot dans deux langues romanes : le français et le roumain. Il sera mis en évidence, notamment, que voisement et dévoisement non-canoniques des codas françaises comme roumaines ne sont pas le fruit du hasard mais bien des instances de dévoisement final et d’assimilation régressive de trait laryngal, qu’il s’agisse de voisement ou de non-voisement. 2020.jeptalnrecital-jep.33 @@ -673,7 +673,7 @@ RichardDufour AxelRoebel NicolasObin - Jean-FrançoisBonastre + Jean-FrançoisBonastre EmmanuelEthis 525–533 La voix actée représente un défi majeur pour les futures interfaces vocales avec un potentiel d’application extrêmement important pour la transformation numérique des secteurs de la culture et de la communication, comme la production ou la post-production de voix pour les séries ou le cinéma. Un aspect central de la voix actée repose sur la notion d’interprétation, un aspect peu étudié dans la communauté scientifique de la parole. Cet article propose un état des lieux et une réflexion sur les défis scientifiques et les applications technologiques de la voix actée : à la croisée de l’acoustique, de la linguistique, de la culture, et de l’apprentissage machine. Une analyse préliminaire des pratiques permet de rendre compte de la diversité de l’écosystème des “métiers de la voix” et de pointer les fonctions et les conventions qui s’y rattachent. Nous nous intéresserons ensuite à la pratique particulière du doublage de voix, en faisant ressortir ses enjeux et problématiques spécifiques puis en présentant des solutions proposées pour modéliser les codes expressifs de la voix d’un acteur ou les choix d’un opérateur pour le doublage. @@ -803,7 +803,7 @@ Réduction temporelle en français spontané : où se cache-t-elle ? Une étude des segments, des mots et séquences de mots fréquemment réduits () YaruWu - MartineAdda-Decker + MartineAdda-Decker 627–635 Cette étude vise à proposer une méthode adaptée à l’étude de divers phénomènes de variation dans les grands corpus utilisant l’alignement automatique de la parole. Cette méthode est appliquée pour étudier la réduction temporelle en français spontané. Nous proposons de qualifier la réduction temporelle comme la réalisation de suites de segments courts consécutifs. Environ 14% du corpus est considéré comme réduit. Les résultats de l’alignement montrent que ces zones impliquent le plus souvent plus d’un mot (81%), et que sinon, la position interne du mot est la plus concernée. Parmi les exemples de suites de mots les plus réduits, on trouve des locutions utilisées comme des marqueurs discursifs. 2020.jeptalnrecital-jep.70 @@ -903,7 +903,7 @@ Classification de relations pour l’intelligence économique et concurrentielle (Relation Classification for Competitive and Economic Intelligence ) HadjerKhaldi AmineAbdaoui - FarahBenamara + FarahBenamara GrégoireSigel NathalieAussenac-Gilles 27–39 @@ -927,12 +927,12 @@ Les modèles de langue contextuels Camembert pour le français : impact de la taille et de l’hétérogénéité des données d’entrainement (<fixed-case>C</fixed-case> <fixed-case>AMEM</fixed-case> <fixed-case>BERT</fixed-case> Contextual Language Models for <fixed-case>F</fixed-case>rench: Impact of Training Data Size and Heterogeneity ) LouisMartin BenjaminMuller - Pedro JavierOrtiz Suárez + Pedro JavierOrtiz Suárez YoannDupont - LaurentRomary - ÉricVillemonte de la Clergerie - BenoîtSagot - DjaméSeddah + LaurentRomary + ÉricVillemonte de la Clergerie + BenoîtSagot + DjaméSeddah 54–65 Les modèles de langue neuronaux contextuels sont désormais omniprésents en traitement automatique des langues. Jusqu’à récemment, la plupart des modèles disponibles ont été entraînés soit sur des données en anglais, soit sur la concaténation de données dans plusieurs langues. L’utilisation pratique de ces modèles — dans toutes les langues sauf l’anglais — était donc limitée. La sortie récente de plusieurs modèles monolingues fondés sur BERT (Devlin et al., 2019), notamment pour le français, a démontré l’intérêt de ces modèles en améliorant l’état de l’art pour toutes les tâches évaluées. Dans cet article, à partir d’expériences menées sur CamemBERT (Martin et al., 2019), nous montrons que l’utilisation de données à haute variabilité est préférable à des données plus uniformes. De façon plus surprenante, nous montrons que l’utilisation d’un ensemble relativement petit de données issues du web (4Go) donne des résultats aussi bons que ceux obtenus à partir d’ensembles de données plus grands de deux ordres de grandeurs (138Go). 2020.jeptalnrecital-taln.5 @@ -942,7 +942,7 @@ Génération automatique de définitions pour le français (Definition Modeling in <fixed-case>F</fixed-case>rench) TimotheeMickus - MathieuConstant + MathieuConstant DenisPaperno 66–80 La génération de définitions est une tâche récente qui vise à produire des définitions lexicographiques à partir de plongements lexicaux. Nous remarquons deux lacunes : (i) l’état de l’art actuel ne s’est penché que sur l’anglais et le chinois, et (ii) l’utilisation escomptée en tant que méthode d’évaluation des plongements lexicaux doit encore être vérifiée. Pour y remédier, nous proposons un jeu de données pour la génération de définitions en français, ainsi qu’une évaluation des performances d’un modèle de génération de définitions simple selon les plongements lexicaux fournis en entrée. @@ -954,7 +954,7 @@ Du bon usage d’ingrédients linguistiques spéciaux pour classer des recettes exceptionnelles (Using Special Linguistic Ingredients to Classify Exceptional Recipes ) ElhamMohammadi LouisMarceau - EricCharton + EricCharton LeilaKosseim LukaNerima Marie-JeanMeurs @@ -988,7 +988,7 @@ Impact de la structure logique des documents sur les modèles distributionnels : expérimentations sur le corpus <fixed-case>TALN</fixed-case> (Impact of document structure on distributional semantics models: a case study on <fixed-case>NLP</fixed-case> research articles ) LudovicTanguy - CécileFabre + CécileFabre YoannBard 122–135 Nous présentons une expérience visant à mesurer en quoi la structure logique d’un document impacte les représentations lexicales dans les modèles de sémantique distributionnelle. En nous basant sur des documents structurés (articles de recherche en TAL) nous comparons des modèles construits sur des corpus obtenus par suppression de certaines parties des textes du corpus : titres de section, résumés, introductions et conclusions. Nous montrons que malgré des différences selon les parties et le lexique pris en compte, ces zones réputées particulièrement informatives du contenu d’un article ont un impact globalement moins significatif que le reste du texte sur la construction du modèle. @@ -999,7 +999,7 @@ Prédire automatiquement les intentions du locuteur dans des questions issues du discours oral spontané (Automatically predicting the speaker’s intentions in questions from spontaneous oral speech) AngèleBarbedette - IrisEshkol-Taravella + IrisEshkol-Taravella 137–145 Cette étude porte sur la classification automatique des intentions exprimées dans des questions issues d’un corpus d’échanges oraux spontanés. Nous proposons une typologie dans laquelle nous distinguons trois classes d’intentions (AVIS, VOLONTÉ et DOUTE). Après plusieurs prétraitements et ajouts de traits lexicaux aux données (lexiques, nombre de mots et de caractères), nous implémentons un algorithme de classification automatique et nous en présentons et évaluons les résultats qui atteignent une F-mesure de 0,62. Nous proposons ensuite une interprétation de ceux-ci, basée sur une comparaison entre les expériences menées et des mesures liées aux traits linguistiques intégrés avant la tâche de classification. 2020.jeptalnrecital-taln.11 @@ -1099,7 +1099,7 @@ Prédire le niveau de langue d’apprenants d’anglais (Predict the language level for <fixed-case>E</fixed-case>nglish learners) NataliaGrabar - ThierryHamon + ThierryHamon BertCappelle CyrilGrandin BenoîtLeclercq @@ -1122,7 +1122,7 @@ Segmentation automatique en périodes pour le français parlé (Automatic Period Segmentation of Oral <fixed-case>F</fixed-case>rench) NataliaKalashnikova - IrisEshkol-Taravella + IrisEshkol-Taravella LoïcGrobol FrançoisDelafontaine 241–248 @@ -1134,7 +1134,7 @@ Les avis sur les restaurants à l’épreuve de l’apprentissage automatique (An Empirical Examination of Online Restaurant Reviews) HyunJung Kang - IrisEshkol-Taravella + IrisEshkol-Taravella 249–257 Dans la fouille d’opinions, de nombreuses études portent sur l’extraction automatique des opinions positives ou négatives. Cependant les recherches ayant pour objet la fouille de suggestions et d’intentions sont moins importantes, malgré leur lien profond avec l’opinion. Cet article vise à détecter six catégories (opinion positive/mixte/négative, suggestion, intention, description) dans les avis en ligne sur les restaurants en exploitant deux méthodes : l’apprentissage de surface et l’apprentissage profond supervisés. Les performances obtenues pour chaque catégorie sont interprétées ensuite en tenant compte des spécificités du corpus traité. 2020.jeptalnrecital-taln.24 @@ -1160,8 +1160,8 @@ MaximinCoavoux BenjaminLecouteux AlexandreAllauzen - BenoîtCrabbé - LaurentBesacier + BenoîtCrabbé + LaurentBesacier DidierSchwab 268–278 Les modèles de langue pré-entraînés sont désormais indispensables pour obtenir des résultats à l’état-de-l’art dans de nombreuses tâches du TALN. Tirant avantage de l’énorme quantité de textes bruts disponibles, ils permettent d’extraire des représentations continues des mots, contextualisées au niveau de la phrase. L’efficacité de ces représentations pour résoudre plusieurs tâches de TALN a été démontrée récemment pour l’anglais. Dans cet article, nous présentons et partageons FlauBERT, un ensemble de modèles appris sur un corpus français hétérogène et de taille importante. Des modèles de complexité différente sont entraînés à l’aide du nouveau supercalculateur Jean Zay du CNRS. Nous évaluons nos modèles de langue sur diverses tâches en français (classification de textes, paraphrase, inférence en langage naturel, analyse syntaxique, désambiguïsation automatique) et montrons qu’ils surpassent souvent les autres approches sur le référentiel d’évaluation FLUE également présenté ici. @@ -1173,7 +1173,7 @@ Relation, es-tu là ? Détection de relations par <fixed-case>LSTM</fixed-case> pour améliorer l’extraction de relations (Relation, are you there ? <fixed-case>LSTM</fixed-case>-based relation detection to improve knowledge extraction ) CyrielleMallart MichelLe Nouy - GuillaumeGravier + GuillaumeGravier PascaleSébillot 279–287 De nombreuses méthodes d’extraction et de classification de relations ont été proposées et testées sur des données de référence. Cependant, dans des données réelles, le nombre de relations potentielles est énorme et les heuristiques souvent utilisées pour distinguer de vraies relations de co-occurrences fortuites ne détectent pas les signaux faibles pourtant importants. Dans cet article, nous étudions l’apport d’un modèle de détection de relations, identifiant si un couple d’entités dans une phrase exprime ou non une relation, en tant qu’étape préliminaire à la classification des relations. Notre modèle s’appuie sur le plus court chemin de dépendances entre deux entités, modélisé par un LSTM et combiné avec les types des entités. Sur la tâche de détection de relations, nous obtenons de meilleurs résultats qu’un modèle état de l’art pour la classification de relations, avec une robustesse accrue aux relations inédites. Nous montrons aussi qu’une détection binaire en amont d’un modèle de classification améliore significativement ce dernier. @@ -1185,8 +1185,8 @@ Analyse automatique en cadres sémantiques pour l’apprentissage de modèles de compréhension de texte (Semantic Frame Parsing for training Machine Reading Comprehension models) GabrielMarzinotto DelphineCharlet - GéraldineDamnati - FrédéricBéchet + GéraldineDamnati + FrédéricBéchet 288–295 Dans le cadre de la compréhension automatique de documents, cet article propose une évaluation intrinsèque et extrinsèque d’un modèle d’analyse automatique en cadres sémantiques (Frames). Le modèle proposé est un modèle état de l’art à base de GRU bi-directionnel, enrichi par l’utilisation d’embeddings contextuels. Nous montrons qu’un modèle de compréhension de documents appris sur un corpus de triplets générés à partir d’un corpus analysé automatiquement avec l’analyseur en cadre sémantique présente des performances inférieures de seulement 2.5% en relatif par rapport à un modèle appris sur un corpus de triplets générés à partir d’un corpus analysé manuellement. 2020.jeptalnrecital-taln.28 @@ -1198,7 +1198,7 @@ MohamedAmine Menacer KarimaAbidi NouhaOthman - KamelSmaïli + KamelSmaïli 296–304 La plupart des travaux existant sur l’analyse de sentiments traitent l’arabe standard moderne et ne prennent pas en considération les spécificités de l’arabe dialectal. Cet article présente un système d’analyse de sentiments de textes extraits de vidéos exprimées en dialecte algérien. Dans ce travail, nous avons deux défis à surmonter, la reconnaissance automatique de la parole pour le dialecte algérien et l’analyse de sentiments du texte reconnu. Le développement du système de reconnaissance automatique de la parole est basé sur un corpus oral restreint. Pour pallier le manque de données, nous proposons d’exploiter des données ayant un impact sur le dialecte algérien, à savoir l’arabe standard et le français. L’analyse de sentiments est fondée sur la détection automatique de la polarité des mots en fonction de leur proximité sémantique avec d’autres mots ayant une polarité prédéterminée. 2020.jeptalnrecital-taln.29 @@ -1229,7 +1229,7 @@ Identification des problèmes d’annotation pour l’extraction de relations (Identification of annotation problem for the relation extraction) TsantaRandriatsitohaina - ThierryHamon + ThierryHamon 323–331 L’annotation d’un corpus est une tâche difficile et laborieuse, notamment sur des textes de spécialité comme les textes biomédicaux. Ainsi, dans un contexte comme l’extraction des interactions alimentmédicament (FDI), l’annotation du corpus POMELO a été réalisée par un seul annotateur et présente des risques d’erreur. Dans cet article, nous proposons d’identifier ces problèmes d’annotation en utilisant un corpus Silver Standard (CSS) que nous établissons à partir d’un vote majoritaire parmi les annotations proposées par des modèles entraînés sur un domaine similaire (interaction médicamentmédicament – DDI) et l’annotation manuelle à évaluer. Les résultats obtenus montrent que l’annotation dans POMELO est considérablement éloignée du CSS. L’analyse des erreurs permet d’en identifier les principales causes et de proposer des solutions pour corriger l’annotation existante. 2020.jeptalnrecital-taln.32 @@ -1242,7 +1242,7 @@ Anne-LaureLigozat FrancoisYvon GabrielIllouz - ThierryHamon + ThierryHamon 332–341 La simplification de textes a émergé comme un sous-domaine actif du traitement automatique des langues, du fait des problèmes pratiques et théoriques qu’elle permet d’aborder, ainsi que de ses nombreuses applications pratiques. Des corpus de simplification sont nécessaires pour entrainer des systèmes de simplification automatique ; ces ressources sont toutefois rares et n’existent que pour un petit nombre de langues. Nous montrons ici que dans un contexte où les ressources pour la simplification sont rares, il reste néanmoins possible de construire des systèmes de simplification, en ayant recours à des corpus synthétiques, par exemple obtenus par traduction automatique, et nous évaluons diverses manières de les constituer. 2020.jeptalnrecital-taln.33 @@ -1253,7 +1253,7 @@ Représentation sémantique des familles dérivationnelles au moyen de frames morphosémantiques (Semantic representation of derivational families by means of morphosemantic frames ) DanieleSanacore NabilHathout - FiammettaNamer + FiammettaNamer 342–350 L’article présente un formalisme de représentation des relations morphologiques dérivationnelles inspiré de la Sémantique des Frames. La description morphosémantique y est réalisée au niveau des familles dérivationnelles au moyen de frames morphosémantiques dans lesquels les lexèmes sont définis les uns relativement aux autres. Les frames morphosémantiques permettent de rendre compte de la structure paradigmatique du lexique morphologique par l’alignement des familles qui présentent les mêmes oppositions de sens. La seconde partie de l’article est consacrée aux données qui seront utilisées pour produire (semi-) automatiquement ces représentations. 2020.jeptalnrecital-taln.34 @@ -1264,7 +1264,7 @@ Modèle neuronal pour la résolution de la coréférence dans les dossiers médicaux électroniques (Neural approach for coreference resolution in electronic health records ) JulienTourille OlivierFerret - AurélieNévéol + AurélieNévéol XavierTannier 351–360 La résolution de la coréférence est un élément essentiel pour la constitution automatique de chronologies médicales à partir des dossiers médicaux électroniques. Dans ce travail, nous présentons une approche neuronale pour la résolution de la coréférence dans des textes médicaux écrits en anglais pour les entités générales et cliniques en nous évaluant dans le cadre de référence pour cette tâche que constitue la tâche 1C de la campagne i2b2 2011. @@ -1275,7 +1275,7 @@ Un corpus d’évaluation pour un système de simplification discursive (An Evaluation Corpus for Automatic Discourse Simplification) RodrigoWilkens - AmaliaTodirascu + AmaliaTodirascu 361–369 Nous présentons un nouveau corpus simplifié, disponible en français pour l’évaluation d’un système de simplification discursive. Ce système utilise des chaînes de référence pour simplifier et pour préserver la cohésion textuelle après simplification. Nous présentons la méthodologie de collecte de corpus (via un formulaire, qui recueille les simplifications manuelles faites par des participants experts), les règles présentées dans le guide, une analyse des types de simplifications et une évaluation de notre corpus, par comparaison avec la sortie du système de simplification automatique. 2020.jeptalnrecital-taln.36 @@ -1482,7 +1482,7 @@ Démo de <fixed-case>AMALD</fixed-case>-serveur et <fixed-case>AMALD</fixed-case>-corpus, dédiés à l’analyse morphologique de l’allemand (Demonstration of <fixed-case>AMALD</fixed-case>-serveur and <fixed-case>AMALD</fixed-case>-corpus, dedicated to the morphological analysis of <fixed-case>G</fixed-case>erman) - ChristianBoitet + ChristianBoitet VincentBerment Jean-PhilippeGuilbaud ClaireLemaire @@ -1521,7 +1521,7 @@ EmmanuelleDusserre RuslanKalitvianski MathieuRuhlmann - MuntsaPadró + MuntsaPadró 14–17 Dans cet article, nous présentons la mise en œuvre d’une chaîne de traitement sémantique complète dédiée aux conversations audio issues de centres d’appel téléphoniques, depuis la phase de transcription automatique jusqu’à l’exploitation des résultats, en passant par l’étape d’analyse sémantique des énoncés. Nous décrivons ici le fonctionnement des différentes analyses que notre équipe développe, ainsi que la plateforme interactive permettant de restituer les résultats agrégés de toutes les conversations analysées. 2020.jeptalnrecital-demos.4 @@ -1550,7 +1550,7 @@ DamienLolive GwénoléLecorvé JonathanChevelu - SébastienLe Maguer + SébastienLe Maguer 22–25 Nous présentons FlexEval, un outil de conception et déploiement de tests perceptifs multimédias sous la forme d’un site web léger. S’appuyant sur des technologies standards et ouvertes du web, notamment le framework Flask, FlexEval offre une grande souplesse de conception, des gages de pérennité, ainsi que le support de communautés actives d’utilisateurs. L’application est disponible en open-source via le dépôt Git https://gitlab.inria.fr/expression/tools/flexeval. 2020.jeptalnrecital-demos.6 @@ -1560,7 +1560,7 @@ Vers une analyse automatique de la perception relative à un lieu (Towards an Automatic Analysis of Place Perception) HélèneFlamein - IrisEshkol-Taravella + IrisEshkol-Taravella 26–29 Le travail présenté s’intéresse à la perception qu’ont les habitants de leur ville en se fondant sur un corpus de conversations orales spontanées. La chaîne de traitement conditionnant l’analyse de la perception se décompose en trois étapes : la détection des noms de lieux, l’analyse de la perception identifiée et la visualisation cartographique des informations extraites. 2020.jeptalnrecital-demos.7 @@ -1627,7 +1627,7 @@ Conception d’un système de détection d’intention pour un moteur de recherche sur <fixed-case>I</fixed-case>nternet (Designing a User Intention Detection system for a Web Search Engine) EstelleMaudet - ChristopheServan + ChristopheServan 50–52 Dans les moteurs de recherche sur Internet, l’une des tâches les plus importantes vise à identifier l’intention de l’utilisateur. Cet article présente notre étude pour proposer un nouveau système de détection d’intention pour le moteur de recherche sur Internet Qwant. Des logs de clic au système de détection d’intention, l’ensemble du processus est expliqué, y compris les contraintes industrielles qui ont dû être prises en compte. Une analyse manuelle des données groupées a d’abord été appliquée sur les journaux afin de mieux comprendre les objectifs de l’utilisateur et de choisir les catégories d’intention pertinentes. Lorsque la recherche satisfait aux contraintes industrielles, il faut faire des choix architecturaux et faire des concessions. Cet article explique les contraintes et les résultats obtenus pour ce nouveau système en ligne. 2020.jeptalnrecital-demos.13 @@ -1641,7 +1641,7 @@ DenisJouvet KarimaAbidi DavidLanglois - KamelSmaïli + KamelSmaïli 53–56 La démonstration de résumé et de traduction automatique de vidéos résulte de nos travaux dans le projet AMIS. L’objectif du projet était d’aider un voyageur à comprendre les nouvelles dans un pays étranger. Pour cela, le projet propose de résumer et traduire automatiquement une vidéo en langue étrangère (ici, l’arabe). Un autre objectif du projet était aussi de comparer les opinions et sentiments exprimés dans plusieurs vidéos comparables. La démonstration porte sur l’aspect résumé, transcription et traduction. Les exemples montrés permettront de comprendre et mesurer qualitativement les résultats du projet. 2020.jeptalnrecital-demos.14 @@ -1651,7 +1651,7 @@ La résolution d’anaphores au-delà de la frontière de la phrase (The Anaphora Resolution Beyond Sentence Boundary) LukaNerima - EricWehrli + EricWehrli 57–59 Cette démonstration présente une extension de nos outils d’analyse syntaxique et d’étiquetage morphosyntaxique qui prend en compte la résolution d’anaphores pronominales non seulement à l’intérieur d’une phrase, mais également si l’antécédent se trouve dans la phrase précédente. Autant l’analyseur que l’étiqueteur effectuant une analyse syntaxique complète des phrases, ces outils affichent également les fonctions grammaticales des constituants (sujet, objet direct, etc.) et les arguments des verbes. Une version de cette démonstration est disponible sur le Web. 2020.jeptalnrecital-demos.15 @@ -1661,7 +1661,7 @@ Spiderlex et compagnie (Spiderlex & Co) SandrineOllinger - AlainPolguère + AlainPolguère YannickChudy BrunoGaume 60–63 @@ -1693,9 +1693,9 @@ Analyse sémantique robuste par apprentissage antagoniste pour la généralisation de domaine (Robust Semantic Parsing with Adversarial Learning for Domain Generalization ) GabrielMarzinotto - GéraldineDamnati - FrédéricBéchet - BenoîtFavre + GéraldineDamnati + FrédéricBéchet + BenoîtFavre 71–72 Nous présentons des résumés en français et en anglais de l’article (Marzinotto et al., 2019) présenté à la conférence North American Chapter of the Association for Computational Linguistics : Human Language Technologies en 2019. 2020.jeptalnrecital-demos.19 @@ -1709,7 +1709,7 @@ RémiCardon NataliaGrabar CyrilGrouin - ThierryHamon + ThierryHamon ATALA et AFCP
Nancy, France
6 @@ -1737,7 +1737,7 @@ DavideBuscaldi GhaziFelhi DhaouGhoul - JosephLe Roux + JosephLe Roux GaëlLejeune XudongZhang 14–25 @@ -1858,7 +1858,7 @@ Actes de la 6e conférence conjointe Journées d'Études sur la Parole (JEP, 33e édition), Traitement Automatique des Langues Naturelles (TALN, 27e édition), Rencontre des Étudiants Chercheurs en Informatique pour le Traitement Automatique des Langues (RÉCITAL, 22e édition). 2e atelier Éthique et TRaitemeNt Automatique des Langues (ETeRNAL) - GillesAdda + GillesAdda MaximeAmblard KarënFort ATALA et AFCP @@ -1875,7 +1875,7 @@ Pratiques d’évaluation en <fixed-case>ASR</fixed-case> et biais de performance (Evaluation methodology in <fixed-case>ASR</fixed-case> and performance bias) MahaultGarnerin SolangeRossato - LaurentBesacier + LaurentBesacier 1–9 Nous proposons une réflexion sur les pratiques d’évaluation des systèmes de reconnaissance automatique de la parole (ASR). Après avoir défini la notion de discrimination d’un point de vue légal et la notion d’équité dans les systèmes d’intelligence artificielle, nous nous intéressons aux pratiques actuelles lors des grandes campagnes d’évaluation. Nous observons que la variabilité de la parole et plus particulièrement celle de l’individu n’est pas prise en compte dans les protocoles d’évaluation actuels rendant impossible l’étude de biais potentiels dans les systèmes. 2020.jeptalnrecital-eternal.1 @@ -1918,7 +1918,7 @@
1990-2020 : retours sur 30 ans d’échanges autour de l’identification de voix en milieu judiciaire (1990-2020: A look back at 30 years of discussions on voice identification in the judicial system) - Jean-FrancoisBonastre + Jean-FrancoisBonastre 38–47 Des enregistrements de voix se trouvent de plus en plus souvent au cœur d’affaires judiciaires importantes, notamment de par l’essor de la téléphonie mobile. La justice demande à ce que des expertises en identification de voix soient réalisées alors que dans le même temps, la pertinence scientifique de telles expertises est fortement mise en cause par les scientifiques. Ainsi, dès 1990, les chercheurs en communication parlée réunis dans le GFCP, devenu depuis AFCP, ont voté une motion affirmant que « l’identification d’un individu par sa voix est à l’heure actuelle un problème à sa connaissance non résolu ». Cette motion est toujours en vigueur, après avoir été réaffirmée en 1997 et renforcée par une pétition en 2002. Malgré cela, des expertises judiciaires en identification de voix sont réalisées en France chaque année. Cet article revient sur les actions menées par le GFCP et l’AFCP depuis la motion initiale jusqu’aux actions contemporaines. Il se propose d’évaluer les répercussions de ces actions, tant au niveau de la Justice qu’au niveau académique. 2020.jeptalnrecital-eternal.5 diff --git a/data/xml/2020.knlp.xml b/data/xml/2020.knlp.xml index c44b2472b3..aa17def63f 100644 --- a/data/xml/2020.knlp.xml +++ b/data/xml/2020.knlp.xml @@ -5,7 +5,7 @@ Proceedings of Knowledgeable NLP: the First Workshop on Integrating Structured Knowledge and Neural Networks for NLP Oren SarShalom AlexanderPanchenko - Cicerodos Santos + Cicerodos Santos VarvaraLogacheva AlessandroMoschitti IdoDagan diff --git a/data/xml/2020.lantern.xml b/data/xml/2020.lantern.xml index 920be490f0..34d31c41cc 100644 --- a/data/xml/2020.lantern.xml +++ b/data/xml/2020.lantern.xml @@ -6,7 +6,7 @@ AdityaMogadala SandroPezzelle DietrichKlakow - Marie-FrancineMoens + Marie-FrancineMoens ZeynepAkata Association for Computational Linguistics
Barcelona, Spain
diff --git a/data/xml/2020.latechclfl.xml b/data/xml/2020.latechclfl.xml index b121b1b4b4..c703dcde16 100644 --- a/data/xml/2020.latechclfl.xml +++ b/data/xml/2020.latechclfl.xml @@ -6,7 +6,7 @@ StefaniaDeGaetano AnnaKazantseva NilsReiter - StanSzpakowicz + StanSzpakowicz International Committee on Computational Linguistics
Online
December @@ -55,7 +55,7 @@ Neural Machine Translation of Artwork Titles Using Iconclass Codes NikolayBanar - WalterDaelemans + WalterDaelemans MikeKestemont 42–51 We investigate the use of Iconclass in the context of neural machine translation for NL<->EN artwork titles. Iconclass is a widely used iconographic classification system used in the cultural heritage domain to describe and retrieve subjects represented in the visual arts. The resource contains keywords and definitions to encode the presence of objects, people, events and ideas depicted in artworks, such as paintings. We propose a simple concatenation approach that improves the quality of automatically generated title translations for artworks, by leveraging textual information extracted from Iconclass. Our results demonstrate that a neural machine translation system is able to exploit this metadata to boost the translation performance of artwork titles. This technology enables interesting applications of machine learning in resource-scarce domains in the cultural sector. @@ -82,7 +82,7 @@ Vital Records: Uncover the past from historical handwritten records - HerveDejean + HerveDejean Jean-LucMeunier 69–73 We present Vital Records, a demonstrator based on deep-learning approaches to handwritten-text recognition, table processing and information extraction, which enables data from century-old documents to be parsed and analysed, making it possible to explore death records in space and time. This demonstrator provides a user interface for browsing and visualising data extracted from 80,000 handwritten pages of tabular data. @@ -121,7 +121,7 @@ Zero-shot cross-lingual identification of direct speech using distant supervision MurathanKurfalı - MatsWirén + MatsWirén 105–111 Prose fiction typically consists of passages alternating between the narrator’s telling of the story and the characters’ direct speech in that story. Detecting direct speech is crucial for the downstream analysis of narrative structure, and may seem easy at first thanks to quotation marks. However, typographical conventions vary across languages, and as a result, almost all approaches to this problem have been monolingual. In contrast, the aim of this paper is to provide a multilingual method for identifying direct speech. To this end, we created a training corpus by using a set of heuristics to automatically find texts where quotation marks appear sufficiently consistently. We then removed the quotation marks and developed a sequence classifier based on multilingual-BERT which classifies each token as belonging to narration or speech. Crucially, by training the classifier with the quotation marks removed, it was forced to learn the linguistic characteristics of direct speech rather than the typography of quotation marks. The results in the zero-shot setting of the proposed model are comparable to the strong supervised baselines, indicating that this is a feasible approach. 2020.latechclfl-1.12 @@ -211,7 +211,7 @@ AmelFraisse RonaldJenn Shelley FisherFishkin - PierreZweigenbaum + PierreZweigenbaum 167–171 TL-Explorer is a digital humanities tool for mapping and analyzing translated literature, encompassing the World Map and the Translation Dashboard. The World Map displays collected literature of different languages, locations, and cultures and establishes the foundation for further analysis. It comprises three global maps for spatial and temporal interpretation. A further investigation into an individual point on the map leads to the Translation Dashboard. Each point represents one edition or translation. Collected translations are processed in order to build multilingual parallel corpora for a large number of under-resourced languages as well as to highlight the transnational circulation of knowledge. Our first rendition of TL-Explorer was conducted on the well-traveled American novel, Adventures of Huckleberry Finn, by Mark Twain. The maps currently chronicle nearly 400 translations of this novel. And the dashboard supports over 30 collected translations. However, the TL-Explore is easily extended to other works of literature and is not limited to type of texts, such as academic manuscripts or constitutional documents to name a few. 2020.latechclfl-1.20 diff --git a/data/xml/2020.law.xml b/data/xml/2020.law.xml index 6ad7c3552f..6295620d0d 100644 --- a/data/xml/2020.law.xml +++ b/data/xml/2020.law.xml @@ -144,7 +144,7 @@ JieChi TomHosking NinaMarkl - BonnieWebber + BonnieWebber 138–147 Multi-sentence questions (MSQs) are sequences of questions connected by relations which, unlike sequences of standalone questions, need to be answered as a unit. Following Rhetorical Structure Theory (RST), we recognise that different “question discourse relations” between the subparts of MSQs reflect different speaker intents, and consequently elicit different answering strategies. Correctly identifying these relations is therefore a crucial step in automatically answering MSQs. We identify five different types of MSQs in English, and define five novel relations to describe them. We extract over 162,000 MSQs from Stack Exchange to enable future research. Finally, we implement a high-precision baseline classifier based on surface features. 2020.law-1.13 @@ -153,7 +153,7 @@ Annotating Errors and Emotions in Human-Chatbot Interactions in <fixed-case>I</fixed-case>talian ManuelaSanguinetti - AlessandroMazzei + AlessandroMazzei VivianaPatti MarcoScalerandi DarioMana @@ -176,7 +176,7 @@ py<fixed-case>MMAX</fixed-case>2: Deep Access to <fixed-case>MMAX</fixed-case>2 Projects from Python - Mark-ChristophMüller + Mark-ChristophMüller 167–173 pyMMAX2 is an API for processing MMAX2 stand-off annotation data in Python. It provides a lightweight basis for the development of code which opens up the Java- and XML-based ecosystem of MMAX2 for more recent, Python-based NLP and data science methods. While pyMMAX2 is pure Python, and most functionality is implemented from scratch, the API re-uses the complex implementation of the essential business logic for MMAX2 annotation schemes by interfacing with the original MMAX2 Java libraries. pyMMAX2 is available for download at http://github.com/nlpAThits/pyMMAX2. 2020.law-1.16 diff --git a/data/xml/2020.ldl.xml b/data/xml/2020.ldl.xml index b1362afc7f..eab419d11e 100644 --- a/data/xml/2020.ldl.xml +++ b/data/xml/2020.ldl.xml @@ -4,7 +4,7 @@ Proceedings of the 7th Workshop on Linked Data in Linguistics (LDL-2020) MaximIonov - John P.McCrae + John P.McCrae ChristianChiarcos ThierryDeclerck JuliaBosque-Gil @@ -43,7 +43,7 @@ Representing Temporal Information in Lexical Linked Data Resources - FahadKhan + FahadKhan 15–22 The increasing recognition of the utility of Linked Data as a means of publishing lexical resource has helped to underline the need for RDF based data models which have the flexibility and expressivity to be able to represent the most salient kinds of information contained in such resources as structured data, including, notably, information relating to time and the temporal dimension. In this article we describe a perdurantist approach to modelling diachronic lexical information which builds upon work which we have previously presented and which is based on the ontolex-lemon vocabulary. We present two extended examples, one taken from the Oxford English Dictionary, the other from a work on etymology, to show how our approach can handle different kinds of temporal information often found in lexical resources. 2020.ldl-1.3 @@ -66,7 +66,7 @@ Terme-à-<fixed-case>LLOD</fixed-case>: Simplifying the Conversion and Hosting of Terminological Resources as Linked Data Maria Piadi Buono - PhilippCimiano + PhilippCimiano Mohammad FazlehElahi FrankGrimm 28–35 @@ -121,7 +121,7 @@ Involving Lexicographers in the <fixed-case>LLOD</fixed-case> Cloud with <fixed-case>L</fixed-case>ex<fixed-case>O</fixed-case>, an Easy-to-use Editor of Lemon Lexical Resources AndreaBellandi - EmilianoGiovannetti + EmilianoGiovannetti 70–74 In this contribution, we show LexO, a user-friendly web collaborative editor of lexical resources based on the lemon model. LexO has been developed in the context of Digital Humanities projects, in which a key point in the design of an editor was the ease of use by lexicographers with no skill in Linked Data or Semantic Web technologies. Though the tool already allows creating a lemon lexicon from scratch and lets a team of users work on it collaboratively, many developments are possible. The involvement of the LLOD community appears now crucial both to find new users and application fields where to test it, and, even more importantly, to understand in which way it should evolve. 2020.ldl-1.10 diff --git a/data/xml/2020.lifelongnlp.xml b/data/xml/2020.lifelongnlp.xml index 8d2f4bbe77..e10a3ceab8 100644 --- a/data/xml/2020.lifelongnlp.xml +++ b/data/xml/2020.lifelongnlp.xml @@ -4,9 +4,9 @@ Proceedings of the 2nd Workshop on Life-long Learning for Spoken Language Systems William M.Campbell - AlexWaibel - DilekHakkani-Tur - Timothy J.Hazen + AlexWaibel + DilekHakkani-Tur + Timothy J.Hazen KevinKilgour EunahCho VarunKumar @@ -38,7 +38,7 @@ JuanHussain Tuan-NamNguyen KaihangSong - SebastianStüker + SebastianStüker AlexanderWaibel 9–17 When training speech recognition systems, one often faces the situation that sufficient amounts of training data for the language in question are available but only small amounts of data for the domain in question. This problem is even bigger for end-to-end speech recognition systems that only accept transcribed speech as training data, which is harder and more expensive to obtain than text data. In this paper we present experiments in adapting end-to-end speech recognition systems by a method which is called batch-weighting and which we contrast against regular fine-tuning, i.e., to continue to train existing neural speech recognition models on adaptation data. We perform experiments using theses techniques in adapting to topic, accent and vocabulary, showing that batch-weighting consistently outperforms fine-tuning. In order to show the generalization capabilities of batch-weighting we perform experiments in several languages, i.e., Arabic, English and German. Due to its relatively small computational requirements batch-weighting is a suitable technique for supervised life-long learning during the life-time of a speech recognition system, e.g., from user corrections. diff --git a/data/xml/2020.lincr.xml b/data/xml/2020.lincr.xml index 7d40559328..33915d6adf 100644 --- a/data/xml/2020.lincr.xml +++ b/data/xml/2020.lincr.xml @@ -80,7 +80,7 @@ The Little Prince in 26 Languages: Towards a Multilingual Neuro-Cognitive Corpus SabrinaStehwien LenaHenke - JohnHale + JohnHale JonathanBrennan LarsMeyer 43–49 diff --git a/data/xml/2020.loresmt.xml b/data/xml/2020.loresmt.xml index be4ed4be79..aacfa10d52 100644 --- a/data/xml/2020.loresmt.xml +++ b/data/xml/2020.loresmt.xml @@ -4,14 +4,14 @@ Proceedings of the 3rd Workshop on Technologies for MT of Low Resource Languages AlinaKarakanta - Atul Kr.Ojha + Atul Kr.Ojha Chao-HongLiu JadeAbbott JohnOrtega - JonathanWashington + JonathanWashington NathanielOco Surafel MelakuLakew - Tommi APirinen + Tommi APirinen ValentinMalykh VarvaraLogacheva XiaobingZhao @@ -84,7 +84,7 @@ Sahinur RahmanLaskar Abdullah Faiz Ur RahmanKhilji ParthaPakray - SivajiBandyopadhyay + SivajiBandyopadhyay 38–42 Neural machine translation (NMT) is a widely accepted approach in the machine translation (MT) community, translating from one natural language to another natural language. Although, NMT shows remarkable performance in both high and low resource languages, it needs sufficient training corpus. The availability of a parallel corpus in low resource language pairs is one of the challenging tasks in MT. To mitigate this issue, NMT attempts to utilize a monolingual corpus to get better at translation for low resource language pairs. Workshop on Technologies for MT of Low Resource Languages (LoResMT 2020) organized shared tasks of low resource language pair translation using zero-shot NMT. Here, the parallel corpus is not used and only monolingual corpora is allowed. We have participated in the same shared task with our team name CNLP-NITS for the Russian-Hindi language pair. We have used masked sequence to sequence pre-training for language generation (MASS) with only monolingual corpus following the unsupervised NMT architecture. The evaluated results are declared at the LoResMT 2020 shared task, which reports that our system achieves the bilingual evaluation understudy (BLEU) score of 0.59, precision score of 3.43, recall score of 5.48, F-measure score of 4.22, and rank-based intuitive bilingual evaluation score (RIBES) of 0.180147 in Russian to Hindi translation. And for Hindi to Russian translation, we have achieved BLEU, precision, recall, F-measure, and RIBES score of 1.11, 4.72, 4.41, 4.56, and 0.026842 respectively. 2020.loresmt-1.5 @@ -95,7 +95,7 @@ Unsupervised Approach for Zero-Shot Experiments: <fixed-case>B</fixed-case>hojpuri–<fixed-case>H</fixed-case>indi and <fixed-case>M</fixed-case>agahi–<fixed-case>H</fixed-case>indi@<fixed-case>L</fixed-case>o<fixed-case>R</fixed-case>es<fixed-case>MT</fixed-case> 2020 AmitKumar Rajesh KumarMundotiya - Anil KumarSingh + Anil KumarSingh 43–46 This paper reports a Machine Translation (MT) system submitted by the NLPRL team for the Bhojpuri–Hindi and Magahi–Hindi language pairs at LoResMT 2020 shared task. We used an unsupervised domain adaptation approach that gives promising results for zero or extremely low resource languages. Task organizers provide the development and the test sets for evaluation and the monolingual data for training. Our approach is a hybrid approach of domain adaptation and back-translation. Metrics used to evaluate the trained model are BLEU, RIBES, Precision, Recall and F-measure. Our approach gives relatively promising results, with a wide range, of 19.5, 13.71, 2.54, and 3.16 BLEU points for Bhojpuri to Hindi, Magahi to Hindi, Hindi to Bhojpuri and Hindi to Magahi language pairs, respectively. 2020.loresmt-1.6 @@ -115,10 +115,10 @@ Improving Multilingual Neural Machine Translation For Low-Resource Languages: <fixed-case>F</fixed-case>rench, <fixed-case>E</fixed-case>nglish - <fixed-case>V</fixed-case>ietnamese Thi-VinhNgo - Phuong-ThaiNguyen + Phuong-ThaiNguyen Thanh-LeHa Khac-QuyDinh - Le-MinhNguyen + Le-MinhNguyen 55–61 Prior works have demonstrated that a low-resource language pair can benefit from multilingual machine translation (MT) systems, which rely on many language pairs’ joint training. This paper proposes two simple strategies to address the rare word issue in multilingual MT systems for two low-resource language pairs: French-Vietnamese and English-Vietnamese. The first strategy is about dynamical learning word similarity of tokens in the shared space among source languages while another one attempts to augment the translation ability of rare words through updating their embeddings during the training. Besides, we leverage monolingual data for multilingual MT systems to increase the amount of synthetic parallel corpora while dealing with the data sparsity problem. We have shown significant improvements of up to +1.62 and +2.54 BLEU points over the bilingual baseline systems for both language pairs and released our datasets for the research community. 2020.loresmt-1.8 @@ -130,7 +130,7 @@ Sahinur RahmanLaskar Abdullah Faiz Ur RahmanKhilji ParthaPakray - SivajiBandyopadhyay + SivajiBandyopadhyay 62–68 The corpus preparation is one of the important challenging task for the domain of machine translation especially in low resource language scenarios. Country like India where multiple languages exists, machine translation attempts to minimize the communication gap among people with different linguistic backgrounds. Although Google Translation covers automatic translation of various languages all over the world but it lags in some languages including Assamese. In this paper, we have developed EnAsCorp1.0, corpus of English-Assamese low resource pair where parallel and monolingual data are collected from various online sources. We have also implemented baseline systems with statistical machine translation and neural machine translation approaches for the same corpus. 2020.loresmt-1.9 @@ -140,7 +140,7 @@ Unsupervised Neural Machine Translation for <fixed-case>E</fixed-case>nglish and <fixed-case>M</fixed-case>anipuri Salam MichaelSingh - Thoudam DorenSingh + Thoudam DorenSingh 69–78 Availability of bitext dataset has been a key challenge in the conventional machine translation system which requires surplus amount of parallel data. In this work, we devise an unsupervised neural machine translation (UNMT) system consisting of a transformer based shared encoder and language specific decoders using denoising autoencoder and backtranslation with an additional Manipuri side multiple test reference. We report our work on low resource setting for English (en) - Manipuri (mni) language pair and attain a BLEU score of 3.1 for en-mni and 2.7 for mni-en respectively. Subjective evaluation on translated output gives encouraging findings. 2020.loresmt-1.10 diff --git a/data/xml/2020.louhi.xml b/data/xml/2020.louhi.xml index bdca033a9e..cec22fa463 100644 --- a/data/xml/2020.louhi.xml +++ b/data/xml/2020.louhi.xml @@ -4,10 +4,10 @@ Proceedings of the 11th International Workshop on Health Text Mining and Information Analysis EbenHolderness - AntonioJimeno Yepes - AlbertoLavelli + AntonioJimeno Yepes + AlbertoLavelli Anne-LyseMinard - JamesPustejovsky + JamesPustejovsky FabioRinaldi Association for Computational Linguistics
Online
@@ -57,7 +57,7 @@ Not a cute stroke: Analysis of Rule- and Neural Network-based Information Extraction Systems for Brain Radiology Reports AndreasGrivas - BeatriceAlex + BeatriceAlex ClaireGrover RichardTobin WilliamWhiteley @@ -153,7 +153,7 @@ NemanjaVaci QiangLiu HaoNi - GoranNenadic + GoranNenadic AlejoNevado-Holgado 97–103 In this work we addressed the problem of capturing sequential information contained in longitudinal electronic health records (EHRs). Clinical notes, which is a particular type of EHR data, are a rich source of information and practitioners often develop clever solutions how to maximise the sequential information contained in free-texts. We proposed a systematic methodology for learning from chronological events available in clinical notes. The proposed methodological path signature framework creates a non-parametric hierarchical representation of sequential events of any type and can be used as features for downstream statistical learning tasks. The methodology was developed and externally validated using the largest in the UK secondary care mental health EHR data on a specific task of predicting survival risk of patients diagnosed with Alzheimer’s disease. The signature-based model was compared to a common survival random forest model. Our results showed a 15.4% increase of risk prediction AUC at the time point of 20 months after the first admission to a specialist memory clinic and the signature method outperformed the baseline mixed-effects model by 13.2 %. @@ -166,11 +166,11 @@ Defining and Learning Refined Temporal Relations in the Clinical Narrative KristinWright-Bettner ChenLin - TimothyMiller + TimothyMiller StevenBethard DmitriyDligach - MarthaPalmer - James H.Martin + MarthaPalmer + James H.Martin GuerganaSavova 104–114 We present refinements over existing temporal relation annotations in the Electronic Medical Record clinical narrative. We refined the THYME corpus annotations to more faithfully represent nuanced temporality and nuanced temporal-coreferential relations. The main contributions are in re-defining CONTAINS and OVERLAP relations into CONTAINS, CONTAINS-SUBEVENT, OVERLAP and NOTED-ON. We demonstrate that these refinements lead to substantial gains in learnability for state-of-the-art transformer models as compared to previously reported results on the original THYME corpus. We thus establish a baseline for the automatic extraction of these refined temporal relations. Although our study is done on clinical narrative, we believe it addresses far-reaching challenges that are corpus- and domain- agnostic. @@ -230,10 +230,10 @@ Detection of Mental Health from <fixed-case>R</fixed-case>eddit via Deep Contextualized Representations - ZhengpingJiang + ZhengpingJiang Sarah ItaLevitan JonathanZomick - JuliaHirschberg + JuliaHirschberg 147–156 We address the problem of automatic detection of psychiatric disorders from the linguistic content of social media posts. We build a large scale dataset of Reddit posts from users with eight disorders and a control user group. We extract and analyze linguistic characteristics of posts and identify differences between diagnostic groups. We build strong classification models based on deep contextualized word representations and show that they outperform previously applied statistical models with simple linguistic features by large margins. We compare user-level and post-level classification performance, as well as an ensembled multiclass model. 2020.louhi-1.16 diff --git a/data/xml/2020.lr4sshoc.xml b/data/xml/2020.lr4sshoc.xml index 46548b1c90..a0e3a3822c 100644 --- a/data/xml/2020.lr4sshoc.xml +++ b/data/xml/2020.lr4sshoc.xml @@ -3,7 +3,7 @@ Proceedings of the Workshop about Language Resources for the SSH Cloud - DaanBroeder + DaanBroeder MariaEskevich MonicaMonachini European Language Resources Association @@ -56,7 +56,7 @@ MariaPontiki MariaGavriilidou DimitrisGkoumas - SteliosPiperidis + SteliosPiperidis 19–26 We present a replication of a data-driven and linguistically inspired Verbal Aggression analysis framework that was designed to examine Twitter verbal attacks against predefined target groups of interest as an indicator of xenophobic attitudes during the financial crisis in Greece, in particular during the period 2013-2016. The research goal in this paper is to re-examine Verbal Aggression as an indicator of xenophobic attitudes in Greek Twitter three years later, in order to trace possible changes regarding the main targets, the types and the content of the verbal attacks against the same targets in the post crisis era, given also the ongoing refugee crisis and the political landscape in Greece as it was shaped after the elections in 2019. The results indicate an interesting rearrangement of the main targets of the verbal attacks, while the content and the types of the attacks provide valuable insights about the way these targets are being framed as compared to the respective dominant perceptions and stereotypes about them during the period 2013-2016. 2020.lr4sshoc-1.4 @@ -97,7 +97,7 @@ Stretching Disciplinary Boundaries in Language Resource Development and Use: a <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium Position Paper - ChristopherCieri + ChristopherCieri 39–41 Given the persistent gap between demand and supply, the impetus to reuse language resources is great. Researchers benefit from building upon the work of others including reusing data, tools and methodology. Such reuse should always consider the original intent of the language resource and how that impacts potential reanalysis. When the reuse crosses disciplinary boundaries, the re-user also needs to consider how research standards that differ between social science and humanities on the one hand and human language technologies on the other might lead to differences in unspoken assumptions. Data centers that aim to support multiple research communities have a responsibility to build bridges across disciplinary divides by sharing data in all directions, encouraging re-use and re-sharing and engaging directly in research that improves methodologies. 2020.lr4sshoc-1.8 @@ -106,7 +106,7 @@ Crossing the <fixed-case>SSH</fixed-case> Bridge with Interview Data - Henkvan den Heuvel + Henkvan den Heuvel 42–44 Spoken audio data, such as interview data, is a scientific instrument used by researchers in various disciplines crossing the boundaries of social sciences and humanities. In this paper, we will have a closer look at a portal designed to perform speech-to-text conversion on audio recordings through Automatic Speech Recognition (ASR) in the CLARIN infrastructure. Within the cluster cross-domain EU project SSHOC the potential value of such a linguistic tool kit for processing spoken language recording has found uptake in a webinar about the topic, and in a task addressing audio analysis of panel survey data. The objective of this contribution is to show that the processing of interviews as a research instrument has opened up a fascinating and fruitful area of collaboration between Social Sciences and Humanities (SSH). 2020.lr4sshoc-1.9 diff --git a/data/xml/2020.lrec.xml b/data/xml/2020.lrec.xml index 8a7fc4d713..02afc075dc 100644 --- a/data/xml/2020.lrec.xml +++ b/data/xml/2020.lrec.xml @@ -3,20 +3,20 @@ Proceedings of the Twelfth Language Resources and Evaluation Conference - NicolettaCalzolari - FrédéricBéchet + NicolettaCalzolari + FrédéricBéchet PhilippeBlache - KhalidChoukri - ChristopherCieri + KhalidChoukri + ChristopherCieri ThierryDeclerck SaraGoggi HitoshiIsahara - BenteMaegaard - JosephMariani + BenteMaegaard + JosephMariani HélèneMazo - AsuncionMoreno - JanOdijk - SteliosPiperidis + AsuncionMoreno + JanOdijk + SteliosPiperidis European Language Resources Association
Marseille, France
May @@ -32,7 +32,7 @@ Neural Mention Detection JuntaoYu BerndBohnet - MassimoPoesio + MassimoPoesio 1–10 Mention detection is an important preprocessing step for annotation and interpretation in applications such as NER and coreference resolution, but few stand-alone neural models have been proposed able to handle the full range of mentions. In this work, we propose and compare three neural network-based approaches to mention detection. The first approach is based on the mention detection part of a state of the art coreference resolution system; the second uses ELMO embeddings together with a bidirectional LSTM and a biaffine classifier; the third approach uses the recently introduced BERT model. Our best model (using a biaffine classifier) achieves gains of up to 1.8 percentage points on mention recall when compared with a strong baseline in a HIGH RECALL coreference annotation setting. The same model achieves improvements of up to 5.3 and 6.2 p.p. when compared with the best-reported mention detection F1 on the CONLL and CRAC coreference data sets respectively in a HIGH F1 annotation setting. We then evaluate our models for coreference resolution by using mentions predicted by our best model in start-of-the-art coreference systems. The enhanced model achieved absolute improvements of up to 1.7 and 0.7 p.p. when compared with our strong baseline systems (pipeline system and end-to-end system) respectively. For nested NER, the evaluation of our model on the GENIA corpora shows that our model matches or outperforms state-of-the-art models despite not being specifically designed for this task. 2020.lrec-1.1 @@ -43,7 +43,7 @@ A Cluster Ranking Model for Full Anaphora Resolution JuntaoYu AlexandraUma - MassimoPoesio + MassimoPoesio 11–20 Anaphora resolution (coreference) systems designed for the CONLL 2012 dataset typically cannot handle key aspects of the full anaphora resolution task such as the identification of singletons and of certain types of non-referring expressions (e.g., expletives), as these aspects are not annotated in that corpus. However, the recently released dataset for the CRAC 2018 Shared Task can now be used for that purpose. In this paper, we introduce an architecture to simultaneously identify non-referring expressions (including expletives, predicative s, and other types) and build coreference chains, including singletons. Our cluster-ranking system uses an attention mechanism to determine the relative importance of the mentions in the same cluster. Additional classifiers are used to identify singletons and non-referring markables. Our contributions are as follows. First all, we report the first result on the CRAC data using system mentions; our result is 5.8% better than the shared task baseline system, which used gold mentions. Second, we demonstrate that the availability of singleton clusters and non-referring expressions can lead to substantially improved performance on non-singleton clusters as well. Third, we show that despite our model not being designed specifically for the CONLL data, it achieves a score equivalent to that of the state-of-the-art system by Kantor and Globerson (2019) on that dataset. 2020.lrec-1.2 @@ -74,7 +74,7 @@ <fixed-case>N</fixed-case>o<fixed-case>E</fixed-case>l: An Annotated Corpus for Noun Ellipsis in <fixed-case>E</fixed-case>nglish PayalKhullar KushalMajmundar - ManishShrivastava + ManishShrivastava 34–43 Ellipsis resolution has been identified as an important step to improve the accuracy of mainstream Natural Language Processing (NLP) tasks such as information retrieval, event extraction, dialog systems, etc. Previous computational work on ellipsis resolution has focused on one type of ellipsis, namely Verb Phrase Ellipsis (VPE) and a few other related phenomenon. We extend the study of ellipsis by presenting the No(oun)El(lipsis) corpus - an annotated corpus for noun ellipsis and closely related phenomenon using the first hundred movies of Cornell Movie Dialogs Dataset. The annotations are carried out in a standoff annotation scheme that encodes the position of the licensor, the antecedent boundary, and Part-of-Speech (POS) tags of the licensor and antecedent modifier. Our corpus has 946 instances of exophoric and endophoric noun ellipsis, making it the biggest resource of noun ellipsis in English, to the best of our knowledge. We present a statistical study of our corpus with novel insights on the distribution of noun ellipsis, its licensors and antecedents. Finally, we perform the tasks of detection and resolution of noun ellipsis with different classifiers trained on our corpus and report baseline results. 2020.lrec-1.5 @@ -106,7 +106,7 @@ A Study on Entity Resolution for Email Conversations Parag PravinDakle TakshakDesai - DanMoldovan + DanMoldovan 65–73 This paper investigates the problem of entity resolution for email conversations and presents a seed annotated corpus of email threads labeled with entity coreference chains. Characteristics of email threads concerning reference resolution are first discussed, and then the creation of the corpus and annotation steps are explained. Finally, performance of the current state-of-the-art deep learning models on the seed corpus is evaluated and qualitative error analysis on the predictions obtained is presented. 2020.lrec-1.8 @@ -116,7 +116,7 @@ Model-based Annotation of Coreference RahulAralikatte - AndersSøgaard + AndersSøgaard 74–79 Humans do not make inferences over texts, but over models of what texts are about. When annotators are asked to annotate coreferent spans of text, it is therefore a somewhat unnatural task. This paper presents an alternative in which we preprocess documents, linking entities to a knowledge base, and turn the coreference annotation task – in our case limited to pronouns – into an annotation task where annotators are asked to assign pronouns to entities. Model-based annotation is shown to lead to faster annotation and higher inter-annotator agreement, and we argue that it also opens up an alternative approach to coreference resolution. We present two new coreference benchmark datasets, for English Wikipedia and English teacher-student dialogues, and evaluate state-of-the-art coreference resolvers on them. 2020.lrec-1.9 @@ -128,7 +128,7 @@ RodrigoWilkens BrunoOberle FrédéricLandragin - AmaliaTodirascu + AmaliaTodirascu 80–89 Coreference resolution aims at identifying and grouping all mentions referring to the same entity. In French, most systems run different setups, making their comparison difficult. In this paper, we present an extensive comparison of several coreference resolution systems for French. The systems have been trained on two corpora (ANCOR for spoken language and Democrat for written language) annotated with coreference chains, and augmented with syntactic and semantic information. The models are compared with different configurations (e.g. with and without singletons). In addition, we evaluate mention detection and coreference resolution apart. We present a full-stack model that outperforms other approaches. This model allows us to study the impact of mention detection errors on coreference resolution. Our analysis shows that mention detection can be improved by focusing on boundary identification while advances in the pronoun-noun relation detection can help the coreference task. Another contribution of this work is the first end-to-end neural French coreference resolution model trained on Democrat (written texts), which compares to the state-of-the-art systems for oral French. 2020.lrec-1.10 @@ -138,7 +138,7 @@ Cross-lingual Zero Pronoun Resolution AbdulrahmanAloraini - MassimoPoesio + MassimoPoesio 90–98 In languages like Arabic, Chinese, Italian, Japanese, Korean, Portuguese, Spanish, and many others, predicate arguments in certain syntactic positions are not realized instead of being realized as overt pronouns, and are thus called zero- or null-pronouns. Identifying and resolving such omitted arguments is crucial to machine translation, information extraction and other NLP tasks, but depends heavily on semantic coherence and lexical relationships. We propose a BERT-based cross-lingual model for zero pronoun resolution, and evaluate it on the Arabic and Chinese portions of OntoNotes 5.0. As far as we know, ours is the first neural model of zero-pronoun resolution for Arabic; and our model also outperforms the state-of-the-art for Chinese. In the paper we also evaluate BERT feature extraction and fine-tune models on the task, and compare them with our model. We also report on an investigation of BERT layers indicating which layer encodes the most suitable representation for the task. 2020.lrec-1.11 @@ -149,7 +149,7 @@ Exploiting Cross-Lingual Hints to Discover Event Pronouns SharidLoáiciga ChristianHardmeier - AsadSayeed + AsadSayeed 99–103 Non-nominal co-reference is much less studied than nominal coreference, partly because of the lack of annotated corpora. We explore the possibility to exploit parallel multilingual corpora as a means of cheap supervision for the classification of three different readings of the English pronoun ‘it’: entity, event or pleonastic, from their translation in several languages. We found that the ‘event’ reading is not very frequent, but can be easily predicted provided that the construction used to translate the ‘it’ example is a pronoun as well. These cases, nevertheless, are not enough to generalize to other types of non-nominal reference. 2020.lrec-1.12 @@ -173,7 +173,7 @@ YunfeiLong MingyuWan JinghangGu - QinLu + QinLu Chu-RenHuang 112–119 Deep neural network models have played a critical role in sentiment analysis with promising results in the recent decade. One of the essential challenges, however, is how external sentiment knowledge can be effectively utilized. In this work, we propose a novel affection-driven approach to incorporating affective knowledge into neural network models. The affective knowledge is obtained in the form of a lexicon under the Affect Control Theory (ACT), which is represented by vectors of three-dimensional attributes in Evaluation, Potency, and Activity (EPA). The EPA vectors are mapped to an affective influence value and then integrated into Long Short-term Memory (LSTM) models to highlight affective terms. Experimental results show a consistent improvement of our approach over conventional LSTM models by 1.0% to 1.5% in accuracy on three large benchmark datasets. Evaluations across a variety of algorithms have also proven the effectiveness of leveraging affective terms for deep model enhancement. @@ -187,7 +187,7 @@ JonathanBrennan Wen-MingLuh BertaFranzluebbers - JohnHale + JohnHale 120–125 The Alice Datasets are a set of datasets based on magnetic resonance data and electrophysiological data, collected while participants heard a story in English. Along with the datasets and the text of the story, we provide a variety of different linguistic and computational measures ranging from prosodic predictors to predictors capturing hierarchical syntactic information. These ecologically valid datasets can be easily reused to replicate prior work and to test new hypotheses about natural language comprehension in the brain. 2020.lrec-1.15 @@ -209,7 +209,7 @@ Cortical Speech Databases For Deciphering the Articulatory Code - HaraldHöge + HaraldHöge 133–137 The paper relates to following ‘AC-hypotheses’: The articulatory code (AC) is a neural code exchanging multi-item messages between the short-term memory and cortical areas as the vSMC and STG. In these areas already neurons active in the presence of articulatory features have been measured. The AC codes the content of speech segmented in chunks and is the same for both modalities - speech perception and speech production. Each AC-message is related to a syllable. The items of each message relate to coordinated articulatory gestures composing the syllable. The mechanism to transport the AC and to segment the auditory signal is based on Ɵ/γ-oscillations, where a Ɵ-cycle has the duration of a Ɵ-syllable. The paper describes the findings from neuroscience, phonetics and the science of evolution leading to the AC-hypotheses. The paper proposes to verify the AC-hypotheses by measuring the activity of all ensembles of neurons coding and decoding the AC. Due to state of the art, the cortical measurements to be prepared, done and further processed need a high effort from scientists active in different areas. We propose to launch a project to produce cortical speech databases with cortical recordings synchronized with the speech signal allowing to decipher the articulatory code. 2020.lrec-1.17 @@ -243,7 +243,7 @@ The <fixed-case>ACQDIV</fixed-case> Corpus Database and Aggregation Pipeline AnnaJancso - StevenMoran + StevenMoran SabineStoll 156–165 We present the ACQDIV corpus database and aggregation pipeline, a tool developed as part of the European Research Council (ERC) funded project ACQDIV, which aims to identify the universal cognitive processes that allow children to acquire any language. The corpus database represents 15 corpora from 14 typologically maximally diverse languages. Here we give an overview of the project, database, and our extensible software package for adding more corpora to the current language sample. Lastly, we discuss how we use the corpus database to mine for universal patterns in child language acquisition corpora and we describe avenues for future research. @@ -269,7 +269,7 @@ Orthographic Codes and the Neighborhood Effect: Lessons from Information Theory StéphanTulkens DominiekSandra - WalterDaelemans + WalterDaelemans 172–181 We consider the orthographic neighborhood effect: the effect that words with more orthographic similarity to other words are read faster. The neighborhood effect serves as an important control variable in psycholinguistic studies of word reading, and explains variance in addition to word length and word frequency. Following previous work, we model the neighborhood effect as the average distance to neighbors in feature space for three feature sets: slots, character ngrams and skipgrams. We optimize each of these feature sets and find evidence for language-independent optima, across five megastudy corpora from five alphabetic languages. Additionally, we show that weighting features using the inverse of mutual information (MI) improves the neighborhood effect significantly for all languages. We analyze the inverse feature weighting, and show that, across languages, grammatical morphemes get the lowest weights. Finally, we perform the same experiments on Korean Hangul, a non-alphabetic writing system, where we find the opposite results: slower responses as a function of denser neighborhoods, and a negative effect of inverse feature weighting. This raises the question of whether this is a cognitive effect, or an effect of the way we represent Hangul orthography, and indicates more research is needed. 2020.lrec-1.22 @@ -334,8 +334,8 @@ KijongHan KuntaeKim SoojiYoon - Eun-kyungKim - Key-SunChoi + Eun-kyungKim + Key-SunChoi 212–219 Information extraction from unstructured texts plays a vital role in the field of natural language processing. Although there has been extensive research into each information extraction task (i.e., entity linking, coreference resolution, and relation extraction), data are not available for a continuous and coherent evaluation of all information extraction tasks in a comprehensive framework. Given that each task is performed and evaluated with a different dataset, analyzing the effect of the previous task on the next task with a single dataset throughout the information extraction process is impossible. This paper aims to propose a Korean information extraction initiative point and promote research in this field by presenting crowdsourcing data collected for four information extraction tasks from the same corpus and the training and evaluation results for each task of a state-of-the-art model. These machine learning data for Korean information extraction are the first of their kind, and there are plans to continuously increase the data volume. The test results will serve as an initiative result for each Korean information extraction task and are expected to serve as a comparison target for various studies on Korean information extraction using the data collected in this study. 2020.lrec-1.27 @@ -366,13 +366,13 @@ Crowdsourcing in the Development of a Multilingual <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et: A Case Study of <fixed-case>K</fixed-case>orean <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et - YounggyunHahm + YounggyunHahm YoungbinNoh Ji YoonHan Tae HwanOh HyonsuChoe HansaemKim - Key-SunChoi + Key-SunChoi 236–244 Using current methods, the construction of multilingual resources in FrameNet is an expensive and complex task. While crowdsourcing is a viable alternative, it is difficult to include non-native English speakers in such efforts as they often have difficulty with English-based FrameNet tools. In this work, we investigated cross-lingual issues in crowdsourcing approaches for multilingual FrameNets, specifically in the context of the newly constructed Korean FrameNet. To accomplish this, we evaluated the effectiveness of various crowdsourcing settings whereby certain types of information are provided to workers, such as English definitions in FrameNet or translated definitions. We then evaluated whether the crowdsourced results accurately captured the meaning of frames both cross-culturally and cross-linguistically, and found that by allowing the crowd workers to make intuitive choices, they achieved a quality comparable to that of trained FrameNet experts (F1 > 0.75). The outcomes of this work are now publicly available as a new release of Korean FrameNet 1.1. 2020.lrec-1.30 @@ -402,7 +402,7 @@ Do You Believe It Happened? Assessing <fixed-case>C</fixed-case>hinese Readers’ Veridicality Judgments Yu-YunChang - Shu-KaiHsieh + Shu-KaiHsieh 259–267 This work collects and studies Chinese readers’ veridicality judgments to news events (whether an event is viewed as happening or not). For instance, in “The FBI alleged in court documents that Zazi had admitted having a handwritten recipe for explosives on his computer”, do people believe that Zazi had a handwritten recipe for explosives? The goal is to observe the pragmatic behaviors of linguistic features under context which affects readers in making veridicality judgments. Exploring from the datasets, it is found that features such as event-selecting predicates (ESP), modality markers, adverbs, temporal information, and statistics have an impact on readers’ veridicality judgments. We further investigated that modality markers with high certainty do not necessarily trigger readers to have high confidence in believing an event happened. Additionally, the source of information introduced by an ESP presents low effects to veridicality judgments, even when an event is attributed to an authority (e.g. “The FBI”). A corpus annotated with Chinese readers’ veridicality judgments is released as the Chinese PragBank for further analysis. 2020.lrec-1.33 @@ -414,7 +414,7 @@ LionelNicolas VerenaLyding ClaudiaBorg - CorinaForascu + CorinaForascu KarënFort KaterinaZdravkova IztokKosem @@ -428,7 +428,7 @@ AnisiaKatinskaia AnabelaBarreiro LaviniaAparaschivei - YaakovHaCohen-Kerner + YaakovHaCohen-Kerner 268–278 We introduce in this paper a generic approach to combine implicit crowdsourcing and language learning in order to mass-produce language resources (LRs) for any language for which a crowd of language learners can be involved. We present the approach by explaining its core paradigm that consists in pairing specific types of LRs with specific exercises, by detailing both its strengths and challenges, and by discussing how much these challenges have been addressed at present. Accordingly, we also report on on-going proof-of-concept efforts aiming at developing the first prototypical implementation of the approach in order to correct and extend an LR called ConceptNet based on the input crowdsourced from language learners. We then present an international network called the European Network for Combining Language Learning with Crowdsourcing Techniques (enetCollect) that provides the context to accelerate the implementation of this generic approach. Finally, we exemplify how it can be used in several language learning scenarios to produce a multitude of NLP resources and how it can therefore alleviate the long-standing NLP issue of the lack of LRs. 2020.lrec-1.34 @@ -451,7 +451,7 @@ Francisco JavierChiyah Garcia JoséLopes XingkunLiu - HelenHastie + HelenHastie 288–297 Large corpora of task-based and open-domain conversational dialogues are hugely valuable in the field of data-driven dialogue systems. Crowdsourcing platforms, such as Amazon Mechanical Turk, have been an effective method for collecting such large amounts of data. However, difficulties arise when task-based dialogues require expert domain knowledge or rapid access to domain-relevant information, such as databases for tourism. This will become even more prevalent as dialogue systems become increasingly ambitious, expanding into tasks with high levels of complexity that require collaboration and forward planning, such as in our domain of emergency response. In this paper, we propose CRWIZ: a framework for collecting real-time Wizard of Oz dialogues through crowdsourcing for collaborative, complex tasks. This framework uses semi-guided dialogue to avoid interactions that breach procedures and processes only known to experts, while enabling the capture of a wide variety of interactions. 2020.lrec-1.36 @@ -508,13 +508,13 @@ MatthiasButterweck CathyChua CatiaCucchiarin - GülşenEryiğit + GülşenEryiğit JohannaGerlach HaniehHabibi NeasaNí Chiaráin - MannyRayner + MannyRayner SteinþórSteingrímsson - HelmerStrik + HelmerStrik 323–331 LARA (Learning and Reading Assistant) is an open source platform whose purpose is to support easy conversion of plain texts into multimodal online versions suitable for use by language learners. This involves semi-automatically tagging the text, adding other annotations and recording audio. The platform is suitable for creating texts in multiple languages via crowdsourcing techniques that can be used for teaching a language via reading and listening. We present results of initial experiments by various collaborators where we measure the time required to produce substantial LARA resources, up to the length of short novels, in Dutch, English, Farsi, French, German, Icelandic, Irish, Swedish and Turkish. The first results are encouraging. Although there are some startup problems, the conversion task seems manageable for the languages tested so far. The resulting enriched texts are posted online and are freely available in both source and compiled form. 2020.lrec-1.40 @@ -524,9 +524,9 @@ A Dataset for Investigating the Impact of Feedback on Student Revision Outcome IldikoPilan - JohnLee + JohnLee Chak YanYeung - JonathanWebster + JonathanWebster 332–339 We present an annotation scheme and a dataset of teacher feedback provided for texts written by non-native speakers of English. The dataset consists of student-written sentences in their original and revised versions with teacher feedback provided for the errors. Feedback appears both in the form of open-ended comments and error category tags. We focus on a specific error type, namely linking adverbial (e.g. however, moreover) errors. The dataset has been annotated for two aspects: (i) revision outcome establishing whether the re-written student sentence was correct and (ii) directness, indicating whether teachers provided explicitly the correction in their feedback. This dataset allows for studies around the characteristics of teacher feedback and how these influence students’ revision outcome. We describe the data preparation process and we present initial statistical investigations regarding the effect of different feedback characteristics on revision outcome. These show that open-ended comments and mitigating expressions appear in a higher proportion of successful revisions than unsuccessful ones, while directness and metalinguistic terms have no effect. Given that the use of this type of data is relatively unexplored in natural language processing (NLP) applications, we also report some observations and challenges when working with feedback data. 2020.lrec-1.41 @@ -574,8 +574,8 @@ A Process-oriented Dataset of Revisions during Writing RianneConijn EmilyDux Speltz - Mennovan Zaanen - LuukVan Waes + Mennovan Zaanen + LuukVan Waes EvgenyChukharev-Hudilainen 363–368 Revision plays a major role in writing and the analysis of writing processes. Revisions can be analyzed using a product-oriented approach (focusing on a finished product, the text that has been produced) or a process-oriented approach (focusing on the process that the writer followed to generate this product). Although several language resources exist for the product-oriented approach to revisions, there are hardly any resources available yet for an in-depth analysis of the process of revisions. Therefore, we provide an extensive dataset on revisions made during writing (accessible via https://hdl.handle.net/10411/VBDYGX). This dataset is based on keystroke data and eye tracking data of 65 students from a variety of backgrounds (undergraduate and graduate English as a first language and English as a second language students) and a variety of tasks (argumentative text and academic abstract). In total, 7,120 revisions were identified in the dataset. For each revision, 18 features have been manually annotated and 31 features have been automatically extracted. As a case study, we show two potential use cases of the dataset. In addition, future uses of the dataset are described. @@ -600,7 +600,7 @@ <fixed-case>TLT</fixed-case>-school: a Corpus of Non Native Children Speech RobertoGretter - MarcoMatassoni + MarcoMatassoni StefanoBannò FalavignaDaniele 378–385 @@ -624,7 +624,7 @@ Quality Focused Approach to a Learner Corpus Development RobertsDarģis IlzeAuziņa - KristīneLevāne-Petrova + KristīneLevāne-Petrova IngaKaija 392–396 The paper presents quality focused approach to a learner corpus development. The methodology was developed with multiple design considerations put in place to make the annotation process easier and at the same time reduce the amount of mistakes that could be introduced due to inconsistent text correction or carelessness. The approach suggested in this paper consists of multiple parts: comparison of digitized texts by several annotators, text correction, automated morphological analysis, and manual review of annotations. The described approach is used to create Latvian Language Learner corpus (LaVA) which is part of a currently ongoing project Development of Learner corpus of Latvian: methods, tools and applications. @@ -634,7 +634,7 @@ An Exploratory Study into Automated Précis Grading - OrpheeDe Clercq + OrpheeDe Clercq SenneVan Hoecke 397–404 Automated writing evaluation is a popular research field, but the main focus has been on evaluating argumentative essays. In this paper, we consider a different genre, namely précis texts. A précis is a written text that provides a coherent summary of main points of a spoken or written text. We present a corpus of English précis texts which all received a grade assigned by a highly-experienced English language teacher and were subsequently annotated following an exhaustive error typology. With this corpus we trained a machine learning model which relies on a number of linguistic, automatic summarization and AWE features. Our results reveal that this model is able to predict the grade of précis texts with only a moderate error margin. @@ -645,8 +645,8 @@ Adjusting Image Attributes of Localized Regions with Low-level Dialogue Tzu-HsiangLin - AlexanderRudnicky - TrungBui + AlexanderRudnicky + TrungBui Doo SoonKim JeanOh 405–412 @@ -658,7 +658,7 @@ Alignment Annotation for Clinic Visit Dialogue to Clinical Note Sentence Language Generation Wen-waiYim - MelihaYetisgen + MelihaYetisgen JennyHuang MicahGrossman 413–421 @@ -678,7 +678,7 @@ AdarshKumar AnujGoyal PeterKu - DilekHakkani-Tur + DilekHakkani-Tur 422–428 MultiWOZ 2.0 (Budzianowski et al., 2018) is a recently released multi-domain dialogue dataset spanning 7 distinct domains and containing over 10,000 dialogues. Though immensely useful and one of the largest resources of its kind to-date, MultiWOZ 2.0 has a few shortcomings. Firstly, there are substantial noise in the dialogue state annotations and dialogue utterances which negatively impact the performance of state-tracking models. Secondly, follow-up work (Lee et al., 2019) has augmented the original dataset with user dialogue acts. This leads to multiple co-existent versions of the same dataset with minor modifications. In this work we tackle the aforementioned issues by introducing MultiWOZ 2.1. To fix the noisy state annotations, we use crowdsourced workers to re-annotate state and utterances based on the original utterances in the dataset. This correction process results in changes to over 32% of state annotations across 40% of the dialogue turns. In addition, we fix 146 dialogue utterances by canonicalizing slot values in the utterances to the values in the dataset ontology. To address the second problem, we combined the contributions of the follow-up works into MultiWOZ 2.1. Hence, our dataset also includes user dialogue acts as well as multiple slot descriptions per dialogue state slot. We then benchmark a number of state-of-the-art dialogue state tracking models on the MultiWOZ 2.1 dataset and show the joint state tracking performance on the corrected state annotations. We are publicly releasing MultiWOZ 2.1 to the community, hoping that this dataset resource will allow for more effective models across various dialogue subproblems to be built in the future. 2020.lrec-1.53 @@ -702,8 +702,8 @@ ArantxaOtegi AitorAgirre Jon AnderCampos - AitorSoroa - EnekoAgirre + AitorSoroa + EnekoAgirre 436–442 Conversational Question Answering (CQA) systems meet user information needs by having conversations with them, where answers to the questions are retrieved from text. There exist a variety of datasets for English, with tens of thousands of training examples, and pre-trained language models have allowed to obtain impressive results. The goal of our research is to test the performance of CQA systems under low-resource conditions which are common for most non-English languages: small amounts of native annotations and other limitations linked to low resource languages, like lack of crowdworkers or smaller wikipedias. We focus on the Basque language, and present the first non-English CQA dataset and results. Our experiments show that it is possible to obtain good results with low amounts of native data thanks to cross-lingual transfer, with quality comparable to those obtained for English. We also discovered that dialogue history models are not directly transferable to another language, calling for further research. The dataset is publicly available. 2020.lrec-1.55 @@ -726,12 +726,12 @@ <fixed-case>BLISS</fixed-case>: An Agent for Collecting Spoken Dialogue Data about Health and Well-being Jeltevan Waterschoot IrisHendrickx - ArifKhan - EstherKlabbers + ArifKhan + EstherKlabbers Marcelde Korte - HelmerStrik - CatiaCucchiarini - MariëtTheune + HelmerStrik + CatiaCucchiarini + MariëtTheune 449–458 An important objective in health-technology is the ability to gather information about people’s well-being. Structured interviews can be used to obtain this information, but are time-consuming and not scalable. Questionnaires provide an alternative way to extract such information, though typically lack depth. In this paper, we present our first prototype of the BLISS agent, an artificial intelligent agent which intends to automatically discover what makes people happy and healthy. The goal of Behaviour-based Language-Interactive Speaking Systems (BLISS) is to understand the motivations behind people’s happiness by conducting a personalized spoken dialogue based on a happiness model. We built our first prototype of the model to collect 55 spoken dialogues, in which the BLISS agent asked questions to users about their happiness and well-being. Apart from a description of the BLISS architecture, we also provide details about our dataset, which contains over 120 activities and 100 motivations and is made available for usage. 2020.lrec-1.57 @@ -852,8 +852,8 @@ Mapping the Dialog Act Annotations of the <fixed-case>LEGO</fixed-case> Corpus into <fixed-case>ISO</fixed-case> 24617-2 Communicative Functions EugénioRibeiro - RicardoRibeiro - DavidMartins de Matos + RicardoRibeiro + DavidMartins de Matos 531–539 ISO 24617-2, the ISO standard for dialog act annotation, sets the ground for more comparable research in the area. However, the amount of data annotated according to it is still reduced, which impairs the development of approaches for automatic recognition. In this paper, we describe a mapping of the original dialog act labels of the LEGO corpus, which have been neglected, into the communicative functions of the standard. Although this does not lead to a complete annotation according to the standard, the 347 dialogs provide a relevant amount of data that can be used in the development of automatic communicative function recognition approaches, which may lead to a wider adoption of the standard. Using the 17 English dialogs of the DialogBank as gold standard, our preliminary experiments have shown that including the mapped dialogs during the training phase leads to improved performance while recognizing communicative functions in the Task dimension. 2020.lrec-1.67 @@ -875,13 +875,13 @@ The <fixed-case>ISO</fixed-case> Standard for Dialogue Act Annotation, Second Edition - HarryBunt + HarryBunt VolhaPetukhova EmerGilmartin CatherinePelachaud AlexFang SimonKeizer - LaurentPrévot + LaurentPrévot 549–558 ISO standard 24617-2 for dialogue act annotation, established in 2012, has in the past few years been used both in corpus annotation and in the design of components for spoken and multimodal dialogue systems. This has brought some inaccuracies and undesirbale limitations of the standard to light, which are addressed in a proposed second edition. This second edition allows a more accurate annotation of dependence relations and rhetorical relations in dialogue. Following the ISO 24617-4 principles of semantic annotation, and borrowing ideas from EmotionML, a triple-layered plug-in mechanism is introduced which allows dialogue act descriptions to be enriched with information about their semantic content, about accompanying emotions, and other information, and allows the annotation scheme to be customised by adding application-specific dialogue act types. 2020.lrec-1.69 @@ -890,7 +890,7 @@ The <fixed-case>AICO</fixed-case> Multimodal Corpus – Data Collection and Preliminary Analyses - KristiinaJokinen + KristiinaJokinen 559–564 This paper describes data collection and the first explorative research on the AICO Multimodal Corpus. The corpus contains eye-gaze, Kinect, and video recordings of human-robot and human-human interactions, and was collected to study cooperation, engagement and attention of human participants in task-based as well as in chatty type interactive situations. In particular, the goal was to enable comparison between human-human and human-robot interactions, besides studying multimodal behaviour and attention in the different dialogue activities. The robot partner was a humanoid Nao robot, and it was expected that its agent-like behaviour would render humanrobot interactions similar to human-human interaction but also high-light important differences due to the robot’s limited conversational capabilities. The paper reports on the preliminary studies on the corpus, concerning the participants’ eye-gaze and gesturing behaviours,which were chosen as objective measures to study differences in their multimodal behaviour patterns with a human and a robot partner. 2020.lrec-1.70 @@ -950,7 +950,7 @@ <fixed-case>RDG</fixed-case>-Map: A Multimodal Corpus of Pedagogical Human-Agent Spoken Interactions. MaikePaetzel DeepthiKarkada - RameshManuvinakurike + RameshManuvinakurike 600–609 This paper presents a multimodal corpus of 209 spoken game dialogues between a human and a remote-controlled artificial agent. The interactions involve people collaborating with the agent to identify countries on the world map as quickly as possible, which allows studying rapid and spontaneous dialogue with complex anaphoras, disfluent utterances and incorrect descriptions. The corpus consists of two parts: 8 hours of game interactions have been collected with a virtual unembodied agent online and 26.8 hours have been recorded with a physically embodied robot in a research lab. In addition to spoken audio recordings available for both parts, camera recordings and skeleton-, facial expression- and eye-gaze tracking data have been collected for the lab-based part of the corpus. In this paper, we introduce the pedagogical reference resolution game (RDG-Map) and the characteristics of the corpus collected. We also present an annotation scheme we developed in order to study the dialogue strategies utilized by the players. Based on a subset of 330 minutes of interactions annotated so far, we discuss initial insights into these strategies as well as the potential of the corpus for future research. 2020.lrec-1.75 @@ -1052,7 +1052,7 @@ BirgitRauchbauer YoussefHmamouche BrigitteBigi - LaurentPrévot + LaurentPrévot MagalieOchs ThierryChaminade 668–675 @@ -1077,15 +1077,15 @@ Dialogue-<fixed-case>AMR</fixed-case>: <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation for Dialogue - ClaireBonial + ClaireBonial LuciaDonatelli MitchellAbrams - Stephanie M.Lukin + Stephanie M.Lukin StephenTratz MatthewMarge RonArtstein - DavidTraum - ClareVoss + DavidTraum + ClareVoss 684–695 This paper describes a schema that enriches Abstract Meaning Representation (AMR) in order to provide a semantic representation for facilitating Natural Language Understanding (NLU) in dialogue systems. AMR offers a valuable level of abstraction of the propositional content of an utterance; however, it does not capture the illocutionary force or speaker’s intended contribution in the broader dialogue context (e.g., make a request or ask a question), nor does it capture tense or aspect. We explore dialogue in the domain of human-robot interaction, where a conversational robot is engaged in search and navigation tasks with a human partner. To address the limitations of standard AMR, we develop an inventory of speech acts suitable for our domain, and present “Dialogue-AMR”, an enhanced AMR that represents not only the content of an utterance, but the illocutionary force behind it, as well as tense and aspect. To showcase the coverage of the schema, we use both manual and automatic methods to construct the “DialAMR” corpus—a corpus of human-robot dialogue annotated with standard AMR and our enriched Dialogue-AMR schema. Our automated methods can be used to incorporate AMR into a larger NLU pipeline supporting human-robot dialogue. 2020.lrec-1.86 @@ -1118,7 +1118,7 @@ <fixed-case>B</fixed-case>rain<fixed-case>P</fixed-case>redict: a Tool for Predicting and Visualising Local Brain Activity YoussefHmamouche - LaurentPrévot + LaurentPrévot MagalieOchs ThierryChaminade 710–716 @@ -1144,7 +1144,7 @@ KallirroiGeorgila CarlaGordon VolodymyrYanov - DavidTraum + DavidTraum 726–734 We collected a corpus of dialogues in a Wizard of Oz (WOz) setting in the Internet of Things (IoT) domain. We asked users participating in these dialogues to rate the system on a number of aspects, namely, intelligence, naturalness, personality, friendliness, their enjoyment, overall quality, and whether they would recommend the system to others. Then we asked dialogue observers, i.e., Amazon Mechanical Turkers (MTurkers), to rate these dialogues on the same aspects. We also generated simulated dialogues between dialogue policies and simulated users and asked MTurkers to rate them again on the same aspects. Using linear regression, we developed dialogue evaluation functions based on features from the simulated dialogues and the MTurkers’ ratings, the WOz dialogues and the MTurkers’ ratings, and the WOz dialogues and the WOz participants’ ratings. We applied all these dialogue evaluation functions to a held-out portion of our WOz dialogues, and we report results on the predictive power of these different types of dialogue evaluation functions. Our results suggest that for three conversational aspects (intelligence, naturalness, overall quality) just training evaluation functions on simulated data could be sufficient. 2020.lrec-1.91 @@ -1155,7 +1155,7 @@ Which Model Should We Use for a Real-World Conversational Dialogue System? a Cross-Language Relevance Model or a Deep Neural Net? Seyed HosseinAlavi AntonLeuski - DavidTraum + DavidTraum 735–742 We compare two models for corpus-based selection of dialogue responses: one based on cross-language relevance with a cross-language LSTM model. Each model is tested on multiple corpora, collected from two different types of dialogue source material. Results show that while the LSTM model performs adequately on a very large corpus (millions of utterances), its performance is dominated by the cross-language relevance model for a more moderate-sized corpus (ten thousands of utterances). 2020.lrec-1.92 @@ -1189,7 +1189,7 @@ An Annotation Approach for Social and Referential Gaze in Dialogue VidyaSomashekarappa ChristineHowes - AsadSayeed + AsadSayeed 759–765 This paper introduces an approach for annotating eye gaze considering both its social and the referential functions in multi-modal human-human dialogue. Detecting and interpreting the temporal patterns of gaze behavior cues is natural for humans and also mostly an unconscious process. However, these cues are difficult for conversational agents such as robots or avatars to process or generate. The key factor is to recognize these variants and carry out a successful conversation, as misinterpretation can lead to total failure of the given interaction. This paper introduces an annotation scheme for eye-gaze in human-human dyadic interactions that is intended to facilitate the learning of eye-gaze patterns in multi-modal natural dialogue. 2020.lrec-1.95 @@ -1211,7 +1211,7 @@ Books of Hours. the First Liturgical Data Set for Text Segmentation. AmirHazem - BeatriceDaille + BeatriceDaille ChristopherKermorvant DominiqueStutzmann Marie-LaurenceBonhomme @@ -1271,7 +1271,7 @@ Automatic Section Recognition in Obituaries ValentinoSabbatino - Laura Ana MariaBostan + Laura Ana MariaBostan RomanKlinger 817–825 Obituaries contain information about people’s values across times and cultures, which makes them a useful resource for exploring cultural history. They are typically structured similarly, with sections corresponding to Personal Information, Biographical Sketch, Characteristics, Family, Gratitude, Tribute, Funeral Information and Other aspects of the person. To make this information available for further studies, we propose a statistical model which recognizes these sections. To achieve that, we collect a corpus of 20058 English obituaries from TheDaily Item, Remembering.CA and The London Free Press. The evaluation of our annotation guidelines with three annotators on 1008 obituaries shows a substantial agreement of Fleiss κ = 0.87. Formulated as an automatic segmentation task, a convolutional neural network outperforms bag-of-words and embedding-based BiLSTMs and BiLSTM-CRFs with a micro F1 = 0.81. @@ -1292,7 +1292,7 @@ <fixed-case>R</fixed-case>i<fixed-case>Q</fixed-case>u<fixed-case>A</fixed-case>: A Corpus of Rich Quotation Annotation for <fixed-case>E</fixed-case>nglish Literary Text SeanPapay - SebastianPadó + SebastianPadó 835–841 We introduce RiQuA (RIch QUotation Annotations), a corpus that provides quotations, including their interpersonal structure (speakers and addressees) for English literary text. The corpus comprises 11 works of 19th-century literature that were manually doubly annotated for direct and indirect quotations. For each quotation, its span, speaker, addressee, and cue are identified (if present). This provides a rich view of dialogue structures not available from other available corpora. We detail the process of creating this dataset, discuss the annotation guidelines, and analyze the resulting corpus in terms of inter-annotator agreement and its properties. RiQuA, along with its annotations guidelines and associated scripts, are publicly available for use, modification, and experimentation. 2020.lrec-1.104 @@ -1312,9 +1312,9 @@ The <fixed-case>BDC</fixed-case>amões Collection of <fixed-case>P</fixed-case>ortuguese Literary Documents: a Research Resource for Digital Humanities and Language Technology SaraGrilo MárciaBolrinha - JoãoSilva + JoãoSilva RuiVaz - AntónioBranco + AntónioBranco 849–854 This paper presents the BDCamões Collection of Portuguese Literary Documents, a new corpus of literary texts written in Portuguese that in its inaugural version includes close to 4 million words from over 200 complete documents from 83 authors in 14 genres, covering a time span from the 16th to the 21st century, and adhering to different orthographic conventions. Many of the texts in the corpus have also been automatically parsed with state-of-the-art language processing tools, forming the BDCamões Treebank subcorpus. This set of characteristics makes of BDCamões an invaluable resource for research in language technology (e.g. authorship detection, genre classification, etc.) and in language science and digital humanities (e.g. comparative literature, diachronic linguistics, etc.). 2020.lrec-1.106 @@ -1347,7 +1347,7 @@ <fixed-case>NLP</fixed-case> Scholar: A Dataset for Examining the State of <fixed-case>NLP</fixed-case> Research - Saif M.Mohammad + Saif M.Mohammad 868–877 Google Scholar is the largest web search engine for academic literature that also provides access to rich metadata associated with the papers. The ACL Anthology (AA) is the largest repository of articles on Natural Language Processing (NLP). We extracted information from AA for about 44 thousand NLP papers and identified authors who published at least three papers there. We then extracted citation information from Google Scholar for all their papers (not just their AA papers). This resulted in a dataset of 1.1 million papers and associated Google Scholar information. We aligned the information in the AA and Google Scholar datasets to create the NLP Scholar Dataset – a single unified source of information (from both AA and Google Scholar) for tens of thousands of NLP papers. It can be used to identify broad trends in productivity, focus, and impact of NLP research. We present here initial work on analyzing the volume of research in NLP over the years and identifying the most cited papers in NLP. We also list a number of additional potential applications. 2020.lrec-1.109 @@ -1403,7 +1403,7 @@ “Voices of the Great War”: A Richly Annotated Corpus of <fixed-case>I</fixed-case>talian Texts on the First World War FedericoBoschetti IreneDe Felice - StefanoDei Rossi + StefanoDei Rossi FeliceDell’Orletta MicheleDi Giorgio MartinaMiliani @@ -1412,7 +1412,7 @@ GiuliaVenturi NicolaLabanca AlessandroLenci - SimonettaMontemagni + SimonettaMontemagni 911–918 “Voices of the Great War” is the first large corpus of Italian historical texts dating back to the period of First World War. This corpus differs from other existing resources in several respects. First, from the linguistic point of view it gives account of the wide range of varieties in which Italian was articulated in that period, namely from a diastratic (educated vs. uneducated writers), diaphasic (low/informal vs. high/formal registers) and diatopic (regional varieties, dialects) points of view. From the historical perspective, through a collection of texts belonging to different genres it represents different views on the war and the various styles of narrating war events and experiences. The final corpus is balanced along various dimensions, corresponding to the textual genre, the language variety used, the author type and the typology of conveyed contents. The corpus is fully annotated with lemmas, part-of-speech, terminology, and named entities. Significant corpus samples representative of the different “voices” have also been enriched with meta-linguistic and syntactic information. The layer of syntactic annotation forms the first nucleus of an Italian historical treebank complying with the Universal Dependencies standard. The paper illustrates the final resource, the methodology and tools used to build it, and the Web Interface for navigating it. 2020.lrec-1.114 @@ -1422,12 +1422,12 @@ <fixed-case>DE</fixed-case>bate<fixed-case>N</fixed-case>et-mig15:Tracing the 2015 Immigration Debate in <fixed-case>G</fixed-case>ermany Over Time GabriellaLapesa - AndreBlessing + AndreBlessing NicoBlokker ErenayDayanik SebastianHaunss JonasKuhn - SebastianPadó + SebastianPadó 919–927 DEbateNet-migr15 is a manually annotated dataset for German which covers the public debate on immigration in 2015. The building block of our annotation is the political science notion of a claim, i.e., a statement made by a political actor (a politician, a party, or a group of citizens) that a specific action should be taken (e.g., vacant flats should be assigned to refugees). We identify claims in newspaper articles, assign them to actors and fine-grained categories and annotate their polarity and date. The aim of this paper is two-fold: first, we release the full DEbateNet-mig15 corpus and document it by means of a quantitative and qualitative analysis; second, we demonstrate its application in a discourse network analysis framework, which enables us to capture the temporal dynamics of the political debate 2020.lrec-1.115 @@ -1445,7 +1445,7 @@ A New <fixed-case>L</fixed-case>atin Treebank for <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies: Charters between <fixed-case>A</fixed-case>ncient <fixed-case>L</fixed-case>atin and <fixed-case>R</fixed-case>omance Languages - Flavio MassimilianoCecchini + Flavio MassimilianoCecchini TimoKorkiakangas MarcoPassarotti 933–942 @@ -1534,7 +1534,7 @@ <fixed-case>D</fixed-case>isc<fixed-case>S</fixed-case>ense: Automated Semantic Analysis of Discourse Markers DamienSileo - TimVan de Cruys + TimVan de Cruys CamillePradel PhilippeMuller 991–999 @@ -1558,8 +1558,8 @@ Machine-Aided Annotation for Fine-Grained Proposition Types in Argumentation YohanJo ElijahMayfield - ChrisReed - EduardHovy + ChrisReed + EduardHovy 1008–1018 We introduce a corpus of the 2016 U.S. presidential debates and commentary, containing 4,648 argumentative propositions annotated with fine-grained proposition types. Modern machine learning pipelines for analyzing argument have difficulty distinguishing between types of propositions based on their factuality, rhetorical positioning, and speaker commitment. Inability to properly account for these facets leaves such systems inaccurate in understanding of fine-grained proposition types. In this paper, we demonstrate an approach to annotating for four complex proposition types, namely normative claims, desires, future possibility, and reported speech. We develop a hybrid machine learning and human workflow for annotation that allows for efficient and reliable annotation of complex linguistic phenomena, and demonstrate with preliminary analysis of rhetorical strategies and structure in presidential debates. This new dataset and method can support technical researchers seeking more nuanced representations of argument, as well as argumentation theorists developing new quantitative analyses. 2020.lrec-1.127 @@ -1583,8 +1583,8 @@ WanqiuLong XinyiCai JamesReid - BonnieWebber - DeyiXiong + BonnieWebber + DeyiXiong 1025–1032 Text corpora annotated with language-related properties are an important resource for the development of Language Technology. The current work contributes a new resource for Chinese Language Technology and for Chinese-English translation, in the form of a set of TED talks (some originally given in English, some in Chinese) that have been annotated with discourse relations in the style of the Penn Discourse TreeBank, adapted to properties of Chinese text that are not present in English. The resource is currently unique in annotating discourse-level properties of planned spoken monologues rather than of written text. An inter-annotator agreement study demonstrates that the annotation scheme is able to achieve highly reliable results. 2020.lrec-1.129 @@ -1596,7 +1596,7 @@ ChristopherOlshefski LucaLugini RavneetSingh - DianeLitman + DianeLitman AmandaGodley 1033–1043 Although NLP research on argument mining has advanced considerably in recent years, most studies draw on corpora of asynchronous and written texts, often produced by individuals. Few published corpora of synchronous, multi-party argumentation are available. The Discussion Tracker corpus, collected in high school English classes, is an annotated dataset of transcripts of spoken, multi-party argumentation. The corpus consists of 29 multi-party discussions of English literature transcribed from 985 minutes of audio. The transcripts were annotated for three dimensions of collaborative argumentation: argument moves (claims, evidence, and explanations), specificity (low, medium, high) and collaboration (e.g., extensions of and disagreements about others’ ideas). In addition to providing descriptive statistics on the corpus, we provide performance benchmarks and associated code for predicting each dimension separately, illustrate the use of the multiple annotations in the corpus to improve performance via multi-task learning, and finally discuss other ways the corpus might be used to further NLP research. @@ -1650,7 +1650,7 @@ Joint Learning of Syntactic Features Helps Discourse Segmentation TakshakDesai Parag PravinDakle - DanMoldovan + DanMoldovan 1073–1080 This paper describes an accurate framework for carrying out multi-lingual discourse segmentation with BERT (Devlin et al., 2019). The model is trained to identify segments by casting the problem as a token classification problem and jointly learning syntactic features like part-of-speech tags and dependency relations. This leads to significant improvements in performance. Experiments are performed in different languages, such as English, Dutch, German, Portuguese Brazilian and Basque to highlight the cross-lingual effectiveness of the segmenter. In particular, the model achieves a state-of-the-art F-score of 96.7 for the RST-DT corpus (Carlson et al., 2003) improving on the previous best model by 7.2%. Additionally, a qualitative explanation is provided for how proposed changes contribute to model performance by analyzing errors made on the test data. 2020.lrec-1.135 @@ -1683,7 +1683,7 @@ <fixed-case>D</fixed-case>i<fixed-case>ML</fixed-case>ex-<fixed-case>B</fixed-case>angla: A Lexicon of <fixed-case>B</fixed-case>angla Discourse Connectives DebopamDas ManfredStede - Soumya SankarGhosh + Soumya SankarGhosh LahariChatterjee 1097–1102 We present DiMLex-Bangla, a newly developed lexicon of discourse connectives in Bangla. The lexicon, upon completion of its first version, contains 123 Bangla connective entries, which are primarily compiled from the linguistic literature and translation of English discourse connectives. The lexicon compilation is later augmented by adding more connectives from a currently developed corpus, called the Bangla RST Discourse Treebank (Das and Stede, 2018). DiMLex-Bangla provides information on syntactic categories of Bangla connectives, their discourse semantics and non-connective uses (if any). It uses the format of the German connective lexicon DiMLex (Stede and Umbach, 1998), which provides a cross-linguistically applicable XML schema. The resource is the first of its kind in Bangla, and is freely available for use in studies on discourse structure and computational applications. @@ -1748,7 +1748,7 @@ Simplifying Coreference Chains for Dyslexic Children RodrigoWilkens - AmaliaTodirascu + AmaliaTodirascu 1142–1151 We present a work aiming to generate adapted content for dyslexic children for French, in the context of the ALECTOR project. Thus, we developed a system to transform the texts at the discourse level. This system modifies the coreference chains, which are markers of text cohesion, by using rules. These rules were designed following a careful study of coreference chains in both original texts and its simplified versions. Moreover, in order to define reliable transformation rules, we analysed several coreference properties as well as the concurrent simplification operations in the aligned texts. This information is coded together with a coreference resolution system and a text rewritten tool in the proposed system, which comprise a coreference module specialised in written text and seven text transformation operations. The evaluation of the system firstly focused on check the simplification by manual validation of three judges. These errors were grouped into five classes that combined can explain 93% of the errors. The second evaluation step consisted of measuring the simplification perception by 23 judges, which allow us to measure the simplification impact of the proposed rules. 2020.lrec-1.144 @@ -1769,7 +1769,7 @@ What Speakers really Mean when they Ask Questions: Classification of Intentions with a Supervised Approach AngèleBarbedette - IrisEshkol-Taravella + IrisEshkol-Taravella 1159–1166 This paper focuses on the automatic detection of hidden intentions of speakers in questions asked during meals. Our corpus is composed of a set of transcripts of spontaneous oral conversations from ESLO’s corpora. We suggest a typology of these intentions based on our research work and the exploration and annotation of the corpus, in which we define two “explicit” categories (request for agreement and request for information) and three “implicit” categories (opinion, will and doubt). We implement a supervised automatic classification model based on annotated data and selected linguistic features and we evaluate its results and performances. We finally try to interpret these results by looking more deeply and specifically into the predictions of the algorithm and the features it used. There are many motivations for this work which are part of ongoing challenges such as opinion analysis, irony detection or the development of conversational agents. 2020.lrec-1.146 @@ -1791,7 +1791,7 @@ Stigma Annotation Scheme and Stigmatized Language Detection in Health-Care Discussions on Social Media NadiyaStraton HyejuJang - RaymondNg + RaymondNg 1178–1190 Much research has been done within the social sciences on the interpretation and influence of stigma on human behaviour and health, which result in out-of-group exclusion, distancing, cognitive separation, status loss, discrimination, in-group pressure, and often lead to disengagement, non-adherence to treatment plan, and prescriptions by the doctor. However, little work has been conducted on computational identification of stigma in general and in social media discourse in particular. In this paper, we develop the annotation scheme and improve the annotation process for stigma identification, which can be applied to other health-care domains. The data from pro-vaccination and anti-vaccination discussion groups are annotated by trained annotators who have professional background in social science and health-care studies, therefore the group can be considered experts on the subject in comparison to non-expert crowd. Amazon MTurk annotators is another group of annotator with no knowledge on their education background, they are initially treated as non-expert crowd on the subject matter of stigma. We analyze the annotations with visualisation techniques, features from LIWC (Linguistic Inquiry and Word Count) list and make prediction based on bi-grams with traditional and deep learning models. Data augmentation method and application of CNN show high performance accuracy in comparison to other models. Success of the rigorous annotation process on identifying stigma is reconfirmed by achieving high prediction rate with CNN. 2020.lrec-1.148 @@ -1809,8 +1809,8 @@ DebanjanMahata RakeshGosangi HaiminZhang - Rajiv RatnShah - AmandaStent + Rajiv RatnShah + AmandaStent 1191–1196 In this paper, we present a new corpus consisting of sentences from Hindi short stories annotated for five different discourse modes argumentative, narrative, descriptive, dialogic and informative. We present a detailed account of the entire data collection and annotation processes. The annotations have a very high inter-annotator agreement (0.87 k-alpha). We analyze the data in terms of label distributions, part of speech tags, and sentence lengths. We characterize the performance of various classification algorithms on this dataset and perform ablation studies to understand the nature of the linguistic models suitable for capturing the nuances of the embedded discourse structures in the presented corpus. 2020.lrec-1.149 @@ -1819,7 +1819,7 @@ Multi-class Multilingual Classification of <fixed-case>W</fixed-case>ikipedia Articles Using Extended Named Entity Tag Set - Hassan S.Shavarani + Hassan S.Shavarani SatoshiSekine 1197–1201 Wikipedia is a great source of general world knowledge which can guide NLP models better understand their motivation to make predictions. Structuring Wikipedia is the initial step towards this goal which can facilitate fine-grain classification of articles. In this work, we introduce the Shinra 5-Language Categorization Dataset (SHINRA-5LDS), a large multi-lingual and multi-labeled set of annotated Wikipedia articles in Japanese, English, French, German, and Farsi using Extended Named Entity (ENE) tag set. We evaluate the dataset using the best models provided for ENE label set classification and show that the currently available classification models struggle with large datasets using fine-grained tag sets. @@ -1831,7 +1831,7 @@ An <fixed-case>A</fixed-case>lgerian Corpus and an Annotation Platform for Opinion and Emotion Analysis LeilaMoudjari KarimaAkli-Astouati - FarahBenamara + FarahBenamara 1202–1210 In this paper, we address the lack of resources for opinion and emotion analysis related to North African dialects, targeting Algerian dialect. We present TWIFIL (TWItter proFILing) a collaborative annotation platform for crowdsourcing annotation of tweets at different levels of granularity. The plateform allowed the creation of the largest Algerian dialect dataset annotated for both sentiment (9,000 tweets), emotion (about 5,000 tweets) and extra-linguistic information including author profiling (age and gender). The annotation resulted also in the creation of the largest Algerien dialect subjectivity lexicon of about 9,000 entries which can constitute a valuable resources for the development of future NLP applications for Algerian dialect. To test the validity of the dataset, a set of deep learning experiments were conducted to classify a given tweet as positive, negative or neutral. We discuss our results and provide an error analysis to better identify classification errors. 2020.lrec-1.151 @@ -1861,7 +1861,7 @@ Cross-domain Author Gender Classification in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese RafaelDias - IvandréParaboni + IvandréParaboni 1227–1234 Author profiling models predict demographic characteristics of a target author based on the text that they have written. Systems of this kind will often follow a single-domain approach, in which the model is trained from a corpus of labelled texts in a given domain, and it is subsequently validated against a test corpus built from precisely the same domain. Although single-domain settings are arguably ideal, this strategy gives rise to the question of how to proceed when no suitable training corpus (i.e., a corpus that matches the test domain) is available. To shed light on this issue, this paper discusses a cross-domain gender classification task based on four domains (Facebook, crowd sourced opinions, Blogs and E-gov requests) in the Brazilian Portuguese language. A number of simple gender classification models using word- and psycholinguistics-based features alike are introduced, and their results are compared in two kinds of cross-domain setting: first, by making use of a single text source as training data for each task, and subsequently by combining multiple sources. Results confirm previous findings related to the effects of corpus size and domain similarity in English, and pave the way for further studies in the field. 2020.lrec-1.154 @@ -1929,7 +1929,7 @@ Corpora and Baselines for Humour Recognition in <fixed-case>P</fixed-case>ortuguese - HugoGonçalo Oliveira + HugoGonçalo Oliveira AndréClemêncio AnaAlves 1278–1285 @@ -1965,7 +1965,7 @@ SumithraVelupillai RachelHolden MaximKikoler - KalinaBontcheva + KalinaBontcheva RinaDutta AngusRoberts 1303–1310 @@ -2008,7 +2008,7 @@ Email Classification Incorporating Social Networks and Thread Structure SakharAlkhereyf - OwenRambow + OwenRambow 1336–1345 Existing methods for different document classification tasks in the context of social networks typically only capture the semantics of texts, while ignoring the users who exchange the text and the network they form. However, some work has shown that incorporating the social network information in addition to information from language is effective for various NLP applications including sentiment analysis, inferring user attributes, and predicting inter-personal relations. In this paper, we present an empirical study of email classification into “Business” and “Personal” categories. We represent the email communication using various graph structures. As features, we use both the textual information from the email content and social network information from the communication graphs. We also model the thread structure for emails. We focus on detecting personal emails, and we evaluate our methods on two corpora, only one of which we train on. The experimental results reveal that incorporating social network information improves over the performance of an approach based on textual information only. The results also show that considering the thread structure of emails improves the performance further. Furthermore, our approach improves over a state-of-the-art baseline which uses node embeddings based on both lexical and social network information. 2020.lrec-1.167 @@ -2017,7 +2017,7 @@ Development and Validation of a Corpus for Machine Humor Comprehension - Yuen-HsienTseng + Yuen-HsienTseng Wun-SyuanWu Chia-YuehChang Hsueh-ChihChen @@ -2030,7 +2030,7 @@ <fixed-case>A</fixed-case>lector: A Parallel Corpus of Simplified <fixed-case>F</fixed-case>rench Texts with Alignments of Misreadings by Poor and Dyslexic Readers - NúriaGala + NúriaGala AnaïsTack LudivineJavourey-Drevet ThomasFrançois @@ -2062,9 +2062,9 @@ Multilingual Stance Detection in Tweets: The <fixed-case>C</fixed-case>atalonia Independence Corpus ElenaZotova - RodrigoAgerri + RodrigoAgerri ManuelNuñez - GermanRigau + GermanRigau 1368–1375 Stance detection aims to determine the attitude of a given text with respect to a specific topic or claim. While stance detection has been fairly well researched in the last years, most the work has been focused on English. This is mainly due to the relative lack of annotated data in other languages. The TW-10 referendum Dataset released at IberEval 2018 is a previous effort to provide multilingual stance-annotated data in Catalan and Spanish. Unfortunately, the TW-10 Catalan subset is extremely imbalanced. This paper addresses these issues by presenting a new multilingual dataset for stance detection in Twitter for the Catalan and Spanish languages, with the aim of facilitating research on stance detection in multilingual and cross-lingual settings. The dataset is annotated with stance towards one topic, namely, the ndependence of Catalonia. We also provide a semi-automatic method to annotate the dataset based on a categorization of Twitter users. We experiment on the new corpus with a number of supervised approaches, including linear classifiers and deep learning methods. Comparison of our new corpus with the with the TW-1O dataset shows both the benefits and potential of a well balanced corpus for multilingual and cross-lingual research on stance detection. Finally, we establish new state-of-the-art results on the TW-10 dataset, both for Catalan and Spanish. 2020.lrec-1.171 @@ -2093,7 +2093,7 @@ NoéCécillon VincentLabatut RichardDufour - GeorgesLinarès + GeorgesLinarès 1382–1390 With the spread of online social networks, it is more and more difficult to monitor all the user-generated content. Automating the moderation process of the inappropriate exchange content on Internet has thus become a priority task. Methods have been proposed for this purpose, but it can be challenging to find a suitable dataset to train and develop them. This issue is especially true for approaches based on information derived from the structure and the dynamic of the conversation. In this work, we propose an original framework, based on the the Wikipedia Comment corpus, with comment-level abuse annotations of different types. The major contribution concerns the reconstruction of conversations, by comparison to existing corpora, which focus only on isolated messages (i.e. taken out of their conversational context). This large corpus of more than 380k annotated messages opens perspectives for online abuse detection and especially for context-based approaches. We also propose, in addition to this corpus, a complete benchmarking platform to stimulate and fairly compare scientific works around the problem of content abuse detection, trying to avoid the recurring problem of result replication. Finally, we apply two classification methods to our dataset to demonstrate its potential. 2020.lrec-1.173 @@ -2114,8 +2114,8 @@ An Annotated Corpus for Sexism Detection in <fixed-case>F</fixed-case>rench Tweets PatriciaChiril - VéroniqueMoriceau - FarahBenamara + VéroniqueMoriceau + FarahBenamara AldaMari GloriaOriggi MarlèneCoulomb-Gully @@ -2130,10 +2130,10 @@ RoneySantos GabrielaPedro SidneyLeal - OtoVale + OtoVale ThiagoPardo - KalinaBontcheva - CarolinaScarton + KalinaBontcheva + CarolinaScarton 1404–1413 The proliferation of fake news is a current issue that influences a number of important areas of society, such as politics, economy and health. In the Natural Language Processing area, recent initiatives tried to detect fake news in different ways, ranging from language-based approaches to content-based verification. In such approaches, the choice of the features for the classification of fake and true news is one of the most important parts of the process. This paper presents a study on the impact of readability features to detect fake news for the Brazilian Portuguese language. The results show that such features are relevant to the task (achieving, alone, up to 92% classification accuracy) and may improve previous classification results. 2020.lrec-1.176 @@ -2142,7 +2142,7 @@ When Shallow is Good Enough: Automatic Assessment of Conceptual Text Complexity using Shallow Semantic Features - SanjaStajner + SanjaStajner IoanaHulpuș 1414–1422 According to psycholinguistic studies, the complexity of concepts used in a text and the relations between mentioned concepts play the most important role in text understanding and maintaining reader’s interest. However, the classical approaches to automatic assessment of text complexity, and their commercial applications, take into consideration mainly syntactic and lexical complexity. Recently, we introduced the task of automatic assessment of conceptual text complexity, proposing a set of graph-based deep semantic features using DBpedia as a proxy to human knowledge. Given that such graphs can be noisy, incomplete, and computationally expensive to deal with, in this paper, we propose the use of textual features and shallow semantic features that only require entity linking. We compare the results obtained with new features with those of the state-of-the-art deep semantic features on two tasks: (1) pairwise comparison of two versions of the same text; and (2) five-level classification of texts. We find that the shallow features achieve state-of-the-art results on both tasks, significantly outperforming performances of the deep semantic features on the five-level classification task. Interestingly, the combination of the shallow and deep semantic features lead to a significant improvement of the performances on that task. @@ -2180,7 +2180,7 @@ XiaoleiHuang LinziXing FranckDernoncourt - Michael J.Paul + Michael J.Paul 1440–1448 Existing research on fairness evaluation of document classification models mainly uses synthetic monolingual data without ground truth for author demographic attributes. In this work, we assemble and publish a multilingual Twitter corpus for the task of hate speech detection with inferred four author demographic factors: age, country, gender and race/ethnicity. The corpus covers five languages: English, Italian, Polish, Portuguese and Spanish. We evaluate the inferred demographic labels with a crowdsourcing platform, Figure Eight. To examine factors that can cause biases, we take an empirical analysis of demographic predictability on the English corpus. We measure the performance of four popular document classifiers and evaluate the fairness and bias of the baseline classifiers on the author-level demographic attributes. 2020.lrec-1.180 @@ -2247,8 +2247,8 @@ <fixed-case>E</fixed-case>mo<fixed-case>E</fixed-case>vent: A Multilingual Emotion Corpus based on different Events Flor MiriamPlaza del Arco CarloStrapparava - L. AlfonsoUrena Lopez - MaiteMartin + L. AlfonsoUrena Lopez + MaiteMartin 1492–1498 In recent years emotion detection in text has become more popular due to its potential applications in fields such as psychology, marketing, political science, and artificial intelligence, among others. While opinion mining is a well-established task with many standard data sets and well-defined methodologies, emotion mining has received less attention due to its complexity. In particular, the annotated gold standard resources available are not enough. In order to address this shortage, we present a multilingual emotion data set based on different events that took place in April 2019. We collected tweets from the Twitter platform. Then one of seven emotions, six Ekman’s basic emotions plus the “neutral or other emotions”, was labeled on each tweet by 3 Amazon MTurkers. A total of 8,409 in Spanish and 7,303 in English were labeled. In addition, each tweet was also labeled as offensive or no offensive. We report some linguistic statistics about the data set in order to observe the difference between English and Spanish speakers when they express emotions related to the same events. Moreover, in order to validate the effectiveness of the data set, we also propose a machine learning approach for automatically detecting emotions in tweets for both languages, English and Spanish. 2020.lrec-1.186 @@ -2261,7 +2261,7 @@ Cristian-PaulBara YuanhangLuo MihaiBurzo - RadaMihalcea + RadaMihalcea Emily MowerProvost 1499–1510 Endowing automated agents with the ability to provide support, entertainment and interaction with human beings requires sensing of the users’ affective state. These affective states are impacted by a combination of emotion inducers, current psychological state, and various conversational factors. Although emotion classification in both singular and dyadic settings is an established area, the effects of these additional factors on the production and perception of emotion is understudied. This paper presents a new dataset, Multimodal Stressed Emotion (MuSE), to study the multimodal interplay between the presence of stress and expressions of affect. We describe the data collection protocol, the possible areas of use, and the annotations for the emotional content of the recordings. The paper also presents several baselines to measure the performance of multimodal features for emotion and stress classification. @@ -2274,7 +2274,7 @@ LinruiZhang Hsin-LunHuang YangYu - DanMoldovan + DanMoldovan 1511–1516 People convey sentiments and emotions through language. To understand these affectual states is an essential step towards understanding natural language. In this paper, we propose a transfer-learning based approach to inferring the affectual state of a person from their tweets. As opposed to the traditional machine learning models which require considerable effort in designing task specific features, our model can be well adapted to the proposed tasks with a very limited amount of fine-tuning, which significantly reduces the manual effort in feature engineering. We aim to show that by leveraging the pre-learned knowledge, transfer learning models can achieve competitive results in the affectual content analysis of tweets, compared to the traditional models. As shown by the experiments on SemEval-2018 Task 1: Affect in Tweets, our model ranking 2nd, 4th and 6th place in four of its subtasks proves the effectiveness of our idea. 2020.lrec-1.188 @@ -2307,7 +2307,7 @@ Abusive language in <fixed-case>S</fixed-case>panish children and young teenager’s conversations: data preparation and short text classification with contextual word embeddings - Marta R.Costa-jussà + Marta R.Costa-jussà EstherGonzález AsuncionMoreno EudaldCumalat @@ -2343,7 +2343,7 @@ <fixed-case>G</fixed-case>ood<fixed-case>N</fixed-case>ews<fixed-case>E</fixed-case>veryone: A Corpus of News Headlines Annotated with Emotions, Semantic Roles, and Reader Perception - Laura Ana MariaBostan + Laura Ana MariaBostan EvgenyKim RomanKlinger 1554–1566 @@ -2357,7 +2357,7 @@ SvetlanaKiritchenko WillHipson RobertCoplan - Saif M.Mohammad + Saif M.Mohammad 1567–1577 The state of being alone can have a substantial impact on our lives, though experiences with time alone diverge significantly among individuals. Psychologists distinguish between the concept of solitude, a positive state of voluntary aloneness, and the concept of loneliness, a negative state of dissatisfaction with the quality of one’s social interactions. Here, for the first time, we conduct a large-scale computational analysis to explore how the terms associated with the state of being alone are used in online language. We present SOLO (State of Being Alone), a corpus of over 4 million tweets collected with query terms solitude, lonely, and loneliness. We use SOLO to analyze the language and emotions associated with the state of being alone. We show that the term solitude tends to co-occur with more positive, high-dominance words (e.g., enjoy, bliss) while the terms lonely and loneliness frequently co-occur with negative, low-dominance words (e.g., scared, depressed), which confirms the conceptual distinctions made in psychology. We also show that women are more likely to report on negative feelings of being lonely as compared to men, and there are more teenagers among the tweeters that use the word lonely than among the tweeters that use the word solitude. 2020.lrec-1.195 @@ -2367,7 +2367,7 @@ <fixed-case>P</fixed-case>o<fixed-case>K</fixed-case>i: A Large Dataset of Poems by Children WillHipson - Saif M.Mohammad + Saif M.Mohammad 1578–1589 Child language studies are crucial in improving our understanding of child well-being; especially in determining the factors that impact happiness, the sources of anxiety, techniques of emotion regulation, and the mechanisms to cope with stress. However, much of this research is stymied by the lack of availability of large child-written texts. We present a new corpus of child-written text, PoKi, which includes about 62 thousand poems written by children from grades 1 to 12. PoKi is especially useful in studying child language because it comes with information about the age of the child authors (their grade). We analyze the words in PoKi along several emotion dimensions (valence, arousal, dominance) and discrete emotions (anger, fear, sadness, joy). We use non-parametric regressions to model developmental differences from early childhood to late-adolescence. Results show decreases in valence that are especially pronounced during mid-adolescence, while arousal and dominance peaked during adolescence. Gender differences in the developmental trajectory of emotions are also observed. Our results support and extend the current state of emotion development research. 2020.lrec-1.196 @@ -2424,7 +2424,7 @@ <fixed-case>CEASE</fixed-case>, a Corpus of Emotion Annotated Suicide notes in <fixed-case>E</fixed-case>nglish SoumitraGhosh AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 1618–1626 A suicide note is usually written shortly before the suicide and it provides a chance to comprehend the self-destructive state of mind of the deceased. From a psychological point of view, suicide notes have been utilized for recognizing the motive behind the suicide. To the best of our knowledge, there is no openly accessible suicide note corpus at present, making it challenging for the researchers and developers to deep dive into the area of mental health assessment and suicide prevention. In this paper, we create a fine-grained emotion annotated corpus (CEASE) of suicide notes in English and develop various deep learning models to perform emotion detection on the curated dataset. The corpus consists of 2393 sentences from around 205 suicide notes collected from various sources. Each sentence is annotated with a particular emotion class from a set of 15 fine-grained emotion labels, namely (forgiveness, happiness_peacefulness, love, pride, hopefulness, thankfulness, blame, anger, fear, abuse, sorrow, hopelessness, guilt, information, instructions). For the evaluation, we develop an ensemble architecture, where the base models correspond to three supervised deep learning models, namely Convolutional Neural Network (CNN), Gated Recurrent Unit (GRU) and Long Short Term Memory (LSTM). We obtain the highest test accuracy of 60.17% and cross-validation accuracy of 60.32% 2020.lrec-1.201 @@ -2445,7 +2445,7 @@ An Event-comment Social Media Corpus for Implicit Emotion Analysis - Sophia Yat MeiLee + Sophia Yat MeiLee Helena Yan PingLau 1633–1642 The classification of implicit emotions in text has always been a great challenge to emotion processing. Even though the majority of emotion expressed implicitly, most previous attempts at emotions have focused on the examination of explicit emotions. The poor performance of existing emotion identification and classification models can partly be attributed to the disregard of implicit emotions. In view of this, this paper presents the development of a Chinese event-comment social media emotion corpus. The corpus deals with both explicit and implicit emotions with more emphasis being placed on the implicit ones. This paper specifically describes the data collection and annotation of the corpus. An annotation scheme has been proposed for the annotation of emotion-related information including the emotion type, the emotion cause, the emotion reaction, the use of rhetorical question, the opinion target (i.e. the semantic role in an event that triggers an emotion), etc. Corpus data shows that the annotated items are of great value to the identification of implicit emotions. We believe that the corpus will be a useful resource for both explicit and implicit emotion classification and detection as well as event classification. @@ -2456,8 +2456,8 @@ An Emotional Mess! Deciding on a Framework for Building a <fixed-case>D</fixed-case>utch Emotion-Annotated Corpus LunaDe Bruyne - OrpheeDe Clercq - VeroniqueHoste + OrpheeDe Clercq + VeroniqueHoste 1643–1651 Seeing the myriad of existing emotion models, with the categorical versus dimensional opposition the most important dividing line, building an emotion-annotated corpus requires some well thought-out strategies concerning framework choice. In our work on automatic emotion detection in Dutch texts, we investigate this problem by means of two case studies. We find that the labels joy, love, anger, sadness and fear are well-suited to annotate texts coming from various domains and topics, but that the connotation of the labels strongly depends on the origin of the texts. Moreover, it seems that information is lost when an emotional state is forcedly classified in a limited set of categories, indicating that a bi-representational format is desirable when creating an emotion corpus. 2020.lrec-1.204 @@ -2480,10 +2480,10 @@ Learning Word Ratings for Empathy and Distress from Document-Level User Responses JoãoSedoc - SvenBuechel + SvenBuechel YehonathanNachmany AnnekeBuffone - LyleUngar + LyleUngar 1664–1673 Despite the excellent performance of black box approaches to modeling sentiment and emotion, lexica (sets of informative words and associated weights) that characterize different emotions are indispensable to the NLP community because they allow for interpretable and robust predictions. Emotion analysis of text is increasing in popularity in NLP; however, manually creating lexica for psychological constructs such as empathy has proven difficult. This paper automatically creates empathy word ratings from document-level ratings. The underlying problem of learning word ratings from higher-level supervision has to date only been addressed in an ad hoc fashion and has not used deep learning methods. We systematically compare a number of approaches to learning word ratings from higher-level supervision against a Mixed-Level Feed Forward Network (MLFFN), which we find performs best, and use the MLFFN to create the first-ever empathy lexicon. We then use Signed Spectral Clustering to gain insights into the resulting words. The empathy and distress lexica are publicly available at: http://www.wwbp.org/lexica.html. 2020.lrec-1.206 @@ -2540,7 +2540,7 @@ <fixed-case>H</fixed-case>ard<fixed-case>E</fixed-case>val: Focusing on Challenging Tokens to Assess Robustness of <fixed-case>NER</fixed-case> GabrielBernier-Colborne - PhillippeLanglais + PhillippeLanglais 1704–1711 To assess the robustness of NER systems, we propose an evaluation method that focuses on subsets of tokens that represent specific sources of errors: unknown words and label shift or ambiguity. These subsets provide a system-agnostic basis for evaluating specific sources of NER errors and assessing room for improvement in terms of robustness. We analyze these subsets of challenging tokens in two widely-used NER benchmarks, then exploit them to evaluate NER systems in both in-domain and out-of-domain settings. Results show that these challenging tokens explain the majority of errors made by modern NER systems, although they represent only a small fraction of test tokens. They also indicate that label shift is harder to deal with than unknown words, and that there is much more room for improvement than the standard NER evaluation procedure would suggest. We hope this work will encourage NLP researchers to adopt rigorous and meaningful evaluation methods, and will help them develop more robust models. 2020.lrec-1.211 @@ -2551,7 +2551,7 @@ An Evaluation Dataset for Identifying Communicative Functions of Sentences in <fixed-case>E</fixed-case>nglish Scholarly Papers KenichiIwatsuki FlorianBoudin - AkikoAizawa + AkikoAizawa 1712–1720 Formulaic expressions, such as ‘in this paper we propose’, are used by authors of scholarly papers to perform communicative functions; the communicative function of the present example is ‘stating the aim of the paper’. Collecting such expressions and pairing them with their communicative functions would be highly valuable for various tasks, particularly for writing assistance. However, such collection and paring in a principled and automated manner would require high-quality annotated data, which are not available. In this study, we address this shortcoming by creating a manually annotated dataset for detecting communicative functions in sentences. Starting from a seed list of labelled formulaic expressions, we retrieved new sentences from scholarly papers in the ACL Anthology and asked multiple human evaluators to label communicative functions. To show the usefulness of our dataset, we conducted a series of experiments that determined to what extent sentence representations acquired by recent models, such as word2vec and BERT, can be employed to detect communicative functions in sentences. 2020.lrec-1.212 @@ -2585,7 +2585,7 @@ Yi-AnLai XuanZhu YiZhang - MonaDiab + MonaDiab 1739–1746 Summarizing data samples by quantitative measures has a long history, with descriptive statistics being a case in point. However, as natural language processing methods flourish, there are still insufficient characteristic metrics to describe a collection of texts in terms of the words, sentences, or paragraphs they comprise. In this work, we propose metrics of diversity, density, and homogeneity that quantitatively measure the dispersion, sparsity, and uniformity of a text collection. We conduct a series of simulations to verify that each metric holds desired properties and resonates with human intuitions. Experiments on real-world datasets demonstrate that the proposed characteristic metrics are highly correlated with text classification performance of a renowned model, BERT, which could inspire future applications. 2020.lrec-1.215 @@ -2608,8 +2608,8 @@ AndreaHorbach ItziarAldabe MarieBexte - OierLopez de Lacalle - MontseMaritxalar + OierLopez de Lacalle + MontseMaritxalar 1753–1762 Automatic generation of reading comprehension questions is a topic receiving growing interest in the NLP community, but there is currently no consensus on evaluation metrics and many approaches focus on linguistic quality only while ignoring the pedagogic value and appropriateness of questions. This paper overcomes such weaknesses by a new evaluation scheme where questions from the questionnaire are structured in a hierarchical way to avoid confronting human annotators with evaluation measures that do not make sense for a certain question. We show through an annotation study that our scheme can be applied, but that expert annotators with some level of expertise are needed. We also created and evaluated two new evaluation data sets from the biology domain for Basque and German, composed of questions written by people with an educational background, which will be publicly released. Results show that manually generated questions are in general both of higher linguistic as well as pedagogic quality and that among the human generated questions, teacher-generated ones tend to be most useful. 2020.lrec-1.217 @@ -2800,7 +2800,7 @@ MajaBuljan JoakimNivre StephanOepen - LiljaØvrelid + LiljaØvrelid 1902–1909 We discuss methodological choices in contrastive and diagnostic evaluation in meaning representation parsing, i.e. mapping from natural language utterances to graph-based encodings of its semantic structure. Drawing inspiration from earlier work in syntactic dependency parsing, we transfer and refine several quantitative diagnosis techniques for use in the context of the 2019 shared task on Meaning Representation Parsing (MRP). As in parsing proper, moving evaluation from simple rooted trees to general graphs brings along its own range of challenges. Specifically, we seek to begin to shed light on relative strenghts and weaknesses in different broad families of parsing techniques. In addition to these theoretical reflections, we conduct a pilot experiment on a selection of top-performing MRP systems and one of the five meaning representation frameworks in the shared task. Empirical results suggest that the proposed methodology can be meaningfully applied to parsing into graph-structured target representations, uncovering hitherto unknown properties of the different systems that can inform future development and cross-fertilization across approaches. 2020.lrec-1.234 @@ -2813,7 +2813,7 @@ Chi-YenChen Yi-HuiLee Qian-huiZeng - Wei-YunMa + Wei-YunMa Chen-YangShih Wei-JhihChen 1910–1917 @@ -2879,7 +2879,7 @@ <fixed-case>WEXEA</fixed-case>: <fixed-case>W</fixed-case>ikipedia <fixed-case>EX</fixed-case>haustive Entity Annotation MichaelStrobl AmineTrabelsi - OsmarZaiane + OsmarZaiane 1951–1958 Building predictive models for information extraction from text, such as named entity recognition or the extraction of semantic relationships between named entities in text, requires a large corpus of annotated text. Wikipedia is often used as a corpus for these tasks where the annotation is a named entity linked by a hyperlink to its article. However, editors on Wikipedia are only expected to link these mentions in order to help the reader to understand the content, but are discouraged from adding links that do not add any benefit for understanding an article. Therefore, many mentions of popular entities (such as countries or popular events in history), or previously linked articles, as well as the article’s entity itself, are not linked. In this paper, we discuss WEXEA, a Wikipedia EXhaustive Entity Annotation system, to create a text corpus based on Wikipedia with exhaustive annotations of entity mentions, i.e. linking all mentions of entities to their corresponding articles. This results in a huge potential for additional annotations that can be used for downstream NLP tasks, such as Relation Extraction. We show that our annotations are useful for creating distantly supervised datasets for this task. Furthermore, we publish all code necessary to derive a corpus from a raw Wikipedia dump, so that it can be reproduced by everyone. 2020.lrec-1.240 @@ -2893,8 +2893,8 @@ MouhamadouBa LouiseDeléger ThomasLavergne - PierreZweigenbaum - ClaireNédellec + PierreZweigenbaum + ClaireNédellec 1959–1966 Entity normalization (or entity linking) is an important subtask of information extraction that links entity mentions in text to categories or concepts in a reference vocabulary. Machine learning based normalization methods have good adaptability as long as they have enough training data per reference with a sufficient quality. Distributional representations are commonly used because of their capacity to handle different expressions with similar meanings. However, in specific technical and scientific domains, the small amount of training data and the relatively small size of specialized corpora remain major challenges. Recently, the machine learning-based CONTES method has addressed these challenges for reference vocabularies that are ontologies, as is often the case in life sciences and biomedical domains. And yet, its performance is dependent on manually annotated corpus. Furthermore, like other machine learning based methods, parametrization remains tricky. We propose a new approach to address the scarcity of training data that extends the CONTES method by corpus selection, pre-processing and weak supervision strategies, which can yield high-performance results without any manually annotated examples. We also study which hyperparameters are most influential, with sometimes different patterns compared to previous work. The results show that our approach significantly improves accuracy and outperforms previous state-of-the-art algorithms. 2020.lrec-1.241 @@ -2929,9 +2929,9 @@ AnanyaSubburathinam HengJi JonathanMay - Shih-FuChang - AviSil - ClareVoss + Shih-FuChang + AviSil + ClareVoss 1976–1981 Most of the current cross-lingual transfer learning methods for Information Extraction (IE) have been only applied to name tagging. To tackle more complex tasks such as event extraction we need to transfer graph structures (event trigger linked to multiple arguments with various roles) across languages. We develop a novel share-and-transfer framework to reach this goal with three steps: (1) Convert each sentence in any language to language-universal graph structures; in this paper we explore two approaches based on universal dependency parses and complete graphs, respectively. (2) Represent each node in the graph structure with a cross-lingual word embedding so that all sentences in multiple languages can be represented with one shared semantic space. (3) Using this common semantic space, train event extractors from English training data and apply them to languages that do not have any event annotations. Experimental results on three languages (Spanish, Russian and Ukrainian) without any annotations show this framework achieves comparable performance to a state-of-the-art supervised model trained from more than 1,500 manually annotated event mentions. 2020.lrec-1.243 @@ -2941,7 +2941,7 @@ Cross-Domain Evaluation of Edge Detection for Biomedical Event Extraction AlanRamponi - BarbaraPlank + BarbaraPlank RosarioLombardo 1982–1989 Biomedical event extraction is a crucial task in order to automatically extract information from the increasingly growing body of biomedical literature. Despite advances in the methods in recent years, most event extraction systems are still evaluated in-domain and on complete event structures only. This makes it hard to determine the performance of intermediate stages of the task, such as edge detection, across different corpora. Motivated by these limitations, we present the first cross-domain study of edge detection for biomedical event extraction. We analyze differences between five existing gold standard corpora, create a standardized benchmark corpus, and provide a strong baseline model for edge detection. Experiments show a large drop in performance when the baseline is applied on out-of-domain data, confirming the need for domain adaptation methods for the task. To encourage research efforts in this direction, we make both the data and the baseline available to the research community: https://www.cosbi.eu/cfx/9985. @@ -2982,7 +2982,7 @@ IdilHasan KateBaird SumithraVelupillai - RobertStewart + RobertStewart AngusRoberts 2009–2016 Free text fields within electronic health records (EHRs) contain valuable clinical information which is often missed when conducting research using EHR databases. One such type of information is medications which are not always available in structured fields, especially in mental health records. Most use cases that require medication information also generally require the associated temporal information (e.g. current or past) and attributes (e.g. dose, route, frequency). The purpose of this study is to develop a corpus of medication annotations in mental health records. The aim is to provide a more complete picture behind the mention of medications in the health records, by including additional contextual information around them, and to create a resource for use when developing and evaluating applications for the extraction of medications from EHR text. Thus far, an analysis of temporal information related to medications mentioned in a sample of mental health records has been conducted. The purpose of this analysis was to understand the complexity of medication mentions and their associated temporal information in the free text of EHRs, with a specific focus on the mental health domain. @@ -2992,7 +2992,7 @@ Do not let the history haunt you: Mitigating Compounding Errors in Conversational Question Answering - AngroshMandya + AngroshMandya JamesO’ Neill DanushkaBollegala FransCoenen @@ -3118,7 +3118,7 @@ <fixed-case>WN</fixed-case>-Salience: A Corpus of News Articles with Entity Salience Annotations ChuanWu EvangelosKanoulas - Maartende Rijke + Maartende Rijke WeiLu 2095–2102 Entities can be found in various text genres, ranging from tweets and web pages to user queries submitted to web search engines. Existing research either considers all entities in the text equally important, or heuristics are used to measure their salience. We believe that a key reason for the relatively limited work on entity salience is the lack of appropriate datasets. To support research on entity salience, we present a new dataset, the WikiNews Salience dataset (WN-Salience), which can be used to benchmark tasks such as entity salience detection and salient entity linking. WN-Salience is built on top of Wikinews, a Wikimedia project whose mission is to present reliable news articles. Entities in Wikinews articles are identified by the authors of the articles and are linked to Wikinews categories when they are salient or to Wikipedia pages otherwise. The dataset is built automatically, and consists of approximately 7,000 news articles, and 90,000 in-text entity annotations. We compare the WN-Salience dataset against existing datasets on the task and analyze their differences. Furthermore, we conduct experiments on entity salience detection; the results demonstrate that WN-Salience is a challenging testbed that is complementary to existing ones. @@ -3139,8 +3139,8 @@ Comparing Machine Learning and Deep Learning Approaches on <fixed-case>NLP</fixed-case> Tasks for the <fixed-case>I</fixed-case>talian Language - BernardoMagnini - AlbertoLavelli + BernardoMagnini + AlbertoLavelli SimoneMagnolini 2110–2119 We present a comparison between deep learning and traditional machine learning methods for various NLP tasks in Italian. We carried on experiments using available datasets (e.g., from the Evalita shared tasks) on two sequence tagging tasks (i.e., named entities recognition and nominal entities recognition) and four classification tasks (i.e., lexical relations among words, semantic relations among sentences, sentiment analysis and text classification). We show that deep learning approaches outperform traditional machine learning algorithms in sequence tagging, while for classification tasks that heavily rely on semantics approaches based on feature engineering are still competitive. We think that a similar analysis could be carried out for other languages to provide an assessment of machine learning / deep learning models across different languages. @@ -3185,7 +3185,7 @@ Evaluating Information Loss in Temporal Dependency Trees MustafaOcal - MarkFinlayson + MarkFinlayson 2148–2156 Temporal Dependency Trees (TDTs) have emerged as an alternative to full temporal graphs for representing the temporal structure of texts, with a key advantage being that TDTs can be straightforwardly computed using adapted dependency parsers. Relative to temporal graphs, the tree form of TDTs naturally omits some fraction of temporal relationships, which intuitively should decrease the amount of temporal information available, potentially increasing temporal indeterminacy of the global ordering. We demonstrate a new method for quantifying this indeterminacy that relies on solving temporal constraint problems to extract timelines, and show that TDTs result in up to a 109% increase in temporal indeterminacy over their corresponding temporal graphs for the three corpora we examine. On average, the increase in indeterminacy is 32%, and we show that this increase is a result of the TDT representation eliminating on average only 2.4% of total temporal relations. This result suggests that small differences can have big effects in temporal graphs, and the use of TDTs must be balanced against their deficiencies, with tasks requiring an accurate global temporal ordering potentially calling for use of the full temporal graph 2020.lrec-1.263 @@ -3195,8 +3195,8 @@ Populating Legal Ontologies using Semantic Role Labeling LlioHumphreys - GuidoBoella - LuigiDi Caro + GuidoBoella + LuigiDi Caro LivioRobaldo Leonvan der Torre SepidehGhanavati @@ -3221,7 +3221,7 @@ Natural Language Premise Selection: Finding Supporting Statements for Mathematical Text DeborahFerreira - AndréFreitas + AndréFreitas 2175–2182 Mathematical text is written using a combination of words and mathematical expressions. This combination, along with a specific way of structuring sentences makes it challenging for state-of-art NLP tools to understand and reason on top of mathematical discourse. In this work, we propose a new NLP task, the natural premise selection, which is used to retrieve supporting definitions and supporting propositions that are useful for generating an informal mathematical proof for a particular statement. We also make available a dataset, NL-PS, which can be used to evaluate different approaches for the natural premise selection task. Using different baselines, we demonstrate the underlying interpretation challenges associated with the task. 2020.lrec-1.266 @@ -3230,8 +3230,8 @@ <fixed-case>O</fixed-case>dinson: A Fast Rule-based Information Extraction Framework - Marco A.Valenzuela-Escárcega - GusHahn-Powell + Marco A.Valenzuela-Escárcega + GusHahn-Powell DaneBell 2183–2191 We present Odinson, a rule-based information extraction framework, which couples a simple yet powerful pattern language that can operate over multiple representations of text, with a runtime system that operates in near real time. In the Odinson query language, a single pattern may combine regular expressions over surface tokens with regular expressions over graphs such as syntactic dependencies. To guarantee the rapid matching of these patterns, our framework indexes most of the necessary information for matching patterns, including directed graphs such as syntactic dependencies, into a custom Lucene index. Indexing minimizes the amount of expensive pattern matching that must take place at runtime. As a result, the runtime system matches a syntax-based graph traversal in 2.8 seconds in a corpus of over 134 million sentences, nearly 150,000 times faster than its predecessor. @@ -3257,7 +3257,7 @@ <fixed-case>M</fixed-case>ath<fixed-case>A</fixed-case>lign: Linking Formula Identifiers to their Contextual Natural Language Descriptions MariaAlexeeva RebeccaSharp - Marco A.Valenzuela-Escárcega + Marco A.Valenzuela-Escárcega JenniferKadowaki AdarshPyarelal ClaytonMorrison @@ -3270,9 +3270,9 @@ Domain Adapted Distant Supervision for Pedagogically Motivated Relation Extraction OscarSainz - OierLopez de Lacalle + OierLopez de Lacalle ItziarAldabe - MontseMaritxalar + MontseMaritxalar 2213–2222 In this paper we present a relation extraction system that given a text extracts pedagogically motivated relation types, as a previous step to obtaining a semantic representation of the text which will make possible to automatically generate questions for reading comprehension. The system maps pedagogically motivated relations with relations from ConceptNet and deploys Distant Supervision for relation extraction. We run a study on a subset of those relationships in order to analyse the viability of our approach. For that, we build a domain-specific relation extraction system and explore two relation extraction models: a state-of-the-art model based on transfer learning and a discrete feature based machine learning model. Experiments show that the neural model obtains better results in terms of F-score and we yield promising results on the subset of relations suitable for pedagogical purposes. We thus consider that distant supervision for relation extraction is a valid approach in our target domain, i.e. biology. 2020.lrec-1.270 @@ -3307,7 +3307,7 @@ Sovan KumarSahoo SaumajitSaha AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 2241–2250 Event Extraction is an important task in the widespread field of Natural Language Processing (NLP). Though this task is adequately addressed in English with sufficient resources, we are unaware of any benchmark setup in Indian languages. Hindi is one of the most widely spoken languages in the world. In this paper, we present an Event Extraction framework for Hindi language by creating an annotated resource for benchmarking, and then developing deep learning based models to set as the baselines. We crawl more than seventeen hundred disaster related Hindi news articles from the various news sources. We also develop deep learning based models for Event Trigger Detection and Classification, Argument Detection and Classification and Event-Argument Linking. 2020.lrec-1.273 @@ -3331,7 +3331,7 @@ <fixed-case>NLP</fixed-case> Analytics in Finance with <fixed-case>D</fixed-case>o<fixed-case>R</fixed-case>e: A <fixed-case>F</fixed-case>rench 250<fixed-case>M</fixed-case> Tokens Corpus of Corporate Annual Reports CorentinMasson - PatrickParoubek + PatrickParoubek 2261–2267 Recent advances in neural computing and word embeddings for semantic processing open many new applications areas which had been left unaddressed so far because of inadequate language understanding capacity. But this new kind of approaches rely even more on training data to be operational. Corpora for financial applications exists, but most of them concern stock market prediction and are in English. To address this need for the French language and regulation oriented applications which require a deeper understanding of the text content, we hereby present “DoRe”, a French and dialectal French Corpus for NLP analytics in Finance, Regulation and Investment. This corpus is composed of: (a) 1769 Annual Reports from 336 companies among the most capitalized companies in: France (Euronext Paris) & Belgium (Euronext Brussels), covering a time frame from 2009 to 2019, and (b) related MetaData containing information for each company about its ISIN code, capitalization and sector. This corpus is designed to be as modular as possible in order to allow for maximum reuse in different tasks pertaining to Economics, Finance and Regulation. After presenting existing resources, we relate the construction of the DoRe corpus and the rationale behind our choices, concluding on the spectrum of possible uses of this new resource for NLP applications. 2020.lrec-1.275 @@ -3341,7 +3341,7 @@ The Language of Brain Signals: Natural Language Processing of Electroencephalography Reports RamonMaldonado - SandaHarabagiu + SandaHarabagiu 2268–2275 Brain signals are captured by clinical electroencephalography (EEG) which is an excellent tool for probing neural function. When EEG tests are performed, a textual EEG report is generated by the neurologist to document the findings, thus using language that describes the brain signals and its clinical correlations. Even with the impetus provided by the BRAIN initiative (brainitititive.nih.gov), there are no annotations available in texts that capture language describing the brain activities and their correlations with various pathologies. In this paper we describe an annotation effort carried out on a large corpus of EEG reports, providing examples of EEG-specific and clinically relevant concepts. In addition, we detail our annotation schema for brain signal attributes. We also discuss the resulting annotation of long-distance relations between concepts in EEG reports. By exemplifying a self-attention joint-learning to predict similar annotations in the EEG report corpus, we discuss the promising results, hoping that our effort will inform the design of novel knowledge capture techniques that will include the language of brain signals. 2020.lrec-1.276 @@ -3428,7 +3428,7 @@ Multiple Knowledge <fixed-case>G</fixed-case>raph<fixed-case>DB</fixed-case> (<fixed-case>MKGDB</fixed-case>) StefanoFaralli - PaolaVelardi + PaolaVelardi FaridYusifli 2325–2331 We present MKGDB, a large-scale graph database created as a combination of multiple taxonomy backbones extracted from 5 existing knowledge graphs, namely: ConceptNet, DBpedia, WebIsAGraph, WordNet and the Wikipedia category hierarchy. MKGDB, thanks the versatility of the Neo4j graph database manager technology, is intended to favour and help the development of open-domain natural language processing applications relying on knowledge bases, such as information extraction, hypernymy discovery, topic clustering, and others. Our resource consists of a large hypernymy graph which counts more than 37 million nodes and more than 81 million hypernymy relations. @@ -3438,7 +3438,7 @@ Orchestrating <fixed-case>NLP</fixed-case> Services for the Legal Domain - JulianMoreno-Schneider + JulianMoreno-Schneider GeorgRehm ElenaMontiel-Ponsoda VíctorRodriguez-Doncel @@ -3482,7 +3482,7 @@ Representing Multiword Term Variation in a Terminological Knowledge Base: a Corpus-Based Study - PilarLeón-Araúz + PilarLeón-Araúz ArianneReimerink MelaniaCabezas-García 2358–2367 @@ -3567,10 +3567,10 @@ BrianRoark LawrenceWolf-Sonkin ChristoKirov - Sabrina J.Mielke + Sabrina J.Mielke CibuJohny - IsinDemirsahin - KeithHall + IsinDemirsahin + KeithHall 2413–2423 This paper describes the Dakshina dataset, a new resource consisting of text in both the Latin and native scripts for 12 South Asian languages. The dataset includes, for each language: 1) native script Wikipedia text; 2) a romanization lexicon; and 3) full sentence parallel data in both a native script of the language and the basic Latin alphabet. We document the methods used for preparation and selection of the Wikipedia text in each language; collection of attested romanizations for sampled lexicons; and manual romanization of held-out sentences from the native script collections. We additionally provide baseline results on several tasks made possible by the dataset, including single word transliteration, full sentence transliteration, and language modeling of native script and romanized text. 2020.lrec-1.294 @@ -3593,7 +3593,7 @@ Embedding Space Correlation as a Measure of Domain Similarity AnneBeyer GöranKauermann - HinrichSchütze + HinrichSchütze 2431–2439 Prior work has determined domain similarity using text-based features of a corpus. However, when using pre-trained word embeddings, the underlying text corpus might not be accessible anymore. Therefore, we propose the CCA measure, a new measure of domain similarity based directly on the dimension-wise correlations between corresponding embedding spaces. Our results suggest that an inherent notion of domain can be captured this way, as we are able to reproduce our findings for different domain comparisons for English, German, Spanish and Czech as well as in cross-lingual comparisons. We further find a threshold at which the CCA measure indicates that two corpora come from the same domain in a monolingual setting by applying permutation tests. By evaluating the usability of the CCA measure in a domain adaptation application, we also show that it can be used to determine which corpora are more similar to each other in a cross-domain sentiment detection task. 2020.lrec-1.296 @@ -3659,8 +3659,8 @@ MaximinCoavoux BenjaminLecouteux AlexandreAllauzen - BenoitCrabbé - LaurentBesacier + BenoitCrabbé + LaurentBesacier DidierSchwab 2479–2490 Language models have become a key step to achieve state-of-the art results in many different Natural Language Processing (NLP) tasks. Leveraging the huge amount of unlabeled texts nowadays available, they provide an efficient way to pre-train continuous word representations that can be fine-tuned for a downstream task, along with their contextualization at the sentence level. This has been widely demonstrated for English using contextualized representations (Dai and Le, 2015; Peters et al., 2018; Howard and Ruder, 2018; Radford et al., 2018; Devlin et al., 2019; Yang et al., 2019b). In this paper, we introduce and share FlauBERT, a model learned on a very large and heterogeneous French corpus. Models of different sizes are trained using the new CNRS (French National Centre for Scientific Research) Jean Zay supercomputer. We apply our French language models to diverse NLP tasks (text classification, paraphrasing, natural language inference, parsing, word sense disambiguation) and show that most of the time they outperform other pre-training approaches. Different versions of FlauBERT as well as a unified evaluation protocol for the downstream tasks, called FLUE (French Language Understanding Evaluation), are shared to the research community for further reproducible experiments in French NLP. @@ -3670,9 +3670,9 @@ Accelerated High-Quality Mutual-Information Based Word Clustering - Manuel R.Ciosici + Manuel R.Ciosici IraAssent - LeonDerczynski + LeonDerczynski 2491–2496 Word clustering groups words that exhibit similar properties. One popular method for this is Brown clustering, which uses short-range distributional information to construct clusters. Specifically, this is a hard hierarchical clustering with a fixed-width beam that employs bi-grams and greedily minimizes global mutual information loss. The result is word clusters that tend to outperform or complement other word representations, especially when constrained by small datasets. However, Brown clustering has high computational complexity and does not lend itself to parallel computation. This, together with the lack of efficient implementations, limits their applicability in NLP. We present efficient implementations of Brown clustering and the alternative Exchange clustering as well as a number of methods to accelerate the computation of both hierarchical and flat clusters. We show empirically that clusters obtained with the accelerated method match the performance of clusters computed using the original methods. 2020.lrec-1.303 @@ -3716,7 +3716,7 @@ Automatic Transcription Challenges for <fixed-case>I</fixed-case>nuktitut, a Low-Resource Polysynthetic Language VishwaGupta - GillesBoulianne + GillesBoulianne 2521–2527 We introduce the first attempt at automatic speech recognition (ASR) in Inuktitut, as a representative for polysynthetic, low-resource languages, like many of the 900 Indigenous languages spoken in the Americas. As most previous work on Inuktitut, we use texts from parliament proceedings, but in addition we have access to 23 hours of transcribed oral stories. With this corpus, we show that Inuktitut displays a much higher degree of polysynthesis than other agglutinative languages usually considered in ASR, such as Finnish or Turkish. Even with a vocabulary of 1.3 million words derived from proceedings and stories, held-out stories have more than 60% of words out-of-vocabulary. We train bi-directional LSTM acoustic models, then investigate word and subword units, morphemes and syllables, and a deep neural network that finds word boundaries in subword sequences. We show that acoustic decoding using syllables decorated with word boundary markers results in the lowest word error rate. 2020.lrec-1.307 @@ -3787,7 +3787,7 @@ Exploring Bilingual Word Embeddings for <fixed-case>H</fixed-case>iligaynon, a Low-Resource Language LeahMichel ViktorHangya - AlexanderFraser + AlexanderFraser 2573–2580 This paper investigates the use of bilingual word embeddings for mining Hiligaynon translations of English words. There is very little research on Hiligaynon, an extremely low-resource language of Malayo-Polynesian origin with over 9 million speakers in the Philippines (we found just one paper). We use a publicly available Hiligaynon corpus with only 300K words, and match it with a comparable corpus in English. As there are no bilingual resources available, we manually develop a English-Hiligaynon lexicon and use this to train bilingual word embeddings. But we fail to mine accurate translations due to the small amount of data. To find out if the same holds true for a related language pair, we simulate the same low-resource setup on English to German and arrive at similar results. We then vary the size of the comparable English and German corpora to determine the minimum corpus size necessary to achieve competitive results. Further, we investigate the role of the seed lexicon. We show that with the same corpus size but with a smaller seed lexicon, performance can surpass results of previous studies. We release the lexicon of 1,200 English-Hiligaynon word pairs we created to encourage further investigation. 2020.lrec-1.313 @@ -3798,7 +3798,7 @@ A Finite-State Morphological Analyser for <fixed-case>E</fixed-case>venki AnnaZueva AnastasiaKuznetsova - FrancisTyers + FrancisTyers 2581–2589 It has been widely admitted that morphological analysis is an important step in automated text processing for morphologically rich languages. Evenki is a language with rich morphology, therefore a morphological analyser is highly desirable for processing Evenki texts and developing applications for Evenki. Although two morphological analysers for Evenki have already been developed, they are able to analyse less than a half of the available Evenki corpora. The aim of this paper is to create a new morphological analyser for Evenki. It is implemented using the Helsinki Finite-State Transducer toolkit (HFST). The lexc formalism is used to specify the morphotactic rules, which define the valid orderings of morphemes in a word. Morphophonological alternations and orthographic rules are described using the twol formalism. The lexicon is extracted from available machine-readable dictionaries. Since a part of the corpora belongs to texts in Evenki dialects, a version of the analyser with relaxed rules is developed for processing dialectal features. We evaluate the analyser on available Evenki corpora and estimate precision, recall and F-score. We obtain coverage scores of between 61% and 87% on the available Evenki corpora. 2020.lrec-1.314 @@ -3831,7 +3831,7 @@ EdressonCasanova MarcosTreviso LilianHübner - SandraAluísio + SandraAluísio 2605–2614 Automatic analysis of connected speech by natural language processing techniques is a promising direction for diagnosing cognitive impairments. However, some difficulties still remain: the time required for manual narrative transcription and the decision on how transcripts should be divided into sentences for successful application of parsers used in metrics, such as Idea Density, to analyze the transcripts. The main goal of this paper was to develop a generic segmentation system for narratives of neuropsychological language tests. We explored the performance of our previous single-dataset-trained sentence segmentation architecture in a richer scenario involving three new datasets used to diagnose cognitive impairments, comprising different stories and two types of stimulus presentation for eliciting narratives — visual and oral — via illustrated story-book and sequence of scenes, and by retelling. Also, we proposed and evaluated three modifications to our previous RCNN architecture: (i) the inclusion of a Linear Chain CRF; (ii) the inclusion of a self-attention mechanism; and (iii) the replacement of the LSTM recurrent layer by a Quasi-Recurrent Neural Network layer. Our study allowed us to develop two new models for segmenting impaired speech transcriptions, along with an ideal combination of datasets and specific groups of narratives to be used as the training set. 2020.lrec-1.317 @@ -3896,7 +3896,7 @@ The Nisvai Corpus of Oral Narrative Practices from <fixed-case>M</fixed-case>alekula (<fixed-case>V</fixed-case>anuatu) and its Associated Language Resources JocelynAznar - NúriaGala + NúriaGala 2649–2656 In this paper, we present a corpus of oral narratives from the Nisvai linguistic community and four associated language resources. Nisvai is an oral language spoken by 200 native speakers in the South-East of Malekula, an island of Vanuatu, Oceania. This language had never been the focus of a research before the one leading to this article. The corpus we present is made of 32 annotated narratives segmented into intonation units. The audio records were transcribed using the written conventions specifically developed for the language and translated into French. Four associated language resources have been generated by organizing the annotations into written documents: two of them are available online and two in paper format. The online resources allow the users to listen to the audio recordings whilereading the annotations. They were built to share the results of our fieldwork and to communicate on the Nisvai narrative practices with the researchers as well as with a more general audience. The bilingual paper resources, a booklet of narratives and a Nisvai-French French-Nisvai lexicon, were designed for the Nisvai community by taking into account their future uses (i.e. primary school). 2020.lrec-1.323 @@ -3943,7 +3943,7 @@ Towards a Spell Checker for <fixed-case>Z</fixed-case>amboanga <fixed-case>C</fixed-case>havacano Orthography Marcelo YujiHimoro - AntonioPareja-Lora + AntonioPareja-Lora 2685–2697 Zamboanga Chabacano (ZC) is the most vibrant variety of Philippine Creole Spanish, with over 400,000 native speakers in the Philippines (as of 2010). Following its introduction as a subject and a medium of instruction in the public schools of Zamboanga City from Grade 1 to 3 in 2012, an official orthography for this variety - the so-called “Zamboanga Chavacano Orthography” - has been approved in 2014. Its complexity, however, is a barrier to most speakers, since it does not necessarily reflect the particular phonetic evolution in ZC, but favours etymology instead. The distance between the correct spelling and the different spelling variations is often so great that delivering acceptable performance with the current de facto spell checking technologies may be challenging. The goals of this research have been to propose i) a spelling error taxonomy for ZC, formalised as an ontology and ii) an adaptive spell checking approach using Character-Based Statistical Machine Translation to correct spelling errors in ZC. Our results show that this approach is suitable for the goals mentioned and that it could be combined with other current spell checking technologies to achieve even higher performance. 2020.lrec-1.327 @@ -3990,8 +3990,8 @@ LucyLinder SandraDjambazovska AlexandrosLazaridis - TanjaSamardžić - ClaudiuMusat + TanjaSamardžić + ClaudiuMusat 2720–2725 We introduce a dictionary containing normalized forms of common words in various Swiss German dialects into High German. As Swiss German is, for now, a predominantly spoken language, there is a significant variation in the written forms, even between speakers of the same dialect. To alleviate the uncertainty associated with this diversity, we complement the pairs of Swiss German - High German words with the Swiss German phonetic transcriptions (SAMPA). This dictionary becomes thus the first resource to combine large-scale spontaneous translation with phonetic transcriptions. Moreover, we control for the regional distribution and insure the equal representation of the major Swiss dialects. The coupling of the phonetic and written Swiss German forms is powerful. We show that they are sufficient to train a Transformer-based phoneme to grapheme model that generates credible novel Swiss German writings. In addition, we show that the inverse mapping - from graphemes to phonemes - can be modeled with a transformer trained with the novel dictionary. This generation of pronunciations for previously unknown words is key in training extensible automated speech recognition (ASR) systems, which are key beneficiaries of this dictionary. 2020.lrec-1.331 @@ -4025,7 +4025,7 @@ JacquelineBrixey DavidSides TimothyVizthum - DavidTraum + DavidTraum KhalilIskarous 2746–2753 This work introduces additions to the corpus ChoCo, a multimodal corpus for the American indigenous language Choctaw. Using texts from the corpus, we develop new computational resources by using two off-the-shelf tools: word2vec and Linguistica. Our work illustrates how these tools can be successfully implemented with a small corpus. @@ -4035,9 +4035,9 @@ Massive vs. Curated Embeddings for Low-Resourced Languages: the Case of <fixed-case>Y</fixed-case>orùbá and <fixed-case>T</fixed-case>wi - Jesujoba O.Alabi + Jesujoba O.Alabi KwabenaAmponsah-Kaakyire - David I.Adelani + David I.Adelani CristinaEspaña-Bonet 2754–2762 The success of several architectures to learn semantic representations from unannotated text and the availability of these kind of texts in online multilingual resources such as Wikipedia has facilitated the massive and automatic creation of resources for multiple languages. The evaluation of such resources is usually done for the high-resourced languages, where one has a smorgasbord of tasks and test sets to evaluate on. For low-resourced languages, the evaluation is more difficult and normally ignored, with the hope that the impressive capability of deep learning architectures to learn (multilingual) representations in the high-resourced setting holds in the low-resourced setting too. In this paper we focus on two African languages, Yorùbá and Twi, and compare the word embeddings obtained in this way, with word embeddings obtained from curated corpora and a language-dependent processing. We analyse the noise in the publicly available corpora, collect high quality and noisy data for the two languages and quantify the improvements that depend not only on the amount of data but on the quality too. We also use different architectures that learn word representations both from surface forms and characters to further exploit all the available information which showed to be important for these languages. For the evaluation, we manually translate the wordsim-353 word pairs dataset from English into Yorùbá and Twi. We extend the analysis to contextual word embeddings and evaluate multilingual BERT on a named entity recognition task. For this, we annotate with named entities the Global Voices corpus for Yorùbá. As output of the work, we provide corpora, embeddings and the test suits for both languages. @@ -4061,7 +4061,7 @@ Collection and Annotation of the <fixed-case>R</fixed-case>omanian Legal Corpus - DanTufiș + DanTufiș MariaMitrofan VasilePăiș RaduIon @@ -4095,8 +4095,8 @@ Building a Task-oriented Dialog System for Languages with no Training Data: the Case for <fixed-case>B</fixed-case>asque - MaddalenLópez de Lacalle - XabierSaralegi + MaddalenLópez de Lacalle + XabierSaralegi IñakiSan Vicente 2796–2802 This paper presents an approach for developing a task-oriented dialog system for less-resourced languages in scenarios where training data is not available. Both intent classification and slot filling are tackled. We project the existing annotations in rich-resource languages by means of Neural Machine Translation (NMT) and posterior word alignments. We then compare training on the projected monolingual data with direct model transfer alternatives. Intent Classifiers and slot filling sequence taggers are implemented using a BiLSTM architecture or by fine-tuning BERT transformer models. Models learnt exclusively from Basque projected data provide better accuracies for slot filling. Combining Basque projected train data with rich-resource languages data outperforms consistently models trained solely on projected data for intent classification. At any rate, we achieve competitive performance in both tasks, with accuracies of 81% for intent classification and 77% for slot filling. @@ -4108,7 +4108,7 @@ <fixed-case>SENCORPUS</fixed-case>: A <fixed-case>F</fixed-case>rench-<fixed-case>W</fixed-case>olof Parallel Corpus Elhadji MamadouNguer AllaLo - Cheikh M. BambaDione + Cheikh M. BambaDione Sileye O.Ba MoussaLo 2803–2811 @@ -4197,7 +4197,7 @@ Towards Computational Resource Grammars for <fixed-case>R</fixed-case>unyankore and Rukiga DavidBamutura - PeterLjunglöf + PeterLjunglöf PeterNebende 2846–2854 In this paper, we present computational resource grammars of Runyankore and Rukiga (R&R) languages. Runyankore and Rukiga are two under-resourced Bantu Languages spoken by about 6 million people indigenous to South- Western Uganda, East Africa. We used Grammatical Framework (GF), a multilingual grammar formalism and a special- purpose functional programming language to formalise the descriptive grammar of these languages. To the best of our knowledge, these computational resource grammars are the first attempt to the creation of language resources for R&R. In Future Work, we plan to use these grammars to bootstrap the generation of other linguistic resources such as multilingual corpora that make use of data-driven approaches to natural language processing feasible. In the meantime, they can be used to build Computer-Assisted Language Learning (CALL) applications for these languages among others. @@ -4238,8 +4238,8 @@ Sai KrishnaRallabandi RodolfoVega AntoniosAnastasopoulos - LoriLevin - Alan WBlack + LoriLevin + Alan WBlack 2872–2877 We present a resource for computational experiments on Mapudungun, a polysynthetic indigenous language spoken in Chile with upwards of 200 thousand speakers. We provide 142 hours of culturally significant conversations in the domain of medical treatment. The conversations are fully transcribed and translated into Spanish. The transcriptions also include annotations for code-switching and non-standard pronunciations. We also provide baseline results on three core NLP tasks: speech recognition, speech synthesis, and machine translation between Spanish and Mapudungun. We further explore other applications for which the corpus will be suitable, including the study of code-switching, historical orthography change, linguistic structure, and sociological and anthropological studies. 2020.lrec-1.350 @@ -4260,7 +4260,7 @@ The <fixed-case>J</fixed-case>ohns <fixed-case>H</fixed-case>opkins <fixed-case>U</fixed-case>niversity <fixed-case>B</fixed-case>ible Corpus: 1600+ Tongues for Typological Exploration - Arya D.McCarthy + Arya D.McCarthy RachelWicks DylanLewis AaronMueller @@ -4310,7 +4310,7 @@ No Data to Crawl? Monolingual Corpus Creation from <fixed-case>PDF</fixed-case> Files of Truly low-Resource Languages in <fixed-case>P</fixed-case>eru GinaBustamante - ArturoOncevay + ArturoOncevay RobertoZariquiey 2914–2923 We introduce new monolingual corpora for four indigenous and endangered languages from Peru: Shipibo-konibo, Ashaninka, Yanesha and Yine. Given the total absence of these languages in the web, the extraction and processing of texts from PDF files is relevant in a truly low-resource language scenario. Our procedure for monolingual corpus creation considers language-specific and language-agnostic steps, and focuses on educational PDF files with multilingual sentences, noisy pages and low-structured content. Through an evaluation based on language modelling and character-level perplexity on a subset of manually extracted sentences, we determine that our method allows the creation of clean corpora for the four languages, a key resource for natural language processing tasks nowadays. @@ -4321,7 +4321,7 @@ Creating a Parallel <fixed-case>I</fixed-case>celandic Dependency Treebank from Raw Text to <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies HildurJónsdóttir - Anton KarlIngason + Anton KarlIngason 2924–2931 Making the low-resource language, Icelandic, accessible and usable in Language Technology is a work in progress and is supported by the Icelandic government. Creating resources and suitable training data (e.g., a dependency treebank) is a fundamental part of that work. We describe work on a parallel Icelandic dependency treebank based on Universal Dependencies (UD). This is important because it is the first parallel treebank resource for the language and since several other languages already have a resource based on the same text. Two Icelandic treebanks based on phrase-structure grammar have been built and ongoing work aims to convert them to UD. Previously, limited work has been done on dependency grammar for Icelandic. The current project aims to ameliorate this situation by creating a small dependency treebank from scratch. Creating a treebank is a laborious task so the process was implemented in an accessible manner using freely available tools and resources. The parallel data in the UD project was chosen as a source because this would furthermore give us the first parallel treebank for Icelandic. The Icelandic parallel UD corpus will be published as part of UD version 2.6. 2020.lrec-1.357 @@ -4356,7 +4356,7 @@ <fixed-case>CPLM</fixed-case>, a Parallel Corpus for <fixed-case>M</fixed-case>exican Languages: Development and Interface GerardoSierra Martínez CynthiaMontaño - GemmaBel-Enguix + GemmaBel-Enguix DiegoCórdova MargaritaMota Montoya 2947–2952 @@ -4407,7 +4407,7 @@ HourKaing KhinMar Soe MasaoUtiyama - EiichiroSumita + EiichiroSumita 2980–2983 Transliteration is generally a phonetically based transcription across different writing systems. It is a crucial task for various downstream natural language processing applications. For the Myanmar (Burmese) language, robust automatic transliteration for borrowed English words is a challenging task because of the complex Myanmar writing system and the lack of data. In this study, we constructed a Myanmar-English named entity dictionary containing more than eighty thousand transliteration instances. The data have been released under a CC BY-NC-SA license. We evaluated the automatic transliteration performance using statistical and neural network-based approaches based on the prepared data. The neural network model outperformed the statistical model significantly in terms of the BLEU score on the character level. Different units used in the Myanmar script for processing were also compared and discussed. 2020.lrec-1.364 @@ -4418,7 +4418,7 @@ <fixed-case>CA</fixed-case>-<fixed-case>EHN</fixed-case>: Commonsense Analogy from <fixed-case>E</fixed-case>-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et Peng-HsuanLi Tsan-YuYang - Wei-YunMa + Wei-YunMa 2984–2990 Embedding commonsense knowledge is crucial for end-to-end models to generalize inference beyond training corpora. However, existing word analogy datasets have tended to be handcrafted, involving permutations of hundreds of words with only dozens of pre-defined relations, mostly morphological relations and named entities. In this work, we model commonsense knowledge down to word-level analogical reasoning by leveraging E-HowNet, an ontology that annotates 88K Chinese words with their structured sense definitions and English translations. We present CA-EHN, the first commonsense word analogy dataset containing 90,505 analogies covering 5,656 words and 763 relations. Experiments show that CA-EHN stands out as a great indicator of how well word representations embed commonsense knowledge. The dataset is publicly available at https://github.com/ckiplab/CA-EHN. 2020.lrec-1.365 @@ -4429,7 +4429,7 @@ Building Semantic Grams of Human Knowledge ValentinaLeone GiovanniSiragusa - LuigiDi Caro + LuigiDi Caro RobertoNavigli 2991–3000 Word senses are typically defined with textual definitions for human consumption and, in computational lexicons, put in context via lexical-semantic relations such as synonymy, antonymy, hypernymy, etc. In this paper we embrace a radically different paradigm that provides a slot-filler structure, called “semagram”, to define the meaning of words in terms of their prototypical semantic information. We propose a semagram-based knowledge model composed of 26 semantic relationships which integrates features from a range of different sources, such as computational lexicons and property norms. We describe an annotation exercise regarding 50 concepts over 10 different categories and put forward different automated approaches for extending the semagram base to thousands of concepts. We finally evaluated the impact of the proposed resource on a semantic similarity task, showing significant improvements over state-of-the-art word embeddings. @@ -4440,7 +4440,7 @@ Automatically Building a Multilingual Lexicon of False <fixed-case>F</fixed-case>riends With No Supervision Ana SabinaUban - Liviu P.Dinu + Liviu P.Dinu 3001–3007 Cognate words, defined as words in different languages which derive from a common etymon, can be useful for language learners, who can leverage the orthographical similarity of cognates to more easily understand a text in a foreign language. Deceptive cognates, or false friends, do not share the same meaning anymore; these can be instead deceiving and detrimental for language acquisition or text understanding in a foreign language. We use an automatic method of detecting false friends from a set of cognates, in a fully unsupervised fashion, based on cross-lingual word embeddings. We implement our method for English and five Romance languages, including a low-resource language (Romanian), and evaluate it against two different gold standards. The method can be extended easily to any language pair, requiring only large monolingual corpora for the involved languages and a small bilingual dictionary for the pair. We additionally propose a measure of “falseness” of a false friends pair. We publish freely the database of false friends in the six languages, along with the falseness scores for each cognate pair. The resource is the largest of the kind that we are aware of, both in terms of languages covered and number of word pairs. 2020.lrec-1.367 @@ -4533,7 +4533,7 @@ Odi et <fixed-case>A</fixed-case>mo. Creating, Evaluating and Extending Sentiment Lexicons for <fixed-case>L</fixed-case>atin. - RacheleSprugnoli + RacheleSprugnoli MarcoPassarotti DanielaCorbetta AndreaPeverelli @@ -4545,7 +4545,7 @@ <fixed-case>W</fixed-case>ord<fixed-case>W</fixed-case>ars: A Dataset to Examine the Natural Selection of Words - Saif M.Mohammad + Saif M.Mohammad 3087–3095 There is a growing body of work on how word meaning changes over time: mutation. In contrast, there is very little work on how different words compete to represent the same meaning, and how the degree of success of words in that competition changes over time: natural selection. We present a new dataset, WordWars, with historical frequency data from the early 1800s to the early 2000s for monosemous English words in over 5000 synsets. We explore three broad questions with the dataset: (1) what is the degree to which predominant words in these synsets have changed, (2) how do prominent word features such as frequency, length, and concreteness impact natural selection, and (3) what are the differences between the predominant words of the 2000s and the predominant words of early 1800s. We show that close to one third of the synsets undergo a change in the predominant word in this time period. Manual annotation of these pairs shows that about 15% of these are orthographic variations, 25% involve affix changes, and 60% have completely different roots. We find that frequency, length, and concreteness all impact natural selection, albeit in different ways. 2020.lrec-1.377 @@ -4555,9 +4555,9 @@ Challenge Dataset of Cognates and False Friend Pairs from <fixed-case>I</fixed-case>ndian Languages DipteshKanojia - MalharKulkarni - PushpakBhattacharyya - GholamrezaHaffari + MalharKulkarni + PushpakBhattacharyya + GholamrezaHaffari 3096–3102 Cognates are present in multiple variants of the same text across different languages (e.g., “hund” in German and “hound” in the English language mean “dog”). They pose a challenge to various Natural Language Processing (NLP) applications such as Machine Translation, Cross-lingual Sense Disambiguation, Computational Phylogenetics, and Information Retrieval. A possible solution to address this challenge is to identify cognates across language pairs. In this paper, we describe the creation of two cognate datasets for twelve Indian languages namely Sanskrit, Hindi, Assamese, Oriya, Kannada, Gujarati, Tamil, Telugu, Punjabi, Bengali, Marathi, and Malayalam. We digitize the cognate data from an Indian language cognate dictionary and utilize linked Indian language Wordnets to generate cognate sets. Additionally, we use the Wordnet data to create a False Friends’ dataset for eleven language pairs. We also evaluate the efficacy of our dataset using previously available baseline cognate detection approaches. We also perform a manual evaluation with the help of lexicographers and release the curated gold-standard dataset with this paper. 2020.lrec-1.378 @@ -4580,7 +4580,7 @@ A Lexicon-Based Approach for Detecting Hedges in Informal Text JumayelIslam LuXiao - Robert E.Mercer + Robert E.Mercer 3109–3113 Hedging is a commonly used strategy in conversational management to show the speaker’s lack of commitment to what they communicate, which may signal problems between the speakers. Our project is interested in examining the presence of hedging words and phrases in identifying the tension between an interviewer and interviewee during a survivor interview. While there have been studies on hedging detection in the natural language processing literature, all existing work has focused on structured texts and formal communications. Our project thus investigated a corpus of eight unstructured conversational interviews about the Rwanda Genocide and identified hedging patterns in the interviewees’ responses. Our work produced three manually constructed lists of hedge words, booster words, and hedging phrases. Leveraging these lexicons, we developed a rule-based algorithm that detects sentence-level hedges in informal conversations such as survivor interviews. Our work also produced a dataset of 3000 sentences having the categories Hedge and Non-hedge annotated by three researchers. With experiments on this annotated dataset, we verify the efficacy of our proposed algorithm. Our work contributes to the further development of tools that identify hedges from informal conversations and discussions. 2020.lrec-1.380 @@ -4621,9 +4621,9 @@ Towards a Semi-Automatic Detection of Reflexive and Reciprocal Constructions and Their Representation in a Valency Lexicon VáclavaKettnerová - MarketaLopatkova + MarketaLopatkova AnnaVernerová - PetraBarancikova + PetraBarancikova 3136–3144 Valency lexicons usually describe valency behavior of verbs in non-reflexive and non-reciprocal constructions. However, reflexive and reciprocal constructions are common morphosyntactic forms of verbs. Both of these constructions are characterized by regular changes in morphosyntactic properties of verbs, thus they can be described by grammatical rules. On the other hand, the possibility to create reflexive and/or reciprocal constructions cannot be trivially derived from the morphosyntactic structure of verbs as it is conditioned by their semantic properties as well. A large-coverage valency lexicon allowing for rule based generation of all well formed verb constructions should thus integrate the information on reflexivity and reciprocity. In this paper, we propose a semi-automatic procedure, based on grammatical constraints on reflexivity and reciprocity, detecting those verbs that form reflexive and reciprocal constructions in corpus data. However, exploitation of corpus data for this purpose is complicated due to the diverse functions of reflexive markers crossing the domain of reflexivity and reciprocity. The list of verbs identified by the previous procedure is thus further used in an automatic experiment, applying word embeddings for detecting semantically similar verbs. These candidate verbs have been manually verified and annotation of their reflexive and reciprocal constructions has been integrated into the valency lexicon of Czech verbs VALLEX. 2020.lrec-1.384 @@ -4669,8 +4669,8 @@ Modelling Etymology in <fixed-case>LMF</fixed-case>/<fixed-case>TEI</fixed-case>: The Grande Dicionário Houaiss da Língua Portuguesa Dictionary as a Use Case - FahadKhan - LaurentRomary + FahadKhan + LaurentRomary AnaSalgado JackBowers MohamedKhemakhem @@ -4697,8 +4697,8 @@ Some Issues with Building a Multilingual <fixed-case>W</fixed-case>ordnet FrancisBond LuisMorgado da Costa - Michael WayneGoodman - John PhilipMcCrae + Michael WayneGoodman + John PhilipMcCrae AhtiLohk 3189–3197 In this paper we discuss the experience of bringing together over 40 different wordnets. We introduce some extensions to the GWA wordnet LMF format proposed in Vossen et al. (2016) and look at how this new information can be displayed. Notable extensions include: confidence, corpus frequency, orthographic variants, lexicalized and non-lexicalized synsets and lemmas, new parts of speech, and more. Many of these extensions already exist in multiple wordnets – the challenge was to find a compatible representation. To this end, we introduce a new version of the Open Multilingual Wordnet (Bond and Foster, 2013), that integrates a new set of tools that tests the extensions introduced by this new format, while also ensuring the integrity of the Collaborative Interlingual Index (CILI: Bond et al., 2016), avoiding the same new concept to be introduced through multiple projects. @@ -4718,7 +4718,7 @@ Methodological Aspects of Developing and Managing an Etymological Lexical Resource: Introducing <fixed-case>E</fixed-case>tym<fixed-case>DB</fixed-case>-2.0 ClémentineFourrier - BenoîtSagot + BenoîtSagot 3207–3216 Diachronic lexical information is not only important in the field of historical linguistics, but is also increasingly used in NLP, most recently for machine translation of low resource languages. Therefore, there is a need for fine-grained, large-coverage and accurate etymological lexical resources. In this paper, we propose a set of guidelines to generate such resources, for each step of the life-cycle of an etymological lexicon: creation, update, evaluation, dissemination, and exploitation. To illustrate the guidelines, we introduce EtymDB 2.0, an etymological database automatically generated from the Wiktionary, which contains 1.8 million lexemes, linked by more than 700,000 fine-grained etymological relations, across 2,536 living and dead languages. We also introduce use cases for which EtymDB 2.0 could represent a key resource, such as phylogenetic tree generation, low resource machine translation or medieval languages study. 2020.lrec-1.392 @@ -4728,7 +4728,7 @@ <fixed-case>OF</fixed-case>r<fixed-case>L</fixed-case>ex: A Computational Morphological and Syntactic Lexicon for <fixed-case>O</fixed-case>ld <fixed-case>F</fixed-case>rench GaëlGuibon - BenoîtSagot + BenoîtSagot 3217–3225 In this paper we describe our work on the development and enrichment of OFrLex, a freely available, large-coverage morphological and syntactic Old French lexicon. We rely on several heterogeneous language resources to extract structured and exploitable information. The extraction follows a semi-automatic procedure with substantial manual steps to respond to difficulties encountered while aligning lexical entries from distinct language resources. OFrLex aims at improving natural language processing tasks on Old French such as part-of-speech tagging and dependency parsing. We provide quantitative information on OFrLex and discuss its reliability. We also describe and evaluate a semi-automatic, word-embedding-based lexical enrichment process aimed at increasing the accuracy of the resource. Results of this extension technique will be manually validated in the near future, a step that will take advantage of OFrLex’s viewing, searching and editing interface, which is already accessible online. 2020.lrec-1.393 @@ -4737,8 +4737,8 @@ Automatic Reconstruction of Missing <fixed-case>R</fixed-case>omanian Cognates and Unattested <fixed-case>L</fixed-case>atin Words - Alina MariaCiobanu - Liviu P.Dinu + Alina MariaCiobanu + Liviu P.Dinu LaurentiuZoicas 3226–3231 Producing related words is a key concern in historical linguistics. Given an input word, the task is to automatically produce either its proto-word, a cognate pair or a modern word derived from it. In this paper, we apply a method for producing related words based on sequence labeling, aiming to fill in the gaps in incomplete cognate sets in Romance languages with Latin etymology (producing Romanian cognates that are missing) and to reconstruct uncertified Latin words. We further investigate an ensemble-based aggregation for combining and re-ranking the word productions of multiple languages. @@ -4749,11 +4749,11 @@ A Multilingual Evaluation Dataset for Monolingual Word Sense Alignment SinaAhmadi - John PhilipMcCrae + John PhilipMcCrae SanniNimb - FahadKhan + FahadKhan MonicaMonachini - BolettePedersen + BolettePedersen ThierryDeclerck TanjaWissik AndreaBellandi @@ -4762,7 +4762,7 @@ SussiOlsen SimonKrek VeronikaLipp - TamásVáradi + TamásVáradi LászlóSimon AndrásGyorffy CaroleTiberius @@ -4783,11 +4783,11 @@ JoséLuis Sancho Rafael-J.Ureña-Ruiz JordiPorta Zamorano - KirilSimov + KirilSimov PetyaOsenova ZaraKancheva IvayloRadev - RankaStanković + RankaStanković AndrejPerdih DejanGabrovsek 3232–3242 @@ -4798,10 +4798,10 @@ A Broad-Coverage Deep Semantic Lexicon for Verbs - JamesAllen + JamesAllen HannahAn RitwikBose - Willde Beaumont + Willde Beaumont Choh ManTeng 3243–3251 Progress on deep language understanding is inhibited by the lack of a broad coverage lexicon that connects linguistic behavior to ontological concepts and axioms. We have developed COLLIE-V, a deep lexical resource for verbs, with the coverage of WordNet and syntactic and semantic details that meet or exceed existing resources. Bootstrapping from a hand-built lexicon and ontology, new ontological concepts and lexical entries, together with semantic role preferences and entailment axioms, are automatically derived by combining multiple constraints from parsing dictionary definitions and examples. We evaluated the accuracy of the technique along a number of different dimensions and were able to obtain high accuracy in deriving new concepts and lexical entries. COLLIE-V is publicly available. @@ -4883,10 +4883,10 @@ World Class Language Technology - Developing a Language Technology Strategy for <fixed-case>D</fixed-case>anish SabineKirchmeier - BolettePedersen + BolettePedersen SanniNimb PhilipDiderichsen - Peter JuelHenrichsen + Peter JuelHenrichsen 3297–3301 Although Denmark is one of the most digitized countries in Europe, no coordinated efforts have been made in recent years to support the Danish language with regard to language technology and artificial intelligence. In March 2019, however, the Danish government adopted a new, ambitious strategy for LT and artificial intelligence. In this paper, we describe the process behind the development of the language-related parts of the strategy: A Danish Language Technology Committee was constituted and a comprehensive series of workshops were organized in which users, suppliers, developers, and researchers gave their valuable input based on their experiences. We describe how, based on this experience, the focus areas and recommendations for the LT strategy were established, and which steps are currently taken in order to put the strategy into practice. 2020.lrec-1.403 @@ -4908,7 +4908,7 @@ The <fixed-case>CLARIN</fixed-case> Knowledge Centre for Atypical Communication Expertise - Henkvan den Heuvel + Henkvan den Heuvel NellekeOostdijk CarolineRowland PaulTrilsbeek @@ -4920,7 +4920,7 @@ Corpora of Disordered Speech in the Light of the <fixed-case>GDPR</fixed-case>: Two Use Cases from the <fixed-case>DELAD</fixed-case> Initiative - Henkvan den Heuvel + Henkvan den Heuvel AlekseiKelli KatarzynaKlessa SatuSalaasti @@ -4936,46 +4936,46 @@ KatrinMarheinecke StefanieHegele SteliosPiperidis - KalinaBontcheva - JanHajič + KalinaBontcheva + JanHajič KhalidChoukri - AndrejsVasiļjevs + AndrejsVasiļjevs GerhardBackfried ChristophPrinz - José ManuelGómez-Pérez + José ManuelGómez-Pérez LucMeertens PaulLukowicz - Josefvan Genabith + Josefvan Genabith AndreaLösch PhilippSlusallek MortenIrgens PatrickGatellier - JoachimKöhler + JoachimKöhler LaureLe Bars DimitraAnastasiou AlbinaAuksoriūtė - NúriaBel - AntónioBranco + NúriaBel + AntónioBranco GerhardBudin - WalterDaelemans - KoenraadDe Smedt - RadovanGarabík + WalterDaelemans + KoenraadDe Smedt + RadovanGarabík MariaGavriilidou DagmarGromann SvetlaKoeva SimonKrek CvetanaKrstev - KristerLindén - BernardoMagnini + KristerLindén + BernardoMagnini JanOdijk MaciejOgrodniczuk - EiríkurRögnvaldsson - MikeRosner - BolettePedersen - IngunaSkadiņa + EiríkurRögnvaldsson + MikeRosner + BolettePedersen + IngunaSkadiņa MarkoTadić - DanTufiș - TamásVáradi + DanTufiș + TamásVáradi KadriVider AndyWay FrançoisYvon @@ -4999,7 +4999,7 @@ Gigafida 2.0: The Reference Corpus of Written Standard <fixed-case>S</fixed-case>lovene SimonKrek ŠpelaArhar Holdt - TomažErjavec + TomažErjavec JakaČibej AndrazRepar PolonaGantar @@ -5027,10 +5027,10 @@ A <fixed-case>CLARIN</fixed-case> Transcription Portal for Interview Data ChristophDraxler - Henkvan den Heuvel + Henkvan den Heuvel Arjanvan Hessen SilviaCalamai - LouiseCorti + LouiseCorti 3353–3359 In this paper we present a first version of a transcription portal for audio files based on automatic speech recognition (ASR) in various languages. The portal is implemented in the CLARIN resources research network and intended for use by non-technical scholars. We explain the background and interdisciplinary nature of interview data, the perks and quirks of using ASR for transcribing the audio in a research context, the dos and don’ts for optimal use of the portal, and future developments foreseen. The portal is promoted in a range of workshops, but there are a number of challenges that have to be met. These challenges concern privacy issues, ASR quality, and cost, amongst others. 2020.lrec-1.411 @@ -5050,25 +5050,25 @@ <fixed-case>E</fixed-case>uropean Language Grid: An Overview GeorgRehm - MariaBerger + MariaBerger ElaElsholz StefanieHegele FlorianKintzel KatrinMarheinecke SteliosPiperidis MiltosDeligiannis - DimitrisGalanis + DimitrisGalanis KaterinaGkirtzou - PennyLabropoulou - KalinaBontcheva + PennyLabropoulou + KalinaBontcheva DavidJones IanRoberts - JanHajič + JanHajič JanaHamrlová LukášKačena KhalidChoukri VictoriaArranz - AndrejsVasiļjevs + AndrejsVasiļjevs OriansAnvari AndisLagzdiņš JūlijaMeļņika @@ -5079,7 +5079,7 @@ ChristophPrinz SeverinStampler DorotheaThomas-Aniola - José ManuelGómez-Pérez + José ManuelGómez-Pérez AndresGarcia Silva ChristianBerrío UlrichGermann @@ -5093,8 +5093,8 @@ The Competitiveness Analysis of the <fixed-case>E</fixed-case>uropean Language Technology Market - AndrejsVasiļjevs - IngunaSkadiņa + AndrejsVasiļjevs + IngunaSkadiņa IndraSamite KasparsKauliņš ĒriksAjausks @@ -5109,7 +5109,7 @@ Constructing a Bilingual <fixed-case>H</fixed-case>adith Corpus Using a Segmentation Tool ShathaAltammami - EricAtwell + EricAtwell AmmarAlsalka 3390–3398 This article describes the process of gathering and constructing a bilingual parallel corpus of Islamic Hadith, which is the set of narratives reporting different aspects of the prophet Muhammad’s life. The corpus data is gathered from the six canonical Hadith collections using a custom segmentation tool that automatically segments and annotates the two Hadith components with 92% accuracy. This Hadith segmenter minimises the costs of language resource creation and produces consistent results independently from previous knowledge and experiences that usually influence human annotators. The corpus includes more than 10M tokens and will be freely available via the LREC repository. @@ -5133,7 +5133,7 @@ Franciskade Jong BenteMaegaard DarjaFišer - Dietervan Uytvanck + Dietervan Uytvanck AndreasWitt 3406–3413 CLARIN is a European Research Infrastructure providing access to language resources and technologies for researchers in the humanities and social sciences. It supports the use and study of language data in general and aims to increase the potential for comparative research of cultural and societal phenomena across the boundaries of languages and disciplines, all in line with the European agenda for Open Science. Data infrastructures such as CLARIN have recently embarked on the emerging frameworks for the federation of infrastructural services, such as the European Open Science Cloud and the integration of services resulting from multidisciplinary collaboration in federated services for the wider SSH domain. In this paper we describe the interoperability requirements that arise through the existing ambitions and the emerging frameworks. The interoperability theme will be addressed at several levels, including organisation and ecosystem, design of workflow services, data curation, performance measurement and collaboration. @@ -5145,9 +5145,9 @@ Language Technology Programme for <fixed-case>I</fixed-case>celandic 2019-2023 AnnaNikulásdóttir JónGuðnason - Anton KarlIngason + Anton KarlIngason HrafnLoftsson - EiríkurRögnvaldsson + EiríkurRögnvaldsson Einar FreyrSigurðsson SteinþórSteingrímsson 3414–3422 @@ -5168,20 +5168,20 @@ Making Metadata Fit for Next Generation Language Technology Platforms: The Metadata Schema of the <fixed-case>E</fixed-case>uropean Language Grid - PennyLabropoulou + PennyLabropoulou KaterinaGkirtzou MariaGavriilidou MiltosDeligiannis - DimitrisGalanis + DimitrisGalanis SteliosPiperidis GeorgRehm - MariaBerger - ValérieMapelli + MariaBerger + ValérieMapelli MichaelRigault VictoriaArranz KhalidChoukri GerhardBackfried - José ManuelGómez-Pérez + José ManuelGómez-Pérez AndresGarcia-Silva 3428–3437 The current scientific and technological landscape is characterised by the increasing availability of data resources and processing tools and services. In this setting, metadata have emerged as a key factor facilitating management, sharing and usage of such digital assets. In this paper we present ELG-SHARE, a rich metadata schema catering for the description of Language Resources and Technologies (processing and generation services and tools, models, corpora, term lists, etc.), as well as related entities (e.g., organizations, projects, supporting documents, etc.). The schema powers the European Language Grid platform that aims to be the primary hub and marketplace for industry-relevant Language Technology in Europe. ELG-SHARE has been based on various metadata schemas, vocabularies, and ontologies, as well as related recommendations and guidelines. @@ -5193,7 +5193,7 @@ Related Works in the <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium Catalog DanielJaquette ChristopherCieri - DeniseDiPersio + DeniseDiPersio 3438–3442 Defining relations between language resources provides an archive with the ability to better serve its users. This paper covers the development and implementation of a Related Works addition to the Linguistic Data Consortium’s (LDC) catalog. The authors go step-by-step through the development of the Related Works schema, implementation of the software and database changes, and data entry of the relations. The Related Work schema involved developing of a set of controlled terms for relations based on previous work and other schema. Software and database changes consisted of both front and back end interface additions, along with modification and additions to the LDC Catalog database tables. Data entry consisted of two parts: seed data from previous work and 2019 language resources, and ongoing legacy population. Previous work in this area is discussed as well as overview information about the LDC Catalog. A list of the full LDC Related Works terms is included with brief explanations. 2020.lrec-1.421 @@ -5204,7 +5204,7 @@ Language Data Sharing in <fixed-case>E</fixed-case>uropean Public Services – Overcoming Obstacles and Creating Sustainable Data Sharing Infrastructures LilliSmal AndreaLösch - Josefvan Genabith + Josefvan Genabith MariaGiagkou ThierryDeclerck StephanBusemann @@ -5218,10 +5218,10 @@ A Progress Report on Activities at the <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium Benefitting the <fixed-case>LREC</fixed-case> Community ChristopherCieri JamesFiumara - StephanieStrassel + StephanieStrassel JonathanWright - DeniseDiPersio - MarkLiberman + DeniseDiPersio + MarkLiberman 3449–3456 This latest in a series of Linguistic Data Consortium (LDC) progress reports to the LREC community does not describe any single language resource, evaluation campaign or technology but sketches the activities, since the last report, of a data center devoted to supporting the work of LREC attendees among other research communities. Specifically, we describe 96 new corpora released in 2018-2020 to date, a new technology evaluation campaign, ongoing activities to support multiple common task human language technology programs, and innovations to advance the methodology of language data collection and annotation. 2020.lrec-1.423 @@ -5282,7 +5282,7 @@ Namoos HayatQasmi Haris BinZia AwaisAthar - Agha AliRaza + Agha AliRaza 3484–3489 This paper presents the first attempt at Automatic Text Simplification (ATS) for Urdu, the language of 170 million people worldwide. Being a low-resource language in terms of standard linguistic resources, recent text simplification approaches that rely on manually crafted simplified corpora or lexicons such as WordNet are not applicable to Urdu. Urdu is a morphologically rich language that requires unique considerations such as proper handling of inflectional case and honorifics. We present an unsupervised method for lexical simplification of complex Urdu text. Our method only requires plain Urdu text and makes use of word embeddings together with a set of morphological features to generate simplifications. Our system achieves a BLEU score of 80.15 and SARI score of 42.02 upon automatic evaluation on manually crafted simplified corpora. We also report results for human evaluations for correctness, grammaticality, meaning-preservation and simplicity of the output. Our code and corpus are publicly available to make our results reproducible. 2020.lrec-1.428 @@ -5292,7 +5292,7 @@ Jamo Pair Encoding: Subcharacter Representation-based Extreme <fixed-case>K</fixed-case>orean Vocabulary Compression for Efficient Subword Tokenization SangwhanMoon - NaoakiOkazaki + NaoakiOkazaki 3490–3497 In the context of multilingual language model pre-training, vocabulary size for languages with a broad set of potential characters is an unsolved problem. We propose two algorithms applicable in any unsupervised multilingual pre-training task, increasing the elasticity of budget required for building the vocabulary in Byte-Pair Encoding inspired tokenizers, significantly reducing the cost of supporting Korean in a multilingual model. 2020.lrec-1.429 @@ -5302,7 +5302,7 @@ Offensive Language and Hate Speech Detection for <fixed-case>D</fixed-case>anish Gudbjartur IngiSigurbergsson - LeonDerczynski + LeonDerczynski 3498–3508 The presence of offensive language on social media platforms and the implications this poses is becoming a major concern in modern society. Given the enormous amount of content created every day, automatic methods are required to detect and deal with this type of content. Until now, most of the research has focused on solving the problem for the English language, while the problem is multilingual. We construct a Danish dataset DKhate containing user-generated comments from various social media platforms, and to our knowledge, the first of its kind, annotated for various types and target of offensive language. We develop four automatic classification systems, each designed to work for both the English and the Danish language. In the detection of offensive language in English, the best performing system achieves a macro averaged F1-score of 0.74, and the best performing system for Danish achieves a macro averaged F1-score of 0.70. In the detection of whether or not an offensive post is targeted, the best performing system for English achieves a macro averaged F1-score of 0.62, while the best performing system for Danish achieves a macro averaged F1-score of 0.73. Finally, in the detection of the target type in a targeted offensive post, the best performing system for English achieves a macro averaged F1-score of 0.56, and the best performing system for Danish achieves a macro averaged F1-score of 0.63. Our work for both the English and the Danish language captures the type and targets of offensive language, and present automatic methods for detecting different kinds of offensive language such as hate speech and cyberbullying. 2020.lrec-1.430 @@ -5312,7 +5312,7 @@ Semi-supervised Deep Embedded Clustering with Anomaly Detection for Semantic Frame Induction Zheng XinYong - Tiago TimponiTorrent + Tiago TimponiTorrent 3509–3519 Although FrameNet is recognized as one of the most fine-grained lexical databases, its coverage of lexical units is still limited. To tackle this issue, we propose a two-step frame induction process: for a set of lexical units not yet present in Berkeley FrameNet data release 1.7, first remove those that cannot fit into any existing semantic frame in FrameNet; then, assign the remaining lexical units to their correct frames. We also present the Semi-supervised Deep Embedded Clustering with Anomaly Detection (SDEC-AD) model—an algorithm that maps high-dimensional contextualized vector representations of lexical units to a low-dimensional latent space for better frame prediction and uses reconstruction error to identify lexical units that cannot evoke frames in FrameNet. SDEC-AD outperforms the state-of-the-art methods in both steps of the frame induction process. Empirical results also show that definitions provide contextual information for representing and characterizing the frame membership of lexical units. 2020.lrec-1.431 @@ -5323,7 +5323,7 @@ Search Query Language Identification Using Weak Labeling RitizTambi AjinkyaKale - Tracy HollowayKing + Tracy HollowayKing 3520–3527 Language identification is a well-known task for natural language documents. In this paper we explore search query language identification which is usually the first task before any other query understanding. Without loss of generalization, we run our experiments on the Adobe Stock search engine. Even though the domain is relatively generic because Adobe Stock queries cover a broad range of objects and concepts, out-of-the-box language identifiers do not perform well due to the extremely short text found in queries. Unlike other well-studied supervised approaches for this task, we examine a practical approach for the cold start problem for automatically getting large-scale query-language pairs for training. We describe the process of creating weak-labeled training data and then human-annotated evaluation data for the search query language identification task. The effectiveness of this technique is demonstrated by training a gradient boosting model for language classification given a query. We out-perform the open domain text model baselines by a large margin. 2020.lrec-1.432 @@ -5333,9 +5333,9 @@ Automated Phonological Transcription of <fixed-case>A</fixed-case>kkadian Cuneiform Text AleksiSahala - MiikkaSilfverberg + MiikkaSilfverberg AnttiArppe - KristerLindén + KristerLindén 3528–3534 Akkadian was an East-Semitic language spoken in ancient Mesopotamia. The language is attested on hundreds of thousands of cuneiform clay tablets. Several Akkadian text corpora contain only the transliterated text. In this paper, we investigate automated phonological transcription of the transliterated corpora. The phonological transcription provides a linguistically appealing form to represent Akkadian, because the transcription is normalized according to the grammatical description of a given dialect and explicitly shows the Akkadian renderings for Sumerian logograms. Because cuneiform text does not mark the inflection for logograms, the inflected form needs to be inferred from the sentence context. To the best of our knowledge, this is the first documented attempt to automatically transcribe Akkadian. Using a context-aware neural network model, we are able to automatically transcribe syllabic tokens at near human performance with 96% recall @ 3, while the logogram transcription remains more challenging at 82% recall @ 3. 2020.lrec-1.433 @@ -5344,8 +5344,8 @@ <fixed-case>COSTRA</fixed-case> 1.0: A Dataset of Complex Sentence Transformations - PetraBarancikova - OndřejBojar + PetraBarancikova + OndřejBojar 3535–3541 We present COSTRA 1.0, a dataset of complex sentence transformations. The dataset is intended for the study of sentence-level embeddings beyond simple word alternations or standard paraphrasing. This first version of the dataset is limited to sentences in Czech but the construction method is universal and we plan to use it also for other languages. The dataset consist of 4,262 unique sentences with average length of 10 words, illustrating 15 types of modifications such as simplification, generalization, or formal and informal language variation. The hope is that with this dataset, we should be able to test semantic properties of sentence embeddings and perhaps even to find some topologically interesting “skeleton” in the sentence embedding space. A preliminary analysis using LASER, multi-purpose multi-lingual sentence embeddings suggests that the LASER space does not exhibit the desired properties. 2020.lrec-1.434 @@ -5432,7 +5432,7 @@ <fixed-case>SEDAR</fixed-case>: a Large Scale <fixed-case>F</fixed-case>rench-<fixed-case>E</fixed-case>nglish Financial Domain Parallel Corpus AbbasGhaddar - PhillippeLanglais + PhillippeLanglais 3595–3602 This paper describes the acquisition, preprocessing and characteristics of SEDAR, a large scale English-French parallel corpus for the financial domain. Our extensive experiments on machine translation show that SEDAR is essential to obtain good performance on finance. We observe a large gain in the performance of machine translation systems trained on SEDAR when tested on finance, which makes SEDAR suitable to study domain adaptation for neural machine translation. The first release of the corpus comprises 8.6 million high quality sentence pairs that are publicly available for research at https://github.com/autorite/sedar-bitext. 2020.lrec-1.442 @@ -5490,7 +5490,7 @@ ShoShimazu ShoTakase ToshiakiNakazawa - NaoakiOkazaki + NaoakiOkazaki 3630–3634 In natural language, we often omit some words that are easily understandable from the context. In particular, pronouns of subject, object, and possessive cases are often omitted in Japanese; these are known as zero pronouns. In translation from Japanese to other languages, we need to find a correct antecedent for each zero pronoun to generate a correct and coherent translation. However, it is difficult for conventional automatic evaluation metrics (e.g., BLEU) to focus on the success of zero pronoun resolution. Therefore, we present a hand-crafted dataset to evaluate whether translation models can resolve the zero pronoun problems in Japanese to English translations. We manually and statistically validate that our dataset can effectively evaluate the correctness of the antecedents selected in translations. Through the translation experiments using our dataset, we reveal shortcomings of an existing context-aware neural machine translation model. 2020.lrec-1.447 @@ -5549,7 +5549,7 @@ An Evaluation Benchmark for Testing the Word Sense Disambiguation Capabilities of Machine Translation Systems AlessandroRaganato YvesScherrer - JörgTiedemann + JörgTiedemann 3668–3675 Lexical ambiguity is one of the many challenging linguistic phenomena involved in translation, i.e., translating an ambiguous word with its correct sense. In this respect, previous work has shown that the translation quality of neural machine translation systems can be improved by explicitly modeling the senses of ambiguous words. Recently, several evaluation test sets have been proposed to measure the word sense disambiguation (WSD) capability of machine translation systems. However, to date, these evaluation test sets do not include any training data that would provide a fair setup measuring the sense distributions present within the training data itself. In this paper, we present an evaluation benchmark on WSD for machine translation for 10 language pairs, comprising training data with known sense distributions. Our approach for the construction of the benchmark builds upon the wide-coverage multilingual sense inventory of BabelNet, the multilingual neural parsing pipeline TurkuNLP, and the OPUS collection of translated texts from the web. The test suite is available at http://github.com/Helsinki-NLP/MuCoW. 2020.lrec-1.452 @@ -5558,8 +5558,8 @@ <fixed-case>MEDLINE</fixed-case> as a Parallel Corpus: a Survey to Gain Insight on <fixed-case>F</fixed-case>rench-, <fixed-case>S</fixed-case>panish- and <fixed-case>P</fixed-case>ortuguese-speaking Authors’ Abstract Writing Practice - AurélieNévéol - AntonioJimeno Yepes + AurélieNévéol + AntonioJimeno Yepes MarianaNeves 3676–3682 Background: Parallel corpora are used to train and evaluate machine translation systems. To alleviate the cost of producing parallel resources for evaluation campaigns, existing corpora are leveraged. However, little information may be available about the methods used for producing the corpus, including translation direction. Objective: To gain insight on MEDLINE parallel corpus used in the biomedical task at the Workshop on Machine Translation in 2019 (WMT 2019). Material and Methods: Contact information for the authors of MEDLINE articles included in the English/Spanish (EN/ES), English/French (EN/FR), and English/Portuguese (EN/PT) WMT 2019 test sets was obtained from PubMed and publisher websites. The authors were asked about their abstract writing practices in a survey. Results: The response rate was above 20%. Authors reported that they are mainly native speakers of languages other than English. Although manual translation, sometimes via professional translation services, was commonly used for abstract translation, authors of articles in the EN/ES and EN/PT sets also relied on post-edited machine translation. Discussion: This study provides a characterization of MEDLINE authors’ language skills and abstract writing practices. Conclusion: The information collected in this study will be used to inform test set design for the next WMT biomedical task. @@ -5570,7 +5570,7 @@ <fixed-case>JASS</fixed-case>: <fixed-case>J</fixed-case>apanese-specific Sequence to Sequence Pre-training for Neural Machine Translation ZhuoyuanMao - FabienCromieres + FabienCromieres RajDabre HaiyueSong SadaoKurohashi @@ -5601,7 +5601,7 @@ Linguistically Informed <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Neural Machine Translation VikrantGoyal PruthwikMishra - Dipti MisraSharma + Dipti MisraSharma 3698–3703 Hindi-English Machine Translation is a challenging problem, owing to multiple factors including the morphological complexity and relatively free word order of Hindi, in addition to the lack of sufficient parallel training data. Neural Machine Translation (NMT) is a rapidly advancing MT paradigm and has shown promising results for many language pairs, especially in large training data scenarios. To overcome the data sparsity issue caused by the lack of large parallel corpora for Hindi-English, we propose a method to employ additional linguistic knowledge which is encoded by different phenomena depicted by Hindi. We generalize the embedding layer of the state-of-the-art Transformer model to incorporate linguistic features like POS tag, lemma and morph features to improve the translation performance. We compare the results obtained on incorporating this knowledge with the baseline systems and demonstrate significant performance improvements. Although, the Transformer NMT models have a strong efficacy to learn language constructs, we show that the usage of specific features further help in improving the translation performance. 2020.lrec-1.456 @@ -5622,7 +5622,7 @@ An Analysis of Massively Multilingual Neural Machine Translation for Low-Resource Languages AaronMueller GarrettNicolai - Arya D.McCarthy + Arya D.McCarthy DylanLewis WinstonWu DavidYarowsky @@ -5646,7 +5646,7 @@ <fixed-case>M</fixed-case>u<fixed-case>ST</fixed-case>-Cinema: a Speech-to-Subtitles corpus AlinaKarakanta - MatteoNegri + MatteoNegri MarcoTurchi 3727–3734 Growing needs in localising audiovisual content in multiple languages through subtitles call for the development of automatic solutions for human subtitling. Neural Machine Translation (NMT) can contribute to the automatisation of subtitling, facilitating the work of human subtitlers and reducing turn-around times and related costs. NMT requires high-quality, large, task-specific training data. The existing subtitling corpora, however, are missing both alignments to the source language audio and important information about subtitle breaks. This poses a significant limitation for developing efficient automatic approaches for subtitling, since the length and form of a subtitle directly depends on the duration of the utterance. In this work, we present MuST-Cinema, a multilingual speech translation corpus built from TED subtitles. The corpus is comprised of (audio, transcription, translation) triplets. Subtitle breaks are preserved by inserting special symbols. We show that the corpus can be used to build models that efficiently segment sentences into subtitles and propose a method for annotating existing subtitling corpora with subtitle breaks, conforming to the constraint of length. @@ -5657,7 +5657,7 @@ On Context Span Needed for Machine Translation Evaluation SheilaCastilho - MajaPopović + MajaPopović AndyWay 3735–3742 Despite increasing efforts to improve evaluation of machine translation (MT) by going beyond the sentence level to the document level, the definition of what exactly constitutes a “document level” is still not clear. This work deals with the context span necessary for a more reliable MT evaluation. We report results from a series of surveys involving three domains and 18 target languages designed to identify the necessary context span as well as issues related to it. Our findings indicate that, despite the fact that some issues and spans are strongly dependent on domain and on the target language, a number of common patterns can be observed so that general guidelines for context-aware MT evaluation can be drawn. @@ -5689,7 +5689,7 @@ The <fixed-case>MARCELL</fixed-case> Legislative Corpus - TamásVáradi + TamásVáradi SvetlaKoeva MartinYamalov MarkoTadić @@ -5697,13 +5697,13 @@ BartłomiejNitoń MaciejOgrodniczuk PiotrPęzik - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu RaduIon ElenaIrimia MariaMitrofan VasilePăiș - DanTufiș - RadovanGarabík + DanTufiș + RadovanGarabík SimonKrek AndrazRepar MatjažRihtar @@ -5741,7 +5741,7 @@ MikkoAulamo UmutSulubacak SamiVirpioja - JörgTiedemann + JörgTiedemann 3782–3789 This paper introduces OpusTools, a package for downloading and processing parallel corpora included in the OPUS corpus collection. The package implements tools for accessing compressed data in their archived release format and make it possible to easily convert between common formats. OpusTools also includes tools for language identification and data filtering as well as tools for importing data from various sources into the OPUS format. We show the use of these tools in parallel corpus creation and data diagnostics. The latter is especially useful for the identification of potential problems and errors in the extensive data set. Using these tools, we can now monitor the validity of data sets and improve the overall quality and consitency of the data collection. 2020.lrec-1.467 @@ -5771,7 +5771,7 @@ The <fixed-case>FISKMÖ</fixed-case> Project: Resources and Tools for <fixed-case>F</fixed-case>innish-<fixed-case>S</fixed-case>wedish Machine Translation and Cross-Linguistic Research - JörgTiedemann + JörgTiedemann TommiNieminen MikkoAulamo JennaKanerva @@ -5808,7 +5808,7 @@ Finite State Machine Pattern-Root <fixed-case>A</fixed-case>rabic Morphological Generator, Analyzer and Diacritizer MahaAlkhairy AfshanJafri - DavidSmith + DavidSmith 3834–3841 We describe and evaluate the Finite-State Arabic Morphologizer (FSAM) – a concatenative (prefix-stem-suffix) and templatic (root- pattern) morphologizer that generates and analyzes undiacritized Modern Standard Arabic (MSA) words, and diacritizes them. Our bidirectional unified-architecture finite state machine (FSM) is based on morphotactic MSA grammatical rules. The FSM models the root-pattern structure related to semantics and syntax, making it readily scalable unlike stem-tabulations in prevailing systems. We evaluate the coverage and accuracy of our model, with coverage being percentage of words in Tashkeela (a large corpus) that can be analyzed. Accuracy is computed against a gold standard, comprising words and properties, created from the intersection of UD PADT treebank and Tashkeela. Coverage of analysis (extraction of root and properties from word) is 82%. Accuracy results are: root computed from a word (92%), word generation from a root (100%), non-root properties of a word (97%), and diacritization (84%). FSAM’s non-root results match or surpass MADAMIRA’s, and root result comparisons are not made because of the concatenative nature of publicly available morphologizers. 2020.lrec-1.473 @@ -5818,9 +5818,9 @@ An Unsupervised Method for Weighting Finite-state Morphological Analyzers AmrKeleg - FrancisTyers + FrancisTyers NickHowell - TommiPirinen + TommiPirinen 3842–3850 Morphological analysis is one of the tasks that have been studied for years. Different techniques have been used to develop models for performing morphological analysis. Models based on finite state transducers have proved to be more suitable for languages with low available resources. In this paper, we have developed a method for weighting a morphological analyzer built using finite state transducers in order to disambiguate its results. The method is based on a word2vec model that is trained in a completely unsupervised way using raw untagged corpora and is able to capture the semantic meaning of the words. Most of the methods used for disambiguating the results of a morphological analyzer relied on having tagged corpora that need to manually built. Additionally, the method developed uses information about the token irrespective of its context unlike most of the other techniques that heavily rely on the word’s context to disambiguate its set of candidate analyses. 2020.lrec-1.474 @@ -5842,7 +5842,7 @@ A Supervised Part-Of-Speech Tagger for the <fixed-case>G</fixed-case>reek Language of the Social Web Maria NefeliNikiforos - Katia LidaKermanidis + Katia LidaKermanidis 3861–3867 The increasing volume of communication via microblogging messages on social networks has created the need for efficient Natural Language Processing (NLP) tools, especially for unstructured text processing. Extracting information from unstructured social text is one of the most demanding NLP tasks. This paper presents the first part-of-speech tagged data set of social text in Greek, as well as the first supervised part-of-speech tagger developed for such data sets. 2020.lrec-1.476 @@ -5865,7 +5865,7 @@ NabilHathout FranckSajous BasilioCalderone - FiammettaNamer + FiammettaNamer 3877–3885 Glawinette is a derivational lexicon of French that will be used to feed the Démonette database. It has been created from the GLAWI machine readable dictionary. We collected couples of words from the definitions and the morphological sections of the dictionary and then selected the ones that form regular formal analogies and that instantiate frequent enough formal patterns. The graph structure of the morphological families has then been used to identify for each couple of lexemes derivational patterns that are close to the intuition of the morphologists. 2020.lrec-1.478 @@ -5875,9 +5875,9 @@ <fixed-case>B</fixed-case>aby<fixed-case>FST</fixed-case> - Towards a Finite-State Based Computational Model of Ancient <fixed-case>B</fixed-case>abylonian AleksiSahala - MiikkaSilfverberg + MiikkaSilfverberg AnttiArppe - KristerLindén + KristerLindén 3886–3894 Akkadian is a fairly well resourced extinct language that does not yet have a comprehensive morphological analyzer available. In this paper we describe a general finite-state based morphological model for Babylonian, a southern dialect of the Akkadian language, that can achieve a coverage up to 97.3% and recall up to 93.7% on lemmatization and POS-tagging task on token level from a transcribed input. Since Akkadian word forms exhibit a high degree of morphological ambiguity, in that only 20.1% of running word tokens receive a single unambiguous analysis, we attempt a first pass at weighting our finite-state transducer, using existing extensive Akkadian corpora which have been partially validated for their lemmas and parts-of-speech but not the entire morphological analyses. The resultant weighted finite-state transducer yields a moderate improvement so that for 57.4% of the word tokens the highest ranked analysis is the correct one. We conclude with a short discussion on how morphological ambiguity in the analysis of Akkadian could be further reduced with improvements in the training data used in weighting the finite-state transducer as well as through other, context-based techniques. 2020.lrec-1.479 @@ -5898,7 +5898,7 @@ Wikinflection Corpus: A (Better) Multilingual, Morpheme-Annotated Inflectional Corpus EleniMetheniti - GuenterNeumann + GuenterNeumann 3905–3912 Multilingual, inflectional corpora are a scarce resource in the NLP community, especially corpora with annotated morpheme boundaries. We are evaluating a generated, multilingual inflectional corpus with morpheme boundaries, generated from the English Wiktionary (Metheniti and Neumann, 2018), against the largest, multilingual, high-quality inflectional corpus of the UniMorph project (Kirov et al., 2018). We confirm that the generated Wikinflection corpus is not of such quality as UniMorph, but we were able to extract a significant amount of words from the intersection of the two corpora. Our Wikinflection corpus benefits from the morpheme segmentations of Wiktionary/Wikinflection and from the manually-evaluated morphological feature tags of the UniMorph project, and has 216K lemmas and 5.4M word forms, in a total of 68 languages. 2020.lrec-1.481 @@ -5919,16 +5919,16 @@ <fixed-case>U</fixed-case>ni<fixed-case>M</fixed-case>orph 3.0: <fixed-case>U</fixed-case>niversal <fixed-case>M</fixed-case>orphology - Arya D.McCarthy + Arya D.McCarthy ChristoKirov MatteoGrella AmritNidhi PatrickXia KyleGorman EkaterinaVylomova - Sabrina J.Mielke + Sabrina J.Mielke GarrettNicolai - MiikkaSilfverberg + MiikkaSilfverberg TimofeyArkhangelskiy NatalyKrizhanovsky AndrewKrizhanovsky @@ -5937,7 +5937,7 @@ JohnMansfield ValtsErnštreits YuvalPinter - Cassandra L.Jacobs + Cassandra L.Jacobs RyanCotterell MansHulden DavidYarowsky @@ -5994,7 +5994,7 @@ Fine-grained Morphosyntactic Analysis and Generation Tools for More Than One Thousand Languages GarrettNicolai DylanLewis - Arya D.McCarthy + Arya D.McCarthy AaronMueller WinstonWu DavidYarowsky @@ -6010,7 +6010,7 @@ InjyHamed SlimAbdennadher Ngoc ThangVu - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu 3973–3977 Code-switching has become a prevalent phenomenon across many communities. It poses a challenge to NLP researchers, mainly due to the lack of available data needed for training and testing applications. In this paper, we introduce a new resource: a corpus of Egyptian- Arabic code-switch speech data that is fully tokenized, lemmatized and annotated for part-of-speech tags. Beside the corpus itself, we provide annotation guidelines to address the unique challenges of annotating code-switch data. Another challenge that we address is the fact that Egyptian Arabic orthography and grammar are not standardized. 2020.lrec-1.489 @@ -6052,11 +6052,11 @@ Morphological Segmentation for Low Resource Languages JustinMott AnnBies - StephanieStrassel + StephanieStrassel JordanKodner CaitlinRichter HongzhiXu - MitchellMarcus + MitchellMarcus 3996–4002 This paper describes a new morphology resource created by Linguistic Data Consortium and the University of Pennsylvania for the DARPA LORELEI Program. The data consists of approximately 2000 tokens annotated for morphological segmentation in each of 9 low resource languages, along with root information for 7 of the languages. The languages annotated show a broad diversity of typological features. A minimal annotation scheme for segmentation was developed such that it could capture the patterns of a wide range of languages and also be performed reliably by non-linguist annotators. The basic annotation guidelines were designed to be language-independent, but included language-specific morphological paradigms and other specifications. The resulting annotated corpus is designed to support and stimulate the development of unsupervised morphological segmenters and analyzers by providing a gold standard for their evaluation on a more typologically diverse set of languages than has previously been available. By providing root annotation, this corpus is also a step toward supporting research in identifying richer morphological structures than simple morpheme boundaries. 2020.lrec-1.493 @@ -6069,9 +6069,9 @@ Marie-AnneLachaux AlexisConneau VishravChaudhary - FranciscoGuzmán + FranciscoGuzmán ArmandJoulin - EdouardGrave + EdouardGrave 4003–4012 Pre-training text representations have led to significant improvements in many areas of natural language processing. The quality of these models benefits greatly from the size of the pretraining corpora as long as its quality is preserved. In this paper, we describe an automatic pipeline to extract massive high-quality monolingual datasets from Common Crawl for a variety of languages. Our pipeline follows the data processing introduced in fastText (Mikolov et al., 2017; Grave et al., 2018), that deduplicates documents and identifies their language. We augment this pipeline with a filtering step to select documents that are close to high quality corpora like Wikipedia. 2020.lrec-1.494 @@ -6080,9 +6080,9 @@ On the Robustness of Unsupervised and Semi-supervised Cross-lingual Word Embedding Learning - YeraiDoval - JoseCamacho-Collados - LuisEspinosa Anke + YeraiDoval + JoseCamacho-Collados + LuisEspinosa Anke StevenSchockaert 4013–4023 Cross-lingual word embeddings are vector representations of words in different languages where words with similar meaning are represented by similar vectors, regardless of the language. Recent developments which construct these embeddings by aligning monolingual spaces have shown that accurate alignments can be obtained with little or no supervision, which usually comes in the form of bilingual dictionaries. However, the focus has been on a particular controlled scenario for evaluation, and there is no strong evidence on how current state-of-the-art systems would fare with noisy text or for language pairs with major linguistic differences. In this paper we present an extensive evaluation over multiple cross-lingual embedding models, analyzing their strengths and limitations with respect to different variables such as target language, training corpora and amount of supervision. Our conclusions put in doubt the view that high-quality cross-lingual embeddings can always be learned without much supervision. @@ -6106,14 +6106,14 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies v2: An Evergrowing Multilingual Treebank Collection JoakimNivre - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe FilipGinter - JanHajič - Christopher D.Manning + JanHajič + Christopher D.Manning SampoPyysalo SebastianSchuster - FrancisTyers - DanielZeman + FrancisTyers + DanielZeman 4034–4043 Universal Dependencies is an open community effort to create cross-linguistically consistent treebank annotation for many languages within a dependency-based lexicalist framework. The annotation consists in a linguistically motivated word segmentation; a morphological layer comprising lemmas, universal part-of-speech tags, and standardized morphological features; and a syntactic layer focusing on syntactic relations between predicates, arguments and modifiers. In this paper, we describe version 2 of the universal guidelines (UD v2), discuss the major changes from UD v1 to UD v2, and give an overview of the currently available treebanks for 90 languages. 2020.lrec-1.497 @@ -6123,7 +6123,7 @@ <fixed-case>EMPAC</fixed-case>: an <fixed-case>E</fixed-case>nglish–<fixed-case>S</fixed-case>panish Corpus of Institutional Subtitles IrisSerrat Roozen - José ManuelMartínez Martínez + José ManuelMartínez Martínez 4044–4053 The EuroparlTV Multimedia Parallel Corpus (EMPAC) is a collection of subtitles in English and Spanish for videos from the EuropeanParliament’s Multimedia Centre. The corpus has been compiled with the EMPAC toolkit. The aim of this corpus is to provide a resource to study institutional subtitling on the one hand, and, on the other hand, facilitate the analysis of web accessibility to institutional multimedia content. The corpus covers a time span from 2009 to 2017, it is made up of 4,000 texts amounting to two and half millions of tokens for every language, corresponding to approximately 280 hours of video. This paper provides 1) a review of related corpora; 2) a revision of typical compilation methodologies of subtitle corpora; 3) a detailed account of the corpus compilation methodology followed; and, 4) a description of the corpus. In the conclusion, the key findings are summarised regarding formal aspects of the subtitles conditioning the accessibility to the multimedia content of the EuroparlTV. 2020.lrec-1.498 @@ -6133,7 +6133,7 @@ Cross-Lingual Word Embeddings for <fixed-case>T</fixed-case>urkic Languages ElmurodKuriyozov - YeraiDoval + YeraiDoval CarlosGómez-Rodríguez 4054–4062 There has been an increasing interest in learning cross-lingual word embeddings to transfer knowledge obtained from a resource-rich language, such as English, to lower-resource languages for which annotated data is scarce, such as Turkish, Russian, and many others. In this paper, we present the first viability study of established techniques to align monolingual embedding spaces for Turkish, Uzbek, Azeri, Kazakh and Kyrgyz, members of the Turkic family which is heavily affected by the low-resource constraint. Those techniques are known to require little explicit supervision, mainly in the form of bilingual dictionaries, hence being easily adaptable to different domains, including low-resource ones. We obtain new bilingual dictionaries and new word embeddings for these languages and show the steps for obtaining cross-lingual word embeddings using state-of-the-art techniques. Then, we evaluate the results using the bilingual dictionary induction task. Our experiments confirm that the obtained bilingual dictionaries outperform previously-available ones, and that word embeddings from a low-resource language can benefit from resource-rich closely-related languages when they are aligned together. Furthermore, evaluation on an extrinsic task (Sentiment analysis on Uzbek) proves that monolingual word embeddings can, although slightly, benefit from cross-lingual alignments. @@ -6166,7 +6166,7 @@ <fixed-case>G</fixed-case>e<fixed-case>B</fixed-case>io<fixed-case>T</fixed-case>oolkit: Automatic Extraction of Gender-Balanced Multilingual Corpus of <fixed-case>W</fixed-case>ikipedia Biographies - Marta R.Costa-jussà + Marta R.Costa-jussà PauLi Lin CristinaEspaña-Bonet 4081–4088 @@ -6271,8 +6271,8 @@ Analysis of <fixed-case>G</fixed-case>lobal<fixed-case>P</fixed-case>hone and <fixed-case>E</fixed-case>thiopian Languages Speech Corpora for Multilingual <fixed-case>ASR</fixed-case> - Martha YifiruTachbelie - Solomon TeferraAbate + Martha YifiruTachbelie + Solomon TeferraAbate TanjaSchultz 4152–4156 In this paper, we present the analysis of GlobalPhone (GP) and speech corpora of Ethiopian languages (Amharic, Tigrigna, Oromo and Wolaytta). The aim of the analysis is to select speech data from GP for the development of multilingual Automatic Speech Recognition (ASR) system for the Ethiopian languages. To this end, phonetic overlaps among GP and Ethiopian languages have been analyzed. The result of our analysis shows that there is much phonetic overlap among Ethiopian languages although they are from three different language families. From GP, Turkish, Uyghur and Croatian are found to have much overlap with the Ethiopian languages. On the other hand, Korean has less phonetic overlap with the rest of the languages. Moreover, morphological complexity of the GP and Ethiopian languages, reflected by type to token ration (TTR) and out of vocabulary (OOV) rate, has been analyzed. Both metrics indicated the morphological complexity of the languages. Korean and Amharic have been identified as extremely morphologically complex compared to the other languages. Tigrigna, Russian, Turkish, Polish, etc. are also among the morphologically complex languages. @@ -6292,8 +6292,8 @@ Large Vocabulary Read Speech Corpora for Four <fixed-case>E</fixed-case>thiopian Languages: <fixed-case>A</fixed-case>mharic, <fixed-case>T</fixed-case>igrigna, <fixed-case>O</fixed-case>romo and <fixed-case>W</fixed-case>olaytta - Solomon TeferraAbate - Martha YifiruTachbelie + Solomon TeferraAbate + Martha YifiruTachbelie MichaelMelese HafteAbera TewodrosAbebe @@ -6301,7 +6301,7 @@ YaregalAssabie MillionMeshesha SolomonAfnafu - Binyam EphremSeyoum + Binyam EphremSeyoum 4167–4171 Automatic Speech Recognition (ASR) is one of the most important technologies to support spoken communication in modern life. However, its development benefits from large speech corpus. The development of such a corpus is expensive and most of the human languages, including the Ethiopian languages, do not have such resources. To address this problem, we have developed four large (about 22 hours) speech corpora for four Ethiopian languages: Amharic, Tigrigna, Oromo and Wolaytta. To assess usability of the corpora for (the purpose of) speech processing, we have developed ASR systems for each language. In this paper, we present the corpora and the baseline ASR systems we have developed. We have achieved word error rates (WERs) of 37.65%, 31.03%, 38.02%, 33.89% for Amharic, Tigrigna, Oromo and Wolaytta, respectively. This results show that the corpora are suitable for further investigation towards the development of ASR systems. Thus, the research community can use the corpora to further improve speech processing systems. From our results, it is clear that the collection of text corpora to train strong language models for all of the languages is still required, especially for Oromo and Wolaytta. 2020.lrec-1.513 @@ -6312,7 +6312,7 @@ Incorporating Politeness across Languages in Customer Care Responses: Towards building a Multi-lingual Empathetic Dialogue Agent MauajamaFirdaus AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 4172–4182 Customer satisfaction is an essential aspect of customer care systems. It is imperative for such systems to be polite while handling customer requests/demands. In this paper, we present a large multi-lingual conversational dataset for English and Hindi. We choose data from Twitter having both generic and courteous responses between customer care agents and aggrieved users. We also propose strong baselines that can induce courteous behaviour in generic customer care response in a multi-lingual scenario. We build a deep learning framework that can simultaneously handle different languages and incorporate polite behaviour in the customer care agent’s responses. Our system is competent in generating responses in different languages (here, English and Hindi) depending on the customer’s preference and also is able to converse with humans in an empathetic manner to ensure customer satisfaction and retention. Experimental results show that our proposed models can converse in both the languages and the information shared between the languages helps in improving the performance of the overall system. Qualitative and quantitative analysis shows that the proposed method can converse in an empathetic manner by incorporating courteousness in the responses and hence increasing customer satisfaction. 2020.lrec-1.514 @@ -6323,7 +6323,7 @@ <fixed-case>W</fixed-case>iki<fixed-case>B</fixed-case>ank: Using <fixed-case>W</fixed-case>ikidata to Improve Multilingual Frame-Semantic Parsing CezarSas MeriemBeloucif - AndersSøgaard + AndersSøgaard 4183–4189 Frame-semantic annotations exist for a tiny fraction of the world’s languages, Wikidata, however, links knowledge base triples to texts in many languages, providing a common, distant supervision signal for semantic parsers. We present WikiBank, a multilingual resource of partial semantic structures that can be used to extend pre-existing resources rather than creating new man-made resources from scratch. We also integrate this form of supervision into an off-the-shelf frame-semantic parser and allow cross-lingual transfer. Using Google’s Sling architecture, we show significant improvements on the English and Spanish CoNLL 2009 datasets, whether training on the full available datasets or small subsamples thereof. 2020.lrec-1.515 @@ -6334,7 +6334,7 @@ Multilingual Corpus Creation for Multilingual Semantic Similarity Task MahtabAhmed ChahnaDixit - Robert E.Mercer + Robert E.Mercer AtifKhan Muhammad RifayatSamee FelipeUrra @@ -6388,7 +6388,7 @@ MichaelHenretty ReubenMorais LindsaySaunders - FrancisTyers + FrancisTyers GregorWeber 4218–4222 The Common Voice corpus is a massively-multilingual collection of transcribed speech intended for speech technology research and development. Common Voice is designed for Automatic Speech Recognition purposes but can be useful in other domains (e.g. language identification). To achieve scale and sustainability, the Common Voice project employs crowdsourcing for both data collection and data validation. The most recent release includes 29 languages, and as of November 2019 there are a total of 38 languages collecting data. Over 50,000 individuals have participated so far, resulting in 2,500 hours of collected audio. To our knowledge this is the largest audio corpus in the public domain for speech recognition, both in terms of number of hours and number of languages. As an example use case for Common Voice, we present speech recognition experiments using Mozilla’s DeepSpeech Speech-to-Text toolkit. By applying transfer learning from a source English model, we find an average Character Error Rate improvement of 5.99 ± 5.48 for twelve target languages (German, French, Italian, Turkish, Catalan, Slovenian, Welsh, Irish, Breton, Tatar, Chuvash, and Kabyle). For most of these languages, these are the first ever published results on end-to-end Automatic Speech Recognition. @@ -6404,7 +6404,7 @@ YeonjuLee-Sikka SeanMiller AlanWong - Arya D.McCarthy + Arya D.McCarthy KyleGorman 4223–4228 We introduce WikiPron, an open-source command-line tool for extracting pronunciation data from Wiktionary, a collaborative multilingual online dictionary. We first describe the design and use of WikiPron. We then discuss the challenges faced scaling this tool to create an automatically-generated database of 1.7 million pronunciations from 165 languages. Finally, we validate the pronunciation database by using it to train and evaluating a collection of generic grapheme-to-phoneme models. The software, pronunciation data, and models are all made available under permissive open-source licenses. @@ -6445,7 +6445,7 @@ AleksandrKhakhmovich SvetlanaPavlova KiraKirillova - NikolayArefyev + NikolayArefyev EkaterinaSavilova 4247–4255 Out-of-vocabulary words are still a challenge in cross-lingual Natural Language Processing tasks, for which transliteration from source to target language or script is one of the solutions. In this study, we collect a personal name dataset in 445 Wikidata languages (37 scripts), train Transformer-based multilingual transliteration models on 6 high- and 4 less-resourced languages, compare them with bilingual models from (Merhav and Ash, 2018) and determine that multilingual models perform better for less-resourced languages. We discover that intrinsic evaluation, i.e comparison to a single gold standard, might not be appropriate in the task of transliteration due to its high variability. For this reason, we propose using extrinsic evaluation of transliteration via the cross-lingual named entity list search task (e.g. personal name search in contacts list). Our code and datasets are publicly available online. @@ -6457,7 +6457,7 @@ Serial Speakers: a Dataset of <fixed-case>TV</fixed-case> Series XavierBost VincentLabatut - GeorgesLinares + GeorgesLinares 4256–4264 For over a decade, TV series have been drawing increasing interest, both from the audience and from various academic fields. But while most viewers are hooked on the continuous plots of TV serials, the few annotated datasets available to researchers focus on standalone episodes of classical TV series. We aim at filling this gap by providing the multimedia/speech processing communities with “Serial Speakers”, an annotated dataset of 155 episodes from three popular American TV serials: “Breaking Bad”, “Game of Thrones” and “House of Cards”. “Serial Speakers” is suitable both for investigating multimedia retrieval in realistic use case scenarios, and for addressing lower level speech related tasks in especially challenging conditions. We publicly release annotations for every speech turn (boundaries, speaker) and scene boundary, along with annotations for shot boundaries, recurring shots, and interacting speakers in a subset of episodes. Because of copyright restrictions, the textual content of the speech turns is encrypted in the public version of the dataset, but we provide the users with a simple online tool to recover the plain text from their own subtitle files. 2020.lrec-1.525 @@ -6512,7 +6512,7 @@ NicolasHernandez RichardDufour DelphineCharlet - GeraldineDamnati + GeraldineDamnati SolenQuiniou NathalieCamelin 4293–4301 @@ -6556,7 +6556,7 @@ <fixed-case>E</fixed-case>:Calm Resource: a Resource for Studying Texts Produced by <fixed-case>F</fixed-case>rench Pupils and Students - Lydia-MaiHo-Dac + Lydia-MaiHo-Dac SergeFleury ClaudePonton 4327–4332 @@ -6568,7 +6568,7 @@ Introducing <fixed-case>MULAI</fixed-case>: A Multimodal Database of Laughter during Dyadic Interactions Michel-PierreJansen - Khiet P.Truong + Khiet P.Truong Dirk K.J.Heylen Deniece S.Nazareth 4333–4342 @@ -6580,7 +6580,7 @@ The Connection between the Text and Images of News Articles: New Insights for Multimedia Analysis NellekeOostdijk - Hansvan Halteren + Hansvan Halteren ErkanBașar MarthaLarson 4343–4351 @@ -6597,7 +6597,7 @@ CristinaNoujaim RuoyaoWang JiaDeng - RadaMihalcea + RadaMihalcea 4352–4358 We introduce LifeQA, a benchmark dataset for video question answering that focuses on day-to-day real-life situations. Current video question answering datasets consist of movies and TV shows. However, it is well-known that these visual domains are not representative of our day-to-day lives. Movies and TV shows, for example, benefit from professional camera movements, clean editing, crisp audio recordings, and scripted dialog between professional actors. While these domains provide a large amount of data for training models, their properties make them unsuitable for testing real-life question answering systems. Our dataset, by contrast, consists of video clips that represent only real-life scenarios. We collect 275 such video clips and over 2.3k multiple-choice questions. In this paper, we analyze the challenging but realistic aspects of LifeQA, and we apply several state-of-the-art video question answering models to provide benchmarks for future research. The full dataset is publicly available at https://lit.eecs.umich.edu/lifeqa/. 2020.lrec-1.536 @@ -6609,7 +6609,7 @@ JuliaBettinger AnnaHätty MichaelDorna - SabineSchulte im Walde + SabineSchulte im Walde 4359–4367 We present a dataset with difficulty ratings for 1,030 German closed noun compounds extracted from domain-specific texts for do-it-ourself (DIY), cooking and automotive. The dataset includes two-part compounds for cooking and DIY, and two- to four-part compounds for automotive. The compounds were identified in text using the Simple Compound Splitter (Weller-Di Marco, 2017); a subset was filtered and balanced for frequency and productivity criteria as basis for manual annotation and fine-grained interpretation. This study presents the creation, the final dataset with ratings from 20 annotators and statistics over the dataset, to provide insight into the perception of domain-specific term difficulty. It is particularly striking that annotators agree on a coarse, binary distinction between easy vs. difficult domain-specific compounds but that a more fine grained distinction of difficulty is not meaningful. We finally discuss the challenges of an annotation for difficulty, which includes both the task description as well as the selection of the data basis. 2020.lrec-1.537 @@ -6621,7 +6621,7 @@ YanaStrakatova NeeleFalk IsabelFuhrmann - ErhardHinrichs + ErhardHinrichs DanielaRossmann 4368–4378 In this paper we present the GerCo dataset of adjective-noun collocations for German, such as alter Freund ‘old friend’ and tiefe Liebe ‘deep love’. The annotation has been performed by experts based on the annotation scheme introduced in this paper. The resulting dataset contains 4,732 positive and negative instances of collocations and covers all the 16 semantic classes of adjectives as defined in the German wordnet GermaNet. The dataset can serve as a reliable empirical basis for comparing different theoretical frameworks concerned with collocations or as material for data-driven approaches to the studies of collocations including different machine learning experiments. This paper addresses the latter issue by using the GerCo dataset for evaluating different models on the task of automatic collocation identification. We compare lexical association measures with static and contextualized word embeddings. The experiments show that word embeddings outperform methods based on statistical association measures by a wide margin. @@ -6632,7 +6632,7 @@ Variants of Vector Space Reductions for Predicting the Compositionality of <fixed-case>E</fixed-case>nglish Noun Compounds PegahAlipoor - SabineSchulte im Walde + SabineSchulte im Walde 4379–4387 Predicting the degree of compositionality of noun compounds such as “snowball” and “butterfly” is a crucial ingredient for lexicography and Natural Language Processing applications, to know whether the compound should be treated as a whole, or through its constituents, and what it means. Computational approaches for an automatic prediction typically represent and compare compounds and their constituents within a vector space and use distributional similarity as a proxy to predict the semantic relatedness between the compounds and their constituents as the compound’s degree of compositionality. This paper provides a systematic evaluation of vector-space reduction variants across kinds, exploring reductions based on part-of-speech next to and also in combination with Principal Components Analysis using Singular Value and word2vec embeddings. We show that word2vec and nouns only dimensionality reductions are the most successful and stable vector space variants for our task. 2020.lrec-1.539 @@ -6643,7 +6643,7 @@ Varying Vector Representations and Integrating Meaning Shifts into a <fixed-case>P</fixed-case>age<fixed-case>R</fixed-case>ank Model for Automatic Term Extraction AnuragNigam AnnaHätty - SabineSchulte im Walde + SabineSchulte im Walde 4388–4394 We perform a comparative study for automatic term extraction from domain-specific language using a PageRank model with different edge-weighting methods. We vary vector space representations within the PageRank graph algorithm, and we go beyond standard co-occurrence and investigate the influence of measures of association strength and first- vs. second-order co-occurrence. In addition, we incorporate meaning shifts from general to domain-specific language as personalized vectors, in order to distinguish between termhood strengths of ambiguous words across word senses. Our study is performed for two domain-specific English corpora: ACL and do-it-yourself (DIY); and a domain-specific German corpus: cooking. The models are assessed by applying average precision and the roc score as evaluation metrices. 2020.lrec-1.540 @@ -6655,8 +6655,8 @@ KarënFort BrunoGuillaume Yann-AlanPilatte - MathieuConstant - NicolasLefèbvre + MathieuConstant + NicolasLefèbvre 4395–4401 We present here Rigor Mortis, a gamified crowdsourcing platform designed to evaluate the intuition of the speakers, then train them to annotate multi-word expressions (MWEs) in French corpora. We previously showed that the speakers’ intuition is reasonably good (65% in recall on non-fixed MWE). We detail here the annotation results, after a training phase using some of the tests developed in the PARSEME-FR project. 2020.lrec-1.541 @@ -6668,7 +6668,7 @@ MurathanKurfalı RobertÖstling JohanSjons - MatsWirén + MatsWirén 4402–4409 We present a new set of 96 Swedish multi-word expressions annotated with degree of (non-)compositionality. In contrast to most previous compositionality datasets we also consider syntactically complex constructions and publish a formal specification of each expression. This allows evaluation of computational models beyond word bigrams, which have so far been the norm. Finally, we use the annotations to evaluate a system for automatic compositionality estimation based on distributional semantics. Our analysis of the disagreements between human annotators and the distributional model reveal interesting questions related to the perception of compositionality, and should be informative to future work in the area. 2020.lrec-1.542 @@ -6689,8 +6689,8 @@ Dedicated Language Resources for Interdisciplinary Research on Multiword Expressions: Best Thing since Sliced Bread FerdyHubers - CatiaCucchiarini - HelmerStrik + CatiaCucchiarini + HelmerStrik 4418–4425 Multiword expressions such as idioms (beat about the bush), collocations (plastic surgery) and lexical bundles (in the middle of) are challenging for disciplines like Natural Language Processing (NLP), psycholinguistics and second language acquisition, , due to their more or less fixed character. Idiomatic expressions are especially problematic, because they convey a figurative meaning that cannot always be inferred from the literal meanings of the component words. Researchers acknowledge that important properties that characterize idioms such as frequency of exposure, familiarity, transparency, and imageability, should be taken into account in research, but these are typically properties that rely on subjective judgments. This is probably one of the reasons why many studies that investigated idiomatic expressions collected limited information about idiom properties for very small numbers of idioms only. In this paper we report on cross-boundary work aimed at developing a set of tools and language resources that are considered crucial for this kind of multifaceted research. We discuss the results of our research and suggest possible avenues for future research 2020.lrec-1.544 @@ -6710,7 +6710,7 @@ Introducing <fixed-case>RONEC</fixed-case> - the <fixed-case>R</fixed-case>omanian Named Entity Corpus - Stefan DanielDumitrescu + Stefan DanielDumitrescu Andrei-MariusAvram 4436–4443 We present RONEC - the Named Entity Corpus for the Romanian language. The corpus contains over 26000 entities in ~5000 annotated sentences, belonging to 16 distinct classes. The sentences have been extracted from a copy-right free newspaper, covering several styles. This corpus represents the first initiative in the Romanian language space specifically targeted for named entity recognition. It is available in BRAT and CoNLL-U Plus formats, and it is free to use and extend at github.com/dumitrescustefan/ronec @@ -6766,7 +6766,7 @@ A Dataset of <fixed-case>G</fixed-case>erman Legal Documents for Named Entity Recognition ElenaLeitner GeorgRehm - JulianMoreno-Schneider + JulianMoreno-Schneider 4478–4485 We describe a dataset developed for Named Entity Recognition in German federal court decisions. It consists of approx. 67,000 sentences with over 2 million tokens. The resource contains 54,000 manually annotated entities, mapped to 19 fine-grained semantic classes: person, judge, lawyer, country, city, street, landscape, organization, company, institution, court, brand, law, ordinance, European legal norm, regulation, contract, court decision, and legal literature. The legal documents were, furthermore, automatically annotated with more than 35,000 TimeML-based time expressions. The dataset, which is available under a CC-BY 4.0 license in the CoNNL-2002 format, was developed for training an NER service for German legal documents in the EU project Lynx. 2020.lrec-1.551 @@ -6787,7 +6787,7 @@ Named Entities in Medical Case Reports: Corpus and Experiments SarahSchulz - JuricaŠeva + JuricaŠeva SamuelRodriguez MalteOstendorff GeorgRehm @@ -6820,7 +6820,7 @@ Where are we in Named Entity Recognition from Speech? AntoineCaubrière - SophieRosset + SophieRosset YannickEstève AntoineLaurent EmmanuelMorin @@ -6848,7 +6848,7 @@ HannahSmith ZeyuZhang JohnCulnan - PeterJansen + PeterJansen 4529–4546 Named entity recognition identifies common classes of entities in text, but these entity labels are generally sparse, limiting utility to downstream tasks. In this work we present ScienceExamCER, a densely-labeled semantic classification corpus of 133k mentions in the science exam domain where nearly all (96%) of content words have been annotated with one or more fine-grained semantic class labels including taxonomic groups, meronym groups, verb/action groups, properties and values, and synonyms. Semantic class labels are drawn from a manually-constructed fine-grained typology of 601 classes generated through a data-driven analysis of 4,239 science exam questions. We show an off-the-shelf BERT-based named entity recognition model modified for multi-label classification achieves an accuracy of 0.85 F1 on this task, suggesting strong utility for downstream tasks in science domain question answering requiring densely-labeled semantic classification. 2020.lrec-1.558 @@ -6860,7 +6860,7 @@ FredrikJørgensen TobiasAasmoe Anne-StineRuud Husevåg - LiljaØvrelid + LiljaØvrelid ErikVelldal 4547–4556 This paper presents NorNE, a manually annotated corpus of named entities which extends the annotation of the existing Norwegian Dependency Treebank. Comprising both of the official standards of written Norwegian (Bokmål and Nynorsk), the corpus contains around 600,000 tokens and annotates a rich set of entity types including persons, organizations, locations, geo-political entities, products, and events, in addition to a class corresponding to nominals derived from names. We here present details on the annotation effort, guidelines, inter-annotator agreement and an experimental analysis of the corpus using a neural sequence labeling architecture. @@ -6938,7 +6938,7 @@ MariaBarrett ChristinaRosted Lasse MalmLidegaard - AndersSøgaard + AndersSøgaard 4597–4604 We present a named entity annotation for the Danish Universal Dependencies treebank using the CoNLL-2003 annotation scheme: DaNE. It is the largest publicly available, Danish named entity gold annotation. We evaluate the quality of our annotations intrinsically by double annotating the entire treebank and extrinsically by comparing our annotations to a recently released named entity annotation of the validation and test sections of the Danish Universal Dependencies treebank. We benchmark the new resource by training and evaluating competitive architectures for supervised named entity recognition (NER), including FLAIR, monolingual (Danish) BERT and multilingual BERT. We explore cross-lingual transfer in multilingual BERT from five related languages in zero-shot and direct transfer setups, and we show that even with our modestly-sized training set, we improve Danish NER over a recent cross-lingual approach, as well as over zero-shot transfer from five related languages. Using multilingual BERT, we achieve higher performance by fine-tuning on both DaNE and a larger Bokmål (Norwegian) training set compared to only using DaNE. However, the highest performance isachieved by using a Danish BERT fine-tuned on DaNE. Our dataset enables improvements and applicability for Danish NER beyond cross-lingual methods. We employ a thorough error analysis of the predictions of the best models for seen and unseen entities, as well as their robustness on un-capitalized text. The annotated dataset and all the trained models are made publicly available. 2020.lrec-1.565 @@ -6975,7 +6975,7 @@ JoaquimSantos DiogoGomes FabioCordeiro - RenataVieira + RenataVieira VivianeMoreira 4625–4630 This work focuses on Portuguese Named Entity Recognition (NER) in the Geology domain. The only domain-specific dataset in the Portuguese language annotated for NER is the GeoCorpus. Our approach relies on BiLSTM-CRF neural networks (a widely used type of network for this area of research) that use vector and tensor embedding representations. Three types of embedding models were used (Word Embeddings, Flair Embeddings, and Stacked Embeddings) under two versions (domain-specific and generalized). The domain specific Flair Embeddings model was originally trained with a generalized context in mind, but was then fine-tuned with domain-specific Oil and Gas corpora, as there simply was not enough domain corpora to properly train such a model. Each of these embeddings was evaluated separately, as well as stacked with another embedding. Finally, we achieved state-of-the-art results for this domain with one of our embeddings, and we performed an error analysis on the language model that achieved the best results. Furthermore, we investigated the effects of domain-specific versus generalized embeddings. @@ -6985,11 +6985,11 @@ Establishing a New State-of-the-Art for <fixed-case>F</fixed-case>rench Named Entity Recognition - Pedro JavierOrtiz Suárez + Pedro JavierOrtiz Suárez YoannDupont BenjaminMuller - LaurentRomary - BenoîtSagot + LaurentRomary + BenoîtSagot 4631–4638 The French TreeBank developed at the University Paris 7 is the main source of morphosyntactic and syntactic annotations for French. However, it does not include explicit information related to named entities, which are among the most useful information for several natural language processing tasks and applications. Moreover, no large-scale French corpus with named entity annotations contain referential information, which complement the type and the span of each mention with an indication of the entity it refers to. We have manually annotated the French TreeBank with such information, after an automatic pre-annotation step. We sketch the underlying annotation guidelines and we provide a few figures about the resulting annotations. 2020.lrec-1.569 @@ -7012,7 +7012,7 @@ IvaMarinova LaskaLaskova PetyaOsenova - KirilSimov + KirilSimov AlexanderPopov 4647–4652 The paper reports on the usage of deep learning methods for improving a Named Entity Recognition (NER) training corpus and for predicting and annotating new types in a test corpus. We show how the annotations in a type-based corpus of named entities (NE) were populated as occurrences within it, thus ensuring density of the training information. A deep learning model was adopted for discovering inconsistencies in the initial annotation and for learning new NE types. The evaluation results get improved after data curation, randomization and deduplication. @@ -7083,8 +7083,8 @@ Controllable Sentence Simplification LouisMartin - Éricde la Clergerie - BenoîtSagot + Éricde la Clergerie + BenoîtSagot AntoineBordes 4689–4698 Text simplification aims at making a text easier to read and understand by simplifying grammar and structure while keeping the underlying information identical. It is often considered an all-purpose generic task where the same simplification is suitable for all; however multiple audiences can benefit from simplified text in different ways. We adapt a discrete parametrization mechanism that provides explicit control on simplification systems based on Sequence-to-Sequence models. As a result, users can condition the simplifications returned by a model on attributes such as length, amount of paraphrasing, lexical complexity and syntactic complexity. We also show that carefully chosen values of these attributes allow out-of-the-box Sequence-to-Sequence models to outperform their standard counterparts on simplification benchmarks. Our model, which we call ACCESS (as shorthand for AudienCe-CEntric Sentence Simplification), establishes the state of the art at 41.87 SARI on the WikiLarge test set, a +1.42 improvement over the best previously reported score. @@ -7173,7 +7173,7 @@ Towards a Gold Standard for Evaluating <fixed-case>D</fixed-case>anish Word Embeddings NinaSchneidermann RasmusHvingelby - BolettePedersen + BolettePedersen 4754–4763 This paper presents the process of compiling a model-agnostic similarity goal standard for evaluating Danish word embeddings based on human judgments made by 42 native speakers of Danish. Word embeddings resemble semantic similarity solely by distribution (meaning that word vectors do not reflect relatedness as differing from similarity), and we argue that this generalization poses a problem in most intrinsic evaluation scenarios. In order to be able to evaluate on both dimensions, our human-generated dataset is therefore designed to reflect the distinction between relatedness and similarity. The goal standard is applied for evaluating the “goodness” of six existing word embedding models for Danish, and it is discussed how a relatively low correlation can be explained by the fact that semantic similarity is substantially more challenging to model than relatedness, and that there seems to be a need for future human judgments to measure similarity in full context and along more than a single spectrum. 2020.lrec-1.585 @@ -7206,13 +7206,13 @@ Give your Text Representation Models some Love: the Case for <fixed-case>B</fixed-case>asque - RodrigoAgerri + RodrigoAgerri IñakiSan Vicente Jon AnderCampos AnderBarrena - XabierSaralegi - AitorSoroa - EnekoAgirre + XabierSaralegi + AitorSoroa + EnekoAgirre 4781–4788 Word embeddings and pre-trained language models allow to build rich representations of text and have enabled improvements across most NLP tasks. Unfortunately they are very expensive to train, and many small companies and research groups tend to use models that have been pre-trained and made available by third parties, rather than building their own. This is suboptimal as, for many languages, the models have been trained on smaller (or lower quality) corpora. In addition, monolingual pre-trained models for non-English languages are not always available. At best, models for those languages are included in multilingual versions, where each language shares the quota of substrings and parameters with the rest of the languages. This is particularly true for smaller languages such as Basque. In this paper we show that a number of monolingual models (FastText word embeddings, FLAIR and BERT language models) trained with larger Basque corpora produce much better results than publicly available versions in downstream NLP tasks, including topic classification, sentiment classification, PoS tagging and NER. This work sets a new state-of-the-art in those tasks for Basque. All benchmarks and models used in this work are publicly available. 2020.lrec-1.588 @@ -7224,7 +7224,7 @@ FrançoisTorregrossa VincentClaveau NihelKooli - GuillaumeGravier + GuillaumeGravier RobinAllesiardo 4789–4797 Word embeddings intervene in a wide range of natural language processing tasks. These geometrical representations are easy to manipulate for automatic systems. Therefore, they quickly invaded all areas of language processing. While they surpass all predecessors, it is still not straightforward why and how they do so. In this article, we propose to investigate all kind of evaluation metrics on various datasets in order to discover how they correlate with each other. Those correlations lead to 1) a fast solution to select the best word embeddings among many others, 2) a new criterion that may improve the current state of static Euclidean word embeddings, and 3) a way to create a set of complementary datasets, i.e. each dataset quantifies a different aspect of word embeddings. @@ -7279,7 +7279,7 @@ Word Embedding Evaluation in Downstream Tasks and Semantic Analogies JoaquimSantos BernardoConsoli - RenataVieira + RenataVieira 4828–4834 Language Models have long been a prolific area of study in the field of Natural Language Processing (NLP). One of the newer kinds of language models, and some of the most used, are Word Embeddings (WE). WE are vector space representations of a vocabulary learned by a non-supervised neural network based on the context in which words appear. WE have been widely used in downstream tasks in many areas of study in NLP. These areas usually use these vector models as a feature in the processing of textual data. This paper presents the evaluation of newly released WE models for the Portuguese langauage, trained with a corpus composed of 4.9 billion tokens. The first evaluation presented an intrinsic task in which WEs had to correctly build semantic and syntactic relations. The second evaluation presented an extrinsic task in which the WE models were used in two downstream tasks: Named Entity Recognition and Semantic Similarity between Sentences. Our results show that a diverse and comprehensive corpus can often outperform a larger, less textually diverse corpus, and that batch training may cause quality loss in WE models. 2020.lrec-1.594 @@ -7292,10 +7292,10 @@ SándorDarányi ChristianGeng MoniekKuijpers - OierLopez de Lacalle + OierLopez de Lacalle Jean-ChristopheMensonides SimoneRebora - UweReichel + UweReichel 4835–4841 To detect how and when readers are experiencing engagement with a literary work, we bring together empirical literary studies and language technology via focusing on the affective state of absorption. The goal of our resource development is to enable the detection of different levels of reading absorption in millions of user-generated reviews hosted on social reading platforms. We present a corpus of social book reviews in English that we annotated with reading absorption categories. Based on these data, we performed supervised, sentence level, binary classification of the explicit presence vs. absence of the mental state of absorption. We compared the performances of classical machine learners where features comprised sentence representations obtained from a pretrained embedding model (Universal Sentence Encoder) vs. neural classifiers in which sentence embedding vector representations are adapted or fine-tuned while training for the absorption recognition task. We discuss the challenges in creating the labeled data as well as the possibilities for releasing a benchmark corpus. 2020.lrec-1.595 @@ -7323,15 +7323,15 @@ The <fixed-case>MWN</fixed-case>.<fixed-case>PT</fixed-case> <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for <fixed-case>P</fixed-case>ortuguese: Projection, Validation, Cross-lingual Alignment and Distribution - AntónioBranco + AntónioBranco SaraGrilo MárciaBolrinha ChakavehSaedi RubenBranco - JoãoSilva + JoãoSilva AndreiaQuerido Ritade Carvalho - RosaGaudio + RosaGaudio MarianaAvelãs ClaraPinto 4859–4866 @@ -7364,7 +7364,7 @@ Spatial <fixed-case>AMR</fixed-case>: Expanded Spatial Annotation in the Context of a Grounded <fixed-case>M</fixed-case>inecraft Corpus JuliaBonn - MarthaPalmer + MarthaPalmer ZhengCai KristinWright-Bettner 4883–4892 @@ -7376,9 +7376,9 @@ <fixed-case>E</fixed-case>nglish <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Random Walk Pseudo-Corpora FilipKlubička - AlfredoMaldonado + AlfredoMaldonado AbhijitMahalunkar - JohnKelleher + JohnKelleher 4893–4902 This is a resource description paper that describes the creation and properties of a set of pseudo-corpora generated artificially from a random walk over the English WordNet taxonomy. Our WordNet taxonomic random walk implementation allows the exploration of different random walk hyperparameters and the generation of a variety of different pseudo-corpora. We find that different combinations of parameters result in varying statistical properties of the generated pseudo-corpora. We have published a total of 81 pseudo-corpora that we have used in our previous research, but have not exhausted all possible combinations of hyperparameters, which is why we have also published a codebase that allows the generation of additional WordNet taxonomic pseudo-corpora as needed. Ultimately, such pseudo-corpora can be used to train taxonomic word embeddings, as a way of transferring taxonomic knowledge into a word embedding space. 2020.lrec-1.602 @@ -7388,7 +7388,7 @@ On the Formal Standardization of Terminology Resources: The Case Study of <fixed-case>T</fixed-case>ri<fixed-case>MED</fixed-case> FedericaVezzani - Giorgio MariaDi Nunzio + Giorgio MariaDi Nunzio 4903–4910 The process of standardization plays an important role in the management of terminological resources. In this context, we present the work of re-modeling an existing multilingual terminological database for the medical domain, named TriMED. This resource was conceived in order to tackle some problems related to the complexity of medical terminology and to respond to different users’ needs. We provide a methodology that should be followed in order to make a termbase compliant to the three most recent ISO/TC 37 standards. In particular, we focus on the definition of i) the structural meta-model of the resource, ii) the data categories provided, and iii) the TBX format for its implementation. In addition to the formal standardization of the resource, we describe the realization of a new data category repository for the management of the TriMED terminological data and a Web application that can be used to access the multilingual terminological records. 2020.lrec-1.603 @@ -7398,7 +7398,7 @@ Metaphorical Expressions in Automatic <fixed-case>A</fixed-case>rabic Sentiment Analysis IsraaAlsiyat - ScottPiao + ScottPiao 4911–4916 Over the recent years, Arabic language resources and NLP tools have been under rapid development. One of the important tasks for Arabic natural language processing is the sentiment analysis. While a significant improvement has been achieved in this research area, the existing computational models and tools still suffer from the lack of capability of dealing with Arabic metaphorical expressions. Metaphor has an important role in Arabic language due to its unique history and culture. Metaphors provide a linguistic mechanism for expressing ideas and notions that can be different from their surface form. Therefore, in order to efficiently identify true sentiment of Arabic language data, a computational model needs to be able to “read between lines”. In this paper, we examine the issue of metaphors in automatic Arabic sentiment analysis by carrying out an experiment, in which we observe the performance of a state-of-art Arabic sentiment tool on metaphors and analyse the result to gain a deeper insight into the issue. Our experiment evidently shows that metaphors have a significant impact on the performance of current Arabic sentiment tools, and it is an important task to develop Arabic language resources and computational models for Arabic metaphors. 2020.lrec-1.604 @@ -7443,7 +7443,7 @@ An Empirical Examination of Online Restaurant Reviews Hyun JungKang - IrisEshkol-Taravella + IrisEshkol-Taravella 4942–4947 In the wake of (Pang et al., 2002; Turney, 2002; Liu, 2012) inter alia, opinion mining and sentiment analysis have focused on extracting either positive or negative opinions from texts and determining the targets of these opinions. In this study, we go beyond the coarse-grained positive vs. negative opposition and propose a corpus-based scheme that detects evaluative language at a finer-grained level. We classify each sentence into one of four evaluation types based on the proposed scheme: (1) the reviewer’s opinion on the restaurant (positive, negative, or mixed); (2) the reviewer’s input/feedback to potential customers and restaurant owners (suggestion, advice, or warning) (3) whether the reviewer wants to return to the restaurant (intention); (4) the factual statement about the experience (description). We apply classical machine learning and deep learning methods to show the effectiveness of our scheme. We also interpret the performances that we obtained for each category by taking into account the specificities of the corpus treated. 2020.lrec-1.608 @@ -7466,7 +7466,7 @@ NathalieCamelin ChafikAloulou YannickEstève - LamiaHadrich Belguith + LamiaHadrich Belguith 4955–4963 In this paper, we propose several protocols to evaluate specific embeddings for Arabic sentiment analysis (SA) task. In fact, Arabic language is characterized by its agglutination and morphological richness contributing to great sparsity that could affect embedding quality. This work presents a study that compares embeddings based on words and lemmas in SA frame. We propose first to study the evolution of embedding models trained with different types of corpora (polar and non polar) and explore the variation between embeddings by observing the sentiment stability of neighbors in embedding spaces. Then, we evaluate embeddings with a neural architecture based on convolutional neural network (CNN). We make available our pre-trained embeddings to Arabic NLP research community with free to use. We provide also for free resources used to evaluate our embeddings. Experiments are done on the Large Arabic-Book Reviews (LABR) corpus in binary (positive/negative) classification frame. Our best result reaches 91.9%, that is higher than the best previous published one (91.5%). 2020.lrec-1.610 @@ -7501,7 +7501,7 @@ AkashSheoran DipteshKanojia AdityaJoshi - PushpakBhattacharyya + PushpakBhattacharyya 4982–4990 Cross-domain sentiment analysis (CDSA) helps to address the problem of data scarcity in scenarios where labelled data for a domain (known as the target domain) is unavailable or insufficient. However, the decision to choose a domain (known as the source domain) to leverage from is, at best, intuitive. In this paper, we investigate text similarity metrics to facilitate source domain selection for CDSA. We report results on 20 domains (all possible pairs) using 11 similarity metrics. Specifically, we compare CDSA performance with these metrics for different domain-pairs to enable the selection of a suitable source domain, given a target domain. These metrics include two novel metrics for evaluating domain adaptability to help source domain selection of labelled data and utilize word and sentence-based embeddings as metrics for unlabelled data. The goal of our experiments is a recommendation chart that gives the K best source domains for CDSA for a given target domain. We show that the best K source domains returned by our similarity metrics have a precision of over 50%, for varying values of K. 2020.lrec-1.613 @@ -7527,7 +7527,7 @@ NadaNaji LouisMarceau MarcQueudot - EricCharton + EricCharton LeilaKosseim Marie-JeanMeurs 5000–5009 @@ -7560,7 +7560,7 @@ A Fine-grained Sentiment Dataset for <fixed-case>N</fixed-case>orwegian - LiljaØvrelid + LiljaØvrelid PetterMæhlum JeremyBarnes ErikVelldal @@ -7602,7 +7602,7 @@ Multi-domain Tweet Corpora for Sentiment Analysis: Resource Creation and Evaluation Mamta AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya ShikhaSrivastava AlkaKumar TistaSaha @@ -7614,10 +7614,10 @@ Reproduction and Revival of the Argument Reasoning Comprehension Task - JoãoAntónio Rodrigues + JoãoAntónio Rodrigues RubenBranco - JoãoSilva - AntónioBranco + JoãoSilva + AntónioBranco 5055–5064 Reproduction of scientific findings is essential for scientific development across all scientific disciplines and reproducing results of previous works is a basic requirement for validating the hypothesis and conclusions put forward by them. This paper reports on the scientific reproduction of several systems addressing the Argument Reasoning Comprehension Task of SemEval2018. Given a recent publication that pointed out spurious statistical cues in the data set used in the shared task, and that produced a revised version of it, we also evaluated the reproduced systems with this new data set. The exercise reported here shows that, in general, the reproduction of these systems is successful with scores in line with those reported in SemEval2018. However, the performance scores are worst than those, and even below the random baseline, when the reproduced systems are run over the revised data set expunged from data artifacts. This demonstrates that this task is actually a much harder challenge than what could have been perceived from the inflated, close to human-level performance scores obtained with the data set used in SemEval2018. This calls for a revival of this task as there is much room for improvement until systems may come close to the upper bound provided by human performance. 2020.lrec-1.622 @@ -7626,7 +7626,7 @@ Design and Evaluation of <fixed-case>S</fixed-case>enti<fixed-case>E</fixed-case>con: a fine-grained Economic/Financial Sentiment Lexicon from a Corpus of Business News - AntonioMoreno-Ortiz + AntonioMoreno-Ortiz JavierFernandez-Cruz Chantal Pérez ChantalHernández 5065–5072 @@ -7638,7 +7638,7 @@ <fixed-case>P</fixed-case>arl<fixed-case>V</fixed-case>ote: A Corpus for Sentiment Analysis of Political Debates GavinAbercrombie - RizaBatista-Navarro + RizaBatista-Navarro 5073–5078 Debate transcripts from the UK Parliament contain information about the positions taken by politicians towards important topics, but are difficult for people to process manually. While sentiment analysis of debate speeches could facilitate understanding of the speakers’ stated opinions, datasets currently available for this task are small when compared to the benchmark corpora in other domains. We present ParlVote, a new, larger corpus of parliamentary debate speeches for use in the evaluation of sentiment analysis systems for the political domain. We also perform a number of initial experiments on this dataset, testing a variety of approaches to the classification of sentiment polarity in debate speeches. These include a linear classifier as well as a neural network trained using a transformer word embedding model (BERT), and fine-tuned on the parliamentary speeches. We find that in many scenarios, a linear classifier trained on a bag-of-words text representation achieves the best results. However, with the largest dataset, the transformer-based model combined with a neural classifier provides the best performance. We suggest that further experimentation with classification models and observations of the debate content and structure are required, and that there remains much room for improvement in parliamentary sentiment analysis. 2020.lrec-1.624 @@ -7648,7 +7648,7 @@ Offensive Language Detection Using Brown Clustering ZuoyuTian - SandraKübler + SandraKübler 5079–5087 In this study, we investigate the use of Brown clustering for offensive language detection. Brown clustering has been shown to be of little use when the task involves distinguishing word polarity in sentiment analysis tasks. In contrast to previous work, we train Brown clusters separately on positive and negative sentiment data, but then combine the information into a single complex feature per word. This way of representing words results in stable improvements in offensive language detection, when used as the only features or in combination with words or character n-grams. Brown clusters add important information, even when combined with words or character n-grams or with standard word embeddings in a convolutional neural network. However, we also found different trends between the two offensive language data sets we used. 2020.lrec-1.625 @@ -7659,7 +7659,7 @@ Annotating for Hate Speech: The <fixed-case>M</fixed-case>a<fixed-case>N</fixed-case>e<fixed-case>C</fixed-case>o Corpus and Some Input from Critical Discourse Analysis StavrosAssimakopoulos RebeccaVella Muskat - Lonnekevan der Plas + Lonnekevan der Plas AlbertGatt 5088–5097 This paper presents a novel scheme for the annotation of hate speech in corpora of Web 2.0 commentary. The proposed scheme is motivated by the critical analysis of posts made in reaction to news reports on the Mediterranean migration crisis and LGBTIQ+ matters in Malta, which was conducted under the auspices of the EU-funded C.O.N.T.A.C.T. project. Based on the realisation that hate speech is not a clear-cut category to begin with, appears to belong to a continuum of discriminatory discourse and is often realised through the use of indirect linguistic means, it is argued that annotation schemes for its detection should refrain from directly including the label ‘hate speech,’ as different annotators might have different thresholds as to what constitutes hate speech and what not. In view of this, we propose a multi-layer annotation scheme, which is pilot-tested against a binary ±hate speech classification and appears to yield higher inter-annotator agreement. Motivating the postulation of our scheme, we then present the MaNeCo corpus on which it will eventually be used; a substantial corpus of on-line newspaper comments spanning 10 years. @@ -7703,7 +7703,7 @@ Syntax and Semantics in a Treebank for <fixed-case>E</fixed-case>speranto - EckhardBick + EckhardBick 5120–5127 In this paper we describe and evaluate syntactic and semantic aspects of Arbobanko, a treebank for the artificial language Esperanto, as well as tools and methods used in the production of the treebank. In addition to classical morphosyntax and dependency structure, the treebank was enriched with a lexical-semantic layer covering named entities, a semantic type ontology for nouns and adjectives and a framenet-inspired semantic classification of verbs. For an under-resourced language, the quality of automatic syntactic and semantic pre-annotation is of obvious importance, and by evaluating the underlying parser and the coverage of its semantic ontologies, we try to answer the question whether the language’s extremely regular morphology and transparent semantic affixes translate into a more regular syntax and higher parsing accuracy. On the linguistic side, the treebank allows us to address and quantify typological issues such as the question of word order, auxiliary constructions, lexical transparency and semantic type ambiguity in Esperanto. 2020.lrec-1.630 @@ -7712,7 +7712,7 @@ Implementation and Evaluation of an <fixed-case>LFG</fixed-case>-based Parser for <fixed-case>W</fixed-case>olof - Cheikh M. BambaDione + Cheikh M. BambaDione 5128–5136 This paper reports on a parsing system for Wolof based on the LFG formalism. The parser covers core constructions of Wolof, including noun classes, cleft, copula, causative and applicative sentences. It also deals with several types of coordination, including same constituent coordination, asymmetric and asyndetic coordination. The system uses a cascade of finite-state transducers for word tokenization and morphological analysis as well as various lexicons. In addition, robust parsing techniques, including fragmenting and skimming, are used to optimize grammar coverage. Parsing coverage is evaluated by running test-suites of naturally occurring Wolof sentences through the parser. The evaluation of parsing coverage reveals that 72.72% of the test sentences receive full parses; 27.27% receive partial parses. To measure accuracy, the parsed sentences are disambiguated manually using an incremental parsebanking approach based on discriminants. The evaluation of parsing quality reveals that the parser achieves 67.2% recall, 92.8% precision and an f-score of 77.9%. 2020.lrec-1.631 @@ -7754,7 +7754,7 @@ Chunk Different Kind of Spoken Discourse: Challenges for Machine Learning - IrisEshkol-Taravella + IrisEshkol-Taravella MariameMaarouf FloraBadin MarieSkrovec @@ -7767,7 +7767,7 @@ <fixed-case>GRAIN</fixed-case>-<fixed-case>S</fixed-case>: Manually Annotated Syntax for <fixed-case>G</fixed-case>erman Interviews - AgnieszkaFalenska + AgnieszkaFalenska ZoltánCzesznak KerstinJung MoritzVölkel @@ -7782,7 +7782,7 @@ <fixed-case>Y</fixed-case>orùbá Dependency Treebank (<fixed-case>YTB</fixed-case>) OlájídéIshola - DanielZeman + DanielZeman 5178–5186 Low-resource languages present enormous NLP opportunities as well as varying degrees of difficulties. The newly released treebank of hand-annotated parts of the Yoruba Bible provides an avenue for dependency analysis of the Yoruba language; the application of a new grammar formalism to the language. In this paper, we discuss our choice of Universal Dependencies, important dependency annotation decisions considered in the creation of the first annotation guidelines for Yoruba and results of our parsing experiments. We also lay the foundation for future incorporation of other domains with the initial test on Yoruba Wikipedia articles and highlighted future directions for the rapid expansion of the treebank. 2020.lrec-1.637 @@ -7793,7 +7793,7 @@ <fixed-case>E</fixed-case>nglish Recipe Flow Graph Corpus YokoYamakata ShinsukeMori - JohnCarroll + JohnCarroll 5187–5194 We present an annotated corpus of English cooking recipe procedures, and describe and evaluate computational methods for learning these annotations. The corpus consists of 300 recipes written by members of the public, which we have annotated with domain-specific linguistic and semantic structure. Each recipe is annotated with (1) ‘recipe named entities’ (r-NEs) specific to the recipe domain, and (2) a flow graph representing in detail the sequencing of steps, and interactions between cooking tools, food ingredients and the products of intermediate steps. For these two kinds of annotations, inter-annotator agreement ranges from 82.3 to 90.5 F1, indicating that our annotation scheme is appropriate and consistent. We experiment with producing these annotations automatically. For r-NE tagging we train a deep neural network NER tool; to compute flow graphs we train a dependency-style parsing procedure which we apply to the entire sequence of r-NEs in a recipe. In evaluations, our systems achieve 71.1 to 87.5 F1, demonstrating that our annotation scheme is learnable. 2020.lrec-1.638 @@ -7824,7 +7824,7 @@ <fixed-case>P</fixed-case>rague Dependency Treebank - Consolidated 1.0 - JanHajič + JanHajič EduardBejček JaroslavaHlavacova MarieMikulová @@ -7851,7 +7851,7 @@ Parsing as Tagging RobertVacareanu George CaiqueGouveia Barbosa - Marco A.Valenzuela-Escárcega + Marco A.Valenzuela-Escárcega MihaiSurdeanu 5225–5231 We propose a simple yet accurate method for dependency parsing that treats parsing as tagging (PaT). That is, our approach addresses the parsing of dependency trees with a sequence model implemented with a bidirectional LSTM over BERT embeddings, where the “tag” to be predicted at each token position is the relative position of the corresponding head. For example, for the sentence John eats cake, the tag to be predicted for the token cake is -1 because its head (eats) occurs one token to the left. Despite its simplicity, our approach performs well. For example, our approach outperforms the state-of-the-art method of (Fernández-González and Gómez-Rodríguez, 2019) on Universal Dependencies (UD) by 1.76% unlabeled attachment score (UAS) for English, 1.98% UAS for French, and 1.16% UAS for German. On average, on 12 UD languages, our method with minimal tuning performs comparably with this state-of-the-art approach: better by 0.11% UAS, and worse by 0.58% LAS. @@ -7876,12 +7876,12 @@ ManuelaSanguinetti CristinaBosco LaurenCassidy - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu Alessandra TeresaCignarella TeresaLynn InesRehbein JosefRuppenhofer - DjaméSeddah + DjaméSeddah AmirZeldes 5240–5250 The paper presents a discussion on the main linguistic phenomena of user-generated texts found in web and social media, and proposes a set of annotation guidelines for their treatment within the Universal Dependencies (UD) framework. Given on the one hand the increasing number of treebanks featuring user-generated content, and its somewhat inconsistent treatment in these resources on the other, the aim of this paper is twofold: (1) to provide a short, though comprehensive, overview of such treebanks - based on available literature - along with their main features and a comparative analysis of their annotation criteria, and (2) to propose a set of tentative UD-based annotation guidelines, to promote consistent treatment of the particular phenomena found in these types of texts. The main goal of this paper is to provide a common framework for those teams interested in developing similar resources in UD, thus enabling cross-linguistic consistency, which is a principle that has always been in the spirit of UD. @@ -7902,7 +7902,7 @@ <fixed-case>ÆTHEL</fixed-case>: Automatically Extracted Typelogical Derivations for <fixed-case>D</fixed-case>utch KonstantinosKogkalidis - MichaelMoortgat + MichaelMoortgat RichardMoot 5257–5266 We present ÆTHEL, a semantic compositionality dataset for written Dutch. ÆTHEL consists of two parts. First, it contains a lexicon of supertags for about 900 000 words in context. The supertags correspond to types of the simply typed linear lambda-calculus, enhanced with dependency decorations that capture grammatical roles supplementary to function-argument structures. On the basis of these types, ÆTHEL further provides 72 192 validated derivations, presented in four formats: natural-deduction and sequent-style proofs, linear logic proofnets and the associated programs (lambda terms) for meaning composition. ÆTHEL’s types and derivations are obtained by means of an extraction algorithm applied to the syntactic analyses of LASSY Small, the gold standard corpus of written Dutch. We discuss the extraction algorithm and show how ‘virtual elements’ in the original LASSY annotation of unbounded dependencies and coordination phenomena give rise to higher-order types. We suggest some example usecases highlighting the benefits of a type-driven approach at the syntax semantics interface. The following resources are open-sourced with ÆTHEL: the lexical mappings between words and types, a subset of the dataset consisting of 7 924 semantic parses, and the Python code that implements the extraction algorithm. @@ -7987,7 +7987,7 @@ EitanGrossman EladEisen DmitryNikolaev - StevenMoran + StevenMoran 5316–5322 Phonological segment borrowing is a process through which languages acquire new contrastive speech sounds as the result of borrowing new words from other languages. Despite the fact that phonological segment borrowing is documented in many of the world’s languages, to date there has been no large-scale quantitative study of the phenomenon. In this paper, we present SegBo, a novel cross-linguistic database of borrowed phonological segments. We describe our data aggregation pipeline and the resulting language sample. We also present two short case studies based on the database. The first deals with the impact of large colonial languages on the sound systems of the world’s languages; the second deals with universals of borrowing in the domain of rhotic consonants. 2020.lrec-1.654 @@ -8007,13 +8007,13 @@ <fixed-case>A</fixed-case>llo<fixed-case>V</fixed-case>era: A Multilingual Allophone Database - David R.Mortensen + David R.Mortensen XinjianLi PatrickLittell AlexisMichaud ShrutiRijhwani AntoniosAnastasopoulos - Alan WBlack + Alan WBlack FlorianMetze GrahamNeubig 5329–5336 @@ -8040,7 +8040,7 @@ AndreKåsen KristinHagen AndersNøklestad - JoelPriestley + JoelPriestley 5343–5350 The present article presents four experiments with two different methods for measuring dialect similarity in Norwegian: the Levenshtein method and the neural long short term memory (LSTM) autoencoder network, a machine learning algorithm. The visual output in the form of dialect maps is then compared with canonical maps found in the dialect literature. All of this enables us to say that one does not need fine-grained transcriptions of speech to replicate classical classification patterns. 2020.lrec-1.658 @@ -8062,9 +8062,9 @@ A Framework for Evaluation of Machine Reading Comprehension Gold Standards ViktorSchlegel MarcoValentino - AndreFreitas - GoranNenadic - RizaBatista-Navarro + AndreFreitas + GoranNenadic + RizaBatista-Navarro 5359–5369 Machine Reading Comprehension (MRC) is the task of answering a question over a paragraph of text. While neural MRC systems gain popularity and achieve noticeable performance, issues are being raised with the methodology used to establish their performance, particularly concerning the data design of gold standards that are used to evaluate them. There is but a limited understanding of the challenges present in this data, which makes it hard to draw comparisons and formulate reliable hypotheses. As a first step towards alleviating the problem, this paper proposes a unifying framework to systematically investigate the present linguistic features, required reasoning and background knowledge and factual correctness on one hand, and the presence of lexical cues as a lower bound for the requirement of understanding on the other hand. We propose a qualitative annotation schema for the first and a set of approximative metrics for the latter. In a first application of the framework, we analyse modern MRC gold standards and present our findings: the absence of features that contribute towards lexical ambiguity, the varying factual correctness of the expected answers and the presence of lexical cues, all of which potentially lower the reading comprehension complexity and quality of the evaluation data. 2020.lrec-1.660 @@ -8074,7 +8074,7 @@ Multi-class Hierarchical Question Classification for Multiple Choice Science Exams DongfangXu - PeterJansen + PeterJansen JaycieMartin ZhengnanXie VikasYadav @@ -8114,7 +8114,7 @@ SeunghyunYoon FranckDernoncourt Doo SoonKim - TrungBui + TrungBui KyominJung 5400–5407 In this study, we propose a novel graph neural network called propagate-selector (PS), which propagates information over sentences to understand information that cannot be inferred when considering sentences in isolation. First, we design a graph structure in which each node represents an individual sentence, and some pairs of nodes are selectively connected based on the text structure. Then, we develop an iterative attentive aggregation and a skip-combine method in which a node interacts with its neighborhood nodes to accumulate the necessary information. To evaluate the performance of the proposed approaches, we conduct experiments with the standard HotpotQA dataset. The empirical results demonstrate the superiority of our proposed approach, which obtains the best performances, compared to the widely used answer-selection models that do not consider the intersentential relationship. @@ -8175,12 +8175,12 @@ <fixed-case>AIA</fixed-case>-<fixed-case>BDE</fixed-case>: A Corpus of <fixed-case>FAQ</fixed-case>s in <fixed-case>P</fixed-case>ortuguese and their Variations - HugoGonçalo Oliveira + HugoGonçalo Oliveira JoãoFerreira JoséSantos PedroFialho RicardoRodrigues - LuisaCoheur + LuisaCoheur AnaAlves 5442–5449 We present AIA-BDE, a corpus of 380 domain-oriented FAQs in Portuguese and their variations, i.e., paraphrases or entailed questions, created manually, by humans, or automatically, with Google Translate. Its aims to be used as a benchmark for FAQ retrieval and automatic question-answering, but may be useful in other contexts, such as the development of task-oriented dialogue systems, or models for natural language inference in an interrogative context. We also report on two experiments. Matching variations with their original questions was not trivial with a set of unsupervised baselines, especially for manually created variations. Besides high performances obtained with ELMo and BERT embeddings, an Information Retrieval system was surprisingly competitive when considering only the first hit. In the second experiment, text classifiers were trained with the original questions, and tested when assigning each variation to one of three possible sources, or assigning them as out-of-domain. Here, the difference between manual and automatic variations was not so significant. @@ -8194,7 +8194,7 @@ SeokhwanKim FranckDernoncourt SiddheshGupte - ZheWang + ZheWang Doo SoonKim 5450–5455 Despite the number of currently available datasets on video-question answering, there still remains a need for a dataset involving multi-step and non-factoid answers. Moreover, relying on video transcripts remains an under-explored topic. To adequately address this, we propose a new question answering task on instructional videos, because of their verbose and narrative nature. While previous studies on video question answering have focused on generating a short text as an answer, given a question and video clip, our task aims to identify a span of a video segment as an answer which contains instructional details with various granularities. This work focuses on screencast tutorial videos pertaining to an image editing program. We introduce a dataset, TutorialVQA, consisting of about 6,000 manually collected triples of (video, question, answer span). We also provide experimental results with several baseline algorithms using the video transcripts. The results indicate that the task is challenging and call for the investigation of new algorithms. @@ -8209,7 +8209,7 @@ JaycieMartin ElizabethWainwright StevenMarmorstein - PeterJansen + PeterJansen 5456–5473 Explainable question answering for complex questions often requires combining large numbers of facts to answer a question while providing a human-readable explanation for the answer, a process known as multi-hop inference. Standardized science questions require combining an average of 6 facts, and as many as 16 facts, in order to answer and explain, but most existing datasets for multi-hop reasoning focus on combining only two facts, significantly limiting the ability of multi-hop inference algorithms to learn to generate large inferences. In this work we present the second iteration of the WorldTree project, a corpus of 5,114 standardized science exam questions paired with large detailed multi-fact explanations that combine core scientific knowledge and world knowledge. Each explanation is represented as a lexically-connected “explanation graph” that combines an average of 6 facts drawn from a semi-structured knowledge base of 9,216 facts across 66 tables. We use this explanation corpus to author a set of 344 high-level science domain inference patterns similar to semantic frames supporting multi-hop inference. Together, these resources provide training data and instrumentation for developing many-fact multi-hop inference models for question answering. 2020.lrec-1.671 @@ -8219,7 +8219,7 @@ Chat or Learn: a Data-Driven Robust Question-Answering System GabrielLuthier - AndreiPopescu-Belis + AndreiPopescu-Belis 5474–5480 We present a voice-based conversational agent which combines the robustness of chatbots and the utility of question answering (QA) systems. Indeed, while data-driven chatbots are typically user-friendly but not goal-oriented, QA systems tend to perform poorly at chitchat. The proposed chatbot relies on a controller which performs dialogue act classification and feeds user input either to a sequence-to-sequence chatbot or to a QA system. The resulting chatbot is a spoken QA application for the Google Home smart speaker. The system is endowed with general-domain knowledge from Wikipedia articles and uses coreference resolution to detect relatedness between questions. We present our choices of data sets for training and testing the components, and present the experimental results that helped us optimize the parameters of the chatbot. In particular, we discuss the appropriateness of using the SQuAD dataset for evaluating end-to-end QA, in the light of our system’s behavior. 2020.lrec-1.672 @@ -8245,7 +8245,7 @@ Cross-lingual and Cross-domain Evaluation of Machine Reading Comprehension with Squad and <fixed-case>CALOR</fixed-case>-Quest Corpora DelphineCharlet - GeraldineDamnati + GeraldineDamnati FredericBechet GabrielMarzinotto JohannesHeinecke @@ -8259,7 +8259,7 @@ <fixed-case>S</fixed-case>cholarly<fixed-case>R</fixed-case>ead: A New Dataset for Scientific Article Reading Comprehension TanikSaikh AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 5498–5504 We present ScholarlyRead, span-of-word-based scholarly articles’ Reading Comprehension (RC) dataset with approximately 10K manually checked passage-question-answer instances. ScholarlyRead was constructed in semi-automatic way. We consider the articles from two popular journals of a reputed publishing house. Firstly, we generate questions from these articles in an automatic way. Generated questions are then manually checked by the human annotators. We propose a baseline model based on Bi-Directional Attention Flow (BiDAF) network that yields the F1 score of 37.31%. The framework would be useful for building Question-Answering (QA) systems on scientific articles. 2020.lrec-1.675 @@ -8269,7 +8269,7 @@ Contextualized Embeddings based Transformer Encoder for Sentence Similarity Modeling in Answer Selection Task Md Tahmid RahmanLaskar - Jimmy XiangjiHuang + Jimmy XiangjiHuang EnamulHoque 5505–5514 Word embeddings that consider context have attracted great attention for various natural language processing tasks in recent years. In this paper, we utilize contextualized word embeddings with the transformer encoder for sentence similarity modeling in the answer selection task. We present two different approaches (feature-based and fine-tuning-based) for answer selection. In the feature-based approach, we utilize two types of contextualized embeddings, namely the Embeddings from Language Models (ELMo) and the Bidirectional Encoder Representations from Transformers (BERT) and integrate each of them with the transformer encoder. We find that integrating these contextual embeddings with the transformer encoder is effective to improve the performance of sentence similarity modeling. In the second approach, we fine-tune two pre-trained transformer encoder models for the answer selection task. Based on our experiments on six datasets, we find that the fine-tuning approach outperforms the feature-based approach on all of them. Among our fine-tuning-based models, the Robustly Optimized BERT Pretraining Approach (RoBERTa) model results in new state-of-the-art performance across five datasets. @@ -8280,8 +8280,8 @@ Automatic <fixed-case>S</fixed-case>panish Translation of <fixed-case>SQ</fixed-case>u<fixed-case>AD</fixed-case> Dataset for Multi-lingual Question Answering Casimiro PioCarrino - Marta R.Costa-jussà - José A. R.Fonollosa + Marta R.Costa-jussà + José A. R.Fonollosa 5515–5523 Recently, multilingual question answering became a crucial research topic, and it is receiving increased interest in the NLP community. However, the unavailability of large-scale datasets makes it challenging to train multilingual QA systems with performance comparable to the English ones. In this work, we develop the Translate Align Retrieve (TAR) method to automatically translate the Stanford Question Answering Dataset (SQuAD) v1.1 to Spanish. We then used this dataset to train Spanish QA systems by fine-tuning a Multilingual-BERT model. Finally, we evaluated our QA models with the recently proposed MLQA and XQuAD benchmarks for cross-lingual Extractive QA. Experimental results show that our models outperform the previous Multilingual-BERT baselines achieving the new state-of-the-art values of 68.1 F1 on the Spanish MLQA corpus and 77.6 F1 on the Spanish XQuAD corpus. The resulting, synthetically generated SQuAD-es v1.1 corpora, with almost 100% of data contained in the original English version, to the best of our knowledge, is the first large-scale QA training resource for Spanish. 2020.lrec-1.677 @@ -8310,12 +8310,12 @@ A Shared Task of a New, Collaborative Type to Foster Reproducibility: A First Exercise in the Area of Language Science and Technology with <fixed-case>REPROLANG</fixed-case>2020 - AntónioBranco + AntónioBranco NicolettaCalzolari PiekVossen - GertjanVan Noord - Dietervan Uytvanck - JoãoSilva + GertjanVan Noord + Dietervan Uytvanck + JoãoSilva LuísGomes AndréMoreira WillemElbers @@ -8363,7 +8363,7 @@ KyeongminRim JingxuanTu KelleyLynch - JamesPustejovsky + JamesPustejovsky 5569–5578 Within the natural language processing (NLP) community, shared tasks play an important role. They define a common goal and allowthe the comparison of different methods on the same data. SemEval-2018 Task 7 involves the identification and classification of relationsin abstracts from computational linguistics (CL) publications. In this paper we describe an attempt to reproduce the methods and resultsfrom the top performing system at for SemEval-2018 Task 7. We describe challenges we encountered in the process, report on the resultsof our system, and discuss the ways that our attempt at reproduction can inform best practices. 2020.lrec-1.684 @@ -8478,14 +8478,14 @@ Recent Developments for the Linguistic Linked Open Data Infrastructure ThierryDeclerck - John PhilipMcCrae - MatthiasHartung + John PhilipMcCrae + MatthiasHartung JorgeGracia ChristianChiarcos ElenaMontiel-Ponsoda - PhilippCimiano + PhilippCimiano ArtemRevenko - RoserSaurí + RoserSaurí DeirdreLee StefaniaRacioppa JamalAbdul Nasir @@ -8575,7 +8575,7 @@ YunfeiLong AnranLi EmmanueleChersoni - QinLu + QinLu Chu-RenHuang 5714–5720 Automatic Chinese irony detection is a challenging task, and it has a strong impact on linguistic research. However, Chinese irony detection often lacks labeled benchmark datasets. In this paper, we introduce Ciron, the first Chinese benchmark dataset available for irony detection for machine learning models. Ciron includes more than 8.7K posts, collected from Weibo, a micro blogging platform. Most importantly, Ciron is collected with no pre-conditions to ensure a much wider coverage. Evaluation on seven different machine learning classifiers proves the usefulness of Ciron as an important resource for Chinese irony detection. @@ -8586,7 +8586,7 @@ wiki<fixed-case>H</fixed-case>ow<fixed-case>T</fixed-case>o<fixed-case>I</fixed-case>mprove: A Resource and Analyses on Edits in Instructional Texts TalitaAnthonio - IrshadBhat + IrshadBhat MichaelRoth 5721–5729 Instructional texts, such as articles in wikiHow, describe the actions necessary to accomplish a certain goal. In wikiHow and other resources, such instructions are subject to revision edits on a regular basis. Do these edits improve instructions only in terms of style and correctness, or do they provide clarifications necessary to follow the instructions and to accomplish the goal? We describe a resource and first studies towards answering this question. Specifically, we create wikiHowToImprove, a collection of revision histories for about 2.7 million sentences from about 246000 wikiHow articles. We describe human annotation studies on categorizing a subset of sentence-level edits and provide baseline models for the task of automatically distinguishing “older” from “newer” revisions of a sentence. @@ -8617,7 +8617,7 @@ Spatial Multi-Arrangement for Clustering and Multi-way Similarity Dataset Construction OlgaMajewska - DianaMcCarthy + DianaMcCarthy Jaspervan den Bosch NikolausKriegeskorte IvanVulić @@ -8631,7 +8631,7 @@ A Short Survey on Sense-Annotated Corpora TommasoPasini - JoseCamacho-Collados + JoseCamacho-Collados 5759–5765 Large sense-annotated datasets are increasingly necessary for training deep supervised systems in Word Sense Disambiguation. However, gathering high-quality sense-annotated data for as many instances as possible is a laborious and expensive task. This has led to the proliferation of automatic and semi-automatic methods for overcoming the so-called knowledge-acquisition bottleneck. In this short survey we present an overview of sense-annotated corpora, annotated either manually- or (semi)automatically, that are currently available for different languages and featuring distinct lexical resources as inventory of senses, i.e. WordNet, Wikipedia, BabelNet. Furthermore, we provide the reader with general statistics of each dataset and an analysis of their specific features. 2020.lrec-1.706 @@ -8654,7 +8654,7 @@ SalvadorLima Lopez NaiaraPerez MontseCuadros - GermanRigau + GermanRigau 5772–5781 This paper introduces the first version of the NUBes corpus (Negation and Uncertainty annotations in Biomedical texts in Spanish). The corpus is part of an on-going research and currently consists of 29,682 sentences obtained from anonymised health records annotated with negation and uncertainty. The article includes an exhaustive comparison with similar corpora in Spanish, and presents the main annotation and design decisions. Additionally, we perform preliminary experiments using deep learning algorithms to validate the annotated dataset. As far as we know, NUBes is the largest available corpora for negation in Spanish and the first that also incorporates the annotation of speculation cues, scopes, and events. 2020.lrec-1.708 @@ -8665,7 +8665,7 @@ Decomposing and Comparing Meaning Relations: Paraphrasing, Textual Entailment, Contradiction, and Specificity VenelinKovatchev DarinaGold - M. AntoniaMarti + M. AntoniaMarti MariaSalamo TorstenZesch 5782–5791 @@ -8677,8 +8677,8 @@ Object Naming in Language and Vision: A Survey and a New Dataset CarinaSilberer - SinaZarrieß - GemmaBoleda + SinaZarrieß + GemmaBoleda 5792–5801 People choose particular names for objects, such as dog or puppy for a given dog. Object naming has been studied in Psycholinguistics, but has received relatively little attention in Computational Linguistics. We review resources from Language and Vision that could be used to study object naming on a large scale, discuss their shortcomings, and create a new dataset that affords more opportunities for analysis and modeling. Our dataset, ManyNames, provides 36 name annotations for each of 25K objects in images selected from VisualGenome. We highlight the challenges involved and provide a preliminary analysis of the ManyNames data, showing that there is a high level of agreement in naming, on average. At the same time, the average number of name types associated with an object is much higher in our dataset than in existing corpora for Language and Vision, such that ManyNames provides a rich resource for studying phenomena like hierarchical variation (chihuahua vs. dog), which has been discussed at length in the theoretical literature, and other less well studied phenomena like cross-classification (cake vs. dessert). 2020.lrec-1.710 @@ -8701,7 +8701,7 @@ Figure Me Out: A Gold Standard Dataset for Metaphor Interpretation OmniaZayed - John PhilipMcCrae + John PhilipMcCrae PaulBuitelaar 5810–5819 Metaphor comprehension and understanding is a complex cognitive task that requires interpreting metaphors by grasping the interaction between the meaning of their target and source concepts. This is very challenging for humans, let alone computers. Thus, automatic metaphor interpretation is understudied in part due to the lack of publicly available datasets. The creation and manual annotation of such datasets is a demanding task which requires huge cognitive effort and time. Moreover, there will always be a question of accuracy and consistency of the annotated data due to the subjective nature of the problem. This work addresses these issues by presenting an annotation scheme to interpret verb-noun metaphoric expressions in text. The proposed approach is designed with the goal of reducing the workload on annotators and maintain consistency. Our methodology employs an automatic retrieval approach which utilises external lexical resources, word embeddings and semantic similarity to generate possible interpretations of identified metaphors in order to enable quick and accurate annotation. We validate our proposed approach by annotating around 1,500 metaphors in tweets which were annotated by six native English speakers. As a result of this work, we publish as linked data the first gold standard dataset for metaphor interpretation which will facilitate research in this area. @@ -8750,7 +8750,7 @@ Word Attribute Prediction Enhanced by Lexical Entailment Tasks MikaHasegawa TetsunoriKobayashi - YoshihikoHayashi + YoshihikoHayashi 5846–5854 Human semantic knowledge about concepts acquired through perceptual inputs and daily experiences can be expressed as a bundle of attributes. Unlike the conventional distributed word representations that are purely induced from a text corpus, a semantic attribute is associated with a designated dimension in attribute-based vector representations. Thus, semantic attribute vectors can effectively capture the commonalities and differences among concepts. However, as semantic attributes have been generally created by psychological experimental settings involving human annotators, an automatic method to create or extend such resources is highly demanded in terms of language resource development and maintenance. This study proposes a two-stage neural network architecture, Word2Attr, in which initially acquired attribute representations are then fine-tuned by employing supervised lexical entailment tasks. The quantitative empirical results demonstrated that the fine-tuning was indeed effective in improving the performances of semantic/visual similarity/relatedness evaluation tasks. Although the qualitative analysis confirmed that the proposed method could often discover valid but not-yet human-annotated attributes, they also exposed future issues to be worked: we should refine the inventory of semantic attributes that currently relies on an existing dataset. 2020.lrec-1.716 @@ -8764,7 +8764,7 @@ JuliaBonn ArchnaBhatia ZhengCai - MarthaPalmer + MarthaPalmer DanRoth 5855–5864 Spatial Reasoning from language is essential for natural language understanding. Supporting it requires a representation scheme that can capture spatial phenomena encountered in language as well as in images and videos. Existing spatial representations are not sufficient for describing spatial configurations used in complex tasks. This paper extends the capabilities of existing spatial representation languages and increases coverage of the semantic aspects that are needed to ground spatial meaning of natural language text in the world. Our spatial relation language is able to represent a large, comprehensive set of spatial concepts crucial for reasoning and is designed to support composition of static and dynamic spatial configurations. We integrate this language with the Abstract Meaning Representation (AMR) annotation schema and present a corpus annotated by this extended AMR. To exhibit the applicability of our representation scheme, we annotate text taken from diverse datasets and show how we extend the capabilities of existing spatial representation languages with fine-grained decomposition of semantics and blend it seamlessly with AMRs of sentences and discourse representations as a whole. @@ -8785,7 +8785,7 @@ Are White Ravens Ever White? - Non-Literal Adjective-Noun Phrases in <fixed-case>P</fixed-case>olish AgnieszkaMykowiecka - MalgorzataMarciniak + MalgorzataMarciniak 5871–5877 In the paper we describe two resources of Polish data focused on literal and metaphorical meanings of adjective-noun phrases. The first one is FigAN and consists of isolated phrases which are divided into three types: phrases with only literal meaning, with only metaphorical meaning, and phrases which can be interpreted as literal or metaphorical ones depending on a context of use. The second data is the FigSen corpus which consists of 1833 short fragments of texts containing at least one phrase from the FigAN data which may have both meanings. The corpus is annotated in two ways. One approach concerns annotation of all adjective-noun phrases. In the second approach, literal or metaphorical senses are assigned to all adjectives and nouns in the data. The paper addresses statistics of data and compares two types of annotation. The corpora were used in experiments of automatic recognition of Polish non-literal adjective noun phrases. 2020.lrec-1.719 @@ -8823,8 +8823,8 @@ Automatic Compilation of Resources for Academic Writing and Evaluating with Informal Word Identification and Paraphrasing System Seid MuhieYimam GopalakrishnanVenkatesh - JohnLee - ChrisBiemann + JohnLee + ChrisBiemann 5896–5904 We present the first approach to automatically building resources for academic writing. The aim is to build a writing aid system that automatically edits a text so that it better adheres to the academic style of writing. On top of existing academic resources, such as the Corpus of Contemporary American English (COCA) academic Word List, the New Academic Word List, and the Academic Collocation List, we also explore how to dynamically build such resources that would be used to automatically identify informal or non-academic words or phrases. The resources are compiled using different generic approaches that can be extended for different domains and languages. We describe the evaluation of resources with a system implementation. The system consists of an informal word identification (IWI), academic candidate paraphrase generation, and paraphrase ranking components. To generate candidates and rank them in context, we have used the PPDB and WordNet paraphrase resources. We use the Concepts in Context (CoInCO) “All-Words” lexical substitution dataset both for the informal word identification and paraphrase generation experiments. Our informal word identification component achieves an F-1 score of 82%, significantly outperforming a stratified classifier baseline. The main contribution of this work is a domain-independent methodology to build targeted resources for writing aids. 2020.lrec-1.722 @@ -8848,8 +8848,8 @@ PaulineHaas RichardHuyghe DelphineTribout - MarieCandito - BenoitCrabbé + MarieCandito + BenoitCrabbé VincentSegonne 5912–5918 French, as many languages, lacks semantically annotated corpus data. Our aim is to provide the linguistic and NLP research communities with a gold standard sense-annotated corpus of French, using WordNet Unique Beginners as semantic tags, thus allowing for interoperability. In this paper, we report on the first phase of the project, which focused on the annotation of common nouns. The resulting dataset consists of more than 12,000 French noun occurrences which were annotated in double blind and adjudicated according to a carefully redefined set of supersenses. The resource is released online under a Creative Commons Licence. @@ -8860,7 +8860,7 @@ A Formal Analysis of Multimodal Referring Strategies Under Common Ground NikhilKrishnaswamy - JamesPustejovsky + JamesPustejovsky 5919–5927 In this paper, we present an analysis of computationally generated mixed-modality definite referring expressions using combinations of gesture and linguistic descriptions. In doing so, we expose some striking formal semantic properties of the interactions between gesture and language, conditioned on the introduction of content into the common ground between the (computational) speaker and (human) viewer, and demonstrate how these formal features can contribute to training better models to predict viewer judgment of referring expressions, and potentially to the generation of more natural and informative referring expressions. 2020.lrec-1.725 @@ -8870,7 +8870,7 @@ Improving Neural Metaphor Detection with Visual Datasets GititKehat - JamesPustejovsky + JamesPustejovsky 5928–5933 We present new results on Metaphor Detection by using text from visual datasets. Using a straightforward technique for sampling text from Vision-Language datasets, we create a data structure we term a visibility word embedding. We then combine these embeddings in a relatively simple BiLSTM module augmented with contextualized word representations (ELMo), and show improvement over previous state-of-the-art approaches that use more complex neural network architectures and richer linguistic features, for the task of verb classification. 2020.lrec-1.726 @@ -8896,8 +8896,8 @@ DmitryUstalov AndreyKutuzov EkaterinaArtemova - ChrisBiemann - Simone PaoloPonzetto + ChrisBiemann + Simone PaoloPonzetto AlexanderPanchenko 5943–5952 Disambiguation of word senses in context is easy for humans, but is a major challenge for automatic approaches. Sophisticated supervised and knowledge-based models were developed to solve this task. However, (i) the inherent Zipfian distribution of supervised training instances for a given word and/or (ii) the quality of linguistic knowledge representations motivate the development of completely unsupervised and knowledge-free approaches to word sense disambiguation (WSD). They are particularly useful for under-resourced languages which do not have any resources for building either supervised and/or knowledge-based models. In this paper, we present a method that takes as input a standard pre-trained word embedding model and induces a fully-fledged word sense inventory, which can be used for disambiguation in context. We use this method to induce a collection of sense inventories for 158 languages on the basis of the original pre-trained fastText word embeddings by Grave et al., (2018), enabling WSD in these languages. Models and system are available online. @@ -8909,7 +8909,7 @@ Extraction of Hyponymic Relations in <fixed-case>F</fixed-case>rench with Knowledge-Pattern-Based Word Sketches AntonioSan Martín CatherineTrekker - PilarLeón-Araúz + PilarLeón-Araúz 5953–5961 Hyponymy is the cornerstone of taxonomies and concept hierarchies. However, the extraction of hypernym-hyponym pairs from a corpus can be time-consuming, and reconstructing the hierarchical network of a domain is often an extremely complex process. This paper presents the development and evaluation of the French EcoLexicon Semantic Sketch Grammar (ESSG-fr), a French hyponymic sketch grammar for Sketch Engine based on knowledge patterns. It offers a user-friendly way of extracting hyponymic pairs in the form of word sketches in any user-owned corpus. The ESSG-fr contains three times more hyponymic patterns than its English counterpart and has been tested in a multidisciplinary corpus. It is thus expected to be domain-independent. Moreover, the following methodological innovations have been included in its development: (1) use of English hyponymic patterns in a parallel corpus to find new French patterns; (2) automatic inclusion of the results of the Sketch Engine thesaurus to find new variants of the patterns. As for its evaluation, the ESSG-fr returns 70% valid hyperonyms and hyponyms, measured on 180 extracted pairs of terms in three different domains. 2020.lrec-1.729 @@ -8940,7 +8940,7 @@ One Classifier for All Ambiguous Words: Overcoming Data Sparsity by Utilizing Sense Correlations Across Words - Prafulla KumarChoubey + Prafulla KumarChoubey RuihongHuang 5978–5985 Most supervised word sense disambiguation (WSD) systems build word-specific classifiers by leveraging labeled data. However, when using word-specific classifiers, the sparseness of annotations leads to inferior sense disambiguation performance on less frequently seen words. To combat data sparsity, we propose to learn a single model that derives sense representations and meanwhile enforces congruence between a word instance and its right sense by using both sense-annotated data and lexical resources. The model is shared across words that allows utilizing sense correlations across words, and therefore helps to transfer common disambiguation rules from annotation-rich words to annotation-lean words. Empirical evaluation on benchmark datasets shows that the proposed shared model outperforms the equivalent classifier-based models by 1.7%, 2.5% and 3.8% in F1-score when using GloVe, ELMo and BERT word embeddings respectively. @@ -8964,9 +8964,9 @@ The <fixed-case>R</fixed-case>ussian <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank - SarahMoeller + SarahMoeller IrinaWagner - MarthaPalmer + MarthaPalmer KathrynConger SkatjeMyers 5995–6002 @@ -9011,7 +9011,7 @@ Annotating a Fable in <fixed-case>I</fixed-case>talian <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage (<fixed-case>LIS</fixed-case>) SerenaTrolvi - RodolfoDelmonte + RodolfoDelmonte 6025–6034 This paper introduces work carried out for the automatic generation of a written text in Italian starting from glosses of a fable in Italian Sign Language (LIS). The paper gives a brief overview of sign languages (SLs) and some peculiarities of SL fables such as the use of space, the strategy of Role Shift and classifiers. It also presents the annotation of the fable “The Tortoise and the Hare” - signed in LIS and made available by Alba Cooperativa Sociale -, which was annotated manually by first author for her master’s thesis. The annotation was the starting point of a generation process that allowed us to automatically generate a text in Italian starting from LIS glosses. LIS sentences have been transcribed with Italian words into tables on simultaneous layers, each of which contains specific linguistic or non-linguistic pieces of information. In addition, the present work discusses problems encountered in the annotation and generation process. 2020.lrec-1.738 @@ -9021,7 +9021,7 @@ <fixed-case>H</fixed-case>am<fixed-case>N</fixed-case>o<fixed-case>S</fixed-case>y<fixed-case>S</fixed-case>2<fixed-case>S</fixed-case>i<fixed-case>GML</fixed-case>: Translating <fixed-case>H</fixed-case>am<fixed-case>N</fixed-case>o<fixed-case>S</fixed-case>ys Into <fixed-case>S</fixed-case>i<fixed-case>GML</fixed-case> CarolinaNeves - LuísaCoheur + LuísaCoheur HugoNicolau 6035–6039 Sign Languages are visual languages and the main means of communication used by Deaf people. However, the majority of the information available online is presented through written form. Hence, it is not of easy access to the Deaf community. Avatars that can animate sign languages have gained an increase of interest in this area due to their flexibility in the process of generation and edition. Synthetic animation of conversational agents can be achieved through the use of notation systems. HamNoSys is one of these systems, which describes movements of the body through symbols. Its XML-compliant, SiGML, is a machine-readable input of HamNoSys able to animate avatars. Nevertheless, current tools have no freely available open source libraries that allow the conversion from HamNoSys to SiGML. Our goal is to develop a tool of open access, which can perform this conversion independently from other platforms. This system represents a crucial intermediate step in the bigger pipeline of animating signing avatars. Two cases studies are described in order to illustrate different applications of our tool. @@ -9032,7 +9032,7 @@ <fixed-case>D</fixed-case>icta-<fixed-case>S</fixed-case>ign-<fixed-case>LSF</fixed-case>-v2: Remake of a Continuous <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage Dialogue Corpus and a First Baseline for Automatic Sign Language Processing ValentinBelissen - AnneliesBraffort + AnneliesBraffort MichèleGouiffès 6040–6048 While the research in automatic Sign Language Processing (SLP) is growing, it has been almost exclusively focused on recognizing lexical signs, whether isolated or within continuous SL production. However, Sign Languages include many other gestural units like iconic structures, which need to be recognized in order to go towards a true SL understanding. In this paper, we propose a newer version of the publicly available SL corpus Dicta-Sign, limited to its French Sign Language part. Involving 16 different signers, this dialogue corpus was produced with very few constraints on the style and content. It includes lexical and non-lexical annotations over 11 hours of video recording, with 35000 manual units. With the aim of stimulating research in SL understanding, we also provide a baseline for the recognition of lexical signs and non-lexical structures on this corpus. A very compact modeling of a signer is built and a Convolutional-Recurrent Neural Network is trained and tested on Dicta-Sign-LSF-v2, with state-of-the-art results, including the ability to detect iconicity in SL production. @@ -9064,7 +9064,7 @@ <fixed-case>MEDIAPI</fixed-case>-<fixed-case>SKEL</fixed-case> - A 2<fixed-case>D</fixed-case>-Skeleton Video Database of <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage With Aligned <fixed-case>F</fixed-case>rench Subtitles HannahBull - AnneliesBraffort + AnneliesBraffort MichèleGouiffès 6063–6068 This paper presents MEDIAPI-SKEL, a 2D-skeleton database of French Sign Language videos aligned with French subtitles. The corpus contains 27 hours of video of body, face and hand keypoints, aligned to subtitles with a vocabulary size of 17k tokens. In contrast to existing sign language corpora such as videos produced under laboratory conditions or translations of TV programs into sign language, this database is constructed using original sign language content largely produced by deaf journalists at the media company Média-Pi. Moreover, the videos are accurately synchronized with French subtitles. We propose three challenges appropriate for this corpus that are related to processing units of signs in context: automatic alignment of text and video, semantic segmentation of sign language, and production of video-text embeddings for cross-modal retrieval. These challenges deviate from the classic task of identifying a limited number of lexical signs in a video stream. @@ -9125,7 +9125,7 @@ JieGao SoojiHan XingyiSong - FabioCiravegna + FabioCiravegna 6094–6105 Early rumor detection (ERD) on social media platform is very challenging when limited, incomplete and noisy information is available. Most of the existing methods have largely worked on event-level detection that requires the collection of posts relevant to a specific event and relied only on user-generated content. They are not appropriate to detect rumor sources in the very early stages, before an event unfolds and becomes widespread. In this paper, we address the task of ERD at the message level. We present a novel hybrid neural network architecture, which combines a task-specific character-based bidirectional language model and stacked Long Short-Term Memory (LSTM) networks to represent textual contents and social-temporal contexts of input source tweets, for modelling propagation patterns of rumors in the early stages of their development. We apply multi-layered attention models to jointly learn attentive context embeddings over multiple context inputs. Our experiments employ a stringent leave-one-out cross-validation (LOO-CV) evaluation setup on seven publicly available real-life rumor event data sets. Our models achieve state-of-the-art(SoA) performance for detecting unseen rumors on large augmented data which covers more than 12 events and 2,967 rumors. An ablation study is conducted to understand the relative contribution of each component of our proposed model. 2020.lrec-1.748 @@ -9147,7 +9147,7 @@ Searching <fixed-case>B</fixed-case>razilian <fixed-case>T</fixed-case>witter for Signs of Mental Health Issues WesleySantos AmandaFunabashi - IvandréParaboni + IvandréParaboni 6111–6117 Depression and related mental health issues are often reflected in the language employed by the individuals who suffer from these conditions and, accordingly, research in Natural Language Processing (NLP) and related fields have developed an increasing number of studies devoted to their recognition in social media text. Some of these studies have also attempted to go beyond recognition by focusing on the early signs of these illnesses, and by analysing the users’ publication history over time to potentially prevent further harm. The two kinds of study are of course overlapping, and often make use of supervised machine learning methods based on annotated corpora. However, as in many other fields, existing resources are largely devoted to English NLP, and there is little support for these studies in under resourced languages. To bridge this gap, in this paper we describe the initial steps towards building a novel resource of this kind - a corpus intended to support both the recognition of mental health issues and the temporal analysis of these illnesses - in the Brazilian Portuguese language, and initial results of a number of experiments in text classification addressing both tasks. 2020.lrec-1.750 @@ -9168,7 +9168,7 @@ An Annotated Social Media Corpus for <fixed-case>G</fixed-case>erman - EckhardBick + EckhardBick 6127–6135 This paper presents the German Twitter section of a large (2 billion word) bilingual Social Media corpus for Hate Speech research, discussing the compilation, pseudonymization and grammatical annotation of the corpus, as well as special linguistic features and peculiarities encountered in the data. Among other things, compounding, accidental and intentional orthographic variation, gendering and the use of emoticons/emojis are addressed in a genre-specific fashion. We present the different layers of linguistic annotation (morphosyntactic, dependencies and semantic types) and explain how a general parser (GerGram) can be made to work on Social Media data, pointing out necessary adaptations and extensions. In an evaluation run on a random cross-section of tweets, the modified parser achieved F-scores of 97% for morphology (fine-grained POS) and 92% for syntax (labeled attachment score). Predictably, performance was twice as good in tweets with standard orthography than in tweets with spelling/casing irregularities or lack of sentence separation, the effect being more marked for morphology than for syntax. 2020.lrec-1.752 @@ -9213,7 +9213,7 @@ Optimising <fixed-case>T</fixed-case>witter-based Political Election Prediction with Relevance and<fixed-case>S</fixed-case>entiment Filters EricSanders - Antalvan den Bosch + Antalvan den Bosch 6158–6165 We study the relation between the number of mentions of political parties in the last weeks before the elections and the election results. In this paper we focus on the Dutch elections of the parliament in 2012 and for the provinces (and the senate) in 2011 and 2015. With raw counts, without adaptations, we achieve a mean absolute error (MAE) of 2.71% for 2011, 2.02% for 2012 and 2.89% for 2015. A set of over 17,000 tweets containing political party names were annotated by at least three annotators per tweet on ten features denoting communicative intent (including the presence of sarcasm, the message’s polarity, the presence of an explicit voting endorsement or explicit voting advice, etc.). The annotations were used to create oracle (gold-standard) filters. Tweets with or without a certain majority annotation are held out from the tweet counts, with the goal of attaining lower MAEs. With a grid search we tested all combinations of filters and their responding MAE to find the best filter ensemble. It appeared that the filters show markedly different behaviour for the three elections and only a small MAE improvement is possible when optimizing on all three elections. Larger improvements for one election are possible, but result in deterioration of the MAE for the other elections. 2020.lrec-1.756 @@ -9223,7 +9223,7 @@ A Real-Time System for Credibility on <fixed-case>T</fixed-case>witter AdrianIftene - DanielaGifu + DanielaGifu Andrei-RemusMiron Mihai-StefanDudu 6166–6173 @@ -9392,7 +9392,7 @@ Small Town or Metropolis? Analyzing the Relationship between Population Size and Language AmyRechkemmer StevenWilson - RadaMihalcea + RadaMihalcea 6287–6291 The variance in language used by different cultures has been a topic of study for researchers in linguistics and psychology, but often times, language is compared across multiple countries in order to show a difference in culture. As a geographically large country that is diverse in population in terms of the background and experiences of its citizens, the U.S. also contains cultural differences within its own borders. Using a set of over 2 million posts from distinct Twitter users around the country dating back as far as 2014, we ask the following question: is there a difference in how Americans express themselves online depending on whether they reside in an urban or rural area? We categorize Twitter users as either urban or rural and identify ideas and language that are more commonly expressed in tweets written by one population over the other. We take this further by analyzing how the language from specific cities of the U.S. compares to the language of other cities and by training predictive models to predict whether a user is from an urban or rural area. We publicly release the tweet and user IDs that can be used to reconstruct the dataset for future studies in this direction. 2020.lrec-1.771 @@ -9403,7 +9403,7 @@ Inferring Social Media Users’ Mental Health Status from Multimodal Information ZhentaoXu VerónicaPérez-Rosas - RadaMihalcea + RadaMihalcea 6292–6299 Worldwide, an increasing number of people are suffering from mental health disorders such as depression and anxiety. In the United States alone, one in every four adults suffers from a mental health condition, which makes mental health a pressing concern. In this paper, we explore the use of multimodal cues present in social media posts to predict users’ mental health status. Specifically, we focus on identifying social media activity that either indicates a mental health condition or its onset. We collect posts from Flickr and apply a multimodal approach that consists of jointly analyzing language, visual, and metadata cues and their relation to mental health. We conduct several classification experiments aiming to discriminate between (1) healthy users and users affected by a mental health illness; and (2) healthy users and users prone to mental illness. Our experimental results indicate that using multiple modalities can improve the performance of this classification task as compared to the use of one modality at a time, and can provide important cues into a user’s mental status. 2020.lrec-1.772 @@ -9477,7 +9477,7 @@ Evaluating and Improving Child-Directed Automatic Speech Recognition EricBooth JakeCarns - CaseyKennington + CaseyKennington NaderRafla 6340–6345 Speech recognition has seen dramatic improvements in the last decade, though those improvements have focused primarily on adult speech. In this paper, we assess child-directed speech recognition and leverage a transfer learning approach to improve child-directed speech recognition by training the recent DeepSpeech2 model on adult data, then apply additional tuning to varied amounts of child speech data. We evaluate our model using the CMU Kids dataset as well as our own recordings of child-directed prompts. The results from our experiment show that even a small amount of child audio data improves significantly over a baseline of adult-only or child-only trained models. We report a final general Word-Error-Rate of 29% over a baseline of 62% that uses the adult-trained model. Our analyses show that our model adapts quickly using a small amount of data and that the general child model works better than school grade-specific models. We make available our trained model and our data collection tool. @@ -9502,7 +9502,7 @@ OliverWalter ChristophSchmidt SvenBehnke - JoachimKöhler + JoachimKöhler 6354–6362 While recent automatic speech recognition systems achieve remarkable performance when large amounts of adequate, high quality annotated speech data is used for training, the same systems often only achieve an unsatisfactory result for tasks in domains that greatly deviate from the conditions represented by the training data. For many real-world applications, there is a lack of sufficient data that can be directly used for training robust speech recognition systems. To address this issue, we propose and investigate an approach that performs a robust acoustic model adaption to a target domain in a cross-lingual, multi-staged manner. Our approach enables the exploitation of large-scale training data from other domains in both the same and other languages. We evaluate our approach using the challenging task of German oral history interviews, where we achieve a relative reduction of the word error rate by more than 30% compared to a model trained from scratch only on the target domain, and 6-7% relative compared to a model trained robustly on 1000 hours of same-language out-of-domain training data. 2020.lrec-1.780 @@ -9513,7 +9513,7 @@ Large Corpus of <fixed-case>C</fixed-case>zech Parliament Plenary Hearings JonášKratochvil PeterPolák - OndřejBojar + OndřejBojar 6363–6367 We present a large corpus of Czech parliament plenary sessions. The corpus consists of approximately 1200 hours of speech data and corresponding text transcriptions. The whole corpus has been segmented to short audio segments making it suitable for both training and evaluation of automatic speech recognition (ASR) systems. The source language of the corpus is Czech, which makes it a valuable resource for future research as only a few public datasets are available in the Czech language. We complement the data release with experiments of two baseline ASR systems trained on the presented data: the more traditional approach implemented in the Kaldi ASRtoolkit which combines hidden Markov models and deep neural networks (NN) and a modern ASR architecture implemented in Jaspertoolkit which uses deep NNs in an end-to-end fashion. 2020.lrec-1.781 @@ -9549,7 +9549,7 @@ AlbertGatt AndreaDeMarco ClaudiaBorg - Lonnekevan der Plas + Lonnekevan der Plas AmandaMuscat IanPadovani 6381–6388 @@ -9562,7 +9562,7 @@ Automatic Period Segmentation of Oral <fixed-case>F</fixed-case>rench NataliaKalashnikova LoïcGrobol - IrisEshkol-Taravella + IrisEshkol-Taravella FrançoisDelafontaine 6389–6394 Natural Language Processing in oral speech segmentation is still looking for a minimal unit to analyze. In this work, we present a comparison of two automatic segmentation methods of macro-syntactic periods which allows to take into account syntactic and prosodic components of speech. We compare the performances of an existing tool Analor (Avanzi, Lacheret-Dujour, Victorri, 2008) developed for automatic segmentation of prosodic periods and of CRF models relying on syntactic and / or prosodic features. We find that Analor tends to divide speech into smaller segments and that CRF models detect larger segments rather than macro-syntactic periods. However, in general CRF models perform better results than Analor in terms of F-measure. @@ -9671,9 +9671,9 @@ The <fixed-case>SAFE</fixed-case>-<fixed-case>T</fixed-case> Corpus: A New Resource for Simulated Public Safety Communications DanaDelgado KevinWalker - StephanieStrassel - KarenJones - ChristopherCaruso + StephanieStrassel + KarenJones + ChristopherCaruso DavidGraff 6450–6457 We introduce a new resource, the SAFE-T (Speech Analysis for Emergency Response Technology) Corpus, designed to simulate first-responder communications by inducing high vocal effort and urgent speech with situational background noise in a game-based collection protocol. Linguistic Data Consortium developed the SAFE-T Corpus to support the NIST (National Institute of Standards and Technology) OpenSAT (Speech Analytic Technologies) evaluation series, whose goal is to advance speech analytic technologies including automatic speech recognition, speech activity detection and keyword search in multiple domains including simulated public safety communications data. The corpus comprises over 300 hours of audio from 115 unique speakers engaged in a collaborative problem-solving activity representative of public safety communications in terms of speech content, noise types and noise levels. Portions of the corpus have been used in the OpenSAT 2019 evaluation and the full corpus will be published in the LDC catalog. We describe the design and implementation of the SAFE-T Corpus collection, discuss the approach of capturing spontaneous speech from study participants through game-based speech collection, and report on the collection results including several challenges associated with the collection. @@ -9711,7 +9711,7 @@ KallirroiGeorgila AntonLeuski VolodymyrYanov - DavidTraum + DavidTraum 6469–6476 We evaluate several publicly available off-the-shelf (commercial and research) automatic speech recognition (ASR) systems across diverse dialogue domains (in US-English). Our evaluation is aimed at non-experts with limited experience in speech recognition. Our goal is not only to compare a variety of ASR systems on several diverse data sets but also to measure how much ASR technology has advanced since our previous large-scale evaluations on the same data sets. Our results show that the performance of each speech recognizer can vary significantly depending on the domain. Furthermore, despite major recent progress in ASR technology, current state-of-the-art speech recognizers perform poorly in domains that require special vocabulary and language models, and under noisy conditions. We expect that our evaluation will prove useful to ASR consumers and dialogue system designers. 2020.lrec-1.797 @@ -9721,7 +9721,7 @@ <fixed-case>CEASR</fixed-case>: A Corpus for Evaluating Automatic Speech Recognition Malgorzata AnnaUlasik - ManuelaHürlimann + ManuelaHürlimann FabianGermann EsinGedik FernandoBenites @@ -9738,7 +9738,7 @@ WilliamHavard MahaultGarnerin ÉricLe Ferrand - LaurentBesacier + LaurentBesacier 6486–6493 The CMU Wilderness Multilingual Speech Dataset (Black, 2019) is a newly published multilingual speech dataset based on recorded readings of the New Testament. It provides data to build Automatic Speech Recognition (ASR) and Text-to-Speech (TTS) models for potentially 700 languages. However, the fact that the source content (the Bible) is the same for all the languages is not exploited to date. Therefore, this article proposes to add multilingual links between speech segments in different languages, and shares a large and clean dataset of 8,130 parallel spoken utterances across 8 languages (56 language pairs). We name this corpus MaSS (Multilingual corpus of Sentence-aligned Spoken utterances). The covered languages (Basque, English, Finnish, French, Hungarian, Romanian, Russian and Spanish) allow researches on speech-to-speech alignment as well as on translation for typologically different language pairs. The quality of the final corpus is attested by human evaluation performed on a corpus subset (100 utterances, 8 language pairs). Lastly, we showcase the usefulness of the final product on a bilingual speech retrieval task. 2020.lrec-1.799 @@ -9753,7 +9753,7 @@ ClaraRivera AnnaKatanova AlexanderGutkin - IsinDemirsahin + IsinDemirsahin CibuJohny MartinJansche SupheakmungkolSarin @@ -9767,7 +9767,7 @@ Crowdsourcing <fixed-case>L</fixed-case>atin <fixed-case>A</fixed-case>merican <fixed-case>S</fixed-case>panish for Low-Resource Text-to-Speech AdrianaGuevara-Rukoz - IsinDemirsahin + IsinDemirsahin FeiHe Shan-Hui CathyChu SupheakmungkolSarin @@ -9805,7 +9805,7 @@ Open-source Multi-speaker Corpora of the <fixed-case>E</fixed-case>nglish Accents in the <fixed-case>B</fixed-case>ritish Isles - IsinDemirsahin + IsinDemirsahin OddurKjartansson AlexanderGutkin ClaraRivera @@ -9922,7 +9922,7 @@ Gender Representation in Open Source Speech Resources MahaultGarnerin SolangeRossato - LaurentBesacier + LaurentBesacier 6599–6605 With the rise of artificial intelligence (AI) and the growing use of deep-learning architectures, the question of ethics, transparency and fairness of AI systems has become a central concern within the research community. We address transparency and fairness in spoken language systems by proposing a study about gender representation in speech resources available through the Open Speech and Language Resource platform. We show that finding gender information in open source corpora is not straightforward and that gender balance depends on other corpus characteristics (elicited/non elicited speech, low/high resource language, speech task targeted). The paper ends with recommendations about metadata and gender information for researchers in order to assure better transparency of the speech systems built using such corpora. 2020.lrec-1.813 @@ -9954,8 +9954,8 @@ Call My Net 2: A New Resource for Speaker Recognition - KarenJones - StephanieStrassel + KarenJones + StephanieStrassel KevinWalker JonathanWright 6621–6626 @@ -9968,8 +9968,8 @@ <fixed-case>D</fixed-case>a<fixed-case>CT</fixed-case>o<fixed-case>R</fixed-case>: A Data Collection Tool for the <fixed-case>RELATER</fixed-case> Project JuanHussain OussamaZenkri - SebastianStüker - AlexWaibel + SebastianStüker + AlexWaibel 6627–6632 Collecting domain-specific data for under-resourced languages, e.g., dialects of languages, can be very expensive, potentially financially prohibitive and taking long time. Moreover, in the case of rarely written languages, the normalization of non-canonical transcription might be another time consuming but necessary task. In order to collect domain-specific data in such circumstances in a time and cost-efficient way, collecting read data of pre-prepared texts is often a viable option. In order to collect data in the domain of psychiatric diagnosis in Arabic dialects for the project RELATER, we have prepared the data collection tool DaCToR for collecting read texts by speakers in the respective countries and districts in which the dialects are spoken. In this paper we describe our tool, its purpose within the project RELATER and the dialects which we have started to collect with the tool. 2020.lrec-1.817 @@ -9979,8 +9979,8 @@ Development and Evaluation of Speech Synthesis Corpora for <fixed-case>L</fixed-case>atvian RobertsDarģis - PeterisPaikens - NormundsGruzitis + PeterisPaikens + NormundsGruzitis IlzeAuzina AgateAkmane 6633–6637 @@ -9991,7 +9991,7 @@ Abstractive Document Summarization without Parallel Data - Nikola I.Nikolov + Nikola I.Nikolov RichardHahnloser 6638–6644 Abstractive summarization typically relies on large collections of paired articles and summaries. However, in many cases, parallel data is scarce and costly to obtain. We develop an abstractive summarization system that relies only on large collections of example summaries and non-matching articles. Our approach consists of an unsupervised sentence extractor that selects salient sentences to include in the final summary, as well as a sentence abstractor that is trained on pseudo-parallel and synthetic data, that paraphrases each of the extracted sentences. We perform an extensive evaluation of our method: on the CNN/DailyMail benchmark, on which we compare our approach to fully supervised baselines, as well as on the novel task of automatically generating a press release from a scientific journal article, which is well suited for our system. We show promising performance on both tasks, without relying on any article-summary pairs. @@ -10031,7 +10031,7 @@ Two Huge Title and Keyword Generation Corpora of Research Articles ErionÇano - OndřejBojar + OndřejBojar 6663–6671 Recent developments in sequence-to-sequence learning with neural networks have considerably improved the quality of automatically generated text summaries and document keywords, stipulating the need for even bigger training corpora. Metadata of research articles are usually easy to find online and can be used to perform research on various tasks. In this paper, we introduce two huge datasets for text summarization (OAGSX) and keyword generation (OAGKX) research, containing 34 million and 23 million records, respectively. The data were retrieved from the Open Academic Graph which is a network of research profiles and publications. We carefully processed each record and also tried several extractive and abstractive methods of both tasks to create performance baselines for other researchers. We further illustrate the performance of those methods previewing their outputs. In the near future, we would like to apply topic modeling on the two sets to derive subsets of research articles from more specific disciplines. 2020.lrec-1.823 @@ -10040,7 +10040,7 @@ A Multi-level Annotated Corpus of Scientific Papers for Scientific Document Summarization and Cross-document Relation Discovery - AhmedAbuRa’ed + AhmedAbuRa’ed HoracioSaggion LuisChiruzzo 6672–6679 @@ -10052,7 +10052,7 @@ Abstractive Text Summarization based on Language Model Conditioning and Locality Modeling DmitriiAksenov - JulianMoreno-Schneider + JulianMoreno-Schneider PeterBourgonje RobertSchwarzenberg LeonhardHennig @@ -10066,7 +10066,7 @@ A Data Set for the Analysis of Text Quality Dimensions in Summarization Evaluation MargotMieskes - EneldoLoza Mencía + EneldoLoza Mencía TimKronsbein 6690–6699 Automatic evaluation of summarization focuses on developing a metric to represent the quality of the resulting text. However, text qualityis represented in a variety of dimensions ranging from grammaticality to readability and coherence. In our work, we analyze the depen-dencies between a variety of quality dimensions on automatically created multi-document summaries and which dimensions automaticevaluation metrics such as ROUGE, PEAK or JSD are able to capture. Our results indicate that variants of ROUGE are correlated tovarious quality dimensions and that some automatic summarization methods achieve higher quality summaries than others with respectto individual summary quality dimensions. Our results also indicate that differentiating between quality dimensions facilitates inspectionand fine-grained comparison of summarization methods and its characteristics. We make the data from our two summarization qualityevaluation experiments publicly available in order to facilitate the future development of specialized automatic evaluation methods. @@ -10292,7 +10292,7 @@ <fixed-case>H</fixed-case>ypo<fixed-case>NLI</fixed-case>: Exploring the Artificial Patterns of Hypothesis-only Bias in Natural Language Inference TianyuLiu ZhengXin - BaobaoChang + BaobaoChang ZhifangSui 6852–6860 Many recent studies have shown that for models trained on datasets for natural language inference (NLI), it is possible to make correct predictions by merely looking at the hypothesis while completely ignoring the premise. In this work, we manage to derive adversarial examples in terms of the hypothesis-only bias and explore eligible ways to mitigate such bias. Specifically, we extract various phrases from the hypotheses (artificial patterns) in the training sets, and show that they have been strong indicators to the specific labels. We then figure out ‘hard’ and ‘easy’ instances from the original test sets whose labels are opposite to or consistent with those indications. We also set up baselines including both pretrained models (BERT, RoBerta, XLNet) and competitive non-pretrained models (InferSent, DAM, ESIM). Apart from the benchmark and baselines, we also investigate two debiasing approaches which exploit the artificial pattern modeling to mitigate such hypothesis-only bias: down-sampling and adversarial training. We believe those methods can be treated as competitive baselines in NLI debiasing tasks. @@ -10368,11 +10368,11 @@ Detecting Negation Cues and Scopes in <fixed-case>S</fixed-case>panish - Salud MaríaJiménez-Zafra + Salud MaríaJiménez-Zafra RoserMorante EduardoBlanco María TeresaMartín Valdivia - L. AlfonsoUreña López + L. AlfonsoUreña López 6902–6911 In this work we address the processing of negation in Spanish. We first present a machine learning system that processes negation in Spanish. Specifically, we focus on two tasks: i) negation cue detection and ii) scope identification. The corpus used in the experimental framework is the SFU Corpus. The results for cue detection outperform state-of-the-art results, whereas for scope detection this is the first system that performs the task for Spanish. Moreover, we provide a qualitative error analysis aimed at understanding the limitations of the system and showing which negation cues and scopes are straightforward to predict automatically, and which ones are challenging. 2020.lrec-1.853 @@ -10398,9 +10398,9 @@ MatthewCoole IgnatiusEzeani SherylPrentice - NancyIde + NancyIde JoKnight - ScottPiao + ScottPiao JohnMariani PaulRayson KeithSuderman @@ -10428,7 +10428,7 @@ Jerry AlanFails SolePera KatherineWright - CaseyKennington + CaseyKennington 6937–6946 For help with their spelling errors, children often turn to spellcheckers integrated in software applications like word processors and search engines. However, existing spellcheckers are usually tuned to the needs of traditional users (i.e., adults) and generally prove unsatisfactory for children. Motivated by this issue, we introduce KidSpell, an English spellchecker oriented to the spelling needs of children. KidSpell applies (i) an encoding strategy for mapping both misspelled words and spelling suggestions to their phonetic keys and (ii) a selection process that prioritizes candidate spelling suggestions that closely align with the misspelled word based on their respective keys. To assess the effectiveness of, we compare the model’s performance against several popular, mainstream spellcheckers in a number of offline experiments using existing and novel datasets. The results of these experiments show that KidSpell outperforms existing spellcheckers, as it accurately prioritizes relevant spelling corrections when handling misspellings generated by children in both essay writing and online search tasks. As a byproduct of our study, we create two new datasets comprised of spelling errors generated by children from hand-written essays and web search inquiries, which we make available to the research community. 2020.lrec-1.857 @@ -10439,10 +10439,10 @@ <fixed-case>T</fixed-case>hai<fixed-case>LMC</fixed-case>ut: Unsupervised Pretraining for <fixed-case>T</fixed-case>hai Word Segmentation SuteeraSeeha IvanBilan - LilianaMamani Sanchez + LilianaMamani Sanchez JohannesHuber MichaelMatuschek - HinrichSchütze + HinrichSchütze 6947–6957 We propose ThaiLMCut, a semi-supervised approach for Thai word segmentation which utilizes a bi-directional character language model (LM) as a way to leverage useful linguistic knowledge from unlabeled data. After the language model is trained on substantial unlabeled corpora, the weights of its embedding and recurrent layers are transferred to a supervised word segmentation model which continues fine-tuning them on a word segmentation task. Our experimental results demonstrate that applying the LM always leads to a performance gain, especially when the amount of labeled data is small. In such cases, the F1 Score increased by up to 2.02%. Even on abig labeled dataset, a small improvement gain can still be obtained. The approach has also shown to be very beneficial for out-of-domain settings with a gain in F1 Score of up to 3.13%. Finally, we show that ThaiLMCut can outperform other open source state-of-the-art models achieving an F1 Score of 98.78% on the standard benchmark, InterBEST2009. 2020.lrec-1.858 @@ -10454,7 +10454,7 @@ ReemAlatrash DominikSchlechtweg JonasKuhn - SabineSchulte im Walde + SabineSchulte im Walde 6958–6966 Modelling language change is an increasingly important area of interest within the fields of sociolinguistics and historical linguistics. In recent years, there has been a growing number of publications whose main concern is studying changes that have occurred within the past centuries. The Corpus of Historical American English (COHA) is one of the most commonly used large corpora in diachronic studies in English. This paper describes methods applied to the downloadable version of the COHA corpus in order to overcome its main limitations, such as inconsistent lemmas and malformed tokens, without compromising its qualitative and distributional properties. The resulting corpus CCOHA contains a larger number of cleaned word tokens which can offer better insights into language change and allow for a larger variety of tasks to be performed. 2020.lrec-1.859 @@ -10464,7 +10464,7 @@ Outbound Translation User Interface Ptakopět: A Pilot Study VilémZouhar - OndřejBojar + OndřejBojar 6967–6975 It is not uncommon for Internet users to have to produce a text in a foreign language they have very little knowledge of and are unable to verify the translation quality. We call the task “outbound translation” and explore it by introducing an open-source modular system Ptakopět. Its main purpose is to inspect human interaction with MT systems enhanced with additional subsystems, such as backward translation and quality estimation. We follow up with an experiment on (Czech) human annotators tasked to produce questions in a language they do not speak (German), with the help of Ptakopět. We focus on three real-world use cases (communication with IT support, describing administrative issues and asking encyclopedic questions) from which we gain insight into different strategies users take when faced with outbound translation tasks. Round trip translation is known to be unreliable for evaluating MT systems but our experimental evaluation documents that it works very well for users, at least on MT systems of mid-range quality. 2020.lrec-1.860 @@ -10475,7 +10475,7 @@ <fixed-case>S</fixed-case>eshat: a Tool for Managing and Verifying Annotation Campaigns of Audio Data HadrienTiteux RachidRiad - Xuan-NgaCao + Xuan-NgaCao NicolasHamilakis KrisMadden AlejandrinaCristia @@ -10643,9 +10643,9 @@ YutakaYagi TakenoriNakamura MasayukiAsahara - KikuoMaekawa + KikuoMaekawa ToshinobuOgiso - HanaeKoiso + HanaeKoiso KumikoSakoda NobukoKibe 7077–7083 @@ -10691,7 +10691,7 @@ RamyEskander FrancescaCallejas ElizabethNichols - JudithKlavans + JudithKlavans SmarandaMuresan 7112–7122 Computational morphological segmentation has been an active research topic for decades as it is beneficial for many natural language processing tasks. With the high cost of manually labeling data for morphology and the increasing interest in low-resource languages, unsupervised morphological segmentation has become essential for processing a typologically diverse set of languages, whether high-resource or low-resource. In this paper, we present and release MorphAGram, a publicly available framework for unsupervised morphological segmentation that uses Adaptor Grammars (AG) and is based on the work presented by Eskander et al. (2016). We conduct an extensive quantitative and qualitative evaluation of this framework on 12 languages and show that the framework achieves state-of-the-art results across languages of different typologies (from fusional to polysynthetic and from high-resource to low-resource). @@ -10703,7 +10703,7 @@ <fixed-case>CTAP</fixed-case> for <fixed-case>I</fixed-case>talian: Integrating Components for the Analysis of <fixed-case>I</fixed-case>talian into a Multilingual Linguistic Complexity Analysis Tool NadezdaOkinina Jennifer-CarmenFrey - ZarahWeiss + ZarahWeiss 7123–7131 Linguistic complexity research being a very actively developing field, an increasing number of text analysis tools are created that use natural language processing techniques for the automatic extraction of quantifiable measures of linguistic complexity. While most tools are designed to analyse only one language, the CTAP open source linguistic complexity measurement tool is capable of processing multiple languages, making cross-lingual comparisons possible. Although it was originally developed for English, the architecture has been ex-tended to support multi-lingual analyses. Here we present the Italian component of CTAP, describe its implementation and compare it to the existing linguistic complexity tools for Italian. Offering general text length statistics and features for lexical, syntactic, and morpho-syntactic complexity (including measures of lexical frequency, lexical diversity, lexical and syntactical variation, part-of-speech density), CTAP is currently the most comprehensive linguistic complexity measurement tool for Italian and the only one allowing the comparison of Italian texts to multiple other languages within one tool. 2020.lrec-1.880 @@ -10713,7 +10713,7 @@ Do you Feel Certain about your Annotation? A Web-based Semantic Frame Annotation Tool Considering Annotators’ Concerns and Behaviors ReginaStodden - BehrangQasemiZadeh + BehrangQasemiZadeh LauraKallmeyer 7132–7139 In this system demonstration paper, we present an open-source web-based application with a responsive design for modular semantic frame annotation (SFA). Besides letting experienced and inexperienced users do suggestion-based and slightly-controlled annotations, the system keeps track of the time and changes during the annotation process and stores the users’ confidence with the current annotation. This collected metadata can be used to get insights regarding the difficulty of an annotation with the same type or frame or can be used as an input of an annotation cost measurement for an active learning algorithm. The tool was already used to build a manually annotated corpus with semantic frames and its arguments for task 2 of SemEval 2019 regarding unsupervised lexical frame induction (QasemiZadeh et al., 2019). Although English sentences from the Wall Street Journal corpus of the Penn Treebank were annotated for this task, it is also possible to use the proposed tool for the annotation of sentences in other languages. @@ -10738,7 +10738,7 @@ AndreaCimino FeliceDell’Orletta GiuliaVenturi - SimonettaMontemagni + SimonettaMontemagni 7145–7151 In this paper, we introduce Profiling–UD, a new text analysis tool inspired to the principles of linguistic profiling that can support language variation research from different perspectives. It allows the extraction of more than 130 features, spanning across different levels of linguistic description. Beyond the large number of features that can be monitored, a main novelty of Profiling–UD is that it has been specifically devised to be multilingual since it is based on the Universal Dependencies framework. In the second part of the paper, we demonstrate the effectiveness of these features in a number of theoretical and applicative studies in which they were successfully used for text and author profiling. 2020.lrec-1.883 @@ -10779,7 +10779,7 @@ <fixed-case>C</fixed-case>o<fixed-case>C</fixed-case>o: A Tool for Automatically Assessing Conceptual Complexity of Texts - SanjaStajner + SanjaStajner SergiuNisioi IoanaHulpuș 7179–7186 @@ -10841,7 +10841,7 @@ Contemplata, a Free Platform for Constituency Treebank Annotation - JakubWaszczuk + JakubWaszczuk IlaineWang Jean-YvesAntoine AnaïsHalftermeyer @@ -10856,8 +10856,8 @@ KyeongminRim KelleyLynch MarcVerhagen - NancyIde - JamesPustejovsky + NancyIde + JamesPustejovsky 7230–7237 Promoting interoperrable computational linguistics (CL) and natural language processing (NLP) application platforms and interchange-able data formats have contributed improving discoverabilty and accessbility of the openly available NLP software. In this paper, wediscuss the enhanced data visualization capabilities that are also enabled by inter-operating NLP pipelines and interchange formats. For adding openly available visualization tools and graphical annotation tools to the Language Applications Grid (LAPPS Grid) andComputational Linguistics Applications for Multimedia Services (CLAMS) toolboxes, we have developed interchange formats that cancarry annotations and metadata for text and audiovisual source data. We descibe those data formats and present case studies where wesuccessfully adopt open-source visualization tools and combine them with CL tools. 2020.lrec-1.893 @@ -10886,7 +10886,7 @@ RobsonFidalgo AdrianChifu BernardEspinasse - SébastienFournier + SébastienFournier 7244–7251 Natural Language Processing (NLP) of textual data is usually broken down into a sequence of several subtasks, where the output of one the subtasks becomes the input to the following one, which constitutes an NLP pipeline. Many third-party NLP tools are currently available, each performing distinct NLP subtasks. However, it is difficult to integrate several NLP toolkits into a pipeline due to many problems, including different input/output representations or formats, distinct programming languages, and tokenization issues. This paper presents DeepNLPF, a framework that enables easy integration of third-party NLP tools, allowing the user to preprocess natural language texts at lexical, syntactic, and semantic levels. The proposed framework also provides an API for complete pipeline customization including the definition of input/output formats, integration plugin management, transparent ultiprocessing execution strategies, corpus-level statistics, and database persistence. Furthermore, the DeepNLPF user-friendly GUI allows its use even by a non-expert NLP user. We conducted runtime performance analysis showing that DeepNLPF not only easily integrates existent NLP toolkits but also reduces significant runtime processing compared to executing the same NLP pipeline in a sequential manner. 2020.lrec-1.895 diff --git a/data/xml/2020.lt4gov.xml b/data/xml/2020.lt4gov.xml index 9dc3e88559..afab9bf3c1 100644 --- a/data/xml/2020.lt4gov.xml +++ b/data/xml/2020.lt4gov.xml @@ -45,7 +45,7 @@ Enhancing Job Searches in <fixed-case>M</fixed-case>exico City with Language Technologies GerardoSierra Martínez - GemmaBel-Enguix + GemmaBel-Enguix HelenaGómez-Adorno Juan ManuelTorres Moreno TonatiuhHernández-García @@ -65,7 +65,7 @@ IoannaGrypari DimitrisPappas NataliaManola - HarisPapageorgiou + HarisPapageorgiou 22–27 Cat. 2 Show-case: We present the Data4Impact (D4I) platform, a novel end-to-end system for evidence-based, timely and accurate monitoring and evaluation of research and innovation (R&I) activities. Using the latest technological advances in Human Language Technology (HLT) and our data-driven methodology, we build a novel set of indicators in order to track funded projects and their impact on science, the economy and the society as a whole, during and after the project life-cycle. We develop our methodology by targeting Health-related EC projects from 2007 to 2019 to produce solutions that meet the needs of stakeholders (mainly policy-makers and research funders). Various D4I text analytics workflows process datasets and their metadata, extract valuable insights and estimate intermediate results and metrics, culminating in a set of robust indicators that the users can interact with through our dashboard, the D4I Monitor (available at monitor.data4impact.eu). Therefore, our approach, which can be generalized to different contexts, is multidimensional (technology, tools, indicators, dashboard) and the resulting system can provide an innovative solution for public administrators in their policy-making needs related to RDI funding allocation. 2020.lt4gov-1.4 diff --git a/data/xml/2020.lt4hala.xml b/data/xml/2020.lt4hala.xml index 354421b778..a97d5cdc74 100644 --- a/data/xml/2020.lt4hala.xml +++ b/data/xml/2020.lt4hala.xml @@ -3,7 +3,7 @@ Proceedings of LT4HALA 2020 - 1st Workshop on Language Technologies for Historical and Ancient Languages - RacheleSprugnoli + RacheleSprugnoli MarcoPassarotti European Language Resources Association (ELRA)
Marseille, France
@@ -59,7 +59,7 @@ Computerized Forward Reconstruction for Analysis in Diachronic Phonology, and <fixed-case>L</fixed-case>atin to <fixed-case>F</fixed-case>rench Reflex Prediction ClaytonMarr - David R.Mortensen + David R.Mortensen 28–36 Traditionally, historical phonologists have relied on tedious manual derivations to calibrate the sequences of sound changes that shaped the phonological evolution of languages. However, humans are prone to errors, and cannot track thousands of parallel word derivations in any efficient manner. We propose to instead automatically derive each lexical item in parallel, and we demonstrate forward reconstruction as both a computational task with metrics to optimize, and as an empirical tool for inquiry. For this end we present DiaSim, a user-facing application that simulates “cascades” of diachronic developments over a language’s lexicon and provides diagnostics for “debugging” those cascades. We test our methodology on a Latin-to-French reflex prediction task, using a newly compiled dataset FLLex with 1368 paired Latin/French forms. We also present, FLLAPS, which maps 310 Latin reflexes through five stages until Modern French, derived from Pope (1934)’s sound tables. Our publicly available rule cascades include the baselines BaseCLEF and BaseCLEF*, representing the received view of Latin to French development, and DiaCLEF, build by incremental corrections to BaseCLEF aided by DiaSim’s diagnostics. DiaCLEF vastly outperforms the baselines, improving final accuracy on FLLex from 3.2%to 84.9%, and similar improvements across FLLAPS’ stages. 2020.lt4hala-1.5 @@ -126,8 +126,8 @@ Word Probability Findings in the <fixed-case>V</fixed-case>oynich Manuscript ColinLayfield - Lonnekevan der Plas - MichaelRosner + Lonnekevan der Plas + MichaelRosner JohnAbela 74–78 The Voynich Manuscript has baffled scholars for centuries. Some believe the elaborate 15th century codex to be a hoax whilst others believe it is a real medieval manuscript whose contents are as yet unknown. In this paper, we provide additional evidence that the text of the manuscript displays the hallmarks of a proper natural language with respect to the relationship between word probabilities and (i) average information per subword segment and (ii) the relative positioning of consecutive subword segments necessary to uniquely identify words of different probabilities. @@ -138,7 +138,7 @@ Comparing Statistical and Neural Models for Learning Sound Correspondences ClémentineFourrier - BenoîtSagot + BenoîtSagot 79–83 Cognate prediction and proto-form reconstruction are key tasks in computational historical linguistics that rely on the study of sound change regularity. Solving these tasks appears to be very similar to machine translation, though methods from that field have barely been applied to historical linguistics. Therefore, in this paper, we investigate the learnability of sound correspondences between a proto-language and daughter languages for two machine-translation-inspired models, one statistical, the other neural. We first carry out our experiments on plausible artificial languages, without noise, in order to study the role of each parameter on the algorithms respective performance under almost perfect conditions. We then study real languages, namely Latin, Italian and Spanish, to see if those performances generalise well. We show that both model types manage to learn sound changes despite data scarcity, although the best performing model type depends on several parameters such as the size of the training data, the ambiguity, and the prediction direction. 2020.lt4hala-1.12 @@ -160,7 +160,7 @@ <fixed-case>L</fixed-case>atin-<fixed-case>S</fixed-case>panish Neural Machine Translation: from the <fixed-case>B</fixed-case>ible to Saint Augustine - EvaMartínez Garcia + EvaMartínez Garcia ÁlvaroGarcía Tejedor 94–99 Although there are several sources where to find historical texts, they usually are available in the original language that makes them generally inaccessible. This paper presents the development of state-of-the-art Neural Machine Systems for the low-resourced Latin-Spanish language pair. First, we build a Transformer-based Machine Translation system on the Bible parallel corpus. Then, we build a comparable corpus from Saint Augustine texts and their translations. We use this corpus to study the domain adaptation case from the Bible texts to Saint Augustine’s works. Results show the difficulties of handling a low-resourced language as Latin. First, we noticed the importance of having enough data, since the systems do not achieve high BLEU scores. Regarding domain adaptation, results show how using in-domain data helps systems to achieve a better quality translation. Also, we observed that it is needed a higher amount of data to perform an effective vocabulary extension that includes in-domain vocabulary. @@ -187,7 +187,7 @@ Overview of the <fixed-case>E</fixed-case>va<fixed-case>L</fixed-case>atin 2020 Evaluation Campaign RacheleSprugnoli MarcoPassarotti - Flavio MassimilianoCecchini + Flavio MassimilianoCecchini MatteoPellegrini 105–110 This paper describes the first edition of EvaLatin, a campaign totally devoted to the evaluation of NLP tools for Latin. The two shared tasks proposed in EvaLatin 2020, i. e. Lemmatization and Part-of-Speech tagging, are aimed at fostering research in the field of language technologies for Classical languages. The shared dataset consists of texts taken from the Perseus Digital Library, processed with UDPipe models and then manually corrected by Latin experts. The training set includes only prose texts by Classical authors. The test set, alongside with prose texts by the same authors represented in the training set, also includes data relative to poetry and to the Medieval period. This also allows us to propose the Cross-genre and Cross-time subtasks for each task, in order to evaluate the portability of NLP tools for Latin across different genres and time periods. The results obtained by the participants for each task and subtask are presented and discussed. diff --git a/data/xml/2020.mmw.xml b/data/xml/2020.mmw.xml index 8b062093bc..c1bec5f118 100644 --- a/data/xml/2020.mmw.xml +++ b/data/xml/2020.mmw.xml @@ -5,7 +5,7 @@ Proceedings of the LREC 2020 Workshop on Multimodal Wordnets (MMW2020) ThierryDeclerk ItziarGonzalez-Dios - GermanRigau + GermanRigau The European Language Resources Association (ELRA)
Marseille, France
May @@ -43,7 +43,7 @@
<fixed-case>E</fixed-case>nglish <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et 2020: Improving and Extending a <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for <fixed-case>E</fixed-case>nglish using an Open-Source Methodology - John PhilipMcCrae + John PhilipMcCrae AlexandreRademaker EwaRudnicka FrancisBond diff --git a/data/xml/2020.msr.xml b/data/xml/2020.msr.xml index e53d1889c5..e3891f9235 100644 --- a/data/xml/2020.msr.xml +++ b/data/xml/2020.msr.xml @@ -3,7 +3,7 @@ Proceedings of the Third Workshop on Multilingual Surface Realisation - AnyaBelz + AnyaBelz BerndBohnet Thiago CastroFerreira YvetteGraham @@ -24,7 +24,7 @@ SimonMille AnyaBelz BerndBohnet - ThiagoCastro Ferreira + ThiagoCastro Ferreira YvetteGraham LeoWanner 1–20 @@ -73,7 +73,7 @@ <fixed-case>NILC</fixed-case> at <fixed-case>SR</fixed-case>’20: Exploring Pre-Trained Models in Surface Realisation - Marco AntonioSobrevilla Cabezudo + Marco AntonioSobrevilla Cabezudo ThiagoPardo 50–56 This paper describes the submission by the NILC Computational Linguistics research group of the University of S ̃ao Paulo/Brazil to the English Track 2 (closed sub-track) at the Surface Realisation Shared Task 2020. The success of the current pre-trained models like BERT or GPT-2 in several tasks is well-known, however, this is not the case for data-to-text generation tasks and just recently some initiatives focused on it. This way, we explore how a pre-trained model (GPT-2) performs on the UD-to-text generation task. In general, the achieved results were poor, but there are some interesting ideas to explore. Among the learned lessons we may note that it is necessary to study strategies to represent UD inputs and to introduce structural knowledge into these pre-trained models. diff --git a/data/xml/2020.multilingualbio.xml b/data/xml/2020.multilingualbio.xml index 18fbcb302d..bb91183398 100644 --- a/data/xml/2020.multilingualbio.xml +++ b/data/xml/2020.multilingualbio.xml @@ -29,7 +29,7 @@ Building a <fixed-case>N</fixed-case>orwegian Lexical Resource for Medical Entity Recognition IldikoPilan Pål H.Brekke - LiljaØvrelid + LiljaØvrelid 9–14 We present a large Norwegian lexical resource of categorized medical terms. The resource, which merges information from large medical databases, contains over 56,000 entries, including automatically mapped terms from a Norwegian medical dictionary. We describe the methodology behind this automatic dictionary entry mapping based on keywords and suffixes and further present the results of a manual evaluation performed on a subset by a domain expert. The evaluation indicated that ca. 80% of the mappings were correct. 2020.multilingualbio-1.2 @@ -38,7 +38,7 @@ Localising the Clinical Terminology <fixed-case>SNOMED</fixed-case> <fixed-case>CT</fixed-case> by Semi-automated Creation of a <fixed-case>G</fixed-case>erman Interface Vocabulary - StefanSchulz + StefanSchulz LarissaHammer DavidHashemian-Nik MarkusKreuzthaler @@ -64,8 +64,8 @@ Transfer learning applied to text classification in <fixed-case>S</fixed-case>panish radiological reports PilarLópez Úbeda Manuel CarlosDíaz-Galiano - L. AlfonsoUrena Lopez - MaiteMartin + L. AlfonsoUrena Lopez + MaiteMartin TeodoroMartín-Noguerol AntonioLuna 29–32 diff --git a/data/xml/2020.mwe.xml b/data/xml/2020.mwe.xml index 0fbf83ceb2..366d17c43a 100644 --- a/data/xml/2020.mwe.xml +++ b/data/xml/2020.mwe.xml @@ -3,8 +3,8 @@ Proceedings of the Joint Workshop on Multiword Expressions and Electronic Lexicons - StellaMarkantonatou - JohnMcCrae + StellaMarkantonatou + JohnMcCrae JelenaMitrović CaroleTiberius CarlosRamisch @@ -24,7 +24,7 @@ <fixed-case>C</fixed-case>oll<fixed-case>F</fixed-case>r<fixed-case>E</fixed-case>n: Rich Bilingual <fixed-case>E</fixed-case>nglish–<fixed-case>F</fixed-case>rench Collocation Resource BeatrizFisas - LuisEspinosa Anke + LuisEspinosa Anke JoanCodina-Filbá LeoWanner 1–12 @@ -62,7 +62,7 @@ <fixed-case>P</fixed-case>olish corpus of verbal multiword expressions AgataSavary - JakubWaszczuk + JakubWaszczuk 32–43 This paper describes a manually annotated corpus of verbal multi-word expressions in Polish. It is among the 4 biggest datasets in release 1.2 of the PARSEME multiligual corpus. We describe the data sources, as well as the annotation process and its outcomes. We also present interesting phenomena encountered during the annotation task and put forward enhancements for the PARSEME annotation guidelines. 2020.mwe-1.5 @@ -110,7 +110,7 @@ Multi-word Expressions for Abusive Speech Detection in <fixed-case>S</fixed-case>erbian - RankaStanković + RankaStanković JelenaMitrović DankaJokić CvetanaKrstev @@ -150,13 +150,13 @@ CarlosRamisch AgataSavary BrunoGuillaume - JakubWaszczuk - MarieCandito + JakubWaszczuk + MarieCandito AshwiniVaidya - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu ArchnaBhatia UxoaIñurrieta - VoulaGiouli + VoulaGiouli TungaGüngör MenghanJiang TimmLichte diff --git a/data/xml/2020.ngt.xml b/data/xml/2020.ngt.xml index 0ab43387b0..46a8c46df3 100644 --- a/data/xml/2020.ngt.xml +++ b/data/xml/2020.ngt.xml @@ -56,7 +56,7 @@ Balancing Cost and Benefit with Tied-Multi Transformers RajDabre - RaphaelRubino + RaphaelRubino AtsushiFujita 24–34 We propose a novel procedure for training multiple Transformers with tied parameters which compresses multiple models into one enabling the dynamic choice of the number of encoder and decoder layers during decoding. In training an encoder-decoder model, typically, the output of the last layer of the N-layer encoder is fed to the M-layer decoder, and the output of the last decoder layer is used to compute loss. Instead, our method computes a single loss consisting of NxM losses, where each loss is computed from the output of one of the M decoder layers connected to one of the N encoder layers. Such a model subsumes NxM models with different number of encoder and decoder layers, and can be used for decoding with fewer than the maximum number of encoder and decoder layers. Given our flexible tied model, we also address to a-priori selection of the number of encoder and decoder layers for faster decoding, and explore recurrent stacking of layers and knowledge distillation for model compression. We present a cost-benefit analysis of applying the proposed approaches for neural machine translation and show that they reduce decoding costs while preserving translation quality. @@ -79,8 +79,8 @@ Meta-Learning for Few-Shot <fixed-case>NMT</fixed-case> Adaptation AmrSharaf - HanyHassan - HalDaumé III + HanyHassan + HalDaumé III 43–53 We present META-MT, a meta-learning approach to adapt Neural Machine Translation (NMT) systems in a few-shot setting. META-MT provides a new approach to make NMT models easily adaptable to many target do- mains with the minimal amount of in-domain data. We frame the adaptation of NMT systems as a meta-learning problem, where we learn to adapt to new unseen domains based on simulated offline meta-training domain adaptation tasks. We evaluate the proposed meta-learning strategy on ten domains with general large scale NMT systems. We show that META-MT significantly outperforms classical domain adaptation when very few in- domain examples are available. Our experiments shows that META-MT can outperform classical fine-tuning by up to 2.5 BLEU points after seeing only 4, 000 translated words (300 parallel sentences). 2020.ngt-1.5 @@ -129,7 +129,7 @@ SaschaRothe SimonBaumgartner CongYu - AbeIttycheriah + AbeIttycheriah 79–87 We evaluate the performance of transformer encoders with various decoders for information organization through a new task: generation of section headings for Wikipedia articles. Our analysis shows that decoders containing attention mechanisms over the encoder output achieve high-scoring results by generating extractive text. In contrast, a decoder without attention better facilitates semantic encoding and can be used to generate section embeddings. We additionally introduce a new loss function, which further encourages the decoder to generate high-quality embeddings. 2020.ngt-1.9 @@ -233,7 +233,7 @@ Expand and Filter: <fixed-case>CUNI</fixed-case> and <fixed-case>LMU</fixed-case> Systems for the <fixed-case>WNGT</fixed-case> 2020 <fixed-case>D</fixed-case>uolingo Shared Task JindřichLibovický ZdeněkKasner - JindřichHelcl + JindřichHelcl OndřejDušek 153–160 We present our submission to the Simultaneous Translation And Paraphrase for Language Education (STAPLE) challenge. We used a standard Transformer model for translation, with a crosslingual classifier predicting correct translations on the output n-best list. To increase the diversity of the outputs, we used additional data to train the translation model, and we trained a paraphrasing model based on the Levenshtein Transformer architecture to generate further synonymous translations. The paraphrasing results were again filtered using our classifier. While the use of additional data and our classifier filter were able to improve results, the paraphrasing model produced too many invalid outputs to further improve the output quality. Our model without the paraphrasing component finished in the middle of the field for the shared task, improving over the best baseline by a margin of 10-22 % weighted F1 absolute. @@ -281,7 +281,7 @@ The <fixed-case>JHU</fixed-case> Submission to the 2020 <fixed-case>D</fixed-case>uolingo Shared Task on Simultaneous Translation and Paraphrase for Language Education HudaKhayrallah JacobBremerman - Arya D.McCarthy + Arya D.McCarthy KentonMurray WinstonWu MattPost @@ -324,7 +324,7 @@ GuillaumeKlein DakunZhang ClémentChouteau - JosepCrego + JosepCrego JeanSenellart 211–217 This paper describes the OpenNMT submissions to the WNGT 2020 efficiency shared task. We explore training and acceleration of Transformer models with various sizes that are trained in a teacher-student setup. We also present a custom and optimized C++ inference engine that enables fast CPU and GPU decoding with few dependencies. By combining additional optimizations and parallelization techniques, we create small, efficient, and high-quality neural machine translation models. diff --git a/data/xml/2020.nl4xai.xml b/data/xml/2020.nl4xai.xml index 451293d300..8a481b7984 100644 --- a/data/xml/2020.nl4xai.xml +++ b/data/xml/2020.nl4xai.xml @@ -3,7 +3,7 @@ 2nd Workshop on Interactive Natural Language Technology for Explainable Artificial Intelligence - Jose M.Alonso + Jose M.Alonso AlejandroCatala Association for Computational Linguistics
Dublin, Ireland
@@ -37,7 +37,7 @@ LucaAnselma MirkoDi Lascio DarioMana - AlessandroMazzei + AlessandroMazzei ManuelaSanguinetti 5–10 This paper describes a content selection module for the generation of explanations in a dialogue system designed for customer care domain. First we describe the construction of a corpus of a dialogues containing explanation requests from customers to a virtual agent of a telco, and second we study and formalize the importance of a specific information content for the generated message. In particular, we adapt the notions of importance and relevance in the case of schematic knowledge bases. @@ -81,7 +81,7 @@ Explaining <fixed-case>B</fixed-case>ayesian Networks in Natural Language: State of the Art and Challenges ConorHennessy - AlbertoBugarín + AlbertoBugarín EhudReiter 28–33 In order to increase trust in the usage of Bayesian Networks and to cement their role as a model which can aid in critical decision making, the challenge of explainability must be faced. Previous attempts at explaining Bayesian Networks have largely focused on graphical or visual aids. In this paper we aim to highlight the importance of a natural language approach to explanation and to discuss some of the previous and state of the art attempts of the textual explanation of Bayesian Networks. We outline several challenges that remain to be addressed in the generation and validation of natural language explanations of Bayesian Networks. This can serve as a reference for future work on natural language explanations of Bayesian Networks. @@ -99,7 +99,7 @@ Towards Generating Effective Explanations of Logical Formulas: Challenges and Strategies AlexandraMayn - Keesvan Deemter + Keesvan Deemter 39–43 While the problem of natural language generation from logical formulas has a long tradition, thus far little attention has been paid to ensuring that the generated explanations are optimally effective for the user. We discuss issues related to deciding what such output should look like and strategies for addressing those issues. We stress the importance of informing generation of NL explanations of logical formulas through reader studies and findings on the comprehension of logic from Pragmatics and Cognitive Science. We then illustrate the discussed issues and potential ways of addressing them using a simple demo system’s output generated from a propositional logic formula. 2020.nl4xai-1.9 @@ -119,7 +119,7 @@ Toward Natural Language Mitigation Strategies for Cognitive Biases in Recommender Systems AlisaRieger - MariëtTheune + MariëtTheune NavaTintarev 50–54 Cognitive biases in the context of consuming online information filtered by recommender systems may lead to sub-optimal choices. One approach to mitigate such biases is through interface and interaction design. This survey reviews studies focused on cognitive bias mitigation of recommender system users during two processes: 1) item selection and 2) preference elicitation. It highlights a number of promising directions for Natural Language Generation research for mitigating cognitive bias including: the need for personalization, as well as for transparency and control. diff --git a/data/xml/2020.nli.xml b/data/xml/2020.nli.xml index 49ee45cb84..9177048203 100644 --- a/data/xml/2020.nli.xml +++ b/data/xml/2020.nli.xml @@ -3,10 +3,10 @@ Proceedings of the First Workshop on Natural Language Interfaces - Ahmed HassanAwadallah + Ahmed HassanAwadallah YuSu HuanSun - Scott Wen-tauYih + Scott Wen-tauYih Association for Computational Linguistics
Online
July @@ -61,7 +61,7 @@ Efficient Deployment of Conversational Natural Language Interfaces over Databases AnthonyColas - TrungBui + TrungBui FranckDernoncourt MoumitaSinha Doo SoonKim diff --git a/data/xml/2020.nlp4call.xml b/data/xml/2020.nlp4call.xml index 5e5f23b870..f847c048bd 100644 --- a/data/xml/2020.nlp4call.xml +++ b/data/xml/2020.nlp4call.xml @@ -22,7 +22,7 @@ Substituto – A Synchronous Educational Language Game for Simultaneous Teaching and Crowdsourcing Marianne GraceAraneta - GülşenEryiğit + GülşenEryiğit AlexanderKönig Ji-UngLee AnaLuís @@ -50,7 +50,7 @@ Polygloss - A conversational agent for language practice Etieneda Cruz Dalcol - MassimoPoesio + MassimoPoesio 21–36 2020.nlp4call-1.3 da-cruz-dalcol-poesio-2020-polygloss diff --git a/data/xml/2020.nlp4convai.xml b/data/xml/2020.nlp4convai.xml index 81712e0d78..c2bea5bea8 100644 --- a/data/xml/2020.nlp4convai.xml +++ b/data/xml/2020.nlp4convai.xml @@ -6,7 +6,7 @@ Tsung-HsienWen AsliCelikyilmaz ZhouYu - AlexandrosPapangelis + AlexandrosPapangelis MihailEric AnujKumar IñigoCasanueva @@ -39,8 +39,8 @@ On Incorporating Structural Information to improve Dialogue Response Generation NikitaMoghe PriyeshVijayan - BalaramanRavindran - Mitesh M.Khapra + BalaramanRavindran + Mitesh M.Khapra 11–24 We consider the task of generating dialogue responses from background knowledge comprising of domain specific resources. Specifically, given a conversation around a movie, the task is to generate the next response based on background knowledge about the movie such as the plot, review, Reddit comments etc. This requires capturing structural, sequential and semantic information from the conversation context and the background resources. We propose a new architecture that uses the ability of BERT to capture deep contextualized representations in conjunction with explicit structure and sequence information. More specifically, we use (i) Graph Convolutional Networks (GCNs) to capture structural information, (ii) LSTMs to capture sequential information and (iii) BERT for the deep contextualized representations that capture semantic information. We analyze the proposed architecture extensively. To this end, we propose a plug-and-play Semantics-Sequences-Structures (SSS) framework which allows us to effectively combine such linguistic information. Through a series of experiments we make some interesting observations. First, we observe that the popular adaptation of the GCN model for NLP tasks where structural information (GCNs) was added on top of sequential information (LSTMs) performs poorly on our task. This leads us to explore interesting ways of combining semantic and structural information to improve the performance. Second, we observe that while BERT already outperforms other deep contextualized representations such as ELMo, it still benefits from the additional structural information explicitly added using GCNs. This is a bit surprising given the recent claims that BERT already captures structural information. Lastly, the proposed SSS framework gives an improvement of 7.95% on BLUE score over the baseline. 2020.nlp4convai-1.2 @@ -52,7 +52,7 @@ <fixed-case>C</fixed-case>opy<fixed-case>BERT</fixed-case>: A Unified Approach to Question Generation with Self-Attention StalinVaranasi SaadullahAmin - GuenterNeumann + GuenterNeumann 25–31 Contextualized word embeddings provide better initialization for neural networks that deal with various natural language understanding (NLU) tasks including Question Answering (QA) and more recently, Question Generation(QG). Apart from providing meaningful word representations, pre-trained transformer models (Vaswani et al., 2017), such as BERT (Devlin et al., 2019) also provide self-attentions which encode syntactic information that can be probed for dependency parsing (Hewitt and Manning, 2019) and POStagging (Coenen et al., 2019). In this paper, we show that the information from selfattentions of BERT are useful for language modeling of questions conditioned on paragraph and answer phrases. To control the attention span, we use semi-diagonal mask and utilize a shared model for encoding and decoding, unlike sequence-to-sequence. We further employ copy-mechanism over self-attentions to acheive state-of-the-art results for Question Generation on SQuAD v1.1 (Rajpurkar et al., 2016). 2020.nlp4convai-1.3 @@ -146,7 +146,7 @@ SanchitAgarwal DiJin TagyoungChung - DilekHakkani-Tur + DilekHakkani-Tur 79–89 Dialogue state tracking (DST) is at the heart of task-oriented dialogue systems. However, the scarcity of labeled data is an obstacle to building accurate and robust state tracking systems that work across a variety of domains. Existing approaches generally require some dialogue data with state information and their ability to generalize to unknown domains is limited. In this paper, we propose using machine reading comprehension (RC) in state tracking from two perspectives: model architectures and datasets. We divide the slot types in dialogue state into categorical or extractive to borrow the advantages from both multiple-choice and span-based reading comprehension models. Our method achieves near the current state-of-the-art in joint goal accuracy on MultiWOZ 2.1 given full training data. More importantly, by leveraging machine reading comprehension datasets, our method outperforms the existing approaches by many a large margin in few-shot scenarios when the availability of in-domain data is limited. Lastly, even without any state tracking data, i.e., zero-shot scenario, our proposed approach achieves greater than 90% average slot accuracy in 12 out of 30 slots in MultiWOZ 2.1. 2020.nlp4convai-1.10 @@ -170,7 +170,7 @@ Learning to Classify Intents and Slot Labels Given a Handful of Examples JasonKrone YiZhang - MonaDiab + MonaDiab 96–108 Intent classification (IC) and slot filling (SF) are core components in most goal-oriented dialogue systems. Current IC/SF models perform poorly when the number of training examples per class is small. We propose a new few-shot learning task, few-shot IC/SF, to study and improve the performance of IC and SF models on classes not seen at training time in ultra low resource scenarios. We establish a few-shot IC/SF benchmark by defining few-shot splits for three public IC/SF datasets, ATIS, TOP, and Snips. We show that two popular few-shot learning algorithms, model agnostic meta learning (MAML) and prototypical networks, outperform a fine-tuning baseline on this benchmark. Prototypical networks achieves significant gains in IC performance on the ATIS and TOP datasets, while both prototypical networks and MAML outperform the baseline with respect to SF on all three datasets. In addition, we demonstrate that joint training as well as the use of pre-trained language models, ELMo and BERT in our case, are complementary to these few-shot learning methods and yield further gains. 2020.nlp4convai-1.12 @@ -213,7 +213,7 @@ TovlyDeutsch StephenCasper YonatanBelinkov - StuartShieber + StuartShieber 132–143 The predominant approach to open-domain dialog generation relies on end-to-end training of neural models on chat datasets. However, this approach provides little insight as to what these models learn (or do not learn) about engaging in dialog. In this study, we analyze the internal representations learned by neural open-domain dialog systems and evaluate the quality of these representations for learning basic conversational skills. Our results suggest that standard open-domain dialog systems struggle with answering questions, inferring contradiction, and determining the topic of conversation, among other tasks. We also find that the dyadic, turn-taking nature of dialog is not fully leveraged by these models. By exploring these limitations, we highlight the need for additional research into architectures and training methods that can better capture high-level information about dialog. 2020.nlp4convai-1.15 diff --git a/data/xml/2020.nlp4if.xml b/data/xml/2020.nlp4if.xml index 853e730df9..1ba4f6f9e1 100644 --- a/data/xml/2020.nlp4if.xml +++ b/data/xml/2020.nlp4if.xml @@ -8,7 +8,7 @@ Giovanni LucaCiampaglia AnnaFeldman ChrisLeberknight - PreslavNakov + PreslavNakov International Committee on Computational Linguistics (ICCL)
Barcelona, Spain (Online)
December diff --git a/data/xml/2020.nlp4musa.xml b/data/xml/2020.nlp4musa.xml index 32c46b5949..c75b18ca3e 100644 --- a/data/xml/2020.nlp4musa.xml +++ b/data/xml/2020.nlp4musa.xml @@ -4,7 +4,7 @@ Proceedings of the 1st Workshop on NLP for Music and Audio (NLP4MusA) SergioOramas - LuisEspinosa-Anke + LuisEspinosa-Anke ElenaEpure RosieJones MohamedSordo diff --git a/data/xml/2020.nlpbt.xml b/data/xml/2020.nlpbt.xml index 41b3a47f2f..dc0633ab7c 100644 --- a/data/xml/2020.nlpbt.xml +++ b/data/xml/2020.nlpbt.xml @@ -57,7 +57,7 @@
A Benchmark for Structured Procedural Knowledge Extraction from Cooking Videos - Frank F.Xu + Frank F.Xu LeiJi BotianShi JunyiDu @@ -107,7 +107,7 @@ Towards End-to-End In-Image Neural Machine Translation ElmanMansimov MitchellStern - MiaChen + MiaChen OrhanFirat JakobUszkoreit PuneetJain diff --git a/data/xml/2020.nlpcovid19.xml b/data/xml/2020.nlpcovid19.xml index cc9bf4a325..747481bc3b 100644 --- a/data/xml/2020.nlpcovid19.xml +++ b/data/xml/2020.nlpcovid19.xml @@ -3,14 +3,14 @@ Proceedings of the 1st Workshop on NLP for COVID-19 at ACL 2020 - KarinVerspoor - Kevin BretonnelCohen + KarinVerspoor + Kevin BretonnelCohen MarkDredze EmilioFerrara JonathanMay RobertMunro - CecileParis - ByronWallace + CecileParis + ByronWallace Association for Computational Linguistics
Online
July @@ -23,7 +23,7 @@ <fixed-case>CORD-19</fixed-case>: The <fixed-case>COVID-19</fixed-case> Open Research Dataset - Lucy LuWang + Lucy LuWang KyleLo YoganandChandrasekhar RussellReas @@ -48,7 +48,7 @@ ChristopherWilhelm BoyaXie Douglas M.Raymond - Daniel S.Weld + Daniel S.Weld OrenEtzioni SebastianKohlmeier The COVID-19 Open Research Dataset (CORD-19) is a growing resource of scientific papers on COVID-19 and related historical coronavirus research. CORD-19 is designed to facilitate the development of text mining and information retrieval systems over its rich collection of metadata and structured full text papers. Since its release, CORD-19 has been downloaded over 200K times and has served as the basis of many COVID-19 text mining and discovery systems. In this article, we describe the mechanics of dataset construction, highlighting challenges and key design decisions, provide an overview of how CORD-19 has been used, and describe several shared tasks built around the dataset. We hope this resource will continue to bring together the computing community, biomedical experts, and policy makers in the search for effective treatments and management policies for COVID-19. @@ -98,7 +98,7 @@ <fixed-case>CODA-19</fixed-case>: Using a Non-Expert Crowd to Annotate Research Aspects on 10,000+ Abstracts in the <fixed-case>COVID-19</fixed-case> Open Research Dataset - Ting-Hao KennethHuang + Ting-Hao KennethHuang Chieh-YangHuang Chien-Kuang CorneliaDing Yen-ChiaHsu @@ -231,13 +231,13 @@ Proceedings of the 1st Workshop on NLP for COVID-19 (Part 2) at EMNLP 2020 - KarinVerspoor - Kevin BretonnelCohen + KarinVerspoor + Kevin BretonnelCohen MichaelConway Berryde Bruijn MarkDredze - RadaMihalcea - ByronWallace + RadaMihalcea + ByronWallace Association for Computational Linguistics
Online
December @@ -390,7 +390,7 @@ BrandonWaldon Shrinidhi KLakshmikanth IshanShah - Sharath ChandraGuntuku + Sharath ChandraGuntuku GarrickSherman JamesZou JohannesEichstaedt @@ -415,10 +415,10 @@
Improved Topic Representations of Medical Documents to Assist <fixed-case>COVID</fixed-case>-19 Literature Exploration - YuliaOtmakhova + YuliaOtmakhova KarinVerspoor - TimothyBaldwin - SimonŠuster + TimothyBaldwin + SimonŠuster Efficient discovery and exploration of biomedical literature has grown in importance in the context of the COVID-19 pandemic, and topic-based methods such as latent Dirichlet allocation (LDA) are a useful tool for this purpose. In this study we compare traditional topic models based on word tokens with topic models based on medical concepts, and propose several ways to improve topic coherence and specificity. 2020.nlpcovid19-2.12 10.18653/v1/2020.nlpcovid19-2.12 @@ -427,7 +427,7 @@ A System for Worldwide <fixed-case>COVID</fixed-case>-19 Information Aggregation - AkikoAizawa + AkikoAizawa FredericBergeron JunjieChen FeiCheng @@ -447,7 +447,7 @@ YugoMurawaki KazumasaOmura HaiyueSong - EiichiroSumita + EiichiroSumita ShinjiSuzuki RibekaTanaka YuTanaka @@ -481,8 +481,8 @@ ArantxaOtegi Jon AnderCampos GorkaAzkune - AitorSoroa - EnekoAgirre + AitorSoroa + EnekoAgirre We present a Question Answering (QA) system that won one of the tasks of the Kaggle CORD-19 Challenge, according to the qualitative evaluation of experts. The system is a combination of an Information Retrieval module and a reading comprehension module that finds the answers in the retrieved passages. In this paper we present a quantitative and qualitative analysis of the system. The quantitative evaluation using manually annotated datasets contradicted some of our design choices, e.g. the fact that using QuAC for fine-tuning provided better answers over just using SQuAD. We analyzed this mismatch with an additional A/B test which showed that the system using QuAC was indeed preferred by users, confirming our intuition. Our analysis puts in question the suitability of automatic metrics and its correlation to user preferences. We also show that automatic metrics are highly dependent on the characteristics of the gold standard, such as the average length of the answers. 2020.nlpcovid19-2.15 10.18653/v1/2020.nlpcovid19-2.15 @@ -506,7 +506,7 @@ Public Sentiment on Governmental <fixed-case>COVID</fixed-case>-19 Measures in <fixed-case>D</fixed-case>utch Social Media ShihanWang MarijnSchraagen - ErikTjong Kim Sang + ErikTjong Kim Sang MehdiDastani Public sentiment (the opinion, attitude or feeling that the public expresses) is a factor of interest for government, as it directly influences the implementation of policies. Given the unprecedented nature of the COVID-19 crisis, having an up-to-date representation of public sentiment on governmental measures and announcements is crucial. In this paper, we analyse Dutch public sentiment on governmental COVID-19 measures from text data collected across three online media sources (Twitter, Reddit and Nu.nl) from February to September 2020. We apply sentiment analysis methods to analyse polarity over time, as well as to identify stance towards two specific pandemic policies regarding social distancing and wearing face masks. The presented preliminary results provide valuable insights into the narratives shown in vast social media text data, which help understand the influence of COVID-19 measures on the general public. 2020.nlpcovid19-2.17 @@ -540,10 +540,10 @@ <fixed-case>COVID</fixed-case>-19: A Semantic-Based Pipeline for Recommending Biomedical Entities Marcia AfonsoBarros - AndreLamurias + AndreLamurias DianaSousa PedroRuas - Francisco M.Couto + Francisco M.Couto With the increasing number of publications about COVID-19, it is a challenge to extract personalized knowledge suitable for each researcher. This work aims to build a new semantic-based pipeline for recommending biomedical entities to scientific researchers. To this end, we developed a pipeline that creates an implicit feedback matrix based on Named Entity Recognition (NER) on a corpus of documents, using multidisciplinary ontologies for recognizing and linking the entities. Our hypothesis is that by using ontologies from different fields in the NER phase, we can improve the results for state-of-the-art collaborative-filtering recommender systems applied to the dataset created. The tests performed using the COVID-19 Open Research Dataset (CORD-19) dataset show that when using four ontologies, the results for precision@k, for example, reach the 80%, whereas when using only one ontology, the results for precision@k drops to 20%, for the same users. Furthermore, the use of multi-fields entities may help in the discovery of new items, even if the researchers do not have items from that field in their set of preferences. 2020.nlpcovid19-2.20 10.18653/v1/2020.nlpcovid19-2.20 @@ -555,7 +555,7 @@ HilalDönmez RızaÖzçelik ElifOzkirimli - ArzucanÖzgür + ArzucanÖzgür Coronavirus Disease of 2019 (COVID-19) created dire consequences globally and triggered an intense scientific effort from different domains. The resulting publications created a huge text collection in which finding the studies related to a biomolecule of interest is challenging for general purpose search engines because the publications are rich in domain specific terminology. Here, we present Vapur: an online COVID-19 search engine specifically designed to find related protein - chemical pairs. Vapur is empowered with a relation-oriented inverted index that is able to retrieve and group studies for a query biomolecule with respect to its related entities. The inverted index of Vapur is automatically created with a BioNLP pipeline and integrated with an online user interface. The online interface is designed for the smooth traversal of the current literature by domain researchers and is publicly available at https://tabilab.cmpe.boun.edu.tr/vapur/. 2020.nlpcovid19-2.21 10.18653/v1/2020.nlpcovid19-2.21 @@ -566,9 +566,9 @@ AlejandroPiad-Morffis SuilanEstevez-Velarde Ernesto LuisEstevanell-Valladares - YoanGutiérrez - AndrésMontoyo - RafaelMuñoz + YoanGutiérrez + AndrésMontoyo + RafaelMuñoz YudiviánAlmeida-Cruz This paper presents the preliminary results of an ongoing project that analyzes the growing body of scientific research published around the COVID-19 pandemic. In this research, a general-purpose semantic model is used to double annotate a batch of 500 sentences that were manually selected by the researchers from the CORD-19 corpus. Afterwards, a baseline text-mining pipeline is designed and evaluated via a large batch of 100,959 sentences. We present a qualitative analysis of the most interesting facts automatically extracted and highlight possible future lines of development. The preliminary results show that general-purpose semantic models are a useful tool for discovering fine-grained knowledge in large corpora of scientific documents. 2020.nlpcovid19-2.22 @@ -634,10 +634,10 @@ <fixed-case>A</fixed-case>sk<fixed-case>M</fixed-case>e: A <fixed-case>LAPPS</fixed-case> <fixed-case>G</fixed-case>rid-based <fixed-case>NLP</fixed-case> Query and Retrieval System for Covid-19 Literature KeithSuderman - NancyIde + NancyIde VerhagenMarc BrentCochran - JamesPustejovsky + JamesPustejovsky In a recent project, the Language Application Grid was augmented to support the mining of scientific publications. The results of that ef- fort have now been repurposed to focus on Covid-19 literature, including modification of the LAPPS Grid “AskMe” query and retrieval engine. We describe the AskMe system and discuss its functionality as compared to other query engines available to search covid-related publications. 2020.nlpcovid19-2.28 10.18653/v1/2020.nlpcovid19-2.28 @@ -708,7 +708,7 @@ <fixed-case>W</fixed-case>eibo-<fixed-case>COV</fixed-case>: A Large-Scale <fixed-case>COVID</fixed-case>-19 Social Media Dataset from <fixed-case>W</fixed-case>eibo YongHu - HeyanHuang + HeyanHuang AnfanChen Xian-LingMao With the rapid development of COVID-19 around the world, people are requested to maintain “social distance” and “stay at home”. In this scenario, extensive social interactions transfer to cyberspace, especially on social media platforms like Twitter and Sina Weibo. People generate posts to share information, express opinions and seek help during the pandemic outbreak, and these kinds of data on social media are valuable for studies to prevent COVID-19 transmissions, such as early warning and outbreaks detection. Therefore, in this paper, we release a novel and fine-grained large-scale COVID-19 social media dataset collected from Sina Weibo, named Weibo-COV, contains more than 40 million posts ranging from December 1, 2019 to April 30, 2020. Moreover, this dataset includes comprehensive information nuggets like post-level information, interactive information, location information, and repost network. We hope this dataset can promote studies of COVID-19 from multiple perspectives and enable better and rapid researches to suppress the spread of this pandemic. @@ -719,10 +719,10 @@ Detecting Emerging Symptoms of <fixed-case>COVID</fixed-case>-19 using Context-based <fixed-case>T</fixed-case>witter Embeddings RoshanSantosh - H. AndrewSchwartz + H. AndrewSchwartz JohannesEichstaedt - LyleUngar - Sharath ChandraGuntuku + LyleUngar + Sharath ChandraGuntuku In this paper, we present an iterative graph-based approach for the detection of symptoms of COVID-19, the pathology of which seems to be evolving. More generally, the method can be applied to finding context-specific words and texts (e.g. symptom mentions) in large imbalanced corpora (e.g. all tweets mentioning #COVID-19). Given the novelty of COVID-19, we also test if the proposed approach generalizes to the problem of detecting Adverse Drug Reaction (ADR). We find that the approach applied to Twitter data can detect symptom mentions substantially before to their being reported by the Centers for Disease Control (CDC). 2020.nlpcovid19-2.35 10.18653/v1/2020.nlpcovid19-2.35 diff --git a/data/xml/2020.nlpcss.xml b/data/xml/2020.nlpcss.xml index 009dd63e32..c1eff98353 100644 --- a/data/xml/2020.nlpcss.xml +++ b/data/xml/2020.nlpcss.xml @@ -50,7 +50,7 @@ NicoBlokker ErenayDayanik GabriellaLapesa - SebastianPadó + SebastianPadó 24–34 Manifestos are official documents of political parties, providing a comprehensive topical overview of the electoral programs. Voters, however, seldom read them and often prefer other channels, such as newspaper articles, to understand the party positions on various policy issues. The natural question to ask is how compatible these two formats (manifesto and newspaper reports) are in their representation of party positioning. We address this question with an approach that combines political science (manual annotation and analysis) and natural language processing (supervised claim identification) in a cross-text type setting: we train a classifier on annotated newspaper data and test its performance on manifestos. Our findings show a) strong performance for supervised classification even across text types and b) a substantive overlap between the two formats in terms of party positioning, with differences regarding the salience of specific issues. 2020.nlpcss-1.3 @@ -61,7 +61,7 @@ Does Social Support (Expressed in Post Titles) Elicit Comments in Online Substance Use Recovery Forums? AnietieAndy - Sharath ChandraGuntuku + Sharath ChandraGuntuku 35–40 Individuals recovering from substance use often seek social support (emotional and informational) on online recovery forums, where they can both write and comment on posts, expressing their struggles and successes. A common challenge in these forums is that certain posts (some of which may be support seeking) receive no comments. In this work, we use data from two Reddit substance recovery forums: /r/Leaves and /r/OpiatesRecovery, to determine the relationship between the social supports expressed in the titles of posts and the number of comments they receive. We show that the types of social support expressed in post titles that elicit comments vary from one substance use recovery forum to the other. 2020.nlpcss-1.4 @@ -83,7 +83,7 @@ Assessing population-level symptoms of anxiety, depression, and suicide risk in real time using <fixed-case>NLP</fixed-case> applied to social media data - AlexFine + AlexFine PatrickCrutchley JennyBlase JoshuaCarroll @@ -188,7 +188,7 @@ Recalibrating classifiers for interpretable abusive content detection BertieVidgen - Scott A.Hale + Scott A.Hale SamStaton TomMelham HelenMargetts @@ -204,7 +204,7 @@ Predicting independent living outcomes from written reports of social workers AngelikaMaier - PhilippCimiano + PhilippCimiano 139–148 In social care environments, the main goal of social workers is to foster independent living by their clients. An important task is thus to monitor progress towards reaching independence in different areas of their patients’ life. To support this task, we present an approach that extracts indications of independence on different life aspects from the day-to-day documentation that social workers create. We describe the process of collecting and annotating a corresponding corpus created from data records of two social work institutions with a focus on disability care. We show that the agreement on the task of annotating the observations of social workers with respect to discrete independent levels yields a high agreement of .74 as measured by Fleiss’ Kappa. We present a classification approach towards automatically classifying an observation into the discrete independence levels and present results for different types of classifiers. Against our original expectation, we show that we reach F-Measures (macro) of 95% averaged across topics, showing that this task can be automatically solved. 2020.nlpcss-1.15 @@ -215,7 +215,7 @@ Analyzing Political Bias and Unfairness in News Articles at Different Levels of Granularity Wei-FanChen - KhalidAl Khatib + KhalidAl Khatib HenningWachsmuth BennoStein 149–154 @@ -260,7 +260,7 @@ Social media data as a lens onto care-seeking behavior among women veterans of the <fixed-case>US</fixed-case> armed forces KacieKelly - AlexFine + AlexFine GlenCoppersmith 184–192 In this article, we examine social media data as a lens onto support-seeking among women veterans of the US armed forces. Social media data hold a great deal of promise as a source of information on needs and support-seeking among individuals who are excluded from or systematically prevented from accessing clinical or other institutions ostensibly designed to support them. We apply natural language processing (NLP) techniques to more than 3 million Tweets collected from 20,000 Twitter users. We find evidence that women veterans are more likely to use social media to seek social and community engagement and to discuss mental health and veterans’ issues significantly more frequently than their male counterparts. By contrast, male veterans tend to use social media to amplify political ideologies or to engage in partisan debate. Our results have implications for how organizations can provide outreach and services to this uniquely vulnerable population, and illustrate the utility of non-traditional observational data sources such as social media to understand the needs of marginalized groups. @@ -272,9 +272,9 @@ Understanding Weekly <fixed-case>COVID</fixed-case>-19 Concerns through Dynamic Content-Specific <fixed-case>LDA</fixed-case> Topic Modeling MohammadzamanZamani - H. AndrewSchwartz + H. AndrewSchwartz JohannesEichstaedt - Sharath ChandraGuntuku + Sharath ChandraGuntuku AdithyaVirinchipuram Ganesan SeanClouston SalvatoreGiorgi diff --git a/data/xml/2020.nlpmc.xml b/data/xml/2020.nlpmc.xml index 7c85057190..468d61a8fe 100644 --- a/data/xml/2020.nlpmc.xml +++ b/data/xml/2020.nlpmc.xml @@ -6,11 +6,11 @@ ParminderBhatia StevenLin RashmiGangadharaiah - ByronWallace + ByronWallace IzhakShafran ChaitanyaShivade NanDu - MonaDiab + MonaDiab Association for Computational Linguistics
Online
July @@ -27,7 +27,7 @@ XiyuDing MichaelBarnett AteevMehrotra - TimothyMiller + TimothyMiller 1–6 Electronic consult (eConsult) systems allow specialists more flexibility to respond to referrals more efficiently, thereby increasing access in under-resourced healthcare settings like safety net systems. Understanding the usage patterns of eConsult system is an important part of improving specialist efficiency. In this work, we develop and apply classifiers to a dataset of eConsult questions from primary care providers to specialists, classifying the messages for how they were triaged by the specialist office, and the underlying type of clinical question posed by the primary care provider. We show that pre-trained transformer models are strong baselines, with improving performance from domain-specific training and shared representations. 2020.nlpmc-1.1 @@ -104,7 +104,7 @@ ItikaGupta BarbaraDi Eugenio DevikaSalunke - AndrewBoyd + AndrewBoyd PaulaAllen-Meares CarolynDickens OlgaGarcia @@ -119,7 +119,7 @@ On the Utility of Audiovisual Dialog Technologies and Signal Analytics for Real-time Remote Monitoring of Depression Biomarkers MichaelNeumann OliverRoessler - DavidSuendermann-Oeft + DavidSuendermann-Oeft VikramRamanarayanan 47–52 We investigate the utility of audiovisual dialog systems combined with speech and video analytics for real-time remote monitoring of depression at scale in uncontrolled environment settings. We collected audiovisual conversational data from participants who interacted with a cloud-based multimodal dialog system, and automatically extracted a large set of speech and vision metrics based on the rich existing literature of laboratory studies. We report on the efficacy of various audio and video metrics in differentiating people with mild, moderate and severe depression, and discuss the implications of these results for the deployment of such technologies in real-world neurological diagnosis and monitoring applications. diff --git a/data/xml/2020.nlposs.xml b/data/xml/2020.nlposs.xml index ae70e41e8b..de90860b24 100644 --- a/data/xml/2020.nlposs.xml +++ b/data/xml/2020.nlposs.xml @@ -6,7 +6,7 @@ Eunjeong L.Park MasatoHagiwara DmitrijsMilajevs - Nelson F.Liu + Nelson F.Liu GeetickaChauhan LilingTan Association for Computational Linguistics @@ -83,7 +83,7 @@ Flexible retrieval with <fixed-case>NMSLIB</fixed-case> and <fixed-case>F</fixed-case>lex<fixed-case>N</fixed-case>eu<fixed-case>ART</fixed-case> LeonidBoytsov - EricNyberg + EricNyberg 32–43 Our objective is to introduce to the NLP community NMSLIB, describe a new retrieval toolkit FlexNeuART, as well as their integration capabilities. NMSLIB, while being one the fastest k-NN search libraries, is quite generic and supports a variety of distance/similarity functions. Because the library relies on the distance-based structure-agnostic algorithms, it can be further extended by adding new distances. FlexNeuART is a modular, extendible and flexible toolkit for candidate generation in IR and QA applications, which supports mixing of classic and neural ranking signals. FlexNeuART can efficiently retrieve mixed dense and sparse representations (with weights learned from training data), which is achieved by extending NMSLIB. In that, other retrieval systems work with purely sparse representations (e.g., Lucene), purely dense representations (e.g., FAISS and Annoy), or only perform mixing at the re-ranking stage. 2020.nlposs-1.6 @@ -234,7 +234,7 @@ PasqualeLisena IsmailHarrando OussamaKandakji - RaphaelTroncy + RaphaelTroncy 132–140 From LDA to neural models, different topic modeling approaches have been proposed in the literature. However, their suitability and performance is not easy to compare, particularly when the algorithms are being used in the wild on heterogeneous datasets. In this paper, we introduce ToModAPI (TOpic MOdeling API), a wrapper library to easily train, evaluate and infer using different topic modeling algorithms through a unified interface. The library is extensible and can be used in Python environments or through a Web API. 2020.nlposs-1.19 @@ -256,7 +256,7 @@ <fixed-case>WAFFLE</fixed-case>: A Graph for <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Applied to <fixed-case>F</fixed-case>ree<fixed-case>F</fixed-case>orm Linguistic Exploration BerkEkmekci - BlakeHowald + BlakeHowald 147–157 The WordNet database of English (Fellbaum, 1998) is a key source of semantic information for research and development of natural language processing applications. As the sophistication of these applications increases with the use of large datasets, deep learning, and graph-based methods, so should the use of WordNet. To this end, we introduce WAFFLE: WordNet Applied to FreeForm Linguistic Exploration which makes WordNet available in an open source graph data structure. The WAFFLE graph relies on platform agnostic formats for robust interrogation and flexibility. Where existing implementations of WordNet offer dictionary-like lookup, single degree neighborhood operations, and path based similarity-scoring, the WAFFLE graph makes all nodes (semantic relation sets) and relationships queryable at scale, enabling local and global analysis of all relationships without the need for custom code. We demonstrate WAFFLE’s ease of use, visualization capabilities, and scalable efficiency with common queries, operations, and interactions. WAFFLE is available at github.com/TRSS-NLP/WAFFLE. 2020.nlposs-1.21 diff --git a/data/xml/2020.nlptea.xml b/data/xml/2020.nlptea.xml index 75c759f549..a419ebf7e0 100644 --- a/data/xml/2020.nlptea.xml +++ b/data/xml/2020.nlptea.xml @@ -100,7 +100,7 @@ YongchangCao LiangHe RobertRidley - XinyuDai + XinyuDai 49–56 This paper describes our proposed model for the Chinese Grammatical Error Diagnosis (CGED) task in NLPTEA2020. The goal of CGED is to use natural language processing techniques to automatically diagnose Chinese grammatical errors in sentences. To this end, we design and implement a CGED model named BERT with Score-feature Gates Error Diagnoser (BSGED), which is based on the BERT model, Bidirectional Long Short-Term Memory (BiLSTM) and conditional random field (CRF). In order to address the problem of losing partial-order relationships when embedding continuous feature items as with previous works, we propose a gating mechanism for integrating continuous feature items, which effectively retains the partial-order relationships between feature items. We perform LSTM processing on the encoding result of the BERT model, and further extract the sequence features. In the final test-set evaluation, we obtained the highest F1 score at the detection level and are among the top 3 F1 scores at the identification level. 2020.nlptea-1.7 @@ -127,7 +127,7 @@ MeiyuanFang KaiFu JipingWang - YangLiu + YangLiu JinHuang YitaoDuan 67–77 @@ -177,7 +177,7 @@ YingjieYan YangchaoHan RuiChao - HongyingZan + HongyingZan 97–101 Chinese Grammatical Error Diagnosis (CGED) is a natural language processing task for the NLPTEA6 workshop. The goal of this task is to automatically diagnose grammatical errors in Chinese sentences written by L2 learners. This paper proposes a RoBERTa-BiLSTM-CRF model to detect grammatical errors in sentences. Firstly, RoBERTa model is used to obtain word vectors. Secondly, word vectors are input into BiLSTM layer to learn context features. Last, CRF layer without hand-craft features work for processing the output by BiLSTM. The optimal global sequences are obtained according to state transition matrix of CRF and adjacent labels of training data. In experiments, the result of RoBERTa-CRF model and ERNIE-BiLSTM-CRF model are compared, and the impacts of parameters of the models and the testing datasets are analyzed. In terms of evaluation results, our recall score of RoBERTa-BiLSTM-CRF ranks fourth at the detection level. 2020.nlptea-1.13 @@ -188,7 +188,7 @@ <fixed-case>C</fixed-case>hinese Grammatical Errors Diagnosis System Based on <fixed-case>BERT</fixed-case> at <fixed-case>NLPTEA</fixed-case>-2020 <fixed-case>CGED</fixed-case> Shared Task - HongyingZan + HongyingZan YangchaoHan HaotianHuang YingjieYan diff --git a/data/xml/2020.nuse.xml b/data/xml/2020.nuse.xml index 84071bf9f9..93178fe07d 100644 --- a/data/xml/2020.nuse.xml +++ b/data/xml/2020.nuse.xml @@ -3,7 +3,7 @@ Proceedings of the First Joint Workshop on Narrative Understanding, Storylines, and Events - ClaireBonial + ClaireBonial TommasoCaselli SnigdhaChaturvedi ElizabethClark @@ -15,7 +15,7 @@ BenMiller TerukoMitamura NanyunPeng - JoelTetreault + JoelTetreault Association for Computational Linguistics
Online
July @@ -31,7 +31,7 @@ New Insights into Cross-Document Event Coreference: Systematic Comparison and a Simplified Approach AndresCremisini - MarkFinlayson + MarkFinlayson 1–10 Cross-Document Event Coreference (CDEC) is the task of finding coreference relationships between events in separate documents, most commonly assessed using the Event Coreference Bank+ corpus (ECB+). At least two different approaches have been proposed for CDEC on ECB+ that use only event triggers, and at least four have been proposed that use both triggers and entities. Comparing these approaches is complicated by variation in the systems’ use of gold vs. computed labels, as well as variation in the document clustering pre-processing step. We present an approach that matches or slightly beats state-of-the-art performance on CDEC over ECB+ with only event trigger annotations, but with a significantly simpler framework and much smaller feature set relative to prior work. This study allows us to directly compare with prior systems and draw conclusions about the effectiveness of various strategies. Additionally, we provide the first cross-validated evaluation on the ECB+ dataset; the first explicit evaluation of the pairwise event coreference classification step; and the first quantification of the effect of document clustering on system performance. The last in particular reveals that while document clustering is a crucial pre-processing step, improvements can at most provide for a 3 point improvement in CDEC performance, though this might be attributable to ease of document clustering on ECB+. 2020.nuse-1.1 @@ -44,7 +44,7 @@ Ming-ChangChiu TiantianFeng XiangRen - ShrikanthNarayanan + ShrikanthNarayanan 11–16 Deciding which scripts to turn into movies is a costly and time-consuming process for filmmakers. Thus, building a tool to aid script selection, an initial phase in movie production, can be very beneficial. Toward that goal, in this work, we present a method to evaluate the quality of a screenplay based on linguistic cues. We address this in a two-fold approach: (1) we define the task as predicting nominations of scripts at major film awards with the hypothesis that the peer-recognized scripts should have a greater chance to succeed. (2) based on industry opinions and narratology, we extract and integrate domain-specific features into common classification techniques. We face two challenges (1) scripts are much longer than other document datasets (2) nominated scripts are limited and thus difficult to collect. However, with narratology-inspired modeling and domain features, our approach offers clear improvements over strong baselines. Our work provides a new approach for future work in screenplay analysis. 2020.nuse-1.2 @@ -58,7 +58,7 @@ W. VictorYarlott MohammedAldawsari NaphtaliRishe - MarkFinlayson + MarkFinlayson 17–25 Identifying the discourse structure of documents is an important task in understanding written text. Building on prior work, we demonstrate an improved approach to automatically identifying the discourse function of paragraphs in news articles. We start with the hierarchical theory of news discourse developed by van Dijk (1988) which proposes how paragraphs function within news articles. This discourse information is a level intermediate between phrase- or sentence-sized discourse segments and document genre, characterizing how individual paragraphs convey information about the events in the storyline of the article. Specifically, the theory categorizes the relationships between narrated events and (1) the overall storyline (such as Main Events, Background, or Consequences) as well as (2) commentary (such as Verbal Reactions and Evaluations). We trained and tested a linear chain conditional random field (CRF) with new features to model van Dijk’s labels and compared it against several machine learning models presented in previous work. Our model significantly outperformed all baselines and prior approaches, achieving an average of 0.71 F1 score which represents a 31.5% improvement over the previously best-performing support vector machine model. 2020.nuse-1.3 @@ -69,7 +69,7 @@ Systematic Evaluation of a Framework for Unsupervised Emotion Recognition for Narrative Text SamiraZad - MarkFinlayson + MarkFinlayson 26–37 Identifying emotions as expressed in text (a.k.a. text emotion recognition) has received a lot of attention over the past decade. Narratives often involve a great deal of emotional expression, and so emotion recognition on narrative text is of great interest to computational approaches to narrative understanding. Prior work by Kim et al. 2010 was the work with the highest reported emotion detection performance, on a corpus of fairy tales texts. Close inspection of that work, however, revealed significant reproducibility problems, and we were unable to reimplement Kim’s approach as described. As a consequence, we implemented a framework inspired by Kim’s approach, where we carefully evaluated the major design choices. We identify the highest-performing combination, which outperforms Kim’s reported performance by 7.6 F_1 points on average. Close inspection of the annotated data revealed numerous missing and incorrect emotion terms in the relevant lexicon, WordNetAffect (WNA; Strapparava and Valitutti, 2004), which allowed us to augment it in a useful way. More generally, this showed that numerous clearly emotive words and phrases are missing from WNA, which suggests that effort invested in augmenting or refining emotion ontologies could be useful for improving the performance of emotion recognition systems. We release our code and data to definitely enable future reproducibility of this work. 2020.nuse-1.4 @@ -95,13 +95,13 @@ MaartenSap ElizabethClark KatharinaReinecke - Noah A.Smith + Noah A.Smith 46–54 Current story writing or story editing systems rely on human judgments of story quality for evaluating performance, often ignoring the subjectivity in ratings. We analyze the effect of author and reader characteristics and story writing setup on the quality of stories in a short storytelling task. To study this effect, we create and release STORIESINTHEWILD, containing 1,630 stories collected on a volunteer-based crowdsourcing platform. Each story is rated by three different readers, and comes paired with the author’s and reader’s age, gender, and personality. Our findings show significant effects of authors’ and readers’ identities, as well as writing setup, on story writing and ratings. Notably, compared to younger readers, readers age 45 and older consider stories significantly less creative and less entertaining. Readers also prefer stories written all at once, rather than in chunks, finding them more coherent and creative. We also observe linguistic differences associated with authors’ demographics (e.g., older authors wrote more vivid and emotional stories). Our findings suggest that reader and writer demographics, as well as writing setup, should be accounted for in story writing evaluations. 2020.nuse-1.6 2020.nuse-1.6.Software.zip - 10.18653/v1/2020.nuse-1.6 2020.nuse-1.6.Dataset.pdf + 10.18653/v1/2020.nuse-1.6 @@ -141,7 +141,7 @@ Exploring aspects of similarity between spoken personal narratives by disentangling them into narrative clause types BelenSaldias - DebRoy + DebRoy 78–86 Sharing personal narratives is a fundamental aspect of human social behavior as it helps share our life experiences. We can tell stories and rely on our background to understand their context, similarities, and differences. A substantial effort has been made towards developing storytelling machines or inferring characters’ features. However, we don’t usually find models that compare narratives. This task is remarkably challenging for machines since they, as sometimes we do, lack an understanding of what similarity means. To address this challenge, we first introduce a corpus of real-world spoken personal narratives comprising 10,296 narrative clauses from 594 video transcripts. Second, we ask non-narrative experts to annotate those clauses under Labov’s sociolinguistic model of personal narratives (i.e., action, orientation, and evaluation clause types) and train a classifier that reaches 84.7% F-score for the highest-agreed clauses. Finally, we match stories and explore whether people implicitly rely on Labov’s framework to compare narratives. We show that actions followed by the narrator’s evaluation of these are the aspects non-experts consider the most. Our approach is intended to help inform machine learning methods aimed at studying or representing personal narratives. 2020.nuse-1.10 @@ -155,8 +155,8 @@ NitinRamrakhiyani Avinash KumarSingh SangameshwarPatil - GirishPalshikar - PushpakBhattacharyya + GirishPalshikar + PushpakBhattacharyya VasudevaVarma 87–96 In this paper, we propose the use of Message Sequence Charts (MSC) as a representation for visualizing narrative text in Hindi. An MSC is a formal representation allowing the depiction of actors and interactions among these actors in a scenario, apart from supporting a rich framework for formal inference. We propose an approach to extract MSC actors and interactions from a Hindi narrative. As a part of the approach, we enrich an existing event annotation scheme where we provide guidelines for annotation of the mood of events (realis vs irrealis) and guidelines for annotation of event arguments. We report performance on multiple evaluation criteria by experimenting with Hindi narratives from Indian History. Though Hindi is the fourth most-spoken first language in the world, from the NLP perspective it has comparatively lesser resources than English. Moreover, there is relatively less work in the context of event processing in Hindi. Hence, we believe that this work is among the initial works for Hindi event processing. diff --git a/data/xml/2020.onion.xml b/data/xml/2020.onion.xml index 25ee76e27d..26719be39b 100644 --- a/data/xml/2020.onion.xml +++ b/data/xml/2020.onion.xml @@ -32,7 +32,7 @@ Analysis of Body Behaviours in Human-Human and Human-Robot Interactions TaigaMori - KristiinaJokinen + KristiinaJokinen YasuharuDen 7–14 We conducted preliminary comparison of human-robot (HR) interaction with human-human (HH) interaction conducted in English and in Japanese. As the result, body gestures increased in HR, while hand and head gestures decreased in HR. Concerning hand gesture, they were composed of more diverse and complex forms, trajectories and functions in HH than in HR. Moreover, English speakers produced 6 times more hand gestures than Japanese speakers in HH. Regarding head gesture, even though there was no difference in the frequency of head gestures between English speakers and Japanese speakers in HH, Japanese speakers produced slightly more nodding during the robot’s speaking than English speakers in HR. Furthermore, positions of nod were different depending on the language. Concerning body gesture, participants produced body gestures mostly to regulate appropriate distance with the robot in HR. Additionally, English speakers produced slightly more body gestures than Japanese speakers. diff --git a/data/xml/2020.osact.xml b/data/xml/2020.osact.xml index 03bc106be5..52cd8a4eec 100644 --- a/data/xml/2020.osact.xml +++ b/data/xml/2020.osact.xml @@ -125,7 +125,7 @@ <fixed-case>ASU</fixed-case>_<fixed-case>OPTO</fixed-case> at <fixed-case>OSACT</fixed-case>4 - Offensive Language Detection for <fixed-case>A</fixed-case>rabic text AmrKeleg - Samhaa R.El-Beltagy + Samhaa R.El-Beltagy MahmoudKhalil 66–70 In the past years, toxic comments and offensive speech are polluting the internet and manual inspection of these comments is becoming a tiresome task to manage. Having a machine learning based model that is able to filter offensive Arabic content is of high need nowadays. In this paper, we describe the model that was submitted to the Shared Task on Offensive Language Detection that is organized by (The 4th Workshop on Open-Source Arabic Corpora and Processing Tools). Our model makes use transformer based model (BERT) to detect offensive content. We came in the fourth place in subtask A (detecting Offensive Speech) and in the third place in subtask B (detecting Hate Speech). @@ -179,7 +179,7 @@ Combining Character and Word Embeddings for the Detection of Offensive Language in <fixed-case>A</fixed-case>rabic Abdullah I.Alharbi - MarkLee + MarkLee 91–96 Twitter and other social media platforms offer users the chance to share their ideas via short posts. While the easy exchange of ideas has value, these microblogs can be leveraged by people who want to share hatred. and such individuals can share negative views about an individual, race, or group with millions of people at the click of a button. There is thus an urgent need to establish a method that can automatically identify hate speech and offensive language. To contribute to this development, during the OSACT4 workshop, a shared task was undertaken to detect offensive language in Arabic. A key challenge was the uniqueness of the language used on social media, prompting the out-of-vocabulary (OOV) problem. In addition, the use of different dialects in Arabic exacerbates this problem. To deal with the issues associated with OOV, we generated a character-level embeddings model, which was trained on a massive data collected carefully. This level of embeddings can work effectively in resolving the problem of OOV words through its ability to learn the vectors of character n-grams or parts of words. The proposed systems were ranked 7th and 8th for Subtasks A and B, respectively. 2020.osact-1.15 diff --git a/data/xml/2020.paclic.xml b/data/xml/2020.paclic.xml index 8ab54ff56f..cdf177430a 100644 --- a/data/xml/2020.paclic.xml +++ b/data/xml/2020.paclic.xml @@ -3,7 +3,7 @@ Proceedings of the 34th Pacific Asia Conference on Language, Information and Computation - Minh LeNguyen + Minh LeNguyen Mai ChiLuong SanghounSong Association for Computational Linguistics @@ -31,7 +31,7 @@ Improving Sequence Tagging for <fixed-case>V</fixed-case>ietnamese Text using Transformer-based Neural Models The VietBui Thi OanhTran - PhuongLe-Hong + PhuongLe-Hong 13–20 2020.paclic-1.2 bui-etal-2020-improving @@ -58,7 +58,7 @@ Exploiting weak-supervision for classifying Non-Sentential Utterances in <fixed-case>M</fixed-case>andarin Conversations Xin-YiChen - LaurentPrévot + LaurentPrévot 42–50 2020.paclic-1.5 chen-prevot-2020-exploiting @@ -73,7 +73,7 @@ Metaphoricity Rating of <fixed-case>C</fixed-case>hinese <fixed-case>KIND</fixed-case> Metaphor Expressions - Siaw-FongChung + Siaw-FongChung Meng-HsienShih Yu-HsiangShen Wei-TingTseng @@ -129,7 +129,7 @@ From Sense to Action: A Word-Action Disambiguation Task in <fixed-case>NLP</fixed-case> - Shu-KaiHsieh + Shu-KaiHsieh Yu-HsiangTseng Chiung-YuChiang RichardLian @@ -196,7 +196,7 @@ Simple is Better! Lightweight Data Augmentation for Low Resource Slot Filling and Intent Classification SamuelLouvan - BernardoMagnini + BernardoMagnini 167–177 2020.paclic-1.20 louvan-magnini-2020-simple @@ -236,7 +236,7 @@ Iterative Multilingual Neural Machine Translation for Less-Common and Zero-Resource Language Pairs Minh ThuanNguyen - Phuong ThaiNguyen + Phuong ThaiNguyen Van VinhNguyen Minh Cong NguyenHoang 207–215 @@ -251,12 +251,12 @@ CheolhunHeo YongbinJeong YoosungJeong - YounggyunHahm + YounggyunHahm TaehwanOh HyonsuChoe SeokwonPark Jin-DongKim - Key-SunChoi + Key-SunChoi 216–224 2020.paclic-1.25 noh-etal-2020-enhancing @@ -346,7 +346,7 @@ A corpus-based comparative study of light verbs in three <fixed-case>C</fixed-case>hinese speech communities - Benjamin KTsou + Benjamin KTsou Ka-FaiYip 302–311 2020.paclic-1.35 @@ -357,7 +357,7 @@ MingyuWan BaixiXing QiSu - PengyuanLiu + PengyuanLiu Chu-RenHuang 312–317 2020.paclic-1.36 @@ -410,7 +410,7 @@ Imbalanced <fixed-case>C</fixed-case>hinese Multi-label Text Classification Based on Alternating Attention HongliangBi HanHu - PengyuanLiu + PengyuanLiu 368–374 2020.paclic-1.42 bi-etal-2020-imbalanced @@ -455,7 +455,7 @@ Exploring Discourse on Same-sex Marriage in <fixed-case>T</fixed-case>aiwan: A Case Study of Near-Synonym of <fixed-case>HOMOSEXUAL</fixed-case> in Opposing Stances Han-TangHung - Shu-KaiHsieh + Shu-KaiHsieh 411–419 2020.paclic-1.47 hung-hsieh-2020-exploring @@ -492,7 +492,7 @@ Identifying Authors Based on Stylometric measures of <fixed-case>V</fixed-case>ietnamese texts Ho NgocLam Vo DiepNhu - DinhDien + DinhDien Nguyen TuyetNhung 447–452 2020.paclic-1.51 @@ -521,7 +521,7 @@ Rajesh KumarMundotiya VikrantKumar ArpitMehta - Anil KumarSingh + Anil KumarSingh 471–477 2020.paclic-1.54 mundotiya-etal-2020-attention @@ -564,7 +564,7 @@ Redefining Verbal Nouns in <fixed-case>J</fixed-case>apanese: From the Perspective of Polycategoriality - David Y.Oshima + David Y.Oshima MidoriHayashi 514–522 2020.paclic-1.59 @@ -642,9 +642,9 @@ Bilingual Multi-word Expressions, Multiple-correspondence, and their cultivation from parallel patents: The <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish case - Benjamin K.Tsou + Benjamin K.Tsou Ka PoChow - JohnLee + JohnLee Ka-FaiYip YaxuanJi KevinWu diff --git a/data/xml/2020.pam.xml b/data/xml/2020.pam.xml index d4a7ac1188..c710cc7228 100644 --- a/data/xml/2020.pam.xml +++ b/data/xml/2020.pam.xml @@ -119,7 +119,7 @@ A toy distributional model for fuzzy generalised quantifiers - MehrnooshSadrzadeh + MehrnooshSadrzadeh GijsWijnholds 86–94 Recent work in compositional distributional semantics showed how bialgebras model generalised quantifiers of natural language. That technique requires working with vector space over power sets of bases, and therefore is computationally costly. It is possible to overcome the computational hurdles by working with fuzzy generalised quantifiers. In this paper, we show that the compositional notion of semantics of natural language, guided by a grammar, extends from a binary to a many valued setting and instantiate in it the fuzzy computations. We import vector representations of words and predicates, learnt from large scale compositional distributional semantics, interpret them as fuzzy sets, and analyse their performance on a toy inference dataset. @@ -131,7 +131,7 @@ SabaAnwar ArtemShelmanov AlexanderPanchenko - ChrisBiemann + ChrisBiemann 95–103 Semantic frames are formal linguistic structures describing situations/actions/events, e.g. Commercial transfer of goods. Each frame provides a set of roles corresponding to the situation participants, e.g. Buyer and Goods, and lexical units (LUs) – words and phrases that can evoke this particular frame in texts, e.g. Sell. The scarcity of annotated resources hinders wider adoption of frame semantics across languages and domains. We investigate a simple yet effective method, lexical substitution with word representation models, to automatically expand a small set of frame-annotated sentences with new words for their respective roles and LUs. We evaluate the expansion quality using FrameNet. Contextualized models demonstrate overall superior performance compared to the non-contextualized ones on roles. However, the latter show comparable performance on the task of LU expansion. 2020.pam-1.13 @@ -146,7 +146,7 @@ ShijieZhao ShawnLin WenxingLiu - DerryWijaya + DerryWijaya 104–108 At the intersection between computer vision and natural language processing, there has been recent progress on two natural language generation tasks: Dense Image Captioning and Referring Expression Generation for objects in complex scenes. The former aims to provide a caption for a specified object in a complex scene for the benefit of an interlocutor who may not be able to see it. The latter aims to produce a referring expression that will serve to identify a given object in a scene that the interlocutor can see. The two tasks are designed for different assumptions about the common ground between the interlocutors, and serve very different purposes, although they both associate a linguistic description with an object in a complex scene. Despite these fundamental differences, the distinction between these two tasks is sometimes overlooked. Here, we undertake a side-by-side comparison between image captioning and reference game human datasets and show that they differ systematically with respect to informativity. We hope that an understanding of the systematic differences among these human datasets will ultimately allow them to be leveraged more effectively in the associated engineering tasks. 2020.pam-1.14 @@ -174,7 +174,7 @@ Word Sense Distance in Human Similarity Judgements and Contextualised Word Embeddings JanoschHaber - MassimoPoesio + MassimoPoesio 128–145 Homonymy is often used to showcase one of the advantages of context-sensitive word embedding techniques such as ELMo and BERT. In this paper we want to shift the focus to the related but less exhaustively explored phenomenon of polysemy, where a word expresses various distinct but related senses in different contexts. Specifically, we aim to i) investigate a recent model of polyseme sense clustering proposed by Ortega-Andres & Vicente (2019) through analysing empirical evidence of word sense grouping in human similarity judgements, ii) extend the evaluation of context-sensitive word embedding systems by examining whether they encode differences in word sense similarity and iii) compare the word sense similarities of both methods to assess their correlation and gain some intuition as to how well contextualised word embeddings could be used as surrogate word sense similarity judgements in linguistic experiments. 2020.pam-1.17 diff --git a/data/xml/2020.parlaclarin.xml b/data/xml/2020.parlaclarin.xml index aa7274c2bc..d4c93bed44 100644 --- a/data/xml/2020.parlaclarin.xml +++ b/data/xml/2020.parlaclarin.xml @@ -49,7 +49,7 @@ Compiling <fixed-case>C</fixed-case>zech Parliamentary Stenographic Protocols into a Corpus - BarboraHladka + BarboraHladka MatyášKopp PavelStraňák 18–22 @@ -72,7 +72,7 @@ The si<fixed-case>P</fixed-case>arl corpus of <fixed-case>S</fixed-case>lovene parliamentary proceedings AndrejPancur - TomažErjavec + TomažErjavec 28–34 The paper describes the process of acquisition, up-translation, encoding, annotation, and distribution of siParl, a collection of the parliamentary debates from the Assembly of the Republic of Slovenia from 1990–2018, covering the period from just before Slovenia became an independent country in 1991, and almost up to the present. The entire corpus, comprising over 8 thousand sessions, 1 million speeches and 200 million words was uniformly encoded in accordance with the TEI-based Parla-CLARIN schema for encoding corpora of parliamentary debates, and contains extensive meta-data about the speakers, a typology of sessions etc. and structural and editorial annotations. The corpus was also part-of-speech tagged and lemmatised using state-of-the-art tools. The corpus is maintained on GitHub with its major versions archived in the CLARIN.SI repository and is available for linguistic analysis in the scope of the on-line CLARIN.SI concordancers, thus offering an invaluable resource for scholars studying Slovenian political history. 2020.parlaclarin-1.6 diff --git a/data/xml/2020.peoples.xml b/data/xml/2020.peoples.xml index 732836527c..985d5c46de 100644 --- a/data/xml/2020.peoples.xml +++ b/data/xml/2020.peoples.xml @@ -5,7 +5,7 @@ Proceedings of the Third Workshop on Computational Modeling of People's Opinions, Personality, and Emotion's in Social Media MalvinaNissim VivianaPatti - BarbaraPlank + BarbaraPlank EsinDurmus Association for Computational Linguistics
Barcelona, Spain (Online)
@@ -48,7 +48,7 @@ Persuasiveness of News Editorials depending on Ideology and Personality RoxanneEl Baff - KhalidAl Khatib + KhalidAl Khatib BennoStein HenningWachsmuth 29–40 @@ -89,7 +89,7 @@ Social Media Unrest Prediction during the <fixed-case>COVID</fixed-case>-19 Pandemic: Neural Implicit Motive Pattern Recognition as Psychometric Signs of Severe Crises DirkJohannßen - ChrisBiemann + ChrisBiemann 74–86 The COVID-19 pandemic has caused international social tension and unrest. Besides the crisis itself, there are growing signs of rising conflict potential of societies around the world. Indicators of global mood changes are hard to detect and direct questionnaires suffer from social desirability biases. However, so-called implicit methods can reveal humans intrinsic desires from e.g. social media texts. We present psychologically validated social unrest predictors and replicate scalable and automated predictions, setting a new state of the art on a recent German shared task dataset. We employ this model to investigate a change of language towards social unrest during the COVID-19 pandemic by comparing established psychological predictors on samples of tweets from spring 2019 with spring 2020. The results show a significant increase of the conflict indicating psychometrics. With this work, we demonstrate the applicability of automated NLP-based approaches to quantitative psychological research. 2020.peoples-1.8 @@ -126,7 +126,7 @@ Experiencers, Stimuli, or Targets: Which Semantic Roles Enable Machine Learning to Infer the Emotions? - Laura Ana MariaOberländer + Laura Ana MariaOberländer KevinReich RomanKlinger 119–128 @@ -136,10 +136,10 @@ Learning Emotion from 100 Observations: Unexpected Robustness of Deep Learning under Strong Data Limitations - SvenBuechel + SvenBuechel JoãoSedoc - H. AndrewSchwartz - LyleUngar + H. AndrewSchwartz + LyleUngar 129–139 One of the major downsides of Deep Learning is its supposed need for vast amounts of training data. As such, these techniques appear ill-suited for NLP areas where annotated data is limited, such as less-resourced languages or emotion analysis, with its many nuanced and hard-to-acquire annotation formats. We conduct a questionnaire study indicating that indeed the vast majority of researchers in emotion analysis deems neural models inferior to traditional machine learning when training data is limited. In stark contrast to those survey results, we provide empirical evidence for English, Polish, and Portuguese that commonly used neural architectures can be trained on surprisingly few observations, outperforming n-gram based ridge regression on only 100 data points. Our analysis suggests that high-quality, pre-trained word embeddings are a main factor for achieving those results. 2020.peoples-1.13 @@ -160,7 +160,7 @@ NikolaLjubešić IliaMarkov DarjaFišer - WalterDaelemans + WalterDaelemans 153–157 In this paper, we present emotion lexicons of Croatian, Dutch and Slovene, based on manually corrected automatic translations of the English NRC Emotion lexicon. We evaluate the impact of the translation changes by measuring the change in supervised classification results of socially unacceptable utterances when lexicon information is used for feature construction. We further showcase the usage of the lexicons by calculating the difference in emotion distributions in texts containing and not containing socially unacceptable discourse, comparing them across four languages (English, Croatian, Dutch, Slovene) and two topics (migrants and LGBT). We show significant and consistent improvements in automatic classification across all languages and topics, as well as consistent (and expected) emotion distributions across all languages and topics, proving for the manually corrected lexicons to be a useful addition to the severely lacking area of emotion lexicons, the crucial resource for emotive analysis of text. 2020.peoples-1.15 diff --git a/data/xml/2020.privatenlp.xml b/data/xml/2020.privatenlp.xml index 5570d5fc28..50dd1fb66f 100644 --- a/data/xml/2020.privatenlp.xml +++ b/data/xml/2020.privatenlp.xml @@ -5,7 +5,7 @@ Proceedings of the Second Workshop on Privacy in NLP OluwaseyiFeyisetan SepidehGhanavati - ShervinMalmasi + ShervinMalmasi PatriciaThaine Association for Computational Linguistics
Online
diff --git a/data/xml/2020.rail.xml b/data/xml/2020.rail.xml index a404a2fcac..ced8daab59 100644 --- a/data/xml/2020.rail.xml +++ b/data/xml/2020.rail.xml @@ -7,7 +7,7 @@ PhathutshedzoRamukhadi MmasibidiSetaka ValenciaWagner - Mennovan Zaanen + Mennovan Zaanen European Language Resources Association (ELRA)
Marseille, France
May @@ -32,7 +32,7 @@ Usability and Accessibility of <fixed-case>B</fixed-case>antu Language Dictionaries in the Digital Age: Mobile Access in an Open Environment ThomasEckart - SonjaBosch + SonjaBosch UweQuasthoff ErikKörner DirkGoldhahn @@ -75,9 +75,9 @@ Comparing Neural Network Parsers for a Less-resourced and Morphologically-rich Language: <fixed-case>A</fixed-case>mharic Dependency Parser - Binyam EphremSeyoum + Binyam EphremSeyoum YusukeMiyao - Baye YimamMekonnen + Baye YimamMekonnen 25–30 In this paper, we compare four state-of-the-art neural network dependency parsers for the Semitic language Amharic. As Amharic is a morphologically-rich and less-resourced language, the out-of-vocabulary (OOV) problem will be higher when we develop data-driven models. This fact limits researchers to develop neural network parsers because the neural network requires large quantities of data to train a model. We empirically evaluate neural network parsers when a small Amharic treebank is used for training. Based on our experiment, we obtain an 83.79 LAS score using the UDPipe system. Better accuracy is achieved when the neural parsing system uses external resources like word embedding. Using such resources, the LAS score for UDPipe improves to 85.26. Our experiment shows that the neural networks can learn dependency relations better from limited data while segmentation and POS tagging require much data. 2020.rail-1.5 @@ -105,7 +105,7 @@ Navigating Challenges of Multilingual Resource Development for Under-Resourced Languages: The Case of the <fixed-case>A</fixed-case>frican <fixed-case>W</fixed-case>ordnet Project MarissaGriesel - SonjaBosch + SonjaBosch 45–50 Creating a new wordnet is by no means a trivial task and when the target language is under-resourced as is the case for the languages currently included in the multilingual African Wordnet (AfWN), developers need to rely heavily on human expertise. During the different phases of development of the AfWN, we incorporated various methods of fast-tracking to ease the tedious and time-consuming work. Some methods have proven effective while others seem to have little positive impact on the work rate. As in the case of many other under-resourced languages, the expand model was implemented throughout, thus depending on English source data such as the English Princeton Wordnet (PWN) which is then translated into the target language with the assumption that the new language shares an underlying structure with the PWN. The paper discusses some problems encountered along the way and points out various possibilities of (semi) automated quality assurance measures and further refinement of the AfWN to ensure accelerated growth. In this paper we aim to highlight some of the lessons learnt from hands-on experience in order to facilitate similar projects, in particular for languages from other African countries. 2020.rail-1.8 diff --git a/data/xml/2020.rdsm.xml b/data/xml/2020.rdsm.xml index a415e3ca32..cd891136d3 100644 --- a/data/xml/2020.rdsm.xml +++ b/data/xml/2020.rdsm.xml @@ -49,8 +49,8 @@ Revisiting Rumour Stance Classification: Dealing with Imbalanced Data - YueLi - CarolinaScarton + YueLi + CarolinaScarton 38–44 Correctly classifying stances of replies can be significantly helpful for the automatic detection and classification of online rumours. One major challenge is that there are considerably more non-relevant replies (comments) than informative ones (supports and denies), making the task highly imbalanced. In this paper we revisit the task of rumour stance classification, aiming to improve the performance over the informative minority classes. We experiment with traditional methods for imbalanced data treatment with feature- and BERT-based classifiers. Our models outperform all systems in RumourEval 2017 shared task and rank second in RumourEval 2019. 2020.rdsm-1.4 @@ -79,7 +79,7 @@ Fake or Real? A Study of <fixed-case>A</fixed-case>rabic Satirical Fake News HadeelSaadany - ConstantinOrasan + ConstantinOrasan EmadMohamed 70–80 One very common type of fake news is satire which comes in a form of a news website or an online platform that parodies reputable real news agencies to create a sarcastic version of reality. This type of fake news is often disseminated by individuals on their online platforms as it has a much stronger effect in delivering criticism than through a straightforward message. However, when the satirical text is disseminated via social media without mention of its source, it can be mistaken for real news. This study conducts several exploratory analyses to identify the linguistic properties of Arabic fake news with satirical content. It shows that although it parodies real news, Arabic satirical news has distinguishing features on the lexico-grammatical level. We exploit these features to build a number of machine learning models capable of identifying satirical fake news with an accuracy of up to 98.6%. The study introduces a new dataset (3185 articles) scraped from two Arabic satirical news websites (‘Al-Hudood’ and ‘Al-Ahram Al-Mexici’) which consists of fake news. The real news dataset consists of 3710 articles collected from three official news sites: the ‘BBC-Arabic’, the ‘CNN-Arabic’ and ‘Al-Jazeera news’. Both datasets are concerned with political issues related to the Middle East. diff --git a/data/xml/2020.readi.xml b/data/xml/2020.readi.xml index cc7f12a679..677e27ac52 100644 --- a/data/xml/2020.readi.xml +++ b/data/xml/2020.readi.xml @@ -3,7 +3,7 @@ Proceedings of the 1st Workshop on Tools and Resources to Empower People with REAding DIfficulties (READI) - NúriaGala + NúriaGala RodrigoWilkens European Language Resources Association
Marseille, France
@@ -19,7 +19,7 @@ Disambiguating Confusion Sets as an Aid for Dyslexic Spelling Steinunn RutFriðriksdóttir - Anton KarlIngason + Anton KarlIngason 1–5 Spell checkers and other proofreading software are crucial tools for people with dyslexia and other reading disabilities. Most spell checkers automatically detect spelling mistakes by looking up individual words and seeing if they exist in the vocabulary. However, one of the biggest challenges of automatic spelling correction is how to deal with real-word errors, i.e. spelling mistakes which lead to a real but unintended word, such as when then is written in place of than. These errors account for 20% of all spelling mistakes made by people with dyslexia. As both words exist in the vocabulary, a simple dictionary lookup will not detect the mistake. The only way to disambiguate which word was actually intended is to look at the context in which the word appears. This problem is particularly apparent in languages with rich morphology where there is often minimal orthographic difference between grammatical items. In this paper, we present our novel confusion set corpus for Icelandic and discuss how it could be used for context-sensitive spelling correction. We have collected word pairs from seven different categories, chosen for their homophonous properties, along with sentence examples and frequency information from said pairs. We present a small-scale machine learning experiment using a decision tree binary classification which results range from 73% to 86% average accuracy with 10-fold cross validation. While not intended as a finalized result, the method shows potential and will be improved in future research. 2020.readi-1.1 @@ -49,9 +49,9 @@ Automatically Assess Children’s Reading Skills OrnellaMich - NadiaMana + NadiaMana RobertoGretter - MarcoMatassoni + MarcoMatassoni DanieleFalavigna 20–26 Assessing reading skills is an important task teachers have to perform at the beginning of a new scholastic year to evaluate the starting level of the class and properly plan next learning activities. Digital tools based on automatic speech recognition (ASR) may be really useful to support teachers in this task, currently very time consuming and prone to human errors. This paper presents a web application for automatically assessing fluency and accuracy of oral reading in children attending Italian primary and lower secondary schools. Our system, based on ASR technology, implements the Cornoldi’s MT battery, which is a well-known Italian test to assess reading skills. The front-end of the system has been designed following the participatory design approach by involving end users from the beginning of the creation process. Teachers may use our system to both test student’s reading skills and monitor their performance over time. In fact, the system offers an effective graphical visualization of the assessment results for both individual students and entire class. The paper also presents the results of a pilot study to evaluate the system usability with teachers. @@ -99,7 +99,7 @@ Visualizing Facets of Text Complexity across Registers MarinaSantini - ArneJonsson + ArneJonsson EvelinaRennes 49–56 In this paper, we propose visualizing results of a corpus-based study on text complexity using radar charts. We argue that the added value of this type of visualisation is the polygonal shape that provides an intuitive grasp of text complexity similarities across the registers of a corpus. The results that we visualize come from a study where we explored whether it is possible to automatically single out different facets of text complexity across the registers of a Swedish corpus. To this end, we used factor analysis as applied in Biber’s Multi-Dimensional Analysis framework. The visualization of text complexity facets with radar charts indicates that there is correspondence between linguistic similarity and similarity of shape across registers. @@ -169,7 +169,7 @@ Coreference-Based Text Simplification RodrigoWilkens BrunoOberle - AmaliaTodirascu + AmaliaTodirascu 93–100 Text simplification aims at adapting documents to make them easier to read by a given audience. Usually, simplification systems consider only lexical and syntactic levels, and, moreover, are often evaluated at the sentence level. Thus, studies on the impact of simplification in text cohesion are lacking. Some works add coreference resolution in their pipeline to address this issue. In this paper, we move forward in this direction and present a rule-based system for automatic text simplification, aiming at adapting French texts for dyslexic children. The architecture of our system takes into account not only lexical and syntactic but also discourse information, based on coreference chains. Our system has been manually evaluated in terms of grammaticality and cohesion. We have also built and used an evaluation corpus containing multiple simplification references for each sentence. It has been annotated by experts following a set of simplification guidelines, and can be used to run automatic evaluation of other simplification systems. Both the system and the evaluation corpus are freely available. 2020.readi-1.14 diff --git a/data/xml/2020.repl4nlp.xml b/data/xml/2020.repl4nlp.xml index d9094b4459..d2189c9b54 100644 --- a/data/xml/2020.repl4nlp.xml +++ b/data/xml/2020.repl4nlp.xml @@ -25,7 +25,7 @@ Zero-Resource Cross-Domain Named Entity Recognition ZihanLiu - Genta IndraWinata + Genta IndraWinata PascaleFung 1–6 Existing models for cross-domain named entity recognition (NER) rely on numerous unlabeled corpus or labeled NER training data in target domains. However, collecting data for low-resource target domains is not only expensive but also time-consuming. Hence, we propose a cross-domain NER model that does not use any external resources. We first introduce a Multi-Task Learning (MTL) by adding a new objective function to detect whether tokens are named entities or not. We then introduce a framework called Mixture of Entity Experts (MoEE) to improve the robustness for zero-resource domain adaptation. Finally, experimental results show that our model outperforms strong unsupervised cross-domain sequence labeling models, and the performance of our model is close to that of the state-of-the-art model which leverages extensive resources. @@ -37,7 +37,7 @@ Encodings of Source Syntax: Similarities in <fixed-case>NMT</fixed-case> Representations Across Target Languages Tyler A.Chang - AnnaRafferty + AnnaRafferty 7–16 We train neural machine translation (NMT) models from English to six target languages, using NMT encoder representations to predict ancestor constituent labels of source language words. We find that NMT encoders learn similar source syntax regardless of NMT target language, relying on explicit morphosyntactic cues to extract syntactic features from source sentences. Furthermore, the NMT encoders outperform RNNs trained directly on several of the constituent label prediction tasks, suggesting that NMT encoder representations can be used effectively for natural language tasks involving syntax. However, both the NMT encoders and the directly-trained RNNs learn substantially different syntactic information from a probabilistic context-free grammar (PCFG) parser. Despite lower overall accuracy scores, the PCFG often performs well on sentences for which the RNN-based models perform poorly, suggesting that RNN architectures are constrained in the types of syntax they can learn. 2020.repl4nlp-1.2 @@ -61,7 +61,7 @@ SiddharthBhat AlokDebnath SouvikBanerjee - ManishShrivastava + ManishShrivastava 24–33 In this paper, we provide an alternate perspective on word representations, by reinterpreting the dimensions of the vector space of a word embedding as a collection of features. In this reinterpretation, every component of the word vector is normalized against all the word vectors in the vocabulary. This idea now allows us to view each vector as an n-tuple (akin to a fuzzy set), where n is the dimensionality of the word representation and each element represents the probability of the word possessing a feature. Indeed, this representation enables the use fuzzy set theoretic operations, such as union, intersection and difference. Unlike previous attempts, we show that this representation of words provides a notion of similarity which is inherently asymmetric and hence closer to human similarity judgements. We compare the performance of this representation with various benchmarks, and explore some of the unique properties including function word detection, detection of polysemous words, and some insight into the interpretability provided by set theoretic operations. 2020.repl4nlp-1.4 @@ -165,7 +165,7 @@ A Metric Learning Approach to Misogyny Categorization Juan ManuelCoria SaharGhannay - SophieRosset + SophieRosset HervéBredin 89–94 The task of automatic misogyny identification and categorization has not received as much attention as other natural language tasks have, even though it is crucial for identifying hate speech in social Internet interactions. In this work, we address this sentence classification task from a representation learning perspective, using both a bidirectional LSTM and BERT optimized with the following metric learning loss functions: contrastive loss, triplet loss, center loss, congenerous cosine loss and additive angular margin loss. We set new state-of-the-art for the task with our fine-tuned BERT, whose sentence embeddings can be compared with a simple cosine distance, and we release all our code as open source for easy reproducibility. Moreover, we find that almost every loss function performs equally well in this setting, matching the regular cross entropy loss. @@ -261,7 +261,7 @@ A Cross-Task Analysis of Text Span Representations ShubhamToshniwal - HaoyueShi + HaoyueShi BowenShi LingyuGao KarenLivescu diff --git a/data/xml/2020.rocling.xml b/data/xml/2020.rocling.xml index 1c504cf607..1e7c533902 100644 --- a/data/xml/2020.rocling.xml +++ b/data/xml/2020.rocling.xml @@ -40,7 +40,7 @@ Improving Phrase Translation Based on Sentence Alignment of <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Parallel Corpus Yi-JyunChen Ching-Yu HelenYang - Jason S.Chang + Jason S.Chang 6–7 2020.rocling-1.3 chen-etal-2020-improving-phrase @@ -48,7 +48,7 @@ Mitigating Impacts of Word Segmentation Errors on Collocation Extraction in <fixed-case>C</fixed-case>hinese YongfuLiao - Shu-KaiHsieh + Shu-KaiHsieh 8–20 2020.rocling-1.4 liao-hsieh-2020-mitigating @@ -142,7 +142,7 @@ The preliminary study of robust speech feature extraction based on maximizing the accuracy of states in deep acoustic models Li-chiaChang - Jeih-weihHung + Jeih-weihHung 118–119 2020.rocling-1.14 chang-hung-2020-preliminary @@ -150,7 +150,7 @@ Multi-view Attention-based Speech Enhancement Model for Noise-robust Automatic Speech Recognition Fu-AnChao - Jeih-weihHung + Jeih-weihHung BerlinChen 120–135 2020.rocling-1.15 @@ -176,7 +176,7 @@ Lectal Variation of the Two <fixed-case>C</fixed-case>hinese Causative Auxiliaries Cing-FangShih Mao-ChangKu - Shu-KaiHsieh + Shu-KaiHsieh 163–177 2020.rocling-1.18 shih-etal-2020-lectal @@ -184,7 +184,7 @@ The Semantic Features and Cognitive Concepts of Mang2 ‘Busy’: A Corpus-Based Study Hsin-RouLin - Siaw-FongChung + Siaw-FongChung 178–192 2020.rocling-1.19 lin-chung-2020-semantic @@ -192,7 +192,7 @@ An Analysis of Multimodal Document Intent in <fixed-case>I</fixed-case>nstagram Posts Ying-YuChen - Shu-KaiHsieh + Shu-KaiHsieh 193–207 2020.rocling-1.20 chen-hsieh-2020-analysis @@ -202,7 +202,7 @@ Wen-jetWang Chia-JungChen Chien-yuLai - Chia-mingLee + Chia-mingLee Hsin-hungLin 208–222 2020.rocling-1.21 @@ -211,7 +211,7 @@ An Adaptive Method for Building a <fixed-case>C</fixed-case>hinese Dimensional Sentiment Lexicon Ying-LungLin - Liang-ChihYu + Liang-ChihYu 223–237 2020.rocling-1.22 lin-yu-2020-adaptive @@ -244,7 +244,7 @@ Sentiment Analysis for Investment Atmosphere Scoring Chih-HsiangPeng - Liang-ChihYu + Liang-ChihYu 275–289 2020.rocling-1.26 peng-yu-2020-sentiment @@ -278,7 +278,7 @@ The Analysis and Annotation of Propaganda Techniques in <fixed-case>C</fixed-case>hinese News Texts Meng-HsienShih Ren-fengDuann - Siaw-FongChung + Siaw-FongChung 331–345 2020.rocling-1.30 shih-etal-2020-analysis @@ -296,7 +296,7 @@ Yuh-ShyangWang Lung-HaoLee Bo-LinLin - Liang-ChihYu + Liang-ChihYu 359–371 2020.rocling-1.32 wang-etal-2020-scientific @@ -311,7 +311,7 @@ Email Writing Assistant System - Jason S.Chang + Jason S.Chang Ching-YuYang Guan-FuPeng 387–397 diff --git a/data/xml/2020.scil.xml b/data/xml/2020.scil.xml index 51ad1ee71d..28d867adf4 100644 --- a/data/xml/2020.scil.xml +++ b/data/xml/2020.scil.xml @@ -4,7 +4,7 @@ Proceedings of the Society for Computation in Linguistics 2020 AllysonEttinger - GajaJarosz + GajaJarosz JoePater Association for Computational Linguistics
New York, New York
@@ -116,7 +116,7 @@
Evolving constraints and rules in Harmonic Grammar - ElliottMoreton + ElliottMoreton 100–111 2020.scil-1.13 moreton-2020-evolving @@ -166,7 +166,7 @@ Acquisition of Inflectional Morphology in Artificial Neural Networks With Prior Knowledge - KatharinaKann + KatharinaKann 144–154 2020.scil-1.19 kann-2020-acquisition @@ -258,7 +258,7 @@ Frequency-(in)dependent regularization in language production and cultural transmission EmilyMorgan - RogerLevy + RogerLevy 248–249 2020.scil-1.30 morgan-levy-2020-frequency @@ -266,7 +266,7 @@ Graph-to-Graph Meaning Representation Transformations for Human-Robot Dialogue MitchellAbrams - ClaireBonial + ClaireBonial LuciaDonatelli 250–253 2020.scil-1.31 @@ -277,7 +277,7 @@ EmilyAhn CeciliaJimenez YuliaTsvetkov - Alan WBlack + Alan WBlack 254–264 2020.scil-1.32 ahn-etal-2020-code @@ -294,7 +294,7 @@ Tensor Product Decomposition Networks: Uncovering Representations of Structure Learned by Neural Networks - R. ThomasMcCoy + R. ThomasMcCoy TalLinzen EwanDunbar PaulSmolensky @@ -306,8 +306,8 @@ What do you mean, <fixed-case>BERT</fixed-case>? TimotheeMickus DenisPaperno - MathieuConstant - Keesvan Deemter + MathieuConstant + Keesvan Deemter 279–290 2020.scil-1.35 mickus-etal-2020-mean @@ -332,7 +332,7 @@ ShohiniBhattasali MurielleFabre ChristophePallier - JohnHale + JohnHale 313–322 2020.scil-1.38 bhattasali-etal-2020-modeling @@ -341,7 +341,7 @@ A closer look at the performance of neural language models on reflexive anaphor licensing JenniferHu Sherry YongChen - RogerLevy + RogerLevy 323–333 2020.scil-1.39 hu-etal-2020-closer @@ -352,8 +352,8 @@ QiChen KyleRichardson AtreyeeMukherjee - Lawrence S.Moss - SandraKuebler + Lawrence S.Moss + SandraKuebler 334–344 2020.scil-1.40 hu-etal-2020-monalog @@ -361,7 +361,7 @@ Modeling the learning of the Person Case Constraint AdamLiter - NaomiFeldman + NaomiFeldman 345–354 2020.scil-1.41 liter-feldman-2020-modeling @@ -378,7 +378,7 @@ MariaRyskina EllaRabinovich TaylorBerg-Kirkpatrick - DavidMortensen + DavidMortensen YuliaTsvetkov 367–376 2020.scil-1.43 @@ -416,7 +416,7 @@ AnhadMohananey WeiPeng Sheng-FuWang - Samuel R.Bowman + Samuel R.Bowman 409–410 2020.scil-1.47 warstadt-etal-2020-blimp @@ -453,7 +453,7 @@ Lexical databases for computational analyses: A linguistic perspective - RobertMalouf + RobertMalouf FarrellAckerman ArtursSemenuks 446–456 @@ -498,7 +498,7 @@ Interpreting Sequence-to-Sequence Models for <fixed-case>R</fixed-case>ussian Inflectional Morphology - DavidKing + DavidKing AndreaSims MichaElsner 481–490 diff --git a/data/xml/2020.sdp.xml b/data/xml/2020.sdp.xml index db6adb6692..b20e988e28 100644 --- a/data/xml/2020.sdp.xml +++ b/data/xml/2020.sdp.xml @@ -3,12 +3,12 @@ Proceedings of the First Workshop on Scholarly Document Processing - Muthu KumarChandrasekaran + Muthu KumarChandrasekaran Anitade Waard GuyFeigenblat - DayneFreitag + DayneFreitag TirthankarGhosal - EduardHovy + EduardHovy PetrKnoth DavidKonopnicki PhilippMayr @@ -139,7 +139,7 @@ Reconstructing Manual Information Extraction with <fixed-case>DB</fixed-case>-to-Document Backprojection: Experiments in the Life Science Domain - Mark-ChristophMüller + Mark-ChristophMüller SuchetaGhosh MajaRey UlrikeWittig @@ -155,7 +155,7 @@ <fixed-case>D</fixed-case>eep<fixed-case>P</fixed-case>aper<fixed-case>C</fixed-case>omposer: A Simple Solution for Training Data Preparation for Parsing Research Papers MengLing - JianChen + JianChen 91–96 We present DeepPaperComposer, a simple solution for preparing highly accurate (100%) training data without manual labeling to extract content from scholarly articles using convolutional neural networks (CNNs). We used our approach to generate data and trained CNNs to extract eight categories of both textual (titles, abstracts, headers, figure and table captions, and other texts) and non-textural content (figures and tables) from 30 years of IEEE VIS conference papers, of which a third were scanned bitmap PDFs. We curated this dataset and named it VISpaper-3K. We then showed our initial benchmark performance using VISpaper-3K over itself and CS-150 using YOLOv3 and Faster-RCNN. We open-source DeepPaperComposer of our training data generation and released the resulting annotation data VISpaper-3K to promote re-producible research. 2020.sdp-1.10 @@ -226,7 +226,7 @@ Towards Grounding of Formulae TakutoAsakura AndréGreiner-Petter - AkikoAizawa + AkikoAizawa YusukeMiyao 138–147 A large amount of scientific knowledge is represented within mixed forms of natural language texts and mathematical formulae. Therefore, a collaboration of natural language processing and formula analyses, so-called mathematical language processing, is necessary to enable computers to understand and retrieve information from the documents. However, as we will show in this project, a mathematical notation can change its meaning even within the scope of a single paragraph. This flexibility makes it difficult to extract the exact meaning of a mathematical formula. In this project, we will propose a new task direction for grounding mathematical formulae. Particularly, we are addressing the widespread misconception of various research projects in mathematical information retrieval, which presume that mathematical notations have a fixed meaning within a single document. We manually annotated a long scientific paper to illustrate the task concept. Our high inter-annotator agreement shows that the task is well understood for humans. Our results indicate that it is worthwhile to grow the techniques for the proposed task to contribute to the further progress of mathematical language processing. @@ -279,7 +279,7 @@ On the Use of Web Search to Improve Scientific Collections KrutarthPatel CorneliaCaragea - Sujatha DasGollapalli + Sujatha DasGollapalli 174–183 Despite the advancements in search engine features, ranking methods, technologies, and the availability of programmable APIs, current-day open-access digital libraries still rely on crawl-based approaches for acquiring their underlying document collections. In this paper, we propose a novel search-driven framework for acquiring documents for such scientific portals. Within our framework, publicly-available research paper titles and author names are used as queries to a Web search engine. We were able to obtain ~267,000 unique research papers through our fully-automated framework using ~76,000 queries, resulting in almost 200,000 more papers than the number of queries. Moreover, through a combination of title and author name search, we were able to recover 78% of the original searched titles. 2020.sdp-1.20 @@ -307,8 +307,8 @@ AndrewHead RishamSidhu KyleLo - DanielWeld - Marti A.Hearst + DanielWeld + Marti A.Hearst 196–206 The task of definition detection is important for scholarly papers, because papers often make use of technical terminology that may be unfamiliar to readers. Despite prior work on definition detection, current approaches are far from being accurate enough to use in realworld applications. In this paper, we first perform in-depth error analysis of the current best performing definition detection system and discover major causes of errors. Based on this analysis, we develop a new definition detection system, HEDDEx, that utilizes syntactic features, transformer encoders, and heuristic filters, and evaluate it on a standard sentence-level benchmark. Because current benchmarks evaluate randomly sampled sentences, we propose an alternative evaluation that assesses every sentence within a document. This allows for evaluating recall in addition to precision. HEDDEx outperforms the leading system on both the sentence-level and the document-level tasks, by 12.7 F1 points and 14.4 F1 points, respectively. We note that performance on the high-recall document-level task is much lower than in the standard evaluation approach, due to the necessity of incorporation of document structure as features. We discuss remaining challenges in document-level definition detection, ideas for improvements, and potential issues for the development of reading aid applications. 2020.sdp-1.22 @@ -380,7 +380,7 @@ SaichethanReddy NaveenSaini SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya 242–250 In this paper, we present the IIIT Bhagalpur and IIT Patna team’s effort to solve the three shared tasks namely, CL-SciSumm 2020, CL-LaySumm 2020, LongSumm 2020 at SDP 2020. The theme of these tasks is to generate medium-scale, lay and long summaries, respectively, for scientific articles. For the first two tasks, unsupervised systems are developed, while for the third one, we develop a supervised system. The performances of all the systems were evaluated on the associated datasets with the shared tasks in term of well-known ROUGE metric. 2020.sdp-1.27 @@ -418,7 +418,7 @@ HarshavardhanKundarapu NaveenSaini SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya 270–276 The publication rate of scientific literature increases rapidly, which poses a challenge for researchers to keep themselves updated with new state-of-the-art. Scientific document summarization solves this problem by summarizing the essential fact and findings of the document. In the current paper, we present the participation of IITP-AI-NLP-ML team in three shared tasks, namely, CL-SciSumm 2020, LaySumm 2020, LongSumm 2020, which aims to generate medium, lay, and long summaries of the scientific articles, respectively. To solve CL-SciSumm 2020 and LongSumm 2020 tasks, three well-known clustering techniques are used, and then various sentence scoring functions, including textual entailment, are used to extract the sentences from each cluster for a summary generation. For LaySumm 2020, an encoder-decoder based deep learning model has been utilized. Performances of our developed systems are evaluated in terms of ROUGE measures on the associated datasets with the shared task. 2020.sdp-1.30 diff --git a/data/xml/2020.semeval.xml b/data/xml/2020.semeval.xml index 209b5dd0e9..04c87e72ea 100644 --- a/data/xml/2020.semeval.xml +++ b/data/xml/2020.semeval.xml @@ -3,7 +3,7 @@ Proceedings of the Fourteenth Workshop on Semantic Evaluation - AurelieHerbelot + AurelieHerbelot XiaodanZhu AlexisPalmer NathanSchneider @@ -37,7 +37,7 @@ GoranGlavaš IvanVulić AnnaKorhonen - Simone PaoloPonzetto + Simone PaoloPonzetto 24–35 Lexical entailment (LE) is a fundamental asymmetric lexico-semantic relation, supporting the hierarchies in lexical resources (e.g., WordNet, ConceptNet) and applications like natural language inference and taxonomy induction. Multilingual and cross-lingual NLP applications warrant models for LE detection that go beyond language boundaries. As part of SemEval 2020, we carried out a shared task (Task 2) on multilingual and cross-lingual LE. The shared task spans three dimensions: (1) monolingual vs. cross-lingual LE, (2) binary vs. graded LE, and (3) a set of 6 diverse languages (and 15 corresponding language pairs). We offered two different evaluation tracks: (a) Dist: for unsupervised, fully distributional models that capture LE solely on the basis of unannotated corpora, and (b) Any: for externally informed models, allowed to leverage any resources, including lexico-semantic networks (e.g., WordNet or BabelNet). In the Any track, we recieved runs that push state-of-the-art across all languages and language pairs, for both binary LE detection and graded LE prediction. 2020.semeval-1.2 @@ -109,7 +109,7 @@ JensKaiser DominikSchlechtweg SeanPapay - SabineSchulte im Walde + SabineSchulte im Walde 81–89 We present the results of our system for SemEval-2020 Task 1 that exploits a commonly used lexical semantic change detection model based on Skip-Gram with Negative Sampling. Our system focuses on Vector Initialization (VI) alignment, compares VI to the currently top-ranking models for Subtask 2 and demonstrates that these can be outperformed if we optimize VI dimensionality. We demonstrate that differences in performance can largely be attributed to model-specific sources of noise, and we reveal a strong relationship between dimensionality and frequency-induced noise in VI alignment. Our results suggest that lexical semantic change models integrating vector space alignment should pay more attention to the role of the dimensionality parameter. 2020.semeval-1.8 @@ -204,7 +204,7 @@ <fixed-case>M</fixed-case>ineria<fixed-case>UNAM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 3: Predicting Contextual <fixed-case>W</fixed-case>ord<fixed-case>S</fixed-case>imilarity Using a Centroid Based Approach and Word Embeddings HelenaGomez-Adorno - GemmaBel-Enguix + GemmaBel-Enguix JorgeReyes-Magaña BenjamínMoreno RamónCasillas @@ -217,7 +217,7 @@ <fixed-case>MULTISEM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 3: Fine-tuning <fixed-case>BERT</fixed-case> for Lexical Meaning - AinaGarí Soler + AinaGarí Soler MariannaApidianaki 158–165 We present the MULTISEM systems submitted to SemEval 2020 Task 3: Graded Word Similarity in Context (GWSC). We experiment with injecting semantic knowledge into pre-trained BERT models through fine-tuning on lexical semantic tasks related to GWSC. We use existing semantically annotated datasets, and propose to approximate similarity through automatically generated lexical substitutes in context. We participate in both GWSC subtasks and address two languages, English and Finnish. Our best English models occupy the third and fourth positions in the ranking for the two subtasks. Performance is lower for the Finnish models which are mid-ranked in the respective subtasks, highlighting the important role of data availability for fine-tuning. @@ -236,7 +236,7 @@ <fixed-case>BOS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 1: Word Sense Induction via Lexical Substitution for Lexical Semantic Change Detection - NikolayArefyev + NikolayArefyev VasilyZhikov 171–179 SemEval-2020 Task 1 is devoted to detection of changes in word meaning over time. The first subtask raises a question if a particular word has acquired or lost any of its senses during the given time period. The second subtask requires estimating the change in frequencies of the word senses. We have submitted two solutions for both subtasks. The first solution performs word sense induction (WSI) first, then makes the decision based on the induced word senses. We extend the existing WSI method based on clustering of lexical substitutes generated with neural language models and adapt it to the task. The second solution exploits a well-known approach to semantic change detection, that includes building word2vec SGNS vectors, aligning them with Orthogonal Procrustes and calculating cosine distance between resulting vectors. While WSI-based solution performs better in Subtask 1, which requires binary decisions, the second solution outperforms it in Subtask 2 and obtains the 3rd best result in this subtask. @@ -279,7 +279,7 @@ <fixed-case>E</fixed-case>mb<fixed-case>L</fixed-case>ex<fixed-case>C</fixed-case>hange at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 1: Unsupervised Embedding-based Detection of Lexical Semantic Changes EhsaneddinAsgari ChristophRinglstetter - HinrichSchütze + HinrichSchütze 201–207 This paper describes EmbLexChange, a system introduced by the “Life-Language” team for SemEval-2020 Task 1, on unsupervised detection of lexical-semantic changes. EmbLexChange is defined as the divergence between the embedding based profiles of word w (calculated with respect to a set of reference words) in the source and the target domains (source and target domains can be simply two time frames t_1 and t_2). The underlying assumption is that the lexical-semantic change of word w would affect its co-occurring words and subsequently alters the neighborhoods in the embedding spaces. We show that using a resampling framework for the selection of reference words (with conserved senses), we can more reliably detect lexical-semantic changes in English, German, Swedish, and Latin. EmbLexChange achieved second place in the binary detection of semantic changes in the SemEval-2020. 2020.semeval-1.24 @@ -513,7 +513,7 @@ <fixed-case>C</fixed-case>ardiff <fixed-case>U</fixed-case>niversity at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 6: Fine-tuning <fixed-case>BERT</fixed-case> for Domain-Specific Definition Classification ShelanJeawak - LuisEspinosa-Anke + LuisEspinosa-Anke StevenSchockaert 361–366 We describe the system submitted to SemEval-2020 Task 6, Subtask 1. The aim of this subtask is to predict whether a given sentence contains a definition or not. Unsurprisingly, we found that strong results can be achieved by fine-tuning a pre-trained BERT language model. In this paper, we analyze the performance of this strategy. Among others, we show that results can be improved by using a two-step fine-tuning process, in which the BERT model is first fine-tuned on the full training set, and then further specialized towards a target domain. @@ -526,7 +526,7 @@ AnandhKonar ChenyangHuang AmineTrabelsi - OsmarZaiane + OsmarZaiane 367–373 In this paper, we describe our mUlti-task learNIng for cOmmonsense reasoNing (UNION) system submitted for Task C of the SemEval2020 Task 4, which is to generate a reason explaining why a given false statement is non-sensical. However, we found in the early experiments that simple adaptations such as fine-tuning GPT2 often yield dull and non-informative generations (e.g. simple negations). In order to generate more meaningful explanations, we propose UNION, a unified end-to-end framework, to utilize several existing commonsense datasets so that it allows a model to learn more dynamics under the scope of commonsense reasoning. In order to perform model selection efficiently, accurately, and promptly, we also propose a couple of auxiliary automatic evaluation metrics so that we can extensively compare the models from different perspectives. Our submitted system not only results in a good performance in the proposed metrics but also outperforms its competitors with the highest achieved score of 2.10 for human evaluation while remaining a BLEU score of 15.7. Our code is made publicly available. 2020.semeval-1.45 @@ -538,7 +538,7 @@ JosefJon MartinFajcik MartinDocekal - PavelSmrz + PavelSmrz 374–390 We participated in all three subtasks. In subtasks A and B, our submissions are based on pretrained language representation models (namely ALBERT) and data augmentation. We experimented with solving the task for another language, Czech, by means of multilingual models and machine translated dataset, or translated model inputs. We show that with a strong machine translation system, our system can be used in another language with a small accuracy loss. In subtask C, our submission, which is based on pretrained sequence-to-sequence model (BART), ranked 1st in BLEU score ranking, however, we show that the correlation between BLEU and human evaluation, in which our submission ended up 4th, is low. We analyse the metrics used in the evaluation and we propose an additional score based on model from subtask B, which correlates well with our manual ranking, as well as reranking method based on the same principle. We performed an error and dataset analysis for all subtasks and we present our findings. 2020.semeval-1.46 @@ -553,7 +553,7 @@ Kwong SakLeung JiaZhu Gabriel Pui CheongFung - Kam-FaiWong + Kam-FaiWong 391–400 This paper describes our system submitted to task 4 of SemEval 2020: Commonsense Validation and Explanation (ComVE) which consists of three sub-tasks. The task is to directly validate the given sentence whether or not to make sense and require the model to explain it. Based on BERT architecture with the multi-task setting, we propose an effective and interpretable “Explain, Reason and Predict” (ERP) system to solve the three sub-tasks about commonsense: (a) Validation, (b) Reasoning, and (c) Explanation. Inspired by cognitive studies of common sense, our system first generates a reason or understanding of the sentences and then choose which one statement makes sense, which is achieved by multi-task learning. During the post-evaluation, our system has reached 92.9% accuracy in subtask A (rank 11), 89.7% accuracy in subtask B (rank 9), and BLEU score of 12.9 in subtask C (rank 8). 2020.semeval-1.47 @@ -595,7 +595,7 @@ <fixed-case>SWAG</fixed-case>ex at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 4: Commonsense Explanation as Next Event Prediction WiemBen Rim - NaoakiOkazaki + NaoakiOkazaki 422–429 We describe the system submitted by the SWAGex team to the SemEval-2020 Commonsense Validation and Explanation Task. We use multiple methods on the pre-trained language model BERT (Devlin et al., 2018) for tasks that require the system to recognize sentences against commonsense and justify the reasoning behind this decision. Our best performing model is BERT trained on SWAG and fine-tuned for the task. We investigate the ability to transfer commonsense knowledge from SWAG to SemEval-2020 by training a model for the Explanation task with Next Event Prediction data 2020.semeval-1.51 @@ -607,7 +607,7 @@ ThanetMarkchom BhuvanaDhruva ChandreshPravin - HuizhiLiang + HuizhiLiang 430–436 SemEval Task 4 Commonsense Validation and Explanation Challenge is to validate whether a system can differentiate natural language statements that make sense from those that do not make sense. Two subtasks, A and B, are focused in this work, i.e., detecting against-common-sense statements and selecting explanations of why they are false from the given options. Intuitively, commonsense validation requires additional knowledge beyond the given statements. Therefore, we propose a system utilising pre-trained sentence transformer models based on BERT, RoBERTa and DistillBERT architectures to embed the statements before classification. According to the results, these embeddings can improve the performance of the typical MLP and LSTM classifiers as downstream models of both subtasks compared to regular tokenised statements. These embedded statements are shown to comprise additional information from external resources which help validate common sense in natural language. 2020.semeval-1.52 @@ -619,7 +619,7 @@ MartinFajcik JosefJon MartinDocekal - PavelSmrz + PavelSmrz 437–444 This paper describes BUT-FIT’s submission at SemEval-2020 Task 5: Modelling Causal Reasoning in Language: Detecting Counterfactuals. The challenge focused on detecting whether a given statement contains a counterfactual (Subtask 1) and extracting both antecedent and consequent parts of the counterfactual from the text (Subtask 2). We experimented with various state-of-the-art language representation models (LRMs). We found RoBERTa LRM to perform the best in both subtasks. We achieved the first place in both exact match and F1 for Subtask 2 and ranked second for Subtask 1. 2020.semeval-1.53 @@ -651,7 +651,7 @@ <fixed-case>IITK</fixed-case>-<fixed-case>RSA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 5: Detecting Counterfactuals Anirudh AnilOjha RohinGarg - ShashankGupta + ShashankGupta AshutoshModi 458–467 This paper describes our efforts in tackling Task 5 of SemEval-2020. The task involved detecting a class of textual expressions known as counterfactuals and separating them into their constituent elements. Our final submitted approaches were an ensemble of various fine-tuned transformer-based and CNN-based models for the first subtask and a transformer model with dependency tree information for the second subtask. We ranked 4-th and 9-th in the overall leaderboard. We also explored various other approaches that involved classical methods, other neural architectures and incorporation of different linguistic features. @@ -685,7 +685,7 @@ Gorynych Transformer at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 6: Multi-task Learning for Definition Extraction AdisDavletov - NikolayArefyev + NikolayArefyev AlexanderShatilov DenisGordeev AlexeyRey @@ -702,7 +702,7 @@ YangFan PengJin YuanchaoLiu - BingquanLiu + BingquanLiu 494–500 This paper describes our system that participated in the SemEval-2020 task 4: Commonsense Validation and Explanation. For this task, it is obvious that external knowledge, such as Knowledge graph, can help the model understand commonsense in natural language statements. But how to select the right triples for statements remains unsolved, so how to reduce the interference of irrelevant triples on model performance is a research focus. This paper adopt a modified K-BERT as the language encoder, to enhance language representation through triples from knowledge graphs. Experiments show that our method is better than models without external knowledge, and is slightly better than the original K-BERT. We got an accuracy score of 0.97 in subtaskA, ranking 1/45, and got an accuracy score of 0.948, ranking 2/35. 2020.semeval-1.60 @@ -1075,8 +1075,8 @@ <fixed-case>RGCL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 6: Neural Approaches to <fixed-case>D</fixed-case>efinition<fixed-case>E</fixed-case>xtraction TharinduRanasinghe AlistairPlum - ConstantinOrasan - RuslanMitkov + ConstantinOrasan + RuslanMitkov 717–723 This paper presents the RGCL team submission to SemEval 2020 Task 6: DeftEval, subtasks 1 and 2. The system classifies definitions at the sentence and token levels. It utilises state-of-the-art neural network architectures, which have some task-specific adaptations, including an automatically extended training set. Overall, the approach achieves acceptable evaluation scores, while maintaining flexibility in architecture selection. 2020.semeval-1.94 @@ -1140,7 +1140,7 @@ AmitavaDas TanmoyChakraborty ViswanathPulabaigari - BjörnGambäck + BjörnGambäck 759–773 Information on social media comprises of various modalities such as textual, visual and audio. NLP and Computer Vision communities often leverage only one prominent modality in isolation to study social media. However, computational processing of Internet memes needs a hybrid approach. The growing ubiquity of Internet memes on social media platforms such as Facebook, Instagram, and Twitter further suggests that we can not ignore such multimodal content anymore. To the best of our knowledge, there is not much attention towards meme emotion analysis. The objective of this proposal is to bring the attention of the research community towards the automatic processing of Internet memes. The task Memotion analysis released approx 10K annotated memes- with human annotated labels namely sentiment(positive, negative, neutral), type of emotion(sarcastic,funny,offensive, motivation) and their corresponding intensity. The challenge consisted of three subtasks: sentiment (positive, negative, and neutral) analysis of memes,overall emotion (humor, sarcasm, offensive, and motivational) classification of memes, and classifying intensity of meme emotion. The best performances achieved were F1 (macro average) scores of 0.35, 0.51 and 0.32, respectively for each of the three subtasks. 2020.semeval-1.99 @@ -1154,7 +1154,7 @@ SudiptaKar SurajPandey SrinivasPYKL - BjörnGambäck + BjörnGambäck TanmoyChakraborty ThamarSolorio AmitavaDas @@ -1209,7 +1209,7 @@ Nicolaj FilrupRasmussen ThaiWang MarcoPlacenti - BarbaraPlank + BarbaraPlank 824–832 This paper describes a system that aims at assessing humour intensity in edited news headlines as part of the 7th task of SemEval-2020 on “Humor, Emphasis and Sentiment”. Various factors need to be accounted for in order to assess the funniness of an edited headline. We propose an architecture that uses hand-crafted features, knowledge bases and a language model to understand humour, and combines them in a regression model. Our system outperforms two baselines. In general, automatic humour assessment remains a difficult task. 2020.semeval-1.104 @@ -1232,7 +1232,7 @@ MartinDocekal MartinFajcik JosefJon - PavelSmrz + PavelSmrz 843–851 This paper describes our system that was designed for Humor evaluation within the SemEval-2020 Task 7. The system is based on convolutional neural network architecture. We investigate the system on the official dataset, and we provide more insight to model itself to see how the learned inner features look. 2020.semeval-1.106 @@ -1362,7 +1362,7 @@ <fixed-case>FII</fixed-case>-<fixed-case>UAIC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 9: Sentiment Analysis for Code-Mixed Social Media Text Using <fixed-case>CNN</fixed-case> LaviniaAparaschivei AndreiPalihovici - DanielaGîfu + DanielaGîfu 928–933 The “Sentiment Analysis for Code-Mixed Social Media Text” task at the SemEval 2020 competition focuses on sentiment analysis in code-mixed social media text , specifically, on the combination of English with Spanish (Spanglish) and Hindi (Hinglish). In this paper, we present a system able to classify tweets, from Spanish and English languages, into positive, negative and neutral. Firstly, we built a classifier able to provide corresponding sentiment labels. Besides the sentiment labels, we provide the language labels at the word level. Secondly, we generate a word-level representation, using Convolutional Neural Network (CNN) architecture. Our solution indicates promising results for the Sentimix Spanglish-English task (0.744), the team, Lavinia_Ap, occupied the 9th place. However, for the Sentimix Hindi-English task (0.324) the results have to be improved. 2020.semeval-1.118 @@ -1416,7 +1416,7 @@ JasonAngel Segun TaofeekAroyehun AntonioTamayo - AlexanderGelbukh + AlexanderGelbukh 957–962 Code-switching is a phenomenon in which two or more languages are used in the same message. Nowadays, it is quite common to find messages with languages mixed in social media. This phenomenon presents a challenge for sentiment analysis. In this paper, we use a standard convolutional neural network model to predict the sentiment of tweets in a blend of Spanish and English languages. Our simple approach achieved a F1-score of 0:71 on test set on the competition. We analyze our best model capabilities and perform error analysis to expose important difficulties for classifying sentiment in a code-switching setting. 2020.semeval-1.123 @@ -1439,7 +1439,7 @@ PriyaRani Bharathi RajaChakravarthi TheodorusFransen - John P.McCrae + John P.McCrae 968–974 Code mixing is a common phenomena in multilingual societies where people switch from one language to another for various reasons. Recent advances in public communication over different social media sites have led to an increase in the frequency of code-mixed usage in written language. In this paper, we present the Generative Morphemes with Attention (GenMA) Model sentiment analysis system contributed to SemEval 2020 Task 9 SentiMix. The system aims to predict the sentiments of the given English-Hindi code-mixed tweets without using word-level language tags instead inferring this automatically using a morphological model. The system is based on a novel deep neural network (DNN) architecture, which has outperformed the baseline F1-score on the test data-set as well as the validation data-set. Our results can be found under the user name “koustava” on the “Sentimix Hindi English” page. 2020.semeval-1.125 @@ -1484,7 +1484,7 @@ <fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 7: Assessing Humor in Edited News Headlines Using <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case> with Attention TiantianZhang ZhixuanChen - ManLan + ManLan 995–1000 In this paper we describe our system submitted to SemEval 2020 Task 7: “Assessing Humor in Edited News Headlines”. We participated in all subtasks, in which the main goal is to predict the mean funniness of the edited headline given the original and the edited headline. Our system involves two similar sub-networks, which generate vector representations for the original and edited headlines respectively. And then we do a subtract operation of the outputs from two sub-networks to predict the funniness of the edited headline. 2020.semeval-1.129 @@ -1551,7 +1551,7 @@ SofieLabat OlhaKaminska ElsLefever - VeroniqueHoste + VeroniqueHoste 1033–1040 This paper presents two different systems for the SemEval shared task 7 on Assessing Humor in Edited News Headlines, sub-task 1, where the aim was to estimate the intensity of humor generated in edited headlines. Our first system is a feature-based machine learning system that combines different types of information (e.g. word embeddings, string similarity, part-of-speech tags, perplexity scores, named entity recognition) in a Nu Support Vector Regressor (NuSVR). The second system is a deep learning-based approach that uses the pre-trained language model RoBERTa to learn latent features in the news headlines that are useful to predict the funniness of each headline. The latter system was also our final submission to the competition and is ranked seventh among the 49 participating teams, with a root-mean-square error (RMSE) of 0.5253. 2020.semeval-1.135 @@ -1601,7 +1601,7 @@ <fixed-case>UTFPR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 7: Using Co-occurrence Frequencies to Capture Unexpectedness - Gustavo HenriquePaetzold + Gustavo HenriquePaetzold 1066–1070 We describe the UTFPR system for SemEval-2020’s Task 7: Assessing Humor in Edited News Headlines. Ours is a minimalist unsupervised system that uses word co-occurrence frequencies from large corpora to capture unexpectedness as a mean to capture funniness. Our system placed 22nd on the shared task’s Task 2. We found that our approach requires more text than we used to perform reliably, and that unexpectedness alone is not sufficient to gauge funniness for humorous content that targets a diverse target audience. 2020.semeval-1.140 @@ -1660,7 +1660,7 @@ ZhenLi YaojieZhang BingXu - TiejunZhao + TiejunZhao 1100–1105 Internet memes emotion recognition is focused by many researchers. In this paper, we adopt BERT and ResNet for evaluation of detecting the emotions of Internet memes. We focus on solving the problem of data imbalance and data contains noise. We use RandAugment to enhance the data of the picture, and use Training Signal Annealing (TSA) to solve the impact of the imbalance of the label. At the same time, a new loss function is designed to ensure that the model is not affected by input noise which will improve the robustness of the model. We participated in sub-task a and our model based on BERT obtains 34.58% macro F1 score, ranking 10/32. 2020.semeval-1.145 @@ -1728,7 +1728,7 @@ Infotec + <fixed-case>C</fixed-case>entro<fixed-case>GEO</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 8: Deep Learning and Text Categorization approach for Memes classification GuillermoRuiz - Eric S.Tellez + Eric S.Tellez DanielaMoctezuma SabinoMiranda-Jiménez TaniaRamírez-delReal @@ -1798,7 +1798,7 @@ <fixed-case>SIS</fixed-case>@<fixed-case>IIITH</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 8: An Overview of Simple Text Classification Methods for Meme Analysis SravaniBoinepelli - ManishShrivastava + ManishShrivastava VasudevaVarma 1190–1194 Memes are steadily taking over the feeds of the public on social media. There is always the threat of malicious users on the internet posting offensive content, even through memes. Hence, the automatic detection of offensive images/memes is imperative along with detection of offensive text. However, this is a much more complex task as it involves both visual cues as well as language understanding and cultural/context knowledge. This paper describes our approach to the task of SemEval-2020 Task 8: Memotion Analysis. We chose to participate only in Task A which dealt with Sentiment Classification, which we formulated as a text classification problem. Through our experiments, we explored multiple training models to evaluate the performance of simple text classification algorithms on the raw text obtained after running OCR on meme images. Our submitted model achieved an accuracy of 72.69% and exceeded the existing baseline’s Macro F1 score by 8% on the official test dataset. Apart from describing our official submission, we shall elucidate how different classification models respond to this task. @@ -1821,7 +1821,7 @@ ZehaoLiu EmmanuelOsei-Brefo SiyuanChen - HuizhiLiang + HuizhiLiang 1201–1207 Memes are widely used on social media. They usually contain multi-modal information such as images and texts, serving as valuable data sources to analyse opinions and sentiment orientations of online communities. The provided memes data often face an imbalanced data problem, that is, some classes or labelled sentiment categories significantly outnumber other classes. This often results in difficulty in applying machine learning techniques where balanced labelled input data are required. In this paper, a Gaussian Mixture Model sampling method is proposed to tackle the problem of class imbalance for the memes sentiment classification task. To utilise both text and image data, a multi-modal CNN-LSTM model is proposed to jointly learn latent features for positive, negative and neutral category predictions. The experiments show that the re-sampling model can slightly improve the accuracy on the trial data of sub-task A of Task 8. The multi-modal CNN-LSTM model can achieve macro F1 score 0.329 on the test set. 2020.semeval-1.159 @@ -1853,7 +1853,7 @@ <fixed-case>BAKSA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 9: Bolstering <fixed-case>CNN</fixed-case> with Self-Attention for Sentiment Analysis of Code Mixed Text - AyushKumar + AyushKumar HarshAgarwal KeshavBansal AshutoshModi @@ -1922,7 +1922,7 @@ <fixed-case>IIT</fixed-case> <fixed-case>G</fixed-case>andhinagar at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 9: Code-Mixed Sentiment Classification Using Candidate Sentence Generation and Selection VivekSrivastava - MayankSingh + MayankSingh 1259–1264 Code-mixing is the phenomenon of using multiple languages in the same utterance. It is a frequently used pattern of communication on social media sites such as Facebook, Twitter, etc. Sentiment analysis of the monolingual text is a well-studied task. Code-mixing adds to the challenge of analyzing the sentiment of the text on various platforms such as social media, online gaming, forums, product reviews, etc. We present a candidate sentence generation and selection based approach on top of the Bi-LSTM based neural classifier to classify the Hinglish code-mixed text into one of the three sentiment classes positive, negative, or neutral. The proposed candidate sentence generation and selection based approach show an improvement in the system performance as compared to the Bi-LSTM based neural classifier. We can extend the proposed method to solve other problems with code-mixing in the textual data, such as humor-detection, intent classification, etc. 2020.semeval-1.168 @@ -1944,7 +1944,7 @@ <fixed-case>IUST</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 9: Sentiment Analysis for Code-Mixed Social Media Text Using Deep Neural Networks and Linear Baselines SoroushJavdan TahaShangipour ataei - BehrouzMinaei-Bidgoli + BehrouzMinaei-Bidgoli 1270–1275 Sentiment Analysis is a well-studied field of Natural Language Processing. However, the rapid growth of social media and noisy content within them poses significant challenges in addressing this problem with well-established methods and tools. One of these challenges is code-mixing, which means using different languages to convey thoughts in social media texts. Our group, with the name of IUST(username: TAHA), participated at the SemEval-2020 shared task 9 on Sentiment Analysis for Code-Mixed Social Media Text, and we have attempted to develop a system to predict the sentiment of a given code-mixed tweet. We used different preprocessing techniques and proposed to use different methods that vary from NBSVM to more complicated deep neural network models. Our best performing method obtains an F1 score of 0.751 for the Spanish-English sub-task and 0.706 over the Hindi-English sub-task. 2020.semeval-1.170 @@ -1954,7 +1954,7 @@ <fixed-case>JUNLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 9: Sentiment Analysis of <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Code Mixed Data Using Grid Search Cross Validation AvishekGarain - SainikMahata + SainikMahata DipankarDas 1276–1280 Code-mixing is a phenomenon which arises mainly in multilingual societies. Multilingual people, who are well versed in their native languages and also English speakers, tend to code-mix using English-based phonetic typing and the insertion of anglicisms in their main language. This linguistic phenomenon poses a great challenge to conventional NLP domains such as Sentiment Analysis, Machine Translation, and Text Summarization, to name a few. In this work, we focus on working out a plausible solution to the domain of Code-Mixed Sentiment Analysis. This work was done as participation in the SemEval-2020 Sentimix Task, where we focused on the sentiment analysis of English-Hindi code-mixed sentences. our username for the submission was “sainik.mahata” and team name was “JUNLP”. We used feature extraction algorithms in conjunction with traditional machine learning algorithms such as SVR and Grid Search in an attempt to solve the task. Our approach garnered an f1-score of 66.2% when tested using metrics prepared by the organizers of the task. @@ -1966,7 +1966,7 @@ <fixed-case>LIMSI</fixed-case>_<fixed-case>UPV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 9: Recurrent Convolutional Neural Network for Code-mixed Sentiment Analysis SomnathBanerjee SaharGhannay - SophieRosset + SophieRosset AnneVilnat PaoloRosso 1281–1287 @@ -2001,7 +2001,7 @@ Subhra JyotiBaroi NiveditaSingh RingkiDas - Thoudam DorenSingh + Thoudam DorenSingh 1298–1303 Sentiment Analysis refers to the process of interpreting what a sentence emotes and classifying them as positive, negative, or neutral. The widespread popularity of social media has led to the generation of a lot of text data and specifically, in the Indian social media scenario, the code-mixed Hinglish text i.e, the words of Hindi language, written in the Roman script along with other English words is a common sight. The ability to effectively understand the sentiments in these texts is much needed. This paper proposes a system titled NITS-Hinglish to effectively carry out the sentiment analysis of such code-mixed Hinglish text. The system has fared well with a final F-Score of 0.617 on the test data. 2020.semeval-1.175 @@ -2132,7 +2132,7 @@ AlbertoBarrón-Cedeño HenningWachsmuth RostislavPetrov - PreslavNakov + PreslavNakov 1377–1414 We present the results and the main findings of SemEval-2020 Task 11 on Detection of Propaganda Techniques in News Articles. The task featured two subtasks. Subtask SI is about Span Identification: given a plain-text document, spot the specific text fragments containing propaganda. Subtask TC is about Technique Classification: given a specific text fragment, in the context of a full document, determine the propaganda technique it uses, choosing from an inventory of 14 possible propaganda techniques. The task attracted a large number of participants: 250 teams signed up to participate and 44 made a submission on the test set. In this paper, we present the task, analyze the results, and discuss the system submissions and the methods they used. For both subtasks, the best systems used pre-trained Transformers and ensembles. Best Task Honorable Mention @@ -2145,7 +2145,7 @@ DawidJurkiewicz ŁukaszBorchmann IzabelaKosmala - FilipGraliński + FilipGraliński 1415–1424 This paper presents the winning system for the propaganda Technique Classification (TC) task and the second-placed system for the propaganda Span Identification (SI) task. The purpose of TC task was to identify an applied propaganda technique given propaganda text fragment. The goal of SI task was to find specific text fragments which contain at least one propaganda technique. Both of the developed solutions used semi-supervised learning technique of self-training. Interestingly, although CRF is barely used with transformer-based language models, the SI task was approached with RoBERTa-CRF architecture. An ensemble of RoBERTa-based models was proposed for the TC task, with one of them making use of Span CLS layers we introduce in the present paper. In addition to describing the submitted systems, an impact of architectural decisions and training schemes is investigated along with remarks regarding training models of the same or better quality with lower computational budget. Finally, the results of error analysis are presented. Best Paper @@ -2156,12 +2156,12 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Multilingual Offensive Language Identification in Social Media (<fixed-case>O</fixed-case>ffens<fixed-case>E</fixed-case>val 2020) MarcosZampieri - PreslavNakov + PreslavNakov SaraRosenthal PepaAtanasova GeorgiKaradzhov HamdyMubarak - LeonDerczynski + LeonDerczynski ZesesPitenis ÇağrıÇöltekin 1425–1447 @@ -2202,7 +2202,7 @@ Aschern at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 11: It Takes Three to Tango: <fixed-case>R</fixed-case>o<fixed-case>BERT</fixed-case>a, <fixed-case>CRF</fixed-case>, and Transfer Learning AntonChernyavskiy DmitryIlvovsky - PreslavNakov + PreslavNakov 1462–1468 We describe our system for SemEval-2020 Task 11 on Detection of Propaganda Techniques in News Articles. We developed ensemble models using RoBERTa-based neural architectures, additional CRF layers, transfer learning between the two subtasks, and advanced post-processing to handle the multi-label nature of the task, the consistency between nested spans, repetitions, and labels from similar spans in training. We achieved sizable improvements over baseline fine-tuned RoBERTa models, and the official evaluation ranked our system 3rd (almost tied with the 2nd) out of 36 teams on the span identification subtask with an F1 score of 0.491, and 2nd (almost tied with the 1st) out of 31 teams on the technique classification subtask with an F1 score of 0.62. 2020.semeval-1.191 @@ -2300,7 +2300,7 @@ <fixed-case>B</fixed-case>ham<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: An Ensemble of Different Word Embeddings and Emotion Transfer Learning for <fixed-case>A</fixed-case>rabic Offensive Language Identification in Social Media Abdullah I.Alharbi - MarkLee + MarkLee 1532–1538 Social media platforms such as Twitter offer people an opportunity to publish short posts in which they can share their opinions and perspectives. While these applications can be valuable, they can also be exploited to promote negative opinions, insults, and hatred against a person, race, or group. These opinions can be spread to millions of people at the click of a mouse. As such, there is a need to develop mechanisms by which offensive language can be automatically detected in social media channels and managed in a timely manner. To help achieve this goal, SemEval 2020 offered a shared task (OffensEval 2020) that involved the detection of offensive text in Arabic. We propose an ensemble approach that combines different levels of word embedding models and transfers learning from other sources of emotion-related tasks. The proposed system ranked 9th out of the 52 entries within the Arabic Offensive language identification subtask. 2020.semeval-1.200 @@ -2364,7 +2364,7 @@ MarcPàmies EmilyÖhman KaislaKajava - JörgTiedemann + JörgTiedemann 1569–1575 This paper presents the different models submitted by the LT@Helsinki team for the SemEval 2020 Shared Task 12. Our team participated in sub-tasks A and C; titled offensive language identification and offense target identification, respectively. In both cases we used the so-called Bidirectional Encoder Representation from Transformer (BERT), a model pre-trained by Google and fine-tuned by us on the OLID and SOLID datasets. The results show that offensive tweet classification is one of several language-based tasks where BERT can achieve state-of-the-art results. 2020.semeval-1.205 @@ -2376,7 +2376,7 @@ HwijeenAhn JiminSun Chan YoungPark - JungyunSeo + JungyunSeo 1576–1586 This paper describes our approach to the task of identifying offensive languages in a multilingual setting. We investigate two data augmentation strategies: using additional semi-supervised labels with different thresholds and cross-lingual transfer with data selection. Leveraging the semi-supervised dataset resulted in performance improvements compared to the baseline trained solely with the manually-annotated dataset. We propose a new metric, Translation Embedding Distance, to measure the transferability of instances for cross-lingual data selection. We also introduce various preprocessing steps tailored for social media text along with methods to fine-tune the pre-trained multilingual BERT (mBERT) for offensive language identification. Our multilingual systems achieved competitive results in Greek, Danish, and Turkish at OffensEval 2020. 2020.semeval-1.206 @@ -2396,7 +2396,7 @@ <fixed-case>NUIG</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Pseudo Labelling for Offensive Content Classification ShardulSuryawanshi - MihaelArcan + MihaelArcan PaulBuitelaar 1598–1604 This work addresses the classification problem defined by sub-task A (English only) of the OffensEval 2020 challenge. We used a semi-supervised approach to classify given tweets into an offensive (OFF) or not-offensive (NOT) class. As the OffensEval 2020 dataset is loosely labelled with confidence scores given by unsupervised models, we used last year’s offensive language identification dataset (OLID) to label the OffensEval 2020 dataset. Our approach uses a pseudo-labelling method to annotate the current dataset. We trained four text classifiers on the OLID dataset and the classifier with the highest macro-averaged F1-score has been used to pseudo label the OffensEval 2020 dataset. The same model which performed best amongst four text classifiers on OLID dataset has been trained on the combined dataset of OLID and pseudo labelled OffensEval 2020. We evaluated the classifiers with precision, recall and macro-averaged F1-score as the primary evaluation metric on the OLID and OffensEval 2020 datasets. This work is licensed under a Creative Commons Attribution 4.0 International Licence. Licence details: http://creativecommons.org/licenses/by/4.0/. @@ -2429,8 +2429,8 @@ <fixed-case>SINAI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Offensive Language Identification Exploring Transfer Learning Models Flor MiriamPlaza del Arco M. DoloresMolina González - AlfonsoUreña-López - MaiteMartin + AlfonsoUreña-López + MaiteMartin 1622–1627 This paper describes the participation of SINAI team at Task 12: OffensEval 2: Multilingual Offensive Language Identification in Social Media. In particular, the participation in Sub-task A in English which consists of identifying tweets as offensive or not offensive. We preprocess the dataset according to the language characteristics used on social media. Then, we select a small set from the training set provided by the organizers and fine-tune different Transformerbased models in order to test their effectiveness. Our team ranks 20th out of 85 participants in Subtask-A using the XLNet model. 2020.semeval-1.211 @@ -2450,7 +2450,7 @@ <fixed-case>UHH</fixed-case>-<fixed-case>LT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Fine-Tuning of Pre-Trained Transformer Networks for Offensive Language Detection GregorWiedemann Seid MuhieYimam - ChrisBiemann + ChrisBiemann 1638–1644 Fine-tuning of pre-trained transformer networks such as BERT yield state-of-the-art results for text classification tasks. Typically, fine-tuning is performed on task-specific training datasets in a supervised manner. One can also fine-tune in unsupervised manner beforehand by further pre-training the masked language modeling (MLM) task. Hereby, in-domain data for unsupervised MLM resembling the actual classification target dataset allows for domain adaptation of the model. In this paper, we compare current pre-trained transformer networks with and without MLM fine-tuning on their performance for offensive language detection. Our MLM fine-tuned RoBERTa-based classifier officially ranks 1st in the SemEval 2020 Shared Task 12 for the English language. Further experiments with the ALBERT model even surpass this result. 2020.semeval-1.213 @@ -2461,7 +2461,7 @@ <fixed-case>EL</fixed-case>-<fixed-case>BERT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 10: A Multi-Embedding Ensemble Based Approach for Emphasis Selection in Visual Media ChandreshKanani SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya 1645–1651 In visual media, text emphasis is the strengthening of words in a text to convey the intent of the author. Text emphasis in visual media is generally done by using different colors, backgrounds, or font for the text; it helps in conveying the actual meaning of the message to the readers. Emphasis selection is the task of choosing candidate words for emphasis, it helps in automatically designing posters and other media contents with written text. If we consider only the text and do not know the intent, then there can be multiple valid emphasis selections. We propose the use of ensembles for emphasis selection to improve over single emphasis selection models. We show that the use of multi-embedding helps in enhancing the results for base models. To show the efficacy of proposed approach we have also done a comparison of our results with state-of-the-art models. 2020.semeval-1.214 @@ -2524,7 +2524,7 @@ DebanjanMahata RakeshGosangi HaiminZhang - Rajiv RatnShah + Rajiv RatnShah 1678–1684 This paper presents our submission to the SemEval 2020 - Task 10 on emphasis selection in written text. We approach this emphasis selection problem as a sequence labeling task where we represent the underlying text with various contextual embedding models. We also employ label distribution learning to account for annotator disagreements. We experiment with the choice of model architectures, trainability of layers, and different contextual embeddings. Our best performing architecture is an ensemble of different models, which achieved an overall matching score of 0.783, placing us 15th out of 31 participating teams. Lastly, we analyze the results in terms of parts of speech tags, sentence lengths, and word ordering. 2020.semeval-1.219 @@ -2546,7 +2546,7 @@ <fixed-case>T</fixed-case>ext<fixed-case>L</fixed-case>earner at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 10: A Contextualized Ranking System in Solving Emphasis Selection in Text ZhishenYang LarsWolfsteller - NaoakiOkazaki + NaoakiOkazaki 1691–1697 This paper describes the emphasis selection system of the team TextLearner for SemEval 2020 Task 10: Emphasis Selection For Written Text in Visual Media. The system aims to learn the emphasis selection distribution using contextual representations extracted from pre-trained language models and a two-staged ranking model. The experimental results demonstrate the strong contextual representation power of the recent advanced transformer-based language model RoBERTa, which can be exploited using a simple but effective architecture on top. 2020.semeval-1.221 @@ -2645,7 +2645,7 @@ SopanKhosla RishabhJoshi RitamDutt - Alan WBlack + Alan WBlack YuliaTsvetkov 1756–1763 In this paper we describe our submission for the task of Propaganda Span Identification in news articles. We introduce a BERT-BiLSTM based span-level propaganda classification model that identifies which token spans within the sentence are indicative of propaganda. The ”multi-granular” model incorporates linguistic knowledge at various levels of text granularity, including word, sentence and document level syntactic, semantic and pragmatic affect features, which significantly improve model performance, compared to its language-agnostic variant. To facilitate better representation learning, we also collect a corpus of 10k news articles, and use it for fine-tuning the model. The final model is a majority-voting ensemble which learns different propaganda class boundaries by leveraging different subsets of incorporated knowledge. @@ -2737,7 +2737,7 @@ Team <fixed-case>D</fixed-case>i<fixed-case>S</fixed-case>aster at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 11: Combining <fixed-case>BERT</fixed-case> and Hand-crafted Features for Identifying Propaganda Techniques in News AndersKaas Viktor TorpThomsen - BarbaraPlank + BarbaraPlank 1817–1822 The identification of communication techniques in news articles such as propaganda is important, as such techniques can influence the opinions of large numbers of people. Most work so far focused on the identification at the news article level. Recently, a new dataset and shared task has been proposed for the identification of propaganda techniques at the finer-grained span level. This paper describes our system submission to the subtask of technique classification (TC) for the SemEval 2020 shared task on detection of propaganda techniques in news articles. We propose a method of combining neural BERT representations with hand-crafted features via stacked generalization. Our model has the added advantage that it combines the power of contextual representations from BERT with simple span-based and article-based global features. We present an ablation study which shows that even though BERT representations are very powerful also for this task, BERT still benefits from being combined with carefully designed task-specific features. 2020.semeval-1.238 @@ -2768,7 +2768,7 @@ <fixed-case>UAIC</fixed-case>1860 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 11: Detection of Propaganda Techniques in News Articles VladErmurachi - DanielaGifu + DanielaGifu 1835–1840 The “Detection of Propaganda Techniques in News Articles” task at the SemEval 2020 competition focuses on detecting and classifying propaganda, pervasive in news article. In this paper, we present a system able to evaluate on sentence level, three traditional text representation techniques for these study goals, using: tf*idf, word and character n-grams. Firstly, we built a binary classifier able to provide corresponding propaganda labels, propaganda or non-propaganda. Secondly, we build a multilabel multiclass model to identify applied propaganda. 2020.semeval-1.241 @@ -2843,7 +2843,7 @@ <fixed-case>A</fixed-case>lex<fixed-case>U</fixed-case>-<fixed-case>B</fixed-case>ack<fixed-case>T</fixed-case>ranslation-<fixed-case>TL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Improving Offensive Language Detection Using Data Augmentation and Transfer Learning MaiIbrahim MarwanTorki - NagwaEl-Makky + NagwaEl-Makky 1881–1890 Social media platforms, online news commenting spaces, and many other public forums have become widely known for issues of abusive behavior such as cyber-bullying and personal attacks. In this paper, we use the annotated tweets of the Offensive Language Identification Dataset (OLID) to train three levels of deep learning classifiers to solve the three sub-tasks associated with the dataset. Sub-task A is to determine if the tweet is toxic or not. Then, for offensive tweets, sub-task B requires determining whether the toxicity is targeted. Finally, for sub-task C, we predict the target of the offense; i.e. a group, individual, or other entity. In our solution, we tackle the problem of class imbalance in the dataset by using back translation for data augmentation and utilizing the fine-tuned BERT model in an ensemble of deep learning classifiers. We used this solution to participate in the three English sub-tasks of SemEval-2020 task 12. The proposed solution achieved 0.91393, 0.6300, and 0.57607 macro F1-average in sub-tasks A, B, and C respectively. We achieved the 9th, 14th, and 22nd places for sub-tasks A, B and C respectively. 2020.semeval-1.248 @@ -2988,7 +2988,7 @@ <fixed-case>IITP</fixed-case>-<fixed-case>AINLPML</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Offensive Tweet Identification and Target Categorization in a Multitask Environment SoumitraGhosh AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 1983–1991 In this paper, we describe the participation of IITP-AINLPML team in the SemEval-2020 SharedTask 12 on Offensive Language Identification and Target Categorization in English Twitter data. Our proposed model learns to extract textual features using a BiGRU-based deep neural network supported by a Hierarchical Attention architecture to focus on the most relevant areas in the text. We leverage the effectiveness of multitask learning while building our models for sub-task A and B. We do necessary undersampling of the over-represented classes in the sub-tasks A and C.During training, we consider a threshold of 0.5 as the separation margin between the instances belonging to classes OFF and NOT in sub-task A and UNT and TIN in sub-task B. For sub-task C, the class corresponding to the maximum score among the given confidence scores of the classes(IND, GRP and OTH) is considered as the final label for an instance. Our proposed model obtains the macro F1-scores of 90.95%, 55.69% and 63.88% in sub-task A, B and C, respectively. 2020.semeval-1.261 @@ -2998,7 +2998,7 @@ <fixed-case>INGEOTEC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Multilingual Classification of Offensive Text SabinoMiranda-Jiménez - Eric S.Tellez + Eric S.Tellez MarioGraff DanielaMoctezuma 1992–1997 @@ -3043,7 +3043,7 @@ <fixed-case>JCT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Offensive Language Detection in Tweets Using Preprocessing Methods, Character and Word N-grams MosheUzan - YaakovHaCohen-Kerner + YaakovHaCohen-Kerner 2017–2022 In this paper, we describe our submissions to SemEval-2020 contest. We tackled subtask 12 - “Multilingual Offensive Language Identification in Social Media”. We developed different models for four languages: Arabic, Danish, Greek, and Turkish. We applied three supervised machine learning methods using various combinations of character and word n-gram features. In addition, we applied various combinations of basic preprocessing methods. Our best submission was a model we built for offensive language identification in Danish using Random Forest. This model was ranked at the 6 position out of 39 submissions. Our result is lower by only 0.0025 than the result of the team that won the 4 place using entirely non-neural methods. Our experiments indicate that char ngram features are more helpful than word ngram features. This phenomenon probably occurs because tweets are more characterized by characters than by words, tweets are short, and contain various special sequences of characters, e.g., hashtags, shortcuts, slang words, and typos. 2020.semeval-1.266 @@ -3129,7 +3129,7 @@ <fixed-case>LIIR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: A Cross-Lingual Augmentation Approach for Multilingual Offensive Language Identification ErfanGhadery - Marie-FrancineMoens + Marie-FrancineMoens 2073–2079 This paper presents our system entitled ‘LIIR’ for SemEval-2020 Task 12 on Multilingual Offensive Language Identification in Social Media (OffensEval 2). We have participated in sub-task A for English, Danish, Greek, Arabic, and Turkish languages. We adapt and fine-tune the BERT and Multilingual Bert models made available by Google AI for English and non-English languages respectively. For the English language, we use a combination of two fine-tuned BERT models. For other languages we propose a cross-lingual augmentation approach in order to enrich training data and we use Multilingual BERT to obtain sentence representations. 2020.semeval-1.274 @@ -3139,7 +3139,7 @@ <fixed-case>LISAC</fixed-case> <fixed-case>FSDM</fixed-case>-<fixed-case>USMBA</fixed-case> Team at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Overcoming <fixed-case>A</fixed-case>ra<fixed-case>BERT</fixed-case>’s pretrain-finetune discrepancy for <fixed-case>A</fixed-case>rabic offensive language identification HamzaAlami - SaidOuatik El Alaoui + SaidOuatik El Alaoui AbdessamadBenlahbib NoureddineEn-nahnahi 2080–2085 @@ -3182,7 +3182,7 @@ <fixed-case>NTU</fixed-case>_<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Identifying Offensive Tweets Using Hierarchical Multi-Task Learning Approach - Po-ChunChen + Po-ChunChen Hen-HsenHuang Hsin-HsiChen 2105–2110 @@ -3227,7 +3227,7 @@ FatemahHusain JooyeonLee SamHenry - OzlemUzuner + OzlemUzuner 2133–2139 This paper describes SalamNET, an Arabic offensive language detection system that has been submitted to SemEval 2020 shared task 12: Multilingual Offensive Language Identification in Social Media. Our approach focuses on applying multiple deep learning models and conducting in depth error analysis of results to provide system implications for future development considerations. To pursue our goal, a Recurrent Neural Network (RNN), a Gated Recurrent Unit (GRU), and Long-Short Term Memory (LSTM) models with different design architectures have been developed and evaluated. The SalamNET, a Bi-directional Gated Recurrent Unit (Bi-GRU) based model, reports a macro-F1 score of 0.83% 2020.semeval-1.283 @@ -3236,7 +3236,7 @@ Smatgrisene at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Offense Detection by <fixed-case>AI</fixed-case> - with a Pinch of Real <fixed-case>I</fixed-case> - Peter JuelHenrichsen + Peter JuelHenrichsen MarianneRathje 2140–2145 This paper discusses how ML based classifiers can be enhanced disproportionately by adding small amounts of qualitative linguistic knowledge. As an example we present the Danish classifier Smatgrisene, our contribution to the recent OffensEval Challenge 2020. The classifier was trained on 3000 social media posts annotated for offensiveness, supplemented by rules extracted from the reference work on Danish offensive language (Rathje 2014b). Smatgrisene did surprisingly well in the competition in spite of its extremely simple design, showing an interesting trade-off between technological muscle and linguistic intelligence. Finally, we comment on the perspectives in combining qualitative and quantitative methods for NLP. @@ -3373,7 +3373,7 @@ <fixed-case>UTFPR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2020 Task 12: Identifying Offensive Tweets with Lightweight Ensembles Marcos Aurélio HermogenesBoriola - Gustavo HenriquePaetzold + Gustavo HenriquePaetzold 2232–2236 Offensive language is a common issue on social media platforms nowadays. In an effort to address this issue, the SemEval 2020 event held the OffensEval 2020 shared task where the participants were challenged to develop systems that identify and classify offensive language in tweets. In this paper, we present a system that uses an Ensemble model stacking a BOW model and a CNN model that led us to place 29th in the ranking for English sub-task A. 2020.semeval-1.297 @@ -3394,7 +3394,7 @@ <fixed-case>XD</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2020 Task 12: Ensemble Approach to Offensive Language Identification in Social Media Using Transformer Encoders XiangjueDong - Jinho D.Choi + Jinho D.Choi 2244–2250 This paper presents six document classification models using the latest transformer encoders and a high-performing ensemble model for a task of offensive language identification in social media. For the individual models, deep transformer layers are applied to perform multi-head attentions. For the ensemble model, the utterance representations taken from those individual models are concatenated and fed into a linear decoder to make the final decisions. Our ensemble model outperforms the individual models and shows up to 8.6% improvement over the individual models on the development set. On the test set, it achieves macro-F1 of 90.9% and becomes one of the high performing systems among 85 participants in the sub-task A of this shared task. Our analysis shows that although the ensemble model significantly improves the accuracy on the development set, the improvement is not as evident on the test set. 2020.semeval-1.299 diff --git a/data/xml/2020.sigdial.xml b/data/xml/2020.sigdial.xml index 50fd9598c1..eb56e67f7c 100644 --- a/data/xml/2020.sigdial.xml +++ b/data/xml/2020.sigdial.xml @@ -6,7 +6,7 @@ OlivierPietquin SmarandaMuresan VivianChen - CaseyKennington + CaseyKennington DavidVandyke NinaDethlefs KojiInoue @@ -26,7 +26,7 @@ Semantic Guidance of Dialogue Generation with Reinforcement Learning Cheng-HsunHsueh - Wei-YunMa + Wei-YunMa 1–9 Neural encoder-decoder models have shown promising performance for human-computer dialogue systems over the past few years. However, due to the maximum-likelihood objective for the decoder, the generated responses are often universal and safe to the point that they lack meaningful information and are no longer relevant to the post. To address this, in this paper, we propose semantic guidance using reinforcement learning to ensure that the generated responses indeed include the given or predicted semantics and that these semantics do not appear repeatedly in the response. Synsets, which comprise sets of manually defined synonyms, are used as the form of assigned semantics. For a given/assigned/predicted synset, only one of its synonyms should appear in the generated response; this constitutes a simple but effective semantic-control mechanism. We conduct both quantitative and qualitative evaluations, which show that the generated responses are not only higher-quality but also reflect the assigned semantic controls. 2020.sigdial-1.1 @@ -38,7 +38,7 @@ Counseling-Style Reflection Generation Using Generative Pretrained Transformers with Augmented Context SiqiShen CharlesWelch - RadaMihalcea + RadaMihalcea VerónicaPérez-Rosas 10–20 We introduce a counseling dialogue system that seeks to assist counselors while they are learning and refining their counseling skills. The system generates counselors’reflections – i.e., responses that reflect back on what the client has said given the dialogue history. Our method builds upon the new generative pretrained transformer architecture and enhances it with context augmentation techniques inspired by traditional strategies used during counselor training. Through a set of comparative experiments, we show that the system that incorporates these strategies performs better in the reflection generation task than a system that is just fine-tuned with counseling conversations. To confirm our findings, we present a human evaluation study that shows that our system generates naturally-looking reflections that are also stylistically and grammatically correct. @@ -52,8 +52,8 @@ LenaReed VrindavanHarrison ShereenOraby - DilekHakkani-Tur - MarilynWalker + DilekHakkani-Tur + MarilynWalker 21–34 Natural language generators (NLGs) for task-oriented dialogue typically take a meaning representation (MR) as input, and are trained end-to-end with a corpus of MR/utterance pairs, where the MRs cover a specific set of dialogue acts and domain attributes. Creation of such datasets is labor intensive and time consuming. Therefore, dialogue systems for new domain ontologies would benefit from using data for pre-existing ontologies. Here we explore, for the first time, whether it is possible to train an NLG for a new larger ontology using existing training sets for the restaurant domain, where each set is based on a different ontology. We create a new, larger combined ontology, and then train an NLG to produce utterances covering it. For example, if one dataset has attributes for family friendly and rating information, and the other has attributes for decor and service, our aim is an NLG for the combined ontology that can produce utterances that realize values for family friendly, rating, decor and service. Initial experiments with a baseline neural sequence-to-sequence model show that this task is surprisingly challenging. We then develop a novel self-training method that identifies (errorful) model outputs, automatically constructs a corrected MR input to form a new (MR, utterance) training pair, and then repeatedly adds these new instances back into the training data. We then test the resulting model on a new test set. The result is a self-trained model whose performance is an absolute 75.4% improvement over the baseline model. We also report a human qualitative evaluation of the final model showing that it achieves high naturalness, semantic coherence and grammaticality. 2020.sigdial-1.3 @@ -69,7 +69,7 @@ ChristianGeishauser Hsien-ChinLin MarcoMoresi - MilicaGasic + MilicaGasic 35–44 Task-oriented dialog systems rely on dialog state tracking (DST) to monitor the user’s goal during the course of an interaction. Multi-domain and open-vocabulary settings complicate the task considerably and demand scalable solutions. In this paper we present a new approach to DST which makes use of various copy mechanisms to fill slots with values. Our model has no need to maintain a list of candidate values. Instead, all values are extracted from the dialog context on-the-fly. A slot is filled by one of three copy mechanisms: (1) Span prediction may extract values directly from the user input; (2) a value may be copied from a system inform memory that keeps track of the system’s inform operations (3) a value may be copied over from a different slot that is already contained in the dialog state to resolve coreferences within and across domains. Our approach combines the advantages of span-based slot filling methods with memory methods to avoid the use of value picklists altogether. We argue that our strategy simplifies the DST task while at the same time achieving state of the art performance on various popular evaluation sets including Multiwoz 2.1, where we achieve a joint goal accuracy beyond 55%. 2020.sigdial-1.4 @@ -135,8 +135,8 @@ YeLiu TaoYang ZeyuYou - WeiFan - Philip S.Yu + WeiFan + Philip S.Yu 61–73 Human tackle reading comprehension not only based on the given context itself but often rely on the commonsense beyond. To empower the machine with commonsense reasoning, in this paper, we propose a Commonsense Evidence Generation and Injection framework in reading comprehension, named CEGI. The framework injects two kinds of auxiliary commonsense evidence into comprehensive reading to equip the machine with the ability of rational thinking. Specifically, we build two evidence generators: one aims to generate textual evidence via a language model; the other aims to extract factual evidence (automatically aligned text-triples) from a commonsense knowledge graph after graph completion. Those evidences incorporate contextual commonsense and serve as the additional inputs to the reasoning model. Thereafter, we propose a deep contextual encoder to extract semantic relationships among the paragraph, question, option, and evidence. Finally, we employ a capsule network to extract different linguistic units (word and phrase) from the relations, and dynamically predict the optimal option based on the extracted units. Experiments on the CosmosQA dataset demonstrate that the proposed CEGI model outperforms the current state-of-the-art approaches and achieves the highest accuracy (83.6%) on the leaderboard. 2020.sigdial-1.9 @@ -146,7 +146,7 @@ Identifying Collaborative Conversations using Latent Discourse Behaviors AyushJain - Maria LeonorPacheco + Maria LeonorPacheco StevenLancette MahakGoindani DanGoldwasser @@ -224,7 +224,7 @@ A Spoken Dialogue System for Spatial Question Answering in a Physical Blocks World GeorgiyPlatonov - LenhartSchubert + LenhartSchubert BenjaminKane AaronGindi 128–131 @@ -322,7 +322,7 @@ How Self-Attention Improves Rare Class Performance in a Question-Answering Dialogue Agent AdamStiff QiSong - EricFosler-Lussier + EricFosler-Lussier 196–202 Contextualized language modeling using deep Transformer networks has been applied to a variety of natural language processing tasks with remarkable success. However, we find that these models are not a panacea for a question-answering dialogue agent corpus task, which has hundreds of classes in a long-tailed frequency distribution, with only thousands of data points. Instead, we find substantial improvements in recall and accuracy on rare classes from a simple one-layer RNN with multi-headed self-attention and static word embeddings as inputs. While much research has used attention weights to illustrate what input is important for a task, the complexities of our dialogue corpus offer a unique opportunity to examine how the model represents what it attends to, and we offer a detailed analysis of how that contributes to improved performance on rare classes. A particularly interesting phenomenon we observe is that the model picks up implicit meanings by splitting different aspects of the semantics of a single word across multiple attention heads. 2020.sigdial-1.24 @@ -333,8 +333,8 @@ Filtering conversations through dialogue acts labels for improving corpus-based convergence studies SimoneFuscone - BenoitFavre - LaurentPrévot + BenoitFavre + LaurentPrévot 203–208 Cognitive models of conversation and research on user-adaptation in dialogue systems involves a better understanding of speakers convergence in conversation. Convergence effects have been established on controlled data sets, for various acoustic and linguistic variables. Tracking interpersonal dynamics on generic corpora has provided positive but more contrasted outcomes. We propose here to enrich large conversational corpora with dialogue act (DA) information. We use DA-labels as filters in order to create data sub sets featuring homogeneous conversational activity. Those data sets allow a more precise comparison between speakers’ speech variables. Our experiences consist of comparing convergence on low level variables (Energy, Pitch, Speech Rate) measured on raw data sets, with human and automatically DA-labelled data sets. We found that such filtering does help in observing convergence suggesting that studies on interpersonal dynamics should consider such high level dialogue activity types and their related NLP topics as important ingredients of their toolboxes. 2020.sigdial-1.25 @@ -345,7 +345,7 @@ Nontrivial Lexical Convergence in a Geography-Themed Game AmandaBergqvist - RameshManuvinakurike + RameshManuvinakurike DeepthiKarkada MaikePaetzel 209–214 @@ -359,7 +359,7 @@ Ramiro H.Gálvez LaraGauder JordiLuque - AgustínGravano + AgustínGravano 215–224 Acoustic/prosodic (a/p) entrainment has been associated with multiple positive social aspects of human-human conversations. However, research on its effects is still preliminary, first because how to model it is far from standardized, and second because most of the reported findings rely on small corpora or on corpora collected in experimental setups. The present article has a twofold purpose: 1) it proposes a unifying statistical framework for modeling a/p entrainment, and 2) it tests on two large corpora of spontaneous telephone interactions whether three metrics derived from this framework predict positive social aspects of the conversations. The corpora differ in their spoken language, domain, and positive social outcome attached. To our knowledge, this is the first article studying relations between a/p entrainment and positive social outcomes in such large corpora of spontaneous dialog. Our results suggest that our metrics effectively predict, up to some extent, positive social aspects of conversations, which not only validates the methodology, but also provides further insights into the elusive topic of entrainment in human-human conversation. 2020.sigdial-1.27 @@ -381,7 +381,7 @@ Towards Unified Dialogue System Evaluation: A Comprehensive Analysis of Current Evaluation Protocols Sarah E.Finch - Jinho D.Choi + Jinho D.Choi 236–245 As conversational AI-based dialogue management has increasingly become a trending topic, the need for a standardized and reliable evaluation procedure grows even more pressing. The current state of affairs suggests various evaluation protocols to assess chat-oriented dialogue management systems, rendering it difficult to conduct fair comparative studies across different approaches and gain an insightful understanding of their values. To foster this research, a more robust evaluation protocol must be set in place. This paper presents a comprehensive synthesis of both automated and human evaluation methods on dialogue systems, identifying their shortcomings while accumulating evidence towards the most effective evaluation dimensions. A total of 20 papers from the last two years are surveyed to analyze three types of evaluation protocols: automated, static, and interactive. Finally, the evaluation dimensions used in these papers are compared against our expert evaluation on the system-user dialogue data collected from the Alexa Prize 2020. 2020.sigdial-1.29 @@ -412,7 +412,7 @@ YansenWang R. CharlesMurray HaogangBao - CarolynRose + CarolynRose 257–260 For the past 15 years, in computer-supported collaborative learning applications, conversational agents have been used to structure group interactions in online chat-based environments. A series of experimental studies has provided an empirical foundation for the design of chat-based conversational agents that significantly improve learning over no-support control conditions and static-support control conditions. In this demo, we expand upon this foundation, bringing conversational agents to structure group interaction into physical spaces, with the specific goal of facilitating collaboration and learning in workplace scenarios. 2020.sigdial-1.31 @@ -423,7 +423,7 @@ Emora <fixed-case>STDM</fixed-case>: A Versatile Framework for Innovative Dialogue System Development James D.Finch - Jinho D.Choi + Jinho D.Choi 261–264 This demo paper presents Emora STDM (State Transition Dialogue Manager), a dialogue system development framework that provides novel workflows for rapid prototyping of chat-based dialogue managers as well as collaborative development of complex interactions. Our framework caters to a wide range of expertise levels by supporting interoperability between two popular approaches, state machine and information state, to dialogue management. Our Natural Language Expression package allows seamless integration of pattern matching, custom NLP modules, and database querying, that makes the workflows much more efficient. As a user study, we adopt this framework to an interdisciplinary undergraduate course where students with both technical and non-technical backgrounds are able to develop creative dialogue managers in a short period of time. 2020.sigdial-1.32 @@ -457,8 +457,8 @@ MihailEric KarthikGopalakrishnan BehnamHedayatnia - YangLiu - DilekHakkani-Tur + YangLiu + DilekHakkani-Tur 278–289 Most prior work on task-oriented dialogue systems are restricted to a limited coverage of domain APIs, while users oftentimes have domain related requests that are not covered by the APIs. In this paper, we propose to expand coverage of task-oriented dialogue systems by incorporating external unstructured knowledge sources. We define three sub-tasks: knowledge-seeking turn detection, knowledge selection, and knowledge-grounded response generation, which can be modeled individually or jointly. We introduce an augmented version of MultiWOZ 2.1, which includes new out-of-API-coverage turns and responses grounded on external knowledge sources. We present baselines for each sub-task using both conventional and neural approaches. Our experimental results demonstrate the need for further research in this direction to enable more informative conversational systems. 2020.sigdial-1.35 @@ -540,7 +540,7 @@ AishanLiu SweekarSudhakara AlanWagner - RebeccaPassonneau + RebeccaPassonneau 339–351 This paper presents MDP policy learning for agents to learn strategic behavior–how to play board games–during multimodal dialogues. Policies are trained offline in simulation, with dialogues carried out in a formal language. The agent has a temporary belief state for the dialogue, and a persistent knowledge store represented as an extensive-form game tree. How well the agent learns a new game from a dialogue with a simulated partner is evaluated by how well it plays the game, given its dialogue-final knowledge state. During policy training, we control for the simulated dialogue partner’s level of informativeness in responding to questions. The agent learns best when its trained policy matches the current dialogue partner’s informativeness. We also present a novel data collection for training natural language modules. Human subjects who engaged in dialogues with a baseline system rated the system’s language skills as above average. Further, results confirm that human dialogue partners also vary in their informativeness. 2020.sigdial-1.41 diff --git a/data/xml/2020.sigmorphon.xml b/data/xml/2020.sigmorphon.xml index f7ec43e3f7..7c40572462 100644 --- a/data/xml/2020.sigmorphon.xml +++ b/data/xml/2020.sigmorphon.xml @@ -22,14 +22,14 @@ EkaterinaVylomova JenniferWhite ElizabethSalesky - Sabrina J.Mielke + Sabrina J.Mielke ShijieWu Edoardo MariaPonti Rowan HallMaudslay RanZmigrod JosefValvoda SvetlanaToldova - FrancisTyers + FrancisTyers ElenaKlyachko IlyaYegorov NataliaKrizhanovsky @@ -45,7 +45,7 @@ HilariaCruz EleanorChodroff RyanCotterell - MiikkaSilfverberg + MiikkaSilfverberg MansHulden 1–39 A broad goal in natural language processing (NLP) is to develop a system that has the capacity to process any natural language. Most systems, however, are developed using data from just one language such as English. The SIGMORPHON 2020 shared task on morphological reinflection aims to investigate systems’ ability to generalize across typologically distinct languages, many of which are low resource. Systems were developed using data from 45 languages and just 5 language families, fine-tuned with data from an additional 45 languages and 10 language families (13 in total), and evaluated on all 90 languages. A total of 22 systems (19 neural) from 10 teams were submitted to the task. All four winning systems were neural (two monolingual transformers and two massively multilingual RNN-based models with gated attention). Most teams demonstrate utility of data hallucination and augmentation, ensembles, and multilingual training for low-resource languages. Non-neural learners and manually designed grammars showed competitive and even superior performance on some languages (such as Ingrian, Tajik, Tagalog, Zarma, Lingala), especially with very limited data. Some language families (Afro-Asiatic, Niger-Congo, Turkic) were relatively easy for most systems and achieved over 90% mean accuracy while others were more challenging. @@ -59,7 +59,7 @@ KyleGorman Lucas F.E.Ashby AaronGoyzueta - AryaMcCarthy + AryaMcCarthy ShijieWu DanielYou 40–50 @@ -71,8 +71,8 @@ The <fixed-case>SIGMORPHON</fixed-case> 2020 Shared Task on Unsupervised Morphological Paradigm Completion - KatharinaKann - Arya D.McCarthy + KatharinaKann + Arya D.McCarthy GarrettNicolai MansHulden 51–62 @@ -85,7 +85,7 @@ One-Size-Fits-All Multilingual Models BenPeters - André F. T.Martins + André F. T.Martins 63–69 This paper presents DeepSPIN’s submissions to Tasks 0 and 1 of the SIGMORPHON 2020 Shared Task. For both tasks, we present multilingual models, training jointly on data in all languages. We perform no language-specific hyperparameter tuning – each of our submissions uses the same model for all languages. Our basic architecture is the sparse sequence-to-sequence model with entmax attention and loss, which allows our models to learn sparse, local alignments while still being trainable with gradient-based techniques. For Task 1, we achieve strong performance with both RNN- and transformer-based sparse models. For Task 0, we extend our RNN-based model to a multi-encoder set-up in which separate modules encode the lemma and inflection sequences. Despite our models’ lack of language-specific tuning, they tie for first in Task 0 and place third in Task 1. 2020.sigmorphon-1.4 @@ -126,7 +126,7 @@ The <fixed-case>NYU</fixed-case>-<fixed-case>CUB</fixed-case>oulder Systems for <fixed-case>SIGMORPHON</fixed-case> 2020 Task 0 and Task 2 AssafSinger - KatharinaKann + KatharinaKann 90–98 We describe the NYU-CUBoulder systems for the SIGMORPHON 2020 Task 0 on typologically diverse morphological inflection and Task 2 on unsupervised morphological paradigm completion. The former consists of generating morphological inflections from a lemma and a set of morphosyntactic features describing the target form. The latter requires generating entire paradigms for a set of given lemmas from raw text alone. We model morphological inflection as a sequence-to-sequence problem, where the input is the sequence of the lemma’s characters with morphological tags, and the output is the sequence of the inflected form’s characters. First, we apply a transformer model to the task. Second, as inflected forms share most characters with the lemma, we further propose a pointer-generator transformer model to allow easy copying of input characters. 2020.sigmorphon-1.8 @@ -136,7 +136,7 @@ The <fixed-case>IMS</fixed-case>–<fixed-case>CUB</fixed-case>oulder System for the <fixed-case>SIGMORPHON</fixed-case> 2020 Shared Task on Unsupervised Morphological Paradigm Completion ManuelMager - KatharinaKann + KatharinaKann 99–105 In this paper, we present the systems of the University of Stuttgart IMS and the University of Colorado Boulder (IMS–CUBoulder) for SIGMORPHON 2020 Task 2 on unsupervised morphological paradigm completion (Kann et al., 2020). The task consists of generating the morphological paradigms of a set of lemmas, given only the lemmas themselves and unlabeled text. Our proposed system is a modified version of the baseline introduced together with the task. In particular, we experiment with substituting the inflection generation component with an LSTM sequence-to-sequence model and an LSTM pointer-generator network. Our pointer-generator system obtains the best score of all seven submitted systems on average over all languages, and outperforms the official baseline, which was best overall, on Bulgarian and Kannada. 2020.sigmorphon-1.9 @@ -156,7 +156,7 @@ <fixed-case>KU</fixed-case>-<fixed-case>CST</fixed-case> at the <fixed-case>SIGMORPHON</fixed-case> 2020 Task 2 on Unsupervised Morphological Paradigm Completion ManexAgirrezabal - JürgenWedekind + JürgenWedekind 111–116 We present a model for the unsupervised dis- covery of morphological paradigms. The goal of this model is to induce morphological paradigms from the bible (raw text) and a list of lemmas. We have created a model that splits each lemma in a stem and a suffix, and then we try to create a plausible suffix list by con- sidering lemma pairs. Our model was not able to outperform the official baseline, and there is still room for improvement, but we believe that the ideas presented here are worth considering. 2020.sigmorphon-1.11 @@ -179,7 +179,7 @@ Frustratingly Easy Multilingual Grapheme-to-Phoneme Conversion NikhilPrabhu - KatharinaKann + KatharinaKann 123–127 In this paper, we describe two CU-Boulder submissions to the SIGMORPHON 2020 Task 1 on multilingual grapheme-to-phoneme conversion (G2P). Inspired by the high performance of a standard transformer model (Vaswani et al., 2017) on the task, we improve over this approach by adding two modifications: (i) Instead of training exclusively on G2P, we additionally create examples for the opposite direction, phoneme-to-grapheme conversion (P2G). We then perform multi-task training on both tasks. (ii) We produce ensembles of our models via majority voting. Our approaches, though being conceptually simple, result in systems that place 6th and 8th amongst 23 submitted systems, and obtain the best results out of all systems on Lithuanian and Modern Greek, respectively. 2020.sigmorphon-1.13 @@ -216,7 +216,7 @@ One Model to Pronounce Them All: Multilingual Grapheme-to-Phoneme Conversion With a Transformer Ensemble KailiVesik MuhammadAbdul-Mageed - MiikkaSilfverberg + MiikkaSilfverberg 146–152 The task of grapheme-to-phoneme (G2P) conversion is important for both speech recognition and synthesis. Similar to other speech and language processing tasks, in a scenario where only small-sized training data are available, learning G2P models is challenging. We describe a simple approach of exploiting model ensembles, based on multilingual Transformers and self-training, to develop a highly effective G2P solution for 15 languages. Our models are developed as part of our participation in the SIGMORPHON 2020 Shared Task 1 focused at G2P. Our best models achieve 14.99 word error rate (WER) and 3.30 phoneme error rate (PER), a sizeable improvement over the shared task competitive baselines. 2020.sigmorphon-1.16 diff --git a/data/xml/2020.signlang.xml b/data/xml/2020.signlang.xml index 9683a124da..5d08b6815c 100644 --- a/data/xml/2020.signlang.xml +++ b/data/xml/2020.signlang.xml @@ -4,7 +4,7 @@ Proceedings of the LREC2020 9th Workshop on the Representation and Processing of Sign Languages: Sign Language Resources in the Service of the Language Community, Technological Challenges and Application Perspectives EleniEfthimiou - Stavroula-EvitaFotinea + Stavroula-EvitaFotinea ThomasHanke Julie A.Hochgesang JetteKristoffersen @@ -35,7 +35,7 @@ Improving and Extending Continuous Sign Language Recognition: Taking Iconicity and Spatial Language into account ValentinBelissen MichèleGouiffès - AnneliesBraffort + AnneliesBraffort 7–12 In a lot of recent research, attention has been drawn to recognizing sequences of lexical signs in continuous Sign Language corpora, often artificial. However, as SLs are structured through the use of space and iconicity, focusing on lexicon only prevents the field of Continuous Sign Language Recognition (CSLR) from extending to Sign Language Understanding and Translation. In this article, we propose a new formulation of the CSLR problem and discuss the possibility of recognizing higher-level linguistic structures in SL videos, like classifier constructions. These structures show much more variability than lexical signs, and are fundamentally different than them in the sense that form and meaning can not be disentangled. Building on the recently published French Sign Language corpus Dicta-Sign-LSF-v2, we discuss the performance and relevance of a simple recurrent neural network trained to recognize illustrative structures. 2020.signlang-1.2 @@ -80,7 +80,7 @@ PedroCabral MatildeGonçalves HugoNicolau - LuísaCoheur + LuísaCoheur RubenSantos 33–38 Software for the production of sign languages is much less common than for spoken languages. Such software usually relies on 3D humanoid avatars to produce signs which, inevitably, necessitates the use of animation. One barrier to the use of popular animation tools is their complexity and steep learning curve, which can be hard to master for inexperienced users. Here, we present PE2LGP, an authoring system that features a 3D avatar that signs Portuguese Sign Language. Our Animator is designed specifically to craft sign language animations using a key frame method, and is meant to be easy to use and learn to users without animation skills. We conducted a preliminary evaluation of the Animator, where we animated seven Portuguese Sign Language sentences and asked four sign language users to evaluate their quality. This evaluation revealed that the system, in spite of its simplicity, is indeed capable of producing comprehensible messages. @@ -103,14 +103,14 @@ <fixed-case>LSE</fixed-case>_<fixed-case>UVIGO</fixed-case>: A Multi-source Database for <fixed-case>S</fixed-case>panish <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage Recognition - LauraDocío-Fernández + LauraDocío-Fernández José LuisAlba-Castro SoledadTorres-Guijarro EduardoRodríguez-Banga ManuelRey-Area AniaPérez-Pérez SoniaRico-Alonso - CarmenGarcía-Mateo + CarmenGarcía-Mateo 45–52 This paper presents LSE_UVIGO, a multi-source database designed to foster research on Sign Language Recognition. It is being recorded and compiled for Spanish Sign Language (LSE acronym in Spanish) and contains also spoken Galician language, so it is very well fitted to research on these languages, but also quite useful for fundamental research in any other sign language. LSE_UVIGO is composed of two datasets: LSE_Lex40_UVIGO, a multi-sensor and multi-signer dataset acquired from scratch, designed as an incremental dataset, both in complexity of the visual content and in the variety of signers. It contains static and co-articulated sign recordings, fingerspelled and gloss-based isolated words, and sentences. Its acquisition is done in a controlled lab environment in order to obtain good quality videos with sharp video frames and RGB and depth information, making them suitable to try different approaches to automatic recognition. The second subset, LSE_TVGWeather_UVIGO is being populated from the regional television weather forecasts interpreted to LSE, as a faster way to acquire high quality, continuous LSE recordings with a domain-restricted vocabulary and with a correspondence to spoken sentences. 2020.signlang-1.8 @@ -288,7 +288,7 @@ Towards Large-Scale Data Mining for Data-Driven Analysis of Sign Languages BorisMocialov GrahamTurner - HelenHastie + HelenHastie 145–150 Access to sign language data is far from adequate. We show that it is possible to collect the data from social networking services such as TikTok, Instagram, and YouTube by applying data filtering to enforce quality standards and by discovering patterns in the filtered data, making it easier to analyse and model. Using our data collection pipeline, we collect and examine the interpretation of songs in both the American Sign Language (ASL) and the Brazilian Sign Language (Libras). We explore their differences and similarities by looking at the co-dependence of the orientation and location phonological parameters. 2020.signlang-1.24 @@ -332,7 +332,7 @@ Design and Evaluation for a Prototype of an Online Tool to Access Mathematics Notions in Sign Language CamilleNadal - ChristopheCollet + ChristopheCollet 171–176 The Sign’Maths project aims at giving access to pedagogical resources in Sign Language (SL). It will provide Deaf students and teachers with mathematics vocabulary in SL, this in order to contribute to the standardisation of the vocabulary used at school. The work conducted led to Sign’Maths, an online interactive tool that gives Deaf students access to mathematics definitions in SL. A group of mathematics teachers for Deafs and teachers experts in SL collaborated to create signs to express mathematics concepts, and to produce videos of definitions, examples and illustrations for these concepts. In parallel, we are working on the conception and the design of Sign’Maths software and user interface. Our research work investigated ways to include SL in pedagogical resources in order to present information but also to navigate through the content. User tests revealed that users appreciate the use of SL in a pedagogical resource. However, they pointed out that SL content should be complemented with French to support bilingual education. Our final solution takes advantage of the complementarity of SL, French and visual content to provide an interface that will suit users no matter what their education background is. Future work will investigate a tool for text and signs’ search within Sign’Maths. 2020.signlang-1.28 @@ -365,7 +365,7 @@ Unsupervised Term Discovery for Continuous Sign Language KorhanPolat - MuratSaraçlar + MuratSaraçlar 189–196 Most of the sign language recognition (SLR) systems rely on supervision for training and available annotated sign language resources are scarce due to the difficulties of manual labeling. Unsupervised discovery of lexical units would facilitate the annotation process and thus lead to better SLR systems. Inspired by the unsupervised spoken term discovery in speech processing field, we investigate whether a similar approach can be applied in sign language to discover repeating lexical units. We adapt an algorithm that is designed for spoken term discovery by using hand shape and pose features instead of speech features. The experiments are run on a large scale continuous sign corpus and the performance is evaluated using gloss level annotations. This work introduces a new task for sign language processing that has not been addressed before. 2020.signlang-1.31 @@ -407,7 +407,7 @@ Cross-Lingual Keyword Search for Sign Language Nazif CanTamer - MuratSaraçlar + MuratSaraçlar 217–223 Sign language research most often relies on exhaustively annotated and segmented data, which is scarce even for the most studied sign languages. However, parallel corpora consisting of sign language interpreting are rarely explored. By utilizing such data for the task of keyword search, this work aims to enable information retrieval from sign language with the queries from the translated written language. With the written language translations as labels, we train a weakly supervised keyword search model for sign language and further improve the retrieval performance with two context modeling strategies. In our experiments, we compare the gloss retrieval and cross language retrieval performance on RWTH-PHOENIX-Weather 2014T dataset. 2020.signlang-1.35 diff --git a/data/xml/2020.sigtyp.xml b/data/xml/2020.sigtyp.xml index cfecf9022d..2f350afab3 100644 --- a/data/xml/2020.sigtyp.xml +++ b/data/xml/2020.sigtyp.xml @@ -6,7 +6,7 @@ EkaterinaVylomova Edoardo M.Ponti EitanGrossman - Arya D.McCarthy + Arya D.McCarthy YevgeniBerzak HaimDubossarsky IvanVulić @@ -27,7 +27,7 @@ <fixed-case>SIGTYP</fixed-case> 2020 Shared Task: Prediction of Typological Features JohannesBjerva ElizabethSalesky - Sabrina J.Mielke + Sabrina J.Mielke AditiChaudhary Giuseppe G. A.Celano Edoardo MariaPonti @@ -47,7 +47,7 @@ DeepakAlok AkankshaBansal BorniniLahiri - Atul Kr.Ojha + Atul Kr.Ojha 12–16 This paper enumerates SigTyP 2020 Shared Task on the prediction of typological features as performed by the KMI-Panlingua-IITKGP team. The task entailed the prediction of missing values in a particular language, provided, the name of the language family, its genus, location (in terms of latitude and longitude coordinates and name of the country where it is spoken) and a set of feature-value pair are available. As part of fulfillment of the aforementioned task, the team submitted 3 kinds of system - 2 rule-based and one hybrid system. Of these 3, one rule-based system generated the best performance on the test set. All the systems were ‘constrained’ in the sense that no additional dataset or information, other than those provided by the organisers, was used for developing the systems. 2020.sigtyp-1.2 @@ -58,7 +58,7 @@ <fixed-case>NEMO</fixed-case>: Frequentist Inference Approach to Constrained Linguistic Typology Feature Prediction in <fixed-case>SIGTYP</fixed-case> 2020 Shared Task AlexanderGutkin - RichardSproat + RichardSproat 17–28 This paper describes the NEMO submission to SIGTYP 2020 shared task (Bjerva et al., 2020) which deals with prediction of linguistic typological features for multiple languages using the data derived from World Atlas of Language Structures (WALS). We employ frequentist inference to represent correlations between typological features and use this representation to train simple multi-class estimators that predict individual features. We describe two submitted ridge regression-based configurations which ranked second and third overall in the constrained task. Our best configuration achieved the microaveraged accuracy score of 0.66 on 149 test languages. 2020.sigtyp-1.3 @@ -69,7 +69,7 @@ Predicting Typological Features in <fixed-case>WALS</fixed-case> using Language Embeddings and Conditional Probabilities: <fixed-case>ÚFAL</fixed-case> Submission to the <fixed-case>SIGTYP</fixed-case> 2020 Shared Task MartinVastl - DanielZeman + DanielZeman RudolfRosa 29–35 We present our submission to the SIGTYP 2020 Shared Task on the prediction of typological features. We submit a constrained system, predicting typological features only based on the WALS database. We investigate two approaches. The simpler of the two is a system based on estimating correlation of feature values within languages by computing conditional probabilities and mutual information. The second approach is to train a neural predictor operating on precomputed language embeddings based on WALS features. Our submitted system combines the two approaches based on their self-estimated confidence scores. We reach the accuracy of 70.7% on the test data and rank first in the shared task. diff --git a/data/xml/2020.sltu.xml b/data/xml/2020.sltu.xml index e8775cc976..7972f5bcf4 100644 --- a/data/xml/2020.sltu.xml +++ b/data/xml/2020.sltu.xml @@ -4,7 +4,7 @@ Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL) DorotheeBeermann - LaurentBesacier + LaurentBesacier SakrianiSakti ClaudiaSoria European Language Resources association @@ -41,7 +41,7 @@ OddurKjartansson AlexanderGutkin AlenaButryna - IsinDemirsahin + IsinDemirsahin ClaraRivera 21–27 This paper introduces new open speech datasets for three of the languages of Spain: Basque, Catalan and Galician. Catalan is furthermore the official language of the Principality of Andorra. The datasets consist of high-quality multi-speaker recordings of the three languages along with the associated transcriptions. The resulting corpora include over 33 hours of crowd-sourced recordings of 132 male and female native speakers. The recording scripts also include material for elicitation of global and local place names, personal and business names. The datasets are released under a permissive license and are available for free download for commercial, academic and personal use. The high-quality annotated speech datasets described in this paper can be used to, among other things, build text-to-speech systems, serve as adaptation data in automatic speech recognition and provide useful phonetic and phonological insights in corpus linguistics. @@ -89,7 +89,7 @@ XiaohuiZhang KritikaSingh YatharthSaraf - GeoffreyZweig + GeoffreyZweig 46–52 Towards developing high-performing ASR for low-resource languages, approaches to address the lack of resources are to make use of data from multiple languages, and to augment the training data by creating acoustic variations. In this work we present a single grapheme-based ASR model learned on 7 geographically proximal languages, using standard hybrid BLSTM-HMM acoustic models with lattice-free MMI objective. We build the single ASR grapheme set via taking the union over each language-specific grapheme set, and we find such multilingual graphemic hybrid ASR model can perform language-independent recognition on all 7 languages, and substantially outperform each monolingual ASR model. Secondly, we evaluate the efficacy of multiple data augmentation alternatives within language, as well as their complementarity with multilingual modeling. Overall, we show that the proposed multilingual graphemic hybrid ASR with various data augmentation can not only recognize any within training set languages, but also provide large ASR performance improvements. 2020.sltu-1.7 @@ -99,11 +99,11 @@ Neural Text-to-Speech Synthesis for an Under-Resourced Language in a Diglossic Environment: the Case of <fixed-case>G</fixed-case>ascon <fixed-case>O</fixed-case>ccitan AnderCorral - IgorLeturia + IgorLeturia AureSéguier MichäelBarret BenasetDazéas - PhilippeBoula de Mareüil + PhilippeBoula de Mareüil NicolasQuint 53–60 Occitan is a minority language spoken in Southern France, some Alpine Valleys of Italy, and the Val d’Aran in Spain, which only very recently started developing language and speech technologies. This paper describes the first project for designing a Text-to-Speech synthesis system for one of its main regional varieties, namely Gascon. We used a state-of-the-art deep neural network approach, the Tacotron2-WaveGlow system. However, we faced two additional difficulties or challenges: on the one hand, we wanted to test if it was possible to obtain good quality results with fewer recording hours than is usually reported for such systems; on the other hand, we needed to achieve a standard, non-Occitan pronunciation of French proper names, therefore we needed to record French words and test phoneme-based approaches. The evaluation carried out over the various developed systems and approaches shows promising results with near production-ready quality. It has also allowed us to detect the phenomena for which some flaws or fall of quality occur, pointing at the direction of future work to improve the quality of the actual system and for new systems for other language varieties and voices. @@ -147,7 +147,7 @@ Design and evaluation of a smartphone keyboard for <fixed-case>P</fixed-case>lains <fixed-case>C</fixed-case>ree syllabics - Eddie AntonioSantos + Eddie AntonioSantos AtticusHarrigan 88–96 Plains Cree is a less-resourced language in Canada. To promote its usage online, we describe previous keyboard layouts for typing Plains Cree syllabics on smartphones. We describe our own solution whose development was guided by ergonomics research and corpus statistics. We then describe a case study in which three participants used a previous layout and our own, and we collected quantitative and qualitative data. We conclude that, despite observing accuracy improvements in user testing, introducing a brand new paradigm for typing Plains Cree syllabics may not be ideal for the community. @@ -157,7 +157,7 @@ <fixed-case>M</fixed-case>ulti<fixed-case>S</fixed-case>eg: Parallel Data and Subword Information for Learning Bilingual Embeddings in Low Resource Scenarios - EfsunSarioglu Kayi + EfsunSarioglu Kayi VishalAnand SmarandaMuresan 97–105 @@ -201,8 +201,8 @@ Fully Convolutional <fixed-case>ASR</fixed-case> for Less-Resourced Endangered Languages BaoThai RobertJimerson - RaymondPtucha - EmilyPrud’hommeaux + RaymondPtucha + EmilyPrud’hommeaux 126–130 The application of deep learning to automatic speech recognition (ASR) has yielded dramatic accuracy increases for languages with abundant training data, but languages with limited training resources have yet to see accuracy improvements on this scale. In this paper, we compare a fully convolutional approach for acoustic modelling in ASR with a variety of established acoustic modeling approaches. We evaluate our method on Seneca, a low-resource endangered language spoken in North America. Our method yields word error rates up to 40% lower than those reported using both standard GMM-HMM approaches and established deep neural methods, with a substantial reduction in training time. These results show particular promise for languages like Seneca that are both endangered and lack extensive documentation. 2020.sltu-1.17 @@ -290,7 +290,7 @@ NavyaJose ShardulSuryawanshi ElizabethSherly - John PhilipMcCrae + John PhilipMcCrae 177–184 There is an increasing demand for sentiment analysis of text from social media which are mostly code-mixed. Systems trained on monolingual data fail for code-mixed data due to the complexity of mixing at different levels of the text. However, very few resources are available for code-mixed data to create models specific for this data. Although much research in multilingual and cross-lingual sentiment analysis has used semi-supervised or unsupervised methods, supervised methods still performs better. Only a few datasets for popular languages such as English-Spanish, English-Hindi, and English-Chinese are available. There are no resources available for Malayalam-English code-mixed data. This paper presents a new gold standard corpus for sentiment analysis of code-mixed text in Malayalam-English annotated by voluntary annotators. This gold standard corpus obtained a Krippendorff’s alpha above 0.8 for the dataset. We use this new corpus to provide the benchmark for sentiment analysis in Malayalam-English code-mixed texts. 2020.sltu-1.25 @@ -322,7 +322,7 @@ Bharathi RajaChakravarthi VigneshwaranMuralidaran RubaPriyadharshini - John PhilipMcCrae + John PhilipMcCrae 202–210 Understanding the sentiment of a comment from a video or an image is an essential task in many applications. Sentiment analysis of a text can be useful for various decision-making processes. One such application is to analyse the popular sentiments of videos on social media based on viewer comments. However, comments from social media do not follow strict rules of grammar, and they contain mixing of more than one language, often written in non-native scripts. Non-availability of annotated code-mixed data for a low-resourced language like Tamil also adds difficulty to this problem. To overcome this, we created a gold standard Tamil-English code-switched, sentiment-annotated corpus containing 15,744 comment posts from YouTube. In this paper, we describe the process of creating the corpus and assigning polarities. We present inter-annotator agreement and show the results of sentiment analysis trained on this corpus as a benchmark. 2020.sltu-1.28 @@ -360,9 +360,9 @@ Lenition and Fortition of Stop Codas in <fixed-case>R</fixed-case>omanian MathildeHutin OanaNiculescu - IoanaVasilescu - LoriLamel - MartineAdda-Decker + IoanaVasilescu + LoriLamel + MartineAdda-Decker 226–234 The present paper aims at providing a first study of lenition- and fortition-type phenomena in coda position in Romanian, a language that can be considered as less-resourced. Our data show that there are two contexts for devoicing in Romanian: before a voiceless obstruent, which means that there is regressive voicelessness assimilation in the language, and before pause, which means that there is a tendency towards final devoicing proper. The data also show that non-canonical voicing is an instance of voicing assimilation, as it is observed mainly before voiced consonants (voiced obstruents and sonorants alike). Two conclusions can be drawn from our analyses. First, from a phonetic point of view, the two devoicing phenomena exhibit the same behavior regarding place of articulation of the coda, while voicing assimilation displays the reverse tendency. In particular, alveolars, which tend to devoice the most, also voice the least. Second, the two assimilation processes have similarities that could distinguish them from final devoicing as such. Final devoicing seems to be sensitive to speech style and gender of the speaker, while assimilation processes do not. This may indicate that the two kinds of processes are phonologized at two different degrees in the language, assimilation being more accepted and generalized than final devoicing. 2020.sltu-1.31 @@ -394,8 +394,8 @@ Automatic Extraction of Verb Paradigms in Regional Languages: the case of the Linguistic Crescent varieties ElenaKnyazeva - GillesAdda - PhilippeBoula de Mareüil + GillesAdda + PhilippeBoula de Mareüil MaximilienGuérin NicolasQuint 245–249 @@ -427,8 +427,8 @@ <fixed-case>DNN</fixed-case>-Based Multilingual Automatic Speech Recognition for <fixed-case>W</fixed-case>olaytta using <fixed-case>O</fixed-case>romo Speech - Martha YifiruTachbelie - Solomon TeferraAbate + Martha YifiruTachbelie + Solomon TeferraAbate TanjaSchultz 265–270 It is known that Automatic Speech Recognition (ASR) is very useful for human-computer interaction in all the human languages. However, due to its requirement for a big speech corpus, which is very expensive, it has not been developed for most of the languages. Multilingual ASR (MLASR) has been suggested to share existing speech corpora among related languages to develop an ASR for languages which do not have the required speech corpora. Literature shows that phonetic relatedness goes across language families. We have, therefore, conducted experiments on MLASR taking two language families: one as source (Oromo from Cushitic) and the other as target (Wolaytta from Omotic). Using Oromo Deep Neural Network (DNN) based acoustic model, Wolaytta pronunciation dictionary and language model we have achieved Word Error Rate (WER) of 48.34% for Wolaytta. Moreover, our experiments show that adding only 30 minutes of speech data from the target language (Wolaytta) to the whole training data (22.8 hours) of the source language (Oromo) results in a relative WER reduction of 32.77%. Our results show the possibility of developing ASR system for a language, if we have pronunciation dictionary and language model, using an existing speech corpus of another language irrespective of their language family. @@ -449,7 +449,7 @@ Basic Language Resources for 31 Languages (Plus <fixed-case>E</fixed-case>nglish): The <fixed-case>LORELEI</fixed-case> Representative and Incident Language Packs JenniferTracey - StephanieStrassel + StephanieStrassel 277–284 This paper documents and describes the thirty-one basic language resource packs created for the DARPA LORELEI program for use in development and testing of systems capable of providing language-independent situational awareness in emerging scenarios in a low resource language context. Twenty-four Representative Language Packs cover a broad range of language families and typologies, providing large volumes of monolingual and parallel text, smaller volumes of entity and semantic annotations, and a variety of grammatical resources and tools designed to support research into language universals and cross-language transfer. Seven Incident Language Packs provide test data to evaluate system capabilities on a previously unseen low resource language. We discuss the makeup of Representative and Incident Language Packs, the methods used to produce them, and the evolution of their design and implementation over the course of the multi-year LORELEI program. We conclude with a summary of the final language packs including their low-cost publication in the LDC catalog. 2020.sltu-1.39 @@ -545,7 +545,7 @@ TimofeyArkhangelskiy NikoPartanen MichaelRießler - FrancisTyers + FrancisTyers 336–341 In this paper, we expand on previous work on automatic speech recognition in a low-resource scenario typical of data collected by field linguists. We train DeepSpeech models on 35 hours of dialectal Komi speech recordings and correct the output using language models constructed from various sources. Previous experiments showed that transfer learning using DeepSpeech can improve the accuracy of a speech recognizer for Komi, though the error rate remained very high. In this paper we present further experiments with language models created using KenLM from text materials available online. These are constructed from two corpora, one containing literary texts, one for social media content, and another combining the two. We then trained the model using each language model to explore the impact of the language model data source on the speech recognition model. Our results show significant improvements of over 25% in character error rate and nearly 20% in word error rate. This offers important methodological insight into how ASR results can be improved under low-resource conditions: transfer learning can be used to compensate the lack of training data in the target language, and online texts are a very useful resource when developing language models in this context. 2020.sltu-1.47 @@ -563,15 +563,15 @@ MatthewLee AditiChaudhary LukeGessler - StevenAbney + StevenAbney Shirley AnugrahHayati AntoniosAnastasopoulos OlgaZamaraeva - EmilyPrud’hommeaux + EmilyPrud’hommeaux JennetteChild SaraChild RebeccaKnowles - SarahMoeller + SarahMoeller JeffreyMicher YiyuanLi SydneyZink @@ -589,7 +589,7 @@ KumarSaurav KumarSaunack DipteshKanojia - PushpakBhattacharyya + PushpakBhattacharyya 352–357 Dense word vectors or ‘word embeddings’ which encode semantic properties of words, have now become integral to NLP tasks like Machine Translation (MT), Question Answering (QA), Word Sense Disambiguation (WSD), and Information Retrieval (IR). In this paper, we use various existing approaches to create multiple word embeddings for 14 Indian languages. We place these embeddings for all these languages, viz., Assamese, Bengali, Gujarati, Hindi, Kannada, Konkani, Malayalam, Marathi, Nepali, Odiya, Punjabi, Sanskrit, Tamil, and Telugu in a single repository. Relatively newer approaches that emphasize catering to context (BERT, ELMo, etc.) have shown significant improvements, but require a large amount of resources to generate usable models. We release pre-trained embeddings generated using both contextual and non-contextual approaches. We also use MUSE and XLM to train cross-lingual embeddings for all pairs of the aforementioned languages. To show the efficacy of our embeddings, we evaluate our embedding models on XPOS, UPOS and NER tasks for all these languages. We release a total of 436 models using 8 different approaches. We hope they are useful for the resource-constrained Indian language NLP. The title of this paper refers to the famous novel “A Passage to India” by E.M. Forster, published initially in 1924. 2020.sltu-1.49 @@ -598,7 +598,7 @@ A Counselling Corpus in <fixed-case>C</fixed-case>antonese - JohnLee + JohnLee TianyuanCai WenxiuXie LamXing @@ -611,7 +611,7 @@ Speech Transcription Challenges for Resource Constrained Indigenous Language <fixed-case>C</fixed-case>ree VishwaGupta - GillesBoulianne + GillesBoulianne 362–367 Cree is one of the most spoken Indigenous languages in Canada. From a speech recognition perspective, it is a low-resource language, since very little data is available for either acoustic or language modeling. This has prevented development of speech technology that could help revitalize the language. We describe our experiments with available Cree data to improve automatic transcription both in speaker- independent and dependent scenarios. While it was difficult to get low speaker-independent word error rates with only six speakers, we were able to get low word and phoneme error rates in the speaker-dependent scenario. We compare our phoneme recognition with two state-of-the-art open-source phoneme recognition toolkits, which use end-to-end training and sequence-to-sequence modeling. Our phoneme error rate (8.7%) is significantly lower than that achieved by the best of these systems (15.1%). With these systems and varying amounts of transcribed and text data, we show that pre-training on other languages is important for speaker-independent recognition, and even small amounts of additional text-only documents are useful. These results can guide practical language documentation work, when deciding how much transcribed and text data is needed to achieve useful phoneme accuracies. 2020.sltu-1.51 diff --git a/data/xml/2020.smm4h.xml b/data/xml/2020.smm4h.xml index 24cf9b3f92..a7ac89f50c 100644 --- a/data/xml/2020.smm4h.xml +++ b/data/xml/2020.smm4h.xml @@ -3,10 +3,10 @@ Proceedings of the Fifth Social Media Mining for Health Applications Workshop & Shared Task - GracielaGonzalez-Hernandez + GracielaGonzalez-Hernandez Ari Z.Klein IvanFlores - DavyWeissenbacher + DavyWeissenbacher ArjunMagge KarenO'Connor AbeedSarker @@ -85,7 +85,7 @@ HuongDang KahyunLee SamHenry - ÖzlemUzuner + ÖzlemUzuner 37–41 Twitter is a valuable source of patient-generated data that has been used in various population health studies. The first step in many of these studies is to identify and capture Twitter messages (tweets) containing medication mentions. In this article, we describe our submission to Task 1 of the Social Media Mining for Health Applications (SMM4H) Shared Task 2020. This task challenged participants to detect tweets that mention medications or dietary supplements in a natural, highly imbalance dataset. Our system combined a handcrafted preprocessing step with an ensemble of 20 BERT-based classifiers generated by dividing the training dataset into subsets using 10-fold cross validation and exploiting two BERT embedding models. Our system ranked first in this task, and improved the average F1 score across all participating teams by 19.07% with a precision, recall, and F1 on the test set of 83.75%, 87.01%, and 85.35% respectively. 2020.smm4h-1.5 @@ -165,8 +165,8 @@ Towards Preemptive Detection of Depression and Anxiety in <fixed-case>T</fixed-case>witter DavidOwen - JoseCamacho-Collados - LuisEspinosa Anke + JoseCamacho-Collados + LuisEspinosa Anke 82–89 Depression and anxiety are psychiatric disorders that are observed in many areas of everyday life. For example, these disorders manifest themselves somewhat frequently in texts written by nondiagnosed users in social media. However, detecting users with these conditions is not a straightforward task as they may not explicitly talk about their mental state, and if they do, contextual cues such as immediacy must be taken into account. When available, linguistic flags pointing to probable anxiety or depression could be used by medical experts to write better guidelines and treatments. In this paper, we develop a dataset designed to foster research in depression and anxiety detection in Twitter, framing the detection task as a binary tweet classification problem. We then apply state-of-the-art classification models to this dataset, providing a competitive set of baselines alongside qualitative error analysis. Our results show that language models perform reasonably well, and better than more traditional baselines. Nonetheless, there is clear room for improvement, particularly with unbalanced training sets and in cases where seemingly obvious linguistic cues (keywords) are used counter-intuitively. 2020.smm4h-1.12 @@ -201,7 +201,7 @@ <fixed-case>FBK</fixed-case>@<fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case>2020: <fixed-case>R</fixed-case>o<fixed-case>BERT</fixed-case>a for Detecting Medications on <fixed-case>T</fixed-case>witter SilviaCasola - AlbertoLavelli + AlbertoLavelli 101–103 This paper describes a classifier for tweets that mention medications or supplements, based on a pretrained transformer. We developed such a system for our participation in Subtask 1 of the Social Media Mining for Health Application workshop, which featured an extremely unbalanced dataset. The model showed promising results, with an F1 of 0.8 (task mean: 0.66). 2020.smm4h-1.15 @@ -212,7 +212,7 @@ SougataSaha SouvikDas PrashiKhurana - RohiniSrihari + RohiniSrihari 104–109 This paper details a system designed for Social Media Mining for Health Applications (SMM4H) Shared Task 2020. We specifically describe the systems designed to solve task 2: Automatic classification of multilingual tweets that report adverse effects, and task 3: Automatic extraction and normalization of adverse effects in English tweets. Fine tuning RoBERTa large for classifying English tweets enables us to achieve a F1 score of 56%, which is an increase of +10% compared to the average F1 score for all the submissions. Using BERT based NER and question answering, we are able to achieve a F1 score of 57.6% for extracting adverse reaction mentions from tweets, which is an increase of +1.2% compared to the average F1 score for all the submissions. 2020.smm4h-1.16 @@ -288,8 +288,8 @@ <fixed-case>LITL</fixed-case> at <fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case>: An Old-school Feature-based Classifier for Identifying Adverse Effects in Tweets LudovicTanguy - Lydia-MaiHo-Dac - CécileFabre + Lydia-MaiHo-Dac + CécileFabre RoxaneBois Touati Mohamed YacineHaddad ClaireIbarboure @@ -345,7 +345,7 @@ <fixed-case>NLP</fixed-case>@<fixed-case>VCU</fixed-case>: Identifying Adverse Effects in <fixed-case>E</fixed-case>nglish Tweets for Unbalanced Data DarshiniMahendran CoraLewis - BridgetMcInnes + BridgetMcInnes 158–160 This paper describes our participation in the Social Media Mining for Health Application (SMM4H 2020) Challenge Track 2 for identifying tweets containing Adverse Effects (AEs). Our system uses Convolutional Neural Networks. We explore downsampling, oversampling, and adjusting the class weights to account for the imbalanced nature of the dataset. Our results showed downsampling outperformed oversampling and adjusting the class weights on the test set however all three obtained similar results on the development set. 2020.smm4h-1.29 diff --git a/data/xml/2020.socialnlp.xml b/data/xml/2020.socialnlp.xml index 2a64d2f2fd..1aeb379264 100644 --- a/data/xml/2020.socialnlp.xml +++ b/data/xml/2020.socialnlp.xml @@ -47,7 +47,7 @@ SayanSinha SohanPatro KripaGhosh - SaptarshiGhosh + SaptarshiGhosh 15–24 Although a lot of research has been done on utilising Online Social Media during disasters, there exists no system for a specific task that is critical in a post-disaster scenario – identifying resource-needs and resource-availabilities in the disaster-affected region, coupled with their subsequent matching. To this end, we present NARMADA, a semi-automated platform which leverages the crowd-sourced information from social media posts for assisting post-disaster relief coordination efforts. The system employs Natural Language Processing and Information Retrieval techniques for identifying resource-needs and resource-availabilities from microblogs, extracting resources from the posts, and also matching the needs to suitable availabilities. The system is thus capable of facilitating the judicious management of resources during post-disaster relief operations. 2020.socialnlp-1.3 diff --git a/data/xml/2020.splu.xml b/data/xml/2020.splu.xml index e7ac463d7c..22501103e1 100644 --- a/data/xml/2020.splu.xml +++ b/data/xml/2020.splu.xml @@ -8,7 +8,7 @@ MaliheAlikhani JasonBaldridge MohitBansal - Marie-FrancineMoens + Marie-FrancineMoens Association for Computational Linguistics
Online
November @@ -64,7 +64,7 @@ MauricioMazuecos AgataMarcante LucianaBenotti - RaffaellaBernardi + RaffaellaBernardi 29–38 In this paper, we study the grounding skills required to answer spatial questions asked by humans while playing the GuessWhat?! game. We propose a classification for spatial questions dividing them into absolute, relational, and group questions. We build a new answerer model based on the LXMERT multimodal transformer and we compare a baseline with and without visual features of the scene. We are interested in studying how the attention mechanisms of LXMERT are used to answer spatial questions since they require putting attention on more than one region simultaneously and spotting the relation holding among them. We show that our proposed model outperforms the baseline by a large extent (9.70% on spatial questions and 6.27% overall). By analyzing LXMERT errors and its attention mechanisms, we find that our classification helps to gain a better understanding of the skills required to answer different spatial questions. 2020.splu-1.4 diff --git a/data/xml/2020.spnlp.xml b/data/xml/2020.spnlp.xml index 1b53be87a7..d48b467330 100644 --- a/data/xml/2020.spnlp.xml +++ b/data/xml/2020.spnlp.xml @@ -7,7 +7,7 @@ ZornitsaKozareva JuliaKreutzer GerasimosLampouras - AndréMartins + AndréMartins SujithRavi AndreasVlachos Association for Computational Linguistics @@ -23,8 +23,8 @@ Syntax-driven Iterative Expansion Language Models for Controllable Text Generation NoeCasas - José A. R.Fonollosa - Marta R.Costa-jussà + José A. R.Fonollosa + Marta R.Costa-jussà 1–10 The dominant language modeling paradigm handles text as a sequence of discrete tokens. While that approach can capture the latent structure of the text, it is inherently constrained to sequential dynamics for text generation. We propose a new paradigm for introducing a syntactic inductive bias into neural text generation, where the dependency parse tree is used to drive the Transformer model to generate sentences iteratively. Our experiments show that this paradigm is effective at text generation, with quality between LSTMs and Transformers, and comparable diversity, requiring less than half their decoding steps, and its generation process allows direct control over the syntactic constructions of the generated text, enabling the induction of stylistic variations. 2020.spnlp-1.1 @@ -50,7 +50,7 @@ Generating Synthetic Data for Task-Oriented Semantic Parsing with Hierarchical Representations - KeTran + KeTran MingTan 17–21 Modern conversational AI systems support natural language understanding for a wide variety of capabilities. While a majority of these tasks can be accomplished using a simple and flat representation of intents and slots, more sophisticated capabilities require complex hierarchical representations supported by semantic parsing. State-of-the-art semantic parsers are trained using supervised learning with data labeled according to a hierarchical schema which might be costly to obtain or not readily available for a new domain. In this work, we explore the possibility of generating synthetic data for neural semantic parsing using a pretrained denoising sequence-to-sequence model (i.e., BART). Specifically, we first extract masked templates from the existing labeled utterances, and then fine-tune BART to generate synthetic utterances conditioning on the extracted templates. Finally, we use an auxiliary parser (AP) to filter the generated utterances. The AP guarantees the quality of the generated data. We show the potential of our approach when evaluating on the Facebook TOP dataset for navigation domain. @@ -62,7 +62,7 @@ Structured Prediction for Joint Class Cardinality and Entity Property Inference in Model-Complete Text Comprehension Hendrikter Horst - PhilippCimiano + PhilippCimiano 22–32 Model-complete text comprehension aims at interpreting a natural language text with respect to a semantic domain model describing the classes and their properties relevant for the domain in question. Solving this task can be approached as a structured prediction problem, consisting in inferring the most probable instance of the semantic model given the text. In this work, we focus on the challenging sub-problem of cardinality prediction that consists in predicting the number of distinct individuals of each class in the semantic model. We show that cardinality prediction can successfully be approached by modeling the overall task as a joint inference problem, predicting the number of individuals of certain classes while at the same time extracting their properties. We approach this task with probabilistic graphical models computing the maximum-a-posteriori instance of the semantic model. Our main contribution lies on the empirical investigation and analysis of different approximative inference strategies based on Gibbs sampling. We present and evaluate our models on the task of extracting key parameters from scientific full text articles describing pre-clinical studies in the domain of spinal cord injury. 2020.spnlp-1.4 @@ -74,7 +74,7 @@ Energy-based Neural Modelling for Large-Scale Multiple Domain Dialogue State Tracking Anh DuongTrinh Robert J.Ross - John D.Kelleher + John D.Kelleher 33–42 Scaling up dialogue state tracking to multiple domains is challenging due to the growth in the number of variables being tracked. Furthermore, dialog state tracking models do not yet explicitly make use of relationships between dialogue variables, such as slots across domains. We propose using energy-based structure prediction methods for large-scale dialogue state tracking task in two multiple domain dialogue datasets. Our results indicate that: (i) modelling variable dependencies yields better results; and (ii) the structured prediction output aligns with the dialogue slot-value constraint principles. This leads to promising directions to improve state-of-the-art models by incorporating variable dependencies into their prediction process. 2020.spnlp-1.5 diff --git a/data/xml/2020.starsem.xml b/data/xml/2020.starsem.xml index 09226e959c..d8962f8c34 100644 --- a/data/xml/2020.starsem.xml +++ b/data/xml/2020.starsem.xml @@ -19,7 +19,7 @@ Improving Medical <fixed-case>NLI</fixed-case> Using Context-Aware Domain Knowledge ShaikaChowdhury - PhilipYu + PhilipYu YuanLuo 1–11 Domain knowledge is important to understand both the lexical and relational associations of words in natural language text, especially for domain-specific tasks like Natural Language Inference (NLI) in the medical domain, where due to the lack of a large annotated dataset such knowledge cannot be implicitly learned during training. However, because of the linguistic idiosyncrasies of clinical texts (e.g., shorthand jargon), solely relying on domain knowledge from an external knowledge base (e.g., UMLS) can lead to wrong inference predictions as it disregards contextual information and, hence, does not return the most relevant mapping. To remedy this, we devise a knowledge adaptive approach for medical NLI that encodes the premise/hypothesis texts by leveraging supplementary external knowledge, alongside the UMLS, based on the word contexts. By incorporating refined domain knowledge at both the lexical and relational levels through a multi-source attention mechanism, it is able to align the token-level interactions between the premise and hypothesis more effectively. Comprehensive experiments and case study on the recently released MedNLI dataset are conducted to validate the effectiveness of the proposed approach. @@ -51,7 +51,7 @@ Automatic Learning of Modality Exclusivity Norms with Crosslingual Word Embeddings EmmanueleChersoni RongXiang - QinLu + QinLu Chu-RenHuang 32–38 Collecting modality exclusivity norms for lexical items has recently become a common practice in psycholinguistics and cognitive research. However, these norms are available only for a relatively small number of languages and often involve a costly and time-consuming collection of ratings. In this work, we aim at learning a mapping between word embeddings and modality norms. Our experiments focused on crosslingual word embeddings, in order to predict modality association scores by training on a high-resource language and testing on a low-resource one. We ran two experiments, one in a monolingual and the other one in a crosslingual setting. Results show that modality prediction using off-the-shelf crosslingual embeddings indeed has moderate-to-high correlations with human ratings even when regression algorithms are trained on an English resource and tested on a completely unseen language. @@ -79,7 +79,7 @@ Token Sequence Labeling vs. Clause Classification for <fixed-case>E</fixed-case>nglish Emotion Stimulus Detection - Laura Ana MariaOberländer + Laura Ana MariaOberländer RomanKlinger 58–70 Emotion stimulus detection is the task of finding the cause of an emotion in a textual description, similar to target or aspect detection for sentiment analysis. Previous work approached this in three ways, namely (1) as text classification into an inventory of predefined possible stimuli (“Is the stimulus category A or B?”), (2) as sequence labeling of tokens (“Which tokens describe the stimulus?”), and (3) as clause classification (“Does this clause contain the emotion stimulus?”). So far, setting (3) has been evaluated broadly on Mandarin and (2) on English, but no comparison has been performed. Therefore, we analyze whether clause classification or token sequence labeling is better suited for emotion stimulus detection in English. We propose an integrated framework which enables us to evaluate the two different approaches comparably, implement models inspired by state-of-the-art approaches in Mandarin, and test them on four English data sets from different domains. Our results show that token sequence labeling is superior on three out of four datasets, in both clause-based and token sequence-based evaluation. The only case in which clause classification performs better is one data set with a high density of clause annotations. Our error analysis further confirms quantitatively and qualitatively that clauses are not the appropriate stimulus unit in English. @@ -112,10 +112,10 @@ On the Systematicity of Probing Contextualized Word Representations: The Case of Hypernymy in <fixed-case>BERT</fixed-case> AbhilashaRavichander - EduardHovy + EduardHovy KaheerSuleman AdamTrischler - Jackie Chi KitCheung + Jackie Chi KitCheung 88–102 Contextualized word representations have become a driving force in NLP, motivating widespread interest in understanding their capabilities and the mechanisms by which they operate. Particularly intriguing is their ability to identify and encode conceptual abstractions. Past work has probed BERT representations for this competence, finding that BERT can correctly retrieve noun hypernyms in cloze tasks. In this work, we ask the question: do probing studies shed light on systematic knowledge in BERT representations? As a case study, we examine hypernymy knowledge encoded in BERT representations. In particular, we demonstrate through a simple consistency probe that the ability to correctly retrieve hypernyms in cloze tasks, as used in prior work, does not correspond to systematic knowledge in BERT. Our main conclusion is cautionary: even if BERT demonstrates high probing accuracy for a particular competence, it does not necessarily follow that BERT ‘understands’ a concept, and it cannot be expected to systematically generalize across applicable contexts. 2020.starsem-1.10 @@ -124,7 +124,7 @@ Topology of Word Embeddings: Singularities Reflect Polysemy AlexanderJakubowski - MilicaGasic + MilicaGasic MarcusZibrowius 103–113 The manifold hypothesis suggests that word vectors live on a submanifold within their ambient vector space. We argue that we should, more accurately, expect them to live on a pinched manifold: a singular quotient of a manifold obtained by identifying some of its points. The identified, singular points correspond to polysemous words, i.e. words with multiple meanings. Our point of view suggests that monosemous and polysemous words can be distinguished based on the topology of their neighbourhoods. We present two kinds of empirical evidence to support this point of view: (1) We introduce a topological measure of polysemy based on persistent homology that correlates well with the actual number of meanings of a word. (2) We propose a simple, topologically motivated solution to the SemEval-2010 task on Word Sense Induction & Disambiguation that produces competitive results. @@ -134,7 +134,7 @@ Assessing Polyseme Sense Similarity through Co-predication Acceptability and Contextualised Embedding Distance JanoschHaber - MassimoPoesio + MassimoPoesio 114–124 Co-predication is one of the most frequently used linguistic tests to tell apart shifts in polysemic sense from changes in homonymic meaning. It is increasingly coming under criticism as evidence is accumulating that it tends to mis-classify specific cases of polysemic sense alteration as homonymy. In this paper, we collect empirical data to investigate these accusations. We asses how co-predication acceptability relates to explicit ratings of polyseme word sense similarity, and how well either measure can be predicted through the distance between target words’ contextualised word embeddings. We find that sense similarity appears to be a major contributor in determining co-predication acceptability, but that co-predication judgements tend to rate especially less similar sense interpretations equally as unacceptable as homonym pairs, effectively mis-classifying these instances. The tested contextualised word embeddings fail to predict word sense similarity consistently, but the similarities between BERT embeddings show a significant correlation with co-predication ratings. We take this finding as evidence that BERT embeddings might be better representations of context than encodings of word meaning. 2020.starsem-1.12 @@ -162,7 +162,7 @@ Learning Negation Scope from Syntactic Structure NickMcKenna - MarkSteedman + MarkSteedman 137–142 We present a semi-supervised model which learns the semantics of negation purely through analysis of syntactic structure. Linguistic theory posits that the semantics of negation can be understood purely syntactically, though recent research relies on combining a variety of features including part-of-speech tags, word embeddings, and semantic representations to achieve high task performance. Our simplified model returns to syntactic theory and achieves state-of-the-art performance on the task of Negation Scope Detection while demonstrating the tight relationship between the syntax and semantics of negation. 2020.starsem-1.15 @@ -183,7 +183,7 @@ KazumaHashimoto Chien-ShengWu YaoWang - PhilipYu + PhilipYu RichardSocher CaimingXiong 154–167 diff --git a/data/xml/2020.stoc.xml b/data/xml/2020.stoc.xml index 707251f30e..e7e18e6833 100644 --- a/data/xml/2020.stoc.xml +++ b/data/xml/2020.stoc.xml @@ -34,8 +34,8 @@ SashankSanthanam SamiraShaikh AlanZemel - TomekStrzalkowski - Bonnie J.Dorr + TomekStrzalkowski + Bonnie J.Dorr 1–8 We describe a system that supports natural language processing (NLP) components for active defenses against social engineering attacks. We deploy a pipeline of human language technology, including Ask and Framing Detection, Named Entity Recognition, Dialogue Engineering, and Stylometry. The system processes modern message formats through a plug-in architecture to accommodate innovative approaches for message analysis, knowledge representation and dialogue generation. The novelty of the system is that it uses NLP for cyber defense and engages the attacker using bots to elicit evidence to attribute to the attacker and to waste the attacker’s time and resources. 2020.stoc-1.1 @@ -50,8 +50,8 @@ SashankSanthanam SamiraShaikh AlanZemel - TomekStrzalkowski - Bonnie J.Dorr + TomekStrzalkowski + Bonnie J.Dorr 9–14 We present a paradigm for extensible lexicon development based on Lexical Conceptual Structure to support social engineering detection and response generation. We leverage the central notions of ask (elicitation of behaviors such as providing access to money) and framing (risk/reward implied by the ask). We demonstrate improvements in ask/framing detection through refinements to our lexical organization and show that response generation qualitatively improves as ask/framing detection performance improves. The paradigm presents a systematic and efficient approach to resource adaptation for improved task-specific performance. 2020.stoc-1.2 @@ -126,7 +126,7 @@ PengLiu Kartik-SinghThakur AdamDalton - TomekStrzalkowski + TomekStrzalkowski 48–55 This paper describes different approaches to detect malicious content in email interactions through a combination of machine learning and natural language processing tools. Specifically, several neural network designs are tested on word embedding representations to detect suspicious messages and separate them from non-suspicious, benign email. The proposed approaches are trained and tested on distinct email collections, including datasets constructed from publicly available corpora (such as Enron, APWG, etc.) as well as several smaller, non-public datasets used in recent government evaluations. Experimental results show that back-propagation both with and without recurrent neural layers outperforms current state of the art techniques that include supervised learning algorithms with stylometric elements of texts as features. Our results also demonstrate that word embedding vectors are effective means for capturing certain aspects of text meaning that can be teased out through machine learning in non-linear/complex neural networks, in order to obtain highly accurate detection of malicious emails based on email text alone. 2020.stoc-1.8 diff --git a/data/xml/2020.sustainlp.xml b/data/xml/2020.sustainlp.xml index dc9dce91a2..302aad1435 100644 --- a/data/xml/2020.sustainlp.xml +++ b/data/xml/2020.sustainlp.xml @@ -3,11 +3,11 @@ Proceedings of SustaiNLP: Workshop on Simple and Efficient Natural Language Processing - Nafise SadatMoosavi + Nafise SadatMoosavi AngelaFan VeredShwartz GoranGlavaš - ShafiqJoty + ShafiqJoty AlexWang ThomasWolf Association for Computational Linguistics @@ -202,7 +202,7 @@ ParulAwasthy BishwaranjanBhattacharjee JohnKender - RaduFlorian + RaduFlorian 113–118 Transfer learning is a popular technique to learn a task using less training data and fewer compute resources. However, selecting the correct source model for transfer learning is a challenging task. We demonstrate a novel predictive method that determines which existing source model would minimize error for transfer learning to a given target. This technique does not require learning for prediction, and avoids computational costs of trail-and-error. We have evaluated this technique on nine datasets across diverse domains, including newswire, user forums, air flight booking, cybersecurity news, etc. We show that it per-forms better than existing techniques such as fine-tuning over vanilla BERT, or curriculum learning over the largest dataset on top of BERT, resulting in average F1 score gains in excess of 3%. Moreover, our technique consistently selects the best model using fewer tries. 2020.sustainlp-1.15 @@ -262,7 +262,7 @@ <fixed-case>F</fixed-case>ast<fixed-case>F</fixed-case>ormers: Highly Efficient Transformer Models for Natural Language Understanding Young JinKim - HanyHassan + HanyHassan 149–158 Transformer-based models are the state-of-the-art for Natural Language Understanding (NLU) applications. Models are getting bigger and better on various tasks. However, Transformer models remain computationally challenging since they are not efficient at inference-time compared to traditional approaches. In this paper, we present FastFormers, a set of recipes to achieve efficient inference-time performance for Transformer-based models on various NLU tasks. We show how carefully utilizing knowledge distillation, structured pruning and numerical optimization can lead to drastic improvements on inference efficiency. We provide effective recipes that can guide practitioners to choose the best settings for various NLU tasks and pretrained models. Applying the proposed recipes to the SuperGLUE benchmark, we achieve from 9.8x up to 233.9x speed-up compared to out-of-the-box models on CPU. On GPU, we also achieve up to 12.4x speed-up with the presented methods. We show that FastFormers can drastically reduce cost of serving 100 million requests from 4,223 USD to just 18 USD on an Azure F16s_v2 instance. This translates to a sustainable runtime by reducing energy consumption 6.9x - 125.8x according to the metrics used in the SustaiNLP 2020 shared task. 2020.sustainlp-1.20 diff --git a/data/xml/2020.tacl.xml b/data/xml/2020.tacl.xml index efacc42bac..cdd3bba24f 100644 --- a/data/xml/2020.tacl.xml +++ b/data/xml/2020.tacl.xml @@ -62,8 +62,8 @@ MandarJoshi DanqiChen YinhanLiu - Daniel S.Weld - LukeZettlemoyer + Daniel S.Weld + LukeZettlemoyer OmerLevy 10.1162/tacl_a_00300 We present SpanBERT, a pre-training method that is designed to better represent and predict spans of text. Our approach extends BERT by (1) masking contiguous random spans, rather than random tokens, and (2) training the span boundary representations to predict the entire content of the masked span, without relying on the individual token representations within it. SpanBERT consistently outperforms BERT and our better-tuned baselines, with substantial gains on span selection tasks such as question answering and coreference resolution. In particular, with the same training data and model size as BERTlarge, our single model obtains 94.6% and 88.7% F1 on SQuAD 1.1 and 2.0 respectively. We also achieve a new state of the art on the OntoNotes coreference resolution task (79.6% F1), strong performance on the TACRED relation extraction benchmark, and even gains on GLUE.1 @@ -75,7 +75,7 @@ A Graph-based Model for Joint <fixed-case>C</fixed-case>hinese Word Segmentation and Dependency Parsing HangYan XipengQiu - XuanjingHuang + XuanjingHuang 10.1162/tacl_a_00301 Chinese word segmentation and dependency parsing are two fundamental tasks for Chinese natural language processing. The dependency parsing is defined at the word-level. Therefore word segmentation is the precondition of dependency parsing, which makes dependency parsing suffer from error propagation and unable to directly make use of character-level pre-trained language models (such as BERT). In this paper, we propose a graph-based model to integrate Chinese word segmentation and dependency parsing. Different from previous transition-based joint models, our proposed model is more concise, which results in fewer efforts of feature engineering. Our graph-based joint model achieves better performance than previous joint models and state-of-the-art results in both Chinese word segmentation and dependency parsing. Additionally, when BERT is combined, our model can substantially reduce the performance gap of dependency parsing between joint models and gold-segmented word-based models. Our code is publicly available at https://github.com/fastnlp/JointCwsParser 78–92 @@ -100,7 +100,7 @@ ShuyanZhou ShrutiRijhwani JohnWieting - JaimeCarbonell + JaimeCarbonell GrahamNeubig 10.1162/tacl_a_00303 Cross-lingual entity linking (XEL) is the task of finding referents in a target-language knowledge base (KB) for mentions extracted from source-language texts. The first step of (X)EL is candidate generation, which retrieves a list of plausible candidate entities from the target-language KB for each mention. Approaches based on resources from Wikipedia have proven successful in the realm of relatively high-resource languages, but these do not extend well to low-resource languages with few, if any, Wikipedia pages. Recently, transfer learning methods have been shown to reduce the demand for resources in the low-resource languages by utilizing resources in closely related languages, but the performance still lags far behind their high-resource counterparts. In this paper, we first assess the problems faced by current entity candidate generation methods for low-resource XEL, then propose three improvements that (1) reduce the disconnect between entity mentions and KB entries, and (2) improve the robustness of the model to low-resource scenarios. The methods are simple, but effective: We experiment with our approach on seven XEL datasets and find that they yield an average gain of 16.9% in Top-30 gold candidate recall, compared with state-of-the-art baselines. Our improved model also yields an average gain of 7.9% in in-KB accuracy of end-to-end XEL.1 @@ -110,7 +110,7 @@ Does Syntax Need to Grow on Trees? Sources of Hierarchical Inductive Bias in Sequence-to-Sequence Networks - R. ThomasMcCoy + R. ThomasMcCoy RobertFrank TalLinzen 10.1162/tacl_a_00304 @@ -124,7 +124,7 @@ KaiSun DianYu DongYu - ClaireCardie + ClaireCardie 10.1162/tacl_a_00305 Machine reading comprehension tasks require a machine reader to answer questions relevant to the given document. In this paper, we present the first free-form multiple-Choice Chinese machine reading Comprehension dataset (C3), containing 13,369 documents (dialogues or more formally written mixed-genre texts) and their associated 19,577 multiple-choice free-form questions collected from Chinese-as-a-second-language examinations. We present a comprehensive analysis of the prior knowledge (i.e., linguistic, domain-specific, and general world knowledge) needed for these real-world problems. We implement rule-based and popular neural methods and find that there is still a significant performance gap between the best performing model (68.5%) and human readers (96.0%), especiallyon problems that require prior knowledge. We further study the effects of distractor plausibility and data augmentation based on translated relevant datasets for English on model performance. We expect C3 to present great challenges to existing systems as answering 86.8% of questions requires both knowledge within and beyond the accompanying document, and we hope that C3 can serve as a platform to study how to leverage various kinds of prior knowledge to better understand a given written or orally oriented text. C3 is available at https://dataset.org/c3/. 141–155 @@ -174,7 +174,7 @@ Sarah ItaLevitan MichelleLevine MarkoMandic - JuliaHirschberg + JuliaHirschberg 10.1162/tacl_a_00311 Humans rarely perform better than chance at lie detection. To better understand human perception of deception, we created a game framework, LieCatcher, to collect ratings of perceived deception using a large corpus of deceptive and truthful interviews. We analyzed the acoustic-prosodic and linguistic characteristics of language trusted and mistrusted by raters and compared these to characteristics of actual truthful and deceptive language to understand how perception aligns with reality. With this data we built classifiers to automatically distinguish trusted from mistrusted speech, achieving an F1 of 66.1%. We next evaluated whether the strategies raters said they used to discriminate between truthful and deceptive responses were in fact useful. Our results show that, although several prosodic and lexical features were consistently perceived as trustworthy, they were not reliable cues. Also, the strategies that judges reported using in deception detection were not helpful for the task. Our work sheds light on the nature of trusted language and provides insight into the challenging problem of human deception detection. 199–214 @@ -255,7 +255,7 @@ Learning Lexical Subspaces in a Distributional Vector Space KushalArora AishikChakraborty - Jackie C. K.Cheung + Jackie C. K.Cheung 10.1162/tacl_a_00316 In this paper, we propose LexSub, a novel approach towards unifying lexical and distributional semantics. We inject knowledge about lexical-semantic relations into distributional word embeddings by defining subspaces of the distributional vector space in which a lexical relation should hold. Our framework can handle symmetric attract and repel relations (e.g., synonymy and antonymy, respectively), as well as asymmetric relations (e.g., hypernymy and meronomy). In a suite of intrinsic benchmarks, we show that our model outperforms previous approaches on relatedness tasks and on hypernymy classification and detection, while being competitive on word similarity tasks. It also outperforms previous systems on extrinsic classification tasks that benefit from exploiting lexical relational cues. We perform a series of analyses to understand the behaviors of our model.1Code available at https://github.com/aishikchakraborty/LexSub. 311–329 @@ -267,7 +267,7 @@ AshutoshKumar KabirAhuja RaghuramVadapalli - ParthaTalukdar + ParthaTalukdar 10.1162/tacl_a_00318 Given a sentence (e.g., “I like mangoes”) and a constraint (e.g., sentiment flip), the goal of controlled text generation is to produce a sentence that adapts the input sentence to meet the requirements of the constraint (e.g., “I hate mangoes”). Going beyond such simple constraints, recent work has started exploring the incorporation of complex syntactic-guidance as constraints in the task of controlled paraphrase generation. In these methods, syntactic-guidance is sourced from a separate exemplar sentence. However, this prior work has only utilized limited syntactic information available in the parse tree of the exemplar sentence. We address this limitation in the paper and propose Syntax Guided Controlled Paraphraser (SGCP), an end-to-end framework for syntactic paraphrase generation. We find that Sgcp can generate syntax-conforming sentences while not compromising on relevance. We perform extensive automated and human evaluations over multiple real-world English language datasets to demonstrate the efficacy of Sgcp over state-of-the-art baselines. To drive future research, we have made Sgcp’s source code available.1 329–345 @@ -281,8 +281,8 @@ WojciechStokowiec WangLing LingpengKong - PhilBlunsom - ChrisDyer + PhilBlunsom + ChrisDyer 10.1162/tacl_a_00319 We show that Bayes’ rule provides an effective mechanism for creating document translation models that can be learned from only parallel sentences and monolingual documents a compelling benefit because parallel documents are not always available. In our formulation, the posterior probability of a candidate translation is the product of the unconditional (prior) probability of the candidate output document and the “reverse translation probability” of translating the candidate output back into the source language. Our proposed model uses a powerful autoregressive language model as the prior on target language documents, but it assumes that each sentence is translated independently from the target to the source language. Crucially, at test time, when a source document is observed, the document language model prior induces dependencies between the translations of the source sentences in the posterior. The model’s independence assumption not only enables efficient use of available data, but it additionally admits a practical left-to-right beam-search algorithm for carrying out inference. Experiments show that our model benefits from using cross-sentence context in the language model, and it outperforms existing document translation approaches. 346–360 @@ -307,7 +307,7 @@ AnhadMohananey WeiPeng Sheng-FuWang - Samuel R.Bowman + Samuel R.Bowman 10.1162/tacl_a_00321 We introduce The Benchmark of Linguistic Minimal Pairs (BLiMP),1 a challenge set for evaluating the linguistic knowledge of language models (LMs) on major grammatical phenomena in English. BLiMP consists of 67 individual datasets, each containing 1,000 minimal pairs—that is, pairs of minimally different sentences that contrast in grammatical acceptability and isolate specific phenomenon in syntax, morphology, or semantics. We generate the data according to linguist-crafted grammar templates, and human aggregate agreement with the labels is 96.4%. We evaluate n-gram, LSTM, and Transformer (GPT-2 and Transformer-XL) LMs by observing whether they assign a higher probability to the acceptable sentence in each minimal pair. We find that state-of-the-art models identify morphological contrasts related to agreement reliably, but they struggle with some subtle semantic and syntactic phenomena, such as negative polarity items and extraction islands. 377–392 @@ -337,7 +337,7 @@ How Can We Know What Language Models Know? ZhengbaoJiang - Frank F.Xu + Frank F.Xu JunAraki GrahamNeubig 10.1162/tacl_a_00324 @@ -350,7 +350,7 @@ Topic Modeling in Embedding Spaces Adji B.Dieng Francisco J. R.Ruiz - David M.Blei + David M.Blei 10.1162/tacl_a_00325 Topic modeling analyzes documents to learn meaningful patterns of words. However, existing topic models fail to learn interpretable topics when working with large and heavy-tailed vocabularies. To this end, we develop the embedded topic model (etm), a generative model of documents that marries traditional topic models with word embeddings. More specifically, the etm models each word with a categorical distribution whose natural parameter is the inner product between the word’s embedding and an embedding of its assigned topic. To fit the etm, we develop an efficient amortized variational inference algorithm. The etm discovers interpretable topics even with large vocabularies that include rare words and stop words. It outperforms existing document models, such as latent Dirichlet allocation, in terms of both topic quality and predictive performance. 439–453 @@ -359,9 +359,9 @@ <fixed-case>T</fixed-case>y<fixed-case>D</fixed-case>i <fixed-case>QA</fixed-case>: A Benchmark for Information-Seeking Question Answering in Typologically Diverse Languages - Jonathan H.Clark + Jonathan H.Clark EunsolChoi - MichaelCollins + MichaelCollins DanGarrette TomKwiatkowski VitalyNikolaev @@ -422,8 +422,8 @@ MarinaFomicheva ShuoSun LisaYankovskaya - FrédéricBlain - FranciscoGuzmán + FrédéricBlain + FranciscoGuzmán MarkFishel NikolaosAletras VishravChaudhary @@ -439,13 +439,13 @@ JacobAndreas JohnBufe DavidBurkett - CharlesChen + CharlesChen JoshClausman JeanCrawford KateCrim JordanDeLoach LeahDorner - JasonEisner + JasonEisner HaoFang AlanGuo DavidHall @@ -512,7 +512,7 @@ Nested Named Entity Recognition via Second-best Sequence Learning and Decoding TakashiShibuya - EduardHovy + EduardHovy 10.1162/tacl_a_00334 When an entity name contains other names within it, the identification of all combinations of names can become difficult and expensive. We propose a new method to recognize not only outermost named entities but also inner nested ones. We design an objective function for training a neural model that treats the tag sequence for nested entities as the second best path within the span of their parent entity. In addition, we provide the decoding method for inference that extracts entities iteratively from outermost ones to inner ones in an outside-to-inside way. Our method has no additional hyperparameters to the conditional random field based model widely used for flat named entity recognition tasks. Experiments demonstrate that our method performs better than or at least as well as existing methods capable of handling nested entities, achieving F1-scores of 85.82%, 84.34%, and 77.36% on ACE-2004, ACE-2005, and GENIA datasets, respectively. 605–620 @@ -612,7 +612,7 @@ SergeyEdunov MarjanGhazvininejad MikeLewis - LukeZettlemoyer + LukeZettlemoyer 10.1162/tacl_a_00343 This paper demonstrates that multilingual denoising pre-training produces significant performance gains across a wide variety of machine translation (MT) tasks. We present mBART—a sequence-to-sequence denoising auto-encoder pre-trained on large-scale monolingual corpora in many languages using the BART objective (Lewis et al., 2019). mBART is the first method for pre-training a complete sequence-to-sequence model by denoising full texts in multiple languages, whereas previous approaches have focused only on the encoder, decoder, or reconstructing parts of the text. Pre-training a complete model allows it to be directly fine-tuned for supervised (both sentence-level and document-level) and unsupervised machine translation, with no task- specific modifications. We demonstrate that adding mBART initialization produces performance gains in all but the highest-resource settings, including up to 12 BLEU points for low resource MT and over 5 BLEU points for many document-level and unsupervised models. We also show that it enables transfer to language pairs with no bi-text or that were not in the pre-training corpus, and present extensive analysis of which factors contribute the most to effective pre-training.1 726–742 @@ -649,8 +649,8 @@ DanielFried DaniYogatama LauraRimell - ChrisDyer - PhilBlunsom + ChrisDyer + PhilBlunsom 10.1162/tacl_a_00345 Textual representation learners trained on large amounts of data have achieved notable success on downstream tasks; intriguingly, they have also performed well on challenging tests of syntactic competence. Hence, it remains an open question whether scalable learners like BERT can become fully proficient in the syntax of natural language by virtue of data scale alone, or whether they still benefit from more explicit syntactic biases. To answer this question, we introduce a knowledge distillation strategy for injecting syntactic biases into BERT pretraining, by distilling the syntactically informative predictions of a hierarchical—albeit harder to scale—syntactic language model. Since BERT models masked words in bidirectional context, we propose to distill the approximate marginal distribution over words in context from the syntactic LM. Our approach reduces relative error by 2–21% on a diverse set of structured prediction tasks, although we obtain mixed results on the GLUE benchmark. Our findings demonstrate the benefits of syntactic biases, even for representation learners that exploit large amounts of data, and contribute to a better understanding of where syntactic biases are helpful in benchmarks of natural language understanding. 776–794 @@ -673,7 +673,7 @@ Ananya B.Sai Akash KumarMohankumar SiddharthaArora - Mitesh M.Khapra + Mitesh M.Khapra 10.1162/tacl_a_00347 There is an increasing focus on model-based dialog evaluation metrics such as ADEM, RUBER, and the more recent BERT-based metrics. These models aim to assign a high score to all relevant responses and a low score to all irrelevant responses. Ideally, such models should be trained using multiple relevant and irrelevant responses for any given context. However, no such data is publicly available, and hence existing models are usually trained using a single relevant response and multiple randomly selected responses from other contexts (random negatives). To allow for better training and robust evaluation of model-based metrics, we introduce the DailyDialog++ dataset, consisting of (i) five relevant responses for each context and (ii) five adversarially crafted irrelevant responses for each context. Using this dataset, we first show that even in the presence of multiple correct references, n-gram based metrics and embedding based metrics do not perform well at separating relevant responses from even random negatives. While model-based metrics perform better than n-gram and embedding based metrics on random negatives, their performance drops substantially when evaluated on adversarial examples. To check if large scale pretraining could help, we propose a new BERT-based evaluation metric called DEB, which is pretrained on 727M Reddit conversations and then finetuned on our dataset. DEB significantly outperforms existing models, showing better correlation with human judgments and better performance on random negatives (88.27% accuracy). However, its performance again drops substantially when evaluated on adversarial responses, thereby highlighting that even large-scale pretrained evaluation models are not robust to the adversarial examples in our dataset. The dataset1 and code2 are publicly available. 810–827 @@ -685,7 +685,7 @@ PhillipKeung JulianSalazar YichaoLu - Noah A.Smith + Noah A.Smith 10.1162/tacl_a_00348 We describe an unsupervised method to create pseudo-parallel corpora for machine translation (MT) from unaligned text. We use multilingual BERT to create source and target sentence embeddings for nearest-neighbor search and adapt the model via self-training. We validate our technique by extracting parallel sentence pairs on the BUCC 2017 bitext mining task and observe up to a 24.5 point increase (absolute) in F1 scores over previous unsupervised methods. We then improve an XLM-based unsupervised neural MT system pre-trained on Wikipedia by supplementing it with pseudo-parallel text mined from the same corpus, boosting unsupervised translation performance by up to 3.5 BLEU on the WMT’14 French-English and WMT’16 German-English tasks and outperforming the previous state-of-the-art. Finally, we enrich the IWSLT’15 English-Vietnamese corpus with pseudo-parallel Wikipedia sentence pairs, yielding a 1.2 BLEU improvement on the low-resource MT task. We demonstrate that unsupervised bitext mining is an effective way of augmenting MT datasets and complements existing techniques like initializing with pre-trained contextual embeddings. 828–841 diff --git a/data/xml/2020.tal.xml b/data/xml/2020.tal.xml index 6af52f7c5d..dbab8ebb62 100644 --- a/data/xml/2020.tal.xml +++ b/data/xml/2020.tal.xml @@ -3,9 +3,9 @@ Traitement Automatique des Langues, Volume 61, Numéro 1 : Varia [Varia] - CécileFabre + CécileFabre EmmanuelMorin - SophieRosset + SophieRosset PascaleSébillot ATALA (Association pour le Traitement Automatique des Langues)
France
@@ -20,11 +20,11 @@ Alternances de voisement et processus de lénition et de fortition : une étude automatisée de grands corpus en cinq langues romanes [Voicing alternations in relation with lenition and fortition phenomena: an automated study of large corpora in five <fixed-case>R</fixed-case>omance languages] - IoanaVasilescu + IoanaVasilescu YaruWu AdèleJatteau - MartineAdda-Decker - LoriLamel + MartineAdda-Decker + LoriLamel 13–37 2020.tal-1.1 fra @@ -34,7 +34,7 @@ Traitement Automatique des Langues, Volume 61, Numéro 2 : TAL et Santé [NLP and Health] - AurélieNévéol + AurélieNévéol Berryde Bruijn CorinneFredouille ATALA (Association pour le Traitement Automatique des Langues) @@ -71,8 +71,8 @@ A Multi-pass Sieve for Clinical Concept Normalization YuxiaWang BrianHur - KarinVerspoor - TimothyBaldwin + KarinVerspoor + TimothyBaldwin 41–65 2020.tal-2.3 wang-etal-2020-multi-pass @@ -114,7 +114,7 @@ Situated Meaning in Multimodal Dialogue: Human-Robot and Human-Computer Interactions - JamesPustejovsky + JamesPustejovsky NikhilKrishnaswamy 17–41 2020.tal-3.2 @@ -134,7 +134,7 @@ CharlieHallart JulietteMaes NicolasSpatola - LaurentPrévot + LaurentPrévot ThierryChaminade 69–93 2020.tal-3.4 diff --git a/data/xml/2020.textgraphs.xml b/data/xml/2020.textgraphs.xml index 558bf3a020..ed53c9ec2b 100644 --- a/data/xml/2020.textgraphs.xml +++ b/data/xml/2020.textgraphs.xml @@ -8,7 +8,7 @@ AlexanderPanchenko Fragkiskos D.Malliaros IoanaHulpuș - PeterJansen + PeterJansen AbhikJana Association for Computational Linguistics
Barcelona, Spain (Online)
@@ -95,7 +95,7 @@ SanderBijl de Vroe Mohammad JavadHosseini MarkJohnson - MarkSteedman + MarkSteedman 60–71 We present a novel method for injecting temporality into entailment graphs to address the problem of spurious entailments, which may arise from similar but temporally distinct events involving the same pair of entities. We focus on the sports domain in which the same pairs of teams play on different occasions, with different outcomes. We present an unsupervised model that aims to learn entailments such as win/lose → play, while avoiding the pitfall of learning non-entailments such as win ̸→ lose. We evaluate our model on a manually constructed dataset, showing that incorporating time intervals and applying a temporal window around them, are effective strategies. 2020.textgraphs-1.7 diff --git a/data/xml/2020.tlt.xml b/data/xml/2020.tlt.xml index ed61f55dae..b4efe981ed 100644 --- a/data/xml/2020.tlt.xml +++ b/data/xml/2020.tlt.xml @@ -8,7 +8,7 @@ RafaelEhren SimonPetitjean EstherSeyffarth - DjaméSeddah + DjaméSeddah Association for Computational Linguistics
Düsseldorf, Germany
October @@ -37,7 +37,7 @@ ZuoyuTian YiwenZhang HeZhou - SandraKuebler + SandraKuebler Chien-Jer CharlesLin 18–30 2020.tlt-1.2 @@ -59,8 +59,8 @@ Fine-Grained Morpho-Syntactic Analysis for the Under-Resourced Language Chaghatay KennethSteimel AkbarAmat - ArienneDwyer - SandraKübler + ArienneDwyer + SandraKübler 43–54 2020.tlt-1.4 10.18653/v1/2020.tlt-1.4 @@ -72,7 +72,7 @@ TatianaBladier LauraKallmeyer RainerOsswald - JakubWaszczuk + JakubWaszczuk 55–61 2020.tlt-1.5 10.18653/v1/2020.tlt-1.5 @@ -111,7 +111,7 @@ Estimating <fixed-case>POS</fixed-case> Annotation Consistency of Different Treebanks in a Language AkshayAggarwal - DanielZeman + DanielZeman 93–110 2020.tlt-1.9 10.18653/v1/2020.tlt-1.9 @@ -133,7 +133,7 @@ MikkoLuukko AleksiSahala SamHardwick - KristerLindén + KristerLindén 124–134 2020.tlt-1.11 10.18653/v1/2020.tlt-1.11 @@ -143,7 +143,7 @@ Dependency Relations for <fixed-case>S</fixed-case>anskrit Parsing and Treebank AmbaKulkarni - PavankumarSatuluri + PavankumarSatuluri SanjeevPanchal MalayMaity AmrutaMalvade @@ -156,7 +156,7 @@ <fixed-case>A</fixed-case>lpino<fixed-case>G</fixed-case>raph: A Graph-based Search Engine for Flexible and Efficient Treebank Search PeterKleiweg - Gertjanvan Noord + Gertjanvan Noord 151–161 2020.tlt-1.13 10.18653/v1/2020.tlt-1.13 @@ -167,7 +167,7 @@ Implementing an End-to-End Treebank-Informed Pipeline for <fixed-case>B</fixed-case>ulgarian AlexanderPopov PetyaOsenova - KirilSimov + KirilSimov 162–167 2020.tlt-1.14 10.18653/v1/2020.tlt-1.14 diff --git a/data/xml/2020.trac.xml b/data/xml/2020.trac.xml index 6bcdc12b4e..350af3ee43 100644 --- a/data/xml/2020.trac.xml +++ b/data/xml/2020.trac.xml @@ -4,10 +4,10 @@ Proceedings of the Second Workshop on Trolling, Aggression and Cyberbullying RiteshKumar - Atul Kr.Ojha + Atul Kr.Ojha BorniniLahiri MarcosZampieri - ShervinMalmasi + ShervinMalmasi VanessaMurdock DanielKadar European Language Resources Association (ELRA) @@ -50,7 +50,7 @@ Syed SarfarazAkhtar NamanJain VinaySingh - ManishShrivastava + ManishShrivastava 13–20 The advent of social media has immensely proliferated the amount of opinions and arguments voiced on the internet. These virtual debates often present cases of aggression. While research has been focused largely on analyzing aggression and stance in isolation from each other, this work is the first attempt to gain an extensive and fine-grained understanding of patterns of aggression and figurative language use when voicing opinion. We present a Hindi-English code-mixed dataset of opinion on the politico-social issue of ‘2016 India banknote demonetisation‘ and annotate it across multiple dimensions such as aggression, hate speech, emotion arousal and figurative language usage (such as sarcasm/irony, metaphors/similes, puns/word-play). 2020.trac-1.3 @@ -82,7 +82,7 @@ Multimodal Meme Dataset (<fixed-case>M</fixed-case>ulti<fixed-case>OFF</fixed-case>) for Identifying Offensive Content in Image and Text ShardulSuryawanshi Bharathi RajaChakravarthi - MihaelArcan + MihaelArcan PaulBuitelaar 32–41 A meme is a form of media that spreads an idea or emotion across the internet. As posting meme has become a new form of communication of the web, due to the multimodal nature of memes, postings of hateful memes or related events like trolling, cyberbullying are increasing day by day. Hate speech, offensive content and aggression content detection have been extensively explored in a single modality such as text or image. However, combining two modalities to detect offensive content is still a developing area. Memes make it even more challenging since they express humour and sarcasm in an implicit way, because of which the meme may not be offensive if we only consider the text or the image. Therefore, it is necessary to combine both modalities to identify whether a given meme is offensive or not. Since there was no publicly available dataset for multimodal offensive meme content detection, we leveraged the memes related to the 2016 U.S. presidential election and created the MultiOFF multimodal meme dataset for offensive content detection dataset. We subsequently developed a classifier for this task using the MultiOFF dataset. We use an early fusion technique to combine the image and text modality and compare it with a text- and an image-only baseline to investigate its effectiveness. Our results show improvements in terms of Precision, Recall, and F-Score. The code and dataset for this paper is published in @@ -99,7 +99,7 @@ KoustavaGoswami Bharathi RajaChakravarthi TheodorusFransen - John PhilipMcCrae + John PhilipMcCrae 42–48 Hate speech detection in social media communication has become one of the primary concerns to avoid conflicts and curb undesired activities. In an environment where multilingual speakers switch among multiple languages, hate speech detection becomes a challenging task using methods that are designed for monolingual corpora. In our work, we attempt to analyze, detect and provide a comparative study of hate speech in a code-mixed social media text. We also provide a Hindi-English code-mixed data set consisting of Facebook and Twitter posts and comments. Our experiments show that deep learning models trained on this code-mixed corpus perform better. 2020.trac-1.7 @@ -178,7 +178,7 @@ AnishaDatta ShukritySi UrbiChakraborty - Sudip KumarNaskar + Sudip KumarNaskar 87–92 In the last few years, hate speech and aggressive comments have covered almost all the social media platforms like facebook, twitter etc. As a result hatred is increasing. This paper describes our (Team name: Spyder) participation in the Shared Task on Aggression Detection organised by TRAC-2, Second Workshop on Trolling, Aggression and Cyberbullying. The Organizers provided datasets in three languages – English, Hindi and Bengali. The task was to classify each instance of the test sets into three categories – “Overtly Aggressive” (OAG), “Covertly Aggressive” (CAG) and “Non-Aggressive” (NAG). In this paper, we propose three different models using Tf-Idf, sentiment polarity and machine learning based classifiers. We obtained f1 score of 43.10%, 59.45% and 44.84% respectively for English, Hindi and Bengali. @@ -258,7 +258,7 @@ María JoséDíaz-Torres Paulina AlejandraMorán-Méndez LuisVillasenor-Pineda - ManuelMontes-y-Gómez + ManuelMontes-y-Gómez JuanAguilera LuisMeneses-Lerín 132–136 diff --git a/data/xml/2020.udw.xml b/data/xml/2020.udw.xml index 1ed823724c..a36fc6451d 100644 --- a/data/xml/2020.udw.xml +++ b/data/xml/2020.udw.xml @@ -3,7 +3,7 @@ Proceedings of the Fourth Workshop on Universal Dependencies (UDW 2020) - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe Miryamde Lhoneux JoakimNivre SebastianSchuster @@ -41,7 +41,7 @@ HinrikHafsteinsson Einar FreyrSigurðsson KristínBjarnadóttir - Anton KarlIngason + Anton KarlIngason HildurJónsdóttir SteinþórSteingrímsson 16–25 @@ -77,7 +77,7 @@ From <fixed-case>LFG</fixed-case> To <fixed-case>UD</fixed-case>: A Combined Approach - Cheikh M. BambaDione + Cheikh M. BambaDione 57–66 This paper reports on a systematic approach for deriving Universal Dependencies from LFG structures. The conversion starts with a step-wise transformation of the c-structure, combining part-of-speech (POS) information and the embedding path to determine the true head of dependency structures. The paper discusses several issues faced by existing algorithms when applied on Wolof and presents the strategies used to account for these issues. An experimental evaluation indicated that our approach was able to generate the correct output in more than 90% of the cases, leading to a substantial improvement in conversion accuracy compared to the previous models. 2020.udw-1.7 @@ -199,7 +199,7 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>A</fixed-case>lbanian MarsidaToska JoakimNivre - DanielZeman + DanielZeman 178–188 In this paper, we introduce the first Universal Dependencies (UD) treebank for standard Albanian, consisting of 60 sentences collected from the Albanian Wikipedia, annotated with lemmas, universal part-of-speech tags, morphological features and syntactic dependencies. In addition to presenting the treebank itself, we discuss a selection of linguistic constructions in Albanian whose analysis in UD is not self-evident, including core arguments and the status of indirect objects, pronominal clitics, genitive constructions, prearticulated adjectives, and modal verbs. 2020.udw-1.20 @@ -219,7 +219,7 @@ Dependency annotation of noun incorporation in polysynthetic languages - FrancisTyers + FrancisTyers KarinaMishchenkova 195–204 This paper describes an approach to annotating noun incorporation in Universal Dependencies. It motivates the need to annotate this particular morphosyntactic phenomenon and justifies it with respect to frequency of the construction. A case study is presented in which the proposed annotation scheme is applied to Chukchi, a language that exhibits noun incorporation. We compare argument encoding in Chukchi, English and Russian and find that while in English and Russian discourse elements are primarily tracked through noun phrases and pronouns, in Chukchi they are tracked through agreement marking and incorporation, with a lesser role for noun phrases. @@ -232,8 +232,8 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Treebank for <fixed-case>X</fixed-case>ibe HeZhou JuyeonChung - SandraKübler - FrancisTyers + SandraKübler + FrancisTyers 205–215 We present our work of constructing the first treebank for the Xibe language following the Universal Dependencies (UD) annotation scheme. Xibe is a low-resourced and severely endangered Tungusic language spoken by the Xibe minority living in the Xinjiang Uygur Autonomous Region of China. We collected 810 sentences so far, including 544 sentences from a grammar book on written Xibe and 266 sentences from Cabcal News. We annotated those sentences manually from scratch. In this paper, we report the procedure of building this treebank and analyze several important annotation issues of our treebank. Finally, we propose our plans for future work. 2020.udw-1.23 diff --git a/data/xml/2020.vardial.xml b/data/xml/2020.vardial.xml index e0290ba1d8..0529765f63 100644 --- a/data/xml/2020.vardial.xml +++ b/data/xml/2020.vardial.xml @@ -4,9 +4,9 @@ Proceedings of the 7th Workshop on NLP for Similar Languages, Varieties and Dialects MarcosZampieri - PreslavNakov + PreslavNakov NikolaLjubešić - JörgTiedemann + JörgTiedemann YvesScherrer International Committee on Computational Linguistics (ICCL)
Barcelona, Spain (Online)
@@ -25,7 +25,7 @@ Radu TudorIonescu HeidiJauhiainen TommiJauhiainen - KristerLindén + KristerLindén NikolaLjubešić NikoPartanen ChristophPurschke @@ -40,7 +40,7 @@ <fixed-case>ASR</fixed-case> for Non-standardised Languages with Dialectal Variation: the case of <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman IuliiaNigmatulina TannonKew - TanjaSamardzic + TanjaSamardzic 15–24 Strong regional variation, together with the lack of standard orthography, makes Swiss German automatic speech recognition (ASR) particularly difficult in a multi-dialectal setting. This paper focuses on one of the many challenges, namely, the choice of the output text to represent non-standardised Swiss German. We investigate two potential options: a) dialectal writing – approximate phonemic transcriptions that provide close correspondence between grapheme labels and the acoustic signal but are highly inconsistent and b) normalised writing – transcriptions resembling standard German that are relatively consistent but distant from the acoustic signal. To find out which writing facilitates Swiss German ASR, we build several systems using the Kaldi toolkit and a dataset covering 14 regional varieties. A formal comparison shows that the system trained on the normalised transcriptions achieves better results in word error rate (WER) (29.39%) but underperforms at the character level, suggesting dialectal transcriptions offer a viable solution for downstream applications where dialectal differences are important. To better assess word-level performance for dialectal transcriptions, we use a flexible WER measure (FlexWER). When evaluated with this metric, the system trained on dialectal transcriptions outperforms that trained on the normalised writing. Besides establishing a benchmark for Swiss German multi-dialectal ASR, our findings can be helpful in designing ASR systems for other languages without standard orthography. 2020.vardial-1.2 @@ -60,7 +60,7 @@ Machine-oriented <fixed-case>NMT</fixed-case> Adaptation for Zero-shot <fixed-case>NLP</fixed-case> tasks: Comparing the Usefulness of Close and Distant Languages AmirhosseinTebbifakhr - MatteoNegri + MatteoNegri MarcoTurchi 36–46 Neural Machine Translation (NMT) models are typically trained by considering humans as end-users and maximizing human-oriented objectives. However, in some scenarios, their output is consumed by automatic NLP components rather than by humans. In these scenarios, translations’ quality is measured in terms of their “fitness for purpose” (i.e. maximizing performance of external NLP tools) rather than in terms of standard human fluency/adequacy criteria. Recently, reinforcement learning techniques exploiting the feedback from downstream NLP tools have been proposed for “machine-oriented” NMT adaptation. In this work, we tackle the problem in a multilingual setting where a single NMT model translates from multiple languages for downstream automatic processing in the target language. Knowledge sharing across close and distant languages allows to apply our machine-oriented approach in the zero-shot setting where no labeled data for the test language is seen at training time. Moreover, we incorporate multi-lingual BERT in the source side of our NMT system to benefit from the knowledge embedded in this model. Our experiments show coherent performance gains, for different language directions over both i) “generic” NMT models (trained for human consumption), and ii) fine-tuned multilingual BERT. This gain for zero-shot language directions (e.g. Spanish–English) is higher when the models are fine-tuned on a closely-related source language (Italian) than a distant one (German). @@ -70,7 +70,7 @@ Character Alignment in Morphologically Complex Translation Sets for Related Languages MichaelGasser - Binyam EphremSeyoum + Binyam EphremSeyoum Nazareth AmlesomKifle 47–56 For languages with complex morphology, word-to-word translation is a task with various potential applications, for example, in information retrieval, language instruction, and dictionary creation, as well as in machine translation. In this paper, we confine ourselves to the subtask of character alignment for the particular case of families of related languages with very few resources for most or all members. There are many such families; we focus on the subgroup of Semitic languages spoken in Ethiopia and Eritrea. We begin with an adaptation of the familiar alignment algorithms behind statistical machine translation, modifying them as appropriate for our task. We show how character alignment can reveal morphological, phonological, and orthographic correspondences among related languages. @@ -81,10 +81,10 @@ Bilingual Lexicon Induction across Orthographically-distinct Under-Resourced <fixed-case>D</fixed-case>ravidian Languages Bharathi RajaChakravarthi NavaneethanRajasekaran - MihaelArcan + MihaelArcan KevinMcGuinness NoelE. O’Connor - John P.McCrae + John P.McCrae 57–69 Bilingual lexicons are a vital tool for under-resourced languages and recent state-of-the-art approaches to this leverage pretrained monolingual word embeddings using supervised or semi-supervised approaches. However, these approaches require cross-lingual information such as seed dictionaries to train the model and find a linear transformation between the word embedding spaces. Especially in the case of low-resourced languages, seed dictionaries are not readily available, and as such, these methods produce extremely weak results on these languages. In this work, we focus on the Dravidian languages, namely Tamil, Telugu, Kannada, and Malayalam, which are even more challenging as they are written in unique scripts. To take advantage of orthographic information and cognates in these languages, we bring the related languages into a single script. Previous approaches have used linguistically sub-optimal measures such as the Levenshtein edit distance to detect cognates, whereby we demonstrate that the longest common sub-sequence is linguistically more sound and improves the performance of bilingual lexicon induction. We show that our approach can increase the accuracy of bilingual lexicon induction methods on these languages many times, making bilingual lexicon induction approaches feasible for such under-resourced languages. 2020.vardial-1.6 @@ -122,7 +122,7 @@ Neural Machine Translation for translating into <fixed-case>C</fixed-case>roatian and <fixed-case>S</fixed-case>erbian - MajaPopović + MajaPopović AlbertoPoncelas MarijaBrkic AndyWay @@ -144,7 +144,7 @@ Badr M.Abdullah JacekKudera TaniaAvgustinova - BerndMöbius + BerndMöbius DietrichKlakow 128–139 Deep neural networks have been employed for various spoken language recognition tasks, including tasks that are multilingual by definition such as spoken language identification (LID). In this paper, we present a neural model for Slavic language identification in speech signals and analyze its emergent representations to investigate whether they reflect objective measures of language relatedness or non-linguists’ perception of language similarity. While our analysis shows that the language representation space indeed captures language relatedness to a great extent, we find perceptual confusability to be the best predictor of the language representation similarity. @@ -178,7 +178,7 @@ Towards Augmenting Lexical Resources for Slang and <fixed-case>A</fixed-case>frican <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish AlyssaHwang William R.Frey - KathleenMcKeown + KathleenMcKeown 160–172 Researchers in natural language processing have developed large, robust resources for understanding formal Standard American English (SAE), but we lack similar resources for variations of English, such as slang and African American English (AAE). In this work, we use word embeddings and clustering algorithms to group semantically similar words in three datasets, two of which contain high incidence of slang and AAE. Since high-quality clusters would contain related words, we could also infer the meaning of an unfamiliar word based on the meanings of words clustered with it. After clustering, we compute precision and recall scores using WordNet and ConceptNet as gold standards and show that these scores are unimportant when the given resources do not fully represent slang and AAE. Amazon Mechanical Turk and expert evaluations show that clusters with low precision can still be considered high quality, and we propose the new Cluster Split Score as a metric for machine-generated clusters. These contributions emphasize the gap in natural language processing research for variations of English and motivate further work to close it. 2020.vardial-1.15 @@ -189,7 +189,7 @@ TommiJauhiainen HeidiJauhiainen NikoPartanen - KristerLindén + KristerLindén 173–185 This article introduces the Wanca 2017 web corpora from which the sentences written in minor Uralic languages were collected for the test set of the Uralic Language Identification (ULI) 2020 shared task. We describe the ULI shared task and how the test set was constructed using the Wanca 2017 corpora and texts in different languages from the Leipzig corpora collection. We also provide the results of a baseline language identification experiment conducted using the ULI 2020 dataset. 2020.vardial-1.16 @@ -234,7 +234,7 @@ Experiments in Language Variety Geolocation and Dialect Identification TommiJauhiainen HeidiJauhiainen - KristerLindén + KristerLindén 220–231 In this paper we describe the systems we used when participating in the VarDial Evaluation Campaign organized as part of the 7th workshop on NLP for similar languages, varieties and dialects. The shared tasks we participated in were the second edition of the Romanian Dialect Identification (RDI) and the first edition of the Social Media Variety Geolocation (SMG). The submissions of our SUKI team used generative language models based on Naive Bayes and character n-grams. 2020.vardial-1.21 @@ -263,7 +263,7 @@ <fixed-case>ZHAW</fixed-case>-<fixed-case>I</fixed-case>n<fixed-case>IT</fixed-case> - Social Media Geolocation at <fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial 2020 FernandoBenites - ManuelaHürlimann + ManuelaHürlimann Piusvon Däniken MarkCieliebak 254–264 @@ -283,7 +283,7 @@ Challenges in Neural Language Identification: <fixed-case>NRC</fixed-case> at <fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial 2020 GabrielBernier-Colborne - CyrilGoutte + CyrilGoutte 273–282 We describe the systems developed by the National Research Council Canada for the Uralic language identification shared task at the 2020 VarDial evaluation campaign. Although our official results were well below the baseline, we show in this paper that this was not due to the neural approach to language identification in general, but to a flaw in the function we used to sample data for training and evaluation purposes. Preliminary experiments conducted after the evaluation period suggest that our neural approach to language identification can achieve state-of-the-art results on this task, although further experimentation is required. 2020.vardial-1.26 diff --git a/data/xml/2020.vlsp.xml b/data/xml/2020.vlsp.xml index a85bdc2ab4..4219fd7f8f 100644 --- a/data/xml/2020.vlsp.xml +++ b/data/xml/2020.vlsp.xml @@ -42,7 +42,7 @@ An Empirical Study of Using Pre-trained <fixed-case>BERT</fixed-case> Models for <fixed-case>V</fixed-case>ietnamese Relation Extraction Task at <fixed-case>VLSP</fixed-case> 2020 - Minh Quang NhatPham + Minh Quang NhatPham 13–18 2020.vlsp-1.3 pham-2020-empirical @@ -175,9 +175,9 @@ Overview of <fixed-case>VLSP</fixed-case> <fixed-case>R</fixed-case>el<fixed-case>E</fixed-case>x shared task: A Data Challenge for Semantic Relation Extraction from <fixed-case>V</fixed-case>ietnamese News Vu TranMai - Hoang-QuynhLe + Hoang-QuynhLe Duy-CatCan - Thi Minh HuyenNguyen + Thi Minh HuyenNguyen Tran Ngoc LinhNguyen Thanh TamDoan 92–98 diff --git a/data/xml/2020.wac.xml b/data/xml/2020.wac.xml index 354ab6c99e..7faf04e724 100644 --- a/data/xml/2020.wac.xml +++ b/data/xml/2020.wac.xml @@ -6,7 +6,7 @@ AdrienBarbaresi FelixBildhauer RolandSchäfer - EgonStemle + EgonStemle European Language Resources Association
Marseille, France
May @@ -21,9 +21,9 @@ Current Challenges in Web Corpus Building MilošJakubíček - VojtěchKovář - PavelRychlý - VitSuchomel + VojtěchKovář + PavelRychlý + VitSuchomel 1–4 In this paper we discuss some of the current challenges in web corpus building that we faced in the recent years when expanding the corpora in Sketch Engine. The purpose of the paper is to provide an overview and raise discussion on possible solutions, rather than bringing ready solutions to the readers. For every issue we try to assess its severity and briefly discuss possible mitigation options. 2020.wac-1.1 @@ -60,7 +60,7 @@ Building Web Corpora for Minority Languages HeidiJauhiainen TommiJauhiainen - KristerLindén + KristerLindén 23–32 Web corpora creation for minority languages that do not have their own top-level Internet domain is no trivial matter. Web pages in such minority languages often contain text and links to pages in the dominant language of the country. When building corpora in specific languages, one has to decide how and at which stage to make sure the texts gathered are in the desired language. In the “Finno-Ugric Languages and the Internet” (Suki) project, we created web corpora for Uralic minority languages using web crawling combined with a language identification system in order to identify the language while crawling. In addition, we used language set identification and crowdsourcing before making sentence corpora out of the downloaded texts. In this article, we describe a strategy for collecting textual material from the Internet for minority languages. The strategy is based on the experiences we gained during the Suki project. 2020.wac-1.4 @@ -104,7 +104,7 @@ Streaming Language-Specific <fixed-case>T</fixed-case>witter Data with Optimal Keywords TimKreutz - WalterDaelemans + WalterDaelemans 57–64 The Twitter Streaming API has been used to create language-specific corpora with varying degrees of success. Selecting a filter of frequent yet distinct keywords for German resulted in a near-complete collection of German tweets. This method is promising as it keeps within Twitter endpoint limitations and could be applied to other languages besides German. But so far no research has compared methods for selecting optimal keywords for this task. This paper proposes a method for finding optimal key phrases based on a greedy solution to the maximum coverage problem. We generate candidate key phrases for the 50 most frequent languages on Twitter. Candidates are then iteratively selected based on a variety of scoring functions applied to their coverage of target tweets. Selecting candidates based on the scoring function that exponentiates the precision of a key phrase and weighs it by recall achieved the best results overall. Some target languages yield lower results than what could be expected from their prevalence on Twitter. Upon analyzing the errors, we find that these are languages that are very close to more prevalent languages. In these cases, key phrases that limit finding the competitive language are selected, and overall recall on the target language also decreases. We publish the resulting optimized lists for each language as a resource. The code to generate lists for other research objectives is also supplied. 2020.wac-1.8 diff --git a/data/xml/2020.wanlp.xml b/data/xml/2020.wanlp.xml index 5bbce999d1..82b411eeaf 100644 --- a/data/xml/2020.wanlp.xml +++ b/data/xml/2020.wanlp.xml @@ -26,8 +26,8 @@ MohammedMediani MoritzBehr M. AminCheragui - SebastianStüker - AlexanderWaibel + SebastianStüker + AlexanderWaibel 1–11 In this paper we present the natural language processing components of our German-Arabic speech-to-speech translation system which is being deployed in the context of interpretation during psychiatric, diagnostic interviews. For this purpose we have built a pipe-lined speech-to-speech translation system consisting of automatic speech recognition, text post-processing/segmentation, machine translation and speech synthesis systems. We have implemented two pipe-lines, from German to Arabic and Arabic to German, in order to be able to conduct interpreted two-way dialogues between psychiatrists and potential patients. All systems in our pipeline have been realized as all-neural end-to-end systems, using different architectures suitable for the different components. The speech recognition systems use an encoder/decoder + attention architecture, the text segmentation component and the machine translation system are based on the Transformer architecture, and for the speech synthesis systems we use Tacotron 2 for generating spectrograms and WaveGlow as vocoder. The speech translation is deployed in a server-based speech translation application that implements a turn based translation between a German speaking psychiatrist administrating the Mini-International Neuropsychiatric Interview (M.I.N.I.) and an Arabic speaking person answering the interview. As this is a very specific domain, in addition to the linguistic challenges posed by translating between Arabic and German, we also focus in this paper on the methods we implemented for adapting our speech translation system to the domain of this psychiatric interview. 2020.wanlp-1.1 @@ -45,7 +45,7 @@ Is it Great or Terrible? Preserving Sentiment in Neural Machine Translation of <fixed-case>A</fixed-case>rabic Reviews HadeelSaadany - ConstantinOrasan + ConstantinOrasan 24–37 Since the advent of Neural Machine Translation (NMT) approaches there has been a tremendous improvement in the quality of automatic translation. However, NMT output still lacks accuracy in some low-resource languages and sometimes makes major errors that need extensive postediting. This is particularly noticeable with texts that do not follow common lexico-grammatical standards, such as user generated content (UGC). In this paper we investigate the challenges involved in translating book reviews from Arabic into English, with particular focus on the errors that lead to incorrect translation of sentiment polarity. Our study points to the special characteristics of Arabic UGC, examines the sentiment transfer errors made by Google Translate of Arabic UGC to English, analyzes why the problem occurs, and proposes an error typology specific of the translation of Arabic UGC. Our analysis shows that the output of online translation tools of Arabic UGC can either fail to transfer the sentiment at all by producing a neutral target text, or completely flips the sentiment polarity of the target word or phrase and hence delivers a wrong affect message. We address this problem by fine-tuning an NMT model with respect to sentiment polarity showing that this approach can significantly help with correcting sentiment errors detected in the online translation of Arabic UGC. 2020.wanlp-1.3 @@ -54,7 +54,7 @@ Deep Diacritization: Efficient Hierarchical Recurrence for Improved <fixed-case>A</fixed-case>rabic Diacritization BadrAlKhamissi - MuhammadElNokrashy + MuhammadElNokrashy MohamedGabr 38–48 We propose a novel architecture for labelling character sequences that achieves state-of-the-art results on the Tashkeela Arabic diacritization benchmark. The core is a two-level recurrence hierarchy that operates on the word and character levels separately—enabling faster training and inference than comparable traditional models. A cross-level attention module further connects the two and opens the door for network interpretability. The task module is a softmax classifier that enumerates valid combinations of diacritics. This architecture can be extended with a recurrent decoder that optionally accepts priors from partially diacritized text, which improves results. We employ extra tricks such as sentence dropout and majority voting to further boost the final result. Our best model achieves a WER of 5.34%, outperforming the previous state-of-the-art with a 30.56% relative error reduction. @@ -141,7 +141,7 @@ Tracing Traditions: Automatic Extraction of Isnads from Classical <fixed-case>A</fixed-case>rabic Texts RyanMuther - DavidSmith + DavidSmith 130–138 We present our work on automatically detecting isnads, the chains of authorities for a re-port that serve as citations in hadith and other classical Arabic texts. We experiment with both sequence labeling methods for identifying isnads in a single pass and a hybrid “retrieve-and-tag” approach, in which a retrieval model first identifies portions of the text that are likely to contain start points for isnads, then a sequence labeling model identifies the exact starting locations within these much smaller retrieved text chunks. We find that the usefulness of full-document sequence to sequence models is limited due to memory limitations and the ineffectiveness of such models at modeling very long documents. We conclude by sketching future improvements on the tagging task and more in-depth analysis of the people and relationships involved in the social network that influenced the evolution of the written tradition over time. 2020.wanlp-1.12 @@ -181,7 +181,7 @@ Multi-Task Sequence Prediction For <fixed-case>T</fixed-case>unisian <fixed-case>A</fixed-case>rabizi Multi-Level Annotation ElisaGugliotta MarcoDinarelli - OlivierKraif + OlivierKraif 178–191 In this paper we propose a multi-task sequence prediction system, based on recurrent neural networks and used to annotate on multiple levels an Arabizi Tunisian corpus. The annotation performed are text classification, tokenization, PoS tagging and encoding of Tunisian Arabizi into CODA* Arabic orthography. The system is learned to predict all the annotation levels in cascade, starting from Arabizi input. We evaluate the system on the TIGER German corpus, suitably converting data to have a multi-task problem, in order to show the effectiveness of our neural architecture. We show also how we used the system in order to annotate a Tunisian Arabizi corpus, which has been afterwards manually corrected and used to further evaluate sequence models on Tunisian data. Our system is developed for the Fairseq framework, which allows for a fast and easy use for any other sequence prediction problem. 2020.wanlp-1.16 @@ -191,7 +191,7 @@ <fixed-case>A</fixed-case>ra<fixed-case>WEAT</fixed-case>: Multidimensional Analysis of Biases in <fixed-case>A</fixed-case>rabic Word Embeddings AnneLauscher RafikTakieddin - Simone PaoloPonzetto + Simone PaoloPonzetto GoranGlavaš 192–199 Recent work has shown that distributional word vector spaces often encode human biases like sexism or racism. In this work, we conduct an extensive analysis of biases in Arabic word embeddings by applying a range of recently introduced bias tests on a variety of embedding spaces induced from corpora in Arabic. We measure the presence of biases across several dimensions, namely: embedding models (Skip-Gram, CBOW, and FastText) and vector sizes, types of text (encyclopedic text, and news vs. user-generated content), dialects (Egyptian Arabic vs. Modern Standard Arabic), and time (diachronic analyses over corpora from different time periods). Our analysis yields several interesting findings, e.g., that implicit gender bias in embeddings trained on Arabic news corpora steadily increases over time (between 2007 and 2017). We make the Arabic bias specifications (AraWEAT) publicly available. @@ -202,7 +202,7 @@ Parallel resources for <fixed-case>T</fixed-case>unisian <fixed-case>A</fixed-case>rabic Dialect Translation SaméhKchaou RahmaBoujelbane - LamiaHadrich-Belguith + LamiaHadrich-Belguith 200–206 The difficulty of processing dialects is clearly observed in the high cost of building representative corpus, in particular for machine translation. Indeed, all machine translation systems require a huge amount and good management of training data, which represents a challenge in a low-resource setting such as the Tunisian Arabic dialect. In this paper, we present a data augmentation technique to create a parallel corpus for Tunisian Arabic dialect written in social media and standard Arabic in order to build a Machine Translation (MT) model. The created corpus was used to build a sentence-based translation model. This model reached a BLEU score of 15.03% on a test set, while it was limited to 13.27% utilizing the corpus without augmentation. 2020.wanlp-1.18 @@ -332,7 +332,7 @@ The <fixed-case>QMUL</fixed-case>/<fixed-case>HRBDT</fixed-case> contribution to the <fixed-case>NADI</fixed-case> <fixed-case>A</fixed-case>rabic Dialect Identification Shared Task AbdulrahmanAloraini - MassimoPoesio + MassimoPoesio AymanAlhelbawy 295–301 We present the Arabic dialect identification system that we used for the country-level subtask of the NADI challenge. Our model consists of three components: BiLSTM-CNN, character-level TF-IDF, and topic modeling features. We represent each tweet using these features and feed them into a deep neural network. We then add an effective heuristic that improves the overall performance. We achieved an F1-Macro score of 20.77% and an accuracy of 34.32% on the test set. The model was also evaluated on the Arabic Online Commentary dataset, achieving results better than the state-of-the-art. diff --git a/data/xml/2020.wat.xml b/data/xml/2020.wat.xml index e724cd717f..82154b57e2 100644 --- a/data/xml/2020.wat.xml +++ b/data/xml/2020.wat.xml @@ -9,14 +9,14 @@ RajDabre AnoopKunchukuttan Win PaPa - OndřejBojar + OndřejBojar ShantipriyaParida IsaoGoto HidayaMino HiroshiManabe KatsuhitoSudoh SadaoKurohashi - PushpakBhattacharyya + PushpakBhattacharyya Association for Computational Linguistics
Suzhou, China
December @@ -62,7 +62,7 @@ Transformer-based Double-token Bidirectional Autoregressive Decoding in Neural Machine Translation KenjiImamura - EiichiroSumita + EiichiroSumita 50–57 This paper presents a simple method that extends a standard Transformer-based autoregressive decoder, to speed up decoding. The proposed method generates a token from the head and tail of a sentence (two tokens in total) in each step. By simultaneously generating multiple tokens that rarely depend on each other, the decoding speed is increased while the degradation in translation quality is minimized. In our experiments, the proposed method increased the translation speed by around 113%-155% in comparison with a standard autoregressive decoder, while degrading the BLEU scores by no more than 1.03. It was faster than an iterative non-autoregressive decoder in many conditions. 2020.wat-1.3 @@ -173,7 +173,7 @@ Sahinur RahmanLaskar Abdullah Faiz Ur RahmanKhilji ParthaPakray - SivajiBandyopadhyay + SivajiBandyopadhyay 109–113 Machine translation (MT) focuses on the automatic translation of text from one natural language to another natural language. Neural machine translation (NMT) achieves state-of-the-art results in the task of machine translation because of utilizing advanced deep learning techniques and handles issues like long-term dependency, and context-analysis. Nevertheless, NMT still suffers low translation quality for low resource languages. To encounter this challenge, the multi-modal concept comes in. The multi-modal concept combines textual and visual features to improve the translation quality of low resource languages. Moreover, the utilization of monolingual data in the pre-training step can improve the performance of the system for low resource language translations. Workshop on Asian Translation 2020 (WAT2020) organized a translation task for multimodal translation in English to Hindi. We have participated in the same in two-track submission, namely text-only and multi-modal translation with team name CNLP-NITS. The evaluated results are declared at the WAT2020 translation task, which reports that our multi-modal NMT system attained higher scores than our text-only NMT on both challenge and evaluation test set. For the challenge test data, our multi-modal neural machine translation system achieves Bilingual Evaluation Understudy (BLEU) score of 33.57, Rank-based Intuitive Bilingual Evaluation Score (RIBES) 0.754141, Adequacy-Fluency Metrics (AMFM) score 0.787320 and for evaluation test data, BLEU, RIBES, and, AMFM score of 40.51, 0.803208, and 0.820980 for English to Hindi translation respectively. 2020.wat-1.11 @@ -270,7 +270,7 @@ A Parallel Evaluation Data Set of Software Documentation with Document Structure Annotation - BiankaBuschbeck + BiankaBuschbeck MiriamExel 160–169 This paper accompanies the software documentation data set for machine translation, a parallel evaluation data set of data originating from the SAP Help Portal, that we released to the machine translation community for research purposes. It offers the possibility to tune and evaluate machine translation systems in the domain of corporate software documentation and contributes to the availability of a wider range of evaluation scenarios. The data set comprises of the language pairs English to Hindi, Indonesian, Malay and Thai, and thus also increases the test coverage for the many low-resource language pairs. Unlike most evaluation data sets that consist of plain parallel text, the segments in this data set come with additional metadata that describes structural information of the document context. We provide insights into the origin and creation, the particularities and characteristics of the data set as well as machine translation results. diff --git a/data/xml/2020.webnlg.xml b/data/xml/2020.webnlg.xml index 6c4868d1f8..4d63e4159b 100644 --- a/data/xml/2020.webnlg.xml +++ b/data/xml/2020.webnlg.xml @@ -3,7 +3,7 @@ Proceedings of the 3rd International Workshop on Natural Language Generation from the Semantic Web (WebNLG+) - ThiagoCastro Ferreira + ThiagoCastro Ferreira ClaireGardent NikolaiIlinykh Chrisvan der Lee @@ -83,7 +83,7 @@ DavidBergés RoserCantenys RogerCreus - José A. R.Fonollosa + José A. R.Fonollosa 40–47 establishes key guidelines on how, which and when Machine Translation (MT) techniques are worth applying to RDF-to-Text task. Not only do we apply and compare the most prominent MT architecture, the Transformer, but we also analyze state-of-the-art techniques such as Byte Pair Encoding or Back Translation to demonstrate an improvement in generalization. In addition, we empirically show how to tailor these techniques to enhance models relying on learned embeddings rather than using pretrained ones. Automatic metrics suggest that Back Translation can significantly improve model performance up to 7 BLEU points, hence, opening a window for surpassing state-of-the-art results with appropriate architectures. 2020.webnlg-1.5 @@ -92,7 +92,7 @@ Utilising Knowledge Graph Embeddings for Data-to-Text Generation NivranshuPasricha - MihaelArcan + MihaelArcan PaulBuitelaar 48–53 Data-to-text generation has recently seen a move away from modular and pipeline architectures towards end-to-end architectures based on neural networks. In this work, we employ knowledge graph embeddings and explore their utility for end-to-end approaches in a data-to-text generation task. Our experiments show that using knowledge graph embeddings can yield an improvement of up to 2 – 3 BLEU points for seen categories on the WebNLG corpus without modifying the underlying neural network architecture. @@ -118,7 +118,7 @@ QipengGuo ZhijingJin XipengQiu - WeinanZhang + WeinanZhang DavidWipf ZhengZhang 77–88 @@ -132,7 +132,7 @@ BettyFabre TanguyUrvoy JohannesHeinecke - LinaRojas-Barahona + LinaRojas-Barahona 89–99 The task of verbalization of RDF triples has known a growth in popularity due to the rising ubiquity of Knowledge Bases (KBs). The formalism of RDF triples is a simple and efficient way to store facts at a large scale. However, its abstract representation makes it difficult for humans to interpret. For this purpose, the WebNLG challenge aims at promoting automated RDF-to-text generation. We propose to leverage pre-trainings from augmented data with the Transformer model using a data augmentation strategy. Our experiment results show a minimum relative increases of 3.73%, 126.05% and 88.16% in BLEU score for seen categories, unseen entities and unseen categories respectively over the standard training. 2020.webnlg-1.9 @@ -171,7 +171,7 @@ XintongLi AleksandreMaskharashvili SymonJory Stevens-Guille - MichaelWhite + MichaelWhite 117–124 In this paper, we report experiments on finetuning large pretrained models to realize resource description framework (RDF) triples to natural language. We provide the details of how to build one of the top-ranked English generation models in WebNLG Challenge 2020. We also show that there appears to be considerable potential for reranking to improve the current state of the art both in terms of statistical metrics and model-based metrics. Our human analyses of the generated texts show that for Russian, pretrained models showed some success, both in terms of lexical and morpho-syntactic choices for generation, as well as for content aggregation. Nevertheless, in a number of cases, the model can be unpredictable, both in terms of failure or success. Omissions of the content and hallucinations, which in many cases occurred at the same time, were major problems. By contrast, the models for English showed near perfect performance on the validation set. 2020.webnlg-1.12 @@ -191,7 +191,7 @@ <fixed-case>NILC</fixed-case> at <fixed-case>W</fixed-case>eb<fixed-case>NLG</fixed-case>+: Pretrained Sequence-to-Sequence Models on <fixed-case>RDF</fixed-case>-to-Text Generation - Marco AntonioSobrevilla Cabezudo + Marco AntonioSobrevilla Cabezudo Thiago A. S.Pardo 131–136 This paper describes the submission by the NILC Computational Linguistics research group of the University of São Paulo/Brazil to the RDF-to-Text task for English at the WebNLG+ challenge. The success of the current pretrained models like BERT or GPT-2 in text-to-text generation tasks is well-known, however, its application/success on data-totext generation has not been well-studied and proven. This way, we explore how good a pretrained model, in particular BART, performs on the data-to-text generation task. The results obtained were worse than the baseline and other systems in almost all automatic measures. However, the human evaluation shows better results for our system. Besides, results suggest that BART may generate paraphrases of reference texts. @@ -201,7 +201,7 @@ <fixed-case>NUIG</fixed-case>-<fixed-case>DSI</fixed-case> at the <fixed-case>W</fixed-case>eb<fixed-case>NLG</fixed-case>+ challenge: Leveraging Transfer Learning for <fixed-case>RDF</fixed-case>-to-text generation NivranshuPasricha - MihaelArcan + MihaelArcan PaulBuitelaar 137–143 This paper describes the system submitted by NUIG-DSI to the WebNLG+ challenge 2020 in the RDF-to-text generation task for the English language. For this challenge, we leverage transfer learning by adopting the T5 model architecture for our submission and fine-tune the model on the WebNLG+ corpus. Our submission ranks among the top five systems for most of the automatic evaluation metrics achieving a BLEU score of 51.74 over all categories with scores of 58.23 and 45.57 across seen and unseen categories respectively. @@ -239,7 +239,7 @@ RoserCantenys RogerCreus OriolDomingo - José A. R.Fonollosa + José A. R.Fonollosa 167–170 This work describes the end-to-end system architecture presented at WebNLG Challenge 2020. The system follows the traditional Machine Translation (MT) pipeline, based on the Transformer model, applied in most text-totext problems. Our solution is enriched by means of a Back Translation step over the original corpus. Thus, the system directly relies on lexicalise format since the synthetic data limits the use of delexicalisation. 2020.webnlg-1.19 diff --git a/data/xml/2020.wildre.xml b/data/xml/2020.wildre.xml index 5e02027ab0..5b29911d38 100644 --- a/data/xml/2020.wildre.xml +++ b/data/xml/2020.wildre.xml @@ -3,11 +3,11 @@ Proceedings of the WILDRE5– 5th Workshop on Indian Language Data: Resources and Evaluation - Girish NathJha + Girish NathJha KalikaBali SobhaL. S. S.Agrawal - Atul Kr.Ojha + Atul Kr.Ojha European Language Resources Association (ELRA)
Marseille, France
May @@ -25,8 +25,8 @@ NileshJoshi GeetanjaliRane HanumantRedkar - MalharKulkarni - PushpakBhattacharyya + MalharKulkarni + PushpakBhattacharyya 1–6 Part of Speech (POS) annotation is a significant challenge in natural language processing. The paper discusses issues and challenges faced in the process of POS annotation of the Marathi data from four domains viz., tourism, health, entertainment and agriculture. During POS annotation, a lot of issues were encountered. Some of the major ones are discussed in detail in this paper. Also, the two approaches viz., the lexical (L approach) and the functional (F approach) of POS tagging have been discussed and presented with examples. Further, some ambiguous cases in POS annotation are presented in the paper. 2020.wildre-1.1 @@ -38,8 +38,8 @@ ShardulSuryawanshi Bharathi RajaChakravarthi PranavVerma - MihaelArcan - John PhilipMcCrae + MihaelArcan + John PhilipMcCrae PaulBuitelaar 7–13 Social media are interactive platforms that facilitate the creation or sharing of information, ideas or other forms of expression among people. This exchange is not free from offensive, trolling or malicious contents targeting users or communities. One way of trolling is by making memes, which in most cases combines an image with a concept or catchphrase. The challenge of dealing with memes is that they are region-specific and their meaning is often obscured in humour or sarcasm. To facilitate the computational modelling of trolling in the memes for Indian languages, we created a meme dataset for Tamil (TamilMemes). We annotated and released the dataset containing suspected trolls and not-troll memes. In this paper, we use the a image classification to address the difficulties involved in the classification of troll memes with the existing methods. We found that the identification of a troll meme with such an image classifier is not feasible which has been corroborated with precision, recall and F1-score. @@ -51,7 +51,7 @@ <fixed-case>O</fixed-case>di<fixed-case>E</fixed-case>n<fixed-case>C</fixed-case>orp 2.0: <fixed-case>O</fixed-case>dia-<fixed-case>E</fixed-case>nglish Parallel Corpus for Machine Translation ShantipriyaParida Satya RanjanDash - OndřejBojar + OndřejBojar PetrMotlicek PriyankaPattnaik Debasish KumarMallick @@ -63,8 +63,8 @@
Handling Noun-Noun Coreference in <fixed-case>T</fixed-case>amil - VijaySundar Ram - SobhaLalitha Devi + VijaySundar Ram + SobhaLalitha Devi 20–24 Natural language understanding by automatic tools is the vital requirement for document processing tools. To achieve it, automatic system has to understand the coherence in the text. Co-reference chains bring coherence to the text. The commonly occurring reference markers which bring cohesiveness are Pronominal, Reflexives, Reciprocals, Distributives, One-anaphors, Noun–noun reference. Here in this paper, we deal with noun-noun reference in Tamil. We present the methodology to resolve these noun-noun anaphors and also present the challenges in handling the noun-noun anaphoric relations in Tamil. 2020.wildre-1.4 @@ -95,7 +95,7 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Treebanks for Low-Resource <fixed-case>I</fixed-case>ndian Languages: The Case of <fixed-case>B</fixed-case>hojpuri Atul Kr.Ojha - DanielZeman + DanielZeman 33–38 This paper presents the first dependency treebank for Bhojpuri, a resource-poor language that belongs to the Indo-Aryan language family. The objective behind the Bhojpuri Treebank (BHTB) project is to create a substantial, syntactically annotated treebank which not only acts as a valuable resource in building language technological tools, also helps in cross-lingual learning and typological research. Currently, the treebank consists of 4,881 annotated tokens in accordance with the annotation scheme of Universal Dependencies (UD). A Bhojpuri tagger and parser were created using machine learning approach. The accuracy of the model is 57.49% UAS, 45.50% LAS, 79.69% UPOS accuracy and 77.64% XPOS accuracy. The paper describes the details of the project including a discussion on linguistic analysis and annotation process of the Bhojpuri UD treebank. 2020.wildre-1.7 @@ -105,8 +105,8 @@ A Fully Expanded Dependency Treebank for <fixed-case>T</fixed-case>elugu SnehaNallani - ManishShrivastava - DiptiSharma + ManishShrivastava + DiptiSharma 39–44 Treebanks are an essential resource for syntactic parsing. The available Paninian dependency treebank(s) for Telugu is annotated only with inter-chunk dependency relations and not all words of a sentence are part of the parse tree. In this paper, we automatically annotate the intra-chunk dependencies in the treebank using a Shift-Reduce parser based on Context Free Grammar rules for Telugu chunks. We also propose a few additional intra-chunk dependency relations for Telugu apart from the ones used in Hindi treebank. Annotating intra-chunk dependencies finally provides a complete parse tree for every sentence in the treebank. Having a fully expanded treebank is crucial for developing end to end parsers which produce complete trees. We present a fully expanded dependency treebank for Telugu consisting of 3220 sentences. In this paper, we also convert the treebank annotated with Anncorra part-of-speech tagset to the latest BIS tagset. The BIS tagset is a hierarchical tagset adopted as a unified part-of-speech standard across all Indian Languages. The final treebank is made publicly available. 2020.wildre-1.8 @@ -125,7 +125,7 @@ <fixed-case>P</fixed-case>olish Lexicon-Grammar Development Methodology as an Example for Application to other Languages ZygmuntVetulani - GrażynaVetulani + GrażynaVetulani 51–59 In the paper we present our methodology with the intention to propose it as a reference for creating lexicon-grammars. We share our long-term experience gained during research projects (past and on-going) concerning the description of Polish using this approach. The above-mentioned methodology, linking semantics and syntax, has revealed useful for various IT applications. Among other, we address this paper to researchers working on “less” or “middle-resourced” Indo-European languages as a proposal of a long term academic cooperation in the field. We believe that the confrontation of our lexicon-grammar methodology with other languages – Indo-European, but also Non-Indo-European languages of India, Ugro-Finish or Turkic languages in Eurasia – will allow for better understanding of the level of versatility of our approach and, last but not least, will create opportunities to intensify comparative studies. The reason of presenting some our works on language resources within the Wildre workshop is the intention not only to take up the challenge thrown down in the CFP of this workshop which is: “To provide opportunity for researchers from India to collaborate with researchers from other parts of the world”, but also to generalize this challenge to other languages. 2020.wildre-1.10 @@ -145,7 +145,7 @@ A Deeper Study on Features for Named Entity Recognition MalarkodiC S - SobhaLalitha Devi + SobhaLalitha Devi 66–72 This paper deals with the various features used for the identification of named entities. The performance of the machine learning system heavily depends on the feature selection criteria. The intention to trace the essential features required for the development of named entity system across languages motivated us to conduct this study. The linguistic analysis was done to find out the part of speech patterns surrounding the context of named entities and from the observation linguistic oriented features are identified for both Indian and European languages. The Indian languages belongs to Dravidian language family such as Tamil, Telugu, Malayalam, Indo-Aryan language family such as Hindi, Punjabi, Bengali and Marathi, European languages such as English, Spanish, Dutch, German and Hungarian are used in this work. The machine learning technique CRFs was used for the system development. The experiments were conducted using the linguistic features and the results obtained for each languages are comparable with state-of-art systems. 2020.wildre-1.12 diff --git a/data/xml/2020.winlp.xml b/data/xml/2020.winlp.xml index faf378fbc2..06e0edb622 100644 --- a/data/xml/2020.winlp.xml +++ b/data/xml/2020.winlp.xml @@ -66,8 +66,8 @@ Large Vocabulary Read Speech Corpora for Four <fixed-case>E</fixed-case>thiopian Languages: <fixed-case>A</fixed-case>mharic, <fixed-case>T</fixed-case>igrigna, <fixed-case>O</fixed-case>romo, and <fixed-case>W</fixed-case>olaytta - Solomon TeferraAbate - Martha YifiruTachbelie + Solomon TeferraAbate + Martha YifiruTachbelie MichaelMelese HafteAbera TewodrosGebreselassie @@ -75,7 +75,7 @@ YaregalAssabie Million MesheshaBeyene SolomonAtinafu - Binyam EphremSeyoum + Binyam EphremSeyoum 13–17 Automatic Speech Recognition (ASR) is one of the most important technologies to help people live a better life in the 21st century. However, its development requires a big speech corpus for a language. The development of such a corpus is expensive especially for under-resourced Ethiopian languages. To address this problem we have developed four medium-sized (longer than 22 hours each) speech corpora for four Ethiopian languages: Amharic, Tigrigna, Oromo, and Wolaytta. In a way of checking the usability of the corpora and deliver a baseline ASR for each language. In this paper, we present the corpora and the baseline ASR systems for each language. The word error rates (WERs) we achieved show that the corpora are usable for further investigation and we recommend the collection of text corpora to train strong language models for Oromo and Wolaytta compared to others. 10.18653/v1/2020.winlp-1.5 @@ -85,8 +85,8 @@ <fixed-case>SIMPLEX</fixed-case>-<fixed-case>PB</fixed-case> 2.0: A Reliable Dataset for Lexical Simplification in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese NathanHartmann - Gustavo HenriquePaetzold - SandraAluísio + Gustavo HenriquePaetzold + SandraAluísio 18–22 Most research on Lexical Simplification (LS) addresses non-native speakers of English, since they are numerous and easy to recruit. This makes it difficult to create LS solutions for other languages and target audiences. This paper presents SIMPLEX-PB 2.0, a dataset for LS in Brazilian Portuguese that, unlike its predecessor SIMPLEX-PB, accurately captures the needs of Brazilian underprivileged children. To create SIMPLEX-PB 2.0, we addressed all limitations of the old SIMPLEX-PB through multiple rounds of manual annotation. As a result, SIMPLEX-PB 2.0 features much more reliable and numerous candidate substitutions to complex words, as well as word complexity rankings produced by a group underprivileged children. 10.18653/v1/2020.winlp-1.6 @@ -108,7 +108,7 @@ Effective questions in referential visual dialogue MauricioMazuecos AlbertoTestoni - RaffaellaBernardi + RaffaellaBernardi LucianaBenotti 31–35 An interesting challenge for situated dialogue systems is referential visual dialog: by asking questions, the system has to identify the referent to which the user refers to. Task success is the standard metric used to evaluate these systems. However, it does not consider how effective each question is, that is how much each question contributes to the goal. We propose a new metric, that measures question effectiveness. As a preliminary study, we report the new metric for state of the art publicly available models on GuessWhat?!. Surprisingly, successful dialogues do not have a higher percentage of effective questions than failed dialogues. This suggests that a system with high task success is not necessarily one that generates good questions. @@ -139,7 +139,7 @@ Variants of Vector Space Reductions for Predicting the Compositionality of <fixed-case>E</fixed-case>nglish Noun Compounds PegahAlipoormolabashi - SabineSchulte im Walde + SabineSchulte im Walde 51–54 Predicting the degree of compositionality of noun compounds is a crucial ingredient for lexicography and NLP applications, to know whether the compound should be treated as a whole, or through its constituents. Computational approaches for an automatic prediction typically represent compounds and their constituents within a vector space to have a numeric relatedness measure for the words. This paper provides a systematic evaluation of using different vector-space reduction variants for the prediction. We demonstrate that Word2vec and nouns-only dimensionality reductions are the most successful and stable vector space reduction variants for our task. 10.18653/v1/2020.winlp-1.13 @@ -190,7 +190,7 @@ Can <fixed-case>W</fixed-case>ikipedia Categories Improve Masked Language Model Pretraining? DikshaMeghwal - KatharinaKann + KatharinaKann IacerCalixto StanislawJastrzebski 78 @@ -243,8 +243,8 @@ Towards Mitigating Gender Bias in a decoder-based Neural Machine Translation model by Adding Contextual Information ChristineBasta - Marta R.Costa-jussà - José A. R.Fonollosa + Marta R.Costa-jussà + José A. R.Fonollosa 99–102 Gender bias negatively impacts many natural language processing applications, including machine translation (MT). The motivation behind this work is to study whether recent proposed MT techniques are significantly contributing to attenuate biases in document-level and gender-balanced data. For the study, we consider approaches of adding the previous sentence and the speaker information, implemented in a decoder-based neural MT system. We show improvements both in translation quality (+1 BLEU point) as well as in gender bias mitigation on WinoMT (+5% accuracy). 10.18653/v1/2020.winlp-1.25 @@ -285,8 +285,8 @@ The human unlikeness of neural language models in next-word prediction - Cassandra L.Jacobs - Arya D.McCarthy + Cassandra L.Jacobs + Arya D.McCarthy 115 The training objective of unidirectional language models (LMs) is similar to a psycholinguistic benchmark known as the cloze task, which measures next-word predictability. However, LMs lack the rich set of experiences that people do, and humans can be highly creative. To assess human parity in these models’ training objective, we compare the predictions of three neural language models to those of human participants in a freely available behavioral dataset (Luke & Christianson, 2016). Our results show that while neural models show a close correspondence to human productions, they nevertheless assign insufficient probability to how often speakers guess upcoming words, especially for open-class content words. 10.18653/v1/2020.winlp-1.29 @@ -355,7 +355,7 @@ Using Social Media For Bitcoin Day Trading Behavior Prediction Anna PaulaPawlicka Maule - KristenJohnson + KristenJohnson 140–143 This abstract presents preliminary work in the application of natural language processing techniques and social network modeling for the prediction of cryptocurrency trading and investment behavior. Specifically, we are building models to use language and social network behaviors to predict if the tweets of a 24-hour period can be used to buy or sell cryptocurrency to make a profit. In this paper we present our novel task and initial language modeling studies. 10.18653/v1/2020.winlp-1.37 @@ -397,7 +397,7 @@ Enhanced <fixed-case>U</fixed-case>rdu Word Segmentation using Conditional Random Fields and Morphological Context Features AamirFarhan MashrukhIslam - Dipti MisraSharma + Dipti MisraSharma 156–159 Word segmentation is a fundamental task for most of the NLP applications. Urdu adopts Nastalique writing style which does not have a concept of space. Furthermore, the inherent non-joining attributes of certain characters in Urdu create spaces within a word while writing in digital format. Thus, Urdu not only has space omission but also space insertion issues which make the word segmentation task challenging. In this paper, we improve upon the results of Zia, Raza and Athar (2018) by using a manually annotated corpus of 19,651 sentences along with morphological context features. Using the Conditional Random Field sequence modeler, our model achieves F 1 score of 0.98 for word boundary identification and 0.92 for sub-word boundary identification tasks. The results demonstrated in this paper outperform the state-of-the-art methods. 10.18653/v1/2020.winlp-1.41 diff --git a/data/xml/2020.wmt.xml b/data/xml/2020.wmt.xml index 9613f9566f..0cc5d9c4c4 100644 --- a/data/xml/2020.wmt.xml +++ b/data/xml/2020.wmt.xml @@ -4,25 +4,25 @@ Proceedings of the Fifth Conference on Machine Translation LoïcBarrault - OndřejBojar + OndřejBojar FethiBougares - RajenChatterjee - Marta R.Costa-jussà + RajenChatterjee + Marta R.Costa-jussà ChristianFedermann MarkFishel - AlexanderFraser + AlexanderFraser YvetteGraham PacoGuzman BarryHaddow MatthiasHuck - Antonio JimenoYepes + Antonio JimenoYepes PhilippKoehn - AndréMartins + AndréMartins MakotoMorishita ChristofMonz MasaakiNagata ToshiakiNakazawa - MatteoNegri + MatteoNegri Association for Computational Linguistics
Online
November @@ -76,11 +76,11 @@
Findings of the <fixed-case>WMT</fixed-case> 2020 Shared Task on Chat Translation - M. AminFarajian + M. AminFarajian António V.Lopes André F. T.Martins SameenMaruf - GholamrezaHaffari + GholamrezaHaffari 65–75 We report the results of the first edition of the WMT shared task on chat translation. The task consisted of translating bilingual conversational text, in particular customer support chats for the English-German language pair (English agent, German customer). This task varies from the other translation shared tasks, i.e. news and biomedical, mainly due to the fact that the conversations are bilingual, less planned, more informal, and often ungrammatical. Furthermore, such conversations are usually characterized by shorter and simpler sentences and contain more pronouns. We received 14 submissions from 6 participating teams, all of them covering both directions, i.e. En->De for agent utterances and De->En for customer messages. We used automatic metrics (BLEU and TER) for evaluating the translations of both agent and customer messages and human document-level direct assessments (DDA) to evaluate the agent translations. 2020.wmt-1.3 @@ -93,7 +93,7 @@ ZhenhaoLi JuanPino VishravChaudhary - FranciscoGuzmán + FranciscoGuzmán GrahamNeubig NadirDurrani YonatanBelinkov @@ -112,8 +112,8 @@ RachelBawden AlexandraBirch RadinaDobreva - ArturoOncevay - Antonio ValerioMiceli Barone + ArturoOncevay + Antonio ValerioMiceli Barone PhilipWilliams 92–99 We describe the University of Edinburgh’s submissions to the WMT20 news translation shared task for the low resource language pair English-Tamil and the mid-resource language pair English-Inuktitut. We use the neural machine translation transformer architecture for all submissions and explore a variety of techniques to improve translation quality to compensate for the lack of parallel training data. For the very low-resource English-Tamil, this involves exploring pretraining, using both language model objectives and translation using an unrelated high-resource language pair (German-English), and iterative backtranslation. For English-Inuktitut, we explore the use of multilingual systems, which, despite not being part of the primary submission, would have achieved the best results on the test set. @@ -167,7 +167,7 @@ Linguistically Motivated Subwords for <fixed-case>E</fixed-case>nglish-<fixed-case>T</fixed-case>amil Translation: <fixed-case>U</fixed-case>niversity of <fixed-case>G</fixed-case>roningen’s Submission to <fixed-case>WMT</fixed-case>-2020 PrajitDhar AriannaBisazza - Gertjanvan Noord + Gertjanvan Noord 126–133 This paper describes our submission for the English-Tamil news translation task of WMT-2020. The various techniques and Neural Machine Translation (NMT) models used by our team are presented and discussed, including back-translation, fine-tuning and word dropout. Additionally, our experiments show that using a linguistically motivated subword segmentation technique (Ataman et al., 2017) does not consistently outperform the more widely used, non-linguistically motivated SentencePiece algorithm (Kudo and Richardson, 2018), despite the agglutinative nature of Tamil morphology. 2020.wmt-1.9 @@ -178,7 +178,7 @@ The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> System Description for <fixed-case>WMT</fixed-case>20 News Translation Task: Multilingual Adaptation for Low Resource <fixed-case>MT</fixed-case> CarlosEscolano Marta R.Costa-jussà - José A. R.Fonollosa + José A. R.Fonollosa 134–138 In this article, we describe the TALP-UPC participation in the WMT20 news translation shared task for Tamil-English. Given the low amount of parallel training data, we resort to adapt the task to a multilingual system to benefit from the positive transfer from high resource languages. We use iterative backtranslation to fine-tune the system and benefit from the monolingual data available. In order to measure the effectivity of such methods, we compare our results to a bilingual baseline system. 2020.wmt-1.10 @@ -233,8 +233,8 @@ Tilde at <fixed-case>WMT</fixed-case> 2020: News Task Systems - RihardsKrišlauks - MārcisPinnis + RihardsKrišlauks + MārcisPinnis 175–180 This paper describes Tilde’s submission to the WMT2020 shared task on news translation for both directions of the English-Polish language pair in both the constrained and the unconstrained tracks. We follow our submissions form the previous years and build our baseline systems to be morphologically motivated sub-word unit-based Transformer base models that we train using the Marian machine translation toolkit. Additionally, we experiment with different parallel and monolingual data selection schemes, as well as sampled back-translation. Our final models are ensembles of Transformer base and Transformer big models which feature right-to-left re-ranking. 2020.wmt-1.15 @@ -284,7 +284,7 @@ VikrantGoyal AnoopKunchukuttan RahulKejriwal - SiddharthJain + SiddharthJain AmitBhagwat 202–206 We describe our submission for the English→Tamil and Tamil→English news translation shared task. In this submission, we focus on exploring if a low-resource language (Tamil) can benefit from a high-resource language (Hindi) with which it shares contact relatedness. We show utilizing contact relatedness via multilingual NMT can significantly improve translation quality for English-Tamil translation. @@ -295,7 +295,7 @@ The <fixed-case>AFRL</fixed-case> <fixed-case>WMT</fixed-case>20 News Translation Systems JeremyGwinnup - TimAnderson + TimAnderson 207–212 This report summarizes the Air Force Research Laboratory (AFRL) machine translation (MT) systems submitted to the news-translation task as part of the 2020 Conference on Machine Translation (WMT20) evaluation campaign. This year we largely repurpose strategies from previous years’ efforts with larger datasets and also train models with precomputed word alignments under various settings in an effort to improve translation quality. 2020.wmt-1.20 @@ -318,7 +318,7 @@ RuiWang KehaiChen MasaoUtiyama - EiichiroSumita + EiichiroSumita 218–229 In this paper, we introduced our joint team SJTU-NICT ‘s participation in the WMT 2020 machine translation shared task. In this shared task, we participated in four translation directions of three language pairs: English-Chinese, English-Polish on supervised machine translation track, German-Upper Sorbian on low-resource and unsupervised machine translation tracks. Based on different conditions of language pairs, we have experimented with diverse neural machine translation (NMT) techniques: document-enhanced NMT, XLM pre-trained language model enhanced NMT, bidirectional translation as a pre-training, reference language based UNMT, data-dependent gaussian prior objective, and BT-BLEU collaborative filtering self-training. We also used the TF-IDF algorithm to filter the training set to obtain a domain more similar set with the test set for finetuning. In our submissions, the primary systems won the first place on English to Chinese, Polish to English, and German to Upper Sorbian translation directions. 2020.wmt-1.22 @@ -328,7 +328,7 @@ Combination of Neural Machine Translation Systems at <fixed-case>WMT</fixed-case>20 BenjaminMarie - RaphaelRubino + RaphaelRubino AtsushiFujita 230–238 This paper presents neural machine translation systems and their combination built for the WMT20 English-Polish and Japanese->English translation tasks. We show that using a Transformer Big architecture, additional training data synthesized from monolingual data, and combining many NMT systems through n-best list reranking improve translation quality. However, while we observed such improvements on the validation data, we did not observed similar improvements on the test data. Our analysis reveals that the presence of translationese texts in the validation data led us to take decisions in building NMT systems that were not optimal to obtain the best results on the test data. @@ -366,13 +366,13 @@ e<fixed-case>T</fixed-case>ranslation’s Submissions to the <fixed-case>WMT</fixed-case> 2020 News Translation Task CsabaOravecz - KatinaBontcheva - LászlóTihanyi - DavidKolovratnik + KatinaBontcheva + LászlóTihanyi + DavidKolovratnik BhavaniBhaskar AdrienLardilleux SzymonKlocek - AndreasEisele + AndreasEisele 254–261 The paper describes the submissions of the eTranslation team to the WMT 2020 news translation shared task. Leveraging the experience from the team’s participation last year we developed systems for 5 language pairs with various strategies. Compared to last year, for some language pairs we dedicated a lot more resources to training, and tried to follow standard best practices to build competitive systems which can achieve good results in the rankings. By using deep and complex architectures we sacrificed direct re-usability of our systems in production environments but evaluation showed that this approach could result in better models that significantly outperform baseline architectures. We submitted two systems to the zero shot robustness task. These submissions are described briefly in this paper as well. 2020.wmt-1.26 @@ -516,8 +516,8 @@ AgustinDal Lago YotamDoron SusannahYoung - PhilBlunsom - ChrisDyer + PhilBlunsom + ChrisDyer 326–337 This paper describes the DeepMind submission to the Chinese\rightarrowEnglish constrained data track of the WMT2020 Shared Task on News Translation. The submission employs a noisy channel factorization as the backbone of a document translation system. This approach allows the flexible combination of a number of independent component models which are further augmented with back-translation, distillation, fine-tuning with in-domain data, Monte-Carlo Tree Search decoding, and improved uncertainty estimation. In order to address persistent issues with the premature truncation of long sequences we included specialized length models and sentence segmentation techniques. Our final system provides a 9.9 BLEU points improvement over a baseline Transformer on our test set (newstest 2019). 2020.wmt-1.36 @@ -578,7 +578,7 @@ The <fixed-case>MUCOW</fixed-case> word sense disambiguation test suite at <fixed-case>WMT</fixed-case> 2020 YvesScherrer AlessandroRaganato - JörgTiedemann + JörgTiedemann 365–370 This paper reports on our participation with the MUCOW test suite at the WMT 2020 news translation task. We introduced MUCOW at WMT 2019 to measure the ability of MT systems to perform word sense disambiguation (WSD), i.e., to translate an ambiguous word with its correct sense. MUCOW is created automatically using existing resources, and the evaluation process is also entirely automated. We evaluate all participating systems of the language pairs English -> Czech, English -> German, and English -> Russian and compare the results with those obtained at WMT 2019. While current NMT systems are fairly good at handling ambiguous source words, we could not identify any substantial progress - at least to the extent that it is measurable by the MUCOW method - in that area over the last year. 2020.wmt-1.40 @@ -623,7 +623,7 @@ AmitKumar RupjyotiBaruah Rajesh KumarMundotiya - Anil KumarSingh + Anil KumarSingh 393–395 This paper reports the results for the Machine Translation (MT) system submitted by the NLPRL team for the Hindi – Marathi Similar Translation Task at WMT 2020. We apply the Transformer-based Neural Machine Translation (NMT) approach on both translation directions for this language pair. The trained model is evaluated on the corpus provided by shared task organizers, using BLEU, RIBES, and TER scores. There were a total of 23 systems submitted for Marathi to Hindi and 21 systems submitted for Hindi to Marathi in the shared task. Out of these, our submission ranked 6th and 9th, respectively. 2020.wmt-1.44 @@ -635,7 +635,7 @@ Sahinur RahmanLaskar Abdullah Faiz Ur RahmanKhilji ParthaPakray - SivajiBandyopadhyay + SivajiBandyopadhyay 396–401 Machine Translation (MT) is a vital tool for aiding communication between linguistically separate groups of people. The neural machine translation (NMT) based approaches have gained widespread acceptance because of its outstanding performance. We have participated in WMT20 shared task of similar language translation on Hindi-Marathi pair. The main challenge of this task is by utilization of monolingual data and similarity features of similar language pair to overcome the limitation of available parallel data. In this work, we have implemented NMT based model that simultaneously learns bilingual embedding from both the source and target language pairs. Our model has achieved Hindi to Marathi bilingual evaluation understudy (BLEU) score of 11.59, rank-based intuitive bilingual evaluation score (RIBES) score of 57.76 and translation edit rate (TER) score of 79.07 and Marathi to Hindi BLEU score of 15.44, RIBES score of 61.13 and TER score of 75.96. 2020.wmt-1.45 @@ -667,7 +667,7 @@ <fixed-case>NMT</fixed-case> based Similar Language Translation for <fixed-case>H</fixed-case>indi - <fixed-case>M</fixed-case>arathi VandanMujadia - DiptiSharma + DiptiSharma 414–417 This paper describes the participation of team F1toF6 (LTRC, IIIT-Hyderabad) for the WMT 2020 task, similar language translation. We experimented with attention based recurrent neural network architecture (seq2seq) for this task. We explored the use of different linguistic features like POS and Morph along with back translation for Hindi-Marathi and Marathi-Hindi machine translation. 2020.wmt-1.48 @@ -675,12 +675,12 @@ <fixed-case>NUIG</fixed-case>-Panlingua-<fixed-case>KMI</fixed-case> <fixed-case>H</fixed-case>indi-<fixed-case>M</fixed-case>arathi <fixed-case>MT</fixed-case> Systems for Similar Language Translation Task @ <fixed-case>WMT</fixed-case> 2020 - Atul Kr.Ojha + Atul Kr.Ojha PriyaRani AkankshaBansal Bharathi RajaChakravarthi RiteshKumar - John P.McCrae + John P.McCrae 418–423 NUIG-Panlingua-KMI submission to WMT 2020 seeks to push the state-of-the-art in Similar Language Translation Task for Hindi↔Marathi language pair. As part of these efforts, we conducteda series of experiments to address the challenges for translation between similar languages. Among the 4 MT systems prepared under this task, 1 PBSMT systems were prepared for Hindi↔Marathi each and 1 NMT systems were developed for Hindi↔Marathi using Byte PairEn-coding (BPE) into subwords. The results show that different architectures NMT could be an effective method for developing MT systems for closely related languages. Our Hindi-Marathi NMT system was ranked 8th among the 14 teams that participated and our Marathi-Hindi NMT system was ranked 8th among the 11 teams participated for the task. 2020.wmt-1.49 @@ -698,7 +698,7 @@ Neural Machine Translation between similar <fixed-case>S</fixed-case>outh-<fixed-case>S</fixed-case>lavic languages - MajaPopović + MajaPopović AlbertoPoncelas 430–436 This paper describes the ADAPT-DCU machine translation systems built for the WMT 2020 shared task on Similar Language Translation. We explored several set-ups for NMT for Croatian–Slovenian and Serbian–Slovenian language pairs in both translation directions. Our experiments focus on different amounts and types of training data: we first apply basic filtering on the OpenSubtitles training corpora, then we perform additional cleaning of remaining misaligned segments based on character n-gram matching. Finally, we make use of additional monolingual data by creating synthetic parallel data through back-translation. Automatic evaluation shows that multilingual systems with joint Serbian and Croatian data are better than bilingual, as well as that character-based cleaning leads to improved scores while using less data. The results also confirm once more that adding back-translated data further improves the performance, especially when the synthetic data is similar to the desired domain of the development and test set. This, however, might come at a price of prolonged training time, especially for multitarget systems. @@ -743,7 +743,7 @@ A3-108 Machine Translation System for Similar Language Translation Shared Task 2020 SaumitraYadav - ManishShrivastava + ManishShrivastava 451–455 In this paper, we describe our submissions for Similar Language Translation Shared Task 2020. We built 12 systems in each direction for Hindi⇐⇒Marathi language pair. This paper outlines initial baseline experiments with various tokenization schemes to train statistical models. Using optimal tokenization scheme among these we created synthetic source side text with back translation. And prune synthetic text with language model scores. This synthetic data was then used along with training data in various settings to build translation models. We also report configuration of the submitted systems and results produced by them. 2020.wmt-1.55 @@ -835,9 +835,9 @@ Priming Neural Machine Translation - Minh QuangPham + Minh QuangPham JitaoXu - JosepCrego + JosepCrego FrançoisYvon JeanSenellart 516–527 @@ -848,7 +848,7 @@ Subword Segmentation and a Single Bridge Language Affect Zero-Shot Neural Machine Translation - AnnetteRios + AnnetteRios MathiasMüller RicoSennrich 528–537 @@ -932,7 +932,7 @@ YingboGao LeonardDahlmann ShahramKhadivi - HermannNey + HermannNey 604–616 Context-aware neural machine translation (NMT) is a promising direction to improve the translation quality by making use of the additional context, e.g., document-level translation, or having meta-information. Although there exist various architectures and analyses, the effectiveness of different context-aware NMT models is not well explored yet. This paper analyzes the performance of document-level NMT models on four diverse domains with a varied amount of parallel document-level bilingual data. We conduct a comprehensive set of experiments to investigate the impact of document-level NMT. We find that there is no single best approach to document-level NMT, but rather that different architectures come out on top on different tasks. Looking at task-specific problems, such as pronoun resolution or headline translation, we find improvements in the context-aware systems, even in cases where the corpus-level metrics like BLEU show no significant improvement. We also show that document-level back-translation significantly helps to compensate for the lack of document-level bi-texts. 2020.wmt-1.71 @@ -942,8 +942,8 @@ A Study of Residual Adapters for Multi-Domain Neural Machine Translation - Minh QuangPham - Josep MariaCrego + Minh QuangPham + Josep MariaCrego FrançoisYvon JeanSenellart 617–628 @@ -956,7 +956,7 @@ Mitigating Gender Bias in Machine Translation with Target Gender Annotations ArtūrsStafanovičs TomsBergmanis - MārcisPinnis + MārcisPinnis 629–638 When translating “The secretary asked for details.” to a language with grammatical gender, it might be necessary to determine the gender of the subject “secretary”. If the sentence does not contain the necessary information, it is not always possible to disambiguate. In such cases, machine translation systems select the most common translation option, which often corresponds to the stereotypical translations, thus potentially exacerbating prejudice and marginalisation of certain groups and people. We argue that the information necessary for an adequate translation can not always be deduced from the sentence being translated or even might depend on external knowledge. Therefore, in this work, we propose to decouple the task of acquiring the necessary information from the task of learning to translate correctly when such information is available. To that end, we present a method for training machine translation systems to use word-level annotations containing information about subject’s gender. To prepare training data, we annotate regular source language words with grammatical gender information of the corresponding target language words. Using such data to train machine translation systems reduces their reliance on gender stereotypes when information about the subject’s gender is available. Our experiments on five language pairs show that this allows improving accuracy on the WinoMT test set by up to 25.8 percentage points. 2020.wmt-1.73 @@ -991,15 +991,15 @@ Findings of the <fixed-case>WMT</fixed-case> 2020 Biomedical Translation Shared Task: <fixed-case>B</fixed-case>asque, <fixed-case>I</fixed-case>talian and <fixed-case>R</fixed-case>ussian as New Additional Languages RachelBawden - Giorgio MariaDi Nunzio + Giorgio MariaDi Nunzio CristianGrozea InigoJauregi Unanue AntonioJimeno Yepes NancyMah - DavidMartinez - AurélieNévéol + DavidMartinez + AurélieNévéol MarianaNeves - MaiteOronoz + MaiteOronoz OlatzPerez-de-Viñaspre MassimoPiccardi RolandRoller @@ -1034,7 +1034,7 @@ AhmedEl-Kishky NamanGoyal Peng-JenChen - FranciscoGuzmán + FranciscoGuzmán 726–742 Following two preceding WMT Shared Task on Parallel Corpus Filtering (Koehn et al., 2018, 2019), we posed again the challenge of assigning sentence-level quality scores for very noisy corpora of sentence pairs crawled from the web, with the goal of sub-selecting the highest-quality data to be used to train ma-chine translation systems. This year, the task tackled the low resource condition of Pashto–English and Khmer–English and also included the challenge of sentence alignment from document pairs. 2020.wmt-1.78 @@ -1044,11 +1044,11 @@ Findings of the <fixed-case>WMT</fixed-case> 2020 Shared Task on Quality Estimation LuciaSpecia - FrédéricBlain + FrédéricBlain MarinaFomicheva - ErickFonseca + ErickFonseca VishravChaudhary - FranciscoGuzmán + FranciscoGuzmán André F. T.Martins 743–764 We report the results of the WMT20 shared task on Quality Estimation, where the challenge is to predict the quality of the output of neural machine translation systems at the word, sentence and document levels. This edition included new data with open domain texts, direct assessment annotations, and multiple language pairs: English-German, English-Chinese, Russian-English, Romanian-English, Estonian-English, Sinhala-English and Nepali-English data for the sentence-level subtasks, English-German and English-Chinese for the word-level subtask, and English-French data for the document-level subtask. In addition, we made neural machine translation models available to participants. 19 participating teams from 27 institutions submitted altogether 1374 systems to different task variants and language pairs. @@ -1080,7 +1080,7 @@ WonKeeLee JaehunShin BaikjinJung - Young-KilKim + Young-KilKim Jong-HyeokLee 777–782 This paper describes POSTECH-ETRI’s submission to WMT2020 for the shared task on automatic post-editing (APE) for 2 language pairs: English-German (En-De) and English-Chinese (En-Zh). We propose APE systems based on a cross-lingual language model, which jointly adopts translation language modeling (TLM) and masked language modeling (MLM) training objectives in the pre-training stage; the APE models then utilize jointly learned language representations between the source language and the target language. In addition, we created 19 million new sythetic triplets as additional training data for our final ensemble model. According to experimental results on the WMT2020 APE development data set, our models showed an improvement over the baseline by TER of -3.58 and a BLEU score of +5.3 for the En-De subtask; and TER of -5.29 and a BLEU score of +7.32 for the En-Zh subtask. @@ -1140,7 +1140,7 @@ <fixed-case>LIMSI</fixed-case> @ <fixed-case>WMT</fixed-case> 2020 SadafAbdul Rauf José CarlosRosales Núñez - Minh QuangPham + Minh QuangPham FrançoisYvon 803–812 This paper describes LIMSI’s submissions to the translation shared tasks at WMT’20. This year we have focused our efforts on the biomedical translation task, developing a resource-heavy system for the translation of medical abstracts from English into French, using back-translated texts, terminological resources as well as multiple pre-processing pipelines, including pre-trained representations. Systems were also prepared for the robustness task for translating from English into German; for this large-scale task we developed multi-domain, noise-robust, translation systems aim to handle the two test conditions: zero-shot and few-shot domain adaptation. @@ -1151,7 +1151,7 @@ Elhuyar submission to the Biomedical Translation Task 2020 on terminology and abstracts translation AnderCorral - XabierSaralegi + XabierSaralegi 813–819 This article describes the systems submitted by Elhuyar to the 2020 Biomedical Translation Shared Task, specifically the systems presented in the subtasks of terminology translation for English-Basque and abstract translation for English-Basque and English-Spanish. In all cases a Transformer architecture was chosen and we studied different strategies to combine open domain data with biomedical domain data for building the training corpora. For the English-Basque pair, given the scarcity of parallel corpora in the biomedical domain, we set out to create domain training data in a synthetic way. The systems presented in the terminology and abstract translation subtasks for the English-Basque language pair ranked first in their respective tasks among four participants, achieving 0.78 accuracy for terminology translation and a BLEU of 0.1279 for the translation of abstracts. In the abstract translation task for the English-Spanish pair our team ranked second (BLEU=0.4498) in the case of OK sentences. 2020.wmt-1.87 @@ -1254,8 +1254,8 @@ Ixamed’s submission description for <fixed-case>WMT</fixed-case>20 Biomedical shared task: benefits and limitations of using terminologies for domain adaptation XabierSoto OlatzPerez-de-Viñaspre - GorkaLabaka - MaiteOronoz + GorkaLabaka + MaiteOronoz 875–880 In this paper we describe the systems developed at Ixa for our participation in WMT20 Biomedical shared task in three language pairs, en-eu, en-es and es-en. When defining our approach, we have put the focus on making an efficient use of corpora recently compiled for training Machine Translation (MT) systems to translate Covid-19 related text, as well as reusing previously compiled corpora and developed systems for biomedical or clinical domain. Regarding the techniques used, we base on the findings from our previous works for translating clinical texts into Basque, making use of clinical terminology for adapting the MT systems to the clinical domain. However, after manually inspecting some of the outputs generated by our systems, for most of the submissions we end up using the system trained only with the basic corpus, since the systems including the clinical terminologies generated outputs shorter in length than the corresponding references. Thus, we present simple baselines for translating abstracts between English and Spanish (en/es); while for translating abstracts and terms from English into Basque (en-eu), we concatenate the best en-es system for each kind of text with our es-eu system. We present automatic evaluation results in terms of BLEU scores, and analyse the effect of including clinical terminology on the average sentence length of the generated outputs. Following the recent recommendations for a responsible use of GPUs for NLP research, we include an estimation of the generated CO2 emissions, based on the power consumed for training the MT systems. 2020.wmt-1.96 @@ -1310,7 +1310,7 @@ RicardoRei CraigStewart Ana CFarinha - AlonLavie + AlonLavie 911–920 We present the contribution of the Unbabel team to the WMT 2020 Shared Task on Metrics. We intend to participate on the segmentlevel, document-level and system-level tracks on all language pairs, as well as the “QE as a Metric” track. Accordingly, we illustrate results of our models in these tracks with reference to test sets from the previous year. Our submissions build upon the recently proposed COMET framework: we train several estimator models to regress on different humangenerated quality scores and a novel ranking model trained on relative ranks obtained from Direct Assessments. We also propose a simple technique for converting segment-level predictions into a document-level score. Overall, our systems achieve strong results for all language pairs on previous test sets and in many cases set a new state-of-the-art. 2020.wmt-1.101 @@ -1327,7 +1327,7 @@ QijunTan MarkusFreitag DipanjanDas - AnkurParikh + AnkurParikh 921–927 The quality of machine translation systems has dramatically improved over the last decade, and as a result, evaluation has become an increasingly challenging problem. This paper describes our contribution to the WMT 2020 Metrics Shared Task, the main benchmark for automatic evaluation of translation. We make several submissions based on BLEURT, a previously published which uses transfer learning. We extend the metric beyond English and evaluate it on 14 language pairs for which fine-tuning data is available, as well as 4 “zero-shot” language pairs, for which we have no labelled examples. Additionally, we focus on English to German and demonstrate how to combine BLEURT’s predictions with those of YiSi and use alternative reference translations to enhance the performance. Empirical results show that the models achieve competitive results on the WMT Metrics 2019 Shared Task, indicating their promise for the 2020 edition. 2020.wmt-1.102 @@ -1338,7 +1338,7 @@ Towards a Better Evaluation of Metrics for Machine Translation PeterStanchev WeiyueWang - HermannNey + HermannNey 928–933 An important aspect of machine translation is its evaluation, which can be achieved through the use of a variety of metrics. To compare these metrics, the workshop on statistical machine translation annually evaluates metrics based on their correlation with human judgement. Over the years, methods for measuring correlation with humans have changed, but little research has been performed on what the optimal methods for acquiring human scores are and how human correlation can be measured. In this work, the methods for evaluating metrics at both system- and segment-level are analyzed in detail and their shortcomings are pointed out. 2020.wmt-1.103 @@ -1371,12 +1371,12 @@ Score Combination for Improved Parallel Corpus Filtering for Low Resource Conditions - MuhammadElNokrashy + MuhammadElNokrashy AmrHendy MohamedAbdelghaffar MohamedAfify AhmedTawfik - HanyHassan Awadalla + HanyHassan Awadalla 947–951 This paper presents the description of our submission to WMT20 sentence filtering task. We combine scores from custom LASER built for each source language, a classifier built to distinguish positive and negative pairs and the original scores provided with the task. For the mBART setup, provided by the organizers, our method shows 7% and 5% relative improvement, over the baseline, in sacreBLEU score on the test set for Pashto and Khmer respectively. 2020.wmt-1.106 @@ -1385,8 +1385,8 @@ Bicleaner at <fixed-case>WMT</fixed-case> 2020: <fixed-case>U</fixed-case>niversitat d’Alacant-Prompsit’s submission to the parallel corpus filtering shared task - MiquelEsplà-Gomis - Víctor M.Sánchez-Cartagena + MiquelEsplà-Gomis + Víctor M.Sánchez-Cartagena JaumeZaragoza-Bernabeu FelipeSánchez-Martínez 952–958 @@ -1407,7 +1407,7 @@ Dual Conditional Cross Entropy Scores and <fixed-case>LASER</fixed-case> Similarity Scores for the <fixed-case>WMT</fixed-case>20 Parallel Corpus Filtering Shared Task - FeliciaKoerner + FeliciaKoerner PhilippKoehn 966–971 This paper describes our submission to the WMT20 Parallel Corpus Filtering and Alignment for Low-Resource Conditions Shared Task. This year’s corpora are noisy Khmer-English and Pashto-English, with 58.3 million and 11.6 million words respectively (English token count). Our submission focuses on filtering Pashto-English, building on previously successful methods to produce two sets of scores: LASER_LM, a combination of the LASER similarity scores provided in the shared task and perplexity scores from language models, and DCCEF_DUP, dual conditional cross entropy scores combined with a duplication penalty. We improve slightly on the LASER similarity score and find that the provided clean data can successfully be supplemented with a subsampled set of the noisy data, effectively increasing the training data for the models used for dual conditional cross entropy scoring. @@ -1465,7 +1465,7 @@ <fixed-case>RTM</fixed-case> Ensemble Learning Results at Quality Estimation Task - ErgunBiçici + ErgunBiçici 999–1003 We obtain new results using referential translation machines (RTMs) with predictions mixed and stacked to obtain a better mixture of experts prediction. We are able to achieve better results than the baseline model in Task 1 subtasks. Our stacking results significantly improve the results on the training sets but decrease the test set results. RTMs can achieve to become the 5th among 13 models in ru-en subtask and 5th in the multilingual track of sentence-level Task 1 based on MAE. 2020.wmt-1.114 @@ -1477,7 +1477,7 @@ QuCui XiangGeng ShujianHuang - JiajunChen + JiajunChen 1004–1009 This paper describes our system of the sentence-level and word-level Quality Estimation Shared Task of WMT20. Our system is based on the QE Brain, and we simply enhance it by injecting noise at the target side. And to obtain the deep bi-directional information, we use a masked language model at the target side instead of two single directional decoders. Meanwhile, we try to use the extra QE data from the WMT17 and WMT19 to improve our system’s performance. Finally, we ensemble the features or the results from different models to get our best results. Our system finished fifth in the end at sentence-level on both EN-ZH and EN-DE language pairs. 2020.wmt-1.115 @@ -1489,10 +1489,10 @@ MarinaFomicheva ShuoSun LisaYankovskaya - FrédéricBlain + FrédéricBlain VishravChaudhary MarkFishel - FranciscoGuzmán + FranciscoGuzmán LuciaSpecia 1010–1017 This paper presents our submission to the WMT2020 Shared Task on Quality Estimation (QE). We participate in Task and Task 2 focusing on sentence-level prediction. We explore (a) a black-box approach to QE based on pre-trained representations; and (b) glass-box approaches that leverage various indicators that can be extracted from the neural MT systems. In addition to training a feature-based regression model using glass-box quality indicators, we also test whether they can be used to predict MT quality directly with no supervision. We assess our systems in a multi-lingual setting and show that both types of approaches generalise well across languages. Our black-box QE models tied for the winning submission in four out of seven language pairs inTask 1, thus demonstrating very strong performance. The glass-box approaches also performed competitively, representing a light-weight alternative to the neural-based models. @@ -1534,7 +1534,7 @@ JoãoMoura MiguelVera Daanvan Stigt - FabioKepler + FabioKepler André F. T.Martins 1029–1036 We present the joint contribution of IST and Unbabel to the WMT 2020 Shared Task on Quality Estimation. Our team participated on all tracks (Direct Assessment, Post-Editing Effort, Document-Level), encompassing a total of 14 submissions. Our submitted systems were developed by extending the OpenKiwi framework to a transformer-based predictor-estimator architecture, and to cope with glass-box, uncertainty-based features coming from neural machine translation systems. @@ -1556,7 +1556,7 @@ <fixed-case>NICT</fixed-case> <fixed-case>K</fixed-case>yoto Submission for the <fixed-case>WMT</fixed-case>’20 Quality Estimation Task: Intermediate Training for Domain and Task Adaptation - RaphaelRubino + RaphaelRubino 1042–1048 This paper describes the NICT Kyoto submission for the WMT’20 Quality Estimation (QE) shared task. We participated in Task 2: Word and Sentence-level Post-editing Effort, which involved Wikipedia data and two translation directions, namely English-to-German and English-to-Chinese. Our approach is based on multi-task fine-tuned cross-lingual language models (XLM), initially pre-trained and further domain-adapted through intermediate training using the translation language model (TLM) approach complemented with a novel self-supervised learning task which aim is to model errors inherent to machine translation outputs. Results obtained on both word and sentence-level QE show that the proposed intermediate training method is complementary to language model domain adaptation and outperforms the fine-tuning only approach. 2020.wmt-1.121 @@ -1566,8 +1566,8 @@ <fixed-case>T</fixed-case>rans<fixed-case>Q</fixed-case>uest at <fixed-case>WMT</fixed-case>2020: Sentence-Level Direct Assessment TharinduRanasinghe - ConstantinOrasan - RuslanMitkov + ConstantinOrasan + RuslanMitkov 1049–1055 This paper presents the team TransQuest’s participation in Sentence-Level Direct Assessment shared task in WMT 2020. We introduce a simple QE framework based on cross-lingual transformers, and we use it to implement and evaluate two different neural architectures. The proposed methods achieve state-of-the-art results surpassing the results obtained by OpenKiwi, the baseline used in the shared task. We further fine tune the QE framework by performing ensemble and data augmentation. Our approach is the winning solution in all of the language pairs according to the WMT 2020 official results. 2020.wmt-1.122 @@ -1658,10 +1658,10 @@ <fixed-case>U</fixed-case>d<fixed-case>S</fixed-case>-<fixed-case>DFKI</fixed-case>@<fixed-case>WMT</fixed-case>20: Unsupervised <fixed-case>MT</fixed-case> and Very Low Resource Supervised <fixed-case>MT</fixed-case> for <fixed-case>G</fixed-case>erman-<fixed-case>U</fixed-case>pper <fixed-case>S</fixed-case>orbian SouravDutta - JesujobaAlabi + JesujobaAlabi SaptarashmiBandyopadhyay DanaRuiter - Josefvan Genabith + Josefvan Genabith 1092–1098 This paper describes the UdS-DFKI submission to the shared task for unsupervised machine translation (MT) and very low-resource supervised MT between German (de) and Upper Sorbian (hsb) at the Fifth Conference of Machine Translation (WMT20). We submit systems for both the supervised and unsupervised tracks. Apart from various experimental approaches like bitext mining, model pre-training, and iterative back-translation, we employ a factored machine translation approach on a small BPE vocabulary. 2020.wmt-1.129 @@ -1672,7 +1672,7 @@ Data Selection for Unsupervised Translation of <fixed-case>G</fixed-case>erman–<fixed-case>U</fixed-case>pper <fixed-case>S</fixed-case>orbian LukasEdman AntonioToral - Gertjanvan Noord + Gertjanvan Noord 1099–1103 This paper describes the methods behind the systems submitted by the University of Groningen for the WMT 2020 Unsupervised Machine Translation task for German–Upper Sorbian. We investigate the usefulness of data selection in the unsupervised setting. We find that we can perform data selection using a pretrained model and show that the quality of a set of sentences or documents can have a great impact on the performance of the UNMT system trained on it. Furthermore, we show that document-level data selection should be preferred for training the XLM model when possible. Finally, we show that there is a trade-off between quality and quantity of the data used to train UNMT systems. 2020.wmt-1.130 @@ -1728,8 +1728,8 @@ The <fixed-case>NITS</fixed-case>-<fixed-case>CNLP</fixed-case> System for the Unsupervised <fixed-case>MT</fixed-case> Task at <fixed-case>WMT</fixed-case> 2020 Salam MichaelSingh - Thoudam DorenSingh - SivajiBandyopadhyay + Thoudam DorenSingh + SivajiBandyopadhyay 1139–1143 We describe NITS-CNLP’s submission to WMT 2020 unsupervised machine translation shared task for German language (de) to Upper Sorbian (hsb) in a constrained setting i.e, using only the data provided by the organizers. We train our unsupervised model using monolingual data from both the languages by jointly pre-training the encoder and decoder and fine-tune using backtranslation loss. The final model uses the source side (de) monolingual data and the target side (hsb) synthetic data as a pseudo-parallel data to train a pseudo-supervised system which is tuned using the provided development set(dev set). 2020.wmt-1.135 @@ -1757,7 +1757,7 @@ How Should Markup Tags Be Translated? GregHanneman - GeorgianaDinu + GeorgianaDinu 1160–1173 The ability of machine translation (MT) models to correctly place markup is crucial to generating high-quality translations of formatted input. This paper compares two commonly used methods of representing markup tags and tests the ability of MT models to learn tag placement via training data augmentation. We study the interactions of tag representation, data augmentation size, tag complexity, and language pair to show the drawbacks and benefits of each method. We construct and release new test sets containing tagged data for three language pairs of varying difficulty. 2020.wmt-1.138 @@ -1766,7 +1766,7 @@ The Tatoeba Translation Challenge – Realistic Data Sets for Low Resource and Multilingual <fixed-case>MT</fixed-case> - JörgTiedemann + JörgTiedemann 1174–1182 This paper describes the development of a new benchmark for machine translation that provides training and test data for thousands of language pairs covering over 500 languages and tools for creating state-of-the-art translation models from that collection. The main goal is to trigger the development of open translation tools and models with a much broader coverage of the World’s languages. Using the package it is possible to work on realistic low-resource scenarios avoiding artificially reduced setups that are common when demonstrating zero-shot or few-shot learning. For the first time, this package provides a comprehensive collection of diverse data sets in hundreds of languages with systematic language and script annotation and data splits to extend the narrow coverage of existing benchmarks. Together with the data release, we also provide a growing number of pre-trained baseline models for individual language pairs and selected language groups. 2020.wmt-1.139 @@ -1791,7 +1791,7 @@ ChrisKedzie FaisalLadhak MarineCarpuat - KathleenMcKeown + KathleenMcKeown 1193–1204 Users of machine translation (MT) may want to ensure the use of specific lexical terminologies. While there exist techniques for incorporating terminology constraints during inference for MT, current APE approaches cannot ensure that they will appear in the final translation. In this paper, we present both autoregressive and non-autoregressive models for lexically constrained APE, demonstrating that our approach enables preservation of 95% of the terminologies and also improves translation quality on English-German benchmarks. Even when applied to lexically constrained MT output, our approach is able to improve preservation of the terminologies. However, we show that our models do not learn to copy constraints systematically and suggest a simple data augmentation technique that leads to improved performance and robustness. 2020.wmt-1.141 diff --git a/data/xml/2020.wnut.xml b/data/xml/2020.wnut.xml index 51dc5695a2..e29b973388 100644 --- a/data/xml/2020.wnut.xml +++ b/data/xml/2020.wnut.xml @@ -5,7 +5,7 @@ Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020) WeiXu AlanRitter - TimBaldwin + TimBaldwin AfshinRahimi Association for Computational Linguistics
Online
@@ -40,7 +40,7 @@
Noisy Text Data: Achilles’ Heel of <fixed-case>BERT</fixed-case> - AnkitKumar + AnkitKumar PiyushMakhija AnujGupta 16–21 @@ -65,7 +65,7 @@ Combining <fixed-case>BERT</fixed-case> with Static Word Embeddings for Categorizing Social Media IsraaAlghanmi - LuisEspinosa Anke + LuisEspinosa Anke StevenSchockaert 28–33 Pre-trained neural language models (LMs) have achieved impressive results in various natural language processing tasks, across different languages. Surprisingly, this extends to the social media genre, despite the fact that social media often has very different characteristics from the language that LMs have seen during training. A particularly striking example is the performance of AraBERT, an LM for the Arabic language, which is successful in categorizing social media posts in Arabic dialects, despite only having been trained on Modern Standard Arabic. Our hypothesis in this paper is that the performance of LMs for social media can nonetheless be improved by incorporating static word vectors that have been specifically trained on social media. We show that a simple method for incorporating such word vectors is indeed successful in several Arabic and English benchmarks. Curiously, however, we also find that similar improvements are possible with word vectors that have been trained on traditional text sources (e.g. Wikipedia). @@ -89,7 +89,7 @@ <fixed-case>PHINC</fixed-case>: A Parallel <fixed-case>H</fixed-case>inglish Social Media Code-Mixed Corpus for Machine Translation VivekSrivastava - MayankSingh + MayankSingh 41–49 Code-mixing is the phenomenon of using more than one language in a sentence. In the multilingual communities, it is a very frequently observed pattern of communication on social media platforms. Flexibility to use multiple languages in one text message might help to communicate efficiently with the target audience. But, the noisy user-generated code-mixed text adds to the challenge of processing and understanding natural language to a much larger extent. Machine translation from monolingual source to the target language is a well-studied research problem. Here, we demonstrate that widely popular and sophisticated translation systems such as Google Translate fail at times to translate code-mixed text effectively. To address this challenge, we present a parallel corpus of the 13,738 code-mixed Hindi-English sentences and their corresponding human translation in English. In addition, we also propose a translation pipeline build on top of Google Translate. The evaluation of the proposed pipeline on PHINC demonstrates an increase in the performance of the underlying system. With minimal effort, we can extend the dataset and the proposed approach to other code-mixing language pairs. 2020.wnut-1.7 @@ -167,7 +167,7 @@ An Empirical Analysis of Human-Bot Interaction on <fixed-case>R</fixed-case>eddit Ming-ChengMa - John P.Lalor + John P.Lalor 101–106 Automated agents (“bots”) have emerged as an ubiquitous and influential presence on social media. Bots engage on social media platforms by posting content and replying to other users on the platform. In this work we conduct an empirical analysis of the activity of a single bot on Reddit. Our goal is to determine whether bot activity (in the form of posted comments on the website) has an effect on how humans engage on Reddit. We find that (1) the sentiment of a bot comment has a significant, positive effect on the subsequent human reply, and (2) human Reddit users modify their comment behaviors to overlap with the text of the bot, similar to how humans modify their text to mimic other humans in conversation. Understanding human-bot interactions on social media with relatively simple bots is important for preparing for more advanced bots in the future. 2020.wnut-1.14 @@ -260,7 +260,7 @@ Detecting Entailment in Code-Mixed <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Conversations SharanyaChakravarthy AnjanaUmapathy - Alan WBlack + Alan WBlack 165–170 The presence of large-scale corpora for Natural Language Inference (NLI) has spurred deep learning research in this area, though much of this research has focused solely on monolingual data. Code-mixing is the intertwined usage of multiple languages, and is commonly seen in informal conversations among polyglots. Given the rising importance of dialogue agents, it is imperative that they understand code-mixing, but the scarcity of code-mixed Natural Language Understanding (NLU) datasets has precluded research in this area. The dataset by Khanuja et. al. for detecting conversational entailment in code-mixed Hindi-English text is the first of its kind. We investigate the effectiveness of language modeling, data augmentation, translation, and architectural approaches to address the code-mixed, conversational, and low-resource aspects of this dataset. We obtain an 8.09% increase in test set accuracy over the current state of the art. 2020.wnut-1.22 @@ -510,7 +510,7 @@ <fixed-case>NLP</fixed-case> North at <fixed-case>WNUT</fixed-case>-2020 Task 2: Pre-training versus Ensembling for Detection of Informative <fixed-case>COVID</fixed-case>-19 <fixed-case>E</fixed-case>nglish Tweets AndersGiovanni Møller Robvan der Goot - BarbaraPlank + BarbaraPlank 331–336 With the COVID-19 pandemic raging world-wide since the beginning of the 2020 decade, the need for monitoring systems to track relevant information on social media is vitally important. This paper describes our submission to the WNUT-2020 Task 2: Identification of informative COVID-19 English Tweets. We investigate the effectiveness for a variety of classification models, and found that domain-specific pre-trained BERT models lead to the best performance. On top of this, we attempt a variety of ensembling strategies, but these attempts did not lead to further improvements. Our final best model, the standalone CT-BERT model, proved to be highly competitive, leading to a shared first place in the shared task. Our results emphasize the importance of domain and task-related pre-training. 2020.wnut-1.44 @@ -592,7 +592,7 @@ VaradPimpalkhute DivyaRallapalli DavidSiguenza - GracielaGonzalez-Hernandez + GracielaGonzalez-Hernandez 378–382 Increasing usage of social media presents new non-traditional avenues for monitoring disease outbreaks, virus transmissions and disease progressions through user posts describing test results or disease symptoms. However, the discussions on the topic of infectious diseases that are informative in nature also span various topics such as news, politics and humor which makes the data mining challenging. We present a system to identify tweets about the COVID19 disease outbreak that are deemed to be informative on Twitter for use in downstream applications. The system scored a F1-score of 0.8941, Precision of 0.9028, Recall of 0.8856 and Accuracy of 0.9010. In the shared task organized as part of the 6th Workshop of Noisy User-generated Text (WNUT), the system was ranked 18th by F1-score and 13th by Accuracy. 2020.wnut-1.52 @@ -682,7 +682,7 @@ Rajesh KumarMundotiya RupjyotiBaruah BhavanaSrivastava - Anil KumarSingh + Anil KumarSingh 419–422 The Coronavirus pandemic has been a dominating news on social media for the last many months. Efforts are being made to reduce its spread and reduce the casualties as well as new infections. For this purpose, the information about the infected people and their related symptoms, as available on social media, such as Twitter, can help in prevention and taking precautions. This is an example of using noisy text processing for disaster management. This paper discusses the NLPRL results in Shared Task-2 of WNUT-2020 workshop. We have considered this problem as a binary classification problem and have used a pre-trained ELMo embedding with GRU units. This approach helps classify the tweets with accuracy as 80.85% and 78.54% as F1-score on the provided test dataset. The experimental code is available online. 2020.wnut-1.60 @@ -843,7 +843,7 @@ <fixed-case>COVCOR</fixed-case>20 at <fixed-case>WNUT</fixed-case>-2020 Task 2: An Attempt to Combine Deep Learning and Expert rules - AliHürriyetoğlu + AliHürriyetoğlu AliSafaya OsmanMutlu NellekeOostdijk @@ -901,8 +901,8 @@ <fixed-case>HLTRI</fixed-case> at <fixed-case>W</fixed-case>-<fixed-case>NUT</fixed-case> 2020 Shared Task-3: <fixed-case>COVID</fixed-case>-19 Event Extraction from <fixed-case>T</fixed-case>witter Using Multi-Task Hopfield Pooling - MaxwellWeinzierl - SandaHarabagiu + MaxwellWeinzierl + SandaHarabagiu 530–538 Extracting structured knowledge involving self-reported events related to the COVID-19 pandemic from Twitter has the potential to inform surveillance systems that play a critical role in public health. The event extraction challenge presented by the W-NUT 2020 Shared Task 3 focused on the identification of five types of events relevant to the COVID-19 pandemic and their respective set of pre-defined slots encoding demographic, epidemiological, clinical as well as spatial, temporal or subjective knowledge. Our participation in the challenge led to the design of a neural architecture for jointly identifying all Event Slots expressed in a tweet relevant to an event of interest. This architecture uses COVID-Twitter-BERT as the pre-trained language model. In addition, to learn text span embeddings for each Event Slot, we relied on a special case of Hopfield Networks, namely Hopfield pooling. The results of the shared task evaluation indicate that our system performs best when it is trained on a larger dataset, while it remains competitive when training on smaller datasets. 2020.wnut-1.80 diff --git a/data/xml/2020.wosp.xml b/data/xml/2020.wosp.xml index 29228a040b..24fc99a57f 100644 --- a/data/xml/2020.wosp.xml +++ b/data/xml/2020.wosp.xml @@ -24,7 +24,7 @@ Virtual Citation Proximity (<fixed-case>VCP</fixed-case>): Empowering Document Recommender Systems by Learning a Hypothetical In-Text Citation-Proximity Metric for Uncited Documents PaulMolloy JoeranBeel - AkikoAizawa + AkikoAizawa 1–8 The relatedness of research articles, patents, court rulings, web pages, and other document types is often calculated with citation or hyperlink-based approaches like co-citation (proximity) analysis. The main limitation of citation-based approaches is that they cannot be used for documents that receive little or no citations. We propose Virtual Citation Proximity (VCP), a Siamese Neural Network architecture, which combines the advantages of co-citation proximity analysis (diverse notions of relatedness / high recommendation performance), with the advantage of content-based filtering (high coverage). VCP is trained on a corpus of documents with textual features, and with real citation proximity as ground truth. VCP then predicts for any two documents, based on their title and abstract, in what proximity the two documents would be co-cited, if they were indeed co-cited. The prediction can be used in the same way as real citation proximity to calculate document relatedness, even for uncited documents. In our evaluation with 2 million co-citations from Wikipedia articles, VCP achieves an MAE of 0.0055, i.e. an improvement of 20% over the baseline, though the learning curve suggests that more work is needed. 2020.wosp-1.1 diff --git a/data/xml/2021.acl.xml b/data/xml/2021.acl.xml index 0e06512674..5e882a7a2c 100644 --- a/data/xml/2021.acl.xml +++ b/data/xml/2021.acl.xml @@ -3,7 +3,7 @@ Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers) - ChengqingZong + ChengqingZong FeiXia WenjieLi RobertoNavigli @@ -67,9 +67,9 @@ PaulRöttger BertieVidgen DongNguyen - ZeerakWaseem + ZeerakWaseem HelenMargetts - JanetPierrehumbert + JanetPierrehumbert 41–58 Detecting online hate is a difficult task that even state-of-the-art models struggle with. Typically, hate speech detection models are evaluated by measuring their performance on held-out test data using metrics such as accuracy and F1 score. However, this approach makes it difficult to identify specific model weak points. It also risks overestimating generalisable model performance due to increasingly well-evidenced systematic gaps and biases in hate speech datasets. To enable more targeted diagnostic insights, we introduce HateCheck, a suite of functional tests for hate speech detection models. We specify 29 model functionalities motivated by a review of previous research and a series of interviews with civil society stakeholders. We craft test cases for each functionality and validate their quality through a structured annotation process. To illustrate HateCheck’s utility, we test near-state-of-the-art transformer models as well as two popular commercial models, revealing critical model weaknesses. 2021.acl-long.4 @@ -207,7 +207,7 @@ HaoyuSong YanWang KaiyanZhang - Wei-NanZhang + Wei-NanZhang TingLiu 167–177 Maintaining a consistent persona is essential for dialogue agents. Although tremendous advancements have been brought, the limited-scale of annotated personalized dialogue datasets is still a barrier towards training robust and consistent persona-based dialogue models. This work shows how this challenge can be addressed by disentangling persona-based dialogue generation into two sub-tasks with a novel BERT-over-BERT (BoB) model. Specifically, the model consists of a BERT-based encoder and two BERT-based decoders, where one decoder is for response generation, and another is for consistency understanding. In particular, to learn the ability of consistency understanding from large-scale non-dialogue inference data, we train the second decoder in an unlikelihood manner. Under different limited data settings, both automatic and human evaluations demonstrate that the proposed model outperforms strong baselines in response quality and persona consistency. @@ -238,7 +238,7 @@ TianxiangSun HangYan XipengQiu - XuanjingHuang + XuanjingHuang 189–199 Both performance and efficiency are crucial factors for sequence labeling tasks in many real-world scenarios. Although the pre-trained models (PTMs) have significantly improved the performance of various sequence labeling tasks, their computational cost is expensive. To alleviate this problem, we extend the recent successful early-exit mechanism to accelerate the inference of PTMs for sequence labeling tasks. However, existing early-exit mechanisms are specifically designed for sequence-level tasks, rather than sequence labeling. In this paper, we first propose a simple extension of sentence-level early-exit for sequence labeling tasks. To further reduce the computational cost, we also propose a token-level early-exit mechanism that allows partial tokens to exit early at different layers. Considering the local dependency inherent in sequence labeling, we employed a window-based criterion to decide for a token whether or not to exit. The token-level early-exit brings the gap between training and inference, so we introduce an extra self-sampling fine-tuning stage to alleviate it. The extensive experiments on three popular sequence labeling tasks show that our approach can save up to 66%∼75% inference cost with minimal performance degradation. Compared with competitive compressed models such as DistilBERT, our approach can achieve better performance under the same speed-up ratios of 2×, 3×, and 4×. 2021.acl-long.16 @@ -338,8 +338,8 @@ Multi-Head Highly Parallelized <fixed-case>LSTM</fixed-case> Decoder for Neural Machine Translation HongfeiXu QiuhuiLiu - Josefvan Genabith - DeyiXiong + Josefvan Genabith + DeyiXiong MengZhang 273–282 One of the reasons Transformer translation models are popular is that self-attention networks for context modelling can be easily parallelized at sequence level. However, the computational complexity of a self-attention network is O(n^2), increasing quadratically with sequence length. By contrast, the complexity of LSTM-based approaches is only O(n). In practice, however, LSTMs are much slower to train than self-attention networks as they cannot be parallelized at sequence level: to model context, the current LSTM state relies on the full LSTM computation of the preceding state. This has to be computed n times for a sequence of length n. The linear transformations involved in the LSTM gate and state computations are the major cost factors in this. To enable sequence-level parallelization of LSTMs, we approximate full LSTM context modelling by computing hidden states and gates with the current input and a simple bag-of-words representation of the preceding tokens context. This allows us to compute each input step efficiently in parallel, avoiding the formerly costly sequential linear transformations. We then connect the outputs of each parallel step with computationally cheap element-wise computations. We call this the Highly Parallelized LSTM. To further constrain the number of LSTM parameters, we compute several small HPLSTMs in parallel like multi-head attention in the Transformer. The experiments show that our MHPLSTM decoder achieves significant BLEU improvements, while being even slightly faster than the self-attention network in training, and much faster than the standard LSTM. @@ -351,7 +351,7 @@ A Bidirectional Transformer Based Alignment Model for Unsupervised Word Alignment JingyiZhang - Josefvan Genabith + Josefvan Genabith 283–292 Word alignment and machine translation are two closely related tasks. Neural translation models, such as RNN-based and Transformer models, employ a target-to-source attention mechanism which can provide rough word alignments, but with a rather low accuracy. High-quality word alignment can help neural machine translation in many different ways, such as missing word detection, annotation transfer and lexicon injection. Existing methods for learning word alignment include statistical word aligners (e.g. GIZA++) and recently neural word alignment models. This paper presents a bidirectional Transformer based alignment (BTBA) model for unsupervised learning of the word alignment task. Our BTBA model predicts the current target word by attending the source context and both left-side and right-side target context to produce accurate target-to-source attention (alignment). We further fine-tune the target-to-source attention in the BTBA model to obtain better alignments using a full context based optimization method and self-supervised training. We test our method on three word alignment tasks and show that our method outperforms both previous neural word alignment approaches and the popular statistical word aligner GIZA++. 2021.acl-long.24 @@ -495,7 +495,7 @@ <fixed-case>DESCGEN</fixed-case>: A Distantly Supervised Datasetfor Generating Entity Descriptions WeijiaShi MandarJoshi - LukeZettlemoyer + LukeZettlemoyer 415–427 Short textual descriptions of entities provide summaries of their key attributes and have been shown to be useful sources of background knowledge for tasks such as entity linking and question answering. However, generating entity descriptions, especially for new and long-tail entities, can be challenging since relevant information is often scattered across multiple sources with varied content and style. We introduce DESCGEN: given mentions spread over multiple documents, the goal is to generate an entity summary description. DESCGEN consists of 37K entity descriptions from Wikipedia and Fandom, each paired with nine evidence documents on average. The documents were collected using a combination of entity linking and hyperlinks into the entity pages, which together provide high-quality distant supervision. Compared to other multi-document summarization tasks, our task is entity-centric, more abstractive, and covers a wide range of domains. We also propose a two-stage extract-then-generate baseline and show that there exists a large gap (19.9% in ROUGE-L) between state-of-art models and human performance, suggesting that the data will support significant future work. 2021.acl-long.35 @@ -663,7 +663,7 @@ RabeehKarimi Mahabadi SebastianRuder MostafaDehghani - JamesHenderson + JamesHenderson 565–576 State-of-the-art parameter-efficient fine-tuning methods rely on introducing adapter modules between the layers of a pretrained language model. However, such modules are trained separately for each task and thus do not enable sharing information across tasks. In this paper, we show that we can learn adapter parameters for all layers and tasks by generating them using shared hypernetworks, which condition on task, adapter position, and layer id in a transformer model. This parameter-efficient multi-task learning framework allows us to achieve the best of both worlds by sharing knowledge across tasks via hypernetworks while enabling the model to adapt to each individual task through task-specific adapters. Experiments on the well-known GLUE benchmark show improved performance in multi-task learning while adding only 0.29% parameters per task. We additionally demonstrate substantial performance improvements in few-shot domain generalization across a variety of tasks. Our code is publicly available in https://github.com/rabeehk/hyperformer. 2021.acl-long.47 @@ -700,7 +700,7 @@ Understanding and Countering Stereotypes: A Computational Approach to the Stereotype Content Model - Kathleen C.Fraser + Kathleen C.Fraser IsarNejadgholi SvetlanaKiritchenko 600–616 @@ -756,7 +756,7 @@ BaolinPeng ZhouYu Ying NianWu - Song-ChunZhu + Song-ChunZhu 658–670 Inferring social relations from dialogues is vital for building emotionally intelligent robots to interpret human language better and act accordingly. We model the social network as an And-or Graph, named SocAoG, for the consistency of relations among a group and leveraging attributes as inference cues. Moreover, we formulate a sequential structure prediction task, and propose an \alpha-\beta-\gamma strategy to incrementally parse SocAoG for the dynamic inference upon any incoming utterance: (i) an \alpha process predicting attributes and relations conditioned on the semantics of dialogues, (ii) a \beta process updating the social relations based on related attributes, and (iii) a \gamma process updating individual’s attributes based on interpersonal social relations. Empirical results on DialogRE and MovieGraph show that our model infers social relations more accurately than the state-of-the-art methods. Moreover, the ablation study shows the three processes complement each other, and the case study demonstrates the dynamic relational inference. 2021.acl-long.54 @@ -805,7 +805,7 @@ Increasing Faithfulness in Knowledge-Grounded Dialogue with Controllable Features HannahRashkin DavidReitter - Gaurav SinghTomar + Gaurav SinghTomar DipanjanDas 704–718 Knowledge-grounded dialogue systems are intended to convey information that is based on evidence provided in a given source text. We discuss the challenges of training a generative neural dialogue model for such systems that is controlled to stay faithful to the evidence. Existing datasets contain a mix of conversational responses that are faithful to selected evidence as well as more subjective or chit-chat style responses. We propose different evaluation measures to disentangle these different styles of responses by quantifying the informativeness and objectivity. At training time, additional inputs based on these evaluation measures are given to the dialogue model. At generation time, these additional inputs act as stylistic controls that encourage the model to generate responses that are faithful to the provided evidence. We also investigate the usage of additional controls at decoding time using resampling techniques. In addition to automatic metrics, we perform a human evaluation study where raters judge the output of these controlled generation models to be generally more objective and faithful to the evidence compared to baseline dialogue systems. @@ -848,7 +848,7 @@ WeileChen HuiqiangJiang QianhuiWu - Börje F.Karlsson + Börje F.Karlsson YiGuan 743–753 Neural methods have been shown to achieve high performance in Named Entity Recognition (NER), but rely on costly high-quality labeled data for training, which is not always available across languages. While previous works have shown that unlabeled data in a target language can be used to improve cross-lingual model performance, we propose a novel adversarial approach (AdvPicker) to better leverage such data and further improve results. We design an adversarial learning framework in which an encoder learns entity domain knowledge from labeled source-language data and better shared features are captured via adversarial training - where a discriminator selects less language-dependent target-language data via similarity to the source language. Experimental results on standard benchmark datasets well demonstrate that the proposed method benefits strongly from this data selection process and outperforms existing state-of-the-art methods; without requiring any additional external resources (e.g., gazetteers or via machine translation). @@ -912,7 +912,7 @@ PatrickFernandes DanishPruthi AditiChaudhary - André F. T.Martins + André F. T.Martins GrahamNeubig 788–801 Context-aware machine translation models are designed to leverage contextual information, but often fail to do so. As a result, they inaccurately disambiguate pronouns and polysemous words that require context for resolution. In this paper, we ask several questions: What contexts do human translators use to resolve ambiguous words? Are models paying large amounts of attention to the same context? What if we explicitly train them to do so? To answer these questions, we introduce SCAT (Supporting Context for Ambiguous Translations), a new English-French dataset comprising supporting context words for 14K translations that professional translators found useful for pronoun disambiguation. Using SCAT, we perform an in-depth analysis of the context used to disambiguate, examining positional and lexical characteristics of the supporting words. Furthermore, we measure the degree of alignment between the model’s attention scores and the supporting context from SCAT, and apply a guided attention strategy to encourage agreement between the two. @@ -928,10 +928,10 @@ AdithyaRenduchintala VishravChaudhary NamanGoyal - FranciscoGuzmán + FranciscoGuzmán PascaleFung PhilippKoehn - MonaDiab + MonaDiab 802–812 The scarcity of parallel data is a major obstacle for training high-quality machine translation systems for low-resource languages. Fortunately, some low-resource languages are linguistically related or similar to high-resource languages; these related languages may share many lexical or syntactic structures. In this work, we exploit this linguistic overlap to facilitate translating to and from a low-resource language with only monolingual data, in addition to any parallel data in the related high-resource language. Our method, NMT-Adapt, combines denoising autoencoding, back-translation and adversarial objectives to utilize monolingual data for low-resource adaptation. We experiment on 7 languages from three different language families and show that our technique significantly improves translation into low-resource language compared to other translation baselines. 2021.acl-long.66 @@ -941,9 +941,9 @@ Bilingual Lexicon Induction via Unsupervised Bitext Construction and Word Alignment - HaoyueShi - LukeZettlemoyer - Sida I.Wang + HaoyueShi + LukeZettlemoyer + Sida I.Wang 813–826 Bilingual lexicons map words in one language to their translations in another, and are typically induced by learning linear projections to align monolingual word embedding spaces. In this paper, we show it is possible to produce much higher quality lexicons with methods that combine (1) unsupervised bitext mining and (2) unsupervised word alignment. Directly applying a pipeline that uses recent algorithms for both subproblems significantly improves induced lexicon quality and further gains are possible by learning to filter the resulting lexical entries, with both unsupervised and semi-supervised schemes. Our final model outperforms the state of the art on the BUCC 2020 shared task by 14 F1 points averaged over 12 language pairs, while also providing a more interpretable approach that allows for rich reasoning of word meaning in context. Further analysis of our output and the standard reference lexicons suggests they are of comparable quality, and new benchmarks may be needed to measure further progress on this task. 2021.acl-long.67 @@ -1020,10 +1020,10 @@ <fixed-case>XLPT</fixed-case>-<fixed-case>AMR</fixed-case>: Cross-Lingual Pre-Training via Multi-Task Learning for Zero-Shot <fixed-case>AMR</fixed-case> Parsing and Text Generation DongqinXu - JunhuiLi + JunhuiLi MuhuaZhu MinZhang - GuodongZhou + GuodongZhou 896–907 Due to the scarcity of annotated data, Abstract Meaning Representation (AMR) research is relatively limited and challenging for languages other than English. Upon the availability of English AMR dataset and English-to- X parallel datasets, in this paper we propose a novel cross-lingual pre-training approach via multi-task learning (MTL) for both zeroshot AMR parsing and AMR-to-text generation. Specifically, we consider three types of relevant tasks, including AMR parsing, AMR-to-text generation, and machine translation. We hope that knowledge gained while learning for English AMR parsing and text generation can be transferred to the counterparts of other languages. With properly pretrained models, we explore four different finetuning methods, i.e., vanilla fine-tuning with a single task, one-for-all MTL fine-tuning, targeted MTL fine-tuning, and teacher-studentbased MTL fine-tuning. Experimental results on AMR parsing and text generation of multiple non-English languages demonstrate that our approach significantly outperforms a strong baseline of pre-training approach, and greatly advances the state of the art. In detail, on LDC2020T07 we have achieved 70.45%, 71.76%, and 70.80% in Smatch F1 for AMR parsing of German, Spanish, and Italian, respectively, while for AMR-to-text generation of the languages, we have obtained 25.69, 31.36, and 28.42 in BLEU respectively. We make our code available on github https://github.com/xdqkid/XLPT-AMR. 2021.acl-long.73 @@ -1059,7 +1059,7 @@ A Targeted Assessment of Incremental Processing in Neural Language Models and Humans EthanWilcox PranaliVani - RogerLevy + RogerLevy 939–952 We present a targeted, scaled-up comparison of incremental processing in humans and neural language models by collecting by-word reaction time data for sixteen different syntactic test suites across a range of structural phenomena. Human reaction time data comes from a novel online experimental paradigm called the Interpolated Maze task. We compare human reaction times to by-word probabilities for four contemporary language models, with different architectures and trained on a range of data set sizes. We find that across many phenomena, both humans and language models show increased processing difficulty in ungrammatical sentence regions with human and model ‘accuracy’ scores a la Marvin and Linzen (2018) about equal. However, although language model outputs match humans in direction, we show that models systematically under-predict the difference in magnitude of incremental processing difficulty between grammatical and ungrammatical sentences. Specifically, when models encounter syntactic violations they fail to accurately predict the longer reading times observed in the human data. These results call into question whether contemporary language models are approaching human-like performance for sensitivity to syntactic violations. 2021.acl-long.76 @@ -1083,7 +1083,7 @@ To <fixed-case>POS</fixed-case> Tag or Not to <fixed-case>POS</fixed-case> Tag: The Impact of <fixed-case>POS</fixed-case> Tags on Morphological Learning in Low-Resource Settings - SarahMoeller + SarahMoeller LingLiu MansHulden 966–978 @@ -1096,8 +1096,8 @@ Prosodic segmentation for parsing spoken dialogue ElizabethNielsen - MarkSteedman - SharonGoldwater + MarkSteedman + SharonGoldwater 979–992 Parsing spoken dialogue poses unique difficulties, including disfluencies and unmarked boundaries between sentence-like units. Previous work has shown that prosody can help with parsing disfluent speech (Tran et al. 2018), but has assumed that the input to the parser is already segmented into sentence-like units (SUs), which isn’t true in existing speech applications. We investigate how prosody affects a parser that receives an entire dialogue turn as input (a turn-based model), instead of gold standard pre-segmented SUs (an SU-based model). In experiments on the English Switchboard corpus, we find that when using transcripts alone, the turn-based model has trouble segmenting SUs, leading to worse parse performance than the SU-based model. However, prosody can effectively replace gold standard SU boundaries: with prosody, the turn-based model performs as well as the SU-based model (91.38 vs. 91.06 F1 score, respectively), despite performing two tasks (SU segmentation and parsing) rather than one (parsing alone). Analysis shows that pitch and intensity features are the most important for this corpus, since they allow the model to correctly distinguish an SU boundary from a speech disfluency – a distinction that the model otherwise struggles to make. 2021.acl-long.79 @@ -1132,7 +1132,7 @@ GilsiniaLopez AlexandraOlteanu RobertSim - HannaWallach + HannaWallach 1004–1015 Auditing NLP systems for computational harms like surfacing stereotypes is an elusive goal. Several recent efforts have focused on benchmark datasets consisting of pairs of contrastive sentences, which are often accompanied by metrics that aggregate an NLP system’s behavior on these pairs into measurements of harms. We examine four such benchmarks constructed for two NLP tasks: language modeling and coreference resolution. We apply a measurement modeling lens—originating from the social sciences—to inventory a range of pitfalls that threaten these benchmarks’ validity as measurement models for stereotyping. We find that these benchmarks frequently lack clear articulations of what is being measured, and we highlight a range of ambiguities and unstated assumptions that affect how these benchmarks conceptualize and operationalize stereotyping. 2021.acl-long.81 @@ -1145,8 +1145,8 @@ JustinLovelace DenisNewman-Griffis ShikharVashishth - Jill FainLehman - CarolynRosé + Jill FainLehman + CarolynRosé 1016–1029 Knowledge Graph (KG) completion research usually focuses on densely connected benchmark datasets that are not representative of real KGs. We curate two KG datasets that include biomedical and encyclopedic knowledge and use an existing commonsense KG dataset to explore KG completion in the more realistic setting where dense connectivity is not guaranteed. We develop a deep convolutional network that utilizes textual entity representations and demonstrate that our model outperforms recent KG completion methods in this challenging setting. We find that our model’s performance improvements stem primarily from its robustness to sparsity. We then distill the knowledge from the convolutional network into a student network that re-ranks promising candidate entities. This re-ranking stage leads to further improvements in performance and demonstrates the effectiveness of entity re-ranking for KG completion. 2021.acl-long.82 @@ -1239,7 +1239,7 @@ JeanMaillard VladimirKarpukhin FabioPetroni - Wen-tauYih + Wen-tauYih BarlasOguz VeselinStoyanov GargiGhosh @@ -1255,7 +1255,7 @@ YianZhang AlexWarstadt XiaochengLi - Samuel R.Bowman + Samuel R.Bowman 1112–1125 NLP is currently dominated by language models like RoBERTa which are pretrained on billions of words. But what exact knowledge or skills do Transformer LMs learn from large-scale pretraining that they cannot learn from less data? To explore this question, we adopt five styles of evaluation: classifier probing, information-theoretic probing, unsupervised relative acceptability judgments, unsupervised language model knowledge probing, and fine-tuning on NLU tasks. We then draw learning curves that track the growth of these different measures of model ability with respect to pretraining data volume using the MiniBERTas, a group of RoBERTa models pretrained on 1M, 10M, 100M and 1B words. We find that these LMs require only about 10M to 100M words to learn to reliably encode most syntactic and semantic features we test. They need a much larger quantity of data in order to acquire enough commonsense knowledge and other skills required to master typical downstream NLU tasks. The results suggest that, while the ability to encode linguistic features is almost certainly necessary for language understanding, it is likely that other, unidentified, forms of knowledge are the major drivers of recent improvements in language understanding among large pretrained models. 2021.acl-long.90 @@ -1285,7 +1285,7 @@ JasonPhang HaokunLiu KyunghyunCho - Samuel R.Bowman + Samuel R.Bowman 1141–1158 Recent years have seen numerous NLP datasets introduced to evaluate the performance of fine-tuned models on natural language understanding tasks. Recent results from large pretrained models, though, show that many of these datasets are largely saturated and unlikely to be able to detect further progress. What kind of datasets are still effective at discriminating among strong models, and what kind of datasets should we expect to be able to detect future improvements? To measure this uniformly across datasets, we draw on Item Response Theory and evaluate 29 datasets using predictions from 18 pretrained Transformer models on individual test examples. We find that Quoref, HellaSwag, and MC-TACO are best suited for distinguishing among state-of-the-art models, while SNLI, MNLI, and CommitmentBank seem to be saturated for current strong models. We also observe span selection task format, which is used for QA datasets like QAMR or SQuAD2.0, is effective in differentiating between strong and weak models. 2021.acl-long.92 @@ -1296,7 +1296,7 @@ Uncovering Constraint-Based Behavior in Neural Models via Targeted Fine-Tuning ForrestDavis - Martenvan Schijndel + Martenvan Schijndel 1159–1171 A growing body of literature has focused on detailing the linguistic knowledge embedded in large, pretrained language models. Existing work has shown that non-linguistic biases in models can drive model behavior away from linguistic generalizations. We hypothesized that competing linguistic processes within a language, rather than just non-linguistic model biases, could obscure underlying linguistic knowledge. We tested this claim by exploring a single phenomenon in four languages: English, Chinese, Spanish, and Italian. While human behavior has been found to be similar across languages, we find cross-linguistic variation in model behavior. We show that competing processes in a language act as constraints on model behavior and demonstrate that targeted fine-tuning can re-weight the learned constraints, uncovering otherwise dormant linguistic knowledge in models. Our results suggest that models need to learn both the linguistic constraints in a language and their relative ranking, with mismatches in either producing non-human-like behavior. 2021.acl-long.93 @@ -1309,7 +1309,7 @@ RishabhBhardwaj NavonilMajumder SoujanyaPoria - EduardHovy + EduardHovy 1172–1182 Interpretability is an important aspect of the trustworthiness of a model’s predictions. Transformer’s predictions are widely explained by the attention weights, i.e., a probability distribution generated at its self-attention unit (head). Current empirical studies provide shreds of evidence that attention weights are not explanations by proving that they are not unique. A recent study showed theoretical justifications to this observation by proving the non-identifiability of attention weights. For a given input to a head and its output, if the attention weights generated in it are unique, we call the weights identifiable. In this work, we provide deeper theoretical analysis and empirical observations on the identifiability of attention weights. Ignored in the previous works, we find the attention weights are more identifiable than we currently perceive by uncovering the hidden role of the key vector. However, the weights are still prone to be non-unique attentions that make them unfit for interpretation. To tackle this issue, we provide a variant of the encoder layer that decouples the relationship between key and value vector and provides identifiable weights up to the desired length of the input. We prove the applicability of such variations by providing empirical justifications on varied text classification tasks. The implementations are available at https://github.com/declare-lab/identifiable-transformers. 2021.acl-long.94 @@ -1322,7 +1322,7 @@ XinnuoXu GuoyinWang Young-BumKim - SungjinLee + SungjinLee 1183–1195 Natural Language Generation (NLG) is a key component in a task-oriented dialogue system, which converts the structured meaning representation (MR) to the natural language. For large-scale conversational systems, where it is common to have over hundreds of intents and thousands of slots, neither template-based approaches nor model-based approaches are scalable. Recently, neural NLGs started leveraging transfer learning and showed promising results in few-shot settings. This paper proposes AugNLG, a novel data augmentation approach that combines a self-trained neural retrieval model with a few-shot learned NLU model, to automatically create MR-to-Text data from open-domain texts. The proposed system mostly outperforms the state-of-the-art methods on the FewshotWOZ data in both BLEU and Slot Error Rate. We further confirm improved results on the FewshotSGD data and provide comprehensive analysis results on key components of our system. Our code and data are available at https://github.com/XinnuoXu/AugNLG. 2021.acl-long.95 @@ -1334,7 +1334,7 @@ Can vectors read minds better than experts? Comparing data augmentation strategies for the automated scoring of children’s mindreading ability VenelinKovatchev PhillipSmith - MarkLee + MarkLee RoryDevine 1196–1206 In this paper we implement and compare 7 different data augmentation strategies for the task of automatic scoring of children’s ability to understand others’ thoughts, feelings, and desires (or “mindreading”). We recruit in-domain experts to re-annotate augmented samples and determine to what extent each strategy preserves the original rating. We also carry out multiple experiments to measure how much each augmentation strategy improves the performance of automatic scoring systems. To determine the capabilities of automatic systems to generalize to unseen data, we create UK-MIND-20 - a new corpus of children’s performance on tests of mindreading, consisting of 10,320 question-answer pairs. We obtain a new state-of-the-art performance on the MIND-CA corpus, improving macro-F1-score by 6 points. Results indicate that both the number of training examples and the quality of the augmentation strategies affect the performance of the systems. The task-specific augmentations generally outperform task-agnostic augmentations. Automatic augmentations based on vectors (GloVe, FastText) perform the worst. We find that systems trained on MIND-CA generalize well to UK-MIND-20. We demonstrate that data augmentation strategies also improve the performance on unseen data. @@ -1350,7 +1350,7 @@ BudhadityaDeb GuoqingZheng MiladShokouhi - Ahmed HassanAwadallah + Ahmed HassanAwadallah 1207–1220 Reply suggestion models help users process emails and chats faster. Previous work only studies English reply suggestion. Instead, we present MRS, a multilingual reply suggestion dataset with ten languages. MRS can be used to compare two families of models: 1) retrieval models that select the reply from a fixed set and 2) generation models that produce the reply from scratch. Therefore, MRS complements existing cross-lingual generalization benchmarks that focus on classification and sequence labeling tasks. We build a generation model and a retrieval model as baselines for MRS. The two models have different strengths in the monolingual setting, and they require different strategies to generalize across languages. MRS is publicly available at https://github.com/zhangmozhi/mrs. 2021.acl-long.97 @@ -1365,7 +1365,7 @@ HarshTrivedi AlexWarstadt ClaraVania - Samuel R.Bowman + Samuel R.Bowman 1221–1235 Crowdsourcing is widely used to create data for common natural language understanding tasks. Despite the importance of these datasets for measuring and refining model understanding of language, there has been little focus on the crowdsourcing methods used for collecting the datasets. In this paper, we compare the efficacy of interventions that have been proposed in prior work as ways of improving data quality. We use multiple-choice question answering as a testbed and run a randomized trial by assigning crowdworkers to write questions under one of four different data collection protocols. We find that asking workers to write explanations for their examples is an ineffective stand-alone strategy for boosting NLU example difficulty. However, we find that training crowdworkers, and then using an iterative process of collecting data, sending feedback, and qualifying workers based on expert judgments is an effective means of collecting challenging data. But using crowdsourced, instead of expert judgments, to qualify workers and send feedback does not prove to be effective. We observe that the data from the iterative protocol with expert assessments is more challenging by several measures. Notably, the human–model gap on the unanimous agreement portion of this data is, on average, twice as large as the gap for the baseline protocol data. 2021.acl-long.98 @@ -1382,7 +1382,7 @@ ShangyiNing YanchengHe ChangjianJiang - XuanjingHuang + XuanjingHuang 1236–1246 Ideology of legislators is typically estimated by ideal point models from historical records of votes. It represents legislators and legislation as points in a latent space and shows promising results for modeling voting behavior. However, it fails to capture more specific attitudes of legislators toward emerging issues and is unable to model newly-elected legislators without voting histories. In order to mitigate these two problems, we explore to incorporate both voting behavior and public statements on Twitter to jointly model legislators. In addition, we propose a novel task, namely hashtag usage prediction to model the ideology of legislators on Twitter. In practice, we construct a heterogeneous graph for the legislative context and use relational graph neural networks to learn the representation of legislators with the guidance of historical records of their voting and hashtag usage. Experiment results indicate that our model yields significant improvements for the task of roll call vote prediction. Further analysis further demonstrates that legislator representation we learned captures nuances in statements. 2021.acl-long.99 @@ -1406,7 +1406,7 @@ DanniLiu JanNiehues JamesCross - FranciscoGuzmán + FranciscoGuzmán XianLi 1259–1273 Multilingual neural machine translation has shown the capability of directly translating between language pairs unseen in training, i.e. zero-shot translation. Despite being conceptually attractive, it often suffers from low output quality. The difficulty of generalizing to new translation directions suggests the model representations are highly specific to those language pairs seen in training. We demonstrate that a main factor causing the language-specific representations is the positional correspondence to input tokens. We show that this can be easily alleviated by removing residual connections in an encoder layer. With this modification, we gain up to 18.5 BLEU points on zero-shot translation while retaining quality on supervised directions. The improvements are particularly prominent between related languages, where our proposed model outperforms pivot-based translation. Moreover, our approach allows easy integration of new languages, which substantially expands translation coverage. By thorough inspections of the hidden layer outputs, we show that our approach indeed leads to more language-independent representations. @@ -1418,7 +1418,7 @@ Common Sense Beyond <fixed-case>E</fixed-case>nglish: Evaluating and Improving Multilingual Language Models for Commonsense Reasoning - Bill YuchenLin + Bill YuchenLin SeyeonLee XiaoyangQiao XiangRen @@ -1447,7 +1447,7 @@ Diverse Pretrained Context Encodings Improve Document Translation DomenicDonato LeiYu - ChrisDyer + ChrisDyer 1299–1311 We propose a new architecture for adapting a sentence-level sequence-to-sequence transformer by incorporating multiple pre-trained document context signals and assess the impact on translation performance of (1) different pretraining approaches for generating these signals, (2) the quantity of parallel data for which document context is available, and (3) conditioning on source, target, or source and target contexts. Experiments on the NIST Chinese-English, and IWSLT and WMT English-German tasks support four general conclusions: that using pre-trained context representations markedly improves sample efficiency, that adequate parallel data resources are crucial for learning to use document context, that jointly conditioning on multiple context representations outperforms any single representation, and that source context is more valuable for translation performance than target side context. Our best multi-context model consistently outperforms the best existing context-aware transformers. 2021.acl-long.104 @@ -1461,7 +1461,7 @@ SarveshMehtani VaidehiPatil AbhijeetAwasthi - ParthaTalukdar + ParthaTalukdar SunitaSarawagi 1312–1323 Recent research in multilingual language models (LM) has demonstrated their ability to effectively handle multiple languages in a single model. This holds promise for low web-resource languages (LRL) as multilingual models can enable transfer of supervision from high resource languages to LRLs. However, incorporating a new language in an LM still remains a challenge, particularly for languages with limited corpora and in unseen scripts. In this paper we argue that relatedness among languages in a language family may be exploited to overcome some of the corpora limitations of LRLs, and propose RelateLM. We focus on Indian languages, and exploit relatedness along two dimensions: (1) script (since many Indic scripts originated from the Brahmic script), and (2) sentence structure. RelateLM uses transliteration to convert the unseen script of limited LRL text into the script of a Related Prominent Language (RPL) (Hindi in our case). While exploiting similar sentence structures, RelateLM utilizes readily available bilingual dictionaries to pseudo translate RPL text into LRL corpora. Experiments on multiple real-world benchmark datasets provide validation to our hypothesis that using a related language as pivot, along with transliteration and pseudo translation based data augmentation, can be an effective way to adapt LMs for LRLs, rather than direct training or pivoting through English. @@ -1485,7 +1485,7 @@ Towards Argument Mining for Social Good: A Survey - Eva MariaVecchi + Eva MariaVecchi NeeleFalk ImanJundi GabriellaLapesa @@ -1540,7 +1540,7 @@ Select, Extract and Generate: Neural Keyphrase Generation with Layer-wise Coverage Attention - WasiAhmad + WasiAhmad XiaoBai SoominLee Kai-WeiChang @@ -1595,7 +1595,7 @@ Lya HulliyyatusSuadaa HidetakaKamigaito KotaroFunakoshi - ManabuOkumura + ManabuOkumura HiroyaTakamura 1451–1465 Recent neural text generation models have shown significant improvement in generating descriptive text from structured data such as table formats. One of the remaining important challenges is generating more analytical descriptions that can be inferred from facts in a data source. The use of a template-based generator and a pointer-generator is among the potential alternatives for table-to-text generators. In this paper, we propose a framework consisting of a pre-trained model and a copy mechanism. The pre-trained models are fine-tuned to produce fluent text that is enriched with numerical reasoning. However, it still lacks fidelity to the table contents. The copy mechanism is incorporated in the fine-tuning step by using general placeholders to avoid producing hallucinated phrases that are not supported by a table while preserving high fluency. In summary, our contributions are (1) a new dataset for numerical table-to-text generation using pairs of a table and a paragraph of a table description with richer inference from scientific papers, and (2) a table-to-text generation framework enriched with numerical reasoning. @@ -1651,10 +1651,10 @@ KhalilMrini FranckDernoncourt SeunghyunYoon - TrungBui + TrungBui WalterChang EmiliaFarcas - NdapaNakashole + NdapaNakashole 1505–1515 Users of medical question answering systems often submit long and detailed questions, making it hard to achieve high recall in answer retrieval. To alleviate this problem, we propose a novel Multi-Task Learning (MTL) method with data augmentation for medical question understanding. We first establish an equivalence between the tasks of question summarization and Recognizing Question Entailment (RQE) using their definitions in the medical domain. Based on this equivalence, we propose a data augmentation algorithm to use just one dataset to optimize for both tasks, with a weighted MTL loss. We introduce gradually soft parameter-sharing: a constraint for decoder parameters to be close, that is gradually loosened as we move to the highest layer. We show through ablation studies that our proposed novelties improve performance. Our method outperforms existing MTL methods across 4 datasets of medical question pairs, in ROUGE scores, RQE accuracy and human evaluation. Finally, we show that our method fares better than single-task learning under 4 low-resource settings. 2021.acl-long.119 @@ -1745,8 +1745,8 @@ NedimLipka FranckDernoncourt VladMorariu - VarunManjunatha - DouglasOard + VarunManjunatha + DouglasOard PhilipResnik HenningWachsmuth 1583–1595 @@ -1804,8 +1804,8 @@ ZidMancenido JulieCohen HeatherHill - DanJurafsky - TatsunoriHashimoto + DanJurafsky + TatsunoriHashimoto 1638–1653 In conversation, uptake happens when a speaker builds on the contribution of their interlocutor by, for example, acknowledging, repeating or reformulating what they have said. In education, teachers’ uptake of student contributions has been linked to higher student achievement. Yet measuring and improving teachers’ uptake at scale is challenging, as existing methods require expensive annotation by experts. We propose a framework for computationally measuring uptake, by (1) releasing a dataset of student-teacher exchanges extracted from US math classroom transcripts annotated for uptake by experts; (2) formalizing uptake as pointwise Jensen-Shannon Divergence (pJSD), estimated via next utterance classification; (3) conducting a linguistically-motivated comparison of different unsupervised measures and (4) correlating these measures with educational outcomes. We find that although repetition captures a significant part of uptake, pJSD outperforms repetition-based baselines, as it is capable of identifying a wider range of uptake phenomena like question answering and reformulation. We apply our uptake measure to three different educational datasets with outcome indicators. Unlike baseline measures, pJSD correlates significantly with instruction quality in all three, providing evidence for its generalizability and for its potential to serve as an automated professional development tool for teachers. 2021.acl-long.130 @@ -1817,7 +1817,7 @@ A Survey of Code-switching: Linguistic and Social Perspectives for Language Technologies A. SezaDoğruöz SunayanaSitaram - Barbara E.Bullock + Barbara E.Bullock Almeida JacquelineToribio 1654–1666 The analysis of data in which multiple languages are represented has gained popularity among computational linguists in recent years. So far, much of this research focuses mainly on the improvement of computational methods and largely ignores linguistic and social aspects of C-S discussed across a wide range of languages within the long-established literature in linguistics. To fill this gap, we offer a survey of code-switching (C-S) covering the literature in linguistics with a reflection on the key issues in language technologies. From the linguistic perspective, we provide an overview of structural and functional patterns of C-S focusing on the literature from European and Indian contexts as highly multilingual areas. From the language technologies perspective, we discuss how massive language models fail to represent diverse C-S types due to lack of appropriate training data, lack of robust evaluation benchmarks for C-S (across multilingual situations and types of C-S) and lack of end-to- end systems that cover sociolinguistic aspects of C-S as well. Our survey will be a step to- wards an outcome of mutual benefit for computational scientists and linguists with a shared interest in multilingualism and C-S. @@ -1830,7 +1830,7 @@ Learning from the Worst: Dynamically Generated Datasets to Improve Online Hate Detection BertieVidgen TristanThrush - ZeerakWaseem + ZeerakWaseem DouweKiela 1667–1682 We present a human-and-model-in-the-loop process for dynamically generating datasets and training better performing and more robust hate detection models. We provide a new dataset of 40,000 entries, generated and labelled by trained annotators over four rounds of dynamic data creation. It includes 15,000 challenging perturbations and each hateful entry has fine-grained labels for the type and target of hate. Hateful entries make up 54% of the dataset, which is substantially higher than comparable datasets. We show that model performance is substantially improved using this approach. Models trained on later rounds of data collection perform better on test sets and are harder for annotators to trick. They also have better performance on HateCheck, a suite of functional tests for online hate detection. We provide the code, dataset and annotation guidelines for other researchers to use. @@ -1847,10 +1847,10 @@ RevanthGangi Reddy SandeepPolisetty HengJi - Shih-FuChang - KathleenMcKeown + Shih-FuChang + KathleenMcKeown MohitBansal - AviSil + AviSil 1683–1698 To defend against machine-generated fake news, an effective mechanism is urgently needed. We contribute a novel benchmark for fake news detection at the knowledge element level, as well as a solution for this task which incorporates cross-media consistency checking to detect the fine-grained knowledge elements making news articles misinformative. Due to training data scarcity, we also formulate a novel data synthesis method by manipulating knowledge elements within the knowledge graph to generate noisy training data with specific, hard to detect, known inconsistencies. Our detection approach outperforms the state-of-the-art (up to 16.8% accuracy gain), and more critically, yields fine-grained explanations. 2021.acl-long.133 @@ -1890,7 +1890,7 @@ JunXu ZeyangLei HaifengWang - Zheng-YuNiu + Zheng-YuNiu HuaWu WanxiangChe 1726–1739 @@ -1995,7 +1995,7 @@ Implicit Representations of Meaning in Neural Language Models - Belinda Z.Li + Belinda Z.Li MaxwellNye JacobAndreas 1813–1827 @@ -2010,7 +2010,7 @@ MatthewFinlayson AaronMueller SebastianGehrmann - StuartShieber + StuartShieber TalLinzen YonatanBelinkov 1828–1843 @@ -2052,7 +2052,7 @@ Poisoning Knowledge Graph Embeddings via Relation Inference Patterns PeruBhardwaj - JohnKelleher + JohnKelleher LucaCostabello DeclanO’Sullivan 1875–1888 @@ -2078,7 +2078,7 @@ A Survey of Race, Racism, and Anti-Racism in <fixed-case>NLP</fixed-case> AnjalieField Su LinBlodgett - ZeerakWaseem + ZeerakWaseem YuliaTsvetkov 1905–1925 Despite inextricable ties between race and language, little work has considered race in NLP research and development. In this work, we survey 79 papers from the ACL anthology that mention race. These papers reveal various types of race-related bias in all stages of NLP model development, highlighting the need for proactive consideration of how NLP systems can uphold racial hierarchies. However, persistent gaps in research on race and NLP remain: race has been siloed as a niche topic and remains ignored in many NLP tasks; most work operationalizes race as a fixed single-dimensional variable with a ground-truth label, which risks reinforcing differences produced by historical racism; and the voices of historically marginalized people are nearly absent in NLP literature. By identifying where and how NLP literature has and has not considered race, especially in comparison to related fields, our work calls for inclusion and racial justice in NLP research practices. @@ -2145,8 +2145,8 @@ <fixed-case>UXLA</fixed-case>: A Robust Unsupervised Data Augmentation Framework for Zero-Resource Cross-Lingual <fixed-case>NLP</fixed-case> M SaifulBari - TasnimMohiuddin - ShafiqJoty + TasnimMohiuddin + ShafiqJoty 1978–1992 Transfer learning has yielded state-of-the-art (SoTA) results in many supervised NLP tasks. However, annotated data for every target task in every target language is rare, especially for low-resource languages. We propose UXLA, a novel unsupervised data augmentation framework for zero-resource transfer learning scenarios. In particular, UXLA aims to solve cross-lingual adaptation problems from a source language task distribution to an unknown target language task distribution, assuming no training label in the target language. At its core, UXLA performs simultaneous self-training with data augmentation and unsupervised sample selection. To show its effectiveness, we conduct extensive experiments on three diverse zero-resource cross-lingual transfer tasks. UXLA achieves SoTA results in all the tasks, outperforming the baselines by a good margin. With an in-depth framework dissection, we demonstrate the cumulative contributions of different components to its success. 2021.acl-long.154 @@ -2161,7 +2161,7 @@ YuBao MingxuanWang LinQiu - WeinanZhang + WeinanZhang YongYu LeiLi 1993–2003 @@ -2219,7 +2219,7 @@ <fixed-case>PIGL</fixed-case>e<fixed-case>T</fixed-case>: Language Grounding Through Neuro-Symbolic Interaction in a 3<fixed-case>D</fixed-case> World RowanZellers AriHoltzman - MatthewPeters + MatthewPeters RoozbehMottaghi AniruddhaKembhavi AliFarhadi @@ -2286,7 +2286,7 @@ WenjieZi KeyiTang ChenyangHuang - Jackie Chi KitCheung + Jackie Chi KitCheung Simon J.D.Prince YanshuaiCao 2089–2102 @@ -2298,7 +2298,7 @@ <fixed-case>BERTAC</fixed-case>: Enhancing Transformer-based Language Models with Adversarially Pretrained Convolutional Neural Networks - Jong-HoonOh + Jong-HoonOh RyuIida JulienKloetzer KentaroTorisawa @@ -2325,10 +2325,10 @@ Explaining Relationships Between Scientific Documents KelvinLuu XinyiWu - RikKoncel-Kedziorski + RikKoncel-Kedziorski KyleLo IsabelCachola - Noah A.Smith + Noah A.Smith 2130–2144 We address the task of explaining relationships between two scientific documents using natural language text. This task requires modeling the complex content of long technical documents, deducing a relationship between these documents, and expressing the details of that relationship in text. In addition to the theoretical interest of this task, successful solutions can help improve researcher efficiency in search and review. In this paper we establish a dataset of 622K examples from 154K documents. We pretrain a large language model to serve as the foundation for autoregressive approaches to the task. We explore the impact of taking different views on the two documents, including the use of dense representations extracted with scientific IE systems. We provide extensive automatic and human evaluations which show the promise of such models, but make clear challenges for future work. 2021.acl-long.166 @@ -2583,7 +2583,7 @@ Style is <fixed-case>NOT</fixed-case> a single variable: Case Studies for Cross-Stylistic Language Understanding DongyeopKang - EduardHovy + EduardHovy 2376–2387 Every natural text is written in some style. Style is formed by a complex combination of different stylistic factors, including formality markers, emotions, metaphors, etc. One cannot form a complete understanding of a text without considering these factors. The factors combine and co-vary in complex ways to form styles. Studying the nature of the covarying combinations sheds light on stylistic language in general, sometimes called cross-style language understanding. This paper provides the benchmark corpus (XSLUE) that combines existing datasets and collects a new one for sentence-level cross-style language understanding and evaluation. The benchmark contains text in 15 different styles under the proposed four theoretical groupings: figurative, personal, affective, and interpersonal groups. For valid evaluation, we collect an additional diagnostic set by annotating all 15 styles on the same text. Using XSLUE, we propose three interesting cross-style applications in classification, correlation, and generation. First, our proposed cross-style classifier trained with multiple styles together helps improve overall classification performance against individually-trained style classifiers. Second, our study shows that some styles are highly dependent on each other in human-written text. Finally, we find that combinations of some contradictive styles likely generate stylistically less appropriate text. We believe our benchmark and case studies help explore interesting future directions for cross-style research. The preprocessed datasets and code are publicly available. 2021.acl-long.185 @@ -2690,7 +2690,7 @@ Comprehensive Study: How the Context Information of Different Granularity Affects Dialogue State Tracking? PuhaiYang - HeyanHuang + HeyanHuang Xian-LingMao 2481–2491 Dialogue state tracking (DST) plays a key role in task-oriented dialogue systems to monitor the user’s goal. In general, there are two strategies to track a dialogue state: predicting it from scratch and updating it from previous state. The scratch-based strategy obtains each slot value by inquiring all the dialogue history, and the previous-based strategy relies on the current turn dialogue to update the previous dialogue state. However, it is hard for the scratch-based strategy to correctly track short-dependency dialogue state because of noise; meanwhile, the previous-based strategy is not very useful for long-dependency dialogue state tracking. Obviously, it plays different roles for the context information of different granularity to track different kinds of dialogue states. Thus, in this paper, we will study and discuss how the context information of different granularity affects dialogue state tracking. First, we explore how greatly different granularities affect dialogue state tracking. Then, we further discuss how to combine multiple granularities for dialogue state tracking. Finally, we apply the findings about context granularity to few-shot learning scenario. Besides, we have publicly released all codes. @@ -2702,7 +2702,7 @@ <fixed-case>OTT</fixed-case>ers: One-turn Topic Transitions for Open-Domain Dialogue KarinSevegnani - David M.Howcroft + David M.Howcroft IoannisKonstas VerenaRieser 2492–2504 @@ -2937,7 +2937,7 @@ SohiSudhir PushkarMishra HelenYannakoudakis - Saif M.Mohammad + Saif M.Mohammad EkaterinaShutova 2700–2717 On social media platforms, hateful and offensive language negatively impact the mental well-being of users and the participation of people from diverse backgrounds. Automatic methods to detect offensive language have largely relied on datasets with categorical labels. However, comments can vary in their degree of offensiveness. We create the first dataset of English language Reddit comments that has fine-grained, real-valued scores between -1 (maximally supportive) and 1 (maximally offensive). The dataset was annotated using Best–Worst Scaling, a form of comparative annotation that has been shown to alleviate known biases of using rating scales. We show that the method produces highly reliable offensiveness scores. Finally, we evaluate the ability of widely-used neural models to predict offensiveness scores on this new dataset. @@ -2966,7 +2966,7 @@ Assessing the Representations of Idiomaticity in Vector Models with a Noun Compound Dataset Labeled at Type and Token Levels MarcosGarcia TiagoKramer Vieira - CarolinaScarton + CarolinaScarton MarcoIdiart AlineVillavicencio 2730–2741 @@ -3021,7 +3021,7 @@ XinyinMa ZeqiTan ShuaiZhang - WenWang + WenWang WeimingLu 2782–2794 Named entity recognition (NER) is a well-studied task in natural language processing. Traditional NER research only deals with flat entities and ignores nested entities. The span-based methods treat entity recognition as a span classification task. Although these methods have the innate ability to handle nested NER, they suffer from high computational cost, ignorance of boundary information, under-utilization of the spans that partially match with entities, and difficulties in long entity recognition. To tackle these issues, we propose a two-stage entity identifier. First we generate span proposals by filtering and boundary regression on the seed spans to locate the entities, and then label the boundary-adjusted span proposals with the corresponding categories. Our method effectively utilizes the boundary information of entities and partially matched spans during training. Through boundary regression, entities of any length can be covered theoretically, which improves the ability to recognize long entities. In addition, many low-quality seed spans are filtered out in the first stage, which reduces the time complexity of inference. Experiments on nested NER datasets demonstrate that our proposed method outperforms previous state-of-the-art models. @@ -3113,12 +3113,12 @@ Breaking the Corpus Bottleneck for Context-Aware Neural Machine Translation with Cross-Task Pre-training LinqingChen - JunhuiLi - ZhengxianGong + JunhuiLi + ZhengxianGong BoxingChen WeihuaLuo MinZhang - GuodongZhou + GuodongZhou 2851–2861 Context-aware neural machine translation (NMT) remains challenging due to the lack of large-scale document-level parallel corpora. To break the corpus bottleneck, in this paper we aim to improve context-aware NMT by taking the advantage of the availability of both large-scale sentence-level parallel dataset and source-side monolingual documents. To this end, we propose two pre-training tasks. One learns to translate a sentence from source language to target language on the sentence-level parallel dataset while the other learns to translate a document from deliberately noised to original on the monolingual documents. Importantly, the two pre-training tasks are jointly and simultaneously learned via the same model, thereafter fine-tuned on scale-limited parallel documents from both sentence-level and document-level perspectives. Experimental results on four translation tasks show that our approach significantly improves translation performance. One nice property of our approach is that the fine-tuned model can be used to translate both sentences and documents. 2021.acl-long.222 @@ -3150,7 +3150,7 @@ MarcoGaido AlinaKarakanta AlbertoMartinelli - MatteoNegri + MatteoNegri MarcoTurchi 2873–2887 Five years after the first published proofs of concept, direct approaches to speech translation (ST) are now competing with traditional cascade solutions. In light of this steady progress, can we claim that the performance gap between the two is closed? Starting from this question, we present a systematic comparison between state-of-the-art systems representative of the two paradigms. Focusing on three language directions (English-German/Italian/Spanish), we conduct automatic and manual evaluations, exploiting high-quality professional post-edits and annotations. Our multi-faceted analysis on one of the few publicly available ST benchmarks attests for the first time that: i) the gap between the two paradigms is now closed, and ii) the subtle differences observed in their behavior are not sufficient for humans neither to distinguish them nor to prefer one over the other. @@ -3296,10 +3296,10 @@ Learning Syntactic Dense Embedding with Correlation Graph for Automatic Readability Assessment - XinyingQiu + XinyingQiu YuanChen HanwuChen - Jian-YunNie + Jian-YunNie YumingShen DaweiLu 3013–3025 @@ -3403,7 +3403,7 @@ <fixed-case>O</fixed-case>nline <fixed-case>L</fixed-case>earning Meets <fixed-case>M</fixed-case>achine <fixed-case>T</fixed-case>ranslation Evaluation: Finding the Best Systems with the Least Human Effort VâniaMendonça RicardoRei - LuisaCoheur + LuisaCoheur AlbertoSardinha Ana LúciaSantos 3105–3117 @@ -3470,16 +3470,16 @@ Annotating Online Misogyny PhilineZeinert NannaInie - LeonDerczynski + LeonDerczynski 3181–3197 Online misogyny, a category of online abusive language, has serious and harmful social consequences. Automatic detection of misogynistic language online, while imperative, poses complicated challenges to both data gathering, data annotation, and bias mitigation, as this type of data is linguistically complex and diverse. This paper makes three contributions in this area: Firstly, we describe the detailed design of our iterative annotation process and codebook. Secondly, we present a comprehensive taxonomy of labels for annotating misogyny in natural written language, and finally, we introduce a high-quality dataset of annotated posts sampled from social media posts. 2021.acl-long.247 10.18653/v1/2021.acl-long.247 zeinert-etal-2021-annotating Few-<fixed-case>NERD</fixed-case>: A Few-shot Named Entity Recognition Dataset @@ -3541,7 +3541,7 @@ Joint Models for Answer Verification in Question Answering Systems ZeyuZhang - ThuyVu + ThuyVu AlessandroMoschitti 3252–3262 This paper studies joint models for selecting correct answer sentences among the top k provided by answer sentence selection (AS2) modules, which are core components of retrieval-based Question Answering (QA) systems. Our work shows that a critical step to effectively exploiting an answer set regards modeling the interrelated information between pair of answers. For this purpose, we build a three-way multi-classifier, which decides if an answer supports, refutes, or is neutral with respect to another one. More specifically, our neural architecture integrates a state-of-the-art AS2 module with the multi-classifier, and a joint layer connecting all components. We tested our models on WikiQA, TREC-QA, and a real-world dataset. The results show that our models obtain the new state of the art in AS2. @@ -3555,7 +3555,7 @@ YifanGao HenghuiZhu PatrickNg - CiceroNogueira dos Santos + CiceroNogueira dos Santos ZhiguoWang FengNan DejiaoZhang @@ -3578,7 +3578,7 @@ ShuoZhang JianchengLv FuliFeng - Tat-SengChua + Tat-SengChua 3277–3287 Hybrid data combining both tabular and textual content (e.g., financial reports) are quite pervasive in the real world. However, Question Answering (QA) over such hybrid data is largely neglected in existing research. In this work, we extract samples from real financial reports to build a new large-scale QA dataset containing both Tabular And Textual data, named TAT-QA, where numerical reasoning is usually required to infer the answer, such as addition, subtraction, multiplication, division, counting, comparison/sorting, and the compositions. We further propose a novel QA model termed TAGOP, which is capable of reasoning over both tables and text. It adopts sequence tagging to extract relevant cells from the table along with relevant spans from the text to infer their semantics, and then applies symbolic reasoning over them with a set of aggregation operators to arrive at the final answer. TAGOP achieves 58.0% inF1, which is an 11.1% absolute increase over the previous best baseline model, according to our experiments on TAT-QA. But this result still lags far behind performance of expert human, i.e.90.8% in F1. It is demonstrated that our TAT-QA is very challenging and can serve as a benchmark for training and testing powerful QA models that address hybrid form data. 2021.acl-long.254 @@ -3698,7 +3698,7 @@ JeremyBarnes RobinKurtz StephanOepen - LiljaØvrelid + LiljaØvrelid ErikVelldal 3387–3402 Structured sentiment analysis attempts to extract full opinion tuples from a text, but over time this task has been subdivided into smaller and smaller sub-tasks, e.g., target extraction or targeted polarity classification. We argue that this division has become counterproductive and propose a new unified framework to remedy the situation. We cast the structured sentiment problem as dependency graph parsing, where the nodes are spans of sentiment holders, targets and expressions, and the arcs are the relations between them. We perform experiments on five datasets in four languages (English, Norwegian, Basque, and Catalan) and show that this approach leads to strong improvements over state-of-the-art baselines. Our analysis shows that refining the sentiment graphs with syntactic dependency information further improves results. @@ -3733,7 +3733,7 @@ BoZheng ShaohanHuang Xian-LingMao - HeyanHuang + HeyanHuang FuruWei 3418–3430 The cross-lingual language models are typically pretrained with masked language modeling on multilingual text or parallel sentences. In this paper, we introduce denoising word alignment as a new cross-lingual pre-training task. Specifically, the model first self-label word alignments for parallel sentences. Then we randomly mask tokens in a bitext pair. Given a masked token, the model uses a pointer network to predict the aligned token in the other language. We alternately perform the above two steps in an expectation-maximization manner. Experimental results show that our method improves cross-lingual transferability on various datasets, especially on the token-level tasks, such as question answering, and structured prediction. Moreover, the model can serve as a pretrained word aligner, which achieves reasonably low error rate on the alignment benchmarks. The code and pretrained parameters are available at github.com/CZWin32768/XLM-Align. @@ -3870,7 +3870,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO RunxinXu TianyuLiu LeiLi - BaobaoChang + BaobaoChang 3533–3546 Document-level event extraction aims to recognize event information from a whole piece of article. Existing methods are not effective due to two challenges of this task: a) the target event arguments are scattered across sentences; b) the correlation among events in a document is non-trivial to model. In this paper, we propose Heterogeneous Graph-based Interaction Model with a Tracker (GIT) to solve the aforementioned two challenges. For the first challenge, GIT constructs a heterogeneous graph interaction network to capture global interactions among different sentences and entity mentions. For the second, GIT introduces a Tracker module to track the extracted events and hence capture the interdependency among the events. Experiments on a large-scale dataset (Zheng et al, 2019) show GIT outperforms the previous methods by 2.8 F1. Further analysis reveals is effective in extracting multiple correlated events and event arguments that scatter across the document. 2021.acl-long.274 @@ -3882,7 +3882,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Nested Named Entity Recognition via Explicitly Excluding the Influence of the Best Path YiranWang HiroyukiShindo - YujiMatsumoto + YujiMatsumoto TaroWatanabe 3547–3557 This paper presents a novel method for nested named entity recognition. As a layered method, our method extends the prior second-best path recognition method by explicitly excluding the influence of the best path. Our method maintains a set of hidden states at each time step and selectively leverages them to build a different potential function for recognition at each level. In addition, we demonstrate that recognizing innermost entities first results in better performance than the conventional outermost entities first scheme. We provide extensive experimental results on ACE2004, ACE2005, and GENIA datasets to show the effectiveness and efficiency of our proposed method. @@ -3937,8 +3937,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Superbizarre Is Not Superb: Derivational Morphology Improves <fixed-case>BERT</fixed-case>’s Interpretation of Complex Words ValentinHofmann - JanetPierrehumbert - HinrichSchütze + JanetPierrehumbert + HinrichSchütze 3594–3608 How does the input segmentation of pretrained language models (PLMs) affect their interpretations of complex words? We present the first study investigating this question, taking BERT as the example PLM and focusing on its semantic representations of English derivatives. We show that PLMs can be interpreted as serial dual-route models, i.e., the meanings of complex words are either stored or else need to be computed from the subwords, which implies that maximally meaningful input tokens should allow for the best generalization on new words. This hypothesis is confirmed by a series of semantic probing tasks on which DelBERT (Derivation leveraging BERT), a model with derivational input segmentation, substantially outperforms BERT with WordPiece segmentation. Our results suggest that the generalization capabilities of PLMs could be further improved if a morphologically-informed vocabulary of input tokens were used. 2021.acl-long.279 @@ -3949,9 +3949,9 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO <fixed-case>BERT</fixed-case> is to <fixed-case>NLP</fixed-case> what <fixed-case>A</fixed-case>lex<fixed-case>N</fixed-case>et is to <fixed-case>CV</fixed-case>: Can Pre-Trained Language Models Identify Analogies? AsahiUshio - LuisEspinosa Anke + LuisEspinosa Anke StevenSchockaert - JoseCamacho-Collados + JoseCamacho-Collados 3609–3624 Analogies play a central role in human commonsense reasoning. The ability to recognize analogies such as “eye is to seeing what ear is to hearing”, sometimes referred to as analogical proportions, shape how we structure knowledge and understand language. Surprisingly, however, the task of identifying such analogies has not yet received much attention in the language model era. In this paper, we analyze the capabilities of transformer-based language models on this unsupervised task, using benchmarks obtained from educational settings, as well as more commonly used datasets. We find that off-the-shelf language models can identify analogies to a certain extent, but struggle with abstract and complex relations, and results are highly sensitive to model architecture and hyperparameters. Overall the best results were obtained with GPT-2 and RoBERTa, while configurations using BERT were not able to outperform word embedding models. Our results raise important questions for future work about how, and to what extent, pre-trained language models capture knowledge about abstract semantic relations. 2021.acl-long.280 @@ -4066,8 +4066,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Structural Guidance for Transformer Language Models PengQian TahiraNaseem - RogerLevy - RamónFernandez Astudillo + RogerLevy + RamónFernandez Astudillo 3735–3745 Transformer-based language models pre-trained on large amounts of text data have proven remarkably successful in learning generic transferable linguistic representations. Here we study whether structural guidance leads to more human-like systematic linguistic generalization in Transformer language models without resorting to pre-training on very large amounts of data. We explore two general ideas. The “Generative Parsing” idea jointly models the incremental parse and word sequence as part of the same sequence modeling task. The “Structural Scaffold” idea guides the language model’s representation via additional structure loss that separately predicts the incremental constituency parse. We train the proposed models along with a vanilla Transformer language model baseline on a 14 million-token and a 46 million-token subset of the BLLIP dataset, and evaluate models’ syntactic generalization performances on SG Test Suites and sized BLiMP. Experiment results across two benchmarks suggest converging evidence that generative structural supervisions can induce more robust and humanlike linguistic generalization in Transformer language models without the need for data intensive pre-training. 2021.acl-long.289 @@ -4092,7 +4092,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO <fixed-case>C</fixed-case>og<fixed-case>A</fixed-case>lign: Learning to Align Textual Neural Representations to Cognitive Language Processing Signals YuqiRen - DeyiXiong + DeyiXiong 3758–3769 Most previous studies integrate cognitive language processing signals (e.g., eye-tracking or EEG data) into neural models of natural language processing (NLP) just by directly concatenating word embeddings with cognitive features, ignoring the gap between the two modalities (i.e., textual vs. cognitive) and noise in cognitive features. In this paper, we propose a CogAlign approach to these issues, which learns to align textual neural representations to cognitive features. In CogAlign, we use a shared encoder equipped with a modality discriminator to alternatively encode textual and cognitive inputs to capture their differences and commonalities. Additionally, a text-aware attention mechanism is proposed to detect task-related information and to avoid using noise in cognitive features. Experimental results on three NLP tasks, namely named entity recognition, sentiment analysis and relation extraction, show that CogAlign achieves significant improvements with multiple cognitive features over state-of-the-art models on public datasets. Moreover, our model is able to transfer cognitive information to other datasets that do not have any cognitive processing signals. 2021.acl-long.291 @@ -4119,7 +4119,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO NoahConstant MandyGuo GirishKumar - DavidUthus + DavidUthus ZaranaParekh 3786–3800 We present a novel approach to the problem of text style transfer. Unlike previous approaches requiring style-labeled training data, our method makes use of readily-available unlabeled text by relying on the implicit connection in style between adjacent sentences, and uses labeled data only at inference time. We adapt T5 (Raffel et al., 2020), a strong pretrained text-to-text model, to extract a style vector from text and use it to condition the decoder to perform style transfer. As our label-free training results in a style vector space encoding many facets of style, we recast transfers as “targeted restyling” vector operations that adjust specific attributes of the input while preserving others. We demonstrate that training on unlabeled Amazon reviews data results in a model that is competitive on sentiment transfer, even compared to models trained fully on labeled data. Furthermore, applying our novel method to a diverse corpus of unlabeled web text results in a single model capable of transferring along multiple dimensions of style (dialect, emotiveness, formality, politeness, sentiment) despite no additional training and using only a handful of exemplars at inference time. @@ -4211,8 +4211,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO SurajNair PetraGaluscakova RuiZhang - DouglasOard - KathleenMcKeown + DouglasOard + KathleenMcKeown 3881–3895 This paper proposes an approach to cross-language sentence selection in a low-resource setting. It uses data augmentation and negative sampling techniques on noisy parallel sentence data to directly learn a cross-lingual embedding-based query relevance model. Results show that this approach performs as well as or better than multiple state-of-the-art machine translation + monolingual retrieval systems trained on the same parallel data. Moreover, when a rationale training secondary objective is applied to encourage the model to match word alignment hints from a phrase-based statistical machine translation model, consistent improvements are seen across three language pairs (English-Somali, English-Swahili and English-Tagalog) over a variety of state-of-the-art baselines. 2021.acl-long.300 @@ -4246,8 +4246,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO <fixed-case>ABCD</fixed-case>: A Graph Framework to Convert Complex Sentences to a Covering Set of Simple Sentences YanjunGao - Ting-HaoHuang - Rebecca J.Passonneau + Ting-HaoHuang + Rebecca J.Passonneau 3919–3931 Atomic clauses are fundamental text units for understanding complex sentences. Identifying the atomic sentences within complex sentences is important for applications such as summarization, argument mining, discourse analysis, discourse parsing, and question answering. Previous work mainly relies on rule-based methods dependent on parsing. We propose a new task to decompose each complex sentence into simple sentences derived from the tensed clauses in the source, and a novel problem formulation as a graph edit task. Our neural model learns to Accept, Break, Copy or Drop elements of a graph that combines word adjacency and grammatical dependencies. The full processing pipeline includes modules for graph construction, graph editing, and sentence generation from the output graph. We introduce DeSSE, a new dataset designed to train and evaluate complex sentence decomposition, and MinWiki, a subset of MinWikiSplit. ABCD achieves comparable performance as two parsing baselines on MinWiki. On DeSSE, which has a more even balance of complex sentence types, our model achieves higher accuracy on the number of atomic sentences than an encoder-decoder baseline. Results include a detailed error analysis. 2021.acl-long.303 @@ -4272,7 +4272,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Adversarial Learning for Discourse Rhetorical Structure Parsing LongyinZhang FangKong - GuodongZhou + GuodongZhou 3946–3957 Text-level discourse rhetorical structure (DRS) parsing is known to be challenging due to the notorious lack of training data. Although recent top-down DRS parsers can better leverage global document context and have achieved certain success, the performance is still far from perfect. To our knowledge, all previous DRS parsers make local decisions for either bottom-up node composition or top-down split point ranking at each time step, and largely ignore DRS parsing from the global view point. Obviously, it is not sufficient to build an entire DRS tree only through these local decisions. In this work, we present our insight on evaluating the pros and cons of the entire DRS tree for global optimization. Specifically, based on recent well-performing top-down frameworks, we introduce a novel method to transform both gold standard and predicted constituency trees into tree diagrams with two color channels. After that, we learn an adversarial bot between gold and fake tree diagrams to estimate the generated DRS trees from a global perspective. We perform experiments on both RST-DT and CDTB corpora and use the original Parseval for performance evaluation. The experimental results show that our parser can substantially improve the performance when compared with previous state-of-the-art parsers. 2021.acl-long.305 @@ -4298,7 +4298,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO TongZhang LongZhang WeiYe - BoLi + BoLi JinanSun XiaoyuZhu WenZhao @@ -4346,7 +4346,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO DayihengLiu HaiboZhang WeihuaLuo - DegenHuang + DegenHuang JinsongSu 4008–4018 A good translation should not only translate the original content semantically, but also incarnate personal traits of the original text. For a real-world neural machine translation (NMT) system, these user traits (e.g., topic preference, stylistic characteristics and expression habits) can be preserved in user behavior (e.g., historical inputs). However, current NMT systems marginally consider the user behavior due to: 1) the difficulty of modeling user portraits in zero-shot scenarios, and 2) the lack of user-behavior annotated parallel dataset. To fill this gap, we introduce a novel framework called user-driven NMT. Specifically, a cache-based module and a user-driven contrastive learning method are proposed to offer NMT the ability to capture potential user traits from their historical inputs under a zero-shot learning fashion. Furthermore, we contribute the first Chinese-English parallel corpus annotated with user behavior called UDT-Corpus. Experimental results confirm that the proposed user-driven NMT can generate user-specific translations. @@ -4359,8 +4359,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO End-to-End Lexically Constrained Machine Translation for Morphologically Rich Languages JosefJon João PauloAires - DusanVaris - OndřejBojar + DusanVaris + OndřejBojar 4019–4033 Lexically constrained machine translation allows the user to manipulate the output sentence by enforcing the presence or absence of certain words and phrases. Although current approaches can enforce terms to appear in the translation, they often struggle to make the constraint word form agree with the rest of the generated output. Our manual analysis shows that 46% of the errors in the output of a baseline constrained model for English to Czech translation are related to agreement. We investigate mechanisms to allow neural machine translation to infer the correct word inflection given lemmatized constraints. In particular, we focus on methods based on training the model with constraints provided as part of the input sequence. Our experiments on English-Czech language pair show that this approach improves translation of constrained terms in both automatic and manual evaluation by reducing errors in agreement. Our approach thus eliminates inflection errors, without introducing new errors or decreasing overall quality of the translation. 2021.acl-long.311 @@ -4371,7 +4371,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Handling Extreme Class Imbalance in Technical Logbook Datasets FarhadAkhbardeh - Cecilia OvesdotterAlm + Cecilia OvesdotterAlm MarcosZampieri TravisDesell 4034–4045 @@ -4472,7 +4472,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Breaking Down Walls of Text: How Can <fixed-case>NLP</fixed-case> Benefit Consumer Privacy? AbhilashaRavichander - Alan WBlack + Alan WBlack ThomasNorton ShomirWilson NormanSadeh @@ -4505,7 +4505,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Reliability Testing for Natural Language Processing Systems SamsonTan - ShafiqJoty + ShafiqJoty KathyBaxter ArazTaeihagh Gregory A.Bennett @@ -4543,7 +4543,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO IldikóPilán DavidSanchez MontserratBatet - LiljaØvrelid + LiljaØvrelid 4188–4203 This position paper investigates the problem of automated text anonymisation, which is a prerequisite for secure sharing of documents containing sensitive information about individuals. We summarise the key concepts behind text anonymisation and provide a review of current approaches. Anonymisation methods have so far been developed in two fields with little mutual interaction, namely natural language processing and privacy-preserving data publishing. Based on a case study, we outline the benefits and limitations of these approaches and discuss a number of open challenges, such as (1) how to account for multiple types of semantic inferences, (2) how to strike a balance between disclosure risk and data utility and (3) how to evaluate the quality of the resulting anonymisation. We lay out a case for moving beyond sequence labelling models and incorporate explicit measures of disclosure risk into the text anonymisation process. 2021.acl-long.323 @@ -4637,7 +4637,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Societal Biases in Language Generation: Progress and Challenges EmilySheng Kai-WeiChang - PremNatarajan + PremNatarajan NanyunPeng 4275–4293 Technology for language generation has advanced rapidly, spurred by advancements in pre-training large models on massive amounts of data and the need for intelligent agents to communicate in a natural manner. While techniques can effectively generate fluent text, they can also produce undesirable societal biases that can have a disproportionately negative impact on marginalized populations. Language generation presents unique challenges for biases in terms of direct user interaction and the structure of decoding techniques. To better understand these challenges, we present a survey on societal biases in language generation, focusing on how data and techniques contribute to biases and progress towards reducing biases. Motivated by a lack of studies on biases from decoding techniques, we also conduct experiments to quantify the effects of these techniques. By further discussing general trends and open challenges, we call to attention promising directions for research and the importance of fairness and inclusivity considerations for language generation applications. @@ -4762,7 +4762,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Neural Stylistic Response Generation with Disentangled Latent Variables QingfuZhu - Wei-NanZhang + Wei-NanZhang TingLiu William YangWang 4391–4401 @@ -4774,7 +4774,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Intent Classification and Slot Filling for Privacy Policies - WasiAhmad + WasiAhmad JianfengChi TuLe ThomasNorton @@ -4865,7 +4865,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO PedroRodriguez JoeBarrow Alexander MiserlisHoyle - John P.Lalor + John P.Lalor RobinJia JordanBoyd-Graber 4486–4503 @@ -4881,7 +4881,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO AshkanKazemi KiranGarimella DevinGaffney - Scott A.Hale + Scott A.Hale 4504–4517 Manual fact-checking does not scale well to serve the needs of the internet. This issue is further compounded in non-English contexts. In this paper, we discuss claim matching as a possible solution to scale fact-checking. We define claim matching as the task of identifying pairs of textual messages containing claims that can be served with one fact-check. We construct a novel dataset of WhatsApp tipline and public group messages alongside fact-checked claims that are first annotated for containing “claim-like statements” and then matched with potentially similar items and annotated for claim matching. Our dataset contains content in high-resource (English, Hindi) and lower-resource (Bengali, Malayalam, Tamil) languages. We train our own embedding model using knowledge distillation and a high-quality “teacher” model in order to address the imbalance in embedding quality between the low- and high-resource languages in our dataset. We provide evaluations on the performance of our solution and compare with baselines and existing state-of-the-art multilingual embedding models, namely LASER and LaBSE. We demonstrate that our performance exceeds LASER and LaBSE in all settings. We release our annotated datasets, codebooks, and trained embedding model to allow for further research. 2021.acl-long.347 @@ -4921,7 +4921,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Syntax-augmented Multilingual <fixed-case>BERT</fixed-case> for Cross-lingual Transfer - WasiAhmad + WasiAhmad HaoranLi Kai-WeiChang YasharMehdad @@ -4935,7 +4935,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO How to Adapt Your Pretrained Multilingual Model to 1600 Languages AbteenEbrahimi - KatharinaKann + KatharinaKann 4555–4567 Pretrained multilingual models (PMMs) enable zero-shot learning via cross-lingual transfer, performing best for languages seen during pretraining. While methods exist to improve performance for unseen languages, they have almost exclusively been evaluated using amounts of raw text only available for a small fraction of the world’s languages. In this paper, we evaluate the performance of existing methods to adapt PMMs to new languages using a resource available for close to 1600 languages: the New Testament. This is challenging for two reasons: (1) the small corpus size, and (2) the narrow domain. While performance drops for all approaches, we surprisingly still see gains of up to 17.69% accuracy for part-of-speech tagging and 6.29 F1 for NER on average over all languages as compared to XLM-R. Another unexpected finding is that continued pretraining, the simplest approach, performs best. Finally, we perform a case study to disentangle the effects of domain and size and to shed light on the influence of the finetuning source language. 2021.acl-long.351 @@ -5031,7 +5031,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Recursive Tree-Structured Self-Attention for Answer Sentence Selection KhalilMrini EmiliaFarcas - NdapaNakashole + NdapaNakashole 4651–4661 Syntactic structure is an important component of natural language text. Recent top-performing models in Answer Sentence Selection (AS2) use self-attention and transfer learning, but not syntactic structure. Tree structures have shown strong performance in tasks with sentence pair input like semantic relatedness. We investigate whether tree structures can boost performance in AS2. We introduce the Tree Aggregation Transformer: a novel recursive, tree-structured self-attention model for AS2. The recursive nature of our model is able to represent all levels of syntactic parse trees with only one additional self-attention layer. Without transfer learning, we establish a new state of the art on the popular TrecQA and WikiQA benchmark datasets. Additionally, we evaluate our method on four Community Question Answering datasets, and find that tree-structured representations have limitations with noisy user-generated text. We conduct probing experiments to evaluate how our models leverage tree structures across datasets. Our findings show that the ability of tree-structured models to successfully absorb syntactic information is strongly correlated with a higher performance in AS2. 2021.acl-long.358 @@ -5044,7 +5044,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO ZikunHu YixinCao LifuHuang - Tat-SengChua + Tat-SengChua 4662–4671 Knowledge Graph (KG) and attention mechanism have been demonstrated effective in introducing and selecting useful information for weakly supervised methods. However, only qualitative analysis and ablation study are provided as evidence. In this paper, we contribute a dataset and propose a paradigm to quantitatively evaluate the effect of attention and KG on bag-level relation extraction (RE). We find that (1) higher attention accuracy may lead to worse performance as it may harm the model’s ability to extract entity mention features; (2) the performance of attention is largely influenced by various noise distribution patterns, which is closely related to real-world datasets; (3) KG-enhanced attention indeed improves RE performance, while not through enhanced attention but by incorporating entity prior; and (4) attention mechanism may exacerbate the issue of insufficient training data. Based on these findings, we show that a straightforward variant of RE model can achieve significant improvements (6% AUC on average) on two real-world datasets as compared with three state-of-the-art baselines. Our codes and datasets are available at https://github.com/zig-kwin-hu/how-KG-ATT-help. 2021.acl-long.359 @@ -5090,7 +5090,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO YanLiang ChristanGrant XiangRen - Xin LunaDong + Xin LunaDong 4694–4705 Automatic extraction of product attribute values is an important enabling technology in e-Commerce platforms. This task is usually modeled using sequence labeling architectures, with several extensions to handle multi-attribute extraction. One line of previous work constructs attribute-specific models, through separate decoders or entirely separate models. However, this approach constrains knowledge sharing across different attributes. Other contributions use a single multi-attribute model, with different techniques to embed attribute information. But sharing the entire network parameters across all attributes can limit the model’s capacity to capture attribute-specific characteristics. In this paper we present AdaTag, which uses adaptive decoding to handle extraction. We parameterize the decoder with pretrained attribute embeddings, through a hypernetwork and a Mixture-of-Experts (MoE) module. This allows for separate, but semantically correlated, decoders to be generated on the fly for different attributes. This approach facilitates knowledge sharing, while maintaining the specificity of each attribute. Our experiments on a real-world e-Commerce dataset show marked improvements over previous methods. 2021.acl-long.362 @@ -5103,7 +5103,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO ZhengbaoJiang JialongHan BunyaminSisman - Xin LunaDong + Xin LunaDong 4706–4716 Integrating extracted knowledge from the Web to knowledge graphs (KGs) can facilitate tasks like question answering. We study relation integration that aims to align free-text relations in subject-relation-object extractions to relations in a target KG. To address the challenge that free-text relations are ambiguous, previous methods exploit neighbor entities and relations for additional context. However, the predictions are made independently, which can be mutually inconsistent. We propose a two-stage Collective Relation Integration (CoRI) model, where the first stage independently makes candidate predictions, and the second stage employs a collective model that accesses all candidate predictions to make globally coherent predictions. We further improve the collective model with augmented data from the portion of the target KG that is otherwise unused. Experiment results on two datasets show that CoRI can significantly outperform the baselines, improving AUC from .677 to .748 and from .716 to .780, respectively. 2021.acl-long.363 @@ -5116,7 +5116,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Robert LLogan IV AndrewMcCallum SameerSingh - DanBikel + DanBikel 4717–4731 Streaming cross document entity coreference (CDC) systems disambiguate mentions of named entities in a scalable manner via incremental clustering. Unlike other approaches for named entity disambiguation (e.g., entity linking), streaming CDC allows for the disambiguation of entities that are unknown at inference time. Thus, it is well-suited for processing streams of data where new entities are frequently introduced. Despite these benefits, this task is currently difficult to study, as existing approaches are either evaluated on datasets that are no longer available, or omit other crucial details needed to ensure fair comparison. In this work, we address this issue by compiling a large benchmark adapted from existing free datasets, and performing a comprehensive evaluation of a number of novel and existing baseline models. We investigate: how to best encode mentions, which clustering algorithms are most effective for grouping mentions, how models transfer to different domains, and how bounding the number of mentions tracked during inference impacts performance. Our results show that the relative performance of neural and feature-based mention encoders varies across different domains, and in most cases the best performance is achieved using a combination of both approaches. We also find that performance is minimally impacted by limiting the number of tracked mentions. 2021.acl-long.364 @@ -5132,7 +5132,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO WeiLi JiafengGuo YuanzhuoWang - XueqiCheng + XueqiCheng 4732–4743 Temporal Knowledge Graphs (TKGs) have been developed and used in many different areas. Reasoning on TKGs that predicts potential facts (events) in the future brings great challenges to existing models. When facing a prediction task, human beings usually search useful historical information (i.e., clues) in their memories and then reason for future meticulously. Inspired by this mechanism, we propose CluSTeR to predict future facts in a two-stage manner, Clue Searching and Temporal Reasoning, accordingly. Specifically, at the clue searching stage, CluSTeR learns a beam search policy via reinforcement learning (RL) to induce multiple clues from historical facts. At the temporal reasoning stage, it adopts a graph convolution network based sequence method to deduce answers from clues. Experiments on four datasets demonstrate the substantial advantages of CluSTeR compared with the state-of-the-art methods. Moreover, the clues found by CluSTeR further provide interpretability for the results. 2021.acl-long.365 @@ -5142,7 +5142,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Employing Argumentation Knowledge Graphs for Neural Argument Generation - KhalidAl Khatib + KhalidAl Khatib LukasTrautner HenningWachsmuth YufangHou @@ -5156,7 +5156,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Learning Span-Level Interactions for Aspect Sentiment Triplet Extraction - LuXu + LuXu Yew KenChia LidongBing 4755–4766 @@ -5222,7 +5222,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO FeiLi ZhiChaoLin MeishanZhang - DonghongJi + DonghongJi 4814–4828 Research on overlapped and discontinuous named entity recognition (NER) has received increasing attention. The majority of previous work focuses on either overlapped or discontinuous entities. In this paper, we propose a novel span-based model that can recognize both overlapped and discontinuous entities jointly. The model includes two major steps. First, entity fragments are recognized by traversing over all possible text spans, thus, overlapped entities can be recognized. Second, we perform relation classification to judge whether a given pair of entity fragments to be overlapping or succession. In this way, we can recognize not only discontinuous entities, and meanwhile doubly check the overlapped entities. As a whole, our model can be regarded as a relation extraction paradigm essentially. Experimental results on multiple benchmark datasets (i.e., CLEF, GENIA and ACE05) show that our model is highly competitive for overlapped and discontinuous NER. 2021.acl-long.372 @@ -5304,7 +5304,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Parameter-Efficient Transfer Learning with Diff Pruning DemiGuo - AlexanderRush + AlexanderRush YoonKim 4884–4896 The large size of pretrained networks makes them difficult to deploy for multiple tasks in storage-constrained settings. Diff pruning enables parameter-efficient transfer learning that scales well with new tasks. The approach learns a task-specific “diff” vector that extends the original pretrained parameters. This diff vector is adaptively pruned during training with a differentiable approximation to the L0-norm penalty to encourage sparsity. As the number of tasks increases, diff pruning remains parameter-efficient, as it requires storing only a small diff vector for each task. Since it does not require access to all tasks during training, it is attractive in on-device deployment settings where tasks arrive in stream or even from different providers. Diff pruning can match the performance of finetuned baselines on the GLUE benchmark while only modifying 0.5% of the pretrained model’s parameters per task and scales favorably in comparison to popular pruning approaches. @@ -5384,8 +5384,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Generating <fixed-case>SOAP</fixed-case> Notes from Doctor-Patient Conversations Using Modular Summarization Techniques KundanKrishna SopanKhosla - JeffreyBigham - Zachary C.Lipton + JeffreyBigham + Zachary C.Lipton 4958–4972 Following each patient visit, physicians draft long semi-structured clinical summaries called SOAP notes. While invaluable to clinicians and researchers, creating digital SOAP notes is burdensome, contributing to physician burnout. In this paper, we introduce the first complete pipelines to leverage deep summarization models to generate these notes based on transcripts of conversations between physicians and patients. After exploring a spectrum of methods across the extractive-abstractive spectrum, we propose Cluster2Sent, an algorithm that (i) extracts important utterances relevant to each summary section; (ii) clusters together related utterances; and then (iii) generates one summary sentence per cluster. Cluster2Sent outperforms its purely abstractive counterpart by 8 ROUGE-1 points, and produces significantly more factual and coherent sentences as assessed by expert human evaluators. For reproducibility, we demonstrate similar benefits on the publicly available AMI dataset. Our results speak to the benefits of structuring summaries into sections and annotating supporting evidence when constructing summarization corpora. 2021.acl-long.384 @@ -5465,7 +5465,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO KaitaoZhang JieBao ZhiyuanLiu - PaulBennett + PaulBennett 5030–5043 The effectiveness of Neural Information Retrieval (Neu-IR) often depends on a large scale of in-domain relevance training signals, which are not always available in real-world ranking scenarios. To democratize the benefits of Neu-IR, this paper presents MetaAdaptRank, a domain adaptive learning method that generalizes Neu-IR models from label-rich source domains to few-shot target domains. Drawing on source-domain massive relevance supervision, MetaAdaptRank contrastively synthesizes a large number of weak supervision signals for target domains and meta-learns to reweight these synthetic “weak” data based on their benefits to the target-domain ranking accuracy of Neu-IR models. Experiments on three TREC benchmarks in the web, news, and biomedical domains show that MetaAdaptRank significantly improves the few-shot ranking accuracy of Neu-IR models. Further analyses indicate that MetaAdaptRank thrives from both its contrastive weak data synthesis and meta-reweighted data selection. The code and data of this paper can be obtained from https://github.com/thunlp/MetaAdaptRank. 2021.acl-long.390 @@ -5522,7 +5522,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO FandongMeng BiaoZhang JieZhou - DegenHuang + DegenHuang QingqiangWu JinsongSu 5076–5085 @@ -5614,7 +5614,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Data Augmentation with Adversarial Training for Cross-Lingual <fixed-case>NLI</fixed-case> - XinDong + XinDong YaxinZhu ZuohuiFu DongkuanXu @@ -5754,7 +5754,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO DavidHarwath TylerMiller ChristopherSong - JamesGlass + JamesGlass 5284–5300 In this paper we present the first model for directly synthesizing fluent, natural-sounding spoken audio captions for images that does not require natural language text as an intermediate representation or source of supervision. Instead, we connect the image captioning module and the speech synthesis module with a set of discrete, sub-word speech units that are discovered with a self-supervised visual grounding task. We conduct experiments on the Flickr8k spoken caption dataset in addition to a novel corpus of spoken audio captions collected for the popular MSCOCO dataset, demonstrating that our generated captions also capture diverse visual semantics of the images they describe. We investigate several different intermediate speech representations, and empirically find that the representation must satisfy several important properties to serve as drop-in replacements for text. 2021.acl-long.411 @@ -5806,9 +5806,9 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Learning to Explain: Generating Stable Explanations Fast XuelinSitu IngridZukerman - CecileParis + CecileParis SameenMaruf - GholamrezaHaffari + GholamrezaHaffari 5340–5355 The importance of explaining the outcome of a machine learning model, especially a black-box model, is widely acknowledged. Recent approaches explain an outcome by identifying the contributions of input features to this outcome. In environments involving large black-box models or complex inputs, this leads to computationally demanding algorithms. Further, these algorithms often suffer from low stability, with explanations varying significantly across similar examples. In this paper, we propose a Learning to Explain (L2E) approach that learns the behaviour of an underlying explanation algorithm simultaneously from all training examples. Once the explanation algorithm is distilled into an explainer network, it can be used to explain new instances. Our experiments on three classification tasks, which compare our approach to six explanation algorithms, show that L2E is between 5 and 7.5×10ˆ4 times faster than these algorithms, while generating more stable explanations, and having comparable faithfulness to the black-box model. 2021.acl-long.415 @@ -5846,7 +5846,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Enabling Lightweight Fine-tuning for Pre-trained Language Model Compression based on Matrix Product Operators PeiyuLiu Ze-FengGao - Wayne XinZhao + Wayne XinZhao Zhi-YuanXie Zhong-YiLu Ji-RongWen @@ -5969,7 +5969,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO XiaoqingZheng Cho-JuiHsieh Kai-WeiChang - XuanjingHuang + XuanjingHuang 5482–5492 Although deep neural networks have achieved prominent performance on many NLP tasks, they are vulnerable to adversarial examples. We propose Dirichlet Neighborhood Ensemble (DNE), a randomized method for training a robust model to defense synonym substitution-based attacks. During training, DNE forms virtual sentences by sampling embedding vectors for each word in an input sentence from a convex hull spanned by the word and its synonyms, and it augments them with the training data. In such a way, the model is robust to adversarial attacks while maintaining the performance on the original clean data. DNE is agnostic to the network architectures and scales to large models (e.g., BERT) for NLP applications. Through extensive experimentation, we demonstrate that our method consistently outperforms recently proposed defense methods by a significant margin across different network architectures and multiple data sets. 2021.acl-long.426 @@ -5980,7 +5980,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Shortformer: Better Language Modeling using Shorter Inputs OfirPress - Noah A.Smith + Noah A.Smith MikeLewis 5493–5505 Increasing the input length has been a driver of progress in language modeling with transformers. We identify conditions where shorter inputs are not harmful, and achieve perplexity and efficiency improvements through two new methods that decrease input length. First, we show that initially training a model on short subsequences before moving on to longer ones both reduces overall training time and, surprisingly, substantially improves perplexity. Second, we show how to improve the efficiency of recurrence methods in transformers, which let models condition on previously processed tokens when generating sequences that exceed the maximal length the transformer can handle at once. Existing methods require computationally expensive relative position embeddings; we introduce a simple alternative of adding absolute position embeddings to queries and keys instead of to word embeddings, which efficiently produces superior results. We show that these recurrent models also benefit from short input lengths. Combining these techniques speeds up training by a factor of 1.65, reduces memory usage, and substantially improves perplexity on WikiText-103, without adding any parameters. @@ -6108,7 +6108,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO A Human-machine Collaborative Framework for Evaluating Malevolence in Dialogues YangjunZhang PengjieRen - Maartende Rijke + Maartende Rijke 5612–5623 Conversational dialogue systems (CDSs) are hard to evaluate due to the complexity of natural language. Automatic evaluation of dialogues often shows insufficient correlation with human judgements. Human evaluation is reliable but labor-intensive. We introduce a human-machine collaborative framework, HMCEval, that can guarantee reliability of the evaluation outcomes with reduced human effort. HMCEval casts dialogue evaluation as a sample assignment problem, where we need to decide to assign a sample to a human or a machine for evaluation. HMCEval includes a model confidence estimation module to estimate the confidence of the predicted sample assignment, and a human effort estimation module to estimate the human effort should the sample be assigned to human evaluation, as well as a sample assignment execution module that finds the optimum assignment solution based on the estimated confidence and effort. We assess the performance of HMCEval on the task of evaluating malevolence in dialogues. The experimental results show that HMCEval achieves around 99% evaluation accuracy with half of the human effort spared, showing that HMCEval provides reliable evaluation outcomes while reducing human effort by a large amount. 2021.acl-long.436 @@ -6136,7 +6136,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO PengjieRen ZhuminChen ZhaochunRen - Maartende Rijke + Maartende Rijke MingZhou 5638–5650 Conversational Question Simplification (CQS) aims to simplify self-contained questions into conversational ones by incorporating some conversational characteristics, e.g., anaphora and ellipsis. Existing maximum likelihood estimation based methods often get trapped in easily learned tokens as all tokens are treated equally during training. In this work, we introduce a Reinforcement Iterative Sequence Editing (RISE) framework that optimizes the minimum Levenshtein distance through explicit editing actions. RISE is able to pay attention to tokens that are related to conversational characteristics. To train RISE, we devise an Iterative Reinforce Training (IRT) algorithm with a Dynamic Programming based Sampling (DPS) process to improve exploration. Experimental results on two benchmark datasets show that RISE significantly outperforms state-of-the-art methods and generalizes well on unseen data. @@ -6175,9 +6175,9 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO <fixed-case>D</fixed-case>yna<fixed-case>E</fixed-case>val: Unifying Turn and Dialogue Level Evaluation - ChenZhang + ChenZhang YimingChen - Luis FernandoD’Haro + Luis FernandoD’Haro YanZhang ThomasFriedrichs GrandeeLee @@ -6221,7 +6221,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO YunlongLiang FandongMeng YufengChen - JinanXu + JinanXu JieZhou 5711–5724 Neural chat translation aims to translate bilingual conversational text, which has a broad application in international exchanges and cooperation. Despite the impressive performance of sentence-level and context-aware Neural Machine Translation (NMT), there still remain challenges to translate bilingual conversational text due to its inherent characteristics such as role preference, dialogue coherence, and translation consistency. In this paper, we aim to promote the translation quality of conversational text by modeling the above properties. Specifically, we design three latent variational modules to learn the distributions of bilingual conversational characteristics. Through sampling from these learned distributions, the latent variables, tailored for role preference, dialogue coherence, and translation consistency, are incorporated into the NMT model for better translation. We evaluate our approach on the benchmark dataset BConTrasT (English<->German) and a self-collected bilingual dialogue corpus, named BMELD (English<->Chinese). Extensive experiments show that our approach notably boosts the performance over strong baselines by a large margin and significantly surpasses some state-of-the-art context-aware NMT models in terms of BLEU and TER. Additionally, we make the BMELD dataset publicly available for the research community. @@ -6264,7 +6264,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO IvanVulić RoiReichart AnnaKorhonen - HinrichSchütze + HinrichSchütze 5751–5767 Few-shot crosslingual transfer has been shown to outperform its zero-shot counterpart with pretrained encoders like multilingual BERT. Despite its growing popularity, little to no attention has been paid to standardizing and analyzing the design of few-shot experiments. In this work, we highlight a fundamental risk posed by this shortcoming, illustrating that the model exhibits a high degree of sensitivity to the selection of few shots. We conduct a large-scale experimental study on 40 sets of sampled few shots for six diverse NLP tasks across up to 40 languages. We provide an analysis of success and failure cases of few-shot transfer, which highlights the role of lexical features. Additionally, we show that a straightforward full model finetuning approach is quite effective for few-shot transfer, outperforming several state-of-the-art few-shot approaches. As a step towards standardizing few-shot crosslingual experimental designs, we make our sampled few shots publicly available. 2021.acl-long.447 @@ -6275,7 +6275,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Coreference Reasoning in Machine Reading Comprehension MingzhuWu - Nafise SadatMoosavi + Nafise SadatMoosavi DanRoth IrynaGurevych 5768–5781 @@ -6303,7 +6303,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO A Conditional Splitting Framework for Efficient Constituency Parsing Thanh-TungNguyen Xuan-PhiNguyen - ShafiqJoty + ShafiqJoty XiaoliLi 5795–5807 We introduce a generic seq2seq parsing framework that casts constituency parsing problems (syntactic and discourse parsing) into a series of conditional splitting decisions. Our parsing model estimates the conditional probability distribution of possible splitting points in a given text span and supports efficient top-down decoding, which is linear in number of nodes. The conditional splitting formulation together with efficient beam search inference facilitate structural consistency without relying on expensive structured inference. Crucially, for discourse analysis we show that in our formulation, discourse segmentation can be framed as a special case of parsing which allows us to perform discourse parsing without requiring segmentation as a pre-requisite. Experiments show that our model achieves good results on the standard syntactic parsing tasks under settings with/without pre-trained representations and rivals state-of-the-art (SoTA) methods that are more computationally expensive than ours. In discourse parsing, our method outperforms SoTA by a good margin. @@ -6349,7 +6349,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO LinlinLiu BoshengDing LidongBing - ShafiqJoty + ShafiqJoty LuoSi ChunyanMiao 5834–5846 @@ -6361,7 +6361,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Lexicon Enhanced <fixed-case>C</fixed-case>hinese Sequence Labeling Using <fixed-case>BERT</fixed-case> Adapter - WeiLiu + WeiLiu XiyanFu YueZhang WenmingXiao @@ -6379,7 +6379,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO QinzhuoWu QiZhang ZhongyuWei - XuanjingHuang + XuanjingHuang 5859–5869 In recent years, math word problem solving has received considerable attention and achieved promising results, but previous methods rarely take numerical values into consideration. Most methods treat the numerical values in the problems as number symbols, and ignore the prominent role of the numerical values in solving the problem. In this paper, we propose a novel approach called NumS2T, which enhances math word problem solving performance by explicitly incorporating numerical values into a sequence-to-tree network. In addition, a numerical properties prediction mechanism is used to capture the category and comparison information of numerals and measure their importance in global expressions. Experimental results on the Math23K and APE datasets demonstrate that our model achieves better performance than existing state-of-the-art models. 2021.acl-long.455 @@ -6583,7 +6583,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO BoPeng YiLiao QunLiu - DeyiXiong + DeyiXiong 6012–6025 In order to deeply understand the capability of pretrained language models in text generation and conduct a diagnostic evaluation, we propose TGEA, an error-annotated dataset with multiple benchmark tasks for text generation from pretrained language models (PLMs). We use carefully selected prompt words to guide GPT-2 to generate candidate sentences, from which we select 47K for error annotation. Crowdsourced workers manually check each of these sentences and detect 12k erroneous sentences. We create an error taxonomy to cover 24 types of errors occurring in these erroneous sentences according to the nature of errors with respect to linguistics and knowledge (e.g., common sense). For each erroneous span in PLM-generated sentences, we also detect another span that is closely associated with it. Each error is hence manually labeled with comprehensive annotations, including the span of the error, the associated span, minimal correction to the error, the type of the error, and rationale behind the error. Apart from the fully annotated dataset, we also present a detailed description of the data collection procedure, statistics and analysis of the dataset. This is the first dataset with comprehensive annotations for PLM-generated texts, which facilitates the diagnostic evaluation of PLM-based text generation. Furthermore, we use TGEA as a benchmark dataset and propose a series of automatic diagnosis tasks, including error detection, error type classification, associated span detection, error rationale generation, to further promote future study on the automatic error detection and correction on texts generated by pretrained language models. 2021.acl-long.469 @@ -6762,7 +6762,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO YinghaoLi PranavShetty LucasLiu - ChaoZhang + ChaoZhang LeSong 6178–6190 We study the problem of learning a named entity recognition (NER) tagger using noisy labels from multiple weak supervision sources. Though cheap to obtain, the labels from weak supervision sources are often incomplete, inaccurate, and contradictory, making it difficult to learn an accurate NER model. To address this challenge, we propose a conditional hidden Markov model (CHMM), which can effectively infer true labels from multi-source noisy labels in an unsupervised way. CHMM enhances the classic hidden Markov model with the contextual representation power of pre-trained language models. Specifically, CHMM learns token-wise transition and emission probabilities from the BERT embeddings of the input tokens to infer the latent true labels from noisy observations. We further refine CHMM with an alternate-training approach (CHMM-ALT). It fine-tunes a BERT-NER model with the labels inferred by CHMM, and this BERT-NER’s output is regarded as an additional weak source to train the CHMM in return. Experiments on four NER benchmarks from various domains show that our method outperforms state-of-the-art weakly supervised NER models by wide margins. @@ -6792,7 +6792,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO TaoGui LinyangLi QiZhang - XuanjingHuang + XuanjingHuang YaqianZhou 6201–6213 Distant supervision for relation extraction provides uniform bag labels for each sentence inside the bag, while accurate sentence labels are important for downstream applications that need the exact relation type. Directly using bag labels for sentence-level training will introduce much noise, thus severely degrading performance. In this work, we propose the use of negative training (NT), in which a model is trained using complementary labels regarding that “the instance does not belong to these complementary labels”. Since the probability of selecting a true label as a complementary label is low, NT provides less noisy information. Furthermore, the model trained with NT is able to separate the noisy data from the training data. Based on NT, we propose a sentence-level framework, SENT, for distant relation extraction. SENT not only filters the noisy data to construct a cleaner dataset, but also performs a re-labeling process to transform the noisy data into useful training data, thus further benefiting the model’s performance. Experimental results show the significant improvement of the proposed method over previous methods on sentence-level evaluation and de-noise effect. @@ -6805,7 +6805,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO An End-to-End Progressive Multi-Task Learning Framework for Medical Named Entity Recognition and Normalization BaohangZhou XiangruiCai - YingZhang + YingZhang XiaojieYuan 6214–6224 Medical named entity recognition (NER) and normalization (NEN) are fundamental for constructing knowledge graphs and building QA systems. Existing implementations for medical NER and NEN are suffered from the error propagation between the two tasks. The mispredicted mentions from NER will directly influence the results of NEN. Therefore, the NER module is the bottleneck of the whole system. Besides, the learnable features for both tasks are beneficial to improving the model performance. To avoid the disadvantages of existing models and exploit the generalized representation across the two tasks, we design an end-to-end progressive multi-task learning model for jointly modeling medical NER and NEN in an effective way. There are three level tasks with progressive difficulty in the framework. The progressive tasks can reduce the error propagation with the incremental task settings which implies the lower level tasks gain the supervised signals other than errors from the higher level tasks to improve their performances. Besides, the context features are exploited to enrich the semantic information of entity mentions extracted by NER. The performance of NEN profits from the enhanced entity mention features. The standard entities from knowledge bases are introduced into the NER module for extracting corresponding entity mentions correctly. The empirical results on two publicly available medical literature datasets demonstrate the superiority of our method over nine typical methods. @@ -6852,10 +6852,10 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Joint Biomedical Entity and Relation Extraction with Knowledge-Enhanced Collective Inference - TuanLai + TuanLai HengJi - ChengXiangZhai - Quan HungTran + ChengXiangZhai + Quan HungTran 6248–6260 Compared to the general news domain, information extraction (IE) from biomedical text requires much broader domain knowledge. However, many previous IE methods do not utilize any external knowledge during inference. Due to the exponential growth of biomedical publications, models that do not go beyond their fixed set of parameters will likely fall behind. Inspired by how humans look up relevant information to comprehend a scientific text, we present a novel framework that utilizes external knowledge for joint entity and relation extraction named KECI (Knowledge-Enhanced Collective Inference). Given an input text, KECI first constructs an initial span graph representing its initial understanding of the text. It then uses an entity linker to form a knowledge graph containing relevant background knowledge for the the entity mentions in the text. To make the final predictions, KECI fuses the initial span graph and the knowledge graph into a more refined graph using an attention mechanism. KECI takes a collective approach to link mention spans to entities by integrating global relational information into local representations using graph convolutional networks. Our experimental results show that the framework is highly effective, achieving new state-of-the-art results in two different benchmark datasets: BioRelEx (binding interaction detection) and ADE (adverse drug event extraction). For example, KECI achieves absolute improvements of 4.59% and 4.91% in F1 scores over the state-of-the-art on the BioRelEx entity and relation extraction tasks 2021.acl-long.488 @@ -6870,7 +6870,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO HengJi AhmedElsayed SkatjeMyers - MarthaPalmer + MarthaPalmer 6261–6270 Biomedical Information Extraction from scientific literature presents two unique and non-trivial challenges. First, compared with general natural language texts, sentences from scientific papers usually possess wider contexts between knowledge elements. Moreover, comprehending the fine-grained scientific entities and events urgently requires domain-specific background knowledge. In this paper, we propose a novel biomedical Information Extraction (IE) model to tackle these two challenges and extract scientific entities and events from English research papers. We perform Abstract Meaning Representation (AMR) to compress the wide context to uncover a clear semantic structure for each complex sentence. Besides, we construct the sentence-level knowledge graph from an external knowledge base and use it to enrich the AMR graph to improve the model’s understanding of complex scientific concepts. We use an edge-conditioned graph attention network to encode the knowledge-enriched AMR graph for biomedical IE tasks. Experiments on the GENIA 2011 dataset show that the AMR and external knowledge have contributed 1.8% and 3.0% absolute F-score gains respectively. In order to evaluate the impact of our approach on real-world problems that involve topic-specific fine-grained knowledge elements, we have also created a new ontology and annotated corpus for entity and event extraction for the COVID-19 scientific literature, which can serve as a new benchmark for the biomedical IE community. 2021.acl-long.489 @@ -6947,7 +6947,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO FangxiangFeng ZhanyuMa XiaojieWang - EduardHovy + EduardHovy 6319–6329 Aspect-based sentiment analysis is a fine-grained sentiment classification task. Recently, graph neural networks over dependency trees have been explored to explicitly model connections between aspects and opinion words. However, the improvement is limited due to the inaccuracy of the dependency parsing results and the informal expressions and complexity of online reviews. To overcome these challenges, in this paper, we propose a dual graph convolutional networks (DualGCN) model that considers the complementarity of syntax structures and semantic correlations simultaneously. Particularly, to alleviate dependency parsing errors, we design a SynGCN module with rich syntactic knowledge. To capture semantic correlations, we design a SemGCN module with self-attention mechanism. Furthermore, we propose orthogonal and differential regularizers to capture semantic correlations between words precisely by constraining attention scores in the SemGCN module. The orthogonal regularizer encourages the SemGCN to learn semantically correlated words with less overlap for each word. The differential regularizer encourages the SemGCN to learn semantic features that the SynGCN fails to capture. Experimental results on three public datasets show that our DualGCN model outperforms state-of-the-art methods and verify the effectiveness of our model. 2021.acl-long.494 @@ -7005,8 +7005,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Keep It Simple: Unsupervised Simplification of Multi-Paragraph Text PhilippeLaban TobiasSchnabel - PaulBennett - Marti A.Hearst + PaulBennett + Marti A.Hearst 6365–6378 This work presents Keep it Simple (KiS), a new approach to unsupervised text simplification which learns to balance a reward across three properties: fluency, salience and simplicity. We train the model with a novel algorithm to optimize the reward (k-SCST), in which the model proposes several candidate simplifications, computes each candidate’s reward, and encourages candidates that outperform the mean reward. Finally, we propose a realistic text comprehension task as an evaluation method for text simplification. When tested on the English news domain, the KiS model outperforms strong supervised baselines by more than 4 SARI points, and can help people complete a comprehension task an average of 18% faster while retaining accuracy, when compared to the original text. 2021.acl-long.498 @@ -7073,7 +7073,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO <fixed-case>BERTG</fixed-case>en: Multi-task Generation through <fixed-case>BERT</fixed-case> FaidonMitzalis OzanCaglayan - PranavaMadhyastha + PranavaMadhyastha LuciaSpecia 6440–6455 We present BERTGen, a novel, generative, decoder-only model which extends BERT by fusing multimodal and multilingual pre-trained models VL-BERT and M-BERT, respectively. BERTGen is auto-regressively trained for language generation tasks, namely image captioning, machine translation and multimodal machine translation, under a multi-task setting. With a comprehensive set of evaluations, we show that BERTGen outperforms many strong baselines across the tasks explored. We also show BERTGen’s ability for zero-shot language generation, where it exhibits competitive performance to supervised counterparts. Finally, we conduct ablation studies which demonstrate that BERTGen substantially benefits from multi-tasking and effectively transfers relevant inductive biases from the pre-trained models. @@ -7100,7 +7100,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO PatrickFernandes KayoYin GrahamNeubig - André F. T.Martins + André F. T.Martins 6467–6478 Recent work in neural machine translation has demonstrated both the necessity and feasibility of using inter-sentential context, context from sentences other than those currently being translated. However, while many current methods present model architectures that theoretically can use this extra context, it is often not clear how much they do actually utilize it at translation time. In this paper, we introduce a new metric, conditional cross-mutual information, to quantify usage of context by these models. Using this metric, we measure how much document-level machine translation systems use particular varieties of context. We find that target context is referenced more than source context, and that including more context has a diminishing affect on results. We then introduce a new, simple training method, context-aware word dropout, to increase the usage of context by context-aware models. Experiments show that our method not only increases context usage, but also improves the translation quality according to metrics such as BLEU and COMET, as well as performance on anaphoric pronoun resolution and lexical cohesion contrastive datasets. 2021.acl-long.505 @@ -7112,9 +7112,9 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Beyond Offline Mapping: Learning Cross-lingual Word Embeddings through Context Anchoring AitorOrmazabal MikelArtetxe - AitorSoroa - GorkaLabaka - EnekoAgirre + AitorSoroa + GorkaLabaka + EnekoAgirre 6479–6489 Recent research on cross-lingual word embeddings has been dominated by unsupervised mapping approaches that align monolingual embeddings. Such methods critically rely on those embeddings having a similar structure, but it was recently shown that the separate training in different languages causes departures from this assumption. In this paper, we propose an alternative approach that does not have this limitation, while requiring a weak seed dictionary (e.g., a list of identical words) as the only form of supervision. Rather than aligning two fixed embedding spaces, our method works by fixing the target language embeddings, and learning a new set of embeddings for the source language that are aligned with them. To that end, we use an extension of skip-gram that leverages translated context words as anchor points, and incorporates self-learning and iterative restarts to reduce the dependency on the initial dictionary. Our approach outperforms conventional mapping methods on bilingual lexicon induction, and obtains competitive results in the downstream XNLI task. 2021.acl-long.506 @@ -7127,7 +7127,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO HolgerSchwenk GuillaumeWenzek SergeyEdunov - EdouardGrave + EdouardGrave ArmandJoulin AngelaFan 6490–6500 @@ -7255,7 +7255,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO FirojAlam FabrizioSilvestri HamedFirooz - PreslavNakov + PreslavNakov GiovanniDa San Martino 6603–6617 Propaganda can be defined as a form of communication that aims to influence the opinions or the actions of people towards a specific goal; this is achieved by means of well-defined rhetorical and psychological devices. Propaganda, in the form we know it today, can be dated back to the beginning of the 17th century. However, it is with the advent of the Internet and the social media that propaganda has started to spread on a much larger scale than before, thus becoming major societal and political issue. Nowadays, a large fraction of propaganda in social media is multimodal, mixing textual with visual content. With this in mind, here we propose a new multi-label multimodal task: detecting the type of propaganda techniques used in memes. We further create and release a new corpus of 950 memes, carefully annotated with 22 propaganda techniques, which can appear in the text, in the image, or in both. Our analysis of the corpus shows that understanding both modalities together is essential for detecting these techniques. This is further confirmed in our experiments with several state-of-the-art multimodal models. @@ -7268,8 +7268,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO On the Efficacy of Adversarial Data Collection for Question Answering: Results from a Large-Scale Randomized Study DivyanshKaushik DouweKiela - Zachary C.Lipton - Wen-tauYih + Zachary C.Lipton + Wen-tauYih 6618–6633 In adversarial data collection (ADC), a human workforce interacts with a model in real time, attempting to produce examples that elicit incorrect predictions. Researchers hope that models trained on these more challenging datasets will rely less on superficial patterns, and thus be less brittle. However, despite ADC’s intuitive appeal, it remains unclear when training on adversarial datasets produces more robust models. In this paper, we conduct a large-scale controlled study focused on question answering, assigning workers at random to compose questions either (i) adversarially (with a model in the loop); or (ii) in the standard fashion (without a model). Across a variety of models and datasets, we find that models trained on adversarial data usually perform better on other adversarial datasets but worse on a diverse collection of out-of-domain evaluation sets. Finally, we provide a qualitative analysis of adversarial (vs standard) data, identifying key differences and offering guidance for future research. 2021.acl-long.517 @@ -7310,7 +7310,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Question Answering Over Temporal Knowledge Graphs ApoorvSaxena SoumenChakrabarti - ParthaTalukdar + ParthaTalukdar 6663–6676 Temporal Knowledge Graphs (Temporal KGs) extend regular Knowledge Graphs by providing temporal scopes (start and end times) on each edge in the KG. While Question Answering over KG (KGQA) has received some attention from the research community, QA over Temporal KGs (Temporal KGQA) is a relatively unexplored area. Lack of broad coverage datasets has been another factor limiting progress in this area. We address this challenge by presenting CRONQUESTIONS, the largest known Temporal KGQA dataset, clearly stratified into buckets of structural complexity. CRONQUESTIONS expands the only known previous dataset by a factor of 340x. We find that various state-of-the-art KGQA methods fall far short of the desired performance on this new dataset. In response, we also propose CRONKGQA, a transformer-based solution that exploits recent advances in Temporal KG embeddings, and achieves performance superior to all baselines, with an increase of 120% in accuracy over the next best performing method. Through extensive experiments, we give detailed insights into the workings of CRONKGQA, as well as situations where significant further improvements appear possible. In addition to the dataset, we have released our code as well. 2021.acl-long.520 @@ -7337,7 +7337,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO XimingLu SwabhaSwayamdipta ChandraBhagavatula - Noah A.Smith + Noah A.Smith YejinChoi 6691–6706 Despite recent advances in natural language generation, it remains challenging to control attributes of generated text. We propose DExperts: Decoding-time Experts, a decoding-time method for controlled text generation that combines a pretrained language model with “expert” LMs and/or “anti-expert” LMs in a product of experts. Intuitively, under the ensemble, tokens only get high probability if they are considered likely by the experts, and unlikely by the anti-experts. We apply DExperts to language detoxification and sentiment-controlled generation, where we outperform existing controllable generation methods on both automatic and human evaluations. Moreover, because DExperts operates only on the output of the pretrained LM, it is effective with (anti-)experts of smaller size, including when operating on GPT-3. Our work highlights the promise of tuning small LMs on text with (un)desirable attributes for efficient decoding-time steering. @@ -7349,9 +7349,9 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Polyjuice: Generating Counterfactuals for Explaining, Evaluating, and Improving Models TongshuangWu - Marco TulioRibeiro + Marco TulioRibeiro JeffreyHeer - DanielWeld + DanielWeld 6707–6723 While counterfactual examples are useful for analysis and training of NLP models, current generation methods either rely on manual labor to create very few counterfactuals, or only instantiate limited types of perturbations such as paraphrases or word substitutions. We present Polyjuice, a general-purpose counterfactual generator that allows for control over perturbation types and locations, trained by finetuning GPT-2 on multiple datasets of paired sentences. We show that Polyjuice produces diverse sets of realistic counterfactuals, which in turn are useful in various distinct applications: improving training and evaluation on three different tasks (with around 70% less annotation effort than manual generation), augmenting state-of-the-art explanation techniques, and supporting systematic counterfactual error analysis by revealing behaviors easily missed by human experts. 2021.acl-long.523 @@ -7377,7 +7377,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Learning Latent Structures for Cross Action Phrase Relations in Wet Lab Protocols ChaitanyaKulkarni JanyChan - EricFosler-Lussier + EricFosler-Lussier RaghuMachiraju 6737–6750 Wet laboratory protocols (WLPs) are critical for conveying reproducible procedures in biological research. They are composed of instructions written in natural language describing the step-wise processing of materials by specific actions. This process flow description for reagents and materials synthesis in WLPs can be captured by material state transfer graphs (MSTGs), which encode global temporal and causal relationships between actions. Here, we propose methods to automatically generate a MSTG for a given protocol by extracting all action relationships across multiple sentences. We also note that previous corpora and methods focused primarily on local intra-sentence relationships between actions and entities and did not address two critical issues: (i) resolution of implicit arguments and (ii) establishing long-range dependencies across sentences. We propose a new model that incrementally learns latent structures and is better suited to resolving inter-sentence relations and implicit arguments. This model draws upon a new corpus WLP-MSTG which was created by extending annotations in the WLP corpora for inter-sentence relations and implicit arguments. Our model achieves an F1 score of 54.53% for temporal and causal relations in protocols from our corpus, which is a significant improvement over previous models - DyGIE++:28.17%; spERT:27.81%. We make our annotated WLP-MSTG corpus available to the research community. @@ -7393,7 +7393,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO MihirGoyal PrakharGoel PuneetMathur - Rajiv RatnShah + Rajiv RatnShah 6751–6762 Risk prediction is an essential task in financial markets. Merger and Acquisition (M&A) calls provide key insights into the claims made by company executives about the restructuring of the financial firms. Extracting vocal and textual cues from M&A calls can help model the risk associated with such financial activities. To aid the analysis of M&A calls, we curate a dataset of conference call transcripts and their corresponding audio recordings for the time period ranging from 2016 to 2020. We introduce M3ANet, a baseline architecture that takes advantage of the multimodal multi-speaker input to forecast the financial risk associated with the M&A calls. Empirical results prove that the task is challenging, with the pro-posed architecture performing marginally better than strong BERT-based baselines. We release the M3A dataset and benchmark models to motivate future research on this challenging problem domain. 2021.acl-long.526 @@ -7406,7 +7406,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO RashadAlbo Jamara NicoHerbig AntonioKrüger - Josefvan Genabith + Josefvan Genabith 6763–6773 To translate large volumes of text in a globally connected world, more and more translators are integrating machine translation (MT) and post-editing (PE) into their translation workflows to generate publishable quality translations. While this process has been shown to save time and reduce errors, the task of translation is changing from mostly text production from scratch to fixing errors within useful but partly incorrect MT output. This is affecting the interface design of translation tools, where better support for text editing tasks is required. Here, we present the first study that investigates the usefulness of mid-air hand gestures in combination with the keyboard (GK) for text editing in PE of MT. Guided by a gesture elicitation study with 14 freelance translators, we develop a prototype supporting mid-air hand gestures for cursor placement, text selection, deletion, and reordering. These gestures combined with the keyboard facilitate all editing types required for PE. An evaluation of the prototype shows that the average editing duration of GK is only slightly slower than the standard mouse and keyboard (MK), even though participants are very familiar with the latter, and relative novices to the former. Furthermore, the qualitative analysis shows positive attitudes towards hand gestures for PE, especially when manipulating single words. 2021.acl-long.527 @@ -7422,7 +7422,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO LiangQiu SiyuanHuang XiaodanLiang - Song-ChunZhu + Song-ChunZhu 6774–6786 Geometry problem solving has attracted much attention in the NLP community recently. The task is challenging as it requires abstract problem understanding and symbolic reasoning with axiomatic knowledge. However, current datasets are either small in scale or not publicly available. Thus, we construct a new large-scale benchmark, Geometry3K, consisting of 3,002 geometry problems with dense annotation in formal language. We further propose a novel geometry solving approach with formal language and symbolic reasoning, called Interpretable Geometry Problem Solver (Inter-GPS). Inter-GPS first parses the problem text and diagram into formal language automatically via rule-based text parsing and neural object detecting, respectively. Unlike implicit learning in existing methods, Inter-GPS incorporates theorem knowledge as conditional rules and performs symbolic reasoning step by step. Also, a theorem predictor is designed to infer the theorem application sequence fed to the symbolic solver for the more efficient and reasonable searching path. Extensive experiments on the Geometry3K and GEOS datasets demonstrate that Inter-GPS achieves significant improvements over existing methods. The project with code and data is available at https://lupantech.github.io/inter-gps. 2021.acl-long.528 @@ -7432,11 +7432,11 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Joint Verification and Reranking for Open Fact Checking Over Tables - Michael SejrSchlichtkrull + Michael SejrSchlichtkrull VladimirKarpukhin BarlasOguz MikeLewis - Wen-tauYih + Wen-tauYih SebastianRiedel 6787–6799 Structured information is an important knowledge source for automatic verification of factual claims. Nevertheless, the majority of existing research into this task has focused on textual data, and the few recent inquiries into structured data have been for the closed-domain setting where appropriate evidence for each claim is assumed to have already been retrieved. In this paper, we investigate verification over structured data in the open-domain setting, introducing a joint reranking-and-verification model which fuses evidence documents in the verification component. Our open-domain model achieves performance comparable to the closed-domain state-of-the-art on the TabFact dataset, and demonstrates performance gains from the inclusion of multiple tables as well as a significant improvement over a heuristic retrieval baseline. @@ -7512,13 +7512,13 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO <fixed-case>C</fixed-case>onvo<fixed-case>S</fixed-case>umm: Conversation Summarization Benchmark and Improved Abstractive Summarization with Argument Mining - AlexanderFabbri + AlexanderFabbri FaiazRahman ImadRizvi BoruiWang HaoranLi YasharMehdad - DragomirRadev + DragomirRadev 6866–6880 While online conversations can cover a vast amount of information in many different formats, abstractive text summarization has primarily focused on modeling solely news articles. This research gap is due, in part, to the lack of standardized datasets for summarizing online discussions. To address this gap, we design annotation protocols motivated by an issues–viewpoints–assertions framework to crowdsource four new datasets on diverse online conversation forms of news comments, discussion forums, community question answering forums, and email threads. We benchmark state-of-the-art models on our datasets and analyze characteristics associated with the data. To create a comprehensive benchmark, we also evaluate these models on widely-used conversation summarization datasets to establish strong baselines in this domain. Furthermore, we incorporate argument mining through graph construction to directly model the issues, viewpoints, and assertions present in a conversation and filter noisy input, showing comparable or improved results according to automatic and human evaluations. 2021.acl-long.535 @@ -7529,10 +7529,10 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Improving Factual Consistency of Abstractive Summarization via Question Answering FengNan - CiceroNogueira dos Santos + CiceroNogueira dos Santos HenghuiZhu PatrickNg - KathleenMcKeown + KathleenMcKeown RameshNallapati DejiaoZhang ZhiguoWang @@ -7562,7 +7562,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Cross-Lingual Abstractive Summarization with Limited Parallel Resources YuBai YangGao - HeyanHuang + HeyanHuang 6910–6924 Parallel cross-lingual summarization data is scarce, requiring models to better use the limited available cross-lingual resources. Existing methods to do so often adopt sequence-to-sequence networks with multi-task frameworks. Such approaches apply multiple decoders, each of which is utilized for a specific task. However, these independent decoders share no parameters, hence fail to capture the relationships between the discrete phrases of summaries in different languages, breaking the connections in order to transfer the knowledge of the high-resource languages to low-resource languages. To bridge these connections, we propose a novel Multi-Task framework for Cross-Lingual Abstractive Summarization (MCLAS) in a low-resource setting. Employing one unified decoder to generate the sequential concatenation of monolingual and cross-lingual summaries, MCLAS makes the monolingual summarization task a prerequisite of the CLS task. In this way, the shared decoder learns interactions involving alignments and summary patterns across languages, which encourages attaining knowledge transfer. Experiments on two CLS datasets demonstrate that our model significantly outperforms three baseline models in both low-resource and full-dataset scenarios. Moreover, in-depth analysis on the generated summaries and attention heads verifies that interactions are learned well using MCLAS, which benefits the CLS task under limited parallel resources. 2021.acl-long.538 @@ -7584,7 +7584,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Learning Prototypical Functions for Physical Artifacts TianyuJiang - EllenRiloff + EllenRiloff 6941–6951 Humans create things for a reason. Ancient people created spears for hunting, knives for cutting meat, pots for preparing food, etc. The prototypical function of a physical artifact is a kind of commonsense knowledge that we rely on to understand natural language. For example, if someone says “She borrowed the book” then you would assume that she intends to read the book, or if someone asks “Can I use your knife?” then you would assume that they need to cut something. In this paper, we introduce a new NLP task of learning the prototypical uses for human-made physical objects. We use frames from FrameNet to represent a set of common functions for objects, and describe a manually annotated data set of physical objects labeled with their prototypical function. We also present experimental results for this task, including BERT-based models that use predictions from masked patterns as well as artifact sense definitions from WordNet and frame definitions from FrameNet. 2021.acl-long.540 @@ -7609,8 +7609,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Dynamic Contextualized Word Embeddings ValentinHofmann - JanetPierrehumbert - HinrichSchütze + JanetPierrehumbert + HinrichSchütze 6970–6984 Static word embeddings that represent words by a single vector cannot capture the variability of word meaning in different linguistic and extralinguistic contexts. Building on prior work on contextualized and dynamic word embeddings, we introduce dynamic contextualized word embeddings that represent words as a function of both linguistic and extralinguistic context. Based on a pretrained language model (PLM), dynamic contextualized word embeddings model time and social space jointly, which makes them attractive for a range of NLP tasks involving semantic variability. We highlight potential application scenarios by means of qualitative and quantitative analyses on four English datasets. 2021.acl-long.542 @@ -7624,7 +7624,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO MaikePark DominikSchlechtweg JonasKuhn - SabineSchulte im Walde + SabineSchulte im Walde 6985–6998 While there is a large amount of research in the field of Lexical Semantic Change Detection, only few approaches go beyond a standard benchmark evaluation of existing models. In this paper, we propose a shift of focus from change detection to change discovery, i.e., discovering novel word senses over time from the full corpus vocabulary. By heavily fine-tuning a type-based and a token-based approach on recently published German data, we demonstrate that both models can successfully be applied to discover new words undergoing meaning change. Furthermore, we provide an almost fully automated framework for both evaluation and discovery. 2021.acl-long.543 @@ -7692,7 +7692,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Cross-replication Reliability - An Empirical Approach to Interpreting Inter-rater Reliability KaWong - PraveenParitosh + PraveenParitosh LoraAroyo 7053–7065 When collecting annotations and labeled data from humans, a standard practice is to use inter-rater reliability (IRR) as a measure of data goodness (Hallgren, 2012). Metrics such as Krippendorff’s alpha or Cohen’s kappa are typically required to be above a threshold of 0.6 (Landis and Koch, 1977). These absolute thresholds are unreasonable for crowdsourced data from annotators with high cultural and training variances, especially on subjective topics. We present a new alternative to interpreting IRR that is more empirical and contextualized. It is based upon benchmarking IRR against baseline measures in a replication, one of which is a novel cross-replication reliability (xRR) measure based on Cohen’s (1960) kappa. We call this approach the xRR framework. We opensource a replication dataset of 4 million human judgements of facial expressions and analyze it with the proposed framework. We argue this framework can be used to measure the quality of crowdsourced datasets. @@ -7758,7 +7758,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO AlexandraOlteanu KaheerSuleman AdamTrischler - Jackie Chi KitCheung + Jackie Chi KitCheung 7117–7128 A false contract is more likely to be rejected than a contract is, yet a false key is less likely than a key to open doors. While correctly interpreting and assessing the effects of such adjective-noun pairs (e.g., false key) on the plausibility of given events (e.g., opening doors) underpins many natural language understanding tasks, doing so often requires a significant degree of world knowledge and common-sense reasoning. We introduce ADEPT – a large-scale semantic plausibility task consisting of over 16 thousand sentences that are paired with slightly modified versions obtained by adding an adjective to a noun. Overall, we find that while the task appears easier for human judges (85% accuracy), it proves more difficult for transformer-based models like RoBERTa (71% accuracy). Our experiments also show that neither the adjective itself nor its taxonomic class suffice in determining the correct plausibility judgement, emphasizing the importance of endowing automatic natural language understanding systems with more context sensitivity and common-sense reasoning. 2021.acl-long.553 @@ -7781,7 +7781,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Conditional Generation of Temporally-ordered Event Sequences Shih-TingLin - NathanaelChambers + NathanaelChambers GregDurrett 7142–7157 Models of narrative schema knowledge have proven useful for a range of event-related tasks, but they typically do not capture the temporal relationships between events. We propose a single model that addresses both temporal ordering, sorting given events into the order they occurred, and event infilling, predicting new events which fit into an existing temporally-ordered sequence. We use a BART-based conditional generation model that can capture both temporality and common event co-occurrence, meaning it can be flexibly applied to different tasks in this space. Our model is trained as a denoising autoencoder: we take temporally-ordered event sequences, shuffle them, delete some events, and then attempt to recover the original event sequence. This task teaches the model to make inferences given incomplete knowledge about the events in an underlying scenario. On the temporal ordering task, we show that our model is able to unscramble event sequences from existing datasets without access to explicitly labeled temporal training data, outperforming both a BERT-based pairwise model and a BERT-based pointer network. On event infilling, human evaluation shows that our model is able to generate events that fit better temporally into the input events when compared to GPT-2 story completion models. @@ -7821,7 +7821,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO <fixed-case>S</fixed-case>pan<fixed-case>NER</fixed-case>: Named Entity Re-/Recognition as Span Prediction JinlanFu - XuanjingHuang + XuanjingHuang PengfeiLiu 7183–7195 Recent years have seen the paradigm shift of Named Entity Recognition (NER) systems from sequence labeling to span prediction. Despite its preliminary effectiveness, the span prediction model’s architectural bias has not been fully understood. In this paper, we first investigate the strengths and weaknesses when the span prediction model is used for named entity recognition compared with the sequence labeling framework and how to further improve it, which motivates us to make complementary advantages of systems based on different paradigms. We then reveal that span prediction, simultaneously, can serve as a system combiner to re-recognize named entities from different systems’ outputs. We experimentally implement 154 systems on 11 datasets, covering three languages, comprehensive results show the effectiveness of span prediction models that both serve as base NER systems and system combiners. We make all codes and datasets available: https://github.com/neulab/spanner, as well as an online system demo: http://spanner.sh. Our model also has been deployed into the ExplainaBoard platform, which allows users to flexibly perform a system combination of top-scoring systems in an interactive way: http://explainaboard.nlpedia.ai/leaderboard/task-ner/. @@ -7896,7 +7896,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO SiddharthKaramcheti RanjayKrishna LiFei-Fei - ChristopherManning + ChristopherManning 7265–7281 Active learning promises to alleviate the massive data needs of supervised machine learning: it has successfully improved sample efficiency by an order of magnitude on traditional tasks like topic classification and object recognition. However, we uncover a striking contrast to this promise: across 5 models and 4 datasets on the task of visual question answering, a wide variety of active learning approaches fail to outperform random selection. To understand this discrepancy, we profile 8 active learning methods on a per-example basis, and identify the problem as collective outliers – groups of examples that active learning methods prefer to acquire but models fail to learn (e.g., questions that ask about text in images or require external knowledge). Through systematic ablation experiments and qualitative visualizations, we verify that collective outliers are a general phenomenon responsible for degrading pool-based active learning. Notably, we show that active learning sample efficiency increases significantly as the number of collective outliers in the active learning pool decreases. We conclude with a discussion and prescriptive recommendations for mitigating the effects of these outliers in future work. 2021.acl-long.564 @@ -7912,7 +7912,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO SofiaSerrano NikitaHaduong SuchinGururangan - Noah A.Smith + Noah A.Smith 7282–7296 Human evaluations are typically considered the gold standard in natural language generation, but as models’ fluency improves, how well can evaluators detect and judge machine-generated text? We run a study assessing non-experts’ ability to distinguish between human- and machine-authored text (GPT2 and GPT3) in three domains (stories, news articles, and recipes). We find that, without training, evaluators distinguished between GPT3- and human-authored text at random chance level. We explore three approaches for quickly training evaluators to better identify GPT3-authored text (detailed instructions, annotated examples, and paired examples) and find that while evaluators’ accuracy improved up to 55%, it did not significantly improve across the three domains. Given the inconsistent results across text domains and the often contradictory reasons evaluators gave for their judgments, we examine the role untrained human evaluations play in NLG evaluation and provide recommendations to NLG researchers for improving human evaluations of text generated from state-of-the-art models. 2021.acl-long.565 @@ -7926,7 +7926,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Scientific Credibility of Machine Translation Research: A Meta-Evaluation of 769 Papers BenjaminMarie AtsushiFujita - RaphaelRubino + RaphaelRubino 7297–7306 This paper presents the first large-scale meta-evaluation of machine translation (MT). We annotated MT evaluations conducted in 769 research papers published from 2010 to 2020. Our study shows that practices for automatic MT evaluation have dramatically changed during the past decade and follow concerning trends. An increasing number of MT evaluations exclusively rely on differences between BLEU scores to draw conclusions, without performing any kind of statistical significance testing nor human evaluation, while at least 108 metrics claiming to be better than BLEU have been proposed. MT evaluations in recent papers tend to copy and compare automatic metric scores from previous work to claim the superiority of a method or an algorithm without confirming neither exactly the same training, validating, and testing data have been used nor the metric scores are comparable. Furthermore, tools for reporting standardized metric scores are still far from being widely adopted by the MT community. After showing how the accumulation of these pitfalls leads to dubious evaluation, we propose a guideline to encourage better automatic MT evaluation along with a simple meta-evaluation scoring method to assess its credibility. 2021.acl-long.566 @@ -7954,7 +7954,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Intrinsic Dimensionality Explains the Effectiveness of Language Model Fine-Tuning ArmenAghajanyan SonalGupta - LukeZettlemoyer + LukeZettlemoyer 7319–7328 Although pretrained language models can be fine-tuned to produce state-of-the-art results for a very wide range of language understanding tasks, the dynamics of this process are not well understood, especially in the low data regime. Why can we use relatively vanilla gradient descent algorithms (e.g., without strong regularization) to tune a model with hundreds of millions of parameters on datasets with only hundreds or thousands of labeled examples? In this paper, we argue that analyzing fine-tuning through the lens of intrinsic dimension provides us with empirical and theoretical intuitions to explain this remarkable phenomenon. We empirically show that common pre-trained models have a very low intrinsic dimension; in other words, there exists a low dimension reparameterization that is as effective for fine-tuning as the full parameter space. For example, by optimizing only 200 trainable parameters randomly projected back into the full space, we can tune a RoBERTa model to achieve 90% of the full parameter performance levels on MRPC. Furthermore, we empirically show that pre-training implicitly minimizes intrinsic dimension and, perhaps surprisingly, larger models tend to have lower intrinsic dimension after a fixed number of pre-training updates, at least in part explaining their extreme effectiveness. Lastly, we connect intrinsic dimensionality with low dimensional task representations and compression based generalization bounds to provide intrinsic-dimension-based generalization bounds that are independent of the full parameter count. 2021.acl-long.568 @@ -8010,7 +8010,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 2: Short Papers) - ChengqingZong + ChengqingZong FeiXia WenjieLi RobertoNavigli @@ -8116,7 +8116,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Attention Flows are Shapley Value Explanations KawinEthayarajh - DanJurafsky + DanJurafsky 49–54 Shapley Values, a solution to the credit assignment problem in cooperative game theory, are a popular type of explanation in machine learning, having been used to explain the importance of features, embeddings, and even neurons. In NLP, however, leave-one-out and attention-based explanations still predominate. Can we draw a connection between these different methods? We formally prove that — save for the degenerate case — attention weights and leave-one-out values cannot be Shapley Values. Attention flow is a post-processed variant of attention weights obtained by running the max-flow algorithm on the attention graph. Perhaps surprisingly, we prove that attention flows are indeed Shapley Values, at least at the layerwise level. Given the many desirable theoretical qualities of Shapley Values — which has driven their adoption among the ML community — we argue that NLP practitioners should, when possible, adopt attention flow explanations alongside more traditional ones. 2021.acl-short.8 @@ -8207,7 +8207,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO DeniseDiaz KennethHeafield XianLi - MonaDiab + MonaDiab 99–109 Is bias amplified when neural machine translation (NMT) models are optimized for speed and evaluated on generic test sets using BLEU? We investigate architectures and techniques commonly used to speed up decoding in Transformer-based models, such as greedy search, quantization, average attention networks (AANs) and shallow decoder models and show their effect on gendered noun translation. We construct a new gender bias test set, SimpleGEN, based on gendered noun phrases in which there is a single, unambiguous, correct answer. While we find minimal overall BLEU degradation as we apply speed optimizations, we observe that gendered noun translation performance degrades at a much faster rate. 2021.acl-short.15 @@ -8381,7 +8381,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO HwanheeLee SeunghyunYoon FranckDernoncourt - TrungBui + TrungBui KyominJung 220–226 Despite the success of various text generation metrics such as BERTScore, it is still difficult to evaluate the image captions without enough reference captions due to the diversity of the descriptions. In this paper, we introduce a new metric UMIC, an Unreferenced Metric for Image Captioning which does not require reference captions to evaluate image captions. Based on Vision-and-Language BERT, we train UMIC to discriminate negative captions via contrastive learning. Also, we observe critical problems of the previous benchmark dataset (i.e., human annotations) on image captioning metric, and introduce a new collection of human annotations on the generated captions. We validate UMIC on four datasets, including our new dataset, and show that UMIC has a higher correlation than all previous metrics that require multiple references. @@ -8395,7 +8395,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Anchor-based Bilingual Word Embeddings for Low-Resource Languages TobiasEder ViktorHangya - AlexanderFraser + AlexanderFraser 227–232 Good quality monolingual word embeddings (MWEs) can be built for languages which have large amounts of unlabeled text. MWEs can be aligned to bilingual spaces using only a few thousand word translation pairs. For low resource languages training MWEs monolingually results in MWEs of poor quality, and thus poor bilingual word embeddings (BWEs) as well. This paper proposes a new approach for building BWEs in which the vector space of the high resource source language is used as a starting point for training an embedding space for the low resource target language. By using the source vectors as anchors the vector spaces are automatically aligned during training. We experiment on English-German, English-Hiligaynon and English-Macedonian. We show that our approach results not only in improved BWEs and bilingual lexicon induction performance, but also in improved target language MWE quality as measured using monolingual word similarity. 2021.acl-short.30 @@ -8434,7 +8434,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Reinforcement Learning for Abstractive Question Summarization with Question-aware Semantic Rewards ShwetaYadav - DeepakGupta + DeepakGupta AsmaBen Abacha DinaDemner-Fushman 249–255 @@ -8452,8 +8452,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO IbrahimAbdelaziz Young-SukLee PavanKapanipathi - SalimRoukos - AlfioGliozzo + SalimRoukos + AlfioGliozzo AlexanderGray 256–262 Relation linking is a crucial component of Knowledge Base Question Answering systems. Existing systems use a wide variety of heuristics, or ensembles of multiple systems, heavily relying on the surface question text. However, the explicit semantic parse of the question is a rich source of relation information that is not taken advantage of. We propose a simple transformer-based neural model for relation linking that leverages the AMR semantic parse of a sentence. Our system significantly outperforms the state-of-the-art on 4 popular benchmark datasets. These are based on either DBpedia or Wikidata, demonstrating that our approach is effective across KGs. @@ -8491,9 +8491,9 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO <fixed-case>MOLEMAN</fixed-case>: Mention-Only Linking of Entities with a Mention Annotation Network NicholasFitzGerald - DanBikel + DanBikel JanBotha - DanielGillick + DanielGillick TomKwiatkowski AndrewMcCallum 278–285 @@ -8547,7 +8547,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO RenshenWang YasuhisaFujii SiyangQin - AshokPopat + AshokPopat TomasPfister 314–321 Natural reading orders of words are crucial for information extraction from form-like documents. Despite recent advances in Graph Convolutional Networks (GCNs) on modeling spatial layout patterns of documents, they have limited ability to capture reading orders of given word-level node representations in a graph. We propose Reading Order Equivariant Positional Encoding (ROPE), a new positional encoding technique designed to apprehend the sequential presentation of words in documents. ROPE generates unique reading order codes for neighboring words relative to the target word given a word-level graph connectivity. We study two fundamental document entity extraction tasks including word labeling and word grouping on the public FUNSD dataset and a large-scale payment dataset. We show that ROPE consistently improves existing GCNs with a margin up to 8.4% F1-score. @@ -8609,8 +8609,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Modeling Task-Aware <fixed-case>MIMO</fixed-case> Cardinality for Efficient Multilingual Neural Machine Translation HongfeiXu QiuhuiLiu - Josefvan Genabith - DeyiXiong + Josefvan Genabith + DeyiXiong 361–367 Neural machine translation has achieved great success in bilingual settings, as well as in multilingual settings. With the increase of the number of languages, multilingual systems tend to underperform their bilingual counterparts. Model capacity has been found crucial for massively multilingual NMT to support language pairs with varying typological characteristics. Previous work increases the modeling capacity by deepening or widening the Transformer. However, modeling cardinality based on aggregating a set of transformations with the same topology has been proven more effective than going deeper or wider when increasing capacity. In this paper, we propose to efficiently increase the capacity for multilingual NMT by increasing the cardinality. Unlike previous work which feeds the same input to several transformations and merges their outputs into one, we present a Multi-Input-Multi-Output (MIMO) architecture that allows each transformation of the block to have its own input. We also present a task-aware attention mechanism to learn to selectively utilize individual transformations from a set of transformations for different translation directions. Our model surpasses previous work and establishes a new state-of-the-art on the large scale OPUS-100 corpus while being 1.31 times as fast. 2021.acl-short.46 @@ -8626,7 +8626,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO ShujianHuang BoxingChen WeihuaLuo - JiajunChen + JiajunChen 368–374 kNN-MT, recently proposed by Khandelwal et al. (2020a), successfully combines pre-trained neural machine translation (NMT) model with token-level k-nearest-neighbor (kNN) retrieval to improve the translation accuracy. However, the traditional kNN algorithm used in kNN-MT simply retrieves a same number of nearest neighbors for each target token, which may cause prediction errors when the retrieved neighbors include noises. In this paper, we propose Adaptive kNN-MT to dynamically determine the number of k for each target token. We achieve this by introducing a light-weight Meta-k Network, which can be efficiently trained with only a few training samples. On four benchmark machine translation datasets, we demonstrate that the proposed method is able to effectively filter out the noises in retrieval results and significantly outperforms the vanilla kNN-MT model. Even more noteworthy is that the Meta-k Network learned on one domain could be directly applied to other domains and obtain consistent improvements, illustrating the generality of our method. Our implementation is open-sourced at https://github.com/zhengxxn/adaptive-knn-mt. 2021.acl-short.47 @@ -8705,7 +8705,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Discrete Cosine Transform as Universal Sentence Encoder NadaAlmarwani - MonaDiab + MonaDiab 419–426 Modern sentence encoders are used to generate dense vector representations that capture the underlying linguistic characteristics for a sequence of words, including phrases, sentences, or paragraphs. These kinds of representations are ideal for training a classifier for an end task such as sentiment analysis, question answering and text classification. Different models have been proposed to efficiently generate general purpose sentence representations to be used in pretraining protocols. While averaging is the most commonly used efficient sentence encoder, Discrete Cosine Transform (DCT) was recently proposed as an alternative that captures the underlying syntactic characteristics of a given text without compromising practical efficiency compared to averaging. However, as with most other sentence encoders, the DCT sentence encoder was only evaluated in English. To this end, we utilize DCT encoder to generate universal sentence representation for different languages such as German, French, Spanish and Russian. The experimental results clearly show the superior effectiveness of DCT encoding in which consistent performance improvements are achieved over strong baselines on multiple standardized datasets 2021.acl-short.53 @@ -8728,8 +8728,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO An Exploratory Analysis of Multilingual Word-Level Quality Estimation with Cross-Lingual Transformers TharinduRanasinghe - ConstantinOrasan - RuslanMitkov + ConstantinOrasan + RuslanMitkov 434–440 Most studies on word-level Quality Estimation (QE) of machine translation focus on language-specific models. The obvious disadvantages of these approaches are the need for labelled data for each language pair and the high cost required to maintain several language-specific models. To overcome these problems, we explore different approaches to multilingual, word-level QE. We show that multilingual QE models perform on par with the current language-specific models. In the cases of zero-shot and few-shot QE, we demonstrate that it is possible to accurately predict word-level quality for any given new language pair from models trained on other language pairs. Our findings suggest that the word-level QE models based on powerful pre-trained transformers that we propose in this paper generalise well across languages, making them more useful in real-world scenarios. 2021.acl-short.55 @@ -8743,7 +8743,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO ChongLi CenyuanZhang XiaoqingZheng - XuanjingHuang + XuanjingHuang 441–446 A sequence-to-sequence learning with neural networks has empirically proven to be an effective framework for Chinese Spelling Correction (CSC), which takes a sentence with some spelling errors as input and outputs the corrected one. However, CSC models may fail to correct spelling errors covered by the confusion sets, and also will encounter unseen ones. We propose a method, which continually identifies the weak spots of a model to generate more valuable training instances, and apply a task-specific pre-training strategy to enhance the model. The generated adversarial examples are gradually added to the training set. Experimental results show that such an adversarial training method combined with the pre-training strategy can improve both the generalization and robustness of multiple CSC models across three different datasets, achieving state-of-the-art performance for CSC task. 2021.acl-short.56 @@ -8768,7 +8768,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO An Empirical Study on Adversarial Attack on <fixed-case>NMT</fixed-case>: Languages and Positions Matter ZhiyuanZeng - DeyiXiong + DeyiXiong 454–460 In this paper, we empirically investigate adversarial attack on NMT from two aspects: languages (the source vs. the target language) and positions (front vs. rear). For autoregressive NMT models that generate target words from left to right, we observe that adversarial attack on the source language is more effective than on the target language, and that attacking front positions of target sentences or positions of source sentences aligned to the front positions of corresponding target sentences is more effective than attacking other positions. We further exploit the attention distribution of the victim model to attack source sentences at positions that have a strong association with front target words. Experiment results demonstrate that our attention-based adversarial attack is more effective than adversarial attacks by sampling positions randomly or according to gradients. 2021.acl-short.58 @@ -8779,7 +8779,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO <fixed-case>O</fixed-case>nto<fixed-case>GUM</fixed-case>: Evaluating Contextualized <fixed-case>SOTA</fixed-case> Coreference Resolution on 12 More Genres YilunZhu - SameerPradhan + SameerPradhan AmirZeldes 461–467 SOTA coreference resolution produces increasingly impressive scores on the OntoNotes benchmark. However lack of comparable data following the same scheme for more genres makes it difficult to evaluate generalizability to open domain data. This paper provides a dataset and comprehensive evaluation showing that the latest neural LM based end-to-end systems degrade very substantially out of domain. We make an OntoNotes-like coreference dataset called OntoGUM publicly available, converted from GUM, an English corpus covering 12 genres, using deterministic rules, which we evaluate. Thanks to the rich syntactic and discourse annotations in GUM, we are able to create the largest human-annotated coreference corpus following the OntoNotes guidelines, and the first to be evaluated for consistency with the OntoNotes scheme. Out-of-domain evaluation across 12 genres shows nearly 15-20% degradation for both deterministic and deep learning systems, indicating a lack of generalizability or covert overfitting in existing coreference resolution models. @@ -8863,7 +8863,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO YijinLiu FandongMeng JiajunZhang - JinanXu + JinanXu JieZhou 511–516 Recently, token-level adaptive training has achieved promising improvement in machine translation, where the cross-entropy loss function is adjusted by assigning different training weights to different tokens, in order to alleviate the token imbalance problem. However, previous approaches only use static word frequency information in the target language without considering the source language, which is insufficient for bilingual tasks like machine translation. In this paper, we propose a novel bilingual mutual information (BMI) based adaptive objective, which measures the learning difficulty for each target token from the perspective of bilingualism, and assigns an adaptive weight accordingly to improve token-level adaptive training. This method assigns larger training weights to tokens with higher BMI, so that easy tokens are updated with coarse granularity while difficult tokens are updated with fine granularity. Experimental results on WMT14 English-to-German and WMT19 Chinese-to-English demonstrate the superiority of our approach compared with the Transformer baseline and previous token-level adaptive training approaches. Further analyses confirm that our method can improve the lexical diversity. @@ -8894,7 +8894,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO RajivJain FranckDernoncourt VladMorariu - Quan HungTran + Quan HungTran DineshManocha 524–533 We present TIMERS - a TIME, Rhetorical and Syntactic-aware model for document-level temporal relation classification in the English language. Our proposed method leverages rhetorical discourse features and temporal arguments from semantic role labels, in addition to traditional local syntactic features, trained through a Gated Relational-GCN. Extensive experiments show that the proposed model outperforms previous methods by 5-18% on the TDDiscourse, TimeBank-Dense, and MATRES datasets due to our discourse-level modeling. @@ -8921,8 +8921,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO JiahuanLi YutongShen ShujianHuang - XinyuDai - JiajunChen + XinyuDai + JiajunChen 543–549 Subword segmentation algorithms have been a de facto choice when building neural machine translation systems. However, most of them need to learn a segmentation model based on some heuristics, which may produce sub-optimal segmentation. This can be problematic in some scenarios when the target language has rich morphological changes or there is not enough data for learning compact composition rules. Translating at fully character level has the potential to alleviate the issue, but empirical performances of character-based models has not been fully explored. In this paper, we present an in-depth comparison between character-based and subword-based NMT systems under three settings: translating to typologically diverse languages, training with low resource, and adapting to unseen domains. Experiment results show strong competitiveness of character-based models. Further analyses show that compared to subword-based models, character-based models are better at handling morphological phenomena, generating rare and unknown words, and more suitable for transferring to unseen domains. 2021.acl-short.69 @@ -8936,8 +8936,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO ZhengHu ShoushanLi HanqianWu - QiaomingZhu - GuodongZhou + QiaomingZhu + GuodongZhou 550–557 Chinese word segmentation (CWS) is undoubtedly an important basic task in natural language processing. Previous works only focus on the textual modality, but there are often audio and video utterances (such as news broadcast and face-to-face dialogues), where textual, acoustic and visual modalities normally exist. To this end, we attempt to combine the multi-modality (mainly the converted text and actual voice information) to perform CWS. In this paper, we annotate a new dataset for CWS containing text and audio. Moreover, we propose a time-dependent multi-modal interactive model based on Transformer framework to integrate multi-modal information for word sequence labeling. The experimental results on three different training sets show the effectiveness of our approach with fusing text and audio. 2021.acl-short.70 @@ -9011,7 +9011,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Don’t Let Discourse Confine Your Model: Sequence Perturbations for Improved Event Language Models MahnazKoupaee GregDurrett - NathanaelChambers + NathanaelChambers NiranjanBalasubramanian 599–604 Event language models represent plausible sequences of events. Most existing approaches train autoregressive models on text, which successfully capture event co-occurrence but unfortunately constrain the model to follow the discourse order in which events are presented. Other domains may employ different discourse orders, and for many applications, we may care about different notions of ordering (e.g., temporal) or not care about ordering at all (e.g., when predicting related events in a schema). We propose a simple yet surprisingly effective strategy for improving event language models by perturbing event sequences so we can relax model dependence on text order. Despite generating completely synthetic event orderings, we show that this technique improves the performance of the event language models on both applications and out-of-domain events data. @@ -9099,7 +9099,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO <fixed-case>QA</fixed-case>-Driven Zero-shot Slot Filling with Weak Supervision Pretraining XinyaDu LuhengHe - QiLi + QiLi DianYu PanupongPasupat YuanZhang @@ -9164,7 +9164,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Question Generation for Adaptive Education MeghaSrivastava - NoahGoodman + NoahGoodman 692–701 Intelligent and adaptive online education systems aim to make high-quality education available for a diverse range of students. However, existing systems usually depend on a pool of hand-made questions, limiting how fine-grained and open-ended they can be in adapting to individual students. We explore targeted question generation as a controllable sequence generation task. We first show how to fine-tune pre-trained language models for deep knowledge tracing (LM-KT). This model accurately predicts the probability of a student answering a question correctly, and generalizes to questions not seen in training. We then use LM-KT to specify the objective and data for training a model to generate questions conditioned on the student and target difficulty. Our results show we succeed at generating novel, well-calibrated language translation questions for second language learners from a real online education platform. 2021.acl-short.88 @@ -9193,7 +9193,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO WenlanWei YichenZhang LuntianMou - EricXing + EricXing PengtaoXie 708–718 Pathology imaging is broadly used for identifying the causes and effects of diseases or injuries. Given a pathology image, being able to answer questions about the clinical findings contained in the image is very important for medical decision making. In this paper, we aim to develop a pathological visual question answering framework to analyze pathology images and answer medical questions related to these images. To build such a framework, we create PathVQA, a VQA dataset with 32,795 questions asked from 4,998 pathology images. We also propose a three-level optimization framework which performs self-supervised pretraining and VQA finetuning end-to-end to learn powerful visual and textual representations jointly and automatically identifies and excludes noisy self-supervised examples from pretraining. We perform experiments on our created PathVQA dataset and the results demonstrate the effectiveness of our proposed methods. The datasets and code are available at https://github.com/UCSD-AI4H/PathVQA @@ -9220,7 +9220,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO m<fixed-case>TVR</fixed-case>: Multilingual Moment Retrieval in Videos JieLei - TamaraBerg + TamaraBerg MohitBansal 726–734 We introduce mTVR, a large-scale multilingual video moment retrieval dataset, containing 218K English and Chinese queries from 21.8K TV show video clips. The dataset is collected by extending the popular TVR dataset (in English) with paired Chinese queries and subtitles. Compared to existing moment retrieval datasets, mTVR is multilingual, larger, and comes with diverse annotations. We further propose mXML, a multilingual moment retrieval model that learns and operates on data from both languages, via encoder parameter sharing and language neighborhood constraints. We demonstrate the effectiveness of mXML on the newly collected mTVR dataset, where mXML outperforms strong monolingual baselines while using fewer parameters. In addition, we also provide detailed dataset analyses and model ablations. Data and code are publicly available at https://github.com/jayleicn/mTVRetrieval @@ -9258,7 +9258,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Quotation Recommendation and Interpretation Based on Transformation from Queries to Quotations LingzhiWang XingshanZeng - Kam-FaiWong + Kam-FaiWong 754–758 To help individuals express themselves better, quotation recommendation is receiving growing attention. Nevertheless, most prior efforts focus on modeling quotations and queries separately and ignore the relationship between the quotations and the queries. In this work, we introduce a transformation matrix that directly maps the query representations to quotation representations. To better learn the mapping relationship, we employ a mapping loss that minimizes the distance of two semantic spaces (one for quotation and another for mapped-query). Furthermore, we explore using the words in history queries to interpret the figurative language of quotations, where quotation-aware attention is applied on top of history queries to highlight the indicator words. Experiments on two datasets in English and Chinese show that our model outperforms previous state-of-the-art models. 2021.acl-short.95 @@ -9306,9 +9306,9 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Issues with Entailment-based Zero-shot Text Classification TingtingMa - Jin-GeYao - Chin-YewLin - TiejunZhao + Jin-GeYao + Chin-YewLin + TiejunZhao 786–796 The general format of natural language inference (NLI) makes it tempting to be used for zero-shot text classification by casting any target label into a sentence of hypothesis and verifying whether or not it could be entailed by the input, aiming at generic classification applicable on any specified label space. In this opinion piece, we point out a few overlooked issues that are yet to be discussed in this line of work. We observe huge variance across different classification datasets amongst standard BERT-based NLI models and surprisingly find that pre-trained BERT without any fine-tuning can yield competitive performance against BERT fine-tuned for NLI. With the concern that these models heavily rely on spurious lexical patterns for prediction, we also experiment with preliminary approaches for more robust NLI, but the results are in general negative. Our observations reveal implicit but challenging difficulties in entailment-based zero-shot text classification. 2021.acl-short.99 @@ -9322,7 +9322,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO FarhadMoghimifar LizhenQu Terry YueZhuo - GholamrezaHaffari + GholamrezaHaffari MahsaBaktashmotlagh 797–802 Commonsense reasoning aims to incorporate sets of commonsense facts, retrieved from Commonsense Knowledge Graphs (CKG), to draw conclusion about ordinary situations. The dynamic nature of commonsense knowledge postulates models capable of performing multi-hop reasoning over new situations. This feature also results in having large-scale sparse Knowledge Graphs, where such reasoning process is needed to predict relations between new events. However, existing approaches in this area are limited by considering CKGs as a limited set of facts, thus rendering them unfit for reasoning over new unseen situations and events. In this paper, we present a neural-symbolic reasoner, which is capable of reasoning over large-scale dynamic CKGs. The logic rules for reasoning over CKGs are learned during training by our model. In addition to providing interpretable explanation, the learned logic rules help to generalise prediction to newly introduced events. Experimental results on the task of link prediction on CKGs prove the effectiveness of our model by outperforming the state-of-the-art models. @@ -9333,7 +9333,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO What Motivates You? Benchmarking Automatic Detection of Basic Needs from Short Posts - SanjaStajner + SanjaStajner SerenYenikent BilalGhanem MarcFranco-Salvador @@ -9363,7 +9363,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO ChanghanWang JiataoGu DidierSchwab - LaurentBesacier + LaurentBesacier 817–824 Adapter modules were recently introduced as an efficient alternative to fine-tuning in NLP. Adapter tuning consists in freezing pre-trained parameters of a model and injecting lightweight modules between layers, resulting in the addition of only a small number of task-specific trainable parameters. While adapter tuning was investigated for multilingual neural machine translation, this paper proposes a comprehensive analysis of adapters for multilingual speech translation (ST). Starting from different pre-trained models (a multilingual ST trained on parallel data or a multilingual BART (mBART) trained on non parallel multilingual data), we show that adapters can be used to: (a) efficiently specialize ST to specific language pairs with a low extra cost in terms of parameters, and (b) transfer from an automatic speech recognition (ASR) task and an mBART pre-trained model to a multilingual ST task. Experiments show that adapter tuning offer competitive results to full fine-tuning, while being much more parameter-efficient. 2021.acl-short.103 @@ -9402,7 +9402,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO GiuseppeCastellucci SimoneFilice DaniloCroce - RobertoBasili + RobertoBasili 837–847 In real scenarios, a multilingual model trained to solve NLP tasks on a set of languages can be required to support new languages over time. Unfortunately, the straightforward retraining on a dataset containing annotated examples for all the languages is both expensive and time-consuming, especially when the number of target languages grows. Moreover, the original annotated material may no longer be available due to storage or business constraints. Re-training only with the new language data will inevitably result in Catastrophic Forgetting of previously acquired knowledge. We propose a Continual Learning strategy that updates a model to support new languages over time, while maintaining consistent results on previously learned languages. We define a Teacher-Student framework where the existing model “teaches” to a student model its knowledge about the languages it supports, while the student is also trained on a new language. We report an experimental evaluation in several tasks including Sentence Classification, Relational Learning and Sequence Labeling. 2021.acl-short.106 @@ -9440,7 +9440,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Johannes MarioMeissner NapatThumwanit SakuSugawara - AkikoAizawa + AkikoAizawa 862–869 Natural Language Inference (NLI) datasets contain examples with highly ambiguous labels. While many research works do not pay much attention to this fact, several recent efforts have been made to acknowledge and embrace the existence of ambiguity, such as UNLI and ChaosNLI. In this paper, we explore the option of training directly on the estimated label distribution of the annotators in the NLI task, using a learning loss based on this ambiguity distribution instead of the gold-labels. We prepare AmbiNLI, a trial dataset obtained from readily available sources, and show it is possible to reduce ChaosNLI divergence scores when finetuning on this data, a promising first step towards learning how to capture linguistic ambiguity. Additionally, we show that training on the same amount of data but targeting the ambiguity distribution instead of gold-labels can result in models that achieve higher performance and learn better representations for downstream tasks. 2021.acl-short.109 @@ -9498,7 +9498,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO QingyangWu ZhouYu KunXu - EricXing + EricXing PengtaoXie 886–896 Under the pandemic of COVID-19, people experiencing COVID19-related symptoms have a pressing need to consult doctors. Because of the shortage of medical professionals, many people cannot receive online consultations timely. To address this problem, we aim to develop a medical dialog system that can provide COVID19-related consultations. We collected two dialog datasets – CovidDialog – (in English and Chinese respectively) containing conversations between doctors and patients about COVID-19. While the largest of their kind, these two datasets are still relatively small compared with general-domain dialog datasets. Training complex dialog generation models on small datasets bears high risk of overfitting. To alleviate overfitting, we develop a multi-task learning approach, which regularizes the data-deficient dialog generation task with a masked token prediction task. Experiments on the CovidDialog datasets demonstrate the effectiveness of our approach. We perform both human evaluation and automatic evaluation of dialogs generated by our method. Results show that the generated responses are promising in being doctor-like, relevant to conversation history, clinically informative and correct. The code and the data are available at https://github.com/UCSD-AI4H/COVID-Dialogue. @@ -9513,7 +9513,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO SuwonShin JaegulChoo Ho-JinChoi - Sung-HyonMyaeng + Sung-HyonMyaeng 897–906 In multi-modal dialogue systems, it is important to allow the use of images as part of a multi-turn conversation. Training such dialogue systems generally requires a large-scale dataset consisting of multi-turn dialogues that involve images, but such datasets rarely exist. In response, this paper proposes a 45k multi-modal dialogue dataset created with minimal human intervention. Our method to create such a dataset consists of (1) preparing and pre-processing text dialogue datasets, (2) creating image-mixed dialogues by using a text-to-image replacement technique, and (3) employing a contextual-similarity-based filtering step to ensure the contextual coherence of the dataset. To evaluate the validity of our dataset, we devise a simple retrieval model for dialogue sentence prediction tasks. Automatic metrics and human evaluation results on such tasks show that our dataset can be effectively used as training data for multi-modal dialogue systems which require an understanding of images and text in a context-aware manner. Our dataset and generation code is available at https://github.com/shh1574/multi-modal-dialogue-dataset. 2021.acl-short.113 @@ -9611,7 +9611,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Sequence to General Tree: Knowledge-Guided Geometry Word Problem Solving Shih-hungTsai Chao-ChunLiang - Hsin-MinWang + Hsin-MinWang Keh-YihSu 964–972 With the recent advancements in deep learning, neural solvers have gained promising results in solving math word problems. However, these SOTA solvers only generate binary expression trees that contain basic arithmetic operators and do not explicitly use the math formulas. As a result, the expression trees they produce are lengthy and uninterpretable because they need to use multiple operators and constants to represent one single formula. In this paper, we propose sequence-to-general tree (S2G) that learns to generate interpretable and executable operation trees where the nodes can be formulas with an arbitrary number of arguments. With nodes now allowed to be formulas, S2G can learn to incorporate mathematical domain knowledge into problem-solving, making the results more interpretable. Experiments show that S2G can achieve a better performance against strong baselines on problems that require domain knowledge. @@ -9693,7 +9693,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO VanessaYan TianxiaoLi RihaoQu - DragomirRadev + DragomirRadev 1005–1011 Learning prerequisite chains is an important task for one to pick up knowledge efficiently in both known and unknown domains. For example, one may be an expert in the natural language processing (NLP) domain, but want to determine the best order in which to learn new concepts in an unfamiliar Computer Vision domain (CV). Both domains share some common concepts, such as machine learning basics and deep learning models. In this paper, we solve the task of unsupervised cross-domain concept prerequisite chain learning, using an optimized variational graph autoencoder. Our model learns to transfer concept prerequisite relations from an information-rich domain (source domain) to an information-poor domain (target domain), substantially surpassing other baseline models. In addition, we expand an existing dataset by introducing two new domains—-CV and Bioinformatics (BIO). The annotated data and resources as well as the code will be made publicly available. 2021.acl-short.127 @@ -9744,8 +9744,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO HaoyangWen AnthonyFerritto HengJi - RaduFlorian - AviSil + RaduFlorian + AviSil 1035–1042 Existing models on Machine Reading Comprehension (MRC) require complex model architecture for effectively modeling long texts with paragraph representation and classification, thereby making inference computationally inefficient for production use. In this work, we propose VAULT: a light-weight and parallel-efficient paragraph representation for MRC based on contextualized representation from long document input, trained using a new Gaussian distribution-based objective that pays close attention to the partially correct instances that are close to the ground-truth. We validate our VAULT architecture showing experimental results on two benchmark MRC datasets that require long context modeling; one Wikipedia-based (Natural Questions (NQ)) and the other on TechNotes (TechQA). VAULT can achieve comparable performance on NQ with a state-of-the-art (SOTA) complex document modeling approach while being 16 times faster, demonstrating the efficiency of our proposed model. We also demonstrate that our model can also be effectively adapted to a completely different domain – TechQA – with large improvement over a model fine-tuned on a previously published large PLM. 2021.acl-short.131 @@ -9781,7 +9781,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO PhilippeLaban LukeDai LucasBandarkar - Marti A.Hearst + Marti A.Hearst 1058–1064 The Shuffle Test is the most common task to evaluate whether NLP models can measure coherence in text. Most recent work uses direct supervision on the task; we show that by simply finetuning a RoBERTa model, we can achieve a near perfect accuracy of 97.8%, a state-of-the-art. We argue that this outstanding performance is unlikely to lead to a good model of text coherence, and suggest that the Shuffle Test should be approached in a Zero-Shot setting: models should be evaluated without being trained on the task itself. We evaluate common models in this setting, such as Generative and Bi-directional Transformers, and find that larger architectures achieve high-performance out-of-the-box. Finally, we suggest the k-Block Shuffle Test, a modification of the original by increasing the size of blocks shuffled. Even though human reader performance remains high (around 95% accuracy), model performance drops from 94% to 78% as block size increases, creating a conceptually simple challenge to benchmark NLP models. 2021.acl-short.134 @@ -9831,7 +9831,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO <fixed-case>R</fixed-case>eplicating and Extending “<fixed-case>B</fixed-case>ecause Their Treebanks Leak”: <fixed-case>G</fixed-case>raph Isomorphism, Covariants, and Parser Performance MarkAnderson - AndersSøgaard + AndersSøgaard CarlosGómez-Rodríguez 1090–1098 Søgaard (2020) obtained results suggesting the fraction of trees occurring in the test data isomorphic to trees in the training set accounts for a non-trivial variation in parser performance. Similar to other statistical analyses in NLP, the results were based on evaluating linear regressions. However, the study had methodological issues and was undertaken using a small sample size leading to unreliable results. We present a replication study in which we also bin sentences by length and find that only a small subset of sentences vary in performance with respect to graph isomorphism. Further, the correlation observed between parser performance and graph isomorphism in the wild disappears when controlling for covariants. However, in a controlled experiment, where covariants are kept fixed, we do observe a correlation. We suggest that conclusions drawn from statistical analyses like this need to be tempered and that controlled experiments can complement them by more readily teasing factors apart. @@ -9845,7 +9845,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Don’t Rule Out Monolingual Speakers: <fixed-case>A</fixed-case> Method For Crowdsourcing Machine Translation Data RajatBhatnagar AnanyaGanesh - KatharinaKann + KatharinaKann 1099–1106 High-performing machine translation (MT) systems can help overcome language barriers while making it possible for everyone to communicate and use language technologies in the language of their choice. However, such systems require large amounts of parallel sentences for training, and translators can be difficult to find and expensive. Here, we present a data collection strategy for MT which, in contrast, is cheap and simple, as it does not require bilingual speakers. Based on the insight that humans pay specific attention to movements, we use graphics interchange formats (GIFs) as a pivot to collect parallel sentences from monolingual annotators. We use our strategy to collect data in Hindi, Tamil and English. As a baseline, we also collect data using images as a pivot. We perform an intrinsic evaluation by manually evaluating a subset of the sentence pairs and an extrinsic evaluation by finetuning mBART (Liu et al., 2020) on the collected data. We find that sentences collected via GIFs are indeed of higher quality. 2021.acl-short.139 @@ -9879,7 +9879,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO DavidThulke WeiyueWang ChristianDugast - HermannNey + HermannNey 1–15 Data processing is an important step in various natural language processing tasks. As the commonly used datasets in named entity recognition contain only a limited number of samples, it is important to obtain additional labeled data in an efficient and reliable manner. A common practice is to utilize large monolingual unlabeled corpora. Another popular technique is to create synthetic data from the original labeled data (data augmentation). In this work, we investigate the impact of these two methods on the performance of three different named entity recognition tasks. 2021.acl-srw.1 @@ -9891,7 +9891,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Stage-wise Fine-tuning for Graph-to-Text Generation QingyunWang SemihYavuz - Xi VictoriaLin + Xi VictoriaLin HengJi NazneenRajani 16–22 @@ -9907,7 +9907,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO WeiyueWang ZijianYang YingboGao - HermannNey + HermannNey 23–32 The neural hidden Markov model has been proposed as an alternative to attention mechanism in machine translation with recurrent neural networks. However, since the introduction of the transformer models, its performance has been surpassed. This work proposes to introduce the concept of the hidden Markov model to the transformer architecture, which outperforms the transformer baseline. Interestingly, we find that the zero-order model already provides promising performance, giving it an edge compared to a model with first-order dependency, which performs similarly but is significantly slower in training and decoding. 2021.acl-srw.3 @@ -10011,7 +10011,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO “<fixed-case>I</fixed-case>’ve Seen Things You People Wouldn’t Believe”: Hallucinating Entities in <fixed-case>G</fixed-case>uess<fixed-case>W</fixed-case>hat?! AlbertoTestoni - RaffaellaBernardi + RaffaellaBernardi 101–111 Natural language generation systems have witnessed important progress in the last years, but they are shown to generate tokens that are unrelated to the source input. This problem affects computational models in many NLP tasks, and it is particularly unpleasant in multimodal systems. In this work, we assess the rate of object hallucination in multimodal conversational agents playing the GuessWhat?! referential game. Better visual processing has been shown to mitigate this issue in image captioning; hence, we adapt to the GuessWhat?! task the best visual processing models at disposal, and propose two new models to play the Questioner agent. We show that the new models generate few hallucinations compared to other renowned models available in the literature. Moreover, their hallucinations are less severe (affect task-accuracy less) and are more human-like. We also analyse where hallucinations tend to occur more often through the dialogue: hallucinations are less frequent in earlier turns, cause a cascade hallucination effect, and are often preceded by negative answers, which have been shown to be harder to ground. 2021.acl-srw.11 @@ -10023,7 +10023,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO How do different factors Impact the Inter-language Similarity? A Case Study on <fixed-case>I</fixed-case>ndian languages SouravKumar SalilAggarwal - Dipti MisraSharma + Dipti MisraSharma RadhikaMamidi 112–118 India is one of the most linguistically diverse nations of the world and is culturally very rich. Most of these languages are somewhat similar to each other on account of sharing a common ancestry or being in contact for a long period of time. Nowadays, researchers are constantly putting efforts in utilizing the language relatedness to improve the performance of various NLP systems such as cross lingual semantic search, machine translation, sentiment analysis systems, etc. So in this paper, we performed an extensive case study on similarity involving languages of the Indian subcontinent. Language similarity prediction is defined as the task of measuring how similar the two languages are on the basis of their lexical, morphological and syntactic features. In this study, we concentrate only on the approach to calculate lexical similarity between Indian languages by looking at various factors such as size and type of corpus, similarity algorithms, subword segmentation, etc. The main takeaways from our work are: (i) Relative order of the language similarities largely remain the same, regardless of the factors mentioned above, (ii) Similarity within the same language family is higher, (iii) Languages share more lexical features at the subword level. @@ -10035,7 +10035,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO <fixed-case>COVID</fixed-case>-19 and Misinformation: A Large-Scale Lexical Analysis on <fixed-case>T</fixed-case>witter DimosthenisAntypas - JoseCamacho-Collados + JoseCamacho-Collados AlunPreece DavidRogers 119–126 @@ -10115,7 +10115,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO SmritiSingh TanviAnand ArijitGhosh Chowdhury - ZeerakWaseem + ZeerakWaseem 180–185 Television shows play an important role inpropagating societal norms. Owing to the popularity of the situational comedy (sitcom) genre, it contributes significantly to the over-all development of society. In an effort to analyze the content of television shows belong-ing to this genre, we present a dataset of dialogue turns from popular sitcoms annotated for the presence of sexist remarks. We train a text classification model to detect sexism using domain adaptive learning. We apply the model to our dataset to analyze the evolution of sexist content over the years. We propose a domain-specific semi-supervised architecture for the aforementioned detection of sexism. Through extensive experiments, we show that our model often yields better classification performance over generic deep learn-ing based sentence classification that does not employ domain-specific training. We find that while sexism decreases over time on average,the proportion of sexist dialogue for the most sexist sitcom actually increases. A quantitative analysis along with a detailed error analysis presents the case for our proposed methodology 2021.acl-srw.19 @@ -10139,7 +10139,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Improving the Robustness of <fixed-case>QA</fixed-case> Models to Challenge Sets with Variational Question-Answer Pair Generation KazutoshiShinoda SakuSugawara - AkikoAizawa + AkikoAizawa 197–214 Question answering (QA) models for reading comprehension have achieved human-level accuracy on in-distribution test sets. However, they have been demonstrated to lack robustness to challenge sets, whose distribution is different from that of training sets. Existing data augmentation methods mitigate this problem by simply augmenting training sets with synthetic examples sampled from the same distribution as the challenge sets. However, these methods assume that the distribution of a challenge set is known a priori, making them less applicable to unseen challenge sets. In this study, we focus on question-answer pair generation (QAG) to mitigate this problem. While most existing QAG methods aim to improve the quality of synthetic examples, we conjecture that diversity-promoting QAG can mitigate the sparsity of training sets and lead to better robustness. We present a variational QAG model that generates multiple diverse QA pairs from a paragraph. Our experiments show that our method can improve the accuracy of 12 challenge sets, as well as the in-distribution accuracy. 2021.acl-srw.21 @@ -10162,7 +10162,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO How Many Layers and Why? <fixed-case>A</fixed-case>n Analysis of the Model Depth in Transformers AntoineSimoulin - BenoitCrabbé + BenoitCrabbé 221–228 In this study, we investigate the role of the multiple layers in deep transformer models. We design a variant of Albert that dynamically adapts the number of layers for each token of the input. The key specificity of Albert is that weights are tied across layers. Therefore, the stack of encoder layers iteratively repeats the application of the same transformation function on the input. We interpret the repetition of this application as an iterative process where the token contextualized representations are progressively refined. We analyze this process at the token level during pre-training, fine-tuning, and inference. We show that tokens do not require the same amount of iterations and that difficult or crucial tokens for the task are subject to more iterations. 2021.acl-srw.23 @@ -10236,7 +10236,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO DuanchenLiu QingyunYang ZoeyLiu - EmilyPrud’hommeaux + EmilyPrud’hommeaux 284–291 Individuals with autism spectrum disorder (ASD) experience difficulties in social aspects of communication, but the linguistic characteristics associated with deficits in discourse and pragmatic expression are often difficult to precisely identify and quantify. We are currently collecting a corpus of transcribed natural conversations produced in an experimental setting in which participants with and without ASD complete a number of collaborative tasks with their neurotypical peers. Using this dyadic conversational data, we investigate three pragmatic features – politeness, uncertainty, and informativeness – and present a dataset of utterances annotated for each of these features on a three-point scale. We then introduce ongoing work in developing and training neural models to automatically predict these features, with the goal of identifying the same between-groups differences that are observed using manual annotations. We find the best performing model for all three features is a feed-forward neural network trained with BERT embeddings. Our models yield higher accuracy than ones used in previous approaches for deriving these features, with F1 exceeding 0.82 for all three pragmatic features. 2021.acl-srw.29 @@ -10311,7 +10311,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO RaúlVázquez HandeCelikkanat MathiasCreutz - JörgTiedemann + JörgTiedemann 337–347 Various studies show that pretrained language models such as BERT cannot straightforwardly replace encoders in neural machine translation despite their enormous success in other tasks. This is even more astonishing considering the similarities between the architectures. This paper sheds some light on the embedding spaces they create, using average cosine similarity, contextuality metrics and measures for representational similarity for comparison, revealing that BERT and NMT encoder representations look significantly different from one another. In order to address this issue, we propose a supervised transformation from one into the other using explicit alignment and fine-tuning. Our results demonstrate the need for such a transformation to improve the applicability of BERT in MT. 2021.acl-srw.35 @@ -10335,7 +10335,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: System Demonstrations HengJi - Jong C.Park + Jong C.Park RuiXia Association for Computational Linguistics
Online
@@ -10412,7 +10412,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO PuzhaoXie ZhipengChen ZhuohaoYu - Wayne XinZhao + Wayne XinZhao Ji-RongWen 30–39 In this paper, we release an open-source library, called TextBox, to provide a unified, modularized, and extensible text generation framework. TextBox aims to support a broad set of text generation tasks and models. In our library, we implement 21 text generation models on 9 benchmark datasets, covering the categories of VAE, GAN, and pretrained language models. Meanwhile, our library maintains sufficient modularity and extensibility by properly decomposing the model architecture, inference, and learning process into highly reusable modules, which allows users to easily incorporate new models into our framework. The above features make TextBox especially suitable for researchers and practitioners to quickly reproduce baseline models and develop new models. TextBox is implemented based on PyTorch, and released under Apache License 2.0 at the link https://github.com/RUCAIBox/TextBox. @@ -10466,7 +10466,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO MasoudJalili Sabet PhilippDufter MichaelCysou - HinrichSchütze + HinrichSchütze 63–72 With more than 7000 languages worldwide, multilingual natural language processing (NLP) is essential both from an academic and commercial perspective. Researching typological properties of languages is fundamental for progress in multilingual NLP. Examples include assessing language similarity for effective transfer learning, injecting inductive biases into machine learning models or creating resources such as dictionaries and inflection tables. We provide ParCourE, an online tool that allows to browse a word-aligned parallel corpus, covering 1334 languages. We give evidence that this is useful for typological research. ParCourE can be set up for any parallel corpus and can thus be used for typological research on other corpora as well as for exploring their quality and properties. 2021.acl-demo.8 @@ -10479,8 +10479,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO RicardoRei Ana CFarinha CraigStewart - LuisaCoheur - AlonLavie + LuisaCoheur + AlonLavie 73–80 We present MT-Telescope, a visualization platform designed to facilitate comparative analysis of the output quality of two Machine Translation (MT) systems. While automated MT evaluation metrics are commonly used to evaluate MT systems at a corpus-level, our platform supports fine-grained segment-level analysis and interactive visualisations that expose the fundamental differences in the performance of the compared systems. MT-Telescope also supports dynamic corpus filtering to enable focused analysis on specific phenomena such as; translation of named entities, handling of terminology, and the impact of input segment length on translation quality. Furthermore, the platform provides a bootstrapped t-test for statistical significance as a means of evaluating the rigor of the resulting system ranking. MT-Telescope is open source, written in Python, and is built around a user friendly and dynamic web interface. Complementing other existing tools, our platform is designed to facilitate and promote the broader adoption of more rigorous analysis practices in the evaluation of MT quality. 2021.acl-demo.9 @@ -10523,7 +10523,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO ZhichaoGeng HangYan XipengQiu - XuanjingHuang + XuanjingHuang 99–106 We present fastHan, an open-source toolkit for four basic tasks in Chinese natural language processing: Chinese word segmentation (CWS), Part-of-Speech (POS) tagging, named entity recognition (NER), and dependency parsing. The backbone of fastHan is a multi-task model based on a pruned BERT, which uses the first 8 layers in BERT. We also provide a 4-layer base model compressed from the 8-layer model. The joint-model is trained and evaluated on 13 corpora of four tasks, yielding near state-of-the-art (SOTA) performance in dependency parsing and NER, achieving SOTA performance in CWS and POS. Besides, fastHan’s transferability is also strong, performing much better than popular segmentation tools on a non-training corpus. To better meet the need of practical application, we allow users to use their own labeled data to further fine-tune fastHan. In addition to its small size and excellent performance, fastHan is user-friendly. Implemented as a python package, fastHan isolates users from the internal technical details and is convenient to use. The project is released on Github. 2021.acl-demo.12 @@ -10613,8 +10613,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO A Graphical Interface for Curating Schemas PiyushMishra AkankshaMalhotra - Susan WindischBrown - MarthaPalmer + Susan WindischBrown + MarthaPalmer GhazalehKazeminejad 159–166 Much past work has focused on extracting information like events, entities, and relations from documents. Very little work has focused on analyzing these results for better model understanding. In this paper, we introduce a curation interface that takes an Information Extraction (IE) system’s output in a pre-defined format and generates a graphical representation of its elements. The interface supports editing while curating schemas for complex events like Improvised Explosive Device (IED) based scenarios. We identify various schemas that either have linear event chains or contain parallel events with complicated temporal ordering. We iteratively update an induced schema to uniquely identify events specific to it, add optional events around them, and prune unnecessary events. The resulting schemas are improved and enriched versions of the machine-induced versions. @@ -10669,7 +10669,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO YuanhangZhou ChenzhanShang YuanCheng - Wayne XinZhao + Wayne XinZhao YaliangLi Ji-RongWen 185–193 @@ -10685,7 +10685,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO JuliaRozanova MokanaranganThayaparan MarcoValentino - AndréFreitas + AndréFreitas 194–201 Probing (or diagnostic classification) has become a popular strategy for investigating whether a given set of intermediate features is present in the representations of neural models. Naive probing studies may have misleading results, but various recent works have suggested more reliable methodologies that compensate for the possible pitfalls of probing. However, these best practices are numerous and fast-evolving. To simplify the process of running a set of probing experiments in line with suggested methodologies, we introduce Probe-Ably: an extendable probing framework which supports and automates the application of probing methods to the user’s inputs. 2021.acl-demo.23 @@ -10696,8 +10696,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO <fixed-case>CLTR</fixed-case>: An End-to-End, Transformer-Based System for Cell-Level Table Retrieval and Table Question Answering FeifeiPan MustafaCanim - MichaelGlass - AlfioGliozzo + MichaelGlass + AlfioGliozzo PeterFox 202–209 We present the first end-to-end, transformer-based table question answering (QA) system that takes natural language questions and massive table corpora as inputs to retrieve the most relevant tables and locate the correct table cells to answer the question. Our system, CLTR, extends the current state-of-the-art QA over tables model to build an end-to-end table QA architecture. This system has successfully tackled many real-world table QA problems with a simple, unified pipeline. Our proposed system can also generate a heatmap of candidate columns and rows over complex tables and allow users to quickly identify the correct cells to answer questions. In addition, we introduce two new open domain benchmarks, E2E_WTQ and E2E_GNQ, consisting of 2,005 natural language questions over 76,242 tables. The benchmarks are designed to validate CLTR as well as accommodate future table retrieval and end-to-end table QA research and experiments. Our experiments demonstrate that our system is the current state-of-the-art model on the table retrieval task and produces promising results for end-to-end table QA. @@ -10816,7 +10816,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO <fixed-case>T</fixed-case>wee<fixed-case>NLP</fixed-case>: A <fixed-case>T</fixed-case>witter Exploration Portal for Natural Language Processing VirajShah ShrutiSingh - MayankSingh + MayankSingh 265–271 We present TweeNLP, a one-stop portal that organizes Twitter’s natural language processing (NLP) data and builds a visualization and exploration platform. It curates 19,395 tweets (as of April 2021) from various NLP conferences and general NLP discussions. It supports multiple features such as TweetExplorer to explore tweets by topics, visualize insights from Twitter activity throughout the organization cycle of conferences, discover popular research papers and researchers. It also builds a timeline of conference and workshop submission deadlines. We envision TweeNLP to function as a collective memory unit for the NLP community by integrating the tweets pertaining to research papers with the NLPExplorer scientific literature search engine. The current system is hosted at http://nlpexplorer.org/twitter/CFP. 2021.acl-demo.32 @@ -10916,7 +10916,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO ShuangChen QianLiu ZhiweiYu - Chin-YewLin + Chin-YewLin Jian-GuangLou FengJiang 325–336 @@ -10951,7 +10951,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO ZexiongPang QinzhuoWu ZhengyanLi - ChongZhang + ChongZhang RuotianMa ZichuFei RuijianCai @@ -10972,7 +10972,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO YaqianZhou ZhongyuWei XipengQiu - XuanjingHuang + XuanjingHuang 347–355 TextFlint is a multilingual robustness evaluation toolkit for NLP tasks that incorporates universal text transformation, task-specific transformation, adversarial attack, subpopulation, and their combinations to provide comprehensive robustness analyses. This enables practitioners to automatically evaluate their models from various aspects or to customize their evaluations as desired with just a few lines of code. TextFlint also generates complete analytical reports as well as targeted augmented data to address the shortcomings of the model in terms of its robustness. To guarantee acceptability, all the text transformations are linguistically based and all the transformed data selected (up to 100,000 texts) scored highly under human evaluation. To validate the utility, we performed large-scale empirical evaluations (over 67,000) on state-of-the-art deep learning models, classic supervised methods, and real-world systems. The toolkit is already available at https://github.com/textflint with all the evaluation results demonstrated at textflint.io. 2021.acl-demo.41 @@ -10985,7 +10985,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Chi-yangHsu Yun-WeiChu Tsai-LunYang - Ting-HaoHuang + Ting-HaoHuang Lun-WeiKu 356–362 In visual storytelling, a short story is generated based on a given image sequence. Despite years of work, most visual storytelling models remain limited in terms of the generated stories’ fixed length: most models produce stories with exactly five sentences because five-sentence stories dominate the training data. The fix-length stories carry limited details and provide ambiguous textual information to the readers. Therefore, we propose to “stretch” the stories, which create the potential to present in-depth visual details. This paper presents Stretch-VST, a visual storytelling framework that enables the generation of prolonged stories by adding appropriate knowledge, which is selected by the proposed scoring function. We propose a length-controlled Transformer to generate long stories. This model introduces novel positional encoding methods to maintain story quality with lengthy inputs. Experiments confirm that long stories are generated without deteriorating the quality. The human evaluation further shows that Stretch-VST can provide better focus and detail when stories are prolonged compared to state of the art. We create a webpage to demonstrate our prolonged capability. @@ -11032,7 +11032,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Advances in Debating Technologies: Building <fixed-case>AI</fixed-case> That Can Debate Humans RoyBar-Haim - LiatEin-Dor + LiatEin-Dor MatanOrbach EladVenezian NoamSlonim @@ -11058,7 +11058,7 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO QiangNing ManlingLi HengJi - KathleenMcKeown + KathleenMcKeown DanRoth 6–14 This tutorial targets researchers and practitioners who are interested in AI technologies that help machines understand natural language text, particularly real-world events described in the text. These include methods to extract the internal structures of an event regarding its protagonist(s), participant(s) and properties, as well as external structures concerning memberships, temporal and causal relations of multiple events. This tutorial will provide audience with a systematic introduction of (i) knowledge representations of events, (ii) various methods for automated extraction, conceptualization and prediction of events and their relations, (iii) induction of event processes and properties, and (iv) a wide range of NLU and commonsense understanding tasks that benefit from aforementioned techniques. We will conclude the tutorial by outlining emerging research problems in this area. @@ -11093,8 +11093,8 @@ The source code has been made available at \url{https://github.com/liam0949/DCLO Prosody: Models, Methods, and Applications - NigelWard - Gina-AnneLevow + NigelWard + Gina-AnneLevow 26–28 Prosody is essential in human interaction, enabling people to show interest, establish rapport, efficiently convey nuances of attitude or intent, and so on. Some applications that exploit prosodic knowledge have recently shown superhuman performance, and in many respects our ability to effectively model prosody is rapidly advancing. This tutorial will overview the computational modeling of prosody, including recent advances and diverse actual and potential applications. 2021.acl-tutorials.5 diff --git a/data/xml/2021.adaptnlp.xml b/data/xml/2021.adaptnlp.xml index 3e10af2f14..926947634f 100644 --- a/data/xml/2021.adaptnlp.xml +++ b/data/xml/2021.adaptnlp.xml @@ -4,9 +4,9 @@ Proceedings of the Second Workshop on Domain Adaptation for NLP EyalBen-David - ShayCohen + ShayCohen RyanMcDonald - BarbaraPlank + BarbaraPlank RoiReichart GuyRotman YftahZiser @@ -23,7 +23,7 @@ Multidomain Pretrained Language Models for Green <fixed-case>NLP</fixed-case> AntonisMaronikolakis - HinrichSchütze + HinrichSchütze 1–8 When tackling a task in a given domain, it has been shown that adapting a model to the domain using raw text data before training on the supervised task improves performance versus solely training on the task. The downside is that a lot of domain data is required and if we want to tackle tasks in n domains, we require n models each adapted on domain data before task learning. Storing and using these models separately can be prohibitive for low-end devices. In this paper we show that domain adaptation can be generalised to cover multiple domains. Specifically, a single model can be trained across various domains at the same time with minimal drop in performance, even when we use less data and resources. Thus, instead of training multiple models, we can train a single multidomain model saving on computational resources and training time. 2021.adaptnlp-1.1 @@ -43,7 +43,7 @@ Conditional Adversarial Networks for Multi-Domain Text Classification YuanWu - DianaInkpen + DianaInkpen AhmedEl-Roby 16–27 In this paper, we propose conditional adversarial networks (CANs), a framework that explores the relationship between the shared features and the label predictions to impose stronger discriminability to the learned features, for multi-domain text classification (MDTC). The proposed CAN introduces a conditional domain discriminator to model the domain variance in both the shared feature representations and the class-aware information simultaneously, and adopts entropy conditioning to guarantee the transferability of the shared features. We provide theoretical analysis for the CAN framework, showing that CAN’s objective is equivalent to minimizing the total divergence among multiple joint distributions of shared features and label predictions. Therefore, CAN is a theoretically sound adversarial network that discriminates over multiple distributions. Evaluation results on two MDTC benchmarks show that CAN outperforms prior methods. Further experiments demonstrate that CAN has a good ability to generalize learned knowledge to unseen domains. @@ -108,7 +108,7 @@ Addressing Zero-Resource Domains Using Document-Level Context in Neural Machine Translation DarioStojanovski - AlexanderFraser + AlexanderFraser 80–93 Achieving satisfying performance in machine translation on domains for which there is no training data is challenging. Traditional supervised domain adaptation is not suitable for addressing such zero-resource domains because it relies on in-domain parallel data. We show that when in-domain parallel data is not available, access to document-level context enables better capturing of domain generalities compared to only having access to a single sentence. Having access to more information provides a more reliable domain estimation. We present two document-level Transformer models which are capable of using large context sizes and we compare these models against strong Transformer baselines. We obtain improvements for the two zero-resource domains we study. We additionally provide an analysis where we vary the amount of context and look at the case where in-domain data is available. 2021.adaptnlp-1.9 @@ -128,7 +128,7 @@ Domain adaptation in practice: Lessons from a real-world information extraction pipeline - TimothyMiller + TimothyMiller EgoitzLaparra StevenBethard 105–110 @@ -180,7 +180,7 @@ Dependency Parsing Evaluation for Low-resource Spontaneous Speech ZoeyLiu - EmilyPrud’hommeaux + EmilyPrud’hommeaux 156–165 How well can a state-of-the-art parsing system, developed for the written domain, perform when applied to spontaneous speech data involving different interlocutors? This study addresses this question in a low-resource setting using child-parent conversations from the CHILDES databse. Specifically, we focus on dependency parsing evaluation for utterances of one specific child (18 - 27 months) and her parents. We first present a semi-automatic adaption of the dependency annotation scheme in CHILDES to that of the Universal Dependencies project, an annotation style that is more commonly applied in dependency parsing. Our evaluation demonstrates that an outof-domain biaffine parser trained only on written texts performs well with parent speech. There is, however, much room for improvement on child utterances, particularly at 18 and 21 months, due to cases of omission and repetition that are prevalent in child speech. By contrast, parsers trained or fine-tuned with in-domain spoken data on a much smaller scale can achieve comparable results for parent speech and improve the weak parsing performance for child speech at these earlier ages 2021.adaptnlp-1.16 @@ -198,7 +198,7 @@ User Factor Adaptation for User Embedding via Multitask Learning XiaoleiHuang - Michael J.Paul + Michael J.Paul FranckDernoncourt RobinBurke MarkDredze @@ -257,7 +257,7 @@ AbdulWaheed DevamanyuHazarika Min-YenKan - Rajiv RatnShah + Rajiv RatnShah 222–244 The robustness of pretrained language models(PLMs) is generally measured using performance drops on two or more domains. However, we do not yet understand the inherent robustness achieved by contributions from different layers of a PLM. We systematically analyze the robustness of these representations layer by layer from two perspectives. First, we measure the robustness of representations by using domain divergence between two domains. We find that i) Domain variance increases from the lower to the upper layers for vanilla PLMs; ii) Models continuously pretrained on domain-specific data (DAPT)(Gururangan et al., 2020) exhibit more variance than their pretrained PLM counterparts; and that iii) Distilled models (e.g., DistilBERT) also show greater domain variance. Second, we investigate the robustness of representations by analyzing the encoded syntactic and semantic information using diagnostic probes. We find that similar layers have similar amounts of linguistic information for data from an unseen domain. 2021.adaptnlp-1.23 @@ -266,10 +266,10 @@ Few-Shot Learning of an Interleaved Text Summarization Model by Pretraining with Synthetic Data Sanjeev KumarKarn - FrancineChen + FrancineChen Yan-YingChen UlliWaltinger - HinrichSchütze + HinrichSchütze 245–254 Interleaved texts, where posts belonging to different threads occur in a sequence, commonly occur in online chat posts, so that it can be time-consuming to quickly obtain an overview of the discussions. Existing systems first disentangle the posts by threads and then extract summaries from those threads. A major issue with such systems is error propagation from the disentanglement component. While end-to-end trainable summarization system could obviate explicit disentanglement, such systems require a large amount of labeled data. To address this, we propose to pretrain an end-to-end trainable hierarchical encoder-decoder system using synthetic interleaved texts. We show that by fine-tuning on a real-world meeting dataset (AMI), such a system out-performs a traditional two-step system by 22%. We also compare against transformer models and observed that pretraining with synthetic data both the encoder and decoder outperforms the BertSumExtAbs transformer model which pretrains only the encoder on a large dataset. 2021.adaptnlp-1.24 diff --git a/data/xml/2021.alta.xml b/data/xml/2021.alta.xml index a541f1b9b2..17fb554fcc 100644 --- a/data/xml/2021.alta.xml +++ b/data/xml/2021.alta.xml @@ -42,7 +42,7 @@ EhsanAbbasnejad IngridZukerman WrayBuntine - GholamrezaHaffari + GholamrezaHaffari 22–33 Visual question answering (VQA) models, in particular modular ones, are commonly trained on large-scale datasets to achieve state-of-the-art performance. However, such datasets are sometimes not available. Further, it has been shown that training these models on small datasets significantly reduces their accuracy. In this paper, we propose curriculum-based learning (CL) regime to increase the accuracy of VQA models trained on small datasets. Specifically, we offer three criteria to rank the samples in these datasets and propose a training strategy for each criterion. Our results show that, for small datasets, our CL approach yields more accurate results than those obtained when training with no curriculum. 2021.alta-1.3 @@ -73,7 +73,7 @@ RhysBiddle MaciekRybinski QianLi - CecileParis + CecileParis GuandongXu 58–67 The detection of hyperbole is an important stepping stone to understanding the intentions of a hyperbolic utterance. We propose a model that combines pre-trained language models with privileged information for the task of hyperbole detection. We also introduce a suite of behavioural tests to probe the capabilities of hyperbole detection models across a range of hyperbole types. Our experiments show that our model improves upon baseline models on an existing hyperbole detection dataset. Probing experiments combined with analysis using local linear approximations (LIME) show that our model excels at detecting one particular type of hyperbole. Further, we discover that our experiments highlight annotation artifacts introduced through the process of literal paraphrasing of hyperbole. These annotation artifacts are likely to be a roadblock to further improvements in hyperbole detection. @@ -94,7 +94,7 @@ Phone Based Keyword Spotting for Transcribing Very Low Resource Languages Eric LeFerrand StevenBird - LaurentBesacier + LaurentBesacier 79–86 We investigate the efficiency of two very different spoken term detection approaches for transcription when the available data is insufficient to train a robust speech recognition system. This work is grounded in a very low-resource language documentation scenario where only a few minutes of recording have been transcribed for a given language so far. Experiments on two oral languages show that a pretrained universal phone recognizer, fine-tuned with only a few minutes of target language speech, can be used for spoken term detection through searches in phone confusion networks with a lexicon expressed as a finite state automaton. Experimental results show that a phone recognition based approach provides better overall performances than Dynamic Time Warping when working with clean data, and highlight the benefits of each methods for two types of speech corpus. 2021.alta-1.8 @@ -103,7 +103,7 @@ Evaluation of Review Summaries via Question-Answering NannanHuang - XiuzhenZhang + XiuzhenZhang 87–96 Summarisation of reviews aims at compressing opinions expressed in multiple review documents into a concise form while still covering the key opinions. Despite the advancement in summarisation models, evaluation metrics for opinionated text summaries lag behind and still rely on lexical-matching metrics such as ROUGE. In this paper, we propose to use the question-answering(QA) approach to evaluate summaries of opinions in reviews. We propose to identify opinion-bearing text spans in the reference summary to generate QA pairs so as to capture salient opinions. A QA model is then employed to probe the candidate summary to evaluate information overlap between candidate and reference summaries. We show that our metric RunQA, Review Summary Evaluation via Question Answering, correlates well with human judgments in terms of coverage and focus of information. Finally, we design an adversarial task and demonstrate that the proposed approach is more robust than metrics in the literature for ranking summaries. 2021.alta-1.9 @@ -113,7 +113,7 @@ Exploring Story Generation with Multi-task Objectives in Variational Autoencoders ZhuohanXie Jey HanLau - TrevorCohn + TrevorCohn 97–106 GPT-2 has been frequently adapted in story generation models as it provides powerful generative capability. However, it still fails to generate consistent stories and lacks diversity. Current story generation models leverage additional information such as plots or commonsense into GPT-2 to guide the generation process. These approaches focus on improving generation quality of stories while our work look at both quality and diversity. We explore combining BERT and GPT-2 to build a variational autoencoder (VAE), and extend it by adding additional objectives to learn global features such as story topic and discourse relations. Our evaluations show our enhanced VAE can provide better quality and diversity trade off, generate less repetitive story content and learn a more informative latent variable. 2021.alta-1.10 @@ -133,7 +133,7 @@ Robustness Analysis of Grover for Machine-Generated News Detection RinaldoGagiano Maria Myung-HeeKim - XiuzhenZhang + XiuzhenZhang JenniferBiggs 119–127 Advancements in Natural Language Generation have raised concerns on its potential misuse for deep fake news. Grover is a model for both generation and detection of neural fake news. While its performance on automatically discriminating neural fake news surpassed GPT-2 and BERT, Grover could face a variety of adversarial attacks to deceive detection. In this work, we present an investigation of Grover’s susceptibility to adversarial attacks such as character-level and word-level perturbations. The experiment results show that even a singular character alteration can cause Grover to fail, affecting up to 97% of target articles with unlimited attack attempts, exposing a lack of robustness. We further analyse these misclassified cases to highlight affected words, identify vulnerability within Grover’s encoder, and perform a novel visualisation of cumulative classification scores to assist in interpreting model behaviour. @@ -143,8 +143,8 @@ Document Level Hierarchical Transformer NajamZaidi - TrevorCohn - GholamrezaHaffari + TrevorCohn + GholamrezaHaffari 128–137 Generating long and coherent text is an important and challenging task encompassing many application areas such as summarization, document level machine translation and story generation. Despite the success in modeling intra-sentence coherence, existing long text generation models (e.g., BART and GPT-3) still struggle to maintain a coherent event sequence throughout the generated text. We conjecture that this is because of the difficulty for the model to revise, replace, revoke or delete any part that has been generated by the model. In this paper, we present a novel semi-autoregressive document generation model capable of revising and editing the generated text. Building on recent models by (Gu et al., 2019; Xu and Carpuat, 2020) we propose document generation as a hierarchical Markov decision process with a two level hierarchy, where the high and low level editing programs. We train our model using imitation learning (Hussein et al., 2017) and introduce roll-in policy such that each policy learns on the output of applying the previous action. Experiments applying the proposed approach sheds various insights on the problems of long text generation using our model. We suggest various remedies such as using distilled dataset, designing better attention mechanisms and using autoregressive models as a low level program. 2021.alta-1.13 @@ -174,7 +174,7 @@ Does <fixed-case>QA</fixed-case>-based intermediate training help fine-tuning language models for text classification? ShiweiZhang - XiuzhenZhang + XiuzhenZhang 158–162 Fine-tuning pre-trained language models for downstream tasks has become a norm for NLP. Recently it is found that intermediate training can improve performance for fine-tuning language models for target tasks, high-level inference tasks such as Question Answering (QA) tend to work best as intermediate tasks. However it is not clear if intermediate training generally benefits various language models. In this paper, using the SQuAD-2.0 QA task for intermediate training for target text classification tasks, we experimented on eight tasks for single-sequence classification and eight tasks for sequence-pair classification using two base and two compact language models. Our experiments show that QA-based intermediate training generates varying transfer performance across different language models, except for similar QA tasks. 2021.alta-1.16 @@ -201,9 +201,9 @@ Using Discourse Structure to Differentiate Focus Entities from Background Entities in Scientific Literature - AntonioJimeno Yepes + AntonioJimeno Yepes AmeerAlbahem - KarinVerspoor + KarinVerspoor 174–178 In developing systems to identify focus entities in scientific literature, we face the problem of discriminating key entities of interest from other potentially relevant entities of the same type mentioned in the articles. We introduce the task of pathogen characterisation. We aim to discriminate mentions of biological pathogens, that are actively studied in the research presented in scientific publications. These are the pathogens that are the focus of direct experimentation in the research, rather than those that are referred to for context or as playing secondary roles. In this paper, we explore the hypothesis that these focus entities can be differentiated from other, non-actively studied, pathogens mentioned in articles through analysis of the patterns of mentions across different sections of a scientific paper, that is, using the discourse structure of the paper. We provide an indicative case study with the help of a small data set of PubMed abstracts that have been annotated with actively mentioned pathogens. 2021.alta-1.19 @@ -215,9 +215,9 @@ AiliShen HiyoriYoshikawa ChunpengMa - DanielBeck + DanielBeck TomoyaIwakura - TimothyBaldwin + TimothyBaldwin 179–184 Hierarchical document categorisation is a special case of multi-label document categorisation, where there is a taxonomic hierarchy among the labels. While various approaches have been proposed for hierarchical document categorisation, there is no standard benchmark dataset, resulting in different methods being evaluated independently and there being no empirical consensus on what methods perform best. In this work, we examine different combinations of neural text encoders and hierarchical methods in an end-to-end framework, and evaluate over three datasets. We find that the performance of hierarchical document categorisation is determined not only by how the hierarchical information is modelled, but also the structure of the label hierarchy and class distribution. 2021.alta-1.20 @@ -251,7 +251,7 @@ Overview of the 2021 <fixed-case>ALTA</fixed-case> Shared Task: Automatic Grading of Evidence, 10 years later - DiegoMollá + DiegoMollá 201–204 The 2021 ALTA shared task is the 12th instance of a series of shared tasks organised by ALTA since 2010. Motivated by the advances in machine learning in the last 10 years, this year�s task is a re-visit of the 2011 ALTA shared task. Set within the framework of Evidence Based Medicine (EBM), the goal is to predict the qual-ity of the clinical evidence present in a set of documents. This year�s participant results didnot improve over those of participants from 2011. 2021.alta-1.23 diff --git a/data/xml/2021.alvr.xml b/data/xml/2021.alvr.xml index cc50d317f1..a418218be0 100644 --- a/data/xml/2021.alvr.xml +++ b/data/xml/2021.alvr.xml @@ -26,8 +26,8 @@ YuHong YuchenPan JianTang - JianminYao - GuodongZhou + JianminYao + GuodongZhou 1–10 Caption translation aims to translate image annotations (captions for short). Recently, Multimodal Neural Machine Translation (MNMT) has been explored as the essential solution. Besides of linguistic features in captions, MNMT allows visual(image) features to be used. The integration of multimodal features reinforces the semantic representation and considerably improves translation performance. However, MNMT suffers from the incongruence between visual and linguistic features. To overcome the problem, we propose to extend MNMT architecture with a harmonization network, which harmonizes multimodal features(linguistic and visual features)by unidirectional modal space conversion. It enables multimodal translation to be carried out in a seemingly monomodal translation pipeline. We experiment on the golden Multi30k-16 and 17. Experimental results show that, compared to the baseline,the proposed method yields the improvements of 2.2% BLEU for the scenario of translating English captions into German (En→De) at best,7.6% for the case of English-to-French translation(En→Fr) and 1.5% for English-to-Czech(En→Cz). The utilization of harmonization network leads to the competitive performance to the-state-of-the-art. 2021.alvr-1.1 diff --git a/data/xml/2021.americasnlp.xml b/data/xml/2021.americasnlp.xml index b2a41f7e9b..fe071b4bca 100644 --- a/data/xml/2021.americasnlp.xml +++ b/data/xml/2021.americasnlp.xml @@ -4,12 +4,12 @@ Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas ManuelMager - ArturoOncevay - AnnetteRios + ArturoOncevay + AnnetteRios Ivan Vladimir MezaRuiz AlexisPalmer GrahamNeubig - KatharinaKann + KatharinaKann Association for Computational Linguistics
Online
June @@ -32,7 +32,7 @@
A corpus of K’iche’ annotated for morphosyntactic structure - FrancisTyers + FrancisTyers RobertHenderson 10–20 This article describes a collection of sentences in K’iche’ annotated for morphology and syntax. K’iche’ is a language in the Mayan language family, spoken in Guatemala. The annotation is done according to the guidelines of the Universal Dependencies project. The corpus consists of a total of 1,433 sentences containing approximately 10,000 tokens and is released under a free/open-source licence. We present a comparison of parsing systems for K’iche’ using this corpus and describe how it can be used for mining linguistic examples. @@ -43,7 +43,7 @@ Investigating variation in written forms of <fixed-case>N</fixed-case>ahuatl using character-based language models RobertPugh - FrancisTyers + FrancisTyers 21–27 We describe experiments with character-based language modeling for written variants of Nahuatl. Using a standard LSTM model and publicly available Bible translations, we explore how character language models can be applied to the tasks of estimating mutual intelligibility, identifying genetic similarity, and distinguishing written variants. We demonstrate that these simple language models are able to capture similarities and differences that have been described in the linguistic literature. 2021.americasnlp-1.3 @@ -76,7 +76,7 @@ A survey of part-of-speech tagging approaches applied to K’iche’ - FrancisTyers + FrancisTyers NickHowell 44–52 We study the performance of several popular neural part-of-speech taggers from the Universal Dependencies ecosystem on Mayan languages using a small corpus of 1435 annotated K’iche’ sentences consisting of approximately 10,000 tokens, with encouraging results: F_1 scores 93%+ on lemmatisation, part-of-speech and morphological feature assignment. The high performance motivates a cross-language part-of-speech tagging study, where K’iche’-trained models are evaluated on two other Mayan languages, Kaqchikel and Uspanteko: performance on Kaqchikel is good, 63-85%, and on Uspanteko modest, 60-71%. Supporting experiments lead us to conclude the relative diversity of morphological features as a plausible explanation for the limiting factors in cross-language tagging performance, providing some direction for future sentence annotation and collection work to support these and other Mayan languages. @@ -112,7 +112,7 @@ A finite-state morphological analyser for <fixed-case>P</fixed-case>araguayan <fixed-case>G</fixed-case>uaraní AnastasiaKuznetsova - FrancisTyers + FrancisTyers 81–89 This article describes the development of morphological analyser for Paraguayan Guaraní, agglutinative indigenous language spoken by nearly 6 million people in South America. The implementation of our analyser uses HFST (Helsiki Finite State Technology) and two-level transducer that covers morphotactics and phonological processes occurring in Guaraní. We assess the efficacy of the approach on publicly available Wikipedia and Bible corpora and the naive coverage of analyser reaches 86% on Wikipedia and 91% on Bible corpora. 2021.americasnlp-1.9 @@ -123,7 +123,7 @@ Morphological Segmentation for <fixed-case>S</fixed-case>eneca ZoeyLiu RobertJimerson - EmilyPrud’hommeaux + EmilyPrud’hommeaux 90–101 This study takes up the task of low-resource morphological segmentation for Seneca, a critically endangered and morphologically complex Native American language primarily spoken in what is now New York State and Ontario. The labeled data in our experiments comes from two sources: one digitized from a publicly available grammar book and the other collected from informal sources. We treat these two sources as distinct domains and investigate different evaluation designs for model selection. The first design abides by standard practices and evaluate models with the in-domain development set, while the second one carries out evaluation using a development domain, or the out-of-domain development set. Across a series of monolingual and crosslinguistic training settings, our results demonstrate the utility of neural encoder-decoder architecture when coupled with multi-task learning. 2021.americasnlp-1.10 @@ -133,7 +133,7 @@ Representation of <fixed-case>Y</fixed-case>ine [<fixed-case>A</fixed-case>rawak] Morphology by Finite State Transducer Formalism AdrianoIngunza Torres - JohnMiller + JohnMiller ArturoOncevay RobertoZariquiey Biondi 102–112 @@ -166,7 +166,7 @@ Expanding <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for Polysynthetic Languages: A Case of <fixed-case>S</fixed-case>t. <fixed-case>L</fixed-case>awrence <fixed-case>I</fixed-case>sland <fixed-case>Y</fixed-case>upik Hyunji HayleyPark LaneSchwartz - FrancisTyers + FrancisTyers 131–142 This paper describes the development of the first Universal Dependencies (UD) treebank for St. Lawrence Island Yupik, an endangered language spoken in the Bering Strait region. While the UD guidelines provided a general framework for our annotations, language-specific decisions were made necessary by the rich morphology of the polysynthetic language. Most notably, we annotated a corpus at the morpheme level as well as the word level. The morpheme level annotation was conducted using an existing morphological analyzer and manual disambiguation. By comparing the two resulting annotation schemes, we argue that morpheme-level annotation is essential for polysynthetic languages like St. Lawrence Island Yupik. Word-level annotation results in degenerate trees for some Yupik sentences and often fails to capture syntactic relations that can be manifested at the morpheme level. Dependency parsing experiments provide further support for morpheme-level annotation. Implications for UD annotation of other polysynthetic languages are discussed. 2021.americasnlp-1.14 @@ -219,7 +219,7 @@ Ayuuk-<fixed-case>S</fixed-case>panish Neural Machine Translator DelfinoZacarías Márquez - Ivan VladimirMeza Ruiz + Ivan VladimirMeza Ruiz 168–172 This paper presents the first neural machine translator system for the Ayuuk language. In our experiments we translate from Ayuuk to Spanish, and fromSpanish to Ayuuk. Ayuuk is a language spoken in the Oaxaca state of Mexico by the Ayuukjä’äy people (in Spanish commonly known as Mixes. We use different sources to create a low-resource parallel corpus, more than 6,000 phrases. For some of these resources we rely on automatic alignment. The proposed system is based on the Transformer neural architecture and it uses sub-word level tokenization as the input. We show the current performance given the resources we have collected for the San Juan Güichicovi variant, they are promising, up to 5 BLEU. We based our development on the Masakhane project for African languages. 2021.americasnlp-1.19 @@ -237,7 +237,7 @@ Towards a morphological transducer and orthography converter for <fixed-case>W</fixed-case>estern <fixed-case>T</fixed-case>lacolula <fixed-case>V</fixed-case>alley <fixed-case>Z</fixed-case>apotec - JonathanWashington + JonathanWashington FelipeLopez BrookLillehaugen 185–193 @@ -267,7 +267,7 @@ LuisChiruzzo GustavoGiménez-Lugo RicardoRamos - Ivan VladimirMeza Ruiz + Ivan VladimirMeza Ruiz RolandoCoto-Solano AlexisPalmer ElisabethMager-Hois @@ -341,7 +341,7 @@ HéctorMurrieta Bello DanielHershcovich Miryamde Lhoneux - AndersSøgaard + AndersSøgaard 248–254 We evaluated a range of neural machine translation techniques developed specifically for low-resource scenarios. Unsuccessfully. In the end, we submitted two runs: (i) a standard phrase-based model, and (ii) a random babbling baseline using character trigrams. We found that it was surprisingly hard to beat (i), in spite of this model being, in theory, a bad fit for polysynthetic languages; and more interestingly, that (ii) was better than several of the submitted systems, highlighting how difficult low-resource machine translation for polysynthetic languages is. 2021.americasnlp-1.28 @@ -353,7 +353,7 @@ RaúlVázquez YvesScherrer SamiVirpioja - JörgTiedemann + JörgTiedemann 255–264 The University of Helsinki participated in the AmericasNLP shared task for all ten language pairs. Our multilingual NMT models reached the first rank on all language pairs in track 1, and first rank on nine out of ten language pairs in track 2. We focused our efforts on three aspects: (1) the collection of additional data from various sources such as Bibles and political constitutions, (2) the cleaning and filtering of training data with the OpusFilter toolkit, and (3) different multilingual training techniques enabled by the latest version of the OpenNMT-py toolkit to make the most efficient use of the scarce data. This paper describes our efforts in detail. 2021.americasnlp-1.29 diff --git a/data/xml/2021.argmining.xml b/data/xml/2021.argmining.xml index dfdd5bbcb2..398ac82cd3 100644 --- a/data/xml/2021.argmining.xml +++ b/data/xml/2021.argmining.xml @@ -3,7 +3,7 @@ Proceedings of the 8th Workshop on Argument Mining - KhalidAl-Khatib + KhalidAl-Khatib YufangHou ManfredStede Association for Computational Linguistics @@ -43,7 +43,7 @@ JuriOpitz PhilippHeinisch PhilippWiesenbach - PhilippCimiano + PhilippCimiano AnetteFrank 24–35 When assessing the similarity of arguments, researchers typically use approaches that do not provide interpretable evidence or justifications for their ratings. Hence, the features that determine argument similarity remain elusive. We address this issue by introducing novel argument similarity metrics that aim at high performance and explainability. We show that Abstract Meaning Representation (AMR) graphs can be useful for representing arguments, and that novel AMR graph metrics can offer explanations for argument similarity ratings. We start from the hypothesis that similar premises often lead to similar conclusions—and extend an approach for AMR-based argument similarity rating by estimating, in addition, the similarity of conclusions that we automatically infer from the arguments used as premises. We show that AMR similarity metrics make argument similarity judgements more interpretable and may even support argument quality judgements. Our approach provides significant performance improvements over strong baselines in a fully unsupervised setting. Finally, we make first steps to address the problem of reference-less evaluation of argumentative conclusion generations. @@ -128,7 +128,7 @@ ArisFergadis DimitrisPappas AntoniaKaramolegkou - HarisPapageorgiou + HarisPapageorgiou 100–111 Science, technology and innovation (STI) policies have evolved in the past decade. We are now progressing towards policies that are more aligned with sustainable development through integrating social, economic and environmental dimensions. In this new policy environment, the need to keep track of innovation from its conception in Science and Research has emerged. Argumentation mining, an interdisciplinary NLP field, gives rise to the required technologies. In this study, we present the first STI-driven multidisciplinary corpus of scientific abstracts annotated for argumentative units (AUs) on the sustainable development goals (SDGs) set by the United Nations (UN). AUs are the sentences conveying the Claim(s) reported in the author’s original research and the Evidence provided for support. We also present a set of strong, BERT-based neural baselines achieving an f1-score of 70.0 for Claim and 62.4 for Evidence identification evaluated with 10-fold cross-validation. To demonstrate the effectiveness of our models, we experiment with different test sets showing comparable performance across various SDG policy domains. Our dataset and models are publicly available for research purposes. 2021.argmining-1.10 @@ -151,7 +151,7 @@ Multilingual Counter Narrative Type Classification Yi-LingChung MarcoGuerini - RodrigoAgerri + RodrigoAgerri 125–132 The growing interest in employing counter narratives for hatred intervention brings with it a focus on dataset creation and automation strategies. In this scenario, learning to recognize counter narrative types from natural text is expected to be useful for applications such as hate speech countering, where operators from non-governmental organizations are supposed to answer to hate with several and diverse arguments that can be mined from online sources. This paper presents the first multilingual work on counter narrative type classification, evaluating SoTA pre-trained language models in monolingual, multilingual and cross-lingual settings. When considering a fine-grained annotation of counter narrative classes, we report strong baseline classification results for the majority of the counter narrative types, especially if we translate every language to English before cross-lingual prediction. This suggests that knowledge about counter narratives can be successfully transferred across languages. 2021.argmining-1.12 @@ -163,7 +163,7 @@ Predicting Moderation of Deliberative Arguments: Is Argument Quality the Key? NeeleFalk ImanJundi - Eva MariaVecchi + Eva MariaVecchi GabriellaLapesa 133–141 Human moderation is commonly employed in deliberative contexts (argumentation and discussion targeting a shared decision on an issue relevant to a group, e.g., citizens arguing on how to employ a shared budget). As the scale of discussion enlarges in online settings, the overall discussion quality risks to drop and moderation becomes more important to assist participants in having a cooperative and productive interaction. The scale also makes it more important to employ NLP methods for(semi-)automatic moderation, e.g. to prioritize when moderation is most needed. In this work, we make the first steps towards (semi-)automatic moderation by using state-of-the-art classification models to predict which posts require moderation, showing that while the task is undoubtedly difficult, performance is significantly above baseline. We further investigate whether argument quality is a key indicator of the need for moderation, showing that surprisingly, high quality arguments also trigger moderation. We make our code and data publicly available. @@ -174,7 +174,7 @@ Self-trained Pretrained Language Models for Evidence Detection MohamedElaraby - DianeLitman + DianeLitman 142–147 Argument role labeling is a fundamental task in Argument Mining research. However, such research often suffers from a lack of large-scale datasets labeled for argument roles such as evidence, which is crucial for neural model training. While large pretrained language models have somewhat alleviated the need for massive manually labeled datasets, how much these models can further benefit from self-training techniques hasn’t been widely explored in the literature in general and in Argument Mining specifically. In this work, we focus on self-trained language models (particularly BERT) for evidence detection. We provide a thorough investigation on how to utilize pseudo labels effectively in the self-training scheme. We also assess whether adding pseudo labels from an out-of-domain source can be beneficial. Experiments on sentence level evidence detection show that self-training can complement pretrained language models to provide performance improvements. 2021.argmining-1.14 @@ -184,7 +184,7 @@ Multi-task Learning in Argument Mining for Persuasive Online Discussions NhatTran - DianeLitman + DianeLitman 148–153 We utilize multi-task learning to improve argument mining in persuasive online discussions, in which both micro-level and macro-level argumentation must be taken into consideration. Our models learn to identify argument components and the relations between them at the same time. We also tackle the low-precision which arises from imbalanced relation data by experimenting with SMOTE and XGBoost. Our approaches improve over baselines that use the same pre-trained language model but process the argument component task and two relation tasks separately. Furthermore, our results suggest that the tasks to be incorporated into multi-task learning should be taken into consideration as using all relevant tasks does not always lead to the best performance. 2021.argmining-1.15 @@ -208,7 +208,7 @@ Matching The Statements: A Simple and Accurate Model for Key Point Analysis HoangPhan - LongNguyen + LongNguyen LongNguyen KhanhDoan 165–174 @@ -237,7 +237,7 @@ ShahbazSyed PhilippHeinisch MaximilianSpliethöver - PhilippCimiano + PhilippCimiano MartinPotthast HenningWachsmuth 184–189 diff --git a/data/xml/2021.autosimtrans.xml b/data/xml/2021.autosimtrans.xml index ade2f629a6..ca0063b7cb 100644 --- a/data/xml/2021.autosimtrans.xml +++ b/data/xml/2021.autosimtrans.xml @@ -9,7 +9,7 @@ ZhongjunHe QunLiu MahaElbayad - MarkLiberman + MarkLiberman HaifengWang MingboMa RuiqingZhang diff --git a/data/xml/2021.bea.xml b/data/xml/2021.bea.xml index 495e34b31b..b25511c16d 100644 --- a/data/xml/2021.bea.xml +++ b/data/xml/2021.bea.xml @@ -27,7 +27,7 @@ MengyuZhang WeiqiWang ShuqiaoSun - WeiweiSun + WeiweiSun 1–10 This paper studies Negation Scope Resolution (NSR) for Chinese as a Second Language (CSL), which shows many unique characteristics that distinguish itself from “standard” Chinese. We annotate a new moderate-sized corpus that covers two background L1 languages, viz. English and Japanese. We build a neural NSR system, which achieves a new state-of-the-art accuracy on English benchmark data. We leverage this system to gauge how successful NSR for CSL can be. Different native language backgrounds of language learners result in unequal cross-lingual transfer, which has a significant impact on processing second language data. In particular, manual annotation, empirical evaluation and error analysis indicate two non-obvious facts: 1) L2-Chinese, L1-Japanese data are more difficult to analyze and thus annotate than L2-Chinese, L1-English data; 2) computational models trained on L2-Chinese, L1-Japanese data perform better than models trained on L2-Chinese, L1-English data. 2021.bea-1.1 @@ -46,7 +46,7 @@ Employing distributional semantics to organize task-focused vocabulary learning HaemanthSanthi Ponnusamy - DetmarMeurers + DetmarMeurers 26–36 How can a learner systematically prepare for reading a book they are interested in? In this paper, we explore how computational linguistic methods such as distributional semantics, morphological clustering, and exercise generation can be combined with graph-based learner models to answer this question both conceptually and in practice. Based on highly structured learner models and concepts from network analysis, the learner is guided to efficiently explore the targeted lexical space. They practice using multi-gap learning activities generated from the book. In sum, the approach combines computational linguistic methods with concepts from network analysis and tutoring systems to support learners in pursuing their individual reading task goals. 2021.bea-1.3 @@ -66,7 +66,7 @@ Broad Linguistic Complexity Analysis for <fixed-case>G</fixed-case>reek Readability Classification SavvasChatzipanagiotidis MariaGiagkou - DetmarMeurers + DetmarMeurers 48–58 This paper explores the linguistic complexity of Greek textbooks as a readability classification task. We analyze textbook corpora for different school subjects and textbooks for Greek as a Second Language, covering a very wide spectrum of school age groups and proficiency levels. A broad range of quantifiable linguistic complexity features (lexical, morphological and syntactic) are extracted and calculated. Conducting experiments with different feature subsets, we show that the different linguistic dimensions contribute orthogonal information, each contributing towards the highest result achieved using all linguistic feature subsets. A readability classifier trained on this basis reaches a classification accuracy of 88.16% for the Greek as a Second Language corpus. To investigate the generalizability of the classification models, we also perform cross-corpus evaluations. We show that the model trained on the most varied text collection (for Greek as a school subject) generalizes best. In addition to advancing the state of the art for Greek readability analysis, the paper also contributes insights on the role of different feature sets and training setups for generalizable readability classification. 2021.bea-1.5 @@ -75,7 +75,7 @@ Character Set Construction for <fixed-case>C</fixed-case>hinese Language Learning Chak YanYeung - JohnLee + JohnLee 59–63 To promote efficient learning of Chinese characters, pedagogical materials may present not only a single character, but a set of characters that are related in meaning and in written form. This paper investigates automatic construction of these character sets. The proposed model represents a character as averaged word vectors of common words containing the character. It then identifies sets of characters with high semantic similarity through clustering. Human evaluation shows that this representation outperforms direct use of character embeddings, and that the resulting character sets capture distinct semantic ranges. 2021.bea-1.6 @@ -102,7 +102,7 @@ Essay Quality Signals as Weak Supervision for Source-based Essay Scoring HaoranZhang - DianeLitman + DianeLitman 85–96 Human essay grading is a laborious task that can consume much time and effort. Automated Essay Scoring (AES) has thus been proposed as a fast and effective solution to the problem of grading student writing at scale. However, because AES typically uses supervised machine learning, a human-graded essay corpus is still required to train the AES model. Unfortunately, such a graded corpus often does not exist, so creating a corpus for machine learning can also be a laborious task. This paper presents an investigation of replacing the use of human-labeled essay grades when training an AES system with two automatically available but weaker signals of essay quality: word count and topic distribution similarity. Experiments using two source-based essay scoring (evidence score) corpora show that while weak supervision does not yield a competitive result when training a neural source-based AES model, it can be used to successfully extract Topical Components (TCs) from a source text, which are required by a supervised feature-based AES model. In particular, results show that feature-based AES performance is comparable with either automatically or manually constructed TCs. 2021.bea-1.9 @@ -188,7 +188,7 @@ ManavRathod TonyTu YunfangXiao - Marti A.Hearst + Marti A.Hearst 158–170 Automated question generation has the potential to greatly aid in education applications, such as online study aids to check understanding of readings. The state-of-the-art in neural question generation has advanced greatly, due in part to the availability of large datasets of question-answer pairs. However, the questions generated are often surface-level and not challenging for a human to answer. To develop more challenging questions, we propose the novel task of cause-and-effect question generation. We build a pipeline that extracts causal relations from passages of input text, and feeds these as input to a state-of-the-art neural question generator. The extractor is based on prior work that classifies causal relations by linguistic category (Cao et al., 2016; Altenberg, 1984). This work results in a new, publicly available collection of cause-and-effect questions. We evaluate via both automatic and manual metrics and find performance improves for both question generation and question answering when we utilize a small auxiliary data source of cause-and-effect questions for fine-tuning. Our approach can be easily applied to generate cause-and-effect questions from other text collections and educational material, allowing for adaptable large-scale generation of cause-and-effect questions. 2021.bea-1.17 diff --git a/data/xml/2021.bionlp.xml b/data/xml/2021.bionlp.xml index fd2a2ce30f..0e1ff17b55 100644 --- a/data/xml/2021.bionlp.xml +++ b/data/xml/2021.bionlp.xml @@ -4,9 +4,9 @@ Proceedings of the 20th Workshop on Biomedical Language Processing DinaDemner-Fushman - Kevin BretonnelCohen + Kevin BretonnelCohen SophiaAnaniadou - JunichiTsujii + JunichiTsujii Association for Computational Linguistics
Online
June @@ -21,8 +21,8 @@ Improving <fixed-case>BERT</fixed-case> Model Using Contrastive Learning for Biomedical Relation Extraction PengSu - YifanPeng - K.Vijay-Shanker + YifanPeng + K.Vijay-Shanker 1–10 Contrastive learning has been used to learn a high-quality representation of the image in computer vision. However, contrastive learning is not widely utilized in natural language processing due to the lack of a general method of data augmentation for text data. In this work, we explore the method of employing contrastive learning to improve the text representation from the BERT model for relation extraction. The key knob of our framework is a unique contrastive pre-training step tailored for the relation extraction tasks by seamlessly integrating linguistic knowledge into the data augmentation. Furthermore, we investigate how large-scale data constructed from the external knowledge bases can enhance the generality of contrastive pre-training of BERT. The experimental results on three relation extraction benchmark datasets demonstrate that our method can improve the BERT model representation and achieve state-of-the-art performance. In addition, we explore the interpretability of models by showing that BERT with contrastive pre-training relies more on rationales for prediction. Our code and data are publicly available at: https://github.com/AnonymousForNow. 2021.bionlp-1.1 @@ -44,8 +44,8 @@ Scalable Few-Shot Learning of Robust Biomedical Name Representations PieterFivez - SimonSuster - WalterDaelemans + SimonSuster + WalterDaelemans 23–29 Recent research on robust representations of biomedical names has focused on modeling large amounts of fine-grained conceptual distinctions using complex neural encoders. In this paper, we explore the opposite paradigm: training a simple encoder architecture using only small sets of names sampled from high-level biomedical concepts. Our encoder post-processes pretrained representations of biomedical names, and is effective for various types of input representations, both domain-specific or unsupervised. We validate our proposed few-shot learning approach on multiple biomedical relatedness benchmarks, and show that it allows for continual learning, where we accumulate information from various conceptual hierarchies to consistently improve encoder performance. Given these findings, we propose our approach as a low-cost alternative for exploring the impact of conceptual distinctions on robust biomedical name representations. 2021.bionlp-1.3 @@ -66,8 +66,8 @@ Are we there yet? Exploring clinical domain knowledge of <fixed-case>BERT</fixed-case> models MadhumitaSushil - SimonSuster - WalterDaelemans + SimonSuster + WalterDaelemans 41–53 We explore whether state-of-the-art BERT models encode sufficient domain knowledge to correctly perform domain-specific inference. Although BERT implementations such as BioBERT are better at domain-based reasoning than those trained on general-domain corpora, there is still a wide margin compared to human performance on these tasks. To bridge this gap, we explore whether supplementing textual domain knowledge in the medical NLI task: a) by further language model pretraining on the medical domain corpora, b) by means of lexical match algorithms such as the BM25 algorithm, c) by supplementing lexical retrieval with dependency relations, or d) by using a trained retriever module, can push this performance closer to that of humans. We do not find any significant difference between knowledge supplemented classification as opposed to the baseline BERT models, however. This is contrary to the results for evidence retrieval on other tasks such as open domain question answering (QA). By examining the retrieval output, we show that the methods fail due to unreliable knowledge retrieval for complex domain-specific reasoning. We conclude that the task of unsupervised text retrieval to bridge the gap in existing information to facilitate inference is more complex than what the state-of-the-art methods can solve, and warrants extensive research in the future. 2021.bionlp-1.5 @@ -181,7 +181,7 @@ YannisKatsis TylerBaldwin Ho-CheolKim - Chun-NanHsu + Chun-NanHsu 126–130 NLP has emerged as an essential tool to extract knowledge from the exponentially increasing volumes of biomedical texts. Many NLP tasks, such as named entity recognition and named entity normalization, are especially challenging in the biomedical domain partly because of the prolific use of acronyms. Long names for diseases, bacteria, and chemicals are often replaced by acronyms. We propose Biomedical Local Acronym Resolver (BLAR), a high-performing acronym resolver that leverages state-of-the-art (SOTA) pre-trained language models to accurately resolve local acronyms in biomedical texts. We test BLAR on the Ab3P corpus and achieve state-of-the-art results compared to the current best-performing local acronym resolution algorithms and models. 2021.bionlp-1.14 @@ -190,7 +190,7 @@ Claim Detection in Biomedical <fixed-case>T</fixed-case>witter Posts - AmelieWührl + AmelieWührl RomanKlinger 131–142 Social media contains unfiltered and unique information, which is potentially of great value, but, in the case of misinformation, can also do great harm. With regards to biomedical topics, false information can be particularly dangerous. Methods of automatic fact-checking and fake news detection address this problem, but have not been applied to the biomedical domain in social media yet. We aim to fill this research gap and annotate a corpus of 1200 tweets for implicit and explicit biomedical claims (the latter also with span annotations for the claim phrase). With this corpus, which we sample to be related to COVID-19, measles, cystic fibrosis, and depression, we develop baseline models which detect tweets that contain a claim automatically. Our analyses reveal that biomedical tweets are densely populated with claims (45 % in a corpus sampled to contain 1200 tweets focused on the domains mentioned above). Baseline classification experiments with embedding-based classifiers and BERT-based transfer learning demonstrate that the detection is challenging, however, shows acceptable performance for the identification of explicit expressions of claims. Implicit claim tweets are more challenging to detect. @@ -234,7 +234,7 @@ Word-Level Alignment of Paper Documents with their Electronic Full-Text Counterparts - Mark-ChristophMüller + Mark-ChristophMüller SuchetaGhosh UlrikeWittig MajaRey @@ -246,7 +246,7 @@ Improving Biomedical Pretrained Language Models with Knowledge - ZhengYuan + ZhengYuan YijiaLiu ChuanqiTan SongfangHuang @@ -260,7 +260,7 @@ <fixed-case>E</fixed-case>ntity<fixed-case>BERT</fixed-case>: Entity-centric Masking Strategy for Model Pretraining for the Clinical Domain ChenLin - TimothyMiller + TimothyMiller DmitriyDligach StevenBethard GuerganaSavova @@ -273,8 +273,8 @@ Contextual explanation rules for neural clinical classifiers MadhumitaSushil - SimonSuster - WalterDaelemans + SimonSuster + WalterDaelemans 202–212 Several previous studies on explanation for recurrent neural networks focus on approaches that find the most important input segments for a network as its explanations. In that case, the manner in which these input segments combine with each other to form an explanatory pattern remains unknown. To overcome this, some previous work tries to find patterns (called rules) in the data that explain neural outputs. However, their explanations are often insensitive to model parameters, which limits the scalability of text explanations. To overcome these limitations, we propose a pipeline to explain RNNs by means of decision lists (also called rules) over skipgrams. For evaluation of explanations, we create a synthetic sepsis-identification dataset, as well as apply our technique on additional clinical and sentiment analysis datasets. We find that our technique persistently achieves high explanation fidelity and qualitatively interpretable rules. 2021.bionlp-1.22 @@ -283,7 +283,7 @@ Exploring Word Segmentation and Medical Concept Recognition for <fixed-case>C</fixed-case>hinese Medical Texts - YangLiu + YangLiu YuanheTian Tsung-HuiChang SongWu @@ -298,7 +298,7 @@ <fixed-case>B</fixed-case>io<fixed-case>M</fixed-case>-Transformers: Building Large Biomedical Language Models with <fixed-case>BERT</fixed-case>, <fixed-case>ALBERT</fixed-case> and <fixed-case>ELECTRA</fixed-case> SultanAlrowili - VijayShanker + VijayShanker 221–227 The impact of design choices on the performance of biomedical language models recently has been a subject for investigation. In this paper, we empirically study biomedical domain adaptation with large transformer models using different design choices. We evaluate the performance of our pretrained models against other existing biomedical language models in the literature. Our results show that we achieve state-of-the-art results on several biomedical domain tasks despite using similar or less computational cost compared to other models in the literature. Our findings highlight the significant effect of design choices on improving the performance of biomedical language models. 2021.bionlp-1.24 @@ -330,7 +330,7 @@ Measuring the relative importance of full text sections for information retrieval from scientific literature. LanaYeganova Won GyuKim - DonaldComeau + DonaldComeau W JohnWilbur ZhiyongLu 247–256 @@ -344,10 +344,10 @@ KhalilMrini FranckDernoncourt SeunghyunYoon - TrungBui + TrungBui WalterChang EmiliaFarcas - NdapaNakashole + NdapaNakashole 257–262 In this paper, we describe our approach to question summarization and multi-answer summarization in the context of the 2021 MEDIQA shared task (Ben Abacha et al., 2021). We propose two kinds of transfer learning for the abstractive summarization of medical questions. First, we train on HealthCareMagic, a large question summarization dataset collected from an online healthcare service platform. Second, we leverage the ability of the BART encoder-decoder architecture to model both generation and classification tasks to train on the task of Recognizing Question Entailment (RQE) in the medical domain. We show that both transfer learning methods combined achieve the highest ROUGE scores. Finally, we cast the question-driven extractive summarization of multiple relevant answer documents as an Answer Sentence Selection (AS2) problem. We show how we can preprocess the MEDIQA-AnS dataset such that it can be trained in an AS2 setting. Our AS2 model is able to generate extractive summaries achieving high ROUGE scores. 2021.bionlp-1.28 @@ -393,7 +393,7 @@ Optum at <fixed-case>MEDIQA</fixed-case> 2021: Abstractive Summarization of Radiology Reports using simple <fixed-case>BART</fixed-case> Finetuning - RaviKondadadi + RaviKondadadi SahilManchanda JasonNgo RonanMcCormack @@ -418,7 +418,7 @@ <fixed-case>NLM</fixed-case> at <fixed-case>MEDIQA</fixed-case> 2021: Transfer Learning-based Approaches for Consumer Question and Multi-Answer Summarization ShwetaYadav MouradSarrouti - DeepakGupta + DeepakGupta 291–301 The quest for seeking health information has swamped the web with consumers’ healthrelated questions, which makes the need for efficient and reliable question answering systems more pressing. The consumers’ questions, however, are very descriptive and contain several peripheral information (like patient’s medical history, demographic information, etc.), that are often not required for answering the question. Furthermore, it contributes to the challenges of understanding natural language questions for automatic answer retrieval. Also, it is crucial to provide the consumers with the exact and relevant answers, rather than the entire pool of answer documents to their question. One of the cardinal tasks in achieving robust consumer health question answering systems is the question summarization and multi-document answer summarization. This paper describes the participation of the U.S. National Library of Medicine (NLM) in Consumer Question and Multi-Answer Summarization tasks of the MEDIQA 2021 challenge at NAACL-BioNLP workshop. In this work, we exploited the capabilities of pre-trained transformer models and introduced a transfer learning approach for the abstractive Question Summarization and extractive Multi-Answer Summarization tasks by first pre-training our model on a task-specific summarization dataset followed by fine-tuning it for both the tasks via incorporating medical entities. We achieved the second, sixth and the fourth position for the Question Summarization task in terms ROUGE-1, ROUGE-2 and ROUGE-L scores respectively. 2021.bionlp-1.34 @@ -444,8 +444,8 @@ Minh-QuangNguyen Huy-SonNguyen Linh Nguyen TranNgoc - Quang-ThuyHa - Mai-VuTran + Quang-ThuyHa + Mai-VuTran 311–319 This paper describes a system developed to summarize multiple answers challenge in the MEDIQA 2021 shared task collocated with the BioNLP 2021 Workshop. We propose an extractive summarization architecture based on several scores and state-of-the-art techniques. We also present our novel prosper-thy-neighbour strategies to improve performance. Our model has been proven to be effective with the best ROUGE-1/ROUGE-L scores, being the shared task runner up by ROUGE-2 F1 score (over 13 participated teams). 2021.bionlp-1.36 @@ -456,7 +456,7 @@ <fixed-case>MNLP</fixed-case> at <fixed-case>MEDIQA</fixed-case> 2021: Fine-Tuning <fixed-case>PEGASUS</fixed-case> for Consumer Health Question Summarization JooyeonLee HuongDang - OzlemUzuner + OzlemUzuner SamHenry 320–327 This paper details a Consumer Health Question (CHQ) summarization model submitted to MEDIQA 2021 for shared task 1: Question Summarization. Many CHQs are composed of multiple sentences with typos or unnecessary information, which can interfere with automated question answering systems. Question summarization mitigates this issue by removing this unnecessary information, aiding automated systems in generating a more accurate summary. Our summarization approach focuses on applying multiple pre-processing techniques, including question focus identification on the input and the development of an ensemble method to combine question focus with an abstractive summarization method. We use the state-of-art abstractive summarization model, PEGASUS (Pre-training with Extracted Gap-sentences for Abstractive Summarization), to generate abstractive summaries. Our experiments show that using our ensemble method, which combines abstractive summarization with question focus identification, improves performance over using summarization alone. Our model shows a ROUGE-2 F-measure of 11.14% against the official test dataset. @@ -466,7 +466,7 @@ <fixed-case>UET</fixed-case>fishes at <fixed-case>MEDIQA</fixed-case> 2021: Standing-on-the-Shoulders-of-Giants Model for Abstractive Multi-answer Summarization - Hoang-QuynhLe + Hoang-QuynhLe Quoc-AnNguyen Quoc-HungDuong Minh-QuangNguyen diff --git a/data/xml/2021.blackboxnlp.xml b/data/xml/2021.blackboxnlp.xml index a4ed4df989..1981c30741 100644 --- a/data/xml/2021.blackboxnlp.xml +++ b/data/xml/2021.blackboxnlp.xml @@ -104,7 +104,7 @@ <fixed-case>ALL</fixed-case> Dolphins Are Intelligent and <fixed-case>SOME</fixed-case> Are Friendly: Probing <fixed-case>BERT</fixed-case> for Nouns’ Semantic Properties and their Prototypicality MariannaApidianaki - AinaGarí Soler + AinaGarí Soler 79–94 Large scale language models encode rich commonsense knowledge acquired through exposure to massive data during pre-training, but their understanding of entities and their semantic properties is unclear. We probe BERT (Devlin et al., 2019) for the properties of English nouns as expressed by adjectives that do not restrict the reference scope of the noun they modify (as in “red car”), but instead emphasise some inherent aspect (“red strawberry”). We base our study on psycholinguistics datasets that capture the association strength between nouns and their semantic features. We probe BERT using cloze tasks and in a classification setting, and show that the model has marginal knowledge of these features and their prevalence as expressed in these datasets. We discuss factors that make evaluation challenging and impede drawing general conclusions about the models’ knowledge of noun properties. Finally, we show that when tested in a fine-tuning setting addressing entailment, BERT successfully leverages the information needed for reasoning about the meaning of adjective-noun constructions outperforming previous methods. 2021.blackboxnlp-1.7 @@ -148,7 +148,7 @@ BertrandHigy LiekeGelderloos AfraAlishahi - GrzegorzChrupała + GrzegorzChrupała 163–176 The distributed and continuous representations used by neural networks are at odds with representations employed in linguistics, which are typically symbolic. Vector quantization has been proposed as a way to induce discrete neural representations that are closer in nature to their linguistic counterparts. However, it is not clear which metrics are the best-suited to analyze such discrete representations. We compare the merits of four commonly used metrics in the context of weakly supervised models of spoken language. We compare the results they show when applied to two different models, while systematically studying the effect of the placement and size of the discretization layer. We find that different evaluation regimes can give inconsistent results. While we can attribute them to the properties of the different metrics in most cases, one point of concern remains: the use of minimal pairs of phoneme triples as stimuli disadvantages larger discrete unit inventories, unlike metrics applied to complete utterances. Furthermore, while in general vector quantization induces representations that correlate with units posited in linguistics, the strength of this correlation is only moderate. 2021.blackboxnlp-1.11 @@ -196,7 +196,7 @@ On the Language-specificity of Multilingual <fixed-case>BERT</fixed-case> and the Impact of Fine-tuning MarcTanti - Lonnekevan der Plas + Lonnekevan der Plas ClaudiaBorg AlbertGatt 214–227 @@ -311,7 +311,7 @@ What <fixed-case>BERT</fixed-case> Based Language Model Learns in Spoken Transcripts: An Empirical Study - AyushKumar + AyushKumar MukunthaNarayanan Sundararaman JithendraVepa 322–336 @@ -389,7 +389,7 @@ BadrAbdullah IuliiaZaitova TaniaAvgustinova - BerndMöbius + BerndMöbius DietrichKlakow 407–419 How do neural networks “perceive” speech sounds from unknown languages? Does the typological similarity between the model’s training language (L1) and an unknown language (L2) have an impact on the model representations of L2 speech signals? To answer these questions, we present a novel experimental design based on representational similarity analysis (RSA) to analyze acoustic word embeddings (AWEs)—vector representations of variable-duration spoken-word segments. First, we train monolingual AWE models on seven Indo-European languages with various degrees of typological similarity. We then employ RSA to quantify the cross-lingual similarity by simulating native and non-native spoken-word processing using AWEs. Our experiments show that typological similarity indeed affects the representational similarity of the models in our study. We further discuss the implications of our work on modeling speech processing and language similarity with neural networks. @@ -443,9 +443,9 @@ Controlled tasks for model analysis: Retrieving discrete information from sequences - Ionut-TeodorSorodoc - GemmaBoleda - MarcoBaroni + Ionut-TeodorSorodoc + GemmaBoleda + MarcoBaroni 468–478 In recent years, the NLP community has shown increasing interest in analysing how deep learning models work. Given that large models trained on complex tasks are difficult to inspect, some of this work has focused on controlled tasks that emulate specific aspects of language. We propose a new set of such controlled tasks to explore a crucial aspect of natural language processing that has not received enough attention: the need to retrieve discrete information from sequences. We also study model behavior on the tasks with simple instantiations of Transformers and LSTMs. Our results highlight the beneficial role of decoder attention and its sometimes unexpected interaction with other components. Moreover, we show that, for most of the tasks, these simple models still show significant difficulties. We hope that the community will take up the analysis possibilities that our tasks afford, and that a clearer understanding of model behavior on the tasks will lead to better and more transparent models. 2021.blackboxnlp-1.37 @@ -477,7 +477,7 @@ Do Language Models Know the Way to <fixed-case>R</fixed-case>ome? BastienLiétard MostafaAbdou - AndersSøgaard + AndersSøgaard 510–517 The global geometry of language models is important for a range of applications, but language model probes tend to evaluate rather local relations, for which ground truths are easily obtained. In this paper we exploit the fact that in geography, ground truths are available beyond local relations. In a series of experiments, we evaluate the extent to which language model representations of city and country names are isomorphic to real-world geography, e.g., if you tell a language model where Paris and Berlin are, does it know the way to Rome? We find that language models generally encode limited geographic information, but with larger models performing the best, suggesting that geographic knowledge can be induced from higher-order co-occurrence statistics. 2021.blackboxnlp-1.40 @@ -501,7 +501,7 @@ Fine-Tuned Transformers Show Clusters of Similar Representations Across Layers JasonPhang HaokunLiu - Samuel R.Bowman + Samuel R.Bowman 529–538 Despite the success of fine-tuning pretrained language encoders like BERT for downstream natural language understanding (NLU) tasks, it is still poorly understood how neural networks change after fine-tuning. In this work, we use centered kernel alignment (CKA), a method for comparing learned representations, to measure the similarity of representations in task-tuned models across layers. In experiments across twelve NLU tasks, we discover a consistent block diagonal structure in the similarity of representations within fine-tuned RoBERTa and ALBERT models, with strong similarity within clusters of earlier and later layers, but not between them. The similarity of later layer representations implies that later layers only marginally contribute to task performance, and we verify in experiments that the top few layers of fine-tuned Transformers can be discarded without hurting performance, even with no further tuning. 2021.blackboxnlp-1.42 diff --git a/data/xml/2021.bppf.xml b/data/xml/2021.bppf.xml index 89e8e57524..880ffcdacb 100644 --- a/data/xml/2021.bppf.xml +++ b/data/xml/2021.bppf.xml @@ -3,8 +3,8 @@ Proceedings of the 1st Workshop on Benchmarking: Past, Present and Future - KennethChurch - MarkLiberman + KennethChurch + MarkLiberman ValiaKordoni Association for Computational Linguistics
Online
@@ -31,7 +31,7 @@ Guideline Bias in <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z Dialogues Victor Petrén BachHansen - AndersSøgaard + AndersSøgaard 8–14 NLP models struggle with generalization due to sampling and annotator bias. This paper focuses on a different kind of bias that has received very little attention: guideline bias, i.e., the bias introduced by how our annotator guidelines are formulated. We examine two recently introduced dialogue datasets, CCPE-M and Taskmaster-1, both collected by trained assistants in a Wizard-of-Oz set-up. For CCPE-M, we show how a simple lexical bias for the word like in the guidelines biases the data collection. This bias, in effect, leads to poor performance on data without this bias: a preference elicitation architecture based on BERT suffers a 5.3% absolute drop in performance, when like is replaced with a synonymous phrase, and a 13.2% drop in performance when evaluated on out-of-sample data. For Taskmaster-1, we show how the order in which instructions are resented, biases the data collection. 2021.bppf-1.2 @@ -45,8 +45,8 @@ TommasoFornaciari DirkHovy SilviuPaun - BarbaraPlank - MassimoPoesio + BarbaraPlank + MassimoPoesio AlexandraUma 15–21 Evaluation is of paramount importance in data-driven research fields such as Natural Language Processing (NLP) and Computer Vision (CV). Current evaluation practice largely hinges on the existence of a single “ground truth” against which we can meaningfully compare the prediction of a model. However, this comparison is flawed for two reasons. 1) In many cases, more than one answer is correct. 2) Even where there is a single answer, disagreement among annotators is ubiquitous, making it difficult to decide on a gold standard. We argue that the current methods of adjudication, agreement, and evaluation need serious reconsideration. Some researchers now propose to minimize disagreement and to fix datasets. We argue that this is a gross oversimplification, and likely to conceal the underlying complexity. Instead, we suggest that we need to better capture the sources of disagreement to improve today’s evaluation practice. We discuss three sources of disagreement: from the annotator, the data, and the context, and show how this affects even seemingly objective tasks. Datasets with multiple annotations are becoming more common, as are methods to integrate disagreement into modeling. The logical next step is to extend this to evaluation. diff --git a/data/xml/2021.bsnlp.xml b/data/xml/2021.bsnlp.xml index 027039a329..3f513a644f 100644 --- a/data/xml/2021.bsnlp.xml +++ b/data/xml/2021.bsnlp.xml @@ -5,7 +5,7 @@ Proceedings of the 8th Workshop on Balto-Slavic Natural Language Processing BogdanBabych OlgaKanishcheva - PreslavNakov + PreslavNakov JakubPiskorski LidiaPivovarova VasylStarko @@ -47,7 +47,7 @@ Abusive Language Recognition in <fixed-case>R</fixed-case>ussian KamilSaitov - LeonDerczynski + LeonDerczynski 20–25 Abusive phenomena are commonplace in language on the web. The scope of recognizing abusive language is broad, covering many behaviors and forms of expression. This work addresses automatic detection of abusive language in Russian. The lexical, grammatical and morphological diversity of Russian language present potential difficulties for this task, which is addressed using a variety of machine learning approaches. Finally, competitive performance is reached over multiple domains for this investigation into automatic detection of abusive language in Russian. 2021.bsnlp-1.3 @@ -88,7 +88,7 @@ Exploratory Analysis of News Sentiment Using Subgroup Discovery AnitaValmarska - Luis AdriánCabrera-Diego + Luis AdriánCabrera-Diego ElvysLinhares Pontes SenjaPollak 66–72 @@ -99,7 +99,7 @@ Creating an Aligned <fixed-case>R</fixed-case>ussian Text Simplification Dataset from Language Learner Data AnnaDmitrieva - JörgTiedemann + JörgTiedemann 73–79 Parallel language corpora where regular texts are aligned with their simplified versions can be used in both natural language processing and theoretical linguistic studies. They are essential for the task of automatic text simplification, but can also provide valuable insights into the characteristics that make texts more accessible and reveal strategies that human experts use to simplify texts. Today, there exist a few parallel datasets for English and Simple English, but many other languages lack such data. In this paper we describe our work on creating an aligned Russian-Simple Russian dataset composed of Russian literature texts adapted for learners of Russian as a foreign language. This will be the first parallel dataset in this domain, and one of the first Simple Russian datasets in general. 2021.bsnlp-1.8 @@ -118,7 +118,7 @@ Priberam Labs at the 3rd Shared Task on <fixed-case>S</fixed-case>lav<fixed-case>NER</fixed-case> PedroFerreira RubenCardoso - AfonsoMendes + AfonsoMendes 86–92 This document describes our participation at the 3rd Shared Task on SlavNER, part of the 8th Balto-Slavic Natural Language Processing Workshop, where we focused exclusively in the Named Entity Recognition (NER) task. We addressed this task by combining multi-lingual contextual embedding models, such as XLM-R (Conneau et al., 2020), with character- level embeddings and a biaffine classifier (Yu et al., 2020). This allowed us to train downstream models for NER using all the available training data. We are able to show that this approach results in good performance when replicating the scenario of the 2nd Shared Task. 2021.bsnlp-1.10 @@ -127,7 +127,7 @@ Multilingual <fixed-case>S</fixed-case>lavic Named Entity Recognition RinaldsVīksna - IngunaSkadina + IngunaSkadina 93–97 Named entity recognition, in particular for morphological rich languages, is challenging task due to the richness of inflected forms and ambiguity. This challenge is being addressed by SlavNER Shared Task. In this paper we describe system submitted to this task. Our system uses pre-trained multilingual BERT Language Model and is fine-tuned for six Slavic languages of this task on texts distributed by organizers. In our experiments this multilingual NER model achieved 96 F1 score on in-domain data and an F1 score of 83 on out of domain data. Entity coreference module achieved F1 score of 47.6 as evaluated by bsnlp2021 organizers. 2021.bsnlp-1.11 @@ -135,8 +135,8 @@ Using a Frustratingly Easy Domain and Tagset Adaptation for Creating <fixed-case>S</fixed-case>lavic Named Entity Recognition Systems - Luis AdriánCabrera-Diego - Jose G.Moreno + Luis AdriánCabrera-Diego + Jose G.Moreno AntoineDoucet 98–104 We present a collection of Named Entity Recognition (NER) systems for six Slavic languages: Bulgarian, Czech, Polish, Slovenian, Russian and Ukrainian. These NER systems have been trained using different BERT models and a Frustratingly Easy Domain Adaptation (FEDA). FEDA allow us creating NER systems using multiple datasets without having to worry about whether the tagset (e.g. Location, Event, Miscellaneous, Time) in the source and target domains match, while increasing the amount of data available for training. Moreover, we boosted the prediction on named entities by marking uppercase words and predicting masked words. Participating in the 3rd Shared Task on SlavNER, our NER systems reached a strict match micro F-score of up to 0.908. The results demonstrate good generalization, even in named entities with weak regularity, such as book titles, or entities that were never seen during the training. diff --git a/data/xml/2021.bucc.xml b/data/xml/2021.bucc.xml index 25615bdea7..658d11c02a 100644 --- a/data/xml/2021.bucc.xml +++ b/data/xml/2021.bucc.xml @@ -5,7 +5,7 @@ Proceedings of the 14th Workshop on Building and Using Comparable Corpora (BUCC 2021) ReinhardRapp SergeSharoff - PierreZweigenbaum + PierreZweigenbaum INCOMA Ltd.
Online (Virtual Mode)
September @@ -18,7 +18,7 @@ Invited Presentation - PushpakBhattacharyya + PushpakBhattacharyya 1 AI now and in future will have to grapple continuously with the problem of low resource. AI will increasingly be ML intensive. But ML needs data often with annotation. However, annotation is costly. Over the years, through work on multiple problems, we have developed insight into how to do language processing in low resource setting. Following 6 methods—individually and in combination—seem to be the way forward: 1) Artificially augment resource (e.g. subwords) 2) Cooperative NLP (e.g., pivot in MT) 3) Linguistic embellishment (e.g. factor based MT, source reordering) 4) Joint Modeling (e.g., Coref and NER, Sentiment and Emotion: each task helping the other to either boost accuracy or reduce resource requirement) 5) Multimodality (e.g., eye tracking based NLP, also picture+text+speech based Sentiment Analysis) 6)Cross Lingual Embedding (e.g., embedding from multiple languages helping MT, close to 2 above) The present talk will focus on low resource machine translation. We describe the use of techniques from the above list and bring home the seriousness and methodology of doing Machine Translation in low resource settings. 2021.bucc-1.1 @@ -72,7 +72,7 @@ JeremiasBohn JannikFischbach MartinSchmitt - HinrichSchütze + HinrichSchütze AndreasVogelsang 40–45 Creating datasets manually by human annotators is a laborious task that can lead to biased and inhomogeneous labels. We propose a flexible, semi-automatic framework for labeling data for relation extraction. Furthermore, we provide a dataset of preprocessed sentences from the requirements engineering domain, including a set of automatically created as well as hand-crafted labels. In our case study, we compare the human and automatic labels and show that there is a substantial overlap between both annotations. @@ -82,7 +82,7 @@ Majority Voting with Bidirectional Pre-translation For Bitext Retrieval AlexanderJones - Derry TantiWijaya + Derry TantiWijaya 46–59 Obtaining high-quality parallel corpora is of paramount importance for training NMT systems. However, as many language pairs lack adequate gold-standard training data, a popular approach has been to mine so-called “pseudo-parallel” sentences from paired documents in two languages. In this paper, we outline some drawbacks with current methods that rely on an embedding similarity threshold, and propose a heuristic method in its place. Our method involves translating both halves of a paired corpus before mining, and then performing a majority vote on sentence pairs mined in three ways: after translating documents in language x to language y, after translating language y to x, and using the original documents in languages x and y. We demonstrate success with this novel approach on the Tatoeba similarity search benchmark in 64 low-resource languages, and on NMT in Kazakh and Gujarati. We also uncover the effect of resource-related factors (i.e. how much monolingual/bilingual data is available for a given language) on the optimal choice of bitext mining method, demonstrating that there is currently no one-size-fits-all approach for this task. We make the code and data used in our experiments publicly available. 2021.bucc-1.7 diff --git a/data/xml/2021.calcs.xml b/data/xml/2021.calcs.xml index 8d1ec93b0d..fd0cb45c4c 100644 --- a/data/xml/2021.calcs.xml +++ b/data/xml/2021.calcs.xml @@ -5,8 +5,8 @@ Proceedings of the Fifth Workshop on Computational Approaches to Linguistic Code-Switching ThamarSolorio ShuguangChen - Alan W.Black - MonaDiab + Alan W.Black + MonaDiab SunayanaSitaram VictorSoto EmreYilmaz @@ -36,7 +36,7 @@ Challenges and Limitations with the Metrics Measuring the Complexity of Code-Mixed Text VivekSrivastava - MayankSingh + MayankSingh 6–14 Code-mixing is a frequent communication style among multilingual speakers where they mix words and phrases from two different languages in the same utterance of text or speech. Identifying and filtering code-mixed text is a challenging task due to its co-existence with monolingual and noisy text. Over the years, several code-mixing metrics have been extensively used to identify and validate code-mixed text quality. This paper demonstrates several inherent limitations of code-mixing metrics with examples from the already existing datasets that are popularly used across various experiments. 2021.calcs-1.2 @@ -47,7 +47,7 @@ Translate and Classify: Improving Sequence Level Classification for <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Code-Mixed Data DevanshGautam KshitijGupta - ManishShrivastava + ManishShrivastava 15–25 Code-mixing is a common phenomenon in multilingual societies around the world and is especially common in social media texts. Traditional NLP systems, usually trained on monolingual corpora, do not perform well on code-mixed texts. Training specialized models for code-switched texts is difficult due to the lack of large-scale datasets. Translating code-mixed data into standard languages like English could improve performance on various code-mixed tasks since we can use transfer learning from state-of-the-art English models for processing the translated data. This paper focuses on two sequence-level classification tasks for English-Hindi code mixed texts, which are part of the GLUECoS benchmark - Natural Language Inference and Sentiment Analysis. We propose using various pre-trained models that have been fine-tuned for similar English-only tasks and have shown state-of-the-art performance. We further fine-tune these models on the translated code-mixed datasets and achieve state-of-the-art performance in both tasks. To translate English-Hindi code-mixed data to English, we use mBART, a pre-trained multilingual sequence-to-sequence model that has shown competitive performance on various low-resource machine translation pairs and has also shown performance gains in languages that were not in its pre-training corpus. 2021.calcs-1.3 @@ -69,7 +69,7 @@ RamakrishnaAppicharla Kamal KumarGupta AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 31–35 This paper describes the system submitted by IITP-MT team to Computational Approaches to Linguistic Code-Switching (CALCS 2021) shared task on MT for English→Hinglish. We submit a neural machine translation (NMT) system which is trained on the synthetic code-mixed (cm) English-Hinglish parallel corpus. We propose an approach to create code-mixed parallel corpus from a clean parallel corpus in an unsupervised manner. It is an alignment based approach and we do not use any linguistic resources for explicitly marking any token for code-switching. We also train NMT model on the gold corpus provided by the workshop organizers augmented with the generated synthetic code-mixed parallel corpus. The model trained over the generated synthetic cm data achieves 10.09 BLEU points over the given test set. 2021.calcs-1.5 @@ -94,7 +94,7 @@ PrashantKodali KshitijGupta AnmolGoel - ManishShrivastava + ManishShrivastava PonnurangamKumaraguru 47–55 Code-mixed languages are very popular in multilingual societies around the world, yet the resources lag behind to enable robust systems on such languages. A major contributing factor is the informal nature of these languages which makes it difficult to collect code-mixed data. In this paper, we propose our system for Task 1 of CACLS 2021 to generate a machine translation system for English to Hinglish in a supervised setting. Translating in the given direction can help expand the set of resources for several tasks by translating valuable datasets from high resource languages. We propose to use mBART, a pre-trained multilingual sequence-to-sequence model, and fully utilize the pre-training of the model by transliterating the roman Hindi words in the code-mixed sentences to Devanagri script. We evaluate how expanding the input by concatenating Hindi translations of the English sentences improves mBART’s performance. Our system gives a BLEU score of 12.22 on test set. Further, we perform a detailed error analysis of our proposed systems and explore the limitations of the provided dataset and metrics. @@ -128,7 +128,7 @@ A Language-aware Approach to Code-switched Morphological Tagging Şaziye BetülÖzateş - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu 72–83 Morphological tagging of code-switching (CS) data becomes more challenging especially when language pairs composing the CS data have different morphological representations. In this paper, we explore a number of ways of implementing a language-aware morphological tagging method and present our approach for integrating language IDs into a transformer-based framework for CS morphological tagging. We perform our set of experiments on the Turkish-German SAGT Treebank. Experimental results show that including language IDs to the learning model significantly improves accuracy over other approaches. 2021.calcs-1.10 @@ -223,7 +223,7 @@ Code-Mixing on Sesame Street: Dawn of the Adversarial Polyglots SamsonTan - ShafiqJoty + ShafiqJoty 141 Multilingual models have demonstrated impressive cross-lingual transfer performance. However, test sets like XNLI are monolingual at the example level. In multilingual communities, it is common for polyglots to code-mix when conversing with each other. Inspired by this phenomenon, we present two strong black-box adversarial attacks (one word-level, one phrase-level) for multilingual models that push their ability to handle code-mixed sentences to the limit. The former (PolyGloss) uses bilingual dictionaries to propose perturbations and translations of the clean example for sense disambiguation. The latter (Bumblebee) directly aligns the clean example with its translations before extracting phrases as perturbations. Bumblebee has a success rate of 89.75% against XLM-R-large, bringing its average accuracy of 79.85 down to 8.18 on XNLI. Finally, we propose an efficient adversarial training scheme, Code-mixed Adversarial Training (CAT), that trains in the same number of steps as the original model. Even after controlling for the extra training data introduced, CAT improves model accuracy when the model is prevented from relying on lexical overlaps (+3.45), with a negligible drop (-0.15 points) in performance on the original XNLI test set. t-SNE visualizations reveal that CAT improves a model’s language agnosticity. This paper will be published in the proceedings of NAACL-HLT 2021. 2021.calcs-1.19 @@ -233,7 +233,7 @@ Are Multilingual Models Effective in Code-Switching? - Genta IndraWinata + Genta IndraWinata SamuelCahyawijaya ZihanLiu ZhaojiangLin diff --git a/data/xml/2021.case.xml b/data/xml/2021.case.xml index fce6531c62..debab3acd7 100644 --- a/data/xml/2021.case.xml +++ b/data/xml/2021.case.xml @@ -3,7 +3,7 @@ Proceedings of the 4th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Text (CASE 2021) - AliHürriyetoğlu + AliHürriyetoğlu Association for Computational Linguistics
Online
August @@ -18,7 +18,7 @@ Challenges and Applications of Automated Extraction of Socio-political Events from Text (<fixed-case>CASE</fixed-case> 2021): Workshop and Shared Task Report AliHürriyetoğlu - HristoTanev + HristoTanev VanniZavarella JakubPiskorski ReyyanYeniterzi @@ -79,7 +79,7 @@ LianeGuillou MilošStanojević NickMcKenna - MarkSteedman + MarkSteedman 31–42 Language provides speakers with a rich system of modality for expressing thoughts about events, without being committed to their actual occurrence. Modality is commonly used in the political news domain, where both actual and possible courses of events are discussed. NLP systems struggle with these semantic phenomena, often incorrectly extracting events which did not happen, which can lead to issues in downstream applications. We present an open-domain, lexicon-based event extraction system that captures various types of modality. This information is valuable for Question Answering, Knowledge Graph construction and Fact-checking tasks, and our evaluation shows that the system is sufficiently strong to be used in downstream applications. 2021.case-1.6 @@ -90,7 +90,7 @@ Characterizing News Portrayal of Civil Unrest in <fixed-case>H</fixed-case>ong <fixed-case>K</fixed-case>ong, 1998–2020 JamesScharf - Arya D.McCarthy + Arya D.McCarthy Giovanna Maria DoraDore 43–52 We apply statistical techniques from natural language processing to a collection of Western and Hong Kong–based English-language newspaper articles spanning the years 1998–2020, studying the difference and evolution of its portrayal. We observe that both content and attitudes differ between Western and Hong Kong–based sources. ANOVA on keyword frequencies reveals that Hong Kong–based papers discuss protests and democracy less often. Topic modeling detects salient aspects of protests and shows that Hong Kong–based papers made fewer references to police violence during the Anti–Extradition Law Amendment Bill Movement. Diachronic shifts in word embedding neighborhoods reveal a shift in the characterization of salient keywords once the Movement emerged. Together, these raise questions about the existence of anodyne reporting from Hong Kong–based media. Likewise, they illustrate the importance of sample selection for protest event analysis. @@ -114,7 +114,7 @@ SwapnilHingmire SangameshwarPatil AlokKumar - GirishPalshikar + GirishPalshikar 58–67 Incidents in industries have huge social and political impact and minimizing the consequent damage has been a high priority. However, automated analysis of repositories of incident reports has remained a challenge. In this paper, we focus on automatically extracting events from incident reports. Due to absence of event annotated datasets for industrial incidents we employ a transfer learning based approach which is shown to outperform several baselines. We further provide detailed analysis regarding effect of increase in pre-training data and provide explainability of why pre-training improves the performance. 2021.case-1.9 @@ -174,8 +174,8 @@ <fixed-case>NUS</fixed-case>-<fixed-case>IDS</fixed-case> at <fixed-case>CASE</fixed-case> 2021 Task 1: Improving Multilingual Event Sentence Coreference Identification With Linguistic Information Fiona AntingTan - Sujatha DasGollapalli - See-KiongNg + Sujatha DasGollapalli + See-KiongNg 105–112 Event Sentence Coreference Identification (ESCI) aims to cluster event sentences that refer to the same event together for information extraction. We describe our ESCI solution developed for the ACL-CASE 2021 shared tasks on the detection and classification of socio-political and crisis event information in a multilingual setting. For a given article, our proposed pipeline comprises of an accurate sentence pair classifier that identifies coreferent sentence pairs and subsequently uses these predicted probabilities to cluster sentences into groups. Sentence pair representations are constructed from fine-tuned BERT embeddings plus POS embeddings fed through a BiLSTM model, and combined with linguistic-based lexical and semantic similarities between sentences. Our best models ranked 2nd, 1st and 2nd and obtained CoNLL F1 scores of 81.20%, 93.03%, 83.15% for the English, Portuguese and Spanish test sets respectively in the ACL-CASE 2021 competition. 2021.case-1.14 @@ -223,7 +223,7 @@ ParulAwasthy JianNi KenBarker - RaduFlorian + RaduFlorian 138–146 In this paper, we present the event detection models and systems we have developed for Multilingual Protest News Detection - Shared Task 1 at CASE 2021. The shared task has 4 subtasks which cover event detection at different granularity levels (from document level to token level) and across multiple languages (English, Hindi, Portuguese and Spanish). To handle data from multiple languages, we use a multilingual transformer-based language model (XLM-R) as the input text encoder. We apply a variety of techniques and build several transformer-based models that perform consistently well across all the subtasks and languages. Our systems achieve an average F_1 score of 81.2. Out of thirteen subtask-language tracks, our submissions rank 1st in nine and 2nd in four tracks. 2021.case-1.18 @@ -292,7 +292,7 @@ KenBarker ParulAwasthy JianNi - RaduFlorian + RaduFlorian 193–202 Supervised models can achieve very high accuracy for fine-grained text classification. In practice, however, training data may be abundant for some types but scarce or even non-existent for others. We propose a hybrid architecture that uses as much labeled data as available for fine-tuning classification models, while also allowing for types with little (few-shot) or no (zero-shot) labeled data. In particular, we pair a supervised text classification model with a Natural Language Inference (NLI) reranking model. The NLI reranker uses a textual representation of target types that allows it to score the strength with which a type is implied by a text, without requiring training data for the types. Experiments show that the NLI model is very sensitive to the choice of textual representation, but can be effective for classifying unseen types. It can also improve classification accuracy for the known types of an already highly accurate supervised model. 2021.case-1.24 @@ -324,7 +324,7 @@ Discovering Black Lives Matter Events in the <fixed-case>U</fixed-case>nited <fixed-case>S</fixed-case>tates: Shared Task 3, <fixed-case>CASE</fixed-case> 2021 SalvatoreGiorgi VanniZavarella - HristoTanev + HristoTanev NicolasStefanovitch SyHwang HansiHettiarachchi diff --git a/data/xml/2021.ccl.xml b/data/xml/2021.ccl.xml index 3e5ad8ea50..0757d4e0af 100644 --- a/data/xml/2021.ccl.xml +++ b/data/xml/2021.ccl.xml @@ -24,8 +24,8 @@ 融合零指代识别的篇章级机器翻译(Context-aware Machine Translation Integrating Zero Pronoun Recognition) HaoWang - JunhuiLi军辉 - ZhengxianGong正仙 + JunhuiLi军辉 + ZhengxianGong正仙 1–12 在汉语等其他有省略代词习惯的语言中,通常会删掉可从上下文信息推断出的代词。尽管以Transformer为代表的的神经机器翻译模型取得了巨大的成功,但这种省略现象依旧对神经机器翻译模型造成了很大的挑战。本文在Transformer基础上提出了一个融合零指代识别的翻译模型,并引入篇章上下文来丰富指代信息。具体地,该模型采用联合学习的框架,在翻译模型基础上,联合了一个分类任务,即判别句子中省略代词在句子所表示的成分,使得模型能够融合零指代信息辅助翻译。通过在中英对话数据集上的实验,验证了本文提出方法的有效性,与基准模型相比,翻译性能提升了1.48个BLEU值。 2021.ccl-1.1 @@ -37,7 +37,7 @@ WeiHu MaoxiLi茂西 BailianQiu白莲 - MingwenWang明文 + MingwenWang明文 13–22 机器译文自动评价对机器翻译的发展和应用起着重要的促进作用,它一般通过计算机器译文和人工参考译文的相似度来度量机器译文的质量。该文通过跨语种预训练语言模型XLM将源语言句子、机器译文和人工参考译文映射到相同的语义空间,结合分层注意力和内部注意力提取源语言句子与机器译文、机器译文与人工参考译文以及源语言句子与人工参考译文之间差异特征,并将其融入到基于Bi-LSTM神经译文自动评价方法中。在WMT’19译文自动评价数据集上的实验结果表明,融合XLM词语表示的神经机器译文自动评价方法显著提高了其与人工评价的相关性。 2021.ccl-1.2 @@ -47,7 +47,7 @@ 利用语义关联增强的跨语言预训练模型的译文质量评估(A Cross-language Pre-trained Model with Enhanced Semantic Connection for <fixed-case>MT</fixed-case> Quality Estimation) HengYe - ZhengxianGong正仙 + ZhengxianGong正仙 23–34 机器翻译质量评估(QE)虽然不需要参考译文就能进行自动评估,但它需要人工标注的评估数据进行训练。基于神经网络框架的QE为了克服人工评估数据的稀缺问题,通常包括两个阶段,首先借助大规模的平行语料学习双语对齐,然后在小规模评估数据集上进行评估建模。跨语言预训练模型可以用来代替该任务第一阶段的学习过程,因此本文首先建议一个基于XLM-R的为源/目标语言统一编码的QE模型。其次,由于大多数预训练模型是在多语言的单语数据集上构建的,因此两两语言对的语义关联能力相对较弱。为了能使跨语言预训练模型更好地适应QE任务,本文提出用三种预训练策略来增强预训练模型的跨语言语义关联能力。本文的方法在WMT2017和WMT2019英德评估数据集上都达到了最高性能。 2021.ccl-1.3 @@ -103,7 +103,7 @@ 基于双编码器的医学文本中文分词(<fixed-case>C</fixed-case>hinese word segmentation of medical text based on dual-encoder) YuanZong - BaobaoChang宝宝 + BaobaoChang宝宝 76–85 中文分词是自然语言处理领域的基础工作,然而前人的医学文本分词工作都只是直接套用通用分词的方法,而医学文本多专用术语的特点让分词系统需要对医学专用术语和医学文本中的非医学术语文本提供不同的分词粒度。本文提出了双编码器医学文本中文分词模型,利用辅助编码器为医学专有术语提供粗粒度表示。模型将需要粗粒度分词的医学专用术语和需要通用分词粒度的文本分开,在提升医学专用术语的分词能力的同时最大限度地避免了其粗粒度对于医学文本中通用文本分词的干扰。 2021.ccl-1.8 @@ -433,8 +433,8 @@ 基于序列到序列的中文<fixed-case>AMR</fixed-case>解析(<fixed-case>C</fixed-case>hinese <fixed-case>AMR</fixed-case> Parsing based on Sequence-to-Sequence Modeling) ZiyiHuang子怡 - JunhuiLi军辉 - ZhengxianGong正仙 + JunhuiLi军辉 + ZhengxianGong正仙 374–385 抽象语义表示(Abstract Meaning Representation,简称AMR)是将给定的文本的语义特征抽象成一个单根的有向无环图。AMR语义解析则是根据输入的文本获取对应的AMR图。相比于英文AMR,中文AMR的研究起步较晚,造成针对中文的AMR语义解析相关研究较少。本文针对公开的中文AMR语料库CAMR1.0,采用序列到序列的方法进行中文AMR语义解析的相关研究。具体地,首先基于Transformer模型实现一个适用于中文的序列到序列AMR语义解析系统;然后,探索并比较了不同预训练模型在中文AMR语义解析中的应用。基于该语料,本文中文AMR语义解析方法最优性能达到了70.29的Smatch F1值。本文是第一次在该数据集上报告实验结果。 2021.ccl-1.35 @@ -487,7 +487,7 @@ 基于自动识别的委婉语历时性发展变化与社会共变研究(A Study on the Diachronic Development and Social Covariance of Euphemism Based on Automatic Recognition) ChenlinZhang辰麟 - MingwenWang明文 + MingwenWang明文 YimingTan亦鸣 MingYin XinyiZhang心怡 @@ -614,7 +614,7 @@ JishunZhao继舜 BingjieDu冰洁 ShuchengZhu述承 - PengyuanLiu鹏远 + PengyuanLiu鹏远 564–575 自然语言处理领域各项任务中,模型广泛存在性别偏见。然而当前尚无中文性别偏见评估和消偏的相关数据集,因此无法对中文自然语言处理模型中的性别偏见进行评估。首先本文根据16对性别称谓词,从一个平面媒体语料库中筛选出性别无偏的句子,构建了一个含有20000条语句的中文句子级性别无偏数据集SlguSet。随后,本文提出了一个可衡量预训练语言模型性别偏见程度的指标,并对5种流行的预训练语言模型中的性别偏见进行评估。结果表明,中文预训练语言模型中存在不同程度的性别偏见,该文所构建数据集能够很好的对中文预训练语言模型中的性别偏见进行评估。同时,该数据集还可作为评估预训练语言模型消偏方法的数据集。 2021.ccl-1.51 @@ -625,7 +625,7 @@ 基于多任务标签一致性机制的中文命名实体识别(<fixed-case>C</fixed-case>hinese Named Entity Recognition based on Multi-task Label Consistency Mechanism) ShuningLv书宁 JianLiu - JinanXu金安 + JinanXu金安 YufengChen钰枫 YujieZhang玉洁 576–588 @@ -652,7 +652,7 @@ 融入篇章信息的文学作品命名实体识别(Document-level Literary Named Entity Recognition) YuxiangJia玉祥 RuiChao - HongyingZan红英 + HongyingZan红英 HuayiDou华溢 ShuaiCao ShuoXu @@ -679,7 +679,7 @@ YajuanYe娅娟 BinHu KunliZhang坤丽 - HongyingZan红英 + HongyingZan红英 622–632 电子病历是医疗信息的重要来源,包含大量与医疗相关的领域知识。本文从糖尿病电子病历文本入手,在调研了国内外已有的电子病历语料库的基础上,参考坉圲坂圲实体及关系分类,建立了糖尿病电子病历实体及实体关系分类体系,并制定了标注规范。利用实体及关系标注平台,进行了实体及关系预标注及多轮人工校对工作,形成了糖尿病电子病历实体及关系标注语料库(Diabetes Electronic Medical Record entity and Related Corpus DEMRC)。所构建的DEMRC包含8899个实体、456个实体修饰及16564个关系。对DEMRC进行一致性评价和分析,标注结果达到了较高的一致性。针对实体识别和实体关系抽取任务,分别采用基于迁移学习的Bi-LSTM-CRF模型和RoBERTa模型进行初步实验,并对语料库中的各类实体及关系进行评估,为后续糖尿病电子病历实体识别及关系抽取研究以及糖尿病知识图谱构建打下基础。 2021.ccl-1.56 @@ -689,7 +689,7 @@ 脑卒中疾病电子病历实体及实体关系标注语料库构建(Corpus Construction for Named-Entity and Entity Relations for Electronic Medical Records of Stroke Disease) HongyangChang洪阳 - HongyingZan红英 + HongyingZan红英 YutuanMa玉团 KunliZhang坤丽 633–642 @@ -702,7 +702,7 @@ 中文关系抽取的句级语言学特征探究(A Probe into the Sentence-level Linguistic Features of <fixed-case>C</fixed-case>hinese Relation Extraction) BaixiXing百西 JishunZhao继舜 - PengyuanLiu鹏远 + PengyuanLiu鹏远 643–654 神经网络模型近些年在关系抽取任务上已经展示出了很好的效果,然而我们对于特征提取的过程所知甚少,而这也进一步限制了深度神经网络模型在关系抽取任务上的进一步发展。当前已有研究工作对英文关系抽取的语言学特征进行探究,并且得到了一些规律。然而由于中文与西方语言之间明显的差异性,其所探究到的规律与解释性不适用于中文关系抽取。本文首次对中文关系抽取神经网络进行探究,采用了四个角度共13种探究任务,其中包含中文特有的分词探究任务。在两个关系抽取数据集上进行了实验,探究了中文关系抽取模型进行特征提取的规律。 2021.ccl-1.58 @@ -822,7 +822,7 @@ BoJin MingtongLiu明童 YujieZhang玉洁 - JinanXu金安 + JinanXu金安 YufengChen钰枫 758–768 如何挖掘语言资源中丰富的复述模板,是复述研究中的一项重要任务。已有方法在人工给定种子实体对的基础上,利用实体关系,通过自举迭代方式,从开放域获取复述模板,规避对平行语料或可比语料的依赖,但是该方法需人工给定实体对,实体关系受限;在迭代过程中语义会发生偏移,影响获取质量。针对这些问题,我们考虑知识库中包含描述特定语义关系的实体对(即关系三元组),提出融合外部知识的开放域复述模板自动获取方法。首先,将关系三元组与开放域文本对齐,获取关系对应文本,并将文本中语义丰富部分泛化成变量槽,获取关系模板;接着设计模板表示方法,本文利用预训练语言模型,在模板表示中融合变量槽语义;最后,根据获得的模板表示,设计自动聚类与筛选方法,获取高精度的复述模板。在融合自动评测与人工评测的评价方法下,实验结果表明,本文提出的方法实现了在开放域数据上复述模板的自动泛化与获取,能够获得质量高、语义一致的复述模板。 @@ -883,7 +883,7 @@ ZechengTang泽成 YixinJi一心 YiboZhao怡博 - JunhuiLi军辉 + JunhuiLi军辉 813–824 语法纠错是自然语言处理领域的热门任务之一,其目的是将错误的句子修改为正确的句子。为了缓解中文训练语料不足的问题,本文从数据增强的角度出发,提出一种新颖的扩充和增强数据的方法。具体地,为了使模型能更好地获取不同类型和不同粒度的错误,本文首先对语法纠错中出现的错误进行了字和词粒度的分类,在此基础上提出了融合字词粒度噪声的数据增强方法,以此获得大规模且质量较高的错误数据集。基于NLPCC2018共享任务的实验结果表明,本文提出的融合字词粒度加噪方法能够显著提升模型的性能,在该数据集上达到了最优的性能。最后,本文分析了错误类型和数据规模对中文语法纠错模型性能的影响。 2021.ccl-1.73 diff --git a/data/xml/2021.cinlp.xml b/data/xml/2021.cinlp.xml index 167f9b0994..176bd33c7b 100644 --- a/data/xml/2021.cinlp.xml +++ b/data/xml/2021.cinlp.xml @@ -14,7 +14,7 @@ RoiReichart MollyRoberts UriShalit - BrandonStewart + BrandonStewart VictorVeitch DiyiYang Association for Computational Linguistics @@ -31,7 +31,7 @@ Causal Augmentation for Causal Sentence Classification Fiona AntingTan DevamanyuHazarika - See-KiongNg + See-KiongNg SoujanyaPoria RogerZimmermann 1–20 diff --git a/data/xml/2021.cl.xml b/data/xml/2021.cl.xml index 30abce1010..2a6fde94d3 100644 --- a/data/xml/2021.cl.xml +++ b/data/xml/2021.cl.xml @@ -13,7 +13,7 @@ Kathy <fixed-case>M</fixed-case>c<fixed-case>K</fixed-case>eown Interviews Bonnie Webber - BonnieWebber + BonnieWebber 10.1162/coli_a_00393 Because the 2020 ACL Lifetime Achievement Award presentation could not be done in person, we replaced the usual LTA talk with an interview between Professor Kathy McKeown (Columbia University) and the recipient, Bonnie Webber. The following is an edited version of the interview, with added citations. 1–7 @@ -23,7 +23,7 @@ Formal Basis of a Language Universal MilošStanojević - MarkSteedman + MarkSteedman 10.1162/coli_a_00394 Steedman (2020) proposes as a formal universal of natural language grammar that grammatical permutations of the kind that have given rise to transformational rules are limited to a class known to mathematicians and computer scientists as the “separable” permutations. This class of permutations is exactly the class that can be expressed in combinatory categorial grammars (CCGs). The excluded non-separable permutations do in fact seem to be absent in a number of studies of crosslinguistic variation in word order in nominal and verbal constructions. The number of permutations that are separable grows in the number n of lexical elements in the construction as the Large Schröder Number Sn−1. Because that number grows much more slowly than the n! number of all permutations, this generalization is also of considerable practical interest for computational applications such as parsing and machine translation. The present article examines the mathematical and computational origins of this restriction, and the reason it is exactly captured in CCG without the imposition of any further constraints. 9–42 @@ -35,7 +35,7 @@ Comparing Knowledge-Intensive and Data-Intensive Models for <fixed-case>E</fixed-case>nglish Resource Semantic Parsing JunjieCao ZiLin - WeiweiSun + WeiweiSun XiaojunWan 10.1162/coli_a_00395 In this work, we present a phenomenon-oriented comparative analysis of the two dominant approaches in English Resource Semantic (ERS) parsing: classic, knowledge-intensive and neural, data-intensive models. To reflect state-of-the-art neural NLP technologies, a factorization-based parser is introduced that can produce Elementary Dependency Structures much more accurately than previous data-driven parsers. We conduct a suite of tests for different linguistic phenomena to analyze the grammatical competence of different parsers, where we show that, despite comparable performance overall, knowledge- and data-intensive models produce different types of errors, in a way that can be explained by their theoretical properties. This analysis is beneficial to in-depth evaluation of several representative parsing techniques and leads to new directions for parser development. @@ -46,7 +46,7 @@ Semantic Data Set Construction from Human Clustering and Spatial Arrangement OlgaMajewska - DianaMcCarthy + DianaMcCarthy Jasper J. F.van den Bosch NikolausKriegeskorte IvanVulić @@ -61,7 +61,7 @@ Interpretability Analysis for Named Entity Recognition to Understand System Predictions and How They Can Improve OshinAgarwal YinfeiYang - Byron C.Wallace + Byron C.Wallace AniNenkova 10.1162/coli_a_00397 Named entity recognition systems achieve remarkable performance on domains such as English news. It is natural to ask: What are these models actually learning to achieve this? Are they merely memorizing the names themselves? Or are they capable of interpreting the text and inferring the correct entity type from the linguistic context? We examine these questions by contrasting the performance of several variants of architectures for named entity recognition, with some provided only representations of the context as features. We experiment with GloVe-based BiLSTM-CRF as well as BERT. We find that context does influence predictions, but the main factor driving high performance is learning the named tokens themselves. Furthermore, we find that BERT is not always better at recognizing predictive contexts compared to a BiLSTM-CRF model. We enlist human annotators to evaluate the feasibility of inferring entity types from context alone and find that humans are also mostly unable to infer entity types for the majority of examples on which the context-only system made errors. However, there is room for improvement: A system should be able to recognize any named entity in a predictive context correctly and our experiments indicate that current systems may be improved by such capability. Our human study also revealed that systems and humans do not always learn the same contextual clues, and context-only systems are sometimes correct even when humans fail to recognize the entity type from the context. Finally, we find that one issue contributing to model errors is the use of “entangled” representations that encode both contextual and local token information into a single vector, which can obscure clues. Our results suggest that designing models that explicitly operate over representations of local inputs and context, respectively, may in some cases improve performance. In light of these and related findings, we highlight directions for future work. @@ -86,7 +86,7 @@ LifengJin LaneSchwartz FinaleDoshi-Velez - TimothyMiller + TimothyMiller WilliamSchuler 10.1162/coli_a_00399 This article describes a simple PCFG induction model with a fixed category domain that predicts a large majority of attested constituent boundaries, and predicts labels consistent with nearly half of attested constituent labels on a standard evaluation data set of child-directed speech. The article then explores the idea that the difference between simple grammars exhibited by child learners and fully recursive grammars exhibited by adult learners may be an effect of increasing working memory capacity, where the shallow grammars are constrained images of the recursive grammars. An implementation of these memory bounds as limits on center embedding in a depth-specific transform of a recursive grammar yields a significant improvement over an equivalent but unbounded baseline, suggesting that this arrangement may indeed confer a learning advantage. @@ -119,7 +119,7 @@ Approximating Probabilistic Models as Weighted Finite Automata Ananda TheerthaSuresh BrianRoark - MichaelRiley + MichaelRiley VladSchogol 10.1162/coli_a_00401 Weighted finite automata (WFAs) are often used to represent probabilistic models, such as n-gram language models, because among other things, they are efficient for recognition tasks in time and space. The probabilistic source to be represented as a WFA, however, may come in many forms. Given a generic probabilistic model over sequences, we propose an algorithm to approximate it as a WFA such that the Kullback-Leibler divergence between the source model and the WFA target model is minimized. The proposed algorithm involves a counting step and a difference of convex optimization step, both of which can be performed efficiently. We demonstrate the usefulness of our approach on various tasks, including distilling n-gram models from neural models, building compact language models, and building open-vocabulary character models. The algorithms used for these experiments are available in an open-source software library. @@ -129,10 +129,10 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies - Marie-Catherinede Marneffe - Christopher D.Manning + Marie-Catherinede Marneffe + Christopher D.Manning JoakimNivre - DanielZeman + DanielZeman 10.1162/coli_a_00402 Universal dependencies (UD) is a framework for morphosyntactic annotation of human language, which to date has been used to create treebanks for more than 100 languages. In this article, we outline the linguistic theory of the UD framework, which draws on a long tradition of typologically oriented grammatical theories. Grammatical relations between words are centrally used to explain how predicate–argument structures are encoded morphosyntactically in different languages while morphological features and part-of-speech classes give the properties of words. We argue that this theory is a good basis for crosslinguistically consistent annotation of typologically diverse languages in a way that supports computational natural language understanding as well as broader linguistic studies. 255–308 @@ -171,7 +171,7 @@ DanielLoureiro KiamehrRezaee Mohammad TaherPilehvar - JoseCamacho-Collados + JoseCamacho-Collados 10.1162/coli_a_00405 Transformer-based language models have taken many fields in NLP by storm. BERT and its derivatives dominate most of the existing evaluation benchmarks, including those for Word Sense Disambiguation (WSD), thanks to their ability in capturing context-sensitive semantic nuances. However, there is still little knowledge about their capabilities and potential limitations in encoding and recovering word senses. In this article, we provide an in-depth quantitative and qualitative analysis of the celebrated BERT model with respect to lexical ambiguity. One of the main conclusions of our analysis is that BERT can accurately capture high-level sense distinctions, even when a limited number of examples is available for each word sense. Our analysis also reveals that in some cases language models come close to solving coarse-grained noun disambiguation under ideal conditions in terms of availability of training data and computing resources. However, this scenario rarely occurs in real-world settings and, hence, many practical challenges remain even in the coarse-grained setting. We also perform an in-depth comparison of the two main language model-based WSD strategies, namely, fine-tuning and feature extraction, finding that the latter approach is more robust with respect to sense bias and it can better exploit limited available training data. In fact, the simple feature extraction strategy of averaging contextualized embeddings proves robust even using only three training sentences per word sense, with minimal improvements obtained by increasing the size of this training data. 387–443 @@ -182,7 +182,7 @@ Universal Discourse Representation Structure Parsing JiangmingLiu - Shay B.Cohen + Shay B.Cohen MirellaLapata JohanBos 10.1162/coli_a_00406 @@ -205,7 +205,7 @@ The Taxonomy of Writing Systems: How to Measure How Logographic a System Is - RichardSproat + RichardSproat AlexanderGutkin 10.1162/coli_a_00409 Taxonomies of writing systems since Gelb (1952) have classified systems based on what the written symbols represent: if they represent words or morphemes, they are logographic; if syllables, syllabic; if segments, alphabetic; and so forth. Sproat (2000) and Rogers (2005) broke with tradition by splitting the logographic and phonographic aspects into two dimensions, with logography being graded rather than a categorical distinction. A system could be syllabic, and highly logographic; or alphabetic, and mostly non-logographic. This accords better with how writing systems actually work, but neither author proposed a method for measuring logography. In this article we propose a novel measure of the degree of logography that uses an attention-based sequence-to-sequence model trained to predict the spelling of a token from its pronunciation in context. In an ideal phonographic system, the model should need to attend to only the current token in order to compute how to spell it, and this would show in the attention matrix activations. In contrast, with a logographic system, where a given pronunciation might correspond to several different spellings, the model would need to attend to a broader context. The ratio of the activation outside the token and the total activation forms the basis of our measure. We compare this with a simple lexical measure, and an entropic measure, as well as several other neural models, and argue that on balance our attention-based measure accords best with intuition about how logographic various systems are. Our work provides the first quantifiable measure of the notion of logography that accords with linguistic intuition and, we argue, provides better insight into what this notion means. @@ -242,7 +242,7 @@ Toward Gender-Inclusive Coreference Resolution: An Analysis of Gender and Bias Throughout the Machine Learning Lifecycle* Yang TristaCao - HalDaumé III + HalDaumé III 10.1162/coli_a_00413 Correctly resolving textual mentions of people fundamentally entails making inferences about those people. Such inferences raise the risk of systematic biases in coreference resolution systems, including biases that can harm binary and non-binary trans and cis stakeholders. To better understand such biases, we foreground nuanced conceptualizations of gender from sociology and sociolinguistics, and investigate where in the machine learning pipeline such biases can enter a coreference resolution system. We inspect many existing data sets for trans-exclusionary biases, and develop two new data sets for interrogating bias in both crowd annotations and in existing coreference resolution systems. Through these studies, conducted on English text, we confirm that without acknowledging and building systems that recognize the complexity of gender, we will build systems that fail for: quality of service, stereotyping, and over- or under-representation, especially for binary and non-binary trans users. 615–661 @@ -292,7 +292,7 @@ Natural Language Processing and Computational Linguistics - JunichiTsujii + JunichiTsujii 10.1162/coli_a_00420 707–727 2021.cl-4.24 @@ -333,7 +333,7 @@ The (Un)Suitability of Automatic Evaluation Metrics for Text Simplification FernandoAlva-Manchego - CarolinaScarton + CarolinaScarton LuciaSpecia 10.1162/coli_a_00418 In order to simplify sentences, several rewriting operations can be performed, such as replacing complex words per simpler synonyms, deleting unnecessary information, and splitting long sentences. Despite this multi-operation nature, evaluation of automatic simplification systems relies on metrics that moderately correlate with human judgments on the simplicity achieved by executing specific operations (e.g., simplicity gain based on lexical replacements). In this article, we investigate how well existing metrics can assess sentence-level simplifications where multiple operations may have been applied and which, therefore, require more general simplicity judgments. For that, we first collect a new and more reliable data set for evaluating the correlation of metrics and human judgments of overall simplicity. Second, we conduct the first meta-evaluation of automatic metrics in Text Simplification, using our new data set (and other existing data) to analyze the variation of the correlation between metrics’ scores and human judgments across three dimensions: the perceived simplicity level, the system type, and the set of references used for computation. We show that these three aspects affect the correlations and, in particular, highlight the limitations of commonly used operation-specific metrics. Finally, based on our findings, we propose a set of recommendations for automatic evaluation of multi-operation simplifications, suggesting which metrics to compute and how to interpret their scores. @@ -366,8 +366,8 @@ <fixed-case>LFG</fixed-case> Generation from Acyclic <fixed-case>F</fixed-case>-Structures is <fixed-case>NP</fixed-case>-Hard - JürgenWedekind - Ronald M.Kaplan + JürgenWedekind + Ronald M.Kaplan 10.1162/coli_a_00419 The universal generation problem for LFG grammars is the problem of determining whether a given grammar derives any terminal string with a given f-structure. It is known that this problem is decidable for acyclic f-structures. In this brief note, we show that for those f-structures the problem is nonetheless intractable. This holds even for grammars that are off-line parsable. 939–946 diff --git a/data/xml/2021.clpsych.xml b/data/xml/2021.clpsych.xml index c167771fba..2737001e01 100644 --- a/data/xml/2021.clpsych.xml +++ b/data/xml/2021.clpsych.xml @@ -6,7 +6,7 @@ NazliGoharian PhilipResnik AndrewYates - MollyIreland + MollyIreland KateNiederhoffer RebeccaResnik Association for Computational Linguistics @@ -25,7 +25,7 @@ GloriannaJagfeld FionaLobban PaulRayson - StevenJones + StevenJones 1–14 Recently, research on mental health conditions using public online data, including Reddit, has surged in NLP and health research but has not reported user characteristics, which are important to judge generalisability of findings. This paper shows how existing NLP methods can yield information on clinical, demographic, and identity characteristics of almost 20K Reddit users who self-report a bipolar disorder diagnosis. This population consists of slightly more feminine- than masculine-gendered mainly young or middle-aged US-based adults who often report additional mental health diagnoses, which is compared with general Reddit statistics and epidemiological studies. Additionally, this paper carefully evaluates all methods and discusses ethical issues. 2021.clpsych-1.1 @@ -47,7 +47,7 @@ Individual Differences in the Movement-Mood Relationship in Digital Life Data GlenCoppersmith - AlexFine + AlexFine PatrickCrutchley JoshuaCarroll 25–31 @@ -74,10 +74,10 @@ Demonstrating the Reliability of Self-Annotated Emotion Data AntonMalko - CecileParis + CecileParis AndreasDuenser MariaKangas - DiegoMolla + DiegoMolla RossSparks StephenWan 45–54 @@ -171,9 +171,9 @@ Suicide Risk Prediction by Tracking Self-Harm Aspects in Tweets: <fixed-case>NUS</fixed-case>-<fixed-case>IDS</fixed-case> at the <fixed-case>CLP</fixed-case>sych 2021 Shared Task - Sujatha DasGollapalli + Sujatha DasGollapalli Guilherme AugustoZagatti - See-KiongNg + See-KiongNg 93–98 We describe our system for identifying users at-risk for suicide based on their tweets developed for the CLPsych 2021 Shared Task. Based on research in mental health studies linking self-harm tendencies with suicide, in our system, we attempt to characterize self-harm aspects expressed in user tweets over a period of time. To this end, we design SHTM, a Self-Harm Topic Model that combines Latent Dirichlet Allocation with a self-harm dictionary for modeling daily tweets of users. Next, differences in moods and topics over time are captured as features to train a deep learning model for suicide prediction. 2021.clpsych-1.10 @@ -222,11 +222,11 @@ Automatic Detection and Prediction of Psychiatric Hospitalizations From Social Media Posts - ZhengpingJiang + ZhengpingJiang JonathanZomick Sarah ItaLevitan MarkSerper - JuliaHirschberg + JuliaHirschberg 116–121 We address the problem of predicting psychiatric hospitalizations using linguistic features drawn from social media posts. We formulate this novel task and develop an approach to automatically extract time spans of self-reported psychiatric hospitalizations. Using this dataset, we build predictive models of psychiatric hospitalization, comparing feature sets, user vs. post classification, and comparing model performance using a varying time window of posts. Our best model achieves an F1 of .718 using 7 days of posts. Our results suggest that this is a useful framework for collecting hospitalization data, and that social media data can be leveraged to predict acute psychiatric crises before they occur, potentially saving lives and improving outcomes for individuals with mental illness. 2021.clpsych-1.14 @@ -263,7 +263,7 @@ Detecting Cognitive Distortions from Patient-Therapist Interactions SagarikaShreevastava - PeterFoltz + PeterFoltz 151–158 An important part of Cognitive Behavioral Therapy (CBT) is to recognize and restructure certain negative thinking patterns that are also known as cognitive distortions. The aim of this project is to detect these distortions using natural language processing. We compare and contrast different types of linguistic features as well as different classification algorithms and explore the limitations of applying these techniques on a small dataset. We find that pre-trained Sentence-BERT embeddings to train an SVM classifier yields the best results with an F1-score of 0.79. Lastly, we discuss how this work provides insights into the types of linguistic features that are inherent in cognitive distortions. 2021.clpsych-1.17 @@ -274,7 +274,7 @@ Evaluating Automatic Speech Recognition Quality and Its Impact on Counselor Utterance Coding Do JuneMin VerónicaPérez-Rosas - RadaMihalcea + RadaMihalcea 159–168 Automatic speech recognition (ASR) is a crucial step in many natural language processing (NLP) applications, as often available data consists mainly of raw speech. Since the result of the ASR step is considered as a meaningful, informative input to later steps in the NLP pipeline, it is important to understand the behavior and failure mode of this step. In this work, we analyze the quality of ASR in the psychotherapy domain, using motivational interviewing conversations between therapists and clients. We conduct domain agnostic and domain-relevant evaluations using standard evaluation metrics and also identify domain-relevant keywords in the ASR output. Moreover, we empirically study the effect of mixing ASR and manual data during the training of a downstream NLP model, and also demonstrate how additional local context can help alleviate the error introduced by noisy ASR transcripts. 2021.clpsych-1.18 @@ -294,7 +294,7 @@ Safeguarding against spurious <fixed-case>AI</fixed-case>-based predictions: The case of automated verbal memory assessment ChelseaChandler - PeterFoltz + PeterFoltz AlexCohen TerjeHolmlund BritaElvevåg diff --git a/data/xml/2021.cmcl.xml b/data/xml/2021.cmcl.xml index 4579d847f0..6c98737cd2 100644 --- a/data/xml/2021.cmcl.xml +++ b/data/xml/2021.cmcl.xml @@ -5,9 +5,9 @@ Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics EmmanueleChersoni NoraHollenstein - CassandraJacobs + CassandraJacobs YoheiOseki - LaurentPrévot + LaurentPrévot EnricoSantus Association for Computational Linguistics
Online
@@ -34,7 +34,7 @@ Human Sentence Processing: Recurrence or Attention? DannyMerkx - Stefan L.Frank + Stefan L.Frank 12–22 Recurrent neural networks (RNNs) have long been an architecture of interest for computational models of human sentence processing. The recently introduced Transformer architecture outperforms RNNs on many natural language processing tasks but little is known about its ability to model human language processing. We compare Transformer- and RNN-based language models’ ability to account for measures of human reading effort. Our analysis shows Transformers to outperform RNNs in explaining self-paced reading times and neural activity during reading English sentences, challenging the widely held idea that human sentence processing involves recurrent and immediate processing and provides evidence for cue-based retrieval. 2021.cmcl-1.2 @@ -47,9 +47,9 @@ ShohiniBhattasali DonaldDunagan LucaCampanelli - MarkSteedman + MarkSteedman JonathanBrennan - JohnHale + JohnHale 23–38 Hierarchical sentence structure plays a role in word-by-word human sentence comprehension, but it remains unclear how best to characterize this structure and unknown how exactly it would be recognized in a step-by-step process model. With a view towards sharpening this picture, we model the time course of hemodynamic activity within the brain during an extended episode of naturalistic language comprehension using Combinatory Categorial Grammar (CCG). CCG has well-defined incremental parsing algorithms, surface compositional semantics, and can explain long-range dependencies as well as complicated cases of coordination. We find that CCG-derived predictors improve a regression model of fMRI time course in six language-relevant brain regions, over and above predictors derived from context-free phrase structure. Adding a special Revealing operator to CCG parsing, one designed to handle right-adjunction, improves the fit in three of these regions. This evidence for CCG from neuroimaging bolsters the more general case for mildly context-sensitive grammars in the cognitive science of language. 2021.cmcl-1.3 @@ -80,7 +80,7 @@ Accounting for Agreement Phenomena in Sentence Comprehension with Transformer Language Models: Effects of Similarity-based Interference on Surprisal and Attention Soo HyunRyu - RichardLewis + RichardLewis 61–71 We advance a novel explanation of similarity-based interference effects in subject-verb and reflexive pronoun agreement processing, grounded in surprisal values computed from a pretrained large-scale Transformer model, GPT-2. Specifically, we show that surprisal of the verb or reflexive pronoun predicts facilitatory interference effects in ungrammatical sentences, where a distractor noun that matches in number with the verb or pronouns leads to faster reading times, despite the distractor not participating in the agreement relation. We review the human empirical evidence for such effects, including recent meta-analyses and large-scale studies. We also show that attention patterns (indexed by entropy and other measures) in the Transformer show patterns of diffuse attention in the presence of similar distractors, consistent with cue-based retrieval models of parsing. But in contrast to these models, the attentional cues and memory representations are learned entirely from the simple self-supervised task of predicting the next word. 2021.cmcl-1.6 @@ -104,7 +104,7 @@ <fixed-case>L</fixed-case>ang<fixed-case>R</fixed-case>esearch<fixed-case>L</fixed-case>ab_<fixed-case>NC</fixed-case> at <fixed-case>CMCL</fixed-case>2021 Shared Task: Predicting Gaze Behaviour Using Linguistic Features and Tree Regressors RakshaAgarwal - NiladriChatterjee + NiladriChatterjee 79–84 Analysis of gaze data behaviour has gained momentum in recent years for different NLP applications. The present paper aims at modelling gaze data behaviour of tokens in the context of a sentence. We have experimented with various Machine Learning Regression Algorithms on a feature space comprising the linguistic features of the target tokens for prediction of five Eye-Tracking features. CatBoost Regressor performed the best and achieved fourth position in terms of MAE based accuracy measurement for the ZuCo Dataset. 2021.cmcl-1.8 @@ -167,7 +167,7 @@ ShivaniChoudhary KushagriTandon RakshaAgarwal - NiladriChatterjee + NiladriChatterjee 114–119 Reading and comprehension are quintessentially cognitive tasks. Eye movement acts as a surrogate to understand which part of a sentence is critical to the process of comprehension. The aim of the shared task is to predict five eye-tracking features for a given word of the input sentence. We experimented with several models based on LGBM (Light Gradient Boosting Machine) Regression, ANN (Artificial Neural Network), and CNN (Convolutional Neural Network), using BERT embeddings and some combination of linguistic features. Our submission using CNN achieved an average MAE of 4.0639 and ranked 7th in the shared task. The average MAE was further lowered to 3.994 in post-task evaluation. 2021.cmcl-1.14 @@ -213,7 +213,7 @@ Enhancing Cognitive Models of Emotions with Representation Learning YutingGuo - Jinho D.Choi + Jinho D.Choi 141–148 We present a novel deep learning-based framework to generate embedding representations of fine-grained emotions that can be used to computationally describe psychological models of emotions. Our framework integrates a contextualized embedding encoder with a multi-head probing model that enables to interpret dynamically learned representations optimized for an emotion classification task. Our model is evaluated on the Empathetic Dialogue dataset and shows the state-of-the-art result for classifying 32 emotions. Our layer analysis can derive an emotion graph to depict hierarchical relations among the emotions. Our emotion representations can be used to generate an emotion wheel directly comparable to the one from Plutchik’s model, and also augment the values of missing emotions in the PAD emotional state model. 2021.cmcl-1.18 diff --git a/data/xml/2021.codi.xml b/data/xml/2021.codi.xml index 88f9b1093b..98e85efccd 100644 --- a/data/xml/2021.codi.xml +++ b/data/xml/2021.codi.xml @@ -24,7 +24,7 @@ CathrineDamgaard PaulinaToborek TrineEriksen - BarbaraPlank + BarbaraPlank 1–11 Indirect answers are replies to polar questions without the direct use of word cues such as ‘yes’ and ‘no’. Humans are very good at understanding indirect answers, such as ‘I gotta go home sometime’, when asked ‘You wanna crash on the couch?’. Understanding indirect answers is a challenging problem for dialogue systems. In this paper, we introduce a new English corpus to study the problem of understanding indirect answers. Instead of crowd-sourcing both polar questions and answers, we collect questions and indirect answers from transcripts of a prominent TV series and manually annotate them for answer type. The resulting dataset contains 5,930 question-answer pairs. We release both aggregated and raw human annotations. We present a set of experiments in which we evaluate Convolutional Neural Networks (CNNs) for this task, including a cross-dataset evaluation and experiments with learning from disagreements in annotation. Our results show that the task of interpreting indirect answers remains challenging, yet we obtain encouraging improvements when explicitly modeling human disagreement. 2021.codi-main.1 @@ -76,8 +76,8 @@ Coreference Chains Categorization by Sequence Clustering SilviaFederzoni - Lydia-MaiHo-Dac - CécileFabre + Lydia-MaiHo-Dac + CécileFabre 52–57 The diversity of coreference chains is usually tackled by means of global features (length, types and number of referring expressions, distance between them, etc.). In this paper, we propose a novel approach that provides a description of their composition in terms of sequences of expressions. To this end, we apply sequence analysis techniques to bring out the various strategies for introducing a referent and keeping it active throughout discourse. We discuss a first application of this method to a French written corpus annotated with coreference chains. We obtain clusters that are linguistically coherent and interpretable in terms of reference strategies and we demonstrate the influence of text genre and semantic type of the referent on chain composition. 2021.codi-main.5 @@ -137,7 +137,7 @@ Revisiting Shallow Discourse Parsing in the <fixed-case>PDTB</fixed-case>-3: Handling Intra-sentential Implicits ZhengZhao - BonnieWebber + BonnieWebber 107–121 In the PDTB-3, several thousand implicit discourse relations were newly annotated within individual sentences, adding to the over 15,000 implicit relations annotated across adjacent sentences in the PDTB-2. Given that the position of the arguments to these intra-sentential implicits is no longer as well-defined as with inter-sentential implicits, a discourse parser must identify both their location and their sense. That is the focus of the current work. The paper provides a comprehensive analysis of our results, showcasing model performance under different scenarios, pointing out limitations and noting future directions. 2021.codi-main.10 @@ -147,7 +147,7 @@ Improving Multi-Party Dialogue Discourse Parsing via Domain Integration ZhengyuanLiu - NancyChen + NancyChen 122–127 While multi-party conversations are often less structured than monologues and documents, they are implicitly organized by semantic level correlations across the interactive turns, and dialogue discourse analysis can be applied to predict the dependency structure and relations between the elementary discourse units, and provide feature-rich structural information for downstream tasks. However, the existing corpora with dialogue discourse annotation are collected from specific domains with limited sample sizes, rendering the performance of data-driven approaches poor on incoming dialogues without any domain adaptation. In this paper, we first introduce a Transformer-based parser, and assess its cross-domain performance. We next adopt three methods to gain domain integration from both data and language modeling perspectives to improve the generalization capability. Empirical results show that the neural parser can benefit from our proposed methods, and performs better on cross-domain dialogue samples. 2021.codi-main.11 @@ -192,7 +192,7 @@ <fixed-case>DMRST</fixed-case>: A Joint Framework for Document-Level Multilingual <fixed-case>RST</fixed-case> Discourse Segmentation and Parsing ZhengyuanLiu KeShi - NancyChen + NancyChen 154–164 Text discourse parsing weighs importantly in understanding information flow and argumentative structure in natural language, making it beneficial for downstream tasks. While previous work significantly improves the performance of RST discourse parsing, they are not readily applicable to practical use cases: (1) EDU segmentation is not integrated into most existing tree parsing frameworks, thus it is not straightforward to apply such models on newly-coming data. (2) Most parsers cannot be used in multilingual scenarios, because they are developed only in English. (3) Parsers trained from single-domain treebanks do not generalize well on out-of-domain inputs. In this work, we propose a document-level multilingual RST discourse parsing framework, which conducts EDU segmentation and discourse tree parsing jointly. Moreover, we propose a cross-translation augmentation strategy to enable the framework to support multilingual parsing and improve its domain generality. Experimental results show that our model achieves state-of-the-art performance on document-level multilingual RST parsing in all sub-tasks. 2021.codi-main.15 @@ -206,7 +206,7 @@ VassilinaNikoulina DongyeopKang DidierSchwab - LaurentBesacier + LaurentBesacier 165–170 This paper presents an interactive data dashboard that provides users with an overview of the preservation of discourse relations among 28 language pairs. We display a graph network depicting the cross-lingual discourse relations between a pair of languages for multilingual TED talks and provide a search function to look for sentences with specific keywords or relation types, facilitating ease of analysis on the cross-lingual discourse relations. 2021.codi-main.16 @@ -219,11 +219,11 @@ Proceedings of the CODI-CRAC 2021 Shared Task on Anaphora, Bridging, and Discourse Deixis in Dialogue SopanKhosla - RameshManuvinakurike + RameshManuvinakurike VincentNg - MassimoPoesio + MassimoPoesio MichaelStrube - CarolynRosé + CarolynRosé Association for Computational Linguistics
Punta Cana, Dominican Republic
November @@ -267,7 +267,7 @@ NataliaSkachkova SiyuTao SharmilaUpadhyaya - IvanaKruijff-Korbayova + IvanaKruijff-Korbayova 32–42 We describe the system developed by the DFKI-TalkingRobots Team for the CODI-CRAC 2021 Shared-Task on anaphora resolution in dialogue. Our system consists of three subsystems: (1) the Workspace Coreference System (WCS) incrementally clusters mentions using semantic similarity based on embeddings combined with lexical feature heuristics; (2) the Mention-to-Mention (M2M) coreference resolution system pairs same entity mentions; (3) the Discourse Deixis Resolution (DDR) system employs a Siamese Network to detect discourse anaphor-antecedent pairs. WCS achieved F1-score of 55.6% averaged across the evaluation test sets, M2M achieved 57.2% and DDR achieved 21.5%. 2021.codi-sharedtask.3 @@ -301,7 +301,7 @@ Adapted End-to-End Coreference Resolution System for Anaphoric Identities in Dialogues LiyanXu - Jinho D.Choi + Jinho D.Choi 55–62 We present an effective system adapted from the end-to-end neural coreference resolution model, targeting on the task of anaphora resolution in dialogues. Three aspects are specifically addressed in our approach, including the support of singletons, encoding speakers and turns throughout dialogue interactions, and knowledge transfer utilizing existing resources. Despite the simplicity of our adaptation strategies, they are shown to bring significant impact to the final performance, with up to 27 F1 improvement over the baseline. Our final system ranks the 1st place on the leaderboard of the anaphora resolution track in the CRAC 2021 shared task, and achieves the best evaluation results on all four datasets. 2021.codi-sharedtask.6 @@ -315,7 +315,7 @@ TatianaAnikina SiyuTao SharmilaUpadhyaya - IvanaKruijff-Korbayova + IvanaKruijff-Korbayova 63–70 We compare our team’s systems to others submitted for the CODI-CRAC 2021 Shared-Task on anaphora resolution in dialogue. We analyse the architectures and performance, report some problematic cases in gold annotations, and suggest possible improvements of the systems, their evaluation, data annotation, and the organization of the shared task. 2021.codi-sharedtask.7 diff --git a/data/xml/2021.computel.xml b/data/xml/2021.computel.xml index f22d0aa3a2..06b99d3deb 100644 --- a/data/xml/2021.computel.xml +++ b/data/xml/2021.computel.xml @@ -8,9 +8,9 @@ AtticusHarrigan MansHulden JordanLachler - SarahMoeller + SarahMoeller AlexisPalmer - MiikkaSilfverberg + MiikkaSilfverberg LaneSchwartz Association for Computational Linguistics
Online
@@ -44,7 +44,7 @@ <fixed-case>LARA</fixed-case> in the Service of Revivalistics and Documentary Linguistics: Community Engagement and Endangered Languages Ghil’AdZuckermann SigurðurVigfússon - MannyRayner + MannyRayner NeasaNí Chiaráin NedelinaIvanova HaniehHabibi @@ -89,7 +89,7 @@ JanetWiles AlexisMichaud SéverineGuillaume - LaurentBesacier + LaurentBesacier ChristopherCox KatyaAplonova GuillaumeJacques @@ -103,7 +103,7 @@ NilsHjortnaes NikoPartanen MichaelRießler - Francis M.Tyers + Francis M.Tyers 63–69 2021.computel-1.8 hjortnaes-etal-2021-relevance @@ -111,7 +111,7 @@ Shared Digital Resource Application within <fixed-case>I</fixed-case>nsular <fixed-case>S</fixed-case>candinavian HinrikHafsteinsson - Anton KarlIngason + Anton KarlIngason 70–79 2021.computel-1.9 hafsteinsson-ingason-2021-shared @@ -119,7 +119,7 @@ Towards an Open Source Finite-State Morphological Analyzer for Zacatlán-Ahuacatlán-Tepetzintla <fixed-case>N</fixed-case>ahuatl RobertPugh - FrancisTyers + FrancisTyers MarivelHuerta Mendez 80–85 2021.computel-1.10 @@ -135,9 +135,9 @@ Developing a Shared Task for Speech Processing on Endangered Languages - Gina-AnneLevow + Gina-AnneLevow EmilyAhn - Emily M.Bender + Emily M.Bender 96–106 2021.computel-1.12 levow-etal-2021-developing diff --git a/data/xml/2021.conll.xml b/data/xml/2021.conll.xml index f563e0076f..c2375bff75 100644 --- a/data/xml/2021.conll.xml +++ b/data/xml/2021.conll.xml @@ -20,7 +20,7 @@ KatharinaWeitz LindseyVanderlyn Ngoc ThangVu - ElisabethAndré + ElisabethAndré 1–16 Human-AI collaboration, a long standing goal in AI, refers to a partnership where a human and artificial intelligence work together towards a shared goal. Collaborative dialog allows human-AI teams to communicate and leverage strengths from both partners. To design collaborative dialog systems, it is important to understand what mental models users form about their AI-dialog partners, however, how users perceive these systems is not fully understood. In this study, we designed a novel, collaborative, communication-based puzzle game and explanatory dialog system. We created a public corpus from 117 conversations and post-surveys and used this to analyze what mental models users formed. Key takeaways include: Even when users were not engaged in the game, they perceived the AI-dialog partner as intelligent and likeable, implying they saw it as a partner separate from the game. This was further supported by users often overestimating the system’s abilities and projecting human-like attributes which led to miscommunications. We conclude that creating shared mental models between users and AI systems is important to achieving successful dialogs. We propose that our insights on mental models and miscommunication, the game, and our corpus provide useful tools for designing collaborative dialog systems. 2021.conll-1.1 @@ -81,7 +81,7 @@ EmanueleBugliarello Miryamde Lhoneux ChenQiu - AndersSøgaard + AndersSøgaard 58–71 Creole languages such as Nigerian Pidgin English and Haitian Creole are under-resourced and largely ignored in the NLP literature. Creoles typically result from the fusion of a foreign language with multiple local languages, and what grammatical and lexical features are transferred to the creole is a complex process. While creoles are generally stable, the prominence of some features may be much stronger with certain demographics or in some linguistic situations. This paper makes several contributions: We collect existing corpora and release models for Haitian Creole, Nigerian Pidgin English, and Singaporean Colloquial English. We evaluate these models on intrinsic and extrinsic tasks. Motivated by the above literature, we compare standard language models with distributionally robust ones and find that, somewhat surprisingly, the standard language models are superior to the distributionally robust ones. We investigate whether this is an effect of over-parameterization or relative distributional stability, and find that the difference persists in the absence of over-parameterization, and that drift is limited, confirming the relative stability of creole languages. 2021.conll-1.5 @@ -136,7 +136,7 @@ DanielHershcovich StellaFrank ElliePavlick - AndersSøgaard + AndersSøgaard 109–132 Pretrained language models have been shown to encode relational information, such as the relations between entities or concepts in knowledge-bases — (Paris, Capital, France). However, simple relations of this type can often be recovered heuristically and the extent to which models implicitly reflect topological structure that is grounded in world, such as perceptual structure, is unknown. To explore this question, we conduct a thorough case study on color. Namely, we employ a dataset of monolexemic color terms and color chips represented in CIELAB, a color space with a perceptually meaningful distance metric. Using two methods of evaluating the structural alignment of colors in this space with text-derived color term representations, we find significant correspondence. Analyzing the differences in alignment across the color spectrum, we find that warmer colors are, on average, better aligned to the perceptual color space than cooler ones, suggesting an intriguing connection to findings from recent work on efficient communication in color naming. Further analysis suggests that differences in alignment are, in part, mediated by collocationality and differences in syntactic usage, posing questions as to the relationship between color perception and usage and context. 2021.conll-1.9 @@ -158,7 +158,7 @@ Enriching Language Models with Visually-grounded Word Vectors and the <fixed-case>L</fixed-case>ancaster Sensorimotor Norms - CaseyKennington + CaseyKennington 148–157 Language models are trained only on text despite the fact that humans learn their first language in a highly interactive and multimodal environment where the first set of learned words are largely concrete, denoting physical entities and embodied states. To enrich language models with some of this missing experience, we leverage two sources of information: (1) the Lancaster Sensorimotor norms, which provide ratings (means and standard deviations) for over 40,000 English words along several dimensions of embodiment, and which capture the extent to which something is experienced across 11 different sensory modalities, and (2) vectors from coefficients of binary classifiers trained on images for the BERT vocabulary. We pre-trained the ELECTRA model and fine-tuned the RoBERTa model with these two sources of information then evaluate using the established GLUE benchmark and the Visual Dialog benchmark. We find that enriching language models with the Lancaster norms and image vectors improves results in both tasks, with some implications for robust language models that capture holistic linguistic meaning in a language learning context. 2021.conll-1.11 @@ -170,7 +170,7 @@ Learning Zero-Shot Multifaceted Visually Grounded Word Embeddings via Multi-Task Training HassanShahmohammadi Hendrik P. A.Lensch - R. HaraldBaayen + R. HaraldBaayen 158–170 Language grounding aims at linking the symbolic representation of language (e.g., words) into the rich perceptual knowledge of the outside world. The general approach is to embed both textual and visual information into a common space -the grounded space- confined by an explicit relationship. We argue that since concrete and abstract words are processed differently in the brain, such approaches sacrifice the abstract knowledge obtained from textual statistics in the process of acquiring perceptual information. The focus of this paper is to solve this issue by implicitly grounding the word embeddings. Rather than learning two mappings into a joint space, our approach integrates modalities by implicit alignment. This is achieved by learning a reversible mapping between the textual and the grounded space by means of multi-task training. Intrinsic and extrinsic evaluations show that our way of visual grounding is highly beneficial for both abstract and concrete words. Our embeddings are correlated with human judgments and outperform previous works using pretrained word embeddings on a wide range of benchmarks. Our grounded embeddings are publicly available here. 2021.conll-1.12 @@ -241,7 +241,7 @@ Agree to Disagree: Analysis of Inter-Annotator Disagreements in Human Evaluation of Machine Translation Output - MajaPopović + MajaPopović 234–243 This work describes an analysis of inter-annotator disagreements in human evaluation of machine translation output. The errors in the analysed texts were marked by multiple annotators under guidance of different quality criteria: adequacy, comprehension, and an unspecified generic mixture of adequacy and fluency. Our results show that different criteria result in different disagreements, and indicate that a clear definition of quality criterion can improve the inter-annotator agreement. Furthermore, our results show that for certain linguistic phenomena which are not limited to one or two words (such as word ambiguity or gender) but span over several words or even entire phrases (such as negation or relative clause), disagreements do not necessarily represent “errors” or “noise” but are rather inherent to the evaluation process. %These disagreements are caused by differences in error perception and/or the fact that there is no single correct translation of a text so that multiple solutions are possible. On the other hand, for some other phenomena (such as omission or verb forms) agreement can be easily improved by providing more precise and detailed instructions to the evaluators. 2021.conll-1.18 @@ -256,7 +256,7 @@ YovaKementchedjhieva LukasNielsen ChenQiu - AndersSøgaard + AndersSøgaard 244–257 Negation is one of the most fundamental concepts in human cognition and language, and several natural language inference (NLI) probes have been designed to investigate pretrained language models’ ability to detect and reason with negation. However, the existing probing datasets are limited to English only, and do not enable controlled probing of performance in the absence or presence of negation. In response, we present a multilingual (English, Bulgarian, German, French and Chinese) benchmark collection of NLI examples that are grammatical and correctly labeled, as a result of manual inspection and reformulation. We use the benchmark to probe the negation-awareness of multilingual language models and find that models that correctly predict examples with negation cues, often fail to correctly predict their counter-examples without negation cues, even when the cues are irrelevant for semantic inference. 2021.conll-1.19 @@ -357,9 +357,9 @@ Imposing Relation Structure in Language-Model Embeddings Using Contrastive Learning ChristosTheodoropoulos - JamesHenderson + JamesHenderson Andrei CatalinComan - Marie-FrancineMoens + Marie-FrancineMoens 337–348 Though language model text embeddings have revolutionized NLP research, their ability to capture high-level semantic information, such as relations between entities in text, is limited. In this paper, we propose a novel contrastive learning framework that trains sentence embeddings to encode the relations in a graph structure. Given a sentence (unstructured text) and its graph, we use contrastive learning to impose relation-related structure on the token level representations of the sentence obtained with a CharacterBERT (El Boukkouri et al., 2020) model. The resulting relation-aware sentence embeddings achieve state-of-the-art results on the relation extraction task using only a simple KNN classifier, thereby demonstrating the success of the proposed method. Additional visualization by a tSNE analysis shows the effectiveness of the learned representation space compared to baselines. Furthermore, we show that we can learn a different space for named entity recognition, again using a contrastive learning objective, and demonstrate how to successfully combine both representation spaces in an entity-relation task. 2021.conll-1.27 @@ -375,7 +375,7 @@ OmarAgha Soo-HwanLee ZhuoyeZhao - Samuel R.Bowman + Samuel R.Bowman TalLinzen 349–366 Understanding language requires grasping not only the overtly stated content, but also making inferences about things that were left unsaid. These inferences include presuppositions, a phenomenon by which a listener learns about new information through reasoning about what a speaker takes as given. Presuppositions require complex understanding of the lexical and syntactic properties that trigger them as well as the broader conversational context. In this work, we introduce the Naturally-Occurring Presuppositions in English (NOPE) Corpus to investigate the context-sensitivity of 10 different types of presupposition triggers and to evaluate machine learning models’ ability to predict human inferences. We find that most of the triggers we investigate exhibit moderate variability. We further find that transformer-based models draw correct inferences in simple cases involving presuppositions, but they fail to capture the minority of exceptional cases in which human judgments reveal complex interactions between context and triggers. @@ -471,7 +471,7 @@ Does referent predictability affect the choice of referential form? A computational approach using masked coreference resolution LauraAina XixianLiao - GemmaBoleda + GemmaBoleda MatthijsWestera 454–469 It is often posited that more predictable parts of a speaker’s meaning tend to be made less explicit, for instance using shorter, less informative words. Studying these dynamics in the domain of referring expressions has proven difficult, with existing studies, both psycholinguistic and corpus-based, providing contradictory results. We test the hypothesis that speakers produce less informative referring expressions (e.g., pronouns vs. full noun phrases) when the context is more informative about the referent, using novel computational estimates of referent predictability. We obtain these estimates training an existing coreference resolution system for English on a new task, masked coreference resolution, giving us a probability distribution over referents that is conditioned on the context but not the referring expression. The resulting system retains standard coreference resolution performance while yielding a better estimate of human-derived referent predictability than previous attempts. A statistical analysis of the relationship between model output and mention form supports the hypothesis that predictability affects the form of a mention, both its morphosyntactic type and its length. @@ -496,7 +496,7 @@ Commonsense Knowledge in Word Associations and <fixed-case>C</fixed-case>oncept<fixed-case>N</fixed-case>et ChunhuaLiu - TrevorCohn + TrevorCohn LeaFrermann 481–495 Humans use countless basic, shared facts about the world to efficiently navigate in their environment. This commonsense knowledge is rarely communicated explicitly, however, understanding how commonsense knowledge is represented in different paradigms is important for (a) a deeper understanding of human cognition and (b) augmenting automatic reasoning systems. This paper presents an in-depth comparison of two large-scale resources of general knowledge: ConceptNet, an engineered relational database, and SWOW, a knowledge graph derived from crowd-sourced word associations. We examine the structure, overlap and differences between the two graphs, as well as the extent of situational commonsense knowledge present in the two resources. We finally show empirically that both resources improve downstream task performance on commonsense reasoning benchmarks over text-only baselines, suggesting that large-scale word association data, which have been obtained for several languages through crowd-sourcing, can be a valuable complement to curated knowledge graphs. @@ -556,7 +556,7 @@ SiddiqueLatif InyoungKim IoanCalapodescu - LaurentBesacier + LaurentBesacier 544–551 While End-2-End Text-to-Speech (TTS) has made significant progresses over the past few years, these systems still lack intuitive user controls over prosody. For instance, generating speech with fine-grained prosody control (prosodic prominence, contextually appropriate emotions) is still an open challenge. In this paper, we investigate whether we can control prosody directly from the input text, in order to code information related to contrastive focus which emphasizes a specific word that is contrary to the presuppositions of the interlocutor. We build and share a specific dataset for this purpose and show that it allows to train a TTS system were this fine-grained prosodic feature can be correctly conveyed using control tokens. Our evaluation compares synthetic and natural utterances and shows that prosodic patterns of contrastive focus (variations of Fo, Intensity and Duration) can be learnt accurately. Such a milestone is important to allow, for example, smart speakers to be programmatically controlled in terms of output prosody. 2021.conll-1.42 @@ -569,7 +569,7 @@ HoyunSong Soo HyunRyu HuijeLee - JongPark + JongPark 552–561 As users in online communities suffer from severe side effects of abusive language, many researchers attempted to detect abusive texts from social media, presenting several datasets for such detection. However, none of them contain both comprehensive labels and contextual information, which are essential for thoroughly detecting all kinds of abusiveness from texts, since datasets with such fine-grained features demand a significant amount of annotations, leading to much increased complexity. In this paper, we propose a Comprehensive Abusiveness Detection Dataset (CADD), collected from the English Reddit posts, with multifaceted labels and contexts. Our dataset is annotated hierarchically for an efficient annotation through crowdsourcing on a large-scale. We also empirically explore the characteristics of our dataset and provide a detailed analysis for novel insights. The results of our experiments with strong pre-trained natural language understanding models on our dataset show that our dataset gives rise to meaningful performance, assuring its practicality for abusive language detection. 2021.conll-1.43 @@ -630,8 +630,8 @@ The Emergence of the Shape Bias Results from Communicative Efficiency EvaPortelance - Michael C.Frank - DanJurafsky + Michael C.Frank + DanJurafsky AlessandroSordoni RomainLaroche 607–623 @@ -657,7 +657,7 @@ Analysing Human Strategies of Information Transmission as a Function of Discourse Context MarioGiulianelli - RaquelFernández + RaquelFernández 647–660 Speakers are thought to use rational information transmission strategies for efficient communication (Genzel and Charniak, 2002; Aylett and Turk, 2004; Jaeger and Levy, 2007). Previous work analysing these strategies in sentence production has failed to take into account how the information content of sentences varies as a function of the available discourse context. In this study, we estimate sentence information content within discourse context. We find that speakers transmit information at a stable rate—i.e., rationally—in English newspaper articles but that this rate decreases in spoken open domain and written task-oriented dialogues. We also observe that speakers’ choices are not oriented towards local uniformity of information, which is another hypothesised rational strategy. We suggest that a more faithful model of communication should explicitly include production costs and goal-oriented rewards. 2021.conll-1.50 @@ -683,7 +683,7 @@ EmmaO’Neill JoeKenny AnthonyVentresque - JulieCarson-Berndsen + JulieCarson-Berndsen 674–683 A child who is unfamiliar with the correct spelling of a word often employs a “sound it out” approach: breaking the word down into its constituent sounds and then choosing letters to represent the identified sounds. This often results in a misspelling that is orthographically very different to the intended target. Recently, efforts have been made to develop phonetic based spellcheckers to tackle the more deviant nature of children’s misspellings. However, little work has been done to investigate the potential of spelling correction tools that incorporate regional pronunciation variation. If a child must first identify the sounds that make up a word, it stands to reason their pronunciation would influence this process. We investigate this hypothesis along with the feasibility and potential benefits of adapting spelling correction tools to more specific language variants - particularly Irish Accented English. We use misspelling data from schoolchildren across Ireland to adapt an existing English phonetic-based spellchecker and demonstrate improvements in performance. These results not only prompt consideration of language varieties in the development of spellcheckers but also contribute to existing literature on the role of regional accent in the acquisition of writing proficiency. 2021.conll-1.52 diff --git a/data/xml/2021.crac.xml b/data/xml/2021.crac.xml index 3ef40370a8..040f9b9524 100644 --- a/data/xml/2021.crac.xml +++ b/data/xml/2021.crac.xml @@ -4,8 +4,8 @@ Proceedings of the Fourth Workshop on Computational Models of Reference, Anaphora and Coreference MaciejOgrodniczuk - SameerPradhan - MassimoPoesio + SameerPradhan + MassimoPoesio YuliaGrishina VincentNg Association for Computational Linguistics @@ -50,7 +50,7 @@ JonginKim NayoungChoi MinSong - Jinho D.Choi + Jinho D.Choi 24–35 This paper presents a new corpus and annotation guideline for a novel coreference resolution task on fictional texts, and analyzes its unique characteristics. FantasyCoref contains 211 stories of Grimms’ Fairy Tales and 3 other fantasy literature annotated in the omniscient writer’s point of view (OWV) to handle distinctive aspects in this genre. This task is more challenging than general coreference resolution in two ways. First, documents in our corpus are 2.5 times longer than the ones in OntoNotes, raising a new layer of difficulty in resolving long-distant referents. Second, annotation of literary styles and concepts raise several issues which are not sufficiently addressed in the existing annotation guidelines. Hence, considerations on such issues and the concept of OWV are necessary to achieve high inter-annotator agreement (IAA) in coreference resolution of fictional texts. We carefully conduct annotation tasks in four stages to ensure the quality of our annotation. As a result, a high IAA score of 87% is achieved using the standard coreference evaluation metric. Finally, state-of-the-art coreference resolution approaches are evaluated on our corpus. After training with our annotated dataset, there was a 2.59% and 3.06% improvement over the model trained on the OntoNotes dataset. Also, we observe that the portion of errors specific to fictional texts declines after the training. 2021.crac-1.3 @@ -101,8 +101,8 @@ HieuLam MartinWu OphélieLacroix - BarbaraPlank - AndersSøgaard + BarbaraPlank + AndersSøgaard 63–69 Automatic coreference resolution is understudied in Danish even though most of the Danish Dependency Treebank (Buch-Kromann, 2003) is annotated with coreference relations. This paper describes a conversion of its partial, yet well-documented, coreference relations into coreference clusters and the training and evaluation of coreference models on this data. To the best of our knowledge, these are the first publicly available, neural coreference models for Danish. We also present a new entity linking annotation on the dataset using WikiData identifiers, a named entity disambiguation (NED) dataset, and a larger automatically created NED dataset enabling wikily supervised NED models. The entity linking annotation is benchmarked using a state-of-the-art neural entity disambiguation model. 2021.crac-1.7 @@ -189,7 +189,7 @@ XudongLin ManlingLi HengJi - Shih-FuChang + Shih-FuChang 132–140 Event coreference resolution is critical to understand events in the growing number of online news with multiple modalities including text, video, speech, etc. However, the events and entities depicting in different modalities may not be perfectly aligned and can be difficult to annotate, which makes the task especially challenging with little supervision available. To address the above issues, we propose a supervised model based on attention mechanism and an unsupervised model based on statistical machine translation, capable of learning the relative importance of modalities for event coreference resolution. Experiments on a video multimedia event dataset show that our multimodal models outperform text-only systems in event coreference resolution tasks. A careful analysis reveals that the performance gain of the multimodal model especially under unsupervised settings comes from better learning of visually salient events. 2021.crac-1.14 diff --git a/data/xml/2021.cstfrs.xml b/data/xml/2021.cstfrs.xml index aeb7b02733..16fc14a2ec 100644 --- a/data/xml/2021.cstfrs.xml +++ b/data/xml/2021.cstfrs.xml @@ -67,7 +67,7 @@ Analytical, Symbolic and First-Order Reasoning within Neural Architectures SamuelRyb - Martenvan Schijndel + Martenvan Schijndel 61–72 2021.cstfrs-1.7 ryb-van-schijndel-2021-analytical diff --git a/data/xml/2021.dash.xml b/data/xml/2021.dash.xml index ade9dfc9b1..463772a022 100644 --- a/data/xml/2021.dash.xml +++ b/data/xml/2021.dash.xml @@ -122,7 +122,7 @@ Data Cleaning Tools for Token Classification Tasks KarthikMuthuraman - FrederickReiss + FrederickReiss HongXu BryanCutler ZacharyEichenberger @@ -178,7 +178,7 @@ AjayPatel AkankshaMalhotra RenoKriz - MarthaPalmer + MarthaPalmer ChrisCallison-Burch 86–101 Acquiring training data for natural language processing systems can be expensive and time-consuming. Given a few training examples crafted by experts, large corpora can be mined for thousands of semantically similar examples that provide useful variability to improve model generalization. We present TopGuNN, a fast contextualized k-NN retrieval system that can efficiently index and search over contextual embeddings generated from large corpora. TopGuNN is demonstrated for a training data augmentation use case over the Gigaword corpus. Using approximate k-NN and an efficient architecture, TopGuNN performs queries over an embedding space of 4.63TB (approximately 1.5B embeddings) in less than a day. @@ -192,7 +192,7 @@ EshaSingh MartinMichalowski MariaGini - SergueiPakhomov + SergueiPakhomov 102–104 We present the Everyday Living Artificial Intelligence (AI) Hub, a novel proof-of-concept framework for enhancing human health and wellbeing via a combination of tailored wear-able and Conversational Agent (CA) solutions for non-invasive monitoring of physiological signals, assessment of behaviors through unobtrusive wearable devices, and the provision of personalized interventions to reduce stress and anxiety. We utilize recent advancements and industry standards in the Internet of Things (IoT)and AI technologies to develop this proof-of-concept framework. 2021.dash-1.15 diff --git a/data/xml/2021.deelio.xml b/data/xml/2021.deelio.xml index fb6324e0c7..36239b7b0e 100644 --- a/data/xml/2021.deelio.xml +++ b/data/xml/2021.deelio.xml @@ -3,7 +3,7 @@ Proceedings of Deep Learning Inside Out (DeeLIO): The 2nd Workshop on Knowledge Extraction and Integration for Deep Learning Architectures - EnekoAgirre + EnekoAgirre MariannaApidianaki IvanVulić Association for Computational Linguistics @@ -47,8 +47,8 @@ VidhishaBalachandran BhuwanDhingra HaitianSun - MichaelCollins - WilliamCohen + MichaelCollins + WilliamCohen 25–30 Existing work shows the benefits of integrating KBs with textual evidence for QA only on questions that are answerable by KBs alone (Sun et al., 2019). In contrast, real world QA systems often have to deal with questions that might not be directly answerable by KBs. Here, we investigate the effect of integrating background knowledge from KBs for the Natural Questions (NQ) task. We create a subset of the NQ data, Factual Questions (FQ), where the questions have evidence in the KB in the form of paths that link question entities to answer entities but still must be answered using text, to facilitate further research into KB integration methods. We propose and analyze a simple, model-agnostic approach for incorporating KB paths into text-based QA systems and establish a strong upper bound on FQ for our method using an oracle retriever. We show that several variants of Personalized PageRank based fact retrievers lead to a low recall of answer entities and consequently fail to improve QA performance. Our results suggest that fact retrieval is a bottleneck for integrating KBs into real world QA datasets 2021.deelio-1.3 @@ -102,7 +102,7 @@ Enhancing Multiple-Choice Question Answering with Causal Knowledge DhairyaDalal - MihaelArcan + MihaelArcan PaulBuitelaar 70–80 The task of causal question answering aims to reason about causes and effects over a provided real or hypothetical premise. Recent approaches have converged on using transformer-based language models to solve question answering tasks. However, pretrained language models often struggle when external knowledge is not present in the premise or when additional context is required to answer the question. To the best of our knowledge, no prior work has explored the efficacy of augmenting pretrained language models with external causal knowledge for multiple-choice causal question answering. In this paper, we present novel strategies for the representation of causal knowledge. Our empirical results demonstrate the efficacy of augmenting pretrained models with external causal knowledge. We show improved performance on the COPA (Choice of Plausible Alternatives) and WIQA (What If Reasoning Over Procedural Text) benchmark tasks. On the WIQA benchmark, our approach is competitive with the state-of-the-art and exceeds it within the evaluation subcategories of In-Paragraph and Out-of-Paragraph perturbations. @@ -127,7 +127,7 @@ SeojinBang WenXiao GiuseppeCarenini - RaymondNg + RaymondNg Young jiLee 96–107 Text classification has wide-ranging applications in various domains. While neural network approaches have drastically advanced performance in text classification, they tend to be powered by a large amount of training data, and interpretability is often an issue. As a step towards better accuracy and interpretability especially on small data, in this paper we present a new knowledge-infused attention mechanism, called KW-ATTN (KnoWledge-infused ATTentioN) to incorporate high-level concepts from external knowledge bases into Neural Network models. We show that KW-ATTN outperforms baseline models using only words as well as other approaches using concepts by classification accuracy, which indicates that high-level concepts help model prediction. Furthermore, crowdsourced human evaluation suggests that additional concept information helps interpretability of the model. @@ -174,7 +174,7 @@ Predicting Numerals in Natural Language Text Using a Language Model Considering the Quantitative Aspects of Numerals TakuSakamoto - AkikoAizawa + AkikoAizawa 140–150 Numerical common sense (NCS) is necessary to fully understand natural language text that includes numerals. NCS is knowledge about the numerical features of objects in text, such as size, weight, or color. Existing neural language models treat numerals in a text as string tokens in the same way as other words. Therefore, they cannot reflect the quantitative aspects of numerals in the training process, making it difficult to learn NCS. In this paper, we measure the NCS acquired by existing neural language models using a masked numeral prediction task as an evaluation task. In this task, we use two evaluation metrics to evaluate the language models in terms of the symbolic and quantitative aspects of the numerals, respectively. We also propose methods to reflect not only the symbolic aspect but also the quantitative aspect of numerals in the training of language models, using a loss function that depends on the magnitudes of the numerals and a regression model for the masked numeral prediction task. Finally, we quantitatively evaluate our proposed approaches on four datasets with different properties using the two metrics. Compared with methods that use existing language models, the proposed methods reduce numerical absolute errors, although exact match accuracy was reduced. This result confirms that the proposed methods, which use the magnitudes of the numerals for model training, are an effective way for models to capture NCS. 2021.deelio-1.14 diff --git a/data/xml/2021.depling.xml b/data/xml/2021.depling.xml index a2795a7307..9add553148 100644 --- a/data/xml/2021.depling.xml +++ b/data/xml/2021.depling.xml @@ -28,7 +28,7 @@ On auxiliary verb in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies: untangling the issue and proposing a systematized annotation strategy - MagaliDuran + MagaliDuran AdrianaPagano AmandaRassi ThiagoPardo @@ -95,8 +95,8 @@ AnnaNedoluzhko MichalNovák MartinPopel - ZdeněkŽabokrtský - DanielZeman + ZdeněkŽabokrtský + DanielZeman 101–114 2021.depling-1.10 nedoluzhko-etal-2021-one diff --git a/data/xml/2021.dialdoc.xml b/data/xml/2021.dialdoc.xml index e7c5dd3354..3920dab6e4 100644 --- a/data/xml/2021.dialdoc.xml +++ b/data/xml/2021.dialdoc.xml @@ -86,7 +86,7 @@ <fixed-case>CA</fixed-case>i<fixed-case>RE</fixed-case> in <fixed-case>D</fixed-case>ial<fixed-case>D</fixed-case>oc21: Data Augmentation for Information Seeking Dialogue System YanXu EtsukoIshii - Genta IndraWinata + Genta IndraWinata ZhaojiangLin AndreaMadotto ZihanLiu @@ -103,7 +103,7 @@ JiapengLi MingdaLi LongxuanMa - Wei-NanZhang + Wei-NanZhang TingLiu 52–56 We participate in the DialDoc Shared Task sub-task 1 (Knowledge Identification). The task requires identifying the grounding knowledge in form of a document span for the next dialogue turn. We employ two well-known pre-trained language models (RoBERTa and ELECTRA) to identify candidate document spans and propose a metric-based ensemble method for span selection. Our methods include data augmentation, model pre-training/fine-tuning, post-processing, and ensemble. On the submission page, we rank 2nd based on the average of normalized F1 and EM scores used for the final evaluation. Specifically, we rank 2nd on EM and 3rd on F1. @@ -116,7 +116,7 @@ NicoDaheim DavidThulke ChristianDugast - HermannNey + HermannNey 57–62 This paper summarizes our entries to both subtasks of the first DialDoc shared task which focuses on the agent response prediction task in goal-oriented document-grounded dialogs. The task is split into two subtasks: predicting a span in a document that grounds an agent turn and generating an agent response based on a dialog and grounding document. In the first subtask, we restrict the set of valid spans to the ones defined in the dataset, use a biaffine classifier to model spans, and finally use an ensemble of different models. For the second sub-task, we use a cascaded model which grounds the response prediction on the predicted span instead of the full document. With these approaches, we obtain significant improvements in both subtasks compared to the baseline. 2021.dialdoc-1.8 @@ -192,7 +192,7 @@ YejuZhou KaixinMa JonathanFrancis - EricNyberg + EricNyberg AlessandroOltramari 109–112 In this paper, we describe our systems for solving the two Doc2Dial shared task: knowledge identification and response generation. We proposed several pre-processing and post-processing methods, and we experimented with data augmentation by pre-training the models on other relevant datasets. Our best model for knowledge identification outperformed the baseline by 10.5+ f1-score on the test-dev split, and our best model for response generation outperformed the baseline by 11+ Sacrebleu score on the test-dev split. @@ -215,7 +215,7 @@ Can <fixed-case>I</fixed-case> Be of Further Assistance? Using Unstructured Knowledge Access to Improve Task-oriented Conversational Modeling DiJin SeokhwanKim - DilekHakkani-Tur + DilekHakkani-Tur 119–127 Most prior work on task-oriented dialogue systems are restricted to limited coverage of domain APIs. However, users oftentimes have requests that are out of the scope of these APIs. This work focuses on responding to these beyond-API-coverage user turns by incorporating external, unstructured knowledge sources. Our approach works in a pipelined manner with knowledge-seeking turn detection, knowledge selection, and response generation in sequence. We introduce novel data augmentation methods for the first two steps and demonstrate that the use of information extracted from dialogue context improves the knowledge selection and end-to-end performances. Through experiments, we achieve state-of-the-art performance for both automatic and human evaluation metrics on the DSTC9 Track 1 benchmark dataset, validating the effectiveness of our contributions. 2021.dialdoc-1.16 diff --git a/data/xml/2021.discann.xml b/data/xml/2021.discann.xml index eddaa5afba..c52c06be1a 100644 --- a/data/xml/2021.discann.xml +++ b/data/xml/2021.discann.xml @@ -58,7 +58,7 @@ Kordula DeKuthy MadeeswaranKannan Haemanth SanthiPonnusamy - DetmarMeurers + DetmarMeurers 31–40 2021.discann-1.6 kuthy-etal-2021-advancing diff --git a/data/xml/2021.disrpt.xml b/data/xml/2021.disrpt.xml index c5de4197ac..e774a66957 100644 --- a/data/xml/2021.disrpt.xml +++ b/data/xml/2021.disrpt.xml @@ -4,8 +4,8 @@ Proceedings of the 2nd Shared Task on Discourse Relation Parsing and Treebanking (DISRPT 2021) AmirZeldes - Yang JanetLiu - MikelIruskieta + Yang JanetLiu + MikelIruskieta PhilippeMuller ChloéBraud SoniaBadene @@ -36,7 +36,7 @@ A Transformer Based Approach towards Identification of Discourse Unit Segments and Connectives SahilBakshi - DiptiSharma + DiptiSharma 13–21 Discourse parsing, which involves understanding the structure, information flow, and modeling the coherence of a given text, is an important task in natural language processing. It forms the basis of several natural language processing tasks such as question-answering, text summarization, and sentiment analysis. Discourse unit segmentation is one of the fundamental tasks in discourse parsing and refers to identifying the elementary units of text that combine to form a coherent text. In this paper, we present a transformer based approach towards the automated identification of discourse unit segments and connectives. Early approaches towards segmentation relied on rule-based systems using POS tags and other syntactic information to identify discourse segments. Recently, transformer based neural systems have shown promising results in this domain. Our system, SegFormers, employs this transformer based approach to perform multilingual discourse segmentation and connective identification across 16 datasets encompassing 11 languages and 3 different annotation frameworks. We evaluate the system based on F1 scores for both tasks, with the best system reporting the highest F1 score of 97.02% for the treebanked English RST-DT dataset. 2021.disrpt-1.2 diff --git a/data/xml/2021.dravidianlangtech.xml b/data/xml/2021.dravidianlangtech.xml index 7499fbbaf8..ea8dba1a5d 100644 --- a/data/xml/2021.dravidianlangtech.xml +++ b/data/xml/2021.dravidianlangtech.xml @@ -5,7 +5,7 @@ Proceedings of the First Workshop on Speech and Language Technologies for Dravidian Languages Bharathi RajaChakravarthi RubaPriyadharshini - AnandKumar M + AnandKumar M ParameswariKrishnamurthy ElizabethSherly Association for Computational Linguistics @@ -50,9 +50,9 @@ Sentiment Classification of Code-Mixed Tweets using Bi-Directional <fixed-case>RNN</fixed-case> and Language Tags - SainikMahata + SainikMahata DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 28–35 Sentiment analysis tools and models have been developed extensively throughout the years, for European languages. In contrast, similar tools for Indian Languages are scarce. This is because, state-of-the-art pre-processing tools like POS tagger, shallow parsers, etc., are not readily available for Indian languages. Although, such working tools for Indian languages, like Hindi and Bengali, that are spoken by the majority of the population, are available, finding the same for less spoken languages like, Tamil, Telugu, and Malayalam, is difficult. Moreover, due to the advent of social media, the multi-lingual population of India, who are comfortable with both English ad their regional language, prefer to communicate by mixing both languages. This gives rise to massive code-mixed content and automatically annotating them with their respective sentiment labels becomes a challenging task. In this work, we take up a similar challenge of developing a sentiment analysis model that can work with English-Tamil code-mixed data. The proposed work tries to solve this by using bi-directional LSTMs along with language tagging. Other traditional methods, based on classical machine learning algorithms have also been discussed in the literature, and they also act as the baseline systems to which we will compare our Neural Network based model. The performance of the developed algorithm, based on Neural Network architecture, garnered precision, recall, and F1 scores of 0.59, 0.66, and 0.58 respectively. 2021.dravidianlangtech-1.4 @@ -104,7 +104,7 @@ Task-Specific Pre-Training and Cross Lingual Transfer for Sentiment Analysis in <fixed-case>D</fixed-case>ravidian Code-Switched Languages AkshatGupta Sai KrishnaRallabandi - Alan WBlack + Alan WBlack 73–79 Sentiment analysis in Code-Mixed languages has garnered a lot of attention in recent years. It is an important task for social media monitoring and has many applications, as a large chunk of social media data is Code-Mixed. In this paper, we work on the problem of sentiment analysis for Dravidian Code-Switched languages - Tamil-Engish and Malayalam-English, using three different BERT based models. We leverage task-specific pre-training and cross-lingual transfer to improve on previously reported results, with significant improvement for the Tamil-Engish dataset. We also present a multilingual sentiment classification model that has competitive performance on both Tamil-English and Malayalam-English datasets. 2021.dravidianlangtech-1.9 @@ -166,7 +166,7 @@ RubaPriyadharshini ShubhankerBanerjee RichardSaldanha - John P.McCrae + John P.McCrae Anand KumarM ParameswariKrishnamurthy MelvinJohnson @@ -194,7 +194,7 @@ Prasanna KumarKumaresan RahulPonnusamy HariharanR L - John P.McCrae + John P.McCrae ElizabethSherly 133–145 Detecting offensive language in social media in local languages is critical for moderating user-generated content. Thus, the field of offensive language identification in under-resourced Tamil, Malayalam and Kannada languages are essential. As the user-generated content is more code-mixed and not well studied for under-resourced languages, it is imperative to create resources and conduct benchmarking studies to encourage research in under-resourced Dravidian languages. We created a shared task on offensive language detection in Dravidian languages. We summarize here the dataset for this challenge which are openly available at https://competitions.codalab.org/competitions/27654, and present an overview of the methods and the results of the competing systems. @@ -502,7 +502,7 @@ <fixed-case>JUNLP</fixed-case>@<fixed-case>D</fixed-case>ravidian<fixed-case>L</fixed-case>ang<fixed-case>T</fixed-case>ech-<fixed-case>EACL</fixed-case>2021: Offensive Language Identification in <fixed-case>D</fixed-case>ravidian Langauges AvishekGarain AtanuMandal - Sudip KumarNaskar + Sudip KumarNaskar 319–322 Offensive language identification has been an active area of research in natural language processing. With the emergence of multiple social media platforms offensive language identification has emerged as a need of the hour. Traditional offensive language identification models fail to deliver acceptable results as social media contents are largely in multilingual and are code-mixed in nature. This paper tries to resolve this problem by using IndicBERT and BERT architectures, to facilitate identification of offensive languages for Kannada-English, Malayalam-English, and Tamil-English code-mixed language pairs extracted from social media. The presented approach when evaluated on the test corpus provided precision, recall, and F1 score for language pair Kannada-English as 0.62, 0.71, and 0.66, respectively, for language pair Malayalam-English as 0.77, 0.43, and 0.53, respectively, and for Tamil-English as 0.71, 0.74, and 0.72, respectively. 2021.dravidianlangtech-1.46 diff --git a/data/xml/2021.eacl.xml b/data/xml/2021.eacl.xml index c48bdfa284..8f1dfe186c 100644 --- a/data/xml/2021.eacl.xml +++ b/data/xml/2021.eacl.xml @@ -4,7 +4,7 @@ Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume PaolaMerlo - JorgTiedemann + JorgTiedemann ReutTsarfaty Association for Computational Linguistics
Online
@@ -35,7 +35,7 @@ ChenyanXiong JianfengGao MengdiWang - BillDolan + BillDolan 12–30 Multi-document question generation focuses on generating a question that covers the common aspect of multiple documents. Such a model is useful in generating clarifying options. However, a naive model trained only using the targeted (‘positive’) document set may generate too generic questions that cover a larger scope than delineated by the document set. To address this challenge, we introduce the contrastive learning strategy where given ‘positive’ and ‘negative’ sets of documents, we generate a question that is closely related to the ‘positive’ set but is far away from the ‘negative’ set. This setting allows generated questions to be more specific and related to the target document set. To generate such specific questions, we propose Multi-Source Coordinated Question Generator (MSCQG), a novel framework that includes a supervised learning (SL) stage and a reinforcement learning (RL) stage. In the SL stage, a single-document question generator is trained. In the RL stage, a coordinator model is trained to find optimal attention weights to align multiple single-document generators, by optimizing a reward designed to promote specificity of generated questions. We also develop an effective auxiliary objective, named Set-induced Contrastive Regularization (SCR) that improves the coordinator’s contrastive learning during the RL stage. We show that our model significantly outperforms several strong baselines, as measured by automatic metrics and human evaluation. The source repository is publicly available at ‘www.github.com/woonsangcho/contrast_qgen’. 2021.eacl-main.2 @@ -59,8 +59,8 @@ AiliShen HiyoriYoshikawa TomoyaIwakura - DanielBeck - TimothyBaldwin + DanielBeck + TimothyBaldwin 42–48 Images are core components of multi-modal learning in natural language processing (NLP), and results have varied substantially as to whether images improve NLP tasks or not. One confounding effect has been that previous NLP research has generally focused on sophisticated tasks (in varying settings), generally applied to English only. We focus on text classification, in the context of assigning named entity classes to a given Wikipedia page, where images generally complement the text and the Wikipedia page can be in one of a number of different languages. Our experiments across a range of languages show that images complement NLP models (including BERT) trained without external pre-training, but when combined with BERT models pre-trained on large-scale external data, images contribute nothing. 2021.eacl-main.4 @@ -70,7 +70,7 @@ If you’ve got it, flaunt it: Making the most of fine-grained sentiment annotations JeremyBarnes - LiljaØvrelid + LiljaØvrelid ErikVelldal 49–62 Fine-grained sentiment analysis attempts to extract sentiment holders, targets and polar expressions and resolve the relationship between them, but progress has been hampered by the difficulty of annotation. Targeted sentiment analysis, on the other hand, is a more narrow task, focusing on extracting sentiment targets and classifying their polarity. In this paper, we explore whether incorporating holder and expression information can improve target extraction and classification and perform experiments on eight English datasets. We conclude that jointly predicting target and polarity BIO labels improves target extraction, and that augmenting the input text with gold expressions generally improves targeted polarity classification. This highlights the potential importance of annotating expressions for fine-grained sentiment datasets. At the same time, our results show that performance of current models for predicting polar expressions is poor, hampering the benefit of this information in practice. @@ -97,7 +97,7 @@ MeredithRiggs XinruYan HaogangBao - CarolynRose + CarolynRose 78–90 Modelling persuasion strategies as predictors of task outcome has several real-world applications and has received considerable attention from the computational linguistics community. However, previous research has failed to account for the resisting strategies employed by an individual to foil such persuasion attempts. Grounded in prior literature in cognitive and social psychology, we propose a generalised framework for identifying resisting strategies in persuasive conversations. We instantiate our framework on two distinct datasets comprising persuasion and negotiation conversations. We also leverage a hierarchical sequence-labelling neural architecture to infer the aforementioned resisting strategies automatically. Our experiments reveal the asymmetry of power roles in non-collaborative goal-directed conversations and the benefits accrued from incorporating resisting strategies on the final conversation outcome. We also investigate the role of different resisting strategies on the conversation outcome and glean insights that corroborate with past findings. We also make the code and the dataset of this work publicly available at https://github.com/americast/resper. 2021.eacl-main.7 @@ -185,7 +185,7 @@ Unification-based Reconstruction of Multi-hop Explanations for Science Questions MarcoValentino MokanaranganThayaparan - AndréFreitas + AndréFreitas 200–211 This paper presents a novel framework for reconstructing multi-hop explanations in science Question Answering (QA). While existing approaches for multi-hop reasoning build explanations considering each question in isolation, we propose a method to leverage explanatory patterns emerging in a corpus of scientific explanations. Specifically, the framework ranks a set of atomic facts by integrating lexical relevance with the notion of unification power, estimated analysing explanations for similar questions in the corpus. An extensive evaluation is performed on the Worldtree corpus, integrating k-NN clustering and Information Retrieval (IR) techniques. We present the following conclusions: (1) The proposed method achieves results competitive with Transformers, yet being orders of magnitude faster, a feature that makes it scalable to large explanatory corpora (2) The unification-based mechanism has a key role in reducing semantic drift, contributing to the reconstruction of many hops explanations (6 or more facts) and the ranking of complex inference facts (+12.0 Mean Average Precision) (3) Crucially, the constructed explanations can support downstream QA models, improving the accuracy of BERT by up to 10% overall. 2021.eacl-main.15 @@ -243,7 +243,7 @@ Exploiting Cloze-Questions for Few-Shot Text Classification and Natural Language Inference TimoSchick - HinrichSchütze + HinrichSchütze 255–269 Some NLP tasks can be solved in a fully unsupervised fashion by providing a pretrained language model with “task descriptions” in natural language (e.g., Radford et al., 2019). While this approach underperforms its supervised counterpart, we show in this work that the two ideas can be combined: We introduce Pattern-Exploiting Training (PET), a semi-supervised training procedure that reformulates input examples as cloze-style phrases to help language models understand a given task. These phrases are then used to assign soft labels to a large set of unlabeled examples. Finally, standard supervised training is performed on the resulting training set. For several tasks and languages, PET outperforms supervised training and strong semi-supervised approaches in low-resource settings by a large margin. 2021.eacl-main.20 @@ -267,7 +267,7 @@ <fixed-case>AREDSUM</fixed-case>: Adaptive Redundancy-Aware Iterative Sentence Ranking for Extractive Document Summarization KepingBi RahulJha - BruceCroft + BruceCroft AsliCelikyilmaz 281–291 Redundancy-aware extractive summarization systems score the redundancy of the sentences to be included in a summary either jointly with their salience information or separately as an additional sentence scoring step. Previous work shows the efficacy of jointly scoring and selecting sentences with neural sequence generation models. It is, however, not well-understood if the gain is due to better encoding techniques or better redundancy reduction approaches. Similarly, the contribution of salience versus diversity components on the created summary is not studied well. Building on the state-of-the-art encoding methods for summarization, we present two adaptive learning models: AREDSUM-SEQ that jointly considers salience and novelty during sentence selection; and a two-step AREDSUM-CTX that scores salience first, then learns to balance salience and redundancy, enabling the measurement of the impact of each aspect. Empirical results on CNN/DailyMail and NYT50 datasets show that by modeling diversity explicitly in a separate step, AREDSUM-CTX achieves significantly better performance than AREDSUM-SEQ as well as state-of-the-art extractive summarization baselines. @@ -281,7 +281,7 @@ AndreasWeise VeredSilber-Varod AnatLerner - JuliaHirschberg + JuliaHirschberg RivkaLevitan 292–299 It has been well-documented for several languages that human interlocutors tend to adapt their linguistic productions to become more similar to each other. This behavior, known as entrainment, affects lexical choice as well, both with regard to specific words, such as referring expressions, and overall style. We offer what we believe to be the first investigation of such lexical entrainment in Hebrew. Using two existing measures, we analyze Hebrew speakers interacting in a Map Task, a popular experimental setup, and find rich evidence of lexical entrainment. Analyzing speaker pairs by the combination of their genders as well as speakers by their individual gender, we find no clear pattern of differences. We do, however, find that speakers in a position of less power entrain more than those with greater power, which matches theoretical accounts. Overall, our results mostly accord with those for American English, with a lack of entrainment on hedge words being the main difference. @@ -353,7 +353,7 @@ A Systematic Review of Reproducibility Research in Natural Language Processing - AnyaBelz + AnyaBelz ShubhamAgarwal AnastasiaShimorina EhudReiter @@ -367,10 +367,10 @@ Bootstrapping Multilingual <fixed-case>AMR</fixed-case> with Contextual Word Alignments JanakiSheth Young-SukLee - RamónFernandez Astudillo + RamónFernandez Astudillo TahiraNaseem - RaduFlorian - SalimRoukos + RaduFlorian + SalimRoukos ToddWard 394–404 We develop high performance multilingual Abstract Meaning Representation (AMR) systems by projecting English AMR annotations to other languages with weak supervision. We achieve this goal by bootstrapping transformer-based multilingual word embeddings, in particular those from cross-lingual RoBERTa (XLM-R large). We develop a novel technique for foreign-text-to-English AMR alignment, using the contextual word alignment between English and foreign language tokens. This word alignment is weakly supervised and relies on the contextualized XLM-R word embeddings. We achieve a highly competitive performance that surpasses the best published results for German, Italian, Spanish and Chinese. @@ -445,7 +445,7 @@ <fixed-case>FEWS</fixed-case>: Large-Scale, Low-Shot Word Sense Disambiguation with the Dictionary TerraBlevins MandarJoshi - LukeZettlemoyer + LukeZettlemoyer 455–465 Current models for Word Sense Disambiguation (WSD) struggle to disambiguate rare senses, despite reaching human performance on global WSD metrics. This stems from a lack of data for both modeling and evaluating rare senses in existing WSD datasets. In this paper, we introduce FEWS (Few-shot Examples of Word Senses), a new low-shot WSD dataset automatically extracted from example sentences in Wiktionary. FEWS has high sense coverage across different natural language domains and provides: (1) a large training set that covers many more senses than previous datasets and (2) a comprehensive evaluation set containing few- and zero-shot examples of a wide variety of senses. We establish baselines on FEWS with knowledge-based and neural WSD approaches and present transfer learning experiments demonstrating that models additionally trained with FEWS better capture rare senses in existing WSD datasets. Finally, we find humans outperform the best baseline models on FEWS, indicating that FEWS will support significant future work on low-shot WSD. 2021.eacl-main.36 @@ -516,7 +516,7 @@ Does She Wink or Does She Nod? A Challenging Benchmark for Evaluating Word Understanding of Language Models Lutfi KeremSenel - HinrichSchütze + HinrichSchütze 532–538 Recent progress in pretraining language models on large corpora has resulted in significant performance gains on many NLP tasks. These large models acquire linguistic knowledge during pretraining, which helps to improve performance on downstream tasks via fine-tuning. To assess what kind of knowledge is acquired, language models are commonly probed by querying them with ‘fill in the blank’ style cloze questions. Existing probing datasets mainly focus on knowledge about relations between words and entities. We introduce WDLMPro (Word Definitions Language Model Probing) to evaluate word understanding directly using dictionary definitions of words. In our experiments, three popular pretrained language models struggle to match words and their definitions. This indicates that they understand many words poorly and that our new probing task is a difficult challenge that could help guide research on LMs in the future. 2021.eacl-main.42 @@ -611,7 +611,7 @@ AhmedEl-Kishky AdithyaRenduchintala VishravChaudhary - FranciscoGuzmán + FranciscoGuzmán LuciaSpecia 619–625 Quality estimation aims to measure the quality of translated content without access to a reference translation. This is crucial for machine translation systems in real-world scenarios where high-quality translation is needed. While many approaches exist for quality estimation, they are based on supervised machine learning requiring costly human labelled data. As an alternative, we propose a technique that does not rely on examples from human-annotators and instead uses synthetic training data. We train off-the-shelf architectures for supervised quality estimation on our synthetic data and show that the resulting models achieve comparable performance to models trained on human-annotated data, both for sentence and word-level prediction. @@ -633,8 +633,8 @@ <fixed-case>GRIT</fixed-case>: Generative Role-filler Transformers for Document-level Event Entity Extraction XinyaDu - AlexanderRush - ClaireCardie + AlexanderRush + ClaireCardie 634–644 We revisit the classic problem of document-level role-filler entity extraction (REE) for template filling. We argue that sentence-level approaches are ill-suited to the task and introduce a generative transformer-based encoder-decoder framework (GRIT) that is designed to model context at the document level: it can make extraction decisions across sentence boundaries; is implicitly aware of noun phrase coreference structure, and has the capacity to respect cross-role dependencies in the template structure. We evaluate our approach on the MUC-4 dataset, and show that our model performs substantially better than prior work. We also show that our modeling choices contribute to model performance, e.g., by implicitly capturing linguistic knowledge such as recognizing coreferent entity mentions. 2021.eacl-main.52 @@ -679,7 +679,7 @@ <fixed-case>F</fixed-case>ake<fixed-case>F</fixed-case>low: Fake News Detection by Modeling the Flow of Affective Information BilalGhanem - Simone PaoloPonzetto + Simone PaoloPonzetto PaoloRosso FranciscoRangel 679–689 @@ -692,7 +692,7 @@ <fixed-case>CTC</fixed-case>-based Compression for Direct Speech Translation MarcoGaido MauroCettolo - MatteoNegri + MatteoNegri MarcoTurchi 690–696 Previous studies demonstrated that a dynamic phone-informed compression of the input audio is beneficial for speech translation (ST). However, they required a dedicated model for phone recognition and did not test this solution for direct ST, in which a single model translates the input audio into the target language without intermediate representations. In this work, we propose the first method able to perform a dynamic compression of the input in direct ST models. In particular, we exploit the Connectionist Temporal Classification (CTC) to compress the input sequence according to its phonetic characteristics. Our experiments demonstrate that our solution brings a 1.3-1.5 BLEU improvement over a strong baseline on two language pairs (English-Italian and English-German), contextually reducing the memory footprint by more than 10%. @@ -731,7 +731,7 @@ Top-down Discourse Parsing via Sequence Labelling FajriKoto Jey HanLau - TimothyBaldwin + TimothyBaldwin 715–726 We introduce a top-down approach to discourse parsing that is conceptually simpler than its predecessors (Kobayashi et al., 2020; Zhang et al., 2020). By framing the task as a sequence labelling problem where the goal is to iteratively segment a document into individual discourse units, we are able to eliminate the decoder and reduce the search space for splitting points. We explore both traditional recurrent models and modern pre-trained transformer models for the task, and additionally introduce a novel dynamic oracle for top-down parsing. Based on the Full metric, our proposed LSTM model sets a new state-of-the-art for RST parsing. 2021.eacl-main.60 @@ -820,7 +820,7 @@ RahulAralikatte MatthewLamm DanielHardt - AndersSøgaard + AndersSøgaard 810–817 Most, if not all forms of ellipsis (e.g., so does Mary) are similar to reading comprehension questions (what does Mary do), in that in order to resolve them, we need to identify an appropriate text span in the preceding discourse. Following this observation, we present an alternative approach for English ellipsis resolution relying on architectures developed for question answering (QA). We present both single-task models, and joint models trained on auxiliary QA and coreference resolution datasets, clearly outperforming the current state of the art for Sluice Ellipsis (from 70.00 to 86.01 F1) and Verb Phrase Ellipsis (from 72.89 to 78.66 F1). 2021.eacl-main.68 @@ -887,7 +887,7 @@ Leveraging Passage Retrieval with Generative Models for Open Domain Question Answering GautierIzacard - EdouardGrave + EdouardGrave 874–880 Generative models for open domain question answering have proven to be competitive, without resorting to external knowledge. While promising, this approach requires to use models with billions of parameters, which are expensive to train and query. In this paper, we investigate how much these models can benefit from retrieving text passages, potentially containing evidence. We obtain state-of-the-art results on the Natural Questions and TriviaQA open benchmarks. Interestingly, we observe that the performance of this method significantly improves when increasing the number of retrieved passages. This is evidence that sequence-to-sequence models offers a flexible framework to efficiently aggregate and combine evidence from multiple passages. 2021.eacl-main.74 @@ -957,8 +957,8 @@ Multilingual Machine Translation: Closing the Gap between Shared and Language-specific Encoder-Decoders CarlosEscolano - Marta R.Costa-jussà - José A. R.Fonollosa + Marta R.Costa-jussà + José A. R.Fonollosa MikelArtetxe 944–948 State-of-the-art multilingual machine translation relies on a universal encoder-decoder, which requires retraining the entire system to add new languages. In this paper, we propose an alternative approach that is based on language-specific encoder-decoders, and can thus be more easily extended to new languages by learning their corresponding modules. So as to encourage a common interlingua representation, we simultaneously train the N initial languages. Our experiments show that the proposed approach outperforms the universal encoder-decoder by 3.28 BLEU points on average, while allowing to add new languages without the need to retrain the rest of the modules. All in all, our work closes the gap between shared and language-specific encoderdecoders, advancing toward modular multilingual machine translation systems that can be flexibly extended in lifelong learning settings. @@ -989,7 +989,7 @@ Hierarchical Multi-head Attentive Network for Evidence-aware Fake News Detection - NguyenVo + NguyenVo KyuminLee 965–975 The widespread of fake news and misinformation in various domains ranging from politics, economics to public health has posed an urgent need to automatically fact-check information. A recent trend in fake news detection is to utilize evidence from external sources. However, existing evidence-aware fake news detection methods focused on either only word-level attention or evidence-level attention, which may result in suboptimal performance. In this paper, we propose a Hierarchical Multi-head Attentive Network to fact-check textual claims. Our model jointly combines multi-head word-level attention and multi-head document-level attention, which aid explanation in both word-level and evidence-level. Experiments on two real-word datasets show that our model outperforms seven state-of-the-art baselines. Improvements over baselines are from 6% to 18%. Our source code and datasets are released at https://github.com/nguyenvo09/EACL2021. @@ -1001,7 +1001,7 @@ Identifying Named Entities as they are Typed RavneetArora Chen-TseTsai - DanielPreotiuc-Pietro + DanielPreotiuc-Pietro 976–988 Identifying named entities in written text is an essential component of the text processing pipeline used in applications such as text editors to gain a better understanding of the semantics of the text. However, the typical experimental setup for evaluating Named Entity Recognition (NER) systems is not directly applicable to systems that process text in real time as the text is being typed. Evaluation is performed on a sentence level assuming the end-user is willing to wait until the entire sentence is typed for entities to be identified and further linked to identifiers or co-referenced. We introduce a novel experimental setup for NER systems for applications where decisions about named entity boundaries need to be performed in an online fashion. We study how state-of-the-art methods perform under this setup in multiple languages and propose adaptations to these models to suit this new experimental setup. Experimental results show that the best systems that are evaluated on each token after its typed, reach performance within 1–5 F1 points of systems that are evaluated at the end of the sentence. These show that entity recognition can be performed in this setup and open up the development of other NLP tools in a similar setup. 2021.eacl-main.84 @@ -1059,7 +1059,7 @@ ThomasKober JulieWeeds LorenzoBertolini - DavidWeir + DavidWeir 1034–1048 The automatic detection of hypernymy relationships represents a challenging problem in NLP. The successful application of state-of-the-art supervised approaches using distributed representations has generally been impeded by the limited availability of high quality training data. We have developed two novel data augmentation techniques which generate new training examples from existing ones. First, we combine the linguistic principles of hypernym transitivity and intersective modifier-noun composition to generate additional pairs of vectors, such as “small dog - dog” or “small dog - animal”, for which a hypernymy relationship can be assumed. Second, we use generative adversarial networks (GANs) to generate pairs of vectors for which the hypernymy relation can also be assumed. We furthermore present two complementary strategies for extending an existing dataset by leveraging linguistic resources such as WordNet. Using an evaluation across 3 different datasets for hypernymy detection and 2 different vector spaces, we demonstrate that both of the proposed automatic data augmentation and dataset extension strategies substantially improve classifier performance. 2021.eacl-main.89 @@ -1099,7 +1099,7 @@ JiMa IvanKorotkov YinfeiYang - KeithHall + KeithHall RyanMcDonald 1075–1088 A major obstacle to the wide-spread adoption of neural retrieval models is that they require large supervised training sets to surpass traditional term-based techniques, which are constructed from raw corpora. In this paper, we propose an approach to zero-shot learning for passage retrieval that uses synthetic question generation to close this gap. The question generation system is trained on general domain data, but is applied to documents in the targeted domain. This allows us to create arbitrarily large, yet noisy, question-passage relevance pairs that are domain specific. Furthermore, when this is coupled with a simple hybrid term-neural model, first-stage retrieval performance can be improved further. Empirically, we show that this is an effective strategy for building neural passage retrieval models in the absence of large training corpora. Depending on the domain, this technique can even approach the accuracy of supervised models. @@ -1111,7 +1111,7 @@ Discourse-Aware Unsupervised Summarization for Long Scientific Documents YueDong AndreiMircea - Jackie Chi KitCheung + Jackie Chi KitCheung 1089–1102 We propose an unsupervised graph-based ranking model for extractive summarization of long scientific documents. Our method assumes a two-level hierarchical graph representation of the source document, and exploits asymmetrical positional cues to determine sentence importance. Results on the PubMed and arXiv datasets show that our approach outperforms strong unsupervised baselines by wide margins in automatic metrics and human evaluation. In addition, it achieves performance comparable to many state-of-the-art supervised approaches which are trained on hundreds of thousands of examples. These results suggest that patterns in the discourse structure are a strong signal for determining importance in scientific articles. 2021.eacl-main.93 @@ -1135,7 +1135,7 @@ KyunghyunCho MyleOtt BingLiu - JamesGlass + JamesGlass FuchunPeng 1121–1133 In this work, we study how the finetuning stage in the pretrain-finetune framework changes the behavior of a pretrained neural language generator. We focus on the transformer encoder-decoder model for the open-domain dialogue response generation task. Our major finding is that after standard finetuning, the model forgets some of the important language generation skills acquired during large-scale pretraining. We demonstrate the forgetting phenomenon through a set of detailed behavior analysis from the perspectives of knowledge transfer, context sensitivity, and function space projection. As a preliminary attempt to alleviate the forgetting problem, we propose an intuitive finetuning strategy named “mix-review”. We find that mix-review effectively regularizes the finetuning process, and the forgetting problem is alleviated to some extent. Finally, we discuss interesting behavior of the resulting dialogue model and its implications. @@ -1199,7 +1199,7 @@ Randomized Deep Structured Prediction for Discourse-Level Processing ManuelWidmoser - Maria LeonorPacheco + Maria LeonorPacheco JeanHonorio DanGoldwasser 1174–1184 @@ -1210,7 +1210,7 @@ Automatic Data Acquisition for Event Coreference Resolution - Prafulla KumarChoubey + Prafulla KumarChoubey RuihongHuang 1185–1196 We propose to leverage lexical paraphrases and high precision rules informed by news discourse structure to automatically collect coreferential and non-coreferential event pairs from unlabeled English news articles. We perform both manual validation and empirical evaluation on multiple evaluation datasets with different event domains and text genres to assess the quality of our acquired event pairs. We found that a model trained on our acquired event pairs performs comparably as the supervised model when applied to new data out of the training data domains. Further, augmenting human-annotated data with the acquired event pairs provides empirical performance gains on both in-domain and out-of-domain evaluation datasets. @@ -1262,7 +1262,7 @@ YaoWan JianguoZhang WentingZhao - PhilipYu + PhilipYu 1235–1244 The non-autoregressive models have boosted the efficiency of neural machine translation through parallelized decoding at the cost of effectiveness, when comparing with the autoregressive counterparts. In this paper, we claim that the syntactic and semantic structures among natural language are critical for non-autoregressive machine translation and can further improve the performance. However, these structures are rarely considered in the existing non-autoregressive models. Inspired by this intuition, we propose to incorporate the explicit syntactic and semantic structure of languages into a non-autoregressive Transformer, for the task of neural machine translation. Moreover, we also consider the intermediate latent alignment within target sentences to better learn the long-term token dependencies. Experimental results on two real-world datasets (i.e., WMT14 En-De and WMT16 En- Ro) show that our model achieves a significantly faster speed, as well as keeps the translation quality when compared with several state-of-the-art non-autoregressive models. 2021.eacl-main.105 @@ -1293,7 +1293,7 @@ Language Models for Lexical Inference in Context MartinSchmitt - HinrichSchütze + HinrichSchütze 1267–1280 Lexical inference in context (LIiC) is the task of recognizing textual entailment between two very similar sentences, i.e., sentences that only differ in one expression. It can therefore be seen as a variant of the natural language inference task that is focused on lexical semantics. We formulate and evaluate the first approaches based on pretrained language models (LMs) for this task: (i) a few-shot NLI classifier, (ii) a relation induction approach based on handcrafted patterns expressing the semantics of lexical inference, and (iii) a variant of (ii) with patterns that were automatically extracted from a corpus. All our approaches outperform the previous state of the art, showing the potential of pretrained LMs for LIiC. In an extensive analysis, we investigate factors of success and failure of our three approaches. 2021.eacl-main.108 @@ -1305,7 +1305,7 @@ ZhuangLi LizhenQu ShuoHuang - GholamrezaHaffari + GholamrezaHaffari 1281–1291 In this work, we investigate the problems of semantic parsing in a few-shot learning setting. In this setting, we are provided with k utterance-logical form pairs per new predicate. The state-of-the-art neural semantic parsers achieve less than 25% accuracy on benchmark datasets when k = 1. To tackle this problem, we proposed to i) apply a designated meta-learning method to train the model; ii) regularize attention scores with alignment statistics; iii) apply a smoothing technique in pretraining. As a result, our method consistently outperforms all the baselines in both one and two-shot settings. 2021.eacl-main.109 @@ -1344,7 +1344,7 @@ OzanCaglayan MenekseKuyu Mustafa SercanAmac - PranavaMadhyastha + PranavaMadhyastha ErkutErdem AykutErdem LuciaSpecia @@ -1360,7 +1360,7 @@ Memorization vs. Generalization : Quantifying Data Leakage in <fixed-case>NLP</fixed-case> Performance Evaluation AparnaElangovan JiayuanHe - KarinVerspoor + KarinVerspoor 1325–1335 Public datasets are often used to evaluate the efficacy and generalizability of state-of-the-art methods for many tasks in natural language processing (NLP). However, the presence of overlap between the train and test datasets can lead to inflated results, inadvertently evaluating the model’s ability to memorize and interpreting it as the ability to generalize. In addition, such data sets may not provide an effective indicator of the performance of these methods in real world scenarios. We identify leakage of training data into test data on several publicly available datasets used to evaluate NLP tasks, including named entity recognition and relation extraction, and study them to assess the impact of that leakage on the model’s ability to memorize versus generalize. 2021.eacl-main.113 @@ -1388,7 +1388,7 @@ VishravChaudhary ShuoSun HongyuGong - FranciscoGuzmán + FranciscoGuzmán 1351–1361 We present an approach based on multilingual sentence embeddings to automatically extract parallel sentences from the content of Wikipedia articles in 96 languages, including several dialects or low-resource languages. We do not limit the extraction process to alignments with English, but we systematically consider all possible language pairs. In total, we are able to extract 135M parallel sentences for 16720 different language pairs, out of which only 34M are aligned with English. This corpus is freely available. To get an indication on the quality of the extracted bitexts, we train neural MT baseline systems on the mined data only for 1886 languages pairs, and evaluate them on the TED corpus, achieving strong BLEU scores for many language pairs. The WikiMatrix bitexts seem to be particularly interesting to train MT systems between distant languages without the need to pivot through English. 2021.eacl-main.115 @@ -1401,8 +1401,8 @@ ChristianDruckenbrodt Saber AAkhondi JiayuanHe - TimothyBaldwin - KarinVerspoor + TimothyBaldwin + KarinVerspoor 1362–1375 Chemical patents contain rich coreference and bridging links, which are the target of this research. Specially, we introduce a novel annotation scheme, based on which we create the ChEMU-Ref dataset from reaction description snippets in English-language chemical patents. We propose a neural approach to anaphora resolution, which we show to achieve strong results, especially when jointly trained over coreference and bridging links. 2021.eacl-main.116 @@ -1449,8 +1449,8 @@ Evaluating language models for the retrieval and categorization of lexical collocations - LuisEspinosa Anke - JoanCodina-Filba + LuisEspinosa Anke + JoanCodina-Filba LeoWanner 1406–1417 Lexical collocations are idiosyncratic combinations of two syntactically bound lexical items (e.g., “heavy rain” or “take a step”). Understanding their degree of compositionality and idiosyncrasy, as well their underlying semantics, is crucial for language learners, lexicographers and downstream NLP applications. In this paper, we perform an exhaustive analysis of current language models for collocation understanding. We first construct a dataset of apparitions of lexical collocations in context, categorized into 17 representative semantic categories. Then, we perform two experiments: (1) unsupervised collocate retrieval using BERT, and (2) supervised collocation classification in context. We find that most models perform well in distinguishing light verb constructions, especially if the collocation’s first argument acts as subject, but often fail to distinguish, first, different syntactic structures within the same semantic category, and second, fine-grained semantic categories which restrict the use of small sets of valid collocates for a given base. @@ -1495,7 +1495,7 @@ Civil Rephrases Of Toxic Texts With Self-Supervised Transformers LéoLaugier JohnPavlopoulos - JeffreySorensen + JeffreySorensen LucasDixon 1442–1461 Platforms that support online commentary, from social networks to news sites, are increasingly leveraging machine learning to assist their moderation efforts. But this process does not typically provide feedback to the author that would help them contribute according to the community guidelines. This is prohibitively time-consuming for human moderators to do, and computational approaches are still nascent. This work focuses on models that can help suggest rephrasings of toxic comments in a more civil manner. Inspired by recent progress in unpaired sequence-to-sequence tasks, a self-supervised learning model is introduced, called CAE-T5. CAE-T5 employs a pre-trained text-to-text transformer, which is fine tuned with a denoising and cyclic auto-encoder loss. Experimenting with the largest toxicity detection dataset to date (Civil Comments) our model generates sentences that are more fluent and better at preserving the initial content compared to earlier text style transfer systems which we compare with using several scoring systems and human evaluation. @@ -1512,7 +1512,7 @@ HidetakaKamigaito KotaroFunakoshi HiroyaTakamura - ManabuOkumura + ManabuOkumura 1462–1473 The task of generating weather-forecast comments from meteorological simulations has the following requirements: (i) the changes in numerical values for various physical quantities need to be considered, (ii) the weather comments should be dependent on delivery time and area information, and (iii) the comments should provide useful information for users. To meet these requirements, we propose a data-to-text model that incorporates three types of encoders for numerical forecast maps, observation data, and meta-data. We also introduce weather labels representing weather information, such as sunny and rain, for our model to explicitly describe useful information. We conducted automatic and human evaluations. The results indicate that our model performed best against baselines in terms of informativeness. We make our code and data publicly available. 2021.eacl-main.125 @@ -1522,7 +1522,7 @@ <fixed-case>SICK</fixed-case>-<fixed-case>NL</fixed-case>: A Dataset for <fixed-case>D</fixed-case>utch Natural Language Inference GijsWijnholds - MichaelMoortgat + MichaelMoortgat 1474–1479 We present SICK-NL (read: signal), a dataset targeting Natural Language Inference in Dutch. SICK-NL is obtained by translating the SICK dataset of (Marelli et al., 2014) from English into Dutch. Having a parallel inference dataset allows us to compare both monolingual and multilingual NLP models for English and Dutch on the two tasks. In the paper, we motivate and detail the translation process, perform a baseline evaluation on both the original SICK dataset and its Dutch incarnation SICK-NL, taking inspiration from Dutch skipgram embeddings and contextualised embedding models. In addition, we encapsulate two phenomena encountered in the translation to formulate stress tests and verify how well the Dutch models capture syntactic restructurings that do not affect semantics. Our main finding is all models perform worse on SICK-NL than on SICK, indicating that the Dutch dataset is more challenging than the English original. Results on the stress tests show that models don’t fully capture word order freedom in Dutch, warranting future systematic studies. 2021.eacl-main.126 @@ -1536,8 +1536,8 @@ YevgenMatusevych HermanKamper ThomasSchatz - NaomiFeldman - SharonGoldwater + NaomiFeldman + SharonGoldwater 1480–1490 Non-native speakers show difficulties with spoken word processing. Many studies attribute these difficulties to imprecise phonological encoding of words in the lexical memory. We test an alternative hypothesis: that some of these difficulties can arise from the non-native speakers’ phonetic perception. We train a computational model of phonetic learning, which has no access to phonology, on either one or two languages. We first show that the model exhibits predictable behaviors on phone-level and word-level discrimination tasks. We then test the model on a spoken word processing task, showing that phonology may not be necessary to explain some of the word processing effects observed in non-native speakers. We run an additional analysis of the model’s lexical representation space, showing that the two training languages are not fully separated in that space, similarly to the languages of a bilingual human speaker. 2021.eacl-main.127 @@ -1653,7 +1653,7 @@ Benchmarking Machine Reading Comprehension: A Psychological Perspective SakuSugawara PontusStenetorp - AkikoAizawa + AkikoAizawa 1592–1612 Machine reading comprehension (MRC) has received considerable attention as a benchmark for natural language understanding. However, the conventional task design of MRC lacks explainability beyond the model interpretation, i.e., reading comprehension by a model cannot be explained in human terms. To this end, this position paper provides a theoretical basis for the design of MRC datasets based on psychology as well as psychometrics, and summarizes it in terms of the prerequisites for benchmarking MRC. We conclude that future datasets should (i) evaluate the capability of the model for constructing a coherent and grounded representation to understand context-dependent situations and (ii) ensure substantive validity by shortcut-proof questions and explanation as a part of the task design. 2021.eacl-main.137 @@ -1691,7 +1691,7 @@ ArtemRevenko KiamehrRezaee Mohammad TaherPilehvar - JoseCamacho-Collados + JoseCamacho-Collados 1635–1645 We present WiC-TSV, a new multi-domain evaluation benchmark for Word Sense Disambiguation. More specifically, we introduce a framework for Target Sense Verification of Words in Context which grounds its uniqueness in the formulation as binary classification task thus being independent of external sense inventories, and the coverage of various domains. This makes the dataset highly flexible for the evaluation of a diverse set of models and systems in and across domains. WiC-TSV provides three different evaluation settings, depending on the input signals provided to the model. We set baseline performance on the dataset using state-of-the-art language models. Experimental results show that even though these models can perform decently on the task, there remains a gap between machine and human performance, especially in out-of-domain settings. WiC-TSV data is available at https://competitions.codalab.org/competitions/23683. 2021.eacl-main.140 @@ -1796,7 +1796,7 @@ SanchitAgarwal Chien-WeiLin TagyoungChung - DilekHakkani-Tur + DilekHakkani-Tur 1730–1739 Dialogue State Tracking (DST) forms a core component of automated chatbot based systems designed for specific goals like hotel, taxi reservation, tourist information etc. With the increasing need to deploy such systems in new domains, solving the problem of zero/few-shot DST has become necessary. There has been a rising trend for learning to transfer knowledge from resource-rich domains to unknown domains with minimal need for additional data. In this work, we explore the merits of meta-learning algorithms for this transfer and hence, propose a meta-learner D-REPTILE specific to the DST problem. With extensive experimentation, we provide clear evidence of benefits over conventional approaches across different domains, methods, base models and datasets with significant (5-25%) improvement over the baseline in a low-data setting. Our proposed meta-learner is agnostic of the underlying model and hence any existing state-of-the-art DST system can improve its performance on unknown domains using our training strategy. 2021.eacl-main.148 @@ -1842,7 +1842,7 @@ What Sounds “Right” to Me? Experiential Factors in the Perception of Political Ideology QinlanShen - CarolynRose + CarolynRose 1762–1771 In this paper, we challenge the assumption that political ideology is inherently built into text by presenting an investigation into the impact of experiential factors on annotator perceptions of political ideology. We construct an annotated corpus of U.S. political discussion, where in addition to ideology labels for texts, annotators provide information about their political affiliation, exposure to political news, and familiarity with the source domain of discussion, Reddit. We investigate the variability in ideology judgments across annotators, finding evidence that these experiential factors may influence the consistency of how political ideologies are perceived. Finally, we present evidence that understanding how humans perceive and interpret ideology from texts remains a challenging task for state-of-the-art language models, pointing towards potential issues when modeling user experiences that may require more contextual knowledge. 2021.eacl-main.152 @@ -1863,7 +1863,7 @@ Globalizing <fixed-case>BERT</fixed-case>-based Transformer Architectures for Long Document Summarization QuentinGrail JulienPerez - EricGaussier + EricGaussier 1792–1810 Fine-tuning a large language model on downstream tasks has become a commonly adopted process in the Natural Language Processing (NLP) (CITATION). However, such a process, when associated with the current transformer-based (CITATION) architectures, shows several limitations when the target task requires to reason with long documents. In this work, we introduce a novel hierarchical propagation layer that spreads information between multiple transformer windows. We adopt a hierarchical approach where the input is divided in multiple blocks independently processed by the scaled dot-attentions and combined between the successive layers. We validate the effectiveness of our approach on three extractive summarization corpora of long scientific papers and news articles. We compare our approach to standard and pre-trained language-model-based summarizers and report state-of-the-art results for long document summarization and comparable results for smaller document summarization. 2021.eacl-main.154 @@ -1885,7 +1885,7 @@ We Need To Talk About Random Splits - AndersSøgaard + AndersSøgaard SebastianEbert JasmijnBastings KatjaFilippova @@ -1956,7 +1956,7 @@ Error Analysis and the Role of Morphology MarcelBollmann - AndersSøgaard + AndersSøgaard 1887–1900 We evaluate two common conjectures in error analysis of NLP models: (i) Morphology is predictive of errors; and (ii) the importance of morphology increases with the morphological complexity of a language. We show across four different tasks and up to 57 languages that of these conjectures, somewhat surprisingly, only (i) is true. Using morphological features does improve error prediction across tasks; however, this effect is less pronounced with morphologically complex languages. We speculate this is because morphology is more discriminative in morphologically simple languages. Across all four tasks, case and gender are the morphological features most predictive of error. 2021.eacl-main.162 @@ -2054,7 +2054,7 @@ Attention-based Relational Graph Convolutional Network for Target-Oriented Opinion Words Extraction JunfengJiang AnWang - AkikoAizawa + AkikoAizawa 1986–1997 Target-oriented opinion words extraction (TOWE) is a subtask of aspect-based sentiment analysis (ABSA). It aims to extract the corresponding opinion words for a given opinion target in a review sentence. Intuitively, the relation between an opinion target and an opinion word mostly relies on syntactics. In this study, we design a directed syntactic dependency graph based on a dependency tree to establish a path from the target to candidate opinions. Subsequently, we propose a novel attention-based relational graph convolutional neural network (ARGCN) to exploit syntactic information over dependency graphs. Moreover, to explicitly extract the corresponding opinion words toward the given opinion target, we effectively encode target information in our model with the target-aware representation. Empirical results demonstrate that our model significantly outperforms all of the existing models on four benchmark datasets. Extensive analysis also demonstrates the effectiveness of each component of our models. Our code is available at https://github.com/wcwowwwww/towe-eacl. 2021.eacl-main.170 @@ -2075,7 +2075,7 @@ Learning Relatedness between Types with Prototypes for Relation Extraction LishengFu - RalphGrishman + RalphGrishman 2011–2016 Relation schemas are often pre-defined for each relation dataset. Relation types can be related from different datasets and have overlapping semantics. We hypothesize we can combine these datasets according to the semantic relatedness between the relation types to overcome the problem of lack of training data. It is often easy to discover the connection between relation types based on relation names or annotation guides, but hard to measure the exact similarity and take advantage of the connection between the relation types from different datasets. We propose to use prototypical examples to represent each relation type and use these examples to augment related types from a different dataset. We obtain further improvement (ACE05) with this type augmentation over a strong baseline which uses multi-task learning between datasets to obtain better feature representation for relations. We make our implementation publicly available: https://github.com/fufrank5/relatedness 2021.eacl-main.172 @@ -2145,7 +2145,7 @@ The Interplay of Task Success and Dialogue Quality: An in-depth Evaluation in Task-Oriented Visual Dialogues AlbertoTestoni - RaffaellaBernardi + RaffaellaBernardi 2071–2082 When training a model on referential dialogue guessing games, the best model is usually chosen based on its task success. We show that in the popular end-to-end approach, this choice prevents the model from learning to generate linguistically richer dialogues, since the acquisition of language proficiency takes longer than learning the guessing task. By comparing models playing different games (GuessWhat, GuessWhich, and Mutual Friends), we show that this discrepancy is model- and task-agnostic. We investigate whether and when better language quality could lead to higher task success. We show that in GuessWhat, models could increase their accuracy if they learn to ground, encode, and decode also words that do not occur frequently in the training set. 2021.eacl-main.178 @@ -2214,7 +2214,7 @@ A Unified Feature Representation for Lexical Connotations EmilyAllaway - KathleenMcKeown + KathleenMcKeown 2145–2163 Ideological attitudes and stance are often expressed through subtle meanings of words and phrases. Understanding these connotations is critical to recognizing the cultural and emotional perspectives of the speaker. In this paper, we use distant labeling to create a new lexical resource representing connotation aspects for nouns and adjectives. Our analysis shows that it aligns well with human judgments. Additionally, we present a method for creating lexical representations that capture connotations within the embedding space and show that using the embeddings provides a statistically significant improvement on the task of stance detection when data is limited. 2021.eacl-main.184 @@ -2226,7 +2226,7 @@ RamitSawhney ArnavWadhwa ShivamAgarwal - Rajiv RatnShah + Rajiv RatnShah 2164–2175 Designing profitable trading strategies is complex as stock movements are highly stochastic; the market is influenced by large volumes of noisy data across diverse information sources like news and social media. Prior work mostly treats stock movement prediction as a regression or classification task and is not directly optimized towards profit-making. Further, they do not model the fine-grain temporal irregularities in the release of vast volumes of text that the market responds to quickly. Building on these limitations, we propose a novel hierarchical, learning to rank approach that uses textual data to make time-aware predictions for ranking stocks based on expected profit. Our approach outperforms state-of-the-art methods by over 8% in terms of cumulative profit and risk-adjusted returns in trading simulations on two benchmarks: English tweets and Chinese financial news spanning two major stock indexes and four global markets. Through ablative and qualitative analyses, we build the case for our method as a tool for daily stock trading. 2021.eacl-main.185 @@ -2272,8 +2272,8 @@ First Align, then Predict: Understanding the Cross-Lingual Ability of Multilingual <fixed-case>BERT</fixed-case> BenjaminMuller YanaiElazar - BenoîtSagot - DjaméSeddah + BenoîtSagot + DjaméSeddah 2214–2231 Multilingual pretrained language models have demonstrated remarkable zero-shot cross-lingual transfer capabilities. Such transfer emerges by fine-tuning on a task of interest in one language and evaluating on a distinct language, not seen during the fine-tuning. Despite promising results, we still lack a proper understanding of the source of this transfer. Using a novel layer ablation technique and analyses of the model’s internal representations, we show that multilingual BERT, a popular multilingual language model, can be viewed as the stacking of two sub-networks: a multilingual encoder followed by a task-specific language-agnostic predictor. While the encoder is crucial for cross-lingual transfer and remains mostly unchanged during fine-tuning, the task predictor has little importance on the transfer and can be reinitialized during fine-tuning. We present extensive experiments with three distinct tasks, seventeen typologically diverse languages and multiple domains to support our hypothesis. 2021.eacl-main.189 @@ -2338,7 +2338,7 @@ Content-based Models of Quotation AnselMacLaughlin - DavidSmith + DavidSmith 2296–2314 We explore the task of quotability identification, in which, given a document, we aim to identify which of its passages are the most quotable, i.e. the most likely to be directly quoted by later derived documents. We approach quotability identification as a passage ranking problem and evaluate how well both feature-based and BERT-based (Devlin et al., 2019) models rank the passages in a given document by their predicted quotability. We explore this problem through evaluations on five datasets that span multiple languages (English, Latin) and genres of literature (e.g. poetry, plays, novels) and whose corresponding derived documents are of multiple types (news, journal articles). Our experiments confirm the relatively strong performance of BERT-based models on this task, with the best model, a RoBERTA sequential sentence tagger, achieving an average rho of 0.35 and NDCG@1, 5, 50 of 0.26, 0.31 and 0.40, respectively, across all five datasets. 2021.eacl-main.195 @@ -2374,8 +2374,8 @@ Event-Driven News Stream Clustering using Entity-Aware Contextual Embeddings Kailash KarthikSaravanakumar MiguelBallesteros - Muthu KumarChandrasekaran - KathleenMcKeown + Muthu KumarChandrasekaran + KathleenMcKeown 2330–2340 We propose a method for online news stream clustering that is a variant of the non-parametric streaming K-means algorithm. Our model uses a combination of sparse and dense document representations, aggregates document-cluster similarity along these multiple representations and makes the clustering decision using a neural classifier. The weighted document-cluster similarity model is learned using a novel adaptation of the triplet loss into a linear classification objective. We show that the use of a suitable fine-tuning objective and external knowledge in pre-trained transformer models yields significant improvements in the effectiveness of contextual embeddings for clustering. Our model achieves a new state-of-the-art on a standard stream clustering dataset of English documents. 2021.eacl-main.198 @@ -2397,7 +2397,7 @@ Lexical Normalization for Code-switched Data and its Effect on <fixed-case>POS</fixed-case> Tagging Robvan der Goot - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu 2352–2365 Lexical normalization, the translation of non-canonical data to standard language, has shown to improve the performance of many natural language processing tasks on social media. Yet, using multiple languages in one utterance, also called code-switching (CS), is frequently overlooked by these normalization systems, despite its common use in social media. In this paper, we propose three normalization models specifically designed to handle code-switched data which we evaluate for two language pairs: Indonesian-English and Turkish-German. For the latter, we introduce novel normalization layers and their corresponding language ID and POS tags for the dataset, and evaluate the downstream effect of normalization on POS tagging. Results show that our CS-tailored normalization models significantly outperform monolingual ones, and lead to 5.4% relative performance increase for POS tagging as compared to unnormalized input. 2021.eacl-main.200 @@ -2407,7 +2407,7 @@ Structural Encoding and Pre-training Matter: Adapting <fixed-case>BERT</fixed-case> for Table-Based Fact Verification RuiDong - DavidSmith + DavidSmith 2366–2375 Growing concern with online misinformation has encouraged NLP research on fact verification. Since writers often base their assertions on structured data, we focus here on verifying textual statements given evidence in tables. Starting from the Table Parsing (TAPAS) model developed for question answering (Herzig et al., 2020), we find that modeling table structure improves a language model pre-trained on unstructured text. Pre-training language models on English Wikipedia table data further improves performance. Pre-training on a question answering task with column-level cell rank information achieves the best performance. With improved pre-training and cell embeddings, this approach outperforms the state-of-the-art Numerically-aware Graph Neural Network table fact verification model (GNN-TabFact), increasing statement classification accuracy from 72.2% to 73.9% even without modeling numerical information. Incorporating numerical information with cell rankings and pre-training on a question-answering task increases accuracy to 76%. We further analyze accuracy on statements implicating single rows or multiple rows and columns of tables, on different numerical reasoning subtasks, and on generalizing to detecting errors in statements derived from the ToTTo table-to-text generation dataset. 2021.eacl-main.201 @@ -2416,9 +2416,9 @@ A Study of Automatic Metrics for the Evaluation of Natural Language Explanations - Miruna-AdrianaClinciu + Miruna-AdrianaClinciu ArashEshghi - HelenHastie + HelenHastie 2376–2387 As transparency becomes key for robotics and AI, it will be necessary to evaluate the methods through which transparency is provided, including automatically generated natural language (NL) explanations. Here, we explore parallels between the generation of such explanations and the much-studied field of evaluation of Natural Language Generation (NLG). Specifically, we investigate which of the NLG evaluation measures map well to explanations. We present the ExBAN corpus: a crowd-sourced corpus of NL explanations for Bayesian Networks. We run correlations comparing human subjective ratings with NLG automatic measures. We find that embedding-based automatic NLG evaluation methods, such as BERTScore and BLEURT, have a higher correlation with human ratings, compared to word-overlap metrics, such as BLEU and ROUGE. This work has implications for Explainable AI and transparent robotic and autonomous systems. 2021.eacl-main.202 @@ -2429,7 +2429,7 @@ Adversarial Stylometry in the Wild: <fixed-case>T</fixed-case>ransferable Lexical Substitution Attacks on Author Profiling ChrisEmmery ÁkosKádár - GrzegorzChrupała + GrzegorzChrupała 2388–2402 Written language contains stylistic cues that can be exploited to automatically infer a variety of potentially sensitive author information. Adversarial stylometry intends to attack such models by rewriting an author’s text. Our research proposes several components to facilitate deployment of these adversarial attacks in the wild, where neither data nor target models are accessible. We introduce a transformer-based extension of a lexical replacement attack, and show it achieves high transferability when trained on a weakly labeled corpus—decreasing target model performance below chance. While not completely inconspicuous, our more successful attacks also prove notably less detectable by humans. Our framework therefore provides a promising direction for future privacy-preserving adversarial attacks. 2021.eacl-main.203 @@ -2442,7 +2442,7 @@ HwijeenAhn Chan YoungPark YuliaTsvetkov - David R.Mortensen + David R.Mortensen 2403–2414 Much work in cross-lingual transfer learning explored how to select better transfer languages for multilingual tasks, primarily focusing on typological and genealogical similarities between languages. We hypothesize that these measures of linguistic proximity are not enough when working with pragmatically-motivated tasks, such as sentiment analysis. As an alternative, we introduce three linguistic features that capture cross-cultural similarities that manifest in linguistic patterns and quantify distinct aspects of language pragmatics: language context-level, figurative language, and the lexification of emotion concepts. Our analyses show that the proposed pragmatic features do capture cross-cultural similarities and align well with existing work in sociolinguistics and linguistic anthropology. We further corroborate the effectiveness of pragmatically-driven transfer in the downstream task of choosing transfer languages for cross-lingual sentiment analysis. 2021.eacl-main.204 @@ -2454,7 +2454,7 @@ RamitSawhney HarshitJoshi LucieFlek - Rajiv RatnShah + Rajiv RatnShah 2415–2428 Recent psychological studies indicate that individuals exhibiting suicidal ideation increasingly turn to social media rather than mental health practitioners. Contextualizing the build-up of such ideation is critical for the identification of users at risk. In this work, we focus on identifying suicidal intent in tweets by augmenting linguistic models with emotional phases modeled from users’ historical context. We propose PHASE, a time-and phase-aware framework that adaptively learns features from a user’s historical emotional spectrum on Twitter for preliminary screening of suicidal risk. Building on clinical studies, PHASE learns phase-like progressions in users’ historical Plutchik-wheel-based emotions to contextualize suicidal intent. While outperforming state-of-the-art methods, we show the utility of temporal and phase-based emotional contextual cues for suicide ideation detection. We further discuss practical and ethical considerations. 2021.eacl-main.205 @@ -2464,7 +2464,7 @@ Exploiting Definitions for Frame Identification TianyuJiang - EllenRiloff + EllenRiloff 2429–2434 Frame identification is one of the key challenges for frame-semantic parsing. The goal of this task is to determine which frame best captures the meaning of a target word or phrase in a sentence. We present a new model for frame identification that uses a pre-trained transformer model to generate representations for frames and lexical units (senses) using their formal definitions in FrameNet. Our frame identification model assesses the suitability of a frame for a target word in a sentence based on the semantic coherence of their meanings. We evaluate our model on three data sets and show that it consistently achieves better performance than previous systems. 2021.eacl-main.206 @@ -2485,8 +2485,8 @@ Conceptual Grounding Constraints for Truly Robust Biomedical Name Representations PieterFivez - SimonSuster - WalterDaelemans + SimonSuster + WalterDaelemans 2440–2450 Effective representation of biomedical names for downstream NLP tasks requires the encoding of both lexical as well as domain-specific semantic information. Ideally, the synonymy and semantic relatedness of names should be consistently reflected by their closeness in an embedding space. To achieve such robustness, prior research has considered multi-task objectives when training neural encoders. In this paper, we take a next step towards truly robust representations, which capture more domain-specific semantics while remaining universally applicable across different biomedical corpora and domains. To this end, we use conceptual grounding constraints which more effectively align encoded names to pretrained embeddings of their concept identifiers. These constraints are effective even when using a Deep Averaging Network, a simple feedforward encoding architecture that allows for scaling to large corpora while remaining sufficiently expressive. We empirically validate our approach using multiple tasks and benchmarks, which assess both literal synonymy as well as more general semantic relatedness. 2021.eacl-main.208 @@ -2631,7 +2631,7 @@ ArtidoroPagnoni Jay YoonLee DheerajRajagopal - JaimeCarbonell + JaimeCarbonell YuliaTsvetkov 2575–2585 Abstractive text summarization aims at compressing the information of a long source document into a rephrased, condensed summary. Despite advances in modeling techniques, abstractive summarization models still suffer from several key challenges: (i) layout bias: they overfit to the style of training corpora; (ii) limited abstractiveness: they are optimized to copying n-grams from the source rather than generating novel abstractive summaries; (iii) lack of transparency: they are not interpretable. In this work, we propose a framework based on document-level structure induction for summarization to address these challenges. To this end, we propose incorporating latent and explicit dependencies across sentences in the source document into end-to-end single-document summarization models. Our framework complements standard encoder-decoder summarization models by augmenting them with rich structure-aware document representations based on implicitly learned (latent) structures and externally-derived linguistic (explicit) structures. We show that our summarization framework, trained on the CNN/DM dataset, improves the coverage of content in the source documents, generates more abstractive summaries by generating more novel n-grams, and incorporates interpretable sentence-level structures, while performing on par with standard baselines. @@ -2704,7 +2704,7 @@ On the Computational Modelling of <fixed-case>M</fixed-case>ichif Verbal Morphology FineenDavis - Eddie A.Santos + Eddie A.Santos HeatherSouter 2631–2636 This paper presents a finite-state computational model of the verbal morphology of Michif. Michif, the official language of the Métis peoples, is a uniquely mixed language with Algonquian and French origins. It is spoken across the Métis homelands in what is now called Canada and the United States, but it is highly endangered with less than 100 speakers. The verbal morphology is remarkably complex, as the already polysynthetic Algonquian patterns are combined with French elements and unique morpho-phonological interactions. The model presented in this paper, LI VERB KAA-OOSHITAHK DI MICHIF handles this complexity by using a series of composed finite-state transducers to model the concatenative morphology and phonological rule alternations that are unique to Michif. Such a rule-based approach is necessary as there is insufficient language data for an approach that uses machine learning. A language model such as LI VERB KAA-OOSHITAHK DI MICHIF furthers the goals of Indigenous computational linguistics in Canada while also supporting the creation of tools for documentation, education, and revitalization that are desired by the Métis community. @@ -2738,7 +2738,7 @@ Informative and Controllable Opinion Summarization - Reinald KimAmplayo + Reinald KimAmplayo MirellaLapata 2662–2672 Opinion summarization is the task of automatically generating summaries for a set of reviews about a specific target (e.g., a movie or a product). Since the number of reviews for each target can be prohibitively large, neural network-based methods follow a two-stage approach where an extractive step first pre-selects a subset of salient opinions and an abstractive step creates the summary while conditioning on the extracted subset. However, the extractive model leads to loss of information which may be useful depending on user needs. In this paper we propose a summarization framework that eliminates the need to rely only on pre-selected content and waste possibly useful information, especially when customizing summaries. The framework enables the use of all input reviews by first condensing them into multiple dense vectors which serve as input to an abstractive model. We showcase an effective instantiation of our framework which produces more informative summaries and also allows to take user preferences into account using our zero-shot customization technique. Experimental results demonstrate that our model improves the state of the art on the Rotten Tomatoes dataset and generates customized summaries effectively. @@ -2748,7 +2748,7 @@ Coloring the Black Box: What Synesthesia Tells Us about Character Embeddings - KatharinaKann + KatharinaKann Mauro M.Monsalve-Mercado 2673–2685 In contrast to their word- or sentence-level counterparts, character embeddings are still poorly understood. We aim at closing this gap with an in-depth study of English character embeddings. For this, we use resources from research on grapheme–color synesthesia – a neuropsychological phenomenon where letters are associated with colors –, which give us insight into which characters are similar for synesthetes and how characters are organized in color space. Comparing 10 different character embeddings, we ask: How similar are character embeddings to a synesthete’s perception of characters? And how similar are character embeddings extracted from different models? We find that LSTMs agree with humans more than transformers. Comparing across tasks, grapheme-to-phoneme conversion results in the most human-like character embeddings. Finally, ELMo embeddings differ from both humans and other models. @@ -2770,7 +2770,7 @@ <fixed-case>BERT</fixed-case>ective: Language Models and Contextual Information for Deception Detection TommasoFornaciari FedericoBianchi - MassimoPoesio + MassimoPoesio DirkHovy 2699–2708 Spotting a lie is challenging but has an enormous potential impact on security as well as private and public safety. Several NLP methods have been proposed to classify texts as truthful or deceptive. In most cases, however, the target texts’ preceding context is not considered. This is a severe limitation, as any communication takes place in context, not in a vacuum, and context can help to detect deception. We study a corpus of Italian dialogues containing deceptive statements and implement deep neural models that incorporate various linguistic contexts. We establish a new state-of-the-art identifying deception and find that not all context is equally useful to the task. Only the texts closest to the target, if from the same speaker (rather than questions by an interlocutor), boost performance. We also find that the semantic information in language models such as BERT contributes to the performance. However, BERT alone does not capture the implicit knowledge of deception cues: its contribution is conditional on the concurrent use of attention to learn cues from BERT’s representations. @@ -2783,8 +2783,8 @@ Learning Coupled Policies for Simultaneous Machine Translation using Imitation Learning PhilipArthur - TrevorCohn - GholamrezaHaffari + TrevorCohn + GholamrezaHaffari 2709–2719 We present a novel approach to efficiently learn a simultaneous translation model with coupled programmer-interpreter policies. First, we present an algorithmic oracle to produce oracle READ/WRITE actions for training bilingual sentence-pairs using the notion of word alignments. This oracle actions are designed to capture enough information from the partial input before writing the output. Next, we perform a coupled scheduled sampling to effectively mitigate the exposure bias when learning both policies jointly with imitation learning. Experiments on six language-pairs show our method outperforms strong baselines in terms of translation quality quality while keeping the delay low. 2021.eacl-main.233 @@ -2810,10 +2810,10 @@ FengNan RameshNallapati ZhiguoWang - CiceroNogueira dos Santos + CiceroNogueira dos Santos HenghuiZhu DejiaoZhang - KathleenMcKeown + KathleenMcKeown BingXiang 2727–2733 A key challenge for abstractive summarization is ensuring factual consistency of the generated summary with respect to the original document. For example, state-of-the-art models trained on existing datasets exhibit entity hallucination, generating names of entities that are not present in the source document. We propose a set of new metrics to quantify the entity-level factual consistency of generated summaries and we show that the entity hallucination problem can be alleviated by simply filtering the training data. In addition, we propose a summary-worthy entity classification task to the training process as well as a joint entity and summary generation approach, which yield further improvements in entity level metrics. @@ -2856,8 +2856,8 @@ Diverse Adversaries for Mitigating Bias in Training XudongHan - TimothyBaldwin - TrevorCohn + TimothyBaldwin + TrevorCohn 2760–2765 Adversarial learning can learn fairer and less biased models of language processing than standard training. However, current adversarial techniques only partially mitigate the problem of model bias, added to which their training procedures are often unstable. In this paper, we propose a novel approach to adversarial learning based on the use of multiple diverse discriminators, whereby discriminators are encouraged to learn orthogonal hidden representations from one another. Experimental results show that our method substantially improves over standard adversarial removal methods, in terms of reducing bias and stability of training. 2021.eacl-main.239 @@ -2881,7 +2881,7 @@ Better Neural Machine Translation by Extracting Linguistic Information from <fixed-case>BERT</fixed-case> - Hassan S.Shavarani + Hassan S.Shavarani AnoopSarkar 2772–2783 Adding linguistic information (syntax or semantics) to neural machine translation (NMT) have mostly focused on using point estimates from pre-trained models. Directly using the capacity of massive pre-trained contextual word embedding models such as BERT(Devlin et al., 2019) has been marginally useful in NMT because effective fine-tuning is difficult to obtain for NMT without making training brittle and unreliable. We augment NMT by extracting dense fine-tuned vector-based linguistic information from BERT instead of using point estimates. Experimental results show that our method of incorporating linguistic information helps NMT to generalize better in a variety of training contexts and is no more difficult to train than conventional Transformer-based NMT. @@ -2895,7 +2895,7 @@ ChangbingYang YuLi AlexWarstadt - KatharinaKann + KatharinaKann 2784–2790 Linguistically informed analyses of language models (LMs) contribute to the understanding and improvement of such models. Here, we introduce the corpus of Chinese linguistic minimal pairs (CLiMP) to investigate what knowledge Chinese LMs acquire. CLiMP consists of sets of 1000 minimal pairs (MPs) for 16 syntactic contrasts in Chinese, covering 9 major Chinese linguistic phenomena. The MPs are semi-automatically generated, and human agreement with the labels in CLiMP is 95.8%. We evaluate 11 different LMs on CLiMP, covering n-grams, LSTMs, and Chinese BERT. We find that classifier–noun agreement and verb complement selection are the phenomena that models generally perform best at. However, models struggle the most with the ba construction, binding, and filler-gap dependencies. Overall, Chinese BERT achieves an 81.8% average accuracy, while the performances of LSTMs and 5-grams are only moderately above chance level. 2021.eacl-main.242 @@ -2969,9 +2969,9 @@ ElsbethTurcan PetraGaluscakova ElenaZotkina - ZhengpingJiang + ZhengpingJiang PeterBell - KathleenMcKeown + KathleenMcKeown 2842–2854 Typical ASR systems segment the input audio into utterances using purely acoustic information, which may not resemble the sentence-like units that are expected by conventional machine translation (MT) systems for Spoken Language Translation. In this work, we propose a model for correcting the acoustic segmentation of ASR models for low-resource languages to improve performance on downstream tasks. We propose the use of subtitles as a proxy dataset for correcting ASR acoustic segmentation, creating synthetic acoustic utterances by modeling common error modes. We train a neural tagging model for correcting ASR acoustic segmentation and show that it improves downstream performance on MT and audio-document cross-language information retrieval (CLIR). 2021.eacl-main.248 @@ -3032,7 +3032,7 @@ Representations for Question Answering from Documents with Tables and Text VickyZayats KristinaToutanova - MariOstendorf + MariOstendorf 2895–2906 Tables in web documents are pervasive and can be directly used to answer many of the queries searched on the web, motivating their integration in question answering. Very often information presented in tables is succinct and hard to interpret with standard language representations. On the other hand, tables often appear within textual context, such as an article describing the table. Using the information from an article as additional context can potentially enrich table representations. In this work we aim to improve question answering from tables by refining table representations based on information from surrounding text. We also present an effective method to combine text and table-based predictions for question answering from full documents, obtaining significant improvements on the Natural Questions dataset (Kwiatkowski et al., 2019). 2021.eacl-main.253 @@ -3044,7 +3044,7 @@ KemalKurniawan LeaFrermann PhilipSchulz - TrevorCohn + TrevorCohn 2907–2918 Cross-lingual transfer is a leading technique for parsing low-resource languages in the absence of explicit supervision. Simple ‘direct transfer’ of a learned model based on a multilingual input encoding has provided a strong benchmark. This paper presents a method for unsupervised cross-lingual transfer that improves over direct transfer systems by using their output as implicit supervision as part of self-training on unlabelled text in the target language. The method assumes minimal resources and provides maximal flexibility by (a) accepting any pre-trained arc-factored dependency parser; (b) assuming no access to source language data; (c) supporting both projective and non-projective parsing; and (d) supporting multi-source transfer. With English as the source language, we show significant improvements over state-of-the-art transfer models on both distant and nearby languages, despite our conceptually simpler approach. We provide analyses of the choice of source languages for multi-source transfer, and the advantage of non-projective parsing. Our code is available online. 2021.eacl-main.254 @@ -3055,7 +3055,7 @@ Modelling Context Emotions using Multi-task Learning for Emotion Controlled Dialog Generation DeekshaVarshney AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 2919–2931 A recent topic of research in natural language generation has been the development of automatic response generation modules that can automatically respond to a user’s utterance in an empathetic manner. Previous research has tackled this task using neural generative methods by augmenting emotion classes with the input sequences. However, the outputs by these models may be inconsistent. We employ multi-task learning to predict the emotion label and to generate a viable response for a given utterance using a common encoder with multiple decoders. Our proposed encoder-decoder model consists of a self-attention based encoder and a decoder with dot product attention mechanism to generate response with a specified emotion. We use the focal loss to handle imbalanced data distribution, and utilize the consistency loss to allow coherent decoding by the decoders. Human evaluation reveals that our model produces more emotionally pertinent responses. In addition, our model outperforms multiple strong baselines on automatic evaluation measures such as F1 and BLEU scores, thus resulting in more fluent and adequate responses. 2021.eacl-main.255 @@ -3090,8 +3090,8 @@ Adapting Event Extractors to Medical Data: Bridging the Covariate Shift AakankshaNaik - Jill FainLehman - CarolynRose + Jill FainLehman + CarolynRose 2963–2975 We tackle the task of adapting event extractors to new domains without labeled data, by aligning the marginal distributions of source and target domains. As a testbed, we create two new event extraction datasets using English texts from two medical domains: (i) clinical notes, and (ii) doctor-patient conversations. We test the efficacy of three marginal alignment techniques: (i) adversarial domain adaptation (ADA), (ii) domain adaptive fine-tuning (DAFT), and (iii) a new instance weighting technique based on language model likelihood scores (LIW). LIW and DAFT improve over a no-transfer BERT baseline on both domains, but ADA only improves on notes. Deeper analysis of performance under different types of shifts (e.g., lexical shift, semantic shift) explains some of the variations among models. Our best-performing models reach F1 scores of 70.0 and 72.9 on notes and conversations respectively, using no labeled target data. 2021.eacl-main.258 @@ -3104,8 +3104,8 @@ SiddharthDalmia MariaRyskina FlorianMetze - EduardHovy - Alan WBlack + EduardHovy + Alan WBlack 2976–2992 When Question-Answering (QA) systems are deployed in the real world, users query them through a variety of interfaces, such as speaking to voice assistants, typing questions into a search engine, or even translating questions to languages supported by the QA system. While there has been significant community attention devoted to identifying correct answers in passages assuming a perfectly formed question, we show that components in the pipeline that precede an answering engine can introduce varied and considerable sources of error, and performance can degrade substantially based on these upstream noise sources even for powerful pre-trained QA models. We conclude that there is substantial room for progress before QA systems can be effectively deployed, highlight the need for QA evaluation to expand to consider real-world use, and hope that our findings will spur greater community interest in the issues that arise when our systems actually need to be of utility to humans. 2021.eacl-main.259 @@ -3166,7 +3166,7 @@ VinitRavishankar ArturKulmizev MostafaAbdou - AndersSøgaard + AndersSøgaard JoakimNivre 3031–3045 Since the popularization of the Transformer as a general-purpose feature encoder for NLP, many studies have attempted to decode linguistic structure from its novel multi-head attention mechanism. However, much of such work focused almost exclusively on English — a language with rigid word order and a lack of inflectional morphology. In this study, we present decoding experiments for multilingual BERT across 18 languages in order to test the generalizability of the claim that dependency syntax is reflected in attention patterns. We show that full trees can be decoded above baseline accuracy from single attention heads, and that individual relations are often tracked by the same heads across languages. Furthermore, in an attempt to address recent debates about the status of attention as an explanatory mechanism, we experiment with fine-tuning mBERT on a supervised parsing objective while freezing different series of parameters. Interestingly, in steering the objective to learn explicit linguistic structure, we find much of the same structure represented in the resulting attention patterns, with interesting differences with respect to which parameters are frozen. @@ -3186,7 +3186,7 @@ <fixed-case>CDA</fixed-case>: a Cost Efficient Content-based Multilingual Web Document Aligner - ThuyVu + ThuyVu AlessandroMoschitti 3053–3061 We introduce a Content-based Document Alignment approach (CDA), an efficient method to align multilingual web documents based on content in creating parallel training data for machine translation (MT) systems operating at the industrial level. CDA works in two steps: (i) projecting documents of a web domain to a shared multilingual space; then (ii) aligning them based on the similarity of their representations in such space. We leverage lexical translation models to build vector representations using TF×IDF. CDA achieves performance comparable with state-of-the-art systems in the WMT-16 Bilingual Document Alignment Shared Task benchmark while operating in multilingual space. Besides, we created two web-scale datasets to examine the robustness of CDA in an industrial setting involving up to 28 languages and millions of documents. The experiments show that CDA is robust, cost-effective, and is significantly superior in (i) processing large and noisy web data and (ii) scaling to new and low-resourced languages. @@ -3198,7 +3198,7 @@ Metric-Type Identification for Multi-Level Header Numerical Tables in Scientific Papers Lya HulliyyatusSuadaa HidetakaKamigaito - ManabuOkumura + ManabuOkumura HiroyaTakamura 3062–3071 Numerical tables are widely used to present experimental results in scientific papers. For table understanding, a metric-type is essential to discriminate numbers in the tables. We introduce a new information extraction task, metric-type identification from multi-level header numerical tables, and provide a dataset extracted from scientific papers consisting of header tables, captions, and metric-types. We then propose two joint-learning neural classification and generation schemes featuring pointer-generator-based and BERT-based models. Our results show that the joint models can handle both in-header and out-of-header metric-type identification problems. @@ -3244,7 +3244,7 @@ Facilitating Terminology Translation with Target Lemma Annotations TomsBergmanis - MārcisPinnis + MārcisPinnis 3105–3111 Most of the recent work on terminology integration in machine translation has assumed that terminology translations are given already inflected in forms that are suitable for the target language sentence. In day-to-day work of professional translators, however, it is seldom the case as translators work with bilingual glossaries where terms are given in their dictionary forms; finding the right target language form is part of the translation process. We argue that the requirement for apriori specified target language forms is unrealistic and impedes the practical applicability of previous work. In this work, we propose to train machine translation systems using a source-side data augmentation method that annotates randomly selected source language words with their target language lemmas. We show that systems trained on such augmented data are readily usable for terminology integration in real-life translation scenarios. Our experiments on terminology translation into the morphologically complex Baltic and Uralic languages show an improvement of up to 7 BLEU points over baseline systems with no means for terminology integration and an average improvement of 4 BLEU points over the previous work. Results of the human evaluation indicate a 47.7% absolute improvement over the previous work in term translation accuracy when translating into Latvian. 2021.eacl-main.271 @@ -3279,7 +3279,7 @@ MaartenSap SwabhaSwayamdipta YejinChoi - NoahSmith + NoahSmith 3143–3155 Biased associations have been a challenge in the development of classifiers for detecting toxic language, hindering both fairness and accuracy. As potential solutions, we investigate recently introduced debiasing methods for text classification datasets and models, as applied to toxic language detection. Our focus is on lexical (e.g., swear words, slurs, identity mentions) and dialectal markers (specifically African American English). Our comprehensive experiments establish that existing methods are limited in their ability to prevent biased behavior in current toxicity detectors. We then propose an automatic, dialect-aware data correction method, as a proof-of-concept. Despite the use of synthetic labels, this method reduces dialectal associations with toxicity. Overall, our findings show that debiasing a model trained on biased toxic language data is not as effective as simply relabeling the data to remove existing biases. 2021.eacl-main.274 @@ -3331,10 +3331,10 @@ Interpretability for Morphological Inflection: from Character-level Predictions to Subword-level Rules - TatyanaRuzsics + TatyanaRuzsics OlgaSozinova XimenaGutierrez-Vasques - TanjaSamardzic + TanjaSamardzic 3189–3201 Neural models for morphological inflection have recently attained very high results. However, their interpretation remains challenging. Towards this goal, we propose a simple linguistically-motivated variant to the encoder-decoder model with attention. In our model, character-level cross-attention mechanism is complemented with a self-attention module over substrings of the input. We design a novel approach for pattern extraction from attention weights to interpret what the model learn. We apply our methodology to analyze the model’s decisions on three typologically-different languages and find that a) our pattern extraction method applied to cross-attention weights uncovers variation in form of inflection morphemes, b) pattern extraction from self-attention shows triggers for such variation, c) both types of patterns are closely aligned with grammar inflection classes and class assignment criteria, for all three languages. Additionally, we find that the proposed encoder attention component leads to consistent performance improvements over a strong baseline. 2021.eacl-main.278 @@ -3370,7 +3370,7 @@ Andy MingrenLi YishuMiao OzanCaglayan - PranavaMadhyastha + PranavaMadhyastha LuciaSpecia 3222–3233 This paper addresses the problem of simultaneous machine translation (SiMT) by exploring two main concepts: (a) adaptive policies to learn a good trade-off between high translation quality and low latency; and (b) visual information to support this process by providing additional (visual) contextual information which may be available before the textual input is produced. For that, we propose a multimodal approach to simultaneous machine translation using reinforcement learning, with strategies to integrate visual and textual information in both the agent and the environment. We provide an exploration on how different types of visual information and integration strategies affect the quality and latency of simultaneous translation models, and demonstrate that visual cues lead to higher quality while keeping the latency low. @@ -3381,7 +3381,7 @@ <fixed-case>STAR</fixed-case>: Cross-modal [<fixed-case>STA</fixed-case>]tement [<fixed-case>R</fixed-case>]epresentation for selecting relevant mathematical premises DeborahFerreira - AndréFreitas + AndréFreitas 3234–3243 Mathematical statements written in natural language are usually composed of two different modalities: mathematical elements and natural language. These two modalities have several distinct linguistic and semantic properties. State-of-the-art representation techniques have demonstrated an inability in capturing such an entangled style of discourse. In this work, we propose STAR, a model that uses cross-modal attention to learn how to represent mathematical text for the task of Natural Language Premise Selection. This task uses conjectures written in both natural and mathematical language to recommend premises that most likely will be relevant to prove a particular statement. We found that STAR not only outperforms baselines that do not distinguish between natural language and mathematical elements, but it also achieves better performance than state-of-the-art models. 2021.eacl-main.282 @@ -3390,7 +3390,7 @@ Do Multi-Hop Question Answering Systems Know How to Answer the Single-Hop Sub-Questions? - YixuanTang + YixuanTang Hwee TouNg AnthonyTung 3244–3249 @@ -3403,7 +3403,7 @@ Multilingual <fixed-case>LAMA</fixed-case>: Investigating Knowledge in Multilingual Pretrained Language Models NoraKassner PhilippDufter - HinrichSchütze + HinrichSchütze 3250–3258 Recently, it has been found that monolingual English language models can be used as knowledge bases. Instead of structural knowledge base queries, masked sentences such as “Paris is the capital of [MASK]” are used as probes. We translate the established benchmarks TREx and GoogleRE into 53 languages. Working with mBERT, we investigate three questions. (i) Can mBERT be used as a multilingual knowledge base? Most prior work only considers English. Extending research to multiple languages is important for diversity and accessibility. (ii) Is mBERT’s performance as knowledge base language-independent or does it vary from language to language? (iii) A multilingual model is trained on more text, e.g., mBERT is trained on 104 Wikipedias. Can mBERT leverage this for better performance? We find that using mBERT as a knowledge base yields varying performance across languages and pooling predictions across languages improves performance. Conversely, mBERT exhibits a language bias; e.g., when queried in Italian, it tends to predict Italy as the country of origin. 2021.eacl-main.284 @@ -3446,9 +3446,9 @@ DipteshKanojia PrashantSharma SayaliGhodekar - PushpakBhattacharyya - GholamrezaHaffari - MalharKulkarni + PushpakBhattacharyya + GholamrezaHaffari + MalharKulkarni 3281–3292 Automatic detection of cognates helps downstream NLP tasks of Machine Translation, Cross-lingual Information Retrieval, Computational Phylogenetics and Cross-lingual Named Entity Recognition. Previous approaches for the task of cognate detection use orthographic, phonetic and semantic similarity based features sets. In this paper, we propose a novel method for enriching the feature sets, with cognitive features extracted from human readers’ gaze behaviour. We collect gaze behaviour data for a small sample of cognates and show that extracted cognitive features help the task of cognate detection. However, gaze data collection and annotation is a costly task. We use the collected gaze behaviour data to predict cognitive features for a larger sample and show that predicted cognitive features, also, significantly improve the task performance. We report improvements of 10% with the collected gaze features, and 12% using the predicted gaze features, over the previously proposed approaches. Furthermore, we release the collected gaze behaviour data along with our code and cross-lingual models. 2021.eacl-main.288 @@ -3471,7 +3471,7 @@ Modeling Coreference Relations in Visual Dialog MingxiaoLi - Marie-FrancineMoens + Marie-FrancineMoens 3306–3318 Visual dialog is a vision-language task where an agent needs to answer a series of questions grounded in an image based on the understanding of the dialog history and the image. The occurrences of coreference relations in the dialog makes it a more challenging task than visual question-answering. Most previous works have focused on learning better multi-modal representations or on exploring different ways of fusing visual and language features, while the coreferences in the dialog are mainly ignored. In this paper, based on linguistic knowledge and discourse features of human dialog we propose two soft constraints that can improve the model’s ability of resolving coreferences in dialog in an unsupervised way. Experimental results on the VisDial v1.0 dataset shows that our model, which integrates two novel and linguistically inspired soft constraints in a deep transformer neural architecture, obtains new state-of-the-art performance in terms of recall at 1 and other evaluation metrics compared to current existing models and this without pretraining on other vision language datasets. Our qualitative results also demonstrate the effectiveness of the method that we propose. 2021.eacl-main.290 @@ -3483,7 +3483,7 @@ YadollahYaghoobzadeh SoroushMehri RemiTachet des Combes - T. J.Hazen + T. J.Hazen AlessandroSordoni 3319–3332 Neural NLP models tend to rely on spurious correlations between labels and input features to perform their tasks. Minority examples, i.e., examples that contradict the spurious correlations present in the majority of data points, have been shown to increase the out-of-distribution generalization of pre-trained language models. In this paper, we first propose using example forgetting to find minority examples without prior knowledge of the spurious correlations present in the dataset. Forgettable examples are instances either learned and then forgotten during training or never learned. We show empirically how these examples are related to minorities in our training sets. Then, we introduce a new approach to robustify models by fine-tuning our models twice, first on the full training data and second on the minorities only. We obtain substantial improvements in out-of-distribution generalization when applying our approach to the MNLI, QQP and FEVER datasets. @@ -3527,7 +3527,7 @@ Probing the Probing Paradigm: Does Probing Accuracy Entail Task Relevance? AbhilashaRavichander YonatanBelinkov - EduardHovy + EduardHovy 3363–3377 Although neural models have achieved impressive results on several NLP benchmarks, little is understood about the mechanisms they use to perform language tasks. Thus, much recent attention has been devoted to analyzing the sentence representations learned by neural encoders, through the lens of ‘probing’ tasks. However, to what extent was the information encoded in sentence representations, as discovered through a probe, actually used by the model to perform its task? In this work, we examine this probing paradigm through a case study in Natural Language Inference, showing that models can learn to encode linguistic properties even if they are not needed for the task on which the model was trained. We further identify that pretrained word embeddings play a considerable role in encoding these properties rather than the training task itself, highlighting the importance of careful controls when designing probing experiments. Finally, through a set of controlled synthetic tasks, we demonstrate models can encode these properties considerably above chance-level, even when distributed in the data as random noise, calling into question the interpretation of absolute claims on probing tasks. 2021.eacl-main.295 @@ -3540,7 +3540,7 @@ YukunFeng HidetakaKamigaito HiroyaTakamura - ManabuOkumura + ManabuOkumura 3378–3390 This work presents multi-modal deep SVDD (mSVDD) for one-class text classification. By extending the uni-modal SVDD to a multiple modal one, we build mSVDD with multiple hyperspheres, that enable us to build a much better description for target one-class data. Additionally, the end-to-end architecture of mSVDD can jointly handle neural feature learning and one-class text learning. We also introduce a mechanism for incorporating negative supervision in the absence of real negative data, which can be beneficial to the mSVDD model. We conduct experiments on Reuters and 20 Newsgroup datasets, and the experimental results demonstrate that mSVDD outperforms uni-modal SVDD and mSVDD can get further improvements when negative supervision is incorporated. 2021.eacl-main.296 @@ -3580,7 +3580,7 @@ TejasDhamecha PreethiJyothi SamarthBharadwaj - PushpakBhattacharyya + PushpakBhattacharyya 3421–3427 Spoken language is different from the written language in its style and structure. Disfluencies that appear in transcriptions from speech recognition systems generally hamper the performance of downstream NLP tasks. Thus, a disfluency correction system that converts disfluent to fluent text is of great value. This paper introduces a disfluency correction model that translates disfluent to fluent text by drawing inspiration from recent encoder-decoder unsupervised style-transfer models for text. We also show considerable benefits in performance when utilizing a small sample of 500 parallel disfluent-fluent sentences in a semi-supervised way. Our unsupervised approach achieves a BLEU score of 79.39 on the Switchboard corpus test set, with further improvement to a BLEU score of 85.28 with semi-supervision. Both are comparable to two competitive fully-supervised models. 2021.eacl-main.299 @@ -3617,7 +3617,7 @@ XimenaGutierrez-Vasques ChristianBentz OlgaSozinova - TanjaSamardzic + TanjaSamardzic 3454–3468 The distributions of orthographic word types are very different across languages due to typological characteristics, different writing traditions and potentially other factors. The wide range of cross-linguistic diversity is still a major challenge for NLP and the study of language. We use BPE and information-theoretic measures to investigate if distributions become similar under specific levels of subword tokenization. We perform a cross-linguistic comparison, following incremental merges of BPE (we go from characters to words) for 47 diverse languages. We show that text entropy values (a feature of probability distributions) tend to converge at specific subword levels: relatively few BPE merges (around 350) lead to the most similar distributions across languages. Additionally, we analyze the interaction between subword and word-level distributions and show that our findings can be interpreted in light of the ongoing discussion regarding different types of morphological complexity. 2021.eacl-main.302 @@ -3627,7 +3627,7 @@ A Large-scale Evaluation of Neural Machine Transliteration for <fixed-case>I</fixed-case>ndic Languages AnoopKunchukuttan - SiddharthJain + SiddharthJain RahulKejriwal 3469–3475 We take up the task of large-scale evaluation of neural machine transliteration between English and Indic languages, with a focus on multilingual transliteration to utilize orthographic similarity between Indian languages. We create a corpus of 600K word pairs mined from parallel translation corpora and monolingual corpora, which is the largest transliteration corpora for Indian languages mined from public sources. We perform a detailed analysis of multilingual transliteration and propose an improved multilingual training recipe for Indic languages. We analyze various factors affecting transliteration quality like language family, transliteration direction and word origin. @@ -3638,7 +3638,7 @@ Communicative-Function-Based Sentence Classification for Construction of an Academic Formulaic Expression Database KenichiIwatsuki - AkikoAizawa + AkikoAizawa 3476–3497 Formulaic expressions (FEs), such as ‘in this paper, we propose’ are frequently used in scientific papers. FEs convey a communicative function (CF), i.e. ‘showing the aim of the paper’ in the above-mentioned example. Although CF-labelled FEs are helpful in assisting academic writing, the construction of FE databases requires manual labour for assigning CF labels. In this study, we considered a fully automated construction of a CF-labelled FE database using the top–down approach, in which the CF labels are first assigned to sentences, and then the FEs are extracted. For the CF-label assignment, we created a CF-labelled sentence dataset, on which we trained a SciBERT classifier. We show that the classifier and dataset can be used to construct FE databases of disciplines that are different from the training data. The accuracy of in-disciplinary classification was more than 80%, while cross-disciplinary classification also worked well. We also propose an FE extraction method, which was applied to the CF-labelled sentences. Finally, we constructed and published a new, large CF-labelled FE database. The evaluation of the final CF-labelled FE database showed that approximately 65% of the FEs are correct and useful, which is sufficiently high considering practical use. 2021.eacl-main.304 @@ -3670,7 +3670,7 @@ Don’t Change Me! User-Controllable Selective Paraphrase Generation - MohanZhang + MohanZhang LuchenTan ZihangFu KunXiong @@ -3685,10 +3685,10 @@ Rethinking Coherence Modeling: Synthetic vs. Downstream Tasks - TasnimMohiuddin + TasnimMohiuddin PrathyushaJwalapuram XiangLin - ShafiqJoty + ShafiqJoty 3528–3539 Although coherence modeling has come a long way in developing novel models, their evaluation on downstream applications for which they are purportedly developed has largely been neglected. With the advancements made by neural approaches in applications such as machine translation (MT), summarization and dialog systems, the need for coherence evaluation of these tasks is now more crucial than ever. However, coherence models are typically evaluated only on synthetic tasks, which may not be representative of their performance in downstream applications. To investigate how representative the synthetic tasks are of downstream use cases, we conduct experiments on benchmarking well-known traditional and neural coherence models on synthetic sentence ordering tasks, and contrast this with their performance on three downstream applications: coherence evaluation for MT and summarization, and next utterance prediction in retrieval-based dialog. Our results demonstrate a weak correlation between the model performances in the synthetic tasks and the downstream applications, motivating alternate training and evaluation methods for coherence models. 2021.eacl-main.308 @@ -3709,7 +3709,7 @@ Probing for idiomaticity in vector space models MarcosGarcia TiagoKramer Vieira - CarolinaScarton + CarolinaScarton MarcoIdiart AlineVillavicencio 3551–3564 @@ -3724,7 +3724,7 @@ Is the Understanding of Explicit Discourse Relations Required in Machine Reading Comprehension? YulongWu ViktorSchlegel - RizaBatista-Navarro + RizaBatista-Navarro 3565–3579 An in-depth analysis of the level of language understanding required by existing Machine Reading Comprehension (MRC) benchmarks can provide insight into the reading capabilities of machines. In this paper, we propose an ablation-based methodology to assess the extent to which MRC datasets evaluate the understanding of explicit discourse relations. We define seven MRC skills which require the understanding of different discourse relations. We then introduce ablation methods that verify whether these skills are required to succeed on a dataset. By observing the drop in performance of neural MRC models evaluated on the original and the modified dataset, we can measure to what degree the dataset requires these skills, in order to be understood correctly. Experiments on three large-scale datasets with the BERT-base and ALBERT-xxlarge model show that the relative changes for all skills are small (less than 6%). These results imply that most of the answered questions in the examined datasets do not require understanding the discourse structure of the text. To specifically probe for natural language understanding, there is a need to design more challenging benchmarks that can correctly evaluate the intended skills. 2021.eacl-main.311 @@ -3733,7 +3733,7 @@ Why Is <fixed-case>MBTI</fixed-case> Personality Detection from Texts a Difficult Task? - SanjaStajner + SanjaStajner SerenYenikent 3580–3589 Automatic detection of the four MBTI personality dimensions from texts has recently attracted noticeable attention from the natural language processing and computational linguistic communities. Despite the large collections of Twitter data for training, the best systems rarely even outperform the majority-class baseline. In this paper, we discuss the theoretical reasons for such low results and present the insights from an annotation study that further shed the light on this issue. @@ -3791,7 +3791,7 @@ Lifelong Knowledge-Enriched Social Event Representation Learning PrashanthVijayaraghavan - DebRoy + DebRoy 3624–3635 The ability of humans to symbolically represent social events and situations is crucial for various interactions in everyday life. Several studies in cognitive psychology have established the role of mental state attributions in effectively representing variable aspects of these social events. In the past, NLP research on learning event representations often focuses on construing syntactic and semantic information from language. However, they fail to consider the importance of pragmatic aspects and the need to consistently update new social situational information without forgetting the accumulated experiences. In this work, we propose a representation learning framework to directly address these shortcomings by integrating social commonsense knowledge with recent advancements in the space of lifelong language learning. First, we investigate methods to incorporate pragmatic aspects into our social event embeddings by leveraging social commonsense knowledge. Next, we introduce continual learning strategies that allow for incremental consolidation of new knowledge while retaining and promoting efficient usage of prior knowledge. Experimental results on event similarity, reasoning, and paraphrase detection tasks prove the efficacy of our social event embeddings. 2021.eacl-main.317 @@ -3862,7 +3862,7 @@ YoshitakaUshiku AtsushiHashimoto TaroWatanabe - YujiMatsumoto + YujiMatsumoto 3692–3702 Unsupervised image captioning is a challenging task that aims at generating captions without the supervision of image-sentence pairs, but only with images and sentences drawn from different sources and object labels detected from the images. In previous work, pseudo-captions, i.e., sentences that contain the detected object labels, were assigned to a given image. The focus of the previous work was on the alignment of input images and pseudo-captions at the sentence level. However, pseudo-captions contain many words that are irrelevant to a given image. In this work, we investigate the effect of removing mismatched words from image-sentence alignment to determine how they make this task difficult. We propose a simple gating mechanism that is trained to align image features with only the most reliable words in pseudo-captions: the detected object labels. The experimental results show that our proposed method outperforms the previous methods without introducing complex sentence-level learning objectives. Combined with the sentence-level alignment method of previous work, our method further improves its performance. These results confirm the importance of careful alignment in word-level details. 2021.eacl-main.323 @@ -3906,7 +3906,7 @@ Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations DimitraGkatzia - DjaméSeddah + DjaméSeddah Association for Computational Linguistics
Online
April @@ -3957,7 +3957,7 @@ PhuongNguyen ChauNguyen KenSatoh - YujiMatsumoto + YujiMatsumoto MinhNguyen 24–31 This paper presents CovRelex, a scientific paper retrieval system targeting entities and relations via relation extraction on COVID-19 scientific papers. This work aims at building a system supporting users efficiently in acquiring knowledge across a huge number of COVID-19 scientific papers published rapidly. Our system can be accessed via https://www.jaist.ac.jp/is/labs/nguyen-lab/systems/covrelex/. @@ -3993,7 +3993,7 @@ <fixed-case>T</fixed-case>-<fixed-case>NER</fixed-case>: An All-Round Python Library for Transformer-based Named Entity Recognition AsahiUshio - JoseCamacho-Collados + JoseCamacho-Collados 53–62 Language model (LM) pretraining has led to consistent improvements in many NLP downstream tasks, including named entity recognition (NER). In this paper, we present T-NER (Transformer-based Named Entity Recognition), a Python library for NER LM finetuning. In addition to its practical utility, T-NER facilitates the study and investigation of the cross-domain and cross-lingual generalization ability of LMs finetuned on NER. Our library also provides a web app where users can get model predictions interactively for arbitrary text, which facilitates qualitative model evaluation for non-expert programmers. We show the potential of the library by compiling nine public NER datasets into a unified format and evaluating the cross-domain and cross- lingual performance across the datasets. The results from our initial experiments show that in-domain performance is generally competitive across datasets. However, cross-domain generalization is challenging even with a large pretrained LM, which has nevertheless capacity to learn domain-specific features if fine- tuned on a combined dataset. To facilitate future research, we also release all our LM checkpoints via the Hugging Face model hub. 2021.eacl-demos.7 @@ -4004,7 +4004,7 @@ Forum 4.0: An Open-Source User Comment Analysis Framework MarloHaering Jakob SmedegaardAndersen - ChrisBiemann + ChrisBiemann WiebkeLoosen BenjaminMilde TimPietz @@ -4021,7 +4021,7 @@ <fixed-case>SLTEV</fixed-case>: Comprehensive Evaluation of Spoken Language Translation EbrahimAnsari - OndřejBojar + OndřejBojar BarryHaddow MohammadMahmoudi 71–79 @@ -4046,7 +4046,7 @@ <fixed-case>D</fixed-case>eb<fixed-case>IE</fixed-case>: A Platform for Implicit and Explicit Debiasing of Word Embedding Spaces NiklasFriedrich AnneLauscher - Simone PaoloPonzetto + Simone PaoloPonzetto GoranGlavaš 91–98 Recent research efforts in NLP have demonstrated that distributional word vector spaces often encode stereotypical human biases, such as racism and sexism. With word representations ubiquitously used in NLP models and pipelines, this raises ethical issues and jeopardizes the fairness of language technologies. While there exists a large body of work on bias measures and debiasing methods, to date, there is no platform that would unify these research efforts and make bias measuring and debiasing of representation spaces widely accessible. In this work, we present DebIE, the first integrated platform for (1) measuring and (2) mitigating bias in word embeddings. Given an (i) embedding space (users can choose between the predefined spaces or upload their own) and (ii) a bias specification (users can choose between existing bias specifications or create their own), DebIE can (1) compute several measures of implicit and explicit bias and modify the embedding space by executing two (mutually composable) debiasing models. DebIE’s functionality can be accessed through four different interfaces: (a) a web application, (b) a desktop application, (c) a REST-ful API, and (d) as a command-line application. DebIE is available at: debie.informatik.uni-mannheim.de. @@ -4198,7 +4198,7 @@ AhmetÜstün AlanRamponi IbrahimSharaf - BarbaraPlank + BarbaraPlank 176–197 Transfer learning, particularly approaches that combine multi-task learning with pre-trained contextualized embeddings and fine-tuning, have advanced the field of Natural Language Processing tremendously in recent years. In this paper we present MaChAmp, a toolkit for easy fine-tuning of contextualized embeddings in multi-task settings. The benefits of MaChAmp are its flexible configuration options, and the support of a variety of natural language processing tasks in a uniform toolkit, from text classification and sequence labeling to dependency parsing, masked language modeling, and text generation. 2021.eacl-demos.22 @@ -4211,7 +4211,7 @@ SabaAnwar Seid MuhieYimam AlexanderFriedrich - ChrisBiemann + ChrisBiemann 198–204 We present Sense Clustering over Time (SCoT), a novel network-based tool for analysing lexical change. SCoT represents the meanings of a word as clusters of similar words. It visualises their formation, change, and demise. There are two main approaches to the exploration of dynamic networks: the discrete one compares a series of clustered graphs from separate points in time. The continuous one analyses the changes of one dynamic network over a time-span. SCoT offers a new hybrid solution. First, it aggregates time-stamped documents into intervals and calculates one sense graph per discrete interval. Then, it merges the static graphs to a new type of dynamic semantic neighbourhood graph over time. The resulting sense clusters offer uniquely detailed insights into lexical change over continuous intervals with model transparency and provenance. SCoT has been successfully used in a European study on the changing meaning of ‘crisis’. 2021.eacl-demos.23 @@ -4234,7 +4234,7 @@ <fixed-case>T</fixed-case>2<fixed-case>NER</fixed-case>: Transformers based Transfer Learning Framework for Named Entity Recognition SaadullahAmin - GuenterNeumann + GuenterNeumann 212–220 Recent advances in deep transformer models have achieved state-of-the-art in several natural language processing (NLP) tasks, whereas named entity recognition (NER) has traditionally benefited from long-short term memory (LSTM) networks. In this work, we present a Transformers based Transfer Learning framework for Named Entity Recognition (T2NER) created in PyTorch for the task of NER with deep transformer models. The framework is built upon the Transformers library as the core modeling engine and supports several transfer learning scenarios from sequential transfer to domain adaptation, multi-task learning, and semi-supervised learning. It aims to bridge the gap between the algorithmic advances in these areas by combining them with the state-of-the-art in transformer models to provide a unified platform that is readily extensible and can be used for both the transfer learning research in NER, and for real-world applications. The framework is available at: https://github.com/suamin/t2ner. 2021.eacl-demos.25 @@ -4245,22 +4245,22 @@ <fixed-case>E</fixed-case>uropean Language Grid: A Joint Platform for the <fixed-case>E</fixed-case>uropean Language Technology Community GeorgRehm - SteliosPiperidis - KalinaBontcheva - JanHajic + SteliosPiperidis + KalinaBontcheva + JanHajic VictoriaArranz - AndrejsVasiļjevs + AndrejsVasiļjevs GerhardBackfried - Jose ManuelGomez-Perez + Jose ManuelGomez-Perez UlrichGermann RémiCalizzano NilsFeldhus StefanieHegele FlorianKintzel KatrinMarheinecke - JulianMoreno-Schneider - DimitrisGalanis - PennyLabropoulou + JulianMoreno-Schneider + DimitrisGalanis + PennyLabropoulou MiltosDeligiannis KaterinaGkirtzou AthanasiaKolovou @@ -4268,10 +4268,10 @@ LeonVoukoutis IanRoberts JanaHamrlova - DusanVaris + DusanVaris LukasKacena - KhalidChoukri - ValérieMapelli + KhalidChoukri + ValérieMapelli MickaëlRigault JulijaMelnika MiroJanosik @@ -4291,7 +4291,7 @@ HidetakaKamigaito JingunKwon Young-InSong - ManabuOkumura + ManabuOkumura 231–237 One way to enhance user engagement in search engines is to suggest interesting facts to the user. Although relationships between persons are important as a target for text mining, there are few effective approaches for extracting the interesting relationships between persons. We therefore propose a method for extracting interesting relationships between persons from natural language texts by focusing on their surprisingness. Our method first extracts all personal relationships from dependency trees for the texts and then calculates surprise scores for distributed representations of the extracted relationships in an unsupervised manner. The unique point of our method is that it does not require any labeled dataset with annotation for the surprising personal relationships. The results of the human evaluation show that the proposed method could extract more interesting relationships between persons from Japanese Wikipedia articles than a popularity-based baseline method. We demonstrate our proposed method as a chrome plugin on google search. 2021.eacl-demos.27 @@ -4314,7 +4314,7 @@ Story Centaur: Large Language Model Few Shot Learning as a Creative Writing Tool - BenSwanson + BenSwanson KoryMathewson BenPietrzak SherolChen @@ -4350,10 +4350,10 @@ <fixed-case>ELITR</fixed-case> Multilingual Live Subtitling: Demo and Strategy - OndřejBojar + OndřejBojar DominikMacháček SangeetSagar - OtakarSmrž + OtakarSmrž JonášKratochvíl PeterPolák EbrahimAnsari @@ -4364,8 +4364,8 @@ IvanSimonini Thai-SonNguyen FelixSchneider - SebastianStüker - AlexWaibel + SebastianStüker + AlexWaibel BarryHaddow RicoSennrich PhilipWilliams @@ -4398,10 +4398,10 @@ Domain Expert Platform for Goal-Oriented Dialog Collection - DidzisGoško - ArtursZnotins - IngunaSkadina - NormundsGruzitis + DidzisGoško + ArtursZnotins + IngunaSkadina + NormundsGruzitis GuntaNešpore-Bērzkalne 295–301 Today, most dialogue systems are fully or partly built using neural network architectures. A crucial prerequisite for the creation of a goal-oriented neural network dialogue system is a dataset that represents typical dialogue scenarios and includes various semantic annotations, e.g. intents, slots and dialogue actions, that are necessary for training a particular neural network architecture. In this demonstration paper, we present an easy to use interface and its back-end which is oriented to domain experts for the collection of goal-oriented dialogue samples. The platform not only allows to collect or write sample dialogues in a structured way, but also provides a means for simple annotation and interpretation of the dialogues. The platform itself is language-independent; it depends only on the availability of particular language processing components for a specific language. It is currently being used to collect dialogue samples in Latvian (a highly inflected language) which represent typical communication between students and the student service. @@ -4413,7 +4413,7 @@ Which is Better for Deep Learning: Python or <fixed-case>MATLAB</fixed-case>? Answering Comparative Questions in Natural Language ViktoriiaChekalina AlexanderBondarenko - ChrisBiemann + ChrisBiemann MeriemBeloucif VarvaraLogacheva AlexanderPanchenko @@ -4438,7 +4438,7 @@ AdityaGaydhani SheenaDufresne MariaGini - SergueiPakhomov + SergueiPakhomov 321–328 Conversational Agent for Daily Living Assessment Coaching (CADLAC) is a multi-modal conversational agent system designed to impersonate “individuals” with various levels of ability in activities of daily living (ADLs: e.g., dressing, bathing, mobility, etc.) for use in training professional assessors how to conduct interviews to determine one’s level of functioning. The system is implemented on the MindMeld platform for conversational AI and features a Bidirectional Long Short-Term Memory topic tracker that allows the agent to navigate conversations spanning 18 different ADL domains, a dialogue manager that interfaces with a database of over 10,000 historical ADL assessments, a rule-based Natural Language Generation (NLG) module, and a pre-trained open-domain conversational sub-agent (based on GPT-2) for handling conversation turns outside of the 18 ADL domains. CADLAC is delivered via state-of-the-art web frameworks to handle multiple conversations and users simultaneously and is enabled with voice interface. The paper includes a description of the system design and evaluation of individual components followed by a brief discussion of current limitations and next steps. 2021.eacl-demos.38 @@ -4448,7 +4448,7 @@ <fixed-case>HULK</fixed-case>: An Energy Efficiency Benchmark Platform for Responsible Natural Language Processing XiyouZhou - ZhiyuChen + ZhiyuChen XiaoyongJin William YangWang 329–336 @@ -4461,10 +4461,10 @@ Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop - Ionut-TeodorSorodoc + Ionut-TeodorSorodoc MadhumitaSushil EceTakmaz - EnekoAgirre + EnekoAgirre Association for Computational Linguistics
Online
April @@ -4534,7 +4534,7 @@ A reproduction of Apple’s bi-directional <fixed-case>LSTM</fixed-case> models for language identification in short strings MadsToftrup SørenAsger Sørensen - Manuel R.Ciosici + Manuel R.Ciosici IraAssent 36–42 Language Identification is the task of identifying a document’s language. For applications like automatic spell checker selection, language identification must use very short strings such as text message fragments. In this work, we reproduce a language identification architecture that Apple briefly sketched in a blog post. We confirm the bi-LSTM model’s performance and find that it outperforms current open-source language identifiers. We further find that its language identification mistakes are due to confusion between related languages. @@ -4589,7 +4589,7 @@ Contrasting distinct structured views to learn sentence embeddings AntoineSimoulin - BenoitCrabbé + BenoitCrabbé 71–79 We propose a self-supervised method that builds sentence embeddings from the combination of diverse explicit syntactic structures of a sentence. We assume structure is crucial to building consistent representations as we expect sentence meaning to be a function of both syntax and semantic aspects. In this perspective, we hypothesize that some linguistic representations might be better adapted given the considered task or sentence. We, therefore, propose to learn individual representation functions for different syntactic frameworks jointly. Again, by hypothesis, all such functions should encode similar semantic information differently and consequently, be complementary for building better sentential semantic embeddings. To assess such hypothesis, we propose an original contrastive multi-view framework that induces an explicit interaction between models during the training phase. We make experiments combining various structures such as dependency, constituency, or sequential schemes. Our results outperform comparable methods on several tasks from standard sentence embedding benchmarks. 2021.eacl-srw.11 @@ -4599,7 +4599,7 @@ Discrete Reasoning Templates for Natural Language Understanding HadeelAl-Negheimish - PranavaMadhyastha + PranavaMadhyastha AlessandraRusso 80–87 Reasoning about information from multiple parts of a passage to derive an answer is an open challenge for reading-comprehension models. In this paper, we present an approach that reasons about complex questions by decomposing them to simpler subquestions that can take advantage of single-span extraction reading-comprehension models, and derives the final answer according to instructions in a predefined reasoning template. We focus on subtraction based arithmetic questions and evaluate our approach on a subset of the DROP dataset. We show that our approach is competitive with the state of the art while being interpretable and requires little supervision. @@ -4747,7 +4747,7 @@ SinanKurtyigit DominikSchlechtweg JonasKuhn - SabineSchulte im Walde + SabineSchulte im Walde 192–202 Type- and token-based embedding architectures are still competing in lexical semantic change detection. The recent success of type-based models in SemEval-2020 Task 1 has raised the question why the success of token-based models on a variety of other NLP tasks does not translate to our field. We investigate the influence of a range of variables on clusterings of BERT vectors and show that its low performance is largely due to orthographic information on the target word, which is encoded even in the higher layers of BERT representations. By reducing the influence of orthography we considerably improve BERT’s performance. 2021.eacl-srw.25 @@ -4807,7 +4807,7 @@ JanNiehues ElizabethSalesky MarcoTurchi - MatteoNegri + MatteoNegri 10–13 Speech translation is the translation of speech in one language typically to text in another, traditionally accomplished through a combination of automatic speech recognition and machine translation. Speech translation has attracted interest for many years, but the recent successful applications of deep learning to both individual tasks have enabled new opportunities through joint modeling, in what we today call ‘end-to-end speech translation.’ In this tutorial we introduce the techniques used in cutting-edge research on speech translation. Starting from the traditional cascaded approach, we give an overview on data sources and model architectures to achieve state-of-the art performance with end-to-end speech translation for both high- and low-resource languages. In addition, we discuss methods to evaluate analyze the proposed solutions, as well as the challenges faced when applying speech translation models for real-world applications. 2021.eacl-tutorials.3 @@ -4816,10 +4816,10 @@ Reviewing Natural Language Processing Research - KevinCohen + KevinCohen KarënFort MargotMieskes - AurélieNévéol + AurélieNévéol AnnaRogers 14–16 The reviewing procedure has been identified as one of the major issues in the current situation of the NLP field. While it is implicitly assumed that junior researcher learn reviewing during their PhD project, this might not always be the case. Additionally, with the growing NLP community and the efforts in the context of widening the NLP community, researchers joining the field might not have the opportunity to practise reviewing. This tutorial fills in this gap by providing an opportunity to learn the basics of reviewing. Also more experienced researchers might find this tutorial interesting to revise their reviewing procedure. diff --git a/data/xml/2021.ecnlp.xml b/data/xml/2021.ecnlp.xml index 2c8e96e6d9..78e6d158ab 100644 --- a/data/xml/2021.ecnlp.xml +++ b/data/xml/2021.ecnlp.xml @@ -3,7 +3,7 @@ Proceedings of the 4th Workshop on e-Commerce and NLP - ShervinMalmasi + ShervinMalmasi SuryaKallumadi NicolaUeffing OlegRokhlenko @@ -89,7 +89,7 @@ YueLiu HengJi YangLiu - PremkumarNatarajan + PremkumarNatarajan 38–48 The growing popularity of Virtual Assistants poses new challenges for Entity Resolution, the task of linking mentions in text to their referent entities in a knowledge base. Specifically, in the shopping domain, customers tend to mention the entities implicitly (e.g., “organic milk”) rather than use the entity names explicitly, leading to a large number of candidate products. Meanwhile, for the same query, different customers may expect different results. For example, with “add milk to my cart”, a customer may refer to a certain product from his/her favorite brand, while some customers may want to re-order products they regularly purchase. Moreover, new customers may lack persistent shopping history, which requires us to enrich the connections between customers through products and their attributes. To address these issues, we propose a new framework that leverages personalized features to improve the accuracy of product ranking. We first build a cross-source heterogeneous knowledge graph from customer purchase history and product knowledge graph to jointly learn customer and product embeddings. After that, we incorporate product, customer, and history representations into a neural reranking model to predict which candidate is most likely to be purchased by a specific customer. Experiment results show that our model substantially improves the accuracy of the top ranked candidates by 24.6% compared to the state-of-the-art product search model. 2021.ecnlp-1.6 @@ -211,7 +211,7 @@ Enhancing Aspect Extraction for <fixed-case>H</fixed-case>indi ArghyaBhattacharya AlokDebnath - ManishShrivastava + ManishShrivastava 140–149 Aspect extraction is not a well-explored topic in Hindi, with only one corpus having been developed for the task. In this paper, we discuss the merits of the existing corpus in terms of quality, size, sparsity, and performance in aspect extraction tasks using established models. To provide a better baseline corpus for aspect extraction, we translate the SemEval 2014 aspect-based sentiment analysis dataset and annotate the aspects in that data. We provide rigorous guidelines and a replicable methodology for this task. We quantitatively evaluate the translations and annotations using inter-annotator agreement scores. We also evaluate our dataset using state-of-the-art neural aspect extraction models in both monolingual and multilingual settings and show that the models perform far better on our corpus than on the existing Hindi dataset. With this, we establish our corpus as the gold-standard aspect extraction dataset in Hindi. 2021.ecnlp-1.17 diff --git a/data/xml/2021.econlp.xml b/data/xml/2021.econlp.xml index f5697655f9..8c35310249 100644 --- a/data/xml/2021.econlp.xml +++ b/data/xml/2021.econlp.xml @@ -4,8 +4,8 @@ Proceedings of the Third Workshop on Economics and Natural Language Processing UdoHahn - VeroniqueHoste - AmandaStent + VeroniqueHoste + AmandaStent Association for Computational Linguistics
Punta Cana, Dominican Republic
November @@ -19,7 +19,7 @@ A Fine-Grained Annotated Corpus for Target-Based Opinion Analysis of Economic and Financial Narratives JiahuiHu - PatrickParoubek + PatrickParoubek 1–12 In this paper about aspect-based sentiment analysis (ABSA), we present the first version of a fine-grained annotated corpus for target-based opinion analysis (TBOA) to analyze economic activities or financial markets. We have annotated, at an intra-sentential level, a corpus of sentences extracted from documents representative of financial analysts’ most-read materials by considering how financial actors communicate about the evolution of event trends and analyze related publications (news, official communications, etc.). Since we focus on identifying the expressions of opinions related to the economy and financial markets, we annotated the sentences that contain at least one subjective expression about a domain-specific term. Candidate sentences for annotations were randomly chosen from texts of specialized press and professional information channels over a period ranging from 1986 to 2021. Our annotation scheme relies on various linguistic markers like domain-specific vocabulary, syntactic structures, and rhetorical relations to explicitly describe the author’s subjective stance. We investigated and evaluated the recourse to automatic pre-annotation with existing natural language processing technologies to alleviate the annotation workload. Our aim is to propose a corpus usable on the one hand as training material for the automatic detection of the opinions expressed on an extensive range of domain-specific aspects and on the other hand as a gold standard for evaluation TBOA. In this paper, we present our pre-annotation models and evaluations of their performance, introduce our annotation scheme and report on the main characteristics of our corpus. 2021.econlp-1.1 @@ -135,7 +135,7 @@ Cryptocurrency Day Trading and Framing Prediction in Microblog Discourse Anna PaulaPawlicka Maule - KristenJohnson + KristenJohnson 82–92 With 56 million people actively trading and investing in cryptocurrency online and globally in 2020, there is an increasing need for automatic social media analysis tools to help understand trading discourse and behavior. In this work, we present a dual natural language modeling pipeline which leverages language and social network behaviors for the prediction of cryptocurrency day trading actions and their associated framing patterns. This pipeline first predicts if tweets can be used to guide day trading behavior, specifically if a cryptocurrency investor should buy, sell, or hold their cryptocurrencies in order to make a profit. Next, tweets are input to an unsupervised deep clustering approach to automatically detect trading framing patterns. Our contributions include the modeling pipeline for this novel task, a new Cryptocurrency Tweets Dataset compiled from influential accounts, and a Historical Price Dataset. Our experiments show that our approach achieves an 88.78% accuracy for day trading behavior prediction and reveals framing fluctuations prior to and during the COVID-19 pandemic that could be used to guide investment actions. 2021.econlp-1.11 diff --git a/data/xml/2021.emnlp.xml b/data/xml/2021.emnlp.xml index 82c7e08686..2b05d3b65f 100644 --- a/data/xml/2021.emnlp.xml +++ b/data/xml/2021.emnlp.xml @@ -3,10 +3,10 @@ Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing - Marie-FrancineMoens - XuanjingHuang + Marie-FrancineMoens + XuanjingHuang LuciaSpecia - Scott Wen-tauYih + Scott Wen-tauYih Association for Computational Linguistics
Online and Punta Cana, Dominican Republic
November @@ -68,7 +68,7 @@ DanLiu MenggeDu XiaoxiLi - YaLi + YaLi EnhongChen 39–55 This paper proposes a novel architecture, Cross Attention Augmented Transducer (CAAT), for simultaneous translation. The framework aims to jointly optimize the policy and translation models. To effectively consider all possible READ-WRITE simultaneous translation action paths, we adapt the online automatic speech recognition (ASR) model, RNN-T, but remove the strong monotonic constraint, which is critical for the translation task to consider reordering. To make CAAT work, we introduce a novel latency loss whose expectation can be optimized by a forward-backward algorithm. We implement CAAT with Transformer while the general CAAT architecture can also be implemented with other attention-based encoder-decoder frameworks. Experiments on both speech-to-text (S2T) and text-to-text (T2T) simultaneous translation tasks show that CAAT achieves significantly better latency-quality trade-offs compared to the state-of-the-art simultaneous translation approaches. @@ -95,7 +95,7 @@ YunlongLiang ChulunZhou FandongMeng - JinanXu + JinanXu YufengChen JinsongSu JieZhou @@ -123,7 +123,7 @@ Controllable Neural Dialogue Summarization with Personal Named Entity Planning ZhengyuanLiu - NancyChen + NancyChen 92–106 In this paper, we propose a controllable neural generation framework that can flexibly guide dialogue summarization with personal named entity planning. The conditional sequences are modulated to decide what types of information or what perspective to focus on when forming summaries to tackle the under-constrained problem in summarization tasks. This framework supports two types of use cases: (1) Comprehensive Perspective, which is a general-purpose case with no user-preference specified, considering summary points from all conversational interlocutors and all mentioned persons; (2) Focus Perspective, positioning the summary based on a user-specified personal named entity, which could be one of the interlocutors or one of the persons mentioned in the conversation. During training, we exploit occurrence planning of personal named entities and coreference information to improve temporal coherence and to minimize hallucination in neural generation. Experimental results show that our proposed framework generates fluent and factually consistent summaries under various planning controls using both objective metrics and human evaluations. 2021.emnlp-main.8 @@ -159,7 +159,7 @@ BaoyuJing ZeyuYou TaoYang - WeiFan + WeiFan HanghangTong 133–139 Extractive text summarization aims at extracting the most representative sentences from a given document as its summary. To extract a good summary from a long text document, sentence embedding plays an important role. Recent studies have leveraged graph neural networks to capture the inter-sentential relationship (e.g., the discourse graph) within the documents to learn contextual sentence embedding. However, those approaches neither consider multiple types of inter-sentential relationships (e.g., semantic similarity and natural connection relationships), nor model intra-sentential relationships (e.g, semantic similarity and syntactic relationship among words). To address these problems, we propose a novel Multiplex Graph Convolutional Network (Multi-GCN) to jointly model different types of relationships among sentences and words. Based on Multi-GCN, we propose a Multiplex Graph Summarization (Multi-GraS) model for extractive text summarization. Finally, we evaluate the proposed models on the CNN/DailyMail benchmark dataset to demonstrate effectiveness of our method. @@ -187,7 +187,7 @@ YaoWan CongyingXia LifangHe - PhilipYu + PhilipYu 146–154 To capture the semantic graph structure from raw text, most existing summarization approaches are built on GNNs with a pre-trained model. However, these methods suffer from cumbersome procedures and inefficient computations for long-text documents. To mitigate these issues, this paper proposes HetFormer, a Transformer-based pre-trained model with multi-granularity sparse attentions for long-text extractive summarization. Specifically, we model different types of semantic nodes in raw text as a potential heterogeneous graph and directly learn heterogeneous relationships (edges) among nodes by Transformer. Extensive experiments on both single- and multi-document summarization tasks show that HetFormer achieves state-of-the-art performance in Rouge F1 while using less memory and fewer parameters. 2021.emnlp-main.13 @@ -238,7 +238,7 @@ A Partition Filter Network for Joint Entity and Relation Extraction ZhihengYan - ChongZhang + ChongZhang JinlanFu QiZhang ZhongyuWei @@ -312,7 +312,7 @@ Learning Implicit Sentiment in Aspect-based Sentiment Analysis with Supervised Contrastive Pre-Training ZhengyanLi YichengZou - ChongZhang + ChongZhang QiZhang ZhongyuWei 246–256 @@ -448,7 +448,7 @@ Few-Shot Text Generation with Natural Language Instructions TimoSchick - HinrichSchütze + HinrichSchütze 390–402 Providing pretrained language models with simple task descriptions in natural language enables them to solve some tasks in a fully unsupervised fashion. Moreover, when combined with regular learning from examples, this idea yields impressive few-shot results for a wide range of text classification tasks. It is also a promising direction to improve data efficiency in generative settings, but there are several challenges to using a combination of task descriptions and example-based learning for text generation. In particular, it is crucial to find task descriptions that are easy to understand for the pretrained model and to ensure that it actually makes good use of them; furthermore, effective measures against overfitting have to be implemented. In this paper, we show how these challenges can be tackled: We introduce GenPET, a method for text generation that is based on pattern-exploiting training, a recent approach for combining textual instructions with supervised learning that only works for classification tasks. On several summarization and headline generation datasets, GenPET gives consistent improvements over strong baselines in few-shot settings. 2021.emnlp-main.32 @@ -533,7 +533,7 @@ Inducing Stereotypical Character Roles from Plot Structure LabibaJahan RahulMittal - MarkFinlayson + MarkFinlayson 492–497 Stereotypical character roles-also known as archetypes or dramatis personae-play an important function in narratives: they facilitate efficient communication with bundles of default characteristics and associations and ease understanding of those characters’ roles in the overall narrative. We present a fully unsupervised k-means clustering approach for learning stereotypical roles given only structural plot information. We demonstrate the technique on Vladimir Propp’s structural theory of Russian folktales (captured in the extended ProppLearner corpus, with 46 tales), showing that our approach can induce six out of seven of Propp’s dramatis personae with F1 measures of up to 0.70 (0.58 average), with an additional category for minor characters. We have explored various feature sets and variations of a cluster evaluation method. The best-performing feature set comprises plot functions, unigrams, tf-idf weights, and embeddings over coreference chain heads. Roles that are mentioned more often (Hero, Villain), or have clearly distinct plot patterns (Princess) are more strongly differentiated than less frequent or distinct roles (Dispatcher, Helper, Donor). Detailed error analysis suggests that the quality of the coreference chain and plot functions annotations are critical for this task. We provide all our data and code for reproducibility. 2021.emnlp-main.39 @@ -570,7 +570,7 @@ Mitigating Language-Dependent Ethnic Bias in <fixed-case>BERT</fixed-case> JaimeenAhn - AliceOh + AliceOh 533–549 In this paper, we study ethnic bias and how it varies across languages by analyzing and mitigating ethnic bias in monolingual BERT for English, German, Spanish, Korean, Turkish, and Chinese. To observe and quantify ethnic bias, we develop a novel metric called Categorical Bias score. Then we propose two methods for mitigation; first using a multilingual model, and second using contextual word alignment of two monolingual models. We compare our proposed methods with monolingual BERT and show that these methods effectively alleviate the ethnic bias. Which of the two methods works better depends on the amount of NLP resources available for that language. We additionally experiment with Arabic and Greek to verify that our proposed methods work for a wider variety of languages. 2021.emnlp-main.42 @@ -600,7 +600,7 @@ GabrielPoesia RobertHawkins DorsaSadigh - NoahGoodman + NoahGoodman 563–570 An overarching goal of natural language processing is to enable machines to communicate seamlessly with humans. However, natural language can be ambiguous or unclear. In cases of uncertainty, humans engage in an interactive process known as repair: asking questions and seeking clarification until their uncertainty is resolved. We propose a framework for building a visually grounded question-asking model capable of producing polar (yes-no) clarification questions to resolve misunderstandings in dialogue. Our model uses an expected information gain objective to derive informative questions from an off-the-shelf image captioner without requiring any supervised question-answer data. We demonstrate our model’s ability to pose questions that improve communicative success in a goal-oriented 20 questions game with synthetic and human answerers. 2021.emnlp-main.44 @@ -637,7 +637,7 @@ <fixed-case>T</fixed-case>ext2<fixed-case>M</fixed-case>ol: Cross-Modal Molecule Retrieval with Natural Language Queries CarlEdwards - ChengXiangZhai + ChengXiangZhai HengJi 595–607 We propose a new task, Text2Mol, to retrieve molecules using natural language descriptions as queries. Natural language and molecules encode information in very different ways, which leads to the exciting but challenging problem of integrating these two very different modalities. Although some work has been done on text-based retrieval and structure-based retrieval, this new task requires integrating molecules and natural language more directly. Moreover, this can be viewed as an especially challenging cross-lingual retrieval problem by considering the molecules as a language with a very unique grammar. We construct a paired dataset of molecules and their corresponding text descriptions, which we use to learn an aligned common semantic embedding space for retrieval. We extend this to create a cross-modal attention-based model for explainability and reranking by interpreting the attentions as association rules. We also employ an ensemble approach to integrate our different architectures, which significantly improves results from 0.372 to 0.499 MRR. This new multimodal approach opens a new perspective on solving problems in chemistry literature understanding and molecular machine learning. @@ -731,7 +731,7 @@ ShiunzuKuo VikasBhardwaj AnujKumar - MichaelWhite + MichaelWhite 682–697 We propose a novel framework to train models to classify acceptability of responses generated by natural language generation (NLG) models, improving upon existing sentence transformation and model-based approaches. An NLG response is considered acceptable if it is both semantically correct and grammatical. We don’t make use of any human references making the classifiers suitable for runtime deployment. Training data for the classifiers is obtained using a 2-stage approach of first generating synthetic data using a combination of existing and new model-based approaches followed by a novel validation framework to filter and sort the synthetic data into acceptable and unacceptable classes. Our 2-stage approach adapts to a wide range of data representations and does not require additional data beyond what the NLG models are trained on. It is also independent of the underlying NLG model architecture, and is able to generate more realistic samples close to the distribution of the NLG model-generated responses. We present results on 5 datasets (WebNLG, Cleaned E2E, ViGGO, Alarm, and Weather) with varying data representations. We compare our framework with existing techniques that involve synthetic data generation using simple sentence transformations and/or model-based techniques, and show that building acceptability classifiers using data that resembles the generation model outputs followed by a validation framework outperforms the existing techniques, achieving state-of-the-art results. We also show that our techniques can be used in few-shot settings using self-training. 2021.emnlp-main.53 @@ -795,7 +795,7 @@ Learning Compact Metrics for <fixed-case>MT</fixed-case> AmyPu Hyung WonChung - AnkurParikh + AnkurParikh SebastianGehrmann ThibaultSellam 751–762 @@ -810,7 +810,7 @@ The Impact of Positional Encodings on Multilingual Compression VinitRavishankar - AndersSøgaard + AndersSøgaard 763–777 In order to preserve word-order information in a non-autoregressive setting, transformer architectures tend to include positional knowledge, by (for instance) adding positional encodings to token embeddings. Several modifications have been proposed over the sinusoidal positional encodings used in the original transformer architecture; these include, for instance, separating position encodings and token embeddings, or directly modifying attention weights based on the distance between word pairs. We first show that surprisingly, while these modifications tend to improve monolingual language models, none of them result in better multilingual language models. We then answer why that is: sinusoidal encodings were explicitly designed to facilitate compositionality by allowing linear projections over arbitrary time steps. Higher variances in multilingual training distributions requires higher compression, in which case, compositionality becomes indispensable. Learned absolute positional encodings (e.g., in mBERT) tend to approximate sinusoidal embeddings in multilingual settings, but more complex positional encoding architectures lack the inductive bias to effectively learn cross-lingual alignment. In other words, while sinusoidal positional encodings were designed for monolingual applications, they are particularly useful in multilingual language models. 2021.emnlp-main.59 @@ -822,7 +822,7 @@ Disentangling Representations of Text by Masking Transformers XiongyiZhang Jan-Willemvan de Meent - ByronWallace + ByronWallace 778–791 Representations from large pretrained models such as BERT encode a range of features into monolithic vectors, affording strong predictive accuracy across a range of downstream tasks. In this paper we explore whether it is possible to learn disentangled representations by identifying existing subnetworks within pretrained models that encode distinct, complementary aspects. Concretely, we learn binary masks over transformer weights or hidden units to uncover subsets of features that correlate with a specific factor of variation; this eliminates the need to train a disentangled model from scratch for a particular task. We evaluate this method with respect to its ability to disentangle representations of sentiment from genre in movie reviews, toxicity from dialect in Tweets, and syntax from semantics. By combining masking with magnitude pruning we find that we can identify sparse subnetworks within BERT that strongly encode particular aspects (e.g., semantics) while only weakly encoding others (e.g., syntax). Moreover, despite only learning masks, disentanglement-via-masking performs as well as — and often better than —previously proposed methods based on variational autoencoders and adversarial training. 2021.emnlp-main.60 @@ -860,7 +860,7 @@ CoryPaik StéphaneAroca-Ouellette AlessandroRoncone - KatharinaKann + KatharinaKann 823–835 Recent work has raised concerns about the inherent limitations of text-only pretraining. In this paper, we first demonstrate that reporting bias, the tendency of people to not state the obvious, is one of the causes of this limitation, and then investigate to what extent multimodal training can mitigate this issue. To accomplish this, we 1) generate the Color Dataset (CoDa), a dataset of human-perceived color distributions for 521 common objects; 2) use CoDa to analyze and compare the color distribution found in text, the distribution captured by language models, and a human’s perception of color; and 3) investigate the performance differences between text-only and multimodal models on CoDa. Our results show that the distribution of colors that a language model recovers correlates more strongly with the inaccurate distribution found in text than with the ground-truth, supporting the claim that reporting bias negatively impacts and inherently limits text-only training. We then demonstrate that multimodal models can leverage their visual training to mitigate these effects, providing a promising avenue for future research. 2021.emnlp-main.63 @@ -910,7 +910,7 @@ Jump-Starting Item Parameters for Adaptive Language Tests - Arya D.McCarthy + Arya D.McCarthy Kevin P.Yancey Geoffrey T.LaFlair JesseEgbert @@ -948,7 +948,7 @@ MatúšFalis HangDong AlexandraBirch - BeatriceAlex + BeatriceAlex 907–912 Large-Scale Multi-Label Text Classification (LMTC) includes tasks with hierarchical label spaces, such as automatic assignment of ICD-9 codes to discharge summaries. Performance of models in prior art is evaluated with standard precision, recall, and F1 measures without regard for the rich hierarchical structure. In this work we argue for hierarchical evaluation of the predictions of neural LMTC models. With the example of the ICD-9 ontology we describe a structural issue in the representation of the structured label space in prior art, and propose an alternative representation based on the depth of the ontology. We propose a set of metrics for hierarchical evaluation using the depth-based representation. We compare the evaluation scores from the proposed metrics with previously used metrics on prior art LMTC models for ICD-9 coding in MIMIC-III. We also propose further avenues of research involving the proposed ontological representation. 2021.emnlp-main.69 @@ -1018,7 +1018,7 @@ PatrickHaller LenaJäger RyanCotterell - RogerLevy + RogerLevy 963–980 The uniform information density (UID) hypothesis posits a preference among language users for utterances structured such that information is distributed uniformly across a signal. While its implications on language production have been well explored, the hypothesis potentially makes predictions about language comprehension and linguistic acceptability as well. Further, it is unclear how uniformity in a linguistic signal—or lack thereof—should be measured, and over which linguistic unit, e.g., the sentence or language level, this uniformity should hold. Here we investigate these facets of the UID hypothesis using reading time and acceptability data. While our reading time results are generally consistent with previous work, they are also consistent with a weakly super-linear effect of surprisal, which would be compatible with UID’s predictions. For acceptability judgments, we find clearer evidence that non-uniformity in information density is predictive of lower acceptability. We then explore multiple operationalizations of UID, motivated by different interpretations of the original hypothesis, and analyze the scope over which the pressure towards uniformity is exerted. The explanatory power of a subset of the proposed operationalizations suggests that the strongest trend may be a regression towards a mean surprisal across the language, rather than the phrase, sentence, or document—a finding that supports a typical interpretation of UID, namely that it is the byproduct of language users maximizing the use of a (hypothetical) communication channel. 2021.emnlp-main.74 @@ -1068,7 +1068,7 @@ Kyoung-RokJang JunmoKang GiwonHong - Sung-HyonMyaeng + Sung-HyonMyaeng JooheePark TaewonYoon HeecheolSeo @@ -1161,7 +1161,7 @@ <fixed-case>M</fixed-case>ind<fixed-case>C</fixed-case>raft: Theory of Mind Modeling for Situated Dialogue in Collaborative Tasks Cristian-PaulBara SkyCH-Wang - JoyceChai + JoyceChai 1112–1125 An ideal integration of autonomous agents in a human world implies that they are able to collaborate on human terms. In particular, theory of mind plays an important role in maintaining common ground during human collaboration and communication. To enable theory of mind modeling in situated interactions, we introduce a fine-grained dataset of collaborative tasks performed by pairs of human subjects in the 3D virtual blocks world of Minecraft. It provides information that captures partners’ beliefs of the world and of each other as an interaction unfolds, bringing abundant opportunities to study human collaborative behaviors in situated language communication. As a first step towards our goal of developing embodied AI agents able to infer belief states of collaborative partners in situ, we build and present results on computational models for several theory of mind tasks. 2021.emnlp-main.85 @@ -1187,7 +1187,7 @@ Cross-lingual Intermediate Fine-tuning improves Dialogue State Tracking NikitaMoghe - MarkSteedman + MarkSteedman AlexandraBirch 1137–1150 Recent progress in task-oriented neural dialogue systems is largely focused on a handful of languages, as annotation of training data is tedious and expensive. Machine translation has been used to make systems multilingual, but this can introduce a pipeline of errors. Another promising solution is using cross-lingual transfer learning through pretrained multilingual models. Existing methods train multilingual models with additional code-mixed task data or refine the cross-lingual representations through parallel ontologies. In this work, we enhance the transfer learning process by intermediate fine-tuning of pretrained multilingual models, where the multilingual models are fine-tuned with different but related data and/or tasks. Specifically, we use parallel and conversational movie subtitles datasets to design cross-lingual intermediate tasks suitable for downstream dialogue tasks. We use only 200K lines of parallel data for intermediate fine-tuning which is already available for 1782 language pairs. We test our approach on the cross-lingual dialogue state tracking task for the parallel MultiWoZ (English -> Chinese, Chinese -> English) and Multilingual WoZ (English -> German, English -> Italian) datasets. We achieve impressive improvements (> 20% on joint goal accuracy) on the parallel MultiWoZ dataset and the Multilingual WoZ dataset over the vanilla baseline with only 10% of the target language task data and zero-shot setup respectively. @@ -1254,10 +1254,10 @@ Label Verbalization and Entailment for Effective Zero and Few-Shot Relation Extraction OscarSainz - OierLopez de Lacalle - GorkaLabaka + OierLopez de Lacalle + GorkaLabaka AnderBarrena - EnekoAgirre + EnekoAgirre 1199–1212 Relation extraction systems require large amounts of labeled examples which are costly to annotate. In this work we reformulate relation extraction as an entailment task, with simple, hand-made, verbalizations of relations produced in less than 15 min per relation. The system relies on a pretrained textual entailment engine which is run as-is (no training examples, zero-shot) or further fine-tuned on labeled examples (few-shot or fully trained). In our experiments on TACRED we attain 63% F1 zero-shot, 69% with 16 examples per relation (17% points better than the best supervised system on the same conditions), and only 4 points short to the state-of-the-art (which uses 20 times more training data). We also show that the performance can be improved significantly with larger entailment models, up to 12 points in zero-shot, allowing to report the best results to date on TACRED when fully trained. The analysis shows that our few-shot systems are specially effective when discriminating between relations, and that the performance difference in low data regimes comes mainly from identifying no-relation cases. 2021.emnlp-main.92 @@ -1299,7 +1299,7 @@ JiangtaoFeng LinQiu HaoZhou - WeinanZhang + WeinanZhang YongYu LeiLi 1239–1250 @@ -1368,7 +1368,7 @@ Evaluating the Evaluation Metrics for Style Transfer: A Case Study in Multilingual Formality Transfer EleftheriaBriakou SwetaAgrawal - JoelTetreault + JoelTetreault MarineCarpuat 1321–1336 While the field of style transfer (ST) has been growing rapidly, it has been hampered by a lack of standardized practices for automatic evaluation. In this paper, we evaluate leading automatic metrics on the oft-researched task of formality style transfer. Unlike previous evaluations, which focus solely on English, we expand our focus to Brazilian-Portuguese, French, and Italian, making this work the first multilingual evaluation of metrics in ST. We outline best practices for automatic evaluation in (formality) style transfer and identify several models that correlate well with human judgments and are robust across languages. We hope that this work will help accelerate development in ST, where human evaluation is often challenging to collect. @@ -1397,7 +1397,7 @@ Understanding Politics via Contextualized Discourse Processing - RajkumarPujari + RajkumarPujari DanGoldwasser 1353–1367 Politicians often have underlying agendas when reacting to events. Arguments in contexts of various events reflect a fairly consistent set of agendas for a given entity. In spite of recent advances in Pretrained Language Models, those text representations are not designed to capture such nuanced patterns. In this paper, we propose a Compositional Reader model consisting of encoder and composer modules, that captures and leverages such information to generate more effective representations for entities, issues, and events. These representations are contextualized by tweets, press releases, issues, news articles, and participating entities. Our model processes several documents at once and generates composed representations for multiple entities over several issues or events. Via qualitative and quantitative empirical analysis, we show that these representations are meaningful and effective. @@ -1448,7 +1448,7 @@ Focus on what matters: Applying Discourse Coherence Theory to Cross Document Coreference WilliamHeld DanIter - DanJurafsky + DanJurafsky 1406–1417 Performing event and entity coreference resolution across documents vastly increases the number of candidate mentions, making it intractable to do the full n^2 pairwise comparisons. Existing approaches simplify by considering coreference only within document clusters, but this fails to handle inter-cluster coreference, common in many applications. As a result cross-document coreference algorithms are rarely applied to downstream tasks. We draw on an insight from discourse coherence theory: potential coreferences are constrained by the reader’s discourse focus. We model the entities/events in a reader’s focus as a neighborhood within a learned latent embedding space which minimizes the distance between mentions and the centroids of their gold coreference clusters. We then use these neighborhoods to sample only hard negatives to train a fine-grained classifier on mention pairs and their local discourse features. Our approach achieves state-of-the-art results for both events and entities on the ECB+, Gun Violence, Football Coreference, and Cross-Domain Cross-Document Coreference corpora. Furthermore, training on multiple corpora improves average performance across all datasets by 17.2 F1 points, leading to a robust coreference resolution model that is now feasible to apply to downstream tasks. 2021.emnlp-main.106 @@ -1500,7 +1500,7 @@ <fixed-case>R</fixed-case>ule<fixed-case>BERT</fixed-case>: Teaching Soft Rules to Pre-Trained Language Models MohammedSaeed NaserAhmadi - PreslavNakov + PreslavNakov PaoloPapotti 1460–1476 While pre-trained language models (PLMs) are the go-to solution to tackle many natural language processing problems, they are still very limited in their ability to capture and to use common-sense knowledge. In fact, even if information is available in the form of approximate (soft) logical rules, it is not clear how to transfer it to a PLM in order to improve its performance for deductive reasoning tasks. Here, we aim to bridge this gap by teaching PLMs how to reason with soft Horn rules. We introduce a classification task where, given facts and soft rules, the PLM should return a prediction with a probability for a given hypothesis. We release the first dataset for this task, and we propose a revised loss function that enables the PLM to learn how to predict precise probabilities for the task. Our evaluation results show that the resulting fine-tuned models achieve very high performance, even on logical rules that were unseen at training. Moreover, we demonstrate that logical notions expressed by the rules are transferred to the fine-tuned model, yielding state-of-the-art results on external datasets. @@ -1536,9 +1536,9 @@ Shortcutted Commonsense: Data Spuriousness in Deep Learning of Commonsense Reasoning RubenBranco - AntónioBranco - JoãoAntónio Rodrigues - João RicardoSilva + AntónioBranco + JoãoAntónio Rodrigues + João RicardoSilva 1504–1521 Commonsense is a quintessential human capacity that has been a core challenge to Artificial Intelligence since its inception. Impressive results in Natural Language Processing tasks, including in commonsense reasoning, have consistently been achieved with Transformer neural language models, even matching or surpassing human performance in some benchmarks. Recently, some of these advances have been called into question: so called data artifacts in the training data have been made evident as spurious correlations and shallow shortcuts that in some cases are leveraging these outstanding results. In this paper we seek to further pursue this analysis into the realm of commonsense related language processing tasks. We undertake a study on different prominent benchmarks that involve commonsense reasoning, along a number of key stress experiments, thus seeking to gain insight on whether the models are learning transferable generalizations intrinsic to the problem at stake or just taking advantage of incidental shortcuts in the data items. The results obtained indicate that most datasets experimented with are problematic, with models resorting to non-robust features and appearing not to be learning and generalizing towards the overall tasks intended to be conveyed or exemplified by the datasets. 2021.emnlp-main.113 @@ -1647,7 +1647,7 @@ JohnHewitt KawinEthayarajh PercyLiang - ChristopherManning + ChristopherManning 1626–1639 Probing experiments investigate the extent to which neural representations make properties—like part-of-speech—predictable. One suggests that a representation encodes a property if probing that representation produces higher accuracy than probing a baseline representation like non-contextual word embeddings. Instead of using baselines as a point of comparison, we’re interested in measuring information that is contained in the representation but not in the baseline. For example, current methods can detect when a representation is more useful than the word identity (a baseline) for predicting part-of-speech; however, they cannot detect when the representation is predictive of just the aspects of part-of-speech not explainable by the word identity. In this work, we extend a theory of usable information called V-information and propose conditional probing, which explicitly conditions on the information in the baseline. In a case study, we find that after conditioning on non-contextual word embeddings, properties like part-of-speech are accessible at deeper layers of a network than previously thought. 2021.emnlp-main.122 @@ -1657,10 +1657,10 @@ <fixed-case>GFST</fixed-case>: <fixed-case>G</fixed-case>ender-Filtered Self-Training for More Accurate Gender in Translation - Prafulla KumarChoubey + Prafulla KumarChoubey AnnaCurrey PrashantMathur - GeorgianaDinu + GeorgianaDinu 1640–1654 Targeted evaluations have found that machine translation systems often output incorrect gender in translations, even when the gender is clear from context. Furthermore, these incorrectly gendered translations have the potential to reflect or amplify social biases. We propose gender-filtered self-training (GFST) to improve gender translation accuracy on unambiguously gendered inputs. Our GFST approach uses a source monolingual corpus and an initial model to generate gender-specific pseudo-parallel corpora which are then filtered and added to the training data. We evaluate GFST on translation from English into five languages, finding that it improves gender accuracy without damaging generic quality. We also show the viability of GFST on several experimental settings, including re-training from scratch, fine-tuning, controlling the gender balance of the data, forward translation, and back-translation. 2021.emnlp-main.123 @@ -1672,7 +1672,7 @@ “Wikily” Supervised Neural Translation Tailored to Cross-Lingual Tasks Mohammad SadeghRasooli ChrisCallison-Burch - Derry TantiWijaya + Derry TantiWijaya 1655–1670 We present a simple but effective approach for leveraging Wikipedia for neural machine translation as well as cross-lingual tasks of image captioning and dependency parsing without using any direct supervision from external parallel data or supervised models in the target language. We show that first sentences and titles of linked Wikipedia pages, as well as cross-lingual image captions, are strong signals for a seed parallel data to extract bilingual dictionaries and cross-lingual word embeddings for mining parallel text from Wikipedia. Our final model achieves high BLEU scores that are close to or sometimes higher than strong supervised baselines in low-resource languages; e.g. supervised BLEU of 4.0 versus 12.1 from our model in English-to-Kazakh. Moreover, we tailor our wikily translation models to unsupervised image captioning, and cross-lingual dependency parser transfer. In image captioning, we train a multi-tasking machine translation and image captioning pipeline for Arabic and English from which the Arabic training data is a wikily translation of the English captioning data. Our captioning results on Arabic are slightly better than that of its supervised model. In dependency parsing, we translate a large amount of monolingual text, and use it as an artificial training data in an annotation projection framework. We show that our model outperforms recent work on cross-lingual transfer of dependency parsers. 2021.emnlp-main.124 @@ -1688,7 +1688,7 @@ ShaohanHuang SakshamSinghal Xian-LingMao - HeyanHuang + HeyanHuang XiaSong FuruWei 1671–1683 @@ -1701,7 +1701,7 @@ Improving Zero-Shot Cross-Lingual Transfer Learning via Robust Training Kuan-HaoHuang - WasiAhmad + WasiAhmad NanyunPeng Kai-WeiChang 1684–1697 @@ -1715,7 +1715,7 @@ Speechformer: Reducing Information Loss in Direct Speech Translation SaraPapi MarcoGaido - MatteoNegri + MatteoNegri MarcoTurchi 1698–1706 Transformer-based models have gained increasing popularity achieving state-of-the-art performance in many research fields including speech translation. However, Transformer’s quadratic complexity with respect to the input sequence length prevents its adoption as is with audio signals, which are typically represented by long sequences. Current solutions resort to an initial sub-optimal compression based on a fixed sampling of raw audio features. Therefore, potentially useful linguistic information is not accessible to higher-level layers in the architecture. To solve this issue, we propose Speechformer, an architecture that, thanks to reduced memory usage in the attention layers, avoids the initial lossy compression and aggregates information only at a higher level according to more informed linguistic criteria. Experiments on three language pairs (en→de/es/nl) show the efficacy of our solution, with gains of up to 0.8 BLEU on the standard MuST-C corpus and of up to 4.0 BLEU in a low resource scenario. @@ -1728,7 +1728,7 @@ Is “moby dick” a Whale or a Bird? Named Entities and Terminology in Speech Translation MarcoGaido SusanaRodríguez - MatteoNegri + MatteoNegri LuisaBentivogli MarcoTurchi 1707–1716 @@ -1754,7 +1754,7 @@ Translation-based Supervision for Policy Generation in Simultaneous Neural Machine Translation AshkanAlinejad - Hassan S.Shavarani + Hassan S.Shavarani AnoopSarkar 1734–1744 In simultaneous machine translation, finding an agent with the optimal action sequence of reads and writes that maintain a high level of translation quality while minimizing the average lag in producing target tokens remains an extremely challenging problem. We propose a novel supervised learning approach for training an agent that can detect the minimum number of reads required for generating each target token by comparing simultaneous translations against full-sentence translations during training to generate oracle action sequences. These oracle sequences can then be used to train a supervised model for action generation at inference time. Our approach provides an alternative to current heuristic methods in simultaneous translation by introducing a new training objective, which is easier to train than previous attempts at training the agent using reinforcement learning techniques for this task. Our experimental results show that our novel training method for action generation produces much higher quality translations while minimizing the average lag in simultaneous translation. @@ -1793,7 +1793,7 @@ VivekRamanujan YoavGoldberg RoySchwartz - Noah A.Smith + Noah A.Smith 1766–1781 The capacity of neural networks like the widely adopted transformer is known to be very high. Evidence is emerging that they learn successfully due to inductive bias in the training routine, typically a variant of gradient descent (GD). To better understand this bias, we study the tendency for transformer parameters to grow in magnitude (\ell_2 norm) during training, and its implications for the emergent representations within self attention layers. Empirically, we document norm growth in the training of transformer language models, including T5 during its pretraining. As the parameters grow in magnitude, we prove that the network approximates a discretized network with saturated activation functions. Such “saturated” networks are known to have a reduced capacity compared to the full network family that can be described in terms of formal languages and automata. Our results suggest saturation is a new characterization of an inductive bias implicit in GD of particular interest for NLP. We leverage the emergent discrete structure in a saturated transformer to analyze the role of different attention heads, finding that some focus locally on a small number of positions, while other heads compute global averages, allowing counting. We believe understanding the interplay between these two capabilities may shed further light on the structure of computation within large transformers. 2021.emnlp-main.133 @@ -1819,10 +1819,10 @@ MattGardner WilliamMerrill JesseDodge - MatthewPeters + MatthewPeters AlexisRoss SameerSingh - Noah A.Smith + Noah A.Smith 1801–1813 Much recent work in NLP has documented dataset artifacts, bias, and spurious correlations between input features and output labels. However, how to tell which features have “spurious” instead of legitimate correlations is typically left unspecified. In this work we argue that for complex language understanding tasks, all simple feature correlations are spurious, and we formalize this notion into a class of problems which we call competency problems. For example, the word “amazing” on its own should not give information about a sentiment label independent of the context in which it appears, which could include negation, metaphor, sarcasm, etc. We theoretically analyze the difficulty of creating data for competency problems when human bias is taken into account, showing that realistic datasets will increasingly deviate from competency problems as dataset size increases. This analysis gives us a simple statistical test for dataset artifacts, which we use to show more subtle biases than were described in prior work, including demonstrating that models are inappropriately affected by these less extreme biases. Our theoretical treatment of this problem also allows us to analyze proposed solutions, such as making local edits to dataset instances, and to give recommendations for future data collection and model design efforts that target competency problems. 2021.emnlp-main.135 @@ -1836,7 +1836,7 @@ HuaxiuYao Ying-xinWu MaruanAl-Shedivat - EricXing + EricXing 1814–1821 Meta-learning has achieved great success in leveraging the historical learned knowledge to facilitate the learning process of the new task. However, merely learning the knowledge from the historical tasks, adopted by current meta-learning algorithms, may not generalize well to testing tasks when they are not well-supported by training tasks. This paper studies a low-resource text classification problem and bridges the gap between meta-training and meta-testing tasks by leveraging the external knowledge bases. Specifically, we propose KGML to introduce additional representation for each sentence learned from the extracted sentence-specific knowledge graph. The extensive experiments on three datasets demonstrate the effectiveness of KGML under both supervised adaptation and unsupervised adaptation settings. 2021.emnlp-main.136 @@ -1848,7 +1848,7 @@ Sentence Bottleneck Autoencoders from Transformer Language Models IvanMontero NikolaosPappas - Noah A.Smith + Noah A.Smith 1822–1831 Representation learning for text via pretraining a language model on a large corpus has become a standard starting point for building NLP systems. This approach stands in contrast to autoencoders, also trained on raw text, but with the objective of learning to encode each input as a vector that allows full reconstruction. Autoencoders are attractive because of their latent space structure and generative properties. We therefore explore the construction of a sentence-level autoencoder from a pretrained, frozen transformer language model. We adapt the masked language modeling objective as a generative, denoising one, while only training a sentence bottleneck and a single-layer modified transformer decoder. We demonstrate that the sentence representations discovered by our model achieve better quality than previous methods that extract representations from pretrained transformers on text similarity tasks, style transfer (an example of controlled generation), and single-sentence classification tasks in the GLUE benchmark, while using fewer parameters than large pretrained models. 2021.emnlp-main.137 @@ -1860,7 +1860,7 @@ Efficient Contrastive Learning via Novel Data Augmentation and Curriculum Learning SeonghyeonYe JiseonKim - AliceOh + AliceOh 1832–1838 We introduce EfficientCL, a memory-efficient continual pretraining method that applies contrastive learning with novel data augmentation and curriculum learning. For data augmentation, we stack two types of operation sequentially: cutoff and PCA jittering. While pretraining steps proceed, we apply curriculum learning by incrementing the augmentation degree for each difficulty step. After data augmentation is finished, contrastive learning is applied on projected embeddings of original and augmented examples. When finetuned on GLUE benchmark, our model outperforms baseline models, especially for sentence-level tasks. Additionally, this improvement is capable with only 70% of computational memory compared to the baseline model. 2021.emnlp-main.138 @@ -1886,7 +1886,7 @@ ZeqiuWu Bo-RuLu HannanehHajishirzi - MariOstendorf + MariOstendorf 1852–1863 Identifying relevant knowledge to be used in conversational systems that are grounded in long documents is critical to effective response generation. We introduce a knowledge identification model that leverages the document structure to provide dialogue-contextualized passage encodings and better locate knowledge relevant to the conversation. An auxiliary loss captures the history of dialogue-document connections. We demonstrate the effectiveness of our model on two document-grounded conversational datasets and provide analyses showing generalization to unseen documents and long dialogue contexts. 2021.emnlp-main.140 @@ -1953,14 +1953,14 @@ Few-Shot Intent Detection via Contrastive Pre-Training and Fine-Tuning JianguoZhang - TrungBui + TrungBui SeunghyunYoon XiangChen ZhiweiLiu CongyingXia - Quan HungTran + Quan HungTran WalterChang - PhilipYu + PhilipYu 1906–1912 In this work, we focus on a more challenging few-shot intent detection scenario where many intents are fine-grained and semantically similar. We present a simple yet effective few-shot intent detection schema via contrastive pre-training and fine-tuning. Specifically, we first conduct self-supervised contrastive pre-training on collected intent datasets, which implicitly learns to discriminate semantically similar utterances without using any labels. We then perform few-shot intent detection together with supervised contrastive learning, which explicitly pulls utterances from the same intent closer and pushes utterances across different intents farther. Experimental results show that our proposed method achieves state-of-the-art performance on three challenging intent detection datasets under 5-shot and 10-shot settings. 2021.emnlp-main.144 @@ -2008,10 +2008,10 @@ Robust Retrieval Augmented Generation for Zero-shot Slot Filling - MichaelGlass + MichaelGlass GaetanoRossiello Md Faisal MahbubChowdhury - AlfioGliozzo + AlfioGliozzo 1939–1949 Automatically inducing high quality knowledge graphs from a given collection of documents still remains a challenging problem in AI. One way to make headway for this problem is through advancements in a related task known as slot filling. In this task, given an entity query in form of [Entity, Slot, ?], a system is asked to ‘fill’ the slot by generating or extracting the missing value exploiting evidence extracted from relevant passage(s) in the given document collection. The recent works in the field try to solve this task in an end-to-end fashion using retrieval-based language models. In this paper, we present a novel approach to zero-shot slot filling that extends dense passage retrieval with hard negatives and robust training procedures for retrieval augmented generation models. Our model reports large improvements on both T-REx and zsRE slot filling datasets, improving both passage retrieval and slot value generation, and ranking at the top-1 position in the KILT leaderboard. Moreover, we demonstrate the robustness of our system showing its domain adaptation capability on a new variant of the TACRED dataset for slot filling, through a combination of zero/few-shot learning. We release the source code and pre-trained models. 2021.emnlp-main.148 @@ -2107,10 +2107,10 @@ Fairness-aware Class Imbalanced Learning - ShivashankarSubramanian + ShivashankarSubramanian AfshinRahimi - TimothyBaldwin - TrevorCohn + TimothyBaldwin + TrevorCohn LeaFrermann 2045–2051 Class imbalance is a common challenge in many NLP tasks, and has clear connections to bias, in that bias in training data often leads to higher accuracy for majority groups at the expense of minority groups. However there has traditionally been a disconnect between research on class-imbalanced learning and mitigating bias, and only recently have the two been looked at through a common lens. In this work we evaluate long-tail learning methods for tweet sentiment and occupation classification, and extend a margin-loss based approach with methods to enforce fairness. We empirically show through controlled experiments that the proposed approaches help mitigate both class imbalance and demographic biases. @@ -2228,7 +2228,7 @@ SoravitChangpinyo BoqingGong PiyushSharma - Song-ChunZhu + Song-ChunZhu RaduSoricut 2148–2166 One challenge in evaluating visual question answering (VQA) models in the cross-dataset adaptation setting is that the distribution shifts are multi-modal, making it difficult to identify if it is the shifts in visual or language features that play a key role. In this paper, we propose a semi-automatic framework for generating disentangled shifts by introducing a controllable visual question-answer generation (VQAG) module that is capable of generating highly-relevant and diverse question-answer pairs with the desired dataset style. We use it to create CrossVQA, a collection of test splits for assessing VQA generalization based on the VQA2, VizWiz, and Open Images datasets. We provide an analysis of our generated datasets and demonstrate its utility by using them to evaluate several state-of-the-art VQA systems. One important finding is that the visual shifts in cross-dataset VQA matter more than the language shifts. More broadly, we present a scalable framework for systematically evaluating the machine with little human intervention. @@ -2271,7 +2271,7 @@ Effect of Visual Extensions on Natural Language Understanding in Vision-and-Language Models TaichiIki - AkikoAizawa + AkikoAizawa 2189–2196 A method for creating a vision-and-language (V&L) model is to extend a language model through structural modifications and V&L pre-training. Such an extension aims to make a V&L model inherit the capability of natural language understanding (NLU) from the original language model. To see how well this is achieved, we propose to evaluate V&L models using an NLU benchmark (GLUE). We compare five V&L models, including single-stream and dual-stream models, trained with the same pre-training. Dual-stream models, with their higher modality independence achieved by approximately doubling the number of parameters, are expected to preserve the NLU capability better. Our main finding is that the dual-stream scores are not much different than the single-stream scores, contrary to expectation. Further analysis shows that pre-training causes the performance drop in NLU tasks with few exceptions. These results suggest that adopting a single-stream structure and devising the pre-training could be an effective method for improving the maintenance of language knowledge in V&L extensions. 2021.emnlp-main.167 @@ -2283,7 +2283,7 @@ Neural Path Hunter: Reducing Hallucination in Dialogue Systems via Path Grounding NouhaDziri AndreaMadotto - OsmarZaïane + OsmarZaïane Avishek JoeyBose 2197–2214 Dialogue systems powered by large pre-trained language models exhibit an innate ability to deliver fluent and natural-sounding responses. Despite their impressive performance, these models are fitful and can often generate factually incorrect statements impeding their widespread adoption. In this paper, we focus on the task of improving faithfulness and reducing hallucination of neural dialogue systems to known facts supplied by a Knowledge Graph (KG). We propose Neural Path Hunter which follows a generate-then-refine strategy whereby a generated response is amended using the KG. Neural Path Hunter leverages a separate token-level fact critic to identify plausible sources of hallucination followed by a refinement stage that retrieves correct entities by crafting a query signal that is propagated over a k-hop subgraph. We empirically validate our proposed approach on the OpenDialKG dataset (Moon et al., 2019) against a suite of metrics and report a relative improvement of faithfulness over dialogue responses by 20.35% based on FeQA (Durmus et al., 2020). The code is available at https://github.com/nouhadziri/Neural-Path-Hunter. @@ -2532,7 +2532,7 @@ YubinGe JialiZeng JunfengYao - DegenHuang + DegenHuang JinsongSu 2407–2417 Dominant sentence ordering models can be classified into pairwise ordering models and set-to-sequence models. However, there is little attempt to combine these two types of models, which inituitively possess complementary advantages. In this paper, we propose a novel sentence ordering framework which introduces two classifiers to make better use of pairwise orderings for graph-based sentence ordering (Yin et al. 2019, 2021). Specially, given an initial sentence-entity graph, we first introduce a graph-based classifier to predict pairwise orderings between linked sentences. Then, in an iterative manner, based on the graph updated by previously predicted high-confident pairwise orderings, another classifier is used to predict the remaining uncertain pairwise orderings. At last, we adapt a GRN-based sentence ordering model (Yin et al. 2019, 2021) on the basis of final graph. Experiments on five commonly-used datasets demonstrate the effectiveness and generality of our model. Particularly, when equipped with BERT (Devlin et al. 2019) and FHDecoder (Yin et al. 2020), our model achieves state-of-the-art performance. Our code is available at https://github.com/DeepLearnXMU/IRSEG. @@ -2547,7 +2547,7 @@ YaxinFan XiaominChu PeifengLi - QiaomingZhu + QiaomingZhu 2418–2431 Implicit discourse relation recognition (IDRR) is a critical task in discourse analysis. Previous studies only regard it as a classification task and lack an in-depth understanding of the semantics of different relations. Therefore, we first view IDRR as a generation task and further propose a method joint modeling of the classification and generation. Specifically, we propose a joint model, CG-T5, to recognize the relation label and generate the target sentence containing the meaning of relations simultaneously. Furthermore, we design three target sentence forms, including the question form, for the generation model to incorporate prior knowledge. To address the issue that large discourse units are hardly embedded into the target sentence, we also propose a target sentence construction mechanism that automatically extracts core sentences from those large discourse units. Experimental results both on Chinese MCDTB and English PDTB datasets show that our model CG-T5 achieves the best performance against several state-of-the-art systems. 2021.emnlp-main.187 @@ -2557,9 +2557,9 @@ A Language Model-based Generative Classifier for Sentence-level Discourse Parsing - YingZhang + YingZhang HidetakaKamigaito - ManabuOkumura + ManabuOkumura 2432–2446 Discourse segmentation and sentence-level discourse parsing play important roles for various NLP tasks to consider textual coherence. Despite recent achievements in both tasks, there is still room for improvement due to the scarcity of labeled data. To solve the problem, we propose a language model-based generative classifier (LMGC) for using more information from labels by treating the labels as an input while enhancing label representations by embedding descriptions for each label. Moreover, since this enables LMGC to make ready the representations for labels, unseen in the pre-training step, we can effectively use a pre-trained language model in LMGC. Experimental results on the RST-DT dataset show that our LMGC achieved the state-of-the-art F1 score of 96.72 in discourse segmentation. It further achieved the state-of-the-art relation F1 scores of 84.69 with gold EDU boundaries and 81.18 with automatically segmented boundaries, respectively, in sentence-level discourse parsing. 2021.emnlp-main.188 @@ -2616,7 +2616,7 @@ <fixed-case>FL</fixed-case>i<fixed-case>T</fixed-case>ext: A Faster and Lighter Semi-Supervised Text Classification with Convolution Networks - ChenLiu + ChenLiu ZhangMengchao FuZhibing PanpanHou @@ -2630,10 +2630,10 @@ Evaluating Debiasing Techniques for Intersectional Biases - ShivashankarSubramanian + ShivashankarSubramanian XudongHan - TimothyBaldwin - TrevorCohn + TimothyBaldwin + TrevorCohn LeaFrermann 2492–2498 Bias is pervasive for NLP models, motivating the development of automatic debiasing techniques. Evaluation of NLP debiasing methods has largely been limited to binary attributes in isolation, e.g., debiasing with respect to binary gender or race, however many corpora involve multiple such attributes, possibly with higher cardinality. In this paper we argue that a truly fair model must consider ‘gerrymandering’ groups which comprise not only single attributes, but also intersectional groups. We evaluate a form of bias-constrained model which is new to NLP, as well an extension of the iterative nullspace projection technique which can handle multiple identities. @@ -2661,7 +2661,7 @@ YanyanLan YanWang HuaweiShen - XueqiCheng + XueqiCheng 2510–2521 Unsupervised style transfer models are mainly based on an inductive learning approach, which represents the style as embeddings, decoder parameters, or discriminator parameters and directly applies these general rules to the test cases. However, the lacking of parallel corpus hinders the ability of these inductive learning methods on this task. As a result, it is likely to cause severe inconsistent style expressions, like ‘the salad is rude’. To tackle this problem, we propose a novel transductive learning approach in this paper, based on a retrieval-based context-aware style representation. Specifically, an attentional encoder-decoder with a retriever framework is utilized. It involves top-K relevant sentences in the target style in the transfer process. In this way, we can learn a context-aware style embedding to alleviate the above inconsistency problem. In this paper, both sparse (BM25) and dense retrieval functions (MIPS) are used, and two objective functions are designed to facilitate joint learning. Experimental results show that our method outperforms several strong baselines. The proposed transductive learning approach is general and effective to the task of unsupervised style transfer, and we will apply it to the other two typical methods in the future. 2021.emnlp-main.195 @@ -2688,7 +2688,7 @@ Coupling Context Modeling with Zero Pronoun Recovering for Document-Level Natural Language Generation XinTan LongyinZhang - GuodongZhou + GuodongZhou 2530–2540 Natural language generation (NLG) tasks on pro-drop languages are known to suffer from zero pronoun (ZP) problems, and the problems remain challenging due to the scarcity of ZP-annotated NLG corpora. In this case, we propose a highly adaptive two-stage approach to couple context modeling with ZP recovering to mitigate the ZP problem in NLG tasks. Notably, we frame the recovery process in a task-supervised fashion where the ZP representation recovering capability is learned during the NLG task learning process, thus our method does not require NLG corpora annotated with ZPs. For system enhancement, we learn an adversarial bot to adjust our model outputs to alleviate the error propagation caused by mis-recovered ZPs. Experiments on three document-level NLG tasks, i.e., machine translation, question answering, and summarization, show that our approach can improve the performance to a great extent, and the improvement on pronoun translation is very impressive. 2021.emnlp-main.197 @@ -2769,11 +2769,11 @@ Syntactically-Informed Unsupervised Paraphrasing with Non-Parallel Data ErguangYang MingtongLiu - DeyiXiong + DeyiXiong YujieZhang YaoMeng ChangjianHu - JinanXu + JinanXu YufengChen 2594–2604 Previous works on syntactically controlled paraphrase generation heavily rely on large-scale parallel paraphrase data that is not easily available for many languages and domains. In this paper, we take this research direction to the extreme and investigate whether it is possible to learn syntactically controlled paraphrase generation with nonparallel data. We propose a syntactically-informed unsupervised paraphrasing model based on conditional variational auto-encoder (VAE) which can generate texts in a specified syntactic structure. Particularly, we design a two-stage learning method to effectively train the model using non-parallel data. The conditional VAE is trained to reconstruct the input sentence according to the given input and its syntactic structure. Furthermore, to improve the syntactic controllability and semantic consistency of the pre-trained conditional VAE, we fine-tune it using syntax controlling and cycle reconstruction learning objectives, and employ Gumbel-Softmax to combine these new learning objectives. Experiment results demonstrate that the proposed model trained only on non-parallel data is capable of generating diverse paraphrases with specified syntactic structure. Additionally, we validate the effectiveness of our method for generating syntactically adversarial examples on the sentiment analysis task. @@ -2929,7 +2929,7 @@ Machine Reading Comprehension as Data Augmentation: A Case Study on Implicit Event Argument Extraction JianLiu YufengChen - JinanXu + JinanXu 2716–2725 Implicit event argument extraction (EAE) is a crucial document-level information extraction task that aims to identify event arguments beyond the sentence level. Despite many efforts for this task, the lack of enough training data has long impeded the study. In this paper, we take a new perspective to address the data sparsity issue faced by implicit EAE, by bridging the task with machine reading comprehension (MRC). Particularly, we devise two data augmentation regimes via MRC, including: 1) implicit knowledge transfer, which enables knowledge transfer from other tasks, by building a unified training framework in the MRC formulation, and 2) explicit data augmentation, which can explicitly generate new training examples, by treating MRC models as an annotator. The extensive experiments have justified the effectiveness of our approach — it not only obtains state-of-the-art performance on two benchmarks, but also demonstrates superior results in a data-low scenario. 2021.emnlp-main.214 @@ -2957,9 +2957,9 @@ ChenweiZhang YawenYang XiaoheLi - LiLin + LiLin LijieWen - Philip S.Yu + Philip S.Yu 2737–2746 Low-resource Relation Extraction (LRE) aims to extract relation facts from limited labeled corpora when human annotation is scarce. Existing works either utilize self-training scheme to generate pseudo labels that will cause the gradual drift problem, or leverage meta-learning scheme which does not solicit feedback explicitly. To alleviate selection bias due to the lack of feedback loops in existing LRE learning paradigms, we developed a Gradient Imitation Reinforcement Learning method to encourage pseudo label data to imitate the gradient descent direction on labeled data and bootstrap its optimization capability through trial and error. We also propose a framework called GradLRE, which handles two major scenarios in low-resource relation extraction. Besides the scenario where unlabeled data is sufficient, GradLRE handles the situation where no unlabeled data is available, by exploiting a contextualized augmentation method to generate data. Experimental results on two public datasets demonstrate the effectiveness of GradLRE on low resource relation extraction when comparing with baselines. 2021.emnlp-main.216 @@ -3013,7 +3013,7 @@ GuolinKe WaleedMalik ZhichengDou - PaulBennett + PaulBennett Tie-YanLiu ArnoldOverwijk 2780–2791 @@ -3071,7 +3071,7 @@ RuiyangRen YingqiQu JingLiu - Wayne XinZhao + Wayne XinZhao QiaoQiaoShe HuaWu HaifengWang @@ -3096,10 +3096,10 @@ From Alignment to Assignment: Frustratingly Simple Unsupervised Entity Alignment - XinMao + XinMao WentingWang YuanbinWu - ManLan + ManLan 2843–2853 Cross-lingual entity alignment (EA) aims to find the equivalent entities between crosslingual KGs (Knowledge Graphs), which is a crucial step for integrating KGs. Recently, many GNN-based EA methods are proposed and show decent performance improvements on several public datasets. However, existing GNN-based EA methods inevitably inherit poor interpretability and low efficiency from neural networks. Motivated by the isomorphic assumption of GNN-based methods, we successfully transform the cross-lingual EA problem into an assignment problem. Based on this re-definition, we propose a frustratingly Simple but Effective Unsupervised entity alignment method (SEU) without neural networks. Extensive experiments have been conducted to show that our proposed unsupervised approach even beats advanced supervised methods across all public datasets while having high efficiency, interpretability, and stability. 2021.emnlp-main.226 @@ -3195,8 +3195,8 @@ XuelinSitu SameenMaruf IngridZukerman - CecileParis - GholamrezaHaffari + CecileParis + GholamrezaHaffari 2933–2940 Lifelong Learning (LL) black-box models are dynamic in that they keep learning from new tasks and constantly update their parameters. Owing to the need to utilize information from previously seen tasks, and capture commonalities in potentially diverse data, it is hard for automatic explanation methods to explain the outcomes of these models. In addition, existing explanation methods, e.g., LIME, which are computationally expensive when explaining a static black-box model, are even more inefficient in the LL setting. In this paper, we propose a novel Lifelong Explanation (LLE) approach that continuously trains a student explainer under the supervision of a teacher – an arbitrary explanation algorithm – on different tasks undertaken in LL. We also leverage the Experience Replay (ER) mechanism to prevent catastrophic forgetting in the student explainer. Our experiments comparing LLE to three baselines on text classification tasks show that LLE can enhance the stability of the explanations for all seen tasks and maintain the same level of faithfulness to the black-box model as the teacher, while being up to 10ˆ2 times faster at test time. Our ablation study shows that the ER mechanism in our LLE approach enhances the learning capabilities of the student explainer. Our code is available at https://github.com/situsnow/LLE. 2021.emnlp-main.233 @@ -3206,10 +3206,10 @@ Linguistic Dependencies and Statistical Dependence - Jacob LouisHoover + Jacob LouisHoover WenyuDu AlessandroSordoni - Timothy J.O’Donnell + Timothy J.O’Donnell 2941–2963 Are pairs of words that tend to occur together also likely to stand in a linguistic dependency? This empirical question is motivated by a long history of literature in cognitive science, psycholinguistics, and NLP. In this work we contribute an extensive analysis of the relationship between linguistic dependencies and statistical dependence between words. Improving on previous work, we introduce the use of large pretrained language models to compute contextualized estimates of the pointwise mutual information between words (CPMI). For multiple models and languages, we extract dependency trees which maximize CPMI, and compare to gold standard linguistic dependencies. Overall, we find that CPMI dependencies achieve an unlabelled undirected attachment score of at most \approx 0.5. While far above chance, and consistently above a non-contextualized PMI baseline, this score is generally comparable to a simple baseline formed by connecting adjacent words. We analyze which kinds of linguistic dependencies are best captured in CPMI dependencies, and also find marked differences between the estimates of the large pretrained language models, illustrating how their different training schemes affect the type of dependencies they capture. 2021.emnlp-main.234 @@ -3458,7 +3458,7 @@ ShizhanChen XiaowangZhang ZhiyongFeng - DeyiXiong + DeyiXiong ShaojuanWu ChunliuDou 3148–3161 @@ -3573,7 +3573,7 @@ Enlivening Redundant Heads in Multi-head Self-attention for Machine Translation TianfuZhang - HeyanHuang + HeyanHuang ChongFeng LongbingCao 3238–3248 @@ -3587,7 +3587,7 @@ Unsupervised Neural Machine Translation with Universal Grammar ZuchaoLi MasaoUtiyama - EiichiroSumita + EiichiroSumita HaiZhao 3249–3264 Machine translation usually relies on parallel corpora to provide parallel signals for training. The advent of unsupervised machine translation has brought machine translation away from this reliance, though performance still lags behind traditional supervised machine translation. In unsupervised machine translation, the model seeks symmetric language similarities as a source of weak parallel signal to achieve translation. Chomsky’s Universal Grammar theory postulates that grammar is an innate form of knowledge to humans and is governed by universal principles and constraints. Therefore, in this paper, we seek to leverage such shared grammar clues to provide more explicit language parallel signals to enhance the training of unsupervised machine translation models. Through experiments on multiple typical language pairs, we demonstrate the effectiveness of our proposed approaches. @@ -3599,8 +3599,8 @@ Encouraging Lexical Translation Consistency for Document-Level Neural Machine Translation XinglinLyu - JunhuiLi - ZhengxianGong + JunhuiLi + ZhengxianGong MinZhang 3265–3277 Recently a number of approaches have been proposed to improve translation performance for document-level neural machine translation (NMT). However, few are focusing on the subject of lexical translation consistency. In this paper we apply “one translation per discourse” in NMT, and aim to encourage lexical translation consistency for document-level NMT. This is done by first obtaining a word link for each source word in a document, which tells the positions where the source word appears. Then we encourage the translation of those words within a link to be consistent in two ways. On the one hand, when encoding sentences within a document we properly share context information of those words. On the other hand, we propose an auxiliary loss function to better constrain that their translation should be consistent. Experimental results on Chinese↔English and English→French translation tasks show that our approach not only achieves state-of-the-art performance in BLEU scores, but also greatly improves lexical consistency in translation. @@ -3625,7 +3625,7 @@ YijinLiu FandongMeng YufengChen - JinanXu + JinanXu JieZhou 3285–3296 Scheduled sampling is widely used to mitigate the exposure bias problem for neural machine translation. Its core motivation is to simulate the inference scene during training by replacing ground-truth tokens with predicted tokens, thus bridging the gap between training and inference. However, vanilla scheduled sampling is merely based on training steps and equally treats all decoding steps. Namely, it simulates an inference scene with uniform error rates, which disobeys the real inference scene, where larger decoding steps usually have higher error rates due to error accumulations. To alleviate the above discrepancy, we propose scheduled sampling methods based on decoding steps, increasing the selection chance of predicted tokens with the growth of decoding steps. Consequently, we can more realistically simulate the inference scene during training, thus better bridging the gap between training and inference. Moreover, we investigate scheduled sampling based on both training steps and decoding steps for further improvements. Experimentally, our approaches significantly outperform the Transformer baseline and vanilla scheduled sampling on three large-scale WMT tasks. Additionally, our approaches also generalize well to the text summarization task on two popular benchmarks. @@ -3678,10 +3678,10 @@ Generalised Unsupervised Domain Adaptation of Neural Machine Translation with Cross-Lingual Data Selection - Thuy-TrangVu + Thuy-TrangVu XuanliHe DinhPhung - GholamrezaHaffari + GholamrezaHaffari 3335–3346 This paper considers the unsupervised domain adaptation problem for neural machine translation (NMT), where we assume the access to only monolingual text in either the source or target language in the new domain. We propose a cross-lingual data selection method to extract in-domain sentences in the missing language side from a large generic monolingual corpus. Our proposed method trains an adaptive layer on top of multilingual BERT by contrastive learning to align the representation between the source and target language. This then enables the transferability of the domain classifier between the languages in a zero-shot manner. Once the in-domain data is detected by the classifier, the NMT model is then adapted to the new domain by jointly learning translation and domain discrimination tasks. We evaluate our cross-lingual data selection method on NMT across five diverse domains in three language pairs, as well as a real-world scenario of translation for COVID-19. The results show that our proposed method outperforms other selection baselines up to +1.5 BLEU score. 2021.emnlp-main.268 @@ -3839,7 +3839,7 @@ Jin YeaJang SanKim MinyoungJung - SaimShin + SaimShin GahgeneGweon 3447–3452 Backchannel (BC), a short reaction signal of a listener to a speaker’s utterances, helps to improve the quality of the conversation. Several studies have been conducted to predict BC in conversation; however, the utilization of advanced natural language processing techniques using lexical information presented in the utterances of a speaker has been less considered. To address this limitation, we present a BC prediction model called BPM_MT (Backchannel prediction model with multitask learning), which utilizes KoBERT, a pre-trained language model. The BPM_MT simultaneously carries out two tasks at learning: 1) BC category prediction using acoustic and lexical features, and 2) sentiment score prediction based on sentiment cues. BPM_MT exhibited 14.24% performance improvement compared to the existing baseline in the four BC categories: continuer, understanding, empathic response, and No BC. In particular, for empathic response category, a performance improvement of 17.14% was achieved. @@ -3868,7 +3868,7 @@ JunmoKang JeonghwanKim SuwonShin - Sung-HyonMyaeng + Sung-HyonMyaeng 3464–3476 Tag recommendation relies on either a ranking function for top-k tags or an autoregressive generation method. However, the previous methods neglect one of two seemingly conflicting yet desirable characteristics of a tag set: orderlessness and inter-dependency. While the ranking approach fails to address the inter-dependency among tags when they are ranked, the autoregressive approach fails to take orderlessness into account because it is designed to utilize sequential relations among tokens. We propose a sequence-oblivious generation method for tag recommendation, in which the next tag to be generated is independent of the order of the generated tags and the order of the ground truth tags occurring in training data. Empirical results on two different domains, Instagram and Stack Overflow, show that our method is significantly superior to the previous approaches. 2021.emnlp-main.279 @@ -3880,7 +3880,7 @@ End-to-End Conversational Search for Online Shopping with Utterance Transfer LiqiangXiao JunMa - Xin LunaDong + Xin LunaDong PascualMartínez-Gómez NasserZalmout ChenweiZhang @@ -3898,7 +3898,7 @@ Self-Supervised Curriculum Learning for Spelling Error Correction ZifaGan HongfeiXu - HongyingZan + HongyingZan 3487–3494 Spelling Error Correction (SEC) that requires high-level language understanding is a challenging but useful task. Current SEC approaches normally leverage a pre-training then fine-tuning procedure that treats data equally. By contrast, Curriculum Learning (CL) utilizes training data differently during training and has shown its effectiveness in improving both performance and training efficiency in many other NLP tasks. In NMT, a model’s performance has been shown sensitive to the difficulty of training examples, and CL has been shown effective to address this. In SEC, the data from different language learners are naturally distributed at different difficulty levels (some errors made by beginners are obvious to correct while some made by fluent speakers are hard), and we expect that designing a curriculum correspondingly for model learning may also help its training and bring about better performance. In this paper, we study how to further improve the performance of the state-of-the-art SEC method with CL, and propose a Self-Supervised Curriculum Learning (SSCL) approach. Specifically, we directly use the cross-entropy loss as criteria for: 1) scoring the difficulty of training data, and 2) evaluating the competence of the model. In our approach, CL improves the model training, which in return improves the CL measurement. In our experiments on the SIGHAN 2015 Chinese spelling check task, we show that SSCL is superior to previous norm-based and uncertainty-aware approaches, and establish a new state of the art (74.38% F1). 2021.emnlp-main.281 @@ -4061,7 +4061,7 @@ PengQi HaejunLee TgSido - ChristopherManning + ChristopherManning 3599–3614 We develop a unified system to answer directly from text open-domain questions that may require a varying number of retrieval steps. We employ a single multi-task transformer model to perform all the necessary subtasks—retrieving supporting facts, reranking them, and predicting the answer from all retrieved documents—in an iterative fashion. We avoid crucial assumptions of previous work that do not transfer well to real-world settings, including exploiting knowledge of the fixed number of retrieval steps required to answer each question or using structured metadata like knowledge bases or web links that have limited availability. Instead, we design a system that can answer open-domain questions on any text collection without prior knowledge of reasoning complexity. To emulate this setting, we construct a new benchmark, called BeerQA, by combining existing one- and two-step datasets with a new collection of 530 questions that require three Wikipedia pages to answer, unifying Wikipedia corpora versions in the process. We show that our model demonstrates competitive performance on both existing benchmarks and this new benchmark. We make the new benchmark available at https://beerqa.github.io/. 2021.emnlp-main.292 @@ -4075,7 +4075,7 @@ LiangPang YanyanLan HuaweiShen - XueqiCheng + XueqiCheng 3615–3626 Information seeking is an essential step for open-domain question answering to efficiently gather evidence from a large corpus. Recently, iterative approaches have been proven to be effective for complex questions, by recursively retrieving new evidence at each step. However, almost all existing iterative approaches use predefined strategies, either applying the same retrieval function multiple times or fixing the order of different retrieval functions, which cannot fulfill the diverse requirements of various questions. In this paper, we propose a novel adaptive information-seeking strategy for open-domain question answering, namely AISO. Specifically, the whole retrieval and answer process is modeled as a partially observed Markov decision process, where three types of retrieval operations (e.g., BM25, DPR, and hyperlink) and one answer operation are defined as actions. According to the learned policy, AISO could adaptively select a proper retrieval action to seek the missing evidence at each step, based on the collected evidence and the reformulated query, or directly output the answer when the evidence set is sufficient for the question. Experiments on SQuAD Open and HotpotQA fullwiki, which serve as single-hop and multi-hop open-domain QA benchmarks, show that AISO outperforms all baseline methods with predefined strategies in terms of both retrieval and answer evaluations. 2021.emnlp-main.293 @@ -4086,13 +4086,13 @@ Mapping probability word problems to executable representations - SimonSuster + SimonSuster PieterFivez PietroTotis AngelikaKimmig JesseDavis Lucde Raedt - WalterDaelemans + WalterDaelemans 3627–3640 While solving math word problems automatically has received considerable attention in the NLP community, few works have addressed probability word problems specifically. In this paper, we employ and analyse various neural models for answering such word problems. In a two-step approach, the problem text is first mapped to a formal representation in a declarative language using a sequence-to-sequence model, and then the resulting representation is executed using a probabilistic programming system to provide the answer. Our best performing model incorporates general-domain contextualised word representations that were finetuned using transfer learning on another in-domain dataset. We also apply end-to-end models to this task, which bring out the importance of the two-step approach in obtaining correct solutions to probability problems. 2021.emnlp-main.294 @@ -4166,7 +4166,7 @@ SiruOuyang HaiZhao MasaoUtiyama - EiichiroSumita + EiichiroSumita 3685–3696 Conversational machine reading (CMR) requires machines to communicate with humans through multi-turn interactions between two salient dialogue states of decision making and question generation processes. In open CMR settings, as the more realistic scenario, the retrieved background knowledge would be noisy, which results in severe challenges in the information transmission. Existing studies commonly train independent or pipeline systems for the two subtasks. However, those methods are trivial by using hard-label decisions to activate question generation, which eventually hinders the model performance. In this work, we propose an effective gating strategy by smoothing the two dialogue states in only one decoder and bridge decision making and question generation to provide a richer dialogue state reference. Experiments on the OR-ShARC dataset show the effectiveness of our method, which achieves new state-of-the-art results. 2021.emnlp-main.299 @@ -4176,7 +4176,7 @@ <fixed-case>F</fixed-case>in<fixed-case>QA</fixed-case>: A Dataset of Numerical Reasoning over Financial Data - ZhiyuChen + ZhiyuChen WenhuChen ChareseSmiley SameenaShah @@ -4184,8 +4184,8 @@ DylanLangdon ReemaMoussa MattBeane - Ting-HaoHuang - BryanRoutledge + Ting-HaoHuang + BryanRoutledge William YangWang 3697–3711 The sheer volume of financial statements makes it difficult for humans to access and analyze a business’s financials. Robust numerical reasoning likewise faces unique challenges in this domain. In this work, we focus on answering deep questions over financial data, aiming to automate the analysis of a large corpus of financial documents. In contrast to existing tasks on general domain, the finance domain includes complex numerical reasoning and understanding of heterogeneous representations. To facilitate analytical progress, we propose a new large-scale dataset, FinQA, with Question-Answering pairs over Financial reports, written by financial experts. We also annotate the gold reasoning programs to ensure full explainability. We further introduce baselines and conduct comprehensive experiments in our dataset. The results demonstrate that popular, large, pre-trained models fall far short of expert humans in acquiring finance knowledge and in complex multi-step numerical reasoning on that knowledge. Our dataset – the first of its kind – should therefore enable significant, new community research into complex application domains. The dataset and code are publicly available at https://github.com/czyssrs/FinQA. @@ -4211,7 +4211,7 @@ <fixed-case>R</fixed-case>ock<fixed-case>NER</fixed-case>: A Simple Method to Create Adversarial Examples for Evaluating the Robustness of Named Entity Recognition Models - Bill YuchenLin + Bill YuchenLin WenyangGao JunYan RyanMoreno @@ -4242,7 +4242,7 @@ Constructing a Psychometric Testbed for Fair Natural Language Processing AhmedAbbasi DavidDobolyi - John P.Lalor + John P.Lalor Richard G.Netemeyer KendallSmith YiYang @@ -4271,7 +4271,7 @@ <fixed-case>C</fixed-case>hinese <fixed-case>WPLC</fixed-case>: A <fixed-case>C</fixed-case>hinese Dataset for Evaluating Pretrained Language Models on Word Prediction Given Long-Range Context HuibinGe ChenxiSun - DeyiXiong + DeyiXiong QunLiu 3770–3778 This paper presents a Chinese dataset for evaluating pretrained language models on Word Prediction given Long-term Context (Chinese WPLC). We propose both automatic and manual selection strategies tailored to Chinese to guarantee that target words in passages collected from over 69K novels can only be predicted with long-term context beyond the scope of sentences containing the target words. Dataset analysis reveals that the types of target words range from common nouns to Chinese 4-character idioms. We also observe that linguistic relations between target words and long-range context exhibit diversity, including lexical match, synonym, summary and reasoning. Experiment results show that the Chinese pretrained language model PanGu-\alpha is 45 points behind human in terms of top-1 word prediction accuracy, indicating that Chinese WPLC is a challenging dataset. The dataset is publicly available at https://git.openi.org.cn/PCL-Platform.Intelligence/Chinese_WPLC. @@ -4324,7 +4324,7 @@ Total Recall: a Customized Continual Learning Method for Neural Semantic Parsers ZhuangLi LizhenQu - GholamrezaHaffari + GholamrezaHaffari 3816–3831 This paper investigates continual learning for semantic parsing. In this setting, a neural semantic parser learns tasks sequentially without accessing full training data from previous tasks. Direct application of the SOTA continual learning algorithms to this problem fails to achieve comparable performance with re-training models with all seen tasks because they have not considered the special properties of structured outputs yielded by semantic parsers. Therefore, we propose TotalRecall, a continual learning method designed for neural semantic parsers from two aspects: i) a sampling method for memory replay that diversifies logical form templates and balances distributions of parse actions in a memory; ii) a two-stage training method that significantly improves generalization capability of the parsers across tasks. We conduct extensive experiments to study the research problems involved in continual semantic parsing and demonstrate that a neural semantic parser trained with TotalRecall achieves superior performance than the one trained directly with the SOTA continual learning algorithms and achieve a 3-6 times speedup compared to re-training from scratch. 2021.emnlp-main.310 @@ -4390,7 +4390,7 @@ Virtual Data Augmentation: A Robust and General Framework for Fine-tuning Pre-trained Models KunZhou - Wayne XinZhao + Wayne XinZhao SiruiWang FuzhengZhang WeiWu @@ -4578,7 +4578,7 @@ SaimWani ShivanshPatel UnnatJain - AngelChang + AngelChang 4018–4028 In the Vision-and-Language Navigation (VLN) task an embodied agent navigates a 3D environment, following natural language instructions. A challenge in this task is how to handle ‘off the path’ scenarios where an agent veers from a reference path. Prior work supervises the agent with actions based on the shortest path from the agent’s location to the goal, but such goal-oriented supervision is often not in alignment with the instruction. Furthermore, the evaluation metrics employed by prior work do not measure how much of a language instruction the agent is able to follow. In this work, we propose a simple and effective language-aligned supervision scheme, and a new metric that measures the number of sub-instructions the agent has completed during navigation. 2021.emnlp-main.328 @@ -4602,7 +4602,7 @@ JingunKwon NaokiKobayashi HidetakaKamigaito - ManabuOkumura + ManabuOkumura 4039–4044 Sentence extractive summarization shortens a document by selecting sentences for a summary while preserving its important contents. However, constructing a coherent and informative summary is difficult using a pre-trained BERT-based encoder since it is not explicitly trained for representing the information of sentences in a document. We propose a nested tree-based extractive summarization model on RoBERTa (NeRoBERTa), where nested tree structures consist of syntactic and discourse trees in a given document. Experimental results on the CNN/DailyMail dataset showed that NeRoBERTa outperforms baseline models in ROUGE. Human evaluation results also showed that NeRoBERTa achieves significantly better scores than the baselines in terms of coherence and yields comparable scores to the state-of-the-art models. 2021.emnlp-main.330 @@ -4674,7 +4674,7 @@ YutaHitomi HideakiTamori RyoheiSasano - NaoakiOkazaki + NaoakiOkazaki KentaroInui KoichiTakeda 4085–4090 @@ -4782,7 +4782,7 @@ JaydeepSen MustafaCanim SoumenChakrabarti - AlfioGliozzo + AlfioGliozzo KarthikSankaranarayanan 4159–4172 Weakly-supervised table question-answering (TableQA) models have achieved state-of-art performance by using pre-trained BERT transformer to jointly encoding a question and a table to produce structured query for the question. However, in practical settings TableQA systems are deployed over table corpora having topic and word distributions quite distinct from BERT’s pretraining corpus. In this work we simulate the practical topic shift scenario by designing novel challenge benchmarks WikiSQL-TS and WikiTable-TS, consisting of train-dev-test splits in five distinct topic groups, based on the popular WikiSQL and WikiTable-Questions datasets. We empirically show that, despite pre-training on large open-domain text, performance of models degrades significantly when they are evaluated on unseen topics. In response, we propose T3QA (Topic Transferable Table Question Answering) a pragmatic adaptation framework for TableQA comprising of: (1) topic-specific vocabulary injection into BERT, (2) a novel text-to-text transformer generator (such as T5, GPT2) based natural language question generation pipeline focused on generating topic-specific training data, and (3) a logical form re-ranker. We show that T3QA provides a reasonably good baseline for our topic shift benchmarks. We believe our topic split benchmarks will lead to robust TableQA solutions that are better suited for practical deployment @@ -4963,7 +4963,7 @@ <fixed-case>D</fixed-case>u<fixed-case>R</fixed-case>ec<fixed-case>D</fixed-case>ial 2.0: A Bilingual Parallel Corpus for Conversational Recommendation ZemingLiu HaifengWang - Zheng-YuNiu + Zheng-YuNiu HuaWu WanxiangChe 4335–4347 @@ -4977,7 +4977,7 @@ End-to-End Learning of Flowchart Grounded Task-Oriented Dialogs DineshRaghu ShantanuAgarwal - SachindraJoshi + SachindraJoshi Mausam 4348–4366 We propose a novel problem within end-to-end learning of task oriented dialogs (TOD), in which the dialog system mimics a troubleshooting agent who helps a user by diagnosing their problem (e.g., car not starting). Such dialogs are grounded in domain-specific flowcharts, which the agent is supposed to follow during the conversation. Our task exposes novel technical challenges for neural TOD, such as grounding an utterance to the flowchart without explicit annotation, referring to additional manual pages when user asks a clarification question, and ability to follow unseen flowcharts at test time. We release a dataset (FLODIAL) consisting of 2,738 dialogs grounded on 12 different troubleshooting flowcharts. We also design a neural model, FLONET, which uses a retrieval-augmented generation architecture to train the dialog agent. Our experiments find that FLONET can do zero-shot transfer to unseen flowcharts, and sets a strong baseline for future research. @@ -4993,7 +4993,7 @@ SeonghyeonYe JaeyeolJeon Hee YoungPark - AliceOh + AliceOh 4367–4380 We present a model to predict fine-grained emotions along the continuous dimensions of valence, arousal, and dominance (VAD) with a corpus with categorical emotion annotations. Our model is trained by minimizing the EMD (Earth Mover’s Distance) loss between the predicted VAD score distribution and the categorical emotion distributions sorted along VAD, and it can simultaneously classify the emotion categories and predict the VAD scores for a given sentence. We use pre-trained RoBERTa-Large and fine-tune on three different corpora with categorical labels and evaluate on EmoBank corpus with VAD scores. We show that our approach reaches comparable performance to that of the state-of-the-art classifiers in categorical emotion classification and shows significant positive correlations with the ground truth VAD scores. Also, further training with supervision of VAD labels leads to improved performance especially when dataset is small. We also present examples of predictions of appropriate emotion words that are not part of the original annotations. 2021.emnlp-main.358 @@ -5017,10 +5017,10 @@ XinchengJu DongZhang RongXiao - JunhuiLi + JunhuiLi ShoushanLi MinZhang - GuodongZhou + GuodongZhou 4395–4405 Aspect terms extraction (ATE) and aspect sentiment classification (ASC) are two fundamental and fine-grained sub-tasks in aspect-level sentiment analysis (ALSA). In the textual analysis, joint extracting both aspect terms and sentiment polarities has been drawn much attention due to the better applications than individual sub-task. However, in the multi-modal scenario, the existing studies are limited to handle each sub-task independently, which fails to model the innate connection between the above two objectives and ignores the better applications. Therefore, in this paper, we are the first to jointly perform multi-modal ATE (MATE) and multi-modal ASC (MASC), and we propose a multi-modal joint learning approach with auxiliary cross-modal relation detection for multi-modal aspect-level sentiment analysis (MALSA). Specifically, we first build an auxiliary text-image relation detection module to control the proper exploitation of visual information. Second, we adopt the hierarchical framework to bridge the multi-modal connection between MATE and MASC, as well as separately visual guiding for each sub module. Finally, we can obtain all aspect-level sentiment polarities dependent on the jointly extracted specific aspects. Extensive experiments show the effectiveness of our approach against the joint textual approaches, pipeline and collapsed multi-modal approaches. 2021.emnlp-main.360 @@ -5060,7 +5060,7 @@ The Effect of Round-Trip Translation on Fairness in Sentiment Analysis Jonathan GabelChristiansen MathiasGammelgaard - AndersSøgaard + AndersSøgaard 4423–4428 Sentiment analysis systems have been shown to exhibit sensitivity to protected attributes. Round-trip translation, on the other hand, has been shown to normalize text. We explore the impact of round-trip translation on the demographic parity of sentiment classifiers and show how round-trip translation consistently improves classification fairness at test time (reducing up to 47% of between-group gaps). We also explore the idea of retraining sentiment classifiers on round-trip-translated data. 2021.emnlp-main.363 @@ -5072,7 +5072,7 @@ <fixed-case>CH</fixed-case>o<fixed-case>R</fixed-case>a<fixed-case>L</fixed-case>: Collecting Humor Reaction Labels from Millions of Social Media Users ZixiaofanYang ShayanHooshmand - JuliaHirschberg + JuliaHirschberg 4429–4435 Humor detection has gained attention in recent years due to the desire to understand user-generated content with figurative language. However, substantial individual and cultural differences in humor perception make it very difficult to collect a large-scale humor dataset with reliable humor labels. We propose CHoRaL, a framework to generate perceived humor labels on Facebook posts, using the naturally available user reactions to these posts with no manual annotation needed. CHoRaL provides both binary labels and continuous scores of humor and non-humor. We present the largest dataset to date with labeled humor on 785K posts related to COVID-19. Additionally, we analyze the expression of COVID-related humor in social media by extracting lexico-semantic and affective features from the posts, and build humor detection models with performance similar to humans. CHoRaL enables the development of large-scale humor detection models on any topic and opens a new path to the study of humor on social media. 2021.emnlp-main.364 @@ -5088,7 +5088,7 @@ LuXiang YuZhou JiajunZhang - ChengqingZong + ChengqingZong 4436–4451 Dialogue summarization has drawn much attention recently. Especially in the customer service domain, agents could use dialogue summaries to help boost their works by quickly knowing customer’s issues and service progress. These applications require summaries to contain the perspective of a single speaker and have a clear topic flow structure, while neither are available in existing datasets. Therefore, in this paper, we introduce a novel Chinese dataset for Customer Service Dialogue Summarization (CSDS). CSDS improves the abstractive summaries in two aspects: (1) In addition to the overall summary for the whole dialogue, role-oriented summaries are also provided to acquire different speakers’ viewpoints. (2) All the summaries sum up each topic separately, thus containing the topic-level structure of the dialogue. We define tasks in CSDS as generating the overall summary and different role-oriented summaries for a given dialogue. Next, we compare various summarization methods on CSDS, and experiment results show that existing methods are prone to generate redundant and incoherent summaries. Besides, the performance becomes much worse when analyzing the performance on role-oriented summaries and topic structures. We hope that this study could benchmark Chinese dialogue summarization and benefit further studies. 2021.emnlp-main.365 @@ -5177,7 +5177,7 @@ All Bark and No Bite: Rogue Dimensions in Transformer Language Models Obscure Representational Quality WilliamTimkey - Martenvan Schijndel + Martenvan Schijndel 4527–4546 Similarity measures are a vital tool for understanding how language models represent and process language. Standard representational similarity measures such as cosine similarity and Euclidean distance have been successfully used in static word embedding models to understand how words cluster in semantic space. Recently, these measures have been applied to embeddings from contextualized models such as BERT and GPT-2. In this work, we call into question the informativity of such measures for contextualized language models. We find that a small number of rogue dimensions, often just 1-3, dominate these measures. Moreover, we find a striking mismatch between the dimensions that dominate similarity measures and those which are important to the behavior of the model. We show that simple postprocessing techniques such as standardization are able to correct for rogue dimensions and reveal underlying representational quality. We argue that accounting for rogue dimensions is essential for any similarity-based analysis of contextual language models. 2021.emnlp-main.372 @@ -5218,7 +5218,7 @@ ShengZhang XinZhang WeimingZhang - AndersSøgaard + AndersSøgaard 4581–4588 Using data from English cloze tests, in which subjects also self-reported their gender, age, education, and race, we examine performance differences of pretrained language models across demographic groups, defined by these (protected) attributes. We demonstrate wide performance gaps across demographic groups and show that pretrained language models systematically disfavor young non-white male speakers; i.e., not only do pretrained language models learn social biases (stereotypical associations) – pretrained language models also learn sociolectal biases, learning to speak more like some than like others. We show, however, that, with the exception of BERT models, larger pretrained language models reduce some the performance gaps between majority and minority groups. 2021.emnlp-main.375 @@ -5241,7 +5241,7 @@ Are <fixed-case>T</fixed-case>ransformers a Modern Version of <fixed-case>ELIZA</fixed-case>? <fixed-case>O</fixed-case>bservations on <fixed-case>F</fixed-case>rench Object Verb Agreement BingzhiLi GuillaumeWisniewski - BenoitCrabbé + BenoitCrabbé 4599–4610 Many recent works have demonstrated that unsupervised sentence representations of neural networks encode syntactic information by observing that neural language models are able to predict the agreement between a verb and its subject. We take a critical look at this line of research by showing that it is possible to achieve high accuracy on this agreement task with simple surface heuristics, indicating a possible flaw in our assessment of neural networks’ syntactic ability. Our fine-grained analyses of results on the long-range French object-verb agreement show that contrary to LSTMs, Transformers are able to capture a non-trivial amount of grammatical structure. 2021.emnlp-main.377 @@ -5450,7 +5450,7 @@ SantiagoCastro HanwenMiao WeijiLi - RadaMihalcea + RadaMihalcea 4770–4785 We aim to automatically identify human action reasons in online videos. We focus on the widespread genre of lifestyle vlogs, in which people perform actions while verbally describing them. We introduce and make publicly available the WhyAct dataset, consisting of 1,077 visual actions manually annotated with their reasons. We describe a multimodal model that leverages visual and textual information to automatically infer the reasons corresponding to an action presented in the video. 2021.emnlp-main.392 @@ -5462,7 +5462,7 @@ Genre as Weak Supervision for Cross-lingual Dependency Parsing MaxMüller-Eberstein Robvan der Goot - BarbaraPlank + BarbaraPlank 4786–4802 Recent work has shown that monolingual masked language models learn to represent data-driven notions of language variation which can be used for domain-targeted training data selection. Dataset genre labels are already frequently available, yet remain largely unexplored in cross-lingual setups. We harness this genre metadata as a weak supervision signal for targeted data selection in zero-shot dependency parsing. Specifically, we project treebank-level genre information to the finer-grained sentence level, with the goal to amplify information implicitly stored in unsupervised contextualized representations. We demonstrate that genre is recoverable from multilingual contextual embeddings and that it provides an effective signal for training data selection in cross-lingual, zero-shot scenarios. For 12 low-resource language treebanks, six of which are test-only, our genre-specific methods significantly outperform competitive baselines as well as recent embedding-based methods for data selection. Moreover, genre-based data selection provides new state-of-the-art results for three of these target languages. 2021.emnlp-main.393 @@ -5554,7 +5554,7 @@ Zero-Shot Dialogue Disentanglement by Self-Supervised Entangled Response Selection Ta-ChungChi - AlexanderRudnicky + AlexanderRudnicky 4897–4902 Dialogue disentanglement aims to group utterances in a long and multi-participant dialogue into threads. This is useful for discourse analysis and downstream applications such as dialogue response selection, where it can be the first step to construct a clean context/response set. Unfortunately, labeling all reply-to links takes quadratic effort w.r.t the number of utterances: an annotator must check all preceding utterances to identify the one to which the current utterance is a reply. In this paper, we are the first to propose a zero-shot dialogue disentanglement solution. Firstly, we train a model on a multi-participant response selection dataset harvested from the web which is not annotated; we then apply the trained model to perform zero-shot dialogue disentanglement. Without any labeled data, our model can achieve a cluster F1 score of 25. We also fine-tune the model using various amounts of labeled data. Experiments show that with only 10% of the data, we achieve nearly the same performance of using the full dataset. 2021.emnlp-main.400 @@ -5608,7 +5608,7 @@ Dialogue State Tracking with a Language Model using Schema-Driven Prompting Chia-HsuanLee HaoCheng - MariOstendorf + MariOstendorf 4937–4949 Task-oriented conversational systems often use dialogue state tracking to represent the user’s intentions, which involves filling in values of pre-defined slots. Many approaches have been proposed, often using task-specific architectures with special-purpose classifiers. Recently, good results have been obtained using more general architectures based on pretrained language models. Here, we introduce a new variation of the language modeling approach that uses schema-driven prompting to provide task-aware history encoding that is used for both categorical and non-categorical slots. We further improve performance by augmenting the prompting with schema descriptions, a naturally occurring source of in-domain knowledge. Our purely generative system achieves state-of-the-art performance on MultiWOZ 2.2 and achieves competitive performance on two other benchmarks: MultiWOZ 2.1 and M2M. The data and code will be available at https://github.com/chiahsuan156/DST-as-Prompting. 2021.emnlp-main.404 @@ -5660,8 +5660,8 @@ Unsupervised Data Augmentation with Naive Augmentation and without Unlabeled Data DavidLowell BrianHoward - Zachary C.Lipton - ByronWallace + Zachary C.Lipton + ByronWallace 4992–5001 Unsupervised Data Augmentation (UDA) is a semisupervised technique that applies a consistency loss to penalize differences between a model’s predictions on (a) observed (unlabeled) examples; and (b) corresponding ‘noised’ examples produced via data augmentation. While UDA has gained popularity for text classification, open questions linger over which design decisions are necessary and how to extend the method to sequence labeling tasks. In this paper, we re-examine UDA and demonstrate its efficacy on several sequential tasks. Our main contribution is an empirical study of UDA to establish which components of the algorithm confer benefits in NLP. Notably, although prior work has emphasized the use of clever augmentation techniques including back-translation, we find that enforcing consistency between predictions assigned to observed and randomly substituted words often yields comparable (or greater) benefits compared to these more complex perturbation models. Furthermore, we find that applying UDA’s consistency loss affords meaningful gains without any unlabeled data at all, i.e., in a standard supervised setting. In short, UDA need not be unsupervised to realize much of its noted benefits, and does not require complex data augmentation to be effective. 2021.emnlp-main.408 @@ -5753,7 +5753,7 @@ TianxingHe JingzhaoZhang ZhimingZhou - JamesGlass + JamesGlass 5087–5102 Exposure bias has been regarded as a central problem for auto-regressive language models (LM). It claims that teacher forcing would cause the test-time generation to be incrementally distorted due to the training-generation discrepancy. Although a lot of algorithms have been proposed to avoid teacher forcing and therefore alleviate exposure bias, there is little work showing how serious the exposure bias problem actually is. In this work, we focus on the task of open-ended language generation, propose metrics to quantify the impact of exposure bias in the aspects of quality, diversity, and consistency. Our key intuition is that if we feed ground-truth data prefixes (instead of prefixes generated by the model itself) into the model and ask it to continue the generation, the performance should become much better because the training-generation discrepancy in the prefix is removed. Both automatic and human evaluations are conducted in our experiments. On the contrary to the popular belief in exposure bias, we find that the the distortion induced by the prefix discrepancy is limited, and does not seem to be incremental during the generation. Moreover, our analysis reveals an interesting self-recovery ability of the LM, which we hypothesize to be countering the harmful effects from exposure bias. 2021.emnlp-main.415 @@ -5807,7 +5807,7 @@ Journalistic Guidelines Aware News Image Captioning XuewenYang SveborKaraman - JoelTetreault + JoelTetreault AlejandroJaimes 5162–5175 The task of news article image captioning aims to generate descriptive and informative captions for news article images. Unlike conventional image captions that simply describe the content of the image in general terms, news image captions follow journalistic guidelines and rely heavily on named entities to describe the image content, often drawing context from the whole article they are associated with. In this work, we propose a new approach to this task, motivated by caption guidelines that journalists follow. Our approach, Journalistic Guidelines Aware News Image Captioning (JoGANIC), leverages the structure of captions to improve the generation quality and guide our representation design. Experimental results, including detailed ablation studies, on two large-scale publicly available datasets show that JoGANIC substantially outperforms state-of-the-art methods both on caption generation and named entity related metrics. @@ -5832,7 +5832,7 @@ Refocusing on Relevance: Personalization in <fixed-case>NLG</fixed-case> ShiranDudy StevenBedrick - BonnieWebber + BonnieWebber 5190–5202 Many NLG tasks such as summarization, dialogue response, or open domain question answering, focus primarily on a source text in order to generate a target response. This standard approach falls short, however, when a user’s intent or context of work is not easily recoverable based solely on that source text– a scenario that we argue is more of the rule than the exception. In this work, we argue that NLG systems in general should place a much higher level of emphasis on making use of additional context, and suggest that relevance (as used in Information Retrieval) be thought of as a crucial tool for designing user-oriented text-generating tasks. We further discuss possible harms and hazards around such personalization, and argue that value-sensitive design represents a crucial path forward through these challenges. 2021.emnlp-main.421 @@ -5849,7 +5849,7 @@ KyunghyunCho HengJi JiaweiHan - ClareVoss + ClareVoss 5203–5215 Event schemas encode knowledge of stereotypical structures of events and their connections. As events unfold, schemas are crucial to act as a scaffolding. Previous work on event schema induction focuses either on atomic events or linear temporal event sequences, ignoring the interplay between events via arguments and argument relations. We introduce a new concept of Temporal Complex Event Schema: a graph-based schema representation that encompasses events, arguments, temporal connections and argument relations. In addition, we propose a Temporal Event Graph Model that predicts event instances following the temporal complex event schema. To build and evaluate such schemas, we release a new schema learning corpus containing 6,399 documents accompanied with event graphs, and we have manually constructed gold-standard schemas. Intrinsic evaluations by schema matching and instance graph perplexity, prove the superior quality of our probabilistic graph schema library compared to linear representations. Extrinsic evaluation on schema-guided future event prediction further demonstrates the predictive power of our event graph model, significantly outperforming human schemas and baselines by more than 17.8% on HITS@1. 2021.emnlp-main.422 @@ -5923,7 +5923,7 @@ Lifelong Event Detection with Knowledge Transfer PengfeiYu HengJi - PremNatarajan + PremNatarajan 5278–5290 Traditional supervised Information Extraction (IE) methods can extract structured knowledge elements from unstructured data, but it is limited to a pre-defined target ontology. In reality, the ontology of interest may change over time, adding emergent new types or more fine-grained subtypes. We propose a new lifelong learning framework to address this challenge. We focus on lifelong event detection as an exemplar case and propose a new problem formulation that is also generalizable to other IE tasks. In event detection and more general IE tasks, rich correlations or semantic relatedness exist among hierarchical knowledge element types. In our proposed framework, knowledge is being transferred between learned old event types and new event types. Specifically, we update old knowledge with new event types’ mentions using a self-training loss. In addition, we aggregate old event types’ representations based on their similarities with new event types to initialize the new event types’ representations. Experimental results show that our framework outperforms competitive baselines with a 5.1% absolute gain in the F1 score. Moreover, our proposed framework can boost the F1 score for over 30% absolute gain on some new long-tail rare event types with few training instances. Our knowledge transfer module improves performance on both learned event types and new event types under the lifelong learning setting, showing that it helps consolidate old knowledge and improve novel knowledge acquisition. 2021.emnlp-main.428 @@ -6106,7 +6106,7 @@ <fixed-case>PDALN</fixed-case>: Progressive Domain Adaptation over a Pre-trained Model for Low-Resource Cross-Domain Named Entity Recognition TaoZhang CongyingXia - Philip S.Yu + Philip S.Yu ZhiweiLiu ShuZhao 5441–5451 @@ -6145,7 +6145,7 @@ FilipIlievski JonathanFrancis SatoruOzaki - EricNyberg + EricNyberg AlessandroOltramari 5474–5483 Commonsense reasoning benchmarks have been largely solved by fine-tuning language models. The downside is that fine-tuning may cause models to overfit to task-specific data and thereby forget their knowledge gained during pre-training. Recent works only propose lightweight model updates as models may already possess useful knowledge from past experience, but a challenge remains in understanding what parts and to what extent models should be refined for a given task. In this paper, we investigate what models learn from commonsense reasoning datasets. We measure the impact of three different adaptation methods on the generalization and accuracy of models. Our experiments with two models show that fine-tuning performs best, by learning both the content and the structure of the task, but suffers from overfitting and limited generalization to novel answers. We observe that alternative adaptation methods like prefix-tuning have comparable accuracy, but generalize better to unseen answers and are more robust to adversarial splits. @@ -6215,7 +6215,7 @@ The Stem Cell Hypothesis: Dilemma behind Multi-Task Learning with Transformer Encoders HanHe - Jinho D.Choi + Jinho D.Choi 5555–5577 Multi-task learning with transformer encoders (MTL) has emerged as a powerful technique to improve performance on closely-related tasks for both accuracy and efficiency while a question still remains whether or not it would perform as well on tasks that are distinct in nature. We first present MTL results on five NLP tasks, POS, NER, DEP, CON, and SRL, and depict its deficiency over single-task learning. We then conduct an extensive pruning analysis to show that a certain set of attention heads get claimed by most tasks during MTL, who interfere with one another to fine-tune those heads for their own objectives. Based on this finding, we propose the Stem Cell Hypothesis to reveal the existence of attention heads naturally talented for many tasks that cannot be jointly trained to create adequate embeddings for all of those tasks. Finally, we design novel parameter-free probes to justify our hypothesis and demonstrate how attention heads are transformed across the five tasks during MTL through label analysis. 2021.emnlp-main.451 @@ -6251,7 +6251,7 @@ Controlled Evaluation of Grammatical Knowledge in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Language Models YiwenWang JenniferHu - RogerLevy + RogerLevy PengQian 5604–5620 Prior work has shown that structural supervision helps English language models learn generalizations about syntactic phenomena such as subject-verb agreement. However, it remains unclear if such an inductive bias would also improve language models’ ability to learn grammatical dependencies in typologically different languages. Here we investigate this question in Mandarin Chinese, which has a logographic, largely syllable-based writing system; different word order; and sparser morphology than English. We train LSTMs, Recurrent Neural Network Grammars, Transformer language models, and Transformer-parameterized generative parsing models on two Mandarin Chinese datasets of different sizes. We evaluate the models’ ability to learn different aspects of Mandarin grammar that assess syntactic and semantic relationships. We find suggestive evidence that structural supervision helps with representing syntactic state across intervening content and improves performance in low-data settings, suggesting that the benefits of hierarchical inductive biases in acquiring dependency relationships may extend beyond English. @@ -6350,9 +6350,9 @@ <fixed-case>ST</fixed-case>ra<fixed-case>TA</fixed-case>: Self-Training with Task Augmentation for Better Few-shot Learning - TuVu - Minh-ThangLuong - QuocLe + TuVu + Minh-ThangLuong + QuocLe GradySimon MohitIyyer 5715–5731 @@ -6446,7 +6446,7 @@ AnchitGupta AkshatShrivastava XilunChen - LukeZettlemoyer + LukeZettlemoyer SonalGupta 5799–5811 We propose pre-finetuning, an additional large-scale learning stage between language model pre-training and fine-tuning. Pre-finetuning is massively multi-task learning (around 50 datasets, over 4.8 million total labeled examples), and is designed to encourage learning of representations that generalize better to many different tasks. We show that pre-finetuning consistently improves performance for pretrained discriminators (e.g. RoBERTa) and generation models (e.g. BART) on a wide range of tasks (sentence prediction, commonsense reasoning, MRC, etc.), while also significantly improving sample efficiency during fine-tuning. We also show that large-scale multi-tasking is crucial; pre-finetuning can hurt performance when few tasks are used up until a critical point (usually above 15) after which performance improves linearly in the number of tasks. @@ -6499,7 +6499,7 @@ Frustratingly Simple but Surprisingly Strong: Using Language-Independent Features for Zero-shot Cross-lingual Semantic Parsing JingfengYang FedericoFancellu - BonnieWebber + BonnieWebber DiyiYang 5848–5856 The availability of corpora has led to significant advances in training semantic parsers in English. Unfortunately, for languages other than English, annotated data is limited and so is the performance of the developed parsers. Recently, pretrained multilingual models have been proven useful for zero-shot cross-lingual transfer in many NLP tasks. What else does it require to apply a parser trained in English to other languages for zero-shot cross-lingual semantic parsing? Will simple language-independent features help? To this end, we experiment with six Discourse Representation Structure (DRS) semantic parsers in English, and generalize them to Italian, German and Dutch, where there are only a small number of manually annotated parses available. Extensive experiments show that despite its simplicity, adding Universal Dependency (UD) relations and Universal POS tags (UPOS) as model-agnostic features achieves surprisingly strong improvement on all parsers. @@ -6530,7 +6530,7 @@ VishravChaudhary JamesCross LuciaSpecia - FranciscoGuzmán + FranciscoGuzmán 5865–5875 Sentence-level Quality estimation (QE) of machine translation is traditionally formulated as a regression task, and the performance of QE models is typically measured by Pearson correlation with human labels. Recent QE models have achieved previously-unseen levels of correlation with human judgments, but they rely on large multilingual contextualized language models that are computationally expensive and make them infeasible for real-world applications. In this work, we evaluate several model compression techniques for QE and find that, despite their popularity in other NLP tasks, they lead to poor performance in this regression setting. We observe that a full model parameterization is required to achieve SoTA results in a regression task. However, we argue that the level of expressiveness of a model in a continuous range is unnecessary given the downstream applications of QE, and show that reframing QE as a classification problem and evaluating QE models using classification metrics would better reflect their actual performance in real-world applications. 2021.emnlp-main.474 @@ -6544,7 +6544,7 @@ AnoopBabu DuyguAtaman SherzodKariev - FrancisTyers + FrancisTyers OtabekAbduraufov MammadHajili SardanaIvanova @@ -6565,9 +6565,9 @@ Analyzing the Surprising Variability in Word Embedding Stability Across Languages - LauraBurdick + LauraBurdick Jonathan K.Kummerfeld - RadaMihalcea + RadaMihalcea 5891–5901 Word embeddings are powerful representations that form the foundation of many natural language processing architectures, both in English and in other languages. To gain further insight into word embeddings, we explore their stability (e.g., overlap between the nearest neighbors of a word in different embedding spaces) in diverse languages. We discuss linguistic properties that are related to stability, drawing out insights about correlations with affixing, language gender systems, and other features. This has implications for embedding use, particularly in research that uses them to study language trends. 2021.emnlp-main.476 @@ -6630,7 +6630,7 @@ YangLiu HuaCheng RussellKlopfer - Matthew R.Gormley + Matthew R.Gormley ThomasSchaaf 5941–5953 Multi-label document classification (MLDC) problems can be challenging, especially for long documents with a large label set and a long-tail distribution over labels. In this paper, we present an effective convolutional attention network for the MLDC problem with a focus on medical code prediction from clinical documents. Our innovations are three-fold: (1) we utilize a deep convolution-based encoder with the squeeze-and-excitation networks and residual networks to aggregate the information across the document and learn meaningful document representations that cover different ranges of texts; (2) we explore multi-layer and sum-pooling attention to extract the most informative features from these multi-scale representations; (3) we combine binary cross entropy loss and focal loss to improve performance for rare labels. We focus our evaluation study on MIMIC-III, a widely used dataset in the medical domain. Our models outperform prior work on medical coding and achieve new state-of-the-art results on multiple metrics. We also demonstrate the language independent nature of our approach by applying it to two non-English datasets. Our model outperforms prior best model and a multilingual Transformer model by a substantial margin. @@ -6658,7 +6658,7 @@ <fixed-case>IGA</fixed-case>: An Intent-Guided Authoring Assistant SimengSun WenlongZhao - VarunManjunatha + VarunManjunatha RajivJain VladMorariu FranckDernoncourt @@ -6710,7 +6710,7 @@ A Semantic Feature-Wise Transformation Relation Network for Automatic Short Answer Grading ZhaohuiLi YajurTomar - Rebecca J.Passonneau + Rebecca J.Passonneau 6030–6040 Automatic short answer grading (ASAG) is the task of assessing students’ short natural language responses to objective questions. It is a crucial component of new education platforms, and could support more wide-spread use of constructed response questions to replace cognitively less challenging multiple choice questions. We propose a Semantic Feature-wise transformation Relation Network (SFRN) that exploits the multiple components of ASAG datasets more effectively. SFRN captures relational knowledge among the questions (Q), reference answers or rubrics (R), and labeled student answers (A). A relation network learns vector representations for the elements of QRA triples, then combines the learned representations using learned semantic feature-wise transformations. We apply translation-based data augmentation to address the two problems of limited training data, and high data skew for multi-class ASAG tasks. Our model has up to 11% performance improvement over state-of-the-art results on the benchmark SemEval-2013 datasets, and surpasses custom approaches designed for a Kaggle challenge, demonstrating its generality. 2021.emnlp-main.487 @@ -6735,10 +6735,10 @@ HanLi AmeenPatel SidharthMudgal - SungjinLee + SungjinLee Young-BumKim SpyrosMatsoukas - RuhiSarikaya + RuhiSarikaya 6054–6063 Natural Language Understanding (NLU) is an established component within a conversational AI or digital assistant system, and it is responsible for producing semantic understanding of a user request. We propose a scalable and automatic approach for improving NLU in a large-scale conversational AI system by leveraging implicit user feedback, with an insight that user interaction data and dialog context have rich information embedded from which user satisfaction and intention can be inferred. In particular, we propose a domain-agnostic framework for curating new supervision data for improving NLU from live production traffic. With an extensive set of experiments, we show the results of applying the framework and improving NLU for a large-scale production system across 10 domains. 2021.emnlp-main.489 @@ -6787,12 +6787,12 @@ Perhaps <fixed-case>PTLM</fixed-case>s Should Go to School – A Task to Assess Open Book and Closed Book <fixed-case>QA</fixed-case> - ManuelCiosici + ManuelCiosici JoeCecil Dong-HoLee AlexHedges MarjorieFreedman - RalphWeischedel + RalphWeischedel 6104–6111 Our goal is to deliver a new task and leaderboard to stimulate research on question answering and pre-trained language models (PTLMs) to understand a significant instructional document, e.g., an introductory college textbook or a manual. PTLMs have shown great success in many question-answering tasks, given significant supervised training, but much less so in zero-shot settings. We propose a new task that includes two college-level introductory texts in the social sciences (American Government 2e) and humanities (U.S. History), hundreds of true/false statements based on review questions written by the textbook authors, validation/development tests based on the first eight chapters of the textbooks, blind tests based on the remaining textbook chapters, and baseline results given state-of-the-art PTLMs. Since the questions are balanced, random performance should be ~50%. T5, fine-tuned with BoolQ achieves the same performance, suggesting that the textbook’s content is not pre-represented in the PTLM. Taking the exam closed book, but having read the textbook (i.e., adding the textbook to T5’s pre-training), yields at best minor improvement (56%), suggesting that the PTLM may not have “understood” the textbook (or perhaps misunderstood the questions). Performance is better (~60%) when the exam is taken open-book (i.e., allowing the machine to automatically retrieve a paragraph and use it to answer the question). 2021.emnlp-main.493 @@ -6857,7 +6857,7 @@ SongFeng Siva SankalpPatel HuiWan - SachindraJoshi + SachindraJoshi 6162–6176 We propose MultiDoc2Dial, a new task and dataset on modeling goal-oriented dialogues grounded in multiple documents. Most previous works treat document-grounded dialogue modeling as machine reading comprehension task based on a single given document or passage. In this work, we aim to address more realistic scenarios where a goal-oriented information-seeking conversation involves multiple topics, and hence is grounded on different documents. To facilitate such task, we introduce a new dataset that contains dialogues grounded in multiple documents from four different domains. We also explore modeling the dialogue-based and document-based contexts in the dataset. We present strong baseline approaches and various experimental results, aiming to support further research efforts on such a task. 2021.emnlp-main.498 @@ -6876,7 +6876,7 @@ AmardeepKumar Isabelle G.Lee AnishAcharya - Rajiv RatnShah + Rajiv RatnShah 6177–6192 Code-switching is the communication phenomenon where the speakers switch between different languages during a conversation. With the widespread adoption of conversational agents and chat platforms, code-switching has become an integral part of written conversations in many multi-lingual communities worldwide. Therefore, it is essential to develop techniques for understanding and summarizing these conversations. Towards this objective, we introduce the task of abstractive summarization of Hindi-English (Hi-En) code-switched conversations. We also develop the first code-switched conversation summarization dataset - GupShup, which contains over 6,800 Hi-En conversations and their corresponding human-annotated summaries in English (En) and Hi-En. We present a detailed account of the entire data collection and annotation process. We analyze the dataset using various code-switching statistics. We train state-of-the-art abstractive summarization models and report their performances using both automated metrics and human evaluation. Our results show that multi-lingual mBART and multi-view seq2seq models obtain the best performances on this new dataset. We also conduct an extensive qualitative analysis to provide insight into the models and some of their shortcomings. 2021.emnlp-main.499 @@ -6900,7 +6900,7 @@ Data Collection vs. Knowledge Graph Completion: What is Needed to Improve Coverage? - KennethChurch + KennethChurch YuchenBian 6210–6215 This survey/position paper discusses ways to improve coverage of resources such as WordNet. Rapp estimated correlations, rho, between corpus statistics and pyscholinguistic norms. rho improves with quantity (corpus size) and quality (balance). 1M words is enough for simple estimates (unigram frequencies), but at least 100x more is required for good estimates of word associations and embeddings. Given such estimates, WordNet’s coverage is remarkable. WordNet was developed on SemCor, a small sample (200k words) from the Brown Corpus. Knowledge Graph Completion (KGC) attempts to learn missing links from subsets. But Rapp’s estimates of sizes suggest it would be more profitable to collect more data than to infer missing information that is not there. @@ -6927,7 +6927,7 @@ On the Benefit of Syntactic Supervision for Cross-lingual Transfer in Semantic Role Labeling ZhisongZhang EmmaStrubell - EduardHovy + EduardHovy 6229–6246 Although recent developments in neural architectures and pre-trained representations have greatly increased state-of-the-art model performance on fully-supervised semantic role labeling (SRL), the task remains challenging for languages where supervised SRL training data are not abundant. Cross-lingual learning can improve performance in this setting by transferring knowledge from high-resource languages to low-resource ones. Moreover, we hypothesize that annotations of syntactic dependencies can be leveraged to further facilitate cross-lingual transfer. In this work, we perform an empirical exploration of the helpfulness of syntactic supervision for crosslingual SRL within a simple multitask learning scheme. With comprehensive evaluations across ten languages (in addition to English) and three SRL benchmark datasets, including both dependency- and span-based SRL, we show the effectiveness of syntactic supervision in low-resource scenarios. 2021.emnlp-main.503 @@ -6976,10 +6976,10 @@ Structure-aware Fine-tuning of Sequence-to-sequence Transformers for Transition-based <fixed-case>AMR</fixed-case> Parsing JiaweiZhou TahiraNaseem - RamónFernandez Astudillo + RamónFernandez Astudillo Young-SukLee - RaduFlorian - SalimRoukos + RaduFlorian + SalimRoukos 6279–6290 Predicting linearized Abstract Meaning Representation (AMR) graphs using pre-trained sequence-to-sequence Transformer models has recently led to large improvements on AMR parsing benchmarks. These parsers are simple and avoid explicit modeling of structure but lack desirable properties such as graph well-formedness guarantees or built-in graph-sentence alignments. In this work we explore the integration of general pre-trained sequence-to-sequence language models and a structure-aware transition-based approach. We depart from a pointer-based transition system and propose a simplified transition set, designed to better exploit pre-trained language models for structured fine-tuning. We also explore modeling the parser state within the pre-trained encoder-decoder architecture and different vocabulary strategies for the same purpose. We provide a detailed comparison with recent progress in AMR parsing and show that the proposed parser retains the desirable properties of previous transition-based approaches, while being simpler and reaching the new parsing state of the art for AMR 2.0, without the need for graph re-categorization. 2021.emnlp-main.507 @@ -6994,7 +6994,7 @@ DheerajRajagopal PeterClark YimingYang - EduardHovy + EduardHovy 6291–6310 Defeasible reasoning is the mode of reasoning where conclusions can be overturned by taking into account new evidence. Existing cognitive science literature on defeasible reasoning suggests that a person forms a “mental model” of the problem scenario before answering questions. Our research goal asks whether neural models can similarly benefit from envisioning the question scenario before answering a defeasible query. Our approach is, given a question, to have a model first create a graph of relevant influences, and then leverage that graph as an additional input when answering the question. Our system, CURIOUS, achieves a new state-of-the-art on three different defeasible reasoning datasets. This result is significant as it illustrates that performance can be improved by guiding a system to “think about” a question and explicitly model the scenario, rather than answering reflexively. 2021.emnlp-main.508 @@ -7021,7 +7021,7 @@ Does <fixed-case>BERT</fixed-case> Learn as Humans Perceive? Understanding Linguistic Styles through Lexica Shirley AnugrahHayati DongyeopKang - LyleUngar + LyleUngar 6323–6331 People convey their intention and attitude through linguistic styles of the text that they write. In this study, we investigate lexicon usages across styles throughout two lenses: human perception and machine word importance, since words differ in the strength of the stylistic cues that they provide. To collect labels of human perception, we curate a new dataset, Hummingbird, on top of benchmarking style datasets. We have crowd workers highlight the representative words in the text that makes them think the text has the following styles: politeness, sentiment, offensiveness, and five emotion types. We then compare these human word labels with word importance derived from a popular fine-tuned style classifier like BERT. Our results show that the BERT often finds content words not relevant to the target style as important words used in style prediction, but humans do not perceive the same way even though for some styles (e.g., positive sentiment and joy) human- and machine-identified words share significant overlap for some styles. 2021.emnlp-main.510 @@ -7083,7 +7083,7 @@ MasumHasan Md SaifulIslam KurtisHaut - RadaMihalcea + RadaMihalcea EhsanHoque 6387–6397 The combination of gestures, intonations, and textual content plays a key role in argument delivery. However, the current literature mostly considers textual content while assessing the quality of an argument, and it is limited to datasets containing short sequences (18-48 words). In this paper, we study argument quality assessment in a multimodal context, and experiment on DBATES, a publicly available dataset of long debate videos. First, we propose a set of interpretable debate centric features such as clarity, content variation, body movement cues, and pauses, inspired by theories of argumentation quality. Second, we design the Multimodal ARgument Quality assessor (MARQ) – a hierarchical neural network model that summarizes the multimodal signals on long sequences and enriches the multimodal embedding with debate centric features. Our proposed MARQ model achieves an accuracy of 81.91% on the argument quality prediction task and outperforms established baseline models with an error rate reduction of 22.7%. Through ablation studies, we demonstrate the importance of multimodal cues in modeling argument quality. @@ -7097,7 +7097,7 @@ ArjunAkula SpandanaGella KezeWang - Song-ChunZhu + Song-ChunZhu SivaReddy 6398–6416 Neural module networks (NMN) are a popular approach for grounding visual referring expressions. Prior implementations of NMN use pre-defined and fixed textual inputs in their module instantiation. This necessitates a large number of modules as they lack the ability to share weights and exploit associations between similar textual contexts (e.g. “dark cube on the left” vs. “black cube on the left”). In this work, we address these limitations and evaluate the impact of contextual clues in improving the performance of NMN models. First, we address the problem of fixed textual inputs by parameterizing the module arguments. This substantially reduce the number of modules in NMN by up to 75% without any loss in performance. Next we propose a method to contextualize our parameterized model to enhance the module’s capacity in exploiting the visiolinguistic associations. Our model outperforms the state-of-the-art NMN model on CLEVR-Ref+ dataset with +8.1% improvement in accuracy on the single-referent test set and +4.3% on the full test set. Additionally, we demonstrate that contextualization provides +11.2% and +1.7% improvements in accuracy over prior NMN models on CLOSURE and NLVR2. We further evaluate the impact of our contextualization by constructing a contrast set for CLEVR-Ref+, which we call CC-Ref+. We significantly outperform the baselines by as much as +10.4% absolute accuracy on CC-Ref+, illustrating the generalization skills of our approach. @@ -7139,7 +7139,7 @@ LingfeiWu TianGao HengJi - KathleenMcKeown + KathleenMcKeown 6443–6456 Timeline Summarization identifies major events from a news collection and describes them following temporal order, with key dates tagged. Previous methods generally generate summaries separately for each date after they determine the key dates of events. These methods overlook the events’ intra-structures (arguments) and inter-structures (event-event connections). Following a different route, we propose to represent the news articles as an event-graph, thus the summarization becomes compressing the whole graph to its salient sub-graph. The key hypothesis is that the events connected through shared arguments and temporal order depict the skeleton of a timeline, containing events that are semantically related, temporally coherent and structurally salient in the global event graph. A time-aware optimal transport distance is then introduced for learning the compression model in an unsupervised manner. We show that our approach significantly improves on the state of the art on three real-world datasets, including two public standard benchmarks and our newly collected Timeline100 dataset. 2021.emnlp-main.519 @@ -7152,7 +7152,7 @@ SangwooCho FranckDernoncourt TimGanter - TrungBui + TrungBui NedimLipka WalterChang HailinJin @@ -7215,7 +7215,7 @@ <fixed-case>SPECTRA</fixed-case>: Sparse Structured Text Rationalization Nuno M.Guerreiro - André F. T.Martins + André F. T.Martins 6534–6550 Selective rationalization aims to produce decisions along with rationales (e.g., text highlights or word alignments between two sentences). Commonly, rationales are modeled as stochastic binary masks, requiring sampling-based gradient estimators, which complicates training and requires careful hyperparameter tuning. Sparse attention mechanisms are a deterministic alternative, but they lack a way to regularize the rationale extraction (e.g., to control the sparsity of a text highlight or the number of alignments). In this paper, we present a unified framework for deterministic extraction of structured explanations via constrained inference on a factor graph, forming a differentiable layer. Our approach greatly eases training and rationale regularization, generally outperforming previous work on what comes to performance and plausibility of the extracted rationales. We further provide a comparative study of stochastic and deterministic methods for rationale extraction for classification and natural language inference tasks, jointly assessing their predictive power, quality of the explanations, and model variability. 2021.emnlp-main.525 @@ -7255,7 +7255,7 @@ Aspect-Controllable Opinion Summarization - Reinald KimAmplayo + Reinald KimAmplayo StefanosAngelidis MirellaLapata 6578–6593 @@ -7318,7 +7318,7 @@ Multilingual Unsupervised Neural Machine Translation with Denoising Adapters AhmetÜstün AlexandreBerard - LaurentBesacier + LaurentBesacier MatthiasGallé 6650–6662 We consider the problem of multilingual unsupervised machine translation, translating to and from languages that only have monolingual data by using auxiliary parallel language pairs. For this problem the standard procedure so far to leverage the monolingual data is _back-translation_, which is computationally costly and hard to tune. In this paper we propose instead to use _denoising adapters_, adapter layers with a denoising objective, on top of pre-trained mBART-50. In addition to the modularity and flexibility of such an approach we show that the resulting translations are on-par with back-translating as measured by BLEU, and furthermore it allows adding unseen languages incrementally. @@ -7343,7 +7343,7 @@ Controlling Machine Translation for Multiple Attributes with Additive Interventions AndreaSchioppa DavidVilar - ArtemSokolov + ArtemSokolov KatjaFilippova 6676–6696 Fine-grained control of machine translation (MT) outputs along multiple attributes is critical for many modern MT applications and is a requirement for gaining users’ trust. A standard approach for exerting control in MT is to prepend the input with a special tag to signal the desired output attribute. Despite its simplicity, attribute tagging has several drawbacks: continuous values must be binned into discrete categories, which is unnatural for certain applications; interference between multiple tags is poorly understood. We address these problems by introducing vector-valued interventions which allow for fine-grained control over multiple attributes simultaneously via a weighted linear combination of the corresponding vectors. For some attributes, our approach even allows for fine-tuning a model trained without annotations to support such interventions. In experiments with three attributes (length, politeness and monotonicity) and two language pairs (English to German and Japanese) our models achieve better control over a wider range of tasks compared to tagging, and translation quality does not degrade when no control is requested. Finally, we demonstrate how to enable control in an already trained model after a relatively cheap fine-tuning stage. @@ -7355,7 +7355,7 @@ A Generative Framework for Simultaneous Machine Translation YishuMiao - PhilBlunsom + PhilBlunsom LuciaSpecia 6697–6706 We propose a generative framework for simultaneous machine translation. Conventional approaches use a fixed number of source words to translate or learn dynamic policies for the number of source words by reinforcement learning. Here we formulate simultaneous translation as a structural sequence-to-sequence learning problem. A latent variable is introduced to model read or translate actions at every time step, which is then integrated out to consider all the possible translation policies. A re-parameterised Poisson prior is used to regularise the policies which allows the model to explicitly balance translation quality and latency. The experiments demonstrate the effectiveness and robustness of the generative framework, which achieves the best BLEU scores given different average translation latencies on benchmark datasets. @@ -7368,8 +7368,8 @@ It Is Not As Good As You Think! Evaluating Simultaneous Machine Translation on Interpretation Data JinmingZhao PhilipArthur - GholamrezaHaffari - TrevorCohn + GholamrezaHaffari + TrevorCohn EhsanShareghi 6707–6715 Most existing simultaneous machine translation (SiMT) systems are trained and evaluated on offline translation corpora. We argue that SiMT systems should be trained and tested on real interpretation data. To illustrate this argument, we propose an interpretation test set and conduct a realistic evaluation of SiMT trained on offline translations. Our results, on our test set along with 3 existing smaller scale language pairs, highlight the difference of up-to 13.83 BLEU score when SiMT models are evaluated on translation vs interpretation data. In the absence of interpretation training data, we propose a translation-to-interpretation (T2I) style transfer method which allows converting existing offline translations into interpretation-style data, leading to up-to 2.8 BLEU improvement. However, the evaluation gap remains notable, calling for constructing large-scale interpretation corpora better suited for evaluating and developing SiMT systems. @@ -7385,7 +7385,7 @@ XujiangZhao HaifengChen FengChen - Jinho D.Choi + Jinho D.Choi 6716–6723 Recent multilingual pre-trained language models have achieved remarkable zero-shot performance, where the model is only finetuned on one source language and directly evaluated on target languages. In this work, we propose a self-learning framework that further utilizes unlabeled data of target languages, combined with uncertainty estimation in the process to select high-quality silver labels. Three different uncertainties are adapted and analyzed specifically for the cross lingual transfer: Language Heteroscedastic/Homoscedastic Uncertainty (LEU/LOU), Evidential Uncertainty (EVI). We evaluate our framework with uncertainties on two cross-lingual tasks including Named Entity Recognition (NER) and Natural Language Inference (NLI) covering 40 languages in total, which outperforms the baselines significantly by 10 F1 for NER on average and 2.5 accuracy for NLI. 2021.emnlp-main.538 @@ -7462,7 +7462,7 @@ DmytroOkhonko ArmenAghajanyan FlorianMetze - LukeZettlemoyer + LukeZettlemoyer ChristophFeichtenhofer 6787–6800 We present VideoCLIP, a contrastive approach to pre-train a unified model for zero-shot video and text understanding, without using any labels on downstream tasks. VideoCLIP trains a transformer for video and text by contrasting temporally overlapping positive video-text pairs with hard negatives from nearest neighbor retrieval. Our experiments on a diverse series of downstream tasks, including sequence-level text-video retrieval, VideoQA, token-level action localization, and action segmentation reveal state-of-the-art performance, surpassing prior work, and in some cases even outperforming supervised approaches. Code is made available at https://github.com/pytorch/fairseq/examples/MMPT. @@ -7603,7 +7603,7 @@ Generating Datasets with Pretrained Language Models TimoSchick - HinrichSchütze + HinrichSchütze 6943–6951 To obtain high-quality sentence embeddings from pretrained language models (PLMs), they must either be augmented with additional pretraining objectives or finetuned on a large set of labeled text pairs. While the latter approach typically outperforms the former, it requires great human effort to generate suitable datasets of sufficient size. In this paper, we show how PLMs can be leveraged to obtain high-quality sentence embeddings without the need for labeled data, finetuning or modifications to the pretraining objective: We utilize the generative abilities of large and high-performing PLMs to generate entire datasets of labeled text pairs from scratch, which we then use for finetuning much smaller and more efficient models. Our fully unsupervised approach outperforms strong baselines on several semantic textual similarity datasets. 2021.emnlp-main.555 @@ -7614,7 +7614,7 @@ Continuous Entailment Patterns for Lexical Inference in Context MartinSchmitt - HinrichSchütze + HinrichSchütze 6952–6959 Combining a pretrained language model (PLM) with textual patterns has been shown to help in both zero- and few-shot settings. For zero-shot performance, it makes sense to design patterns that closely resemble the text seen during self-supervised pretraining because the model has never seen anything else. Supervised training allows for more flexibility. If we allow for tokens outside the PLM’s vocabulary, patterns can be adapted more flexibly to a PLM’s idiosyncrasies. Contrasting patterns where a “token” can be any continuous vector from those where a discrete choice between vocabulary elements has to be made, we call our method CONtinous pAtterNs (CONAN). We evaluate CONAN on two established benchmarks for lexical inference in context (LIiC) a.k.a. predicate entailment, a challenging natural language understanding task with relatively small training data. In a direct comparison with discrete patterns, CONAN consistently leads to improved performance, setting a new state of the art. Our experiments give valuable insights on the kind of pattern that enhances a PLM’s performance on LIiC and raise important questions regarding our understanding of PLMs using text patterns. 2021.emnlp-main.556 @@ -7680,7 +7680,7 @@ Generative Context Pair Selection for Multi-hop Question Answering DheeruDua - CiceroNogueira dos Santos + CiceroNogueira dos Santos PatrickNg BenAthiwaratkun BingXiang @@ -7698,8 +7698,8 @@ ArijRiabi ThomasScialom RachelKeraron - BenoîtSagot - DjaméSeddah + BenoîtSagot + DjaméSeddah JacopoStaiano 7016–7030 Coupled with the availability of large scale datasets, deep learning architectures have enabled rapid progress on the Question Answering task. However, most of those datasets are in English, and the performances of state-of-the-art multilingual models are significantly lower when evaluated on non-English data. Due to high data collection costs, it is not realistic to obtain annotated data for each language one desires to support. We propose a method to improve the Cross-lingual Question Answering performance without requiring additional annotated data, leveraging Question Generation models to produce synthetic samples in a cross-lingual fashion. We show that the proposed method allows to significantly outperform the baselines trained on English data only. We report a new state-of-the-art on four datasets: MLQA, XQuAD, SQuAD-it and PIAF (fr). @@ -7714,7 +7714,7 @@ GiwonHong Kyung-minKim JunmoKang - Sung-HyonMyaeng + Sung-HyonMyaeng 7031–7037 Numerical reasoning in machine reading comprehension (MRC) has shown drastic improvements over the past few years. While the previous models for numerical MRC are able to interpolate the learned numerical reasoning capabilities, it is not clear whether they can perform just as well on numbers unseen in the training dataset. Our work rigorously tests state-of-the-art models on DROP, a numerical MRC dataset, to see if they can handle passages that contain out-of-range numbers. One of the key findings is that the models fail to extrapolate to unseen numbers. Presenting numbers as digit-by-digit input to the model, we also propose the E-digit number form that alleviates the lack of extrapolation in models and reveals the need to treat numbers differently from regular words in the text. Our work provides a valuable insight into the numerical MRC models and the way to represent number forms in MRC. 2021.emnlp-main.563 @@ -7728,7 +7728,7 @@ PeterWest VeredShwartz YejinChoi - LukeZettlemoyer + LukeZettlemoyer 7038–7051 Large language models have shown promising results in zero-shot settings. For example, they can perform multiple choice tasks simply by conditioning on a question and selecting the answer with the highest probability. However, ranking by string probability can be problematic due to surface form competition—wherein different surface forms compete for probability mass, even if they represent the same underlying concept in a given context, e.g. “computer” and “PC.” Since probability mass is finite, this lowers the probability of the correct answer, due to competition from other strings that are valid answers (but not one of the multiple choice options). We introduce Domain Conditional Pointwise Mutual Information, an alternative scoring function that directly compensates for surface form competition by simply reweighing each option according to its a priori likelihood within the context of a specific task. It achieves consistent gains in zero-shot performance over both calibrated and uncalibrated scoring functions on all GPT-2 and GPT-3 models on a variety of multiple choice datasets. 2021.emnlp-main.564 @@ -7755,7 +7755,7 @@ Back-Training excels Self-Training at Unsupervised Domain Adaptation of Question Generation and Passage Retrieval DevangKulshreshtha RobertBelfer - Iulian VladSerban + Iulian VladSerban SivaReddy 7064–7078 In this work, we introduce back-training, an alternative to self-training for unsupervised domain adaptation (UDA). While self-training generates synthetic training data where natural inputs are aligned with noisy outputs, back-training results in natural outputs aligned with noisy inputs. This significantly reduces the gap between target domain and synthetic data distribution, and reduces model overfitting to source domain. We run UDA experiments on question generation and passage retrieval from the Natural Questions domain to machine learning and biomedical domains. We find that back-training vastly outperforms self-training by a mean improvement of 7.8 BLEU-4 points on generation, and 17.6% top-20 retrieval accuracy across both domains. We further propose consistency filters to remove low-quality synthetic data before training. We also release a new domain-adaptation dataset - MLQuestions containing 35K unaligned questions, 50K unaligned passages, and 3K aligned question-passage pairs. @@ -7809,7 +7809,7 @@ AntoniosAnastasopoulos ShrutiRijhwani AditiChaudhary - David R.Mortensen + David R.Mortensen GrahamNeubig YuliaTsvetkov 7131–7150 @@ -7823,7 +7823,7 @@ <fixed-case>AM</fixed-case>2i<fixed-case>C</fixed-case>o: Evaluating Word Meaning in Context across Low-Resource Languages with Adversarial Examples QianchuLiu Edoardo MariaPonti - DianaMcCarthy + DianaMcCarthy IvanVulić AnnaKorhonen 7151–7162 @@ -7836,7 +7836,7 @@ <fixed-case>C</fixed-case>ross<fixed-case>F</fixed-case>it: A Few-shot Learning Challenge for Cross-task Generalization in <fixed-case>NLP</fixed-case> QinyuanYe - Bill YuchenLin + Bill YuchenLin XiangRen 7163–7189 Humans can learn a new language task efficiently with only few examples, by leveraging their knowledge obtained when learning prior tasks. In this paper, we explore whether and how such cross-task generalization ability can be acquired, and further applied to build better few-shot learners across diverse NLP tasks. We introduce CrossFit, a problem setup for studying cross-task generalization ability, which standardizes seen/unseen task partitions, data access during different learning stages, and the evaluation protocols. To instantiate different seen/unseen task partitions in CrossFit and facilitate in-depth analysis, we present the NLP Few-shot Gym, a repository of 160 diverse few-shot NLP tasks created from open-access NLP datasets and converted to a unified text-to-text format. Our analysis reveals that the few-shot learning ability on unseen tasks can be improved via an upstream learning stage using a set of seen tasks. We also observe that the selection of upstream learning tasks can significantly influence few-shot performance on unseen tasks, asking further analysis on task similarity and transferability. @@ -7849,7 +7849,7 @@ On the Influence of Masking Policies in Intermediate Pre-training QinyuanYe - Belinda Z.Li + Belinda Z.Li SinongWang BenjaminBolte HaoMa @@ -7880,7 +7880,7 @@ TanayDixit Dev YashpalSheth SreyasMohan - Mitesh M.Khapra + Mitesh M.Khapra 7219–7234 Natural Language Generation (NLG) evaluation is a multifaceted task requiring assessment of multiple desirable criteria, e.g., fluency, coherency, coverage, relevance, adequacy, overall quality, etc. Across existing datasets for 6 NLG tasks, we observe that the human evaluation scores on these multiple criteria are often not correlated. For example, there is a very low correlation between human scores on fluency and data coverage for the task of structured data to text generation. This suggests that the current recipe of proposing new automatic evaluation metrics for NLG by showing that they correlate well with scores assigned by humans for a single criteria (overall quality) alone is inadequate. Indeed, our extensive study involving 25 automatic evaluation metrics across 6 different tasks and 18 different evaluation criteria shows that there is no single metric which correlates well with human scores on all desirable criteria, for most NLG tasks. Given this situation, we propose CheckLists for better design and evaluation of automatic metrics. We design templates which target a specific criteria (e.g., coverage) and perturb the output such that the quality gets affected only along this specific criteria (e.g., the coverage drops). We show that existing evaluation metrics are not robust against even such simple perturbations and disagree with scores assigned by humans to the perturbed output. The proposed templates thus allow for a fine-grained assessment of automatic evaluation metrics exposing their limitations and will facilitate better design, analysis and evaluation of such metrics. Our templates and code are available at https://iitmnlp.github.io/EvalEval/ 2021.emnlp-main.575 @@ -7919,7 +7919,7 @@ AlexandreMuzio PrasadTadepalli StefanLee - HanyHassan + HanyHassan 7266–7279 Multilingual Neural Machine Translation (NMT) enables one model to serve all translation directions, including ones that are unseen during training, i.e. zero-shot translation. Despite being theoretically attractive, current models often produce low quality translations – commonly failing to even produce outputs in the right target language. In this work, we observe that off-target translation is dominant even in strong multilingual systems, trained on massive multilingual corpora. To address this issue, we propose a joint approach to regularize NMT models at both representation-level and gradient-level. At the representation level, we leverage an auxiliary target language prediction task to regularize decoder outputs to retain information about the target language. At the gradient level, we leverage a small amount of direct data (in thousands of sentence pairs) to regularize model gradients. Our results demonstrate that our approach is highly effective in both reducing off-target translation occurrences and improving zero-shot translation performance by +5.59 and +10.38 BLEU on WMT and OPUS datasets respectively. Moreover, experiments show that our method also works well when the small amount of direct data is not available. 2021.emnlp-main.578 @@ -7948,7 +7948,7 @@ YitongLi MengZhang LiangyouLi - GholamrezaHaffari + GholamrezaHaffari QunLiu 7291–7305 Learning multilingual and multi-domain translation model is challenging as the heterogeneous and imbalanced data make the model converge inconsistently over different corpora in real world. One common practice is to adjust the share of each corpus in the training, so that the learning process is balanced and low-resource cases can benefit from the high resource ones. However, automatic balancing methods usually depend on the intra- and inter-dataset characteristics, which is usually agnostic or requires human priors. In this work, we propose an approach, MultiUAT, that dynamically adjusts the training data usage based on the model’s uncertainty on a small set of trusted clean data for multi-corpus machine translation. We experiments with two classes of uncertainty measures on multilingual (16 languages with 4 settings) and multi-domain settings (4 for in-domain and 2 for out-of-domain on English-German translation) and demonstrate our approach MultiUAT substantially outperforms its baselines, including both static and dynamic strategies. We analyze the cross-domain transfer and show the deficiency of static and similarity based methods. @@ -8008,8 +8008,8 @@ Explaining Answers with Entailment Trees - BhavanaDalvi - PeterJansen + BhavanaDalvi + PeterJansen OyvindTafjord ZhengnanXie HannahSmith @@ -8051,7 +8051,7 @@ JenniferLee AntoineBosselut YejinChoi - TomMitchell + TomMitchell 7404–7418 One of the challenges faced by conversational agents is their inability to identify unstated presumptions of their users’ commands, a task trivial for humans due to their common sense. In this paper, we propose a zero-shot commonsense reasoning system for conversational agents in an attempt to achieve this. Our reasoner uncovers unstated presumptions from user commands satisfying a general template of if-(state), then-(action), because-(goal). Our reasoner uses a state-of-the-art transformer-based generative commonsense knowledge base (KB) as its source of background knowledge for reasoning. We propose a novel and iterative knowledge query mechanism to extract multi-hop reasoning chains from the neural KB which uses symbolic logic rules to significantly reduce the search space. Similar to any KBs gathered to date, our commonsense KB is prone to missing knowledge. Therefore, we propose to conversationally elicit the missing knowledge from human users with our novel dynamic question generation strategy, which generates and presents contextualized queries to human users. We evaluate the model with a user study with human users that achieves a 35% higher success rate compared to SOTA. 2021.emnlp-main.588 @@ -8079,7 +8079,7 @@ ZhaojiangLin ZhenpengZhou SeungwhanMoon - PaulCrook + PaulCrook BingLiu ZhouYu EunjoonCho @@ -8114,7 +8114,7 @@ Investigating Robustness of Dialog Models to Popular Figurative Language Constructs HarshJhamtani VarunGangal - EduardHovy + EduardHovy TaylorBerg-Kirkpatrick 7476–7485 Humans often employ figurative language use in communication, including during interactions with dialog systems. Thus, it is important for real-world dialog systems to be able to handle popular figurative language constructs like metaphor and simile. In this work, we analyze the performance of existing dialog models in situations where the input dialog context exhibits use of figurative language. We observe large gaps in handling of figurative language when evaluating the models on two open domain dialog datasets. When faced with dialog contexts consisting of figurative language, some models show very large drops in performance compared to contexts without figurative language. We encourage future research in dialog modeling to separately analyze and report results on figurative language in order to better test model capabilities relevant to real-world use. Finally, we propose lightweight solutions to help existing models become more robust to figurative language by simply using an external resource to translate figurative language to literal (non-figurative) forms while preserving the meaning to the best extent possible. @@ -8143,7 +8143,7 @@ IzBeltagy Madeleinevan Zuylen BaileyKuehl - Lucy LuWang + Lucy LuWang 7494–7513 To assess the effectiveness of any medical intervention, researchers must conduct a time-intensive and manual literature review. NLP systems can help to automate or assist in parts of this expensive process. In support of this goal, we release MSˆ2 (Multi-Document Summarization of Medical Studies), a dataset of over 470k documents and 20K summaries derived from the scientific literature. This dataset facilitates the development of systems that can assess and aggregate contradictory evidence across multiple studies, and is the first large-scale, publicly available multi-document summarization dataset in the biomedical domain. We experiment with a summarization system based on BART, with promising early results, though significant work remains to achieve higher summarization quality. We formulate our summarization inputs and targets in both free text and structured forms and modify a recently proposed metric to assess the quality of our system’s generated summaries. Data and models are available at https://github.com/allenai/ms2. 2021.emnlp-main.594 @@ -8169,7 +8169,7 @@ On the Challenges of Evaluating Compositional Explanations in Multi-Hop Inference: Relevance, Completeness, and Expert Ratings - PeterJansen + PeterJansen Kelly J.Smith DanMoreno HuitzilinOrtiz @@ -8201,7 +8201,7 @@ PeiZhou RahulKhanna SeyeonLee - Bill YuchenLin + Bill YuchenLin DanielHo JayPujara XiangRen @@ -8217,7 +8217,7 @@ MingkaiDeng BowenTan ZhengzhongLiu - EricXing + EricXing ZhitingHu 7580–7605 Natural language generation (NLG) spans a broad range of tasks, each of which serves for specific objectives and desires different properties of generated text. The complexity makes automatic evaluation of NLG particularly challenging. Previous work has typically focused on a single task and developed individual evaluation metrics based on specific intuitions. In this paper, we propose a unifying perspective based on the nature of information change in NLG tasks, including compression (e.g., summarization), transduction (e.g., text rewriting), and creation (e.g., dialog). _Information alignment_ between input, context, and output text plays a common central role in characterizing the generation. With automatic alignment prediction models, we develop a family of interpretable metrics that are suitable for evaluating key aspects of different NLG tasks, often without need of gold reference data. Experiments show the uniformly designed metrics achieve stronger or comparable correlations with human judgement compared to state-of-the-art metrics in each of diverse tasks, including text summarization, style transfer, and knowledge-grounded dialog. @@ -8231,8 +8231,8 @@ <fixed-case>MATE</fixed-case>: Multi-view Attention for Table Transformer Efficiency JulianEisenschlos MaharshiGor - ThomasMüller - WilliamCohen + ThomasMüller + WilliamCohen 7606–7619 This work presents a sparse-attention Transformer architecture for modeling documents that contain large tables. Tables are ubiquitous on the web, and are rich in information. However, more than 20% of relational tables on the web have 20 or more rows (Cafarella et al., 2008), and these large tables present a challenge for current Transformer models, which are typically limited to 512 tokens. Here we propose MATE, a novel Transformer architecture designed to model the structure of web tables. MATE uses sparse attention in a way that allows heads to efficiently attend to either rows or columns in a table. This architecture scales linearly with respect to speed and memory, and can handle documents containing more than 8000 tokens with current accelerators. MATE also has a more appropriate inductive bias for tabular data, and sets a new state-of-the-art for three table reasoning datasets. For HybridQA (Chen et al., 2020), a dataset that involves large documents containing tables, we improve the best prior result by 19 points. 2021.emnlp-main.600 @@ -8334,12 +8334,12 @@ RichardShin ChristopherLin SamThomson - CharlesChen + CharlesChen SubhroRoy Emmanouil AntoniosPlatanios AdamPauls DanKlein - JasonEisner + JasonEisner BenjaminVan Durme 7699–7715 We explore the use of large pretrained language models as few-shot semantic parsers. The goal in semantic parsing is to generate a structured meaning representation given a natural language input. However, language models are trained to generate natural language. To bridge the gap, we use language models to paraphrase inputs into a controlled sublanguage resembling English that can be automatically mapped to a target meaning representation. Our results demonstrate that with only a small amount of data and very little code to convert into English-like representations, our blueprint for rapidly bootstrapping semantic parsers leads to surprisingly effective performance on multiple community tasks, greatly exceeding baseline methods also trained on the same limited data. @@ -8429,7 +8429,7 @@ Come hither or go away? Recognising pre-electoral coalition signals in the news InesRehbein - Simone PaoloPonzetto + Simone PaoloPonzetto AnnaAdendorf OkeBahnsen LukasStoetzer @@ -8516,7 +8516,7 @@ A Collaborative Multi-agent Reinforcement Learning Framework for Dialog Action Decomposition HuiminWang - Kam-FaiWong + Kam-FaiWong 7882–7889 Most reinforcement learning methods for dialog policy learning train a centralized agent that selects a predefined joint action concatenating domain name, intent type, and slot name. The centralized dialog agent suffers from a great many user-agent interaction requirements due to the large action space. Besides, designing the concatenated actions is laborious to engineers and maybe struggled with edge cases. To solve these problems, we model the dialog policy learning problem with a novel multi-agent framework, in which each part of the action is led by a different agent. The framework reduces labor costs for action templates and decreases the size of the action space for each agent. Furthermore, we relieve the non-stationary problem caused by the changing dynamics of the environment as evolving of agents’ policies by introducing a joint optimization process that makes agents can exchange their policy information. Concurrently, an independent experience replay buffer mechanism is integrated to reduce the dependence between gradients of samples to improve training efficiency. The effectiveness of the proposed framework is demonstrated in a multi-domain environment with both user simulator evaluation and human evaluation. 2021.emnlp-main.621 @@ -8531,7 +8531,7 @@ AndreaMadotto SeungwhanMoon ZhenpengZhou - PaulCrook + PaulCrook ZhiguangWang ZhouYu EunjoonCho @@ -8553,7 +8553,7 @@ Hsien-chinLin NurulLubis ShutongFeng - MilicaGasic + MilicaGasic 7901–7914 The ability to identify and resolve uncertainty is crucial for the robustness of a dialogue system. Indeed, this has been confirmed empirically on systems that utilise Bayesian approaches to dialogue belief tracking. However, such systems consider only confidence estimates and have difficulty scaling to more complex settings. Neural dialogue systems, on the other hand, rarely take uncertainties into account. They are therefore overconfident in their decisions and less robust. Moreover, the performance of the tracking task is often evaluated in isolation, without consideration of its effect on the downstream policy optimisation. We propose the use of different uncertainty measures in neural belief tracking. The effects of these measures on the downstream task of policy optimisation are evaluated by adding selected measures of uncertainty to the feature space of the policy and training policies through interaction with a user simulator. Both human and simulated user results show that incorporating these measures leads to improvements both of the performance and of the robustness of the downstream dialogue policy. This highlights the importance of developing neural dialogue belief trackers that take uncertainty into account. 2021.emnlp-main.623 @@ -8564,7 +8564,7 @@ Dynamic Forecasting of Conversation Derailment YovaKementchedjhieva - AndersSøgaard + AndersSøgaard 7915–7919 Online conversations can sometimes take a turn for the worse, either due to systematic cultural differences, accidental misunderstandings, or mere malice. Automatically forecasting derailment in public online conversations provides an opportunity to take early action to moderate it. Previous work in this space is limited, and we extend it in several ways. We apply a pretrained language encoder to the task, which outperforms earlier approaches. We further experiment with shifting the training paradigm for the task from a static to a dynamic one to increase the forecast horizon. This approach shows mixed results: in a high-quality data setting, a longer average forecast horizon can be achieved at the cost of a small drop in F1; in a low-quality data setting, however, dynamic training propagates the noise and is highly detrimental to performance. 2021.emnlp-main.624 @@ -8660,7 +8660,7 @@ A Bag of Tricks for Dialogue Summarization MuhammadKhalifa MiguelBallesteros - KathleenMcKeown + KathleenMcKeown 8014–8022 Dialogue summarization comes with its own peculiar challenges as opposed to news or scientific articles summarization. In this work, we explore four different challenges of the task: handling and differentiating parts of the dialogue belonging to multiple speakers, negation understanding, reasoning about the situation, and informal language understanding. Using a pretrained sequence-to-sequence language model, we explore speaker name substitution, negation scope highlighting, multi-task learning with relevant tasks, and pretraining on in-domain data. Our experiments show that our proposed techniques indeed improve summarization performance, outperforming strong baselines. 2021.emnlp-main.631 @@ -8670,7 +8670,7 @@ Paraphrasing Compound Nominalizations - JohnLee + JohnLee Ho HungLim CarolWebster 8023–8028 @@ -8699,7 +8699,7 @@ Low-Rank Subspaces for Unsupervised Entity Linking AkhilArora - AlbertoGarcia-Duran + AlbertoGarcia-Duran RobertWest 8037–8054 Entity linking is an important problem with many applications. Most previous solutions were designed for settings where annotated training data is available, which is, however, not the case in numerous domains. We propose a light-weight and scalable entity linking method, Eigenthemes, that relies solely on the availability of entity names and a referent knowledge base. Eigenthemes exploits the fact that the entities that are truly mentioned in a document (the “gold entities”) tend to form a semantically dense subset of the set of all candidate entities in the document. Geometrically speaking, when representing entities as vectors via some given embedding, the gold entities tend to lie in a low-rank subspace of the full embedding space. Eigenthemes identifies this subspace using the singular value decomposition and scores candidate entities according to their proximity to the subspace. On the empirical front, we introduce multiple strong baselines that compare favorably to (and sometimes even outperform) the existing state of the art. Extensive experiments on benchmark datasets from a variety of real-world domains showcase the effectiveness of our approach. @@ -8753,7 +8753,7 @@ Back to the Basics: A Quantitative Analysis of Statistical and Graph-Based Term Weighting Schemes for Keyword Extraction AsahiUshio FedericoLiberatore - JoseCamacho-Collados + JoseCamacho-Collados 8089–8103 Term weighting schemes are widely used in Natural Language Processing and Information Retrieval. In particular, term weighting is the basis for keyword extraction. However, there are relatively few evaluation studies that shed light about the strengths and shortcomings of each weighting scheme. In fact, in most cases researchers and practitioners resort to the well-known tf-idf as default, despite the existence of other suitable alternatives, including graph-based models. In this paper, we perform an exhaustive and large-scale empirical comparison of both statistical and graph-based term weighting methods in the context of keyword extraction. Our analysis reveals some interesting findings such as the advantages of the less-known lexical specificity with respect to tf-idf, or the qualitative differences between statistical and graph-based methods. Finally, based on our findings we discuss and devise some suggestions for practitioners. Source code to reproduce our experimental results, including a keyword extraction library, are available in the following repository: https://github.com/asahi417/kex 2021.emnlp-main.638 @@ -8821,7 +8821,7 @@ YiHuang BuseGiledereli AbdullatifKöksal - ArzucanÖzgür + ArzucanÖzgür ElifOzkirimli 8153–8161 Multi-label text classification is a challenging task because it requires capturing label dependencies. It becomes even more challenging when class distribution is long-tailed. Resampling and re-weighting are common approaches used for addressing the class imbalance problem, however, they are not effective when there is label dependency besides class imbalance because they result in oversampling of common labels. Here, we introduce the application of balancing loss functions for multi-label text classification. We perform experiments on a general domain dataset with 90 labels (Reuters-21578) and a domain-specific dataset from PubMed with 18211 labels. We find that a distribution-balanced loss function, which inherently addresses both the class imbalance and label linkage problems, outperforms commonly used loss functions. Distribution balancing methods have been successfully used in the image recognition field. Here, we show their effectiveness in natural language processing. Source code is available at https://github.com/blessu/BalancedLossNLP. @@ -8874,7 +8874,7 @@ NathanielBerger StefanRiezler SebastianEbert - ArtemSokolov + ArtemSokolov 8216–8224 Recently more attention has been given to adversarial attacks on neural networks for natural language processing (NLP). A central research topic has been the investigation of search algorithms and search constraints, accompanied by benchmark algorithms and tasks. We implement an algorithm inspired by zeroth order optimization-based attacks and compare with the benchmark results in the TextAttack framework. Surprisingly, we find that optimization-based methods do not yield any improvement in a constrained setup and slightly benefit from approximate gradient information only in unconstrained setups where search spaces are larger. In contrast, simple heuristics exploiting nearest neighbors without querying the target function yield substantial success rates in constrained setups, and nearly full success rate in unconstrained setups, at an order of magnitude fewer queries. We conclude from these results that current TextAttack benchmark tasks are too easy and constraints are too strict, preventing meaningful research on black-box adversarial text attacks. 2021.emnlp-main.647 @@ -8885,7 +8885,7 @@ Adversarial Attacks on Knowledge Graph Embeddings via Instance Attribution Methods PeruBhardwaj - JohnKelleher + JohnKelleher LucaCostabello DeclanO’Sullivan 8225–8239 @@ -8897,7 +8897,7 @@ Locke’s Holiday: Belief Bias in Machine Reading - AndersSøgaard + AndersSøgaard 8240–8245 I highlight a simple failure mode of state-of-the-art machine reading systems: when contexts do not align with commonly shared beliefs. For example, machine reading systems fail to answer What did Elizabeth want? correctly in the context of ‘My kingdom for a cough drop, cried Queen Elizabeth.’ Biased by co-occurrence statistics in the training data of pretrained language models, systems predict my kingdom, rather than a cough drop. I argue such biases are analogous to human belief biases and present a carefully designed challenge dataset for English machine reading, called Auto-Locke, to quantify such effects. Evaluations of machine reading systems on Auto-Locke show the pervasiveness of belief bias in machine reading. 2021.emnlp-main.649 @@ -8907,8 +8907,8 @@ Sequence Length is a Domain: Length-based Overfitting in Transformer Models - DusanVaris - OndřejBojar + DusanVaris + OndřejBojar 8246–8257 Transformer-based sequence-to-sequence architectures, while achieving state-of-the-art results on a large number of NLP tasks, can still suffer from overfitting during training. In practice, this is usually countered either by applying regularization methods (e.g. dropout, L2-regularization) or by providing huge amounts of training data. Additionally, Transformer and other architectures are known to struggle when generating very long sequences. For example, in machine translation, the neural-based systems perform worse on very long sequences when compared to the preceding phrase-based translation approaches (Koehn and Knowles, 2017). We present results which suggest that the issue might also be in the mismatch between the length distributions of the training and validation data combined with the aforementioned tendency of the neural networks to overfit to the training data. We demonstrate on a simple string editing tasks and a machine translation task that the Transformer model performance drops significantly when facing sequences of length diverging from the length distribution in the training data. Additionally, we show that the observed drop in performance is due to the hypothesis length corresponding to the lengths seen by the model during training rather than the length of the input sequence. 2021.emnlp-main.650 @@ -8934,7 +8934,7 @@ Is Information Density Uniform in Task-Oriented Dialogues? MarioGiulianelli ArabellaSinclair - RaquelFernández + RaquelFernández 8271–8283 The Uniform Information Density principle states that speakers plan their utterances to reduce fluctuations in the density of the information transmitted. In this paper, we test whether, and within which contextual units this principle holds in task-oriented dialogues. We show that there is evidence supporting the principle in written dialogues where participants play a cooperative reference game as well as in spoken dialogues involving instruction giving and following. Our study underlines the importance of identifying the relevant contextual components, showing that information content increases particularly within topically and referentially related contextual units. 2021.emnlp-main.652 @@ -9095,7 +9095,7 @@ AlessandroRaganato RaúlVázquez MathiasCreutz - JörgTiedemann + JörgTiedemann 8449–8456 Zero-shot translations is a fascinating feature of Multilingual Neural Machine Translation (MNMT) systems. These MNMT models are usually trained on English-centric data, i.e. English either as the source or target language, and with a language label prepended to the input indicating the target language. However, recent work has highlighted several flaws of these models in zero-shot scenarios where language labels are ignored and the wrong language is generated or different runs show highly unstable results. In this paper, we investigate the benefits of an explicit alignment to language labels in Transformer-based MNMT models in the zero-shot context, by jointly training one cross attention head with word alignment supervision to stress the focus on the target language label. We compare and evaluate several MNMT systems on three multilingual MT benchmarks of different sizes, showing that simply supervising one cross attention head to focus both on word alignments and language labels reduces the bias towards translating into the wrong language, improving the zero-shot performance overall. Moreover, as an additional advantage, we find that our alignment supervision leads to more stable results across different training runs. 2021.emnlp-main.664 @@ -9110,7 +9110,7 @@ Lutfi KeremSenel PhilippDufter FrançoisYvon - HinrichSchütze + HinrichSchütze 8457–8469 With the advent of end-to-end deep learning approaches in machine translation, interest in word alignments initially decreased; however, they have again become a focus of research more recently. Alignments are useful for typological research, transferring formatting like markup to translated texts, and can be used in the decoding of machine translation systems. At the same time, massively multilingual processing is becoming an important NLP scenario, and pretrained language and machine translation models that are truly multilingual are proposed. However, most alignment algorithms rely on bitexts only and do not leverage the fact that many parallel corpora are multiparallel. In this work, we exploit the multiparallelity of corpora by representing an initial set of bilingual alignments as a graph and then predicting additional edges in the graph. We present two graph algorithms for edge prediction: one inspired by recommender systems and one based on network link prediction. Our experimental results show absolute improvements in F1 of up to 28% over the baseline bilingual word aligner in different datasets. 2021.emnlp-main.665 @@ -9123,7 +9123,7 @@ EvaHasler TobiasDomhan JonayTrenous - KeTran + KeTran BillByrne FelixHieber 8470–8477 @@ -9148,7 +9148,7 @@ Effective Fine-Tuning Methods for Cross-lingual Adaptation TaoYu - ShafiqJoty + ShafiqJoty 8492–8501 Large scale multilingual pre-trained language models have shown promising results in zero- and few-shot cross-lingual tasks. However, recent studies have shown their lack of generalizability when the languages are structurally dissimilar. In this work, we propose a novel fine-tuning method based on co-training that aims to learn more generalized semantic equivalences as a complementary to multilingual language modeling using the unlabeled data in the target language. We also propose an adaption method based on contrastive learning to better capture the semantic relationship in the parallel data, when a few translation pairs are available. To show our method’s effectiveness, we conduct extensive experiments on cross-lingual inference and review classification tasks across various languages. We report significant gains compared to directly fine-tuning multilingual pre-trained models and other semi-supervised alternatives. 2021.emnlp-main.668 @@ -9158,8 +9158,8 @@ Rethinking Data Augmentation for Low-Resource Neural Machine Translation: A Multi-Task Learning Approach - Víctor M.Sánchez-Cartagena - MiquelEsplà-Gomis + Víctor M.Sánchez-Cartagena + MiquelEsplà-Gomis Juan AntonioPérez-Ortiz FelipeSánchez-Martínez 8502–8516 @@ -9195,7 +9195,7 @@ Discrete and Soft Prompting for Multilingual Models MengjieZhao - HinrichSchütze + HinrichSchütze 8547–8555 It has been shown for English that discrete and soft prompting perform strongly in few-shot learning with pretrained language models (PLMs). In this paper, we show that discrete and soft prompting perform better than finetuning in multilingual cases: Crosslingual transfer and in-language training of multilingual natural language inference. For example, with 48 English training examples, finetuning obtains 33.74% accuracy in crosslingual transfer, barely surpassing the majority baseline (33.33%). In contrast, discrete and soft prompting outperform finetuning, achieving 36.43% and 38.79%. We also demonstrate good performance of prompting with training data in multiple languages other than English. 2021.emnlp-main.672 @@ -9235,7 +9235,7 @@ RudraMurthy SamarthBharadwaj KarthikSankaranarayanan - PushpakBhattacharyya + PushpakBhattacharyya 8584–8595 We explore the impact of leveraging the relatedness of languages that belong to the same family in NLP models using multilingual fine-tuning. We hypothesize and validate that multilingual fine-tuning of pre-trained language models can yield better performance on downstream NLP applications, compared to models fine-tuned on individual languages. A first of its kind detailed study is presented to track performance change as languages are added to a base language in a graded and greedy (in the sense of best boost of performance) manner; which reveals that careful selection of subset of related languages can significantly improve performance than utilizing all related languages. The Indo-Aryan (IA) language family is chosen for the study, the exact languages being Bengali, Gujarati, Hindi, Marathi, Oriya, Punjabi and Urdu. The script barrier is crossed by simple rule-based transliteration of the text of all languages to Devanagari. Experiments are performed on mBERT, IndicBERT, MuRIL and two RoBERTa-based LMs, the last two being pre-trained by us. Low resource languages, such as Oriya and Punjabi, are found to be the largest beneficiaries of multilingual fine-tuning. Textual Entailment, Entity Classification, Section Title Prediction, tasks of IndicGLUE and POS tagging form our test bed. Compared to monolingual fine tuning we get relative performance improvement of up to 150% in the downstream tasks. The surprise take-away is that for any language there is a particular combination of other languages which yields the best performance, and any additional language is in fact detrimental. 2021.emnlp-main.675 @@ -9248,8 +9248,8 @@ Comparing Feature-Engineering and Feature-Learning Approaches for Multilingual Translationese Classification DariaPylypenko KwabenaAmponsah-Kaakyire - KoelDutta Chowdhury - Josefvan Genabith + KoelDutta Chowdhury + Josefvan Genabith CristinaEspaña-Bonet 8596–8611 Traditional hand-crafted linguistically-informed features have often been used for distinguishing between translated and original non-translated texts. By contrast, to date, neural architectures without manual feature engineering have been less explored for this task. In this work, we (i) compare the traditional feature-engineering-based approach to the feature-learning-based one and (ii) analyse the neural architectures in order to investigate how well the hand-crafted features explain the variance in the neural models’ predictions. We use pre-trained neural word embeddings, as well as several end-to-end neural architectures in both monolingual and multilingual settings and compare them to feature-engineering-based SVM classifiers. We show that (i) neural architectures outperform other approaches by more than 20 accuracy points, with the BERT-based model performing the best in both the monolingual and multilingual settings; (ii) while many individual hand-crafted translationese features correlate with neural model predictions, feature importance analysis shows that the most important features for neural and classical architectures differ; and (iii) our multilingual experiments provide empirical evidence for translationese universals across languages. @@ -9286,9 +9286,9 @@ RanWang Xi’aoSu SiyuLong - XinyuDai + XinyuDai ShujianHuang - JiajunChen + JiajunChen 8633–8646 Large-scale multi-label text classification (LMTC) tasks often face long-tailed label distributions, where many labels have few or even no training instances. Although current methods can exploit prior knowledge to handle these few/zero-shot labels, they neglect the meta-knowledge contained in the dataset that can guide models to learn with few samples. In this paper, for the first time, this problem is addressed from a meta-learning perspective. However, the simple extension of meta-learning approaches to multi-label classification is sub-optimal for LMTC tasks due to long-tailed label distribution and coexisting of few- and zero-shot scenarios. We propose a meta-learning approach named META-LMTC. Specifically, it constructs more faithful and more diverse tasks according to well-designed sampling strategies and directly incorporates the objective of adapting to new low-resource tasks into the meta-learning phase. Extensive experiments show that META-LMTC achieves state-of-the-art performance against strong baselines and can still enhance powerful BERTlike models. 2021.emnlp-main.679 @@ -9337,7 +9337,7 @@ <fixed-case>ST</fixed-case>a<fixed-case>CK</fixed-case>: Sentence Ordering with Temporal Commonsense Knowledge DeepanwayGhosal NavonilMajumder - RadaMihalcea + RadaMihalcea SoujanyaPoria 8676–8686 Sentence order prediction is the task of finding the correct order of sentences in a randomly ordered document. Correctly ordering the sentences requires an understanding of coherence with respect to the chronological sequence of events described in the text. Document-level contextual understanding and commonsense knowledge centered around these events are often essential in uncovering this coherence and predicting the exact chronological order. In this paper, we introduce STaCK — a framework based on graph neural networks and temporal commonsense knowledge to model global information and predict the relative order of sentences. Our graph network accumulates temporal evidence using knowledge of ‘past’ and ‘future’ and formulates sentence ordering as a constrained edge classification problem. We report results on five different datasets, and empirically show that the proposed method is naturally suitable for order prediction. The implementation of this work is available at: https://github.com/declare-lab/sentence-ordering. @@ -9348,7 +9348,7 @@ Preventing Author Profiling through Zero-Shot Multilingual Back-Translation - David IfeoluwaAdelani + David IfeoluwaAdelani MiaoranZhang XiaoyuShen AliDavody @@ -9365,7 +9365,7 @@ <fixed-case>C</fixed-case>ode<fixed-case>T</fixed-case>5: Identifier-aware Unified Pre-trained Encoder-Decoder Models for Code Understanding and Generation YueWang WeishiWang - ShafiqJoty + ShafiqJoty Steven C.H.Hoi 8696–8708 Pre-trained models for Natural Languages (NL) like BERT and GPT have been recently shown to transfer well to Programming Languages (PL) and largely benefit a broad set of code-related tasks. Despite their success, most current methods either rely on an encoder-only (or decoder-only) pre-training that is suboptimal for generation (resp. understanding) tasks or process the code snippet in the same way as NL, neglecting the special characteristics of PL such as token types. We present CodeT5, a unified pre-trained encoder-decoder Transformer model that better leverages the code semantics conveyed from the developer-assigned identifiers. Our model employs a unified framework to seamlessly support both code understanding and generation tasks and allows for multi-task learning. Besides, we propose a novel identifier-aware pre-training task that enables the model to distinguish which code tokens are identifiers and to recover them when they are masked. Furthermore, we propose to exploit the user-written code comments with a bimodal dual generation task for better NL-PL alignment. Comprehensive experiments show that CodeT5 significantly outperforms prior methods on understanding tasks such as code defect detection and clone detection, and generation tasks across various directions including PL-NL, NL-PL, and PL-PL. Further analysis reveals that our model can better capture semantic information from code. Our code and pre-trained models are released at https://github.com/salesforce/CodeT5. @@ -9525,7 +9525,7 @@ <fixed-case>B</fixed-case>elief<fixed-case>B</fixed-case>ank: Adding Memory to a Pre-Trained Language Model for a Systematic Notion of Belief NoraKassner OyvindTafjord - HinrichSchütze + HinrichSchütze PeterClark 8849–8861 Although pretrained language models (PTLMs) contain significant amounts of world knowledge, they can still produce inconsistent answers to questions when probed, even after specialized training. As a result, it can be hard to identify what the model actually “believes” about the world, making it susceptible to inconsistent behavior and simple errors. Our goal is to reduce these problems. Our approach is to embed a PTLM in a broader system that also includes an evolving, symbolic memory of beliefs – a BeliefBank – that records but then may modify the raw PTLM answers. We describe two mechanisms to improve belief consistency in the overall system. First, a reasoning component – a weighted MaxSAT solver – revises beliefs that significantly clash with others. Second, a feedback component issues future queries to the PTLM using known beliefs as context. We show that, in a controlled experimental setting, these two mechanisms result in more consistent beliefs in the overall system, improving both the accuracy and consistency of its answers over time. This is significant as it is a first step towards PTLM-based architectures with a systematic notion of belief, enabling them to construct a more coherent picture of the world, and improve over time without model retraining. @@ -9549,7 +9549,7 @@ <fixed-case>I</fixed-case>ndo<fixed-case>NLG</fixed-case>: Benchmark and Resources for Evaluating <fixed-case>I</fixed-case>ndonesian Natural Language Generation SamuelCahyawijaya - Genta IndraWinata + Genta IndraWinata BryanWilie KarissaVincentio XiaohongLi @@ -9609,7 +9609,7 @@ What happens if you treat ordinal ratings as interval data? Human evaluations in <fixed-case>NLP</fixed-case> are even more under-powered than you think - David M.Howcroft + David M.Howcroft VerenaRieser 8932–8939 Previous work has shown that human evaluations in NLP are notoriously under-powered. Here, we argue that there are two common factors which make this problem even worse: NLP studies usually (a) treat ordinal data as interval data and (b) operate under high variance settings while the differences they are hoping to detect are often subtle. We demonstrate through simulation that ordinal mixed effects models are better able to detect small differences between models, especially in high variance settings common in evaluations of generated texts. We release tools for researchers to conduct their own power analysis and test their assumptions. We also make recommendations for improving statistical power. @@ -9704,7 +9704,7 @@ Cross-Domain Label-Adaptive Stance Detection MomchilHardalov ArnavArora - PreslavNakov + PreslavNakov IsabelleAugenstein 9011–9028 Stance detection concerns the classification of a writer’s viewpoint towards a target. There are different task variants, e.g., stance of a tweet vs. a full article, or stance with respect to a claim vs. an (implicit) topic. Moreover, task definitions vary, which includes the label inventory, the data collection, and the annotation protocol. All these aspects hinder cross-domain studies, as they require changes to standard domain adaptation approaches. In this paper, we perform an in-depth analysis of 16 stance detection datasets, and we explore the possibility for cross-domain learning from them. Moreover, we propose an end-to-end unsupervised framework for out-of-domain prediction of unseen, user-defined labels. In particular, we combine domain adaptation techniques such as mixture of experts and domain-adversarial training with label embeddings, and we demonstrate sizable performance gains over strong baselines, both (i) in-domain, i.e., for seen targets, and (ii) out-of-domain, i.e., for unseen targets. Finally, we perform an exhaustive analysis of the cross-domain results, and we highlight the important factors influencing the model performance. @@ -9730,7 +9730,7 @@ Distilling Relation Embeddings from Pretrained Language Models AsahiUshio - JoseCamacho-Collados + JoseCamacho-Collados StevenSchockaert 9044–9062 Pre-trained language models have been found to capture a surprisingly rich amount of lexical knowledge, ranging from commonsense properties of everyday concepts to detailed factual knowledge about named entities. Among others, this makes it possible to distill high-quality word vectors from pre-trained language models. However, it is currently unclear to what extent it is possible to distill relation embeddings, i.e. vectors that characterize the relationship between two words. Such relation embeddings are appealing because they can, in principle, encode relational knowledge in a more fine-grained way than is possible with knowledge graphs. To obtain relation embeddings from a pre-trained language model, we encode word pairs using a (manually or automatically generated) prompt, and we fine-tune the language model such that relationally similar word pairs yield similar output vectors. We find that the resulting relation embeddings are highly competitive on analogy (unsupervised) and relation classification (supervised) benchmarks, even without any task-specific fine-tuning. Source code to reproduce our experimental results and the model checkpoints are available in the following repository: https://github.com/asahi417/relbert @@ -9742,7 +9742,7 @@ Avoiding Inference Heuristics in Few-shot Prompt-based Finetuning PrasetyaUtama - Nafise SadatMoosavi + Nafise SadatMoosavi VictorSanh IrynaGurevych 9063–9074 @@ -9755,7 +9755,7 @@ A Differentiable Relaxation of Graph Segmentation and Alignment for <fixed-case>AMR</fixed-case> Parsing ChunchuanLyu - Shay B.Cohen + Shay B.Cohen IvanTitov 9075–9091 Abstract Meaning Representations (AMR) are a broad-coverage semantic formalism which represents sentence meaning as a directed acyclic graph. To train most AMR parsers, one needs to segment the graph into subgraphs and align each such subgraph to a word in a sentence; this is normally done at preprocessing, relying on hand-crafted rules. In contrast, we treat both alignment and segmentation as latent variables in our model and induce them as part of end-to-end training. As marginalizing over the structured latent variables is infeasible, we use the variational autoencoding framework. To ensure end-to-end differentiable optimization, we introduce a differentiable relaxation of the segmentation and alignment problems. We observe that inducing segmentation yields substantial gains over using a ‘greedy’ segmentation heuristic. The performance of our method also approaches that of a model that relies on the segmentation rules of Lyu and Titov (2018), which were hand-crafted to handle individual AMR constructions. @@ -9782,7 +9782,7 @@ SouravDutta HaythamAssem TheodorusFransen - John P.McCrae + John P.McCrae 9099–9113 Multilingual sentence embeddings capture rich semantic information not only for measuring similarity between texts but also for catering to a broad range of downstream cross-lingual NLP tasks. State-of-the-art multilingual sentence embedding models require large parallel corpora to learn efficiently, which confines the scope of these models. In this paper, we propose a novel sentence embedding framework based on an unsupervised loss function for generating effective multilingual sentence embeddings, eliminating the need for parallel corpora. We capture semantic similarity and relatedness between sentences using a multi-task loss function for training a dual encoder model mapping different languages onto the same vector space. We demonstrate the efficacy of an unsupervised as well as a weakly supervised variant of our framework on STS, BUCC and Tatoeba benchmark tasks. The proposed unsupervised sentence embedding framework outperforms even supervised state-of-the-art methods for certain under-resourced languages on the Tatoeba dataset and on a monolingual benchmark. Further, we show enhanced zero-shot learning capabilities for more than 30 languages, with the model being trained on only 13 languages. Our model can be extended to a wide range of languages from any language family, as it overcomes the requirement of parallel corpora for training. 2021.emnlp-main.716 @@ -9792,7 +9792,7 @@ <fixed-case>NB</fixed-case>-<fixed-case>MLM</fixed-case>: Efficient Domain Adaptation of Masked Language Models for Sentiment Analysis - NikolayArefyev + NikolayArefyev DmitriiKharchev ArtemShelmanov 9114–9124 @@ -9806,7 +9806,7 @@ Revisiting Self-training for Few-shot Learning of Language Model YimingChen YanZhang - ChenZhang + ChenZhang GrandeeLee RanCheng HaizhouLi @@ -9937,7 +9937,7 @@ Towards Label-Agnostic Emotion Embeddings - SvenBuechel + SvenBuechel LuiseModersohn UdoHahn 9231–9249 @@ -9974,7 +9974,7 @@ <fixed-case>PASTE</fixed-case>: A Tagging-Free Decoding Framework Using Pointer Networks for Aspect Sentiment Triplet Extraction RajdeepMukherjee - TapasNayak + TapasNayak YashButala SourangshuBhattacharya PawanGoyal @@ -10038,7 +10038,7 @@ Looking for Confirmations: An Effective and Human-Like Visual Dialogue Strategy AlbertoTestoni - RaffaellaBernardi + RaffaellaBernardi 9330–9338 Generating goal-oriented questions in Visual Dialogue tasks is a challenging and longstanding problem. State-Of-The-Art systems are shown to generate questions that, although grammatically correct, often lack an effective strategy and sound unnatural to humans. Inspired by the cognitive literature on information search and cross-situational word learning, we design Confirm-it, a model based on a beam search re-ranking algorithm that guides an effective goal-oriented strategy by asking questions that confirm the model’s conjecture about the referent. We take the GuessWhat?! game as a case-study. We show that dialogues generated by Confirm-it are more natural and effective than beam search decoding without re-ranking. 2021.emnlp-main.736 @@ -10051,7 +10051,7 @@ YingzhuZhao ChongjiaNi Cheung-ChiLeung - ShafiqJoty + ShafiqJoty Eng SiongChng BinMa 9339–9349 @@ -10136,7 +10136,7 @@ Enriching and Controlling Global Semantics for Text Summarization ThongNguyen - Anh TuanLuu + Anh TuanLuu TrucLu ThoQuan 9443–9456 @@ -10206,7 +10206,7 @@ FuliLuo ZhiyuanZhang ChuanqiTan - BaobaoChang + BaobaoChang SongfangHuang FeiHuang 9514–9528 @@ -10306,7 +10306,7 @@ ChenZhao ChenyanXiong JordanBoyd-Graber - HalDaumé III + HalDaumé III 9612–9622 Open-domain question answering answers a question based on evidence retrieved from a large corpus. State-of-the-art neural approaches require intermediate evidence annotations for training. However, such intermediate annotations are expensive, and methods that rely on them cannot transfer to the more common setting, where only question–answer pairs are available. This paper investigates whether models can learn to find evidence from a large corpus, with only distant supervision from answer labels for model training, thereby generating no additional annotation cost. We introduce a novel approach (DistDR) that iteratively improves over a weak retriever by alternately finding evidence from the up-to-date model and encouraging the model to learn the most likely evidence. Without using any evidence labels, DistDR is on par with fully-supervised state-of-the-art methods on both multi-hop and single-hop QA benchmarks. Our analysis confirms that DistDR finds more accurate evidence over iterations, which leads to model improvements. The code is available at https://github.com/henryzhao5852/DistDR. 2021.emnlp-main.756 @@ -10340,7 +10340,7 @@ Numerical reasoning in machine reading comprehension tasks: are we there yet? HadeelAl-Negheimish - PranavaMadhyastha + PranavaMadhyastha AlessandraRusso 9643–9649 Numerical reasoning based machine reading comprehension is a task that involves reading comprehension along with using arithmetic operations such as addition, subtraction, sorting and counting. The DROP benchmark (Dua et al., 2019) is a recent dataset that has inspired the design of NLP models aimed at solving this task. The current standings of these models in the DROP leaderboard, over standard metrics, suggests that the models have achieved near-human performance. However, does this mean that these models have learned to reason? In this paper, we present a controlled study on some of the top-performing model architectures for the task of numerical reasoning. Our observations suggest that the standard metrics are incapable of measuring progress towards such tasks. @@ -10594,7 +10594,7 @@ JiafengGuo ZixuanLi XiaolongJin - XueqiCheng + XueqiCheng 9869–9878 Scripts are structured sequences of events together with the participants, which are extracted from the texts. Script event prediction aims to predict the subsequent event given the historical events in the script. Two kinds of information facilitate this task, namely, the event-level information and the script-level information. At the event level, existing studies view an event as a verb with its participants, while neglecting other useful properties, such as the state of the participants. At the script level, most existing studies only consider a single event sequence corresponding to one common protagonist. In this paper, we propose a Transformer-based model, called MCPredictor, which integrates deep event-level and script-level information for script event prediction. At the event level, MCPredictor utilizes the rich information in the text to obtain more comprehensive event semantic representations. At the script-level, it considers multiple event sequences corresponding to different participants of the subsequent event. The experimental results on the widely-used New York Times corpus demonstrate the effectiveness and superiority of the proposed model. 2021.emnlp-main.777 @@ -10663,7 +10663,7 @@ Identifying Morality Frames in Political Tweets using Relational Learning ShamikRoy - Maria LeonorPacheco + Maria LeonorPacheco DanGoldwasser 9939–9958 Extracting moral sentiment from text is a vital component in understanding public opinion, social movements, and policy decisions. The Moral Foundation Theory identifies five moral foundations, each associated with a positive and negative polarity. However, moral sentiment is often motivated by its targets, which can correspond to individuals or collective entities. In this paper, we introduce morality frames, a representation framework for organizing moral attitudes directed at different entities, and come up with a novel and high-quality annotated dataset of tweets written by US politicians. Then, we propose a relational learning model to predict moral attitudes towards entities and moral foundations jointly. We do qualitative and quantitative evaluations, showing that moral sentiment towards entities differs highly across political ideologies. @@ -10715,7 +10715,7 @@ JiseonKim EldenGriggs In SongKim - AliceOh + AliceOh 10048–10064 Bill writing is a critical element of representative democracy. However, it is often overlooked that most legislative bills are derived, or even directly copied, from other bills. Despite the significance of bill-to-bill linkages for understanding the legislative process, existing approaches fail to address semantic similarities across bills, let alone reordering or paraphrasing which are prevalent in legal document writing. In this paper, we overcome these limitations by proposing a 5-class classification task that closely reflects the nature of the bill generation process. In doing so, we construct a human-labeled dataset of 4,721 bill-to-bill relationships at the subsection-level and release this annotated dataset to the research community. To augment the dataset, we generate synthetic data with varying degrees of similarity, mimicking the complex bill writing process. We use BERT variants and apply multi-stage training, sequentially fine-tuning our models with synthetic and human-labeled datasets. We find that the predictive performance significantly improves when training with both human-labeled and synthetic data. Finally, we apply our trained model to infer section- and bill-level similarities. Our analysis shows that the proposed methodology successfully captures the similarities across legal documents at various levels of aggregation. 2021.emnlp-main.787 @@ -10743,7 +10743,7 @@ Pranav JeevanP PrerakGandhi DipteshKanojia - PushpakBhattacharyya + PushpakBhattacharyya 10073–10079 Computational Humour (CH) has attracted the interest of Natural Language Processing and Computational Linguistics communities. Creating datasets for automatic measurement of humour quotient is difficult due to multiple possible interpretations of the content. In this work, we create a multi-modal humour-annotated dataset (~40 hours) using stand-up comedy clips. We devise a novel scoring mechanism to annotate the training data with a humour quotient score using the audience’s laughter. The normalized duration (laughter duration divided by the clip duration) of laughter in each clip is used to compute this humour coefficient score on a five-point scale (0-4). This method of scoring is validated by comparing with manually annotated scores, wherein a quadratic weighted kappa of 0.6 is obtained. We use this dataset to train a model that provides a ‘funniness’ score, on a five-point scale, given the audio and its corresponding text. We compare various neural language models for the task of humour-rating and achieve an accuracy of 0.813 in terms of Quadratic Weighted Kappa (QWK). Our ‘Open Mic’ dataset is released for further research along with the code. 2021.emnlp-main.789 @@ -10875,7 +10875,7 @@ RakshaShenoy NicoHerbig AntonioKrüger - Josefvan Genabith + Josefvan Genabith 10173–10185 Compared to fully manual translation, post-editing (PE) machine translation (MT) output can save time and reduce errors. Automatic word-level quality estimation (QE) aims to predict the correctness of words in MT output and holds great promise to aid PE by flagging problematic output. Quality of QE is crucial, as incorrect QE might lead to translators missing errors or wasting time on already correct MT output. Achieving accurate automatic word-level QE is very hard, and it is currently not known (i) at what quality threshold QE is actually beginning to be useful for human PE, and (ii), how to best present word-level QE information to translators. In particular, should word-level QE visualization indicate uncertainty of the QE model or not? In this paper, we address both research questions with real and simulated word-level QE, visualizations, and user studies, where time, subjective ratings, and quality of the final translations are assessed. Results show that current word-level QE models are not yet good enough to support PE. Instead, quality levels of > 80% F1 are required. For helpful quality levels, a visualization reflecting the uncertainty of the QE model is preferred. Our analysis further shows that speed gains achieved through QE are not merely a result of blindly trusting the QE system, but that the quality of the final translations also improves. The threshold results from the paper establish a quality goal for future word-level QE research. 2021.emnlp-main.799 @@ -10901,7 +10901,7 @@ Neural Machine Translation Quality and Post-Editing Performance VilémZouhar MartinPopel - OndřejBojar + OndřejBojar AlešTamchyna 10204–10214 We test the natural expectation that using MT in professional translation saves human processing time. The last such study was carried out by Sanchez-Torron and Koehn (2016) with phrase-based MT, artificially reducing the translation quality. In contrast, we focus on neural MT (NMT) of high quality, which has become the state-of-the-art approach since then and also got adopted by most translation companies. Through an experimental study involving over 30 professional translators for English -> Czech translation, we examine the relationship between NMT performance and post-editing time and quality. Across all models, we found that better MT systems indeed lead to fewer changes in the sentences in this industry setting. The relation between system quality and post-editing time is however not straightforward and, contrary to the results on phrase-based MT, BLEU is definitely not a stable predictor of the time or final output quality. @@ -10946,7 +10946,7 @@ <fixed-case>M</fixed-case>easuring Association Between Labels and Free-Text Rationales SarahWiegreffe AnaMarasović - Noah A.Smith + Noah A.Smith 10266–10284 In interpretable NLP, we require faithful rationales that reflect the model’s decision-making process for an explained instance. While prior work focuses on extractive rationales (a subset of the input words), we investigate their less-studied counterpart: free-text natural language rationales. We demonstrate that *pipelines*, models for faithful rationalization on information-extraction style tasks, do not work as well on “reasoning” tasks requiring free-text rationales. We turn to models that *jointly* predict and rationalize, a class of widely used high-performance models for free-text rationalization. We investigate the extent to which the labels and rationales predicted by these models are associated, a necessary property of faithful explanation. Via two tests, *robustness equivalence* and *feature importance agreement*, we find that state-of-the-art T5-based joint models exhibit desirable properties for explaining commonsense question-answering and natural language inference, indicating their potential for producing faithful free-text rationales. 2021.emnlp-main.804 @@ -10985,8 +10985,8 @@ Rationales for Sequential Predictions KeyonVafa YuntianDeng - DavidBlei - AlexanderRush + DavidBlei + AlexanderRush 10314–10332 Sequence models are a critical component of modern NLP systems, but their predictions are difficult to explain. We consider model explanations though rationales, subsets of context that can explain individual model predictions. We find sequential rationales by solving a combinatorial optimization: the best rationale is the smallest subset of input tokens that would predict the same output as the full sequence. Enumerating all subsets is intractable, so we propose an efficient greedy algorithm to approximate this objective. The algorithm, which is called greedy rationalization, applies to any model. For this approach to be effective, the model should form compatible conditional distributions when making predictions on incomplete subsets of the context. This condition can be enforced with a short fine-tuning step. We study greedy rationalization on language modeling and machine translation. Compared to existing baselines, greedy rationalization is best at optimizing the sequential objective and provides the most faithful rationales. On a new dataset of annotated sequential rationales, greedy rationales are most similar to human rationales. 2021.emnlp-main.807 @@ -11043,7 +11043,7 @@ GaetanoRossiello NandanaMihindukulasooriya SugatoBagchi - AlfioGliozzo + AlfioGliozzo 10379–10394 Noun phrases and Relation phrases in open knowledge graphs are not canonicalized, leading to an explosion of redundant and ambiguous subject-relation-object triples. Existing approaches to solve this problem take a two-step approach. First, they generate embedding representations for both noun and relation phrases, then a clustering algorithm is used to group them using the embeddings as features. In this work, we propose Canonicalizing Using Variational AutoEncoders and Side Information (CUVA), a joint model to learn both embeddings and cluster assignments in an end-to-end approach, which leads to a better vector representation for the noun and relation phrases. Our evaluation over multiple benchmarks shows that CUVA outperforms the existing state-of-the-art approaches. Moreover, we introduce CanonicNell, a novel dataset to evaluate entity canonicalization systems. 2021.emnlp-main.811 @@ -11089,7 +11089,7 @@ AhmedEl-Kishky AdithyaRenduchintala JamesCross - FranciscoGuzmán + FranciscoGuzmán PhilippKoehn 10424–10430 Cross-lingual named-entity lexica are an important resource to multilingual NLP tasks such as machine translation and cross-lingual wikification. While knowledge bases contain a large number of entities in high-resource languages such as English and French, corresponding entities for lower-resource languages are often missing. To address this, we propose Lexical-Semantic-Phonetic Align (LSP-Align), a technique to automatically mine cross-lingual entity lexica from mined web data. We demonstrate LSP-Align outperforms baselines at extracting cross-lingual entity pairs and mine 164 million entity pairs from 120 different languages aligned with English. We release these cross-lingual entity pairs along with the massively multilingual tagged named entity corpus as a resource to the NLP community. @@ -11207,7 +11207,7 @@ A Root of a Problem: Optimizing Single-Root Dependency Parsing MilošStanojević - Shay B.Cohen + Shay B.Cohen 10540–10557 We describe two approaches to single-root dependency parsing that yield significant speed ups in such parsing. One approach has been previously used in dependency parsers in practice, but remains undocumented in the parsing literature, and is considered a heuristic. We show that this approach actually finds the optimal dependency tree. The second approach relies on simple reweighting of the inference graph being input to the dependency parser and has an optimal running time. Here, we again show that this approach is fully correct and identifies the highest-scoring parse tree. Our experiments demonstrate a manyfold speed up compared to a previous graph-based state-of-the-art parser without any loss in accuracy or optimality. 2021.emnlp-main.823 @@ -11242,7 +11242,7 @@ A New Representation for Span-based <fixed-case>CCG</fixed-case> Parsing - YoshihideKato + YoshihideKato ShigekiMatsubara 10579–10584 This paper proposes a new representation for CCG derivations. CCG derivations are represented as trees whose nodes are labeled with categories strictly restricted by CCG rule schemata. This characteristic is not suitable for span-based parsing models because they predict node labels independently. In other words, span-based models may generate invalid CCG derivations that violate the rule schemata. Our proposed representation decomposes CCG derivations into several independent pieces and prevents the span-based parsing models from violating the schemata. Our experimental result shows that an off-the-shelf span-based parser with our representation is comparable with previous CCG parsers. @@ -11279,7 +11279,7 @@ FrançoisLagunas EllaCharlaix VictorSanh - AlexanderRush + AlexanderRush 10619–10629 Pre-training has improved model accuracy for both classification and generation tasks at the cost of introducing much larger and slower models. Pruning methods have proven to be an effective way of reducing model size, whereas distillation methods are proven for speeding up inference. We introduce a block pruning approach targeting both small and fast models. Our approach extends structured methods by considering blocks of any size and integrates this structure into the movement pruning paradigm for fine-tuning. We find that this approach learns to prune out full components of the underlying model, such as attention heads. Experiments consider classification and generation tasks, yielding among other results a pruned model that is a 2.4x faster, 74% smaller BERT on SQuAD v1, with a 1% drop on F1, competitive both with distilled models in speed and pruned models in size. 2021.emnlp-main.829 @@ -11297,7 +11297,7 @@ NikolaosPappas YiMao WeizhuChen - Noah A.Smith + Noah A.Smith 10630–10643 Transformers have outperformed recurrent neural networks (RNNs) in natural language generation. But this comes with a signifi- cant computational cost, as the attention mechanism’s complexity scales quadratically with sequence length. Efficient transformer variants have received increasing interest in recent works. Among them, a linear-complexity recurrent variant has proven well suited for autoregressive generation. It approximates the softmax attention with randomized or heuristic feature maps, but can be difficult to train and may yield suboptimal accuracy. This work aims to convert a pretrained transformer into its efficient recurrent counterpart, improving efficiency while maintaining accuracy. Specifically, we propose a swap-then-finetune procedure: in an off-the-shelf pretrained transformer, we replace the softmax attention with its linear-complexity recurrent alternative and then finetune. With a learned feature map, our approach provides an improved tradeoff between efficiency and accuracy over the standard transformer and other recurrent variants. We also show that the finetuning process has lower training cost relative to training these recurrent variants from scratch. As many models for natural language tasks are increasingly dependent on large-scale pretrained transformers, this work presents a viable approach to improving inference efficiency without repeating the expensive pretraining process. 2021.emnlp-main.830 @@ -11336,7 +11336,7 @@ <fixed-case>I</fixed-case>ndo<fixed-case>BERT</fixed-case>weet: A Pretrained Language Model for <fixed-case>I</fixed-case>ndonesian <fixed-case>T</fixed-case>witter with Effective Domain-Specific Vocabulary Initialization FajriKoto Jey HanLau - TimothyBaldwin + TimothyBaldwin 10660–10668 We present IndoBERTweet, the first large-scale pretrained model for Indonesian Twitter that is trained by extending a monolingually-trained Indonesian BERT model with additive domain-specific vocabulary. We focus in particular on efficient model adaptation under vocabulary mismatch, and benchmark different ways of initializing the BERT embedding layer for new word types. We find that initializing with the average BERT subword embedding makes pretraining five times faster, and is more effective than proposed methods for vocabulary adaptation in terms of extrinsic evaluation over seven Twitter-based datasets. 2021.emnlp-main.833 @@ -11428,7 +11428,7 @@ Mohammad JavadHosseini SanderBijl de Vroe MarkJohnson - MarkSteedman + MarkSteedman 10758–10768 Drawing inferences between open-domain natural language predicates is a necessity for true language understanding. There has been much progress in unsupervised learning of entailment graphs for this purpose. We make three contributions: (1) we reinterpret the Distributional Inclusion Hypothesis to model entailment between predicates of different valencies, like DEFEAT(Biden, Trump) entails WIN(Biden); (2) we actualize this theory by learning unsupervised Multivalent Entailment Graphs of open-domain predicates; and (3) we demonstrate the capabilities of these graphs on a novel question answering task. We show that directional entailment is more helpful for inference than non-directional similarity on questions of fine-grained semantics. We also show that drawing on evidence across valencies answers more questions than by using only the same valency evidence. 2021.emnlp-main.840 @@ -11543,7 +11543,7 @@ ZuchaoLi KevinParnow MasaoUtiyama - EiichiroSumita + EiichiroSumita HaiZhao 1–10 In this paper, we present MiSS, an assistant for multi-style simultaneous translation. Our proposed translation system has five key features: highly accurate translation, simultaneous translation, translation for multiple text styles, back-translation for translation quality evaluation, and grammatical error correction. With this system, we aim to provide a complete translation experience for machine translation users. Our design goals are high translation accuracy, real-time translation, flexibility, and measurable translation quality. Compared with the free commercial translation systems commonly used, our translation assistance system regards the machine translation application as a more complete and fully-featured tool for users. By incorporating additional features and giving the user better control over their experience, we improve translation efficiency and performance. Additionally, our assistant system combines machine translation, grammatical error correction, and interactive edits, and uses a crowdsourcing mode to collect more data for further training to improve both the machine translation and grammatical error correction models. A short video demonstrating our system is available at https://www.youtube.com/watch?v=ZGCo7KtRKd8. @@ -11580,7 +11580,7 @@ <fixed-case>T</fixed-case>rans<fixed-case>I</fixed-case>ns: Document Translation with Markup Reinsertion JörgSteffen - Josefvan Genabith + Josefvan Genabith 28–34 For many use cases, it is required that MT does not just translate raw text, but complex formatted documents (e.g. websites, slides, spreadsheets) and the result of the translation should reflect the formatting. This is challenging, as markup can be nested, apply to spans contiguous in source but non-contiguous in target etc. Here we present TransIns, a system for non-plain text document translation that builds on the Okapi framework and MT models trained with Marian NMT. We develop, implement and evaluate different strategies for reinserting markup into translated sentences using token alignments between source and target sentences. We propose a simple and effective strategy that compiles down all markup to single source tokens and transfers them to aligned target tokens. A first evaluation shows that this strategy yields highly accurate markup in the translated documents that outperforms the markup quality found in documents translated with popular translation services. We release TransIns under the MIT License as open-source software on https://github.com/DFKI-MLT/TransIns. An online demonstrator is available at https://transins.dfki.de. 2021.emnlp-demo.4 @@ -11591,7 +11591,7 @@ <fixed-case>ET</fixed-case>: A Workstation for Querying, Editing and Evaluating Annotated Corpora Elvisde Souza - CláudiaFreitas + CláudiaFreitas 35–41 In this paper we explore the functionalities of ET, a suite designed to support linguistic research and natural language processing tasks using corpora annotated in the CoNLL-U format. These goals are achieved by two integrated environments – Interrogatório, an environment for querying and editing annotated corpora, and Julgamento, an environment for assessing their quality. ET is open-source, built on different Python Web technologies and has Web demonstrations available on-line. ET has been intensively used in our research group for over two years, being the chosen framework for several linguistic and NLP-related studies conducted by its researchers. 2021.emnlp-demo.5 @@ -11696,10 +11696,10 @@ Semantic Context Path Labeling for Semantic Exploration of User Reviews - SalahAït-Mokhtar + SalahAït-Mokhtar CarolineBrun YvesHoppenot - AgnesSandor + AgnesSandor 106–113 In this paper we present a prototype demonstrator showcasing a novel method to perform semantic exploration of user reviews. The system enables effective navigation in a rich contextual semantic schema with a large number of structured classes indicating relevant information. In order to identify instances of the structured classes in the reviews, we defined a new Information Extraction task called Semantic Context Path (SCP) labeling, which simultaneously assigns types and semantic roles to entity mentions. Reviews can rapidly be explored based on the fine-grained and structured semantic classes. As a proof-of-concept, we have implemented this system for reviews on Points-of-Interest, in English and Korean. 2021.emnlp-demo.13 @@ -11734,8 +11734,8 @@ PhillipLee JeshwanthBheemanpally RohanPandey - AdwaitRatnaparkhi - MarilynWalker + AdwaitRatnaparkhi + MarilynWalker 124–133 Athena 2.0 is an Alexa Prize SocialBot that has been a finalist in the last two Alexa Prize Grand Challenges. One reason for Athena’s success is its novel dialogue management strategy, which allows it to dynamically construct dialogues and responses from component modules, leading to novel conversations with every interaction. Here we describe Athena’s system design and performance in the Alexa Prize during the 20/21 competition. A live demo of Athena as well as video recordings will provoke discussion on the state of the art in conversational AI. 2021.emnlp-demo.15 @@ -11792,7 +11792,7 @@ JinZhao NianwenXue JensVan Gysel - Jinho D.Choi + Jinho D.Choi 160–167 We present UMR-Writer, a web-based application for annotating Uniform Meaning Representations (UMR), a graph-based, cross-linguistically applicable semantic representation developed recently to support the development of interpretable natural language applications that require deep semantic analysis of texts. We present the functionalities of UMR-Writer and discuss the challenges in developing such a tool and how they are addressed. 2021.emnlp-demo.19 @@ -11844,7 +11844,7 @@ ThibaultGoehringer VictorMustar FrançoisLagunas - AlexanderRush + AlexanderRush ThomasWolf 175–184 The scale, variety, and quantity of publicly-available NLP datasets has grown rapidly as researchers propose new tasks, larger models, and novel benchmarks. Datasets is a community library for contemporary NLP designed to support this ecosystem. Datasets aims to standardize end-user interfaces, versioning, and documentation, while providing a lightweight front-end that behaves similarly for small datasets as for internet-scale corpora. The design of the library incorporates a distributed, community-driven approach to adding datasets and documenting usage. After a year of development, the library now includes more than 650 unique datasets, has more than 250 contributors, and has helped support a variety of novel cross-dataset research projects and shared tasks. The library is available at https://github.com/huggingface/datasets. @@ -11857,7 +11857,7 @@ Summary Explorer: Visualizing the State of the Art in Text Summarization ShahbazSyed TariqYousef - KhalidAl Khatib + KhalidAl Khatib StefanJänicke MartinPotthast 185–194 @@ -11906,7 +11906,7 @@ TimFrench MelindaHodkiewicz MichaelStewart - WeiLiu + WeiLiu 212–219 NLP systems are often challenged by difficulties arising from noisy, non-standard, and domain specific corpora. The task of lexical normalisation aims to standardise such corpora, but currently lacks suitable tools to acquire high-quality annotated data to support deep learning based approaches. In this paper, we present LexiClean, the first open-source web-based annotation tool for multi-task lexical normalisation. LexiClean’s main contribution is support for simultaneous in situ token-level modification and annotation that can be rapidly applied corpus wide. We demonstrate the usefulness of our tool through a case study on two sets of noisy corpora derived from the specialised-domain of industrial mining. We show that LexiClean allows for the rapid and efficient development of high-quality parallel corpora. A demo of our system is available at: https://youtu.be/P7_ooKrQPDU. 2021.emnlp-demo.25 @@ -11955,7 +11955,7 @@ YimengSun MargritBetke PrakashIshwar - Derry TantiWijaya + Derry TantiWijaya 242–250 When journalists cover a news story, they can cover the story from multiple angles or perspectives. These perspectives are called “frames,” and usage of one frame or another may influence public perception and opinion of the issue at hand. We develop a web-based system for analyzing frames in multilingual text documents. We propose and guide users through a five-step end-to-end computational framing analysis framework grounded in media framing theory in communication research. Users can use the framework to analyze multilingual text data, starting from the exploration of frames in user’s corpora and through review of previous framing literature (step 1-3) to frame classification (step 4) and prediction (step 5). The framework combines unsupervised and supervised machine learning and leverages a state-of-the-art (SoTA) multilingual language model, which can significantly enhance frame prediction performance while requiring a considerably small sample of manual annotations. Through the interactive website, anyone can perform the proposed computational framing analysis, making advanced computational analysis available to researchers without a programming background and bridging the digital divide within the communication research discipline in particular and the academic community in general. The system is available online at http://www.openframing.org, via an API http://www.openframing.org:5000/docs/, or through our GitHub page https://github.com/vibss2397/openFraming. 2021.emnlp-demo.28 @@ -12089,8 +12089,8 @@ TroyFeng YusenZhang TaoYu - Ahmed HassanAwadallah - DragomirRadev + Ahmed HassanAwadallah + DragomirRadev 329–338 Recent advances in summarization provide models that can generate summaries of higher quality. Such models now exist for a number of summarization tasks, including query-based summarization, dialogue summarization, and multi-document summarization. While such models and tasks are rapidly growing in the research field, it has also become challenging for non-experts to keep track of them. To make summarization methods more accessible to a wider audience, we develop SummerTime by rethinking the summarization task from the perspective of an NLP non-expert. SummerTime is a complete toolkit for text summarization, including various models, datasets, and evaluation metrics, for a full spectrum of summarization-related tasks. SummerTime integrates with libraries designed for NLP researchers, and enables users with easy-to-use APIs. With SummerTime, users can locate pipeline solutions and search for the best model with their own data, and visualize the differences, all with a few lines of code. We also provide explanations for models and evaluation metrics to help users understand the model behaviors and select models that best suit their needs. Our library, along with a notebook demo, is available at https://github.com/Yale-LILY/SummerTime. 2021.emnlp-demo.37 @@ -12156,7 +12156,7 @@ FernandoAlva-Manchego AbiolaObamuyide AmitGajbhiye - FrédéricBlain + FrédéricBlain MarinaFomicheva LuciaSpecia 382–389 @@ -12190,7 +12190,7 @@ NikitaNangia MaartenSap MarkYatskar - Samuel R.Bowman + Samuel R.Bowman YoavArtzi 1–6 Crowdsourcing from non-experts is one of the most common approaches to collecting data and annotations in NLP. Even though it is such a fundamental tool in NLP, crowdsourcing use is largely guided by common practices and the personal experience of researchers. Developing a theory of crowdsourcing use for practical language problems remains an open challenge. However, there are various principles and practices that have proven effective in generating high quality and diverse data. This tutorial exposes NLP researchers to such data collection crowdsourcing methods and principles through a detailed discussion of a diverse set of case studies. The selection of case studies focuses on challenging settings where crowdworkers are asked to write original text or otherwise perform relatively unconstrained work. Through these case studies, we discuss in detail processes that were carefully designed to achieve data with specific properties, for example to require logical inference, grounded reasoning or conversational understanding. Each case study focuses on data collection crowdsourcing protocol details that often receive limited attention in research presentations, for example in conferences, but are critical for research success. @@ -12226,7 +12226,7 @@ Multi-Domain Multilingual Question Answering SebastianRuder - AviSil + AviSil 17–21 Question answering (QA) is one of the most challenging and impactful tasks in natural language processing. Most research in QA, however, has focused on the open-domain or monolingual setting while most real-world applications deal with specific domains or languages. In this tutorial, we attempt to bridge this gap. Firstly, we introduce standard benchmarks in multi-domain and multilingual QA. In both scenarios, we discuss state-of-the-art approaches that achieve impressive performance, ranging from zero-shot transfer learning to out-of-the-box training with open-domain QA systems. Finally, we will present open research problems that this new research agenda poses such as multi-task learning, cross-lingual transfer learning, domain adaptation and training large scale pre-trained multilingual language models. 2021.emnlp-tutorials.4 diff --git a/data/xml/2021.eval4nlp.xml b/data/xml/2021.eval4nlp.xml index f2a313bc70..939dc8c9cc 100644 --- a/data/xml/2021.eval4nlp.xml +++ b/data/xml/2021.eval4nlp.xml @@ -24,8 +24,8 @@ HichamEl Boukkouri CyrilGrouin ThomasLavergne - PatrickParoubek - PierreZweigenbaum + PatrickParoubek + PierreZweigenbaum 1–10 Most of the time, when dealing with a particular Natural Language Processing task, systems are compared on the basis of global statistics such as recall, precision, F1-score, etc. While such scores provide a general idea of the behavior of these systems, they ignore a key piece of information that can be useful for assessing progress and discerning remaining challenges: the relative difficulty of test instances. To address this shortcoming, we introduce the notion of differential evaluation which effectively defines a pragmatic partition of instances into gradually more difficult bins by leveraging the predictions made by a set of systems. Comparing systems along these difficulty bins enables us to produce a finer-grained analysis of their relative merits, which we illustrate on two use-cases: a comparison of systems participating in a multi-label text classification task (CLEF eHealth 2018 ICD-10 coding), and a comparison of neural models trained for biomedical entity detection (BioCreative V chemical-disease relations dataset). 2021.eval4nlp-1.1 @@ -107,8 +107,8 @@ TaoYu TongNiu YingboZhou - DragomirRadev - Xi VictoriaLin + DragomirRadev + Xi VictoriaLin 73–83 The benchmark performance of cross-database semantic parsing has climbed steadily in recent years, catalyzed by the wide adoption of pre-trained language models. Yet existing work have shown that state-of-the-art cross-database semantic parsers struggle to generalize to novel user utterances, databases and query structures. To obtain transparent details on the strengths and limitation of these models, we propose a diagnostic testing approach based on controlled synthesis of canonical natural language and SQL pairs. Inspired by the CheckList, we characterize a set of essential capabilities for cross-database semantic parsing models, and detailed the method for synthesizing the corresponding test data. We evaluated a variety of high performing models using the proposed approach, and identified several non-obvious weaknesses across models (e.g. unable to correctly select many columns). Our dataset and code are released as a test suite at http://github.com/hclent/BehaviorCheckingSemPar. 2021.eval4nlp-1.8 @@ -140,7 +140,7 @@ Statistically Significant Detection of Semantic Shifts using Contextual Word Embeddings - YangLiu + YangLiu AlanMedlar DorotaGlowacka 104–113 @@ -166,7 +166,7 @@ AyushGarg SammedKagi VivekSrivastava - MayankSingh + MayankSingh 123–132 Code-mixing is a phenomenon of mixing words and phrases from two or more languages in a single utterance of speech and text. Due to the high linguistic diversity, code-mixing presents several challenges in evaluating standard natural language generation (NLG) tasks. Various widely popular metrics perform poorly with the code-mixed NLG tasks. To address this challenge, we present a metric in- dependent evaluation pipeline MIPE that significantly improves the correlation between evaluation metrics and human judgments on the generated code-mixed text. As a use case, we demonstrate the performance of MIPE on the machine-generated Hinglish (code-mixing of Hindi and English languages) sentences from the HinGE corpus. We can extend the proposed evaluation strategy to other code-mixed language pairs, NLG tasks, and evaluation metrics with minimal to no effort. 2021.eval4nlp-1.13 @@ -179,7 +179,7 @@ MarcosTreviso Nuno M.Guerreiro RicardoRei - André F. T.Martins + André F. T.Martins 133–145 We present the joint contribution of Instituto Superior Técnico (IST) and Unbabel to the Explainable Quality Estimation (QE) shared task, where systems were submitted to two tracks: constrained (without word-level supervision) and unconstrained (with word-level supervision). For the constrained track, we experimented with several explainability methods to extract the relevance of input tokens from sentence-level QE models built on top of multilingual pre-trained transformers. Among the different tested methods, composing explanations in the form of attention weights scaled by the norm of value vectors yielded the best results. When word-level labels are used during training, our best results were obtained by using word-level predicted probabilities. We further improve the performance of our methods on the two tracks by ensembling explanation scores extracted from models trained with different pre-trained transformers, achieving strong results for in-domain and zero-shot language pairs. 2021.eval4nlp-1.14 @@ -189,7 +189,7 @@ Error Identification for Machine Translation with Metric Embedding and Attention - RaphaelRubino + RaphaelRubino AtsushiFujita BenjaminMarie 146–156 @@ -249,7 +249,7 @@ <fixed-case>H</fixed-case>in<fixed-case>GE</fixed-case>: A Dataset for Generation and Evaluation of Code-Mixed <fixed-case>H</fixed-case>inglish Text VivekSrivastava - MayankSingh + MayankSingh 200–208 Text generation is a highly active area of research in the computational linguistic community. The evaluation of the generated text is a challenging task and multiple theories and metrics have been proposed over the years. Unfortunately, text generation and evaluation are relatively understudied due to the scarcity of high-quality resources in code-mixed languages where the words and phrases from multiple languages are mixed in a single utterance of text and speech. To address this challenge, we present a corpus (HinGE) for a widely popular code-mixed language Hinglish (code-mixing of Hindi and English languages). HinGE has Hinglish sentences generated by humans as well as two rule-based algorithms corresponding to the parallel Hindi-English sentences. In addition, we demonstrate the in- efficacy of widely-used evaluation metrics on the code-mixed data. The HinGE dataset will facilitate the progress of natural language generation research in code-mixed languages. 2021.eval4nlp-1.20 @@ -262,7 +262,7 @@ OskarWysocki MalinaFlorea DónalLanders - AndréFreitas + AndréFreitas 209–229 SemEval is the primary venue in the NLP community for the proposal of new challenges and for the systematic empirical evaluation of NLP systems. This paper provides a systematic quantitative analysis of SemEval aiming to evidence the patterns of the contributions behind SemEval. By understanding the distribution of task types, metrics, architectures, participation and citations over time we aim to answer the question on what is being evaluated by SemEval. 2021.eval4nlp-1.21 @@ -298,7 +298,7 @@ Explainable Quality Estimation: <fixed-case>CUNI</fixed-case> <fixed-case>E</fixed-case>val4<fixed-case>NLP</fixed-case> Submission PeterPolák MuskaanSingh - OndřejBojar + OndřejBojar 250–255 This paper describes our participating system in the shared task Explainable quality estimation of 2nd Workshop on Evaluation & Comparison of NLP Systems. The task of quality estimation (QE, a.k.a. reference-free evaluation) is to predict the quality of MT output at inference time without access to reference translations. In this proposed work, we first build a word-level quality estimation model, then we finetune this model for sentence-level QE. Our proposed models achieve near state-of-the-art results. In the word-level QE, we place 2nd and 3rd on the supervised Ro-En and Et-En test sets. In the sentence-level QE, we achieve a relative improvement of 8.86% (Ro-En) and 10.6% (Et-En) in terms of the Pearson correlation coefficient over the baseline model. 2021.eval4nlp-1.24 diff --git a/data/xml/2021.fever.xml b/data/xml/2021.fever.xml index c5807ad4e2..9548033f5a 100644 --- a/data/xml/2021.fever.xml +++ b/data/xml/2021.fever.xml @@ -8,7 +8,7 @@ OanaCocarascu ZhijiangGuo ArpitMittal - MichaelSchlichtkrull + MichaelSchlichtkrull JamesThorne AndreasVlachos Association for Computational Linguistics @@ -100,7 +100,7 @@ Verdict Inference with Claim and Retrieved Elements Using <fixed-case>R</fixed-case>o<fixed-case>BERT</fixed-case>a In-ZuGi Ting-YuFang - Richard Tzong-HanTsai + Richard Tzong-HanTsai 60–65 Automatic fact verification has attracted recent research attention as the increasing dissemination of disinformation on social media platforms. The FEVEROUS shared task introduces a benchmark for fact verification, in which a system is challenged to verify the given claim using the extracted evidential elements from Wikipedia documents. In this paper, we propose our 3rd place three-stage system consisting of document retrieval, element retrieval, and verdict inference for the FEVEROUS shared task. By considering the context relevance in the fact extraction and verification task, our system achieves 0.29 FEVEROUS score on the development set and 0.25 FEVEROUS score on the blind test set, both outperforming the FEVEROUS baseline. 2021.fever-1.7 @@ -110,7 +110,7 @@ Stance Detection in <fixed-case>G</fixed-case>erman News Articles LauraMascarell - TatyanaRuzsics + TatyanaRuzsics ChristianSchneebeli PhilippeSchlattner LucaCampanella @@ -162,7 +162,7 @@ GiulioAlfarano KhaiNguyen DucPham - RaphaelTroncy + RaphaelTroncy PaoloPapotti 108–112 Computational fact-checking has gained a lot of traction in the machine learning and natural language processing communities. A plethora of solutions have been developed, but methods which leverage both structured and unstructured information to detect misinformation are of particular relevance. In this paper, we tackle the FEVEROUS (Fact Extraction and VERification Over Unstructured and Structured information) challenge which consists of an open source baseline system together with a benchmark dataset containing 87,026 verified claims. We extend this baseline model by improving the evidence retrieval module yielding the best evidence F1 score among the competitors in the challenge leaderboard while obtaining an overall FEVEROUS score of 0.20 (5th best ranked system). diff --git a/data/xml/2021.findings.xml b/data/xml/2021.findings.xml index 49c13820e8..2f8c938a5a 100644 --- a/data/xml/2021.findings.xml +++ b/data/xml/2021.findings.xml @@ -3,7 +3,7 @@ Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021 - ChengqingZong + ChengqingZong FeiXia WenjieLi RobertoNavigli @@ -22,7 +22,7 @@ Explainable Inference Over Grounding-Abstract Chains for Science Questions MokanaranganThayaparan MarcoValentino - AndréFreitas + AndréFreitas 1–12 2021.findings-acl.1 10.18653/v1/2021.findings-acl.1 @@ -206,7 +206,7 @@ More than just Frequency? Demasking Unsupervised Hypernymy Prediction Methods ThomasBott DominikSchlechtweg - SabineSchulte im Walde + SabineSchulte im Walde 186–192 2021.findings-acl.16 10.18653/v1/2021.findings-acl.16 @@ -231,7 +231,7 @@ Kazi SajeedMehrab Md. Mahim AnjumHaque TahmidHasan - WasiAhmad + WasiAhmad AnindyaIqbal RifatShahriyar 210–218 @@ -269,7 +269,7 @@ ShoTakase KeiUchiumi AtsushiKeyaki - NaoakiOkazaki + NaoakiOkazaki 244–255 2021.findings-acl.21 10.18653/v1/2021.findings-acl.21 @@ -319,7 +319,7 @@ Better <fixed-case>C</fixed-case>hinese Sentence Segmentation with Reinforcement Learning SrivatsanSrinivasan - ChrisDyer + ChrisDyer 293–302 2021.findings-acl.25 10.18653/v1/2021.findings-acl.25 @@ -341,7 +341,7 @@ Empirical Error Modeling Improves Robustness of Noisy Neural Sequence Labeling MarcinNamysl SvenBehnke - JoachimKöhler + JoachimKöhler 314–329 2021.findings-acl.27 2021.findings-acl.27.OptionalSupplementaryMaterial.zip @@ -517,8 +517,8 @@ Decoupling Adversarial Training for Fair <fixed-case>NLP</fixed-case> XudongHan - TimothyBaldwin - TrevorCohn + TimothyBaldwin + TrevorCohn 471–477 2021.findings-acl.41 10.18653/v1/2021.findings-acl.41 @@ -583,7 +583,7 @@ JinghuiQin XiaodanLiang LingboLiu - EricXing + EricXing LiangLin 513–523 2021.findings-acl.46 @@ -595,7 +595,7 @@ <fixed-case>SIRE</fixed-case>: Separate Intra- and Inter-sentential Reasoning for Document-level Relation Extraction ShuangZeng YutingWu - BaobaoChang + BaobaoChang 524–534 2021.findings-acl.47 10.18653/v1/2021.findings-acl.47 @@ -622,7 +622,7 @@ ShengqiongWu YafengRen FeiLi - DonghongJi + DonghongJi 549–559 2021.findings-acl.49 10.18653/v1/2021.findings-acl.49 @@ -644,7 +644,7 @@ Contrastive Fine-tuning Improves Robustness for Neural Rankers XiaofeiMa - CiceroNogueira dos Santos + CiceroNogueira dos Santos Andrew O.Arnold 570–582 2021.findings-acl.51 @@ -665,8 +665,8 @@ <fixed-case>T</fixed-case>ell<fixed-case>M</fixed-case>e<fixed-case>W</fixed-case>hy: A Dataset for Answering Why-Questions in Narratives Yash KumarLal - NathanaelChambers - RaymondMooney + NathanaelChambers + RaymondMooney NiranjanBalasubramanian 596–610 2021.findings-acl.53 @@ -699,8 +699,8 @@ MuriloGazzola EsterSabino AnnaLevin - ArnaldoCandido Jr - SandraAluisio + ArnaldoCandido Jr + SandraAluisio MarceloFinger 625–633 2021.findings-acl.55 @@ -753,7 +753,7 @@ Prediction or Comparison: Toward Interpretable Qualitative Reasoning MuchengRen - HeyanHuang + HeyanHuang YangGao 664–675 2021.findings-acl.59 @@ -791,7 +791,7 @@ Sang-WooLee Ji-HoonKim Jung-WooHa - AliceOh + AliceOh 694–704 2021.findings-acl.62 10.18653/v1/2021.findings-acl.62 @@ -817,7 +817,7 @@ ChaoLi ZizhenWang YunboCao - HeyanHuang + HeyanHuang Xian-LingMao 716–728 2021.findings-acl.64 @@ -841,7 +841,7 @@ LeiJi HuaishaoLuo BotianShi - HeyanHuang + HeyanHuang NanDuan Xian-LingMao 743–752 @@ -902,7 +902,7 @@ Evaluating the Efficacy of Summarization Evaluation across Languages FajriKoto Jey HanLau - TimothyBaldwin + TimothyBaldwin 801–812 2021.findings-acl.71 10.18653/v1/2021.findings-acl.71 @@ -952,7 +952,7 @@ Can Cognate Prediction Be Modelled as a Low-Resource Machine Translation Task? ClémentineFourrier RachelBawden - BenoîtSagot + BenoîtSagot 847–861 2021.findings-acl.75 10.18653/v1/2021.findings-acl.75 @@ -965,7 +965,7 @@ What if This Modified That? Syntactic Interventions with Counterfactual Embeddings MycalTucker PengQian - RogerLevy + RogerLevy 862–875 2021.findings-acl.76 10.18653/v1/2021.findings-acl.76 @@ -1016,7 +1016,7 @@ PepaAtanasova GeorgiKaradzhov MarcosZampieri - PreslavNakov + PreslavNakov 915–928 2021.findings-acl.80 10.18653/v1/2021.findings-acl.80 @@ -1039,7 +1039,7 @@ Promoting Graph Awareness in Linearized Graph-to-Text Generation Alexander MiserlisHoyle AnaMarasović - Noah A.Smith + Noah A.Smith 944–956 2021.findings-acl.82 10.18653/v1/2021.findings-acl.82 @@ -1065,7 +1065,7 @@ SarathChandar SoroushVosoughi TerukoMitamura - EduardHovy + EduardHovy 968–988 2021.findings-acl.84 10.18653/v1/2021.findings-acl.84 @@ -1115,7 +1115,7 @@ JianqiangMa ZeyuYan ChangLi - YangZhang + YangZhang 1028–1033 2021.findings-acl.88 10.18653/v1/2021.findings-acl.88 @@ -1238,7 +1238,7 @@ Out of Order: How important is the sequential order of words in a sentence in Natural Language Understanding tasks? ThangPham - TrungBui + TrungBui LongMai AnhNguyen 1145–1160 @@ -1295,7 +1295,7 @@ RuochenXu YangLiu MichaelZeng - XuedongHuang + XuedongHuang 1201–1207 2021.findings-acl.102 10.18653/v1/2021.findings-acl.102 @@ -1339,7 +1339,7 @@ RahulAralikatte DishaShrivastava SivaReddy - AndersSøgaard + AndersSøgaard 1245–1260 2021.findings-acl.106 10.18653/v1/2021.findings-acl.106 @@ -1353,7 +1353,7 @@ XimingLu Jena D.Hwang AntoineBosselut - Jackie Chi KitCheung + Jackie Chi KitCheung YejinChoi 1261–1274 2021.findings-acl.107 @@ -1400,7 +1400,7 @@ ShujieLiu FuruWei MingZhou - MuyunYang + MuyunYang 1300–1305 2021.findings-acl.111 10.18653/v1/2021.findings-acl.111 @@ -1467,7 +1467,7 @@ Exploiting Position Bias for Robust Aspect Sentiment Classification FangMa - ChenZhang + ChenZhang DaweiSong 1352–1358 2021.findings-acl.116 @@ -1482,7 +1482,7 @@ FeiLi HaoFei YafengRen - DonghongJi + DonghongJi 1359–1370 2021.findings-acl.117 10.18653/v1/2021.findings-acl.117 @@ -1522,9 +1522,9 @@ ChuntingZhou GrahamNeubig JiataoGu - MonaDiab - FranciscoGuzmán - LukeZettlemoyer + MonaDiab + FranciscoGuzmán + LukeZettlemoyer MarjanGhazvininejad 1393–1404 2021.findings-acl.120 @@ -1538,7 +1538,7 @@ DuyuTang NanDuan ZhongyuWei - XuanjingHuang + XuanjingHuang JianshuJi GuihongCao DaxinJiang @@ -1579,7 +1579,7 @@ Exploring the Role of Context in Utterance-level Emotion, Act and Intent Classification in Conversations: An Empirical Study DeepanwayGhosal NavonilMajumder - RadaMihalcea + RadaMihalcea SoujanyaPoria 1435–1449 2021.findings-acl.124 @@ -1616,11 +1616,11 @@ Putting words into the system’s mouth: A targeted attack on neural machine translation using monolingual data poisoning JunWang ChangXu - FranciscoGuzmán + FranciscoGuzmán AhmedEl-Kishky YuqingTang BenjaminRubinstein - TrevorCohn + TrevorCohn 1463–1473 2021.findings-acl.127 10.18653/v1/2021.findings-acl.127 @@ -1667,7 +1667,7 @@ <fixed-case>R</fixed-case>iddle<fixed-case>S</fixed-case>ense: Reasoning about Riddle Questions Featuring Linguistic Creativity and Commonsense Knowledge - Bill YuchenLin + Bill YuchenLin ZiyiWu YichiYang Dong-HoLee @@ -1693,11 +1693,11 @@ Learning Slice-Aware Representations with Mixture of Attentions ChengWang - SungjinLee + SungjinLee SunghyunPark HanLi Young-BumKim - RuhiSarikaya + RuhiSarikaya 1530–1536 2021.findings-acl.133 10.18653/v1/2021.findings-acl.133 @@ -1726,7 +1726,7 @@ Few-shot Knowledge Graph-to-Text Generation with Pretrained Language Models JunyiLi TianyiTang - Wayne XinZhao + Wayne XinZhao ZhichengWei Nicholas JingYuan Ji-RongWen @@ -1779,10 +1779,10 @@ Target-oriented Fine-tuning for Zero-Resource Named Entity Recognition - YingZhang + YingZhang FandongMeng YufengChen - JinanXu + JinanXu JieZhou 1603–1615 2021.findings-acl.140 @@ -1807,7 +1807,7 @@ HaotongSun JunshengZhou WeiguangQu - XinyuDai + XinyuDai 1630–1640 2021.findings-acl.142 10.18653/v1/2021.findings-acl.142 @@ -1830,7 +1830,7 @@ Discriminative Reasoning for Document-level Relation Extraction WangXu KehaiChen - TiejunZhao + TiejunZhao 1653–1663 2021.findings-acl.144 10.18653/v1/2021.findings-acl.144 @@ -1923,7 +1923,7 @@ YukunFeng HaoWu HidetakaKamigaito - ManabuOkumura + ManabuOkumura 1743–1750 2021.findings-acl.152 10.18653/v1/2021.findings-acl.152 @@ -1996,7 +1996,7 @@ Cross-Lingual Cross-Domain Nested Named Entity Evaluation on <fixed-case>E</fixed-case>nglish Web Texts - BarbaraPlank + BarbaraPlank 1808–1815 2021.findings-acl.158 10.18653/v1/2021.findings-acl.158 @@ -2044,7 +2044,7 @@ Huang-ChengChou Woan-ShiuanChien Da-ChengJuan - Chi-ChunLee + Chi-ChunLee 1846–1860 2021.findings-acl.162 2021.findings-acl.162.OptionalSupplementaryMaterial.zip @@ -2067,7 +2067,7 @@ Structured Refinement for Sequential Labeling YiranWang HiroyukiShindo - YujiMatsumoto + YujiMatsumoto TaroWatanabe 1873–1884 2021.findings-acl.164 @@ -2088,7 +2088,7 @@ Deciphering Implicit Hate: Evaluating Automated Detection Algorithms for Multimodal Hate AustinBotelho - Scott A.Hale + Scott A.Hale BertieVidgen 1896–1907 2021.findings-acl.166 @@ -2101,7 +2101,7 @@ Studying the Evolution of Scientific Topics and their Relationships Ana SabinaUban CorneliaCaragea - Liviu P.Dinu + Liviu P.Dinu 1908–1922 2021.findings-acl.167 10.18653/v1/2021.findings-acl.167 @@ -2110,7 +2110,7 @@ End-to-End Self-Debiasing Framework for Robust <fixed-case>NLU</fixed-case> Training AbbasGhaddar - PhillippeLanglais + PhillippeLanglais MehdiRezagholizadeh AhmadRashid 1923–1929 @@ -2195,7 +2195,7 @@ ScottNovotney IvanBulyko AriyaRastrow - AndreasStolcke + AndreasStolcke AnkurGandhe 1994–2003 2021.findings-acl.175 @@ -2207,7 +2207,7 @@ Annotation and Evaluation of Coreference Resolution in Screenplays SabyasacheeBaruah SandeepNallan Chakravarthula - ShrikanthNarayanan + ShrikanthNarayanan 2004–2010 2021.findings-acl.176 2021.findings-acl.176.OptionalSupplementaryMaterial.gz @@ -2273,7 +2273,7 @@ ShuwenQiu LifengFan YixinZhu - Song-ChunZhu + Song-ChunZhu 2074–2085 2021.findings-acl.182 10.18653/v1/2021.findings-acl.182 @@ -2295,7 +2295,7 @@ Fusion: Towards Automated <fixed-case>ICD</fixed-case> Coding via Feature Compression - JunyuLuo + JunyuLuo CaoXiao LucasGlass JimengSun @@ -2315,7 +2315,7 @@ MichelGalley ChrisBrockett YizheZhang - BillDolan + BillDolan 2102–2113 2021.findings-acl.185 10.18653/v1/2021.findings-acl.185 @@ -2362,7 +2362,7 @@ JieZhou YuanbinWu QinChen - XuanjingHuang + XuanjingHuang LiangHe 2152–2161 2021.findings-acl.189 @@ -2391,7 +2391,7 @@ ShangwenLv YingqiQu JingLiu - Wayne XinZhao + Wayne XinZhao QiaoQiaoShe HuaWu HaifengWang @@ -2457,7 +2457,7 @@ JizhiZhang XiangnanHe HanwangZhang - Tat-SengChua + Tat-SengChua 2226–2236 2021.findings-acl.196 10.18653/v1/2021.findings-acl.196 @@ -2568,7 +2568,7 @@ YijinLiu FandongMeng YufengChen - JinanXu + JinanXu JieZhou 2327–2337 2021.findings-acl.205 @@ -2579,7 +2579,7 @@ <fixed-case>MA</fixed-case>-<fixed-case>BERT</fixed-case>: Learning Representation by Incorporating Multi-Attribute Knowledge in Transformers YouZhang JinWang - Liang-ChihYu + Liang-ChihYu XuejieZhang 2338–2343 2021.findings-acl.206 @@ -2593,7 +2593,7 @@ YuxuanWang WanxiangChe IvanTitov - Shay B.Cohen + Shay B.Cohen ZhilinLei TingLiu 2344–2354 @@ -2608,7 +2608,7 @@ TiberiuSosea AdityaSawant Ajith JayaramanNair - DianaInkpen + DianaInkpen CorneliaCaragea 2355–2365 2021.findings-acl.208 @@ -2630,7 +2630,7 @@ KuicaiDong ZhaoYilin AixinSun - Jung-JaeKim + Jung-JaeKim XiaoliLi 2377–2389 2021.findings-acl.210 @@ -2684,7 +2684,7 @@ TongtongWu GuilinQi Yuan-FangLi - GholamrezaHaffari + GholamrezaHaffari ShengBi 2417–2429 2021.findings-acl.214 @@ -2815,7 +2815,7 @@ <fixed-case>A</fixed-case>da<fixed-case>ST</fixed-case>: Dynamically Adapting Encoder States in the Decoder for End-to-End Speech-to-Text Translation WuweiHuang DexinWang - DeyiXiong + DeyiXiong 2539–2545 2021.findings-acl.224 10.18653/v1/2021.findings-acl.224 @@ -2825,7 +2825,7 @@ <fixed-case>OKGIT</fixed-case>: <fixed-case>O</fixed-case>pen Knowledge Graph Link Prediction with Implicit Types .Chandrahas - ParthaTalukdar + ParthaTalukdar 2546–2559 2021.findings-acl.225 2021.findings-acl.225.OptionalSupplementaryMaterial.zip @@ -2849,7 +2849,7 @@ Joint Multi-Decoder Framework with Hierarchical Pointer Network for Frame Semantic Parsing XudongChen CeZheng - BaobaoChang + BaobaoChang 2570–2578 2021.findings-acl.227 10.18653/v1/2021.findings-acl.227 @@ -2860,7 +2860,7 @@ Jhih-weiChen Tsu-JuiFu Chen-KangLee - Wei-YunMa + Wei-YunMa 2579–2593 2021.findings-acl.228 2021.findings-acl.228.OptionalSupplementaryMaterial.zip @@ -2923,7 +2923,7 @@ Automatic Text Simplification for Social Good: Progress and Challenges - SanjaStajner + SanjaStajner 2637–2652 2021.findings-acl.233 10.18653/v1/2021.findings-acl.233 @@ -2992,7 +2992,7 @@ Continual Mixed-Language Pre-Training for Extremely Low-Resource Neural Machine Translation ZihanLiu - Genta IndraWinata + Genta IndraWinata PascaleFung 2706–2718 2021.findings-acl.239 @@ -3037,7 +3037,7 @@ When Time Makes Sense: A Historically-Aware Approach to Targeted Sense Disambiguation KasparBeelen FedericoNanni - MarionaColl Ardanuy + MarionaColl Ardanuy KasraHosseini GiorgiaTolfo BarbaraMcGillivray @@ -3050,8 +3050,8 @@ Understanding Feature Focus in Multitask Settings for Lexico-semantic Relation Identification HoussamAkhmouch - GaëlDias - Jose G.Moreno + GaëlDias + Jose G.Moreno 2762–2772 2021.findings-acl.244 10.18653/v1/2021.findings-acl.244 @@ -3077,7 +3077,7 @@ RituparnaMukherjee ShivamSharma Md. ShadAkhtar - PreslavNakov + PreslavNakov TanmoyChakraborty 2783–2796 2021.findings-acl.246 @@ -3103,7 +3103,7 @@ <fixed-case>Z</fixed-case>m<fixed-case>BART</fixed-case>: An Unsupervised Cross-lingual Transfer Framework for Language Generation Kaushal KumarMaurya - Maunendra SankarDesarkar + Maunendra SankarDesarkar YoshinobuKano KumariDeepshikha 2804–2818 @@ -3131,7 +3131,7 @@ Do Multilingual Neural Machine Translation Models Contain Language Pair Specific Attention Heads? Zae MyungKim - LaurentBesacier + LaurentBesacier VassilinaNikoulina DidierSchwab 2832–2841 @@ -3157,9 +3157,9 @@ YawenOuyang JiashengYe YuChen - XinyuDai + XinyuDai ShujianHuang - JiajunChen + JiajunChen 2852–2861 2021.findings-acl.252 10.18653/v1/2021.findings-acl.252 @@ -3181,7 +3181,7 @@ <fixed-case>M</fixed-case>erge<fixed-case>D</fixed-case>istill: <fixed-case>M</fixed-case>erging Language Models using Pre-trained Distillation SimranKhanuja MelvinJohnson - ParthaTalukdar + ParthaTalukdar 2874–2887 2021.findings-acl.254 10.18653/v1/2021.findings-acl.254 @@ -3203,8 +3203,8 @@ <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et-assisted Noun Compound Interpretation GirishkumarPonkiya DipteshKanojia - PushpakBhattacharyya - GirishPalshikar + PushpakBhattacharyya + GirishPalshikar 2901–2911 2021.findings-acl.256 10.18653/v1/2021.findings-acl.256 @@ -3240,7 +3240,7 @@ On the Interaction of Belief Bias and Explanations Ana ValeriaGonzález AnnaRogers - AndersSøgaard + AndersSøgaard 2930–2942 2021.findings-acl.259 10.18653/v1/2021.findings-acl.259 @@ -3321,7 +3321,7 @@ Probing Pre-Trained Language Models for Disease Knowledge IsraaAlghanmi - LuisEspinosa Anke + LuisEspinosa Anke StevenSchockaert 3023–3033 2021.findings-acl.266 @@ -3330,9 +3330,9 @@ <fixed-case>A</fixed-case>ug<fixed-case>V</fixed-case>ic: Exploiting <fixed-case>B</fixed-case>i<fixed-case>T</fixed-case>ext Vicinity for Low-Resource <fixed-case>NMT</fixed-case> - TasnimMohiuddin + TasnimMohiuddin M SaifulBari - ShafiqJoty + ShafiqJoty 3034–3045 2021.findings-acl.267 10.18653/v1/2021.findings-acl.267 @@ -3411,7 +3411,7 @@ GeetickaChauhan BrianTse MrinmayaSachan - RadaMihalcea + RadaMihalcea 3099–3113 2021.findings-acl.273 10.18653/v1/2021.findings-acl.273 @@ -3502,7 +3502,7 @@ SuciFitriany Alham FikriAji Radityo EkoPrasojo - Derry TantiWijaya + Derry TantiWijaya 3170–3183 2021.findings-acl.280 10.18653/v1/2021.findings-acl.280 @@ -3548,7 +3548,7 @@ Is the Lottery Fair? Evaluating Winning Tickets Across Demographics Victor Petrén BachHansen - AndersSøgaard + AndersSøgaard 3214–3224 2021.findings-acl.284 10.18653/v1/2021.findings-acl.284 @@ -3597,7 +3597,7 @@ <fixed-case>D</fixed-case>o<fixed-case>T</fixed-case>: An efficient Double Transformer for <fixed-case>NLP</fixed-case> tasks with tables SyrineKrichene - ThomasMüller + ThomasMüller JulianEisenschlos 3273–3283 2021.findings-acl.289 @@ -3628,7 +3628,7 @@ Hyperbolic Temporal Knowledge Graph Embeddings with Relational and Time Curvatures SebastienMontella - Lina M.Rojas Barahona + Lina M.Rojas Barahona JohannesHeinecke 3296–3308 2021.findings-acl.292 @@ -3675,7 +3675,7 @@ Representing Syntax and Composition with Geometric Transformations LorenzoBertolini JulieWeeds - DavidWeir + DavidWeir QiweiPeng 3343–3353 2021.findings-acl.296 @@ -3700,7 +3700,7 @@ To Point or Not to Point: Understanding How Abstractive Summarizers Paraphrase Text MattWilber WilliamTimkey - Martenvan Schijndel + Martenvan Schijndel 3362–3376 2021.findings-acl.298 10.18653/v1/2021.findings-acl.298 @@ -3796,7 +3796,7 @@ Generating Informative Conclusions for Argumentative Texts ShahbazSyed - KhalidAl Khatib + KhalidAl Khatib MiladAlshomary HenningWachsmuth MartinPotthast @@ -3808,7 +3808,7 @@ Substructure Substitution: Structured Data Augmentation for <fixed-case>NLP</fixed-case> - HaoyueShi + HaoyueShi KarenLivescu KevinGimpel 3494–3508 @@ -3864,7 +3864,7 @@ YasumasaOnoe IoanaBaldini JoydeepGhosh - ByronWallace + ByronWallace KushVarshney 3547–3561 2021.findings-acl.311 @@ -3888,7 +3888,7 @@ MarcoGaido BeatriceSavoldi LuisaBentivogli - MatteoNegri + MatteoNegri MarcoTurchi 3576–3589 2021.findings-acl.313 @@ -3909,7 +3909,7 @@ An Exploratory Analysis of the Relation between Offensive Language and Mental Health Ana-MariaBucur MarcosZampieri - Liviu P.Dinu + Liviu P.Dinu 3600–3606 2021.findings-acl.315 10.18653/v1/2021.findings-acl.315 @@ -3931,7 +3931,7 @@ <fixed-case>P</fixed-case>roof<fixed-case>W</fixed-case>riter: Generating Implications, Proofs, and Abductive Statements over Natural Language OyvindTafjord - BhavanaDalvi + BhavanaDalvi PeterClark 3621–3634 2021.findings-acl.317 @@ -4063,7 +4063,7 @@ ChulakaGunasekara GuyFeigenblat BenjaminSznajder - SachindraJoshi + SachindraJoshi DavidKonopnicki 3748–3756 2021.findings-acl.329 @@ -4143,7 +4143,7 @@ Explaining <fixed-case>NLP</fixed-case> Models via Minimal Contrastive Editing (<fixed-case>M</fixed-case>i<fixed-case>CE</fixed-case>) AlexisRoss AnaMarasović - MatthewPeters + MatthewPeters 3840–3852 2021.findings-acl.336 2021.findings-acl.336.OptionalSupplementaryMaterial.zip @@ -4169,7 +4169,7 @@ Synthesizing Adversarial Negative Responses for Robust Response Ranking and Evaluation PrakharGupta YuliaTsvetkov - JeffreyBigham + JeffreyBigham 3867–3883 2021.findings-acl.338 10.18653/v1/2021.findings-acl.338 @@ -4181,15 +4181,15 @@ PavanKapanipathi IbrahimAbdelaziz SrinivasRavishankar - SalimRoukos + SalimRoukos AlexanderGray - RamónFernandez Astudillo + RamónFernandez Astudillo MariaChang CristinaCornelio SaswatiDana - AchilleFokoue + AchilleFokoue DineshGarg - AlfioGliozzo + AlfioGliozzo SairamGurajada HimaKaranam NaweedKhan @@ -4236,10 +4236,10 @@ Perceptual Models of Machine-Edited Text - ElizabethMerkhofer + ElizabethMerkhofer Monica-AnnMendoza RebeccaMarvin - JohnHenderson + JohnHenderson 3909–3920 2021.findings-acl.342 10.18653/v1/2021.findings-acl.342 @@ -4306,7 +4306,7 @@ CassLowry SujayKhandagale FrancescaCallejas - JudithKlavans + JudithKlavans MariaPolinsky SmarandaMuresan 3969–3974 @@ -4321,7 +4321,7 @@ ShuaiWang RishitaAnubhai KasturiBhattacharjee - YaserAl-Onaizan + YaserAl-Onaizan SmarandaMuresan 3975–3989 2021.findings-acl.348 @@ -4404,7 +4404,7 @@ Analyzing Stereotypes in Generative Text Inference Tasks AnnaSotnikova Yang TristaCao - HalDaumé III + HalDaumé III RachelRudinger 4052–4065 2021.findings-acl.355 @@ -4429,7 +4429,7 @@ Improving Automated Evaluation of Open Domain Dialog via Diverse Reference Augmentation VarunGangal HarshJhamtani - EduardHovy + EduardHovy TaylorBerg-Kirkpatrick 4079–4090 2021.findings-acl.357 @@ -4496,7 +4496,7 @@ ShraddhanJain MichaelFerdman PeterMilder - H. AndrewSchwartz + H. AndrewSchwartz NiranjanBalasubramanian 4147–4157 2021.findings-acl.363 @@ -4536,7 +4536,7 @@ JulianMichael MarjanGhazvininejad HannanehHajishirzi - LukeZettlemoyer + LukeZettlemoyer 4179–4192 2021.findings-acl.366 10.18653/v1/2021.findings-acl.366 @@ -4559,7 +4559,7 @@ Hierarchical Task Learning from Language Instructions with Unified Transformers and Self-Monitoring YichiZhang - JoyceChai + JoyceChai 4202–4213 2021.findings-acl.368 10.18653/v1/2021.findings-acl.368 @@ -4583,7 +4583,7 @@ MasoumehAminzadeh ChristophFeichtenhofer FlorianMetze - LukeZettlemoyer + LukeZettlemoyer 4227–4239 2021.findings-acl.370 10.18653/v1/2021.findings-acl.370 @@ -4646,7 +4646,7 @@ Grounding ‘Grounding’ in <fixed-case>NLP</fixed-case> Khyathi RaghaviChandu YonatanBisk - Alan WBlack + Alan WBlack 4283–4305 2021.findings-acl.375 10.18653/v1/2021.findings-acl.375 @@ -4738,8 +4738,8 @@ Enhancing <fixed-case>C</fixed-case>hinese Word Segmentation via Pseudo Labels for Practicability KaiyuHuang JunpengLiu - DegenHuang - DeyiXiong + DegenHuang + DeyiXiong ZhuangLiu JinsongSu 4369–4381 @@ -4776,7 +4776,7 @@ XinranZhao EsinDurmus HongmingZhang - ClaireCardie + ClaireCardie 4401–4407 2021.findings-acl.386 10.18653/v1/2021.findings-acl.386 @@ -4811,7 +4811,7 @@ Inducing Semantic Roles Without Syntax JulianMichael - LukeZettlemoyer + LukeZettlemoyer 4427–4442 2021.findings-acl.389 10.18653/v1/2021.findings-acl.389 @@ -4821,7 +4821,7 @@ Plot and Rework: Modeling Storylines for Visual Storytelling Chi-yangHsu Yun-WeiChu - Ting-HaoHuang + Ting-HaoHuang Lun-WeiKu 4443–4453 2021.findings-acl.390 @@ -4851,7 +4851,7 @@ Jonathan K.Kummerfeld Lawrence CAn KennethResnicow - RadaMihalcea + RadaMihalcea VerónicaPérez-Rosas 4467–4480 2021.findings-acl.392 @@ -4862,7 +4862,7 @@ An Investigation of Suitability of Pre-Trained Language Models for Dialogue Generation – Avoiding Discrepancies YanZeng - Jian-YunNie + Jian-YunNie 4481–4494 2021.findings-acl.393 10.18653/v1/2021.findings-acl.393 @@ -4883,7 +4883,7 @@ Reordering Examples Helps during Priming-based Few-Shot Learning SawanKumar - ParthaTalukdar + ParthaTalukdar 4507–4518 2021.findings-acl.395 10.18653/v1/2021.findings-acl.395 @@ -4984,7 +4984,7 @@ StéphaneAroca-Ouellette CoryPaik AlessandroRoncone - KatharinaKann + KatharinaKann 4597–4608 2021.findings-acl.404 10.18653/v1/2021.findings-acl.404 @@ -5056,9 +5056,9 @@ WeiShi JiewenWu XiwenYang - NancyChen + NancyChen IvanHo Mien - Jung-JaeKim + Jung-JaeKim PavitraKrishnaswamy 4665–4672 2021.findings-acl.410 @@ -5123,10 +5123,10 @@ As Easy as 1, 2, 3: Behavioural Testing of <fixed-case>NMT</fixed-case> Systems for Numerical Translation JunWang ChangXu - FranciscoGuzmán + FranciscoGuzmán AhmedEl-Kishky BenjaminRubinstein - TrevorCohn + TrevorCohn 4711–4717 2021.findings-acl.415 10.18653/v1/2021.findings-acl.415 @@ -5159,8 +5159,8 @@ What Would a Teacher Do? <fixed-case>P</fixed-case>redicting Future Talk Moves AnanyaGanesh - MarthaPalmer - KatharinaKann + MarthaPalmer + KatharinaKann 4739–4751 2021.findings-acl.418 10.18653/v1/2021.findings-acl.418 @@ -5184,7 +5184,7 @@ Multilingual Simultaneous Neural Machine Translation PhilipArthur DongwonRyu - GholamrezaHaffari + GholamrezaHaffari 4758–4766 2021.findings-acl.420 10.18653/v1/2021.findings-acl.420 @@ -5292,7 +5292,7 @@ John praised <fixed-case>M</fixed-case>ary because _he_? Implicit Causality Bias and Its Interaction with Explicit Cues in <fixed-case>LM</fixed-case>s YovaKementchedjhieva MarkAnderson - AndersSøgaard + AndersSøgaard 4859–4871 2021.findings-acl.429 2021.findings-acl.429.OptionalSupplementaryMaterial.zip @@ -5361,7 +5361,7 @@ <fixed-case>D</fixed-case>oc<fixed-case>NLI</fixed-case>: A Large-scale Dataset for Document-level Natural Language Inference WenpengYin - DragomirRadev + DragomirRadev CaimingXiong 4913–4922 2021.findings-acl.435 @@ -5385,10 +5385,10 @@ Are Multilingual Models the Best Choice for Moderately Under-resourced Languages? <fixed-case>A</fixed-case> Comprehensive Assessment for <fixed-case>C</fixed-case>atalan JordiArmengol-Estapé Casimiro PioCarrino - CarlosRodriguez-Penagos - Onade Gibert Bonet + CarlosRodriguez-Penagos + Onade Gibert Bonet CarmeArmentano-Oller - AitorGonzalez-Agirre + AitorGonzalez-Agirre MaiteMelero MartaVillegas 4933–4946 @@ -5473,7 +5473,7 @@ MadinaHasan LuciaSpecia ThomasHain - BjörnSchuller + BjörnSchuller 5004–5009 2021.findings-acl.443 10.18653/v1/2021.findings-acl.443 @@ -5505,7 +5505,7 @@ Phrase-Level Action Reinforcement Learning for Neural Dialog Response Generation TakatoYamazaki - AkikoAizawa + AkikoAizawa 5028–5038 2021.findings-acl.446 10.18653/v1/2021.findings-acl.446 @@ -5531,7 +5531,7 @@ DineshRaghu AtishyaJain Mausam - SachindraJoshi + SachindraJoshi 5051–5061 2021.findings-acl.448 10.18653/v1/2021.findings-acl.448 @@ -5552,7 +5552,7 @@ What Did You Refer to? <fixed-case>E</fixed-case>valuating Co-References in Dialogue - Wei-NanZhang + Wei-NanZhang YueZhang HanlinTang ZhengyuZhao @@ -5579,7 +5579,7 @@ AmitGajbhiye MarinaFomicheva FernandoAlva-Manchego - FrédéricBlain + FrédéricBlain AbiolaObamuyide NikolaosAletras LuciaSpecia @@ -5631,7 +5631,7 @@ DheerajRajagopal NiketTandon YimingYang - EduardHovy + EduardHovy 5138–5147 2021.findings-acl.456 10.18653/v1/2021.findings-acl.456 @@ -5641,8 +5641,8 @@ Characterizing Social Spambots by their Human Traits SalvatoreGiorgi - LyleUngar - H. AndrewSchwartz + LyleUngar + H. AndrewSchwartz 5148–5158 2021.findings-acl.457 10.18653/v1/2021.findings-acl.457 @@ -5653,10 +5653,10 @@ Findings of the Association for Computational Linguistics: EMNLP 2021 - Marie-FrancineMoens - XuanjingHuang + Marie-FrancineMoens + XuanjingHuang LuciaSpecia - Scott Wen-tauYih + Scott Wen-tauYih Association for Computational Linguistics
Punta Cana, Dominican Republic
November @@ -5724,7 +5724,7 @@ Neural News Recommendation with Collaborative News Encoding and Structural User Encoding ZhimingMao XingshanZeng - Kam-FaiWong + Kam-FaiWong 46–55 Automatic news recommendation has gained much attention from the academic community and industry. Recent studies reveal that the key to this task lies within the effective representation learning of both news and users. Existing works typically encode news title and content separately while neglecting their semantic interaction, which is inadequate for news text comprehension. Besides, previous models encode user browsing history without leveraging the structural correlation of user browsed news to reflect user interests explicitly. In this work, we propose a news recommendation framework consisting of collaborative news encoding (CNE) and structural user encoding (SUE) to enhance news and user representation learning. CNE equipped with bidirectional LSTMs encodes news title and content collaboratively with cross-selection and cross-attention modules to learn semantic-interactive news representations. SUE utilizes graph convolutional networks to extract cluster-structural features of user history, followed by intra-cluster and inter-cluster attention modules to learn hierarchical user interest representations. Experiment results on the MIND dataset validate the effectiveness of our model to improve the performance of news recommendation. 2021.findings-emnlp.5 @@ -5738,7 +5738,7 @@ DianYu KaiSun DongYu - ClaireCardie + ClaireCardie 56–68 Despite considerable progress, most machine reading comprehension (MRC) tasks still lack sufficient training data to fully exploit powerful deep neural network models with millions of parameters, and it is laborious, expensive, and time-consuming to create large-scale, high-quality MRC data through crowdsourcing. This paper focuses on generating more training data for MRC tasks by leveraging existing question-answering (QA) data. We first collect a large-scale multi-subject multiple-choice QA dataset for Chinese, ExamQA. We next use incomplete, yet relevant snippets returned by a web search engine as the context for each QA instance to convert it into a weakly-labeled MRC instance. To better use the weakly-labeled data to improve a target MRC task, we evaluate and compare several methods and further propose a self-teaching paradigm. Experimental results show that, upon state-of-the-art MRC baselines, we can obtain +5.1% in accuracy on a multiple-choice Chinese MRC dataset, Cˆ3, and +3.8% in exact match on an extractive Chinese MRC dataset, CMRC 2018, demonstrating the usefulness of the generated QA-based weakly-labeled data for different types of MRC tasks as well as the effectiveness of self-teaching. ExamQA will be available at https://dataset.org/examqa/. 2021.findings-emnlp.6 @@ -5771,7 +5771,7 @@ ShoyaYoshida LovishChum HengJi - Shih-FuChang + Shih-FuChang 74–88 Visual and textual modalities contribute complementary information about events described in multimedia documents. Videos contain rich dynamics and detailed unfoldings of events, while text describes more high-level and abstract concepts. However, existing event extraction methods either do not handle video or solely target video while ignoring other modalities. In contrast, we propose the first approach to jointly extract events from both video and text articles. We introduce the new task of Video MultiMedia Event Extraction and propose two novel components to build the first system towards this task. First, we propose the first self-supervised cross-modal event coreference model that can determine coreference between video events and text events without any manually annotated pairs. Second, we introduce the first cross-modal transformer architecture, which extracts structured event information from both videos and text documents. We also construct and will publicly release a new benchmark of video-article pairs, consisting of 860 video-article pairs with extensive annotations for evaluating methods on this task. Our experimental results demonstrate the effectiveness of our proposed method on our new benchmark dataset. We achieve 6.0% and 5.8% absolute F-score gain on multimodal event coreference resolution and multimedia event extraction. 2021.findings-emnlp.8 @@ -5918,7 +5918,7 @@ YingboZhou SemihYavuz CaimingXiong - PhilipYu + PhilipYu 188–200 Dense neural text retrieval has achieved promising results on open-domain Question Answering (QA), where latent representations of questions and passages are exploited for maximum inner product search in the retrieval process. However, current dense retrievers require splitting documents into short passages that usually contain local, partial and sometimes biased context, and highly depend on the splitting process. As a consequence, it may yield inaccurate and misleading hidden representations, thus deteriorating the final retrieval result. In this work, we propose Dense Hierarchical Retrieval (DHR), a hierarchical framework which can generate accurate dense representations of passages by utilizing both macroscopic semantics in the document and microscopic semantics specific to each passage. Specifically, a document-level retriever first identifies relevant documents, among which relevant passages are then retrieved by a passage-level retriever. The ranking of the retrieved passages will be further calibrated by examining the document-level relevance. In addition, hierarchical title structure and two negative sampling strategies (i.e., In-Doc and In-Sec negatives) are investigated. We apply DHR to large-scale open-domain QA datasets. DHR significantly outperforms the original dense passage retriever, and helps an end-to-end QA system outperform the strong baselines on multiple open-domain QA benchmarks. 2021.findings-emnlp.19 @@ -5989,7 +5989,7 @@ GuyFeigenblat ChulakaGunasekara BenjaminSznajder - SachindraJoshi + SachindraJoshi DavidKonopnicki RanitAharonov 245–260 @@ -6030,7 +6030,7 @@ ZeyuPeng TejasVaidhya BernhardSchoelkopf - RadaMihalcea + RadaMihalcea 288–301 Mining the causes of political decision-making is an active research area in the field of political science. In the past, most studies have focused on long-term policies that are collected over several decades of time, and have primarily relied on surveys as the main source of predictors. However, the recent COVID-19 pandemic has given rise to a new political phenomenon, where political decision-making consists of frequent short-term decisions, all on the same controlled topic—the pandemic. In this paper, we focus on the question of how public opinion influences policy decisions, while controlling for confounders such as COVID-19 case increases or unemployment rates. Using a dataset consisting of Twitter data from the 50 US states, we classify the sentiments toward governors of each state, and conduct controlled studies and comparisons. Based on the compiled samples of sentiments, policies, and confounders, we conduct causal inference to discover trends in political decision-making across different states. 2021.findings-emnlp.27 @@ -6110,7 +6110,7 @@ SanKim Jin YeaJang MinyoungJung - SaimShin + SaimShin 352–365 Research on open-domain dialogue systems that allow free topics is challenging in the field of natural language processing (NLP). The performance of the dialogue system has been improved recently by the method utilizing dialogue-related knowledge; however, non-English dialogue systems suffer from reproducing the performance of English dialogue systems because securing knowledge in the same language with the dialogue system is relatively difficult. Through experiments with a Korean dialogue system, this paper proves that the performance of a non-English dialogue system can be improved by utilizing English knowledge, highlighting the system uses cross-lingual knowledge. For the experiments, we 1) constructed a Korean version of the Wizard of Wikipedia dataset, 2) built Korean-English T5 (KE-T5), a language model pre-trained with Korean and English corpus, and 3) developed a knowledge-grounded Korean dialogue model based on KE-T5. We observed the performance improvement in the open-domain Korean dialogue model even only English knowledge was given. The experimental results showed that the knowledge inherent in cross-lingual language models can be helpful for generating responses in open dialogue systems. 2021.findings-emnlp.33 @@ -6152,7 +6152,7 @@ Cartography Active Learning MikeZhang - BarbaraPlank + BarbaraPlank 395–406 We propose Cartography Active Learning (CAL), a novel Active Learning (AL) algorithm that exploits the behavior of the model on individual instances during training as a proxy to find the most informative instances for labeling. CAL is inspired by data maps, which were recently proposed to derive insights into dataset quality (Swayamdipta et al., 2020). We compare our method on popular text classification tasks to commonly used AL strategies, which instead rely on post-training behavior. We demonstrate that CAL is competitive to other common AL methods, showing that training dynamics derived from small seed data can be successfully used for AL. We provide insights into our new AL method by analyzing batch-level statistics utilizing the data maps. Our results further show that CAL results in a more data-efficient learning strategy, achieving comparable or better results with considerably less training data. 2021.findings-emnlp.36 @@ -6192,7 +6192,7 @@ Attention Weights in Transformer <fixed-case>NMT</fixed-case> Fail Aligning Words Between Sequences but Largely Explain Model Predictions JavierFerrando - Marta R.Costa-jussà + Marta R.Costa-jussà 434–443 This work proposes an extensive analysis of the Transformer architecture in the Neural Machine Translation (NMT) setting. Focusing on the encoder-decoder attention mechanism, we prove that attention weights systematically make alignment errors by relying mainly on uninformative tokens from the source sequence. However, we observe that NMT models assign attention to these tokens to regulate the contribution in the prediction of the two contexts, the source and the prefix of the target sequence. We provide evidence about the influence of wrong alignments on the model behavior, demonstrating that the encoder-decoder attention mechanism is well suited as an interpretability method for NMT. Finally, based on our analysis, we propose methods that largely reduce the word alignment error rate compared to standard induced alignments from attention weights. 2021.findings-emnlp.39 @@ -6261,7 +6261,7 @@ FukunMa ChenyaoLiu LijieWen - Philip S.Yu + Philip S.Yu 487–496 To alleviate human efforts from obtaining large-scale annotations, Semi-Supervised Relation Extraction methods aim to leverage unlabeled data in addition to learning from limited samples. Existing self-training methods suffer from the gradual drift problem, where noisy pseudo labels on unlabeled data are incorporated during training. To alleviate the noise in pseudo labels, we propose a method called MetaSRE, where a Relation Label Generation Network generates accurate quality assessment on pseudo labels by (meta) learning from the successful and failed attempts on Relation Classification Network as an additional meta-objective. To reduce the influence of noisy pseudo labels, MetaSRE adopts a pseudo label selection and exploitation scheme which assesses pseudo label quality on unlabeled samples and only exploits high-quality pseudo labels in a self-training fashion to incrementally augment labeled samples for both robustness and accuracy. Experimental results on two public datasets demonstrate the effectiveness of the proposed approach. 2021.findings-emnlp.44 @@ -6304,7 +6304,7 @@ GuyFeigenblat BenjaminSznajder RanitAharonov - SachindraJoshi + SachindraJoshi 518–526 Neural abstractive summarization models have drastically improved in the recent years. However, the summaries generated by these models generally suffer from issues such as: not capturing the critical facts in source documents, and containing facts that are inconsistent with the source documents. In this work, we present a general framework to train abstractive summarization models to alleviate such issues. We first train a sequence-to-sequence model to summarize documents, and then further train this model in a Reinforcement Learning setting with question-answering based rewards. We evaluate the summaries generated by the this framework using multiple automatic measures and human judgements. The experimental results show that the question-answering rewards can be used as a general framework to improve neural abstractive summarization. Particularly, the results from human evaluations show that the summaries generated by our approach is preferred over 30% of the time over the summaries generated by general abstractive summarization models. 2021.findings-emnlp.47 @@ -6337,7 +6337,7 @@ Unseen Entity Handling in Complex Question Answering over Knowledge Base via Language Generation XinHuang - Jung-JaeKim + Jung-JaeKim BoweiZou 547–557 Complex question answering over knowledge base remains as a challenging task because it involves reasoning over multiple pieces of information, including intermediate entities/relations and other constraints. Previous methods simplify the SPARQL query of a question into such forms as a list or a graph, missing such constraints as “filter” and “order_by”, and present models specialized for generating those simplified forms from a given question. We instead introduce a novel approach that directly generates an executable SPARQL query without simplification, addressing the issue of generating unseen entities. We adapt large scale pre-trained encoder-decoder models and show that our method significantly outperforms the previous methods and also that our method has higher interpretability and computational efficiency than the previous methods. @@ -6401,7 +6401,7 @@ HuiHuang DongJing YufengChen - JinanXu + JinanXu JianLiu 599–610 Recent multilingual pre-trained models, like XLM-RoBERTa (XLM-R), have been demonstrated effective in many cross-lingual tasks. However, there are still gaps between the contextualized representations of similar words in different languages. To solve this problem, we propose a novel framework named Multi-View Mixed Language Training (MVMLT), which leverages code-switched data with multi-view learning to fine-tune XLM-R. MVMLT uses gradient-based saliency to extract keywords which are the most relevant to downstream tasks and replaces them with the corresponding words in the target language dynamically. Furthermore, MVMLT utilizes multi-view learning to encourage contextualized embeddings to align into a more refined language-invariant space. Extensive experiments with four languages show that our model achieves state-of-the-art results on zero-shot cross-lingual sentiment classification and dialogue state tracking tasks, demonstrating the effectiveness of our proposed model. @@ -6428,7 +6428,7 @@ GijsDanoe FrisoStolk BrittBruntink - PreslavNakov + PreslavNakov 611–649 With the emergence of the COVID-19 pandemic, the political and the medical aspects of disinformation merged as the problem got elevated to a whole new level to become the first global infodemic. Fighting this infodemic has been declared one of the most important focus areas of the World Health Organization, with dangers ranging from promoting fake cures, rumors, and conspiracy theories to spreading xenophobia and panic. Addressing the issue requires solving a number of challenging problems such as identifying messages containing claims, determining their check-worthiness and factuality, and their potential to do harm as well as the nature of that harm, to mention just a few. To address this gap, we release a large dataset of 16K manually annotated tweets for fine-grained disinformation analysis that (i) focuses on COVID-19, (ii) combines the perspectives and the interests of journalists, fact-checkers, social media platforms, policy makers, and society, and (iii) covers Arabic, Bulgarian, Dutch, and English. Finally, we show strong evaluation results using pretrained Transformers, thus confirming the practical utility of the dataset in monolingual vs. multilingual, and single task vs. multitask settings. 2021.findings-emnlp.56 @@ -6454,7 +6454,7 @@ Stream-level Latency Evaluation for Simultaneous Machine Translation JavierIranzo-Sánchez JorgeCivera Saiz - AlfonsJuan + AlfonsJuan 664–670 Simultaneous machine translation has recently gained traction thanks to significant quality improvements and the advent of streaming applications. Simultaneous translation systems need to find a trade-off between translation quality and response time, and with this purpose multiple latency measures have been proposed. However, latency evaluations for simultaneous translation are estimated at the sentence level, not taking into account the sequential nature of a streaming scenario. Indeed, these sentence-level latency measures are not well suited for continuous stream translation, resulting in figures that are not coherent with the simultaneous translation policy of the system being assessed. This work proposes a stream level adaptation of the current latency measures based on a re-segmentation approach applied to the output translation, that is successfully evaluated on streaming conditions for a reference IWSLT task. 2021.findings-emnlp.58 @@ -6500,7 +6500,7 @@ Learn Continually, Generalize Rapidly: Lifelong Knowledge Accumulation for Few-shot Learning XisenJin - Bill YuchenLin + Bill YuchenLin MohammadRostami XiangRen 714–729 @@ -6573,8 +6573,8 @@ Learning Hard Retrieval Decoder Attention for Transformers HongfeiXu QiuhuiLiu - Josefvan Genabith - DeyiXiong + Josefvan Genabith + DeyiXiong 779–785 The Transformer translation model is based on the multi-head attention mechanism, which can be parallelized easily. The multi-head attention network performs the scaled dot-product attention function in parallel, empowering the model by jointly attending to information from different representation subspaces at different positions. In this paper, we present an approach to learning a hard retrieval attention where an attention head only attends to one token in the sentence rather than all tokens. The matrix multiplication between attention probabilities and the value sequence in the standard scaled dot-product attention can thus be replaced by a simple and efficient retrieval operation. We show that our hard retrieval attention mechanism is 1.43 times faster in decoding, while preserving translation quality on a wide range of machine translation tasks when used in the decoder self- and cross-attention networks. 2021.findings-emnlp.67 @@ -6631,7 +6631,7 @@ YizhongWang JungoKasai HannanehHajishirzi - Noah A.Smith + Noah A.Smith 820–842 Models of language trained on very large corpora have been demonstrated useful for natural language processing. As fixed artifacts, they have become the object of intense study, with many researchers “probing” the extent to which they acquire and readily demonstrate linguistic abstractions, factual and commonsense knowledge, and reasoning abilities. Recent work applied several probes to intermediate training stages to observe the developmental process of a large-scale model (Chiang et al., 2020). Following this effort, we systematically answer a question: for various types of knowledge a language model learns, when during (pre)training are they acquired? Using RoBERTa as a case study, we find: linguistic knowledge is acquired fast, stably, and robustly across domains. Facts and commonsense are slower and more domain-sensitive. Reasoning abilities are, in general, not stably acquired. As new datasets, pretraining protocols, and probes emerge, we believe that probing-across-time analyses can help researchers understand the complex, intermingled learning that these models undergo and guide us toward more efficient approaches that accomplish necessary learning faster. 2021.findings-emnlp.71 @@ -6657,7 +6657,7 @@ MartinFajcik MartinDocekal KarelOndrej - PavelSmrz + PavelSmrz 854–870 This work presents a novel four-stage open-domain QA pipeline R2-D2 (Rank twice, reaD twice). The pipeline is composed of a retriever, passage reranker, extractive reader, generative reader and a mechanism that aggregates the final prediction from all system’s components. We demonstrate its strength across three open-domain QA datasets: NaturalQuestions, TriviaQA and EfficientQA, surpassing state-of-the-art on the first two. Our analysis demonstrates that: (i) combining extractive and generative reader yields absolute improvements up to 5 exact match and it is at least twice as effective as the posterior averaging ensemble of the same models with different parameters, (ii) the extractive reader with fewer parameters can match the performance of the generative reader on extractive QA datasets. 2021.findings-emnlp.73 @@ -6684,7 +6684,7 @@ YiminFan YaoboLiang AlexandreMuzio - HanyHassan + HanyHassan HouqiangLi MingZhou NanDuan @@ -6791,7 +6791,7 @@ RoseWang JuliaWhite JesseMu - NoahGoodman + NoahGoodman 977–984 To be good conversational partners, natural language processing (NLP) systems should be trained to produce contextually useful utterances. Prior work has investigated training NLP systems with communication-based objectives, where a neural listener stands in as a communication partner. However, these systems commonly suffer from semantic drift where the learned language diverges radically from natural language. We propose a method that uses a population of neural listeners to regularize speaker training. We first show that language drift originates from the poor uncertainty calibration of a neural listener, which makes high-certainty predictions on novel sentences. We explore ensemble- and dropout-based populations of listeners and find that the former results in better uncertainty quantification. We evaluate both population-based objectives on reference games, and show that the ensemble method with better calibration enables the speaker to generate pragmatic utterances while scaling to a large vocabulary and generalizing to new games and listeners. 2021.findings-emnlp.83 @@ -6818,7 +6818,7 @@ KyleGorman ChristoKirov BrianRoark - RichardSproat + RichardSproat 995–1005 Ad hoc abbreviations are commonly found in informal communication channels that favor shorter messages. We consider the task of reversing these abbreviations in context to recover normalized, expanded versions of abbreviated messages. The problem is related to, but distinct from, spelling correction, as ad hoc abbreviations are intentional and can involve more substantial differences from the original words. Ad hoc abbreviations are also productively generated on-the-fly, so they cannot be resolved solely by dictionary lookup. We generate a large, open-source data set of ad hoc abbreviations. This data is used to study abbreviation strategies and to develop two strong baselines for abbreviation expansion. 2021.findings-emnlp.85 @@ -6893,7 +6893,7 @@ YangLiu ChaoLi YunboCao - DongshengLi + DongshengLi 1053–1066 Although showing promising values to downstream applications, generating question and answer together is under-explored. In this paper, we introduce a novel task that targets question-answer pair generation from visual images. It requires not only generating diverse question-answer pairs but also keeping the consistency of them. We study different generation paradigms for this task and propose three models: the pipeline model, the joint model, and the sequential model. We integrate variational inference into these models to achieve diversity and consistency. We also propose region representation scaling and attention alignment to improve the consistency further. We finally devise an evaluator as a quantitative metric for consistency. We validate our approach on two benchmarks, VQA2.0 and Visual-7w, by automatically and manually evaluating diversity and consistency. Experimental results show the effectiveness of our models: they can generate diverse or consistent pairs. Moreover, this task can be used to improve visual question generation and visual question answering. 2021.findings-emnlp.91 @@ -6905,7 +6905,7 @@ Entity-level Cross-modal Learning Improves Multi-modal Machine Translation XinHuang JiajunZhang - ChengqingZong + ChengqingZong 1067–1080 Multi-modal machine translation (MMT) aims at improving translation performance by incorporating visual information. Most of the studies leverage the visual information through integrating the global image features as auxiliary input or decoding by attending to relevant local regions of the image. However, this kind of usage of visual information makes it difficult to figure out how the visual modality helps and why it works. Inspired by the findings of (CITATION) that entities are most informative in the image, we propose an explicit entity-level cross-modal learning approach that aims to augment the entity representation. Specifically, the approach is framed as a reconstruction task that reconstructs the original textural input from multi-modal input in which entities are replaced with visual features. Then, a multi-task framework is employed to combine the translation task and the reconstruction task to make full use of cross-modal entity representation learning. The extensive experiments demonstrate that our approach can achieve comparable or even better performance than state-of-the-art models. Furthermore, our in-depth analysis shows how visual information improves translation. 2021.findings-emnlp.92 @@ -7017,9 +7017,9 @@ YeqiuLi BoweiZou ZhifengLi - Ai TiAw + Ai TiAw YuHong - QiaomingZhu + QiaomingZhu 1157–1165 We tackle multi-choice question answering. Acquiring related commonsense knowledge to the question and options facilitates the recognition of the correct answer. However, the current reasoning models suffer from the noises in the retrieved knowledge. In this paper, we propose a novel encoding method which is able to conduct interception and soft filtering. This contributes to the harvesting and absorption of representative information with less interference from noises. We experiment on CommonsenseQA. Experimental results illustrate that our method yields substantial and consistent improvements compared to the strong Bert, RoBERTa and Albert-based baselines. 2021.findings-emnlp.100 @@ -7035,7 +7035,7 @@ Jan-DavidKrieger TerryRuas BelaGipp - AkikoAizawa + AkikoAizawa 1166–1177 Media coverage has a substantial effect on the public perception of events. Nevertheless, media outlets are often biased. One way to bias news articles is by altering the word choice. The automatic identification of bias by word choice is challenging, primarily due to the lack of a gold standard data set and high context dependencies. This paper presents BABE, a robust and diverse data set created by trained experts, for media bias research. We also analyze why expert labeling is essential within this domain. Our data set offers better annotation quality and higher inter-annotator agreement than existing work. It consists of 3,700 sentences balanced among topics and outlets, containing media bias labels on the word and sentence level. Based on our data, we also introduce a way to detect bias-inducing sentences in news articles automatically. Our best performing BERT-based model is pre-trained on a larger corpus consisting of distant labels. Fine-tuning and evaluating the model on our proposed supervised data set, we achieve a macro F1-score of 0.804, outperforming existing methods. 2021.findings-emnlp.101 @@ -7067,7 +7067,7 @@ BisweshMohapatra GauravPandey DanishContractor - SachindraJoshi + SachindraJoshi 1190–1203 Popular dialog datasets such as MultiWOZ are created by providing crowd workers an instruction, expressed in natural language, that describes the task to be accomplished. Crowd workers play the role of a user and an agent to generate dialogs to accomplish tasks involving booking restaurant tables, calling a taxi etc. In this paper, we present a data creation strategy that uses the pre-trained language model, GPT2, to simulate the interaction between crowd workers by creating a user bot and an agent bot. We train the simulators using a smaller percentage of actual crowd-generated conversations and their corresponding instructions. We demonstrate that by using the simulated data, we achieve significant improvements in low-resource settings on two publicly available datasets - MultiWOZ dataset and the Persona chat dataset. 2021.findings-emnlp.103 @@ -7133,7 +7133,7 @@ <fixed-case>A</fixed-case>rabic<fixed-case>T</fixed-case>ransformer: Efficient Large <fixed-case>A</fixed-case>rabic Language Model with Funnel Transformer and <fixed-case>ELECTRA</fixed-case> Objective SultanAlrowili - VijayShanker + VijayShanker 1255–1261 Pre-training Transformer-based models such as BERT and ELECTRA on a collection of Arabic corpora, demonstrated by both AraBERT and AraELECTRA, shows an impressive result on downstream tasks. However, pre-training Transformer-based language models is computationally expensive, especially for large-scale models. Recently, Funnel Transformer has addressed the sequential redundancy inside Transformer architecture by compressing the sequence of hidden states, leading to a significant reduction in the pre-training cost. This paper empirically studies the performance and efficiency of building an Arabic language model with Funnel Transformer and ELECTRA objective. We find that our model achieves state-of-the-art results on several Arabic downstream tasks despite using less computational resources compared to other BERT-based models. 2021.findings-emnlp.108 @@ -7158,7 +7158,7 @@ ZujunDou YuHong YuSun - GuodongZhou + GuodongZhou 1275–1283 Training implicit discourse relation classifiers suffers from data sparsity. Variational AutoEncoder (VAE) appears to be the proper solution. It is because ideally VAE is capable of generating inexhaustible varying samples, and this facilitates selective data augmentation. However, our experiments show that coupling VAE with the RoBERTa-based classifier results in severe performance degradation. We ascribe the unusual phenomenon to erroneous sampling that would happen when VAE pursued variations. To overcome the problem, we develop a re-anchoring strategy, where Conditional VAE (CVAE) is used for estimating the risk of erroneous sampling, and meanwhile migrating the anchor to reduce the risk. The test results on PDTB v2.0 illustrate that, compared to the RoBERTa-based baseline, re-anchoring yields substantial improvements. Besides, we observe that re-anchoring can cooperate with other auxiliary strategies (transfer learning and interactive attention mechanism) to further improve the baseline, obtaining the F-scores of about 55%, 63%, 80% and 44% for the four main relation types (Comparison, Contingency, Expansion, Temporality) in the binary classification (Yes/No) scenario. 2021.findings-emnlp.110 @@ -7196,7 +7196,7 @@ LongyinZhang XinTan FangKong - GuodongZhou + GuodongZhou 1304–1312 Discourse analysis has long been known to be fundamental in natural language processing. In this research, we present our insight on discourse-level topic chain (DTC) parsing which aims at discovering new topics and investigating how these topics evolve over time within an article. To address the lack of data, we contribute a new discourse corpus with DTC-style dependency graphs annotated upon news articles. In particular, we ensure the high reliability of the corpus by utilizing a two-step annotation strategy to build the data and filtering out the annotations with low confidence scores. Based on the annotated corpus, we introduce a simple yet robust system for automatic discourse-level topic chain parsing. 2021.findings-emnlp.113 @@ -7208,7 +7208,7 @@ Multilingual Neural Machine Translation: Can Linguistic Hierarchies Help? FahimehSaleh WrayBuntine - GholamrezaHaffari + GholamrezaHaffari LanDu 1313–1330 Multilingual Neural Machine Translation (MNMT) trains a single NMT model that supports translation between multiple languages, rather than training separate models for different languages. Learning a single model can enhance the low-resource translation by leveraging data from multiple languages. However, the performance of an MNMT model is highly dependent on the type of languages used in training, as transferring knowledge from a diverse set of languages degrades the translation performance due to negative transfer. In this paper, we propose a Hierarchical Knowledge Distillation (HKD) approach for MNMT which capitalises on language groups generated according to typological features and phylogeny of languages to overcome the issue of negative transfer. HKD generates a set of multilingual teacher-assistant models via a selective knowledge distillation mechanism based on the language groups, and then distills the ultimate multilingual model from those assistants in an adaptive way. Experimental results derived from the TED dataset with 53 languages demonstrate the effectiveness of our approach in avoiding the negative transfer effect in MNMT, leading to an improved translation performance (about 1 BLEU score in average) compared to strong baselines. @@ -7476,7 +7476,7 @@ BudhadityaDeb GuoqingZheng MiladShokouhi - Ahmed HassanAwadallah + Ahmed HassanAwadallah 1553–1568 We study the problem of multilingual automated reply suggestions (RS) model serving many languages simultaneously. Multilingual models are often challenged by model capacity and severe data distribution skew across languages. While prior works largely focus on monolingual models, we propose Conditional Generative Matching models (CGM), optimized within a Variational Autoencoder framework to address challenges arising from multilingual RS. CGM does so with expressive message conditional priors, mixture densities to enhance multilingual data representation, latent alignment for language discrimination, and effective variational optimization techniques for training multilingual RS. The enhancements result in performance that exceed competitive baselines in relevance (ROUGE score) by more than 10% on average, and 16%for low resource languages. CGM also shows remarkable improvements in diversity (80%) illustrating its expressiveness in representation of multi-lingual data. 2021.findings-emnlp.134 @@ -7510,7 +7510,7 @@ <fixed-case>P</fixed-case>rofiling News Discourse Structure Using Explicit Subtopic Structures Guided Critics - Prafulla KumarChoubey + Prafulla KumarChoubey RuihongHuang 1594–1605 We present an actor-critic framework to induce subtopical structures in a news article for news discourse profiling. The model uses multiple critics that act according to known subtopic structures while the actor aims to outperform them. The content structures constitute sentences that represent latent subtopic boundaries. Then, we introduce a hierarchical neural network that uses the identified subtopic boundary sentences to model multi-level interaction between sentences, subtopics, and the document. Experimental results and analyses on the NewsDiscourse corpus show that the actor model learns to effectively segment a document into subtopics and improves the performance of the hierarchical model on the news discourse profiling task. @@ -7536,7 +7536,7 @@ Learning from Language Description: Low-shot Named Entity Recognition via Decomposed Framework YaqingWang HaodaChu - ChaoZhang + ChaoZhang JingGao 1618–1630 In this work, we study the problem of named entity recognition (NER) in a low resource scenario, focusing on few-shot and zero-shot settings. Built upon large-scale pre-trained language models, we propose a novel NER framework, namely SpanNER, which learns from natural language supervision and enables the identification of never-seen entity classes without using in-domain labeled data. We perform extensive experiments on 5 benchmark datasets and evaluate the proposed method in the few-shot learning, domain transfer and zero-shot learning settings. The experimental results show that the proposed method can bring 10%, 23% and 26% improvements in average over the best baselines in few-shot learning, domain transfer and zero-shot learning settings respectively. @@ -7547,9 +7547,9 @@ <fixed-case>BERT</fixed-case> might be Overkill: A Tiny but Effective Biomedical Entity Linker based on Residual Convolutional Neural Networks - TuanLai + TuanLai HengJi - ChengXiangZhai + ChengXiangZhai 1631–1639 Biomedical entity linking is the task of linking entity mentions in a biomedical document to referent entities in a knowledge base. Recently, many BERT-based models have been introduced for the task. While these models achieve competitive results on many datasets, they are computationally expensive and contain about 110M parameters. Little is known about the factors contributing to their impressive performance and whether the over-parameterization is needed. In this work, we shed some light on the inner workings of these large BERT-based models. Through a set of probing experiments, we have found that the entity linking performance only changes slightly when the input word order is shuffled or when the attention scope is limited to a fixed window size. From these observations, we propose an efficient convolutional neural network with residual connections for biomedical entity linking. Because of the sparse connectivity and weight sharing properties, our model has a small number of parameters and is highly efficient. On five public datasets, our model achieves comparable or even better linking accuracy than the state-of-the-art BERT-based models while having about 60 times fewer parameters. 2021.findings-emnlp.140 @@ -7575,7 +7575,7 @@ Exploring Multitask Learning for Low-Resource Abstractive Summarization AhmedMagooda - DianeLitman + DianeLitman MohamedElaraby 1652–1661 This paper explores the effect of using multitask learning for abstractive summarization in the context of small training corpora. In particular, we incorporate four different tasks (extractive summarization, language modeling, concept detection, and paraphrase detection) both individually and in combination, with the goal of enhancing the target task of abstractive summarization via multitask learning. We show that for many task combinations, a model trained in a multitask setting outperforms a model trained only for abstractive summarization, with no additional summarization data introduced. Additionally, we do a comprehensive search and find that certain tasks (e.g. paraphrase detection) consistently benefit abstractive summarization, not only when combined with other tasks but also when using different architectures and training corpora. @@ -7656,7 +7656,7 @@ AustinReiter Ser-NamLim YoavArtzi - ClaireCardie + ClaireCardie 1716–1723 We introduce Classification with Alternating Normalization (CAN), a non-parametric post-processing step for classification. CAN improves classification accuracy for challenging examples by re-adjusting their predicted class probability distribution using the predicted class distributions of high-confidence validation examples. CAN is easily applicable to any probabilistic classifier, with minimal computation overhead. We analyze the properties of CAN using simulated experiments, and empirically demonstrate its effectiveness across a diverse set of classification tasks. 2021.findings-emnlp.148 @@ -7698,7 +7698,7 @@ Towards Developing a Multilingual and Code-Mixed Visual Question Answering System by Knowledge Distillation HumairRaj Khan - DeepakGupta + DeepakGupta AsifEkbal 1753–1767 Pre-trained language-vision models have shown remarkable performance on the visual question answering (VQA) task. However, most pre-trained models are trained by only considering monolingual learning, especially the resource-rich language like English. Training such models for multilingual setups demand high computing resources and multilingual language-vision dataset which hinders their application in practice. To alleviate these challenges, we propose a knowledge distillation approach to extend an English language-vision model (teacher) into an equally effective multilingual and code-mixed model (student). Unlike the existing knowledge distillation methods, which only use the output from the last layer of the teacher network for distillation, our student model learns and imitates the teacher from multiple intermediate layers (language and vision encoders) with appropriately designed distillation objectives for incremental knowledge extraction. We also create the large-scale multilingual and code-mixed VQA dataset in eleven different language setups considering the multiple Indian and European languages. Experimental results and in-depth analysis show the effectiveness of the proposed VQA model over the pre-trained language-vision models on eleven diverse language setups. @@ -7713,7 +7713,7 @@ FandongMeng JinchaoZhang YufengChen - JinanXu + JinanXu JieZhou 1768–1780 Aspect-based sentiment analysis (ABSA) mainly involves three subtasks: aspect term extraction, opinion term extraction, and aspect-level sentiment classification, which are typically handled in a separate or joint manner. However, previous approaches do not well exploit the interactive relations among three subtasks and do not pertinently leverage the easily available document-level labeled domain/sentiment knowledge, which restricts their performances. To address these issues, we propose a novel Iterative Multi-Knowledge Transfer Network (IMKTN) for end-to-end ABSA. For one thing, through the interactive correlations between the ABSA subtasks, our IMKTN transfers the task-specific knowledge from any two of the three subtasks to another one at the token level by utilizing a well-designed routing algorithm, that is, any two of the three subtasks will help the third one. For another, our IMKTN pertinently transfers the document-level knowledge, i.e., domain-specific and sentiment-related knowledge, to the aspect-level subtasks to further enhance the corresponding performance. Experimental results on three benchmark datasets demonstrate the effectiveness and superiority of our approach. @@ -7808,7 +7808,7 @@ YuFeng JingZhang GaoleHe - Wayne XinZhao + Wayne XinZhao LemaoLiu QuanLiu CuipingLi @@ -7880,7 +7880,7 @@ <fixed-case>C</fixed-case>ontract<fixed-case>NLI</fixed-case>: A Dataset for Document-level Natural Language Inference for Contracts YutaKoreeda - ChristopherManning + ChristopherManning 1907–1919 Reviewing contracts is a time-consuming procedure that incurs large expenses to companies and social inequality to those who cannot afford it. In this work, we propose “document-level natural language inference (NLI) for contracts”, a novel, real-world application of NLI that addresses such problems. In this task, a system is given a set of hypotheses (such as “Some obligations of Agreement may survive termination.”) and a contract, and it is asked to classify whether each hypothesis is “entailed by”, “contradicting to” or “not mentioned by” (neutral to) the contract as well as identifying “evidence” for the decision as spans in the contract. We annotated and release the largest corpus to date consisting of 607 annotated contracts. We then show that existing models fail badly on our task and introduce a strong baseline, which (a) models evidence identification as multi-label classification over spans instead of trying to predict start and end tokens, and (b) employs more sophisticated context segmentation for dealing with long documents. We also show that linguistic characteristics of contracts, such as negations by exceptions, are contributing to the difficulty of this task and that there is much room for improvement. 2021.findings-emnlp.164 @@ -7902,7 +7902,7 @@ Grouped-Attention for Content-Selection and Content-Plan Generation - Bayu DistiawanTrisedya + Bayu DistiawanTrisedya XiaojieWang JianzhongQi RuiZhang @@ -8000,8 +8000,8 @@ Say ‘<fixed-case>YES</fixed-case>’ to Positivity: Detecting Toxic Language in Workplace Communications Meghana MoorthyBhat SagharHosseini - Ahmed HassanAwadallah - PaulBennett + Ahmed HassanAwadallah + PaulBennett WeishengLi 2017–2029 Workplace communication (e.g. email, chat, etc.) is a central part of enterprise productivity. Healthy conversations are crucial for creating an inclusive environment and maintaining harmony in an organization. Toxic communications at the workplace can negatively impact overall job satisfaction and are often subtle, hidden, or demonstrate human biases. The linguistic subtlety of mild yet hurtful conversations has made it difficult for researchers to quantify and extract toxic conversations automatically. While offensive language or hate speech has been extensively studied in social communities, there has been little work studying toxic communication in emails. Specifically, the lack of corpus, sparsity of toxicity in enterprise emails, and well-defined criteria for annotating toxic conversations have prevented researchers from addressing the problem at scale. We take the first step towards studying toxicity in workplace emails by providing (1) a general and computationally viable taxonomy to study toxic language at the workplace (2) a dataset to study toxic language at the workplace based on the taxonomy and (3) analysis on why offensive language and hate-speech datasets are not suitable to detect workplace toxicity. @@ -8029,7 +8029,7 @@ Mitigating Data Scarceness through Data Synthesis, Augmentation and Curriculum for Abstractive Summarization AhmedMagooda - DianeLitman + DianeLitman 2043–2052 This paper explores three simple data manipulation techniques (synthesis, augmentation, curriculum) for improving abstractive summarization models without the need for any additional data. We introduce a method of data synthesis with paraphrasing, a data augmentation technique with sample mixing, and curriculum learning with two new difficulty metrics based on specificity and abstractiveness. We conduct experiments to show that these three techniques can help improve abstractive summarization across two summarization models and two different small datasets. Furthermore, we show that these techniques can improve performance when applied in isolation and when combined. 2021.findings-emnlp.175 @@ -8128,7 +8128,7 @@ LingzhiWang XingshanZeng HuangHu - Kam-FaiWong + Kam-FaiWong DaxinJiang 2127–2137 In recent years, world business in online discussions and opinion sharing on social media is booming. Re-entry prediction task is thus proposed to help people keep track of the discussions which they wish to continue. Nevertheless, existing works only focus on exploiting chatting history and context information, and ignore the potential useful learning signals underlying conversation data, such as conversation thread patterns and repeated engagement of target users, which help better understand the behavior of target users in conversations. In this paper, we propose three interesting and well-founded auxiliary tasks, namely, Spread Pattern, Repeated Target user, and Turn Authorship, as the self-supervised signals for re-entry prediction. These auxiliary tasks are trained together with the main task in a multi-task manner. Experimental results on two datasets newly collected from Twitter and Reddit show that our method outperforms the previous state-of-the-arts with fewer parameters and faster convergence. Extensive experiments and analysis show the effectiveness of our proposed models and also point out some key ideas in designing self-supervised tasks. @@ -8419,7 +8419,7 @@ Wine is not v i n. On the Compatibility of Tokenizations across Languages AntonisMaronikolakis PhilippDufter - HinrichSchütze + HinrichSchütze 2382–2399 The size of the vocabulary is a central design choice in large pretrained language models, with respect to both performance and memory requirements. Typically, subword tokenization algorithms such as byte pair encoding and WordPiece are used. In this work, we investigate the compatibility of tokenizations for multilingual static and contextualized embedding spaces and propose a measure that reflects the compatibility of tokenizations across languages. Our goal is to prevent incompatible tokenizations, e.g., “wine” (word-level) in English vs. “v i n” (character-level) in French, which make it hard to learn good multilingual semantic representations. We show that our compatibility measure allows the system designer to create vocabularies across languages that are compatible – a desideratum that so far has been neglected in multilingual models. 2021.findings-emnlp.205 @@ -8430,7 +8430,7 @@ Temporal Adaptation of <fixed-case>BERT</fixed-case> and Performance on Downstream Document Classification: Insights from Social Media PaulRöttger - JanetPierrehumbert + JanetPierrehumbert 2400–2412 Language use differs between domains and even within a domain, language use changes over time. For pre-trained language models like BERT, domain adaptation through continued pre-training has been shown to improve performance on in-domain downstream tasks. In this article, we investigate whether temporal adaptation can bring additional benefits. For this purpose, we introduce a corpus of social media comments sampled over three years. It contains unlabelled data for adaptation and evaluation on an upstream masked language modelling task as well as labelled data for fine-tuning and evaluation on a downstream document classification task. We find that temporality matters for both tasks: temporal adaptation improves upstream and temporal fine-tuning downstream task performance. Time-specific models generally perform better on past than on future test sets, which matches evidence on the bursty usage of topical words. However, adapting BERT to time and domain does not improve performance on the downstream task over only adapting to domain. Token-level analysis shows that temporal adaptation captures event-driven changes in language use in the downstream task, but not those changes that are actually relevant to task performance. Based on our findings, we discuss when temporal adaptation may be more effective. 2021.findings-emnlp.206 @@ -8582,7 +8582,7 @@ Probing Pre-trained Language Models for Semantic Attributes and their Values MeriemBeloucif - ChrisBiemann + ChrisBiemann 2554–2559 Pretrained language models (PTLMs) yield state-of-the-art performance on many natural language processing tasks, including syntax, semantics and commonsense. In this paper, we focus on identifying to what extent do PTLMs capture semantic attributes and their values, e.g., the correlation between rich and high net worth. We use PTLMs to predict masked tokens using patterns and lists of items from Wikidata in order to verify how likely PTLMs encode semantic attributes along with their values. Such inferences based on semantics are intuitive for humans as part of our language understanding. Since PTLMs are trained on large amount of Wikipedia data we would assume that they can generate similar predictions, yet our findings reveal that PTLMs are still much worse than humans on this task. We show evidence and analysis explaining how to exploit our methodology to integrate better context and semantics into PTLMs using knowledge bases. 2021.findings-emnlp.218 @@ -8654,7 +8654,7 @@ Subword Mapping and Anchoring across Languages GiorgosVernikos - AndreiPopescu-Belis + AndreiPopescu-Belis 2633–2647 State-of-the-art multilingual systems rely on shared vocabularies that sufficiently cover all considered languages. To this end, a simple and frequently used approach makes use of subword vocabularies constructed jointly over several languages. We hypothesize that such vocabularies are suboptimal due to false positives (identical subwords with different meanings across languages) and false negatives (different subwords with similar meanings). To address these issues, we propose Subword Mapping and Anchoring across Languages (SMALA), a method to construct bilingual subword vocabularies. SMALA extracts subword alignments using an unsupervised state-of-the-art mapping technique and uses them to create cross-lingual anchors based on subword similarities. We demonstrate the benefits of SMALA for cross-lingual natural language inference (XNLI), where it improves zero-shot transfer to an unseen language without task-specific data, but only by sharing subword embeddings. Moreover, in neural machine translation, we show that joint subword vocabularies obtained with SMALA lead to higher BLEU scores on sentences that contain many false positives and false negatives. 2021.findings-emnlp.224 @@ -8667,7 +8667,7 @@ AviCaciularu ArmanCohan IzBeltagy - MatthewPeters + MatthewPeters ArieCattan IdoDagan 2648–2662 @@ -8680,7 +8680,7 @@ Patterns of Polysemy and Homonymy in Contextualised Language Models JanoschHaber - MassimoPoesio + MassimoPoesio 2663–2676 One of the central aspects of contextualised language models is that they should be able to distinguish the meaning of lexically ambiguous words by their contexts. In this paper we investigate the extent to which the contextualised embeddings of word forms that display multiplicity of sense reflect traditional distinctions of polysemy and homonymy. To this end, we introduce an extended, human-annotated dataset of graded word sense similarity and co-predication acceptability, and evaluate how well the similarity of embeddings predicts similarity in meaning. Both types of human judgements indicate that the similarity of polysemic interpretations falls in a continuum between identity of meaning and homonymy. However, we also observe significant differences within the similarity ratings of polysemes, forming consistent patterns for different types of polysemic sense alternation. Our dataset thus appears to capture a substantial part of the complexity of lexical ambiguity, and can provide a realistic test bed for contextualised embeddings. Among the tested models, BERT Large shows the strongest correlation with the collected word sense similarity ratings, but struggles to consistently replicate the observed similarity patterns. When clustering ambiguous word forms based on their embeddings, the model displays high confidence in discerning homonyms and some types of polysemic alternations, but consistently fails for others. 2021.findings-emnlp.226 @@ -8703,7 +8703,7 @@ Controlled Neural Sentence-Level Reframing of News Articles Wei-FanChen - KhalidAl Khatib + KhalidAl Khatib BennoStein HenningWachsmuth 2683–2693 @@ -8743,7 +8743,7 @@ Improving Numerical Reasoning Skills in the Modular Approach for Complex Question Answering on Text Xiao-YuGuo Yuan-FangLi - GholamrezaHaffari + GholamrezaHaffari 2713–2718 Numerical reasoning skills are essential for complex question answering (CQA) over text. It requires opertaions including counting, comparison, addition and subtraction. A successful approach to CQA on text, Neural Module Networks (NMNs), follows the programmer-interpreter paradigm and leverages specialised modules to perform compositional reasoning. However, the NMNs framework does not consider the relationship between numbers and entities in both questions and paragraphs. We propose effective techniques to improve NMNs’ numerical reasoning capabilities by making the interpreter question-aware and capturing the relationship between entities and numbers. On the same subset of the DROP dataset for CQA on text, experimental results show that our additions outperform the original NMNs by 3.0 points for the overall F1 score. 2021.findings-emnlp.231 @@ -8754,8 +8754,8 @@ Retrieval Augmented Code Generation and Summarization - Md RizwanParvez - WasiAhmad + Md RizwanParvez + WasiAhmad SaikatChakraborty BaishakhiRay Kai-WeiChang @@ -8833,9 +8833,9 @@ Open-Domain Contextual Link Prediction and its Complementarity with Entailment Graphs Mohammad JavadHosseini - Shay B.Cohen + Shay B.Cohen MarkJohnson - MarkSteedman + MarkSteedman 2790–2802 An open-domain knowledge graph (KG) has entities as nodes and natural language relations as edges, and is constructed by extracting (subject, relation, object) triples from text. The task of open-domain link prediction is to infer missing relations in the KG. Previous work has used standard link prediction for the task. Since triples are extracted from text, we can ground them in the larger textual context in which they were originally found. However, standard link prediction methods only rely on the KG structure and ignore the textual context that each triple was extracted from. In this paper, we introduce the new task of open-domain contextual link prediction which has access to both the textual context and the KG structure to perform link prediction. We build a dataset for the task and propose a model for it. Our experiments show that context is crucial in predicting missing relations. We also demonstrate the utility of contextual link prediction in discovering context-independent entailments between relations, in the form of entailment graphs (EG), in which the nodes are the relations. The reverse holds too: context-independent EGs assist in predicting relations in context. 2021.findings-emnlp.238 @@ -8889,8 +8889,8 @@ “Be nice to your wife! The restaurants are closed”: Can Gender Stereotype Detection Improve Sexism Classification? PatriciaChiril - FarahBenamara - VéroniqueMoriceau + FarahBenamara + VéroniqueMoriceau 2833–2844 In this paper, we focus on the detection of sexist hate speech against women in tweets studying for the first time the impact of gender stereotype detection on sexism classification. We propose: (1) the first dataset annotated for gender stereotype detection, (2) a new method for data augmentation based on sentence similarity with multilingual external datasets, and (3) a set of deep learning experiments first to detect gender stereotypes and then, to use this auxiliary task for sexism detection. Although the presence of stereotypes does not necessarily entail hateful content, our results show that sexism classification can definitively benefit from gender stereotype detection. 2021.findings-emnlp.242 @@ -8901,7 +8901,7 @@ Automatic Discrimination between Inherited and Borrowed <fixed-case>L</fixed-case>atin Words in <fixed-case>R</fixed-case>omance Languages Alina MariaCristea - Liviu P.Dinu + Liviu P.Dinu SimonaGeorgescu Mihnea-LucianMihai Ana SabinaUban @@ -8930,8 +8930,8 @@ Knowledge-Interactive Network with Sentiment Polarity Intensity-Aware Multi-Task Learning for Emotion Recognition in Conversations YunheXie KailaiYang - ChengjieSun - BingquanLiu + ChengjieSun + BingquanLiu ZhenzhouJi 2879–2889 Emotion Recognition in Conversation (ERC) has gained much attention from the NLP community recently. Some models concentrate on leveraging commonsense knowledge or multi-task learning to help complicated emotional reasoning. However, these models neglect direct utterance-knowledge interaction. In addition, these models utilize emotion-indirect auxiliary tasks, which provide limited affective information for the ERC task. To address the above issues, we propose a Knowledge-Interactive Network with sentiment polarity intensity-aware multi-task learning, namely KI-Net, which leverages both commonsense knowledge and sentiment lexicon to augment semantic information. Specifically, we use a self-matching module for internal utterance-knowledge interaction. Considering correlations with the ERC task, a phrase-level Sentiment Polarity Intensity Prediction (SPIP) task is devised as an auxiliary task. Experiments show that all knowledge integration, self-matching and SPIP modules improve the model performance respectively on three datasets. Moreover, our KI-Net model shows 1.04% performance improvement over the state-of-the-art model on the IEMOCAP dataset. @@ -8972,9 +8972,9 @@ KaiyuHuang HaoYu JunpengLiu - WeiLiu + WeiLiu JingxiangCao - DegenHuang + DegenHuang 2908–2917 Precise information of word boundary can alleviate the problem of lexical ambiguity to improve the performance of natural language processing (NLP) tasks. Thus, Chinese word segmentation (CWS) is a fundamental task in NLP. Due to the development of pre-trained language models (PLM), pre-trained knowledge can help neural methods solve the main problems of the CWS in significant measure. Existing methods have already achieved high performance on several benchmarks (e.g., Bakeoff-2005). However, recent outstanding studies are limited by the small-scale annotated corpus. To further improve the performance of CWS methods based on fine-tuning the PLMs, we propose a novel neural framework, LBGCN, which incorporates a lexicon-based graph convolutional network into the Transformer encoder. Experimental results on five benchmarks and four cross-domain datasets show the lexicon-based graph convolutional network successfully captures the information of candidate words and helps to improve performance on the benchmarks (Bakeoff-2005 and CTB6) and the cross-domain datasets (SIGHAN-2010). Further experiments and analyses demonstrate that our proposed framework effectively models the lexicon to enhance the ability of basic neural frameworks and strengthens the robustness in the cross-domain scenario. 2021.findings-emnlp.248 @@ -8989,7 +8989,7 @@ YeyunGong JianJiao RuofeiZhang - TimothyBaldwin + TimothyBaldwin NanDuan 2918–2928 Pre-trained language models have led to substantial gains over a broad range of natural language processing (NLP) tasks, but have been shown to have limitations for natural language generation tasks with high-quality requirements on the output, such as commonsense generation and ad keyword generation. In this work, we present a novel Knowledge Filtering and Contrastive learning Network (KFCNet) which references external knowledge and achieves better generation performance. Specifically, we propose a BERT-based filter model to remove low-quality candidates, and apply contrastive learning separately to each of the encoder and decoder, within a general encoder–decoder architecture. The encoder contrastive module helps to capture global target semantics during encoding, and the decoder contrastive module enhances the utility of retrieved prototypes while learning general features. Extensive experiments on the CommonGen benchmark show that our model outperforms the previous state of the art by a large margin: +6.6 points (42.5 vs. 35.9) for BLEU-4, +3.7 points (33.3 vs. 29.6) for SPICE, and +1.3 points (18.3 vs. 17.0) for CIDEr. We further verify the effectiveness of the proposed contrastive module on ad keyword generation, and show that our model has potential commercial value. @@ -9040,7 +9040,7 @@ MatthewMatero NikitaSoni NiranjanBalasubramanian - H. AndrewSchwartz + H. AndrewSchwartz 2959–2966 Much of natural language processing is focused on leveraging large capacity language models, typically trained over single messages with a task of predicting one or more tokens. However, modeling human language at higher-levels of context (i.e., sequences of messages) is under-explored. In stance detection and other social media tasks where the goal is to predict an attribute of a message, we have contextual data that is loosely semantically connected by authorship. Here, we introduce Message-Level Transformer (MeLT) – a hierarchical message-encoder pre-trained over Twitter and applied to the task of stance prediction. We focus on stance prediction as a task benefiting from knowing the context of the message (i.e., the sequence of previous messages). The model is trained using a variant of masked-language modeling; where instead of predicting tokens, it seeks to generate an entire masked (aggregated) message vector via reconstruction loss. We find that applying this pre-trained masked message-level transformer to the downstream task of stance detection achieves F1 performance of 67%. 2021.findings-emnlp.253 @@ -9090,7 +9090,7 @@ Argumentation-Driven Evidence Association in Criminal Cases YefeiTeng - WenHanChao + WenHanChao 2997–3001 Evidence association in criminal cases is dividing a set of judicial evidence into several non-overlapping subsets, improving the interpretability and legality of conviction. Observably, evidence divided into the same subset usually supports the same claim. Therefore, we propose an argumentation-driven supervised learning method to calculate the distance between evidence pairs for the following evidence association step in this paper. Experimental results on a real-world dataset demonstrate the effectiveness of our method. 2021.findings-emnlp.257 @@ -9185,9 +9185,9 @@ YohanJo HaneulYoo JinYeongBak - AliceOh - ChrisReed - EduardHovy + AliceOh + ChrisReed + EduardHovy 3074–3094 Finding counterevidence to statements is key to many tasks, including counterargument generation. We build a system that, given a statement, retrieves counterevidence from diverse sources on the Web. At the core of this system is a natural language inference (NLI) model that determines whether a candidate sentence is valid counterevidence or not. Most NLI models to date, however, lack proper reasoning abilities necessary to find counterevidence that involves complex inference. Thus, we present a knowledge-enhanced NLI model that aims to handle causality- and example-based inference by incorporating knowledge graphs. Our NLI model outperforms baselines for NLI tasks, especially for instances that require the targeted inference. In addition, this NLI model further improves the counterevidence retrieval system, notably finding complex counterevidence better. 2021.findings-emnlp.264 @@ -9211,7 +9211,7 @@ HwiyeolJo DongyeopKang AndrewHead - Marti A.Hearst + Marti A.Hearst 3102–3115 Natural language models often fall short when understanding and generating mathematical notation. What is not clear is whether these shortcomings are due to fundamental limitations of the models, or the absence of appropriate tasks. In this paper, we explore the extent to which natural language models can learn semantics between mathematical notation and their surrounding text. We propose two notation prediction tasks, and train a model that selectively masks notation tokens and encodes left and/or right sentences as context. Compared to baseline models trained by masked language modeling, our method achieved significantly better performance at the two tasks, showing that this approach is a good first step towards modeling mathematical texts. However, the current models rarely predict unseen symbols correctly, and token-level predictions are more accurate than symbol-level predictions, indicating more work is needed to represent structural patterns. Based on the results, we suggest future works toward modeling mathematical texts. 2021.findings-emnlp.266 @@ -9223,7 +9223,7 @@ Unpacking the Interdependent Systems of Discrimination: Ableist Bias in <fixed-case>NLP</fixed-case> Systems through an Intersectional Lens SaadHassan MattHuenerfauth - Cecilia OvesdotterAlm + Cecilia OvesdotterAlm 3116–3123 Much of the world’s population experiences some form of disability during their lifetime. Caution must be exercised while designing natural language processing (NLP) systems to prevent systems from inadvertently perpetuating ableist bias against people with disabilities, i.e., prejudice that favors those with typical abilities. We report on various analyses based on word predictions of a large-scale BERT language model. Statistically significant results demonstrate that people with disabilities can be disadvantaged. Findings also explore overlapping forms of discrimination related to interconnected gender and race identities. 2021.findings-emnlp.267 @@ -9266,7 +9266,7 @@ AhmadRashid MehdiRezagholizadeh AliGhodsi - PhilippeLanglais + PhilippeLanglais 3145–3152 Knowledge Distillation (KD) is extensively used in Natural Language Processing to compress the pre-training and task-specific fine-tuning phases of large neural language models. A student model is trained to minimize a convex combination of the prediction loss over the labels and another over the teacher output. However, most existing works either fix the interpolating weight between the two losses apriori or vary the weight using heuristics. In this work, we propose a novel sample-wise loss weighting method, RW-KD. A meta-learner, simultaneously trained with the student, adaptively re-weights the two losses for each sample. We demonstrate, on 7 datasets of the GLUE benchmark, that RW-KD outperforms other loss re-weighting methods for KD. 2021.findings-emnlp.270 @@ -9289,7 +9289,7 @@ Beyond the Tip of the Iceberg: Assessing Coherence of Text Classifiers ShaneStorks - JoyceChai + JoyceChai 3169–3177 As large-scale, pre-trained language models achieve human-level and superhuman accuracy on existing language understanding tasks, statistical bias in benchmark data and probing studies have recently called into question their true capabilities. For a more informative evaluation than accuracy on text classification tasks can offer, we propose evaluating systems through a novel measure of prediction coherence. We apply our framework to two existing language understanding benchmarks with different properties to demonstrate its versatility. Our experimental results show that this evaluation framework, although simple in ideas and implementation, is a quick, effective, and versatile measure to provide insight into the coherence of machines’ predictions. 2021.findings-emnlp.272 @@ -9300,8 +9300,8 @@ Does Pretraining for Summarization Require Knowledge Transfer? KundanKrishna - JeffreyBigham - Zachary C.Lipton + JeffreyBigham + Zachary C.Lipton 3178–3189 Pretraining techniques leveraging enormous datasets have driven recent advances in text summarization. While folk explanations suggest that knowledge transfer accounts for pretraining’s benefits, little is known about why it works or what makes a pretraining task or dataset suitable. In this paper, we challenge the knowledge transfer story, showing that pretraining on documents consisting of character n-grams selected at random, we can nearly match the performance of models pretrained on real corpora. This work holds the promise of eliminating upstream corpora, which may alleviate some concerns over offensive language, bias, and copyright issues. To see whether the small residual benefit of using real data could be accounted for by the structure of the pretraining task, we design several tasks motivated by a qualitative study of summarization corpora. However, these tasks confer no appreciable benefit, leaving open the possibility of a small role for knowledge transfer. 2021.findings-emnlp.273 @@ -9313,7 +9313,7 @@ Bandits Don’t Follow Rules: Balancing Multi-Facet Machine Translation with Multi-Armed Bandits JuliaKreutzer DavidVilar - ArtemSokolov + ArtemSokolov 3190–3204 Training data for machine translation (MT) is often sourced from a multitude of large corpora that are multi-faceted in nature, e.g. containing contents from multiple domains or different levels of quality or complexity. Naturally, these facets do not occur with equal frequency, nor are they equally important for the test scenario at hand. In this work, we propose to optimize this balance jointly with MT model parameters to relieve system developers from manual schedule design. A multi-armed bandit is trained to dynamically choose between facets in a way that is most beneficial for the MT system. We evaluate it on three different multi-facet applications: balancing translationese and natural training data, or data from multiple domains or multiple language pairs. We find that bandit learning leads to competitive MT systems across tasks, and our analysis provides insights into its learned strategies and the underlying data sets. 2021.findings-emnlp.274 @@ -9349,7 +9349,7 @@ <fixed-case>S</fixed-case>ci<fixed-case>C</fixed-case>ap: Generating Captions for Scientific Figures Ting-YaoHsu C LeeGiles - Ting-HaoHuang + Ting-HaoHuang 3258–3264 Researchers use figures to communicate rich, complex information in scientific papers. The captions of these figures are critical to conveying effective messages. However, low-quality figure captions commonly occur in scientific articles and may decrease understanding. In this paper, we propose an end-to-end neural framework to automatically generate informative, high-quality captions for scientific figures. To this end, we introduce SCICAP, a large-scale figure-caption dataset based on computer science arXiv papers published between 2010 and 2020. After pre-processing – including figure-type classification, sub-figure identification, text normalization, and caption text selection – SCICAP contained more than two million figures extracted from over 290,000 papers. We then established baseline models that caption graph plots, the dominant (19.2%) figure type. The experimental results showed both opportunities and steep challenges of generating captions for scientific figures. 2021.findings-emnlp.277 @@ -9520,7 +9520,7 @@ m<fixed-case>DAPT</fixed-case>: Multilingual Domain Adaptive Pretraining in a Single Model RasmusKær Jørgensen MareikeHartmann - XiangDai + XiangDai DesmondElliott 3404–3418 Domain adaptive pretraining, i.e. the continued unsupervised pretraining of a language model on domain-specific text, improves the modelling of text for downstream tasks within the domain. Numerous real-world applications are based on domain-specific text, e.g. working with financial or biomedical documents, and these applications often need to support multiple languages. However, large-scale domain-specific multilingual pretraining data for such scenarios can be difficult to obtain, due to regulations, legislation, or simply a lack of language- and domain-specific text. One solution is to train a single multilingual model, taking advantage of the data available in as many languages as possible. In this work, we explore the benefits of domain adaptive pretraining with a focus on adapting to multiple languages within a specific domain. We propose different techniques to compose pretraining corpora that enable a language model to both become domain-specific and multilingual. Evaluation on nine domain-specific datasets—for biomedical named entity recognition and financial sentence classification—covering seven different languages show that a single multilingual domain-specific model can outperform the general multilingual model, and performs close to its monolingual counterpart. This finding holds across two different pretraining methods, adapter-based pretraining and full model pretraining. @@ -9578,7 +9578,7 @@ <fixed-case>AS</fixed-case>titch<fixed-case>I</fixed-case>n<fixed-case>L</fixed-case>anguage<fixed-case>M</fixed-case>odels: Dataset and Methods for the Exploration of Idiomaticity in Pre-Trained Language Models HarishTayyar Madabushi EdwardGow-Smith - CarolinaScarton + CarolinaScarton AlineVillavicencio 3464–3477 Despite their success in a variety of NLP tasks, pre-trained language models, due to their heavy reliance on compositionality, fail in effectively capturing the meanings of multiword expressions (MWEs), especially idioms. Therefore, datasets and methods to improve the representation of MWEs are urgently needed. Existing datasets are limited to providing the degree of idiomaticity of expressions along with the literal and, where applicable, (a single) non-literal interpretation of MWEs. This work presents a novel dataset of naturally occurring sentences containing MWEs manually classified into a fine-grained set of meanings, spanning both English and Portuguese. We use this dataset in two tasks designed to test i) a language model’s ability to detect idiom usage, and ii) the effectiveness of a language model in generating representations of sentences containing idioms. Our experiments demonstrate that, on the task of detecting idiomatic usage, these models perform reasonably well in the one-shot and few-shot scenarios, but that there is significant scope for improvement in the zero-shot scenario. On the task of representing idiomaticity, we find that pre-training is not always effective, while fine-tuning could provide a sample efficient method of learning representations of sentences containing MWEs. @@ -9602,7 +9602,7 @@ A Computational Exploration of Pejorative Language in Social Media - Liviu P.Dinu + Liviu P.Dinu Ioan-BogdanIordache Ana SabinaUban MarcosZampieri @@ -9668,7 +9668,7 @@ Disentangling Generative Factors in Natural Language with Discrete Variational Autoencoders GiangiacomoMercatali - AndréFreitas + AndréFreitas 3547–3556 The ability of learning disentangled representations represents a major step for interpretable NLP systems as it allows latent linguistic features to be controlled. Most approaches to disentanglement rely on continuous variables, both for images and text. We argue that despite being suitable for image datasets, continuous variables may not be ideal to model features of textual data, due to the fact that most generative factors in text are discrete. We propose a Variational Autoencoder based method which models language features as discrete variables and encourages independence between variables for learning disentangled representations. The proposed model outperforms continuous and discrete baselines on several qualitative and quantitative benchmarks for disentanglement as well as on a text style transfer downstream application. 2021.findings-emnlp.301 @@ -9694,10 +9694,10 @@ Do <fixed-case>UD</fixed-case> Trees Match Mention Spans in Coreference Annotations? MartinPopel - ZdeněkŽabokrtský + ZdeněkŽabokrtský AnnaNedoluzhko MichalNovák - DanielZeman + DanielZeman 3570–3576 One can find dozens of data resources for various languages in which coreference - a relation between two or more expressions that refer to the same real-world entity - is manually annotated. One could also assume that such expressions usually constitute syntactically meaningful units; however, mention spans have been annotated simply by delimiting token intervals in most coreference projects, i.e., independently of any syntactic representation. We argue that it could be advantageous to make syntactic and coreference annotations convergent in the long term. We present a pilot empirical study focused on matches and mismatches between hand-annotated linear mention spans and automatically parsed syntactic trees that follow Universal Dependencies conventions. The study covers 9 datasets for 8 different languages. 2021.findings-emnlp.303 @@ -9712,7 +9712,7 @@ AnkurBapna MaximKrikun DmitryLepikhin - Minh-ThangLuong + Minh-ThangLuong OrhanFirat 3577–3599 Sparse Mixture-of-Experts (MoE) has been a successful approach for scaling multilingual translation models to billions of parameters without a proportional increase in training computation. However, MoE models are prohibitively large and practitioners often resort to methods such as distillation for serving. In this work, we investigate routing strategies at different granularity (token, sentence, task) in MoE models to bypass distillation. Experiments on WMT and a web-scale dataset suggest that task-level routing (task-MoE) enables us to extract smaller, ready-to-deploy sub-networks from large sparse models. On WMT, our task-MoE with 32 experts (533M parameters) outperforms the best performing token-level MoE model (token-MoE) by +1.0 BLEU on average across 30 language pairs. The peak inference throughput is also improved by a factor of 1.9x when we route by tasks instead of tokens. While distilling a token-MoE to a smaller dense model preserves only 32% of the BLEU gains, our sub-network task-MoE, by design, preserves all the gains with the same inference cost as the distilled student model. Finally, when scaling up to 200 language pairs, our 128-expert task-MoE (13B parameters) performs competitively with a token-level counterpart, while improving the peak inference throughput by a factor of 2.6x. @@ -9822,7 +9822,7 @@ Post-Editing Extractive Summaries by Definiteness Prediction JadKabbara - Jackie Chi KitCheung + Jackie Chi KitCheung 3682–3692 Extractive summarization has been the mainstay of automatic summarization for decades. Despite all the progress, extractive summarizers still suffer from shortcomings including coreference issues arising from extracting sentences away from their original context in the source document. This affects the coherence and readability of extractive summaries. In this work, we propose a lightweight post-editing step for extractive summaries that centers around a single linguistic decision: the definiteness of noun phrases. We conduct human evaluation studies that show that human expert judges substantially prefer the output of our proposed system over the original summaries. Moreover, based on an automatic evaluation study, we provide evidence for our system’s ability to generate linguistic decisions that lead to improved extractive summaries. We also draw insights about how the automatic system is exploiting some local cues related to the writing style of the main article texts or summary texts to make the decisions, rather than reasoning about the contexts pragmatically. 2021.findings-emnlp.312 @@ -9838,7 +9838,7 @@ VasudevanJagannathan Hamid RezaHassanzadeh ThomasSchaaf - Matthew R.Gormley + Matthew R.Gormley 3693–3712 Fine-tuning pretrained models for automatically summarizing doctor-patient conversation transcripts presents many challenges: limited training data, significant domain shift, long and noisy transcripts, and high target summary variability. In this paper, we explore the feasibility of using pretrained transformer models for automatically summarizing doctor-patient conversations directly from transcripts. We show that fluent and adequate summaries can be generated with limited training data by fine-tuning BART on a specially constructed dataset. The resulting models greatly surpass the performance of an average human annotator and the quality of previous published work for the task. We evaluate multiple methods for handling long conversations, comparing them to the obvious baseline of truncating the conversation to fit the pretrained model length limit. We introduce a multistage approach that tackles the task by learning two fine-tuned models: one for summarizing conversation chunks into partial summaries, followed by one for rewriting the collection of partial summaries into a complete summary. Using a carefully chosen fine-tuning dataset, this method is shown to be effective at handling longer conversations, improving the quality of generated summaries. We conduct both an automatic evaluation (through ROUGE and two concept-based metrics focusing on medical findings) and a human evaluation (through qualitative examples from literature, assessing hallucination, generalization, fluency, and general quality of the generated summaries). 2021.findings-emnlp.313 @@ -9862,7 +9862,7 @@ DenisPeskov ViktorHangya JordanBoyd-Graber - AlexanderFraser + AlexanderFraser 3725–3750 How would you explain Bill Gates to a German? He is associated with founding a company in the United States, so perhaps the German founder Carl Benz could stand in for Gates in those contexts. This type of translation is called adaptation in the translation community. Until now, this task has not been done computationally. Automatic adaptation could be used in natural language processing for machine translation and indirectly for generating new question answering datasets and education. We propose two automatic methods and compare them to human results for this novel NLP task. First, a structured knowledge base adapts named entities using their shared properties. Second, vector-arithmetic and orthogonal embedding mappings methods identify better candidates, but at the expense of interpretable features. We evaluate our methods through a new dataset of human adaptations. 2021.findings-emnlp.315 @@ -9897,7 +9897,7 @@ Sequence-to-Lattice Models for Fast Translation YuntianDeng - AlexanderRush + AlexanderRush 3765–3772 Non-autoregressive machine translation (NAT) approaches enable fast generation by utilizing parallelizable generative processes. The remaining bottleneck in these models is their decoder layers; unfortunately unlike in autoregressive models (Kasai et al., 2020), removing decoder layers from NAT models significantly degrades accuracy. This work proposes a sequence-to-lattice model that replaces the decoder with a search lattice. Our approach first constructs a candidate lattice using efficient lookup operations, generates lattice scores from a deep encoder, and finally finds the best path using dynamic programming. Experiments on three machine translation datasets show that our method is faster than past non-autoregressive generation approaches, and more accurate than naively reducing the number of decoder layers. 2021.findings-emnlp.318 @@ -9956,7 +9956,7 @@ Searching for More Efficient Dynamic Programs TimVieira RyanCotterell - JasonEisner + JasonEisner 3812–3830 Computational models of human language often involve combinatorial problems. For instance, a probabilistic parser may marginalize over exponentially many trees to make predictions. Algorithms for such problems often employ dynamic programming and are not always unique. Finding one with optimal asymptotic runtime can be unintuitive, time-consuming, and error-prone. Our work aims to automate this laborious process. Given an initial correct declarative program, we search for a sequence of semantics-preserving transformations to improve its running time as much as possible. To this end, we describe a set of program transformations, a simple metric for assessing the efficiency of a transformed program, and a heuristic search procedure to improve this metric. We show that in practice, automated search—like the mental search performed by human programmers—can find substantial improvements to the initial program. Empirically, we show that many speed-ups described in the NLP literature could have been discovered automatically by our system. 2021.findings-emnlp.322 @@ -10031,7 +10031,7 @@ <fixed-case>C</fixed-case>onvex <fixed-case>A</fixed-case>ggregation for <fixed-case>O</fixed-case>pinion <fixed-case>S</fixed-case>ummarization HayateIso XiaolanWang - YoshihikoSuhara + YoshihikoSuhara StefanosAngelidis Wang-ChiewTan 3885–3903 @@ -10062,7 +10062,7 @@ TaisiyaGlushkova ChrysoulaZerva RicardoRei - André F. T.Martins + André F. T.Martins 3920–3938 Several neural-based metrics have been recently proposed to evaluate machine translation quality. However, all of them resort to point estimates, which provide limited information at segment level. This is made worse as they are trained on noisy, biased and scarce human judgements, often resulting in unreliable quality predictions. In this paper, we introduce uncertainty-aware MT evaluation and analyze the trustworthiness of the predicted quality. We combine the COMET framework with two uncertainty estimation methods, Monte Carlo dropout and deep ensembles, to obtain quality scores along with confidence intervals. We compare the performance of our uncertainty-aware MT evaluation methods across multiple language pairs from the QT21 dataset and the WMT20 metrics task, augmented with MQM annotations. We experiment with varying numbers of references and further discuss the usefulness of uncertainty-aware quality estimation (without references) to flag possibly critical translation mistakes. 2021.findings-emnlp.330 @@ -10099,8 +10099,8 @@ Benchmarking Meta-embeddings: What Works and What Does Not IkerGarcía-Ferrero - RodrigoAgerri - GermanRigau + RodrigoAgerri + GermanRigau 3957–3972 In the last few years, several methods have been proposed to build meta-embeddings. The general aim was to obtain new representations integrating complementary knowledge from different source pre-trained embeddings thereby improving their overall quality. However, previous meta-embeddings have been evaluated using a variety of methods and datasets, which makes it difficult to draw meaningful conclusions regarding the merits of each approach. In this paper we propose a unified common framework, including both intrinsic and extrinsic tasks, for a fair and objective meta-embeddings evaluation. Furthermore, we present a new method to generate meta-embeddings, outperforming previous work on a large number of intrinsic evaluation benchmarks. Our evaluation framework also allows us to conclude that previous extrinsic evaluations of meta-embeddings have been overestimated. 2021.findings-emnlp.333 @@ -10143,7 +10143,7 @@ EricChang AmilcareGentili JulianMcAuley - Chun-NanHsu + Chun-NanHsu 4009–4015 Radiology report generation aims at generating descriptive text from radiology images automatically, which may present an opportunity to improve radiology reporting and interpretation. A typical setting consists of training encoder-decoder models on image-report pairs with a cross entropy loss, which struggles to generate informative sentences for clinical diagnoses since normal findings dominate the datasets. To tackle this challenge and encourage more clinically-accurate text outputs, we propose a novel weakly supervised contrastive loss for medical report generation. Experimental results demonstrate that our method benefits from contrasting target reports with incorrect but semantically-close ones. It outperforms previous work on both clinical correctness and text generation metrics for two public benchmarks. 2021.findings-emnlp.336 @@ -10153,7 +10153,7 @@ <fixed-case>NUANCED</fixed-case>: Natural Utterance Annotation for Nuanced Conversation with Estimated Distributions - ZhiyuChen + ZhiyuChen HongleiLiu HuXu SeungwhanMoon @@ -10193,7 +10193,7 @@ HengchangHu MargritBetke PrakashIshwar - Derry TantiWijaya + Derry TantiWijaya 4037–4050 News media structure their reporting of events or issues using certain perspectives. When describing an incident involving gun violence, for example, some journalists may focus on mental health or gun regulation, while others may emphasize the discussion of gun rights. Such perspectives are called “frames” in communication research. We study, for the first time, the value of combining lead images and their contextual information with text to identify the frame of a given news article. We observe that using multiple modes of information(article- and image-derived features) improves prediction of news frames over any single mode of information when the images are relevant to the frames of the headlines. We also observe that frame image relevance is related to the ease of conveying frames via images, which we call frame concreteness. Additionally, we release the first multimodal news framing dataset related to gun violence in the U.S., curated and annotated by communication researchers. The dataset will allow researchers to further examine the use of multiple information modalities for studying media framing. 2021.findings-emnlp.339 @@ -10232,7 +10232,7 @@ SuchinGururangan DallasCard RoySchwartz - Noah A.Smith + Noah A.Smith 4066–4073 Research in NLP is often supported by experimental results, and improved reporting of such results can lead to better understanding and more reproducible science. In this paper we analyze three statistical estimators for expected validation performance, a tool used for reporting performance (e.g., accuracy) as a function of computational budget (e.g., number of hyperparameter tuning experiments). Where previous work analyzing such estimators focused on the bias, we also examine the variance and mean squared error (MSE). In both synthetic and realistic scenarios, we evaluate three estimators and find the unbiased estimator has the highest variance, and the estimator with the smallest variance has the largest bias; the estimator with the smallest MSE strikes a balance between bias and variance, displaying a classic bias-variance tradeoff. We use expected validation performance to compare between different models, and analyze how frequently each estimator leads to drawing incorrect conclusions about which of two models performs best. We find that the two biased estimators lead to the fewest incorrect conclusions, which hints at the importance of minimizing variance and MSE. 2021.findings-emnlp.342 @@ -10294,7 +10294,7 @@ WentingZhao YeLiu YaoWan - PhilipYu + PhilipYu 4106–4117 Few-shot table-to-text generation is a task of composing fluent and faithful sentences to convey table content using limited data. Despite many efforts having been made towards generating impressive fluent sentences by fine-tuning powerful pre-trained language models, the faithfulness of generated content still needs to be improved. To this end, this paper proposes a novel approach Attend, Memorize and Generate (called AMG), inspired by the text generation process of humans. In particular, AMG (1) attends over the multi-granularity of context using a novel strategy based on table slot level and traditional token-by-token level attention to exploit both the table structure and natural linguistic information; (2) dynamically memorizes the table slot allocation states; and (3) generates faithful sentences according to both the context and memory allocation states. Comprehensive experiments with human evaluation on three domains (i.e., humans, songs, and books) of the Wiki dataset show that our model can generate higher qualified texts when compared with several state-of-the-art baselines, in both fluency and faithfulness. 2021.findings-emnlp.347 @@ -10324,7 +10324,7 @@ PeiZhou PegahJandaghi HyundongCho - Bill YuchenLin + Bill YuchenLin JayPujara XiangRen 4132–4146 @@ -10353,7 +10353,7 @@ Textual Time Travel: A Temporally Informed Approach to Theory of Mind AkshathaArodi - Jackie Chi KitCheung + Jackie Chi KitCheung 4162–4172 Natural language processing systems such as dialogue agents should be able to reason about other people’s beliefs, intentions and desires. This capability, called theory of mind (ToM), is crucial, as it allows a model to predict and interpret the needs of users based on their mental states. A recent line of research evaluates the ToM capability of existing memory-augmented neural models through question-answering. These models perform poorly on false belief tasks where beliefs differ from reality, especially when the dataset contains distracting sentences. In this paper, we propose a new temporally informed approach for improving the ToM capability of memory-augmented neural models. Our model incorporates priors about the entities’ minds and tracks their mental states as they evolve over time through an extended passage. It then responds to queries through textual time travel–i.e., by accessing the stored memory of an earlier time step. We evaluate our model on ToM datasets and find that this approach improves performance, particularly by correcting the predicted mental states to match the false belief. 2021.findings-emnlp.351 @@ -10416,7 +10416,7 @@ AllenKim CharutaPethe NaoyaInoue - SteveSkiena + SteveSkiena 4217–4226 Substantial amounts of work are required to clean large collections of digitized books for NLP analysis, both because of the presence of errors in the scanned text and the presence of duplicate volumes in the corpora. In this paper, we consider the issue of deduplication in the presence of optical character recognition (OCR) errors. We present methods to handle these errors, evaluated on a collection of 19,347 texts from the Project Gutenberg dataset and 96,635 texts from the HathiTrust Library. We demonstrate that improvements in language models now enable the detection and correction of OCR errors without consideration of the scanning image itself. The inconsistencies found by aligning pairs of scans of the same underlying work provides training data to build models for detecting and correcting errors. We identify the canonical version for each of 17,136 repeatedly-scanned books from 58,808 scans. Finally, we investigate methods to detect and correct errors in single-copy texts. We show that on average, our method corrects over six times as many errors as it introduces. We also provide interesting analysis on the relation between scanning quality and other factors such as location and publication year. 2021.findings-emnlp.356 @@ -10445,7 +10445,7 @@ BoxingChen JunXie WeihuaLuo - JiajunChen + JiajunChen 4234–4241 Recently, kNN-MT (Khandelwal et al., 2020) has shown the promising capability of directly incorporating the pre-trained neural machine translation (NMT) model with domain-specific token-level k-nearest-neighbor (kNN) retrieval to achieve domain adaptation without retraining. Despite being conceptually attractive, it heavily relies on high-quality in-domain parallel corpora, limiting its capability on unsupervised domain adaptation, where in-domain parallel corpora are scarce or nonexistent. In this paper, we propose a novel framework that directly uses in-domain monolingual sentences in the target language to construct an effective datastore for k-nearest-neighbor retrieval. To this end, we first introduce an autoencoder task based on the target language, and then insert lightweight adapters into the original NMT model to map the token-level representation of this task to the ideal representation of the translation task. Experiments on multi-domain datasets demonstrate that our proposed approach significantly improves the translation accuracy with target-side monolingual data, while achieving comparable performance with back-translation. Our implementation is open-sourced at https://github.com/zhengxxn/UDA-KNN. 2021.findings-emnlp.358 @@ -10456,7 +10456,7 @@ The Topic Confusion Task: A Novel Evaluation Scenario for Authorship Attribution MalikAltakrori - Jackie Chi KitCheung + Jackie Chi KitCheung Benjamin C. M.Fung 4242–4256 Authorship attribution is the problem of identifying the most plausible author of an anonymous text from a set of candidate authors. Researchers have investigated same-topic and cross-topic scenarios of authorship attribution, which differ according to whether new, unseen topics are used in the testing phase. However, neither scenario allows us to explain whether errors are caused by failure to capture authorship writing style or by the topic shift. Motivated by this, we propose the topic confusion task where we switch the author-topic configuration between the training and testing sets. This setup allows us to investigate two types of errors: one caused by the topic shift and one caused by the features’ inability to capture the writing styles. We show that stylometric features with part-of-speech tags are the least susceptible to topic variations. We further show that combining them with other features leads to significantly lower topic confusion and higher attribution accuracy. Finally, we show that pretrained language models such as BERT and RoBERTa perform poorly on this task and are surpassed by simple features such as word-level n-gram. @@ -10470,7 +10470,7 @@ AndrewLee Jonathan K.Kummerfeld LarryAn - RadaMihalcea + RadaMihalcea 4257–4272 Many statistical models have high accuracy on test benchmarks, but are not explainable, struggle in low-resource scenarios, cannot be reused for multiple tasks, and cannot easily integrate domain expertise. These factors limit their use, particularly in settings such as mental health, where it is difficult to annotate datasets and model outputs have significant impact. We introduce a micromodel architecture to address these challenges. Our approach allows researchers to build interpretable representations that embed domain knowledge and provide explanations throughout the model’s decision process. We demonstrate the idea on multiple mental health tasks: depression classification, PTSD classification, and suicidal risk assessment. Our systems consistently produce strong results, even in low-resource scenarios, and are more interpretable than alternative methods. 2021.findings-emnlp.360 @@ -10481,7 +10481,7 @@ Discovering Explanatory Sentences in Legal Case Decisions Using Pre-trained Language Models JaromirSavelka - KevinAshley + KevinAshley 4273–4283 Legal texts routinely use concepts that are difficult to understand. Lawyers elaborate on the meaning of such concepts by, among other things, carefully investigating how they have been used in the past. Finding text snippets that mention a particular concept in a useful way is tedious, time-consuming, and hence expensive. We assembled a data set of 26,959 sentences, coming from legal case decisions, and labeled them in terms of their usefulness for explaining selected legal concepts. Using the dataset we study the effectiveness of transformer models pre-trained on large language corpora to detect which of the sentences are useful. In light of models’ predictions, we analyze various linguistic properties of the explanatory sentences as well as their relationship to the legal concept that needs to be explained. We show that the transformer-based models are capable of learning surprisingly sophisticated features and outperform the prior approaches to the task. 2021.findings-emnlp.361 @@ -10509,7 +10509,7 @@ Reference-based Weak Supervision for Answer Sentence Selection using Web Data VivekKrishnamurthy - ThuyVu + ThuyVu AlessandroMoschitti 4294–4299 Answer Sentence Selection (AS2) models are core components of efficient retrieval-based Question Answering (QA) systems. We present the Reference-based Weak Supervision (RWS), a fully automatic large-scale data pipeline that harvests high-quality weakly- supervised answer sentences from Web data, only requiring a question-reference pair as input. We evaluated the quality of the RWS-derived data by training TANDA models, which are the state of the art for AS2. Our results show that the data consistently bolsters TANDA on three different datasets. In particular, we set the new state of the art for AS2 to P@1=90.1%, and MAP=92.9%, on WikiQA. We record similar performance gains of RWS on a much larger dataset named Web-based Question Answering (WQA). @@ -10599,9 +10599,9 @@ Mitigating Data Poisoning in Text Classification with Differential Privacy ChangXu JunWang - FranciscoGuzmán + FranciscoGuzmán BenjaminRubinstein - TrevorCohn + TrevorCohn 4348–4356 NLP models are vulnerable to data poisoning attacks. One type of attack can plant a backdoor in a model by injecting poisoned examples in training, causing the victim model to misclassify test instances which include a specific pattern. Although defences exist to counter these attacks, they are specific to an attack type or pattern. In this paper, we propose a generic defence mechanism by making the training process robust to poisoning attacks through gradient shaping methods, based on differentially private training. We show that our method is highly effective in mitigating, or even eliminating, poisoning attacks on text classification, with only a small cost in predictive accuracy. 2021.findings-emnlp.369 @@ -10649,7 +10649,7 @@ Switch Point biased Self-Training: Re-purposing Pretrained Models for Code-Switching ParulChopra Sai KrishnaRallabandi - Alan WBlack + Alan WBlack Khyathi RaghaviChandu 4389–4397 Code-switching (CS), a ubiquitous phenomenon due to the ease of communication it offers in multilingual communities still remains an understudied problem in language processing. The primary reasons behind this are: (1) minimal efforts in leveraging large pretrained multilingual models, and (2) the lack of annotated data. The distinguishing case of low performance of multilingual models in CS is the intra-sentence mixing of languages leading to switch points. We first benchmark two sequence labeling tasks – POS and NER on 4 different language pairs with a suite of pretrained models to identify the problems and select the best performing char-BERT model among them (addressing (1)). We then propose a self training method to repurpose the existing pretrained models using a switch-point bias by leveraging unannotated data (addressing (2)). We finally demonstrate that our approach performs well on both tasks by reducing the gap between the switch point performance while retaining the overall performance on two distinct language pairs in both the tasks. We plan to release our models and the code for all our experiments. @@ -10703,8 +10703,8 @@ ChenguangZhu BudhadityaDeb AsliCelikyilmaz - Ahmed HassanAwadallah - DragomirRadev + Ahmed HassanAwadallah + DragomirRadev 4426–4433 Dialogue summarization helps readers capture salient information from long conversations in meetings, interviews, and TV series. However, real-world dialogues pose a great challenge to current summarization models, as the dialogue length typically exceeds the input limits imposed by recent transformer-based pre-trained models, and the interactive nature of dialogues makes relevant information more context-dependent and sparsely distributed than news articles. In this work, we perform a comprehensive study on long dialogue summarization by investigating three strategies to deal with the lengthy input problem and locate relevant information: (1) extended transformer models such as Longformer, (2) retrieve-then-summarize pipeline models with several dialogue utterance retrieval methods, and (3) hierarchical dialogue encoding models such as HMNet. Our experimental results on three long dialogue datasets (QMSum, MediaSum, SummScreen) show that the retrieve-then-summarize pipeline models yield the best performance. We also demonstrate that the summary quality can be further improved with a stronger retrieval model and pretraining on proper external summarization datasets. 2021.findings-emnlp.377 @@ -10730,7 +10730,7 @@ ShivamSharma DimitarDimitrov Md. ShadAkhtar - PreslavNakov + PreslavNakov TanmoyChakraborty 4439–4455 Internet memes have become powerful means to transmit political, psychological, and socio-cultural ideas. Although memes are typically humorous, recent days have witnessed an escalation of harmful memes used for trolling, cyberbullying, and abuse. Detecting such memes is challenging as they can be highly satirical and cryptic. Moreover, while previous work has focused on specific aspects of memes such as hate speech and propaganda, there has been little work on harm in general. Here, we aim to bridge this gap. In particular, we focus on two tasks: (i)detecting harmful memes, and (ii) identifying the social entities they target. We further extend the recently released HarMeme dataset, which covered COVID-19, with additional memes and a new topic: US politics. To solve these tasks, we propose MOMENTA (MultimOdal framework for detecting harmful MemEs aNd Their tArgets), a novel multimodal deep neural network that uses global and local perspectives to detect harmful memes. MOMENTA systematically analyzes the local and the global perspective of the input meme (in both modalities) and relates it to the background context. MOMENTA is interpretable and generalizable, and our experiments show that it outperforms several strong rivaling approaches. @@ -10747,7 +10747,7 @@ XiangGao HamidPalangi JianfengWang - KennethForbus + KennethForbus JianfengGao 4456–4472 Emotion and empathy are examples of human qualities lacking in many human-machine interactions. The goal of our work is to generate engaging dialogue grounded in a user-shared image with increased emotion and empathy while minimizing socially inappropriate or offensive outputs. We release the Neural Image Commenting with Empathy (NICE) dataset consisting of almost two million images and the corresponding human-generated comments, a set of human annotations, and baseline performance on a range of models. In-stead of relying on manually labeled emotions, we also use automatically generated linguistic representations as a source of weakly supervised labels. Based on these annotations, we define two different tasks for the NICE dataset. Then, we propose a novel pre-training model - Modeling Affect Generation for Image Comments (MAGIC) - which aims to generate comments for images, conditioned on linguistic representations that capture style and affect, and to help generate more empathetic, emotional, engaging and socially appropriate comments. Using this model we achieve state-of-the-art performance on one of our NICE tasks. The experiments show that the approach can generate more human-like and engaging image comments. @@ -10838,7 +10838,7 @@ ParsaFarinneya Mohammad MahdiAbdollah Pour SardarHamidian - MonaDiab + MonaDiab 4556–4565 Social media has emerged as a key channel for seeking information. Online users spend several hours reading, posting, and searching for news on microblogging platforms daily. However, this could act as a double-edged sword especially when not all information online is reliable. Moreover, the inherently unmoderated nature of social media renders identifying unverified information ever more challenging. Most of the existing approaches for rumor tracking are not scalable because of their dependency on a significant amount of labeled data. In this work, we investigate this problem from different angles. We design an Active-Transfer Learning (ATL) strategy to identify rumors with a limited amount of annotated data. We go beyond that and investigate the impact of leveraging various machine learning approaches in addition to different contextual representations. We discuss the impact of multiple classifiers on a limited amount of annotated data followed by an interactive approach to gradually update the models by adding the least certain samples (LCS) from the pool of unlabeled data. Our proposed Active Learning (AL) strategy achieves faster convergence in terms of the F-score while requiring fewer annotated samples (42% of the whole dataset for the best model). 2021.findings-emnlp.387 @@ -10936,7 +10936,7 @@ HongjieRen KazushigeOuchi ZeLiu - JinanXu + JinanXu 4620–4630 Generative conversation systems tend to produce meaningless and generic responses, which significantly reduce the user experience. In order to generate informative and diverse responses, recent studies proposed to fuse knowledge to improve informativeness and adopt latent variables to enhance the diversity. However, utilizing latent variables will lead to the inaccuracy of knowledge in the responses, and the dissemination of wrong knowledge will mislead the communicators. To address this problem, we propose a Syntactically Diverse Adversarial Network (SDAN) for knowledge-grounded conversation model. SDAN contains an adversarial hierarchical semantic network to keep the semantic coherence, a knowledge-aware network to attend more related knowledge for improving the informativeness and a syntactic latent variable network to generate syntactically diverse responses. Additionally, in order to increase the controllability of syntax, we adopt adversarial learning to decouple semantic and syntactic representations. Experimental results show that our model can not only generate syntactically diverse and knowledge-accurate responses but also significantly achieve the balance between improving the syntactic diversity and maintaining the knowledge accuracy. 2021.findings-emnlp.394 @@ -10965,7 +10965,7 @@ MingxuanWang LeiLi HangLi - DeyiXiong + DeyiXiong 4639–4644 This paper presents Self-correcting Encoding (Secoco), a framework that effectively deals with noisy input for robust neural machine translation by introducing self-correcting predictors. Different from previous robust approaches, Secoco enables NMT to explicitly correct noisy inputs and delete specific errors simultaneously with the translation decoding process. Secoco is able to achieve significant improvements over strong baselines on two real-world test sets and a benchmark WMT dataset with good interpretability. We will make our code and dataset publicly available soon. 2021.findings-emnlp.396 @@ -11060,7 +11060,7 @@ <fixed-case>A</fixed-case>uto<fixed-case>EQA</fixed-case>: Auto-Encoding Questions for Extractive Question Answering StalinVaranasi SaadullahAmin - GuenterNeumann + GuenterNeumann 4706–4712 There has been a significant progress in the field of Extractive Question Answering (EQA) in the recent years. However, most of them are reliant on annotations of answer-spans in the corresponding passages. In this work, we address the problem of EQA when no annotations are present for the answer span, i.e., when the dataset contains only questions and corresponding passages. Our method is based on auto-encoding of the question that performs a question answering task during encoding and a question generation task during decoding. We show that our method performs well in a zero-shot setting and can provide an additional loss to boost performance for EQA. 2021.findings-emnlp.403 @@ -11201,7 +11201,7 @@ ‘Just What do You Think You’re Doing, Dave?’ A Checklist for Responsible Data Use in <fixed-case>NLP</fixed-case> AnnaRogers - TimothyBaldwin + TimothyBaldwin KobiLeins 4821–4833 A key part of the NLP ethics movement is responsible use of data, but exactly what that means or how it can be best achieved remain unclear. This position paper discusses the core legal and ethical principles for collection and sharing of textual data, and the tensions between them. We propose a potential checklist for responsible data (re-)use that could both standardise the peer review of conference submissions, as well as enable a more in-depth view of published research across the community. Our proposal aims to contribute to the development of a consistent standard for data (re-)use, embraced across NLP conferences. @@ -11298,7 +11298,7 @@ KarmanyaAggarwal EmilyAllaway TalLinzen - Samuel R.Bowman + Samuel R.Bowman 4886–4901 Many crowdsourced NLP datasets contain systematic artifacts that are identified only after data collection is complete. Earlier identification of these issues should make it easier to create high-quality training and evaluation data. We attempt this by evaluating protocols in which expert linguists work ‘in the loop’ during data collection to identify and address these issues by adjusting task instructions and incentives. Using natural language inference as a test case, we compare three data collection protocols: (i) a baseline protocol with no linguist involvement, (ii) a linguist-in-the-loop intervention with iteratively-updated constraints on the writing task, and (iii) an extension that adds direct interaction between linguists and crowdworkers via a chatroom. We find that linguist involvement does not lead to increased accuracy on out-of-domain test sets compared to baseline, and adding a chatroom has no effect on the data. Linguist involvement does, however, lead to more challenging evaluation data and higher accuracy on some challenge sets, demonstrating the benefits of integrating expert analysis during data collection. 2021.findings-emnlp.421 @@ -11311,7 +11311,7 @@ ShaneStorks QiaoziGao YichiZhang - JoyceChai + JoyceChai 4902–4918 Large-scale, pre-trained language models (LMs) have achieved human-level performance on a breadth of language understanding tasks. However, evaluations only based on end task performance shed little light on machines’ true ability in language understanding and reasoning. In this paper, we highlight the importance of evaluating the underlying reasoning process in addition to end performance. Toward this goal, we introduce Tiered Reasoning for Intuitive Physics (TRIP), a novel commonsense reasoning dataset with dense annotations that enable multi-tiered evaluation of machines’ reasoning process. Our empirical results show that while large LMs can achieve high end performance, they struggle to support their predictions with valid supporting evidence. The TRIP dataset and our baseline results will motivate verifiable evaluation of commonsense reasoning and facilitate future research toward developing better language understanding and reasoning models. 2021.findings-emnlp.422 @@ -11323,7 +11323,7 @@ Making Heads and Tails of Models with Marginal Calibration for Sparse Tagsets MichaelKranzlein - Nelson F.Liu + Nelson F.Liu NathanSchneider 4919–4928 For interpreting the behavior of a probabilistic model, it is useful to measure a model’s calibration—the extent to which it produces reliable confidence scores. We address the open problem of calibration for tagging models with sparse tagsets, and recommend strategies to measure and reduce calibration error (CE) in such models. We show that several post-hoc recalibration techniques all reduce calibration error across the marginal distribution for two existing sequence taggers. Moreover, we propose tag frequency grouping (TFG) as a way to measure calibration error in different frequency bands. Further, recalibrating each group separately promotes a more equitable reduction of calibration error across the tag frequency spectrum. @@ -11338,11 +11338,11 @@ Akhilesh DeepakGotmare BryanMcCann Nitish ShirishKeskar - ShafiqJoty + ShafiqJoty RichardSocher Nazneen FatemaRajani 4929–4952 - + 2021.findings-emnlp.424 krause-etal-2021-gedi-generative 10.18653/v1/2021.findings-emnlp.424 diff --git a/data/xml/2021.fnp.xml b/data/xml/2021.fnp.xml index 880ddea61f..d9d8477bce 100644 --- a/data/xml/2021.fnp.xml +++ b/data/xml/2021.fnp.xml @@ -32,7 +32,7 @@ DominiqueMariko EstelleLabidurie Huguesde Mazancourt - PatrickParoubek + PatrickParoubek 9–18 2021.fnp-1.2 cui-etal-2021-sequence @@ -66,7 +66,7 @@ <fixed-case>NUS</fixed-case>-<fixed-case>IDS</fixed-case> at <fixed-case>F</fixed-case>in<fixed-case>C</fixed-case>ausal 2021: Dependency Tree in Graph Neural Network for Better Cause-Effect Span Detection Fiona AntingTan - See-KiongNg + See-KiongNg 37–43 2021.fnp-1.6 tan-ng-2021-nus @@ -111,7 +111,7 @@ Annotation model and corpus for opinionated economy and finance narrative detection JiahuiHu - PatrickParoubek + PatrickParoubek DirkSchumacher 61–66 2021.fnp-1.11 @@ -171,7 +171,7 @@ Extractive Financial Narrative Summarisation using <fixed-case>S</fixed-case>entence<fixed-case>BERT</fixed-case> Based Clustering TubaGokhan PhillipSmith - MarkLee + MarkLee 94–98 2021.fnp-1.18 gokhan-etal-2021-extractive diff --git a/data/xml/2021.gebnlp.xml b/data/xml/2021.gebnlp.xml index bf08b93fe7..d3f6c4feaa 100644 --- a/data/xml/2021.gebnlp.xml +++ b/data/xml/2021.gebnlp.xml @@ -97,7 +97,7 @@ Using Gender- and Polarity-Informed Models to Investigate Bias SamiaTouileb - LiljaØvrelid + LiljaØvrelid ErikVelldal 66–74 In this work we explore the effect of incorporating demographic metadata in a text classifier trained on top of a pre-trained transformer language model. More specifically, we add information about the gender of critics and book authors when classifying the polarity of book reviews, and the polarity of the reviews when classifying the genders of authors and critics. We use an existing data set of Norwegian book reviews with ratings by professional critics, which has also been augmented with gender information, and train a document-level sentiment classifier on top of a recently released Norwegian BERT-model. We show that gender-informed models obtain substantially higher accuracy, and that polarity-informed models obtain higher accuracy when classifying the genders of book authors. For this particular data set, we take this result as a confirmation of the gender bias in the underlying label distribution, but in other settings we believe a similar approach can be used for mitigating bias in the model. @@ -107,8 +107,8 @@ Assessing Gender Bias in <fixed-case>W</fixed-case>ikipedia: Inequalities in Article Titles - AgnieszkaFalenska - ÖzlemÇetinoğlu + AgnieszkaFalenska + ÖzlemÇetinoğlu 75–85 Potential gender biases existing in Wikipedia’s content can contribute to biased behaviors in a variety of downstream NLP systems. Yet, efforts in understanding what inequalities in portraying women and men occur in Wikipedia focused so far only on *biographies*, leaving open the question of how often such harmful patterns occur in other topics. In this paper, we investigate gender-related asymmetries in Wikipedia titles from *all domains*. We assess that for only half of gender-related articles, i.e., articles with words such as *women* or *male* in their titles, symmetrical counterparts describing the same concept for the other gender (and clearly stating it in their titles) exist. Among the remaining imbalanced cases, the vast majority of articles concern sports- and social-related issues. We provide insights on how such asymmetries can influence other Wikipedia components and propose steps towards reducing the frequency of observed patterns. 2021.gebnlp-1.9 @@ -119,7 +119,7 @@ Investigating the Impact of Gender Representation in <fixed-case>ASR</fixed-case> Training Data: a Case Study on Librispeech MahaultGarnerin SolangeRossato - LaurentBesacier + LaurentBesacier 86–92 In this paper we question the impact of gender representation in training data on the performance of an end-to-end ASR system. We create an experiment based on the Librispeech corpus and build 3 different training corpora varying only the proportion of data produced by each gender category. We observe that if our system is overall robust to the gender balance or imbalance in training data, it is nonetheless dependant of the adequacy between the individuals present in the training and testing sets. 2021.gebnlp-1.10 @@ -129,7 +129,7 @@ Generating Gender Augmented Data for <fixed-case>NLP</fixed-case> NishthaJain - MajaPopović + MajaPopović DeclanGroves EvaVanmassenhove 93–102 diff --git a/data/xml/2021.gem.xml b/data/xml/2021.gem.xml index c089c90557..505d6b8f9b 100644 --- a/data/xml/2021.gem.xml +++ b/data/xml/2021.gem.xml @@ -81,7 +81,7 @@ EleftheriaBriakou SwetaAgrawal KeZhang - JoelTetreault + JoelTetreault MarineCarpuat 58–67 This paper reviews and summarizes human evaluation practices described in 97 style transfer papers with respect to three main evaluation aspects: style transfer, meaning preservation, and fluency. In principle, evaluations by human raters should be the most reliable. However, in style transfer papers, we find that protocols for human evaluations are often underspecified and not standardized, which hampers the reproducibility of research in this field and progress toward better human and automatic evaluation methods. @@ -130,7 +130,7 @@ AnuoluwapoAremu AntoineBosselut Khyathi RaghaviChandu - Miruna-AdrianaClinciu + Miruna-AdrianaClinciu DipanjanDas KaustubhDhole WanyuDu @@ -139,7 +139,7 @@ Chris ChinenyeEmezue VarunGangal CristinaGarbacea - TatsunoriHashimoto + TatsunoriHashimoto YufangHou YacineJernite HarshJhamtani @@ -162,7 +162,7 @@ VitalyNikolaev AndreNiyongabo Rubungo SalomeyOsei - AnkurParikh + AnkurParikh LauraPerez-Beltrachini Niranjan RameshRao VikasRaunak @@ -172,7 +172,7 @@ ThibaultSellam SamiraShaikh AnastasiaShimorina - Marco AntonioSobrevilla Cabezudo + Marco AntonioSobrevilla Cabezudo HendrikStrobelt NishantSubramani WeiXu @@ -206,7 +206,7 @@ PeymanHeidari AnkitArun ShashankJain - MichaelWhite + MichaelWhite 136–147 We explore the use of self-training and acceptability classifiers with pre-trained models for natural language generation in structure-to-text settings using three GEM datasets (E2E, WebNLG-en, Schema-Guided Dialog). With the Schema-Guided Dialog dataset, we also experiment with including multiple turns of context in the input. We find that self-training with reconstruction matching along with acceptability classifier filtering can improve semantic correctness, though gains are limited in the full-data setting. With context-conditioning, we find that including multiple turns in the context encourages the model to align with the user’s word and phrasing choices as well as to generate more self-consistent responses. In future versions of the GEM challenge, we encourage the inclusion of few-shot tracks to encourage research on data efficiency. 2021.gem-1.12 @@ -216,7 +216,7 @@ <fixed-case>NUIG</fixed-case>-<fixed-case>DSI</fixed-case>’s submission to The <fixed-case>GEM</fixed-case> Benchmark 2021 NivranshuPasricha - MihaelArcan + MihaelArcan PaulBuitelaar 148–154 This paper describes the submission by NUIG-DSI to the GEM benchmark 2021. We participate in the modeling shared task where we submit outputs on four datasets for data-to-text generation, namely, DART, WebNLG (en), E2E and CommonGen. We follow an approach similar to the one described in the GEM benchmark paper where we use the pre-trained T5-base model for our submission. We train this model on additional monolingual data where we experiment with different masking strategies specifically focused on masking entities, predicates and concepts as well as a random masking strategy for pre-training. In our results we find that random masking performs the best in terms of automatic evaluation metrics, though the results are not statistically significantly different compared to other masking strategies. @@ -228,7 +228,7 @@ <fixed-case>S</fixed-case>imple<fixed-case>NER</fixed-case> Sentence Simplification System for <fixed-case>GEM</fixed-case> 2021 K V AdityaSrivatsa MonilGokani - ManishShrivastava + ManishShrivastava 155–160 This paper describes SimpleNER, a model developed for the sentence simplification task at GEM-2021. Our system is a monolingual Seq2Seq Transformer architecture that uses control tokens pre-pended to the data, allowing the model to shape the generated simplifications according to user desired attributes. Additionally, we show that NER-tagging the training data before use helps stabilize the effect of the control tokens and significantly improves the overall performance of the system. We also employ pretrained embeddings to reduce data sparsity and allow the model to produce more generalizable outputs. 2021.gem-1.14 diff --git a/data/xml/2021.germeval.xml b/data/xml/2021.germeval.xml index e9a447856a..36a9958a2c 100644 --- a/data/xml/2021.germeval.xml +++ b/data/xml/2021.germeval.xml @@ -133,7 +133,7 @@ ur-iw-hnt at <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val 2021: An Ensembling Strategy with Multiple <fixed-case>BERT</fixed-case> Models Hoai NamTran - UdoKruschwitz + UdoKruschwitz 83–87 This paper describes our approach (ur-iw-hnt) for the Shared Task of GermEval2021 to identify toxic, engaging, and fact-claiming comments. We submitted three runs using an ensembling strategy by majority (hard) voting with multiple different BERT models of three different types: German-based, Twitter-based, and multilingual models. All ensemble models outperform single models, while BERTweet is the winner of all individual models in every subtask. Twitter-based models perform better than GermanBERT models, and multilingual models perform worse but by a small margin. 2021.germeval-1.12 @@ -153,7 +153,7 @@ <fixed-case>UR</fixed-case>@<fixed-case>NLP</fixed-case>_<fixed-case>A</fixed-case>_<fixed-case>T</fixed-case>eam @ <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val 2021: Ensemble-based Classification of Toxic, Engaging and Fact-Claiming Comments Kwabena OdameAkomeah - UdoKruschwitz + UdoKruschwitz BerndLudwig 95–99 In this paper, we report on our approach to addressing the GermEval 2021 Shared Task on the Identification of Toxic, Engaging, and Fact-Claiming Comments for the German language. We submitted three runs for each subtask based on ensembles of three models each using contextual embeddings from pre-trained language models using SVM and neural-network-based classifiers. We include language-specific as well as language-agnostic language models – both with and without fine-tuning. We observe that for the runs we submitted that the SVM models overfitted the training data and this affected the aggregation method (simple majority voting) of the ensembles. The model records a lower performance on the test set than on the training set. Exploring the issue of overfitting we uncovered that due to a bug in the pipeline the runs we submitted had not been trained on the full set but only on a small training set. Therefore in this paper we also include the results we get when trained on the full training set which demonstrate the power of ensembles. diff --git a/data/xml/2021.gwc.xml b/data/xml/2021.gwc.xml index 4ab2172d7b..0220e33101 100644 --- a/data/xml/2021.gwc.xml +++ b/data/xml/2021.gwc.xml @@ -70,7 +70,7 @@ <fixed-case>A</fixed-case>sk2<fixed-case>T</fixed-case>ransformers: Zero-Shot Domain labelling with Pretrained Language Models OscarSainz - GermanRigau + GermanRigau 44–52 In this paper we present a system that exploits different pre-trained Language Models for assigning domain labels to WordNet synsets without any kind of supervision. Furthermore, the system is not restricted to use a particular set of domain labels. We exploit the knowledge encoded within different off-the-shelf pre-trained Language Models and task formulations to infer the domain label of a particular WordNet definition. The proposed zero-shot system achieves a new state-of-the-art on the English dataset used in the evaluation. 2021.gwc-1.6 @@ -100,7 +100,7 @@ Monolingual Word Sense Alignment as a Classification Problem SinaAhmadi - John P.McCrae + John P.McCrae 73–80 Words are defined based on their meanings in various ways in different resources. Aligning word senses across monolingual lexicographic resources increases domain coverage and enables integration and incorporation of data. In this paper, we explore the application of classification methods using manually-extracted features along with representation learning techniques in the task of word sense alignment and semantic relationship detection. We demonstrate that the performance of classification methods dramatically varies based on the type of semantic relationships due to the nature of the task but outperforms the previous experiments. 2021.gwc-1.9 @@ -118,8 +118,8 @@ The <fixed-case>G</fixed-case>lobal<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Formats: Updates for 2020 - John P.McCrae - Michael WayneGoodman + John P.McCrae + Michael WayneGoodman FrancisBond AlexandreRademaker EwaRudnicka @@ -131,7 +131,7 @@ Intrinsically Interlingual: The Wn Python Library for Wordnets - Michael WayneGoodman + Michael WayneGoodman FrancisBond 100–107 This paper introduces Wn, a new Python library for working with wordnets. Unlike previous libraries, Wn is built from the beginning to accommodate multiple wordnets — for multiple languages or multiple versions of the same wordnet — while retaining the ability to query and traverse them independently. It is also able to download and incorporate wordnets published online. These features are made possible through Wn’s adoption of standard formats and methods for interoperability, namely the WN-LMF schema (Vossen et al., 2013; Bond et al., 2020) and the Collaborative Interlingual Index (Bond et al., 2016). Wn is open-source, easily available, and well-documented. @@ -167,7 +167,7 @@ Evaluation of Taxonomy Enrichment on Diachronic <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Versions IrinaNikishina - NataliaLoukachevitch + NataliaLoukachevitch VarvaraLogacheva AlexanderPanchenko 126–136 @@ -257,7 +257,7 @@ Comparing Similarity of Words Based on Psychosemantic Experiment and <fixed-case>R</fixed-case>u<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et ValerySolovyev - NataliaLoukachevitch + NataliaLoukachevitch 199–206 In the paper we compare the structure of the Russian language thesaurus RuWordNet with the data of a psychosemantic experiment to identify semantically close words. The aim of the study is to find out to what extent the structure of RuWordNet corresponds to the intuitive ideas of native speakers about the semantic proximity of words. The respondents were asked to list synonyms to a given word. As a result of the experiment, we found that the respondents mainly mentioned not only synonyms but words that are in paradigmatic relations with the stimuli. The words of the mental sphere were chosen for the experiment. In 95% of cases, the words characterized in the experiment as semantically close were also close according to the thesaurus. In other cases, additions to the thesaurus were proposed. 2021.gwc-1.23 @@ -320,7 +320,7 @@ Towards a Linking between <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>W</fixed-case>ikidata - John P.McCrae + John P.McCrae DavidCillessen 252–257 WordNet is the most widely used lexical resource for English, while Wikidata is one of the largest knowledge graphs of entity and concepts available. While, there is a clear difference in the focus of these two resources, there is also a significant overlap and as such a complete linking of these resources would have many uses. We propose the development of such a linking, first by means of the hapax legomenon links and secondly by the use of natural language processing techniques. We show that these can be done with high accuracy but that human validation is still necessary. This has resulted in over 9,000 links being added between these two resources. @@ -340,7 +340,7 @@ <fixed-case>D</fixed-case>an<fixed-case>N</fixed-case>et2: Extending the coverage of adjectives in <fixed-case>D</fixed-case>an<fixed-case>N</fixed-case>et based on thesaurus data (project presentation) SanniNimb - BolettePedersen + BolettePedersen SussiOlsen 267–272 The paper describes work in progress in the DanNet2 project financed by the Carlsberg Foundation. The project aim is to extend the original Danish wordnet, DanNet, in several ways. Main focus is on extension of the coverage and description of the adjectives, a part of speech that was rather sparsely described in the original wordnet. We describe the methodology and initial work of semi-automatically transferring adjectives from the Danish Thesaurus to the wordnet with the aim of easily enlarging the coverage from 3,000 to approx. 13,000 adjectival synsets. Transfer is performed by manually encoding all missing adjectival subsection headwords from the thesaurus and thereafter employing a semi-automatic procedure where adjectives from the same subsection are transferred to the wordnet as either 1) near synonyms to the section’s headword, 2) hyponyms to the section’s headword, or 3) as members of the same synset as the headword. We also discuss how to deal with the problem of multiple representations of the same sense in the thesaurus, and present other types of information from the thesaurus that we plan to integrate, such as thematic and sentiment information. diff --git a/data/xml/2021.hackashop.xml b/data/xml/2021.hackashop.xml index ab9d38bf32..8854ae9a3e 100644 --- a/data/xml/2021.hackashop.xml +++ b/data/xml/2021.hackashop.xml @@ -33,7 +33,7 @@ Related Named Entities Classification in the Economic-Financial Context DanielDe Los Reyes AllanBarcelos - RenataVieira + RenataVieira IsabelManssour 8–15 The present work uses the Bidirectional Encoder Representations from Transformers (BERT) to process a sentence and its entities and indicate whether two named entities present in a sentence are related or not, constituting a binary classification problem. It was developed for the Portuguese language, considering the financial domain and exploring deep linguistic representations to identify a relation between entities without using other lexical-semantic resources. The results of the experiments show an accuracy of 86% of the predictions. @@ -172,7 +172,7 @@ MatejUlčar LindaFreienthal SilverTraat - Luis AdriánCabrera-Diego + Luis AdriánCabrera-Diego MatejMartinc NadaLavrač BlažŠkrlj @@ -182,8 +182,8 @@ VidPodpečan JanezKranjc ShaneSheehan - EmanuelaBoros - Jose G.Moreno + EmanuelaBoros + Jose G.Moreno AntoineDoucet HannuToivonen 99–109 diff --git a/data/xml/2021.hcinlp.xml b/data/xml/2021.hcinlp.xml index 49f00f9923..1e43749c8d 100644 --- a/data/xml/2021.hcinlp.xml +++ b/data/xml/2021.hcinlp.xml @@ -6,7 +6,7 @@ Su LinBlodgett MichaelMadaio BrendanO'Connor - HannaWallach + HannaWallach QianYang Association for Computational Linguistics
Online
@@ -30,7 +30,7 @@
Spellchecking for Children in Web Search: a Natural Language Interface Case-study - CaseyKennington + CaseyKennington Jerry AlanFails KatherineLandau Wright Maria SoledadPera @@ -105,7 +105,7 @@ JeonghwanKim JunmoKang SuwonShin - Sung-HyonMyaeng + Sung-HyonMyaeng 53–59 Customer reviews are useful in providing an indirect, secondhand experience of a product. People often use reviews written by other customers as a guideline prior to purchasing a product. Such behavior signifies the authenticity of reviews in e-commerce platforms. However, fake reviews are increasingly becoming a hassle for both consumers and product owners. To address this issue, we propose You Only Need Gold (YONG), an essential information mining tool for detecting fake reviews and augmenting user discretion. Our experimental results show the poor human performance on fake review detection, substantially improved user capability given our tool, and the ultimate need for user reliance on the tool. 2021.hcinlp-1.9 @@ -125,7 +125,7 @@ HenrikVoigt MoniqueMeuschke KaiLawonn - SinaZarrieß + SinaZarrieß 66–73 Intuitive interaction with visual models becomes an increasingly important task in the field of Visualization (VIS) and verbal interaction represents a significant aspect of it. Vice versa, modeling verbal interaction in visual environments is a major trend in ongoing research in NLP. To date, research on Language & Vision, however, mostly happens at the intersection of NLP and Computer Vision (CV), and much less at the intersection of NLP and Visualization, which is an important area in Human-Computer Interaction (HCI). This paper presents a brief survey of recent work on interactive tasks and set-ups in NLP and Visualization. We discuss the respective methods, show interesting gaps, and conclude by suggesting neural, visually grounded dialogue modeling as a promising potential for NLIs for visual models. 2021.hcinlp-1.11 @@ -150,7 +150,7 @@ Machine Translation Believability - MariannaMartindale + MariannaMartindale KevinDuh MarineCarpuat 88–95 @@ -169,7 +169,7 @@ An <fixed-case>IDR</fixed-case> Framework of Opportunities and Barriers between <fixed-case>HCI</fixed-case> and <fixed-case>NLP</fixed-case> NannaInie - LeonDerczynski + LeonDerczynski 101–108 This paper presents a framework of opportunities and barriers/risks between the two research fields Natural Language Processing (NLP) and Human-Computer Interaction (HCI). The framework is constructed by following an interdisciplinary research-model (IDR), combining field-specific knowledge with existing work in the two fields. The resulting framework is intended as a departure point for discussion and inspiration for research collaborations. 2021.hcinlp-1.16 diff --git a/data/xml/2021.humeval.xml b/data/xml/2021.humeval.xml index 34a06511aa..56fb2ff8c6 100644 --- a/data/xml/2021.humeval.xml +++ b/data/xml/2021.humeval.xml @@ -3,7 +3,7 @@ Proceedings of the Workshop on Human Evaluation of NLP Systems (HumEval) - AnyaBelz + AnyaBelz ShubhamAgarwal YvetteGraham EhudReiter @@ -21,27 +21,27 @@ It’s Commonsense, isn’t it? Demystifying Human Evaluations in Commonsense-Enhanced <fixed-case>NLG</fixed-case> Systems - Miruna-AdrianaClinciu + Miruna-AdrianaClinciu DimitraGkatzia SaadMahamood 1–12 Common sense is an integral part of human cognition which allows us to make sound decisions, communicate effectively with others and interpret situations and utterances. Endowing AI systems with commonsense knowledge capabilities will help us get closer to creating systems that exhibit human intelligence. Recent efforts in Natural Language Generation (NLG) have focused on incorporating commonsense knowledge through large-scale pre-trained language models or by incorporating external knowledge bases. Such systems exhibit reasoning capabilities without common sense being explicitly encoded in the training set. These systems require careful evaluation, as they incorporate additional resources during training which adds additional sources of errors. Additionally, human evaluation of such systems can have significant variation, making it impossible to compare different systems and define baselines. This paper aims to demystify human evaluations of commonsense-enhanced NLG systems by proposing the Commonsense Evaluation Card (CEC), a set of recommendations for evaluation reporting of commonsense-enhanced NLG systems, underpinned by an extensive analysis of human evaluations reported in the recent literature. 2021.humeval-1.1 Estimating Subjective Crowd-Evaluations as an Additional Objective to Improve Natural Language Generation JakobNyberg MaikePaetzel - RameshManuvinakurike + RameshManuvinakurike 13–24 Human ratings are one of the most prevalent methods to evaluate the performance of NLP (natural language processing) algorithms. Similarly, it is common to measure the quality of sentences generated by a natural language generation model using human raters. In this paper we argue for exploring the use of subjective evaluations within the process of training language generation models in a multi-task learning setting. As a case study, we use a crowd-authored dialogue corpus to fine-tune six different language generation models. Two of these models incorporate multi-task learning and use subjective ratings of lines as part of an explicit learning goal. A human evaluation of the generated dialogue lines reveals that utterances generated by the multi-tasking models were subjectively rated as the most typical, most moving the conversation forward, and least offensive. Based on these promising first results, we discuss future research directions for incorporating subjective human evaluations into language model training and to hence keep the human user in the loop during the development process. 2021.humeval-1.2 Trading Off Diversity and Quality in Natural Language Generation @@ -153,7 +153,7 @@ Detecting Post-Edited References and Their Effect on Human Evaluation VěraKloudová - OndřejBojar + OndřejBojar MartinPopel 114–119 This paper provides a quick overview of possible methods how to detect that reference translations were actually created by post-editing an MT system. Two methods based on automatic metrics are presented: BLEU difference between the suspected MT and some other good MT and BLEU difference using additional references. These two methods revealed a suspicion that the WMT 2020 Czech reference is based on MT. The suspicion was confirmed in a manual analysis by finding concrete proofs of the post-editing procedure in particular sentences. Finally, a typology of post-editing changes is presented where typical errors or changes made by the post-editor or errors adopted from the MT are classified. @@ -164,14 +164,14 @@ A Case Study of Efficacy and Challenges in Practical Human-in-Loop Evaluation of <fixed-case>NLP</fixed-case> Systems Using Checklist ShailyBhatt RahulJain - SandipanDandapat + SandipanDandapat SunayanaSitaram 120–130 Despite state-of-the-art performance, NLP systems can be fragile in real-world situations. This is often due to insufficient understanding of the capabilities and limitations of models and the heavy reliance on standard evaluation benchmarks. Research into non-standard evaluation to mitigate this brittleness is gaining increasing attention. Notably, the behavioral testing principle ‘Checklist’, which decouples testing from implementation revealed significant failures in state-of-the-art models for multiple tasks. In this paper, we present a case study of using Checklist in a practical scenario. We conduct experiments for evaluating an offensive content detection system and use a data augmentation technique for improving the model using insights from Checklist. We lay out the challenges and open questions based on our observations of using Checklist for human-in-loop evaluation and improvement of NLP systems. Disclaimer: The paper contains examples of content with offensive language. The examples do not represent the views of the authors or their employers towards any person(s), group(s), practice(s), or entity/entities. 2021.humeval-1.14 Interrater Disagreement Resolution: A Systematic Procedure to Reach Consensus in Annotation Tasks diff --git a/data/xml/2021.icnlsp.xml b/data/xml/2021.icnlsp.xml index 30fae356d0..e38edd9963 100644 --- a/data/xml/2021.icnlsp.xml +++ b/data/xml/2021.icnlsp.xml @@ -80,7 +80,7 @@ Beyond Voice Activity Detection: Hybrid Audio Segmentation for Direct Speech Translation MarcoGaido - MatteoNegri + MatteoNegri MauroCettolo MarcoTurchi 55–62 @@ -91,7 +91,7 @@ A Sample-Based Training Method for Distantly Supervised Relation Extraction with Pre-Trained Transformers MehrdadNasser Mohamad BagherSajadi - BehrouzMinaei-Bidgoli + BehrouzMinaei-Bidgoli 63–72 2021.icnlsp-1.8 nasser-etal-2021-sample @@ -135,7 +135,7 @@ Formulating Automated Responses to Cognitive Distortions for <fixed-case>CBT</fixed-case> Interactions Ignaciode Toledo Rodriguez - GiancarloSalton + GiancarloSalton RobertRoss 108–116 2021.icnlsp-1.13 @@ -234,7 +234,7 @@ AbdulWaheed MuskanGoyal NimishaMittal - DeepakGupta + DeepakGupta AshishKhanna MoolchandSharma 209–218 diff --git a/data/xml/2021.icon.xml b/data/xml/2021.icon.xml index b52b6ee109..c0b5c2bd4a 100644 --- a/data/xml/2021.icon.xml +++ b/data/xml/2021.icon.xml @@ -3,9 +3,9 @@ Proceedings of the 18th International Conference on Natural Language Processing (ICON) - SivajiBandyopadhyay - Sobha LalithaDevi - PushpakBhattacharyya + SivajiBandyopadhyay + Sobha LalithaDevi + PushpakBhattacharyya NLP Association of India (NLPAI)
National Institute of Technology Silchar, Silchar, India
December @@ -21,7 +21,7 @@ Constrained Decoding for Technical Term Retention in <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi <fixed-case>MT</fixed-case> NiyatiBafna MartinVastl - OndřejBojar + OndřejBojar 1–6 Technical terms may require special handling when the target audience is bilingual, depending on the cultural and educational norms of the society in question. In particular, certain translation scenarios may require “term retention” i.e. preserving of the source language technical terms in the target language output to produce a fluent and comprehensible code-switched sentence. We show that a standard transformer-based machine translation model can be adapted easily to perform this task with little or no damage to the general quality of its output. We present an English-to-Hindi model that is trained to obey a “retain” signal, i.e. it can perform the required code-mixing on a list of terms, possibly unseen, provided at runtime. We perform automatic evaluation using BLEU as well as F1 metrics on the list of retained terms; we also collect manual judgments on the quality of the output sentences. 2021.icon-main.1 @@ -54,8 +54,8 @@
Small Batch Sizes Improve Training of Low-Resource Neural <fixed-case>MT</fixed-case> - ÀlexAtrio - AndreiPopescu-Belis + ÀlexAtrio + AndreiPopescu-Belis 18–24 We study the role of an essential hyper-parameter that governs the training of Transformers for neural machine translation in a low-resource setting: the batch size. Using theoretical insights and experimental evidence, we argue against the widespread belief that batch size should be set as large as allowed by the memory of the GPUs. We show that in a low-resource setting, a smaller batch size leads to higher scores in a shorter training time, and argue that this is due to better regularization of the gradients during training. 2021.icon-main.4 @@ -85,7 +85,7 @@ Assessing Post-editing Effort in the <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Direction ArafatAhsan VandanMujadia - Dipti MisraSharma + Dipti MisraSharma 44–53 We present findings from a first in-depth post-editing effort estimation study in the English-Hindi direction along multiple effort indicators. We conduct a controlled experiment involving professional translators, who complete assigned tasks alternately, in a translation from scratch and a post-edit condition. We find that post-editing reduces translation time (by 63%), utilizes fewer keystrokes (by 59%), and decreases the number of pauses (by 63%) when compared to translating from scratch. We further verify the quality of translations thus produced via a human evaluation task in which we do not detect any discernible quality differences. 2021.icon-main.7 @@ -97,7 +97,7 @@ LaishramRahul AlokSingh Salam MichaelSingh - Thoudam DorenSingh + Thoudam DorenSingh SivajiBandyopadhyay 54–63 In this paper, we report the experimental findings of building Speech-to-Text translation systems for Manipuri-English on low resource setting which is first of its kind in this language pair. For this purpose, a new dataset consisting of a Manipuri-English parallel corpus along with the corresponding audio version of the Manipuri text is built. Based on this dataset, a benchmark evaluation is reported for the Manipuri-English Speech-to-Text translation using two approaches: 1) a pipeline model consisting of ASR (Automatic Speech Recognition) and Machine translation, and 2) an end-to-end Speech-to-Text translation. Gaussian Mixture Model-Hidden Markov Model (GMM-HMM) and Time delay neural network (TDNN) Acoustic models are used to build two different pipeline systems using a shared MT system. Experimental result shows that the TDNN model outperforms GMM-HMM model significantly by a margin of 2.53% WER. However, their evaluation of Speech-to-Text translation differs by a small margin of 0.1 BLEU. Both the pipeline translation models outperform the end-to-end translation model by a margin of 2.6 BLEU score. @@ -109,7 +109,7 @@ Salam MichaelSingh LoitongbamSanayai Meetei AlokSingh - Thoudam DorenSingh + Thoudam DorenSingh SivajiBandyopadhyay 64–74 In recent times, machine translation models can learn to perform implicit bridging between language pairs never seen explicitly during training and showing that transfer learning helps for languages with constrained resources. This work investigates the low resource machine translation via transfer learning from multilingual pre-trained models i.e. mBART-50 and mT5-base in the pretext of Indo-Aryan (Assamese and Bengali) and Tibeto-Burman (Manipuri) languages via finetuning as a downstream task. Assamese and Manipuri were absent in the pretraining of both mBART-50 and the mT5 models. However, the experimental results attest that the finetuning from these pre-trained models surpasses the multilingual model trained from scratch. @@ -138,7 +138,7 @@ The Importance of Context in Very Low Resource Language Modeling LukasEdman AntonioToral - Gertjanvan Noord + Gertjanvan Noord 86–92 This paper investigates very low resource language model pretraining, when less than 100 thousand sentences are available. We find that, in very low-resource scenarios, statistical n-gram language models outperform state-of-the-art neural models. Our experiments show that this is mainly due to the focus of the former on a local context. As such, we introduce three methods to improve a neural model’s performance in the low-resource setting, finding that limiting the model’s self-attention is the most effective one, improving on downstream tasks such as NLI and POS tagging by up to 5% for the languages we test on: English, Hindi, and Turkish. 2021.icon-main.12 @@ -171,7 +171,7 @@ On the Universality of Deep Contextual Language Models ShailyBhatt PoonamGoyal - SandipanDandapat + SandipanDandapat MonojitChoudhury SunayanaSitaram 106–119 @@ -298,7 +298,7 @@ Towards Multimodal Vision-Language Models Generating Non-Generic Text WesRobbins ZanyarZohourianshahzadi - JugalKalita + JugalKalita 220–230 Vision-language models can assess visual context in an image and generate descriptive text. While the generated text may be accurate and syntactically correct, it is often overly general. To address this, recent work has used optical character recognition to supplement visual information with text extracted from an image. In this work, we contend that vision-language models can benefit from information that can be extracted from an image, but are not used by current models. We modify previous multimodal frameworks to accept relevant information from any number of auxiliary classifiers. In particular, we focus on person names as an additional set of tokens and create a novel image-caption dataset to facilitate captioning with person names. The dataset, Politicians and Athletes in Captions (PAC), consists of captioned images of well-known people in context. By fine-tuning pretrained models with this dataset, we demonstrate a model that can naturally integrate facial recognition tokens into generated text by training on limited data. For the PAC dataset, we provide a discussion on collection and baseline benchmark scores. 2021.icon-main.27 @@ -308,7 +308,7 @@ Image Caption Generation Framework for <fixed-case>A</fixed-case>ssamese News using Attention Mechanism RingkiDas - Thoudam DorenSingh + Thoudam DorenSingh 231–239 Automatic caption generation is an artificial intelligence problem that falls at the intersection of computer vision and natural language processing. Although significant works have been reported in image captioning, the contribution is limited to English and few major languages with sufficient resources. But, no work on image captioning has been reported in a resource-constrained language like Assamese. With this inspiration, we propose an encoder-decoder based framework for image caption generation in the Assamese news domain. The VGG-16 pre-trained model at the encoder side and LSTM with an attention mechanism are employed at the decoder side to generate the Assamese caption. We train the proposed model on the dataset built in-house consisting of 10,000 images with a single caption for each image. We describe our experimental methodology, quantitative and qualitative results which validate the effectiveness of our model for caption generation. The proposed model shows a BLEU score of 12.1 outperforming the baseline model. 2021.icon-main.28 @@ -319,7 +319,7 @@ AlokSingh LoitongbamSanayai Meetei Salam MichaelSingh - Thoudam DorenSingh + Thoudam DorenSingh SivajiBandyopadhyay 240–250 Describing a video is a challenging yet attractive task since it falls into the intersection of computer vision and natural language generation. The attention-based models have reported the best performance. However, all these models follow similar procedures, such as segmenting videos into chunks of frames or sampling frames at equal intervals for visual encoding. The process of segmenting video into chunks or sampling frames at equal intervals causes encoding of redundant visual information and requires additional computational cost since a video consists of a sequence of similar frames and suffers from inescapable noise such as uneven illumination, occlusion and motion effects. In this paper, a boundary-based keyframes selection approach for video description is proposed that allow the system to select a compact subset of keyframes to encode the visual information and generate a description for a video without much degradation. The proposed approach uses 3 4 frames per video and yields competitive performance over two benchmark datasets MSVD and MSR-VTT (in both English and Hindi). @@ -369,7 +369,7 @@ Classifying Verses of the <fixed-case>Q</fixed-case>uran using Doc2vec MenwaAlshammeri - EricAtwell + EricAtwell MohammadAlsalka 284–288 The Quran, as a significant religious text, bears important spiritual and linguistic values. Understanding the text and inferring the underlying meanings entails semantic similarity analysis. We classified the verses of the Quran into 15 pre-defined categories or concepts, based on the Qurany corpus, using Doc2Vec and Logistic Regression. Our classifier scored 70% accuracy, and 60% F1-score using the distributed bag-of-words architecture. We then measured how similar the documents within the same category are to each other semantically and use this information to evaluate our model. We calculated the mean difference and average similarity values for each category to indicate how well our model describes that category. @@ -431,7 +431,7 @@ Resolving Prepositional Phrase Attachment Ambiguities with Contextualized Word Embeddings - AdwaitRatnaparkhi + AdwaitRatnaparkhi AtulKumar 335–340 This paper applies contextualized word embedding models to a long-standing problem in the natural language parsing community, namely prepositional phrase attachment. Following past formulations of this problem, we use data sets in which the attachment decision is both a binary-valued choice as well as a multi-valued choice. We present a deep learning architecture that fine-tunes the output of a contextualized word embedding model for the purpose of predicting attachment decisions. We present experiments on two commonly used datasets that outperform the previous best results, using only the original training data and the unannotated full sentence context. @@ -444,7 +444,7 @@ ChenchenDing KatsuhitoSudoh MasaoUtiyama - EiichiroSumita + EiichiroSumita SatoshiNakamura 341–346 Pretrained multilingual language models have become a key part of cross-lingual transfer for many natural language processing tasks, even those without bilingual information. This work further investigates the cross-lingual transfer ability of these models for constituency parsing and focuses on multi-source transfer. Addressing structure and label set diversity problems, we propose the integration of typological features into the parsing model and treebank normalization. We trained the model on eight languages with diverse structures and use transfer parsing for an additional six low-resource languages. The experimental results show that the treebank normalization is essential for cross-lingual transfer performance and the typological features introduce further improvement. As a result, our approach improves the baseline F1 of multi-source transfer by 5 on average. @@ -524,7 +524,7 @@ Temporal Question Generation from History Text HarsimranBedi SangameshwarPatil - GirishPalshikar + GirishPalshikar 408–413 Temporal analysis of history text has always held special significance to students, historians and the Social Sciences community in general. We observe from experimental data that existing deep learning (DL) models of ProphetNet and UniLM for question generation (QG) task do not perform satisfactorily when used directly for temporal QG from history text. We propose linguistically motivated templates for generating temporal questions that probe different aspects of history text and show that finetuning the DL models using the temporal questions significantly improves their performance on temporal QG task. Using automated metrics as well as human expert evaluation, we show that performance of the DL models finetuned with the template-based questions is better than finetuning done with temporal questions from SQuAD. 2021.icon-main.49 @@ -636,7 +636,7 @@ An Efficient <fixed-case>BERT</fixed-case> Based Approach to Detect Aggression and Misogyny SandipDutta UtsoMajumder - SudipNaskar + SudipNaskar 493–498 Social media is bustling with ever growing cases of trolling, aggression and hate. A huge amount of social media data is generated each day which is insurmountable for manual inspection. In this work, we propose an efficient and fast method to detect aggression and misogyny in social media texts. We use data from the Second Workshop on Trolling, Aggression and Cyber Bullying for our task. We employ a BERT based model to augment our data. Next we employ Tf-Idf and XGBoost for detecting aggression and misogyny. Our model achieves 0.73 and 0.85 Weighted F1 Scores on the 2 prediction tasks, which are comparable to the state of the art. However, the training time, model size and resource requirements of our model are drastically lower compared to the state of the art models, making our model useful for fast inference. 2021.icon-main.60 @@ -662,7 +662,7 @@ Using Random Perturbations to Mitigate Adversarial Attacks on Sentiment Analysis Models AbigailSwenor - JugalKalita + JugalKalita 519–528 Attacks on deep learning models are often difficult to identify and therefore are difficult to protect against. This problem is exacerbated by the use of public datasets that typically are not manually inspected before use. In this paper, we offer a solution to this vulnerability by using, during testing, random perturbations such as spelling correction if necessary, substitution by random synonym, or simply drop-ping the word. These perturbations are applied to random words in random sentences to defend NLP models against adversarial attacks. Our Random Perturbations Defense andIncreased Randomness Defense methods are successful in returning attacked models to similar accuracy of models before attacks. The original accuracy of the model used in this work is 80% for sentiment classification. After undergoing attacks, the accuracy drops to an accuracy between 0% and 44%. After applying our defense methods, the accuracy of the model is returned to the original accuracy within statistical significance. 2021.icon-main.63 @@ -722,7 +722,7 @@ <fixed-case>D</fixed-case>ialog<fixed-case>A</fixed-case>cts based Search and Retrieval for Response Generation in Conversation Systems NidhiArora RashmiPrasad - SrinivasBangalore + SrinivasBangalore 564–572 Designing robust conversation systems with great customer experience requires a team of design experts to think of all probable ways a customer can interact with the system and then author responses for each use case individually. The responses are authored from scratch for each new client and application even though similar responses have been created in the past. This happens largely because the responses are encoded using domain specific set of intents and entities. In this paper, we present preliminary work to define a dialog act schema to merge and map responses from different domains and applications using a consistent domain-independent representation. These representations are stored and maintained using an Elasticsearch system to facilitate generation of responses through a search and retrieval process. We experimented generating different surface realizations for a response given a desired information state of the dialog. 2021.icon-main.69 @@ -744,7 +744,7 @@ Weakly Supervised Extraction of Tasks from Text SachinPawar - GirishPalshikar + GirishPalshikar AninditaSinha Banerjee 583–592 In this paper, we propose a novel problem of automatic extraction of tasks from text. A task is a well-defined knowledge-based volitional action. We describe various characteristics of tasks as well as compare and contrast them with events. We propose two techniques for task extraction – i) using linguistic patterns and ii) using a BERT-based weakly supervised neural model. We evaluate our techniques with other competent baselines on 4 datasets from different domains. Overall, the BERT-based weakly supervised neural model generalizes better across multiple domains as compared to the purely linguistic patterns based approach. @@ -796,7 +796,7 @@ SaujasVaduguru ParthoSarthi MonojitChoudhury - DiptiSharma + DiptiSharma 619–628 Learning linguistic generalizations from only a few examples is a challenging task. Recent work has shown that program synthesis – a method to learn rules from data in the form of programs in a domain-specific language – can be used to learn phonological rules in highly data-constrained settings. In this paper, we use the problem of phonological stress placement as a case to study how the design of the domain-specific language influences the generalization ability when using the same learning algorithm. We find that encoding the distinction between consonants and vowels results in much better performance, and providing syllable-level information further improves generalization. Program synthesis, thus, provides a way to investigate how access to explicit linguistic information influences what can be learnt from a small number of examples. 2021.icon-main.76 @@ -819,7 +819,7 @@ Introduction to <fixed-case>P</fixed-case>roverb<fixed-case>N</fixed-case>et: An Online Multilingual Database of Proverbs and Comprehensive Metadata ShreyasPimpalgaonkar DhanashreeLele - MalharKulkarni + MalharKulkarni PushpakBhattacharyya 638–650 Proverbs are unique linguistic expressions used by humans in the process of communication. They are frozen expressions and have the capacity to convey deep semantic aspects of a given language. This paper describes ProverbNet, a novel online multilingual database of proverbs and comprehensive metadata equipped with a multipurpose search engine to store, explore, understand, classify and analyze proverbs and their metadata. ProverbNet has immense applications including machine translation, cognitive studies and learning tools. We have 2320 Sanskrit Proverbs and 1136 Marathi proverbs and their metadata in ProverbNet and are adding more proverbs in different languages to the network. @@ -846,7 +846,7 @@ <fixed-case>F</fixed-case>in<fixed-case>R</fixed-case>ead: A Transfer Learning Based Tool to Assess Readability of Definitions of Financial Terms SohomGhosh ShovonSengupta - SudipNaskar + SudipNaskar Sunny KumarSingh 658–659 Simplified definitions of complex terms help learners to understand any content better. Comprehending readability is critical for the simplification of these contents. In most cases, the standard formula based readability measures do not hold good for measuring the complexity of definitions of financial terms. Furthermore, some of them works only for corpora of longer length which have at least 30 sentences. In this paper, we present a tool for evaluating readability of definitions of financial terms. It consists of a Light GBM based classification layer over sentence embeddings (Reimers et al., 2019) of FinBERT (Araci, 2019). It is trained on glossaries of several financial textbooks and definitions of various financial terms which are available on the web. The extensive evaluation shows that it outperforms the standard benchmarks by achieving a AU-ROC score of 0.993 on the validation set. @@ -981,7 +981,7 @@ Julio de JesúsGuerrero-Zambrano DominicForest GerardoReyes-Salgado - Juan-ManuelTorres-Moreno + Juan-ManuelTorres-Moreno 41–45 This work aims to evaluate the ability that both probabilistic and state-of-the-art vector space modeling (VSM) methods provide to well known machine learning algorithms to identify social network documents to be classified as aggressive, gender biased or communally charged. To this end, an exploratory stage was performed first in order to find relevant settings to test, i.e. by using training and development samples, we trained multiple algorithms using multiple vector space modeling and probabilistic methods and discarded the less informative configurations. These systems were submitted to the competition of the ComMA@ICON’21 Workshop on Multilingual Gender Biased and Communal Language Identification. 2021.icon-multigen.6 @@ -1001,7 +1001,7 @@ Sdutta at <fixed-case>C</fixed-case>om<fixed-case>MA</fixed-case>@<fixed-case>ICON</fixed-case>: A <fixed-case>CNN</fixed-case>-<fixed-case>LSTM</fixed-case> Model for Hate Detection SandipDutta UtsoMajumder - SudipNaskar + SudipNaskar 53–57 In today’s world, online activity and social media are facing an upsurge of cases of aggression, gender-biased comments and communal hate. In this shared task, we used a CNN-LSTM hybrid method to detect aggression, misogynistic and communally charged content in social media texts. First, we employ text cleaning and convert the text into word embeddings. Next we proceed to our CNN-LSTM based model to predict the nature of the text. Our model achieves 0.288, 0.279, 0.294 and 0.335 Overall Micro F1 Scores in multilingual, Meitei, Bengali and Hindi datasets, respectively, on the 3 prediction labels. 2021.icon-multigen.8 @@ -1013,7 +1013,7 @@ OxanaVitman Hosahalli LakshmaiahShashirekha GrigoriSidorov - AlexanderGelbukh + AlexanderGelbukh 58–63 Social media analytics are widely being explored by researchers for various applications. Prominent among them are identifying and blocking abusive contents especially targeting individuals and communities, for various reasons. The increasing abusive contents and the increasing number of users on social media demands automated tools to detect and filter the abusive contents as it is highly impossible to handle this manually. To address the challenges of detecting abusive contents, this paper describes the approaches proposed by our team MUCIC for Multilingual Gender Biased and Communal Language Identification shared task (ComMA@ICON) at International Conference on Natural Language Processing (ICON) 2021. This shared task dataset consists of code-mixed multi-script texts in Meitei, Bangla, Hindi as well as in Multilingual (a combination of Meitei, Bangla, Hindi, and English). The shared task is modeled as a multi-label Text Classification (TC) task combining word and char n-grams with vectors obtained from Multilingual Sentence Encoder (MSE) to train the Machine Learning (ML) classifiers using Pre-aggregation and Post-aggregation of labels. These approaches obtained the highest performance in the shared task for Meitei, Bangla, and Multilingual texts with instance-F1 scores of 0.350, 0.412, and 0.380 respectively using Pre-aggregation of labels. 2021.icon-multigen.9 diff --git a/data/xml/2021.ijclclp.xml b/data/xml/2021.ijclclp.xml index ec43d01465..8e5c35ce55 100644 --- a/data/xml/2021.ijclclp.xml +++ b/data/xml/2021.ijclclp.xml @@ -23,7 +23,7 @@ Tien-HongLo Shi-YanWeng Shih-HsuanChiu - Yao-TingSung + Yao-TingSung BerlinChen 2021.ijclclp-1.1 chao-etal-2021-ntnu @@ -51,7 +51,7 @@ Textual Relations with Conjunctive Adverbials in <fixed-case>E</fixed-case>nglish Writing by <fixed-case>C</fixed-case>hinese Speakers: A corpus-based Approach Tung-YuKao - Li-meiChen + Li-meiChen 2021.ijclclp-1.4 kao-chen-2021-textual @@ -59,7 +59,7 @@ 中文新聞文本之宣傳手法標記與分析 (The Analysis and Annotation of Propaganda Techniques in <fixed-case>C</fixed-case>hinese News Texts) Meng-HsienShih Ren-fengDuann - Siaw-FongChung + Siaw-FongChung 2021.ijclclp-1.5 zho shih-etal-2021-zhong @@ -103,7 +103,7 @@ 使用低通時序列語音特徵訓練理想比率遮罩法之語音強化 (Employing Low-Pass Filtered Temporal Speech Features for the Training of Ideal Ratio Mask in Speech Enhancement) Yan-TongChen - Jeih-weihHung + Jeih-weihHung 2021.ijclclp-2.3 zho chen-hung-2021-shi @@ -121,7 +121,7 @@ Chao-ChunLiang DanielLee Meng-TseWu - Hsin-MinWang + Hsin-MinWang Keh-YihSu 2021.ijclclp-2.5 liang-etal-2021-answering diff --git a/data/xml/2021.inlg.xml b/data/xml/2021.inlg.xml index 70bfa6f967..68c9f1f73d 100644 --- a/data/xml/2021.inlg.xml +++ b/data/xml/2021.inlg.xml @@ -3,7 +3,7 @@ Proceedings of the 14th International Conference on Natural Language Generation - AnyaBelz + AnyaBelz AngelaFan EhudReiter YajiSripada @@ -21,8 +21,8 @@ Generating Diverse Descriptions from Semantic Graphs JiuzhouHan - DanielBeck - TrevorCohn + DanielBeck + TrevorCohn 1–11 Text generation from semantic graphs is traditionally performed with deterministic methods, which generate a unique description given an input graph. However, the generation problem admits a range of acceptable textual outputs, exhibiting lexical, syntactic and semantic variation. To address this disconnect, we present two main contributions. First, we propose a stochastic graph-to-text model, incorporating a latent variable in an encoder-decoder model, and its use in an ensemble. Second, to assess the diversity of the generated sentences, we propose a new automatic evaluation metric which jointly evaluates output diversity and quality in a multi-reference setting. We evaluate the models on WebNLG datasets in English and Russian, and show an ensemble of stochastic models produces diverse sets of generated sentences while, retaining similar quality to state-of-the-art models. 2021.inlg-1.1 @@ -34,7 +34,7 @@ AleksandreMaskharashvili SymonStevens-Guille XintongLi - MichaelWhite + MichaelWhite 12–23 Recent developments in natural language generation (NLG) have bolstered arguments in favor of re-introducing explicit coding of discourse relations in the input to neural models. In the Methodius corpus, a meaning representation (MR) is hierarchically structured and includes discourse relations. Meanwhile pre-trained language models have been shown to implicitly encode rich linguistic knowledge which provides an excellent resource for NLG. By virtue of synthesizing these lines of research, we conduct extensive experiments on the benefits of using pre-trained models and discourse relation information in MRs, focusing on the improvement of discourse coherence and correctness. We redesign the Methodius corpus; we also construct another Methodius corpus in which MRs are not hierarchically structured but flat. We report experiments on different versions of the corpora, which probe when, where, and how pre-trained models benefit from MRs with discourse relation information in them. We conclude that discourse relations significantly improve NLG when data is limited. 2021.inlg-1.2 @@ -47,7 +47,7 @@ HaemanthSanthi Ponnusamy KordulaDe Kuthy LukasStein - DetmarMeurers + DetmarMeurers 24–34 In question generation, the question produced has to be well-formed and meaningfully related to the answer serving as input. Neural generation methods have predominantly leveraged the distributional semantics of words as representations of meaning and generated questions one word at a time. In this paper, we explore the viability of form-based and more fine-grained encodings, such as character or subword representations for question generation. We start from the typical seq2seq architecture using word embeddings presented by De Kuthy et al. (2020), who generate questions from text so that the answer given in the input text matches not just in meaning but also in form, satisfying question-answer congruence. We show that models trained on character and subword representations substantially outperform the published results based on word embeddings, and they do so with fewer parameters. Our approach eliminates two important problems of the word-based approach: the encoding of rare or out-of-vocabulary words and the incorrect replacement of words with semantically-related ones. The character-based model substantially improves on the published results, both in terms of BLEU scores and regarding the quality of the generated question. Going beyond the specific task, this result adds to the evidence weighing different form- and meaning-based representations for natural language processing tasks. 2021.inlg-1.3 @@ -80,7 +80,7 @@ Predicting Antonyms in Context using <fixed-case>BERT</fixed-case> AyanaNiwa KeisukeNishiguchi - NaoakiOkazaki + NaoakiOkazaki 48–54 We address the task of antonym prediction in a context, which is a fill-in-the-blanks problem. This task setting is unique and practical because it requires contrastiveness to the other word and naturalness as a text in filling a blank. We propose methods for fine-tuning pre-trained masked language models (BERT) for context-aware antonym prediction. The experimental results demonstrate that these methods have positive impacts on the prediction of antonyms within a context. Moreover, human evaluation reveals that more than 85% of predictions using the proposed method are acceptable as antonyms. 2021.inlg-1.6 @@ -115,7 +115,7 @@ KarthikGopalakrishnan PankajRajan YangLiu - DilekHakkani-Tur + DilekHakkani-Tur 76–86 Incorporating external knowledge sources effectively in conversations is a longstanding problem in open-domain dialogue research. The existing literature on open-domain knowledge selection is limited and makes certain brittle assumptions on knowledge sources to simplify the overall task, such as the existence of a single relevant knowledge sentence per context. In this work, we evaluate the existing state of open-domain conversation knowledge selection, showing where the existing methodologies regarding data and evaluation are flawed. We then improve on them by proposing a new framework for collecting relevant knowledge, and create an augmented dataset based on the Wizard of Wikipedia (WOW) corpus, which we call WOW++. WOW++ averages 8 relevant knowledge sentences per dialogue context, embracing the inherent ambiguity of open-domain dialogue knowledge selection. We then benchmark various knowledge ranking algorithms on this augmented dataset with both intrinsic evaluation and extrinsic measures of response quality, showing that neural rerankers that use WOW++ can outperform rankers trained on standard datasets. 2021.inlg-1.9 @@ -127,7 +127,7 @@ XintongLi SymonStevens-Guille AleksandreMaskharashvili - MichaelWhite + MichaelWhite 87–102 Neural approaches to natural language generation in task-oriented dialogue have typically required large amounts of annotated training data to achieve satisfactory performance, especially when generating from compositional inputs. To address this issue, we show that self-training enhanced with constrained decoding yields large gains in data efficiency on a conversational weather dataset that employs compositional meaning representations. In particular, our experiments indicate that self-training with constrained decoding can enable sequence-to-sequence models to achieve satisfactory quality using vanilla decoding with five to ten times less data than with ordinary supervised baseline; moreover, by leveraging pretrained models, data efficiency can be increased further to fifty times. We confirm the main automatic results with human evaluations and show that they extend to an enhanced, compositional version of the E2E dataset. The end result is an approach that makes it possible to achieve acceptable performance on compositional NLG tasks using hundreds rather than tens of thousands of training samples. 2021.inlg-1.10 @@ -154,7 +154,7 @@ SameenMaruf IngridZukerman EhudReiter - GholamrezaHaffari + GholamrezaHaffari 114–127 We offer an approach to explain Decision Tree (DT) predictions by addressing potential conflicts between aspects of these predictions and plausible expectations licensed by background information. We define four types of conflicts, operationalize their identification, and specify explanatory schemas that address them. Our human evaluation focused on the effect of explanations on users’ understanding of a DT’s reasoning and their willingness to act on its predictions. The results show that (1) explanations that address potential conflicts are considered at least as good as baseline explanations that just follow a DT path; and (2) the conflict-based explanations are deemed especially valuable when users’ expectations disagree with the DT’s predictions. 2021.inlg-1.12 @@ -174,7 +174,7 @@ Underreporting of errors in <fixed-case>NLG</fixed-case> output, and what to do about it Emielvan Miltenburg - MirunaClinciu + MirunaClinciu OndřejDušek DimitraGkatzia StephanieInglis @@ -195,7 +195,7 @@ What can Neural Referential Form Selectors Learn? GuanyiChen FahimeSame - Keesvan Deemter + Keesvan Deemter 154–166 Despite achieving encouraging results, neural Referring Expression Generation models are often thought to lack transparency. We probed neural Referential Form Selection (RFS) models to find out to what extent the linguistic features influencing the RE form are learned and captured by state-of-the-art RFS models. The results of 8 probing tasks show that all the defined features were learned to some extent. The probing tasks pertaining to referential status and syntactic position exhibited the highest performance. The lowest performance was achieved by the probing models designed to predict discourse structure properties beyond the sentence level. 2021.inlg-1.15 @@ -224,7 +224,7 @@ Using <fixed-case>BERT</fixed-case> for choosing classifiers in <fixed-case>M</fixed-case>andarin JaniJärnfors GuanyiChen - Keesvan Deemter + Keesvan Deemter RintSybesma 172–176 Choosing the most suitable classifier in a linguistic context is a well-known problem in the production of Mandarin and many other languages. The present paper proposes a solution based on BERT, compares this solution to previous neural and rule-based models, and argues that the BERT model performs particularly well on those difficult cases where the classifier adds information to the text. @@ -234,7 +234,7 @@ Enriching the <fixed-case>E</fixed-case>2<fixed-case>E</fixed-case> dataset - ThiagoCastro Ferreira + ThiagoCastro Ferreira HelenaVaz BrianDavis AdrianaPagano @@ -276,7 +276,7 @@ Steven Y.Feng JessicaHuynh Chaitanya PrasadNarisetty - EduardHovy + EduardHovy VarunGangal 212–225 We motivate and propose a suite of simple but effective improvements for concept-to-text generation called SAPPHIRE: Set Augmentation and Post-hoc PHrase Infilling and REcombination. We demonstrate their effectiveness on generative commonsense reasoning, a.k.a. the CommonGen task, through experiments using both BART and T5 models. Through extensive automatic and human evaluation, we show that SAPPHIRE noticeably improves model performance. An in-depth qualitative analysis illustrates that SAPPHIRE effectively addresses many issues of the baseline model generations, including lack of commonsense, insufficient specificity, and poor fluency. @@ -370,7 +370,7 @@ Another <fixed-case>PASS</fixed-case>: A Reproduction Study of the Human Evaluation of a Football Report Generation System SimonMille - ThiagoCastro Ferreira + ThiagoCastro Ferreira AnyaBelz BrianDavis 286–292 @@ -381,7 +381,7 @@ A Reproduction Study of an Annotation-based Human Evaluation of <fixed-case>MT</fixed-case> Outputs - MajaPopović + MajaPopović AnyaBelz 293–300 In this paper we report our reproduction study of the Croatian part of an annotation-based human evaluation of machine-translated user reviews (Popovic, 2020). The work was carried out as part of the ReproGen Shared Task on Reproducibility of Human Evaluation in NLG. Our aim was to repeat the original study exactly, except for using a different set of evaluators. We describe the experimental design, characterise differences between original and reproduction study, and present the results from each study, along with analysis of the similarity between them. For the six main evaluation results of Major/Minor/All Comprehension error rates and Major/Minor/All Adequacy error rates, we find that (i) 4/6 system rankings are the same in both studies, (ii) the relative differences between systems are replicated well for Major Comprehension and Adequacy (Pearson’s > 0.9), but not for the corresponding Minor error rates (Pearson’s 0.36 for Adequacy, 0.67 for Comprehension), and (iii) the individual system scores for both types of Minor error rates had a higher degree of reproducibility than the corresponding Major error rates. We also examine inter-annotator agreement and compare the annotations obtained in the original and reproduction studies. @@ -414,7 +414,7 @@ Quality Evaluation of the Low-Resource Synthetically Generated Code-Mixed <fixed-case>H</fixed-case>inglish Text VivekSrivastava - MayankSingh + MayankSingh 314–319 In this shared task, we seek the participating teams to investigate the factors influencing the quality of the code-mixed text generation systems. We synthetically generate code-mixed Hinglish sentences using two distinct approaches and employ human annotators to rate the generation quality. We propose two subtasks, quality rating prediction and annotators’ disagreement prediction of the synthetic Hinglish dataset. The proposed subtasks will put forward the reasoning and explanation of the factors influencing the quality and human perception of the code-mixed text. 2021.inlg-1.34 @@ -475,7 +475,7 @@ MauajamaFirdaus UmangJain AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 353–363 Social chatbots have gained immense popularity, and their appeal lies not just in their capacity to respond to the diverse requests from users, but also in the ability to develop an emotional connection with users. To further develop and promote social chatbots, we need to concentrate on increasing user interaction and take into account both the intellectual and emotional quotient in the conversational agents. Therefore, in this work, we propose the task of sentiment aware emotion controlled personalized dialogue generation giving the machine the capability to respond emotionally and in accordance with the persona of the user. As sentiment and emotions are highly co-related, we use the sentiment knowledge of the previous utterance to generate the correct emotional response in accordance with the user persona. We design a Transformer based Dialogue Generation framework, that generates responses that are sensitive to the emotion of the user and corresponds to the persona and sentiment as well. Moreover, the persona information is encoded by a different Transformer encoder, along with the dialogue history, is fed to the decoder for generating responses. We annotate the PersonaChat dataset with sentiment information to improve the response quality. Experimental results on the PersonaChat dataset show that the proposed framework significantly outperforms the existing baselines, thereby generating personalized emotional responses in accordance with the sentiment that provides better emotional connection and user satisfaction as desired in a social chatbot. 2021.inlg-1.39 @@ -495,10 +495,10 @@ Decoding, Fast and Slow: A Case Study on Balancing Trade-Offs in Incremental, Character-level Pragmatic Reasoning - SinaZarrieß + SinaZarrieß HendrikBuschmeier TingHan - SimeonSchüz + SimeonSchüz 371–376 Recent work has adopted models of pragmatic reasoning for the generation of informative language in, e.g., image captioning. We propose a simple but highly effective relaxation of fully rational decoding, based on an existing incremental and character-level approach to pragmatically informative neural image captioning. We implement a mixed, ‘fast’ and ‘slow’, speaker that applies pragmatic reasoning occasionally (only word-initially), while unrolling the language model. In our evaluation, we find that increased informativeness through pragmatic decoding generally lowers quality and, somewhat counter-intuitively, increases repetitiveness in captions. Our mixed speaker, however, achieves a good balance between quality and informativeness. 2021.inlg-1.41 @@ -540,7 +540,7 @@ Attention Is Indeed All You Need: Semantically Attention-Guided Decoding for Data-to-Text <fixed-case>NLG</fixed-case> JurajJuraska - MarilynWalker + MarilynWalker 416–431 Ever since neural models were adopted in data-to-text language generation, they have invariably been reliant on extrinsic components to improve their semantic accuracy, because the models normally do not exhibit the ability to generate text that reliably mentions all of the information provided in the input. In this paper, we propose a novel decoding method that extracts interpretable information from encoder-decoder models’ cross-attention, and uses it to infer which attributes are mentioned in the generated text, which is subsequently used to rescore beam hypotheses. Using this decoding method with T5 and BART, we show on three datasets its ability to dramatically reduce semantic errors in the generated outputs, while maintaining their state-of-the-art quality. 2021.inlg-1.45 diff --git a/data/xml/2021.insights.xml b/data/xml/2021.insights.xml index 2977e85b23..ca071aab16 100644 --- a/data/xml/2021.insights.xml +++ b/data/xml/2021.insights.xml @@ -19,7 +19,7 @@ Corrected <fixed-case>CBOW</fixed-case> Performs as well as Skip-gram - Ozanİrsoy + Ozanİrsoy AdrianBenton KarlStratos 1–8 @@ -45,7 +45,7 @@ <fixed-case>BERT</fixed-case> Cannot Align Characters AntonisMaronikolakis PhilippDufter - HinrichSchütze + HinrichSchütze 16–22 In previous work, it has been shown that BERT can adequately align cross-lingual sentences on the word level. Here we investigate whether BERT can also operate as a char-level aligner. The languages examined are English, Fake English, German and Greek. We show that the closer two languages are, the better BERT can align them on the character level. BERT indeed works well in English to Fake English alignment, but this does not generalize to natural languages to the same extent. Nevertheless, the proximity of two languages does seem to be a factor. English is more related to German than to Greek and this is reflected in how well BERT aligns them; English to German is better than English to Greek. We examine multiple setups and show that the similarity matrices for natural languages show weaker relations the further apart two languages are. 2021.insights-1.3 @@ -59,7 +59,7 @@ SungjinPark SeolhwaLee TaesunWhang - HeuiseokLim + HeuiseokLim 23–28 In the field of natural language processing, ensembles are broadly known to be effective in improving performance. This paper analyzes how ensemble of neural machine translation (NMT) models affect performance improvement by designing various experimental setups (i.e., intra-, inter-ensemble, and non-convergence ensemble). To an in-depth examination, we analyze each ensemble method with respect to several aspects such as different attention models and vocab strategies. Experimental results show that ensembling is not always resulting in performance increases and give noteworthy negative findings. 2021.insights-1.4 @@ -93,7 +93,7 @@ Zero-Shot Cross-Lingual Transfer is a Hard Baseline to Beat in <fixed-case>G</fixed-case>erman Fine-Grained Entity Typing SabineWeber - MarkSteedman + MarkSteedman 42–48 The training of NLP models often requires large amounts of labelled training data, which makes it difficult to expand existing models to new languages. While zero-shot cross-lingual transfer relies on multilingual word embeddings to apply a model trained on one language to another, Yarowski and Ngai (2001) propose the method of annotation projection to generate training data without manual annotation. This method was successfully used for the tasks of named entity recognition and coarse-grained entity typing, but we show that it is outperformed by zero-shot cross-lingual transfer when applied to the similar task of fine-grained entity typing. In our study of fine-grained entity typing with the FIGER type ontology for German, we show that annotation projection amplifies the English model’s tendency to underpredict level 2 labels and is beaten by zero-shot cross-lingual transfer on three novel test sets. 2021.insights-1.7 @@ -135,7 +135,7 @@ JanRosendahl ChristianHerold FrithjofPetrick - HermannNey + HermannNey 62–66 In this work, we conduct a comprehensive investigation on one of the centerpieces of modern machine translation systems: the encoder-decoder attention mechanism. Motivated by the concept of first-order alignments, we extend the (cross-)attention mechanism by a recurrent connection, allowing direct access to previous attention/alignment decisions. We propose several ways to include such a recurrency into the attention mechanism. Verifying their performance across different translation tasks we conclude that these extensions and dependencies are not beneficial for the translation performance of the Transformer architecture. 2021.insights-1.10 @@ -147,7 +147,7 @@ On the Difficulty of Segmenting Words with Attention RamonSanabria HaoTang - SharonGoldwater + SharonGoldwater 67–73 Word segmentation, the problem of finding word boundaries in speech, is of interest for a range of tasks. Previous papers have suggested that for sequence-to-sequence models trained on tasks such as speech translation or speech recognition, attention can be used to locate and segment the words. We show, however, that even on monolingual data this approach is brittle. In our experiments with different input types, data sizes, and segmentation algorithms, only models trained to predict phones from words succeed in the task. Models trained to predict words from either phones or speech (i.e., the opposite direction needed to generalize to new data), yield much worse results, suggesting that attention-based segmentation is only useful in limited scenarios. 2021.insights-1.11 @@ -181,7 +181,7 @@ Learning Data Augmentation Schedules for Natural Language Processing DaphnéChopard Matthias S.Treder - IrenaSpasić + IrenaSpasić 89–102 Despite its proven efficiency in other fields, data augmentation is less popular in the context of natural language processing (NLP) due to its complexity and limited results. A recent study (Longpre et al., 2020) showed for example that task-agnostic data augmentations fail to consistently boost the performance of pretrained transformers even in low data regimes. In this paper, we investigate whether data-driven augmentation scheduling and the integration of a wider set of transformations can lead to improved performance where fixed and limited policies were unsuccessful. Our results suggest that, while this approach can help the training process in some settings, the improvements are unsubstantial. This negative result is meant to help researchers better understand the limitations of data augmentation for NLP. 2021.insights-1.14 @@ -206,7 +206,7 @@ LianeGuillou SanderBijl de Vroe MarkJohnson - MarkSteedman + MarkSteedman 110–116 Understanding linguistic modality is widely seen as important for downstream tasks such as Question Answering and Knowledge Graph Population. Entailment Graph learning might also be expected to benefit from attention to modality. We build Entailment Graphs using a news corpus filtered with a modality parser, and show that stripping modal modifiers from predicates in fact increases performance. This suggests that for some tasks, the pragmatics of modal modification of predicates allows them to contribute as evidence of entailment. 2021.insights-1.16 @@ -240,8 +240,8 @@ Challenging the Semi-Supervised <fixed-case>VAE</fixed-case> Framework for Text Classification GhaziFelhi - JosephLe Roux - DjaméSeddah + JosephLe Roux + DjaméSeddah 136–143 Semi-Supervised Variational Autoencoders (SSVAEs) are widely used models for data efficient learning. In this paper, we question the adequacy of the standard design of sequence SSVAEs for the task of text classification as we exhibit two sources of overcomplexity for which we provide simplifications. These simplifications to SSVAEs preserve their theoretical soundness while providing a number of practical advantages in the semi-supervised setup where the result of training is a text classifier. These simplifications are the removal of (i) the Kullback-Liebler divergence from its objective and (ii) the fully unobserved latent variable from its probabilistic model. These changes relieve users from choosing a prior for their latent variables, make the model smaller and faster, and allow for a better flow of information into the latent variables. We compare the simplified versions to standard SSVAEs on 4 text classification tasks. On top of the above-mentioned simplification, experiments show a speed-up of 26%, while keeping equivalent classification scores. The code to reproduce our experiments is public. 2021.insights-1.19 diff --git a/data/xml/2021.internlp.xml b/data/xml/2021.internlp.xml index 1cd84d5c1d..9202bc2eee 100644 --- a/data/xml/2021.internlp.xml +++ b/data/xml/2021.internlp.xml @@ -8,7 +8,7 @@ IrynaGurevych Ji-UngLee FilipRadlinski - HinrichSchütze + HinrichSchütze EdwinSimpson LiliYu Association for Computational Linguistics @@ -67,12 +67,12 @@ Dynamic Facet Selection by Maximizing Graded Relevance - MichaelGlass + MichaelGlass Md Faisal MahbubChowdhury YuDeng RuchiMahindru Nicolas RodolfoFauceglia - AlfioGliozzo + AlfioGliozzo NandanaMihindukulasooriya 32–39 Dynamic faceted search (DFS), an interactive query refinement technique, is a form of Human–computer information retrieval (HCIR) approach. It allows users to narrow down search results through facets, where the facets-documents mapping is determined at runtime based on the context of user query instead of pre-indexing the facets statically. In this paper, we propose a new unsupervised approach for dynamic facet generation, namely optimistic facets, which attempts to generate the best possible subset of facets, hence maximizing expected Discounted Cumulative Gain (DCG), a measure of ranking quality that uses a graded relevance scale. We also release code to generate a new evaluation dataset. Through empirical results on two datasets, we show that the proposed DFS approach considerably improves the document ranking in the search results. diff --git a/data/xml/2021.isa.xml b/data/xml/2021.isa.xml index e6a21d1aaf..0bd53f1304 100644 --- a/data/xml/2021.isa.xml +++ b/data/xml/2021.isa.xml @@ -3,7 +3,7 @@ Proceedings of the 17th Joint ACL - ISO Workshop on Interoperable Semantic Annotation - HarryBunt + HarryBunt Association for Computational Linguistics
Groningen, The Netherlands (online)
June @@ -68,7 +68,7 @@
Converting Multilayer Glosses into Semantic and Pragmatic forms with <fixed-case>GENLIS</fixed-case> - RodolfoDelmonte + RodolfoDelmonte SerenaTrolvi FrancescoStiffoni 54–64 diff --git a/data/xml/2021.iwclul.xml b/data/xml/2021.iwclul.xml index 8ccbedf31f..5090315d2a 100644 --- a/data/xml/2021.iwclul.xml +++ b/data/xml/2021.iwclul.xml @@ -22,7 +22,7 @@ Keyword spotting for audiovisual archival search in <fixed-case>U</fixed-case>ralic languages NilsHjortnaes NikoPartanen - Francis M.Tyers + Francis M.Tyers 1–7 2021.iwclul-1.1 hjortnaes-etal-2021-keyword diff --git a/data/xml/2021.iwcs.xml b/data/xml/2021.iwcs.xml index 6fe1f24bd0..10ac7838cf 100644 --- a/data/xml/2021.iwcs.xml +++ b/data/xml/2021.iwcs.xml @@ -3,7 +3,7 @@ Proceedings of the 14th International Conference on Computational Semantics (IWCS) - SinaZarrieß + SinaZarrieß JohanBos Rikvan Noord LashaAbzianidze @@ -23,7 +23,7 @@ GuyMarshall MokanaranganThayaparan PhilipOsborne - AndréFreitas + AndréFreitas 1–10 This paper explores the topic of transportability, as a sub-area of generalisability. By proposing the utilisation of metrics based on well-established statistics, we are able to estimate the change in performance of NLP models in new contexts. Defining a new measure for transportability may allow for better estimation of NLP system performance in new domains, and is crucial when assessing the performance of NLP systems in new tasks and domains. Through several instances of increasing complexity, we demonstrate how lightweight domain similarity measures can be used as estimators for the transportability in NLP applications. The proposed transportability measures are evaluated in the context of Named Entity Recognition and Natural Language Inference tasks. 2021.iwcs-1.1 @@ -52,7 +52,7 @@ Computing All Quantifier Scopes with <fixed-case>CCG</fixed-case> MilošStanojević - MarkSteedman + MarkSteedman 33–37 We present a method for computing all quantifer scopes that can be extracted from a single CCG derivation. To do that we build on the proposal of Steedman (1999, 2011) where all existential quantifiers are treated as Skolem functions. We extend the approach by introducing a better packed representation of all possible specifications that also includes node addresses where the specifications happen. These addresses are necessary for recovering all, and only, possible readings. 2021.iwcs-1.4 @@ -63,7 +63,7 @@ ZiliZhou MarcoValentino DonalLanders - AndréFreitas + AndréFreitas 38–50 This paper describes N-XKT (Neural encoding based on eXplanatory Knowledge Transfer), a novel method for the automatic transfer of explanatory knowledge through neural encoding mechanisms. We demonstrate that N-XKT is able to improve accuracy and generalization on science Question Answering (QA). Specifically, by leveraging facts from background explanatory knowledge corpora, the N-XKT model shows a clear improvement on zero-shot QA. Furthermore, we show that N-XKT can be fine-tuned on a target QA dataset, enabling faster convergence and more accurate results. A systematic analysis is conducted to quantitatively analyze the performance of the N-XKT model and the impact of different categories of knowledge on the zero-shot generalization task. 2021.iwcs-1.5 @@ -72,7 +72,7 @@ Predicate Representations and Polysemy in <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et Semantic Parsing JamesGung - MarthaPalmer + MarthaPalmer 51–62 Despite recent advances in semantic role labeling propelled by pre-trained text encoders like BERT, performance lags behind when applied to predicates observed infrequently during training or to sentences in new domains. In this work, we investigate how role labeling performance on low-frequency predicates and out-of-domain data can be further improved by using VerbNet, a verb lexicon that groups verbs into hierarchical classes based on shared syntactic and semantic behavior and defines semantic representations describing relations between arguments. We find that VerbNet classes provide an effective level of abstraction, improving generalization on low-frequency predicates by allowing them to learn from the training examples of other predicates belonging to the same class. We also find that joint training of VerbNet role labeling and predicate disambiguation of VerbNet classes for polysemous verbs leads to improvements in both tasks, naturally supporting the extraction of VerbNet’s semantic representations. 2021.iwcs-1.6 @@ -92,7 +92,7 @@ Do Natural Language Explanations Represent Valid Logical Arguments? Verifying Entailment in Explainable <fixed-case>NLI</fixed-case> Gold Standards MarcoValentino IanPratt-Hartmann - AndréFreitas + AndréFreitas 76–86 An emerging line of research in Explainable NLP is the creation of datasets enriched with human-annotated explanations and rationales, used to build and evaluate models with step-wise inference and explanation generation capabilities. While human-annotated explanations are used as ground-truth for the inference, there is a lack of systematic assessment of their consistency and rigour. In an attempt to provide a critical quality assessment of Explanation Gold Standards (XGSs) for NLI, we propose a systematic annotation methodology, named Explanation Entailment Verification (EEV), to quantify the logical validity of human-annotated explanations. The application of EEV on three mainstream datasets reveals the surprising conclusion that a majority of the explanations, while appearing coherent on the surface, represent logically invalid arguments, ranging from being incomplete to containing clearly identifiable logical errors. This conclusion confirms that the inferential properties of explanations are still poorly formalised and understood, and that additional work on this line of research is necessary to improve the way Explanation Gold Standards are constructed. 2021.iwcs-1.8 @@ -111,7 +111,7 @@ Automatic Assignment of Semantic Frames in Disaster Response Team Communication Dialogues NataliaSkachkova - IvanaKruijff-Korbayova + IvanaKruijff-Korbayova 93–109 We investigate frame semantics as a meaning representation framework for team communication in a disaster response scenario. We focus on the automatic frame assignment and retrain PAFIBERT, which is one of the state-of-the-art frame classifiers, on English and German disaster response team communication data, obtaining accuracy around 90%. We examine the performance of both models and discuss their adjustments, such as sampling of additional training instances from an unrelated domain and adding extra lexical and discourse features to input token representations. We show that sampling has some positive effect on the German frame classifier, discuss an unexpected impact of extra features on the models’ behaviour and perform a careful error analysis. 2021.iwcs-1.10 @@ -146,7 +146,7 @@ RebeccaKehlbeck RitaSevastjanova OliverDeussen - DanielKeim + DanielKeim MiriamButt 132–143 Research in NLP has mainly focused on factoid questions, with the goal of finding quick and reliable ways of matching a query to an answer. However, human discourse involves more than that: it contains non-canonical questions deployed to achieve specific communicative goals. In this paper, we investigate this under-studied aspect of NLP by introducing a targeted task, creating an appropriate corpus for the task and providing baseline models of diverse nature. With this, we are also able to generate useful insights on the task and open the way for future research in this direction. @@ -158,7 +158,7 @@ New Domain, Major Effort? How Much Data is Necessary to Adapt a Temporal Tagger to the Voice Assistant Domain TouhidulAlam AlessandraZarcone - SebastianPadó + SebastianPadó 144–154 Reliable tagging of Temporal Expressions (TEs, e.g., Book a table at L’Osteria for Sunday evening) is a central requirement for Voice Assistants (VAs). However, there is a dearth of resources and systems for the VA domain, since publicly-available temporal taggers are trained only on substantially different domains, such as news and clinical text. Since the cost of annotating large datasets is prohibitive, we investigate the trade-off between in-domain data and performance in DA-Time, a hybrid temporal tagger for the English VA domain which combines a neural architecture for robust TE recognition, with a parser-based TE normalizer. We find that transfer learning goes a long way even with as little as 25 in-domain sentences: DA-Time performs at the state of the art on the news domain, and substantially outperforms it on the VA domain. 2021.iwcs-1.14 @@ -184,10 +184,10 @@ Builder, we have done it: Evaluating & Extending Dialogue-<fixed-case>AMR</fixed-case> <fixed-case>NLU</fixed-case> Pipeline for Two Collaborative Domains - ClaireBonial + ClaireBonial MitchellAbrams - DavidTraum - ClareVoss + DavidTraum + ClareVoss 173–183 We adopt, evaluate, and improve upon a two-step natural language understanding (NLU) pipeline that incrementally tames the variation of unconstrained natural language input and maps to executable robot behaviors. The pipeline first leverages Abstract Meaning Representation (AMR) parsing to capture the propositional content of the utterance, and second converts this into “Dialogue-AMR,” which augments standard AMR with information on tense, aspect, and speech acts. Several alternative approaches and training datasets are evaluated for both steps and corresponding components of the pipeline, some of which outperform the original. We extend the Dialogue-AMR annotation schema to cover a different collaborative instruction domain and evaluate on both domains. With very little training data, we achieve promising performance in the new domain, demonstrating the scalability of this approach. 2021.iwcs-1.17 @@ -199,7 +199,7 @@ GeneKim VietDuong XinLu - LenhartSchubert + LenhartSchubert 184–201 “Episodic Logic: Unscoped Logical Form” (EL-ULF) is a semantic representation capturing predicate-argument structure as well as more challenging aspects of language within the Episodic Logic formalism. We present the first learned approach for parsing sentences into ULFs, using a growing set of annotated examples. The results provide a strong baseline for future improvement. Our method learns a sequence-to-sequence model for predicting the transition action sequence within a modified cache transition system. We evaluate the efficacy of type grammar-based constraints, a word-to-symbol lexicon, and transition system state features in this task. Our system is available at https://github.com/genelkim/ulf-transition-parser. We also present the first official annotated ULF dataset at https://www.cs.rochester.edu/u/gkim21/ulf/resources/. 2021.iwcs-1.18 @@ -219,7 +219,7 @@ Tuning Deep Active Learning for Semantic Role Labeling SkatjeMyers - MarthaPalmer + MarthaPalmer 212–221 Active learning has been shown to reduce annotation requirements for numerous natural language processing tasks, including semantic role labeling (SRL). SRL involves labeling argument spans for potentially multiple predicates in a sentence, which makes it challenging to aggregate the numerous decisions into a single score for determining new instances to annotate. In this paper, we apply two ways of aggregating scores across multiple predicates in order to choose query sentences with two methods of estimating model certainty: using the neural network’s outputs and using dropout-based Bayesian Active Learning by Disagreement. We compare these methods with three passive baselines — random sentence selection, random whole-document selection, and selecting sentences with the most predicates — and analyse the effect these strategies have on the learning curve with respect to reducing the number of annotated sentences and predicates to achieve high performance. 2021.iwcs-1.20 @@ -230,10 +230,10 @@ KevinStowe JenettePreciado KathrynConger - Susan WindischBrown + Susan WindischBrown GhazalehKazeminejad JamesGung - MarthaPalmer + MarthaPalmer 222–227 The SemLink resource provides mappings between a variety of lexical semantic ontologies, each with their strengths and weaknesses. To take advantage of these differences, the ability to move between resources is essential. This work describes advances made to improve the usability of the SemLink resource: the automatic addition of new instances and mappings, manual corrections, sense-based vectors and collocation information, and architecture built to automatically update the resource when versions of the underlying resources change. These updates improve coverage, provide new tools to leverage the capabilities of these resources, and facilitate seamless updates, ensuring the consistency and applicability of these mappings in the future. 2021.iwcs-1.21 @@ -254,7 +254,7 @@ NeeleFalk YanaStrakatova EvaHuber - ErhardHinrichs + ErhardHinrichs 239–249 Adjectives such as heavy (as in heavy rain) and windy (as in windy day) provide possible values for the attributes intensity and climate, respectively. The attributes themselves are not overtly realized and are in this sense implicit. While these attributes can be easily inferred by humans, their automatic classification poses a challenging task for computational models. We present the following contributions: (1) We gain new insights into the attribute selection task for German. More specifically, we develop computational models for this task that are able to generalize to unseen data. Moreover, we show that classification accuracy depends, inter alia, on the degree of polysemy of the lexemes involved, on the generalization potential of the training data and on the degree of semantic transparency of the adjective-noun pairs in question. (2) We provide the first resource for computational and linguistic experiments with German adjective-noun pairs that can be used for attribute selection and related tasks. In order to safeguard against unwelcome memorization effects, we present an automatic data augmentation method based on a lexical resource that can increase the size of the training data to a large extent. 2021.iwcs-1.23 diff --git a/data/xml/2021.iwpt.xml b/data/xml/2021.iwpt.xml index 0539534472..619ef3c768 100644 --- a/data/xml/2021.iwpt.xml +++ b/data/xml/2021.iwpt.xml @@ -7,8 +7,8 @@ KenjiSagae ReutTsarfaty GosseBouma - DjaméSeddah - DanielZeman + DjaméSeddah + DanielZeman Association for Computational Linguistics
Online
August @@ -57,7 +57,7 @@ Semi-Automatic Construction of Text-to-<fixed-case>SQL</fixed-case> Data for Domain Transfer TianyiLi SujianLi - MarkSteedman + MarkSteedman 38–49 Strong and affordable in-domain data is a desirable asset when transferring trained semantic parsers to novel domains. As previous methods for semi-automatically constructing such data cannot handle the complexity of realistic SQL queries, we propose to construct SQL queries via context-dependent sampling, and introduce the concept of topic. Along with our SQL query construction method, we propose a novel pipeline of semi-automatic Text-to-SQL dataset construction that covers the broad space of SQL queries. We show that the created dataset is comparable with expert annotation along multiple dimensions, and is capable of improving domain transfer performance for SOTA semantic parsers. 2021.iwpt-1.4 @@ -68,7 +68,7 @@ Levi Graph <fixed-case>AMR</fixed-case> Parser using Heterogeneous Attention HanHe - Jinho D.Choi + Jinho D.Choi 50–57 Coupled with biaffine decoders, transformers have been effectively adapted to text-to-graph transduction and achieved state-of-the-art performance on AMR parsing. Many prior works, however, rely on the biaffine decoder for either or both arc and label predictions although most features used by the decoder may be learned by the transformer already. This paper presents a novel approach to AMR parsing by combining heterogeneous data (tokens, concepts, labels) as one input to a transformer to learn attention, and use only attention matrices from the transformer to predict all elements in AMR graphs (concepts, arcs, labels). Although our models use significantly fewer parameters than the previous state-of-the-art graph parser, they show similar or better accuracy on AMR 2.0 and 3.0. 2021.iwpt-1.5 @@ -112,7 +112,7 @@ Multilingual Dependency Parsing for Low-Resource <fixed-case>A</fixed-case>frican Languages: Case Studies on <fixed-case>B</fixed-case>ambara, <fixed-case>W</fixed-case>olof, and <fixed-case>Y</fixed-case>oruba - Cheikh M. BambaDione + Cheikh M. BambaDione 84–92 This paper describes a methodology for syntactic knowledge transfer between high-resource languages to extremely low-resource languages. The methodology consists in leveraging multilingual BERT self-attention model pretrained on large datasets to develop a multilingual multi-task model that can predict Universal Dependencies annotations for three African low-resource languages. The UD annotations include universal part-of-speech, morphological features, lemmas, and dependency trees. In our experiments, we used multilingual word embeddings and a total of 11 Universal Dependencies treebanks drawn from three high-resource languages (English, French, Norwegian) and three low-resource languages (Bambara, Wolof and Yoruba). We developed various models to test specific language combinations involving contemporary contact languages or genetically related languages. The results of the experiments show that multilingual models that involve high-resource languages and low-resource languages with contemporary contact between each other can provide better results than combinations that only include unrelated languages. As far genetic relationships are concerned, we could not draw any conclusion regarding the impact of language combinations involving the selected low-resource languages, namely Wolof and Yoruba. 2021.iwpt-1.9 @@ -123,7 +123,7 @@ Bidirectional Domain Adaptation Using Weighted Multi-Task Learning DanielDakota Zeeshan AliSayyed - SandraKübler + SandraKübler 93–105 Domain adaption in syntactic parsing is still a significant challenge. We address the issue of data imbalance between the in-domain and out-of-domain treebank typically used for the problem. We define domain adaptation as a Multi-task learning (MTL) problem, which allows us to train two parsers, one for each do-main. Our results show that the MTL approach is beneficial for the smaller treebank. For the larger treebank, we need to use loss weighting in order to avoid a decrease in performance be-low the single task. In order to determine towhat degree the data imbalance between two domains and the domain differences affect results, we also carry out an experiment with two imbalanced in-domain treebanks and show that loss weighting also improves performance in an in-domain setting. Given loss weighting in MTL, we can improve results for both parsers. 2021.iwpt-1.10 @@ -134,7 +134,7 @@ Strength in Numbers: Averaging and Clustering Effects in Mixture of Experts for Graph-Based Dependency Parsing XudongZhang - JosephLe Roux + JosephLe Roux ThierryCharnois 106–118 We review two features of mixture of experts (MoE) models which we call averaging and clustering effects in the context of graph-based dependency parsers learned in a supervised probabilistic framework. Averaging corresponds to the ensemble combination of parsers and is responsible for variance reduction which helps stabilizing and improving parsing accuracy. Clustering describes the capacity of MoE models to give more credit to experts believed to be more accurate given an input. Although promising, this is difficult to achieve, especially without additional data. We design an experimental set-up to study the impact of these effects. Whereas averaging is always beneficial, clustering requires good initialization and stabilization techniques, but its advantages over mere averaging seem to eventually vanish when enough experts are present. As a by product, we show how this leads to state-of-the-art results on the PTB and the CoNLL09 Chinese treebank, with low variance across experiments. @@ -259,7 +259,7 @@ AlirezaMohammadshahi JoachimWagner JenniferFoster - JamesHenderson + JamesHenderson 204–212 We describe the DCU-EPFL submission to the IWPT 2021 Parsing Shared Task: From Raw Text to Enhanced Universal Dependencies. The task involves parsing Enhanced UD graphs, which are an extension of the basic dependency trees designed to be more facilitative towards representing semantic structure. Evaluation is carried out on 29 treebanks in 17 languages and participants are required to parse the data from each language starting from raw strings. Our approach uses the Stanza pipeline to preprocess the text files, XLM-RoBERTa to obtain contextualized token representations, and an edge-scoring and labeling model to predict the enhanced graph. Finally, we run a postprocessing script to ensure all of our outputs are valid Enhanced UD graphs. Our system places 6th out of 9 participants with a coarse Enhanced Labeled Attachment Score (ELAS) of 83.57. We carry out additional post-deadline experiments which include using Trankit for pre-processing, XLM-RoBERTa LARGE, treebank concatenation, and multitask learning between a basic and an enhanced dependency parser. All of these modifications improve our initial score and our final system has a coarse ELAS of 88.04. 2021.iwpt-1.22 diff --git a/data/xml/2021.iwslt.xml b/data/xml/2021.iwslt.xml index e4e7f0c7c5..e12a56e1d3 100644 --- a/data/xml/2021.iwslt.xml +++ b/data/xml/2021.iwslt.xml @@ -4,8 +4,8 @@ Proceedings of the 18th International Conference on Spoken Language Translation (IWSLT 2021) MarcelloFederico - AlexWaibel - Marta R.Costa-jussà + AlexWaibel + Marta R.Costa-jussà JanNiehues SebastianStuker ElizabethSalesky @@ -23,18 +23,18 @@ <fixed-case>FINDINGS</fixed-case> <fixed-case>OF</fixed-case> <fixed-case>THE</fixed-case> <fixed-case>IWSLT</fixed-case> 2021 <fixed-case>EVALUATION</fixed-case> <fixed-case>CAMPAIGN</fixed-case> AntoniosAnastasopoulos - OndřejBojar + OndřejBojar JacobBremerman RoldanoCattoni MahaElbayad MarcelloFederico XutaiMa SatoshiNakamura - MatteoNegri + MatteoNegri JanNiehues JuanPino ElizabethSalesky - SebastianStüker + SebastianStüker KatsuhitoSudoh MarcoTurchi AlexanderWaibel @@ -92,7 +92,7 @@ Without Further Ado: Direct and Simultaneous Speech Translation by <fixed-case>A</fixed-case>pp<fixed-case>T</fixed-case>ek in 2021 ParniaBahar PatrickWilken - Mattia A.Di Gangi + Mattia A.Di Gangi EvgenyMatusov 52–63 This paper describes the offline and simultaneous speech translation systems developed at AppTek for IWSLT 2021. Our offline ST submission includes the direct end-to-end system and the so-called posterior tight integrated model, which is akin to the cascade system but is trained in an end-to-end fashion, where all the cascaded modules are end-to-end models themselves. For simultaneous ST, we combine hybrid automatic speech recognition with a machine translation approach whose translation policy decisions are learned from statistical word alignments. Compared to last year, we improve general quality and provide a wider range of quality/latency trade-offs, both due to a data augmentation method making the MT model robust to varying chunk sizes. Finally, we present a method for ASR output segmentation into sentences that introduces a minimal additional delay. @@ -133,7 +133,7 @@ Dealing with training and test segmentation mismatch: <fixed-case>FBK</fixed-case>@<fixed-case>IWSLT</fixed-case>2021 SaraPapi MarcoGaido - MatteoNegri + MatteoNegri MarcoTurchi 84–91 This paper describes FBK’s system submission to the IWSLT 2021 Offline Speech Translation task. We participated with a direct model, which is a Transformer-based architecture trained to translate English speech audio data into German texts. The training pipeline is characterized by knowledge distillation and a two-step fine-tuning procedure. Both knowledge distillation and the first fine-tuning step are carried out on manually segmented real and synthetic data, the latter being generated with an MT system trained on the available corpora. Differently, the second fine-tuning step is carried out on a random segmentation of the MuST-C v2 En-De dataset. Its main goal is to reduce the performance drops occurring when a speech translation model trained on manually segmented data (i.e. an ideal, sentence-like segmentation) is evaluated on automatically segmented audio (i.e. actual, more realistic testing conditions). For the same purpose, a custom hybrid segmentation procedure that accounts for both audio content (pauses) and for the length of the produced segments is applied to the test data before passing them to the system. At inference time, we compared this procedure with a baseline segmentation method based on Voice Activity Detection (VAD). Our results indicate the effectiveness of the proposed hybrid approach, shown by a reduction of the gap with manual segmentation from 8.3 to 1.4 BLEU points. @@ -176,7 +176,7 @@ Gerard I.Gállego IoannisTsiamas CarlosEscolano - José A. R.Fonollosa + José A. R.Fonollosa Marta R.Costa-jussà 110–119 This paper describes the submission to the IWSLT 2021 offline speech translation task by the UPC Machine Translation group. The task consists of building a system capable of translating English audio recordings extracted from TED talks into German text. Submitted systems can be either cascade or end-to-end and use a custom or given segmentation. Our submission is an end-to-end speech translation system, which combines pre-trained models (Wav2Vec 2.0 and mBART) with coupling modules between the encoder and decoder, and uses an efficient fine-tuning technique, which trains only 20% of its total parameters. We show that adding an Adapter to the system and pre-training it, can increase the convergence speed and the final result, with which we achieve a BLEU score of 27.3 on the MuST-C test set. Our final model is an ensemble that obtains 28.22 BLEU score on the same set. Our submission also uses a custom segmentation algorithm that employs pre-trained Wav2Vec 2.0 for identifying periods of untranscribable text and can bring improvements of 2.5 to 3 BLEU score on the IWSLT 2019 test set, as compared to the result with the given segmentation. @@ -203,10 +203,10 @@ Tuan NamNguyen Thai SonNguyen ChristianHuber - Ngoc-QuanPham + Ngoc-QuanPham Thanh-LeHa FelixSchneider - SebastianStüker + SebastianStüker 125–130 This paper describes KIT’submission to the IWSLT 2021 Offline Speech Translation Task. We describe a system in both cascaded condition and end-to-end condition. In the cascaded condition, we investigated different end-to-end architectures for the speech recognition module. For the text segmentation module, we trained a small transformer-based model on high-quality monolingual data. For the translation module, our last year’s neural machine translation model was reused. In the end-to-end condition, we improved our Speech Relative Transformer architecture to reach or even surpass the result of the cascade system. 2021.iwslt-1.13 @@ -260,10 +260,10 @@ Multilingual Speech Translation <fixed-case>KIT</fixed-case> @ <fixed-case>IWSLT</fixed-case>2021 - Ngoc-QuanPham + Ngoc-QuanPham Tuan NamNguyen Thanh-LeHa - SebastianStüker + SebastianStüker AlexanderWaibel DanHe 154–159 @@ -364,7 +364,7 @@ Between Flexibility and Consistency: Joint Generation of Captions and Subtitles AlinaKarakanta MarcoGaido - MatteoNegri + MatteoNegri MarcoTurchi 215–225 Speech translation (ST) has lately received growing interest for the generation of subtitles without the need for an intermediate source language transcription and timing (i.e. captions). However, the joint generation of source captions and target subtitles does not only bring potential output quality advantages when the two decoding processes inform each other, but it is also often required in multilingual scenarios. In this work, we focus on ST models which generate consistent captions-subtitles in terms of structure and lexical content. We further introduce new metrics for evaluating subtitling consistency. Our findings show that joint decoding leads to increased performance and consistency between the generated captions and subtitles while still allowing for sufficient flexibility to produce subtitles conforming to language-specific needs and norms. @@ -433,7 +433,7 @@ PavelPetrushkov TomerLancewicki ShahramKhadivi - HermannNey + HermannNey 276–286 Complex natural language applications such as speech translation or pivot translation traditionally rely on cascaded models. However,cascaded models are known to be prone to error propagation and model discrepancy problems. Furthermore, there is no possibility of using end-to-end training data in conventional cascaded systems, meaning that the training data most suited for the task cannot be used. Previous studies suggested several approaches for integrated end-to-end training to overcome those problems, however they mostly rely on(synthetic or natural) three-way data. We propose a cascaded model based on the non-autoregressive Transformer that enables end-to-end training without the need for an explicit intermediate representation. This new architecture (i) avoids unnecessary early decisions that can cause errors which are then propagated throughout the cascaded models and (ii) utilizes the end-to-end training data directly. We conduct an evaluation on two pivot-based machine translation tasks, namely French→German and German→Czech. Our experimental results show that the proposed architecture yields an improvement of more than 2 BLEU for French→German over the cascaded baseline. 2021.iwslt-1.32 @@ -442,7 +442,7 @@ Data Augmentation by Concatenation for Low-Resource Translation: A Mystery and a Solution - Toan Q.Nguyen + Toan Q.Nguyen KentonMurray DavidChiang 287–293 diff --git a/data/xml/2021.jeptalnrecital.xml b/data/xml/2021.jeptalnrecital.xml index a0bf2e4b08..30504960d5 100644 --- a/data/xml/2021.jeptalnrecital.xml +++ b/data/xml/2021.jeptalnrecital.xml @@ -76,7 +76,7 @@ Plongements Interprétables pour la Détection de Biais Cachés (Interpretable Embeddings for Hidden Biases Detection) TomBourgeade PhilippeMuller - TimVan de Cruys + TimVan de Cruys 64–80 De nombreuses tâches sémantiques en TAL font usage de données collectées de manière semiautomatique, ce qui est souvent source d’artefacts indésirables qui peuvent affecter négativement les modèles entraînés sur celles-ci. Avec l’évolution plus récente vers des modèles à usage générique pré-entraînés plus complexes, et moins interprétables, ces biais peuvent conduire à l’intégration de corrélations indésirables dans des applications utilisateurs. Récemment, quelques méthodes ont été proposées pour entraîner des plongements de mots avec une meilleure interprétabilité. Nous proposons une méthode simple qui exploite ces représentations pour détecter de manière préventive des corrélations lexicales faciles à apprendre, dans divers jeux de données. Nous évaluons à cette fin quelques modèles de plongements interprétables populaires pour l’anglais, en utilisant à la fois une évaluation intrinsèque, et un ensemble de tâches sémantiques en aval, et nous utilisons la qualité interprétable des plongements afin de diagnostiquer des biais potentiels dans les jeux de données associés. 2021.jeptalnrecital-taln.6 @@ -106,7 +106,7 @@ Analyse en dépendances du français avec des plongements contextualisés (<fixed-case>F</fixed-case>rench dependency parsing with contextualized embeddings) LoïcGrobol - BenoitCrabbé + BenoitCrabbé 106–114 Cet article présente un analyseur syntaxique en dépendances pour le français qui se compare favorablement à l’état de l’art sur la plupart des corpus de référence. L’analyseur s’appuie sur de riches représentations lexicales issues notamment de BERT et de FASTTEXT. On remarque que les représentations lexicales produites par FLAUBERT ont un caractère auto-suffisant pour réaliser la tâche d’analyse syntaxique de manière optimale. 2021.jeptalnrecital-taln.9 @@ -116,7 +116,7 @@ Caractérisation des relations sémantiques entre termes multi-mots fondée sur l’analogie (Semantic relations recognition between multi-word terms by means of analogy ) YizheWang - BéatriceDaille + BéatriceDaille NabilHathout 115–124 La terminologie d’un domaine rend compte de la structure du domaine grâce aux relations entre ses termes. Dans cet article, nous nous intéressons à la caractérisation des relations terminologiques qui existent entre termes multi-mots (MWT) dans les espaces vectoriels distributionnels. Nous avons constitué un jeu de données composé de MWT en français du domaine de l’environnement, reliés par des relations sémantiques lexicales. Nous présentons une expérience dans laquelle ces relations sémantiques entre MWT sont caractérisées au moyen de l’analogie. Les résultats obtenus permettent d’envisager un processus automatique pour aider à la structuration des terminologies. @@ -136,11 +136,11 @@ Contribution d’informations syntaxiques aux capacités de généralisation compositionelle des modèles seq2seq convolutifs (Assessing the Contribution of Syntactic Information for Compositional Generalization of seq2seq Convolutional Networks) - Diana NicoletaPopa + Diana NicoletaPopa William N.Havard MaximinCoavoux - EricGaussier - LaurentBesacier + EricGaussier + LaurentBesacier 134–141 Les modèles neuronaux de type seq2seq manifestent d’étonnantes capacités de prédiction quand ils sont entraînés sur des données de taille suffisante. Cependant, ils échouent à généraliser de manière satisfaisante quand la tâche implique d’apprendre et de réutiliser des règles systématiques de composition et non d’apprendre simplement par imitation des exemples d’entraînement. Le jeu de données SCAN, constitué d’un ensemble de commandes en langage naturel associées à des séquences d’action, a été spécifiquement conçu pour évaluer les capacités des réseaux de neurones à apprendre ce type de généralisation compositionnelle. Dans cet article, nous nous proposons d’étudier la contribution d’informations syntaxiques sur les capacités de généralisation compositionnelle des réseaux de neurones seq2seq convolutifs. 2021.jeptalnrecital-taln.12 @@ -153,7 +153,7 @@ VojtechHudecek DanielStancl OndrejDusek - PatrickParoubek + PatrickParoubek 142–152 Définition et détection des incohérences du système dans les dialogues orientés tâche. Nous présentons des expériences sur la détection automatique des comportements incohérents des systèmes de dialogues orientés tâche à partir du contexte. Nous enrichissons les données bAbI/DSTC2 (Bordes et al., 2017) avec une annotation automatique des incohérences de dialogue, et nous démontrons que les incohérences sont en corrélation avec les dialogues ratés. Nous supposons que l’utilisation d’un historique de dialogue limité et la prédiction du prochain tour de l’utilisateur peuvent améliorer la classification des incohérences. Si les deux hypothèses sont confirmées pour un modèle de dialogue basé sur les réseaux de mémoire, elles ne le sont pas pour un entraînement basé sur le modèle de langage GPT-2, qui bénéficie le plus de l’utilisation de l’historique complet du dialogue et obtient un score de précision de 0,99. 2021.jeptalnrecital-taln.13 @@ -164,7 +164,7 @@ Évaluation de méthodes et d’outils pour la lemmatisation automatique du français médiéval (Evaluation of methods and tools for automatic lemmatization in <fixed-case>O</fixed-case>ld <fixed-case>F</fixed-case>rench) CristinaHolgado AlexeiLavrentiev - MathieuConstant + MathieuConstant 153–161 Pour les langues historiques non stabilisées comme le français médiéval, la lemmatisation automatique présente toujours des défis, car cette langue connaît une forte variation graphique. Dans cet article, nous dressons un état des lieux de la lemmatisation automatique pour cette langue en comparant les performances de quatre lemmatiseurs existants sur un même jeu de données. L’objectif est d’évaluer où se situent les nouvelles techniques de l’apprentissage automatique par rapport aux techniques plus traditionnelles s’appuyant sur des systèmes de règles et lexiques, en particulier pour la prédiction des mots inconnus. 2021.jeptalnrecital-taln.14 @@ -187,7 +187,7 @@ Formalisation de la relation entre les verbes imperfectifs et perfectifs en ukrainien (In the <fixed-case>S</fixed-case>lavic linguistic tradition, perfective and imperfective forms of verbs are traditionally entered independently in dictionaries) OlenaSaint-Joanis - MaxSilberztein + MaxSilberztein 171–178 Dans la tradition linguistique slave, les formes perfectives et imperfectives des verbes sont traditionnellement inscrites séparément dans les dictionnaires. Cependant, il existe de forts liens morphologiques et sémantiques entre les deux formes verbales. Nous présentons une formalisation qui nous a permis de lier les deux formes. Nous avons construit un dictionnaire électronique qui contient plus de 13 000 entrées verbales associées à plus de 300 paradigmes morphologiques, qui peut être utilisé pour automatiquement lemmatiser les formes verbales dans les textes ukrainiens et relier les formes perfectives et imperfectives. 2021.jeptalnrecital-taln.16 @@ -196,7 +196,7 @@ Intérêt des modèles de caractères pour la détection d’événements (The interest of character-level models for event detection) - EmanuelaBoros + EmanuelaBoros RomaricBesançon OlivierFerret BrigitteGrau @@ -254,8 +254,8 @@ Stratégie Multitâche pour la Classification Multiclasse (A Multitask Strategy for Multiclass Classification) HoussamAkhmouch HamzaBouanani - GaëlDias - Jose G.Moreno + GaëlDias + Jose G.Moreno 227–236 Nous proposons une idée originale pour exploiter les relations entre les classes dans les problèmes multiclasses. Nous définissons deux architectures multitâches de type one-vs-rest qui combinent des ensembles de classifieurs appris dans une configuration multitâche en utilisant des réseaux de neurones. Les expériences menées sur six jeux de données pour la classification des sentiments, des émotions, des thématiques et des relations lexico-sémantiques montrent que nos architectures améliorent constamment les performances par rapport aux stratégies de l’état de l’art de type one-vsrest et concurrencent fortement les autres stratégies multiclasses. 2021.jeptalnrecital-taln.22 @@ -301,7 +301,7 @@ _ _ (French) We introduce a French adaptation from the well-known GPT model) AntoineSimoulin - BenoitCrabbé + BenoitCrabbé 246–255 Nous proposons une adaptation en français du fameux modèle Generative Pre-trained Transformer (GPT). Ce dernier appartient à la catégorie des architectures transformers qui ont significativement transformé les méthodes de traitement automatique du langage. Ces architectures sont en particulier pré-entraînées sur des tâches auto-supervisées et sont ainsi spécifiques pour une langue donnée. Si certaines sont disponibles en français, la plupart se déclinent avant tout en anglais. GPT est particulièrement efficace pour les tâches de génération de texte. Par ailleurs, il est possible de l’appliquer à de nombreux cas d’usages. Ses propriétés génératives singulières permettent de l’utiliser dans des conditions originales comme l’apprentissage sans exemple qui ne suppose aucune mise à jour des poids du modèle, ou modification de l’architecture. 2021.jeptalnrecital-taln.24 @@ -311,7 +311,7 @@ _ (French) We introduce a Fren Une étude des avis en ligne : généralisabilité d’un modèle d’évaluation (A Study of Online Reviews : Generalizability of the Evaluation Model) HyunJung Kang - IrisEshkol-Taravella + IrisEshkol-Taravella 256–263 Ce travail se situe dans la continuité de nos travaux antérieurs proposant le modèle d’évaluation portant sur des avis en ligne sur des restaurants. Le modèle est composé de quatre catégories : l’opinion (positive, négative, mixte), la suggestion, l’intention et la description. Cet article vise à tester la généralisabilité du modèle en l’appliquant sur deux corpus supplémentaires : un corpus relevant d’un autre domaine (celui de l’hôtellerie) et un corpus écrit dans une autre langue (le coréen). Nous avons présenté l’annotation manuelle et la détection automatique de ces catégories en nous appuyant sur différents modèles de l’apprentissage de surface (SVM) et l’apprentissage profond (LSTM). 2021.jeptalnrecital-taln.25 @@ -526,7 +526,7 @@ _ (French) We introduce a Fren Corpus <fixed-case>EN</fixed-case>-Istex : un corpus d’articles scientifiques annoté manuellement en entités nommées (<fixed-case>ISTEX</fixed-case>-<fixed-case>EN</fixed-case> Corpus: a scientific paper corpus manually annotated in named entities) EnzaMorale DenisMaurel - JeanneVillaneau + JeanneVillaneau Jean-YvesAntoine 6–7 Nous présentons ici une nouvelle ressource libre : le corpus EN-ISTEX, un corpus de deux cents articles scientifiques annotés manuellement en entités nommées. Ces articles ont été extraits des deux éditeurs scientifiques les plus importants de la plateforme ISTEX. Tous les domaines sont concernés, même si les sciences dites dures, en particulier les sciences du vivant et de la santé, sont prépondérantes. Parmi ceux-ci vingt articles ont été multi-annotés afin de vérifier l’adéquation du guide d’annotation et la fiabilité de l’annotation. L’accord inter annotateurs sur ces vingt textes s’élève à 91 %. @@ -599,7 +599,7 @@ _ (French) We introduce a Fren Classification multi-label de cas cliniques avec <fixed-case>C</fixed-case>amem<fixed-case>BERT</fixed-case> (Multi-label classification of clinical cases with <fixed-case>C</fixed-case>amem<fixed-case>BERT</fixed-case> ) AlexandreBailly CorentinBlanc - ThierryGuillotin + ThierryGuillotin 14–20 La quantité de documents textuels médicaux allant grandissant, la nécessité d’en extraire automatiquement des informations concernant des patients devient de plus en plus grande. La prédiction du profil clinique permet de gagner du temps pour le praticien tout en extrayant l’essentiel de l’information concernant un patient. Avec l’explosion du nombre de documents (médicaux ou non), des modèles pré-entraînés tels que BERT pour l’anglais ou CamemBERT pour le français ont émergé. L’utilisation de ces modèles permet d’encoder contextuellement du texte afin de l’utiliser dans des réseaux neuronaux pour notamment prédire des profils cliniques. Cet article vise à comparer différentes méthodes de prédiction de profil clinique en se basant sur l’utilisation de CamemBERT. Dans un premier temps, uniquement du texte provenant de documents médicaux a été utilisé. Dans un second temps, des entités nommées ont été injectées en plus du texte par concaténation ou par sommation pondérée. Les résultats ont montré un succès limité et dépendant de la prévalence des chapitres à prédire dans le corpus ainsi qu’une dégradation des performances lors de l’ajout des entités nommées. 2021.jeptalnrecital-deft.2 diff --git a/data/xml/2021.konvens.xml b/data/xml/2021.konvens.xml index 9f5f3e9380..d9cfe7e36c 100644 --- a/data/xml/2021.konvens.xml +++ b/data/xml/2021.konvens.xml @@ -6,7 +6,7 @@ KilianEvang LauraKallmeyer RainerOsswald - JakubWaszczuk + JakubWaszczuk TorstenZesch KONVENS 2021 Organizers
Düsseldorf, Germany
@@ -51,7 +51,7 @@ SanaMoin AnirbanBhowmick Seid MuhieYimam - ChrisBiemann + ChrisBiemann 37–48 2021.konvens-1.4 von-boguszewski-etal-2021-hateful @@ -103,7 +103,7 @@ Extraction and Normalization of Vague Time Expressions in <fixed-case>G</fixed-case>erman UlrikeMay KarolinaZaczynska - JuliánMoreno-Schneider + JuliánMoreno-Schneider GeorgRehm 114–126 2021.konvens-1.10 @@ -154,7 +154,7 @@ Neural End-to-end Coreference Resolution for <fixed-case>G</fixed-case>erman in Different Domains FynnSchröder Hans OleHatzel - ChrisBiemann + ChrisBiemann 170–181 2021.konvens-1.15 schroder-etal-2021-neural @@ -193,7 +193,7 @@
<fixed-case>D</fixed-case>e<fixed-case>I</fixed-case>n<fixed-case>S</fixed-case>tance: Creating and Evaluating a <fixed-case>G</fixed-case>erman Corpus for Fine-Grained Inferred Stance Detection - AnneGöhring + AnneGöhring ManfredKlenner SophiaConrad 213–217 @@ -231,9 +231,9 @@ <fixed-case>W</fixed-case>ord<fixed-case>G</fixed-case>uess: Using Associations for Guessing, Learning and Exploring Related Words CennetOguz - AndréBlessing + AndréBlessing JonasKuhn - SabineSchulte Im Walde + SabineSchulte Im Walde 235–241 2021.konvens-1.24 oguz-etal-2021-wordguess @@ -242,7 +242,7 @@ Towards a balanced annotated <fixed-case>L</fixed-case>ow <fixed-case>S</fixed-case>axon dataset for diachronic investigation of dialectal variation JanineSiewert YvesScherrer - JörgTiedemann + JörgTiedemann 242–246 2021.konvens-1.25 siewert-etal-2021-towards diff --git a/data/xml/2021.lantern.xml b/data/xml/2021.lantern.xml index 2459a75442..c5d860c956 100644 --- a/data/xml/2021.lantern.xml +++ b/data/xml/2021.lantern.xml @@ -8,7 +8,7 @@ SandroPezzelle AdityaMogadala DietrichKlakow - Marie-FrancineMoens + Marie-FrancineMoens ZeynepAkata Association for Computational Linguistics
Kyiv, Ukraine
@@ -65,7 +65,7 @@ What Did This Castle Look like before? Exploring Referential Relations in Naturally Occurring Multimodal Texts RonjaUtescher - SinaZarrieß + SinaZarrieß 53–60 Multi-modal texts are abundant and diverse in structure, yet Language & Vision research of these naturally occurring texts has mostly focused on genres that are comparatively light on text, like tweets. In this paper, we discuss the challenges and potential benefits of a L&V framework that explicitly models referential relations, taking Wikipedia articles about buildings as an example. We briefly survey existing related tasks in L&V and propose multi-modal information extraction as a general direction for future research. 2021.lantern-1.5 diff --git a/data/xml/2021.latechclfl.xml b/data/xml/2021.latechclfl.xml index 5aafbcec46..b007a33faa 100644 --- a/data/xml/2021.latechclfl.xml +++ b/data/xml/2021.latechclfl.xml @@ -6,7 +6,7 @@ StefaniaDegaetano-Ortlieb AnnaKazantseva NilsReiter - StanSzpakowicz + StanSzpakowicz Association for Computational Linguistics
Punta Cana, Dominican Republic (online)
November @@ -53,7 +53,7 @@
Quantifying Contextual Aspects of Inter-annotator Agreement in Intertextuality Research - EnriqueManjavacas Arevalo + EnriqueManjavacas Arevalo LaurenceMellerin MikeKestemont 31–42 @@ -132,7 +132,7 @@ Unsupervised Adverbial Identification in <fixed-case>M</fixed-case>odern <fixed-case>C</fixed-case>hinese Literature WenxiuXie - JohnLee + JohnLee FangqiongZhan XiaoHan Chi-YinChow @@ -159,7 +159,7 @@ Translationese in <fixed-case>R</fixed-case>ussian Literary Texts MariaKunilovskaya EkaterinaLapshinova-Koltunski - RuslanMitkov + RuslanMitkov 101–112 The paper reports the results of a translationese study of literary texts based on translated and non-translated Russian. We aim to find out if translations deviate from non-translated literary texts, and if the established differences can be attributed to typological relations between source and target languages. We expect that literary translations from typologically distant languages should exhibit more translationese, and the fingerprints of individual source languages (and their families) are traceable in translations. We explore linguistic properties that distinguish non-translated Russian literature from translations into Russian. Our results show that non-translated fiction is different from translations to the degree that these two language varieties can be automatically classified. As expected, language typology is reflected in translations of literary texts. We identified features that point to linguistic specificity of Russian non-translated literature and to shining-through effects. Some of translationese features cut across all language pairs, while others are characteristic of literary translations from languages belonging to specific language families. 2021.latechclfl-1.12 @@ -205,7 +205,7 @@ Zero-Shot Information Extraction to Enhance a Knowledge Graph Describing Silk Textiles ThomasSchleider - RaphaelTroncy + RaphaelTroncy 138–146 The knowledge of the European silk textile production is a typical case for which the information collected is heterogeneous, spread across many museums and sparse since rarely complete. Knowledge Graphs for this cultural heritage domain, when being developed with appropriate ontologies and vocabularies, enable to integrate and reconcile this diverse information. However, many of these original museum records still have some metadata gaps. In this paper, we present a zero-shot learning approach that leverages the ConceptNet common sense knowledge graph to predict categorical metadata informing about the silk objects production. We compared the performance of our approach with traditional supervised deep learning-based methods that do require training data. We demonstrate promising and competitive performance for similar datasets and circumstances and the ability to predict sometimes more fine-grained information. Our results can be reproduced using the code and datasets published at https://github.com/silknow/ZSL-KG-silk. 2021.latechclfl-1.16 @@ -239,7 +239,7 @@ Period Classification in <fixed-case>C</fixed-case>hinese Historical Texts ZuoyuTian - SandraKübler + SandraKübler 168–177 In this study, we study language change in Chinese Biji by using a classification task: classifying Ancient Chinese texts by time periods. Specifically, we focus on a unique genre in classical Chinese literature: Biji (literally “notebook” or “brush notes”), i.e., collections of anecdotes, quotations, etc., anything authors consider noteworthy, Biji span hundreds of years across many dynasties and conserve informal language in written form. For these reasons, they are regarded as a good resource for investigating language change in Chinese (Fang, 2010). In this paper, we create a new dataset of 108 Biji across four dynasties. Based on the dataset, we first introduce a time period classification task for Chinese. Then we investigate different feature representation methods for classification. The results show that models using contextualized embeddings perform best. An analysis of the top features chosen by the word n-gram model (after bleaching proper nouns) confirms that these features are informative and correspond to observations and assumptions made by historical linguists. 2021.latechclfl-1.19 @@ -248,7 +248,7 @@ A Mixed-Methods Analysis of Western and <fixed-case>H</fixed-case>ong <fixed-case>K</fixed-case>ong–based Reporting on the 2019–2020 Protests - Arya D.McCarthy + Arya D.McCarthy JamesScharf Giovanna Maria DoraDore 178–188 diff --git a/data/xml/2021.law.xml b/data/xml/2021.law.xml index 0bceb7bed5..54e05ab699 100644 --- a/data/xml/2021.law.xml +++ b/data/xml/2021.law.xml @@ -3,7 +3,7 @@ Proceedings of the Joint 15th Linguistic Annotation Workshop (LAW) and 3rd Designing Meaning Representations (DMR) Workshop - ClaireBonial + ClaireBonial NianwenXue Association for Computational Linguistics
Punta Cana, Dominican Republic
@@ -55,7 +55,7 @@ <fixed-case>A</fixed-case>uto<fixed-case>A</fixed-case>spect: Automatic Annotation of Tense and Aspect for Uniform Meaning Representations DanielChen - MarthaPalmer + MarthaPalmer MeaganVigus 36–45 We present AutoAspect, a novel, rule-based annotation tool for labeling tense and aspect. The pilot version annotates English data. The aspect labels are designed specifically for Uniform Meaning Representations (UMR), an annotation schema that aims to encode crosslingual semantic information. The annotation tool combines syntactic and semantic cues to assign aspects on a sentence-by-sentence basis, following a sequence of rules that each output a UMR aspect. Identified events proceed through the sequence until they are assigned an aspect. We achieve a recall of 76.17% for identifying UMR events and an accuracy of 62.57% on all identified events, with high precision values for 2 of the aspect labels. @@ -106,7 +106,7 @@ Sister Help: Data Augmentation for Frame-Semantic Role Labeling AyushPancholy - Miriam R LPetruck + Miriam R LPetruck SwabhaSwayamdipta 78–84 While FrameNet is widely regarded as a rich resource of semantics in natural language processing, a major criticism concerns its lack of coverage and the relative paucity of its labeled data compared to other commonly used lexical resources such as PropBank and VerbNet. This paper reports on a pilot study to address these gaps. We propose a data augmentation approach, which uses existing frame-specific annotation to automatically annotate other lexical units of the same frame which are unannotated. Our rule-based approach defines the notion of a **sister lexical unit** and generates frame-specific augmented data for training. We present experiments on frame-semantic role labeling which demonstrate the importance of this data augmentation: we obtain a large improvement to prior results on frame identification and argument identification for FrameNet, utilizing both full-text and lexicographic annotations under FrameNet. Our findings on data augmentation highlight the value of automatic resource creation for improved models in frame-semantic parsing. @@ -154,8 +154,8 @@ TalhaBedir KarahanŞahin OnurGungor - SuzanUskudarli - ArzucanÖzgür + SuzanUskudarli + ArzucanÖzgür TungaGüngör BalkizOzturk Basaran 112–122 @@ -167,7 +167,7 @@ Automatic Entity State Annotation using the <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et Semantic Parser GhazalehKazeminejad - MarthaPalmer + MarthaPalmer TaoLi VivekSrikumar 123–132 @@ -189,7 +189,7 @@ Increasing Sentence-Level Comprehension Through Text Classification of Epistemic Functions - MariaBerger + MariaBerger ElizabethGoldstein 139–150 Word embeddings capture semantic meaning of individual words. How to bridge word-level linguistic knowledge with sentence-level language representation is an open problem. This paper examines whether sentence-level representations can be achieved by building a custom sentence database focusing on one aspect of a sentence’s meaning. Our three separate semantic aspects are whether the sentence: (1) communicates a causal relationship, (2) indicates that two things are correlated with each other, and (3) expresses information or knowledge. The three classifiers provide epistemic information about a sentence’s content. diff --git a/data/xml/2021.lchange.xml b/data/xml/2021.lchange.xml index c9bb1fedd2..c101240d0a 100644 --- a/data/xml/2021.lchange.xml +++ b/data/xml/2021.lchange.xml @@ -114,7 +114,7 @@ Ana SabinaUban Alina MariaCristea AncaDinu - Liviu P.Dinu + Liviu P.Dinu SimonaGeorgescu LaurentiuZoicas 64–74 diff --git a/data/xml/2021.louhi.xml b/data/xml/2021.louhi.xml index c58e1f5e7c..d3b5747ddf 100644 --- a/data/xml/2021.louhi.xml +++ b/data/xml/2021.louhi.xml @@ -4,10 +4,10 @@ Proceedings of the 12th International Workshop on Health Text Mining and Information Analysis EbenHolderness - AntonioJimeno Yepes - AlbertoLavelli + AntonioJimeno Yepes + AlbertoLavelli Anne-LyseMinard - JamesPustejovsky + JamesPustejovsky FabioRinaldi Association for Computational Linguistics
online
@@ -31,7 +31,7 @@ Multilingual Negation Scope Resolution for Clinical Text MareikeHartmann - AndersSøgaard + AndersSøgaard 7–18 Negation scope resolution is key to high-quality information extraction from clinical texts, but so far, efforts to make encoders used for information extraction negation-aware have been limited to English. We present a universal approach to multilingual negation scope resolution, that overcomes the lack of training data by relying on disparate resources in different languages and domains. We evaluate two approaches to learn from these resources, training on combined data and training in a multi-task learning setup. Our experiments show that zero-shot scope resolution in clinical text is possible, and that combining available resources improves performance in most cases. 2021.louhi-1.2 @@ -44,7 +44,7 @@ RamieFathy BarringtonBennett DanielStokes - Sharath ChandraGuntuku + Sharath ChandraGuntuku 19–27 In online forums focused on health and wellbeing, individuals tend to seek and give the following social support: emotional and informational support. Understanding the expressions of these social supports in an online COVID- 19 forum is important for: (a) the forum and its members to provide the right type of support to individuals and (b) determining the long term effects of the COVID-19 pandemic on the well-being of the public, thereby informing interventions. In this work, we build four machine learning models to measure the extent of the following social supports expressed in each post in a COVID-19 online forum: (a) emotional support given (b) emotional support sought (c) informational support given, and (d) informational support sought. Using these models, we aim to: (i) determine if there is a correlation between the different social supports expressed in posts e.g. when members of the forum give emotional support in posts, do they also tend to give or seek informational support in the same post? (ii) determine how these social supports sought and given changes over time in published posts. We find that (i) there is a positive correlation between the informational support given in posts and the emotional support given and emotional support sought, respectively, in these posts and (ii) over time, users tended to seek more emotional support and give less emotional support. 2021.louhi-1.3 @@ -72,8 +72,8 @@ Integrating Higher-Level Semantics into Robust Biomedical Name Representations PieterFivez - SimonSuster - WalterDaelemans + SimonSuster + WalterDaelemans 49–58 Neural encoders of biomedical names are typically considered robust if representations can be effectively exploited for various downstream NLP tasks. To achieve this, encoders need to model domain-specific biomedical semantics while rivaling the universal applicability of pretrained self-supervised representations. Previous work on robust representations has focused on learning low-level distinctions between names of fine-grained biomedical concepts. These fine-grained concepts can also be clustered together to reflect higher-level, more general semantic distinctions, such as grouping the names nettle sting and tick-borne fever together under the description puncture wound of skin. It has not yet been empirically confirmed that training biomedical name encoders on fine-grained distinctions automatically leads to bottom-up encoding of such higher-level semantics. In this paper, we show that this bottom-up effect exists, but that it is still relatively limited. As a solution, we propose a scalable multi-task training regime for biomedical name encoders which can also learn robust representations using only higher-level semantic classes. These representations can generalise both bottom-up as well as top-down among various semantic hierarchies. Moreover, we show how they can be used out-of-the-box for improved unsupervised detection of hypernyms, while retaining robust performance on various semantic relatedness benchmarks. 2021.louhi-1.6 diff --git a/data/xml/2021.ltedi.xml b/data/xml/2021.ltedi.xml index 53102e16c8..7832057ac4 100644 --- a/data/xml/2021.ltedi.xml +++ b/data/xml/2021.ltedi.xml @@ -4,7 +4,7 @@ Proceedings of the First Workshop on Language Technology for Equality, Diversity and Inclusion Bharathi RajaChakravarthi - John P.McCrae + John P.McCrae ManelZarrouk KalikaBali PaulBuitelaar @@ -39,7 +39,7 @@ Cross-Lingual Transfer Learning for Hate Speech Detection IrinaBigoulaeva ViktorHangya - AlexanderFraser + AlexanderFraser 15–25 We address the task of automatic hate speech detection for low-resource languages. Rather than collecting and annotating new hate speech data, we show how to use cross-lingual transfer learning to leverage already existing data from higher-resource languages. Using bilingual word embeddings based classifiers we achieve good performance on the target language by training only on the source dataset. Using our transferred system we bootstrap on unlabeled target language data, improving the performance of standard cross-lingual transfer approaches. We use English as a high resource language and German as the target language for which only a small amount of annotated corpora are available. Our results indicate that cross-lingual transfer learning together with our approach to leverage additional unlabeled data is an effective way of achieving good performance on low-resource target languages without the need for any target-language annotations. 2021.ltedi-1.3 @@ -74,7 +74,7 @@ <fixed-case>GEPSA</fixed-case>, a tool for monitoring social challenges in digital press IñakiSan Vicente - XabierSaralegi + XabierSaralegi NereaZubia 46–50 This papers presents a platform for monitoring press narratives with respect to several social challenges, including gender equality, migrations and minority languages. As narratives are encoded in natural language, we have to use natural processing techniques to automate their analysis. Thus, crawled news are processed by means of several NLP modules, including named entity recognition, keyword extraction,document classification for social challenge detection, and sentiment analysis. A Flask powered interface provides data visualization for a user-based analysis of the data. This paper presents the architecture of the system and describes in detail its different components. Evaluation is provided for the modules related to extraction and classification of information regarding social challenges. @@ -313,7 +313,7 @@ <fixed-case>CFILT</fixed-case> <fixed-case>IIT</fixed-case> <fixed-case>B</fixed-case>ombay@<fixed-case>LT</fixed-case>-<fixed-case>EDI</fixed-case>-<fixed-case>EACL</fixed-case>2021: Hope Speech Detection for Equality, Diversity, and Inclusion using Multilingual Representation from<fixed-case>T</fixed-case>ransformers PankajSingh PrinceKumar - PushpakBhattacharyya + PushpakBhattacharyya 193–196 With the internet becoming part and parcel of our lives, engagement in social media has increased a lot. Identifying and eliminating offensive content from social media has become of utmost priority to prevent any kind of violence. However, detecting encouraging, supportive and positive content is equally important to prevent misuse of censorship targeted to attack freedom of speech. This paper presents our system for the shared task Hope Speech Detection for Equality, Diversity, and Inclusion at LT-EDI, EACL 2021. The data for this shared task is provided in English, Tamil, and Malayalam which was collected from YouTube comments. It is a multiclass classification problem where each data instance is categorized into one of the three classes: ‘Hope speech’, ‘Not hope speech’, and ‘Not in intended language’. We propose a system that employs multilingual transformer models to obtain the representation of text and classifies it into one of the three classes. We explored the use of multilingual models trained specifically for Indian languages along with generic multilingual models. Our system was ranked 2nd for English, 2nd for Malayalam, and 7th for the Tamil language in the final leader board published by organizers and obtained a weighted F1-score of 0.92, 0.84, 0.55 respectively on the hidden test dataset used for the competition. We have made our system publicly available at GitHub. 2021.ltedi-1.29 diff --git a/data/xml/2021.maiworkshop.xml b/data/xml/2021.maiworkshop.xml index 657fea8104..5099f1876a 100644 --- a/data/xml/2021.maiworkshop.xml +++ b/data/xml/2021.maiworkshop.xml @@ -26,7 +26,7 @@ Multimodal Weighted Fusion of Transformers for Movie Genre Classification IsaacRodríguez Bribiesca Adrián PastorLópez Monroy - ManuelMontes-y-Gómez + ManuelMontes-y-Gómez 1–5 The Multimodal Transformer showed to be a competitive model for multimodal tasks involving textual, visual and audio signals. However, as more modalities are involved, its late fusion by concatenation starts to have a negative impact on the model’s performance. Besides, interpreting model’s predictions becomes difficult, as one would have to look at the different attention activation matrices. In order to overcome these shortcomings, we propose to perform late fusion by adding a GMU module, which effectively allows the model to weight modalities at instance level, improving its performance while providing a better interpretabilty mechanism. In the experiments, we compare our proposed model (MulT-GMU) against the original implementation (MulT-Concat) and a SOTA model tested in a movie genre classification dataset. Our approach, MulT-GMU, outperforms both, MulT-Concat and previous SOTA model. 2021.maiworkshop-1.1 @@ -137,7 +137,7 @@ Li ErranLi ZhitingHu YiXu - DilekHakkani-Tur + DilekHakkani-Tur ZhengDu BelindaZeng 74–78 diff --git a/data/xml/2021.metanlp.xml b/data/xml/2021.metanlp.xml index 91aae5c028..57c2bd7f8b 100644 --- a/data/xml/2021.metanlp.xml +++ b/data/xml/2021.metanlp.xml @@ -10,7 +10,7 @@ MandyKorpusik ShuyanDong Ngoc ThangVu - DilekHakkani-Tur + DilekHakkani-Tur Association for Computational Linguistics
Online
August @@ -50,7 +50,7 @@ Zero-Shot Compositional Concept Learning GuangyueXu ParisaKordjamshidi - JoyceChai + JoyceChai 19–27 In this paper, we study the problem of recognizing compositional attribute-object concepts within the zero-shot learning (ZSL) framework. We propose an episode-based cross-attention (EpiCA) network which combines merits of cross-attention mechanism and episode-based training strategy to recognize novel compositional concepts. Firstly, EpiCA bases on cross-attention to correlate conceptvisual information and utilizes the gated pooling layer to build contextualized representations for both images and concepts. The updated representations are used for a more indepth multi-modal relevance calculation for concept recognition. Secondly, a two-phase episode training strategy, especially the ransductive phase, is adopted to utilize unlabeled test examples to alleviate the low-resource learning problem. Experiments on two widelyused zero-shot compositional learning (ZSCL) benchmarks have demonstrated the effectiveness of the model compared with recent approaches on both conventional and generalized ZSCL settings. 2021.metanlp-1.3 @@ -71,8 +71,8 @@ On the cross-lingual transferability of multilingual prototypical models across <fixed-case>NLU</fixed-case> tasks OralieCattan - SophieRosset - ChristopheServan + SophieRosset + ChristopheServan 36–43 Supervised deep learning-based approaches have been applied to task-oriented dialog and have proven to be effective for limited domain and language applications when a sufficient number of training examples are available. In practice, these approaches suffer from the drawbacks of domain-driven design and under-resourced languages. Domain and language models are supposed to grow and change as the problem space evolves. On one hand, research on transfer learning has demonstrated the cross-lingual ability of multilingual Transformers-based models to learn semantically rich representations. On the other, in addition to the above approaches, meta-learning have enabled the development of task and language learning algorithms capable of far generalization. Through this context, this article proposes to investigate the cross-lingual transferability of using synergistically few-shot learning with prototypical neural networks and multilingual Transformers-based models. Experiments in natural language understanding tasks on MultiATIS++ corpus shows that our approach substantially improves the observed transfer learning performances between the low and the high resource languages. More generally our approach confirms that the meaningful latent space learned in a given language can be can be generalized to unseen and under-resourced ones using meta-learning. 2021.metanlp-1.5 @@ -104,7 +104,7 @@ Semi-supervised Meta-learning for Cross-domain Few-shot Intent Classification - YueLi + YueLi JiongZhang 67–75 Meta learning aims to optimize the model’s capability to generalize to new tasks and domains. Lacking a data-efficient way to create meta training tasks has prevented the application of meta-learning to the real-world few shot learning scenarios. Recent studies have proposed unsupervised approaches to create meta-training tasks from unlabeled data for free, e.g., the SMLMT method (Bansal et al., 2020a) constructs unsupervised multi-class classification tasks from the unlabeled text by randomly masking words in the sentence and let the meta learner choose which word to fill in the blank. This study proposes a semi-supervised meta-learning approach that incorporates both the representation power of large pre-trained language models and the generalization capability of prototypical networks enhanced by SMLMT. The semi-supervised meta training approach avoids overfitting prototypical networks on a small number of labeled training examples and quickly learns cross-domain task-specific representation only from a few supporting examples. By incorporating SMLMT with prototypical networks, the meta learner generalizes better to unseen domains and gains higher accuracy on out-of-scope examples without the heavy lifting of pre-training. We observe significant improvement in few-shot generalization after training only a few epochs on the intent classification tasks evaluated in a multi-domain setting. diff --git a/data/xml/2021.mmsr.xml b/data/xml/2021.mmsr.xml index d29b604ad9..73480a2e88 100644 --- a/data/xml/2021.mmsr.xml +++ b/data/xml/2021.mmsr.xml @@ -6,7 +6,7 @@ LuciaDonatelli NikhilKrishnaswamy KennethLai - JamesPustejovsky + JamesPustejovsky Association for Computational Linguistics
Groningen, Netherlands (Online)
June @@ -91,7 +91,7 @@
Incremental Unit Networks for Multimodal, Fine-grained Information State Representation - CaseyKennington + CaseyKennington DavidSchlangen 89–94 We offer a fine-grained information state annotation scheme that follows directly from the Incremental Unit abstract model of dialogue processing when used within a multimodal, co-located, interactive setting. We explain the Incremental Unit model and give an example application using the Localized Narratives dataset, then offer avenues for future research. diff --git a/data/xml/2021.mmtlrl.xml b/data/xml/2021.mmtlrl.xml index fca787cce4..32450165aa 100644 --- a/data/xml/2021.mmtlrl.xml +++ b/data/xml/2021.mmtlrl.xml @@ -3,10 +3,10 @@ Proceedings of the First Workshop on Multimodal Machine Translation for Low Resource Languages (MMTLRL 2021) - ThoudamDoren Singh + ThoudamDoren Singh CristinaEspaña i Bonet - SivajiBandyopadhyay - Josefvan Genabith + SivajiBandyopadhyay + Josefvan Genabith INCOMA Ltd.
Online (Virtual Mode)
September diff --git a/data/xml/2021.motra.xml b/data/xml/2021.motra.xml index 3eec414004..40ab980d60 100644 --- a/data/xml/2021.motra.xml +++ b/data/xml/2021.motra.xml @@ -6,7 +6,7 @@ YuriBizzoni ElkeTeich CristinaEspaña-Bonet - Josefvan Genabith + Josefvan Genabith Association for Computational Linguistics
online
May @@ -30,7 +30,7 @@
<fixed-case>H</fixed-case>ei<fixed-case>C</fixed-case>i<fixed-case>C</fixed-case>: A simultaneous interpreting corpus combining product and pre-process data - KerstinKunz + KerstinKunz ChristophStoll EvaKlüber 8–14 diff --git a/data/xml/2021.mrl.xml b/data/xml/2021.mrl.xml index 5e09688a67..02a7018f61 100644 --- a/data/xml/2021.mrl.xml +++ b/data/xml/2021.mrl.xml @@ -21,7 +21,7 @@ Language Models are Few-shot Multilingual Learners - Genta IndraWinata + Genta IndraWinata AndreaMadotto ZhaojiangLin RosanneLiu @@ -38,8 +38,8 @@ Learning Contextualised Cross-lingual Word Embeddings and Alignments for Extremely Low-Resource Languages Using Parallel Corpora TakashiWada TomoharuIwata - YujiMatsumoto - TimothyBaldwin + YujiMatsumoto + TimothyBaldwin Jey HanLau 16–31 We propose a new approach for learning contextualised cross-lingual word embeddings based on a small parallel corpus (e.g. a few hundred sentence pairs). Our method obtains word embeddings via an LSTM encoder-decoder model that simultaneously translates and reconstructs an input sentence. Through sharing model parameters among different languages, our model jointly trains the word embeddings in a common cross-lingual space. We also propose to combine word and subword embeddings to make use of orthographic similarities across different languages. We base our experiments on real-world data from endangered languages, namely Yongning Na, Shipibo-Konibo, and Griko. Our experiments on bilingual lexicon induction and word alignment tasks show that our model outperforms existing methods by a large margin for most language pairs. These results demonstrate that, contrary to common belief, an encoder-decoder translation model is beneficial for learning cross-lingual representations even in extremely low-resource conditions. Furthermore, our model also works well on high-resource conditions, achieving state-of-the-art performance on a German-English word-alignment task. @@ -51,7 +51,7 @@ Clustering Monolingual Vocabularies to Improve Cross-Lingual Generalization RiccardoBassani - AndersSøgaard + AndersSøgaard TejaswiniDeoskar 32–40 Multilingual language models exhibit better performance for some languages than for others (Singh et al., 2019), and many languages do not seem to benefit from multilingual sharing at all, presumably as a result of poor multilingual segmentation (Pyysal o et al., 2020). This work explores the idea of learning multilingual language models based on clustering of monolingual segments. We show significant improvements over standard multilingual segmentation and training across nine languages on a question answering task, both in a small model regime and for a model of the size of BERT-base. @@ -64,7 +64,7 @@ Do not neglect related languages: The case of low-resource <fixed-case>O</fixed-case>ccitan cross-lingual word embeddings LisaWoller ViktorHangya - AlexanderFraser + AlexanderFraser 41–50 Cross-lingual word embeddings (CLWEs) have proven indispensable for various natural language processing tasks, e.g., bilingual lexicon induction (BLI). However, the lack of data often impairs the quality of representations. Various approaches requiring only weak cross-lingual supervision were proposed, but current methods still fail to learn good CLWEs for languages with only a small monolingual corpus. We therefore claim that it is necessary to explore further datasets to improve CLWEs in low-resource setups. In this paper we propose to incorporate data of related high-resource languages. In contrast to previous approaches which leverage independently pre-trained embeddings of languages, we (i) train CLWEs for the low-resource and a related language jointly and (ii) map them to the target language to build the final multilingual space. In our experiments we focus on Occitan, a low-resource Romance language which is often neglected due to lack of resources. We leverage data from French, Spanish and Catalan for training and evaluate on the Occitan-English BLI task. By incorporating supporting languages our method outperforms previous approaches by a large margin. Furthermore, our analysis shows that the degree of relatedness between an incorporated language and the low-resource language is critically important. 2021.mrl-1.4 @@ -75,7 +75,7 @@ Specializing Multilingual Language Models: An Empirical Study Ethan C.Chau - Noah A.Smith + Noah A.Smith 51–61 Pretrained multilingual language models have become a common tool in transferring NLP capabilities to low-resource languages, often with adaptations. In this work, we study the performance, extensibility, and interaction of two such adaptations: vocabulary augmentation and script transliteration. Our evaluations on part-of-speech tagging, universal dependency parsing, and named entity recognition in nine diverse low-resource languages uphold the viability of these approaches while raising new questions around how to optimally adapt multilingual models to low-resource settings. 2021.mrl-1.5 @@ -135,7 +135,7 @@ On the Cross-lingual Transferability of Contextualized Sense Embeddings KiamehrRezaee DanielLoureiro - JoseCamacho-Collados + JoseCamacho-Collados Mohammad TaherPilehvar 107–115 In this paper we analyze the extent to which contextualized sense embeddings, i.e., sense embeddings that are computed based on contextualized word embeddings, are transferable across languages. To this end, we compiled a unified cross-lingual benchmark for Word Sense Disambiguation. We then propose two simple strategies to transfer sense-specific knowledge across languages and test them on the benchmark. Experimental results show that this contextualized knowledge can be effectively transferred to similar languages through pre-trained multilingual language models, to the extent that they can out-perform monolingual representations learnednfrom existing language-specific data. @@ -258,7 +258,7 @@ SoojinChung HyunsooWoo MinSong - Jinho D.Choi + Jinho D.Choi 224–237 This paper presents a English-Korean parallel dataset that collects 381K news articles where 1,400 of them, comprising 10K sentences, are manually labeled for crosslingual named entity recognition (NER). The annotation guidelines for the two languages are developed in parallel, that yield the inter-annotator agreement scores of 91 and 88% for English and Korean respectively, indicating sublime quality annotation in our dataset. Three types of crosslingual learning approaches, direct model transfer, embedding projection, and annotation projection, are used to develop zero-shot Korean NER models. Our best model gives the F1-score of 51% that is very encouraging, considering the extremely distinct natures of these two languages. This is pioneering work that explores zero-shot cross-lingual learning between English and Korean and provides rich parallel annotation for a core NLP task such as named entity recognition. 2021.mrl-1.19 diff --git a/data/xml/2021.mrqa.xml b/data/xml/2021.mrqa.xml index 74c2ba6b17..d49195dfa6 100644 --- a/data/xml/2021.mrqa.xml +++ b/data/xml/2021.mrqa.xml @@ -26,7 +26,7 @@ MaximeDe Bruyn EhsanLotfi JeskaBuhmann - WalterDaelemans + WalterDaelemans 1–13 In this paper, we present the first multilingual FAQ dataset publicly available. We collected around 6M FAQ pairs from the web, in 21 different languages. Although this is significantly larger than existing FAQ retrieval datasets, it comes with its own challenges: duplication of content and uneven distribution of topics. We adopt a similar setup as Dense Passage Retrieval (DPR) and test various bi-encoders on this dataset. Our experiments reveal that a multilingual model based on XLM-RoBERTa achieves the best results, except for English. Lower resources languages seem to learn from one another as a multilingual model achieves a higher MRR than language-specific ones. Our qualitative analysis reveals the brittleness of the model on simple word changes. We publicly release our dataset, model, and training script. 2021.mrqa-1.1 @@ -37,7 +37,7 @@ Rethinking the Objectives of Extractive Question Answering MartinFajcik JosefJon - PavelSmrz + PavelSmrz 14–27 This work demonstrates that using the objective with independence assumption for modelling the span probability P (a_s , a_e ) = P (a_s )P (a_e) of span starting at position a_s and ending at position a_e has adverse effects. Therefore we propose multiple approaches to modelling joint probability P (a_s , a_e) directly. Among those, we propose a compound objective, composed from the joint probability while still keeping the objective with independence assumption as an auxiliary objective. We find that the compound objective is consistently superior or equal to other assumptions in exact match. Additionally, we identified common errors caused by the assumption of independence and manually checked the counterpart predictions, demonstrating the impact of the compound objective on the real examples. Our findings are supported via experiments with three extractive QA models (BIDAF, BERT, ALBERT) over six datasets and our code, individual results and manual analysis are available online. 2021.mrqa-1.2 @@ -47,8 +47,8 @@ What Would it Take to get Biomedical <fixed-case>QA</fixed-case> Systems into Practice? GregoryKell - IainMarshall - ByronWallace + IainMarshall + ByronWallace AndreJaun 28–41 Medical question answering (QA) systems have the potential to answer clinicians’ uncertainties about treatment and diagnosis on-demand, informed by the latest evidence. However, despite the significant progress in general QA made by the NLP community, medical QA systems are still not widely used in clinical environments. One likely reason for this is that clinicians may not readily trust QA system outputs, in part because transparency, trustworthiness, and provenance have not been key considerations in the design of such models. In this paper we discuss a set of criteria that, if met, we argue would likely increase the utility of biomedical QA systems, which may in turn lead to adoption of such systems in practice. We assess existing models, tasks, and datasets with respect to these criteria, highlighting shortcomings of previously proposed approaches and pointing toward what might be more usable QA systems. @@ -83,7 +83,7 @@ Can Question Generation Debias Question Answering Models? A Case Study on Question–Context Lexical Overlap KazutoshiShinoda SakuSugawara - AkikoAizawa + AkikoAizawa 63–72 Question answering (QA) models for reading comprehension have been demonstrated to exploit unintended dataset biases such as question–context lexical overlap. This hinders QA models from generalizing to under-represented samples such as questions with low lexical overlap. Question generation (QG), a method for augmenting QA datasets, can be a solution for such performance degradation if QG can properly debias QA datasets. However, we discover that recent neural QG models are biased towards generating questions with high lexical overlap, which can amplify the dataset bias. Moreover, our analysis reveals that data augmentation with these QG models frequently impairs the performance on questions with low lexical overlap, while improving that on questions with high lexical overlap. To address this problem, we use a synonym replacement-based approach to augment questions with low lexical overlap. We demonstrate that the proposed data augmentation approach is simple yet effective to mitigate the degradation problem with only 70k synthetic examples. 2021.mrqa-1.6 @@ -93,8 +93,8 @@ What Can a Generative Language Model Answer About a Passage? DouglasSummers-Stay - ClaireBonial - ClareVoss + ClaireBonial + ClareVoss 73–81 Generative language models trained on large, diverse corpora can answer questions about a passage by generating the most likely continuation of the passage followed by a question/answer pair. However, accuracy rates vary depending on the type of question asked. In this paper we keep the passage fixed, and test with a wide variety of question types, exploring the strengths and weaknesses of the GPT-3 language model. We provide the passage and test questions as a challenge set for other language models. 2021.mrqa-1.7 diff --git a/data/xml/2021.mtsummit.xml b/data/xml/2021.mtsummit.xml index e413a58573..c065dd5994 100644 --- a/data/xml/2021.mtsummit.xml +++ b/data/xml/2021.mtsummit.xml @@ -8,7 +8,7 @@ August 2021 KevinDuh - FranciscoGuzmán + FranciscoGuzmán 2021.mtsummit-research mtsummit @@ -20,7 +20,7 @@ Learning Curricula for Multilingual Neural Machine Translation Training GauravKumar PhilippKoehn - SanjeevKhudanpur + SanjeevKhudanpur 1-9 2021.mtsummit-research.1 Low-resource Multilingual Neural Machine Translation (MNMT) is typically tasked with improving the translation performance on one or more language pairs with the aid of high-resource language pairs. In this paper and we propose two simple search based curricula – orderings of the multilingual training data – which help improve translation performance in conjunction with existing techniques such as fine-tuning. Additionally and we attempt to learn a curriculum for MNMT from scratch jointly with the training of the translation system using contextual multi-arm bandits. We show on the FLORES low-resource translation dataset that these learned curricula can provide better starting points for fine tuning and improve overall performance of the translation system. @@ -32,7 +32,7 @@ DhanvanthBoppana RejwanulHaque AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 10-22 2021.mtsummit-research.2 Interactive-predictive translation is a collaborative iterative process and where human translators produce translations with the help of machine translation (MT) systems interactively. Various sampling techniques in active learning (AL) exist to update the neural MT (NMT) model in the interactive-predictive scenario. In this paper and we explore term based (named entity count (NEC)) and quality based (quality estimation (QE) and sentence similarity (Sim)) sampling techniques – which are used to find the ideal candidates from the incoming data – for human supervision and MT model’s weight updation. We carried out experiments with three language pairs and viz. German-English and Spanish-English and Hindi-English. Our proposed sampling technique yields 1.82 and 0.77 and 0.81 BLEU points improvements for German-English and Spanish-English and Hindi-English and respectively and over random sampling based baseline. It also improves the present state-of-the-art by 0.35 and 0.12 BLEU points for German-English and Spanish-English and respectively. Human editing effort in terms of number-of-words-changed also improves by 5 and 4 points for German-English and Spanish-English and respectively and compared to the state-of-the-art. @@ -42,7 +42,7 @@ Crosslingual Embeddings are Essential in <fixed-case>UNMT</fixed-case> for distant languages: An <fixed-case>E</fixed-case>nglish to <fixed-case>I</fixed-case>ndo<fixed-case>A</fixed-case>ryan Case Study TamaliBanerjee RudraV Murthy - PushpakBhattacharya + PushpakBhattacharya 23-34 2021.mtsummit-research.3 Recent advances in Unsupervised Neural Machine Translation (UNMT) has minimized the gap between supervised and unsupervised machine translation performance for closely related language-pairs. However and the situation is very different for distant language pairs. Lack of overlap in lexicon and low syntactic similarity such as between English and IndoAryan languages leads to poor translation quality in existing UNMT systems. In this paper and we show that initialising the embedding layer of UNMT models with cross-lingual embeddings leads to significant BLEU score improvements over existing UNMT models where the embedding layer weights are randomly initialized. Further and freezing the embedding layer weights leads to better gains compared to updating the embedding layer weights during training. We experimented using Masked Sequence to Sequence (MASS) and Denoising Autoencoder (DAE) UNMT approaches for three distant language pairs. The proposed cross-lingual embedding initialization yields BLEU score improvement of as much as ten times over the baseline for English-Hindi and English-Bengali and English-Gujarati. Our analysis shows that initialising embedding layer with static cross-lingual embedding mapping is essential for training of UNMT models for distant language-pairs. @@ -55,7 +55,7 @@ ShivamMhaskar SourabhDeoghare AmanSehgal - PushpakBhattacharyya + PushpakBhattacharyya 35-47 2021.mtsummit-research.4 In this paper and we explore different techniques of overcoming the challenges of low-resource in Neural Machine Translation (NMT) and specifically focusing on the case of English-Marathi NMT. NMT systems require a large amount of parallel corpora to obtain good quality translations. We try to mitigate the low-resource problem by augmenting parallel corpora or by using transfer learning. Techniques such as Phrase Table Injection (PTI) and back-translation and mixing of language corpora are used for enhancing the parallel data; whereas pivoting and multilingual embeddings are used to leverage transfer learning. For pivoting and Hindi comes in as assisting language for English-Marathi translation. Compared to baseline transformer model and a significant improvement trend in BLEU score is observed across various techniques. We have done extensive manual and automatic and qualitative evaluation of our systems. Since the trend in Machine Translation (MT) today is post-editing and measuring of Human Effort Reduction (HER) and we have given our preliminary observations on Translation Edit Rate (TER) vs. BLEU score study and where TER is regarded as a measure of HER. @@ -73,9 +73,9 @@ The Effect of Domain and Diacritics in <fixed-case>Y</fixed-case>oruba–<fixed-case>E</fixed-case>nglish Neural Machine Translation - David IfeoluwaAdelani + David IfeoluwaAdelani DanaRuiter - Jesujoba O.Alabi + Jesujoba O.Alabi DamilolaAdebonojo AdesinaAyeni MofeAdeyemi @@ -92,7 +92,7 @@ Our models outperform massively multilingual models such as Google (+8 Integrating Unsupervised Data Generation into Self-Supervised Neural Machine Translation for Low-Resource Languages DanaRuiter DietrichKlakow - Josefvan Genabith + Josefvan Genabith CristinaEspaña-Bonet 76-91 2021.mtsummit-research.7 @@ -104,13 +104,13 @@ Our models outperform massively multilingual models such as Google (+8 AlexandraBirch BarryHaddow AntonioValerio Miceli Barone - JindrichHelcl + JindrichHelcl JonasWaldendorf FelipeSánchez Martínez - MikelForcada + MikelForcada VíctorSánchez Cartagena Juan AntonioPérez-Ortiz - MiquelEsplà-Gomis + MiquelEsplà-Gomis WilkerAziz LinaMurady SeviSariisik @@ -144,7 +144,7 @@ Our models outperform massively multilingual models such as Google (+8 Scrambled Translation Problem: A Problem of Denoising <fixed-case>UNMT</fixed-case> TamaliBanerjee RudraV Murthy - PushpakBhattacharya + PushpakBhattacharya 127-138 2021.mtsummit-research.11 In this paper and we identify an interesting kind of error in the output of Unsupervised Neural Machine Translation (UNMT) systems like Undreamt1. We refer to this error type as Scrambled Translation problem. We observe that UNMT models which use word shuffle noise (as in case of Undreamt) can generate correct words and but fail to stitch them together to form phrases. As a result and words of the translated sentence look scrambled and resulting in decreased BLEU. We hypothesise that the reason behind scrambled translation problem is ’shuffling noise’ which is introduced in every input sentence as a denoising strategy. To test our hypothesis and we experiment by retraining UNMT models with a simple retraining strategy. We stop the training of the Denoising UNMT model after a pre-decided number of iterations and resume the training for the remaining iterations- which number is also pre-decided- using original sentence as input without adding any noise. Our proposed solution achieves significant performance improvement UNMT models that train conventionally. We demonstrate these performance gains on four language pairs and viz. and English-French and English-German and English-Spanish and Hindi-Punjabi. Our qualitative and quantitative analysis shows that the retraining strategy helps achieve better alignment as observed by attention heatmap and better phrasal translation and leading to statistically significant improvement in BLEU scores. @@ -177,7 +177,7 @@ Our models outperform massively multilingual models such as Google (+8 On nature and causes of observed <fixed-case>MT</fixed-case> errors - MajaPopovic + MajaPopovic 163-175 2021.mtsummit-research.14 This work describes analysis of nature and causes of MT errors observed by different evaluators under guidance of different quality criteria: adequacy and comprehension and and a not specified generic mixture of adequacy and fluency. We report results for three language pairs and two domains and eleven MT systems. Our findings indicate that and despite the fact that some of the identified phenomena depend on domain and/or language and the following set of phenomena can be considered as generally challenging for modern MT systems: rephrasing groups of words and translation of ambiguous source words and translating noun phrases and and mistranslations. Furthermore and we show that the quality criterion also has impact on error perception. Our findings indicate that comprehension and adequacy can be assessed simultaneously by different evaluators and so that comprehension and as an important quality criterion and can be included more often in human evaluations. @@ -187,7 +187,7 @@ Our models outperform massively multilingual models such as Google (+8 A Comparison of Sentence-Weighting Techniques for <fixed-case>NMT</fixed-case> SimonRieß MatthiasHuck - AlexFraser + AlexFraser 176-187 2021.mtsummit-research.15 Sentence weighting is a simple and powerful domain adaptation technique. We carry out domain classification for computing sentence weights with 1) language model cross entropy difference 2) a convolutional neural network 3) a Recursive Neural Tensor Network. We compare these approaches with regard to domain classification accuracy and and study the posterior probability distributions. Then we carry out NMT experiments in the scenario where we have no in-domain parallel corpora and and only very limited in-domain monolingual corpora. Here and we use the domain classifier to reweight the sentences of our out-of-domain training corpus. This leads to improvements of up to 2.1 BLEU for German to English translation. @@ -196,7 +196,7 @@ Our models outperform massively multilingual models such as Google (+8 Sentiment-based Candidate Selection for <fixed-case>NMT</fixed-case> AlexanderJones - DerryWijaya + DerryWijaya 188-201 2021.mtsummit-research.16 The explosion of user-generated content (UGC)—e.g. social media posts and comments and and reviews—has motivated the development of NLP applications tailored to these types of informal texts. Prevalent among these applications have been sentiment analysis and machine translation (MT). Grounded in the observation that UGC features highly idiomatic and sentiment-charged language and we propose a decoder-side approach that incorporates automatic sentiment scoring into the MT candidate selection process. We train monolingual sentiment classifiers in English and Spanish and in addition to a multilingual sentiment model and by fine-tuning BERT and XLM-RoBERTa. Using n-best candidates generated by a baseline MT model with beam search and we select the candidate that minimizes the absolute difference between the sentiment score of the source sentence and that of the translation and and perform two human evaluations to assess the produced translations. Unlike previous work and we select this minimally divergent translation by considering the sentiment scores of the source sentence and translation on a continuous interval and rather than using e.g. binary classification and allowing for more fine-grained selection of translation candidates. The results of human evaluations show that and in comparison to the open-source MT baseline model on top of which our sentiment-based pipeline is built and our pipeline produces more accurate translations of colloquial and sentiment-heavy source texts. @@ -253,7 +253,7 @@ Our models outperform massively multilingual models such as Google (+8 Introducing Mouse Actions into Interactive-Predictive Neural Machine Translation ÁngelNavarro - FranciscoCasacuberta + FranciscoCasacuberta 270-281 2021.mtsummit-research.22 The quality of the translations generated by Machine Translation (MT) systems has highly improved through the years and but we are still far away to obtain fully automatic high-quality translations. To generate them and translators make use of Computer-Assisted Translation (CAT) tools and among which we find the Interactive-Predictive Machine Translation (IPMT) systems. In this paper and we use bandit feedback as the main and only information needed to generate new predictions that correct the previous translations. The application of bandit feedback reduces significantly the number of words that the translator need to type in an IPMT session. In conclusion and the use of this technique saves useful time and effort to translators and its performance improves with the future advances in MT and so we recommend its application in the actuals IPMT systems. @@ -298,7 +298,7 @@ Our models outperform massively multilingual models such as Google (+8 Seed Words Based Data Selection for Language Model Adaptation RobertoGretter - MarcoMatassoni + MarcoMatassoni DanieleFalavigna 1-12 2021.mtsummit-asltrw.1 @@ -308,7 +308,7 @@ Our models outperform massively multilingual models such as Google (+8 Post-Editing Job Profiles for Subtitlers AnkeTardel - SilviaHansen-Schirra + SilviaHansen-Schirra JeanNitzke 13-22 2021.mtsummit-asltrw.2 @@ -317,10 +317,10 @@ Our models outperform massively multilingual models such as Google (+8 Operating a Complex <fixed-case>SLT</fixed-case> System with Speakers and Human Interpreters - OndřejBojar + OndřejBojar VojtěchSrdečný RishuKumar - OtakarSmrž + OtakarSmrž FelixSchneider BarryHaddow PhilWilliams @@ -334,7 +334,7 @@ Our models outperform massively multilingual models such as Google (+8 Simultaneous Speech Translation for Live Subtitling: from Delay to Display AlinaKarakanta SaraPapi - MatteoNegri + MatteoNegri MarcoTurchi 35-48 2021.mtsummit-asltrw.4 @@ -402,7 +402,7 @@ Our models outperform massively multilingual models such as Google (+8 FrankiePicron DavyVan Landuyt TinaSioen - AnneliesBraffort + AnneliesBraffort MichaelFilhol SarahEbling ThomasHanke @@ -422,7 +422,7 @@ Our models outperform massively multilingual models such as Google (+8 AmelieUnger KristofferWaldow SonjaWecker - ElisabethAndré + ElisabethAndré StephanBusemann ChristianDold ArnulphFuhrmann @@ -570,8 +570,8 @@ Our models outperform massively multilingual models such as Google (+8 Corpus Creation and Evaluation for Speech-to-Text and Speech Translation CoreyMiller - EvelyneTzoukermann - JenniferDoyon + EvelyneTzoukermann + JenniferDoyon ElizabethMallard 44-53 2021.mtsummit-up.6 @@ -581,7 +581,7 @@ Our models outperform massively multilingual models such as Google (+8 From Research to Production: Fine-Grained Analysis of Terminology Integration TomsBergmanis - MārcisPinnis + MārcisPinnis PaulaReichenberg 54-77 Dynamic terminology integration in neural machine translation (NMT) is a sought-after feature of computer-aided translation tools among language service providers and small to medium businesses. Despite the recent surge in research on terminology integration in NMT, it still is seldom or inadequately supported in commercial machine translation solutions. In this presentation, we will share our experience of developing and deploying terminology integration capabilities for NMT systems in production. We will look at the three core tasks of terminology integration: terminology management, terminology identification, and translation with terminology. This talk will be insightful for NMT system developers, translators, terminologists, and anyone interested in translation projects. @@ -591,7 +591,7 @@ Our models outperform massively multilingual models such as Google (+8 Glossary functionality in commercial machine translation: does it help? A first step to identify best practices for a language service provider RandyScansani - LoïcDugast + LoïcDugast 78-88 2021.mtsummit-up.8 Recently, a number of commercial Machine Translation (MT) providers have started to offer glossary features allowing users to enforce terminology into the output of a generic model. However, to the best of our knowledge it is not clear how such features would impact terminology accuracy and the overall quality of the output. The present contribution aims at providing a first insight into the performance of the glossary-enhanced generic models offered by four providers. Our tests involve two different domains and language pairs, i.e. Sportswear En–Fr and Industrial Equipment De–En. The output of each generic model and of the glossaryenhanced one will be evaluated relying on Translation Error Rate (TER) to take into account the overall output quality and on accuracy to assess the compliance with the glossary. This is followed by a manual evaluation. The present contribution mainly focuses on understanding how these glossary features can be fruitfully exploited by language service providers (LSPs), especially in a scenario in which a customer glossary is already available and is added to the generic model as is. @@ -668,7 +668,7 @@ Our models outperform massively multilingual models such as Google (+8 Field Experiments of Real Time Foreign News Distribution Powered by <fixed-case>MT</fixed-case> KeijiYasuda IchiroYamada - NaoakiOkazaki + NaoakiOkazaki HidekiTanaka HidehiroAsaka TakeshiAnzai @@ -732,16 +732,16 @@ Our models outperform massively multilingual models such as Google (+8 Neural Translation for <fixed-case>E</fixed-case>uropean <fixed-case>U</fixed-case>nion (<fixed-case>NTEU</fixed-case>) - MercedesGarcía-Martínez + MercedesGarcía-Martínez LaurentBié AleixCerdà AmandoEstela ManuelHerranz - RihardsKrišlauks + RihardsKrišlauks MaiteMelero TonyO’Dowd SineadO’Gorman - MarcisPinnis + MarcisPinnis ArtūrsStafanovič RiccardoSuperbo ArtūrsVasiļevskis @@ -780,7 +780,7 @@ Our models outperform massively multilingual models such as Google (+8 Using speech technology in the translation process workflow in international organizations: A quantitative and qualitative study - PierretteBouillon + PierretteBouillon JeevanthiLiyanapathirana 382-395 In international organizations, the growing demand for translations has increased the need for post-editing. Different studies show that automatic speech recognition systems have the potential to increase the productivity of the translation process as well as the quality. In this talk, we will explore the possibilities of using speech in the translation process by conducting a post-editing experiment with three professional translators in an international organization. Our experiment consisted of comparing three translation methods: speaking the translation with MT as an inspiration (RESpeaking), post-editing the MT suggestions by typing (PE), and editing the MT suggestion using speech (SPE). BLEU and HTER scores were used to compare the three methods. Our study shows that translators did more edits under condition RES, whereas in SPE, the resulting translations were closer to the reference according to the BLEU score and required less edits. Time taken to translate was the least in SPE followed by PE, RES methods and the translators preferred using speech to typing. These results show the potential of speech when it is coupled with post-editing. To the best of our knowledge, this is the first quantitative study conducted on using post-editing and speech together in large scale international organizations. @@ -836,8 +836,8 @@ Our models outperform massively multilingual models such as Google (+8 August 2021 JohnOrtega - Atul Kr.Ojha - KatharinaKann + Atul Kr.Ojha + KatharinaKann Chao-HongLiu 2021.mtsummit-loresmt loresmt @@ -852,7 +852,7 @@ Our models outperform massively multilingual models such as Google (+8 ChanjunPark HyeonseokMoon JaehyungSeo - HeuiseokLim + HeuiseokLim 1-10 2021.mtsummit-loresmt.1 In quality estimation (QE), the quality of translation can be predicted by referencing the source sentence and the machine translation (MT) output without access to the reference sentence. However, there exists a paradox in that constructing a dataset for creating a QE model requires non-trivial human labor and time, and it may even requires additional effort compared to the cost of constructing a parallel corpus. In this study, to address this paradox and utilize the various applications of QE, even in low-resource languages (LRLs), we propose a method for automatically constructing a pseudo-QE dataset without using human labor. We perform a comparative analysis on the pseudo-QE dataset using multilingual pre-trained language models. As we generate the pseudo dataset, we conduct experiments using various external machine translators as test sets to verify the accuracy of the results objectively. Also, the experimental results show that multilingual BART demonstrates the best performance, and we confirm the applicability of QE in LRLs using pseudo-QE dataset construction methods. @@ -879,7 +879,7 @@ Our models outperform massively multilingual models such as Google (+8 Active Learning for Massively Parallel Translation of Constrained Text into Low Resource Languages ZhongZhou - AlexWaibel + AlexWaibel 32-43 2021.mtsummit-loresmt.4 We translate a closed text that is known in advance and available in many languages into a new and severely low resource language. Most human translation efforts adopt a portionbased approach to translate consecutive pages/chapters in order, which may not suit machine translation. We compare the portion-based approach that optimizes coherence of the text locally with the random sampling approach that increases coverage of the text globally. Our results show that the random sampling approach performs better. When training on a seed corpus of ∼1,000 lines from the Bible and testing on the rest of the Bible (∼30,000 lines), random sampling gives a performance gain of +11.0 BLEU using English as a simulated low resource language, and +4.9 BLEU using Eastern Pokomchi, a Mayan language. Furthermore, we compare three ways of updating machine translation models with increasing amount of human post-edited data through iterations. We find that adding newly post-edited data to training after vocabulary update without self-supervision performs the best. We propose an algorithm for human and machine to work together seamlessly to translate a closed text into a severely low resource language. @@ -902,7 +902,7 @@ Our models outperform massively multilingual models such as Google (+8 CaitlinSmith EricRosen AsliCelikyilmaz - R. ThomasMcCoy + R. ThomasMcCoy YichenJiang ColemanHaley RolandFernandez @@ -939,7 +939,7 @@ Our models outperform massively multilingual models such as Google (+8 Sahinur RahmanLaskar AbdullahFaiz Ur Rahman Khilji Darsh Kaushik ParthaPakray - SivajiBandyopadhyay + SivajiBandyopadhyay 89-95 2021.mtsummit-loresmt.9 In machine translation, corpus preparation is one of the crucial tasks, particularly for lowresource pairs. In multilingual countries like India, machine translation plays a vital role in communication among people with various linguistic backgrounds. There are available online automatic translation systems by Google and Microsoft which include various languages which lack support for the Khasi language, which can hence be considered lowresource. This paper overviews the development of EnKhCorp1.0, a corpus for English–Khasi pair, and implemented baseline systems for EnglishtoKhasi and KhasitoEnglish translation based on the neural machine translation approach. @@ -948,7 +948,7 @@ Our models outperform massively multilingual models such as Google (+8 Zero-Shot Neural Machine Translation with Self-Learning Cycle Surafel M.Lakew - MatteoNegri + MatteoNegri MarcoTurchi 96-113 2021.mtsummit-loresmt.10 @@ -971,7 +971,7 @@ Our models outperform massively multilingual models such as Google (+8 A3-108 Machine Translation System for <fixed-case>L</fixed-case>o<fixed-case>R</fixed-case>es<fixed-case>MT</fixed-case> Shared Task @<fixed-case>MT</fixed-case> Summit 2021 Conference SaumitraYadav - ManishShrivastava + ManishShrivastava 124-128 2021.mtsummit-loresmt.12 In this paper, we describe our submissions for LoResMT Shared Task @MT Summit 2021 Conference. We built statistical translation systems in each direction for English ⇐⇒ Marathi language pair. This paper outlines initial baseline experiments with various tokenization schemes to train models. Using optimal tokenization scheme we create synthetic data and further train augmented dataset to create more statistical models. Also, we reorder English to match Marathi syntax to further train another set of baseline and data augmented models using various tokenization schemes. We report configuration of the submitted systems and results produced by them. @@ -1013,7 +1013,7 @@ Our models outperform massively multilingual models such as Google (+8 <fixed-case>E</fixed-case>nglish-<fixed-case>M</fixed-case>arathi Neural Machine Translation for <fixed-case>L</fixed-case>o<fixed-case>R</fixed-case>es<fixed-case>MT</fixed-case> 2021 VandanMujadia - Dipti MisraSharma + Dipti MisraSharma 151-157 2021.mtsummit-loresmt.16 In this paper, we (team - oneNLP-IIITH) describe our Neural Machine Translation approaches for English-Marathi (both direction) for LoResMT-20211 . We experimented with transformer based Neural Machine Translation and explored the use of different linguistic features like POS and Morph on subword unit for both English-Marathi and Marathi-English. In addition, we have also explored forward and backward translation using web-crawled monolingual data. We obtained 22.2 (overall 2 nd) and 31.3 (overall 1 st) BLEU scores for English-Marathi and Marathi-English on respectively @@ -1023,7 +1023,7 @@ Our models outperform massively multilingual models such as Google (+8 Evaluating the Performance of Back-translation for Low Resource <fixed-case>E</fixed-case>nglish-<fixed-case>M</fixed-case>arathi Language Pair: <fixed-case>CFILT</fixed-case>-<fixed-case>IITB</fixed-case>ombay @ <fixed-case>L</fixed-case>o<fixed-case>R</fixed-case>es<fixed-case>MT</fixed-case> 2021 AdityaJain ShivamMhaskar - PushpakBhattacharyya + PushpakBhattacharyya 158-162 2021.mtsummit-loresmt.17 In this paper, we discuss the details of the various Machine Translation (MT) systems that we have submitted for the English-Marathi LoResMT task. As a part of this task, we have submitted three different Neural Machine Translation (NMT) systems; a Baseline English-Marathi system, a Baseline Marathi-English system, and an English-Marathi system that is based on the back-translation technique. We explore the performance of these NMT systems between English and Marathi languages, which forms a low resource language pair due to unavailability of sufficient parallel data. We also explore the performance of the back-translation technique when the back-translated data is obtained from NMT systems that are trained on a very less amount of data. From our experiments, we observe that the back-translation technique can help improve the MT quality over the baseline for the English-Marathi language pair. diff --git a/data/xml/2021.mwe.xml b/data/xml/2021.mwe.xml index b0a4dd9d06..926f052253 100644 --- a/data/xml/2021.mwe.xml +++ b/data/xml/2021.mwe.xml @@ -5,7 +5,7 @@ Proceedings of the 17th Workshop on Multiword Expressions (MWE 2021) PaulCook JelenaMitrović - Carla ParraEscartín + Carla ParraEscartín AshwiniVaidya PetyaOsenova ShivaTaslimipoor @@ -33,7 +33,7 @@ Where Do Aspectual Variants of Light Verb Constructions Belong? AggelikiFotopoulou - EricLaporte + EricLaporte TakuyaNakamura 2–12 Expressions with an aspectual variant of a light verb, e.g. ‘take on debt’ vs. ‘have debt’, are frequent in texts but often difficult to classify between verbal idioms, light verb constructions or compositional phrases. We investigate the properties of such expressions with a disputed membership and propose a selection of features that determine more satisfactory boundaries between the three categories in this zone, assigning the expressions to one of them. @@ -76,7 +76,7 @@ Lexical Semantic Recognition - Nelson F.Liu + Nelson F.Liu DanielHershcovich MichaelKranzlein NathanSchneider @@ -90,7 +90,7 @@ Finding <fixed-case>BERT</fixed-case>’s Idiomatic Key VasudevanNedumpozhimana - JohnKelleher + JohnKelleher 57–62 Sentence embeddings encode information relating to the usage of idioms in a sentence. This paper reports a set of experiments that combine a probing methodology with input masking to analyse where in a sentence this idiomatic information is taken from, and what form it takes. Our results indicate that BERT’s idiomatic key is primarily found within an idiomatic expression, but also draws on information from the surrounding context. Also, BERT can distinguish between the disruption in a sentence caused by words missing and the incongruity caused by idiomatic usage. 2021.mwe-1.7 diff --git a/data/xml/2021.naacl.xml b/data/xml/2021.naacl.xml index 12ba7ee475..5adc0f0989 100644 --- a/data/xml/2021.naacl.xml +++ b/data/xml/2021.naacl.xml @@ -5,8 +5,8 @@ Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies KristinaToutanova AnnaRumshisky - LukeZettlemoyer - DilekHakkani-Tur + LukeZettlemoyer + DilekHakkani-Tur IzBeltagy StevenBethard RyanCotterell @@ -89,7 +89,7 @@ HengJi QiangNing JiaweiHan - AviSil + AviSil HanghangTong DanRoth 62–73 @@ -102,9 +102,9 @@ Probing Word Translations in the Transformer and Trading Decoder for Encoder Layers HongfeiXu - Josefvan Genabith + Josefvan Genabith QiuhuiLiu - DeyiXiong + DeyiXiong 74–85 Due to its effectiveness and performance, the Transformer translation model has attracted wide attention, most recently in terms of probing-based approaches. Previous work focuses on using or probing source linguistic features in the encoder. To date, the way word translation evolves in Transformer layers has not yet been investigated. Naively, one might assume that encoder layers capture source information while decoder layers translate. In this work, we show that this is not quite the case: translation already happens progressively in encoder layers and even in the input embeddings. More surprisingly, we find that some of the lower decoder layers do not actually do that much decoding. We show all of this in terms of a probing approach where we project representations of the layer analyzed to the final trained and frozen classifier level of the Transformer decoder to measure word translation accuracy. Our findings motivate and explain a Transformer configuration change: if translation already happens in the encoder layers, perhaps we can increase the number of encoder layers, while decreasing the number of decoder layers, boosting decoding speed, without loss in translation quality? Our experiments show that this is indeed the case: we can increase speed by up to a factor 2.3 with small gains in translation quality, while an 18-4 deep encoder configuration boosts translation quality by +1.42 BLEU (En-De) at a speed-up of 1.4. 2021.naacl-main.7 @@ -193,10 +193,10 @@ VilémZouhar MichalNovák MatúšŽilinec - OndřejBojar + OndřejBojar MateoObregón - Robin L.Hill - FrédéricBlain + Robin L.Hill + FrédéricBlain MarinaFomicheva LuciaSpecia LisaYankovskaya @@ -213,7 +213,7 @@ ChristianHerold JanRosendahl JorisVanvinckenroye - HermannNey + HermannNey 162–172 Data filtering for machine translation (MT) describes the task of selecting a subset of a given, possibly noisy corpus with the aim to maximize the performance of an MT system trained on this selected data. Over the years, many different filtering approaches have been proposed. However, varying task definitions and data conditions make it difficult to draw a meaningful comparison. In the present work, we aim for a more systematic approach to the task at hand. First, we analyze the performance of language identification, a tool commonly used for data filtering in the MT community and identify specific weaknesses. Based on our findings, we then propose several novel methods for data filtering, based on cross-lingual word embeddings. We compare our approaches to one of the winning methods from the WMT 2018 shared task on parallel corpus filtering on three real-life, high resource MT tasks. We find that said method, which was performing very strong in the WMT shared task, does not perform well within our more realistic task conditions. While we find that our approaches come out at the top on all three tasks, different variants perform best on different tasks. Further experiments on the WMT 2020 shared task for parallel corpus filtering show that our methods achieve comparable results to the strongest submissions of this campaign. 2021.naacl-main.15 @@ -225,7 +225,7 @@ Improving the Lexical Ability of Pretrained Language Models for Unsupervised Neural Machine Translation AlexandraChronopoulou DarioStojanovski - AlexanderFraser + AlexanderFraser 173–180 Successful methods for unsupervised neural machine translation (UNMT) employ cross-lingual pretraining via self-supervision, often in the form of a masked language modeling or a sequence generation task, which requires the model to align the lexical- and high-level representations of the two languages. While cross-lingual pretraining works for similar languages with abundant corpora, it performs poorly in low-resource and distant languages. Previous research has shown that this is because the representations are not sufficiently aligned. In this paper, we enhance the bilingual masked language model pretraining with lexical-level information by using type-level cross-lingual subword embeddings. Empirical results demonstrate improved performance both on UNMT (up to 4.5 BLEU) and bilingual lexicon induction using our method compared to a UNMT baseline. 2021.naacl-main.16 @@ -248,7 +248,7 @@ Counterfactual Data Augmentation for Neural Machine Translation QiLiu MattKusner - PhilBlunsom + PhilBlunsom 187–197 We propose a data augmentation method for neural machine translation. It works by interpreting language models and phrasal alignment causally. Specifically, it creates augmented parallel translation corpora by generating (path-specific) counterfactual aligned phrases. We generate these by sampling new source phrases from a masked language model, then sampling an aligned counterfactual target phrase by noting that a translation language model can be interpreted as a Gumbel-Max Structural Causal Model (Oberst and Sontag, 2019). Compared to previous work, our method takes both context and alignment into account to maintain the symmetry between source and target sequences. Experiments on IWSLT’15 English → Vietnamese, WMT’17 English → German, WMT’18 English → Turkish, and WMT’19 robust English → French show that the method can improve the performance of translation, backtranslation and translation robustness. 2021.naacl-main.18 @@ -262,7 +262,7 @@ IsidoraTourni Mohammad SadeghRasooli ChrisCallison-Burch - Derry TantiWijaya + Derry TantiWijaya 198–209 Neural Machine Translation (NMT) models have been observed to produce poor translations when there are few/no parallel sentences to train the models. In the absence of parallel data, several approaches have turned to the use of images to learn translations. Since images of words, e.g., horse may be unchanged across languages, translations can be identified via images associated with words in different languages that have a high degree of visual similarity. However, translating via images has been shown to improve upon text-only models only marginally. To better understand when images are useful for translation, we study image translatability of words, which we define as the translatability of words via images, by measuring intra- and inter-cluster similarities of image representations of words that are translations of each other. We find that images of words are not always invariant across languages, and that language pairs with shared culture, meaning having either a common language family, ethnicity or religion, have improved image translatability (i.e., have more similar images for similar words) compared to its converse, regardless of their geographic proximity. In addition, in line with previous works that show images help more in translating concrete words, we found that concrete words have improved image translatability compared to abstract ones. 2021.naacl-main.19 @@ -289,7 +289,7 @@ A Million Tweets Are Worth a Few Points: Tuning Transformers for Customer Service Tasks AmirHadifar SofieLabat - VeroniqueHoste + VeroniqueHoste ChrisDevelder ThomasDemeester 220–225 @@ -366,7 +366,7 @@ DingminWang ChenghuaLin QiLiu - Kam-FaiWong + Kam-FaiWong 289–295 We present a fast and scalable architecture called Explicit Modular Decomposition (EMD), in which we incorporate both classification-based and extraction-based methods and design four modules (for clas- sification and sequence labelling) to jointly extract dialogue states. Experimental results based on the MultiWoz 2.0 dataset validates the superiority of our proposed model in terms of both complexity and scalability when compared to the state-of-the-art methods, especially in the scenario of multi-domain dialogues entangled with many turns of utterances. 2021.naacl-main.27 @@ -428,7 +428,7 @@ JulianEisenschlos BhuwanDhingra JannisBulian - BenjaminBörschinger + BenjaminBörschinger JordanBoyd-Graber 352–365 We release FoolMeTwice (FM2 for short), a large dataset of challenging entailment pairs collected through a fun multi-player game. Gamification encourages adversarial examples, drastically lowering the number of examples that can be solved using “shortcuts” compared to other popular entailment datasets. Players are presented with two tasks. The first task asks the player to write a plausible claim based on the evidence from a Wikipedia page. The second one shows two plausible claims written by other players, one of which is false, and the goal is to identify it before the time runs out. Players “pay” to see clues retrieved from the evidence pool: the more evidence the player needs, the harder the claim. Game-play between motivated players leads to diverse strategies for crafting claims, such as temporal inference and diverting to unrelated evidence, and results in higher quality data for the entailment and evidence retrieval tasks. We open source the dataset and the game code. @@ -464,7 +464,7 @@ Text Generation from Discourse Representation Structures JiangmingLiu - Shay B.Cohen + Shay B.Cohen MirellaLapata 397–415 We propose neural models to generate text from formal meaning representations based on Discourse Representation Structures (DRSs). DRSs are document-level representations which encode rich semantic detail pertaining to rhetorical relations, presupposition, and co-reference within and across sentences. We formalize the task of neural DRS-to-text generation and provide modeling solutions for the problems of condition ordering and variable naming which render generation from DRSs non-trivial. Our generator relies on a novel sibling treeLSTM model which is able to accurately represent DRS structures and is more generally suited to trees with wide branches. We achieve competitive performance (59.48 BLEU) on the GMB benchmark against several strong baselines. @@ -491,7 +491,7 @@ <fixed-case>DART</fixed-case>: Open-Domain Structured Data Record to Text Generation LinyongNan - DragomirRadev + DragomirRadev RuiZhang AmritRau AbhinandSivaprasad @@ -510,7 +510,7 @@ AnkitGupta TaoYu Yi ChernTan - Xi VictoriaLin + Xi VictoriaLin CaimingXiong RichardSocher Nazneen FatemaRajani @@ -525,8 +525,8 @@ When Being Unseen from m<fixed-case>BERT</fixed-case> is just the Beginning: Handling New Languages With Multilingual Language Models BenjaminMuller AntoniosAnastasopoulos - BenoîtSagot - DjaméSeddah + BenoîtSagot + DjaméSeddah 448–462 Transfer learning based on pretraining language models on a large amount of raw data has become a new norm to reach state-of-the-art performance in NLP. Still, it remains unclear how this approach should be applied for unseen languages that are not covered by any available large-scale multilingual language model and for which only a small amount of raw data is generally available. In this work, by comparing multilingual and monolingual models, we show that such models behave in multiple ways on unseen languages. Some languages greatly benefit from transfer learning and behave similarly to closely related high resource languages whereas others apparently do not. Focusing on the latter, we show that this failure to transfer is largely related to the impact of the script used to write such languages. We show that transliterating those languages significantly improves the potential of large-scale multilingual language models on downstream tasks. This result provides a promising direction towards making these massively multilingual models useful for a new set of unseen languages. 2021.naacl-main.38 @@ -537,7 +537,7 @@ Multi-Adversarial Learning for Cross-Lingual Word Embeddings HaozhouWang - JamesHenderson + JamesHenderson PaolaMerlo 463–472 Generative adversarial networks (GANs) have succeeded in inducing cross-lingual word embeddings - maps of matching words across languages - without supervision. Despite these successes, GANs’ performance for the difficult case of distant languages is still not satisfactory. These limitations have been explained by GANs’ incorrect assumption that source and target embedding spaces are related by a single linear mapping and are approximately isomorphic. We assume instead that, especially across distant languages, the mapping is only piece-wise linear, and propose a multi-adversarial learning method. This novel method induces the seed cross-lingual dictionary through multiple mappings, each induced to fit the mapping for one subspace. Our experiments on unsupervised bilingual lexicon induction and cross-lingual document classification show that this method improves performance over previous single-mapping methods, especially for distant languages. @@ -583,7 +583,7 @@ SubhabrataMukherjee MiladShokouhi GrahamNeubig - Ahmed HassanAwadallah + Ahmed HassanAwadallah 499–511 The combination of multilingual pre-trained representations and cross-lingual transfer learning is one of the most effective methods for building functional NLP systems for low-resource languages. However, for extremely low-resource languages without large-scale monolingual corpora for pre-training or sufficient annotated data for fine-tuning, transfer learning remains an understudied and challenging task. Moreover, recent work shows that multilingual representations are surprisingly disjoint across languages, bringing additional challenges for transfer onto extremely low-resource languages. In this paper, we propose MetaXL, a meta-learning based framework that learns to transform representations judiciously from auxiliary languages to a target one and brings their representation spaces closer for effective transfer. Extensive experiments on real-world low-resource languages – without access to large-scale monolingual corpora or large amounts of labeled data – for tasks like cross-lingual sentiment analysis and named entity recognition show the effectiveness of our approach. Code for MetaXL is publicly available at github.com/microsoft/MetaXL. 2021.naacl-main.42 @@ -593,7 +593,7 @@ Open Domain Question Answering over Tables via Dense Retrieval JonathanHerzig - ThomasMüller + ThomasMüller SyrineKrichene JulianEisenschlos 512–519 @@ -609,7 +609,7 @@ SvitlanaVakulenko ZhuchengTu ShayneLongpre - StephenPulman + StephenPulman SrinivasChappidi 520–534 We introduce a new dataset for Question Rewriting in Conversational Context (QReCC), which contains 14K conversations with 80K question-answer pairs. The task in QReCC is to find answers to conversational questions within a collection of 10M web pages (split into 54M passages). Answers to questions in the same conversation may be distributed across several web pages. QReCC provides annotations that allow us to train and evaluate individual subtasks of question rewriting, passage retrieval and reading comprehension required for the end-to-end conversational question answering (QA) task. We report the effectiveness of a strong baseline approach that combines the state-of-the-art model for question rewriting, and competitive models for open-domain QA. Our results set the first baseline for the QReCC dataset with F1 of 19.10, compared to the human upper bound of 75.45, indicating the difficulty of the setup and a large room for improvement. @@ -636,7 +636,7 @@ <fixed-case>XOR</fixed-case> <fixed-case>QA</fixed-case>: Cross-lingual Open-Retrieval Question Answering AkariAsai JungoKasai - JonathanClark + JonathanClark KentonLee EunsolChoi HannanehHajishirzi @@ -698,7 +698,7 @@ Preregistering <fixed-case>NLP</fixed-case> research Emielvan Miltenburg Chrisvan der Lee - EmielKrahmer + EmielKrahmer 613–623 Preregistration refers to the practice of specifying what you are going to do, and what you expect to find in your study, before carrying out the study. This practice is increasingly common in medicine and psychology, but is rarely discussed in NLP. This paper discusses preregistration in more detail, explores how NLP researchers could preregister their work, and presents several preregistration questions for different kinds of studies. Finally, we argue in favour of registered reports, which could provide firmer grounds for slow science in NLP research. The goal of this paper is to elicit a discussion in the NLP community, which we hope to synthesise into a general NLP preregistration form in future research. 2021.naacl-main.51 @@ -775,13 +775,13 @@ Improving Zero and Few-Shot Abstractive Summarization with Intermediate Fine-tuning and Data Augmentation - AlexanderFabbri + AlexanderFabbri SimengHan HaoyuanLi HaoranLi MarjanGhazvininejad - ShafiqJoty - DragomirRadev + ShafiqJoty + DragomirRadev YasharMehdad 704–717 Models pretrained with self-supervised objectives on large text corpora achieve state-of-the-art performance on English text summarization tasks. However, these models are typically fine-tuned on hundreds of thousands of data points, an infeasible requirement when applying summarization to new, niche domains. In this work, we introduce a novel and generalizable method, called WikiTransfer, for fine-tuning pretrained models for summarization in an unsupervised, dataset-specific manner. WikiTransfer fine-tunes pretrained models on pseudo-summaries, produced from generic Wikipedia data, which contain characteristics of the target dataset, such as the length and level of abstraction of the desired summaries. WikiTransfer models achieve state-of-the-art, zero-shot abstractive summarization performance on the CNN-DailyMail dataset and demonstrate the effectiveness of our approach on three additional diverse datasets. These models are more robust to noisy data and also achieve better or comparable few-shot performance using 10 and 100 training examples when compared to few-shot transfer from other summarization datasets. To further boost performance, we employ data augmentation via round-trip translation as well as introduce a regularization term for improved few-shot transfer. To understand the role of dataset aspects in transfer performance and the quality of the resulting output summaries, we further study the effect of the components of our unsupervised fine-tuning data and analyze few-shot performance using both automatic and human evaluation. @@ -797,7 +797,7 @@ RuochenXu QingkaiZeng MichaelZeng - XuedongHuang + XuedongHuang MengJiang 718–733 Automatic abstractive summaries are found to often distort or fabricate facts in the article. This inconsistency between summary and original text has seriously impacted its applicability. We propose a fact-aware summarization model FASum to extract and integrate factual relations into the summary generation process via graph attention. We then design a factual corrector model FC to automatically correct factual errors from summaries generated by existing systems. Empirical results show that the fact-aware summarization can produce abstractive summaries with higher factual consistency compared with existing systems, and the correction model improves the factual consistency of given summaries via modifying only a few keywords. @@ -814,7 +814,7 @@ YuanZhang XinyaDu PanupongPasupat - QiLi + QiLi 734–749 Few-shot learning arises in important practical scenarios, such as when a natural language understanding system needs to learn new semantic labels for an emerging, resource-scarce domain. In this paper, we explore retrieval-based methods for intent classification and slot filling tasks in few-shot settings. Retrieval-based methods make predictions based on labeled examples in the retrieval index that are similar to the input, and thus can adapt to new domains simply by changing the index without having to retrain the model. However, it is non-trivial to apply such methods on tasks with a complex label space like slot filling. To this end, we propose a span-level retrieval method that learns similar contextualized representations for spans with the same label via a novel batch-softmax objective. At inference time, we use the labels of the retrieved spans to construct the final structure with the highest aggregated score. Our method outperforms previous systems in various few-shot settings on the CLINC and SNIPS benchmarks. 2021.naacl-main.59 @@ -826,7 +826,7 @@ “Nice Try, Kiddo”: Investigating Ad Hominems in Dialogue Responses EmilySheng Kai-WeiChang - PremNatarajan + PremNatarajan NanyunPeng 750–767 Ad hominem attacks are those that target some feature of a person’s character instead of the position the person is maintaining. These attacks are harmful because they propagate implicit biases and diminish a person’s credibility. Since dialogue systems respond directly to user input, it is important to study ad hominems in dialogue responses. To this end, we propose categories of ad hominems, compose an annotated dataset, and build a classifier to analyze human and dialogue system responses to English Twitter posts. We specifically compare responses to Twitter topics about marginalized communities (#BlackLivesMatter, #MeToo) versus other topics (#Vegan, #WFH), because the abusive language of ad hominems could further amplify the skew of power away from marginalized populations. Furthermore, we propose a constrained decoding technique that uses salient n-gram similarity as a soft constraint for top-k sampling to reduce the amount of ad hominems generated. Our results indicate that 1) responses from both humans and DialoGPT contain more ad hominems for discussions around marginalized communities, 2) different quantities of ad hominems in the training data can influence the likelihood of generating ad hominems, and 3) we can use constrained decoding techniques to reduce ad hominems in generated dialogue responses. @@ -838,7 +838,7 @@ Human-like informative conversations: Better acknowledgements using conditional mutual information AshwinParanjape - ChristopherManning + ChristopherManning 768–781 This work aims to build a dialogue agent that can weave new factual content into conversations as naturally as humans. We draw insights from linguistic principles of conversational analysis and annotate human-human conversations from the Switchboard Dialog Act Corpus to examine humans strategies for acknowledgement, transition, detail selection and presentation. When current chatbots (explicitly provided with new factual content) introduce facts into a conversation, their generated responses do not acknowledge the prior turns. This is because models trained with two contexts - new factual content and conversational history - generate responses that are non-specific w.r.t. one of the contexts, typically the conversational history. We show that specificity w.r.t. conversational history is better captured by pointwise conditional mutual information (pcmi_h) than by the established use of pointwise mutual information (pmi). Our proposed method, Fused-PCMI, trades off pmi for pcmi_h and is preferred by humans for overall quality over the Max-PMI baseline 60% of the time. Human evaluators also judge responses with higher pcmi_h better at acknowledgement 74% of the time. The results demonstrate that systems mimicking human conversational traits (in this case acknowledgement) improve overall quality and more broadly illustrate the utility of linguistic principles in improving dialogue agents. 2021.naacl-main.61 @@ -875,7 +875,7 @@ JackUrbanek MargaretLi ArthurSzlam - TimRocktäschel + TimRocktäschel JasonWeston 807–833 We seek to create agents that both act and communicate with other agents in pursuit of a goal. Towards this end, we extend LIGHT (Urbanek et al. 2019)—a large-scale crowd-sourced fantasy text-game—with a dataset of quests. These contain natural language motivations paired with in-game goals and human demonstrations; completing a quest might require dialogue or actions (or both). We introduce a reinforcement learning system that (1) incorporates large-scale language modeling-based and commonsense reasoning-based pre-training to imbue the agent with relevant priors; and (2) leverages a factorized action space of action commands and dialogue, balancing between the two. We conduct zero-shot evaluations using held-out human expert demonstrations, showing that our agents are able to act consistently and talk naturally with respect to their motivations. @@ -903,7 +903,7 @@ GiannisKaramanolakis SubhabrataMukherjee GuoqingZheng - Ahmed HassanAwadallah + Ahmed HassanAwadallah 845–863 State-of-the-art deep neural networks require large-scale labeled training data that is often expensive to obtain or not available for many tasks. Weak supervision in the form of domain-specific rules has been shown to be useful in such settings to automatically generate weakly labeled training data. However, learning with weak rules is challenging due to their inherent heuristic and noisy nature. An additional challenge is rule coverage and overlap, where prior work on weak supervision only considers instances that are covered by weak rules, thus leaving valuable unlabeled data behind. In this work, we develop a weak supervision framework (ASTRA) that leverages all the available data for a given task. To this end, we leverage task-specific unlabeled data through self-training with a model (student) that considers contextualized representations and predicts pseudo-labels for instances that may not be covered by weak rules. We further develop a rule attention network (teacher) that learns how to aggregate student pseudo-labels with weak rule labels, conditioned on their fidelity and the underlying context of an instance. Finally, we construct a semi-supervised learning objective for end-to-end training with unlabeled data, domain-specific rules, and a small amount of labeled data. Extensive experiments on six benchmark datasets for text classification demonstrate the effectiveness of our approach with significant improvements over state-of-the-art baselines. 2021.naacl-main.66 @@ -953,8 +953,8 @@ Template Filling with Generative Transformers XinyaDu - AlexanderRush - ClaireCardie + AlexanderRush + ClaireCardie 909–914 Template filling is generally tackled by a pipeline of two separate supervised systems – one for role-filler extraction and another for template/event recognition. Since pipelines consider events in isolation, they can suffer from error propagation. We introduce a framework based on end-to-end generative transformers for this task (i.e., GTT). It naturally models the dependence between entities both within a single event and across the multiple events described in a document. Experiments demonstrate that this framework substantially outperforms pipeline-based approaches, and other neural end-to-end baselines that do not model between-event dependencies. We further show that our framework specifically improves performance on documents containing multiple events. 2021.naacl-main.70 @@ -965,7 +965,7 @@ Towards Interpreting and Mitigating Shortcut Learning Behavior of <fixed-case>NLU</fixed-case> models MengnanDu - VarunManjunatha + VarunManjunatha RajivJain RuchiDeshpande FranckDernoncourt @@ -985,7 +985,7 @@ JiajiHuang XingyuCai JiahongYuan - KennethChurch + KennethChurch 930–945 Multi-layer multi-head self-attention mechanism is widely applied in modern neural language models. Attention redundancy has been observed among attention heads but has not been deeply studied in the literature. Using BERT-base model as an example, this paper provides a comprehensive study on attention redundancy which is helpful for model interpretation and model compression. We analyze the attention redundancy with Five-Ws and How. (What) We define and focus the study on redundancy matrices generated from pre-trained and fine-tuned BERT-base model for GLUE datasets. (How) We use both token-based and sentence-based distance functions to measure the redundancy. (Where) Clear and similar redundancy patterns (cluster structure) are observed among attention heads. (When) Redundancy patterns are similar in both pre-training and fine-tuning phases. (Who) We discover that redundancy patterns are task-agnostic. Similar redundancy patterns even exist for randomly generated token sequences. (“Why”) We also evaluate influences of the pre-training dropout ratios on attention redundancy. Based on the phase-independent and task-agnostic attention redundancy patterns, we propose a simple zero-shot pruning method as a case study. Experiments on fine-tuning GLUE tasks verify its effectiveness. The comprehensive analyses on attention redundancy make model understanding and zero-shot model pruning promising. 2021.naacl-main.72 @@ -999,7 +999,7 @@ SarthakJain KarlPichotta YoavGoldberg - ByronWallace + ByronWallace 946–959 Large Transformers pretrained over clinical notes from Electronic Health Records (EHR) have afforded substantial gains in performance on predictive clinical tasks. The cost of training such models (and the necessity of data access to do so) coupled with their utility motivates parameter sharing, i.e., the release of pretrained models such as ClinicalBERT. While most efforts have used deidentified EHR, many researchers have access to large sets of sensitive, non-deidentified EHR with which they might train a BERT model (or similar). Would it be safe to release the weights of such a model if they did? In this work, we design a battery of approaches intended to recover Personal Health Information (PHI) from a trained BERT. Specifically, we attempt to recover patient names and conditions with which they are associated. We find that simple probing methods are not able to meaningfully extract sensitive information from BERT trained over the MIMIC-III corpus of EHR. However, more sophisticated “attacks” may succeed in doing so: To facilitate such research, we make our experimental setup and baseline probing models available at https://github.com/elehman16/exposing_patient_data_release. 2021.naacl-main.73 @@ -1011,7 +1011,7 @@ Low-Complexity Probing via Finding Subnetworks StevenCao VictorSanh - AlexanderRush + AlexanderRush 960–966 The dominant approach in probing neural networks for linguistic properties is to train a new shallow multi-layer perceptron (MLP) on top of the model’s internal representations. This approach can detect properties encoded in the model, but at the cost of adding new parameters that may learn the task directly. We instead propose a subtractive pruning-based probe, where we find an existing subnetwork that performs the linguistic task of interest. Compared to an MLP, the subnetwork probe achieves both higher accuracy on pre-trained models and lower accuracy on random models, so it is both better at finding properties of interest and worse at learning on its own. Next, by varying the complexity of each probe, we show that subnetwork probing Pareto-dominates MLP probing in that it achieves higher accuracy given any budget of probe complexity. Finally, we analyze the resulting subnetworks across various tasks to locate where each task is encoded, and we find that lower-level tasks are captured in lower layers, reproducing similar findings in past work. 2021.naacl-main.74 @@ -1023,7 +1023,7 @@ An Empirical Comparison of Instance Attribution Methods for <fixed-case>NLP</fixed-case> PouyaPezeshkpour SarthakJain - ByronWallace + ByronWallace SameerSingh 967–975 Widespread adoption of deep models has motivated a pressing need for approaches to interpret network outputs and to facilitate model debugging. Instance attribution methods constitute one means of accomplishing these goals by retrieving training instances that (may have) led to a particular prediction. Influence functions (IF; Koh and Liang 2017) provide machinery for doing this by quantifying the effect that perturbing individual train instances would have on a specific test prediction. However, even approximating the IF is computationally expensive, to the degree that may be prohibitive in many cases. Might simpler approaches (e.g., retrieving train examples most similar to a given test point) perform comparably? In this work, we evaluate the degree to which different potential instance attribution agree with respect to the importance of training samples. We find that simple retrieval methods yield training instances that differ from those identified via gradient-based methods (such as IFs), but that nonetheless exhibit desirable characteristics similar to more complex attribution methods. Code for all methods and experiments in this paper is available at: https://github.com/successar/instance_attributions_NLP. @@ -1151,7 +1151,7 @@ HaomingJiang WendiRen TuoZhao - ChaoZhang + ChaoZhang 1063–1077 Fine-tuned pre-trained language models (LMs) have achieved enormous success in many natural language processing (NLP) tasks, but they still require excessive labeled data in the fine-tuning stage. We study the problem of fine-tuning pre-trained LMs using only weak supervision, without any labeled data. This problem is challenging because the high capacity of LMs makes them prone to overfitting the noisy labels generated by weak supervision. To address this problem, we develop a contrastive self-training framework, COSINE, to enable fine-tuning LMs with weak supervision. Underpinned by contrastive regularization and confidence-based reweighting, our framework gradually improves model fitting while effectively suppressing error propagation. Experiments on sequence, token, and sentence pair classification tasks show that our model outperforms the strongest baseline by large margins and achieves competitive performance with fully-supervised fine-tuning methods. Our implementation is available on https://github.com/yueyu1030/COSINE. 2021.naacl-main.84 @@ -1201,8 +1201,8 @@ <fixed-case>DR</fixed-case>e<fixed-case>C</fixed-case>a: A General Task Augmentation Strategy for Few-Shot Natural Language Inference ShikharMurty - Tatsunori B.Hashimoto - ChristopherManning + Tatsunori B.Hashimoto + ChristopherManning 1113–1125 Meta-learning promises few-shot learners that can adapt to new distributions by repurposing knowledge acquired from previous training. However, we believe meta-learning has not yet succeeded in NLP due to the lack of a well-defined task distribution, leading to attempts that treat datasets as tasks. Such an ad hoc task distribution causes problems of quantity and quality. Since there’s only a handful of datasets for any NLP problem, meta-learners tend to overfit their adaptation mechanism and, since NLP datasets are highly heterogeneous, many learning episodes have poor transfer between their support and query sets, which discourages the meta-learner from adapting. To alleviate these issues, we propose DReCA (Decomposing datasets into Reasoning Categories), a simple method for discovering and using latent reasoning categories in a dataset, to form additional high quality tasks. DReCA works by splitting examples into label groups, embedding them with a finetuned BERT model and then clustering each group into reasoning categories. Across four few-shot NLI problems, we demonstrate that using DReCA improves the accuracy of meta-learners by 1.5-4% 2021.naacl-main.88 @@ -1215,7 +1215,7 @@ XavierGarcia AdityaSiddhant OrhanFirat - AnkurParikh + AnkurParikh 1126–1137 Unsupervised translation has reached impressive performance on resource-rich language pairs such as English-French and English-German. However, early studies have shown that in more realistic settings involving low-resource, rare languages, unsupervised translation performs poorly, achieving less than 3.0 BLEU. In this work, we show that multilinguality is critical to making unsupervised systems practical for low-resource settings. In particular, we present a single model for 5 low-resource languages (Gujarati, Kazakh, Nepali, Sinhala, and Turkish) to and from English directions, which leverages monolingual and auxiliary parallel data from other high-resource language pairs via a three-stage training scheme. We outperform all current state-of-the-art unsupervised baselines for these languages, achieving gains of up to 14.4 BLEU. Additionally, we outperform strong supervised baselines for various language pairs as well as match the performance of the current state-of-the-art supervised model for Nepali-English. We conduct a series of ablation studies to establish the robustness of our model under different degrees of data quality, as well as to analyze the factors which led to the superior performance of the proposed approach over traditional unsupervised models. 2021.naacl-main.89 @@ -1265,7 +1265,7 @@ Towards Continual Learning for Multilingual Machine Translation via Vocabulary Substitution XavierGarcia NoahConstant - AnkurParikh + AnkurParikh OrhanFirat 1184–1192 We propose a straightforward vocabulary adaptation scheme to extend the language capacity of multilingual machine translation models, paving the way towards efficient continual learning for multilingual machine translation. Our approach is suitable for large-scale datasets, applies to distant languages with unseen scripts, incurs only minor degradation on the translation performance for the original language pairs and provides competitive performance even in the case where we only possess monolingual data for the new languages. @@ -1300,13 +1300,13 @@ Capturing Row and Column Semantics in Transformer Based Question Answering over Tables - MichaelGlass + MichaelGlass MustafaCanim - AlfioGliozzo + AlfioGliozzo SaneemChemmengath VishwajeetKumar RishavChakravarti - AviSil + AviSil FeifeiPan SamarthBharadwaj Nicolas RodolfoFauceglia @@ -1359,7 +1359,7 @@ SrinivasanIyer SewonMin YasharMehdad - Wen-tauYih + Wen-tauYih 1280–1287 State-of-the-art Machine Reading Comprehension (MRC) models for Open-domain Question Answering (QA) are typically trained for span selection using distantly supervised positive examples and heuristically retrieved negative examples. This training scheme possibly explains empirical observations that these models achieve a high recall amongst their top few predictions, but a low overall accuracy, motivating the need for answer re-ranking. We develop a successful re-ranking approach (RECONSIDER) for span-extraction tasks that improves upon the performance of MRC models, even beyond large-scale pre-training. RECONSIDER is trained on positive and negative examples extracted from high confidence MRC model predictions, and uses in-passage span annotations to perform span-focused re-ranking over a smaller candidate set. As a result, RECONSIDER learns to eliminate close false positives, achieving a new extractive state of the art on four QA tasks, with 45.5% Exact Match accuracy on Natural Questions with real user questions, and 61.7% on TriviaQA. We will release all related data, models, and code. 2021.naacl-main.100 @@ -1400,7 +1400,7 @@ RaymondLi DzmitryBahdanau Harmde Vries - ChrisPal + ChrisPal 1313–1321 Recent neural text-to-SQL models can effectively translate natural language questions to corresponding SQL queries on unseen databases. Working mostly on the Spider dataset, researchers have proposed increasingly sophisticated solutions to the problem. Contrary to this trend, in this paper we focus on simplifications. We begin by building DuoRAT, a re-implementation of the state-of-the-art RAT-SQL model that unlike RAT-SQL is using only relation-aware or vanilla transformers as the building blocks. We perform several ablation experiments using DuoRAT as the baseline model. Our experiments confirm the usefulness of some techniques and point out the redundancy of others, including structural SQL features and features that link the question with the schema. 2021.naacl-main.103 @@ -1426,8 +1426,8 @@ Structure-Grounded Pretraining for Text-to-<fixed-case>SQL</fixed-case> XiangDeng - Ahmed HassanAwadallah - ChristopherMeek + Ahmed HassanAwadallah + ChristopherMeek OleksandrPolozov HuanSun MatthewRichardson @@ -1443,7 +1443,7 @@ CongyingXia WenpengYin YihaoFeng - PhilipYu + PhilipYu 1351–1360 Text classification is usually studied by labeling natural language texts with relevant categories from a predefined set. In the real world, new classes might keep challenging the existing system with limited labeled data. The system should be intelligent enough to recognize upcoming new classes with a few examples. In this work, we define a new task in the NLP domain, incremental few-shot text classification, where the system incrementally handles multiple rounds of new classes. For each round, there is a batch of new classes with a few labeled examples per class. Two major challenges exist in this new task: (i) For the learning process, the system should incrementally learn new classes round by round without re-training on the examples of preceding classes; (ii) For the performance, the system should perform well on new classes without much loss on preceding classes. In addition to formulating the new task, we also release two benchmark datasets in the incremental few-shot setting: intent classification and relation classification. Moreover, we propose two entailment approaches, ENTAILMENT and HYBRID, which show promise for solving this novel problem. 2021.naacl-main.106 @@ -1558,7 +1558,7 @@ JinlanFu LiangjingFeng QiZhang - XuanjingHuang + XuanjingHuang PengfeiLiu 1463–1475 The development of neural networks and pretraining techniques has spawned many sentence-level tagging systems that achieved superior performance on typical benchmarks. However, a relatively less discussed topic is what if more context information is introduced into current top-scoring tagging systems. Although several existing works have attempted to shift tagging systems from sentence-level to document-level, there is still no consensus conclusion about when and why it works, which limits the applicability of the larger-context approach in tagging tasks. In this paper, instead of pursuing a state-of-the-art tagging system by architectural exploration, we focus on investigating when and why the larger-context training, as a general strategy, can work. To this end, we conduct a thorough comparative study on four proposed aggregators for context information collecting and present an attribute-aided evaluation method to interpret the improvement brought by larger-context training. Experimentally, we set up a testbed based on four tagging tasks and thirteen datasets. Hopefully, our preliminary observations can deepen the understanding of larger-context training and enlighten more follow-up works on the use of contextual information. @@ -1596,7 +1596,7 @@ TaoMeng AnjieFang OlegRokhlenko - ShervinMalmasi + ShervinMalmasi 1499–1512 Named Entity Recognition (NER) remains difficult in real-world settings; current challenges include short texts (low context), emerging entities, and complex entities (e.g. movie names). Gazetteer features can help, but results have been mixed due to challenges with adding extra features, and a lack of realistic evaluation data. It has been shown that including gazetteer features can cause models to overuse or underuse them, leading to poor generalization. We propose GEMNET, a novel approach for gazetteer knowledge integration, including (1) a flexible Contextual Gazetteer Representation (CGR) encoder that can be fused with any word-level model; and (2) a Mixture-of- Experts gating network that overcomes the feature overuse issue by learning to conditionally combine the context and gazetteer features, instead of assigning them fixed weights. To comprehensively evaluate our approaches, we create 3 large NER datasets (24M tokens) reflecting current challenges. In an uncased setting, our methods show large gains (up to +49% F1) in recognizing difficult entities compared to existing baselines. On standard benchmarks, we achieve a new uncased SOTA on CoNLL03 and WNUT17. 2021.naacl-main.118 @@ -1626,7 +1626,7 @@ ChaeHunPark EugeneJang WonsukYang - JongPark + JongPark 1525–1534 Evaluating the quality of responses generated by open-domain conversation systems is a challenging task. This is partly because there can be multiple appropriate responses to a given dialogue history. Reference-based metrics that rely on comparisons to a set of known correct responses often fail to account for this variety, and consequently correlate poorly with human judgment. To address this problem, researchers have investigated the possibility of assessing response quality without using a set of known correct responses. RUBER demonstrated that an automatic response evaluation model could be made using unsupervised learning for the next-utterance prediction (NUP) task. For the unsupervised learning of such model, we propose a method of manipulating a golden response to create a new negative response that is designed to be inappropriate within the context while maintaining high similarity with the original golden response. We find, from our experiments on English datasets, that using the negative samples generated by our method alongside random negative samples can increase the model’s correlation with human evaluations. The process of generating such negative samples is automated and does not rely on human annotation. 2021.naacl-main.120 @@ -1653,7 +1653,7 @@ TaesukHong ByoungjaeKim YoungjoongKo - JungyunSeo + JungyunSeo 1549–1558 Retrieval-based dialogue systems display an outstanding performance when pre-trained language models are used, which includes bidirectional encoder representations from transformers (BERT). During the multi-turn response selection, BERT focuses on training the relationship between the context with multiple utterances and the response. However, this method of training is insufficient when considering the relations between each utterance in the context. This leads to a problem of not completely understanding the context flow that is required to select a response. To address this issue, we propose a new fine-grained post-training method that reflects the characteristics of the multi-turn dialogue. Specifically, the model learns the utterance level interactions by training every short context-response pair in a dialogue session. Furthermore, by using a new training objective, the utterance relevance classification, the model understands the semantic relevance and coherence between the dialogue utterances. Experimental results show that our model achieves new state-of-the-art with significant margins on three benchmark datasets. This suggests that the fine-grained post-training method is highly effective for the response selection task. 2021.naacl-main.122 @@ -1681,14 +1681,14 @@ Adding Chit-Chat to Enhance Task-Oriented Dialogues KaiSun SeungwhanMoon - PaulCrook + PaulCrook StephenRoller BeckaSilvert BingLiu ZhiguangWang HongleiLiu EunjoonCho - ClaireCardie + ClaireCardie 1570–1583 Existing dialogue corpora and models are typically designed under two disjoint motives: while task-oriented systems focus on achieving functional goals (e.g., booking hotels), open-domain chatbots aim at making socially engaging conversations. In this work, we propose to integrate both types of systems by Adding Chit-Chat to ENhance Task-ORiented dialogues (ACCENTOR), with the goal of making virtual assistant conversations more engaging and interactive. Specifically, we propose a Human <-> AI collaborative data collection approach for generating diverse chit-chat responses to augment task-oriented dialogues with minimal annotation effort. We then present our new chit-chat-based annotations to 23.8K dialogues from two popular task-oriented datasets (Schema-Guided Dialogue and MultiWOZ 2.1) and demonstrate their advantage over the originals via human evaluation. Lastly, we propose three new models for adding chit-chat to task-oriented dialogues, explicitly trained to predict user goals and to generate contextually relevant chit-chat responses. Automatic and human evaluations show that, compared with the state-of-the-art task-oriented baseline, our models can code-switch between task and chit-chat to be more engaging, interesting, knowledgeable, and humanlike, while maintaining competitive task performance. 2021.naacl-main.124 @@ -1699,7 +1699,7 @@ Incorporating Syntax and Semantics in Coreference Resolution with Heterogeneous Graph Attention Network FanJiang - TrevorCohn + TrevorCohn 1584–1591 External syntactic and semantic information has been largely ignored by existing neural coreference resolution models. In this paper, we present a heterogeneous graph-based model to incorporate syntactic and semantic structures of sentences. The proposed graph contains a syntactic sub-graph where tokens are connected based on a dependency tree, and a semantic sub-graph that contains arguments and predicates as nodes and semantic role labels as edges. By applying a graph attention network, we can obtain syntactically and semantically augmented word representation, which can be integrated using an attentive integration layer and gating mechanism. Experiments on the OntoNotes 5.0 benchmark show the effectiveness of our proposed model. 2021.naacl-main.125 @@ -1726,7 +1726,7 @@ NaokiKobayashi TsutomuHirao HidetakaKamigaito - ManabuOkumura + ManabuOkumura MasaakiNagata 1600–1612 Most of the previous Rhetorical Structure Theory (RST) parsing methods are based on supervised learning such as neural networks, that require an annotated corpus of sufficient size and quality. However, the RST Discourse Treebank (RST-DT), the benchmark corpus for RST parsing in English, is small due to the costly annotation of RST trees. The lack of large annotated training data causes poor performance especially in relation labeling. Therefore, we propose a method for improving neural RST parsing models by exploiting silver data, i.e., automatically annotated data. We create large-scale silver data from an unlabeled corpus by using a state-of-the-art RST parser. To obtain high-quality silver data, we extract agreement subtrees from RST trees for documents built using the RST parsers. We then pre-train a neural RST parser with the obtained silver data and fine-tune it on the RST-DT. Experimental results show that our method achieved the best micro-F1 scores for Nuclearity and Relation at 75.0 and 63.2, respectively. Furthermore, we obtained a remarkable gain in the Relation score, 3.0 points, against the previous state-of-the-art parser. @@ -1739,7 +1739,7 @@ <fixed-case>RST</fixed-case> Parsing from Scratch Thanh-TungNguyen Xuan-PhiNguyen - ShafiqJoty + ShafiqJoty XiaoliLi 1613–1625 We introduce a novel top-down end-to-end formulation of document level discourse parsing in the Rhetorical Structure Theory (RST) framework. In this formulation, we consider discourse parsing as a sequence of splitting decisions at token boundaries and use a seq2seq network to model the splitting decisions. Our framework facilitates discourse parsing from scratch without requiring discourse segmentation as a prerequisite; rather, it yields segmentation as part of the parsing process. Our unified parsing model adopts a beam search to decode the best tree structure by searching through a space of high scoring trees. With extensive experiments on the standard RST discourse treebank, we demonstrate that our parser outperforms existing methods by a good margin in both end-to-end parsing and parsing with gold segmentation. More importantly, it does so without using any handcrafted features, making it faster and easily adaptable to new languages and domains. @@ -1765,7 +1765,7 @@ Evaluating the Impact of a Hierarchical Discourse Representation on Entity Coreference Resolution Performance SopanKhosla JamesFiacco - CarolynRosé + CarolynRosé 1645–1651 Recent work on entity coreference resolution (CR) follows current trends in Deep Learning applied to embeddings and relatively simple task-related features. SOTA models do not make use of hierarchical representations of discourse structure. In this work, we leverage automatically constructed discourse parse trees within a neural approach and demonstrate a significant improvement on two benchmark entity coreference-resolution datasets. We explore how the impact varies depending upon the type of mention. 2021.naacl-main.130 @@ -1835,7 +1835,7 @@ JianJiao NanDuan RuofeiZhang - XuanjingHuang + XuanjingHuang 1692–1701 Transformer is an attention-based neural network, which consists of two sublayers, namely, Self-Attention Network (SAN) and Feed-Forward Network (FFN). Existing research explores to enhance the two sublayers separately to improve the capability of Transformer for text representation. In this paper, we present a novel understanding of SAN and FFN as Mask Attention Networks (MANs) and show that they are two special cases of MANs with static mask matrices. However, their static mask matrices limit the capability for localness modeling in text representation learning. We therefore introduce a new layer named dynamic mask attention network (DMAN) with a learnable mask matrix which is able to model localness adaptively. To incorporate advantages of DMAN, SAN, and FFN, we propose a sequential layered structure to combine the three types of layers. Extensive experiments on various tasks, including neural machine translation and text summarization demonstrate that our model outperforms the original Transformer. 2021.naacl-main.135 @@ -1878,7 +1878,7 @@ IanPorada KaheerSuleman AdamTrischler - Jackie Chi KitCheung + Jackie Chi KitCheung 1732–1743 Understanding natural language requires common sense, one aspect of which is the ability to discern the plausibility of events. While distributional models—most recently pre-trained, Transformer language models—have demonstrated improvements in modeling event plausibility, their performance still falls short of humans’. In this work, we show that Transformer-based plausibility models are markedly inconsistent across the conceptual classes of a lexical hierarchy, inferring that “a person breathing” is plausible while “a dentist breathing” is not, for example. We find this inconsistency persists even when models are softly injected with lexical knowledge, and we present a simple post-hoc method of forcing model consistency that improves correlation with human plausibility judgements. 2021.naacl-main.138 @@ -2177,7 +2177,7 @@ PrabirMallick SangameshwarPatil IndrajitBhattacharya - GirishPalshikar + GirishPalshikar 1996–2005 Given the diversity of the candidates and complexity of job requirements, and since interviewing is an inherently subjective process, it is an important task to ensure consistent, uniform, efficient and objective interviews that result in high quality recruitment. We propose an interview assistant system to automatically, and in an objective manner, select an optimal set of technical questions (from question banks) personalized for a candidate. This set can help a human interviewer to plan for an upcoming interview of that candidate. We formalize the problem of selecting a set of questions as an integer linear programming problem and use standard solvers to get a solution. We use knowledge graph as background knowledge in this formulation, and derive our objective functions and constraints from it. We use candidate’s resume to personalize the selection of questions. We propose an intrinsic evaluation to compare a set of suggested questions with actually asked questions. We also use expert interviewers to comparatively evaluate our approach with a set of reasonable baselines. 2021.naacl-main.160 @@ -2318,7 +2318,7 @@ SeunghyunYoon FranckDernoncourt Doo SoonKim - TrungBui + TrungBui JoongboShin KyominJung 2105–2115 @@ -2334,7 +2334,7 @@ YiweiLyu Paul PuLiang HaiPham - EduardHovy + EduardHovy BarnabásPóczos RuslanSalakhutdinov Louis-PhilippeMorency @@ -2377,7 +2377,7 @@ Framing Unpacked: A Semi-Supervised Interpretable Multi-View Model of Media Frames ShimaKhanehzar - TrevorCohn + TrevorCohn GosiaMikolajczak AndrewTurpin LeaFrermann @@ -2392,7 +2392,7 @@ Automatic Classification of Neutralization Techniques in the Narrative of Climate Change Scepticism ShraeyBhatia Jey HanLau - TimothyBaldwin + TimothyBaldwin 2167–2175 Neutralisation techniques, e.g. denial of responsibility and denial of victim, are used in the narrative of climate change scepticism to justify lack of action or to promote an alternative view. We first draw on social science to introduce the problem to the community of nlp, present the granularity of the coding schema and then collect manual annotations of neutralised techniques in text relating to climate change, and experiment with supervised and semi- supervised BERT-based models. 2021.naacl-main.175 @@ -2404,7 +2404,7 @@ Suicide Ideation Detection via Social and Temporal User Representations using Hyperbolic Learning RamitSawhney HarshitJoshi - Rajiv RatnShah + Rajiv RatnShah LucieFlek 2176–2190 Recent psychological studies indicate that individuals exhibiting suicidal ideation increasingly turn to social media rather than mental health practitioners. Personally contextualizing the buildup of such ideation is critical for accurate identification of users at risk. In this work, we propose a framework jointly leveraging a user’s emotional history and social information from a user’s neighborhood in a network to contextualize the interpretation of the latest tweet of a user on Twitter. Reflecting upon the scale-free nature of social network relationships, we propose the use of Hyperbolic Graph Convolution Networks, in combination with the Hawkes process to learn the historical emotional spectrum of a user in a time-sensitive manner. Our system significantly outperforms state-of-the-art methods on this task, showing the benefits of both socially and personally contextualized representations. @@ -2419,7 +2419,7 @@ AndreaCeolin IknoorSingh NiyatiChhaya - LyleUngar + LyleUngar 2191–2200 This study introduces and analyzes WikiTalkEdit, a dataset of conversations and edit histories from Wikipedia, for research in online cooperation and conversation modeling. The dataset comprises dialog triplets from the Wikipedia Talk pages, and editing actions on the corresponding articles being discussed. We show how the data supports the classic understanding of style matching, where positive emotion and the use of first-person pronouns predict a positive emotional change in a Wikipedia contributor. However, they do not predict editorial behavior. On the other hand, feedback invoking evidentiality and criticism, and references to Wikipedia’s community norms, is more likely to persuade the contributor to perform edits but is less likely to lead to a positive emotion. We developed baseline classifiers trained on pre-trained RoBERTa features that can predict editorial change with an F1 score of .54, as compared to an F1 score of .66 for predicting emotional change. A diagnostic analysis of persisting errors is also provided. We conclude with possible applications and recommendations for future work. The dataset is publicly available for the research community at https://github.com/kj2013/WikiTalkEdit/. 2021.naacl-main.177 @@ -2508,7 +2508,7 @@ Learning to Recognize Dialect Features DorottyaDemszky DevyaniSharma - JonathanClark + JonathanClark VinodkumarPrabhakaran JacobEisenstein 2315–2338 @@ -2521,7 +2521,7 @@ It’s Not Just Size That Matters: Small Language Models Are Also Few-Shot Learners TimoSchick - HinrichSchütze + HinrichSchütze 2339–2352 When scaled to hundreds of billions of parameters, pretrained language models such as GPT-3 (Brown et al., 2020) achieve remarkable few-shot performance. However, enormous amounts of compute are required for training and applying such big models, resulting in a large carbon footprint and making it difficult for researchers and practitioners to use them. We show that performance similar to GPT-3 can be obtained with language models that are much “greener” in that their parameter count is several orders of magnitude smaller. This is achieved by converting textual inputs into cloze questions that contain a task description, combined with gradient-based optimization; exploiting unlabeled data gives further improvements. We identify key factors required for successful natural language understanding with small language models. 2021.naacl-main.185 @@ -2534,7 +2534,7 @@ Static Embeddings as Efficient Knowledge Bases? PhilippDufter NoraKassner - HinrichSchütze + HinrichSchütze 2353–2363 Recent research investigates factual knowledge stored in large pretrained language models (PLMs). Instead of structural knowledge base (KB) queries, masked sentences such as “Paris is the capital of [MASK]” are used as probes. The good performance on this analysis task has been interpreted as PLMs becoming potential repositories of factual knowledge. In experiments across ten linguistically diverse languages, we study knowledge contained in static embeddings. We show that, when restricting the output space to a candidate set, simple nearest neighbor matching using static embeddings performs better than PLMs. E.g., static embeddings perform 1.6% points better than BERT while just using 0.3% of energy for training. One important factor in their good comparative performance is that static embeddings are standardly learned for a large vocabulary. In contrast, BERT exploits its more sophisticated, but expensive ability to compose meaningful representations from a much smaller subword vocabulary. 2021.naacl-main.186 @@ -2652,7 +2652,7 @@ JunjieHu GrahamNeubig FlorianMetze - AlexanderHauptmann + AlexanderHauptmann 2443–2459 This paper studies zero-shot cross-lingual transfer of vision-language models. Specifically, we focus on multilingual text-to-video search and propose a Transformer-based model that learns contextual multilingual multimodal embeddings. Under a zero-shot setting, we empirically demonstrate that performance degrades significantly when we query the multilingual text-video model with non-English sentences. To address this problem, we introduce a multilingual multimodal pre-training strategy, and collect a new multilingual instructional video dataset (Multi-HowTo100M) for pre-training. Experiments on VTT show that our method significantly improves video search in non-English languages without additional annotations. Furthermore, when multilingual annotations are available, our method outperforms recent baselines by a large margin in multilingual text-to-video search on VTT and VATEX; as well as in multilingual text-to-image search on Multi30K. Our model and Multi-HowTo100M is available at http://github.com/berniebear/Multi-HT100M. 2021.naacl-main.195 @@ -2683,7 +2683,7 @@ AlanRamponi Siti OryzaKhairunnisa MamoruKomachi - BarbaraPlank + BarbaraPlank 2479–2497 The lack of publicly available evaluation data for low-resource languages limits progress in Spoken Language Understanding (SLU). As key tasks like intent classification and slot filling require abundant training data, it is desirable to reuse existing data in high-resource languages to develop models for low-resource scenarios. We introduce xSID, a new benchmark for cross-lingual (x) Slot and Intent Detection in 13 languages from 6 language families, including a very low-resource dialect. To tackle the challenge, we propose a joint learning approach, with English SLU training data and non-English auxiliary tasks from raw text, syntax and translation for transfer. We study two setups which differ by type and language coverage of the pre-trained embeddings. Our results show that jointly learning the main tasks with masked language modeling is effective for slots, while machine translation transfer works best for intent classification. 2021.naacl-main.197 @@ -2731,7 +2731,7 @@ VladimirKarpukhin JeanMaillard VassilisPlachouras - TimRocktäschel + TimRocktäschel SebastianRiedel 2523–2544 Challenging problems such as open-domain question answering, fact checking, slot filling and entity linking require access to large, external knowledge sources. While some models do well on individual tasks, developing general models is difficult as each task might require computationally expensive indexing of custom knowledge sources, in addition to dedicated infrastructure. To catalyze research on models that condition on specific information in large textual resources, we present a benchmark for knowledge-intensive language tasks (KILT). All tasks in KILT are grounded in the same snapshot of Wikipedia, reducing engineering turnaround through the re-use of components, as well as accelerating research into task-agnostic memory architectures. We test both task-specific and general baselines, evaluating downstream performance in addition to the ability of the models to provide provenance. We find that a shared dense vector index coupled with a seq2seq model is a strong baseline, outperforming more tailor-made approaches for fact checking, open-domain question answering and dialogue, and yielding competitive results on entity linking and slot filling, by generating disambiguated text. KILT data and code are available at https://github.com/facebookresearch/KILT. @@ -2784,9 +2784,9 @@ TommasoFornaciari AlexandraUma SilviuPaun - BarbaraPlank + BarbaraPlank DirkHovy - MassimoPoesio + MassimoPoesio 2591–2597 Supervised learning assumes that a ground truth label exists. However, the reliability of this ground truth depends on human annotators, who often disagree. Prior work has shown that this disagreement can be helpful in training models. We propose a novel method to incorporate this disagreement as information: in addition to the standard error computation, we use soft-labels (i.e., probability distributions over the annotator labels) as an auxiliary task in a multi-task neural network. We measure the divergence between the predictions and the target soft-labels with several loss-functions and evaluate the models on various NLP tasks. We find that the soft-label prediction auxiliary task reduces the penalty for errors on ambiguous entities, and thereby mitigates overfitting. It significantly improves performance across tasks, beyond the standard approach and prior work. 2021.naacl-main.204 @@ -2810,11 +2810,11 @@ Variance-reduced First-order Meta-learning for Natural Language Processing Tasks - LingxiaoWang + LingxiaoWang KevinHuang TengyuMa QuanquanGu - JingHuang + JingHuang 2609–2615 First-order meta-learning algorithms have been widely used in practice to learn initial model parameters that can be quickly adapted to new tasks due to their efficiency and effectiveness. However, existing studies find that meta-learner can overfit to some specific adaptation when we have heterogeneous tasks, leading to significantly degraded performance. In Natural Language Processing (NLP) applications, datasets are often diverse and each task has its unique characteristics. Therefore, to address the overfitting issue when applying first-order meta-learning to NLP applications, we propose to reduce the variance of the gradient estimator used in task adaptation. To this end, we develop a variance-reduced first-order meta-learning algorithm. The core of our algorithm is to introduce a novel variance reduction term to the gradient estimation when performing the task adaptation. Experiments on two NLP applications: few-shot text classification and multi-domain dialog state tracking demonstrate the superior performance of our proposed method. 2021.naacl-main.206 @@ -2827,7 +2827,7 @@ TianzeShi AdrianBenton IgorMalioutov - Ozanİrsoy + Ozanİrsoy 2616–2626 While the predictive performance of modern statistical dependency parsers relies heavily on the availability of expensive expert-annotated treebank data, not all annotations contribute equally to the training of the parsers. In this paper, we attempt to reduce the number of labeled examples needed to train a strong dependency parser using batch active learning (AL). In particular, we investigate whether enforcing diversity in the sampled batches, using determinantal point processes (DPPs), can improve over their diversity-agnostic counterparts. Simulation experiments on an English newswire corpus show that selecting diverse batches with DPPs is superior to strong selection strategies that do not enforce batch diversity, especially during the initial stages of the learning process. Additionally, our diversity-aware strategy is robust under a corpus duplication setting, where diversity-agnostic sampling strategies exhibit significant degradation. 2021.naacl-main.207 @@ -2838,7 +2838,7 @@ How many data points is a prompt worth? TevenLe Scao - AlexanderRush + AlexanderRush 2627–2636 When fine-tuning pretrained models for classification, researchers either use a generic model head or a task-specific prompt for prediction. Proponents of prompting have argued that prompts provide a method for injecting task-specific guidance, which is beneficial in low-data regimes. We aim to quantify this benefit through rigorous testing of prompts in a fair setting: comparing prompted and head-based fine-tuning in equal conditions across many tasks and data sizes. By controlling for many sources of advantage, we find that prompting does indeed provide a benefit, and that this benefit can be quantified per task. Results show that prompting is often worth 100s of data points on average across classification tasks. 2021.naacl-main.208 @@ -2863,7 +2863,7 @@ Smoothing and Shrinking the Sparse <fixed-case>S</fixed-case>eq2<fixed-case>S</fixed-case>eq Search Space BenPeters - André F. T.Martins + André F. T.Martins 2642–2654 Current sequence-to-sequence models are trained to minimize cross-entropy and use softmax to compute the locally normalized probabilities over target sequences. While this setup has led to strong results in a variety of tasks, one unsatisfying aspect is its length bias: models give high scores to short, inadequate hypotheses and often make the empty string the argmax—the so-called cat got your tongue problem. Recently proposed entmax-based sparse sequence-to-sequence models present a possible solution, since they can shrink the search space by assigning zero probability to bad hypotheses, but their ability to handle word-level tasks with transformers has never been tested. In this work, we show that entmax-based models effectively solve the cat got your tongue problem, removing a major source of model error for neural machine translation. In addition, we generalize label smoothing, a critical regularization technique, to the broader family of Fenchel-Young losses, which includes both cross-entropy and the entmax losses. Our resulting label-smoothed entmax loss models set a new state of the art on multilingual grapheme-to-phoneme conversion and deliver improvements and better calibration properties on cross-lingual morphological inflection and machine translation for 7 language pairs. 2021.naacl-main.210 @@ -2873,7 +2873,7 @@ Unified Pre-training for Program Understanding and Generation - WasiAhmad + WasiAhmad SaikatChakraborty BaishakhiRay Kai-WeiChang @@ -2923,7 +2923,7 @@ Semantic Frame Forecast Chieh-YangHuang - Ting-HaoHuang + Ting-HaoHuang 2702–2713 This paper introduces Semantic Frame Forecast, a task that predicts the semantic frames that will occur in the next 10, 100, or even 1,000 sentences in a running story. Prior work focused on predicting the immediate future of a story, such as one to a few sentences ahead. However, when novelists write long stories, generating a few sentences is not enough to help them gain high-level insight to develop the follow-up story. In this paper, we formulate a long story as a sequence of “story blocks,” where each block contains a fixed number of sentences (e.g., 10, 100, or 200). This formulation allows us to predict the follow-up story arc beyond the scope of a few sentences. We represent a story block using the term frequencies (TF) of semantic frames in it, normalized by each frame’s inverse document frequency (IDF). We conduct semantic frame forecast experiments on 4,794 books from the Bookcorpus and 7,962 scientific abstracts from CODA-19, with block sizes ranging from 5 to 1,000 sentences. The results show that automated models can forecast the follow-up story blocks better than the random, prior, and replay baselines, indicating the feasibility of the task. We also learn that the models using the frame representation as features outperform all the existing approaches when the block size is over 150 sentences. The human evaluation also shows that the proposed frame representation, when visualized as word clouds, is comprehensible, representative, and specific to humans. 2021.naacl-main.215 @@ -2937,7 +2937,7 @@ MichalisPapakostas MihaiBurzo MohamedAbouelenien - RadaMihalcea + RadaMihalcea 2714–2725 The capability to automatically detect human stress can benefit artificial intelligent agents involved in affective computing and human-computer interaction. Stress and emotion are both human affective states, and stress has proven to have important implications on the regulation and expression of emotion. Although a series of methods have been established for multimodal stress detection, limited steps have been taken to explore the underlying inter-dependence between stress and emotion. In this work, we investigate the value of emotion recognition as an auxiliary task to improve stress detection. We propose MUSER – a transformer-based model architecture and a novel multi-task learning algorithm with speed-based dynamic sampling strategy. Evaluation on the Multimodal Stressed Emotion (MuSE) dataset shows that our model is effective for stress detection with both internal and external auxiliary tasks, and achieves state-of-the-art results. 2021.naacl-main.216 @@ -2950,7 +2950,7 @@ YiZhang Sujay KumarJauhar JuliaKiseleva - RyenWhite + RyenWhite DanRoth 2726–2735 People rely on digital task management tools, such as email or to-do apps, to manage their tasks. Some of these tasks are large and complex, leading to action paralysis and feelings of being overwhelmed on the part of the user. The micro-productivity literature has shown that such tasks could benefit from being decomposed and organized, in order to reduce user cognitive load. Thus in this paper, we propose a novel end-to-end pipeline that consumes a complex task and induces a dependency graph from unstructured text to represent sub-tasks and their relationships. Our solution first finds nodes for sub-tasks from multiple ‘how-to’ articles on the web by injecting a neural text generator with three key desiderata – relevance, abstraction, and consensus. Then we resolve and infer edges between these subtask nodes by learning task dependency relations. We collect a new dataset of complex tasks with their sub-task graph to develop and evaluate our solutions. Both components of our graph induction solution are evaluated in experiments, demonstrating that our models outperform a state-of-the-art text generator significantly. Our generalizable and scalable end-to-end solution has important implications for boosting user productivity and assisting with digital task management. @@ -2989,7 +2989,7 @@ Learning to Synthesize Data for Semantic Parsing BailinWang WenpengYin - Xi VictoriaLin + Xi VictoriaLin CaimingXiong 2760–2766 Synthesizing data for semantic parsing has gained increasing attention recently. However, most methods require handcrafted (high-precision) rules in their generative process, hindering the exploration of diverse unseen data. In this work, we propose a generative model which features a (non-neural) PCFG that models the composition of programs (e.g., SQL), and a BART-based translation model that maps a program to an utterance. Due to the simplicity of PCFG and pre-trained BART, our generative model can be efficiently learned from existing data at hand. Moreover, explicitly modeling compositions using PCFG leads to better exploration of unseen programs, thus generate more diverse data. We evaluate our method in both in-domain and out-of-domain settings of text-to-SQL parsing on the standard benchmarks of GeoQuery and Spider, respectively. Our empirical results show that the synthesized data generated from our model can substantially help a semantic parser achieve better compositional and domain generalization. @@ -3029,7 +3029,7 @@ RomaPatel MartaGarnelo IanGemp - ChrisDyer + ChrisDyer YoramBachrach 2789–2798 The input vocabulary and the representations learned are crucial to the performance of neural NLP models. Using the full vocabulary results in less explainable and more memory intensive models, with the embedding layer often constituting the majority of model parameters. It is thus common to use a smaller vocabulary to lower memory requirements and construct more interpertable models. We propose a vocabulary selection method that views words as members of a team trying to maximize the model’s performance. We apply power indices from cooperative game theory, including the Shapley value and Banzhaf index, that measure the relative importance of individual team members in accomplishing a joint task. We approximately compute these indices to identify the most influential words. Our empirical evaluation examines multiple NLP tasks, including sentence and document classification, question answering and textual entailment. We compare to baselines that select words based on frequency, TF-IDF and regression coefficients under L1 regularization, and show that this game-theoretic vocabulary selection outperforms all baseline on a range of different tasks and datasets. @@ -3111,7 +3111,7 @@ PengQi GuangtaoWang RexYing - JingHuang + JingHuang XiaodongHe BowenZhou 2884–2894 @@ -3125,7 +3125,7 @@ Emotion-Infused Models for Explainable Psychological Stress Detection ElsbethTurcan SmarandaMuresan - KathleenMcKeown + KathleenMcKeown 2895–2909 The problem of detecting psychological stress in online posts, and more broadly, of detecting people in distress or in need of help, is a sensitive application for which the ability to interpret models is vital. Here, we present work exploring the use of a semantically related task, emotion detection, for equally competent but more explainable and human-like psychological stress detection as compared to a black-box model. In particular, we explore the use of multi-task learning as well as emotion-based language model fine-tuning. With our emotion-infused models, we see comparable results to state-of-the-art BERT. Our analysis of the words used for prediction show that our emotion-infused models mirror psychological components of stress. 2021.naacl-main.230 @@ -3171,7 +3171,7 @@ Learning Syntax from Naturally-Occurring Bracketings TianzeShi - Ozanİrsoy + Ozanİrsoy IgorMalioutov LillianLee 2941–2949 @@ -3255,7 +3255,7 @@ Controlling Dialogue Generation with Semantic Exemplars PrakharGupta - JeffreyBigham + JeffreyBigham YuliaTsvetkov AmyPavel 3018–3029 @@ -3446,7 +3446,7 @@ News Headline Grouping as a Challenging <fixed-case>NLU</fixed-case> Task PhilippeLaban LucasBandarkar - Marti A.Hearst + Marti A.Hearst 3186–3198 Recent progress in Natural Language Understanding (NLU) has seen the latest models outperform human performance on many standard tasks. These impressive results have led the community to introspect on dataset limitations, and iterate on more nuanced challenges. In this paper, we introduce the task of HeadLine Grouping (HLG) and a corresponding dataset (HLGD) consisting of 20,056 pairs of news headlines, each labeled with a binary judgement as to whether the pair belongs within the same group. On HLGD, human annotators achieve high performance of around 0.9 F-1, while current state-of-the art Transformer models only reach 0.75 F-1, opening the path for further improvements. We further propose a novel unsupervised Headline Generator Swap model for the task of HeadLine Grouping that achieves within 3 F-1 of the best supervised model. Finally, we analyze high-performing models with consistency tests, and find that models are not consistent in their predictions, revealing modeling limits of current architectures. 2021.naacl-main.255 @@ -3459,7 +3459,7 @@ EleftheriaBriakou DiLu KeZhang - JoelTetreault + JoelTetreault 3199–3216 We take the first step towards multilingual style transfer by creating and releasing XFORMAL, a benchmark of multiple formal reformulations of informal text in Brazilian Portuguese, French, and Italian. Results on XFORMAL suggest that state-of-the-art style transfer approaches perform close to simple baselines, indicating that style transfer is even more challenging when moving multilingual. 2021.naacl-main.256 @@ -3513,7 +3513,7 @@ HaoPeng DongxiaoHe JianxinLi - PhilipYu + PhilipYu 3259–3265 The current state-of-the-art model HiAGM for hierarchical text classification has two limitations. First, it correlates each text sample with all labels in the dataset which contains irrelevant information. Second, it does not consider any statistical constraint on the label representations learned by the structure encoder, while constraints for representation learning are proved to be helpful in previous work. In this paper, we propose HTCInfoMax to address these issues by introducing information maximization which includes two modules: text-label mutual information maximization and label prior matching. The first module can model the interaction between each text sample and its ground truth labels explicitly which filters out irrelevant information. The second one encourages the structure encoder to learn better representations with desired characteristics for all labels which can better handle label imbalance in hierarchical text classification. Experimental results on two benchmark datasets demonstrate the effectiveness of the proposed HTCInfoMax. 2021.naacl-main.260 @@ -3639,7 +3639,7 @@ <fixed-case>TABBIE</fixed-case>: Pretrained Representations of Tabular Data HiroshiIida DungThai - VarunManjunatha + VarunManjunatha MohitIyyer 3446–3456 Existing work on tabular representation-learning jointly models tables and associated text using self-supervised objective functions derived from pretrained language models such as BERT. While this joint pretraining improves tasks involving paired tables and text (e.g., answering questions about tables), we show that it underperforms on tasks that operate over tables without any associated text (e.g., populating missing cells). We devise a simple pretraining objective (corrupt cell detection) that learns exclusively from tabular data and reaches the state-of-the-art on a suite of table-based prediction tasks. Unlike competing approaches, our model (TABBIE) provides embeddings of all table substructures (cells, rows, and columns), and it also requires far less compute to train. A qualitative analysis of our model’s learned cell, column, and row representations shows that it understands complex table semantics and numerical trends. @@ -3650,7 +3650,7 @@ Better Feature Integration for Named Entity Recognition - LuXu + LuXu ZhanmingJie WeiLu LidongBing @@ -3685,10 +3685,10 @@ A Context-Dependent Gated Module for Incorporating Symbolic Semantics into Event Coreference Resolution - TuanLai + TuanLai HengJi - TrungBui - Quan HungTran + TrungBui + Quan HungTran FranckDernoncourt WalterChang 3491–3499 @@ -3750,7 +3750,7 @@ Choose Your Own Adventure: Paired Suggestions in Collaborative Writing for Evaluating Story Generation Models ElizabethClark - Noah A.Smith + Noah A.Smith 3566–3575 Story generation is an open-ended and subjective task, which poses a challenge for evaluating story generation models. We present Choose Your Own Adventure, a collaborative writing setup for pairwise model evaluation. Two models generate suggestions to people as they write a short story; we ask writers to choose one of the two suggestions, and we observe which model’s suggestions they prefer. The setup also allows further analysis based on the revisions people make to the suggestions. We show that these measures, combined with automatic metrics, provide an informative picture of the models’ performance, both in cases where the differences in generation methods are small (nucleus vs. top-k sampling) and large (GPT2 vs. Fusion models). 2021.naacl-main.279 @@ -3768,7 +3768,7 @@ WenhuiWang XiaSong Xian-LingMao - HeyanHuang + HeyanHuang MingZhou 3576–3588 In this work, we present an information-theoretic framework that formulates cross-lingual language model pre-training as maximizing mutual information between multilingual-multi-granularity texts. The unified view helps us to better understand the existing methods for learning cross-lingual representations. More importantly, inspired by the framework, we propose a new pre-training task based on contrastive learning. Specifically, we regard a bilingual sentence pair as two views of the same meaning and encourage their encoded representations to be more similar than the negative examples. By leveraging both monolingual and parallel corpora, we jointly train the pretext tasks to improve the cross-lingual transferability of pre-trained models. Experimental results on several benchmarks show that our approach achieves considerably better performance. The code and pre-trained models are available at https://aka.ms/infoxlm. @@ -3793,7 +3793,7 @@ Code-Mixing on Sesame Street: Dawn of the Adversarial Polyglots SamsonTan - ShafiqJoty + ShafiqJoty 3596–3616 Multilingual models have demonstrated impressive cross-lingual transfer performance. However, test sets like XNLI are monolingual at the example level. In multilingual communities, it is common for polyglots to code-mix when conversing with each other. Inspired by this phenomenon, we present two strong black-box adversarial attacks (one word-level, one phrase-level) for multilingual models that push their ability to handle code-mixed sentences to the limit. The former uses bilingual dictionaries to propose perturbations and translations of the clean example for sense disambiguation. The latter directly aligns the clean example with its translations before extracting phrases as perturbations. Our phrase-level attack has a success rate of 89.75% against XLM-R-large, bringing its average accuracy of 79.85 down to 8.18 on XNLI. Finally, we propose an efficient adversarial training scheme that trains in the same number of steps as the original model and show that it creates more language-invariant representations, improving clean and robust accuracy in the absence of lexical overlap without degrading performance on the original examples. 2021.naacl-main.282 @@ -3808,7 +3808,7 @@ MeryemM’hamdi Doo SoonKim FranckDernoncourt - TrungBui + TrungBui XiangRen JonathanMay 3617–3632 @@ -3873,7 +3873,7 @@ PatVerga HaitianSun LivioBaldini Soares - WilliamCohen + WilliamCohen 3678–3691 Past research has demonstrated that large neural language models (LMs) encode surprising amounts of factual information: however, augmenting or modifying this information requires modifying a corpus and retraining, which is computationally expensive. To address this problem, we develop a neural LM that includes an interpretable neuro-symbolic KB in the form of a “fact memory”. Each element of the fact memory is formed from a triple of vectors, where each vector corresponds to a KB entity or relation. Our LM improves performance on knowledge-intensive question-answering tasks, sometimes dramatically, including a 27 point increase in one setting of WebQuestionsSP over a state-of-the-art open-book model, despite using 5% of the parameters. Most interestingly, we demonstrate that the model can be modified, without any re-training, by updating the fact memory. 2021.naacl-main.288 @@ -3952,7 +3952,7 @@ An Empirical Investigation of Bias in the Multimodal Analysis of Financial Earnings Calls RamitSawhney ArshiyaAggarwal - Rajiv RatnShah + Rajiv RatnShah 3751–3757 Volatility prediction is complex due to the stock market’s stochastic nature. Existing research focuses on the textual elements of financial disclosures like earnings calls transcripts to forecast stock volatility and risk, but ignores the rich acoustic features in the company executives’ speech. Recently, new multimodal approaches that leverage the verbal and vocal cues of speakers in financial disclosures significantly outperform previous state-of-the-art approaches demonstrating the benefits of multimodality and speech. However, the financial realm is still plagued with a severe underrepresentation of various communities spanning diverse demographics, gender, and native speech. While multimodal models are better risk forecasters, it is imperative to also investigate the potential bias that these models may learn from the speech signals of company executives. In this work, we present the first study to discover the gender bias in multimodal volatility prediction due to gender-sensitive audio features and fewer female executives in earnings calls of one of the world’s biggest stock indexes, the S&P 500 index. We quantitatively analyze bias as error disparity and investigate the sources of this bias. Our results suggest that multimodal neural financial models accentuate gender-based stereotypes. 2021.naacl-main.294 @@ -3993,7 +3993,7 @@ ShrimaiPrabhumoye BrendonBoldt RuslanSalakhutdinov - Alan WBlack + Alan WBlack 3784–3798 Recent work in natural language processing (NLP) has focused on ethical challenges such as understanding and mitigating bias in data and algorithms; identifying objectionable content like hate speech, stereotypes and offensive language; and building frameworks for better system design and data handling practices. However, there has been little discussion about the ethical foundations that underlie these efforts. In this work, we study one ethical theory, namely deontological ethics, from the perspective of NLP. In particular, we focus on the generalization principle and the respect for autonomy through informed consent. We provide four case studies to demonstrate how these principles can be used with NLP systems. We also recommend directions to avoid the ethical issues in these systems. 2021.naacl-main.297 @@ -4020,7 +4020,7 @@ On the Impact of Random Seeds on the Fairness of Clinical Classifiers SilvioAmir Jan-Willemvan de Meent - ByronWallace + ByronWallace 3808–3823 Recent work has shown that fine-tuning large networks is surprisingly sensitive to changes in random seed(s). We explore the implications of this phenomenon for model fairness across demographic groups in clinical prediction tasks over electronic health records (EHR) in MIMIC-III —— the standard dataset in clinical NLP research. Apparent subgroup performance varies substantially for seeds that yield similar overall performance, although there is no evidence of a trade-off between overall and subgroup performance. However, we also find that the small sample sizes inherent to looking at intersections of minority groups and somewhat rare conditions limit our ability to accurately estimate disparities. Further, we find that jointly optimizing for high overall performance and low disparities does not yield statistically significant improvements. Our results suggest that fairness work using MIMIC-III should carefully account for variations in apparent differences that may arise from stochasticity and small sample sizes. 2021.naacl-main.299 @@ -4044,7 +4044,7 @@ Discourse Probing of Pretrained Language Models FajriKoto Jey HanLau - TimothyBaldwin + TimothyBaldwin 3849–3864 Existing work on probing of pretrained language models (LMs) has predominantly focused on sentence-level syntactic tasks. In this paper, we introduce document-level discourse probing to evaluate the ability of pretrained LMs to capture document-level relations. We experiment with 7 pretrained LMs, 4 languages, and 7 discourse probing tasks, and find BART to be overall the best model at capturing discourse — but only in its encoder, with BERT performing surprisingly well as the baseline model. Across the different models, there are substantial differences in which layers best capture discourse information, and large disparities between models. 2021.naacl-main.301 @@ -4060,7 +4060,7 @@ YingceXia ShufangXie TaoQin - XinyuDai + XinyuDai Tie-YanLiu 3865–3878 Transformer architecture achieves great success in abundant natural language processing tasks. The over-parameterization of the Transformer model has motivated plenty of works to alleviate its overfitting for superior performances. With some explorations, we find simple techniques such as dropout, can greatly boost model performance with a careful design. Therefore, in this paper, we integrate different dropout techniques into the training of Transformer models. Specifically, we propose an approach named UniDrop to unites three different dropout techniques from fine-grain to coarse-grain, i.e., feature dropout, structure dropout, and data dropout. Theoretically, we demonstrate that these three dropouts play different roles from regularization perspectives. Empirically, we conduct experiments on both neural machine translation and text classification benchmark datasets. Extensive results indicate that Transformer with UniDrop can achieve around 1.5 BLEU improvement on IWSLT14 translation tasks, and better accuracy for the classification even using strong pre-trained RoBERTa as backbone. @@ -4098,7 +4098,7 @@ Double Perturbation: On the Robustness of Robustness and Counterfactual Bias Evaluation - ChongZhang + ChongZhang JieyuZhao HuanZhang Kai-WeiChang @@ -4117,7 +4117,7 @@ JatinGanhotra HuiWan ChulakaGunasekara - SachindraJoshi + SachindraJoshi YangfengJi 3917–3930 Explaining neural network models is important for increasing their trustworthiness in real-world applications. Most existing methods generate post-hoc explanations for neural network models by identifying individual feature attributions or detecting interactions between adjacent features. However, for models with text pairs as inputs (e.g., paraphrase identification), existing methods are not sufficient to capture feature interactions between two texts and their simple extension of computing all word-pair interactions between two texts is computationally inefficient. In this work, we propose the Group Mask (GMASK) method to implicitly detect word correlations by grouping correlated words from the input text pair together and measure their contribution to the corresponding NLP tasks as a whole. The proposed method is evaluated with two different model architectures (decomposable attention model and BERT) across four datasets, including natural language inference and paraphrase identification tasks. Experiments show the effectiveness of GMASK in providing faithful explanations to these models. @@ -4183,8 +4183,8 @@ RuiWang KehaiChen MasaoUtiyama - EiichiroSumita - TiejunZhao + EiichiroSumita + TiejunZhao 3975–3981 Unsupervised neural machine translation (UNMT) that relies solely on massive monolingual corpora has achieved remarkable results in several translation tasks. However, in real-world scenarios, massive monolingual corpora do not exist for some extremely low-resource languages such as Estonian, and UNMT systems usually perform poorly when there is not adequate training corpus for one language. In this paper, we first define and analyze the unbalanced training data scenario for UNMT. Based on this scenario, we propose UNMT self-training mechanisms to train a robust UNMT system and improve its performance in this case. Experimental results on several language pairs show that the proposed methods substantially outperform conventional UNMT systems. 2021.naacl-main.311 @@ -4254,7 +4254,7 @@ RamitSawhney ArnavWadhwa ShivamAgarwal - Rajiv RatnShah + Rajiv RatnShah 4018–4030 It is challenging to design profitable and practical trading strategies, as stock price movements are highly stochastic, and the market is heavily influenced by chaotic data across sources like news and social media. Existing NLP approaches largely treat stock prediction as a classification or regression problem and are not optimized to make profitable investment decisions. Further, they do not model the temporal dynamics of large volumes of diversely influential text to which the market responds quickly. Building on these shortcomings, we propose a deep reinforcement learning approach that makes time-aware decisions to trade stocks while optimizing profit using textual data. Our method outperforms state-of-the-art in terms of risk-adjusted returns in trading simulations on two benchmarks: Tweets (English) and financial news (Chinese) pertaining to two major indexes and four global stock markets. Through extensive experiments and studies, we build the case for our method as a tool for quantitative trading. 2021.naacl-main.316 @@ -4294,7 +4294,7 @@ MohammadKachuee HaoYuan Young-BumKim - SungjinLee + SungjinLee 4053–4064 Turn-level user satisfaction is one of the most important performance metrics for conversational agents. It can be used to monitor the agent’s performance and provide insights about defective user experiences. While end-to-end deep learning has shown promising results, having access to a large number of reliable annotated samples required by these methods remains challenging. In a large-scale conversational system, there is a growing number of newly developed skills, making the traditional data collection, annotation, and modeling process impractical due to the required annotation costs and the turnaround times. In this paper, we suggest a self-supervised contrastive learning approach that leverages the pool of unlabeled data to learn user-agent interactions. We show that the pre-trained models using the self-supervised objective are transferable to the user satisfaction prediction. In addition, we propose a novel few-shot transfer learning approach that ensures better transferability for very small sample sizes. The suggested few-shot method does not require any inner loop optimization process and is scalable to very large datasets and complex models. Based on our experiments using real data from a large-scale commercial system, the suggested approach is able to significantly reduce the required number of annotations, while improving the generalization on unseen skills. 2021.naacl-main.319 @@ -4317,7 +4317,7 @@ Grey-box Adversarial Attack And Defence For Sentiment Classification YingXu XuZhong - AntonioJimeno Yepes + AntonioJimeno Yepes Jey HanLau 4078–4087 We introduce a grey-box adversarial attack and defence framework for sentiment classification. We address the issues of differentiability, label preservation and input reconstruction for adversarial attack and defence in one unified framework. Our results show that once trained, the attacking model is capable of generating high-quality adversarial examples substantially faster (one order of magnitude less in time) than state-of-the-art attacking methods. These examples also preserve the original sentiment according to human evaluation. Additionally, our framework produces an improved classifier that is robust in defending against multiple adversarial attacking methods. Code is available at: https://github.com/ibm-aur-nlp/adv-def-text-dist. @@ -4331,7 +4331,7 @@ How low is too low? A monolingual take on lemmatisation in <fixed-case>I</fixed-case>ndian languages KumarSaunack KumarSaurav - PushpakBhattacharyya + PushpakBhattacharyya 4088–4094 Lemmatization aims to reduce the sparse data problem by relating the inflected forms of a word to its dictionary form. Most prior work on ML based lemmatization has focused on high resource languages, where data sets (word forms) are readily available. For languages which have no linguistic work available, especially on morphology or in languages where the computational realization of linguistic rules is complex and cumbersome, machine learning based lemmatizers are the way togo. In this paper, we devote our attention to lemmatisation for low resource, morphologically rich scheduled Indian languages using neural methods. Here, low resource means only a small number of word forms are available. We perform tests to analyse the variance in monolingual models’ performance on varying the corpus size and contextual morphological tag data for training. We show that monolingual approaches with data augmentation can give competitive accuracy even in the low resource setting, which augurs well for NLP in low resource setting. 2021.naacl-main.322 @@ -4343,7 +4343,7 @@ Causal Effects of Linguistic Properties ReidPryzant DallasCard - DanJurafsky + DanJurafsky VictorVeitch DhanyaSridhar 4095–4109 @@ -4368,7 +4368,7 @@ ZhiyiMa TristanThrush SebastianRiedel - ZeerakWaseem + ZeerakWaseem PontusStenetorp RobinJia MohitBansal @@ -4384,8 +4384,8 @@ Translational <fixed-case>NLP</fixed-case>: A New Paradigm and General Principles for Natural Language Processing Research DenisNewman-Griffis - Jill FainLehman - CarolynRosé + Jill FainLehman + CarolynRosé HarryHochheiser 4125–4138 Natural language processing (NLP) research combines the study of universal principles, through basic science, with applied science targeting specific use cases and settings. However, the process of exchange between basic NLP and applications is often assumed to emerge naturally, resulting in many innovations going unapplied and many important questions left unstudied. We describe a new paradigm of Translational NLP, which aims to structure and facilitate the processes by which basic and applied NLP research inform one another. Translational NLP thus presents a third research paradigm, focused on understanding the challenges posed by application needs and how these challenges can drive innovation in basic science and technology design. We show that many significant advances in NLP research have emerged from the intersection of basic principles with application needs, and present a conceptual framework outlining the stakeholders and key questions in translational research. Our framework provides a roadmap for developing Translational NLP as a dedicated research area, and identifies general translational principles to facilitate exchange between basic and applied research. @@ -4408,7 +4408,7 @@ Probing for Bridging Inference in Transformer Language Models - OnkarPandit + OnkarPandit YufangHou 4153–4163 We probe pre-trained transformer language models for bridging inference. We first investigate individual attention heads in BERT and observe that attention heads at higher layers prominently focus on bridging relations in-comparison with the lower and middle layers, also, few specific attention heads concentrate consistently on bridging. More importantly, we consider language models as a whole in our second approach where bridging anaphora resolution is formulated as a masked token prediction task (Of-Cloze test). Our formulation produces optimistic results without any fine-tuning, which indicates that pre-trained language models substantially capture bridging inference. Our further investigation shows that the distance between anaphor-antecedent and the context provided to language models play an important role in the inference. @@ -4432,9 +4432,9 @@ Stay Together: A System for Single and Split-antecedent Anaphora Resolution JuntaoYu - Nafise SadatMoosavi + Nafise SadatMoosavi SilviuPaun - MassimoPoesio + MassimoPoesio 4174–4184 The state-of-the-art on basic, single-antecedent anaphora has greatly improved in recent years. Researchers have therefore started to pay more attention to more complex cases of anaphora such as split-antecedent anaphora, as in “Time-Warner is considering a legal challenge to Telecommunications Inc’s plan to buy half of Showtime Networks Inc–a move that could lead to all-out war between the two powerful companies”. Split-antecedent anaphora is rarer and more complex to resolve than single-antecedent anaphora; as a result, it is not annotated in many datasets designed to test coreference, and previous work on resolving this type of anaphora was carried out in unrealistic conditions that assume gold mentions and/or gold split-antecedent anaphors are available. These systems also focus on split-antecedent anaphors only. In this work, we introduce a system that resolves both single and split-antecedent anaphors, and evaluate it in a more realistic setting that uses predicted mentions. We also start addressing the question of how to evaluate single and split-antecedent anaphors together using standard coreference evaluation metrics. 2021.naacl-main.329 @@ -4468,7 +4468,7 @@ Multi-source Neural Topic Modeling in Multi-view Embedding Spaces PankajGupta YatinChaudhary - HinrichSchütze + HinrichSchütze 4205–4217 Though word embeddings and topics are complementary representations, several past works have only used pretrained word embeddings in (neural) topic modeling to address data sparsity in short-text or small collection of documents. This work presents a novel neural topic modeling framework using multi-view embed ding spaces: (1) pretrained topic-embeddings, and (2) pretrained word-embeddings (context-insensitive from Glove and context-sensitive from BERT models) jointly from one or many sources to improve topic quality and better deal with polysemy. In doing so, we first build respective pools of pretrained topic (i.e., TopicPool) and word embeddings (i.e., WordPool). We then identify one or more relevant source domain(s) and transfer knowledge to guide meaningful learning in the sparse target domain. Within neural topic modeling, we quantify the quality of topics and document representations via generalization (perplexity), interpretability (topic coherence) and information retrieval (IR) using short-text, long-text, small and large document collections from news and medical domains. Introducing the multi-source multi-view embedding spaces, we have shown state-of-the-art neural topic modeling using 6 source (high-resource) and 5 target (low-resource) corpora. 2021.naacl-main.332 @@ -4482,7 +4482,7 @@ JiminHuang PanDu MinPeng - Jian-YunNie + Jian-YunNie 4218–4227 Graph convolutional networks (GCNs) have been applied recently to text classification and produced an excellent performance. However, existing GCN-based methods do not assume an explicit latent semantic structure of documents, making learned representations less effective and difficult to interpret. They are also transductive in nature, thus cannot handle out-of-graph documents. To address these issues, we propose a novel model named inductive Topic Variational Graph Auto-Encoder (T-VGAE), which incorporates a topic model into variational graph-auto-encoder (VGAE) to capture the hidden semantic information between documents and words. T-VGAE inherits the interpretability of the topic model and the efficient information propagation mechanism of VGAE. It learns probabilistic representations of words and documents by jointly encoding and reconstructing the global word-level graph and bipartite graphs of documents, where each document is considered individually and decoupled from the global correlation graph so as to enable inductive learning. Our experiments on several benchmark datasets show that our method outperforms the existing competitive models on supervised and semi-supervised text classification, as well as unsupervised text representation learning. In addition, it has higher interpretability and is able to deal with unseen documents. 2021.naacl-main.333 @@ -4549,7 +4549,7 @@ ShrimaiPrabhumoye KazumaHashimoto YingboZhou - Alan WBlack + Alan WBlack RuslanSalakhutdinov 4274–4287 Document grounded generation is the task of using the information provided in a document to improve text generation. This work focuses on two different document grounded generation tasks: Wikipedia Update Generation task and Dialogue response generation. Our work introduces two novel adaptations of large scale pre-trained encoder-decoder models focusing on building context driven representation of the document and enabling specific attention to the information in the document. Additionally, we provide a stronger BART baseline for these tasks. Our proposed techniques outperform existing methods on both automated (at least 48% increase in BLEU-4 points) and human evaluation for closeness to reference and relevance to the document. Furthermore, we perform comprehensive manual inspection of the generated output and categorize errors to provide insights into future directions in modeling these tasks. @@ -4591,7 +4591,7 @@ BowenTan ZichaoYang MaruanAl-Shedivat - EricXing + EricXing ZhitingHu 4313–4324 Large-scale language models (LMs) pretrained on massive corpora of text, such as GPT-2, are powerful open-domain text generators. However, as our systematic examination reveals, it is still challenging for such models to generate coherent long passages of text (e.g., 1000 tokens), especially when the models are fine-tuned to the target domain on a small corpus. Previous planning-then-generation methods also fall short of producing such long text in various domains. To overcome the limitations, we propose a simple but effective method of generating text in a progressive manner, inspired by generating images from low to high resolution. Our method first produces domain-specific content keywords and then progressively refines them into complete passages in multiple stages. The simple design allows our approach to take advantage of pretrained LMs at each stage and effectively adapt to any target domain given only a small set of examples. We conduct a comprehensive empirical study with a broad set of evaluation metrics, and show that our approach significantly improves upon the fine-tuned large LMs and various planning-then-generation methods in terms of quality and sample efficiency. Human evaluation also validates that our model generations are more coherent. @@ -4616,7 +4616,7 @@ SarikGhazarian ZixiLiu AkashS M - RalphWeischedel + RalphWeischedel AramGalstyan NanyunPeng 4334–4344 @@ -4728,7 +4728,7 @@ SianGooding EkaterinaKochmar Seid MuhieYimam - ChrisBiemann + ChrisBiemann 4439–4449 Lexical complexity is a highly subjective notion, yet this factor is often neglected in lexical simplification and readability systems which use a ”one-size-fits-all” approach. In this paper, we investigate which aspects contribute to the notion of lexical complexity in various groups of readers, focusing on native and non-native speakers of English, and how the notion of complexity changes depending on the proficiency level of a non-native reader. To facilitate reproducibility of our approach and foster further research into these aspects, we release a dataset of complex words annotated by readers with different backgrounds. 2021.naacl-main.351 @@ -4764,7 +4764,7 @@ On Biasing Transformer Attention Towards Monotonicity - AnnetteRios + AnnetteRios ChantalAmrhein NoëmiAepli RicoSennrich @@ -4783,7 +4783,7 @@ Madeleinevan Zuylen SravanthiParasa EricHorvitz - DanielWeld + DanielWeld RoySchwartz HannanehHajishirzi 4489–4503 @@ -4810,7 +4810,7 @@ MatthewMatero Aravind ReddyRavula HuyVu - H. AndrewSchwartz + H. AndrewSchwartz 4515–4532 In human-level NLP tasks, such as predicting mental health, personality, or demographics, the number of observations is often smaller than the standard 768+ hidden state sizes of each layer within modern transformer-based language models, limiting the ability to effectively leverage transformers. Here, we provide a systematic study on the role of dimension reduction methods (principal components analysis, factorization techniques, or multi-layer auto-encoders) as well as the dimensionality of embedding vectors and sample sizes as a function of predictive performance. We first find that fine-tuning large models with a limited amount of data pose a significant difficulty which can be overcome with a pre-trained dimension reduction regime. RoBERTa consistently achieves top performance in human-level tasks, with PCA giving benefit over other reduction methods in better handling users that write longer texts. Finally, we observe that a majority of the tasks achieve results comparable to the best performance with just 1/12 of the embedding dimensions. 2021.naacl-main.357 @@ -4824,7 +4824,7 @@ EvanSholle AshleyBeecy SubhiAl’Aref - YifanPeng + YifanPeng 4533–4538 Utilizing clinical texts in survival analysis is difficult because they are largely unstructured. Current automatic extraction models fail to capture textual information comprehensively since their labels are limited in scope. Furthermore, they typically require a large amount of data and high-quality expert annotations for training. In this work, we present a novel method of using BERT-based hidden layer representations of clinical texts as covariates for proportional hazards models to predict patient survival outcomes. We show that hidden layers yield notably more accurate predictions than predefined features, outperforming the previous baseline model by 5.7% on average across C-index and time-dependent AUC. We make our work publicly available at https://github.com/bionlplab/heart_failure_mortality. 2021.naacl-main.358 @@ -4916,7 +4916,7 @@ KyleLo IzBeltagy ArmanCohan - Noah A.Smith + Noah A.Smith MattGardner 4599–4610 Readers of academic research papers often read with the goal of answering specific questions. Question Answering systems that can answer those questions can make consumption of the content much more efficient. However, building such tools requires data that reflect the difficulty of the task arising from complex reasoning about claims made in multiple parts of a paper. In contrast, existing information-seeking question answering datasets usually contain questions about generic factoid-type information. We therefore present Qasper, a dataset of 5049 questions over 1585 Natural Language Processing papers. Each question is written by an NLP practitioner who read only the title and abstract of the corresponding paper, and the question seeks information present in the full text. The questions are then answered by a separate set of NLP practitioners who also provide supporting evidence to answers. We find that existing models that do well on other QA tasks do not perform well on answering these questions, underperforming humans by at least 27 F1 points when answering them from entire papers, motivating further research in document-grounded, information-seeking QA, which our dataset is designed to facilitate. @@ -4927,12 +4927,12 @@ Differentiable Open-Ended Commonsense Reasoning - Bill YuchenLin + Bill YuchenLin HaitianSun BhuwanDhingra ManzilZaheer XiangRen - WilliamCohen + WilliamCohen 4611–4625 Current commonsense reasoning research focuses on developing models that use commonsense knowledge to answer multiple-choice questions. However, systems designed to answer multiple-choice questions may not be useful in applications that do not provide a small list of candidate answers to choose from. As a step towards making commonsense reasoning research more realistic, we propose to study open-ended commonsense reasoning (OpenCSR) — the task of answering a commonsense question without any pre-defined choices — using as a resource only a corpus of commonsense facts written in natural language. OpenCSR is challenging due to a large decision space, and because many questions require implicit multi-hop reasoning. As an approach to OpenCSR, we propose DrFact, an efficient Differentiable model for multi-hop Reasoning over knowledge Facts. To evaluate OpenCSR methods, we adapt several popular commonsense reasoning benchmarks, and collect multiple new answers for each test question via crowd-sourcing. Experiments show that DrFact outperforms strong baseline methods by a large margin. 2021.naacl-main.366 @@ -4946,7 +4946,7 @@ SongFeng ChulakaGunasekara Siva SankalpPatel - SachindraJoshi + SachindraJoshi LuisLastras 4626–4634 Machine reading comprehension is a challenging task especially for querying documents with deep and interconnected contexts. Transformer-based methods have shown advanced performances on this task; however, most of them still treat documents as a flat sequence of tokens. This work proposes a new Transformer-based method that reads a document as tree slices. It contains two modules for identifying more relevant text passage and the best answer span respectively, which are not only jointly trained but also jointly consulted at inference time. Our evaluation results show that our proposed method outperforms several competitive baseline approaches on two datasets from varied domains. @@ -4960,7 +4960,7 @@ ChenZhao ChenyanXiong JordanBoyd-Graber - HalDaumé III + HalDaumé III 4635–4641 Complex question answering often requires finding a reasoning chain that consists of multiple evidence pieces. Current approaches incorporate the strengths of structured knowledge and unstructured text, assuming text corpora is semi-structured. Building on dense retrieval methods, we propose a new multi-step retrieval approach (BeamDR) that iteratively forms an evidence chain through beam search in dense representations. When evaluated on multi-hop question answering, BeamDR is competitive to state-of-the-art systems, without using any semi-structured information. Through query composition in dense space, BeamDR captures the implicit relationships between evidence in the reasoning chain. The code is available at https://github.com/henryzhao5852/BeamDR. 2021.naacl-main.368 @@ -4982,7 +4982,7 @@ Scalar Adjective Identification and Multilingual Ranking - AinaGarí Soler + AinaGarí Soler MariannaApidianaki 4653–4660 The intensity relationship that holds between scalar adjectives (e.g., nice < great < wonderful) is highly relevant for natural language inference and common-sense reasoning. Previous research on scalar adjective ranking has focused on English, mainly due to the availability of datasets for evaluation. We introduce a new multilingual dataset in order to promote research on scalar adjectives in new languages. We perform a series of experiments and set performance baselines on this dataset, using monolingual and multilingual contextual language models. Additionally, we introduce a new binary classification task for English scalar adjective identification which examines the models’ ability to distinguish scalar from relational adjectives. We probe contextualised representations and report baseline results for future comparison on this task. @@ -5044,7 +5044,7 @@ XuebinQin NawshadFarruque LiliMou - OsmarZaïane + OsmarZaïane 4717–4724 Multi-label emotion classification is an important task in NLP and is essential to many applications. In this work, we propose a sequence-to-emotion (Seq2Emo) approach, which implicitly models emotion correlations in a bi-directional decoder. Experiments on SemEval’18 and GoEmotions datasets show that our approach outperforms state-of-the-art methods (without using external data). In particular, Seq2Emo outperforms the binary relevance (BR) and classifier chain (CC) approaches in a fair setting. 2021.naacl-main.375 @@ -5091,7 +5091,7 @@ Adversarial Learning for Zero-Shot Stance Detection on Social Media EmilyAllaway MalavikaSrikanth - KathleenMcKeown + KathleenMcKeown 4756–4767 Stance detection on social media can help to identify and understand slanted news or commentary in everyday life. In this work, we propose a new model for zero-shot stance detection on Twitter that uses adversarial learning to generalize across topics. Our model achieves state-of-the-art performance on a number of unseen test topics with minimal computational costs. In addition, we extend zero-shot stance detection to topics not previously considered, highlighting future directions for zero-shot transfer. 2021.naacl-main.379 @@ -5138,7 +5138,7 @@ EmilyAlsentzer MertKetenci JasonZucker - NoémieElhadad + NoémieElhadad 4794–4811 Summarization of clinical narratives is a long-standing research problem. Here, we introduce the task of hospital-course summarization. Given the documentation authored throughout a patient’s hospitalization, generate a paragraph that tells the story of the patient admission. We construct an English, text-to-text dataset of 109,000 hospitalizations (2M source notes) and their corresponding summary proxy: the clinician-authored “Brief Hospital Course” paragraph written as part of a discharge note. Exploratory analyses reveal that the BHC paragraphs are highly abstractive with some long extracted fragments; are concise yet comprehensive; differ in style and content organization from the source notes; exhibit minimal lexical cohesion; and represent silver-standard references. Our analysis identifies multiple implications for modeling this complex, multi-document summarization task. 2021.naacl-main.382 @@ -5174,7 +5174,7 @@ What Will it Take to Fix Benchmarking in Natural Language Understanding? - Samuel R.Bowman + Samuel R.Bowman GeorgeDahl 4843–4855 Evaluation for many natural language understanding (NLU) tasks is broken: Unreliable and biased systems score so highly on standard benchmarks that there is little room for researchers who develop better systems to demonstrate their improvements. The recent trend to abandon IID benchmarks in favor of adversarially-constructed, out-of-distribution test sets ensures that current models will perform poorly, but ultimately only obscures the abilities that we want our benchmarks to measure. In this position paper, we lay out four criteria that we argue NLU benchmarks should meet. We argue most current benchmarks fail at these criteria, and that adversarial data collection does not meaningfully address the causes of these failures. Instead, restoring a healthy evaluation ecosystem will require significant progress in the design of benchmark datasets, the reliability with which they are annotated, their size, and the ways they handle social bias. @@ -5203,8 +5203,8 @@ RamitSawhney PuneetMathur TaruJain - Akash KumarGautam - Rajiv RatnShah + Akash KumarGautam + Rajiv RatnShah 4881–4892 The #MeToo movement on social media platforms initiated discussions over several facets of sexual harassment in our society. Prior work by the NLP community for automated identification of the narratives related to sexual abuse disclosures barely explored this social phenomenon as an independent task. However, emotional attributes associated with textual conversations related to the #MeToo social movement are complexly intertwined with such narratives. We formulate the task of identifying narratives related to the sexual abuse disclosures in online posts as a joint modeling task that leverages their emotional attributes through multitask learning. Our results demonstrate that positive knowledge transfer via context-specific shared representations of a flexible cross-stitched parameter sharing model helps establish the inherent benefit of jointly modeling tasks related to sexual abuse disclosures with emotion classification from the text in homogeneous and heterogeneous settings. We show how for more domain-specific tasks related to sexual abuse disclosures such as sarcasm identification and dialogue act (refutation, justification, allegation) classification, homogeneous multitask learning is helpful, whereas for more general tasks such as stance and hate speech detection, heterogeneous multitask learning with emotion classification works better. 2021.naacl-main.387 @@ -5241,7 +5241,7 @@ Identifying inherent disagreement in natural language inference Xinliang FrederickZhang - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe 4908–4915 Natural language inference (NLI) is the task of determining whether a piece of text is entailed, contradicted by or unrelated to another piece of text. In this paper, we investigate how to tease systematic inferences (i.e., items for which people agree on the NLI label) apart from disagreement items (i.e., items which lead to different annotations), which most prior work has overlooked. To distinguish systematic inferences from disagreement items, we propose Artificial Annotators (AAs) to simulate the uncertainty in the annotation process by capturing the modes in annotations. Results on the CommitmentBank, a corpus of naturally occurring discourses in English, confirm that our approach performs statistically significantly better than all baselines. We further show that AAs learn linguistic patterns and context-dependent reasoning. 2021.naacl-main.390 @@ -5252,7 +5252,7 @@ Modeling Human Mental States with an Entity-based Narrative Graph I-TaLee - Maria LeonorPacheco + Maria LeonorPacheco DanGoldwasser 4916–4926 Understanding narrative text requires capturing characters’ motivations, goals, and mental states. This paper proposes an Entity-based Narrative Graph (ENG) to model the internal- states of characters in a story. We explicitly model entities, their interactions and the context in which they appear, and learn rich representations for them. We experiment with different task-adaptive pre-training objectives, in-domain training, and symbolic inference to capture dependencies between different decisions in the output space. We evaluate our model on two narrative understanding tasks: predicting character mental states, and desire fulfillment, and conduct a qualitative analysis. @@ -5264,7 +5264,7 @@ A Simple and Efficient Multi-Task Learning Approach for Conditioned Dialogue Generation YanZeng - Jian-YunNie + Jian-YunNie 4927–4939 Conditioned dialogue generation suffers from the scarcity of labeled responses. In this work, we exploit labeled non-dialogue text data related to the condition, which are much easier to collect. We propose a multi-task learning approach to leverage both labeled dialogue and text data. The 3 tasks jointly optimize the same pre-trained Transformer – conditioned dialogue generation task on the labeled dialogue data, conditioned language encoding task and conditioned language generation task on the labeled text data. Experimental results show that our approach outperforms the state-of-the-art models by leveraging the labeled texts, and it also obtains larger improvement in performance comparing to the previous methods to leverage text data. 2021.naacl-main.392 @@ -5300,8 +5300,8 @@ Paragraph-level Simplification of Medical Texts AshwinDevaraj - IainMarshall - ByronWallace + IainMarshall + ByronWallace Junyi JessyLi 4972–4984 We consider the problem of learning to simplify medical texts. This is important because most reliable, up-to-date information in biomedicine is dense with jargon and thus practically inaccessible to the lay audience. Furthermore, manual simplification does not scale to the rapidly growing body of biomedical literature, motivating the need for automated approaches. Unfortunately, there are no large-scale resources available for this task. In this work we introduce a new corpus of parallel texts in English comprising technical and lay summaries of all published evidence pertaining to different clinical topics. We then propose a new metric based on likelihood scores from a masked language model pretrained on scientific texts. We show that this automated measure better differentiates between technical and lay summaries than existing heuristics. We introduce and evaluate baseline encoder-decoder Transformer models for simplification and propose a novel augmentation to these in which we explicitly penalize the decoder for producing “jargon” terms; we find that this yields improvements over baselines in terms of readability. @@ -5369,7 +5369,7 @@ LiqunChen ChrisBrockett Ming-TingSun - BillDolan + BillDolan 5053–5069 Adversarial examples expose the vulnerabilities of natural language processing (NLP) models, and can be used to evaluate and improve their robustness. Existing techniques of generating such examples are typically driven by local heuristic rules that are agnostic to the context, often resulting in unnatural and ungrammatical outputs. This paper presents CLARE, a ContextuaLized AdversaRial Example generation model that produces fluent and grammatical outputs through a mask-then-infill procedure. CLARE builds on a pre-trained masked language model and modifies the inputs in a context-aware manner. We propose three contextualized perturbations, Replace, Insert and Merge, that allow for generating outputs of varied lengths. CLARE can flexibly combine these perturbations and apply them at any position in the inputs, and is thus able to attack the victim model more effectively with fewer edits. Extensive experiments and human evaluation demonstrate that CLARE outperforms the baselines in terms of attack success rate, textual similarity, fluency and grammaticality. 2021.naacl-main.400 @@ -5392,7 +5392,7 @@ Evaluating the Values of Sources in Transfer Learning - Md RizwanParvez + Md RizwanParvez Kai-WeiChang 5084–5116 Transfer learning that adapts a model trained on data-rich sources to low-resource targets has been widely applied in natural language processing (NLP). However, when training a transfer model over multiple sources, not every source is equally useful for the target. To better transfer a model, it is essential to understand the values of the sources. In this paper, we develop , an efficient source valuation framework for quantifying the usefulness of the sources (e.g., ) in transfer learning based on the Shapley value method. Experiments and comprehensive analyses on both cross-domain and cross-lingual transfers demonstrate that our framework is not only effective in choosing useful transfer sources but also the source values match the intuitive source-target similarity. @@ -5416,7 +5416,7 @@ On the Inductive Bias of Masked Language Modeling: From Statistical to Syntactic Dependencies TianyiZhang - Tatsunori B.Hashimoto + Tatsunori B.Hashimoto 5131–5146 We study how masking and predicting tokens in an unsupervised fashion can give rise to linguistic structures and downstream performance gains. Recent theories have suggested that pretrained language models acquire useful inductive biases through masks that implicitly act as cloze reductions for downstream tasks. While appealing, we show that the success of the random masking strategy used in practice cannot be explained by such cloze-like masks alone. We construct cloze-like masks using task-specific lexicons for three different classification datasets and show that the majority of pretrained performance gains come from generic masks that are not associated with the lexicon. To explain the empirical success of these generic masks, we demonstrate a correspondence between the Masked Language Model (MLM) objective and existing methods for learning statistical dependencies in graphical models. Using this, we derive a method for extracting these learned statistical dependencies in MLMs and show that these dependencies encode useful inductive biases in the form of syntactic structures. In an unsupervised parsing evaluation, simply forming a minimum spanning tree on the implied statistical dependence structure outperforms a classic method for unsupervised parsing (58.74 vs. 55.91 UUAS). 2021.naacl-main.404 @@ -5429,8 +5429,8 @@ Chu-ChengLin AaronJaech XinLi - Matthew R.Gormley - JasonEisner + Matthew R.Gormley + JasonEisner 5147–5173 Standard autoregressive language models perform only polynomial-time computation to compute the probability of the next symbol. While this is attractive, it means they cannot model distributions whose next-symbol probability is hard to compute. Indeed, they cannot even model them well enough to solve associated easy decision problems for which an engineer might want to consult a language model. These limitations apply no matter how much computation and data are used to train the model, unless the model is given access to oracle parameters that grow superpolynomially in sequence length. Thus, simply training larger autoregressive language models is not a panacea for NLP. Alternatives include energy-based models (which give up efficient sampling) and latent-variable autoregressive models (which give up efficient scoring of a given string). Both are powerful enough to escape the above limitations. 2021.naacl-main.405 @@ -5497,7 +5497,7 @@ Learning How to Ask: Querying <fixed-case>LM</fixed-case>s with Mixtures of Soft Prompts GuanghuiQin - JasonEisner + JasonEisner 5203–5212 Natural-language prompts have recently been used to coax pretrained language models into performing other AI tasks, using a fill-in-the-blank paradigm (Petroni et al., 2019) or a few-shot extrapolation paradigm (Brown et al., 2020). For example, language models retain factual knowledge from their training corpora that can be extracted by asking them to “fill in the blank” in a sentential prompt. However, where does this prompt come from? We explore the idea of learning prompts by gradient descent—either fine-tuning prompts taken from previous work, or starting from random initialization. Our prompts consist of “soft words,” i.e., continuous vectors that are not necessarily word type embeddings from the language model. Furthermore, for each task, we optimize a mixture of prompts, learning which prompts are most effective and how to ensemble them. Across multiple English LMs and tasks, our approach hugely outperforms previous methods, showing that the implicit factual knowledge in language models was previously underestimated. Moreover, this knowledge is cheap to elicit: random initialization is nearly as good as informed initialization. 2021.naacl-main.410 @@ -5525,7 +5525,7 @@ <fixed-case>AVA</fixed-case>: an Automatic e<fixed-case>V</fixed-case>aluation Approach for Question Answering Systems - ThuyVu + ThuyVu AlessandroMoschitti 5223–5233 We introduce AVA, an automatic evaluation approach for Question Answering, which given a set of questions associated with Gold Standard answers (references), can estimate system Accuracy. AVA uses Transformer-based language models to encode question, answer, and reference texts. This allows for effectively assessing answer correctness using similarity between the reference and an automatic answer, biased towards the question semantics. To design, train, and test AVA, we built multiple large training, development, and test sets on public and industrial benchmarks. Our innovative solutions achieve up to 74.7% F1 score in predicting human judgment for single answers. Additionally, AVA can be used to evaluate the overall system Accuracy with an error lower than 7% at 95% of confidence when measured on several QA systems. @@ -5558,7 +5558,7 @@ ChrisBrockett ChrisQuirk JianfengGao - BillDolan + BillDolan 5259–5274 A prevailing paradigm in neural text generation is one-shot generation, where text is produced in a single step. The one-shot setting is inadequate, however, when the constraints the user wishes to impose on the generated text are dynamic, especially when authoring longer documents. We address this limitation with an interactive text generation setting in which the user interacts with the system by issuing commands to edit existing text. To this end, we propose a novel text editing task, and introduce WikiDocEdits, a dataset of single-sentence edits crawled from Wikipedia. We show that our Interactive Editor, a transformer-based model trained on this dataset, outperforms baselines and obtains positive results in both automatic and human evaluations. We present empirical and qualitative analyses of this model’s performance. 2021.naacl-main.414 @@ -5586,7 +5586,7 @@ YuhaoZhang EmilyTsai CurtisLanglotz - DanJurafsky + DanJurafsky 5288–5304 Neural image-to-text radiology report generation systems offer the potential to improve radiology reporting by reducing the repetitive process of report drafting and identifying possible medical errors. However, existing report generation systems, despite achieving high performances on natural language generation metrics such as CIDEr or BLEU, still suffer from incomplete and inconsistent generations. Here we introduce two new simple rewards to encourage the generation of factually complete and consistent radiology reports: one that encourages the system to generate radiology domain entities consistent with the reference, and one that uses natural language inference to encourage these entities to be described in inferentially consistent ways. We combine these with the novel use of an existing semantic equivalence metric (BERTScore). We further propose a report generation system that optimizes these rewards via reinforcement learning. On two open radiology report datasets, our system substantially improved the F1 score of a clinical information extraction performance by +22.1 (Delta +63.9%). We further show via a human evaluation and a qualitative analysis that our system leads to generations that are more factually complete and consistent compared to the baselines. 2021.naacl-main.416 @@ -5645,7 +5645,7 @@ HaoxuanYou ZhecanWang AlirezaZareian - Shih-FuChang + Shih-FuChang Kai-WeiChang 5339–5350 Pre-trained contextual vision-and-language (V&L) models have achieved impressive performance on various benchmarks. However, existing models require a large amount of parallel image-caption data for pre-training. Such data are costly to collect and require cumbersome curation. Inspired by unsupervised machine translation, we investigate if a strong V&L representation model can be learned through unsupervised pre-training without image-caption corpora. In particular, we propose to conduct “mask-and-predict” pre-training on text-only and image-only corpora and introduce the object tags detected by an object recognition model as anchor points to bridge two modalities. We find that such a simple approach achieves performance close to a model pre-trained with aligned data, on four English V&L benchmarks. Our work challenges the widely held notion that aligned data is necessary for V&L pre-training, while significantly reducing the amount of supervision needed for V&L models. @@ -5722,7 +5722,7 @@ Self-training Improves Pre-training for Natural Language Understanding JingfeiDu - EdouardGrave + EdouardGrave BelizGunel VishravChaudhary OnurCelebi @@ -5742,7 +5742,7 @@ XiaokaiWei Shang-WenLi HenghuiZhu - KathleenMcKeown + KathleenMcKeown RameshNallapati Andrew O.Arnold BingXiang @@ -5774,7 +5774,7 @@ XiaoyuanYi MaosongSun LinerYang - Tat-SengChua + Tat-SengChua 5441–5452 Grammatical Error Correction (GEC) aims to correct writing errors and help language learners improve their writing skills. However, existing GEC models tend to produce spurious corrections or fail to detect lots of errors. The quality estimation model is necessary to ensure learners get accurate GEC results and avoid misleading from poorly corrected sentences. Well-trained GEC models can generate several high-quality hypotheses through decoding, such as beam search, which provide valuable GEC evidence and can be used to evaluate GEC quality. However, existing models neglect the possible GEC evidence from different hypotheses. This paper presents the Neural Verification Network (VERNet) for GEC quality estimation with multiple hypotheses. VERNet establishes interactions among hypotheses with a reasoning graph and conducts two kinds of attention mechanisms to propagate GEC evidence to verify the quality of generated hypotheses. Our experiments on four GEC datasets show that VERNet achieves state-of-the-art grammatical error detection performance, achieves the best quality estimation results, and significantly improves GEC performance by reranking hypotheses. All data and source codes are available at https://github.com/thunlp/VERNet. 2021.naacl-main.429 @@ -5802,7 +5802,7 @@ ZhongyuWei JingLi QiZhang - XuanjingHuang + XuanjingHuang 5467–5478 In this paper, we focus on identifying interactive argument pairs from two posts with opposite stances to a certain topic. Considering opinions are exchanged from different perspectives of the discussing topic, we study the discrete representations for arguments to capture varying aspects in argumentation languages (e.g., the debate focus and the participant behavior). Moreover, we utilize hierarchical structure to model post-wise information incorporating contextual knowledge. Experimental results on the large-scale dataset collected from CMV show that our proposed framework can significantly outperform the competitive baselines. Further analyses reveal why our model yields superior performance and prove the usefulness of our learned representations. 2021.naacl-main.431 @@ -5813,11 +5813,11 @@ On Unifying Misinformation Detection NayeonLee - Belinda Z.Li + Belinda Z.Li SinongWang PascaleFung HaoMa - Wen-tauYih + Wen-tauYih MadianKhabsa 5479–5485 In this paper, we introduce UnifiedM2, a general-purpose misinformation model that jointly models multiple domains of misinformation with a single, unified setup. The model is trained to handle four tasks: detecting news bias, clickbait, fake news, and verifying rumors. By grouping these tasks together, UnifiedM2 learns a richer representation of misinformation, which leads to state-of-the-art or comparable performance across all tasks. Furthermore, we demonstrate that UnifiedM2’s learned representation is helpful for few-shot learning of unseen misinformation tasks/datasets and the model’s generalizability to unseen events. @@ -5854,8 +5854,8 @@ Do <fixed-case>RNN</fixed-case> States Encode Abstract Phonological Alternations? - MiikkaSilfverberg - FrancisTyers + MiikkaSilfverberg + FrancisTyers GarrettNicolai MansHulden 5501–5513 @@ -5887,8 +5887,8 @@ LeiLi TianyuLiu ZhifangSui - BaobaoChang - YangLiu + BaobaoChang + YangLiu 5524–5531 In this paper, we tackle the task of Definition Generation (DG) in Chinese, which aims at automatically generating a definition for a word. Most existing methods take the source word as an indecomposable semantic unit. However, in parataxis languages like Chinese, word meanings can be composed using the word formation process, where a word (“桃花”, peach-blossom) is formed by formation components (“桃”, peach; “花”, flower) using a formation rule (Modifier-Head). Inspired by this process, we propose to enhance DG with word formation features. We build a formation-informed dataset, and propose a model DeFT, which Decomposes words into formation features, dynamically Fuses different features through a gating mechanism, and generaTes word definitions. Experimental results show that our method is both effective and robust. 2021.naacl-main.437 @@ -5901,7 +5901,7 @@ ShoheiHigashiyama MasaoUtiyama TaroWatanabe - EiichiroSumita + EiichiroSumita 5532–5541 Morphological analysis (MA) and lexical normalization (LN) are both important tasks for Japanese user-generated text (UGT). To evaluate and compare different MA/LN systems, we have constructed a publicly available Japanese UGT corpus. Our corpus comprises 929 sentences annotated with morphological and normalization information, along with category information we classified for frequent UGT-specific phenomena. Experiments on the corpus demonstrated the low performance of existing MA/LN methods for non-general words and non-standard forms, indicating that the corpus would be a challenging benchmark for further research on UGT. 2021.naacl-main.438 @@ -5929,7 +5929,7 @@ YiyangHou YajieYe LiLiang - WeiweiSun + WeiweiSun 5554–5566 Universal Semantic Tagging aims to provide lightweight unified analysis for all languages at the word level. Though the proposed annotation scheme is conceptually promising, the feasibility is only examined in four Indo–European languages. This paper is concerned with extending the annotation scheme to handle Mandarin Chinese and empirically study the plausibility of unifying meaning representations for multiple languages. We discuss a set of language-specific semantic phenomena, propose new annotation specifications and build a richly annotated corpus. The corpus consists of 1100 English–Chinese parallel sentences, where compositional semantic analysis is available for English, and another 1000 Chinese sentences which has enriched syntactic analysis. By means of the new annotations, we also evaluate a series of neural tagging models to gauge how successful semantic tagging can be: accuracies of 92.7% and 94.6% are obtained for Chinese and English respectively. The English tagging performance is remarkably better than the state-of-the-art by 7.7%. 2021.naacl-main.440 @@ -5969,8 +5969,8 @@ <fixed-case>AMR</fixed-case> Parsing with Action-Pointer Transformer JiaweiZhou TahiraNaseem - RamónFernandez Astudillo - RaduFlorian + RamónFernandez Astudillo + RaduFlorian 5585–5598 Abstract Meaning Representation parsing is a sentence-to-graph prediction task where target nodes are not explicitly aligned to sentence tokens. However, since graph nodes are semantically based on one or more sentence tokens, implicit alignments can be derived. Transition-based parsers operate over the sentence from left to right, capturing this inductive bias via alignments at the cost of limited expressiveness. In this work, we propose a transition-based system that combines hard-attention over sentences with a target-side action pointer mechanism to decouple source tokens from node representations and address alignments. We model the transitions as well as the pointer mechanism through straightforward modifications within a single Transformer architecture. Parser state and graph structure information are efficiently encoded using attention heads. We show that our action-pointer approach leads to increased expressiveness and attains large gains (+1.6 points) against the best transition-based AMR parser in very similar conditions. While using no graph re-categorization, our single model yields the second best Smatch score on AMR 2.0 (81.8), which is further improved to 83.4 with silver data and ensemble decoding. 2021.naacl-main.443 @@ -5981,11 +5981,11 @@ <fixed-case>NL</fixed-case>-<fixed-case>EDIT</fixed-case>: Correcting Semantic Parse Errors through Natural Language Interaction AhmedElgohary - ChristopherMeek + ChristopherMeek MatthewRichardson AdamFourney GonzaloRamos - Ahmed HassanAwadallah + Ahmed HassanAwadallah 5599–5610 We study semantic parsing in an interactive setting in which users correct errors with natural language feedback. We present NL-EDIT, a model for interpreting natural language feedback in the interaction context to generate a sequence of edits that can be applied to the initial parse to correct its errors. We show that NL-EDIT can boost the accuracy of existing text-to-SQL parsers by up to 20% with only one turn of correction. We analyze the limitations of the model and discuss directions for improvement and evaluation. The code and datasets used in this paper are publicly available at http://aka.ms/NLEdit. 2021.naacl-main.444 @@ -6042,7 +6042,7 @@ ZhaojiangLin BingLiu SeungwhanMoon - PaulCrook + PaulCrook ZhenpengZhou ZhiguangWang ZhouYu @@ -6161,7 +6161,7 @@ TulikaSaha ApoorvaUpadhyaya SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya 5727–5737 Speech Act Classification determining the communicative intent of an utterance has been investigated widely over the years as a standalone task. This holds true for discussion in any fora including social media platform such as Twitter. But the emotional state of the tweeter which has a considerable effect on the communication has not received the attention it deserves. Closely related to emotion is sentiment, and understanding of one helps understand the other. In this work, we firstly create a new multi-modal, emotion-TA (‘TA’ means tweet act, i.e., speech act in Twitter) dataset called EmoTA collected from open-source Twitter dataset. We propose a Dyadic Attention Mechanism (DAM) based multi-modal, adversarial multi-tasking framework. DAM incorporates intra-modal and inter-modal attention to fuse multiple modalities and learns generalized features across all the tasks. Experimental results indicate that the proposed framework boosts the performance of the primary task, i.e., TA classification (TAC) by benefitting from the two secondary tasks, i.e., Sentiment and Emotion Analysis compared to its uni-modal and single task TAC (tweet act classification) variants. 2021.naacl-main.456 @@ -6187,8 +6187,8 @@ ShujianHuang TongXiao DongqiWang - XinyuDai - JiajunChen + XinyuDai + JiajunChen 5749–5759 Non-autoregressive Transformer is a promising text generation model. However, current non-autoregressive models still fall behind their autoregressive counterparts in translation quality. We attribute this accuracy gap to the lack of dependency modeling among decoder inputs. In this paper, we propose CNAT, which learns implicitly categorical codes as latent variables into the non-autoregressive decoding. The interaction among these categorical codes remedies the missing dependencies and improves the model capacity. Experiment results show that our model achieves comparable or better performance in machine translation tasks than several strong baselines. 2021.naacl-main.458 @@ -6289,7 +6289,7 @@ JingLiu KaiLiu RuiyangRen - Wayne XinZhao + Wayne XinZhao DaxiangDong HuaWu HaifengWang @@ -6371,11 +6371,11 @@ AhmadZaidi MutethiaMutuma RahulJha - Ahmed HassanAwadallah + Ahmed HassanAwadallah AsliCelikyilmaz YangLiu XipengQiu - DragomirRadev + DragomirRadev 5905–5921 Meetings are a key component of human collaboration. As increasing numbers of meetings are recorded and transcribed, meeting summaries have become essential to remind those who may or may not have attended the meetings about the key decisions made and the tasks to be completed. However, it is hard to create a single short summary that covers all the content of a long meeting involving multiple people and topics. In order to satisfy the needs of different types of users, we define a new query-based multi-domain meeting summarization task, where models have to select and summarize relevant spans of meetings in response to a query, and we introduce QMSum, a new benchmark for this task. QMSum consists of 1,808 query-summary pairs over 232 meetings in multiple domains. Besides, we investigate a locate-then-summarize method and evaluate a set of strong summarization baselines on the task. Experimental results and manual analysis reveal that QMSum presents significant challenges in long meeting summarization for future research. Dataset is available at https://github.com/Yale-LILY/QMSum. 2021.naacl-main.472 @@ -6452,8 +6452,8 @@ Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Demonstrations - AviSil - Xi VictoriaLin + AviSil + Xi VictoriaLin Association for Computational Linguistics
Online
June @@ -6478,13 +6478,13 @@
Machine-Assisted Script Curation - ManuelCiosici + ManuelCiosici JosephCummings MitchellDeHaven AlexHedges YashKankanampati Dong-HoLee - RalphWeischedel + RalphWeischedel MarjorieFreedman 8–17 We describe Machine-Aided Script Curator (MASC), a system for human-machine collaborative script authoring. Scripts produced with MASC include (1) English descriptions of sub-events that comprise a larger, complex event; (2) event types for each of those events; (3) a record of entities expected to participate in multiple sub-events; and (4) temporal sequencing between the sub-events. MASC automates portions of the script creation process with suggestions for event types, links to Wikidata, and sub-events that may have been forgotten. We illustrate how these automations are useful to the script writer with a few case-study scripts. @@ -6587,15 +6587,15 @@ YiFung HengJi JiaweiHan - Shih-FuChang - JamesPustejovsky + Shih-FuChang + JamesPustejovsky JasmineRah DavidLiem AhmedELsayed - MarthaPalmer - ClareVoss + MarthaPalmer + ClareVoss CynthiaSchneider - BoyanOnyshkevych + BoyanOnyshkevych 66–77 To combat COVID-19, both clinicians and scientists need to digest the vast amount of relevant biomedical knowledge in literature to understand the disease mechanism and the related biological functions. We have developed a novel and comprehensive knowledge discovery framework, COVID-KG to extract fine-grained multimedia knowledge elements (entities, relations and events) from scientific literature. We then exploit the constructed multimedia knowledge graphs (KGs) for question answering and report generation, using drug repurposing as a case study. Our framework also provides detailed contextual sentences, subfigures, and knowledge subgraphs as evidence. All of the data, KGs, reports. 2021.naacl-demos.8 @@ -6633,7 +6633,7 @@ Interactive Plot Manipulation using Natural Language YihanWang YutongShao - NdapaNakashole + NdapaNakashole 92–98 We present an interactive Plotting Agent, a system that enables users to directly manipulate plots using natural language instructions within an interactive programming environment. The Plotting Agent maps language to plot updates. We formulate this problem as a slot-based task-oriented dialog problem, which we tackle with a sequence-to-sequence model. This plotting model while accurate in most cases, still makes errors, therefore, the system allows a feedback mode, wherein the user is presented with a top-k list of plots, among which the user can pick the desired one. From this kind of feedback, we can then, in principle, continuously learn and improve the system. Given that plotting is widely used across data-driven fields, we believe our demonstration will be of interest to both practitioners such as data scientists broadly defined, and researchers interested in natural language interfaces. 2021.naacl-demos.11 @@ -6645,7 +6645,7 @@ <fixed-case>A</fixed-case>ctive<fixed-case>A</fixed-case>nno: General-Purpose Document-Level Annotation Tool with Active Learning Integration MaxWiechmann Seid MuhieYimam - ChrisBiemann + ChrisBiemann 99–105 ActiveAnno is an annotation tool focused on document-level annotation tasks developed both for industry and research settings. It is designed to be a general-purpose tool with a wide variety of use cases. It features a modern and responsive web UI for creating annotation projects, conducting annotations, adjudicating disagreements, and analyzing annotation results. ActiveAnno embeds a highly configurable and interactive user interface. The tool also integrates a RESTful API that enables integration into other software systems, including an API for machine learning integration. ActiveAnno is built with extensible design and easy deployment in mind, all to enable users to perform annotation tasks with high efficiency and high-quality annotation results. 2021.naacl-demos.12 @@ -6659,7 +6659,7 @@ DenisNewman-Griffis VenkateshSivaraman AdamPerer - EricFosler-Lussier + EricFosler-Lussier HarryHochheiser 106–115 Embeddings of words and concepts capture syntactic and semantic regularities of language; however, they have seen limited use as tools to study characteristics of different corpora and how they relate to one another. We introduce TextEssence, an interactive system designed to enable comparative analysis of corpora using embeddings. TextEssence includes visual, neighbor-based, and similarity-based modes of embedding analysis in a lightweight, web-based interface. We further propose a new measure of embedding confidence based on nearest neighborhood overlap, to assist in identifying high-quality embeddings for corpus analysis. A case study on COVID-19 scientific literature illustrates the utility of the system. TextEssence can be found at https://textessence.github.io. @@ -6699,7 +6699,7 @@ RaeferGabriel ShuyangGao RahulGoel - DilekHakkani-Tur + DilekHakkani-Tur JanJezabek AbhayJha Jiun-YuKao @@ -6729,7 +6729,7 @@ <fixed-case>RESIN</fixed-case>: A Dockerized Schema-Guided Cross-document Cross-lingual Cross-media Information Extraction and Event Tracking System HaoyangWen YingLin - TuanLai + TuanLai XiaomanPan ShaLi XudongLin @@ -6745,13 +6745,13 @@ QingLyu DídacSurís BrianChen - Susan WindischBrown - MarthaPalmer + Susan WindischBrown + MarthaPalmer ChrisCallison-Burch CarlVondrick JiaweiHan DanRoth - Shih-FuChang + Shih-FuChang HengJi 133–143 We present a new information extraction system that can automatically construct temporal event graphs from a collection of news documents from multiple sources, multiple languages (English and Spanish for our experiment), and multiple data modalities (speech, text, image and video). The system advances state-of-the-art from two aspects: (1) extending from sentence-level event extraction to cross-document cross-lingual cross-media event extraction, coreference resolution and temporal event tracking; (2) using human curated event schema library to match and enhance the extraction output. We have made the dockerlized system publicly available for research purpose at GitHub, with a demo video. @@ -6777,7 +6777,7 @@ Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Student Research Workshop EsinDurmus VivekGupta - NelsonLiu + NelsonLiu NanyunPeng YuSu Association for Computational Linguistics @@ -6838,7 +6838,7 @@ Towards Layered Events and Schema Representations in Long Documents Hans OleHatzel - ChrisBiemann + ChrisBiemann 32–39 In this thesis proposal, we explore the application of event extraction to literary texts. Considering the lengths of literary documents modeling events in different granularities may be more adequate to extract meaningful information, as individual elements contribute little to the overall semantics. We adapt the concept of schemas as sequences of events all describing a single process, connected through shared participants extending it to for multiple schemas in a document. Segmentation of event sequences into schemas is approached by modeling event sequences, on such task as the narrative cloze task, the prediction of missing events in sequences. We propose building on sequences of event embeddings to form schema embeddings, thereby summarizing sections of documents using a single representation. This approach will allow for the comparisons of different sections of documents and entire literary works. Literature is a challenging domain based on its variety of genres, yet the representation of literary content has received relatively little attention. 2021.naacl-srw.5 @@ -6885,7 +6885,7 @@ JinfengXiao LidanWang FranckDernoncourt - TrungBui + TrungBui TongSun JiaweiHan 61–67 @@ -6913,7 +6913,7 @@ JingxuanTu MarcVerhagen BrentCochran - JamesPustejovsky + JamesPustejovsky 76–87 We propose semantic visualization as a linguistic visual analytic method. It can enable exploration and discovery over large datasets of complex networks by exploiting the semantics of the relations in them. This involves extracting information, applying parameter reduction operations, building hierarchical data representation and designing visualization. We also present the accompanying COVID-SemViz a searchable and interactive visualization system for knowledge exploration of COVID-19 data to demonstrate the application of our proposed method. In the user studies, users found that semantic visualization-powered COVID-SemViz is helpful in terms of finding relevant information and discovering unknown associations. 2021.naacl-srw.11 @@ -7040,7 +7040,7 @@ FlorianSchneider ÖzgeAlaçam XintongWang - ChrisBiemann + ChrisBiemann In primary school, children’s books, as well as in modern language learning apps, multi-modal learning strategies like illustrations of terms and phrases are used to support reading comprehension. Also, several studies in educational psychology suggest that integrating cross-modal information will improve reading comprehension. We claim that state-of- he-art multi-modal transformers, which could be used in a language learner context to improve human reading, will perform poorly because of the short and relatively simple textual data those models are trained with. To prove our hypotheses, we collected a new multi-modal image-retrieval dataset based on data from Wikipedia. In an in-depth data analysis, we highlight the differences between our dataset and other popular datasets. Additionally, we evaluate several state-of-the-art multi-modal transformers on text-image retrieval on our dataset and analyze their meager results, which verify our claims. 2021.naacl-srw.21 schneider-etal-2021-towards @@ -7051,8 +7051,8 @@ Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Tutorials GregKondrak - KalinaBontcheva - DanGillick + KalinaBontcheva + DanGillick Association for Computational Linguistics
Online
June @@ -7104,7 +7104,7 @@
A Tutorial on Evaluation Metrics used in Natural Language Generation - Mitesh M.Khapra + Mitesh M.Khapra Ananya B.Sai 15–19 The advent of Deep Learning and the availability of large scale datasets has accelerated research on Natural Language Generation with a focus on newer tasks and better models. With such rapid progress, it is vital to assess the extent of scientific progress made and identify the areas/components that need improvement. To accomplish this in an automatic and reliable manner, the NLP community has actively pursued the development of automatic evaluation metrics. Especially in the last few years, there has been an increasing focus on evaluation metrics, with several criticisms of existing metrics and proposals for several new metrics. This tutorial presents the evolution of automatic evaluation metrics to their current state along with the emerging trends in this field by specifically addressing the following questions: (i) What makes NLG evaluation challenging? (ii) Why do we need automatic evaluation metrics? (iii) What are the existing automatic evaluation metrics and how can they be organised in a coherent taxonomy? (iv) What are the criticisms and shortcomings of existing metrics? (v) What are the possible future directions of research? @@ -7119,7 +7119,7 @@ ArmanCohan HannanehHajishirzi SewonMin - Matthew E.Peters + Matthew E.Peters 20–24 In this tutorial, we aim at bringing interested NLP researchers up to speed about the recent and ongoing techniques for document-level representation learning. Additionally, our goal is to reveal new research opportunities to the audience, which will hopefully bring us closer to address existing challenges in this domain. 2021.naacl-tutorials.5 @@ -7151,7 +7151,7 @@ Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers Young-bumKim YunyaoLi - OwenRambow + OwenRambow Association for Computational Linguistics
Online
June @@ -7183,7 +7183,7 @@ JoseGarrido Ramas GiorgioPessot AbdalghaniAbujabal - MartinRajman + MartinRajman 10–18 Annotation conflict resolution is crucial towards building machine learning models with acceptable performance. Past work on annotation conflict resolution had assumed that data is collected at once, with a fixed set of annotators and fixed annotation guidelines. Moreover, previous work dealt with atomic labeling tasks. In this paper, we address annotation conflict resolution for Natural Language Understanding (NLU), a structured prediction task, in a real-world setting of commercial voice-controlled personal assistants, where (1) regular data collections are needed to support new and existing functionalities, (2) annotation guidelines evolve over time, and (3) the pool of annotators change across data collections. We devise an approach combining information-theoretic measures and a supervised neural model to resolve conflicts in data annotation. We evaluate our approach both intrinsically and extrinsically on a real-world dataset with 3.5M utterances of a commercial dialog system in German. Our approach leads to dramatic improvements over a majority baseline especially in contentious cases. On the NLU task, our approach achieves 2.75% error reduction over a no-resolution baseline. 2021.naacl-industry.2 @@ -7221,7 +7221,7 @@ HanWang YueLiu YangLiu - DilekHakkani-Tur + DilekHakkani-Tur 26–33 In recent years, incorporating external knowledge for response generation in open-domain conversation systems has attracted great interest. To improve the relevancy of retrieved knowledge, we propose a neural entity linking (NEL) approach. Different from formal documents, such as news, conversational utterances are informal and multi-turn, which makes it more challenging to disambiguate the entities. Therefore, we present a context-aware named entity recognition model (NER) and entity resolution (ER) model to utilize dialogue context information. We conduct NEL experiments on three open-domain conversation datasets and validate that incorporating context information improves the performance of NER and ER models. The end-to-end NEL approach outperforms the baseline by 62.8% relatively in F1 metric. Furthermore, we verify that using external knowledge based on NEL benefits the neural response generation model. 2021.naacl-industry.4 @@ -7295,7 +7295,7 @@ ShahabJalalvand MinhuaChen YanjieZhao - SrinivasBangalore + SrinivasBangalore 63–71 Spoken language understanding (SLU) extracts the intended mean- ing from a user utterance and is a critical component of conversational virtual agents. In enterprise virtual agents (EVAs), language understanding is substantially challenging. First, the users are infrequent callers who are unfamiliar with the expectations of a pre-designed conversation flow. Second, the users are paying customers of an enterprise who demand a reliable, consistent and efficient user experience when resolving their issues. In this work, we describe a general and robust framework for intent and entity extraction utilizing a hybrid of statistical and rule-based approaches. Our framework includes confidence modeling that incorporates information from all components in the SLU pipeline, a critical addition for EVAs to en- sure accuracy. Our focus is on creating accurate and scalable SLU that can be deployed rapidly for a large class of EVA applications with little need for human intervention. 2021.naacl-industry.9 @@ -7306,7 +7306,7 @@ Proteno: Text Normalization with Limited Data for Fast Deployment in Text to Speech Systems ShubhiTyagi - AntonioBonafonte + AntonioBonafonte JaimeLorenzo-Trueba JavierLatorre 72–79 @@ -7319,10 +7319,10 @@ Addressing the Vulnerability of <fixed-case>NMT</fixed-case> in Input Perturbations WeiwenXu - Ai TiAw + Ai TiAw YangDing KuiWu - ShafiqJoty + ShafiqJoty 80–88 Neural Machine Translation (NMT) has achieved significant breakthrough in performance but is known to suffer vulnerability to input perturbations. As real input noise is difficult to predict during training, robustness is a big issue for system deployment. In this paper, we improve the robustness of NMT models by reducing the effect of noisy words through a Context-Enhanced Reconstruction (CER) approach. CER trains the model to resist noise in two steps: (1) perturbation step that breaks the naturalness of input sequence with made-up words; (2) reconstruction step that defends the noise propagation by generating better and more robust contextual representation. Experimental results on Chinese-English (ZH-EN) and French-English (FR-EN) translation tasks demonstrate robustness improvement on both news and social media text. Further fine-tuning experiments on social media text show our approach can converge at a higher position and provide a better adaptation. 2021.naacl-industry.11 @@ -7348,7 +7348,7 @@ ChanjunPark SugyeongEo HyeonseokMoon - HeuiseokLim + HeuiseokLim 97–104 Most of the recent Natural Language Processing(NLP) studies are based on the Pretrain-Finetuning Approach (PFA), but in small and medium-sized enterprises or companies with insufficient hardware there are many limitations to servicing NLP application software using such technology due to slow speed and insufficient memory. The latest PFA technologies require large amounts of data, especially for low-resource languages, making them much more difficult to work with. We propose a new tokenization method, ONE-Piece, to address this limitation that combines the morphology-considered subword tokenization method and the vocabulary method used after probing for an existing method that has not been carefully considered before. Our proposed method can also be used without modifying the model structure. We experiment by applying ONE-Piece to Korean, a morphologically-rich and low-resource language. We derive an optimal subword tokenization result for Korean-English machine translation by conducting a case study that combines the subword tokenization method, morphological segmentation, and vocabulary method. Through comparative experiments with all the tokenization methods currently used in NLP research, ONE-Piece achieves performance comparable to the current Korean-English machine translation state-of-the-art model. 2021.naacl-industry.13 @@ -7362,7 +7362,7 @@ ChengqiZhao MingxuanWang LeiLi - DeyiXiong + DeyiXiong 105–112 Automatic translation of dialogue texts is a much needed demand in many real life scenarios. However, the currently existing neural machine translation delivers unsatisfying results. In this paper, we conduct a deep analysis of a dialogue corpus and summarize three major issues on dialogue translation, including pronoun dropping (), punctuation dropping (), and typos (). In response to these challenges, we propose a joint learning method to identify omission and typo, and utilize context to translate dialogue utterances. To properly evaluate the performance, we propose a manually annotated dataset with 1,931 Chinese-English parallel utterances from 300 dialogues as a benchmark testbed for dialogue translation. Our experiments show that the proposed method improves translation quality by 3.2 BLEU over the baselines. It also elevates the recovery rate of omitted pronouns from 26.09% to 47.16%. We will publish the code and dataset publicly at https://xxx.xx. 2021.naacl-industry.14 @@ -7515,7 +7515,7 @@ SidaGao XiaoYang JustineKao - StephenPulman + StephenPulman AtishKothari RayShen YinyingPan @@ -7560,7 +7560,7 @@ BrianLester SagnikRay Choudhury RashmiPrasad - SrinivasBangalore + SrinivasBangalore 214–221 Complex natural language understanding modules in dialog systems have a richer understanding of user utterances, and thus are critical in providing a better user experience. However, these models are often created from scratch, for specific clients and use cases and require the annotation of large datasets. This encourages the sharing of annotated data across multiple clients. To facilitate this we introduce the idea of intent features: domain and topic agnostic properties of intents that can be learnt from the syntactic cues only, and hence can be shared. We introduce a new neural network architecture, the Global-Local model, that shows significant improvement over strong baselines for identifying these features in a deployed, multi-intent natural language understanding module, and more generally in a classification setting where a part of an utterance has to be classified utilizing the whole context. 2021.naacl-industry.27 @@ -7625,8 +7625,8 @@ Coherent and Concise Radiology Report Generation via Context Specific Image Representations and Orthogonal Sentence States - LittonJ Kurisinkel - Ai TiAw + LittonJ Kurisinkel + Ai TiAw Nancy FChen 246–254 Neural models for text generation are often designed in an end-to-end fashion, typically with zero control over intermediate computations, limiting their practical usability in downstream applications. In this work, we incorporate explicit means into neural models to ensure topical continuity, informativeness and content diversity of generated radiology reports. For the purpose we propose a method to compute image representations specific to each sentential context and eliminate redundant content by exploiting diverse sentence states. We conduct experiments to generate radiology reports from medical images of chest x-rays using MIMIC-CXR. Our model outperforms baselines by up to 18% and 29% respective in the evaluation for informativeness and content ordering respectively, relative on objective metrics and 16% on human evaluation. @@ -7640,7 +7640,7 @@ HidetakaKamigaito PeinanZhang HiroyaTakamura - ManabuOkumura + ManabuOkumura 255–262 Although there are many studies on neural language generation (NLG), few trials are put into the real world, especially in the advertising domain. Generating ads with NLG models can help copywriters in their creation. However, few studies have adequately evaluated the effect of generated ads with actual serving included because it requires a large amount of training data and a particular environment. In this paper, we demonstrate a practical use case of generating ad-text with an NLG model. Specially, we show how to improve the ads’ impact, deploy models to a product, and evaluate the generated ads. 2021.naacl-industry.32 diff --git a/data/xml/2021.naloma.xml b/data/xml/2021.naloma.xml index 4f1ed396a8..7aadd23ece 100644 --- a/data/xml/2021.naloma.xml +++ b/data/xml/2021.naloma.xml @@ -4,7 +4,7 @@ Proceedings of the 1st and 2nd Workshops on Natural Logic Meets Machine Learning (NALOMA) Aikaterini-LidaKalouli - Lawrence S.Moss + Lawrence S.Moss Association for Computational Linguistics
Groningen, the Netherlands (online)
June @@ -20,7 +20,7 @@ Learning General Event Schemas with Episodic Logic LaneLawley BenjaminKuehnert - LenhartSchubert + LenhartSchubert 1–6 We present a system for learning generalized, stereotypical patterns of events—or “schemas”—from natural language stories, and applying them to make predictions about other stories. Our schemas are represented with Episodic Logic, a logical form that closely mirrors natural language. By beginning with a “head start” set of protoschemas— schemas that a 1- or 2-year-old child would likely know—we can obtain useful, general world knowledge with very few story examples—often only one or two. Learned schemas can be combined into more complex, composite schemas, and used to make predictions in other stories where only partial information is available. 2021.naloma-1.1 @@ -57,7 +57,7 @@ Monotonic Inference for Underspecified Episodic Logic GeneKim MandarJuvekar - LenhartSchubert + LenhartSchubert 26–40 We present a method of making natural logic inferences from Unscoped Logical Form of Episodic Logic. We establish a correspondence between inference rules of scope resolved Episodic Logic and the natural logic treatment by Sánchez Valencia (1991a), and hence demonstrate the ability to handle foundational natural logic inferences from prior literature as well as more general nested monotonicity inferences. 2021.naloma-1.5 @@ -69,7 +69,7 @@ DeborahFerreira MokanaranganThayaparan MarcoValentino - AndréFreitas + AndréFreitas 41–50 Natural language contexts display logical regularities with respect to substitutions of related concepts: these are captured in a functional order-theoretic property called monotonicity. For a certain class of NLI problems where the resulting entailment label depends only on the context monotonicity and the relation between the substituted concepts, we build on previous techniques that aim to improve the performance of NLI models for these problems, as consistent performance across both upward and downward monotone contexts still seems difficult to attain even for state of the art models. To this end, we reframe the problem of context monotonicity classification to make it compatible with transformer-based pre-trained NLI models and add this task to the training pipeline. Furthermore, we introduce a sound and complete simplified monotonicity logic formalism which describes our treatment of contexts as abstract units. Using the notions in our formalism, we adapt targeted challenge sets to investigate whether an intermediate context monotonicity classification task can aid NLI models’ performance on examples exhibiting monotonicity reasoning. 2021.naloma-1.6 @@ -100,7 +100,7 @@ MandarJuvekar JunisEkmekciu VietDuong - LenhartSchubert + LenhartSchubert 71–80 We implement the formalization of natural logic-like monotonic inference using Unscoped Episodic Logical Forms (ULFs) by Kim et al. (2020). We demonstrate this system’s capacity to handle a variety of challenging semantic phenomena using the FraCaS dataset (Cooper et al., 1996). These results give empirical evidence for prior claims that ULF is an appropriate representation to mediate natural logic-like inferences. 2021.naloma-1.9 diff --git a/data/xml/2021.nejlt.xml b/data/xml/2021.nejlt.xml index f7d68d5874..abf374ac14 100644 --- a/data/xml/2021.nejlt.xml +++ b/data/xml/2021.nejlt.xml @@ -3,7 +3,7 @@ Northern European Journal of Language Technology, Volume 7 - LeonDerczynski + LeonDerczynski Linköping University Electronic Press
Linköping, Sweden
10.3384/nejlt.2000-1533.7.1 diff --git a/data/xml/2021.newsum.xml b/data/xml/2021.newsum.xml index 21da8d5962..229ec74c0c 100644 --- a/data/xml/2021.newsum.xml +++ b/data/xml/2021.newsum.xml @@ -4,7 +4,7 @@ Proceedings of the Third Workshop on New Frontiers in Summarization GiuseppeCarenini - Jackie Chi KitCheung + Jackie Chi KitCheung YueDong FeiLiu LuWang @@ -21,7 +21,7 @@ Sentence-level Planning for Especially Abstractive Summarization AndreasMarfurt - JamesHenderson + JamesHenderson 1–14 Abstractive summarization models heavily rely on copy mechanisms, such as the pointer network or attention, to achieve good performance, measured by textual overlap with reference summaries. As a result, the generated summaries stay close to the formulations in the source document. We propose the *sentence planner* model to generate more abstractive summaries. It includes a hierarchical decoder that first generates a representation for the next summary sentence, and then conditions the word generator on this representation. Our generated summaries are more abstractive and at the same time achieve high ROUGE scores when compared to human reference summaries. We verify the effectiveness of our design decisions with extensive evaluations. 2021.newsum-1.1 @@ -77,7 +77,7 @@ Evaluation of Summarization Systems across Gender, Age, and Race AnnaJørgensen - AndersSøgaard + AndersSøgaard 51–56 Summarization systems are ultimately evaluated by human annotators and raters. Usually, annotators and raters do not reflect the demographics of end users, but are recruited through student populations or crowdsourcing platforms with skewed demographics. For two different evaluation scenarios – evaluation against gold summaries and system output ratings – we show that summary evaluation is sensitive to protected attributes. This can severely bias system development and evaluation, leading us to build models that cater for some groups rather than others. 2021.newsum-1.6 @@ -106,7 +106,7 @@ ChanheeLee SeungwooCho MingunPark - HeuiseokLim + HeuiseokLim 65–73 In this paper, we focus on improving the quality of the summary generated by neural abstractive dialogue summarization systems. Even though pre-trained language models generate well-constructed and promising results, it is still challenging to summarize the conversation of multiple participants since the summary should include a description of the overall situation and the actions of each speaker. This paper proposes self-supervised strategies for speaker-focused post-correction in abstractive dialogue summarization. Specifically, our model first discriminates which type of speaker correction is required in a draft summary and then generates a revised summary according to the required type. Experimental results show that our proposed method adequately corrects the draft summaries, and the revised summaries are significantly improved in both quantitative and qualitative evaluations. 2021.newsum-1.8 @@ -117,7 +117,7 @@ Measuring Similarity of Opinion-bearing Sentences WenyiTay - XiuzhenZhang + XiuzhenZhang StephenWan SarvnazKarimi 74–84 @@ -148,7 +148,7 @@ NicoleBeckage ShachiH Kumar SauravSahay - RameshManuvinakurike + RameshManuvinakurike 96–106 Incremental meeting temporal summarization, summarizing relevant information of partial multi-party meeting dialogue, is emerging as the next challenge in summarization research. Here we examine the extent to which human abstractive summaries of the preceding increments (context) can be combined with extractive meeting dialogue to generate abstractive summaries. We find that previous context improves ROUGE scores. Our findings further suggest that contexts begin to outweigh the dialogue. Using keyphrase extraction and semantic role labeling (SRL), we find that SRL captures relevant information without overwhelming the the model architecture. By compressing the previous contexts by ~70%, we achieve better ROUGE scores over our baseline models. Collectively, these results suggest that context matters, as does the way in which context is presented to the model. 2021.newsum-1.11 @@ -160,7 +160,7 @@ Are We Summarizing the Right Way? A Survey of Dialogue Summarization Data Sets DonTuggener MargotMieskes - JanDeriu + JanDeriu MarkCieliebak 107–118 Dialogue summarization is a long-standing task in the field of NLP, and several data sets with dialogues and associated human-written summaries of different styles exist. However, it is unclear for which type of dialogue which type of summary is most appropriate. For this reason, we apply a linguistic model of dialogue types to derive matching summary items and NLP tasks. This allows us to map existing dialogue summarization data sets into this model and identify gaps and potential directions for future work. As part of this process, we also provide an extensive overview of existing dialogue summarization data sets. @@ -212,7 +212,7 @@ A New Dataset and Efficient Baselines for Document-level Text Simplification in <fixed-case>G</fixed-case>erman - AnnetteRios + AnnetteRios NicolasSpring TannonKew MarekKostrzewa diff --git a/data/xml/2021.nllp.xml b/data/xml/2021.nllp.xml index 9980856116..4ddd92a00f 100644 --- a/data/xml/2021.nllp.xml +++ b/data/xml/2021.nllp.xml @@ -7,7 +7,7 @@ IonAndroutsopoulos LeslieBarrett CatalinaGoanta - DanielPreotiuc-Pietro + DanielPreotiuc-Pietro Association for Computational Linguistics
Punta Cana, Dominican Republic
November @@ -142,11 +142,11 @@ Few-shot and Zero-shot Approaches to Legal Text Classification: A Case Study in the Financial Sector RajdeepSarkar - Atul Kr.Ojha + Atul Kr.Ojha JayMegaro JohnMariano VallHerard - John P.McCrae + John P.McCrae 102–106 The application of predictive coding techniques to legal texts has the potential to greatly reduce the cost of legal review of documents, however, there is such a wide array of legal tasks and continuously evolving legislation that it is hard to construct sufficient training data to cover all cases. In this paper, we investigate few-shot and zero-shot approaches that require substantially less training data and introduce a triplet architecture, which for promissory statements produces performance close to that of a supervised system. This method allows predictive coding methods to be rapidly developed for new regulations and markets. 2021.nllp-1.10 @@ -206,7 +206,7 @@ Capturing Logical Structure of Visually Structured Documents with Multimodal Transition Parser YutaKoreeda - ChristopherManning + ChristopherManning 144–154 While many NLP pipelines assume raw, clean texts, many texts we encounter in the wild, including a vast majority of legal documents, are not so clean, with many of them being visually structured documents (VSDs) such as PDFs. Conventional preprocessing tools for VSDs mainly focused on word segmentation and coarse layout analysis, whereas fine-grained logical structure analysis (such as identifying paragraph boundaries and their hierarchies) of VSDs is underexplored. To that end, we proposed to formulate the task as prediction of “transition labels” between text fragments that maps the fragments to a tree, and developed a feature-based machine learning system that fuses visual, textual and semantic cues. Our system is easily customizable to different types of VSDs and it significantly outperformed baselines in identifying different structures in VSDs. For example, our system obtained a paragraph boundary detection F1 score of 0.953 which is significantly better than a popular PDF-to-text tool with an F1 score of 0.739. 2021.nllp-1.15 @@ -226,7 +226,7 @@ Supervised Identification of Participant Slots in Contracts - DanSimonson + DanSimonson 163–171 This paper presents a technique for the identification of participant slots in English language contracts. Taking inspiration from unsupervised slot extraction techniques, the system presented here uses a supervised approach to identify terms used to refer to a genre-specific slot in novel contracts. We evaluate the system in multiple feature configurations to demonstrate that the best performing system in both genres of contracts omits the exact mention form from consideration—even though such mention forms are often the name of the slot under consideration—and is instead based solely on the dependency label and parent; in other words, a more reliable quantification of a party’s role in a contract is found in what they do rather than what they are named. 2021.nllp-1.17 @@ -260,7 +260,7 @@ Learning from Limited Labels for Long Legal Dialogue JennyHong DerekChong - ChristopherManning + ChristopherManning 190–204 We study attempting to achieve high accuracy information extraction of case factors from a challenging dataset of parole hearings, which, compared to other legal NLP datasets, has longer texts, with fewer labels. On this corpus, existing work directly applying pretrained neural models has failed to extract all but a few relatively basic items with little improvement over rule-based extraction. We address two challenges posed by existing work: training on long documents and reasoning over complex speech patterns. We use a similar approach to the two-step open-domain question answering approach by using a Reducer to extract relevant text segments and a Producer to generate both extractive answers and non-extractive classifications. In a context like ours, with limited labeled data, we show that a superior approach for strong performance within limited development time is to use a combination of a rule-based Reducer and a neural Producer. We study four representative tasks from the parole dataset. On all four, we improve extraction from the previous benchmark of 0.41–0.63 to 0.83–0.89 F1. 2021.nllp-1.20 @@ -294,7 +294,7 @@ Jey HanLau BraydenMerrifield KateFazio - TimothyBaldwin + TimothyBaldwin 217–227 Free legal assistance is critically under-resourced, and many of those who seek legal help have their needs unmet. A major bottleneck in the provision of free legal assistance to those most in need is the determination of the precise nature of the legal problem. This paper describes a collaboration with a major provider of free legal assistance, and the deployment of natural language processing models to assign area-of-law categories to real-world requests for legal assistance. In particular, we focus on an investigation of models to generate efficiencies in the triage process, but also the risks associated with naive use of model predictions, including fairness across different user demographics. 2021.nllp-1.23 @@ -307,7 +307,7 @@ MeladelMistica InbarLevy AndrewChristie - TimothyBaldwin + TimothyBaldwin 228–238 We introduce the new task of domain name dispute resolution (DNDR), that predicts the outcome of a process for resolving disputes about legal entitlement to a domain name. TheICANN UDRP establishes a mandatory arbitration process for a dispute between a trade-mark owner and a domain name registrant pertaining to a generic Top-Level Domain (gTLD) name (one ending in .COM, .ORG, .NET, etc). The nature of the problem leads to a very skewed data set, which stems from being able to register a domain name with extreme ease, very little expense, and no need to prove an entitlement to it. In this paper, we describe thetask and associated data set. We also present benchmarking results based on a range of mod-els, which show that simple baselines are in general difficult to beat due to the skewed data distribution, but in the specific case of the respondent having submitted a response, a fine-tuned BERT model offers considerable improvements over a majority-class model 2021.nllp-1.24 diff --git a/data/xml/2021.nlp4call.xml b/data/xml/2021.nlp4call.xml index eb621abd53..ea19a00e19 100644 --- a/data/xml/2021.nlp4call.xml +++ b/data/xml/2021.nlp4call.xml @@ -27,7 +27,7 @@ ChristosRodosthenous FedericoSangati AlexanderKönig - CorinaForascu + CorinaForascu 1–14 2021.nlp4call-1.1 nicolas-etal-2021-experiment @@ -35,9 +35,9 @@ Automatic annotation of curricular language targets to enrich activity models and support both pedagogy and adaptive systems MartíQuixal - BjörnRudzewitz + BjörnRudzewitz ElizabethBear - DetmarMeurers + DetmarMeurers 15–27 2021.nlp4call-1.2 quixal-etal-2021-automatic @@ -53,9 +53,9 @@ Using Broad Linguistic Complexity Modeling for Cross-Lingual Readability Assessment - ZarahWeiss + ZarahWeiss XiaobinChen - DetmarMeurers + DetmarMeurers 38–54 2021.nlp4call-1.4 weiss-etal-2021-using @@ -63,7 +63,7 @@ Developing <fixed-case>F</fixed-case>lashcards for Learning <fixed-case>I</fixed-case>celandic XindanXu - Anton KarlIngason + Anton KarlIngason 55–61 2021.nlp4call-1.5 xu-ingason-2021-developing diff --git a/data/xml/2021.nlp4convai.xml b/data/xml/2021.nlp4convai.xml index a43e8cb90f..6e988ef416 100644 --- a/data/xml/2021.nlp4convai.xml +++ b/data/xml/2021.nlp4convai.xml @@ -3,7 +3,7 @@ Proceedings of the 3rd Workshop on Natural Language Processing for Conversational AI - AlexandrosPapangelis + AlexandrosPapangelis PawełBudzianowski BingLiu ElnazNouri @@ -124,7 +124,7 @@ What Went Wrong? Explaining Overall Dialogue Quality through Utterance-Level Impacts James D.Finch Sarah E.Finch - Jinho D.Choi + Jinho D.Choi 93–101 Improving user experience of a dialogue system often requires intensive developer effort to read conversation logs, run statistical analyses, and intuit the relative importance of system shortcomings. This paper presents a novel approach to automated analysis of conversation logs that learns the relationship between user-system interactions and overall dialogue quality. Unlike prior work on utterance-level quality prediction, our approach learns the impact of each interaction from the overall user rating without utterance-level annotation, allowing resultant model conclusions to be derived on the basis of empirical evidence and at low cost. Our model identifies interactions that have a strong correlation with the overall dialogue quality in a chatbot setting. Experiments show that the automated analysis from our model agrees with expert judgments, making this work the first to show that such weakly-supervised learning of utterance-level quality prediction is highly achievable. 2021.nlp4convai-1.9 @@ -135,7 +135,7 @@ <fixed-case>XP</fixed-case>ersona: Evaluating Multilingual Personalized Chatbot ZhaojiangLin ZihanLiu - Genta IndraWinata + Genta IndraWinata SamuelCahyawijaya AndreaMadotto YejinBang @@ -247,7 +247,7 @@ ShuyangDai GuoyinWang SunghyunPark - SungjinLee + SungjinLee 189–197 Large-scale auto-regressive models have achieved great success in dialogue response generation, with the help of Transformer layers. However, these models do not learn a representative latent space of the sentence distribution, making it hard to control the generation. Recent works have tried on learning sentence representations using Transformer-based framework, but do not model the context-response relationship embedded in the dialogue datasets. In this work, we aim to construct a robust sentence representation learning model, that is specifically designed for dialogue response generation, with Transformer-based encoder-decoder structure. An utterance-level contrastive learning is proposed, encoding predictive information in each context representation for its corresponding response. Extensive experiments are conducted to verify the robustness of the proposed representation learning mechanism. By using both reference-based and reference-free evaluation metrics, we provide detailed analysis on the generated sentences, demonstrating the effectiveness of our proposed model. 2021.nlp4convai-1.18 @@ -270,7 +270,7 @@ Investigating Pretrained Language Models for Graph-to-Text Generation Leonardo F. R.Ribeiro MartinSchmitt - HinrichSchütze + HinrichSchütze IrynaGurevych 211–227 Graph-to-text generation aims to generate fluent texts from graph-based data. In this paper, we investigate two recent pretrained language models (PLMs) and analyze the impact of different task-adaptive pretraining strategies for PLMs in graph-to-text generation. We present a study across three graph domains: meaning representations, Wikipedia knowledge graphs (KGs) and scientific KGs. We show that approaches based on PLMs BART and T5 achieve new state-of-the-art results and that task-adaptive pretraining strategies improve their performance even further. We report new state-of-the-art BLEU scores of 49.72 on AMR-LDC2017T10, 59.70 on WebNLG, and 25.66 on AGENDA datasets - a relative improvement of 31.8%, 4.5%, and 42.4%, respectively, with our models generating significantly more fluent texts than human references. In an extensive analysis, we identify possible reasons for the PLMs’ success on graph-to-text tasks. Our findings suggest that the PLMs benefit from similar facts seen during pretraining or fine-tuning, such that they perform well even when the input graph is reduced to a simple bag of node and edge labels. @@ -288,7 +288,7 @@ YuhengDu AnjaliNarayan-Chen TagyoungChung - DilekHakkani-Tur + DilekHakkani-Tur 228–242 Natural Language Generation (NLG) for task-oriented dialogue systems focuses on communicating specific content accurately, fluently, and coherently. While these attributes are crucial for a successful dialogue, it is also desirable to simultaneously accomplish specific stylistic goals, such as response length, point-of-view, descriptiveness, sentiment, formality, and empathy. In this work, we focus on stylistic control and evaluation for schema-guided NLG, with joint goals of achieving both semantic and stylistic control. We experiment in detail with various controlled generation methods for large pretrained language models: specifically, conditional training, guided fine-tuning, and guided decoding. We discuss their advantages and limitations, and evaluate them with a broad range of automatic and human evaluation metrics. Our results show that while high style accuracy and semantic correctness are easier to achieve for more lexically-defined styles with conditional training, stylistic control is also achievable for more semantically complex styles using discriminator-based guided decoding methods. The results also suggest that methods that are more scalable (with less hyper-parameters tuning) and that disentangle context generation and stylistic variations are more effective at achieving semantic correctness and style accuracy. 2021.nlp4convai-1.21 @@ -302,7 +302,7 @@ Joel Ruben AntonyMoniz XiaoYang ManosTsagkias - StephenPulman + StephenPulman 243–250 Entity tags in human-machine dialog are integral to natural language understanding (NLU) tasks in conversational assistants. However, current systems struggle to accurately parse spoken queries with the typical use of text input alone, and often fail to understand the user intent. Previous work in linguistics has identified a cross-language tendency for longer speech pauses surrounding nouns as compared to verbs. We demonstrate that the linguistic observation on pauses can be used to improve accuracy in machine-learnt language understanding tasks. Analysis of pauses in French and English utterances from a commercial voice assistant shows the statistically significant difference in pause duration around multi-token entity span boundaries compared to within entity spans. Additionally, in contrast to text-based NLU, we apply pause duration to enrich contextual embeddings to improve shallow parsing of entities. Results show that our proposed novel embeddings improve the relative error rate by up to 8% consistently across three domains for French, without any added annotation or alignment costs to the parser. 2021.nlp4convai-1.22 @@ -319,7 +319,7 @@ JayPujara XiangRen YangLiu - DilekHakkani-Tur + DilekHakkani-Tur 251–253 Humans make appropriate responses not only based on previous dialogue utterances but also on implicit background knowledge such as common sense. Although neural response generation models seem to produce human-like responses, they are mostly end-to-end and not generating intermediate grounds between a dialogue history and responses. This work aims to study if and how we can train an RG model that talks with itself to generate implicit knowledge before making responses. We further investigate can such models identify when to generate implicit background knowledge and when it is not necessary. Experimental results show that compared with models that directly generate responses given a dialogue history, self-talk models produce better-quality responses according to human evaluation on grammaticality, coherence, and engagingness. And models that are trained to identify when to self-talk further improves the response quality. Analysis on generated implicit knowledge shows that models mostly use the knowledge appropriately in the responses. 2021.nlp4convai-1.23 @@ -331,7 +331,7 @@ EhsanLotfi MaximeDe Bruyn JeskaBuhmann - WalterDaelemans + WalterDaelemans 254–262 Knowledge Grounded Conversation Models are usually based on a selection/retrieval module and a generation module, trained separately or simultaneously, with or without having access to a ‘gold’ knowledge option. With the introduction of large pre-trained generative models, the selection and generation part have become more and more entangled, shifting the focus towards enhancing knowledge incorporation (from multiple sources) instead of trying to pick the best knowledge option. These approaches however depend on knowledge labels and/or a separate dense retriever for their best performance. In this work we study the unsupervised selection abilities of pre-trained generative models (e.g. BART) and show that by adding a score-and-aggregate module between encoder and decoder, they are capable of learning to pick the proper knowledge through minimising the language modelling loss (i.e. without having access to knowledge labels). Trained as such, our model - K-Mine - shows competitive selection and generation performance against models that benefit from knowledge labels and/or separate dense retriever. 2021.nlp4convai-1.24 @@ -371,7 +371,7 @@ ShuyangGao SeokhwanKim YangLiu - DilekHakkani-Tur + DilekHakkani-Tur 281–288 Most prior work on task-oriented dialogue systems is restricted to supporting domain APIs. However, users may have requests that are out of the scope of these APIs. This work focuses on identifying such user requests. Existing methods for this task mainly rely on fine-tuning pre-trained models on large annotated data. We propose a novel method, REDE, based on adaptive representation learning and density estimation. REDE can be applied to zero-shot cases, and quickly learns a high-performing detector with only a few shots by updating less than 3K parameters. We demonstrate REDE’s competitive performance on DSTC9 data and our newly collected test set. 2021.nlp4convai-1.27 diff --git a/data/xml/2021.nlp4dh.xml b/data/xml/2021.nlp4dh.xml index 8dbf0a2e1f..d967eff125 100644 --- a/data/xml/2021.nlp4dh.xml +++ b/data/xml/2021.nlp4dh.xml @@ -48,7 +48,7 @@ <fixed-case>M</fixed-case>ac<fixed-case>BERT</fixed-case>h: Development and Evaluation of a Historically Pre-trained Language Model for <fixed-case>E</fixed-case>nglish (1450-1950) - EnriqueManjavacas Arevalo + EnriqueManjavacas Arevalo LaurenFonteyn 23–36 The new pre-train-then-fine-tune paradigm in Natural made important performance gains accessible to a wider audience. Once pre-trained, deploying a large language model presents comparatively small infrastructure requirements, and offers robust performance in many NLP tasks. The Digital Humanities community has been an early adapter of this paradigm. Yet, a large part of this community is concerned with the application of NLP algorithms to historical texts, for which large models pre-trained on contemporary text may not provide optimal results. In the present paper, we present “MacBERTh”—a transformer-based language model pre-trained on historical English—and exhaustively assess its benefits on a large set of relevant downstream tasks. Our experiments highlight that, despite some differences across target time periods, pre-training on historical language from scratch outperforms models pre-trained on present-day language and later adapted to historical language. @@ -185,7 +185,7 @@ Transferring Modern Named Entity Recognition to the Historical Domain: How to Take the Step? BaptisteBlouin - BenoitFavre + BenoitFavre JeremyAuguste ChristianHenriot 152–162 diff --git a/data/xml/2021.nlp4if.xml b/data/xml/2021.nlp4if.xml index 1136bc88d9..4affc82253 100644 --- a/data/xml/2021.nlp4if.xml +++ b/data/xml/2021.nlp4if.xml @@ -6,7 +6,7 @@ AnnaFeldman GiovanniDa San Martino ChrisLeberknight - PreslavNakov + PreslavNakov Association for Computational Linguistics
Online
June @@ -21,7 +21,7 @@ Identifying Automatically Generated Headlines using Transformers AntonisMaronikolakis - HinrichSchütze + HinrichSchütze MarkStevenson 1–6 False information spread via the internet and social media influences public opinion and user activity, while generative models enable fake content to be generated faster and more cheaply than had previously been possible. In the not so distant future, identifying fake content generated by deep learning models will play a key role in protecting users from misinformation. To this end, a dataset containing human and computer-generated headlines was created and a user study indicated that humans were only able to identify the fake headlines in 47.8% of the cases. However, the most accurate automatic approach, transformers, achieved an overall accuracy of 85.7%, indicating that content generated from language models can be filtered out accurately. @@ -33,7 +33,7 @@ Improving Hate Speech Type and Target Detection with Hateful Metaphor Features JensLemmens IliaMarkov - WalterDaelemans + WalterDaelemans 7–16 We study the usefulness of hateful metaphorsas features for the identification of the type and target of hate speech in Dutch Facebook comments. For this purpose, all hateful metaphors in the Dutch LiLaH corpus were annotated and interpreted in line with Conceptual Metaphor Theory and Critical Metaphor Analysis. We provide SVM and BERT/RoBERTa results, and investigate the effect of different metaphor information encoding methods on hate speech type and target detection accuracy. The results of the conducted experiments show that hateful metaphor features improve model performance for the both tasks. To our knowledge, it is the first time that the effectiveness of hateful metaphors as an information source for hatespeech classification is investigated. 2021.nlp4if-1.2 @@ -43,7 +43,7 @@ Improving Cross-Domain Hate Speech Detection by Reducing the False Positive Rate IliaMarkov - WalterDaelemans + WalterDaelemans 17–22 Hate speech detection is an actively growing field of research with a variety of recently proposed approaches that allowed to push the state-of-the-art results. One of the challenges of such automated approaches – namely recent deep learning models – is a risk of false positives (i.e., false accusations), which may lead to over-blocking or removal of harmless social media content in applications with little moderator intervention. We evaluate deep learning models both under in-domain and cross-domain hate speech detection conditions, and introduce an SVM approach that allows to significantly improve the state-of-the-art results when combined with the deep learning models through a simple majority-voting ensemble. The improvement is mainly due to a reduction of the false positive rate. 2021.nlp4if-1.3 @@ -92,7 +92,7 @@ AshkanKazemi ZehuaLi VerónicaPérez-Rosas - RadaMihalcea + RadaMihalcea 45–50 In this paper, we explore the construction of natural language explanations for news claims, with the goal of assisting fact-checking and news evaluation applications. We experiment with two methods: (1) an extractive method based on Biased TextRank – a resource-effective unsupervised graph-based algorithm for content extraction; and (2) an abstractive method based on the GPT-2 language model. We perform comparative evaluations on two misinformation datasets in the political and health news domains, and find that the extractive method shows the most promise. 2021.nlp4if-1.7 @@ -172,10 +172,10 @@ <fixed-case>NARNIA</fixed-case> at <fixed-case>NLP</fixed-case>4<fixed-case>IF</fixed-case>-2021: Identification of Misinformation in <fixed-case>COVID</fixed-case>-19 Tweets Using <fixed-case>BERT</fixed-case>weet - AnkitKumar + AnkitKumar NamanJhunjhunwala RakshaAgarwal - NiladriChatterjee + NiladriChatterjee 99–103 The spread of COVID-19 has been accompanied with widespread misinformation on social media. In particular, Twitterverse has seen a huge increase in dissemination of distorted facts and figures. The present work aims at identifying tweets regarding COVID-19 which contains harmful and false information. We have experimented with a number of Deep Learning-based models, including different word embeddings, such as Glove, ELMo, among others. BERTweet model achieved the best overall F1-score of 0.881 and secured the third rank on the above task. 2021.nlp4if-1.14 diff --git a/data/xml/2021.nlp4musa.xml b/data/xml/2021.nlp4musa.xml index c8796296e6..1187378cc2 100644 --- a/data/xml/2021.nlp4musa.xml +++ b/data/xml/2021.nlp4musa.xml @@ -5,7 +5,7 @@ Proceedings of the 2nd Workshop on NLP for Music and Spoken Audio (NLP4MusA) SergioOramas ElenaEpure - LuisEspinosa-Anke + LuisEspinosa-Anke RosieJones MassimoQuadrana MohamedSordo @@ -60,7 +60,7 @@ Using Listeners’ Interpretations in Topic Classification of Song Lyrics VarvaraPapazoglou - RobertGaizauskas + RobertGaizauskas 22–26 2021.nlp4musa-1.5 papazoglou-gaizauskas-2021-using diff --git a/data/xml/2021.nlp4posimpact.xml b/data/xml/2021.nlp4posimpact.xml index 52ee1a4e8e..febb142234 100644 --- a/data/xml/2021.nlp4posimpact.xml +++ b/data/xml/2021.nlp4posimpact.xml @@ -22,7 +22,7 @@ Restatement and Question Generation for Counsellor Chatbot - JohnLee + JohnLee BaikunLiang HaleyFong 1–7 @@ -45,8 +45,8 @@ Cartography of Natural Language Processing for Social Good (<fixed-case>NLP</fixed-case>4<fixed-case>SG</fixed-case>): Searching for Definitions, Statistics and White Spots PaulaFortuna LauraPérez-Mayos - AhmedAbuRa’ed - JuanSoler-Company + AhmedAbuRa’ed + JuanSoler-Company LeoWanner 19–26 The range of works that can be considered as developing NLP for social good (NLP4SG) is enormous. While many of them target the identification of hate speech or fake news, there are others that address, e.g., text simplification to alleviate consequences of dyslexia, or coaching strategies to fight depression. However, so far, there is no clear picture of what areas are targeted by NLP4SG, who are the actors, which are the main scenarios and what are the topics that have been left aside. In order to obtain a clearer view in this respect, we first propose a working definition of NLP4SG and identify some primary aspects that are crucial for NLP4SG, including, e.g., areas, ethics, privacy and bias. Then, we draw upon a corpus of around 50,000 articles downloaded from the ACL Anthology. Based on a list of keywords retrieved from the literature and revised in view of the task, we select from this corpus articles that can be considered to be on NLP4SG according to our definition and analyze them in terms of trends along the time line, etc. The result is a map of the current NLP4SG research and insights concerning the white spots on this map. @@ -113,7 +113,7 @@ Challenges for Information Extraction from Dialogue in Criminal Law JennyHong CatalinVoss - ChristopherManning + ChristopherManning 71–81 Information extraction and question answering have the potential to introduce a new paradigm for how machine learning is applied to criminal law. Existing approaches generally use tabular data for predictive metrics. An alternative approach is needed for matters of equitable justice, where individuals are judged on a case-by-case basis, in a process involving verbal or written discussion and interpretation of case factors. Such discussions are individualized, but they nonetheless rely on underlying facts. Information extraction can play an important role in surfacing these facts, which are still important to understand. We analyze unsupervised, weakly supervised, and pre-trained models’ ability to extract such factual information from the free-form dialogue of California parole hearings. With a few exceptions, most F1 scores are below 0.85. We use this opportunity to highlight some opportunities for further research for information extraction and question answering. We encourage new developments in NLP to enable analysis and review of legal cases to be done in a post-hoc, not predictive, manner. 2021.nlp4posimpact-1.8 @@ -187,7 +187,7 @@ A Speech-enabled Fixed-phrase Translator for Healthcare Accessibility - PierretteBouillon + PierretteBouillon JohannaGerlach JonathanMutal NikosTsourakis @@ -201,7 +201,7 @@ A Grounded Well-being Conversational Agent with Multiple Interaction Modes: Preliminary Results XinxinYan - NdapaNakashole + NdapaNakashole 143–151 Technologies for enhancing well-being, healthcare vigilance and monitoring are on the rise. However, despite patient interest, such technologies suffer from low adoption. One hypothesis for this limited adoption is loss of human interaction that is central to doctor-patient encounters. In this paper we seek to address this limitation via a conversational agent that adopts one aspect of in-person doctor-patient interactions: A human avatar to facilitate medical grounded question answering. This is akin to the in-person scenario where the doctor may point to the human body or the patient may point to their own body to express their conditions. Additionally, our agent has multiple interaction modes, that may give more options for the patient to use the agent, not just for medical question answering, but also to engage in conversations about general topics and current events. Both the avatar, and the multiple interaction modes could help improve adherence. We present a high level overview of the design of our agent, Marie Bot Wellbeing. We also report implementation details of our early prototype , and present preliminary results. 2021.nlp4posimpact-1.16 diff --git a/data/xml/2021.nlpmc.xml b/data/xml/2021.nlpmc.xml index f1cc6593a8..3487f5019e 100644 --- a/data/xml/2021.nlpmc.xml +++ b/data/xml/2021.nlpmc.xml @@ -10,7 +10,7 @@ ShaoqingYuan YiZhang ParminderBhatia - ByronWallace + ByronWallace Association for Computational Linguistics
Online
June @@ -37,7 +37,7 @@ Towards Automating Medical Scribing : Clinic Visit <fixed-case>D</fixed-case>ialogue2<fixed-case>N</fixed-case>ote Sentence Alignment and Snippet Summarization Wen-waiYim - MelihaYetisgen + MelihaYetisgen 10–20 Medical conversations from patient visits are routinely summarized into clinical notes for documentation of clinical care. The automatic creation of clinical note is particularly challenging given that it requires summarization over spoken language and multiple speaker turns; as well, clinical notes include highly technical semi-structured text. In this paper, we describe our corpus creation method and baseline systems for two NLP tasks, clinical dialogue2note sentence alignment and clinical dialogue2note snippet summarization. These two systems, as well as other models created from such a corpus, may be incorporated as parts of an overall end-to-end clinical note generation system. 2021.nlpmc-1.2 @@ -116,7 +116,7 @@ FranckDernoncourt WalterChang EmiliaFarcas - NdapaNakashole + NdapaNakashole 58–65 Understanding the intent of medical questions asked by patients, or Consumer Health Questions, is an essential skill for medical Conversational AI systems. We propose a novel data-augmented and simple joint learning approach combining question summarization and Recognizing Question Entailment (RQE) in the medical domain. Our data augmentation approach enables to use just one dataset for joint learning. We show improvements on both tasks across four biomedical datasets in accuracy (+8%), ROUGE-1 (+2.5%) and human evaluation scores. Human evaluation shows joint learning generates faithful and informative summaries. Finally, we release our code, the two question summarization datasets extracted from a large-scale medical dialogue dataset, as well as our augmented datasets. 2021.nlpmc-1.8 diff --git a/data/xml/2021.nodalida.xml b/data/xml/2021.nodalida.xml index 00954e1038..a41ae68ba8 100644 --- a/data/xml/2021.nodalida.xml +++ b/data/xml/2021.nodalida.xml @@ -4,7 +4,7 @@ Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa) SimonDobnik - LiljaØvrelid + LiljaØvrelid Linköping University Electronic Press, Sweden
Reykjavik, Iceland (Online)
May 31--2 June @@ -222,7 +222,7 @@ De-identification of Privacy-related Entities in Job Postings Kristian NørgaardJensen MikeZhang - BarbaraPlank + BarbaraPlank 210–221 De-identification is the task of detecting privacy-related entities in text, such as person names, emails and contact data. It has been well-studied within the medical domain. The need for de-identification technology is increasing, as privacy-preserving data handling is in high demand in many domains. In this paper, we focus on job postings. We present JobStack, a new corpus for de-identification of personal data in job vacancies on Stackoverflow. We introduce baselines, comparing Long-Short Term Memory (LSTM) and Transformer models. To improve these baselines, we experiment with BERT representations, and distantly related auxiliary data via multi-task learning. Our results show that auxiliary data helps to improve de-identification performance. While BERT representations improve performance, surprisingly “vanilla” BERT turned out to be more effective than BERT trained on Stackoverflow-related data. 2021.nodalida-main.21 @@ -269,7 +269,7 @@ Synonym Replacement based on a Study of Basic-level Nouns in <fixed-case>S</fixed-case>wedish Texts of Different Complexity EvelinaRennes - ArneJönsson + ArneJönsson 259–267 Basic-level terms have been described as the most important to human categorisation. They are the earliest emerging words in children’s language acquisition, and seem to be more frequently occurring in language in general. In this article, we explored the use of basic-level nouns in texts of different complexity, and hypothesise that hypernyms with characteristics of basic-level words could be useful for the task of lexical simplification. We conducted two corpus studies using four different corpora, two corpora of standard Swedish and two corpora of simple Swedish, and explored whether corpora of simple texts contain a higher proportion of basic-level nouns than corpora of standard Swedish. Based on insights from the corpus studies, we developed a novel algorithm for choosing the best synonym by rewarding high relative frequencies and monolexemity, and restricting the climb in the word hierarchy not to suggest synonyms of a too high level of inclusiveness. 2021.nodalida-main.26 @@ -289,7 +289,7 @@ AarneTalman MariannaApidianaki StergiosChatzikyriakidis - JörgTiedemann + JörgTiedemann 276–287 Pre-trained neural language models give high performance on natural language inference (NLI) tasks. But whether they actually understand the meaning of the processed sequences is still unclear. We propose a new diagnostics test suite which allows to assess whether a dataset constitutes a good testbed for evaluating the models’ meaning understanding capabilities. We specifically apply controlled corruption transformations to widely used benchmarks (MNLI and ANLI), which involve removing entire word classes and often lead to non-sensical sentence pairs. If model accuracy on the corrupted data remains high, then the dataset is likely to contain statistical biases and artefacts that guide prediction. Inversely, a large decrease in model accuracy indicates that the original dataset provides a proper challenge to the models’ reasoning capabilities. Hence, our proposed controls can serve as a crash test for developing high quality data for NLI tasks. 2021.nodalida-main.28 @@ -345,7 +345,7 @@ Towards cross-lingual application of language-specific <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> tagging schemes HinrikHafsteinsson - Anton KarlIngason + Anton KarlIngason 321–325 We describe the process of conversion between the PoS tagging schemes of two languages, the Icelandic MIM-GOLD tagging scheme and the Faroese Sosialurin tagging scheme. These tagging schemes are functionally similar but use separate ways to encode fine-grained morphological information on tokenised text. As Faroese and Icelandic are lexically and grammatically similar, having a systematic method to convert between these two tagging schemes would be beneficial in the field of language technology, specifically in research on transfer learning between the two languages. As a product of our work, we present a provisional version of Icelandic corpora, prepared in the Faroese PoS tagging scheme, ready for use in cross-lingual NLP applications. 2021.nodalida-main.33 @@ -387,7 +387,7 @@ MikkoAulamo SamiVirpioja YvesScherrer - JörgTiedemann + JörgTiedemann 351–356 We consider a low-resource translation task from Finnish into Northern Sámi. Collecting all available parallel data between the languages, we obtain around 30,000 sentence pairs. However, there exists a significantly larger monolingual Northern Sámi corpus, as well as a rule-based machine translation (RBMT) system between the languages. To make the best use of the monolingual data in a neural machine translation (NMT) system, we use the backtranslation approach to create synthetic parallel data from it using both NMT and RBMT systems. Evaluating the results on an in-domain test set and a small out-of-domain set, we find that the RBMT backtranslation outperforms NMT backtranslation clearly for the out-of-domain test set, but also slightly for the in-domain data, for which the NMT backtranslation model provided clearly better BLEU scores than the RBMT. In addition, combining both backtranslated data sets improves the RBMT approach only for the in-domain test set. This suggests that the RBMT system provides general-domain knowledge that cannot be found from the relative small parallel training data. 2021.nodalida-main.37 @@ -449,7 +449,7 @@ Error Analysis of using <fixed-case>BART</fixed-case> for Multi-Document Summarization: A Study for <fixed-case>E</fixed-case>nglish and <fixed-case>G</fixed-case>erman Language TimoJohner AbhikJana - ChrisBiemann + ChrisBiemann 391–397 Recent research using pre-trained language models for multi-document summarization task lacks deep investigation of potential erroneous cases and their possible application on other languages. In this work, we apply a pre-trained language model (BART) for multi-document summarization (MDS) task using both fine-tuning and without fine-tuning. We use two English datasets and one German dataset for this study. First, we reproduce the multi-document summaries for English language by following one of the recent studies. Next, we show the applicability of the model to German language by achieving state-of-the-art performance on German MDS. We perform an in-depth error analysis of the followed approach for both languages, which leads us to identifying most notable errors, from made-up facts and topic delimitation, and quantifying the amount of extractiveness. 2021.nodalida-main.43 @@ -477,13 +477,13 @@ The <fixed-case>D</fixed-case>anish <fixed-case>G</fixed-case>igaword Corpus - LeonStrømberg-Derczynski - ManuelCiosici + LeonStrømberg-Derczynski + ManuelCiosici RebekahBaglini Morten H.Christiansen Jacob AarupDalsgaard RiccardoFusaroli - Peter JuelHenrichsen + Peter JuelHenrichsen RasmusHvingelby AndreasKirkedal Alex SpeedKjeldsen @@ -503,7 +503,7 @@ <fixed-case>D</fixed-case>an<fixed-case>FEVER</fixed-case>: claim verification dataset for <fixed-case>D</fixed-case>anish JeppeNørregaard - LeonDerczynski + LeonDerczynski 422–428 We present a dataset, DanFEVER, intended for multilingual misinformation research. The dataset is in Danish and has the same format as the well-known English FEVER dataset. It can be used for testing methods in multilingual settings, as well as for creating models in production for the Danish language. 2021.nodalida-main.47 @@ -525,7 +525,7 @@ Getting Hold of Villains and other Rogues ManfredKlenner - AnneGöhring + AnneGöhring SophiaConrad 435–439 In this paper, we introduce the first corpus specifying negative entities within sentences. We discuss indicators for their presence, namely particular verbs, but also the linguistic conditions when their prediction should be suppressed. We further show that a fine-tuned Bert-based baseline model outperforms an over-generating rule-based approach which is not aware of these further restrictions. If a perfect filter were applied, both would be on par. diff --git a/data/xml/2021.nuse.xml b/data/xml/2021.nuse.xml index 2afb09c47b..a9c0fde730 100644 --- a/data/xml/2021.nuse.xml +++ b/data/xml/2021.nuse.xml @@ -41,7 +41,7 @@ AakankshaNaik HuimingJin HariharanMuralidharan - CarolynRosé + CarolynRosé 13–23 Fanfiction presents an opportunity as a data source for research in NLP, education, and social science. However, answering specific research questions with this data is difficult, since fanfiction contains more diverse writing styles than formal fiction. We present a text processing pipeline for fanfiction, with a focus on identifying text associated with characters. The pipeline includes modules for character identification and coreference, as well as the attribution of quotes and narration to those characters. Additionally, the pipeline contains a novel approach to character coreference that uses knowledge from quote attribution to resolve pronouns within quotes. For each module, we evaluate the effectiveness of various approaches on 10 annotated fanfiction stories. This pipeline outperforms tools developed for formal fiction on the tasks of character coreference and quote attribution 2021.nuse-1.2 @@ -73,7 +73,7 @@ Gender and Representation Bias in <fixed-case>GPT</fixed-case>-3 Generated Stories - LiLucy + LiLucy DavidBamman 48–55 Using topic modeling and lexicon-based word similarity, we find that stories generated by GPT-3 exhibit many known gender stereotypes. Generated stories depict different topics and descriptions depending on GPT-3’s perceived gender of the character in a prompt, with feminine characters more likely to be associated with family and appearance, and described as less powerful than masculine characters, even when associated with high power verbs in a prompt. Our study raises questions on how one can avoid unintended social biases when using large language models for storytelling. diff --git a/data/xml/2021.paclic.xml b/data/xml/2021.paclic.xml index 7147da018a..130fc010dc 100644 --- a/data/xml/2021.paclic.xml +++ b/data/xml/2021.paclic.xml @@ -5,7 +5,7 @@ Proceedings of the 35th Pacific Asia Conference on Language, Information and Computation KaibaoHu Jong-BokKim - ChengqingZong + ChengqingZong EmmanueleChersoni Association for Computational Lingustics
Shanghai, China
@@ -48,7 +48,7 @@
Exploring sentiment constructions: connecting deep learning models with linguistic construction - Shu-KaiHsieh + Shu-KaiHsieh Yu-HsiangTseng 32–39 2021.paclic-1.4 @@ -67,7 +67,7 @@ An Empirical Performance Analysis of State-of-the-Art Summarization Models for Automatic Minuting MuskaanSingh TirthankarGhosal - OndrejBojar + OndrejBojar 50–60 2021.paclic-1.6 muskaan-singh-bojar-2021-empirical @@ -116,7 +116,7 @@ Po-Ya AngelaWang Han-TangHung Ka-SîngKhóo - Shu-KaiHsieh + Shu-KaiHsieh 108–118 2021.paclic-1.12 chang-etal-2021-examine @@ -133,7 +133,7 @@ Aspect or Manner? A Study of Reduplicated Adverbials in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese - Siaw-FongChung + Siaw-FongChung Chu-RenHuang 130–139 2021.paclic-1.14 @@ -176,7 +176,7 @@ Applying Masked Language Models to Search for Suitable Verbs Used in Academic Writing - Chooi LingGoh + Chooi LingGoh 180–188 2021.paclic-1.19 goh-2021-applying @@ -192,7 +192,7 @@ A Comparative Study of Collocation Extraction Methods from the Perspectives of Vocabulary and Grammar: A Case Study in the Field of Journalism LuluGu YuePan - PengyuanLiu + PengyuanLiu 201–210 2021.paclic-1.21 gu-etal-2021-comparative @@ -246,7 +246,7 @@ Various Errors Improve Neural Grammatical Error Correction ShotaKoyama HiroyaTakamura - NaoakiOkazaki + NaoakiOkazaki 251–261 2021.paclic-1.27 koyama-etal-2021-various @@ -391,7 +391,7 @@ Natural Language Inference using Neural Network and Tableau Method AyahitoSaji DaikiTakao - YoshihideKato + YoshihideKato ShigekiMatsubara 402–410 2021.paclic-1.43 @@ -543,7 +543,7 @@ Incorporating Semantic Textual Similarity and Lexical Matching for Information Retrieval HirokiIida - NaoakiOkazaki + NaoakiOkazaki 582–591 2021.paclic-1.61 iida-okazaki-2021-incorporating @@ -561,7 +561,7 @@ A Sentiment Analysis of Men’s and Women’s Speech in the <fixed-case>BNC</fixed-case>64 - Yong-HunLee + Yong-HunLee Ji-HyeKim 603–610 2021.paclic-1.63 @@ -574,7 +574,7 @@ ChaehunPark HoyunSong EugeneJang - Jong C.Park + Jong C.Park 611–621 2021.paclic-1.64 lee-etal-2021-optimizing @@ -673,7 +673,7 @@ Science Mapping of Publications in Natural Language Processing in the <fixed-case>P</fixed-case>hilippines: 2006 to 2020 - Rachel Edita O.Roxas + Rachel Edita O.Roxas Joseph MarvinImperial Angelica H.De La Cruz 721–730 diff --git a/data/xml/2021.pail.xml b/data/xml/2021.pail.xml index 878c63ff45..a510ed7a8b 100644 --- a/data/xml/2021.pail.xml +++ b/data/xml/2021.pail.xml @@ -23,7 +23,7 @@ ShyamRatan DeepakAlok RiteshKumar - Atul Kr.Ojha + Atul Kr.Ojha 1–11 In this paper, we discuss the development of treebanks for two low-resourced Indian languages - Magahi and Braj - based on the Universal Dependencies framework. The Magahi treebank contains 945 sentences and Braj treebank around 500 sentences marked with their lemmas, part-of-speech, morphological features and universal dependencies. This paper gives a description of the different dependency relationship found in the two languages and give some statistics of the two treebanks. The dataset will be made publicly available on Universal Dependency (UD) repository in the next (v2.10) release. 2021.pail-1.1 @@ -41,8 +41,8 @@ Dependency Parsing in a Morphological rich language, <fixed-case>T</fixed-case>amil - VijaySundar Ram - SobhaLalitha Devi + VijaySundar Ram + SobhaLalitha Devi 20–26 Dependency parsing is the process of analysing the grammatical structure of a sentence based on the dependencies between the words in a sentence. The annotation of dependency parsing is done using different formalisms at word-level namely Universal Dependencies and chunk-level namely AnnaCorra. Though dependency parsing is deeply dealt in languages such as English, Czech etc the same cannot be adopted for the morphologically rich and agglutinative languages. In this paper, we discuss the development of a dependency parser for Tamil, a South Dravidian language. The different characteristics of the language make this task a challenging task. Tamil, a morphologically rich and agglutinative language, has copula drop, accusative and genitive case drop and pro-drop. Coordinative constructions are introduced by affixation of morpheme ‘um’. Embedded clausal structures are common in relative participle and complementizer clauses. In this paper, we have discussed our approach to handle some of these challenges. We have used Malt parser, a supervised learning- approach based implementation. We have obtained an accuracy of 79.27% for Unlabelled Attachment Score, 73.64% for Labelled Attachment Score and 68.82% for Labelled Accuracy. 2021.pail-1.3 diff --git a/data/xml/2021.privatenlp.xml b/data/xml/2021.privatenlp.xml index 2a074c6b29..ea3e07f38e 100644 --- a/data/xml/2021.privatenlp.xml +++ b/data/xml/2021.privatenlp.xml @@ -5,7 +5,7 @@ Proceedings of the Third Workshop on Privacy in Natural Language Processing OluwaseyiFeyisetan SepidehGhanavati - ShervinMalmasi + ShervinMalmasi PatriciaThaine Association for Computational Linguistics
Online
@@ -65,7 +65,7 @@ An Investigation towards Differentially Private Sequence Tagging in a Federated Framework AbhikJana - ChrisBiemann + ChrisBiemann 30–35 To build machine learning-based applications for sensitive domains like medical, legal, etc. where the digitized text contains private information, anonymization of text is required for preserving privacy. Sequence tagging, e.g. as done in Named Entity Recognition (NER) can help to detect private information. However, to train sequence tagging models, a sufficient amount of labeled data are required but for privacy-sensitive domains, such labeled data also can not be shared directly. In this paper, we investigate the applicability of a privacy-preserving framework for sequence tagging tasks, specifically NER. Hence, we analyze a framework for the NER task, which incorporates two levels of privacy protection. Firstly, we deploy a federated learning (FL) framework where the labeled data are not shared with the centralized server as well as the peer clients. Secondly, we apply differential privacy (DP) while the models are being trained in each client instance. While both privacy measures are suitable for privacy-aware models, their combination results in unstable models. To our knowledge, this is the first study of its kind on privacy-aware sequence tagging models. 2021.privatenlp-1.4 diff --git a/data/xml/2021.ranlp.xml b/data/xml/2021.ranlp.xml index c3eb8e9ac8..9a7179bf21 100644 --- a/data/xml/2021.ranlp.xml +++ b/data/xml/2021.ranlp.xml @@ -3,7 +3,7 @@ Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021) - RuslanMitkov + RuslanMitkov GaliaAngelova INCOMA Ltd.
Held Online
@@ -29,7 +29,7 @@ Ontology Population Reusing Resources for Dialogue Intent Detection: Generic and Multilingual Approach CristinaAceta IzaskunFernández - AitorSoroa + AitorSoroa 10–18 This work presents a generic semi-automatic strategy to populate the domain ontology of an ontology-driven task-oriented dialogue system, with the aim of performing successful intent detection in the dialogue process, reusing already existing multilingual resources. This semi-automatic approach allows ontology engineers to exploit available resources so as to associate the potential situations in the use case to FrameNet frames and obtain the relevant lexical units associated to them in the target language, following lexical and semantic criteria, without linguistic expert knowledge. This strategy has been validated and evaluated in two use cases, from industrial scenarios, for interaction in Spanish with a guide robot and with a Computerized Maintenance Management System (CMMS). In both cases, this method has allowed the ontology engineer to instantiate the domain ontology with the intent-relevant information with quality data in a simple and low-resource-consuming manner. 2021.ranlp-1.2 @@ -49,7 +49,7 @@ Domain Adaptation for <fixed-case>H</fixed-case>indi-<fixed-case>T</fixed-case>elugu Machine Translation Using Domain Specific Back Translation HemaAla VandanMujadia - DiptiSharma + DiptiSharma 26–34 In this paper, we present a novel approachfor domain adaptation in Neural MachineTranslation which aims to improve thetranslation quality over a new domain. Adapting new domains is a highly challeng-ing task for Neural Machine Translation onlimited data, it becomes even more diffi-cult for technical domains such as Chem-istry and Artificial Intelligence due to spe-cific terminology, etc. We propose DomainSpecific Back Translation method whichuses available monolingual data and gen-erates synthetic data in a different way. This approach uses Out Of Domain words. The approach is very generic and can beapplied to any language pair for any domain. We conduct our experiments onChemistry and Artificial Intelligence do-mains for Hindi and Telugu in both direc-tions. It has been observed that the usageof synthetic data created by the proposedalgorithm improves the BLEU scores significantly. 2021.ranlp-1.4 @@ -86,7 +86,7 @@ Comparing Supervised Machine Learning Techniques for Genre Analysis in Software Engineering Research Articles FelipeAraújo de Britto - ThiagoCastro Ferreira + ThiagoCastro Ferreira Leonardo PereiraNunes FernandoSilva Parreiras 63–72 @@ -97,7 +97,7 @@ Enriching the Transformer with Linguistic Factors for Low-Resource Machine Translation JordiArmengol-Estapé - Marta R.Costa-jussà + Marta R.Costa-jussà CarlosEscolano 73–78 Introducing factors, that is to say, word features such as linguistic information referring to the source tokens, is known to improve the results of neural machine translation systems in certain settings, typically in recurrent architectures. This study proposes enhancing the current state-of-the-art neural machine translation architecture, the Transformer, so that it allows to introduce external knowledge. In particular, our proposed modification, the Factored Transformer, uses linguistic factors that insert additional knowledge into the machine translation system. Apart from using different kinds of features, we study the effect of different architectural configurations. Specifically, we analyze the performance of combining words and features at the embedding level or at the encoder level, and we experiment with two different combination strategies. With the best-found configuration, we show improvements of 0.8 BLEU over the baseline Transformer in the IWSLT German-to-English task. Moreover, we experiment with the more challenging FLoRes English-to-Nepali benchmark, which includes both extremely low-resourced and very distant languages, and obtain an improvement of 1.2 BLEU @@ -212,7 +212,7 @@ SonjaRemmer AliciaPérez HerculesDalianis - ArantzaCasillas + ArantzaCasillas 165–172 We introduce a multi-label text classifier with per-label attention for the classification of Electronic Health Records according to the International Classification of Diseases. We apply the model on two Electronic Health Records datasets with Discharge Summaries in two languages with fewer resources than English, Spanish and Swedish. Our model leverages the BERT Multilingual model (specifically the Wikipedia, as the model have been trained with 104 languages, including Spanish and Swedish, with the largest Wikipedia dumps) to share the language modelling capabilities across the languages. With the per-label attention, the model can compute the relevance of each word from the EHR towards the prediction of each label. For the experimental framework, we apply 157 labels from Chapter XI – Diseases of the Digestive System of the ICD, which makes the attention especially important as the model has to discriminate between similar diseases. 1 https://github.com/google-research/bert/blob/master/multilingual.md#list-of-languages 2021.ranlp-1.20 @@ -234,7 +234,7 @@ IvanKoychev MariaCastaldo TommasoVenturini - PreslavNakov + PreslavNakov 182–189 We propose a novel framework for predicting the factuality of reporting of news media outlets by studying the user attention cycles in their YouTube channels. In particular, we design a rich set of features derived from the temporal evolution of the number of views, likes, dislikes, and comments for a video, which we then aggregate to the channel level. We develop and release a dataset for the task, containing observations of user attention on YouTube channels for 489 news media. Our experiments demonstrate both complementarity and sizable improvements over state-of-the-art textual representations. 2021.ranlp-1.22 @@ -253,7 +253,7 @@ A Psychologically Informed Part-of-Speech Analysis of Depression in Social Media Ana-MariaBucur Ioana R.Podina - Liviu P.Dinu + Liviu P.Dinu 199–207 In this work, we provide an extensive part-of-speech analysis of the discourse of social media users with depression. Research in psychology revealed that depressed users tend to be self-focused, more preoccupied with themselves and ruminate more about their lives and emotions. Our work aims to make use of large-scale datasets and computational methods for a quantitative exploration of discourse. We use the publicly available depression dataset from the Early Risk Prediction on the Internet Workshop (eRisk) 2018 and extract part-of-speech features and several indices based on them. Our results reveal statistically significant differences between the depressed and non-depressed individuals confirming findings from the existing psychology literature. Our work provides insights regarding the way in which depressed individuals are expressing themselves on social media platforms, allowing for better-informed computational models to help monitor and prevent mental illnesses. 2021.ranlp-1.24 @@ -275,10 +275,10 @@ HianCañizares-Díaz AlejandroPiad-Morffis SuilanEstevez-Velarde - YoanGutiérrez + YoanGutiérrez YudiviánAlmeida Cruz - AndresMontoyo - RafaelMuñoz-Guillena + AndresMontoyo + RafaelMuñoz-Guillena 216–225 This paper presents an active learning approach that aims to reduce the human effort required during the annotation of natural language corpora composed of entities and semantic relations. Our approach assists human annotators by intelligently selecting the most informative sentences to annotate and then pre-annotating them with a few highly accurate entities and semantic relations. We define an uncertainty-based query strategy with a weighted density factor, using similarity metrics based on sentence embeddings. As a case study, we evaluate our approach via simulation in a biomedical corpus and estimate the potential reduction in total annotation time. Experimental results suggest that the query strategy reduces by between 35% and 40% the number of sentences that must be manually annotated to develop systems able to reach a target F1 score, while the pre-annotation strategy produces an additional 24% reduction in the total annotation time. Overall, our preliminary experiments suggest that as much as 60% of the annotation time could be saved while producing corpora that have the same usefulness for training machine learning algorithms. An open-source computational tool that implements the aforementioned strategies is presented and published online for the research community. 2021.ranlp-1.26 @@ -296,7 +296,7 @@ Evaluating Recognizing Question Entailment Methods for a <fixed-case>P</fixed-case>ortuguese Community Question-Answering System about Diabetes Mellitus - ThiagoCastro Ferreira + ThiagoCastro Ferreira JoãoVictor de Pinho Costa IsabelaRigotto VitoriaPortella @@ -323,8 +323,8 @@ On the Usability of Transformers-based Models for a <fixed-case>F</fixed-case>rench Question-Answering Task OralieCattan - ChristopheServan - SophieRosset + ChristopheServan + SophieRosset 244–255 For many tasks, state-of-the-art results have been achieved with Transformer-based architectures, resulting in a paradigmatic shift in practices from the use of task-specific architectures to the fine-tuning of pre-trained language models. The ongoing trend consists in training models with an ever-increasing amount of data and parameters, which requires considerable resources. It leads to a strong search to improve resource efficiency based on algorithmic and hardware improvements evaluated only for English. This raises questions about their usability when applied to small-scale learning problems, for which a limited amount of training data is available, especially for under-resourced languages tasks. The lack of appropriately sized corpora is a hindrance to applying data-driven and transfer learning-based approaches with strong instability cases. In this paper, we establish a state-of-the-art of the efforts dedicated to the usability of Transformer-based models and propose to evaluate these improvements on the question-answering performances of French language which have few resources. We address the instability relating to data scarcity by investigating various training strategies with data augmentation, hyperparameters optimization and cross-lingual transfer. We also introduce a new compact model for French FrALBERT which proves to be competitive in low-resource settings. 2021.ranlp-1.29 @@ -343,7 +343,7 @@ Character-based <fixed-case>T</fixed-case>hai Word Segmentation with Multiple Attentions ThodsapornChay-intr HidetakaKamigaito - ManabuOkumura + ManabuOkumura 264–273 Character-based word-segmentation models have been extensively applied to agglutinative languages, including Thai, due to their high performance. These models estimate word boundaries from a character sequence. However, a character unit in sequences has no essential meaning, compared with word, subword, and character cluster units. We propose a Thai word-segmentation model that uses various types of information, including words, subwords, and character clusters, from a character sequence. Our model applies multiple attentions to refine segmentation inferences by estimating the significant relationships among characters and various unit types. The experimental results indicate that our model can outperform other state-of-the-art Thai word-segmentation models. 2021.ranlp-1.31 @@ -372,7 +372,7 @@ <fixed-case>RED</fixed-case>: A Novel Dataset for <fixed-case>R</fixed-case>omanian Emotion Detection from Tweets AlexandraCiobotaru - Liviu P.Dinu + Liviu P.Dinu 291–300 In Romanian language there are some resources for automatic text comprehension, but for Emotion Detection, not lexicon-based, there are none. To cover this gap, we extracted data from Twitter and created the first dataset containing tweets annotated with five types of emotions: joy, fear, sadness, anger and neutral, with the intent of being used for opinion mining and analysis tasks. In this article we present some features of our novel dataset, and create a benchmark to achieve the first supervised machine learning model for automatic Emotion Detection in Romanian short texts. We investigate the performance of four classical machine learning models: Multinomial Naive Bayes, Logistic Regression, Support Vector Classification and Linear Support Vector Classification. We also investigate more modern approaches like fastText, which makes use of subword information. Lastly, we fine-tune the Romanian BERT for text classification and our experiments show that the BERT-based model has the best performance for the task of Emotion Detection from Romanian tweets. Keywords: Emotion Detection, Twitter, Romanian, Supervised Machine Learning 2021.ranlp-1.34 @@ -401,7 +401,7 @@ Towards an Etymological Map of <fixed-case>R</fixed-case>omanian Alina MariaCristea AncaDinu - Liviu P.Dinu + Liviu P.Dinu SimonaGeorgescu Ana SabinaUban LaurentiuZoicas @@ -414,7 +414,7 @@ A Syntax-Aware Edit-based System for Text Simplification Oscar M.Cumbicus-Pineda ItziarGonzalez-Dios - AitorSoroa + AitorSoroa 324–334 Edit-based text simplification systems have attained much attention in recent years due to their ability to produce simplification solutions that are interpretable, as well as requiring less training examples compared to traditional seq2seq systems. Edit-based systems learn edit operations at a word level, but it is well known that many of the operations performed when simplifying text are of a syntactic nature. In this paper we propose to add syntactic information into a well known edit-based system. We extend the system with a graph convolutional network module that mimics the dependency structure of the sentence, thus giving the model an explicit representation of syntax. We perform a series of experiments in English, Spanish and Italian, and report improvements of the state of the art in four out of five datasets. Further analysis shows that syntactic information is always beneficial, and suggest that syntax is more helpful in complex sentences. 2021.ranlp-1.38 @@ -423,8 +423,8 @@ On Generating Fact-Infused Question Variations ArthurDeschamps - Sujatha DasGollapalli - See-KiongNg + Sujatha DasGollapalli + See-KiongNg 335–345 To fully model human-like ability to ask questions, automatic question generation (QG) models must be able to produce multiple expressions of the same question with different levels of detail. Unfortunately, existing datasets available for learning QG do not include paraphrases or question variations affecting a model’s ability to learn this capability. We present FIRS, a dataset containing human-generated fact-infused rewrites of questions from the widely-used SQuAD dataset to address this limitation. Questions in FIRS were obtained by combining a given question with facts of entities referenced in the question. We study a double encoder-decoder model, Fact-Infused Question Generator (FIQG), for learning to generate fact-infused questions from a given question. Experimental results show that FIQG effectively incorporates information from facts to add more detail to a given question. To the best of our knowledge, ours is the first study to present fact-infusion as a novel form of question paraphrasing. 2021.ranlp-1.39 @@ -433,8 +433,8 @@ Event Prominence Extraction Combining a Knowledge-Based Syntactic Parser and a <fixed-case>BERT</fixed-case> Classifier for <fixed-case>D</fixed-case>utch ThierryDesot - OrpheeDe Clercq - VeroniqueHoste + OrpheeDe Clercq + VeroniqueHoste 346–357 A core task in information extraction is event detection that identifies event triggers in sentences that are typically classified into event types. In this study an event is considered as the unit to measure diversity and similarity in news articles in the framework of a news recommendation system. Current typology-based event detection approaches fail to handle the variety of events expressed in real-world situations. To overcome this, we aim to perform event salience classification and explore whether a transformer model is capable of classifying new information into less and more general prominence classes. After comparing a Support Vector Machine (SVM) baseline and our transformer-based classifier performances on several event span formats, we conceived multi-word event spans as syntactic clauses. Those are fed into our prominence classifier which is fine-tuned on pre-trained Dutch BERT word embeddings. On top of that we outperform a pipeline of a Conditional Random Field (CRF) approach to event-trigger word detection and the BERT-based classifier. To the best of our knowledge we present the first event extraction approach that combines an expert-based syntactic parser with a transformer-based classifier for Dutch. 2021.ranlp-1.40 @@ -460,9 +460,9 @@ Tracing Source Language Interference in Translation with Graph-Isomorphism Measures - KoelDutta Chowdhury + KoelDutta Chowdhury CristinaEspaña-Bonet - Josefvan Genabith + Josefvan Genabith 375–385 Previous research has used linguistic features to show that translations exhibit traces of source language interference and that phylogenetic trees between languages can be reconstructed from the results of translations into the same language. Recent research has shown that instances of translationese (source language interference) can even be detected in embedding spaces, comparing embeddings spaces of original language data with embedding spaces resulting from translations into the same language, using a simple Eigenvector-based divergence from isomorphism measure. To date, it remains an open question whether alternative graph-isomorphism measures can produce better results. In this paper, we (i) explore Gromov-Hausdorff distance, (ii) present a novel spectral version of the Eigenvector-based method, and (iii) evaluate all approaches against a broad linguistic typological database (URIEL). We show that language distances resulting from our spectral isomorphism approaches can reproduce genetic trees on a par with previous work without requiring any explicit linguistic information and that the results can be extended to non-Indo-European languages. Finally, we show that the methods are robust under a variety of modeling conditions. 2021.ranlp-1.43 @@ -491,8 +491,8 @@ SuilanEstevez-Velarde AlejandroPiad-Morffis YoanGutierrez - AndresMontoyo - RafaelMuñoz + AndresMontoyo + RafaelMuñoz YudiviánAlmeida Cruz 402–410 This paper presents the preliminary results of an ongoing project that analyzes the growing body of scientific research published around the COVID-19 pandemic. In this research, a general-purpose semantic model is used to double annotate a batch of 500 sentences that were manually selected from the CORD-19 corpus. Afterwards, a baseline text-mining pipeline is designed and evaluated via a large batch of 100,959 sentences. We present a qualitative analysis of the most interesting facts automatically extracted and highlight possible future lines of development. The preliminary results show that general-purpose semantic models are a useful tool for discovering fine-grained knowledge in large corpora of scientific documents. @@ -516,7 +516,7 @@ ChenlongHu HidetakaKamigaito HiroyaTakamura - ManabuOkumura + ManabuOkumura 421–427 Character-aware neural language models can capture the relationship between words by exploiting character-level information and are particularly effective for languages with rich morphology. However, these models are usually biased towards information from surface forms. To alleviate this problem, we propose a simple and effective method to improve a character-aware neural language model by forcing a character encoder to produce word-based embeddings under Skip-gram architecture in a warm-up step without extra training data. We empirically show that the resulting character-aware neural language model achieves obvious improvements of perplexity scores on typologically diverse languages, that contain many low-frequency or unseen words. 2021.ranlp-1.48 @@ -558,7 +558,7 @@ A Dynamic Head Importance Computation Mechanism for Neural Machine Translation AkshayGoindani - ManishShrivastava + ManishShrivastava 454–462 Multiple parallel attention mechanisms that use multiple attention heads facilitate greater performance of the Transformer model for various applications e.g., Neural Machine Translation (NMT), text classification. In multi-head attention mechanism, different heads attend to different parts of the input. However, the limitation is that multiple heads might attend to the same part of the input, resulting in multiple heads being redundant. Thus, the model resources are under-utilized. One approach to avoid this is to prune least important heads based on certain importance score. In this work, we focus on designing a Dynamic Head Importance Computation Mechanism (DHICM) to dynamically calculate the importance of a head with respect to the input. Our insight is to design an additional attention layer together with multi-head attention, and utilize the outputs of the multi-head attention along with the input, to compute the importance for each head. Additionally, we add an extra loss function to prevent the model from assigning same score to all heads, to identify more important heads and improvise performance. We analyzed performance of DHICM for NMT with different languages. Experiments on different datasets show that DHICM outperforms traditional Transformer-based approach by large margin, especially, when less training data is available. 2021.ranlp-1.52 @@ -596,7 +596,7 @@ Apples to Apples: A Systematic Evaluation of Topic Models IsmailHarrando PasqualeLisena - RaphaelTroncy + RaphaelTroncy 483–493 From statistical to neural models, a wide variety of topic modelling algorithms have been proposed in the literature. However, because of the diversity of datasets and metrics, there have not been many efforts to systematically compare their performance on the same benchmarks and under the same conditions. In this paper, we present a selection of 9 topic modelling techniques from the state of the art reflecting a diversity of approaches to the task, an overview of the different metrics used to compare their performance, and the challenges of conducting such a comparison. We empirically evaluate the performance of these models on different settings reflecting a variety of real-life conditions in terms of dataset size, number of topics, and distribution of topics, following identical preprocessing and evaluation processes. Using both metrics that rely on the intrinsic characteristics of the dataset (different coherence metrics), as well as external knowledge (word embeddings and ground-truth topic labels), our experiments reveal several shortcomings regarding the common practices in topic models evaluation. 2021.ranlp-1.55 @@ -776,7 +776,7 @@ <fixed-case>BERT</fixed-case>-<fixed-case>P</fixed-case>ers<fixed-case>NER</fixed-case>: A New Model for <fixed-case>P</fixed-case>ersian Named Entity Recognition FaraneJalali Farahani - GholamrezaGhassem-Sani + GholamrezaGhassem-Sani 647–654 Named entity recognition (NER) is one of the major tasks in natural language processing. A named entity is often a word or expression that bears a valuable piece of information, which can be effectively employed by some major NLP tasks such as machine translation, question answering, and text summarization. In this paper, we introduce a new model called BERT-PersNER (BERT based Persian Named Entity Recognizer), in which we have applied transfer learning and active learning approaches to NER in Persian, which is regarded as a low-resource language. Like many others, we have used Conditional Random Field for tag decoding in our proposed architecture. BERT-PersNER has outperformed two available studies in Persian NER, in most cases of our experiments using the supervised learning approach on two Persian datasets called Arman and Peyma. Besides, as the very first effort to try active learning in the Persian NER, using only 30% of Arman and 20% of Peyma, we respectively achieved 92.15%, and 92.41% performance of the mentioned supervised learning experiments. 2021.ranlp-1.73 @@ -873,7 +873,7 @@ Multilingual Multi-Domain <fixed-case>NMT</fixed-case> for <fixed-case>I</fixed-case>ndian Languages SouravKumar SalilAggarwal - DiptiSharma + DiptiSharma 727–733 India is known as the land of many tongues and dialects. Neural machine translation (NMT) is the current state-of-the-art approach for machine translation (MT) but performs better only with large datasets which Indian languages usually lack, making this approach infeasible. So, in this paper, we address the problem of data scarcity by efficiently training multilingual and multilingual multi domain NMT systems involving languages of the 𝐈𝐧𝐝𝐢𝐚𝐧 𝐬𝐮𝐛𝐜𝐨𝐧𝐭𝐢𝐧𝐞𝐧𝐭. We are proposing the technique for using the joint domain and language tags in a multilingual setup. We draw three major conclusions from our experiments: (i) Training a multilingual system via exploiting lexical similarity based on language family helps in achieving an overall average improvement of 𝟑.𝟐𝟓 𝐁𝐋𝐄𝐔 𝐩𝐨𝐢𝐧𝐭𝐬 over bilingual baselines, (ii) Technique of incorporating domain information into the language tokens helps multilingual multi-domain system in getting a significant average improvement of 𝟔 𝐁𝐋𝐄𝐔 𝐩𝐨𝐢𝐧𝐭𝐬 over the baselines, (iii) Multistage fine-tuning further helps in getting an improvement of 𝟏-𝟏.𝟓 𝐁𝐋𝐄𝐔 𝐩𝐨𝐢𝐧𝐭𝐬 for the language pair of interest. 2021.ranlp-1.83 @@ -931,7 +931,7 @@ NaokiKobayashi HidetakaKamigaito HiroyaTakamura - ManabuOkumura + ManabuOkumura 770–779 In the social media, users frequently use small images called emojis in their posts. Although using emojis in texts plays a key role in recent communication systems, less attention has been paid on their positions in the given texts, despite that users carefully choose and put an emoji that matches their post. Exploring positions of emojis in texts will enhance understanding of the relationship between emojis and texts. We extend an emoji label prediction task taking into account the information of emoji positions, by jointly learning the emoji position in a tweet to predict the emoji label. The results demonstrate that the position of emojis in texts is a good clue to boost the performance of emoji label prediction. Human evaluation validates that there exists a suitable emoji position in a tweet, and our proposed task is able to make tweets more fancy and natural. In addition, considering emoji position can further improve the performance for the irony detection task compared to the emoji label prediction. We also report the experimental results for the modified dataset, due to the problem of the original dataset for the first shared task to predict an emoji label in SemEval2018. 2021.ranlp-1.88 @@ -940,7 +940,7 @@ Addressing Slot-Value Changes in Task-oriented Dialogue Systems through Dialogue Domain Adaptation TizianoLabruna - BernardoMagnini + BernardoMagnini 780–789 Recent task-oriented dialogue systems learn a model from annotated dialogues, and such dialogues are in turn collected and annotated so that they are consistent with certain domain knowledge. However, in real scenarios, domain knowledge is subject to frequent changes, and initial training dialogues may soon become obsolete, resulting in a significant decrease in the model performance. In this paper, we investigate the relationship between training dialogues and domain knowledge, and propose Dialogue Domain Adaptation, a methodology aiming at adapting initial training dialogues to changes intervened in the domain knowledge. We focus on slot-value changes (e.g., when new slot values are available to describe domain entities) and define an experimental setting for dialogue domain adaptation. First, we show that current state-of-the-art models for dialogue state tracking are still poorly robust to slot-value changes of the domain knowledge. Then, we compare different domain adaptation strategies, showing that simple techniques are effective to reduce the gap between training dialogues and domain knowledge. 2021.ranlp-1.89 @@ -958,7 +958,7 @@ Text Retrieval for Language Learners: Graded Vocabulary vs. Open Learner Model - JohnLee + JohnLee Chak YanYeung 798–804 A text retrieval system for language learning returns reading materials at the appropriate difficulty level for the user. The system typically maintains a learner model on the user’s vocabulary knowledge, and identifies texts that best fit the model. As the user’s language proficiency increases, model updates are necessary to retrieve texts with the corresponding lexical complexity. We investigate an open learner model that allows user modification of its content, and evaluate its effectiveness with respect to the amount of user update effort. We compare this model with the graded approach, in which the system returns texts at the optimal grade. When the user makes at least half of the expected updates to the open learner model, simulation results show that it outperforms the graded approach in retrieving texts that fit user preference for new-word density. @@ -1035,9 +1035,9 @@ <fixed-case>G</fixed-case>e<fixed-case>SERA</fixed-case>: General-domain Summary Evaluation by Relevance Analysis JessicaLópez Espejel Gaëlde Chalendar - JorgeGarcia Flores + JorgeGarcia Flores ThierryCharnois - Ivan VladimirMeza Ruiz + Ivan VladimirMeza Ruiz 856–867 We present GeSERA, an open-source improved version of SERA for evaluating automatic extractive and abstractive summaries from the general domain. SERA is based on a search engine that compares candidate and reference summaries (called queries) against an information retrieval document base (called index). SERA was originally designed for the biomedical domain only, where it showed a better correlation with manual methods than the widely used lexical-based ROUGE method. In this paper, we take out SERA from the biomedical domain to the general one by adapting its content-based method to successfully evaluate summaries from the general domain. First, we improve the query reformulation strategy with POS Tags analysis of general-domain corpora. Second, we replace the biomedical index used in SERA with two article collections from AQUAINT-2 and Wikipedia. We conduct experiments with TAC2008, TAC2009, and CNNDM datasets. Results show that, in most cases, GeSERA achieves higher correlations with manual evaluation methods than SERA, while it reduces its gap with ROUGE for general-domain summary evaluation. GeSERA even surpasses ROUGE in two cases of TAC2009. Finally, we conduct extensive experiments and provide a comprehensive study of the impact of human annotators and the index size on summary evaluation with SERA and GeSERA. 2021.ranlp-1.98 @@ -1047,7 +1047,7 @@ On the Interaction between Annotation Quality and Classifier Performance in Abusive Language Detection HollyLopez Long AlexandraO’Neil - SandraKübler + SandraKübler 868–875 Abusive language detection has become an important tool for the cultivation of safe online platforms. We investigate the interaction of annotation quality and classifier performance. We use a new, fine-grained annotation scheme that allows us to distinguish between abusive language and colloquial uses of profanity that are not meant to harm. Our results show a tendency of crowd workers to overuse the abusive class, which creates an unrealistic class balance and affects classification accuracy. We also investigate different methods of distinguishing between explicit and implicit abuse and show lexicon-based approaches either over- or under-estimate the proportion of explicit abuse in data sets. 2021.ranlp-1.99 @@ -1055,7 +1055,7 @@ <fixed-case>NEREL</fixed-case>: A <fixed-case>R</fixed-case>ussian Dataset with Nested Named Entities, Relations and Events - NataliaLoukachevitch + NataliaLoukachevitch EkaterinaArtemova TatianaBatura PavelBraslavski @@ -1073,7 +1073,7 @@ Active Learning for Interactive Relation Extraction in a <fixed-case>F</fixed-case>rench Newspaper’s Articles CyrielleMallart MichelLe Nouy - GuillaumeGravier + GuillaumeGravier PascaleSébillot 886–894 Relation extraction is a subtask of natural langage processing that has seen many improvements in recent years, with the advent of complex pre-trained architectures. Many of these state-of-the-art approaches are tested against benchmarks with labelled sentences containing tagged entities, and require important pre-training and fine-tuning on task-specific data. However, in a real use-case scenario such as in a newspaper company mostly dedicated to local information, relations are of varied, highly specific type, with virtually no annotated data for such relations, and many entities co-occur in a sentence without being related. We question the use of supervised state-of-the-art models in such a context, where resources such as time, computing power and human annotators are limited. To adapt to these constraints, we experiment with an active-learning based relation extraction pipeline, consisting of a binary LSTM-based lightweight model for detecting the relations that do exist, and a state-of-the-art model for relation classification. We compare several choices for classification models in this scenario, from basic word embedding averaging, to graph neural networks and Bert-based ones, as well as several active learning acquisition strategies, in order to find the most cost-efficient yet accurate approach in our French largest daily newspaper company’s use case. @@ -1096,7 +1096,7 @@ MilenaSlavcheva PetyaOsenova IvayloRadev - KirilSimov + KirilSimov 901–909 The paper describes a system for automatic summarization in English language of online news data that come from different non-English languages. The system is designed to be used in production environment for media monitoring. Automatic summarization can be very helpful in this domain when applied as a helper tool for journalists so that they can review just the important information from the news channels. However, like every software solution, the automatic summarization needs performance monitoring and assured safe environment for the clients. In media monitoring environment the most problematic features to be addressed are: the copyright issues, the factual consistency, the style of the text and the ethical norms in journalism. Thus, the main contribution of our present work is that the above mentioned characteristics are successfully monitored in neural automatic summarization models and improved with the help of validation, fact-preserving and fact-checking procedures. 2021.ranlp-1.103 @@ -1155,7 +1155,7 @@ Ranking Online Reviews Based on Their Helpfulness: An Unsupervised Approach AlimuddinMelleng AnnaJurek-Loughrey - DeepakP + DeepakP 959–967 Online reviews are an essential aspect of online shopping for both customers and retailers. However, many reviews found on the Internet lack in quality, informativeness or helpfulness. In many cases, they lead the customers towards positive or negative opinions without providing any concrete details (e.g., very poor product, I would not recommend it). In this work, we propose a novel unsupervised method for quantifying helpfulness leveraging the availability of a corpus of reviews. In particular, our method exploits three characteristics of the reviews, viz., relevance, emotional intensity and specificity, towards quantifying helpfulness. We perform three rankings (one for each feature above), which are then combined to obtain a final helpfulness ranking. For the purpose of empirically evaluating our method, we use review of four product categories from Amazon review. The experimental evaluation demonstrates the effectiveness of our method in comparison to a recent and state-of-the-art baseline. 2021.ranlp-1.109 @@ -1166,7 +1166,7 @@ MariusMosbach IrinaStenger TaniaAvgustinova - BerndMöbius + BerndMöbius DietrichKlakow 968–977 We present an extended version of a tool developed for calculating linguistic distances and asymmetries in auditory perception of closely related languages. Along with evaluating the metrics available in the initial version of the tool, we introduce word adaptation entropy as an additional metric of linguistic asymmetry. Potential predictors of speech intelligibility are validated with human performance in spoken cognate recognition experiments for Bulgarian and Russian. Special attention is paid to the possibly different contributions of vowels and consonants in oral intercomprehension. Using incom.py 2.0 it is possible to calculate, visualize, and validate three measurement methods of linguistic distances and asymmetries as well as carrying out regression analyses in speech intelligibility between related languages. @@ -1195,7 +1195,7 @@ <fixed-case>COVID</fixed-case>-19 in <fixed-case>B</fixed-case>ulgarian Social Media: Factuality, Harmfulness, Propaganda, and Framing - PreslavNakov + PreslavNakov FirojAlam ShadenShaar GiovanniDa San Martino @@ -1207,7 +1207,7 @@ A Second Pandemic? Analysis of Fake News about <fixed-case>COVID</fixed-case>-19 Vaccines in <fixed-case>Q</fixed-case>atar - PreslavNakov + PreslavNakov FirojAlam ShadenShaar GiovanniDa San Martino @@ -1219,7 +1219,7 @@ A Hierarchical Entity Graph Convolutional Network for Relation Extraction across Documents - TapasNayak + TapasNayak Hwee TouNg 1022–1030 Distantly supervised datasets for relation extraction mostly focus on sentence-level extraction, and they cover very few relations. In this work, we propose cross-document relation extraction, where the two entities of a relation tuple appear in two different documents that are connected via a chain of common entities. Following this idea, we create a dataset for two-hop relation extraction, where each chain contains exactly two documents. Our proposed dataset covers a higher number of relations than the publicly available sentence-level datasets. We also propose a hierarchical entity graph convolutional network (HEGCN) model for this task that improves performance by 1.1% F1 score on our two-hop relation extraction dataset, compared to some strong neural baselines. @@ -1228,7 +1228,7 @@ Improving Distantly Supervised Relation Extraction with Self-Ensemble Noise Filtering - TapasNayak + TapasNayak NavonilMajumder SoujanyaPoria 1031–1039 @@ -1250,7 +1250,7 @@ Extending a Text-to-Pictograph System to <fixed-case>F</fixed-case>rench and to Arasaac MagaliNorré VincentVandeghinste - PierretteBouillon + PierretteBouillon ThomasFrançois 1050–1059 We present an adaptation of the Text-to-Picto system, initially designed for Dutch, and extended to English and Spanish. The original system, aimed at people with an intellectual disability, automatically translates text into pictographs (Sclera and Beta). We extend it to French and add a large set of Arasaac pictographs linked to WordNet 3.1. To carry out this adaptation, we automatically link the pictographs and their metadata to synsets of two French WordNets and leverage this information to translate words into pictographs. We automatically and manually evaluate our system with different corpora corresponding to different use cases, including one for medical communication between doctors and patients. The system is also compared to similar systems in other languages. @@ -1301,7 +1301,7 @@ <fixed-case>O</fixed-case>ffend<fixed-case>ES</fixed-case>: A New Corpus in <fixed-case>S</fixed-case>panish for Offensive Language Research Flor MiriamPlaza-del-Arco ArturoMontejo-Ráez - L. AlfonsoUreña-López + L. AlfonsoUreña-López María-TeresaMartín-Valdivia 1096–1108 Offensive language detection and analysis has become a major area of research in Natural Language Processing. The freedom of participation in social media has exposed online users to posts designed to denigrate, insult or hurt them according to gender, race, religion, ideology, or other personal characteristics. Focusing on young influencers from the well-known social platforms of Twitter, Instagram, and YouTube, we have collected a corpus composed of 47,128 Spanish comments manually labeled on offensive pre-defined categories. A subset of the corpus attaches a degree of confidence to each label, so both multi-class classification and multi-output regression studies are possible. In this paper, we introduce the corpus, discuss its building process, novelties, and some preliminary experiments with it to serve as a baseline for the research community. @@ -1310,7 +1310,7 @@ On Machine Translation of User Reviews - MajaPopović + MajaPopović AlbertoPoncelas MarijaBrkic AndyWay @@ -1438,7 +1438,7 @@ Sentiment-Aware Measure (<fixed-case>SAM</fixed-case>) for Evaluating Sentiment Transfer by Machine Translation Systems HadeelSaadany - ConstantinOrăsan + ConstantinOrăsan EmadMohamed AshrafTantavy 1217–1226 @@ -1458,7 +1458,7 @@ Exploiting Domain-Specific Knowledge for Judgment Prediction Is No Panacea OlivierSalaün - PhilippeLanglais + PhilippeLanglais KarimBenyekhlef 1234–1243 Legal judgment prediction (LJP) usually consists in a text classification task aimed at predicting the verdict on the basis of the fact description. The literature shows that the use of articles as input features helps improve the classification performance. In this work, we designed a verdict prediction task based on landlord-tenant disputes and we applied BERT-based models to which we fed different article-based features. Although the results obtained are consistent with the literature, the improvements with the articles are mostly obtained with the most frequent labels, suggesting that pre-trained and fine-tuned transformer-based models are not scalable as is for legal reasoning in real life scenarios as they would only excel in accurately predicting the most recurrent verdicts to the detriment of other legal outcomes. @@ -1469,8 +1469,8 @@ Masking and Transformer-based Models for Hyperpartisanship Detection in News JavierSánchez-Junquera PaoloRosso - ManuelMontes-y-Gómez - Simone PaoloPonzetto + ManuelMontes-y-Gómez + Simone PaoloPonzetto 1244–1251 Hyperpartisan news show an extreme manipulation of reality based on an underlying and extreme ideological orientation. Because of its harmful effects at reinforcing one’s bias and the posterior behavior of people, hyperpartisan news detection has become an important task for computational linguists. In this paper, we evaluate two different approaches to detect hyperpartisan news. First, a text masking technique that allows us to compare style vs. topic-related features in a different perspective from previous work. Second, the transformer-based models BERT, XLM-RoBERTa, and M-BERT, known for their ability to capture semantic and syntactic patterns in the same representation. Our results corroborate previous research on this task in that topic-related features yield better results than style-based ones, although they also highlight the relevance of using higher-length n-grams. Furthermore, they show that transformer-based models are more effective than traditional methods, but this at the cost of greater computational complexity and lack of transparency. Based on our experiments, we conclude that the beginning of the news show relevant information for the transformers at distinguishing effectively between left-wing, mainstream, and right-wing orientations. 2021.ranlp-1.140 @@ -1480,7 +1480,7 @@ <fixed-case>S</fixed-case>erbian <fixed-case>NER</fixed-case>&Beyond: The Archaic and the Modern Intertwinned BranislavaŠandrih Todorović CvetanaKrstev - RankaStanković + RankaStanković MilicaIkonić Nešić 1252–1260 In this work, we present a Serbian literary corpus that is being developed under the umbrella of the “Distant Reading for European Literary History” COST Action CA16204. Using this corpus of novels written more than a century ago, we have developed and made publicly available a Named Entity Recognizer (NER) trained to recognize 7 different named entity types, with a Convolutional Neural Network (CNN) architecture, having F1 score of ≈91% on the test dataset. This model has been further assessed on a separate evaluation dataset. We wrap up with comparison of the developed model with the existing one, followed by a discussion of pros and cons of the both models. @@ -1529,7 +1529,7 @@ A Case Study of Deep Learning-Based Multi-Modal Methods for Labeling the Presence of Questionable Content in Movie Trailers MahsaShafaei ChristosSmailis - IoannisKakadiaris + IoannisKakadiaris ThamarSolorio 1297–1307 In this work, we explore different approaches to combine modalities for the problem of automated age-suitability rating of movie trailers. First, we introduce a new dataset containing videos of movie trailers in English downloaded from IMDB and YouTube, along with their corresponding age-suitability rating labels. Secondly, we propose a multi-modal deep learning pipeline addressing the movie trailer age suitability rating problem. This is the first attempt to combine video, audio, and speech information for this problem, and our experimental results show that multi-modal approaches significantly outperform the best mono and bimodal models in this task. @@ -1571,7 +1571,7 @@ Exploring <fixed-case>G</fixed-case>erman Multi-Level Text Simplification NicolasSpring - AnnetteRios + AnnetteRios SarahEbling 1339–1349 We report on experiments in automatic text simplification (ATS) for German with multiple simplification levels along the Common European Framework of Reference for Languages (CEFR), simplifying standard German into levels A1, A2 and B1. For that purpose, we investigate the use of source labels and pretraining on standard German, allowing us to simplify standard language to a specific CEFR level. We show that these approaches are especially effective in low-resource scenarios, where we are able to outperform a standard transformer baseline. Moreover, we introduce copy labels, which we show can help the model make a distinction between sentences that require further modifications and sentences that can be copied as-is. @@ -1580,7 +1580,7 @@ Exploring Reliability of Gold Labels for Emotion Detection in <fixed-case>T</fixed-case>witter - SanjaStajner + SanjaStajner 1350–1359 Emotion detection from social media posts has attracted noticeable attention from natural language processing (NLP) community in recent years. The ways for obtaining gold labels for training and testing of the systems for automatic emotion detection differ significantly from one study to another, and pose the question of reliability of gold labels and obtained classification results. This study systematically explores several ways for obtaining gold labels for Ekman’s emotion model on Twitter data and the influence of the chosen strategy on the manual classification results. 2021.ranlp-1.151 @@ -1588,7 +1588,7 @@ How to Obtain Reliable Labels for <fixed-case>MBTI</fixed-case> Classification from Texts? - SanjaStajner + SanjaStajner SerenYenikent 1360–1368 Automatic detection of the Myers-Briggs Type Indicator (MBTI) from short posts attracted noticeable attention in the last few years. Recent studies showed that this is quite a difficult task, especially on commonly used Twitter data. Obtaining MBTI labels is also difficult, as human annotation requires trained psychologists, and automatic way of obtaining them is through long questionnaires of questionable usability for the task. In this paper, we present a method for collecting reliable MBTI labels via only four carefully selected questions that can be applied to any type of textual data. @@ -1644,7 +1644,7 @@ <fixed-case>TR</fixed-case>-<fixed-case>SEQ</fixed-case>: Named Entity Recognition Dataset for <fixed-case>T</fixed-case>urkish Search Engine Queries BerkayTopçu - İlknurDurgar El-Kahlout + İlknurDurgar El-Kahlout 1417–1422 Recognizing named entities in short search engine queries is a difficult task due to their weaker contextual information compared to long sentences. Standard named entity recognition (NER) systems that are trained on grammatically correct and long sentences fail to perform well on such queries. In this study, we share our efforts towards creating a cleaned and labeled dataset of real Turkish search engine queries (TR-SEQ) and introduce an extended label set to satisfy the search engine needs. A NER system is trained by applying the state-of-the-art deep learning method BERT to the collected data and its high performance on search engine queries is reported. Moreover, we compare our results with the state-of-the-art Turkish NER systems. 2021.ranlp-1.158 @@ -1866,7 +1866,7 @@ ChenlongHu HidetakaKamigaito HiroyaTakamura - ManabuOkumura + ManabuOkumura 1586–1596 Neural sequence-to-sequence (Seq2Seq) models and BERT have achieved substantial improvements in abstractive document summarization (ADS) without and with pre-training, respectively. However, they sometimes repeatedly attend to unimportant source phrases while mistakenly ignore important ones. We present reconstruction mechanisms on two levels to alleviate this issue. The sequence-level reconstructor reconstructs the whole document from the hidden layer of the target summary, while the word embedding-level one rebuilds the average of word embeddings of the source at the target side to guarantee that as much critical information is included in the summary as possible. Based on the assumption that inverse document frequency (IDF) measures how important a word is, we further leverage the IDF weights in our embedding-level reconstructor. The proposed frameworks lead to promising improvements for ROUGE metrics and human rating on both the CNN/Daily Mail and Newsroom summarization datasets. 2021.ranlp-1.178 @@ -1877,8 +1877,8 @@ SeunghakYu GiovanniDa San Martino MitraMohtarami - JamesGlass - PreslavNakov + JamesGlass + PreslavNakov 1597–1605 Online users today are exposed to misleading and propagandistic news articles and media posts on a daily basis. To counter thus, a number of approaches have been designed aiming to achieve a healthier and safer online news and media consumption. Automatic systems are able to support humans in detecting such content; yet, a major impediment to their broad adoption is that besides being accurate, the decisions of such systems need also to be interpretable in order to be trusted and widely adopted by users. Since misleading and propagandistic content influences readers through the use of a number of deception techniques, we propose to detect and to show the use of such techniques as a way to offer interpretability. In particular, we define qualitatively descriptive features and we analyze their suitability for detecting deception techniques. We further show that our interpretable features can be easily combined with pre-trained language models, yielding state-of-the-art results. 2021.ranlp-1.179 @@ -1886,11 +1886,11 @@ Generic Mechanism for Reducing Repetitions in Encoder-Decoder Models - YingZhang + YingZhang HidetakaKamigaito TatsuyaAoki HiroyaTakamura - ManabuOkumura + ManabuOkumura 1606–1615 Encoder-decoder models have been commonly used for many tasks such as machine translation and response generation. As previous research reported, these models suffer from generating redundant repetition. In this research, we propose a new mechanism for encoder-decoder models that estimates the semantic difference of a source sentence before and after being fed into the encoder-decoder model to capture the consistency between two sides. This mechanism helps reduce repeatedly generated tokens for a variety of tasks. Evaluation results on publicly available machine translation and response generation datasets demonstrate the effectiveness of our proposal. 2021.ranlp-1.180 @@ -1908,7 +1908,7 @@ Delexicalized Cross-lingual Dependency Parsing for <fixed-case>X</fixed-case>ibe HeZhou - SandraKübler + SandraKübler 1626–1635 Manually annotating a treebank is time-consuming and labor-intensive. We conduct delexicalized cross-lingual dependency parsing experiments, where we train the parser on one language and test on our target language. As our test case, we use Xibe, a severely under-resourced Tungusic language. We assume that choosing a closely related language as the source language will provide better results than more distant relatives. However, it is not clear how to determine those closely related languages. We investigate three different methods: choosing the typologically closest language, using LangRank, and choosing the most similar language based on perplexity. We train parsing models on the selected languages using UDify and test on different genres of Xibe data. The results show that languages selected based on typology and perplexity scores outperform those predicted by LangRank; Japanese is the optimal source language. In determining the source language, proximity to the target language is more important than large training sizes. Parsing is also influenced by genre differences, but they have little influence as long as the training data is at least as complex as the target. 2021.ranlp-1.182 @@ -1942,7 +1942,7 @@ Not All Comments Are Equal: Insights into Comment Moderation from a Topic-Aware Model ElaineZosa RaviShekhar - Vanja MladenKaran + Vanja MladenKaran MatthewPurver 1652–1662 Moderation of reader comments is a significant problem for online news platforms. Here, we experiment with models for automatic moderation, using a dataset of comments from a popular Croatian newspaper. Our analysis shows that while comments that violate the moderation rules mostly share common linguistic and thematic features, their content varies across the different sections of the newspaper. We therefore make our models topic-aware, incorporating semantic features from a topic model into the classification decision. Our results show that topic information improves the performance of the model, increases its confidence in correct outputs, and helps us understand the model’s outputs. @@ -2045,7 +2045,7 @@ Bilingual Terminology Extraction Using Neural Word Embeddings on Comparable Corpora DaryaFilippova BurcuCan - GloriaCorpas Pastor + GloriaCorpas Pastor 58–64 Term and glossary management are vital steps of preparation of every language specialist, and they play a very important role at the stage of education of translation professionals. The growing trend of efficient time management and constant time constraints we may observe in every job sector increases the necessity of the automatic glossary compilation. Many well-performing bilingual AET systems are based on processing parallel data, however, such parallel corpora are not always available for a specific domain or a language pair. Domain-specific, bilingual access to information and its retrieval based on comparable corpora is a very promising area of research that requires a detailed analysis of both available data sources and the possible extraction techniques. This work focuses on domain-specific automatic terminology extraction from comparable corpora for the English – Russian language pair by utilizing neural word embeddings. 2021.ranlp-srw.9 @@ -2097,7 +2097,7 @@ Paragraph Similarity Matches for Generating Multiple-choice Test Items HalynaMaslak - RuslanMitkov + RuslanMitkov 99–108 Multiple-choice questions (MCQs) are widely used in knowledge assessment in educational institutions, during work interviews, in entertainment quizzes and games. Although the research on the automatic or semi-automatic generation of multiple-choice test items has been conducted since the beginning of this millennium, most approaches focus on generating questions from a single sentence. In this research, a state-of-the-art method of creating questions based on multiple sentences is introduced. It was inspired by semantic similarity matches used in the translation memory component of translation management systems. The performance of two deep learning algorithms, doc2vec and SBERT, is compared for the paragraph similarity task. The experiments are performed on the ad-hoc corpus within the EU domain. For the automatic evaluation, a smaller corpus of manually selected matching paragraphs has been compiled. The results prove the good performance of Sentence Embeddings for the given task. 2021.ranlp-srw.15 @@ -2105,7 +2105,7 @@ Neural Borrowing Detection with Monolingual Lexical Models - JohnMiller + JohnMiller EmanuelPariasca CesarBeltran Castañon 109–117 @@ -2125,7 +2125,7 @@ On Reducing Repetition in Abstractive Summarization PranavNair - Anil KumarSingh + Anil KumarSingh 126–134 Repetition in natural language generation reduces the informativeness of text and makes it less appealing. Various techniques have been proposed to alleviate it. In this work, we explore and propose techniques to reduce repetition in abstractive summarization. First, we explore the application of unlikelihood training and embedding matrix regularizers from previous work on language modeling to abstractive summarization. Next, we extend the coverage and temporal attention mechanisms to the token level to reduce repetition. In our experiments on the CNN/Daily Mail dataset, we observe that these techniques reduce the amount of repetition and increase the informativeness of the summaries, which we confirm via human evaluation. 2021.ranlp-srw.18 @@ -2134,7 +2134,7 @@ Improving Abstractive Summarization with Commonsense Knowledge PranavNair - Anil KumarSingh + Anil KumarSingh 135–143 Large scale pretrained models have demonstrated strong performances on several natural language generation and understanding benchmarks. However, introducing commonsense into them to generate more realistic text remains a challenge. Inspired from previous work on commonsense knowledge generation and generative commonsense reasoning, we introduce two methods to add commonsense reasoning skills and knowledge into abstractive summarization models. Both methods beat the baseline on ROUGE scores, demonstrating the superiority of our models over the baseline. Human evaluation results suggest that summaries generated by our methods are more realistic and have fewer commonsensical errors. 2021.ranlp-srw.19 @@ -2190,7 +2190,7 @@ Towards New Generation Translation Memory Systems NikolaSpasovski - RuslanMitkov + RuslanMitkov 180–183 Despite the enormous popularity of Translation Memory systems and the active research in the field, their language processing features still suffer from certain limitations. While many recent papers focus on semantic matching capabilities of TMs, this planned study will address how these tools perform when dealing with longer segments and whether this could be a cause of lower match scores. An experiment will be carried out on corpora from two different (repetitive) domains. Following the results, recommendations for future developments of new TMs will be made. 2021.ranlp-srw.25 @@ -2209,8 +2209,8 @@ LionelTadonfouet Tadjou FabriceBourge TiphaineMarie - LaurentRomary - Éricde la Clergerie + LaurentRomary + Éricde la Clergerie 193–202 In this paper we describe the process of build-ing a corporate corpus that will be used as a ref-erence for modelling and computing threadsfrom conversations generated using commu-nication and collaboration tools. The overallgoal of the reconstruction of threads is to beable to provide value to the collorator in var-ious use cases, such as higlighting the impor-tant parts of a running discussion, reviewingthe upcoming commitments or deadlines, etc. Since, to our knowledge, there is no avail-able corporate corpus for the French languagewhich could allow us to address this prob-lem of thread constitution, we present here amethod for building such corpora includingdifferent aspects and steps which allowed thecreation of a pipeline to pseudo-anonymisedata. Such a pipeline is a response to theconstraints induced by the General Data Pro-tection Regulation GDPR in Europe and thecompliance to the secrecy of correspondence. 2021.ranlp-srw.27 @@ -2223,7 +2223,7 @@ GeorgiKaradzhov GeorgiGeorgiev IvanKoychev - PreslavNakov + PreslavNakov 203–209 In education, quiz questions have become an important tool for assessing the knowledge of students. Yet, manually preparing such questions is a tedious task, and thus automatic question generation has been proposed as a possible alternative. So far, the vast majority of research has focused on generating the question text, relying on question answering datasets with readily picked answers, and the problem of how to come up with answer candidates in the first place has been largely ignored. Here, we aim to bridge this gap. In particular, we propose a model that can generate a specified number of answer candidates for a given passage of text, which can then be used by instructors to write questions manually or can be passed as an input to automatic answer-aware question generators. Our experiments show that our proposed answer candidate generation model outperforms several baselines. 2021.ranlp-srw.28 diff --git a/data/xml/2021.reinact.xml b/data/xml/2021.reinact.xml index 7e309cba48..711fda2ae0 100644 --- a/data/xml/2021.reinact.xml +++ b/data/xml/2021.reinact.xml @@ -79,8 +79,8 @@ Decoupling Pragmatics: Discriminative Decoding for Referring Expression Generation - SimeonSchüz - SinaZarrieß + SimeonSchüz + SinaZarrieß 47–52 The shift to neural models in Referring Expression Generation (REG) has enabled more natural set-ups, but at the cost of interpretability. We argue that integrating pragmatic reasoning into the inference of context-agnostic generation models could reconcile traits of traditional and neural REG, as this offers a separation between context-independent, literal information and pragmatic adaptation to context. With this in mind, we apply existing decoding strategies from discriminative image captioning to REG and evaluate them in terms of pragmatic informativity, likelihood to ground-truth annotations and linguistic diversity. Our results show general effectiveness, but a relatively small gain in informativity, raising important questions for REG in general. 2021.reinact-1.7 @@ -90,7 +90,7 @@ Generating Justifications in a Spatial Question-Answering Dialogue System for a Blocks World GeorgiyPlatonov BenjaminKane - LenhartSchubert + LenhartSchubert 53–57 As AI reaches wider adoption, designing systems that are explainable and interpretable becomes a critical necessity. In particular, when it comes to dialogue systems, their reasoning must be transparent and must comply with human intuitions in order for them to be integrated seamlessly into day-to-day collaborative human-machine activities. Here, we describe our ongoing work on a (general purpose) dialogue system equipped with a spatial specialist with explanatory capabilities. We applied this system to a particular task of characterizing spatial configurations of blocks in a simple physical Blocks World (BW) domain using natural locative expressions, as well as generating justifications for the proposed spatial descriptions by indicating the factors that the system used to arrive at a particular conclusion. 2021.reinact-1.8 diff --git a/data/xml/2021.repl4nlp.xml b/data/xml/2021.repl4nlp.xml index 41ce5ea356..a6a0e0e7a2 100644 --- a/data/xml/2021.repl4nlp.xml +++ b/data/xml/2021.repl4nlp.xml @@ -28,7 +28,7 @@ PrithvirajSen HuaiyuZhu YunyaoLi - DragomirRadev + DragomirRadev 1–7 Cross-lingual text classification (CLTC) is a challenging task made even harder still due to the lack of labeled data in low-resource languages. In this paper, we propose zero-shot instance-weighting, a general model-agnostic zero-shot learning framework for improving CLTC by leveraging source instance weighting. It adds a module on top of pre-trained language models for similarity computation of instance weights, thus aligning each source instance to the target language. During training, the framework utilizes gradient descent that is weighted by instance weights to update parameters. We evaluate this framework over seven target languages on three fundamental tasks and show its effectiveness and extensibility, by improving on F1 score up to 4% in single-source transfer and 8% in multi-source transfer. To the best of our knowledge, our method is the first to apply instance weighting in zero-shot CLTC. It is simple yet effective and easily extensible into multi-source transfer. 2021.repl4nlp-1.1 @@ -98,7 +98,7 @@ Structure-aware Sentence Encoder in Bert-Based <fixed-case>S</fixed-case>iamese Network QiweiPeng - DavidWeir + DavidWeir JulieWeeds 57–63 Recently, impressive performance on various natural language understanding tasks has been achieved by explicitly incorporating syntax and semantic information into pre-trained models, such as BERT and RoBERTa. However, this approach depends on problem-specific fine-tuning, and as widely noted, BERT-like models exhibit weak performance, and are inefficient, when applied to unsupervised similarity comparison tasks. Sentence-BERT (SBERT) has been proposed as a general-purpose sentence embedding method, suited to both similarity comparison and downstream tasks. In this work, we show that by incorporating structural information into SBERT, the resulting model outperforms SBERT and previous general sentence encoders on unsupervised semantic textual similarity (STS) datasets and transfer classification tasks. @@ -109,7 +109,7 @@ Preserving Cross-Linguality of Pre-trained Models via Continual Learning ZihanLiu - Genta IndraWinata + Genta IndraWinata AndreaMadotto PascaleFung 64–71 @@ -137,7 +137,7 @@ PengchengYang TianyuLiu ZhifangSui - BaobaoChang + BaobaoChang 83–89 Conventional Knowledge Graph Completion (KGC) assumes that all test entities appear during training. However, in real-world scenarios, Knowledge Graphs (KG) evolve fast with out-of-knowledge-graph (OOKG) entities added frequently, and we need to efficiently represent these entities. Most existing Knowledge Graph Embedding (KGE) methods cannot represent OOKG entities without costly retraining on the whole KG. To enhance efficiency, we propose a simple and effective method that inductively represents OOKG entities by their optimal estimation under translational assumptions. Moreover, given pretrained embeddings of the in-knowledge-graph (IKG) entities, our method even needs no additional learning. Experimental results on two KGC tasks with OOKG entities show that our method outperforms the previous methods by a large margin with higher efficiency. 2021.repl4nlp-1.10 @@ -174,7 +174,7 @@ <fixed-case>X</fixed-case>2<fixed-case>P</fixed-case>arser: Cross-Lingual and Cross-Domain Framework for Task-Oriented Compositional Semantic Parsing ZihanLiu - Genta IndraWinata + Genta IndraWinata PengXu PascaleFung 112–127 @@ -208,7 +208,7 @@ Probing Cross-Modal Representations in Multi-Step Relational Reasoning IuliiaParfenova DesmondElliott - RaquelFernández + RaquelFernández SandroPezzelle 152–162 We investigate the representations learned by vision and language models in tasks that require relational reasoning. Focusing on the problem of assessing the relative size of objects in abstract visual contexts, we analyse both one-step and two-step reasoning. For the latter, we construct a new dataset of three-image scenes and define a task that requires reasoning at the level of the individual images and across images in a scene. We probe the learned model representations using diagnostic classifiers. Our experiments show that pretrained multimodal transformer-based architectures can perform higher-level relational reasoning, and are able to learn representations for novel tasks and data that are very different from what was seen in pretraining. @@ -242,7 +242,7 @@ Deriving Word Vectors from Contextualized Language Models using Topic-Aware Mention Selection YixiaoWang ZiedBouraoui - LuisEspinosa Anke + LuisEspinosa Anke StevenSchockaert 185–194 One of the long-standing challenges in lexical semantics consists in learning representations of words which reflect their semantic properties. The remarkable success of word embeddings for this purpose suggests that high-quality representations can be obtained by summarizing the sentence contexts of word mentions. In this paper, we propose a method for learning word representations that follows this basic strategy, but differs from standard word embeddings in two important ways. First, we take advantage of contextualized language models (CLMs) rather than bags of word vectors to encode contexts. Second, rather than learning a word vector directly, we use a topic model to partition the contexts in which words appear, and then learn different topic-specific vectors for each word. Finally, we use a task-specific supervision signal to make a soft selection of the resulting vectors. We show that this simple strategy leads to high-quality word vectors, which are more predictive of semantic properties than word embeddings and existing CLM-based strategies. @@ -377,7 +377,7 @@ PengQi GuangtaoWang TengyuMa - JingHuang + JingHuang 307–315 Document-level relation extraction is a challenging task, requiring reasoning over multiple sentences to predict a set of relations in a document. In this paper, we propose a novel framework E2GRE (Entity and Evidence Guided Relation Extraction) that jointly extracts relations and the underlying evidence sentences by using large pretrained language model (LM) as input encoder. First, we propose to guide the pretrained LM’s attention mechanism to focus on relevant context by using attention probabilities as additional features for evidence prediction. Furthermore, instead of feeding the whole document into pretrained LMs to obtain entity representation, we concatenate document text with head entities to help LMs concentrate on parts of the document that are more related to the head entity. Our E2GRE jointly learns relation extraction and evidence prediction effectively, showing large gains on both these tasks, which we find are highly correlated. 2021.repl4nlp-1.30 diff --git a/data/xml/2021.rocling.xml b/data/xml/2021.rocling.xml index 59d1dea6ce..e9e3c74cbd 100644 --- a/data/xml/2021.rocling.xml +++ b/data/xml/2021.rocling.xml @@ -71,7 +71,7 @@ Tzu-ManWu AleksandraSmolka Chao-ChunLiang - Hsin-MinWang + Hsin-MinWang Kuan-YuChen YuTsao Keh-YihSu @@ -182,7 +182,7 @@ Mining Commonsense and Domain Knowledge from Math Word Problems Shih-HungTsai Chao-ChunLiang - Hsin-MinWang + Hsin-MinWang Keh-YihSu 111–117 Current neural math solvers learn to incorporate commonsense or domain knowledge by utilizing pre-specified constants or formulas. However, as these constants and formulas are mainly human-specified, the generalizability of the solvers is limited. In this paper, we propose to explicitly retrieve the required knowledge from math problemdatasets. In this way, we can determinedly characterize the required knowledge andimprove the explainability of solvers. Our two algorithms take the problem text andthe solution equations as input. Then, they try to deduce the required commonsense and domain knowledge by integrating information from both parts. We construct two math datasets and show the effectiveness of our algorithms that they can retrieve the required knowledge for problem-solving. @@ -338,7 +338,7 @@ Employing low-pass filtered temporal speech features for the training of ideal ratio mask in speech enhancement Yan-TongChen Zi-QiangLin - Jeih-WeihHung + Jeih-WeihHung 236–242 The masking-based speech enhancement method pursues a multiplicative mask that applies to the spectrogram of input noise-corrupted utterance, and a deep neural network (DNN) is often used to learn the mask. In particular, the features commonly used for automatic speech recognition can serve as the input of the DNN to learn the well-behaved mask that significantly reduce the noise distortion of processed utterances. This study proposes to preprocess the input speech features for the ideal ratio mask (IRM)-based DNN by lowpass filtering in order to alleviate the noise components. In particular, we employ the discrete wavelet transform (DWT) to decompose the temporal speech feature sequence and scale down the detail coefficients, which correspond to the high-pass portion of the sequence. Preliminary experiments conducted on a subset of TIMIT corpus reveal that the proposed method can make the resulting IRM achieve higher speech quality and intelligibility for the babble noise-corrupted signals compared with the original IRM, indicating that the lowpass filtered temporal feature sequence can learn a superior IRM network for speech enhancement. 2021.rocling-1.30 @@ -359,7 +359,7 @@ Automatic Extraction of <fixed-case>E</fixed-case>nglish Grammar Pattern Correction Rules Kuan-YuShen Yi-ChienLin - Jason S.Chang + Jason S.Chang 252–256 We introduce a method for generating error-correction rules for grammar pattern errors in a given annotated learner corpus. In our approach, annotated edits in the learner corpus are converted into edit rules for correcting common writing errors. The method involves automatic extraction of grammar patterns, and automatic alignment of the erroneous patterns and correct patterns. At run-time, grammar patterns are extracted from the grammatically correct sentences, and correction rules are retrieved by aligning the extracted grammar patterns with the erroneous patterns. Using the proposed method, we generate 1,499 high-quality correction rules related to 232 headwords. The method can be used to assist ESL students in avoiding grammatical errors, and aid teachers in correcting students’ essays. Additionally, the method can be used in the compilation of collocation error dictionaries and the construction of grammar error correction systems. 2021.rocling-1.32 @@ -370,7 +370,7 @@ Hao-ChuanKao Man-ChenHung Lung-HaoLee - Yuen-HsienTseng + Yuen-HsienTseng 257–264 We use Hypergraph Attention Networks (HyperGAT) to recognize multiple labels of Chinese humor texts. We firstly represent a joke as a hypergraph. The sequential hyperedge and semantic hyperedge structures are used to construct hyperedges. Then, attention mechanisms are adopted to aggregate context information embedded in nodes and hyperedges. Finally, we use trained HyperGAT to complete the multi-label classification task. Experimental results on the Chinese humor multi-label dataset showed that HyperGAT model outperforms previous sequence-based (CNN, BiLSTM, FastText) and graph-based (Graph-CNN, TextGCN, Text Level GNN) deep learning models. 2021.rocling-1.33 @@ -396,7 +396,7 @@ Lian-HuiTan Tzu-JuLin Chun-WeiWang - Shu-KaiHsieh + Shu-KaiHsieh 271–279 Ever-expanding evaluative texts on online forums have become an important source of sentiment analysis. This paper proposes an aspect-based annotated dataset consisting of telecom reviews on social media. We introduce a category, implicit evaluative texts, impevals for short, to investigate how the deep learning model works on these implicit reviews. We first compare two models, BertSimple and BertImpvl, and find that while both models are competent to learn simple evaluative texts, they are confused when classifying impevals. To investigate the factors underlying the correctness of the model’s predictions, we conduct a series of analyses, including qualitative error analysis and quantitative analysis of linguistic features with logistic regressions. The results show that local features that affect the overall sentential sentiment confuse the model: multiple target entities, transitional words, sarcasm, and rhetorical questions. Crucially, these linguistic features are independent of the model’s confidence measured by the classifier’s softmax probabilities. Interestingly, the sentence complexity indicated by syntax-tree depth is not correlated with the model’s correctness. In sum, this paper sheds light on the characteristics of the modern deep learning model and when it might need more supervision through linguistic evaluations. 2021.rocling-1.35 @@ -442,7 +442,7 @@ Yi-ChienLin Chun-HoKwok Hai-LunTu - Jason S.Chang + Jason S.Chang 301–309 We introduce a method for assisting English as Second Language (ESL) learners by providing translations of Collins COBUILD grammar patterns(GP) for a given word. In our approach, bilingual parallel corpus is transformed into bilingual GP pairs aimed at providing native language support for learning word usage through GPs. The method involves automatically parsing sentences to extract GPs, automatically generating translation GP pairs from bilingual sentences, and automatically extracting common bilingual GPs. At run-time, the target word is used for lookup GPs and translations, and the retrieved common GPs and their example sentences are shown to the user. We present a prototype phrase search engine, Linggle GPTrans, that implements the methods to assist ESL learners. Preliminary evaluation on a set of more than 300 GP-translation pairs shows that the methods achieve 91% accuracy. 2021.rocling-1.39 @@ -454,7 +454,7 @@ YongfuLiao Po-Ya AngelaWang Mao-ChangKu - Shu-KaiHsieh + Shu-KaiHsieh 310–317 The rapid flow of information and the abundance of text data on the Internet have brought about the urgent demand for the construction of monitoring resources and techniques used for various purposes. To extract facets of information useful for particular domains from such large and dynamically growing corpora requires an unsupervised yet transparent ways of analyzing the textual data. This paper proposed a hybrid collocation analysis as a potential method to retrieve and summarize Taiwan-related topics posted on Weibo and PTT. By grouping collocates of 臺灣 ‘Taiwan’ into clusters of topics via either word embeddings clustering or Latent Dirichlet allocation, lists of collocates can be converted to probability distributions such that distances and similarities can be defined and computed. With this method, we conduct a diachronic analysis of the similarity between Weibo and PTT, providing a way to pinpoint when and how the topic similarity between the two rises or falls. A fine-grained view on the grammatical behavior and political implications is attempted, too. This study thus sheds light on alternative explainable routes for future social media listening method on the understanding of cross-strait relationship. 2021.rocling-1.40 @@ -484,7 +484,7 @@ Identify Bilingual Patterns and Phrases from a Bilingual Sentence Pair Yi-JyunChen Hsin-YunChung - Jason S.Chang + Jason S.Chang 333–338 This paper presents a method for automatically identifying bilingual grammar patterns and extracting bilingual phrase instances from a given English-Chinese sentence pair. In our approach, the English-Chinese sentence pair is parsed to identify English grammar patterns and Chinese counterparts. The method involves generating translations of each English grammar pattern and calculating translation probability of words from a word-aligned parallel corpora. The results allow us to extract the most probable English-Chinese phrase pairs in the sentence pair. We present a prototype system that applies the method to extract grammar patterns and phrases in parallel sentences. An evaluation on randomly selected examples from a dictionary shows that our approach has reasonably good performance. We use human judge to assess the bilingual phrases generated by our approach. The results have potential to assist language learning and machine translation research. 2021.rocling-1.43 @@ -567,7 +567,7 @@ <fixed-case>ROCLING</fixed-case>-2021 Shared Task: Dimensional Sentiment Analysis for Educational Texts - Liang-ChihYu + Liang-ChihYu JinWang BoPeng Chu-RenHuang diff --git a/data/xml/2021.scil.xml b/data/xml/2021.scil.xml index 8ea4d291fc..9101f370fc 100644 --- a/data/xml/2021.scil.xml +++ b/data/xml/2021.scil.xml @@ -27,7 +27,7 @@ A Network Science Approach to Bilingual Code-switching QihuiXu MagdalenaMarkowska - MartinChodorow + MartinChodorow PingLi 18–27 2021.scil-1.2 @@ -51,7 +51,7 @@ Drivers of <fixed-case>E</fixed-case>nglish Syntactic Change in the <fixed-case>C</fixed-case>anadian Parliament LiwenHou - David A.Smith + David A.Smith 51–60 2021.scil-1.5 hou-smith-2021-drivers @@ -69,7 +69,7 @@ Emerging <fixed-case>E</fixed-case>nglish Transitives over the Last Two Centuries LiwenHou - David A.Smith + David A.Smith 71–80 2021.scil-1.7 hou-smith-2021-emerging @@ -85,7 +85,7 @@ Effects of Duration, Locality, and Surprisal in Speech Disfluency Prediction in <fixed-case>E</fixed-case>nglish Spontaneous Speech SamvitDammalapati - RajakrishnanRajkumar + RajakrishnanRajkumar SidharthRanjan SumeetAgarwal 91–101 @@ -154,7 +154,7 @@ Learning Morphological Productivity as Meaning-Form Mappings SarahPayne JordanKodner - CharlesYang + CharlesYang 177–187 2021.scil-1.17 payne-etal-2021-learning @@ -244,7 +244,7 @@ Vowel Harmony Viewed as Error-Correcting Code YvoMeeres - Tommi APirinen + Tommi APirinen 313–322 2021.scil-1.28 meeres-pirinen-2021-vowel @@ -252,7 +252,7 @@ What’s in a Span? Evaluating the Creativity of a Span-Based Neural Constituency Parser DanielDakota - SandraKübler + SandraKübler 323–333 2021.scil-1.29 dakota-kubler-2021-whats @@ -277,7 +277,7 @@ A <fixed-case>R</fixed-case>ate–<fixed-case>D</fixed-case>istortion view of human pragmatic reasoning? NogaZaslavsky JenniferHu - Roger P.Levy + Roger P.Levy 347–348 2021.scil-1.32 zaslavsky-etal-2021-rate @@ -286,7 +286,7 @@ Apparent Communicative Efficiency in the Lexicon is Emergent SpencerCaplan JordanKodner - CharlesYang + CharlesYang 349–350 2021.scil-1.33 caplan-etal-2021-apparent @@ -469,7 +469,7 @@ How to marry a star: Probabilistic constraints for meaning in context KatrinErk - AurélieHerbelot + AurélieHerbelot 451–453 2021.scil-1.55 erk-herbelot-2021-marry @@ -522,7 +522,7 @@ Will it Unblend? YuvalPinter - Cassandra L.Jacobs + Cassandra L.Jacobs JacobEisenstein 474–476 2021.scil-1.61 diff --git a/data/xml/2021.sdp.xml b/data/xml/2021.sdp.xml index 54984ea3af..3dd737cf0a 100644 --- a/data/xml/2021.sdp.xml +++ b/data/xml/2021.sdp.xml @@ -6,9 +6,9 @@ IzBeltagy ArmanCohan GuyFeigenblat - DayneFreitag + DayneFreitag TirthankarGhosal - KeithHall + KeithHall DrahomiraHerrmannova PetrKnoth KyleLo @@ -17,7 +17,7 @@ MichalShmueli-Scheuer Anitade Waard KuansanWang - Lucy LuWang + Lucy LuWang Association for Computational Linguistics
Online
June @@ -43,7 +43,7 @@ SoyeongJeong JinheonBaek ChaeHunPark - JongPark + JongPark 7–17 One of the challenges in information retrieval (IR) is the vocabulary mismatch problem, which happens when the terms between queries and documents are lexically different but semantically similar. While recent work has proposed to expand the queries or documents by enriching their representations with additional relevant terms to address this challenge, they usually require a large volume of query-document pairs to train an expansion model. In this paper, we propose an Unsupervised Document Expansion with Generation (UDEG) framework with a pre-trained language model, which generates diverse supplementary sentences for the original document without using labels on query-document pairs for training. For generating sentences, we further stochastically perturb their embeddings to generate more diverse sentences for document expansion. We validate our framework on two standard IR benchmark datasets. The results show that our framework significantly outperforms relevant expansion baselines for IR. 2021.sdp-1.2 @@ -104,7 +104,7 @@
Argument Mining for Scholarly Document Processing: Taking Stock and Looking Ahead - KhalidAl Khatib + KhalidAl Khatib TirthankarGhosal YufangHou Anitade Waard @@ -135,7 +135,7 @@ ArjunManoharan DeepakMittal RamakanthPasunuru - ManishShrivastava + ManishShrivastava ManeeshSingh MohitBansal PreethiJyothi diff --git a/data/xml/2021.semdeep.xml b/data/xml/2021.semdeep.xml index bceb6d5d89..7cc506aa7b 100644 --- a/data/xml/2021.semdeep.xml +++ b/data/xml/2021.semdeep.xml @@ -3,11 +3,11 @@ Proceedings of the 6th Workshop on Semantic Deep Learning (SemDeep-6) - LuisEspinosa-Anke + LuisEspinosa-Anke DagmarGromann ThierryDeclerck AnnaBreit - JoseCamacho-Collados + JoseCamacho-Collados MohammadTaher Pilehvar ArtemRevenko Association for Computational Linguistics @@ -23,9 +23,9 @@ <fixed-case>CTLR</fixed-case>@<fixed-case>W</fixed-case>i<fixed-case>C</fixed-case>-<fixed-case>TSV</fixed-case>: Target Sense Verification using Marked Inputs and<fixed-case>P</fixed-case>re-trained Models - José G.Moreno + José G.Moreno Elvys LinharesPontes - GaëlDias + GaëlDias 1–6 2021.semdeep-1.1 moreno-etal-2021-ctlr @@ -50,7 +50,7 @@ Relation Classification via Relation Validation - José G.Moreno + José G.Moreno AntoineDoucet BrigitteGrau 20–27 diff --git a/data/xml/2021.semeval.xml b/data/xml/2021.semeval.xml index dde1bbf8e8..f42e748a53 100644 --- a/data/xml/2021.semeval.xml +++ b/data/xml/2021.semeval.xml @@ -7,7 +7,7 @@ NathanSchneider NatalieSchluter GuyEmerson - AurelieHerbelot + AurelieHerbelot XiaodanZhu Association for Computational Linguistics
Online
@@ -23,8 +23,8 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 1: Lexical Complexity Prediction MatthewShardlow - RichardEvans - Gustavo HenriquePaetzold + RichardEvans + Gustavo HenriquePaetzold MarcosZampieri 1–16 This paper presents the results and main findings of SemEval-2021 Task 1 - Lexical Complexity Prediction. We provided participants with an augmented version of the CompLex Corpus (Shardlow et al. 2020). CompLex is an English multi-domain corpus in which words and multi-word expressions (MWEs) were annotated with respect to their complexity using a five point Likert scale. SemEval-2021 Task 1 featured two Sub-tasks: Sub-task 1 focused on single words and Sub-task 2 focused on MWEs. The competition attracted 198 teams in total, of which 54 teams submitted official runs on the test data to Sub-task 1 and 37 to Sub-task 2. @@ -88,7 +88,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 5: Toxic Spans Detection JohnPavlopoulos - JeffreySorensen + JeffreySorensen LéoLaugier IonAndroutsopoulos 59–69 @@ -106,7 +106,7 @@ FirojAlam FabrizioSilvestri HamedFirooz - PreslavNakov + PreslavNakov GiovanniDa San Martino 70–98 We describe SemEval-2021 task 6 on Detection of Persuasion Techniques in Texts and Images: the data, the annotation guidelines, the evaluation setup, the results, and the participating systems. The task focused on memes and had three subtasks: (i) detecting the techniques in the text, (ii) detecting the text spans where the techniques are used, and (iii) detecting techniques in the entire meme, i.e., both in the text and in the image. It was a popular task, attracting 71 registrations, and 22 teams that eventually made an official submission on the test set. The evaluation results for the third subtask confirmed the importance of both modalities, the text and the image. Moreover, some teams reported benefits when not just combining the two modalities, e.g., by using early or late fusion, but rather modeling the interaction between them in a joint model. @@ -147,7 +147,7 @@ <fixed-case>L</fixed-case>ang<fixed-case>R</fixed-case>esearch<fixed-case>L</fixed-case>ab <fixed-case>NC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 1: Linguistic Feature Based Modelling for Lexical Complexity RakshaAgarwal - NiladriChatterjee + NiladriChatterjee 120–125 The present work aims at assigning a complexity score between 0 and 1 to a target word or phrase in a given sentence. For each Single Word Target, a Random Forest Regressor is trained on a feature set consisting of lexical, semantic, and syntactic information about the target. For each Multiword Target, a set of individual word features is taken along with single word complexities in the feature space. The system yielded the Pearson correlation of 0.7402 and 0.8244 on the test set for the Single and Multiword Targets, respectively. 2021.semeval-1.10 @@ -210,7 +210,7 @@ <fixed-case>S</fixed-case>koltech<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 2: Generating Cross-Lingual Training Data for the Word-in-Context Task AntonRazzhigaev - NikolayArefyev + NikolayArefyev AlexanderPanchenko 157–162 In this paper, we present a system for the solution of the cross-lingual and multilingual word-in-context disambiguation task. Task organizers provided monolingual data in several languages, but no cross-lingual training data were available. To address the lack of the officially provided cross-lingual training data, we decided to generate such data ourselves. We describe a simple yet effective approach based on machine translation and back translation of the lexical units to the original language used in the context of this shared task. In our experiments, we used a neural system based on the XLM-R, a pre-trained transformer-based masked language model, as a baseline. We show the effectiveness of the proposed approach as it allows to substantially improve the performance of this strong neural baseline model. In addition, in this study, we present multiple types of the XLM-R based classifier, experimenting with various ways of mixing information from the first and second occurrences of the target word in two samples. @@ -467,7 +467,7 @@ JessicaCox CurtKohler AntonyScerri - RonDaniel Jr. + RonDaniel Jr. PaulGroth 306–316 We describe MeasEval, a SemEval task of extracting counts, measurements, and related context from scientific documents, which is of significant importance to the creation of Knowledge Graphs that distill information from the scientific literature. This is a new task in 2021, for which over 75 submissions from 25 participants were received. We expect the data developed for this task and the findings reported to be valuable to the scientific knowledge extraction, metrology, and automated knowledge base construction communities. @@ -512,9 +512,9 @@ AncaDumitrache TristanMiller JonChamberlain - BarbaraPlank + BarbaraPlank EdwinSimpson - MassimoPoesio + MassimoPoesio 338–347 Disagreement between coders is ubiquitous in virtually all datasets annotated with human judgements in both natural language processing and computer vision. However, most supervised machine learning methods assume that a single preferred interpretation exists for each item, which is at best an idealization. The aim of the SemEval-2021 shared task on learning with disagreements (Le-Wi-Di) was to provide a unified testing framework for methods for learning from data containing multiple and possibly contradictory annotations covering the best-known datasets containing information about disagreements for interpreting language and classifying images. In this paper we describe the shared task and its results. 2021.semeval-1.41 @@ -527,8 +527,8 @@ EgoitzLaparra XinSu YiyunZhao - ÖzlemUzuner - TimothyMiller + ÖzlemUzuner + TimothyMiller StevenBethard 348–356 This paper presents the Source-Free Domain Adaptation shared task held within SemEval-2021. The aim of the task was to explore adaptation of machine-learning models in the face of data sharing constraints. Specifically, we consider the scenario where annotations exist for a domain but cannot be shared. Instead, participants are provided with models trained on that (source) data. Participants also receive some labeled data from a new (development) domain on which to explore domain adaptation algorithms. Participants are then tested on data representing a new (target) domain. We explored this scenario with two different semantic tasks: negation detection (a text classification task) and time expression recognition (a sequence tagging task). @@ -541,7 +541,7 @@ WeikangWang YiWu YixiangLiu - PengyuanLiu + PengyuanLiu 357–363 Domain adaptation assumes that samples from source and target domains are freely accessible during a training phase. However, such assumption is rarely plausible in the real-world and may causes data-privacy issues, especially when the label of the source domain can be a sensitive attribute as an identifier. SemEval-2021 task 10 focuses on these issues. We participate in the task and propose novel frameworks based on self-training method. In our systems, two different frameworks are designed to solve text classification and sequence labeling. These approaches are tested to be effective which ranks the third among all system in subtask A, and ranks the first among all system in subtask B. 2021.semeval-1.43 @@ -631,7 +631,7 @@ <fixed-case>TAPAS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 9: Reasoning over tables with intermediate pre-training - ThomasMüller + ThomasMüller JulianEisenschlos SyrineKrichene 423–430 @@ -645,7 +645,7 @@ AbdullatifKöksal YusufYüksel BekirYıldırım - ArzucanÖzgür + ArzucanÖzgür 431–437 In this paper, we present our text augmentation based approach for the Table Statement Support Subtask (Phase A) of SemEval-2021 Task 9. We experiment with different text augmentation techniques such as back translation and synonym swapping using Word2Vec and WordNet. We show that text augmentation techniques lead to 2.5% improvement in F1 on the test set. Further, we investigate the impact of domain adaptation and joint learning on fact verification in tabular data by utilizing the SemTabFacts and TabFact datasets. We observe that joint learning improves the F1 scores on the SemTabFacts and TabFact test sets by 3.31% and 0.77%, respectively. 2021.semeval-1.52 @@ -671,7 +671,7 @@ KemalKurniawan LeaFrermann PhilipSchulz - TrevorCohn + TrevorCohn 445–451 This paper describes PTST, a source-free unsupervised domain adaptation technique for sequence tagging, and its application to the SemEval-2021 Task 10 on time expression recognition. PTST is an extension of the cross-lingual parsimonious parser transfer framework, which uses high-probability predictions of the source model as a supervision signal in self-training. We extend the framework to a sequence prediction setting, and demonstrate its applicability to unsupervised domain adaptation. PTST achieves F1 score of 79.6% on the official test set, with the precision of 90.1%, the highest out of 14 submissions. 2021.semeval-1.54 @@ -728,7 +728,7 @@ YuSu ChanghongHe LeiLin - ChengjieSun + ChengjieSun LiliShan 485–489 This paper describes the winning system in the End-to-end Pipeline phase for the NLPContributionGraph task. The system is composed of three BERT-based models and the three models are used to extract sentences, entities and triples respectively. Experiments show that sampling and adversarial training can greatly boost the system. In End-to-end Pipeline phase, our system got an average F1 of 0.4703, significantly higher than the second-placed system which got an average F1 of 0.3828. @@ -859,7 +859,7 @@ JinghangGu EmmanueleChersoni WenjieLi - QinLu + QinLu Chu-RenHuang 565–570 In this contribution, we describe the system presented by the PolyU CBS-Comp Team at the Task 1 of SemEval 2021, where the goal was the estimation of the complexity of words in a given sentence context. Our top system, based on a combination of lexical, syntactic, word embeddings and Transformers-derived features and on a Gradient Boosting Regressor, achieves a top correlation score of 0.754 on the subtask 1 for single words and 0.659 on the subtask 2 for multiword expressions. @@ -946,7 +946,7 @@ <fixed-case>UTFPR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 1: Complexity Prediction by Combining <fixed-case>BERT</fixed-case> Vectors and Classic Features - Gustavo HenriquePaetzold + Gustavo HenriquePaetzold 617–622 We describe the UTFPR systems submitted to the Lexical Complexity Prediction shared task of SemEval 2021. They perform complexity prediction by combining classic features, such as word frequency, n-gram frequency, word length, and number of senses, with BERT vectors. We test numerous feature combinations and machine learning models in our experiments and find that BERT vectors, even if not optimized for the task at hand, are a great complement to classic features. We also find that employing the principle of compositionality can potentially help in phrase complexity prediction. Our systems place 45th out of 55 for single words and 29th out of 38 for phrases. 2021.semeval-1.78 @@ -1063,7 +1063,7 @@ <fixed-case>C</fixed-case>3<fixed-case>SL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 1: Predicting Lexical Complexity of Words in Specific Contexts with Sentence Embeddings RaulAlmeida HeglerTissot - Marcos Didonet DelFabro + Marcos Didonet DelFabro 683–687 We present our approach to predicting lexical complexity of words in specific contexts, as entered LCP Shared Task 1 at SemEval 2021. The approach consists of separating sentences into smaller chunks, embedding them with Sent2Vec, and reducing the embeddings into a simpler vector used as input to a neural network, the latter for predicting the complexity of words and expressions. Results show that the pre-trained sentence embeddings are not able to capture lexical complexity from the language when applied in cross-domain applications. 2021.semeval-1.88 @@ -1164,7 +1164,7 @@ <fixed-case>U</fixed-case>o<fixed-case>B</fixed-case>_<fixed-case>UK</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2021 Task 2: Zero-Shot and Few-Shot Learning for Multi-lingual and Cross-lingual Word Sense Disambiguation. WeiLi HarishTayyar Madabushi - MarkLee + MarkLee 738–742 This paper describes our submission to SemEval 2021 Task 2. We compare XLM-RoBERTa Base and Large in the few-shot and zero-shot settings and additionally test the effectiveness of using a k-nearest neighbors classifier in the few-shot setting instead of the more traditional multi-layered perceptron. Our experiments on both the multi-lingual and cross-lingual data show that XLM-RoBERTa Large, unlike the Base version, seems to be able to more effectively transfer learning in a few-shot setting and that the k-nearest neighbors classifier is indeed a more powerful classifier than a multi-layered perceptron when used in few-shot learning. 2021.semeval-1.97 @@ -1195,7 +1195,7 @@ <fixed-case>G</fixed-case>loss<fixed-case>R</fixed-case>eader at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 2: Reading Definitions Improves Contextualized Word Embeddings MaximRachinskiy - NikolayArefyev + NikolayArefyev 756–762 Consulting a dictionary or a glossary is a familiar way for many humans to figure out what does a word in a particular context mean. We hypothesize that a system that can select a proper definition for a particular word occurrence can also naturally solve tasks related to word senses. To verify this hypothesis we developed a solution for the Multilingual and Cross-lingual Word-in-Context (MCL-WiC) task, that does not use any of the shared task data or other WiC data for training. Instead, it is trained to embed word definitions from English WordNet and word occurrences in English texts into the same vector space following an approach previously proposed for Word Sense Disambiguation (WSD). To estimate the similarity in meaning of two word occurrences, we compared different metrics in this shared vector space and found that L1-distance between normalized contextualized word embeddings outperforms traditionally employed cosine similarity and several other metrics. To solve the task for languages other than English, we rely on zero-shot cross-lingual transfer capabilities of the multilingual XLM-R masked language model. Despite not using MCL-WiC training data, in the shared task our approach achieves an accuracy of 89.5% on the English test set, which is only 4% less than the best system. In the multilingual subtask zero-shot cross-lingual transfer shows competitive results, that are within 2% from the best systems for Russian, French, and Arabic. In the cross-lingual subtask are within 2-4% from the best systems. 2021.semeval-1.100 @@ -1227,7 +1227,7 @@ <fixed-case>LIORI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 2: Span Prediction and Binary Classification approaches to Word-in-Context Disambiguation AdisDavletov - NikolayArefyev + NikolayArefyev DenisGordeev AlexeyRey 780–786 @@ -1241,8 +1241,8 @@ CiprianBodnar AndradaTapuc CosminPintilie - DanielaGifu - DianaTrandabat + DanielaGifu + DianaTrandabat 787–792 This paper presents a word-in-context disambiguation system. The task focuses on capturing the polysemous nature of words in a multilingual and cross-lingual setting, without considering a strict inventory of word meanings. The system applies Natural Language Processing algorithms on datasets from SemEval 2021 Task 2, being able to identify the meaning of words for the languages Arabic, Chinese, English, French and Russian, without making use of any additional mono- or multilingual resources. 2021.semeval-1.104 @@ -1265,7 +1265,7 @@ <fixed-case>U</fixed-case>o<fixed-case>R</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 4: Using Pre-trained <fixed-case>BERT</fixed-case> Token Embeddings for Question Answering of Abstract Meaning ThanetMarkchom - HuizhiLiang + HuizhiLiang 799–804 Most question answering tasks focuses on predicting concrete answers, e.g., named entities. These tasks can be normally achieved by understanding the contexts without additional information required. In Reading Comprehension of Abstract Meaning (ReCAM) task, the abstract answers are introduced. To understand abstract meanings in the context, additional knowledge is essential. In this paper, we propose an approach that leverages the pre-trained BERT Token embeddings as a prior knowledge resource. According to the results, our approach using the pre-trained BERT outperformed the baselines. It shows that the pre-trained BERT token embeddings can be used as additional knowledge for understanding abstract meanings in question answering. 2021.semeval-1.106 @@ -1395,7 +1395,7 @@ <fixed-case>HITMI</fixed-case>&<fixed-case>T</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 5: Integrating Transformer and <fixed-case>CRF</fixed-case> for Toxic Spans Detection ChenyiWang TianshuLiu - TiejunZhao + TiejunZhao 870–874 This paper introduces our system at SemEval-2021 Task 5: Toxic Spans Detection. The task aims to accurately locate toxic spans within a text. Using BIO tagging scheme, we model the task as a token-level sequence labeling task. Our system uses a single model built on the model of multi-layer bidirectional transformer encoder. And we introduce conditional random field (CRF) to make the model learn the constraints between tags. We use ERNIE as pre-trained model, which is more suitable for the task accroding to our experiments. In addition, we use adversarial training with the fast gradient method (FGM) to improve the robustness of the system. Our system obtains 69.85% F1 score, ranking 3rd for the official evaluation. 2021.semeval-1.117 @@ -1590,8 +1590,8 @@ <fixed-case>SINAI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 5: Combining Embeddings in a <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case>-<fixed-case>CRF</fixed-case> model for Toxic Spans Detection Flor MiriamPlaza-del-Arco PilarLópez-Úbeda - L. AlfonsoUreña-López - M. TeresaMartín-Valdivia + L. AlfonsoUreña-López + M. TeresaMartín-Valdivia 984–989 This paper describes the participation of SINAI team at Task 5: Toxic Spans Detection which consists of identifying spans that make a text toxic. Although several resources and systems have been developed so far in the context of offensive language, both annotation and tasks have mainly focused on classifying whether a text is offensive or not. However, detecting toxic spans is crucial to identify why a text is toxic and can assist human moderators to locate this type of content on social media. In order to accomplish the task, we follow a deep learning-based approach using a Bidirectional variant of a Long Short Term Memory network along with a stacked Conditional Random Field decoding layer (BiLSTM-CRF). Specifically, we test the performance of the combination of different pre-trained word embeddings for recognizing toxic entities in text. The results show that the combination of word embeddings helps in detecting offensive content. Our team ranks 29th out of 91 participants. 2021.semeval-1.134 @@ -1647,7 +1647,7 @@ <fixed-case>LIIR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 task 6: Detection of Persuasion Techniques In Texts and Images using <fixed-case>CLIP</fixed-case> features ErfanGhadery DamienSileo - Marie-FrancineMoens + Marie-FrancineMoens 1015–1019 We describe our approach for SemEval-2021 task 6 on detection of persuasion techniques in multimodal content (memes). Our system combines pretrained multimodal models (CLIP) and chained classifiers. Also, we propose to enrich the data by a data augmentation technique. Our submission achieves a rank of 8/16 in terms of F1-micro and 9/16 with F1-macro on the test set. 2021.semeval-1.139 @@ -1766,7 +1766,7 @@ <fixed-case>M</fixed-case>in<fixed-case>D</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 6: Propaganda Detection using Transfer Learning and Multimodal Fusion - JunfengTian + JunfengTian MinGui ChenliangLi MingYan @@ -1947,7 +1947,7 @@ <fixed-case>U</fixed-case>o<fixed-case>R</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 7: Utilizing Pre-trained <fixed-case>D</fixed-case>istil<fixed-case>BERT</fixed-case> Model and Multi-scale <fixed-case>CNN</fixed-case> for Humor Detection ZehaoLiu CarlHaines - HuizhiLiang + HuizhiLiang 1179–1184 Humour detection is an interesting but difficult task in NLP. Because humorous might not be obvious in text, it can be embedded into context, hide behind the literal meaning and require prior knowledge to understand. We explored different shallow and deep methods to create a humour detection classifier for task 7-1a. Models like Logistic Regression, LSTM, MLP, CNN were used, and pre-trained models like DistilBert were introduced to generate accurate vector representation for textual data. We focused on applying multi-scale strategy on modelling, and compared different models. Our best model is the DistilBERT+MultiScale CNN, it used different sizes of CNN kernel to get multiple scales of features, which achieved 93.7% F1-score and 92.1% accuracy on the test set. 2021.semeval-1.166 @@ -2037,7 +2037,7 @@ <fixed-case>FII</fixed-case> <fixed-case>FUNNY</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 7: <fixed-case>H</fixed-case>a<fixed-case>H</fixed-case>ackathon: Detecting and rating Humor and Offense MihaiSamson - DanielaGifu + DanielaGifu 1226–1231 The “HaHackathon: Detecting and Rating Humor and Offense” task at the SemEval 2021 competition focuses on detecting and rating the humor level in sentences, as well as the level of offensiveness contained in these texts with humoristic tones. In this paper, we present an approach based on recent Deep Learning techniques by both trying to train the models based on the dataset solely and by trying to fine-tune pre-trained models on the gigantic corpus. 2021.semeval-1.174 @@ -2086,7 +2086,7 @@ <fixed-case>LIORI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 8: Ask Transformer for measurements AdisDavletov DenisGordeev - NikolayArefyev + NikolayArefyev EmilDavletov 1249–1254 This work describes our approach for subtasks of SemEval-2021 Task 8: MeasEval: Counts and Measurements which took the official first place in the competition. To solve all subtasks we use multi-task learning in a question-answering-like manner. We also use learnable scalar weights to weight subtasks’ contribution to the final loss in multi-task training. We fine-tune LUKE to extract quantity spans and we fine-tune RoBERTa to extract everything related to found quantities, including quantities themselves. @@ -2113,7 +2113,7 @@ <fixed-case>V</fixed-case>olta at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 9: Statement Verification and Evidence Finding with Tables using <fixed-case>TAPAS</fixed-case> and Transfer Learning DevanshGautam KshitijGupta - ManishShrivastava + ManishShrivastava 1262–1270 Tables are widely used in various kinds of documents to present information concisely. Understanding tables is a challenging problem that requires an understanding of language and table structure, along with numerical and logical reasoning. In this paper, we present our systems to solve Task 9 of SemEval-2021: Statement Verification and Evidence Finding with Tables (SEM-TAB-FACTS). The task consists of two subtasks: (A) Given a table and a statement, predicting whether the table supports the statement and (B) Predicting which cells in the table provide evidence for/against the statement. We fine-tune TAPAS (a model which extends BERT’s architecture to capture tabular structure) for both the subtasks as it has shown state-of-the-art performance in various table understanding tasks. In subtask A, we evaluate how transfer learning and standardizing tables to have a single header row improves TAPAS’ performance. In subtask B, we evaluate how different fine-tuning strategies can improve TAPAS’ performance. Our systems achieve an F1 score of 67.34 in subtask A three-way classification, 72.89 in subtask A two-way classification, and 62.95 in subtask B. 2021.semeval-1.180 @@ -2182,7 +2182,7 @@ <fixed-case>UOR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 12: On Crowd Annotations; Learning with Disagreements to optimise crowd truth EmmanuelOsei-Brefo ThanetMarkchom - HuizhiLiang + HuizhiLiang 1303–1309 Crowdsourcing has been ubiquitously used for annotating enormous collections of data. However, the major obstacles to using crowd-sourced labels are noise and errors from non-expert annotations. In this work, two approaches dealing with the noise and errors in crowd-sourced labels are proposed. The first approach uses Sharpness-Aware Minimization (SAM), an optimization technique robust to noisy labels. The other approach leverages a neural network layer called softmax-Crowdlayer specifically designed to learn from crowd-sourced annotations. According to the results, the proposed approaches can improve the performance of the Wide Residual Network model and Multi-layer Perception model applied on crowd-sourced datasets in the image processing domain. It also has similar and comparable results with the majority voting technique when applied to the sequential data domain whereby the Bidirectional Encoder Representations from Transformers (BERT) is used as the base model in both instances. 2021.semeval-1.186 diff --git a/data/xml/2021.semspace.xml b/data/xml/2021.semspace.xml index d9ec0ee16e..e73429be4d 100644 --- a/data/xml/2021.semspace.xml +++ b/data/xml/2021.semspace.xml @@ -4,7 +4,7 @@ Proceedings of the 2021 Workshop on Semantic Spaces at the Intersection of NLP, Physics, and Cognitive Science (SemSpace) MarthaLewis - MehrnooshSadrzadeh + MehrnooshSadrzadeh Association for Computational Linguistics
Groningen, The Netherlands
June @@ -86,7 +86,7 @@ Should Semantic Vector Composition be Explicit? Can it be Linear? DominicWiddows KristenHowell - TrevorCohen + TrevorCohen 76–86 Vector representations have become a central element in semantic language modelling, leading to mathematical overlaps with many fields including quantum theory. Compositionality is a core goal for such representations: given representations for ‘wet’ and ‘fish’, how should the concept ‘wet fish’ be represented? This position paper surveys this question from two points of view. The first considers the question of whether an explicit mathematical representation can be successful using only tools from within linear algebra, or whether other mathematical tools are needed. The second considers whether semantic vector composition should be explicitly described mathematically, or whether it can be a model-internal side-effect of training a neural network. A third and newer question is whether a compositional model can be implemented on a quantum computer. Given the fundamentally linear nature of quantum mechanics, we propose that these questions are related, and that this survey may help to highlight candidate operations for future quantum implementation. 2021.semspace-1.8 diff --git a/data/xml/2021.sigdial.xml b/data/xml/2021.sigdial.xml index 407e0c92e8..bcebf3f53f 100644 --- a/data/xml/2021.sigdial.xml +++ b/data/xml/2021.sigdial.xml @@ -4,7 +4,7 @@ Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue HaizhouLi - Gina-AnneLevow + Gina-AnneLevow ZhouYu ChitralekhaGupta BerrakSisman @@ -27,7 +27,7 @@ Understanding and predicting user dissatisfaction in a neural generative chatbot AbigailSee - ChristopherManning + ChristopherManning 1–12 Neural generative dialogue agents have shown an increasing ability to hold short chitchat conversations, when evaluated by crowdworkers in controlled settings. However, their performance in real-life deployment – talking to intrinsically-motivated users in noisy environments – is less well-explored. In this paper, we perform a detailed case study of a neural generative model deployed as part of Chirpy Cardinal, an Alexa Prize socialbot. We find that unclear user utterances are a major source of generative errors such as ignoring, hallucination, unclearness and repetition. However, even in unambiguous contexts the model frequently makes reasoning errors. Though users express dissatisfaction in correlation with these errors, certain dissatisfaction types (such as offensiveness and privacy objections) depend on additional factors – such as the user’s personal attitudes, and prior unaddressed dissatisfaction in the conversation. Finally, we show that dissatisfied user utterances can be used as a semi-supervised learning signal to improve the dialogue system. We train a model to predict next-turn dissatisfaction, and show through human evaluation that as a ranking function, it selects higher-quality neural-generated utterances. 2021.sigdial-1.1 @@ -37,7 +37,7 @@ Towards Continuous Estimation of Dissatisfaction in Spoken Dialog - NigelWard + NigelWard Jonathan E.Avila Aaron M.Alarcon 13–20 @@ -62,7 +62,7 @@ Individual Interaction Styles: Evidence from a Spoken Chat Corpus - NigelWard + NigelWard 27–31 here is increasing interest in modeling style choices in dialog, for example for enabling dialog systems to adapt to their users. It is commonly assumed that each user has his or her own stable characteristics, but for interaction style the truth of this assumption has not been well examined. I investigated using a vector-space model of interaction styles, derived from the Switchboard corpus of telephone conversations and a broad set of prosodic-behavior features. While most individuals exhibited interaction style tendencies, these were generally far from stable, with a predictive model based on individual tendencies outperforming a speaker-independent model by only 3.6%. The tendencies were somewhat stronger for some speakers, including generally males, and for some dimensions of variation. 2021.sigdial-1.4 @@ -73,7 +73,7 @@ Evaluation of In-Person Counseling Strategies To Develop Physical Activity Chatbot for Women Kai-HuiLiang - PatrickLange + PatrickLange Yoo JungOh JingwenZhang YoshimiFukuoka @@ -120,7 +120,7 @@ PinarDonmez VikasBhardwaj AnujKumar - MichaelWhite + MichaelWhite 66–76 In this paper, we study the utilization of pre-trained language models to enable few-shotNatural Language Generation (NLG) in task-oriented dialog systems. We introduce a system consisting of iterative self-training and an extensible mini-template framework that textualizes the structured input data into semi-natural text to fully take advantage of pre-trained language models. We compare var-ious representations of NLG models’ input and output and show that transforming the input and output to be similar to what the language model has seen before during pre-training improves the model’s few-shot performance substantially. We show that neural mod-els can be trained with as few as 300 annotated examples while providing high fidelity, considerably lowering the resource requirements for standing up a new domain or language. This level of data efficiency removes the need for crowd-sourced data collection resulting in higher quality data annotated by expert linguists. In addition, model maintenance and debugging processes will improve in this few-shot setting. Finally, we explore distillation and using a caching system to satisfy latency requirements of real-world systems. 2021.sigdial-1.8 @@ -144,7 +144,7 @@ Integrated taxonomy of errors in chat-oriented dialogue systems RyuichiroHigashinaka - MasahiroAraki + MasahiroAraki HiroshiTsukahara MasahiroMizukami 89–98 @@ -158,7 +158,7 @@ Effective Social Chatbot Strategies for Increasing User Initiative AmeliaHardy AshwinParanjape - ChristopherManning + ChristopherManning 99–110 Many existing chatbots do not effectively support mixed initiative, forcing their users to either respond passively or lead constantly. We seek to improve this experience by introducing new mechanisms to encourage user initiative in social chatbot conversations. Since user initiative in this setting is distinct from initiative in human-human or task-oriented dialogue, we first propose a new definition that accounts for the unique behaviors users take in this context. Drawing from linguistics, we propose three mechanisms to promote user initiative: back-channeling, personal disclosure, and replacing questions with statements. We show that simple automatic metrics of utterance length, number of noun phrases, and diversity of user responses correlate with human judgement of initiative. Finally, we use these metrics to suggest that these strategies do result in statistically significant increases in user initiative, where frequent, but not excessive, back-channeling is the most effective strategy. 2021.sigdial-1.11 @@ -168,12 +168,12 @@ Generative Conversational Networks - AlexandrosPapangelis + AlexandrosPapangelis KarthikGopalakrishnan AishwaryaPadmakumar SeokhwanKim - GokhanTur - DilekHakkani-Tur + GokhanTur + DilekHakkani-Tur 111–120 Inspired by recent work in meta-learning and generative teaching networks, we propose a framework called Generative Conversational Networks, in which conversational agents learn to generate their own labelled training data (given some seed data) and then train themselves from that data to perform a given task. We use reinforcement learning to optimize the data generation process where the reward signal is the agent’s performance on the task. The task can be any language-related task, from intent detection to full task-oriented conversations. In this work, we show that our approach is able to generalise from seed data and performs well in limited data and limited computation settings, with significant gains for intent detection and slot tagging across multiple datasets: ATIS, TOD, SNIPS, and Restaurants8k. We show an average improvement of 35% in intent detection and 21% in slot tagging over a baseline model trained from the seed data. We also conduct an analysis of the novelty of the generated data and provide generated examples for intent detection, slot tagging, and non-goal oriented conversations. 2021.sigdial-1.12 @@ -190,7 +190,7 @@ JayPujara XiangRen YangLiu - DilekHakkani-Tur + DilekHakkani-Tur 121–132 Smooth and effective communication requires the ability to perform latent or explicit commonsense inference. Prior commonsense reasoning benchmarks (such as SocialIQA and CommonsenseQA) mainly focus on the discriminative task of choosing the right answer from a set of candidates, and do not involve interactive language generation as in dialogue. Moreover, existing dialogue datasets do not explicitly focus on exhibiting commonsense as a facet. In this paper, we present an empirical study of commonsense in dialogue response generation. We first auto-extract commonsensical dialogues from existing dialogue datasets by leveraging ConceptNet, a commonsense knowledge graph. Furthermore, building on social contexts/situations in SocialIQA, we collect a new dialogue dataset with 25K dialogues aimed at exhibiting social commonsense in an interactive setting. We evaluate response generation models trained using these datasets and find that models trained on both extracted and our collected data produce responses that consistently exhibit more commonsense than baselines. Finally we propose an approach for automatic evaluation of commonsense that relies on features derived from ConceptNet and pre-trained language and dialog models, and show reasonable correlation with human evaluation of responses’ commonsense quality. 2021.sigdial-1.13 @@ -203,7 +203,7 @@ TahaAksu ZhengyuanLiu Min-YenKan - NancyChen + NancyChen 133–143 We introduce a synthetic dialogue generation framework, Velocidapter, which addresses the corpus availability problem for dialogue comprehension. Velocidapter augments datasets by simulating synthetic conversations for a task-oriented dialogue domain, requiring a small amount of bootstrapping work for each new domain. We evaluate the efficacy of our framework on a task-oriented dialogue comprehension dataset, MRCWOZ, which we curate by annotating questions for slots in the restaurant, taxi, and hotel domains of the MultiWOZ 2.2 dataset (Zang et al., 2020). We run experiments within a low-resource setting, where we pretrain a model on SQuAD, fine-tuning it on either a small original data or on the synthetic data generated by our framework. Velocidapter shows significant improvements using both the transformer-based BERTBase and BiDAF as base models. We further show that the framework is easy to use by novice users and conclude that Velocidapter can greatly help training over task-oriented dialogues, especially for low-resourced emerging domains. 2021.sigdial-1.14 @@ -214,7 +214,7 @@ An Analysis of State-of-the-Art Models for Situated Interactive <fixed-case>M</fixed-case>ulti<fixed-case>M</fixed-case>odal Conversations (<fixed-case>SIMMC</fixed-case>) SatwikKottur - PaulCrook + PaulCrook SeungwhanMoon AhmadBeirami EunjoonCho @@ -230,7 +230,7 @@ A Simple yet Effective Method for Sentence Ordering AiliShen - TimothyBaldwin + TimothyBaldwin 154–160 Sentence ordering is the task of arranging a given bag of sentences so as to maximise the coherence of the overall text. In this work, we propose a simple yet effective training method that improves the capacity of models to capture overall text coherence based on training over pairs of sentences/segments. Experimental results show the superiority of our proposed method in in- and cross-domain settings. The utility of our method is also verified over a multi-document summarisation task. 2021.sigdial-1.16 @@ -243,7 +243,7 @@ RachnaKonigari SaurabhRamola Vijay VardhanAlluri - ManishShrivastava + ManishShrivastava 161–166 Topic diversion occurs frequently with engaging open-domain dialogue systems like virtual assistants. The balance between staying on topic and rectifying the topic drift is important for a good collaborative system. In this paper, we present a model which uses a fine-tuned XLNet-base to classify the utterances pertaining to the major topic of conversation and those which are not, with a precision of 84%. We propose a preliminary study, classifying utterances into major, minor and off-topics, which further extends into a system initiative for diversion rectification. A case study was conducted where a system initiative is emulated as a response to the user going off-topic, mimicking a common occurrence of mixed initiative present in natural human-human conversation. This task of classifying utterances into those which belong to the major theme or not, would also help us in identification of relevant sentences for tasks like dialogue summarization and information extraction from conversations. 2021.sigdial-1.17 @@ -317,7 +317,7 @@ Hi-<fixed-case>DST</fixed-case>: A Hierarchical Approach for Scalable and Extensible Dialogue State Tracking SuvodipDey - Maunendra SankarDesarkar + Maunendra SankarDesarkar 218–227 Dialogue State Tracking (DST) is a sub-task of task-based dialogue systems where the user intention is tracked through a set of (domain, slot, slot-value) triplets. Existing DST models can be difficult to extend for new datasets with larger domains/slots mainly due to either of the two reasons- i) prediction of domain-slot as a pair, and ii) dependency of model parameters on the number of slots and domains. In this work, we propose to address these issues using a Hierarchical DST (Hi-DST) model. At a given turn, the model first detects a change in domain followed by domain prediction if required. Then it decides suitable action for each slot in the predicted domains and finds their value accordingly. The model parameters of Hi-DST are independent of the number of domains/slots. Due to the hierarchical modeling, it achieves O(|M|+|N|) belief state prediction for a single turn where M and N are the set of unique domains and slots respectively. We argue that the hierarchical structure helps in the model explainability and makes it easily extensible to new datasets. Experiments on the MultiWOZ dataset show that our proposed model achieves comparable joint accuracy performance to state-of-the-art DST models. 2021.sigdial-1.23 @@ -343,7 +343,7 @@ Recent Neural Methods on Dialogue State Tracking for Task-Oriented Dialogue Systems: A Survey VevakeBalaraman SeyedmostafaSheikhalishahi - BernardoMagnini + BernardoMagnini 239–251 This paper aims at providing a comprehensive overview of recent developments in dialogue state tracking (DST) for task-oriented conversational systems. We introduce the task, the main datasets that have been exploited as well as their evaluation metrics, and we analyze several proposed approaches. We distinguish between static ontology DST models, which predict a fixed set of dialogue states, and dynamic ontology models, which can predict dialogue states even when the ontology changes. We also discuss the model’s ability to track either single or multiple domains and to scale to new domains, both in terms of knowledge transfer and zero-shot learning. We cover a period from 2013 to 2020, showing a significant increase of multiple domain methods, most of them utilizing pre-trained language models. 2021.sigdial-1.25 @@ -367,7 +367,7 @@ <fixed-case>ERICA</fixed-case>: An Empathetic Android Companion for Covid-19 Quarantine EtsukoIshii - Genta IndraWinata + Genta IndraWinata SamuelCahyawijaya DiveshLala TatsuyaKawahara @@ -455,7 +455,7 @@ PengfeiHong SiqiShen NavonilMajumder - RadaMihalcea + RadaMihalcea SoujanyaPoria 301–313 Commonsense inference to understand and explain human language is a fundamental research problem in natural language processing. Explaining human conversations poses a great challenge as it requires contextual understanding, planning, inference, and several aspects of reasoning including causal, temporal, and commonsense reasoning. In this work, we introduce CIDER – a manually curated dataset that contains dyadic dialogue explanations in the form of implicit and explicit knowledge triplets inferred using contextual commonsense inference. Extracting such rich explanations from conversations can be conducive to improving several downstream applications. The annotated triplets are categorized by the type of commonsense knowledge present (e.g., causal, conditional, temporal). We set up three different tasks conditioned on the annotated dataset: Dialogue-level Natural Language Inference, Span Extraction, and Multi-choice Span Selection. Baseline results obtained with transformer-based models reveal that the tasks are difficult, paving the way for promising future research. The dataset and the baseline implementations are publicly available at https://github.com/declare-lab/CIDER. @@ -580,9 +580,9 @@ Diversity as a By-Product: Goal-oriented Language Generation Leads to Linguistic Variation - SimeonSchüz + SimeonSchüz TingHan - SinaZarrieß + SinaZarrieß 411–422 The ability for variation in language use is necessary for speakers to achieve their conversational goals, for instance when referring to objects in visual environments. We argue that diversity should not be modelled as an independent objective in dialogue, but should rather be a result or by-product of goal-oriented language generation. Different lines of work in neural language generation investigated decoding methods for generating more diverse utterances, or increasing the informativity through pragmatic reasoning. We connect those lines of work and analyze how pragmatic reasoning during decoding affects the diversity of generated image captions. We find that boosting diversity itself does not result in more pragmatically informative captions, but pragmatic reasoning does increase lexical diversity. Finally, we discuss whether the gain in informativity is achieved in linguistically plausible ways. 2021.sigdial-1.43 @@ -636,7 +636,7 @@ ChristianGeishauser MichaelHeck ShutongFeng - MilicaGasic + MilicaGasic 445–456 Dialogue policy optimisation via reinforcement learning requires a large number of training interactions, which makes learning with real users time consuming and expensive. Many set-ups therefore rely on a user simulator instead of humans. These user simulators have their own problems. While hand-coded, rule-based user simulators have been shown to be sufficient in small, simple domains, for complex domains the number of rules quickly becomes intractable. State-of-the-art data-driven user simulators, on the other hand, are still domain-dependent. This means that adaptation to each new domain requires redesigning and retraining. In this work, we propose a domain-independent transformer-based user simulator (TUS). The structure of TUS is not tied to a specific domain, enabling domain generalization and the learning of cross-domain user behaviour from data. We compare TUS with the state-of-the-art using automatic as well as human evaluations. TUS can compete with rule-based user simulators on pre-defined domains and is able to generalize to unseen domains in a zero-shot fashion. 2021.sigdial-1.47 @@ -706,7 +706,7 @@ Coreference-Aware Dialogue Summarization ZhengyuanLiu KeShi - NancyChen + NancyChen 509–519 Summarizing conversations via neural approaches has been gaining research traction lately, yet it is still challenging to obtain practical solutions. Examples of such challenges include unstructured information exchange in dialogues, informal interactions between speakers, and dynamic role changes of speakers as the dialogue evolves. Many of such challenges result in complex coreference links. Therefore, in this work, we investigate different approaches to explicitly incorporate coreference information in neural abstractive dialogue summarization models to tackle the aforementioned challenges. Experimental results show that the proposed approaches achieve state-of-the-art performance, implying it is useful to utilize coreference information in dialogue summarization. Evaluation results on factual correctness suggest such coreference-aware models are better at tracing the information flow among interlocutors and associating accurate status/actions with the corresponding interlocutors and person mentions. 2021.sigdial-1.53 @@ -730,7 +730,7 @@ Incremental temporal summarization in multi-party meetings - RameshManuvinakurike + RameshManuvinakurike SauravSahay WendaChen LamaNachman @@ -743,9 +743,9 @@ Mitigating Topic Bias when Detecting Decisions in Dialogue - Vanja MladenKaran + Vanja MladenKaran PrashantKhare - PatrickHealey + PatrickHealey MatthewPurver 542–547 This work revisits the task of detecting decision-related utterances in multi-party dialogue. We explore performance of a traditional approach and a deep learning-based approach based on transformer language models, with the latter providing modest improvements. We then analyze topic bias in the models using topic information obtained by manual annotation. Our finding is that when detecting some types of decisions in our data, models rely more on topic specific words that decisions are about rather than on words that more generally indicate decision making. We further explore this by removing topic information from the train data. We show that this resolves the bias issues to an extent and, surprisingly, sometimes even boosts performance. @@ -772,7 +772,7 @@ Large-Scale Quantitative Evaluation of Dialogue Agents’ Response Strategies against Offensive Users HaojunLi DilaraSoylu - ChristopherManning + ChristopherManning 556–561 As voice assistants and dialogue agents grow in popularity, so does the abuse they receive. We conducted a large-scale quantitative evaluation of the effectiveness of 4 response types (avoidance, why, empathetic, and counter), and 2 additional factors (using a redirect or a voluntarily provided name) that have not been tested by prior work. We measured their direct effectiveness on real users in-the-wild by the re-offense ratio, length of conversation after the initial response, and number of turns until the next re-offense. Our experiments confirm prior lab studies in showing that empathetic responses perform better than generic avoidance responses as well as counter responses. We show that dialogue agents should almost always guide offensive users to a new topic through the use of redirects and use the user’s name if provided. As compared to a baseline avoidance strategy employed by commercial agents, our best strategy is able to reduce the re-offense ratio from 92% to 43%. 2021.sigdial-1.58 diff --git a/data/xml/2021.sigmorphon.xml b/data/xml/2021.sigmorphon.xml index 8327bc0466..f0224269e2 100644 --- a/data/xml/2021.sigmorphon.xml +++ b/data/xml/2021.sigmorphon.xml @@ -90,7 +90,7 @@ SaujasVaduguru AalokSathe MonojitChoudhury - DiptiSharma + DiptiSharma 60–71 Neural models excel at extracting statistical patterns from large amounts of data, but struggle to learn patterns or reason about language from only a few examples. In this paper, we ask: Can we learn explicit rules that generalize well from only a few examples? We explore this question using program synthesis. We develop a synthesis model to learn phonology rules as programs in a domain-specific language. We test the ability of our models to generalize from few training examples using our new dataset of problems from the Linguistics Olympiad, a challenging set of tasks that require strong linguistic reasoning ability. In addition to being highly sample-efficient, our approach generates human-readable programs, and allows control over the generalizability of the learnt programs. 2021.sigmorphon-1.7 @@ -101,13 +101,13 @@ Findings of the <fixed-case>SIGMORPHON</fixed-case> 2021 Shared Task on Unsupervised Morphological Paradigm Clustering AdamWiemerslage - Arya D.McCarthy + Arya D.McCarthy AlexanderErdmann GarrettNicolai ManexAgirrezabal - MiikkaSilfverberg + MiikkaSilfverberg MansHulden - KatharinaKann + KatharinaKann 72–81 We describe the second SIGMORPHON shared task on unsupervised morphology: the goal of the SIGMORPHON 2021 Shared Task on Unsupervised Morphological Paradigm Clustering is to cluster word types from a raw text corpus into paradigms. To this end, we release corpora for 5 development and 9 test languages, as well as gold partial paradigms for evaluation. We receive 14 submissions from 4 teams that follow different strategies, and the best performing system is based on adaptor grammars. Results vary significantly across languages. However, all systems are outperformed by a supervised lemmatizer, implying that there is still room for improvement. 2021.sigmorphon-1.8 @@ -117,7 +117,7 @@ <fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars for Unsupervised Paradigm Clustering KateMcCurdy - SharonGoldwater + SharonGoldwater AdamLopez 82–89 This work describes the Edinburgh submission to the SIGMORPHON 2021 Shared Task 2 on unsupervised morphological paradigm clustering. Given raw text input, the task was to assign each token to a cluster with other tokens from the same paradigm. We use Adaptor Grammar segmentations combined with frequency-based heuristics to predict paradigm clusters. Our system achieved the highest average F1 score across 9 test languages, placing first out of 15 submissions. @@ -140,7 +140,7 @@ Unsupervised Paradigm Clustering Using Transformation Rules ChangbingYang GarrettNicolai - MiikkaSilfverberg + MiikkaSilfverberg 98–106 This paper describes the submission of the CU-UBC team for the SIGMORPHON 2021 Shared Task 2: Unsupervised morphological paradigm clustering. Our system generates paradigms using morphological transformation rules which are discovered from raw data. We experiment with two methods for discovering rules. Our first approach generates prefix and suffix transformations between similar strings. Secondly, we experiment with more general rules which can apply transformations inside the input strings in addition to prefix and suffix transformations. We find that the best overall performance is delivered by prefix and suffix rules but more general transformation rules perform better for languages with templatic morphology and very high morpheme-to-word ratios. 2021.sigmorphon-1.11 @@ -151,7 +151,7 @@ Paradigm Clustering with Weighted Edit Distance AndrewGerlach AdamWiemerslage - KatharinaKann + KatharinaKann 107–114 This paper describes our system for the SIGMORPHON 2021 Shared Task on Unsupervised Morphological Paradigm Clustering, which asks participants to group inflected forms together according their underlying lemma without the aid of annotated training data. We employ agglomerative clustering to group word forms together using a metric that combines an orthographic distance and a semantic distance from word embeddings. We experiment with two variations of an edit distance-based model for quantifying orthographic distance, but, due to time constraints, our system does not improve over the shared task’s baseline system. 2021.sigmorphon-1.12 @@ -207,7 +207,7 @@ VagrantGautam Wang YauLi ZafarullahMahmood - FredMailhot + FredMailhot ShreekanthaNadig RiqiangWang NathanZhang @@ -264,7 +264,7 @@ An <fixed-case>FST</fixed-case> morphological analyzer for the Gitksan language ClarissaForbes GarrettNicolai - MiikkaSilfverberg + MiikkaSilfverberg 188–197 This paper presents a finite-state morphological analyzer for the Gitksan language. The analyzer draws from a 1250-token Eastern dialect wordlist. It is based on finite-state technology and additionally includes two extensions which can provide analyses for out-of-vocabulary words: rules for generating predictable dialect variants, and a neural guesser component. The pre-neural analyzer, tested against interlinear-annotated texts from multiple dialects, achieves coverage of (75-81%), and maintains high precision (95-100%). The neural extension improves coverage at the cost of lowered precision. 2021.sigmorphon-1.21 @@ -275,9 +275,9 @@ Comparative Error Analysis in Neural and Finite-state Models for Unsupervised Character-level Transduction MariaRyskina - EduardHovy + EduardHovy TaylorBerg-Kirkpatrick - Matthew R.Gormley + Matthew R.Gormley 198–211 Traditionally, character-level transduction problems have been solved with finite-state models designed to encode structural and linguistic knowledge of the underlying process, whereas recent approaches rely on the power and flexibility of sequence-to-sequence models with attention. Focusing on the less explored unsupervised learning scenario, we compare the two model classes side by side and find that they tend to make different types of errors even when achieving comparable performance. We analyze the distributions of different error classes using two unsupervised tasks as testbeds: converting informally romanized text into the native script of its language (for Russian, Arabic, and Kannada) and translating between a pair of closely related languages (Serbian and Bosnian). Finally, we investigate how combining finite-state and sequence-to-sequence models at decoding time affects the output quantitatively and qualitatively. 2021.sigmorphon-1.22 @@ -289,7 +289,7 @@ Finite-state Model of Shupamem Reduplication MagdalenaMarkowska JeffreyHeinz - OwenRambow + OwenRambow 212–221 Shupamem, a language of Western Cameroon, is a tonal language which also exhibits the morpho-phonological process of full reduplication. This creates two challenges for finite-state model of its morpho-syntax and morphophonology: how to manage the full reduplication and the autosegmental nature of lexical tone. Dolatian and Heinz (2020) explain how 2-way finite-state transducers can model full reduplication without an exponential increase in states, and finite-state transducers with multiple tapes have been used to model autosegmental tiers, including tone (Wiebe, 1992; Dolatian and Rawski, 2020a). Here we synthesize 2-way finite-state transducers and multitape transducers, resulting in a finite-state formalism that subsumes both, to account for the full reduplicative processes in Shupamem which also affect tone. 2021.sigmorphon-1.23 @@ -314,7 +314,7 @@ <fixed-case>SIGMORPHON</fixed-case> 2021 Shared Task on Morphological Reinflection: Generalization Across Languages TiagoPimentel MariaRyskina - Sabrina J.Mielke + Sabrina J.Mielke ShijieWu EleanorChodroff BrianLeonard @@ -327,7 +327,7 @@ MichaelGasser WilliamLane MattColer - ArturoOncevay + ArturoOncevay Jaime RafaelMontoya Samame Gema CelesteSilva Villegas AdamEk @@ -346,7 +346,7 @@ AelitaSalchak ChristopherStraughn ZoeyLiu - Jonathan NorthWashington + Jonathan NorthWashington DuyguAtaman WitoldKieraś MarcinWoliński @@ -354,11 +354,11 @@ NiklasStoehr ZahrohNuriah ShyamRatan - Francis M.Tyers + Francis M.Tyers Edoardo M.Ponti GrantAiton Richard J.Hatcher - EmilyPrud’hommeaux + EmilyPrud’hommeaux RiteshKumar MansHulden BotondBarta diff --git a/data/xml/2021.sigtyp.xml b/data/xml/2021.sigtyp.xml index 82b3a5d070..d4ff8939c5 100644 --- a/data/xml/2021.sigtyp.xml +++ b/data/xml/2021.sigtyp.xml @@ -5,7 +5,7 @@ Proceedings of the Third Workshop on Computational Typology and Multilingual NLP EkaterinaVylomova ElizabethSalesky - SabrinaMielke + SabrinaMielke GabriellaLapesa RiteshKumar HaraldHammarström @@ -77,9 +77,9 @@ <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et and Typology - MichaelEllsworth - CollinBaker - Miriam R. L.Petruck + MichaelEllsworth + CollinBaker + Miriam R. L.Petruck 61–66 FrameNet and the Multilingual FrameNet project have produced multilingual semantic annotations of parallel texts that yield extremely fine-grained typological insights. Moreover, frame semantic annotation of a wide cross-section of languages would provide information on the limits of Frame Semantics (Fillmore 1982, Fillmore1985). Multilingual semantic annotation offers critical input for research on linguistic diversity and recurrent patterns in computational typology. Drawing on results from FrameNet annotation of parallel texts, this paper proposes frame semantic annotation as a new component to complement the state of the art in computational semantic typology. 2021.sigtyp-1.6 @@ -89,7 +89,7 @@ Family of Origin and Family of Choice: Massively Parallel Lexiconized Iterative Pretraining for Severely Low Resource Text-based Translation ZhongZhou - AlexanderWaibel + AlexanderWaibel 67–80 We translate a closed text that is known in advance into a severely low resource language by leveraging massive source parallelism. In other words, given a text in 124 source languages, we translate it into a severely low resource language using only ∼1,000 lines of low resource data without any external help. Firstly, we propose a systematic method to rank and choose source languages that are close to the low resource language. We call the linguistic definition of language family Family of Origin (FAMO), and we call the empirical definition of higher-ranked languages using our metrics Family of Choice (FAMC). Secondly, we build an Iteratively Pretrained Multilingual Order-preserving Lexiconized Transformer (IPML) to train on ∼1,000 lines (∼3.5%) of low resource data. In order to translate named entities well, we build a massive lexicon table for 2,939 Bible named entities in 124 source languages, and include many that occur once and covers more than 66 severely low resource languages. Moreover, we also build a novel method of combining translations from different source languages into one. Using English as a hypothetical low resource language, we get a +23.9 BLEU increase over a multilingual baseline, and a +10.3 BLEU increase over our asymmetric baseline in the Bible dataset. We get a 42.8 BLEU score for Portuguese-English translation on the medical EMEA dataset. We also have good results for a real severely low resource Mayan language, Eastern Pokomchi. 2021.sigtyp-1.7 diff --git a/data/xml/2021.smm4h.xml b/data/xml/2021.smm4h.xml index ce8520685b..272c23f773 100644 --- a/data/xml/2021.smm4h.xml +++ b/data/xml/2021.smm4h.xml @@ -13,12 +13,12 @@ Salvador LimaLopez IvanFlores KarenO'Connor - DavyWeissenbacher + DavyWeissenbacher ElenaTutubalina AbeedSarker Juan MBanda - MartinKrallinger - GracielaGonzalez-Hernandez + MartinKrallinger + GracielaGonzalez-Hernandez Association for Computational Linguistics
Mexico City, Mexico
June @@ -45,7 +45,7 @@ View Distillation with Unlabeled Data for Extracting Adverse Drug Effects from User-Generated Data PayamKarisani - Jinho D.Choi + Jinho D.Choi LiXiong 7–12 We present an algorithm based on multi-layer transformers for identifying Adverse Drug Reactions (ADR) in social media data. Our model relies on the properties of the problem and the characteristics of contextual word embeddings to extract two views from documents. Then a classifier is trained on each view to label a set of unlabeled documents to be used as an initializer for a new classifier in the other view. Finally, the initialized classifier in each view is further trained using the initial training examples. We evaluated our model in the largest publicly available ADR dataset. The experiments testify that our model significantly outperforms the transformer-based models pretrained on domain-specific data. @@ -159,7 +159,7 @@ <fixed-case>UACH</fixed-case>-<fixed-case>INAOE</fixed-case> at <fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case>: a <fixed-case>BERT</fixed-case> based approach for classification of <fixed-case>COVID</fixed-case>-19 <fixed-case>T</fixed-case>witter posts AlbertoValdes JesusLopez - ManuelMontes + ManuelMontes 65–68 This work describes the participation of the Universidad Autónoma de Chihuahua - Instituto Nacional de Astrofísica, Óptica y Electrónica team at the Social Media Mining for Health Applications (SMM4H) 2021 shared task. Our team participated in task 5 and 6, both focused on the automatic classification of Twitter posts related to COVID-19. Task 5 was oriented on solving a binary classification problem, trying to identify self-reporting tweets of potential cases of COVID-19. Task 6 objective was to classify tweets containing COVID-19 symptoms. For both tasks we used models based on bidirectional encoder representations from transformers (BERT). Our objective was to determine if a model pretrained on a corpus in the domain of interest can outperform one trained on a much larger general domain corpus. Our F1 results were encouraging, 0.77 and 0.95 for task 5 and 6 respectively, having achieved the highest score among all the participants in the latter. 2021.smm4h-1.10 @@ -222,7 +222,7 @@ <fixed-case>BERT</fixed-case> based Adverse Drug Effect Tweet Classification TanayKayastha PranjalGupta - PushpakBhattacharyya + PushpakBhattacharyya 88–90 This paper describes models developed for the Social Media Mining for Health (SMM4H) 2021 shared tasks. Our team participated in the first subtask that classifies tweets with Adverse Drug Effect (ADE) mentions. Our best performing model utilizes BERTweet followed by a single layer of BiLSTM. The system achieves an F-score of 0.45 on the test set without the use of any auxiliary resources such as Part-of-Speech tags, dependency tags, or knowledge from medical dictionaries. 2021.smm4h-1.15 @@ -231,7 +231,7 @@ A Joint Training Approach to Tweet Classification and Adverse Effect Extraction and Normalization for <fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case> 2021 - MohabElkaref + MohabElkaref LamieceHassan 91–94 In this work we describe our submissions to the Social Media Mining for Health (SMM4H) 2021 Shared Task. We investigated the effectiveness of a joint training approach to Task 1, specifically classification, extraction and normalization of Adverse Drug Effect (ADE) mentions in English tweets. Our approach performed well on the normalization task, achieving an above average f1 score of 24%, but less so on classification and extraction, with f1 scores of 22% and 37% respectively. Our experiments also showed that a larger dataset with more negative results led to stronger results than a smaller more balanced dataset, even when both datasets have the same positive examples. Finally we also submitted a tuned BERT model for Task 6: Classification of Covid-19 tweets containing symptoms, which achieved an above average f1 score of 96%. @@ -288,7 +288,7 @@ Lasige-<fixed-case>B</fixed-case>io<fixed-case>TM</fixed-case> at <fixed-case>P</fixed-case>rof<fixed-case>NER</fixed-case>: <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case>-<fixed-case>CRF</fixed-case> and contextual <fixed-case>S</fixed-case>panish embeddings for Named Entity Recognition and Tweet Binary Classification PedroRuas VitorAndrade - FranciscoCouto + FranciscoCouto 108–111 The paper describes the participation of the Lasige-BioTM team at sub-tracks A and B of ProfNER, which was based on: i) a BiLSTM-CRF model that leverages contextual and classical word embeddings to recognize and classify the mentions, and ii) on a rule-based module to classify tweets. In the Evaluation phase, our model achieved a F1-score of 0.917 (0,031 more than the median) in sub-track A and a F1-score of 0.727 (0,034 less than the median) in sub-track B. 2021.smm4h-1.21 @@ -310,7 +310,7 @@ <fixed-case>U</fixed-case>o<fixed-case>B</fixed-case> at <fixed-case>P</fixed-case>rof<fixed-case>NER</fixed-case> 2021: Data Augmentation for Classification Using Machine Translation Frances AdrianaLaureano De Leon HarishTayyar Madabushi - MarkLee + MarkLee 115–117 This paper describes the participation of the UoB-NLP team in the ProfNER-ST shared subtask 7a. The task was aimed at detecting the mention of professions in social media text. Our team experimented with two methods of improving the performance of pre-trained models: Specifically, we experimented with data augmentation through translation and the merging of multiple language inputs to meet the objective of the task. While the best performing model on the test data consisted of mBERT fine-tuned on augmented data using back-translation, the improvement is minor possibly because multi-lingual pre-trained models such as mBERT already have access to the kind of information provided through back-translation and bilingual data. 2021.smm4h-1.23 @@ -375,7 +375,7 @@ Classification of <fixed-case>COVID</fixed-case>19 tweets using Machine Learning Approaches AnupamMondal - SainikMahata + SainikMahata MonalisaDey DipankarDas 135–137 @@ -387,7 +387,7 @@ Fine-tuning <fixed-case>BERT</fixed-case> to classify <fixed-case>COVID</fixed-case>19 tweets containing symptoms RajarshiRoychoudhury - SudipNaskar + SudipNaskar 138–140 Twitter is a valuable source of patient-generated data that has been used in various population health studies. The first step in many of these studies is to identify and capture Twitter messages (tweets) containing medication mentions. Identifying personal mentions of COVID19 symptoms requires distinguishing personal mentions from other mentions such as symptoms reported by others and references to news articles or other sources. In this article, we describe our submission to Task 6 of the Social Media Mining for Health Applications (SMM4H) Shared Task 2021. This task challenged participants to classify tweets where the target classes are:(1) self-reports,(2) non-personal reports, and (3) literature/news mentions. Our system used a handcrafted preprocessing and word embeddings from BERT encoder model. We achieved an F1 score of 93% 2021.smm4h-1.30 @@ -399,8 +399,8 @@ AlbertoMesa Murgado AnaParras Portillo PilarLópez Úbeda - MaiteMartin - AlfonsoUreña-López + MaiteMartin + AlfonsoUreña-López 141–145 This paper describes the entry of the research group SINAI at SMM4H’s ProfNER task on the identification of professions and occupations in social media related with health. Specifically we have participated in Task 7a: Tweet Binary Classification to determine whether a tweet contains mentions of occupations or not, as well as in Task 7b: NER Offset Detection and Classification aimed at predicting occupations mentions and classify them discriminating by professions and working statuses. 2021.smm4h-1.31 @@ -420,11 +420,11 @@ <fixed-case>ULD</fixed-case>-<fixed-case>NUIG</fixed-case> at Social Media Mining for Health Applications (#<fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case>) Shared Task 2021 - Atul Kr.Ojha + Atul Kr.Ojha PriyaRani KoustavaGoswami Bharathi RajaChakravarthi - John P.McCrae + John P.McCrae 149–152 Social media platforms such as Twitter and Facebook have been utilised for various research studies, from the cohort-level discussion to community-driven approaches to address the challenges in utilizing social media data for health, clinical and biomedical information. Detection of medical jargon’s, named entity recognition, multi-word expression becomes the primary, fundamental steps in solving those challenges. In this paper, we enumerate the ULD-NUIG team’s system, designed as part of Social Media Mining for Health Applications (#SMM4H) Shared Task 2021. The team conducted a series of experiments to explore the challenges of task 6 and task 5. The submitted systems achieve F-1 0.84 and 0.53 score for task 6 and 5 respectively. 2021.smm4h-1.33 diff --git a/data/xml/2021.smp.xml b/data/xml/2021.smp.xml index 4b533fe6a1..6377499416 100644 --- a/data/xml/2021.smp.xml +++ b/data/xml/2021.smp.xml @@ -28,8 +28,8 @@ Prosody Labelled Dataset for <fixed-case>H</fixed-case>indi EshaBanerjee - Atul Kr.Ojha - GirishJha + Atul Kr.Ojha + GirishJha 14–19 This study aims to develop an intonation labelled database for Hindi, for enhancing prosody in ASR and TTS systems, which is also helpful for building Speech to Speech Machine Translation systems. Although no single standard for prosody labelling exists in Hindi, researchers in the past have employed perceptual and statistical methods in literature to draw inferences about the behaviour of prosody patterns in Hindi. Based on such existing research and largely agreed upon intonational theories in Hindi, this study attempts to develop a manually annotated prosodic corpus of Hindi speech data, which can be used for training speech models for natural-sounding speech in the future. 500 sentences (2,550 words) for declarative and interrogative types have been labelled using Praat. 2021.smp-1.2 diff --git a/data/xml/2021.socialnlp.xml b/data/xml/2021.socialnlp.xml index 2b10cdab8a..73d368f977 100644 --- a/data/xml/2021.socialnlp.xml +++ b/data/xml/2021.socialnlp.xml @@ -47,7 +47,7 @@ KazumaMurao TakeshiMasuyama TaichiYatsuka - ManabuOkumura + ManabuOkumura SatoshiSekine 24–35 Ranking the user comments posted on a news article is important for online news services because comment visibility directly affects the user experience. Research on ranking comments with different metrics to measure the comment quality has shown “constructiveness” used in argument analysis is promising from a practical standpoint. In this paper, we report a case study in which this constructiveness is examined in the real world. Specifically, we examine an in-house competition to improve the performance of ranking constructive comments and demonstrate the effectiveness of the best obtained model for a commercial service. @@ -119,7 +119,7 @@ Self-Contextualized Attention for Abusive Language Identification HoracioJarquín-Vásquez Hugo JairEscalante - ManuelMontes + ManuelMontes 103–112 The use of attention mechanisms in deep learning approaches has become popular in natural language processing due to its outstanding performance. The use of these mechanisms allows one managing the importance of the elements of a sequence in accordance to their context, however, this importance has been observed independently between the pairs of elements of a sequence (self-attention) and between the application domain of a sequence (contextual attention), leading to the loss of relevant information and limiting the representation of the sequences. To tackle these particular issues we propose the self-contextualized attention mechanism, which trades off the previous limitations, by considering the internal and contextual relationships between the elements of a sequence. The proposed mechanism was evaluated in four standard collections for the abusive language identification task achieving encouraging results. It outperformed the current attention mechanisms and showed a competitive performance with respect to state-of-the-art approaches. 2021.socialnlp-1.9 @@ -152,7 +152,7 @@ <fixed-case>PANDORA</fixed-case> Talks: Personality and Demographics on <fixed-case>R</fixed-case>eddit MatejGjurković - Vanja MladenKaran + Vanja MladenKaran IvaVukojević MihaelaBošnjak JanSnajder @@ -168,7 +168,7 @@ XuemingXu YiweiZhang IanStewart - RadaMihalcea + RadaMihalcea 153–162 Many people aim for change, but not everyone succeeds. While there are a number of social psychology theories that propose motivation-related characteristics of those who persist with change, few computational studies have explored the motivational stage of personal change. In this paper, we investigate a new dataset consisting of the writings of people who manifest intention to change, some of whom persist while others do not. Using a variety of linguistic analysis techniques, we first examine the writing patterns that distinguish the two groups of people. Persistent people tend to reference more topics related to long-term self-improvement and use a more complicated writing style. Drawing on these consistent differences, we build a classifier that can reliably identify the people more likely to persist, based on their language. Our experiments provide new insights into the motivation-related behavior of people who persist with their intention to change. 2021.socialnlp-1.13 diff --git a/data/xml/2021.splurobonlp.xml b/data/xml/2021.splurobonlp.xml index 8895c4dceb..bd820f6b2e 100644 --- a/data/xml/2021.splurobonlp.xml +++ b/data/xml/2021.splurobonlp.xml @@ -34,7 +34,7 @@ Miltiadis MariosKatsakioris IoannisKonstas Pierre YvesMignotte - HelenHastie + HelenHastie 11–21 Robust situated dialog requires the ability to process instructions based on spatial information, which may or may not be available. We propose a model, based on LXMERT, that can extract spatial information from text instructions and attend to landmarks on OpenStreetMap (OSM) referred to in a natural language instruction. Whilst, OSM is a valuable resource, as with any open-sourced data, there is noise and variation in the names referred to on the map, as well as, variation in natural language instructions, hence the need for data-driven methods over rule-based systems. This paper demonstrates that the gold GPS location can be accurately predicted from the natural language instruction and metadata with 72% accuracy for previously seen maps and 64% for unseen maps. 2021.splurobonlp-1.2 @@ -46,7 +46,7 @@ TianaiDong AlbertoTestoni LucianaBenotti - RaffaellaBernardi + RaffaellaBernardi 22–31 In this paper, we define and evaluate a methodology for extracting history-dependent spatial questions from visual dialogues. We say that a question is history-dependent if it requires (parts of) its dialogue history to be interpreted. We argue that some kinds of visual questions define a context upon which a follow-up spatial question relies. We call the question that restricts the context: trigger, and we call the spatial question that requires the trigger question to be answered: zoomer. We automatically extract different trigger and zoomer pairs based on the visual property that the questions rely on (e.g. color, number). We manually annotate the automatically extracted trigger and zoomer pairs to verify which zoomers require their trigger. We implement a simple baseline architecture based on a SOTA multimodal encoder. Our results reveal that there is much room for improvement for answering history-dependent questions. 2021.splurobonlp-1.3 @@ -61,7 +61,7 @@ HaoyuWu JonathanWaxman MarcusHill - LenhartSchubert + LenhartSchubert 32–41 Understanding spatial expressions and using them appropriately is necessary for seamless and natural human-machine interaction. However, capturing the semantics and appropriate usage of spatial prepositions is notoriously difficult, because of their vagueness and polysemy. Although modern data-driven approaches are good at capturing statistical regularities in the usage, they usually require substantial sample sizes, often do not generalize well to unseen instances and, most importantly, their structure is essentially opaque to analysis, which makes diagnosing problems and understanding their reasoning process difficult. In this work, we discuss our attempt at modeling spatial senses of prepositions in English using a combination of rule-based and statistical learning approaches. Each preposition model is implemented as a tree where each node computes certain intuitive relations associated with the preposition, with the root computing the final value of the prepositional relation itself. The models operate on a set of artificial 3D “room world” environments, designed in Blender, taking the scene itself as an input. We also discuss our annotation framework used to collect human judgments employed in the model training. Both our factored models and black-box baseline models perform quite well, but the factored models will enable reasoned explanations of spatial relation judgements. 2021.splurobonlp-1.4 @@ -103,7 +103,7 @@ Interactive Reinforcement Learning for Table Balancing Robot HaeinJeon YewonKim - Bo-YeongKang + Bo-YeongKang 71–78 With the development of robotics, the use of robots in daily life is increasing, which has led to the need for anyone to easily train robots to improve robot use. Interactive reinforcement learning(IARL) is a method for robot training based on human–robot interaction; prior studies on IARL provide only limited types of feedback or require appropriately designed shaping rewards, which is known to be difficult and time-consuming. Therefore, in this study, we propose interactive deep reinforcement learning models based on voice feedback. In the proposed system, a robot learns the task of cooperative table balancing through deep Q-network using voice feedback provided by humans in real-time, with automatic speech recognition(ASR) and sentiment analysis to understand human voice feedback. As a result, an optimal policy convergence rate of up to 96% was realized, and performance was improved in all voice feedback-based models 2021.splurobonlp-1.8 diff --git a/data/xml/2021.spnlp.xml b/data/xml/2021.spnlp.xml index 1e0ce7f9fb..87c1972208 100644 --- a/data/xml/2021.spnlp.xml +++ b/data/xml/2021.spnlp.xml @@ -7,7 +7,7 @@ SujithRavi AndreasVlachos PriyankaAgrawal - AndréMartins + AndréMartins Association for Computational Linguistics
Online
August @@ -81,12 +81,12 @@ Using Hierarchical Class Structure to Improve Fine-Grained Claim Classification ErenayDayanik - AndreBlessing + AndreBlessing NicoBlokker SebastianHaunss JonasKuhn GabriellaLapesa - SebastianPadó + SebastianPadó 53–60 The analysis of public debates crucially requires the classification of political demands according to hierarchical claim ontologies (e.g. for immigration, a supercategory “Controlling Migration” might have subcategories “Asylum limit” or “Border installations”). A major challenge for automatic claim classification is the large number and low frequency of such subclasses. We address it by jointly predicting pairs of matching super- and subcategories. We operationalize this idea by (a) encoding soft constraints in the claim classifier and (b) imposing hard constraints via Integer Linear Programming. Our experiments with different claim classifiers on a German immigration newspaper corpus show consistent performance increases for joint prediction, in particular for infrequent categories and discuss the complementarity of the two approaches. 2021.spnlp-1.6 @@ -98,7 +98,7 @@ ChenyangHuang WeiYang YanshuaiCao - OsmarZaïane + OsmarZaïane LiliMou 61–66 In this paper, we propose a globally normalized model for context-free grammar (CFG)-based semantic parsing. Instead of predicting a probability, our model predicts a real-valued score at each step and does not suffer from the label bias problem. Experiments show that our approach outperforms locally normalized models on small datasets, but it does not yield improvement on a large dataset. @@ -110,7 +110,7 @@ Comparing Span Extraction Methods for Semantic Role Labeling ZhisongZhang EmmaStrubell - EduardHovy + EduardHovy 67–77 In this work, we empirically compare span extraction methods for the task of semantic role labeling (SRL). While recent progress incorporating pre-trained contextualized representations into neural encoders has greatly improved SRL F1 performance on popular benchmarks, the potential costs and benefits of structured decoding in these models have become less clear. With extensive experiments on PropBank SRL datasets, we find that more structured decoding methods outperform BIO-tagging when using static (word type) embeddings across all experimental settings. However, when used in conjunction with pre-trained contextualized word representations, the benefits are diminished. We also experiment in cross-genre and cross-lingual settings and find similar trends. We further perform speed comparisons and provide analysis on the accuracy-efficiency trade-offs among different decoding methods. 2021.spnlp-1.8 diff --git a/data/xml/2021.starsem.xml b/data/xml/2021.starsem.xml index 372d992a2f..212332c06c 100644 --- a/data/xml/2021.starsem.xml +++ b/data/xml/2021.starsem.xml @@ -4,7 +4,7 @@ Proceedings of *SEM 2021: The Tenth Joint Conference on Lexical and Computational Semantics Lun-WeiKu - ViviNastase + ViviNastase IvanVulić Association for Computational Linguistics
Online
@@ -46,8 +46,8 @@ Semantic shift in social networks BillNoble - AsadSayeed - RaquelFernández + AsadSayeed + RaquelFernández StaffanLarsson 26–37 Just as the meaning of words is tied to the communities in which they are used, so too is semantic change. But how does lexical semantic change manifest differently across different communities? In this work, we investigate the relationship between community structure and semantic change in 45 communities from the social media website Reddit. We use distributional methods to quantify lexical semantic change and induce a social network on communities, based on interactions between members. We explore the relationship between semantic change and the clustering coefficient of a community’s social network graph, as well as community size and stability. While none of these factors are found to be significant on their own, we report a significant effect of their three-way interaction. We also report on significant word-level effects of frequency and change in frequency, which replicate previous findings. @@ -74,7 +74,7 @@ Recovering Lexically and Semantically Reused Texts AnselMacLaughlin ShaobinXu - David A.Smith + David A.Smith 52–66 Writers often repurpose material from existing texts when composing new documents. Because most documents have more than one source, we cannot trace these connections using only models of document-level similarity. Instead, this paper considers methods for local text reuse detection (LTRD), detecting localized regions of lexically or semantically similar text embedded in otherwise unrelated material. In extensive experiments, we study the relative performance of four classes of neural and bag-of-words models on three LTRD tasks – detecting plagiarism, modeling journalists’ use of press releases, and identifying scientists’ citation of earlier papers. We conduct evaluations on three existing datasets and a new, publicly-available citation localization dataset. Our findings shed light on a number of previously-unexplored questions in the study of LTRD, including the importance of incorporating document-level context for predictions, the applicability of of-the-shelf neural models pretrained on “general” semantic textual similarity tasks such as paraphrase detection, and the trade-offs between more efficient bag-of-words and feature-based neural models and slower pairwise neural models. 2021.starsem-1.5 @@ -95,7 +95,7 @@ <fixed-case>N</fixed-case>eural<fixed-case>L</fixed-case>og: Natural Language Inference with Joint Neural and Logical Reasoning ZemingChen QiyueGao - Lawrence S.Moss + Lawrence S.Moss 78–88 Deep learning (DL) based language models achieve high performance on various benchmarks for Natural Language Inference (NLI). And at this time, symbolic approaches to NLI are receiving less attention. Both approaches (symbolic and DL) have their advantages and weaknesses. However, currently, no method combines them in a system to solve the task of NLI. To merge symbolic and deep learning methods, we propose an inference framework called NeuralLog, which utilizes both a monotonicity-based logical inference engine and a neural network language model for phrase alignment. Our framework models the NLI task as a classic search problem and uses the beam search algorithm to search for optimal inference paths. Experiments show that our joint logic and neural inference system improves accuracy on the NLI task and can achieve state-of-art accuracy on the SICK and MED datasets. 2021.starsem-1.7 @@ -120,7 +120,7 @@ MahsaGhaderan AminPourdabiri ZahraSayedi - BehrouzMinaei-Bidgoli + BehrouzMinaei-Bidgoli SaulehEetemadi Mohammad TaherPilehvar 99–104 @@ -133,7 +133,7 @@ <fixed-case>B</fixed-case>i<fixed-case>Q</fixed-case>u<fixed-case>AD</fixed-case>: Towards <fixed-case>QA</fixed-case> based on deeper text understanding FrankGrimm - PhilippCimiano + PhilippCimiano 105–115 Recent question answering and machine reading benchmarks frequently reduce the task to one of pinpointing spans within a certain text passage that answers the given question. Typically, these systems are not required to actually understand the text on a deeper level that allows for more complex reasoning on the information contained. We introduce a new dataset called BiQuAD that requires deeper comprehension in order to answer questions in both extractive and deductive fashion. The dataset consist of 4,190 closed-domain texts and a total of 99,149 question-answer pairs. The texts are synthetically generated soccer match reports that verbalize the main events of each match. All texts are accompanied by a structured Datalog program that represents a (logical) model of its information. We show that state-of-the-art QA models do not perform well on the challenging long form contexts and reasoning requirements posed by the dataset. In particular, transformer based state-of-the-art models achieve F1-scores of only 39.0. We demonstrate how these synthetic datasets align structured knowledge with natural text and aid model introspection when approaching complex text understanding. 2021.starsem-1.10 @@ -144,7 +144,7 @@ Evaluating <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Parser Recovery of Predicate Argument Structure via <fixed-case>C</fixed-case>omp<fixed-case>C</fixed-case>hain Analysis SagarIndurkhya BeracahYankama - Robert C.Berwick + Robert C.Berwick 116–128 Accurate recovery of predicate-argument structure from a Universal Dependency (UD) parse is central to downstream tasks such as extraction of semantic roles or event representations. This study introduces compchains, a categorization of the hierarchy of predicate dependency relations present within a UD parse. Accuracy of compchain classification serves as a proxy for measuring accurate recovery of predicate-argument structure from sentences with embedding. We analyzed the distribution of compchains in three UD English treebanks, EWT, GUM and LinES, revealing that these treebanks are sparse with respect to sentences with predicate-argument structure that includes predicate-argument embedding. We evaluated the CoNLL 2018 Shared Task UDPipe (v1.2) baseline (dependency parsing) models as compchain classifiers for the EWT, GUMS and LinES UD treebanks. Our results indicate that these three baseline models exhibit poorer performance on sentences with predicate-argument structure with more than one level of embedding; we used compchains to characterize the errors made by these parsers and present examples of erroneous parses produced by the parser that were identified using compchains. We also analyzed the distribution of compchains in 58 non-English UD treebanks and then used compchains to evaluate the CoNLL’18 Shared Task baseline model for each of these treebanks. Our analysis shows that performance with respect to compchain classification is only weakly correlated with the official evaluation metrics (LAS, MLAS and BLEX). We identify gaps in the distribution of compchains in several of the UD treebanks, thus providing a roadmap for how these treebanks may be supplemented. We conclude by discussing how compchains provide a new perspective on the sparsity of training data for UD parsers, as well as the accuracy of the resulting UD parses. 2021.starsem-1.11 @@ -182,7 +182,7 @@ DuccioPappadopulo LisaBauer MarcoFarina - Ozanİrsoy + Ozanİrsoy MohitBansal 152–159 Many modern messaging systems allow fast and synchronous textual communication among many users. The resulting sequence of messages hides a more complicated structure in which independent sub-conversations are interwoven with one another. This poses a challenge for any task aiming to understand the content of the chat logs or gather information from them. The ability to disentangle these conversations is then tantamount to the success of many downstream tasks such as summarization and question answering. Structured information accompanying the text such as user turn, user mentions, timestamps, is used as a cue by the participants themselves who need to follow the conversation and has been shown to be important for disentanglement. DAG-LSTMs, a generalization of Tree-LSTMs that can handle directed acyclic dependencies, are a natural way to incorporate such information and its non-sequential nature. In this paper, we apply DAG-LSTMs to the conversation disentanglement task. We perform our experiments on the Ubuntu IRC dataset. We show that the novel model we propose achieves state of the art status on the task of recovering reply-to relations and it is competitive on other disentanglement metrics. @@ -193,7 +193,7 @@ Toward Diverse Precondition Generation HeeyoungKwon - NathanaelChambers + NathanaelChambers NiranjanBalasubramanian 160–172 A typical goal for language understanding is to logically connect the events of a discourse, but often connective events are not described due to their commonsense nature. In order to address this deficit, we focus here on generating precondition events. Precondition generation can be framed as a sequence-to-sequence problem: given a target event, generate a possible precondition. However, in most real-world scenarios, an event can have several preconditions, which is not always suitable for standard seq2seq frameworks. We propose DiP, the Diverse Precondition generation system that can generate unique and diverse preconditions. DiP consists of three stages of the generative process – an event sampler, a candidate generator, and a post-processor. The event sampler provides control codes (precondition triggers) which the candidate generator uses to focus its generation. Post-processing further improves the results through re-ranking and filtering. Unlike other conditional generation systems, DiP automatically generates control codes without training on diverse examples. Analysis reveals that DiP improves the diversity of preconditions significantly compared to a beam search baseline. Also, manual evaluation shows that DiP generates more preconditions than a strong nucleus sampling baseline. @@ -246,7 +246,7 @@ Dependency Patterns of Complex Sentences and Semantic Disambiguation for <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation Parsing YukiYamamoto - YujiMatsumoto + YujiMatsumoto TaroWatanabe 212–221 Abstract Meaning Representation (AMR) is a sentence-level meaning representation based on predicate argument structure. One of the challenges we find in AMR parsing is to capture the structure of complex sentences which expresses the relation between predicates. Knowing the core part of the sentence structure in advance may be beneficial in such a task. In this paper, we present a list of dependency patterns for English complex sentence constructions designed for AMR parsing. With a dedicated pattern matcher, all occurrences of complex sentence constructions are retrieved from an input sentence. While some of the subordinators have semantic ambiguities, we deal with this problem through training classification models on data derived from AMR and Wikipedia corpus, establishing a new baseline for future works. The developed complex sentence patterns and the corresponding AMR descriptions will be made public. @@ -257,7 +257,7 @@ Neural Metaphor Detection with Visibility Embeddings GititKehat - JamesPustejovsky + JamesPustejovsky 222–228 We present new results for the problem of sequence metaphor labeling, using the recently developed Visibility Embeddings. We show that concatenating such embeddings to the input of a BiLSTM obtains consistent and significant improvements at almost no cost, and we present further improved results when visibility embeddings are combined with BERT. 2021.starsem-1.21 @@ -282,7 +282,7 @@ DominikSchlechtweg EnriqueCastaneda JonasKuhn - SabineSchulte im Walde + SabineSchulte im Walde 241–251 We suggest to model human-annotated Word Usage Graphs capturing fine-grained semantic proximity distinctions between word uses with a Bayesian formulation of the Weighted Stochastic Block Model, a generative model for random graphs popular in biology, physics and social sciences. By providing a probabilistic model of graded word meaning we aim to approach the slippery and yet widely used notion of word sense in a novel way. The proposed framework enables us to rigorously compare models of word senses with respect to their fit to the data. We perform extensive experiments and select the empirically most adequate model. 2021.starsem-1.23 @@ -295,7 +295,7 @@ JuliaBettinger MichaelDorna JonasKuhn - SabineSchulte im Walde + SabineSchulte im Walde 252–262 Predicting the difficulty of domain-specific vocabulary is an important task towards a better understanding of a domain, and to enhance the communication between lay people and experts. We investigate German closed noun compounds and focus on the interaction of compound-based lexical features (such as frequency and productivity) and terminology-based features (contrasting domain-specific and general language) across word representations and classifiers. Our prediction experiments complement insights from classification using (a) manually designed features to characterise termhood and compound formation and (b) compound and constituent word embeddings. We find that for a broad binary distinction into ‘easy’ vs. ‘difficult’ general-language compound frequency is sufficient, but for a more fine-grained four-class distinction it is crucial to include contrastive termhood features and compound and constituent features. 2021.starsem-1.24 @@ -306,7 +306,7 @@ Spurious Correlations in Cross-Topic Argument Mining Terne SashaThorn Jakobsen MariaBarrett - AndersSøgaard + AndersSøgaard 263–277 Recent work in cross-topic argument mining attempts to learn models that generalise across topics rather than merely relying on within-topic spurious correlations. We examine the effectiveness of this approach by analysing the output of single-task and multi-task models for cross-topic argument mining, through a combination of linear approximations of their decision boundaries, manual feature grouping, challenge examples, and ablations across the input vocabulary. Surprisingly, we show that cross-topic models still rely mostly on spurious correlations and only generalise within closely related topics, e.g., a model trained only on closed-class words and a few common open-class words outperforms a state-of-the-art cross-topic model on distant target topics. 2021.starsem-1.25 diff --git a/data/xml/2021.sustainlp.xml b/data/xml/2021.sustainlp.xml index 3d3b21443f..0d51ba3eab 100644 --- a/data/xml/2021.sustainlp.xml +++ b/data/xml/2021.sustainlp.xml @@ -4,7 +4,7 @@ Proceedings of the Second Workshop on Simple and Efficient Natural Language Processing Association for Computational Linguistics - Nafise SadatMoosavi + Nafise SadatMoosavi IrynaGurevych AngelaFan ThomasWolf @@ -38,7 +38,7 @@ Evaluating the carbon footprint of <fixed-case>NLP</fixed-case> methods: a survey and analysis of existing tools NesrineBannour SaharGhannay - AurélieNévéol + AurélieNévéol Anne-LaureLigozat 11–21 Modern Natural Language Processing (NLP) makes intensive use of deep learning methods because of the accuracy they offer for a variety of applications. Due to the significant environmental impact of deep learning, cost-benefit analysis including carbon footprint as well as accuracy measures has been suggested to better document the use of NLP methods for research or deployment. In this paper, we review the tools that are available to measure energy use and CO2 emissions of NLP methods. We describe the scope of the measures provided and compare the use of six tools (carbon tracker, experiment impact tracker, green algorithms, ML CO2 impact, energy usage and cumulator) on named entity recognition experiments performed on different computational set-ups (local server vs. computing facility). Based on these findings, we propose actionable recommendations to accurately measure the environmental impact of NLP experiments. @@ -142,8 +142,8 @@ GengyuWang XiaochenHou DiyiYang - KathleenMcKeown - JingHuang + KathleenMcKeown + JingHuang 79–85 Large pre-trained language models (PLMs) have led to great success on various commonsense question answering (QA) tasks in an end-to-end fashion. However, little attention has been paid to what commonsense knowledge is needed to deeply characterize these QA tasks. In this work, we proposed to categorize the semantics needed for these tasks using the SocialIQA as an example. Building upon our labeled social knowledge categories dataset on top of SocialIQA, we further train neural QA models to incorporate such social knowledge categories and relation information from a knowledge base. Unlike previous work, we observe our models with semantic categorizations of social knowledge can achieve comparable performance with a relatively simple model and smaller size compared to other complex approaches. 2021.sustainlp-1.10 @@ -171,7 +171,7 @@ Lucas HøybergPuvis de Chavannes Mads Guldborg KjeldgaardKongsbak TimmieRantzau - LeonDerczynski + LeonDerczynski 96–118 Training large language models can consume a large amount of energy. We hypothesize that the language model’s configuration impacts its energy consumption, and that there is room for power consumption optimisation in modern large language models. To investigate these claims, we introduce a power consumption factor to the objective function, and explore the range of models and hyperparameter configurations that affect power. We identify multiple configuration factors that can reduce power consumption during language model training while retaining model quality. 2021.sustainlp-1.12 diff --git a/data/xml/2021.tacl.xml b/data/xml/2021.tacl.xml index 7d548deb83..98cb433b94 100644 --- a/data/xml/2021.tacl.xml +++ b/data/xml/2021.tacl.xml @@ -26,8 +26,8 @@ Revisiting Multi-Domain Machine Translation - MinhQuangPham - Josep MariaCrego + MinhQuangPham + Josep MariaCrego FrançoisYvon 10.1162/tacl_a_00351 When building machine translation systems, one often needs to make the best out of heterogeneous sets of parallel data in training, and to robustly handle inputs from unexpected domains in testing. This multi-domain scenario has attracted a lot of recent work that fall under the general umbrella of transfer learning. In this study, we revisit multi-domain machine translation, with the aim to formulate the motivations for developing such systems and the associated expectations with respect to performance. Our experiments with a large sample of multi-domain systems show that most of these expectations are hardly met and suggest that further work is needed to better analyze the current behaviour of multi-domain systems and to make them fully hold their promises. @@ -87,7 +87,7 @@ Modeling Content and Context with Deep Relational Learning - Maria LeonorPacheco + Maria LeonorPacheco DanGoldwasser 10.1162/tacl_a_00357 Building models for realistic natural language tasks requires dealing with long texts and accounting for complicated structural dependencies. Neural-symbolic representations have emerged as a way to combine the reasoning capabilities of symbolic methods, with the expressiveness of neural networks. However, most of the existing frameworks for combining neural and symbolic representations have been designed for classic relational learning tasks that work over a universe of symbolic entities and relations. In this paper, we present DRaiL, an open-source declarative framework for specifying deep relational models, designed to support a variety of NLP scenarios. Our framework supports easy integration with expressive language encoders, and provides an interface to study the interactions between representation, inference and learning. @@ -99,7 +99,7 @@ Recursive Non-Autoregressive Graph-to-Graph Transformer for Dependency Parsing with Iterative Refinement AlirezaMohammadshahi - JamesHenderson + JamesHenderson 10.1162/tacl_a_00358 We propose the Recursive Non-autoregressive Graph-to-Graph Transformer architecture (RNGTr) for the iterative refinement of arbitrary graphs through the recursive application of a non-autoregressive Graph-to-Graph Transformer and apply it to syntactic dependency parsing. We demonstrate the power and effectiveness of RNGTr on several dependency corpora, using a refinement model pre-trained with BERT. We also introduce Syntactic Transformer (SynTr), a non-recursive parser similar to our refinement model. RNGTr can improve the accuracy of a variety of initial parsers on 13 languages from the Universal Dependencies Treebanks, English and Chinese Penn Treebanks, and the German CoNLL2009 corpus, even improving over the new state-of-the-art results achieved by SynTr, significantly improving the state-of-the-art for all corpora tested. 120–138 @@ -112,7 +112,7 @@ RyanCotterell LawrenceWolf-Sonkin DamiánBlasi - HannaWallach + HannaWallach 10.1162/tacl_a_00355 We use large-scale corpora in six different gendered languages, along with tools from NLP and information theory, to test whether there is a relationship between the grammatical genders of inanimate nouns and the adjectives used to describe those nouns. For all six languages, we find that there is a statistically significant relationship. We also find that there are statistically significant relationships between the grammatical genders of inanimate nouns and the verbs that take those nouns as direct objects, as indirect objects, and as subjects. We defer deeper investigation of these relationships for future work. 139–159 @@ -181,7 +181,7 @@ Infusing Finetuning with Semantic Dependencies ZhaofengWu HaoPeng - Noah A.Smith + Noah A.Smith 10.1162/tacl_a_00363 For natural language processing systems, two kinds of evidence support the use of text representations from neural language models “pretrained” on large unannotated corpora: performance on application-inspired benchmarks (Peters et al., 2018, inter alia), and the emergence of syntactic abstractions in those representations (Tenney et al., 2019, inter alia). On the other hand, the lack of grounded supervision calls into question how well these representations can ever capture meaning (Bender and Koller, 2020). We apply novel probes to recent language models— specifically focusing on predicate-argument structure as operationalized by semantic dependencies (Ivanova et al., 2012)—and find that, unlike syntax, semantics is not brought to the surface by today’s pretrained models. We then use convolutional graph encoders to explicitly incorporate semantic parses into task-specific finetuning, yielding benefits to natural language understanding (NLU) tasks in the GLUE benchmark. This approach demonstrates the potential for general-purpose (rather than task-specific) linguistic supervision, above and beyond conventional pretraining and finetuning. Several diagnostics help to localize the benefits of our approach.1 226–242 @@ -218,8 +218,8 @@ Extractive Opinion Summarization in Quantized Transformer Spaces StefanosAngelidis - Reinald KimAmplayo - YoshihikoSuhara + Reinald KimAmplayo + YoshihikoSuhara XiaolanWang MirellaLapata 10.1162/tacl_a_00366 @@ -256,7 +256,7 @@ YiLuan JacobEisenstein KristinaToutanova - MichaelCollins + MichaelCollins 10.1162/tacl_a_00369 Dual encoders perform retrieval by encoding documents and queries into dense low-dimensional vectors, scoring each document by its inner product with the query. We investigate the capacity of this architecture relative to sparse bag-of-words models and attentional neural networks. Using both theoretical and empirical analysis, we establish connections between the encoding dimension, the margin between gold and lower-ranked documents, and the document length, suggesting limitations in the capacity of fixed-length encodings to support precise retrieval of long documents. Building on these insights, we propose a simple neural model that combines the efficiency of dual encoders with some of the expressiveness of more costly attentional architectures, and explore sparse-dense hybrids to capitalize on the precision of sparse retrieval. These models outperform strong alternatives in large-scale retrieval. 329–345 @@ -304,12 +304,12 @@ <fixed-case>S</fixed-case>umm<fixed-case>E</fixed-case>val: Re-evaluating Summarization Evaluation - Alexander R.Fabbri + Alexander R.Fabbri WojciechKryściński BryanMcCann CaimingXiong RichardSocher - DragomirRadev + DragomirRadev 10.1162/tacl_a_00373 The scarcity of comprehensive up-to-date studies on evaluation metrics for text summarization and the lack of consensus regarding evaluation protocols continue to inhibit progress. We address the existing shortcomings of summarization evaluation methods along five dimensions: 1) we re-evaluate 14 automatic evaluation metrics in a comprehensive and consistent fashion using neural summarization model outputs along with expert and crowd-sourced human annotations; 2) we consistently benchmark 23 recent summarization models using the aforementioned automatic evaluation metrics; 3) we assemble the largest collection of summaries generated by models trained on the CNN/DailyMail news dataset and share it in a unified format; 4) we implement and share a toolkit that provides an extensible and unified API for evaluating summarization models across a broad range of automatic metrics; and 5) we assemble and share the largest and most diverse, in terms of model types, collection of human judgments of model-generated summaries on the CNN/Daily Mail dataset annotated by both expert judges and crowd-source workers. We hope that this work will help promote a more complete evaluation protocol for text summarization as well as advance research in developing evaluation metrics that better correlate with human judgments. 391–409 @@ -351,7 +351,7 @@ MatthewLamm TomKwiatkowski DipanjanDas - MichaelCollins + MichaelCollins 10.1162/tacl_a_00377 Models for question answering, dialogue agents, and summarization often interpret the meaning of a sentence in a rich context and use that meaning in a new context. Taking excerpts of text can be problematic, as key pieces may not be explicit in a local window. We isolate and define the problem of sentence decontextualization: taking a sentence together with its context and rewriting it to be interpretable out of context, while preserving its meaning. We describe an annotation procedure, collect data on the Wikipedia corpus, and use the data to train models to automatically decontextualize sentences. We present preliminary studies that show the value of sentence decontextualization in a user-facing task, and as preprocessing for systems that perform document understanding. We argue that decontextualization is an important subtask in many downstream applications, and that the definitions and resources provided can benefit tasks that operate on sentences that occur in a richer context. 447–461 @@ -421,7 +421,7 @@ Characterizing <fixed-case>E</fixed-case>nglish Variation across Social Media Communities with <fixed-case>BERT</fixed-case> - LiLucy + LiLucy DavidBamman 10.1162/tacl_a_00383 Much previous work characterizing language variation across Internet social groups has focused on the types of words used by these groups. We extend this type of study by employing BERT to characterize variation in the senses of words as well, analyzing two months of English comments in 474 Reddit communities. The specificity of different sense clusters to a community, combined with the specificity of a community’s unique word types, is used to identify cases where a social group’s language deviates from the norm. We validate our metrics using user-created glossaries and draw on sociolinguistic theories to connect language variation with trends in community behavior. We find that communities with highly distinctive language are medium-sized, and their loyal and highly engaged users interact in dense networks. @@ -436,7 +436,7 @@ Le HongLong YunshanMa WenqiangLei - Tat-SengChua + Tat-SengChua 10.1162/tacl_a_00384 Tracking dialogue states to better interpret user goals and feed downstream policy learning is a bottleneck in dialogue management. Common practice has been to treat it as a problem of classifying dialogue content into a set of pre-defined slot-value pairs, or generating values for different slots given the dialogue history. Both have limitations on considering dependencies that occur on dialogues, and are lacking of reasoning capabilities. This paper proposes to track dialogue states gradually with reasoning over dialogue turns with the help of the back-end data. Empirical results demonstrate that our method outperforms the state-of-the-art methods in terms of joint belief accuracy for MultiWOZ 2.1, a large-scale human–human dialogue dataset across multiple domains. 557–569 @@ -460,7 +460,7 @@ Context-aware Adversarial Training for Name Regularity Bias in Named Entity Recognition AbbasGhaddar - PhilippeLanglais + PhilippeLanglais AhmadRashid MehdiRezagholizadeh 10.1162/tacl_a_00386 @@ -490,7 +490,7 @@ MeladelMistica BaharSalehi HangLi - TimothyBaldwin + TimothyBaldwin JianzhongQi 10.1162/tacl_a_00388 While pretrained language models (LMs) have driven impressive gains over morpho-syntactic and semantic tasks, their ability to model discourse and pragmatic phenomena is less clear. As a step towards a better understanding of their discourse modeling capabilities, we propose a sentence intrusion detection task. We examine the performance of a broad range of pretrained LMs on this detection task for English. Lacking a dataset for the task, we introduce INSteD, a novel intruder sentence detection dataset, containing 170,000+ documents constructed from English Wikipedia and CNN news articles. Our experiments show that pretrained LMs perform impressively in in-domain evaluation, but experience a substantial drop in the cross-domain setting, indicating limited generalization capacity. Further results over a novel linguistic probe dataset show that there is substantial room for improvement, especially in the cross- domain setting. @@ -516,7 +516,7 @@ QiLiu LeiYu LauraRimell - PhilBlunsom + PhilBlunsom 10.1162/tacl_a_00390 Direct decoding for task-oriented dialogue is known to suffer from the explaining-away effect, manifested in models that prefer short and generic responses. Here we argue for the use of Bayes’ theorem to factorize the dialogue task into two models, the distribution of the context given the response, and the prior for the response itself. This approach, an instantiation of the noisy channel model, both mitigates the explaining-away effect and allows the principled incorporation of large pretrained models for the response prior. We present extensive experiments showing that a noisy channel model decodes better responses compared to direct decoding and that a two-stage pretraining strategy, employing both open-domain and task-oriented dialogue data, improves over randomly initialized models. 657–674 @@ -564,8 +564,8 @@ Classifying Argumentative Relations Using Logical Mechanisms and Argumentation Schemes YohanJo SeojinBang - ChrisReed - EduardHovy + ChrisReed + EduardHovy 10.1162/tacl_a_00394 While argument mining has achieved significant success in classifying argumentative relations between statements (support, attack, and neutral), we have a limited computational understanding of logical mechanisms that constitute those relations. Most recent studies rely on black-box models, which are not as linguistically insightful as desired. On the other hand, earlier studies use rather simple lexical features, missing logical relations between statements. To overcome these limitations, our work classifies argumentative relations based on four logical and theory-informed mechanisms between two statements, namely, (i) factual consistency, (ii) sentiment coherence, (iii) causal relation, and (iv) normative relation. We demonstrate that our operationalization of these logical mechanisms classifies argumentative relations without directly training on data labeled with the relations, significantly better than several unsupervised baselines. We further demonstrate that these mechanisms also improve supervised classifiers through representation learning. 721–739 @@ -620,7 +620,7 @@ DanielAndor EunsolChoi Livio BaldiniSoares - MichaelCollins + MichaelCollins 10.1162/tacl_a_00398 A question answering system that in addition to providing an answer provides an explanation of the reasoning that leads to that answer has potential advantages in terms of debuggability, extensibility, and trust. To this end, we propose QED, a linguistically informed, extensible framework for explanations in question answering. A QED explanation specifies the relationship between a question and answer according to formal semantic notions such as referential equality, sentencehood, and entailment. We describe and publicly release an expert-annotated dataset of QED explanations built upon a subset of the Google Natural Questions dataset, and report baseline models on two tasks—post- hoc explanation generation given an answer, and joint question answering and explanation generation. In the joint setting, a promising result suggests that training on a relatively small amount of QED data can improve question answering. In addition to describing the formal, language-theoretic motivations for the QED approach, we describe a large user study showing that the presence of QED explanations significantly improves the ability of untrained raters to spot errors made by a strong neural QA baseline. 790–806 @@ -645,7 +645,7 @@ Let’s Play Mono-Poly: <fixed-case>BERT</fixed-case> Can Reveal Words’ Polysemy Level and Partitionability into Senses - AinaGarí Soler + AinaGarí Soler MariannaApidianaki 10.1162/tacl_a_00400 Pre-trained language models (LMs) encode rich information about linguistic structure but their knowledge about lexical polysemy remains unclear. We propose a novel experimental setup for analyzing this knowledge in LMs specifically trained for different languages (English, French, Spanish, and Greek) and in multilingual BERT. We perform our analysis on datasets carefully designed to reflect different sense distributions, and control for parameters that are highly correlated with polysemy such as frequency and grammatical category. We demonstrate that BERT-derived representations reflect words’ polysemy level and their partitionability into senses. Polysemy-related information is more clearly present in English BERT embeddings, but models in other languages also manage to establish relevant distinctions between words at different polysemy levels. Our results contribute to a better understanding of the knowledge encoded in contextualized representations and open up new avenues for multilingual lexical semantics research. @@ -659,7 +659,7 @@ BeatriceSavoldi MarcoGaido LuisaBentivogli - MatteoNegri + MatteoNegri MarcoTurchi 10.1162/tacl_a_00401 AbstractMachine translation (MT) technology has facilitated our daily tasks by providing accessible shortcuts for gathering, processing, and communicating information. However, it can suffer from biases that harm users and society at large. As a relatively new field of inquiry, studies of gender bias in MT still lack cohesion. This advocates for a unified framework to ease future research. To this end, we: i) critically review current conceptualizations of bias in light of theoretical insights from related disciplines, ii) summarize previous analyses aimed at assessing gender bias in MT, iii) discuss the mitigating strategies proposed so far, and iv) point toward potential directions for future work. @@ -672,7 +672,7 @@ Neural Event Semantics for Grounded Language Understanding ShyamalBuch LiFei-Fei - Noah D.Goodman + Noah D.Goodman 10.1162/tacl_a_00402 We present a new conjunctivist framework, neural event semantics (NES), for compositional grounded language understanding. Our approach treats all words as classifiers that compose to form a sentence meaning by multiplying output scores. These classifiers apply to spatial regions (events) and NES derives its semantic structure from language by routing events to different classifier argument inputs via soft attention. NES is trainable end-to-end by gradient descent with minimal supervision. We evaluate our method on compositional grounded language tasks in controlled synthetic and real-world settings. NES offers stronger generalization capability than standard function-based compositional frameworks, while improving accuracy over state-of-the-art neural methods on real-world language tasks. 875–890 @@ -682,7 +682,7 @@ Sensitivity as a Complexity Measure for Sequence Classification Tasks MichaelHahn - DanJurafsky + DanJurafsky RichardFutrell 10.1162/tacl_a_00403 We introduce a theoretical framework for understanding and predicting the complexity of sequence classification tasks, using a novel extension of the theory of Boolean function sensitivity. The sensitivity of a function, given a distribution over input sequences, quantifies the number of disjoint subsets of the input sequence that can each be individually changed to change the output. We argue that standard sequence classification methods are biased towards learning low-sensitivity functions, so that tasks requiring high sensitivity are more difficult. To that end, we show analytically that simple lexical classifiers can only express functions of bounded sensitivity, and we show empirically that low-sensitivity functions are easier to learn for LSTMs. We then estimate sensitivity on 15 NLP tasks, finding that sensitivity is higher on challenging tasks collected in GLUE than on simple text classification tasks, and that sensitivity predicts the performance both of simple lexical classifiers and of vanilla BiLSTMs without pretrained contextualized embeddings. Within a task, sensitivity predicts which inputs are hard for such simple models. Our results suggest that the success of massively pretrained contextual representations stems in part because they provide representations from which information can be extracted by low-sensitivity decoders. @@ -743,7 +743,7 @@ Multimodal Pretraining Unmasked: A Meta-Analysis and a Unified Framework of Vision-and-Language <fixed-case>BERT</fixed-case>s EmanueleBugliarello RyanCotterell - NaoakiOkazaki + NaoakiOkazaki DesmondElliott 10.1162/tacl_a_00408 Large-scale pretraining and task-specific fine- tuning is now the standard methodology for many tasks in computer vision and natural language processing. Recently, a multitude of methods have been proposed for pretraining vision and language BERTs to tackle challenges at the intersection of these two key areas of AI. These models can be categorized into either single-stream or dual-stream encoders. We study the differences between these two categories, and show how they can be unified under a single theoretical framework. We then conduct controlled experiments to discern the empirical differences between five vision and language BERTs. Our experiments show that training data and hyperparameters are responsible for most of the differences between the reported results, but they also reveal that the embedding layer plays a crucial role in these massive models. @@ -755,7 +755,7 @@ Maintaining Common Ground in Dynamic Environments TakumaUdagawa - AkikoAizawa + AkikoAizawa 10.1162/tacl_a_00409 Common grounding is the process of creating and maintaining mutual understandings, which is a critical aspect of sophisticated human communication. While various task settings have been proposed in existing literature, they mostly focus on creating common ground under a static context and ignore the aspect of maintaining them overtime under dynamic context. In this work, we propose a novel task setting to study the ability of both creating and maintaining common ground in dynamic environments. Based on our minimal task formulation, we collected a large-scale dataset of 5,617 dialogues to enable fine-grained evaluation and analysis of various dialogue systems. Through our dataset analyses, we highlight novel challenges introduced in our setting, such as the usage of complex spatio-temporal expressions to create and maintain common ground. Finally, we conduct extensive experiments to assess the capabilities of our baseline dialogue system and discuss future prospects of our research. 995–1011 @@ -769,8 +769,8 @@ NoraKassner ShauliRavfogel AbhilashaRavichander - EduardHovy - HinrichSchütze + EduardHovy + HinrichSchütze YoavGoldberg 10.1162/tacl_a_00410 Consistency of a model—that is, the invariance of its behavior under meaning-preserving alternations in its input—is a highly desirable property in natural language processing. In this paper we study the question: Are Pretrained Language Models (PLMs) consistent with respect to factual knowledge? To this end, we create ParaRel🤘, a high-quality resource of cloze-style query English paraphrases. It contains a total of 328 paraphrases for 38 relations. Using ParaRel🤘, we show that the consistency of all PLMs we experiment with is poor— though with high variance between relations. Our analysis of the representational spaces of PLMs suggests that they have a poor structure and are currently not suitable for representing knowledge robustly. Finally, we propose a method for improving model consistency and experimentally demonstrate its effectiveness.1 @@ -800,7 +800,7 @@ WilliamMerrill YoavGoldberg RoySchwartz - Noah A.Smith + Noah A.Smith 10.1162/tacl_a_00412 Language models trained on billions of tokens have recently led to unprecedented results on many NLP tasks. This success raises the question of whether, in principle, a system can ever “understand” raw text without access to some form of grounding. We formally investigate the abilities of ungrounded systems to acquire meaning. Our analysis focuses on the role of “assertions”: textual contexts that provide indirect clues about the underlying semantics. We study whether assertions enable a system to emulate representations preserving semantic relations like equivalence. We find that assertions enable semantic emulation of languages that satisfy a strong notion of semantic transparency. However, for classes of languages where the same expression can take different values in different contexts, we show that emulation can become uncomputable. Finally, we discuss differences between our formal model and natural language, exploring how our results generalize to a modal setting and other semantic relations. Together, our results suggest that assertions in code or language do not provide sufficient signal to fully emulate semantic representations. We formalize ways in which ungrounded language models appear to be fundamentally limited in their ability to “understand”. 1047–1060 @@ -816,7 +816,7 @@ Mohammad AliKhan YinYang HassanSajjad - PreslavNakov + PreslavNakov DemingChen MarianneWinslett 10.1162/tacl_a_00413 @@ -829,7 +829,7 @@ He Thinks He Knows Better than the Doctors: <fixed-case>BERT</fixed-case> for Event Factuality Fails on Pragmatics NanjiangJiang - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe 10.1162/tacl_a_00414 We investigate how well BERT performs on predicting factuality in several existing English datasets, encompassing various linguistic constructions. Although BERT obtains a strong performance on most datasets, it does so by exploiting common surface patterns that correlate with certain factuality labels, and it fails on instances where pragmatic reasoning is necessary. Contrary to what the high performance suggests, we are still far from having a robust system for factuality prediction. 1081–1097 @@ -856,7 +856,7 @@ <fixed-case>M</fixed-case>asakha<fixed-case>NER</fixed-case>: Named Entity Recognition for <fixed-case>A</fixed-case>frican Languages - David IfeoluwaAdelani + David IfeoluwaAdelani JadeAbbott GrahamNeubig DanielD’souza @@ -875,7 +875,7 @@ AremuAnuoluwapo CatherineGitau DergueneMbaye - JesujobaAlabi + JesujobaAlabi Seid MuhieYimam Tajuddeen RabiuGwadabe IgnatiusEzeani @@ -1078,7 +1078,7 @@ Partially Supervised Named Entity Recognition via the Expected Entity Ratio Loss ThomasEffland - MichaelCollins + MichaelCollins 10.1162/tacl_a_00429 We study learning named entity recognizers in the presence of missing entity annotations. We approach this setting as tagging with latent variables and propose a novel loss, the Expected Entity Ratio, to learn models in the presence of systematically missing tags. We show that our approach is both theoretically sound and empirically useful. Experimentally, we find that it meets or exceeds performance of strong and state-of-the-art baselines across a variety of languages, annotation scenarios, and amounts of labeled data. In particular, we find that it significantly outperforms the previous state-of-the-art methods from Mayhew et al. (2019) and Li et al. (2021) by +12.7 and +2.3 F1 score in a challenging setting with only 1,000 biased annotations, averaged across 7 datasets. We also show that, when combined with our approach, a novel sparse annotation scheme outperforms exhaustive annotation for modest annotation budgets.1 1320–1335 @@ -1148,8 +1148,8 @@ NoraKassner ShauliRavfogel AbhilashaRavichander - EduardHovy - HinrichSchütze + EduardHovy + HinrichSchütze YoavGoldberg 10.1162/tacl_x_00455 During production of this paper, an error was introduced to the formula on the bottom of the right column of page 1020. In the last two terms of the formula, the n and m subscripts were swapped. The correct formula is:Lc=∑n=1k∑m=n+1kDKL(Qnri∥Qmri)+DKL(Qmri∥Qnri)The paper has been updated. @@ -1161,7 +1161,7 @@ Self-Diagnosis and Self-Debiasing: A Proposal for Reducing Corpus-Based Bias in <fixed-case>NLP</fixed-case> TimoSchick SahanaUdupa - HinrichSchütze + HinrichSchütze 10.1162/tacl_a_00434 This paper contains prompts and model outputs that are offensive in nature. When trained on large, unfiltered crawls from the Internet, language models pick up and reproduce all kinds of undesirable biases that can be found in the data: They often generate racist, sexist, violent, or otherwise toxic language. As large models require millions of training examples to achieve good performance, it is difficult to completely prevent them from being exposed to such content. In this paper, we first demonstrate a surprising finding: Pretrained language models recognize, to a considerable degree, their undesirable biases and the toxicity of the content they produce. We refer to this capability as self-diagnosis. Based on this finding, we then propose a decoding algorithm that, given only a textual description of the undesired behavior, reduces the probability of a language model producing problematic text. We refer to this approach as self-debiasing. Self-debiasing does not rely on manually curated word lists, nor does it require any training data or changes to the model’s parameters. While we by no means eliminate the issue of language models generating biased text, we believe our approach to be an important step in this direction.1 1408–1424 @@ -1253,7 +1253,7 @@ DavidFrancis EllaRabinovich FarhanSamir - DavidMortensen + DavidMortensen SuzanneStevenson 10.1162/tacl_a_00441 We adopt an evolutionary view on language change in which cognitive factors (in addition to social ones) affect the fitness of words and their success in the linguistic ecosystem. Specifically, we propose a variety of psycholinguistic factors—semantic, distributional, and phonological—that we hypothesize are predictive of lexical decline, in which words greatly decrease in frequency over time. Using historical data across three languages (English, French, and German), we find that most of our proposed factors show a significant difference in the expected direction between each curated set of declining words and their matched stable words. Moreover, logistic regression analyses show that semantic and distributional factors are significant in predicting declining words. Further diachronic analysis reveals that declining words tend to decrease in the diversity of their lexical contexts over time, gradually narrowing their ‘ecological niches’. @@ -1276,7 +1276,7 @@ Word Representation Learning in Multimodal Pre-Trained Transformers: An Intrinsic Evaluation SandroPezzelle EceTakmaz - RaquelFernández + RaquelFernández 10.1162/tacl_a_00443 This study carries out a systematic intrinsic evaluation of the semantic representations learned by state-of-the-art pre-trained multimodal Transformers. These representations are claimed to be task-agnostic and shown to help on many downstream language-and-vision tasks. However, the extent to which they align with human semantic intuitions remains unclear. We experiment with various models and obtain static word representations from the contextualized ones they learn. We then evaluate them against the semantic judgments provided by human speakers. In line with previous evidence, we observe a generalized advantage of multimodal representations over language- only ones on concrete word pairs, but not on abstract ones. On the one hand, this confirms the effectiveness of these models to align language and vision, which results in better semantic representations for concepts that are grounded in images. On the other hand, models are shown to follow different representation learning patterns, which sheds some light on how and when they perform multimodal integration. 1563–1579 diff --git a/data/xml/2021.tal.xml b/data/xml/2021.tal.xml index c56b5275f9..adc44fc8e6 100644 --- a/data/xml/2021.tal.xml +++ b/data/xml/2021.tal.xml @@ -3,9 +3,9 @@ Traitement Automatique des Langues, Volume 62, Numéro 1 : Varia [Varia] - CécileFabre + CécileFabre EmmanuelMorin - SophieRosset + SophieRosset PascaleSébillot ATALA (Association pour le Traitement Automatique des Langues)
France
@@ -40,8 +40,8 @@ Traitement Automatique des Langues, Volume 62, Numéro 2 : Nouvelles applications du TAL [New applications in NLP] - GéraldineDamnati - DianaInkpen + GéraldineDamnati + DianaInkpen ATALA (Association pour le Traitement Automatique des Langues)
France
2021 @@ -77,7 +77,7 @@ Traitement Automatique des Langues, Volume 62, Numéro 3 : Diversité Linguistique [Linguistic Diversity in Natural Language Processing] AarneRanta - CyrilGoutte + CyrilGoutte ATALA (Association pour le Traitement Automatique des Langues)
France
2021 diff --git a/data/xml/2021.teachingnlp.xml b/data/xml/2021.teachingnlp.xml index 4d16a76578..d9103839b1 100644 --- a/data/xml/2021.teachingnlp.xml +++ b/data/xml/2021.teachingnlp.xml @@ -5,7 +5,7 @@ Proceedings of the Fifth Workshop on Teaching NLP DavidJurgens VaradaKolhatkar - LucyLi + LucyLi MargotMieskes TedPedersen Association for Computational Linguistics @@ -117,7 +117,7 @@
From back to the roots into the gated woods: Deep learning for <fixed-case>NLP</fixed-case> - BarbaraPlank + BarbaraPlank 59–61 Deep neural networks have revolutionized many fields, including Natural Language Processing. This paper outlines teaching materials for an introductory lecture on deep learning in Natural Language Processing (NLP). The main submitted material covers a summer school lecture on encoder-decoder models. Complementary to this is a set of jupyter notebook slides from earlier teaching, on which parts of the lecture were based on. The main goal of this teaching material is to provide an overview of neural network approaches to natural language processing, while linking modern concepts back to the roots showing traditional essential counterparts. The lecture departs from count-based statistical methods and spans up to gated recurrent networks and attention, which is ubiquitous in today’s NLP. 2021.teachingnlp-1.9 @@ -146,7 +146,7 @@ Gaining Experience with Structured Data: Using the Resources of Dialog State Tracking Challenge 2 - RonnieSmith + RonnieSmith 70–79 This paper describes a class project for a recently introduced undergraduate NLP course that gives computer science students the opportunity to explore the data of Dialog State Tracking Challenge 2 (DSTC 2). Student background, curriculum choices, and project details are discussed. The paper concludes with some instructor advice and final reflections. 2021.teachingnlp-1.12 @@ -185,7 +185,7 @@ Introducing Information Retrieval for Biomedical Informatics Students SanyaTaneja - RichardBoyce + RichardBoyce WilliamReynolds DenisNewman-Griffis 96–98 @@ -246,7 +246,7 @@ Natural Language Processing for Computer Scientists and Data Scientists at a Large State University - CaseyKennington + CaseyKennington 115–124 The field of Natural Language Processing (NLP) changes rapidly, requiring course offerings to adjust with those changes, and NLP is not just for computer scientists; it’s a field that should be accessible to anyone who has a sufficient background. In this paper, I explain how students with Computer Science and Data Science backgrounds can be well-prepared for an upper-division NLP course at a large state university. The course covers probability and information theory, elementary linguistics, machine and deep learning, with an attempt to balance theoretical ideas and concepts with practical applications. I explain the course objectives, topics and assignments, reflect on adjustments to the course over the last four years, as well as feedback from students. 2021.teachingnlp-1.21 @@ -275,7 +275,7 @@ The Online Pivot: Lessons Learned from Teaching a Text and Data Mining Course in Lockdown, Enhancing online Teaching with Pair Programming and Digital Badges - BeatriceAlex + BeatriceAlex ClareLlewellyn PawelOrzechowski MariaBoutchkova diff --git a/data/xml/2021.textgraphs.xml b/data/xml/2021.textgraphs.xml index 06f343542a..f9c5570d38 100644 --- a/data/xml/2021.textgraphs.xml +++ b/data/xml/2021.textgraphs.xml @@ -8,7 +8,7 @@ VarvaraLogacheva AbhikJana DmitryUstalov - PeterJansen + PeterJansen Association for Computational Linguistics
Mexico City, Mexico
June @@ -37,7 +37,7 @@ Leonardo F. R.Ribeiro PhilippDufter IrynaGurevych - HinrichSchütze + HinrichSchütze 10–21 We present Graformer, a novel Transformer-based encoder-decoder architecture for graph-to-text generation. With our novel graph self-attention, the encoding of a node relies on all nodes in the input graph - not only direct neighbors - facilitating the detection of global patterns. We represent the relation between two nodes as the length of the shortest path between them. Graformer learns to weight these node-node relations differently for different attention heads, thus virtually learning differently connected views of the input graph. We evaluate Graformer on two popular graph-to-text generation benchmarks, AGENDA and WebNLG, where it achieves strong performance while using many fewer parameters than other approaches. 2021.textgraphs-1.2 @@ -71,7 +71,7 @@ <fixed-case>GENE</fixed-case>: Global Event Network Embedding QiZeng ManlingLi - TuanLai + TuanLai HengJi MohitBansal HanghangTong @@ -84,8 +84,8 @@ Learning Clause Representation from Dependency-Anchor Graph for Connective Prediction YanjunGao - Ting-HaoHuang - Rebecca J.Passonneau + Ting-HaoHuang + Rebecca J.Passonneau 54–66 Semantic representation that supports the choice of an appropriate connective between pairs of clauses inherently addresses discourse coherence, which is important for tasks such as narrative understanding, argumentation, and discourse parsing. We propose a novel clause embedding method that applies graph learning to a data structure we refer to as a dependency-anchor graph. The dependency anchor graph incorporates two kinds of syntactic information, constituency structure, and dependency relations, to highlight the subject and verb phrase relation. This enhances coherence-related aspects of representation. We design a neural model to learn a semantic representation for clauses from graph convolution over latent representations of the subject and verb phrase. We evaluate our method on two new datasets: a subset of a large corpus where the source texts are published novels, and a new dataset collected from students’ essays. The results demonstrate a significant improvement over tree-based models, confirming the importance of emphasizing the subject and verb phrase. The performance gap between the two datasets illustrates the challenges of analyzing student’s written text, plus a potential evaluation task for coherence modeling and an application for suggesting revisions to students. 2021.textgraphs-1.6 @@ -107,7 +107,7 @@ Selective Attention Based Graph Convolutional Networks for Aspect-Level Sentiment Classification XiaochenHou - JingHuang + JingHuang GuangtaoWang PengQi XiaodongHe @@ -169,7 +169,7 @@ LiangMa Tanay KumarSaha DiLiu - JoelTetreault + JoelTetreault AlejandroJaimes 132–137 Recent works show that the graph structure of sentences, generated from dependency parsers, has potential for improving event detection. However, they often only leverage the edges (dependencies) between words, and discard the dependency labels (e.g., nominal-subject), treating the underlying graph edges as homogeneous. In this work, we propose a novel framework for incorporating both dependencies and their labels using a recently proposed technique called Graph Transformer Network (GTN). We integrate GTN to leverage dependency relations on two existing homogeneous-graph-based models and demonstrate an improvement in the F1 score on the ACE dataset. @@ -180,7 +180,7 @@ Fine-grained General Entity Typing in <fixed-case>G</fixed-case>erman using <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et SabineWeber - MarkSteedman + MarkSteedman 138–143 Fine-grained entity typing is important to tasks like relation extraction and knowledge base construction. We find however, that fine-grained entity typing systems perform poorly on general entities (e.g. “ex-president”) as compared to named entities (e.g. “Barack Obama”). This is due to a lack of general entities in existing training data sets. We show that this problem can be mitigated by automatically generating training data from WordNets. We use a German WordNet equivalent, GermaNet, to automatically generate training data for German general entity typing. We use this data to supplement named entity data to train a neural fine-grained entity typing system. This leads to a 10% improvement in accuracy of the prediction of level 1 FIGER types for German general entities, while decreasing named entity type prediction accuracy by only 1%. 2021.textgraphs-1.14 diff --git a/data/xml/2021.tlt.xml b/data/xml/2021.tlt.xml index cf857f8ad5..2cd19673d7 100644 --- a/data/xml/2021.tlt.xml +++ b/data/xml/2021.tlt.xml @@ -5,7 +5,7 @@ Proceedings of the 20th International Workshop on Treebanks and Linguistic Theories (TLT, SyntaxFest 2021) DanielDakota KilianEvang - SandraKübler + SandraKübler Association for Computational Linguistics
Sofia, Bulgaria
December @@ -38,7 +38,7 @@ Is <fixed-case>O</fixed-case>ld <fixed-case>F</fixed-case>rench tougher to parse? LoïcGrobol SophiePrévost - BenoîtCrabbé + BenoîtCrabbé 27–34 2021.tlt-1.3 grobol-etal-2021-old @@ -75,7 +75,7 @@ How Universal is Genre in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies? MaxMüller-Eberstein Robvan der Goot - BarbaraPlank + BarbaraPlank 69–85 2021.tlt-1.7 muller-eberstein-etal-2021-universal @@ -100,7 +100,7 @@ Discourse Tree Structure and Dependency Distance in <fixed-case>EFL</fixed-case> Writing JingtingYuan QiuhanLin - John S. Y.Lee + John S. Y.Lee 105–115 2021.tlt-1.10 yuan-etal-2021-discourse diff --git a/data/xml/2021.triton.xml b/data/xml/2021.triton.xml index 8f22f58a96..00b870d5a1 100644 --- a/data/xml/2021.triton.xml +++ b/data/xml/2021.triton.xml @@ -3,7 +3,7 @@ Proceedings of the Translation and Interpreting Technology Online Conference - RuslanMitkov + RuslanMitkov VilelminiSosoni Julie ChristineGiguère ElenaMurgolo @@ -37,7 +37,7 @@
Interpreting and Technology: Is the Sky Really the Limit? - GloriaCorpas Pastor + GloriaCorpas Pastor 15–24 Nowadays there is a pressing need to develop interpreting-related technolo-gies, with practitioners and other end-users increasingly calling for tools tai-lored to their needs and their new interpreting scenarios. But, at the same time, interpreting as a human activity has resisted complete automation for various reasons, such as fear, unawareness, communication complexities, lack of dedicated tools, etc. Several computer-assisted interpreting tools and resources for interpreters have been developed, although they are rather modest in terms of the sup-port they provide. In the same vein, and despite the pressing need to aiding in multilingual mediation, machine interpreting is still under development, with the exception of a few success stories. This paper will present the results of VIP, a R&D project on language technologies applied to interpreting. It is the ‘seed’ of a family of projects on interpreting technologies which are currently being developed or have just been completed at the Research Institute of Multilingual Language Technol-ogies (IUITLM), University of Malaga. 2021.triton-1.3 @@ -56,7 +56,7 @@ DespoinaMouratidis MariaStasimioti VilelminiSosoni - Katia LidaKermanidis + Katia LidaKermanidis 37–47 Due to the wide-spread development of Machine Translation (MT) systems –especially Neural Machine Translation (NMT) systems– MT evaluation, both automatic and human, has become more and more important as it helps us establish how MT systems perform. Yet, automatic evaluation metrics have lagged behind, as the most popular choices (e.g., BLEU, METEOR and ROUGE) may correlate poorly with human judgments. This paper seeks to put to the test an evaluation model based on a novel deep learning schema (NoDeeLe) used to compare two NMT systems on four different text genres, i.e. medical, legal, marketing and literary in the English-Greek language pair. The model utilizes information from the source segments, the MT outputs and the reference translation, as well as the automatic metrics BLEU, METEOR and WER. The proposed schema achieves a strong correlation with human judgment (78% average accuracy for the four texts with the highest accuracy, i.e. 85%, observed in the case of the marketing text), while it outperforms classic machine learning algorithms and automatic metrics. 2021.triton-1.5 @@ -65,7 +65,7 @@ <fixed-case>BLEU</fixed-case>, <fixed-case>METEOR</fixed-case>, <fixed-case>BERTS</fixed-case>core: Evaluation of Metrics Performance in Assessing Critical Translation Errors in Sentiment-Oriented Text HadeelSaadany - ConstantinOrasan + ConstantinOrasan 48–56 Social media companies as well as censorship authorities make extensive use of artificial intelligence (AI) tools to monitor postings of hate speech, celebrations of violence or profanity. Since AI software requires massive volumes of data to train computers, automatic-translation of the online content is usually implemented to compensate for the scarcity of text in some languages. However, machine translation (MT) mistakes are a regular occurrence when translating sentiment-oriented user-generated content (UGC), especially when a low-resource language is involved. In such scenarios, the adequacy of the whole process relies on the assumption that the translation can be evaluated correctly. In this paper, we assess the ability of automatic quality metrics to detect critical machine translation errors which can cause serious misunderstanding of the affect message. We compare the performance of three canonical metrics on meaningless translations as compared to meaningful translations with a critical error that distorts the overall sentiment of the source text. We demonstrate the need for the fine-tuning of automatic metrics to make them more robust in detecting sentiment critical errors. 2021.triton-1.6 @@ -101,7 +101,7 @@ Cross-Lingual Named Entity Recognition via <fixed-case>F</fixed-case>ast<fixed-case>A</fixed-case>lign: a Case Study AliHatami RuslanMitkov - GloriaCorpas Pastor + GloriaCorpas Pastor 85–92 Named Entity Recognition is an essential task in natural language processing to detect entities and classify them into predetermined categories. An entity is a meaningful word, or phrase that refers to proper nouns. Named Entities play an important role in different NLP tasks such as Information Extraction, Question Answering and Machine Translation. In Machine Translation, named entities often cause translation failures regardless of local context, affecting the output quality of translation. Annotating named entities is a time-consuming and expensive process especially for low-resource languages. One solution for this problem is to use word alignment methods in bilingual parallel corpora in which just one side has been annotated. The goal is to extract named entities in the target language by using the annotated corpus of the source language. In this paper, we compare the performance of two alignment methods, Grow-diag-final-and and Intersect Symmetrisation heuristics, to exploit the annotation projection of English-Brazilian Portuguese bilingual corpus to detect named entities in Brazilian Portuguese. A NER model that is trained on annotated data extracted from the alignment methods, is used to evaluate the performance of aligners. Experimental results show the Intersect Symmetrisation is able to achieve superior performance scores compared to the Grow-diag-final-and heuristic in Brazilian Portuguese. 2021.triton-1.10 @@ -120,7 +120,7 @@ <fixed-case>S</fixed-case>mar<fixed-case>T</fixed-case>erp: A <fixed-case>CAI</fixed-case> System to Support Simultaneous Interpreters in Real-Time SusanaRodriguez RobertoGretter - MarcoMatassoni + MarcoMatassoni AlvaroAlonso OscarCorcho MarianoRico @@ -202,7 +202,7 @@ <fixed-case>M</fixed-case>ulti<fixed-case>T</fixed-case>rai<fixed-case>NMT</fixed-case>: Training Materials to Approach Neural Machine Translation from Scratch - GemaRamírez-Sánchez + GemaRamírez-Sánchez Juan AntonioPérez-Ortiz FelipeSánchez-Martínez CarolineRossi diff --git a/data/xml/2021.udw.xml b/data/xml/2021.udw.xml index b59ec3de9f..fa6a3027fa 100644 --- a/data/xml/2021.udw.xml +++ b/data/xml/2021.udw.xml @@ -19,7 +19,7 @@ Formae reformandae: for a reorganisation of verb form annotation in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies illustrated by the specific case of <fixed-case>L</fixed-case>atin - Flavio MassimilianoCecchini + Flavio MassimilianoCecchini 1–15 2021.udw-1.1 cecchini-2021-formae @@ -133,7 +133,7 @@ Date and Time in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies - DanielZeman + DanielZeman 173–193 2021.udw-1.15 zeman-2021-date diff --git a/data/xml/2021.vardial.xml b/data/xml/2021.vardial.xml index 19319a8dc4..834156923c 100644 --- a/data/xml/2021.vardial.xml +++ b/data/xml/2021.vardial.xml @@ -4,9 +4,9 @@ Proceedings of the Eighth Workshop on NLP for Similar Languages, Varieties and Dialects MarcosZampieri - PreslavNakov + PreslavNakov NikolaLjubešić - JörgTiedemann + JörgTiedemann YvesScherrer TommiJauhiainen Association for Computational Linguistics @@ -26,7 +26,7 @@ Radu TudorIonescu HeidiJauhiainen TommiJauhiainen - KristerLindén + KristerLindén NikolaLjubešić NikoPartanen RubaPriyadharshini @@ -56,7 +56,7 @@ GabriellaLapesa ReemAlatrash DominikSchlechtweg - SabineSchulte im Walde + SabineSchulte im Walde 21–27 Kiezdeutsch is a variety of German predominantly spoken by teenagers from multi-ethnic urban neighborhoods in casual conversations with their peers. In recent years, the popularity of Kiezdeutsch has increased among young people, independently of their socio-economic origin, and has spread in social media, too. While previous studies have extensively investigated this language variety from a linguistic and qualitative perspective, not much has been done from a quantitative point of view. We perform the first large-scale data-driven analysis of the lexical and morpho-syntactic properties of Kiezdeutsch in comparison with standard German. At the level of results, we confirm predictions of previous qualitative analyses and integrate them with further observations on specific linguistic phenomena such as slang and self-centered speaker attitude. At the methodological level, we provide logistic regression as a framework to perform bottom-up feature selection in order to quantify differences across language varieties. 2021.vardial-1.3 @@ -103,7 +103,7 @@ Discriminating Between Similar <fixed-case>N</fixed-case>ordic Languages RenéHaas - LeonDerczynski + LeonDerczynski 67–75 Automatic language identification is a challenging problem. Discriminating between closely related languages is especially difficult. This paper presents a machine learning approach for automatic language identification for the Nordic languages, which often suffer miscategorisation by existing state-of-the-art tools. Concretely we will focus on discrimination between six Nordic languages: Danish, Swedish, Norwegian (Nynorsk), Norwegian (Bokmål), Faroese and Icelandic. 2021.vardial-1.8 @@ -113,7 +113,7 @@ Naive <fixed-case>B</fixed-case>ayes-based Experiments in <fixed-case>R</fixed-case>omanian Dialect Identification TommiJauhiainen HeidiJauhiainen - KristerLindén + KristerLindén 76–83 This article describes the experiments and systems developed by the SUKI team for the second edition of the Romanian Dialect Identification (RDI) shared task which was organized as part of the 2021 VarDial Evaluation Campaign. We submitted two runs to the shared task and our second submission was the overall best submission by a noticeable margin. Our best submission used a character n-gram based naive Bayes classifier with adaptive language models. We describe our experiments on the development set leading to both submissions. 2021.vardial-1.9 @@ -170,7 +170,7 @@ N-gram and Neural Models for <fixed-case>U</fixed-case>ralic Language Identification: <fixed-case>NRC</fixed-case> at <fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial 2021 GabrielBernier-Colborne SergeLeger - CyrilGoutte + CyrilGoutte 128–134 We describe the systems developed by the National Research Council Canada for the Uralic language identification shared task at the 2021 VarDial evaluation campaign. We evaluated two different approaches to this task: a probabilistic classifier exploiting only character 5-grams as features, and a character-based neural network pre-trained through self-supervision, then fine-tuned on the language identification task. The former method turned out to perform better, which casts doubt on the usefulness of deep learning methods for language identification, where they have yet to convincingly and consistently outperform simpler and less costly classification algorithms exploiting n-gram features. 2021.vardial-1.15 diff --git a/data/xml/2021.wanlp.xml b/data/xml/2021.wanlp.xml index 93c0bb03e4..dc4cd9cd2a 100644 --- a/data/xml/2021.wanlp.xml +++ b/data/xml/2021.wanlp.xml @@ -73,7 +73,7 @@ Kawarith: an <fixed-case>A</fixed-case>rabic <fixed-case>T</fixed-case>witter Corpus for Crisis Events AlaaAlharbi - MarkLee + MarkLee 42–52 Social media (SM) platforms such as Twitter provide large quantities of real-time data that can be leveraged during mass emergencies. Developing tools to support crisis-affected communities requires available datasets, which often do not exist for low resource languages. This paper introduces Kawarith a multi-dialect Arabic Twitter corpus for crisis events, comprising more than a million Arabic tweets collected during 22 crises that occurred between 2018 and 2020 and involved several types of hazard. Exploration of this content revealed the most discussed topics and information types, and the paper presents a labelled dataset from seven emergency events that serves as a gold standard for several tasks in crisis informatics research. Using annotated data from the same event, a BERT model is fine-tuned to classify tweets into different categories in the multi- label setting. Results show that BERT-based models yield good performance on this task even with small amounts of task-specific training data. 2021.wanlp-1.5 @@ -91,7 +91,7 @@ <fixed-case>A</fixed-case>rabic Emoji Sentiment Lexicon (<fixed-case>A</fixed-case>rab-<fixed-case>ESL</fixed-case>): A Comparison between <fixed-case>A</fixed-case>rabic and <fixed-case>E</fixed-case>uropean Emoji Sentiment Lexicons Shatha Ali A.Hakami - RobertHendley + RobertHendley PhillipSmith 60–71 Emoji (the popular digital pictograms) are sometimes seen as a new kind of artificial and universally usable and consistent writing code. In spite of their assumed universality, there is some evidence that the sense of an emoji, specifically in regard to sentiment, may change from language to language and culture to culture. This paper investigates whether contextual emoji sentiment analysis is consistent across Arabic and European languages. To conduct this investigation, we, first, created the Arabic emoji sentiment lexicon (Arab-ESL). Then, we exploited an existing European emoji sentiment lexicon to compare the sentiment conveyed in each of the two families of language and culture (Arabic and European). The results show that the pairwise correlation between the two lexicons is consistent for emoji that represent, for instance, hearts, facial expressions, and body language. However, for a subset of emoji (those that represent objects, nature, symbols, and some human activities), there are large differences in the sentiment conveyed. More interestingly, an extremely high level of inconsistency has been shown with food emoji. @@ -223,7 +223,7 @@ Quranic Verses Semantic Relatedness Using <fixed-case>A</fixed-case>ra<fixed-case>BERT</fixed-case> AbdullahAlsaleh - EricAtwell + EricAtwell AbdulrahmanAltahhan 185–190 Bidirectional Encoder Representations from Transformers (BERT) has gained popularity in recent years producing state-of-the-art performances across Natural Language Processing tasks. In this paper, we used AraBERT language model to classify pairs of verses provided by the QurSim dataset to either be semantically related or not. We have pre-processed The QurSim dataset and formed three datasets for comparisons. Also, we have used both versions of AraBERT, which are AraBERTv02 and AraBERTv2, to recognise which version performs the best with the given datasets. The best results was AraBERTv02 with 92% accuracy score using a dataset comprised of label ‘2’ and label '-1’, the latter was generated outside of QurSim dataset. @@ -324,7 +324,7 @@ Adapting <fixed-case>MARBERT</fixed-case> for Improved <fixed-case>A</fixed-case>rabic Dialect Identification: Submission to the <fixed-case>NADI</fixed-case> 2021 Shared Task BadrAlKhamissi MohamedGabr - MuhammadElNokrashy + MuhammadElNokrashy KhaledEssam 260–264 In this paper, we tackle the Nuanced Arabic Dialect Identification (NADI) shared task (Abdul-Mageed et al., 2021) and demonstrate state-of-the-art results on all of its four subtasks. Tasks are to identify the geographic origin of short Dialectal (DA) and Modern Standard Arabic (MSA) utterances at the levels of both country and province. Our final model is an ensemble of variants built on top of MARBERT that achieves an F1-score of 34.03% for DA at the country-level development set—an improvement of 7.63% from previous work. @@ -357,7 +357,7 @@ ElsayedIssa MohammedAlShakhori1 RedaAl-Bahrani - GusHahn-Powell + GusHahn-Powell 276–281 This work investigates the value of augmenting recurrent neural networks with feature engineering for the Second Nuanced Arabic Dialect Identification (NADI) Subtask 1.2: Country-level DA identification. We compare the performance of a simple word-level LSTM using pretrained embeddings with one enhanced using feature embeddings for engineered linguistic features. Our results show that the addition of explicit features to the LSTM is detrimental to performance. We attribute this performance loss to the bivalency of some linguistic items in some text, ubiquity of topics, and participant mobility. 2021.wanlp-1.32 @@ -378,7 +378,7 @@ Machine Learning-Based Approach for <fixed-case>A</fixed-case>rabic Dialect Identification HamadaNayel - AhmedHassan + AhmedHassan MahmoudSobhi AhmedEl-Sawy 287–290 @@ -424,7 +424,7 @@ Multi-task Learning Using a Combination of Contextualised and Static Word Embeddings for <fixed-case>A</fixed-case>rabic Sarcasm Detection and Sentiment Analysis Abdullah I.Alharbi - MarkLee + MarkLee 318–322 Sarcasm detection and sentiment analysis are important tasks in Natural Language Understanding. Sarcasm is a type of expression where the sentiment polarity is flipped by an interfering factor. In this study, we exploited this relationship to enhance both tasks by proposing a multi-task learning approach using a combination of static and contextualised embeddings. Our proposed system achieved the best result in the sarcasm detection subtask. 2021.wanlp-1.39 @@ -508,7 +508,7 @@ Leveraging Offensive Language for Sarcasm and Sentiment Detection in <fixed-case>A</fixed-case>rabic FatemahHusain - OzlemUzuner + OzlemUzuner 364–369 Sarcasm detection is one of the top challenging tasks in text classification, particularly for informal Arabic with high syntactic and semantic ambiguity. We propose two systems that harness knowledge from multiple tasks to improve the performance of the classifier. This paper presents the systems used in our participation to the two sub-tasks of the Sixth Arabic Natural Language Processing Workshop (WANLP); Sarcasm Detection and Sentiment Analysis. Our methodology is driven by the hypothesis that tweets with negative sentiment and tweets with sarcasm content are more likely to have offensive content, thus, fine-tuning the classification model using large corpus of offensive language, supports the learning process of the model to effectively detect sentiment and sarcasm contents. Results demonstrate the effectiveness of our approach for sarcasm detection task over sentiment analysis task. 2021.wanlp-1.47 diff --git a/data/xml/2021.wassa.xml b/data/xml/2021.wassa.xml index 23188fd70b..2a18afbf7c 100644 --- a/data/xml/2021.wassa.xml +++ b/data/xml/2021.wassa.xml @@ -3,13 +3,13 @@ Proceedings of the Eleventh Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis - OrpheeDe Clercq - AlexandraBalahur + OrpheeDe Clercq + AlexandraBalahur JoaoSedoc ValentinBarriere ShabnamTafreshi - SvenBuechel - VeroniqueHoste + SvenBuechel + VeroniqueHoste Association for Computational Linguistics
Online
April @@ -65,7 +65,7 @@ Emotion Ratings: How Intensity, Annotation Confidence and Agreements are Entangled EnricaTroiano - SebastianPadó + SebastianPadó RomanKlinger 40–49 When humans judge the affective content of texts, they also implicitly assess the correctness of such judgment, that is, their confidence. We hypothesize that people’s (in)confidence that they performed well in an annotation task leads to (dis)agreements among each other. If this is true, confidence may serve as a diagnostic tool for systematic differences in annotations. To probe our assumption, we conduct a study on a subset of the Corpus of Contemporary American English, in which we ask raters to distinguish neutral sentences from emotion-bearing ones, while scoring the confidence of their answers. Confidence turns out to approximate inter-annotator disagreements. Further, we find that confidence is correlated to emotion intensity: perceiving stronger affect in text prompts annotators to more certain classification performances. This insight is relevant for modelling studies of intensity, as it opens the question wether automatic regressors or classifiers actually predict intensity, or rather human’s self-perceived confidence. @@ -75,7 +75,7 @@ Disentangling Document Topic and Author Gender in Multiple Languages: Lessons for Adversarial Debiasing ErenayDayanik - SebastianPadó + SebastianPadó 50–61 Text classification is a central tool in NLP. However, when the target classes are strongly correlated with other textual attributes, text classification models can pick up “wrong” features, leading to bad generalization and biases. In social media analysis, this problem surfaces for demographic user classes such as language, topic, or gender, which influence the generate text to a substantial extent. Adversarial training has been claimed to mitigate this problem, but thorough evaluation is missing. In this paper, we experiment with text classification of the correlated attributes of document topic and author gender, using a novel multilingual parallel corpus of TED talk transcripts. Our findings are: (a) individual classifiers for topic and author gender are indeed biased; (b) debiasing with adversarial training works for topic, but breaks down for author gender; (c) gender debiasing results differ across languages. We interpret the result in terms of feature space overlap, highlighting the role of linguistic surface realization of the target classes. 2021.wassa-1.6 @@ -183,7 +183,7 @@ IliaMarkov NikolaLjubešić DarjaFišer - WalterDaelemans + WalterDaelemans 149–159 In this paper, we describe experiments designed to evaluate the impact of stylometric and emotion-based features on hate speech detection: the task of classifying textual content into hate or non-hate speech classes. Our experiments are conducted for three languages – English, Slovene, and Dutch – both in in-domain and cross-domain setups, and aim to investigate hate speech using features that model two linguistic phenomena: the writing style of hateful social media content operationalized as function word usage on the one hand, and emotion expression in hateful messages on the other hand. The results of experiments with features that model different combinations of these phenomena support our hypothesis that stylometric and emotion-based features are robust indicators of hate speech. Their contribution remains persistent with respect to domain and language variation. We show that the combination of features that model the targeted phenomena outperforms words and character n-gram features under cross-domain conditions, and provides a significant boost to deep learning models, which currently obtain the best results, when combined with them in an ensemble. 2021.wassa-1.16 diff --git a/data/xml/2021.wat.xml b/data/xml/2021.wat.xml index ae146c316a..54a0409606 100644 --- a/data/xml/2021.wat.xml +++ b/data/xml/2021.wat.xml @@ -14,14 +14,14 @@ HiroshiManabe Win PaPa ShantipriyaParida - OndřejBojar + OndřejBojar ChenhuiChu AkikoEriguchi KaoriAbe YusukeOda KatsuhitoSudoh SadaoKurohashi - PushpakBhattacharyya + PushpakBhattacharyya Association for Computational Linguistics
Online
August @@ -85,7 +85,7 @@ <fixed-case>NICT</fixed-case>’s Neural Machine Translation Systems for the <fixed-case>WAT</fixed-case>21 Restricted Translation Task ZuchaoLi MasaoUtiyama - EiichiroSumita + EiichiroSumita HaiZhao 62–67 This paper describes our system (Team ID: nictrb) for participating in the WAT’21 restricted machine translation task. In our submitted system, we designed a new training approach for restricted machine translation. By sampling from the translation target, we can solve the problem that ordinary training data does not have a restricted vocabulary. With the further help of constrained decoding in the inference phase, we achieved better results than the baseline, confirming the effectiveness of our solution. In addition, we also tried the vanilla and sparse Transformer as the backbone network of the model, as well as model ensembling, which further improved the final translation performance. @@ -140,7 +140,7 @@ <fixed-case>NICT</fixed-case>-2 Translation System at <fixed-case>WAT</fixed-case>-2021: Applying a Pretrained Multilingual Encoder-Decoder Model to Low-resource Language Pairs KenjiImamura - EiichiroSumita + EiichiroSumita 90–95 In this paper, we present the NICT system (NICT-2) submitted to the NICT-SAP shared task at the 8th Workshop on Asian Translation (WAT-2021). A feature of our system is that we used a pretrained multilingual BART (Bidirectional and Auto-Regressive Transformer; mBART) model. Because publicly available models do not support some languages in the NICT-SAP task, we added these languages to the mBART model and then trained it using monolingual corpora extracted from Wikipedia. We fine-tuned the expanded mBART model using the parallel corpora specified by the NICT-SAP task. The BLEU scores greatly improved in comparison with those of systems without the pretrained model, including the additional languages. 2021.wat-1.8 @@ -168,7 +168,7 @@ ChanheeLee HyeonseokMoon SugyeongEo - HeuiseokLim + HeuiseokLim 106–116 With the growing popularity of smart speakers, such as Amazon Alexa, speech is becoming one of the most important modes of human-computer interaction. Automatic speech recognition (ASR) is arguably the most critical component of such systems, as errors in speech recognition propagate to the downstream components and drastically degrade the user experience. A simple and effective way to improve the speech recognition accuracy is to apply automatic post-processor to the recognition result. However, training a post-processor requires parallel corpora created by human annotators, which are expensive and not scalable. To alleviate this problem, we propose Back TranScription (BTS), a denoising-based method that can create such corpora without human labor. Using a raw corpus, BTS corrupts the text using Text-to-Speech (TTS) and Speech-to-Text (STT) systems. Then, a post-processing model can be trained to reconstruct the original text given the corrupted input. Quantitative and qualitative evaluations show that a post-processor trained using our approach is highly effective in fixing non-trivial speech recognition errors such as mishandling foreign words. We present the generated parallel corpus and post-processing platform to make our results publicly available. 2021.wat-1.10 @@ -252,7 +252,7 @@ Abdullah Faiz Ur RahmanKhilji DarshKaushik ParthaPakray - SivajiBandyopadhyay + SivajiBandyopadhyay 155–160 Machine translation performs automatic translation from one natural language to another. Neural machine translation attains a state-of-the-art approach in machine translation, but it requires adequate training data, which is a severe problem for low-resource language pairs translation. The concept of multimodal is introduced in neural machine translation (NMT) by merging textual features with visual features to improve low-resource pair translation. WAT2021 (Workshop on Asian Translation 2021) organizes a shared task of multimodal translation for English to Hindi. We have participated the same with team name CNLP-NITS-PP in two submissions: multimodal and text-only NMT. This work investigates phrase pairs injection via data augmentation approach and attains improvement over our previous work at WAT2020 on the same task in both text-only and multimodal NMT. We have achieved second rank on the challenge test set for English to Hindi multimodal translation where Bilingual Evaluation Understudy (BLEU) score of 39.28, Rank-based Intuitive Bilingual Evaluation Score (RIBES) 0.792097, and Adequacy-Fluency Metrics (AMFM) score 0.830230 respectively. 2021.wat-1.17 @@ -298,7 +298,7 @@ Optimal Word Segmentation for Neural Machine Translation into <fixed-case>D</fixed-case>ravidian Languages PrajitDhar AriannaBisazza - Gertjanvan Noord + Gertjanvan Noord 181–190 Dravidian languages, such as Kannada and Tamil, are notoriously difficult to translate by state-of-the-art neural models. This stems from the fact that these languages are morphologically very rich as well as being low-resourced. In this paper, we focus on subword segmentation and evaluate Linguistically Motivated Vocabulary Reduction (LMVR) against the more commonly used SentencePiece (SP) for the task of translating from English into four different Dravidian languages. Additionally we investigate the optimal subword vocabulary size for each language. We find that SP is the overall best choice for segmentation, and that larger dictionary sizes lead to higher translation quality. 2021.wat-1.21 @@ -310,7 +310,7 @@ RahulAralikatte Miryamde Lhoneux AnoopKunchukuttan - AndersSøgaard + AndersSøgaard 191–197 This work introduces Itihasa, a large-scale translation dataset containing 93,000 pairs of Sanskrit shlokas and their English translations. The shlokas are extracted from two Indian epics viz., The Ramayana and The Mahabharata. We first describe the motivation behind the curation of such a dataset and follow up with empirical analysis to bring out its nuances. We then benchmark the performance of standard translation models on this corpus and show that even state-of-the-art transformer architectures perform poorly, emphasizing the complexity of the dataset. 2021.wat-1.22 @@ -336,7 +336,7 @@ Miryamde Lhoneux DanielHershcovich MarcelBollmann - AndersSøgaard + AndersSøgaard 205–211 This work shows that competitive translation results can be obtained in a constrained setting by incorporating the latest advances in memory and compute optimization. We train and evaluate large multilingual translation models using a single GPU for a maximum of 100 hours and get within 4-5 BLEU points of the top submission on the leaderboard. We also benchmark standard baselines on the PMI corpus and re-discover well-known shortcomings of translation systems and metrics. 2021.wat-1.24 @@ -347,7 +347,7 @@ <fixed-case>IIIT</fixed-case> Hyderabad Submission To <fixed-case>WAT</fixed-case> 2021: Efficient Multilingual <fixed-case>NMT</fixed-case> systems for <fixed-case>I</fixed-case>ndian languages SouravKumar SalilAggarwal - DiptiSharma + DiptiSharma 212–216 This paper describes the work and the systems submitted by the IIIT-Hyderbad team in the WAT 2021 MultiIndicMT shared task. The task covers 10 major languages of the Indian subcontinent. For the scope of this task, we have built multilingual systems for 20 translation directions namely English-Indic (one-to- many) and Indic-English (many-to-one). Individually, Indian languages are resource poor which hampers translation quality but by leveraging multilingualism and abundant monolingual corpora, the translation quality can be substantially boosted. But the multilingual systems are highly complex in terms of time as well as computational resources. Therefore, we are training our systems by efficiently se- lecting data that will actually contribute to most of the learning process. Furthermore, we are also exploiting the language related- ness found in between Indian languages. All the comparisons were made using BLEU score and we found that our final multilingual sys- tem significantly outperforms the baselines by an average of 11.3 and 19.6 BLEU points for English-Indic (en-xx) and Indic-English (xx- en) directions, respectively. 2021.wat-1.25 diff --git a/data/xml/2021.winlp.xml b/data/xml/2021.winlp.xml index 39ed1435db..70e3d5aabf 100644 --- a/data/xml/2021.winlp.xml +++ b/data/xml/2021.winlp.xml @@ -10,7 +10,7 @@ KyathiChandu XandaSchofield SurangikaRanathunga - HaleyLepp + HaleyLepp TirthankarGhosal Association for Computational Linguistics
Punta Cana, Dominican Republic
@@ -55,7 +55,7 @@ DianaTodea LiviuFodor AndreeaLuca - Liviu P.Dinu + Liviu P.Dinu RareșBoian 21–24 In the current study, we analyzed 15297 texts from 39 cancer survivors who posted or commented on Reddit in order to detect the language particularities of cancer survivors from online discourse. We performed a computational linguistic analysis (part-of-speech analysis, emoji detection, sentiment analysis) on submissions around the time of the cancer diagnosis and around the time of remission. We found several significant differences in the texts posted around the time of remission compared to those around the time of diagnosis. Though our results need to be backed up by a higher corpus of data, they do cue to the fact that cancer survivors, around the time of remission, focus more on others, are more active on social media, and do not see the glass as half empty as suggested by the valence of the emojis. diff --git a/data/xml/2021.wmt.xml b/data/xml/2021.wmt.xml index 2df80ca98f..667a5dd0fd 100644 --- a/data/xml/2021.wmt.xml +++ b/data/xml/2021.wmt.xml @@ -4,23 +4,23 @@ Proceedings of the Sixth Conference on Machine Translation LoicBarrault - OndrejBojar + OndrejBojar FethiBougares - RajenChatterjee + RajenChatterjee Marta R.Costa-jussa ChristianFedermann MarkFishel - AlexanderFraser + AlexanderFraser MarkusFreitag YvetteGraham RomanGrundkiewicz PacoGuzman BarryHaddow MatthiasHuck - Antonio JimenoYepes + Antonio JimenoYepes PhilippKoehn TomKocmi - AndreMartins + AndreMartins MakotoMorishita ChristofMonz Association for Computational Linguistics @@ -66,7 +66,7 @@ MasaakiNagata AjayNagesh ToshiakiNakazawa - MatteoNegri + MatteoNegri SantanuPal Allahsera AugusteTapo MarcoTurchi @@ -87,7 +87,7 @@ SomyaJain DouweKiela TristanThrush - FranciscoGuzmán + FranciscoGuzmán 89–99 We present the results of the first task on Large-Scale Multilingual Machine Translation. The task consists on the many-to-many evaluation of a single model across a variety of source and target languages. This year, the task consisted on three different settings: (i) SMALL-TASK1 (Central/South-Eastern European Languages), (ii) the SMALL-TASK2 (South-East Asian Languages), and (iii) FULL-TASK (all 101 x 100 language pairs). All the tasks used the FLORES-101 dataset as the evaluation benchmark. To ensure the longevity of the dataset, the test sets were not publicly released and the models were evaluated in a controlled environment on Dynabench. There were a total of 10 participating teams for the tasks, with a total of 151 intermediate model submissions and 13 final models. This year’s result show a significant improvement over the known base-lines with +17.8 BLEU for SMALL-TASK2, +10.6 for FULL-TASK and +3.6 for SMALL-TASK1. 2021.wmt-1.2 @@ -105,11 +105,11 @@ The <fixed-case>U</fixed-case>niversity of <fixed-case>E</fixed-case>dinburgh’s <fixed-case>E</fixed-case>nglish-<fixed-case>G</fixed-case>erman and <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>ausa Submissions to the <fixed-case>WMT</fixed-case>21 News Translation Task PinzhenChen - JindřichHelcl + JindřichHelcl UlrichGermann LaurieBurchell NikolayBogoychev - Antonio ValerioMiceli Barone + Antonio ValerioMiceli Barone JonasWaldendorf AlexandraBirch KennethHeafield @@ -122,7 +122,7 @@ Tune in: The <fixed-case>AFRL</fixed-case> <fixed-case>WMT</fixed-case>21 News-Translation Systems GrantErdmann JeremyGwinnup - TimAnderson + TimAnderson 110–116 This paper describes the Air Force Research Laboratory (AFRL) machine translation sys- tems and the improvements that were developed during the WMT21 evaluation campaign. This year, we explore various methods of adapting our baseline models from WMT20 and again measure improvements in performance on the Russian–English language pair. 2021.wmt-1.5 @@ -135,7 +135,7 @@ ChristineBasta JavierFerrando Marta R.Costa-jussa - José A. R.Fonollosa + José A. R.Fonollosa 117–122 This paper describes the submission to the WMT 2021 news translation shared task by the UPC Machine Translation group. The goal of the task is to translate German to French (De-Fr) and French to German (Fr-De). Our submission focuses on fine-tuning a pre-trained model to take advantage of monolingual data. We fine-tune mBART50 using the filtered data, and additionally, we train a Transformer model on the same data from scratch. In the experiments, we show that fine-tuning mBART50 results in 31.69 BLEU for De-Fr and 23.63 BLEU for Fr-De, which increases 2.71 and 1.90 BLEU accordingly, as compared to the model we train from scratch. Our final submission is an ensemble of these two models, further increasing 0.3 BLEU for Fr-De. 2021.wmt-1.6 @@ -160,7 +160,7 @@ Jailan S.ElMosalami MohamedAfify Ahmed Y.Tawfik - HanyHassan Awadalla + HanyHassan Awadalla 130–135 This paper describes the Microsoft Egypt Development Center (EgDC) submission to the constrained track of WMT21 shared news translation task. We focus on the three relatively low resource language pairs Bengali ↔ Hindi, English ↔ Hausa and Xhosa ↔ Zulu. To overcome the limitation of relatively low parallel data we train a multilingual model using a multitask objective employing both parallel and monolingual data. In addition, we augment the data using back translation. We also train a bilingual model incorporating back translation and knowledge distillation then combine the two models using sequence-to-sequence mapping. We see around 70% relative gain in BLEU point for En ↔ Ha and around 25% relative improvements for Bn ↔ Hi and Xh ↔ Zu compared to bilingual baselines. 2021.wmt-1.8 @@ -202,7 +202,7 @@ <fixed-case>M</fixed-case>i<fixed-case>SS</fixed-case>@<fixed-case>WMT</fixed-case>21: Contrastive Learning-reinforced Domain Adaptation in Neural Machine Translation ZuchaoLi MasaoUtiyama - EiichiroSumita + EiichiroSumita HaiZhao 154–161 In this paper, we describe our MiSS system that participated in the WMT21 news translation task. We mainly participated in the evaluation of the three translation directions of English-Chinese and Japanese-English translation tasks. In the systems submitted, we primarily considered wider networks, deeper networks, relative positional encoding, and dynamic convolutional networks in terms of model structure, while in terms of training, we investigated contrastive learning-reinforced domain adaptation, self-supervised training, and optimization objective switching training methods. According to the final evaluation results, a deeper, wider, and stronger network can improve translation performance in general, yet our data domain adaption method can improve performance even more. In addition, we found that switching to the use of our proposed objective during the finetune phase using relatively small domain-related data can effectively improve the stability of the model’s convergence and achieve better optimal performance. @@ -229,11 +229,11 @@ e<fixed-case>T</fixed-case>ranslation’s Submissions to the <fixed-case>WMT</fixed-case> 2021 News Translation Task CsabaOravecz - KatinaBontcheva - DavidKolovratník + KatinaBontcheva + DavidKolovratník BhavaniBhaskar MichaelJellinghaus - AndreasEisele + AndreasEisele 172–179 The paper describes the 3 NMT models submitted by the eTranslation team to the WMT 2021 news translation shared task. We developed systems in language pairs that are actively used in the European Commission’s eTranslation service. In the WMT news task, recent years have seen a steady increase in the need for computational resources to train deep and complex architectures to produce competitive systems. We took a different approach and explored alternative strategies focusing on data selection and filtering to improve the performance of baseline systems. In the domain constrained task for the French–German language pair our approach resulted in the best system by a significant margin in BLEU. For the other two systems (English–German and English-Czech) we tried to build competitive models using standard best practices. 2021.wmt-1.15 @@ -329,7 +329,7 @@ <fixed-case>LISN</fixed-case> @ <fixed-case>WMT</fixed-case> 2021 JitaoXu - Minh QuangPham + Minh QuangPham SadafAbdul Rauf FrançoisYvon 232–242 @@ -345,7 +345,7 @@ QiuRan FandongMeng PengLi - JinanXu + JinanXu JieZhou 243–254 This paper introduces WeChat AI’s participation in WMT 2021 shared news translation task on English->Chinese, English->Japanese, Japanese->English and English->German. Our systems are based on the Transformer (Vaswani et al., 2017) with several novel and effective variants. In our experiments, we employ data filtering, large-scale synthetic data generation (i.e., back-translation, knowledge distillation, forward-translation, iterative in-domain knowledge transfer), advanced finetuning approaches, and boosted Self-BLEU based model ensemble. Our constrained systems achieve 36.9, 46.9, 27.8 and 31.3 case-sensitive BLEU scores on English->Chinese, English->Japanese, Japanese->English and English->German, respectively. The BLEU scores of English->Chinese, English->Japanese and Japanese->English are the highest among all submissions, and that of English->German is the highest among all constrained submissions. @@ -420,7 +420,7 @@ BishwarajPaul Prottay KumarAdhikary ParthaPakray - SivajiBandyopadhyay + SivajiBandyopadhyay 284–287 The neural machine translation approach has gained popularity in machine translation because of its context analysing ability and its handling of long-term dependency issues. We have participated in the WMT21 shared task of similar language translation on a Tamil-Telugu pair with the team name: CNLP-NITS. In this task, we utilized monolingual data via pre-train word embeddings in transformer model based neural machine translation to tackle the limitation of parallel corpus. Our model has achieved a bilingual evaluation understudy (BLEU) score of 4.05, rank-based intuitive bilingual evaluation score (RIBES) score of 24.80 and translation edit rate (TER) score of 97.24 for both Tamil-to-Telugu and Telugu-to-Tamil translations respectively. 2021.wmt-1.29 @@ -429,7 +429,7 @@ Low Resource Similar Language Neural Machine Translation for <fixed-case>T</fixed-case>amil-<fixed-case>T</fixed-case>elugu VandanMujadia - DiptiSharma + DiptiSharma 288–291 This paper describes the participation of team oneNLP (LTRC, IIIT-Hyderabad) for the WMT 2021 task, similar language translation. We experimented with transformer based Neural Machine Translation and explored the use of language similarity for Tamil-Telugu and Telugu-Tamil. We incorporated use of different subword configurations, script conversion and single model training for both directions as exploratory experiments. 2021.wmt-1.30 @@ -447,7 +447,7 @@ <fixed-case>NITK</fixed-case>-<fixed-case>U</fixed-case>o<fixed-case>H</fixed-case>: <fixed-case>T</fixed-case>amil-<fixed-case>T</fixed-case>elugu Machine Translation Systems for the <fixed-case>WMT</fixed-case>21 Similar Language Translation Task RichardSaldanha AnanthanarayanaV. S - Anand KumarM + Anand KumarM ParameswariKrishnamurthy 299–303 In this work, two Neural Machine Translation (NMT) systems have been developed and evaluated as part of the bidirectional Tamil-Telugu similar languages translation subtask in WMT21. The OpenNMT-py toolkit has been used to create quick prototypes of the systems, following which models have been trained on the training datasets containing the parallel corpus and finally the models have been evaluated on the dev datasets provided as part of the task. Both the systems have been trained on a DGX station with 4 -V100 GPUs. The first NMT system in this work is a Transformer based 6 layer encoder-decoder model, trained for 100000 training steps, whose configuration is similar to the one provided by OpenNMT-py and this is used to create a model for bidirectional translation. The second NMT system contains two unidirectional translation models with the same configuration as the first system, with the addition of utilizing Byte Pair Encoding (BPE) for subword tokenization through the pre-trained MultiBPEmb model. Based on the dev dataset evaluation metrics for both the systems, the first system i.e. the vanilla Transformer model has been submitted as the Primary system. Since there were no improvements in the metrics during training of the second system with BPE, it has been submitted as a contrastive system. @@ -458,7 +458,7 @@ A3-108 Machine Translation System for Similar Language Translation Shared Task 2021 SaumitraYadav - ManishShrivastava + ManishShrivastava 304–306 In this paper, we describe our submissions for the Similar Language Translation Shared Task 2021. We built 3 systems in each direction for the Tamil ⇐⇒ Telugu language pair. This paper outlines experiments with various tokenization schemes to train statistical models. We also report the configuration of the submitted systems and results produced by them. 2021.wmt-1.33 @@ -524,7 +524,7 @@ HuanLiu JunpengLiu KaiyuHuang - DegenHuang + DegenHuang 331–335 This paper describes DUT-NLP Lab’s submission to the WMT-21 triangular machine translation shared task. The participants are not allowed to use other data and the translation direction of this task is Russian-to-Chinese. In this task, we use the Transformer as our baseline model, and integrate several techniques to enhance the performance of the baseline, including data filtering, data selection, fine-tuning, and post-editing. Further, to make use of the English resources, such as Russian/English and Chinese/English parallel data, the relationship triangle is constructed by multilingual neural machine translation systems. As a result, our submission achieves a BLEU score of 21.9 in Russian-to-Chinese. 2021.wmt-1.38 @@ -533,7 +533,7 @@ Pivot Based Transfer Learning for Neural Machine Translation: <fixed-case>CFILT</fixed-case> <fixed-case>IITB</fixed-case> @ <fixed-case>WMT</fixed-case> 2021 Triangular <fixed-case>MT</fixed-case> ShivamMhaskar - PushpakBhattacharyya + PushpakBhattacharyya 336–340 In this paper, we discuss the various techniques that we used to implement the Russian-Chinese machine translation system for the Triangular MT task at WMT 2021. Neural Machine translation systems based on transformer architecture have an encoder-decoder architecture, which are trained end-to-end and require a large amount of parallel corpus to produce good quality translations. This is the reason why neural machine translation systems are referred to as data hungry. Such a large amount of parallel corpus is majorly available for language pairs which include English and not for non-English language pairs. This is a major problem in building neural machine translation systems for non-English language pairs. We try to utilize the resources of the English language to improve the translation of non-English language pairs. We use the pivot language, that is English, to leverage transfer learning to improve the quality of Russian-Chinese translation. Compared to the baseline transformer-based neural machine translation system, we observe that the pivot language-based transfer learning technique gives a higher BLEU score. 2021.wmt-1.39 @@ -563,7 +563,7 @@ JosefJon MichalNovák João PauloAires - DusanVaris + DusanVaris OndřejBojar 354–361 This paper describes Charles University sub-mission for Terminology translation shared task at WMT21. The objective of this task is to design a system which translates certain terms based on a provided terminology database, while preserving high overall translation quality. We competed in English-French language pair. Our approach is based on providing the desired translations alongside the input sentence and training the model to use these provided terms. We lemmatize the terms both during the training and inference, to allow the model to learn how to produce correct surface forms of the words, when they differ from the forms provided in the terminology database. @@ -573,7 +573,7 @@ Transfer Learning with Shallow Decoders: <fixed-case>BSC</fixed-case> at <fixed-case>WMT</fixed-case>2021’s Multilingual Low-Resource Translation for <fixed-case>I</fixed-case>ndo-<fixed-case>E</fixed-case>uropean Languages Shared Task KseniaKharitonova - Onade Gibert Bonet + Onade Gibert Bonet JordiArmengol-Estapé MarRodriguez i Alvarez MaiteMelero @@ -585,8 +585,8 @@ <fixed-case>E</fixed-case>din<fixed-case>S</fixed-case>aar@<fixed-case>WMT</fixed-case>21: <fixed-case>N</fixed-case>orth-<fixed-case>G</fixed-case>ermanic Low-Resource Multilingual <fixed-case>NMT</fixed-case> SvetlanaTchistiakova - JesujobaAlabi - KoelDutta Chowdhury + JesujobaAlabi + KoelDutta Chowdhury SouravDutta DanaRuiter 368–375 @@ -601,7 +601,7 @@ WanyingXie AmbyeraHan PanLiu - JinanXu + JinanXu QiJu 376–382 This paper describes TenTrans’ submission to WMT21 Multilingual Low-Resource Translation shared task for the Romance language pairs. This task focuses on improving translation quality from Catalan to Occitan, Romanian and Italian, with the assistance of related high-resource languages. We mainly utilize back-translation, pivot-based methods, multilingual models, pre-trained model fine-tuning, and in-domain knowledge transfer to improve the translation quality. On the test set, our best-submitted system achieves an average of 43.45 case-sensitive BLEU scores across all low-resource pairs. Our data, code, and pre-trained models used in this work are available in TenTrans evaluation examples. @@ -703,7 +703,7 @@ ShaohanHuang AlexandreMuzio SakshamSinghal - HanyHassan + HanyHassan XiaSong FuruWei 446–455 @@ -757,7 +757,7 @@ Just Ask! Evaluating Machine Translation by Asking and Answering Questions MateuszKrubiński ErfanGhadery - Marie-FrancineMoens + Marie-FrancineMoens PavelPecina 495–506 In this paper, we show that automatically-generated questions and answers can be used to evaluate the quality of Machine Translation (MT) systems. Building on recent work on the evaluation of abstractive text summarization, we propose a new metric for system-level MT evaluation, compare it with other state-of-the-art solutions, and show its robustness by conducting experiments for various MT directions. @@ -787,7 +787,7 @@ ShaxnozaPulatova DuyguAtaman JuliaKreutzer - FrancisTyers + FrancisTyers OrhanFirat JohnLicato SriramChellappan @@ -868,8 +868,8 @@ DipteshKanojia MarinaFomicheva TharinduRanasinghe - FrédéricBlain - ConstantinOrăsan + FrédéricBlain + ConstantinOrăsan LuciaSpecia 625–638 Current Machine Translation (MT) systems achieve very good results on a growing variety of language pairs and datasets. However, they are known to produce fluent translation outputs that can contain important meaning errors, thus undermining their reliability in practice. Quality Estimation (QE) is the task of automatically assessing the performance of MT systems at test time. Thus, in order to be useful, QE systems should be able to detect such errors. However, this ability is yet to be tested in the current evaluation practices, where QE systems are assessed only in terms of their correlation with human judgements. In this work, we bridge this gap by proposing a general methodology for adversarial testing of QE for MT. First, we show that despite a high correlation with human judgements achieved by the recent SOTA, certain types of meaning errors are still problematic for QE to detect. Second, we show that on average, the ability of a given model to discriminate between meaning-preserving and meaning-altering perturbations is predictive of its overall performance, thus potentially allowing for comparing QE systems without relying on manual quality annotation. @@ -893,8 +893,8 @@ Md Mahfuz IbnAlam IvanaKvapilíková AntoniosAnastasopoulos - LaurentBesacier - GeorgianaDinu + LaurentBesacier + GeorgianaDinu MarcelloFederico MatthiasGallé KweonwooJung @@ -914,12 +914,12 @@ FedericaVezzani AmySiu InigoJauregi Unanue - MaiteOronoz + MaiteOronoz NancyMah - AurélieNévéol - DavidMartinez + AurélieNévéol + DavidMartinez RachelBawden - Giorgio MariaDi Nunzio + Giorgio MariaDi Nunzio RolandRoller PhilippeThomas CristianGrozea @@ -934,7 +934,7 @@ Findings of the <fixed-case>WMT</fixed-case> 2021 Shared Task on Quality Estimation LuciaSpecia - FrédéricBlain + FrédéricBlain MarinaFomicheva ChrysoulaZerva ZhenhaoLi @@ -963,7 +963,7 @@ Chi-kiuLo CraigStewart GeorgeFoster - AlonLavie + AlonLavie OndřejBojar 733–774 This paper presents the results of the WMT21 Metrics Shared Task. Participants were asked to score the outputs of the translation systems competing in the WMT21 News Translation Task with automatic metrics on two different domains: news and TED talks. All metrics were evaluated on how well they correlate at the system- and segment-level with human ratings. Contrary to previous years’ editions, this year we acquired our own human ratings based on expert-based human evaluation via Multidimensional Quality Metrics (MQM). This setup had several advantages: (i) expert-based evaluation has been shown to be more reliable, (ii) we were able to evaluate all metrics on two different domains using translations of the same MT systems, (iii) we added 5 additional translations coming from the same system during system development. In addition, we designed three challenge sets that evaluate the robustness of all automatic metrics. We present an extensive analysis on how well metrics perform on three language pairs: English to German, English to Russian and Chinese to English. We further show the impact of different reference translations on reference-based metrics and compare our expert-based MQM annotation with the DA scores acquired by WMT. @@ -1081,7 +1081,7 @@ Dynamic Terminology Integration for <fixed-case>COVID</fixed-case>-19 and Other Emerging Domains TomsBergmanis - MārcisPinnis + MārcisPinnis 821–827 The majority of language domains require prudent use of terminology to ensure clarity and adequacy of information conveyed. While the correct use of terminology for some languages and domains can be achieved by adapting general-purpose MT systems on large volumes of in-domain parallel data, such quantities of domain-specific data are seldom available for less-resourced languages and niche domains. Furthermore, as exemplified by COVID-19 recently, no domain-specific parallel data is readily available for emerging domains. However, the gravity of this recent calamity created a high demand for reliable translation of critical information regarding pandemic and infection prevention. This work is part of WMT2021 Shared Task: Machine Translation using Terminologies, where we describe Tilde MT systems that are capable of dynamic terminology integration at the time of translation. Our systems achieve up to 94% COVID-19 term use accuracy on the test set of the EN-FR language pair without having access to any form of in-domain information during system training. 2021.wmt-1.81 @@ -1092,7 +1092,7 @@ JosefJon MichalNovák João PauloAires - DusanVaris + DusanVaris OndřejBojar 828–834 This paper describes Charles University sub-mission for Terminology translation Shared Task at WMT21. The objective of this task is to design a system which translates certain terms based on a provided terminology database, while preserving high overall translation quality. We competed in English-French language pair. Our approach is based on providing the desired translations alongside the input sentence and training the model to use these provided terms. We lemmatize the terms both during the training and inference, to allow the model to learn how to produce correct surface forms of the words, when they differ from the forms provided in the terminology database. Our submission ranked second in Exact Match metric which evaluates the ability of the model to produce desired terms in the translation. @@ -1111,8 +1111,8 @@ <fixed-case>SYSTRAN</fixed-case> @ <fixed-case>WMT</fixed-case> 2021: Terminology Task - Minh QuangPham - JosepCrego + Minh QuangPham + JosepCrego AntoineSenellart DanBerrebbi JeanSenellart @@ -1196,7 +1196,7 @@ <fixed-case>RTM</fixed-case> Super Learner Results at Quality Estimation Task - ErgunBiçici + ErgunBiçici 885–889 We obtain new results using referential translation machines (RTMs) with predictions mixed to obtain a better mixture of experts prediction. Our super learner results improve the results and provide a robust combination model. 2021.wmt-1.91 @@ -1288,7 +1288,7 @@ <fixed-case>NICT</fixed-case> <fixed-case>K</fixed-case>yoto Submission for the <fixed-case>WMT</fixed-case>’21 Quality Estimation Task: Multimetric Multilingual Pretraining for Critical Error Detection - RaphaelRubino + RaphaelRubino AtsushiFujita BenjaminMarie 941–947 @@ -1327,10 +1327,10 @@ RicardoRei Ana CFarinha PedroRamos - José G.C. de Souza + José G.C. de Souza TaisiyaGlushkova MiguelVera - FabioKepler + FabioKepler André F. T.Martins 961–972 We present the joint contribution of IST and Unbabel to the WMT 2021 Shared Task on Quality Estimation. Our team participated on two tasks: Direct Assessment and Post-Editing Effort, encompassing a total of 35 submissions. For all submissions, our efforts focused on training multilingual models on top of OpenKiwi predictor-estimator architecture, using pre-trained multilingual encoders combined with adapters. We further experiment with and uncertainty-related objectives and features as well as training on out-of-domain direct assessment data. @@ -1339,11 +1339,11 @@ The <fixed-case>IICT</fixed-case>-Yverdon System for the <fixed-case>WMT</fixed-case> 2021 Unsupervised <fixed-case>MT</fixed-case> and Very Low Resource Supervised <fixed-case>MT</fixed-case> Task - Àlex R.Atrio + Àlex R.Atrio GabrielLuthier AxelFahy GiorgosVernikos - AndreiPopescu-Belis + AndreiPopescu-Belis LjiljanaDolamic 973–981 In this paper, we present the systems submitted by our team from the Institute of ICT (HEIG-VD / HES-SO) to the Unsupervised MT and Very Low Resource Supervised MT task. We first study the improvements brought to a baseline system by techniques such as back-translation and initialization from a parent model. We find that both techniques are beneficial and suffice to reach performance that compares with more sophisticated systems from the 2020 task. We then present the application of this system to the 2021 task for low-resource supervised Upper Sorbian (HSB) to German translation, in both directions. Finally, we present a contrastive system for HSB-DE in both directions, and for unsupervised German to Lower Sorbian (DSB) translation, which uses multi-task training with various training schedules to improve over the baseline. @@ -1355,7 +1355,7 @@ LukasEdman AhmetÜstün AntonioToral - Gertjanvan Noord + Gertjanvan Noord 982–988 This paper describes the methods behind the systems submitted by the University of Groningen for the WMT 2021 Unsupervised Machine Translation task for German–Lower Sorbian (DE–DSB): a high-resource language to a low-resource one. Our system uses a transformer encoder-decoder architecture in which we make three changes to the standard training procedure. First, our training focuses on two languages at a time, contrasting with a wealth of research on multilingual systems. Second, we introduce a novel method for initializing the vocabulary of an unseen language, achieving improvements of 3.2 BLEU for DE->DSB and 4.0 BLEU for DSB->DE.Lastly, we experiment with the order in which offline and online back-translation are used to train an unsupervised system, finding that using online back-translation first works better for DE->DSB by 2.76 BLEU. Our submissions ranked first (tied with another team) for DSB->DE and third for DE->DSB. 2021.wmt-1.104 @@ -1374,7 +1374,7 @@ Language Model Pretraining and Transfer Learning for Very Low Resource Languages JyotsanaKhatri RudraMurthy - PushpakBhattacharyya + PushpakBhattacharyya 995–998 This paper describes our submission for the shared task on Unsupervised MT and Very Low Resource Supervised MT at WMT 2021. We submitted systems for two language pairs: German ↔ Upper Sorbian (de ↔ hsb) and German-Lower Sorbian (de ↔ dsb). For de ↔ hsb, we pretrain our system using MASS (Masked Sequence to Sequence) objective and then finetune using iterative back-translation. Final finetunng is performed using the parallel data provided for translation objective. For de ↔ dsb, no parallel data is provided in the task, we use final de ↔ hsb model as initialization of the de ↔ dsb model and train it further using iterative back-translation, using the same vocabulary as used in the de ↔ hsb model. 2021.wmt-1.106 @@ -1417,7 +1417,7 @@ <fixed-case>MTEQA</fixed-case> at <fixed-case>WMT</fixed-case>21 Metrics Shared Task MateuszKrubiński ErfanGhadery - Marie-FrancineMoens + Marie-FrancineMoens PavelPecina 1024–1029 In this paper, we describe our submission to the WMT 2021 Metrics Shared Task. We use the automatically-generated questions and answers to evaluate the quality of Machine Translation (MT) systems. Our submission builds upon the recently proposed MTEQA framework. Experiments on WMT20 evaluation datasets show that at the system-level the MTEQA metric achieves performance comparable with other state-of-the-art solutions, while considering only a certain amount of information from the whole translation. @@ -1434,7 +1434,7 @@ PedroRamos TaisiyaGlushkova André F. T.Martins - AlonLavie + AlonLavie 1030–1040 In this paper, we present the joint contribution of Unbabel and IST to the WMT 2021 Metrics Shared Task. With this year’s focus on Multidimensional Quality Metric (MQM) as the ground-truth human assessment, our aim was to steer COMET towards higher correlations with MQM. We do so by first pre-training on Direct Assessments and then fine-tuning on z-normalized MQM scores. In our experiments we also show that reference-free COMET models are becoming competitive with reference-based models, even outperforming the best COMET model from 2020 on this year’s development data. Additionally, we present COMETinho, a lightweight COMET model that is 19x faster on CPU than the original model, while also achieving state-of-the-art correlations with MQM. Finally, in the “QE as a metric” track, we also participated with a QE model trained using the OpenKiwi framework leveraging MQM scores and word-level annotations. 2021.wmt-1.111 @@ -1512,7 +1512,7 @@ Learning Feature Weights using Reward Modeling for Denoising Parallel Corpora GauravKumar PhilippKoehn - SanjeevKhudanpur + SanjeevKhudanpur 1100–1109 Large web-crawled corpora represent an excellent resource for improving the performance of Neural Machine Translation (NMT) systems across several language pairs. However, since these corpora are typically extremely noisy, their use is fairly limited. Current approaches to deal with this problem mainly focus on filtering using heuristics or single features such as language model scores or bi-lingual similarity. This work presents an alternative approach which learns weights for multiple sentence-level features. These feature weights which are optimized directly for the task of improving translation performance, are used to score and filter sentences in the noisy corpora more effectively. We provide results of applying this technique to building NMT systems using the Paracrawl corpus for Estonian-English and show that it beats strong single feature baselines and hand designed combinations. Additionally, we analyze the sensitivity of this method to different types of noise and explore if the learned weights generalize to other language pairs using the Maltese-English Paracrawl corpus. 2021.wmt-1.118 @@ -1521,7 +1521,7 @@ Monotonic Simultaneous Translation with Chunk-wise Reordering and Refinement - HyoJungHan + HyoJungHan SeokchanAhn YoonjungChoi InsooChung diff --git a/data/xml/2021.wnut.xml b/data/xml/2021.wnut.xml index 1503859418..50534d8249 100644 --- a/data/xml/2021.wnut.xml +++ b/data/xml/2021.wnut.xml @@ -5,7 +5,7 @@ Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021) WeiXu AlanRitter - TimBaldwin + TimBaldwin AfshinRahimi Association for Computational Linguistics
Online
@@ -33,7 +33,7 @@ Finding the needle in a haystack: Extraction of Informative <fixed-case>COVID</fixed-case>-19 <fixed-case>D</fixed-case>anish Tweets BenjaminOlsen - BarbaraPlank + BarbaraPlank 11–19 Finding informative COVID-19 posts in a stream of tweets is very useful to monitor health-related updates. Prior work focused on a balanced data setup and on English, but informative tweets are rare, and English is only one of the many languages spoken in the world. In this work, we introduce a new dataset of 5,000 tweets for finding informative COVID-19 tweets for Danish. In contrast to prior work, which balances the label distribution, we model the problem by keeping its natural distribution. We examine how well a simple probabilistic model and a convolutional neural network (CNN) perform on this task. We find a weighted CNN to work well but it is sensitive to embedding and hyperparameter choices. We hope the contributed dataset is a starting point for further work in this direction. 2021.wnut-1.2 @@ -115,7 +115,7 @@ ShoheiHigashiyama MasaoUtiyama TaroWatanabe - EiichiroSumita + EiichiroSumita 67–80 Lexical normalization, in addition to word segmentation and part-of-speech tagging, is a fundamental task for Japanese user-generated text processing. In this paper, we propose a text editing model to solve the three task jointly and methods of pseudo-labeled data generation to overcome the problem of data deficiency. Our experiments showed that the proposed model achieved better normalization performance when trained on more diverse pseudo-labeled data. 2021.wnut-1.9 @@ -170,7 +170,7 @@ Common Sense Bias in Semantic Role Labeling HeatherLent - AndersSøgaard + AndersSøgaard 114–119 Large-scale language models such as ELMo and BERT have pushed the horizon of what is possible in semantic role labeling (SRL), solving the out-of-vocabulary problem and enabling end-to-end systems, but they have also introduced significant biases. We evaluate three SRL parsers on very simple transitive sentences with verbs usually associated with animate subjects and objects, such as, “Mary babysat Tom”: a state-of-the-art parser based on BERT, an older parser based on GloVe, and an even older parser from before the days of word embeddings. When arguments are word forms predominantly used as person names, aligning with common sense expectations of animacy, the BERT-based parser is unsurprisingly superior; yet, with abstract or random nouns, the opposite picture emerges. We refer to this as “common sense bias” and present a challenge dataset for evaluating the extent to which parsers are sensitive to such a bias. Our code and challenge dataset are available here: github.com/coastalcph/comte 2021.wnut-1.14 @@ -180,7 +180,7 @@ <fixed-case>P</fixed-case>oli<fixed-case>WAM</fixed-case>: An Exploration of a Large Scale Corpus of Political Discussions on <fixed-case>W</fixed-case>hats<fixed-case>A</fixed-case>pp Messenger VivekSrivastava - MayankSingh + MayankSingh 120–130 WhatsApp Messenger is one of the most popular channels for spreading information with a current reach of more than 180 countries and 2 billion people. Its widespread usage has made it one of the most popular media for information propagation among the masses during any socially engaging event. In the recent past, several countries have witnessed its effectiveness and influence in political and social campaigns. We observe a high surge in information and propaganda flow during election campaigning. In this paper, we explore a high-quality large-scale user-generated dataset curated from WhatsApp comprising of 281 groups, 31,078 unique users, and 223,404 messages shared before, during, and after the Indian General Elections 2019, encompassing all major Indian political parties and leaders. In addition to the raw noisy user-generated data, we present a fine-grained annotated dataset of 3,848 messages that will be useful to understand the various dimensions of WhatsApp political campaigning. We present several complementary insights into the investigative and sensational news stories from the same period. Exploratory data analysis and experiments showcase several exciting results and future research opportunities. To facilitate reproducible research, we make the anonymized datasets available in the public domain. 2021.wnut-1.15 @@ -226,7 +226,7 @@ ChengChen Md Tahmid RahmanLaskar ShashiBhushan - SimonCorston-Oliver + SimonCorston-Oliver 168–174 Automatic Speech Recognition (ASR) systems generally do not produce punctuated transcripts. To make transcripts more readable and follow the expected input format for downstream language models, it is necessary to add punctuation marks. In this paper, we tackle the punctuation restoration problem specifically for the noisy text (e.g., phone conversation scenarios). To leverage the available written text datasets, we introduce a data sampling technique based on an n-gram language model to sample more training data that are similar to our in-domain data. Moreover, we propose a two-stage fine-tuning approach that utilizes the sampled external data as well as our in-domain dataset for models based on BERT. Extensive experiments show that the proposed approach outperforms the baseline with an improvement of 1.12% F1 score. 2021.wnut-1.19 @@ -258,7 +258,7 @@ Understanding the Impact of <fixed-case>UGC</fixed-case> Specificities on Translation Quality José CarlosRosales Núñez - DjaméSeddah + DjaméSeddah GuillaumeWisniewski 189–198 This work takes a critical look at the evaluation of user-generated content automatic translation, the well-known specificities of which raise many challenges for MT. Our analyses show that measuring the average-case performance using a standard metric on a UGC test set falls far short of giving a reliable image of the UGC translation quality. That is why we introduce a new data set for the evaluation of UGC translation in which UGC specificities have been manually annotated using a fine-grained typology. Using this data set, we conduct several experiments to measure the impact of different kinds of UGC specificities on translation quality, more precisely than previously possible. @@ -270,7 +270,7 @@ Noisy <fixed-case>UGC</fixed-case> Translation at the Character Level: Revisiting Open-Vocabulary Capabilities and Robustness of Char-Based Models José CarlosRosales Núñez GuillaumeWisniewski - DjaméSeddah + DjaméSeddah 199–211 This work explores the capacities of character-based Neural Machine Translation to translate noisy User-Generated Content (UGC) with a strong focus on exploring the limits of such approaches to handle productive UGC phenomena, which almost by definition, cannot be seen at training time. Within a strict zero-shot scenario, we first study the detrimental impact on translation performance of various user-generated content phenomena on a small annotated dataset we developed and then show that such models are indeed incapable of handling unknown letters, which leads to catastrophic translation failure once such characters are encountered. We further confirm this behavior with a simple, yet insightful, copy task experiment and highlight the importance of reducing the vocabulary size hyper-parameter to increase the robustness of character-based models for machine translation. 2021.wnut-1.23 @@ -323,7 +323,7 @@ JustynaGromada EwelinaSzczekocka RobertKołodyński - GéraldineDamnati + GéraldineDamnati 238–248 Following the increasing performance of neural machine translation systems, the paradigm of using automatically translated data for cross-lingual adaptation is now studied in several applicative domains. The capacity to accurately project annotations remains however an issue for sequence tagging tasks where annotation must be projected with correct spans. Additionally, when the task implies noisy user-generated text, the quality of translation and annotation projection can be affected. In this paper we propose to tackle multilingual sequence tagging with a new span alignment method and apply it to opinion target extraction from customer reviews. We show that provided suitable heuristics, translated data with automatic span-level annotation projection can yield improvements both for cross-lingual adaptation compared to zero-shot transfer, and data augmentation compared to a multilingual baseline. 2021.wnut-1.27 @@ -334,7 +334,7 @@ A Novel Framework for Detecting Important Subevents from Crisis Events via Dynamic Semantic Graphs EvangeliaSpiliopoulou Tanay KumarSaha - JoelTetreault + JoelTetreault AlejandroJaimes 249–259 Social media is an essential tool to share information about crisis events, such as natural disasters. Event Detection aims at extracting information in the form of an event, but considers each event in isolation, without combining information across sentences or events. Many posts in Crisis NLP contain repetitive or complementary information which needs to be aggregated (e.g., the number of trapped people and their location) for disaster response. Although previous approaches in Crisis NLP aggregate information across posts, they only use shallow representations of the content (e.g., keywords), which cannot adequately represent the semantics of a crisis event and its sub-events. In this work, we propose a novel framework to extract critical sub-events from a large-scale crisis event by combining important information across relevant tweets. Our framework first converts all the tweets from a crisis event into a temporally-ordered set of graphs. Then it extracts sub-graphs that represent semantic relationships connecting verbs and nouns in 3 to 6 node sub-graphs. It does this by learning edge weights via Dynamic Graph Convolutional Networks (DGCNs) and extracting smaller, relevant sub-graphs. Our experiments show that our extracted structures (1) are semantically meaningful sub-events and (2) contain information important for the large crisis-event. Furthermore, we show that our approach significantly outperforms event detection baselines, highlighting the importance of aggregating information across tweets for our task. @@ -395,7 +395,7 @@ KhalilBibi ChengyangLi AliGhodsi - PhillippeLanglais + PhillippeLanglais MehdiRezagholizadeh 297–303 Knowledge Distillation (KD) is extensively used to compress and deploy large pre-trained language models on edge devices for real-world applications. However, one neglected area of research is the impact of noisy (corrupted) labels on KD. We present, to the best of our knowledge, the first study on KD with noisy labels in Natural Language Understanding (NLU). We document the scope of the problem and present two methods to mitigate the impact of label noise. Experiments on the GLUE benchmark show that our methods are effective even under high noise levels. Nevertheless, our results indicate that more research is necessary to cope with label noise under the KD. @@ -551,8 +551,8 @@ Can Character-based Language Models Improve Downstream Task Performances In Low-Resource And Noisy Language Scenarios? ArijRiabi - BenoîtSagot - DjaméSeddah + BenoîtSagot + DjaméSeddah 423–436 Recent impressive improvements in NLP, largely based on the success of contextual neural language models, have been mostly demonstrated on at most a couple dozen high- resource languages. Building language mod- els and, more generally, NLP systems for non- standardized and low-resource languages remains a challenging task. In this work, we fo- cus on North-African colloquial dialectal Arabic written using an extension of the Latin script, called NArabizi, found mostly on social media and messaging communication. In this low-resource scenario with data display- ing a high level of variability, we compare the downstream performance of a character-based language model on part-of-speech tagging and dependency parsing to that of monolingual and multilingual models. We show that a character-based model trained on only 99k sentences of NArabizi and fined-tuned on a small treebank of this language leads to performance close to those obtained with the same architecture pre- trained on large multilingual and monolingual models. Confirming these results a on much larger data set of noisy French user-generated content, we argue that such character-based language models can be an asset for NLP in low-resource and high language variability set- tings. 2021.wnut-1.47 @@ -617,7 +617,7 @@ Sequence-to-Sequence Lexical Normalization with Multilingual Transformers Ana-MariaBucur AdrianCosma - Liviu P.Dinu + Liviu P.Dinu 473–482 Current benchmark tasks for natural language processing contain text that is qualitatively different from the text used in informal day to day digital communication. This discrepancy has led to severe performance degradation of state-of-the-art NLP models when fine-tuned on real-world data. One way to resolve this issue is through lexical normalization, which is the process of transforming non-standard text, usually from social media, into a more standardized form. In this work, we propose a sentence-level sequence-to-sequence model based on mBART, which frames the problem as a machine translation problem. As the noisy text is a pervasive problem across languages, not just English, we leverage the multi-lingual pre-training of mBART to fine-tune it to our data. While current approaches mainly operate at the word or subword level, we argue that this approach is straightforward from a technical standpoint and builds upon existing pre-trained transformer networks. Our results show that while word-level, intrinsic, performance evaluation is behind other methods, our model improves performance on extrinsic, downstream tasks through normalization compared to models operating on raw, unprocessed, social media text. 2021.wnut-1.53 @@ -639,11 +639,11 @@ Robvan der Goot AlanRamponi ArkaitzZubiaga - BarbaraPlank + BarbaraPlank BenjaminMuller IñakiSan Vicente Roncal NikolaLjubešić - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu RahmadMahendra TalhaÇolakoğlu TimothyBaldwin diff --git a/data/xml/2021.woah.xml b/data/xml/2021.woah.xml index 99a7a0cc4a..db7b18fbd3 100644 --- a/data/xml/2021.woah.xml +++ b/data/xml/2021.woah.xml @@ -8,7 +8,7 @@ MathiasLambert BertieVidgen VinodkumarPrabhakaran - ZeerakWaseem + ZeerakWaseem Association for Computational Linguistics
Online
August @@ -160,7 +160,7 @@ Hell Hath No Fury? Correcting Bias in the <fixed-case>NRC</fixed-case> Emotion Lexicon SamiraZad JoshuanJimenez - MarkFinlayson + MarkFinlayson 102–113 There have been several attempts to create an accurate and thorough emotion lexicon in English, which identifies the emotional content of words. Of the several commonly used resources, the NRC emotion lexicon (Mohammad and Turney, 2013b) has received the most attention due to its availability, size, and its choice of Plutchik’s expressive 8-class emotion model. In this paper we identify a large number of troubling entries in the NRC lexicon, where words that should in most contexts be emotionally neutral, with no affect (e.g., ‘lesbian’, ‘stone’, ‘mountain’), are associated with emotional labels that are inaccurate, nonsensical, pejorative, or, at best, highly contingent and context-dependent (e.g., ‘lesbian’ labeled as Disgust and Sadness, ‘stone’ as Anger, or ‘mountain’ as Anticipation). We describe a procedure for semi-automatically correcting these problems in the NRC, which includes disambiguating POS categories and aligning NRC entries with other emotion lexicons to infer the accuracy of labels. We demonstrate via an experimental benchmark that the quality of the resources is thus improved. We release the revised resource and our code to enable other researchers to reproduce and build upon results. 2021.woah-1.11 @@ -173,7 +173,7 @@ Yung-SungChuang MingyeGao HongyinLuo - JamesGlass + JamesGlass Hung-yiLee Yun-NungChen Shang-WenLi @@ -190,7 +190,7 @@ PeterBourgonje KarolinaZaczynska MalteOstendorff - JulianMoreno-Schneider + JulianMoreno-Schneider GeorgRehm 121–131 We present a data set consisting of German news articles labeled for political bias on a five-point scale in a semi-supervised way. While earlier work on hyperpartisan news detection uses binary classification (i.e., hyperpartisan or not) and English data, we argue for a more fine-grained classification, covering the full political spectrum (i.e., far-left, left, centre, right, far-right) and for extending research to German data. Understanding political bias helps in accurately detecting hate speech and online abuse. We experiment with different classification methods for political bias detection. Their comparatively low performance (a macro-F1 of 43 for our best setup, compared to a macro-F1 of 79 for the binary classification task) underlines the need for more (balanced) data annotated in a fine-grained way. diff --git a/data/xml/2022.aacl.xml b/data/xml/2022.aacl.xml index b839782aff..b77e10b4f4 100644 --- a/data/xml/2022.aacl.xml +++ b/data/xml/2022.aacl.xml @@ -36,7 +36,7 @@ Double Trouble: How to not Explain a Text Classifier’s Decisions Using Counterfactuals Synthesized by Masked Language Models? ThangPham - TrungBui + TrungBui LongMai AnhNguyen 12–31 @@ -87,8 +87,8 @@ Systematic Evaluation of Predictive Fairness XudongHan AiliShen - TrevorCohn - TimothyBaldwin + TrevorCohn + TimothyBaldwin LeaFrermann 68–81 Mitigating bias in training on biased datasets is an important open problem. Several techniques have been proposed, however the typical evaluation regime is very limited, considering very narrow data conditions. For instance, the effect of target class imbalance and stereotyping is under-studied. To address this gap, we examine the performance of various debiasing methods across multiple tasks, spanning binary classification (Twitter sentiment), multi-class classification (profession prediction), and regression (valence prediction). Through extensive experimentation, we find that data conditions have a strong influence on relative model performance, and that general conclusions cannot be drawn about method efficacy when evaluating only on standard datasets, as is current practice in fairness research. @@ -124,7 +124,7 @@ <fixed-case>WAX</fixed-case>: A New Dataset for Word Association e<fixed-case>X</fixed-case>planations ChunhuaLiu - TrevorCohn + TrevorCohn Simon DeDeyne LeaFrermann 106–120 @@ -152,7 +152,7 @@ GwénoléLecorvé MorganVeyret QuentinBrabant - Lina M.Rojas Barahona + Lina M.Rojas Barahona 131–147 This paper focuses on the generation of natural language questions based on SPARQL queries, with an emphasis on conversational use cases (follow-up question-answering). It studies what can be achieved so far based on current deep learning models (namely pretrained T5 and BART models). To do so, 4 knowledge-based QA corpora have been homogenized for the task and a new challenge set is introduced. A first series of experiments analyzes the impact of different training setups, while a second series seeks to understand what is still difficult for these models. The results from automatic metrics and human evaluation show that simple questions and frequent templates of SPARQL queries are usually well processed whereas complex questions and conversational dimensions (coreferences and ellipses) are still difficult to handle. The experimental material is publicly available on https://github.com/Orange-OpenSource/sparql-to-text . 2022.aacl-main.11 @@ -212,7 +212,7 @@ <fixed-case>A</fixed-case>rabic Dialect Identification with a Few Labeled Examples Using Generative Adversarial Networks MahmoudYusuf MarwanTorki - NagwaEl-Makky + NagwaEl-Makky 196–204 Given the challenges and complexities introduced while dealing with Dialect Arabic (DA) variations, Transformer based models, e.g., BERT, outperformed other models in dealing with the DA identification task. However, to fine-tune these models, a large corpus is required. Getting a large number high quality labeled examples for some Dialect Arabic classes is challenging and time-consuming. In this paper, we address the Dialect Arabic Identification task. We extend the transformer-based models, ARBERT and MARBERT, with unlabeled data in a generative adversarial setting using Semi-Supervised Generative Adversarial Networks (SS-GAN). Our model enabled producing high-quality embeddings for the Dialect Arabic examples and aided the model to better generalize for the downstream classification task given few labeled examples. Experimental results showed that our model reached better performance and faster convergence when only a few labeled examples are available. 2022.aacl-main.16 @@ -236,7 +236,7 @@ AnthiPapadopoulou YunhaoYu PierreLison - LiljaØvrelid + LiljaØvrelid 217–229 We present a novel approach for text sanitization, which is the task of editing a document to mask all (direct and indirect) personal identifiers and thereby conceal the identity of the individuals(s) mentioned in the text. In contrast to previous work, the approach relies on explicit measures of privacy risk, making it possible to explicitly control the trade-off between privacy protection and data utility. The approach proceeds in three steps. A neural, privacy-enhanced entity recognizer is first employed to detect and classify potential personal identifiers. We then determine which entities, or combination of entities, are likely to pose a re-identification risk through a range of privacy risk assessment measures. We present three such measures of privacy risk, respectively based on (1) span probabilities derived from a BERT language model, (2) web search queries and (3) a classifier trained on labelled data. Finally, a linear optimization solver decides which entities to mask to minimize the semantic loss while simultaneously ensuring that the estimated privacy risk remains under a given threshold. We evaluate the approach both in the absence and presence of manually annotated data. Our results highlight the potential of the approach, as well as issues specific types of personal data can introduce to the process. 2022.aacl-main.18 @@ -286,7 +286,7 @@ AntonChernyavskiy IvanKoychev DmitryIlvovsky - PreslavNakov + PreslavNakov 266–285 While there has been substantial progress in developing systems to automate fact-checking, they still lack credibility in the eyes of the users. Thus, an interesting approach has emerged: to perform automatic fact-checking by verifying whether an input claim has been previously fact-checked by professional fact-checkers and to return back an article that explains their decision. This is a sensible approach as people trust manual fact-checking, and as many claims are repeated multiple times. Yet, a major issue when building such systems is the small number of known tweet–verifying article pairs available for training. Here, we aim to bridge this gap by making use of crowd fact-checking, i.e., mining claims in social media for which users have responded with a link to a fact-checking article. In particular, we mine a large-scale collection of 330,000 tweets paired with a corresponding fact-checking article. We further propose an end-to-end framework to learn from this noisy data based on modified self-adaptive training, in a distant supervision scenario. Our experiments on the CLEF’21 CheckThat! test set show improvements over the state of the art by two points absolute. Our code and datasets are available at https://github.com/mhardalov/crowdchecked-claims 2022.aacl-main.22 @@ -321,7 +321,7 @@ FrancescoBarbieri VitorSousa LeonardoNeves - JoseCamacho-Collados + JoseCamacho-Collados 309–319 Recent progress in language model pre-training has led to important improvements in Named Entity Recognition (NER). Nonetheless, this progress has been mainly tested in well-formatted documents such as news, Wikipedia, or scientific articles. In social media the landscape is different, in which it adds another layer of complexity due to its noisy and dynamic nature. In this paper, we focus on NER in Twitter, one of the largest social media platforms, and construct a new NER dataset, TweetNER7, which contains seven entity types annotated over 11,382 tweets from September 2019 to August 2021. The dataset was constructed by carefully distributing the tweets over time and taking representative trends as a basis. Along with the dataset, we provide a set of language model baselines and perform an analysis on the language model performance on the task, especially analyzing the impact of different time periods. In particular, we focus on three important temporal aspects in our analysis: short-term degradation of NER models over time, strategies to fine-tune a language model over different periods, and self-labeling as an alternative to lack of recently-labeled data. TweetNER7 is released publicly (https://huggingface.co/datasets/tner/tweetner7) along with the models fine-tuned on it (NER models have been integrated into TweetNLP and can be found at https://github.com/asahi417/tner/tree/master/examples/tweetner7_paper). 2022.aacl-main.25 @@ -346,7 +346,7 @@ Cross-Lingual Open-Domain Question Answering with Answer Sentence Generation BenjaminMuller LucaSoldaini - RikKoncel-Kedziorski + RikKoncel-Kedziorski EricLind AlessandroMoschitti 337–353 @@ -361,7 +361,7 @@ YuqingXing LongyinZhang FangKong - GuodongZhou + GuodongZhou 354–363 In recent years, top-down neural models have achieved significant success in text-level discourse parsing. Nevertheless, they still suffer from the top-down error propagation issue, especially when the performance on the upper-level tree nodes is terrible. In this research, we aim to learn from the correlations in between EDUs directly to shorten the hierarchical distance of the RST structure to alleviate the above problem. Specifically, we contribute a joint top-down framework that learns from both discourse dependency and constituency parsing through one shared encoder and two independent decoders. Moreover, we also explore a constituency-to-dependency conversion scheme tailored for the Chinese discourse corpus to ensure the high quality of the joint learning process. Our experimental results on CDTB show that the dependency information we use well heightens the understanding of the rhetorical structure, especially for the upper-level tree layers. 2022.aacl-main.28 @@ -377,7 +377,7 @@ LeiGuo PrakashIshwar MargritBetke - Derry TantiWijaya + Derry TantiWijaya 364–374 We aim to develop methods for understanding how multimedia news exposure can affect people’s emotional responses, and we especially focus on news content related to gun violence, a very important yet polarizing issue in the U.S. We created the dataset NEmo+ by significantly extending the U.S. gun violence news-to-emotions dataset, BU-NEmo, from 320 to 1,297 news headline and lead image pairings and collecting 38,910 annotations in a large crowdsourcing experiment. In curating the NEmo+ dataset, we developed methods to identify news items that will trigger similar versus divergent emotional responses. For news items that trigger similar emotional responses, we compiled them into the NEmo+-Consensus dataset. We benchmark models on this dataset that predict a person’s dominant emotional response toward the target news item (single-label prediction). On the full NEmo+ dataset, containing news items that would lead to both differing and similar emotional responses, we also benchmark models for the novel task of predicting the distribution of evoked emotional responses in humans when presented with multi-modal news content. Our single-label and multi-label prediction models outperform baselines by large margins across several metrics. 2022.aacl-main.29 @@ -389,7 +389,7 @@ <fixed-case>A</fixed-case>ug<fixed-case>CSE</fixed-case>: Contrastive Sentence Embedding with Diverse Augmentations ZiluTang Muhammed YusufKocyigit - Derry TantiWijaya + Derry TantiWijaya 375–398 Data augmentation techniques have been proven useful in many applications in NLP fields. Most augmentations are task-specific, and cannot be used as a general-purpose tool. In our work, we present AugCSE, a unified framework to utilize diverse sets of data augmentations to achieve a better, general-purpose, sentence embedding model. Building upon the latest sentence embedding models, our approach uses a simple antagonistic discriminator that differentiates the augmentation types. With the finetuning objective borrowed from domain adaptation, we show that diverse augmentations, which often lead to conflicting contrastive signals, can be tamed to produce a better and more robust sentence representation. Our methods achieve state-of-the-art results on downstream transfer tasks and perform competitively on semantic textual similarity tasks, using only unsupervised data. 2022.aacl-main.30 @@ -415,7 +415,7 @@ Dual-Encoder Transformers with Cross-modal Alignment for Multimodal Aspect-based Sentiment Analysis ZhewenYu JinWang - Liang-ChihYu + Liang-ChihYu XuejieZhang 414–423 Multimodal aspect-based sentiment analysis (MABSA) aims to extract the aspect terms from text and image pairs, and then analyze their corresponding sentiment. Recent studies typically use either a pipeline method or a unified transformer based on a cross-attention mechanism. However, these methods fail to explicitly and effectively incorporate the alignment between text and image. Supervised finetuning of the universal transformers for MABSA still requires a certain number of aligned image-text pairs. This study proposes a dual-encoder transformer with cross-modal alignment (DTCA). Two auxiliary tasks, including text-only extraction and text-patch alignment are introduced to enhance cross-attention performance. To align text and image, we propose an unsupervised approach which minimizes the Wasserstein distance between both modalities, forcing both encoders to produce more appropriate representations for the final extraction. Experimental results on two benchmarks demonstrate that DTCA consistently outperforms existing methods. @@ -427,7 +427,7 @@ <fixed-case>AVAST</fixed-case>: Attentive Variational State Tracker in a Reinforced Navigator Je-WeiJang MahdinRohmatillah - Jen-TzungChien + Jen-TzungChien 424–433 Recently, emerging approaches have been proposed to deal with robotic navigation problems, especially vision-and-language navigation task which is one of the most realistic indoor navigation challenge tasks. This task can be modelled as a sequential decision-making problem, which is suitable to be solved by deep reinforcement learning. Unfortunately, the observations provided from the simulator in this task are not fully observable states, which exacerbate the difficulty of implementing reinforcement learning. To deal with this challenge, this paper presents a novel method, called as attentive variational state tracker (AVAST), a variational approach to approximate belief state distribution for the construction of a reinforced navigator. The variational approach is introduced to improve generalization to the unseen environment which barely achieved by traditional deterministic state tracker. In order to stabilize the learning procedure, a fine-tuning process using policy optimization is proposed. From the experimental results, the proposed AVAST does improve the generalization relative to previous works in vision-and-language navigation task. A significant performance is achieved without requiring any additional exploration in the unseen environment. 2022.aacl-main.33 @@ -459,7 +459,7 @@ Bag-of-Vectors Autoencoders for Unsupervised Conditional Text Generation FlorianMai - JamesHenderson + JamesHenderson 468–488 Text autoencoders are often used for unsupervised conditional text generation by applying mappings in the latent space to change attributes to the desired values. Recently, Mai et al. (2020) proposed Emb2Emb, a method to learn these mappings in the embedding space of an autoencoder. However, their method is restricted to autoencoders with a single-vector embedding, which limits how much information can be retained. We address this issue by extending their method to Bag-of-Vectors Autoencoders (BoV-AEs), which encode the text into a variable-size bag of vectors that grows with the size of the text, as in attention-based models. This allows to encode and reconstruct much longer texts than standard autoencoders. Analogous to conventional autoencoders, we propose regularization techniques that facilitate learning meaningful operations in the latent space. Finally, we adapt Emb2Emb for a training scheme that learns to map an input bag to an output bag, including a novel loss function and neural architecture. Our empirical evaluations on unsupervised sentiment transfer show that our method performs substantially better than a standard autoencoder. 2022.aacl-main.36 @@ -474,7 +474,7 @@ LeiSha CanXu DaxinJiang - Kam-FaiWong + Kam-FaiWong 489–500 Conversational Recommender System (CRS), which aims to recommend high-quality items to users through interactive conversations, has gained great research interest recently. A CRS is usually composed of a recommendation module and a generation module. In the previous work, these two modules are loosely connected in the model training and are shallowly integrated during inference, where a simple switching or copy mechanism is adopted to incorporate recommended items into generated responses. Moreover, the current end-to-end neural models trained on small crowd-sourcing datasets (e.g., 10K dialogs in the ReDial dataset) tend to overfit and have poor chit-chat ability. In this work, we propose a novel unified framework that integrates recommendation into the dialog (RecInDial) generation by introducing a vocabulary pointer. To tackle the low-resource issue in CRS, we finetune the large-scale pretrained language models to generate fluent and diverse responses, and introduce a knowledge-aware bias learned from an entity-oriented knowledge graph to enhance the recommendation performance. Furthermore, we propose to evaluate the CRS models in an end-to-end manner, which can reflect the overall performance of the entire system rather than the performance of individual modules, compared to the separate evaluations of the two modules used in previous work. Experiments on the benchmark dataset ReDial show our RecInDial model significantly surpasses the state-of-the-art methods. More extensive analyses show the effectiveness of our model. 2022.aacl-main.37 @@ -531,7 +531,7 @@ Affective Retrofitted Word Embeddings SapanShah SreedharReddy - PushpakBhattacharyya + PushpakBhattacharyya 550–561 Word embeddings learned using the distributional hypothesis (e.g., GloVe, Word2vec) do not capture the affective dimensions of valence, arousal, and dominance, which are present inherently in words. We present a novel retrofitting method for updating embeddings of words for their affective meaning. It learns a non-linear transformation function that maps pre-trained embeddings to an affective vector space, in a representation learning setting. We investigate word embeddings for their capacity to cluster emotion-bearing words. The affective embeddings learned by our method achieve better inter-cluster and intra-cluster distance for words having the same emotions, as evaluated through different cluster quality metrics. For the downstream tasks on sentiment analysis and sarcasm detection, simple classification models, viz. SVM and Attention Net, learned using our affective embeddings perform better than their pre-trained counterparts (more than 1.5% improvement in F1-score) and other benchmarks. Furthermore, the difference in performance is more pronounced in limited data setting. 2022.aacl-main.42 @@ -543,7 +543,7 @@ YingboGao ChristianHerold ZijianYang - HermannNey + HermannNey 562–574 Encoder-decoder architecture is widely adopted for sequence-to-sequence modeling tasks. For machine translation, despite the evolution from long short-term memory networks to Transformer networks, plus the introduction and development of attention mechanism, encoder-decoder is still the de facto neural network architecture for state-of-the-art models. While the motivation for decoding information from some hidden space is straightforward, the strict separation of the encoding and decoding steps into an encoder and a decoder in the model architecture is not necessarily a must. Compared to the task of autoregressive language modeling in the target language, machine translation simply has an additional source sentence as context. Given the fact that neural language models nowadays can already handle rather long contexts in the target language, it is natural to ask whether simply concatenating the source and target sentences and training a language model to do translation would work. In this work, we investigate the aforementioned concept for machine translation. Specifically, we experiment with bilingual translation, translation with additional target monolingual data, and multilingual translation. In all cases, this alternative approach performs on par with the baseline encoder-decoder Transformer, suggesting that an encoder-decoder architecture might be redundant for neural machine translation. 2022.aacl-main.43 @@ -645,7 +645,7 @@ Construction Repetition Reduces Information Rate in Dialogue MarioGiulianelli ArabellaSinclair - RaquelFernández + RaquelFernández 665–682 Speakers repeat constructions frequently in dialogue. Due to their peculiar information-theoretic properties, repetitions can be thought of as a strategy for cost-effective communication. In this study, we focus on the repetition of lexicalised constructions—i.e., recurring multi-word units—in English open-domain spoken dialogues. We hypothesise that speakers use construction repetition to mitigate information rate, leading to an overall decrease in utterance information content over the course of a dialogue. We conduct a quantitative analysis, measuring the information content of constructions and that of their containing utterances, estimating information content with an adaptive neural language model. We observe that construction usage lowers the information content of utterances. This facilitating effect (i) increases throughout dialogues, (ii) is boosted by repetition, (iii) grows as a function of repetition frequency and density, and (iv) is stronger for repetitions of referential constructions. 2022.aacl-main.51 @@ -695,7 +695,7 @@ Re-contextualizing Fairness in <fixed-case>NLP</fixed-case>: The Case of <fixed-case>I</fixed-case>ndia ShailyBhatt SunipaDev - ParthaTalukdar + ParthaTalukdar ShachiDave VinodkumarPrabhakaran 727–740 @@ -719,7 +719,7 @@ Unsupervised Domain Adaptation for Sparse Retrieval by Filling Vocabulary and Word Frequency Gaps HirokiIida - NaoakiOkazaki + NaoakiOkazaki 752–765 IR models using a pretrained language model significantly outperform lexical approaches like BM25. In particular, SPLADE, which encodes texts to sparse vectors, is an effective model for practical use because it shows robustness to out-of-domain datasets. However, SPLADE still struggles with exact matching of low-frequency words in training data. In addition, domain shifts in vocabulary and word frequencies deteriorate the IR performance of SPLADE. Because supervision data are scarce in the target domain, addressing the domain shifts without supervision data is necessary. This paper proposes an unsupervised domain adaptation method by filling vocabulary and word-frequency gaps. First, we expand a vocabulary and execute continual pretraining with a masked language model on a corpus of the target domain. Then, we multiply SPLADE-encoded sparse vectors by inverse document frequency weights to consider the importance of documents with low-frequency words. We conducted experiments using our method on datasets with a large vocabulary gap from a source domain. We show that our method outperforms the present state-of-the-art domain adaptation method. In addition, our method achieves state-of-the-art results, combined with BM25. 2022.aacl-main.57 @@ -739,11 +739,11 @@ Cross-lingual Few-Shot Learning on Unseen Languages - GentaWinata + GentaWinata ShijieWu MayankKulkarni ThamarSolorio - DanielPreotiuc-Pietro + DanielPreotiuc-Pietro 777–791 Large pre-trained language models (LMs) have demonstrated the ability to obtain good performance on downstream tasks with limited examples in cross-lingual settings. However, this was mostly studied for relatively resource-rich languages, where at least enough unlabeled data is available to be included in pre-training a multilingual language model. In this paper, we explore the problem of cross-lingual transfer in unseen languages, where no unlabeled data is available for pre-training a model. We use a downstream sentiment analysis task across 12 languages, including 8 unseen languages, to analyze the effectiveness of several few-shot learning strategies across the three major types of model architectures and their learning dynamics. We also compare strategies for selecting languages for transfer and contrast findings across languages seen in pre-training compared to those that are not. Our findings contribute to the body of knowledge on cross-lingual models for low-resource settings that is paramount to increasing coverage, diversity, and equity in access to NLP technology. We show that, in few-shot learning, linguistically similar and geographically similar languages are useful for cross-lingual adaptation, but taking the context from a mixture of random source languages is surprisingly more effective. We also compare different model architectures and show that the encoder-only model, XLM-R, gives the best downstream task performance. 2022.aacl-main.59 @@ -812,11 +812,11 @@ Not another Negation Benchmark: The <fixed-case>N</fixed-case>a<fixed-case>N</fixed-case>-<fixed-case>NLI</fixed-case> Test Suite for Sub-clausal Negation Thinh HungTruong - YuliaOtmakhova - TimothyBaldwin - TrevorCohn + YuliaOtmakhova + TimothyBaldwin + TrevorCohn Jey HanLau - KarinVerspoor + KarinVerspoor 883–894 Negation is poorly captured by current language models, although the extent of this problem is not widely understood. We introduce a natural language inference (NLI) test suite to enable probing the capabilities of NLP methods, with the aim of understanding sub-clausal negation. The test suite contains premise–hypothesis pairs where the premise contains sub-clausal negation and the hypothesis is constructed by making minimal modifications to the premise in order to reflect different possible interpretations. Aside from adopting standard NLI labels, our test suite is systematically constructed under a rigorous linguistic framework. It includes annotation of negation types and constructions grounded in linguistic theory, as well as the operations used to construct hypotheses. This facilitates fine-grained analysis of model performance. We conduct experiments using pre-trained language models to demonstrate that our test suite is more challenging than existing benchmarks focused on negation, and show how our annotation supports a deeper understanding of the current NLI capabilities in terms of negation and quantification. 2022.aacl-main.65 @@ -851,9 +851,9 @@ Dual Mechanism Priming Effects in <fixed-case>H</fixed-case>indi Word Order SidharthRanjan - Martenvan Schijndel + Martenvan Schijndel SumeetAgarwal - RajakrishnanRajkumar + RajakrishnanRajkumar 936–953 Word order choices during sentence production can be primed by preceding sentences. In this work, we test the DUAL MECHANISM hypothesis that priming is driven by multiple different sources. Using a Hindi corpus of text productions, we model lexical priming with an n-gram cache model, and we capture more abstract syntactic priming with an adaptive neural language model. We permute the preverbal constituents of corpus sentences and then use a logistic regression model to predict which sentences actually occurred in the corpus against artificially generated meaning-equivalent variants. Our results indicate that lexical priming and lexically-independent syntactic priming affect complementary sets of verb classes. By showing that different priming influences are separable from one another, our results support the hypothesis that multiple different cognitive mechanisms underlie priming. 2022.aacl-main.68 @@ -890,7 +890,7 @@ TulikaSaha Aditya PrakashPatra SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya 978–990 Dialogue Act Classification (DAC) that determines the communicative intention of an utterance has been investigated widely over the years as a standalone task. But the emotional state of the speaker has a considerable effect on its pragmatic content. Sentiment as a human behavior is also closely related to emotion and one aids in the better understanding of the other. Thus, their role in identification of DAs needs to be explored. As a first step, we extend the newly released multi-modal EMOTyDA dataset to enclose sentiment tags for each utterance. In order to incorporate these multiple aspects, we propose a Dual Attention Mechanism (DAM) based multi-modal, multi-tasking conversational framework. The DAM module encompasses intra-modal and interactive inter-modal attentions with multiple loss optimization at various hierarchies to fuse multiple modalities efficiently and learn generalized features across all the tasks. Additionally, to counter the class-imbalance issue in dialogues, we introduce a 2-step Deferred Optimisation Schedule (DOS) that involves Meta-Net (MN) learning and deferred re-weighting where the former helps to learn an explicit weighting function from data automatically and the latter deploys a re-weighted multi-task loss with a smaller learning rate. Empirically, we establish that the joint optimisation of multi-modal DAC, SA and ER tasks along with the incorporation of 2-step DOS and MN learning produces better results compared to its different counterparts and outperforms state-of-the-art model. 2022.aacl-main.71 @@ -925,7 +925,7 @@ Hengam: An Adversarially Trained Transformer for <fixed-case>P</fixed-case>ersian Temporal Tagging SajadMirzababaei Amir HosseinKargaran - HinrichSchütze + HinrichSchütze EhsaneddinAsgari 1013–1024 Many NLP main tasks benefit from an accurate understanding of temporal expressions, e.g., text summarization, question answering, and information retrieval. This paper introduces Hengam, an adversarially trained transformer for Persian temporal tagging outperforming state-of-the-art approaches on a diverse and manually created dataset. We create Hengam in the following concrete steps: (1) we develop HengamTagger, an extensible rule-based tool that can extract temporal expressions from a set of diverse language-specific patterns for any language of interest. (2) We apply HengamTagger to annotate temporal tags in a large and diverse Persian text collection (covering both formal and informal contexts) to be used as weakly labeled data. (3) We introduce an adversarially trained transformer model on HengamCorpus that can generalize over the HengamTagger’s rules. We create HengamGold, the first high-quality gold standard for Persian temporal tagging. Our trained adversarial HengamTransformer not only achieves the best performance in terms of the F1-score (a type F1-Score of 95.42 and a partial F1-Score of 91.60) but also successfully deals with language ambiguities and incorrect spellings. Our code, data, and models are publicly available at https://github.com/kargaranamir/Hengam. @@ -938,7 +938,7 @@ Yang TristaCao KyleSeelman KyungjunLee - HalDaumé III + HalDaumé III 1025–1034 In visual question answering (VQA), a machine must answer a question given an associated image. Recently, accessibility researchers have explored whether VQA can be deployed in a real-world setting where users with visual impairments learn about their environment by capturing their visual surroundings and asking questions. However, most of the existing benchmarking datasets for VQA focus on machine “understanding” and it remains unclear how progress on those datasets corresponds to improvements in this real-world use case. We aim to answer this question by evaluating discrepancies between machine “understanding” datasets (VQA-v2) and accessibility datasets (VizWiz) by evaluating a variety of VQA models. Based on our findings, we discuss opportunities and challenges in VQA for accessibility and suggest directions for future work. 2022.aacl-main.75 @@ -952,7 +952,7 @@ ShubhashisSengupta AnutoshMaitra RoshniRamnani - PushpakBhattacharyya + PushpakBhattacharyya 1035–1047 Task-oriented conversational agents are gaining immense popularity and success in a wide range of tasks, from flight ticket booking to online shopping. However, the existing systems presume that end-users will always have a pre-determined and servable task goal, which results in dialogue failure in hostile scenarios, such as goal unavailability. On the other hand, human agents accomplish users’ tasks even in a large number of goal unavailability scenarios by persuading them towards a very similar and servable goal. Motivated by the limitation, we propose and build a novel end-to-end multi-modal persuasive dialogue system incorporated with a personalized persuasive module aided goal controller and goal persuader. The goal controller recognizes goal conflicting/unavailability scenarios and formulates a new goal, while the goal persuader persuades users using a personalized persuasive strategy identified through dialogue context. We also present a novel automatic evaluation metric called Persuasiveness Measurement Rate (PMeR) for quantifying the persuasive capability of a conversational agent. The obtained improvements (both quantitative and qualitative) firmly establish the superiority and need of the proposed context-guided, personalized persuasive virtual agent over existing traditional task-oriented virtual agents. Furthermore, we also curated a multi-modal persuasive conversational dialogue corpus annotated with intent, slot, sentiment, and dialogue act for e-commerce domain. 2022.aacl-main.76 @@ -967,7 +967,7 @@ RajdeepMukherjee KripabandhuGhosh PawanGoyal - SaptarshiGhosh + SaptarshiGhosh 1048–1064 Summarization of legal case judgement documents is a challenging problem in Legal NLP. However, not much analyses exist on how different families of summarization models (e.g., extractive vs. abstractive) perform when applied to legal case documents. This question is particularly important since many recent transformer-based abstractive summarization models have restrictions on the number of input tokens, and legal documents are known to be very long. Also, it is an open question on how best to evaluate legal case document summarization systems. In this paper, we carry out extensive experiments with several extractive and abstractive summarization methods (both supervised and unsupervised) over three legal summarization datasets that we have developed. Our analyses, that includes evaluation by law practitioners, lead to several interesting insights on legal summarization in specific and long document summarization in general. 2022.aacl-main.77 @@ -1056,7 +1056,7 @@ Higher-Order Dependency Parsing for Arc-Polynomial Score Functions via Gradient-Based Methods and Genetic Algorithm XudongZhang - JosephLe Roux + JosephLe Roux ThierryCharnois 1158–1171 We present a novel method for higher-order dependency parsing which takes advantage of the general form of score functions written as arc-polynomials, a general framework which encompasses common higher-order score functions, and includes new ones. This method is based on non-linear optimization techniques, namely coordinate ascent and genetic search where we iteratively update a candidate parse. Updates are formulated as gradient-based operations, and are efficiently computed by auto-differentiation libraries. Experiments show that this method obtains results matching the recent state-of-the-art second order parsers on three standard datasets. @@ -1115,7 +1115,7 @@ Transfer Learning for Humor Detection by Twin Masked Yellow <fixed-case>M</fixed-case>uppets AseemArora - GaëlDias + GaëlDias AdamJatowt AsifEkbal 1–7 @@ -1147,7 +1147,7 @@ Number Theory Meets Linguistics: Modelling Noun Pluralisation Across 1497 Languages Using 2-adic Metrics GregoryBaker - DiegoMolla + DiegoMolla 24–32 A simple machine learning model of pluralisation as a linear regression problem minimising a p-adic metric substantially outperforms even the most robust of Euclidean-space regressors on languages in the Indo-European, Austronesian, Trans New-Guinea, Sino-Tibetan, Nilo-Saharan, Oto-Meanguean and Atlantic-Congo language families. There is insufficient evidence to support modelling distinct noun declensions as a p-adic neighbourhood even in Indo-European languages. 2022.aacl-short.4 @@ -1334,7 +1334,7 @@ HelenaBalabin JulioHurtado AlvaroSoto - Marie-FrancineMoens + Marie-FrancineMoens 154–160 Lifelong language learning seeks to have models continuously learn multiple tasks in a sequential order without suffering from catastrophic forgetting. State-of-the-art approaches rely on sparse experience replay as the primary approach to prevent forgetting. Experience replay usually adopts sampling methods for the memory population; however, the effect of the chosen sampling strategy on model performance has not yet been studied. In this paper, we investigate how relevant the selective memory population is in the lifelong learning process of text classification and question-answering tasks. We found that methods that randomly store a uniform number of samples from the entire data stream lead to high performances, especially for low memory size, which is consistent with computer vision studies. 2022.aacl-short.20 @@ -1355,7 +1355,7 @@ Multi-Type Conversational Question-Answer Generation with Closed-ended and Unanswerable Questions SeonjeongHwang YunsuKim - Gary GeunbaeLee + Gary GeunbaeLee 169–177 Conversational question answering (CQA) facilitates an incremental and interactive understanding of a given context, but building a CQA system is difficult for many domains due to the problem of data scarcity. In this paper, we introduce a novel method to synthesize data for CQA with various question types, including open-ended, closed-ended, and unanswerable questions. We design a different generation flow for each question type and effectively combine them in a single, shared framework. Moreover, we devise a hierarchical answerability classification (hierarchical AC) module that improves quality of the synthetic data while acquiring unanswerable questions. Manual inspections show that synthetic data generated with our framework have characteristics very similar to those of human-generated conversations. Across four domains, CQA systems trained on our synthetic data indeed show good performance close to the systems trained on human-annotated data. 2022.aacl-short.22 @@ -1378,7 +1378,7 @@ <fixed-case>NGEP</fixed-case>: A Graph-based Event Planning Framework for Story Generation ChenTang - ZhihaoZhang + ZhihaoZhang TylerLoakman ChenghuaLin FrankGuerin @@ -1427,8 +1427,8 @@ ShuaiboWang YufengChen SongmingZhang - DeyiXiong - JinanXu + DeyiXiong + JinanXu 221–227 Neural machine translation (NMT) models are known to be fragile to noisy inputs from automatic speech recognition (ASR) systems. Existing methods are usually tailored for robustness against only homophone errors which account for a small portion of realistic ASR errors. In this paper, we propose an adversarial example generation method based on confusion sets that contain words easily confusable with a target word by ASR to conduct adversarial training for NMT models. Specifically, an adversarial example is generated from the perspective of acoustic relations instead of the traditional uniform or unigram sampling from the confusion sets. Experiments on different test sets with hand-crafted and real-world noise demonstrate the effectiveness of our method over previous methods. Moreover, our approach can achieve improvements on the clean test set. 2022.aacl-short.28 @@ -1543,7 +1543,7 @@ Demographic-Aware Language Model Fine-tuning as a Bias Mitigation Technique AparnaGarimella - RadaMihalcea + RadaMihalcea AkhashAmarnath 311–319 BERT-like language models (LMs), when exposed to large unstructured datasets, are known to learn and sometimes even amplify the biases present in such data. These biases generally reflect social stereotypes with respect to gender, race, age, and others. In this paper, we analyze the variations in gender and racial biases in BERT, a large pre-trained LM, when exposed to different demographic groups. Specifically, we investigate the effect of fine-tuning BERT on text authored by historically disadvantaged demographic groups in comparison to that by advantaged groups. We show that simply by fine-tuning BERT-like LMs on text authored by certain demographic groups can result in the mitigation of social biases in these LMs against various target groups. @@ -1570,7 +1570,7 @@ MayankKulkarni LingjueXie MounicaMaddela - DanielPreotiuc-Pietro + DanielPreotiuc-Pietro 326–333 Entity-centric summarization is a type of controllable summarization that aims to produce a summary of a document that is specific to a given target entity. Extractive summaries possess multiple advantages over abstractive ones such as preserving factuality and can be directly used in downstream tasks like target-based sentiment analysis or incorporated into search applications. In this paper, we explore methods to solve this task by recasting it as a sentence selection task, as supported by the EntSUM data set. We use methods inspired by information retrieval, where the input to the model is a pair representing a sentence from the original document and the target entity, in place of the query. We explore different architecture variants and loss functions in this framework with results showing an up to 5.8 F1 improvement over past state-of-the-art and outperforming the competitive entity-centric Lead 3 heuristic by 1.1 F1. In addition, we also demonstrate similarly strong results on the related task of salient sentence selection for an entity. 2022.aacl-short.40 @@ -1586,7 +1586,7 @@ RamyEskander CassLowry RichardCompton - JudithKlavans + JudithKlavans MariaPolinsky SmarandaMuresan 334–340 @@ -1599,7 +1599,7 @@ Self-Repetition in Abstractive Neural Summarizers NikitaSalkar ThomasTrikalinos - ByronWallace + ByronWallace AniNenkova 341–350 We provide a quantitative and qualitative analysis of self-repetition in the output of neural summarizers. We measure self-repetition as the number of n-grams of length four or longer that appear in multiple outputs of the same system. We analyze the behavior of three popular architectures (BART, T5, and Pegasus), fine-tuned on five datasets. In a regression analysis, we find that the three architectures have different propensities for repeating content across output summaries for inputs, with BART being particularly prone to self-repetition. Fine-tuning on more abstractive data, and on data featuring formulaic language is associated with a higher rate of self-repetition. In qualitative analysis, we find systems produce artefacts such as ads and disclaimers unrelated to the content being summarized, as well as formulaic phrases common in the fine-tuning domain. Our approach to corpus-level analysis of self-repetition may help practitioners clean up training data for summarizers and ultimately support methods for minimizing the amount of self-repetition. @@ -1621,7 +1621,7 @@ Modeling Document-level Temporal Structures for Building Temporal Dependency Graphs - Prafulla KumarChoubey + Prafulla KumarChoubey RuihongHuang 357–365 We propose to leverage news discourse profiling to model document-level temporal structures for building temporal dependency graphs. Our key observation is that the functional roles of sentences used for profiling news discourse signify different time frames relevant to a news story and can, therefore, help to recover the global temporal structure of a document. Our analyses and experiments with the widely used knowledge distillation technique show that discourse profiling effectively identifies distant inter-sentence event and (or) time expression pairs that are temporally related and otherwise difficult to locate. @@ -1645,7 +1645,7 @@ <fixed-case>M</fixed-case>i<fixed-case>QA</fixed-case>: A Benchmark for Inference on Metaphorical Questions IuliaComșa JulianEisenschlos - SriniNarayanan + SriniNarayanan 373–381 We propose a benchmark to assess the capability of large language models to reason with conventional metaphors. Our benchmark combines the previously isolated topics of metaphor detection and commonsense reasoning into a single task that requires a model to make inferences by accurately selecting between the literal and metaphorical register. We examine the performance of state-of-the-art pre-trained models on binary-choice tasks and find a large discrepancy between the performance of small and very large models, going from chance to near-human level. We also analyse the largest model in a generative setting and find that although human performance is approached, careful multiple-shot prompting is required. 2022.aacl-short.46 @@ -1656,7 +1656,7 @@ <fixed-case>GCDT</fixed-case>: A <fixed-case>C</fixed-case>hinese <fixed-case>RST</fixed-case> Treebank for Multigenre and Multilingual Discourse Parsing SiyaoPeng - Yang JanetLiu + Yang JanetLiu AmirZeldes 382–391 A lack of large-scale human-annotated data has hampered the hierarchical discourse parsing of Chinese. In this paper, we present GCDT, the largest hierarchical discourse treebank for Mandarin Chinese in the framework of Rhetorical Structure Theory (RST). GCDT covers over 60K tokens across five genres of freely available text, using the same relation inventory as contemporary RST treebanks for English. We also report on this dataset’s parsing experiments, including state-of-the-art (SOTA) scores for Chinese RST parsing and RST parsing on the English GUM dataset, using cross-lingual training in Chinese and English with multilingual embeddings. @@ -1668,7 +1668,7 @@ Assessing Combinational Generalization of Language Models in Biased Scenarios YanboFang ZuohuiFu - XinDong + XinDong YongfengZhang Gerardde Melo 392–397 @@ -1695,7 +1695,7 @@ Vector Space Interpolation for Query Expansion DeepanwayGhosal SomakAditya - SandipanDandapat + SandipanDandapat MonojitChoudhury 405–410 Topic-sensitive query set expansion is an important area of research that aims to improve search results for information retrieval. It is particularly crucial for queries related to sensitive and emerging topics. In this work, we describe a method for query set expansion about emerging topics using vector space interpolation. We use a transformer model called OPTIMUS, which is suitable for vector space manipulation due to its variational autoencoder nature. One of our proposed methods – Dirichlet interpolation shows promising results for query expansion. Our methods effectively generate new queries about the sensitive topic by incorporating set-level diversity, which is not captured by traditional sentence-level augmentation methods such as paraphrasing or back-translation. @@ -1705,7 +1705,7 @@ <fixed-case>S</fixed-case>ch<fixed-case>A</fixed-case>man: Spell-Checking Resources and Benchmark for Endangered Languages from Amazonia - ArturoOncevay + ArturoOncevay GerardoCardoso CarloAlva CésarLara Ávila @@ -1799,7 +1799,7 @@ How Well Do Multi-hop Reading Comprehension Models Understand Date Information? XanhHo SakuSugawara - AkikoAizawa + AkikoAizawa 470–479 Several multi-hop reading comprehension datasets have been proposed to resolve the issue of reasoning shortcuts by which questions can be answered without performing multi-hop reasoning. However, the ability of multi-hop models to perform step-by-step reasoning when finding an answer to a comparison question remains unclear. It is also unclear how questions about the internal reasoning process are useful for training and evaluating question-answering (QA) systems. To evaluate the model precisely in a hierarchical manner, we first propose a dataset, HieraDate, with three probing tasks in addition to the main question: extraction, reasoning, and robustness. Our dataset is created by enhancing two previous multi-hop datasets, HotpotQA and 2WikiMultiHopQA, focusing on multi-hop questions on date information that involve both comparison and numerical reasoning. We then evaluate the ability of existing models to understand date information. Our experimental results reveal that the multi-hop models do not have the ability to subtract two dates even when they perform well in date comparison and number subtraction tasks. Other results reveal that our probing questions can help to improve the performance of the models (e.g., by +10.3 F1) on the main QA task and our dataset can be used for data augmentation to improve the robustness of the models. 2022.aacl-short.58 @@ -1810,7 +1810,7 @@ Dodging the Data Bottleneck: Automatic Subtitling with Automatically Segmented <fixed-case>ST</fixed-case> Corpora SaraPapi AlinaKarakanta - MatteoNegri + MatteoNegri MarcoTurchi 480–487 Speech translation for subtitling (SubST) is the task of automatically translating speech data into well-formed subtitles by inserting subtitle breaks compliant to specific displaying guidelines. Similar to speech translation (ST), model training requires parallel data comprising audio inputs paired with their textual translations. In SubST, however, the text has to be also annotated with subtitle breaks. So far, this requirement has represented a bottleneck for system development, as confirmed by the dearth of publicly available SubST corpora. To fill this gap, we propose a method to convert existing ST corpora into SubST resources without human intervention. We build a segmenter model that automatically segments texts into proper subtitles by exploiting audio and text in a multimodal fashion, achieving high segmentation quality in zero-shot conditions. Comparative experiments with SubST systems respectively trained on manual and automatic segmentations result in similar performance, showing the effectiveness of our approach. @@ -1978,7 +1978,7 @@ SebastianBlank XintongWang Hans-PeterZorn - ChristianBiemann + ChristianBiemann 76–83 The multi-modal foundation model CLIP computes representations from texts and images that achieved unprecedented performance on tasks such as zero-shot image classification. However, CLIP was pretrained on public internet data. Thus it lacks highly domain-specific knowledge. We investigate the adaptation of CLIP-based models to the chest radiography domain using the MIMIC-CXR dataset. We show that the features of the pretrained CLIP models do not transfer to this domain. We adapt CLIP to the chest radiography domain using contrastive language supervision and show that this approach yields a model that outperforms supervised learning on labels on the MIMIC-CXR dataset while also generalizing to the CheXpert and RSNA Pneumonia datasets. Furthermore, we do a detailed ablation study of the batch and dataset size. Finally, we show that language supervision allows for better explainability by using the multi-modal model to generate images from texts such that experts can inspect what the model has learned. 2022.aacl-srw.11 @@ -2000,7 +2000,7 @@ Concreteness vs. Abstractness: A Selectional Preference Perspective TarunTater DiegoFrassinelli - SabineSchulte im Walde + SabineSchulte im Walde 92–98 Concrete words refer to concepts that are strongly experienced through human senses (banana, chair, salt, etc.), whereas abstract concepts are less perceptually salient (idea, glory, justice, etc.). A clear definition of abstractness is crucial for the understanding of human cognitive processes and for the development of natural language applications such as figurative language detection. In this study, we investigate selectional preferences as a criterion to distinguish between concrete and abstract concepts and words: we hypothesise that abstract and concrete verbs and nouns differ regarding the semantic classes of their arguments. Our study uses a collection of 5,438 nouns and 1,275 verbs to exploit selectional preferences as a salient characteristic in classifying English abstract vs. concrete words, and in predicting their concreteness scores. We achieve an f1-score of 0.84 for nouns and 0.71 for verbs in classification, and Spearman’s ρ correlation of 0.86 for nouns and 0.59 for verbs. 2022.aacl-srw.13 @@ -2068,7 +2068,7 @@ SeolhwaLee JaehyungSeo KisuYang - HeuiseokLim + HeuiseokLim 17–27 Children with language disabilities face communication difficulties in daily life. They are often deprived of the opportunity to participate in social activities due to their difficulty in understanding or using natural language. In this regard, Augmentative and Alternative Communication (AAC) can be a practical means of communication for children with language disabilities. In this study, we propose PicTalky, which is an AI-based AAC system that helps children with language developmental disabilities to improve their communication skills and language comprehension abilities. PicTalky can process both text and pictograms more accurately by connecting a series of neural-based NLP modules. Additionally, we perform quantitative and qualitative analyses on the modules of PicTalky. By using this service, it is expected that those suffering from language problems will be able to express their intentions or desires more easily and improve their quality of life. We have made the models freely available alongside a demonstration of the web interface. Furthermore, we implemented robotics AAC for the first time by applying PicTalky to the NAO robot. 2022.aacl-demo.3 @@ -2150,7 +2150,7 @@ PeterHenderson KhuyagbaatarBatsuren DieuwkeHupkes - MonaDiab + MonaDiab 72–87 We present a tool, Text Characterization Toolkit (TCT), that researchers can use to study characteristics of large datasets. Furthermore, such properties can lead to understanding the influence of such attributes on models’ behaviour. Traditionally, in most NLP research, models are usually evaluated by reporting single-number performance scores on a number of readily available benchmarks, without much deeper analysis. Here, we argue that – especially given the well-known fact that benchmarks often contain biases, artefacts, and spurious correlations – deeper results analysis should become the de-facto standard when presenting new models or benchmarks. TCT aims at filling this gap by facilitating such deeper analysis for datasets at scale, where datasets can be for training/development/evaluation. TCT includes both an easy-to-use tool, as well as off-the-shelf scripts that can be used for specific analyses. We also present use-cases from several different domains. TCT is used to predict difficult examples for given well-known trained models; TCT is also used to identify (potentially harmful) biases present in a dataset. 2022.aacl-demo.9 @@ -2174,7 +2174,7 @@ Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing: Tutorial Abstracts - Miguel A.Alonso + Miguel A.Alonso ZhongyuWei Association for Computational Linguistics
Taipei
@@ -2218,7 +2218,7 @@ HanzhuoTan JingLi MingyuWan - Kam-FaiWong + Kam-FaiWong 16–21 Cantonese is an influential Chinese variant with a large population of speakers worldwide. However, it is under-resourced in terms of the data scale and diversity, excluding Cantonese Natural Language Processing (NLP) from the stateof-the-art (SOTA) “pre-training and fine-tuning” paradigm. This tutorial will start with a substantially review of the linguistics and NLP progress for shaping language specificity, resources, and methodologies. It will be followed by an introduction to the trendy transformerbased pre-training methods, which have been largely advancing the SOTA performance of a wide range of downstream NLP tasks in numerous majority languages (e.g., English and Chinese). Based on the above, we will present the main challenges for Cantonese NLP in relation to Cantonese language idiosyncrasies of colloquialism and multilingualism, followed by the future directions to line NLP for Cantonese and other low-resource languages up to the cutting-edge pre-training practice. 2022.aacl-tutorials.3 @@ -2228,7 +2228,7 @@ Grounding Meaning Representation for Situated Reasoning NikhilKrishnaswamy - JamesPustejovsky + JamesPustejovsky 22–27 As natural language technology becomes ever-present in everyday life, people will expect artificial agents to understand language use as humans do. Nevertheless, most advanced neural AI systems fail at some types of interactions that are trivial for humans (e.g., ask a smart system “What am I pointing at?”). One critical aspect of human language understanding is situated reasoning, where inferences make reference to the local context, perceptual surroundings, and contextual groundings from the interaction. In this cutting-edge tutorial, we bring to the NLP/CL community a synthesis of multimodal grounding and meaning representation techniques with formal and computational models of embodied reasoning. We will discuss existing approaches to multimodal language grounding and meaning representations, discuss the kind of information each method captures and their relative suitability to situated reasoning tasks, and demon- strate how to construct agents that conduct situated reasoning by embodying a simulated environment. In doing so, these agents also represent their human interlocutor(s) within the simulation, and are represented through their virtual embodiment in the real world, enabling true bidirectional communication with a computer using multiple modalities. 2022.aacl-tutorials.4 @@ -2239,7 +2239,7 @@ The Battlefront of Combating Misinformation and Coping with Media Bias YiFung Kung-HsiangHuang - PreslavNakov + PreslavNakov HengJi 28–34 Misinformation is a pressing issue in modern society. It arouses a mixture of anger, distrust, confusion, and anxiety that cause damage on our daily life judgments and public policy decisions. While recent studies have explored various fake news detection and media bias detection techniques in attempts to tackle the problem, there remain many ongoing challenges yet to be addressed, as can be witnessed from the plethora of untrue and harmful content present during the COVID-19 pandemic and the international crises of late. In this tutorial, we provide researchers and practitioners with a systematic overview of the frontier in fighting misinformation. Specifically, we dive into the important research questions of how to (i) develop a robust fake news detection system, which not only fact-check information pieces provable by background knowledge but also reason about the consistency and the reliability of subtle details for emerging events; (ii) uncover the bias and agenda of news sources to better characterize misinformation; as well as (iii) correct false information and mitigate news bias, while allowing diverse opinions to be expressed. Moreover, we discuss the remaining challenges, future research directions, and exciting opportunities to help make this world a better place, with safer and more harmonic information sharing. diff --git a/data/xml/2022.acl.xml b/data/xml/2022.acl.xml index 525202623d..d20ff88f24 100644 --- a/data/xml/2022.acl.xml +++ b/data/xml/2022.acl.xml @@ -4,7 +4,7 @@ Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) SmarandaMuresan - PreslavNakov + PreslavNakov AlineVillavicencio Association for Computational Linguistics
Dublin, Ireland
@@ -32,8 +32,8 @@
Quantified Reproducibility Assessment of <fixed-case>NLP</fixed-case> Results - AnyaBelz - MajaPopovic + AnyaBelz + MajaPopovic SimonMille 16-28 This paper describes and tests a method for carrying out quantified reproducibility assessment (QRA) that is based on concepts and definitions from metrology. QRA produces a single score estimating the degree of reproducibility of a given system and evaluation measure, on the basis of the scores from, and differences between, different reproductions. We test QRA on 18 different system and evaluation measure combinations (involving diverse NLP tasks and types of evaluation), for each of which we have the original results and one to seven reproduction results. The proposed QRA method produces degree-of-reproducibility scores that are comparable across multiple reproductions not only of the same, but also of different, original studies. We find that the proposed method facilitates insights into causes of variation between reproductions, and as a result, allows conclusions to be drawn about what aspects of system and/or evaluation design need to be changed in order to improve reproducibility. @@ -78,7 +78,7 @@ HanwangZhang XiangnanHe FengbinZhu - Tat-SengChua + Tat-SengChua 57-69 Neural discrete reasoning (NDR) has shown remarkable progress in combining deep models with discrete reasoning. However, we find that existing NDR solution suffers from large performance drop on hypothetical questions, e.g. “what the annualized rate of return would be if the revenue in 2020 was doubled”. The key to hypothetical question answering (HQA) is counterfactual thinking, which is a natural ability of human reasoning but difficult for deep models. In this work, we devise a Learning to Imagine (L2I) module, which can be seamlessly incorporated into NDR models to perform the imagination of unseen counterfactual. In particular, we formulate counterfactual thinking into two steps: 1) identifying the fact to intervene, and 2) deriving the counterfactual from the fact and assumption, which are designed as neural networks. Based on TAT-QA, we construct a very challenging HQA dataset with 8,283 hypothetical questions. We apply the proposed L2I to TAGOP, the state-of-the-art solution on TAT-QA, validating the rationality and effectiveness of our approach. 2022.acl-long.5 @@ -253,7 +253,7 @@ Overlap-based Vocabulary Generation Improves Cross-lingual Transfer Among Related Languages VaidehiPatil - ParthaTalukdar + ParthaTalukdar SunitaSarawagi 219-233 Pre-trained multilingual language models such as mBERT and XLM-R have demonstrated great potential for zero-shot cross-lingual transfer to low web-resource languages (LRL). However, due to limited model capacity, the large difference in the sizes of available monolingual corpora between high web-resource languages (HRL) and LRLs does not provide enough scope of co-embedding the LRL with the HRL, thereby affecting the downstream task performance of LRLs. In this paper, we argue that relatedness among languages in a language family along the dimension of lexical overlap may be leveraged to overcome some of the corpora limitations of LRLs. We propose Overlap BPE (OBPE), a simple yet effective modification to the BPE vocabulary generation algorithm which enhances overlap across related languages. Through extensive experiments on multiple NLP tasks and datasets, we observe that OBPE generates a vocabulary that increases the representation of LRLs via tokens shared with HRLs. This results in improved zero-shot transfer from related HRLs to LRLs without reducing HRL representation and accuracy. Unlike previous studies that dismissed the importance of token-overlap, we show that in the low-resource related language setting, token overlap matters. Synthetically reducing the overlap to zero can cause as much as a four-fold drop in zero-shot transfer accuracy. @@ -496,7 +496,7 @@ <fixed-case>K</fixed-case>a<fixed-case>FSP</fixed-case>: Knowledge-Aware Fuzzy Semantic Parsing for Conversational Question Answering over a Large-Scale Knowledge Base JunzhuoLi - DeyiXiong + DeyiXiong 461-473 In this paper, we study two issues of semantic parsing approaches to conversational question answering over a large-scale knowledge base: (1) The actions defined in grammar are not sufficient to handle uncertain reasoning common in real-world scenarios. (2) Knowledge base information is not well exploited and incorporated into semantic parsing. To mitigate the two issues, we propose a knowledge-aware fuzzy semantic parsing framework (KaFSP). It defines fuzzy comparison operations in the grammar system for uncertain reasoning based on the fuzzy set theory. In order to enhance the interaction between semantic parsing and knowledge base, we incorporate entity triples from the knowledge base into a knowledge-aware entity disambiguation module. Additionally, we propose a multi-label classification framework to not only capture correlations between entity types and relations but also detect knowledge base information relevant to the current utterance. Both enhancements are based on pre-trained language models. Experiments on a large-scale conversational question answering benchmark demonstrate that the proposed KaFSP achieves significant improvements over previous state-of-the-art models, setting new SOTA results on 8 out of 10 question types, gaining improvements of over 10% F1 or accuracy on 3 question types, and improving overall F1 from 83.01% to 85.33%. The source code of KaFSP is available at https://github.com/tjunlp-lab/KaFSP. 2022.acl-long.35 @@ -649,7 +649,7 @@ MichalShmueli-Scheuer IlyaShnayderman NoamSlonim - LiatEin-Dor + LiatEin-Dor 596-609 Paraphrase generation has been widely used in various downstream tasks. Most tasks benefit mainly from high quality paraphrases, namely those that are semantically similar to, yet linguistically diverse from, the original sentence. Generating high-quality paraphrases is challenging as it becomes increasingly hard to preserve meaning as linguistic diversity increases. Recent works achieve nice results by controlling specific aspects of the paraphrase, such as its syntactic tree. However, they do not allow to directly control the quality of the generated paraphrase, and suffer from low flexibility and scalability. Here we propose QCPG, a quality-guided controlled paraphrase generation model, that allows directly controlling the quality dimensions. Furthermore, we suggest a method that given a sentence, identifies points in the quality control space that are expected to yield optimal generated paraphrases. We show that our method is able to generate paraphrases which maintain the original meaning while achieving higher diversity than the uncontrolled baseline. The models, the code, and the data can be found in https://github.com/IBM/quality-controlled-paraphrase-generation. 2022.acl-long.45 @@ -777,7 +777,7 @@ YueYu PranavShetty LeSong - ChaoZhang + ChaoZhang 745-758 Weakly-supervised learning (WSL) has shown promising results in addressing label scarcity on many NLP tasks, but manually designing a comprehensive, high-quality labeling rule set is tedious and difficult. We study interactive weakly-supervised learning—the problem of iteratively and automatically discovering novel labeling rules from data to improve the WSL model. Our proposed model, named PRBoost, achieves this goal via iterative prompt-based rule discovery and model boosting. It uses boosting to identify large-error instances and discovers candidate rules from them by prompting pre-trained LMs with rule templates. The candidate rules are judged by human experts, and the accepted rules are used to generate complementary weak labels and strengthen the current model. Experiments on four tasks show PRBoost outperforms state-of-the-art WSL baselines up to 7.1%, and bridges the gaps with fully supervised models. 2022.acl-long.55 @@ -880,7 +880,7 @@ Nested Named Entity Recognition with Span-level Graphs JunchengWan DongyuRu - WeinanZhang + WeinanZhang YongYu 892-903 Span-based methods with the neural networks backbone have great potential for the nested named entity recognition (NER) problem. However, they face problems such as degenerating when positive instances and negative instances largely overlap. Besides, the generalization ability matters a lot in nested NER, as a large proportion of entities in the test set hardly appear in the training set. In this work, we try to improve the span representation by utilizing retrieval-based span-level graphs, connecting spans and entities in the training data based on n-gram features. Specifically, we build the entity-entity graph and span-entity graph globally based on n-gram similarity to integrate the information of similar neighbor entities into the span representation. To evaluate our method, we conduct experiments on three common nested NER datasets, ACE2004, ACE2005, and GENIA datasets. Experimental results show that our method achieves general improvements on all three benchmarks (+0.30 \sim 0.85 micro-F1), and obtains special superiority on low frequency entities (+0.56 \sim 2.08 recall). @@ -892,7 +892,7 @@ <fixed-case>C</fixed-case>og<fixed-case>T</fixed-case>askonomy: Cognitively Inspired Task Taxonomy Is Beneficial to Transfer Learning in <fixed-case>NLP</fixed-case> YifeiLuo MinghuiXu - DeyiXiong + DeyiXiong 904-920 Is there a principle to guide transfer learning across tasks in natural language processing (NLP)? Taxonomy (Zamir et al., 2018) finds that a structure exists among visual tasks, as a principle underlying transfer learning for them. In this paper, we propose a cognitively inspired framework, CogTaskonomy, to learn taxonomy for NLP tasks. The framework consists of Cognitive Representation Analytics (CRA) and Cognitive-Neural Mapping (CNM). The former employs Representational Similarity Analysis, which is commonly used in computational neuroscience to find a correlation between brain-activity measurement and computational modeling, to estimate task similarity with task-specific sentence representations. The latter learns to detect task relations by projecting neural representations from NLP models to cognitive signals (i.e., fMRI voxels). Experiments on 12 NLP tasks, where BERT/TinyBERT are used as the underlying models for transfer learning, demonstrate that the proposed CogTaxonomy is able to guide transfer learning, achieving performance competitive to the Analytic Hierarchy Process (Saaty, 1987) used in visual Taskonomy (Zamir et al., 2018) but without requiring exhaustive pairwise O(m^2) task transferring. Analyses further discover that CNM is capable of learning model-agnostic task taxonomy. 2022.acl-long.64 @@ -927,7 +927,7 @@ LinXu ZhongyuWei WeidongZhan - BaobaoChang + BaobaoChang SujianLi TianyuLiu ZhifangSui @@ -987,7 +987,7 @@ LinjuanWu ShaojuanWu XiaowangZhang - DeyiXiong + DeyiXiong ShizhanChen ZhiqiangZhuang ZhiyongFeng @@ -1027,7 +1027,7 @@ JunXu ZeyangLei HaifengWang - Zheng-YuNiu + Zheng-YuNiu HuaWu 1024-1034 Most dialog systems posit that users have figured out clear and specific goals before starting an interaction. For example, users have determined the departure, the destination, and the travel time for booking a flight. However, in many scenarios, limited by experience and knowledge, users may know what they need, but still struggle to figure out clear and specific goals by determining all the necessary slots. In this paper, we identify this challenge, and make a step forward by collecting a new human-to-human mixed-type dialog corpus. It contains 5k dialog sessions and 168k utterances for 4 dialog types and 5 domains. Within each session, an agent first provides user-goal-related knowledge to help figure out clear and specific goals, and then help achieve them. Furthermore, we propose a mixed-type dialog model with a novel Prompt-based continual learning mechanism. Specifically, the mechanism enables the model to continually strengthen its ability on any specific type by utilizing existing dialog corpora effectively. @@ -1232,7 +1232,7 @@ JayPujara XiangRen YangLiu - DilekHakkani-Tur + DilekHakkani-Tur 1237-1252 Implicit knowledge, such as common sense, is key to fluid human conversations. Current neural response generation (RG) models are trained to generate responses directly, omitting unstated implicit knowledge. In this paper, we present Think-Before-Speaking (TBS), a generative approach to first externalize implicit commonsense knowledge (think) and use this knowledge to generate responses (speak). We argue that externalizing implicit knowledge allows more efficient learning, produces more informative responses, and enables more explainable models. We analyze different choices to collect knowledge-aligned dialogues, represent implicit knowledge, and transition between knowledge and dialogues. Empirical results show TBS models outperform end-to-end and knowledge-augmented RG baselines on most automatic metrics and generate more informative, specific, and commonsense-following responses, as evaluated by human annotators. TBS also generates knowledge that makes sense and is relevant to the dialogue around 85% of the time 2022.acl-long.88 @@ -1244,7 +1244,7 @@ Flow-Adapter Architecture for Unsupervised Machine Translation YihongLiu HarisJabbar - HinrichSchuetze + HinrichSchuetze 1253-1266 In this work, we propose a flow-adapter architecture for unsupervised NMT. It leverages normalizing flows to explicitly model the distributions of sentence-level latent representations, which are subsequently used in conjunction with the attention mechanism for the translation task. The primary novelties of our model are: (a) capturing language-specific sentence representations separately for each language using normalizing flows and (b) using a simple transformation of these latent representations for translating from one language to another. This architecture allows for unsupervised training of each language independently. While there is prior work on latent variables for supervised MT, to the best of our knowledge, this is the first work that uses latent variables and normalizing flows for unsupervised MT. We obtain competitive results on several unsupervised MT benchmarks. 2022.acl-long.89 @@ -1255,7 +1255,7 @@ Efficient Unsupervised Sentence Compression by Fine-tuning Transformers with Reinforcement Learning DemianGhalandari - ChrisHokamp + ChrisHokamp GeorgianaIfrim 1267-1280 Sentence compression reduces the length of text by removing non-essential content while preserving important facts and grammaticality. Unsupervised objective driven methods for sentence compression can be used to create customized models without the need for ground-truth training data, while allowing flexibility in the objective function(s) that are used for learning and inference. Recent unsupervised sentence compression approaches use custom objectives to guide discrete search; however, guided search is expensive at inference time. In this work, we explore the use of reinforcement learning to train effective sentence compression models that are also fast when generating predictions. In particular, we cast the task as binary sequence labelling and fine-tune a pre-trained transformer using a simple policy gradient approach. Our approach outperforms other unsupervised models while also being more efficient at inference time. @@ -1307,7 +1307,7 @@ YaoZhao JoshuaMaynez DipanjanDas - MichaelCollins + MichaelCollins MirellaLapata 1319-1339 We propose Composition Sampling, a simple but effective method to generate diverse outputs for conditional generation of higher quality compared to previous stochastic decoding strategies. It builds on recently proposed plan-based neural generation models (FROST, Narayan et al, 2021) that are trained to first create a composition of the output and then generate by conditioning on it and the input. Our approach avoids text degeneration by first sampling a composition in the form of an entity chain and then using beam search to generate the best possible text grounded to this entity chain. Experiments on summarization (CNN/DailyMail and XSum) and question generation (SQuAD), using existing and newly proposed automaticmetrics together with human-based evaluation, demonstrate that Composition Sampling is currently the best available decoding strategy for generating diverse meaningful outputs. @@ -1343,7 +1343,7 @@ Tackling Fake News Detection by Continually Improving Social Context Representations using Graph Neural Networks NikhilMehta - Maria LeonorPacheco + Maria LeonorPacheco DanGoldwasser 1363-1380 Easy access, variety of content, and fast widespread interactions are some of the reasons making social media increasingly popular. However, this rise has also enabled the propagation of fake news, text published by news sources with an intent to spread misinformation and sway beliefs. Detecting it is an important and challenging problem to prevent large scale misinformation and maintain a healthy society. We view fake news detection as reasoning over the relations between sources, articles they publish, and engaging users on social media in a graph framework. After embedding this information, we formulate inference operators which augment the graph edges by revealing unobserved interactions between its elements, such as similarity between documents’ contents and users’ engagement patterns. Our experiments over two challenging fake news detection tasks show that using inference operators leads to a better understanding of the social media framework enabling fake news spread, resulting in improved performance. @@ -1357,7 +1357,7 @@ YupeiDu QiZheng YuanbinWu - ManLan + ManLan YanYang MeirongMa 1381-1395 @@ -1384,8 +1384,8 @@ FaisalLadhak EsinDurmus HeHe - ClaireCardie - KathleenMcKeown + ClaireCardie + KathleenMcKeown 1410-1421 Despite recent progress in abstractive summarization, systems still suffer from faithfulness errors. While prior work has proposed models that improve faithfulness, it is unclear whether the improvement comes from an increased level of extractiveness of the model outputs as one naive way to improve faithfulness is to make summarization models more extractive. In this work, we present a framework for evaluating the effective faithfulness of summarization systems, by generating a faithfulness-abstractiveness trade-off curve that serves as a control at different operating points on the abstractiveness spectrum. We then show that the Maximum Likelihood Estimation (MLE) baseline as well as recently proposed methods for improving faithfulness, fail to consistently improve over the control at the same level of abstractiveness. Finally, we learn a selector to identify the most faithful and abstractive summary for a given document, and show that this system can attain higher faithfulness scores in human evaluations while being more abstractive than the baseline system on two datasets. Moreover, we show that our system is able to achieve a better faithfulness-abstractiveness trade-off than the control at the same level of abstractiveness. 2022.acl-long.100 @@ -1410,7 +1410,7 @@ Spurious Correlations in Reference-Free Evaluation of Text Generation EsinDurmus FaisalLadhak - TatsunoriHashimoto + TatsunoriHashimoto 1443-1454 Model-based, reference-free evaluation metricshave been proposed as a fast and cost-effectiveapproach to evaluate Natural Language Generation(NLG) systems. Despite promising recentresults, we find evidence that reference-freeevaluation metrics of summarization and dialoggeneration may be relying on spuriouscorrelations with measures such as word overlap,perplexity, and length. We further observethat for text summarization, these metrics havehigh error rates when ranking current state-ofthe-art abstractive summarization systems. Wedemonstrate that these errors can be mitigatedby explicitly designing evaluation metrics toavoid spurious features in reference-free evaluation. 2022.acl-long.102 @@ -1421,7 +1421,7 @@ On The Ingredients of an Effective Zero-shot Semantic Parser PengchengYin JohnWieting - AvirupSil + AvirupSil GrahamNeubig 1455-1474 Semantic parsers map natural language utterances into meaning representations (e.g., programs). Such models are typically bottlenecked by the paucity of training data due to the required laborious annotation efforts. Recent studies have performed zero-shot learning by synthesizing training examples of canonical utterances and programs from a grammar, and further paraphrasing these utterances to improve linguistic diversity. However, such synthetic examples cannot fully capture patterns in real data. In this paper we analyze zero-shot parsers through the lenses of the language and logical gaps (Herzig and Berant, 2019), which quantify the discrepancy of language and programmatic patterns between the canonical examples and real-world user-issued ones. We propose bridging these gaps using improved grammars, stronger paraphrasers, and efficient learning methods using canonical examples that most likely reflect real user intents. Our model achieves strong performance on two semantic parsing benchmarks (Scholar, Geo) with zero labeled data. @@ -1464,7 +1464,7 @@ Match the Script, Adapt if Multilingual: Analyzing the Effect of Multilingual Pretraining on Cross-lingual Transferability YoshinariFujinuma JordanBoyd-Graber - KatharinaKann + KatharinaKann 1500-1512 Pretrained multilingual models enable zero-shot learning even for unseen languages, and that performance can be further improved via adaptation prior to finetuning. However, it is unclear how the number of pretraining languages influences a model’s zero-shot learning for languages unseen during pretraining. To fill this gap, we ask the following research questions: (1) How does the number of pretraining languages influence zero-shot performance on unseen target languages? (2) Does the answer to that question change with model adaptation? (3) Do the findings for our first question change if the languages used for pretraining are all related? Our experiments on pretraining with related languages indicate that choosing a diverse set of languages is crucial. Without model adaptation, surprisingly, increasing the number of pretraining languages yields better results up to adding related languages, after which performance plateaus. In contrast, with model adaptation via continued pretraining, pretraining on a larger number of languages often gives further improvement, suggesting that model adaptation is crucial to exploit additional pretraining languages. 2022.acl-long.106 @@ -1500,8 +1500,8 @@ Differentiable Multi-Agent Actor-Critic for Multi-Step Radiology Report Summarization Sanjeev KumarKarn - NingLiu - HinrichSchuetze + NingLiu + HinrichSchuetze OladimejiFarri 1542-1553 The IMPRESSIONS section of a radiology report about an imaging study is a summary of the radiologist’s reasoning and conclusions, and it also aids the referring physician in confirming or excluding certain diagnoses. A cascade of tasks are required to automatically generate an abstractive summary of the typical information-rich radiology report. These tasks include acquisition of salient content from the report and generation of a concise, easily consumable IMPRESSIONS section. Prior research on radiology report summarization has focused on single-step end-to-end models – which subsume the task of salient content acquisition. To fully explore the cascade structure and explainability of radiology report summarization, we introduce two innovations. First, we design a two-step approach: extractive summarization followed by abstractive summarization. Second, we additionally break down the extractive part into two independent tasks: extraction of salient (1) sentences and (2) keywords. Experiments on English radiology reports from two clinical sites show our novel approach leads to a more precise summary compared to single-step and to two-step-with-single-extractive-process baselines with an overall improvement in F1 score of 3-4%. @@ -1514,7 +1514,7 @@ Online Semantic Parsing for Latency Reduction in Task-Oriented Dialogue Outstanding Paper JiaweiZhou - JasonEisner + JasonEisner MichaelNewman Emmanouil AntoniosPlatanios SamThomson @@ -1546,7 +1546,7 @@ ChenguangZhu BudhadityaDeb AhmedAwadallah - DragomirRadev + DragomirRadev RuiZhang 1592-1604 Text summarization helps readers capture salient information from documents, news, interviews, and meetings. However, most state-of-the-art pretrained language models (LM) are unable to efficiently process long text for many summarization tasks. In this paper, we propose Summ^N, a simple, flexible, and effective multi-stage framework for input texts that are longer than the maximum context length of typical pretrained LMs. Summ^N first splits the data samples and generates a coarse summary in multiple stages and then produces the final fine-grained summary based on it. Our framework can process input text of arbitrary length by adjusting the number of stages while keeping the LM input size fixed. Moreover, it can deal with both single-source documents and dialogues, and it can be used on top of different backbone abstractive summarization models. To the best of our knowledge, Summ^N is the first multi-stage split-then-summarize framework for long input summarization. Our experiments demonstrate that Summ^N outperforms previous state-of-the-art methods by improving ROUGE scores on three long meeting summarization datasets AMI, ICSI, and QMSum, two long TV series datasets from SummScreen, and a long document summarization dataset GovReport. Our data and code are available at https://github.com/psunlpgroup/Summ-N. @@ -1559,7 +1559,7 @@ KaixinMa HaoCheng XiaodongLiu - EricNyberg + EricNyberg JianfengGao 1605-1620 The retriever-reader framework is popular for open-domain question answering (ODQA) due to its ability to use explicit knowledge. Although prior work has sought to increase the knowledge coverage by incorporating structured knowledge beyond text, accessing heterogeneous knowledge sources through a unified interface remains an open question. While data-to-text generation has the potential to serve as a universal interface for data and text, its feasibility for downstream tasks remains largely unknown. In this work, we bridge this gap and use the data-to-text method as a means for encoding structured knowledge for open-domain question answering. Specifically, we propose a verbalizer-retriever-reader framework for ODQA over data and text where verbalized tables from Wikipedia and graphs from Wikidata are used as augmented knowledge sources. We show that our Unified Data and Text QA, UDT-QA, can effectively benefit from the expanded knowledge index, leading to large gains over text-only baselines. Notably, our approach sets the single-model state-of-the-art on Natural Questions. Furthermore, our analyses indicate that verbalized knowledge is preferred for answer reasoning for both adapted and hot-swap settings. @@ -1572,9 +1572,9 @@ Principled Paraphrase Generation with Parallel Corpora AitorOrmazabal MikelArtetxe - AitorSoroa - GorkaLabaka - EnekoAgirre + AitorSoroa + GorkaLabaka + EnekoAgirre 1621-1638 Round-trip Machine Translation (MT) is a popular choice for paraphrase generation, which leverages readily available parallel corpora for supervision. In this paper, we formalize the implicit similarity function induced by this approach, and show that it is susceptible to non-paraphrase pairs sharing a single ambiguous translation. Based on these insights, we design an alternative similarity metric that mitigates this issue by requiring the entire translation distribution to match, and implement a relaxation of it through the Information Bottleneck method. Our approach incorporates an adversarial term into MT training in order to learn representations that encode as much information about the reference translation as possible, while keeping as little information about the input as possible. Paraphrases can be generated by decoding back to the source from this representation, without having to generate pivot translations. In addition to being more principled and efficient than round-trip MT, our approach offers an adjustable parameter to control the fidelity-diversity trade-off, and obtains better results in our experiments. 2022.acl-long.114 @@ -1588,7 +1588,7 @@ JunjieHu LidongBing MahaniAljunied - ShafiqJoty + ShafiqJoty LuoSi ChunyanMiao 1639-1657 @@ -1637,7 +1637,7 @@ BudhadityaDeb ChenguangZhu AhmedAwadallah - DragomirRadev + DragomirRadev 1687-1698 Transformer-based models have achieved state-of-the-art performance on short-input summarization. However, they still struggle with summarizing longer text. In this paper, we present DYLE, a novel dynamic latent extraction approach for abstractive long-input summarization. DYLE jointly trains an extractor and a generator and treats the extracted text snippets as the latent variable, allowing dynamic snippet-level attention weights during decoding. To provide adequate supervision, we propose simple yet effective heuristics for oracle extraction as well as a consistency loss term, which encourages the extractor to approximate the averaged dynamic weights predicted by the generator. We evaluate our method on different long-document and long-dialogue summarization tasks: GovReport, QMSum, and arXiv. Experiment results show that DYLE outperforms all existing methods on GovReport and QMSum, with gains up to 6.1 ROUGE, while yielding strong results on arXiv. Further analysis shows that the proposed dynamic weights provide interpretability of our generation process. 2022.acl-long.118 @@ -1687,7 +1687,7 @@ ChenxiGu Jonathan K.Kummerfeld VeronicaPerez-Rosas - RadaMihalcea + RadaMihalcea 1742-1752 Personalized language models are designed and trained to capture language patterns specific to individual users. This makes them more accurate at predicting what a user will write. However, when a new user joins a platform and not enough text is available, it is harder to build effective personalized language models. We propose a solution for this problem, using a model trained on users that are similar to a new user. In this paper, we explore strategies for finding the similarity between new users and existing ones and methods for using the data from existing users who are a good match. We further explore the trade-off between available data for new users and how well their language can be modeled. 2022.acl-long.122 @@ -1759,7 +1759,7 @@ BeatriceSavoldi MarcoGaido LuisaBentivogli - MatteoNegri + MatteoNegri MarcoTurchi 1807-1824 Gender bias is largely recognized as a problematic phenomenon affecting language technologies, with recent studies underscoring that it might surface differently across languages. However, most of current evaluation practices adopt a word-level focus on a narrow set of occupational nouns under synthetic conditions. Such protocols overlook key features of grammatical gender languages, which are characterized by morphosyntactic chains of gender agreement, marked on a variety of lexical items and parts-of-speech (POS). To overcome this limitation, we enrich the natural, gender-sensitive MuST-SHE corpus (Bentivogli et al., 2020) with two new linguistic annotation layers (POS and agreement chains), and explore to what extent different lexical categories and agreement phenomena are impacted by gender skews. Focusing on speech translation, we conduct a multifaceted evaluation on three language directions (English-French/Italian/Spanish), with models trained on varying amounts of data and different word segmentation techniques. By shedding light on model behaviours, gender bias, and its detection at several levels of granularity, our findings emphasize the value of dedicated analyses beyond aggregated overall results. @@ -1811,8 +1811,8 @@ ChangyeLi DavidKnopman WeizheXu - TrevorCohen - SergueiPakhomov + TrevorCohen + SergueiPakhomov 1866-1877 Deep learning (DL) techniques involving fine-tuning large numbers of model parameters have delivered impressive performance on the task of discriminating between language produced by cognitively healthy individuals, and those with Alzheimer’s disease (AD). However, questions remain about their ability to generalize beyond the small reference sets that are publicly available for research. As an alternative to fitting model parameters directly, we propose a novel method by which a Transformer DL model (GPT-2) pre-trained on general English text is paired with an artificially degraded version of itself (GPT-D), to compute the ratio between these two models’ perplexities on language from cognitively healthy and impaired individuals. This technique approaches state-of-the-art performance on text data from a widely used “Cookie Theft” picture description task, and unlike established alternatives also generalizes well to spontaneous conversations. Furthermore, GPT-D generates text with characteristics known to be associated with AD, demonstrating the induction of dementia-related linguistic anomalies. Our study is a step toward better understanding of the relationships between the inner workings of generative neural language models, the language that they produce, and the deleterious effects of dementia on human speech and language characteristics. 2022.acl-long.131 @@ -1904,7 +1904,7 @@ QuCui ShujianHuang ShumingShi - JiajunChen + JiajunChen 1958-1969 Interactive neural machine translation (INMT) is able to guarantee high-quality translations by taking human interactions into account. Existing IMT systems relying on lexical constrained decoding (LCD) enable humans to translate in a flexible translation order beyond the left-to-right. However, they typically suffer from two significant limitations in translation efficiency and quality due to the reliance on LCD. In this work, we propose a novel BiTIIMT system, Bilingual Text-Infilling for Interactive Neural Machine Translation. The key idea to BiTIIMT is Bilingual Text-infilling (BiTI) which aims to fill missing segments in a manually revised translation for a given source sentence. We propose a simple yet effective solution by casting this task as a sequence-to-sequence task. In this way, our system performs decoding without explicit constraints and makes full use of revised words for better translation prediction. Experiment results show that BiTiIMT performs significantly better and faster than state-of-the-art LCD-based IMT on three translation tasks. 2022.acl-long.138 @@ -2033,7 +2033,7 @@ YunlongLiang FandongMeng ChulunZhou - JinanXu + JinanXu YufengChen JinsongSu JieZhou @@ -2061,7 +2061,7 @@ PremSelvaraj GokulNc PratyushKumar - MiteshKhapra + MiteshKhapra 2114-2133 AI technologies for Natural Languages have made tremendous progress recently. However, commensurate progress has not been made on Sign Languages, in particular, in recognizing signs as individual words or as complete sentences. We introduce OpenHands, a library where we take four key ideas from the NLP community for low-resource languages and apply them to sign languages for word-level recognition. First, we propose using pose extracted through pretrained models as the standard modality of data in this work to reduce training time and enable efficient inference, and we release standardized pose datasets for different existing sign language datasets. Second, we train and release checkpoints of 4 pose-based isolated sign language recognition models across 6 languages (American, Argentinian, Chinese, Greek, Indian, and Turkish), providing baselines and ready checkpoints for deployment. Third, to address the lack of labelled data, we propose self-supervised pretraining on unlabelled data. We curate and release the largest pose-based pretraining dataset on Indian Sign Language (Indian-SL). Fourth, we compare different pretraining strategies and for the first time establish that pretraining is effective for sign language recognition by demonstrating (a) improved fine-tuning performance especially in low-resource settings, and (b) high crosslingual transfer from Indian-SL to few other sign languages. We open-source all models and datasets in OpenHands with a hope that it makes research in sign languages reproducible and more accessible. 2022.acl-long.150 @@ -2115,7 +2115,7 @@ DexinWang KaiFan BoxingChen - DeyiXiong + DeyiXiong 2175-2187 k-Nearest-Neighbor Machine Translation (kNN-MT) has been recently proposed as a non-parametric solution for domain adaptation in neural machine translation (NMT). It aims to alleviate the performance degradation of advanced MT systems in translating out-of-domain sentences by coordinating with an additional token-level feature-based retrieval module constructed from in-domain data. Previous studies (Khandelwal et al., 2021; Zheng et al., 2021) have already demonstrated that non-parametric NMT is even superior to models fine-tuned on out-of-domain data. In spite of this success, kNN retrieval is at the expense of high latency, in particular for large datastores. To make it practical, in this paper, we explore a more efficient kNN-MT and propose to use clustering to improve the retrieval efficiency. Concretely, we first propose a cluster-based Compact Network for feature reduction in a contrastive learning manner to compress context features into 90+% lower dimensional vectors. We then suggest a cluster-based pruning solution to filter out 10% 40% redundant nodes in large datastores while retaining translation quality. Our proposed methods achieve better or comparable performance while reducing up to 57% inference latency against the advanced non-parametric MT model on several machine translation benchmarks. Experimental results indicate that the proposed methods maintain the most useful information of the original datastore and the Compact Network shows good generalization on unseen domains. Codes are available at https://github.com/tjunlp-lab/PCKMT. 2022.acl-long.154 @@ -2156,7 +2156,7 @@ WeiWu TaoGui QiZhang - XuanjingHuang + XuanjingHuang 2211-2224 Recent works on Lottery Ticket Hypothesis have shown that pre-trained language models (PLMs) contain smaller matching subnetworks(winning tickets) which are capable of reaching accuracy comparable to the original models. However, these tickets are proved to be notrobust to adversarial examples, and even worse than their PLM counterparts. To address this problem, we propose a novel method based on learning binary weight masks to identify robust tickets hidden in the original PLMs. Since the loss is not differentiable for the binary mask, we assign the hard concrete distribution to the masks and encourage their sparsity using a smoothing approximation of L0 regularization. Furthermore, we design an adversarial loss objective to guide the search for robust tickets and ensure that the tickets perform well bothin accuracy and robustness. Experimental results show the significant improvement of the proposed method over previous work on adversarial robustness evaluation. 2022.acl-long.157 @@ -2335,7 +2335,7 @@ YijinLiu FandongMeng YufengChen - JinanXu + JinanXu JianLiu JieZhou 2377-2389 @@ -2416,7 +2416,7 @@ BaileyKuehl ArmanCohan IsabelleAugenstein - Lucy LuWang + Lucy LuWang 2448-2460 Automated scientific fact checking is difficult due to the complexity of scientific language and a lack of significant amounts of training data, as annotation requires domain expertise. To address this challenge, we propose scientific claim generation, the task of generating one or more atomic and verifiable claims from scientific sentences, and demonstrate its usefulness in zero-shot fact checking for biomedical claims. We propose CLAIMGEN-BART, a new supervised method for generating claims supported by the literature, as well as KBIN, a novel method for generating claim negations. Additionally, we adapt an existing unsupervised entity-centric method of claim generation to biomedical claims, which we call CLAIMGEN-ENTITY. Experiments on zero-shot fact checking demonstrate that both CLAIMGEN-ENTITY and CLAIMGEN-BART, coupled with KBIN, achieve up to 90% performance of fully supervised models trained on manually annotated claims and evidence. A rigorous evaluation study demonstrates significant improvement in generated claim and negation quality over existing baselines 2022.acl-long.175 @@ -2505,7 +2505,7 @@ LuXiang YuZhou JiajunZhang - ChengqingZong + ChengqingZong 2545-2558 Role-oriented dialogue summarization is to generate summaries for different roles in the dialogue, e.g., merchants and consumers. Existing methods handle this task by summarizing each role’s content separately and thus are prone to ignore the information from other roles. However, we believe that other roles’ content could benefit the quality of summaries, such as the omitted information mentioned by other roles. Therefore, we propose a novel role interaction enhanced method for role-oriented dialogue summarization. It adopts cross attention and decoder self-attention interactions to interactively acquire other roles’ critical information. The cross attention interaction aims to select other roles’ critical dialogue utterances, while the decoder self-attention interaction aims to obtain key information from other roles’ summaries. Experimental results have shown that our proposed method significantly outperforms strong baselines on two public role-oriented dialogue summarization datasets. Extensive analyses have demonstrated that other roles’ content could help generate summaries with more complete semantics and correct topic structures. 2022.acl-long.182 @@ -2531,7 +2531,7 @@ Measuring and Mitigating Name Biases in Neural Machine Translation JunWang BenjaminRubinstein - TrevorCohn + TrevorCohn 2576-2590 Neural Machine Translation (NMT) systems exhibit problematic biases, such as stereotypical gender bias in the translation of occupation terms into languages with grammatical gender. In this paper we describe a new source of bias prevalent in NMT systems, relating to translations of sentences containing person names. To correctly translate such sentences, a NMT system needs to determine the gender of the name. We show that leading systems are particularly poor at this task, especially for female given names. This bias is deeper than given name gender: we show that the translation of terms with ambiguous sentiment can also be affected by person names, and the same holds true for proper nouns denoting race. To mitigate these biases we propose a simple but effective data augmentation method based on randomly switching entities during translation, which effectively eliminates the problem without any effect on translation quality. 2022.acl-long.184 @@ -2557,7 +2557,7 @@ <fixed-case>MSCTD</fixed-case>: A Multimodal Sentiment Chat Translation Dataset YunlongLiang FandongMeng - JinanXu + JinanXu YufengChen JieZhou 2601-2613 @@ -2723,7 +2723,7 @@ Continual Few-shot Relation Learning via Embedding Space Regularization and Data Augmentation ChengweiQin - ShafiqJoty + ShafiqJoty 2776-2789 Existing continual relation learning (CRL) methods rely on plenty of labeled training data for learning a new task, which can be hard to acquire in real scenario as getting large and representative labeled data is often expensive and time-consuming. It is therefore necessary for the model to learn novel relational patterns with very few labeled data while avoiding catastrophic forgetting of previous task knowledge. In this paper, we formulate this challenging yet practical problem as continual few-shot relation learning (CFRL). Based on the finding that learning for new emerging few-shot tasks often results in feature distributions that are incompatible with previous tasks’ learned distributions, we propose a novel method based on embedding space regularization and data augmentation. Our method generalizes to new few-shot tasks and avoids catastrophic forgetting of previous tasks by enforcing extra constraints on the relational embeddings and by adding extra relevant data in a self-supervised manner. With extensive experiments we demonstrate that our method can significantly outperform previous state-of-the-art methods in CFRL task settings. 2022.acl-long.198 @@ -2776,7 +2776,7 @@ Learning to Mediate Disparities Towards Pragmatic Communication YuweiBao SayanGhosh - JoyceChai + JoyceChai 2829-2842 Human communication is a collaborative process. Speakers, on top of conveying their own intent, adjust the content and language expressions by taking the listeners into account, including their knowledge background, personalities, and physical capabilities. Towards building AI agents with similar abilities in language communication, we propose a novel rational reasoning framework, Pragmatic Rational Speaker (PRS), where the speaker attempts to learn the speaker-listener disparity and adjust the speech accordingly, by adding a light-weighted disparity adjustment layer into working memory on top of speaker’s long-term memory system. By fixing the long-term memory, the PRS only needs to update its working memory to learn and adapt to different types of listeners. To validate our framework, we create a dataset that simulates different types of speaker-listener disparities in the context of referential games. Our empirical results demonstrate that the PRS is able to shift its output towards the language that listeners are able to understand, significantly improve the collaborative task outcome, and learn the disparity more efficiently than joint training. 2022.acl-long.202 @@ -2844,7 +2844,7 @@ <fixed-case>BRIO</fixed-case>: Bringing Order to Abstractive Summarization YixinLiu PengfeiLiu - DragomirRadev + DragomirRadev GrahamNeubig 2890-2903 Abstractive summarization models are commonly trained using maximum likelihood estimation, which assumes a deterministic (one-point) target distribution in which an ideal model will assign all the probability mass to the reference summary. This assumption may lead to performance degradation during inference, where the model needs to compare several system-generated (candidate) summaries that have deviated from the reference summary. To address this problem, we propose a novel training paradigm which assumes a non-deterministic distribution so that different candidate summaries are assigned probability mass according to their quality. Our method achieves a new state-of-the-art result on the CNN/DailyMail (47.78 ROUGE-1) and XSum (49.07 ROUGE-1) datasets. Further analysis also shows that our model can estimate probabilities of candidate summaries that are more correlated with their level of quality. @@ -2873,7 +2873,7 @@ OanaIgnat NanLiu JonathanStroud - RadaMihalcea + RadaMihalcea 2925-2940 We propose fill-in-the-blanks as a video understanding evaluation framework and introduce FIBER – a novel dataset consisting of 28,000 videos and descriptions in support of this evaluation framework. The fill-in-the-blanks setting tests a model’s understanding of a video by requiring it to predict a masked noun phrase in the caption of the video, given the video and the surrounding text. The FIBER benchmark does not share the weaknesses of the current state-of-the-art language-informed video understanding tasks, namely: (1) video question answering using multiple-choice questions, where models perform relatively well because they exploit linguistic biases in the task formulation, thus making our framework challenging for the current state-of-the-art systems to solve; and (2) video captioning, which relies on an open-ended evaluation framework that is often inaccurate because system answers may be perceived as incorrect if they differ in form from the ground truth. The FIBER dataset and our code are available at https://lit.eecs.umich.edu/fiber/. 2022.acl-long.209 @@ -2956,12 +2956,12 @@ Cross-Modal Discrete Representation Learning - AlexanderLiu + AlexanderLiu SouYoungJin Cheng-ILai AndrewRouditchenko AudeOliva - JamesGlass + JamesGlass 3013-3035 In contrast to recent advances focusing on high-level representation learning across modalities, in this work we present a self-supervised learning framework that is able to learn a representation that captures finer levels of granularity across different modalities such as concepts or events represented by visual objects or spoken words. Our framework relies on a discretized embedding space created via vector quantization that is shared across different modalities. Beyond the shared embedding space, we propose a Cross-Modal Code Matching objective that forces the representations from different views (modalities) to have a similar distribution over the discrete embedding space such that cross-modal objects/actions localization can be performed without direct supervision. We show that the proposed discretized multi-modal fine-grained representation (e.g., pixel/word/frame) can complement high-level summary representations (e.g., video/sentence/waveform) for improved performance on cross-modal retrieval tasks. We also observe that the discretized representation uses individual clusters to represent the same semantic concept across modalities. 2022.acl-long.215 @@ -3046,7 +3046,7 @@ VeronicaPerez-Rosas CharlesWelch SoujanyaPoria - RadaMihalcea + RadaMihalcea 3096-3107 In this paper, we study the effect of commonsense and domain knowledge while generating responses in counseling conversations using retrieval and generative methods for knowledge integration. We propose a pipeline that collects domain knowledge through web mining, and show that retrieval from both domain-specific and commonsense knowledge bases improves the quality of generated responses. We also present a model that incorporates knowledge generated by COMET using soft positional encoding and masked self-attention. We show that both retrieved and COMET-generated knowledge improve the system’s performance as measured by automatic metrics and also by human evaluation. Lastly, we present a comparative study on the types of knowledge encoded by our system showing that causal and intentional relationships benefit the generation task more than other types of commonsense relations. 2022.acl-long.221 @@ -3071,8 +3071,8 @@ On Continual Model Refinement in Out-of-Distribution Data Streams - Bill YuchenLin - SidaWang + Bill YuchenLin + SidaWang XiLin RobinJia LinXiao @@ -3149,7 +3149,7 @@ AlexisRoss TongshuangWu HaoPeng - MatthewPeters + MatthewPeters MattGardner 3194-3213 Controlled text perturbation is useful for evaluating and improving model generalizability. However, current techniques rely on training a model for every target perturbation, which is expensive and hard to generalize. We present Tailor, a semantically-controlled text generation system. Tailor builds on a pretrained seq2seq model and produces textual outputs conditioned on control codes derived from semantic representations. We craft a set of operations to modify the control codes, which in turn steer generation towards targeted attributes. These operations can be further composed into higher-level ones, allowing for flexible perturbation strategies. We demonstrate the effectiveness of these perturbations in multiple applications. First, we use Tailor to automatically create high-quality contrast sets for four distinct natural language processing (NLP) tasks. These contrast sets contain fewer spurious artifacts and are complementary to manually annotated ones in their lexical diversity. Second, we show that Tailor perturbations can improve model generalization through data augmentation. Perturbing just ∼2% of training data leads to a 5.8-point gain on an NLI challenge set measuring reliance on syntactic heuristics. @@ -3172,7 +3172,7 @@ Adaptive Testing and Debugging of <fixed-case>NLP</fixed-case> Models - Marco TulioRibeiro + Marco TulioRibeiro ScottLundberg 3253-3267 Current approaches to testing and debugging NLP models rely on highly variable human creativity and extensive labor, or only work for a very restrictive class of bugs. We present AdaTest, a process which uses large scale language models (LMs) in partnership with human feedback to automatically write unit tests highlighting bugs in a target model. Such bugs are then addressed through an iterative text-fix-retest loop, inspired by traditional software development. In experiments with expert and non-expert users and commercial / research models for 8 different tasks, AdaTest makes users 5-10x more effective at finding bugs than current approaches, and helps users effectively fix bugs without adding new bugs. @@ -3258,7 +3258,7 @@ Hallucinated but Factual! Inspecting the Factuality of Hallucinations in Abstractive Summarization MengCao YueDong - JackieCheung + JackieCheung 3340-3354 State-of-the-art abstractive summarization systems often generate hallucinations; i.e., content that is not directly inferable from the source text. Despite being assumed to be incorrect, we find that much hallucinated content is actually consistent with world knowledge, which we call factual hallucinations. Including these factual hallucinations in a summary can be beneficial because they provide useful background information. In this work, we propose a novel detection approach that separates factual from non-factual hallucinations of entities. Our method is based on an entity’s prior and posterior probabilities according to pre-trained and finetuned masked language models, respectively. Empirical results suggest that our method vastly outperforms two baselines in both accuracy and F1 scores and has a strong correlation with human judgments on factuality classification tasks. Furthermore, we use our method as a reward signal to train a summarization system using an off-line reinforcement learning (RL) algorithm that can significantly improve the factuality of generated summaries while maintaining the level of abstractiveness. 2022.acl-long.236 @@ -3270,7 +3270,7 @@ <fixed-case>E</fixed-case>nt<fixed-case>SUM</fixed-case>: A Data Set for Entity-Centric Extractive Summarization MounicaMaddela MayankKulkarni - DanielPreotiuc-Pietro + DanielPreotiuc-Pietro 3355-3366 Controllable summarization aims to provide summaries that take into account user-specified aspects and preferences to better assist them with their information need, as opposed to the standard summarization setup which build a single generic summary of a document. We introduce a human-annotated data set EntSUM for controllable summarization with a focus on named entities as the aspects to control. We conduct an extensive quantitative analysis to motivate the task of entity-centric summarization and show that existing methods for controllable summarization fail to generate entity-centric summaries. We propose extensions to state-of-the-art summarization approaches that achieve substantially better results on our data set. Our analysis and results show the challenging nature of this task and of the proposed data set. 2022.acl-long.237 @@ -3373,7 +3373,7 @@ in the Case of Unambiguous Gender Imputing Out-of-Vocabulary Embeddings with <fixed-case>LOVE</fixed-case> Makes <fixed-case>L</fixed-case>anguage<fixed-case>M</fixed-case>odels Robust with Little Cost LihuChen GaelVaroquaux - FabianSuchanek + FabianSuchanek 3488-3504 State-of-the-art NLP systems represent inputs with word embeddings, but these are brittle when faced with Out-of-Vocabulary (OOV) words. To address this issue, we follow the principle of mimick-like models to generate vectors for unseen words, by learning the behavior of pre-trained embeddings using only the surface form of words. We present a simple contrastive learning framework, LOVE, which extends the word representation of an existing pre-trained language model (such as BERT) and makes it robust to OOV with few additional parameters. Extensive evaluations demonstrate that our lightweight model achieves similar or even better performances than prior competitors, both on original datasets and on corrupted variants. Moreover, it can be used in a plug-and-play fashion with FastText and BERT, where it significantly improves their robustness. 2022.acl-long.245 @@ -3419,7 +3419,7 @@ in the Case of Unambiguous Gender PengjieRen WentaoDeng ZhuminChen - Maartende Rijke + Maartende Rijke 3543-3555 A dialogue response is malevolent if it is grounded in negative emotions, inappropriate behavior, or an unethical value basis in terms of content and dialogue acts. The detection of malevolent dialogue responses is attracting growing interest. Current research on detecting dialogue malevolence has limitations in terms of datasets and methods. First, available dialogue datasets related to malevolence are labeled with a single category, but in practice assigning a single category to each utterance may not be appropriate as some malevolent utterances belong to multiple labels. Second, current methods for detecting dialogue malevolence neglect label correlation. Therefore, we propose the task of multi-label dialogue malevolence detection and crowdsource a multi-label dataset, multi-label dialogue malevolence detection (MDMD) for evaluation. We also propose a multi-label malevolence detection model, multi-faceted label correlation enhanced CRF (MCRF), with two label correlation mechanisms, label correlation in taxonomy (LCT) and label correlation in context (LCC). Experiments on MDMD show that our method outperforms the best performing baseline by a large margin, i.e., 16.1%, 11.9%, 12.0%, and 6.1% on precision, recall, F1, and Jaccard score, respectively. 2022.acl-long.248 @@ -3484,7 +3484,7 @@ in the Case of Unambiguous Gender <fixed-case>C</fixed-case>onditional<fixed-case>QA</fixed-case>: A Complex Reading Comprehension Dataset with Conditional Answers HaitianSun - WilliamCohen + WilliamCohen RuslanSalakhutdinov 3627-3637 We describe a Question Answering (QA) dataset that contains complex questions with conditional answers, i.e. the answers are only applicable when certain conditions apply. We call this dataset ConditionalQA. In addition to conditional answers, the dataset also features:(1) long context documents with information that is related in logically complex ways;(2) multi-hop questions that require compositional logical reasoning;(3) a combination of extractive questions, yes/no questions, questions with multiple answers, and not-answerable questions;(4) questions asked without knowing the answers. We show that ConditionalQA is challenging for many of the existing QA models, especially in selecting answer conditions. We believe that this dataset will motivate further research in answering complex questions over long documents. @@ -3495,8 +3495,8 @@ in the Case of Unambiguous Gender Prompt-free and Efficient Few-shot Learning with Language Models RabeehKarimi Mahabadi - LukeZettlemoyer - JamesHenderson + LukeZettlemoyer + JamesHenderson LambertMathias MarziehSaeidi VeselinStoyanov @@ -3566,7 +3566,7 @@ in the Case of Unambiguous Gender JohnPavlopoulos LeoLaugier AlexandrosXenos - JeffreySorensen + JeffreySorensen IonAndroutsopoulos 3721-3734 We study the task of toxic spans detection, which concerns the detection of the spans that make a text toxic, when detecting such spans is possible. We introduce a dataset for this task, ToxicSpans, which we release publicly. By experimenting with several methods, we show that sequence labeling models perform best, but methods that add generic rationale extraction mechanisms on top of classifiers trained to predict if a post is toxic or not are also surprisingly promising. Finally, we use ToxicSpans and systems trained on it, to provide further analysis of state-of-the-art toxic to non-toxic transfer systems, as well as of human performance on that latter task. Our work highlights challenges in finer toxicity detection and mitigation. @@ -3743,7 +3743,7 @@ in the Case of Unambiguous Gender ZoeyLiu CrystalRichardson RichardHatcher - EmilyPrud’hommeaux + EmilyPrud’hommeaux 3933-3944 Languages are classified as low-resource when they lack the quantity of data necessary for training statistical and machine learning tools and models. Causes of resource scarcity vary but can include poor access to technology for developing these resources, a relatively small population of speakers, or a lack of urgency for collecting such resources in bilingual populations where the second language is high-resource. As a result, the languages described as low-resource in the literature are as different as Finnish on the one hand, with millions of speakers using it in every imaginable domain, and Seneca, with only a small-handful of fluent speakers using the language primarily in a restricted domain. While issues stemming from the lack of resources necessary to train models unite this disparate group of languages, many other issues cut across the divide between widely-spoken low-resource languages and endangered languages. In this position paper, we discuss the unique technological, cultural, practical, and ethical challenges that researchers and indigenous speech community members face when working together to develop language technology to support endangered language documentation and revitalization. We report the perspectives of language teachers, Master Speakers and elders from indigenous communities, as well as the point of view of academics. We describe an ongoing fruitful collaboration and make recommendations for future partnerships between academic researchers and language community stakeholders. 2022.acl-long.272 @@ -3753,7 +3753,7 @@ in the Case of Unambiguous Gender Automatic Identification and Classification of Bragging in Social Media MaliJin - DanielPreotiuc-Pietro + DanielPreotiuc-Pietro A. SezaDoğruöz NikolaosAletras 3945-3959 @@ -3771,7 +3771,7 @@ in the Case of Unambiguous Gender KejianShi JiayuanGu ThomasPorter - ClaireCardie + ClaireCardie 3960-3975 Document-level information extraction (IE) tasks have recently begun to be revisited in earnest using the end-to-end neural network techniques that have been successful on their sentence-level IE counterparts. Evaluation of the approaches, however, has been limited in a number of dimensions. In particular, the precision/recall/F1 scores typically reported provide few insights on the range of errors the models make. We build on the work of Kummerfeld and Klein (2013) to propose a transformation-based framework for automating error analysis in document-level event and (N-ary) relation extraction. We employ our framework to compare two state-of-the-art document-level template-filling approaches on datasets from three domains; and then, to gauge progress in IE since its inception 30 years ago, vs. four systems from the MUC-4 (1992) evaluation. 2022.acl-long.274 @@ -3812,7 +3812,7 @@ in the Case of Unambiguous Gender AhmedMasry MeghThakkar EnamulHoque - ShafiqJoty + ShafiqJoty 4005-4023 Charts are commonly used for exploring data and communicating insights. Generating natural language summaries from charts can be very helpful for people in inferring key insights that would otherwise require a lot of cognitive and perceptual efforts. We present Chart-to-text, a large-scale benchmark with two datasets and a total of 44,096 charts covering a wide range of topics and chart types. We explain the dataset construction process and analyze the datasets. We also introduce a number of state-of-the-art neural models as baselines that utilize image captioning and data-to-text generation techniques to tackle two problem variations: one assumes the underlying data table of the chart is available while the other needs to extract data from chart images. Our analysis with automatic and human evaluation shows that while our best models usually generate fluent summaries and yield reasonable BLEU scores, they also suffer from hallucinations and factual errors as well as difficulties in correctly explaining complex patterns and trends in charts. 2022.acl-long.277 @@ -3822,9 +3822,9 @@ in the Case of Unambiguous Gender Characterizing Idioms: Conventionality and Contingency MichaelaSocolof - JackieCheung + JackieCheung MichaelWagner - TimothyO’Donnell + TimothyO’Donnell 4024-4037 Idioms are unlike most phrases in two important ways. First, words in an idiom have non-canonical meanings. Second, the non-canonical meanings of words in an idiom are contingent on the presence of other words in the idiom. Linguistic theories differ on whether these properties depend on one another, as well as whether special theoretical machinery is needed to accommodate idioms. We define two measures that correspond to the properties above, and we show that idioms fall at the expected intersection of the two dimensions, but that the dimensions themselves are not correlated. Our results suggest that introducing special machinery to handle idioms may not be warranted. 2022.acl-long.278 @@ -3900,7 +3900,7 @@ in the Case of Unambiguous Gender EmilyDinan GavinAbercrombie A.Bergman - ShannonSpruit + ShannonSpruit DirkHovy Y-LanBoureau VerenaRieser @@ -3957,7 +3957,7 @@ in the Case of Unambiguous Gender HeqiZheng XiaoZhang ZewenChi - HeyanHuang + HeyanHuang YanTan TianLan WeiWei @@ -3970,11 +3970,11 @@ in the Case of Unambiguous Gender Improving Compositional Generalization with Self-Training for Data-to-Text Generation - Sanket VaibhavMehta + Sanket VaibhavMehta JinfengRao YiTay MihirKale - AnkurParikh + AnkurParikh EmmaStrubell 4205-4219 Data-to-text generation focuses on generating fluent natural language responses from structured meaning representations (MRs). Such representations are compositional and it is costly to collect responses for all possible combinations of atomic meaning schemata, thereby necessitating few-shot generalization to novel MRs. In this work, we systematically study the compositional generalization of the state-of-the-art T5 models in few-shot data-to-text tasks. We show that T5 models fail to generalize to unseen MRs, and we propose a template-based input representation that considerably improves the model’s generalization capability. To further improve the model’s performance, we propose an approach based on self-training using fine-tuned BLEURT for pseudo-response selection. On the commonly-used SGD and Weather benchmarks, the proposed self-training approach improves tree accuracy by 46\%+ and reduces the slot error rates by 73\%+ over the strong T5 baselines in few-shot settings. @@ -4001,7 +4001,7 @@ in the Case of Unambiguous Gender FeiLi JingyeLi HaoFei - DonghongJi + DonghongJi 4232-4241 The state-of-the-art model for structured sentiment analysis casts the task as a dependency parsing problem, which has some limitations: (1) The label proportions for span prediction and span relation prediction are imbalanced. (2) The span lengths of sentiment tuple components may be very large in this task, which will further exacerbates the imbalance problem. (3) Two nodes in a dependency graph cannot have multiple arcs, therefore some overlapped sentiment tuples cannot be recognized. In this work, we propose nichetargeting solutions for these issues. First, we introduce a novel labeling strategy, which contains two sets of token pair labels, namely essential label set and whole label set. The essential label set consists of the basic labels for this task, which are relatively balanced and applied in the prediction layer. The whole label set includes rich labels to help our model capture various token relations, which are applied in the hidden layer to softly influence our model. Moreover, we also propose an effective model to well collaborate with our labeling strategy, which is equipped with the graph attention networks to iteratively refine token representations, and the adaptive multi-label classifier to dynamically predict multiple relations between token pairs. We perform extensive experiments on 5 benchmark datasets in four languages. Experimental results show that our model outperforms previous SOTA models by a large margin. 2022.acl-long.291 @@ -4066,7 +4066,7 @@ in the Case of Unambiguous Gender OliverEberle StephanieBrandl JonasPilot - AndersSøgaard + AndersSøgaard 4295-4309 Learned self-attention functions in state-of-the-art NLP models often correlate with human attention. We investigate whether self-attention in large-scale pre-trained language models is as predictive of human eye fixation patterns during task-reading as classical cognitive models of human attention. We compare attention functions across two task-specific reading datasets for sentiment analysis and relation extraction. We find the predictiveness of large-scale pre-trained self-attention for human attention depends on ‘what is in the tail’, e.g., the syntactic nature of rare contexts. Further, we observe that task-specific fine-tuning does not increase the correlation with human task-specific reading. Through an input reduction experiment we give complementary insights on the sparsity and fidelity trade-off, showing that lower-entropy attention vectors are more faithful. 2022.acl-long.296 @@ -4126,7 +4126,7 @@ in the Case of Unambiguous Gender Scheduled Multi-task Learning for Neural Chat Translation YunlongLiang FandongMeng - JinanXu + JinanXu YufengChen JieZhou 4375-4388 @@ -4142,7 +4142,7 @@ in the Case of Unambiguous Gender ShengZhang LetiziaTomada SebastianSchwemer - AndersSøgaard + AndersSøgaard 4389-4406 We present a benchmark suite of four datasets for evaluating the fairness of pre-trained language models and the techniques used to fine-tune them for downstream tasks. Our benchmarks cover four jurisdictions (European Council, USA, Switzerland, and China), five languages (English, German, French, Italian and Chinese) and fairness across five attributes (gender, age, region, language, and legal area). In our experiments, we evaluate pre-trained language models using several group-robust fine-tuning techniques and show that performance group disparities are vibrant in many cases, while none of these techniques guarantee fairness, nor consistently mitigate group disparities. Furthermore, we provide a quantitative and qualitative analysis of our results, highlighting open challenges in the development of robustness methods in legal NLP. 2022.acl-long.301 @@ -4262,8 +4262,8 @@ in the Case of Unambiguous Gender <fixed-case>S</fixed-case>umma<fixed-case>R</fixed-case>eranker: A Multi-Task Mixture-of-Experts Re-ranking Framework for Abstractive Summarization MathieuRavaut - ShafiqJoty - NancyChen + ShafiqJoty + NancyChen 4504-4524 Sequence-to-sequence neural networks have recently achieved great success in abstractive summarization, especially through fine-tuning large pre-trained language models on the downstream dataset. These models are typically decoded with beam search to generate a unique summary. However, the search space is very large, and with the exposure bias, such decoding is not optimal. In this paper, we show that it is possible to directly train a second-stage model performing re-ranking on a set of summary candidates. Our mixture-of-experts SummaReranker learns to select a better candidate and consistently improves the performance of the base model. With a base PEGASUS, we push ROUGE scores by 5.44% on CNN- DailyMail (47.16 ROUGE-1), 1.31% on XSum (48.12 ROUGE-1) and 9.34% on Reddit TIFU (29.83 ROUGE-1), reaching a new state-of-the-art. Our code and checkpoints will be available at https://github.com/ntunlp/SummaReranker. 2022.acl-long.309 @@ -4278,7 +4278,7 @@ in the Case of Unambiguous Gender AlexSpangher PegahAlipoormolabashi MarjorieFreedman - RalphWeischedel + RalphWeischedel NanyunPeng 4525-4542 The ability to sequence unordered events is evidence of comprehension and reasoning about real world tasks/procedures. It is essential for applications such as task planning and multi-source instruction summarization. It often requires thorough understanding of temporal common sense and multimodal information, since these procedures are often conveyed by a combination of texts and images. While humans are capable of reasoning about and sequencing unordered procedural instructions, the extent to which the current machine learning methods possess such capability is still an open question. In this work, we benchmark models’ capability of reasoning over and sequencing unordered multimodal instructions by curating datasets from online instructional manuals and collecting comprehensive human annotations. We find current state-of-the-art models not only perform significantly worse than humans but also seem incapable of efficiently utilizing multimodal information. To improve machines’ performance on multimodal event sequencing, we propose sequence-aware pretraining techniques exploiting the sequential alignment properties of both texts and images, resulting in > 5% improvements on perfect match ratio. @@ -4306,7 +4306,7 @@ in the Case of Unambiguous Gender Divide and Rule: Effective Pre-Training for Context-Aware Multi-Encoder Translation Models LorenzoLupo MarcoDinarelli - LaurentBesacier + LaurentBesacier 4557-4572 Multi-encoder models are a broad family of context-aware neural machine translation systems that aim to improve translation quality by encoding document-level contextual information alongside the current sentence. The context encoding is undertaken by contextual parameters, trained on document-level data. In this work, we discuss the difficulty of training these parameters effectively, due to the sparsity of the words in need of context (i.e., the training signal), and their relevant context. We propose to pre-train the contextual parameters over split sentence pairs, which makes an efficient use of the available data for two reasons. Firstly, it increases the contextual training signal by breaking intra-sentential syntactic relations, and thus pushing the model to search the context for disambiguating clues more frequently. Secondly, it eases the retrieval of relevant context, since context segments become shorter. We propose four different splitting methods, and evaluate our approach with BLEU and contrastive test sets. Results show that it consistently improves learning of contextual parameters, both in low and high resource settings. 2022.acl-long.312 @@ -4319,7 +4319,7 @@ in the Case of Unambiguous Gender Saliency as Evidence: Event Detection with Trigger Saliency Attribution JianLiu YufengChen - JinanXu + JinanXu 4573-4585 Event detection (ED) is a critical subtask of event extraction that seeks to identify event triggers of certain types in texts. Despite significant advances in ED, existing methods typically follow a “one model fits all types” approach, which sees no differences between event types and often results in a quite skewed performance. Finding the causes of skewed performance is crucial for the robustness of an ED model, but to date there has been little exploration of this problem. This research examines the issue in depth and presents a new concept termed trigger salience attribution, which can explicitly quantify the underlying patterns of events. On this foundation, we develop a new training mechanism for ED, which can distinguish between trigger-dependent and context-dependent types and achieve promising performance on two benchmarks. Finally, by highlighting many distinct characteristics of trigger-dependent and context-dependent types, our work may promote more research into this problem. 2022.acl-long.313 @@ -4365,7 +4365,7 @@ in the Case of Unambiguous Gender Multilingual Generative Language Models for Zero-Shot Cross-Lingual Event Argument Extraction Kuan-HaoHuang I-HungHsu - PremNatarajan + PremNatarajan Kai-WeiChang NanyunPeng 4633-4646 @@ -4427,7 +4427,7 @@ in the Case of Unambiguous Gender Semi-Supervised Formality Style Transfer with Consistency Training AoLiu AnWang - NaoakiOkazaki + NaoakiOkazaki 4689-4701 Formality style transfer (FST) is a task that involves paraphrasing an informal sentence into a formal one without altering its meaning. To address the data-scarcity problem of existing parallel datasets, previous studies tend to adopt a cycle-reconstruction scheme to utilize additional unlabeled data, where the FST model mainly benefits from target-side unlabeled sentences. In this work, we propose a simple yet effective semi-supervised framework to better utilize source-side unlabeled sentences based on consistency training. Specifically, our approach augments pseudo-parallel data obtained from a source-side informal sentence by enforcing the model to generate similar outputs for its perturbed version. Moreover, we empirically examined the effects of various data perturbation methods and propose effective data filtering strategies to improve our framework. Experimental results on the GYAFC benchmark demonstrate that our approach can achieve state-of-the-art results, even with less than 40% of the parallel data. 2022.acl-long.321 @@ -4512,7 +4512,7 @@ in the Case of Unambiguous Gender <fixed-case>W</fixed-case>iki<fixed-case>D</fixed-case>iverse: A Multimodal Entity Linking Dataset with Diversified Contextual Topics and Entity Types XuwuWang - JunfengTian + JunfengTian MinGui ZhixuLi RuiWang @@ -4730,7 +4730,7 @@ in the Case of Unambiguous Gender Learning From Failure: Data Capture in an <fixed-case>A</fixed-case>ustralian Aboriginal Community EricLe Ferrand StevenBird - LaurentBesacier + LaurentBesacier 4988-4998 Most low resource language technology development is premised on the need to collect data for training statistical models. When we follow the typical process of recording and transcribing text for small Indigenous languages, we hit up against the so-called “transcription bottleneck.” Therefore it is worth exploring new ways of engaging with speakers which generate data while avoiding the transcription bottleneck. We have deployed a prototype app for speakers to use for confirming system guesses in an approach to transcription based on word spotting. However, in the process of testing the app we encountered many new problems for engagement with speakers. This paper presents a close-up study of the process of deploying data capture technology on the ground in an Australian Aboriginal community. We reflect on our interactions with participants and draw lessons that apply to anyone seeking to develop methods for language data collection in an Indigenous community. 2022.acl-long.342 @@ -4753,7 +4753,7 @@ in the Case of Unambiguous Gender DeepanwayGhosal SiqiShen NavonilMajumder - RadaMihalcea + RadaMihalcea SoujanyaPoria 5010-5028 This paper addresses the problem of dialogue reasoning with contextualized commonsense inference. We curate CICERO, a dataset of dyadic conversations with five types of utterance-level reasoning-based inferences: cause, subsequent event, prerequisite, motivation, and emotional reaction. The dataset contains 53,105 of such inferences from 5,672 dialogues. We use this dataset to solve relevant generative and discriminative tasks: generation of cause and subsequent event; generation of prerequisite, motivation, and listener’s emotional reaction; and selection of plausible alternatives. Our results ascertain the value of such dialogue-centric commonsense knowledge datasets. It is our hope that CICERO will open new research avenues into commonsense-based dialogue reasoning. @@ -4775,7 +4775,7 @@ in the Case of Unambiguous Gender <fixed-case>SP</fixed-case>o<fixed-case>T</fixed-case>: Better Frozen Model Adaptation through Soft Prompt Transfer - TuVu + TuVu BrianLester NoahConstant RamiAl-Rfou’ @@ -4837,9 +4837,9 @@ in the Case of Unambiguous Gender The patient is more dead than alive: exploring the current state of the multi-document summarisation of the biomedical literature - YuliaOtmakhova - KarinVerspoor - TimothyBaldwin + YuliaOtmakhova + KarinVerspoor + TimothyBaldwin Jey HanLau 5098-5111 Although multi-document summarisation (MDS) of the biomedical literature is a highly valuable task that has recently attracted substantial interest, evaluation of the quality of biomedical summaries lacks consistency and transparency. In this paper, we examine the summaries generated by two current models in order to understand the deficiencies of existing evaluation approaches in the context of the challenges that arise in the MDS task. Based on this analysis, we propose a new approach to human evaluation and identify several challenges that must be overcome to develop effective biomedical MDS systems. @@ -4889,7 +4889,7 @@ in the Case of Unambiguous Gender JungsooPark SewonMin JaewooKang - LukeZettlemoyer + LukeZettlemoyer HannanehHajishirzi 5154-5166 Despite significant interest in developing general purpose fact checking models, it is challenging to construct a large-scale fact verification dataset with realistic real-world claims. Existing claims are either authored by crowdworkers, thereby introducing subtle biases thatare difficult to control for, or manually verified by professional fact checkers, causing them to be expensive and limited in scale. In this paper, we construct a large-scale challenging fact verification dataset called FAVIQ, consisting of 188k claims derived from an existing corpus of ambiguous information-seeking questions. The ambiguities in the questions enable automatically constructing true and false claims that reflect user confusions (e.g., the year of the movie being filmed vs. being released). Claims in FAVIQ are verified to be natural, contain little lexical bias, and require a complete understanding of the evidence for verification. Our experiments show that the state-of-the-art models are far from solving our new task. Moreover, training on our data helps in professional fact-checking, outperforming models trained on the widely used dataset FEVER or in-domain data by up to 17% absolute. Altogether, our data will serve as a challenging benchmark for natural language understanding and support future progress in professional fact checking. @@ -4940,7 +4940,7 @@ in the Case of Unambiguous Gender Dynamic Prefix-Tuning for Generative Template-based Event Extraction XiaoLiu - HeyanHuang + HeyanHuang GeShi BoWang 5216-5228 @@ -5006,7 +5006,7 @@ in the Case of Unambiguous Gender VishravChaudhary ChauTran PhilippKoehn - FranciscoGuzmán + FranciscoGuzmán 5291-5305 Recent work in multilingual machine translation (MMT) has focused on the potential of positive transfer between languages, particularly cases where higher-resourced languages can benefit lower-resourced ones. While training an MMT model, the supervision signals learned from one language pair can be transferred to the other via the tokens shared by multiple source languages. However, the transfer is inhibited when the token overlap among source languages is small, which manifests naturally when languages use different writing systems. In this paper, we tackle inhibited transfer by augmenting the training data with alternative signals that unify different writing systems, such as phonetic, romanized, and transliterated input. We test these signals on Indic and Turkic languages, two language families where the writing systems differ but languages still share common features. Our results indicate that a straightforward multi-source self-ensemble – training a model on a mixture of various signals and ensembling the outputs of the same model fed with different signals during inference, outperforms strong ensemble baselines by 1.3 BLEU points on both language families. Further, we find that incorporating alternative inputs via self-ensemble can be particularly effective when training set is small, leading to +5 BLEU when only 5% of the total training data is accessible. Finally, our analysis demonstrates that including alternative signals yields more consistency and translates named entities more accurately, which is crucial for increased factuality of automated systems. 2022.acl-long.363 @@ -5029,7 +5029,7 @@ in the Case of Unambiguous Gender SewonMin MikeLewis HannanehHajishirzi - LukeZettlemoyer + LukeZettlemoyer 5316-5330 We introduce a noisy channel approach for language model prompting in few-shot text classification. Instead of computing the likelihood of the label given the input (referred as direct models), channel models compute the conditional probability of the input given the label, and are thereby required to explain every word in the input. We use channel models for recently proposed few-shot learning methods with no or very limited updates to the language model parameters, via either in-context demonstration or prompt tuning. Our experiments show that, for both methods, channel models significantly outperform their direct counterparts, which we attribute to their stability, i.e., lower variance and higher worst-case accuracy. We also present extensive ablations that provide recommendations for when to use channel prompt tuning instead of other competitive models (e.g., direct head tuning): channel prompt tuning is preferred when the number of training examples is small, labels in the training data are imbalanced, or generalization to unseen labels is required. 2022.acl-long.365 @@ -5138,7 +5138,7 @@ in the Case of Unambiguous Gender Multi Task Learning For Zero Shot Performance Prediction of Multilingual Models KabirAhuja ShanuKumar - SandipanDandapat + SandipanDandapat MonojitChoudhury 5454-5467 Massively Multilingual Transformer based Language Models have been observed to be surprisingly effective on zero-shot transfer across languages, though the performance varies from language to language depending on the pivot language(s) used for fine-tuning. In this work, we build upon some of the existing techniques for predicting the zero-shot performance on a task, by modeling it as a multi-task learning problem. We jointly train predictive models for different tasks which helps us build more accurate predictors for tasks where we have test data in very few languages to measure the actual performance of the model. Our approach also lends us the ability to perform a much more robust feature selection, and identify a common set of features that influence zero-shot performance across a variety of tasks. @@ -5151,7 +5151,7 @@ in the Case of Unambiguous Gender <tex-math>\infty</tex-math>-former: Infinite Memory Transformer Pedro HenriqueMartins ZitaMarinho - AndreMartins + AndreMartins 5468-5485 Transformers are unable to model long-term memories effectively, since the amount of computation they need to perform grows with the context length. While variations of efficient transformers have been proposed, they all have a finite memory capacity and are forced to drop old information. In this paper, we propose the \infty-former, which extends the vanilla transformer with an unbounded long-term memory. By making use of a continuous-space attention mechanism to attend over the long-term memory, the \infty-former’s attention complexity becomes independent of the context length, trading off memory length with precision.In order to control where precision is more important, \infty-former maintains “sticky memories,” being able to model arbitrarily long contexts while keeping the computation budget fixed.Experiments on a synthetic sorting task, language modeling, and document grounded dialogue generation demonstrate the \infty-former’s ability to retain information from long sequences. 2022.acl-long.375 @@ -5177,7 +5177,7 @@ in the Case of Unambiguous Gender LeonieWeissweiler ValentinHofmann MasoudJalili Sabet - HinrichSchuetze + HinrichSchuetze 5506-5516 We introduce CaMEL (Case Marker Extraction without Labels), a novel and challenging task in computational morphology that is especially relevant for low-resource languages. We propose a first model for CaMEL that uses a massively multilingual corpus to extract case markers in 83 languages based only on a noun phrase chunker and an alignment system. To evaluate CaMEL, we automatically construct a silver standard from UniMorph. The case markers extracted by our model can be used to detect and visualise similarities and differences between the case systems of different languages as well as to annotate fine-grained deep cases in languages in which they are not overtly marked. 2022.acl-long.377 @@ -5189,7 +5189,7 @@ in the Case of Unambiguous Gender Improving Generalizability in Implicitly Abusive Language Detection with Concept Activation Vectors IsarNejadgholi - KathleenFraser + KathleenFraser SvetlanaKiritchenko 5517-5529 Robustness of machine learning models on ever-changing real-world data is critical, especially for applications affecting human well-being such as content moderation. New kinds of abusive language continually emerge in online discussions in response to current events (e.g., COVID-19), and the deployed abuse detection systems should be updated regularly to remain accurate. In this paper, we show that general abusive language classifiers tend to be fairly reliable in detecting out-of-domain explicitly abusive utterances but fail to detect new types of more subtle, implicit abuse. Next, we propose an interpretability technique, based on the Testing Concept Activation Vector (TCAV) method from computer vision, to quantify the sensitivity of a trained model to the human-defined concepts of explicit and implicit abusive language, and use that to explain the generalizability of the model on new data, in this case, COVID-related anti-Asian hate speech. Extending this technique, we introduce a novel metric, Degree of Explicitness, for a single instance and show that the new metric is beneficial in suggesting out-of-domain unlabeled examples to effectively enrich the training data with informative, implicitly abusive texts. @@ -5214,7 +5214,7 @@ in the Case of Unambiguous Gender Non-neural Models Matter: a Re-evaluation of Neural Referring Expression Generation Systems FahimeSame GuanyiChen - KeesVan Deemter + KeesVan Deemter 5554-5567 In recent years, neural models have often outperformed rule-based and classic Machine Learning approaches in NLG. These classic approaches are now often disregarded, for example when new neural models are evaluated. We argue that they should not be overlooked, since, for some tasks, well-designed non-neural approaches achieve better performance than neural ones. In this paper, the task of generating referring expressions in linguistic context is used as an example. We examined two very different English datasets (WEBNLG and WSJ), and evaluated each algorithm using both automatic and human evaluations. Overall, the results of these evaluations suggest that rule-based systems with simple rule sets achieve on-par or better performance on both datasets compared to state-of-the-art neural REG systems. In the case of the more realistic dataset, WSJ, a machine learning-based system with well-designed linguistic features performed best. We hope that our work can encourage researchers to consider non-neural models in future. 2022.acl-long.380 @@ -5238,7 +5238,7 @@ in the Case of Unambiguous Gender Predicate-Argument Based Bi-Encoder for Paraphrase Identification QiweiPeng - DavidWeir + DavidWeir JulieWeeds YekunChai 5579-5589 @@ -5258,7 +5258,7 @@ in the Case of Unambiguous Gender TaoGui LiangQiao ZhanzhanCheng - XuanjingHuang + XuanjingHuang 5590-5600 NER model has achieved promising performance on standard NER benchmarks. However, recent studies show that previous approaches may over-rely on entity mention information, resulting in poor performance on out-of-vocabulary(OOV) entity recognition. In this work, we propose MINER, a novel NER learning framework, to remedy this issue from an information-theoretic perspective. The proposed approach contains two mutual information based training objectives: i) generalizing information maximization, which enhances representation via deep understanding of context and entity surface forms; ii) superfluous information minimization, which discourages representation from rotate memorizing entity names or exploiting biased cues in data. Experiments on various settings and datasets demonstrate that it achieves better performance in predicting OOV entities. 2022.acl-long.383 @@ -5298,7 +5298,7 @@ in the Case of Unambiguous Gender LiangQiao TaoGui QiZhang - XuanjingHuang + XuanjingHuang 5634-5644 Adversarial robustness has attracted much attention recently, and the mainstream solution is adversarial training. However, the tradition of generating adversarial perturbations for each input embedding (in the settings of NLP) scales up the training computational complexity by the number of gradient steps it takes to obtain the adversarial samples. To address this problem, we leverage Flooding method which primarily aims at better generalization and we find promising in defending adversarial attacks. We further propose an effective criterion to bring hyper-parameter-dependent flooding into effect with a narrowed-down search space by measuring how the gradient steps taken within one epoch affect the loss of each batch. Our approach requires zero adversarial sample for training, and its time consumption is equivalent to fine-tuning, which can be 2-15 times faster than standard adversarial training. We experimentally show that our method improves BERT’s resistance to textual adversarial attacks by a large margin, and achieves state-of-the-art robust accuracy on various text classification and GLUE tasks. 2022.acl-long.386 @@ -5323,7 +5323,7 @@ in the Case of Unambiguous Gender Finding Structural Knowledge in Multimodal-<fixed-case>BERT</fixed-case> VictorMilewski Miryamde Lhoneux - Marie-FrancineMoens + Marie-FrancineMoens 5658-5671 In this work, we investigate the knowledge learned in the embeddings of multimodal-BERT models. More specifically, we probe their capabilities of storing the grammatical structure of linguistic data and the structure learned over objects in visual data. To reach that goal, we first make the inherent structure of language and visuals explicit by a dependency parse of the sentences that describe the image and by the dependencies between the object regions in the image, respectively. We call this explicit visual structure the scene tree, that is based on the dependency tree of the language description. Extensive probing experiments show that the multimodal-BERT models do not encode these scene trees. 2022.acl-long.388 @@ -5416,7 +5416,7 @@ in the Case of Unambiguous Gender DamirJuric JackFlann EhudReiter - AnyaBelz + AnyaBelz AleksandarSavkov 5739-5754 In recent years, machine learning models have rapidly become better at generating clinical consultation notes; yet, there is little work on how to properly evaluate the generated consultation notes to understand the impact they may have on both the clinician using them and the patient’s clinical safety. To address this we present an extensive human evaluation study of consultation notes where 5 clinicians (i) listen to 57 mock consultations, (ii) write their own notes, (iii) post-edit a number of automatically generated notes, and (iv) extract all the errors, both quantitative and qualitative. We then carry out a correlation study with 18 automatic quality metrics and the human judgements. We find that a simple, character-based Levenshtein distance metric performs on par if not better than common model-based metrics like BertScore. All our findings and annotations are open-sourced. @@ -5487,7 +5487,7 @@ in the Case of Unambiguous Gender Evaluating Extreme Hierarchical Multi-label Classification - EnriqueAmigo + EnriqueAmigo AgustínDelgado 5809-5819 Several natural language processing (NLP) tasks are defined as a classification problem in its most complex form: Multi-label Hierarchical Extreme classification, in which items may be associated with multiple classes from a set of thousands of possible classes organized in a hierarchy and with a highly unbalanced distribution both in terms of class frequency and the number of labels per item. We analyze the state of the art of evaluation metrics based on a set of formal properties and we define an information theoretic based metric inspired by the Information Contrast Model (ICM). Experiments on synthetic data and a case study on real data show the suitability of the ICM for such scenarios. @@ -5570,14 +5570,14 @@ in the Case of Unambiguous Gender An Effective and Efficient Entity Alignment Decoding Algorithm via Third-Order Tensor Isomorphism - XinMao + XinMao MeirongMa HaoYuan JianchaoZhu ZongYuWang RuiXie WeiWu - ManLan + ManLan 5888-5898 Entity alignment (EA) aims to discover the equivalent entity pairs between KGs, which is a crucial step for integrating multi-source KGs.For a long time, most researchers have regarded EA as a pure graph representation learning task and focused on improving graph encoders while paying little attention to the decoding process. In this paper, we propose an effective and efficient EA Decoding Algorithm via Third-order Tensor Isomorphism (DATTI).Specifically, we derive two sets of isomorphism equations: (1) Adjacency tensor isomorphism equations and (2) Gramian tensor isomorphism equations. By combining these equations, DATTI could effectively utilize the adjacency and inner correlation isomorphisms of KGs to enhance the decoding process of EA.Extensive experiments on public datasets indicate that our decoding algorithm can deliver significant performance improvements even on the most advanced EA methods, while the extra required time is less than 3 seconds. 2022.acl-long.405 @@ -5614,7 +5614,7 @@ in the Case of Unambiguous Gender Continual Pre-training of Language Models for Math Problem Understanding with Syntax-Aware Memory Network ZhengGong KunZhou - XinZhao + XinZhao JingSha ShijinWang Ji-RongWen @@ -5749,7 +5749,7 @@ in the Case of Unambiguous Gender Rethinking Self-Supervision Objectives for Generalizable Coherence Modeling PrathyushaJwalapuram - ShafiqJoty + ShafiqJoty XiangLin 6044-6059 Given the claims of improved text generation quality across various pre-trained neural models, we consider the coherence evaluation of machine generated text to be one of the principal applications of coherence models that needs to be investigated. Prior work in neural coherence modeling has primarily focused on devising new architectures for solving the permuted document task. We instead use a basic model architecture and show significant improvements over state of the art within the same training regime. We then design a harder self-supervision objective by increasing the ratio of negative samples within a contrastive learning setup, and enhance the model further through automatic hard negative mining coupled with a large global negative queue encoded by a momentum encoder. We show empirically that increasing the density of negative samples improves the basic model, and using a global negative queue further improves and stabilizes the model while training with hard negative samples. We evaluate the coherence model on task-independent test sets that resemble real-world applications and show significant improvements in coherence evaluations of downstream tasks. @@ -5787,7 +5787,7 @@ in the Case of Unambiguous Gender <fixed-case>CLIP</fixed-case> Models are Few-Shot Learners: Empirical Studies on <fixed-case>VQA</fixed-case> and Visual Entailment HaoyuSong LiDong - WeinanZhang + WeinanZhang TingLiu FuruWei 6088-6100 @@ -5819,7 +5819,7 @@ in the Case of Unambiguous Gender Debiased Contrastive Learning of Unsupervised Sentence Representations KunZhou BeichenZhang - XinZhao + XinZhao Ji-RongWen 6120-6130 Recently, contrastive learning has been shown to be effective in improving pre-trained language models (PLM) to derive high-quality sentence representations. It aims to pull close positive examples to enhance the alignment while push apart irrelevant negatives for the uniformity of the whole representation space. However, previous works mostly adopt in-batch negatives or sample from training data at random. Such a way may cause the sampling bias that improper negatives (false negatives and anisotropy representations) are used to learn sentence representations, which will hurt the uniformity of the representation space. To address it, we present a new framework DCLR (Debiased Contrastive Learning of unsupervised sentence Representations) to alleviate the influence of these improper negatives.In DCLR, we design an instance weighting method to punish false negatives and generate noise-based negatives to guarantee the uniformity of the representation space.Experiments on seven semantic textual similarity tasks show that our approach is more effective than competitive baselines. Our code and data are publicly available at the link: bluehttps://github.com/RUCAIBox/DCLR. @@ -5874,7 +5874,7 @@ in the Case of Unambiguous Gender PayalBajaj XiaSong Xian-LingMao - HeyanHuang + HeyanHuang FuruWei 6170-6182 In this paper, we introduce ELECTRA-style tasks to cross-lingual language model pre-training. Specifically, we present two pre-training tasks, namely multilingual replaced token detection, and translation replaced token detection. Besides, we pretrain the model, named as XLM-E, on both multilingual and parallel corpora. Our model outperforms the baseline models on various cross-lingual understanding tasks with much less computation cost. Moreover, analysis shows that XLM-E tends to obtain better cross-lingual transferability. @@ -5975,21 +5975,21 @@ in the Case of Unambiguous Gender <fixed-case>A</fixed-case>mericas<fixed-case>NLI</fixed-case>: Evaluating Zero-shot Natural Language Understanding of Pretrained Multilingual Models in Truly Low-resource Languages AbteenEbrahimi ManuelMager - ArturoOncevay + ArturoOncevay VishravChaudhary LuisChiruzzo AngelaFan JohnOrtega RicardoRamos - AnnetteRios - Ivan VladimirMeza Ruiz + AnnetteRios + Ivan VladimirMeza Ruiz GustavoGiménez-Lugo - ElisabethMager + ElisabethMager GrahamNeubig AlexisPalmer RolandoCoto-Solano ThangVu - KatharinaKann + KatharinaKann 6279-6299 Pretrained multilingual models are able to perform cross-lingual transfer in a zero-shot setting, even for languages unseen during pretraining. However, prior work evaluating performance on unseen languages has largely been limited to low-level, syntactic tasks, and it remains unclear if zero-shot learning of high-level, semantic tasks is possible for unseen languages. To explore this question, we present AmericasNLI, an extension of XNLI (Conneau et al., 2018) to 10 Indigenous languages of the Americas. We conduct experiments with XLM-R, testing multiple zero-shot and translation-based approaches. Additionally, we explore model adaptation via continued pretraining and provide an analysis of the dataset by considering hypothesis-only models. We find that XLM-R’s zero-shot performance is poor for all 10 languages, with an average performance of 38.48%. Continued pretraining offers improvements, with an average accuracy of 43.85%. Surprisingly, training on poorly translated data by far outperforms all other methods with an accuracy of 49.12%. 2022.acl-long.435 @@ -6041,7 +6041,7 @@ in the Case of Unambiguous Gender <fixed-case>CONT</fixed-case>ai<fixed-case>NER</fixed-case>: Few-Shot Named Entity Recognition via Contrastive Learning Sarkar Snigdha SarathiDas ArzooKatiyar - RebeccaPassonneau + RebeccaPassonneau RuiZhang 6338-6353 Named Entity Recognition (NER) in Few-Shot setting is imperative for entity tagging in low resource domains. Existing approaches only learn class-specific semantic features and intermediate representations from source domains. This affects generalizability to unseen target domains, resulting in suboptimal performances. To this end, we present CONTaiNER, a novel contrastive learning technique that optimizes the inter-token distribution distance for Few-Shot NER. Instead of optimizing class-specific attributes, CONTaiNER optimizes a generalized objective of differentiating between token categories based on their Gaussian-distributed embeddings. This effectively alleviates overfitting issues originating from training domains. Our experiments in several traditional test domains (OntoNotes, CoNLL’03, WNUT ‘17, GUM) and a new large scale Few-Shot NER dataset (Few-NERD) demonstrate that on average, CONTaiNER outperforms previous methods by 3%-13% absolute F1 points while showing consistent performance trends, even in challenging scenarios where previous approaches could not achieve appreciable performance. @@ -6071,7 +6071,7 @@ in the Case of Unambiguous Gender VincentChen Kuan-ChiehLo ChachaChen - Ting-HaoHuang + Ting-HaoHuang Lun-WeiKu 6365-6378 Visual storytelling (VIST) is a typical vision and language task that has seen extensive development in the natural language generation research domain. However, it remains unclear whether conventional automatic evaluation metrics for text generation are applicable on VIST. In this paper, we present the VHED (VIST Human Evaluation Data) dataset, which first re-purposes human evaluation results for automatic evaluation; hence we develop Vrank (VIST Ranker), a novel reference-free VIST metric for story evaluation. We first show that the results from commonly adopted automatic metrics for text generation have little correlation with those obtained from human evaluation, which motivates us to directly utilize human evaluation results to learn the automatic evaluation model. In the experiments, we evaluate the generated texts to predict story ranks using our model as well as other reference-based and reference-free metrics. Results show that Vrank prediction is significantly more aligned to human evaluation than other metrics with almost 30% higher accuracy when ranking story pairs. Moreover, we demonstrate that only Vrank shows human-like behavior in its strong ability to find better stories when the quality gap between two stories is high. Finally, we show the superiority of Vrank by its generalizability to pure textual stories, and conclude that this reuse of human evaluation results puts Vrank in a strong position for continued future advances. @@ -6160,7 +6160,7 @@ in the Case of Unambiguous Gender Compositional Generalization in Dependency Parsing EmilyGoodwin SivaReddy - TimothyO’Donnell + TimothyO’Donnell DzmitryBahdanau 6482-6493 Compositionality— the ability to combine familiar units like words into novel phrases and sentences— has been the focus of intense interest in artificial intelligence in recent years. To test compositional generalization in semantic parsing, Keysers et al. (2020) introduced Compositional Freebase Queries (CFQ). This dataset maximizes the similarity between the test and train distributions over primitive units, like words, while maximizing the compound divergence: the dissimilarity between test and train distributions over larger structures, like phrases. Dependency parsing, however, lacks a compositional generalization benchmark. In this work, we introduce a gold-standard set of dependency parses for CFQ, and use this to analyze the behaviour of a state-of-the art dependency parser (Qi et al., 2020) on the CFQ dataset. We find that increasing compound divergence degrades dependency parsing performance, although not as dramatically as semantic parsing performance. Additionally, we find the performance of the dependency parser does not uniformly degrade relative to compound divergence, and the parser performs differently on different splits with the same compound divergence. We explore a number of hypotheses for what causes the non-uniform degradation in dependency parsing performance, and identify a number of syntactic structures that drive the dependency parser’s lower performance on the most challenging splits. @@ -6210,7 +6210,7 @@ in the Case of Unambiguous Gender Substructure Distribution Projection for Zero-Shot Cross-Lingual Dependency Parsing - FredaShi + FredaShi KevinGimpel KarenLivescu 6547-6563 @@ -6335,7 +6335,7 @@ in the Case of Unambiguous Gender Reinforcement Guided Multi-Task Learning Framework for Low-Resource Stereotype Detection - RajkumarPujari + RajkumarPujari ErikOveson PriyankaKulkarni ElnazNouri @@ -6365,7 +6365,7 @@ in the Case of Unambiguous Gender YiMao ZhifangSui WeizhuChen - BillDolan + BillDolan 6723-6737 Large pretrained generative models like GPT-3 often suffer from hallucinating non-existent or incorrect content, which undermines their potential merits in real applications. Existing work usually attempts to detect these hallucinations based on a corresponding oracle reference at a sentence or document level. However ground-truth references may not be readily available for many free-form text generation applications, and sentence- or document-level detection may fail to provide the fine-grained signals that would prevent fallacious content in real time. As a first step to addressing these issues, we propose a novel token-level, reference-free hallucination detection task and an associated annotated dataset named HaDeS (HAllucination DEtection dataSet). To create this dataset, we first perturb a large number of text segments extracted from English language Wikipedia, and then verify these with crowd-sourced annotations. To mitigate label imbalance during annotation, we utilize an iterative model-in-loop strategy. We conduct comprehensive data analyses and create multiple baseline models. 2022.acl-long.464 @@ -6498,7 +6498,7 @@ in the Case of Unambiguous Gender Length Control in Abstractive Summarization by Pretraining Information Selection YizhuLiu QiJia - KennyZhu + KennyZhu 6885-6895 Previous length-controllable summarization models mostly control lengths at the decoding stage, whereas the encoding or the selection of information from the source document is not sensitive to the designed length. They also tend to generate summaries as long as those in the training data. In this paper, we propose a length-aware attention mechanism (LAAM) to adapt the encoding of the source based on the desired length. Our approach works by training LAAM on a summary length balanced dataset built from the original training data, and then fine-tuning as usual. Results show that this approach is effective in generating high-quality summaries with desired lengths and even those short lengths never seen in the original training set. 2022.acl-long.474 @@ -6514,7 +6514,7 @@ in the Case of Unambiguous Gender DiLiang SiruiWang WeiWu - XuanjingHuang + XuanjingHuang 6896-6906 Multi-hop question generation focuses on generating complex questions that require reasoning over multiple pieces of information of the input passage. Current models with state-of-the-art performance have been able to generate the correct questions corresponding to the answers. However, most models can not ensure the complexity of generated questions, so they may generate shallow questions that can be answered without multi-hop reasoning. To address this challenge, we propose the CQG, which is a simple and effective controlled framework. CQG employs a simple method to generate the multi-hop questions that contain key entities in multi-hop reasoning chains, which ensure the complexity and quality of the questions. In addition, we introduce a novel controlled Transformer-based decoder to guarantee that key entities appear in the questions. Experiment results show that our model greatly improves performance, which also outperforms the state-of-the-art model about 25% by 5 BLEU points on HotpotQA. 2022.acl-long.475 @@ -6526,7 +6526,7 @@ in the Case of Unambiguous Gender MostafaAbdou VinitRavishankar ArturKulmizev - AndersSøgaard + AndersSøgaard 6907-6919 Recent studies have shown that language models pretrained and/or fine-tuned on randomly permuted sentences exhibit competitive performance on GLUE, putting into question the importance of word order information. Somewhat counter-intuitively, some of these studies also report that position embeddings appear to be crucial for models’ good performance with shuffled text. We probe these language models for word order information and investigate what position embeddings learned from shuffled text encode, showing that these models retain a notion of word order information. We show this is in part due to a subtlety in how shuffling is implemented in previous work – before rather than after subword segmentation. Surprisingly, we find even Language models trained on text shuffled after subword segmentation retain some semblance of information about word order because of the statistical dependencies between sentence length and unigram probabilities. Finally, we show that beyond GLUE, a variety of language understanding tasks do require word order information, often to an extent that cannot be learned through fine-tuning. 2022.acl-long.476 @@ -6569,7 +6569,7 @@ in the Case of Unambiguous Gender SakuSugawara NikitaNangia AlexWarstadt - SamuelBowman + SamuelBowman 6951-6971 For a natural language understanding benchmark to be useful in research, it has to consist of examples that are diverse and difficult enough to discriminate among current and near-future state-of-the-art systems. However, we do not yet know how best to select text sources to collect a variety of challenging examples. In this study, we crowdsource multiple-choice reading comprehension questions for passages taken from seven qualitatively distinct sources, analyzing what attributes of passages contribute to the difficulty and question types of the collected examples. To our surprise, we find that passage source, length, and readability measures do not significantly affect question difficulty. Through our manual annotation of seven reasoning types, we observe several trends between passage sources and reasoning types, e.g., logical reasoning is more often required in questions written for technical passages. These results suggest that when creating a new benchmark dataset, selecting a diverse set of passages can help ensure a diverse range of question types, but that passage difficulty need not be a priority. 2022.acl-long.479 @@ -6580,7 +6580,7 @@ in the Case of Unambiguous Gender From Simultaneous to Streaming Machine Translation by Leveraging Streaming History JavierIranzo-Sánchez JorgeCivera - AlfonsJuan + AlfonsJuan 6972-6985 Simultaneous Machine Translation is the task of incrementally translating an input sentence before it is fully available. Currently, simultaneous translation is carried out by translating each sentence independently of the previously translated text. More generally, Streaming MT can be understood as an extension of Simultaneous MT to the incremental translation of a continuous input text stream. In this work, a state-of-the-art simultaneous sentence-level MT system is extended to the streaming setup by leveraging the streaming history. Extensive empirical results are reported on IWSLT Translation Tasks, showing that leveraging the streaming history leads to significant quality gains. In particular, the proposed system proves to compare favorably to the best performing systems. 2022.acl-long.480 @@ -6617,7 +6617,7 @@ in the Case of Unambiguous Gender ConstanzaFierro KaterinaMargatina PhillipRust - AndersSøgaard + AndersSøgaard 6997-7013 Various efforts in the Natural Language Processing (NLP) community have been made to accommodate linguistic diversity and serve speakers of many different languages. However, it is important to acknowledge that speakers and the content they produce and require, vary not just by language, but also by culture. Although language and culture are tightly linked, there are important differences. Analogous to cross-lingual and multilingual NLP, cross-cultural and multicultural NLP considers these differences in order to better serve users of NLP systems. We propose a principled framework to frame these efforts, and survey existing and potential strategies. 2022.acl-long.482 @@ -6707,7 +6707,7 @@ in the Case of Unambiguous Gender ShumingMa BoZheng ZhifangSui - BaobaoChang + BaobaoChang FuruWei 7085-7095 The Mixture-of-Experts (MoE) technique can scale up the model size of Transformers with an affordable computational overhead. We point out that existing learning-to-route MoE methods suffer from the routing fluctuation issue, i.e., the target expert of the same input may change along with training, but only one expert will be activated for the input during inference. The routing fluctuation tends to harm sample efficiency because the same input updates different experts but only one is finally used. In this paper, we propose StableMoE with two training stages to address the routing fluctuation problem. In the first training stage, we learn a balanced and cohesive routing strategy and distill it into a lightweight router decoupled from the backbone model. In the second training stage, we utilize the distilled router to determine the token-to-expert assignment and freeze it for a stable routing strategy. We validate our method on language modeling and multilingual machine translation. The results show that StableMoE outperforms existing MoE methods in terms of both convergence speed and performance. @@ -6810,7 +6810,7 @@ in the Case of Unambiguous Gender MasahiroKaneko ShoTakase AyanaNiwa - NaoakiOkazaki + NaoakiOkazaki 7176-7187 Grammatical Error Correction (GEC) should not focus only on high accuracy of corrections but also on interpretability for language learning. However, existing neural-based GEC models mainly aim at improving accuracy, and their interpretability has not been explored.A promising approach for improving interpretability is an example-based method, which uses similar retrieved examples to generate corrections. In addition, examples are beneficial in language learning, helping learners understand the basis of grammatically incorrect/correct texts and improve their confidence in writing. Therefore, we hypothesize that incorporating an example-based method into GEC can improve interpretability as well as support language learners. In this study, we introduce an Example-Based GEC (EB-GEC) that presents examples to language learners as a basis for a correction result. The examples consist of pairs of correct and incorrect sentences similar to a given input and its predicted correction. Experiments demonstrate that the examples presented by EB-GEC help language learners decide to accept or refuse suggestions from the GEC output. Furthermore, the experiments also show that retrieved examples improve the accuracy of corrections. 2022.acl-long.496 @@ -6860,7 +6860,7 @@ in the Case of Unambiguous Gender One Country, 700+ Languages: <fixed-case>NLP</fixed-case> Challenges for Underrepresented Languages and Dialects in <fixed-case>I</fixed-case>ndonesia Alham FikriAji - Genta IndraWinata + Genta IndraWinata FajriKoto SamuelCahyawijaya AdeRomadhony @@ -6868,7 +6868,7 @@ in the Case of Unambiguous Gender KemalKurniawan DavidMoeljadi Radityo EkoPrasojo - TimothyBaldwin + TimothyBaldwin Jey HanLau SebastianRuder 7226-7249 @@ -6882,8 +6882,8 @@ in the Case of Unambiguous Gender Is <fixed-case>GPT</fixed-case>-3 Text Indistinguishable from Human Text? Scarecrow: A Framework for Scrutinizing Machine Text YaoDou MaxwellForbes - RikKoncel-Kedziorski - Noah A.Smith + RikKoncel-Kedziorski + Noah A.Smith YejinChoi 7250-7274 Modern neural language models can produce remarkably fluent and grammatical text. So much, in fact, that recent work by Clark et al. (2021) has reported that conventional crowdsourcing can no longer reliably distinguish between machine-authored (GPT-3) and human-authored writing. As errors in machine generations become ever subtler and harder to spot, it poses a new challenge to the research community for robust machine text evaluation. We propose a new framework called Scarecrow for scrutinizing machine text via crowd annotation. To support the broad range of real machine errors that can be identified by laypeople, the ten error categories of Scarecrow—such as redundancy, commonsense errors, and incoherence—are identified through several rounds of crowd annotation experiments without a predefined ontology. We then use Scarecrow to collect over 41k error spans in human-written and machine-generated paragraphs of English language news text. We isolate factors for detailed analysis, including parameter count, training data, and various decoding-time configurations. Our approach successfully quantifies measurable gaps between human authored text and generations from models of several sizes, including fourteen configurations of GPT-3. In addition, our analysis unveils new insights, with detailed rationales provided by laypeople, e.g., that the commonsense capabilities have been improving with larger models while math capabilities have not, and that the choices of simple decoding hyperparameters can make remarkable differences on the perceived quality of machine text. We release our training material, annotation toolkit and dataset at https://yao-dou.github.io/scarecrow/. @@ -6950,7 +6950,7 @@ in the Case of Unambiguous Gender Outstanding Paper AshwinDevaraj WilliamSheffield - ByronWallace + ByronWallace Junyi JessyLi 7331-7345 Automated simplification models aim to make input texts more readable. Such methods have the potential to make complex information accessible to a wider audience, e.g., providing access to recent medical literature which might otherwise be impenetrable for a lay reader. However, such models risk introducing errors into automatically simplified texts, for instance by inserting statements unsupported by the corresponding original text, or by omitting key information. Providing more readable but inaccurate versions of texts may in many cases be worse than providing no such access at all. The problem of factual accuracy (and the lack thereof) has received heightened attention in the context of summarization models, but the factuality of automatically simplified texts has not been investigated. We introduce a taxonomy of errors that we use to analyze both references drawn from standard simplification datasets and state-of-the-art model outputs. We find that errors often appear in both that are not captured by existing evaluation metrics, motivating a need for research into ensuring the factual accuracy of automated simplification models. @@ -7002,7 +7002,7 @@ in the Case of Unambiguous Gender Weakly Supervised Word Segmentation for Computational Language Documentation ShuOkabe - LaurentBesacier + LaurentBesacier FrançoisYvon 7385-7398 Word and morpheme segmentation are fundamental steps of language documentation as they allow to discover lexical units in a language for which the lexicon is unknown. However, in most language documentation scenarios, linguists do not start from a blank page: they may already have a pre-existing dictionary or have initiated manual segmentation of a small part of their data. This paper studies how such a weak supervision can be taken advantage of in Bayesian non-parametric models of segmentation. Our experiments on two very low resource languages (Mboshi and Japhug), whose documentation is still in progress, show that weak supervision can be beneficial to the segmentation quality. In addition, we investigate an incremental learning scenario where manual segmentations are provided in a sequential manner. This work opens the way for interactive annotation tools for documentary linguists. @@ -7054,7 +7054,7 @@ in the Case of Unambiguous Gender DeepakNathani XavierGarcia BidishaSamanta - ParthaTalukdar + ParthaTalukdar 7439-7468 Style transfer is the task of rewriting a sentence into a target style while approximately preserving content. While most prior literature assumes access to a large style-labelled corpus, recent work (Riley et al. 2021) has attempted “few-shot” style transfer using only 3-10 sentences at inference for style extraction. In this work we study a relevant low-resource setting: style transfer for languages where no style-labelled corpora are available. We notice that existing few-shot methods perform this task poorly, often copying inputs verbatim. We push the state-of-the-art for few-shot style transfer with a new method modeling the stylistic difference between paraphrases. When compared to prior work, our model achieves 2-3x better performance in formality transfer and code-mixing addition across seven languages. Moreover, our method is better at controlling the style transfer magnitude using an input scalar knob. We report promising qualitative results for several attribute transfer tasks (sentiment transfer, simplification, gender neutralization, text anonymization) all without retraining the model. Finally, we find model evaluation to be difficult due to the lack of datasets and metrics for many languages. To facilitate future research we crowdsource formality annotations for 4000 sentence pairs in four Indic languages, and use this data to design our automatic evaluations. 2022.acl-long.514 @@ -7071,7 +7071,7 @@ in the Case of Unambiguous Gender ZhaofengWu LingpengKong RoySchwartz - Noah A.Smith + Noah A.Smith 7469-7483 Transformer architectures have achieved state- of-the-art results on a variety of natural language processing (NLP) tasks. However, their attention mechanism comes with a quadratic complexity in sequence lengths, making the computational overhead prohibitive, especially for long sequences. Attention context can be seen as a random-access memory with each token taking a slot. Under this perspective, the memory size grows linearly with the sequence length, and so does the overhead of reading from it. One way to improve the efficiency is to bound the memory size. We show that disparate approaches can be subsumed into one abstraction, attention with bounded-memory control (ABC), and they vary in their organization of the memory. ABC reveals new, unexplored possibilities. First, it connects several efficient attention variants that would otherwise seem apart. Second, this abstraction gives new insights—an established approach (Wang et al., 2020b) previously thought to not be applicable in causal attention, actually is. Last, we present a new instance of ABC, which draws inspiration from existing ABC approaches, but replaces their heuristic memory-organizing functions with a learned, contextualized one. Our experiments on language modeling, machine translation, and masked language model finetuning show that our approach outperforms previous efficient attention models; compared to the strong transformer baselines, it significantly improves the inference time and space efficiency with no or negligible accuracy loss. 2022.acl-long.515 @@ -7081,7 +7081,7 @@ in the Case of Unambiguous Gender The Dangers of Underclaiming: Reasons for Caution When Reporting How <fixed-case>NLP</fixed-case> Systems Fail - SamuelBowman + SamuelBowman 7484-7499 Researchers in NLP often frame and discuss research results in ways that serve to deemphasize the field’s successes, often in response to the field’s widespread hype. Though well-meaning, this has yielded many misleading or false claims about the limits of our best technology. This is a problem, and it may be more serious than it looks: It harms our credibility in ways that can make it harder to mitigate present-day harms, like those involving biased systems for content moderation or resume screening. It also limits our ability to prepare for the potentially enormous impacts of more distant future advances. This paper urges researchers to be careful about these claims and suggests some research directions and communication strategies that will make it easier to avoid or rebut them. 2022.acl-long.516 @@ -7156,7 +7156,7 @@ in the Case of Unambiguous Gender RuolanYang ZitongLi HaifengTang - KennyZhu + KennyZhu 7579-7590 Existing automatic evaluation systems of chatbots mostly rely on static chat scripts as ground truth, which is hard to obtain, and requires access to the models of the bots as a form of “white-box testing”. Interactive evaluation mitigates this problem but requires human involvement. In our work, we propose an interactive chatbot evaluation framework in which chatbots compete with each other like in a sports tournament, using flexible scoring metrics. This framework can efficiently rank chatbots independently from their model architectures and the domains for which they are trained. 2022.acl-long.522 @@ -7275,7 +7275,7 @@ in the Case of Unambiguous Gender Probing for Labeled Dependency Trees MaxMüller-Eberstein Robvan der Goot - BarbaraPlank + BarbaraPlank 7711-7726 Probing has become an important tool for analyzing representations in Natural Language Processing (NLP). For graphical NLP tasks such as dependency parsing, linear probes are currently limited to extracting undirected or unlabeled parse trees which do not capture the full task. This work introduces DepProbe, a linear probe which can extract labeled and directed dependency parse trees from embeddings while using fewer parameters and compute than prior methods. Leveraging its full task coverage and lightweight parametrization, we investigate its predictive power for selecting the best transfer language for training a full biaffine attention parser. Across 13 languages, our proposed method identifies the best source treebank 94% of the time, outperforming competitive baselines and prior work. Finally, we analyze the informativeness of task-specific subspaces in contextual embeddings as well as which benefits a full parser’s non-linear parametrization provides. 2022.acl-long.532 @@ -7383,7 +7383,7 @@ in the Case of Unambiguous Gender Fair and Argumentative Language Modeling for Computational Argumentation CarolinHoltermann AnneLauscher - SimonePonzetto + SimonePonzetto 7841-7861 Although much work in NLP has focused on measuring and mitigating stereotypical bias in semantic spaces, research addressing bias in computational argumentation is still in its infancy. In this paper, we address this research gap and conduct a thorough investigation of bias in argumentative language models. To this end, we introduce ABBA, a novel resource for bias measurement specifically tailored to argumentation. We employ our resource to assess the effect of argumentative fine-tuning and debiasing on the intrinsic bias found in transformer-based language models using a lightweight adapter-based approach that is more sustainable and parameter-efficient than full fine-tuning. Finally, we analyze the potential impact of language model debiasing on the performance in argument quality prediction, a downstream task of computational argumentation. Our results show that we are able to successfully and sustainably remove bias in general and argumentative language models while preserving (and sometimes improving) model performance in downstream tasks. We make all experimental code and data available at https://github.com/umanlp/FairArgumentativeLM. 2022.acl-long.541 @@ -7433,12 +7433,12 @@ in the Case of Unambiguous Gender YuanNi GuotongXie ZhifangSui - BaobaoChang + BaobaoChang HuiZong - ZhengYuan + ZhengYuan LinfengLi JunYan - HongyingZan + HongyingZan KunliZhang BuzhouTang QingcaiChen @@ -7546,7 +7546,7 @@ in the Case of Unambiguous Gender ChaoShang GuangtaoWang PengQi - JingHuang + JingHuang 8017-8026 Question answering over temporal knowledge graphs (KGs) efficiently uses facts contained in a temporal KG, which records entity relations and when they occur in time, to answer natural language questions (e.g., “Who was the president of the US before Obama?”). These questions often involve three time-related challenges that previous work fail to adequately address: 1) questions often do not specify exact timestamps of interest (e.g., “Obama” instead of 2000); 2) subtle lexical differences in time relations (e.g., “before” vs “after”); 3) off-the-shelf temporal KG embeddings that previous work builds on ignore the temporal order of timestamps, which is crucial for answering temporal-order related questions. In this paper, we propose a time-sensitive question answering (TSQA) framework to tackle these problems. TSQA features a timestamp estimation module to infer the unwritten timestamp from the question. We also employ a time-sensitive KG encoder to inject ordering information into the temporal KG embeddings that TSQA is based on. With the help of techniques to reduce the search space for potential answers, TSQA significantly outperforms the previous state of the art on a new benchmark for question answering over temporal KGs, especially achieving a 32% (absolute) error reduction on complex questions that require multiple steps of reasoning over facts in the temporal KG. 2022.acl-long.552 @@ -7643,7 +7643,7 @@ in the Case of Unambiguous Gender LeiHou JuanziLi ZhiyuanLiu - JinghuiXiao + JinghuiXiao 8128-8140 Program induction for answering complex questions over knowledge bases (KBs) aims to decompose a question into a multi-step program, whose execution against the KB produces the final answer. Learning to induce programs relies on a large number of parallel question-program pairs for the given KB. However, for most KBs, the gold program annotations are usually lacking, making learning difficult. In this paper, we propose the approach of program transfer, which aims to leverage the valuable program annotations on the rich-resourced KBs as external supervision signals to aid program induction for the low-resourced KBs that lack program annotations. For program transfer, we design a novel two-stage parsing framework with an efficient ontology-guided pruning strategy. First, a sketch parser translates the question into a high-level program sketch, which is the composition of functions. Second, given the question and sketch, an argument parser searches the detailed arguments from the KB for functions. During the searching, we incorporate the KB ontology to prune the search space. The experiments on ComplexWebQuestions and WebQuestionSP show that our method outperforms SOTA methods significantly, demonstrating the effectiveness of program transfer and our framework. Our codes and datasets can be obtained from https://github.com/THU-KEG/ProgramTransfer. 2022.acl-long.559 @@ -7696,7 +7696,7 @@ in the Case of Unambiguous Gender Flexible Generation from Fragmentary Linguistic Input PengQian - RogerLevy + RogerLevy 8176-8196 The dominant paradigm for high-performance models in novel NLP tasks today is direct specialization for the task via training from scratch or fine-tuning large pre-trained models. But does direct specialization capture how humans approach novel language tasks? We hypothesize that human performance is better characterized by flexible inference through composition of basic computational motifs available to the human language user. To test this hypothesis, we formulate a set of novel fragmentary text completion tasks, and compare the behavior of three direct-specialization models against a new model we introduce, GibbsComplete, which composes two basic computational motifs central to contemporary models: masked and autoregressive word prediction. We conduct three types of evaluation: human judgments of completion quality, satisfaction of syntactic constraints imposed by the input fragment, and similarity to human behavior in the structural statistics of the completions. With no task-specific parameter tuning, GibbsComplete performs comparably to direct-specialization models in the first two evaluations, and outperforms all direct-specialization models in the third evaluation. These results support our hypothesis that human behavior in novel language tasks and environments may be better characterized by flexible composition of basic computational motifs rather than by direct specialization. 2022.acl-long.563 @@ -7782,7 +7782,7 @@ in the Case of Unambiguous Gender Generating Scientific Definitions with Controllable Complexity TalAugust KatharinaReinecke - Noah A.Smith + Noah A.Smith 8298-8317 Unfamiliar terminology and complex language can present barriers to understanding science. Natural language processing stands to help address these issues by automatically defining unfamiliar terms. We introduce a new task and dataset for defining scientific terms and controlling the complexity of generated definitions as a way of adapting to a specific reader’s background knowledge. We test four definition generation methods for this new task, finding that a sequence-to-sequence approach is most successful. We then explore the version of the task in which definitions are generated at a target complexity level. We introduce a novel reranking approach and find in human evaluations that it offers superior fluency while also controlling complexity, compared to several controllable generation baselines. 2022.acl-long.569 @@ -7837,7 +7837,7 @@ in the Case of Unambiguous Gender Ethics Sheets for <fixed-case>AI</fixed-case> Tasks - SaifMohammad + SaifMohammad 8368-8379 Several high-profile events, such as the mass testing of emotion recognition systems on vulnerable sub-populations and using question answering systems to make moral judgments, have highlighted how technology will often lead to more adverse outcomes for those that are already marginalized. At issue here are not just individual systems and datasets, but also the AI tasks themselves. In this position paper, I make a case for thinking about ethical considerations not just at the level of individual models and datasets, but also at the level of AI tasks. I will present a new form of such an effort, Ethics Sheets for AI Tasks, dedicated to fleshing out the assumptions and ethical considerations hidden in how a task is commonly framed and in the choices we make regarding the data, method, and evaluation. I will also present a template for ethics sheets with 50 ethical considerations, using the task of emotion recognition as a running example. Ethics sheets are a mechanism to engage with and document ethical considerations before building datasets and systems. Similar to survey articles, a small number of carefully created ethics sheets can serve numerous researchers and developers. 2022.acl-long.573 @@ -7865,8 +7865,8 @@ in the Case of Unambiguous Gender ShujianHuang DongqiWang LihuaQian - XinyuDai - JiajunChen + XinyuDai + JiajunChen LeiLi 8398-8409 Recently, parallel text generation has received widespread attention due to its success in generation efficiency. Although many advanced techniques are proposed to improve its generation quality, they still need the help of an autoregressive model for training to overcome the one-to-many multi-modal phenomenon in the dataset, limiting their applications. In this paper, we propose GLAT, which employs the discrete latent variables to capture word categorical information and invoke an advanced curriculum learning technique, alleviating the multi-modality problem. Experiment results show that our method outperforms strong baselines without the help of an autoregressive model, which further broadens the application scenarios of the parallel decoding paradigm. @@ -7961,7 +7961,7 @@ in the Case of Unambiguous Gender LiDong YaruHao ZhifangSui - BaobaoChang + BaobaoChang FuruWei 8493-8502 Large-scale pretrained language models are surprisingly good at recalling factual knowledge presented in the training corpus. In this paper, we present preliminary studies on how factual knowledge is stored in pretrained Transformers by introducing the concept of knowledge neurons. Specifically, we examine the fill-in-the-blank cloze task for BERT. Given a relational fact, we propose a knowledge attribution method to identify the neurons that express the fact. We find that the activation of such knowledge neurons is positively correlated to the expression of their corresponding facts. In our case studies, we attempt to leverage knowledge neurons to edit (such as update, and erase) specific factual knowledge without fine-tuning. Our results shed light on understanding the storage of knowledge within pretrained Transformers. @@ -7989,7 +7989,7 @@ in the Case of Unambiguous Gender <fixed-case>F</fixed-case>rench <fixed-case>C</fixed-case>row<fixed-case>S</fixed-case>-Pairs: Extending a challenge dataset for measuring social bias in masked language models to a language other than <fixed-case>E</fixed-case>nglish - AurélieNévéol + AurélieNévéol YoannDupont JulienBezançon KarënFort @@ -8002,7 +8002,7 @@ in the Case of Unambiguous Gender Few-Shot Learning with <fixed-case>S</fixed-case>iamese Networks and Label Tuning - ThomasMüller + ThomasMüller GuillermoPérez-Torró MarcFranco-Salvador 8532-8545 @@ -8196,7 +8196,7 @@ in the Case of Unambiguous Gender DianYu JianshuChen DongYu - ClaireCardie + ClaireCardie 8736-8747 To perform well on a machine reading comprehension (MRC) task, machine readers usually require commonsense knowledge that is not explicitly mentioned in the given documents. This paper aims to extract a new kind of structured knowledge from scripts and use it to improve MRC. We focus on scripts as they contain rich verbal and nonverbal messages, and two relevant messages originally conveyed by different modalities during a short time period may serve as arguments of a piece of commonsense knowledge as they function together in daily communications. To save human efforts to name relations, we propose to represent relations implicitly by situating such an argument pair in a context and call it contextualized knowledge. To use the extracted knowledge to improve MRC, we compare several fine-tuning strategies to use the weakly-labeled MRC data constructed based on contextualized knowledge and further design a teacher-student paradigm with multiple teachers to facilitate the transfer of knowledge in weakly-labeled MRC data. Experimental results show that our paradigm outperforms other methods that use weakly-labeled data and improves a state-of-the-art baseline by 4.3% in accuracy on a Chinese multiple-choice MRC dataset C^3, wherein most of the questions require unstated prior knowledge. We also seek to transfer the knowledge to other tasks by simply adapting the resulting student reader, yielding a 2.9% improvement in F1 on a relation extraction dataset DialogRE, demonstrating the potential usefulness of the knowledge for non-MRC tasks that require document comprehension. 2022.acl-long.598 @@ -8218,7 +8218,7 @@ in the Case of Unambiguous Gender Active Evaluation: Efficient <fixed-case>NLG</fixed-case> Evaluation with Few Pairwise Comparisons Outstanding Paper Akash KumarMohankumar - MiteshKhapra + MiteshKhapra 8761-8781 Recent studies have shown the advantages of evaluating NLG systems using pairwise comparisons as opposed to direct assessment. Given k systems, a naive approach for identifying the top-ranked system would be to uniformly obtain pairwise comparisons from all {k \choose 2} pairs of systems. However, this can be very expensive as the number of human annotations required would grow quadratically with k. In this work, we introduce Active Evaluation, a framework to efficiently identify the top-ranked system by actively choosing system pairs for comparison using dueling bandit algorithms. We perform extensive experiments with 13 dueling bandits algorithms on 13 NLG evaluation datasets spanning 5 tasks and show that the number of human annotations can be reduced by 80%. To further reduce the number of human annotations, we propose model-based dueling bandit algorithms which combine automatic evaluation metrics with human evaluations. Specifically, we eliminate sub-optimal systems even before the human annotation process and perform human evaluations only on test examples where the automatic metric is highly uncertain. This reduces the number of human annotations required further by 89%. In effect, we show that identifying the top-ranked system requires only a few hundred human annotations, which grow linearly with k. Lastly, we provide practical recommendations and best practices to identify the top-ranked system efficiently. Our code has been made publicly available at https://github.com/akashkm99/duelnlg 2022.acl-long.600 @@ -8274,7 +8274,7 @@ in the Case of Unambiguous Gender Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers) SmarandaMuresan - PreslavNakov + PreslavNakov AlineVillavicencio Association for Computational Linguistics
Dublin, Ireland
@@ -8305,7 +8305,7 @@ in the Case of Unambiguous Gender HuaShen TongshuangWu WenboGuo - Ting-HaoHuang + Ting-HaoHuang 10-19 Existing self-explaining models typically favor extracting the shortest possible rationales — snippets of an input text “responsible for” corresponding output — to explain the model prediction, with the assumption that shorter rationales are more intuitive to humans. However, this assumption has yet to be validated. Is the shortest rationale indeed the most human-understandable? To answer this question, we design a self-explaining model, LimitedInk, which allows users to extract rationales at any target length. Compared to existing baselines, LimitedInk achieves compatible end-task performance and human-annotated rationale agreement, making it a suitable representation of the recent class of self-explaining models. We use LimitedInk to conduct a user study on the impact of rationale length, where we ask human judges to predict the sentiment label of documents based only on LimitedInk-generated rationales with different lengths. We show rationales that are too short do not help humans predict labels better than randomly masked text, suggesting the need for more careful design of the best human rationales. 2022.acl-short.2 @@ -8318,7 +8318,7 @@ in the Case of Unambiguous Gender TiagoPimentel ThomasClark RyanCotterell - RogerLevy + RogerLevy 20-28 Numerous analyses of reading time (RT) data have been undertaken in the effort to learn more about the internal processes that occur during reading comprehension. However, data measured on words at the end of a sentence–or even clause–is often omitted due to the confounding factors introduced by so-called “wrap-up effects,” which manifests as a skewed distribution of RTs for these words. Consequently, the understanding of the cognitive processes that might be involved in these effects is limited. In this work, we attempt to learn more about these processes by looking for the existence–or absence–of a link between wrap-up effects and information theoretic quantities, such as word and context information content. We find that the information distribution of prior context is often predictive of sentence- and clause-final RTs (while not of sentence-medial RTs), which lends support to several prior hypotheses about the processes involved in wrap-up effects. 2022.acl-short.3 @@ -8401,7 +8401,7 @@ in the Case of Unambiguous Gender On Efficiently Acquiring Annotations for Multilingual Models Joel Ruben AntonyMoniz BarunPatra - MatthewGormley + MatthewGormley 69-85 When tasked with supporting multiple languages for a given problem, two approaches have arisen: training a model for each language with the annotation budget divided equally among them, and training on a high-resource language followed by zero-shot transfer to the remaining languages. In this work, we show that the strategy of joint learning across multiple languages using a single model performs substantially better than the aforementioned alternatives. We also demonstrate that active learning provides additional, complementary benefits. We show that this simple approach enables the model to be data efficient by allowing it to arbitrate its annotation budget to query languages it is less certain on. We illustrate the effectiveness of our proposed method on a diverse set of tasks: a classification task with 4 languages, a sequence tagging task with 4 languages and a dependency parsing task with 5 languages. Our proposed method, whilst simple, substantially outperforms the other viable alternatives for building a model in a multilingual setting under constrained budgets. 2022.acl-short.9 @@ -8445,7 +8445,7 @@ in the Case of Unambiguous Gender AmanulHaque VaibhavGarg HuiGuo - MunindarSingh + MunindarSingh 106-112 We present Pixie, a manually annotated dataset for preference classification comprising 8,890 sentences drawn from app reviews. Unlike previous studies on preference classification, Pixie contains implicit (omitting an entity being compared) and indirect (lacking comparative linguistic cues) comparisons. We find that transformer-based pretrained models, finetuned on Pixie, achieve a weighted average F1 score of 83.34% and outperform the existing state-of-the-art preference classification model (73.99%). 2022.acl-short.13 @@ -8635,7 +8635,7 @@ in the Case of Unambiguous Gender Predicting Sentence Deletions for Text Simplification Using a Functional Discourse Structure BohanZhang - Prafulla KumarChoubey + Prafulla KumarChoubey RuihongHuang 255-261 Document-level text simplification often deletes some sentences besides performing lexical, grammatical or structural simplification to reduce text complexity. In this work, we focus on sentence deletions for text simplification and use a news genre-specific functional discourse structure, which categorizes sentences based on their contents and their function roles in telling a news story, for predicting sentence deletion. We incorporate sentence categories into a neural net model in two ways for predicting sentence deletions, either as additional features or by jointly predicting sentence deletions and sentence categories. Experimental results using human-annotated data show that incorporating the functional structure improves the recall of sentence deletion prediction by 6.5% and 10.7% respectively using the two methods, and improves the overall F1-score by 3.6% and 4.3% respectively. @@ -8694,7 +8694,7 @@ in the Case of Unambiguous Gender LongBai WeiLi JiafengGuo - XueqiCheng + XueqiCheng 290-296 A Temporal Knowledge Graph (TKG) is a sequence of KGs corresponding to different timestamps. TKG reasoning aims to predict potential facts in the future given the historical KG sequences. One key of this task is to mine and understand evolutional patterns of facts from these sequences. The evolutional patterns are complex in two aspects, length-diversity and time-variability. Existing models for TKG reasoning focus on modeling fact sequences of a fixed length, which cannot discover complex evolutional patterns that vary in length. Furthermore, these models are all trained offline, which cannot well adapt to the changes of evolutional patterns from then on. Thus, we propose a new model, called Complex Evolutional Network (CEN), which uses a length-aware Convolutional Neural Network (CNN) to handle evolutional patterns of different lengths via an easy-to-difficult curriculum learning strategy. Besides, we propose to learn the model under the online setting so that it can adapt to the changes of evolutional patterns over time. Extensive experiments demonstrate that CEN obtains substantial performance improvement under both the traditional offline and the proposed online settings. 2022.acl-short.32 @@ -8760,7 +8760,7 @@ in the Case of Unambiguous Gender TianyuLiu DamaiDai YunboCao - BaobaoChang + BaobaoChang ZhifangSui 333-339 Abstract Meaning Representation (AMR) parsing aims to translate sentences to semantic representation with a hierarchical structure, and is recently empowered by pretrained sequence-to-sequence models. However, there exists a gap between their flat training objective (i.e., equally treats all output tokens) and the hierarchical AMR structure, which limits the model generalization. To bridge this gap, we propose a Hierarchical Curriculum Learning (HCL) framework with Structure-level (SC) and Instance-level Curricula (IC). SC switches progressively from core to detail AMR semantic elements while IC transits from structure-simple to -complex AMR instances during training. Through these two warming-up processes, HCL reduces the difficulty of learning complex structures, thus the flat model can better adapt to the AMR hierarchy. Extensive experiments on AMR2.0, AMR3.0, structure-complex and out-of-distribution situations verify the effectiveness of HCL. @@ -8825,7 +8825,7 @@ in the Case of Unambiguous Gender k-<fixed-case>R</fixed-case>ater <fixed-case>R</fixed-case>eliability: <fixed-case>T</fixed-case>he Correct Unit of Reliability for Aggregated Human Annotations KaWong - PraveenParitosh + PraveenParitosh 378-384 Since the inception of crowdsourcing, aggregation has been a common strategy for dealing with unreliable data. Aggregate ratings are more reliable than individual ones. However, many Natural Language Processing (NLP) applications that rely on aggregate ratings only report the reliability of individual ratings, which is the incorrect unit of analysis. In these instances, the data reliability is under-reported, and a proposed k-rater reliability (kRR) should be used as the correct data reliability for aggregated datasets. It is a multi-rater generalization of inter-rater reliability (IRR). We conducted two replications of the WordSim-353 benchmark, and present empirical, analytical, and bootstrap-based methods for computing kRR on WordSim-353. These methods produce very similar results. We hope this discussion will nudge researchers to report kRR in addition to IRR. 2022.acl-short.42 @@ -8835,8 +8835,8 @@ in the Case of Unambiguous Gender An Embarrassingly Simple Method to Mitigate Undesirable Properties of Pretrained Language Model Tokenizers ValentinHofmann - HinrichSchuetze - JanetPierrehumbert + HinrichSchuetze + JanetPierrehumbert 385-393 We introduce FLOTA (Few Longest Token Approximation), a simple yet effective method to improve the tokenization of pretrained language models (PLMs). FLOTA uses the vocabulary of a standard tokenizer but tries to preserve the morphological structure of words during tokenization. We evaluate FLOTA on morphological gold segmentations as well as a text classification task, using BERT, GPT-2, and XLNet as example PLMs. FLOTA leads to performance gains, makes inference more efficient, and enhances the robustness of PLMs with respect to whitespace noise. 2022.acl-short.43 @@ -8860,7 +8860,7 @@ in the Case of Unambiguous Gender KaitlynZhou KawinEthayarajh DallasCard - DanJurafsky + DanJurafsky 401-423 Cosine similarity of contextual embeddings is used in many NLP tasks (e.g., QA, IR, MT) and metrics (e.g., BERTScore). Here, we uncover systematic ways in which word similarities estimated by cosine over BERT embeddings are understated and trace this effect to training data frequency. We find that relative to human judgements, cosine similarity underestimates the similarity of frequent words with other instances of the same word or other words across contexts, even after controlling for polysemy and other factors. We conjecture that this underestimation of similarity for high frequency words is due to differences in the representational geometry of high and low frequency words and provide a formal argument for the two-dimensional case. 2022.acl-short.45 @@ -8871,7 +8871,7 @@ in the Case of Unambiguous Gender Revisiting the Compositional Generalization Abilities of Neural Sequence Models ArkilPatel SatwikBhattamishra - PhilBlunsom + PhilBlunsom NavinGoyal 424-434 Compositional generalization is a fundamental trait in humans, allowing us to effortlessly combine known phrases to form novel sentences. Recent works have claimed that standard seq-to-seq models severely lack the ability to compositionally generalize. In this paper, we focus on one-shot primitive generalization as introduced by the popular SCAN benchmark. We demonstrate that modifying the training distribution in simple and intuitive ways enables standard seq-to-seq models to achieve near-perfect generalization performance, thereby showing that their compositional generalization abilities were previously underestimated. We perform detailed empirical analysis of this phenomenon. Our results indicate that the generalization performance of models is highly sensitive to the characteristics of the training data which should be carefully considered while designing such benchmarks in future. @@ -8900,7 +8900,7 @@ in the Case of Unambiguous Gender JinheonBaek SukminCho Sung JuHwang - JongPark + JongPark 442-452 Dense retrieval models, which aim at retrieving the most relevant document for an input query on a dense representation space, have gained considerable attention for their remarkable success. Yet, dense models require a vast amount of labeled training data for notable performance, whereas it is often challenging to acquire query-document pairs annotated by humans. To tackle this problem, we propose a simple but effective Document Augmentation for dense Retrieval (DAR) framework, which augments the representations of documents with their interpolation and perturbation. We validate the performance of DAR on retrieval tasks with two benchmark datasets, showing that the proposed DAR significantly outperforms relevant baselines on the dense retrieval of both the labeled and unlabeled documents. 2022.acl-short.48 @@ -8941,7 +8941,7 @@ in the Case of Unambiguous Gender JeremyBarnes RobinKurtz StephanOepen - LiljaØvrelid + LiljaØvrelid ErikVelldal 470-478 This paper demonstrates how a graph-based semantic parser can be applied to the task of structured sentiment analysis, directly predicting sentiment graphs from text. We advance the state of the art on 4 out of 5 standard benchmark sets. We release the source code, models and predictions. @@ -8980,7 +8980,7 @@ in the Case of Unambiguous Gender How Distributed are Distributed Representations? An Observation on the Locality of Syntactic Information in Verb Agreement Tasks BingzhiLi GuillaumeWisniewski - BenoitCrabbé + BenoitCrabbé 501-507 This work addresses the question of the localization of syntactic information encoded in the transformers representations. We tackle this question from two perspectives, considering the object-past participle agreement in French, by identifying, first, in which part of the sentence and, second, in which part of the representation the syntactic information is encoded. The results of our experiments, using probing, causal analysis and feature selection method, show that syntactic information is encoded locally in a way consistent with the French grammar. 2022.acl-short.54 @@ -9034,7 +9034,7 @@ in the Case of Unambiguous Gender S<tex-math>^4</tex-math>-Tuning: A Simple Cross-lingual Sub-network Tuning Method RunxinXu FuliLuo - BaobaoChang + BaobaoChang SongfangHuang FeiHuang 530-537 @@ -9104,7 +9104,7 @@ in the Case of Unambiguous Gender Zero-Shot Dependency Parsing with Worst-Case Aware Automated Curriculum Learning Miryamde Lhoneux ShengZhang - AndersSøgaard + AndersSøgaard 578-587 Large multilingual pretrained language models such as mBERT and XLM-RoBERTa have been found to be surprisingly effective for cross-lingual transfer of syntactic parsing models Wu and Dredze (2019), but only between related languages. However, source and training languages are rarely related, when parsing truly low-resource languages. To close this gap, we adopt a method from multi-task learning, which relies on automated curriculum learning, to dynamically optimize for parsing performance on outlier languages. We show that this approach is significantly better than uniform and size-proportional sampling in the zero-shot setting. 2022.acl-short.64 @@ -9230,7 +9230,7 @@ in the Case of Unambiguous Gender Focus on the Target’s Vocabulary: Masked Label Smoothing for Machine Translation LiangChen RunxinXu - BaobaoChang + BaobaoChang 665-671 Label smoothing and vocabulary sharing are two widely used techniques in neural machine translation models. However, we argue that simply applying both techniques can be conflicting and even leads to sub-optimal performance. When allocating smoothed probability, original label smoothing treats the source-side words that would never appear in the target language equally to the real target-side words, which could bias the translation model. To address this issue, we propose Masked Label Smoothing (MLS), a new mechanism that masks the soft label probability of source-side words to zero. Simple yet effective, MLS manages to better integrate label smoothing with vocabulary sharing. Our extensive experiments show that MLS consistently yields improvement over original label smoothing on different datasets, including bilingual and multilingual translation from both translation quality and model’s calibration. Our code is released at https://github.com/PKUnlp-icler/MLS 2022.acl-short.74 @@ -9242,7 +9242,7 @@ in the Case of Unambiguous Gender Contrastive Learning-Enhanced Nearest Neighbor Mechanism for Multi-Label Text Classification Xi’aoSu RanWang - XinyuDai + XinyuDai 672-679 Multi-Label Text Classification (MLTC) is a fundamental and challenging task in natural language processing. Previous studies mainly focus on learning text representation and modeling label correlation but neglect the rich knowledge from the existing similar instances when predicting labels of a specific text. To make up for this oversight, we propose a k nearest neighbor (kNN) mechanism which retrieves several neighbor instances and interpolates the model output with their labels. Moreover, we design a multi-label contrastive learning objective that makes the model aware of the kNN classification process and improves the quality of the retrieved neighbors while inference. Extensive experiments show that our method can bring consistent and significant performance improvement to multiple MLTC models including the state-of-the-art pretrained and non-pretrained ones. 2022.acl-short.75 @@ -9332,7 +9332,7 @@ in the Case of Unambiguous Gender Unsupervised multiple-choice question generation for out-of-domain <fixed-case>Q</fixed-case>&<fixed-case>A</fixed-case> fine-tuning GuillaumeLe Berre ChristopheCerisara - PhilippeLanglais + PhilippeLanglais GuyLapalme 732-738 Pre-trained models have shown very good performances on a number of question answering benchmarks especially when fine-tuned on multiple question answering datasets at once. In this work, we propose an approach for generating a fine-tuning dataset thanks to a rule-based algorithm that generates questions and answers from unannotated sentences. We show that the state-of-the-art model UnifiedQA can greatly benefit from such a system on a multiple-choice benchmark about physics, biology and chemistry it has never been trained on. We further show that improved performances may be obtained by selecting the most challenging distractors (wrong answers), with a dedicated ranker based on a pretrained RoBERTa model. @@ -9355,7 +9355,7 @@ in the Case of Unambiguous Gender Probing the Robustness of Trained Metrics for Conversational Dialogue Systems - JanDeriu + JanDeriu DonTuggener PiusVon Däniken MarkCieliebak @@ -9432,7 +9432,7 @@ in the Case of Unambiguous Gender Code Synonyms Do Matter: Multiple Synonyms Matching Network for Automatic <fixed-case>ICD</fixed-case> Coding - ZhengYuan + ZhengYuan ChuanqiTan SongfangHuang 808-814 @@ -9446,7 +9446,7 @@ in the Case of Unambiguous Gender <fixed-case>C</fixed-case>o<fixed-case>DA</fixed-case>21: Evaluating Language Understanding Capabilities of <fixed-case>NLP</fixed-case> Models With Context-Definition Alignment Lütfi KeremSenel TimoSchick - HinrichSchuetze + HinrichSchuetze 815-824 Pretrained language models (PLMs) have achieved superhuman performance on many benchmarks, creating a need for harder tasks. We introduce CoDA21 (Context Definition Alignment), a challenging benchmark that measures natural language understanding (NLU) capabilities of PLMs: Given a definition and a context each for k words, but not the words themselves, the task is to align the k definitions with the k contexts. CoDA21 requires a deep understanding of contexts and definitions, including complex inference and world knowledge. We find that there is a large gap between human and PLM performance, suggesting that CoDA21 measures an aspect of NLU that is not sufficiently covered in existing benchmarks. 2022.acl-short.92 @@ -9604,7 +9604,7 @@ in the Case of Unambiguous Gender What Do You Mean by Relation Extraction? A Survey on Datasets and Study on Scientific Relation Classification ElisaBassignana - BarbaraPlank + BarbaraPlank 67-83 Over the last five years, research on Relation Extraction (RE) witnessed extensive progress with many new dataset releases. At the same time, setup clarity has decreased, contributing to increased difficulty of reliable empirical evaluation (Taillé et al., 2020). In this paper, we provide a comprehensive survey of RE datasets, and revisit the task definition and its adoption by the community. We find that cross-dataset and cross-domain setups are particularly lacking. We present an empirical study on scientific Relation Classification across two datasets. Despite large data overlap, our analysis reveals substantial discrepancies in annotation. Annotation discrepancies strongly impact Relation Classification performance, explaining large drops in cross-dataset evaluations. Variation within further sub-domains exists but impacts Relation Classification only to limited degrees. Overall, our study calls for more rigour in reporting setups in RE and evaluation across multiple test sets. 2022.acl-srw.7 @@ -9669,7 +9669,7 @@ in the Case of Unambiguous Gender <fixed-case>AMR</fixed-case> Alignment for Morphologically-rich and Pro-drop Languages K. ElifOral - GülşenEryiğit + GülşenEryiğit 143-152 Alignment between concepts in an abstract meaning representation (AMR) graph and the words within a sentence is one of the important stages of AMR parsing. Although there exist high performing AMR aligners for English, unfortunately, these are not well suited for many languages where many concepts appear from morpho-semantic elements. For the first time in the literature, this paper presents an AMR aligner tailored for morphologically-rich and pro-drop languages by experimenting on the Turkish language being a prominent example of this language group. Our aligner focuses on the meaning considering the rich Turkish morphology and aligns AMR concepts that emerge from morphemes using a tree traversal approach without additional resources or rules. We evaluate our aligner over a manually annotated gold data set in terms of precision, recall and F1 score. Our aligner outperforms the Turkish adaptations of the previously proposed aligners for English and Portuguese by an F1 score of 0.87 and provides a relative error reduction of up to 76%. 2022.acl-srw.13 @@ -9724,7 +9724,7 @@ in the Case of Unambiguous Gender Restricted or Not: A General Training Framework for Neural Machine Translation ZuchaoLi MasaoUtiyama - EiichiroSumita + EiichiroSumita HaiZhao 245-251 Restricted machine translation incorporates human prior knowledge into translation. It restricts the flexibility of the translation to satisfy the demands of translation in specific scenarios. Existing work typically imposes constraints on beam search decoding. Although this can satisfy the requirements overall, it usually requires a larger beam size and far longer decoding time than unrestricted translation, which limits the concurrent processing ability of the translation model in deployment, and thus its practicality. In this paper, we propose a general training framework that allows a model to simultaneously support both unrestricted and restricted translation by adopting an additional auxiliary training process without constraining the decoding process. This maintains the benefits of restricted translation but greatly reduces the extra time overhead of constrained decoding, thus improving its practicality. The effectiveness of our proposed training framework is demonstrated by experiments on both original (WAT21 En\leftrightarrowJa) and simulated (WMT14 En\rightarrowDe and En\rightarrowFr) restricted translation benchmarks. @@ -9803,7 +9803,7 @@ in the Case of Unambiguous Gender Mining Logical Event Schemas From Pre-Trained Language Models LaneLawley - LenhartSchubert + LenhartSchubert 332-345 We present NESL (the Neuro-Episodic Schema Learner), an event schema learning system that combines large language models, FrameNet parsing, a powerful logical representation of language, and a set of simple behavioral schemas meant to bootstrap the learning process. In lieu of a pre-made corpus of stories, our dataset is a continuous feed of “situation samples” from a pre-trained language model, which are then parsed into FrameNet frames, mapped into simple behavioral schemas, and combined and generalized into complex, hierarchical schemas for a variety of everyday scenarios. We show that careful sampling from the language model can help emphasize stereotypical properties of situations and de-emphasize irrelevant details, and that the resulting schemas specify situations more comprehensively than those learned by other systems. 2022.acl-srw.25 @@ -9891,7 +9891,7 @@ in the Case of Unambiguous Gender BelenAlastruey JavierFerrando Gerard I.Gállego - Marta R.Costa-jussà + Marta R.Costa-jussà 402-412 Transformers have achieved state-of-the-art results across multiple NLP tasks. However, the self-attention mechanism complexity scales quadratically with the sequence length, creating an obstacle for tasks involving long sequences, like in the speech domain. In this paper, we discuss the usefulness of self-attention for Direct Speech Translation. First, we analyze the layer-wise token contributions in the self-attention of the encoder, unveiling local diagonal patterns. To prove that some attention weights are avoidable, we propose to substitute the standard self-attention with a local efficient one, setting the amount of context used based on the results of the analysis. With this approach, our model matches the baseline performance, and improves the efficiency by skipping the computation of those weights that standard attention discards. 2022.acl-srw.32 @@ -9926,7 +9926,7 @@ in the Case of Unambiguous Gender Towards Fine-grained Classification of Climate Change related Social Media Text RoopalVaid KartikeyPant - ManishShrivastava + ManishShrivastava 434-443 With climate change becoming a cause of concern worldwide, it becomes essential to gauge people’s reactions. This can help educate and spread awareness about it and help leaders improve decision-making. This work explores the fine-grained classification and Stance detection of climate change-related social media text. Firstly, we create two datasets, ClimateStance and ClimateEng, consisting of 3777 tweets each, posted during the 2019 United Nations Framework Convention on Climate Change and comprehensively outline the dataset collection, annotation methodology, and dataset composition. Secondly, we propose the task of Climate Change stance detection based on our proposed ClimateStance dataset. Thirdly, we propose a fine-grained classification based on the ClimateEng dataset, classifying social media text into five categories: Disaster, Ocean/Water, Agriculture/Forestry, Politics, and General. We benchmark both the datasets for climate change stance detection and fine-grained classification using state-of-the-art methods in text classification. We also create a Reddit-based dataset for both the tasks, ClimateReddit, consisting of 6262 pseudo-labeled comments along with 329 manually annotated comments for the label. We then perform semi-supervised experiments for both the tasks and benchmark their results using the best-performing model for the supervised experiments. Lastly, we provide insights into the ClimateStance and ClimateReddit using part-of-speech tagging and named-entity recognition. 2022.acl-srw.35 @@ -9967,7 +9967,7 @@ in the Case of Unambiguous Gender A Dataset and <fixed-case>BERT</fixed-case>-based Models for Targeted Sentiment Analysis on <fixed-case>T</fixed-case>urkish Texts Mustafa MelihMutlu - ArzucanÖzgür + ArzucanÖzgür 467-472 Targeted Sentiment Analysis aims to extract sentiment towards a particular target from a given text. It is a field that is attracting attention due to the increasing accessibility of the Internet, which leads people to generate an enormous amount of data. Sentiment analysis, which in general requires annotated data for training, is a well-researched area for widely studied languages such as English. For low-resource languages such as Turkish, there is a lack of such annotated data. We present an annotated Turkish dataset suitable for targeted sentiment analysis. We also propose BERT-based models with different architectures to accomplish the task of targeted sentiment analysis. The results demonstrate that the proposed models outperform the traditional sentiment analysis models for the targeted sentiment analysis task. 2022.acl-srw.39 @@ -9980,7 +9980,7 @@ in the Case of Unambiguous Gender Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics: System Demonstrations ValerioBasile ZornitsaKozareva - SanjaStajner + SanjaStajner Association for Computational Linguistics
Dublin, Ireland
May @@ -10019,7 +10019,7 @@ in the Case of Unambiguous Gender Leonardo F. R.Ribeiro JonasPfeiffer NilsReimers - GözdeŞahin + GözdeŞahin IrynaGurevych 9-22 Recent advances in NLP and information retrieval have given rise to a diverse set of question answering tasks that are of different formats (e.g., extractive, abstractive), require different model architectures (e.g., generative, discriminative), and setups (e.g., with or without retrieval). Despite having a large number of powerful, specialized QA pipelines (which we refer to as Skills) that consider a single domain, model or setup, there exists no framework where users can easily explore and compare such pipelines and can extend them according to their needs. To address this issue, we present UKP-SQuARE, an extensible online QA platform for researchers which allows users to query and analyze a large collection of modern Skills via a user-friendly web interface and integrated behavioural tests. In addition, QA researchers can develop, manage, and share their custom Skills using our microservices that support a wide range of models (Transformers, Adapters, ONNX), datastores and retrieval techniques (e.g., sparse and dense). UKP-SQuARE is available on https://square.ukp-lab.de @@ -10143,9 +10143,9 @@ in the Case of Unambiguous Gender UrmishThakker KhalidAlmubarak XiangruTang - DragomirRadev + DragomirRadev Mike Tian-jianJiang - AlexanderRush + AlexanderRush 93-104 PromptSource is a system for creating, sharing, and using natural language prompts. Prompts are functions that map an example from a dataset to a natural language input and target output. Using prompts to train and query language models is an emerging area in NLP that requires new tools that let users develop and refine these prompts collaboratively. PromptSource addresses the emergent challenges in this new setting with (1) a templating language for defining data-linked prompts, (2) an interface that lets users quickly iterate on prompt development by observing outputs of their prompts on many examples, and (3) a community-driven set of guidelines for contributing new prompts to a common pool. Over 2,000 prompts for roughly 170 datasets are already available in PromptSource. PromptSource is available at https://github.com/bigscience-workshop/promptsource. 2022.acl-demo.9 @@ -10178,7 +10178,7 @@ in the Case of Unambiguous Gender RichardShin SubhroRoy AleksandrNisnevich - CharlesChen + CharlesChen BenjaminVan Durme 114-126 Collecting data for conversational semantic parsing is a time-consuming and demanding process. In this paper we consider, given an incomplete dataset with only a small amount of data, how to build an AI-powered human-in-the-loop process to enable efficient data collection. A guided K-best selection process is proposed, which (i) generates a set of possible valid candidates; (ii) allows users to quickly traverse the set and filter incorrect parses; and (iii) asks users to select the correct parse, with minimal modification when necessary. We investigate how to best support users in efficiently traversing the candidate set and locating the correct parse, in terms of speed and accuracy. In our user study, consisting of five annotators labeling 300 instances each, we find that combining keyword searching, where keywords can be used to query relevant candidates, and keyword suggestion, where representative keywords are automatically generated, enables fast and accurate annotation. @@ -10191,7 +10191,7 @@ in the Case of Unambiguous Gender Hard and Soft Evaluation of <fixed-case>NLP</fixed-case> models with <fixed-case>BOO</fixed-case>t<fixed-case>ST</fixed-case>rap <fixed-case>SA</fixed-case>mpling - <fixed-case>B</fixed-case>oo<fixed-case>S</fixed-case>t<fixed-case>S</fixed-case>a TommasoFornaciari AlexandraUma - MassimoPoesio + MassimoPoesio DirkHovy 127-134 Natural Language Processing (NLP) ‘s applied nature makes it necessary to select the most effective and robust models. Producing slightly higher performance is insufficient; we want to know whether this advantage will carry over to other data sets. Bootstrapped significance tests can indicate that ability. So while necessary, computing the significance of models’ performance differences has many levels of complexity. It can be tedious, especially when the experimental design has many conditions to compare and several runs of experiments. We present BooStSa, a tool that makes it easy to compute significance levels with the BOOtSTrap SAmpling procedure to evaluate models that predict not only standard hard labels but soft-labels (i.e., probability distributions over different classes) as well. @@ -10205,7 +10205,7 @@ in the Case of Unambiguous Gender RevanthGangi Reddy ZiqiWang Yi-shyuanChiang - TuanLai + TuanLai PengfeiYu ZixuanZhang HengJi @@ -10300,7 +10300,7 @@ in the Case of Unambiguous Gender Cue-bot: A Conversational Agent for Assistive Technology ShachiH Kumar HsuanSu - RameshManuvinakurike + RameshManuvinakurike Maximilian C.Pinaroc SaiPrasad SauravSahay @@ -10398,7 +10398,7 @@ in the Case of Unambiguous Gender DanielLoureiro FrancescoBarbieri LeonardoNeves - LuisEspinosa Anke + LuisEspinosa Anke JoseCamacho-collados 251-260 Despite its importance, the time variable has been largely neglected in the NLP and language model literature. In this paper, we present TimeLMs, a set of language models specialized on diachronic Twitter data. We show that a continual learning strategy contributes to enhancing Twitter-based language models’ capacity to deal with future and out-of-distribution tweets, while making them competitive with standardized and more monolithic benchmarks. We also perform a number of qualitative analyses showing how they cope with trends and peaks in activity involving specific named entities or concept drift. TimeLMs is available at github.com/cardiffnlp/timelms. @@ -10423,7 +10423,7 @@ in the Case of Unambiguous Gender <fixed-case>Q</fixed-case>uick<fixed-case>G</fixed-case>raph: A Rapid Annotation Tool for Knowledge Graph Extraction from Technical Text TylerBikaun MichaelStewart - WeiLiu + WeiLiu 270-278 Acquiring high-quality annotated corpora for complex multi-task information extraction (MT-IE) is an arduous and costly process for human-annotators. Adoption of unsupervised techniques for automated annotation have thus become popular. However, these techniques rely heavily on dictionaries, gazetteers, and knowledge bases. While such resources are abundant for general domains, they are scarce for specialised technical domains. To tackle this challenge, we present QuickGraph, the first collaborative MT-IE annotation tool built with indirect weak supervision and clustering to maximise annotator productivity. QuickGraph’s main contribution is a set of novel features that enable knowledge graph extraction through rapid and consistent complex multi-task entity and relation annotation. In this paper, we discuss these key features and qualitatively compare QuickGraph to existing annotation tools. 2022.acl-demo.27 @@ -10436,7 +10436,7 @@ in the Case of Unambiguous Gender Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics: Tutorial Abstracts LucianaBenotti - NaoakiOkazaki + NaoakiOkazaki YvesScherrer MarcosZampieri Association for Computational Linguistics @@ -10452,7 +10452,7 @@ in the Case of Unambiguous Gender A Gentle Introduction to Deep Nets and Opportunities for the Future - KennethChurch + KennethChurch ValiaKordoni GaryMarcus ErnestDavis @@ -10489,7 +10489,7 @@ in the Case of Unambiguous Gender ChenguangZhu YichongXu XiangRen - Bill YuchenLin + Bill YuchenLin MengJiang WenhaoYu 12-20 @@ -10517,7 +10517,7 @@ in the Case of Unambiguous Gender Learning with Limited Text Data DiyiYang - AnkurParikh + AnkurParikh ColinRaffel 28-31 Natural Language Processing (NLP) has achieved great progress in the past decade on the basis of neural models, which often make use of large amounts of labeled data to achieve state-of-the-art performance. The dependence on labeled data prevents NLP models from being applied to low-resource settings and languages because of the time, money, and expertise that is often required to label massive amounts of textual data. Consequently, the ability to learn with limited labeled data is crucial for deploying neural systems to real-world NLP applications. Recently, numerous approaches have been explored to alleviate the need for labeled data in NLP such as data augmentation and semi-supervised learning. This tutorial aims to provide a systematic and up-to-date overview of these methods in order to help researchers and practitioners understand the landscape of approaches and the challenges associated with learning from limited labeled data, an emerging topic in the computational linguistics community. We will consider applications to a wide variety of NLP tasks (including text classification, generation, and structured prediction) and will highlight current challenges and future directions. diff --git a/data/xml/2022.alta.xml b/data/xml/2022.alta.xml index 34f96f2750..9e77f8b48b 100644 --- a/data/xml/2022.alta.xml +++ b/data/xml/2022.alta.xml @@ -26,7 +26,7 @@ Using public domain resources and off-the-shelf tools to produce high-quality multimedia texts - MannyRayner + MannyRayner BelindaChiera CathyChua 6–15 @@ -48,7 +48,7 @@ FatemehShiri TongtongWu YuanfangLi - GholamrezaHaffari + GholamrezaHaffari 22–30 2022.alta-1.4 shiri-etal-2022-tcg @@ -57,7 +57,7 @@ Complex Reading Comprehension Through Question Decomposition Xiao-YuGuo Yuan-FangLi - GholamrezaHaffari + GholamrezaHaffari 31–40 2022.alta-1.5 guo-etal-2022-complex @@ -84,7 +84,7 @@ Robustness of Hybrid Models in Cross-domain Readability Assessment Ho HungLim TianyuanCai - John S. Y.Lee + John S. Y.Lee MeichunLiu 62–67 2022.alta-1.8 @@ -102,7 +102,7 @@ JinghuiLiu DanielCapurro AnthonyNguyen - KarinVerspoor + KarinVerspoor 73–83 2022.alta-1.10 liu-etal-2022-improving @@ -127,7 +127,7 @@ Stability of Forensic Text Comparison System - SusanBrown + SusanBrown ShunichiIshihara 98–106 2022.alta-1.13 @@ -154,7 +154,7 @@ Automatic Explanation Generation For Climate Science Claims RuiXing ShraeyBhatia - TimothyBaldwin + TimothyBaldwin Jey HanLau 122–129 Climate change is an existential threat to humanity, the proliferation of unsubstantiated claims relating to climate science is manipulating public perception, motivating the need for fact-checking in climate science. In this work, we draw on recent work that uses retrieval-augmented generation for veracity prediction and explanation generation, in framing explanation generation as a query-focused multi-document summarization task. We adapt PRIMERA to the climate science domain by adding additional global attention on claims. Through automatic evaluation and qualitative analysis, we demonstrate that our method is effective at generating explanations. @@ -173,7 +173,7 @@ Evaluating the Examiner: The Perils of <fixed-case>P</fixed-case>earson Correlation for Validating Text Similarity Metrics GiselaVallejo - TimothyBaldwin + TimothyBaldwin LeaFrermann 130–138 In recent years, researchers have developed question-answering based approaches to automatically evaluate system summaries, reporting improved validity compared to word overlap-based metrics like ROUGE, in terms of correlation with human ratings of criteria including fluency and hallucination. In this paper, we take a closer look at one particular metric, QuestEval, and ask whether: (1) it can serve as a more general metric for long document similarity assessment; and (2) a single correlation score between metric scores and human ratings, as the currently standard approach, is sufficient for metric validation. We find that correlation scores can be misleading, and that score distributions and outliers should be taken into account. With these caveats in mind, QuestEval can be a promising candidate for long document similarity assessment. @@ -235,7 +235,7 @@ Overview of the 2022 <fixed-case>ALTA</fixed-case> Shared task: <fixed-case>PIBOSO</fixed-case> sentence classification, 10 years later - DiegoMollá + DiegoMollá 178–182 The 2022 ALTA shared task has been running annually since 2010. This year, the shared task is a re-visit of the 2012 ALTA shared task. The purpose of this task is to classify sentences of medical publications using the PIBOSO taxonomy. This is a multi-label classification task which can help medical researchers and practitioners conduct Evidence Based Medicine (EBM). In this paper we present the task, the evaluation criteria, and the results of the systems participating in the shared task. 2022.alta-1.24 diff --git a/data/xml/2022.amta.xml b/data/xml/2022.amta.xml index 09f27f45f5..98023a207c 100644 --- a/data/xml/2022.amta.xml +++ b/data/xml/2022.amta.xml @@ -8,7 +8,7 @@ September 2022 KevinDuh - FranciscoGuzmán + FranciscoGuzmán 2022.amta-research amta @@ -21,7 +21,7 @@ PintuLohar SineadMadden EdmondO’Connor - MajaPopovic + MajaPopovic TanyaHabruseva 1-13 2022.amta-research.1 @@ -32,7 +32,7 @@ Domain-Specific Text Generation for Machine Translation YasminMoslem RejwanulHaque - JohnKelleher + JohnKelleher AndyWay 14-30 2022.amta-research.2 @@ -64,7 +64,7 @@ MohamedAfify Young JinKim HitokazuMatsushita - HanyHassan + HanyHassan 58-69 2022.amta-research.5 Multilingual Neural Machine Translation has been showing great success using transformer models. Deploying these models is challenging because they usually require large vocabulary (vocab) sizes for various languages. This limits the speed of predicting the output tokens in the last vocab projection layer. To alleviate these challenges, this paper proposes a fast vocabulary projection method via clustering which can be used for multilingual transformers on GPUs. First, we offline split the vocab search space into disjoint clusters given the hidden context vector of the decoder output, which results in much smaller vocab columns for vocab projection. Second, at inference time, the proposed method predicts the clusters and candidate active tokens for hidden context vectors at the vocab projection. This paper also includes analysis of different ways of building these clusters in multilingual settings. Our results show end-to-end speed gains in float16 GPU inference up to 25% while maintaining the BLEU score and slightly increasing memory cost. The proposed method speeds up the vocab projection step itself by up to 2.6x. We also conduct an extensive human evaluation to verify the proposed method preserves the quality of the translations from the original model. @@ -72,11 +72,11 @@ Language Tokens: A Frustratingly Simple Approach Improves Zero-Shot Performance of Multilingual Translation - MuhammadElNokrashy + MuhammadElNokrashy AmrHendy MohamedMaher MohamedAfify - HanyHassan Awadalla + HanyHassan Awadalla 70-82 2022.amta-research.6 This paper proposes a simple yet effective method to improve direct (X-to-Y) translation for both cases: zero-shot and when direct data is available. We modify the input tokens at both the encoder and decoder to include signals for the source and target languages. We show a performance gain when training from scratch, or finetuning a pretrained model with the pro- posed setup. In the experiments, our method shows nearly 10.0 BLEU points gain on in-house datasets depending on the checkpoint selection criteria. In a WMT evaluation campaign, From- English performance improves by 4.17 and 2.87 BLEU points, in the zero-shot setting, and when direct data is available for training, respectively. While X-to-Y improves by 1.29 BLEU over the zero-shot baseline, and 0.44 over the many-to-many baseline. In the low-resource setting, we see a 1.5 ∼ 1.7 point improvement when finetuning on X-to-Y domain data. @@ -122,7 +122,7 @@ On the Effectiveness of Quasi Character-Level Models for Machine Translation SalvadorCarrión - FranciscoCasacuberta + FranciscoCasacuberta 131-143 2022.amta-research.10 Neural Machine Translation (NMT) models often use subword-level vocabularies to deal with rare or unknown words. Although some studies have shown the effectiveness of purely character-based models, these approaches have resulted in highly expensive models in computational terms. In this work, we explore the benefits of quasi-character-level models for very low-resource languages and their ability to mitigate the effects of the catastrophic forgetting problem. First, we conduct an empirical study on the efficacy of these models, as a function of the vocabulary and training set size, for a range of languages, domains, and architectures. Next, we study the ability of these models to mitigate the effects of catastrophic forgetting in machine translation. Our work suggests that quasi-character-level models have practically the same generalization capabilities as character-based models but at lower computational costs. Furthermore, they appear to help achieve greater consistency between domains than standard subword-level models, although the catastrophic forgetting problem is not mitigated. @@ -153,7 +153,7 @@ Limitations and Challenges of Unsupervised Cross-lingual Pre-training MartínQuesada Zaragoza - FranciscoCasacuberta + FranciscoCasacuberta 175-187 2022.amta-research.13 Cross-lingual alignment methods for monolingual language representations have received notable attention in recent years. However, their use in machine translation pre-training remains scarce. This work tries to shed light on the effects of some of the factors that play a role in cross-lingual pre-training, both for cross-lingual mappings and their integration in supervised neural models. The results show that unsupervised cross-lingual methods are effective at inducing alignment even for distant languages and they benefit noticeably from subword information. However, we find that their effectiveness as pre-training models in machine translation is severely limited due to their cross-lingual signal being easily distorted by the principal network during training. Moreover, the learned bilingual projection is too restrictive to allow said network to learn properly when the embedding weights are frozen. @@ -162,7 +162,7 @@ Few-Shot Regularization to Tackle Catastrophic Forgetting in Multilingual Machine Translation SalvadorCarrión - FranciscoCasacuberta + FranciscoCasacuberta 188-199 2022.amta-research.14 Increasing the number of tasks supported by a machine learning model without forgetting previously learned tasks is the goal of any lifelong learning system. In this work, we study how to mitigate the effects of the catastrophic forgetting problem to sequentially train a multilingual neural machine translation model using minimal past information. First, we describe the catastrophic forgetting phenomenon as a function of the number of tasks learned (language pairs) and the ratios of past data used during the learning of the new task. Next, we explore the importance of applying oversampling strategies for scenarios where only minimal amounts of past data are available. Finally, we derive a new loss function that minimizes the forgetting of previously learned tasks by actively re-weighting past samples and penalizing weights that deviate too much from the original model. Our work suggests that by using minimal amounts of past data and a simple regularization function, we can significantly mitigate the effects of the catastrophic forgetting phenomenon without increasing the computational costs. @@ -188,9 +188,9 @@ Refining an Almost Clean Translation Memory Helps Machine Translation ShivendraBhardwa DavidAlfonso-Hermelo - PhilippeLanglais + PhilippeLanglais GabrielBernier-Colborne - CyrilGoutte + CyrilGoutte MichelSimard 215-226 2022.amta-research.16 @@ -200,8 +200,8 @@ Practical Attacks on Machine Translation using Paraphrase Elizabeth MMerkhofer - JohnHenderson - AbigailGertner + JohnHenderson + AbigailGertner MichaelDoyle LilyWong 227-239 @@ -222,7 +222,7 @@ A Neural Machine Translation Approach to Translate Text to Pictographs in a Medical Speech Translation System - The <fixed-case>B</fixed-case>abel<fixed-case>D</fixed-case>r Use Case JonathanMutal - PierretteBouillon + PierretteBouillon MagaliNorré JohannaGerlach Lucia OrmaecheaGrijalba @@ -282,7 +282,7 @@ CynthiaGao JaniceLam FranciscoGuzman - MonaDiab + MonaDiab PhilippKoehn 309-321 2022.amta-research.24 @@ -400,7 +400,7 @@ A Multimodal Simultaneous Interpretation Prototype: Who Said What XiaolinWang MasaoUtiyama - EiichiroSumita + EiichiroSumita 132-143 2022.amta-upg.10 “Who said what” is essential for users to understand video streams that have more than one speaker, but conventional simultaneous interpretation systems merely present “what was said” in the form of subtitles. Because the translations unavoidably have delays and errors, users often find it difficult to trace the subtitles back to speakers. To address this problem, we propose a multimodal SI system that presents users “who said what”. Our system takes audio-visual approaches to recognize the speaker of each sentence, and then annotates its translation with the textual tag and face icon of the speaker, so that users can quickly understand the scenario. Furthermore, our system is capable of interpreting video streams in real-time on a single desktop equipped with two Quadro RTX 4000 GPUs owing to an efficient sentence-based architecture. @@ -473,7 +473,7 @@ Craig AStewart MadalenaGonçalves MariannaBuchicchio - AlonLavie + AlonLavie 231-256 Frameworks such as Multidimensional Quality Metrics (MQM) provide detailed feedback on translation quality and can pinpoint concrete linguistic errors. The quality of a translation is, however, also closely tied to its utility in a particular use case. Many customers have highly subjective expectations of translation quality. Features such as register, discourse style and brand consistency can be difficult to accommodate given a broadly applied translation solution. In this presentation we will introduce the concept of Business Critical Errors (BCE). Adapted from MQM, the BCE framework provides a perspective on translation quality that allows us to be reactive and adaptive to expectation whilst also maintaining consistency in our translation evaluation. We will demonstrate tooling used at Unbabel that allows us to evaluate the performance of our MT models on BCE using specialized test suites as well as the ability of our AI evaluation models to successfully capture BCE information. 2022.amta-upg.17.Presentation.pdf @@ -500,7 +500,7 @@ Boosting Neural Machine Translation with Similar Translations JitaoXu - JosepCrego + JosepCrego JeanSenellart 282-292 2022.amta-upg.20 @@ -637,7 +637,7 @@ Robust Translation of <fixed-case>F</fixed-case>rench Live Speech Transcripts EliseBertin-Lemée GuillaumeKlein - JosepCrego + JosepCrego JeanSenellart 455-464 2022.amta-upg.32 @@ -646,9 +646,9 @@ Speech-to-Text and Evaluation of Multiple Machine Translation Systems - EvelyneTzoukermann + EvelyneTzoukermann StevenVan Guilder - JenniferDoyon + JenniferDoyon EkaterinaHarke 465-472 2022.amta-upg.33 @@ -660,15 +660,14 @@ Proceedings of the 15th biennial conference of the Association for Machine Translation in the Americas (Workshop 2: Corpus Generation and Corpus Augmentation for Machine Translation) Association for Machine Translation in the Americas -
September 2022 John E.Ortega MarineCarpuat WilliamChen - KatharinaKann + KatharinaKann ConstantineLignos - MajaPopovic + MajaPopovic ShabnamTafreshi 2022.amta-coco4mt amta @@ -703,7 +702,7 @@ Building and Analysis of <fixed-case>T</fixed-case>amil Lyric Corpus with Semantic Representation KarthikaRanganathan - GeethaT V + GeethaT V 18-27 2022.amta-coco4mt.3 In the new era of modern technology, the cloud has become the library for many things including entertainment, i.e, the availability of lyrics. In order to create awareness about the language and to increase the interest in Tamil film lyrics, a computerized electronic format of Tamil lyrics corpus is necessary for mining the lyric documents. In this paper, the Tamil lyric corpus was collected from various books and lyric websites. Here, we also address the challenges faced while building this corpus. A corpus was created with 15286 documents and stored all the lyric information obtained in the XML format. In this paper, we also explained the Universal Networking Language (UNL) semantic representation that helps to represent the document in a language and domain independent ways. We evaluated this corpus by performing simple statistical analysis for characters, words and a few rhetorical effect analysis. We also evaluated our semantic representation with the existing work and the results are very encouraging. diff --git a/data/xml/2022.argmining.xml b/data/xml/2022.argmining.xml index 7969d577b4..66791b0876 100644 --- a/data/xml/2022.argmining.xml +++ b/data/xml/2022.argmining.xml @@ -23,7 +23,7 @@ ZhexiongLiu MeiqiGuo YueDai - DianeLitman + DianeLitman 1–18 The growing interest in developing corpora of persuasive texts has promoted applications in automated systems, e.g., debating and essay scoring systems; however, there is little prior work mining image persuasiveness from an argumentative perspective. To expand persuasiveness mining into a multi-modal realm, we present a multi-modal dataset, ImageArg, consisting of annotations of image persuasiveness in tweets. The annotations are based on a persuasion taxonomy we developed to explore image functionalities and the means of persuasion. We benchmark image persuasiveness tasks on ImageArg using widely-used multi-modal learning methods. The experimental results show that our dataset offers a useful resource for this rich and challenging topic, and there is ample room for modeling improvement. 2022.argmining-1.1 @@ -35,7 +35,7 @@ MoritzPlenz JuriOpitz AnetteFrank - PhilippCimiano + PhilippCimiano 19–33 We address the problem of automatically predicting the quality of a conclusion given a set of (textual) premises of an argument, focusing in particular on the task of predicting the validity and novelty of the argumentative conclusion. We propose a multi-task approach that jointly predicts the validity and novelty of the textual conclusion, relying on pre-trained language models fine-tuned on the task. As training data for this task is scarce and costly to obtain, we experimentally investigate the impact of data augmentation approaches for improving the accuracy of prediction compared to a baseline that relies on task-specific data only. We consider the generation of synthetic data as well as the integration of datasets from related argument tasks. We show that especially our synthetic data, combined with class-balancing and instance-specific learning rates, substantially improves classification results (+15.1 points in F_1-score). Using only training data retrieved from related datasets by automatically labeling them for validity and novelty, combined with synthetic data, outperforms the baseline by 11.5 points in F_1-score. 2022.argmining-1.2 @@ -55,7 +55,7 @@ Analyzing Culture-Specific Argument Structures in Learner Essays Wei-FanChen - Mei-HuaChen + Mei-HuaChen GarimaMudgal HenningWachsmuth 51–61 @@ -76,7 +76,7 @@ A Unified Representation and a Decoupled Deep Learning Architecture for Argumentation Mining of Students’ Persuasive Essays Muhammad TawsifSazid - Robert E.Mercer + Robert E.Mercer 74–83 We develop a novel unified representation for the argumentation mining task facilitating the extracting from text and the labelling of the non-argumentative units and argumentation components—premises, claims, and major claims—and the argumentative relations—premise to claim or premise in a support or attack relation, and claim to major-claim in a for or against relation—in an end-to-end machine learning pipeline. This tightly integrated representation combines the component and relation identification sub-problems and enables a unitary solution for detecting argumentation structures. This new representation together with a new deep learning architecture composed of a mixed embedding method, a multi-head attention layer, two biLSTM layers, and a final linear layer obtain state-of-the-art accuracy on the Persuasive Essays dataset. Also, we have introduced a decoupled solution to identify the entities and relations first, and on top of that, a second model is used to detect distance between the detected related components. An augmentation of the corpus (paragraph version) by including copies of major claims has further increased the performance. 2022.argmining-1.6 @@ -88,7 +88,7 @@ AnetteFrank JuriOpitz MoritzPlenz - PhilippCimiano + PhilippCimiano 84–94 This paper provides an overview of the Argument Validity and Novelty Prediction Shared Task that was organized as part of the 9th Workshop on Argument Mining (ArgMining 2022). The task focused on the prediction of the validity and novelty of a conclusion given a textual premise. Validity is defined as the degree to which the conclusion is justified with respect to the given premise. Novelty defines the degree to which the conclusion contains content that is new in relation to the premise. Six groups participated in the task, submitting overall 13 system runs for the subtask of binary classification and 2 system runs for the subtask of relative classification. The results reveal that the task is challenging, with best results obtained for Validity prediction in the range of 75% F1 score, for Novelty prediction of 70% F1 score and for correctly predicting both Validity and Novelty of 45% F1 score. In this paper we summarize the task definition and dataset. We give an overview of the results obtained by the participating systems, as well as insights to be gained from the diverse contributions. 2022.argmining-1.7 @@ -149,7 +149,7 @@ Predicting the Presence of Reasoning Markers in Argumentative Text JonathanClayton - RobGaizauskas + RobGaizauskas 137–142 This paper proposes a novel task in Argument Mining, which we will refer to as Reasoning Marker Prediction. We reuse the popular Persuasive Essays Corpus (Stab and Gurevych, 2014). Instead of using this corpus for Argument Structure Parsing, we use a simple heuristic method to identify text spans which we can identify as reasoning markers. We propose baseline methods for predicting the presence of these reasoning markers automatically, and make a script to generate the data for the task publicly available. 2022.argmining-1.13 @@ -205,7 +205,7 @@ Entity-based Claim Representation Improves Fact-Checking of Medical Content in Tweets - AmelieWührl + AmelieWührl RomanKlinger 187–198 False medical information on social media poses harm to people’s health. While the need for biomedical fact-checking has been recognized in recent years, user-generated medical content has received comparably little attention. At the same time, models for other text genres might not be reusable, because the claims they have been trained with are substantially different. For instance, claims in the SciFact dataset are short and focused: “Side effects associated with antidepressants increases risk of stroke”. In contrast, social media holds naturally-occurring claims, often embedded in additional context: "‘If you take antidepressants like SSRIs, you could be at risk of a condition called serotonin syndrome’ Serotonin syndrome nearly killed me in 2010. Had symptoms of stroke and seizure.” This showcases the mismatch between real-world medical claims and the input that existing fact-checking systems expect. To make user-generated content checkable by existing models, we propose to reformulate the social-media input in such a way that the resulting claim mimics the claim characteristics in established datasets. To accomplish this, our method condenses the claim with the help of relational entity information and either compiles the claim out of an entity-relation-entity triple or extracts the shortest phrase that contains these elements. We show that the reformulated input improves the performance of various fact-checking models as opposed to checking the tweet text in its entirety. diff --git a/data/xml/2022.autosimtrans.xml b/data/xml/2022.autosimtrans.xml index d4e217f71a..404d62ec93 100644 --- a/data/xml/2022.autosimtrans.xml +++ b/data/xml/2022.autosimtrans.xml @@ -37,7 +37,7 @@ Over-Generation Cannot Be Rewarded: Length-Adaptive Average Lagging for Simultaneous Speech Translation SaraPapi MarcoGaido - MatteoNegri + MatteoNegri MarcoTurchi 12-17 Simultaneous speech translation (SimulST) systems aim at generating their output with the lowest possible latency, which is normally computed in terms of Average Lagging (AL). In this paper we highlight that, despite its widespread adoption, AL provides underestimated scores for systems that generate longer predictions compared to the corresponding references. We also show that this problem has practical relevance, as recent SimulST systems have indeed a tendency to over-generate. As a solution, we propose LAAL (Length-Adaptive Average Lagging), a modified version of the metric that takes into account the over-generation phenomenon and allows for unbiased evaluation of both under-/over-generating systems. diff --git a/data/xml/2022.bea.xml b/data/xml/2022.bea.xml index fe3621b1f2..54429c05dd 100644 --- a/data/xml/2022.bea.xml +++ b/data/xml/2022.bea.xml @@ -144,7 +144,7 @@ BoweiZou PengfeiLi LiangmingPan - Ai TiAw + Ai TiAw 61-70 In field of teaching, true/false questioning is an important educational method for assessing students’ general understanding of learning materials. Manually creating such questions requires extensive human effort and expert knowledge. Question Generation (QG) technique offers the possibility to automatically generate a large number of questions. However, there is limited work on automatic true/false question generation due to the lack of training data and difficulty finding question-worthy content. In this paper, we propose an unsupervised True/False Question Generation approach (TF-QG) that automatically generates true/false questions from a given passage for reading comprehension test. TF-QG consists of a template-based framework that aims to test the specific knowledge in the passage by leveraging various NLP techniques, and a generative framework to generate more flexible and complicated questions by using a novel masking-and-infilling strategy. Human evaluation shows that our approach can generate high-quality and valuable true/false questions. In addition, simulated testing on the generated questions challenges the state-of-the-art inference models from NLI, QA, and fact verification tasks. 2022.bea-1.10 @@ -157,7 +157,7 @@ AbhijitSuresh JenniferJacobs MargaretPerkoff - James H.Martin + James H.Martin TamaraSumner 71-81 “Talk moves” are specific discursive strategies used by teachers and students to facilitate conversations in which students share their thinking, and actively consider the ideas of others, and engage in rich discussions. Experts in instructional practices often rely on cues to identify and document these strategies, for example by annotating classroom transcripts. Prior efforts to develop automated systems to classify teacher talk moves using transformers achieved a performance of 76.32% F1. In this paper, we investigate the feasibility of using enriched contextual cues to improve model performance. We applied state-of-the-art deep learning approaches for Natural Language Processing (NLP), including Robustly optimized bidirectional encoder representations from transformers (Roberta) with a special input representation that supports previous and subsequent utterances as context for talk moves classification. We worked with the publically available TalkMoves dataset, which contains utterances sourced from real-world classroom sessions (human- transcribed and annotated). Through a series of experimentations, we found that a combination of previous and subsequent utterances improved the transformers’ ability to differentiate talk moves (by 2.6% F1). These results constitute a new state of the art over previously published results and provide actionable insights to those in the broader NLP community who are working to develop similar transformer-based classification models. @@ -168,7 +168,7 @@ Cross-corpora experiments of automatic proficiency assessment and error detection for spoken <fixed-case>E</fixed-case>nglish StefanoBannò - MarcoMatassoni + MarcoMatassoni 82-91 The growing demand for learning English as a second language has led to an increasing interest in automatic approaches for assessing spoken language proficiency. One of the most significant challenges in this field is the lack of publicly available annotated spoken data. Another common issue is the lack of consistency and coherence in human assessment. To tackle both problems, in this paper we address the task of automatically predicting the scores of spoken test responses of English-as-a-second-language learners by training neural models on written data and using the presence of grammatical errors as a feature, as they can be considered consistent indicators of proficiency through their distribution and frequency. Specifically, we train a feature extractor on EFCAMDAT, a large written corpus containing error annotations and proficiency levels assigned by human experts, in order to extract information related to grammatical errors and, in turn, we use the resulting model for inference on the CLC-FCE corpus, on the ICNALE corpus, and on the spoken section of the TLT-school corpus, a collection of proficiency tests taken by Italian students. The work investigates the impact of the feature extractor on spoken proficiency assessment as well as the written-to-spoken approach. We find that our error-based approach can be beneficial for assessing spoken proficiency. The results obtained on the considered datasets are discussed and evaluated with appropriate metrics. 2022.bea-1.12 @@ -250,8 +250,8 @@ Assessing sentence readability for <fixed-case>G</fixed-case>erman language learners with broad linguistic modeling or readability formulas: When do linguistic insights make a difference? - ZarahWeiss - DetmarMeurers + ZarahWeiss + DetmarMeurers 141-153 We present a new state-of-the-art sentence-wise readability assessment model for German L2 readers. We build a linguistically broadly informed machine learning model and compare its performance against four commonly used readability formulas. To understand when the linguistic insights used to inform our model make a difference for readability assessment and when simple readability formulas suffice, we compare their performance based on two common automatic readability assessment tasks: predictive regression and sentence pair ranking. We find that leveraging linguistic insights yields top performances across tasks, but that for the identification of simplified sentences also readability formulas – which are easier to compute and more accessible – can be sufficiently precise. Linguistically informed modeling, however, is the only viable option for high quality outcomes in fine-grained prediction tasks. We then explore the sentence-wise readability profile of leveled texts written for language learners at a beginning, intermediate, and advanced level of German to showcase the valuable insights that sentence-wise readability assessment can have for the adaptation of learning materials and better understand how sentences’ individual readability contributes to larger texts’ overall readability. 2022.bea-1.19 @@ -261,7 +261,7 @@ Parametrizable exercise generation from authentic texts: Effectively targeting the language means on the curriculum TanjaHeck - DetmarMeurers + DetmarMeurers 154-166 We present a parametrizable approach to exercise generation from authentic texts that addresses the need for digital materials designed to practice the language means on the curriculum in a real-life school setting. The tool builds on a language-aware searchengine that helps identify attractive texts rich in the language means to be practiced. Making use of state-of-the-art NLP, the relevant learning targets are identified and transformed intoexercise items embedded in the original context. While the language-aware search engine ensures that these contexts match the learner‘s interests based on the search term used, and the linguistic parametrization of the system then reranks the results to prioritize texts that richly represent the learning targets, for theexercise generation to proceed on this basis, an interactive configuration panel allows users to adjust exercise complexity through a range of parameters specifying both properties of thesource sentences and of the exercises. An evaluation of exercises generated from web documents for a representative sample of language means selected from the English curriculum of 7th grade in German secondary school showed that the ombination of language-aware search and exercise generationsuccessfully facilitates the process of generating exercises from authentic texts that support practice of the pedagogical targets. 2022.bea-1.20 @@ -318,7 +318,7 @@ JamesFiacco ShiyanJiang DavidAdamson - CarolynRosé + CarolynRosé 204-215 Providing effective automatic essay feedback is necessary for offering writing instruction at a massive scale. In particular, feedback for promoting coherent flow of ideas in essays is critical. In this paper we propose a state-of-the-art method for automated analysis of structure and flow of writing, referred to as Rhetorical Structure Theory (RST) parsing. In so doing, we lay a foundation for a generalizable approach to automated writing feedback related to structure and flow. We address challenges in automated rhetorical analysis when applied to student writing and evaluate our novel RST parser model on both a recent student writing dataset and a standard benchmark RST parsing dataset. 2022.bea-1.25 @@ -344,7 +344,7 @@ ZidMancenido JingLiu HeatherHill - DanJurafsky + DanJurafsky 224-233 2022.bea-1.27 2022.bea-1.27.attachment.zip @@ -371,7 +371,7 @@ JasdeepSingh KatherineGoodman JeanHertzberg - KatharinaKann + KatharinaKann 250-261 Recent advances in natural language processing (NLP) have greatly helped educational applications, for both teachers and students. In higher education, there is great potential to use NLP tools for advancing pedagogical research. In this paper, we focus on how NLP can help understand student experiences in engineering, thus facilitating engineering educators to carry out large scale analysis that is helpful for re-designing the curriculum. Here, we introduce a new task we call response construct tagging (RCT), in which student responses to tailored survey questions are automatically tagged for six constructs measuring transformative experiences and engineering identity of students. We experiment with state-of-the-art classification models for this task and investigate the effects of different sources of additional information. Our best model achieves an F1 score of 48. We further investigate multi-task training on the related task of sentiment classification, which improves our model’s performance to 55 F1. Finally, we provide a detailed qualitative analysis of model performance. 2022.bea-1.29 @@ -393,7 +393,7 @@ Incremental Disfluency Detection for Spoken Learner <fixed-case>E</fixed-case>nglish LucySkidmore - RogerMoore + RogerMoore 272-278 Incremental disfluency detection provides a framework for computing communicative meaning from hesitations, repetitions and false starts commonly found in speech. One application of this area of research is in dialogue-based computer-assisted language learning (CALL), where detecting learners’ production issues word-by-word can facilitate timely and pedagogically driven responses from an automated system. Existing research on disfluency detection in learner speech focuses on disfluency removal for subsequent downstream tasks, processing whole utterances non-incrementally. This paper instead explores the application of laughter as a feature for incremental disfluency detection and shows that when combined with silence, these features reduce the impact of learner errors on model precision as well as lead to an overall improvement of model performance. This work adds to the growing body of research incorporating laughter as a feature for dialogue processing tasks and provides further support for the application of multimodality in dialogue-based CALL systems. 2022.bea-1.31 diff --git a/data/xml/2022.bigscience.xml b/data/xml/2022.bigscience.xml index d36bee26c7..c526ee301f 100644 --- a/data/xml/2022.bigscience.xml +++ b/data/xml/2022.bigscience.xml @@ -4,7 +4,7 @@ Proceedings of BigScience Episode #5 -- Workshop on Challenges & Perspectives in Creating Large Language Models AngelaFan - SuzanaIlic + SuzanaIlic ThomasWolf MatthiasGallé Association for Computational Linguistics @@ -39,11 +39,11 @@ Using <fixed-case>ASR</fixed-case>-Generated Text for Spoken Language Modeling NicolasHervé ValentinPelloin - BenoitFavre + BenoitFavre FranckDary AntoineLaurent SylvainMeignier - LaurentBesacier + LaurentBesacier 17-25 This papers aims at improving spoken language modeling (LM) using very large amount of automatically transcribed speech. We leverage the INA (French National Audiovisual Institute) collection and obtain 19GB of text after applying ASR on 350,000 hours of diverse TV shows. From this, spoken language models are trained either by fine-tuning an existing LM (FlauBERT) or through training a LM from scratch. The new models (FlauBERT-Oral) will be shared with the community and are evaluated not only in terms of word prediction accuracy but also for two downstream tasks : classification of TV shows and syntactic parsing of speech. Experimental results show that FlauBERT-Oral is better than its initial FlauBERT version demonstrating that, despite its inherent noisy nature, ASR-Generated text can be useful to improve spoken language modeling. 2022.bigscience-1.2 @@ -53,16 +53,16 @@ You reap what you sow: On the Challenges of Bias Evaluation Under Multilingual Settings - ZeerakTalat - AurélieNévéol + ZeerakTalat + AurélieNévéol StellaBiderman - MirunaClinciu + MirunaClinciu MananDey ShayneLongpre SashaLuccioni MaraimMasoud MargaretMitchell - DragomirRadev + DragomirRadev ShanyaSharma ArjunSubramonian JaesungTae @@ -124,7 +124,7 @@ ChristopherAkiki JavierDe La Rosa ClémentineFourrier - EnriqueManjavacas + EnriqueManjavacas StefanSchweter DanielVan Strien 75-83 @@ -197,7 +197,7 @@ Emergent Structures and Training Dynamics in Large Language Models RyanTeehan - MirunaClinciu + MirunaClinciu OlegSerikov ElizaSzczechla NatashaSeelam diff --git a/data/xml/2022.bionlp.xml b/data/xml/2022.bionlp.xml index b2338aa284..e1b9a7a833 100644 --- a/data/xml/2022.bionlp.xml +++ b/data/xml/2022.bionlp.xml @@ -4,9 +4,9 @@ Proceedings of the 21st Workshop on Biomedical Language Processing DinaDemner-Fushman - Kevin BretonnelCohen + Kevin BretonnelCohen SophiaAnaniadou - JunichiTsujii + JunichiTsujii Association for Computational Linguistics
Dublin, Ireland
May @@ -72,7 +72,7 @@ Zero-Shot Aspect-Based Scientific Document Summarization using Self-Supervised Pre-training AmirSoleimani VassilinaNikoulina - BenoitFavre + BenoitFavre SalahAit Mokhtar 49–62 We study the zero-shot setting for the aspect-based scientific document summarization task. Summarizing scientific documents with respect to an aspect can remarkably improve document assistance systems and readers experience. However, existing large-scale datasets contain a limited variety of aspects, causing summarization models to over-fit to a small set of aspects and a specific domain. We establish baseline results in zero-shot performance (over unseen aspects and the presence of domain shift), paraphrasing, leave-one-out, and limited supervised samples experimental setups. We propose a self-supervised pre-training approach to enhance the zero-shot performance. We leverage the PubMed structured abstracts to create a biomedical aspect-based summarization dataset. Experimental results on the PubMed and FacetSum aspect-based datasets show promising performance when the model is pre-trained using unlabelled in-domain data. @@ -109,7 +109,7 @@ Automatic Biomedical Term Clustering by Learning Fine-grained Term Representations SihangZeng - ZhengYuan + ZhengYuan ShengYu 91–96 Term clustering is important in biomedical knowledge graph construction. Using similarities between terms embedding is helpful for term clustering. State-of-the-art term embeddings leverage pretrained language models to encode terms, and use synonyms and relation knowledge from knowledge graphs to guide contrastive learning. These embeddings provide close embeddings for terms belonging to the same concept. However, from our probing experiments, these embeddings are not sensitive to minor textual differences which leads to failure for biomedical term clustering. To alleviate this problem, we adjust the sampling strategy in pretraining term embeddings by providing dynamic hard positive and negative samples during contrastive learning to learn fine-grained representations which result in better biomedical term clustering. We name our proposed method as CODER++, and it has been applied in clustering biomedical concepts in the newly released Biomedical Knowledge Graph named BIOS. @@ -121,7 +121,7 @@ <fixed-case>B</fixed-case>io<fixed-case>BART</fixed-case>: Pretraining and Evaluation of A Biomedical Generative Language Model HongyiYuan - ZhengYuan + ZhengYuan RuyiGan JiaxingZhang YutaoXie @@ -239,7 +239,7 @@ Intra-Template Entity Compatibility based Slot-Filling for Clinical Trial Information Extraction ChristianWitte - PhilippCimiano + PhilippCimiano 178–192 We present a deep learning based information extraction system that can extract the design and results of a published abstract describing a Randomized Controlled Trial (RCT). In contrast to other approaches, our system does not regard the PICO elements as flat objects or labels but as structured objects. We thus model the task as the one of filling a set of templates and slots; our two-step approach recognizes relevant slot candidates as a first step and assigns them to a corresponding template as second step, relying on a learned pairwise scoring function that models the compatibility of the different slot values. We evaluate the approach on a dataset of 211 manually annotated abstracts for type 2 Diabetes and Glaucoma, showing the positive impact of modelling intra-template entity compatibility. As main benefit, our approach yields a structured object for every RCT abstract that supports the aggregation and summarization of clinical trial results across published studies and can facilitate the task of creating a systematic review or meta-analysis. 2022.bionlp-1.18 @@ -256,7 +256,7 @@ JordiArmengol-Estapé JoaquínSilveira-Ocampo AlfonsoValencia - AitorGonzalez-Agirre + AitorGonzalez-Agirre MartaVillegas 193–199 This work presents the first large-scale biomedical Spanish language models trained from scratch, using large biomedical corpora consisting of a total of 1.1B tokens and an EHR corpus of 95M tokens. We compared them against general-domain and other domain-specific models for Spanish on three clinical NER tasks. As main results, our models are superior across the NER tasks, rendering them more convenient for clinical NLP applications. Furthermore, our findings indicate that when enough data is available, pre-training from scratch is better than continual pre-training when tested on clinical tasks, raising an exciting research question about which approach is optimal. Our models and fine-tuning scripts are publicly available at HuggingFace and GitHub. @@ -272,7 +272,7 @@ MorganWixted AlejandroGarcia-Rudolph CatalinaMartínez-Costa - GuenterNeumann + GuenterNeumann 200–211 Despite the advances in digital healthcare systems offering curated structured knowledge, much of the critical information still lies in large volumes of unlabeled and unstructured clinical texts. These texts, which often contain protected health information (PHI), are exposed to information extraction tools for downstream applications, risking patient identification. Existing works in de-identification rely on using large-scale annotated corpora in English, which often are not suitable in real-world multilingual settings. Pre-trained language models (LM) have shown great potential for cross-lingual transfer in low-resource settings. In this work, we empirically show the few-shot cross-lingual transfer property of LMs for named entity recognition (NER) and apply it to solve a low-resource and real-world challenge of code-mixed (Spanish-Catalan) clinical notes de-identification in the stroke domain. We annotate a gold evaluation dataset to assess few-shot setting performance where we only use a few hundred labeled examples for training. Our model improves the zero-shot F1-score from 73.7% to 91.2% on the gold evaluation set when adapting Multilingual BERT (mBERT) (CITATION) from the MEDDOCAN (CITATION) corpus with our few-shot cross-lingual target corpus. When generalized to an out-of-sample test set, the best model achieves a human-evaluation F1-score of 97.2%. 2022.bionlp-1.20 @@ -308,7 +308,7 @@ <fixed-case>B</fixed-case>io<fixed-case>C</fixed-case>ite: A Deep Learning-based Citation Linkage Framework for Biomedical Research Articles SudiptaSingha Roy - Robert E.Mercer + Robert E.Mercer 241–251 Research papers reflect scientific advances. Citations are widely used in research publications to support the new findings and show their benefits, while also regulating the information flow to make the contents clearer for the audience. A citation in a research article refers to the information’s source, but not the specific text span from that source article. In biomedical research articles, this task is challenging as the same chemical or biological component can be represented in multiple ways in different papers from various domains. This paper suggests a mechanism for linking citing sentences in a publication with cited sentences in referenced sources. The framework presented here pairs the citing sentence with all of the sentences in the reference text, and then tries to retrieve the semantically equivalent pairs. These semantically related sentences from the reference paper are chosen as the cited statements. This effort involves designing a citation linkage framework utilizing sequential and tree-structured siamese deep learning models. This paper also provides a method to create a synthetic corpus for such a task. 2022.bionlp-1.23 @@ -331,7 +331,7 @@ Overview of the <fixed-case>M</fixed-case>ed<fixed-case>V</fixed-case>id<fixed-case>QA</fixed-case> 2022 Shared Task on Medical Video Question-Answering - DeepakGupta + DeepakGupta DinaDemner-Fushman 264–274 In this paper, we present an overview of the MedVidQA 2022 shared task, collocated with the 21st BioNLP workshop at ACL 2022. The shared task addressed two of the challenges faced by medical video question answering: (I) a video classification task that explores new approaches to medical video understanding (labeling), and (ii) a visual answer localization task. Visual answer localization refers to the identification of the relevant temporal segments (start and end timestamps) in the video where the answer to the medical question is being shown or illustrated. A total of thirteen teams participated in the shared task challenges, with eleven system descriptions submitted to the workshop. The descriptions present monomodal and multi-modal approaches developed for medical video classification and visual answer localization. This paper describes the tasks, the datasets, evaluation metrics, and baseline systems for both tasks. Finally, the paper summarizes the techniques and results of the evaluation of the various approaches explored by the participating teams. @@ -385,7 +385,7 @@ Data Augmentation for Rare Symptoms in Vaccine Side-Effect Detection BosungKim - NdapaNakashole + NdapaNakashole 310–315 We study the problem of entity detection and normalization applied to patient self-reports of symptoms that arise as side-effects of vaccines. Our application domain presents unique challenges that render traditional classification methods ineffective: the number of entity types is large; and many symptoms are rare, resulting in a long-tail distribution of training examples per entity type. We tackle these challenges with an autoregressive model that generates standardized names of symptoms. We introduce a data augmentation technique to increase the number of training examples for rare symptoms. Experiments on real-life patient vaccine symptom self-reports show that our approach outperforms strong baselines, and that additional examples improve performance on the long-tail entities. 2022.bionlp-1.29 @@ -458,7 +458,7 @@ YingDing GregDurrett Justin F.Rousseau - YifanPeng + YifanPeng 359–368 Generating a summary from findings has been recently explored (Zhang et al., 2018, 2020) in note types such as radiology reports that typically have short length. In this work, we focus on echocardiogram notes that is longer and more complex compared to previous note types. We formally define the task of echocardiography conclusion generation (EchoGen) as generating a conclusion given the findings section, with emphasis on key cardiac findings. To promote the development of EchoGen methods, we present a new benchmark, which consists of two datasets collected from two hospitals. We further compare both standard and start-of-the-art methods on this new benchmark, with an emphasis on factual consistency. To accomplish this, we develop a tool to automatically extract concept-attribute tuples from the text. We then propose an evaluation metric, FactComp, to compare concept-attribute tuples between the human reference and generated conclusions. Both automatic and human evaluations show that there is still a significant gap between human-written and machine-generated conclusions on echo reports in terms of factuality and overall quality. 2022.bionlp-1.35 @@ -507,7 +507,7 @@ MatúšFalis HangDong AlexandraBirch - BeatriceAlex + BeatriceAlex 389–401 Medical document coding is the process of assigning labels from a structured label space (ontology – e.g., ICD-9) to medical documents. This process is laborious, costly, and error-prone. In recent years, efforts have been made to automate this process with neural models. The label spaces are large (in the order of thousands of labels) and follow a big-head long-tail label distribution, giving rise to few-shot and zero-shot scenarios. Previous efforts tried to address these scenarios within the model, leading to improvements on rare labels, but worse results on frequent ones. We propose data augmentation and synthesis techniques in order to address these scenarios. We further introduce an analysis technique for this setting inspired by confusion matrices. This analysis technique points to the positive impact of data augmentation and synthesis, but also highlights more general issues of confusion within families of codes, and underprediction. 2022.bionlp-1.39 diff --git a/data/xml/2022.blackboxnlp.xml b/data/xml/2022.blackboxnlp.xml index 356f501d57..e0c4ec1f63 100644 --- a/data/xml/2022.blackboxnlp.xml +++ b/data/xml/2022.blackboxnlp.xml @@ -46,7 +46,7 @@ Where’s the Learning in Representation Learning for Compositional Semantics and the Case of Thematic Fit MughilanMuthupari SamratHalder - AsadSayeed + AsadSayeed YuvalMarton 28-39 Observing that for certain NLP tasks, such as semantic role prediction or thematic fit estimation, random embeddings perform as well as pre-trained embeddings, we explore what settings allow for this, and examine where most of the learning is encoded: the word embeddings, the semantic role embeddings, or “the network”. We find nuanced answers, depending on the task and its relation to the training objective. We examine these representation learning aspects in multi-task learning, where role prediction and role-filling are supervised tasks, while several thematic fit tasks are outside the models’ direct supervision. We observe a non-monotonous relation between some tasks’ quality scores and the training data size. In order to better understand this observation, we analyze these results using easier, per-verb versions of these tasks. @@ -58,7 +58,7 @@ Sentence Ambiguity, Grammaticality and Complexity Probes SunitBhattacharya VilémZouhar - OndrejBojar + OndrejBojar 40-50 It is unclear whether, how and where large pre-trained language models capture subtle linguistic traits like ambiguity, grammaticality and sentence complexity. We present results of automatic classification of these traits and compare their viability and patterns across representation types. We demonstrate that template-based datasets with surface-level artifacts should not be used for probing, careful comparisons with baselines should be done and that t-SNE plots should not be used to determine the presence of a feature among dense vectors representations. We also show how features might be highly localized in the layers for these models and get lost in the upper layers. 2022.blackboxnlp-1.4 @@ -94,7 +94,7 @@ MaximeDe Bruyn EhsanLotfi JeskaBuhmann - WalterDaelemans + WalterDaelemans 80-90 Researchers often use games to analyze the abilities of Artificial Intelligence models. In this work, we use the game of Twenty Questions to study the world knowledge of language models. Despite its simplicity for humans, this game requires a broad knowledge of the world to answer yes/no questions. We evaluate several language models on this task and find that only the largest model has enough world knowledge to play it well, although it still has difficulties with the shape and size of objects. We also present a new method to improve the knowledge of smaller models by leveraging external information from the web. Finally, we release our dataset and Twentle, a website to interactively test the knowledge of language models by playing Twenty Questions. 2022.blackboxnlp-1.7 @@ -139,7 +139,7 @@ RasmusJørgensen FiammettaCaccavale ChristianIgel - AndersSøgaard + AndersSøgaard 131-141 Multilingual NLP models provide potential solutions to the digital language divide, i.e., cross-language performance disparities. Early analyses of such models have indicated good performance across training languages and good generalization to unseen, related languages. This work examines whether, between related languages, multilingual models are equally right for the right reasons, i.e., if interpretability methods reveal that the models put emphasis on the same words as humans. To this end, we provide a new trilingual, parallel corpus of rationale annotations for English, Danish, and Italian sentiment analysis models and use it to benchmark models and interpretability methods. We propose rank-biased overlap as a better metric for comparing input token attributions to human rationale annotations. Our results show: (i) models generally perform well on the languages they are trained on, and align best with human rationales in these languages; (ii) performance is higher on English, even when not a source language, but this performance is not accompanied by higher alignment with human rationales, which suggests that language models favor English, but do not facilitate successful transfer of rationales. 2022.blackboxnlp-1.11 @@ -199,7 +199,7 @@ Understanding Domain Learning in Language Models Through Subpopulation Analysis ZhengZhao YftahZiser - ShayCohen + ShayCohen 192-209 We investigate how different domains are encoded in modern neural network architectures. We analyze the relationship between natural language domains, model size, and the amount of training data used. The primary analysis tool we develop is based on subpopulation analysis with Singular Vector Canonical Correlation Analysis (SVCCA), which we apply to Transformer-based language models (LMs). We compare the latent representations of such a language model at its different layers from a pair of models: a model trained on multiple domains (an experimental model) and a model trained on a single domain (a control model). Through our method, we find that increasing the model capacity impacts how domain information is stored in upper and lower layers differently. In addition, we show that larger experimental models simultaneously embed domain-specific information as if they were conjoined control models. These findings are confirmed qualitatively, demonstrating the validity of our method. 2022.blackboxnlp-1.16 @@ -211,7 +211,7 @@ DiegoGarcia-Olano YasumasaOnoe JoydeepGhosh - ByronWallace + ByronWallace 210-224 Interpretable entity representations (IERs) are sparse embeddings that are “human-readable” in that dimensions correspond to fine-grained entity types and values are predicted probabilities that a given entity is of the corresponding type. These methods perform well in zero-shot and low supervision settings. Compared to standard dense neural embeddings, such interpretable representations may permit analysis and debugging. However, while fine-tuning sparse, interpretable representations improves accuracy on downstream tasks, it destroys the semantics of the dimensions which were enforced in pre-training. Can we maintain the interpretable semantics afforded by IERs while improving predictive performance on downstream tasks? Toward this end, we propose Intermediate enTity-based Sparse Interpretable Representation Learning (ItsIRL). ItsIRL realizes improved performance over prior IERs on biomedical tasks, while maintaining “interpretability” generally and their ability to support model debugging specifically. The latter is enabled in part by the ability to perform “counterfactual” fine-grained entity type manipulation, which we explore in this work. Finally, we propose a method to construct entity type based class prototypes for revealing global semantic properties of classes learned by our model. Code for pre-training and experiments will be made publicly available. 2022.blackboxnlp-1.17 @@ -222,7 +222,7 @@ Towards Procedural Fairness: Uncovering Biases in How a Toxic Language Classifier Uses Sentiment Information IsarNejadgholi EsmaBalkir - KathleenFraser + KathleenFraser SvetlanaKiritchenko 225-237 Previous works on the fairness of toxic language classifiers compare the output of models with different identity terms as input features but do not consider the impact of other important concepts present in the context. Here, besides identity terms, we take into account high-level latent features learned by the classifier and investigate the interaction between these features and identity terms. For a multi-class toxic language classifier, we leverage a concept-based explanation framework to calculate the sensitivity of the model to the concept of sentiment, which has been used before as a salient feature for toxic language detection. Our results show that although for some classes, the classifier has learned the sentiment information as expected, this information is outweighed by the influence of identity terms as input features. This work is a step towards evaluating procedural fairness, where unfair processes lead to unfair outcomes. The produced knowledge can guide debiasing techniques to ensure that important concepts besides identity terms are well-represented in training datasets. @@ -234,7 +234,7 @@ Investigating the Characteristics of a Transformer in a Few-Shot Setup: Does Freezing Layers in <fixed-case>R</fixed-case>o<fixed-case>BERT</fixed-case>a Help? DigvijayIngle RishabhTripathi - AyushKumar + AyushKumar KevinPatel JithendraVepa 238-248 @@ -248,7 +248,7 @@ It Is Not Easy To Detect Paraphrases: Analysing Semantic Similarity With Antonyms and Negation Using the New <fixed-case>S</fixed-case>em<fixed-case>A</fixed-case>nto<fixed-case>N</fixed-case>eg Benchmark TeemuVahtola MathiasCreutz - JörgTiedemann + JörgTiedemann 249-262 We investigate to what extent a hundred publicly available, popular neural language models capture meaning systematically. Sentence embeddings obtained from pretrained or fine-tuned language models can be used to perform particular tasks, such as paraphrase detection, semantic textual similarity assessment or natural language inference. Common to all of these tasks is that paraphrastic sentences, that is, sentences that carry (nearly) the same meaning, should have (nearly) the same embeddings regardless of surface form. We demonstrate that performance varies greatly across different language models when a specific type of meaning-preserving transformation is applied: two sentences should be identified as paraphrastic if one of them contains a negated antonym in relation to the other one, such as “I am not guilty” versus “I am innocent”.We introduce and release SemAntoNeg, a new test suite containing 3152 entries for probing paraphrasticity in sentences incorporating negation and antonyms. Among other things, we show that language models fine-tuned for natural language inference outperform other types of models, especially the ones fine-tuned to produce general-purpose sentence embeddings, on the test suite. Furthermore, we show that most models designed explicitly for paraphrasing are rather mediocre in our task. 2022.blackboxnlp-1.20 @@ -354,7 +354,7 @@ AnuragKatakkar Clay H.Yoo WeiqinWang - ZacharyLipton + ZacharyLipton DivyanshKaushik 346-355 In attempts to develop sample-efficient and interpretable algorithms, researcher have explored myriad mechanisms for collecting and exploiting feature feedback, auxiliary annotations provided for training (but not test) instances that highlight salient evidence. Examples include bounding boxes around objects and salient spans in text. Despite its intuitive appeal, feature feedback has not delivered significant gains in practical problems as assessed on iid holdout sets. However, recent works on counterfactually augmented data suggest an alternative benefit of supplemental annotations, beyond interpretability: lessening sensitivity to spurious patterns and consequently delivering gains in out-of-domain evaluations. We speculate that while existing methods for incorporating feature feedback have delivered negligible in-sample performance gains, they may nevertheless provide out-of-domain benefits. Our experiments addressing sentiment analysis, show that feature feedback methods perform significantly better on various natural out-of-domain datasets despite comparable in-domain evaluations. By contrast, performance on natural language inference remains comparable. Finally, we compare those tasks where feature feedback does (and does not) help. @@ -400,7 +400,7 @@ DeborahFerreira MokanaranganThayaparan MarcoValentino - AndreFreitas + AndreFreitas 394-403 In the interest of interpreting neural NLI models and their reasoning strategies, we carry out a systematic probing study which investigates whether these modelscapture the crucial semantic features central to natural logic: monotonicity and concept inclusion. Correctly identifying valid inferences in downward-monotone contexts is a known stumbling block for NLI performance,subsuming linguistic phenomena such as negation scope and generalized quantifiers. To understand this difficulty, we emphasize monotonicity as a property of a context and examine the extent to which models capture relevant monotonicity information in the vector representations which are intermediate to their decision making process. Drawing on the recent advancement of the probing paradigm,we compare the presence of monotonicity features across various models. We find that monotonicity information is notably weak in the representations of popularNLI models which achieve high scores on benchmarks, and observe that previous improvements to these models based on fine-tuning strategies have introduced stronger monotonicity features together with their improved performance on challenge sets. 2022.blackboxnlp-1.33 @@ -410,7 +410,7 @@ Probing with Noise: Unpicking the Warp and Weft of Embeddings FilipKlubicka - JohnKelleher + JohnKelleher 404-417 Improving our understanding of how information is encoded in vector space can yield valuable interpretability insights. Alongside vector dimensions, we argue that it is possible for the vector norm to also carry linguistic information. We develop a method to test this: an extension of the probing framework which allows for relative intrinsic interpretations of probing results. It relies on introducing noise that ablates information encoded in embeddings, grounded in random baselines and confidence intervals. We apply the method to well-established probing tasks and find evidence that confirms the existence of separate information containers in English GloVe and BERT embeddings. Our correlation analysis aligns with the experimental findings that different encoders use the norm to encode different kinds of information: GloVe stores syntactic and sentence length information in the vector norm, while BERT uses it to encode contextual incongruity. 2022.blackboxnlp-1.34 @@ -422,7 +422,7 @@ Look to the Right: Mitigating Relative Position Bias in Extractive Question Answering KazutoshiShinoda SakuSugawara - AkikoAizawa + AkikoAizawa 418-425 Extractive question answering (QA) models tend to exploit spurious correlations to make predictions when a training set has unintended biases. This tendency results in models not being generalizable to examples where the correlations do not hold. Determining the spurious correlations QA models can exploit is crucial in building generalizable QA models in real-world applications; moreover, a method needs to be developed that prevents these models from learning the spurious correlations even when a training set is biased. In this study, we discovered that the relative position of an answer, which is defined as the relative distance from an answer span to the closest question-context overlap word, can be exploited by QA models as superficial cues for making predictions. Specifically, we find that when the relative positions in a training set are biased, the performance on examples with relative positions unseen during training is significantly degraded. To mitigate the performance degradation for unseen relative positions, we propose an ensemble-based debiasing method that does not require prior knowledge about the distribution of relative positions. We demonstrate that the proposed method mitigates the models’ reliance on relative positions using the biased and full SQuAD dataset. We hope that this study can help enhance the generalization ability of QA models in real-world applications. 2022.blackboxnlp-1.35 diff --git a/data/xml/2022.bucc.xml b/data/xml/2022.bucc.xml index 91ca3ef7b6..9a7d23e7ab 100644 --- a/data/xml/2022.bucc.xml +++ b/data/xml/2022.bucc.xml @@ -4,7 +4,7 @@ Proceedings of the BUCC Workshop within LREC 2022 ReinhardRapp - PierreZweigenbaum + PierreZweigenbaum SergeSharoff European Language Resources Association
Marseille, France
@@ -31,7 +31,7 @@ About Evaluating Bilingual Lexicon Induction MartinLaville EmmanuelMorin - PhillippeLanglais + PhillippeLanglais 8–14 With numerous new methods proposed recently, the evaluation of Bilingual Lexicon Induction have been quite hazardous and inconsistent across works. Some studies proposed some guidance to sanitize this; yet, they are not necessarily followed by practitioners. In this study, we try to gather these different recommendations and add our owns, with the aim to propose an unified evaluation protocol. We further show that the easiness of a benchmark while being correlated to the proximity of the language pairs being considered, is even more conditioned on the graphical similarities within the test word pairs. 2022.bucc-1.2 @@ -42,8 +42,8 @@ SilviaSeverini ViktorHangya MasoudJalili Sabet - AlexanderFraser - HinrichSchütze + AlexanderFraser + HinrichSchütze 15–22 Bilingual Word Embeddings (BWEs) are one of the cornerstones of cross-lingual transfer of NLP models. They can be built using only monolingual corpora without supervision leading to numerous works focusing on unsupervised BWEs. However, most of the current approaches to build unsupervised BWEs do not compare their results with methods based on easy-to-access cross-lingual signals. In this paper, we argue that such signals should always be considered when developing unsupervised BWE methods. The two approaches we find most effective are: 1) using identical words as seed lexicons (which unsupervised approaches incorrectly assume are not available for orthographically distinct language pairs) and 2) combining such lexicons with pairs extracted by matching romanized versions of words with an edit distance threshold. We experiment on thirteen non-Latin languages (and English) and show that such cheap signals work well and that they outperform using more complex unsupervised methods on distant language pairs such as Chinese, Japanese, Kannada, Tamil, and Thai. In addition, they are even competitive with the use of high-quality lexicons in supervised approaches. Our results show that these training signals should not be neglected when building BWEs, even for distant languages. 2022.bucc-1.3 @@ -53,7 +53,7 @@ Building Domain-specific Corpora from the Web: the Case of <fixed-case>E</fixed-case>uropean Digital Service Infrastructures Rikvan Noord CristianGarcía-Romero - MiquelEsplà-Gomis + MiquelEsplà-Gomis LeopoldoPla Sempere AntonioToral 23–32 @@ -77,7 +77,7 @@ KláraTauchmanová KristýnaNeumannová IvanaKvapilíková - OndřejBojar + OndřejBojar 43–49 We present our submission to the BUCC Shared Task on bilingual term alignment in comparable specialized corpora. We devised three approaches using static embeddings with post-hoc alignment, the Monoses pipeline for unsupervised phrase-based machine translation, and contextualized multilingual embeddings. We show that contextualized embeddings from pretrained multilingual models lead to similar results as static embeddings but further improvement can be achieved by task-specific fine-tuning. Retrieving term pairs from the running phrase tables of the Monoses systems can match this enhanced performance and leads to an average precision of 0.88 on the train set. 2022.bucc-1.6 diff --git a/data/xml/2022.cai.xml b/data/xml/2022.cai.xml index d6ea57bf9f..ec58aebd3d 100644 --- a/data/xml/2022.cai.xml +++ b/data/xml/2022.cai.xml @@ -45,7 +45,7 @@ YeongbeomLim SanKim Jin YeaJang - SaimShin + SaimShin Ki-HoonLee 16–22 We propose a Korean multimodal dialogue system targeting emotion-based empathetic dialogues because most research in this field has been conducted in a few languages such as English and Japanese and in certain circumstances. Our dialogue system consists of an emotion detector, an empathetic response generator, a monitoring interface, a voice activity detector, a speech recognizer, a speech synthesizer, a gesture classification, and several controllers to provide both multimodality and empathy during a conversation between a human and a machine. For comparisons across visual influence on users, our dialogue system contains two versions of the user interface, a cat face-based user interface and an avatar-based user interface. We evaluated our dialogue system by investigating the dialogues in text and the average mean opinion scores under three different visual conditions, no visual, the cat face-based, and the avatar-based expressions. The experimental results stand for the importance of adequate visual expressions according to user utterances. @@ -80,8 +80,8 @@ ZhuoGong DaisukeSaito ShengLi - HisashiKawai - NobuakiMinematsu + HisashiKawai + NobuakiMinematsu 42–47 Language models (LM) have played crucial roles in automatic speech recognition (ASR) to enhance end-to-end (E2E) ASR systems’ performance. There are two categories of approaches: finding better ways to integrate LMs into ASR systems and adapting on LMs to the task domain. This article will start with a reflection of interpolation-based integration methods of E2E ASR’s scores and LM’s scores. Then we will focus on LM augmentation approaches based on the noisy channel model, which is intrigued by insights obtained from the above reflection. The experiments show that we can enhance an ASR E2E model based on encoder-decoder architecture by pre-training the decoder with text data. This implies the decoder of an E2E model can be treated as an LM and reveals the possibility of enhancing the E2E model without an external LM. Based on those ideas, we proposed the implicit language model canceling method and then did more discussion about the decoder part of an E2E ASR model. The experimental results on the TED-LIUM2 dataset show that our approach achieves a 3.4% relative WER reduction compared with the baseline system, and more analytic experiments provide concrete experimental supports for our assumption. 2022.cai-1.6 @@ -91,7 +91,7 @@ Semantic Content Prediction for Generating Interviewing Dialogues to Elicit Users’ Food Preferences JieZeng TatsuyaSakato - YukikoNakano + YukikoNakano 48–58 Dialogue systems that aim to acquire user models through interactions with users need to have interviewing functionality. In this study, we propose a method to generate interview dialogues to build a dialogue system that acquires user preferences for food. First, we collected 118 text-based dialogues between the interviewer and customer and annotated the communicative function and semantic content of the utterances. Next, using the corpus as training data, we created a classification model for the communicative function of the interviewer’s next utterance and a generative model that predicts the semantic content of the utterance based on the dialogue history. By representing semantic content as a sequence of tokens, we evaluated the semantic content prediction model using BLEU. The results demonstrated that the semantic content produced by the proposed method was closer to the ground truth than the semantic content transformed from the output text generated by the retrieval model and GPT-2. Further, we present some examples of dialogue generation by applying model outputs to template-based sentence generation. 2022.cai-1.7 diff --git a/data/xml/2022.case.xml b/data/xml/2022.case.xml index 646a635c07..d4b193f079 100644 --- a/data/xml/2022.case.xml +++ b/data/xml/2022.case.xml @@ -3,8 +3,8 @@ Proceedings of the 5th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Text (CASE) - AliHürriyetoğlu - HristoTanev + AliHürriyetoğlu + HristoTanev VanniZavarella ErdemYörük Association for Computational Linguistics @@ -37,7 +37,7 @@ HuilingYou DavidSamuel SamiaTouileb - LiljaØvrelid + LiljaØvrelid 7-15 Event extraction involves the detection and extraction of both the event triggers and the corresponding arguments. Existing systems often decompose event extraction into multiple subtasks, without considering their possible interactions. In this paper, we propose EventGraph, a joint framework for event extraction, which encodes events as graphs. We represent event triggers and arguments as nodes in a semantic graph. Event extraction therefore becomes a graph parsing problem, which provides the following advantages: 1) performing event detection and argument extraction jointly; 2) detecting and extracting multiple events from a piece of text; 3) capturing the complicated interaction between event arguments and triggers. Experimental results on ACE2005 show that our model is competitive to state-of-the-art systems and has substantially improved the results on argument extraction. Additionally, we create two new datasets from ACE2005 where we keep the entire text spans for event arguments, instead of just the head word(s). Our code and models will be released as open-source. 2022.case-1.2 @@ -59,8 +59,8 @@ A Hybrid Knowledge and Transformer-Based Model for Event Detection with Automatic Self-Attention Threshold, Layer and Head Selection ThierryDesot - OrpheeDe Clercq - VeroniqueHoste + OrpheeDe Clercq + VeroniqueHoste 21-31 Event and argument role detection are frequently conceived as separate tasks. In this work we conceive both processes as one taskin a hybrid event detection approach. Its main component is based on automatic keyword extraction (AKE) using the self-attention mechanism of a BERT transformer model. As a bottleneck for AKE is defining the threshold of the attention values, we propose a novel method for automatic self-attention thresholdselection. It is fueled by core event information, or simply the verb and its arguments as the backbone of an event. These are outputted by a knowledge-based syntactic parser. In a secondstep the event core is enriched with other semantically salient words provided by the transformer model. Furthermore, we propose an automatic self-attention layer and head selectionmechanism, by analyzing which self-attention cells in the BERT transformer contribute most to the hybrid event detection and which linguistic tasks they represent. This approach was integrated in a pipeline event extraction approachand outperforms three state of the art multi-task event extraction methods. 2022.case-1.4 @@ -95,7 +95,7 @@ <fixed-case>LTRC</fixed-case> @ Causal News Corpus 2022: Extracting and Identifying Causal Elements using Adapters HiranmaiSri Adibhatla - ManishShrivastava + ManishShrivastava 50-55 Causality detection and identification is centered on identifying semantic and cognitive connections in a sentence. In this paper, we describe the effort of team LTRC for Causal News Corpus - Event Causality Shared Task 2022 at the 5th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Text (CASE 2022). The shared task consisted of two subtasks: 1) identifying if a sentence contains a causality relation, and 2) identifying spans of text that correspond to cause, effect and signals. We fine-tuned transformer-based models with adapters for both subtasks. Our best-performing models obtained a binary F1 score of 0.853 on held-out data for subtask 1 and a macro F1 score of 0.032 on held-out data for subtask 2. Our approach is ranked third in subtask 1 and fourth in subtask 2. The paper describes our experiments, solutions, and analysis in detail. 2022.case-1.7 @@ -124,7 +124,7 @@ EsauVillatoro-tello MartinFajcik MuskaanSingh - PavelSmrz + PavelSmrz PetrMotlicek 61-69 In this paper, we describe our participation in the subtask 1 of CASE-2022, Event Causality Identification with Casual News Corpus. We address the Causal Relation Identification (CRI) task by exploiting a set of simple yet complementary techniques for fine-tuning language models (LMs) on a few annotated examples (i.e., a few-shot configuration).We follow a prompt-based prediction approach for fine-tuning LMs in which the CRI task is treated as a masked language modeling problem (MLM). This approach allows LMs natively pre-trained on MLM tasks to directly generate textual responses to CRI-specific prompts. We compare the performance of this method against ensemble techniques trained on the entire dataset. Our best-performing submission was fine-tuned with only 256 instances per class, 15.7% of the all available data, and yet obtained the second-best precision (0.82), third-best accuracy (0.82), and an F1-score (0.85) very close to what was reported by the winner team (0.86). @@ -141,7 +141,7 @@ EsauVillatoro-tello SergioBurdisso PetrMotlicek - PavelSmrz + PavelSmrz 70-78 In this paper, we describe our shared task submissions for Subtask 2 in CASE-2022, Event Causality Identification with Casual News Corpus. The challenge focused on the automatic detection of all cause-effect-signal spans present in the sentence from news-media. We detect cause-effect-signal spans in a sentence using T5 — a pre-trained autoregressive language model. We iteratively identify all cause-effect-signal span triplets, always conditioning the prediction of the next triplet on the previously predicted ones. To predict the triplet itself, we consider different causal relationships such as cause→effect→signal. Each triplet component is generated via a language model conditioned on the sentence, the previous parts of the current triplet, and previously predicted triplets. Despite training on an extremely small dataset of 160 samples, our approach achieved competitive performance, being placed second in the competition. Furthermore, we show that assuming either cause→effect or effect→cause order achieves similar results. 2022.case-1.10 @@ -288,7 +288,7 @@ HuilingYou DavidSamuel SamiaTouileb - LiljaØvrelid + LiljaØvrelid 155-160 This paper presents our submission to the 2022 edition of the CASE 2021 shared task 1, subtask 4. The EventGraph system adapts an end-to-end, graph-based semantic parser to the task of Protest Event Extraction and more specifically subtask 4 on event trigger and argument extraction. We experiment with various graphs, encoding the events as either “labeled-edge” or “node-centric” graphs. We show that the “node-centric” approach yields best results overall, performing well across the three languages of the task, namely English, Spanish, and Portuguese. EventGraph is ranked 3rd for English and Portuguese, and 4th for Spanish. 2022.case-1.22 diff --git a/data/xml/2022.ccgpk.xml b/data/xml/2022.ccgpk.xml index 464a608131..408abd23ed 100644 --- a/data/xml/2022.ccgpk.xml +++ b/data/xml/2022.ccgpk.xml @@ -3,11 +3,11 @@ Proceedings of the 1st Workshop on Customized Chat Grounding Persona and Knowledge - HeuiseokLim + HeuiseokLim SeungryongKim YeonsooLee SteveLin - Paul HongsuckSeo + Paul HongsuckSeo YuminSuh YoonnaJang JungwooLim @@ -43,7 +43,7 @@ Proto-Gen: An end-to-end neural generator for persona and knowledge grounded response generation SougataSaha SouvikDas - RohiniSrihari + RohiniSrihari 9–14 In this paper we detail the implementation of Proto-Gen, an end-to-end neural response generator capable of selecting appropriate persona and fact sentences from available options, and generating persona and fact grounded responses. Incorporating a novel interaction layer in an encoder-decoder architecture, Proto-Gen facilitates learning dependencies between facts, persona and the context, and outperforms existing baselines on the FoCus dataset for both the sub-tasks of persona and fact selection, and response generation. We further fine tune Proto-Gen’s hyperparameters, and share our results and findings. 2022.ccgpk-1.2 diff --git a/data/xml/2022.ccl.xml b/data/xml/2022.ccl.xml index 423df3244b..4f067fa38c 100644 --- a/data/xml/2022.ccl.xml +++ b/data/xml/2022.ccl.xml @@ -158,7 +158,7 @@ 融合知识的多目标词联合框架语义分析模型(Knowledge-integrated Joint Model For Multi-target Frame Semantic Parsing) XudongChen旭东 CeZheng - BaobaoChang宝宝 + BaobaoChang宝宝 132–142 “框架语义分析任务是自然语言处理领域的一项基础性任务。先前的研究工作大多针对单目标词进行模型设计,无法一次性完成多个目标词的框架语义结构提取。本文提出了一个面向多目标的框架语义分析模型,实现对多目标词的联合预测。该模型对框架语义分析的各项子任务进行交互性建模,实现子任务间的双向交互。此外,本文利用关系图网络对框架关系信息进行编码,将其作为框架语义学知识融入模型中。实验表明,本文模型在不借助额外语料的情况下相比之前模型都有不同程度的提高。消融实验证明了本文模型设计的有效性。此外我们分析了模型目前存在的局限性以及未来的改进方向。” 2022.ccl-1.13 @@ -181,7 +181,7 @@ 基于实体信息增强及多粒度融合的多文档摘要(Multi-Document Summarization Based on Entity Information Enhancement and Multi-Granularity Fusion) JiaruiTang嘉蕊 LiuMeiling美玲 - TiejunZhao铁军 + TiejunZhao铁军 JiyunZhou继云 155–165 “神经网络模型的快速发展使得多文档摘要可以获得人类可读的流畅的摘要,对大规模的数据进行预训练可以更好的从自然语言文本中捕捉更丰富的语义信息,并更好的作用于下游任务。目前很多的多文档摘要的工作也应用了预训练模型(如BERT)并取得了一定的效果,但是这些预训练模型不能更好的从文本中捕获事实性知识,没有考虑到多文档文本的结构化的实体-关系信息,本文提出了基于实体信息增强和多粒度融合的多文档摘要模型MGNIE,将实体关系信息融入预训练模型ERNIE中,增强知识事实以获得多层语义信息,解决摘要生成的事实一致性问题。进而从多种粒度进行多文档层次结构的融合建模,以词信息、实体信息以及句子信息捕捉长文本信息摘要生成所需的关键信息点。本文设计的模型,在国际标准评测数据集MultiNews上对比强基线模型效果和竞争力获得较大提升。” @@ -203,7 +203,7 @@ 生成,推理与排序:基于多任务架构的数学文字题生成(Generating, Reasoning & Ranking: Multitask Learning Framework for Math Word Problem Generation) TianyangCao天旸 XiaodanXu晓丹 - BaobaoChang宝宝 + BaobaoChang宝宝 178–189 “数学文字题是一段能反映数学等式潜在逻辑的叙述性文本。成功的数学问题生成在语言生成和教育领域都具有广阔的应用前景。前人的工作大多需要人工标注的模板或关键词作为输入,且未考虑数学表达式本身的特点。本文提出了一种多任务联合训练的问题文本生成模型。我们设计了三个辅助任务,包括数字间关系抽取、数值排序和片段替换预测。他们与生成目标联合训练,用以监督解码器的学习,增强模型对运算逻辑和问题条件的感知能力。实验证明所提方法能有效提升生成的数学文字题的质量。” 2022.ccl-1.17 @@ -272,7 +272,7 @@ 期货领域知识图谱构建(Construction of Knowledge Graph in Futures Field) WenxinLi雯昕 - HongyingZan红英 + HongyingZan红英 TongfengGuan同峰 YingjieHan英杰 246–256 @@ -320,7 +320,7 @@ HouliMa候丽 LingDong WenjunWang文君 - JianWang + JianWang ShengxiangGao盛祥 ZhengtaoYu正涛 293–304 @@ -456,7 +456,7 @@ <fixed-case>C</fixed-case>ore<fixed-case>V</fixed-case>alue:面向价值观计算的中文核心价值-行为体系及知识库(<fixed-case>C</fixed-case>ore<fixed-case>V</fixed-case>alue: <fixed-case>C</fixed-case>hinese Core Value-Behavior Frame and Knowledge Base for Value Computing) - PengyuanLiu鹏远 + PengyuanLiu鹏远 SanleZhang三乐 DongYu LinBo @@ -496,7 +496,7 @@ YixuanMa翊轩 WenruiWang文瑞 YuzheLiu宇哲 - MuyunYang沐昀 + MuyunYang沐昀 455–463 “专利文献是一种重要的技术文献,是知识产权强国的重要工作内容。目前专利语料库多集中于信息检索、机器翻译以及文本文分类等领域,尚缺乏更细粒度的标注,不足以支持问答、阅读理解等新形态的人工智能技术研发。本文面向专利智能分析的需要,提出了从解决问题、技术手段、效果三个角度对发明专利进行专利标注,并最终构建了包含313篇的中文专利关键信息语料库。利用命名实体识别技术对语料库关键信息进行识别和验证,表明专利关键信息的识别是不同于领域命名实体识别的更大粒度的信息抽取难题。” 2022.ccl-1.41 @@ -561,7 +561,7 @@ JishunZhao继舜 ShuchengZhu述承 YingLiu - PengyuanLiu鹏远 + PengyuanLiu鹏远 510–522 “尽管悲观者认为,职场中永远不可能存在性别平等。但随着人们观念的转变,愈来愈多的人们相信,职业的选择应只与个人能力相匹配,而不应由个体的性别决定。目前已经发现自然语言处理的各个任务中都存在着职业性别偏见。但这些研究往往只针对特定的英文任务,缺乏针对中文的、综合多任务的职业性别偏见测量研究。本文基于霍兰德职业模型,从中文自然语言处理中常见的三个任务出发,测量了词向量、共指消解和文本生成中的职业性别偏见,发现不同任务中的职业性别偏见既有一定的共性,又存在着独特的差异性。总体来看,不同任务中的职业性别偏见反映了现实生活中人们对于不同性别所选择职业的刻板印象。此外,在设计不同任务的偏见测量指标时,还需要考虑如语体、词序等语言学要素的影响。” 2022.ccl-1.46 diff --git a/data/xml/2022.cl.xml b/data/xml/2022.cl.xml index f1446e73f0..ce91a350fd 100644 --- a/data/xml/2022.cl.xml +++ b/data/xml/2022.cl.xml @@ -13,7 +13,7 @@ Obituary: <fixed-case>M</fixed-case>artin Kay - Ronald M.Kaplan + Ronald M.Kaplan HansUszkoreit 10.1162/coli_a_00424 1–3 @@ -22,7 +22,7 @@ To Augment or Not to Augment? A Comparative Study on Text Augmentation Techniques for Low-Resource <fixed-case>NLP</fixed-case> - Gözde GülŞahin + Gözde GülŞahin 10.1162/coli_a_00425 Data-hungry deep neural networks have established themselves as the de facto standard for many NLP tasks, including the traditional sequence tagging ones. Despite their state-of-the-art performance on high-resource languages, they still fall behind their statistical counterparts in low-resource scenarios. One methodology to counterattack this problem is text augmentation, that is, generating new synthetic training data points from existing data. Although NLP has recently witnessed several new textual augmentation techniques, the field still lacks a systematic performance analysis on a diverse set of languages and sequence tagging tasks. To fill this gap, we investigate three categories of text augmentation methodologies that perform changes on the syntax (e.g., cropping sub-sentences), token (e.g., random word insertion), and character (e.g., character swapping) levels. We systematically compare the methods on part-of-speech tagging, dependency parsing, and semantic role labeling for a diverse set of language families using various models, including the architectures that rely on pretrained multilingual contextualized language models such as mBERT. Augmentation most significantly improves dependency parsing, followed by part-of-speech tagging and semantic role labeling. We find the experimented techniques to be effective on morphologically rich languages in general rather than analytic languages such as Vietnamese. Our results suggest that the augmentation techniques can further improve over strong baselines based on mBERT, especially for dependency parsing. We identify the character-level methods as the most consistent performers, while synonym replacement and syntactic augmenters provide inconsistent improvements. Finally, we discuss that the results most heavily depend on the task, language pair (e.g., syntactic-level techniques mostly benefit higher-level tasks and morphologically richer languages), and model type (e.g., token-level augmentation provides significant improvements for BPE, while character-level ones give generally higher scores for char and mBERT based models). 5–42 @@ -35,7 +35,7 @@ TanikSaikh TameeshBiswas AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 10.1162/coli_a_00429 The quest for new information is an inborn human trait and has always been quintessential for human survival and progress. Novelty drives curiosity, which in turn drives innovation. In Natural Language Processing (NLP), Novelty Detection refers to finding text that has some new information to offer with respect to whatever is earlier seen or known. With the exponential growth of information all across the Web, there is an accompanying menace of redundancy. A considerable portion of the Web contents are duplicates, and we need efficient mechanisms to retain new information and filter out redundant information. However, detecting redundancy at the semantic level and identifying novel text is not straightforward because the text may have less lexical overlap yet convey the same information. On top of that, non-novel/redundant information in a document may have assimilated from multiple source documents, not just one. The problem surmounts when the subject of the discourse is documents, and numerous prior documents need to be processed to ascertain the novelty/non-novelty of the current one in concern. In this work, we build upon our earlier investigations for document-level novelty detection and present a comprehensive account of our efforts toward the problem. We explore the role of pre-trained Textual Entailment (TE) models to deal with multiple source contexts and present the outcome of our current investigations. We argue that a multipremise entailment task is one close approximation toward identifying semantic-level non-novelty. Our recent approach either performs comparably or achieves significant improvement over the latest reported results on several datasets and across several related tasks (paraphrasing, plagiarism, rewrite). We critically analyze our performance with respect to the existing state of the art and show the superiority and promise of our approach for future investigations. We also present our enhanced dataset TAP-DLND 2.0 and several baselines to the community for further research on document-level novelty detection. 77–117 @@ -75,7 +75,7 @@ ZhijingJin ZhitingHu OlgaVechtomova - RadaMihalcea + RadaMihalcea 10.1162/coli_a_00426 Text style transfer is an important task in natural language generation, which aims to control certain attributes in the generated text, such as politeness, emotion, humor, and many others. It has a long history in the field of natural language processing, and recently has re-gained significant attention thanks to the promising performance brought by deep neural models. In this article, we present a systematic survey of the research on neural text style transfer, spanning over 100 representative articles since the first neural text style transfer work in 2017. We discuss the task formulation, existing datasets and subtasks, evaluation, as well as the rich methodologies in the presence of parallel and non-parallel data. We also provide discussions on a variety of important topics regarding the future development of this task.1 155–205 @@ -94,7 +94,7 @@ Revisiting the Boundary between <fixed-case>ASR</fixed-case> and <fixed-case>NLU</fixed-case> in the Age of Conversational Dialog Systems ManaalFaruqui - DilekHakkani-Tür + DilekHakkani-Tür 10.1162/coli_a_00430 As more users across the world are interacting with dialog agents in their daily life, there is a need for better speech understanding that calls for renewed attention to the dynamics between research in automatic speech recognition (ASR) and natural language understanding (NLU). We briefly review these research areas and lay out the current relationship between them. In light of the observations we make in this article, we argue that (1) NLU should be cognizant of the presence of ASR models being used upstream in a dialog system’s pipeline, (2) ASR should be able to learn from errors found in NLU, (3) there is a need for end-to-end data sets that provide semantic annotations on spoken input, (4) there should be stronger collaboration between ASR and NLU research communities. 221–232 @@ -112,7 +112,7 @@ Erratum for “Formal Basis of a Language Universal” MilošStanojević - MarkSteedman + MarkSteedman 10.1162/coli_x_00432 237–237 2022.cl-1.10 @@ -132,7 +132,7 @@ Ethics Sheet for Automatic Emotion Recognition and Sentiment Analysis - Saif M.Mohammad + Saif M.Mohammad 10.1162/coli_a_00433 The importance and pervasiveness of emotions in our lives makes affective computing a tremendously important and vibrant line of work. Systems for automatic emotion recognition (AER) and sentiment analysis can be facilitators of enormous progress (e.g., in improving public health and commerce) but also enablers of great harm (e.g., for suppressing dissidents and manipulating voters). Thus, it is imperative that the affective computing community actively engage with the ethical ramifications of their creations. In this article, I have synthesized and organized information from AI Ethics and Emotion Recognition literature to present fifty ethical considerations relevant to AER. Notably, this ethics sheet fleshes out assumptions hidden in how AER is commonly framed, and in the choices often made regarding the data, method, and evaluation. Special attention is paid to the implications of AER on privacy and social groups. Along the way, key recommendations are made for responsible AER. The objective of the ethics sheet is to facilitate and encourage more thoughtfulness on why to automate, how to automate, and how to judge success well before the building of AER systems. Additionally, the ethics sheet acts as a useful introductory document on emotion recognition (complementing survey articles). 239–278 @@ -144,7 +144,7 @@ Domain Adaptation with Pre-trained Transformers for Query-Focused Abstractive Text Summarization Md Tahmid RahmanLaskar EnamulHoque - Jimmy XiangjiHuang + Jimmy XiangjiHuang 10.1162/coli_a_00434 The Query-Focused Text Summarization (QFTS) task aims at building systems that generate the summary of the text document(s) based on the given query. A key challenge in addressing this task is the lack of large labeled data for training the summarization model. In this article, we address this challenge by exploring a series of domain adaptation techniques. Given the recent success of pre-trained transformer models in a wide range of natural language processing tasks, we utilize such models to generate abstractive summaries for the QFTS task for both single-document and multi-document scenarios. For domain adaptation, we apply a variety of techniques using pre-trained transformer-based summarization models including transfer learning, weakly supervised learning, and distant supervision. Extensive experiments on six datasets show that our proposed approach is very effective in generating abstractive summaries for the QFTS task while setting a new state-of-the-art result in several datasets across a set of automatic and human evaluation metrics. 279–320 @@ -193,7 +193,7 @@ Dual Attention Model for Citation Recommendation with Analyses on Explainability of Attention Mechanisms and Qualitative Experiments - YangZhang + YangZhang QiangMa 10.1162/coli_a_00438 Based on an exponentially increasing number of academic articles, discovering and citing comprehensive and appropriate resources have become non-trivial tasks. Conventional citation recommendation methods suffer from severe information losses. For example, they do not consider the section header of the paper that the author is writing and for which they need to find a citation, the relatedness between the words in the local context (the text span that describes a citation), or the importance of each word from the local context. These shortcomings make such methods insufficient for recommending adequate citations to academic manuscripts. In this study, we propose a novel embedding-based neural network called dual attention model for citation recommendation (DACR) to recommend citations during manuscript preparation. Our method adapts the embedding of three semantic pieces of information: words in the local context, structural contexts,1 and the section on which the author is working. A neural network model is designed to maximize the similarity between the embedding of the three inputs (local context words, section headers, and structural contexts) and the target citation appearing in the context. The core of the neural network model comprises self-attention and additive attention; the former aims to capture the relatedness between the contextual words and structural context, and the latter aims to learn their importance. Recommendation experiments on real-world datasets demonstrate the effectiveness of the proposed approach. To seek explainability on DACR, particularly the two attention mechanisms, the learned weights from them are investigated to determine how the attention mechanisms interpret “relatedness” and “importance” through the learned weights. In addition, qualitative analyses were conducted to testify that DACR could find necessary citations that were not noticed by the authors in the past due to the limitations of the keyword-based searching. @@ -212,7 +212,7 @@ Boring Problems Are Sometimes the Most Interesting - RichardSproat + RichardSproat 10.1162/coli_a_00439 In a recent position paper, Turing Award Winners Yoshua Bengio, Geoffrey Hinton, and Yann LeCun make the case that symbolic methods are not needed in AI and that, while there are still many issues to be resolved, AI will be solved using purely neural methods. In this piece I issue a challenge: Demonstrate that a purely neural approach to the problem of text normalization is possible. Various groups have tried, but so far nobody has eliminated the problem of unrecoverable errors, errors where, due to insufficient training data or faulty generalization, the system substitutes some other reading for the correct one. Solutions have been proposed that involve a marriage of traditional finite-state methods with neural models, but thus far nobody has shown that the problem can be solved using neural methods alone. Though text normalization is hardly an “exciting” problem, I argue that until one can solve “boring” problems like that using purely AI methods, one cannot claim that AI is a success. 483–490 @@ -257,7 +257,7 @@ AhmetÜstün AriannaBisazza GosseBouma - Gertjanvan Noord + Gertjanvan Noord 10.1162/coli_a_00443 Recent advances in multilingual language modeling have brought the idea of a truly universal parser closer to reality. However, such models are still not immune to the “curse of multilinguality”: Cross-language interference and restrained model capacity remain major obstacles. To address this, we propose a novel language adaptation approach by introducing contextual language adapters to a multilingual parser. Contextual language adapters make it possible to learn adapters via language embeddings while sharing model parameters across languages based on contextual parameter generation. Moreover, our method allows for an easy but effective integration of existing linguistic typology features into the parsing model. Because not all typological features are available for every language, we further combine typological feature prediction with parsing in a multi-task model that achieves very competitive parsing performance without the need for an external prediction system for missing features. The resulting parser, UDapter, can be used for dependency parsing as well as sequence labeling tasks such as POS tagging, morphological tagging, and NER. In dependency parsing, it outperforms strong monolingual and multilingual baselines on the majority of both high-resource and low-resource (zero-shot) languages, showing the success of the proposed adaptation approach. In sequence labeling tasks, our parser surpasses the baseline on high resource languages, and performs very competitively in a zero-shot setting. Our in-depth analyses show that adapter generation via typological features of languages is key to this success.1 555–592 @@ -291,8 +291,8 @@ Survey of Low-Resource Machine Translation BarryHaddow RachelBawden - Antonio ValerioMiceli Barone - JindřichHelcl + Antonio ValerioMiceli Barone + JindřichHelcl AlexandraBirch 10.1162/coli_a_00446 We present a survey covering the state of the art in low-resource machine translation (MT) research. There are currently around 7,000 languages spoken in the world and almost all language pairs lack significant resources for training machine translation models. There has been increasing interest in research addressing the challenge of producing useful translation models when very little translated training data is available. We present a summary of this topical research field and provide a description of the techniques evaluated by researchers in several recent shared tasks in low-resource MT. @@ -304,7 +304,7 @@ Position Information in Transformers: An Overview PhilippDufter MartinSchmitt - HinrichSchütze + HinrichSchütze 10.1162/coli_a_00445 Transformers are arguably the main workhorse in recent natural language processing research. By definition, a Transformer is invariant with respect to reordering of the input. However, language is inherently sequential and word order is essential to the semantics and syntax of an utterance. In this article, we provide an overview and theoretical comparison of existing methods to incorporate position information into Transformer models. The objectives of this survey are to (1) showcase that position information in Transformer is a vibrant and extensive research area; (2) enable the reader to compare existing methods by providing a unified notation and systematization of different approaches along important model dimensions; (3) indicate what characteristics of an application should be taken into account when selecting a position encoding; and (4) provide stimuli for future research. 733–763 @@ -325,7 +325,7 @@ Martha Palmer and Barbara Di Eugenio Interview Martha Evens - MarthaEvens + MarthaEvens 10.1162/coli_a_00453 765–773 2022.cl-4.9 @@ -333,7 +333,7 @@ Martha Evens, Brief Autobiography - MarthaEvens + MarthaEvens 10.1162/coli_a_00452 775–782 2022.cl-4.10 @@ -393,10 +393,10 @@ Information Theory–based Compositional Distributional Semantics - EnriqueAmigó + EnriqueAmigó AlejandroAriza-Casabona VictorFresno - M. AntòniaMartí + M. AntòniaMartí 10.1162/coli_a_00454 In the context of text representation, Compositional Distributional Semantics models aim to fuse the Distributional Hypothesis and the Principle of Compositionality. Text embedding is based on co-ocurrence distributions and the representations are in turn combined by compositional functions taking into account the text structure. However, the theoretical basis of compositional functions is still an open issue. In this article we define and study the notion of Information Theory–based Compositional Distributional Semantics (ICDS): (i) We first establish formal properties for embedding, composition, and similarity functions based on Shannon’s Information Theory; (ii) we analyze the existing approaches under this prism, checking whether or not they comply with the established desirable properties; (iii) we propose two parameterizable composition and similarity functions that generalize traditional approaches while fulfilling the formal properties; and finally (iv) we perform an empirical study on several textual similarity datasets that include sentences with a high and low lexical overlap, and on the similarity between words and their description. Our theoretical analysis and empirical results show that fulfilling formal properties affects positively the accuracy of text representation models in terms of correspondence (isometry) between the embedding and meaning spaces. 907–948 @@ -443,7 +443,7 @@ The Text Anonymization Benchmark (<fixed-case>TAB</fixed-case>): A Dedicated Corpus and Evaluation Framework for Text Anonymization IldikóPilán PierreLison - LiljaØvrelid + LiljaØvrelid AnthiPapadopoulou DavidSánchez MontserratBatet @@ -468,7 +468,7 @@ A Metrological Perspective on Reproducibility in <fixed-case>NLP</fixed-case>* - AnyaBelz + AnyaBelz 10.1162/coli_a_00448 Reproducibility has become an increasingly debated topic in NLP and ML over recent years, but so far, no commonly accepted definitions of even basic terms or concepts have emerged. The range of different definitions proposed within NLP/ML not only do not agree with each other, they are also not aligned with standard scientific definitions. This article examines the standard definitions of repeatability and reproducibility provided by the meta-science of metrology, and explores what they imply in terms of how to assess reproducibility, and what adopting them would mean for reproducibility assessment in NLP/ML. It turns out the standard definitions lead directly to a method for assessing reproducibility in quantified terms that renders results from reproduction studies comparable across multiple reproductions of the same original study, as well as reproductions of different original studies. The article considers where this method sits in relation to other aspects of NLP work one might wish to assess in the context of reproducibility. 1125–1135 diff --git a/data/xml/2022.clasp.xml b/data/xml/2022.clasp.xml index 479a220659..1f3c1b13fe 100644 --- a/data/xml/2022.clasp.xml +++ b/data/xml/2022.clasp.xml @@ -5,7 +5,7 @@ Proceedings of the 2022 CLASP Conference on (Dis)embodiment SimonDobnik JulianGrove - AsadSayeed + AsadSayeed Association for Computational Linguistics
Gothenburg, Sweden
September @@ -21,7 +21,7 @@ A Small but Informed and Diverse Model: The Case of the Multimodal <fixed-case>G</fixed-case>uess<fixed-case>W</fixed-case>hat!? Guessing Game ClaudioGreco AlbertoTestoni - RaffaellaBernardi + RaffaellaBernardi StellaFrank 1–10 Pre-trained Vision and Language Transformers achieve high performance on downstream tasks due to their ability to transfer representational knowledge accumulated during pretraining on substantial amounts of data. In this paper, we ask whether it is possible to compete with such models using features based on transferred (pre-trained, frozen) representations combined with a lightweight architecture. We take a multimodal guessing task as our testbed, GuessWhat?!. An ensemble of our lightweight model matches the performance of the finetuned pre-trained transformer (LXMERT). An uncertainty analysis of our ensemble shows that the lightweight transferred representations close the data uncertainty gap with LXMERT, while retaining model diversity leading to ensemble boost. We further demonstrate that LXMERT’s performance gain is due solely to its extra V&L pretraining rather than because of architectural improvements. These results argue for flexible integration of multiple features and lightweight models as a viable alternative to large, cumbersome, pre-trained models. @@ -75,7 +75,7 @@ Embodied Interaction in Mental Health Consultations: Some Observations on Grounding and Repair Jing HuiLaw - PatrickHealey + PatrickHealey RosellaGalindo Esparza 51–61 Shared physical space is an important resource for face-to-face interaction. People use the position and orientation of their bodies—relative to each other and relative to the physical environment—to determine who is part of a conversation, to manage conversational roles (e.g. speaker, addressee, side-participant) and to help co-ordinate turn-taking. These embodied uses of shared space also extend to more fine-grained aspects of interaction, such as gestures and body movements, to support topic management, orchestration of turns and grounding. This paper explores the role of embodied resources in (mis)communication in a corpus of mental health consultations. We illustrate some of the specific ways in which clinicians and patients can exploit embodiment and the position of objects in shared space to diagnose and manage moments of misunderstanding. diff --git a/data/xml/2022.clib.xml b/data/xml/2022.clib.xml index 9571d71ba9..6eb8cd1680 100644 --- a/data/xml/2022.clib.xml +++ b/data/xml/2022.clib.xml @@ -16,7 +16,7 @@ <fixed-case>O</fixed-case>nto<fixed-case>P</fixed-case>opulis, a System for Learning Semantic Classes - HristoTanev + HristoTanev 8–12 Ontopopulis is a multilingual weakly supervised terminology learning algorithm which takes on its input a set of seed terms for a semantic category and an unannotated text corpus. The algorithm learns additional terms, which belong to this category. For example, for the category “environmental disasters” the input seed set in English is environmental disaster, water pollution, climate change. Among the highest ranked new terms which the system learns for this semantic class are deforestation, global warming and so on. 2022.clib-1.1 @@ -124,7 +124,7 @@ Andrei-MariusAvram VasilePăis MariaMitrofan - Verginica BarbuMititelu + Verginica BarbuMititelu ElenaIrimia ValentinBadea 105–112 @@ -152,7 +152,7 @@ Sense-Annotated Corpus for <fixed-case>R</fixed-case>ussian AlexanderKirillovich - NataliaLoukachevitch + NataliaLoukachevitch MaksimKulaev AngelinaBolshina DmitryIlvovsky @@ -163,7 +163,7 @@ A <fixed-case>R</fixed-case>omanian Treebank Annotated with Verbal Multiword Expressions - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu MihaelaCristescu MariaMitrofan Bianca-MădălinaZgreabăn @@ -194,7 +194,7 @@ Language rehabilitation of people with <fixed-case>BROCA</fixed-case> aphasia using deep neural machine translation - KamelSmaili + KamelSmaili DavidLanglois PeterPribil 162–170 diff --git a/data/xml/2022.clinicalnlp.xml b/data/xml/2022.clinicalnlp.xml index f092193ce4..c900c58e58 100644 --- a/data/xml/2022.clinicalnlp.xml +++ b/data/xml/2022.clinicalnlp.xml @@ -126,7 +126,7 @@ Isabelle RoseAlberto Pia GabrielleAlfonso DanaMoukheiber - ByronWallace + ByronWallace AnnaRumshisky JenniferLiang PreethiRaghavan @@ -167,7 +167,7 @@ Ensemble-based Fine-Tuning Strategy for Temporal Relation Extraction from the Clinical Narrative LijingWang - TimothyMiller + TimothyMiller StevenBethard GuerganaSavova 103-108 @@ -180,7 +180,7 @@ Exploring Text Representations for Generative Temporal Relation Extraction DmitriyDligach StevenBethard - TimothyMiller + TimothyMiller GuerganaSavova 109-113 Sequence-to-sequence models are appealing because they allow both encoder and decoder to be shared across many tasks by formulating those tasks as text-to-text problems. Despite recently reported successes of such models, we find that engineering input/output representations for such text-to-text models is challenging. On the Clinical TempEval 2016 relation extraction task, the most natural choice of output representations, where relations are spelled out in simple predicate logic statements, did not lead to good performance. We explore a variety of input/output representations, with the most successful prompting one event at a time, and achieving results competitive with standard pairwise temporal relation extraction systems. diff --git a/data/xml/2022.clpsych.xml b/data/xml/2022.clpsych.xml index ac5b5c6b4d..c1baafbd69 100644 --- a/data/xml/2022.clpsych.xml +++ b/data/xml/2022.clpsych.xml @@ -8,7 +8,7 @@ MariaLiakata StevenBedrick BartDesmet - MollyIreland + MollyIreland AndrewLee SeanMacAvaney MatthewPurver @@ -79,9 +79,9 @@ JamesFiumara JuhiPandey ChristopherChatham - ChristopherCieri - RobertSchultz - MarkLiberman + ChristopherCieri + RobertSchultz + MarkLiberman JuliaParish-morris 40-46 This study examined differences in linguistic features produced by autistic and neurotypical (NT) children during brief picture descriptions, and assessed feature stability over time. Weekly speech samples from well-characterized participants were collected using a telephony system designed to improve access for geographically isolated and historically marginalized communities. Results showed stable group differences in certain acoustic features, some of which may potentially serve as key outcome measures in future treatment studies. These results highlight the importance of eliciting semi-structured speech samples in a variety of contexts over time, and adds to a growing body of research showing that fine-grained naturalistic communication features hold promise for intervention research. @@ -148,7 +148,7 @@ MichaelPullmann ThomasHull PatriciaAreán - TrevorCohen + TrevorCohen 105-115 The increasing adoption of message-based behavioral therapy enables new approaches to assessing mental health using linguistic analysis of patient-generated text. Word counting approaches have demonstrated utility for linguistic feature extraction, but deep learning methods hold additional promise given recent advances in this area. We evaluated the utility of emotion features extracted using a BERT-based model in comparison to emotions extracted using word counts as predictors of symptom severity in a large set of messages from text-based therapy sessions involving over 6,500 unique patients, accompanied by data from repeatedly administered symptom scale measurements. BERT-based emotion features explained more variance in regression models of symptom severity, and improved predictive modeling of scale-derived diagnostic categories. However, LIWC categories that are not directly related to emotions provided valuable and complementary information for modeling of symptom severity, indicating a role for both approaches in inferring the mental states underlying patient-generated language. 2022.clpsych-1.9 @@ -175,7 +175,7 @@ JustinTauscher XiruoDing DrorBen-zeev - TrevorCohen + TrevorCohen 126-136 There is growing evidence that mobile text message exchanges between patients and therapists can augment traditional cognitive behavioral therapy. The automatic characterization of patient thinking patterns in this asynchronous text communication may guide treatment and assist in therapist training. In this work, we automatically identify distorted thinking in text-based patient-therapist exchanges, investigating the role of conversation history (context) in distortion prediction. We identify six unique types of cognitive distortions and utilize BERT-based architectures to represent text messages within the context of the conversation. We propose two approaches for leveraging dynamic conversation context in model training. By representing the text messages within the context of the broader patient-therapist conversation, the models better emulate the therapist’s task of recognizing distorted thoughts. This multi-turn classification approach also leverages the clustering of distorted thinking in the conversation timeline. We demonstrate that including conversation context, including the proposed dynamic context methods, improves distortion prediction performance. The proposed architectures and conversation encoding approaches achieve performance comparable to inter-rater agreement. The presence of any distorted thinking is identified with relatively high performance at 0.73 F1, significantly outperforming the best context-agnostic models (0.68 F1). 2022.clpsych-1.11 @@ -231,7 +231,7 @@ SalvatoreGiorgi MckenzieHimelein-wachowiak DanielHabib - LyleUngar + LyleUngar BrendaCurtis 177-183 2022.clpsych-1.15 @@ -268,8 +268,8 @@ AliciaPerez LourdesAraujo NuriaLebea - MaiteOronoz - ArantzaCasillas + MaiteOronoz + ArantzaCasillas 199-204 This paper describes the participation of our group on the CLPsych 2022 shared task. For task A, which tries to capture changes in mood over time, we have applied an Approximate Nearest Neighbour (ANN) extraction technique with the aim of relabelling the user messages according to their proximity, based on the representation of these messages in a vector space. Regarding the subtask B, we have used the output of the subtask A to train a Recurrent Neural Network (RNN) to predict the risk of suicide at the user level. The results obtained are very competitive considering that our team was one of the few that made use of the organisers’ proposed virtual environment and also made use of the Task A output to predict the Task B results. 2022.clpsych-1.17 @@ -328,7 +328,7 @@ PrasadithKirinde Gamaarachchige AhmedHusseini Orabi MahmoudHusseini Orabi - DianaInkpen + DianaInkpen 232-238 This paper investigates the impact of using Multi-Task Learning (MTL) to predict mood changes over time for each individual (social media user). The presented models were developed as a part of the Computational Linguistics and Clinical Psychology (CLPsych) 2022 shared task. Given the limited number of Reddit social media users, as well as their posts, we decided to experiment with different multi-task learning architectures to identify to what extent knowledge can be shared among similar tasks. Due to class imbalance at both post and user levels and to accommodate task alignment, we randomly sampled an equal number of instances from the respective classes and performed ensemble learning to reduce prediction variance. Faced with several constraints, we managed to produce competitive results that could provide insights into the use of multi-task learning to identify mood changes over time and suicide ideation risk. 2022.clpsych-1.22 @@ -368,9 +368,9 @@ ShashankaSubrahmanya MatthewMatero NikitaSoni - Sharath ChandraGuntuku + Sharath ChandraGuntuku JohannesEichstaedt - H. AndrewSchwartz + H. AndrewSchwartz 251-258 Psychological states unfold dynamically; to understand and measure mental health at scale we need to detect and measure these changes from sequences of online posts. We evaluate two approaches to capturing psychological changes in text: the first relies on computing the difference between the embedding of a message with the one that precedes it, the second relies on a “human-aware” multi-level recurrent transformer (HaRT). The mood changes of timeline posts of users were annotated into three classes, ‘ordinary,’ ‘switching’ (positive to negative or vice versa) and ‘escalations’ (increasing in intensity). For classifying these mood changes, the difference-between-embeddings technique – applied to RoBERTa embeddings – showed the highest overall F1 score (0.61) across the three different classes on the test set. The technique particularly outperformed the HaRT transformer (and other baselines) in the detection of switches (F1 = .33) and escalations (F1 = .61).Consistent with the literature, the language use patterns associated with mental-health related constructs in prior work (including depression, stress, anger and anxiety) predicted both mood switches and escalations. 2022.clpsych-1.25 diff --git a/data/xml/2022.cltw.xml b/data/xml/2022.cltw.xml index b473599982..240f8760e6 100644 --- a/data/xml/2022.cltw.xml +++ b/data/xml/2022.cltw.xml @@ -66,7 +66,7 @@ Iterated Dependencies in a <fixed-case>B</fixed-case>reton treebank and implications for a Categorial Dependency Grammar AnnieForet DenisBéchet - ValérieBellynck + ValérieBellynck 40–46 Categorial Dependency Grammars (CDG) are computational grammars for natural language processing, defining dependency structures. They can be viewed as a formal system, where types are attached to words, combining the classical categorial grammars’ elimination rules with valency pairing rules able to define discontinuous (non-projective) dependencies. Algorithms have been proposed to infer grammars in this class from treebanks, with respect to Mel’čuk principles. We consider this approach with experiments on Breton. We focus in particular on ”repeatable dependencies” (iterated) and their patterns. A dependency d is iterated in a dependency structure if some word in this structure governs several other words through dependency d. We illustrate this approach with data in the universal dependencies format and dependency patterns written in Grew (a graph rewriting tool dedicated to applications in natural Language Processing). 2022.cltw-1.6 @@ -98,7 +98,7 @@ Handwriting recognition for <fixed-case>S</fixed-case>cottish <fixed-case>G</fixed-case>aelic WilliamLamb - BeatriceAlex + BeatriceAlex MarkSinclair 60–70 Like most other minority languages, Scottish Gaelic has limited tools and resources available for Natural Language Processing research and applications. These limitations restrict the potential of the language to participate in modern speech technology, while also restricting research in fields such as corpus linguistics and the Digital Humanities. At the same time, Gaelic has a long written history, is well-described linguistically, and is unusually well-supported in terms of potential NLP training data. For instance, archives such as the School of Scottish Studies hold thousands of digitised recordings of vernacular speech, many of which have been transcribed as paper-based, handwritten manuscripts. In this paper, we describe a project to digitise and recognise a corpus of handwritten narrative transcriptions, with the intention of re-purposing it to develop a Gaelic speech recognition system. @@ -121,7 +121,7 @@ Cipher – Faoi Gheasa: A Game-with-a-Purpose for <fixed-case>I</fixed-case>rish - ElaineUí Dhonnchadha + ElaineUí Dhonnchadha MonicaWard LiangXu 77–84 @@ -176,7 +176,7 @@ LucyEvans WilliamLamb MarkSinclair - BeatriceAlex + BeatriceAlex 110–120 This paper discusses our efforts to develop a full automatic speech recognition (ASR) system for Scottish Gaelic, starting from a point of limited resource. Building ASR technology is important for documenting and revitalising endangered languages; it enables existing resources to be enhanced with automatic subtitles and transcriptions, improves accessibility for users, and, in turn, encourages continued use of the language. In this paper, we explain the many difficulties faced when collecting minority language data for speech recognition. A novel cross-lingual approach to the alignment of training data is used to overcome one such difficulty, and in this way we demonstrate how majority language resources can bootstrap the development of lower-resourced language technology. We use the Kaldi speech recognition toolkit to develop several Gaelic ASR systems, and report a final WER of 26.30%. This is a 9.50% improvement on our original model. 2022.cltw-1.16 diff --git a/data/xml/2022.cmcl.xml b/data/xml/2022.cmcl.xml index 33069e2bc6..baea3c22bc 100644 --- a/data/xml/2022.cmcl.xml +++ b/data/xml/2022.cmcl.xml @@ -5,9 +5,9 @@ Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics EmmanueleChersoni NoraHollenstein - CassandraJacobs + CassandraJacobs YoheiOseki - LaurentPrévot + LaurentPrévot EnricoSantus Association for Computational Linguistics
Dublin, Ireland
@@ -23,7 +23,7 @@ Seeing the advantage: visually grounding word embeddings to better capture human semantic knowledge DannyMerkx - StefanFrank + StefanFrank MirjamErnestus 1-11 Distributional semantic models capture word-level meaning that is useful in many natural language processing tasks and have even been shown to capture cognitive aspects of word meaning. The majority of these models are purely text based, even though the human sensory experience is much richer. In this paper we create visually grounded word embeddings by combining English text and images and compare them to popular text-based methods, to see if visual information allows our model to better capture cognitive aspects of word meaning. Our analysis shows that visually grounded embedding similarities are more predictive of the human reaction times in a large priming experiment than the purely text-based embeddings. The visually grounded embeddings also correlate well with human word similarity ratings. Importantly, in both experiments we show that the grounded embeddings account for a unique portion of explained variance, even when we include text-based embeddings trained on huge corpora. This shows that visual grounding allows our model to capture information that cannot be extracted using text as the only source of information. @@ -60,7 +60,7 @@ Less Descriptive yet Discriminative: Quantifying the Properties of Multimodal Referring Utterances via <fixed-case>CLIP</fixed-case> EceTakmaz SandroPezzelle - RaquelFernández + RaquelFernández 36-42 In this work, we use a transformer-based pre-trained multimodal model, CLIP, to shed light on the mechanisms employed by human speakers when referring to visual entities. In particular, we use CLIP to quantify the degree of descriptiveness (how well an utterance describes an image in isolation) and discriminativeness (to what extent an utterance is effective in picking out a single image among similar images) of human referring utterances within multimodal dialogues. Overall, our results show that utterances become less descriptive over time while their discriminativeness remains unchanged. Through analysis, we propose that this trend could be due to participants relying on the previous mentions in the dialogue history, as well as being able to distill the most discriminative information from the visual context. In general, our study opens up the possibility of using this and similar models to quantify patterns in human data and shed light on the underlying cognitive mechanisms. 2022.cmcl-1.4 @@ -104,7 +104,7 @@ Predicting scalar diversity with context-driven uncertainty over alternatives JenniferHu - RogerLevy + RogerLevy SebastianSchuster 68-74 Scalar implicature (SI) arises when a speaker uses an expression (e.g., “some”) that is semantically compatible with a logically stronger alternative on the same scale (e.g., “all”), leading the listener to infer that they did not intend to convey the stronger meaning. Prior work has demonstrated that SI rates are highly variable across scales, raising the question of what factors determine the SI strength for a particular scale. Here, we test the hypothesis that SI rates depend on the listener’s confidence in the underlying scale, which we operationalize as uncertainty over the distribution of possible alternatives conditioned on the context. We use a T5 model fine-tuned on a text infilling task to estimate this distribution. We find that scale uncertainty predicts human SI rates, measured as entropy over the sampled alternatives and over latent classes among alternatives in sentence embedding space. Furthermore, we do not find a significant effect of the surprisal of the strong scalemate. Our results suggest that pragmatic inferences depend on listeners’ context-driven uncertainty over alternatives. @@ -122,7 +122,7 @@ NesetTan Paul MichaelCorballis PatriciaRiddle - MichaelWitbrock + MichaelWitbrock 75-87 Attention describes cognitive processes that are important to many human phenomena including reading. The term is also used to describe the way in which transformer neural networks perform natural language processing. While attention appears to be very different under these two contexts, this paper presents an analysis of the correlations between transformer attention and overt human attention during reading tasks. An extensive analysis of human eye tracking datasets showed that the dwell times of human eye movements were strongly correlated with the attention patterns occurring in the early layers of pre-trained transformers such as BERT. Additionally, the strength of a correlation was not related to the number of parameters within a transformer. This suggests that something about the transformers’ architecture determined how closely the two measures were correlated. 2022.cmcl-1.9 @@ -133,7 +133,7 @@ About Time: Do Transformers Learn Temporal Verbal Aspect? EleniMetheniti - TimVan De Cruys + TimVan De Cruys NabilHathout 88-101 Aspect is a linguistic concept that describes how an action, event, or state of a verb phrase is situated in time. In this paper, we explore whether different transformer models are capable of identifying aspectual features. We focus on two specific aspectual features: telicity and duration. Telicity marks whether the verb’s action or state has an endpoint or not (telic/atelic), and duration denotes whether a verb expresses an action (dynamic) or a state (stative). These features are integral to the interpretation of natural language, but also hard to annotate and identify with NLP methods. We perform experiments in English and French, and our results show that transformer models adequately capture information on telicity and duration in their vectors, even in their non-finetuned forms, but are somewhat biased with regard to verb tense and word order. @@ -195,7 +195,7 @@ Team <fixed-case>ÚFAL</fixed-case> at <fixed-case>CMCL</fixed-case> 2022 Shared Task: Figuring out the correct recipe for predicting Eye-Tracking features using Pretrained Language Models SunitBhattacharya RishuKumar - OndrejBojar + OndrejBojar 130-135 Eye-Tracking data is a very useful source of information to study cognition and especially language comprehension in humans. In this paper, we describe our systems for the CMCL 2022 shared task on predicting eye-tracking information. We describe our experiments withpretrained models like BERT and XLM and the different ways in which we used those representations to predict four eye-tracking features. Along with analysing the effect of using two different kinds of pretrained multilingual language models and different ways of pooling the token-level representations, we also explore how contextual information affects the performance of the systems. Finally, we also explore if factors like augmenting linguistic information affect the predictions. Our submissions achieved an average MAE of 5.72 and ranked 5th in the shared task. The average MAE showed further reduction to 5.25 in post task evaluation. 2022.cmcl-1.15 diff --git a/data/xml/2022.cmlc.xml b/data/xml/2022.cmlc.xml index ec60176885..bc728287c0 100644 --- a/data/xml/2022.cmlc.xml +++ b/data/xml/2022.cmlc.xml @@ -7,7 +7,7 @@ AdrienBarbaresi SimonClematide MarcKupietz - HaraldLüngen + HaraldLüngen European Language Resources Association
Marseille, France
June @@ -23,7 +23,7 @@ Challenges in Creating a Representative Corpus of <fixed-case>R</fixed-case>omanian Micro-Blogging Text VasilePais MariaMitrofan - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu ElenaIrimia RoxanaMicu Carol LucaGasan diff --git a/data/xml/2022.codi.xml b/data/xml/2022.codi.xml index 0ff783b055..f0cb0b8e1b 100644 --- a/data/xml/2022.codi.xml +++ b/data/xml/2022.codi.xml @@ -23,7 +23,7 @@ <fixed-case>KOJAK</fixed-case>: A New Corpus for Studying <fixed-case>G</fixed-case>erman Discourse Particle ja AdilSoubki - OwenRambow + OwenRambow ChongKang 1–6 In German, ja can be used as a discourse particle to indicate that a proposition, according to the speaker, is believed by both the speaker and audience. We use this observation to create KoJaK, a distantly-labeled English dataset derived from Europarl for studying when a speaker believes a statement to be common ground. This corpus is then analyzed to identify lexical choices in English that correspond with German ja. Finally, we perform experiments on the dataset to predict if an English clause corresponds to a German clause containing ja and achieve an F-measure of 75.3% on a balanced test corpus. @@ -55,7 +55,7 @@ Evaluating Discourse Cohesion in Pre-trained Language Models JieHe WanqiuLong - DeyiXiong + DeyiXiong 28–34 Large pre-trained neural models have achieved remarkable success in natural language process (NLP), inspiring a growing body of research analyzing their ability from different aspects. In this paper, we propose a test suite to evaluate the cohesive ability of pre-trained language models. The test suite contains multiple cohesion phenomena between adjacent and non-adjacent sentences. We try to compare different pre-trained language models on these phenomena and analyze the experimental results,hoping more attention can be given to discourse cohesion in the future. The built discourse cohesion test suite will be publicly available at https://github.com/probe2/discourse_cohesion. 2022.codi-1.4 @@ -66,7 +66,7 @@ AndrewShen FajriKoto Jey HanLau - TimothyBaldwin + TimothyBaldwin 35–41 We propose a novel unconstrained bottom-up approach for rhetorical discourse parsing based on sequence labelling of adjacent pairs of discourse units (DUs), based on the framework of Koto et al. (2021). We describe the unique training requirements of an unconstrained parser, and explore two different training procedures: (1) fixed left-to-right; and (2) random order in tree construction. Additionally, we introduce a novel dynamic oracle for unconstrained bottom-up parsing. Our proposed parser achieves competitive results for bottom-up rhetorical discourse parsing. 2022.codi-1.5 @@ -97,8 +97,8 @@ ZlataKikteva KamilaGorska WassilikiSiskou - AnnetteHautli-Janisz - ChrisReed + AnnetteHautli-Janisz + ChrisReed 54–63 Building on the recent results of a study into the roles that are played by questions in argumentative dialogue (Hautli-Janisz et al.,2022a), we expand the analysis to investigate a newly released corpus that constitutes the largest extant corpus of closely annotated debate. Questions play a critical role in driving dialogical discourse forward; in combative or critical discursive environments, they not only provide a range of discourse management techniques, they also scaffold the semantic structure of the positions that interlocutors develop. The boundaries, however, between providing substantive answers to questions, merely responding to questions, and evading questions entirely, are fuzzy and the way in which answers, responses and evasions affect the subsequent development of dialogue and argumentation structure are poorly understood. In this paper, we explore how questions have ramifications on the large-scale structure of a debate using as our substrate the BBC television programme Question Time, the foremost topical debate show in the UK. Analysis of the data demonstrates not only that questioning plays a particularly prominent role in such debate, but also that its repercussions can reverberate through a discourse. 2022.codi-1.8 @@ -107,7 +107,7 @@ Shallow Discourse Parsing for Open Information Extraction and Text Simplification ChristinaNiklaus - AndréFreitas + AndréFreitas SiegfriedHandschuh 64–76 We present a discourse-aware text simplification (TS) approach that recursively splits and rephrases complex English sentences into a semantic hierarchy of simplified sentences. Using a set of linguistically principled transformation patterns, sentences are converted into a hierarchical representation in the form of core sentences and accompanying contexts that are linked via rhetorical relations. As opposed to previously proposed sentence splitting approaches, which commonly do not take into account discourse-level aspects, our TS approach preserves the semantic relationship of the decomposed constituents in the output. A comparative analysis with the annotations contained in RST-DT shows that we capture the contextual hierarchy between the split sentences with a precision of 89% and reach an average precision of 69% for the classification of the rhetorical relations that hold between them. Moreover, an integration into state-of-the-art Open Information Extraction (IE) systems reveals that when applying our TS approach as a pre-processing step, the generated relational tuples are enriched with additional meta information, resulting in a novel lightweight semantic representation for the task of Open IE. @@ -129,7 +129,7 @@ MathildeVeron OlivierGalibert GuillaumeBernard - SophieRosset + SophieRosset 86–91 Dialog state tracking (DST) is a core step for task-oriented dialogue systems aiming to track the user’s current goal during a dialogue. Recently a special focus has been put on applying existing DST models to new domains, in other words performing zero-shot cross-domain transfer. While recent state-of-the-art models leverage large pre-trained language models, no work has been made on understanding and improving the results of first developed zero-shot models like SUMBT. In this paper, we thus propose to improve SUMBT zero-shot results on MultiWOZ by using attention modulation during inference. This method improves SUMBT zero-shot results significantly on two domains and does not worsen the initial performance with the great advantage of needing no additional training. 2022.codi-1.11 @@ -156,12 +156,12 @@ Proceedings of the CODI-CRAC 2022 Shared Task on Anaphora, Bridging, and Discourse Deixis in Dialogue JuntaoYu SopanKhosla - RameshManuvinakurike - LoriLevin + RameshManuvinakurike + LoriLevin VincentNg - MassimoPoesio + MassimoPoesio MichaelStrube - CarolynRose + CarolynRose Association for Computational Linguistics
Gyeongju, Republic of Korea
October diff --git a/data/xml/2022.cogalex.xml b/data/xml/2022.cogalex.xml index 71ffea65f6..fff8ad80b8 100644 --- a/data/xml/2022.cogalex.xml +++ b/data/xml/2022.cogalex.xml @@ -55,7 +55,7 @@ <fixed-case>CAT</fixed-case> <fixed-case>M</fixed-case>any<fixed-case>N</fixed-case>ames: A New Dataset for Object Naming in <fixed-case>C</fixed-case>atalan MarDomínguez Orfila MaiteMelero Nogués - GemmaBoleda Torrent + GemmaBoleda Torrent 31–36 Object Naming is an important task within the field of Language and Vision that consists of generating a correct and appropriate name for an object given an image. The ManyNames dataset uses real-world human annotated images with multiple labels, instead of just one. In this work, we describe the adaptation of this dataset (originally in English) to Catalan, by (i) machine-translating the English labels and (ii) collecting human annotations for a subset of the original corpus and comparing both resources. Analyses reveal divergences in the lexical variation of the two sets showing potential problems of directly translated resources, particularly when there is no resource to a proper context, which in this case is conveyed by the image. The analysis also points to the impact of cultural factors in the naming task, which should be accounted for in future cross-lingual naming tasks. 2022.cogalex-1.4 @@ -76,7 +76,7 @@ Putting <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et’s Dictionary Examples in the Context of Definition Modelling: An Empirical Analysis FatemahAlmeman - LuisEspinosa Anke + LuisEspinosa Anke 42–48 Definition modeling is the task to generate a valid definition for a given input term. This relatively novel task has been approached either with no context (i.e., given a word embedding alone) and, more recently, as word-in-context modeling. Despite their success, most works make little to no distinction between resources and their specific features (e.g., type and style of definitions, or quality of examples) when used for training. Given the high diversity lexicographic resources exhibit in terms of topic coverage, style and formal structure, it is desirable for downstream definition modeling to better understand which of them are better suited for the task. In this paper, we propose an empirical evaluation of the well-known lexical database WordNet, and specifically, its dictionary examples. We evaluate them both directly, by matching them against criteria for good dictionary writing, and indirectly, in the task of definition modeling. Our results suggest that WordNet’s dictionary examples could be improved by extending them in length, and incorporating prototypicality. 2022.cogalex-1.6 diff --git a/data/xml/2022.coling.xml b/data/xml/2022.coling.xml index ab0c9848ed..e3afd6ae0f 100644 --- a/data/xml/2022.coling.xml +++ b/data/xml/2022.coling.xml @@ -3,12 +3,12 @@ Proceedings of the 29th International Conference on Computational Linguistics - NicolettaCalzolari + NicolettaCalzolari Chu-RenHuang HansaemKim - JamesPustejovsky + JamesPustejovsky LeoWanner - Key-SunChoi + Key-SunChoi Pum-MoRyu Hsin-HsiChen LuciaDonatelli @@ -17,7 +17,7 @@ PatriziaPaggio NianwenXue SeokhwanKim - YounggyunHahm + YounggyunHahm ZhongHe Tony KyungilLee EnricoSantus @@ -77,10 +77,10 @@ Measuring Morphological Fusion Using Partial Information Decomposition MichaelaSocolof - Jacob LouisHoover + Jacob LouisHoover RichardFutrell AlessandroSordoni - Timothy J.O’Donnell + Timothy J.O’Donnell 44–54 Morphological systems across languages vary when it comes to the relation between form and meaning. In some languages, a single meaning feature corresponds to a single morpheme, whereas in other languages, multiple meaning features are bundled together into one morpheme. The two types of languages have been called agglutinative and fusional, respectively, but this distinction does not capture the graded nature of the phenomenon. We provide a mathematically precise way of characterizing morphological systems using partial information decomposition, a framework for decomposing mutual information into three components: unique, redundant, and synergistic information. We show that highly fusional languages are characterized by high levels of synergy. 2022.coling-1.5 @@ -172,7 +172,7 @@ Character Jacobian: Modeling <fixed-case>C</fixed-case>hinese Character Meanings with Deep Learning Model Yu-HsiangTseng - Shu-KaiHsieh + Shu-KaiHsieh 152–162 Compounding, a prevalent word-formation process, presents an interesting challenge for computational models. Indeed, the relations between compounds and their constituents are often complicated. It is particularly so in Chinese morphology, where each character is almost simultaneously bound and free when treated as a morpheme. To model such word-formation process, we propose the Notch (NOnlinear Transformation of CHaracter embeddings) model and the character Jacobians. The Notch model first learns the non-linear relations between the constituents and words, and the character Jacobians further describes the character’s role in each word. In a series of experiments, we show that the Notch model predicts the embeddings of the real words from their constituents but helps account for the behavioral data of the pseudowords. Moreover, we also demonstrated that character Jacobians reflect the characters’ meanings. Taken together, the Notch model and character Jacobians may provide a new perspective on studying the word-formation process and morphology with modern deep learning. 2022.coling-1.14 @@ -193,10 +193,10 @@ Exploring Semantic Spaces for Detecting Clustering and Switching in Verbal Fluency ÖzgeAlacam - SimeonSchüz + SimeonSchüz MartinWegrzyn JohannaKißler - SinaZarrieß + SinaZarrieß 178–191 In this work, we explore the fitness of various word/concept representations in analyzing an experimental verbal fluency dataset providing human responses to 10 different category enumeration tasks. Based on human annotations of so-called clusters and switches between sub-categories in the verbal fluency sequences, we analyze whether lexical semantic knowledge represented in word embedding spaces (GloVe, fastText, ConceptNet, BERT) is suitable for detecting these conceptual clusters and switches within and across different categories. Our results indicate that ConceptNet embeddings, a distributional semantics method enriched with taxonomical relations, outperforms other semantic representations by a large margin. Moreover, category-specific analysis suggests that individual thresholds per category are more suited for the analysis of clustering and switching in particular embedding sub-space instead of a one-fits-all cross-category solution. The results point to interesting directions for future work on probing word embedding models on the verbal fluency task. 2022.coling-1.16 @@ -255,7 +255,7 @@ NurulLubis MichaelHeck ShutongFeng - MilicaGašić + MilicaGašić 266–284 Continual learning is one of the key components of human learning and a necessary requirement of artificial intelligence. As dialogue can potentially span infinitely many topics and tasks, a task-oriented dialogue system must have the capability to continually learn, dynamically adapting to new challenges while preserving the knowledge it already acquired. Despite the importance, continual reinforcement learning of the dialogue policy has remained largely unaddressed. The lack of a framework with training protocols, baseline models and suitable metrics, has so far hindered research in this direction. In this work we fill precisely this gap, enabling research in dialogue policy optimisation to go from static to dynamic learning. We provide a continual learning algorithm, baseline architectures and metrics for assessing continual learning models. Moreover, we propose the dynamic dialogue policy transformer (DDPT), a novel dynamic architecture that can integrate new knowledge seamlessly, is capable of handling large state spaces and obtains significant zero-shot performance when being exposed to unseen domains, without any growth in network parameter size. We validate the strengths of DDPT in simulation with two user simulators as well as with humans. 2022.coling-1.21 @@ -338,7 +338,7 @@ Schema Encoding for Transferable Dialogue State Tracking HyunminJeon - Gary GeunbaeLee + Gary GeunbaeLee 355–366 Dialogue state tracking (DST) is an essential sub-task for task-oriented dialogue systems. Recent work has focused on deep neural models for DST. However, the neural models require a large dataset for training. Furthermore, applying them to another domain needs a new dataset because the neural models are generally trained to imitate the given dataset. In this paper, we propose Schema Encoding for Transferable Dialogue State Tracking (SET-DST), which is a neural DST method for effective transfer to new domains. Transferable DST could assist developments of dialogue systems even with few dataset on target domains. We use a schema encoder not just to imitate the dataset but to comprehend the schema of the dataset. We aim to transfer the model to new domains by encoding new schemas and using them for DST on multi-domain settings. As a result, SET-DST improved the joint accuracy by 1.46 points on MultiWOZ 2.1. 2022.coling-1.28 @@ -403,7 +403,7 @@ JunyoungSon JinsungKim JungwooLim - HeuiseokLim + HeuiseokLim 412–423 The dialogue-based relation extraction (DialogRE) task aims to predict the relations between argument pairs that appear in dialogue. Most previous studies utilize fine-tuning pre-trained language models (PLMs) only with extensive features to supplement the low information density of the dialogue by multiple speakers. To effectively exploit inherent knowledge of PLMs without extra layers and consider scattered semantic cues on the relation between the arguments, we propose a Guiding model with RelAtional Semantics using Prompt (GRASP). We adopt a prompt-based fine-tuning approach and capture relational semantic clues of a given dialogue with 1) an argument-aware prompt marker strategy and 2) the relational clue detection task. In the experiments, GRASP achieves state-of-the-art performance in terms of both F1 and F1c scores on a DialogRE dataset even though our method only leverages PLMs without adding any extra layers. 2022.coling-1.33 @@ -464,7 +464,7 @@ QixianZhou JinlanFu Min-YenKan - See-KiongNg + See-KiongNg 471–484 Knowledge-grounded dialog systems need to incorporate smooth transitions among knowledge selected for generating responses, to ensure that dialog flows naturally. For document-grounded dialog systems, the inter- and intra-document knowledge relations can be used to model such conversational flows. We develop a novel Multi-Document Co-Referential Graph (Coref-MDG) to effectively capture the inter-document relationships based on commonsense and similarity and the intra-document co-referential structures of knowledge segments within the grounding documents. We propose CorefDiffs, a Co-referential and Differential flow management method, to linearize the static Coref-MDG into conversational sequence logic. CorefDiffs performs knowledge selection by accounting for contextual graph structures and the knowledge difference sequences. CorefDiffs significantly outperforms the state-of-the-art by 9.5%, 7.4% and 8.2% on three public benchmarks. This demonstrates that the effective modeling of co-reference and knowledge difference for dialog flows are critical for transitions in document-grounded conversation. 2022.coling-1.38 @@ -474,7 +474,7 @@ <fixed-case>S</fixed-case>el<fixed-case>F</fixed-case>-Eval: Self-supervised Fine-grained Dialogue Evaluation LongxuanMa ZiyuZhuang - WeinanZhang + WeinanZhang MingdaLi TingLiu 485–495 @@ -487,7 +487,7 @@ MaximeDe Bruyn EhsanLotfi JeskaBuhmann - WalterDaelemans + WalterDaelemans 496–504 Automatic evaluation of open-domain dialogs remains an unsolved problem. Existing methods do not correlate strongly with human annotations. In this paper, we present a new automated evaluation method based on the use of follow-ups. We measure the probability that a language model will continue the conversation with a fixed set of follow-ups (e.g. not really relevant here, what are you trying to say?). When compared against twelve existing methods, our new evaluation achieves the highest correlation with human evaluations. 2022.coling-1.40 @@ -535,7 +535,7 @@ Using Multi-Encoder Fusion Strategies to Improve Personalized Response Selection SouvikDas SougataSaha - Rohini K.Srihari + Rohini K.Srihari 532–541 Personalized response selection systems are generally grounded on persona. However, a correlation exists between persona and empathy, which these systems do not explore well. Also, when a contradictory or off-topic response is selected, faithfulness to the conversation context plunges. This paper attempts to address these issues by proposing a suite of fusion strategies that capture the interaction between persona, emotion, and entailment information of the utterances. Ablation studies on the Persona-Chat dataset show that incorporating emotion and entailment improves the accuracy of response selection. We combine our fusion strategies and concept-flow encoding to train a BERT-based model which outperforms the previous methods by margins larger than 2.3% on original personas and 1.9% on revised personas in terms of hits@1 (top-1 accuracy), achieving a new state-of-the-art performance on the Persona-Chat dataset 2022.coling-1.44 @@ -570,7 +570,7 @@ <fixed-case>ET</fixed-case>5: A Novel End-to-end Framework for Conversational Machine Reading Comprehension XiaoZhang - HeyanHuang + HeyanHuang ZewenChi Xian-LingMao 570–579 @@ -580,12 +580,12 @@ <fixed-case>C</fixed-case>o<fixed-case>HS</fixed-case>-<fixed-case>CQG</fixed-case>: Context and History Selection for Conversational Question Generation - Xuan LongDo + Xuan LongDo BoweiZou LiangmingPan - Nancy F.Chen - ShafiqJoty - Ai TiAw + Nancy F.Chen + ShafiqJoty + Ai TiAw 580–591 Conversational question generation (CQG) serves as a vital task for machines to assist humans, such as interactive reading comprehension, through conversations. Compared to traditional single-turn question generation (SQG), CQG is more challenging in the sense that the generated question is required not only to be meaningful, but also to align with the provided conversation. Previous studies mainly focus on how to model the flow and alignment of the conversation, but do not thoroughly study which parts of the context and history are necessary for the model. We believe that shortening the context and history is crucial as it can help the model to optimise more on the conversational alignment property. To this end, we propose CoHS-CQG, a two-stage CQG framework, which adopts a novel CoHS module to shorten the context and history of the input. In particular, it selects the top-p sentences and history turns by calculating the relevance scores of them. Our model achieves state-of-the-art performances on CoQA in both the answer-aware and answer-unaware settings. 2022.coling-1.48 @@ -631,9 +631,9 @@ Towards Multi-label Unknown Intent Detection YawenOuyang ZhenWu - XinyuDai + XinyuDai ShujianHuang - JiajunChen + JiajunChen 626–635 Multi-class unknown intent detection has made remarkable progress recently. However, it has a strong assumption that each utterance has only one intent, which does not conform to reality because utterances often have multiple intents. In this paper, we propose a more desirable task, multi-label unknown intent detection, to detect whether the utterance contains the unknown intent, in which each utterance may contain multiple intents. In this task, the unique utterances simultaneously containing known and unknown intents make existing multi-class methods easy to fail. To address this issue, we propose an intuitive and effective method to recognize whether All Intents contained in the utterance are Known (AIK). Our high-level idea is to predict the utterance’s intent number, then check whether the utterance contains the same number of known intents. If the number of known intents is less than the number of intents, it implies that the utterance also contains unknown intents. We benchmark AIK over existing methods, and empirical results suggest that our method obtains state-of-the-art performances. For example, on the MultiWOZ 2.3 dataset, AIK significantly reduces the FPR95 by 12.25% compared to the best baseline. 2022.coling-1.52 @@ -792,7 +792,7 @@ Investigating the Performance of Transformer-Based <fixed-case>NLI</fixed-case> Models on Presuppositional Inferences JadKabbara - Jackie Chi KitCheung + Jackie Chi KitCheung 779–785 Presuppositions are assumptions that are taken for granted by an utterance, and identifying them is key to a pragmatic interpretation of language. In this paper, we investigate the capabilities of transformer models to perform NLI on cases involving presupposition. First, we present simple heuristics to create alternative “contrastive” test cases based on the ImpPres dataset and investigate the model performance on those test cases. Second, to better understand how the model is making its predictions, we analyze samples from sub-datasets of ImpPres and examine model performance on them. Overall, our findings suggest that NLI-trained transformer models seem to be exploiting specific structural and lexical cues as opposed to performing some kind of pragmatic reasoning. 2022.coling-1.65 @@ -803,7 +803,7 @@ JohnMurzaku PeterZeng MagdalenaMarkowska - OwenRambow + OwenRambow 786–796 We present a corrected version of a subset of the FactBank data set. Previously published results on FactBank are no longer valid. We perform experiments on FactBank using multiple training paradigms, data smoothing techniques, and polarity classifiers. We argue that f-measure is an important alternative evaluation metric for factuality. We provide new state-of-the-art results for four corpora including FactBank. We perform an error analysis on Factbank combined with two similar corpora. 2022.coling-1.66 @@ -883,7 +883,7 @@ Dialo-<fixed-case>AP</fixed-case>: A Dependency Parsing Based Argument Parser for Dialogues SougataSaha SouvikDas - Rohini K.Srihari + Rohini K.Srihari 887–901 While neural approaches to argument mining (AM) have advanced considerably, most of the recent work has been limited to parsing monologues. With an urgent interest in the use of conversational agents for broader societal applications, there is a need to advance the state-of-the-art in argument parsers for dialogues. This enables progress towards more purposeful conversations involving persuasion, debate and deliberation. This paper discusses Dialo-AP, an end-to-end argument parser that constructs argument graphs from dialogues. We formulate AM as dependency parsing of elementary and argumentative discourse units; the system is trained using extensive pre-training and curriculum learning comprising nine diverse corpora. Dialo-AP is capable of generating argument graphs from dialogues by performing all sub-tasks of AM. Compared to existing state-of-the-art baselines, Dialo-AP achieves significant improvements across all tasks, which is further validated through rigorous human evaluation. 2022.coling-1.74 @@ -905,7 +905,7 @@ YaxinFan PeifengLi FangKong - QiaomingZhu + QiaomingZhu 912–921 Conversational discourse parsing aims to construct an implicit utterance dependency tree to reflect the turn-taking in a multi-party conversation. Existing works are generally divided into two lines: graph-based and transition-based paradigms, which perform well for short-distance and long-distance dependency links, respectively. However, there is no study to consider the advantages of both paradigms to facilitate conversational discourse parsing. As a result, we propose a distance-aware multi-task framework DAMT that incorporates the strengths of transition-based paradigm to facilitate the graph-based paradigm from the encoding and decoding process. To promote multi-task learning on two paradigms, we first introduce an Encoding Interactive Module (EIM) to enhance the flow of semantic information between both two paradigms during the encoding step. And then we apply a Distance-Aware Graph Convolutional Network (DAGCN) in the decoding process, which can incorporate the different-distance dependency links predicted by the transition-based paradigm to facilitate the decoding of the graph-based paradigm. The experimental results on the datasets STAC and Molweni show that our method can significantly improve the performance of the SOTA graph-based paradigm on long-distance dependency links. 2022.coling-1.76 @@ -916,7 +916,7 @@ ArmanKazmi SidharthRanjan ArpitSharma - RajakrishnanRajkumar + RajakrishnanRajkumar 922–937 This work deploys linguistically motivated features to classify paragraph-level text into fiction and non-fiction genre using a logistic regression model and infers lexical and syntactic properties that distinguish the two genres. Previous works have focused on classifying document-level text into fiction and non-fiction genres, while in this work, we deal with shorter texts which are closer to real-world applications like sentiment analysis of tweets. Going beyond simple POS tag ratios proposed in Qureshi et al.(2019) for document-level classification, we extracted multiple linguistically motivated features belonging to four categories: Lexical features, POS ratio features, Syntactic features and Raw features. For the task of short-text classification, a model containing 28 best-features (selected via Recursive feature elimination with cross-validation; RFECV) confers an accuracy jump of 15.56 % over a baseline model consisting of 2 POS-ratio features found effective in previous work (cited above). The efficacy of the above model containing a linguistically motivated feature set also transfers over to another dataset viz, Baby BNC corpus. We also compared the classification accuracy of the logistic regression model with two deep-learning models. A 1D CNN model gives an increase of 2% accuracy over the logistic Regression classifier on both corpora. And the BERT-base-uncased model gives the best classification accuracy of 97% on Brown corpus and 98% on Baby BNC corpus. Although both the deep learning models give better results in terms of classification accuracy, the problem of interpreting these models remains unsolved. In contrast, regression model coefficients revealed that fiction texts tend to have more character-level diversity and have lower lexical density (quantified using content-function word ratios) compared to non-fiction texts. Moreover, subtle differences in word order exist between the two genres, i.e., in fiction texts Verbs precede Adverbs (inter-alia). 2022.coling-1.77 @@ -935,7 +935,7 @@ Hierarchical Information Matters: Text Classification via Tree Based Graph Neural Network - ChongZhang + ChongZhang HeZhu XingyuPeng JunranWu @@ -962,7 +962,7 @@ Community Topic: Topic Model Inference by Consecutive Word Community Discovery EricAustin - Osmar R.Zaïane + Osmar R.Zaïane ChristineLargeron 971–983 We present our novel, hyperparameter-free topic modelling algorithm, Community Topic. Our algorithm is based on mining communities from term co-occurrence networks. We empirically evaluate and compare Community Topic with Latent Dirichlet Allocation and the recently developed top2vec algorithm. We find that Community Topic runs faster than the competitors and produces topics that achieve higher coherence scores. Community Topic can discover coherent topics at various scales. The network representation used by Community Topic results in a natural relationship between topics and a topic hierarchy. This allows sub- and super-topics to be found on demand. These features make Community Topic the ideal tool for downstream applications such as applied research and conversational agents. @@ -1016,7 +1016,7 @@ <fixed-case>CONCRETE</fixed-case>: Improving Cross-lingual Fact-checking with Cross-lingual Retrieval Kung-HsiangHuang - ChengXiangZhai + ChengXiangZhai HengJi 1024–1035 Fact-checking has gained increasing attention due to the widespread of falsified information. Most fact-checking approaches focus on claims made in English only due to the data scarcity issue in other languages. The lack of fact-checking datasets in low-resource languages calls for an effective cross-lingual transfer technique for fact-checking. Additionally, trustworthy information in different languages can be complementary and helpful in verifying facts. To this end, we present the first fact-checking framework augmented with cross-lingual retrieval that aggregates evidence retrieved from multiple languages through a cross-lingual retriever. Given the absence of cross-lingual information retrieval datasets with claim-like queries, we train the retriever with our proposed Cross-lingual Inverse Cloze Task (X-ICT), a self-supervised algorithm that creates training instances by translating the title of a passage. The goal for X-ICT is to learn cross-lingual retrieval in which the model learns to identify the passage corresponding to a given translated title. On the X-Fact dataset, our approach achieves 2.23% absolute F1 improvement in the zero-shot cross-lingual setup over prior systems. The source code and data are publicly available at https://github.com/khuangaf/CONCRETE. @@ -1041,7 +1041,7 @@ Attribute Injection for Pretrained Language Models: A New Benchmark and an Efficient Method - Reinald KimAmplayo + Reinald KimAmplayo Kang MinYoo Sang-WooLee 1051–1064 @@ -1053,11 +1053,11 @@ Towards Robust Neural Retrieval with Source Domain Synthetic Pre-Finetuning RevanthGangi Reddy VikasYadav - Md ArafatSultan + Md ArafatSultan MartinFranz VittorioCastelli HengJi - AvirupSil + AvirupSil 1065–1070 Research on neural IR has so far been focused primarily on standard supervised learning settings, where it outperforms traditional term matching baselines. Many practical use cases of such models, however, may involve previously unseen target domains. In this paper, we propose to improve the out-of-domain generalization of Dense Passage Retrieval (DPR) - a popular choice for neural IR - through synthetic data augmentation only in the source domain. We empirically show that pre-finetuning DPR with additional synthetic data in its source domain (Wikipedia), which we generate using a fine-tuned sequence-to-sequence generator, can be a low-cost yet effective first step towards its generalization. Across five different test sets, our augmented model shows more robust performance than DPR in both in-domain and zero-shot out-of-domain evaluation. 2022.coling-1.89 @@ -1097,7 +1097,7 @@ SoumitraGhosh Dhirendra KumarMaurya AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 1098–1105 The World Health Organization has emphasised the need of stepping up suicide prevention efforts to meet the United Nation’s Sustainable Development Goal target of 2030 (Goal 3: Good health and well-being). We address the challenging task of personality subtyping from suicide notes. Most research on personality subtyping has relied on statistical analysis and feature engineering. Moreover, state-of-the-art transformer models in the automated personality subtyping problem have received relatively less attention. We develop a novel EMotion-assisted PERSONAlity Detection Framework (EM-PERSONA). We annotate the benchmark CEASE-v2.0 suicide notes dataset with personality traits across four dichotomies: Introversion (I)-Extraversion (E), Intuition (N)-Sensing (S), Thinking (T)-Feeling (F), Judging (J)–Perceiving (P). Our proposed method outperforms all baselines on comprehensive evaluation using multiple state-of-the-art systems. Across the four dichotomies, EM-PERSONA improved accuracy by 2.04%, 3.69%, 4.52%, and 3.42%, respectively, over the highest-performing single-task systems. 2022.coling-1.93 @@ -1116,7 +1116,7 @@ Exploring Label Hierarchy in a Generative Way for Hierarchical Text Classification WeiHuang - ChenLiu + ChenLiu BoXiao YihuaZhao ZhaomingPan @@ -1132,7 +1132,7 @@ <fixed-case>M</fixed-case>u<fixed-case>S</fixed-case>e<fixed-case>CLIR</fixed-case>: A Multiple Senses and Cross-lingual Information Retrieval Dataset Wing YanLi JulieWeeds - DavidWeir + DavidWeir 1128–1135 This paper addresses a deficiency in existing cross-lingual information retrieval (CLIR) datasets and provides a robust evaluation of CLIR systems’ disambiguation ability. CLIR is commonly tackled by combining translation and traditional IR. Due to translation ambiguity, the problem of ambiguity is worse in CLIR than in monolingual IR. But existing auto-generated CLIR datasets are dominated by searches for named entity mentions, which does not provide a good measure for disambiguation performance, as named entity mentions can often be transliterated across languages and tend not to have multiple translations. Therefore, we introduce a new evaluation dataset (MuSeCLIR) to address this inadequacy. The dataset focusses on polysemous common nouns with multiple possible translations. MuSeCLIR is constructed from multilingual Wikipedia and supports searches on documents written in European (French, German, Italian) and Asian (Chinese, Japanese) languages. We provide baseline statistical and neural model results on MuSeCLIR which show that MuSeCLIR has a higher requirement on the ability of systems to disambiguate query terms. 2022.coling-1.96 @@ -1183,8 +1183,8 @@ YeonSeonwoo SeunghyunYoon FranckDernoncourt - TrungBui - AliceOh + TrungBui + AliceOh 1169–1178 Domain-specific documents cover terminologies and specialized knowledge. This has been the main challenge of domain-specific document retrieval systems. Previous approaches propose domain-adaptation and transfer learning methods to alleviate this problem. However, these approaches still follow the same document representation method in previous approaches; a document is embedded into a single vector. In this study, we propose VKGDR. VKGDR represents a given corpus into a graph of entities and their relations (known as a virtual knowledge graph) and computes the relevance between queries and documents based on the graph representation. We conduct three experiments 1) domain-specific document retrieval, 2) comparison of our virtual knowledge graph construction method with previous approaches, and 3) ablation study on each component of our virtual knowledge graph. From the results, we see that unsupervised VKGDR outperforms baselines in a zero-shot setting and even outperforms fully-supervised bi-encoder. We also verify that our virtual knowledge graph construction method results in better retrieval performance than previous approaches. 2022.coling-1.101 @@ -1245,7 +1245,7 @@ From Polarity to Intensity: Mining Morality from Semantic Space ChunxuZhao - PengyuanLiu + PengyuanLiu DongYu 1250–1262 Most works on computational morality focus on moral polarity recognition, i.e., distinguishing right from wrong. However, a discrete polarity label is not informative enough to reflect morality as it does not contain any degree or intensity information. Existing approaches to compute moral intensity are limited to word-level measurement and heavily rely on human labelling. In this paper, we propose MoralScore, a weakly-supervised framework that can automatically measure moral intensity from text. It only needs moral polarity labels, which are more robust and easier to acquire. Besides, the framework can capture latent moral information not only from words but also from sentence-level semantics which can provide a more comprehensive measurement. To evaluate the performance of our method, we introduce a set of evaluation metrics and conduct extensive experiments. Results show that our method achieves good performance on both automatic and human evaluations. @@ -1288,7 +1288,7 @@ Debiasing Isn’t Enough! – on the Effectiveness of Debiasing <fixed-case>MLM</fixed-case>s and Their Social Biases in Downstream Tasks MasahiroKaneko DanushkaBollegala - NaoakiOkazaki + NaoakiOkazaki 1299–1310 We study the relationship between task-agnostic intrinsic and task-specific extrinsic social bias evaluation measures for MLMs, and find that there exists only a weak correlation between these two types of evaluation measures. Moreover, we find that MLMs debiased using different methods still re-learn social biases during fine-tuning on downstream tasks. We identify the social biases in both training instances as well as their assigned labels as reasons for the discrepancy between intrinsic and extrinsic bias evaluation measurements. Overall, our findings highlight the limitations of existing MLM bias evaluation measures and raise concerns on the deployment of MLMs in downstream applications using those measures. 2022.coling-1.111 @@ -1305,7 +1305,7 @@ A Study of Implicit Bias in Pretrained Language Models against People with Disabilities - Pranav NarayananVenkit + Pranav NarayananVenkit MukundSrinath ShomirWilson 1324–1332 @@ -1418,7 +1418,7 @@ Self-Supervised Intermediate Fine-Tuning of Biomedical Language Models for Interpreting Patient Case Descriptions IsraaAlghanmi - LuisEspinosa-Anke + LuisEspinosa-Anke StevenSchockaert 1432–1441 Interpreting patient case descriptions has emerged as a challenging problem for biomedical NLP, where the aim is typically to predict diagnoses, to recommended treatments, or to answer questions about cases more generally. Previous work has found that biomedical language models often lack the knowledge that is needed for such tasks. In this paper, we aim to improve their performance through a self-supervised intermediate fine-tuning strategy based on PubMed abstracts. Our solution builds on the observation that many of these abstracts are case reports, and thus essentially patient case descriptions. As a general strategy, we propose to fine-tune biomedical language models on the task of predicting masked medical concepts from such abstracts. We find that the success of this strategy crucially depends on the selection of the medical concepts to be masked. By ensuring that these concepts are sufficiently salient, we can substantially boost the performance of biomedical language models, achieving state-of-the-art results on two benchmarks. @@ -1438,7 +1438,7 @@ Can We Guide a Multi-Hop Reasoning Language Model to Incrementally Learn at Each Single-Hop? JesusLovon-Melgarejo - Jose G.Moreno + Jose G.Moreno RomaricBesançon OlivierFerret LyndaTamine @@ -1486,7 +1486,7 @@ JiayiChen Xiao-YuGuo Yuan-FangLi - GholamrezaHaffari + GholamrezaHaffari 1502–1510 Answering complex questions that require multi-step multi-type reasoning over raw text is challenging, especially when conducting numerical reasoning. Neural Module Networks (NMNs), follow the programmer-interpreter framework and design trainable modules to learn different reasoning skills. However, NMNs only have limited reasoning abilities, and lack numerical reasoning capability. We upgrade NMNs by: (a) bridging the gap between its interpreter and the complex questions; (b) introducing addition and subtraction modules that perform numerical reasoning over numbers. On a subset of DROP, experimental results show that our proposed methods enhance NMNs’ numerical reasoning skills by 17.7% improvement of F1 score and significantly outperform previous state-of-the-art models. 2022.coling-1.129 @@ -1516,7 +1516,7 @@ KaixinMa FilipIlievski JonathanFrancis - EricNyberg + EricNyberg AlessandroOltramari 1534–1545 Procedural text understanding is a challenging language reasoning task that requires models to track entity states across the development of a narrative. We identify three core aspects required for modeling this task, namely the local and global view of the inputs, as well as the global view of outputs. Prior methods have considered a subset of these aspects, which leads to either low precision or low recall. In this paper, we propose a new model Coalescing Global and Local Information (CGLI), which builds entity- and timestep-aware input representations (local input) considering the whole context (global input), and we jointly model the entity states with a structured prediction objective (global output). Thus, CGLI simultaneously optimizes for both precision and recall. Moreover, we extend CGLI with additional output layers and integrate it into a story reasoning framework. Extensive experiments on a popular procedural text understanding dataset show that our model achieves state-of-the-art results, while experiments on a story reasoning benchmark show the positive impact of our model on downstream reasoning. @@ -1542,7 +1542,7 @@ Case-Based Abductive Natural Language Inference MarcoValentino MokanaranganThayaparan - AndréFreitas + AndréFreitas 1556–1568 Most of the contemporary approaches for multi-hop Natural Language Inference (NLI) construct explanations considering each test case in isolation. However, this paradigm is known to suffer from semantic drift, a phenomenon that causes the construction of spurious explanations leading to wrong conclusions. In contrast, this paper proposes an abductive framework for multi-hop NLI exploring the retrieve-reuse-refine paradigm in Case-Based Reasoning (CBR). Specifically, we present Case-Based Abductive Natural Language Inference (CB-ANLI), a model that addresses unseen inference problems by analogical transfer of prior explanations from similar examples. We empirically evaluate the abductive framework on commonsense and scientific question answering tasks, demonstrating that CB-ANLI can be effectively integrated with sparse and dense pre-trained encoders to improve multi-hop inference, or adopted as an evidence retriever for Transformers. Moreover, an empirical analysis of semantic drift reveals that the CBR paradigm boosts the quality of the most challenging explanations, a feature that has a direct impact on robustness and accuracy in downstream inference tasks. 2022.coling-1.134 @@ -1605,7 +1605,7 @@ Conversational <fixed-case>QA</fixed-case> Dataset Generation with Answer Revision SeonjeongHwang - Gary GeunbaeLee + Gary GeunbaeLee 1636–1644 Conversational question-answer generation is a task that automatically generates a large-scale conversational question answering dataset based on input passages. In this paper, we introduce a novel framework that extracts question-worthy phrases from a passage and then generates corresponding questions considering previous conversations. In particular, our framework revises the extracted answers after generating questions so that answers exactly match paired questions. Experimental results show that our simple answer revision approach leads to significant improvement in the quality of synthetic data. Moreover, we prove that our framework can be effectively utilized for domain adaptation of conversational question answering. 2022.coling-1.140 @@ -1629,7 +1629,7 @@ ZhongyuWei ZhihaoFan QiZhang - XuanjingHuang + XuanjingHuang 1655–1665 Multi-hop reasoning requires aggregating multiple documents to answer a complex question. Existing methods usually decompose the multi-hop question into simpler single-hop questions to solve the problem for illustrating the explainable reasoning process. However, they ignore grounding on the supporting facts of each reasoning step, which tends to generate inaccurate decompositions. In this paper, we propose an interpretable stepwise reasoning framework to incorporate both single-hop supporting sentence identification and single-hop question generation at each intermediate step, and utilize the inference of the current hop for the next until reasoning out the final result. We employ a unified reader model for both intermediate hop reasoning and final hop inference and adopt joint optimization for more accurate and robust multi-hop reasoning. We conduct experiments on two benchmark datasets HotpotQA and 2WikiMultiHopQA. The results show that our method can effectively boost performance and also yields a better interpretable reasoning process without decomposition supervision. 2022.coling-1.142 @@ -1708,7 +1708,7 @@ Unsupervised Question Answering via Answer Diversifying YuxiangNie - HeyanHuang + HeyanHuang ZewenChi Xian-LingMao 1732–1742 @@ -1764,7 +1764,7 @@ YonghuaZhu YangChen QianqianQi - MichaelWitbrock + MichaelWitbrock PatriciaRiddle 1791–1800 Multi-hop question answering (QA) requires reasoning over multiple documents to answer a complex question and provide interpretable supporting evidence. However, providing supporting evidence is not enough to demonstrate that a model has performed the desired reasoning to reach the correct answer. Most existing multi-hop QA methods fail to answer a large fraction of sub-questions, even if their parent questions are answered correctly. In this paper, we propose the Prompt-based Conservation Learning (PCL) framework for multi-hop QA, which acquires new knowledge from multi-hop QA tasks while conserving old knowledge learned on single-hop QA tasks, mitigating forgetting. Specifically, we first train a model on existing single-hop QA tasks, and then freeze this model and expand it by allocating additional sub-networks for the multi-hop QA task. Moreover, to condition pre-trained language models to stimulate the kind of reasoning required for specific multi-hop questions, we learn soft prompts for the novel sub-networks to perform type-specific reasoning. Experimental results on the HotpotQA benchmark show that PCL is competitive for multi-hop QA and retains good performance on the corresponding single-hop sub-questions, demonstrating the efficacy of PCL in mitigating knowledge loss by forgetting. @@ -1912,7 +1912,7 @@ A Hybrid Model of Classification and Generation for Spatial Relation Extraction FengWang PeifengLi - QiaomingZhu + QiaomingZhu 1915–1924 Extracting spatial relations from texts is a fundamental task for natural language understanding and previous studies only regard it as a classification task, ignoring those spatial relations with null roles due to their poor information. To address the above issue, we first view spatial relation extraction as a generation task and propose a novel hybrid model HMCGR for this task. HMCGR contains a generation and a classification model, while the former can generate those null-role relations and the latter can extract those non-null-role relations to complement each other. Moreover, a reflexivity evaluation mechanism is applied to further improve the accuracy based on the reflexivity principle of spatial relation. Experimental results on SpaceEval show that HMCGR outperforms the SOTA baselines significantly. 2022.coling-1.166 @@ -1965,7 +1965,7 @@ ShengqiongWu BoboLi LiangZhao - DonghongJi + DonghongJi 1953–1964 Event extraction (EE) is an essential task of information extraction, which aims to extract structured event information from unstructured text. Most prior work focuses on extracting flat events while neglecting overlapped or nested ones. A few models for overlapped and nested EE includes several successive stages to extract event triggers and arguments,which suffer from error propagation. Therefore, we design a simple yet effective tagging scheme and model to formulate EE as word-word relation recognition, called OneEE. The relations between trigger or argument words are simultaneously recognized in one stage with parallel grid tagging, thus yielding a very fast event extraction speed. The model is equipped with an adaptive event fusion module to generate event-aware representations and a distance-aware predictor to integrate relative distance information for word-word relation recognition, which are empirically demonstrated to be effective mechanisms. Experiments on 3 overlapped and nested EE benchmarks, namely FewFC, Genia11, and Genia13, show that OneEE achieves the state-of-the-art (SOTA) results. Moreover, the inference speed of OneEE is faster than those of baselines in the same condition, and can be further substantially improved since it supports parallel inference. 2022.coling-1.170 @@ -1999,7 +1999,7 @@ QinChen QiZhang LiangHe - XuanjingHuang + XuanjingHuang 1990–2000 Event argument extraction (EAE) aims to extract arguments with given roles from texts, which have been widely studied in natural language processing. Most previous works have achieved good performance in specific EAE datasets with dedicated neural architectures. Whereas, these architectures are usually difficult to adapt to new datasets/scenarios with various annotation schemas or formats. Furthermore, they rely on large-scale labeled data for training, which is unavailable due to the high labelling cost in most cases. In this paper, we propose a multi-format transfer learning model with variational information bottleneck, which makes use of the information especially the common knowledge in existing datasets for EAE in new datasets. Specifically, we introduce a shared-specific prompt framework to learn both format-shared and format-specific knowledge from datasets with different formats. In order to further absorb the common knowledge for EAE and eliminate the irrelevant noise, we integrate variational information bottleneck into our architecture to refine the shared representation. We conduct extensive experiments on three benchmark datasets, and obtain new state-of-the-art performance on EAE. 2022.coling-1.173 @@ -2038,7 +2038,7 @@ ZhizhengWang YuanyuanSun HongfeiLin - JianWang + JianWang 2024–2033 Chinese Named Entity Recognition (NER) has continued to attract research attention. However, most existing studies only explore the internal features of the Chinese language but neglect other lingual modal features. Actually, as another modal knowledge of the Chinese language, English contains rich prompts about entities that can potentially be applied to improve the performance of Chinese NER. Therefore, in this study, we explore the bilingual enhancement for Chinese NER and propose a unified bilingual interaction module called the Adapted Cross-Transformers with Global Sparse Attention (ACT-S) to capture the interaction of bilingual information. We utilize a model built upon several different ACT-Ss to integrate the rich English information into the Chinese representation. Moreover, our model can learn the interaction of information between bilinguals (inter-features) and the dependency information within Chinese (intra-features). Compared with existing Chinese NER methods, our proposed model can better handle entities with complex structures. The English text that enhances the model is automatically generated by machine translation, avoiding high labour costs. Experimental results on four well-known benchmark datasets demonstrate the effectiveness and robustness of our proposed model. 2022.coling-1.176 @@ -2047,7 +2047,7 @@ Read Extensively, Focus Smartly: A Cross-document Semantic Enhancement Method for Visual Documents <fixed-case>NER</fixed-case> JunZhao - XinZhao + XinZhao WenYuZhan TaoGui QiZhang @@ -2088,7 +2088,7 @@ XiaolongJin SaipingGuan JiafengGuo - XueqiCheng + XueqiCheng 2065–2074 Due to the lack of labeled data in many realistic scenarios, a number of few-shot learning methods for text classification have been proposed, among which the meta learning based ones have recently attracted much attention. Such methods usually consist of a learner as the classifier and a meta learner for specializing the learner to different tasks. For the learner, learning rate is crucial to its performance. However, existing methods treat it as a hyper parameter and adjust it manually, which is time-consuming and laborious. Intuitively, for different tasks and neural network layers, the learning rates should be different and self-adaptive. For the meta learner, it requires a good generalization ability so as to quickly adapt to new tasks. Motivated by these issues, we propose a novel meta learning framework, called MetaSLRCL, for few-shot text classification. Specifically, we present a novel meta learning mechanism to obtain different learning rates for different tasks and neural network layers so as to enable the learner to quickly adapt to new training data. Moreover, we propose a task-oriented curriculum learning mechanism to help the meta learner achieve a better generalization ability by learning from different tasks with increasing difficulties. Extensive experiments on three benchmark datasets demonstrate the effectiveness of MetaSLRCL. 2022.coling-1.180 @@ -2097,11 +2097,11 @@ A Simple Temporal Information Matching Mechanism for Entity Alignment between Temporal Knowledge Graphs LiCai - XinMao + XinMao MeirongMa HaoYuan JianchaoZhu - ManLan + ManLan 2075–2086 Entity alignment (EA) aims to find entities in different knowledge graphs (KGs) that refer to the same object in the real world. Recent studies incorporate temporal information to augment the representations of KGs. The existing methods for EA between temporal KGs (TKGs) utilize a time-aware attention mechanisms to incorporate relational and temporal information into entity embeddings. The approaches outperform the previous methods by using temporal information. However, we believe that it is not necessary to learn the embeddings of temporal information in KGs since most TKGs have uniform temporal representations. Therefore, we propose a simple GNN model combined with a temporal information matching mechanism, which achieves better performance with less time and fewer parameters. Furthermore, since alignment seeds are difficult to label in real-world applications, we also propose a method to generate unsupervised alignment seeds via the temporal information of TKG. Extensive experiments on public datasets indicate that our supervised method significantly outperforms the previous methods and the unsupervised one has competitive performance. 2022.coling-1.181 @@ -2254,7 +2254,7 @@ Diverse Multi-Answer Retrieval with Determinantal Point Processes PoojithaNandigam NikhilRayaprolu - ManishShrivastava + ManishShrivastava 2220–2225 Often questions provided to open-domain question answering systems are ambiguous. Traditional QA systems that provide a single answer are incapable of answering ambiguous questions since the question may be interpreted in several ways and may have multiple distinct answers. In this paper, we address multi-answer retrieval which entails retrieving passages that can capture majority of the diverse answers to the question. We propose a re-ranking based approach using Determinantal point processes utilizing BERT as kernels. Our method jointly considers query-passage relevance and passage-passage correlation to retrieve passages that are both query-relevant and diverse. Results demonstrate that our re-ranking technique outperforms state-of-the-art method on the AmbigQA dataset. 2022.coling-1.194 @@ -2306,7 +2306,7 @@ PasqualeMinervini DavidChang PontusStenetorp - GuenterNeumann + GuenterNeumann 2259–2277 Relation extraction in the biomedical domain is challenging due to the lack of labeled data and high annotation costs, needing domain experts. Distant supervision is commonly used to tackle the scarcity of annotated data by automatically pairing knowledge graph relationships with raw texts. Such a pipeline is prone to noise and has added challenges to scale for covering a large number of biomedical concepts. We investigated existing broad-coverage distantly supervised biomedical relation extraction benchmarks and found a significant overlap between training and test relationships ranging from 26% to 86%. Furthermore, we noticed several inconsistencies in the data construction process of these benchmarks, and where there is no train-test leakage, the focus is on interactions between narrower entity types. This work presents a more accurate benchmark MedDistant19 for broad-coverage distantly supervised biomedical relation extraction that addresses these shortcomings and is obtained by aligning the MEDLINE abstracts with the widely used SNOMED Clinical Terms knowledge base. Lacking thorough evaluation with domain-specific language models, we also conduct experiments validating general domain relation extraction findings to biomedical relation extraction. 2022.coling-1.198 @@ -2321,7 +2321,7 @@ JunjieShan QiZhang YuemingWu - XuanjingHuang + XuanjingHuang 2278–2287 Natural language understanding (NLU) models tend to rely on spurious correlations (i.e., dataset bias) to achieve high performance on in-distribution datasets but poor performance on out-of-distribution ones. Most of the existing debiasing methods often identify and weaken these samples with biased features (i.e., superficial surface features that cause such spurious correlations). However, down-weighting these samples obstructs the model in learning from the non-biased parts of these samples. To tackle this challenge, in this paper, we propose to eliminate spurious correlations in a fine-grained manner from a feature space perspective. Specifically, we introduce Random Fourier Features and weighted re-sampling to decorrelate the dependencies between features to mitigate spurious correlations. After obtaining decorrelated features, we further design a mutual-information-based method to purify them, which forces the model to learn features that are more relevant to tasks. Extensive experiments on two well-studied NLU tasks demonstrate that our method is superior to other comparative approaches. 2022.coling-1.199 @@ -2356,7 +2356,7 @@ ShuzhengSi ShuangZeng JiaxingLin - BaobaoChang + BaobaoChang 2313–2318 Unlabeled Entity Problem (UEP) in Named Entity Recognition (NER) datasets seriously hinders the improvement of NER performance. This paper proposes SCL-RAI to cope with this problem. Firstly, we decrease the distance of span representations with the same label while increasing it for different ones via span-based contrastive learning, which relieves the ambiguity among entities and improves the robustness of the model over unlabeled entities. Then we propose retrieval augmented inference to mitigate the decision boundary shifting problem. Our method significantly outperforms the previous SOTA method by 4.21% and 8.64% F1-score on two real-world datasets. 2022.coling-1.202 @@ -2366,7 +2366,7 @@ A Relation Extraction Dataset for Knowledge Extraction from Web Tables SiffiSingh Alham FikriAji - GauravSingh + GauravSingh ChristosChristodoulopoulos 2319–2327 Relational web-tables are significant sources of structural information that are widely used for relation extraction and population of facts into knowledge graphs. To transform the web-table data into knowledge, we need to identify the relations that exist between column pairs. Currently, there are only a handful of publicly available datasets with relations annotated against natural web-tables. Most datasets are constructed using synthetic tables that lack valuable metadata information, or are limited in size to be considered as a challenging evaluation set. In this paper, we present REDTab, the largest natural-table relation extraction dataset. We have annotated ~9K tables and ~22K column pairs using crowd sourced annotators from MTurk, which has 50x larger number of column pairs than the existing human-annotated benchmark. Our test set is specially designed to be challenging as observed in our experiment results using TaBERT. We publicly release REDTab as a benchmark for the evaluation process in relation extraction. @@ -2409,7 +2409,7 @@ Method Entity Extraction from Biomedical Texts Waqar BinKalim - Robert E.Mercer + Robert E.Mercer 2357–2362 In the field of Natural Language Processing (NLP), extracting method entities from biomedical text has been a challenging task. Scientific research papers commonly consist of complex keywords and domain-specific terminologies, and new terminologies are continuously appearing. In this research, we find method terminologies in biomedical text using both rule-based and machine learning techniques. We first use linguistic features to extract method sentence candidates from a large corpus of biomedical text. Then, we construct a silver standard biomedical corpus composed of these sentences. With a rule-based method that makes use of the Stanza dependency parsing module, we label the method entities in these sentences. Using this silver standard corpus we train two machine learning algorithms to automatically extract method entities from biomedical text. Our results show that it is possible to develop machine learning models that can automatically extract method entities to a reasonable accuracy without the need for a gold standard dataset. 2022.coling-1.207 @@ -2500,7 +2500,7 @@ Improving Zero-Shot Entity Linking Candidate Generation with Ultra-Fine Entity Type Information XuhuiSui - YingZhang + YingZhang KehuiSong BaohangZhou GuoqingZhao @@ -2616,7 +2616,7 @@ ZiyinHuang YijunWang ChangzhiSun - ManLan + ManLan YuanbinWu XiaofengMou DingWang @@ -2711,8 +2711,8 @@ ZhongQian HengZhang PeifengLi - QiaomingZhu - GuodongZhou + QiaomingZhu + GuodongZhou 2622–2632 Document-level Event Factuality Identification (DEFI) predicts the factuality of a specific event based on a document from which the event can be derived, which is a fundamental and crucial task in Natural Language Processing (NLP). However, most previous studies only considered sentence-level task and did not adopt document-level knowledge. Moreover, they modelled DEFI as a typical text classification task depending on annotated information heavily, and limited to the task-specific corpus only, which resulted in data scarcity. To tackle these issues, we propose a new framework formulating DEFI as Machine Reading Comprehension (MRC) tasks considering both Span-Extraction (Ext) and Multiple-Choice (Mch). Our model does not employ any other explicit annotated information, and utilizes Transfer Learning (TL) to extract knowledge from universal large-scale MRC corpora for cross-domain data augmentation. The empirical results on DLEFM corpus demonstrate that the proposed model outperforms several state-of-the-arts. 2022.coling-1.231 @@ -2724,8 +2724,8 @@ YuHong JieWang ShimingHe - JianminYao - GuodongZhou + JianminYao + GuodongZhou 2633–2638 We leverage cross-language data expansion and retraining to enhance neural Event Detection (abbr., ED) on English ACE corpus. Machine translation is utilized for expanding English training set of ED from that of Chinese. However, experimental results illustrate that such strategy actually results in performance degradation. The survey of translations suggests that the mistakenly-aligned triggers in the expanded data negatively influences the retraining process. We refer this phenomenon to “trigger falsification”. To overcome the issue, we apply heuristic rules for regulating the expanded data, fixing the distracting samples that contain the falsified triggers. The supplementary experiments show that the rule-based regulation is beneficial, yielding the improvement of about 1.6% F1-score for ED. We additionally prove that, instead of transfer learning from the translated ED data, the straight data combination by random pouring surprisingly performs better. 2022.coling-1.232 @@ -2778,8 +2778,8 @@ WeijiaXu MarineCarpuat KennethHeafield - DouglasOard - KathleenMcKeown + DouglasOard + KathleenMcKeown 2668–2680 Query-focused summaries of foreign-language, retrieved documents can help a user understand whether a document is actually relevant to the query term. A standard approach to this problem is to first translate the source documents and then perform extractive summarization to find relevant snippets. However, in a cross-lingual setting, the query term does not necessarily appear in the translations of relevant documents. In this work, we show that constrained machine translation and constrained post-editing can improve human relevance judgments by including a query term in a summary when its translation appears in the source document. We also present several strategies for selecting only certain documents for regeneration which yield further improvements 2022.coling-1.236 @@ -2835,10 +2835,10 @@ HarpreetSingh FranckDernoncourt SeunghyunYoon - TrungBui + TrungBui Walter W.Chang EmiliaFarcas - NdapaNakashole + NdapaNakashole 2734–2747 Current medical question answering systems have difficulty processing long, detailed and informally worded questions submitted by patients, called Consumer Health Questions (CHQs). To address this issue, we introduce a medical question understanding and answering system with knowledge grounding and semantic self-supervision. Our system is a pipeline that first summarizes a long, medical, user-written question, using a supervised summarization loss. Then, our system performs a two-step retrieval to return answers. The system first matches the summarized user question with an FAQ from a trusted medical knowledge base, and then retrieves a fixed number of relevant sentences from the corresponding answer document. In the absence of labels for question matching or answer relevance, we design 3 novel, self-supervised and semantically-guided losses. We evaluate our model against two strong retrieval-based question answering baselines. Evaluators ask their own questions and rate the answers retrieved by our baselines and own system according to their relevance. They find that our system retrieves more relevant answers, while achieving speeds 20 times faster. Our self-supervised losses also help the summarizer achieve higher scores in ROUGE, as well as in human evaluation metrics. 2022.coling-1.241 @@ -2852,7 +2852,7 @@ YangYang BaohuaZhou QiZhang - XuanjingHuang + XuanjingHuang 2748–2758 Existing works on rumor resolution have shown great potential in recognizing word appearance and user participation. However, they ignore the intrinsic propagation mechanisms of rumors and present poor adaptive ability when unprecedented news emerges. To exploit the fine-grained rumor diffusion patterns and generalize rumor resolution methods, we formulate a predecessor task to identify triggering posts, and then exploit their characteristics to facilitate rumor verification. We design a tree-structured annotation interface and extend PHEME dataset with labels on the message level. Data analysis shows that triggers play a critical role in verifying rumors and present similar lingual patterns across irrelevant events. We propose a graph-based model considering the direction and interaction of information flow to implement role-aware rumor resolution. Experimental results demonstrate the effectiveness of our proposed model and progressive scheme. 2022.coling-1.242 @@ -2884,7 +2884,7 @@ <fixed-case>CL</fixed-case>o<fixed-case>SE</fixed-case>: Contrastive Learning of Subframe Embeddings for Political Bias Classification of News Media Michelle YoungJinKim - Kristen MarieJohnson + Kristen MarieJohnson 2780–2793 Framing is a political strategy in which journalists and politicians emphasize certain aspects of a societal issue in order to influence and sway public opinion. Frameworks for detecting framing in news articles or social media posts are critical in understanding the spread of biased information in our society. In this paper, we propose CLoSE, a multi-task BERT-based model which uses contrastive learning to embed indicators of frames from news articles in order to predict political bias. We evaluate the performance of our proposed model on subframes and political bias classification tasks. We also demonstrate the model’s classification accuracy on zero-shot and few-shot learning tasks, providing a promising avenue for framing detection in unlabeled data. 2022.coling-1.245 @@ -2902,7 +2902,7 @@ <fixed-case>CXR</fixed-case> Data Annotation and Classification with Pre-trained Language Models NinaZhou - Ai TiAw + Ai TiAw Zhuo HanLiu Cher hengTan YonghanTing @@ -2952,7 +2952,7 @@ XuanliHe LingjuanLyu LizhenQu - GholamrezaHaffari + GholamrezaHaffari 2849–2860 Machine-learning-as-a-service (MLaaS) has attracted millions of users to their splendid large-scale models. Although published as black-box APIs, the valuable models behind these services are still vulnerable to imitation attacks. Recently, a series of works have demonstrated that attackers manage to steal or extract the victim models. Nonetheless, none of the previous stolen models can outperform the original black-box APIs. In this work, we conduct unsupervised domain adaptation and multi-victim ensemble to showing that attackers could potentially surpass victims, which is beyond previous understanding of model extraction. Extensive experiments on both benchmark datasets and real-world APIs validate that the imitators can succeed in outperforming the original black-box models on transferred domains. We consider our work as a milestone in the research of imitation attack, especially on NLP APIs, as the superior performance could influence the defense or even publishing strategy of API providers. 2022.coling-1.251 @@ -2977,7 +2977,7 @@ QinLiu TaoGui QiZhang - XuanjingHuang + XuanjingHuang RuiXie WeiWu 2873–2882 @@ -3106,7 +3106,7 @@ Summarizing Patients’ Problems from Hospital Progress Notes Using Pre-trained Sequence-to-Sequence Models YanjunGao DmitriyDligach - TimothyMiller + TimothyMiller DongfangXu Matthew M. M.Churpek MajidAfshar @@ -3348,7 +3348,7 @@ Can Transformers Process Recursive Nested Constructions, Like Humans? YairLakretz - ThéoDesbordes + ThéoDesbordes DieuwkeHupkes StanislasDehaene 3226–3232 @@ -3385,7 +3385,7 @@ Parameter-Efficient Mixture-of-Experts Architecture for Pre-trained Language Models Ze-FengGao PeiyuLiu - Wayne XinZhao + Wayne XinZhao Zhong-YiLu Ji-RongWen 3263–3273 @@ -3397,7 +3397,7 @@ Pre-trained Token-replaced Detection Model as Few-shot Learner ZichengLi ShoushanLi - GuodongZhou + GuodongZhou 3274–3284 Pre-trained masked language models have demonstrated remarkable ability as few-shot learners. In this paper, as an alternative, we propose a novel approach to few-shot learning with pre-trained token-replaced detection models like ELECTRA. In this approach, we reformulate a classification or a regression task as a token-replaced detection problem. Specifically, we first define a template and label description words for each task and put them into the input to form a natural language prompt. Then, we employ the pre-trained token-replaced detection model to predict which label description word is the most original (i.e., least replaced) among all label description words in the prompt. A systematic evaluation on 16 datasets demonstrates that our approach outperforms few-shot learners with pre-trained masked language models in both one-sentence and two-sentence learning tasks. 2022.coling-1.289 @@ -3469,7 +3469,7 @@ <fixed-case>R</fixed-case>eal<fixed-case>M</fixed-case>ed<fixed-case>D</fixed-case>ial: A Real Telemedical Dialogue Dataset Collected from Online <fixed-case>C</fixed-case>hinese Short-Video Clips BoXu HongtongZhang - JianWang + JianWang XiaokunZhang DezhiHao LinlinZong @@ -3487,10 +3487,10 @@ Areej NasserMuhajab Isabella A.White GabrielWong - LuisEspinosa-Anke + LuisEspinosa-Anke LeonardoNeves FrancescoBarbieri - JoseCamacho-Collados + JoseCamacho-Collados 3353–3359 Language evolves over time, and word meaning changes accordingly. This is especially true in social media, since its dynamic nature leads to faster semantic shifts, making it challenging for NLP models to deal with new content and trends. However, the number of datasets and models that specifically address the dynamic nature of these social platforms is scarce. To bridge this gap, we present TempoWiC, a new benchmark especially aimed at accelerating research in social media-based meaning shift. Our results show that TempoWiC is a challenging benchmark, even for recently-released language models specialized in social media. 2022.coling-1.296 @@ -3500,7 +3500,7 @@ Automatic Generation of Large-scale Multi-turn Dialogues from <fixed-case>R</fixed-case>eddit DaniilHuryn William M.Hutsell - Jinho D.Choi + Jinho D.Choi 3360–3373 This paper presents novel methods to automatically convert posts and their comments from discussion forums such as Reddit into multi-turn dialogues. Our methods are generalizable to any forums; thus, they allow us to generate a massive amount of dialogues for diverse topics that can be used to pretrain language models. Four methods are introduced, Greedy_Baseline, Greedy_Advanced, Beam Search and Threading, which are applied to posts from 10 subreddits and assessed. Each method makes a noticeable improvement over its predecessor such that the best method shows an improvement of 36.3% over the baseline for appropriateness. Our best method is applied to posts from those 10 subreddits for the creation of a corpus comprising 10,098 dialogues (3.3M tokens), 570 of which are compared against dialogues in three other datasets, Blended Skill Talk, Daily Dialogue, and Topical Chat. Our dialogues are found to be more engaging but slightly less natural than the ones in the other datasets, while it costs a fraction of human labor and money to generate our corpus compared to the others. To the best of our knowledge, it is the first work to create a large multi-turn dialogue corpus from Reddit that can advance neural dialogue systems. 2022.coling-1.297 @@ -3523,7 +3523,7 @@ <fixed-case>T</fixed-case>witter Topic Classification DimosthenisAntypas AsahiUshio - JoseCamacho-Collados + JoseCamacho-Collados VitorSilva LeonardoNeves FrancescoBarbieri @@ -3535,7 +3535,7 @@ Layer or Representation Space: What Makes <fixed-case>BERT</fixed-case>-based Evaluation Metrics Robust? Doan Nam LongVu - Nafise SadatMoosavi + Nafise SadatMoosavi SteffenEger 3401–3411 The evaluation of recent embedding-based evaluation metrics for text generation is primarily based on measuring their correlation with human evaluations on standard benchmarks. However, these benchmarks are mostly from similar domains to those used for pretraining word embeddings. This raises concerns about the (lack of) generalization of embedding-based metrics to new and noisy domains that contain a different vocabulary than the pretraining data. In this paper, we examine the robustness of BERTScore, one of the most popular embedding-based metrics for text generation. We show that (a) an embedding-based metric that has the highest correlation with human evaluations on a standard benchmark can have the lowest correlation if the amount of input noise or unknown tokens increases, (b) taking embeddings from the first layer of pretrained models improves the robustness of all metrics, and (c) the highest robustness is achieved when using character-level embeddings, instead of token-based embeddings, from the first layer of the pretrained model. @@ -3548,7 +3548,7 @@ ZoeyLiu QingyunYang YujingHuang - EmilyPrud’hommeaux + EmilyPrud’hommeaux 3412–3419 Difficulties with social aspects of language are among the hallmarks of autism spectrum disorder (ASD). These communication differences are thought to contribute to the challenges that adults with ASD experience when seeking employment, underscoring the need for interventions that focus on improving areas of weakness in pragmatic and social language. In this paper, we describe a transformer-based framework for identifying linguistic features associated with social aspects of communication using a corpus of conversations between adults with and without ASD and neurotypical conversational partners produced while engaging in collaborative tasks. While our framework yields strong accuracy overall, performance is significantly worse for the language of participants with ASD, suggesting that they use a more diverse set of strategies for some social linguistic functions. These results, while showing promise for the development of automated language analysis tools to support targeted language interventions for ASD, also reveal weaknesses in the ability of large contextualized language models to model neuroatypical language. 2022.coling-1.301 @@ -3568,7 +3568,7 @@ <fixed-case>L</fixed-case>ip<fixed-case>K</fixed-case>ey: A Large-Scale News Dataset for Absent Keyphrases Generation and Abstractive Summarization FajriKoto - TimothyBaldwin + TimothyBaldwin Jey HanLau 3427–3437 Summaries, keyphrases, and titles are different ways of concisely capturing the content of a document. While most previous work has released the datasets of keyphrases and summarization separately, in this work, we introduce LipKey, the largest news corpus with human-written abstractive summaries, absent keyphrases, and titles. We jointly use the three elements via multi-task training and training as joint structured inputs, in the context of document summarization. We find that including absent keyphrases and titles as additional context to the source document improves transformer-based summarization models. @@ -3590,7 +3590,7 @@ Effective Data Augmentation for Sentence Classification Using One <fixed-case>VAE</fixed-case> per Class FrédéricPiedboeuf - PhilippeLanglais + PhilippeLanglais 3454–3464 In recent years, data augmentation has become an important field of machine learning. While images can use simple techniques such as cropping or rotating, textual data augmentation needs more complex manipulations to ensure that the generated examples are useful. Variational auto-encoders (VAE) and its conditional variant the Conditional-VAE (CVAE) are often used to generate new textual data, both relying on a good enough training of the generator so that it doesn’t create examples of the wrong class. In this paper, we explore a simpler way to use VAE for data augmentation: the training of one VAE per class. We show on several dataset sizes, as well as on four different binary classification tasks, that it systematically outperforms other generative data augmentation techniques. 2022.coling-1.305 @@ -3628,7 +3628,7 @@ GyeongminKim JinsungKim JunyoungSon - HeuiseokLim + HeuiseokLim 3496–3505 As digitized traditional cultural heritage documents have rapidly increased, resulting in an increased need for preservation and management, practical recognition of entities and typification of their classes has become essential. To achieve this, we propose KoCHET - a Korean cultural heritage corpus for the typical entity-related tasks, i.e., named entity recognition (NER), relation extraction (RE), and entity typing (ET). Advised by cultural heritage experts based on the data construction guidelines of government-affiliated organizations, KoCHET consists of respectively 112,362, 38,765, 113,198 examples for NER, RE, and ET tasks, covering all entity types related to Korean cultural heritage. Moreover, unlike the existing public corpora, modified redistribution can be allowed both domestic and foreign researchers. Our experimental results make the practical usability of KoCHET more valuable in terms of cultural heritage. We also provide practical insights of KoCHET in terms of statistical and linguistic analysis. Our corpus is freely available at https://github.com/Gyeongmin47/KoCHET. 2022.coling-1.308 @@ -3653,7 +3653,7 @@ JieZhao GiuseppeCastellucci MarcusCollins - ShervinMalmasi + ShervinMalmasi OlegRokhlenko EugeneAgichtein 3514–3529 @@ -3680,7 +3680,7 @@ JeskaBuhmann MaximeDe Bruyn EhsanLotfi - WalterDaelemans + WalterDaelemans 3539–3549 FAQs are important resources to find information. However, especially if a FAQ concerns many question-answer pairs, it can be a difficult and time-consuming job to find the answer you are looking for. A FAQ chatbot can ease this process by automatically retrieving the relevant answer to a user’s question. We present VaccinChatNL, a Dutch FAQ corpus on the topic of COVID-19 vaccination. Starting with 50 question-answer pairs we built VaccinChat, a FAQ chatbot, which we used to gather more user questions that were also annotated with the appropriate or new answer classes. This iterative process of gathering user questions, annotating them, and retraining the model with the increased data set led to a corpus that now contains 12,883 user questions divided over 181 answers. We provide the first publicly available Dutch FAQ answering data set of this size with large groups of semantically equivalent human-paraphrased questions. Furthermore, our study shows that before fine-tuning a classifier, continued pre-training of Dutch language models with task- and/or domain-specific data improves classification results. In addition, we show that large groups of semantically similar questions are important for obtaining well-performing intent classification models. 2022.coling-1.312 @@ -3688,7 +3688,7 @@ Benchmarking Automated Clinical Language Simplification: Dataset, Algorithm, and Evaluation - JunyuLuo + JunyuLuo JunxianLin ChiLin CaoXiao @@ -3704,7 +3704,7 @@ KalvinChang ChenxuanCui YoungminKim - David R.Mortensen + David R.Mortensen 3563–3569 Most comparative datasets of Chinese varieties are not digital; however, Wiktionary includes a wealth of transcriptions of words from these varieties. The usefulness of these data is limited by the fact that they use a wide range of variety-specific romanizations, making data difficult to compare. The current work collects this data into a single constituent (IPA, or International Phonetic Alphabet) and structured form (TSV) for use in comparative linguistics and Chinese NLP. At the time of writing, the dataset contains 67,943 entries across 8 varieties and Middle Chinese. The dataset is validated on a protoform reconstruction task using an encoder-decoder cross-attention architecture (Meloni et al 2021), achieving an accuracy of 54.11%, a PER (phoneme error rate) of 17.69%, and a FER (feature error rate) of 6.60%. 2022.coling-1.314 @@ -3728,7 +3728,7 @@ <fixed-case>IMPARA</fixed-case>: Impact-Based Metric for <fixed-case>GEC</fixed-case> Using Parallel Data KokiMaeda MasahiroKaneko - NaoakiOkazaki + NaoakiOkazaki 3578–3588 Automatic evaluation of grammatical error correction (GEC) is essential in developing useful GEC systems. Existing methods for automatic evaluation require multiple reference sentences or manual scores. However, such resources are expensive, thereby hindering automatic evaluation for various domains and correction styles. This paper proposes an Impact-based Metric for GEC using PARAllel data, IMPARA, which utilizes correction impacts computed by parallel data comprising pairs of grammatical/ungrammatical sentences. As parallel data is cheaper than manually assessing evaluation scores, IMPARA can reduce the cost of data creation for automatic evaluation. Correlations between IMPARA and human scores indicate that IMPARA is comparable or better than existing evaluation methods. Furthermore, we find that IMPARA can perform evaluations that fit different domains and correction styles trained on various parallel data. 2022.coling-1.316 @@ -3747,7 +3747,7 @@ Are Pretrained Multilingual Models Equally Fair across Languages? LauraCabello Piqueras - AndersSøgaard + AndersSøgaard 3597–3605 Pretrained multilingual language models can help bridge the digital language divide, enabling high-quality NLP models for lower-resourced languages. Studies of multilingual models have so far focused on performance, consistency, and cross-lingual generalisation. However, with their wide-spread application in the wild and downstream societal impact, it is important to put multilingual models under the same scrutiny as monolingual models. This work investigates the group fairness of multilingual models, asking whether these models are equally fair across languages. To this end, we create a new four-way multilingual dataset of parallel cloze test examples (MozArt), equipped with demographic information (balanced with regard to gender and native tongue) about the test participants. We evaluate three multilingual models on MozArt –mBERT, XLM-R, and mT5– and show that across the four target languages, the three models exhibit different levels of group disparity, e.g., exhibiting near-equal risk for Spanish, but high levels of disparity for German. 2022.coling-1.318 @@ -3847,7 +3847,7 @@ A Data-driven Approach to Named Entity Recognition for Early <fixed-case>M</fixed-case>odern <fixed-case>F</fixed-case>rench - PedroOrtiz Suarez + PedroOrtiz Suarez SimonGabay 3722–3730 Named entity recognition has become an increasingly useful tool for digital humanities research, specially when it comes to historical texts. However, historical texts pose a wide range of challenges to both named entity recognition and natural language processing in general that are still difficult to address even with modern neural methods. In this article we focus in named entity recognition for historical French, and in particular for Early Modern French (16th-18th c.), i.e. Ancien Régime French. However, instead of developing a specialised architecture to tackle the particularities of this state of language, we opt for a data-driven approach by developing a new corpus with fine-grained entity annotation, covering three centuries of literature corresponding to the early modern period; we try to annotate as much data as possible producing a corpus that is many times bigger than the most popular NER evaluation corpora for both Contemporary English and French. We then fine-tune existing state-of-the-art architectures for Early Modern and Contemporary French, obtaining results that are on par with those of the current state-of-the-art NER systems for Contemporary English. Both the corpus and the fine-tuned models are released. @@ -3928,7 +3928,7 @@ <fixed-case>M</fixed-case>ulti<fixed-case>C</fixed-case>o<fixed-case>NER</fixed-case>: A Large-scale Multilingual Dataset for Complex Named Entity Recognition - ShervinMalmasi + ShervinMalmasi AnjieFang BesnikFetahu SudiptaKar @@ -3960,8 +3960,8 @@ <fixed-case>QSTS</fixed-case>: A Question-Sensitive Text Similarity Measure for Question Generation - Sujatha DasGollapalli - See-KiongNg + Sujatha DasGollapalli + See-KiongNg 3835–3846 While question generation (QG) has received significant focus in conversation modeling and text generation research, the problems of comparing questions and evaluation of QG models have remained inadequately addressed. Indeed, QG models continue to be evaluated using traditional measures such as BLEU, METEOR, and ROUGE scores which were designed for other text generation problems. We propose QSTS, a novel Question-Sensitive Text Similarity measure for comparing two questions by characterizing their target intent based on question class, named-entity, and semantic similarity information from the two questions. We show that QSTS addresses several shortcomings of existing measures that depend on n-gram overlap scores and obtains superior results compared to traditional measures on publicly-available QG datasets. We also collect a novel dataset SimQG, for enabling question similarity research in QG contexts. SimQG contains questions generated by state-of-the-art QG models along with human judgements on their relevance with respect to the passage context they were generated for as well as when compared to the given reference question. Using SimQG, we showcase the key aspect of QSTS that differentiates it from all existing measures. QSTS is not only able to characterize similarity between two questions, but is also able to score questions with respect to passage contexts. Thus QSTS is, to our knowledge, the first metric that enables the measurement of QG performance in a reference-free manner. 2022.coling-1.337 @@ -3992,7 +3992,7 @@ <fixed-case>I</fixed-case>nfer<fixed-case>ES</fixed-case> : A Natural Language Inference Corpus for <fixed-case>S</fixed-case>panish Featuring Negation-Based Contrastive and Adversarial Examples VenelinKovatchev - MarionaTaulé + MarionaTaulé 3873–3884 In this paper we present InferES - an original corpus for Natural Language Inference (NLI) in European Spanish. We propose, implement, and analyze a variety of corpus-creating strategies utilizing expert linguists and crowd workers. The objectives behind InferES are to provide high-quality data, and at the same time to facilitate the systematic evaluation of automated systems. Specifically, we focus on measuring and improving the performance of machine learning systems on negation-based adversarial examples and their ability to generalize across out-of-distribution topics. We train two transformer models on InferES (8,055 gold examples) in a variety of scenarios. Our best model obtains 72.8% accuracy, leaving a lot of room for improvement. The “hypothesis-only” baseline performs only 2%-5% higher than majority, indicating much fewer annotation artifacts than prior work. We show that models trained on InferES generalize very well across topics (both in- and out-of-distribution) and perform moderately well on negation-based adversarial examples. 2022.coling-1.340 @@ -4002,8 +4002,8 @@ <fixed-case>P</fixed-case>ara<fixed-case>Z</fixed-case>h-22<fixed-case>M</fixed-case>: A Large-Scale <fixed-case>C</fixed-case>hinese Parabank via Machine Translation WenjieHao HongfeiXu - DeyiXiong - HongyingZan + DeyiXiong + HongyingZan LinglingMu 3885–3897 Paraphrasing, i.e., restating the same meaning in different ways, is an important data augmentation approach for natural language processing (NLP). Zhang et al. (2019b) propose to extract sentence-level paraphrases from multiple Chinese translations of the same source texts, and construct the PKU Paraphrase Bank of 0.5M sentence pairs. However, despite being the largest Chinese parabank to date, the size of PKU parabank is limited by the availability of one-to-many sentence translation data, and cannot well support the training of large Chinese paraphrasers. In this paper, we relieve the restriction with one-to-many sentence translation data, and construct ParaZh-22M, a larger Chinese parabank that is composed of 22M sentence pairs, based on one-to-one bilingual sentence translation data and machine translation (MT). In our data augmentation experiments, we show that paraphrasing based on ParaZh-22M can bring about consistent and significant improvements over several strong baselines on a wide range of Chinese NLP tasks, including a number of Chinese natural language understanding benchmarks (CLUE) and low-resource machine translation. @@ -4051,8 +4051,8 @@ <fixed-case>S</fixed-case>inglish Message Paraphrasing: A Joint Task of Creole Translation and Text Normalization ZhengyuanLiu ShikangNi - Ai TiAw - Nancy F.Chen + Ai TiAw + Nancy F.Chen 3924–3936 Within the natural language processing community, English is by far the most resource-rich language. There is emerging interest in conducting translation via computational approaches to conform its dialects or creole languages back to standard English. This computational approach paves the way to leverage generic English language backbones, which are beneficial for various downstream tasks. However, in practical online communication scenarios, the use of language varieties is often accompanied by noisy user-generated content, making this translation task more challenging. In this work, we introduce a joint paraphrasing task of creole translation and text normalization of Singlish messages, which can shed light on how to process other language varieties and dialects. We formulate the task in three different linguistic dimensions: lexical level normalization, syntactic level editing, and semantic level rewriting. We build an annotated dataset of Singlish-to-Standard English messages, and report performance on a perturbation-resilient sequence-to-sequence model. Experimental results show that the model produces reasonable generation results, and can improve the performance of downstream tasks like stance detection. 2022.coling-1.345 @@ -4074,7 +4074,7 @@ One Word, Two Sides: Traces of Stance in Contextualized Word Representations - AinaGarí Soler + AinaGarí Soler MatthieuLabeau ChloéClavel 3950–3959 @@ -4097,7 +4097,7 @@ Modelling Commonsense Properties Using Pre-Trained Bi-Encoders AmitGajbhiye - LuisEspinosa-Anke + LuisEspinosa-Anke StevenSchockaert 3971–3983 Grasping the commonsense properties of everyday concepts is an important prerequisite to language understanding. While contextualised language models are reportedly capable of predicting such commonsense properties with human-level accuracy, we argue that such results have been inflated because of the high similarity between training and test concepts. This means that models which capture concept similarity can perform well, even if they do not capture any knowledge of the commonsense properties themselves. In settings where there is no overlap between the properties that are considered during training and testing, we find that the empirical performance of standard language models drops dramatically. To address this, we study the possibility of fine-tuning language models to explicitly model concepts and their properties. In particular, we train separate concept and property encoders on two types of readily available data: extracted hyponym-hypernym pairs and generic sentences. Our experimental results show that the resulting encoders allow us to predict commonsense properties with much higher accuracy than is possible by directly fine-tuning language models. We also present experimental results for the related task of unsupervised hypernym discovery. @@ -4210,7 +4210,7 @@ Testing Large Language Models on Compositionality and Inference with Phrase-Level Adjective-Noun Entailment LorenzoBertolini JulieWeeds - DavidWeir + DavidWeir 4084–4100 Previous work has demonstrated that pre-trained large language models (LLM) acquire knowledge during pre-training which enables reasoning over relationships between words (e.g, hyponymy) and more complex inferences over larger units of meaning such as sentences. Here, we investigate whether lexical entailment (LE, i.e. hyponymy or the is a relation between words) can be generalised in a compositional manner. Accordingly, we introduce PLANE (Phrase-Level Adjective-Noun Entailment), a new benchmark to test models on fine-grained compositional entailment using adjective-noun phrases. Our experiments show that knowledge extracted via In–Context and transfer learning is not enough to solve PLANE. However, a LLM trained on PLANE can generalise well to out–of–distribution sets, since the required knowledge can be stored in the representations of subwords (SW) tokens. 2022.coling-1.359 @@ -4219,7 +4219,7 @@ Does <fixed-case>BERT</fixed-case> Recognize an Agent? Modeling <fixed-case>D</fixed-case>owty’s Proto-Roles with Contextual Embeddings MattiaProietti - GianlucaLebani + GianlucaLebani AlessandroLenci 4101–4112 Contextual embeddings build multidimensional representations of word tokens based on their context of occurrence. Such models have been shown to achieve a state-of-the-art performance on a wide variety of tasks. Yet, the community struggles in understanding what kind of semantic knowledge these representations encode. We report a series of experiments aimed at investigating to what extent one of such models, BERT, is able to infer the semantic relations that, according to Dowty’s Proto-Roles theory, a verbal argument receives by virtue of its role in the event described by the verb. This hypothesis were put to test by learning a linear mapping from the BERT’s verb embeddings to an interpretable space of semantic properties built from the linguistic dataset by White et al. (2016). In a first experiment we tested whether the semantic properties inferred from a typed version of the BERT embeddings would be more linguistically plausible than those produced by relying on static embeddings. We then move to evaluate the semantic properties inferred from the contextual embeddings both against those available in the original dataset, as well as by assessing their ability to model the semantic properties possessed by the agent of the verbs participating in the so-called causative alternation. @@ -4229,7 +4229,7 @@ Towards Structure-aware Paraphrase Identification with Phrase Alignment Using Sentence Encoders QiweiPeng - DavidWeir + DavidWeir JulieWeeds 4113–4123 Previous works have demonstrated the effectiveness of utilising pre-trained sentence encoders based on their sentence representations for meaning comparison tasks. Though such representations are shown to capture hidden syntax structures, the direct similarity comparison between them exhibits weak sensitivity to word order and structural differences in given sentences. A single similarity score further makes the comparison process hard to interpret. Therefore, we here propose to combine sentence encoders with an alignment component by representing each sentence as a list of predicate-argument spans (where their span representations are derived from sentence encoders), and decomposing the sentence-level meaning comparison into the alignment between their spans for paraphrase identification tasks. Empirical results show that the alignment component brings in both improved performance and interpretability for various sentence encoders. After closer investigation, the proposed approach indicates increased sensitivity to structural difference and enhanced ability to distinguish non-paraphrases with high lexical overlap. @@ -4251,7 +4251,7 @@ Emotion Enriched Retrofitted Word Embeddings SapanShah SreedharReddy - PushpakBhattacharyya + PushpakBhattacharyya 4136–4148 Word embeddings learned using the distributional hypothesis (e.g., GloVe, Word2vec) are good at encoding various lexical-semantic relations. However, they do not capture the emotion aspects of words. We present a novel retrofitting method for updating the vectors of emotion bearing words like fun, offence, angry, etc. The retrofitted embeddings achieve better inter-cluster and intra-cluster distance for words having the same emotions, e.g., the joy cluster containing words like fun, happiness, etc., and the anger cluster with words like offence, rage, etc., as evaluated through different cluster quality metrics. For the downstream tasks on sentiment analysis and sarcasm detection, simple classification models, such as SVM and Attention Net, learned using our retrofitted embeddings perform better than their pre-trained counterparts (about 1.5 % improvement in F1-score) as well as other benchmarks. Furthermore, the difference in performance is more pronounced in the limited data setting. 2022.coling-1.363 @@ -4282,8 +4282,8 @@ Unsupervised Lexical Substitution with Decontextualised Embeddings TakashiWada - TimothyBaldwin - YujiMatsumoto + TimothyBaldwin + YujiMatsumoto Jey HanLau 4172–4185 We propose a new unsupervised method for lexical substitution using pre-trained language models. Compared to previous approaches that use the generative capability of language models to predict substitutes, our method retrieves substitutes based on the similarity of contextualised and decontextualised word embeddings, i.e. the average contextual representation of a word in multiple contexts. We conduct experiments in English and Italian, and show that our method substantially outperforms strong baselines and establishes a new state-of-the-art without any explicit supervision or fine-tuning. We further show that our method performs particularly well at predicting low-frequency substitutes, and also generates a diverse list of substitute candidates, reducing morphophonetic or morphosyntactic biases induced by article-noun agreement. @@ -4338,8 +4338,8 @@ Noisy Label Regularisation for Textual Regression YuxiaWang - TimothyBaldwin - KarinVerspoor + TimothyBaldwin + KarinVerspoor 4228–4240 Training with noisy labelled data is known to be detrimental to model performance, especially for high-capacity neural network models in low-resource domains. Our experiments suggest that standard regularisation strategies, such as weight decay and dropout, are ineffective in the face of noisy labels. We propose a simple noisy label detection method that prevents error propagation from the input layer. The approach is based on the observation that the projection of noisy labels is learned through memorisation at advanced stages of learning, and that the Pearson correlation is sensitive to outliers. Extensive experiments over real-world human-disagreement annotations as well as randomly-corrupted and data-augmented labels, across various tasks and domains, demonstrate that our method is effective, regularising noisy labels and improving generalisation performance. 2022.coling-1.371 @@ -4374,7 +4374,7 @@ Revisiting Syllables in Language Modelling and Their Application on Low-Resource Machine Translation - ArturoOncevay + ArturoOncevay Kervy DanteRivas Rojas Liz KarenChavez Sanchez RobertoZariquiey @@ -4396,7 +4396,7 @@ Fashioning Local Designs from Generic Speech Technologies in an <fixed-case>A</fixed-case>ustralian Aboriginal Community ÉricLe Ferrand StevenBird - LaurentBesacier + LaurentBesacier 4274–4285 An increasing number of papers have been addressing issues related to low-resource languages and the transcription bottleneck paradigm. After several years spent in Northern Australia, where some of the strongest Aboriginal languages are spoken, we could observe a gap between the motivations depicted in research contributions in this space and the Northern Australian context. In this paper, we address this gap in research by exploring the potential of speech recognition in an Aboriginal community. We describe our work from training a spoken term detection system to its implementation in an activity with Aboriginal participants. We report here on one side how speech recognition technologies can find their place in an Aboriginal context and, on the other, methodological paths that allowed us to reach better comprehension and engagement from Aboriginal participants. 2022.coling-1.376 @@ -4405,8 +4405,8 @@ Few-Shot Pidgin Text Adaptation via Contrastive Fine-Tuning ErnieChang - Jesujoba O.Alabi - David IfeoluwaAdelani + Jesujoba O.Alabi + David IfeoluwaAdelani VeraDemberg 4286–4291 The surging demand for multilingual dialogue systems often requires a costly labeling process for each language addition. For low resource languages, human annotators are continuously tasked with the adaptation of resource-rich language utterances for each new domain. However, this prohibitive and impractical process can often be a bottleneck for low resource languages that are still without proper translation systems nor parallel corpus. In particular, it is difficult to obtain task-specific low resource language annotations for the English-derived creoles (e.g. Nigerian and Cameroonian Pidgin). To address this issue, we utilize the pretrained language models i.e. BART which has shown great potential in language generation/understanding – we propose to finetune the BART model to generate utterances in Pidgin by leveraging the proximity of the source and target languages, and utilizing positive and negative examples in constrastive training objectives. We collected and released the first parallel Pidgin-English conversation corpus in two dialogue domains and showed that this simple and effective technique is suffice to yield impressive results for English-to-Pidgin generation, which are two closely-related languages. @@ -4417,7 +4417,7 @@ Penalizing Divergence: Multi-Parallel Translation for Low-Resource Languages of <fixed-case>N</fixed-case>orth <fixed-case>A</fixed-case>merica GarrettNicolai ChangbingYang - MiikkaSilfverberg + MiikkaSilfverberg 4292–4298 This paper explores a special case in multilingual machine translation: so called multi-parallel translation, where the target data for all language pairs are identical. While multi-parallelism offers benefits which are not available in a standard translation setting, translation models can easily overfit when training data are limited. We introduce a regularizer, the divergence penalty, which penalizes the translation model when it represents source sentences with identical target translations in divergent ways. Experiments on very low-resourced Indigenous North American languages show that an initially deficient multilingual translator can improve by 4.9 BLEU through mBART pre-training, and 5.5 BLEU points with the strategic addition of monolingual data, and that a divergence penalty leads to further increases of 0.4 BLEU. Further experiments on Germanic languages demonstrate a improvement of 0.5 BLEU when applying the divergence penalty. An investigation of the neural encoder representations learned by our translation models shows that the divergence penalty encourages models to learn a unified neural interlingua. 2022.coling-1.378 @@ -4458,8 +4458,8 @@ Adapting Pre-trained Language Models to <fixed-case>A</fixed-case>frican Languages via Multilingual Adaptive Fine-Tuning - Jesujoba O.Alabi - David IfeoluwaAdelani + Jesujoba O.Alabi + David IfeoluwaAdelani MariusMosbach DietrichKlakow 4336–4349 @@ -4479,7 +4479,7 @@ Improving Low-resource <fixed-case>RRG</fixed-case> Parsing with Cross-lingual Self-training KilianEvang LauraKallmeyer - JakubWaszczuk + JakubWaszczuk Kiluvon Prince TatianaBladier SimonPetitjean @@ -4508,7 +4508,7 @@ Towards Multi-Sense Cross-Lingual Alignment of Contextual Embeddings LinlinLiu Thien HaiNguyen - ShafiqJoty + ShafiqJoty LidongBing LuoSi 4381–4396 @@ -4520,7 +4520,7 @@ How to Parse a Creole: When Martinican Creole Meets <fixed-case>F</fixed-case>rench LudovicMompelat DanielDakota - SandraKübler + SandraKübler 4397–4406 We investigate methods to develop a parser for Martinican Creole, a highly under-resourced language, using a French treebank. We compare transfer learning and multi-task learning models and examine different input features and strategies to handle the massive size imbalance between the treebanks. Surprisingly, we find that a simple concatenated (French + Martinican Creole) baseline yields optimal results even though it has access to only 80 Martinican Creole sentences. POS embeddings work better than lexical ones, but they suffer from negative transfer. 2022.coling-1.387 @@ -4540,7 +4540,7 @@ Nanda PutriRomadhona Sin-EnLu Bo-HanLu - Richard Tzong-HanTsai + Richard Tzong-HanTsai 4418–4428 Code-mixing refers to the mixed use of multiple languages. It is prevalent in multilingual societies and is also one of the most challenging natural language processing tasks. In this paper, we study Bahasa Rojak, a dialect popular in Malaysia that consists of English, Malay, and Chinese. Aiming to establish a model to deal with the code-mixing phenomena of Bahasa Rojak, we use data augmentation to automatically construct the first Bahasa Rojak corpus for pre-training language models, which we name the Bahasa Rojak Crawled Corpus (BRCC). We also develop a new pre-trained model called “Mixed XLM”. The model can tag the language of the input token automatically to process code-mixing input. Finally, to test the effectiveness of the Mixed XLM model pre-trained on BRCC for social media scenarios where code-mixing is found frequently, we compile a new Bahasa Rojak sentiment analysis dataset, SentiBahasaRojak, with a Kappa value of 0.77. 2022.coling-1.389 @@ -4570,7 +4570,7 @@ Zero-shot Disfluency Detection for <fixed-case>I</fixed-case>ndian Languages RohitKundu PreethiJyothi - PushpakBhattacharyya + PushpakBhattacharyya 4442–4454 Disfluencies that appear in the transcriptions from automatic speech recognition systems tend to impair the performance of downstream NLP tasks. Disfluency correction models can help alleviate this problem. However, the unavailability of labeled data in low-resource languages impairs progress. We propose using a pretrained multilingual model, finetuned only on English disfluencies, for zero-shot disfluency detection in Indian languages. We present a detailed pipeline to synthetically generate disfluent text and create evaluation datasets for four Indian languages: Bengali, Hindi, Malayalam, and Marathi. Even in the zero-shot setting, we obtain F1 scores of 75 and higher on five disfluency types across all four languages. We also show the utility of synthetically generated disfluencies by evaluating on real disfluent text in Bengali, Hindi, and Marathi. Finetuning the multilingual model on additional synthetic Hindi disfluent text nearly doubles the number of exact matches and yields a 20-point boost in F1 scores when evaluated on real Hindi disfluent text, compared to training with only English disfluent text. 2022.coling-1.392 @@ -4599,8 +4599,8 @@ XiaolinXing YuHong MinhanXu - JianminYao - GuodongZhou + JianminYao + GuodongZhou 4481–4491 Training Neural Machine Translation (NMT) models suffers from sparse parallel data, in the infrequent translation scenarios towards low-resource source languages. The existing solutions primarily concentrate on the utilization of Parent-Child (PC) transfer learning. It transfers well-trained NMT models on high-resource languages (namely Parent NMT) to low-resource languages, so as to produce Child NMT models by fine-tuning. It has been carefully demonstrated that a variety of PC variants yield significant improvements for low-resource NMT. In this paper, we intend to enhance PC-based NMT by a bidirectionally-adaptive learning strategy. Specifically, we divide inner constituents (6 transformers) of Parent encoder into two “teams”, i.e., T1 and T2. During representation learning, T1 learns to encode low-resource languages conditioned on bilingual shareable latent space. Generative adversarial network and masked language modeling are used for space-shareable encoding. On the other hand, T2 is straightforwardly transferred to low-resource languages, and fine-tuned together with T1 for low-resource translation. Briefly, T1 and T2 take actions separately for different goals. The former aims to adapt to characteristics of low-resource languages during encoding, while the latter adapts to translation experiences learned from high-resource languages. We experiment on benchmark corpora SETIMES, conducting low-resource NMT for Albanian (Sq), Macedonian (Mk), Croatian (Hr) and Romanian (Ro). Experimental results show that our method yields substantial improvements, which allows the NMT performance to reach BLEU4-scores of 62.24%, 56.93%, 50.53% and 54.65% for Sq, Mk, Hr and Ro, respectively. 2022.coling-1.395 @@ -4656,7 +4656,7 @@ ArpitaKundu SubhasishGhosh PratikSaini - TapasNayak + TapasNayak IndrajitBhattacharya 4537–4543 Predicting difficulty of questions is crucial for technical interviews. However, such questions are long-form and more open-ended than factoid and multiple choice questions explored so far for question difficulty prediction. Existing models also require large volumes of candidate response data for training. We study weak-supervision and use unsupervised algorithms for both question generation and difficulty prediction. We create a dataset of interview questions with difficulty scores for deep learning and use it to evaluate SOTA models for question difficulty prediction trained using weak supervision. Our analysis brings out the task’s difficulty as well as the promise of weak supervision for it. @@ -4715,7 +4715,7 @@ YejinKim HodongLee H. HowieHuang - HeuiseokLim + HeuiseokLim 4585–4592 Recent pre-trained language models (PLMs) achieved great success on many natural language processing tasks through learning linguistic features and contextualized sentence representation. Since attributes captured in stacked layers of PLMs are not clearly identified, straightforward approaches such as embedding the last layer are commonly preferred to derive sentence representations from PLMs. This paper introduces the attention-based pooling strategy, which enables the model to preserve layer-wise signals captured in each layer and learn digested linguistic features for downstream tasks. The contrastive learning objective can adapt the layer-wise attention pooling to both unsupervised and supervised manners. It results in regularizing the anisotropic space of pre-trained embeddings and being more uniform. We evaluate our model on standard semantic textual similarity (STS) and semantic search tasks. As a result, our method improved the performance of the base contrastive learned BERT_{base} and variants. 2022.coling-1.405 @@ -4807,7 +4807,7 @@ Accelerating Inference for Pretrained Language Models by Unified Multi-Perspective Early Exiting JunKong JinWang - Liang-ChihYu + Liang-ChihYu XuejieZhang 4677–4686 Conditional computation algorithms, such as the early exiting (EE) algorithm, can be applied to accelerate the inference of pretrained language models (PLMs) while maintaining competitive performance on resource-constrained devices. However, this approach is only applied to the vertical architecture to decide which layers should be used for inference. Conversely, the operation of the horizontal perspective is ignored, and the determination of which tokens in each layer should participate in the computation fails, leading to a high redundancy for adaptive inference. To address this limitation, a unified horizontal and vertical multi-perspective early exiting (MPEE) framework is proposed in this study to accelerate the inference of transformer-based models. Specifically, the vertical architecture uses recycling EE classifier memory and weighted self-distillation to enhance the performance of the EE classifiers. Then, the horizontal perspective uses recycling class attention memory to emphasize the informative tokens. Conversely, the tokens with less information are truncated by weighted fusion and isolated from the following computation. Based on this, both horizontal and vertical EE are unified to obtain a better tradeoff between performance and efficiency. Extensive experimental results show that MPEE can achieve higher acceleration inference with competent performance than existing competitive methods. @@ -4841,7 +4841,7 @@ MehdiRezagholizadeh AbbasGhaddar KhalilBibi - PhillippeLanglais + PhillippeLanglais PascalPoupart 4707–4713 Knowledge distillation (KD) is an efficient framework for compressing large-scale pre-trained language models. Recent years have seen a surge of research aiming to improve KD by leveraging Contrastive Learning, Intermediate Layer Distillation, Data Augmentation, and Adversarial Training. In this work, we propose a learning-based data augmentation technique tailored for knowledge distillation, called CILDA. To the best of our knowledge, this is the first time that intermediate layer representations of the main task are used in improving the quality of augmented samples. More precisely, we introduce an augmentation technique for KD based on intermediate layer matching using contrastive loss to improve masked adversarial data augmentation. CILDA outperforms existing state-of-the-art KD approaches on the GLUE benchmark, as well as in an out-of-domain evaluation. @@ -4925,7 +4925,7 @@ HandeCelikkanat VinitRavishankar MathiasCreutz - JörgTiedemann + JörgTiedemann 4788–4800 We analyze the learning dynamics of neural language and translation models using Loss Change Allocation (LCA), an indicator that enables a fine-grained analysis of parameter updates when optimizing for the loss function. In other words, we can observe the contributions of different network components at training time. In this article, we systematically study masked language modeling, causal language modeling, and machine translation. We show that the choice of training objective leads to distinctive optimization procedures, even when performed on comparable Transformer architectures. We demonstrate how the various Transformer parameters are used during training, supporting that the feed-forward components of each layer are the main contributors to the optimization procedure. Finally, we find that the learning dynamics are not affected by data size and distribution but rather determined by the learning objective. 2022.coling-1.424 @@ -5041,7 +5041,7 @@ Knowledge Distillation with Reptile Meta-Learning for Pretrained Language Model Compression XingeMa JinWang - Liang-ChihYu + Liang-ChihYu XuejieZhang 4907–4917 The billions, and sometimes even trillions, of parameters involved in pre-trained language models significantly hamper their deployment in resource-constrained devices and real-time applications. Knowledge distillation (KD) can transfer knowledge from the original model (i.e., teacher) into a compact model (i.e., student) to achieve model compression. However, previous KD methods have usually frozen the teacher and applied its immutable output feature maps as soft labels to guide the student’s training. Moreover, the goal of the teacher is to achieve the best performance on downstream tasks rather than knowledge transfer. Such a fixed architecture may limit the teacher’s teaching and student’s learning abilities. Herein, a knowledge distillation method with reptile meta-learning is proposed to facilitate the transfer of knowledge from the teacher to the student. The teacher can continuously meta-learn the student’s learning objective to adjust its parameters for maximizing the student’s performance throughout the distillation process. In this way, the teacher learns to teach, produces more suitable soft labels, and transfers more appropriate knowledge to the student, resulting in improved performance. Unlike previous KD using meta-learning, the proposed method only needs to calculate the first-order derivatives to update the teacher, leading to lower computational cost but better convergence. Extensive experiments on the GLUE benchmark show the competitive performance achieved by the proposed method. For reproducibility, the code for this paper is available at: https://github.com/maxinge8698/ReptileDistil. @@ -5148,7 +5148,7 @@ ChenchenDing HidekiTanaka MasaoUtiyama - EiichiroSumita + EiichiroSumita 5014–5020 In this paper we present FeatureBART, a linguistically motivated sequence-to-sequence monolingual pre-training strategy in which syntactic features such as lemma, part-of-speech and dependency labels are incorporated into the span prediction based pre-training framework (BART). These automatically extracted features are incorporated via approaches such as concatenation and relevance mechanisms, among which the latter is known to be better than the former. When used for low-resource NMT as a downstream task, we show that these feature based models give large improvements in bilingual settings and modest ones in multilingual settings over their counterparts that do not use features. 2022.coling-1.443 @@ -5157,8 +5157,8 @@ Multi-level Community-awareness Graph Neural Networks for Neural Machine Translation BinhNguyen - LongNguyen - DienDinh + LongNguyen + DienDinh 5021–5028 Neural Machine Translation (NMT) aims to translate the source- to the target-language while preserving the original meaning. Linguistic information such as morphology, syntactic, and semantics shall be grasped in token embeddings to produce a high-quality translation. Recent works have leveraged the powerful Graph Neural Networks (GNNs) to encode such language knowledge into token embeddings. Specifically, they use a trained parser to construct semantic graphs given sentences and then apply GNNs. However, most semantic graphs are tree-shaped and too sparse for GNNs which cause the over-smoothing problem. To alleviate this problem, we propose a novel Multi-level Community-awareness Graph Neural Network (MC-GNN) layer to jointly model local and global relationships between words and their linguistic roles in multiple communities. Intuitively, the MC-GNN layer substitutes a self-attention layer at the encoder side of a transformer-based machine translation model. Extensive experiments on four language-pair datasets with common evaluation metrics show the remarkable improvements of our method while reducing the time complexity in very long sentences. 2022.coling-1.444 @@ -5191,7 +5191,7 @@ Language Branch Gated Multilingual Neural Machine Translation HaoranSun - DeyiXiong + DeyiXiong 5046–5053 Knowledge transfer across languages is crucial for multilingual neural machine translation. In this paper, we propose language branch (LB) gated multilingual neural machine translation that encourages knowledge transfer within the same language branch with a LB-gated module that is integrated into both the encoder and decoder. The LB-gated module distinguishes LB-specific parameters from global parameters shared by all languages and routes languages from the same LB to the corresponding LB-specific network. Comprehensive experiments on the OPUS-100 dataset show that the proposed approach substantially improves translation quality on both middle- and low-resource languages over previous methods. Further analysis demonstrates its ability in learning similarities between language branches. 2022.coling-1.447 @@ -5203,7 +5203,7 @@ HuiHuang JialeGao YufengChen - JinanXu + JinanXu JianLiu 5054–5065 Back-translation has been proven to be effective in unsupervised domain adaptation of neural machine translation (NMT). However, the existing back-translation methods mainly improve domain adaptability by generating in-domain pseudo-parallel data that contains sentence-structural knowledge, paying less attention to the in-domain lexical knowledge, which may lead to poor translation of unseen in-domain words. In this paper, we propose an Iterative Constrained Back-Translation (ICBT) method to incorporate in-domain lexical knowledge on the basis of BT for unsupervised domain adaptation of NMT. Specifically, we apply lexical constraints into back-translation to generate pseudo-parallel data with in-domain lexical knowledge, and then perform round-trip iterations to incorporate more lexical knowledge. Based on this, we further explore sampling strategies of constrained words in ICBT to introduce more targeted lexical knowledge, via domain specificity and confidence estimation. Experimental results on four domains show that our approach achieves state-of-the-art results, improving the BLEU score by up to 3.08 compared to the strongest baseline, which demonstrates the effectiveness of our approach. @@ -5214,7 +5214,7 @@ Linguistically-Motivated <fixed-case>Y</fixed-case>orùbá-<fixed-case>E</fixed-case>nglish Machine Translation IfeAdebara MuhammadAbdul-Mageed - MiikkaSilfverberg + MiikkaSilfverberg 5066–5075 Translating between languages where certain features are marked morphologically in one but absent or marked contextually in the other is an important test case for machine translation. When translating into English which marks (in)definiteness morphologically, from Yorùbá which uses bare nouns but marks these features contextually, ambiguities arise. In this work, we perform fine-grained analysis on how an SMT system compares with two NMT systems (BiLSTM and Transformer) when translating bare nouns in Yorùbá into English. We investigate how the systems what extent they identify BNs, correctly translate them, and compare with human translation patterns. We also analyze the type of errors each model makes and provide a linguistic description of these errors. We glean insights for evaluating model performance in low-resource settings. In translating bare nouns, our results show the transformer model outperforms the SMT and BiLSTM models for 4 categories, the BiLSTM outperforms the SMT model for 3 categories while the SMT outperforms the NMT models for 1 category. 2022.coling-1.449 @@ -5236,7 +5236,7 @@ JianYang ShumingMa DongdongZhang - WeinanZhang + WeinanZhang YongYu ZhoujunLi 5085–5097 @@ -5310,7 +5310,7 @@ Informative Language Representation Learning for Massively Multilingual Neural Machine Translation RenrenJin - DeyiXiong + DeyiXiong 5158–5174 In a multilingual neural machine translation model that fully shares parameters across all languages, an artificial language token is usually used to guide translation into the desired target language. However, recent studies show that prepending language tokens sometimes fails to navigate the multilingual neural machine translation models into right translation directions, especially on zero-shot translation. To mitigate this issue, we propose two methods, language embedding embodiment and language-aware multi-head attention, to learn informative language representations to channel translation into right directions. The former embodies language embeddings into different critical switching points along the information flow from the source to the target, aiming at amplifying translation direction guiding signals. The latter exploits a matrix, instead of a vector, to represent a language in the continuous space. The matrix is chunked into multiple heads so as to learn language representations in multiple subspaces. Experiment results on two datasets for massively multilingual neural machine translation demonstrate that language-aware multi-head attention benefits both supervised and zero-shot translation and significantly alleviates the off-target translation issue. Further linguistic typology prediction experiments show that matrix-based language representations learned by our methods are capable of capturing rich linguistic typology features. 2022.coling-1.458 @@ -5334,7 +5334,7 @@ JaehyungSeo GyeongminKim JungseobLee - HeuiseokLim + HeuiseokLim 5181–5190 With the recent advance in neural machine translation demonstrating its importance, research on quality estimation (QE) has been steadily progressing. QE aims to automatically predict the quality of machine translation (MT) output without reference sentences. Despite its high utility in the real world, there remain several limitations concerning manual QE data creation: inevitably incurred non-trivial costs due to the need for translation experts, and issues with data scaling and language expansion. To tackle these limitations, we present QUAK, a Korean-English synthetic QE dataset generated in a fully automatic manner. This consists of three sub-QUAK datasets QUAK-M, QUAK-P, and QUAK-H, produced through three strategies that are relatively free from language constraints. Since each strategy requires no human effort, which facilitates scalability, we scale our data up to 1.58M for QUAK-P, H and 6.58M for QUAK-M. As an experiment, we quantitatively analyze word-level QE results in various ways while performing statistical analysis. Moreover, we show that datasets scaled in an efficient way also contribute to performance improvements by observing meaningful performance gains in QUAK-M, P when adding data up to 1.58M. 2022.coling-1.460 @@ -5344,7 +5344,7 @@ Improving Both Domain Robustness and Domain Adaptability in Machine Translation WenLai JindřichLibovický - AlexanderFraser + AlexanderFraser 5191–5204 We consider two problems of NMT domain adaptation using meta-learning. First, we want to reach domain robustness, i.e., we want to reach high quality on both domains seen in the training data and unseen domains. Second, we want our systems to be adaptive, i.e., making it possible to finetune systems with just hundreds of in-domain parallel sentences. We study the domain adaptability of meta-learning when improving the domain robustness of the model. In this paper, we propose a novel approach, RMLNMT (Robust Meta-Learning Framework for Neural Machine Translation Domain Adaptation), which improves the robustness of existing meta-learning models. More specifically, we show how to use a domain classifier in curriculum learning and we integrate the word-level domain mixing model into the meta-learning framework with a balanced sampling strategy. Experiments on English-German and English-Chinese translation show that RMLNMT improves in terms of both domain robustness and domain adaptability in seen and unseen domains. 2022.coling-1.461 @@ -5354,7 +5354,7 @@ <fixed-case>C</fixed-case>o<fixed-case>D</fixed-case>o<fixed-case>NMT</fixed-case>: Modeling Cohesion Devices for Document-Level Neural Machine Translation YikunLei YuqiRen - DeyiXiong + DeyiXiong 5205–5216 Cohesion devices, e.g., reiteration, coreference, are crucial for building cohesion links across sentences. In this paper, we propose a document-level neural machine translation framework, CoDoNMT, which models cohesion devices from two perspectives: Cohesion Device Masking (CoDM) and Cohesion Attention Focusing (CoAF). In CoDM, we mask cohesion devices in the current sentence and force NMT to predict them with inter-sentential context information. A prediction task is also introduced to be jointly trained with NMT. In CoAF, we attempt to guide the model to pay exclusive attention to relevant cohesion devices in the context when translating cohesion devices in the current sentence. Such a cohesion attention focusing strategy is softly applied to the self-attention layer. Experiments on three benchmark datasets demonstrate that our approach outperforms state-of-the-art document-level neural machine translation baselines. Further linguistic evaluation validates the effectiveness of the proposed model in producing cohesive translations. 2022.coling-1.462 @@ -5396,8 +5396,8 @@ Alleviating the Inequality of Attention Heads for Neural Machine Translation ZeweiSun ShujianHuang - XinyuDai - JiajunChen + XinyuDai + JiajunChen 5246–5250 Recent studies show that the attention heads in Transformer are not equal. We relate this phenomenon to the imbalance training of multi-head attention and the model dependence on specific heads. To tackle this problem, we propose a simple masking method: HeadMask, in two specific ways. Experiments show that translation improvements are achieved on multiple language pairs. Subsequent empirical analyses also support our assumption and confirm the effectiveness of the method. 2022.coling-1.466 @@ -5432,7 +5432,7 @@ Cross-lingual Feature Extraction from Monolingual Corpora for Low-resource Unsupervised Bilingual Lexicon Induction ZihaoFeng HailongCao - TiejunZhao + TiejunZhao WeixuanWang WeiPeng 5278–5287 @@ -5470,7 +5470,7 @@ Deciphering and Characterizing Out-of-Vocabulary Words for Morphologically Rich Languages GeorgieBotev - Arya D.McCarthy + Arya D.McCarthy WinstonWu DavidYarowsky 5309–5326 @@ -5590,9 +5590,9 @@ YigeChen Eunkyul LeahJo YundongYao - KyungTaeLim - MiikkaSilfverberg - Francis M.Tyers + KyungTaeLim + MiikkaSilfverberg + Francis M.Tyers JungyeulPark 5432–5437 In this study, we propose a morpheme-based scheme for Korean dependency parsing and adopt the proposed scheme to Universal Dependencies. We present the linguistic rationale that illustrates the motivation and the necessity of adopting the morpheme-based format, and develop scripts that convert between the original format used by Universal Dependencies and the proposed morpheme-based format automatically. The effectiveness of the proposed format for Korean dependency parsing is then testified by both statistical and neural models, including UDPipe and Stanza, with our carefully constructed morpheme-based word embedding for Korean. morphUD outperforms parsing results for all Korean UD treebanks, and we also present detailed error analysis. @@ -5644,8 +5644,8 @@ Belief Revision Based Caption Re-ranker with Visual Semantic Information AhmedSabir FrancescMoreno-Noguer - PranavaMadhyastha - LluísPadró + PranavaMadhyastha + LluísPadró 5488–5506 In this work, we focus on improving the captions generated by image-caption generation systems. We propose a novel re-ranking approach that leverages visual-semantic measures to identify the ideal caption that maximally captures the visual information in the image. Our re-ranker utilizes the Belief Revision framework (Blok et al., 2003) to calibrate the original likelihood of the top-n captions by explicitly exploiting semantic relatedness between the depicted caption and the visual context. Our experiments demonstrate the utility of our approach, where we observe that our re-ranker can enhance the performance of a typical image-captioning system without necessity of any additional training or fine-tuning. 2022.coling-1.487 @@ -5655,7 +5655,7 @@ Towards Understanding the Relation between Gestures and Language ArtemAbzaliev AndrewOwens - RadaMihalcea + RadaMihalcea 5507–5520 In this paper, we explore the relation between gestures and language. Using a multimodal dataset, consisting of Ted talks where the language is aligned with the gestures made by the speakers, we adapt a semi-supervised multimodal model to learn gesture embeddings. We show that gestures are predictive of the native language of the speaker, and that gesture embeddings further improve language prediction result. In addition, gesture embeddings might contain some linguistic information, as we show by probing embeddings for psycholinguistic categories. Finally, we analyze the words that lead to the most expressive gestures and find that function words drive the expressiveness of gestures. 2022.coling-1.488 @@ -5727,7 +5727,7 @@ FedericoPedeni AlessandroSuglia AlbertoTestoni - RaffaellaBernardi + RaffaellaBernardi 5597–5612 Artificial agents are nowadays challenged to perform embodied AI tasks. To succeed, agents must understand the meaning of verbs and how their corresponding actions transform the surrounding world. In this work, we propose ACT-Thor, a novel controlled benchmark for embodied action understanding. We use the AI2-THOR simulated environment to produce a controlled setup in which an agent, given a before-image and an associated action command, has to determine what the correct after-image is among a set of possible candidates. First, we assess the feasibility of the task via a human evaluation that resulted in 81.4% accuracy, and very high inter-annotator agreement (84.9%). Second, we design both unimodal and multimodal baselines, using state-of-the-art visual feature extractors. Our evaluation and error analysis suggest that only models that have a very structured representation of the actions together with powerful visual features can perform well on the task. However, they still fall behind human performance in a zero-shot scenario where the model is exposed to unseen (action, object) pairs. This paves the way for a systematic way of evaluating embodied AI agents that understand grounded actions. 2022.coling-1.495 @@ -5739,7 +5739,7 @@ NaihaoDeng PingxuanHuang MihaiBurzo - RadaMihalcea + RadaMihalcea 5613–5635 Existing video understanding datasets mostly focus on human interactions, with little attention being paid to the “in the wild” settings, where the videos are recorded outdoors. We propose WILDQA, a video understanding dataset of videos recorded in outside settings. In addition to video question answering (Video QA), we also introduce the new task of identifying visual support for a given question and answer (Video Evidence Selection). Through evaluations using a wide range of baseline models, we show that WILDQA poses new challenges to the vision and language research communities. The dataset is available at https: //lit.eecs.umich.edu/wildqa/. 2022.coling-1.496 @@ -5810,7 +5810,7 @@ ZhijiangGuo YuFu LijieWen - Philip S.Yu + Philip S.Yu 5707–5720 A scene graph is a semantic representation that expresses the objects, attributes, and relationships between objects in a scene. Scene graphs play an important role in many cross modality tasks, as they are able to capture the interactions between images and texts. In this paper, we focus on scene graph modification (SGM), where the system is required to learn how to update an existing scene graph based on a natural language query. Unlike previous approaches that rebuilt the entire scene graph, we frame SGM as a graph expansion task by introducing the incremental structure expanding (ISE). ISE constructs the target graph by incrementally expanding the source graph without changing the unmodified structure. Based on ISE, we further propose a model that iterates between nodes prediction and edges prediction, inferring more accurate and harmonious expansion decisions progressively. In addition, we construct a challenging dataset that contains more complicated queries and larger scene graphs than existing datasets. Experiments on four benchmarks demonstrate the effectiveness of our approach, which surpasses the previous state-of-the-art model by large margins. 2022.coling-1.502 @@ -5821,7 +5821,7 @@ YikeWu YuZhao ShiwanZhao - YingZhang + YingZhang XiaojieYuan GuoqingZhao NingJiang @@ -5833,7 +5833,7 @@ Efficient Multilingual Multi-modal Pre-training through Triple Contrastive Loss YouhanLee - KyungTaeLim + KyungTaeLim WoonhyukBaek ByungseokRoh SaehoonKim @@ -5855,7 +5855,7 @@ <fixed-case>GAP</fixed-case>: A Graph-aware Language Model Framework for Knowledge Graph-to-Text Generation AnthonyColas MehrdadAlvandipour - Daisy ZheWang + Daisy ZheWang 5755–5769 Recent improvements in KG-to-text generation are due to additional auxiliary pre-training tasks designed to give the fine-tune task a boost in performance. These tasks require extensive computational resources while only suggesting marginal improvements. Here, we demonstrate that by fusing graph-aware elements into existing pre-trained language models, we are able to outperform state-of-the-art models and close the gap imposed by additional pre-training tasks. We do so by proposing a mask structure to capture neighborhood information and a novel type encoder that adds a bias to the graph-attention weights depending on the connection type. Experiments on two KG-to-text benchmark datasets show our models are competitive while involving fewer parameters and no additional pre-training tasks. By formulating the problem as a framework, we can interchange the various proposed components and begin interpreting KG-to-text generative models based on the topological and type information found in a graph. 2022.coling-1.506 @@ -5876,7 +5876,7 @@ MingZhong ZhiyongWu QinZhu - XuanjingHuang + XuanjingHuang XipengQiu 5783–5793 Traditional training paradigms for extractive and abstractive summarization systems always only use token-level or sentence-level training objectives. However, the output summary is always evaluated from summary-level which leads to the inconsistency in training and evaluation. In this paper, we propose a Contrastive Learning based re-ranking framework for one-stage summarization called CoLo. By modeling a contrastive objective, we show that the summarization model is able to directly generate summaries according to the summary-level score without additional modules and parameters. Extensive experiments demonstrate that CoLo boosts the extractive and abstractive results of one-stage systems on CNN/DailyMail benchmark to 44.58 and 46.33 ROUGE-1 score while preserving the parameter efficiency and inference efficiency. Compared with state-of-the-art multi-stage systems, we save more than 100 GPU training hours and obtaining 3x 8x speed-up ratio during inference while maintaining comparable results. @@ -5887,7 +5887,7 @@ Of Human Criteria and Automatic Metrics: A Benchmark of the Evaluation of Story Generation CyrilChhun PierreColombo - Fabian M.Suchanek + Fabian M.Suchanek ChloéClavel 5794–5836 Research on Automatic Story Generation (ASG) relies heavily on human and automatic evaluation. However, there is no consensus on which human evaluation criteria to use, and no analysis of how well automatic criteria correlate with them. In this paper, we propose to re-evaluate ASG evaluation. We introduce a set of 6 orthogonal and comprehensive human criteria, carefully motivated by the social sciences literature. We also present HANNA, an annotated dataset of 1,056 stories produced by 10 different ASG systems. HANNA allows us to quantitatively evaluate the correlations of 72 automatic metrics with human criteria. Our analysis highlights the weaknesses of current metrics for ASG and allows us to formulate practical recommendations for ASG evaluation. @@ -5910,7 +5910,7 @@ DongyuanLi JingyiYou KotaroFunakoshi - ManabuOkumura + ManabuOkumura 5857–5869 Text infilling aims to restore incomplete texts by filling in blanks, which has attracted more attention recently because of its wide application in ancient text restoration and text rewriting. However, attribute- aware text infilling is yet to be explored, and existing methods seldom focus on the infilling length of each blank or the number/location of blanks. In this paper, we propose an Attribute-aware Text Infilling method via a Pre-trained language model (A-TIP), which contains a text infilling component and a plug- and-play discriminator. Specifically, we first design a unified text infilling component with modified attention mechanisms and intra- and inter-blank positional encoding to better perceive the number of blanks and the infilling length for each blank. Then, we propose a plug-and-play discriminator to guide generation towards the direction of improving attribute relevance without decreasing text fluency. Finally, automatic and human evaluations on three open-source datasets indicate that A-TIP achieves state-of- the-art performance compared with all baselines. 2022.coling-1.511 @@ -5919,7 +5919,7 @@ Multi Graph Neural Network for Extractive Long Document Summarization Xuan-DungDoan - Le-MinhNguyen + Le-MinhNguyen Khac-Hoai NamBui 5870–5875 Heterogeneous Graph Neural Networks (HeterGNN) have been recently introduced as an emergent approach for extracting document summarization (EDS) by exploiting the cross-relations between words and sentences. However, applying HeterGNN for long documents is still an open research issue. One of the main majors is the lacking of inter-sentence connections. In this regard, this paper exploits how to apply HeterGNN for long documents by building a graph on sentence-level nodes (homogeneous graph) and combine with HeterGNN for capturing the semantic information in terms of both inter and intra-sentence connections. Experiments on two benchmark datasets of long documents such as PubMed and ArXiv show that our method is able to achieve state-of-the-art results in this research field. @@ -5941,7 +5941,7 @@ Fei-TzinLee MiguelBallesteros FengNan - KathleenMcKeown + KathleenMcKeown 5882–5895 Large pretrained language models offer powerful generation capabilities, but cannot be reliably controlled at a sub-sentential level. We propose to make such fine-grained control possible in pretrained LMs by generating text directly from a semantic representation, Abstract Meaning Representation (AMR), which is augmented at the node level with syntactic control tags. We experiment with English-language generation of three modes of syntax relevant to the framing of a sentence - verb voice, verb tense, and realization of human entities - and demonstrate that they can be reliably controlled, even in settings that diverge drastically from the training distribution. These syntactic aspects contribute to how information is framed in text, something that is important for applications such as summarization which aim to highlight salient information. 2022.coling-1.514 @@ -5952,7 +5952,7 @@ GeLuo HebiLi YoubiaoHe - Forrest ShengBao + Forrest ShengBao 5896–5903 Evaluating machine-generated summaries without a human-written reference summary has been a need for a long time. Inspired by preference labeling in existing work of summarization evaluation, we propose to judge summary quality by learning the preference rank of summaries using the Bradley-Terry power ranking model from inferior summaries generated by corrupting base summaries. Extensive experiments on several datasets show that our weakly supervised scheme can produce scores highly correlated with human ratings. 2022.coling-1.515 @@ -5973,7 +5973,7 @@ Coordination Generation via Synchronized Text-Infilling HirokiTeranishi - YujiMatsumoto + YujiMatsumoto 5914–5924 Generating synthetic data for supervised learning from large-scale pre-trained language models has enhanced performances across several NLP tasks, especially in low-resource scenarios. In particular, many studies of data augmentation employ masked language models to replace words with other words in a sentence. However, most of them are evaluated on sentence classification tasks and cannot immediately be applied to tasks related to the sentence structure. In this paper, we propose a simple yet effective approach to generating sentences with a coordinate structure in which the boundaries of its conjuncts are explicitly specified. For a given span in a sentence, our method embeds a mask with a coordinating conjunction in two ways (”X and [mask]”, ”[mask] and X”) and forces masked language models to fill the two blanks with an identical text. To achieve this, we introduce decoding methods for BERT and T5 models with the constraint that predictions for different masks are synchronized. Furthermore, we develop a training framework that effectively selects synthetic examples for the supervised coordination disambiguation task. We demonstrate that our method produces promising coordination instances that provide gains for the task in low-resource settings. 2022.coling-1.517 @@ -6036,7 +6036,7 @@ <fixed-case>JPG</fixed-case> - Jointly Learn to Align: Automated Disease Prediction and Radiology Report Generation JingyiYou DongyuanLi - ManabuOkumura + ManabuOkumura KenjiSuzuki 5989–6001 Automated radiology report generation aims to generate paragraphs that describe fine-grained visual differences among cases, especially those between the normal and the diseased. Existing methods seldom consider the cross-modal alignment between textual and visual features and tend to ignore disease tags as an auxiliary for report generation. To bridge the gap between textual and visual information, in this study, we propose a “Jointly learning framework for automated disease Prediction and radiology report Generation (JPG)” to improve the quality of reports through the interaction between the main task (report generation) and two auxiliary tasks (feature alignment and disease prediction). The feature alignment and disease prediction help the model learn text-correlated visual features and record diseases as keywords so that it can output high-quality reports. Besides, the improved reports in turn provide additional harder samples for feature alignment and disease prediction to learn more precise visual and textual representations and improve prediction accuracy. All components are jointly trained in a manner that helps improve them iteratively and progressively. Experimental results demonstrate the effectiveness of JPG on the most commonly used IU X-RAY dataset, showing its superior performance over multiple state-of-the-art image captioning and medical report generation methods with regard to BLEU, METEOR, and ROUGE metrics. @@ -6045,7 +6045,7 @@ Automatic Nominalization of Clauses through Textual Entailment - John S. Y.Lee + John S. Y.Lee Ho HungLim CarolWebster AntonMelser @@ -6073,7 +6073,7 @@ Source-summary Entity Aggregation in Abstractive Summarization José ÁngelGonzález AnnieLouis - Jackie Chi KitCheung + Jackie Chi KitCheung 6019–6034 In a text, entities mentioned earlier can be referred to in later discourse by a more general description. For example, Celine Dion and Justin Bieber can be referred to by Canadian singers or celebrities. In this work, we study this phenomenon in the context of summarization, where entities from a source text are generalized in the summary. We call such instances source-summary entity aggregations. We categorize these aggregations into two types and analyze them in the Cnn/Dailymail corpus, showing that they are reasonably frequent. We then examine how well three state-of-the-art summarization systems can generate such aggregations within summaries. We also develop techniques to encourage them to generate more aggregations. Our results show that there is significant room for improvement in producing semantically correct aggregations. 2022.coling-1.526 @@ -6154,7 +6154,7 @@ WeiLi XuhuiJiang HuaweiShen - XueqiCheng + XueqiCheng 6105–6114 Complex question generation over knowledge bases (KB) aims to generate natural language questions involving multiple KB relations or functional constraints. Existing methods train one encoder-decoder-based model to fit all questions. However, such a one-size-fits-all strategy may not perform well since complex questions exhibit an uneven distribution in many dimensions, such as question types, involved KB relations, and query structures, resulting in insufficient learning for long-tailed samples under different dimensions. To address this problem, we propose a meta-learning framework for complex question generation. The meta-trained generator can acquire universal and transferable meta-knowledge and quickly adapt to long-tailed samples through a few most related training samples. To retrieve similar samples for each input query, we design a self-supervised graph retriever to learn distributed representations for samples, and contrastive learning is leveraged to improve the learned representations. We conduct experiments on both WebQuestionsSP and ComplexWebQuestion, and results on long-tailed samples of different dimensions have been significantly improved, which demonstrates the effectiveness of the proposed framework. 2022.coling-1.533 @@ -6198,7 +6198,7 @@ Phrase-Level Localization of Inconsistency Errors in Summarization by Weak Supervision MasatoTakatsuka TetsunoriKobayashi - YoshihikoHayashi + YoshihikoHayashi 6151–6164 Although the fluency of automatically generated abstractive summaries has improved significantly with advanced methods, the inconsistency that remains in summarization is recognized as an issue to be addressed. In this study, we propose a methodology for localizing inconsistency errors in summarization. A synthetic dataset that contains a variety of factual errors likely to be produced by a common summarizer is created by applying sentence fusion, compression, and paraphrasing operations. In creating the dataset, we automatically label erroneous phrases and the dependency relations between them as “inconsistent,” which can contribute to detecting errors more adequately than existing models that rely only on dependency arc-level labels. Subsequently, this synthetic dataset is employed as weak supervision to train a model called SumPhrase, which jointly localizes errors in a summary and their corresponding sentences in the source document. The empirical results demonstrate that our SumPhrase model can detect factual errors in summarization more effectively than existing weakly supervised methods owing to the phrase-level labeling. Moreover, the joint identification of error-corresponding original sentences is proven to be effective in improving error detection accuracy. 2022.coling-1.537 @@ -6208,7 +6208,7 @@ <fixed-case>P</fixed-case>oli<fixed-case>S</fixed-case>e: Reinforcing Politeness Using User Sentiment for Customer Care Response Generation MauajamaFirdaus AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 6165–6175 The interaction between a consumer and the customer service representative greatly contributes to the overall customer experience. Therefore, to ensure customers’ comfort and retention, it is important that customer service agents and chatbots connect with users on social, cordial, and empathetic planes. In the current work, we automatically identify the sentiment of the user and transform the neutral responses into polite responses conforming to the sentiment and the conversational history. Our technique is basically a reinforced multi-task network- the primary task being ‘polite response generation’ and the secondary task being ‘sentiment analysis’- that uses a Transformer based encoder-decoder. We use sentiment annotated conversations from Twitter as the training data. The detailed evaluation shows that our proposed approach attains superior performance compared to the baseline models. 2022.coling-1.538 @@ -6228,7 +6228,7 @@ <fixed-case>A</fixed-case>rg<fixed-case>L</fixed-case>egal<fixed-case>S</fixed-case>umm: Improving Abstractive Summarization of Legal Documents with Argument Mining MohamedElaraby - DianeLitman + DianeLitman 6187–6194 A challenging task when generating summaries of legal documents is the ability to address their argumentative nature. We introduce a simple technique to capture the argumentative structure of legal documents by integrating argument role labeling into the summarization process. Experiments with pretrained language models show that our proposed approach improves performance over strong baselines. 2022.coling-1.540 @@ -6315,7 +6315,7 @@ Steven Y.Feng HarshJhamtani MaliheAlikhani - EduardHovy + EduardHovy 6270–6284 A personification is a figure of speech that endows inanimate entities with properties and actions typically seen as requiring animacy. In this paper, we explore the task of personification generation. To this end, we propose PINEAPPLE: Personifying INanimate Entities by Acquiring Parallel Personification data for Learning Enhanced generation. We curate a corpus of personifications called PersonifCorp, together with automatically generated de-personified literalizations of these personifications. We demonstrate the usefulness of this parallel corpus by training a seq2seq model to personify a given literal input. Both automatic and human evaluations show that fine-tuning with PersonifCorp leads to significant gains in personification-related qualities such as animacy and interestingness. A detailed qualitative analysis also highlights key strengths and imperfections of PINEAPPLE over baselines, demonstrating a strong ability to generate diverse and creative personifications that enhance the overall appeal of a sentence. 2022.coling-1.547 @@ -6350,7 +6350,7 @@ FangweiZhu JuanziLi LeiHou - Jian-YunNie + Jian-YunNie 6315–6326 Multi-Document Summarization (MDS) commonly employs the 2-stage extract-then-abstract paradigm, which first extracts a relatively short meta-document, then feeds it into the deep neural networks to generate an abstract. Previous work usually takes the ROUGE score as the label for training a scoring model to evaluate source documents. However, the trained scoring model is prone to under-fitting for low-resource settings, as it relies on the training data. To extract documents effectively, we construct prompting templates that invoke the underlying knowledge in Pre-trained Language Model (PLM) to calculate the document and keyword’s perplexity, which can assess the document’s semantic salience. Our unsupervised approach can be applied as a plug-in to boost other metrics for evaluating a document’s salience, thus improving the subsequent abstract generation. We get positive results on 2 MDS datasets, 2 data settings, and 2 abstractive backbone models, showing our method’s effectiveness. Our code is available at https://github.com/THU-KEG/UPER 2022.coling-1.550 @@ -6361,8 +6361,8 @@ TianyangCao ShuangZeng XiaodanXu - MairgupMansur - BaobaoChang + MairgupMansur + BaobaoChang 6327–6339 A math word problem (MWP) is a coherent narrative which reflects the underlying logic of math equations. Successful MWP generation can automate the writing of mathematics questions. Previous methods mainly generate MWP text based on inflexible pre-defined templates. In this paper, we propose a neural model for generating MWP text from math equations. Firstly, we incorporate a matching model conditioned on the domain knowledge to retrieve a MWP instance which is most consistent with the ground-truth, where the domain is a latent variable extracted with a domain summarizer. Secondly, by constructing a Quantity Cell Graph (QCG) from the retrieved MWP instance and reasoning over it, we improve the model’s comprehension of real-world scenarios and derive a domain-constrained instance sketch to guide the generation. Besides, the QCG also interacts with the equation encoder to enhance the alignment between math tokens (e.g., quantities and variables) and MWP text. Experiments and empirical analysis on educational MWP set show that our model achieves impressive performance in both automatic evaluation metrics and human evaluation metrics. 2022.coling-1.551 @@ -6372,7 +6372,7 @@ Context-Tuning: Learning Contextualized Prompts for Natural Language Generation TianyiTang JunyiLi - Wayne XinZhao + Wayne XinZhao Ji-RongWen 6340–6354 Recently, pretrained language models (PLMs) have had exceptional success in language generation. To leverage the rich knowledge encoded by PLMs, a simple yet powerful paradigm is to use prompts in the form of either discrete tokens or continuous embeddings. In existing studies, these prompting methods are typically independent of the inputs, lacking sufficient consideration of input semantics. To address this issue, we propose a novel continuous prompting approach, called context-tuning, to fine-tuning PLMs for natural language generation. Firstly, the prompts are derived based on the input text to elicit useful knowledge from PLMs for generation. We refer to such prompts as contextualized prompts. Secondly, we use continuous inverse prompting to improve the process of natural language generation by modeling an inverse generation process from output to input, making the generated text more relevant to the inputs. Furthermore, we utilize a lightweight context-tuning method that fine-tunes only 0.12% of the parameters while maintaining good performance. Our code is publicly available at https://github.com/RUCAIBox/Context-Tuning. @@ -6386,7 +6386,7 @@ YuBai JiaweiLi YinanHu - HeyanHuang + HeyanHuang BoxingChen 6355–6368 Few-shot abstractive summarization has become a challenging task in natural language generation. To support it, we developed a novel soft prompts architecture coupled with a prompt pre-training plus prompt fine-tuning paradigm, which is effective and tunes only extremely light parameters. To meet the structure of the generation models, the soft prompts comprise continuous input embeddings across an encoder and a decoder. Importantly, a new inner-prompt placed in the text is introduced to capture document-level information. The aim is to devote attention to understanding the document that better prompts the model to generate document-related content. In the training process, the prompt pre-training with self-supervised pseudo-data firstly teaches the model basic summarizing capability. Then, with few-shot examples, only the designed lightweight soft prompts are fine-tuned. Experimental results on the CNN/DailyMail and XSum datasets show that our method, with only 0.1% of the parameters, outperforms full-model tuning where all model parameters are tuned. It also surpasses Prompt Tuning by a large margin and delivers competitive results against Prefix-Tuning with 3% of the parameters. @@ -6460,7 +6460,7 @@ <fixed-case>CHAE</fixed-case>: Fine-Grained Controllable Story Generation with Characters, Actions and Emotions - XinpengWang + XinpengWang HanJiang ZhihuaWei ShanlinZhou @@ -6570,7 +6570,7 @@ MingZhong ZhangyueYin XipengQiu - XuanjingHuang + XuanjingHuang 6540–6546 Pre-trained models have brought remarkable success on the text summarization task. For dialogue summarization, the subdomain of text summarization, utterances are concatenated to flat text before being processed. As a result, existing summarization systems based on pre-trained models are unable to recognize the unique format of the speaker-utterance pair well in the dialogue. To investigate this issue, we conduct probing tests and manual analysis, and find that the powerful pre-trained model can not identify different speakers well in the conversation, which leads to various factual errors. Moreover, we propose three speaker-aware supervised contrastive learning (SCL) tasks: Token-level SCL, Turn-level SCL, and Global-level SCL. Comprehensive experiments demonstrate that our methods achieve significant performance improvement on two mainstream dialogue summarization datasets. According to detailed human evaluations, pre-trained models equipped with SCL tasks effectively generate summaries with better factual consistency. 2022.coling-1.569 @@ -6603,7 +6603,7 @@ XinZhou TaoGui QiZhang - XuanjingHuang + XuanjingHuang 6575–6585 Question generation over knowledge bases (KBQG) aims at generating natural questions about a subgraph, which can be answered by a given answer entity. Existing KBQG models still face two main challenges: (1) Most models often focus on the most relevant part of the answer entity, while neglecting the rest of the subgraph. (2) There are a large number of out-of-vocabulary (OOV) predicates in real-world scenarios, which are hard to adapt for most KBQG models. To address these challenges, we propose LFKQG, a controlled generation framework for Question Generation over Knowledge Bases. (1) LFKQG employs a simple controlled generation method to generate the questions containing the critical entities in the subgraph, ensuring the question is relevant to the whole subgraph. (2) We propose an optimization strategy called local fine-tuning, which can make good use of the rich information hidden in the pre-trained model to improve the ability of the model to adapt the OOV predicates. Extensive experiments show that our method outperforms existing methods significantly on three widely-used benchmark datasets SimpleQuestion, PathQuestions, and WebQuestions. 2022.coling-1.572 @@ -6633,7 +6633,7 @@ Offensive Content Detection via Synthetic Code-Switched Text CesaSalaam FranckDernoncourt - TrungBui + TrungBui DandaRawat SeunghyunYoon 6617–6624 @@ -6651,7 +6651,7 @@ Giovanni Da SanMartino ShadenShaar HamedFirooz - PreslavNakov + PreslavNakov 6625–6643 Recent years have witnessed the proliferation of offensive content online such as fake news, propaganda, misinformation, and disinformation. While initially this was mostly about textual content, over time images and videos gained popularity, as they are much easier to consume, attract more attention, and spread further than text. As a result, researchers started leveraging different modalities and combinations thereof to tackle online multimodal offensive content. In this study, we offer a survey on the state-of-the-art on multimodal disinformation detection covering various combinations of modalities: text, images, speech, video, social media network structure, and temporal information. Moreover, while some studies focused on factuality, others investigated how harmful the content is. While these two components in the definition of disinformation – (i) factuality, and (ii) harmfulness –, are equally important, they are typically studied in isolation. Thus, we argue for the need to tackle disinformation detection by taking into account multiple modalities as well as both factuality and harmfulness, in the same framework. Finally, we discuss current challenges and future research directions. 2022.coling-1.576 @@ -6695,7 +6695,7 @@ XiaoyunHan BinyangLi MenglongLu - DongshengLi + DongshengLi 6680–6690 Early rumor detection is a key challenging task to prevent rumors from spreading widely. Sociological research shows that social bots’ behavior in the early stage has become the main reason for rumors’ wide spread. However, current models do not explicitly distinguish genuine users from social bots, and their failure in identifying rumors timely. Therefore, this paper aims at early rumor detection by accounting for social bots’ behavior, and presents a Social Bot-Aware Graph Neural Network, named SBAG. SBAG firstly pre-trains a multi-layer perception network to capture social bot features, and then constructs multiple graph neural networks by embedding the features to model the early propagation of posts, which is further used to detect rumors. Extensive experiments on three benchmark datasets show that SBAG achieves significant improvements against the baselines and also identifies rumors within 3 hours while maintaining more than 90% accuracy. 2022.coling-1.580 @@ -6731,7 +6731,7 @@ Detecting Minority Arguments for Mutual Understanding: A Moderation Tool for the Online Climate Change Debate CedricWaterschoot Ernstvan den Hemel - Antalvan den Bosch + Antalvan den Bosch 6715–6725 Moderating user comments and promoting healthy understanding is a challenging task, especially in the context of polarized topics such as climate change. We propose a moderation tool to assist moderators in promoting mutual understanding in regard to this topic. The approach is twofold. First, we train classifiers to label incoming posts for the arguments they entail, with a specific focus on minority arguments. We apply active learning to further supplement the training data with rare arguments. Second, we dive deeper into singular arguments and extract the lexical patterns that distinguish each argument from the others. Our findings indicate that climate change arguments form clearly separable clusters in the embedding space. These classes are characterized by their own unique lexical patterns that provide a quick insight in an argument’s key concepts. Additionally, supplementing our training data was necessary for our classifiers to be able to adequately recognize rare arguments. We argue that this detailed rundown of each argument provides insight into where others are coming from. These computational approaches can be part of the toolkit for content moderators and researchers struggling with polarized topics. 2022.coling-1.583 @@ -6750,7 +6750,7 @@ Structural Bias for Aspect Sentiment Triplet Extraction - ChenZhang + ChenZhang LeiRen FangMa JingangWang @@ -6777,7 +6777,7 @@ Gopendra VikramSingh AseemArora AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 6752–6761 In this paper, we hypothesize that humor is closely related to sentiment and emotions. Also, due to the tremendous growth in multilingual content, there is a great demand for building models and systems that support multilingual information access. To end this, we first extend the recently released Multimodal Multiparty Hindi Humor (M2H2) dataset by adding parallel English utterances corresponding to Hindi utterances and then annotating each utterance with sentiment and emotion classes. We name it Sentiment, Humor, and Emotion aware Multilingual Multimodal Multiparty Dataset (SHEMuD). Therefore, we propose a multitask framework wherein the primary task is humor detection, and the auxiliary tasks are sentiment and emotion identification. We design a multitasking framework wherein we first propose a Context Transformer to capture the deep contextual relationships with the input utterances. We then propose a Sentiment and Emotion aware Embedding (SE-Embedding) to get the overall representation of a particular emotion and sentiment w.r.t. the specific humor situation. Experimental results on the SHEMuD show the efficacy of our approach and shows that multitask learning offers an improvement over the single-task framework for both monolingual (4.86 points in Hindi and 5.9 points in English in F1-score) and multilingual (5.17 points in F1-score) setting. 2022.coling-1.587 @@ -6799,7 +6799,7 @@ Entity-Level Sentiment Analysis (<fixed-case>ELSA</fixed-case>): An Exploratory Task Survey EgilRønningstad ErikVelldal - LiljaØvrelid + LiljaØvrelid 6773–6783 This paper explores the task of identifying the overall sentiment expressed towards volitional entities (persons and organizations) in a document - what we refer to as Entity-Level Sentiment Analysis (ELSA). While identifying sentiment conveyed towards an entity is well researched for shorter texts like tweets, we find little to no research on this specific task for longer texts with multiple mentions and opinions towards the same entity. This lack of research would be understandable if ELSA can be derived from existing tasks and models. To assess this, we annotate a set of professional reviews for their overall sentiment towards each volitional entity in the text. We sample from data already annotated for document-level, sentence-level, and target-level sentiment in a multi-domain review corpus, and our results indicate that there is no single proxy task that provides this overall sentiment we seek for the entities at a satisfactory level of performance. We present a suite of experiments aiming to assess the contribution towards ELSA provided by document-, sentence-, and target-level sentiment analysis, and provide a discussion of their shortcomings. We show that sentiment in our dataset is expressed not only with an entity mention as target, but also towards targets with a sentiment-relevant relation to a volitional entity. In our data, these relations extend beyond anaphoric coreference resolution, and our findings call for further research of the topic. Finally, we also present a survey of previous relevant work. 2022.coling-1.589 @@ -6810,9 +6810,9 @@ FeiZhao ZhenWu SiyuLong - XinyuDai + XinyuDai ShujianHuang - JiajunChen + JiajunChen 6784–6794 Target-oriented multimodal sentiment classification (TMSC) is a new subtask of aspect-based sentiment analysis, which aims to determine the sentiment polarity of the opinion target mentioned in a (sentence, image) pair. Recently, dominant works employ the attention mechanism to capture the corresponding visual representations of the opinion target, and then aggregate them as evidence to make sentiment predictions. However, they still suffer from two problems: (1) The granularity of the opinion target in two modalities is inconsistent, which causes visual attention sometimes fail to capture the corresponding visual representations of the target; (2) Even though it is captured, there are still significant differences between the visual representations expressing the same mood, which brings great difficulty to sentiment prediction. To this end, we propose a novel Knowledge-enhanced Framework (KEF) in this paper, which can successfully exploit adjective-noun pairs extracted from the image to improve the visual attention capability and sentiment prediction capability of the TMSC task. Extensive experimental results show that our framework consistently outperforms state-of-the-art works on two public datasets. 2022.coling-1.590 @@ -6844,7 +6844,7 @@ HangJiang DougBeeferman BrandonRoy - DebRoy + DebRoy 6818–6826 As political attitudes have diverged ideologically in the United States, political speech has diverged lingusitically. The ever-widening polarization between the US political parties is accelerated by an erosion of mutual understanding between them. We aim to make these communities more comprehensible to each other with a framework that probes community-specific responses to the same survey questions using community language models CommunityLM. In our framework we identify committed partisan members for each community on Twitter and fine-tune LMs on the tweets authored by them. We then assess the worldviews of the two groups using prompt-based probing of their corresponding LMs, with prompts that elicit opinions about public figures and groups surveyed by the American National Election Studies (ANES) 2020 Exploratory Testing Survey. We compare the responses generated by the LMs to the ANES survey results, and find a level of alignment that greatly exceeds several baseline methods. Our work aims to show that we can use community LMs to query the worldview of any group of people given a sufficiently large sample of their social media discussions or media diet. 2022.coling-1.593 @@ -6870,7 +6870,7 @@ JensLemmens JensVan Nooten TimKreutz - WalterDaelemans + WalterDaelemans 6837–6845 We present CoNTACT: a Dutch language model adapted to the domain of COVID-19 tweets. The model was developed by continuing the pre-training phase of RobBERT (Delobelle et al., 2020) by using 2.8M Dutch COVID-19 related tweets posted in 2021. In order to test the performance of the model and compare it to RobBERT, the two models were tested on two tasks: (1) binary vaccine hesitancy detection and (2) detection of arguments for vaccine hesitancy. For both tasks, not only Twitter but also Facebook data was used to show cross-genre performance. In our experiments, CoNTACT showed statistically significant gains over RobBERT in all experiments for task 1. For task 2, we observed substantial improvements in virtually all classes in all experiments. An error analysis indicated that the domain adaptation yielded better representations of domain-specific terminology, causing CoNTACT to make more accurate classification decisions. For task 2, we observed substantial improvements in virtually all classes in all experiments. An error analysis indicated that the domain adaptation yielded better representations of domain-specific terminology, causing CoNTACT to make more accurate classification decisions. 2022.coling-1.595 @@ -6890,7 +6890,7 @@ Transferring Confluent Knowledge to Argument Mining João AntónioRodrigues - AntónioBranco + AntónioBranco 6859–6874 Relevant to all application domains where it is important to get at the reasons underlying sentiments and decisions, argument mining seeks to obtain structured arguments from unstructured text and has been addressed by approaches typically involving some feature and/or neural architecture engineering. By adopting a transfer learning methodology, and by means of a systematic study with a wide range of knowledge sources promisingly suitable to leverage argument mining, the aim of this paper is to empirically assess the potential of transferring such knowledge learned with confluent tasks. By adopting a lean approach that dispenses with heavier feature and model engineering, this study permitted both to gain novel empirically based insights into the argument mining task and to establish new state of the art levels of performance for its three main sub-tasks, viz. identification of argument components, classification of the components, and determination of the relation among them. 2022.coling-1.597 @@ -6900,7 +6900,7 @@ When to Laugh and How Hard? A Multimodal Approach to Detecting Humor and Its Intensity KhalidAlnajjar MikaHämäläinen - JörgTiedemann + JörgTiedemann JormaLaaksonen MikkoKurimo 6875–6886 @@ -6924,7 +6924,7 @@ Analyzing Persuasion Strategies of Debaters on Social Media MattiWiegmann - KhalidAl Khatib + KhalidAl Khatib VishalKhanna BennoStein 6897–6905 @@ -6998,7 +6998,7 @@ ShengqiongWu HaoFei FeiLi - DonghongJi + DonghongJi 6955–6965 Emotion cause pair extraction (ECPE), as one of the derived subtasks of emotion cause analysis (ECA), shares rich inter-related features with emotion extraction (EE) and cause extraction (CE). Therefore EE and CE are frequently utilized as auxiliary tasks for better feature learning, modeled via multi-task learning (MTL) framework by prior works to achieve state-of-the-art (SoTA) ECPE results. However, existing MTL-based methods either fail to simultaneously model the specific features and the interactive feature in between, or suffer from the inconsistency of label prediction. In this work, we consider addressing the above challenges for improving ECPE by performing two alignment mechanisms with a novel Aˆ2Net model. We first propose a feature-task alignment to explicitly model the specific emotion-&cause-specific features and the shared interactive feature. Besides, an inter-task alignment is implemented, in which the label distance between the ECPE and the combinations of EE&CE are learned to be narrowed for better label consistency. Evaluations of benchmarks show that our methods outperform current best-performing systems on all ECA subtasks. Further analysis proves the importance of our proposed alignment mechanisms for the task. 2022.coling-1.606 @@ -7012,7 +7012,7 @@ JunjieYe TaoGui QiZhang - XuanjingHuang + XuanjingHuang 6966–6977 Despite having achieved great success for sentiment analysis, existing neural models struggle with implicit sentiment analysis. It is because they may latch onto spurious correlations (“shortcuts”, e.g., focusing only on explicit sentiment words), resulting in undermining the effectiveness and robustness of the learned model. In this work, we propose a CausaL intervention model for implicit sEntiment ANalysis using instrumental variable (CLEAN). We first review sentiment analysis from a causal perspective and analyze the confounders existing in this task. Then, we introduce instrumental variable to eliminate the confounding causal effects, thus extracting the pure causal effect between sentence and sentiment. We compare the proposed CLEAN with several strong baselines on both the general implicit sentiment analysis and aspect-based implicit sentiment analysis tasks. The results indicate the great advantages of our model and the efficacy of implicit sentiment reasoning. 2022.coling-1.607 @@ -7023,7 +7023,7 @@ SoumitraGhosh Gopendra VikramSingh AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 6978–6990 Mental health is a critical component of the United Nations’ Sustainable Development Goals (SDGs), particularly Goal 3, which aims to provide “good health and well-being”. The present mental health treatment gap is exacerbated by stigma, lack of human resources, and lack of research capability for implementation and policy reform. We present and discuss a novel task of detecting emotional reasoning (ER) and accompanying emotions in conversations. In particular, we create a first-of-its-kind multimodal mental health conversational corpus that is manually annotated at the utterance level with emotional reasoning and related emotion. We develop a multimodal multitask framework with a novel multimodal feature fusion technique and a contextuality learning module to handle the two tasks. Leveraging multimodal sources of information, commonsense reasoning, and through a multitask framework, our proposed model produces strong results. We achieve performance gains of 6% accuracy and 4.62% F1 on the emotion detection task and 3.56% accuracy and 3.31% F1 on the ER detection task, when compared to the existing state-of-the-art model. 2022.coling-1.608 @@ -7096,10 +7096,10 @@ One-Teacher and Multiple-Student Knowledge Distillation on Sentiment Classification XiaoqinChang - Sophia Yat MeiLee + Sophia Yat MeiLee SuyangZhu ShoushanLi - GuodongZhou + GuodongZhou 7042–7052 Knowledge distillation is an effective method to transfer knowledge from a large pre-trained teacher model to a compacted student model. However, in previous studies, the distilled student models are still large and remain impractical in highly speed-sensitive systems (e.g., an IR system). In this study, we aim to distill a deep pre-trained model into an extremely compacted shallow model like CNN. Specifically, we propose a novel one-teacher and multiple-student knowledge distillation approach to distill a deep pre-trained teacher model into multiple shallow student models with ensemble learning. Moreover, we leverage large-scale unlabeled data to improve the performance of students. Empirical studies on three sentiment classification tasks demonstrate that our approach achieves better results with much fewer parameters (0.9%-18%) and extremely high speedup ratios (100X-1000X). 2022.coling-1.614 @@ -7113,7 +7113,7 @@ XuantingChen TaoGui QiZhang - XuanjingHuang + XuanjingHuang RuiXie WeiWu 7053–7064 @@ -7142,7 +7142,7 @@ ZhenWu JindongWang TakahiroShinozaki - ManabuOkumura + ManabuOkumura YueZhang 7075–7085 Target-oriented Opinion Words Extraction (TOWE) is a fine-grained sentiment analysis task that aims to extract the corresponding opinion words of a given opinion target from the sentence. Recently, deep learning approaches have made remarkable progress on this task. Nevertheless, the TOWE task still suffers from the scarcity of training data due to the expensive data annotation process. Limited labeled data increase the risk of distribution shift between test data and training data. In this paper, we propose exploiting massive unlabeled data to reduce the risk by increasing the exposure of the model to varying distribution shifts. Specifically, we propose a novel Multi-Grained Consistency Regularization (MGCR) method to make use of unlabeled data and design two filters specifically for TOWE to filter noisy data at different granularity. Extensive experimental results on four TOWE benchmark datasets indicate the superiority of MGCR compared with current state-of-the-art methods. The in-depth analysis also demonstrates the effectiveness of the different-granularity filters. @@ -7167,7 +7167,7 @@ ChujunWang SiyuanWang QiZhang - XuanjingHuang + XuanjingHuang LiboWu 7093–7098 Existing research for argument representation learning mainly treats tokens in the sentence equally and ignores the implied structure information of argumentative context. In this paper, we propose to separate tokens into two groups, namely framing tokens and topic ones, to capture structural information of arguments. In addition, we consider high-level structure by incorporating paragraph-level position information. A novel structure-aware argument encoder is proposed for literature discourse analysis. Experimental results on both a self-constructed corpus and a public corpus show the effectiveness of our model. Resources are available at https://github.com/lemuria-wchen/SAE. @@ -7228,12 +7228,12 @@ Keyphrase Prediction from Video Transcripts: New Dataset and Directions Amir Pouran BenVeyseh - Quan HungTran + Quan HungTran SeunghyunYoon - VarunManjunatha + VarunManjunatha HaniehDeilamsalehy RajivJain - TrungBui + TrungBui Walter W.Chang FranckDernoncourt Thien HuuNguyen @@ -7304,7 +7304,7 @@ ZhiyongWu DongXu WeifengZhao - HelenMeng + HelenMeng 7193–7202 Naturalness and expressiveness are crucial for audiobook speech synthesis, but now are limited by the averaged global-scale speaking style representation. In this paper, we propose an unsupervised multi-scale context-sensitive text-to-speech model for audiobooks. A multi-scale hierarchical context encoder is specially designed to predict both global-scale context style embedding and local-scale context style embedding from a wider context of input text in a hierarchical manner. Likewise, a multi-scale reference encoder is introduced to extract reference style embeddings at both global and local scales from the reference speech, which is used to guide the prediction of speaking styles. On top of these, a bi-reference attention mechanism is used to align both local-scale reference style embedding sequence and local-scale context style embedding sequence with corresponding phoneme embedding sequence. Both objective and subjective experiment results on a real-world multi-speaker Mandarin novel audio dataset demonstrate the excellent performance of our proposed method over all baselines in terms of naturalness and expressiveness of the synthesized speech. 2022.coling-1.630 diff --git a/data/xml/2022.computel.xml b/data/xml/2022.computel.xml index c1e521f20a..4591221ead 100644 --- a/data/xml/2022.computel.xml +++ b/data/xml/2022.computel.xml @@ -3,7 +3,7 @@ Proceedings of the Fifth Workshop on the Use of Computational Methods in the Study of Endangered Languages - SarahMoeller + SarahMoeller AntoniosAnastasopoulos AnttiArppe AditiChaudhary @@ -64,7 +64,7 @@ <fixed-case>CLD</fixed-case>² Language Documentation Meets Natural Language Processing for Revitalising Endangered Languages RobertoZariquiey - ArturoOncevay + ArturoOncevay JavierVera 20-30 Language revitalisation should not be understood as a direct outcome of language documentation, which is mainly focused on the creation of language repositories. Natural language processing (NLP) offers the potential to complement and exploit these repositories through the development of language technologies that may contribute to improving the vitality status of endangered languages. In this paper, we discuss the current state of the interaction between language documentation and computational linguistics, present a diagnosis of how the outputs of recent documentation projects for endangered languages are underutilised for the NLP community, and discuss how the situation could change from both the documentary linguistics and NLP perspectives. All this is introduced as a bridging paradigm dubbed as Computational Language Documentation and Development (CLD²). CLD² calls for (1) the inclusion of NLP-friendly annotated data as a deliverable of future language documentation projects; and (2) the exploitation of language documentation databases by the NLP community to promote the computerization of endangered languages, as one way to contribute to their revitalization. @@ -75,7 +75,7 @@ One Wug, Two Wug+s Transformer Inflection Models Hallucinate Affixes FarhanSamir - MiikkaSilfverberg + MiikkaSilfverberg 31-40 Data augmentation strategies are increasingly important in NLP pipelines for low-resourced and endangered languages, and in neural morphological inflection, augmentation by so called data hallucination is a popular technique. This paper presents a detailed analysis of inflection models trained with and without data hallucination for the low-resourced Canadian Indigenous language Gitksan. Our analysis reveals evidence for a concatenative inductive bias in augmented models—in contrast to models trained without hallucination, they strongly prefer affixing inflection patterns over suppletive ones. We find that preference for affixation in general improves inflection performance in “wug test” like settings, where the model is asked to inflect lexemes missing from the training set. However, data hallucination dramatically reduces prediction accuracy for reduplicative forms due to a misanalysis of reduplication as affixation. While the overall impact of data hallucination for unseen lexemes remains positive, our findings call for greater qualitative analysis and more varied evaluation conditions in testing automatic inflection systems. Our results indicate that further innovations in data augmentation for computational morphology are desirable. 2022.computel-1.5 @@ -93,7 +93,7 @@ MichaelHiggins RoyBarker JaneSimpson - DanJurafsky + DanJurafsky 41-51 Many archival recordings of speech from endangered languages remain unannotated and inaccessible to community members and language learning programs. One bottleneck is the time-intensive nature of annotation. An even narrower bottleneck occurs for recordings with access constraints, such as language that must be vetted or filtered by authorised community members before annotation can begin. We propose a privacy-preserving workflow to widen both bottlenecks for recordings where speech in the endangered language is intermixed with a more widely-used language such as English for meta-linguistic commentary and questions (e.g.What is the word for ‘tree’?). We integrate voice activity detection (VAD), spoken language identification (SLI), and automatic speech recognition (ASR) to transcribe the metalinguistic content, which an authorised person can quickly scan to triage recordings that can be annotated by people with lower levels of access. We report work-in-progress processing 136 hours archival audio containing a mix of English and Muruwari. Our collaborative work with the Muruwari custodian of the archival materials show that this workflow reduces metalanguage transcription time by 20% even given only minimal amounts of annotated training data, 10 utterances per language for SLI and for ASR at most 39 minutes, and possibly as little as 39 seconds. 2022.computel-1.6 @@ -109,7 +109,7 @@ DavidHuggins-Daines ChristopherCox FineenDavis - EddieAntonio Santos + EddieAntonio Santos ShankhalikaSrikanth DelasieTorkornoo SabrinaYu @@ -139,7 +139,7 @@ NedelinaIvanova ChristèleMaizonniaux NeasaNí Chiaráin - MannyRayner + MannyRayner JohnSloan Ghil’adZuckermann 68-77 @@ -235,7 +235,7 @@ Faoi Gheasa an adaptive game for <fixed-case>I</fixed-case>rish language learning LiangXu - ElaineUí Dhonnchadha + ElaineUí Dhonnchadha MonicaWard 133-138 In this paper, we present a game with a purpose (GWAP) (Von Ahn 2006). The aim of the game is to promote language learning and ‘noticing’ (Skehan, 2013). The game has been designed for Irish, but the framework could be used for other languages. Irish is a minority language which means that L2 learners have limited opportunities for exposure to the language, and additionally, there are also limited (digital) learning resources available. This research incorporates game development, language pedagogy and ICALL language materials development. This paper will focus on the language materials development as this is a bottleneck in the teaching and learning of minority and endangered languages. @@ -312,7 +312,7 @@ NikolaosConstantinides NikolaosKokkas GeorgePavlidis - StellaMarkantonatou + StellaMarkantonatou 179-186 The project XXXX is developing a platform to enable researchers of living languages to easily create and make available state-of-the-art spoken and textual annotated resources. As a case study we use Greek and Pomak, the latter being an endangered oral Slavic language of the Balkans (including Thrace/Greece). The linguistic documentation of Pomak is an ongoing work by an interdisciplinary team in close cooperation with the Pomak community of Greece. We describe our experience in the development of a Latin-based orthography and morphologically annotated text corpora of Pomak with state-of-the-art NLP technology. These resources will be made openly available on the XXXX site and the gold annotated corpora of Pomak will be made available on the Universal Dependencies treebank repository. 2022.computel-1.22 @@ -324,7 +324,7 @@ Enhancing Documentation of <fixed-case>H</fixed-case>upa with Automatic Speech Recognition ZoeyLiu JustinSpence - EmilyPrud’hommeaux + EmilyPrud’hommeaux 187-192 This study investigates applications of automatic speech recognition (ASR) techniques to Hupa, a critically endangered Native American language from the Dene (Athabaskan) language family. Using around 9h12m of spoken data produced by one elder who is a first-language Hupa speaker, we experimented with different evaluation schemes and training settings. On average a fully connected deep neural network reached a word error rate of 35.26%. Our overall results illustrate the utility of ASR for making Hupa language documentation more accessible and usable. In addition, we found that when training acoustic models, using recordings with transcripts that were not carefully verified did not necessarily have a negative effect on model performance. This shows promise for speech corpora of indigenous languages that commonly include transcriptions produced by second-language speakers or linguists who have advanced knowledge in the language of interest. 2022.computel-1.23 diff --git a/data/xml/2022.conll.xml b/data/xml/2022.conll.xml index 16681ff096..f52528e358 100644 --- a/data/xml/2022.conll.xml +++ b/data/xml/2022.conll.xml @@ -44,7 +44,7 @@ MichaelYoder LynnetteNg David WestBrown - KathleenCarley + KathleenCarley 27-39 This paper investigates how hate speech varies in systematic ways according to the identities it targets. Across multiple hate speech datasets annotated for targeted identities, we find that classifiers trained on hate speech targeting specific identity groups struggle to generalize to other targeted identities. This provides empirical evidence for differences in hate speech by target identity; we then investigate which patterns structure this variation. We find that the targeted demographic category (e.g. gender/sexuality or race/ethnicity) appears to have a greater effect on the language of hate speech than does the relative social power of the targeted identity group. We also find that words associated with hate speech targeting specific identities often relate to stereotypes, histories of oppression, current social movements, and other social contexts specific to identities. These experiments suggest the importance of considering targeted identity, as well as the social contexts associated with these identities, in automated hate speech classification 2022.conll-1.3 @@ -118,9 +118,9 @@ Combining Noisy Semantic Signals with Orthographic Cues: Cognate Induction for the <fixed-case>I</fixed-case>ndic Dialect Continuum NiyatiBafna - Josefvan Genabith + Josefvan Genabith CristinaEspaña-Bonet - ZdeněkŽabokrtský + ZdeněkŽabokrtský 110-131 We present a novel method for unsupervised cognate/borrowing identification from monolingual corpora designed for low and extremely low resource scenarios, based on combining noisy semantic signals from joint bilingual spaces with orthographic cues modelling sound change. We apply our method to the North Indian dialect continuum, containing several dozens of dialects and languages spoken by more than 100 million people. Many of these languages are zero-resource and therefore natural language processing for them is non-existent. We first collect monolingual data for 26 Indic languages, 16 of which were previously zero-resource, and perform exploratory character, lexical and subword cross-lingual alignment experiments for the first time at this scale on this dialect continuum. We create bilingual evaluation lexicons against Hindi for 20 of the languages. We then apply our cognate identification method on the data, and show that our method outperforms both traditional orthography baselines as well as EM-style learnt edit distance matrices. To the best of our knowledge, this is the first work to combine traditional orthographic cues with noisy bilingual embeddings to tackle unsupervised cognate detection in a (truly) low-resource setup, showing that even noisy bilingual embeddings can act as good guides for this task. We release our multilingual dialect corpus, called HinDialect, as well as our scripts for evaluation data collection and cognate induction. 2022.conll-1.9 @@ -131,7 +131,7 @@ Detecting Unintended Social Bias in Toxic Language Datasets NiharSahoo HimanshuGupta - PushpakBhattacharyya + PushpakBhattacharyya 132-143 With the rise of online hate speech, automatic detection of Hate Speech, Offensive texts as a natural language processing task is getting popular. However, very little research has been done to detect unintended social bias from these toxic language datasets. This paper introduces a new dataset ToxicBias curated from the existing dataset of Kaggle competition named “Jigsaw Unintended Bias in Toxicity Classification”. We aim to detect social biases, their categories, and targeted groups. The dataset contains instances annotated for five different bias categories, viz., gender, race/ethnicity, religion, political, and LGBTQ. We train transformer-based models using our curated datasets and report baseline performance for bias identification, target generation, and bias implications. Model biases and their mitigation are also discussed in detail. Our study motivates a systematic extraction of social bias data from toxic language datasets. 2022.conll-1.10 @@ -199,7 +199,7 @@ Leveraging a New <fixed-case>S</fixed-case>panish Corpus for Multilingual and Cross-lingual Metaphor Detection ElisaSanchez-Bayona - RodrigoAgerri + RodrigoAgerri 228-240 The lack of wide coverage datasets annotated with everyday metaphorical expressions for languages other than English is striking. This means that most research on supervised metaphor detection has been published only for that language. In order to address this issue, this work presents the first corpus annotated with naturally occurring metaphors in Spanish large enough to develop systems to perform metaphor detection. The presented dataset, CoMeta, includes texts from various domains, namely, news, political discourse, Wikipedia and reviews. In order to label CoMeta, we apply the MIPVU method, the guidelines most commonly used to systematically annotate metaphor on real data. We use our newly created dataset to provide competitive baselines by fine-tuning several multilingual and monolingual state-of-the-art large language models. Furthermore, by leveraging the existing VUAM English data in addition to CoMeta, we present the, to the best of our knowledge, first cross-lingual experiments on supervised metaphor detection. Finally, we perform a detailed error analysis that explores the seemingly high transfer of everyday metaphor across these two languages and datasets. 2022.conll-1.16 @@ -221,12 +221,12 @@ On Language Spaces, Scales and Cross-Lingual Transfer of <fixed-case>UD</fixed-case> Parsers - TanjaSamardžić + TanjaSamardžić XimenaGutierrez-Vasques Robvan der Goot MaxMüller-Eberstein OlgaPelloni - BarbaraPlank + BarbaraPlank 266-281 Cross-lingual transfer of parsing models has been shown to work well for several closely-related languages, but predicting the success in other cases remains hard. Our study is a comprehensive analysis of the impact of linguistic distance on the transfer of UD parsers. As an alternative to syntactic typological distances extracted from URIEL, we propose three text-based feature spaces and show that they can be more precise predictors, especially on a more local scale, when only shorter distances are taken into account. Our analyses also reveal that the good coverage in typological databases is not among the factors that explain good transfer. 2022.conll-1.18 @@ -253,7 +253,7 @@ Syntactic Surprisal From Neural Models Predicts, But Underestimates, Human Processing Difficulty From Syntactic Ambiguities SuhasArehalli - BrianDillon + BrianDillon TalLinzen 301-313 Humans exhibit garden path effects: When reading sentences that are temporarily structurally ambiguous, they slow down when the structure is disambiguated in favor of the less preferred alternative. Surprisal theory (Hale, 2001; Levy, 2008), a prominent explanation of this finding, proposes that these slowdowns are due to the unpredictability of each of the words that occur in these sentences. Challenging this hypothesis, van Schijndel and Linzen (2021) find that estimates of the cost of word predictability derived from language models severely underestimate the magnitude of human garden path effects. In this work, we consider whether this underestimation is due to the fact that humans weight syntactic factors in their predictions more highly than language models do. We propose a method for estimating syntactic predictability from a language model, allowing us to weigh the cost of lexical and syntactic predictability independently. We find that treating syntactic predictability independently from lexical predictability indeed results in larger estimates of garden path. At the same time, even when syntactic predictability is independently weighted, surprisal still greatly underestimate the magnitude of human garden path effects. Our results support the hypothesis that predictability is not the only factor responsible for the processing cost associated with garden path sentences. @@ -277,7 +277,7 @@ Optimizing text representations to capture (dis)similarity between political parties TaniseCeron NicoBlokker - SebastianPadó + SebastianPadó 325-338 Even though fine-tuned neural language models have been pivotal in enabling “deep” automatic text analysis, optimizing text representations for specific applications remains a crucial bottleneck. In this study, we look at this problem in the context of a task from computational social science, namely modeling pairwise similarities between political parties. Our research question is what level of structural information is necessary to create robust text representation, contrasting a strongly informed approach (which uses both claim span and claim category annotations) with approaches that forgo one or both types of annotation with document structure-based heuristics. Evaluating our models on the manifestos of German parties for the 2021 federal election. We find that heuristics that maximize within-party over between-party similarity along with a normalization step lead to reliable party similarity prediction, without the need for manual annotation. 2022.conll-1.22 diff --git a/data/xml/2022.constraint.xml b/data/xml/2022.constraint.xml index 16286d84aa..22d63d1b93 100644 --- a/data/xml/2022.constraint.xml +++ b/data/xml/2022.constraint.xml @@ -8,7 +8,7 @@ KaiShu H. RussellBernard MariaLiakata - PreslavNakov + PreslavNakov AseemSrivastava Association for Computational Linguistics
Dublin, Ireland
@@ -106,7 +106,7 @@ SyrielleMontariol ÉtienneSimon ArijRiabi - DjaméSeddah + DjaméSeddah 55-65 We propose our solution to the multimodal semantic role labeling task from the CONSTRAINT’22 workshop. The task aims at classifying entities in memes into classes such as “hero” and “villain”. We use several pre-trained multi-modal models to jointly encode the text and image of the memes, and implement three systems to classify the role of the entities. We propose dynamic sampling strategies to tackle the issue of class imbalance. Finally, we perform qualitative analysis on the representations of the entities. 2022.constraint-1.7 diff --git a/data/xml/2022.crac.xml b/data/xml/2022.crac.xml index 1cc490a570..1a4b3be14b 100644 --- a/data/xml/2022.crac.xml +++ b/data/xml/2022.crac.xml @@ -4,10 +4,10 @@ Proceedings of the Fifth Workshop on Computational Models of Reference, Anaphora and Coreference MaciejOgrodniczuk - SameerPradhan + SameerPradhan AnnaNedoluzhko VincentNg - MassimoPoesio + MassimoPoesio Association for Computational Linguistics
Gyeongju, Republic of Korea
October @@ -23,7 +23,7 @@ Quantifying Discourse Support for Omitted Pronouns ShulinZhang JixingLi - JohnHale + JohnHale 1–12 Pro-drop is commonly seen in many languages, but its discourse motivations have not been well characterized. Inspired by the topic chain theory in Chinese, this study shows how character-verb usage continuity distinguishes dropped pronouns from overt references to story characters. We model the choice to drop vs. not drop as a function of character-verb continuity. The results show that omitted subjects have higher character history-current verb continuity salience than non-omitted subjects. This is consistent with the idea that discourse coherence with a particular topic, such as a story character, indeed facilitates the omission of pronouns in languages and contexts where they are optional. 2022.crac-1.1 @@ -76,7 +76,7 @@ EgilRønningstad Per ErikSolberg ErikVelldal - LiljaØvrelid + LiljaØvrelid 48–60 We present the Norwegian Anaphora Resolution Corpus (NARC), the first publicly available corpus annotated with anaphoric relations between noun phrases for Norwegian. The paper describes the annotated data for 326 documents in Norwegian Bokmål, together with inter-annotator agreement and discussions of relevant statistics. We also present preliminary modelling results which are comparable to existing corpora for other languages, and discuss relevant problems in relation to both modelling and the annotations themselves. 2022.crac-1.6 @@ -85,7 +85,7 @@ Evaluating Coreference Resolvers on Community-based Question Answering: From Rule-based to State of the Art HaixiaChai - Nafise SadatMoosavi + Nafise SadatMoosavi IrynaGurevych MichaelStrube 61–73 @@ -105,8 +105,8 @@ Investigating Cross-Document Event Coreference for <fixed-case>D</fixed-case>utch LoicDe Langhe - OrpheeDe Clercq - VeroniqueHoste + OrpheeDe Clercq + VeroniqueHoste 88–98 In this paper we present baseline results for Event Coreference Resolution (ECR) in Dutch using gold-standard (i.e non-predicted) event mentions. A newly developed benchmark dataset allows us to properly investigate the possibility of creating ECR systems for both within and cross-document coreference. We give an overview of the state of the art for ECR in other languages, as well as a detailed overview of existing ECR resources. Afterwards, we provide a comparative report on our own dataset. We apply a significant number of approaches that have been shown to attain good results for English ECR including feature-based models, monolingual transformer language models and multilingual language models. The best results were obtained using the monolingual BERTje model. Finally, results for all models are thoroughly analysed and visualised, as to provide insight into the inner workings of ECR and long-distance semantic NLP tasks in general. 2022.crac-1.9 @@ -125,7 +125,7 @@ Proceedings of the CRAC 2022 Shared Task on Multilingual Coreference Resolution - ZdeněkŽabokrtský + ZdeněkŽabokrtský MaciejOgrodniczuk Association for Computational Linguistics
Gyeongju, Republic of Korea
@@ -148,7 +148,7 @@ MartinPopel OndřejPražák JakubSido - DanielZeman + DanielZeman YilunZhu 1–17 This paper presents an overview of the shared task on multilingual coreference resolution associated with the CRAC 2022 workshop. Shared task participants were supposed to develop trainable systems capable of identifying mentions and clustering them according to identity coreference. The public edition of CorefUD 1.0, which contains 13 datasets for 10 languages, was used as the source of training and evaluation data. The CoNLL score used in previous coreference-oriented shared tasks was used as the main evaluation metric. There were 8 coreference prediction systems submitted by 5 participating teams; in addition, there was a competitive Transformer-based baseline system provided by the organizers at the beginning of the shared task. The winner system outperformed the baseline by 12 percentage points (in terms of the CoNLL scores averaged across all datasets for individual languages). diff --git a/data/xml/2022.creativesumm.xml b/data/xml/2022.creativesumm.xml index 0cd1a9652f..37e6181a2e 100644 --- a/data/xml/2022.creativesumm.xml +++ b/data/xml/2022.creativesumm.xml @@ -27,7 +27,7 @@
Summarization of Long Input Texts Using Multi-Layer Neural Network - NiladriChatterjee + NiladriChatterjee AadyantKhatri RakshaAgarwal 13–18 @@ -112,13 +112,13 @@ <fixed-case>CREATIVESUMM</fixed-case>: Shared Task on Automatic Summarization for Creative Writing DivyanshAgarwal - Alexander R.Fabbri + Alexander R.Fabbri SimengHan WojciechKryscinski FaisalLadhak BryanLi - KathleenMcKeown - DragomirRadev + KathleenMcKeown + DragomirRadev TianyiZhang SamWiseman 67–73 diff --git a/data/xml/2022.csrnlp.xml b/data/xml/2022.csrnlp.xml index 4619ff03f3..9a04985a2d 100644 --- a/data/xml/2022.csrnlp.xml +++ b/data/xml/2022.csrnlp.xml @@ -23,7 +23,7 @@ Francesco PaoloLagrasta SergioCaputo PierpaoloPontrandolfo - GiovanniSemeraro + GiovanniSemeraro 1–8 Sustainability reporting has become an annual requirement in many countries and for certain types of companies. Sustainability reports inform stakeholders about companies’ commitment to sustainable development and their economic, social, and environmental sustainability practices. However, the fact that norms and standards allow a certain discretion to be adopted by drafting organizations makes such reports hardly comparable in terms of layout, disclosures, key performance indicators (KPIs), and so on. In this work, we present a system based on natural language processing and information extraction techniques to retrieve relevant information from sustainability reports, compliant with the Global Reporting Initiative Standards, written in Italian and English language. Specifically, the system is able to identify references to the various sustainability topics discussed by the reports: on which page of the document those references have been found, the context of each reference, and if it is mentioned positively or negatively. The output of the system has been then evaluated against a ground truth obtained through a manual annotation process on 134 reports. Experimental outcomes highlight the affordability of the approach for improving sustainability disclosures, accessibility, and transparency, thus empowering stakeholders to conduct further analysis and considerations. 2022.csrnlp-1.1 @@ -98,13 +98,13 @@ TapanAuti RajdeepSarkar BernardoStearns - Atul Kr.Ojha + Atul Kr.Ojha ArindamPaul MichaelaComerford JayMegaro JohnMariano VallHerard - John P.McCrae + John P.McCrae 52–57 Pharmaceutical text classification is an important area of research for commercial and research institutions working in the pharmaceutical domain. Addressing this task is challenging due to the need of expert verified labelled data which can be expensive and time consuming to obtain. Towards this end, we leverage predictive coding methods for the task as they have been shown to generalise well for sentence classification. Specifically, we utilise GAN-BERT architecture to classify pharmaceutical texts. To capture the domain specificity, we propose to utilise the BioBERT model as our BERT model in the GAN-BERT framework. We conduct extensive evaluation to show the efficacy of our approach over baselines on multiple metrics. 2022.csrnlp-1.8 diff --git a/data/xml/2022.csrr.xml b/data/xml/2022.csrr.xml index 0139c32bdf..7741a46b1b 100644 --- a/data/xml/2022.csrr.xml +++ b/data/xml/2022.csrr.xml @@ -5,7 +5,7 @@ Proceedings of the First Workshop on Commonsense Representation and Reasoning (CSRR 2022) AntoineBosselut XiangLi - Bill YuchenLin + Bill YuchenLin VeredShwartz Bodhisattwa PrasadMajumder Yash KumarLal @@ -38,7 +38,7 @@ Cloze Evaluation for Deeper Understanding of Commonsense Stories in <fixed-case>I</fixed-case>ndonesian FajriKoto - TimothyBaldwin + TimothyBaldwin Jey HanLau 8-16 Story comprehension that involves complex causal and temporal relations is a critical task in NLP, but previous studies have focused predominantly on English, leaving open the question of how the findings generalize to other languages, such as Indonesian. In this paper, we follow the Story Cloze Test framework of Mostafazadeh et al. (2016) in evaluating story understanding in Indonesian, by constructing a four-sentence story with one correct ending and one incorrect ending. To investigate commonsense knowledge acquisition in language models, we experimented with: (1) a classification task to predict the correct ending; and (2) a generation task to complete the story with a single sentence. We investigate these tasks in two settings: (i) monolingual training and ii) zero-shot cross-lingual transfer between Indonesian and English. @@ -63,7 +63,7 @@ YueenMa HaoxuanYou ZhecanWang - Shih-FuChang + Shih-FuChang 23-35 Large-scale visual-linguistic pre-training aims to capture the generic representations from multimodal features, which are essential for downstream vision-language tasks. Existing methods mostly focus on learning the semantic connections between visual objects and linguistic content, which tend to be recognitionlevel information and may not be sufficient for commonsensical reasoning tasks like VCR. In this paper, we propose a novel commonsensical vision-language pre-training framework to bridge the gap. We first augment the conventional image-caption pre-training datasets with commonsense inferences from a visuallinguistic GPT-2. To pre-train models on image, caption and commonsense inferences together, we propose two new tasks: masked commonsense modeling (MCM) and commonsense type prediction (CTP). To reduce the shortcut effect between captions and commonsense inferences, we further introduce the domain-wise adaptive masking that dynamically adjusts the masking ratio. Experimental results on downstream tasks, VCR and VQA, show the improvement of our pre-training strategy over previous methods. Human evaluation also validates the relevance, informativeness, and diversity of the generated commonsense inferences. Overall, we demonstrate the potential of incorporating commonsense knowledge into the conventional recognition-level visual-linguistic pre-training. 2022.csrr-1.4 @@ -86,7 +86,7 @@ Knowledge-Augmented Language Models for Cause-Effect Relation Classification PedramHosseini David A.Broniatowski - MonaDiab + MonaDiab 43-48 Previous studies have shown the efficacy of knowledge augmentation methods in pretrained language models. However, these methods behave differently across domains and downstream tasks. In this work, we investigate the augmentation of pretrained language models with knowledge graph data in the cause-effect relation classification and commonsense causal reasoning tasks. After automatically verbalizing triples in ATOMIC2020, a wide coverage commonsense reasoning knowledge graph, we continually pretrain BERT and evaluate the resulting model on cause-effect pair classification and answering commonsense causal reasoning questions. Our results show that a continually pretrained language model augmented with commonsense reasoning knowledge outperforms our baselines on two commonsense causal reasoning benchmarks, COPA and BCOPA-CE, and a Temporal and Causal Reasoning (TCR) dataset, without additional improvement in model architecture or using quality-enhanced data for fine-tuning. 2022.csrr-1.6 diff --git a/data/xml/2022.dadc.xml b/data/xml/2022.dadc.xml index a796573f54..73c296e4a1 100644 --- a/data/xml/2022.dadc.xml +++ b/data/xml/2022.dadc.xml @@ -101,7 +101,7 @@ Generalized Quantifiers as a Source of Error in Multilingual <fixed-case>NLU</fixed-case> Benchmarks RuixiangCui DanielHershcovich - AndersSøgaard + AndersSøgaard 61-61 Logical approaches to representing language have developed and evaluated computational models of quantifier words since the 19th century, but today’s NLU models still struggle to capture their semantics. We rely on Generalized Quantifier Theory for language-independent representations of the semantics of quantifier words, to quantify their contribution to the errors of NLU models. We find that quantifiers are pervasive in NLU benchmarks, and their occurrence at test time is associated with performance drops. Multilingual models also exhibit unsatisfying quantifier reasoning abilities, but not necessarily worse for non-English languages. To facilitate directly-targeted probing, we present an adversarial generalized quantifier NLI task (GQNLI) and show that pre-trained language models have a clear lack of robustness in generalized quantifier reasoning. 2022.dadc-1.7 @@ -114,7 +114,7 @@ JasonPhang AngelicaChen WilliamHuang - Samuel R.Bowman + Samuel R.Bowman 62-62 Large language models increasingly saturate existing task benchmarks, in some cases outperforming humans, leaving little headroom with which to measure further progress. Adversarial dataset creation, which builds datasets using examples that a target system outputs incorrect predictions for, has been proposed as a strategy to construct more challenging datasets, avoiding the more serious challenge of building more precise benchmarks by conventional means. In this work, we study the impact of applying three common approaches for adversarial dataset creation: (1) filtering out easy examples (AFLite), (2) perturbing examples (TextFooler), and (3) model-in-the-loop data collection (ANLI and AdversarialQA), across 18 different adversary models. We find that all three methods can produce more challenging datasets, with stronger adversary models lowering the performance of evaluated models more. However, the resulting ranking of the evaluated models can also be unstable and highly sensitive to the choice of adversary model. Moreover, we find that AFLite oversamples examples with low annotator agreement, meaning that model comparisons hinge on the examples that are most contentious for humans. We recommend that researchers tread carefully when using adversarial methods for building evaluation datasets. 2022.dadc-1.8 diff --git a/data/xml/2022.dash.xml b/data/xml/2022.dash.xml index 7baba477e0..06615e83d0 100644 --- a/data/xml/2022.dash.xml +++ b/data/xml/2022.dash.xml @@ -21,7 +21,7 @@ <fixed-case>MEGA</fixed-case>nno: Exploratory Labeling for <fixed-case>NLP</fixed-case> in Computational Notebooks - DanZhangMegagon Labs + DanZhangMegagon Labs HannahKimMegagon Labs RafaelLi ChenMegagon Labs EserKandoganMegagon Labs @@ -85,7 +85,7 @@ A Gamified Approach to Frame Semantic Role Labeling EmilyAmspokerCarnegie Mellon University - Miriam R LPetruckInternational Computer Science Institute + Miriam R LPetruckInternational Computer Science Institute 37-42 Much research has investigated the possibility of creating games with a purpose (GWAPs), i.e., online games whose purpose is gathering information to address the insufficient amount of data for training and testing of large language models (Von Ahn and Dabbish, 2008). Based on such work, this paper reports on the development of a game for frame semantic role labeling, where players have fun while using semantic frames as prompts for short story writing. This game will generate more annotations for FrameNet and original content for annotation, supporting FrameNet’s goal of characterizing the English language in terms of Frame Semantics. 2022.dash-1.6 @@ -121,7 +121,7 @@ Partially Humanizing Weak Supervision: Towards a Better Low Resource Pipeline for Spoken Language Understanding - AyushKumarObserve.AI + AyushKumarObserve.AI RishabhTripathiObserve.AI JithendraVepaObserve AI 64-73 @@ -172,9 +172,9 @@ Interactively Uncovering Latent Arguments in Social Media Platforms: A Case Study on the Covid-19 Vaccine Debate - Maria LeonorPachecoUniversity of Colorado Boulder / Microsoft Research + Maria LeonorPachecoUniversity of Colorado Boulder / Microsoft Research TunazzinaIslamPurdue University - LyleUngarUniversity of Pennsylvania + LyleUngarUniversity of Pennsylvania MingYinPurdue University DanGoldwasserPurdue University 94-111 diff --git a/data/xml/2022.dclrl.xml b/data/xml/2022.dclrl.xml index 180e288ce3..e629d109ab 100644 --- a/data/xml/2022.dclrl.xml +++ b/data/xml/2022.dclrl.xml @@ -20,7 +20,7 @@ <fixed-case>S</fixed-case>ynt<fixed-case>A</fixed-case>ct: A Synthesized Database of Basic Emotions FelixBurkhardt FlorianEyben - BjörnSchuller + BjörnSchuller 1–9 Speech emotion recognition is in the focus of research since several decades and has many applications. One problem is sparse data for supervised learning. One way to tackle this problem is the synthesis of data with emotion simulating speech synthesis approaches. We present a synthesized database of five basic emotions and neutral expression based on rule based manipulation for a diphone synthesizer which we release to the public. The database has been validated in several machine learning experiments as a training set to detect emotional expression from natural speech data. The scripts to generate such a database have been made open source and could be used to aid speech emotion recognition for a low resourced language, as MBROLA supports 35 languages 2022.dclrl-1.1 diff --git a/data/xml/2022.deelio.xml b/data/xml/2022.deelio.xml index 483aceba28..d27215476a 100644 --- a/data/xml/2022.deelio.xml +++ b/data/xml/2022.deelio.xml @@ -3,7 +3,7 @@ Proceedings of Deep Learning Inside Out (DeeLIO 2022): The 3rd Workshop on Knowledge Extraction and Integration for Deep Learning Architectures - EnekoAgirre + EnekoAgirre MariannaApidianaki IvanVulić Association for Computational Linguistics @@ -45,7 +45,7 @@ SukminCho SoyeongJeong WonsukYang - JongPark + JongPark 22-32 Dense retrieval aims at searching for the most relevant documents to the given query by encoding texts in the embedding space, requiring a large amount of query-document pairs to train. Since manually constructing such training data is challenging, recent work has proposed to generate synthetic queries from documents and use them to train a dense retriever. However, compared to the manually composed queries, synthetic queries do not generally ask for implicit information, therefore leading to a degraded retrieval performance. In this work, we propose Query Generation with External Knowledge (QGEK), a novel method for generating queries with external information related to the corresponding document. Specifically, we convert a query into a triplet-based template form to accommodate external information and transmit it to a pre-trained language model (PLM). We validate QGEK on both in-domain and out-domain dense retrieval settings. The dense retriever with the queries requiring implicit information is found to make good performance improvement. Also, such queries are similar to manually composed queries, confirmed by both human evaluation and unique & non-unique words distribution. 2022.deelio-1.3 @@ -72,7 +72,7 @@ Jointly Identifying and Fixing Inconsistent Readings from Information Extraction Systems AnkurPadia FrancisFerraro - TimFinin + TimFinin 42-52 Moral values as commonsense norms shape our everyday individual and community behavior. The possibility to extract moral attitude rapidly from natural language is an appealing perspective that would enable a deeper understanding of social interaction dynamics and the individual cognitive and behavioral dimension. In this work we focus on detecting moral content from natural language and we test our methods on a corpus of tweets previously labeled as containing moral values or violations, according to Moral Foundation Theory. We develop and compare two different approaches: (i) a frame-based symbolic value detector based on knowledge graphs and (ii) a zero-shot machine learning model fine-tuned on a task of Natural Language Inference (NLI) and a task of emotion detection. The final outcome from our work consists in two approaches meant to perform without the need for prior training process on a moral value detection task. 2022.deelio-1.5 @@ -133,7 +133,7 @@ JiachangLiu DinghanShen YizheZhang - BillDolan + BillDolan LawrenceCarin WeizhuChen 100-114 diff --git a/data/xml/2022.deeplo.xml b/data/xml/2022.deeplo.xml index f6f0fb2a6b..2425e72d6a 100644 --- a/data/xml/2022.deeplo.xml +++ b/data/xml/2022.deeplo.xml @@ -29,7 +29,7 @@ JohnOrtega WilliamChen RichardCastro - NúriaBel + NúriaBel CesarYoshikawa RenzoVenturas HilarioAradiel @@ -68,7 +68,7 @@ Generating unlabelled data for a tri-training approach in a low resourced <fixed-case>NER</fixed-case> task HugoBoulanger ThomasLavergne - SophieRosset + SophieRosset 30-37 Training a tagger for Named Entity Recognition (NER) requires a substantial amount of labeled data in the task domain. Manual labeling is a tedious and complicated task. Semisupervised learning methods can reduce the quantity of labeled data necessary to train a model. However, these methods require large quantities of unlabeled data, which remains an issue in many cases. @@ -138,7 +138,7 @@ We address this problem by generating unlabeled data. Large language models have ShaynaGardiner DavidRossouw TereRoldán - SimonCorston-Oliver + SimonCorston-Oliver 80-89 Automatic Speech Recognition (ASR) systems typically produce unpunctuated transcripts that have poor readability. In addition, building a punctuation restoration system is challenging for low-resource languages, especially for domain-specific applications. In this paper, we propose a Spanish punctuation restoration system designed for a real-time customer support transcription service. To address the data sparsity of Spanish transcripts in the customer support domain, we introduce two transferlearning-based strategies: 1) domain adaptation using out-of-domain Spanish text data; 2) crosslingual transfer learning leveraging in-domain English transcript data. Our experiment results show that these strategies improve the accuracy of the Spanish punctuation restoration system. 2022.deeplo-1.9 @@ -151,7 +151,7 @@ We address this problem by generating unlabeled data. Large language models have KurtMicallef AlbertGatt MarcTanti - Lonnekevan der Plas + Lonnekevan der Plas ClaudiaBorg 90-101 Multilingual language models such as mBERT have seen impressive cross-lingual transfer to a variety of languages, but many languages remain excluded from these models. In this paper, we analyse the effect of pre-training with monolingual data for a low-resource language that is not included in mBERT – Maltese – with a range of pre-training set ups. We conduct evaluations with the newly pre-trained models on three morphosyntactic tasks – dependency parsing, part-of-speech tagging, and named-entity recognition – and one semantic classification task – sentiment analysis. We also present a newly created corpus for Maltese, and determine the effect that the pre-training data size and domain have on the downstream performance. Our results show that using a mixture of pre-training domains is often superior to using Wikipedia text only. We also find that a fraction of this corpus is enough to make significant leaps in performance over Wikipedia-trained models. We pre-train and compare two models on the new corpus: a monolingual BERT model trained from scratch (BERTu), and a further pretrained multilingual BERT (mBERTu). The models achieve state-of-the-art performance on these tasks, despite the new corpus being considerably smaller than typically used corpora for high-resourced languages. On average, BERTu outperforms or performs competitively with mBERTu, and the largest gains are observed for higher-level tasks. @@ -164,7 +164,7 @@ We address this problem by generating unlabeled data. Large language models have Building an Event Extractor with Only a Few Examples PengfeiYu ZixuanZhang - ClareVoss + ClareVoss JonathanMay HengJi 102-109 @@ -181,7 +181,7 @@ We address this problem by generating unlabeled data. Large language models have DavidShimshoni AdityaSinghal SaraRosenthal - AvirupSil + AvirupSil 110-116 Pretrained language models have shown success in various areas of natural language processing, including reading comprehension tasks. However, when applying machine learning methods to new domains, labeled data may not always be available. To address this, we use supervised pretraining on source-domain data to reduce sample complexity on domainspecific downstream tasks. We evaluate zeroshot performance on domain-specific reading comprehension tasks by combining task transfer with domain adaptation to fine-tune a pretrained model with no labelled data from the target task. Our approach outperforms DomainAdaptive Pretraining on downstream domainspecific reading comprehension tasks in 3 out of 4 domains. 2022.deeplo-1.12 @@ -243,7 +243,7 @@ We address this problem by generating unlabeled data. Large language models have Clean or Annotate: How to Spend a Limited Data Collection Budget DerekChen ZhouYu - Samuel R.Bowman + Samuel R.Bowman 152-168 Crowdsourcing platforms are often used to collect datasets for training machine learning models, despite higher levels of inaccurate labeling compared to expert labeling. There are two common strategies to manage the impact of such noise: The first involves aggregating redundant annotations, but comes at the expense of labeling substantially fewer examples. Secondly, prior works have also considered using the entire annotation budget to label as many examples as possible and subsequently apply denoising algorithms to implicitly clean the dataset. We find a middle ground and propose an approach which reserves a fraction of annotations to explicitly clean up highly probable error samples to optimize the annotation process. In particular, we allocate a large portion of the labeling budget to form an initial dataset used to train a model. This model is then used to identify specific examples that appear most likely to be incorrect, which we spend the remaining budget to relabel. Experiments across three model variations and four natural language processing tasks show our approach outperforms or matches both label aggregation and advanced denoising methods designed to handle noisy labels when allocated the same finite annotation budget. 2022.deeplo-1.17 diff --git a/data/xml/2022.dialdoc.xml b/data/xml/2022.dialdoc.xml index df31405be3..a8c6f9117e 100644 --- a/data/xml/2022.dialdoc.xml +++ b/data/xml/2022.dialdoc.xml @@ -77,7 +77,7 @@ Parameter-Efficient Abstractive Question Answering over Tables or Text VaishaliPal EvangelosKanoulas - Maartende Rijke + Maartende Rijke 41-53 A long-term ambition of information seeking QA systems is to reason over multi-modal contexts and generate natural answers to user queries. Today, memory intensive pre-trained language models are adapted to downstream tasks such as QA by fine-tuning the model on QA data in a specific modality like unstructured text or structured tables. To avoid training such memory-hungry models while utilizing a uniform architecture for each modality, parameter-efficient adapters add and train small task-specific bottle-neck layers between transformer layers. In this work, we study parameter-efficient abstractive QA in encoder-decoder models over structured tabular data and unstructured textual data using only 1.5% additional parameters for each modality. We also ablate over adapter layers in both encoder and decoder modules to study the efficiency-performance trade-off and demonstrate that reducing additional trainable parameters down to 0.7%-1.0% leads to comparable results. Our models out-perform current state-of-the-art models on tabular QA datasets such as Tablesum and FeTaQA, and achieve comparable performance on a textual QA dataset such as NarrativeQA using significantly less trainable parameters than fine-tuning. 2022.dialdoc-1.5 @@ -140,7 +140,7 @@ EtsukoIshii SamuelCahyawijaya ZihanLiu - Genta IndraWinata + Genta IndraWinata AndreaMadotto DanSu PascaleFung @@ -154,7 +154,7 @@ G4: Grounding-guided Goal-oriented Dialogues Generation with Multiple Documents ShiweiZhang - YiyangDu + YiyangDu GuanzhongLiu ZhaoYan YunboCao @@ -186,7 +186,7 @@ JunanLi HongyuanLu XixinWu - HelenMeng + HelenMeng 123-129 MultiDoc2Dial presents an important challenge on modeling dialogues grounded with multiple documents. This paper proposes a pipeline system of “retrieve, re-rank, and generate”, where each component is individually optimized. This enables the passage re-ranker and response generator to fully exploit training with ground-truth data. Furthermore, we use a deep cross-encoder trained with localized hard negative passages from the retriever. For the response generator, we use grounding span prediction as an auxiliary task to be jointly trained with the main task of response generation. We also adopt a passage dropout and regularization technique to improve response generation performance. Experimental results indicate that the system clearly surpasses the competitive baseline and our team CPII-NLP ranked 1st among the public submissions on ALL four leaderboards based on the sum of F1, SacreBLEU, METEOR and RougeL scores. 2022.dialdoc-1.13 @@ -226,7 +226,7 @@ AliSatvaty SadraSabouri EhsaneddinAsgari - HosseinSameti + HosseinSameti 142-147 Information-seeking dialogue systems, including knowledge identification and response generation, aim to respond to users with fluent, coherent, and informative answers based on users’ needs. This paper discusses our proposed approach, Docalog, for the DialDoc-22 (MultiDoc2Dial) shared task. Docalog identifies the most relevant knowledge in the associated document, in a multi-document setting. Docalog, is a three-stage pipeline consisting of (1) a document retriever model (DR. TEIT), (2) an answer span prediction model, and (3) an ultimate span picker deciding on the most likely answer span, out of all predicted spans. In the test phase of MultiDoc2Dial 2022, Docalog achieved f1-scores of 36.07% and 28.44% and SacreBLEU scores of 23.70% and 20.52%, respectively on the MDD-SEEN and MDD-UNSEEN folds. 2022.dialdoc-1.16 @@ -243,7 +243,7 @@ Aditya SrikanthVeerubhotla RitamDutt TerukoMitamura - EricNyberg + EricNyberg 148-154 In this paper, we present our submission to the DialDoc shared task based on the MultiDoc2Dial dataset. MultiDoc2Dial is a conversational question answering dataset that grounds dialogues in multiple documents. The task involves grounding a user’s query in a document followed by generating an appropriate response. We propose several improvements over the baseline’s retriever-reader architecture to aid in modeling goal-oriented dialogues grounded in multiple documents. Our proposed approach employs sparse representations for passage retrieval, a passage re-ranker, the fusion-in-decoder architecture for generation, and a curriculum learning training paradigm. Our approach shows a 12 point improvement in BLEU score compared to the baseline RAG model. 2022.dialdoc-1.17 diff --git a/data/xml/2022.digitam.xml b/data/xml/2022.digitam.xml index 9cf8fe85cd..2ffbdc810e 100644 --- a/data/xml/2022.digitam.xml +++ b/data/xml/2022.digitam.xml @@ -24,8 +24,8 @@ A Free/Open-Source Morphological Transducer for <fixed-case>W</fixed-case>estern <fixed-case>A</fixed-case>rmenian HossepDolatian - DanielSwanson - JonathanWashington + DanielSwanson + JonathanWashington 1–7 We present a free/open-source morphological transducer for Western Armenian, an endangered and low-resource Indo-European language. The transducer has virtually complete coverage of the language’s inflectional morphology. We built the lexicon by scraping online dictionaries. As of submission, the transducer has a lexicon of 75K words. It has over 90% naive coverage on different Western Armenian corpora, and high precision. 2022.digitam-1.1 diff --git a/data/xml/2022.distcurate.xml b/data/xml/2022.distcurate.xml index 620c98ff54..3ebd907249 100644 --- a/data/xml/2022.distcurate.xml +++ b/data/xml/2022.distcurate.xml @@ -3,7 +3,7 @@ Proceedings of the Workshop on Dimensions of Meaning: Distributional and Curated Semantics (DistCurate 2022) - Collin F.Baker + Collin F.Baker Association for Computational Linguistics
Seattle, Washington
July @@ -39,7 +39,7 @@ Logical Story Representations via <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et + Semantic Parsing LaneLawley - LenhartSchubert + LenhartSchubert 19-23 We propose a means of augmenting FrameNet parsers with a formal logic parser to obtain rich semantic representations of events. These schematic representations of the frame events, which we call Episodic Logic (EL) schemas, abstract constants to variables, preserving their types and relationships to other individuals in the same text. Due to the temporal semantics of the chosen logical formalism, all identified schemas in a text are also assigned temporally bound “episodes” and related to one another in time. The semantic role information from the FrameNet frames is also incorporated into the schema’s type constraints. We describe an implementation of this method using a neural FrameNet parser, and discuss the approach’s possible applications to question answering and open-domain event schema learning. 2022.distcurate-1.3 @@ -49,8 +49,8 @@ Comparing Distributional and Curated Approaches for Cross-lingual Frame Alignment Collin F.Baker - MichaelEllsworth - Miriam R. L.Petruck + MichaelEllsworth + Miriam R. L.Petruck ArthurLorenzi 24-30 Despite advances in statistical approaches to the modeling of meaning, many ques- tions about the ideal way of exploiting both knowledge-based (e.g., FrameNet, WordNet) and data-based methods (e.g., BERT) remain unresolved. This workshop focuses on these questions with three session papers that run the gamut from highly distributional methods (Lekkas et al., 2022), to highly curated methods (Gamonal, 2022), and techniques with statistical methods producing structured semantics (Lawley and Schubert, 2022). In addition, we begin the workshop with a small comparison of cross-lingual techniques for frame semantic alignment for one language pair (Spanish and English). None of the distributional techniques consistently aligns the 1-best frame match from English to Spanish, all failing in at least one case. Predicting which techniques will align which frames cross-linguistically is not possible from any known characteristic of the alignment technique or the frames. Although distributional techniques are a rich source of semantic information for many tasks, at present curated, knowledge-based semantics remains the only technique that can consistently align frames across languages. diff --git a/data/xml/2022.dlg4nlp.xml b/data/xml/2022.dlg4nlp.xml index 86e1250f41..a72c8b1e2b 100644 --- a/data/xml/2022.dlg4nlp.xml +++ b/data/xml/2022.dlg4nlp.xml @@ -5,7 +5,7 @@ Proceedings of the 2nd Workshop on Deep Learning on Graphs for Natural Language Processing (DLG4NLP 2022) LingfeiWu BangLiu - RadaMihalcea + RadaMihalcea JianPei YueZhang YunyaoLi @@ -25,7 +25,7 @@ WenhaoYu ChenguangZhu LianhuiQin - ZhihanZhang + ZhihanZhang TongZhao MengJiang 1-11 @@ -74,7 +74,7 @@ Graph Neural Networks for Adapting Off-the-shelf General Domain Language Models to Low-Resource Specialised Domains MeriemeBouhandi EmmanuelMorin - ThierryHamon + ThierryHamon 36-42 Language models encode linguistic proprieties and are used as input for more specific models. Using their word representations as-is for specialised and low-resource domains might be less efficient. Methods of adapting them exist, but these models often overlook global information about how words, terms, and concepts relate to each other in a corpus due to their strong reliance on attention. We consider that global information can influence the results of the downstream tasks, and combination with contextual information is performed using graph convolution networks or GCN built on vocabulary graphs. By outperforming baselines, we show that this architecture is profitable for domain-specific tasks. 2022.dlg4nlp-1.5 @@ -110,7 +110,7 @@ ZhenyunDeng YonghuaZhu QianqianQi - MichaelWitbrock + MichaelWitbrock PatriciaRiddle 71-80 Current graph-neural-network-based (GNN-based) approaches to multi-hop questions integrate clues from scattered paragraphs in an entity graph, achieving implicit reasoning by synchronous update of graph node representations using information from neighbours; this is poorly suited for explaining how clues are passed through the graph in hops. In this paper, we describe a structured Knowledge and contextual Information Fusion GNN (KIFGraph) whose explicit multi-hop graph reasoning mimics human step by step reasoning. Specifically, we first integrate clues at multiple levels of granularity (question, paragraph, sentence, entity) as nodes in the graph, connected by edges derived using structured semantic knowledge, then use a contextual encoder to obtain the initial node representations, followed by step-by-step two-stage graph reasoning that asynchronously updates node representations. Each node can be related to its neighbour nodes through fused structured knowledge and contextual information, reliably integrating their answer clues. Moreover, a masked attention mechanism (MAM) filters out noisy or redundant nodes and edges, to avoid ineffective clue propagation in graph reasoning. Experimental results show performance competitive with published models on the HotpotQA dataset. diff --git a/data/xml/2022.dravidianlangtech.xml b/data/xml/2022.dravidianlangtech.xml index da0dee1c12..fb3d97624a 100644 --- a/data/xml/2022.dravidianlangtech.xml +++ b/data/xml/2022.dravidianlangtech.xml @@ -5,7 +5,7 @@ Proceedings of the Second Workshop on Speech and Language Technologies for Dravidian Languages Bharathi RajaChakravarthi RubaPriyadharshini - Anand KumarMadasamy + Anand KumarMadasamy ParameswariKrishnamurthy ElizabethSherly SinnathambyMahesan @@ -171,7 +171,7 @@ <fixed-case>T</fixed-case>eam<fixed-case>X</fixed-case>@<fixed-case>D</fixed-case>ravidian<fixed-case>L</fixed-case>ang<fixed-case>T</fixed-case>ech-<fixed-case>ACL</fixed-case>2022: A Comparative Analysis for Troll-Based Meme Classification Rabindra NathNandi FirojAlam - PreslavNakov + PreslavNakov 79-85 The spread of fake news, propaganda, misinformation, disinformation, and harmful content online raised concerns among social mediaplatforms, government agencies, policymakers, and society as a whole. This is because such harmful or abusive content leads to several consequences to people such as physical, emotional, relational, and financial. Among different harmful content trolling-based online content is one of them, where the idea is to post a message that is provocative, offensive, or menacing with an intent to mislead the audience. The content can be textual, visual, a combination of both, or a meme. In this study, we provide a comparative analysis of troll-based memes classification using the textual, visual, and multimodal content. We report several interesting findings in terms of code-mixed text, multimodal setting, and combining an additional dataset, which shows improvements over the majority baseline. 2022.dravidianlangtech-1.13 @@ -333,7 +333,7 @@ AngelS RajalakshmiSivanaiah Sakaya MiltonRajendram - MirnalineeT T + MirnalineeT T 165-169 In this paper, we present our system for the task of Emotion analysis in Tamil. Over 3.96 million people use these platforms to send messages formed using texts, images, videos, audio or combinations of these to express their thoughts and feelings. Text communication on social media platforms is quite overwhelming due to its enormous quantity and simplicity. The data must be processed to understand the general feeling felt by the author. We present a lexicon-based approach for the extraction emotion in Tamil texts. We use dictionaries of words labelled with their respective emotions. The process of assigning an emotional label to each text, and then capture the main emotion expressed in it. Finally, the F1-score in the official test set is 0.0300 and our method ranks 5th. 2022.dravidianlangtech-1.26 @@ -541,7 +541,7 @@ Bharathi RajaChakravarthi RubaPriyadharshini HosahalliShashirekha - JohnMcCrae + JohnMcCrae 271-278 This paper presents an outline of the shared task on translation of under-resourced Dravidian languages at DravidianLangTech-2022 workshop to be held jointly with ACL 2022. A description of the datasets used, approach taken for analysis of submissions and the results have been illustrated in this paper. Five sub-tasks organized as a part of the shared task include the following translation pairs: Kannada to Tamil, Kannada to Telugu, Kannada to Sanskrit, Kannada to Malayalam and Kannada to Tulu. Training, development and test datasets were provided to all participants and results were evaluated on the gold standard datasets. A total of 16 research groups participated in the shared task and a total of 12 submission runs were made for evaluation. Bilingual Evaluation Understudy (BLEU) score was used for evaluation of the translations. 2022.dravidianlangtech-1.41 diff --git a/data/xml/2022.eamt.xml b/data/xml/2022.eamt.xml index ca6dd15310..66a4c34e22 100644 --- a/data/xml/2022.eamt.xml +++ b/data/xml/2022.eamt.xml @@ -7,13 +7,13 @@ LieveMacken AndrewRufener LoïcBarrault - Marta R.Costa-jussà + Marta R.Costa-jussà ChristopheDeclercq MaaritKoponen EllieKemp SpyridonPilos - Mikel L.Forcada - CarolinaScarton + Mikel L.Forcada + CarolinaScarton JoachimVan den Bogaert JokeDaems ArdaTezcan @@ -39,7 +39,7 @@ Neural Speech Translation: From Neural Machine Translation to Direct Speech Translation - Mattia AntoninoDi Gangi + Mattia AntoninoDi Gangi 7–8 2022.eamt-1.2 gangi-2022-neural @@ -54,7 +54,7 @@ Multi-Domain Adaptation in Neural Machine Translation with Dynamic Sampling Strategies Minh-QuangPham - JosepCrego + JosepCrego FrançoisYvon 13–22 Building effective Neural Machine Translation models often implies accommodating diverse sets of heterogeneous data so as to optimize performance for the domain(s) of interest. Such multi-source / multi-domain adaptation problems are typically approached through instance selection or reweighting strategies, based on a static assessment of the relevance of training instances with respect to the task at hand. In this paper, we study dynamic data selection strategies that are able to automatically re-evaluate the usefulness of data samples and to evolve a data selection policy in the course of training. Based on the results of multiple experiments, we show that such methods constitute a generic framework to automatically and effectively handle a variety of real-world situations, from multi-source domain adaptation to multi-domain learning and unsupervised domain adaptation. @@ -75,8 +75,8 @@ Comparing and combining tagging with different decoding algorithms for back-translation in <fixed-case>NMT</fixed-case>: learnings from a low resource scenario XabierSoto OlatzPerez-De-Viñaspre - GorkaLabaka - MaiteOronoz + GorkaLabaka + MaiteOronoz 31–40 Back-translation is a well established approach to improve the performance of Neural Machine Translation (NMT) systems when large monolingual corpora of the target language and domain are available. Recently, diverse approaches have been proposed to get better automatic evaluation results of NMT models using back-translation, including the use of sampling instead of beam search as decoding algorithm for creating the synthetic corpus. Alternatively, it has been proposed to append a tag to the back-translated corpus for helping the NMT system to distinguish the synthetic bilingual corpus from the authentic one. However, not all the combinations of the previous approaches have been tested, and thus it is not clear which is the best approach for developing a given NMT system. In this work, we empirically compare and combine existing techniques for back-translation in a real low resource setting: the translation of clinical notes from Basque into Spanish. Apart from automatically evaluating the MT systems, we ask bilingual healthcare workers to perform a human evaluation, and analyze the different synthetic corpora by measuring their lexical diversity (LD). For reproducibility and generalizability, we repeat our experiments for German to English translation using public data. The results suggest that in lower resource scenarios tagging only helps when using sampling for decoding, in contradiction with the previous literature using bigger corpora from the news domain. When fine-tuning with a few thousand bilingual in-domain sentences, one of our proposed method (tagged restricted sampling) obtains the best results both in terms of automatic and human evaluation. We will publish the code upon acceptance. 2022.eamt-1.6 @@ -85,7 +85,7 @@ Passing Parser Uncertainty to the Transformer: Labeled Dependency Distributions for Neural Machine Translation DongqiLiu - KhalilSima’an + KhalilSima’an 41–50 Existing syntax-enriched neural machine translation (NMT) models work either with the single most-likely unlabeled parse or the set of n-best unlabeled parses coming out of an external parser. Passing a single or n-best parses to the NMT model risks propagating parse errors. Furthermore, unlabeled parses represent only syntactic groupings without their linguistically relevant categories. In this paper we explore the question: Does passing both parser uncertainty and labeled syntactic knowledge to the Transformer improve its translation performance? This paper contributes a novel method for infusing the whole labeled dependency distributions (LDD) of the source sentence’s dependency forest into the self-attention mechanism of the encoder of the Transformer. A range of experimental results on three language pairs demonstrate that the proposed approach outperforms both the vanilla Transformer as well as the single best-parse Transformer model across several evaluation metrics. 2022.eamt-1.7 @@ -103,11 +103,11 @@ Searching for <fixed-case>COMETINHO</fixed-case>: The Little Metric That Could RicardoRei Ana CFarinha - José G.C.de Souza + José G.C.de Souza Pedro G.Ramos André F.T.Martins - LuisaCoheur - AlonLavie + LuisaCoheur + AlonLavie 61–70 In recent years, several neural fine-tuned machine translation evaluation metrics such as COMET and BLEURT have been proposed. These metrics achieve much higher correlations with human judgments than lexical overlap metrics at the cost of computational efficiency and simplicity, limiting their applications to scenarios in which one has to score thousands of translation hypothesis (e.g. scoring multiple systems or Minimum Bayes Risk decoding). In this paper, we explore optimization techniques, pruning, and knowledge distillation to create more compact and faster COMET versions. Our results show that just by optimizing the code through the use of caching and length batching we can reduce inference time between 39% and 65% when scoring multiple systems. Also, we show that pruning COMET can lead to a 21% model reduction without affecting the model’s accuracy beyond 0.01 Kendall tau correlation. Furthermore, we present DISTIL-COMET a lightweight distilled version that is 80% smaller and 2.128x faster while attaining a performance close to the original model and above strong baselines such as BERTSCORE and PRISM. 2022.eamt-1.9 @@ -116,7 +116,7 @@ Studying Post-Editese in a Professional Context: A Pilot Study LiseVolkart - PierretteBouillon + PierretteBouillon 71–79 The past few years have seen the multiplication of studies on post-editese, following the massive adoption of post-editing in professional translation workflows. These studies mainly rely on the comparison of post-edited machine translation and human translation on artificial parallel corpora. By contrast, we investigate here post-editese on comparable corpora of authentic translation jobs for the language direction English into French. We explore commonly used scores and also proposes the use of a novel metric. Our analysis shows that post-edited machine translation is not only lexically poorer than human translation, but also less dense and less varied in terms of translation solutions. It also tends to be more prolific than human translation for our language direction. Finally, our study highlights some of the challenges of working with comparable corpora in post-editese research. 2022.eamt-1.10 @@ -162,8 +162,8 @@ On the Interaction of Regularization Factors in Low-resource Neural Machine Translation - Àlex R.Atrio - AndreiPopescu-Belis + Àlex R.Atrio + AndreiPopescu-Belis 111–120 We explore the roles and interactions of the hyper-parameters governing regularization, and propose a range of values applicable to low-resource neural machine translation. We demonstrate that default or recommended values for high-resource settings are not optimal for low-resource ones, and that more aggressive regularization is needed when resources are scarce, in proportion to their scarcity. We explain our observations by the generalization abilities of sharp vs. flat basins in the loss landscape of a neural network. Results for four regularization factors corroborate our claim: batch size, learning rate, dropout rate, and gradient clipping. Moreover, we show that optimal results are obtained when using several of these factors, and that our findings generalize across datasets of different sizes and languages. 2022.eamt-1.14 @@ -243,7 +243,7 @@ “Hi, how can <fixed-case>I</fixed-case> help you?” Improving Machine Translation of Conversational Content in a Business Context - BiankaBuschbeck + BiankaBuschbeck JenniferMell MiriamExel MatthiasHuck @@ -258,7 +258,7 @@ MariannaBuchicchio CraigStewart HelenaMoniz - AlonLavie + AlonLavie 201–210 This paper illustrates a new evaluation framework developed at Unbabel for measuring the quality of source language text and its effect on both Machine Translation (MT) and Human Post-Edition (PE) performed by non-professional post-editors. We examine both agent and user-generated content from the Customer Support domain and propose that differentiating the two is crucial to obtaining high quality translation output. Furthermore, we present results of initial experimentation with a new evaluation typology based on the Multidimensional Quality Metrics (MQM) Framework Lommel et al., 2014), specifically tailored toward the evaluation of source language text. We show how the MQM Framework Lommel et al., 2014) can be adapted to assess errors of monolingual source texts and demonstrate how very specific source errors propagate to the MT and PE targets. Finally, we illustrate how MT systems are not robust enough to handle very specific source noise in the context of Customer Support data. 2022.eamt-1.23 @@ -270,7 +270,7 @@ VeraCabarrão PedroMota Helena Moniz - AlonLavie + AlonLavie 211–219 This paper describes the research developed at Unbabel, a Portuguese Machine-translation start-up, that combines MT with human post-edition and focuses strictly on customer service content. We aim to contribute to furthering MT quality and good-practices by exposing the importance of having a continuously-in-development robust Named Entity Recognition system compliant with General Data Protection Regulation (GDPR). Moreover, we have tested semiautomatic strategies that support and enhance the creation of Named Entities gold standards to allow a more seamless implementation of Multilingual Named Entities Recognition Systems. The project described in this paper is the result of a shared work between Unbabel ́s linguists and Unbabel ́s AI engineering team, matured over a year. The project should, also, be taken as a statement of multidisciplinary, proving and validating the much-needed articulation between the different scientific fields that compose and characterize the area of Natural Language Processing (NLP). 2022.eamt-1.24 @@ -280,7 +280,7 @@ Investigating automatic and manual filtering methods to produce <fixed-case>MT</fixed-case>-ready glossaries from existing ones MariaAfara RandyScansani - LoïcDugast + LoïcDugast 221–230 Commercial Machine Translation (MT) providers offer functionalities that allow users to leverage bilingual glossaries. This poses the question of how to turn glossaries that were intended to be used by a human translator into MT-ready ones, removing entries that could harm the MT output. We present two automatic filtering approaches - one based on rules and the second one relying on a translation memory - and a manual filtering procedure carried out by a linguist. The resulting glossaries are added to the MT model. The outputs are compared against a baseline where no glossary is used and an output produced using the original glossary. The present work aims at investigating if any of these filtering methods can bring a higher terminology accuracy without negative effects on the overall quality. Results are measured with terminology accuracy and Translation Edit Rate. We test our filters on two language pairs, En-Fr and De-En. Results show that some of the automatically filtered glossaries improve the output when compared to the baseline, and they may help reach a better balance between accuracy and overall quality, replacing the costly manual process without quality loss. 2022.eamt-1.25 @@ -323,7 +323,7 @@ AlinaKarakanta LuisaBentivogli MauroCettolo - MatteoNegri + MatteoNegri MarcoTurchi 261–270 Recent developments in machine translation and speech translation are opening up opportunities for computer-assisted translation tools with extended automation functions. Subtitling tools are recently being adapted for post-editing by providing automatically generated subtitles, and featuring not only machine translation, but also automatic segmentation and synchronisation. But what do professional subtitlers think of post-editing automatically generated subtitles? In this work, we conduct a survey to collect subtitlers’ impressions and feedback on the use of automatic subtitling in their workflows. Our findings show that, despite current limitations stemming mainly from speech processing errors, automatic subtitling is seen rather positively and has potential for the future. @@ -423,7 +423,7 @@ Europeana Translate: Providing multilingual access to digital cultural heritage EiriniKaldeli - MercedesGarcía-Martínez + MercedesGarcía-Martínez AntoineIsaac Paolo SebastianoScalia ArneStabenau @@ -439,7 +439,7 @@ The <fixed-case>PASSAGE</fixed-case> project : <fixed-case>S</fixed-case>tandard <fixed-case>G</fixed-case>erman Subtitling of <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman <fixed-case>TV</fixed-case> content - PierretteBouillon + PierretteBouillon JohannaGerlach JonathanMutal MarianneStarlander @@ -451,16 +451,16 @@ <fixed-case>M</fixed-case>a<fixed-case>C</fixed-case>o<fixed-case>C</fixed-case>u: Massive collection and curation of monolingual and bilingual data: focus on under-resourced languages MartaBañón - MiquelEsplà-Gomis + MiquelEsplà-Gomis Mikel L.Forcada CristianGarcía-Romero TajaKuzman NikolaLjubešić Rikvan Noord Leopoldo PlaSempere - GemaRamírez-Sánchez + GemaRamírez-Sánchez PeterRupnik - VítSuchomel + VítSuchomel AntonioToral Tobiasvan der Werff JaumeZaragoza @@ -491,7 +491,7 @@ <fixed-case>MT</fixed-case>ee: Open Machine Translation Platform for <fixed-case>E</fixed-case>stonian Government TomsBergmanis - MarcisPinnis + MarcisPinnis RobertsRozis JānisŠlapiņš ValtersŠics @@ -518,7 +518,7 @@ AlessandroRaganato Niki A.Loppi Stig-ArneGrönroos - JörgTiedemann + JörgTiedemann 311–312 We describe the enhancement of a multilingual NMT toolkit developed as part of the FoTran project. We devise our modular attention-bridge model, which connects language-specific components through a shared network layer. The system now supports distributed training over many nodes and GPUs in order to substantially scale up the number of languages that can be included in a modern neural translation architecture. The model enables the study of emerging language-agnostic representations and also provides a modular toolkit for efficient machine translation. 2022.eamt-1.45 @@ -535,11 +535,11 @@ <fixed-case>QUARTZ</fixed-case>: Quality-Aware Machine Translation - José G.C.de Souza + José G.C.de Souza RicardoRei Ana C.Farinha HelenaMoniz - André F. T.Martins + André F. T.Martins 315–316 This paper presents QUARTZ, QUality-AwaRe machine Translation, a project led by Unbabel which aims at developing machine translation systems that are more robust and produce fewer critical errors. With QUARTZ we want to enable machine translation for user-generated conversational content types that do not tolerate critical errors in automatic translations. 2022.eamt-1.47 @@ -606,7 +606,7 @@ <fixed-case>D</fixed-case>eep<fixed-case>SPIN</fixed-case>: Deep Structured Prediction for Natural Language Processing - André F. T.Martins + André F. T.Martins BenPeters ChrysoulaZerva ChunchuanLyu @@ -656,7 +656,7 @@ AlinaKarakanta LuisaBentivogli MauroCettolo - MatteoNegri + MatteoNegri MarcoTurchi 335–336 In response to the increasing interest towards automatic subtitling, this EAMT-funded project aimed at collecting subtitle post-editing data in a real use case scenario where professional subtitlers edit automatically generated subtitles. The post-editing setting includes, for the first time, automatic generation of timestamps and segmentation, and focuses on the effect of timing and segmentation edits on the post-editing process. The collected data will serve as the basis for investigating how subtitlers interact with automatic subtitling and for devising evaluation methods geared to the multimodal nature and formal requirements of subtitling. @@ -666,7 +666,7 @@ <fixed-case>D</fixed-case>i<fixed-case>H</fixed-case>u<fixed-case>T</fixed-case>ra: a Parallel Corpus to Analyse Differences between Human Translations EkaterinaLapshinova-Koltunski - MajaPopović + MajaPopović MaaritKoponen 337–338 This project aimed to design a corpus of parallel human translations (HTs) of the same source texts by professionals and students. The resulting corpus consists of English news and reviews source texts, their translations into Russian and Croatian, and translations of the reviews into Finnish. The corpus will be valuable for both studying variation in translation and evaluating machine translation (MT) systems. @@ -687,12 +687,12 @@ Curated Multilingual Language Resources for <fixed-case>CEF</fixed-case> <fixed-case>AT</fixed-case> (<fixed-case>CURLICAT</fixed-case>): overall view - TamásVáradi + TamásVáradi MarkoTadić SvetlaKoeva MaciejOgrodniczuk - DanTufiş - RadovanGarabík + DanTufiş + RadovanGarabík SimonKrek AndražRepar 341–342 @@ -745,7 +745,7 @@ DimitraGkatzia HelenaMoniz IreneRusso - FabioKepler + FabioKepler IacerCalixto MarcinPaprzycki FrançoisPortet @@ -778,7 +778,7 @@ Automatic Video Dubbing at <fixed-case>A</fixed-case>pp<fixed-case>T</fixed-case>ek - MattiaDi Gangi + MattiaDi Gangi NickRossenbach AlejandroPérez ParniaBahar @@ -798,13 +798,13 @@ OwenGallagher FedericoGaspari MariaGiagkou - JanHajic + JanHajic Jens PeterKückens TeresaLynn GeorgRehm - GermanRigau + GermanRigau KatrinMarheinecke - SteliosPiperidis + SteliosPiperidis NataliaResende TeaVojtěchová AndyWay @@ -817,7 +817,7 @@ <fixed-case>LITHME</fixed-case>: Language in the Human-Machine Era MaaritKoponen KaisAllkivi-Metsoja - AntonioPareja-Lora + AntonioPareja-Lora DaveSayers MártaSeresi 355–356 @@ -850,7 +850,7 @@ MauroCettolo MarcoGaido AlinaKarakanta - MatteoNegri + MatteoNegri MarcoTurchi 361–362 This project aimed at extending the test sets of the MuST-C speech translation (ST) corpus with new reference translations. The new references were collected from professional post-editors working on the output of different ST systems for three language pairs: English-German/Italian/Spanish. In this paper, we shortly describe how the data were collected and how they are distributed. As an evidence of their usefulness, we also summarise the findings of the first comparative evaluation of cascade and direct ST approaches, which was carried out relying on the collected data. The project was partially funded by the European Association for Machine Translation (EAMT) through its 2020 Sponsorship of Activities programme. diff --git a/data/xml/2022.ecnlp.xml b/data/xml/2022.ecnlp.xml index 7a6d7f7310..9386b9b614 100644 --- a/data/xml/2022.ecnlp.xml +++ b/data/xml/2022.ecnlp.xml @@ -3,7 +3,7 @@ Proceedings of the Fifth Workshop on e-Commerce and NLP (ECNLP 5) - ShervinMalmasi + ShervinMalmasi OlegRokhlenko NicolaUeffing IdoGuy @@ -64,9 +64,9 @@ Data Quality Estimation Framework for Faster Tax Code Classification - RaviKondadadi + RaviKondadadi AllenWilliams - NicolasNicolov + NicolasNicolov 29-34 This paper describes a novel framework to estimate the data quality of a collection of product descriptions to identify required relevant information for accurate product listing classification for tax-code assignment. Our Data Quality Estimation (DQE) framework consists of a Question Answering (QA) based attribute value extraction model to identify missing attributes and a classification model to identify bad quality records. We show that our framework can accurately predict the quality of product descriptions. In addition to identifying low-quality product listings, our framework can also generate a detailed report at a category level showing missing product information resulting in a better customer experience. 2022.ecnlp-1.4 @@ -192,7 +192,7 @@ MarcoDel Tredici WeiweiCheng BillByrne - AdriàGispert + AdriàGispert 99-110 It is of great value to answer product questions based on heterogeneous information sources available on web product pages, e.g., semi-structured attributes, text descriptions, user-provided contents, etc. However, these sources have different structures and writing styles, which poses challenges for (1) evidence ranking, (2) source selection, and (3) answer generation. In this paper, we build a benchmark with annotations for both evidence selection and answer generation covering 6 information sources. Based on this benchmark, we conduct a comprehensive study and present a set of best practices. We show that all sources are important and contribute to answering questions. Handling all sources within one single model can produce comparable confidence scores across sources and combining multiple sources for training always helps, even for sources with totally different structures. We further propose a novel data augmentation method to iteratively create training samples for answer generation, which achieves close-to-human performance with only a few thousandannotations. Finally, we perform an in-depth error analysis of model predictions and highlight the challenges for future research. 2022.ecnlp-1.13 @@ -206,7 +206,7 @@ GianniBarlacchi MarcoDel Tredici WeiweiCheng - AdriàGispert + AdriàGispert 111-120 Product question answering (PQA) aims to automatically address customer questions to improve their online shopping experience. Current research mainly focuses on finding answers from either unstructured text, like product descriptions and user reviews, or structured knowledge bases with pre-defined schemas. Apart from the above two sources, a lot of product information is represented in a semi-structured way, e.g., key-value pairs, lists, tables, json and xml files, etc. These semi-structured data can be a valuable answer source since they are better organized than free text, while being easier to construct than structured knowledge bases. However, little attention has been paid to them. To fill in this blank, here we study how to effectively incorporate semi-structured answer sources for PQA and focus on presenting answers in a natural, fluent sentence. To this end, we present semiPQA: a dataset to benchmark PQA over semi-structured data. It contains 11,243 written questions about json-formatted data covering 320 unique attribute types. Each data point is paired with manually-annotated text that describes its contents, so that we can train a neural answer presenter to present the data in a natural way. We provide baseline results and a deep analysis on the successes and challenges of leveraging semi-structured data for PQA. In general, state-of-the-art neural models can perform remarkably well when dealing with seen attribute types. For unseen attribute types, however, a noticeable drop is observed for both answer presentation and attribute ranking. 2022.ecnlp-1.14 @@ -253,13 +253,13 @@ Domain-specific knowledge distillation yields smaller and better models for conversational commerce KristenHowell - JianWang + JianWang AkshayHazare JosephBradley ChrisBrew XiChen MatthewDunn - BethHockey + BethHockey AndrewMaurer DominicWiddows 151-160 @@ -333,7 +333,7 @@ Investigating the Generative Approach for Question Answering in <fixed-case>E</fixed-case>-Commerce KalyaniRoy VineethBalapanuru - TapasNayak + TapasNayak PawanGoyal 210-216 Many e-commerce websites provide Product-related Question Answering (PQA) platform where potential customers can ask questions related to a product, and other consumers can post an answer to that question based on their experience. Recently, there has been a growing interest in providing automated responses to product questions. In this paper, we investigate the suitability of the generative approach for PQA. We use state-of-the-art generative models proposed by Deng et al.(2020) and Lu et al.(2020) for this purpose. On closer examination, we find several drawbacks in this approach: (1) input reviews are not always utilized significantly for answer generation, (2) the performance of the models is abysmal while answering the numerical questions, (3) many of the generated answers contain phrases like “I do not know” which are taken from the reference answer in training data, and these answers do not convey any information to the customer. Although these approaches achieve a high ROUGE score, it does not reflect upon these shortcomings of the generated answers. We hope that our analysis will lead to more rigorous PQA approaches, and future research will focus on addressing these shortcomings in PQA. @@ -372,7 +372,7 @@ Can Pretrained Language Models Generate Persuasive, Faithful, and Informative Ad Text for Product Descriptions? FajriKoto Jey HanLau - TimothyBaldwin + TimothyBaldwin 234-243 For any e-commerce service, persuasive, faithful, and informative product descriptions can attract shoppers and improve sales. While not all sellers are capable of providing such interesting descriptions, a language generation system can be a source of such descriptions at scale, and potentially assist sellers to improve their product descriptions. Most previous work has addressed this task based on statistical approaches (Wang et al., 2017), limited attributes such as titles (Chen et al., 2019; Chan et al., 2020), and focused on only one product type (Wang et al., 2017; Munigala et al., 2018; Hong et al., 2021). In this paper, we jointly train image features and 10 text attributes across 23 diverse product types, with two different target text types with different writing styles: bullet points and paragraph descriptions. Our findings suggest that multimodal training with modern pretrained language models can generate fluent and persuasive advertisements, but are less faithful and informative, especially out of domain. 2022.ecnlp-1.27 diff --git a/data/xml/2022.emnlp.xml b/data/xml/2022.emnlp.xml index 9872cbaa16..86eebb6e8c 100644 --- a/data/xml/2022.emnlp.xml +++ b/data/xml/2022.emnlp.xml @@ -38,7 +38,7 @@ LibiaoPengTsinghua University ZhenGuoBaidu WenquanWuBaidu - Zheng-YuNiuBaidu Inc. + Zheng-YuNiuBaidu Inc. HuaWuBaidu MinlieHuangTsinghua University 18-29 @@ -117,7 +117,7 @@ SangwooChoTecent AI Lab KaiqiangSongTencent AI Lab XiaoyangWangTencent AI Lab - FeiLiuEmory University + FeiLiuEmory University DongYuTencent AI Lab 106-118 Text segmentation is important for signaling a document’s structure. Without segmenting a long document into topically coherent sections, it is difficult for readers to comprehend the text, let alone find important information. The problem is only exacerbated by a lack of segmentation in transcripts of audio/video recordings. In this paper, we explore the role that section segmentation plays in extractive summarization of written and spoken documents. Our approach learns robust sentence representations by performing summarization and segmentation simultaneously, which is further enhanced by an optimization-based regularizer to promote selection of diverse summary sentences. We conduct experiments on multiple datasets ranging from scientific articles to spoken transcripts to evaluate the model’s performance. Our findings suggest that the model can not only achieve state-of-the-art performance on publicly available benchmarks, but demonstrate better cross-genre transferability when equipped with text segmentation. We perform a series of analyses to quantify the impact of section segmentation on summarizing written and spoken documents of substantial length and complexity. @@ -156,7 +156,7 @@ Do JuneMinUniversity of Michigan VerónicaPérez-RosasUniversity of Michigan KennethResnicowSchool of Public Health, University of Michigan - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan 148-158 Counselor reflection is a core verbal skill used by mental health counselors to express understanding and affirmation of the client’s experience and concerns. In this paper, we propose a system for the analysis of counselor reflections. Specifically, our system takes as input one dialog turn containing a client prompt and a counselor response, and outputs a score indicating the level of reflection in the counselor response. We compile a dataset consisting of different levels of reflective listening skills, and propose the Prompt-Aware margIn Ranking (PAIR) framework that contrasts positive and negative prompt and response pairs using specially designed multi-gap and prompt-aware margin ranking losses. Through empirical evaluations and deployment of our system in a real-life educational environment, we show that our analysis model outperforms several baselines on different metrics, and can be used to provide useful feedback to counseling trainees. 2022.emnlp-main.11 @@ -262,7 +262,7 @@ Multi-<fixed-case>VQG</fixed-case>: Generating Engaging Questions for Multiple Images Min-HsuanYehAcademia Sinica VincentChenUniversity of Illinois at Urbana Champaign - Ting-HaoHuangPennsylvania State University + Ting-HaoHuangPennsylvania State University Lun-WeiKuAcademia Sinica 277-290 Generating engaging content has drawn much recent attention in the NLP community. Asking questions is a natural way to respond to photos and promote awareness. However, most answers to questions in traditional question-answering (QA) datasets are factoids, which reduce individuals’ willingness to answer. Furthermore, traditional visual question generation (VQG) confines the source data for question generation to single images, resulting in a limited ability to comprehend time-series information of the underlying event. In this paper, we propose generating engaging questions from multiple images. We present MVQG, a new dataset, and establish a series of baselines, including both end-to-end and dual-stage architectures. Results show that building stories behind the image sequence enables models togenerate engaging questions, which confirms our assumption that people typically construct a picture of the event in their minds before asking questions. These results open up an exciting challenge for visual-and-language models to implicitly construct a story behind a series of photos to allow for creativity and experience sharing and hence draw attention to downstream applications. @@ -276,7 +276,7 @@ JannisBulianGoogle ChristianBuckGoogle Research WojciechGajewskiGoogle Research - BenjaminBörschingerGoogle + BenjaminBörschingerGoogle TalSchusterGoogle 291-305 The predictions of question answering (QA) systems are typically evaluated against manually annotated finite sets of one or more answers. This leads to a coverage limitation that results in underestimating the true performance of systems, and is typically addressed by extending over exact match (EM) with predefined rules or with the token-level F1 measure.In this paper, we present the first systematic conceptual and data-driven analysis to examine the shortcomings of token-level equivalence measures.To this end, we define the asymmetric notion of answer equivalence (AE), accepting answers that are equivalent to or improve over the reference, and publish over 23k human judgements for candidates produced by multiple QA systems on SQuAD.Through a careful analysis of this data, we reveal and quantify several concrete limitations of the F1 measure, such as a false impression of graduality, or missing dependence on the question.Since collecting AE annotations for each evaluated model is expensive, we learn a BERT matching (BEM) measure to approximate this task. Being a simpler task than QA, we find BEM to provide significantly better AE approximations than F1, and to more accurately reflect the performance of systems.Finally, we demonstrate the practical utility of AE and BEM on the concrete application of minimal accurate prediction sets, reducing the number of required answers by up to X2.6. @@ -354,7 +354,7 @@ Translation between Molecules and Natural Language CarlEdwardsUniversity of Illinois, Urbana-Champaign - TuanLaiUniversity of Illinois at Urbana-Champaign + TuanLaiUniversity of Illinois at Urbana-Champaign KevinRosUniversity of Illinois at Urbana-Champaign GarrettHonkeX, the Moonshot Factory KyunghyunChoNew York University @@ -382,8 +382,8 @@ Sentence-Incremental Neural Coreference Resolution MattGrenanderUniversity of Edinburgh - Shay B.CohenUniversity of Edinburgh - MarkSteedmanUniversity of Edinburgh + Shay B.CohenUniversity of Edinburgh + MarkSteedmanUniversity of Edinburgh 427-443 We propose a sentence-incremental neural coreference resolution system which incrementally builds clusters after marking mention boundaries in a shift-reduce method. The system is aimed at bridging two recent approaches at coreference resolution: (1) state-of-the-art non-incremental models that incur quadratic complexity in document length with high computational cost, and (2) memory network-based models which operate incrementally but do not generalize beyond pronouns. For comparison, we simulate an incremental setting by constraining non-incremental systems to form partial coreference chains before observing new sentences. In this setting, our system outperforms comparable state-of-the-art methods by 2 F1 on OntoNotes and 6.8 F1 on the CODI-CRAC 2021 corpus. In a conventional coreference setup, our system achieves 76.3 F1 on OntoNotes and 45.5 F1 on CODI-CRAC 2021, which is comparable to state-of-the-art baselines. We also analyze variations of our system and show that the degree of incrementality in the encoder has a surprisingly large effect on the resulting performance. 2022.emnlp-main.28 @@ -453,7 +453,7 @@ Yi-TingYehCarnegie Mellon University ShikibMehriCarnegie Mellon University MaxineEskenaziCarnegie Mellon University - JeffreyBighamCMU/Apple + JeffreyBighamCMU/Apple 505-525 Instruction tuning is an emergent paradigm in NLP wherein natural language instructions are leveraged with language models to induce zero-shot performance on unseen tasks. Dialogue is an especially interesting area in which to explore instruction tuning because dialogue systems perform multiple kinds of tasks related to language (e.g., natural language understanding and generation, domain-specific interaction), yet instruction tuning has not been systematically explored for dialogue-related tasks. We introduce InstructDial, an instruction tuning framework for dialogue, which consists of a repository of 48 diverse dialogue tasks in a unified text-to-text format created from 59 openly available dialogue datasets. We explore cross-task generalization ability on models tuned on InstructDial across diverse dialogue tasks. Our analysis reveals that InstructDial enables good zero-shot performance on unseen datasets and tasks such as dialogue evaluation and intent detection, and even better performance in a few-shot setting. To ensure that models adhere to instructions, we introduce novel meta-tasks. We establish benchmark zero-shot and few-shot performance of models trained using the proposed framework on multiple dialogue tasks. 2022.emnlp-main.33 @@ -547,19 +547,19 @@ Chien-ShengWuSalesforce MingZhongUniversity of Illinois at Urbana-Champaign PengchengYinCarnegie Mellon University - Sida I.WangFacebook AI Research + Sida I.WangFacebook AI Research VictorZhongUniversity of Washington BailinWangMassachusetts Institute of Technology ChengzuLiShanghai AI Lab ConnorBoyleUniversity of Washington AnsongNiYale University ZiyuYaoGeorge Mason University - DragomirRadevYale University + DragomirRadevYale University CaimingXiongSalesforce LingpengKongThe University of Hong Kong RuiZhangPenn State University - Noah A.SmithUniversity of Washington - LukeZettlemoyerUniversity of Washington; Meta + Noah A.SmithUniversity of Washington + LukeZettlemoyerUniversity of Washington; Meta TaoYuUniversity of Washington 602-631 Structured knowledge grounding (SKG) leverages structured knowledge to complete user requests, such as semantic parsing over databases and question answering over knowledge bases. Since the inputs and outputs of SKG tasks are heterogeneous, they have been studied separately by different communities, which limits systematic and compatible research on SKG. In this paper, we overcome this limitation by proposing the UnifiedSKG framework, which unifies 21 SKG tasks into a text-to-text format, aiming to promote systematic SKG research, instead of being exclusive to a single task, domain, or dataset. We use UnifiedSKG to benchmark T5 with different sizes and show that T5, with simple modifications when necessary, achieves state-of-the-art performance on almost all of the 21 tasks. We further demonstrate that multi-task prefix-tuning improves the performance on most tasks, largely improving the overall performance. UnifiedSKG also facilitates the investigation of zero-shot and few-shot learning, and we show that T0, GPT-3, and Codex struggle in zero-shot and few-shot learning for SKG. We also use UnifiedSKG to conduct a series of controlled experiments on structured knowledge encoding variants across SKG tasks. UnifiedSKG is easily extensible to more tasks, and it is open-sourced at https://github.com/hkunlp/unifiedskg. @@ -570,9 +570,9 @@ Balanced Adversarial Training: Balancing Tradeoffs between Fickleness and Obstinacy in <fixed-case>NLP</fixed-case> Models - HannahChenUniversity of Virginia + HannahChenUniversity of Virginia YangfengJiUniversity of Virginia - DavidEvansUniversity of Virginia + DavidEvansUniversity of Virginia 632-647 Traditional (fickle) adversarial examples involve finding a small perturbation that does not change an input’s true label but confuses the classifier into outputting a different prediction. Conversely, obstinate adversarial examples occur when an adversary finds a small perturbation that preserves the classifier’s prediction but changes the true label of an input.Adversarial training and certified robust training have shown some effectiveness in improving the robustness of machine learnt models to fickle adversarial examples. We show that standard adversarial training methods focused on reducing vulnerability to fickle adversarial examples may make a model more vulnerable to obstinate adversarial examples, with experiments for both natural language inference and paraphrase identification tasks. To counter this phenomenon, we introduce Balanced Adversarial Training, which incorporates contrastive learning to increase robustness against both fickle and obstinate adversarial examples. 2022.emnlp-main.40 @@ -597,7 +597,7 @@ Generative Language Models for Paragraph-Level Question Generation AsahiUshioCardiff University FernandoAlva-ManchegoCardiff University - JoseCamacho-ColladosCardiff University + JoseCamacho-ColladosCardiff University 670-688 Powerful generative models have led to recent progress in question generation (QG). However, it is difficult to measure advances in QG research since there are no standardized resources that allow a uniform comparison among approaches. In this paper, we introduce QG-Bench, a multilingual and multidomain benchmark for QG that unifies existing question answering datasets by converting them to a standard QG setting. It includes general-purpose datasets such as SQuAD for English, datasets from ten domains and two styles, as well as datasets in eight different languages. Using QG-Bench as a reference, we perform an extensive analysis of the capabilities of language models for the task. First, we propose robust QG baselines based on fine-tuning generative language models. Then, we complement automatic evaluation based on standard metrics with an extensive manual evaluation, which in turn sheds light on the difficulty of evaluating QG models. Finally, we analyse both the domain adaptability of these models as well as the effectiveness of multilingual models in languages other than English.QG-Bench is released along with the fine-tuned models presented in the paper (https://github.com/asahi417/lm-question-generation), which are also available as a demo (https://autoqg.net/). 2022.emnlp-main.42 @@ -607,7 +607,7 @@ A Unified Encoder-Decoder Framework with Entity Memory - ZhihanZhangUniversity of Notre Dame + ZhihanZhangUniversity of Notre Dame WenhaoYuUniversity of Notre Dame ChenguangZhuMicrosoft Cognitive Services Research Group MengJiangUniversity of Notre Dame @@ -651,7 +651,7 @@ YingjunMouGeorgia Institute of Technology XiangChenAdobe Research LeSongMBZUAI - ChaoZhangGeorgia Tech + ChaoZhangGeorgia Tech 730-744 We study the problem of extracting N-ary relation tuples from scientific articles. This task is challenging because the target knowledge tuples can reside in multiple parts and modalities of the document. Our proposed method ReSel decomposes this task into a two-stage procedure that first retrieves the most relevant paragraph/table and then selects the target entity from the retrieved component. For the high-level retrieval stage, ReSel designs a simple and effective feature set, which captures multi-level lexical and semantic similarities between the query and components. For the low-level selection stage, ReSel designs a cross-modal entity correlation graph along with a multi-view architecture, which models both semantic and document-structural relations between entities. Our experiments on three scientific information extraction datasets show that ReSel outperforms state-of-the-art baselines significantly. 2022.emnlp-main.46 @@ -661,7 +661,7 @@ <fixed-case>G</fixed-case>amma<fixed-case>E</fixed-case>: Gamma Embeddings for Logical Queries on Knowledge Graphs - DongYangOPPO Guangdong Mobile Telecommunications Co., Ltd. + DongYangOPPO Guangdong Mobile Telecommunications Co., Ltd. PeijunQingXidian University YangLiThe Hong Kong Polytechnic University HaonanLuOPPO Guangdong Mobile Telecommunications Co., Ltd. @@ -709,9 +709,9 @@ YifanChenUniversity of Illinois Urbana-Champaign DevamanyuHazarikaAmazon MahdiNamazifarAmazon - YangLiuAmazon + YangLiuAmazon DiJinAmazon - DilekHakkani-TurAmazon Alexa AI + DilekHakkani-TurAmazon Alexa AI 793-808 Prefix-tuning, or more generally continuous prompt tuning, has become an essential paradigm of parameter-efficient transfer learning. Using a large pre-trained language model (PLM), prefix-tuning can obtain strong performance by training only a small portion of parameters. In this paper, we propose to understand and further develop prefix-tuning through the kernel lens. Specifically, we make an analogy between prefixes and inducing variables in kernel methods and hypothesize that prefixes serving as inducing variables would improve their overall mechanism. From the kernel estimator perspective, we suggest a new variant of prefix-tuning—inducer-tuning, which shares the exact mechanism as prefix-tuning while leveraging the residual form found in adapter-tuning. This mitigates the initialization issue in prefix-tuning. Through comprehensive empirical experiments on natural language understanding and generation tasks, we demonstrate that inducer-tuning can close the performance gap between prefix-tuning and fine-tuning. 2022.emnlp-main.50 @@ -724,7 +724,7 @@ PuneetMathurUniversity of Maryland College Park GautamKunapuliVerisk Analytics RiyazBhatIBM IRL - ManishShrivastavaInternational Institute of Information Technology Hyderabad + ManishShrivastavaInternational Institute of Information Technology Hyderabad DineshManochaUniversity of Maryland ManeeshSinghMotive Technologies 809-824 @@ -736,10 +736,10 @@ <fixed-case>L</fixed-case>ight<fixed-case>EA</fixed-case>: A Scalable, Robust, and Interpretable Entity Alignment Framework via Three-view Label Propagation - XinMaoECNU + XinMaoECNU WentingWangBytedance Group YuanbinWuEast China Normal University - ManLanEast China Normal University + ManLanEast China Normal University 825-838 Entity Alignment (EA) aims to find equivalent entity pairs between KGs, which is the core step to bridging and integrating multi-source KGs. In this paper, we argue that existing complex EA methods inevitably inherit the inborn defects from their neural network lineage: poor interpretability and weak scalability. Inspired by recent studies, we reinvent the classical Label Propagation algorithm to effectively run on KGs and propose a neural-free EA framework — LightEA, consisting of three efficient components: (i) Random Orthogonal Label Generation, (ii) Three-view Label Propagation, and (iii) Sparse Sinkhorn Operation.According to the extensive experiments on public datasets, LightEA has impressive scalability, robustness, and interpretability. With a mere tenth of time consumption, LightEA achieves comparable results to state-of-the-art methods across all datasets and even surpasses them on many. Besides, due to the computational process of LightEA being entirely linear, we could trace the propagation process at each step and clearly explain how the entities are aligned. 2022.emnlp-main.52 @@ -908,7 +908,7 @@ MachelReidGoogle VictorZhongUniversity of Washington SuchinGururanganPaul G. Allen School of Computer Science; Meta AI - LukeZettlemoyerUniversity of Washington; Meta + LukeZettlemoyerUniversity of Washington; Meta 964-975 We present M2D2, a fine-grained, massively multi-domain corpus for studying domain adaptation in language models (LMs). M2D2 consists of 8.5B tokens and spans 145 domains extracted from Wikipedia and Semantic Scholar. Using ontologies derived from Wikipedia and ArXiv categories, we organize the domains in each data source into 22 groups. This two-level hierarchy enables the study of relationships between domains and their effects on in- and out-of-domain performance after adaptation. We also present a number of insights into the nature of effective domain adaptation in LMs, as examples of the new types of studies M2D2 enables. To improve in-domain performance, we show the benefits of adapting the LM along a domain hierarchy; adapting to smaller amounts of fine-grained domain-specific data can lead to larger in-domain performance gains than larger amounts of weakly relevant data. We further demonstrate a trade-off between in-domain specialization and out-of-domain generalization within and across ontologies, as well as a strong correlation between out-of-domain performance and lexical overlap between domains. 2022.emnlp-main.63 @@ -975,8 +975,8 @@ <fixed-case>ELMER</fixed-case>: A Non-Autoregressive Pre-trained Language Model for Efficient and Effective Text Generation JunyiLiGaoling School of Artificial Intelligence, Renmin University of China TianyiTangRenmin University of China - Wayne XinZhaoRUC - Jian-YunNieUniversity of Montreal + Wayne XinZhaoRUC + Jian-YunNieUniversity of Montreal Ji-RongWenRenmin University of China 1044-1058 We study the text generation task under the approach of pre-trained language models (PLMs). Typically, an auto-regressive (AR) method is adopted for generating texts in a token-by-token manner. Despite many advantages of AR generation, it usually suffers from inefficient inference. Therefore, non-autoregressive (NAR) models are proposed to generate all target tokens simultaneously. However, NAR models usually generate texts of lower quality due to the absence of token dependency in the output text. In this paper, we propose ELMER: an efficient and effective PLM for NAR text generation to explicitly model the token dependency during NAR generation. By leveraging the early exit technique, ELMER enables the token generations at different layers, according to their prediction confidence (a more confident token will exit at a lower layer). Besides, we propose a novel pre-training objective, Layer Permutation Language Modeling, to pre-train ELMER by permuting the exit layer for each token in sequences. Experiments on three text generation tasks show that ELMER significantly outperforms NAR models and further narrows the performance gap with AR PLMs (ELMER (29.92) vs BART (30.61) ROUGE-L in XSUM) while achieving over 10 times inference speedup. @@ -1042,7 +1042,7 @@ AlonHalfonIBM Research EyalShnarchIBM Research YotamPerlitzIBM - LiatEin-DorIBM Research + LiatEin-DorIBM Research NoamSlonimIBM Research 1107-1119 Recent advances in large pretrained language models have increased attention to zero-shot text classification. In particular, models finetuned on natural language inference datasets have been widely adopted as zero-shot classifiers due to their promising results and off-the-shelf availability. However, the fact that such models are unfamiliar with the target task can lead to instability and performance issues. We propose a plug-and-play method to bridge this gap using a simple self-training approach, requiring only the class names along with an unlabeled dataset, and without the need for domain expertise or trial and error. We show that fine-tuning the zero-shot classifier on its most confident predictions leads to significant performance gains across a wide range of text classification tasks, presumably since self-training adapts the zero-shot model to the task at hand. @@ -1070,7 +1070,7 @@ Richard YuanzhePangNew York University AngelicaChenNew York University JasonPhangNew York University - Samuel R.BowmanNew York University + Samuel R.BowmanNew York University 1139-1156 Summarization datasets are often assembled either by scraping naturally occurring public-domain summaries—which are nearly always in difficult-to-work-with technical domains—or by using approximate heuristics to extract them from everyday text—which frequently yields unfaithful summaries. In this work, we turn to a slower but more straightforward approach to developing summarization benchmark data: We hire highly-qualified contractors to read stories and write original summaries from scratch. To amortize reading time, we collect five summaries per document, with the first giving an overview and the subsequent four addressing specific questions. We use this protocol to collect SQuALITY, a dataset of question-focused summaries built on the same public-domain short stories as the multiple-choice dataset QuALITY (Pang et al., 2021). Experiments with state-of-the-art summarization systems show that our dataset is challenging and that existing automatic evaluation metrics are weak indicators of quality. 2022.emnlp-main.75 @@ -1129,8 +1129,8 @@ NiketTandonAllen Institute for Artificial Intelligence TanviAggarwalStony Brook University HoraceLiuStony Brook University - NathanaelChambersUS Naval Academy - RaymondMooneyUniversity of Texas at Austin + NathanaelChambersUS Naval Academy + RaymondMooneyUniversity of Texas at Austin NiranjanBalasubramanianStony Brook University 1204-1219 Answering questions in narratives about why events happened often requires commonsense knowledge external to the text. What aspects of this knowledge are available in large language models? What aspects can be made accessible via external commonsense resources? We study these questions in the context of answering questions in the TellMeWhy dataset using COMET as a source of relevant commonsense relations. We analyze the effects of model size (T5 and GPT3) along with methods of injecting knowledge (COMET) into these models. Results show that the largest models, as expected, yield substantial improvements over base models. Injecting external knowledge helps models of various sizes, but the amount of improvement decreases with larger model size. We also find that the format in which knowledge is provided is critical, and that smaller models benefit more from larger amounts of knowledge. Finally, we develop an ontology of knowledge types and analyze the relative coverage of the models across these categories. @@ -1193,7 +1193,7 @@ ZiqiaoMaUniversity of Michigan KeunwooYuUniversity Of Michigan YuweiBaoUniversity of Michigan - JoyceChaiUniversity of Michigan + JoyceChaiUniversity of Michigan 1280-1298 Recent years have seen an increasing amount of work on embodied AI agents that can perform tasks by following human language instructions. However, most of these agents are reactive, meaning that they simply learn and imitate behaviors encountered in the training data. These reactive agents are insufficient for long-horizon complex tasks. To address this limitation, we propose a neuro-symbolic deliberative agent that, while following language instructions, proactively applies reasoning and planning based on its neural and symbolic representations acquired from past experience (e.g., natural language and egocentric vision). We show that our deliberative agent achieves greater than 70% improvement over reactive baselines on the challenging TEACh benchmark. Moreover, the underlying reasoning and planning processes, together with our modular framework, offer impressive transparency and explainability to the behaviors of the agent. This enables an in-depth understanding of the agent’s capabilities, which shed light on challenges and opportunities for future embodied agents for instruction following. The code is available at https://github.com/sled-group/DANLI. 2022.emnlp-main.83 @@ -1277,7 +1277,7 @@ Geographic Citation Gaps in <fixed-case>NLP</fixed-case> Research MukundRungtaGeorgia Institute of Technology JanvijaySinghGeorgia Institute of Technology - Saif M.MohammadNational Research Council Canada + Saif M.MohammadNational Research Council Canada DiyiYangStanford University 1371-1383 In a fair world, people have equitable opportunities to education, to conduct scientific research, to publish, and to get credit for their work, regardless of where they live. However, it is common knowledge among researchers that a vast number of papers accepted at top NLP venues come from a handful of western countries and (lately) China; whereas, very few papers from Africa and South America get published. Similar disparities are also believed to exist for paper citation counts. In the spirit of “what we do not measure, we cannot improve”, this work asks a series of questions on the relationship between geographical location and publication success (acceptance in top NLP venues and citation impact). We first created a dataset of 70,000 papers from the ACL Anthology, extracted their meta-information, andgenerated their citation network. We then show that not only are there substantial geographical disparities in paper acceptance and citation but also that these disparities persist even when controlling for a number of variables such as venue of publication and sub-field of NLP. Further, despite some steps taken by the NLP community to improve geographical diversity, we show that the disparity in publication metrics across locations is still on an increasing trend since the early 2000s. We release our code and dataset here: https://github.com/iamjanvijay/acl-cite-net @@ -1362,7 +1362,7 @@ YueYuGeorgia Institute of Technology ChenyanXiongMicrosoft Research SiSunTsinghua University - ChaoZhangGeorgia Tech + ChaoZhangGeorgia Tech ArnoldOverwijkMicrosoft 1462-1479 We present a new zero-shot dense retrieval (ZeroDR) method, COCO-DR, to improve the generalization ability of dense retrieval by combating the distribution shifts between source training tasks and target scenarios. To mitigate the impact of document differences, COCO-DR continues pretraining the language model on the target corpora to adapt the model to target distributions via COtinuous COtrastive learning. To prepare for unseen target queries, COCO-DR leverages implicit Distributionally Robust Optimization (iDRO) to reweight samples from different source query clusters for improving model robustness over rare queries during fine-tuning. COCO-DR achieves superior average performance on BEIR, the zero-shot retrieval benchmark. At BERT_Base scale, COCO-DR Base outperforms other ZeroDR models with 60x larger size. At BERT_Large scale, COCO-DR Large outperforms the giant GPT-3 embedding model which has 500x more parameters. Our analysis shows the correlation between COCO-DR’s effectiveness in combating distribution shifts and improving zero-shot accuracy. Our code and model can be found at https://github.com/OpenMatch/COCO-DR. @@ -1376,8 +1376,8 @@ LiliangRenUniversity of Illinois, Urbana Champaign ZixuanZhangUniversity of Illinois Urbana-Champaign HanWangAmazon - ClareVossArmy Research Laboratory - ChengXiangZhaiUniversity of Illinois at Urbana-Champaign + ClareVossArmy Research Laboratory + ChengXiangZhaiUniversity of Illinois at Urbana-Champaign HengJiUniversity of Illinois at Urbana-Champaign and Amazon (Amazon Scholar) 1480-1494 Modern large-scale Pre-trained Language Models (PLMs) have achieved tremendous success on a wide range of downstream tasks. However, most of the LM pre-training objectives only focus on text reconstruction, but have not sought to learn latent-level interpretable representations of sentences. In this paper, we manage to push the language models to obtain a deeper understanding of sentences by proposing a new pre-training objective, Sparse Latent Typing, which enables the model to sparsely extract sentence-level keywords with diverse latent types. Experimental results show that our model is able to learn interpretable latent type categories in a self-supervised manner without using any external knowledge. Besides, the language model pre-trained with such an objective also significantly improves Information Extraction related downstream tasks in both supervised and few-shot settings. Our code is publicly available at https://github.com/renll/SparseLT. @@ -1434,7 +1434,7 @@ EmmanuelleSalinLIS, Aix Marseille Université StephaneAyacheAix-Marseille University AbdellahFourtassiAix-Marseille University - BenoitFavreAix-Marseille University LIS/CNRS + BenoitFavreAix-Marseille University LIS/CNRS 1538-1555 Recent advances in vision-and-language modeling have seen the development of Transformer architectures that achieve remarkable performance on multimodal reasoning tasks.Yet, the exact capabilities of these black-box models are still poorly understood. While much of previous work has focused on studying their ability to learn meaning at the word-level, their ability to track syntactic dependencies between words has received less attention.We take a first step in closing this gap by creating a new multimodal task targeted at evaluating understanding of predicate-noun dependencies in a controlled setup.We evaluate a range of state-of-the-art models and find that their performance on the task varies considerably, with some models performing relatively well and others at chance level. In an effort to explain this variability, our analyses indicate that the quality (and not only sheer quantity) of pretraining data is essential. Additionally, the best performing models leverage fine-grained multimodal pretraining objectives in addition to the standard image-text matching objectives.This study highlights that targeted and controlled evaluations are a crucial step for a precise and rigorous test of the multimodal knowledge of vision-and-language models. 2022.emnlp-main.100 @@ -1459,7 +1459,7 @@ SilviaSeveriniLudwig-Maximilians-Universität MasoudJalili SabetCenter for Information and Speech Processing, Ludwig Maximilian University of Munich FrançoisYvonLISN CNRS & Univ. Paris Saclay - HinrichSchützeCenter for Information and Language Processing, University of Munich + HinrichSchützeCenter for Information and Language Processing, University of Munich 1577-1589 Part-of-Speech (POS) tagging is an important component of the NLP pipeline, but many low-resource languages lack labeled data for training. An established method for training a POS tagger in such a scenario is to create a labeled training set by transferring from high-resource languages. In this paper, we propose a novel method for transferring labels from multiple high-resource source to low-resource target languages. We formalize POS tag projection as graph-based label propagation. Given translations of a sentence in multiple languages, we create a graph with words as nodes and alignment links as edges by aligning words for all language pairs. We then propagate node labels from source to target using a Graph Neural Network augmented with transformer layers. We show that our propagation creates training sets that allow us to train POS taggers for a diverse set of languages. When combined with enhanced contextualized embeddings, our method achieves a new state-of-the-art for unsupervised POS tagging of low-resource languages. 2022.emnlp-main.102 @@ -1559,7 +1559,7 @@ Learning Label Modular Prompts for Text Classification in the Wild HailinChenNTU AmritaSahaSalesforce Research - ShafiqJotyNanyang Technological University; Salesforce AI Research + ShafiqJotyNanyang Technological University; Salesforce AI Research Steven C.H.HoiSalesforce 1677-1690 Machine learning models usually assume i.i.d data during training and testing, but data and tasks in real world often change over time. To emulate the transient nature of real world, we propose a challenging but practical task: text classification in-the-wild, which introduces different non-stationary training/testing stages. Decomposing a complex task into modular components can enable robust generalisation under such non-stationary environment. However, current modular approaches in NLP do not take advantage of recent advances in parameter efficient tuning of pretrained language models. To close this gap, we propose ModularPrompt, a label-modular prompt tuning framework for text classification tasks. In ModularPrompt, the input prompt consists of a sequence of soft label prompts, each encoding modular knowledge related to the corresponding class label. In two of most formidable settings, ModularPrompt outperforms relevant baselines by a large margin demonstrating strong generalisation ability. We also conduct comprehensive analysis to validate whether the learned prompts satisfy properties of a modular representation. @@ -1645,7 +1645,7 @@ AnanthAgarwalStanford University PatrickLiuStanford University ChelseaFinnStanford University - ChristopherManningStanford University + ChristopherManningStanford University 1754-1768 While large pre-trained language models are powerful, their predictions often lack logical consistency across test inputs. For example, a state-of-the-art Macaw question-answering (QA) model answers <i>Yes</i> to <i>Is a sparrow a bird?</i> and <i>Does a bird have feet?</i> but answers <i>No</i> to <i>Does a sparrow have feet?</i>. To address this failure mode, we propose a framework, Consistency Correction through Relation Detection, or <b>ConCoRD</b>, for boosting the consistency and accuracy of pre-trained NLP models using pre-trained natural language inference (NLI) models without fine-tuning or re-training. Given a batch of test inputs, ConCoRD samples several candidate outputs for each input and instantiates a factor graph that accounts for both the model’s belief about the likelihood of each answer choice in isolation and the NLI model’s beliefs about pair-wise answer choice compatibility. We show that a weighted MaxSAT solver can efficiently compute high-quality answer choices under this factor graph, improving over the raw model’s predictions. Our experiments demonstrate that ConCoRD consistently boosts accuracy and consistency of off-the-shelf closed-book QA and VQA models using off-the-shelf NLI models, notably increasing accuracy of LXMERT on ConVQA by 5% absolute. See the project website (https://ericmitchell.ai/emnlp-2022-concord/) for code and data. 2022.emnlp-main.115 @@ -1697,7 +1697,7 @@ FatemehsadatMireshghallahUC San Diego ArchitUniyalPanjab University TianhaoWangUniversity of Virginia - DavidEvansUniversity of Virginia + DavidEvansUniversity of Virginia TaylorBerg-KirkpatrickUniversity of California San Diego 1816-1826 Large language models are shown to present privacy risks through memorization of training data, andseveral recent works have studied such risks for the pre-training phase. Little attention, however, has been given to the fine-tuning phase and it is not well understood how different fine-tuning methods (such as fine-tuning the full model, the model head, and adapter) compare in terms of memorization risk. This presents increasing concern as the “pre-train and fine-tune” paradigm proliferates. In this paper, we empirically study memorization of fine-tuning methods using membership inference and extraction attacks, and show that their susceptibility to attacks is very different. We observe that fine-tuning the head of the model has the highest susceptibility to attacks, whereas fine-tuning smaller adapters appears to be less vulnerable to known extraction attacks. @@ -1766,8 +1766,8 @@ Stop Measuring Calibration When Humans Disagree JorisBaanUniversity of Amsterdam WilkerAzizUniversity of Amsterdam - BarbaraPlankLMU Munich - RaquelFernandezILLC, University of Amsterdam + BarbaraPlankLMU Munich + RaquelFernandezILLC, University of Amsterdam 1892-1915 Calibration is a popular framework to evaluate whether a classifier knows when it does not know - i.e., its predictive probabilities are a good indication of how likely a prediction is to be correct. Correctness is commonly estimated against the human majority class. Recently, calibration to human majority has been measured on tasks where humans inherently disagree about which class applies. We show that measuring calibration to human majority given inherent disagreements is theoretically problematic, demonstrate this empirically on the ChaosNLI dataset, and derive several instance-level measures of calibration that capture key statistical properties of human judgements - including class frequency, ranking and entropy. 2022.emnlp-main.124 @@ -1780,7 +1780,7 @@ ArminehNourbakhshCMU, JP Morgan Chase CathyJiaoCMU SameenaShahJP Morgan - CarolynRoséCarnegie Mellon University + CarolynRoséCarnegie Mellon University 1916-1932 Quantitative reasoning is an important aspect of question answering, especially when numeric and verbal cues interact to indicate sophisticated, multi-step programs. In this paper, we demonstrate how modeling the compositional nature of quantitative text can enhance the performance and robustness of QA models, allowing them to capture arithmetic logic that is expressed verbally. Borrowing from the literature on semantic parsing, we propose a method that encourages the QA models to adjust their attention patterns and capture input/output alignments that are meaningful to the reasoning task. We show how this strategy improves program accuracy and renders the models more robust against overfitting as the number of reasoning steps grows. Our approach is designed as a standalone module which can be prepended to many existing models and trained in an end-to-end fashion without the need for additional supervisory signal. As part of this exercise, we also create a unified dataset building on four previously released numerical QA datasets over tabular data. 2022.emnlp-main.125 @@ -1792,7 +1792,7 @@ A Comprehensive Comparison of Neural Networks as Cognitive Models of Inflection AdamWiemerslageUniversity of Colorado Boulder ShiranDudyUniversity of Colorado - KatharinaKannUniversity of Colorado Boulder + KatharinaKannUniversity of Colorado Boulder 1933-1945 Neural networks have long been at the center of a debate around the cognitive mechanism by which humans process inflectional morphology. This debate has gravitated into NLP by way of the question: Are neural networks a feasible account for human behavior in morphological inflection?We address that question by measuring the correlation between human judgments and neural network probabilities for unknown word inflections. We test a larger range of architectures than previously studied on two important tasks for the cognitive processing debate: English past tense, and German number inflection. We find evidence that the Transformer may be a better account of human behavior than LSTMs on these datasets, and that LSTM features known to increase inflection accuracy do not always result in more human-like behavior. 2022.emnlp-main.126 @@ -1829,7 +1829,7 @@ EvangeliaSpiliopoulouCarnegie Mellon University ArtidoroPagnoniUniversity of Washington YonatanBiskCarnegie Mellon University - EduardHovyUniversity of Melbourne + EduardHovyUniversity of Melbourne 1982-1997 This paper investigates models of event implications. Specifically, how well models predict entity state-changes, by targeting their understanding of physical attributes. Nominally, Large Language models (LLM) have been exposed to procedural knowledge about how objects interact, yet our benchmarking shows they fail to reason about the world. Conversely, we also demonstrate that existing approaches often misrepresent the surprising abilities of LLMs via improper task encodings and that proper model prompting can dramatically improve performance of reported baseline results across multiple tasks. In particular, our results indicate that our prompting technique is especially useful for unseen attributes (out-of-domain) or when only limited data is available. 2022.emnlp-main.129 @@ -1854,7 +1854,7 @@ Towards a Unified Multi-Dimensional Evaluator for Text Generation MingZhongUniversity of Illinois at Urbana-Champaign - YangLiuMicrosoft + YangLiuMicrosoft DaYinUniversity of California, Los Angeles (UCLA) YuningMaoMeta Platforms, Inc. YizhuJiaoUniversity of Illinois Urbana-Champaign @@ -1897,7 +1897,7 @@ Entailer: Answering Questions with Faithful and Truthful Chains of Reasoning OyvindTafjordAI2 - BhavanaDalvi MishraAllen Institute for Artificial Intelligence + BhavanaDalvi MishraAllen Institute for Artificial Intelligence PeterClarkAllen Institute for Artificial Intelligence 2078-2093 Our goal is a question-answering (QA) system that can show how its answers are implied by its own internal beliefs via a systematic chain of reasoning. Such a capability would allow better understanding of why a model produced the answer it did. Our approach is to recursively combine a trained backward-chainingmodel, capable of generating a set of premises entailing an answer hypothesis, with a verifier that checks that the model itself believes those premises (and the entailment itself) through self-querying. To our knowledge, this is the first system to generate multistep chains that are both faithful (the answer follows from the reasoning) and truthful (the chain reflects the system’s own internal beliefs). In evaluation using two different datasets, users judge that a majority (70%+) of generated chains clearly show how an answer follows from a set of facts - substantially better than a high-performance baseline - while preserving answer accuracy. By materializing model beliefs that systematically support an answer, new opportunities arise for understanding the model’s system of belief, and diagnosing and correcting its misunderstandings when an answer is wrong. @@ -1930,7 +1930,7 @@ PascaleFungHong Kong University of Science and Technology LambertMathiasFacebook AsliCelikyilmazFAIR @ Meta - MonaDiabMeta Responsible AI + MonaDiabMeta Responsible AI 2109-2120 Hate speech detection is complex; it relies on commonsense reasoning, knowledge of stereotypes, and an understanding of social nuance that differs from one culture to the next. It is also difficult to collect a large-scale hate speech annotated dataset. In this work, we frame this problem as a few-shot learning task, and show significant gains with decomposing the task into its “constituent” parts. In addition, we see that infusing knowledge from reasoning datasets (e.g. ATOMIC2020) improves the performance even further. Moreover, we observe that the trained models generalize to out-of-distribution datasets, showing the superiority of task decomposition and knowledge infusion compared to previously used methods. Concretely, our method outperforms the baseline by 17.83% absolute gain in the 16-shot case. 2022.emnlp-main.136 @@ -2000,7 +2000,7 @@ Prompt-and-Rerank: A Method for Zero-Shot and Few-Shot Arbitrary Textual Style Transfer with Small Language Models MiracSuzgunStanford University LukeMelas-KyriaziOxford University - DanJurafskyStanford University + DanJurafskyStanford University 2195-2222 We propose a method for arbitrary textual style transfer (TST)—the task of transforming a text into any given style—utilizing general-purpose pre-trained language models. Our method, Prompt-and-Rerank, is based on a mathematical formulation of the TST task, decomposing it into three constituent components: textual similarity, target style strength, and fluency. Our method uses zero-shot or few-shot prompting to obtain a set of candidate generations in the target style, and then re-ranks them according to the three components. Our method enables small pre-trained language models to perform on par with state-of-the-art large-scale models while using two orders of magnitude less compute and memory. We also investigate the effect of model size and prompt design (e.g., prompt paraphrasing and delimiter-pair choice) on style transfer quality across seven diverse textual style transfer datasets, finding, among other things, that delimiter-pair choice has a large impact on performance, and that models have biases on the direction of style transfer. 2022.emnlp-main.141 @@ -2169,7 +2169,7 @@ MalikAltakroriMcGill University /Mila ThomasScialomMeta AI Benjamin C. M.FungMcGill University - Jackie Chi KitCheungMila / McGill University + Jackie Chi KitCheungMila / McGill University 2391-2406 Authorship obfuscation techniques have commonly been evaluated based on their ability to hide the author’s identity (evasion) while preserving the content of the original text. However, to avoid overstating the systems’ effectiveness, evasion detection must be evaluated using competitive identification techniques in settings that mimic real-life scenarios, and the outcomes of the content-preservation evaluation have to be interpretable by potential users of these obfuscation tools. Motivated by recent work on cross-topic authorship identification and content preservation in summarization, we re-evaluate different authorship obfuscation techniques on detection evasion and content preservation. Furthermore, we propose a new information-theoretic measure to characterize the misattribution harm that can be caused by detection evasion. Our results reveal key weaknesses in state-of-the-art obfuscation techniques and a surprisingly competitive effectiveness from a back-translation baseline in all evaluation aspects. 2022.emnlp-main.153 @@ -2184,7 +2184,7 @@ MelanieSubbiahColumbia University LydiaChiltonColumbia University DesmondPattonColumbia University - KathleenMcKeownColumbia University and Amazon (Amazon Scholar) + KathleenMcKeownColumbia University and Amazon (Amazon Scholar) William YangWangUnversity of California, Santa Barbara 2407-2421 Understanding what constitutes safe text is an important issue in natural language processing and can often prevent the deployment of models deemed harmful and unsafe. One such type of safety that has been scarcely studied is commonsense physical safety, i.e. text that is not explicitly violent and requires additional commonsense knowledge to comprehend that it leads to physical harm. We create the first benchmark dataset, SafeText, comprising real-life scenarios with paired safe and physically unsafe pieces of advice. We utilize SafeText to empirically study commonsense physical safety across various models designed for text generation and commonsense reasoning tasks. We find that state-of-the-art large language models are susceptible to the generation of unsafe text and have difficulty rejecting unsafe advice. As a result, we argue for further studies of safety and the assessment of commonsense physical safety in models before release. @@ -2197,7 +2197,7 @@ Ground-Truth Labels Matter: A Deeper Look into Input-Label Demonstrations Kang MinYooNAVER AI Lab JunyeobKimSeoul National University - Hyuhng JoonKimSeoul National University + Hyuhng JoonKimSeoul National University HyunsooChoSeoul National University HwiyeolJoClova AI, Naver Sang-WooLeeNAVER Clova @@ -2345,8 +2345,8 @@ EmilyGadeAssistant Professor of Political Science LeroyWangUniversity of Washington ZeyuWangUniversity of Washington - LukeZettlemoyerUniversity of Washington; Meta - Noah A.SmithUniversity of Washington + LukeZettlemoyerUniversity of Washington; Meta + Noah A.SmithUniversity of Washington 2562-2580 Language models increasingly rely on massive web crawls for diverse text data. However, these sources are rife with undesirable content. As such, resources like Wikipedia, books, and news often serve as anchors for automatically selecting web text most suitable for language modeling, a process typically referred to as quality filtering. Using a new dataset of U.S. high school newspaper articles—written by students from across the country—we investigate whose language is preferred by the quality filter used for GPT-3. We find that newspapers from larger schools, located in wealthier, educated, and urban zones (ZIP codes) are more likely to be classified as high quality. We also show that this quality measurement is unaligned with other sensible metrics, such as factuality or literary acclaim. We argue that privileging any corpus as high quality entails a language ideology, and more care is needed to construct training corpora for language models, with better transparency and justification for the inclusion or exclusion of various texts. 2022.emnlp-main.165 @@ -2360,7 +2360,7 @@ YangDengThe Chinese University of Hong Kong WenqiangLeiSichuan University WenlongZhaocuhk - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore WaiLamThe Chinese University of Hong Kong 2581-2594 We study automatic Contract Clause Extraction (CCE) by modeling implicit relations in legal contracts. Existing CCE methods mostly treat contracts as plain text, creating a substantial barrier to understanding contracts of high complexity. In this work, we first comprehensively analyze the complexity issues of contracts and distill out three implicit relations commonly found in contracts, namely, 1) Long-range Context Relation that captures the correlations of distant clauses; 2) Term-Definition Relation that captures the relation between important terms with their corresponding definitions, and 3) Similar Clause Relation that captures the similarities between clauses of the same type. Then we propose a novel framework ConReader to exploit the above three relations for better contract understanding and improving CCE. Experimental results show that ConReader makes the prediction more interpretable and achieves new state-of-the-art on two CCE tasks in both conventional and zero-shot settings. @@ -2398,7 +2398,7 @@ Transfer Learning from Semantic Role Labeling to Event Argument Extraction with Template-based Slot Querying ZhisongZhangCarnegie Mellon University EmmaStrubellCarnegie Mellon University - EduardHovyUniversity of Melbourne + EduardHovyUniversity of Melbourne 2627-2647 In this work, we investigate transfer learning from semantic role labeling (SRL) to event argument extraction (EAE), considering their similar argument structures. We view the extraction task as a role querying problem, unifying various methods into a single framework. There are key discrepancies on role labels and distant arguments between semantic role and event argument annotations. To mitigate these discrepancies, we specify natural language-like queries to tackle the label mismatch problem and devise argument augmentation to recover distant arguments. We show that SRL annotations can serve as a valuable resource for EAE, and a template-based slot querying strategy is especially effective for facilitating the transfer. In extensive evaluations on two English EAE benchmarks, our proposed model obtains impressive zero-shot results by leveraging SRL annotations, reaching nearly 80% of the fullysupervised scores. It further provides benefits in low-resource cases, where few EAE annotations are available. Moreover, we show that our approach generalizes to cross-domain and multilingual scenarios. 2022.emnlp-main.169 @@ -2408,7 +2408,7 @@ Calibrating Zero-shot Cross-lingual (Un-)structured Predictions - ZhengpingJiangJohns Hopkins University + ZhengpingJiangJohns Hopkins University AnqiLiuJHU BenjaminVan DurmeJohns Hopkins University / Microsoft 2648-2674 @@ -2451,7 +2451,7 @@ Measuring Context-Word Biases in Lexical Semantic Datasets QianchuLiuUniversity of Cambridge - DianaMcCarthyUniversity of Cambridge (DTAL) + DianaMcCarthyUniversity of Cambridge (DTAL) AnnaKorhonenUniversity of Cambridge 2699-2713 State-of-the-art pretrained contextualized models (PCM) eg. BERT use tasks such as WiC and WSD to evaluate their word-in-context representations. This inherently assumes that performance in these tasks reflect how well a model represents the coupled word and context semantics. We question this assumption by presenting the first quantitative analysis on the context-word interaction being tested in major contextual lexical semantic tasks. To achieve this, we run probing baselines on masked input, and propose measures to calculate and visualize the degree of context or word biases in existing datasets. The analysis was performed on both models and humans. Our findings demonstrate that models are usually not being tested for word-in-context semantics in the same way as humans are in these tasks, which helps us better understand the model-human gap. Specifically, to PCMs, most existing datasets fall into the extreme ends (the retrieval-based tasks exhibit strong target word bias while WiC-style tasks and WSD show strong context bias); In comparison, humans are less biased and achieve much better performance when both word and context are available than with masked input. We recommend our framework for understanding and controlling these biases for model interpretation and future task design. @@ -2488,7 +2488,7 @@ Mitigating Data Sparsity for Short Text Topic Modeling by Topic-Semantic Contrastive Learning XiaobaoWuNanyang Technological University - Anh TuanLuuNanyang Technological University, Singapore + Anh TuanLuuNanyang Technological University, Singapore XinshuaiDongNanyang Technological University 2748-2760 To overcome the data sparsity issue in short text topic modeling, existing methods commonly rely on data augmentation or the data characteristic of short texts to introduce more word co-occurrence information. However, most of them do not make full use of the augmented data or the data characteristic: they insufficiently learn the relations among samples in data, leading to dissimilar topic distributions of semantically similar text pairs. To better address data sparsity, in this paper we propose a novel short text topic modeling framework, Topic-Semantic Contrastive Topic Model (TSCTM). To sufficiently model the relations among samples, we employ a new contrastive learning method with efficient positive and negative sampling strategies based on topic semantics. This contrastive learning method refines the representations, enriches the learning signals, and thus mitigates the sparsity issue. Extensive experimental results show that our TSCTM outperforms state-of-the-art baselines regardless of the data augmentation availability, producing high-quality topics and topic distributions. @@ -2745,7 +2745,7 @@ Conformal Predictor for Improving Zero-Shot Text Classification Efficiency - Prafulla KumarChoubeySalesforce AI Research + Prafulla KumarChoubeySalesforce AI Research YuBaiSalesforce AI Research Chien-ShengWuSalesforce WenhaoLiuSalesforce Research @@ -2777,7 +2777,7 @@ AkhilKediaSamsung Electronics JongwonLeeSamsung Research AshwinParanjapeStanford University - ChristopherManningStanford University + ChristopherManningStanford University Kyoung-GuWooGrowdle Corporation 3047-3060 Recent approaches to Open-domain Question Answering refer to an external knowledge base using a retriever model, optionally rerank passages with a separate reranker model and generate an answer using another reader model. Despite performing related tasks, the models have separate parameters and are weakly-coupled during training. We propose casting the retriever and the reranker as internal passage-wise attention mechanisms applied sequentially within the transformer architecture and feeding computed representations to the reader, with the hidden representations progressively refined at each stage. This allows us to use a single question answering model trained end-to-end, which is a more efficient use of model capacity and also leads to better gradient flow. We present a pre-training method to effectively train this architecture and evaluate our model on the Natural Questions and TriviaQA open datasets. For a fixed parameter budget, our model outperforms the previous state-of-the-art model by 1.0 and 0.7 exact match scores. @@ -2814,7 +2814,7 @@ Opinion Summarization by Weak-Supervision from Mix-structured Data YizhuLiuShanghai Jiao Tong University QiJiaShanghai Jiao Tong University - KennyZhuShanghai Jiao Tong University + KennyZhuShanghai Jiao Tong University 3086-3096 Opinion summarization of multiple reviews suffers from the lack of reference summaries for training.Most previous approaches construct multiple reviews and their summary based on textual similarities between reviews,resulting in information mismatch between the review input and the summary. In this paper, we convert each review into a mixof structured and unstructured data, which we call opinion-aspect pairs (OAs) and implicit sentences (ISs).We propose a new method to synthesize training pairs of such mix-structured data as input and the textual summary as output,and design a summarization model with OA encoder and IS encoder.Experiments show that our approach outperforms previous methods on Yelp, Amazon and RottenTomatos datasets. 2022.emnlp-main.201 @@ -2825,7 +2825,7 @@ Multi-level Distillation of Semantic Knowledge for Pre-training Multilingual Language Model MingqiLiClemson University FeiDingClemson University - DanZhangClemson University + DanZhangClemson University LongChengClemson University HongxinHuUniversity at Buffalo, SUNY FengLuoClemson University @@ -2878,7 +2878,7 @@ BaoxingHuaiHuawei Technologies XinJiangHuawei Noah’s Ark Lab QunLiuHuawei Noah’s Ark Lab - PhillippeLanglaisUniversité de Montréal + PhillippeLanglaisUniversité de Montréal 3135-3151 There is a growing body of work in recent years to develop pre-trained language models (PLMs) for the Arabic language. This work addresses two major problems in existing Arabic PLMs that limit the progress of the Arabic NLU and NLG fields. First, existing Arabic PLMs are not well-explored and their pre-training can be improved significantly using a more methodical approach. Second, there is a lack of systematic and reproducible evaluation of these models in the literature. We revisit both the pre-training and evaluation of Arabic PLMs. In terms of pre-training, we explore the impact of the quality of the pretraining data, the size of the model, and the incorporation of character-level information on Arabic PLM. As a result, we release three new Arabic BERT-style models ( JABER, Char-JABER, and SABER), and two T5-style models (AT5S and AT5B). In terms of evaluation, we conduct a comprehensive empirical study to systematically evaluate the performance of existing state-of-the-art models on ALUE, a leaderboard-powered benchmark for Arabic NLU tasks, and on a subset of the Arabic generative tasks. We show that our models significantly outperform existing Arabic PLMs and achieve a new state-of-the-art performance on discriminative and generative Arabic NLU and NLG tasks. Our models and source code to reproduce results will be made available upon acceptance. 2022.emnlp-main.205 @@ -3002,7 +3002,7 @@ WeijiaShiucla.edu JulianMichaelUniversity of Washington SuchinGururanganPaul G. Allen School of Computer Science; Meta AI - LukeZettlemoyerUniversity of Washington + LukeZettlemoyerUniversity of Washington 3254-3265 Retrieval-augmented language models (LMs) use non-parametric memory to substantially outperform their non-retrieval counterparts on perplexity-based evaluations, but it is an open question whether they achieve similar gains in few- and zero-shot end-task accuracy. We extensively study one such model, the k-nearest neighbor LM (kNN-LM), showing that the gains marginally transfer. The main challenge is to achieve coverage of the verbalizer tokens that define the different end-task class labels. To address this challenge, we also introduce kNN-Prompt, a simple and effective kNN-LM with automatically expanded fuzzy verbalizers (e.g. to expand “terrible” to also include “silly” and other task-specific synonyms for sentiment classification). Across nine diverse end-tasks, using kNN-Prompt with GPT-2 large yields significant performance boosts over strong zeroshot baselines (13.4% absolute improvement over the base LM on average). We also show that other advantages of non-parametric augmentation hold for end tasks; kNN-Prompt is effective for domain adaptation with no further training, and gains increase with the size of the retrieval model. 2022.emnlp-main.214 @@ -3037,7 +3037,7 @@ Making Pretrained Language Models Good Long-tailed Learners - ChenZhangBeijing Institute of Technology + ChenZhangBeijing Institute of Technology LeiRenMeituan-Dianping Group JingangWangMeituan WeiWumeituan @@ -3077,8 +3077,8 @@ <fixed-case>F</fixed-case>ine<fixed-case>D</fixed-case>-Eval: Fine-grained Automatic Dialogue-Level Evaluation - ChenZhangECE, National University of SIngapore - Luis FernandoD’HaroSpeech Technology and Machine Learning Group, ETSI de Telecomunicación, Universidad Politécnica de Madrid + ChenZhangECE, National University of SIngapore + Luis FernandoD’HaroSpeech Technology and Machine Learning Group, ETSI de Telecomunicación, Universidad Politécnica de Madrid QiquanZhangNational University of Singapore ThomasFriedrichsRobert Bosch (SEA) Pte Ltd HaizhouLiThe Chinese University of Hong Kong, Shenzhen @@ -3108,7 +3108,7 @@ HanGuoCMU TianminShuMIT MengSongUniversity of California, San Diego - EricXingCarnegie Mellon University + EricXingCarnegie Mellon University ZhitingHuUC San Diego 3369-3391 Prompting has shown impressive success in enabling large pre-trained language models (LMs) to perform diverse NLP tasks, especially with only few downstream data. Automatically finding the optimal prompt for each task, however, is challenging. Most existing work resorts to tuning *soft* prompts (e.g., embeddings) which fall short of interpretability, reusability across LMs, and applicability when gradients are not accessible. *Discrete* prompts, on the other hand, are difficult to optimize, and are often created by “enumeration (e.g., paraphrasing)-then-selection” heuristics that do not explore the prompt space systematically. This paper proposes RLPrompt, an efficient discrete prompt optimization approach with reinforcement learning (RL). RLPrompt formulates a parameter-efficient policy network that generates the optimized discrete prompt after training with reward. To harness the complex and stochastic reward signals from the large LM environment, we incorporate effective reward stabilization that substantially enhances training efficiency. RLPrompt is flexibly applicable to different types of LMs, such as masked (e.g., BERT) and left-to-right models (e.g., GPTs), for both classification and generation tasks. Experiments on few-shot classification and unsupervised text style transfer show superior performance over a wide range of existing fine-tuning or prompting methods. Interestingly, the resulting optimized prompts are often ungrammatical gibberish text; and surprisingly, those gibberish prompts are transferrable between different LMs to retain significant performance, indicating that LM prompting may not follow human language patterns. @@ -3197,7 +3197,7 @@ Discovering Differences in the Representation of People using Contextualized Semantic Axes - LiLucyUniversity of California, Berkeley + LiLucyUniversity of California, Berkeley DivyaTadimetiUniversity of California, Berkeley DavidBammanUniversity of California, Berkeley 3477-3494 @@ -3233,11 +3233,11 @@ Natural Language to Code Translation with Execution - FredaShiToyota Technological Institute at Chicago + FredaShiToyota Technological Institute at Chicago DanielFriedFacebook AI Research MarjanGhazvininejadFAIR - LukeZettlemoyerUniversity of Washington; Meta - Sida I.WangFacebook AI Research + LukeZettlemoyerUniversity of Washington; Meta + Sida I.WangFacebook AI Research 3533-3546 Generative models of code, pretrained on large corpora of programs, have shown great success in translating natural language to code (Chen et al., 2021; Austin et al., 2021; Li et al., 2022, inter alia). While these models do not explicitly incorporate program semantics (i.e., execution results) during training, they are able to generate correct solutions for many problems. However, choosing a single correct program from a generated set for each problem remains challenging. In this work, we introduce execution result–based minimum Bayes risk decoding (MBR-EXEC) for program selection and show that it improves the few-shot performance of pretrained code models on natural-language-to-code tasks. We select output programs from a generated candidate set by marginalizing over program implementations that share the same semantics. Because exact equivalence is intractable, we execute each program on a small number of test inputs to approximate semantic equivalence. Across datasets, execution or simulated execution significantly outperforms the methods that do not involve program semantics. We find that MBR-EXEC consistently improves over all execution-unaware selection methods, suggesting it as an effective approach for natural language to code translation. 2022.emnlp-main.231 @@ -3258,7 +3258,7 @@ Language Contamination Helps Explains the Cross-lingual Capabilities of <fixed-case>E</fixed-case>nglish Pretrained Models TerraBlevinsUniversity of Washington - LukeZettlemoyerUniversity of Washington; Meta + LukeZettlemoyerUniversity of Washington; Meta 3563-3574 English pretrained language models, which make up the backbone of many modern NLP systems, require huge amounts of unlabeled training data. These models are generally presented as being trained only on English text but have been found to transfer surprisingly well to other languages. We investigate this phenomenon and find that common English pretraining corpora actually contain significant amounts of non-English text: even when less than 1% of data is not English (well within the error rate of strong language classifiers), this leads to hundreds of millions of foreign language tokens in large-scale datasets. We then demonstrate that even these small percentages of non-English data facilitate cross-lingual transfer for models trained on them, with target language performance strongly correlated to the amount of in-language data seen during pretraining. In light of these findings, we argue that no model is truly monolingual when pretrained at scale, which should be considered when evaluating cross-lingual transfer. 2022.emnlp-main.233 @@ -3269,7 +3269,7 @@ Analyzing the Mono- and Cross-Lingual Pretraining Dynamics of Multilingual Language Models TerraBlevinsUniversity of Washington HilaGonenUW and FAIR - LukeZettlemoyerUniversity of Washington; Meta + LukeZettlemoyerUniversity of Washington; Meta 3575-3590 The emergent cross-lingual transfer seen in multilingual pretrained models has sparked significant interest in studying their behavior. However, because these analyses have focused on fully trained multilingual models, little is known about the dynamics of the multilingual pretraining process. We investigate when these models acquire their in-language and cross-lingual abilities by probing checkpoints taken from throughout XLM-R pretraining, using a suite of linguistic tasks. Our analysis shows that the model achieves high in-language performance early on, with lower-level linguistic skills acquired before more complex ones. In contrast, the point in pretraining when the model learns to transfer cross-lingually differs across language pairs. Interestingly, we also observe that, across many languages and tasks, the final model layer exhibits significant performance degradation over time, while linguistic knowledge propagates to lower layers of the network. Taken together, these insights highlight the complexity of multilingual pretraining and the resulting varied behavior for different languages over time. 2022.emnlp-main.234 @@ -3320,7 +3320,7 @@ JeredMcInerneyNortheastern University GeoffreyYoungBrigham and Women’s Hospital Jan-Willemvan de MeentNortheastern University, University of Amsterdam - ByronWallaceNortheastern University + ByronWallaceNortheastern University 3626-3648 Pretraining multimodal models on Electronic Health Records (EHRs) provides a means of learning representations that can transfer to downstream tasks with minimal supervision. Recent multimodal models induce soft local alignments between image regions and sentences. This is of particular interest in the medical domain, where alignments might highlight regions in an image relevant to specific phenomena described in free-text. While past work has suggested that attention “heatmaps” can be interpreted in this manner, there has been little evaluation of such alignments. We compare alignments from a state-of-the-art multimodal (image and text) model for EHR with human annotations that link image regions to sentences. Our main finding is that the text has an often weak or unintuitive influence on attention; alignments do not consistently reflect basic anatomical information. Moreover, synthetic modifications — such as substituting “left” for “right” — do not substantially influence highlights. Simple techniques such as allowing the model to opt out of attending to the image and few-shot finetuning show promise in terms of their ability to improve alignments with very little or no supervision. We make our code and checkpoints open-source. 2022.emnlp-main.238 @@ -3346,7 +3346,7 @@ ZhixingTanTsinghua University ZhaopengTuTencent AI Lab MaosongSunTsinghua University - YangLiuTsinghua University + YangLiuTsinghua University 3665-3679 Machine translation systems are expected to cope with various types of constraints in many practical scenarios. While neural machine translation (NMT) has achieved strong performance in unconstrained cases, it is non-trivial to impose pre-specified constraints into the translation process of NMT models. Although many approaches have been proposed to address this issue, most existing methods can not satisfy the following three desiderata at the same time: (1) high translation quality, (2) high match accuracy, and (3) low latency. In this work, we propose a template-based method that can yield results with high translation quality and match accuracy and the inference speed of our method is comparable with unconstrained NMT models. Our basic idea is to rearrange the generation of constrained and unconstrained tokens through a template. Our method does not require any changes in the model architecture and the decoding algorithm. Experimental results show that the proposed template-based approach can outperform several representative baselines in both lexically and structurally constrained translation tasks. 2022.emnlp-main.240 @@ -3396,7 +3396,7 @@ ZhenYangtencent.com FandongMengWeChat AI, Tencent YufengChenBeijing Jiaotong University - JinanXuBeijing Jiaotong University + JinanXuBeijing Jiaotong University JieZhouTencent Inc. 3715-3725 Word alignment which aims to extract lexicon translation equivalents between source and target sentences, serves as a fundamental tool for natural language processing. Recent studies in this area have yielded substantial improvements by generating alignments from contextualized embeddings of the pre-trained multilingual language models. However, we find that the existing approaches capture few interactions between the input sentence pairs, which degrades the word alignment quality severely, especially for the ambiguous words in the monolingual context. To remedy this problem, we propose Cross-Align to model deep interactions between the input sentence pairs, in which the source and target sentences are encoded separately with the shared self-attention modules in the shallow layers, while cross-lingual interactions are explicitly constructed by the cross-attention modules in the upper layers. Besides, to train our model effectively, we propose a two-stage training framework, where the model is trained with a simple Translation Language Modeling (TLM) objective in the first stage and then finetuned with a self-supervised alignment objective in the second stage. Experiments show that the proposed Cross-Align achieves the state-of-the-art (SOTA) performance on four out of five language pairs. @@ -3409,7 +3409,7 @@ TianxiangSunFudan University JunliangHeChongqing University XipengQiuFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 3726-3739 Automatic evaluation metrics are crucial to the development of generative systems. In recent years, pre-trained language model (PLM) based metrics, such as BERTScore, have been commonly adopted in various generation tasks. However, it has been demonstrated that PLMs encode a range of stereotypical societal biases, leading to a concern about the fairness of PLMs as metrics. To that end, this work presents the first systematic study on the social bias in PLM-based metrics. We demonstrate that popular PLM-based metrics exhibit significantly higher social bias than traditional metrics on 6 sensitive attributes, namely race, gender, religion, physical appearance, age, and socioeconomic status. In-depth analysis suggests that choosing paradigms (matching, regression, or generation) of the metric has a greater impact on fairness than choosing PLMs. In addition, we develop debiasing adapters that are injected into PLM layers, mitigating bias in PLM-based metrics while retaining high performance for evaluating text generation. 2022.emnlp-main.245 @@ -3434,9 +3434,9 @@ Not to Overfit or Underfit the Source Domains? An Empirical Study of Domain Generalization in Question Answering - Md ArafatSultanIBM Research AI - AviSilIBM Research AI - RaduFlorianIBM Research + Md ArafatSultanIBM Research AI + AviSilIBM Research AI + RaduFlorianIBM Research 3752-3761 Machine learning models are prone to overfitting their training (source) domains, which is commonly believed to be the reason why they falter in novel target domains. Here we examine the contrasting view that multi-source domain generalization (DG) is first and foremost a problem of mitigating source domain underfitting: models not adequately learning the signal already present in their multi-domain training data. Experiments on a reading comprehension DG benchmark show that as a model learns its source domains better—using familiar methods such as knowledge distillation (KD) from a bigger model—its zero-shot out-of-domain utility improves at an even faster pace. Improved source domain learning also demonstrates superior out-of-domain generalization over three popular existing DG approaches that aim to limit overfitting. Our implementation of KD-based domain generalization is available via PrimeQA at: https://ibm.biz/domain-generalization-with-kd. 2022.emnlp-main.247 @@ -3463,9 +3463,9 @@ MikeLewisFacebook AI Research MandarJoshiGoogle ArmenAghajanyanFacebook - Wen-tauYihFacebook AI Research + Wen-tauYihFacebook AI Research JoellePineauMcGill University - LukeZettlemoyerUniversity of Washington; Meta + LukeZettlemoyerUniversity of Washington; Meta 3781-3797 We propose a simple and effective re-ranking method for improving passage retrieval in open question answering. The re-ranker re-scores retrieved passages with a zero-shot question generation model, which uses a pre-trained language model to compute the probability of the input question conditioned on a retrieved passage. This approach can be applied on top of any retrieval method (e.g. neural or keyword-based), does not require any domain- or task-specific training (and therefore is expected to generalize better to data distribution shifts), and provides rich cross-attention between query and passage (i.e. it must explain every token in the question). When evaluated on a number of open-domain retrieval datasets, our re-ranker improves strong unsupervised retrieval models by 6%-18% absolute and strong supervised models by up to 12% in terms of top-20 passage retrieval accuracy. We also obtain new state-of-the-art results on full open-domain question answering by simply adding the new re-ranker to existing models with no further changes. 2022.emnlp-main.249 @@ -3475,7 +3475,7 @@ Summarizing Community-based Question-Answer Pairs Ting-YaoHsuPennsylvania State University - YoshiSuharaGrammarly + YoshiSuharaGrammarly XiaolanWangMegagon Labs 3798-3808 Community-based Question Answering (CQA), which allows users to acquire their desired information, has increasingly become an essential component of online services in various domains such as E-commerce, travel, and dining. However, an overwhelming number of CQA pairs makes it difficult for users without particular intent to find useful information spread over CQA pairs. To help users quickly digest the key information, we propose the novel CQA summarization task that aims to create a concise summary from CQA pairs. To this end, we first design a multi-stage data annotation process and create a benchmark dataset, COQASUM, based on the Amazon QA corpus. We then compare a collection of extractive and abstractive summarization methods and establish a strong baseline approach DedupLED for the CQA summarization task. Our experiment further confirms two key challenges, sentence-type transfer and deduplication removal, towards the CQA summarization task. Our data and code are publicly available. @@ -3510,7 +3510,7 @@ Chapter Ordering in Novels AllenKimStony Brook University - SteveSkienaStony Brook University + SteveSkienaStony Brook University 3838-3848 Understanding narrative flow and text coherence in long-form documents (novels) remains an open problem in NLP.To gain insight, we explore the task of chapter ordering, reconstructing the original order of chapters in novel given a random permutation of the text. This can be seen as extending the well-known sentence ordering task to vastly larger documents: our task deals with over 9,000 novels with an average of twenty chapters each, versus standard sentence ordering datasets averaging only 5-8 sentences. We formulate the task of reconstructing order as a constraint solving problem, using minimum feedback arc set and traveling salesman problem optimization criteria, where the weights of the graph are generated based on models for character occurrences and chapter boundary detection, using relational chapter scores derived from RoBERTa. Our best methods yield a Spearman correlation of 0.59 on this novel and challenging task, substantially above baseline. 2022.emnlp-main.253 @@ -3535,7 +3535,7 @@ Breno WilliamCarvalhoIBM Research IbrahimAbdelazizIBM Research PavanKapanipathiIBM Research - SalimRoukosIBM Research AI + SalimRoukosIBM Research AI AlexanderGrayIBM Research 3863-3875 Knowledge base completion (KBC) has benefitted greatly by learning explainable rules in an human-interpretable dialect such as first-order logic. Rule-based KBC has so far, mainly focussed on learning one of two types of rules: conjunction-of-disjunctions and disjunction-of-conjunctions. We qualitatively show, via examples, that one of these has an advantage over the other when it comes to achieving high quality KBC. To the best of our knowledge, we are the first to propose learning both kinds of rules within a common framework. To this end, we propose to utilize logical neural networks (LNN), a powerful neuro-symbolic AI framework that can express both kinds of rules and learn these end-to-end using gradient-based optimization. Our in-depth experiments show that our LNN-based approach to learning rules for KBC leads to roughly 10% relative improvements, if not more, over SotA rule-based KBC methods. Moreover, by showing how to combine our proposed methods with knowledge graph embeddings we further achieve an additional 7.5% relative improvement. @@ -3570,7 +3570,7 @@ Sparse Teachers Can Be Dense with Knowledge YiYangBeijing Institute of Technology - ChenZhangBeijing Institute of Technology + ChenZhangBeijing Institute of Technology DaweiSongBeijing Institute of Technology 3904-3915 Recent advances in distilling pretrained language models have discovered that, besides the expressiveness of knowledge, the student-friendliness should be taken into consideration to realize a truly knowledgeable teacher. Based on a pilot study, we find that over-parameterized teachers can produce expressive yet student-unfriendly knowledge and are thus limited in overall knowledgeableness. To remove the parameters that result in student-unfriendliness, we propose a sparse teacher trick under the guidance of an overall knowledgeable score for each teacher parameter. The knowledgeable score is essentially an interpolation of the expressiveness and student-friendliness scores. The aim is to ensure that the expressive parameters are retained while the student-unfriendly ones are removed. Extensive experiments on the GLUE benchmark show that the proposed sparse teachers can be dense with knowledge and lead to students with compelling performance in comparison with a series of competitive baselines. @@ -3584,7 +3584,7 @@ ZhengfuHeFudan University HongQianEast China Normal University YunhuaZhouFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University XipengQiuFudan University 3916-3930 Most downstream adaptation methods tune all or part of the parameters of pre-trained models (PTMs) through gradient descent, where the tuning cost increases linearly with the growth of the model size.By contrast, gradient-free methods only require the forward computation of the PTM to tune the prompt, retaining the benefits of efficient tuning and deployment.Though, past work on gradient-free tuning often introduces gradient descent to seek a good initialization of prompt and lacks versatility across tasks and PTMs.In this paper, we present BBTv2, an improved version of Black-Box Tuning, to drive PTMs for few-shot learning.We prepend continuous prompts to every layer of the PTM and propose a divide-and-conquer gradient-free algorithm to optimize the prompts at different layers alternately.Extensive experiments across various tasks and PTMs show that BBTv2 can achieve comparable performance to full model tuning and state-of-the-art parameter-efficient methods (e.g., Adapter, LoRA, BitFit, etc.) under few-shot settings while maintaining much fewer tunable parameters. @@ -3606,7 +3606,7 @@ Mixed-effects transformers for hierarchical adaptation JuliaWhiteStanford University - NoahGoodmanStanford University + NoahGoodmanStanford University RobertHawkinsPrinceton University 3944-3954 Language differs dramatically from context to context. To some degree, large language models like GPT-3 account for such variation by conditioning on strings of initial input text, or prompts. However, prompting can be ineffective when contexts are sparse, out-of-sample, or extra-textual. In this paper, we introduce the mixed-effects transformer (MET), a novel approach for learning hierarchically-structured prefixes— lightweight modules prepended to an input sequence— to account for structured variation in language use. Specifically, we show how the popular class of mixed-effects regression models may be extended to transformer-based architectures using a regularized prefix-tuning procedure with dropout. We evaluate this approach on several domain-adaptation benchmarks, finding that it learns contextual variation from minimal data while generalizing well to unseen contexts. @@ -3619,7 +3619,7 @@ On Measuring the Intrinsic Few-Shot Hardness of Datasets XinranZhaoStanford University ShikharMurtyStanford University - ChristopherManningStanford University + ChristopherManningStanford University 3955-3963 While advances in pre-training have led to dramatic improvements in few-shot learning of NLP tasks, there is limited understanding of what drives successful few-shot adaptation in datasets. In particular, given a new dataset and a pre-trained model, what properties of the dataset make it few-shot learnable, and are these properties independent of the specific adaptation techniques used? We consider an extensive set of recent few-shot learning methods and show that their performance across a large number of datasets is highly correlated, showing that few-shot hardness may be intrinsic to datasets, for a given pre-trained model. To estimate intrinsic few-shot hardness, we then propose a simple and lightweight metric called Spread that captures the intuition that few-shot learning is made possible by exploiting feature-space invariances between training and test samples. Our metric better accounts for few-shot hardness compared to existing notions of hardness and is ~8-100x faster to compute. 2022.emnlp-main.262 @@ -3753,7 +3753,7 @@ Syntactic Multi-view Learning for Open Information Extraction KuicaiDongNanyang Technological University AixinSunNanyang Technological University - Jung-JaeKimInstitute for Infocomm Research + Jung-JaeKimInstitute for Infocomm Research XiaoliLiInstitute for Infocomm Research/Nanyang Technological University 4072-4083 Open Information Extraction (OpenIE) aims to extract relational tuples from open-domain sentences. Traditional rule-based or statistical models were developed based on syntactic structure of sentence, identified by syntactic parsers. However, previous neural OpenIE models under-explored the useful syntactic information. In this paper, we model both constituency and dependency trees into word-level graphs, and enable neural OpenIE to learn from the syntactic structures. To better fuse heterogeneous information from the two graphs, we adopt multi-view learning to capture multiple relationships from them. Finally, the finetuned constituency and dependency representations are aggregated with sentential semantic representations for tuple generation. Experiments show that both constituency and dependency information, and the multi-view learning are effective. @@ -3799,7 +3799,7 @@ SongYangGaoFudan University ShihanDouFudan University QiZhangFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 4112-4122 Dataset bias has attracted increasing attention recently for its detrimental effect on the generalization ability of fine-tuned models. The current mainstream solution is designing an additional shallow model to pre-identify biased instances. However, such two-stage methods scale up the computational complexity of training process and obstruct valid feature information while mitigating bias.To address this issue, we utilize the representation normalization method which aims at disentangling the correlations between features of encoded sentences. We find it also promising in eliminating the bias problem by providing isotropic data distribution. We further propose Kernel-Whitening, a Nystrom kernel approximation method to achieve more thorough debiasing on nonlinear spurious correlations. Our framework is end-to-end with similar time consumption to fine-tuning. Experiments show that Kernel-Whitening significantly improves the performance of BERT on out-of-distribution datasets while maintaining in-distribution accuracy. 2022.emnlp-main.275 @@ -3909,7 +3909,7 @@ ElisaBassignanaIT University of Copenhagen MaxMüller-EbersteinIT University of Copenhagen MikeZhangIT University of Copenhagen - BarbaraPlankLMU Munich + BarbaraPlankLMU Munich 4218-4227 With the increase in availability of large pre-trained language models (LMs) in Natural Language Processing (NLP), it becomes critical to assess their fit for a specific target task a priori—as fine-tuning the entire space of available LMs is computationally prohibitive and unsustainable. However, encoder transferability estimation has received little to no attention in NLP. In this paper, we propose to generate quantitative evidence to predict which LM, out of a pool of models, will perform best on a target task without having to fine-tune all candidates. We provide a comprehensive study on LM ranking for 10 NLP tasks spanning the two fundamental problem types of classification and structured prediction. We adopt the state-of-the-art Logarithm of Maximum Evidence (LogME) measure from Computer Vision (CV) and find that it positively correlates with final LM performance in 94% of the setups.In the first study of its kind, we further compare transferability measures with the de facto standard of human practitioner ranking, finding that evidence from quantitative metrics is more robust than pure intuition and can help identify unexpected LM candidates. 2022.emnlp-main.283 @@ -3921,7 +3921,7 @@ Chunk-based Nearest Neighbor Machine Translation Pedro HenriqueMartinsInstituto de Telecomunicações, Instituto Superior Técnico ZitaMarinhoDeepmind - André F. T.MartinsUnbabel, Instituto de Telecomunicacoes + André F. T.MartinsUnbabel, Instituto de Telecomunicacoes 4228-4245 Semi-parametric models, which augment generation with retrieval, have led to impressive results in language modeling and machine translation, due to their ability to retrieve fine-grained information from a datastore of examples. One of the most prominent approaches, kNN-MT, exhibits strong domain adaptation capabilities by retrieving tokens from domain-specific datastores (Khandelwal et al., 2021). However, kNN-MT requires an expensive retrieval operation for every single generated token, leading to a very low decoding speed (around 8 times slower than a parametric model). In this paper, we introduce a chunk-based kNN-MT model which retrieves chunks of tokens from the datastore, instead of a single token. We propose several strategies for incorporating the retrieved chunks into the generation process, and for selecting the steps at which the model needs to search for neighbors in the datastore. Experiments on machine translation in two settings, static and “on-the-fly” domain adaptation, show that the chunk-based kNN-MT model leads to significant speed-ups (up to 4 times) with only a small drop in translation quality. 2022.emnlp-main.284 @@ -3974,13 +3974,13 @@ <fixed-case>MT</fixed-case>-<fixed-case>G</fixed-case>en<fixed-case>E</fixed-case>val: A Counterfactual and Contextual Dataset for Evaluating Gender Accuracy in Machine Translation AnnaCurreyAWS AI Labs - MariaNadejdeAmazon AWS AI + MariaNadejdeAmazon AWS AI Raghavendra ReddyPappagariAmazon Web Services MiaMayerAWS StanislasLaulyNew York University XingNiuAmazon AI BenjaminHsuAmazon - GeorgianaDinuAmazon AWS + GeorgianaDinuAmazon AWS 4287-4299 As generic machine translation (MT) quality has improved, the need for targeted benchmarks that explore fine-grained aspects of quality has increased. In particular, gender accuracy in translation can have implications in terms of output fluency, translation accuracy, and ethics. In this paper, we introduce MT-GenEval, a benchmark for evaluating gender accuracy in translation from English into eight widely-spoken languages. MT-GenEval complements existing benchmarks by providing realistic, gender-balanced, counterfactual data in eight language pairs where the gender of individuals is unambiguous in the input segment, including multi-sentence segments requiring inter-sentential gender agreement. Our data and code is publicly available under a CC BY SA 3.0 license. 2022.emnlp-main.288 @@ -4005,7 +4005,7 @@ On the Calibration of Massively Multilingual Language Models KabirAhujaMicrosoft Research SunayanaSitaramMicrosoft Research India - SandipanDandapatMicrosoft India + SandipanDandapatMicrosoft India MonojitChoudhuryMicrosoft Research 4310-4323 Massively Multilingual Language Models (MMLMs) have recently gained popularity due to their surprising effectiveness in cross-lingual transfer. While there has been much work in evaluating these models for their performance on a variety of tasks and languages, little attention has been paid on how well calibrated these models are with respect to the confidence in their predictions. We first investigate the calibration of MMLMs in the zero-shot setting and observe a clear case of miscalibration in low-resource languages or those which are typologically diverse from English. Next, we empirically show that calibration methods like temperature scaling and label smoothing do reasonably well in improving calibration in the zero-shot scenario. We also find that few-shot examples in the language can further help reduce calibration errors, often substantially. Overall, our work contributes towards building more reliable multilingual models by highlighting the issue of their miscalibration, understanding what language and model-specific factors influence it, and pointing out the strategies to improve the same. @@ -4057,7 +4057,7 @@ Retrieval Augmentation for Commonsense Reasoning: A Unified Approach WenhaoYuUniversity of Notre Dame ChenguangZhuMicrosoft Cognitive Services Research Group - ZhihanZhangUniversity of Notre Dame + ZhihanZhangUniversity of Notre Dame ShuohangWangMicrosoft ZhuoshengZhangShanghai Jiao Tong University YuweiFangMicrosoft @@ -4074,7 +4074,7 @@ GuoyinWangAmazon Alexa AI JiweiLiShannon.AI SunghyunParkAmazon Alexa AI - SungjinLeeAmazon Alexa AI + SungjinLeeAmazon Alexa AI PuyangXuMobvoi RicardoHenaoDuke University LawrenceCarinDuke University @@ -4104,7 +4104,7 @@ DavidThulkeRWTH Aachen University YingboGaoRWTH Aachen University ChristianHeroldRWTH Aachen University - HermannNeyRWTH Aachen University + HermannNeyRWTH Aachen University 4480-4487 Currently, in speech translation, the straightforward approach - cascading a recognition system with a translation system - delivers state-of-the-art results.However, fundamental challenges such as error propagation from the automatic speech recognition system still remain.To mitigate these problems, recently, people turn their attention to direct data and propose various joint training methods.In this work, we seek to answer the question of whether joint training really helps cascaded speech translation.We review recent papers on the topic and also investigate a joint training criterion by marginalizing the transcription posterior probabilities.Our findings show that a strong cascaded baseline can diminish any improvements obtained using joint training, and we suggest alternatives to joint training.We hope this work can serve as a refresher of the current speech translation landscape, and motivate research in finding more efficient and creative ways to utilize the direct data for speech translation. 2022.emnlp-main.297 @@ -4114,17 +4114,17 @@ <fixed-case>M</fixed-case>asakha<fixed-case>NER</fixed-case> 2.0: <fixed-case>A</fixed-case>frica-centric Transfer Learning for Named Entity Recognition - David IfeoluwaAdelaniUniversity College London + David IfeoluwaAdelaniUniversity College London GrahamNeubigCarnegie Mellon University SebastianRuderGoogle ShrutiRijhwaniCarnegie Mellon University MichaelBeukmanUniversity of the Witwatersrand ChesterPalen-MichelBrandeis University ConstantineLignosBrandeis University - Jesujoba O.AlabiSaarland University + Jesujoba O.AlabiSaarland University Shamsuddeen H.MuhammadBayero University, Kano PeterNabendeMakerere University - Cheikh M. BambaDioneUniversity of Bergen + Cheikh M. BambaDioneUniversity of Bergen AndiswaBukulaSADiLaR RooweitherMabuyaSouth African Centre for Digital Language Resources Bonaventure F. P.DossouMila @@ -4236,8 +4236,8 @@ ShereenOrabyAmazon Alexa AI AlessandraCervoneAmazon Alexa AI TagyoungChungAmazon Alexa AI - JingHuangAmazon - YangLiuAmazon + JingHuangAmazon + YangLiuAmazon NanyunPengUniversity of California, Los Angeles 4590-4605 The tasks of humor understanding and generation are challenging and subjective even for humans, requiring commonsense and real-world knowledge to master. Puns, in particular, add the challenge of fusing that knowledge with the ability to interpret lexical-semantic ambiguity. In this paper, we present the ExPUNations (ExPUN) dataset, in which we augment an existing dataset of puns with detailed crowdsourced annotations of keywords denoting the most distinctive words that make the text funny, pun explanations describing why the text is funny, and fine-grained funniness ratings. This is the first humor dataset with such extensive and fine-grained annotations specifically for puns. Based on these annotations, we propose two tasks: explanation generation to aid with pun classification and keyword-conditioned pun generation, to challenge the current state-of-the-art natural language understanding and generation models’ ability to understand and generate humor. We showcase that the annotated keywords we collect are helpful for generating better novel humorous texts in human evaluation, and that our natural language explanations can be leveraged to improve both the accuracy and robustness of humor classifiers. @@ -4249,7 +4249,7 @@ <fixed-case>SLING</fixed-case>: <fixed-case>S</fixed-case>ino Linguistic Evaluation of Large Language Models YixiaoSongUniversity of Massachusetts Amherst KalpeshKrishnaUniversity of Massachusetts Amherst - RajeshBhattUniversity of Massachusetts Amherst + RajeshBhattUniversity of Massachusetts Amherst MohitIyyerUniversity of Massachusetts Amherst 4606-4634 To understand what kinds of linguistic knowledge are encoded by pretrained Chinese language models (LMs), we introduce the benchmark of Sino LINGuistics (SLING), which consists of 38K minimal sentence pairs in Mandarin Chinese grouped into 9 high-level linguistic phenomena. Each pair demonstrates the acceptability contrast of a specific syntactic or semantic phenomenon (e.g., The keys are lost vs. The keys is lost), and an LM should assign lower perplexity to the acceptable sentence. In contrast to the CLiMP dataset (Xiang et al., 2021), which also contains Chinese minimal pairs and was created by translating the vocabulary of the English BLiMP dataset, the minimal pairs in SLING are derived primarily by applying syntactic and lexical transformations to naturally-occurring, linguist-annotated sentences from the Chinese Treebank 9.0, thus addressing severe issues in CLiMP’s data generation process. We test 18 publicly available pretrained monolingual (e.g., BERT-base-zh, CPM) and multi-lingual (e.g., mT5, XLM) language models on SLING. Our experiments show that the average accuracy for LMs is far below human performance (69.7% vs. 97.1%), while BERT-base-zh achieves the highest accuracy (84.8%) of all tested LMs, even much larger ones. Additionally, we find that most LMs have a strong gender and number (singular/plural) bias, and they perform better on local phenomena than hierarchical ones. @@ -4264,8 +4264,8 @@ ShereenOrabyAmazon Alexa AI ShuyangGaoAmazon.com, Inc. TagyoungChungAmazon Alexa AI - JingHuangAmazon - YangLiuAmazon + JingHuangAmazon + YangLiuAmazon NanyunPengUniversity of California, Los Angeles 4635-4648 Previous work on pun generation commonly begins with a given pun word (a pair of homophones for heterographic pun generation and a polyseme for homographic pun generation) and seeks to generate an appropriate pun. While this may enable efficient pun generation, we believe that a pun is most entertaining if it fits appropriately within a given context, e.g., a given situation or dialogue. In this work, we propose a new task, context-situated pun generation, where a specific context represented by a set of keywords is provided, and the task is to first identify suitable pun words that are appropriate for the context, then generate puns based on the context keywords and the identified pun words. We collect a new dataset, CUP (Context-sitUated Pun), containing 4.5k tuples of context words and pun pairs. Based on the new data and setup, we propose a pipeline system for context-situated pun generation, including a pun word retrieval module that identifies suitable pun words for a given context, and a pun generation module that generates puns from context keywords and pun words. Human evaluation shows that 69% of our top retrieved pun words can be used to generate context-situated puns, and our generation module yields successful puns 31% of the time given a plausible tuple of context words and pun pair, almost tripling the yield of a state-of-the-art pun generation model. With an end-to-end evaluation, our pipeline system with the top-1 retrieved pun pair for a given context can generate successful puns 40% of the time, better than all other modeling variations but 32% lower than the human success rate. This highlights the difficulty of the task, and encourages more research in this direction. @@ -4287,7 +4287,7 @@ Concadia: Towards Image-Based Text Generation with a Purpose ElisaKreissStanford University FeiFangStanford University - NoahGoodmanStanford University + NoahGoodmanStanford University ChristopherPottsStanford University 4667-4684 Current deep learning models often achieve excellent results on benchmark image-to-text datasets but fail to generate texts that are useful in practice. We argue that to close this gap, it is vital to distinguish descriptions from captions based on their distinct communicative roles. Descriptions focus on visual features and are meant to replace an image (often to increase accessibility), whereas captions appear alongside an image to supply additional information. To motivate this distinction and help people put it into practice, we introduce the publicly available Wikipedia-based dataset Concadia consisting of 96,918 images with corresponding English-language descriptions, captions, and surrounding context. Using insights from Concadia, models trained on it, and a preregistered human-subjects experiment with human- and model-generated texts, we characterize the commonalities and differences between descriptions and captions. In addition, we show that, for generating both descriptions and captions, it is useful to augment image-to-text models with representations of the textual context in which the image appeared. @@ -4516,7 +4516,7 @@ Analyzing and Evaluating Faithfulness in Dialogue Summarization BinWangNational University of Singapore - ChenZhangECE, National University of SIngapore + ChenZhangECE, National University of SIngapore YanZhangNational University of Singapore YimingChenNational University of Singapore HaizhouLiThe Chinese University of Hong Kong, Shenzhen @@ -4533,9 +4533,9 @@ RonanLe BrasAllen Institute for AI HaoPengAllen Institute for AI XimingLuUniversity of Washington - DragomirRadevYale University + DragomirRadevYale University YejinChoiUniversity of Washington - Noah A.SmithUniversity of Washington + Noah A.SmithUniversity of Washington 4909-4923 Many language generation models are now available for a wide range of generation tasks, including machine translation and summarization. Combining such diverse models may lead to further progress, but ensembling generation models is challenging during inference: conventional ensembling methods (e.g., shallow fusion) require that the models share vocabulary/tokenization schemes. We introduce Twist decoding, a simple and general text generation algorithm that benefits from diverse models at inference time. Our method does not assume the vocabulary, tokenization or even generation order is shared. Our extensive evaluations on machine translation and scientific paper summarization demonstrate that Twist decoding substantially outperforms each model decoded in isolation over various scenarios, including cases where domain-specific and general-purpose models are both available. Twist decoding also consistently outperforms the popular reranking heuristic where output candidates from one model are rescored by another. We hope that our work will encourage researchers and practitioners to examine generation models collectively, not just independently, and to seek out models with complementary strengths to the currently available models. 2022.emnlp-main.326 @@ -4580,7 +4580,7 @@ AsliCelikyilmazFAIR @ Meta HaoranLiFacebook YasharMehdadFacebook AI - DragomirRadevYale University + DragomirRadevYale University 4949-4958 Abstractive dialogue summarization has long been viewed as an important standalone task in natural language processing, but no previous work has explored the possibility of whether abstractive dialogue summarization can also be used as a means to boost an NLP system’s performance on other important dialogue comprehension tasks. In this paper, we propose a novel type of dialogue summarization task - STRUctured DiaLoguE Summarization (STRUDEL) - that can help pre-trained language models to better understand dialogues and improve their performance on important dialogue comprehension tasks. In contrast to the holistic approach taken by the traditional free-form abstractive summarization task for dialogues, STRUDEL aims to decompose and imitate the hierarchical, systematic and structured mental process that we human beings usually go through when understanding and analyzing dialogues, and thus has the advantage of being more focused, specific and instructive for dialogue comprehension models to learn from. We further introduce a new STRUDEL dialogue comprehension modeling framework that integrates STRUDEL into a dialogue reasoning module over transformer encoder language models to improve their dialogue comprehension ability. In our empirical experiments on two important downstream dialogue comprehension tasks - dialogue question answering and dialogue response prediction - we demonstrate that our STRUDEL dialogue comprehension models can significantly improve the dialogue comprehension performance of transformer encoder language models. 2022.emnlp-main.329 @@ -4609,7 +4609,7 @@ ZihuiGuRenmin University of China JuFanRenmin University of China NanTangQatar Computing Research Institute, HBKU - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence XiaomanZhaoRenmin University of China XiaoyongDuRenmin University of China 4971-4983 @@ -4677,7 +4677,7 @@ Capturing Global Structural Information in Long Document Question Answering with Compressive Graph Selector Network YuxiangNieBeijing Institute of Technology - HeyanHuangBeijing Institute of Technology + HeyanHuangBeijing Institute of Technology WeiWeiHuazhong University of Science and Technology Xian-LingMaoBeijing Institute of Technology 5036-5047 @@ -4909,7 +4909,7 @@ Should We Ban <fixed-case>E</fixed-case>nglish <fixed-case>NLP</fixed-case> for a Year? - AndersSøgaardUniversity of Copenhagen + AndersSøgaardUniversity of Copenhagen 5254-5260 Around two thirds of NLP research at top venues is devoted exclusively to developing technology for speakers of English, most speech data comes from young urban speakers, and most texts used to train language models come from male writers. These biases feed into consumer technologies to widen existing inequality gaps, not only within, but also across, societies. Many have argued that it is almost impossible to mitigate inequality amplification. I argue that, on the contrary, it is quite simple to do so, and that counter-measures would have little-to-no negative impact, except for, perhaps, in the very short term. 2022.emnlp-main.351 @@ -4948,7 +4948,7 @@ JiajunZhangInstitute of Automation Chinese Academy of Sciences WeiLuoAlibaba ZhongqiangHuangAlibaba Group - ChengqingZongInstitute of Automation, Chinese Academy of Sciences + ChengqingZongInstitute of Automation, Chinese Academy of Sciences 5291-5302 End-to-end Speech Translation (ST) aims at translating the source language speech into target language text without generating the intermediate transcriptions. However, the training of end-to-end methods relies on parallel ST data, which are difficult and expensive to obtain. Fortunately, the supervised data for automatic speech recognition (ASR) and machine translation (MT) are usually more accessible, making zero-shot speech translation a potential direction. Existing zero-shot methods fail to align the two modalities of speech and text into a shared semantic space, resulting in much worse performance compared to the supervised ST methods. In order to enable zero-shot ST, we propose a novel Discrete Cross-Modal Alignment (DCMA) method that employs a shared discrete vocabulary space to accommodate and match both modalities of speech and text. Specifically, we introduce a vector quantization module to discretize the continuous representations of speech and text into a finite set of virtual tokens, and use ASR data to map corresponding speech and text to the same virtual token in a shared codebook. This way, source language speech can be embedded in the same semantic space as the source language text, which can be then transformed into target language text with an MT module. Experiments on multiple language pairs demonstrate that our zero-shot ST method significantly improves the SOTA, and even performers on par with the strong supervised ST baselines. 2022.emnlp-main.354 @@ -4960,7 +4960,7 @@ Abstractive Summarization Guided by Latent Hierarchical Document Structure YifuQiuUniversity of Edinburgh - Shay B.CohenUniversity of Edinburgh + Shay B.CohenUniversity of Edinburgh 5303-5317 Sequential abstractive neural summarizers often do not use the underlying structure in the input article or dependencies between the input sentences. This structure is essential to integrate and consolidate information from different parts of the text. To address this shortcoming, we propose a hierarchy-aware graph neural network (HierGNN) which captures such dependencies through three main steps: 1) learning a hierarchical document structure through a latent structure tree learned by a sparse matrix-tree computation; 2) propagating sentence information over this structure using a novel message-passing node propagation mechanism to identify salient information; 3) using graph-level attention to concentrate the decoder on salient information. Experiments confirm HierGNN improves strong sequence models such as BART, with a 0.55 and 0.75 margin in average ROUGE-1/2/L for CNN/DM and XSum. Further human evaluation demonstrates that summaries produced by our model are more relevant and less redundant than the baselines, into which HierGNN is incorporated. We also find HierGNN synthesizes summaries by fusing multiple source sentences more, rather than compressing a single source sentence, and that it processes long inputs more effectively. 2022.emnlp-main.355 @@ -5033,7 +5033,7 @@ RajDabreNICT RatishPuduppullyUniversity of Edinburgh AnoopKunchukuttanMicrosoft AI and Research - Mitesh M.KhapraIndian Institute of Technology Madras + Mitesh M.KhapraIndian Institute of Technology Madras PratyushKumarIIT Madras 5363-5394 Natural Language Generation (NLG) for non-English languages is hampered by the scarcity of datasets in these languages. We present the IndicNLG Benchmark, a collection of datasets for benchmarking NLG for 11 Indic languages. We focus on five diverse tasks, namely, biography generation using Wikipedia infoboxes, news headline generation, sentence summarization, paraphrase generation and, question generation. We describe the created datasets and use them to benchmark the performance of several monolingual and multilingual baselines that leverage pre-trained sequence-to-sequence models. Our results exhibit the strong performance of multilingual language-specific pre-trained models, and the utility of models trained on our dataset for other related NLG tasks. Our dataset creation methods can be easily applied to modest-resource languages as they involve simple steps such as scraping news articles and Wikipedia infoboxes, light cleaning, and pivoting through machine translation data. To the best of our knowledge, the IndicNLG Benchmark is the first NLG benchmark for Indic languages and the most diverse multilingual NLG dataset, with approximately 8M examples across 5 tasks and 11 languages. The datasets and models will be publicly available. @@ -5045,7 +5045,7 @@ Improving Machine Translation with Phrase Pair Injection and Corpus Filtering AkshayBathejaIndian Institute of Technology Bombay - PushpakBhattacharyyaIndian Institute of Technology Bombay and Patna + PushpakBhattacharyyaIndian Institute of Technology Bombay and Patna 5395-5400 In this paper, we show that the combination of Phrase Pair Injection and Corpus Filtering boosts the performance of Neural Machine Translation (NMT) systems. We extract parallel phrases and sentences from the pseudo-parallel corpus and augment it with the parallel corpus to train the NMT models. With the proposed approach, we observe an improvement in the Machine Translation (MT) system for 3 low-resource language pairs, Hindi-Marathi, English-Marathi, and English-Pashto, and 6 translation directions by up to 2.7 BLEU points, on the FLORES test data. These BLEU score improvements are over the models trained using the whole pseudo-parallel corpus augmented with the parallel corpus. 2022.emnlp-main.361 @@ -5083,7 +5083,7 @@ PeilingLuMicrosoft XuTanMicrosoft Research Asia RuiWangMicrosoft - ChenZhangZhejiang University + ChenZhangZhejiang University SongruoyaoWuZhejiang University KejunZhangZhejiang University Xiang-YangLiUniversity of Science and Technology of China @@ -5133,7 +5133,7 @@ FandongMengWeChat AI, Tencent ChulunZhouTencent JieZhouTencent Inc. - DegenHuangDalian University of Technology + DegenHuangDalian University of Technology JinsongSuXiamen university 5468-5477 k-Nearest-Neighbor Machine Translation (kNN-MT) becomes an important research direction of NMT in recent years. Its main idea is to retrieve useful key-value pairs from an additional datastore to modify translations without updating the NMT model. However, the underlying retrieved noisy pairs will dramatically deteriorate the model performance. In this paper, we conduct a preliminary study and find that this problem results from not fully exploiting the prediction of the NMT model. To alleviate the impact of noise, we propose a confidence-enhanced kNN-MT model with robust training. Concretely, we introduce the NMT confidence to refine the modeling of two important components of kNN-MT: kNN distribution and the interpolation weight. Meanwhile we inject two types of perturbations into the retrieved pairs for robust training. Experimental results on four benchmark datasets demonstrate that our model not only achieves significant improvements over current kNN-MT models, but also exhibits better robustness. Our code is available at https://github.com/DeepLearnXMU/Robust-knn-mt. @@ -5211,7 +5211,7 @@ <fixed-case>PLOG</fixed-case>: Table-to-Logic Pretraining for Logical Table-to-Text Generation AoLiuTokyo Institute of Technology HaoyuDongMicrosoft Research - NaoakiOkazakiTokyo Institute of Technology + NaoakiOkazakiTokyo Institute of Technology ShiHanMicrosoft Research Asia DongmeiZhangMicrosoft Research 5531-5546 @@ -5237,7 +5237,7 @@ HexiangHuGoogle XiChenGoogle PatVergaGoogle - WilliamCohenGoogle AI + WilliamCohenGoogle AI 5558-5570 While language Models store a massive amount of world knowledge implicitly in their parameters, even very large models often fail to encode information about rare entities and events, while incurring huge computational costs. Recently, retrieval-augmented models, such as REALM, RAG, and RETRO, have incorporated world knowledge into language generation by leveraging an external non-parametric index and have demonstrated impressive performance with constrained model sizes. However, these methods are restricted to retrieving only textual knowledge, neglecting the ubiquitous amount of knowledge in other modalities like images – much of which contains information not covered by any text. To address this limitation, we propose the first Multimodal Retrieval-Augmented Transformer (MuRAG), which accesses an external non-parametric multimodal memory to augment language generation. MuRAG is pre-trained with a mixture of large-scale image-text and text-only corpora using a joint contrastive and generative loss. We perform experiments on two different datasets that require retrieving and reasoning over both images and text to answer a given query: WebQA, and MultimodalQA. Our results show that MuRAG achieves state-of-the-art accuracy, outperforming existing models by 10-20% absolute on both datasets and under both distractor and full-wiki settings. 2022.emnlp-main.375 @@ -5250,7 +5250,7 @@ ZhaoyueSunUniversity of Warwick JiazhengLiUniversity of Warwick GabrielePergolaUniversity of Warwick - ByronWallaceNortheastern University + ByronWallaceNortheastern University BinoJohnAstraZeneca NigelGreeneAstraZeneca JosephKimAstraZeneca @@ -5273,7 +5273,7 @@ <fixed-case>S</fixed-case>im<fixed-case>QA</fixed-case>: Detecting Simultaneous <fixed-case>MT</fixed-case> Errors through Word-by-Word Question Answering - HyoJungHanUniversity of Maryland, College Park + HyoJungHanUniversity of Maryland, College Park MarineCarpuatUniversity of Maryland JordanBoyd-GraberUniversity of Maryland 5598-5616 @@ -5407,7 +5407,7 @@ SubhabrataMukherjeeMicrosoft Research XiaodongLiuMicrosoft Research JingGaoPurdue University - Ahmed HassanAwadallahMicrosoft Research + Ahmed HassanAwadallahMicrosoft Research JianfengGaoMicrosoft Research, Redmond 5744-5760 Standard fine-tuning of large pre-trained language models (PLMs) for downstream tasks requires updating hundreds of millions to billions of parameters, and storing a large copy of the PLM weights for every task resulting in increased cost for storing, sharing and serving the models. To address this, parameter-efficient fine-tuning (PEFT) techniques were introduced where small trainable components are injected in the PLM and updated during fine-tuning. We propose AdaMix as a general PEFT method that tunes a mixture of adaptation modules – given the underlying PEFT method of choice – introduced in each Transformer layer while keeping most of the PLM weights frozen. For instance, AdaMix can leverage a mixture of adapters like Houlsby or a mixture of low rank decomposition matrices like LoRA to improve downstream task performance over the corresponding PEFT methods for fully supervised and few-shot NLU and NLG tasks. Further, we design AdaMix such that it matches the same computational cost and the number of tunable parameters as the underlying PEFT method. By only tuning 0.1-0.2% of PLM parameters, we show that AdaMix outperforms SOTA parameter-efficient fine-tuning and full model fine-tuning for both NLU and NLG tasks. @@ -5445,7 +5445,7 @@ <fixed-case>T</fixed-case>-Modules: Translation Modules for Zero-Shot Cross-Modal Machine Translation Paul-AmbroiseDuquenneMeta AI HongyuGongFacebook AI Research - BenoîtSagotInria + BenoîtSagotInria HolgerSchwenkMeta AI Research 5794-5806 We present a new approach to perform zero-shot cross-modal transfer between speech and text for translation tasks. Multilingual speech and text are encoded in a joint fixed-size representation space. Then, we compare different approaches to decode these multimodal and multilingual fixed-size representations, enabling zero-shot translation between languages and modalities. All our models are trained without the need of cross-modal labeled translation data.Despite a fixed-size representation, we achieve very competitive results on several text and speech translation tasks. In particular, we significantly improve the state-of-the-art for zero-shot speech translation on Must-C. Incorporating a speech decoder in our framework, we introduce the first results for zero-shot direct speech-to-speech and text-to-speech translation. @@ -5538,7 +5538,7 @@ A Framework for Adapting Pre-Trained Language Models to Knowledge Graph Completion JustinLovelaceCornell University - CarolynRoséCarnegie Mellon University + CarolynRoséCarnegie Mellon University 5937-5955 Recent work has demonstrated that entity representations can be extracted from pre-trained language models to develop knowledge graph completion models that are more robust to the naturally occurring sparsity found in knowledge graphs. In this work, we conduct a comprehensive exploration of how to best extract and incorporate those embeddings into knowledge graph completion models. We explore the suitability of the extracted embeddings for direct use in entity ranking and introduce both unsupervised and supervised processing methods that can lead to improved downstream performance. We then introduce supervised embedding extraction methods that can extract more informative representations. We then synthesize our findings and develop a knowledge graph completion model that significantly outperforms recent neural models. 2022.emnlp-main.398 @@ -5598,9 +5598,9 @@ YiFungUniversity of Illinois at Urbana Champaign KathrynCongerUniversitiy of Colorado, Boulder AhmedELsayedUniversity of Colorado - MarthaPalmerUniversity of Colorado - PreslavNakovMohamed bin Zayed University of Artificial Intelligence - EduardHovyUniversity of Melbourne + MarthaPalmerUniversity of Colorado + PreslavNakovMohamed bin Zayed University of Artificial Intelligence + EduardHovyUniversity of Melbourne KevinSmallAmazon HengJiUniversity of Illinois at Urbana-Champaign and Amazon (Amazon Scholar) 6002-6018 @@ -5637,7 +5637,7 @@ The Authenticity Gap in Human Evaluation KawinEthayarajhStanford University - DanJurafskyStanford University + DanJurafskyStanford University 6056-6070 Human ratings are the gold standard in NLG evaluation. The standard protocol is to collect ratings of generated text, average across annotators, and rank NLG systems by their average scores. However, little consideration has been given as to whether this approach faithfully captures human preferences. Analyzing this standard protocol through the lens of utility theory in economics, we identify the implicit assumptions it makes about annotators. These assumptions are often violated in practice, in which case annotator ratings cease to reflect their preferences. The most egregious violations come from using Likert scales, which provably reverse the direction of the true preference in certain cases. We suggest improvements to the standard protocol to make it more theoretically sound, but even in its improved form, it cannot be used to evaluate open-ended tasks like story generation. For the latter, we propose a new human evaluation protocol called system-level probabilistic assessment (SPA). When human evaluation of stories is done with SPA, we can recover the ordering of GPT-3 models by size, with statistically significant results. However, when human evaluation is done with the standard protocol, less than half of the expected preferences can be recovered (e.g., there is no significant difference between curie and davinci, despite using a highly powered test). 2022.emnlp-main.406 @@ -5664,8 +5664,8 @@ LinyongNanYale University BudhadityaDebMicrosoft Corporation ChenguangZhuMicrosoft Cognitive Services Research Group - Ahmed HassanAwadallahMicrosoft Research - DragomirRadevYale University + Ahmed HassanAwadallahMicrosoft Research + DragomirRadevYale University 6081-6093 Neural attention models have achieved significant improvements on many natural language processing tasks. However, the quadratic memory complexity of the self-attention module with respect to the input length hinders their applications in long text summarization. Instead of designing more efficient attention modules, we approach this problem by investigating if models with a restricted context can have competitive performance compared with the memory-efficient attention models that maintain a global context by treating the input as a single sequence. Our model is applied to individual pages, which contain parts of inputs grouped by the principle of locality, during both the encoding and decoding stages. We empirically investigated three kinds of locality in text summarization at different levels of granularity, ranging from sentences to documents. Our experimental results show that our model has a better performance compared with strong baseline models with efficient attention modules, and our analysis provides further insights into our locality-aware modeling strategy. 2022.emnlp-main.408 @@ -5738,7 +5738,7 @@ A Survey of Active Learning for Natural Language Processing ZhisongZhangCarnegie Mellon University EmmaStrubellCarnegie Mellon University - EduardHovyUniversity of Melbourne + EduardHovyUniversity of Melbourne 6166-6190 In this work, we provide a literature review of active learning (AL) for its applications in natural language processing (NLP). In addition to a fine-grained categorization of query strategies, we also investigate several other important aspects of applying AL to NLP problems. These include AL for structured prediction tasks, annotation cost, model learning (especially with deep neural models), and starting and stopping AL. Finally, we conclude with a discussion of related topics and future directions. 2022.emnlp-main.414 @@ -5828,7 +5828,7 @@ <fixed-case>C</fixed-case>onv<fixed-case>F</fixed-case>in<fixed-case>QA</fixed-case>: Exploring the Chain of Numerical Reasoning in Conversational Finance Question Answering - ZhiyuChenMeta + ZhiyuChenMeta ShiyangLiUC Santa Barbara ChareseSmileyJPMorgan AI Research ZhiqiangMaJPMorgan Chase @@ -5843,7 +5843,7 @@ A Span-based Multimodal Variational Autoencoder for Semi-supervised Multimodal Named Entity Recognition BaohangZhouNankai University - YingZhangNankai University + YingZhangNankai University KehuiSongNankai University WenyaGuoNankai University GuoqingZhaoMashang Consumer Finance Co.,Ltd. @@ -5868,7 +5868,7 @@ Modeling Consistency Preference via Lexical Chains for Document-level Neural Machine Translation XinglinLyuSoochow University - JunhuiLiSoochow University, Suzhou + JunhuiLiSoochow University, Suzhou ShiminTaohuawei HaoYangHuawei Co. Ltd YingQinHuawei Technologies @@ -5898,7 +5898,7 @@ Factorizing Content and Budget Decisions in Abstractive Summarization of Long Documents MarcioFonsecaUniversity of Edinburgh YftahZiserUniversity of Edinburgh - Shay B.CohenUniversity of Edinburgh + Shay B.CohenUniversity of Edinburgh 6341-6364 We argue that disentangling content selection from the budget used to cover salient content improves the performance and applicability of abstractive summarizers. Our method, FactorSum, does this disentanglement by factorizing summarization into two steps through an energy function: (1) generation of abstractive summary views covering salient information in subsets of the input document (document views); (2) combination of these views into a final summary, following a budget and content guidance. This guidance may come from different sources, including from an advisor model such as BART or BigBird, or in oracle mode – from the reference. This factorization achieves significantly higher ROUGE scores on multiple benchmarks for long document summarization, namely PubMed, arXiv, and GovReport. Most notably, our model is effective for domain adaptation. When trained only on PubMed samples, it achieves a 46.29 ROUGE-1 score on arXiv, outperforming PEGASUS trained in domain by a large margin. Our experimental results indicate that the performance gains are due to more flexible budget adaptation and processing of shorter contexts provided by partial document views. 2022.emnlp-main.426 @@ -5981,7 +5981,7 @@ JunbinXiaoNational University of Singapore YicongLiNational University of Singapore WeihongDengBeijing University of Posts and Telecommunications - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore 6439-6455 This survey aims to sort out the recent advances in video question answering (VideoQA) and point towards future directions. We firstly categorize the datasets into 1) normal VideoQA, multi-modal VideoQA and knowledge-based VideoQA, according to the modalities invoked in the question-answer pairs, or 2) factoid VideoQA and inference VideoQA, according to the technical challenges in comprehending the questions and deriving the correct answers. We then summarize the VideoQA techniques, including those mainly designed for Factoid QA (e.g., the early spatio-temporal attention-based methods and the recently Transformer-based ones) and those targeted at explicit relation and logic inference (e.g., neural modular networks, neural symbolic methods, and graph-structured methods). Aside from the backbone techniques, we delve into the specific models and find out some common and useful insights either for video modeling, question answering, or for cross-modal correspondence learning. Finally, we point out the research trend of studying beyond factoid VideoQA to inference VideoQA, as well as towards the robustness and interpretability. Additionally, we maintain a repository, https://github.com/VRU-NExT/VideoQA, to keep trace of the latest VideoQA papers, datasets, and their open-source implementations if available. With these efforts, we strongly hope this survey could shed light on the follow-up VideoQA research. 2022.emnlp-main.432 @@ -6138,7 +6138,7 @@ Textual Manifold-based Defense Against Natural Language Adversarial Examples DangNguyen MinhVinAI Research - Anh TuanLuuNanyang Technological University, Singapore + Anh TuanLuuNanyang Technological University, Singapore 6612-6625 Despite the recent success of large pretrained language models in NLP, they are susceptible to adversarial examples. Concurrently, several studies on adversarial images have observed an intriguing property: the adversarial images tend to leave the low-dimensional natural data manifold. In this study, we find a similar phenomenon occurs in the contextualized embedding space of natural sentences induced by pretrained language models in which textual adversarial examples tend to have their embeddings diverge off the manifold of natural sentence embeddings. Based on this finding, we propose Textual Manifold-based Defense (TMD), a defense mechanism that learns the embedding space manifold of the underlying language model and projects novel inputs back to the approximated structure before classification. Through extensive experiments, we find that our method consistently and significantly outperforms previous defenses under various attack settings while remaining unaffected to the clean accuracy. To the best of our knowledge, this is the first kind of manifold-based defense adapted to the NLP domain. 2022.emnlp-main.443 @@ -6176,7 +6176,7 @@ <fixed-case>ATTEMPT</fixed-case>: Parameter-Efficient Multi-task Tuning via Attentional Mixtures of Soft Prompts AkariAsaiUniversity of Washington MohammadrezaSalehiUniversity of Washington - MatthewPetersAllen Institute for Artificial Intelligence + MatthewPetersAllen Institute for Artificial Intelligence HannanehHajishirziUniversity of Washington 6655-6672 This work introduces a new multi-task, parameter-efficient language model (LM) tuning method that learns to transfer knowledge across different tasks via a mixture of soft prompts—small prefix embedding vectors pre-trained for different tasks. Our method, called ATTEMPT (ATTEntional Mixtures of Prompt Tuning), obtains source prompts as encodings of large-scale source tasks into a small number of parameters and trains an attention module to interpolate the source prompts and a newly initialized target prompt for every instance in the target task. During training, only the target task prompt and the attention weights, which are shared between tasks in multi-task training, are updated, while the original LM and source prompts are intact. ATTEMPT is highly parameter-efficient (e.g., updates 2,300 times fewer parameters than full fine-tuning), while it overcomes instability of prompt tuning and achieves high task performance using learned knowledge from high-resource tasks. Moreover, it is modular using pre-trained soft prompts, and can flexibly add or remove source prompts for effective knowledge transfer. Our experimental results across 21 diverse NLP datasets show that ATTEMPT significantly outperforms prompt tuning and outperforms or matches fully fine-tuned or other parameter-efficient tuning approaches that use 10 times more parameters. Finally, ATTEMPT outperforms previous work in few-shot learning settings. @@ -6282,7 +6282,7 @@ Improving Event Coreference Resolution Using Document-level and Topic-level Information ShengXuSoochow University PeifengLiSoochow University - QiaomingZhuSoochow University + QiaomingZhuSoochow University 6765-6775 Event coreference resolution (ECR) aims to cluster event mentions that refer to the same real-world events. Deep learning methods have achieved SOTA results on the ECR task. However, due to the encoding length limitation, previous methods either adopt classical pairwise models based on sentence-level context or split each document into multiple chunks and encode them separately. They failed to capture the interactions and contextual cues among those long-distance event mentions. Besides, high-level information, such as event topics, is rarely considered to enhance representation learning for ECR. To address the above two issues, we first apply a Longformer-based encoder to obtain the document-level embeddings and an encoder with a trigger-mask mechanism to learn sentence-level embeddings based on local context. In addition, we propose an event topic generator to infer the latent topic-level representations. Finally, using the above event embeddings, we employ a multiple tensor matching method to capture their interactions at the document, sentence, and topic levels. Experimental results on the KBP 2017 dataset show that our model outperforms the SOTA baselines. 2022.emnlp-main.454 @@ -6305,7 +6305,7 @@ Boosting Natural Language Generation from Instructions with Meta-Learning BudhadityaDebMicrosoft Corporation - Ahmed HassanAwadallahMicrosoft Research + Ahmed HassanAwadallahMicrosoft Research GuoqingZhengMicrosoft Research 6792-6808 Recent work has shown that language models (LMs) trained with multi-task instructional learning (MTIL) can solve diverse NLP tasks in zero- and few-shot settings with improved performance compared to prompt tuning. MTIL illustrates that LMs can extract and use information about the task from instructions beyond the surface patterns of the inputs and outputs. This suggests that meta-learning may further enhance the utilization of instructions for effective task transfer. In this paper we investigate whether meta-learning applied to MTIL can further improve generalization to unseen tasks in a zero-shot setting. Specifically, we propose to adapt meta-learning to MTIL in three directions: 1) Model Agnostic Meta Learning (MAML), 2) Hyper-Network (HNet) based adaptation to generate task specific parameters conditioned on instructions, and 3) an approach combining HNet and MAML. Through extensive experiments on the large scale Natural Instructions V2 dataset, we show that our proposed approaches significantly improve over strong baselines in zero-shot settings. In particular, meta-learning improves the effectiveness of instructions and is most impactful when the test tasks are strictly zero-shot (i.e. no similar tasks in the training set) and are “hard” for LMs, illustrating the potential of meta-learning for MTIL for out-of-distribution tasks. @@ -6380,7 +6380,7 @@ XinLiAlibaba Group RuidanHeAlibaba Group LidongBingAlibaba DAMO Academy - ShafiqJotyNanyang Technological University; Salesforce AI Research + ShafiqJotyNanyang Technological University; Salesforce AI Research LuoSiAlibaba Group Inc 6878-6890 Knowledge-enhanced language representation learning has shown promising results across various knowledge-intensive NLP tasks. However, prior methods are limited in efficient utilization of multilingual knowledge graph (KG) data for language model (LM) pretraining. They often train LMs with KGs in indirect ways, relying on extra entity/relation embeddings to facilitate knowledge injection. In this work, we explore methods to make better use of the multilingual annotation and language agnostic property of KG triples, and present novel knowledge based multilingual language models (KMLMs) trained directly on the knowledge triples. We first generate a large amount of multilingual synthetic sentences using the Wikidata KG triples. Then based on the intra- and inter-sentence structures of the generated data, we design pretraining tasks to enable the LMs to not only memorize the factual knowledge but also learn useful logical patterns. Our pretrained KMLMs demonstrate significant performance improvements on a wide range of knowledge-intensive cross-lingual tasks, including named entity recognition (NER), factual knowledge retrieval, relation classification, and a newly designed logical reasoning task. @@ -6392,7 +6392,7 @@ Revisiting Grammatical Error Correction Evaluation and Beyond PeiyuanGongBeijing Institute of Technology XueboLiuHarbin Institute of Technology, Shenzhen - HeyanHuangBeijing Institute of Technology + HeyanHuangBeijing Institute of Technology MinZhangSuda 6891-6902 Pretraining-based (PT-based) automatic evaluation metrics (e.g., BERTScore and BARTScore) have been widely used in several sentence generation tasks (e.g., machine translation and text summarization) due to their better correlation with human judgments over traditional overlap-based methods. Although PT-based methods have become the de facto standard for training grammatical error correction (GEC) systems, GEC evaluation still does not benefit from pretrained knowledge. This paper takes the first step towards understanding and improving GEC evaluation with pretraining. We first find that arbitrarily applying PT-based metrics to GEC evaluation brings unsatisfactory correlation results because of the excessive attention to inessential systems outputs (e.g., unchanged parts). To alleviate the limitation, we propose a novel GEC evaluation metric to achieve the best of both worlds, namely PT-M2 which only uses PT-based metrics to score those corrected parts. Experimental results on the CoNLL14 evaluation task show that PT-M2 significantly outperforms existing methods, achieving a new state-of-the-art result of 0.949 Pearson correlation. Further analysis reveals that PT-M2 is robust to evaluate competitive GEC systems. Source code and scripts are freely available at https://github.com/pygongnlp/PT-M2. @@ -6411,7 +6411,7 @@ YixinLiuYale University LukeBensonYale University WeijinZouYale University - DragomirRadevYale University + DragomirRadevYale University 6903-6917 Unfaithful text generation is a common problem for text generation systems. In the case of Data-to-Text (D2T) systems, the factuality of the generated text is particularly crucial for any real-world applications. We introduce R2D2, a training framework that addresses unfaithful Data-to-Text generation by training a system both as a generator and a faithfulness discriminator with additional replacement detection and unlikelihood learning tasks. To facilitate such training, we propose two methods for sampling unfaithful sentences. We argue that the poor entity retrieval capability of D2T systems is one of the primary sources of unfaithfulness, so in addition to the existing metrics, we further propose named entity based metrics to evaluate the fidelity of D2T generations. Our experimental results show that R2D2 systems could effectively mitigate the unfaithful text generation, and they achieve new state-of-theart results on FeTaQA, LogicNLG, and ToTTo, all with significant improvements. 2022.emnlp-main.464 @@ -6421,7 +6421,7 @@ <fixed-case>IDK</fixed-case>-<fixed-case>MRC</fixed-case>: Unanswerable Questions for <fixed-case>I</fixed-case>ndonesian Machine Reading Comprehension Rifki AfinaPutriKAIST - AliceOhKAIST + AliceOhKAIST 6918-6933 Machine Reading Comprehension (MRC) has become one of the essential tasks in Natural Language Understanding (NLU) as it is often included in several NLU benchmarks (Liang et al., 2020; Wilie et al., 2020). However, most MRC datasets only have answerable question type, overlooking the importance of unanswerable questions. MRC models trained only on answerable questions will select the span that is most likely to be the answer, even when the answer does not actually exist in the given passage (Rajpurkar et al., 2018). This problem especially remains in medium- to low-resource languages like Indonesian. Existing Indonesian MRC datasets (Purwarianti et al., 2007; Clark et al., 2020) are still inadequate because of the small size and limited question types, i.e., they only cover answerable questions. To fill this gap, we build a new Indonesian MRC dataset called I(n)don’tKnow- MRC (IDK-MRC) by combining the automatic and manual unanswerable question generation to minimize the cost of manual dataset construction while maintaining the dataset quality. Combined with the existing answerable questions, IDK-MRC consists of more than 10K questions in total. Our analysis shows that our dataset significantly improves the performance of Indonesian MRC models, showing a large improvement for unanswerable questions. 2022.emnlp-main.465 @@ -6478,7 +6478,7 @@ WenqiangLeiSichuan University WenxuanZhangDAMO Academy, Alibaba Group WaiLamThe Chinese University of Hong Kong - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore 6970-6984 To facilitate conversational question answering (CQA) over hybrid contexts in finance, we present a new dataset, named PACIFIC. Compared with existing CQA datasets, PACIFIC exhibits three key features: (i) proactivity, (ii) numerical reasoning, and (iii) hybrid context of tables and text. A new task is defined accordingly to study Proactive Conversational Question Answering (PCQA), which combines clarification question generation and CQA. In addition, we propose a novel method, namely UniPCQA, to adapt a hybrid format of input and output content in PCQA into the Seq2Seq problem, including the reformulation of the numerical reasoning process as code generation. UniPCQA performs multi-task learning over all sub-tasks in PCQA and incorporates a simple ensemble strategy to alleviate the error propagation issue in the multi-task learning by cross-validating top-k sampled Seq2Seq outputs. We benchmark the PACIFIC dataset with extensive baselines and provide comprehensive evaluations on each sub-task of PCQA. 2022.emnlp-main.469 @@ -6606,7 +6606,7 @@ SrinivasRavishankarIBM Research DaikiKimuraIBM Research AI KeerthiramMurugesanIBM Research - RamónFernandez AstudilloIBM Research + RamónFernandez AstudilloIBM Research TahiraNaseemIBM Research AI PavanKapanipathiIBM Research AlexanderGrayIBM Research @@ -6620,7 +6620,7 @@ <fixed-case>P</fixed-case>ara<fixed-case>T</fixed-case>ag: A Dataset of Paraphrase Tagging for Fine-Grained Labels, <fixed-case>NLG</fixed-case> Evaluation, and Data Augmentation ShuohangWangMicrosoft RuochenXuMicrosoft - YangLiuMicrosoft + YangLiuMicrosoft ChenguangZhuMicrosoft Cognitive Services Research Group MichaelZengMicrosoft Corp 7111-7122 @@ -6734,7 +6734,7 @@ m<fixed-case>PLUG</fixed-case>: Effective and Efficient Vision-Language Learning by Cross-modal Skip-connections ChenliangLiAlibaba Group HaiyangXuAlibaba Damo Academy - JunfengTianAlibaba Group + JunfengTianAlibaba Group WeiWangAlibaba Group MingYanAlibaba Group BinBiAlibaba @@ -6896,9 +6896,9 @@ Does Corpus Quality Really Matter for Low-Resource Languages? MikelArtetxeMeta AI ItziarAldabeHiTZ Center - Ixa, University of the Basque Country (UPV/EHU) - RodrigoAgerriHiTZ Center - Ixa, University of the Basque Country UPV/EHU + RodrigoAgerriHiTZ Center - Ixa, University of the Basque Country UPV/EHU OlatzPerez-de-ViñaspreHiTZ Center - Ixa, University of the Basque Country UPV/EHU - AitorSoroaHiTZ Center - Ixa, University of the Basque Country UPV/EHU + AitorSoroaHiTZ Center - Ixa, University of the Basque Country UPV/EHU 7383-7390 The vast majority of non-English corpora are derived from automatically filtered versions of CommonCrawl. While prior work has identified major issues on the quality of these datasets (Kreutzer et al., 2021), it is not clear how this impacts downstream performance. Taking representation learning in Basque as a case study, we explore tailored crawling (manually identifying and scraping websites with high-quality content) as an alternative to filtering CommonCrawl. Our new corpus, called EusCrawl, is similar in size to the Basque portion of popular multilingual corpora like CC100 and mC4, yet it has a much higher quality according to native annotators. For instance, 66% of documents are rated as high-quality for EusCrawl, in contrast with <33% for both mC4 and CC100. Nevertheless, we obtain similar results on downstream NLU tasks regardless of the corpus used for pre-training. Our work suggests that NLU performance in low-resource languages is not primarily constrained by the quality of the data, and other factors like corpus size and domain coverage can play a more important role. 2022.emnlp-main.499 @@ -6921,7 +6921,7 @@ Does Self-Rationalization Improve Robustness to Spurious Correlations? AlexisRossAllen Institute for Artificial Intelligence - MatthewPetersAllen Institute for Artificial Intelligence + MatthewPetersAllen Institute for Artificial Intelligence AnaMarasovicUniversity of Utah 7403-7416 Rationalization is fundamental to human reasoning and learning. NLP models trained to produce rationales along with predictions, called self-rationalization models, have been investigated for their interpretability and utility to end-users. However, the extent to which training with human-written rationales facilitates learning remains an under-explored question. We ask whether training models to self-rationalize can aid in their learning to solve tasks for the right reasons. Specifically, we evaluate how training self-rationalization models with free-text rationales affects robustness to spurious correlations in fine-tuned encoder-decoder and decoder-only models of six different sizes. We evaluate robustness to spurious correlations by measuring performance on 1) manually annotated challenge datasets and 2) subsets of original test sets where reliance on spurious correlations would fail to produce correct answers. We find that while self-rationalization can improve robustness to spurious correlations in low-resource settings, it tends to hurt robustness in higher-resource settings. Furthermore, these effects depend on model family and size, as well as on rationale content. Together, our results suggest that explainability can come at the cost of robustness; thus, appropriate care should be taken when training self-rationalizing models with the goal of creating more trustworthy models. @@ -6948,7 +6948,7 @@ Subword Evenness (<fixed-case>S</fixed-case>u<fixed-case>E</fixed-case>) as a Predictor of Cross-lingual Transfer to Low-resource Languages OlgaPelloniUniversity of Zurich AnastassiaShaitarovaUniversity of Zurich - TanjaSamardzicUniversity of Zurich + TanjaSamardzicUniversity of Zurich 7428-7445 Pre-trained multilingual models, such as mBERT, XLM-R and mT5, are used to improve the performance on various tasks in low-resource languages via cross-lingual transfer. In this framework, English is usually seen as the most natural choice for a transfer language (for fine-tuning or continued training of a multilingual pre-trained model), but it has been revealed recently that this is often not the best choice. The success of cross-lingual transfer seems to depend on some properties of languages, which are currently hard to explain. Successful transfer often happens between unrelated languages and it often cannot be explained by data-dependent factors.In this study, we show that languages written in non-Latin and non-alphabetic scripts (mostly Asian languages) are the best choices for improving performance on the task of Masked Language Modelling (MLM) in a diverse set of 30 low-resource languages and that the success of the transfer is well predicted by our novel measure of Subword Evenness (SuE). Transferring language models over the languages that score low on our measure results in the lowest average perplexity over target low-resource languages. Our correlation coefficients obtained with three different pre-trained multilingual models are consistently higher than all the other predictors, including text-based measures (type-token ratio, entropy) and linguistically motivated choice (genealogical and typological proximity). 2022.emnlp-main.503 @@ -6984,7 +6984,7 @@ UtsavShuklaThapar Institute of Engineering Technology Husrev TahaSencarQatar Computing Research Institute MohamedNabeelQatar Computing Research Institute, HBKU - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 7470-7480 We study the problem of profiling news media on the Web with respect to their factuality of reporting and bias. This is an important but under-studied problem related to disinformation and “fake news” detection, but it addresses the issue at a coarser granularity compared to looking at an individual article or an individual claim. This is useful as it allows to profile entire media outlets in advance. Unlike previous work, which has focused primarily on text (e.g., on the text of the articles published by the target website, or on the textual description in their social media profiles or in Wikipedia), here our main focus is on modeling the similarity between media outlets based on the overlap of their audience. This is motivated by homophily considerations, i.e., the tendency of people to have connections to people with similar interests, which we extend to media, hypothesizing that similar types of media would be read by similar kinds of users. In particular, we propose GREENER (GRaph nEural nEtwork for News mEdia pRofiling), a model that builds a graph of inter-media connections based on their audience overlap, and then uses graph neural networks to represent each medium. We find that such representations are quite useful for predicting the factuality and the bias of news media outlets, yielding improvements over state-of-the-art results reported on two datasets. When augmented with conventionally used representations obtained from news articles, Twitter, YouTube, Facebook, and Wikipedia, prediction accuracy is found to improve by 2.5-27 macro-F1 points for the two tasks. 2022.emnlp-main.506 @@ -7013,7 +7013,7 @@ ChaoqunDuanHarbin Institute of Technology YouzhengWuJD AI Research XiaodongHeJD AI Research - TiejunZhaoHarbin Institute of Technology + TiejunZhaoHarbin Institute of Technology 7494-7507 Question answering requiring discrete reasoning, e.g., arithmetic computing, comparison, and counting, over knowledge is a challenging task.In this paper, we propose UniRPG, a semantic-parsing-based approach advanced in interpretability and scalability, to perform Unified discrete Reasoning over heterogeneous knowledge resources, i.e., table and text, as Program Generation. Concretely, UniRPG consists of a neural programmer and a symbolic program executor,where a program is the composition of a set of pre-defined general atomic and higher-order operations and arguments extracted from table and text.First, the programmer parses a question into a program by generating operations and copying arguments, and then, the executor derives answers from table and text based on the program.To alleviate the costly program annotation issue, we design a distant supervision approach for programmer learning, where pseudo programs are automatically constructed without annotated derivations.Extensive experiments on the TAT-QA dataset show that UniRPG achieves tremendous improvements and enhances interpretability and scalability compared with previous state-of-the-art methods, even without derivation annotation.Moreover, it achieves promising performance on the textual dataset DROP without derivation annotation. 2022.emnlp-main.508 @@ -7048,8 +7048,8 @@ Cross-lingual neural fuzzy matching for exploiting target-language monolingual corpora in computer-aided translation - MiquelEsplà-GomisUniversitat d’Alacant - Víctor M.Sánchez-CartagenaUniversitat d’Alacant + MiquelEsplà-GomisUniversitat d’Alacant + Víctor M.Sánchez-CartagenaUniversitat d’Alacant Juan AntonioPérez-OrtizDepartament de Llenguatges i Sistemes Informàtics, Universitat d’Alacant FelipeSánchez-MartínezUniversitat d’Alacant 7532-7543 @@ -7117,7 +7117,7 @@ Cross-Modal Similarity-Based Curriculum Learning for Image Captioning HongkuanZhangNagoya University SakuSugawaraNational Institute of Informatics - AkikoAizawaNational Institute of Informatics + AkikoAizawaNational Institute of Informatics LeiZhouNagoya University RyoheiSasanoNagoya University KoichiTakedaNagoya University @@ -7132,7 +7132,7 @@ Debiasing Masks: A New Framework for Shortcut Mitigation in <fixed-case>NLU</fixed-case> Johannes MarioMeissnerThe University of Tokyo SakuSugawaraNational Institute of Informatics - AkikoAizawaNational Institute of Informatics + AkikoAizawaNational Institute of Informatics 7607-7613 Debiasing language models from unwanted behaviors in Natural Language Understanding (NLU) tasks is a topic with rapidly increasing interest in the NLP community. Spurious statistical correlations in the data allow models to perform shortcuts and avoid uncovering more advanced and desirable linguistic features.A multitude of effective debiasing approaches has been proposed, but flexibility remains a major issue. For the most part, models must be retrained to find a new set of weights with debiased behavior.We propose a new debiasing method in which we identify debiased pruning masks that can be applied to a finetuned model. This enables the selective and conditional application of debiasing behaviors.We assume that bias is caused by a certain subset of weights in the network; our method is, in essence, a mask search to identify and remove biased weights.Our masks show equivalent or superior performance to the standard counterparts, while offering important benefits.Pruning masks can be stored with high efficiency in memory, and it becomes possible to switch among several debiasing behaviors (or revert back to the original biased model) at inference time. Finally, it opens the doors to further research on how biases are acquired by studying the generated masks. For example, we observed that the early layers and attention heads were pruned more aggressively, possibly hinting towards the location in which biases may be encoded. 2022.emnlp-main.517 @@ -7192,7 +7192,7 @@ HonghaiYuTsinghua University XumingHuSchool of Software, Tsinghua University Shu’angLiSchool of Software, Tsinghua University - LiLinTsinghua University + LiLinTsinghua University FukunMaSchool of Software,Tsinghua University YawenYangSchool of Software, Tsinghua University LijieWenSchool of Software, Tsinghua University @@ -7263,7 +7263,7 @@ Spectral Probing MaxMüller-EbersteinIT University of Copenhagen Robvan der GootIT University of Copenhagen - BarbaraPlankLMU Munich + BarbaraPlankLMU Munich 7730-7741 Linguistic information is encoded at varying timescales (subwords, phrases, etc.) and communicative levels, such as syntax and semantics. Contextualized embeddings have analogously been found to capture these phenomena at distinctive layers and frequencies. Leveraging these findings, we develop a fully learnable frequency filter to identify spectral profiles for any given task. It enables vastly more granular analyses than prior handcrafted filters, and improves on efficiency. After demonstrating the informativeness of spectral probing over manual filters in a monolingual setting, we investigate its multilingual characteristics across seven diverse NLP tasks in six languages. Our analyses identify distinctive spectral profiles which quantify cross-task similarity in a linguistically intuitive manner, while remaining consistent across languages—highlighting their potential as robust, lightweight task descriptors. 2022.emnlp-main.527 @@ -7374,7 +7374,7 @@ XiaohanZhangInstitute of Automation, Chinese Academy of Sciences ShaonanWangNational Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences NanLinInstitute of Psychology of the Chinese Academy of Sciences - ChengqingZongInstitute of Automation, Chinese Academy of Sciences + ChengqingZongInstitute of Automation, Chinese Academy of Sciences 7852-7861 Evidence from psycholinguistic studies suggests that the human brain builds a hierarchical syntactic structure during language comprehension. However, it is still unknown whether the neural basis of such structures is universal across languages. In this paper, we first analyze the differences in language structure between two diverse languages: Chinese and English. By computing the working memory requirements when applying parsing strategies to different language structures, we find that top-down parsing generates less memory load for the right-branching English and bottom-up parsing is less memory-demanding for Chinese.Then we use functional magnetic resonance imaging (fMRI) to investigate whether the brain has different syntactic adaptation strategies in processing Chinese and English. Specifically, for both Chinese and English, we extract predictors from the implementations of different parsing strategies, i.e., bottom-up and top-down. Then, these predictors are separately associated with fMRI signals. Results show that for Chinese and English, the brain utilizes bottom-up and top-down parsing strategies separately. These results reveal that the brain adopts parsing strategies with less memory processing load according to different language structures. 2022.emnlp-main.535 @@ -7423,7 +7423,7 @@ <fixed-case>S</fixed-case>ocio<fixed-case>P</fixed-case>robe: What, When, and Where Language Models Learn about Sociodemographics AnneLauscherUniversity of Hamburg FedericoBianchiStanford University - Samuel R.BowmanNew York University + Samuel R.BowmanNew York University DirkHovyBocconi University 7901-7918 Pre-trained language models (PLMs) have outperformed other NLP models on a wide range of tasks. Opting for a more thorough understanding of their capabilities and inner workings, researchers have established the extend to which they capture lower-level knowledge like grammaticality, and mid-level semantic knowledge like factual understanding. However, there is still little understanding of their knowledge of higher-level aspects of language. In particular, despite the importance of sociodemographic aspects in shaping our language, the questions of whether, where, and how PLMs encode these aspects, e.g., gender or age, is still unexplored. We address this research gap by probing the sociodemographic knowledge of different single-GPU PLMs on multiple English data sets via traditional classifier probing and information-theoretic minimum description length probing. Our results show that PLMs do encode these sociodemographics, and that this knowledge is sometimes spread across the layers of some of the tested PLMs. We further conduct a multilingual analysis and investigate the effect of supplementary training to further explore to what extent, where, and with what amount of pre-training data the knowledge is encoded. Our overall results indicate that sociodemographic knowledge is still a major challenge for NLP. PLMs require large amounts of pre-training data to acquire the knowledge and models that excel in general language understanding do not seem to own more knowledge about these aspects. @@ -7446,7 +7446,7 @@ AhmetÜstünUniversity of Groningen AriannaBisazzaUniversity of Groningen GosseBoumaUniversity of Groningen - Gertjanvan NoordUniversity of Groningen + Gertjanvan NoordUniversity of Groningen SebastianRuderGoogle 7934-7949 Massively multilingual models are promising for transfer learning across tasks and languages. However, existing methods are unable to fully leverage training data when it is available in different task-language combinations. To exploit such heterogeneous supervision, we propose Hyper-X, a single hypernetwork that unifies multi-task and multilingual learning with efficient adaptation. It generates weights for adapter modules conditioned on both tasks and language embeddings. By learning to combine task and language-specific knowledge, our model enables zero-shot transfer for unseen languages and task-language combinations. Our experiments on a diverse set of languages demonstrate that Hyper-X achieves the best or competitive gain when a mixture of multiple resources is available, while on par with strong baseline in the standard scenario. Hyper-X is also considerably more efficient in terms of parameters and resources compared to methods that train separate adapters. Finally, Hyper-X consistently produces strong results in few-shot scenarios for new languages, showing the versatility of our approach beyond zero-shot transfer. @@ -7513,8 +7513,8 @@ RobertoDessìFacebook AI Research / Universitat Pompeu Fabra EleonoraGualdoniUniversitat Pompeu Fabra FrancescaFranzonUniversitat Pompeu Fabra - GemmaBoledaUniversitat Pompeu Fabra / ICREA - MarcoBaroniICREA + GemmaBoledaUniversitat Pompeu Fabra / ICREA + MarcoBaroniICREA 7998-8007 We compare the 0-shot performance of a neural caption-based image retriever when given as input either human-produced captions or captions generated by a neural captioner. We conduct this comparison on the recently introduced ImageCoDe data-set (Krojer et al. 2022), which contains hard distractors nearly identical to the images to be retrieved. We find that the neural retriever has much higher performance when fed neural rather than human captions, despite the fact that the former, unlike the latter, were generated without awareness of the distractors that make the task hard. Even more remarkably, when the same neural captions are given to human subjects, their retrieval performance is almost at chance level. Our results thus add to the growing body of evidence that, even when the “language” of neural models resembles English, this superficial resemblance might be deeply misleading. 2022.emnlp-main.546 @@ -7536,7 +7536,7 @@ Bilingual Synchronization: Restoring Translational Relationships with Editing Operations JitaoXuLISN, CNRS, Paris-Saclay University - JosepCregoSYSTRAN + JosepCregoSYSTRAN FrançoisYvonLISN CNRS & Univ. Paris Saclay 8016-8030 Machine Translation (MT) is usually viewed as a one-shot process that generates the target language equivalent of some source text from scratch. We consider here a more general setting which assumes an initial target sequence, that must be transformed into a valid translation of the source, thereby restoring parallelism between source and target. For this bilingual synchronization task, we consider several architectures (both autoregressive and non-autoregressive) and training regimes, and experiment with multiple practical settings such as simulated interactive MT, translating with Translation Memory (TM) and TM cleaning. Our results suggest that one single generic edit-based system, once fine-tuned, can compare with, or even outperform, dedicated systems specifically trained for these tasks. @@ -7572,7 +7572,7 @@ Entity-Focused Dense Passage Retrieval for Outside-Knowledge Visual Question Answering JialinWuUniversity of Texas at Austin - RaymondMooneyUniversity of Texas at Austin + RaymondMooneyUniversity of Texas at Austin 8061-8072 Most Outside-Knowledge Visual Question Answering (OK-VQA) systems employ a two-stage framework that first retrieves external knowledge given the visual question and then predicts the answer based on the retrieved content. However, the retrieved knowledge is often inadequate. Retrievals are frequently too general and fail to cover specific knowledge needed to answer the question. Also, the naturally available supervision (whether the passage contains the correct answer) is weak and does not guarantee question relevancy. To address these issues, we propose an Entity-Focused Retrieval (EnFoRe) model that provides stronger supervision during training and recognizes question-relevant entities to help retrieve more specific knowledge. Experiments show that our EnFoRe model achieves superior retrieval performance on OK-VQA, the currently largest outside-knowledge VQA dataset. We also combine the retrieved knowledge with state-of-the-art VQA models, and achieve a new state-of-the-art performance on OK-VQA. 2022.emnlp-main.551 @@ -7588,7 +7588,7 @@ QiZhangFudan University JingtingYeFudan University MenghanZhangInstitute of Modern Languages and Linguistics, Fudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 8073-8092 Multilingual BERT (mBERT) has demonstrated considerable cross-lingual syntactic ability, whereby it enables effective zero-shot cross-lingual transfer of syntactic knowledge. The transfer is more successful between some languages, but it is not well understood what leads to this variation and whether it fairly reflects difference between languages. In this work, we investigate the distributions of grammatical relations induced from mBERT in the context of 24 typologically different languages. We demonstrate that the distance between the distributions of different languages is highly consistent with the syntactic difference in terms of linguistic formalisms. Such difference learnt via self-supervision plays a crucial role in the zero-shot transfer performance and can be predicted by variation in morphosyntactic properties between languages. These results suggest that mBERT properly encodes languages in a way consistent with linguistic diversity and provide insights into the mechanism of cross-lingual transfer. 2022.emnlp-main.552 @@ -7615,10 +7615,10 @@ Long Text Generation with Topic-aware Discrete Latent Variable Model ErguangYangBeijing Jiaotong University MingtongLiuBeijing Jiaotong University - DeyiXiongTianjin University + DeyiXiongTianjin University YujieZhangBeijing Jiaotong University YufengChenBeijing Jiaotong University - JinanXuBeijing Jiaotong University + JinanXuBeijing Jiaotong University 8100-8107 Generating coherent long texts is an important yet challenging task, particularly forthe open-ended generation. Prior work based on discrete latent codes focuses on the modeling of discourse relation, resulting in discrete codes only learning shallow semantics (Ji and Huang, 2021). A natural text always revolves around several related topics and the transition across them is natural and smooth.In this work, we investigate whether discrete latent codes can learn information of topics. To this end, we build a topic-aware latent code-guided text generation model. To encourage discrete codes to model information about topics, we propose a span-level bag-of-words training objective for the model. Automatic and manual evaluation experiments show that our method can generate more topic-relevant and coherent texts. 2022.emnlp-main.554 @@ -7630,10 +7630,10 @@ YihengShuNanjing University ZhiweiYuMicrosoft Research Asia YuhanLiNankai University - Börje F.KarlssonMicrosoft Research Asia + Börje F.KarlssonMicrosoft Research Asia TingtingMaHarbin Institute of Technology YuzhongQuNanjing University - Chin-YewLinMicrosoft Research + Chin-YewLinMicrosoft Research 8108-8121 Pre-trained language models (PLMs) have shown their effectiveness in multiple scenarios. However, KBQA remains challenging, especially regarding coverage and generalization settings. This is due to two main factors: i) understanding the semantics of both questions and relevant knowledge from the KB; ii) generating executable logical forms with both semantic and syntactic correctness. In this paper, we present a new KBQA model, TIARA, which addresses those issues by applying multi-grained retrieval to help the PLM focus on the most relevant KB context, viz., entities, exemplary logical forms, and schema items. Moreover, constrained decoding is used to control the output space and reduce generation errors. Experiments over important benchmarks demonstrate the effectiveness of our approach. TIARA outperforms previous SOTA, including those using PLMs or oracle entity annotations, by at least 4.1 and 1.1 F1 points on GrailQA and WebQuestionsSP, respectively. Specifically on GrailQA, TIARA outperforms previous models in all categories, with an improvement of 4.7 F1 points in zero-shot generalization. 2022.emnlp-main.555 @@ -7646,9 +7646,9 @@ BinWangNanjing University JiangzhouJuNanjing University YangFanNanjing University - XinyuDaiNational Key Laboratory for Novel Software Technology, Nanjing University + XinyuDaiNational Key Laboratory for Novel Software Technology, Nanjing University ShujianHuangNational Key Laboratory for Novel Software Technology, Nanjing University - JiajunChenNanjing University + JiajunChenNanjing University 8122-8132 As one of the challenging NLP tasks, designing math word problem (MWP) solvers has attracted increasing research attention for the past few years. In previous work, models designed by taking into account the properties of the binary tree structure of mathematical expressions at the output side have achieved better performance. However, the expressions corresponding to a MWP are often diverse (e.g., n_1+n_2 \times n_3-n_4, n_3\times n_2-n_4+n_1, etc.), and so are the corresponding binary trees, which creates difficulties in model learning due to the non-deterministic output space. In this paper, we propose the Structure-Unified M-Tree Coding Solver (SUMC-Solver), which applies a tree with any M branches (M-tree) to unify the output structures. To learn the M-tree, we use a mapping to convert the M-tree into the M-tree codes, where codes store the information of the paths from tree root to leaf nodes and the information of leaf nodes themselves, and then devise a Sequence-to-Code (seq2code) model to generate the codes. Experimental results on the widely used MAWPS and Math23K datasets have demonstrated that SUMC-Solver not only outperforms several state-of-the-art models under similar experimental settings but also performs much better under low-resource conditions. 2022.emnlp-main.556 @@ -7799,7 +7799,7 @@ BenjaminDayanETH Zurich RyanCotterellETH Zürich TimVieiraJohns Hopkins University - JasonEisnerJohns Hopkins University + JasonEisnerJohns Hopkins University 8289-8305 Weighted finite-state automata (WSFAs) arecommonly used in NLP. Failure transitions area useful extension for compactly representingbackoffs or interpolation in n-gram modelsand CRFs, which are special cases of WFSAs.Unfortunately, applying standard algorithmsfor computing the pathsum requires expand-ing these compact failure transitions. As aresult, na ̈ıve computation of the pathsum inacyclic WFSAs with failure transitions runs inO(|Q|2|Σ|) (O(|Q||Σ|) for deterministic WF-SAs) while the equivalent algorithm in normalWFSAs runs in O(|E|), where E representsthe set of transitions, Q the set of states, andΣ the alphabet. In this work, we present moreefficient algorithms for computing the pathsumin sparse acyclic WFSAs, i.e., WFSAs with av-erage out symbol fraction s ≪ 1. In those,backward runs in O(s|Q||Σ|). We proposean algorithm for semiring-weighted automatawhich runs in O(|E| + s|Σ||Q||Tmax| log |Σ|),where |Tmax| is the size of the largest con-nected component of failure transitions. Ad-ditionally, we propose faster algorithms fortwo specific cases. For ring-weighted WF-SAs we propose an algorithm with complex-ity O(|E| + s|Σ||Q||πmax|), where |πmax| de-notes the longest path length of failure transi-tions stemming from q and Σ(q) the set of sym-bols on the outgoing transitions from q. Forsemiring-weighted WFSAs whose failure tran-sition topology satisfies a condition exemplifiedby CRFs, we propose an algorithm with com-plexity O(|E| + s|Σ||Q| log |Σ|). 2022.emnlp-main.567 @@ -7829,7 +7829,7 @@ RuiZhengFudan University TaoGuifudan university QiZhangFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 8318-8331 Adversarial training is one of the most powerful methods to improve the robustness of pre-trained language models (PLMs). However, this approach is typically more expensive than traditional fine-tuning because of the necessity to generate adversarial examples via gradient descent. Delving into the optimization process of adversarial training, we find that robust connectivity patterns emerge in the early training phase (typically 0.15~0.3 epochs), far before parameters converge. Inspired by this finding, we dig out robust early-bird tickets (i.e., subnetworks) to develop an efficient adversarial training method: (1) searching for robust tickets with structured sparsity in the early stage; (2) fine-tuning robust tickets in the remaining time. To extract the robust tickets as early as possible, we design a ticket convergence metric to automatically terminate the searching process. Experiments show that the proposed efficient adversarial training method can achieve up to 7\times \sim 13 \times training speedups while maintaining comparable or even better robustness compared to the most competitive state-of-the-art adversarial training methods. 2022.emnlp-main.569 @@ -7857,8 +7857,8 @@ VassilinaNikoulinaNaver Labs Europe AlexandreBerardNaver Labs Europe CarolineBrunNaver Labs Europe - JamesHendersonIdiap Research Institute - LaurentBesacierNaver Labs Europe + JamesHendersonIdiap Research Institute + LaurentBesacierNaver Labs Europe 8348-8359 In recent years, multilingual machine translation models have achieved promising performance on low-resource language pairs by sharing information between similar languages, thus enabling zero-shot translation. To overcome the “curse of multilinguality”, these models often opt for scaling up the number of parameters, which makes their use in resource-constrained environments challenging. We introduce SMaLL-100, a distilled version of the M2M-100(12B) model, a massively multilingual machine translation model covering 100 languages. We train SMaLL-100 with uniform sampling across all language pairs and therefore focus on preserving the performance of low-resource languages. We evaluate SMaLL-100 on different low-resource benchmarks: FLORES-101, Tatoeba, and TICO-19 and demonstrate that it outperforms previous massively multilingual models of comparable sizes (200-600M) while improving inference latency and memory usage. Additionally, our model achieves comparable results to M2M-100 (1.2B), while being 3.6x smaller and 4.3x faster at inference. 2022.emnlp-main.571 @@ -7877,7 +7877,7 @@ YongDingHonor Device Co., Ltd YiboCheungHonor Device Co., Ltd QiZhangFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 8360-8371 Recently, more and more pre-trained language models are released as a cloud service. It allows users who lack computing resources to perform inference with a powerful model by uploading data to the cloud. The plain text may contain private information, as the result, users prefer to do partial computations locally and upload intermediate representations to the cloud for subsequent inference.However, recent studies have shown that intermediate representations can also be recovered to plain text with reasonable accuracy, thus the risk of privacy leakage still exists. To address this issue, we propose TextFusion, a novel method for preserving inference privacy.Specifically, we train a Fusion Predictor to dynamically fuse token representations, which hides multiple private token representations behind an unrecognizable one.Furthermore, an adversarial training regime is employed to privatize these representations. In this way, the cloud only receives incomplete and perturbed representations, making it difficult to accurately recover the complete plain text.The experimental results on diverse classification tasks show that our approach can effectively preserve inference privacy without significantly sacrificing performance in different scenarios. 2022.emnlp-main.572 @@ -7978,7 +7978,7 @@ Revisiting <fixed-case>D</fixed-case>oc<fixed-case>RED</fixed-case> - Addressing the False Negative Problem in Relation Extraction QingyuTanNational University of Singapore - LuXuSingapore University of Technology and Design + LuXuSingapore University of Technology and Design LidongBingAlibaba DAMO Academy Hwee TouNgNational University of Singapore Sharifah MahaniAljuniedAlibaba @@ -7992,8 +7992,8 @@ Towards Summary Candidates Fusion MathieuRavautNanyang Technological University - ShafiqJotyNanyang Technological University; Salesforce AI Research - NancyChenInstitute for Infocomm Research, A*STAR + ShafiqJotyNanyang Technological University; Salesforce AI Research + NancyChenInstitute for Infocomm Research, A*STAR 8488-8504 Sequence-to-sequence deep neural models fine-tuned for abstractive summarization can achieve great performance on datasets with enough human annotations. Yet, it has been shown that they have not reached their full potential, with a wide gap between the top beam search output and the oracle beam. Recently, re-ranking methods have been proposed, to learn to select a better summary candidate. However, such methods are limited by the summary quality aspects captured by the first-stage candidates. To bypass this limitation, we propose a new paradigm in second-stage abstractive summarization called SummaFusion that fuses several summary candidates to produce a novel abstractive second-stage summary. Our method works well on several summarization datasets, improving both the ROUGE scores and qualitative properties of fused summaries. It is especially good when the candidates to fuse are worse, such as in the few-shot setup where we set a new state-of-the art. We will make our code and checkpoints available at https://github.com/ntunlp/SummaFusion/. 2022.emnlp-main.581 @@ -8014,7 +8014,7 @@ <fixed-case>T</fixed-case>ran<fixed-case>SHER</fixed-case>: Translating Knowledge Graph Embedding with Hyper-Ellipsoidal Restriction YizhiLiUniversity of Sheffield; Pingan Technology - WeiFanUniversity of Central Florida + WeiFanUniversity of Central Florida ChaoLiuPingan Technology ChenghuaLinDepartment of Computer Science, University of Sheffield JiangQianPingan Technology @@ -8122,7 +8122,7 @@ ChrysoulaZervaInstituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon TaisiyaGlushkovaInstituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon RicardoReiUnbabel/INESC-ID - André F. T.MartinsUnbabel, Instituto de Telecomunicacoes + André F. T.MartinsUnbabel, Instituto de Telecomunicacoes 8622-8641 Trainable evaluation metrics for machine translation (MT) exhibit strong correlation with human judgements, but they are often hard to interpret and might produce unreliable scores under noisy or out-of-domain data. Recent work has attempted to mitigate this with simple uncertainty quantification techniques (Monte Carlo dropout and deep ensembles), however these techniques (as we show) are limited in several ways – for example, they are unable to distinguish between different kinds of uncertainty, and they are time and memory consuming. In this paper, we propose more powerful and efficient uncertainty predictors for MT evaluation, and we assess their ability to target different sources of aleatoric and epistemic uncertainty. To this end, we develop and compare training objectives for the COMET metric to enhance it with an uncertainty prediction output, including heteroscedastic regression, divergence minimization, and direct uncertainty prediction.Our experiments show improved results on uncertainty prediction for the WMT metrics task datasets, with a substantial reduction in computational costs. Moreover, they demonstrate the ability of these predictors to address specific uncertainty causes in MT evaluation, such as low quality references and out-of-domain data. 2022.emnlp-main.591 @@ -8174,7 +8174,7 @@ Measuring the Mixing of Contextual Information in the Transformer JavierFerrandoUPC Gerard I.GállegoUniversitat Politècnica de Catalunya - Marta R.Costa-jussàMeta AI + Marta R.Costa-jussàMeta AI 8698-8714 The Transformer architecture aggregates input information through the self-attention mechanism, but there is no clear understanding of how this information is mixed across the entire model. Additionally, recent works have demonstrated that attention weights alone are not enough to describe the flow of information. In this paper, we consider the whole attention block –multi-head attention, residual connection, and layer normalization– and define a metric to measure token-to-token interactions within each layer. Then, we aggregate layer-wise interpretations to provide input attribution scores for model predictions. Experimentally, we show that our method, ALTI (Aggregation of Layer-wise Token-to-token Interactions), provides more faithful explanations and increased robustness than gradient-based methods. 2022.emnlp-main.595 @@ -8185,7 +8185,7 @@ Dealing with Abbreviations in the <fixed-case>S</fixed-case>lovenian Biographical Lexicon AngelDazaVrije Universiteit Amsterdam - Computational Linguistics & Text Mining Lab AntskeFokkensVU Amsterdam - TomažErjavecDept. of Knowledge Technologies, Jožef Stefan Institute + TomažErjavecDept. of Knowledge Technologies, Jožef Stefan Institute 8715-8720 Abbreviations present a significant challenge for NLP systems because they cause tokenization and out-of-vocabulary errors. They can also make the text less readable, especially in reference printed books, where they are extensively used. Abbreviations are especially problematic in low-resource settings, where systems are less robust to begin with. In this paper, we propose a new method for addressing the problems caused by a high density of domain-specific abbreviations in a text. We apply this method to the case of a Slovenian biographical lexicon and evaluate it on a newly developed gold-standard dataset of 51 Slovenian biographies. Our abbreviation identification method performs significantly better than commonly used ad-hoc solutions, especially at identifying unseen abbreviations. We also propose and present the results of a method for expanding the identified abbreviations in context. 2022.emnlp-main.596 @@ -8223,7 +8223,7 @@ Gerard I.GállegoUniversitat Politècnica de Catalunya BelenAlastrueyUniversitat Politècnica de Catalunya CarlosEscolanoUniversitat Politècnica de Catalunya - Marta R.Costa-jussàMeta AI + Marta R.Costa-jussàMeta AI 8756-8769 In Neural Machine Translation (NMT), each token prediction is conditioned on the source sentence and the target prefix (what has been previously translated at a decoding step). However, previous work on interpretability in NMT has mainly focused solely on source sentence tokens’ attributions. Therefore, we lack a full understanding of the influences of every input token (source sentence and target prefix) in the model predictions. In this work, we propose an interpretability method that tracks input tokens’ attributions for both contexts. Our method, which can be extended to any encoder-decoder Transformer-based model, allows us to better comprehend the inner workings of current NMT models. We apply the proposed method to both bilingual and multilingual Transformers and present insights into their behaviour. 2022.emnlp-main.599 @@ -8237,7 +8237,7 @@ ShymaAlhuwaiderKing Abdullah University of Science and Technology FeifanLiUniversity of Southern California XiangliangZhangUniversity of Notre Dame - KennethChurchNortheastern University + KennethChurchNortheastern University MohamedElhoseinyKAUST 8770-8785 This paper introduces ArtELingo, a new benchmark and dataset, designed to encourage work on diversity across languages and cultures. Following ArtEmis, a collection of 80k artworks from WikiArt with 0.45M emotion labels and English-only captions, ArtELingo adds another 0.79M annotations in Arabic and Chinese, plus 4.8K in Spanish to evaluate “cultural-transfer” performance. 51K artworks have 5 annotations or more in 3 languages. This diversity makes it possible to study similarities and differences across languages and cultures. Further, we investigate captioning tasks, and find diversity improves the performance of baseline models. ArtELingo is publicly available at ‘www.artelingo.org‘ with standard splits and baseline models. We hope our work will help ease future research on multilinguality and culturally-aware AI. @@ -8311,7 +8311,7 @@ YeLiuSalesforce SemihYavuzSalesforce Research RuiMengSalesforce Research - DragomirRadevYale University + DragomirRadevYale University CaimingXiongMetamind YingboZhouSalesforce Research 8858-8869 @@ -8404,9 +8404,9 @@ A Major Obstacle for <fixed-case>NLP</fixed-case> Research: Let’s Talk about Time Allocation! - KatharinaKannUniversity of Colorado Boulder + KatharinaKannUniversity of Colorado Boulder ShiranDudyUniversity of Colorado - Arya D.McCarthyJohns Hopkins University + Arya D.McCarthyJohns Hopkins University 8959-8969 The field of natural language processing (NLP) has grown over the last few years: conferences have become larger, we have published an incredible amount of papers, and state-of-the-art research has been implemented in a large variety of customer-facing products. However, this paper argues that we have been less successful than we *should* have been and reflects on where and how the field fails to tap its full potential. Specifically, we demonstrate that, in recent years, **subpar time allocation has been a major obstacle for NLP research**. We outline multiple concrete problems together with their negative consequences and, importantly, suggest remedies to improve the status quo. We hope that this paper will be a starting point for discussions around which common practices are – or are *not* – beneficial for NLP research. 2022.emnlp-main.612 @@ -8442,7 +8442,7 @@ LinyongNanYale University ZhentingQiZhejiang University RuiZhangPenn State University - DragomirRadevYale University + DragomirRadevYale University 9006-9018 Reasoning over tabular data requires both table structure understanding and a broad set of table reasoning skills. Current models with table-specific architectures and pre-training methods perform well on understanding table structures, but they still struggle with tasks that require various table reasoning skills. In this work, we develop ReasTAP to show that high-level table reasoning skills can be injected into models during pre-training without a complex table-specific architecture design. We define 7 table reasoning skills, such as numerical operation, temporal comparison, and conjunction. Each reasoning skill is associated with one example generator, which synthesizes questions over semi-structured tables according to the sampled templates. We model the table pre-training task as a sequence generation task and pre-train ReasTAP to generate precise answers of the synthetic examples. ReasTAP is evaluated on four benchmarks covering three downstream tasks including 1) WikiSQL-Weak and WikiTQ for Table Question Answering, 2) TabFact for Table Fact Verification, and 3) LogicNLG for Faithful Table-to-Text Generation. Experimental results demonstrate that ReasTAP achieves new state-of-the-art results on all of them and delivers a significant improvement under low-resource setting. Our code is publicly available at https://github.com/Yale-LILY/ReasTAP. 2022.emnlp-main.615 @@ -8451,7 +8451,7 @@ Few-shot Learning with Multilingual Generative Language Models - Xi VictoriaLinMeta AI + Xi VictoriaLinMeta AI TodorMihaylovMeta AI MikelArtetxeMeta AI TianluWangMeta @@ -8467,9 +8467,9 @@ VishravChaudharyMicrosoft BrianO’HoroMeta AI JeffWangMeta AI - LukeZettlemoyerUniversity of Washington; Meta + LukeZettlemoyerUniversity of Washington; Meta ZornitsaKozarevaMeta AI - MonaDiabMeta Responsible AI + MonaDiabMeta Responsible AI VeselinStoyanovFacebook XianLiMeta AI 9019-9052 @@ -8493,7 +8493,7 @@ Detecting Label Errors by Using Pre-Trained Language Models DerekChongStanford University JennyHongStanford University - ChristopherManningStanford University + ChristopherManningStanford University 9074-9091 We show that large pre-trained language models are inherently highly capable of identifying label errors in natural language datasets: simply examining out-of-sample data points in descending order of fine-tuned task loss significantly outperforms more complex error-detection mechanisms proposed in previous work. To this end, we contribute a novel method for introducing realistic, human-originated label noise into existing crowdsourced datasets such as SNLI and TweetNLP. We show that this noise has similar properties to real, hand-verified label errors, and is harder to detect than existing synthetic noise, creating challenges for model robustness.We argue that human-originated noise is a better standard for evaluation than synthetic noise. Finally, we use crowdsourced verification to evaluate the detection of real errors on IMDB, Amazon Reviews, and Recon, and confirm that pre-trained models perform at a 9–36% higher absolute Area Under the Precision-Recall Curve than existing models. 2022.emnlp-main.618 @@ -8555,7 +8555,7 @@ Improving Factual Consistency in Summarization with Compression-Based Post-Editing AlexFabbriSalesforce AI Research - Prafulla KumarChoubeySalesforce AI Research + Prafulla KumarChoubeySalesforce AI Research JesseVigSalesforce Research Chien-ShengWuSalesforce CaimingXiongMetamind @@ -8602,7 +8602,7 @@ YichengHeColumbia University WenhaoLiColumbia University Kai-WeiChangUCLA - Shih-FuChangColumbia University + Shih-FuChangColumbia University 9212-9224 Visual commonsense understanding requires Vision Language (VL) models to not only understand image and text but also cross-reference in-between to fully integrate and achieve comprehension of the visual scene described. Recently, various approaches have been developed and have achieved high performance on visual commonsense benchmarks. However, it is unclear whether the models really understand the visual scene and underlying commonsense knowledge due to limited evaluation data resources. To provide an in-depth analysis, we present a Multimodal Evaluation (ME) pipeline to automatically generate question-answer pairs to test models’ understanding of the visual scene, text, and related knowledge. We then take a step further to show that training with the ME data boosts the model’s performance in standard VCR evaluation. Lastly, our in-depth analysis and comparison reveal interesting findings: (1) semantically low-level information can assist the learning of high-level information but not the opposite; (2) visual information is generally under utilization compared with text. 2022.emnlp-main.626 @@ -8649,7 +8649,7 @@ Overcoming Catastrophic Forgetting in Zero-Shot Cross-Lingual Generation - TuVuUniversity of Massachusetts Amherst + TuVuUniversity of Massachusetts Amherst AdityaBaruaGoogle BrianLesterGoogle DanielCerGoogle Research; University of California at Berkeley @@ -8728,7 +8728,7 @@ MahdiNamazifarAmazon Alexa AI MohitBansalUniversity of North Carolina at Chapel Hill JesseThomasonUniversity of Southern California - DilekHakkani-TurAmazon Alexa AI + DilekHakkani-TurAmazon Alexa AI 9369-9378 Embodied Vision and Language Task Completion requires an embodied agent to interpret natural language instructions and egocentric visual observations to navigate through and interact with environments. In this work, we examine ALFRED, a challenging benchmark for embodied task completion, with the goal of gaining insight into how effectively models utilize language. We find evidence that sequence-to-sequence and transformer-based models trained on this benchmark are not sufficiently sensitive to changes in input language instructions. Next, we construct a new test split – ALFRED-L to test whether ALFRED models can generalize to task structures not seen during training that intuitively require the same types of language understanding required in ALFRED. Evaluation of existing models on ALFRED-L suggests that (a) models are overly reliant on the sequence in which objects are visited in typical ALFRED trajectories and fail to adapt to modifications of this sequence and (b) models trained with additional augmented trajectories are able to adapt relatively better to such changes in input language instructions. 2022.emnlp-main.636 @@ -8740,7 +8740,7 @@ Dungeons and Dragons as a Dialog Challenge for Artificial Intelligence ChrisCallison-BurchUniversity of Pennsylvania - Gaurav SinghTomarGoogle Research + Gaurav SinghTomarGoogle Research Lara J.MartinUniversity of Pennsylvania DaphneIppolitoUniversity of Pennsylvania SumaBailisGoogle Research @@ -8775,7 +8775,7 @@ ChristopherThomasColumbia University HammadAyyubiColumbia University HengJiUniversity of Illinois at Urbana-Champaign and Amazon (Amazon Scholar) - Shih-FuChangColumbia University + Shih-FuChangColumbia University 9402-9413 Given a long untrimmed video and natural language queries, video grounding (VG) aims to temporally localize the semantically-aligned video segments. Almost all existing VG work holds two simple but unrealistic assumptions: 1) All query sentences can be grounded in the corresponding video. 2) All query sentences for the same video are always at the same semantic scale. Unfortunately, both assumptions make today’s VG models fail to work in practice. For example, in real-world multimodal assets (eg, news articles), most of the sentences in the article can not be grounded in their affiliated videos, and they typically have rich hierarchical relations (ie, at different semantic scales). To this end, we propose a new challenging grounding task: Weakly-Supervised temporal Article Grounding (WSAG). Specifically, given an article and a relevant video, WSAG aims to localize all “groundable” sentences to the video, and these sentences are possibly at different semantic scales. Accordingly, we collect the first WSAG dataset to facilitate this task: YouwikiHow, which borrows the inherent multi-scale descriptions in wikiHow articles and plentiful YouTube videos. In addition, we propose a simple but effective method DualMIL for WSAG, which consists of a two-level MIL loss and a single-/cross- sentence constraint loss. These training objectives are carefully designed for these relaxed assumptions. Extensive ablations have verified the effectiveness of DualMIL. 2022.emnlp-main.639 @@ -8786,7 +8786,7 @@ Exploring Dual Encoder Architectures for Question Answering ZheDongGoogle Inc JianmoNiGoogle - DanBikelMeta + DanBikelMeta EnriqueAlfonsecaGoogle YuanWangGoogle ChenQuGoogle @@ -8836,7 +8836,7 @@ Towards Teachable Reasoning Systems: Using a Dynamic Memory of User Feedback for Continual System Improvement - BhavanaDalvi MishraAllen Institute for Artificial Intelligence + BhavanaDalvi MishraAllen Institute for Artificial Intelligence OyvindTafjordAI2 PeterClarkAllen Institute for Artificial Intelligence 9465-9480 @@ -8849,7 +8849,7 @@ Knowledge Transfer from Answer Ranking to Answer Generation MatteoGabburoUniversity of Trento - RikKoncel-KedziorskiAmazon + RikKoncel-KedziorskiAmazon SiddhantGargAmazon Alexa AI LucaSoldainiAllen Institute for AI AlessandroMoschittiAmazon @@ -9055,7 +9055,7 @@ Leveraging <fixed-case>QA</fixed-case> Datasets to Improve Generative Data Augmentation DheerajMekalaUniversity of California San Diego - TuVuUniversity of Massachusetts Amherst + TuVuUniversity of Massachusetts Amherst TimoSchickMeta AI JingboShangUniversity of California, San Diego 9737-9750 @@ -9095,7 +9095,7 @@ MengCaoMcGill University YueDongMcGill University JingyiHeMcGill University - Jackie Chi KitCheungMila / McGill University + Jackie Chi KitCheungMila / McGill University 9768-9780 State-of-the-art abstractive summarization systems frequently hallucinate content that is not supported by the source document, mainly due to noise in the training dataset.Existing methods opt to drop the noisy samples or tokens from the training set entirely, reducing the effective training set size and creating an artificial propensity to copy words from the source. In this work, we propose a training objective for abstractive summarization based on rejection learning, in which the model learns whether or not to reject potentially noisy tokens. We further propose a regularized decoding objective that penalizes non-factual candidate summaries during inference by using the rejection probability learned during training.We show that our method considerably improves the factuality of generated summaries in automatic and human evaluations when compared to five baseline models, and that it does so while increasing the abstractiveness of the generated summaries. 2022.emnlp-main.663 @@ -9141,7 +9141,7 @@ Correcting Diverse Factual Errors in Abstractive Summarization via Post-Editing and Language Model Infilling VidhishaBalachandranCarnegie Mellon University HannanehHajishirziUniversity of Washington - WilliamCohenGoogle AI + WilliamCohenGoogle AI YuliaTsvetkovUniversity of Washington 9818-9830 Abstractive summarization models often generate inconsistent summaries containing factual errors or hallucinated content. Recent works focus on correcting factual errors in generated summaries via post-editing. Such correction models are trained using adversarial non-factual summaries constructed using heuristic rules for injecting errors. However, generating non-factual summaries using heuristics often does not generalize well to actual model errors. In this work, we propose to generate hard, representative synthetic examples of non-factual summaries through infilling language models. With this data, we train a more robust fact-correction model to post-edit the summaries to improve factual consistency. Through quantitative and qualitative experiments on two popular summarization datasets— CNN/DM and XSum—we show that our approach vastly outperforms prior methods in correcting erroneous summaries. Our model—FactEdit—improves factuality scores by over ~11 points on CNN/DM and over ~31 points on XSum on average across multiple summarization models, producing more factual summaries while maintaining competitive summarization quality. @@ -9171,7 +9171,7 @@ JiMaGoogle VincentZhaoGoogle YiLuanGoogle - KeithHallGoogle Research + KeithHallGoogle Research Ming-WeiChangGoogle Research YinfeiYangGoogle 9844-9855 @@ -9202,7 +9202,7 @@ ShengqiongWuWuhan University FangfangSuSchool of National Cybersecurity, Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education,School of Computer Science, Wuhan University WenxuanShiWuhan University - DonghongJiWuhan University + DonghongJiWuhan University BoCaiWuhan University 9871-9881 Relation Extraction (RE) is a fundamental task of information extraction, which has attracted a large amount of research attention. Previous studies focus on extracting the relations within a sentence or document, while currently researchers begin to explore cross-document RE. However, current cross-document RE methods directly utilize text snippets surrounding target entities in multiple given documents, which brings considerable noisy and non-relevant sentences. Moreover, they utilize all the text paths in a document bag in a coarse-grained way, without considering the connections between these text paths.In this paper, we aim to address both of these shortages and push the state-of-the-art for cross-document RE. First, we focus on input construction for our RE model and propose an entity-based document-context filter to retain useful information in the given documents by using the bridge entities in the text paths. Second, we propose a cross-document RE model based on cross-path entity relation attention, which allow the entity relations across text paths to interact with each other. We compare our cross-document RE method with the state-of-the-art methods in the dataset CodRED. Our method outperforms them by at least 10% in F1, thus demonstrating its effectiveness. @@ -9244,7 +9244,7 @@ Polyglot Prompt: Multilingual Multitask Prompt Training JinlanFuNational University of Singapore - See-KiongNgNational University of Singapore + See-KiongNgNational University of Singapore PengfeiLiuCarnegie Mellon University 9919-9935 This paper aims for a potential architectural improvement for multilingual learning and asks: Can different tasks from different languages be modeled in a monolithic framework, i.e. without any task/language-specific module? The benefit of achieving this could open new doors for future multilingual research, including allowing systems trained on low resources to be further assisted by other languages as well as other tasks. We approach this goal by developing a learning framework named Polyglot Prompting to exploit prompting methods for learning a unified semantic space for different languages and tasks with multilingual prompt engineering. We performed a comprehensive evaluation of 6 tasks, namely topic classification, sentiment classification, named entity recognition, question answering, natural language inference, and summarization, covering 24 datasets and 49 languages. The experimental results demonstrated the efficacy of multilingual multitask prompt-based learning and led to inspiring observations. We also present an interpretable multilingual evaluation methodology and show how the proposed framework, multilingual multitask prompt training, works. We release all datasets prompted in the best setting and code. @@ -9282,7 +9282,7 @@ ZhilingZhangShanghai Jiao Tong University SiyuanChenShanghai Jiao Tong University MengyueWuShanghai Jiao Tong University - KennyZhuShanghai Jiao Tong University + KennyZhuShanghai Jiao Tong University 9970-9985 Mental disease detection (MDD) from social media has suffered from poor generalizability and interpretability, due to lack of symptom modeling. This paper introduces PsySym, the first annotated symptom identification corpus of multiple psychiatric disorders, to facilitate further research progress. PsySym is annotated according to a knowledge graph of the 38 symptom classes related to 7 mental diseases complied from established clinical manuals and scales, and a novel annotation framework for diversity and quality. Experiments show that symptom-assisted MDD enabled by PsySym can outperform strong pure-text baselines. We also exhibit the convincing MDD explanations provided by symptom predictions with case studies, and point to their further potential applications. 2022.emnlp-main.677 @@ -9310,8 +9310,8 @@ HannahRashkinGoogle Research DavidReitterGoogle Research HannanehHajishirziUniversity of Washington - MariOstendorfUniversity of Washington - Gaurav SinghTomarGoogle Research + MariOstendorfUniversity of Washington + Gaurav SinghTomarGoogle Research 10000-10014 Compared to standard retrieval tasks, passage retrieval for conversational question answering (CQA) poses new challenges in understanding the current user question, as each question needs to be interpreted within the dialogue context. Moreover, it can be expensive to re-train well-established retrievers such as search engines that are originally developed for non-conversational queries. To facilitate their use, we develop a query rewriting model CONQRR that rewrites a conversational question in the context into a standalone question. It is trained with a novel reward function to directly optimize towards retrieval using reinforcement learning and can be adapted to any off-the-shelf retriever. CONQRR achieves state-of-the-art results on a recent open-domain CQA dataset containing conversations from three different sources, and is effective for two different off-the-shelf retrievers. Our extensive analysis also shows the robustness of CONQRR to out-of-domain dialogues as well as to zero query rewriting supervision. 2022.emnlp-main.679 @@ -9403,7 +9403,7 @@ Adaptive Contrastive Learning on Multimodal Transformer for Review Helpfulness Prediction ThongNguyenNational University of Singapore XiaobaoWuNanyang Technological University - Anh TuanLuuNanyang Technological University, Singapore + Anh TuanLuuNanyang Technological University, Singapore ZhenHaiAlibaba Group LidongBingAlibaba DAMO Academy 10085-10096 @@ -9420,7 +9420,7 @@ JiuyiLiDalian University of Technology HuanLiuDalian University of Technology JinsongSuXiamen university - DegenHuangDalian University of Technology + DegenHuangDalian University of Technology 10097-10113 Multilingual neural machine translation aims to translate multiple language pairs in a single model and has shown great success thanks to the knowledge transfer across languages with the shared parameters. Despite promising, this share-all paradigm suffers from insufficient ability to capture language-specific features. Currently, the common practice is to insert or search language-specific networks to balance the shared and specific features. However, those two types of features are not sufficient enough to model the complex commonality and divergence across languages, such as the locally shared features among similar languages, which leads to sub-optimal transfer, especially in massively multilingual translation. In this paper, we propose a novel token-level feature mixing method that enables the model to capture different features and dynamically determine the feature sharing across languages. Based on the observation that the tokens in the multilingual model are usually shared by different languages, we we insert a feature mixing layer into each Transformer sublayer and model each token representation as a mix of different features, with a proportion indicating its feature preference. In this way, we can perform fine-grained feature sharing and achieve better multilingual transfer. Experimental results on multilingual datasets show that our method outperforms various strong baselines and can be extended to zero-shot translation. Further analyses reveal that our method can capture different linguistic features and bridge the representation gap across languages. 2022.emnlp-main.687 @@ -9467,7 +9467,7 @@ Two is Better than Many? Binary Classification as an Effective Approach to Multi-Choice Question Answering DeepanwayGhosalSingapore University of Technology and Design NavonilMajumderSingapore University of Technology and Design - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan SoujanyaPoriaSingapore University of Technology and Design 10158-10166 We propose a simple refactoring of multi-choice question answering (MCQA) tasks as a series of binary classifications. The MCQA task is generally performed by scoring each (question, answer) pair normalized over all the pairs, and then selecting the answer from the pair that yield the highest score. For n answer choices, this is equivalent to an n-class classification setup where only one class (true answer) is correct. We instead show that classifying (question, true answer) as positive instances and (question, false answer) as negative instances is significantly more effective across various models and datasets. We show the efficacy of our proposed approach in different tasks – abductive reasoning, commonsense question answering, science question answering, and sentence completion. Our DeBERTa binary classification model reaches the top or close to the top performance on public leaderboards for these tasks. The source code of the proposed approach is available at https://github.com/declare-lab/TEAM. @@ -9557,7 +9557,7 @@ SurajTripathiCarnegie Mellon University SumitAgarwalCarnegie Mellon University TerukoMitamuraCarnegie Mellon University - EricNybergCarnegie Mellon University + EricNybergCarnegie Mellon University 10243-10255 Code-switched (CS) data is ubiquitous in today’s globalized world, but the dearth of annotated datasets in code-switching poses a significant challenge for learning diverse tasks across different language pairs. Parameter-efficient prompt-tuning approaches conditioned on frozen language models have shown promise for transfer learning in limited-resource setups. In this paper, we propose a novel instance-based prompt composition technique, PRO-CS, for CS tasks that combine language and task knowledge. We compare our approach with prompt-tuning and fine-tuning for code-switched tasks on 10 datasets across 4 language pairs. Our model outperforms the prompt-tuning approach by significant margins across all datasets and outperforms or remains at par with fine-tuning by using just 0.18% of total parameters. We also achieve competitive results when compared with the fine-tuned model in the low-resource cross-lingual and cross-task setting, indicating the effectiveness of our approach to incorporate new code-switched tasks. 2022.emnlp-main.698 @@ -9610,7 +9610,7 @@ GiwonHongKAIST School of Computing JeonghwanKimKorea Advanced Institute of Science and Technology (KAIST) JunmoKangKAIST - Sung-HyonMyaengSchool of Computing, KAIST + Sung-HyonMyaengSchool of Computing, KAIST 10288-10294 A graph is a suitable data structure to represent the structural information of text. Recently, multi-hop question answering (MHQA) tasks, which require inter-paragraph/sentence linkages, have come to exploit such properties of a graph. Previous approaches to MHQA relied on leveraging the graph information along with the pre-trained language model (PLM) encoders. However, this trend exhibits the following drawbacks: (i) sample inefficiency while training in a low-resource setting; (ii) lack of reusability due to changes in the model structure or input. Our work proposes the Graph-Induced Transformer (GIT) that applies graph-derived attention patterns directly into a PLM, without the need to employ external graph modules. GIT can leverage the useful inductive bias of graphs while retaining the unperturbed Transformer structure and parameters. Our experiments on HotpotQA successfully demonstrate both the sample efficient characteristic of GIT and its capacity to replace the graph modules while preserving model performance. 2022.emnlp-main.702 @@ -9659,7 +9659,7 @@ QiongkaiXuThe University of Melbourne TongtongWuSoutheast University TianyangZhanBytedance Inc. - GholamrezaHaffariMonash University + GholamrezaHaffariMonash University 10335-10356 In this paper, we propose a variational autoencoder with disentanglement priors, VAE-Dprior, for task-specific natural language generation with none or a handful of task-specific labeled examples. In order to tackle compositional generalization across tasks, our model performs disentangled representation learning by introducing a conditional prior for the latent content space and another conditional prior for the latent label space. Both types of priors satisfy a novel property called \epsilon-disentangled. We show both empirically and theoretically that the novel priors can disentangle representations even without specific regularizations as in the prior work. The content prior enables directly sampling diverse content representations from the content space learned from the seen tasks, and fuse them with the representations of novel tasks for generating semantically diverse texts in the low-resource settings. Our extensive experiments demonstrate the superior performance of our model over competitive baselines in terms of i) data augmentation in continuous zero/few-shot learning, and ii) text style transfer in the few-shot setting. 2022.emnlp-main.706 @@ -9710,9 +9710,9 @@ Discourse Context Predictability Effects in <fixed-case>H</fixed-case>indi Word Order SidharthRanjanIndian Institute of Technology Delhi (IIT Delhi) - Martenvan SchijndelCornell University + Martenvan SchijndelCornell University SumeetAgarwalIndian Institute of Technology Delhi - RajakrishnanRajkumarAssistant Professor, Department of Humanities and Social Sciences, IISER Bhopal + RajakrishnanRajkumarAssistant Professor, Department of Humanities and Social Sciences, IISER Bhopal 10390-10406 We test the hypothesis that discourse predictability influences Hindi syntactic choice. While prior work has shown that a number of factors (e.g., information status, dependency length, and syntactic surprisal) influence Hindi word order preferences, the role of discourse predictability is underexplored in the literature. Inspired by prior work on syntactic priming, we investigate how the words and syntactic structures in a sentence influence the word order of the following sentences. Specifically, we extract sentences from the Hindi-Urdu Treebank corpus (HUTB), permute the preverbal constituents of those sentences, and build a classifier to predict which sentences actually occurred in the corpus against artificially generated distractors. The classifier uses a number of discourse-based features and cognitive features to make its predictions, including dependency length, surprisal, and information status. We find that information status and LSTM-based discourse predictability influence word order choices, especially for non-canonical object-fronted orders. We conclude by situating our results within the broader syntactic priming literature. 2022.emnlp-main.710 @@ -9769,7 +9769,7 @@ HyundongChoUSC, Information Sciences Institute PegahJandaghiUniversity of Southern California Dong-HoLeeUniversity of Southern California - Bill YuchenLinUniversity of Southern California + Bill YuchenLinUniversity of Southern California JayPujaraUniversity of Southern California XiangRenUniversity of Southern California 10450-10468 @@ -9847,7 +9847,7 @@ XiangruiCaiNankai University YikeWuNankai University HaiweiZhangNankai University - YingZhangNankai University + YingZhangNankai University GuoqingZhaoMashang Consumer Finance Co.,Ltd. NingJiangMashang Consumer Finance Co.,Ltd. 10527-10536 @@ -9861,7 +9861,7 @@ KaiyuHuangTsinghua University PengLiInstitute for AI Industry Research (AIR), Tsinghua University, China JinMaustc - YangLiuTsinghua University + YangLiuTsinghua University 10537-10550 In a practical real-world scenario, the longstanding goal is that a universal multilingual translation model can be incrementally updated when new language pairs arrive. Specifically, the initial vocabulary only covers some of the words in new languages, which hurts the translation quality for incremental learning. Although existing approaches attempt to address this issue by replacing the original vocabulary with a rebuilt vocabulary or constructing independent language-specific vocabularies, these methods can not meet the following three demands simultaneously: (1) High translation quality for original and incremental languages, (2) low cost for model training, (3) low time overhead for preprocessing. In this work, we propose an entropy-based vocabulary substitution (EVS) method that just needs to walk through new language pairs for incremental learning in a large-scale multilingual data updating while remaining the size of the vocabulary. Our method has access to learn new knowledge from updated training samples incrementally while keeping high translation quality for original language pairs, alleviating the issue of catastrophic forgetting. Results of experiments show that EVS can achieve better performance and save excess overhead for incremental learning in the multilingual machine translation task. 2022.emnlp-main.720 @@ -9908,9 +9908,9 @@ Making Science Simple: Corpora for the Lay Summarisation of Scientific Literature TomasGoldsackUniversity of Sheffield - ZhihaoZhangBeihang University + ZhihaoZhangBeihang University ChenghuaLinDepartment of Computer Science, University of Sheffield - CarolinaScartonUniversity of Sheffield + CarolinaScartonUniversity of Sheffield 10589-10604 Lay summarisation aims to jointly summarise and simplify a given text, thus making its content more comprehensible to non-experts.Automatic approaches for lay summarisation can provide significant value in broadening access to scientific literature, enabling a greater degree of both interdisciplinary knowledge sharing and public understanding when it comes to research findings. However, current corpora for this task are limited in their size and scope, hindering the development of broadly applicable data-driven approaches. Aiming to rectify these issues, we present two novel lay summarisation datasets, PLOS (large-scale) and eLife (medium-scale), each of which contains biomedical journal articles alongside expert-written lay summaries.We provide a thorough characterisation of our lay summaries, highlighting differing levels of readability and abstractivenessbetween datasets that can be leveraged to support the needs of different applications.Finally, we benchmark our datasets using mainstream summarisation approaches and perform a manual evaluation with domain experts, demonstrating their utility and casting light on the key challenges of this task. 2022.emnlp-main.724 @@ -9985,7 +9985,7 @@ TianhaoShenTianjin University MingtongLiuBeijing Jiaotong University MingZhouLangboat Technology - DeyiXiongTianjin University + DeyiXiongTianjin University 10659-10670 Negative samples have not been efficiently explored in multilingual dense passage retrieval. In this paper, we propose a novel multilingual dense passage retrieval framework, mHFN, to recover and utilize hard and false negative samples. mHFN consists of three key components: 1) a multilingual hard negative sample augmentation module that allows knowledge of indistinguishable passages to be shared across multiple languages and synthesizes new hard negative samples by interpolating representations of queries and existing hard negative samples, 2) a multilingual negative sample cache queue that stores negative samples from previous batches in each language to increase the number of multilingual negative samples used in training beyond the batch size limit, and 3) a lightweight adaptive false negative sample filter that uses generated pseudo labels to separate unlabeled false negative samples and converts them into positive passages in training. We evaluate mHFN on Mr. TyDi, a high-quality multilingual dense passage retrieval dataset covering eleven typologically diverse languages, and experimental results show that mHFN outperforms strong sparse, dense and hybrid baselines and achieves new state-of-the-art performance on all languages. Our source code is available at https://github.com/Magnetic2014/mHFN. 2022.emnlp-main.730 @@ -9994,7 +9994,7 @@ The “Problem” of Human Label Variation: On Ground Truth in Data, Modeling and Evaluation - BarbaraPlankLMU Munich + BarbaraPlankLMU Munich 10671-10682 Human variation in labeling is often considered noise. Annotation projects for machine learning (ML) aim at minimizing human label variation, with the assumption to maximize data quality and in turn optimize and maximize machine learning metrics. However, thisconventional practice assumes that there exists a *ground truth*, and neglects that there exists genuine human variation in labeling due to disagreement, subjectivity in annotation or multiple plausible answers.In this position paper, we argue that this big open problem of human label variation persists and critically needs more attention to move our field forward. This is because human label variation impacts all stages of the ML pipeline: *data, modeling and evaluation*. However, few works consider all of these dimensions jointly; and existing research is fragmented. We reconcile different previously proposed notions of human label variation, provide a repository of publicly-available datasets with un-aggregated labels, depict approaches proposed so far, identify gaps and suggest ways forward. As datasets are becoming increasingly available, we hope that this synthesized view on the “problem” will lead to an open discussion on possible strategies to devise fundamentally new directions. 2022.emnlp-main.731 @@ -10030,7 +10030,7 @@ Facilitating Contrastive Learning of Discourse Relational Senses by Exploiting the Hierarchy of Sense Relations WanqiuLongThe University of Edinburgh - BonnieWebberUniversity of Edinburgh + BonnieWebberUniversity of Edinburgh 10704-10716 Implicit discourse relation recognition is a challenging task that involves identifying the sense or senses that hold between two adjacent spans of text, in the absense of an explicit connective between them. In both PDTB-2 (prasad et al., 2008) and PDTB-3 (Webber et al., 2019), discourse relational senses are organized into a three-level hierarchy ranging from four broad top-level senses, to more specific senses below them. Most previous work on implicitf discourse relation recognition have used the sense hierarchy simply to indicate what sense labels were available. Here we do more — incorporating the sense hierarchy into the recognition process itself and using it to select the negative examples used in contrastive learning. With no additional effort, the approach achieves state-of-the-art performance on the task. Our code is released inhttps://github.com/wanqiulong 0923/Contrastive_IDRR. 2022.emnlp-main.734 @@ -10084,7 +10084,7 @@ ZhaoranLiuZhejiang University GuilinQiSoutheast University Yuan-FangLiMonash University - GholamrezaHaffariMonash University + GholamrezaHaffariMonash University 10751-10762 Relation extraction typically aims to extract semantic relationships between entities from the unstructured text.One of the most essential data sources for relation extraction is the spoken language, such as interviews and dialogues.However, the error propagation introduced in automatic speech recognition (ASR) has been ignored in relation extraction, and the end-to-end speech-based relation extraction method has been rarely explored.In this paper, we propose a new listening information extraction task, i.e., speech relation extraction.We construct the training dataset for speech relation extraction via text-to-speech systems, and we construct the testing dataset via crowd-sourcing with native English speakers.We explore speech relation extraction via two approaches: the pipeline approach conducting text-based extraction with a pretrained ASR module, and the end2end approach via a new proposed encoder-decoder model, or what we called SpeechRE.We conduct comprehensive experiments to distinguish the challenges in speech relation extraction, which may shed light on future explorations. We share the code and data on https://github.com/wutong8023/SpeechRE. 2022.emnlp-main.738 @@ -10095,7 +10095,7 @@ Structural Constraints and Natural Language Inference for End-to-End Flowchart Grounded Dialog Response Generation DineshRaghuIBM Research SurajJoshiIndian Institute of Technology, Delhi - SachindraJoshiIBM + SachindraJoshiIBM Mausam-Indian Institute of Technology, Delhi 10763-10774 Flowchart grounded dialog systems converse with users by following a given flowchart and a corpus of FAQs. The existing state-of-the-art approach (Raghu et al, 2021) for learning such a dialog system, named FLONET, has two main limitations. (1) It uses a Retrieval Augmented Generation (RAG) framework which represents a flowchart as a bag of nodes. By doing so, it loses the connectivity structure between nodes that can aid in better response generation. (2) Typically dialogs progress with the agent asking polar (Y/N) questions, but users often respond indirectly without the explicit use of polar words. In such cases, it fails to understand the correct polarity of the answer. To overcome these issues, we propose Structure-Aware FLONET (SA-FLONET) which infuses structural constraints derived from the connectivity structure of flowcharts into the RAG framework. It uses natural language inference to better predict the polarity of indirect Y/N answers. We find that SA-FLONET outperforms FLONET, with a success rate improvement of 68% and 123% in flowchart grounded response generation and zero-shot flowchart grounded response generation tasks respectively. @@ -10132,7 +10132,7 @@ ChiChenTsinghua University PengLiInstitute for AI Industry Research (AIR), Tsinghua University, China MaosongSunTsinghua University - YangLiuTsinghua University + YangLiuTsinghua University 10799-10810 Recently there has been an emerging interest in unsupervised vision-and-language pre-training (VLP) that learns multimodal representations without parallel image-caption data. These pioneering works significantly reduce the cost of VLP on data collection and achieve promising results compared to supervised VLP. However, existing unsupervised VLP methods take as input pre-extracted region-based visual features from external object detectors, which both limits flexibility and reduces computational efficiency. In this paper, we explore end-to-end unsupervised VLP with a vision encoder to directly encode images. The vision encoder is pre-trained on image-only data and jointly optimized during multimodal pre-training. To further enhance the learned cross-modal features, we propose a novel pre-training task that predicts which patches contain an object referred to in natural language from the encoded visual features. Extensive experiments on four vision-and-language tasks show that our approach outperforms previous unsupervised VLP methods and obtains new state-of-the-art results. 2022.emnlp-main.742 @@ -10159,7 +10159,7 @@ JaimeenAhnIndependent Researcher JihyungMoonSoftlyAI SungjoonParkSoftlyAI - AliceOhKAIST + AliceOhKAIST 10818-10833 Recent directions for offensive language detection are hierarchical modeling, identifying the type and the target of offensive language, and interpretability with offensive span annotation and prediction. These improvements are focused on English and do not transfer well to other languages because of cultural and linguistic differences. In this paper, we present the Korean Offensive Language Dataset (KOLD) comprising 40,429 comments, which are annotated hierarchically with the type and the target of offensive language, accompanied by annotations of the corresponding text spans. We collect the comments from NAVER news and YouTube platform and provide the titles of the articles and videos as the context information for the annotation process. We use these annotated comments as training data for Korean BERT and RoBERTa models and find that they are effective at offensiveness detection, target classification, and target span detection while having room for improvement for target group classification and offensive span detection. We discover that the target group distribution differs drastically from the existing English datasets, and observe that providing the context information improves the model performance in offensiveness detection (+0.3), target classification (+1.5), and target group classification (+13.1). We publicly release the dataset and baseline models. 2022.emnlp-main.744 @@ -10188,7 +10188,7 @@ LeonieWeissweilerCIS, LMU Munich ValentinHofmannUniversity of Oxford AbdullatifKöksalLMU Munich - HinrichSchützeCenter for Information and Language Processing, University of Munich + HinrichSchützeCenter for Information and Language Processing, University of Munich 10859-10882 Construction Grammar (CxG) is a paradigm from cognitive linguistics emphasising the connection between syntax and semantics. Rather than rules that operate on lexical items, it posits constructions as the central building blocks of language, i.e., linguistic units of different granularity that combine syntax and semantics. As a first step towards assessing the compatibility of CxG with the syntactic and semantic knowledge demonstrated by state-of-the-art pretrained language models (PLMs), we present an investigation of their capability to classify and understand one of the most commonly studied constructions, the English comparative correlative (CC). We conduct experiments examining the classification accuracy of a syntactic probe on the one hand and the models’ behaviour in a semantic application task on the other, with BERT, RoBERTa, and DeBERTa as the example PLMs. Our results show that all three investigated PLMs are able to recognise the structure of the CC but fail to use its meaning. While human-like performance of PLMs on many NLP tasks has been alleged, this indicates that PLMs still suffer from substantial shortcomings in central domains of linguistic knowledge. 2022.emnlp-main.746 @@ -10202,7 +10202,7 @@ QiZhangFudan University XinZhouFudan University TaoGuifudan university - XuanjingHuangFudan University + XuanjingHuangFudan University 10883-10892 Proof generation focuses on deductive reasoning: given a hypothesis and a set of theories, including some supporting facts and logical rules expressed in natural language, the model generates a proof tree indicating how to deduce the hypothesis from given theories.Current models with state-of-the-art performance employ the stepwise method that adds an individual node to the proof step-by-step.However, these methods actually focus on generating several proof paths rather than a whole tree.During generation, they focus on the most relevant areas of the currently generated node while neglecting the rest of the proof tree. To address this problem, we propose ProofInfer, which generates the proof tree via iterative hierarchical inference.At each step, ProofInfer adds the entire layer to the proof, where all nodes in this layer are generated simultaneously. Since the conventional autoregressive generation architecture cannot simultaneously predict multiple nodes, ProofInfer employs text-to-text paradigm.To this end, we propose a divide-and-conquer algorithm to encode the proof tree as the plain text without losing structure information.Experimental results show that ProofInfer significantly improves performance on several widely-used datasets.In addition, ProofInfer still performs well with data-limited, achieving comparable performance to the state-of-the-art model with about 40% of the training data. 2022.emnlp-main.747 @@ -10220,7 +10220,7 @@ ShivaniShrivastavaGoldman Sachs KoustuvDasguptaGoldman Sachs NiloyGangulyIIT kharagpur - SaptarshiGhoshIIT Kharagpur + SaptarshiGhoshIIT Kharagpur PawanGoyalIIT Kharagpur 10893-10906 Despite tremendous progress in automatic summarization, state-of-the-art methods are predominantly trained to excel in summarizing short newswire articles, or documents with strong layout biases such as scientific articles or government reports. Efficient techniques to summarize financial documents, discussing facts and figures, have largely been unexplored, majorly due to the unavailability of suitable datasets. In this work, we present ECTSum, a new dataset with transcripts of earnings calls (ECTs), hosted by publicly traded companies, as documents, and experts-written short telegram-style bullet point summaries derived from corresponding Reuters articles. ECTs are long unstructured documents without any prescribed length limit or format. We benchmark our dataset with state-of-the-art summarization methods across various metrics evaluating the content quality and factual consistency of the generated summaries. Finally, we present a simple yet effective approach, ECT-BPS, to generate a set of bullet points that precisely capture the important facts discussed in the calls. @@ -10330,7 +10330,7 @@ Semantic Simplification for Sentiment Classification XiaotongJiangSoochow University ZhongqingWangSoochow University - GuodongZhouSoochow University + GuodongZhouSoochow University 11022-11032 Recent work on document-level sentiment classification has shown that the sentiment in the original text is often hard to capture, since the sentiment is usually either expressed implicitly or shifted due to the occurrences of negation and rhetorical words. To this end, we enhance the original text with a sentiment-driven simplified clause to intensify its sentiment. The simplified clause shares the same opinion with the original text but expresses the opinion much more simply. Meanwhile, we employ Abstract Meaning Representation (AMR) for generating simplified clauses, since AMR explicitly provides core semantic knowledge, and potentially offers core concepts and explicit structures of original texts. Empirical studies show the effectiveness of our proposed model over several strong baselines. The results also indicate the importance of simplified clauses for sentiment classification. 2022.emnlp-main.757 @@ -10341,7 +10341,7 @@ <fixed-case>XP</fixed-case>rompt: Exploring the Extreme of Prompt Tuning FangMaBeijing Institute of Technology - ChenZhangBeijing Institute of Technology + ChenZhangBeijing Institute of Technology LeiRenMeituan-Dianping Group JingangWangMeituan QifanWangMeta AI @@ -10362,7 +10362,7 @@ MikelArtetxeMeta AI MikeLewisFacebook AI Research HannanehHajishirziUniversity of Washington - LukeZettlemoyerUniversity of Washington; Meta + LukeZettlemoyerUniversity of Washington; Meta 11048-11064 Large language models (LMs) are able to in-context learn—perform a new task via inference alone by conditioning on a few input-label pairs (demonstrations) and making predictions for new inputs. However, there has been little understanding of how the model learns and which aspects of the demonstrations contribute to end task performance. In this paper, we show that ground truth demonstrations are in fact not required—randomly replacing labels in the demonstrations barely hurts performance on a range of classification and multi-choce tasks, consistently over 12 different models including GPT-3. Instead, we find that other aspects of the demonstrations are the key drivers of endtask performance, including the fact that they provide a few examples of (1) the label space, (2) the distribution of the input text, and (3) the overall format of the sequence. Together, our analysis provides a new way of understanding how and why in-context learning works, while opening up new questions about how much can be learned from large language models through inference alone. 2022.emnlp-main.759 @@ -10416,7 +10416,7 @@ AoZhangNUS WeiJiNational University of Singapore ZhiyuanLiuTsinghua University - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore MaosongSunTsinghua University 11104-11117 Vision-language pre-training (VLP) has shown impressive performance on a wide range of cross-modal tasks, where VLP models without reliance on object detectors are becoming the mainstream due to their superior computation efficiency and competitive performance. However, the removal of object detectors also deprives the capability of VLP models in explicit object modeling, which is essential to various position-sensitive vision-language (VL) tasks, such as referring expression comprehension and visual commonsense reasoning. To address the challenge, we introduce PEVL that enhances the pre-training and prompt tuning of VLP models with explicit object position modeling. Specifically, PEVL reformulates discretized object positions and language in a unified language modeling framework, which facilitates explicit VL alignment during pre-training, and also enables flexible prompt tuning for various downstream tasks. We show that PEVL enables state-of-the-art performance of detector-free VLP models on position-sensitive tasks such as referring expression comprehension and phrase grounding, and also improves the performance on position-insensitive tasks with grounded inputs. We make the data and code for this paper publicly available at https://github.com/thunlp/PEVL. @@ -10430,8 +10430,8 @@ ShaoboLiHarbin Institute of Technology XiaoguangLiHuawei Noah’s Ark Lab LifengShangNoah’s Ark Lab Huawei Technologies Co. Ltd. Sha Tin, Hong Kong - ChengjieSunHarbin Institute of Technology - BingquanLiuHarbin Institute of Technology + ChengjieSunHarbin Institute of Technology + BingquanLiuHarbin Institute of Technology ZhenzhouJiHarbin Institute of Technology XinJiangHuawei Noah’s Ark Lab QunLiuHuawei Noah’s Ark Lab @@ -10584,7 +10584,7 @@ <fixed-case>S</fixed-case>cience<fixed-case>W</fixed-case>orld: Is your Agent Smarter than a 5th Grader? RuoyaoWangUniversity of Arizona - PeterJansenUniversity of Arizona + PeterJansenUniversity of Arizona Marc-AlexandreCôtéMicrosoft Research PrithvirajAmmanabroluAllen Institute for AI 11279-11298 @@ -10633,8 +10633,8 @@ Balancing out Bias: Achieving Fairness Through Balanced Training XudongHanThe university of Melbourne - TimothyBaldwinThe University of Melbourne - TrevorCohnUniversity of Melbourne + TimothyBaldwinThe University of Melbourne + TrevorCohnUniversity of Melbourne 11335-11350 Group bias in natural language processing tasks manifests as disparities in system error rates across texts authorized by different demographic groups, typically disadvantaging minority groups. Dataset balancing has been shown to be effective at mitigating bias, however existing approaches do not directly account for correlations between author demographics and linguistic variables, limiting their effectiveness. To achieve Equal Opportunity fairness, such as equal job opportunity without regard to demographics, this paper introduces a simple, but highly effective, objective for countering bias using balanced training.We extend the method in the form of a gated model, which incorporates protected attributes as input, and show that it is effective at reducing bias in predictions through demographic input perturbation, outperforming all other bias mitigation techniques when combined with balanced training. 2022.emnlp-main.779 @@ -10661,7 +10661,7 @@ Identifying Physical Object Use in Sentences TianyuJiangUniversity of Utah - EllenRiloffUniversity of Utah + EllenRiloffUniversity of Utah 11362-11372 Commonsense knowledge about the typicalfunctions of physical objects allows people tomake inferences during sentence understanding.For example, we infer that “Sam enjoyedthe book” means that Sam enjoyed reading thebook, even though the action is implicit. Priorresearch has focused on learning the prototypicalfunctions of physical objects in order toenable inferences about implicit actions. Butmany sentences refer to objects even when theyare not used (e.g., “The book fell”). We arguethat NLP systems need to recognize whether anobject is being used before inferring how theobject is used. We define a new task called ObjectUse Classification that determines whethera physical object mentioned in a sentence wasused or likely will be used. We introduce a newdataset for this task and present a classificationmodel that exploits data augmentation methodsand FrameNet when fine-tuning a pre-trainedlanguage model. We also show that object useclassification combined with knowledge aboutthe prototypical functions of objects has thepotential to yield very good inferences aboutimplicit and anticipated actions. 2022.emnlp-main.781 @@ -10718,10 +10718,10 @@ Improving Tokenisation by Alternative Treatment of Spaces EdwardGow-SmithUniversity of Sheffield HarishTayyar MadabushiUniversity of Bath - CarolinaScartonUniversity of Sheffield + CarolinaScartonUniversity of Sheffield AlineVillavicencioUniversity of Sheffield, UK 11430-11443 - + 2022.emnlp-main.786 gow-smith-etal-2022-improving 10.18653/v1/2022.emnlp-main.786 @@ -10734,8 +10734,8 @@ NicholasLourieNew York University JungoKasaiUniversity of Washington YejinChoiUniversity of Washington - Noah A.SmithUniversity of Washington - DanielWeldUniversity of Washington & Allen Institute for Artificial Inelligence + Noah A.SmithUniversity of Washington + DanielWeldUniversity of Washington & Allen Institute for Artificial Inelligence 11444-11458 While often assumed a gold standard, effective human evaluation of text generation remains an important, open area for research.We revisit this problem with a focus on producing consistent evaluations that are reproducible—over time and across different populations. We study this goal in different stages of the human evaluation pipeline. In particular, we consider design choices for the annotation interface used to elicit human judgments and their impact on reproducibility. Furthermore, we develop an automated mechanism for maintaining annotator quality via a probabilistic model that detects and excludes noisy annotators. Putting these lessons together, we introduce GENIE: a system for running standardized human evaluations across different generation tasks.We instantiate GENIE with datasets representing four core challenges in text generation: machine translation, summarization, commonsense reasoning, and machine comprehension.For each task, GENIE offers a leaderboard that automatically crowdsources annotations for submissions, evaluating them along axes such as correctness, conciseness, and fluency.We have made the GENIE leaderboards publicly available, and have already ranked 50 submissions from 10 different research groups. We hope GENIE encourages further progress toward effective, standardized evaluations for text generation. 2022.emnlp-main.787 @@ -10763,7 +10763,7 @@ SamThomsonMicrosoft Semantic Machines HaoFangMicrosoft Semantic Machines BenjaminVan DurmeJohns Hopkins University / Microsoft - JasonEisnerJohns Hopkins University + JasonEisnerJohns Hopkins University YuSuThe Ohio State University 11473-11487 In natural language understanding (NLU) production systems, users’ evolving needs necessitate the addition of new features over time, indexed by new symbols added to the meaning representation space. This requires additional training data and results in ever-growing datasets. We present the first systematic investigation into this incremental symbol learning scenario. Our analysis reveals a troubling quirk in building broad-coverage NLU systems: as the training dataset grows, performance on a small set of new symbols often decreases. We show that this trend holds for multiple mainstream models on two common NLU tasks: intent recognition and semantic parsing. Rejecting class imbalance as the sole culprit, we reveal that the trend is closely associated with an effect we call source signal dilution, where strong lexical cues for the new symbol become diluted as the training dataset grows. Selectively dropping training examples to prevent dilution often reverses the trend, showing the over-reliance of mainstream neural NLU models on simple lexical cues. @@ -10855,7 +10855,7 @@ HaoSunTsinghua University ChujieZhengTsinghua University FeiMiHuawei - HelenMengThe Chinese University of Hong Kong + HelenMengThe Chinese University of Hong Kong MinlieHuangTsinghua University 11580-11599 Offensive language detection is increasingly crucial for maintaining a civilized social media platform and deploying pre-trained language models. However, this task in Chinese is still under exploration due to the scarcity of reliable datasets. To this end, we propose a benchmark –COLD for Chinese offensive language analysis, including a Chinese Offensive Language Dataset –COLDATASET and a baseline detector –COLDETECTOR which is trained on the dataset. We show that the COLD benchmark contributes to Chinese offensive language detection which is challenging for existing resources. We then deploy the COLDETECTOR and conduct detailed analyses on popular Chinese pre-trained language models. We first analyze the offensiveness of existing generative models and show that these models inevitably expose varying degrees of offensive issues. Furthermore, we investigate the factors that influence the offensive generations, and we find that anti-bias contents and keywords referring to certain groups or revealing negative attitudes trigger offensive outputs easier. @@ -10866,9 +10866,9 @@ Fixing Model Bugs with Natural Language Patches ShikharMurtyStanford University - ChristopherManningStanford University + ChristopherManningStanford University ScottLundbergMicrosoft Research - Marco TulioRibeiroMicrosoft Research + Marco TulioRibeiroMicrosoft Research 11600-11613 Current approaches for fixing systematic problems in NLP models (e.g., regex patches, finetuning on more data) are either brittle, or labor-intensive and liable to shortcuts. In contrast, humans often provide corrections to each other through natural language. Taking inspiration from this, we explore natural language patches—declarative statements that allow developers to provide corrective feedback at the right level of abstraction, either overriding the model (“if a review gives 2 stars, the sentiment is negative”) or providing additional information the model may lack (“if something is described as the bomb, then it is good”). We model the task of determining if a patch applies separately from the task of integrating patch information, and show that with a small amount of synthetic data, we can teach models to effectively use real patches on real data—1 to 7 patches improve accuracy by ~1–4 accuracy points on different slices of a sentiment analysis dataset, and F1 by 7 points on a relation extraction dataset. Finally, we show that finetuning on as many as 100 labeled examples may be needed to match the performance of a small set of language patches. 2022.emnlp-main.797 @@ -10968,7 +10968,7 @@ TodorMihaylovMeta AI MyleOttFacebook AI Research SamShleiferHugging Face - Xi VictoriaLinMeta AI + Xi VictoriaLinMeta AI JingfeiDuFacebook SrinivasanIyerFacebook RamakanthPasunuruMeta @@ -10982,8 +10982,8 @@ Punit SinghKouraFacebook Inc. BrianO’HoroMeta AI JeffreyWangMeta AI - LukeZettlemoyerUniversity of Washington; Meta - MonaDiabMeta Responsible AI + LukeZettlemoyerUniversity of Washington; Meta + MonaDiabMeta Responsible AI ZornitsaKozarevaMeta AI VeselinStoyanovFacebook 11699-11732 @@ -11048,7 +11048,7 @@ PaulaFortunaTALN, Pompeu Fabra University MonicaDominguezUniversitat Pompeu Fabra LeoWannerICREA and Pompeu Fabra University - ZeerakTalatSimon Fraser University + ZeerakTalatSimon Fraser University 11794-11805 Addressing hate speech in online spaces has been conceptualized as a classification task that uses Natural Language Processing (NLP) techniques. Through this conceptualization, the hate speech detection task has relied on common conventions and practices from NLP. For instance, inter-annotator agreement is conceptualized as a way to measure dataset quality and certain metrics and benchmarks are used to assure model generalization. However, hate speech is a deeply complex and situated concept that eludes such static and disembodied practices. In this position paper, we critically reflect on these methodologies for hate speech detection, we argue that many conventions in NLP are poorly suited for the problem and encourage researchers to develop methods that are more appropriate for the task. 2022.emnlp-main.809 @@ -11072,11 +11072,11 @@ <fixed-case>O</fixed-case>pen<fixed-case>CQA</fixed-case>: Open-ended Question Answering with Charts ShankarKantharajYork University - Xuan LongDoNanyang Technological University + Xuan LongDoNanyang Technological University Rixie TiffanyLeongNanyang Technological University, Singapore Jia QingTanNanyang Technological University EnamulHoqueYork University - ShafiqJotyNanyang Technological University; Salesforce AI Research + ShafiqJotyNanyang Technological University; Salesforce AI Research 11817-11837 Charts are very popular to analyze data and convey important insights. People often analyze visualizations to answer open-ended questions that require explanatory answers. Answering such questions are often difficult and time-consuming as it requires a lot of cognitive and perceptual efforts. To address this challenge, we introduce a new task called OpenCQA, where the goal is to answer an open-ended question about a chart with descriptive texts. We present the annotation process and an in-depth analysis of our dataset. We implement and evaluate a set of baselines under three practical settings. In the first setting, a chart and the accompanying article is provided as input to the model. The second setting provides only the relevant paragraph(s) to the chart instead of the entire article, whereas the third setting requires the model to generate an answer solely based on the chart. Our analysis of the results show that the top performing models generally produce fluent and coherent text while they struggle to perform complex logical and arithmetic reasoning. 2022.emnlp-main.811 @@ -11090,7 +11090,7 @@ AdhigunaKuncoroUniversity of Oxford and DeepMind JordanHoffmannDeepMind Cypriende Masson d’AutumeDeepMind - PhilBlunsomUniversity of Oxford + PhilBlunsomUniversity of Oxford AidaNematzadehDeepMind 11838-11855 Language models (LMs) trained on large amounts of data have shown impressive performance on many NLP tasks under the zero-shot and few-shot setup. Here we aim to better understand the extent to which such models learn commonsense knowledge — a critical component of many NLP applications. We conduct a systematic and rigorous zero-shot and few-shot commonsense evaluation of large pre-trained LMs, where we: (i) carefully control for the LMs’ ability to exploit potential surface cues and annotation artefacts, and (ii) account for variations in performance that arise from factors that are not related to commonsense knowledge. Our findings highlight the limitations of pre-trained LMs in acquiring commonsense knowledge without task-specific supervision; furthermore, using larger models or few-shot evaluation is insufficient to achieve human-level commonsense performance. @@ -11121,7 +11121,7 @@ BesnikFetahuAmazon JieZhaoAmazon OlegRokhlenkoAmazon Research - ShervinMalmasiAmazon + ShervinMalmasiAmazon 11875-11886 Users expect their queries to be answered by search systems, regardless of the query’s surface form, which include keyword queries and natural questions. Natural Language Understanding (NLU) components of Search and QA systems may fail to correctly interpret semantically equivalent inputs if this deviates from how the system was trained, leading to suboptimal understanding capabilities. We propose the keyword-question rewriting task to improve query understanding capabilities of NLU systems for all surface forms. To achieve this, we present CycleKQR, an unsupervised approach, enabling effective rewriting between keyword and question queries using non-parallel data.Empirically we show the impact on QA performance of unfamiliar query forms for open domain and Knowledge Base QA systems (trained on either keywords or natural language questions). We demonstrate how CycleKQR significantly improves QA performance by rewriting queries into the appropriate form, while at the same time retaining the original semantic meaning of input queries, allowing CycleKQR to improve performance by up to 3% over supervised baselines. Finally, we release a datasetof 66k keyword-question pairs. 2022.emnlp-main.814 @@ -11132,7 +11132,7 @@ Model Criticism for Long-Form Text Generation YuntianDengHarvard University VolodymyrKuleshovCornell Tech - AlexanderRushCornell University + AlexanderRushCornell University 11887-11912 Language models have demonstrated the ability to generate highly fluent text; however, it remains unclear whether their output retains coherent high-level structure (e.g., story progression). Here, we propose to apply a statistical tool, model criticism in latent space, to evaluate the high-level structure of the generated text. Model criticism compares the distributions between real and generated data in a latent space obtained according to an assumptive generative process. Different generative processes identify specific failure modes of the underlying model. We perform experiments on three representative aspects of high-level discourse—coherence, coreference, and topicality—and find that transformer-based language models are able to capture topical structures but have a harder time maintaining structural coherence or modeling coreference. 2022.emnlp-main.815 @@ -11184,7 +11184,7 @@ NiraliParekhDwarkadas J. Sanghvi College of Engineering KaranWaghelaSanta Clara University LynetteD’MelloDwarkadas J. Sanghvi College of Engineering - ZeerakTalatSimon Fraser University + ZeerakTalatSimon Fraser University 11951-11961 The use of emojis affords a visual modality to, often private, textual communication.The task of predicting emojis however provides a challenge for machine learning as emoji use tends to cluster into the frequently used and the rarely used emojis.Much of the machine learning research on emoji use has focused on high resource languages and has conceptualised the task of predicting emojis around traditional server-side machine learning approaches.However, traditional machine learning approaches for private communication can introduce privacy concerns, as these approaches require all data to be transmitted to a central storage.In this paper, we seek to address the dual concerns of emphasising high resource languages for emoji prediction and risking the privacy of people’s data.We introduce a new dataset of 118k tweets (augmented from 25k unique tweets) for emoji prediction in Hindi, and propose a modification to the federated learning algorithm, CausalFedGSD, which aims to strike a balance between model performance and user privacy. We show that our approach obtains comparative scores with more complex centralised models while reducing the amount of data required to optimise the models and minimising risks to user privacy. 2022.emnlp-main.819 @@ -11223,7 +11223,7 @@ Improving Low-Resource Languages in Pre-Trained Multilingual Language Models ViktorHangyaLudwig-Maximilians-Universität München Hossain ShaikhSaadiTechnical University of Munich - AlexanderFraserLudwig-Maximilians-Universität München + AlexanderFraserLudwig-Maximilians-Universität München 11993-12006 Pre-trained multilingual language models are the foundation of many NLP approaches, including cross-lingual transfer solutions. However, languages with small available monolingual corpora are often not well-supported by these models leading to poor performance. We propose an unsupervised approach to improve the cross-lingual representations of low-resource languages by bootstrapping word translation pairs from monolingual corpora and using them to improve language alignment in pre-trained language models. We perform experiments on nine languages, using contextual word retrieval and zero-shot named entity recognition to measure both intrinsic cross-lingual word representation quality and downstream task performance, showing improvements on both tasks. Our results show that it is possible to improve pre-trained multilingual language models by relying only on non-parallel resources. 2022.emnlp-main.822 @@ -11322,7 +11322,7 @@ Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Tutorial Abstracts - Samhaa R.El-Beltagy + Samhaa R.El-Beltagy XipengQiu Association for Computational Linguistics
Abu Dubai, UAE
@@ -11341,7 +11341,7 @@ IshanJindal YunyaoLi TimO’Gorman - MarthaPalmer + MarthaPalmer NianwenXue 1-8 This tutorial reviews the design of common meaning representations, SoTA models for predicting meaning representations, and the applications of meaning representations in a wide range of downstream NLP tasks and real-world applications. Reporting by a diverse team of NLP researchers from academia and industry with extensive experience in designing, building and using meaning representations, our tutorial has three components: (1) an introduction to common meaning representations, including basic concepts and design challenges; (2) a review of SoTA methods on building models for meaning representations; and (3) an overview of applications of meaning representations in downstream NLP tasks and real-world applications. We will also present qualitative comparisons of common meaning representations and a quantitative study on how their differences impact model performance. Finally, we will share best practices in choosing the right meaning representation for downstream tasks. @@ -11362,7 +11362,7 @@
Emergent Language-Based Coordination In Deep Multi-Agent Systems - MarcoBaroni + MarcoBaroni RobertoDessi AngelikiLazaridou 11-16 @@ -11376,7 +11376,7 @@ <fixed-case>C</fixed-case>ausal<fixed-case>NLP</fixed-case> Tutorial: An Introduction to Causality for Natural Language Processing ZhijingJin AmirFeder - KunZhang + KunZhang 17-22 Causal inference is becoming an increasingly important topic in deep learning, with the potential to help with critical deep learning problems such as model robustness, interpretability, and fairness. In addition, causality is naturally widely used in various disciplines of science, to discover causal relationships among variables and estimate causal effects of interest. In this tutorial, we introduce the fundamentals of causal discovery and causal effect estimation to the natural language processing (NLP) audience, provide an overview of causal perspectives to NLP problems, and aim to inspire novel approaches to NLP further. This tutorial is inclusive to a variety of audiences and is expected to facilitate the community’s developments in formulating and addressing new, important NLP problems in light of emerging causal principles and methodologies. 2022.emnlp-tutorials.4 @@ -11502,7 +11502,7 @@ DanielLoureiroCardiff University DimosthenisAntypasCardiff University JoanneBoissonCardiff University - LuisEspinosa AnkeCardiff University + LuisEspinosa AnkeCardiff University FangyuLiuUniversity of Cambridge EugenioMartínez CámaraUniversity of Granada 38-49 @@ -11530,8 +11530,8 @@ AiliShenAlexa AI, Amazon YitongLiHuawei Technology Co. ltd LeaFrermannMelbourne University - TimothyBaldwinThe University of Melbourne - TrevorCohnUniversity of Melbourne + TimothyBaldwinThe University of Melbourne + TrevorCohnUniversity of Melbourne 60-71 This paper presents FairLib, an open-source python library for assessing and improving model fairness. It provides a systematic framework for quickly accessing benchmark datasets, reproducing existing debiasing baseline models, developing new methods, evaluating models with different metrics, and visualizing their results. Its modularity and extensibility enable the framework to be used for diverse types of inputs, including natural language, images, and audio. We implement 14 debiasing methods, including pre-processing,at-training-time, and post-processing approaches. The built-in metrics cover the most commonly acknowledged fairness criteria and can be further generalized and customized for fairness evaluation. 2022.emnlp-demos.7 @@ -11636,7 +11636,7 @@ <fixed-case>K</fixed-case>eyword<fixed-case>S</fixed-case>cape: Visual Document Exploration using Contextualized Keyword Embeddings HenrikVoigtFriedrich-Schiller-University MoniqueMeuschkeUniversity of Magdeburg - SinaZarrießUniversity of Bielefeld + SinaZarrießUniversity of Bielefeld KaiLawonnUniversity of Jena 137-147 Although contextualized word embeddings have led to great improvements in automatic language understanding, their potential for practical applications in document exploration and visualization has been little explored. Common visualization techniques used for, e.g., model analysis usually provide simple scatter plots of token-level embeddings that do not provide insight into their contextual use. In this work, we propose KeywordScape, a visual exploration tool that allows to overview, summarize, and explore the semantic content of documents based on their keywords. While existing keyword-based exploration tools assume that keywords have static meanings, our tool represents keywords in terms of their contextualized embeddings. Our application visualizes these embeddings in a semantic landscape that represents keywords as islands on a spherical map. This keeps keywords with similar context close to each other, allowing for a more precise search and comparison of documents. @@ -11675,7 +11675,7 @@ ZhengZhang DakuoWang LucyYip - LiatEin-Dor + LiatEin-Dor LenaDankin IlyaShnayderman RanitAharonov @@ -11710,7 +11710,7 @@ <fixed-case>B</fixed-case>ot<fixed-case>SIM</fixed-case>: An End-to-End Bot Simulation Framework for Commercial Task-Oriented Dialog Systems GuangsenWangSalesforce Research Asia SamsonTanAWS AI Research & Education - ShafiqJotyNanyang Technological University; Salesforce AI Research + ShafiqJotyNanyang Technological University; Salesforce AI Research GangWuSalesforce Research JimmyAuSalesforce Steven C.h.HoiSalesforce @@ -11746,7 +11746,7 @@ BaileyKuehlAllen Institute for Artificial Intelligence SophieJohnsonAllen Institute for Artificial Intelligence JonathanBorchardtAllen Institute for Artificial Intelligence - DanielWeldAllen Institute for Artificial Intelligence + DanielWeldAllen Institute for Artificial Intelligence TomHopeAllen Institute for Artificial Intelligence DougDowneyAllen Institute for Artificial Intelligence 200-213 @@ -11812,7 +11812,7 @@ <fixed-case>L</fixed-case>ogi<fixed-case>T</fixed-case>orch: A <fixed-case>P</fixed-case>y<fixed-case>T</fixed-case>orch-based library for logical reasoning on natural language ChadiHelweTelecom Paris, Institut Polytechnique de Paris ChloéClavelLTCI, Telecom-Paris, Institut Polytechnique de Paris - FabianSuchanekTelecom Paris + FabianSuchanekTelecom Paris 250-257 Logical reasoning on natural language is one of the most challenging tasks for deep learning models. There has been an increasing interest in developing new benchmarks to evaluate the reasoning capabilities of language models such as BERT. In parallel, new models based on transformers have emerged to achieve ever better performance on these datasets. However, there is currently no library for logical reasoning that includes such benchmarks and models. This paper introduces LogiTorch, a PyTorch-based library that includes different logical reasoning benchmarks, different models, as well as utility functions such as co-reference resolution. This makes it easy to directly use the preprocessed datasets, to run the models, or to finetune them with different hyperparameters. LogiTorch is open source and can be found on GitHub. 2022.emnlp-demos.25 @@ -11845,7 +11845,7 @@ AbhikBhattacharjeeBangladesh University of Engineering and Technology AbinayaMahendiranMphasis NEXT Labs AlexWangNew York University - AlexandrosPapangelisAmazon Alexa AI + AlexandrosPapangelisAmazon Alexa AI AmanMadaanCarnegie Mellon University AngelinaMcmillan-majorUniversity of Washington AnnaShvetsFabLab by Inetum @@ -11859,15 +11859,15 @@ CristinaGarbaceaUniversity of Michigan DakuoWangMIT-IBM Watson AI Lab / Northeastern University DanielDeutschUniversity of Pennsylvania - DeyiXiongTianjin University + DeyiXiongTianjin University DiJinAmazon Alexa AI DimitraGkatziaEdinburgh Napier University - DragomirRadevYale University + DragomirRadevYale University ElizabethClarkGoogle Research EsinDurmusStanford University FaisalLadhakColumbia University FilipGinterUniversity of Turku - Genta IndraWinataThe Hong Kong University of Science and Technology + Genta IndraWinataThe Hong Kong University of Science and Technology HendrikStrobeltIBM Research / MIT-IBM Watson AI Lab HiroakiHayashiCarnegie Mellon University / Salesforce Research JekaterinaNovikovaWinterlight Labs @@ -11886,7 +11886,7 @@ LiZhangUniversity of Pennsylvania MahimPushkarnaGoogle Research MathiasCreutzUniversity of Helsinki - MichaelWhiteThe Ohio State University + MichaelWhiteThe Ohio State University Mihir SanjayKaleGoogle Research Moussa KamalEddineÉcole Polytechnique NicoDaheimRWTH Aachen University @@ -11902,7 +11902,7 @@ SaadMahamoodtrivago N.V SalomeyOseiMasakhane SamuelCahyawijayaHKUST - SanjaŠtajnerPompeu Fabra University + SanjaŠtajnerPompeu Fabra University SebastienMontellaOrange Labs ShailzaJollyTU Kaiserslautern SimonMillePompeu Fabra University @@ -11927,9 +11927,9 @@ <fixed-case>KGI</fixed-case>: An Integrated Framework for Knowledge Intensive Language Tasks Md Faisal MahbubChowdhuryIBM Research AI - MichaelGlassIbm + MichaelGlassIbm GaetanoRossielloIBM Research AI - AlfioGliozzoIBM Research AI + AlfioGliozzoIBM Research AI NandanaMihindukulasooriyaIBM Research AI 282-288 In this paper, we present a system to showcase the capabilities of the latest state-of-the-art retrieval augmented generation models trained on knowledge-intensive language tasks, such as slot filling, open domain question answering, dialogue, and fact-checking. Moreover, given a user query, we show how the output from these different models can be combined to cross-examine the outputs of each other. Particularly, we show how accuracy in dialogue can be improved using the question answering model. We are also releasing all models used in the demo as a contribution of this paper. A short video demonstrating the system is available at https://ibm.box.com/v/emnlp2022-demos. @@ -11971,7 +11971,7 @@ FanBaiGeorgia Institute of Technology AlanRitterGeorgia Institute of Technology PeterMadridBiosciences Division, SRI International - DayneFreitagSRI International + DayneFreitagSRI International JohnNiekrasz 311-318 In this paper we present SynKB, an open-source, automatically extracted knowledge base of chemical synthesis protocols. Similar to proprietary chemistry databases such as Reaxsys, SynKB allows chemists to retrieve structured knowledge about synthetic procedures. By taking advantage of recent advances in natural language processing for procedural texts, SynKB supports more flexible queries about reaction conditions, and thus has the potential to help chemists search the literature for conditions used in relevant reactions as they design new synthetic routes. Using customized Transformer models to automatically extract information from 6 million synthesis procedures described in U.S. and EU patents, we show that for many queries, SynKB has higher recall than Reaxsys, while maintaining high precision. We plan to make SynKB available as an open-source tool; in contrast, proprietary chemistry databases require costly subscriptions. @@ -12053,7 +12053,7 @@ Hands-On Interactive Neuro-Symbolic <fixed-case>NLP</fixed-case> with <fixed-case>DR</fixed-case>ai<fixed-case>L</fixed-case> - Maria LeonorPachecoUniversity of Colorado Boulder / Microsoft Research + Maria LeonorPachecoUniversity of Colorado Boulder / Microsoft Research ShamikRoyPurdue University DanGoldwasserPurdue University 371-378 @@ -12137,8 +12137,8 @@ YiwenHuRenmin University of China ZhuohaoYuRenmin University of China WenxunDaiXidian university - Wayne XinZhaoRuc - Jian-yunNieUniversity of Montreal + Wayne XinZhaoRuc + Jian-yunNieUniversity of Montreal Ji-rongWenRenmin University of China 435-444 To facilitate research on text generation, this paper presents a comprehensive and unified library, TextBox 2.0, focusing on the use of pre-trained language models (PLMs). To be comprehensive, our library covers 13 common text generation tasks and their corresponding 83 datasets and further incorporates 45 PLMs covering general, translation, Chinese, dialogue, controllable, distilled, prompting, and lightweight PLMs. We also implement 4 efficient training strategies and provide 4 generation objectives for pre-training new PLMs from scratch. To be unified, we design the interfaces to support the entire research pipeline (from data loading to training and evaluation), ensuring that each step can be fulfilled in a unified way. Despite the rich functionality, it is easy to use our library, either through the friendly Python API or command line. To validate the effectiveness of our library, we conduct extensive experiments and exemplify four types of research scenarios. The project is released at the link: https://github.com/RUCAIBox/TextBox#2.0. @@ -12181,7 +12181,7 @@ XiaodiSunMicrosoft SunnyRajagopalanGoogle PriyankaNigamAmazon - WeiyiLuAmazon + WeiyiLuAmazon YiXuAmazon ImanKeivanlooAmazon BelindaZengAmazon @@ -12197,7 +12197,7 @@ A Hybrid Approach to Cross-lingual Product Review Summarization SalehSoltanAmazon Alexa VictorSotoAmazon Inc. - KeTranAmazon + KeTranAmazon WaelHamzaAmazon 18-28 We present a hybrid approach for product review summarization which consists of: (i) an unsupervised extractive step to extract the most important sentences out of all the reviews, and (ii) a supervised abstractive step to summarize the extracted sentences into a coherent short summary. This approach allows us to develop an efficient cross-lingual abstractive summarizer that can generate summaries in any language, given the extracted sentences out of thousands of reviews in a source language. In order to train and test the abstractive model, we create the Cross-lingual Amazon Reviews Summarization (CARS) dataset which provides English summaries for training, and English, French, Italian, Arabic, and Hindi summaries for testing based on selected English reviews. We show that the summaries generated by our model are as good as human written summaries in coherence, informativeness, non-redundancy, and fluency. @@ -12254,7 +12254,7 @@ NiranjanUma NareshAmazon ZiyanJiangAmazon AnkitAnkitAmazon - SungjinLeeAmazon + SungjinLeeAmazon JieHaoAmazon XingFanAmazon ChenleiGuoAmazon @@ -12297,7 +12297,7 @@ FrancescoMoramarcoBabylon Health AlexPapadopoulos KorfiatisBabylon Health MarkPereraBabylon - AnyaBelzADAPT Research Centre, Dublin City University + AnyaBelzADAPT Research Centre, Dublin City University EhudReiterUniversity of Aberdeen 111-120 Evaluating automatically generated text is generally hard due to the inherently subjective nature of many aspects of the output quality. This difficulty is compounded in automatic consultation note generation by differing opinions between medical experts both about which patient statements should be included in generated notes and about their respective importance in arriving at a diagnosis. Previous real-world evaluations of note-generation systems saw substantial disagreement between expert evaluators. In this paper we propose a protocol that aims to increase objectivity by grounding evaluations in Consultation Checklists, which are created in a preliminary step and then used as a common point of reference during quality assessment. We observed good levels of inter-annotator agreement in a first evaluation study using the protocol; further, using Consultation Checklists produced in the study as reference for automatic metrics such as ROUGE or BERTScore improves their correlation with human judgements compared to using the original human note. @@ -12483,8 +12483,8 @@ <fixed-case>C</fixed-case>o<fixed-case>C</fixed-case>o<fixed-case>ID</fixed-case>: Learning Contrastive Representations and Compact Clusters for Semi-Supervised Intent Discovery - QianCaoSoochow University - DeyiXiongTianjin University + QianCaoSoochow University + DeyiXiongTianjin University QinlongWangLeyantech AI XiaPengSoochow University 226-236 @@ -12496,8 +12496,8 @@ Tractable & Coherent Multi-Document Summarization: Discrete Optimization of Multiple Neural Modeling Streams via Integer Linear Programming - LittonJ KurisinkelA-star - NancyChenInstitute for Infocomm Research, A*STAR + LittonJ KurisinkelA-star + NancyChenInstitute for Infocomm Research, A*STAR 237-243 One key challenge in multi-document summarization is the generated summary is often less coherent compared to single document summarization due to the larger heterogeneity of the input source content. In this work, we propose a generic framework to jointly consider coherence and informativeness in multi-document summarization and offers provisions to replace individual components based on the domain of source text. In particular, the framework characterizes coherence through verb transitions and entity mentions and takes advantage of syntactic parse trees and neural modeling for intra-sentential noise pruning. The framework cast the entire problem as an integer linear programming optimization problem with neural and non-neural models as linear components. We evaluate our method in the news and legal domains. The proposed approach consistently performs better than competitive baselines for both objective metrics and human evaluation. 2022.emnlp-industry.24 @@ -12573,7 +12573,7 @@ VladislavBelyaevComcast MadhuriEmmadiComcast CraigMurrayComcast - FerhanTureComcast Applied AI Research + FerhanTureComcast Applied AI Research JimmyLinUniversity of Waterloo 285-293 End-to-end automatic speech recognition systems represent the state of the art, but they rely on thousands of hours of manually annotated speech for training, as well as heavyweight computation for inference. Of course, this impedes commercialization since most companies lack vast human and computational resources. In this paper, we explore training and deploying an ASR system in the label-scarce, compute-limited setting. To reduce human labor, we use a third-party ASR system as a weak supervision source, supplemented with labeling functions derived from implicit user feedback. To accelerate inference, we propose to route production-time queries across a pool of CUDA graphs of varying input lengths, the distribution of which best matches the traffic’s. Compared to our third-party ASR, we achieve a relative improvement in word-error rate of 8% and a speedup of 600%. Our system, called SpeechNet, currently serves 12 million queries per day on our voice-enabled smart television. To our knowledge, this is the first time a large-scale, Wav2vec-based deployment has been described in the academic literature. @@ -12626,7 +12626,7 @@ Learning Geolocations for Cold-Start and Hard-to-Resolve Addresses via Deep Metric Learning - GovindAmazon + GovindAmazon SaurabhSohoneyAmazon 322-331 With evergrowing digital adoption in the society and increasing demand for businesses to deliver to customers doorstep, the last mile hop of transportation planning poses unique challenges in emerging geographies with unstructured addresses. One of the crucial inputs to facilitate effective planning is the task of geolocating customer addresses. Existing systems operate by aggregating historical delivery locations or by resolving/matching addresses to known buildings and campuses to vend a high-precision geolocation. However, by design they fail to cater to a significant fraction of addresses which are new in the system and have inaccurate or missing building level information. We propose a framework to resolve these addresses (referred to as hard-to-resolve henceforth) to a shallower granularity termed as neighbourhood. Specifically, we propose a weakly supervised deep metric learning model to encode the geospatial semantics in address embeddings. We present empirical evaluation on India (IN) and the United Arab Emirates (UAE) hard-to-resolve addresses to show significant improvements in learning geolocations i.e., 22% (IN) & 55% (UAE) reduction in delivery defects (where learnt geocode is Y meters away from actual location), and 43% (IN) & 90% (UAE) reduction in 50th percentile (p50) distance between learnt and actual delivery locations over the existing production system. @@ -12670,7 +12670,7 @@ AnjieFangAmazon BesnikFetahuAmazon OlegRokhlenkoAmazon - ShervinMalmasiAmazon + ShervinMalmasiAmazon 357-370 Conversational Question Answering (CQA) aims to answer questions contained within dialogues, which are not easily interpretable without context. Developing a model to rewrite conversational questions into self-contained ones is an emerging solution in industry settings as it allows using existing single-turn QA systems to avoid training a CQA model from scratch. Previous work trains rewriting models using human rewrites as supervision. However, such objectives are disconnected with QA models and therefore more human-like rewrites do not guarantee better QA performance. In this paper we propose using QA feedback to supervise the rewriting model with reinforcement learning. Experiments show that our approach can effectively improve QA performance over baselines for both extractive and retrieval QA. Furthermore, human evaluation shows that our method can generate more accurate and detailed rewrites when compared to human annotations. 2022.emnlp-industry.36 @@ -12775,7 +12775,7 @@ BesnikFetahuAmazon AkashVeeragouniAmazon OlegRokhlenkoAmazon - ShervinMalmasiAmazon + ShervinMalmasiAmazon 429-439 We describe an application of Knowledge Distillation used to distill and deploy multilingual Transformer models for voice assistants, enabling text classification for customers globally. Transformers have set new state-of-the-art results for tasks like intent classification, and multilingual models exploit cross-lingual transfer to allow serving requests across 100+ languages. However, their prohibitive inference time makes them impractical to deploy in real-world scenarios with low latency requirements, such as is the case of voice assistants. We address the problem of cross-architecture distillation of multilingual Transformers to simpler models, while maintaining multilinguality without performance degradation. Training multilingual student models has received little attention, and is our main focus. We show that a teacher-student framework, where the teacher’s unscaled activations (logits) on unlabelled data are used to supervise student model training, enables distillation of Transformers into efficient multilingual CNN models. Our student model achieves equivalent performance as the teacher, and outperforms a similar model trained on the labelled data used to train the teacher model. This approach has enabled us to accurately serve global customer requests at speed (18x improvement), scale, and low cost. 2022.emnlp-industry.43 @@ -12822,7 +12822,7 @@ Zero-Shot Dynamic Quantization for Transformer Inference YousefEl-kurdiIBM Research JerryQuinnIBM Research - AviSilIBM Research AI + AviSilIBM Research AI 451-457 We introduce a novel run-time method for significantly reducing the accuracy loss associated with quantizing BERT-like models to 8-bit integers. Existing methods for quantizing models either modify the training procedure, or they require an additional calibration step to adjust parameters that also requires a selected held-out dataset. Our method permits taking advantage of quantization without the need for these adjustments. We present results on several NLP tasks demonstrating the usefulness of this technique. 2022.emnlp-industry.45 @@ -12864,7 +12864,7 @@ RakeshChadaAmazon PradeepNatarajanAmazon ChenleiGuoAmazon - GokhanTurAmazon + GokhanTurAmazon 475-483 In conversational AI agents, Query Rewriting (QR) plays a crucial role in reducing user frictions and satisfying their daily demands. User frictions are caused by various reasons, such as errors in the conversational AI system, users’ accent or their abridged language. In this work, we present a novel Constrained Generation Framework (CGF) for query rewriting at both global and personalized levels. It is based on the encoder-decoder framework, where the encoder takes the query and its previous dialogue turns as the input to form a context-enhanced representation, and the decoder uses constrained decoding to generate the rewrites based on the pre-defined global or personalized constrained decoding space. Extensive offline and online A/B experiments show that the proposed CGF significantly boosts the query rewriting performance. 2022.emnlp-industry.48 @@ -12919,14 +12919,14 @@ <fixed-case>PLATO</fixed-case>-Ad: A Unified Advertisement Text Generation Framework with Multi-Task Prompt Learning ZeyangLeiBaidu Inc. - ChaoZhangBaidu Inc. + ChaoZhangBaidu Inc. XinchaoXuBaidu WenquanWuBaidu Zheng-yuNiuBaidu Inc. HuaWuBaidu HaifengWangBaidu YiYangBaidu Inc. - ShuanglongLiBaidu Inc. + ShuanglongLiBaidu Inc. 512-520 Online advertisement text generation aims at generating attractive and persuasive text ads to appeal to users clicking ads or purchasing products. While pretraining-based models have achieved remarkable success in generating high-quality text ads, some challenges still remain, such as ad generation in low-resource scenarios and training efficiency for multiple ad tasks. In this paper, we propose a novel unified text ad generation framework with multi-task prompt learning, called PLATO-Ad, totackle these problems. Specifically, we design a three-phase transfer learning mechanism to tackle the low-resource ad generation problem. Furthermore, we present a novel multi-task prompt learning mechanism to efficiently utilize a single lightweight model to solve multiple ad generation tasks without loss of performance compared to training a separate model for each task. Finally, we conduct offline and online evaluations and experiment results show that PLATO-Ad significantly outperforms the state-of-the-art on both offline and online metrics. PLATO-Ad has been deployed in a leading advertising platform with 3.5% CTR improvement on search ad descriptions and 10.4% CTR improvement on feed ad titles. 2022.emnlp-industry.52 @@ -12978,7 +12978,7 @@ KunZhouRenmin University of China YeyunGongMicrosoft Research Asia XiaoLiuMicrosoft Research Asia - Wayne XinZhaoRuc + Wayne XinZhaoRuc YelongShenMicrosoft AnleiDongMicrosoft JingwenLuMicrosoft diff --git a/data/xml/2022.emoji.xml b/data/xml/2022.emoji.xml index 42c184dbef..a2d4111be5 100644 --- a/data/xml/2022.emoji.xml +++ b/data/xml/2022.emoji.xml @@ -88,7 +88,7 @@ Semantic Congruency Facilitates Memory for Emojis Andriana L.Christofalos - Laurie BethFeldman + Laurie BethFeldman HeatherSheridan 63-68 Emojis can assume different relations with the sentence context in which they occur. While affective elaboration and emoji-word redundancy are frequently investigated in laboratory experiments, the role of emojis in inferential processes has received much less attention. Here, we used an online ratings task and a recognition memory task to investigate whether differences in emoji function within a sentence affect judgments of emoji-text coherence and subsequent recognition accuracy. Emojis that function as synonyms of a target word from the passages were rated as better fitting with the passage (more coherent) than emojis consistent with an inference from the passage, and both types of emojis were rated as more coherent than incongruent (unrelated) emojis. In a recognition test, emojis consistent with the semantic content of passages (synonym and inference emojis) were better recognized than incongruent emojis. Findings of the present study provide corroborating evidence that readers extract semantic information from emojis and then integrate it with surrounding passage content. diff --git a/data/xml/2022.eurali.xml b/data/xml/2022.eurali.xml index 6f092b3387..a33753a926 100644 --- a/data/xml/2022.eurali.xml +++ b/data/xml/2022.eurali.xml @@ -3,10 +3,10 @@ Proceedings of the Workshop on Resources and Technologies for Indigenous, Endangered and Lesser-resourced Languages in Eurasia within the 13th Language Resources and Evaluation Conference - Atul Kr.Ojha + Atul Kr.Ojha SinaAhmadi Chao-HongLiu - John P.McCrae + John P.McCrae European Language Resources Association
Marseille, France
June diff --git a/data/xml/2022.eval4nlp.xml b/data/xml/2022.eval4nlp.xml index d50f7eeadf..30a86d7110 100644 --- a/data/xml/2022.eval4nlp.xml +++ b/data/xml/2022.eval4nlp.xml @@ -28,7 +28,7 @@ MasaoIdeuchi MasaoUtiyama YoshiakiOida - EiichiroSumita + EiichiroSumita 1–10 2022.eval4nlp-1.1 10.18653/v1/2022.eval4nlp-1.1 @@ -134,7 +134,7 @@ Assessing Neural Referential Form Selectors on a Realistic Multilingual Dataset GuanyiChen FahimeSame - KeesVan Deemter + KeesVan Deemter 103–114 2022.eval4nlp-1.11 10.18653/v1/2022.eval4nlp-1.11 diff --git a/data/xml/2022.evonlp.xml b/data/xml/2022.evonlp.xml index b61c61a87f..7eac92cf15 100644 --- a/data/xml/2022.evonlp.xml +++ b/data/xml/2022.evonlp.xml @@ -4,9 +4,9 @@ Proceedings of the First Workshop on Ever Evolving NLP (EvoNLP) FrancescoBarbieri - JoseCamacho-Collados + JoseCamacho-Collados BhuwanDhingra - LuisEspinosa-Anke + LuisEspinosa-Anke ElenaGribovskaya AngelikiLazaridou DanielLoureiro diff --git a/data/xml/2022.fever.xml b/data/xml/2022.fever.xml index 407267ebab..fc1ddc648c 100644 --- a/data/xml/2022.fever.xml +++ b/data/xml/2022.fever.xml @@ -8,7 +8,7 @@ OanaCocarascu ZhijiangGuo ArpitMittal - MichaelSchlichtkrull + MichaelSchlichtkrull JamesThorne AndreasVlachos Association for Computational Linguistics @@ -52,7 +52,7 @@ NikitaBhutani AlexanderWhedon EstevamHruschka - YoshiSuhara + YoshiSuhara 16-28 Many people read online reviews to learn about real-world entities of their interest. However, majority of reviews only describes general experiences and opinions of the customers, and may not reveal facts that are specific to the entity being reviewed. In this work, we focus on a novel task of mining from a review corpus sentences that are unique for each entity. We refer to this task as Salient Fact Extraction. Salient facts are extremely scarce due to their very nature. Consequently, collecting labeled examples for training supervised models is tedious and cost-prohibitive. To alleviate this scarcity problem, we develop an unsupervised method, ZL-Distiller, which leverages contextual language representations of the reviews and their distributional patterns to identify salient sentences about entities. Our experiments on multiple domains (hotels, products, and restaurants) show that ZL-Distiller achieves state-of-the-art performance and further boosts the performance of other supervised/unsupervised algorithms for the task. Furthermore, we show that salient sentences mined by ZL-Distiller provide unique and detailed information about entities, which benefit downstream NLP applications including question answering and summarization. 2022.fever-1.3 @@ -78,7 +78,7 @@ A Semantics-Aware Approach to Automated Claim Verification BlancaCalvo Figueras MontseCuadros - RodrigoAgerri + RodrigoAgerri 37-48 The influence of fake news in the perception of reality has become a mainstream topic in the last years due to the fast propagation of misleading information. In order to help in the fight against misinformation, automated solutions to fact-checking are being actively developed within the research community. In this context, the task of Automated Claim Verification is defined as assessing the truthfulness of a claim by finding evidence about its veracity. In this work we empirically demonstrate that enriching a BERT model with explicit semantic information such as Semantic Role Labelling helps to improve results in claim verification as proposed by the FEVER benchmark. Furthermore, we perform a number of explainability tests that suggest that the semantically-enriched model is better at handling complex cases, such as those including passive forms or multiple propositions. 2022.fever-1.5 diff --git a/data/xml/2022.fieldmatters.xml b/data/xml/2022.fieldmatters.xml index 46e0addf43..a1245958a7 100644 --- a/data/xml/2022.fieldmatters.xml +++ b/data/xml/2022.fieldmatters.xml @@ -12,7 +12,7 @@ TatianaShavrina Eric LeFerrand ValentinMalykh - FrancisTyers + FrancisTyers TimofeyArkhangelskiy VladislavMikhailov AlenaFenogenova @@ -49,7 +49,7 @@
Machine Translation Between High-resource Languages in a Language Documentation Setting - KatharinaKann + KatharinaKann AbteenEbrahimi KristineStenzel AlexisPalmer @@ -70,7 +70,7 @@ The interaction between cognitive ease and informativeness shapes the lexicons of natural languages ThomasBrochhagen - GemmaBoleda + GemmaBoleda 42–44 It is common for languages to express multiple meanings with the same word, a phenomenon known as colexification. For instance, the meanings FINGER and TOE colexify in the word “dedo” in Spanish, while they do not colexify in English. Colexification has been suggested to follow universal constraints. In particular, previous work has shown that related meanings are more prone to colexify. This tendency has been explained in terms of the cognitive pressure for ease, since expressing related meanings with the same word makes lexicons easier to learn and use. The present study examines the interplay between this pressure and a competing universal constraint, the functional pressure for languages to maximize informativeness. We hypothesize that meanings are more likely to colexify if they are related (fostering ease), but not so related as to become confusable and cause misunderstandings (fostering informativeness). We find support for this principle in data from over 1200 languages and 1400 meanings. Our results thus suggest that universal principles shape the lexicons of natural languages. More broadly, they contribute to the growing body of evidence suggesting that languages evolve to strike a balance between competing functional and cognitive pressures. 2022.fieldmatters-1.5 diff --git a/data/xml/2022.findings.xml b/data/xml/2022.findings.xml index 47e4c7e65b..e3ffdcae8a 100644 --- a/data/xml/2022.findings.xml +++ b/data/xml/2022.findings.xml @@ -4,7 +4,7 @@ Findings of the Association for Computational Linguistics: ACL 2022 SmarandaMuresan - PreslavNakov + PreslavNakov AlineVillavicencio Association for Computational Linguistics
Dublin, Ireland
@@ -68,7 +68,7 @@ EmilBiju AnirudhSriram PratyushKumar - MiteshKhapra + MiteshKhapra 31-44 Self-attention heads are characteristic of Transformer models and have been well studied for interpretability and pruning. In this work, we demonstrate an altogether different utility of attention heads, namely for adversarial detection. Specifically, we propose a method to construct input-specific attention subnetworks (IAS) from which we extract three features to discriminate between authentic and adversarial inputs. The resultant detector significantly improves (by over 7.5%) the state-of-the-art adversarial detection accuracy for the BERT encoder on 10 NLU datasets with 11 different adversarial attack types. We also demonstrate that our method (a) is more accurate for larger models which are likely to have more spurious correlations and thus vulnerable to adversarial attack, and (b) performs well even with modest training sets of adversarial examples. 2022.findings-acl.4 @@ -95,9 +95,9 @@ SarubiThillainathan ShravanNayak SurangikaRanathunga - David IfeoluwaAdelani + David IfeoluwaAdelani RuisiSu - Arya D.McCarthy + Arya D.McCarthy 58-67 What can pre-trained multilingual sequence-to-sequence models like mBART contribute to translating low-resource languages? We conduct a thorough empirical experiment in 10 languages to ascertain this, considering five factors: (1) the amount of fine-tuning data, (2) the noise in the fine-tuning data, (3) the amount of pre-training data in the model, (4) the impact of domain mismatch, and (5) language typology. In addition to yielding several heuristics, the experiments form a framework for evaluating the data sensitivities of machine translation systems. While mBART is robust to domain differences, its translations for unseen and typologically distant languages remain below 3.0 BLEU. In answer to our title’s question, mBART is not a low-resource panacea; we therefore encourage shifting the emphasis from new models to new data. 2022.findings-acl.6 @@ -279,7 +279,7 @@ NathanYoung QimingBao JoshuaBensemann - MichaelWitbrock + MichaelWitbrock 218-227 Transformers have recently been shown to be capable of reliably performing logical reasoning over facts and rules expressed in natural language, but abductive reasoning - inference to the best explanation of an unexpected observation - has been underexplored despite significant applications to scientific discovery, common-sense reasoning, and model interpretability. This paper presents AbductionRules, a group of natural language datasets designed to train and test generalisable abduction over natural-language knowledge bases. We use these datasets to finetune pretrained Transformers and discuss their performance, finding that our models learned generalisable abductive techniques but also learned to exploit the structure of our data. Finally, we discuss the viability of this approach to abductive reasoning and ways in which it may be improved in future work. 2022.findings-acl.19 @@ -398,7 +398,7 @@ LingboMo AshleyLewis HuanSun - MichaelWhite + MichaelWhite 322-342 Existing studies on semantic parsing focus on mapping a natural-language utterance to a logical form (LF) in one turn. However, because natural language may contain ambiguity and variability, this is a difficult challenge. In this work, we investigate an interactive semantic parsing framework that explains the predicted LF step by step in natural language and enables the user to make corrections through natural-language feedback for individual steps. We focus on question answering over knowledge bases (KBQA) as an instantiation of our framework, aiming to increase the transparency of the parsing process and help the user trust the final answer. We construct INSPIRED, a crowdsourced dialogue dataset derived from the ComplexWebQuestions dataset. Our experiments show that this framework has the potential to greatly improve overall parse accuracy. Furthermore, we develop a pipeline for dialogue simulation to evaluate our framework w.r.t. a variety of state-of-the-art KBQA models without further crowdsourcing effort. The results demonstrate that our framework promises to be effective across such models. 2022.findings-acl.28 @@ -437,7 +437,7 @@ Towards Responsible Natural Language Annotation for the Varieties of <fixed-case>A</fixed-case>rabic A.Bergman - MonaDiab + MonaDiab 364-371 When building NLP models, there is a tendency to aim for broader coverage, often overlooking cultural and (socio)linguistic nuance. In this position paper, we make the case for care and attention to such nuances, particularly in dataset annotation, as well as the inclusion of cultural and linguistic expertise in the process. We present a playbook for responsible dataset creation for polyglossic, multidialectal languages. This work is informed by a study on Arabic annotation of social media content. 2022.findings-acl.31 @@ -477,7 +477,7 @@ <fixed-case>MDER</fixed-case>ank: A Masked Document Embedding Rank Approach for Unsupervised Keyphrase Extraction LinhanZhang QianChen - WenWang + WenWang ChongDeng ShiLiangZhang BingLi @@ -544,7 +544,7 @@ ZuchaoLi YiranWang MasaoUtiyama - EiichiroSumita + EiichiroSumita HaiZhao TaroWatanabe 459-471 @@ -558,7 +558,7 @@ PrashantKodali AnmolGoel MonojitChoudhury - ManishShrivastava + ManishShrivastava PonnurangamKumaraguru 472-480 Code mixing is the linguistic phenomenon where bilingual speakers tend to switch between two or more languages in conversations. Recent work on code-mixing in computational settings has leveraged social media code mixed texts to train NLP models. For capturing the variety of code mixing in, and across corpus, Language ID (LID) tags based measures (CMI) have been proposed. Syntactical variety/patterns of code-mixing and their relationship vis-a-vis computational model’s performance is under explored. In this work, we investigate a collection of English(en)-Hindi(hi) code-mixed datasets from a syntactic lens to propose, SyMCoM, an indicator of syntactic variety in code-mixed text, with intuitive theoretical bounds. We train SoTA en-hi PoS tagger, accuracy of 93.4%, to reliably compute PoS tags on a corpus, and demonstrate the utility of SyMCoM by applying it on various syntactical categories on a collection of datasets, and compare datasets using the measure. @@ -621,7 +621,7 @@ Better Quality Estimation for Low Resource Corpus Mining MuhammedKocyigit JihoLee - DerryWijaya + DerryWijaya 533-543 Quality Estimation (QE) models have the potential to change how we evaluate and maybe even train machine translation models. However, these models still lack the robustness to achieve general adoption. We show that Stateof-the-art QE models, when tested in a Parallel Corpus Mining (PCM) setting, perform unexpectedly bad due to a lack of robustness to out-of-domain examples. We propose a combination of multitask training, data augmentation and contrastive learning to achieve better and more robust QE performance. We show that our method improves QE performance significantly in the MLQE challenge and the robustness of QE models when tested in the Parallel Corpus Mining setup. We increase the accuracy in PCM by more than 0.80, making it on par with state-of-the-art PCM methods that use millions of sentence pairs to train their models. In comparison, we use a thousand times less data, 7K parallel sentences in total, and propose a novel low resource PCM method. 2022.findings-acl.45 @@ -660,7 +660,7 @@ Extracting Latent Steering Vectors from Pretrained Language Models NishantSubramani NiveditaSuresh - MatthewPeters + MatthewPeters 566-581 Prior work on controllable text generation has focused on learning how to control language models through trainable decoding, smart-prompt design, or fine-tuning based on a desired objective. We hypothesize that the information needed to steer the model to generate a target sentence is already encoded within the model. Accordingly, we explore a different approach altogether: extracting latent vectors directly from pretrained language model decoders without fine-tuning. Experiments show that there exist steering vectors, which, when added to the hidden states of the language model, generate a target sentence nearly perfectly (> 99 BLEU) for English sentences from a variety of domains. We show that vector arithmetic can be used for unsupervised sentiment transfer on the Yelp sentiment benchmark, with performance comparable to models tailored to this task. We find that distances between steering vectors reflect sentence similarity when evaluated on a textual similarity benchmark (STS-B), outperforming pooled hidden states of models. Finally, we present an analysis of the intrinsic properties of the steering vectors. Taken together, our results suggest that frozen LMs can be effectively controlled through their latent steering space. 2022.findings-acl.48 @@ -670,10 +670,10 @@ Domain Generalisation of <fixed-case>NMT</fixed-case>: Fusing Adapters with Leave-One-Domain-Out Training - Thuy-TrangVu + Thuy-TrangVu ShahramKhadivi DinhPhung - GholamrezaHaffari + GholamrezaHaffari 582-588 Generalising to unseen domains is under-explored and remains a challenge in neural machine translation. Inspired by recent research in parameter-efficient transfer learning from pretrained models, this paper proposes a fusion-based generalisation method that learns to combine domain-specific parameters. We propose a leave-one-domain-out training strategy to avoid information leaking to address the challenge of not knowing the test domain during training time. Empirical results on three language pairs show that our proposed fusion method outperforms other baselines up to +0.8 BLEU score on average. 2022.findings-acl.49 @@ -700,8 +700,8 @@ ChaoZhao TenghaoHuang SomnathBasu Roy Chowdhury - Muthu KumarChandrasekaran - KathleenMcKeown + Muthu KumarChandrasekaran + KathleenMcKeown SnigdhaChaturvedi 613-621 A common method for extractive multi-document news summarization is to re-formulate it as a single-document summarization problem by concatenating all documents as a single meta-document. However, this method neglects the relative importance of documents. We propose a simple approach to reorder the documents according to their relative importance before concatenating and summarizing them. The reordering makes the salient content easier to learn by the summarization model. Experiments show that our approach outperforms previous state-of-the-art methods with more complex architectures. @@ -714,7 +714,7 @@ NikitaSoni MatthewMatero NiranjanBalasubramanian - H. AndrewSchwartz + H. AndrewSchwartz 622-636 Natural language is generated by people, yet traditional language modeling views words or documents as if generated independently. Here, we propose human language modeling (HuLM), a hierarchical extension to the language modeling problem where by a human- level exists to connect sequences of documents (e.g. social media messages) and capture the notion that human language is moderated by changing human states. We introduce, HaRT, a large-scale transformer model for solving HuLM, pre-trained on approximately 100,000 social media users, and demonstrate it’s effectiveness in terms of both language modeling (perplexity) for social media and fine-tuning for 4 downstream tasks spanning document- and user-levels. Results on all tasks meet or surpass the current state-of-the-art. 2022.findings-acl.52 @@ -740,7 +740,7 @@ ShuxianZou ShaonanWang JiajunZhang - ChengqingZong + ChengqingZong 648-657 Decoding language from non-invasive brain activity has attracted increasing attention from both researchers in neuroscience and natural language processing. Due to the noisy nature of brain recordings, existing work has simplified brain-to-word decoding as a binary classification task which is to discriminate a brain signal between its corresponding word and a wrong one. This pairwise classification task, however, cannot promote the development of practical neural decoders for two reasons. First, it has to enumerate all pairwise combinations in the test set, so it is inefficient to predict a word in a large vocabulary. Second, a perfect pairwise decoder cannot guarantee the performance on direct classification. To overcome these and go a step further to a realistic neural decoder, we propose a novel Cross-Modal Cloze (CMC) task which is to predict the target word encoded in the neural image with a context as prompt. Furthermore, to address this task, we propose a general approach that leverages the pre-trained language model to predict the target word. To validate our method, we perform experiments on more than 20 participants from two brain imaging datasets. Our method achieves 28.91% top-1 accuracy and 54.19% top-5 accuracy on average across all participants, significantly outperforming several baselines. This result indicates that our model can serve as a state-of-the-art baseline for the CMC task. More importantly, it demonstrates that it is feasible to decode a certain word within a large vocabulary from its neural brain activity. 2022.findings-acl.54 @@ -772,7 +772,7 @@ KevinChang YunyaoLi LucianPopa - ChengXiangZhai + ChengXiangZhai 679-692 We propose a probabilistic approach to select a subset of a target domain representative keywords from a candidate set, contrasting with a context domain. Such a task is crucial for many downstream tasks in natural language processing. To contrast the target domain and the context domain, we adapt the two-component mixture model concept to generate a distribution of candidate keywords. It provides more importance to the distinctive keywords of the target domain than common keywords contrasting with the context domain. To support the representativeness of the selected keywords towards the target domain, we introduce an optimization algorithm for selecting the subset from the generated candidate distribution. We have shown that the optimization algorithm can be efficiently implemented with a near-optimal approximation guarantee. Finally, extensive experiments on multiple domains demonstrate the superiority of our approach over other baselines for the tasks of keyword summary generation and trending keywords selection. 2022.findings-acl.56 @@ -796,7 +796,7 @@ KushalArora LaylaEl Asri HareeshBahuleyan - JackieCheung + JackieCheung 700-710 Current language generation models suffer from issues such as repetition, incoherence, and hallucinations. An often-repeated hypothesis for this brittleness of generation models is that it is caused by the training and the generation procedure mismatch, also referred to as exposure bias. In this paper, we verify this hypothesis by analyzing exposure bias from an imitation learning perspective. We show that exposure bias leads to an accumulation of errors during generation, analyze why perplexity fails to capture this accumulation of errors, and empirically show that this accumulation results in poor generation quality. 2022.findings-acl.58 @@ -808,7 +808,7 @@ Question Answering Infused Pre-training of General-Purpose Contextualized Representations RobinJia MikeLewis - LukeZettlemoyer + LukeZettlemoyer 711-728 We propose a pre-training objective based on question answering (QA) for learning general-purpose contextual representations, motivated by the intuition that the representation of a phrase in a passage should encode all questions that the phrase can answer in context. To this end, we train a bi-encoder QA model, which independently encodes passages and questions, to match the predictions of a more accurate cross-encoder model on 80 million synthesized QA pairs. By encoding QA-relevant information, the bi-encoder’s token-level representations are useful for non-QA downstream tasks without extensive (or in some cases, any) fine-tuning. We show large improvements over both RoBERTa-large and previous state-of-the-art results on zero-shot and few-shot paraphrase detection on four datasets, few-shot named entity recognition on two datasets, and zero-shot sentiment analysis on three datasets. 2022.findings-acl.59 @@ -818,7 +818,7 @@ Automatic Song Translation for Tonal Languages FenfeiGuo - ChenZhang + ChenZhang ZhiruiZhang QixinHe KejunZhang @@ -988,7 +988,7 @@ <fixed-case>DS</fixed-case>-<fixed-case>TOD</fixed-case>: Efficient Domain Specialization for Task-Oriented Dialog Chia-ChienHung AnneLauscher - SimonePonzetto + SimonePonzetto GoranGlavaš 891-904 Recent work has shown that self-supervised dialog-specific pretraining on large conversational datasets yields substantial gains over traditional language modeling (LM) pretraining in downstream task-oriented dialog (TOD). These approaches, however, exploit general dialogic corpora (e.g., Reddit) and thus presumably fail to reliably embed domain-specific knowledge useful for concrete downstream TOD domains. In this work, we investigate the effects of domain specialization of pretrained language models (PLMs) for TOD. Within our DS-TOD framework, we first automatically extract salient domain-specific terms, and then use them to construct DomainCC and DomainReddit – resources that we leverage for domain-specific pretraining, based on (i) masked language modeling (MLM) and (ii) response selection (RS) objectives, respectively. We further propose a resource-efficient and modular domain specialization by means of domain adapters – additional parameter-light layers in which we encode the domain knowledge. Our experiments with prominent TOD tasks – dialog state tracking (DST) and response retrieval (RR) – encompassing five domains from the MultiWOZ benchmark demonstrate the effectiveness of DS-TOD. Moreover, we show that the light-weight adapter-based specialization (1) performs comparably to full fine-tuning in single domain setups and (2) is particularly suitable for multi-domain specialization, where besides advantageous computational footprint, it can offer better TOD performance. @@ -1033,7 +1033,7 @@ ZichaoLi PrakharSharma Xing HanLu - JackieCheung + JackieCheung SivaReddy 926-937 Most research on question answering focuses on the pre-deployment stage; i.e., building an accurate model for deployment. In this paper, we ask the question: Can we improve QA systems further post-deployment based on user interactions? We focus on two kinds of improvements: 1) improving the QA system’s performance itself, and 2) providing the model with the ability to explain the correctness or incorrectness of an answer. We collect a retrieval-based QA dataset, FeedbackQA, which contains interactive feedback from users. We collect this dataset by deploying a base QA system to crowdworkers who then engage with the system and provide feedback on the quality of its answers. The feedback contains both structured ratings and unstructured natural language explanations. We train a neural model with this feedback data that can generate explanations and re-score answer candidates. We show that feedback data not only improves the accuracy of the deployed QA system but also other stronger non-deployed systems. The generated explanations also help users make informed decisions about the correctness of answers. @@ -1048,7 +1048,7 @@ MokanaranganThayaparan MarcoValentino JuliaRozanova - AndreFreitas + AndreFreitas 938-948 The application of Natural Language Inference (NLI) methods over large textual corpora can facilitate scientific discovery, reducing the gap between current research and the available large-scale scientific knowledge. However, contemporary NLI models are still limited in interpreting mathematical knowledge written in Natural Language, even though mathematics is an integral part of scientific argumentation for many disciplines. One of the fundamental requirements towards mathematical language understanding, is the creation of models able to meaningfully represent variables. This problem is particularly challenging since the meaning of a variable should be assigned exclusively from its defining type, i.e., the representation of a variable should come from its context. Recent research has formalised the variable typing task, a benchmark for the understanding of abstract mathematical types and variables in a sentence. In this work, we propose VarSlot, a Variable Slot-based approach, which not only delivers state-of-the-art results in the task of variable typing, but is also able to create context-based representations for variables. 2022.findings-acl.76 @@ -1071,9 +1071,9 @@ <fixed-case>BPE</fixed-case> vs. Morphological Segmentation: A Case Study on Machine Translation of Four Polysynthetic Languages ManuelMager - ArturoOncevay - ElisabethMager - KatharinaKann + ArturoOncevay + ElisabethMager + KatharinaKann ThangVu 961-971 Morphologically-rich polysynthetic languages present a challenge for NLP systems due to data sparsity, and a common strategy to handle this issue is to apply subword segmentation. We investigate a wide variety of supervised and unsupervised morphological segmentation methods for four polysynthetic languages: Nahuatl, Raramuri, Shipibo-Konibo, and Wixarika. Then, we compare the morphologically inspired segmentation methods against Byte-Pair Encodings (BPEs) as inputs for machine translation (MT) when translating to and from Spanish. We show that for all language pairs except for Nahuatl, an unsupervised morphological segmentation algorithm outperforms BPEs consistently and that, although supervised methods achieve better segmentation scores, they under-perform in MT challenges. Finally, we contribute two new morphological segmentation datasets for Raramuri and Shipibo-Konibo, and a parallel corpus for Raramuri–Spanish. @@ -1097,12 +1097,12 @@ Morphological Processing of Low-Resource Languages: Where We Are and What’s Next AdamWiemerslage - MiikkaSilfverberg + MiikkaSilfverberg ChangbingYang - AryaMcCarthy + AryaMcCarthy GarrettNicolai ElianaColunga - KatharinaKann + KatharinaKann 988-1007 Automatic morphological processing can aid downstream natural language processing applications, especially for low-resource languages, and assist language documentation efforts for endangered languages. Having long been multilingual, the field of computational morphology is increasingly moving towards approaches suitable for languages with minimal or no annotated resources. First, we survey recent developments in computational morphology with a focus on low-resource languages. Second, we argue that the field is ready to tackle the logical next challenge: understanding a language’s morphology from raw text alone. We perform an empirical study on a truly unsupervised version of the paradigm completion task and show that, while existing state-of-the-art models bridged by two newly proposed models we devise perform reasonably, there is still much room for improvement. The stakes are high: solving this task will increase the language coverage of morphological resources by a number of magnitudes. 2022.findings-acl.80 @@ -1115,7 +1115,7 @@ NaoyaInoue CharutaPethe AllenKim - StevenSkiena + StevenSkiena 1008-1019 We address the problem of learning fixed-length vector representations of characters in novels. Recent advances in word embeddings have proven successful in learning entity representations from short texts, but fall short on longer documents because they do not capture full book-level information. To overcome the weakness of such text-based embeddings, we propose two novel methods for representing characters: (i) graph neural network-based embeddings from a full corpus-based character network; and (ii) low-dimensional embeddings constructed from the occurrence pattern of characters in each novel. We test the quality of these character embeddings using a new benchmark suite to evaluate character representations, encompassing 12 different tasks. We show that our representation techniques combined with text-based embeddings lead to the best character representations, outperforming text-based embeddings in four tasks. Our dataset and evaluation script will be made publicly available to stimulate additional work in this area. 2022.findings-acl.81 @@ -1140,7 +1140,7 @@ SebastianPeralta JoãoSedoc GarrickSherman - LyleUngar + LyleUngar 1035-1047 Being able to reliably estimate self-disclosure – a key component of friendship and intimacy – from language is important for many psychology studies. We build single-task models on five self-disclosure corpora, but find that these models generalize poorly; the within-domain accuracy of predicted message-level self-disclosure of the best-performing model (mean Pearson’s r=0.69) is much higher than the respective across data set accuracy (mean Pearson’s r=0.32), due to both variations in the corpora (e.g., medical vs. general topics) and labeling instructions (target variables: self-disclosure, emotional disclosure, intimacy). However, some lexical features, such as expression of negative emotions and use of first person personal pronouns such as ‘I’ reliably predict self-disclosure across corpora. We develop a multi-task model that yields better results, with an average Pearson’s r of 0.37 for out-of-corpora prediction. 2022.findings-acl.83 @@ -1191,7 +1191,7 @@ LeahNann HarisJabbar SahanaUdupa - HinrichSchuetze + HinrichSchuetze 1089-1104 Building on current work on multilingual hate speech (e.g., Ousidhoum et al. (2019)) and hate speech reduction (e.g., Sap et al. (2020)), we present XTREMESPEECH, a new hate speech dataset containing 20,297 social media passages from Brazil, Germany, India and Kenya. The key novelty is that we directly involve the affected communities in collecting and annotating the data – as opposed to giving companies and governments control over defining and combatting hate speech. This inclusive approach results in datasets more representative of actually occurring online speech and is likely to facilitate the removal of the social media content that marginalized communities view as causing the most harm. Based on XTREMESPEECH, we establish novel tasks with accompanying baselines, provide evidence that cross-country training is generally not feasible due to cultural differences between countries and perform an interpretability analysis of BERT’s predictions. 2022.findings-acl.87 @@ -1255,7 +1255,7 @@ OanaIgnat JeanMaillard VishravChaudhary - FranciscoGuzmán + FranciscoGuzmán 1164-1174 We aim to investigate the performance of current OCR systems on low resource languages and low resource scripts. We introduce and make publicly available a novel benchmark, OCR4MT, consisting of real and synthetic data, enriched with noise, for 60 low-resource languages in low resource scripts. We evaluate state-of-the-art OCR systems on our benchmark and analyse most common errors. We show that OCR monolingual data is a valuable resource that can increase performance of Machine Translation models, when used in backtranslation. We then perform an ablation study to investigate how OCR errors impact Machine Translation performance and determine what is the minimum level of OCR quality needed for the monolingual data to be useful for Machine Translation. 2022.findings-acl.92 @@ -1309,7 +1309,7 @@ SabineWeber Mohammad JavadHosseini LianeGuillou - MarkSteedman + MarkSteedman 1214-1233 Predicate entailment detection is a crucial task for question-answering from text, where previous work has explored unsupervised learning of entailment graphs from typed open relation triples. In this paper, we present the first pipeline for building Chinese entailment graphs, which involves a novel high-recall open relation extraction (ORE) method and the first Chinese fine-grained entity typing dataset under the FIGER type ontology. Through experiments on the Levy-Holt dataset, we verify the strength of our Chinese entailment graph, and reveal the cross-lingual complementarity: on the parallel Levy-Holt dataset, an ensemble of Chinese and English entailment graphs outperforms both monolingual graphs, and raises unsupervised SOTA by 4.7 AUC points. 2022.findings-acl.96 @@ -1373,7 +1373,7 @@ <fixed-case>C</fixed-case>o-training an <fixed-case>U</fixed-case>nsupervised <fixed-case>C</fixed-case>onstituency <fixed-case>P</fixed-case>arser with <fixed-case>W</fixed-case>eak <fixed-case>S</fixed-case>upervision NickilMaveli - ShayCohen + ShayCohen 1274-1291 We introduce a method for unsupervised parsing that relies on bootstrapping classifiers to identify if a node dominates a specific span in a sentence. There are two types of classifiers, an inside classifier that acts on a span, and an outside classifier that acts on everything outside of a given span. Through self-training and co-training with the two classifiers, we show that the interplay between them helps improve the accuracy of both, and as a result, effectively parse. A seed bootstrapping technique prepares the data to train these classifiers. Our analyses further validate that such an approach in conjunction with weak supervision using prior branching knowledge of a known language (left/right-branching) and minimal heuristics injects strong inductive bias into the parser, achieving 63.1 F_1 on the English (PTB) test set. In addition, we show the effectiveness of our architecture by evaluating on treebanks for Chinese (CTB) and Japanese (KTB) and achieve new state-of-the-art results. 2022.findings-acl.101 @@ -1471,7 +1471,7 @@ Lütfi KeremSenel MasoudJalili Sabet FrançoisYvon - HinrichSchuetze + HinrichSchuetze 1384-1396 After a period of decrease, interest in word alignments is increasing again for their usefulness in domains such as typological research, cross-lingual annotation projection and machine translation. Generally, alignment algorithms only use bitext and do not make use of the fact that many parallel corpora are multiparallel. Here, we compute high-quality word alignments between multiple language pairs by considering all language pairs together. First, we create a multiparallel word alignment graph, joining all bilingual word alignment pairs in one graph. Next, we use graph neural networks (GNNs) to exploit the graph structure. Our GNN approach (i) utilizes information about the meaning, position and language of the input words, (ii) incorporates information from multiple parallel sentences, (iii) adds and removes edges from the initial alignments, and (iv) yields a prediction model that can generalize beyond the training sentences. We show that community detection algorithms can provide valuable information for multiparallel word alignment. Our method outperforms previous work on three word alignment datasets and on a downstream task. 2022.findings-acl.108 @@ -1599,7 +1599,7 @@ Sang-WooLee Ji-HoonKim Jung-WooHa - AliceOh + AliceOh 1487-1492 The retriever-reader pipeline has shown promising performance in open-domain QA but suffers from a very slow inference speed. Recently proposed question retrieval models tackle this problem by indexing question-answer pairs and searching for similar questions. These models have shown a significant increase in inference speed, but at the cost of lower QA performance compared to the retriever-reader models. This paper proposes a two-step question retrieval model, SQuID (Sequential Question-Indexed Dense retrieval) and distant supervision for training. SQuID uses two bi-encoders for question retrieval. The first-step retriever selects top-k similar questions, and the second-step retriever finds the most similar question from the top-k questions. We evaluate the performance and the computational efficiency of SQuID. The results show that SQuID significantly increases the performance of existing question retrieval models with a negligible loss on inference speed. 2022.findings-acl.117 @@ -1698,8 +1698,8 @@ TingtingMa HuiqiangJiang QianhuiWu - TiejunZhao - Chin-YewLin + TiejunZhao + Chin-YewLin 1584-1596 Few-shot named entity recognition (NER) systems aim at recognizing novel-class named entities based on only a few labeled examples. In this paper, we present a decomposed meta-learning approach which addresses the problem of few-shot NER by sequentially tackling few-shot span detection and few-shot entity typing using meta-learning. In particular, we take the few-shot span detection as a sequence labeling problem and train the span detector by introducing the model-agnostic meta-learning (MAML) algorithm to find a good model parameter initialization that could fast adapt to new entity classes. For few-shot entity typing, we propose MAML-ProtoNet, i.e., MAML-enhanced prototypical networks to find a good embedding space that can better distinguish text span representations from different entity classes. Extensive experiments on various benchmarks show that our approach achieves superior performance over prior methods. 2022.findings-acl.124 @@ -1757,8 +1757,8 @@ Transfer Learning and Prediction Consistency for Detecting Offensive Spans of Text AmirPouran Ben Veyseh NingXu - QuanTran - VarunManjunatha + QuanTran + VarunManjunatha FranckDernoncourt ThienNguyen 1630-1637 @@ -1798,7 +1798,7 @@ TahaAksu ZhengyuanLiu Min-YenKan - NancyChen + NancyChen 1659-1671 Augmentation of task-oriented dialogues has followed standard methods used for plain-text such as back-translation, word-level manipulation, and paraphrasing despite its richly annotated structure. In this work, we introduce an augmentation framework that utilizes belief state annotations to match turns from various dialogues and form new synthetic dialogues in a bottom-up manner. Unlike other augmentation strategies, it operates with as few as five examples. Our augmentation strategy yields significant improvements when both adapting a DST model to a new domain, and when adapting a language model to the DST task, on evaluations with TRADE and TOD-BERT models. Further analysis shows that our model performs better on seen values during training, and it is also more robust to unseen values. We conclude that exploiting belief state annotations enhances dialogue augmentation and results in improved models in n-shot training scenarios. 2022.findings-acl.131 @@ -1840,7 +1840,7 @@ LinyangLi Cho-JuiHsieh Kai-WeiChang - XuanjingHuang + XuanjingHuang 1694-1707 Most of the existing defense methods improve the adversarial robustness by making the models adapt to the training set augmented with some adversarial examples. However, the augmented adversarial examples may not be natural, which might distort the training distribution, resulting in inferior performance both in clean accuracy and adversarial robustness. In this study, we explore the feasibility of introducing a reweighting mechanism to calibrate the training distribution to obtain robust models. We propose to train text classifiers by a sample reweighting method in which the example weights are learned to minimize the loss of a validation set mixed with the clean examples and their adversarial ones in an online learning manner. Through extensive experiments, we show that there exists a reweighting mechanism to make the models more robust against adversarial attacks without the need to craft the adversarial examples for the entire training set. 2022.findings-acl.134 @@ -1866,8 +1866,8 @@ XiaoguangLi LifengShang ZhenhuaDong - ChengjieSun - BingquanLiu + ChengjieSun + BingquanLiu ZhenzhouJi XinJiang QunLiu @@ -1955,7 +1955,7 @@ Modality-specific Learning Rates for Effective Multimodal Additive Late-fusion YiqunYao - RadaMihalcea + RadaMihalcea 1824-1834 In multimodal machine learning, additive late-fusion is a straightforward approach to combine the feature representations from different modalities, in which the final prediction can be formulated as the sum of unimodal predictions. While it has been found that certain late-fusion models can achieve competitive performance with lower computational costs compared to complex multimodal interactive models, how to effectively search for a good late-fusion model is still an open question. Moreover, for different modalities, the best unimodal models may work under significantly different learning rates due to the nature of the modality and the computational flow of the model; thus, selecting a global learning rate for late-fusion models can result in a vanishing gradient for some modalities. To help address these issues, we propose a Modality-Specific Learning Rate (MSLR) method to effectively build late-fusion multimodal models from fine-tuned unimodal models. We investigate three different strategies to assign learning rates to different modalities. Our experiments show that MSLR outperforms global learning rates on multiple tasks and settings, and enables the models to effectively learn each modality. 2022.findings-acl.143 @@ -1981,7 +1981,7 @@ HimaniShrotriya AnoopKunchukuttan RatishPuduppully - MiteshKhapra + MiteshKhapra PratyushKumar 1849-1863 In this paper, we study pre-trained sequence-to-sequence models for a group of related languages, with a focus on Indic languages. We present IndicBART, a multilingual, sequence-to-sequence pre-trained model focusing on 11 Indic languages and English. IndicBART utilizes the orthographic similarity between Indic scripts to improve transfer learning between similar Indic languages. We evaluate IndicBART on two NLG tasks: Neural Machine Translation (NMT) and extreme summarization. Our experiments on NMT and extreme summarization show that a model specific to related languages like IndicBART is competitive with large pre-trained models like mBART50 despite being significantly smaller. It also performs well on very low-resource translation scenarios where languages are not included in pre-training or fine-tuning. Script sharing, multilingual training, and better utilization of limited model capacity contribute to the good performance of the compact IndicBART model. @@ -1996,7 +1996,7 @@ GustavoHernandez Abrego NoahConstant JiMa - KeithHall + KeithHall DanielCer YinfeiYang 1864-1874 @@ -2036,7 +2036,7 @@ WenhaoYu ChenguangZhu LianhuiQin - ZhihanZhang + ZhihanZhang TongZhao MengJiang 1896-1906 @@ -2094,7 +2094,7 @@ PouyaPezeshkpour SarthakJain SameerSingh - ByronWallace + ByronWallace 1934-1946 Training the deep neural networks that dominate NLP requires large datasets. These are often collected automatically or via crowdsourcing, and may exhibit systematic biases or annotation artifacts. By the latter we mean spurious correlations between inputs and outputs that do not represent a generally held causal relationship between features and classes; models that exploit such correlations may appear to perform a given task well, but fail on out of sample data. In this paper, we evaluate use of different attribution methods for aiding identification of training data artifacts. We propose new hybrid approaches that combine saliency maps (which highlight important input features) with instance attribution methods (which retrieve training samples influential to a given prediction). We show that this proposed training-feature attribution can be used to efficiently uncover artifacts in training data when a challenging validation set is available. We also carry out a small user study to evaluate whether these methods are useful to NLP researchers in practice, with promising results. We make code for all methods and experiments in this paper available. 2022.findings-acl.153 @@ -2123,7 +2123,7 @@ SrikanthDoss RishitaAnubhai SunilMallya - YaserAl-Onaizan + YaserAl-Onaizan DanRoth 1956-1971 We study the problem of few shot learning for named entity recognition. Specifically, we leverage the semantic information in the names of the labels as a way of giving the model additional signal and enriched priors. We propose a neural architecture that consists of two BERT encoders, one to encode the document and its tokens and another one to encode each of the labels in natural language format. Our model learns to match the representations of named entities computed by the first encoder with label representations computed by the second encoder. The label semantics signal is shown to support improved state-of-the-art results in multiple few shot NER benchmarks and on-par performance in standard benchmarks. Our model is especially effective in low resource settings. @@ -2153,9 +2153,9 @@ KarthikGopalakrishnan YangLiu RobinsonPiramuthu - GokhanTur + GokhanTur DeviParikh - DilekHakkani-Tur + DilekHakkani-Tur 1984-1994 Interactive robots navigating photo-realistic environments need to be trained to effectively leverage and handle the dynamic nature of dialogue in addition to the challenges underlying vision-and-language navigation (VLN). In this paper, we present VISITRON, a multi-modal Transformer-based navigator better suited to the interactive regime inherent to Cooperative Vision-and-Dialog Navigation (CVDN). VISITRON is trained to: i) identify and associate object-level concepts and semantics between the environment and dialogue history, ii) identify when to interact vs. navigate via imitation learning of a binary classification head. We perform extensive pre-training and fine-tuning ablations with VISITRON to gain empirical insights and improve performance on CVDN. VISITRON’s ability to identify when to interact leads to a natural generalization of the game-play mode introduced by Roman et al. (2020) for enabling the use of such models in different environments. VISITRON is competitive with models on the static CVDN leaderboard and attains state-of-the-art performance on the Success weighted by Path Length (SPL) metric. 2022.findings-acl.157 @@ -2244,7 +2244,7 @@ Richer Countries and Richer Representations KaitlynZhou KawinEthayarajh - DanJurafsky + DanJurafsky 2074-2085 We examine whether some countries are more richly represented in embedding space than others. We find that countries whose names occur with low frequency in training corpora are more likely to be tokenized into subwords, are less semantically distinct in embedding space, and are less likely to be correctly predicted: e.g., Ghana (the correct answer and in-vocabulary) is not predicted for, “The country producing the most cocoa is [MASK].”. Although these performance discrepancies and representational harms are due to frequency, we find that frequency is highly correlated with a country’s GDP; thus perpetuating historic power and wealth inequalities. We analyze the effectiveness of mitigation strategies; recommend that researchers report training word frequencies; and recommend future work for the community to define and design representational guarantees. 2022.findings-acl.164 @@ -2260,7 +2260,7 @@ JasonPhang JanaThompson Phu MonHtut - SamuelBowman + SamuelBowman 2086-2105 It is well documented that NLP models learn social biases, but little work has been done on how these biases manifest in model outputs for applied tasks like question answering (QA). We introduce the Bias Benchmark for QA (BBQ), a dataset of question-sets constructed by the authors that highlight attested social biases against people belonging to protected classes along nine social dimensions relevant for U.S. English-speaking contexts. Our task evaluate model responses at two levels: (i) given an under-informative context, we test how strongly responses reflect social biases, and (ii) given an adequately informative context, we test whether the model’s biases override a correct answer choice. We find that models often rely on stereotypes when the context is under-informative, meaning the model’s outputs consistently reproduce harmful biases in this setting. Though models are more accurate when the context provides an informative answer, they still rely on stereotypes and average up to 3.4 percentage points higher accuracy when the correct answer aligns with a social bias than when it conflicts, with this difference widening to over 5 points on examples targeting gender for most models tested. 2022.findings-acl.165 @@ -2272,9 +2272,9 @@ Zero-shot Learning for Grapheme to Phoneme Conversion with Language Ensemble XinjianLi FlorianMetze - DavidMortensen + DavidMortensen ShinjiWatanabe - AlanBlack + AlanBlack 2106-2115 Grapheme-to-Phoneme (G2P) has many applications in NLP and speech fields. Most existing work focuses heavily on languages with abundant training datasets, which limits the scope of target languages to less than 100 languages. This work attempts to apply zero-shot learning to approximate G2P models for all low-resource and endangered languages in Glottolog (about 8k languages). For any unseen target language, we first build the phylogenetic tree (i.e. language family tree) to identify top-k nearest languages for which we have training sets. Then we run models of those languages to obtain a hypothesis set, which we combine into a confusion network to propose a most likely hypothesis as an approximation to the target language. We test our approach on over 600 unseen languages and demonstrate it significantly outperforms baselines. 2022.findings-acl.166 @@ -2290,7 +2290,7 @@ ChangbingYang EdithCoates GarrettNicolai - MiikkaSilfverberg + MiikkaSilfverberg 2116-2130 Recent progress in NLP is driven by pretrained models leveraging massive datasets and has predominantly benefited the world’s political and economic superpowers. Technologically underserved languages are left behind because they lack such resources. Hundreds of underserved languages, nevertheless, have available data sources in the form of interlinear glossed text (IGT) from language documentation efforts. IGT remains underutilized in NLP work, perhaps because its annotations are only semi-structured and often language-specific. With this paper, we make the case that IGT data can be leveraged successfully provided that target language expertise is available. We specifically advocate for collaboration with documentary linguists. Our paper provides a roadmap for successful projects utilizing IGT data: (1) It is essential to define which NLP tasks can be accomplished with the given IGT data and how these will benefit the speech community. (2) Great care and target language expertise is required when converting the data into structured formats commonly employed in NLP. (3) Task-specific and user-specific evaluation can help to ascertain that the tools which are created benefit the target language speech community. We illustrate each step through a case study on developing a morphological reinflection system for the Tsimchianic language Gitksan. 2022.findings-acl.167 @@ -2364,7 +2364,7 @@ The impact of lexical and grammatical processing on generating code from natural language NathanaëlBeau - BenoitCrabbé + BenoitCrabbé 2204-2214 Considering the seq2seq architecture of Yin and Neubig (2018) for natural language to code translation, we identify four key components of importance: grammatical constraints, lexical preprocessing, input representations, and copy mechanisms. To study the impact of these components, we use a state-of-the-art architecture that relies on BERT encoder and a grammar-based decoder for which a formalization is provided. The paper highlights the importance of the lexical substitution component in the current natural language to code systems. 2022.findings-acl.173 @@ -2417,9 +2417,9 @@ <fixed-case>C</fixed-case>hart<fixed-case>QA</fixed-case>: A Benchmark for Question Answering about Charts with Visual and Logical Reasoning AhmedMasry - Do XuanLong + Do XuanLong Jia QingTan - ShafiqJoty + ShafiqJoty EnamulHoque 2263-2279 Charts are very popular for analyzing data. When exploring charts, people often ask a variety of complex reasoning questions that involve several logical and arithmetic operations. They also commonly refer to visual features of a chart in their questions. However, most existing datasets do not focus on such complex reasoning questions as their questions are template-based and answers come from a fixed-vocabulary. In this work, we present a large-scale benchmark covering 9.6K human-written questions as well as 23.1K questions generated from human-written chart summaries. To address the unique challenges in our benchmark involving visual and logical reasoning over charts, we present two transformer-based models that combine visual features and the data table of the chart in a unified way to answer questions. While our models achieve the state-of-the-art results on the previous datasets as well as on our benchmark, the evaluation also reveals several challenges in answering complex reasoning questions. @@ -2457,7 +2457,7 @@ Phoneme transcription of endangered languages: an evaluation of recent <fixed-case>ASR</fixed-case> architectures in the single speaker scenario - GillesBoulianne + GillesBoulianne 2301-2308 Transcription is often reported as the bottleneck in endangered language documentation, requiring large efforts from scarce speakers and transcribers. In general, automatic speech recognition (ASR) can be accurate enough to accelerate transcription only if trained on large amounts of transcribed data. However, when a single speaker is involved, several studies have reported encouraging results for phonetic transcription even with small amounts of training. Here we expand this body of work on speaker-dependent transcription by comparing four ASR approaches, notably recent transformer and pretrained multilingual models, on a common dataset of 11 languages. To automate data preparation, training and evaluation steps, we also developed a phoneme recognition setup which handles morphologically complex languages and writing systems for which no pronunciation dictionary exists. We find that fine-tuning a multilingual pretrained model yields an average phoneme error rate (PER) of 15% for 6 languages with 99 minutes or less of transcribed data for training. For the 5 languages with between 100 and 192 minutes of training, we achieved a PER of 8.4% or less. These results on a number of varied languages suggest that ASR can now significantly reduce transcription efforts in the speaker-dependent situation common in endangered language work. 2022.findings-acl.180 @@ -2480,7 +2480,7 @@ Combining Static and Contextualised Multilingual Embeddings KatharinaHämmerl JindřichLibovický - AlexanderFraser + AlexanderFraser 2316-2329 Static and contextual multilingual embeddings have complementary strengths. Static embeddings, while less expressive than contextual language models, can be more straightforwardly aligned across multiple languages. We combine the strengths of static and contextual models to improve multilingual representations. We extract static embeddings for 40 languages from XLM-R, validate those embeddings with cross-lingual word retrieval, and then align them using VecMap. This results in high-quality, highly multilingual static embeddings. Then we apply a novel continued pre-training approach to XLM-R, leveraging the high quality alignment of our static embeddings to better align the representation space of XLM-R. We show positive results for multiple complex semantic tasks. We release the static embeddings and the continued pre-training code. Unlike most previous work, our continued pre-training approach does not require parallel text. 2022.findings-acl.182 @@ -2504,7 +2504,7 @@ Square One Bias in <fixed-case>NLP</fixed-case>: Towards a Multi-Dimensional Exploration of the Research Manifold SebastianRuder IvanVulić - AndersSøgaard + AndersSøgaard 2340-2354 The prototypical NLP experiment trains a standard architecture on labeled English data and optimizes for accuracy, without accounting for other dimensions such as fairness, interpretability, or computational efficiency. We show through a manual classification of recent NLP research papers that this is indeed the case and refer to it as the square one experimental setup. We observe that NLP research often goes beyond the square one setup, e.g, focusing not only on accuracy, but also on fairness or interpretability, but typically only along a single dimension. Most work targeting multilinguality, for example, considers only accuracy; most work on fairness or interpretability considers only English; and so on. Such one-dimensionality of most research means we are only exploring a fraction of the NLP research search space. We provide historical and recent examples of how the square one bias has led researchers to draw false conclusions or make unwise choices, point to promising yet unexplored directions on the research manifold, and make practical recommendations to enable more multi-dimensional research. We open-source the results of our annotations to enable further analysis. 2022.findings-acl.184 @@ -2516,7 +2516,7 @@ EdoardoManino JuliaRozanova DaniloCarvalho - AndreFreitas + AndreFreitas LucasCordeiro 2355-2366 Metamorphic testing has recently been used to check the safety of neural NLP models. Its main advantage is that it does not rely on a ground truth to generate test cases. However, existing studies are mostly concerned with robustness-like metamorphic relations, limiting the scope of linguistic properties they can test. We propose three new classes of metamorphic relations, which address the properties of systematicity, compositionality and transitivity. Unlike robustness, our relations are defined over multiple source inputs, thus increasing the number of test cases that we can produce by a polynomial factor. With them, we test the internal consistency of state-of-the-art NLP models, and show that they do not always behave according to their expected linguistic properties. Lastly, we introduce a novel graphical notation that efficiently summarises the inner structure of metamorphic relations. @@ -2529,12 +2529,12 @@ Improving Neural Political Statement Classification with Class Hierarchical Information ErenayDayanik - AndreBlessing + AndreBlessing NicoBlokker SebastianHaunss JonasKuhn GabriellaLapesa - SebastianPado + SebastianPado 2367-2382 Many tasks in text-based computational social science (CSS) involve the classification of political statements into categories based on a domain-specific codebook. In order to be useful for CSS analysis, these categories must be fine-grained. The typically skewed distribution of fine-grained categories, however, results in a challenging classification problem on the NLP side. This paper proposes to make use of the hierarchical relations among categories typically present in such codebooks:e.g., markets and taxation are both subcategories of economy, while borders is a subcategory of security. We use these ontological relations as prior knowledge to establish additional constraints on the learned model, thusimproving performance overall and in particular for infrequent categories. We evaluate several lightweight variants of this intuition by extending state-of-the-art transformer-based textclassifiers on two datasets and multiple languages. We find the most consistent improvement for an approach based on regularization. 2022.findings-acl.186 @@ -2579,7 +2579,7 @@ YilongHe YuanNi GuotongXie - XuanjingHuang + XuanjingHuang XipengQiu 2409-2421 Early exiting allows instances to exit at different layers according to the estimation of difficulty. Previous works usually adopt heuristic metrics such as the entropy of internal outputs to measure instance difficulty, which suffers from generalization and threshold-tuning. In contrast, learning to exit, or learning to predict instance difficulty is a more appealing way. Though some effort has been devoted to employing such “learn-to-exit” modules, it is still unknown whether and how well the instance difficulty can be learned. As a response, we first conduct experiments on the learnability of instance difficulty, which demonstrates that modern neural models perform poorly on predicting instance difficulty. Based on this observation, we propose a simple-yet-effective Hash-based Early Exiting approach HashEE) that replaces the learn-to-exit modules with hash functions to assign each token to a fixed exiting layer. Different from previous methods, HashEE requires no internal classifiers nor extra parameters, and therefore is more efficient. HashEE can be used in various tasks (including language understanding and generation) and model architectures such as seq2seq models. Experimental results on classification, regression, and generation tasks demonstrate that HashEE can achieve higher performance with fewer FLOPs and inference time compared with previous state-of-the-art early exiting methods. @@ -2589,7 +2589,7 @@ Auxiliary tasks to boost Biaffine Semantic Dependency Parsing - MarieCandito + MarieCandito 2422-2429 The biaffine parser of (CITATION) was successfully extended to semantic dependency parsing (SDP) (CITATION). Its performance on graphs is surprisingly high given that, without the constraint of producing a tree, all arcs for a given sentence are predicted independently from each other (modulo a shared representation of tokens).To circumvent such an independence of decision, while retaining the O(n^2) complexity and highly parallelizable architecture, we propose to use simple auxiliary tasks that introduce some form of interdependence between arcs. Experiments on the three English acyclic datasets of SemEval-2015 task 18 (CITATION), and on French deep syntactic cyclic graphs (CITATION) show modest but systematic performance gains on a near-state-of-the-art baseline using transformer-based contextualized representations. This provides a simple and robust method to boost SDP performance. 2022.findings-acl.190 @@ -2612,7 +2612,7 @@ Improved Multi-label Classification under Temporal Concept Drift: Rethinking Group-Robust Algorithms in a Label-Wise Setting IliasChalkidis - AndersSøgaard + AndersSøgaard 2441-2454 In document classification for, e.g., legal and biomedical text, we often deal with hundreds of classes, including very infrequent ones, as well as temporal concept drift caused by the influence of real world events, e.g., policy changes, conflicts, or pandemics. Class imbalance and drift can sometimes be mitigated by resampling the training data to simulate (or compensate for) a known target distribution, but what if the target distribution is determined by unknown future events? Instead of simply resampling uniformly to hedge our bets, we focus on the underlying optimization algorithms used to train such document classifiers and evaluate several group-robust optimization algorithms, initially proposed to mitigate group-level disparities. Reframing group-robust algorithms as adaptation algorithms under concept drift, we find that Invariant Risk Minimization and Spectral Decoupling outperform sampling-based approaches to class imbalance and concept drift, and lead to much better performance on minority classes. The effect is more pronounced the larger the label set. 2022.findings-acl.192 @@ -2640,7 +2640,7 @@ Why don’t people use character-level machine translation? JindřichLibovický HelmutSchmid - AlexanderFraser + AlexanderFraser 2470-2485 We present a literature and empirical survey that critically assesses the state of the art in character-level modeling for machine translation (MT). Despite evidence in the literature that character-level systems are comparable with subword systems, they are virtually never used in competitive setups in WMT competitions. We empirically show that even with recent modeling innovations in character-level natural language processing, character-level MT systems still struggle to match their subword-based counterparts. Character-level MT systems show neither better domain robustness, nor better morphological generalization, despite being often so motivated. However, we are able to show robustness towards source side noise and that translation quality does not degrade with increasing beam size at decoding time. 2022.findings-acl.194 @@ -2711,7 +2711,7 @@ Single Model Ensemble for Subword Regularized Models in Low-Resource Machine Translation ShoTakase TatsuyaHiraoka - NaoakiOkazaki + NaoakiOkazaki 2536-2541 Subword regularizations use multiple subword segmentations during training to improve the robustness of neural machine translation models. In previous subword regularizations, we use multiple segmentations in the training process but use only one segmentation in the inference. In this study, we propose an inference strategy to address this discrepancy. The proposed strategy approximates the marginalized likelihood by using multiple segmentations including the most plausible segmentation and several sampled segmentations. Because the proposed strategy aggregates predictions from several segmentations, we can regard it as a single model ensemble that does not require any additional cost for training. Experimental results show that the proposed strategy improves the performance of models trained with subword regularization in low-resource machine translation tasks. 2022.findings-acl.199 @@ -2723,7 +2723,7 @@ ChristianHerold JanRosendahl JorisVanvinckenroye - HermannNey + HermannNey 2542-2551 The filtering and/or selection of training data is one of the core aspects to be considered when building a strong machine translation system. In their influential work, Khayrallah and Koehn (2018) investigated the impact of different types of noise on the performance of machine translation systems. In the same year the WMT introduced a shared task on parallel corpus filtering, which went on to be repeated in the following years, and resulted in many different filtering approaches being proposed. In this work we aim to combine the recent achievements in data filtering with the original analysis of Khayrallah and Koehn (2018) and investigate whether state-of-the-art filtering systems are capable of removing all the suggested noise types. We observe that most of these types of noise can be detected with an accuracy of over 90% by modern filtering systems when operating in a well studied high resource setting. However, we also find that when confronted with more refined noise categories or when working with a less common language pair, the performance of the filtering systems is far from optimal, showing that there is still room for improvement in this area of research. 2022.findings-acl.200 @@ -2774,7 +2774,7 @@ HongyuanLu WaiLam HongCheng - HelenMeng + HelenMeng 2591-2601 Dialogue agents can leverage external textual knowledge to generate responses of a higher quality. To our best knowledge, most existing works on knowledge grounded dialogue settings assume that the user intention is always answerable. Unfortunately, this is impractical as there is no guarantee that the knowledge retrievers could always retrieve the desired knowledge. Therefore, this is crucial to incorporate fallback responses to respond to unanswerable contexts appropriately while responding to the answerable contexts in an informative manner. We propose a novel framework that automatically generates a control token with the generator to bias the succeeding response towards informativeness for answerable contexts and fallback for unanswerable contexts in an end-to-end manner. Since no existing knowledge grounded dialogue dataset considers this aim, we augment the existing dataset with unanswerable contexts to conduct our experiments. Automatic and human evaluation results indicate that naively incorporating fallback responses with controlled text generation still hurts informativeness for answerable context. In contrast, our proposed framework effectively mitigates this problem while still appropriately presenting fallback responses to unanswerable contexts. Such a framework also reduces the extra burden of the additional classifier and the overheads introduced in the previous works, which operates in a pipeline manner. 2022.findings-acl.204 @@ -2821,7 +2821,7 @@ XinchaoXu ZhibinGou WenquanWu - Zheng-YuNiu + Zheng-YuNiu HuaWu HaifengWang ShihangWang @@ -2838,7 +2838,7 @@ MicheleMastromattei FrancescaFallucchi NoemiScarpato - Fabio MassimoZanzotto + Fabio MassimoZanzotto 2651-2662 Word embeddings are powerful dictionaries, which may easily capture language variations. However, these dictionaries fail to give sense to rare words, which are surprisingly often covered by traditional dictionaries. In this paper, we propose to use definitions retrieved in traditional dictionaries to produce word embeddings for rare words. For this purpose, we introduce two methods: Definition Neural Network (DefiNNet) and Define BERT (DefBERT). In our experiments, DefiNNet and DefBERT significantly outperform state-of-the-art as well as baseline methods devised for producing embeddings of unknown words. In fact, DefiNNet significantly outperforms FastText, which implements a method for the same task-based on n-grams, and DefBERT significantly outperforms the BERT method for OOV words. Then, definitions in traditional dictionaries are useful to build word embeddings for rare words. 2022.findings-acl.208 @@ -2926,8 +2926,8 @@ Graph Refinement for Coreference Resolution - LeslyMiculicich - JamesHenderson + LeslyMiculicich + JamesHenderson 2732-2742 The state-of-the-art models for coreference resolution are based on independent mention pair-wise decisions. We propose a modelling approach that learns coreference at the document-level and takes global decisions. For this purpose, we model coreference links in a graph structure where the nodes are tokens in the text, and the edges represent the relationship between them. Our model predicts the graph in a non-autoregressive manner, then iteratively refines it based on previous predictions, allowing global dependencies between decisions. The experimental results show improvements over various baselines, reinforcing the hypothesis that document-level information improves conference resolution. 2022.findings-acl.215 @@ -3179,7 +3179,7 @@ Modular and Parameter-Efficient Multimodal Fusion with Prompting ShengLiang MengjieZhao - HinrichSchuetze + HinrichSchuetze 2976-2985 Recent research has made impressive progress in large-scale multimodal pre-training. In the context of the rapid growth of model size, it is necessary to seek efficient and flexible methods other than finetuning. In this paper, we propose to use prompt vectors to align the modalities. Our method achieves comparable performance to several other multimodal fusion methods in low-resource settings. We further show that our method is modular and parameter-efficient for processing tasks involving two or more data modalities. 2022.findings-acl.234 @@ -3191,7 +3191,7 @@ Synchronous Refinement for Neural Machine Translation KehaiChen MasaoUtiyama - EiichiroSumita + EiichiroSumita RuiWang MinZhang 2986-2996 @@ -3254,7 +3254,7 @@ Factual Consistency of Multilingual Pretrained Language Models ConstanzaFierro - AndersSøgaard + AndersSøgaard 3046-3052 Pretrained language models can be queried for factual knowledge, with potential applications in knowledge base acquisition and tasks that require inference. However, for that, we need to know how reliable this knowledge is, and recent work has shown that monolingual English language models lack consistency when predicting factual knowledge, that is, they fill-in-the-blank differently for paraphrases describing the same fact. In this paper, we extend the analysis of consistency to a multilingual setting. We introduce a resource, mParaRel, and investigate (i) whether multilingual language models such as mBERT and XLM-R are more consistent than their monolingual counterparts;and (ii) if such models are equally consistent across languages. We find that mBERT is as inconsistent as English BERT in English paraphrases, but that both mBERT and XLM-R exhibit a high degree of inconsistency in English and even more so for all the other 45 languages. 2022.findings-acl.240 @@ -3375,7 +3375,7 @@ The Inefficiency of Language Models in Scholarly Retrieval: An Experimental Walk-through ShrutiSingh - MayankSingh + MayankSingh 3153-3173 Language models are increasingly becoming popular in AI-powered scientific IR systems. This paper evaluates popular scientific language models in handling (i) short-query texts and (ii) textual neighbors. Our experiments showcase the inability to retrieve relevant documents for a short-query text even under the most relaxed conditions. Additionally, we leverage textual neighbors, generated by small perturbations to the original text, to demonstrate that not all perturbations lead to close neighbors in the embedding space. Further, an exhaustive categorization yields several classes of orthographically and semantically related, partially related and completely unrelated neighbors. Retrieval performance turns out to be more influenced by the surface form rather than the semantics of the text. 2022.findings-acl.249 @@ -3384,7 +3384,7 @@ Fusing Heterogeneous Factors with Triaffine Mechanism for Nested Named Entity Recognition - ZhengYuan + ZhengYuan ChuanqiTan SongfangHuang FeiHuang @@ -3493,11 +3493,11 @@ One Agent To Rule Them All: Towards Multi-agent Conversational <fixed-case>AI</fixed-case> ChristopherClarke - JosephPeper + JosephPeper KarthikKrishnamurthy WalterTalamonti KevinLeach - WalterLasecki + WalterLasecki YipingKang LingjiaTang JasonMars @@ -3514,7 +3514,7 @@ ShoTakase KeiUchiumi AtsushiKeyaki - NaoakiOkazaki + NaoakiOkazaki 3268-3275 We present two simple modifications for word-level perturbation: Word Replacement considering Length (WR-L) and Compositional Word Replacement (CWR).In conventional word replacement, a word in an input is replaced with a word sampled from the entire vocabulary, regardless of the length and context of the target word.WR-L considers the length of a target word by sampling words from the Poisson distribution.CWR considers the compositional candidates by restricting the source of sampling to related words that appear in subword regularization. Experimental results showed that the combination of WR-L and CWR improved the performance of text classification and machine translation. 2022.findings-acl.258 @@ -3538,7 +3538,7 @@ Controlling the Focus of Pretrained Language Generation Models JiabaoJi YoonKim - JamesGlass + JamesGlass TianxingHe 3291-3306 The finetuning of pretrained transformer-based language generation models are typically conducted in an end-to-end manner, where the model learns to attend to relevant parts of the input by itself. However, there does not exist a mechanism to directly control the model’s focus. This work aims to develop a control mechanism by which a user can select spans of context as “highlights” for the model to focus on, and generate relevant output. To achieve this goal, we augment a pretrained model with trainable “focus vectors” that are directly applied to the model’s embeddings, while the model itself is kept fixed. These vectors, trained on automatic annotations derived from attribution methods, act as indicators for context importance. We test our approach on two core generation tasks: dialogue response generation and abstractive summarization. We also collect evaluation data where the highlight-generation pairs are annotated by humans. Our experiments show that the trained focus vectors are effective in steering the model to generate outputs that are relevant to user-selected highlights. @@ -3551,7 +3551,7 @@ HayateIso XiaolanWang StefanosAngelidis - YoshihikoSuhara + YoshihikoSuhara 3307-3324 Opinion summarization focuses on generating summaries that reflect popular subjective information expressed in multiple online reviews. While generated summaries offer general and concise information about a particular hotel or product, the information may be insufficient to help the user compare multiple different choices. Thus, the user may still struggle with the question “Which one should I pick?” In this paper, we propose the comparative opinion summarization task, which aims at generating two contrastive summaries and one common summary from two different candidate sets of reviews. We develop a comparative summarization framework CoCoSum, which consists of two base summarization models that jointly generate contrastive and common summaries. Experimental results on a newly created benchmark CoCoTrip show that CoCoSum can produce higher-quality contrastive and common summaries than state-of-the-art opinion summarization models. The dataset and code are available at https://github.com/megagonlabs/cocosum 2022.findings-acl.261 @@ -3588,10 +3588,10 @@ From Stance to Concern: Adaptation of Propositional Analysis to New Tasks and Domains BrodieMather - BonnieDorr + BonnieDorr AdamDalton - Williamde Beaumont - OwenRambow + Williamde Beaumont + OwenRambow SonjaSchmer-Galunder 3354-3367 We present a generalized paradigm for adaptation of propositional analysis (predicate-argument pairs) to new tasks and domains. We leverage an analogy between stances (belief-driven sentiment) and concerns (topical issues with moral dimensions/endorsements) to produce an explanatory representation. A key contribution is the combination of semi-automatic resource building for extraction of domain-dependent concern types (with 2-4 hours of human labor per domain) and an entirely automatic procedure for extraction of domain-independent moral dimensions and endorsement values. Prudent (automatic) selection of terms from propositional structures for lexical expansion (via semantic similarity) produces new moral dimension lexicons at three levels of granularity beyond a strong baseline lexicon. We develop a ground truth (GT) based on expert annotators and compare our concern detection output to GT, to yield 231% improvement in recall over baseline, with only a 10% loss in precision. F1 yields 66% improvement over baseline and 97.8% of human performance. Our lexically based approach yields large savings over approaches that employ costly human labor and model building. We provide to the community a newly expanded moral dimension/value lexicon, annotation guidelines, and GT. @@ -3604,7 +3604,7 @@ ScottNovotney SreeparnaMukherjee ZeeshanAhmed - AndreasStolcke + AndreasStolcke 3368-3379 We propose a framework to modularize the training of neural language models that use diverse forms of context by eliminating the need to jointly train context and within-sentence encoders. Our approach, contextual universal embeddings (CUE), trains LMs on one type of contextual data and adapts to novel context types. The model consists of a pretrained neural sentence LM, a BERT-based contextual encoder, and a masked transfomer decoder that estimates LM probabilities using sentence-internal and contextual evidence. When contextually annotated data is unavailable, our model learns to combine contextual and sentence-internal information using noisy oracle unigram embeddings as a proxy. Real context data can be introduced later and used to adapt a small number of parameters that map contextual data into the decoder’s embedding space. We validate the CUE framework on a NYTimes text corpus with multiple metadata types, for which the LM perplexity can be lowered from 36.6 to 27.4 by conditioning on context. Bootstrapping a contextual LM with only a subset of the metadata during training retains 85% of the achievable gain. Training the model initially with proxy context retains 67% of the perplexity gain after adapting to real context. Furthermore, we can swap one type of pretrained sentence LM for another without retraining the context encoders, by only adapting the decoder model. Overall, we obtain a modular framework that allows incremental, scalable training of context-enhanced LMs. 2022.findings-acl.265 @@ -3735,8 +3735,8 @@ What does it take to bake a cake? The <fixed-case>R</fixed-case>ecipe<fixed-case>R</fixed-case>ef corpus and anaphora resolution in procedural text BiaoyanFang - TimothyBaldwin - KarinVerspoor + TimothyBaldwin + KarinVerspoor 3481-3495 Procedural text contains rich anaphoric phenomena, yet has not received much attention in NLP. To fill this gap, we investigate the textual properties of two types of procedural text, recipes and chemical patents, and generalize an anaphora annotation framework developed for the chemical domain for modeling anaphoric phenomena in recipes. We apply this framework to annotate the RecipeRef corpus with both bridging and coreference relations. Through comparison to chemical patents, we show the complexity of anaphora resolution in recipes. We demonstrate empirically that transfer learning from the chemical domain improves resolution of anaphora in recipes, suggesting transferability of general procedural knowledge. 2022.findings-acl.275 @@ -3802,7 +3802,7 @@ HaoZhou ChengqiZhao ShujianHuang - JiajunChen + JiajunChen LeiLi 3537-3548 This paper does not aim at introducing a novel model for document-level neural machine translation. Instead, we head back to the original Transformer model and hope to answer the following question: Is the capacity of current models strong enough for document-level translation? Interestingly, we observe that the original Transformer with appropriate training techniques can achieve strong results for document translation, even with a length of 2000 words. We evaluate this model and several recent approaches on nine document-level datasets and two sentence-level datasets across six languages. Experiments show that document-level Transformer models outperforms sentence-level ones and many previous methods in a comprehensive set of metrics, including BLEU, four lexical indices, three newly proposed assistant linguistic indicators, and human evaluation. @@ -3879,7 +3879,7 @@ Incorporating Dynamic Semantics into Pre-Trained Language Model for Aspect-based Sentiment Analysis KaiZhang - KunZhang + KunZhang MengdiZhang HongkeZhao QiLiu @@ -3921,7 +3921,7 @@ Modular Domain Adaptation JunshenChen DallasCard - DanJurafsky + DanJurafsky 3633-3655 Off-the-shelf models are widely used by computational social science researchers to measure properties of text, such as sentiment. However, without access to source data it is difficult to account for domain shift, which represents a threat to validity. Here, we treat domain adaptation as a modular process that involves separate model producers and model consumers, and show how they can independently cooperate to facilitate more accurate measurements of text. We introduce two lightweight techniques for this scenario, and demonstrate that they reliably increase out-of-domain accuracy on four multi-domain text classification datasets when used with linear and contextual embedding models. We conclude with recommendations for model producers and consumers, and release models and replication code to accompany this paper. 2022.findings-acl.288 @@ -3957,7 +3957,7 @@ Addressing Resource and Privacy Constraints in Semantic Parsing Through Data Augmentation KevinYang OliviaDeng - CharlesChen + CharlesChen RichardShin SubhroRoy BenjaminVan Durme @@ -3970,9 +3970,9 @@ Improving Candidate Retrieval with Entity Profile Generation for <fixed-case>W</fixed-case>ikidata Entity Linking - TuanLai + TuanLai HengJi - ChengXiangZhai + ChengXiangZhai 3696-3711 Entity linking (EL) is the task of linking entity mentions in a document to referent entities in a knowledge base (KB). Many previous studies focus on Wikipedia-derived KBs. There is little work on EL over Wikidata, even though it is the most extensive crowdsourced KB. The scale of Wikidata can open up many new real-world applications, but its massive number of entities also makes EL challenging. To effectively narrow down the search space, we propose a novel candidate retrieval paradigm based on entity profiling. Wikidata entities and their textual fields are first indexed into a text search engine (e.g., Elasticsearch). During inference, given a mention and its context, we use a sequence-to-sequence (seq2seq) model to generate the profile of the target entity, which consists of its title and description. We use the profile to query the indexed search engine to retrieve candidate entities. Our approach complements the traditional approach of using a Wikipedia anchor-text dictionary, enabling us to further design a highly effective hybrid method for candidate retrieval. Combined with a simple cross-attention reranker, our complete EL framework achieves state-of-the-art results on three Wikidata-based datasets and strong performance on TACKBP-2010. 2022.findings-acl.292 @@ -4016,7 +4016,7 @@ DaraBahri JiMa JaiGupta - CiceroNogueira dos Santos + CiceroNogueira dos Santos YiTay DonaldMetzler 3747-3758 @@ -4062,7 +4062,7 @@ Probing Multilingual Cognate Prediction Models ClémentineFourrier - BenoîtSagot + BenoîtSagot 3786-3801 Character-based neural machine translation models have become the reference models for cognate prediction, a historical linguistics task. So far, all linguistic interpretations about latent information captured by such models have been based on external analysis (accuracy, raw results, errors). In this paper, we investigate what probing can tell us about both models and previous interpretations, and learn that though our models store linguistic and diachronic information, they do not achieve it in previously assumed ways. 2022.findings-acl.299 @@ -4271,7 +4271,7 @@ Probing <fixed-case>BERT</fixed-case>’s priors with serial reproduction chains TakateruYamakoshi - ThomasGriffiths + ThomasGriffiths RobertHawkins 3977-3992 Sampling is a promising bottom-up method for exposing what generative models have learned about language, but it remains unclear how to generate representative samples from popular masked language models (MLMs) like BERT. The MLM objective yields a dependency network with no guarantee of consistent conditional distributions, posing a problem for naive approaches. Drawing from theories of iterated learning in cognitive science, we explore the use of serial reproduction chains to sample from BERT’s priors. In particular, we observe that a unique and consistent estimator of the ground-truth joint distribution is given by a Generative Stochastic Network (GSN) sampler, which randomly selects which token to mask and reconstruct on each step. We show that the lexical and syntactic statistics of sentences from GSN chains closely match the ground-truth corpus distribution and perform better than other methods in a large corpus of naturalness judgments. Our findings establish a firmer theoretical foundation for bottom-up probing and highlight richer deviations from human priors. @@ -4298,7 +4298,7 @@ AshwinSrinivasan AnkitaSharma DamienJose - PaulBennett + PaulBennett 4008-4020 Dense retrieval (DR) methods conduct text retrieval by first encoding texts in the embedding space and then matching them by nearest neighbor search. This requires strong locality properties from the representation space, e.g., close allocations of each small group of relevant texts, which are hard to generalize to domains without sufficient training data. In this paper, we aim to improve the generalization ability of DR models from source training domains with rich supervision signals to target domains without any relevance label, in the zero-shot setting. To achieve that, we propose Momentum adversarial Domain Invariant Representation learning (MoDIR), which introduces a momentum method to train a domain classifier that distinguishes source versus target domains, and then adversarially updates the DR encoder to learn domain invariant representations. Our experiments show that MoDIR robustly outperforms its baselines on 10+ ranking datasets collected in the BEIR benchmark in the zero-shot setup, with more than 10% relative gains on datasets with enough sensitivity for DR models’ evaluation. Source code is available at https://github.com/ji-xin/modir. 2022.findings-acl.316 @@ -4479,9 +4479,9 @@ What is wrong with you?: Leveraging User Sentiment for Automatic Dialog Evaluation SarikGhazarian BehnamHedayatnia - AlexandrosPapangelis + AlexandrosPapangelis YangLiu - DilekHakkani-Tur + DilekHakkani-Tur 4194-4204 Accurate automatic evaluation metrics for open-domain dialogs are in high demand. Existing model-based metrics for system response evaluation are trained on human annotated data, which is cumbersome to collect. In this work, we propose to use information that can be automatically extracted from the next user utterance, such as its sentiment or whether the user explicitly ends the conversation, as a proxy to measure the quality of the previous system response. This allows us to train on a massive set of dialogs with weak supervision, without requiring manual system turn quality annotations. Experiments show that our model is comparable to models trained on human annotated data. Furthermore, our model generalizes across both spoken and written open-domain dialog corpora collected from real and paid users. 2022.findings-acl.331 @@ -4494,8 +4494,8 @@ Findings of the Association for Computational Linguistics: NAACL 2022 MarineCarpuat - Marie-Catherinede Marneffe - Ivan VladimirMeza Ruiz + Marie-Catherinede Marneffe + Ivan VladimirMeza Ruiz Association for Computational Linguistics
Seattle, United States
July @@ -4579,7 +4579,7 @@ <fixed-case>M</fixed-case>ulti<fixed-case>V</fixed-case>er<fixed-case>S</fixed-case>: Improving scientific claim verification with weak supervision and full-document context DavidWadden KyleLo - Lucy LuWang + Lucy LuWang ArmanCohan IzBeltagy HannanehHajishirzi @@ -4668,7 +4668,7 @@
<fixed-case>F</fixed-case>ed<fixed-case>NLP</fixed-case>: Benchmarking Federated Learning Methods for Natural Language Processing Tasks - Bill YuchenLin + Bill YuchenLin ChaoyangHe ZihangZe HulinWang @@ -4701,7 +4701,7 @@ Lacuna Reconstruction: Self-Supervised Pre-Training for Low-Resource Historical Document Transcription NikolaiVogler - JonathanAllen + JonathanAllen MatthewMiller TaylorBerg-Kirkpatrick 206-216 @@ -4755,7 +4755,7 @@ KasturiBhattacharjee RashmiGangadharaiah DanRoth - CarolynRose + CarolynRose 253-268 Previous studies on question answering over knowledge graphs have typically operated over a single knowledge graph (KG). This KG is assumed to be known a priori and is lever- aged similarly for all users’ queries during inference. However, such an assumption is not applicable to real-world settings, such as health- care, where one needs to handle queries of new users over unseen KGs during inference. Furthermore, privacy concerns and high computational costs render it infeasible to query the single KG that has information about all users while answering a specific user’s query. The above concerns motivate our question answer- ing setting over personalized knowledge graphs (PERKGQA) where each user has restricted access to their KG. We observe that current state-of-the-art KGQA methods that require learning prior node representations fare poorly. We propose two complementary approaches, PATHCBR and PATHRGCN for PERKGQA. The former is a simple non-parametric technique that employs case-based reasoning, while the latter is a parametric approach using graph neural networks. Our proposed methods circumvent learning prior representations, can generalize to unseen KGs, and outperform strong baselines on an academic and an internal dataset by 6.5% and 10.5%. 2022.findings-naacl.19 @@ -4809,7 +4809,7 @@ Exploring the Value of Multi-View Learning for Session-Aware Query Representation DiegoOrtiz - JoseMoreno + JoseMoreno GillesHubert KarenPinel-Sauvagnat LyndaTamine @@ -4911,7 +4911,7 @@ AnaMarasovic IzBeltagy DougDowney - MatthewPeters + MatthewPeters 410-424 Self-rationalization models that predict task labels and generate free-text elaborations for their predictions could enable more intuitive interaction with NLP systems. These models are, however, currently trained with a large amount of human-written free-text explanations for each task which hinders their broader usage. We propose to study a more realistic setting of self-rationalization using few training examples. We present FEB—a standardized collection of four existing English-language datasets and associated metrics. We identify the right prompting approach by extensively exploring natural language prompts on FEB. Then, by using this prompt and scaling the model size, we demonstrate that making progress on few-shot self-rationalization is possible. We show there is still ample room for improvement in this task: the average plausibility of generated explanations assessed by human annotators is at most 51% (with GPT-3), while plausibility of human explanations is 76%. We hope that FEB and our proposed approach will spur the community to take on the few-shot self-rationalization challenge. 2022.findings-naacl.31 @@ -4937,7 +4937,7 @@ AakankshaNaik SravanthiParasa SergeyFeldman - Lucy LuWang + Lucy LuWang TomHope 438-453 We present BEEP (Biomedical Evidence-Enhanced Predictions), a novel approach for clinical outcome prediction that retrieves patient-specific medical literature and incorporates it into predictive models. Based on each individual patient’s clinical notes, we train language models (LMs) to find relevant papers and fuse them with information from notes to predict outcomes such as in-hospital mortality. We develop methods to retrieve literature based on noisy, information-dense patient notes, and to augment existing outcome prediction models with retrieved papers in a manner that maximizes predictive accuracy. Our approach boosts predictive performance on three important clinical tasks in comparison to strong recent LM baselines, increasing F1 by up to 5 points and precision@Top-K by a large margin of over 25%. @@ -4950,8 +4950,8 @@ Improving Few-Shot Relation Classification by Prototypical Representation Learning with Definition Text LiZhenzhen YuyangZhang - Jian-YunNie - DongshengLi + Jian-YunNie + DongshengLi 454-464 Few-shot relation classification is difficult because the few instances available may not represent well the relation patterns. Some existing approaches explored extra information such as relation definition, in addition to the instances, to learn a better relation representation. However, the encoding of the extra information has been performed independently from the labeled instances. In this paper, we propose to learn a prototype encoder from relation definition in a way that is useful for relation instance classification. To this end, we use a joint training approach to train both a prototype encoder from definition and an instance encoder. Extensive experiments on several datasets demonstrate the effectiveness and usefulness of our prototype encoder from definition text, enabling us to outperform state-of-the-art approaches. 2022.findings-naacl.34 @@ -4982,10 +4982,10 @@ Multimodal Intent Discovery from Livestream Videos AdyashaMaharana - QuanTran + QuanTran FranckDernoncourt SeunghyunYoon - TrungBui + TrungBui WalterChang MohitBansal 476-489 @@ -5034,7 +5034,7 @@ SeunghyunYoon AjinkyaKale FranckDernoncourt - TrungBui + TrungBui MohitBansal 517-527 Modern image captioning models are usually trained with text similarity objectives. However, since reference captions in public datasets often describe the most salient common objects, models trained with the text similarity objectives tend to ignore specific and detailed aspects of an image that distinguish it from others. Towards more descriptive and distinctive caption generation, we propose to use CLIP, a multimodal encoder trained on huge image-text pairs from the web, to calculate multi-modal similarity and use it as a reward function. We also propose a simple finetuning strategy of CLIP text encoder to improve grammar that does not require extra text annotation. This completely eliminates the need for reference captions during the reward computation. To comprehensively evaluate descriptive captions, we introduce FineCapEval, a new dataset for caption evaluation with fine-grained criteria: overall, background, object, relations. In our experiments on text-to-image retrieval and FineCapEval, the proposed CLIP-guided model generates more distinctive captions than the CIDEroptimized model. We also show that our unsupervised grammar finetuning of the CLIP text encoder alleviates the degeneration problem of the naive CLIP reward. Lastly, we show human analysis where the annotators strongly prefer CLIP reward to CIDEr and MLE objectives on diverse criteria. @@ -5060,8 +5060,8 @@ Modeling Ideological Salience and Framing in Polarized Online Groups with Graph Neural Networks and Structured Sparsity ValentinHofmann XiaowenDong - JanetPierrehumbert - HinrichSchuetze + JanetPierrehumbert + HinrichSchuetze 536-550 The increasing polarization of online political discourse calls for computational tools that automatically detect and monitor ideological divides in social media. We introduce a minimally supervised method that leverages the network structure of online discussion forums, specifically Reddit, to detect polarized concepts. We model polarization along the dimensions of salience and framing, drawing upon insights from moral psychology. Our architecture combines graph neural networks with structured sparsity learning and results in representations for concepts and subreddits that capture temporal ideological dynamics such as right-wing and left-wing radicalization. 2022.findings-naacl.41 @@ -5077,7 +5077,7 @@ MuhammedKocyigit SedaAkbiyik Serife LemanRunyun - DerryWijaya + DerryWijaya 551-564 Large language models trained on a mixture of NLP tasks that are converted into a text-to-text format using prompts, can generalize into novel forms of language and handle novel tasks. A large body of work within prompt engineering attempts to understand the effects of input forms and prompts in achieving superior performance. We consider an alternative measure and inquire whether the way in which an input is encoded affects social biases promoted in outputs. In this paper, we study T0, a large-scale multi-task text-to-text language model trained using prompt-based learning. We consider two different forms of semantically equivalent inputs: question-answer format and premise-hypothesis format. We use an existing bias benchmark for the former BBQ and create the first bias benchmark in natural language inference BBNLI with hand-written hypotheses while also converting each benchmark into the other form. The results on two benchmarks suggest that given two different formulations of essentially the same input, T0 conspicuously acts more biased in question answering form, which is seen during training, compared to premise-hypothesis form which is unlike its training examples. Code and data are released under https://github.com/feyzaakyurek/bbnli. 2022.findings-naacl.42 @@ -5089,7 +5089,7 @@ Anti-Overestimation Dialogue Policy Learning for Task-Completion Dialogue System ChangTian WenpengYin - Marie-FrancineMoens + Marie-FrancineMoens 565-577 A dialogue policy module is an essential part of task-completion dialogue systems. Recently, increasing interest has focused on reinforcement learning (RL)-based dialogue policy. Its favorable performance and wise action decisions rely on an accurate estimation of action values. The overestimation problem is a widely known issue of RL since its estimate of the maximum action value is larger than the ground truth, which results in an unstable learning process and suboptimal policy. This problem is detrimental to RL-based dialogue policy learning. To mitigate this problem, this paper proposes a dynamic partial average estimator (DPAV) of the ground truth maximum action value. DPAV calculates the partial average between the predicted maximum action value and minimum action value, where the weights are dynamically adaptive and problem-dependent. We incorporate DPAV into a deep Q-network as the dialogue policy and show that our method can achieve better or comparable results compared to top baselines on three dialogue datasets of different domains with a lower computational load. In addition, we also theoretically prove the convergence and derive the upper and lower bounds of the bias compared with those of other methods. 2022.findings-naacl.43 @@ -5101,7 +5101,7 @@ <fixed-case>P</fixed-case>enn-<fixed-case>H</fixed-case>elsinki Parsed Corpus of Early <fixed-case>M</fixed-case>odern <fixed-case>E</fixed-case>nglish: First Parsing Results and Analysis SethKulick NevilleRyant - BeatriceSantorini + BeatriceSantorini 578-593 The Penn-Helsinki Parsed Corpus of Early Modern English (PPCEME), a 1.7-million-word treebank that is an important resource for research in syntactic change, has several properties that present potential challenges for NLP technologies. We describe these key features of PPCEME that make it challenging for parsing, including a larger and more varied set of function tags than in the Penn Treebank, and present results for this corpus using a modified version of the Berkeley Neural Parser and the approach to function tag recovery of Gabbard et al. (2006). While this approach to function tag recovery gives reasonable results, it is in some ways inappropriate for span-based parsers. We also present further evidence of the importance of in-domain pretraining for contextualized word representations. The resulting parser will be used to parse Early English Books Online, a 1.5 billion word corpus whose utility for the study of syntactic change will be greatly increased with the addition of accurate parse trees. 2022.findings-naacl.44 @@ -5142,12 +5142,12 @@ <fixed-case>C</fixed-case>o<fixed-case>C</fixed-case>o<fixed-case>A</fixed-case>-<fixed-case>MT</fixed-case>: A Dataset and Benchmark for Contrastive Controlled <fixed-case>MT</fixed-case> with Application to Formality - MariaNadejde + MariaNadejde AnnaCurrey BenjaminHsu XingNiu MarcelloFederico - GeorgianaDinu + GeorgianaDinu 616-632 The machine translation (MT) task is typically formulated as that of returning a single translation for an input segment. However, in many cases, multiple different translations are valid and the appropriate translation may depend on the intended target audience, characteristics of the speaker, or even the relationship between speakers. Specific problems arise when dealing with honorifics, particularly translating from English into languages with formality markers. For example, the sentence “Are you sure?” can be translated in German as “Sind Sie sich sicher?” (formal register) or “Bist du dir sicher?” (informal). Using wrong or inconsistent tone may be perceived as inappropriate or jarring for users of certain cultures and demographics. This work addresses the problem of learning to control target language attributes, in this case formality, from a small amount of labeled contrastive data. We introduce an annotated dataset (CoCoA-MT) and an associated evaluation metric for training and evaluating formality-controlled MT models for six diverse target languages. We show that we can train formality-controlled models by fine-tuning on labeled contrastive data, achieving high accuracy (82% in-domain and 73% out-of-domain) while maintaining overall quality. 2022.findings-naacl.47 @@ -5173,7 +5173,7 @@ JesinJames VithyaYogarajan IsabellaShields - CatherineWatson + CatherineWatson PeterKeegan KeoniMahelona Peter-LucasJones @@ -5205,7 +5205,7 @@ MingleiLi XinJiang QunLiu - HinrichSchuetze + HinrichSchuetze 675-692 Vast efforts have been devoted to creating high-performance few-shot learners, i.e., large-scale pretrained language models (PLMs) that perform well with little downstream task training data. Training PLMs has incurred significant cost, but utilizing the few-shot learners is still challenging due to their enormous size. This work focuses on a crucial question: How to make effective use of these few-shot learners? We propose LMTurk, a novel approach that treats few-shotlearners as crowdsourcing workers. The rationale is that crowdsourcing workers are in fact few-shot learners: They are shown a few illustrative examples to learn about a task and then start annotating. LMTurk employs few-shot learners built upon PLMs as workers. We show that the resulting annotations can be utilized to train models that solve the task well and are small enough to be deployable in practical scenarios. Active learning is integrated into LMTurk to reduce the amount of queries made to PLMs, minimizing the computational cost of running PLM inference passes. Altogether, LMTurk is an important step towards making effective use of current PLMs. 2022.findings-naacl.51 @@ -5256,7 +5256,7 @@ <fixed-case>L</fixed-case>ong<fixed-case>T</fixed-case>5: <fixed-case>E</fixed-case>fficient Text-To-Text Transformer for Long Sequences MandyGuo JoshuaAinslie - DavidUthus + DavidUthus SantiagoOntanon JianmoNi Yun-HsuanSung @@ -5271,7 +5271,7 @@ Challenging <fixed-case>A</fixed-case>merica: Modeling language in longer time scales JakubPokrywka - FilipGraliński + FilipGraliński KrzysztofJassem KarolKaczmarek KrzysztofJurkiewicz @@ -5314,7 +5314,7 @@ SarthakDash SugatoBagchi NandanaMihindukulasooriya - AlfioGliozzo + AlfioGliozzo 788-800 Representing text in tables is essential for many business intelligence tasks such as semantic retrieval, data exploration and visualization, and question answering. Existing methods that leverage pretrained Transformer encoders range from a simple construction of pseudo-sentences by concatenating text across rows or columns to complex parameter-intensive models that encode table structure and require additional pretraining. In this work, we introduce a novel encoding strategy for Transformer encoders that preserves the critical property of permutation invariance across rows or columns. Unlike existing state-of-the-art methods for Table Understanding, our proposed approach does not require any additional pretraining and still substantially outperforms existing methods in almost all instances. We demonstrate the effectiveness of our proposed approach on three table interpretation tasks: column type annotation, relation extraction, and entity linking through extensive experiments on existing tabular datasets. 2022.findings-naacl.59 @@ -5451,7 +5451,7 @@ VictorSteinborn PhilippDufter HarisJabbar - HinrichSchuetze + HinrichSchuetze 921-932 Bias research in NLP is a rapidly growing and developing field. Similar to CrowS-Pairs (Nangia et al., 2020), we assess gender bias in masked-language models (MLMs) by studying pairs of sentences with gender swapped person references. Most bias research focuses on and often is specific to English.Using a novel methodology for creating sentence pairs that is applicable across languages, we create, based on CrowS-Pairs, a multilingual dataset for English, Finnish, German, Indonesian and Thai.Additionally, we propose S_{JSD}, a new bias measure based on Jensen–Shannon divergence, which we argue retains more information from the model output probabilities than other previously proposed bias measures for MLMs.Using multilingual MLMs, we find that S_{JSD} diagnoses the same systematic biased behavior for non-English that previous studies have found for monolingual English pre-trained MLMs. S_{JSD} outperforms the CrowS-Pairs measure, which struggles to find such biases for smaller non-English datasets. 2022.findings-naacl.69 @@ -5467,7 +5467,7 @@ ChenLiang HaomingJiang SiawpengEr - ChaoZhang + ChaoZhang TuoZhao HongyuanZha 933-949 @@ -5509,7 +5509,7 @@ <fixed-case>QLEVR</fixed-case>: A Diagnostic Dataset for Quantificational Language and Elementary Visual Reasoning ZechenLi - AndersSøgaard + AndersSøgaard 980-996 Synthetic datasets have successfully been used to probe visual question-answering datasets for their reasoning abilities. CLEVR (John- son et al., 2017), for example, tests a range of visual reasoning abilities. The questions in CLEVR focus on comparisons of shapes, colors, and sizes, numerical reasoning, and existence claims. This paper introduces a minimally biased, diagnostic visual question-answering dataset, QLEVR, that goes beyond existential and numerical quantification and focus on more complex quantifiers and their combinations, e.g., asking whether there are more than two red balls that are smaller than at least three blue balls in an image. We describe how the dataset was created and present a first evaluation of state-of-the-art visual question-answering models, showing that QLEVR presents a formidable challenge to our current models. Code and Dataset are available at https://github.com/zechenli03/QLEVR 2022.findings-naacl.73 @@ -5573,7 +5573,7 @@ ”Diversity and Uncertainty in Moderation” are the Key to Data Selection for Multilingual Few-shot Transfer ShanuKumar - SandipanDandapat + SandipanDandapat MonojitChoudhury 1042-1055 Few-shot transfer often shows substantial gain over zero-shot transfer (CITATION), which is a practically useful trade-off between fully supervised and unsupervised learning approaches for multilingual pretained model-based systems. This paper explores various strategies for selecting data for annotation that can result in a better few-shot transfer. The proposed approaches rely on multiple measures such as data entropy using n-gram language model, predictive entropy, and gradient embedding. We propose a loss embedding method for sequence labeling tasks, which induces diversity and uncertainty sampling similar to gradient embedding. The proposed data selection strategies are evaluated and compared for POS tagging, NER, and NLI tasks for up to 20 languages. Our experiments show that the gradient and loss embedding-based strategies consistently outperform random data selection baselines, with gains varying with the initial performance of the zero-shot transfer. Furthermore, the proposed method shows similar trends in improvement even when the model is fine-tuned using a lower proportion of the original task-specific labeled training data for zero-shot transfer. @@ -5704,9 +5704,9 @@ Improving Code-Switching Dependency Parsing with Semi-Supervised Auxiliary Tasks Şaziye BetülÖzateş - ArzucanÖzgür + ArzucanÖzgür TungaGungor - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu 1159-1171 Code-switching dependency parsing stands as a challenging task due to both the scarcity of necessary resources and the structural difficulties embedded in code-switched languages. In this study, we introduce novel sequence labeling models to be used as auxiliary tasks for dependency parsing of code-switched text in a semi-supervised scheme. We show that using auxiliary tasks enhances the performance of an LSTM-based dependency parsing model and leads to better results compared to an XLM-R-based model with significantly less computational and time complexity. As the first study that focuses on multiple code-switching language pairs for dependency parsing, we acquire state-of-the-art scores on all of the studied languages. Our best models outperform the previous work by 7.4 LAS points on average. 2022.findings-naacl.87 @@ -5811,7 +5811,7 @@ A Survey on Stance Detection for Mis- and Disinformation Identification MomchilHardalov ArnavArora - PreslavNakov + PreslavNakov IsabelleAugenstein 1259-1277 Understanding attitudes expressed in texts, also known as stance detection, plays an important role in systems for detecting false information online, be it misinformation (unintentionally false) or disinformation (intentionally false information). Stance detection has been framed in different ways, including (a) as a component of fact-checking, rumour detection, and detecting previously fact-checked claims, or (b) as a task in its own right. While there have been prior efforts to contrast stance detection with other related tasks such as argumentation mining and sentiment analysis, there is no existing survey on examining the relationship between stance detection and mis- and disinformation detection. Here, we aim to bridge this gap by reviewing and analysing existing work in this area, with mis- and disinformation in focus, and discussing lessons learnt and future challenges. @@ -5836,7 +5836,7 @@ To Answer or Not To Answer? Improving Machine Reading Comprehension Model with Span-based Contrastive Learning YunjieJi - LiangyuChen + LiangyuChen ChenxiaoDou BaochangMa XiangangLi @@ -5851,7 +5851,7 @@ Target-Guided Dialogue Response Generation Using Commonsense and Data Augmentation PrakharGupta HarshJhamtani - JeffreyBigham + JeffreyBigham 1301-1317 Target-guided response generation enables dialogue systems to smoothly transition a conversation from a dialogue context toward a target sentence. Such control is useful for designing dialogue systems that direct a conversation toward specific goals, such as creating non-obtrusive recommendations or introducing new topics in the conversation. In this paper, we introduce a new technique for target-guided response generation, which first finds a bridging path of commonsense knowledge concepts between the source and the target, and then uses the identified bridging path to generate transition responses. Additionally, we propose techniques to re-purpose existing dialogue datasets for target-guided generation. Experiments reveal that the proposed techniques outperform various baselines on this task. Finally, we observe that the existing automated metrics for this task correlate poorly with human judgement ratings. We propose a novel evaluation metric that we demonstrate is more reliable for target-guided response evaluation. Our work generally enables dialogue system designers to exercise more control over the conversations that their systems produce. 2022.findings-naacl.97 @@ -5863,7 +5863,7 @@ <fixed-case>B</fixed-case>angla<fixed-case>BERT</fixed-case>: Language Model Pretraining and Benchmarks for Low-Resource Language Understanding Evaluation in <fixed-case>B</fixed-case>angla AbhikBhattacharjee TahmidHasan - WasiAhmad + WasiAhmad Kazi SaminMubasshir Md SaifulIslam AnindyaIqbal @@ -5925,7 +5925,7 @@ MahdiNamazifar YangLiu DiJin - DilekHakkani-Tur + DilekHakkani-Tur 1375-1388 The massive amount of trainable parameters in the pre-trained language models (PLMs) makes them hard to be deployed to multiple downstream tasks. To address this issue, parameter-efficient transfer learning methods have been proposed to tune only a few parameters during fine-tuning while freezing the rest. This paper looks at existing methods along this line through the kernel lens. Motivated by the connection between self-attention in transformer-based PLMs and kernel learning, we propose kernel-wise adapters, namely Kernel-mix, that utilize the kernel structure in self-attention to guide the assignment of the tunable parameters. These adapters use guidelines found in classical kernel learning and enable separate parameter tuning for each attention head. Our empirical results, over a diverse set of natural language generation and understanding tasks, show that our proposed adapters can attain or improve the strong performance of existing baselines. 2022.findings-naacl.102 @@ -5939,7 +5939,7 @@ NithinAnchuri MehdiRezagholizadeh AbbasGhaddar - PhilippeLanglais + PhilippeLanglais PascalPoupart 1389-1400 Intermediate layer knowledge distillation (KD) can improve the standard KD technique (which only targets the output of teacher and student models) especially over large pre-trained language models. However, intermediate layer distillation suffers from excessive computational burdens and engineering efforts required for setting up a proper layer mapping. To address these problems, we propose a RAndom Intermediate Layer Knowledge Distillation (RAIL-KD) approach in which, intermediate layers from the teacher model are selected randomly to be distilled into the intermediate layers of the student model. This randomized selection enforces that all teacher layers are taken into account in the training process, while reducing the computational cost of intermediate layer distillation. Also, we show that it acts as a regularizer for improving the generalizability of the student model. We perform extensive experiments on GLUE tasks as well as on out-of-domain test sets. We show that our proposed RAIL-KD approach outperforms other state-of-the-art intermediate layer KD methods considerably in both performance and training-time. @@ -6020,7 +6020,7 @@ Exploring Neural Models for Query-Focused Summarization JesseVig - AlexanderFabbri + AlexanderFabbri WojciechKryscinski Chien-ShengWu WenhaoLiu @@ -6034,8 +6034,8 @@ <fixed-case>B</fixed-case>itext<fixed-case>E</fixed-case>dit: Automatic Bitext Editing for Improved Low-Resource Machine Translation EleftheriaBriakou - SidaWang - LukeZettlemoyer + SidaWang + LukeZettlemoyer MarjanGhazvininejad 1469-1485 Mined bitexts can contain imperfect translations that yield unreliable training signals for Neural Machine Translation (NMT). While filtering such pairs out is known to improve final model quality, we argue that it is suboptimal in low-resource conditions where even mined data can be limited. In our work, we propose instead, to refine the mined bitexts via automatic editing: given a sentence in a language x_f, and a possibly imperfect translation of it \mathbf{x_e}, our model generates a revised version x_f' or x_e' that yields a more equivalent translation pair (i.e., <x_f, x_e'> or <x_f', x_e>). We use a simple editing strategy by (1) mining potentially imperfect translations for each sentence in a given bitext, (2) learning a model to reconstruct the original translations and translate, in a multi-task fashion. Experiments demonstrate that our approach successfully improves the quality of CCMatrix mined bitext for 5 low-resource language-pairs and 10 translation directions by up to 8 BLEU points, in most cases improving upon a competitive translation-based baseline. @@ -6111,7 +6111,7 @@ VladimirKarpukhin StanPeshterliev DmytroOkhonko - MichaelSchlichtkrull + MichaelSchlichtkrull SonalGupta YasharMehdad ScottYih @@ -6140,7 +6140,7 @@ PiotrNawrot SzymonTworkowski MichałTyrolski - LukaszKaiser + LukaszKaiser YuhuaiWu ChristianSzegedy HenrykMichalewski @@ -6156,7 +6156,7 @@ <fixed-case>DISARM</fixed-case>: Detecting the Victims Targeted by Harmful Memes ShivamSharma Md ShadAkhtar - PreslavNakov + PreslavNakov TanmoyChakraborty 1572-1588 Internet memes have emerged as an increasingly popular means of communication on the web. Although memes are typically intended to elicit humour, they have been increasingly used to spread hatred, trolling, and cyberbullying, as well as to target specific individuals, communities, or society on political, socio-cultural, and psychological grounds. While previous work has focused on detecting harmful, hateful, and offensive memes in general, identifying whom these memes attack (i.e., the ‘victims’) remains a challenging and underexplored area. We attempt to address this problem in this paper. To this end, we create a dataset in which we annotate each meme with its victim(s) such as the name of the targeted person(s), organization(s), and community(ies). We then propose DISARM (Detecting vIctimS targeted by hARmful Memes), a framework that uses named-entity recognition and person identification to detect all entities a meme is referring to, and then, incorporates a novel contextualized multimodal deep neural network to classify whether the meme intends to harm these entities. We perform several systematic experiments on three different test sets, corresponding to entities that are (i) all seen while training, (ii) not seen as a harmful target while training, and (iii) not seen at all while training. The evaluation shows that DISARM significantly outperforms 10 unimodal and multimodal systems. Finally, we demonstrate that DISARM is interpretable and comparatively more generalizable and that it can reduce the relative error rate of harmful target identification by up to 9 % absolute over multimodal baseline systems. @@ -6216,7 +6216,7 @@ ShadenShaar FirojAlam GiovanniDa San Martino - PreslavNakov + PreslavNakov 1619-1631 Recent years have seen the proliferation of disinformation and fake news online. Traditional approaches to mitigate these issues is to use manual or automatic fact-checking. Recently, another approach has emerged: checking whether the input claim has previously been fact-checked, which can be done automatically, and thus fast, while also offering credibility and explainability, thanks to the human fact-checking and explanations in the associated fact-checking article. Here, we focus on claims made in a political debate and we study the impact of modeling the context of the claim: both on the source side, i.e., in the debate, as well as on the target side, i.e., in the fact-checking explanation document. We do this by modeling the local context, the global context, as well as by means of co-reference resolution, and multi-hop reasoning over the sentences of the document describing the fact-checked claim. The experimental results show that each of these represents a valuable information source, but that modeling the source-side context is most important, and can yield 10+ points of absolute improvement over a state-of-the-art model. 2022.findings-naacl.122 @@ -6258,7 +6258,7 @@ QiJia YizhuLiu HaifengTang - KennyZhu + KennyZhu 1660-1669 Previous dialogue summarization techniques adapt large language models pretrained on the narrative text by injecting dialogue-specific features into the models. These features either require additional knowledge to recognize or make the resulting models harder to tune. To bridge the format gap between dialogues and narrative summaries in dialogue summarization tasks, we propose to post-train pretrained language models (PLMs) to rephrase from dialogue to narratives. After that, the model is fine-tuned for dialogue summarization as usual. Comprehensive experiments show that our approach significantly improves vanilla PLMs on dialogue summarization and outperforms other SOTA models by the summary quality and implementation costs. 2022.findings-naacl.125 @@ -6289,9 +6289,9 @@ SaswatiDana DineshGarg PavanKapanipathi - SalimRoukos + SalimRoukos AlexanderGray - L VenkataSubramaniam + L VenkataSubramaniam 1681-1697 Entity Linking (EL) maps an entity mention in a natural language sentence to an entity in a knowledge base (KB). The Zero-shot Entity Linking (ZEL) extends the scope of EL to unseen entities at the test time without requiring new labeled data. BLINK (BERT-based) is one of the SOTA models for ZEL. Interestingly, we discovered that BLINK exhibits diminishing returns, i.e., it reaches 98% of its performance with just 1% of the training data and the remaining 99% of the data yields only a marginal increase of 2% in the performance. While this extra 2% gain makes a huge difference for downstream tasks, training BLINK on large amounts of data is very resource-intensive and impractical. In this paper, we propose a neuro-symbolic, multi-task learning approach to bridge this gap. Our approach boosts the BLINK’s performance with much less data by exploiting an auxiliary information about entity types. Specifically, we train our model on two tasks simultaneously - entity linking (primary task) and hierarchical entity type prediction (auxiliary task). The auxiliary task exploits the hierarchical structure of entity types. Our approach achieves superior performance on ZEL task with significantly less training data. On four different benchmark datasets, we show that our approach achieves significantly higher performance than SOTA models when they are trained with just 0.01%, 0.1%, or 1% of the original training data. Our code is available at https://github.com/IBM/NeSLET. 2022.findings-naacl.127 @@ -6346,7 +6346,7 @@ JinhaoJiang KunZhou Ji-RongWen - XinZhao + XinZhao 1730-1741 Commonsense reasoning in natural language is a desired ability of artificial intelligent systems. For solving complex commonsense reasoning tasks, a typical solution is to enhance pre-trained language models (PTMs) with a knowledge-aware graph neural network (GNN) encoder that models a commonsense knowledge graph (CSKG).Despite the effectiveness, these approaches are built on heavy architectures, and can’t clearly explain how external knowledge resources improve the reasoning capacity of PTMs. Considering this issue, we conduct a deep empirical analysis, and find that it is indeed relation features from CSKGs (but not node features) that mainly contribute to the performance improvement of PTMs. Based on this finding, we design a simple MLP-based knowledge encoder that utilizes statistical relation paths as features. Extensive experiments conducted on five benchmarks demonstrate the effectiveness of our approach, which also largely reduces the parameters for encoding CSKGs.Our codes and data are publicly available at https://github.com/RUCAIBox/SAFE. 2022.findings-naacl.131 @@ -6359,9 +6359,9 @@ MingFang ShiZong JingLi - XinyuDai + XinyuDai ShujianHuang - JiajunChen + JiajunChen 1742-1754 Complaining is a speech act that expresses a negative inconsistency between reality and human’s expectations. While prior studies mostly focus on identifying the existence or the type of complaints, in this work, we present the first study in computational linguistics of measuring the intensity of complaints from text. Analyzing complaints from such perspective is particularly useful, as complaints of certain degrees may cause severe consequences for companies or organizations. We first collect 3,103 posts about complaints in education domain from Weibo, a popular Chinese social media platform. These posts are then annotated with complaints intensity scores using Best-Worst Scaling (BWS) method. We show that complaints intensity can be accurately estimated by computational models with best mean square error achieving 0.11. Furthermore, we conduct a comprehensive linguistic analysis around complaints, including the connections between complaints and sentiment, and a cross-lingual comparison for complaints expressions used by Chinese and English speakers. We finally show that our complaints intensity scores can be incorporated for better estimating the popularity of posts on social media. 2022.findings-naacl.132 @@ -6442,7 +6442,7 @@ JunmoKang Kyung-minKim GiwonHong - Sung-HyonMyaeng + Sung-HyonMyaeng 1811-1821 Numerical reasoning over text is a challenging subtask in question answering (QA) that requires both the understanding of texts and numbers. However, existing language models in these numerical reasoning QA models tend to overly rely on the pre-existing parametric knowledge at inference time, which commonly causes hallucination in interpreting numbers. Our work proposes a novel attention masked reasoning model, the NC-BERT, that learns to leverage the number-related contextual knowledge to alleviate the over-reliance on parametric knowledge and enhance the numerical reasoning capabilities of the QA model. The empirical results suggest that understanding of numbers in their context by reducing the parametric knowledge influence, and refining numerical information in the number embeddings lead to improved numerical reasoning accuracy and performance in DROP, a numerical QA dataset. 2022.findings-naacl.138 @@ -6471,7 +6471,7 @@ JuheeSon JinYeongBak KyunghyunCho - AliceOh + AliceOh 1832-1844 Historical records in Korea before the 20th century were primarily written in Hanja, an extinct language based on Chinese characters and not understood by modern Korean or Chinese speakers. Historians with expertise in this time period have been analyzing the documents, but that process is very difficult and time-consuming, and language models would significantly speed up the process. Toward building and evaluating language models for Hanja, we release the Hanja Understanding Evaluation dataset consisting of chronological attribution, topic classification, named entity recognition, and summary retrieval tasks. We also present BERT-based models continued training on the two major corpora from the 14th to the 19th centuries: the Annals of the Joseon Dynasty and Diaries of the Royal Secretariats. We compare the models with several baselines on all tasks and show there are significant improvements gained by training on the two corpora. Additionally, we run zero-shot experiments on the Daily Records of the Royal Court and Important Officials (DRRI). The DRRI dataset has not been studied much by the historians, and not at all by the NLP community. 2022.findings-naacl.140 @@ -6582,10 +6582,10 @@ YuweiCao WilliamGroves Tanay KumarSaha - JoelTetreault + JoelTetreault AlejandroJaimes HaoPeng - PhilipYu + PhilipYu 1931-1942 Temporal Expression Extraction (TEE) is essential for understanding time in natural language. It has applications in Natural Language Processing (NLP) tasks such as question answering, information retrieval, and causal inference. To date, work in this area has mostly focused on English as there is a scarcity of labeled data for other languages. We propose XLTime, a novel framework for multilingual TEE. XLTime works on top of pre-trained language models and leverages multi-task learning to prompt cross-language knowledge transfer both from English and within the non-English languages. XLTime alleviates problems caused by a shortage of data in the target language. We apply XLTime with different language models and show that it outperforms the previous automatic SOTA methods on French, Spanish, Portuguese, and Basque, by large margins. XLTime also closes the gap considerably on the handcrafted HeidelTime method. 2022.findings-naacl.148 @@ -6714,7 +6714,7 @@ Learning to Execute Actions or Ask Clarification Questions - ZhengxiangShi + ZhengxiangShi YueFeng AldoLipani 2060-2070 @@ -6730,7 +6730,7 @@ BoweiZou MengxingDong XiaoLi - AiTiAw + AiTiAw YuHong 2071-2078 Conversational Question Answering (ConvQA) is required to answer the current question, conditioned on the observable paragraph-level context and conversation history. Previous works have intensively studied history-dependent reasoning. They perceive and absorb topic-related information of prior utterances in the interactive encoding stage. It yielded significant improvement compared to history-independent reasoning. This paper further strengthens the ConvQA encoder by establishing long-distance dependency among global utterances in multi-turn conversation. We use multi-layer transformers to resolve long-distance relationships, which potentially contribute to the reweighting of attentive information in historical utterances. Experiments on QuAC show that our method obtains a substantial improvement (1%), yielding the F1 score of 73.7%. All source codes are available at https://github.com/jaytsien/GHR. @@ -6743,10 +6743,10 @@ Learning Structural Information for Syntax-Controlled Paraphrase Generation ErguangYang ChenglinBai - DeyiXiong + DeyiXiong YujieZhang YaoMeng - JinanXu + JinanXu YufengChen 2079-2090 Syntax-controlled paraphrase generation aims to produce paraphrase conform to given syntactic patterns. To address this task, recent works have started to use parse trees (or syntactic templates) to guide generation.A constituency parse tree contains abundant structural information, such as parent-child relation, sibling relation, and the alignment relation between words and nodes. Previous works have only utilized parent-child and alignment relations, which may affect the generation quality. To address this limitation, we propose a Structural Information-augmented Syntax-Controlled Paraphrasing (SI-SCP) model. Particularly, we design a syntax encoder based on tree-transformer to capture parent-child and sibling relations. To model the alignment relation between words and nodes, we propose an attention regularization objective, which makes the decoder accurately select corresponding syntax nodes to guide the generation of words. Experiments show that SI-SCP achieves state-of-the-art performances in terms of semantic and syntactic quality on two popular benchmark datasets. Additionally, we propose a Syntactic Template Retriever (STR) to retrieve compatible syntactic structures. We validate that STR is capable of retrieving compatible syntactic structures. We further demonstrate the effectiveness of SI-SCP to generate diverse paraphrases with retrieved syntactic structures. @@ -6873,7 +6873,7 @@ Specializing Pre-trained Language Models for Better Relational Reasoning via Network Pruning SiyuRen - KennyZhu + KennyZhu 2195-2207 Pretrained masked language models (PLMs) were shown to be inheriting a considerable amount of relational knowledge from the source corpora. In this paper, we present an in-depth and comprehensive study concerning specializing PLMs into relational models from the perspective of network pruning. We show that it is possible to find subnetworks capable of representing grounded commonsense relations at non-trivial sparsity while being more generalizable than original PLMs in scenarios requiring knowledge of single or multiple commonsense relations. 2022.findings-naacl.169 @@ -6919,7 +6919,7 @@ HyeonseokMoon SugyeongEo SeonminKoo - HeuiseokLim + HeuiseokLim 2233-2249 Recent natural language understanding (NLU) research on the Korean language has been vigorously maturing with the advancements of pretrained language models and datasets. However, Korean pretrained language models still struggle to generate a short sentence with a given condition based on compositionality and commonsense reasoning (i.e., generative commonsense reasoning). The two major challenges are inadequate data resources to develop generative commonsense reasoning regarding Korean linguistic features and to evaluate language models which are necessary for natural language generation (NLG). To solve these problems, we propose a text-generation dataset for Korean generative commonsense reasoning and language model evaluation. In this work, a semi-automatic dataset construction approach filters out contents inexplicable to commonsense, ascertains quality, and reduces the cost of building the dataset. We also present an in-depth analysis of the generation results of language models with various evaluation metrics along with human-annotated scores. The whole dataset is publicly available at (https://aihub.or.kr/opendata/korea-university). 2022.findings-naacl.172 @@ -6959,7 +6959,7 @@ ZhenLi BingXu ConghuiZhu - TiejunZhao + TiejunZhao 2282-2294 Compared with unimodal data, multimodal data can provide more features to help the model analyze the sentiment of data. Previous research works rarely consider token-level feature fusion, and few works explore learning the common features related to sentiment in multimodal data to help the model fuse multimodal features. In this paper, we propose a Contrastive Learning and Multi-Layer Fusion (CLMLF) method for multimodal sentiment detection. Specifically, we first encode text and image to obtain hidden representations, and then use a multi-layer fusion module to align and fuse the token-level features of text and image. In addition to the sentiment analysis task, we also designed two contrastive learning tasks, label based contrastive learning and data based contrastive learning tasks, which will help the model learn common features related to sentiment in multimodal data. Extensive experiments conducted on three publicly available multimodal datasets demonstrate the effectiveness of our approach for multimodal sentiment detection compared with existing methods. The codes are available for use at https: //github.com/Link-Li/CLMLF 2022.findings-naacl.175 @@ -7030,7 +7030,7 @@ Jointly Learning Guidance Induction and Faithful Summary Generation via Conditional Variational Autoencoders WangXu - TiejunZhao + TiejunZhao 2340-2350 Abstractive summarization can generate high quality results with the development of the neural network. However, generating factual consistency summaries is a challenging task for abstractive summarization. Recent studies extract the additional information with off-the-shelf tools from the source document as a clue to guide the summary generation, which shows effectiveness to improve the faithfulness. Unlike these work, we present a novel framework based on conditional variational autoencoders, which induces the guidance information and generates the summary equipment with the guidance synchronously. Experiments on XSUM and CNNDM dataset show that our approach can generate relevant and fluent summaries which is more faithful than the existing state-of-the-art approaches, according to multiple factual consistency metrics. 2022.findings-naacl.180 @@ -7123,9 +7123,9 @@ Textual Entailment for Event Argument Extraction: Zero- and Few-Shot with Multi-Source Learning OscarSainz ItziarGonzalez-Dios - OierLopez de Lacalle + OierLopez de Lacalle BonanMin - EnekoAgirre + EnekoAgirre 2439-2455 Recent work has shown that NLP tasks such as Relation Extraction (RE) can be recasted as a Textual Entailment tasks using verbalizations, with strong performance in zero-shot and few-shot settings thanks to pre-trained entailment models. The fact that relations in current RE datasets are easily verbalized casts doubts on whether entailment would be effective in more complex tasks. In this work we show that entailment is also effective in Event Argument Extraction (EAE), reducing the need of manual annotation to 50% and 20% in ACE and WikiEvents, respectively, while achieving the same performance as with full training. More importantly, we show that recasting EAE as entailment alleviates the dependency on schemas, which has been a roadblock for transferring annotations between domains. Thanks to entailment, the multi-source transfer between ACE and WikiEvents further reduces annotation down to 10% and 5% (respectively) of the full training without transfer. Our analysis shows that key to good results is the use of several entailment datasets to pre-train the entailment model. Similar to previous approaches, our method requires a small amount of effort for manual verbalization: only less than 15 minutes per event argument types is needed; comparable results can be achieved from users of different level of expertise. 2022.findings-naacl.187 @@ -7151,7 +7151,7 @@ Latent Group Dropout for Multilingual and Multidomain Machine Translation Minh-QuangPham FrançoisYvon - JosepCrego + JosepCrego 2469-2481 Multidomain and multilingual machine translation often rely on parameter sharing strategies, where large portions of the network are meant to capture the commonalities of the tasks at hand, while smaller parts are reserved to model the peculiarities of a language or a domain. In adapter-based approaches, these strategies are hardcoded in the network architecture, independent of the similarities between tasks. In this work, we propose a new method to better take advantage of these similarities, using a latent-variable model. We also develop new techniques to train this model end-to-end and report experimental results showing that the learned patterns are both meaningful and yield improved translation performance without any increase of the model size. 2022.findings-naacl.189 @@ -7166,7 +7166,7 @@ RunxinXu TianyuLiu ZhifangSui - BaobaoChang + BaobaoChang 2482-2496 As Abstract Meaning Representation (AMR) implicitly involves compound semantic annotations, we hypothesize auxiliary tasks which are semantically or formally related can better enhance AMR parsing. We find that 1) Semantic role labeling (SRL) and dependency parsing (DP), would bring more performance gain than other tasks e.g. MT and summarization in the text-to-AMR transition even with much less data. 2) To make a better fit for AMR, data from auxiliary tasks should be properly “AMRized” to PseudoAMR before training. Knowledge from shallow level parsing tasks can be better transferred to AMR Parsing with structure transform. 3) Intermediate-task learning is a better paradigm to introduce auxiliary tasks to AMR parsing, compared to multitask learning. From an empirical perspective, we propose a principled method to involve auxiliary tasks to boost AMR parsing. Extensive experiments show that our method achieves new state-of-the-art performance on different benchmarks especially in topology-related scores. Code and models are released at https://github.com/PKUnlp-icler/ATP. 2022.findings-naacl.190 @@ -7260,11 +7260,11 @@ <fixed-case>KETOD</fixed-case>: Knowledge-Enriched Task-Oriented Dialogue - ZhiyuChen + ZhiyuChen BingLiu SeungwhanMoon ChinnadhuraiSankar - PaulCrook + PaulCrook William YangWang 2581-2593 Existing studies in dialogue system research mostly treat task-oriented dialogue and chit-chat as separate domains. Towards building a human-like assistant that can converse naturally and seamlessly with users, it is important to build a dialogue system that conducts both types of conversations effectively. In this work, we investigate how task-oriented dialogue and knowledge-grounded chit-chat can be effectively integrated into a single model. To this end, we create a new dataset, KETOD (Knowledge-Enriched Task-Oriented Dialogue), where we naturally enrich task-oriented dialogues with chit-chat based on relevant entity knowledge. We also propose two new models, SimpleToDPlus and Combiner, for the proposed task. Experimental results on both automatic and human evaluations show that the proposed methods can significantly improve the performance in knowledge-enriched response generation while maintaining a competitive task-oriented dialog performance. We believe our new dataset will be a valuable resource for future studies. Our dataset and code are publicly available at https://github.com/facebookresearch/ketod. @@ -7320,7 +7320,7 @@ Learning from Bootstrapping and Stepwise Reinforcement Reward: A Semi-Supervised Framework for Text Style Transfer ZhengyuanLiu - NancyChen + NancyChen 2633-2648 Text style transfer is an important task in controllable language generation. Supervised approaches have pushed performance improvement on style-oriented rewriting such as formality conversion. However, challenges remain due to the scarcity of large-scale parallel data in many domains. While unsupervised approaches do not rely on annotated sentence pairs for each style, they are often plagued with instability issues such as mode collapse or quality degradation. To take advantage of both supervised and unsupervised paradigms and tackle the challenges, in this work, we propose a semi-supervised framework for text style transfer. First, the learning process is bootstrapped with supervision guided by automatically constructed pseudo-parallel pairs using lexical and semantic-based methods. Then the model learns from unlabeled data via reinforcement rewards. Specifically, we propose to improve the sequence-to-sequence policy gradient via stepwise reward optimization, providing fine-grained learning signals and stabilizing the reinforced learning process. Experimental results show that the proposed approach achieves state-of-the-art performance on multiple datasets, and produces effective generation with as minimal as 10% of training data. 2022.findings-naacl.201 @@ -7361,7 +7361,7 @@ ZhongyuWei ZejunLi SiyuanWang - XuanjingHuang + XuanjingHuang JianqingFan 2667-2678 Matching model is essential for Image-Text Retrieval framework. Existing research usually train the model with a triplet loss and explore various strategy to retrieve hard negative sentences in the dataset. We argue that current retrieval-based negative sample construction approach is limited in the scale of the dataset thus fail to identify negative sample of high difficulty for every image. We propose our TAiloring neGative Sentences with Discrimination and Correction (TAGS-DC) to generate synthetic sentences automatically as negative samples. TAGS-DC is composed of masking and refilling to generate synthetic negative sentences with higher difficulty. To keep the difficulty during training, we mutually improve the retrieval and generation through parameter sharing. To further utilize fine-grained semantic of mismatch in the negative sentence, we propose two auxiliary tasks, namely word discrimination and word correction to improve the training. In experiments, we verify the effectiveness of our model on MS-COCO and Flickr30K compared with current state-of-the-art models and demonstrates its robustness and faithfulness in the further analysis. @@ -7466,7 +7466,7 @@ YukunFeng AmirFayazi AbhinavRastogi - ManabuOkumura + ManabuOkumura 1–10 Recent work has shown advantages of incorporating knowledge graphs (KGs) into BERT for various NLP tasks. One common way is to feed entity embeddings as an additional input during pre-training. There are two limitations to such a method. First, to train the entity embeddings to include rich information of factual knowledge, it typically requires access to the entire KG. This is challenging for KGs with daily changes (e.g., Wikidata). Second, it requires a large scale pre-training corpus with entity annotations and high computational cost during pre-training. In this work, we efficiently construct entity embeddings only from the type knowledge, that does not require access to the entire KG. Although the entity embeddings contain only local information, they perform very well when combined with context. Furthermore, we show that our entity embeddings, constructed from BERT’s input embeddings, can be directly incorporated into the fine-tuning phase without requiring any specialized pre-training. In addition, these entity embeddings can also be constructed on the fly without requiring a large memory footprint to store them. Finally, we propose task-specific models that incorporate our entity embeddings for entity linking, entity typing, and relation classification. Experiments show that our models have comparable or superior performance to existing models while being more resource efficient. 2022.findings-aacl.1 @@ -7531,7 +7531,7 @@ Understanding the Use of Quantifiers in <fixed-case>M</fixed-case>andarin GuanyiChen - Keesvan Deemter + Keesvan Deemter 73–80 We introduce a corpus of short texts in Mandarin, in which quantified expressions figure prominently. We illustrate the significance of the corpus by examining the hypothesis (known as Huang’s “coolness” hypothesis) that speakers of East Asian Languages tend to speak more briefly but less informatively than, for example, speakers of West-European languages. The corpus results from an elicitation experiment in which participants were asked to describe abstract visual scenes. We compare the resulting corpus, called MQTUNA, with an English corpus that was collected using the same experimental paradigm. The comparison reveals that some, though not all, aspects of quantifier use support the above-mentioned hypothesis. Implications of these findings for the generation of quantified noun phrases are discussed. 2022.findings-aacl.7 @@ -7542,8 +7542,8 @@ Does Representational Fairness Imply Empirical Fairness? AiliShen XudongHan - TrevorCohn - TimothyBaldwin + TrevorCohn + TimothyBaldwin LeaFrermann 81–95 NLP technologies can cause unintended harms if learned representations encode sensitive attributes of the author, or predictions systematically vary in quality across groups. Popular debiasing approaches, like adversarial training, remove sensitive information from representations in order to reduce disparate performance, however the relation between representational fairness and empirical (performance) fairness has not been systematically studied. This paper fills this gap, and proposes a novel debiasing method building on contrastive learning to encourage a latent space that separates instances based on target label, while mixing instances that share protected attributes. Our results show the effectiveness of our new method and, more importantly, show across a set of diverse debiasing methods that representational fairness does not imply empirical fairness. This work highlights the importance of aligning and understanding the relation of the optimization objective and final fairness target. @@ -7578,7 +7578,7 @@ XinTian XinchaoXu YingzhanLin - Zheng-YuNiu + Zheng-YuNiu 107–118 To explore the limit of dialogue generation pre-training, we present the models of PLATO-XL with up to 11 billion parameters, trained on both Chinese and English social media conversations. To train such large models, we adopt the architecture of unified transformer with high computation and parameter efficiency. In addition, we carry out multi-party aware pre-training to better distinguish the characteristic information in social media conversations. With such designs, PLATO-XL successfully achieves superior performances as compared to other approaches in both Chinese and English chitchat. We further explore the capacity of PLATO-XL on other conversational tasks, such as knowledge grounded dialogue and task-oriented conversation. The experimental results indicate that PLATO-XL obtains state-of-the-art results across multiple conversational tasks, verifying its potential as a foundation model of conversational AI. 2022.findings-aacl.10 @@ -7661,7 +7661,7 @@ ZhenyunDeng YonghuaZhu Lia JisooLee - MichaelWitbrock + MichaelWitbrock JiamouLiu 176–187 We introduce TaKG, a new table-to-text generation dataset with the following highlights: (1) TaKG defines a long-text (paragraph-level) generation task as opposed to well-established short-text (sentence-level) generation datasets. (2) TaKG is the first large-scale dataset for this task, containing three application domains and ~750,000 samples. (3) To address the divergence phenomenon, TaKG enhances table input using external knowledge graphs, extracted by a new Wikidata-based method. We then propose a new Transformer-based multimodal sequence-to-sequence architecture for TaKG that integrates two pretrained language models RoBERTa and GPT-2. Our model shows reliable performance on long-text generation across a variety of metrics, and outperforms existing models for short-text generation tasks. @@ -7674,7 +7674,7 @@ YingboGao ChristianHerold ZijianYang - HermannNey + HermannNey 188–196 Checkpoint averaging is a simple and effective method to boost the performance of converged neural machine translation models. The calculation is cheap to perform and the fact that the translation improvement almost comes for free, makes it widely adopted in neural machine translation research. Despite the popularity, the method itself simply takes the mean of the model parameters from several checkpoints, the selection of which is mostly based on empirical recipes without many justifications. In this work, we revisit the concept of checkpoint averaging and consider several extensions. Specifically, we experiment with ideas such as using different checkpoint selection strategies, calculating weighted average instead of simple mean, making use of gradient information and fine-tuning the interpolation weights on development data. Our results confirm the necessity of applying checkpoint averaging for optimal performance, but also suggest that the landscape between the converged checkpoints is rather flat and not much further improvement compared to simple averaging is to be obtained. 2022.findings-aacl.18 @@ -7685,9 +7685,9 @@ Modeling Referential Gaze in Task-oriented Settings of Varying Referential Complexity ÖzgeAlacam EugenRuppert - SinaZarrieß + SinaZarrieß GaneshanMalhotra - ChrisBiemann + ChrisBiemann SinaZarrieß 197–210 Referential gaze is a fundamental phenomenon for psycholinguistics and human-human communication. However, modeling referential gaze for real-world scenarios, e.g. for task-oriented communication, is lacking the well-deserved attention from the NLP community. In this paper, we address this challenging issue by proposing a novel multimodal NLP task; namely predicting when the gaze is referential. We further investigate how to model referential gaze and transfer gaze features to adapt to unseen situated settings that target different referential complexities than the training environment. We train (i) a sequential attention-based LSTM model and (ii) a multivariate transformer encoder architecture to predict whether the gaze is on a referent object. The models are evaluated on the three complexity datasets. The results indicate that the gaze features can be transferred not only among various similar tasks and scenes but also across various complexity levels. Taking the referential complexity of a scene into account is important for successful target prediction using gaze parameters especially when there is not much data for fine-tuning. @@ -7709,7 +7709,7 @@ <fixed-case>C</fixed-case>o<fixed-case>RAL</fixed-case>: a Context-aware <fixed-case>C</fixed-case>roatian Abusive Language Dataset RaviShekhar - Vanja MladenKaran + Vanja MladenKaran MatthewPurver 217–225 In light of unprecedented increases in the popularity of the internet and social media, comment moderation has never been a more relevant task. Semi-automated comment moderation systems greatly aid human moderators by either automatically classifying the examples or allowing the moderators to prioritize which comments to consider first. However, the concept of inappropriate content is often subjective, and such content can be conveyed in many subtle and indirect ways. In this work, we propose CoRAL – a language and culturally aware Croatian Abusive dataset covering phenomena of implicitness and reliance on local and global context. We show experimentally that current models degrade when comments are not explicit and further degrade when language skill and context knowledge are required to interpret the comment. @@ -7731,12 +7731,12 @@ A Multilingual Multiway Evaluation Data Set for Structured Document Translation of <fixed-case>A</fixed-case>sian Languages - BiankaBuschbeck + BiankaBuschbeck RajDabre MiriamExel MatthiasHuck PatrickHuy - RaphaelRubino + RaphaelRubino HidekiTanaka 237–245 Translation of structured content is an important application of machine translation, but the scarcity of evaluation data sets, especially for Asian languages, limits progress. In this paper we present a novel multilingual multiway evaluation data set for the translation of structured documents of the Asian languages Japanese, Korean and Chinese. We describe the data set, its creation process and important characteristics, followed by establishing and evaluating baselines using the direct translation as well as detag-project approaches. Our data set is well suited for multilingual evaluation, and it contains richer annotation tag sets than existing data sets. Our results show that massively multilingual translation models like M2M-100 and mBART-50 perform surprisingly well despite not being explicitly trained to handle structured content. The data set described in this paper and used in our experiments is released publicly. @@ -7790,7 +7790,7 @@ ShailyBhatt PankajSingh SomakAditya - SandipanDandapat + SandipanDandapat SunayanaSitaram MonojitChoudhury 282–295 @@ -7869,7 +7869,7 @@ Multilingual Auxiliary Tasks Training: Bridging the Gap between Languages for Zero-Shot Transfer of Hate Speech Detection Models SyrielleMontariol ArijRiabi - DjaméSeddah + DjaméSeddah 347–363 Zero-shot cross-lingual transfer learning has been shown to be highly challenging for tasks involving a lot of linguistic specificities or when a cultural gap is present between lan- guages, such as in hate speech detection. In this paper, we highlight this limitation for hate speech detection in several domains and languages using strict experimental settings. Then, we propose to train on multilingual auxiliary tasks – sentiment analysis, named entity recognition, and tasks relying on syntactic information – to improve zero-shot transfer of hate speech detection models across languages. We show how hate speech detection models benefit from a cross-lingual knowledge proxy brought by auxiliary tasks fine-tuning and highlight these tasks’ positive impact on bridging the hate speech linguistic and cultural gap between languages. 2022.findings-aacl.33 @@ -7879,10 +7879,10 @@ Chop and Change: Anaphora Resolution in Instructional Cooking Videos CennetOguz - IvanaKruijff-Korbayova + IvanaKruijff-Korbayova EmmanuelVincent PascalDenis - Josefvan Genabith + Josefvan Genabith 364–374 Linguistic ambiguities arising from changes in entities in action flows are a key challenge in instructional cooking videos. In particular, temporally evolving entities present rich and to date understudied challenges for anaphora resolution. For example “oil” mixed with “salt” is later referred to as a “mixture”. In this paper we propose novel annotation guidelines to annotate recipes for the anaphora resolution task, reflecting change in entities. Moreover, we present experimental results for end-to-end multimodal anaphora resolution with the new annotation scheme and propose the use of temporal features for performance improvement. 2022.findings-aacl.34 @@ -7941,7 +7941,7 @@ Differential Bias: On the Perceptibility of Stance Imbalance in Argumentation AlonsoPalomino - KhalidAl Khatib + KhalidAl Khatib MartinPotthast BennoStein 411–421 @@ -8178,7 +8178,7 @@ Does Simultaneous Speech Translation need Simultaneous Models? SaraPapiFondazione Bruno Kessler MarcoGaidoFondazione Bruno Kessler, University of Trento - MatteoNegriFondazione Bruno Kessler + MatteoNegriFondazione Bruno Kessler MarcoTurchiZoom Video Communications 141-153 In simultaneous speech translation (SimulST), finding the best trade-off between high output quality and low latency is a challenging task. To meet the latency constraints posed by different application scenarios, multiple dedicated SimulST models are usually trained and maintained, generating high computational costs. In this paper, also motivated by the increased sensitivity towards sustainable AI, we investigate whether a single model trained offline can serve both offline and simultaneous applications under different latency regimes without additional training or adaptation. Experiments on en->de, es show that, aside from facilitating the adoption of well-established offline architectures and training strategies without affecting latency, offline training achieves similar or better quality compared to the standard SimulST training protocol, also being competitive with the state-of-the-art system. @@ -8295,7 +8295,7 @@ StanPeshterlievMeta YasharMehdadFacebook AI SonalGuptaFacebook - Wen-tauYihFacebook AI Research + Wen-tauYihFacebook AI Research 250-262 Despite their recent popularity and well-known advantages, dense retrievers still lag behind sparse methods such as BM25 in their ability to reliably match salient phrases and rare entities in the query and to generalize to out-of-domain data. It has been argued that this is an inherent limitation of dense models. We rebut this claim by introducing the Salient Phrase Aware Retriever (SPAR), a dense retriever with the lexical matching capacity of a sparse model. We show that a dense Lexical Model Λ can be trained to imitate a sparse one, and SPAR is built by augmenting a standard dense retriever with Λ. Empirically, SPAR shows superior performance on a range of tasks including five question answering datasets, MS MARCO passage retrieval, as well as the EntityQuestions and BEIR benchmarks for out-of-domain evaluation, exceeding the performance of state-of-the-art dense and sparse retrievers. The code and models of SPAR are available at: https://github.com/facebookresearch/dpr-scale/tree/main/spar 2022.findings-emnlp.19 @@ -8340,7 +8340,7 @@ ShichenLiSoochow University ZhongqingWangSoochow University XiaotongJiangSoochow University - GuodongZhouSoochow University + GuodongZhouSoochow University 289-299 Previous studies on cross-domain sentiment classification depend on the pivot features or utilize the target data for representation learning, which ignore the semantic relevance between different domains. To this end, we exploit Abstract Meaning Representation (AMR) to help with cross-domain sentiment classification. Compared with the textual input, AMR reduces data sparsity and explicitly provides core semantic knowledge and correlations between different domains. In particular, we develop an algorithm to construct a sentiment-driven semantic graph from sentence-level AMRs. We further design two strategies to linearize the semantic graph and propose a text-graph interaction model to fuse the text and semantic graph representations for cross-domain sentiment classification. Empirical studies show the effectiveness of our proposed model over several strong baselines. The results also indicate the importance of the proposed sentiment-driven semantic graph for cross-domain sentiment classification. 2022.findings-emnlp.22 @@ -8526,7 +8526,7 @@ HannahKirkUniversity of Oxford AbebaBirhaneMozilla Foundation / University College Dublin BertieVidgenAlan Turing Institute - LeonDerczynskiIT University of Copenhagen + LeonDerczynskiIT University of Copenhagen 497-510 Text data can pose a risk of harm. However, the risks are not fully understood, and how to handle, present, and discuss harmful text in a safe way remains an unresolved issue in the NLP community. We provide an analytical framework categorising harms on three axes: (1) the harm type (e.g., misinformation, hate speech or racial stereotypes); (2) whether a harm is sought as a feature of the research design if explicitly studying harmful content (e.g., training a hate speech classifier), versus unsought if harmful content is encountered when working on unrelated problems (e.g., language generation or part-of-speech tagging); and (3) who it affects, from people (mis)represented in the data to those handling the data and those publishing on the data. We provide advice for practitioners, with concrete steps for mitigating harm in research and in publication. To assist implementation we introduce HarmCheck – a documentation standard for handling and presenting harmful text in research. 2022.findings-emnlp.35 @@ -8716,7 +8716,7 @@ Diving Deep into Modes of Fact Hallucinations in Dialogue Systems SouvikDasUniversity at Buffalo SougataSahaState University of New York at Buffalo - RohiniSrihariUniversity at Buffalo, SUNY + RohiniSrihariUniversity at Buffalo, SUNY 684-699 Knowledge Graph(KG) grounded conversations often use large pre-trained models and usually suffer from fact hallucination. Frequently entities with no references in knowledge sources and conversation history are introduced into responses, thus hindering the flow of the conversation—existing work attempt to overcome this issue by tweaking the training procedure or using a multi-step refining method. However, minimal effort is put into constructing an entity-level hallucination detection system, which would provide fine-grained signals that control fallacious content while generating responses. As a first step to address this issue, we dive deep to identify various modes of hallucination in KG-grounded chatbots through human feedback analysis. Secondly, we propose a series of perturbation strategies to create a synthetic dataset named FADE (FActual Dialogue Hallucination DEtection Dataset). Finally, we conduct comprehensive data analyses and create multiple baseline models for hallucination detection to compare against human-verified data and already established benchmarks. 2022.findings-emnlp.48 @@ -8727,7 +8727,7 @@ Representation Learning for Resource-Constrained Keyphrase Generation DiWuUniversity of California, Los Angeles - WasiAhmadUniversity of California, Los Angeles + WasiAhmadUniversity of California, Los Angeles SunipaDevGoogle Research Kai-WeiChangUCLA 700-716 @@ -8853,8 +8853,8 @@ Influence Functions for Sequence Tagging Models SarthakJainNortheastern University - VarunManjunathaAdobe Research - ByronWallaceNortheastern University + VarunManjunathaAdobe Research + ByronWallaceNortheastern University AniNenkovaAdobe Research 824-839 Many standard tasks in NLP (e.g., Named Entity Recognition, Part-of-Speech tagging, and Semantic Role Labeling) are naturally framed as sequence tagging problems. However, there has been comparatively little work on interpretability methods for sequence tagging models. In this paper, we extend influence functions — which aim to trace predictions back to the training points that informed them — to sequence tagging tasks. We define the influence of a training instance segment as the effect that perturbing the labels within this segment has on a test segment level prediction. We provide an efficient approximation to compute this, and show that it tracks with the “true” segment influence (measured empirically). We show the practical utility of segment influence by using the method to identify noisy annotations in NER corpora. @@ -8934,7 +8934,7 @@ TianyiLiUniversity of Edinburgh Mohammad JavadHosseiniGoogle Research SabineWeberUniversity of Edinburgh - MarkSteedmanUniversity of Edinburgh + MarkSteedmanUniversity of Edinburgh 903-921 We examine LMs’ competence of directional predicate entailments by supervised fine-tuning with prompts. Our analysis shows that contrary to their apparent success on standard NLI, LMs show limited ability to learn such directional inference; moreover, existing datasets fail to test directionality, and/or are infested by artefacts that can be learnt as proxy for entailments, yielding over-optimistic results. In response, we present BoOQA (Boolean Open QA), a robust multi-lingual evaluation benchmark for directional predicate entailments, extrinsic to existing training sets. On BoOQA, we establish baselines and show evidence of existing LM-prompting models being incompetent directional entailment learners, in contrast to entailment graphs, however limited by sparsity. 2022.findings-emnlp.64 @@ -8996,7 +8996,7 @@ Subword-Delimited Downsampling for Better Character-Level Translation LukasEdmanUniversity of Groningen AntonioToralUniversity of Groningen - Gertjanvan NoordUniversity of Groningen + Gertjanvan NoordUniversity of Groningen 981-992 Subword-level models have been the dominant paradigm in NLP. However, character-level models have the benefit of seeing each character individually, providing the model with more detailed information that ultimately could lead to better models. Recent works have shown character-level models to be competitive with subword models, but costly in terms of time and computation. Character-level models with a downsampling component alleviate this, but at the cost of quality, particularly for machine translation. This work analyzes the problems of previous downsampling methods and introduces a novel downsampling method which is informed by subwords.This new downsampling method not only outperforms existing downsampling methods, showing that downsampling characters can be done without sacrificing quality, but also leads to promising performance compared to subword models for translation. 2022.findings-emnlp.69 @@ -9076,7 +9076,7 @@ HyesungJi DonghoonShin SeungryongKim - HeuiseokLim + HeuiseokLim 1053-1066 To build a conversational agent that interacts fluently with humans, previous studies blend knowledge or personal profile into the pre-trained language model. However, the model that considers knowledge and persona at the same time is still limited, leading to hallucination and a passive way of using personas. We propose an effective dialogue agent that grounds external knowledge and persona simultaneously. The agent selects the proper knowledge and persona to use for generating the answers with our candidate scoring implemented with a poly-encoder. Then, our model generates the utterance with lesser hallucination and more engagingness utilizing retrieval augmented generation with knowledge-persona enhanced query. We conduct experiments on the persona-knowledge chat and achieve state-of-the-art performance in grounding and generation tasks on the automatic metrics. Moreover, we validate the answers from the models regarding hallucination and engagingness through human evaluation and qualitative results. We show our retriever’s effectiveness in extracting relevant documents compared to the other previous retrievers, along with the comparison of multiple candidate scoring methods. Code is available at https://github.com/dlawjddn803/INFO 2022.findings-emnlp.75 @@ -9291,7 +9291,7 @@ HaneulYooKAIST JinYeongBakSungkyunkwan University KyunghyunChoNew York University - AliceOhKAIST + AliceOhKAIST 1260-1272 The Annals of Joseon Dynasty (AJD) contain the daily records of the Kings of Joseon, the 500-year kingdom preceding the modern nation of Korea.The Annals were originally written in an archaic Korean writing system, ‘Hanja’, and were translated into Korean from 1968 to 1993.The resulting translation was however too literal and contained many archaic Korean words; thus, a new expert translation effort began in 2012. Since then, the records of only one king have been completed in a decade.In parallel, expert translators are working on English translation, also at a slow pace and produced only one king’s records in English so far.Thus, we propose H2KE, a neural machine translation model, that translates historical documents in Hanja to more easily understandable Korean and to English.Built on top of multilingual neural machine translation, H2KE learns to translate a historical document written in Hanja, from both a full dataset of outdated Korean translation and a small dataset of more recently translated contemporary Korean and English.We compare our method against two baselines:a recent model that simultaneously learns to restore and translate Hanja historical documentand a Transformer based model trained only on newly translated corpora.The experiments reveal that our method significantly outperforms the baselines in terms of BLEU scores for both contemporary Korean and English translations.We further conduct extensive human evaluation which shows that our translation is preferred over the original expert translations by both experts and non-expert Korean speakers. 2022.findings-emnlp.91 @@ -9342,7 +9342,7 @@ Late Prompt Tuning: A Late Prompt Could Be Better Than Many Prompts XiangyangLiuFudan University TianxiangSunFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University XipengQiuFudan University 1325-1338 Prompt tuning is a parameter-efficient tuning (PETuning) method for utilizing pre-trained models (PTMs) that simply prepends a soft prompt to the input and only optimizes the prompt to adapt PTMs to downstream tasks. Although it is parameter- and deployment-efficient, its performance still lags behind other state-of-the-art PETuning methods. Besides, the training cost of prompt tuning is not significantly reduced due to the back-propagation through the entire model. Through empirical analyses, we shed some light on the lagging performance of prompt tuning and recognize a trade-off between the propagation distance from label signals to the inserted prompt and the influence of the prompt on model outputs. Further, we present Late Prompt Tuning (LPT) that inserts a late prompt into an intermediate layer of the PTM instead of the input layer or all layers. The late prompt is obtained by a neural prompt generator conditioned on the hidden states before the prompt insertion layer and therefore is instance-dependent. Through extensive experimental results across various tasks and PTMs, we show that LPT can achieve competitive performance to full model tuning and other PETuning methods under both full-data and few-shot scenarios while possessing faster training speed and lower memory cost. @@ -9384,7 +9384,7 @@ NicoDaheimTU Darmstadt DavidThulkeRWTH Aachen University ChristianDugastRWTH, University of Aachen - HermannNeyRWTH Aachen University + HermannNeyRWTH Aachen University 1365-1381 In this work, we present a model for document-grounded response generation in dialog that is decomposed into two components according to Bayes’ theorem.One component is a traditional ungrounded response generation model and the other component models the reconstruction of the grounding document based on the dialog context and generated response.We propose different approximate decoding schemes and evaluate our approach on multiple open-domain and task-oriented document-grounded dialog datasets.Our experiments show that the model is more factual in terms of automatic factuality metrics than the baseline model.Furthermore, we outline how introducing scaling factors between the components allows for controlling the tradeoff between factuality and fluency in the model output.Finally, we compare our approach to a recently proposed method to control factuality in grounded dialog, CTRL (Rashkin et al., 2021), and show that both approaches can be combined to achieve additional improvements. 2022.findings-emnlp.98 @@ -9424,7 +9424,7 @@ DanielRotemHebrew University of Jerusalem JungoKasaiUniversity of Washington IvanMonteroPaul G. Allen School of Computer Science & Engineering, University of Washington - Noah A.SmithUniversity of Washington + Noah A.SmithUniversity of Washington RoySchwartzThe Hebrew University of Jerusalem 1403-1416 The attention mechanism is considered the backbone of the widely-used Transformer architecture. It contextualizes the input by computing input-specific attention matrices. We find that this mechanism, while powerful and elegant, is not as important as typically thought for pretrained language models. We introduce PAPA, a new probing method that replaces the input-dependent attention matrices with constant ones—the average attention weights over multiple inputs. We use PAPA to analyze several established pretrained Transformers on six downstream tasks. We find that without any input-dependent attention, all models achieve competitive performance—an average relative drop of only 8% from the probing baseline. Further, little or no performance drop is observed when replacing half of the input-dependent attention matrices with constant (input-independent) ones. Interestingly, we show that better-performing models lose more from applying our method than weaker models, suggesting that the utilization of the input-dependent attention mechanism might be a factor in their success. Our results motivate research on simpler alternatives to input-dependent attention, as well as on methods for better utilization of this mechanism in the Transformer architecture. @@ -9489,7 +9489,7 @@ BaohaoLiaoUniversity of Amsterdam DavidThulkeRWTH Aachen University SanjikaHewavitharanaeBay - HermannNeyRWTH Aachen University + HermannNeyRWTH Aachen University ChristofMonzUniversity of Amsterdam 1478-1492 The pre-training of masked language models (MLMs) consumes massive computation to achieve good results on downstream NLP tasks, resulting in a large carbon footprint. In the vanilla MLM, the virtual tokens, [MASK]s, act as placeholders and gather the contextualized information from unmasked tokens to restore the corrupted information. It raises the question of whether we can append [MASK]s at a later layer, to reduce the sequence length for earlier layers and make the pre-training more efficient. We show: (1) [MASK]s can indeed be appended at a later layer, being disentangled from the word embedding; (2) The gathering of contextualized information from unmasked tokens can be conducted with a few layers. By further increasing the masking rate from 15% to 50%, we can pre-train RoBERTa-base and RoBERTa-large from scratch with only 78% and 68% of the original computational budget without any degradation on the GLUE benchmark. When pre-training with the original budget, our method outperforms RoBERTa for 6 out of 8 GLUE tasks, on average by 0.4%. @@ -9515,7 +9515,7 @@ On the Effectiveness of Automated Metrics for Text Generation Systems Piusvon DänikenZurich University of Applied Sciences ZHAW - JanDeriuZurich University of Applied Sciences + JanDeriuZurich University of Applied Sciences DonTuggenerZurich University of Applied Sciences MarkCieliebakZurich University of Applied Sciences 1503-1522 @@ -9580,12 +9580,12 @@ Data Selection Curriculum for Neural Machine Translation - TasnimMohiuddinNanyang Technological University + TasnimMohiuddinNanyang Technological University PhilippKoehnJohns Hopkins University VishravChaudharyMicrosoft JamesCrossFacebook ShrutiBhosaleUniversity of Texas at Austin - ShafiqJotyNanyang Technological University; Salesforce AI Research + ShafiqJotyNanyang Technological University; Salesforce AI Research 1569-1582 Neural Machine Translation (NMT) models are typically trained on heterogeneous data that are concatenated and randomly shuffled. However, not all of the training data are equally useful to the model. Curriculum training aims to present the data to the NMT models in a meaningful order. In this work, we introduce a two-stage training framework for NMT where we fine-tune a base NMT model on subsets of data, selected by both deterministic scoring using pre-trained methods and online scoring that considers prediction scores of the emerging NMT model. Through comprehensive experiments on six language pairs comprising low- and high-resource languages from WMT’21, we have shown that our curriculum strategies consistently demonstrate better quality (up to +2.2 BLEU improvement) and faster convergence (approximately 50% fewer updates). 2022.findings-emnlp.113 @@ -9614,7 +9614,7 @@ AmithAnanthramColumbia University EmilyAllawayColumbia University HengJiUniversity of Illinois at Urbana-Champaign and Amazon (Amazon Scholar) - KathleenMcKeownColumbia University and Amazon (Amazon Scholar) + KathleenMcKeownColumbia University and Amazon (Amazon Scholar) 1595-1609 Practitioners from many disciplines (e.g., political science) use expert-crafted taxonomies to make sense of large, unlabeled corpora. In this work, we study Seeded Hierarchical Clustering (SHC): the task of automatically fitting unlabeled data to such taxonomies using a small set of labeled examples. We propose HierSeed, a novel weakly supervised algorithm for this task that uses only a small set of labeled seed examples in a computation and data efficient manner. HierSeed assigns documents to topics by weighing document density against topic hierarchical structure. It outperforms unsupervised and supervised baselines for the SHC task on three real-world datasets. 2022.findings-emnlp.115 @@ -9725,7 +9725,7 @@ Hyperdecoders: Instance-specific decoders for multi-task <fixed-case>NLP</fixed-case> HamishIvisonAllen Institute for AI - MatthewPetersAllen Institute for Artificial Intelligence + MatthewPetersAllen Institute for Artificial Intelligence 1715-1730 We investigate input-conditioned hypernetworks for multi-tasking in NLP, generating parameter-efficient adaptations for a decoder using a hypernetwork conditioned on the output of an encoder. This approach produces a unique decoder adaptation for every input instance, allowing the network a larger degree of flexibility than prior work that only produces one decoder adaptation per task. We apply our method to sequence classification tasks, extractive QA, and summarisation and find that it surpasses previous parameter efficient fine-tuning methods and often outperforms fully finetuning the underlying model. An analysis of the embeddings used by our hypernetwork shows that they are sensitive to output label and type, suggesting that our approach better maps from encoder representations to output labels. Our code is publicly available at https://github.com/allenai/hyperdecoders. 2022.findings-emnlp.124 @@ -9886,7 +9886,7 @@ JiannanXiangCarnegie Mellon University ZhengzhongLiuCarnegie Mellon University; Petuum INC. YuchengZhouUC San Diego - EricXingCarnegie Mellon University; MBZUAI; Petuum Inc. + EricXingCarnegie Mellon University; MBZUAI; Petuum Inc. ZhitingHuUC San Diego 1886-1899 Data-to-text generation is challenging due to the great variety of the input data in terms of domains (e.g., finance vs sports) or schemata (e.g., diverse predicates). Recent end-to-end neural methods thus require substantial training examples to learn to disambiguate and describe the data. Yet, real-world data-to-text problems often suffer from various data-scarce issues: one may have access to only a handful of or no training examples, and/or have to rely on examples in a different domain or schema. To fill this gap, we propose Any-Shot Data-to-Text (ASDOT), a new approach flexibly applicable to diverse settings by making efficient use of any given (or no) examples. ASDOT consists of two steps, data disambiguation and sentence fusion, both of which are amenable to be solved with off-the-shelf pretrained language models (LMs) with optional finetuning. In the data disambiguation stage, we employ the prompted GPT-3 model to understand possibly ambiguous triples from the input data and convert each into a short sentence with reduced ambiguity. The sentence fusion stage then uses an LM like T5 to fuse all the resulting sentences into a coherent paragraph as the final description. We evaluate extensively on various datasets in different scenarios, including the zero-/few-/full-shot settings, and generalization to unseen predicates and out-of-domain data. Experimental results show that ASDOT consistently achieves significant improvement over baselines, e.g., a 30.81 BLEU gain on the DART dataset under the zero-shot setting. @@ -9931,7 +9931,7 @@ MihirGoyalIndraprastha Institute of Information Technology Delhi RamitSawhneyGeorgia Institute of Technology RitikMathurIndian Institute of Technology Roorkee - JochenLeidnerCoburg University of Applied Sciences / University of Sheffield + JochenLeidnerCoburg University of Applied Sciences / University of Sheffield FranckDernoncourtAdobe Research DineshManochaUniversity of Maryland 1933-1940 @@ -9960,7 +9960,7 @@ Towards Faster Fine-tuning with Less Labels in Speech Processing HaoYangMonash University JinmingZhaoDept of Data Science and AI, Faculty of IT, Monash University - GholamrezaHaffariMonash University + GholamrezaHaffariMonash University EhsanShareghiMonash University 1952-1959 Pre-trained speech Transformers have facilitated great success across various speech processing tasks. However, fine-tuning these encoders for downstream tasks require sufficiently large training data to converge or to achieve state-of-the-art. In text domain this has been partly attributed to sub-optimality of the representation space in pre-trained Transformers. In this work, we take a sober look into pre-trained speech encoders and rewire their representation space without requiring any task-specific labels. Our method utilises neutrally synthesised version of audio inputs along with frame masking to construct positive pairs for contrastive self-supervised learning. When used for augmenting the wav2vec 2 encoder, we observe consistent improvement of isotropy in the representation space. Our experiments on 6 speech processing tasks, exhibit a significant convergence speedup during task fine-tuning as well as consistent task improvement, specially in low-resource settings. @@ -9975,7 +9975,7 @@ Towards Faster Fine-tuning with Less Labels in Speech Processing Faster and Smaller Speech Translation without Quality Compromise JinmingZhaoDept of Data Science and AI, Faculty of IT, Monash University HaoYangMonash University - GholamrezaHaffariMonash University + GholamrezaHaffariMonash University EhsanShareghiMonash University 1960-1967 Pre-trained speech Transformers in speech translation (ST) have facilitated state-of-the-art (SotA) results; yet, using such encoders is computationally expensive. To improve this, we present a novel Reducer Adaptor block, RedApt, that could be seamlessly integrated within any Transformer-based speech encoding architecture. Integrating the pretrained wav2vec 2 speech encoder with RedAptbrings 41% speedup, 33% memory reduction with 24% fewer FLOPs at inference. To our positive surprise, our ST model with RedApt outperforms the SotA architecture by an average of 0.68 BLEU score on 8 language pairs from Must-C. @@ -9998,7 +9998,7 @@ Faster and Smaller Speech Translation without Quality Compromise P<tex-math>\text{M}^2\text{F}^2</tex-math><fixed-case>N</fixed-case>: Patient Multi-view Multi-modal Feature Fusion Networks for Clinical Outcome Prediction - YingZhangNankai University + YingZhangNankai University BaohangZhouNankai University KehuiSongNankai University XuhuiSuiNankai University @@ -10101,7 +10101,7 @@ Faster and Smaller Speech Translation without Quality Compromise FirojAlamQatar Computing Research Institute, HBKU GiovanniDa San MartinoUniversity of Padova AishaMohamedUniversity of Wisconsin-Madison - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 2069-2080 Given the recent proliferation of false claims online, there has been a lot of manual fact-checking effort. As this is very time-consuming, human fact-checkers can benefit from tools that can support them and make them more efficient. Here, we focus on building a system that could provide such support. Given an input document, it aims to detect all sentences that contain a claim that can be verified by some previously fact-checked claims (from a given database). The output is a re-ranked list of the document sentences, so that those that can be verified are ranked as high as possible, together with corresponding evidence. Unlike previous work, which has looked into claim retrieval, here we take a document-level perspective. We create a new manually annotated dataset for the task, and we propose suitable evaluation measures. We further experiment with a learning-to-rank approach, achieving sizable performance gains over several strong baselines. Our analysis demonstrates the importance of modeling text similarity and stance, while also taking into account the veracity of the retrieved previously fact-checked claims. We believe that this research would be of interest to fact-checkers, journalists, media, and regulatory authorities. 2022.findings-emnlp.151 @@ -10319,7 +10319,7 @@ Faster and Smaller Speech Translation without Quality Compromise Alleviating Sparsity of Open Knowledge Graphs with Ternary Contrastive Learning QianLiNortheastern University and Nanyang Technological University - ShafiqJotyNanyang Technological University; Salesforce AI Research + ShafiqJotyNanyang Technological University; Salesforce AI Research DalingWang ShiFengNortheastern University YifeiZhangNortheastern University @@ -10335,7 +10335,7 @@ Faster and Smaller Speech Translation without Quality Compromise SheenaPanthaplackelThe University of Texas at Austin MilosGligoricThe University of Texas at Austin Junyi JessyLiUniversity of Texas at Austin - RaymondMooneyUniversity of Texas at Austin + RaymondMooneyUniversity of Texas at Austin 2292-2301 Automatically fixing software bugs is a challenging task. While recent work showed that natural language context is useful in guiding bug-fixing models, the approach required prompting developers to provide this context, which was simulated through commit messages written after the bug-fixing code changes were made. We instead propose using bug report discussions, which are available before the task is performed and are also naturally occurring, avoiding the need for any additional information from developers. For this, we augment standard bug-fixing datasets with bug report discussions. Using these newly compiled datasets, we demonstrate that various forms of natural language context derived from such discussions can aid bug-fixing, even leading to improved performance over using commit messages corresponding to the oracle bug-fixing commits. 2022.findings-emnlp.169 @@ -10447,7 +10447,7 @@ Faster and Smaller Speech Translation without Quality Compromise FeiZhaoNanjing University YuchenShenUESTC ZhenWuNanjing University - XinyuDaiNational Key Laboratory for Novel Software Technology, Nanjing University + XinyuDaiNational Key Laboratory for Novel Software Technology, Nanjing University 2390-2402 Multi-Label Few-Shot Aspect Category Detection (FS-ACD) is a new sub-task of aspect-based sentiment analysis, which aims to detect aspect categories accurately with limited training instances. Recently, dominant works use the prototypical network to accomplish this task, and employ the attention mechanism to extract keywords of aspect category from the sentences to produce the prototype for each aspect. However, they still suffer from serious noise problems: (1) due to lack of sufficient supervised data, the previous methods easily catch noisy words irrelevant to the current aspect category, which largely affects the quality of the generated prototype; (2) the semantically-close aspect categories usually generate similar prototypes, which are mutually noisy and confuse the classifier seriously. In this paper, we resort to the label information of each aspect to tackle the above problems, along with proposing a novel Label-Driven Denoising Framework (LDF). Extensive experimental results show that our framework achieves better performance than other state-of-the-art methods. 2022.findings-emnlp.177 @@ -10460,7 +10460,7 @@ Faster and Smaller Speech Translation without Quality Compromise YixingFanInstitute of Computing Technology, CAS. JiafengGuoInstitute of Computing Technology, CAS RuqingZhangCAS Key Lab of Network Data Science and Technology, Institute of Computing Technology, Chinese Academy of Sciences - XueqiChengInstitute of Computing Technology, CAS + XueqiChengInstitute of Computing Technology, CAS 2403-2415 Visual Entity Linking (VEL) is a task to link regions of images with their corresponding entities in Knowledge Bases (KBs), which is beneficial for many computer vision tasks such as image retrieval, image caption, and visual question answering. While existing tasks in VEL either rely on textual data to complement a multi-modal linking or only link objects with general entities, which fails to perform named entity linking on large amounts of image data. In this paper, we consider a purely Visual-based Named Entity Linking (VNEL) task, where the input only consists of an image. The task is to identify objects of interest (i.e., visual entity mentions) in images and link them to corresponding named entities in KBs. Since each entity often contains rich visual and textual information in KBs, we thus propose three different sub-tasks, i.e., visual to visual entity linking (V2VEL), visual to textual entity linking (V2TEL), and visual to visual-textual entity linking (V2VTEL). In addition, we present a high-quality human-annotated visual person linking dataset, named WIKIPerson. Based on WIKIPerson, we establish a series of baseline algorithms for the solution of each sub-task, and conduct experiments to verify the quality of the proposed datasets and the effectiveness of baseline methods. We envision this work to be helpful for soliciting more works regarding VNEL in the future. The codes and datasets are publicly available at https: //github.com/ict-bigdatalab/VNEL. 2022.findings-emnlp.178 @@ -10660,8 +10660,8 @@ Faster and Smaller Speech Translation without Quality Compromise Chia-HsuanLeeUniversity of Washington TianbaoXieHarbin Institute of Technology TaoYuUniversity of Washington - Noah A.SmithUniversity of Washington - MariOstendorfUniversity of Washington + Noah A.SmithUniversity of Washington + MariOstendorfUniversity of Washington 2627-2643 Collecting and annotating task-oriented dialogues is time-consuming and costly. Thus, zero and few shot learning for dialogue tasks presents an exciting opportunity. In this work, we propose an in-context (IC) learning framework for zero-shot and few-shot learning dialogue state tracking (DST), where a large pretrained language model (LM) takes a test instance and a few exemplars as input, and directly decodes the dialogue state without any parameter updates. This approach is more flexible and scalable than prior DST work when adapting to new domains and scenarios. To better leverage a tabular domain description in the LM prompt, we reformulate DST into a text-to-SQL problem. We also propose a novel approach to retrieve annotated dialogues as exemplars. Empirical results on MultiWOZ show that our method IC-DST substantially outperforms previous fine-tuned state-of-the-art models in few-shot settings. In addition, we test IC-DST in zero-shot settings, in which the model only takes a fixed task instruction as input, finding that it outperforms previous zero-shot methods by a large margin. 2022.findings-emnlp.193 @@ -10674,7 +10674,7 @@ Faster and Smaller Speech Translation without Quality Compromise AkshitaBhagiaAllen Institute for AI YonatanBiskCarnegie Mellon University FlorianMetzeCarnegie Mellon University - Alan WBlackCarnegie Mellon University + Alan WBlackCarnegie Mellon University AnaMarasovicUniversity of Utah 2644-2657 Combining the visual modality with pretrained language models has been surprisingly effective for simple descriptive tasks such as image captioning. More general text generation however remains elusive. We take a step back and ask: How do these models work for more complex generative tasks, i.e. conditioning on both text and images? Are multimodal models simply visually adapted language models, or do they combine they reason jointly over modalities?We investigate these questions in the context of self-rationalization (jointly generating task labels/answers and free-text explanations) of three tasks: (i) visual question answering in VQA-X, (ii) visual commonsense reasoning in VCR, and (iii) visual-textual entailment in E-SNLI-VE. We show that recent unimodal advances, CLIP image representations and scaling of language models, do not consistently improveself-rationalization in multimodal tasks. We find that no single model type works universally best across tasks, datasets, and finetuning data sizes. Our findings motivate the need for novel general backbones that move text generation from images and text beyond image captioning. @@ -10705,7 +10705,7 @@ Faster and Smaller Speech Translation without Quality Compromise MikeZhangIT University of Copenhagen Robvan der GootIT University of Copenhagen ChristianHardmeierIT University of Copenhagen/Uppsala University - BarbaraPlankLMU Munich + BarbaraPlankLMU Munich 2673-2692 The field of Deep Learning (DL) has undergone explosive growth during the last decade, with a substantial impact on Natural Language Processing (NLP) as well. Yet, compared to more established disciplines, a lack of common experimental standards remains an open challenge to the field at large. Starting from fundamental scientific principles, we distill ongoing discussions on experimental standards in NLP into a single, widely-applicable methodology. Following these best practices is crucial to strengthen experimental evidence, improve reproducibility and enable scientific progress. These standards are further collected in a public repository to help them transparently adapt to future needs. 2022.findings-emnlp.196 @@ -10788,7 +10788,7 @@ Faster and Smaller Speech Translation without Quality Compromise Probing for Incremental Parse States in Autoregressive Language Models TiwalayoEisapeMIT VineetGangireddyHarvard University - RogerLevyMIT + RogerLevyMIT YoonKimMIT 2801-2813 Next-word predictions from autoregressive neural language models show remarkable sensitivity to syntax. This work evaluates the extent to which this behavior arises as a result of a learned ability to maintain implicit representations of incremental syntactic structures. We extend work in syntactic probing to the incremental setting and present several probes for extracting incomplete syntactic structure (operationalized through parse states from a stack-based parser) from autoregressive language models. We find that our probes can be used to predict model preferences on ambiguous sentence prefixes and causally intervene on model representations and steer model behavior. This suggests implicit incremental syntactic inferences underlie next-word predictions in autoregressive neural language models. @@ -10836,8 +10836,8 @@ Faster and Smaller Speech Translation without Quality Compromise <fixed-case>MANT</fixed-case>a: Efficient Gradient-Based Tokenization for End-to-End Robust Language Modeling NathanGodeyInria / ALMAnaCH RomanCastagnéInria - Éricde la ClergerieINRIA - BenoîtSagotInria + Éricde la ClergerieINRIA + BenoîtSagotInria 2859-2870 Static subword tokenization algorithms have been an essential component of recent works on language modeling. However, their static nature results in important flaws that degrade the models’ downstream performance and robustness. In this work, we propose MANTa, a Module for Adaptive Neural TokenizAtion. MANTa is a differentiable tokenizer trained end-to-end with the language model. The resulting system offers a trade-off between the expressiveness of byte-level models and the speed of models trained using subword tokenization. In addition, our tokenizer is highly explainable since it produces an explicit segmentation of sequences into blocks. We evaluate our pre-trained model on several English datasets from different domains as well as on synthetic noise. We find that MANTa improves robustness to character perturbations and out-of-domain data. We then show that MANTa performs comparably to other models on the general-domain GLUE benchmark. Finally, we show that it is considerably faster than strictly byte-level models. 2022.findings-emnlp.207 @@ -10897,7 +10897,7 @@ Faster and Smaller Speech Translation without Quality Compromise JohnJudgeUniversity of California, Santa Barbara DesmondPattonColumbia University BruceBimberUniversity of California, Santa Barbara - KathleenMcKeownColumbia University and Amazon (Amazon Scholar) + KathleenMcKeownColumbia University and Amazon (Amazon Scholar) William YangWangUnversity of California, Santa Barbara 2914-2926 An increasingly prevalent problem for intelligent technologies is text safety, as uncontrolled systems may generate recommendations to their users that lead to injury or life-threatening consequences. However, the degree of explicitness of a generated statement that can cause physical harm varies. In this paper, we distinguish types of text that can lead to physical harm and establish one particularly underexplored category: covertly unsafe text. Then, we further break down this category with respect to the system’s information and discuss solutions to mitigate the generation of text in each of these subcategories. Ultimately, our work defines the problem of covertly unsafe language that causes physical harm and argues that this subtle yet dangerous issue needs to be prioritized by stakeholders and regulators. We highlight mitigation strategies to inspire future researchers to tackle this challenging problem and help improve safety within smart systems. @@ -10913,7 +10913,7 @@ Faster and Smaller Speech Translation without Quality Compromise ZezhongWangThe Chinese University of Hong Kong BinyangLiUniversity of International Relations FangchunYangBeijing University of Posts and Telecommunications - Kam-FaiWongDepartment of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong + Kam-FaiWongDepartment of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong 2927-2932 Humor plays an important role in our daily life, as it is an essential and fascinating element in the communication between persons. Therefore, how to recognize punchlines from the dialogue, i.e. conversational humor recognition, has attracted much interest of computational linguistics communities. However, most existing work attempted to understand the conversational humor by analyzing the contextual information of the dialogue, but neglected the character of the interlocutor, such as age, gender, occupation, and so on. For instance, the same utterance could bring out humorous from a serious person, but may be a plain expression from a naive person. To this end, this paper proposes a Character Fusion Conversational Humor Recognition model (CFCHR) to explore character information to recognize conversational humor. CFCHR utilizes a multi-task learning framework that unifies two highly pertinent tasks, i.e., character extraction and punchline identification. Based on deep neural networks, we trained both tasks jointly by sharing weight to extract the common and task-invariant features while each task could still learn its task-specific features. Experiments were conducted on Chinese sitcoms corpus, which consisted of 12,677 utterances from 22 characters. The experimental results demonstrated that CFCHR could achieve 33.08% improvements in terms of F1-score over some strong baselines, and proved the effectiveness of the character information to identify the punchlines. 2022.findings-emnlp.212 @@ -10964,7 +10964,7 @@ Faster and Smaller Speech Translation without Quality Compromise TanayDixitIndian Institute of Technology Madras BhargaviParanjapeUniversity of Washington HannanehHajishirziUniversity of Washington - LukeZettlemoyerUniversity of Washington; Meta + LukeZettlemoyerUniversity of Washington; Meta 2964-2984 Counterfactual data augmentation (CDA) – i.e., adding minimally perturbed inputs during training – helps reduce model reliance on spurious correlations and improves generalization to out-of-distribution (OOD) data. Prior work on generating counterfactuals only considered restricted classes of perturbations, limiting their effectiveness. We present Counterfactual Generation via Retrieval and Editing (CORE), a retrieval-augmented generation framework for creating diverse counterfactual perturbations for CDA. For each training example, CORE first performs a dense retrieval over a task-related unlabeled text corpus using a learned bi-encoder and extracts relevant counterfactual excerpts. CORE then incorporates these into prompts to a large language model with few-shot learning capabilities, for counterfactual editing. Conditioning language model edits on naturally occurring data results in more diverse perturbations. Experiments on natural language inference and sentiment analysis benchmarks show that CORE counterfactuals are more effective at improving generalization to OOD data compared to other DA approaches. We also show that the CORE retrieval framework can be used to encourage diversity in manually authored perturbations. 2022.findings-emnlp.216 @@ -11083,7 +11083,7 @@ Faster and Smaller Speech Translation without Quality Compromise Learning When and What to Quote: A Quotation Recommender System with Mutual Promotion of Recommendation and Generation LingzhiWangThe Chinese University of Hong Kong XingshanZengHuawei Noah’s Ark Lab - Kam-FaiWongDepartment of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong + Kam-FaiWongDepartment of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong 3094-3105 This work extends the current quotation recommendation task to a more realistic quotation recommender system that learns to predict when to quote and what to quote jointly. The system consists of three modules (tasks), a prediction module to predict whether to quote given conversation contexts, a recommendation module to recommend suitable quotations and a generation module generating quotations or sentences in ordinary language to continue the conversation. We benchmark several competitive models for the two newly introduced tasks (i.e., when-to-quote and what-to-continue). For quotation recommendation, compared with previous work that is either generation-based or ranking-based recommendation, we propose a novel framework with mutual promotion of generation module and ranking-based recommendation module. Experiments show that our framework achieves significantly better performance than baselines on two datasets. Further experiments and analyses validate the effectiveness of the proposed mechanisms and get a better understanding of the quotation recommendation task. 2022.findings-emnlp.225 @@ -11106,7 +11106,7 @@ Faster and Smaller Speech Translation without Quality Compromise Gender Bias in Meta-Embeddings MasahiroKanekoTokyo Institute of Technology DanushkaBollegalaUniversity of Liverpool/Amazon - NaoakiOkazakiTokyo Institute of Technology + NaoakiOkazakiTokyo Institute of Technology 3118-3133 Different methods have been proposed to develop meta-embeddings from a given set of source embeddings. However, the source embeddings can contain unfair gender-related biases, and how these influence the meta-embeddings has not been studied yet.We study the gender bias in meta-embeddings created under three different settings:(1) meta-embedding multiple sources without performing any debiasing (Multi-Source No-Debiasing),(2) meta-embedding multiple sources debiased by a single method (Multi-Source Single-Debiasing), and(3) meta-embedding a single source debiased by different methods (Single-Source Multi-Debiasing).Our experimental results show that meta-embedding amplifies the gender biases compared to input source embeddings.We find that debiasing not only the sources but also their meta-embedding is needed to mitigate those biases.Moreover, we propose a novel debiasing method based on meta-embedding learning where we use multiple debiasing methods on a single source embedding and then create a single unbiased meta-embedding. 2022.findings-emnlp.227 @@ -11212,7 +11212,7 @@ Faster and Smaller Speech Translation without Quality Compromise Low-resource Interactive Active Labeling for Fine-tuning Language Models SeijiMaekawaOsaka University - DanZhangMegagon Labs + DanZhangMegagon Labs HannahKimMegagon Labs SajjadurRahmanMegagon Labs EstevamHruschkaMegagon Labs - https://megagon.ai/ @@ -11266,7 +11266,7 @@ Faster and Smaller Speech Translation without Quality Compromise Mix-and-Match: Scalable Dialog Response Retrieval using <fixed-case>G</fixed-case>aussian Mixture Embeddings GauravPandeyIBM Research DanishContractorIBM Research & Indian Institute of Technology, New Delhi - SachindraJoshiIBM + SachindraJoshiIBM 3273-3287 Embedding-based approaches for dialog response retrieval embed the context-response pairs as points in the embedding space. These approaches are scalable, but fail to account for the complex, many-to-many relationships that exist between context-response pairs. On the other end of the spectrum, there are approaches that feed the context-response pairs jointly through multiple layers of neural networks. These approaches can model the complex relationships between context-response pairs, but fail to scale when the set of responses is moderately large (>1000). In this paper, we propose a scalable model that can learn complex relationships between context-response pairs. Specifically, the model maps the contexts as well as responses to probability distributions over the embedding space. We train the models by optimizing the Kullback-Leibler divergence between the distributions induced by context-response pairs in the training data. We show that the resultant model achieves better performance as compared to other embedding-based approaches on publicly available conversation data. 2022.findings-emnlp.239 @@ -11409,7 +11409,7 @@ Faster and Smaller Speech Translation without Quality Compromise Truncation Sampling as Language Model Desmoothing JohnHewittStanford University - ChristopherManningStanford University + ChristopherManningStanford University PercyLiangStanford University 3414-3427 Long samples of text from neural language models can be of poor quality. Truncation sampling algorithms–like top-p or top-k—address this by setting some words’ probabilities to zero at each step. This work investigates why these methods are important, and how to improve them. We propose thinking of a neural language model as a mixture of a true distribution and a smoothing distribution that avoids infinite perplexity. In this light, truncation algorithms aim to perform desmoothing, estimating a subset of the support of the true distribution. Finding a good subset is crucial: we show that top-p unnecessarily truncates high-probability words, for example causing it to truncate all words but Trump for a document that starts with Donald. We introduce eta-sampling, which truncates words below an entropy-dependent probability threshold. Compared to previous algorithms, our eta-sampling generates more plausible long documents according to humans, is better at breaking out of repetition, and behaves more reasonably on a battery of test distributions. @@ -11451,7 +11451,7 @@ Faster and Smaller Speech Translation without Quality Compromise Simple but Challenging: Natural Language Inference Models Fail on Simple Sentences ChengLuoZhejiang Lab - WeiLiuZhejiang university + WeiLiuZhejiang university JieyuLinZhejiang University JiajieZouZhejiang University MingXiangthe University of Chicago @@ -11514,7 +11514,7 @@ Faster and Smaller Speech Translation without Quality Compromise RongZhangAlibaba Group HuiXuealibaba DonghongSunChina - ChaoZhangTsinghua University + ChaoZhangTsinghua University 3502-3516 Despite of the superb performance on a wide range of tasks, pre-trained language models (e.g., BERT) have been proved vulnerable to adversarial texts. In this paper, we present RoChBERT, a framework to build more Robust BERT-based models by utilizing a more comprehensive adversarial graph to fuse Chinese phonetic and glyph features into pre-trained representations during fine-tuning. Inspired by curriculum learning, we further propose to augment the training dataset with adversarial texts in combination with intermediate samples. Extensive experiments demonstrate that RoChBERT outperforms previous methods in significant ways: (i) robust – RoChBERT greatly improves the model robustness without sacrificing accuracy on benign texts. Specifically, the defense lowers the success rates of unlimited and limited attacks by 59.43% and 39.33% respectively, while remaining accuracy of 93.30%; (ii) flexible – RoChBERT can easily extend to various language models to solve different downstream tasks with excellent performance; and (iii) efficient – RoChBERT can be directly applied to the fine-tuning stage without pre-training language model from scratch, and the proposed data augmentation method is also low-cost. 2022.findings-emnlp.256 @@ -11595,7 +11595,7 @@ Faster and Smaller Speech Translation without Quality Compromise MinlieHuangTsinghua University XinJiangHuawei Noah’s Ark Lab QunLiuHuawei Noah’s Ark Lab - HelenMengThe Chinese University of Hong Kong + HelenMengThe Chinese University of Hong Kong 3576-3591 Among all the safety concerns that hinder the deployment of open-domain dialog systems (e.g., offensive languages, biases, and toxic behaviors), social bias presents an insidious challenge. Addressing this challenge requires rigorous analyses and normative reasoning. In this paper, we focus our investigation on social bias measurement to facilitate the development of unbiased dialog systems. We first propose a novel Dial-Bias Framework for analyzing the social bias in conversations using a holistic method beyond bias lexicons or dichotomous annotations. Leveraging the proposed framework, we further introduce the CDial-Bias Dataset which is, to the best of our knowledge, the first annotated Chinese social bias dialog dataset. We also establish a fine-grained dialog bias measurement benchmark and conduct in-depth ablation studies to shed light on the utility of the detailed annotations in the proposed dataset. Finally, we evaluate representative Chinese generative models with our classifiers to unveil the presence of social bias in these systems. 2022.findings-emnlp.262 @@ -11606,7 +11606,7 @@ Faster and Smaller Speech Translation without Quality Compromise <fixed-case>C</fixed-case>ross<fixed-case>RE</fixed-case>: A Cross-Domain Dataset for Relation Extraction ElisaBassignanaIT University of Copenhagen - BarbaraPlankLMU Munich + BarbaraPlankLMU Munich 3592-3604 Relation Extraction (RE) has attracted increasing attention, but current RE evaluation is limited to in-domain evaluation setups. Little is known on how well a RE system fares in challenging, but realistic out-of-distribution evaluation setups. To address this gap, we propose CrossRE, a new, freely-available cross-domain benchmark for RE, which comprises six distinct text domains and includes multi-label annotations. An additional innovation is that we release meta-data collected during annotation, to include explanations and flags of difficult instances. We provide an empirical evaluation with a state-of-the-art model for relation classification. As the meta-data enables us to shed new light on the state-of-the-art model, we provide a comprehensive analysis on the impact of difficult cases and find correlations between model and human annotations. Overall, our empirical investigation highlights the difficulty of cross-domain RE. We release our dataset, to spur more research in this direction. 2022.findings-emnlp.263 @@ -11672,8 +11672,8 @@ Faster and Smaller Speech Translation without Quality Compromise AitorOrmazabalUniversity of the Basque Country MikelArtetxeMeta AI ManexAgirrezabalUniversity of Copenhagen - AitorSoroaHiTZ Center - Ixa, University of the Basque Country UPV/EHU - EnekoAgirreUniversity of the Basque Country (UPV/EHU) + AitorSoroaHiTZ Center - Ixa, University of the Basque Country UPV/EHU + EnekoAgirreUniversity of the Basque Country (UPV/EHU) 3655-3670 Formal verse poetry imposes strict constraints on the meter and rhyme scheme of poems. Most prior work on generating this type of poetry uses existing poems for supervision, which are difficult to obtain for most languages and poetic forms. In this work, we propose an unsupervised approach to generate poems that follow any given meter and rhyme scheme, without requiring any poetic text for training. Our method works by splitting a regular, non-poetic corpus into phrases, prepending control codes that describe the length and end rhyme of each phrase, and training a transformer language model in the augmented corpus. The transformer learns to link the structure descriptor with the control codes to the number of lines, their length and their end rhyme. During inference, we build control codes for the desired meter and rhyme scheme, and condition our language model on them to generate formal verse poetry. Experiments in Spanish and Basque show that our approach is able to generate valid poems, which are often comparable in quality to those written by humans. 2022.findings-emnlp.268 @@ -11880,7 +11880,7 @@ Faster and Smaller Speech Translation without Quality Compromise Prompt-based Connective Prediction Method for Fine-grained Implicit Discourse Relation Recognition HaoZhouEast China Normal University - ManLanEast China Normal University + ManLanEast China Normal University YuanbinWuEast China Normal University YuefengChenTranssion MeirongMaTranssion @@ -11915,14 +11915,14 @@ Faster and Smaller Speech Translation without Quality Compromise CezarPendusIBM SaswatiDanaIBM Research DineshGargIBM Research AI - AchilleFokoueIBM Research + AchilleFokoueIBM Research G P ShrivatsaBhargavIBM DineshKhandelwalIBM Research SrinivasRavishankarIBM Research SairamGurajadaIBM Research - Almaden MariaChangIBM Research AI RosarioUceda-SosaIBM Research - SalimRoukosIBM Research AI + SalimRoukosIBM Research AI AlexanderGrayIBM Research GuilhermeLimaIBM Research Brazil RyanRiegelIBM @@ -11953,10 +11953,10 @@ Faster and Smaller Speech Translation without Quality Compromise <fixed-case>M</fixed-case>3: Multi-level dataset for Multi-document summarisation of Medical studies - YuliaOtmakhovaUniversity of Melbourne - KarinVerspoorRMIT University - TimothyBaldwinThe University of Melbourne - AntonioJimeno YepesRMIT University + YuliaOtmakhovaUniversity of Melbourne + KarinVerspoorRMIT University + TimothyBaldwinThe University of Melbourne + AntonioJimeno YepesRMIT University Jey HanLauThe University of Melbourne 3887-3901 We present M3 (Multi-level dataset for Multi-document summarisation of Medical studies), a benchmark dataset for evaluating the quality of summarisation systems in the biomedical domain. The dataset contains sets of multiple input documents and target summaries of three levels of complexity: documents, sentences, and propositions. The dataset also includes several levels of annotation, including biomedical entities, direction, and strength of relations between them, and the discourse relationships between the input documents (“contradiction” or “agreement”). We showcase usage scenarios of the dataset by testing 10 generic and domain-specific summarisation models in a zero-shot setting, and introduce a probing task based on counterfactuals to test if models are aware of the direction and strength of the conclusions generated from input studies. @@ -11993,7 +11993,7 @@ Faster and Smaller Speech Translation without Quality Compromise Probing Relational Knowledge in Language Models via Word Analogies KiamehrRezaeeCardiff University - JoseCamacho-ColladosCardiff University + JoseCamacho-ColladosCardiff University 3930-3936 Understanding relational knowledge plays an integral part in natural language comprehension. When it comes to pre-trained language models (PLM), prior work has been focusing on probing relational knowledge this by filling the blanks in pre-defined prompts such as “The capital of France is —". However, these probes may be affected by the co-occurrence of target relation words and entities (e.g. “capital”, “France” and “Paris”) in the pre-training corpus. In this work, we extend these probing methodologies leveraging analogical proportions as a proxy to probe relational knowledge in transformer-based PLMs without directly presenting the desired relation. In particular, we analysed the ability of PLMs to understand (1) the directionality of a given relation (e.g. Paris-France is not the same as France-Paris); (2) the ability to distinguish types on a given relation (both France and Japan are countries); and (3) the relation itself (Paris is the capital of France, but not Rome). Our results show how PLMs are extremely accurate at (1) and (2), but have clear room for improvement for (3). To better understand the reasons behind this behaviour and mistakes made by PLMs, we provide an extended quantitative analysis based on relevant factors such as frequency. 2022.findings-emnlp.289 @@ -12051,7 +12051,7 @@ Faster and Smaller Speech Translation without Quality Compromise MikelArtetxeMeta AI JingfeiDuFacebook NamanGoyalFacebook - LukeZettlemoyerUniversity of Washington; Meta + LukeZettlemoyerUniversity of Washington; Meta VeselinStoyanovFacebook 3973-3985 Prior work on language model pre-training has explored different architectures and learning objectives, but differences in data, hyperparameters and evaluation make a principled comparison difficult. In this work, we focus on bidirectionality as a key factor that differentiates existing approaches, and present a comprehensive study of its role in next token prediction, text infilling, zero-shot priming and fine-tuning. We propose a new framework that generalizes prior approaches, including fully unidirectional models like GPT, fully bidirectional models like BERT, and hybrid models like CM3 and prefix LM. Our framework distinguishes between two notions of bidirectionality (bidirectional context and bidirectional attention) and allows us to control each of them separately. We find that the optimal configuration is largely application-dependent (e.g., bidirectional attention is beneficial for fine-tuning and infilling, but harmful for next token prediction and zero-shot priming). We train models with up to 6.7B parameters, and find differences to remain consistent at scale. While prior work on scaling has focused on left-to-right autoregressive models, our results suggest that this approach comes with some trade-offs, and it might be worthwhile to develop very large bidirectional models. @@ -12091,8 +12091,8 @@ Faster and Smaller Speech Translation without Quality Compromise Han-ChinShingAmazon QingSunAmazon ChristopherWinestockAmazon - KathleenMcKeownColumbia University and Amazon (Amazon Scholar) - NoémieElhadadColumbia University + KathleenMcKeownColumbia University and Amazon (Amazon Scholar) + NoémieElhadadColumbia University 4009-4027 In real-world scenarios with naturally occurring datasets, reference summaries are noisy and may contain information that cannot be inferred from the source text. On large news corpora, removing low quality samples has been shown to reduce model hallucinations. Yet, for smaller, and/or noisier corpora, filtering is detrimental to performance. To improve reference quality while retaining all data, we propose a new approach: to selectively re-write unsupported reference sentences to better reflect source data. We automatically generate a synthetic dataset of positive and negative revisions by corrupting supported sentences and learn to revise reference sentences with contrastive learning. The intensity of revisions is treated as a controllable attribute so that, at inference, diverse candidates can be over-generated-then-rescored to balance faithfulness and abstraction. To test our methods, we extract noisy references from publicly available MIMIC-III discharge summaries for the task of hospital-course summarization, and vary the data on which models are trained. According to metrics and human evaluation, models trained on revised clinical references are much more faithful, informative, and fluent than models trained on original or filtered data. 2022.findings-emnlp.296 @@ -12113,7 +12113,7 @@ Faster and Smaller Speech Translation without Quality Compromise On the Impact of Temporal Concept Drift on Model Explanations ZhixueZhaoUniversity of Sheffield GeorgeChrysostomouThe University of Sheffield - KalinaBontchevaUniversity of Sheffield + KalinaBontchevaUniversity of Sheffield NikolaosAletrasUniversity of Sheffield 4039-4054 Explanation faithfulness of model predictions in natural language processing is typically evaluated on held-out data from the same temporal distribution as the training data (i.e. synchronous settings). While model performance often deteriorates due to temporal variation (i.e. temporal concept drift), it is currently unknown how explanation faithfulness is impacted when the time span of the target data is different from the data used to train the model (i.e. asynchronous settings). For this purpose, we examine the impact of temporal variation on model explanations extracted by eight feature attribution methods and three select-then-predict models across six text classification tasks. Our experiments show that (i) faithfulness is not consistent under temporal variations across feature attribution methods (e.g. it decreases or increases depending on the method), with an attention-based method demonstrating the most robust faithfulness scores across datasets; and (ii) select-then-predict models are mostly robust in asynchronous settings with only small degradation in predictive performance. Finally, feature attribution methods show conflicting behavior when used in FRESH (i.e. a select-and-predict model) and for measuring sufficiency/comprehensiveness (i.e. as post-hoc methods), suggesting that we need more robust metrics to evaluate post-hoc explanation faithfulness. Code will be made publicly available. @@ -12153,7 +12153,7 @@ Faster and Smaller Speech Translation without Quality Compromise ChadiHelweTelecom Paris, Institut Polytechnique de Paris SimonCoumesÉcole Normale Supérieure de Rennes ChloéClavelLTCI, Telecom-Paris, Institut Polytechnique de Paris - FabianSuchanekTelecom Paris, Institut Polytechnique de Paris + FabianSuchanekTelecom Paris, Institut Polytechnique de Paris 4086-4099 Transformer-based language models achieve state-of-the-art results on several natural language processing tasks. One of these is textual entailment, i.e., the task of determining whether a premise logically entails a hypothesis. However, the models perform poorly on this task when the examples contain negations. In this paper, we propose a new definition of textual entailment that captures also negation. This allows us to develop TINA (Textual Inference with Negation Augmentation), a principled technique for negated data augmentation that can be combined with the unlikelihood loss function.Our experiments with different transformer-based models show that our method can significantly improve the performance of the models on textual entailment datasets with negation – without sacrificing performance on datasets without negation. 2022.findings-emnlp.301 @@ -12336,7 +12336,7 @@ Faster and Smaller Speech Translation without Quality Compromise m<tex-math>^4</tex-math> Adapter: Multilingual Multi-Domain Adaptation for Machine Translation with a Meta-Adapter WenLaiLudwig-Maximilians-Universität München AlexandraChronopoulouLMU Munich - AlexanderFraserLudwig-Maximilians-Universität München + AlexanderFraserLudwig-Maximilians-Universität München 4282-4296 Multilingual neural machine translation models (MNMT) yield state-of-the-art performance when evaluated on data from a domain and language pair seen at training time. However, when a MNMT model is used to translate under domain shift or to a new language pair, performance drops dramatically. We consider a very challenging scenario: adapting the MNMT model both to a new domain and to a new language pair at the same time. In this paper, we propose m^4Adapter (Multilingual Multi-Domain Adaptation for Machine Translation with a Meta-Adapter), which combines domain and language knowledge using meta-learning with adapters. We present results showing that our approach is a parameter-efficient solution which effectively adapts a model to both a new language pair and a new domain, while outperforming other adapter methods. An ablation study also shows that our approach more effectively transfers domain knowledge across different languages and language information across different domains. 2022.findings-emnlp.315 @@ -12363,8 +12363,8 @@ Faster and Smaller Speech Translation without Quality Compromise VassilinaNikoulinaNaver Labs Europe AlexandreBerardNaver Labs Europe CarolineBrunNaver Labs Europe - JamesHendersonIdiap Research Institute - LaurentBesacierNaver Labs Europe + JamesHendersonIdiap Research Institute + LaurentBesacierNaver Labs Europe 4308-4329 Recently, very large pre-trained models achieve state-of-the-art results in various natural language processing (NLP) tasks, but their size makes it more challenging to apply them in resource-constrained environments. Compression techniques allow to drastically reduce the size of the models and therefore their inference time with negligible impact on top-tier metrics. However, the general performance averaged across multiple tasks and/or languages may hide a drastic performance drop on under-represented features, which could result in the amplification of biases encoded by the models. In this work, we assess the impact of compression methods on Multilingual Neural Machine Translation models (MNMT) for various language groups, gender, and semantic biases by extensive analysis of compressed models on different machine translation benchmarks, i.e. FLORES-101, MT-Gender, and DiBiMT. We show that the performance of under-represented languages drops significantly, while the average BLEU metric only slightly decreases. Interestingly, the removal of noisy memorization with compression leads to a significant improvement for some medium-resource languages. Finally, we demonstrate that compression amplifies intrinsic gender and semantic biases, even in high-resource languages. 2022.findings-emnlp.317 @@ -12473,7 +12473,7 @@ Faster and Smaller Speech Translation without Quality Compromise ZetianWuOregon State University RoshanSanthoshUniversity of Pennsylvania TejasSrivastavaUniversity of Pennsylvania - LyleUngarUniversity of Pennsylvania + LyleUngarUniversity of Pennsylvania JoãoSedocNew York University 4430-4448 Lexica – words and associated scores – are widely used as simple, interpretable, generalizable language features to predict sentiment, emotions, mental health, and personality. They also provide insight into the psychological features behind those moods and traits. Such lexica, historically created by human experts, are valuable to linguists, psychologists, and social scientists, but they take years of refinement and have limited coverage. In this paper, we investigate how the lexica that provide psycholinguistic insights could be computationally induced and how they should be assessed. We identify generalizability and interpretability as two essential properties of such lexica. We induce lexica using both context-oblivious and context-aware approaches, compare their predictive performance both within the training corpus and across various corpora, and evaluate their quality using crowd-worker assessment. We find that lexica induced from context-oblivious models are more generalizable and interpretable than those from more accurate context-aware transformer models. In addition, lexicon scores can identify explanatory words more reliably than a high performing transformer with feature-importance measures like SHAP. @@ -12513,7 +12513,7 @@ Faster and Smaller Speech Translation without Quality Compromise Leveraging Data Recasting to Enhance Tabular Reasoning AashnaJenaIIIT Hyderabad VivekGuptaSchool of Computing, University of Utah - ManishShrivastavaInternational Institute of Information Technology Hyderabad + ManishShrivastavaInternational Institute of Information Technology Hyderabad JulianEisenschlosGoogle 4483-4496 Creating challenging tabular inference data is essential for learning complex reasoning. Prior work has mostly relied on two data generation strategies. The first is human annotation, which yields linguistically diverse data but is difficult to scale. The second category for creation is synthetic generation, which is scalable and cost effective but lacks inventiveness. In this research, we present a framework for semi-automatically recasting existing tabular data to make use of the benefits of both approaches. We utilize our framework to build tabular NLI instances from five datasets that were initially intended for tasks like table2text creation, tabular Q/A, and semantic parsing. We demonstrate that recasted data could be used as evaluation benchmarks as well as augmentation data to enhance performance on tabular NLI tasks. Furthermore, we investigate the effectiveness of models trained on recasted data in the zero-shot scenario, and analyse trends in performance across different recasted datasets types. @@ -12592,7 +12592,7 @@ Faster and Smaller Speech Translation without Quality Compromise JinshanZengJiangxi Normal University YudongXieJiangxi Normal University XianglongYuJiangxi Normal University - JohnLeeDepartment of Linguistics and Translation, City University of Hong Kong + JohnLeeDepartment of Linguistics and Translation, City University of Hong Kong Ding-XuanZhouSchool of Data Science and Department of Mathematics, City University of Hong Kong 4557-4568 The readability assessment task aims to assign a difficulty grade to a text. While neural models have recently demonstrated impressive performance, most do not exploit the ordinal nature of the difficulty grades, and make little effort for model initialization to facilitate fine-tuning. We address these limitations with soft labels for ordinal regression, and with model pre-training through prediction of pairwise relative text difficulty. We incorporate these two components into a model based on hierarchical attention networks, and evaluate its performance on both English and Chinese datasets. Experimental results show that our proposed model outperforms competitive neural models and statistical classifiers on most datasets. @@ -12700,7 +12700,7 @@ Faster and Smaller Speech Translation without Quality Compromise MerveÜnlü MenevşeBoğaziçi University YusufcanManavBogazici University EbruArisoyMEF University - ArzucanÖzgürBogazici University + ArzucanÖzgürBogazici University 4659-4666 This paper describes a framework to automatically generate a spoken question answering (QA) dataset. The framework consists of a question generation (QG) module to generate questions automatically from given text documents, a text-to-speech (TTS) module to convert the text documents into spoken form and an automatic speech recognition (ASR) module to transcribe the spoken content. The final dataset contains question-answer pairs for both the reference text and ASR transcriptions as well as the audio files corresponding to each reference text. For QG and ASR systems we used pre-trained multilingual encoder-decoder transformer models and fine-tuned these models using a limited amount of manually generated QA data and TTS-based speech data, respectively. As a proof of concept, we investigated the proposed framework for Turkish and generated the Turkish Question Answering (TurQuAse) dataset using Wikipedia articles. Manual evaluation of the automatically generated question- answer pairs and QA performance evaluation with state of-the-art models on TurQuAse show that the proposed framework is efficient for automatically generating spoken QA datasets. To the best of our knowledge, TurQuAse is the first publicly available spoken question answering dataset for Turkish. The proposed framework can be easily extended to other languages where a limited amount of QA data is available. 2022.findings-emnlp.342 @@ -12761,7 +12761,7 @@ Faster and Smaller Speech Translation without Quality Compromise BaileyKuehlAllen Institute for AI ArmanCohanAllen Institute for AI IzBeltagyAllen Institute for AI (AI2) - Lucy LuWangAllen Institute for AI + Lucy LuWangAllen Institute for AI HannanehHajishirziUniversity of Washington 4719-4734 While research on scientific claim verification has led to the development of powerful systems that appear to approach human performance, these approaches have yet to be tested in a realistic setting against large corpora of scientific literature. Moving to this open-domain evaluation setting, however, poses unique challenges; in particular, it is infeasible to exhaustively annotate all evidence documents. In this work, we present SciFact-Open, a new test collection designed to evaluate the performance of scientific claim verification systems on a corpus of 500K research abstracts. Drawing upon pooling techniques from information retrieval, we collect evidence for scientific claims by pooling and annotating the top predictions of four state-of-the-art scientific claim verification models. We find that systems developed on smaller corpora struggle to generalize to SciFact-Open, exhibiting performance drops of at least 15 F1. In addition, analysis of the evidence in SciFact-Open reveals interesting phenomena likely to appear when claim verification systems are deployed in practice, e.g., cases where the evidence supports only a special case of the claim. Our dataset is available at https://github.com/dwadden/scifact-open. @@ -12816,7 +12816,7 @@ Faster and Smaller Speech Translation without Quality Compromise JohnMorrisCornell Tech JustinChiuCornell Tech RaminZabihCornell Tech - AlexanderRushCornell University + AlexanderRushCornell University 4777-4788 Deidentification seeks to anonymize textual data prior to distribution. Automatic deidentification primarily uses supervised named entity recognition from human-labeled data points. We propose an unsupervised deidentification method that masks words that leak personally-identifying information. The approach utilizes a specially trained reidentification model to identify individuals from redacted personal documents. Motivated by K-anonymity based privacy, we generate redactions that ensure a minimum reidentification rank for the correct profile of the document. To evaluate this approach, we consider the task of deidentifying Wikipedia Biographies, and evaluate using an adversarial reidentification metric. Compared to a set of unsupervised baselines, our approach deidentifies documents more completely while removing fewer words. Qualitatively, we see that the approach eliminates many identifying aspects that would fall outside of the common named entity based approach. 2022.findings-emnlp.352 @@ -12829,7 +12829,7 @@ Faster and Smaller Speech Translation without Quality Compromise YatinChaudharyDRIMCo GmbH PranavRaiLMU Munich MatthiasSchubertLMU Munich - HinrichSchützeCenter for Information and Language Processing, University of Munich + HinrichSchützeCenter for Information and Language Processing, University of Munich PankajGuptaDRIMCO GmbH 4789-4799 In this work, we combine the two paradigms: Federated Learning (FL) and Continual Learning (CL) for text classification task in cloud-edge continuum. The objective of Federated Continual Learning (FCL) is to improve deep learning models over life time at each client by (relevant and efficient) knowledge transfer without sharing data. Here, we address challenges in minimizing inter-client interference while knowledge sharing due to heterogeneous tasks across clients in FCL setup. In doing so, we propose a novel framework, Federated Selective Inter-client Transfer (FedSeIT) which selectively combines model parameters of foreign clients. To further maximize knowledge transfer, we assess domain overlap and select informative tasks from the sequence of historical tasks at each foreign client while preserving privacy. Evaluating against the baselines, we show improved performance, a gain of (average) 12.4% in text classification over a sequence of tasks using five datasets from diverse domains. To the best of our knowledge, this is the first work that applies FCL to NLP. @@ -12848,7 +12848,7 @@ Faster and Smaller Speech Translation without Quality Compromise Eui-InKimUniversity of Michigan FelixGervitsUS Army Research Laboratory MatthewMargeArmy Research Laboratory - JoyceChaiUniversity of Michigan + JoyceChaiUniversity of Michigan 4800-4822 In the real world, autonomous driving agents navigate in highly dynamic environments full of unexpected situations where pre-trained models are unreliable. In these situations, what is immediately available to vehicles is often only human operators. Empowering autonomous driving agents with the ability to navigate in a continuous and dynamic environment and to communicate with humans through sensorimotor-grounded dialogue becomes critical. To this end, we introduce Dialogue On the ROad To Handle Irregular Events (DOROTHIE), a novel interactive simulation platform that enables the creation of unexpected situations on the fly to support empirical studies on situated communication with autonomous driving agents. Based on this platform, we created the Situated Dialogue Navigation (SDN), a navigation benchmark of 183 trials with a total of 8415 utterances, around 18.7 hours of control streams, and 2.9 hours of trimmed audio. SDN is developed to evaluate the agent’s ability to predict dialogue moves from humans as well as generate its own dialogue moves and physical navigation actions. We further developed a transformer-based baseline model for these SDN tasks. Our empirical results indicate that language guided-navigation in a highly dynamic environment is an extremely difficult task for end-to-end models. These results will provide insight towards future work on robust autonomous driving agents 2022.findings-emnlp.354 @@ -12860,7 +12860,7 @@ Faster and Smaller Speech Translation without Quality Compromise He Said, She Said: Style Transfer for Shifting the Perspective of Dialogues AmandaBertschCarnegie Mellon University GrahamNeubigCarnegie Mellon University - Matthew R.GormleyCarnegie Mellon University + Matthew R.GormleyCarnegie Mellon University 4823-4840 In this work, we define a new style transfer task: perspective shift, which reframes a dialouge from informal first person to a formal third person rephrasing of the text. This task requires challenging coreference resolution, emotion attribution, and interpretation of informal text. We explore several baseline approaches and discuss further directions on this task when applied to short dialogues. As a sample application, we demonstrate that applying perspective shifting to a dialogue summarization dataset (SAMSum) substantially improves the zero-shot performance of extractive news summarization models on this data. Additionally, supervised extractive models perform better when trained on perspective shifted data than on the original dialogues. We release our code publicly. 2022.findings-emnlp.355 @@ -12932,7 +12932,7 @@ Faster and Smaller Speech Translation without Quality Compromise Train Flat, Then Compress: Sharpness-Aware Minimization Learns More Compressible Models ClaraNaCarnegie Mellon University - Sanket VaibhavMehtaCarnegie Mellon University + Sanket VaibhavMehtaCarnegie Mellon University EmmaStrubellCarnegie Mellon University 4909-4936 Model compression by way of parameter pruning, quantization, or distillation has recently gained popularity as an approach for reducing the computational requirements of modern deep neural network models for NLP. Inspired by prior works suggesting a connection between simpler, more generalizable models and those that lie within wider loss basins, we hypothesize that optimizing for flat minima should lead to simpler parameterizations and thus more compressible models. We propose to combine sharpness-aware minimization (SAM) with various task-specific model compression methods, including iterative magnitude pruning (IMP), structured pruning with a distillation objective, and post-training dynamic quantization. Empirically, we show that optimizing for flatter minima consistently leads to greater compressibility of parameters compared to vanilla Adam when fine-tuning BERT models, with little to no loss in accuracy on the GLUE text classification and SQuAD question answering benchmarks. Moreover, SAM finds superior winning tickets during IMP that 1) are amenable to vanilla Adam optimization, and 2) transfer more effectively across tasks. @@ -12960,7 +12960,7 @@ Faster and Smaller Speech Translation without Quality Compromise MehdiRezagholizadehNoah’s Ark Lab Huawei AhmadRashidUniversity of Waterloo; Huawei Noah’s Ark Lab AliGhodsiUniversity of Waterloo - PhillippeLanglaisUniversité de Montréal + PhillippeLanglaisUniversité de Montréal 4948-4954 Knowledge Distillation (KD) is a commonly used technique for improving the generalization of compact Pre-trained Language Models (PLMs) on downstream tasks. However, such methods impose the additional burden of training a separate teacher model for every new dataset.Alternatively, one may directly work on the improvement of the optimization procedure of the compact model towards better generalization. Recent works observe that the flatness of the local minimum correlates well with better generalization.In this work, we adapt Stochastic Weight Averaging (SWA), a method encouraging convergence to a flatter minimum, to fine-tuning PLMs. We conduct extensive experiments on various NLP tasks (text classification, question answering, and generation) and different model architectures and demonstrate that our adaptation improves the generalization without extra computation cost. Moreover, we observe that this simple optimization technique is able to outperform the state-of-the-art KD methods for compact models. 2022.findings-emnlp.363 @@ -12994,7 +12994,7 @@ Faster and Smaller Speech Translation without Quality Compromise Unsupervised Multi-Granularity Summarization MingZhongUniversity of Illinois at Urbana-Champaign - YangLiuMicrosoft + YangLiuMicrosoft SuyuGeUniversity of Illinois at Urbana-Champaign YuningMaoMeta Platforms, Inc. YizhuJiaoUniversity of Illinois Urbana-Champaign @@ -13086,7 +13086,7 @@ Faster and Smaller Speech Translation without Quality Compromise The Undesirable Dependence on Frequency of Gender Bias Metrics Based on Word Embeddings FranciscoValentiniICC (UBA - CONICET); Maestría en Data Mining (UBA) GermánRosatiCONICET / UNSAM - DiegoFernandez SlezakUniversidad de Buenos Aires + DiegoFernandez SlezakUniversidad de Buenos Aires EdgarAltszylerDepartamento de Computación, Universidad de Buenos Aires; Instituto de Investigación en Ciencias de La Computación (ICC), CONICET-Universidad de Buenos Aires 5086-5092 Numerous works use word embedding-based metrics to quantify societal biases and stereotypes in texts. Recent studies have found that word embeddings can capture semantic similarity but may be affected by word frequency. In this work we study the effect of frequency when measuring female vs. male gender bias with word embedding-based bias quantification methods. We find that Skip-gram with negative sampling and GloVe tend to detect male bias in high frequency words, while GloVe tends to return female bias in low frequency words. We show these behaviors still exist when words are randomly shuffled. This proves that the frequency-based effect observed in unshuffled corpora stems from properties of the metric rather than from word associations. The effect is spurious and problematic since bias metrics should depend exclusively on word co-occurrences and not individual word frequencies. Finally, we compare these results with the ones obtained with an alternative metric based on Pointwise Mutual Information. We find that this metric does not show a clear dependence on frequency, even though it is slightly skewed towards male bias across all frequencies. @@ -13198,9 +13198,9 @@ Faster and Smaller Speech Translation without Quality Compromise AlessandroMoschittiAmazon MarcoDel TrediciAmazon XiaoyuShenAmazon - ThuyVuAmazon + ThuyVuAmazon BillByrneUniversity of Cambridge - Adriàde GispertAmazon + Adriàde GispertAmazon 5195-5208 We introduce question answering with a cotext in focus, a task that simulates a free interaction with a QA system. The user reads on a screen some information about a topic, and they can follow-up with questions that can be either related or not to the topic; and the answer can be found in the document containing the screen content or from other pages. We call such information context. To study the task, we construct FocusQA, a dataset for answer sentence selection (AS2) with 12,165011unique question/context pairs, and a total of 109,940 answers. To build the dataset, we developed a novel methodology that takes existing questions and pairs them with relevant contexts. To show the benefits of this approach, we present a comparative analysis with a set of questions written by humans after reading the context, showing that our approach greatly helps in eliciting more realistic question/context pairs. Finally, we show that the task poses several challenges for incorporating contextual information. In this respect, we introduce strong baselines for answer sentence selection that outperform the precision of state-of-the-art models for AS2 up to 21.3% absolute points. 2022.findings-emnlp.381 @@ -13265,7 +13265,7 @@ Faster and Smaller Speech Translation without Quality Compromise YuanGongMassachusetts Institute of Technology SameerKhuranaMIT RhodaAuBoston University - JamesGlassMassachusetts Institute of Technology + JamesGlassMassachusetts Institute of Technology 5270-5283 Neuropsychological exams are commonly used to diagnose various kinds of cognitive impairment. They typically involve a trained examiner who conducts a series of cognitive tests with a subject. In recent years, there has been growing interest in developing machine learning methods to extract speech and language biomarkers from exam recordings to provide automated input for cognitive assessment. Inspired by recent findings suggesting that the examiner’s language can influence cognitive impairment classifications, in this paper, we study the influence of the examiner on automatic dementia identification decisions in real-world neuropsychological exams. To mitigate the influence of the examiner, we propose a systematic three-stage pipeline for detecting dementia from exam recordings. In the first stage, we perform audio-based speaker diarization (i.e., estimating who spoke when?) by incorporating speaker discriminative features. In the second stage, we employ text-based language models to identify the role of the speaker (i.e., examiner or subject). Finally, in the third stage, we employ text- and audio-based models to detect cognitive impairment from hypothesized subject segments. Our studies suggest that incorporating audio-based diarization followed by text-based role identification helps mitigate the influences from the examiner’s segments. Further, we found that the text and audio modalities complement each other, and the performance improves when we use both modalities. We also perform several carefully designed experimental studies to assess the performance of each stage. 2022.findings-emnlp.386 @@ -13305,7 +13305,7 @@ Faster and Smaller Speech Translation without Quality Compromise <fixed-case>J</fixed-case>am<fixed-case>P</fixed-case>atois<fixed-case>NLI</fixed-case>: A Jamaican Patois Natural Language Inference Dataset Ruth-AnnArmstrongStanford University JohnHewittStanford University - ChristopherManningStanford University + ChristopherManningStanford University 5307-5320 JamPatoisNLI provides the first dataset for natural language inference in a creole language, Jamaican Patois.Many of the most-spoken low-resource languages are creoles. These languages commonly have a lexicon derived from a major world language and a distinctive grammar reflecting the languages of the original speakers and the process of language birth by creolization. This gives them a distinctive place in exploring the effectiveness of transfer from large monolingual or multilingual pretrained models. While our work, along with previous work, shows that transfer from these models to low-resource languages that are unrelated to languages in their training set is not very effective, we would expect stronger results from transfer to creoles. Indeed, our experiments show considerably better results from few-shot learning of JamPatoisNLI than for such unrelated languages, and help us begin to understand how the unique relationship between creoles and their high-resource base languages affect cross-lingual transfer. JamPatoisNLI, which consists of naturally-occurring premises and expert-written hypotheses, is a step towards steering research into a traditionally underserved language and a useful benchmark for understanding cross-lingual NLP. 2022.findings-emnlp.389 @@ -13349,7 +13349,7 @@ Faster and Smaller Speech Translation without Quality Compromise KaixinMaCarnegie Mellon University HaoChengMicrosoft Research XiaodongLiuMicrosoft Research - EricNybergCarnegie Mellon University + EricNybergCarnegie Mellon University JianfengGaoMicrosoft Research, Redmond 5360-5374 We propose a novel open-domain question answering (ODQA) framework for answering single/multi-hop questions across heterogeneous knowledge sources.The key novelty of our method is the introduction of the intermediary modules into the current retriever-reader pipeline.Unlike previous methods that solely rely on the retriever for gathering all evidence in isolation,our intermediary performs a chain of reasoning over the retrieved set.Specifically, our method links the retrieved evidence with its related global context into graphs and organizes them into a candidate list of evidence chains.Built upon pretrained language models, our system achieves competitive performance on two ODQA datasets, OTT-QA and NQ, against tables and passages from Wikipedia.In particular, our model substantially outperforms the previous state-of-the-art on OTT-QA with an exact match score of 47.3 (45% relative gain). @@ -13402,7 +13402,7 @@ Faster and Smaller Speech Translation without Quality Compromise SiddharthDalmiaGoogle BrianYanCarnegie Mellon University FlorianMetzeCarnegie Mellon University - Alan WBlackCarnegie Mellon University + Alan WBlackCarnegie Mellon University ShinjiWatanabeCarnegie Mellon University 5419-5429 End-to-end spoken language understanding (SLU) systems are gaining popularity over cascaded approaches due to their simplicity and ability to avoid error propagation. However, these systems model sequence labeling as a sequence prediction task causing a divergence from its well-established token-level tagging formulation. We build compositional end-to-end SLU systems that explicitly separate the added complexity of recognizing spoken mentions in SLU from the NLU task of sequence labeling. By relying on intermediate decoders trained for ASR, our end-to-end systems transform the input modality from speech to token-level representations that can be used in the traditional sequence labeling framework. This composition of ASR and NLU formulations in our end-to-end SLU system offers direct compatibility with pre-trained ASR and NLU systems, allows performance monitoring of individual components and enables the use of globally normalized losses like CRF, making them attractive in practical scenarios. Our models outperform both cascaded and direct end-to-end models on a labeling task of named entity recognition across SLU benchmarks. @@ -13442,7 +13442,7 @@ Faster and Smaller Speech Translation without Quality Compromise RuiSunColumbia University ZhecanWangcolumbia university Kai-WeiChangUCLA - Shih-FuChangColumbia University + Shih-FuChangColumbia University 5444-5454 From a visual scene containing multiple people, human is able to distinguish each individual given the context descriptions about what happened before, their mental/physical states or intentions, etc. Above ability heavily relies on human-centric commonsense knowledge and reasoning. For example, if asked to identify the “person who needs healing” in an image, we need to first know that they usually have injuries or suffering expressions, then find the corresponding visual clues before finally grounding the person. We present a new commonsense task, Human-centric Commonsense Grounding, that tests the models’ ability to ground individuals given the context descriptions about what happened before, and their mental/physical states or intentions. We further create a benchmark, HumanCog, a dataset with 130k grounded commonsensical descriptions annotated on 67k images, covering diverse types of commonsense and visual scenes. We set up a context-object-aware method as a strong baseline that outperforms previous pre-trained and non-pretrained models. Further analysis demonstrates that rich visual commonsense and powerful integration of multi-modal commonsense are essential, which sheds light on future works. Data and code will be available at https://github.com/Hxyou/HumanCog. 2022.findings-emnlp.399 @@ -13455,7 +13455,7 @@ Faster and Smaller Speech Translation without Quality Compromise BasharAlhafniNew York University KeZhangDataminr, inc ShihaoRanDataminr - JoelTetreaultDataminr + JoelTetreaultDataminr AlejandroJaimesDataminr 5455-5477 Social media has increasingly played a key role in emergency response: first responders can use public posts to better react to ongoing crisis events and deploy the necessary resources where they are most needed. Timeline extraction and abstractive summarization are critical technical tasks to leverage large numbers of social media posts about events. Unfortunately, there are few datasets for benchmarking technical approaches for those tasks. This paper presents , the largest dataset of local crisis event timelines available to date. contains 1,000 crisis event timelines across four domains: wildfires, local fires, traffic, and storms. We built using a semi-automated cluster-then-refine approach to collect data from the public Twitter stream. Our initial experiments indicate a significant gap between the performance of strong baselines compared to the human performance on both tasks.Our dataset, code, and models are publicly available (https://github.com/CrisisLTLSum/CrisisTimelines). @@ -13496,7 +13496,7 @@ Faster and Smaller Speech Translation without Quality Compromise ChenghuaLinDepartment of Computer Science, University of Sheffield HenglinHuangDepartment of Computer Science, University of Surrey FrankGuerinUniversity of Surrey - ZhihaoZhangBeihang University + ZhihaoZhangBeihang University 5504-5518 One of the key challenges of automatic story generation is how to generate a long narrative that can maintain fluency, relevance, and coherence. Despite recent progress, current story generation systems still face the challenge of how to effectively capture contextual and event features, which has a profound impact on a model’s generation performance. To address these challenges, we present EtriCA, a novel neural generation model, which improves the relevance and coherence of the generated stories through residually mapping context features to event sequences with a cross-attention mechanism. Such a feature capturing mechanism allows our model to better exploit the logical relatedness between events when generating stories. Extensive experiments based on both automatic and human evaluations show that our model significantly outperforms state-of-the-art baselines, demonstrating the effectiveness of our model in leveraging context and event features. 2022.findings-emnlp.403 @@ -13564,7 +13564,7 @@ Faster and Smaller Speech Translation without Quality Compromise TahiraNaseemIBM Research AI PavanKapanipathiIBM Research GaetanoRossielloIBM Research AI - AchilleFokoueIBM Research + AchilleFokoueIBM Research 5571-5580 Most existing approaches for Knowledge Base Question Answering (KBQA) focus on a specific underlying knowledge base either because of inherent assumptions in the approach, or because evaluating it on a different knowledge base requires non-trivial changes. However, many popular knowledge bases share similarities in their underlying schemas that can be leveraged to facilitate generalization across knowledge bases. To achieve this generalization, we introduce a KBQA framework based on a 2-stage architecture that explicitly separates semantic parsing from the knowledge base interaction, facilitating transfer learning across datasets and knowledge graphs. We show that pretraining on datasets with a different underlying knowledge base can nevertheless provide significant performance gains and reduce sample complexity. Our approach achieves comparable or state-of-the-art performance for LC-QuAD (DBpedia), WebQSP (Freebase), SimpleQuestions (Wikidata) and MetaQA (Wikimovies-KG). 2022.findings-emnlp.408 @@ -13598,7 +13598,7 @@ Faster and Smaller Speech Translation without Quality Compromise ZihuiwenYeUniversity of Oxford TaoYuUniversity of Washington LinfengSongTencent AI Lab - PhilBlunsomUniversity of Oxford + PhilBlunsomUniversity of Oxford 5608-5620 The task of context-dependent text-to-SQL aims to convert multi-turn user utterances to formal SQL queries. This is a challenging task due to both the scarcity of training data from which to learn complex contextual dependencies and to generalize to unseen databases. In this paper we explore augmenting the training datasets using self-play, which leverages contextual information to synthesize new interactions to adapt the model to new databases. We first design a SQL-to-text model conditioned on a sampled goal query, which represents a user’s intent, that then converses with a text-to-SQL semantic parser to generate new interactions. We then filter the synthesized interactions and retrain the models with the augmented data. We find that self-play improves the accuracy of a strong baseline on SParC and CoSQL, two widely used cross-domain text-to-SQL datasets. Our analysis shows that self-play simulates various conversational thematic relations, enhances cross-domain generalization and improves beam-search. 2022.findings-emnlp.411 @@ -13648,8 +13648,8 @@ Faster and Smaller Speech Translation without Quality Compromise Bo-RuLuUniversity of Washington YushiHuUniversity of Washington HaoChengMicrosoft Research - Noah A.SmithUniversity of Washington - MariOstendorfUniversity of Washington + Noah A.SmithUniversity of Washington + MariOstendorfUniversity of Washington 5657-5670 Human conversations can evolve in many different ways, creating challenges for automatic understanding and summarization. Goal-oriented conversations often have meaningful sub-dialogue structure, but it can be highly domain-dependent. This work introduces an unsupervised approach to learning hierarchical conversation structure, including turn and sub-dialogue segment labels, corresponding roughly to dialogue acts and sub-tasks, respectively. The decoded structure is shown to be useful in enhancing neural models of language for three conversation-level understanding tasks. Further, the learned finite-state sub-dialogue network is made interpretable through automatic summarization. 2022.findings-emnlp.415 @@ -13663,7 +13663,7 @@ Faster and Smaller Speech Translation without Quality Compromise YichongXuMicrosoft YuweiFangMicrosoft WenhaoYuUniversity of Notre Dame - YangLiuMicrosoft + YangLiuMicrosoft HaiZhaoShanghai Jiao Tong University ChenguangZhuMicrosoft Cognitive Services Research Group MichaelZengMicrosoft Corp @@ -13731,8 +13731,8 @@ Faster and Smaller Speech Translation without Quality Compromise MianzhiPanNanjing University JianbingZhangNanjing University ShujianHuangNational Key Laboratory for Novel Software Technology, Nanjing University - XinyuDaiNational Key Laboratory for Novel Software Technology, Nanjing University - JiajunChenNanjing University + XinyuDaiNational Key Laboratory for Novel Software Technology, Nanjing University + JiajunChenNanjing University 5739-5749 In recent years, vision and language pre-training (VLP) models have advanced the state-of-the-art results in a variety of cross-modal downstream tasks. Aligning cross-modal semantics is claimed to be one of the essential capabilities of VLP models. However, it still remains unclear about the inner working mechanism of alignment in VLP models. In this paper, we propose a new probing method that is based on image captioning to first empirically study the cross-modal semantics alignment of VLP models. Our probing method is built upon the fact that given an image-caption pair, the VLP models will give a score, indicating how well two modalities are aligned; maximizing such scores will generate sentences that VLP models believe are of good alignment. Analyzing these sentences thus will reveal in what way different modalities are aligned and how well these alignments are in VLP models. We apply our probing method to five popular VLP models, including UNITER, ROSITA, ViLBERT, CLIP, and LXMERT, and provide a comprehensive analysis of the generated captions guided by these models. Our results show that VLP models (1) focus more on just aligning objects with visual words, while neglecting global semantics; (2) prefer fixed sentence patterns, thus ignoring more important textual information including fluency and grammar; and (3) deem the captions with more visual words are better aligned with images. These findings indicate that VLP models still have weaknesses in cross-modal semantics alignment and we hope this work will draw researchers’ attention to such problems when designing a new VLP model. 2022.findings-emnlp.421 @@ -13778,7 +13778,7 @@ Faster and Smaller Speech Translation without Quality Compromise NikolaosFlemotomosUniversity of Southern California ZacImelUniversity of Utah DavidAtkinsUniversity of Washington - ShrikanthNarayananUniversity of Southern California + ShrikanthNarayananUniversity of Southern California 5787-5795 In psychotherapy interactions, the quality of a session is assessed by codifying the communicative behaviors of participants during the conversation through manual observation and annotation. Developing computational approaches for automated behavioral coding can reduce the burden on human coders and facilitate the objective evaluation of the intervention. In the real world, however, implementing such algorithms is associated with data sparsity challenges since privacy concerns lead to limited available in-domain data. In this paper, we leverage a publicly available conversation-based dataset and transfer knowledge to the low-resource behavioral coding task by performing an intermediate language model training via meta-learning. We introduce a task augmentation method to produce a large number of “analogy tasks” — tasks similar to the target one — and demonstrate that the proposed framework predicts target behaviors more accurately than all the other baseline models. 2022.findings-emnlp.425 @@ -13787,7 +13787,7 @@ Faster and Smaller Speech Translation without Quality Compromise Learning to Detect Noisy Labels Using Model-Based Features - ZhihaoWangMeta + ZhihaoWangMeta ZongyuLinTsinghua University JunjieWenChina Merchant Bank XianxinChenRecurrent AI @@ -13905,7 +13905,7 @@ Faster and Smaller Speech Translation without Quality Compromise JunWangUniversity of Melbourne XuanliHeMonash University BenjaminRubinsteinUniversity of Melbourne - TrevorCohnUniversity of Melbourne + TrevorCohnUniversity of Melbourne 5906-5913 Neural machine translation (NMT) systems are vulnerable to backdoor attacks, whereby an attacker injects poisoned samples into training such that a trained model produces malicious translations. Nevertheless, there is little research on defending against such backdoor attacks in NMT. In this paper, we first show that backdoor attacks that have been successful in text classification are also effective against machine translation tasks. We then present a novel defence method that exploits a key property of most backdoor attacks: namely the asymmetry between the source and target language sentences, which is used to facilitate malicious text insertions, substitutions and suchlike. Our technique uses word alignment coupled with language model scoring to detect outlier tokens, and thus can find and filter out training instances which may contain backdoors. Experimental results demonstrate that our technique can significantly reduce the success of various attacks by up to 89.0%, while not affecting predictive accuracy. 2022.findings-emnlp.435 @@ -14081,7 +14081,7 @@ Faster and Smaller Speech Translation without Quality Compromise <fixed-case>A</fixed-case>da<fixed-case>P</fixed-case>rompt: Adaptive Model Training for Prompt-based <fixed-case>NLP</fixed-case> YulongChenZhejiang University, Westlake University - YangLiuMicrosoft + YangLiuMicrosoft LiDongMicrosoft Research ShuohangWangMicrosoft ChenguangZhuMicrosoft Cognitive Services Research Group @@ -14123,7 +14123,7 @@ Faster and Smaller Speech Translation without Quality Compromise Generative Aspect-Based Sentiment Analysis with Contrastive Learning and Expressive Structure - JosephPeperUniversity of Michigan + JosephPeperUniversity of Michigan LuWangUniversity of Michigan 6089-6095 Generative models have demonstrated impressive results on Aspect-based Sentiment Analysis (ABSA) tasks, particularly for the emerging task of extracting Aspect-Category-Opinion-Sentiment (ACOS) quadruples. However, these models struggle with implicit sentiment expressions, which are commonly observed in opinionated content such as online reviews. In this work, we introduce GEN-SCL-NAT, which consists of two techniques for improved structured generation for ACOS quadruple extraction. First, we propose GEN-SCL, a supervised contrastive learning objective that aids quadruple prediction by encouraging the model to produce input representations that are discriminable across key input attributes, such as sentiment polarity and the existence of implicit opinions and aspects. Second, we introduce GEN-NAT, a new structured generation format that better adapts pre-trained autoregressive encoder-decoder models to extract quadruples in a generative fashion. Experimental results show that GEN-SCL-NAT achieves top performance across three ACOS datasets, averaging 1.48% F1 improvement, with a maximum 1.73% increase on the LAPTOP-L1 dataset. Additionally, we see significant gains on implicit aspect and opinion splits that have been shown as challenging for existing ACOS approaches. @@ -14258,7 +14258,7 @@ Faster and Smaller Speech Translation without Quality Compromise YuxiaWuXi’an Jiaotong University LiziLiaoSingapore Management University XuemingQianXi’an Jiaotong University - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore 6207-6218 Discovering new slots is critical to the success of dialogue systems. Most existing methods rely on automatic slot induction in unsupervised fashion or perform domain adaptation across zero or few-shot scenarios. They have difficulties in providing high-quality supervised signals to learn clustering-friendly features, and are limited in effectively transferring the prior knowledge from known slots to new slots. In this work, we propose a Semi-supervised Incremental Clustering method (SIC), to discover new slots with the aid of existing linguistic annotation models and limited known slot data. Specifically, we harvest slot value candidates with NLP model cues and innovatively formulate the slot discovery task under an incremental clustering framework. The model gradually calibrate slot representations under the supervision of generated pseudo-labels, and automatically learns to terminate when no more salient slot remains. Our thorough evaluation on five public datasets demonstrates that it significantly outperforms state-of-the-art models. 2022.findings-emnlp.462 @@ -14323,7 +14323,7 @@ Faster and Smaller Speech Translation without Quality Compromise XiaolongJinInstitute of Computing Technology, Chinese Academy of Sciences LongBaiSchool of Computer Science and Technology, University of Chinese Academy of Sciences; CAS Key Laboratory of Network Data Science and Technology, Institute of Computing Technology, Chinese Academy of Sciences JiafengGuoInstitute of Computing Technology, CAS - XueqiChengInstitute of Computing Technology, CAS + XueqiChengInstitute of Computing Technology, CAS 6266-6275 Prototypical network based joint methods have attracted much attention in few-shot event detection, which carry out event detection in a unified sequence tagging framework. However, these methods suffer from the inaccurate prototype representation problem, due to two main reasons: the number of instances for calculating prototypes is limited; And, they do not well capture the relationships among event prototypes. To deal with this problem, we propose a Knowledge-Enhanced self-supervised Prototypical Network, called KE-PN, for few-shot event detection. KE-PN adopts hybrid rules, which can automatically align event types to an external knowledge base, i.e., FrameNet, to obtain more instances.It proposes a self-supervised learning method to filter out noisy data from enhanced instances. KE-PN is further equipped with an auxiliary event type relationship classification module, which injects the relationship information into representations of event prototypes. Extensive experiments on three benchmark datasets, i.e., FewEvent, MAVEN, and ACE2005 demonstrate the state-of-the-art performance of KE-PN. 2022.findings-emnlp.467 @@ -14352,7 +14352,7 @@ Faster and Smaller Speech Translation without Quality Compromise Sin-EnLuDepartment of Computer Science and Information Engineering, National Central University Bo-HanLuNational Central University, Taiwan Chao-YiLuPurdue Unversity - Richard Tzong-HanTsaiAcademia Sinica + Richard Tzong-HanTsaiAcademia Sinica 6287-6305 In natural language processing (NLP), code-mixing (CM) is a challenging task, especially when the mixed languages include dialects. In Southeast Asian countries such as Singapore, Indonesia, and Malaysia, Hokkien-Mandarin is the most widespread code-mixed language pair among Chinese immigrants, and it is also common in Taiwan. However, dialects such as Hokkien often have a scarcity of resources and the lack of an official writing system, limiting the development of dialect CM research. In this paper, we propose a method to construct a Hokkien-Mandarin CM dataset to mitigate the limitation, overcome the morphological issue under the Sino-Tibetan language family, and offer an efficient Hokkien word segmentation method through a linguistics-based toolkit. Furthermore, we use our proposed dataset and employ transfer learning to train the XLM (cross-lingual language model) for translation tasks. To fit the code-mixing scenario, we adapt XLM slightly. We found that by using linguistic knowledge, rules, and language tags, the model produces good results on CM data translation while maintaining monolingual translation quality. 2022.findings-emnlp.469 @@ -14433,7 +14433,7 @@ Faster and Smaller Speech Translation without Quality Compromise ChiZhangHKUST DongkyuLeeHong Kong University of Science and Technology YingxiuZhaoThe Hong Kong University of Science and Technology - DongshengLiNational University of Defense Technology + DongshengLiNational University of Defense Technology Nevin L.ZhangHong Kong University of Science and Technology 6364-6376 Emotional conversation systems generate responses for the input queries considering the speaker’s emotions in a conversation. Existing emotional conversation systems output emotional responses according to either a given emotion or the user’s emotion reflected in the input queries. Following a given emotion may lead to an emotional drift between the given emotion and the conversation state, and following only the user’s emotion may aggravate the user’s negative feelings if users suffer from a negative mood. In this paper, we propose to generate empathetic responses catering to the user’s emotions while leading the conversation to be emotionally positive. Particularly, by abstracting the conversation corpus, we extract and store the different responding strategies for different users’ emotions and conversational topics into a memory. We encourage positive emotions in conversation via a sentiment evaluator. We model the memory outputs with a Gaussian mixture distribution and sample a final responding strategy from the distribution. The strategy acts as a condition to a transformer model to generate responses. The experiments verify our model surpasses the baseline methods in appropriateness, diversity, and generating emotionally positive responses. @@ -14469,8 +14469,8 @@ Faster and Smaller Speech Translation without Quality Compromise Model and Data Transfer for Cross-Lingual Sequence Labelling in Zero-Resource Settings IkerGarcía-FerreroHiTZ Center - Ixa, University of the Basque Country UPV/EHU - RodrigoAgerriHiTZ Center - Ixa, University of the Basque Country UPV/EHU - GermanRigauUPV/EHU + RodrigoAgerriHiTZ Center - Ixa, University of the Basque Country UPV/EHU + GermanRigauUPV/EHU 6403-6416 Zero-resource cross-lingual transfer approaches aim to apply supervised modelsfrom a source language to unlabelled target languages. In this paper we performan in-depth study of the two main techniques employed so far for cross-lingualzero-resource sequence labelling, based either on data or model transfer.Although previous research has proposed translation and annotation projection(data-based cross-lingual transfer) as an effective technique for cross-lingualsequence labelling, in this paper we experimentally demonstrate that highcapacity multilingual language models applied in a zero-shot (model-basedcross-lingual transfer) setting consistently outperform data-basedcross-lingual transfer approaches. A detailed analysis of our results suggeststhat this might be due to important differences in language use. Morespecifically, machine translation often generates a textual signal which isdifferent to what the models are exposed to when using gold standard data,which affects both the fine-tuning and evaluation processes. Our results alsoindicate that data-based cross-lingual transfer approaches remain a competitiveoption when high-capacity multilingual language models are not available. 2022.findings-emnlp.478 @@ -14481,7 +14481,7 @@ Faster and Smaller Speech Translation without Quality Compromise Early Guessing for Dialect Identification VaniKanjirangatIDSIA - TanjaSamardzicUniversity of Zurich + TanjaSamardzicUniversity of Zurich FabioRinaldiIDSIA, Swiss AI Institute LjiljanaDolamicarmasuisse S&T 6417-6426 @@ -14546,7 +14546,7 @@ Faster and Smaller Speech Translation without Quality Compromise Large-Scale Differentially Private <fixed-case>BERT</fixed-case> RohanAnilGoogle BadihGhaziGoogle - VineetGuptaGoogle + VineetGuptaGoogle RaviKumarGoogle PasinManurangsiGoogle 6481-6491 @@ -14586,7 +14586,7 @@ Faster and Smaller Speech Translation without Quality Compromise Weakly Supervised Headline Dependency Parsing AdrianBentonGoogle TianzeShiGoogle - OzanİrsoyBloomberg LP + OzanİrsoyBloomberg LP IgorMalioutovBloomberg L.P. 6520-6535 English news headlines form a register with unique syntactic properties that have been documented in linguistics literature since the 1930s. However, headlines have received surprisingly little attention from the NLP syntactic parsing community. We aim to bridge this gap by providing the first news headline corpus of Universal Dependencies annotated syntactic dependency trees, which enables us to evaluate existing state-of-the-art dependency parsers on news headlines. To improve English news headline parsing accuracies, we develop a projection method to bootstrap silver training data from unlabeled news headline-article lead sentence pairs. Models trained on silver headline parses demonstrate significant improvements in performance over models trained solely on gold-annotated long-form texts. Ultimately, we find that, although projected silver training data improves parser performance across different news outlets, the improvement is moderated by constructions idiosyncratic to outlet. @@ -14600,7 +14600,7 @@ Faster and Smaller Speech Translation without Quality Compromise NazneenRajaniHugging Face DivyanshAgarwalCarnegie Mellon University CaimingXiongSalesforce - DragomirRadevYale University + DragomirRadevYale University 6536-6558 The majority of existing text summarization datasets include short-form source documents that lack long-range causal and temporal dependencies, and often contain strong layout and stylistic biases. While relevant, such datasets will offer limited challenges for future text summarization systems. We address these issues by introducing BOOKSUM, a collection of datasets for long-form narrative summarization. Our dataset covers documents from the literature domain, such as novels, plays and stories, and includes highly abstractive, human written summaries on three levels of granularity of increasing difficulty: paragraph-, chapter-, and book-level. The domain and structure of our dataset poses a unique set of challenges for summarization systems, which include: processing very long documents, non-trivial causal and temporal dependencies, and rich discourse structures. To facilitate future work, we trained and evaluated multiple extractive and abstractive summarization models as baselines for our dataset. 2022.findings-emnlp.488 @@ -14642,7 +14642,7 @@ Faster and Smaller Speech Translation without Quality Compromise JianLiThe Chinese University of Hong Kong HongruWangThe Chinese University of Hong Kong XingshanZengHuawei Noah’s Ark Lab - Kam-FaiWongDepartment of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong + Kam-FaiWongDepartment of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong 6595-6607 News recommendation (NR) is essential for online news services. Existing NR methods typically adopt a news-user representation learning framework, facing two potential limitations. First, in news encoder, single candidate news encoding suffers from an insufficient semantic information problem. Second, existing graph-based NR methods are promising but lack effective news-user feature interaction, rendering the graph-based recommendation suboptimal. To overcome these limitations, we propose dual-interactive graph attention networks (DIGAT) consisting of news- and user-graph channels. In the news-graph channel, we enrich the semantics of single candidate news by incorporating the semantically relevant news information with a semantic-augmented graph (SAG). In the user-graph channel, multi-level user interests are represented with a news-topic graph. Most notably, we design a dual-graph interaction process to perform effective feature interaction between the news and user graphs, which facilitates accurate news-user representation matching. Experiment results on the benchmark dataset MIND show that DIGAT outperforms existing news recommendation methods. Further ablation studies and analyses validate the effectiveness of (1) semantic-augmented news graph modeling and (2) dual-graph interaction. 2022.findings-emnlp.491 @@ -14737,7 +14737,7 @@ Faster and Smaller Speech Translation without Quality Compromise ChaoqunDuanHarbin Institute of Technology YouzhengWuJD AI Research XiaodongHeJD AI Research - TiejunZhaotjzhao@hit.edu.cn + TiejunZhaotjzhao@hit.edu.cn 6687-6697 Hybrid question answering (HQA) aims to answer questions over heterogeneous data, including tables and passages linked to table cells. The heterogeneous data can provide different granularity evidence to HQA models, e.t., column, row, cell, and link. Conventional HQA models usually retrieve coarse- or fine-grained evidence to reason the answer. Through comparison, we find that coarse-grained evidence is easier to retrieve but contributes less to the reasoner, while fine-grained evidence is the opposite. To preserve the advantage and eliminate the disadvantage of different granularity evidence, we propose MuGER2, a Multi-Granularity Evidence Retrieval and Reasoning approach. In evidence retrieval, a unified retriever is designed to learn the multi-granularity evidence from the heterogeneous data. In answer reasoning, an evidence selector is proposed to navigate the fine-grained evidence for the answer reader based on the learned multi-granularity evidence. Experiment results on the HybridQA dataset show that MuGER2 significantly boosts the HQA performance. Further ablation analysis verifies the effectiveness of both the retrieval and reasoning designs. 2022.findings-emnlp.498 @@ -14774,7 +14774,7 @@ Faster and Smaller Speech Translation without Quality Compromise NaokiKobayashiTokyo Institute of Technology TsutomuHiraoNTT Communication Science Labs. HidetakaKamigaitoNara Institute of Science and Technology - ManabuOkumuraTokyo Institute of Technology + ManabuOkumuraTokyo Institute of Technology MasaakiNagataNTT Corporation 6725-6737 To promote and further develop RST-style discourse parsing models, we need a strong baseline that can be regarded as a reference for reporting reliable experimental results. This paper explores a strong baseline by integrating existing simple parsing strategies, top-down and bottom-up, with various transformer-based pre-trained language models.The experimental results obtained from two benchmark datasets demonstrate that the parsing performance strongly relies on the pre-trained language models rather than the parsing strategies.In particular, the bottom-up parser achieves large performance gains compared to the current best parser when employing DeBERTa.We further reveal that language models with a span-masking scheme especially boost the parsing performance through our analysis within intra- and multi-sentential parsing, and nuclearity prediction. @@ -14814,7 +14814,7 @@ Faster and Smaller Speech Translation without Quality Compromise A <fixed-case>POMDP</fixed-case> Dialogue Policy with 3-way Grounding and Adaptive <fixed-case>S</fixed-case>ensing for Learning through Communication MaryamZarePennsylvania State University AlanWagnerpsu.edu - RebeccaPassonneaupsu.edu + RebeccaPassonneaupsu.edu 6767-6780 Agents to assist with rescue, surgery, and similar activities could collaborate better with humans if they could learn new strategic behaviors through communication. We introduce a novel POMDP dialogue policy for learning from people. The policy has 3-way grounding of language in the shared physical context, the dialogue context, and persistent knowledge. It can learn distinct but related games, and can continue learning across dialogues for complex games. A novel sensing component supports adaptation to information-sharing differences across people. The single policy performs better than oracle policies customized to specific games and information behavior. 2022.findings-emnlp.504 @@ -14863,7 +14863,7 @@ Faster and Smaller Speech Translation without Quality Compromise <fixed-case>WANLI</fixed-case>: Worker and <fixed-case>AI</fixed-case> Collaboration for Natural Language Inference Dataset Creation AlisaLiuUniversity of Washington SwabhaSwayamdiptaUniversity of Southern California - Noah A.SmithUniversity of Washington + Noah A.SmithUniversity of Washington YejinChoiUniversity of Washington 6826-6847 A recurring challenge of crowdsourcing NLP datasets at scale is that human writers often rely on repetitive patterns when crafting examples, leading to a lack of linguistic diversity. We introduce a novel approach for dataset creation based on worker and AI collaboration, which brings together the generative strength of language models and the evaluative strength of humans. Starting with an existing dataset, MultiNLI for natural language inference (NLI), our approach uses dataset cartography to automatically identify examples that demonstrate challenging reasoning patterns, and instructs GPT-3 to compose new examples with similar patterns. Machine generated examples are then automatically filtered, and finally revised and labeled by human crowdworkers. The resulting dataset, WANLI, consists of 107,885 NLI examples and presents unique empirical strengths over existing NLI datasets. Remarkably, training a model on WANLI improves performance on eight out-of-domain test sets we consider, including by 11% on HANS and 9% on Adversarial NLI, compared to training on the 4x larger MultiNLI. Moreover, it continues to be more effective than MultiNLI augmented with other NLI datasets. Our results demonstrate the promise of leveraging natural language generation techniques and re-imagining the role of humans in the dataset creation process. @@ -14965,7 +14965,7 @@ Faster and Smaller Speech Translation without Quality Compromise ZhaofengWuThe Allen Institute for Artificial Intelligence HaoPengAllen Institute for AI NikolaosPappasAmazon Web Services (AWS AI) - Noah A.SmithUniversity of Washington + Noah A.SmithUniversity of Washington 6931-6939 Document-level machine translation leverages inter-sentence dependencies to produce more coherent and consistent translations. However, these models, predominantly based on transformers, are difficult to scale to long documents as their attention layers have quadratic complexity in the sequence length. Recent efforts on efficient attention improve scalability, but their effect on document translation remains unexplored. In this work, we investigate the efficacy of a recent linear attention model by Peng et al. (2021) on document translation and augment it with a sentential gate to promote a recency inductive bias. We evaluate the model on IWSLT 2015 and OpenSubtitles 2018 against the transformer, demonstrating substantially increased decoding speed on long sequences with similar or better BLEU scores. We show that sentential gating further improves translation quality on IWSLT. 2022.findings-emnlp.515 @@ -14998,7 +14998,7 @@ Faster and Smaller Speech Translation without Quality Compromise HanGuoCMU BowenTanCarnegie Mellon University ZhengzhongLiuCarnegie Mellon University; Petuum INC. - EricXingCarnegie Mellon University + EricXingCarnegie Mellon University ZhitingHuUC San Diego 6969-6991 Maximum likelihood estimation (MLE) is the predominant algorithm for training text generation models. This paradigm relies on direct supervision examples, which is not applicable to many emerging applications, such as generating adversarial attacks or generating prompts to control language models. Reinforcement learning (RL) on the other hand offers a more flexible solution by allowing users to plug in arbitrary task metrics as reward. Yet previous RL algorithms for text generation, such as policy gradient (on-policy RL) and Q-learning (off-policy RL), are often notoriously inefficient or unstable to train due to the large sequence space and the sparse reward received only at the end of sequences. In this paper, we introduce a new RL formulation for text generation from the soft Q-learning (SQL) perspective. It enables us to draw from the latest RL advances, such as path consistency learning, to combine the best of on-/off-policy updates, and learn effectively from sparse reward. We apply the approach to a wide range of novel text generation tasks, including learning from noisy/negative examples, adversarial attacks, and prompt generation. Experiments show our approach consistently outperforms both task-specialized algorithms and the previous RL methods. @@ -15063,7 +15063,7 @@ Faster and Smaller Speech Translation without Quality Compromise XiaoqingZhengFudan University Kai-WeiChangUCLA Cho-JuiHsiehUniversity of California, Los Angeles - XuanjingHuangFudan University + XuanjingHuangFudan University 7054-7063 The existence and pervasiveness of textual adversarial examples have raised serious concerns to security-critical applications. Many methods have been developed to defend against adversarial attacks for neural natural language processing (NLP) models.Adversarial training is one of the most successful defense methods by adding some random or intentional perturbations to the original input texts and making the models robust to the perturbed examples.In this study, we explore the feasibility of improving the adversarial robustness of NLP models by performing perturbations in the parameter space rather than the input feature space.The weight perturbation helps to find a better solution (i.e., the values of weights) that minimizes the adversarial loss among other feasible solutions.We found that the weight perturbation can significantly improve the robustness of NLP models when it is combined with the perturbation in the input embedding space, yielding the highest accuracy on both clean and adversarial examples across different datasets. 2022.findings-emnlp.523 @@ -15190,7 +15190,7 @@ Faster and Smaller Speech Translation without Quality Compromise YiwenDingUniversity of Michigan ZhihengLyuThe University of Hong Kong MrinmayaSachanETH Zurich - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan BernhardSchoelkopfMax-Planck Institute for Intelligent Systems 7180-7198 Reasoning is central to human intelligence. However, fallacious arguments are common, and some exacerbate problems such as spreading misinformation about climate change. In this paper, we propose the task of logical fallacy detection, and provide a new dataset (Logic) of logical fallacies generally found in text, together with an additional challenge set for detecting logical fallacies in climate change claims (LogicClimate). Detecting logical fallacies is a hard problem as the model must understand the underlying logical structure of the argument. We find that existing pretrained large language models perform poorly on this task. In contrast, we show that a simple structure-aware classifier outperforms the best language model by 5.46% F1 scores on Logic and 4.51% on LogicClimate. We encourage future work to explore this task since (a) it can serve as a new reasoning challenge for language models, and (b) it can have potential applications in tackling the spread of misinformation. Our dataset and code are available at https://github.com/causalNLP/logical-fallacy @@ -15212,7 +15212,7 @@ Faster and Smaller Speech Translation without Quality Compromise Revisiting Transformer-based Models for Long Document Classification - XiangDaiCSIRO Data61 + XiangDaiCSIRO Data61 IliasChalkidisUniversity of Copenhagen SuneDarknerUniversity of Copenhagen DesmondElliottUniversity of Copenhagen @@ -15289,7 +15289,7 @@ Faster and Smaller Speech Translation without Quality Compromise <fixed-case>CHIA</fixed-case>: <fixed-case>CH</fixed-case>oosing Instances to Annotate for Machine Translation RajatBhatnagarUniversity of Colorado Boulder AnanyaGaneshUniversity of Colorado Boulder - KatharinaKannUniversity of Colorado Boulder + KatharinaKannUniversity of Colorado Boulder 7299-7315 Neural machine translation (MT) systems have been shown to perform poorly on low-resource language pairs, for which large-scale parallel data is unavailable. Making the data annotation process faster and cheaper is therefore important to ensure equitable access to MT systems. To make optimal use of a limited annotation budget, we present CHIA (choosing instances to annotate), a method for selecting instances to annotate for machine translation. Using an existing multi-way parallel dataset of high-resource languages, we first identify instances, based on model training dynamics, that are most informative for training MT models for high-resource languages. We find that there are cross-lingual commonalities in instances that are useful for MT model training, which we use to identify instances that will be useful to train models on a new target language. Evaluating on 20 languages from two corpora, we show that training on instances selected using our method provides an average performance improvement of 1.59 BLEU over training on randomly selected instances of the same size. 2022.findings-emnlp.540 @@ -15323,7 +15323,7 @@ Faster and Smaller Speech Translation without Quality Compromise YajuanLyuBaidu Inc. WeiLiBaidu Inc. JiafengGuoInstitute of Computing Technology, CAS - XueqiChengInstitute of Computing Technology, CAS + XueqiChengInstitute of Computing Technology, CAS 7328-7338 A Temporal Knowledge Graph (TKG) is a sequence of KGs with respective timestamps, which adopts quadruples in the form of (subject, relation, object, timestamp) to describe dynamic facts. TKG reasoning has facilitated many real-world applications via answering such queries as (query entity, query relation, ?, future timestamp) about future. This is actually a matching task between a query and candidate entities based on their historical structures, which reflect behavioral trends of the entities at different timestamps. In addition, recent KGs provide background knowledge of all the entities, which is also helpful for the matching. Thus, in this paper, we propose the Historical Structure Matching (HiSMatch) model. It applies two structure encoders to capture the semantic information contained in the historical structures of the query and candidate entities. Besides, it adopts another encoder to integrate the background knowledge into the model. TKG reasoning experiments on six benchmark datasets demonstrate the significant improvement of the proposed HiSMatch model, with up to 5.6% performance improvement in MRR, compared to the state-of-the-art baselines. 2022.findings-emnlp.542 @@ -15364,9 +15364,9 @@ Faster and Smaller Speech Translation without Quality Compromise LisaBauerUniversity of North Carolina-Chapel Hill KarthikGopalakrishnanAmazon SpandanaGellaAmazon Alexa AI - YangLiuAmazon + YangLiuAmazon MohitBansalUniversity of North Carolina at Chapel Hill - DilekHakkani-TurAmazon Alexa AI + DilekHakkani-TurAmazon Alexa AI 7372-7386 Prompting inputs with natural language task descriptions has emerged as a popular mechanism to elicit reasonably accurate outputs from large-scale generative language models with little to no in-context supervision. This also helps gain insight into how well language models capture the semantics of a wide range of downstream tasks purely from self-supervised pre-training on massive corpora of unlabeled text. Such models have naturally also been exposed to a lot of undesirable content like racist and sexist language and there is only some work on awareness of models along these dimensions. In this paper, we define and comprehensively evaluate how well such language models capture the semantics of four tasks for bias: diagnosis, identification, extraction and rephrasing. We define three broad classes of task descriptions for these tasks: statement, question, and completion, with numerous lexical variants within each class. We study the efficacy of prompting for each task using these classes and the null task description across several decoding methods and few-shot examples. Our analyses indicate that language models are capable of performing these tasks to widely varying degrees across different bias dimensions, such as gender and political affiliation. We believe our work is an important step towards unbiased language models by quantifying the limits of current self-supervision objectives at accomplishing such sociologically challenging tasks. 2022.findings-emnlp.545 @@ -15395,7 +15395,7 @@ Faster and Smaller Speech Translation without Quality Compromise BaolinPengMicrosoft Research MichelGalleyMicrosoft Research SudhaRaoMicrosoft Research, Redmond - BillDolanMicrosoft Research + BillDolanMicrosoft Research SnigdhaChaturvediUniversity of North Carolina, Chapel Hill JianfengGaoMicrosoft Research, Redmond 7397-7413 diff --git a/data/xml/2022.finnlp.xml b/data/xml/2022.finnlp.xml index c24985a70e..7f61c5db86 100644 --- a/data/xml/2022.finnlp.xml +++ b/data/xml/2022.finnlp.xml @@ -22,7 +22,7 @@ Contextualizing Emerging Trends in Financial News Articles Nhu KhoaNguyenL3i Laboratory, La Rochelle University ThierryDelahautLa Banque Postale - Asset Management - EmanuelaBorosUniversity of La Rochelle + EmanuelaBorosUniversity of La Rochelle AntoineDoucetUniversity of La Rochelle GaëlLejeuneSTIH, Sorbonne Université 1-9 @@ -173,7 +173,7 @@ <fixed-case>LIPI</fixed-case> at the <fixed-case>F</fixed-case>in<fixed-case>NLP</fixed-case>-2022 <fixed-case>ERAI</fixed-case> Task: Ensembling Sentence Transformers for Assessing Maximum Possible Profit and Loss from Online Financial Posts SohomGhoshFidelity Investments - Sudip KumarNaskarJadavpur University + Sudip KumarNaskarJadavpur University 111-115 Using insights from social media for making investment decisions has become mainstream. However, in the current era of information ex- plosion, it is essential to mine high-quality so- cial media posts. The FinNLP-2022 ERAI task deals with assessing Maximum Possible Profit (MPP) and Maximum Loss (ML) from social me- dia posts relating to finance. In this paper, we present our team LIPI’s approach. We ensem- bled a range of Sentence Transformers to quan- tify these posts. Unlike other teams with vary- ing performances across different metrics, our system performs consistently well. Our code is available here https://github.com/sohomghosh/LIPI_ERAI_ FinNLP_EMNLP- 2022/ 2022.finnlp-1.13 @@ -298,7 +298,7 @@ How Can a Teacher Make Learning From Sparse Data Softer? Application to Business Relation Extraction HadjerKhaldiIRIT - University of Paul Sabatier/ Geotrend - FarahBenamaraUniversity of Toulouse + FarahBenamaraUniversity of Toulouse CamillePradelGeotrend NathalieAussenac-GillesCnrs - Irit 170-177 @@ -361,7 +361,7 @@ <fixed-case>F</fixed-case>in<fixed-case>S</fixed-case>im4-<fixed-case>ESG</fixed-case> Shared Task: Learning Semantic Similarities for the Financial Domain. Extended edition to <fixed-case>ESG</fixed-case> insights JuyeonKangFortia Financial Solutions - IsmailEl MaaroufImprevicible + IsmailEl MaaroufImprevicible 211-217 This paper describes FinSim4-ESG 1 shared task organized in the 4th FinNLP workshopwhich is held in conjunction with the IJCAI-ECAI-2022 confer- enceThis year, the FinSim4 is extended to the Environment, Social and Government (ESG) insights and proposes two subtasks, one for ESG Taxonomy Enrichment and the other for Sustainable Sentence Prediction. Among the 28 teams registered to the shared task, a total of 8 teams submitted their systems results and 6 teams also submitted a paper to describe their method. The winner of each subtask shows good performance results of 0.85% and 0.95% in terms of accuracy, respectively. 2022.finnlp-1.28 @@ -372,7 +372,7 @@ Using Contextual Sentence Analysis Models to Recognize <fixed-case>ESG</fixed-case> Concepts ElvysLinhares PontesUniversity of La Rochelle MohamedBen JannetLaboratoire d’Informatique pour la Mécanique et les Sciences de l’Ingénieur - Jose G.MorenoPaul Sabatier University - IRIT + Jose G.MorenoPaul Sabatier University - IRIT AntoineDoucetUniversity of La Rochelle 218-223 This paper summarizes the joint participation of the Trading Central Labs and the L3i laboratory of the University of La Rochelle on both sub-tasks of the Shared Task FinSim-4 evaluation campaign. The first sub-task aims to enrich the ‘Fortia ESG taxonomy’ with new lexicon entries while the second one aims to classify sentences to either ‘sustainable’ or ‘unsustainable’ with respect to ESG (Environment, Social and Governance) related factors. For the first sub-task, we proposed a model based on pre-trained Sentence-BERT models to project sentences and concepts in a common space in order to better represent ESG concepts. The official task results show that our system yields a significant performance improvement compared to the baseline and outperforms all other submissions on the first sub-task. For the second sub-task, we combine the RoBERTa model with a feed-forward multi-layer perceptron in order to extract the context of sentences and classify them. Our model achieved high accuracy scores (over 92%) and was ranked among the top 5 systems. @@ -420,7 +420,7 @@ Ranking Environment, Social And Governance Related Concepts And Assessing Sustainability Aspect of Financial Texts SohomGhoshFidelity Investments - Sudip KumarNaskarJadavpur University + Sudip KumarNaskarJadavpur University 243-249 Understanding Environmental, Social, and Governance (ESG) factors related to financial products has become extremely important for investors. However, manually screening through the corporate policies and reports to understand their sustainability aspect is extremely tedious. In this paper, we propose solutions to two such problems which were released as shared tasks of the FinNLP workshop of the IJCAI-2022 conference. Firstly, we train a Sentence Transformers based model which automatically ranks ESG related concepts for a given unknown term. Secondly, we fine-tune a RoBERTa model to classify financial texts as sustainable or not. Out of 26 registered teams, our team ranked 4th in sub-task 1 and 3rd in sub-task 2. The source code can be accessed from https://github.com/sohomghosh/Finsim4_ESG 2022.finnlp-1.33 diff --git a/data/xml/2022.fl4nlp.xml b/data/xml/2022.fl4nlp.xml index a159fd18c4..85e1425392 100644 --- a/data/xml/2022.fl4nlp.xml +++ b/data/xml/2022.fl4nlp.xml @@ -3,7 +3,7 @@ Proceedings of the First Workshop on Federated Learning for Natural Language Processing (FL4NLP 2022) - Bill YuchenLin + Bill YuchenLin ChaoyangHe ChulinXie FatemehsadatMireshghallah @@ -56,7 +56,7 @@ Adaptive Differential Privacy for Language Model Training XinweiWu LiGong - DeyiXiong + DeyiXiong 21-26 Although differential privacy (DP) can protect language models from leaking privacy, its indiscriminative protection on all data points reduces its practical utility. Previous works improve DP training by discriminating privacy and non-privacy data. But these works rely on datasets with prior privacy information, which is not available in real-world scenarios. In this paper, we propose an Adaptive Differential Privacy (ADP) framework for language modeling without resorting to prior privacy information. We estimate the probability that a linguistic item contains privacy based on a language model. We further propose a new Adam algorithm that adjusts the degree of differential privacy noise injected to the language model according to the estimated privacy probabilities. Experiments demonstrate that our ADP improves differentially private language modeling to achieve good protection from canary attackers. 2022.fl4nlp-1.3 diff --git a/data/xml/2022.flp.xml b/data/xml/2022.flp.xml index 538ba681d3..58ac3744c6 100644 --- a/data/xml/2022.flp.xml +++ b/data/xml/2022.flp.xml @@ -43,7 +43,7 @@ Transfer Learning Parallel Metaphor using Bilingual Embeddings - MariaBergerRuhr University Bochum + MariaBergerRuhr University Bochum 13-23 Automated metaphor detection in languages other than English is highly restricted as training corpora are comparably rare. One way to overcome this problem is transfer learning. This paper gives an overview on transfer learning techniques applied to NLP. We first introduce types of transfer learning, then we present work focusing on: i) transfer learning with cross-lingual embeddings; ii) transfer learning in machine translation; and iii) transfer learning using pre-trained transformer models. The paper is complemented by first experiments that make use of bilingual embeddings generated from different sources of parallel data: We i) present the preparation of a parallel Gold corpus; ii) examine the embeddings spaces to search for metaphoric words cross-lingually; iii) run first experiments in transfer learning German metaphor from English labeled data only. Results show that finding data sources for bilingual embeddings training and the vocabulary covered by these embeddings is critical for learning metaphor cross-lingually. 2022.flp-1.3 @@ -127,7 +127,7 @@ Distribution-Based Measures of Surprise for Creative Language: Experiments with Humor and Metaphor - Razvan C.BunescuDepartment of Computer Science, University of North Carolina at Charlotte + Razvan C.BunescuDepartment of Computer Science, University of North Carolina at Charlotte Oseremen O.UduehiSchool of EECS, Ohio University 68-78 Novelty or surprise is a fundamental attribute of creative output. As such, we postulate that a writer’s creative use of language leads to word choices and, more importantly, corresponding semantic structures that are unexpected for the reader. In this paper we investigate measures of surprise that rely solely on word distributions computed by language models and show empirically that creative language such as humor and metaphor is strongly correlated with surprise. Surprisingly at first, information content is observed to be at least as good a predictor of creative language as any of the surprise measures investigated. However, the best prediction performance is obtained when information and surprise measures are combined, showing that surprise measures capture an aspect of creative language that goes beyond information content. @@ -155,7 +155,7 @@ YaoFuThe University of Edinburgh ValentinaPyatkinBar-Ilan University IanMagnussonAllen Institute for AI - BhavanaDalvi MishraAllen Institute for AI + BhavanaDalvi MishraAllen Institute for AI PeterClarkAllen Institute for AI 84-93 Figurative language (e.g., “he flew like the wind”) is challenging to understand, as it is hard to tell what implicit information is being conveyed from the surface form alone. We hypothesize that to perform this task well, the reader needs to mentally elaborate the scene being described to identify a sensible meaning of the language. We present DREAM-FLUTE, a figurative language understanding system that does this, first forming a “mental model” of situations described in a premise and hypothesis before making an entailment/contradiction decision and generating an explanation. DREAM-FLUTE uses an existing scene elaboration model, DREAM, for constructing its “mental model.” In the FigLang2022 Shared Task evaluation, DREAM-FLUTE achieved (joint) first place (Acc@60=63.3%), and can perform even better with ensemble techniques, demonstrating the effectiveness of this approach. More generally, this work suggests that adding a reflective component to pretrained language models can improve their performance beyond standard fine-tuning (3.3% improvement in Acc@60). @@ -182,7 +182,7 @@ GiacomoAnerdiDepartment of Advanced Computing Sciences, Maastricht University PedroJeurisDepartment of Advanced Computing Sciences, Maastricht University Marijnten ThijDepartment of Advanced Computing Sciences, Maastricht University - RizaBatista-NavarroDepartment of Computer Science, The University of Manchester + RizaBatista-NavarroDepartment of Computer Science, The University of Manchester 100-110 Idiomatic expressions (or idioms) are phrases where the meaning of the phrase cannot be determined from the meaning of the individual words in the expression. Translating idioms between languages is therefore a challenging task. Transformer models based on contextual embeddings have advanced the state-of-the-art across many domains in the field of natural language processing. While research using transformers has advanced both idiom detection as well as idiom disambiguation, idiom translation has not seen a similar advancement. In this work, we investigate two approaches to fine-tuning a pretrained Text-to-Text Transfer Transformer (T5) model to perform idiom translation from English to German. The first approach directly translates English idiom-containing sentences to German, while the second is underpinned by idiom paraphrasing, firstly paraphrasing English idiomatic expressions to their simplified English versions before translating them to German. Results of our evaluation show that each of the approaches is able to generate adequate translations. 2022.flp-1.14 diff --git a/data/xml/2022.fnp.xml b/data/xml/2022.fnp.xml index db0c3fd3b9..bbb5110c9f 100644 --- a/data/xml/2022.fnp.xml +++ b/data/xml/2022.fnp.xml @@ -21,7 +21,7 @@ <fixed-case>F</fixed-case>in<fixed-case>RAD</fixed-case>: Financial Readability Assessment Dataset - 13,000+ Definitions of Financial Terms for Measuring Readability SohomGhosh ShovonSengupta - SudipNaskar + SudipNaskar Sunny KumarSingh 1–9 In today’s world, the advancement and spread of the Internet and digitalization have resulted in most information being openly accessible. This holds true for financial services as well. Investors make data driven decisions by analysing publicly available information like annual reports of listed companies, details regarding asset allocation of mutual funds, etc. Many a time these financial documents contain unknown financial terms. In such cases, it becomes important to look at their definitions. However, not all definitions are equally readable. Readability largely depends on the structure, complexity and constituent terms that make up a definition. This brings in the need for automatically evaluating the readability of definitions of financial terms. This paper presents a dataset, FinRAD consisting of financial terms, their definitions and embeddings. In addition to standard readability scores (like “Flesch Reading Index (FRI)”, “Automated Readability Index (ARI)”, “SMOG Index Score (SIS)”,“Dale-Chall formula (DCF)”, etc.), it also contains the readability scores (AR) assigned based on sources from which the terms have been collected. We manually inspect a sample from it to ensure the quality of the assignment. Subsequently, we prove that the rule-based standard readability scores (like “Flesch Reading Index (FRI)”, “Automated Readability Index (ARI)”, “SMOG Index Score (SIS)”,“Dale-Chall formula (DCF)”, etc.) do not correlate well with the manually assigned binary readability scores of definitions of financial terms. Finally, we present a few neural baselines using transformer based architecture to automatically classify these definitions as readable or not. Pre-trained FinBERT model fine-tuned on FinRAD corpus performs the best (AU-ROC = 0.9927, F1 = 0.9610). This corpus can be downloaded from https://github.com/sohomghosh/FinRAD_Financial_Readability_Assessment_Dataset. @@ -76,13 +76,13 @@ MahmoudEl-Haj NadhemZmandar PaulRayson - AhmedAbuRa’ed + AhmedAbuRa’ed MarinaLitvak NikiforosPittaras GeorgeGiannakopoulos ArisKosmopoulos BlancaCarbajo-Coronado - AntonioMoreno-Sandoval + AntonioMoreno-Sandoval 43–52 This paper presents the results and findings of the Financial Narrative Summarisation Shared Task on summarising UK, Greek and Spanish annual reports. The shared task was organised as part of the Financial Narrative Processing 2022 Workshop (FNP 2022 Workshop). The Financial Narrative summarisation Shared Task (FNS-2022) has been running since 2020 as part of the Financial Narrative Processing (FNP) workshop series (El-Haj et al., 2022; El-Haj et al., 2021; El-Haj et al., 2020b; El-Haj et al., 2019c; El-Haj et al., 2018). The shared task included one main task which is the use of either abstractive or extractive automatic summarisers to summarise long documents in terms of UK, Greek and Spanish financial annual reports. This shared task is the third to target financial documents. The data for the shared task was created and collected from publicly available annual reports published by firms listed on the Stock Exchanges of UK, Greece and Spain. A total number of 14 systems from 7 different teams participated in the shared task. 2022.fnp-1.6 @@ -150,7 +150,7 @@ SandraBellato BlancaCarbajo Coronado MahmoudEl-Haj - IsmailEl Maarouf + IsmailEl Maarouf MeiGan AnaGisbert AntonioMoreno Sandoval @@ -176,7 +176,7 @@ CataldoMusto MarcoDeGemmis GeorgiosLekkas - GiovanniSemeraro + GiovanniSemeraro 95–99 In this paper, we introduce the results of our submitted system to the FinTOC 2022 task. We address the task using a two-stage process: first, we detect titles using Document Image Analysis, then we train a supervised model for the hierarchical level prediction. We perform Document Image Analysis using a pre-trained Faster R-CNN on the PublyaNet dataset. We fine-tuned the model on the FinTOC 2022 training set. We extract orthographic and layout features from detected titles and use them to train a Random Forest model to predict the title level. The proposed system ranked #1 on both Title Detection and the Table of Content extraction tasks for Spanish. The system ranked #3 on both the two subtasks for English and French. 2022.fnp-1.14 @@ -238,7 +238,7 @@ <fixed-case>LIPI</fixed-case> at <fixed-case>F</fixed-case>in<fixed-case>C</fixed-case>ausal 2022: Mining Causes and Effects from Financial Texts SohomGhosh - SudipNaskar + SudipNaskar 121–123 While reading financial documents, investors need to know the causes and their effects. This empowers them to make data-driven decisions. Thus, there is a need to develop an automated system for extracting causes and their effects from financial texts using Natural Language Processing. In this paper, we present the approach our team LIPI followed while participating in the FinCausal 2022 shared task. This approach is based on the winning solution of the first edition of FinCausal held in the year 2020. 2022.fnp-1.20 @@ -281,7 +281,7 @@ <fixed-case>MNLP</fixed-case> at <fixed-case>F</fixed-case>in<fixed-case>C</fixed-case>ausal2022: Nested <fixed-case>NER</fixed-case> with a Generative Model JooyeonLee Luan HuyPham - ÖzlemUzuner + ÖzlemUzuner 135–138 This paper describes work performed for the FinCasual 2022 Shared Task “Financial Document Causality Detection” (FinCausal 2022). As the name implies, the task involves extraction of casual and consequential elements from financial text. Our approach focuses employing Nested NER using the Text-to-Text Transformer (T5) generative transformer models while applying different combinations of datasets and tagging methods. Our system reports accuracy of 79% in Exact Match comparison and F-measure score of 92% token level measurement. 2022.fnp-1.24 diff --git a/data/xml/2022.games.xml b/data/xml/2022.games.xml index d265fb6e71..3226a24183 100644 --- a/data/xml/2022.games.xml +++ b/data/xml/2022.games.xml @@ -40,7 +40,7 @@ Less Text, More Visuals: Evaluating the Onboarding Phase in a <fixed-case>GWAP</fixed-case> for <fixed-case>NLP</fixed-case> FatimaAlthani ChrisMadge - MassimoPoesio + MassimoPoesio 17–27 Games-with-a-purpose find attracting players a challenge. To improve player recruitment, we explored two game design elements that can increase player engagement during the onboarding phase; a narrative and a tutorial. In a qualitative study with 12 players of linguistic and language learning games, we examined the effect of presentation format on players’ engagement. Our reflexive thematic analysis found that in the onboarding phase of a GWAP for NLP, presenting players with visuals is expected and pre- senting too much text overwhelms them. Furthermore, players found that the instructions they were presented with lacked linguistic context. Additionally, the tutorial and game interface required refinement as the feedback is unsupportive and the graphics were not clear. 2022.games-1.3 diff --git a/data/xml/2022.gebnlp.xml b/data/xml/2022.gebnlp.xml index 33813fe299..b21dbf05b2 100644 --- a/data/xml/2022.gebnlp.xml +++ b/data/xml/2022.gebnlp.xml @@ -5,7 +5,7 @@ Proceedings of the 4th Workshop on Gender Bias in Natural Language Processing (GeBNLP) ChristianHardmeier ChristineBasta - Marta R.Costa-jussà + Marta R.Costa-jussà GabrielStanovsky HilaGonen Association for Computational Linguistics @@ -23,7 +23,7 @@ Analyzing Hate Speech Data along Racial, Gender and Intersectional Axes AntonisMaronikolakis PhilipBaader - HinrichSchütze + HinrichSchütze 1-7 To tackle the rising phenomenon of hate speech, efforts have been made towards data curation and analysis. When it comes to analysis of bias, previous work has focused predominantly on race. In our work, we further investigate bias in hate speech datasets along racial, gender and intersectional axes. We identify strong bias against African American English (AAE), masculine and AAE+Masculine tweets, which are annotated as disproportionately more hateful and offensive than from other demographics. We provide evidence that BERT-based models propagate this bias and show that balancing the training data for these protected attributes can lead to fairer models with regards to gender, but not race. 2022.gebnlp-1.1 @@ -35,7 +35,7 @@ JialiLi ShuchengZhu YingLiu - PengyuanLiu + PengyuanLiu 8-16 Gender is a construction in line with social perception and judgment. An important means of this construction is through languages. When natural language processing tools, such as word embeddings, associate gender with the relevant categories of social perception and judgment, it is likely to cause bias and harm to those groups that do not conform to the mainstream social perception and judgment. Using 12,251 Chinese word embeddings as intermedium, this paper studies the relationship between social perception and judgment categories and gender. The results reveal that these grammatical gender-neutral Chinese word embeddings show a certain gender bias, which is consistent with the mainstream society’s perception and judgment of gender. Men are judged by their actions and perceived as bad, easily-disgusted, bad-tempered and rational roles while women are judged by their appearances and perceived as perfect, either happy or sad, and emotional roles. 2022.gebnlp-1.2 @@ -58,7 +58,7 @@ LucyHavens MelissaTerras BenjaminBach - BeatriceAlex + BeatriceAlex 30-57 Mitigating harms from gender biased language in Natural Language Processing (NLP) systems remains a challenge, and the situated nature of language means bias is inescapable in NLP data. Though efforts to mitigate gender bias in NLP are numerous, they often vaguely define gender and bias, only consider two genders, and do not incorporate uncertainty into models. To address these limitations, in this paper we present a taxonomy of gender biased language and apply it to create annotated datasets. We created the taxonomy and annotated data with the aim of making gender bias in language transparent. If biases are communicated clearly, varieties of biased language can be better identified and measured. Our taxonomy contains eleven types of gender biases inclusive of people whose gender expressions do not fit into the binary conceptions of woman and man, and whose gender differs from that they were assigned at birth, while also allowing annotators to document unknown gender information. The taxonomy and annotated data will, in future work, underpin analysis and more equitable language model development. 2022.gebnlp-1.4 @@ -85,7 +85,7 @@ Gender Biases and Where to Find Them: Exploring Gender Bias in Pre-Trained Transformer-based Language Models Using Movement Pruning PrzemyslawJoniak - AkikoAizawa + AkikoAizawa 67-73 Language model debiasing has emerged as an important field of study in the NLP community. Numerous debiasing techniques were proposed, but bias ablation remains an unaddressed issue. We demonstrate a novel framework for inspecting bias in pre-trained transformer-based language models via movement pruning. Given a model and a debiasing objective, our framework finds a subset of the model containing less bias than the original model. We implement our framework by pruning the model while fine-tuning it on the debasing objective. Optimized are only the pruning scores – parameters coupled with the model’s weights that act as gates. We experiment with pruning attention heads, an important building block of transformers: we prune square blocks, as well as establish a new way of pruning the entire heads. Lastly, we demonstrate the usage of our framework using gender bias, and based on our findings, we propose an improvement to an existing debiasing method. Additionally, we re-discover a bias-performance trade-off: the better the model performs, the more bias it contains. 2022.gebnlp-1.6 @@ -120,7 +120,7 @@ Afra FeyzaAkyürek Muhammed YusufKocyigit SejinPaik - Derry TantiWijaya + Derry TantiWijaya 76-76 Researchers have devised numerous ways to quantify social biases vested in pretrained language models. As some language models are capable of generating coherent completions given a set of textual prompts, several prompting datasets have been proposed to measure biases between social groups—posing language generation as a way of identifying biases. In this opinion paper, we analyze how specific choices of prompt sets, metrics, automatic tools and sampling strategies affect bias results. We find out that the practice of measuring biases through text completion is prone to yielding contradicting results under different experiment settings. We additionally provide recommendations for reporting biases in open-ended language generation for a more complete outlook of biases exhibited by a given language model. Code to reproduce the results is released under https://github.com/feyzaakyurek/bias-textgen. 2022.gebnlp-1.9 @@ -145,7 +145,7 @@ Michael HenryTessler NicoleDubosh KatherineHiller - RogerLevy + RogerLevy 86-93 Though approximately 50% of medical school graduates today are women, female physicians tend to be underrepresented in senior positions, make less money than their male counterparts and receive fewer promotions. There is a growing body of literature demonstrating gender bias in various forms of evaluation in medicine, but this work was mainly conducted by looking for specific words using fixed dictionaries such as LIWC and focused on global assessments of performance such as recommendation letters. We use a dataset of written and quantitative assessments of medical student performance on individual shifts of work, collected across multiple institutions, to investigate the extent to which gender bias exists in a day-to-day context for medical students. We investigate differences in the narrative comments given to male and female students by both male or female faculty assessors, using a fine-tuned BERT model. This allows us to examine whether groups are written about in systematically different ways, without relying on hand-crafted wordlists or topic models. We compare these results to results from the traditional LIWC method and find that, although we find no evidence of group-level gender bias in this dataset, terms related to family and children are used more in feedback given to women. 2022.gebnlp-1.11 @@ -157,7 +157,7 @@ BeatriceSavoldi MarcoGaido LuisaBentivogli - MatteoNegri + MatteoNegri MarcoTurchi 94-111 Due to the complexity of bias and the opaque nature of current neural approaches, there is a rising interest in auditing language technologies. In this work, we contribute to such a line of inquiry by exploring the emergence of gender bias in Speech Translation (ST). As a new perspective, rather than focusing on the final systems only, we examine their evolution over the course of training. In this way, we are able to account for different variables related to the learning dynamics of gender translation, and investigate when and how gender divides emerge in ST. Accordingly, for three language pairs (en ? es, fr, it) we compare how ST systems behave for masculine and feminine translation at several levels of granularity. We find that masculine and feminine curves are dissimilar, with the feminine one being characterized by more erratic behaviour and late improvements over the course of training. Also, depending on the considered phenomena, their learning trends can be either antiphase or parallel. Overall, we show how such a progressive analysis can inform on the reliability and time-wise acquisition of gender, which is concealed by static evaluations and standard metrics. @@ -258,7 +258,7 @@ Occupational Biases in <fixed-case>N</fixed-case>orwegian and Multilingual Language Models SamiaTouileb - LiljaØvrelid + LiljaØvrelid ErikVelldal 200-211 In this paper we explore how a demographic distribution of occupations, along gender dimensions, is reflected in pre-trained language models. We give a descriptive assessment of the distribution of occupations, and investigate to what extent these are reflected in four Norwegian and two multilingual models. To this end, we introduce a set of simple bias probes, and perform five different tasks combining gendered pronouns, first names, and a set of occupations from the Norwegian statistics bureau. We show that language specific models obtain more accurate results, and are much closer to the real-world distribution of clearly gendered occupations. However, we see that none of the models have correct representations of the occupations that are demographically balanced between genders. We also discuss the importance of the training data on which the models were trained on, and argue that template-based bias probes can sometimes be fragile, and a simple alteration in a template can change a model’s behavior. @@ -285,7 +285,7 @@ <fixed-case>H</fixed-case>etero<fixed-case>C</fixed-case>orpus: A Corpus for Heteronormative Language Detection JuanVásquez - GemmaBel-Enguix + GemmaBel-Enguix Scott ThomasAndersen Sergio-LuisOjeda-Trueba 225-234 @@ -302,7 +302,7 @@ AshleyOh SanikaNatu SwethaGangu - Alan W.Black + Alan W.Black EmmaStrubell 235-243 Films are a rich source of data for natural language processing. OpenSubtitles (Lison and Tiedemann, 2016) is a popular movie script dataset, used for training models for tasks such as machine translation and dialogue generation. However, movies often contain biases that reflect society at the time, and these biases may be introduced during pre-training and influence downstream models. We perform sentiment analysis on template infilling (Kurita et al., 2019) and the Sentence Embedding Association Test (May et al., 2019) to measure how BERT-based language models change after continued pre-training on OpenSubtitles. We consider gender bias as a primary motivating case for this analysis, while also measuring other social biases such as disability. We show that sentiment analysis on template infilling is not an effective measure of bias due to the rarity of disability and gender identifying tokens in the movie dialogue. We extend our analysis to a longitudinal study of bias in film dialogue over the last 110 years and find that continued pre-training on OpenSubtitles encodes additional bias into BERT. We show that BERT learns associations that reflect the biases and representation of each film era, suggesting that additional care must be taken when using historical data. @@ -337,7 +337,7 @@ JaimeenAhn HwaranLee JinhwaKim - AliceOh + AliceOh 266-272 Knowledge distillation is widely used to transfer the language understanding of a large model to a smaller model. However, after knowledge distillation, it was found that the smaller model is more biased by gender compared to the source large model. This paper studies what causes gender bias to increase after the knowledge distillation process. Moreover, we suggest applying a variant of the mixup on knowledge distillation, which is used to increase generalizability during the distillation process, not for augmentation. By doing so, we can significantly reduce the gender bias amplification after knowledge distillation. We also conduct an experiment on the GLUE benchmark to demonstrate that even if the mixup is applied, it does not have a significant adverse effect on the model’s performance. 2022.gebnlp-1.27 diff --git a/data/xml/2022.gem.xml b/data/xml/2022.gem.xml index aad765c423..a4a30eaf7f 100644 --- a/data/xml/2022.gem.xml +++ b/data/xml/2022.gem.xml @@ -25,8 +25,8 @@ Improving abstractive summarization with energy-based re-ranking DiogoPernes - AfonsoMendes - André F. T.Martins + AfonsoMendes + André F. T.Martins 1-17 Current abstractive summarization systems present important weaknesses which prevent their deployment in real-world applications, such as the omission of relevant information and the generation of factual inconsistencies (also known as hallucinations). At the same time, automatic evaluation metrics such as CTC scores (Deng et al., 2021) have been recently proposed that exhibit a higher correlation with human judgments than traditional lexical-overlap metrics such as ROUGE. In this work, we intend to close the loop by leveraging the recent advances in summarization metrics to create quality-aware abstractive summarizers. Namely, we propose an energy-based model that learns to re-rank summaries according to one or a combination of these metrics. We experiment using several metrics to train our energy-based re-ranker and show that it consistently improves the scores achieved by the predicted summaries. Nonetheless, human evaluation results show that the re-ranking approach should be used with care for highly abstractive summaries, as the available metrics are not yet sufficiently reliable for this purpose. 2022.gem-1.1 @@ -67,8 +67,8 @@ KaamraanKhan Avinash KumarSingh SubhasishGhosh - TapasNayak - GirishPalshikar + TapasNayak + GirishPalshikar IndrajitBhattacharya 43-53 We explore the task of automated generation of technical interview questions from a given textbook. Such questions are different from those for reading comprehension studied in question generation literature. We curate a context based interview questions data set for Machine Learning and Deep Learning from two popular textbooks. We first explore the possibility of using a large generative language model (GPT-3) for this task in a zero shot setting. We then evaluate the performance of smaller generative models such as BART fine-tuned on weakly supervised data obtained using GPT-3 and hand-crafted templates. We deploy an automatic question importance assignment technique to figure out suitability of a question in a technical interview. It improves the evaluation results in many dimensions. We dissect the performance of these models for this task and also scrutinize the suitability of questions generated by them for use in technical interviews. @@ -107,7 +107,7 @@ JohnGlover FedericoFancellu VasudevanJagannathan - Matthew R.Gormley + Matthew R.Gormley ThomasSchaaf 97-105 Scoring the factuality of a generated summary involves measuring the degree to which a target text contains factual information using the input document as support. Given the similarities in the problem formulation, previous work has shown that Natural Language Inference models can be effectively repurposed to perform this task. As these models are trained to score entailment at a sentence level, several recent studies have shown that decomposing either the input document or the summary into sentences helps with factuality scoring. But is fine-grained decomposition always a winning strategy? In this paper we systematically compare different granularities of decomposition - from document to sub-sentence level, and we show that the answer is no. Our results show that incorporating additional context can yield improvement, but that this does not necessarily apply to all datasets. We also show that small changes to previously proposed entailment-based scoring methods can result in better performance, highlighting the need for caution in model and methodology selection for downstream tasks. @@ -170,7 +170,7 @@ EduardoCalò Elzevan der Werf AlbertGatt - Keesvan Deemter + Keesvan Deemter 148-171 Logic-to-text generation is an important yet underrepresented area of natural language generation (NLG). In particular, most previous works on this topic lack sound evaluation. We address this limitation by building and evaluating a system that generates high-quality English text given a first-order logic (FOL) formula as input. We start by analyzing the performance of Ranta (2011)’s system. Based on this analysis, we develop an extended version of the system, which we name LoLa, that performs formula simplification based on logical equivalences and syntactic transformations. We carry out an extensive evaluation of LoLa using standard automatic metrics and human evaluation. We compare the results against a baseline and Ranta (2011)’s system. The results show that LoLa outperforms the other two systems in most aspects. 2022.gem-1.13 @@ -184,7 +184,7 @@ Štěpán LarsLaichter ArabellaSinclair Margotvan der Goot - RaquelFernandez + RaquelFernandez SandroPezzelle 172-188 To be trusted and perceived as natural and coherent, conversational systems must adapt to the language of their users. While personalized dialogue is a promising direction, controlling generation for fine-grained language features remains a challenge in this approach. A recent line of research showed the effectiveness of leveraging pre-trained language models toward adapting to a text’s topic or sentiment. In this study, we build on these approaches and focus on a higher-level dimension of language variation: speakers’ age. We frame the task as a dialogue response generation, and test methods based on bag-of-words (BoW) and neural discriminators (Disc) to condition the output of GPT-2 and DialoGPT without altering the parameters of the language models. We show that Disc models achieve a higher degree of detectable control than BoW models based on automatic evaluation. In contrast, humans can partially detect age differences in BoW but not Disc responses. Since BoW responses are deemed better than Disc ones by humans, simple controllable methods thus appear to be a better tradeoff between adaptation and language quality. Our work confirms the challenges of adapting to higher-level dimensions of language variation. Moreover, it highlights the need to evaluate natural language generation thoroughly. @@ -260,7 +260,7 @@ Unsupervised Token-level Hallucination Detection from Summary Generation By-products AndreasMarfurt - JamesHenderson + JamesHenderson 248-261 Hallucinations in abstractive summarization are model generations that are unfaithful to the source document. Current methods for detecting hallucinations operate mostly on noun phrases and named entities, and restrict themselves to the XSum dataset, which is known to have hallucinations in 3 out of 4 training examples (Maynez et al., 2020). We instead consider the CNN/DailyMail dataset where the summarization model has not seen abnormally many hallucinations during training. We automatically detect candidate hallucinations at the token level, irrespective of its part of speech. Our detection comes essentially for free, as we only use information the model already produces during generation of the summary. This enables practitioners to jointly generate a summary and identify possible hallucinations, with minimal overhead. We repurpose an existing factuality dataset and create our own token-level annotations. The evaluation on these two datasets shows that our model achieves better precision-recall tradeoffs than its competitors, which additionally require a model forward pass. 2022.gem-1.21 @@ -272,8 +272,8 @@ AndreasMarfurt AshleyThornton DavidSylvan - Lonnekevan der Plas - JamesHenderson + Lonnekevan der Plas + JamesHenderson 262-275 A wide variety of tasks have been framed as text-to-text tasks to allow processing by sequence-to-sequence models. We propose a new task of generating a semi-structured interpretation of a source document. The interpretation is semi-structured in that it contains mandatory and optional fields with free-text information. This structure is surfaced by human annotations, which we standardize and convert to text format. We then propose an evaluation technique that is generally applicable to any such semi-structured annotation, called equivalence classes evaluation. The evaluation technique is efficient and scalable; it creates a large number of evaluation instances from a comparably cheap clustering of the free-text information by domain experts. For our task, we release a dataset about the monetary policy of the Federal Reserve. On this corpus, our evaluation shows larger differences between pretrained models than standard text generation metrics. 2022.gem-1.22 @@ -320,8 +320,8 @@ On reporting scores and agreement for error annotation tasks - MajaPopović - AnyaBelz + MajaPopović + AnyaBelz 306-315 This work examines different ways of aggregating scores for error annotation in MT outputs: raw error counts, error counts normalised over total number of words (word percentage’), and error counts normalised over total number of errors (error percentage’). We use each of these three scores to calculate inter-annotator agreement in the form of Krippendorff’s alpha and Pearson’s r and compare the obtained numbers, overall and separately for different types of errors. While each score has its advantages depending on the goal of the evaluation, we argue that the best way of estimating inter-annotator agreement using such numbers are raw counts. If the annotation process ensures that the total number of words cannot differ among the annotators (for example, due to adding omission symbols), normalising over number of words will lead to the same conclusions. In contrast, total number of errors is very subjective because different annotators often perceive different amount of errors in the same text, therefore normalising over this number can indicate lower agreements. 2022.gem-1.26 @@ -360,7 +360,7 @@ Most <fixed-case>NLG</fixed-case> is Low-Resource: here’s what we can do about it - David M.Howcroft + David M.Howcroft DimitraGkatzia 336-350 Many domains and tasks in natural language generation (NLG) are inherently ‘low-resource’, where training data, tools and linguistic analyses are scarce. This poses a particular challenge to researchers and system developers in the era of machine-learning-driven NLG. In this position paper, we initially present the challenges researchers & developers often encounter when dealing with low-resource settings in NLG. We then argue that it is unsustainable to collect large aligned datasets or build large language models from scratch for every possible domain due to cost, labour, and time constraints, so researching and developing methods and resources for low-resource settings is vital. We then discuss current approaches to low-resource NLG, followed by proposed solutions and promising avenues for future work in NLG for low-resource settings. @@ -397,7 +397,7 @@ A Survey of Recent Error Annotation Schemes for Automatically Generated Text RudaliHuidrom - AnyaBelz + AnyaBelz 383-398 While automatically computing numerical scores remains the dominant paradigm in NLP system evaluation, error analysis is receiving increasing attention, with numerous error annotation schemes being proposed for automatically generated text. However, there is little agreement about what error annotation schemes should look like, how many different types of errors should be distinguished and at what level of granularity. In this paper, our aim is to map out recent work on annotating errors in automatically generated text, with a particular focus on error taxonomies. We describe our systematic paper selection process, and survey the error annotation schemes reported in the papers, drawing out similarities and differences between them. Finally, we characterise the issues that would make it difficult to move from the current situation to a standardised error taxonomy for annotating errors in automatically generated text. 2022.gem-1.33 @@ -408,7 +408,7 @@ What’s in a (dataset’s) name? The case of <fixed-case>B</fixed-case>ig<fixed-case>P</fixed-case>atent SilviaCasola - AlbertoLavelli + AlbertoLavelli HoracioSaggion 399-404 Sharing datasets and benchmarks has been crucial for rapidly improving Natural Language Processing models and systems. Documenting datasets’ characteristics (and any modification introduced over time) is equally important to avoid confusion and make comparisons reliable. Here, we describe the case of BigPatent, a dataset for patent summarization that exists in at least two rather different versions under the same name. While previous literature has not clearly distinguished among versions, their differences do not only lay on a surface level but also modify the dataset’s core nature and, thus, the complexity of the summarization task. While this paper describes a specific case, we aim to shed light on new challenges that might emerge in resource sharing and advocate for comprehensive documentation of datasets and models. @@ -458,7 +458,7 @@ HwanheeLee CheoneumPark SeunghyunYoon - TrungBui + TrungBui FranckDernoncourt JuaeKim KyominJung @@ -485,7 +485,7 @@ Error Analysis of <fixed-case>T</fixed-case>o<fixed-case>TT</fixed-case>o Table-to-Text Neural <fixed-case>NLG</fixed-case> Models BarkaviSundararajan - SomayajuluSripada + SomayajuluSripada EhudReiter 456-470 We report error analysis of outputs from seven Table-to-Text generation models fine-tuned on ToTTo, an open-domain English language dataset. A manual error annotation of a subset of outputs (a total of 5,278 sentences) belonging to the topic of Politics generated by these seven models has been carried out. Our error annotation focused on eight categories of errors. The error analysis shows that more than 45% of sentences from each of the seven models have been error-free. It uncovered some of the specific classes of errors such as WORD errors that are the dominant errors in all the seven models, NAME and NUMBER errors are more committed by two of the GeM benchmark models, whereas DATE-DIMENSION and OTHER category of errors are more common in our Table-to-Text models. @@ -524,7 +524,7 @@ MaximeDe Bruyn EhsanLotfi JeskaBuhmann - WalterDaelemans + WalterDaelemans 494-508 What do language models know about our world? This question is hard to answer but important to get right. To this end, we introduce 20Q, a novel benchmark using the Twenty Questions game to evaluate world knowledge and common sense of language models. Thanks to our overlap-free benchmark, language models learn the game of Twenty Questions without learning relevant knowledge for the test set. We uncover two intuitive factors influencing the world knowledge of language models: the size of the model and the topic frequency in the pre-training data. Moreover, we show that in-context learning is inefficient for evaluating language models’ world knowledge — fine-tuning is necessary to show their true capabilities. Lastly, our results show room for improvement to enhance the world knowledge and common sense of large language models. A potential solution would be to up-sample unfrequent topics in the pre-training of language models. 2022.gem-1.46 @@ -536,7 +536,7 @@ EhsanLotfi MaximeDe Bruyn JeskaBuhmann - WalterDaelemans + WalterDaelemans 509-519 Generative conversational agents are known to suffer from problems like inconsistency and hallucination, and a big challenge in studying these issues remains evaluation: they are not properly reflected in common text generation metrics like perplexity or BLEU, and alternative implicit methods like semantic similarity or NLI labels can be misguided when few specific tokens are decisive. In this work we propose ConsisTest; a factual consistency benchmark including both WH and Y/N questions based on PersonaChat, along with a hybrid evaluation pipeline which aims to get the best of symbolic and sub-symbolic methods. Using these and focusing on pretrained generative models like BART, we provide detailed statistics and analysis on how the model’s consistency is affected by variations in question and context. 2022.gem-1.47 @@ -557,7 +557,7 @@ Exploring a <fixed-case>POS</fixed-case>-based Two-stage Approach for Improving Low-Resource <fixed-case>AMR</fixed-case>-to-Text Generation - Marco AntonioSobrevilla Cabezudo + Marco AntonioSobrevilla Cabezudo ThiagoPardo 531-538 This work presents a two-stage approach for tackling low-resource AMR-to-text generation for Brazilian Portuguese. Our approach consists of (1) generating a masked surface realization in which some tokens are masked according to its Part-of-Speech class and (2) infilling the masked tokens according to the AMR graph and the previous masked surface realization. Results show a slight improvement over the baseline, mainly in BLEU (1.63) and METEOR (0.02) scores. Moreover, we evaluate the pipeline components separately, showing that the bottleneck of the pipeline is the masked surface realization. Finally, the human evaluation suggests that models still suffer from hallucinations, and some strategies to deal with the problems found are proposed. @@ -581,7 +581,7 @@ DanielKing ZejiangShen NishantSubramani - Daniel S.Weld + Daniel S.Weld IzBeltagy DougDowney 555-571 diff --git a/data/xml/2022.gwll.xml b/data/xml/2022.gwll.xml index f92a1ce49b..d278d6b30d 100644 --- a/data/xml/2022.gwll.xml +++ b/data/xml/2022.gwll.xml @@ -40,7 +40,7 @@ KaterinaGkirtzou MaximIonov BesimKabashi - FahadKhan + FahadKhan Ciprian-OctavianTruică 10–18 Following presentations of frequency and attestations, and embeddings and distributional similarity, this paper introduces the third cornerstone of the emerging OntoLex module for Frequency, Attestation and Corpus-based Information, OntoLex-FrAC. We provide an RDF vocabulary for collocations, established as a consensus over contributions from five different institutions and numerous data sets, with the goal of eliciting feedback from reviewers, workshop audience and the scientific community in preparation of the final consolidation of the OntoLex-FrAC module, whose publication as a W3C community report is foreseen for the end of this year. The novel collocation component of OntoLex-FrAC is described in application to a lexicographic resource and corpus-based collocation scores available from the web, and finally, we demonstrate the capability and genericity of the model by showing how to retrieve and aggregate collocation information by means of SPARQL, and its export to a tabular format, so that it can be easily processed in downstream applications. diff --git a/data/xml/2022.hcinlp.xml b/data/xml/2022.hcinlp.xml index 1d36e34f2e..8d7eeb97ef 100644 --- a/data/xml/2022.hcinlp.xml +++ b/data/xml/2022.hcinlp.xml @@ -4,11 +4,11 @@ Proceedings of the Second Workshop on Bridging Human--Computer Interaction and Natural Language Processing Su LinBlodgett - HalDaumé III + HalDaumé III MichaelMadaio AniNenkova BrendanO'Connor - HannaWallach + HannaWallach QianYang Association for Computational Linguistics
Seattle, Washington
@@ -63,7 +63,7 @@
Design Considerations for an <fixed-case>NLP</fixed-case>-Driven Empathy and Emotion Interface for Clinician Training via Telemedicine - RoxanaGirju + RoxanaGirju MarinaGirju 21-27 As digital social platforms and mobile technologies become more prevalent and robust, the use of Artificial Intelligence (AI) in facilitating human communication will grow. This, in turn, will encourage development of intuitive, adaptive, and effective empathic AI interfaces that better address the needs of socially and culturally diverse communities. In this paper, we present several design considerations of an intelligent digital interface intended to guide the clinicians toward more empathetic communication. This approach allows various communities of practice to investigate how AI, on one side, and human communication and healthcare needs, on the other, can contribute to each other’s development. @@ -87,7 +87,7 @@ ErinPacquetet SougataSaha SouvikDas - RohiniSrihari + RohiniSrihari 34-39 This paper analyzes data from the 2021 Amazon Alexa Prize Socialbot Grand Challenge 4, in order to better understand the differences between human-computer interactions (HCI) in a socialbot setting and conventional human-to-human interactions. We find that because socialbots are a new genre of HCI, we are still negotiating norms to guide interactions in this setting. We present several notable patterns in user behavior toward socialbots, which have important implications for guiding future work in the development of conversational agents. 2022.hcinlp-1.5 diff --git a/data/xml/2022.humeval.xml b/data/xml/2022.humeval.xml index 4c8a06cd8e..b141a1929f 100644 --- a/data/xml/2022.humeval.xml +++ b/data/xml/2022.humeval.xml @@ -3,8 +3,8 @@ Proceedings of the 2nd Workshop on Human Evaluation of NLP Systems (HumEval) - AnyaBelz - MajaPopović + AnyaBelz + MajaPopović EhudReiter AnastasiaShimorina Association for Computational Linguistics @@ -57,10 +57,10 @@ Human evaluation of web-crawled parallel corpora for machine translation - GemaRamírez-Sánchez + GemaRamírez-Sánchez MartaBañón JaumeZaragoza-Bernabeu - SergioOrtiz Rojas + SergioOrtiz Rojas 32-41 Quality assessment has been an ongoing activity of the series of ParaCrawl efforts to crawl massive amounts of parallel data from multilingual websites for 29 languages. The goal of ParaCrawl is to get parallel data that is good for machine translation. To prove so, both, automatic (extrinsic) and human (intrinsic and extrinsic) evaluation tasks have been included as part of the quality assessment activity of the project. We sum up the various methods followed to address these evaluation tasks for the web-crawled corpora produced and their results. We review their advantages and disadvantages for the final goal of the ParaCrawl project and the related ongoing project MaCoCu. 2022.humeval-1.4 diff --git a/data/xml/2022.icnlsp.xml b/data/xml/2022.icnlsp.xml index a72072566a..2c004717ba 100644 --- a/data/xml/2022.icnlsp.xml +++ b/data/xml/2022.icnlsp.xml @@ -19,7 +19,7 @@ Error correction and extraction in request dialogs StefanConstantin - AlexWaibel + AlexWaibel 2–11 2022.icnlsp-1.1 constantin-waibel-2022-error @@ -37,7 +37,7 @@ HongruWang MingyuCui ZimoZhou - Kam-FaiWong + Kam-FaiWong 19–29 2022.icnlsp-1.3 wang-etal-2022-topicrefine @@ -47,7 +47,7 @@ ZezhongWang HongruWang Wai ChungKwan - Kam-FaiWong + Kam-FaiWong 30–39 2022.icnlsp-1.4 wang-etal-2022-prior @@ -75,8 +75,8 @@ Improving <fixed-case>NL</fixed-case>-to-Query Systems through Re-ranking of Semantic Hypothesis Piusvon Däniken - JanDeriu - EnekoAgirre + JanDeriu + EnekoAgirre UrsinBrunner MarkCieliebak KurtStockinger @@ -103,7 +103,7 @@ Performance of two <fixed-case>F</fixed-case>rench <fixed-case>BERT</fixed-case> models for <fixed-case>F</fixed-case>rench language on verbatim transcripts and online posts EmmanuelleKelodjoue - JérômeGoulian + JérômeGoulian DidierSchwab 88–94 2022.icnlsp-1.10 @@ -181,7 +181,7 @@ Comparison of Token- and Character-Level Approaches to Restoration of Spaces, Punctuation, and Capitalization in Various Languages LaurenceDyer - AnthonyHughes + AnthonyHughes DhwaniShah BurcuCan 168–178 @@ -243,7 +243,7 @@ A deep sentiment analysis of <fixed-case>T</fixed-case>unisian dialect comments on multi-domain posts in different social media platforms EmnaFsih RahmaBoujelbane - Lamia HadrichBelguith + Lamia HadrichBelguith 226–233 2022.icnlsp-1.26 fsih-etal-2022-deep diff --git a/data/xml/2022.icon.xml b/data/xml/2022.icon.xml index dc075c3ad9..f9579c5849 100644 --- a/data/xml/2022.icon.xml +++ b/data/xml/2022.icon.xml @@ -50,7 +50,7 @@ Knowledge Enhanced Deep Learning Model for Radiology Text Generation KaveriKaleIndian Institute of Technology, Bombay - PushpakBhattacharyaIIT Bombay + PushpakBhattacharyaIIT Bombay AdityaShettyCandy Breach Hospital, Mumbai MilindGuneConsultant Radiologist, Thane KushShrivastavaAugnito India Pvt Ltd @@ -65,7 +65,7 @@ Named Entity Recognition for Code-Mixed <fixed-case>K</fixed-case>annada-<fixed-case>E</fixed-case>nglish Social Media Data PoojithaNandigamIIIT Hyderabad AbhinavAppidiIIIT Hyderabad - ManishShrivastavaIIIT Hyderabad + ManishShrivastavaIIIT Hyderabad 43-49 Named Entity Recognition (NER) is a critical task in the field of Natural Language Processing (NLP) and is also a sub-task of Information Extraction. There has been a significant amount of work done in entity extraction and Named Entity Recognition for resource-rich languages. Entity extraction from code-mixed social media data like tweets from twitter complicates the problem due to its unstructured, informal, and incomplete information available in tweets. Here, we present work on NER in Kannada-English code-mixed social media corpus with corresponding named entity tags referring to Organisation (Org), Person (Pers), and Location (Loc). We experimented with machine learning classification models like Conditional Random Fields (CRF), Bi-LSTM, and Bi-LSTM-CRF models on our corpus. 2022.icon-main.5 @@ -160,7 +160,7 @@ SandhyaSinghBanasthali Vidyapith KushagraShreeIIT Patna SriparnaSahaIIT Patna - PushpakBhattacharyyaIIT Patna + PushpakBhattacharyyaIIT Patna GladvinChinnaduraiLgsi ManishVatsaLgsi 92-98 @@ -257,7 +257,7 @@ <fixed-case>SC</fixed-case>on<fixed-case>E</fixed-case>:Contextual Relevance based <fixed-case>S</fixed-case>ignificant <fixed-case>C</fixed-case>ompo<fixed-case>N</fixed-case>ent <fixed-case>E</fixed-case>xtraction from Contracts HiranmaiAdibhatlaIIIT Hyderabad - ManishShrivastavaIIIT Hyderabad + ManishShrivastavaIIIT Hyderabad 161-171 Automatic extraction of “significant” components of a legal contract, has the potential to simplify the end user’s comprehension. In essence, “significant” pieces of information have 1) information pertaining to material/practical details about a specific contract and 2) information that is novel or comes as a “surprise” for a specific type of contract. It indicates that the significance of a component may be defined at an individual contract level and at a contract-type level. A component, sentence, or paragraph, may be considered significant at a contract level if it contains contract-specific information (CSI), like names, dates, or currency terms. At a contract-type level, components that deviate significantly from the norm for the type may be considered significant (type-specific information (TSI)). In this paper, we present approaches to extract “significant” components from a contract at both these levels. We attempt to do this by identifying patterns in a pool of documents of the same kind. Consequently, in our approach, the solution is formulated in two parts: identifying CSI using a BERT-based contract-specific information extractor and identifying TSI by scoring sentences in a contract for their likelihood. In this paper, we even describe the annotated corpus of contract documents that we created as a first step toward the development of such a language-processing system. We also release a dataset of contract samples containing sentences belonging to CSI and TSI. 2022.icon-main.22 @@ -300,7 +300,7 @@ AnkushAgarwalIIT Bombay SakharamGawadeIIT Bombay SachinChannabasavarajendraHoneywell Technology Solutions Pvt Ltd - PushpakBhattacharyaIIT Bombay + PushpakBhattacharyaIIT Bombay 204-211 The integration of knowledge graphs with deep learning is thriving in improving the performance of various natural language processing (NLP) tasks. In this paper, we focus on knowledge-infused link prediction and question answering using language models, T5, and BLOOM across three domains:Aviation, Movie, and Web. In this context, we infuse knowledge in large and small language models and study their performance, and find the performance to be similar. For the link prediction task on the Aviation Knowledge Graph, we obtain a 0.2 hits@1 score using T5-small, T5-base, T5-large, and BLOOM. Using template-based scripts, we create a set of 1 million synthetic factoid QA pairs in the aviation domain from National Transportation Safety Board (NTSB) reports. On our curated QA pairs, the three models of T5 achieve a 0.7 hits@1 score. We validate our findings with the paired student t test and Cohen’s kappa scores. For link prediction on Aviation Knowledge Graph using T5-small and T5-large, we obtain a Cohen’s kappa score of 0.76, showing substantial agreement between the models. Thus, we infer that small language models perform similar to large language models with the infusion of knowledge. 2022.icon-main.26 @@ -321,7 +321,7 @@ Genre Transfer in <fixed-case>NMT</fixed-case>:Creating Synthetic Spoken Parallel Sentences using Written Parallel Data NalinKumarCharles University - OndrejBojarCharles University + OndrejBojarCharles University 224-233 Text style transfer (TST) aims to control attributes in a given text without changing the content. The matter gets complicated when the boundary separating two styles gets blurred. We can notice similar difficulties in the case of parallel datasets in spoken and written genres. Genuine spoken features like filler words and repetitions in the existing spoken genre parallel datasets are often cleaned during transcription and translation, making the texts closer to written datasets. This poses several problems for spoken genre-specific tasks like simultaneous speech translation. This paper seeks to address the challenge of improving spoken language translations. We start by creating a genre classifier for individual sentences and then try two approaches for data augmentation using written examples:(1) a novel method that involves assembling and disassembling spoken and written neural machine translation (NMT) models, and (2) a rule-based method to inject spoken features. Though the observed results for (1) are not promising, we get some interesting insights into the solution. The model proposed in (1) fine-tuned on the synthesized data from (2) produces naturally looking spoken translations for written-to-spoken genre transfer in En-Hi translation systems. We use this system to produce a second-stage En-Hi synthetic corpus, which however lacks appropriate alignments of explicit spoken features across the languages. For the final evaluation, we fine-tune Hi-En spoken translation systems on the synthesized parallel corpora. We observe that the parallel corpus synthesized using our rule-based method produces the best results. 2022.icon-main.28 @@ -355,7 +355,7 @@ Similarity Based Label Smoothing For Dialogue Generation SougataSaha SouvikDas - RohiniSrihari + RohiniSrihari 253-259 Generative neural conversational systems are typically trained by minimizing the entropy loss between the training “hard” targets and the predicted logits. Performance gains and improved generalization are often achieved by employing regularization techniques like label smoothing, which converts the training “hard” targets to soft targets. However, label smoothing enforces a data independent uniform distribution on the incorrect training targets, leading to a false assumption of equiprobability. In this paper, we propose and experiment with incorporating data-dependent word similarity-based weighing methods to transform the uniform distribution of the incorrect target probabilities in label smoothing to a more realistic distribution based on semantics. We introduce hyperparameters to control the incorrect target distribution and report significant performance gains over networks trained using standard label smoothing-based loss on two standard open-domain dialogue corpora. 2022.icon-main.31 @@ -367,7 +367,7 @@ SubhrajitDey MdAkhtar AmitavaDas - SudipNaskar + SudipNaskar 260-268 Sentiment analysis with deep learning in resource-constrained languages is a challenging task. In this paper, we introduce a novel approach for sentiment analysis in resource-constrained scenarios using character embedding and cross-lingual sentiment analysis with transliteration. We use this method to introduce the novel task of inducing sentiment polarity of words and sentences and aspect term sentiment analysis in the no-resource scenario. We formulate this task by taking a metalingual approach whereby we transliterate data from closely related languages and transform it into a meta language. We also demonstrated the efficacy of using character-level embedding for sentence representation. We experimented with 4 Indian languages – Bengali, Hindi, Tamil, and Telugu, and obtained encouraging results. We also presented new state-of-the-art results on the Hindi sentiment analysis dataset leveraging our metalingual character embeddings. 2022.icon-main.32 @@ -406,7 +406,7 @@ <fixed-case>T</fixed-case>e<fixed-case>Q</fixed-case>u<fixed-case>AD</fixed-case>:<fixed-case>T</fixed-case>elugu Question Answering Dataset RakeshVemula ManiNuthi - ManishSrivastava + ManishSrivastava 300-307 Recent state of the art models and new datasets have advanced many Natural Language Processing areas, especially, Machine Reading Comprehension tasks have improved with the help of datasets like SQuAD (Stanford Question Answering Dataset). But, large high quality datasets are still not a reality for low resource languages like Telugu to record progress in MRC. In this paper, we present a Telugu Question Answering Dataset - TeQuAD with the size of 82k parallel triples created by translating triples from the SQuAD. We also introduce a few methods to create similar Question Answering datasets for the low resource languages. Then, we present the performance of our models which outperform baseline models on Monolingual and Cross Lingual Machine Reading Comprehension (CLMRC) setups, the best of them resulting in an F1 score of 83 % and Exact Match (EM) score of 61 %. 2022.icon-main.36 @@ -512,7 +512,7 @@ OlgaKolesnikova MoeinShahiki Tash GrigoriSidorov - AlexanderGelbukh + AlexanderGelbukh 18-24 Language Identification at the Word Level in Kannada-English Texts. This paper describes the system paper of CoLI-Kanglish 2022 shared task. The goal of this task is to identify the different languages used in CoLI-Kanglish 2022. This dataset is distributed into different categories including Kannada, English, Mixed-Language, Location, Name, and Others. This Code-Mix was compiled by CoLI-Kanglish 2022 organizers from posts on social media. We use two classification techniques, KNN and SVM, and achieve an F1-score of 0.58 and place third out of nine competitors. 2022.icon-wlli.4 @@ -538,7 +538,7 @@ OlgaKolesnikova MoeinShahiki Tash GrigoriSidorov - AlexanderGelbukh + AlexanderGelbukh 29-33 The goal of code-mixed language identification (LID) is to determine which language is spoken or written in a given segment of a speech, word, sentence, or document. Our task is to identify English, Kannada, and mixed language from the provided data. To train a model we used the CoLI-Kenglish dataset, which contains English, Kannada, and mixed-language words. In our work, we conducted several experiments in order to obtain the best performing model. Then, we implemented the best model by using Bidirectional Long Short Term Memory (Bi-LSTM), which outperformed the other trained models with an F1-score of 0.61%. 2022.icon-wlli.6 @@ -562,7 +562,7 @@ N.Ashraf H.l.Shashirekha GrigoriSidorov - AlexanderGelbukh + AlexanderGelbukh 38-45 The task of Language Identification (LI) in text processing refers to automatically identifying the languages used in a text document. LI task is usually been studied at the document level and often in high-resource languages while giving less importance to low-resource languages. However, with the recent advance- ment in technologies, in a multilingual country like India, many low-resource language users post their comments using English and one or more language(s) in the form of code-mixed texts. Combination of Kannada and English is one such code-mixed text of mixing Kannada and English languages at various levels. To address the word level LI in code-mixed text, in CoLI-Kanglish shared task, we have focused on open-sourcing a Kannada-English code-mixed dataset for word level LI of Kannada, English and mixed-language words written in Roman script. The task includes classifying each word in the given text into one of six predefined categories, namely: Kannada (kn), English (en), Kannada-English (kn-en), Name (name), Lo-cation (location), and Other (other). Among the models submitted by all the participants, the best performing model obtained averaged-weighted and averaged-macro F1 scores of 0.86 and 0.62 respectively. 2022.icon-wlli.8 diff --git a/data/xml/2022.ijclclp.xml b/data/xml/2022.ijclclp.xml index 73e3343803..2f888d4016 100644 --- a/data/xml/2022.ijclclp.xml +++ b/data/xml/2022.ijclclp.xml @@ -3,9 +3,9 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 27, Number 1, June 2022 - Siaw-FongChung + Siaw-FongChung RafalRzepka - Shih-pingWang + Shih-pingWang Association for Computational Linguistics and Chinese Language Processing
Taipei, Taiwan
June @@ -18,8 +18,8 @@ The Uniqueness in Speech: Prosodic Highlights-prompted Information Content Projection in Continuous Speech Speech - Helen Kai-yunChen - Chiu-yuTseng + Helen Kai-yunChen + Chiu-yuTseng 2022.ijclclp-1.1 chen-tseng-2022-uniqueness @@ -79,8 +79,8 @@ Aligning Sentences in a Paragraph-Paraphrased Corpus with New Embedding-based Similarity Measures Aleksandra SmolkaSmolka - Hsin-MinWang - Jason S.Chang + Hsin-MinWang + Jason S.Chang Keh-YihSu 2022.ijclclp-2.1 smolka-etal-2022-aligning @@ -112,7 +112,7 @@ Yu-HsiangTseng Chi-WeiWang Fang-ChiYeh - Shu-KaiHsieh + Shu-KaiHsieh 2022.ijclclp-2.4 chen-etal-2022-analyzing-discourse-functions diff --git a/data/xml/2022.in2writing.xml b/data/xml/2022.in2writing.xml index 89852bd163..5221861d5b 100644 --- a/data/xml/2022.in2writing.xml +++ b/data/xml/2022.in2writing.xml @@ -147,7 +147,7 @@ NikosVoskarides EdgarMeij SabrinaSauer - Maartende Rijke + Maartende Rijke 72-73 Writers such as journalists often use automatic tools to find relevant content to include in their narratives. In this paper, we focus on supporting writers in the news domain to develop event-centric narratives. Given an incomplete narrative that specifies a main event and a context, we aim to retrieve news articles that discuss relevant events that would enable the continuation of the narrative. We formally define this task and propose a retrieval dataset construction procedure that relies on existing news articles to simulate incomplete narratives and relevant articles. Experiments on two datasets derived from this procedure show that state-of-the-art lexical and semantic rankers are not sufficient for this task. We show that combining those with a ranker that ranks articles by reverse chronological order outperforms those rankers alone. We also perform an in-depth quantitative and qualitative analysis of the results that sheds light on the characteristics of this task. 2022.in2writing-1.10 diff --git a/data/xml/2022.inlg.xml b/data/xml/2022.inlg.xml index 28c34bfe1d..8e3e485ce2 100644 --- a/data/xml/2022.inlg.xml +++ b/data/xml/2022.inlg.xml @@ -4,8 +4,8 @@ Proceedings of the 15th International Conference on Natural Language Generation SamiraShaikh - ThiagoFerreira - AmandaStent + ThiagoFerreira + AmandaStent Association for Computational Linguistics
Waterville, Maine, USA and virtual meeting
July @@ -122,9 +122,9 @@ DavidSchlangen MartinHeckmann HeikoWersing - SinaZarrieß + SinaZarrieß 110-120 - + 2022.inlg-main.9 2022.inlg-main.9.software.zip attari-etal-2022-generating @@ -184,9 +184,9 @@ KevinRos MaxwellJong Chak HoChan - ChengXiangZhai + ChengXiangZhai 186-195 - + 2022.inlg-main.14 2022.inlg-main.14.software.zip ros-etal-2022-generation @@ -206,12 +206,12 @@
Generating Landmark-based Manipulation Instructions from Image Pairs - SinaZarrieß + SinaZarrieß HenrikVoigt DavidSchlangen PhilippSadler 203-211 - + 2022.inlg-main.16 2022.inlg-main.16.software.zip zarriess-etal-2022-generating @@ -226,10 +226,10 @@ MertInan ElizabethNielsen ShahabRaji - MarkSteedman + MarkSteedman MatthewStone 212-224 - + 2022.inlg-main.17 2022.inlg-main.17.software.zip alikhani-etal-2022-zero @@ -266,9 +266,9 @@ PhilippHeinisch AnetteFrank JuriOpitz - PhilippCimiano + PhilippCimiano 246-259 - + 2022.inlg-main.20 2022.inlg-main.20.software.zip heinisch-etal-2022-strategies @@ -328,9 +328,9 @@ Analogy Generation by Prompting Large Language Models: A Case Study of <fixed-case>I</fixed-case>nstruct<fixed-case>GPT</fixed-case> BhavyaBhavya JinjunXiong - ChengXiangZhai + ChengXiangZhai 298-312 - + 2022.inlg-main.25 bhavya-etal-2022-analogy 10.18653/v1/2022.inlg-main.25 @@ -340,8 +340,8 @@ Proceedings of the 15th International Conference on Natural Language Generation: System Demonstrations SamiraShaikh - ThiagoFerreira - AmandaStent + ThiagoFerreira + AmandaStent Association for Computational Linguistics
Waterville, Maine, USA and virtual meeting
July @@ -368,7 +368,7 @@ Generating Quizzes to Support Training on Quality Management and Assurance in Space Science and Engineering AndresGarcia-Silva CristianBerrio Aroca - Jose ManuelGomez-Perez + Jose ManuelGomez-Perez JoseMartinez PatrickFleith StefanoScaglioni @@ -413,8 +413,8 @@ Proceedings of the 15th International Conference on Natural Language Generation: Generation Challenges SamiraShaikh - ThiagoFerreira - AmandaStent + ThiagoFerreira + AmandaStent Association for Computational Linguistics
Waterville, Maine, USA and virtual meeting
July @@ -432,7 +432,7 @@ MarieHledíková MuskaanSingh AnnaNedoluzhko - OndřejBojar + OndřejBojar 1-11 We would host the AutoMin generation chal- lenge at INLG 2023 as a follow-up of the first AutoMin shared task at Interspeech 2021. Our shared task primarily concerns the automated generation of meeting minutes from multi-party meeting transcripts. In our first venture, we ob- served the difficulty of the task and highlighted a number of open problems for the community to discuss, attempt, and solve. Hence, we invite the Natural Language Generation (NLG) com- munity to take part in the second iteration of AutoMin. Like the first, the second AutoMin will feature both English and Czech meetings and the core task of summarizing the manually- revised transcripts into bulleted minutes. A new challenge we are introducing this year is to devise efficient metrics for evaluating the quality of minutes. We will also host an optional track to generate minutes for European parliamentary sessions. We carefully curated the datasets for the above tasks. Our ELITR Minuting Corpus has been recently accepted to LREC 2022 and publicly released. We are already preparing a new test set for evaluating the new shared tasks. We hope to carry forward the learning from the first AutoMin and instigate more community attention and interest in this timely yet chal- lenging problem. INLG, the premier forum for the NLG community, would be an appropriate venue to discuss the challenges and future of Automatic Minuting. The main objective of the AutoMin GenChal at INLG 2023 would be to come up with efficient methods to auto- matically generate meeting minutes and design evaluation metrics to measure the quality of the minutes. 2022.inlg-genchal.1 @@ -455,7 +455,7 @@ <fixed-case>H</fixed-case>inglish<fixed-case>E</fixed-case>val Generation Challenge on Quality Estimation of Synthetic Code-Mixed Text: Overview and Results VivekSrivastava - MayankSingh + MayankSingh 19-25 We hosted a shared task to investigate the factors influencing the quality of the code- mixed text generation systems. The teams experimented with two systems that gener- ate synthetic code-mixed Hinglish sentences. They also experimented with human ratings that evaluate the generation quality of the two systems. The first-of-its-kind, proposed sub- tasks, (i) quality rating prediction and (ii) an- notators’ disagreement prediction of the syn- thetic Hinglish dataset made the shared task quite popular among the multilingual research community. A total of 46 participants com- prising 23 teams from 18 institutions reg- istered for this shared task. The detailed description of the task and the leaderboard is available at https://codalab.lisn.upsaclay.fr/competitions/1688. 2022.inlg-genchal.3 @@ -468,7 +468,7 @@ AkshayGoindani AnmolGoel NamanAhuja - ManishShrivastava + ManishShrivastava PonnurangamKumaraguru 26-30 Code-Mixing is a phenomenon of mixing two or more languages in a speech event and is prevalent in multilingual societies. Given the low-resource nature of Code-Mixing, machine generation of code-mixed text is a prevalent approach for data augmentation. However, evaluating the quality of such machine gen- erated code-mixed text is an open problem. In our submission to HinglishEval, a shared- task collocated with INLG2022, we attempt to build models factors that impact the quality of synthetically generated code-mix text by pre- dicting ratings for code-mix quality. Hingli- shEval Shared Task consists of two sub-tasks - a) Quality rating prediction); b) Disagree- ment prediction. We leverage popular code- mixed metrics and embeddings of multilin- gual large language models (MLLMs) as fea- tures, and train task specific MLP regression models. Our approach could not beat the baseline results. However, for Subtask-A our team ranked a close second on F-1 and Co- hen’s Kappa Score measures and first for Mean Squared Error measure. For Subtask-B our ap- proach ranked third for F1 score, and first for Mean Squared Error measure. Code of our submission can be accessed here. @@ -507,9 +507,9 @@ The 2022 <fixed-case>R</fixed-case>epro<fixed-case>G</fixed-case>en Shared Task on Reproducibility of Evaluations in <fixed-case>NLG</fixed-case>: Overview and Results - AnyaBelz + AnyaBelz AnastasiaShimorina - MajaPopović + MajaPopović EhudReiter 43-51 Against a background of growing interest in reproducibility in NLP and ML, and as part of an ongoing research programme designed to develop theory and practice of reproducibility assessment in NLP, we organised the second shared task on reproducibility of evaluations in NLG, ReproGen 2022. This paper describes the shared task, summarises results from the reproduction studies submitted, and provides further comparative analysis of the results. Out of six initial team registrations, we received submissions from five teams. Meta-analysis of the five reproduction studies revealed varying degrees of reproducibility, and allowed further tentative conclusions about what types of evaluation tend to have better reproducibility. @@ -522,7 +522,7 @@ OndřejDušek ZdeněkKasner ThiagoCastro Ferreira - AnyaBelz + AnyaBelz 52-61 In this paper, we present the results of two reproduction studies for the human evaluation originally reported by Dušek and Kasner (2020) in which the authors comparatively evaluated outputs produced by a semantic error detection system for data-to-text generation against reference outputs. In the first reproduction, the original evaluators repeat the evaluation, in a test of the repeatability of the original evaluation. In the second study, two new evaluators carry out the evaluation task, in a test of the reproducibility of the original evaluation under otherwise identical conditions. We describe our approach to reproduction, and present and analyse results, finding different degrees of reproducibility depending on result type, data and labelling task. Our resources are available and open-sourced. 2022.inlg-genchal.9 @@ -549,10 +549,10 @@ Reproducing a Manual Evaluation of the Simplicity of Text Simplification System Outputs - MajaPopović + MajaPopović SheilaCastilho RudaliHuidrom - AnyaBelz + AnyaBelz 80-85 In this paper we describe our reproduction study of the human evaluation of text simplic- ity reported by Nisioi et al. (2017). The work was carried out as part of the ReproGen Shared Task 2022 on Reproducibility of Evaluations in NLG. Our aim was to repeat the evaluation of simplicity for nine automatic text simplification systems with a different set of evaluators. We describe our experimental design together with the known aspects of the original experimental design and present the results from both studies. Pearson correlation between the original and reproduction scores is moderate to high (0.776). Inter-annotator agreement in the reproduction study is lower (0.40) than in the original study (0.66). We discuss challenges arising from the unavailability of certain aspects of the origi- nal set-up, and make several suggestions as to how reproduction of similar evaluations can be made easier in future. 2022.inlg-genchal.12 @@ -568,7 +568,7 @@ Emielvan Miltenburg Chrisvan der Lee MartijnGoudbeek - EmielKrahmer + EmielKrahmer 86-93 In this paper, we describe our reproduction ef- fort of the paper: Towards Best Experiment Design for Evaluating Dialogue System Output by Santhanam and Shaikh (2019) for the 2022 ReproGen shared task. We aim to produce the same results, using different human evaluators, and a different implementation of the automatic metrics used in the original paper. Although overall the study posed some challenges to re- produce (e.g. difficulties with reproduction of automatic metrics and statistics), in the end we did find that the results generally replicate the findings of Santhanam and Shaikh (2019) and seem to follow similar trends. 2022.inlg-genchal.13 diff --git a/data/xml/2022.insights.xml b/data/xml/2022.insights.xml index 8f63dd09b1..01b0680f65 100644 --- a/data/xml/2022.insights.xml +++ b/data/xml/2022.insights.xml @@ -83,7 +83,7 @@ How Much Do Modifications to Transformer Language Models Affect Their Ability to Learn Linguistic Knowledge? SimengSun - BrianDillon + BrianDillon MohitIyyer 46-53 Recent progress in large pretrained language models (LMs) has led to a growth of analyses examining what kinds of linguistic knowledge are encoded by these models. Due to computational constraints, existing analyses are mostly conducted on publicly-released LM checkpoints, which makes it difficult to study how various factors during training affect the models’ acquisition of linguistic knowledge. In this paper, we train a suite of small-scale Transformer LMs that differ from each other with respect to architectural decisions (e.g., self-attention configuration) or training objectives (e.g., multi-tasking, focal loss). We evaluate these LMs on BLiMP, a targeted evaluation benchmark of multiple English linguistic phenomena. Our experiments show that while none of these modifications yields significant improvements on aggregate, changes to the loss function result in promising improvements on several subcategories (e.g., detecting adjunct islands, correctly scoping negative polarity items). We hope our work offers useful insights for future research into designing Transformer LMs that more effectively learn linguistic knowledge. @@ -109,7 +109,7 @@ DaweiZhu Michael A.Hedderich FangzhouZhai - David IfeoluwaAdelani + David IfeoluwaAdelani DietrichKlakow 62-67 Incorrect labels in training data occur when human annotators make mistakes or when the data is generated via weak or distant supervision. It has been shown that complex noise-handling techniques - by modeling, cleaning or filtering the noisy instances - are required to prevent models from fitting this label noise. However, we show in this work that, for text classification tasks with modern NLP models like BERT, over a variety of noise types, existing noise-handling methods do not always improve its performance, and may even deteriorate it, suggesting the need for further investigation. We also back our observations with a comprehensive analysis. @@ -122,7 +122,7 @@ Ancestor-to-Creole Transfer is Not a Walk in the Park HeatherLent EmanueleBugliarello - AndersSøgaard + AndersSøgaard 68-74 We aim to learn language models for Creole languages for which large volumes of data are not readily available, and therefore explore the potential transfer from ancestor languages (the ‘Ancestry Transfer Hypothesis’). We find that standard transfer methods do not facilitate ancestry transfer. Surprisingly, different from other non-Creole languages, a very distinct two-phase pattern emerges for Creoles: As our training losses plateau, and language models begin to overfit on their source languages, perplexity on the Creoles drop. We explore if this compression phase can lead to practically useful language models (the ‘Ancestry Bottleneck Hypothesis’), but also falsify this. Moreover, we show that Creoles even exhibit this two-phase pattern even when training on random, unrelated languages. Thus Creoles seem to be typological outliers and we speculate whether there is a link between the two observations. 2022.insights-1.9 @@ -191,7 +191,7 @@ Clustering Examples in Multi-Dataset Benchmarks with Item Response Theory PedroRodriguez Phu MonHtut - JohnLalor + JohnLalor JoãoSedoc 100-112 In natural language processing, multi-dataset benchmarks for common tasks (e.g., SuperGLUE for natural language inference and MRQA for question answering) have risen in importance. Invariably, tasks and individual examples vary in difficulty. Recent analysis methods infer properties of examples such as difficulty. In particular, Item Response Theory (IRT) jointly infers example and model properties from the output of benchmark tasks (i.e., scores for each model-example pair). Therefore, it seems sensible that methods like IRT should be able to detect differences between datasets in a task. This work shows that current IRT models are not as good at identifying differences as we would expect, explain why this is difficult, and outline future directions that incorporate more (textual) signal from examples. @@ -206,7 +206,7 @@ AishwaryaPadmakumar DiJin MohitBansal - DilekHakkani-Tur + DilekHakkani-Tur 113-118 Natural language guided embodied task completion is a challenging problem since it requires understanding natural language instructions, aligning them with egocentric visual observations, and choosing appropriate actions to execute in the environment to produce desired changes. We experiment with augmenting a transformer model for this task with modules that effectively utilize a wider field of view and learn to choose whether the next step requires a navigation or manipulation action. We observed that the proposed modules resulted in improved, and in fact state-of-the-art performance on an unseen validation set of a popular benchmark dataset, ALFRED. However, our best model selected using the unseen validation set underperforms on the unseen test split of ALFRED, indicating that performance on the unseen validation set may not in itself be a sufficient indicator of whether model improvements generalize to unseen test sets. We highlight this result as we believe it may be a wider phenomenon in machine learning tasks but primarily noticeable only in benchmarks that limit evaluations on test splits, and highlights the need to modify benchmark design to better account for variance in model performance. 2022.insights-1.15 @@ -229,7 +229,7 @@ The Document Vectors Using Cosine Similarity Revisited ZhangBingyu - NikolayArefyev + NikolayArefyev 129-133 The current state-of-the-art test accuracy (97.42%) on the IMDB movie reviews dataset was reported by Thongtan and Phienthrakul (2019) and achieved by the logistic regression classifier trained on the Document Vectors using Cosine Similarity (DV-ngrams-cosine) proposed in their paper and the Bag-of-N-grams (BON) vectors scaled by Naïve Bayesian weights. While large pre-trained Transformer-based models have shown SOTA results across many datasets and tasks, the aforementioned model has not been surpassed by them, despite being much simpler and pre-trained on the IMDB dataset only. In this paper, we describe an error in the evaluation procedure of this model, which was found when we were trying to analyze its excellent performance on the IMDB dataset. We further show that the previously reported test accuracy of 97.42% is invalid and should be corrected to 93.68%. We also analyze the model performance with different amounts of training data (subsets of the IMDB dataset) and compare it to the Transformer-based RoBERTa model. The results show that while RoBERTa has a clear advantage for larger training sets, the DV-ngrams-cosine performs better than RoBERTa when the labeled training set is very small (10 or 20 documents). Finally, we introduce a sub-sampling scheme based on Naïve Bayesian weights for the training process of the DV-ngrams-cosine, which leads to faster training and better quality. 2022.insights-1.17 @@ -239,9 +239,9 @@ Challenges in including extra-linguistic context in pre-trained language models - IonutSorodoc + IonutSorodoc LauraAina - GemmaBoleda + GemmaBoleda 134-138 To successfully account for language, computational models need to take into account both the linguistic context (the content of the utterances) and the extra-linguistic context (for instance, the participants in a dialogue). We focus on a referential task that asks models to link entity mentions in a TV show to the corresponding characters, and design an architecture that attempts to account for both kinds of context. In particular, our architecture combines a previously proposed specialized module (an “entity library”) for character representation with transfer learning from a pre-trained language model. We find that, although the model does improve linguistic contextualization, it fails to successfully integrate extra-linguistic information about the participants in the dialogue. Our work shows that it is very challenging to incorporate extra-linguistic information into pre-trained language models. 2022.insights-1.18 @@ -279,7 +279,7 @@ VinayshekharKumar VaibhavKumar MukulBhutani - AlexanderRudnicky + AlexanderRudnicky 154-158 In this work, we examine the problems associated with neural dialog models under the common theme of compositionality. Specifically, we investigate three manifestations of compositionality: (1) Productivity, (2) Substitutivity, and (3) Systematicity. These manifestations shed light on the generalization, syntactic robustness, and semantic capabilities of neural dialog models. We design probing experiments by perturbing the training data to study the above phenomenon. We make informative observations based on automated metrics and hope that this work increases research interest in understanding the capacity of these models. 2022.insights-1.21 diff --git a/data/xml/2022.isa.xml b/data/xml/2022.isa.xml index 3d79de6d02..825240429e 100644 --- a/data/xml/2022.isa.xml +++ b/data/xml/2022.isa.xml @@ -3,7 +3,7 @@ Proceedings of the 18th Joint ACL - ISO Workshop on Interoperable Semantic Annotation within LREC2022 - HarryBunt + HarryBunt European Language Resources Association
Marseille, France
June @@ -36,7 +36,7 @@
Guidelines and a Corpus for Extracting Biographical Events - Marco AntonioStranisci + Marco AntonioStranisci EnricoMensa RossanaDamiano DanieleRadicioni @@ -59,7 +59,7 @@ Event Sequencing Annotation with <fixed-case>TIE</fixed-case>-<fixed-case>ML</fixed-case> - DamirCavar + DamirCavar AliAljubailan LudovicMompelat YunaWon @@ -74,7 +74,7 @@ Measuring Similarity by Linguistic Features rather than Frequency - RodolfoDelmonte + RodolfoDelmonte NicolòBusetto 42–52 In the use and creation of current Deep Learning Models the only number that is used for the overall computation is the frequency value associated with the current word form in the corpus, which is used to substitute it. Frequency values come in two forms: absolute and relative. Absolute frequency is used indirectly when selecting the vocabulary against which the word embeddings are created: the cutoff threshold is usually fixed at 30/50K entries of the most frequent words. Relative frequency comes in directly when computing word embeddings based on co-occurrence values of the tokens included in a window size 2/5 adjacent tokens. The latter values are then used to compute similarity, mostly based on cosine distance. In this paper we will evaluate the impact of these two frequency parameters on a small corpus of Italian sentences whose main features are two: presence of very rare words and of non-canonical structures. Rather than basing our evaluation on cosine measure alone, we propose a graded scale of scores which are linguistically motivated. The results computed on the basis of a perusal of BERT’s raw embeddings shows that the two parameters conspire to decide the level of predictability. @@ -86,7 +86,7 @@ Testing the Annotation Consistency of Hallidayan Transitivity Processes: A Multi-variable Structural Approach MinDong XiaoyanLiu - Alex ChengyuFang + Alex ChengyuFang 53–60 SFL seeks to explain identifiable, observable phenomena of language use in context through the application of a theoretical framework which models language as a functional, meaning making system (Halliday & Matthiessen 2004). Due to the lack of explicit annotation criteria and the divide between conceptual vs. syntactic criteria in practice, it has been a tough job to achieve consistency in the annotation of Hallidayn transitivity processes. The present study proposed that explicit structural and syntactic criteria should be adopted as a basis. Drawing on syntactic and grammatical features as judgement cues, we applied structurally oriented criteria for the annotation of the process categories and participant roles combining a set of interrelated syntactic variables and established the annotation criteria for contextualised circumstantial categories in structural as well as semantic terms. An experiment was carried out to test the usefulness of these annotation criteria, applying percent agreement and Cohen’s kappa as measurements of interrater reliability between the two annotators in each of the five pairs. The results verified our assumptions, albeit rather mildly, and, more significantly, offered some first empirical indications about the practical consistency of transitivity analysis in SFL. In the future work, the research team expect to draw on the insights and experience from some of the ISO standards devoted to semantic annotation such as dialogue acts (Bunt et al. 2012) and semantic roles (ISO-24617-4, 2014). 2022.isa-1.7 @@ -170,11 +170,11 @@ Towards Practical Semantic Interoperability in <fixed-case>NLP</fixed-case> Platforms - JulianMoreno-Schneider + JulianMoreno-Schneider RémiCalizzano FlorianKintzel GeorgRehm - DimitrisGalanis + DimitrisGalanis IanRoberts 118–126 Interoperability is a necessity for the resolution of complex tasks that require the interconnection of several NLP services. This article presents the approaches that were adopted in three scenarios to address the respective interoperability issues. The first scenario describes the creation of a common REST API for a specific platform, the second scenario presents the interconnection of several platforms via mapping of different representation formats and the third scenario shows the complexities of interoperability through semantic schema mapping or automatic translation. diff --git a/data/xml/2022.iwslt.xml b/data/xml/2022.iwslt.xml index 07610a69ea..ef16719676 100644 --- a/data/xml/2022.iwslt.xml +++ b/data/xml/2022.iwslt.xml @@ -54,7 +54,7 @@ FrithjofPetrick JanRosendahl ChristianHerold - HermannNey + HermannNey 32-42 After its introduction the Transformer architecture quickly became the gold standard for the task of neural machine translation. A major advantage of the Transformer compared to previous architectures is the faster training speed achieved by complete parallelization across timesteps due to the use of attention over recurrent layers. However, this also leads to one of the biggest problems of the Transformer, namely the quadratic time and memory complexity with respect to the input length. In this work we adapt the locality-sensitive hashing approach of Kitaev et al. (2020) to self-attention in the Transformer, we extended it to cross-attention and apply this memory efficient framework to sentence- and document-level machine translation. Our experiments show that the LSH attention scheme for sentence-level comes at the cost of slightly reduced translation quality. For document-level NMT we are able to include much bigger context sizes than what is possible with the baseline Transformer. However, more context does neither improve translation quality nor improve scores on targeted test suites. 2022.iwslt-1.4 @@ -75,7 +75,7 @@ Who Are We Talking About? Handling Person Names in Speech Translation MarcoGaido - MatteoNegri + MatteoNegri MarcoTurchi 62-73 Recent work has shown that systems for speech translation (ST) – similarly to automatic speech recognition (ASR) – poorly handle person names. This shortcoming does not only lead to errors that can seriously distort the meaning of the input, but also hinders the adoption of such systems in application scenarios (like computer-assisted interpreting) where the translation of named entities, like person names, is crucial. In this paper, we first analyse the outputs of ASR/ST systems to identify the reasons of failures in person name transcription/translation. Besides the frequency in the training data, we pinpoint the nationality of the referred person as a key factor. We then mitigate the problem by creating multilingual models, and further improve our ST systems by forcing them to jointly generate transcripts and translations, prioritising the former over the latter. Overall, our solutions result in a relative improvement in token-level person name accuracy by 47.8% on average for three language pairs (en->es,fr,it). @@ -87,7 +87,7 @@ Joint Generation of Captions and Subtitles with Dual Decoding JitaoXu FrançoisBuet - JosepCrego + JosepCrego EliseBertin-Lemée FrançoisYvon 74-82 @@ -127,10 +127,10 @@ LoïcBarrault LuisaBentivogli MarcelyZanon Boito - OndřejBojar + OndřejBojar RoldanoCattoni AnnaCurrey - GeorgianaDinu + GeorgianaDinu KevinDuh MahaElbayad ClaraEmmanuel @@ -151,7 +151,7 @@ KentonMurray MariaNǎdejde SatoshiNakamura - MatteoNegri + MatteoNegri JanNiehues XingNiu JohnOrtega @@ -159,11 +159,11 @@ ElizabethSalesky JiatongShi MatthiasSperber - SebastianStüker + SebastianStüker KatsuhitoSudoh MarcoTurchi YogeshVirkar - AlexanderWaibel + AlexanderWaibel ChanghanWang ShinjiWatanabe 98-157 @@ -188,7 +188,7 @@ <fixed-case>A</fixed-case>mazon <fixed-case>A</fixed-case>lexa <fixed-case>AI</fixed-case>’s System for <fixed-case>IWSLT</fixed-case> 2022 Offline Speech Translation Shared Task Akshaya Vishnu KudluShanbhogue RanXue - Ching-YunChang + Ching-YunChang SarahCampbell 169-176 This paper describes Amazon Alexa AI’s submission to the IWSLT 2022 Offline Speech Translation Task. Our system is an end-to-end speech translation model that leverages pretrained models and cross modality transfer learning. We detail two improvements to the knowledge transfer schema. First, we implemented a new loss function that reduces knowledge gap between audio and text modalities in translation task effectively. Second, we investigate multiple finetuning strategies including sampling loss, language grouping and domain adaption. These strategies aims to bridge the gaps between speech and text translation tasks. We also implement a multi-stage segmentation and merging strategy that yields improvements on the unsegmented development datasets. Results show that the proposed loss function consistently improves BLEU scores on the development datasets for both English-German and multilingual models. Additionally, certain language pairs see BLEU score improvements with specific finetuning strategies. @@ -202,7 +202,7 @@ SaraPapi DennisFucci GiuseppeFiameni - MatteoNegri + MatteoNegri MarcoTurchi 177-189 The primary goal of this FBK’s systems submission to the IWSLT 2022 offline and simultaneous speech translation tasks is to reduce model training costs without sacrificing translation quality. As such, we first question the need of ASR pre-training, showing that it is not essential to achieve competitive results. Second, we focus on data filtering, showing that a simple method that looks at the ratio between source and target characters yields a quality improvement of 1 BLEU. Third, we compare different methods to reduce the detrimental effect of the audio segmentation mismatch between training data manually segmented at sentence level and inference data that is automatically segmented. Towards the same goal of training cost reduction, we participate in the simultaneous task with the same model trained for offline ST. The effectiveness of our lightweight training strategy is shown by the high score obtained on the MuST-C en-de corpus (26.7 BLEU) and is confirmed in high-resource data conditions by a 1.6 BLEU improvement on the IWSLT2020 test set over last year’s winning system. @@ -212,13 +212,13 @@ Effective combination of pretrained models - <fixed-case>KIT</fixed-case>@<fixed-case>IWSLT</fixed-case>2022 - Ngoc-QuanPham + Ngoc-QuanPham Tuan NamNguyen Thai-BinhNguyen DanniLiu CarlosMullov JanNiehues - AlexanderWaibel + AlexanderWaibel 190-197 Pretrained models in acoustic and textual modalities can potentially improve speech translation for both Cascade and End-to-end approaches. In this evaluation, we aim at empirically looking for the answer by using the wav2vec, mBART50 and DeltaLM models to improve text and speech translation models. The experiments showed that the presence of these models together with an advanced audio segmentation method results in an improvement over the previous end-to-end system by up to 7 BLEU points. More importantly, the experiments showed that given enough data and modeling capacity to overcome the training difficulty, we can outperform even very competitive Cascade systems. In our experiments, this gap can be as large as 2.0 BLEU points, the same gap that the Cascade often led over the years. 2022.iwslt-1.14 @@ -357,12 +357,12 @@ JavierJorge Cano AlejandroPérez-González-de-Martos AdriánGiménez Pastor - Gonçal V.Garcés Díaz-Munío + Gonçal V.Garcés Díaz-Munío PauBaquero-Arnal Joan AlbertSilvestre-Cerdà JorgeCivera Saiz AlbertSanchis - AlfonsJuan + AlfonsJuan 255-264 This work describes the participation of the MLLP-VRAIN research group in the two shared tasks of the IWSLT 2022 conference: Simultaneous Speech Translation and Speech-to-Speech Translation. We present our streaming-ready ASR, MT and TTS systems for Speech Translation and Synthesis from English into German. Our submission combines these systems by means of a cascade approach paying special attention to data preparation and decoding for streaming inference. 2022.iwslt-1.22 @@ -375,7 +375,7 @@ Gerard I.Gállego CarlosEscolano JoséFonollosa - Marta R.Costa-jussà + Marta R.Costa-jussà 265-276 This paper describes the submissions of the UPC Machine Translation group to the IWSLT 2022 Offline Speech Translation and Speech-to-Speech Translation tracks. The offline task involves translating English speech to German, Japanese and Chinese text. Our Speech Translation systems are trained end-to-end and are based on large pretrained speech and text models. We use an efficient fine-tuning technique that trains only specific layers of our system, and explore the use of adapter modules for the non-trainable layers. We further investigate the suitability of different speech encoders (wav2vec 2.0, HuBERT) for our models and the impact of knowledge distillation from the Machine Translation model that we use for the decoder (mBART). For segmenting the IWSLT test sets we fine-tune a pretrained audio segmentation model and achieve improvements of 5 BLEU compared to the given segmentation. Our best single model uses HuBERT and parallel adapters and achieves 29.42 BLEU at English-German MuST-C tst-COMMON and 26.77 at IWSLT 2020 test. By ensembling many models, we further increase translation quality to 30.83 BLEU and 27.78 accordingly. Furthermore, our submission for English-Japanese achieves 15.85 and English-Chinese obtains 25.63 BLEU on the MuST-C tst-COMMON sets. Finally, we extend our system to perform English-German Speech-to-Speech Translation with a pretrained Text-to-Speech model. 2022.iwslt-1.23 @@ -385,13 +385,13 @@ <fixed-case>CUNI</fixed-case>-<fixed-case>KIT</fixed-case> System for Simultaneous Speech Translation Task at <fixed-case>IWSLT</fixed-case> 2022 PeterPolák - Ngoc-QuanPham + Ngoc-QuanPham Tuan NamNguyen DanniLiu CarlosMullov JanNiehues - OndřejBojar - AlexanderWaibel + OndřejBojar + AlexanderWaibel 277-285 In this paper, we describe our submission to the Simultaneous Speech Translation at IWSLT 2022. We explore strategies to utilize an offline model in a simultaneous setting without the need to modify the original model. In our experiments, we show that our onlinization algorithm is almost on par with the offline setting while being 3x faster than offline in terms of latency on the test set. We also show that the onlinized offline model outperforms the best IWSLT2021 simultaneous system in medium and high latency regimes and is almost on par in the low latency regime. We make our system publicly available. 2022.iwslt-1.24 @@ -475,7 +475,7 @@ JinyiYang AmirHussein MatthewWiesner - SanjeevKhudanpur + SanjeevKhudanpur 319-326 This paper details the Johns Hopkins speech translation (ST) system used in the IWLST2022 dialect speech translation task. Our system uses a cascade of automatic speech recognition (ASR) and machine translation (MT). We use a Conformer model for ASR systems and a Transformer model for machine translation. Surprisingly, we found that while using additional ASR training data resulted in only a negligible change in performance as measured by BLEU or word error rate (WER), aggressive text normalization improved BLEU more significantly. We also describe an approach, similar to back-translation, for improving performance using synthetic dialectal source text produced from source sentences in mismatched dialects. 2022.iwslt-1.29 @@ -497,7 +497,7 @@ Controlling Formality in Low-Resource <fixed-case>NMT</fixed-case> with Domain Adaptation and Re-Ranking: <fixed-case>SLT</fixed-case>-<fixed-case>CDT</fixed-case>-<fixed-case>U</fixed-case>o<fixed-case>S</fixed-case> at <fixed-case>IWSLT</fixed-case>2022 SebastianVincent LoïcBarrault - CarolinaScarton + CarolinaScarton 341-350 This paper describes the SLT-CDT-UoS group’s submission to the first Special Task on Formality Control for Spoken Language Translation, part of the IWSLT 2022 Evaluation Campaign. Our efforts were split between two fronts: data engineering and altering the objective function for best hypothesis selection. We used language-independent methods to extract formal and informal sentence pairs from the provided corpora; using English as a pivot language, we propagated formality annotations to languages treated as zero-shot in the task; we also further improved formality controlling with a hypothesis re-ranking approach. On the test sets for English-to-German and English-to-Spanish, we achieved an average accuracy of .935 within the constrained setting and .995 within unconstrained setting. In a zero-shot setting for English-to-Russian and English-to-Italian, we scored average accuracy of .590 for constrained setting and .659 for unconstrained. 2022.iwslt-1.31 diff --git a/data/xml/2022.jeptalnrecital.xml b/data/xml/2022.jeptalnrecital.xml index bd55d125aa..7b1927655e 100644 --- a/data/xml/2022.jeptalnrecital.xml +++ b/data/xml/2022.jeptalnrecital.xml @@ -20,8 +20,8 @@ Abstraction ou hallucination ? État des lieux et évaluation du risque pour les modèles de génération de résumés automatiques de type séquence-à-séquence (Abstraction or Hallucination ? Status and Risk assessment for sequence-to-sequence Automatic) EuniceAkani - BenoitFavre - FredericBechet + BenoitFavre + FredericBechet 2–11 La génération de texte a récemment connu un très fort intérêt au vu des avancées notables dans le domaine des modèles de langage neuronaux. Malgré ces avancées, cette tâche reste difficile quand il s’agit d’un résumé automatique de texte par abstraction. Certains systèmes de résumés génèrent des textes qui ne sont pas forcément fidèles au document source. C’est sur cette thématique que porte notre étude. Nous présentons une typologie d’erreurs pour les résumés automatique et ainsi qu’une caractérisation du phénomène de l’abstraction pour les résumés de référence afin de mieux comprendre l’ampleur de ces différents phénomènes sur les entités nommées. Nous proposons également une mesure d’évaluation du risque d’erreur lorsqu’un système tente de faire des abstractions sur les entités nommées d’un document. 2022.jeptalnrecital-taln.1 @@ -59,7 +59,7 @@ YizhouXu KataGábor LeilaKhouas - FrédériqueSegond + FrédériqueSegond 42–53 La détection d’anomalies textuelles est une tâche importante de la fouille de textes. Plusieurs approches générales, visant l’identification de points de données aberrants, ont été appliqués dans ce domaine. Néanmoins, ces approches exploitent peu les nouvelles avancées du traitement automatique des langues naturelles (TALN). L’avènement des modèles de langage pré-entraînés comme BERT et GPT-2 a donné naissance à un nouveau paradigme de l’apprentissage automatique appelé ingénierie d’invite (prompt engineering) qui a montré de bonnes performances sur plusieurs tâches du TALN. Cet article présente un travail exploratoire visant à examiner la possibilité de détecter des anomalies textuelles à l’aide de l’ingénierie d’invite. Dans nos expérimentations, nous avons examiné la performance de différents modèles d’invite. Les résultats ont montré que l’ingénierie d’invite est une méthode prometteuse pour la détection d’anomalies textuelles. 2022.jeptalnrecital-taln.4 @@ -71,7 +71,7 @@ KevinDeturck DamienNouvel NamrataPatel - FrederiqueSegond + FrederiqueSegond 54–63 L’influence sociale est un phénomène important dans divers domaines, tels que l’économie et la politique, qui a gagné en résonnance avec la popularité des médias sociaux, notamment les réseaux sociaux et les forums. La majorité des travaux sur ce sujet propose des approches fondées sur des théories en sciences humaines (sociologie, linguistique), et des techniques d’analyse de réseau (mesures de propagation et de centralité) ou de TAL. Dans cet article, nous présentons un modèle d’influence inspiré de travaux en psychologie sociale, sur lequel nous construisons un système combinant un module de TAL pour détecter les messages reflétant les processus d’influence, associé à une analyse par centralité de la transmission de ces messages. Nos expériences sur le forum de débats Change My View montrent que l’approche par hybridation, comparée à la centralité seule, aide à mieux détecter les influenceurs. 2022.jeptalnrecital-taln.5 @@ -81,9 +81,9 @@ Étiquetage ou génération de séquences pour la compréhension automatique du langage en contexte d’interaction? (Sequence tagging or sequence generation for Natural Language Understanding ?) RimAbrougui - GéraldineDamnati + GéraldineDamnati JohannesHeinecke - FrédéricBéchet + FrédéricBéchet 64–73 La tâche de compréhension automatique du langage en contexte d’interaction (NLU pour Natural Language Understanding) est souvent réduite à la détection d’intentions et de concepts sur des corpus mono-domaines annotés avec une seule intention par énoncé. Afin de dépasser ce paradigme, nous cherchons à aborder des référentiels plus complexes en visant des représentations sémantiques structurées au-delà du simple modèle intention/concept. Nous nous intéressons au corpus MultiWOZ, couramment utilisé pour le suivi de l’état du dialogue. Nous questionnons la projection de ces annotations sémantiques complexes pour le NLU, en comparant plusieurs approches d’étiquetage de séquence, puis en proposant un nouveau formalisme inspiré des méthodes de génération de graphe pour la modélisation sémantique AMR. Nous discutons enfin le potentiel des approches génératives. 2022.jeptalnrecital-taln.6 @@ -117,7 +117,7 @@ Filtrage et régularisation pour améliorer la plausibilité des poids d’attention dans la tâche d’inférence en langue naturelle (Filtering and regularization to improve the plausibility of attention weights in <fixed-case>NLI</fixed-case>) DucHau Nguyen - GuillaumeGravier + GuillaumeGravier PascaleSébillot 95–103 Nous étudions la plausibilité d’un mécanisme d’attention pour une tâche d’inférence de phrases (entailment), c’est-à-dire sa capacité à fournir une explication plausible pour un humain de la relation entre deux phrases. En s’appuyant sur le corpus Explanation-Augmented Standford Natural Language Inference, il a été montré que les poids d’attention sont peu plausibles en pratique et tendent à ne pas se concentrer sur les tokens importants. Nous étudions ici différentes approches pour rendre les poids d’attention plus plausibles, en nous appuyant sur des masques issus d’une analyse morphosyntaxique ou sur une régularisation pour forcer la parcimonie. Nous montrons que ces stratégies permettent d’améliorer sensiblement la plausibilité des poids d’attention et s’avèrent plus performantes que les approches par carte de saillance. @@ -129,8 +129,8 @@ Génération de question à partir d’analyse sémantique pour l’adaptation non supervisée de modèles de compréhension de documents (Question generation from semantic analysis for unsupervised adaptation of document understanding models) ElieAntoine JeremyAuguste - FredericBechet - GéraldineDamnati + FredericBechet + GéraldineDamnati 104–115 La génération automatique de questions à partir de textes peut permettre d’obtenir des corpus d’apprentissage pour des modèles de compréhension de documents de type question/réponse sur des textes. Si cette tâche de génération est désormais appréhendée par des modèles de type séquence-àséquence basés sur de grands modèles de langage pré-entraînés, le choix des segments réponses à partir desquels seront générées les questions est l’un des principaux aspects différenciant les méthodes de génération de corpus de question/réponse. Nous proposons dans cette étude d’exploiter l’analyse sémantique de textes pour sélectionner des réponses plausibles et enrichir le processus de génération par des traits sémantiques génériques. Les questions générées sont évaluées dans leur capacité à être utilisées pour entraîner un modèle de question-réponse sur un nouveau corpus d’archives numérisées. 2022.jeptalnrecital-taln.10 @@ -182,11 +182,11 @@ Le projet <fixed-case>FREEM</fixed-case> : ressources, outils et enjeux pour l’étude du français d’Ancien Régime (The <fixed-case>F</fixed-case> <fixed-case>RE</fixed-case> <fixed-case>EM</fixed-case> project: Resources, tools and challenges for the study of Ancien Régime <fixed-case>F</fixed-case>rench) SimonGabay - PedroOrtiz Suarez + PedroOrtiz Suarez RachelBawden AlexandreBartz PhilippeGambette - BenoîtSagot + BenoîtSagot 154–165 En dépit de leur qualité certaine, les ressources et outils disponibles pour l’analyse du français d’Ancien Régime ne sont plus à même de répondre aux enjeux de la recherche en linguistique et en littérature pour cette période. Après avoir précisément défini le cadre chronologique retenu, nous présentons les corpus mis à disposition et les résultats obtenus avec eux pour plusieurs tâches de TAL fondamentales à l’étude de la langue et de la littérature. 2022.jeptalnrecital-taln.15 @@ -290,7 +290,7 @@ LoïcFosse Duc-HauNguyen PascaleSébillot - GuillaumeGravier + GuillaumeGravier 247–256 Nous étudions les propriétés statistiques des plongements dans les modèles transformers pour le français. Nous nous appuyons sur une analyse de la variance, des similarités cosinus intra-phrase et du rang effectif des plongements aux différents niveaux d’un transformer, pour des modèles pré-entraînés et des modèles adaptés à la classification de textes. Nous montrons que les modèles FlauBERT et CamemBERT pré-entraînés ont des comportements très différents même si les deux ont une tendance à générer des représentations anisotropiques, c’est-à-dire se concentrant dans un cône au sein de l’espace des plongements, comme observé pour l’anglais. L’adaptation à la classification de textes modifie le comportement des modèles, notamment dans les dernières couches, et procure une tendance forte à l’alignement des plongements, réduisant également la dimension effective de l’espace au final. Nous mettons également en évidence un lien entre convergence des plongements au sein d’une phrase et classification de texte, lien dont la nature reste difficile à appréhender. 2022.jeptalnrecital-taln.24 @@ -312,7 +312,7 @@ Adaptation au domaine de modèles de langue à l’aide de réseaux à base de graphes (Graph Neural Networks for Adapting General Domain Language Modèles Specialised Corpora) MeriemeBouhandi EmmanuelMorin - ThierryHamon + ThierryHamon 270–279 Les modèles de langue prodonds encodent les propriétés linguistiques et sont utilisés comme entrée pour des modèles plus spécifiques. Utiliser leurs représentations de mots telles quelles pour des domaines peu dotés se révèle être moins efficace. De plus, ces modèles négligent souvent les informations globales sur le vocabulaire au profit d’une plus forte dépendance à l’attention. Nous considérons que ces informations influent sur les résultats des tâches en aval. Leur combinaison avec les représentations contextuelles est effectuée à l’aide de réseaux de neurones à base de graphes. Nous montrons que l’utilité de cette combinaison qui surpassent les performances de baselines. 2022.jeptalnrecital-taln.26 @@ -326,7 +326,7 @@ AgataSavary IskanderKeskes JeanYves Antoine - LamiaHadrich Belguith + LamiaHadrich Belguith 280–286 Cet article décrit nos efforts pour étendre le projet PARSEME à l’arabe standard moderne. L’applicabilité du guide d’annotation de PARSEME a été testée en mesurant l’accord inter-annotateurs dès la première phase d’annotation. Un sous-ensemble de 1062 phrases du Prague Arabic Dependency Treebank (PADT) a été sélectionné et annoté indépendamment par deux locutrices natives arabes. Suite à leurs annotations, un nouveau corpus arabe avec plus de 1250 expressions polylexicales verbales (EPV) annotées a été construit. 2022.jeptalnrecital-taln.27 @@ -337,7 +337,7 @@ <fixed-case>CLISTER</fixed-case> : Un corpus pour la similarité sémantique textuelle dans des cas cliniques en français (<fixed-case>CLISTER</fixed-case> : A Corpus for Semantic Textual Similarity in <fixed-case>F</fixed-case>rench Clinical Narratives) NicolasHiebel KarënFort - AurélieNévéol + AurélieNévéol OlivierFerret 287–296 Le TAL repose sur la disponibilité de corpus annotés pour l’entraînement et l’évaluation de modèles. Il existe très peu de ressources pour la similarité sémantique dans le domaine clinique en français. Dans cette étude, nous proposons une définition de la similarité guidée par l’analyse clinique et l’appliquons au développement d’un nouveau corpus partagé de 1 000 paires de phrases annotées manuellement en scores de similarité. Nous évaluons ensuite le corpus par des expériences de mesure automatique de similarité. Nous montrons ainsi qu’un modèle de plongements de phrases peut capturer la similarité avec des performances à l’état de l’art sur le corpus DEFT STS (Spearman=0,8343). Nous montrons également que le contenu du corpus CLISTER est complémentaire de celui de DEFT STS. @@ -361,7 +361,7 @@ Classification automatique de questions spontanées vs. préparées dans des transcriptions de l’oral (Automatic Classification of Spontaneous vs) - IrisEshkol-Taravella + IrisEshkol-Taravella AngèleBarbedette XingyuLiu Valentin-GabrielSoumah @@ -408,7 +408,7 @@ Fine-tuning de modèles de langues pour la veille épidémiologique multilingue avec peu de ressources (Fine-tuning Language Models for Low-resource Multilingual Epidemic Surveillance) StephenMutuvi - EmanuelaBoros + EmanuelaBoros AntoineDoucet AdamJatowt GaëlLejeune @@ -421,7 +421,7 @@ <fixed-case>F</fixed-case>rench <fixed-case>C</fixed-case>row<fixed-case>S</fixed-case>-Pairs: Extension à une langue autre que l’anglais d’un corpus de mesure des biais sociétaux dans les modèles de langue masqués (<fixed-case>F</fixed-case>rench <fixed-case>C</fixed-case>row<fixed-case>S</fixed-case>-Pairs : Extending a challenge dataset for measuring social bias in masked language models to a language other than <fixed-case>E</fixed-case>nglish) - AurélieNévéol + AurélieNévéol YoannDupont JulienBezançon KarënFort @@ -445,8 +445,8 @@ L’importance des entités pour la tâche de détection d’événements en tant que système de question-réponse (Exploring Entities in Event Detection as Question Answering) - EmanuelaBoros - JoseMoreno + EmanuelaBoros + JoseMoreno AntoineDoucet 374–383 Dans cet article, nous abordons un paradigme récent et peu étudié pour la tâche de détection d’événements en la présentant comme un problème de question-réponse avec possibilité de réponses multiples et le support d’entités. La tâche d’extraction des déclencheurs d’événements est ainsi transformée en une tâche d’identification des intervalles de réponse à partir d’un contexte, tout en se concentrant également sur les entités environnantes. L’architecture est basée sur un modèle de langage pré-entraîné et finement ajusté, où le contexte d’entrée est augmenté d’entités marquées à différents niveaux, de leurs positions, de leurs types et, enfin, de leurs rôles d’arguments. Nos expériences sur le corpus ACE 2005 démontrent que le modèle proposé exploite correctement les informations sur les entités dans le cadre de la détection des événements et qu’il constitue une solution viable pour cette tâche. De plus, nous démontrons que notre méthode, avec différents marqueurs d’entités, est particulièrement capable d’extraire des types d’événements non vus dans des contextes d’apprentissage en peu de coups. @@ -458,7 +458,7 @@ Les représentations distribuées sont-elles vraiment distribuées ? Observations sur la localisation de l’information syntaxique dans les tâches d’accord du verbe en français (How Distributed are Distributed Representations ? An Observation on the Locality of Syntactic) BingzhiLi GuillaumeWisniewski - BenoîtCrabbé + BenoîtCrabbé 384–391 Ce travail aborde la question de la localisation de l’information syntaxique qui est encodée dans les représentations de transformers. En considérant la tâche d’accord objet-participe passé en français, les résultats de nos sondes linguistiques montrent que les informations nécessaires pour accomplir la tâche sont encodées d’une manière locale dans les représentations de mots entre l’antécédent du pronom relatif objet et le participe passé cible. En plus, notre analyse causale montre que le modèle s’appuie essentiellement sur les éléments linguistiquement motivés (i.e. antécédent et pronom relatif) pour prédire le nombre du participe passé. 2022.jeptalnrecital-taln.38 @@ -499,7 +499,7 @@ Tâches Auxiliaires Multilingues pour le Transfert de Modèles de Détection de Discours Haineux (Multilingual Auxiliary Tasks for Zero-Shot Cross-Lingual Transfer of Hate Speech Detection) ArijRiabi SyrielleMontariol - DjaméSeddah + DjaméSeddah 413–423 La tâche de détection de contenus haineux est ardue, car elle nécessite des connaissances culturelles et contextuelles approfondies ; les connaissances nécessaires varient, entre autres, selon la langue du locateur ou la cible du contenu. Or, des données annotées pour des domaines et des langues spécifiques sont souvent absentes ou limitées. C’est là que les données dans d’autres langues peuvent être exploitées ; mais du fait de ces variations, le transfert cross-lingue est souvent difficile. Dans cet article, nous mettons en évidence cette limitation pour plusieurs domaines et langues et montrons l’impact positif de l’apprentissage de tâches auxiliaires multilingues - analyse de sentiments, reconnaissance des entités nommées et tâches reposant sur des informations morpho-syntaxiques - sur le transfert cross-lingue zéro-shot des modèles de détection de discours haineux, afin de combler ce fossé culturel. 2022.jeptalnrecital-taln.41 @@ -508,7 +508,7 @@ Tâches auxiliaires pour l’analyse biaffine en graphes de dépendances (Auxiliary tasks to boost Biaffine Semantic Dependency Parsing) - MarieCandito + MarieCandito 424–433 L’analyseur biaffine de Dozat & Manning (2017), qui produit des arbres de dépendances syntaxiques, a été étendu avec succès aux graphes de dépendances syntaxico-sémantiques (Dozat & Manning, 2018). Ses performances sur les graphes sont étonnamment hautes étant donné que, sans la contrainte de devoir produire un arbre, les arcs pour une phrase donnée sont prédits indépendamment les uns des autres. Pour y remédier partiellement, tout en conservant la complexité O(n2 ) et l’architecture hautement parallélisable, nous proposons d’utiliser des tâches auxiliaires qui introduisent une forme d’interdépendance entre les arcs. Les expérimentations sur les trois jeux de données anglaises de la tâche 18 SemEval-2015 (Oepen et al., 2015), et sur des graphes syntaxiques profonds en français (Ribeyre et al., 2014) montrent une amélioration modeste mais systématique, par rapport à un système de base performant, utilisant un modèle de langue pré-entraîné. Notre méthode s’avère ainsi un moyen simple et robuste d’améliorer l’analyse vers graphes de dépendances. 2022.jeptalnrecital-taln.42 @@ -522,7 +522,7 @@ CamilleGuinaudeau HervéLe Borgne RomaricBesançon - JoseMoreno + JoseMoreno JesúsLovón-Melgarejo 434–444 Dans le contexte général des traitements multimodaux, nous nous intéressons à la tâche de réponse à des questions visuelles à propos d’entités nommées en utilisant des bases de connaissances (KVQAE). Nous mettons à disposition ViQuAE, un nouveau jeu de données de 3 700 questions associées à des images, annoté à l’aide d’une méthode semi-automatique. C’est le premier jeu de données de KVQAE comprenant des types d’entités variés associé à une base de connaissances composée d’1,5 million d’articles Wikipédia, incluant textes et images. Nous proposons également un modèle de référence de KVQAE en deux étapes : recherche d’information puis extraction des réponses. Les résultats de nos expériences démontrent empiriquement la difficulté de la tâche et ouvrent la voie à une meilleure représentation multimodale des entités nommées. @@ -559,8 +559,8 @@ Quand être absent de m<fixed-case>BERT</fixed-case> n’est que le commencement : Gérer de nouvelles langues à l’aide de modèles de langues multilingues (When Being Unseen from m<fixed-case>BERT</fixed-case> is just the Beginning : Handling New Languages With Multilingual Language Models) BenjaminMuller AntoniosAnastasopoulos - BenoîtSagot - DjaméSeddah + BenoîtSagot + DjaméSeddah 450–451 L’apprentissage par transfert basé sur le pré-entraînement de modèles de langue sur une grande quantité de données brutes est devenu la norme pour obtenir des performances état de l’art en TAL. Cependant, la façon dont cette approche devrait être appliquée pour des langues inconnues, qui ne sont couvertes par aucun modèle de langue multilingue à grande échelle et pour lesquelles seule une petite quantité de données brutes est le plus souvent disponible, n’est pas claire. Dans ce travail, en comparant des modèles multilingues et monolingues, nous montrons que de tels modèles se comportent de multiples façons sur des langues inconnues. Certaines langues bénéficient grandement de l’apprentissage par transfert et se comportent de manière similaire à des langues proches riches en ressource, alors que ce n’est manifestement pas le cas pour d’autres. En nous concentrant sur ces dernières, nous montrons dans ce travail que cet échec du transfert est largement lié à l’impact du script que ces langues utilisent. Nous montrons que la translittération de ces langues améliore considérablement le potentiel des larges modèles de langue neuronaux multilingues pour des tâches en aval. Ce résultat indique une piste prometteuse pour rendre ces modèles massivement multilingues utiles pour de nouveaux ensembles de langues absentes des données d’entraînement. 2022.jeptalnrecital-taln.46 @@ -746,7 +746,7 @@ HaQuang Le AnneVilnat GabrielIllouz - PatrickParoubek + PatrickParoubek 15–17 Dans cette démonstration, nous présenterons les travaux en cours pour l’annotation d’un nouveau corpus de questions-réponses en langue Française. Contrairement aux corpus existant comme “FQuad” ou “Piaf”, nous nous intéressons à l’annotation de questions-réponses “non factuelles”. En effet, si dans la littérature, de nombreux corpus et modèles de questions-réponses pré-entraînés sont disponibles, ceux-ci ne privilégient que rarement les annotations s’appuyant sur un schéma de raisonnement issue de l’agrégation de différentes sources ou contextes. L’objectif du projet associé est de parvenir à la création d’un assistant virtuel pour l’éducation, ainsi des réponses explicatives, de raisonnement et/ou d’agrégation de l’information sont à privilégier. Notons enfin, que la volumétrie des données doit être conséquente, en particulier par la considération d’approches neuronales génératives ou extractives. Actuellement, nous disposons de 262 questions et réponses obtenues durant l’étape de validation de la campagne d’annotation. Une deuxième phase d’annotation avec une volumétrie plus importante débutera fin mai 2022 (environ 8000 questions). 2022.jeptalnrecital-demo.5 @@ -895,7 +895,7 @@ Reconnaissance automatique des appellations d’œuvres visuelles antiques (Recognition of classical visual works appellations) AuroreLessieux - IrisEshkol-Taravella + IrisEshkol-Taravella Anne-ViolaineSzabados MarlèneNazarian 36–44 @@ -941,7 +941,7 @@ Simulation d’erreurs d’<fixed-case>OCR</fixed-case> dans les systèmes de <fixed-case>TAL</fixed-case> pour le traitement de données anachroniques (Simulation of <fixed-case>OCR</fixed-case> errors in <fixed-case>NLP</fixed-case> systems for processing anachronistic data) BaptisteBlouin - BenoitFavre + BenoitFavre JeremyAuguste 78–87 L’extraction d’information offre de nouvelles perspectives au sein des recherches historiques. Cependant, la majorité des recherches liées à ce domaine s’effectue sur des données contemporaines. Malgré l’évolution constante des systèmes d’OCR, les textes historiques résultant de ce procédé contiennent toujours de multiples erreurs. Du fait d’un manque de ressources historiques dédiées au TAL, le traitement de ce domaine reste dépendant de l’utilisation de ressources contemporaines. De nombreuses études ont démontré l’impact négatif que pouvaient avoir les erreurs d’OCR sur les systèmes prêts à l’emploi contemporains. Mais l’évaluation des nouvelles architectures, proposant des résultats prometteurs sur des données récentes, face à ce problème reste encore très minime. Dans cette étude, nous quantifions l’impact des erreurs d’OCR sur trois tâches d’extraction d’information en utilisant plusieurs architectures de type Transformers. Au vu de ces résultats, nous proposons une approche permettant de réduire de plus de 50% cet impact sans avoir recours à des ressources historiques spécialisées. diff --git a/data/xml/2022.jlcl.xml b/data/xml/2022.jlcl.xml index a38956db9d..b29708a463 100644 --- a/data/xml/2022.jlcl.xml +++ b/data/xml/2022.jlcl.xml @@ -34,7 +34,7 @@ InesRehbein GabriellaLapesa GoranGlavaš - Simone PaoloPonzetto + Simone PaoloPonzetto German Society for Computational Lingustics and Language Technology
Germany
Jul. diff --git a/data/xml/2022.konvens.xml b/data/xml/2022.konvens.xml index 1963413ea4..26982948e5 100644 --- a/data/xml/2022.konvens.xml +++ b/data/xml/2022.konvens.xml @@ -40,7 +40,7 @@
Lemma Hunting: Automatic Spelling Normalization for <fixed-case>G</fixed-case>erman <fixed-case>CMC</fixed-case> Corpora - EckhardBick + EckhardBick 16–20 2022.konvens-1.3 bick-2022-lemma @@ -66,8 +66,8 @@ Adapting <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et for the Semantic Web ClausZinn - MarieHinrichs - ErhardHinrichs + MarieHinrichs + ErhardHinrichs 41–47 2022.konvens-1.6 zinn-etal-2022-adapting @@ -87,7 +87,7 @@ Measuring Faithfulness of Abstractive Summaries TimFischer SteffenRemus - ChrisBiemann + ChrisBiemann 63–73 2022.konvens-1.8 fischer-etal-2022-measuring @@ -107,7 +107,7 @@ Do gender neutral affixes naturally reduce gender bias in static word embeddings? JonasWagner - SinaZarrieß + SinaZarrieß 88–97 2022.konvens-1.10 wagner-zarriess-2022-gender @@ -116,7 +116,7 @@ Improved Open Source Automatic Subtitling for Lecture Videos RobertGeislinger BenjaminMilde - ChrisBiemann + ChrisBiemann 98–103 2022.konvens-1.11 geislinger-etal-2022-improved @@ -132,7 +132,7 @@ Improved Opinion Role Labelling in Parliamentary Debates LauraBamberg InesRehbein - SimonePonzetto + SimonePonzetto 110–120 2022.konvens-1.13 bamberg-etal-2022-improved @@ -147,7 +147,7 @@ This isn’t the bias you’re looking for: Implicit causality, names and gender in <fixed-case>G</fixed-case>erman language models - SinaZarrieß + SinaZarrieß HannesGroener TorgrimSolstad OliverBott @@ -167,7 +167,7 @@ Semantic Role Labeling for Sentiment Inference: A Case Study ManfredKlenner - AnneGöhring + AnneGöhring 144–149 2022.konvens-1.17 klenner-gohring-2022-semantic @@ -187,7 +187,7 @@ SabaAnwar FynnPetersen-Frey SeidMuhie Yimam - ChrisBiemann + ChrisBiemann 156–166 2022.konvens-1.19 remus-etal-2022-like diff --git a/data/xml/2022.latechclfl.xml b/data/xml/2022.latechclfl.xml index dfd11ed1ec..bb7b394238 100644 --- a/data/xml/2022.latechclfl.xml +++ b/data/xml/2022.latechclfl.xml @@ -6,7 +6,7 @@ StefaniaDegaetano AnnaKazantseva NilsReiter - StanSzpakowicz + StanSzpakowicz International Conference on Computational Linguistics
Gyeongju, Republic of Korea
October @@ -105,7 +105,7 @@
The Distribution of Deontic Modals in Jane Austen’s Mature Novels - LaurenLevine + LaurenLevine 70–74 Deontic modals are auxiliary verbs which express some kind of necessity, obligation, or moral recommendation. This paper investigates the collocation and distribution within Jane Austen’s six mature novels of the following deontic modals: must, should, ought, and need. We also examine the co-occurrences of these modals with name mentions of the heroines in the six novels, categorizing each occurrence with a category of obligation if applicable. The paper offers a brief explanation of the categories of obligation chosen for this investigation. In order to examine the types of obligations associated with each heroine, we then investigate the distribution of these categories in relation to mentions of each heroine. The patterns observed show a general concurrence with the thematic characterizations of Austen’s heroines which are found in literary analysis. 2022.latechclfl-1.9 @@ -167,7 +167,7 @@ Measuring Presence of Women and Men as Information Sources in News MuitzeZulaika - XabierSaralegi + XabierSaralegi IñakiSan Vicente 126–134 In the news, statements from information sources are often quoted, made by individuals who interact in the news. Detecting those quotes and the gender of their sources is a key task when it comes to media analysis from a gender perspective. It is a challenging task: the structure of the quotes is variable, gender marks are not present in many languages, and quote authors are often omitted due to frequent use of coreferences. This paper proposes a strategy to measure the presence of women and men as information sources in news. We approach the problem of detecting sentences including quotes and the gender of the speaker as a joint task, by means of a supervised multiclass classifier of sentences. We have created the first datasets for Spanish and Basque by manually annotating quotes and the gender of the associated sources in news items. The results obtained show that BERT based approaches are significantly better than bag-of-words based classical ones, achieving accuracies close to 90%. We also analyse a bilingual learning strategy and generating additional training examples synthetically; both provide improvements up to 3.4% and 5.6%, respectively. diff --git a/data/xml/2022.lateraisse.xml b/data/xml/2022.lateraisse.xml index 14a910edf2..5239774ec8 100644 --- a/data/xml/2022.lateraisse.xml +++ b/data/xml/2022.lateraisse.xml @@ -32,7 +32,7 @@ Objectifying Women? A Syntactic Bias in <fixed-case>F</fixed-case>rench and <fixed-case>E</fixed-case>nglish Corpora. Yanisda Cunha - AnneAbeillé + AnneAbeillé 8–16 Gender biases in syntax have been documented for languages with grammatical gender for cases where mixed-gender coordination structures take masculine agreement, or with male-first preference in the ordering of pairs (Adam and Eve). On the basis of various annotated corpora spanning different genres (fiction, newspapers, speech and web), we show another syntactic gender bias: masculine pronouns are more often subjects than feminine pronouns, in both English and French. We find the same bias towards masculine subjects for French human nouns, which then refer to males and females. Comparing the subject of passive verbs and the object of active verbs, we show that this syntactic function bias is not reducible to a bias in semantic role assignment since it is also found with non-agentive subjects. For French fiction, we also found that the masculine syntactic function bias is larger in text written by male authors – female authors seem to be unbiased. We finally discuss two principles as possible explanations, ‘Like Me’ and ‘Easy first’, and examine the effect of the discourse tendency for men being agents and topics. We conclude by addressing the impact of such biases in language technologies. 2022.lateraisse-1.2 @@ -63,7 +63,7 @@ Identifying Hate Speech Using Neural Networks and Discourse Analysis Techniques Zehra MelceHüsünbeyi DidarAkar - ArzucanÖzgür + ArzucanÖzgür 32–41 Discriminatory language, in particular hate speech, is a global problem posing a grave threat to democracy and human rights. Yet, it is not always easy to identify, as it is rarely explicit. In order to detect hate speech, we developed Hierarchical Attention Network (HAN) based and Bidirectional Encoder Representations from Transformer (BERT) based deep learning models to capture the changing discursive cues and understand the context around the discourse. In addition, we designed linguistic features using critical discourse analysis techniques and integrated them into these neural network models. We studied the compatibility of our model with the hate speech detection problem by comparing it with traditional machine learning models, as well as a Convolution Neural Network (CNN) based model, a Convolutional Neural Network-Gated Recurrent Unit (CNN-GRU) based model which reached significant performance results for hate speech detection. Our results on a manually annotated corpus of print media in Turkish show that the proposed approach is effective for hate speech detection. We believe that the feature sets created for the Turkish language will encourage new studies in the quantitative analysis of hate speech. 2022.lateraisse-1.5 diff --git a/data/xml/2022.law.xml b/data/xml/2022.law.xml index bc5a0342ae..0a42bd16a0 100644 --- a/data/xml/2022.law.xml +++ b/data/xml/2022.law.xml @@ -3,8 +3,8 @@ Proceedings of the 16th Linguistic Annotation Workshop (LAW-XVI) within LREC2022 - SameerPradhan - SandraKuebler + SameerPradhan + SandraKuebler European Language Resources Association
Marseille, France
June @@ -46,7 +46,7 @@ Converting the <fixed-case>S</fixed-case>inica <fixed-case>T</fixed-case>reebank of <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese to <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Yu-MingHsieh Yueh-YinShih - Wei-YunMa + Wei-YunMa 23–30 This paper describes the conversion of the Sinica Treebank, one of the major Mandarin Chinese treebanks, to Universal Dependencies. The conversion is rule-based and the process involves POS tag mapping, head adjusting in line with the UD scheme and the dependency conversion. Linguistic insights into Mandarin Chinese alongwith the conversion are also discussed. The resulting corpus is the UD Chinese Sinica Treebank which contains more than fifty thousand tree structures according to the UD scheme. The dataset can be downloaded at https://github.com/ckiplab/ud. 2022.law-1.4 @@ -64,7 +64,7 @@ The Sensitivity of Annotator Bias to Task Definitions in Argument Mining Terne SashaThorn Jakobsen MariaBarrett - AndersSøgaard + AndersSøgaard DavidLassen 44–61 NLP models are dependent on the data they are trained on, including how this data is annotated. NLP research increasingly examines the social biases of models, but often in the light of their training data and specific social biases that can be identified in the text itself. In this paper, we present an annotation experiment that is the first to examine the extent to which social bias is sensitive to how data is annotated. We do so by collecting annotations of arguments in the same documents following four different guidelines and from four different demographic annotator backgrounds. We show that annotations exhibit widely different levels of group disparity depending on which guidelines annotators follow. The differences are not explained by task complexity, but rather by characteristics of these demographic groups, as previously identified by sociological studies. We release a dataset that is small in the number of instances but large in the number of annotations with demographic information, and our results encourage an increased awareness of annotator bias. @@ -84,7 +84,7 @@
Advantages of a Complex Multilayer Annotation Scheme: The Case of the <fixed-case>P</fixed-case>rague Dependency Treebank - EvaHajicova + EvaHajicova MarieMikulová BarboraŠtěpánková JiříMírovský @@ -109,7 +109,7 @@ KevinDeturck DamienNouvel NamrataPatel - FrédériqueSegond + FrédériqueSegond 85–90 To develop an influencer detection system, we designed an influence model based on the analysis of conversations in the “Change My View” debate forum. This led us to identify enunciative features (argumentation, emotion expression, view change, ...) related to influence between participants. In this paper, we present the annotation campaign we conducted to build up a reference corpus on these enunciative features. The annotation task was to identify in social media posts the text segments that corresponded to each enunciative feature. The posts to be annotated were extracted from two social media: the “Change My View” debate forum, with discussions on various topics, and Twitter, with posts from users identified as supporters of ISIS (Islamic State of Iraq and Syria). Over a thousand posts have been double or triple annotated throughout five annotation sessions gathering a total of 27 annotators. Some of the sessions involved the same annotators, which allowed us to analyse the evolution of their annotation work. Most of the sessions resulted in a reconciliation phase between the annotators, allowing for discussion and iterative improvement of the guidelines. We measured and analysed inter-annotator agreements over the course of the sessions, which allowed us to validate our iterative approach. 2022.law-1.10 @@ -119,7 +119,7 @@ Charon: A <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Annotation Tool for Multimodal Corpora FredericoBelcavello MarceloViridiano - ElyMatos + ElyMatos TiagoTimponi Torrent 91–96 This paper presents Charon, a web tool for annotating multimodal corpora with FrameNet categories. Annotation can be made for corpora containing both static images and video sequences paired – or not – with text sequences. The pipeline features, besides the annotation interface, corpus import and pre-processing tools. @@ -140,7 +140,7 @@ <fixed-case>M</fixed-case>idas Loop: A Prioritized Human-in-the-Loop Annotation for Large Scale Multilayer Data LukeGessler - LaurenLevine + LaurenLevine AmirZeldes 103–110 Large scale annotation of rich multilayer corpus data is expensive and time consuming, motivating approaches that integrate high quality automatic tools with active learning in order to prioritize human labeling of hard cases. A related challenge in such scenarios is the concurrent management of automatically annotated data and human annotated data, particularly where different subsets of the data have been corrected for different types of annotation and with different levels of confidence. In this paper we present [REDACTED], a collaborative, version-controlled online annotation environment for multilayer corpus data which includes integrated provenance and confidence metadata for each piece of information at the document, sentence, token and annotation level. We present a case study on improving annotation quality in an existing multilayer parse bank of English called AMALGUM, focusing on active learning in corpus preprocessing, at the surprisingly challenging level of sentence segmentation. Our results show improvements to state-of-the-art sentence segmentation and a promising workflow for getting “silver” data to approach gold standard quality. @@ -163,7 +163,7 @@ Putting Context in <fixed-case>SNACS</fixed-case>: A 5-Way Classification of Adpositional Pragmatic Markers - Yang JanetLiu + Yang JanetLiu Jena D.Hwang NathanSchneider VivekSrikumar @@ -175,7 +175,7 @@ Building a Biomedical Full-Text Part-of-Speech Corpus Semi-Automatically NicholasElder - Robert E.Mercer + Robert E.Mercer SudiptaSingha Roy 129–138 This paper presents a method for semi-automatically building a corpus of full-text English-language biomedical articles annotated with part-of-speech tags. The outcomes are a semi-automatic procedure to create a large silver standard corpus of 5 million sentences drawn from a large corpus of full-text biomedical articles annotated for part-of-speech, and a robust, easy-to-use software tool that assists the investigation of differences in two tagged datasets. The method to build the corpus uses two part-of-speech taggers designed to tag biomedical abstracts followed by a human dispute settlement when the two taggers differ on the tagging of a token. The dispute resolution aspect is facilitated by the software tool which organizes and presents the disputed tags. The corpus and all of the software that has been implemented for this study are made publicly available. @@ -198,7 +198,7 @@ A Cognitive Approach to Annotating Causal Constructions in a Cross-Genre Corpus AngelaCao GregorWilliamson - Jinho D.Choi + Jinho D.Choi 151–159 We present a scheme for annotating causal language in various genres of text. Our annotation scheme is built on the popular categories of cause, enable, and prevent. These vague categories have many edge cases in natural language, and as such can prove difficult for annotators to consistently identify in practice. We introduce a decision based annotation method for handling these edge cases. We demonstrate that, by utilizing this method, annotators are able to achieve inter-annotator agreement which is comparable to that of previous studies. Furthermore, our method performs equally well across genres, highlighting the robustness of our annotation scheme. Finally, we observe notable variation in usage and frequency of causal language across different genres. 2022.law-1.18 @@ -208,7 +208,7 @@ Automatic Enrichment of <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentations YuxinJi GregorWilliamson - Jinho D.Choi + Jinho D.Choi 160–169 Abstract Meaning Representation (AMR) is a semantic graph framework which inadequately represent a number of important semantic features including number, (in)definiteness, quantifiers, and intensional contexts. Several proposals have been made to improve the representational adequacy of AMR by enriching its graph structure. However, these modifications are rarely added to existing AMR corpora due to the labor costs associated with manual annotation. In this paper, we develop an automated annotation tool which algorithmically enriches AMR graphs to better represent number, (in)definite articles, quantificational determiners, and intensional arguments. We compare our automatically produced annotations to gold-standard manual annotations and show that our automatic annotator achieves impressive results. All code for this paper, including our automatic annotation tool, is made publicly available. 2022.law-1.19 @@ -217,7 +217,7 @@ <fixed-case>GRAIL</fixed-case>—<fixed-case>G</fixed-case>eneralized Representation and Aggregation of Information Layers SameerPradhan - MarkLiberman + MarkLiberman 170–181 This paper identifies novel characteristics necessary to successfully represent multiple streams of natural language information from speech and text simultaneously, and proposes a multi-tiered system that implements these characteristics centered around a declarative configuration. The system facilitates easy incremental extension by allowing the creation of composable workflows of loosely coupled extensions, or plugins, allowing simple intial systems to be extended to accomodate rich representations while maintaining high data integrity. Key to this is leveraging established tools and technologies. We demonstrate using a small example. 2022.law-1.20 diff --git a/data/xml/2022.lchange.xml b/data/xml/2022.lchange.xml index 52d503c12a..ed7f48dbfb 100644 --- a/data/xml/2022.lchange.xml +++ b/data/xml/2022.lchange.xml @@ -28,9 +28,9 @@ MariekeVan Erp IngerLeemans PasqualeLisena - RaphaelTroncy + RaphaelTroncy WilliamTullett - AliHürriyetoğlu + AliHürriyetoğlu GerDijkstra FemkeGordijn EliasJürgens @@ -234,7 +234,7 @@ <fixed-case>BOS</fixed-case> at <fixed-case>LSCD</fixed-case>iscovery: Lexical Substitution for Interpretable Lexical Semantic Change Detection ArtemKudisov - NikolayArefyev + NikolayArefyev 165-172 We propose a solution for the LSCDiscovery shared task on Lexical Semantic Change Detection in Spanish. Our approach is based on generating lexical substitutes that describe old and new senses of a given word. This approach achieves the second best result in sense loss and sense gain detection subtasks. By observing those substitutes that are specific for only one time period, one can understand which senses were obtained or lost. This allows providing more detailed information about semantic change to the user and makes our method interpretable. 2022.lchange-1.17 @@ -245,7 +245,7 @@ <fixed-case>D</fixed-case>eep<fixed-case>M</fixed-case>istake at <fixed-case>LSCD</fixed-case>iscovery: Can a Multilingual Word-in-Context Model Replace Human Annotators? DaniilHomskiy - NikolayArefyev + NikolayArefyev 173-179 In this paper we describe our solution of the LSCDiscovery shared task on Lexical Semantic Change Discovery (LSCD) in Spanish. Our solution employs a Word-in-Context (WiC) model, which is trained to determine if a particular word has the same meaning in two given contexts. We basically try to replicate the annotation of the dataset for the shared task, but replacing human annotators with a neural network. In the graded change discovery subtask, our solution has achieved the 2nd best result according to all metrics. In the main binary change detection subtask, our F1-score is 0.655 compared to 0.716 of the best submission, corresponding to the 5th place. However, in the optional sense gain detection subtask we have outperformed all other participants. During the post-evaluation experiments we compared different ways to prepare WiC data in Spanish for fine-tuning. We have found that it helps leaving only examples annotated as 1 (unrelated senses) and 4 (identical senses) rather than using 2x more examples including intermediate annotations. 2022.lchange-1.18 @@ -295,7 +295,7 @@ <fixed-case>G</fixed-case>loss<fixed-case>R</fixed-case>eader at <fixed-case>LSCD</fixed-case>iscovery: Train to Select a Proper Gloss in <fixed-case>E</fixed-case>nglish – Discover Lexical Semantic Change in <fixed-case>S</fixed-case>panish MaximRachinskiy - NikolayArefyev + NikolayArefyev 198-203 The contextualized embeddings obtained from neural networks pre-trained as Language Models (LM) or Masked Language Models (MLM) are not well suitable for solving the Lexical Semantic Change Detection (LSCD) task because they are more sensitive to changes in word forms rather than word meaning, a property previously known as the word form bias or orthographic bias. Unlike many other NLP tasks, it is also not obvious how to fine-tune such models for LSCD. In order to conclude if there are any differences between senses of a particular word in two corpora, a human annotator or a system shall analyze many examples containing this word from both corpora. This makes annotation of LSCD datasets very labour-consuming. The existing LSCD datasets contain up to 100 words that are labeled according to their semantic change, which is hardly enough for fine-tuning. To solve these problems we fine-tune the XLM-R MLM as part of a gloss-based WSD system on a large WSD dataset in English. Then we employ zero-shot cross-lingual transferability of XLM-R to build the contextualized embeddings for examples in Spanish. In order to obtain the graded change score for each word, we calculate the average distance between our improved contextualized embeddings of its old and new occurrences. For the binary change detection subtask, we apply thresholding to the same scores. Our solution has shown the best results among all other participants in all subtasks except for the optional sense gain detection subtask. 2022.lchange-1.22 diff --git a/data/xml/2022.ldl.xml b/data/xml/2022.ldl.xml index fbdf779af0..2ac285f771 100644 --- a/data/xml/2022.ldl.xml +++ b/data/xml/2022.ldl.xml @@ -4,7 +4,7 @@ Proceedings of the 8th Workshop on Linked Data in Linguistics within the 13th Language Resources and Evaluation Conference ThierryDeclerck - John P.McCrae + John P.McCrae ElenaMontiel ChristianChiarcos MaximIonov @@ -30,7 +30,7 @@ From <fixed-case>ELT</fixed-case>e<fixed-case>C</fixed-case> Text Collection Metadata and Named Entities to Linked-data (and Back) MilicaIkonić Nešić - RankaStanković + RankaStanković ChristofSchöch MihailoSkoric 7–16 @@ -61,7 +61,7 @@ Use Case: <fixed-case>R</fixed-case>omanian Language Resources in the <fixed-case>LOD</fixed-case> Paradigm - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu ElenaIrimia VasilePais Andrei-MariusAvram @@ -85,7 +85,7 @@ A Cheap and Dirty Cross-Lingual Linking Service in the Cloud ChristianChiarcos - GillesSérasset + GillesSérasset 52–60 In this paper, we describe the application of Linguistic Linked Open Data (LLOD) technology for dynamic cross-lingual querying on demand. Whereas most related research is focusing on providing a static linking, i.e., cross-lingual inference, and then storing the resulting links, we demonstrate the application of the federation capabilities of SPARQL to perform lexical linking on the fly. In the end, we provide a baseline functionality that uses the connection of two web services – a SPARQL end point for multilingual lexical data and another SPARQL end point for querying an English language knowledge graph – in order to perform querying an English language knowledge graph using foreign language labels. We argue that, for low-resource languages where substantial native knowledge graphs are lacking, this functionality can be used to lower the language barrier by allowing to formulate cross-linguistically applicable queries mediated by a multilingual dictionary. 2022.ldl-1.7 @@ -102,14 +102,14 @@ A Survey of Guidelines and Best Practices for the Generation, Interlinking, Publication, and Validation of Linguistic Linked Data - FahadKhan + FahadKhan ChristianChiarcos ThierryDeclerck Maria PiaDi Buono MilanDojchinovski JorgeGracia Giedre ValunaiteOleskeviciene - DanielaGifu + DanielaGifu 69–77 This article discusses a survey carried out within the NexusLinguarum COST Action which aimed to give an overview of existing guidelines (GLs) and best practices (BPs) in linguistic linked data. In particular it focused on four core tasks in the production/publication of linked data: generation, interlinking, publication, and validation. We discuss the importance of GLs and BPs for LLD before describing the survey and its results in full. Finally we offer a number of directions for future work in order to address the findings of the survey. 2022.ldl-1.9 @@ -119,8 +119,8 @@ Computational Morphology with <fixed-case>O</fixed-case>nto<fixed-case>L</fixed-case>ex-Morph ChristianChiarcos KaterinaGkirtzou - FahadKhan - PennyLabropoulou + FahadKhan + PennyLabropoulou MarcoPassarotti MatteoPellegrini 78–86 diff --git a/data/xml/2022.legal.xml b/data/xml/2022.legal.xml index c8e152f071..d5148e1c71 100644 --- a/data/xml/2022.legal.xml +++ b/data/xml/2022.legal.xml @@ -37,7 +37,7 @@ Data Protection, Privacy and <fixed-case>US</fixed-case> Regulation - DeniseDiPersio + DeniseDiPersio 9–16 This paper examines the state of data protection and privacy in the United States. There is no comprehensive federal data protection or data privacy law despite bipartisan and popular support. There are several data protection bills pending in the 2022 session of the US Congress, five of which are examined in Section 2 below. Although it is not likely that any will be enacted, the growing number reflects the concerns of citizens and lawmakers about the power of big data. Recent actions against data abuses, including data breaches, litigation and settlements, are reviewed in Section 3 of this paper. These reflect the real harm caused when personal data is misused. Section 4 contains a brief US copyright law update on the fair use exemption, highlighting a recent court decision and indications of a re-thinking of the fair use analysis. In Section 5, some observations are made on the role of privacy in data protection regulation. It is argued that privacy should be considered from the start of the data collection and technology development process. Enhanced awareness of ethical issues, including privacy, through university-level data science programs will also lay the groundwork for best practices throughout the data and development cycles. 2022.legal-1.3 @@ -56,9 +56,9 @@ Categorizing legal features in a metadata-oriented task: defining the conditions of use MickaëlRigault VictoriaArranz - ValérieMapelli - PennyLabropoulou - SteliosPiperidis + ValérieMapelli + PennyLabropoulou + SteliosPiperidis 22–26 In recent times, more attention has been brought by the Human Language Technology (HLT) community to the legal framework for making available and reusing Language Resources (LR) and tools. Licensing is now an issue that is foreseen in most research projects and that is essential to provide legal certainty for repositories when distributing resources. Some repositories such as Zenodo or Quantum Stat do not offer the possibility to search for resources by licenses which can turn the searching for relevant resources a very complex task. Other repositories such as Hugging Face propose a search feature by license which may make it difficult to figure out what use can be made of such resources. During the European Language Grid (ELG) project, we moved a step forward to link metadata with the terms and conditions of use. In this paper, we document the process we undertook to categorize legal features of licenses listed in the SPDX license list and widely used in the HLT community as well as those licenses used within the ELG platform 2022.legal-1.5 @@ -126,14 +126,14 @@ <fixed-case>MAPA</fixed-case> Project: Ready-to-Go Open-Source Datasets and Deep Learning Technology to Remove Identifying Information from Text Documents VictoriaArranz - KhalidChoukri + KhalidChoukri MontseCuadros AitorGarcía Pablos LucieGianola CyrilGrouin ManuelHerranz - PatrickParoubek - PierreZweigenbaum + PatrickParoubek + PierreZweigenbaum 64–72 This paper presents the outcomes of the MAPA project, a set of annotated corpora for 24 languages of the European Union and an open-source customisable toolkit able to detect and substitute sensitive information in text documents from any domain, using state-of-the art, deep learning-based named entity recognition techniques. In the context of the project, the toolkit has been developed and tested on administrative, legal and medical documents, obtaining state-of-the-art results. As a result of the project, 24 dataset packages have been released and the de-identification toolkit is available as open source. 2022.legal-1.12 @@ -157,7 +157,7 @@ Legal and Ethical Challenges in Recording Air Traffic Control Speech MickaëlRigault ClaudiaCevenini - KhalidChoukri + KhalidChoukri MartinKocour KarelVeselý IgorSzoke diff --git a/data/xml/2022.lnls.xml b/data/xml/2022.lnls.xml index 4207f33de1..3634ab520b 100644 --- a/data/xml/2022.lnls.xml +++ b/data/xml/2022.lnls.xml @@ -52,7 +52,7 @@ AngelicaChen NikitaNangia JasonPhang - SamuelBowman + SamuelBowman 17-28 Current QA systems can generate reasonable-sounding yet false answers without explanation or evidence for the generated answer, which is especially problematic when humans cannot readily check the model’s answers. This presents a challenge for building trust in machine learning systems. We take inspiration from real-world situations where difficult questions are answered by considering opposing sides (see Irving et al., 2018). For multiple-choice QA examples, we build a dataset of single arguments for both a correct and incorrect answer option in a debate-style set-up as an initial step in training models to produce explanations for two candidate answers. We use long contexts—humans familiar with the context write convincing explanations for pre-selected correct and incorrect answers, and we test if those explanations allow humans who have not read the full context to more accurately determine the correct answer. We do not find that explanations in our set-up improve human accuracy, but a baseline condition shows that providing human-selected text snippets does improve accuracy. We use these findings to suggest ways of improving the debate set up for future data collection efforts. 2022.lnls-1.3 diff --git a/data/xml/2022.loresmt.xml b/data/xml/2022.loresmt.xml index 40c39a5ae5..3f786e4b75 100644 --- a/data/xml/2022.loresmt.xml +++ b/data/xml/2022.loresmt.xml @@ -3,13 +3,13 @@ Proceedings of the Fifth Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2022) - Atul Kr.Ojha + Atul Kr.Ojha Chao-HongLiu EkaterinaVylomova JadeAbbott - JonathanWashington + JonathanWashington NathanielOco - Tommi APirinen + Tommi APirinen ValentinMalykh VarvaraLogacheva XiaobingZhao @@ -36,7 +36,7 @@ Multiple Pivot Languages and Strategic Decoder Initialization Helps Neural Machine Translation ShivamMhaskar - PushpakBhattacharyya + PushpakBhattacharyya 9–14 In machine translation, a pivot language can be used to assist the source to target translation model. In pivot-based transfer learning, the source to pivot and the pivot to target models are used to improve the performance of the source to target model. This technique works best when both source-pivot and pivot-target are high resource language pairs and the source-target is a low resource language pair. But in some cases, such as Indic languages, the pivot to target language pair is not a high resource one. To overcome this limitation, we use multiple related languages as pivot languages to assist the source to target model. We show that using multiple pivot languages gives 2.03 BLEU and 3.05 chrF score improvement over the baseline model. We show that strategic decoder initialization while performing pivot-based transfer learning with multiple pivot languages gives a 3.67 BLEU and 5.94 chrF score improvement over the baseline model. 2022.loresmt-1.2 @@ -54,7 +54,7 @@ The Only Chance to Understand: Machine Translation of the Severely Endangered Low-resource Languages of Eurasia AnnaMosolova - KamelSmaili + KamelSmaili 23–34 Numerous machine translation systems have been proposed since the appearance of this task. Nowadays, new large language model-based algorithms show results that sometimes overcome human ones on the rich-resource languages. Nevertheless, it is still not the case for the low-resource languages, for which all these algorithms did not show equally impressive results. In this work, we want to compare 3 generations of machine translation models on 7 low-resource languages and make a step further by proposing a new way of automatic parallel data augmentation using the state-of-the-art generative model. 2022.loresmt-1.4 @@ -65,7 +65,7 @@ NathanielRobinson CameronHogan NancyFulda - David R.Mortensen + David R.Mortensen 35–42 Multilingual transfer techniques often improve low-resource machine translation (MT). Many of these techniques are applied without considering data characteristics. We show in the context of Haitian-to-English translation that transfer effectiveness is correlated with amount of training data and relationships between knowledge-sharing languages. Our experiments suggest that for some languages beyond a threshold of authentic data, back-translation augmentation methods are counterproductive, while cross-lingual transfer from a sufficiently related language is preferred. We complement this finding by contributing a rule-based French-Haitian orthographic and syntactic engine and a novel method for phonological embedding. When used with multilingual techniques, orthographic transformation makes statistically significant improvements over conventional methods. And in very low-resource Jamaican MT, code-switching with a transfer language for orthographic resemblance yields a 6.63 BLEU point advantage. 2022.loresmt-1.5 @@ -95,7 +95,7 @@ <fixed-case>HFT</fixed-case>: High Frequency Tokens for Low-Resource <fixed-case>NMT</fixed-case> EdoardoSignoroni - PavelRychlý + PavelRychlý 56–63 Tokenization has been shown to impact the quality of downstream tasks, such as Neural Machine Translation (NMT), which is susceptible to out-of-vocabulary words and low frequency training data. Current state-of-the-art algorithms have been helpful in addressing the issues of out-of-vocabulary words, bigger vocabulary sizes and token frequency by implementing subword segmentation. We argue, however, that there is still room for improvement, in particular regarding low-frequency tokens in the training data. In this paper, we present “High Frequency Tokenizer”, or HFT, a new language-independent subword segmentation algorithm that addresses this issue. We also propose a new metric to measure the frequency coverage of a tokenizer’s vocabulary, based on a frequency rank weighted average of the frequency values of its items. We experiment with a diverse set of language corpora, vocabulary sizes, and writing systems and report improvements on both frequency statistics and on the average length of the output. We also observe a positive impact on downstream NMT. 2022.loresmt-1.8 diff --git a/data/xml/2022.louhi.xml b/data/xml/2022.louhi.xml index e1a5f04dea..711ed670ff 100644 --- a/data/xml/2022.louhi.xml +++ b/data/xml/2022.louhi.xml @@ -3,11 +3,11 @@ Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI) - AlbertoLavelli + AlbertoLavelli EbenHolderness - AntonioJimeno Yepes + AntonioJimeno Yepes Anne-LyseMinard - JamesPustejovsky + JamesPustejovsky FabioRinaldi Association for Computational Linguistics
Abu Dhabi, United Arab Emirates (Hybrid)
@@ -37,7 +37,7 @@ Assessing the Limits of Straightforward Models for Nested Named Entity Recognition in <fixed-case>S</fixed-case>panish Clinical Narratives MatiasRojasUniversity of Chile Casimiro PioCarrinoBarcelona Supercomputing Center - AitorGonzalez-AgirreBarcelona Supercomputing Center + AitorGonzalez-AgirreBarcelona Supercomputing Center JocelynDunstanUniversity of Chile MartaVillegasBarcelona Supercomputing Center, Spain 14-25 @@ -51,7 +51,7 @@ Can Current Explainability Help Provide References in Clinical Notes to Support Humans Annotate Medical Codes? Byung-HakKimAKASA, Inc. ZhongfenDengUniversity of Illinois at Chicago, USA - PhilipYuUniversity of Illinois at Chicago, USA + PhilipYuUniversity of Illinois at Chicago, USA VarunGanapathiAKASA, Inc. 26-34 The medical codes prediction problem from clinical notes has received substantial interest in the NLP community, and several recent studies have shown the state-of-the-art (SOTA) code prediction results of full-fledged deep learning-based methods. However, most previous SOTA works based on deep learning are still in early stages in terms of providing textual references and explanations of the predicted codes, despite the fact that this level of explainability of the prediction outcomes is critical to gaining trust from professional medical coders. This raises the important question of how well current explainability methods apply to advanced neural network models such as transformers to predict correct codes and present references in clinical notes that support code prediction. First, we present an explainable Read, Attend, and Code (xRAC) framework and assess two approaches, attention score-based xRAC-ATTN and model-agnostic knowledge-distillation-based xRAC-KD, through simplified but thorough human-grounded evaluations with SOTA transformer-based model, RAC. We find that the supporting evidence text highlighted by xRAC-ATTN is of higher quality than xRAC-KD whereas xRAC-KD has potential advantages in production deployment scenarios. More importantly, we show for the first time that, given the current state of explainability methodologies, using the SOTA medical codes prediction system still requires the expertise and competencies of professional coders, even though its prediction accuracy is superior to that of human coders. This, we believe, is a very meaningful step toward developing explainable and accurate machine learning systems for fully autonomous medical code prediction from clinical notes. @@ -63,7 +63,7 @@ Distinguishing between focus and background entities in biomedical corpora using discourse structure and transformers AntonioJimeno YepesRMIT University, Australia & University of Melbourne, Australia - KarinVerspoorRMIT University, Australia & University of Melbourne, Australia + KarinVerspoorRMIT University, Australia & University of Melbourne, Australia 35-40 Scientific documents typically contain numerous entity mentions, while only a subset are directly relevant to the key contributions of the paper. Distinguishing these focus entities from background ones effectively could improve the recovery of relevant documents and the extraction of information from documents. To study the identification of focus entities, we developed two large datasets of disease-causing biological pathogens using MEDLINE, the largest collection of biomedical citations, and PubMed Central, a collection of full text articles. The focus entities were identified using human-curated indexing on these collections. Experiments with machine learning methods to identify focus entities show that transformer methods achieve high precision and recall and that document discourse information is relevant. The work lays the foundation for more targeted retrieval/summarisation of entity-relevant documents. 2022.louhi-1.4 @@ -75,7 +75,7 @@ YanisLabrakAvignon University, France AdrienBazogeNantes University, France RichardDufourNantes University, France - BeatriceDailleNantes University, France + BeatriceDailleNantes University, France Pierre-AntoineGourraudNantes University, France EmmanuelMorinNantes University, France MickaelRouvierAvignon University, France @@ -89,7 +89,7 @@ A Large-Scale Dataset for Biomedical Keyphrase Generation MaëlHoubreNantes University, France FlorianBoudinNantes University, France - BeatriceDailleNantes University, France + BeatriceDailleNantes University, France 47-53 Keyphrase generation is the task consisting in generating a set of words or phrases that highlight the main topics of a document. There are few datasets for keyphrase generation in the biomedical domain and they do not meet the expectations in terms of size for training generative models. In this paper, we introduce kp-biomed, the first large-scale biomedical keyphrase generation dataset collected from PubMed abstracts. We train and release several generative models and conduct a series of experiments showing that using large scale datasets improves significantly the performances for present and absent keyphrase generation. The dataset and models are available online. 2022.louhi-1.6 @@ -140,7 +140,7 @@ HichamEl BoukkouriUniversité Paris-Saclay, CNRS, France OlivierFerretUniversité Paris-Saclay, CEA, France ThomasLavergneUniversité Paris-Saclay, CNRS, France - PierreZweigenbaumUniversité Paris-Saclay, CNRS, France + PierreZweigenbaumUniversité Paris-Saclay, CNRS, France 69-80 Domain adaptation of word embeddings has mainly been explored in the context of retraining general models on large specialized corpora. While this usually yields good results, we argue that knowledge graphs, which are used less frequently, could also be utilized to enhance existing representations with specialized knowledge. In this work, we aim to shed some light on whether such knowledge injection could be achieved using a basic set of tools: graph-level embeddings and concatenation. To that end, we adopt an incremental approach where we first demonstrate that static embeddings can indeed be improved through concatenation with in-domain node2vec representations. Then, we validate this approach on contextual models and generalize it further by proposing a variant of BERT that incorporates knowledge embeddings within its hidden states through the same process of concatenation. We show that this variant outperforms plain retraining on several specialized tasks, then discuss how this simple approach could be improved further. Both our code and pre-trained models are open-sourced for future research. In this work, we conduct experiments that target the medical domain and the English language. 2022.louhi-1.9 @@ -293,7 +293,7 @@ Efsun SariogluKayiGeorge Washington University, USA SardarHamidianGeorge Washington University, USA MichaelComptonColumbia University, USA - MonaDiabGeorge Washington University, USA + MonaDiabGeorge Washington University, USA 173-183 Schizophrenia is one of the most disabling mental health conditions to live with. Approximately one percent of the population has schizophrenia which makes it fairly common, and it affects many people and their families. Patients with schizophrenia suffer different symptoms: formal thought disorder (FTD), delusions, and emotional flatness. In this paper, we quantitatively and qualitatively analyze the language of patients with schizophrenia measuring various linguistic features in two modalities: speech and written text. We examine the following features: coherence and cohesion of thoughts, emotions, specificity, level of commit- ted belief (LCB), and personality traits. Our results show that patients with schizophrenia score high in fear and neuroticism compared to healthy controls. In addition, they are more committed to their beliefs, and their writing lacks details. They score lower in most of the linguistic features of cohesion with significant p-values. 2022.louhi-1.20 @@ -328,7 +328,7 @@ Enriching Deep Learning with Frame Semantics for Empathy Classification in Medical Narrative Essays PriyankaDeyUniversity of Illinois at Urbana-Champaign, USA - RoxanaGirjuUniversity of Illinois at Urbana-Champaign, USA + RoxanaGirjuUniversity of Illinois at Urbana-Champaign, USA 207-217 Empathy is a vital component of health care and plays a key role in the training of future doctors. Paying attention to medical students’ self-reflective stories of their interactions with patients can encourage empathy and the formation of professional identities that embody desirable values such as integrity and respect. We present a computational approach and linguistic analysis of empathic language in a large corpus of 440 essays written by pre-med students as narrated simulated patient – doctor interactions. We analyze the discourse of three kinds of empathy: cognitive, affective, and prosocial as highlighted by expert annotators. We also present various experiments with state-of-the-art recurrent neural networks and transformer models for classifying these forms of empathy. To further improve over these results, we develop a novel system architecture that makes use of frame semantics to enrich our state-of-the-art models. We show that this novel framework leads to significant improvement on the empathy classification task for this dataset. 2022.louhi-1.23 @@ -340,7 +340,7 @@ Condition-Treatment Relation Extraction on Disease-related Social Media Data SichangTuEmory University, USA StephenDooganReal Life Sciences - Jinho D.ChoiEmory University, USA + Jinho D.ChoiEmory University, USA 218-228 Social media has become a popular platform where people share information about personal healthcare conditions, diagnostic histories, and medical plans. Analyzing posts on social media depicting such realistic information can help improve quality and clinical decision-making; however, the lack of structured resources in this genre limits us to build robust NLP models for meaningful analysis. This paper presents a new corpus annotating relations among many types of conditions, treatments, and their attributes illustrated in social media posts by patients and caregivers. For experiments, a transformer encoder is pretrained on 1M raw posts and used to train several document-level relation extraction models using our corpus. Our best-performing model achieves the F1 scores of 70.9 and 51.7 for Entity Recognition and Relation Extraction, respectively. These results are encouraging as it is the first neural model extracting complex relations of this kind on social media data. 2022.louhi-1.24 diff --git a/data/xml/2022.lrec.xml b/data/xml/2022.lrec.xml index 3554d6ea03..1e92501a69 100644 --- a/data/xml/2022.lrec.xml +++ b/data/xml/2022.lrec.xml @@ -3,19 +3,19 @@ Proceedings of the Thirteenth Language Resources and Evaluation Conference - NicolettaCalzolari - FrédéricBéchet + NicolettaCalzolari + FrédéricBéchet PhilippeBlache - KhalidChoukri - ChristopherCieri + KhalidChoukri + ChristopherCieri ThierryDeclerck SaraGoggi HitoshiIsahara - BenteMaegaard - JosephMariani + BenteMaegaard + JosephMariani HélèneMazo - JanOdijk - SteliosPiperidis + JanOdijk + SteliosPiperidis European Language Resources Association
Marseille, France
June @@ -33,7 +33,7 @@ Domain Adaptation in Neural Machine Translation using a Qualia-Enriched <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Alexandre Diniz daCosta MateusCoutinho Marim - ElyMatos + ElyMatos TiagoTimponi Torrent 1–12 In this paper we present Scylla, a methodology for domain adaptation of Neural Machine Translation (NMT) systems that make use of a multilingual FrameNet enriched with qualia relations as an external knowledge base. Domain adaptation techniques used in NMT usually require fine-tuning and in-domain training data, which may pose difficulties for those working with lesser-resourced languages and may also lead to performance decay of the NMT system for out-of-domain sentences. Scylla does not require fine-tuning of the NMT model, avoiding the risk of model over-fitting and consequent decrease in performance for out-of-domain translations. Two versions of Scylla are presented: one using the source sentence as input, and another one using the target sentence. We evaluate Scylla in comparison to a state-of-the-art commercial NMT system in an experiment in which 50 sentences from the Sports domain are translated from Brazilian Portuguese to English. The two versions of Scylla significantly outperform the baseline commercial system in HTER. @@ -60,7 +60,7 @@ JaehyungSeo HyeonseokMoon SugyeongEo - HeuiseokLim + HeuiseokLim 22–28 In recent years, there has been an increasing need for the restoration and translation of historical languages. In this study, we attempt to translate historical records in ancient Korean language based on neural machine translation (NMT). Inspired by priming, a cognitive science theory that two different stimuli influence each other, we propose novel priming ancient-Korean NMT (AKNMT) using bilingual subword embedding initialization with structural property awareness in the ancient documents. Finally, we obtain state-of-the-art results in the AKNMT task. To the best of our knowledge, we confirm the possibility of developing a human-centric model that incorporates the concepts of cognitive science and analyzes the result from the perspective of interference and cognitive dissonance theory for the first time. 2022.lrec-1.3 @@ -91,7 +91,7 @@
Compiling a Suitable Level of Sense Granularity in a Lexicon for <fixed-case>AI</fixed-case> Purposes: The Open Source <fixed-case>COR</fixed-case> Lexicon - BolettePedersen + BolettePedersen Nathalie Carmen HauSørensen SanniNimb IdaFlørke @@ -140,7 +140,7 @@ Introducing the <fixed-case>CURLICAT</fixed-case> Corpora: Seven-language Domain Specific Annotated Corpora from Curated Sources - TamásVáradi + TamásVáradi BenceNyéki SvetlaKoeva MarkoTadić @@ -148,11 +148,11 @@ MaciejOgrodniczuk BartłomiejNitoń PiotrPęzik - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu ElenaIrimia MariaMitrofan - DanTufiș - RadovanGarabík + DanTufiș + RadovanGarabík SimonKrek AndražRepar 100–108 @@ -178,7 +178,7 @@ <fixed-case>C</fixed-case>o<fixed-case>QAR</fixed-case>: Question Rewriting on <fixed-case>C</fixed-case>o<fixed-case>QA</fixed-case> QuentinBrabant GwénoléLecorvé - Lina M.Rojas Barahona + Lina M.Rojas Barahona 119–126 Questions asked by humans during a conversation often contain contextual dependencies, i.e., explicit or implicit references to previous dialogue turns. These dependencies take the form of coreferences (e.g., via pronoun use) or ellipses, and can make the understanding difficult for automated systems. One way to facilitate the understanding and subsequent treatments of a question is to rewrite it into an out-of-context form, i.e., a form that can be understood without the conversational context. We propose CoQAR, a corpus containing 4.5K conversations from the Conversational Question-Answering dataset CoQA, for a total of 53K follow-up question-answer pairs. Each original question was manually annotated with at least 2 at most 3 out-of-context rewritings. CoQA originally contains 8k conversations, which sum up to 127k question-answer pairs. CoQAR can be used in the supervised learning of three tasks: question paraphrasing, question rewriting and conversational question answering. In order to assess the quality of CoQAR’s rewritings, we conduct several experiments consisting in training and evaluating models for these three tasks. Our results support the idea that question rewriting can be used as a preprocessing step for (conversational and non-conversational) question answering models, thereby increasing their performances. 2022.lrec-1.13 @@ -203,7 +203,7 @@ CristinaGiannone AndreaFavalli RanieroRomagnoli - Fabio MassimoZanzotto + Fabio MassimoZanzotto 137–145 Incorporating handwritten domain scripts into neural-based task-oriented dialogue systems may be an effective way to reduce the need for large sets of annotated dialogues. In this paper, we investigate how the use of domain scripts written by conversational designers affects the performance of neural-based dialogue systems. To support this investigation, we propose the Conversational-Logic-Injection-in-Neural-Network system (CLINN) where domain scripts are coded in semi-logical rules. By using CLINN, we evaluated semi-logical rules produced by a team of differently-skilled conversational designers. We experimented with the Restaurant domain of the MultiWOZ dataset. Results show that external knowledge is extremely important for reducing the need for annotated examples for conversational systems. In fact, rules from conversational designers used in CLINN significantly outperform a state-of-the-art neural-based dialogue system when trained with smaller sets of annotated dialogues. 2022.lrec-1.15 @@ -223,7 +223,7 @@ Language Technologies for the Creation of Multilingual Terminologies. Lessons Learned from the <fixed-case>SSHOC</fixed-case> Project FedericaGamba FrancescaFrontini - DaanBroeder + DaanBroeder MonicaMonachini 154–163 This paper is framed in the context of the SSHOC project and aims at exploring how Language Technologies can help in promoting and facilitating multilingualism in the Social Sciences and Humanities (SSH). Although most SSH researchers produce culturally and societally relevant work in their local languages, metadata and vocabularies used in the SSH domain to describe and index research data are currently mostly in English. We thus investigate Natural Language Processing and Machine Translation approaches in view of providing resources and tools to foster multilingual access and discovery to SSH content across different languages. As case studies, we create and deliver as freely, openly available data a set of multilingual metadata concepts and an automatically extracted multilingual Data Stewardship terminology. The two case studies allow as well to evaluate performances of state-of-the-art tools and to derive a set of recommendations as to how best apply them. Although not adapted to the specific domain, the employed tools prove to be a valid asset to translation tasks. Nonetheless, validation of results by domain experts proficient in the language is an unavoidable phase of the whole workflow. @@ -253,7 +253,7 @@ Cross-Lingual Link Discovery for Under-Resourced Languages - MichaelRosner + MichaelRosner SinaAhmadi Elena-SimonaApostol JuliaBosque-Gil @@ -264,7 +264,7 @@ DagmarGromann ChayaLiebeskind GiedrėValūnaitė Oleškevičienė - GillesSérasset + GillesSérasset Ciprian-OctavianTruică 181–192 In this paper, we provide an overview of current technologies for cross-lingual link discovery, and we discuss challenges, experiences and prospects of their application to under-resourced languages. We rst introduce the goals of cross-lingual linking and associated technologies, and in particular, the role that the Linked Data paradigm (Bizer et al., 2011) applied to language data can play in this context. We de ne under-resourced languages with a speci c focus on languages actively used on the internet, i.e., languages with a digitally versatile speaker community, but limited support in terms of language technology. We argue that languages for which considerable amounts of textual data and (at least) a bilingual word list are available, techniques for cross-lingual linking can be readily applied, and that these enable the implementation of downstream applications for under-resourced languages via the localisation and adaptation of existing technologies and resources. @@ -313,8 +313,8 @@ Automatic Detection of Stigmatizing Uses of Psychiatric Terms on <fixed-case>T</fixed-case>witter - VéroniqueMoriceau - FarahBenamara + VéroniqueMoriceau + FarahBenamara AbdelmoumeneBoumadane 237–243 Psychiatry and people suffering from mental disorders have often been given a pejorative label that induces social rejection. Many studies have addressed discourse content about psychiatry on social media, suggesting that they convey stigmatizingrepresentations of mental health disorders. In this paper, we focus for the first time on the use of psychiatric terms in tweetsin French. We first describe the annotated dataset that we use. Then we propose several deep learning models to detectautomatically (1) the different types of use of psychiatric terms (medical use, misuse or irrelevant use), and (2) the polarityof the tweet. We show that polarity detection can be improved when done in a multitask framework in combination with typeof use detection. This confirms the observations made manually on several datasets, namely that the polarity of a tweet iscorrelated to the type of term use (misuses are mostly negative whereas medical uses are neutral). The results are interesting forboth tasks and it allows to consider the possibility for performant automatic approaches in order to conduct real-time surveyson social media, larger and less expensive than existing manual ones @@ -324,7 +324,7 @@ <fixed-case>C</fixed-case>o<fixed-case>VERT</fixed-case>: A Corpus of Fact-checked Biomedical <fixed-case>COVID</fixed-case>-19 Tweets IsabelleMohr - AmelieWührl + AmelieWührl RomanKlinger 244–257 During the first two years of the COVID-19 pandemic, large volumes of biomedical information concerning this new disease have been published on social media. Some of this information can pose a real danger, particularly when false information is shared, for instance recommendations how to treat diseases without professional medical advice. Therefore, automatic fact-checking resources and systems developed specifically for medical domain are crucial. While existing fact-checking resources cover COVID-19 related information in news or quantify the amount of misinformation in tweets, there is no dataset providing fact-checked COVID-19 related Twitter posts with detailed annotations for biomedical entities, relations and relevant evidence. We contribute CoVERT, a fact-checked corpus of tweets with a focus on the domain of biomedicine and COVID-19 related (mis)information. The corpus consists of 300 tweets, each annotated with named entities and relations. We employ a novel crowdsourcing methodology to annotate all tweets with fact-checking labels and supporting evidence, which crowdworkers search for online. This methodology results in substantial inter-annotator agreement. Furthermore, we use the retrieved evidence extracts as part of a fact-checking pipeline, finding that the real-world evidence is more useful than the knowledge directly available in pretrained language models. @@ -334,8 +334,8 @@ <fixed-case>XLM</fixed-case>-<fixed-case>T</fixed-case>: Multilingual Language Models in <fixed-case>T</fixed-case>witter for Sentiment Analysis and Beyond FrancescoBarbieri - LuisEspinosa Anke - JoseCamacho-Collados + LuisEspinosa Anke + JoseCamacho-Collados 258–266 Language models are ubiquitous in current NLP, and their multilingual capacity has recently attracted considerable attention. However, current analyses have almost exclusively focused on (multilingual variants of) standard benchmarks, and have relied on clean pre-training and task-specific corpora as multilingual signals. In this paper, we introduce XLM-T, a model to train and evaluate multilingual language models in Twitter. In this paper we provide: (1) a new strong multilingual baseline consisting of an XLM-R (Conneau et al. 2020) model pre-trained on millions of tweets in over thirty languages, alongside starter code to subsequently fine-tune on a target task; and (2) a set of unified sentiment analysis Twitter datasets in eight different languages and a XLM-T model trained on this dataset. 2022.lrec-1.27 @@ -354,7 +354,7 @@ Generating Questions from <fixed-case>W</fixed-case>ikidata Triples KelvinHan - ThiagoCastro Ferreira + ThiagoCastro Ferreira ClaireGardent 277–290 Question generation from knowledge bases (or knowledge base question generation, KBQG) is the task of generating questions from structured database information, typically in the form of triples representing facts. To handle rare entities and generalize to unseen properties, previous work on KBQG resorted to extensive, often ad-hoc pre- and post-processing of the input triple. We revisit KBQG – using pre training, a new (triple, question) dataset and taking question type into account – and show that our approach outperforms previous work both in a standard and in a zero-shot setting. We also show that the extended KBQG dataset (also helpful for knowledge base question answering) we provide allows not only for better coverage in terms of knowledge base (KB) properties but also for increased output variability in that it permits the generation of multiple questions from the same KB triple. @@ -375,7 +375,7 @@ Evaluating the Effects of Embedding with Speaker Identity Information in Dialogue Summarization YujiNaraki TetsuyaSakai - YoshihikoHayashi + YoshihikoHayashi 298–304 Automatic dialogue summarization is a task used to succinctly summarize a dialogue transcript while correctly linking the speakers and their speech, which distinguishes this task from a conventional document summarization. To address this issue and reduce the “who said what”-related errors in a summary, we propose embedding the speaker identity information in the input embedding into the dialogue transcript encoder. Unlike the speaker embedding proposed by Gu et al. (2020), our proposal takes into account the informativeness of position embedding. By experimentally comparing several embedding methods, we confirmed that the scores of ROUGE and a human evaluation of the generated summaries were substantially increased by embedding speaker information at the less informative part of the fixed position embedding with sinusoidal functions. 2022.lrec-1.31 @@ -469,7 +469,7 @@ MihaiDascalu TraianRebedea VasilePais - DanTufis + DanTufis 374–384 Running large-scale pre-trained language models in computationally constrained environments remains a challenging problem yet to be addressed, while transfer learning from these models has become prevalent in Natural Language Processing tasks. Several solutions, including knowledge distillation, network quantization, or network pruning have been previously proposed; however, these approaches focus mostly on the English language, thus widening the gap when considering low-resource languages. In this work, we introduce three light and fast versions of distilled BERT models for the Romanian language: Distil-BERT-base-ro, Distil-RoBERT-base, and DistilMulti-BERT-base-ro. The first two models resulted from the individual distillation of knowledge from two base versions of Romanian BERTs available in literature, while the last one was obtained by distilling their ensemble. To our knowledge, this is the first attempt to create publicly available Romanian distilled BERT models, which were thoroughly evaluated on five tasks: part-of-speech tagging, named entity recognition, sentiment analysis, semantic textual similarity, and dialect identification. Our experimental results argue that the three distilled models offer performance comparable to their teachers, while being twice as fast on a GPU and ~35% smaller. In addition, we further test the similarity between the predictions of our students versus their teachers by measuring their label and probability loyalty, together with regression loyalty - a new metric introduced in this work. 2022.lrec-1.39 @@ -540,7 +540,7 @@ Kompetencer: Fine-grained Skill Classification in <fixed-case>D</fixed-case>anish Job Postings via Distant Supervision and Transfer Learning MikeZhang Kristian NørgaardJensen - BarbaraPlank + BarbaraPlank 436–447 Skill Classification (SC) is the task of classifying job competences from job postings. This work is the first in SC applied to Danish job vacancy data. We release the first Danish job posting dataset: *Kompetencer* (_en_: competences), annotated for nested spans of competences. To improve upon coarse-grained annotations, we make use of The European Skills, Competences, Qualifications and Occupations (ESCO; le Vrang et al., (2014)) taxonomy API to obtain fine-grained labels via distant supervision. We study two setups: The zero-shot and few-shot classification setting. We fine-tune English-based models and RemBERT (Chung et al., 2020) and compare them to in-language Danish models. Our results show RemBERT significantly outperforms all other models in both the zero-shot and the few-shot setting. 2022.lrec-1.46 @@ -592,7 +592,7 @@ Constructing A Dataset of Support and Attack Relations in Legal Arguments in Court Judgements using Linguistic Rules BasitAli SachinPawar - GirishPalshikar + GirishPalshikar RiturajSingh 491–500 Argumentation mining is a growing area of research and has several interesting practical applications of mining legal arguments. Support and Attack relations are the backbone of any legal argument. However, there is no publicly available dataset of these relations in the context of legal arguments expressed in court judgements. In this paper, we focus on automatically constructing such a dataset of Support and Attack relations between sentences in a court judgment with reasonable accuracy. We propose three sets of rules based on linguistic knowledge and distant supervision to identify such relations from Indian Supreme Court judgments. The first rule set is based on multiple discourse connectors, the second rule set is based on common semantic structures between argumentative sentences in a close neighbourhood, and the third rule set uses the information about the source of the argument. We also explore a BERT-based sentence pair classification model which is trained on this dataset. We release the dataset of 20506 sentence pairs - 10746 Support (precision 77.3%) and 9760 Attack (precision 65.8%). We believe that this dataset and the ideas explored in designing the linguistic rules and will boost the argumentation mining research for legal arguments. @@ -628,7 +628,7 @@ Valet: Rule-Based Information Extraction for Rapid Deployment - DayneFreitag + DayneFreitag JohnCadigan RobertSasseen PaulKalmar @@ -641,7 +641,7 @@ Negation Detection in <fixed-case>D</fixed-case>utch Spoken Human-Computer Conversations TomSweers IrisHendrickx - HelmerStrik + HelmerStrik 534–542 Proper recognition and interpretation of negation signals in text or communication is crucial for any form of full natural language understanding. It is also essential for computational approaches to natural language processing. In this study we focus on negation detection in Dutch spoken human-computer conversations. Since there exists no Dutch (dialogue) corpus annotated for negation we have annotated a Dutch corpus sample to evaluate our method for automatic negation detection. We use transfer learning and trained NegBERT (an existing BERT implementation used for negation detection) on English data with multilingual BERT to detect negation in Dutch dialogues. Our results show that adding in-domain training material improves the results. We show that we can detect both negation cues and scope in Dutch dialogues with high precision and recall. We provide a detailed error analysis and discuss the effects of cross-lingual and cross-domain transfer learning on automatic negation detection. 2022.lrec-1.56 @@ -650,9 +650,9 @@ Reflections on 30 Years of Language Resource Development and Sharing ChristopherCieri - MarkLiberman + MarkLiberman SunghyeCho - StephanieStrassel + StephanieStrassel JamesFiumara JonathanWright 543–550 @@ -662,7 +662,7 @@ Language Resources to Support Language Diversity – the <fixed-case>ELRA</fixed-case> Achievements - ValérieMapelli + ValérieMapelli VictoriaArranz KhalidChoukri HélèneMazo @@ -695,9 +695,9 @@ Aspect-Based Emotion Analysis and Multimodal Coreference: A Case Study of Customer Comments on Adidas <fixed-case>I</fixed-case>nstagram Posts LunaDe Bruyne AkbarKarimi - OrpheeDe Clercq + OrpheeDe Clercq AndreaPrati - VeroniqueHoste + VeroniqueHoste 574–580 While aspect-based sentiment analysis of user-generated content has received a lot of attention in the past years, emotion detection at the aspect level has been relatively unexplored. Moreover, given the rise of more visual content on social media platforms, we want to meet the ever-growing share of multimodal content. In this paper, we present a multimodal dataset for Aspect-Based Emotion Analysis (ABEA). Additionally, we take the first steps in investigating the utility of multimodal coreference resolution in an ABEA framework. The presented dataset consists of 4,900 comments on 175 images and is annotated with aspect and emotion categories and the emotional dimensions of valence and arousal. Our preliminary experiments suggest that ABEA does not benefit from multimodal coreference resolution, and that aspect and emotion classification only requires textual information. However, when more specific information about the aspects is desired, image recognition could be essential. 2022.lrec-1.61 @@ -716,7 +716,7 @@ <fixed-case>N</fixed-case>aija<fixed-case>S</fixed-case>enti: A <fixed-case>N</fixed-case>igerian <fixed-case>T</fixed-case>witter Sentiment Corpus for Multilingual Sentiment Analysis Shamsuddeen HassanMuhammad - David IfeoluwaAdelani + David IfeoluwaAdelani SebastianRuder Ibrahim Sa’idAhmad IdrisAbdulmumin @@ -725,7 +725,7 @@ Chris ChinenyeEmezue Saheed SalahudeenAbdullahi AnuoluwapoAremu - AlípioJorge + AlípioJorge PavelBrazdil 590–602 Sentiment analysis is one of the most widely studied applications in NLP, but most work focuses on languages with large amounts of data. We introduce the first large-scale human-annotated Twitter sentiment dataset for the four most widely spoken languages in Nigeria—Hausa, Igbo, Nigerian-Pidgin, and Yorùbá—consisting of around 30,000 annotated tweets per language, including a significant fraction of code-mixed tweets. We propose text collection, filtering, processing and labeling methods that enable us to create datasets for these low-resource languages. We evaluate a range of pre-trained models and transfer strategies on the dataset. We find that language-specific models and language-adaptive fine-tuning generally perform best. We release the datasets, trained models, sentiment lexicons, and code to incentivize research on sentiment analysis in under-represented languages. @@ -764,7 +764,7 @@ Analysis and Prediction of <fixed-case>NLP</fixed-case> Models via Task Embeddings DamienSileo - Marie-FrancineMoens + Marie-FrancineMoens 633–647 Task embeddings are low-dimensional representations that are trained to capture task properties. In this paper, we propose MetaEval, a collection of 101 NLP tasks. We fit a single transformer to all MetaEval tasks jointly while conditioning it on learned embeddings. The resulting task embeddings enable a novel analysis of the space of tasks. We then show that task aspects can be mapped to task embeddings for new tasks without using any annotated examples. Predicted embeddings can modulate the encoder for zero-shot inference and outperform a zero-shot baseline on GLUE tasks. The provided multitask setup can function as a benchmark for future transfer learning research. 2022.lrec-1.67 @@ -775,7 +775,7 @@ AmirHazem MeriemeBouhandi FlorianBoudin - BeatriceDaille + BeatriceDaille 648–662 Automatic Term Extraction (ATE) is a key component for domain knowledge understanding and an important basis for further natural language processing applications. Even with persistent improvements, ATE still exhibits weak results exacerbated by small training data inherent to specialized domain corpora. Recently, transformers-based deep neural models, such as BERT, have proven to be efficient in many downstream NLP tasks. However, no systematic evaluation of ATE has been conducted so far. In this paper, we run an extensive study on fine-tuning pre-trained BERT models for ATE. We propose strategies that empirically show BERT’s effectiveness using cross-lingual and cross-domain transfer learning to extract single and multi-word terms. Experiments have been conducted on four specialized domains in three languages. The obtained results suggest that BERT can capture cross-domain and cross-lingual terminologically-marked contexts shared by terms, opening a new design-pattern for ATE. 2022.lrec-1.68 @@ -809,7 +809,7 @@ HadeelSaadany PrashantSharma DipteshKanojia - ConstantinOrăsan + ConstantinOrăsan 680–688 The detection and extraction of abbreviations from unstructured texts can help to improve the performance of Natural Language Processing tasks, such as machine translation and information retrieval. However, in terms of publicly available datasets, there is not enough data for training deep-neural-networks-based models to the point of generalising well over data. This paper presents PLOD, a large-scale dataset for abbreviation detection and extraction that contains 160k+ segments automatically annotated with abbreviations and their long forms. We performed manual validation over a set of instances and a complete automatic validation for this dataset. We then used it to generate several baseline models for detecting abbreviations and long forms. The best models achieved an F1-score of 0.92 for abbreviations and 0.89 for detecting their corresponding long forms. We release this dataset along with our code and all the models publicly at https://github.com/surrey-nlp/PLOD-AbbreviationDetection 2022.lrec-1.71 @@ -882,7 +882,7 @@ RobertsDarģis IlzeAuziņa IngaKaija - KristīneLevāne-Petrova + KristīneLevāne-Petrova KristīnePokratniece 727–731 This paper presents the Latvian Language Learner Corpus (LaVA) developed at the Institute of Mathematics and Computer Science, University of Latvia. LaVA corpus contains 1015 essays (190k tokens and 790k characters excluding whitespaces) from foreigners studying at Latvian higher education institutions and who are learning Latvian as a foreign language in the first or second semester, reaching the A1 (possibly A2) Latvian language proficiency level. The corpus has morphological and error annotations. Error analysis and the statistics of the LaVA corpus are also provided in the paper. The corpus is publicly available at: http://www.korpuss.lv/id/LaVA. @@ -894,7 +894,7 @@ KennethHeafield ElaineFarrow Jelmervan der Linde - GemaRamírez-Sánchez + GemaRamírez-Sánchez DionWiggins 732–740 We present the EuroPat corpus of patent-specific parallel data for 6 official European languages paired with English: German, Spanish, French, Croatian, Norwegian, and Polish. The filtered parallel corpora range in size from 51 million sentences (Spanish-English) to 154k sentences (Croatian-English), with the unfiltered (raw) corpora being up to 2 times larger. Access to clean, high quality, parallel data in technical domains such as science, engineering, and medicine is needed for training neural machine translation systems for tasks like online dispute resolution and eProcurement. Our evaluation found that the addition of EuroPat data to a generic baseline improved the performance of machine translation systems on in-domain test data in German, Spanish, French, and Polish; and in translating patent data from Croatian to English. The corpus has been released under Creative Commons Zero, and is expected to be widely useful for training high-quality machine translation systems, and particularly for those targeting technical documents such as patents and contracts. @@ -916,7 +916,7 @@ Criteria for the Annotation of Implicit Stereotypes WolfgangSchmeisser-Nieto MontserratNofre - MarionaTaulé + MarionaTaulé 753–762 The growth of social media has brought with it a massive channel for spreading and reinforcing stereotypes. This issue becomes critical when the affected targets are minority groups such as women, the LGBT+ community and immigrants. Although from the perspective of computational linguistics, the detection of this kind of stereotypes is steadily improving, most stereotypes are expressed implicitly and identifying them automatically remains a challenge. One of the problems we found for tackling this issue is the lack of an operationalised definition of implicit stereotypes that would allow us to annotate consistently new corpora by characterising the different forms in which stereotypes appear. In this paper, we present thirteen criteria for annotating implicitness which were elaborated to facilitate the subjective task of identifying the presence of stereotypes. We also present NewsCom-Implicitness, a corpus of 1,911 sentences, of which 426 comprise explicit and implicit racial stereotypes. An experiment was carried out to evaluate the applicability of these criteria. The results indicate that different criteria obtain different inter-annotator agreement values and that there is a greater agreement when more criteria can be identified in one sentence. 2022.lrec-1.80 @@ -998,9 +998,9 @@ Bicleaner <fixed-case>AI</fixed-case>: Bicleaner Goes Neural JaumeZaragoza-Bernabeu - GemaRamírez-Sánchez + GemaRamírez-Sánchez MartaBañón - SergioOrtiz Rojas + SergioOrtiz Rojas 824–831 This paper describes the experiments carried out during the development of the latest version of Bicleaner, named Bicleaner AI, a tool that aims at detecting noisy sentences in parallel corpora. The tool, which now implements a new neural classifier, uses state-of-the-art techniques based on pre-trained transformer-based language models fine-tuned on a binary classification task. After that, parallel corpus filtering is performed, discarding the sentences that have lower probability of being mutual translations. Our experiments, based on the training of neural machine translation (NMT) with corpora filtered using Bicleaner AI for two different scenarios, show significant improvements in translation quality compared to the previous version of the tool which implemented a classifier based on Extremely Randomized Trees. 2022.lrec-1.87 @@ -1030,7 +1030,7 @@ KyleGorman Yustinus GhanggoAte MariaRyskina - SabrinaMielke + SabrinaMielke ElenaBudianskaya CharbelEl-Khaissi TiagoPimentel @@ -1042,7 +1042,7 @@ Delio SiticonatziCamaiteri Esaú ZumaetaRojas DidierLópez Francis - ArturoOncevay + ArturoOncevay JuanLópez Bautista Gema Celeste SilvaVillegas Lucas TorrobaHennigen @@ -1058,7 +1058,7 @@ SofyaGanieva HilariaCruz RitvánKarahóǧa - StellaMarkantonatou + StellaMarkantonatou GeorgePavlidis MatveyPlugaryov ElenaKlyachko @@ -1080,7 +1080,7 @@ BrijeshBhatt ChristopherStraughn ZoeyLiu - Jonathan NorthWashington + Jonathan NorthWashington YuvalPinter DuyguAtaman MarcinWolinski @@ -1090,7 +1090,7 @@ HossepDolatian ZahrohNuriah ShyamRatan - Francis M.Tyers + Francis M.Tyers Edoardo M.Ponti GrantAiton AryamanArora @@ -1103,13 +1103,13 @@ IgorMarchenko PolinaMashkovtseva AlexandraSerova - EmilyPrud’hommeaux + EmilyPrud’hommeaux MariaNepomniashchaya FaustoGiunchiglia EleanorChodroff MansHulden - MiikkaSilfverberg - Arya D.McCarthy + MiikkaSilfverberg + Arya D.McCarthy DavidYarowsky RyanCotterell ReutTsarfaty @@ -1157,7 +1157,7 @@ JaehyungSeo JungseobLee SugyeongEo - HeuiseokLim + HeuiseokLim 883–891 Automatic post-editing (APE) refers to a research field that aims to automatically correct errors included in the translation sentences derived by the machine translation system. This study has several limitations, considering the data acquisition, because there is no official dataset for most language pairs. Moreover, the amount of data is restricted even for language pairs in which official data has been released, such as WMT. To solve this problem and promote universal APE research regardless of APE data existence, this study proposes a method for automatically generating APE data based on a noising scheme from a parallel corpus. Particularly, we propose a human mimicking errors-based noising scheme that considers a practical correction process at the human level. We propose a precise inspection to attain high performance, and we derived the optimal noising schemes that show substantial effectiveness. Through these, we also demonstrate that depending on the type of noise, the noising scheme-based APE data generation may lead to inferior performance. In addition, we propose a dynamic noise injection strategy that enables the acquisition of a robust error correction capability and demonstrated its effectiveness by comparative analysis. This study enables obtaining a high performance APE model without human-generated data and can promote universal APE research for all language pairs targeting English. 2022.lrec-1.93 @@ -1167,7 +1167,7 @@ Domain Mismatch Doesn’t Always Prevent Cross-lingual Transfer Learning DanielEdmiston PhillipKeung - Noah A.Smith + Noah A.Smith 892–899 Cross-lingual transfer learning without labeled target language data or parallel text has been surprisingly effective in zero-shot cross-lingual classification, question answering, unsupervised machine translation, etc. However, some recent publications have claimed that domain mismatch prevents cross-lingual transfer, and their results show that unsupervised bilingual lexicon induction (UBLI) and unsupervised neural machine translation (UNMT) do not work well when the underlying monolingual corpora come from different domains (e.g., French text from Wikipedia but English text from UN proceedings). In this work, we show how a simple initialization regimen can overcome much of the effect of domain mismatch in cross-lingual transfer. We pre-train word and contextual embeddings on the concatenated domain-mismatched corpora, and use these as initializations for three tasks: MUSE UBLI, UN Parallel UNMT, and the SemEval 2017 cross-lingual word similarity task. In all cases, our results challenge the conclusions of prior work by showing that proper initialization can recover a large portion of the losses incurred by domain mismatch. 2022.lrec-1.94 @@ -1250,7 +1250,7 @@ LouisKobras MelfJohannsen PeterKling - ChrisBiemann + ChrisBiemann 956–962 We present a dataset containing source code solutions to algorithmic programming exercises solved by hundreds of Bachelor-level students at the University of Hamburg. These solutions were collected during the winter semesters 2019/2020, 2020/2021 and 2021/2022. The dataset contains a set of solutions to a total of 21 tasks written in Java as well as Python and a total of over 1500 individual solutions. All solutions were submitted through Moodle and the Coderunner plugin and passed a number of test cases (including randomized tests), such that they can be considered as working correctly. All students whose solutions are included in the dataset gave their consent into publishing their solutions. The solutions are pseudonymized with a random solution ID. Included in this paper is a short analysis of the dataset containing statistical data and highlighting a few anomalies (e.g. the number of solutions per task decreases for the last few tasks due to grading rules). We plan to extend the dataset with tasks and solutions from upcoming courses. 2022.lrec-1.101 @@ -1277,7 +1277,7 @@ Patrick D.Watson TiagoTimponi Torrent OliverCzulo - CollinBaker + CollinBaker 976–986 Frame shift is a cross-linguistic phenomenon in translation which results in corresponding pairs of linguistic material evoking different frames. The ability to predict frame shifts would enable (semi-)automatic creation of multilingual frame annotations and thus speeding up FrameNet creation through annotation projection. Here, we first characterize how frame shifts result from other linguistic divergences such as translational divergences and construal differences. Our analysis also shows that many pairs of frames in frame shifts are multi-hop away from each other in Berkeley FrameNet’s net-like configuration. Then, we propose the Frame Shift Prediction task and demonstrate that our graph attention networks, combined with auxiliary training, can learn cross-linguistic frame-to-frame correspondence and predict frame shifts. 2022.lrec-1.103 @@ -1317,7 +1317,7 @@ A Speech Recognizer for <fixed-case>F</fixed-case>risian/<fixed-case>D</fixed-case>utch Council Meetings MartijnBentum Louisten Bosch - Henkvan den Heuvel + Henkvan den Heuvel SimoneWills Domeniquevan der Niet JelskeDijkstra @@ -1345,7 +1345,7 @@ Ali CanKocabiyikoglu FrançoisPortet PrudenceGibert - HervéBlanchon + HervéBlanchon Jean-MarcBabouchkine GaëtanGavazzi 1023–1031 @@ -1357,7 +1357,7 @@ Towards an Open-Source <fixed-case>D</fixed-case>utch Speech Recognition System for the Healthcare Domain CristianTejedor-García Berrievan der Molen - Henkvan den Heuvel + Henkvan den Heuvel Arjanvan Hessen ToinePieters 1032–1039 @@ -1398,10 +1398,10 @@ Using a Knowledge Base to Automatically Annotate Speech Corpora and to Identify Sociolinguistic Variation YaruWu - FabianSuchanek - IoanaVasilescu - LoriLamel - MartineAdda-Decker + FabianSuchanek + IoanaVasilescu + LoriLamel + MartineAdda-Decker 1054–1060 Speech characteristics vary from speaker to speaker. While some variation phenomena are due to the overall communication setting, others are due to diastratic factors such as gender, provenance, age, and social background. The analysis of these factors, although relevant for both linguistic and speech technology communities, is hampered by the need to annotate existing corpora or to recruit, categorise, and record volunteers as a function of targeted profiles. This paper presents a methodology that uses a knowledge base to provide speaker-specific information. This can facilitate the enrichment of existing corpora with new annotations extracted from the knowledge base. The method also helps the large scale analysis by automatically extracting instances of speech variation to correlate with diastratic features. We apply our method to an over 120-hour corpus of broadcast speech in French and investigate variation patterns linked to reduction phenomena and/or specific to connected speech such as disfluencies. We find significant differences in speech rate, the use of filler words, and the rate of non-canonical realisations of frequent segments as a function of different professional categories and age groups. 2022.lrec-1.113 @@ -1411,8 +1411,8 @@ Phone Inventories and Recognition for Every Language XinjianLi FlorianMetze - David R.Mortensen - Alan WBlack + David R.Mortensen + Alan WBlack ShinjiWatanabe 1061–1067 Identifying phone inventories is a crucial component in language documentation and the preservation of endangered languages. However, even the largest collection of phone inventory only covers about 2000 languages, which is only 1/4 of the total number of languages in the world. A majority of the remaining languages are endangered. In this work, we attempt to solve this problem by estimating the phone inventory for any language listed in Glottolog, which contains phylogenetic information regarding 8000 languages. In particular, we propose one probabilistic model and one non-probabilistic model, both using phylogenetic trees (“language family trees”) to measure the distance between languages. We show that our best model outperforms baseline models by 6.5 F1. Furthermore, we demonstrate that, with the proposed inventories, the phone recognition model can be customized for every language in the set, which improved the PER (phone error rate) in phone recognition by 25%. @@ -1447,7 +1447,7 @@ JayetriBardhan AnthonyColas KirkRoberts - Daisy ZheWang + Daisy ZheWang 1083–1097 This paper develops the first question answering dataset (DrugEHRQA) containing question-answer pairs from both structured tables and unstructured notes from a publicly available Electronic Health Record (EHR). EHRs contain patient records, stored in structured tables and unstructured clinical notes. The information in structured and unstructured EHRs is not strictly disjoint: information may be duplicated, contradictory, or provide additional context between these sources. Our dataset has medication-related queries, containing over 70,000 question-answer pairs. To provide a baseline model and help analyze the dataset, we have used a simple model (MultimodalEHRQA) which uses the predictions of a modality selection network to choose between EHR tables and clinical notes to answer the questions. This is used to direct the questions to the table-based or text-based state-of-the-art QA model. In order to address the problem arising from complex, nested queries, this is the first time Relation-Aware Schema Encoding and Linking for Text-to-SQL Parsers (RAT-SQL) has been used to test the structure of query templates in EHR data. Our goal is to provide a benchmark dataset for multi-modal QA systems, and to open up new avenues of research in improving question answering over EHR structured data by using context from unstructured clinical data. 2022.lrec-1.117 @@ -1466,10 +1466,10 @@ <fixed-case>BERT</fixed-case>rade: Using Contextual Embeddings to Parse <fixed-case>O</fixed-case>ld <fixed-case>F</fixed-case>rench LoïcGrobol MathildeRegnault - PedroOrtiz Suarez - BenoîtSagot - LaurentRomary - BenoitCrabbé + PedroOrtiz Suarez + BenoîtSagot + LaurentRomary + BenoitCrabbé 1104–1113 The successes of contextual word embeddings learned by training large-scale language models, while remarkable, have mostly occurred for languages where significant amounts of raw texts are available and where annotated data in downstream tasks have a relatively regular spelling. Conversely, it is not yet completely clear if these models are also well suited for lesser-resourced and more irregular languages. We study the case of Old French, which is in the interesting position of having relatively limited amount of available raw text, but enough annotated resources to assess the relevance of contextual word embedding models for downstream NLP tasks. In particular, we use POS-tagging and dependency parsing to evaluate the quality of such models in a large array of configurations, including models trained from scratch from small amounts of raw text and models pre-trained on other languages but fine-tuned on Medieval French data. 2022.lrec-1.119 @@ -1495,7 +1495,7 @@ Towards Universal Segmentations: <fixed-case>U</fixed-case>ni<fixed-case>S</fixed-case>egments 1.0 - ZdeněkŽabokrtský + ZdeněkŽabokrtský NiyatiBafna JanBodnár LukášKyjánek @@ -1509,11 +1509,11 @@ <fixed-case>T</fixed-case>e<fixed-case>DD</fixed-case>i Sample: Text Data Diversity Sample for Language Comparison and Multilingual <fixed-case>NLP</fixed-case> - StevenMoran + StevenMoran ChristianBentz XimenaGutierrez-Vasques OlgaPelloni - TanjaSamardzic + TanjaSamardzic 1150–1158 We present the TeDDi sample, a diversity sample of text data for language comparison and multilingual Natural Language Processing. The TeDDi sample currently features 89 languages based on the typological diversity sample in the World Atlas of Language Structures. It consists of more than 20k texts and is accompanied by open-source corpus processing tools. The aim of TeDDi is to facilitate text-based quantitative analysis of linguistic diversity. We describe in detail the TeDDi sample, how it was created, data availability, and its added value through for NLP and linguistic research. 2022.lrec-1.123 @@ -1533,7 +1533,7 @@ LindaWiechetek KatriHiovain-Asikainen Inga Lill SiggaMikkelsen - SjurMoshagen + SjurMoshagen FlammiePirinen TrondTrosterud BørreGaup @@ -1578,7 +1578,7 @@ <fixed-case>CAMIO</fixed-case>: A Corpus for <fixed-case>OCR</fixed-case> in Multiple Languages MichaelArrigo - StephanieStrassel + StephanieStrassel NolanKing ThaoTran LisaMason @@ -1619,7 +1619,7 @@ YoonnaJang SeolhwaLee SungjinPark - HeuiseokLim + HeuiseokLim 1242–1248 We propose a deep learning-based foreign language learning platform, named FreeTalky, for people who experience anxiety dealing with foreign languages, by employing a humanoid robot NAO and various deep learning models. A persona-based dialogue system that is embedded in NAO provides an interesting and consistent multi-turn dialogue for users. Also, an grammar error correction system promotes improvement in grammar skills of the users. Thus, our system enables personalized learning based on persona dialogue and facilitates grammar learning of a user using grammar error feedback. Furthermore, we verified whether FreeTalky provides practical help in alleviating xenoglossophobia by replacing the real human in the conversation with a NAO robot, through human evaluation. 2022.lrec-1.132 @@ -1637,7 +1637,7 @@ <fixed-case>D</fixed-case>ial<fixed-case>C</fixed-case>rowd 2.0: A Quality-Focused Dialog System Crowdsourcing Toolkit JessicaHuynh Ting-RuiChiang - JeffreyBigham + JeffreyBigham MaxineEskenazi 1256–1263 Dialog system developers need high-quality data to train, fine-tune and assess their systems. They often use crowdsourcing for this since it provides large quantities of data from many workers. However, the data may not be of sufficiently good quality. This can be due to the way that the requester presents a task and how they interact with the workers. This paper introduces DialCrowd 2.0 to help requesters obtain higher quality data by, for example, presenting tasks more clearly and facilitating effective communication with workers. DialCrowd 2.0 guides developers in creating improved Human Intelligence Tasks (HITs) and is directly applicable to the workflows used currently by developers and researchers. @@ -1646,7 +1646,7 @@ A Brief Survey of Textual Dialogue Corpora - HugoGonçalo Oliveira + HugoGonçalo Oliveira PatríciaFerreira DanielMartins CatarinaSilva @@ -1673,7 +1673,7 @@ VojtěchHudeček Léon-PaulSchaub DanielStancl - PatrickParoubek + PatrickParoubek OndřejDušek 1286–1296 Every model is only as strong as the data that it is trained on. In this paper, we present a new dataset, obtained by merging four publicly available annotated corpora for task-oriented dialogues in several domains (MultiWOZ 2.2, CamRest676, DSTC2 and Schema-Guided Dialogue Dataset). This way, we assess the feasibility of providing a unified ontology and annotation schema covering several domains with a relatively limited effort. We analyze the characteristics of the resulting dataset along three main dimensions: language, information content and performance. We focus on aspects likely to be pertinent for improving dialogue success, e.g. dialogue consistency. Furthermore, to assess the usability of this new corpus, we thoroughly evaluate dialogue generation performance under various conditions with the help of two prominent recent end-to-end dialogue models: MarCo and GPT-2. These models were selected as popular open implementations representative of the two main dimensions of dialogue modelling. While we did not observe a significant gain for dialogue state tracking performance, we show that using more training data from different sources can improve language modelling capabilities and positively impact dialogue flow (consistency). In addition, we provide the community with one of the largest open dataset for machine learning experiments. @@ -1722,12 +1722,12 @@ Making a Semantic Event-type Ontology Multilingual - ZdenkaUresova + ZdenkaUresova KarolinaZaczynska PeterBourgonje - EvaFučíková + EvaFučíková GeorgRehm - JanHajic + JanHajic 1332–1343 We present an extension of the SynSemClass Event-type Ontology, originally conceived as a bilingual Czech-English resource. We added German entries to the classes representing the concepts of the ontology. Having a different starting point than the original work (unannotated parallel corpus without links to a valency lexicon and, of course, different existing lexical resources), it was a challenge to adapt the annotation guidelines, the data model and the tools used for the original version. We describe the process and results of working in such a setup. We also show the next steps to adapt the annotation process, data structures and formats and tools necessary to make the addition of a new language in the future more smooth and efficient, and possibly to allow for various teams to work on SynSemClass extensions to many languages concurrently. We also present the latest release which contains the results of adding German, freely available for download as well as for online access. 2022.lrec-1.142 @@ -1744,8 +1744,8 @@ <fixed-case>TZOS</fixed-case>: an Online Terminology Database Aimed at Working on <fixed-case>B</fixed-case>asque Academic Terminology Collaboratively - IzaskunAldezabal - Jose MariArriola + IzaskunAldezabal + Jose MariArriola ArantxaOtegi 1353–1359 Terminology databases are highly useful for the dissemination of specialized knowledge. In this paper we present TZOS, an online terminology database to work on Basque academic terminology collaboratively. We show how this resource integrates the Communicative Theory of Terminology, together with the methodological matters, how it is connected with real corpus GARATERM, which terminology issues arise when terms are collected and future perspectives. The main objectives of this work are to develop basic tools to research academic registers and make the terminology collected by expert users available to the community. Even though TZOS has been designed for an educational context, its flexible structure makes possible to extend it also to the professional area. In this way, we have built IZIBI-TZOS which is a Civil Engineering oriented version of TZOS. These resources are already publicly available, and the ongoing work is towards the interlinking with other lexical resources by applying linking data principles. @@ -1755,7 +1755,7 @@ <fixed-case>A</fixed-case>nimacy Denoting <fixed-case>G</fixed-case>erman Nouns: Annotation and Classification ManfredKlenner - AnneGöhring + AnneGöhring 1360–1364 In this paper, we introduce a gold standard for animacy detection comprising almost 14,500 German nouns that might be used to denote either animate entities or non-animate entities. We present inter-annotator agreement of our crowd-sourced seed annotations (9,000 nouns) and discuss the results of machine learning models applied to this data. 2022.lrec-1.145 @@ -1774,7 +1774,7 @@ Polar Quantification of Actor Noun Phrases for <fixed-case>G</fixed-case>erman - AnneGöhring + AnneGöhring ManfredKlenner 1376–1380 In this paper, we discuss work that strives to measure the degree of negativity - the negative polar load - of noun phrases, especially those denoting actors. Since no gold standard data is available for German for this quantification task, we generated a silver standard and used it to fine-tune a BERT-based intensity regressor. We evaluated the quality of the silver standard empirically and found that our lexicon-based quantification metric showed a strong correlation with human annotators. @@ -1794,7 +1794,7 @@ <fixed-case>RED</fixed-case> v2: Enhancing <fixed-case>RED</fixed-case> Dataset for Multi-Label Emotion Detection AlexandraCiobotaru Mihai VladConstantinescu - Liviu P.Dinu + Liviu P.Dinu StefanDumitrescu 1392–1399 RED (Romanian Emotion Dataset) is a machine learning-based resource developed for the automatic detection of emotions in Romanian texts, containing single-label annotated tweets with one of the following emotions: joy, fear, sadness, anger and neutral. In this work, we propose REDv2, an open-source extension of RED by adding two more emotions, trust and surprise, and by widening the annotation schema so that the resulted novel dataset is multi-label. We show the overall reliability of our dataset by computing inter-annotator agreements per tweet using a formula suitable for our annotation setup and we aggregate all annotators’ opinions into two variants of ground truth, one suitable for multi-label classification and the other suitable for text regression. We propose strong baselines with two transformer models, the Romanian BERT and the multilingual XLM-Roberta model, in both categorical and regression settings. @@ -1822,7 +1822,7 @@ Frustratingly Easy Performance Improvements for Low-resource Setups: A Tale on <fixed-case>BERT</fixed-case> and Segment Embeddings Robvan der Goot MaxMüller-Eberstein - BarbaraPlank + BarbaraPlank 1418–1427 As input representation for each sub-word, the original BERT architecture proposes the sum of the sub-word embedding, position embedding and a segment embedding. Sub-word and position embeddings are well-known and studied, and encode lexical information and word position, respectively. In contrast, segment embeddings are less known and have so far received no attention, despite being ubiquitous in large pre-trained language models. The key idea of segment embeddings is to encode to which of the two sentences (segments) a word belongs to — the intuition is to inform the model about the separation of sentences for the next sentence prediction pre-training task. However, little is known on whether the choice of segment impacts performance. In this work, we try to fill this gap and empirically study the impact of the segment embedding during inference time for a variety of pre-trained embeddings and target tasks. We hypothesize that for single-sentence prediction tasks performance is not affected — neither in mono- nor multilingual setups — while it matters when swapping segment IDs in paired-sentence tasks. To our surprise, this is not the case. Although for classification tasks and monolingual BERT models no large differences are observed, particularly word-level multilingual prediction tasks are heavily impacted. For low-resource syntactic tasks, we observe impacts of segment embedding and multilingual BERT choice. We find that the default setting for the most used multilingual BERT model underperforms heavily, and a simple swap of the segment embeddings yields an average improvement of 2.5 points absolute LAS score for dependency parsing over 9 different treebanks. 2022.lrec-1.152 @@ -1851,7 +1851,7 @@ MustafaOcal AdrianPerez AntonelaRadas - MarkFinlayson + MarkFinlayson 1444–1453 TimeML is a scheme for representing temporal information (times, events, & temporal relations) in texts. Although automatic TimeML annotation is challenging, there has been notable progress, with F1s of 0.8–0.9 for events and time detection subtasks, and F1s of 0.5–0.7 for relation extraction. Individually, these subtask results are reasonable, even good, but when combined to generate a full TimeML graph, is overall performance still acceptable? We present a novel suite of eight metrics, combined with a new graph-transformation experimental design, for holistic evaluation of TimeML graphs. We apply these metrics to four automatic TimeML annotation systems (CAEVO, TARSQI, CATENA, and ClearTK). We show that on average 1/3 of the TimeML graphs produced using these systems are inconsistent, and there is on average 1/5 more temporal indeterminacy than the gold-standard. We also show that the automatically generated graphs are on average 109 edits from the gold-standard, which is 1/3 toward complete replacement. Finally, we show that the relationship individual subtask performance and graph quality is non-linear: small errors in TimeML subtasks result in rapid degradation of final graph quality. These results suggest current automatic TimeML annotators are far from optimal and significant further improvement would be useful. 2022.lrec-1.155 @@ -1871,7 +1871,7 @@ Challenging the Transformer-based models with a Classical <fixed-case>A</fixed-case>rabic dataset: <fixed-case>Q</fixed-case>uran and <fixed-case>H</fixed-case>adith ShathaAltammami - EricAtwell + EricAtwell 1462–1471 Transformer-based models showed near-perfect results on several downstream tasks. However, their performance on classical Arabic texts is largely unexplored. To fill this gap, we evaluate monolingual, bilingual, and multilingual state-of-the-art models to detect relatedness between the Quran (Muslim holy book) and the Hadith (Prophet Muhammed teachings), which are complex classical Arabic texts with underlying meanings that require deep human understanding. To do this, we carefully built a dataset of Quran-verse and Hadith-teaching pairs by consulting sources of reputable religious experts. This study presents the methodology of creating the dataset, which we make available on our repository, and discusses the models’ performance that calls for the imminent need to explore avenues for improving the quality of these models to capture the semantics in such complex, low-resource texts. 2022.lrec-1.157 @@ -1910,7 +1910,7 @@ Fine-tuning vs From Scratch: Do Vision & Language Models Have Similar Capabilities on Out-of-Distribution Visual Question Answering? Kristian NørgaardJensen - BarbaraPlank + BarbaraPlank 1496–1508 Fine-tuning general-purpose pre-trained models has become a de-facto standard, also for Vision and Language tasks such as Visual Question Answering (VQA). In this paper, we take a step back and ask whether a fine-tuned model has superior linguistic and reasoning capabilities than a prior state-of-the-art architecture trained from scratch on the training data alone. We perform a fine-grained evaluation on out-of-distribution data, including an analysis on robustness due to linguistic variation (rephrasings). Our empirical results confirm the benefit of pre-training on overall performance and rephrasing in particular. But our results also uncover surprising limitations, particularly for answering questions involving boolean operations. To complement the empirical evaluation, this paper also surveys relevant earlier work on 1) available VQA data sets, 2) models developed for VQA, 3) pre-trained Vision+Language models, and 4) earlier fine-grained evaluation of pre-trained Vision+Language models. 2022.lrec-1.161 @@ -1932,7 +1932,7 @@ Eui JunHwang SukminCho Du HuiLee - JongPark + JongPark 1519–1528 Sign language production (SLP) is the process of generating sign language videos from spoken language expressions. Since sign languages are highly under-resourced, existing vision-based SLP approaches suffer from out-of-vocabulary (OOV) and test-time generalization problems and thus generate low-quality translations. To address these problems, we introduce an avatar-based SLP system composed of a sign language translation (SLT) model and an avatar animation generation module. Our Transformer-based SLT model utilizes two additional strategies to resolve these problems: named entity transformation to reduce OOV tokens and context vector generation using a pretrained language model (e.g., BERT) to reliably train the decoder. Our system is validated on a new Korean-Korean Sign Language (KSL) dataset of weather forecasts and emergency announcements. Our SLT model achieves an 8.77 higher BLEU-4 score and a 4.57 higher ROUGE-L score over those of our baseline model. In a user evaluation, 93.48% of named entities were successfully identified by participants, demonstrating marked improvement on OOV issues. 2022.lrec-1.163 @@ -1944,7 +1944,7 @@ WilliamPickard BrittanyCates NathanielBlanchard - JamesPustejovsky + JamesPustejovsky 1529–1541 We present a five-year retrospective on the development of the VoxWorld platform, first introduced as a multimodal platform for modeling motion language, that has evolved into a platform for rapidly building and deploying embodied agents with contextual and situational awareness, capable of interacting with humans in multiple modalities, and exploring their environments. In particular, we discuss the evolution from the theoretical underpinnings of the VoxML modeling language to a platform that accommodates both neural and symbolic inputs to build agents capable of multimodal interaction and hybrid reasoning. We focus on three distinct agent implementations and the functionality needed to accommodate all of them: Diana, a virtual collaborative agent; Kirby, a mobile robot; and BabyBAW, an agent who self-guides its own exploration of the world. 2022.lrec-1.164 @@ -1998,7 +1998,7 @@ RichardBrutti LuciaDonatelli KennethLai - JamesPustejovsky + JamesPustejovsky 1576–1583 This paper presents Gesture AMR, an extension to Abstract Meaning Representation (AMR), that captures the meaning of gesture. In developing Gesture AMR, we consider how gesture form and meaning relate; how gesture packages meaning both independently and in interaction with speech; and how the meaning of gesture is temporally and contextually determined. Our case study for developing Gesture AMR is a focused human-human shared task to build block structures. We develop an initial taxonomy of gesture act relations that adheres to AMR’s existing focus on predicate-argument structure while integrating meaningful elements unique to gesture. Pilot annotation shows Gesture AMR to be more challenging than standard AMR, and illustrates the need for more work on representation of dialogue and multimodal meaning. We discuss challenges of adapting an existing meaning representation to non-speech-based modalities and outline several avenues for expanding Gesture AMR. 2022.lrec-1.169 @@ -2033,9 +2033,9 @@ <fixed-case>B</fixed-case>asque<fixed-case>GLUE</fixed-case>: A Natural Language Understanding Benchmark for <fixed-case>B</fixed-case>asque GorkaUrbizu IñakiSan Vicente - XabierSaralegi - RodrigoAgerri - AitorSoroa + XabierSaralegi + RodrigoAgerri + AitorSoroa 1603–1612 Natural Language Understanding (NLU) technology has improved significantly over the last few years and multitask benchmarks such as GLUE are key to evaluate this improvement in a robust and general way. These benchmarks take into account a wide and diverse set of NLU tasks that require some form of language understanding, beyond the detection of superficial, textual clues. However, they are costly to develop and language-dependent, and therefore they are only available for a small number of languages. In this paper, we present BasqueGLUE, the first NLU benchmark for Basque, a less-resourced language, which has been elaborated from previously existing datasets and following similar criteria to those used for the construction of GLUE and SuperGLUE. We also report the evaluation of two state-of-the-art language models for Basque on BasqueGLUE, thus providing a strong baseline to compare upon. BasqueGLUE is freely available under an open license. 2022.lrec-1.172 @@ -2078,9 +2078,9 @@ <fixed-case>MUSS</fixed-case>: Multilingual Unsupervised Sentence Simplification by Mining Paraphrases LouisMartin AngelaFan - Éricde la Clergerie + Éricde la Clergerie AntoineBordes - BenoîtSagot + BenoîtSagot 1651–1664 Progress in sentence simplification has been hindered by a lack of labeled parallel simplification data, particularly in languages other than English. We introduce MUSS, a Multilingual Unsupervised Sentence Simplification system that does not require labeled simplification data. MUSS uses a novel approach to sentence simplification that trains strong models using sentence-level paraphrase data instead of proper simplification data. These models leverage unsupervised pretraining and controllable generation mechanisms to flexibly adjust attributes such as length and lexical complexity at inference time. We further present a method to mine such paraphrase data in any language from Common Crawl using semantic sentence embeddings, thus removing the need for labeled data. We evaluate our approach on English, French, and Spanish simplification benchmarks and closely match or outperform the previous best supervised results, despite not using any labeled simplification data. We push the state of the art further by incorporating labeled simplification data. 2022.lrec-1.176 @@ -2102,7 +2102,7 @@ Combining <fixed-case>ELECTRA</fixed-case> and Adaptive Graph Encoding for Frame Identification - FabioTamburini + FabioTamburini 1671–1679 This paper presents contributions in two directions: first we propose a new system for Frame Identification (FI), based on pre-trained text encoders trained discriminatively and graphs embedding, producing state of the art performance and, second, we take in consideration all the extremely different procedures used to evaluate systems for this task performing a complete evaluation over two benchmarks and all possible splits and cleaning procedures used in the FI literature. 2022.lrec-1.178 @@ -2110,7 +2110,7 @@ Polysemy in Spoken Conversations and Written Texts - AinaGarí Soler + AinaGarí Soler MatthieuLabeau ChloéClavel 1680–1690 @@ -2188,7 +2188,7 @@ <fixed-case>D</fixed-case>i<fixed-case>H</fixed-case>u<fixed-case>T</fixed-case>ra: a Parallel Corpus to Analyse Differences between Human Translations EkaterinaLapshinova-Koltunski - MajaPopović + MajaPopović MaaritKoponen 1751–1760 This paper describes a new corpus of human translations which contains both professional and students translations. The data consists of English sources – texts from news and reviews – and their translations into Russian and Croatian, as well as of the subcorpus containing translations of the review texts into Finnish. All target languages represent mid-resourced and less or mid-investigated ones. The corpus will be valuable for studying variation in translation as it allows a direct comparison between human translations of the same source texts. The corpus will also be a valuable resource for evaluating machine translation systems. We believe that this resource will facilitate understanding and improvement of the quality issues in both human and machine translation. In the paper, we describe how the data was collected, provide information on translator groups and summarise the differences between the human translations at hand based on our preliminary results with shallow features. @@ -2211,7 +2211,7 @@ PeterPolák MuskaanSingh AnnaNedoluzhko - OndřejBojar + OndřejBojar 1771–1779 Summarization is a challenging problem, and even more challenging is to manually create, correct, and evaluate the summaries. The severity of the problem grows when the inputs are multi-party dialogues in a meeting setup. To facilitate the research in this area, we present ALIGNMEET, a comprehensive tool for meeting annotation, alignment, and evaluation. The tool aims to provide an efficient and clear interface for fast annotation while mitigating the risk of introducing errors. Moreover, we add an evaluation mode that enables a comprehensive quality evaluation of meeting minutes. To the best of our knowledge, there is no such tool available. We release the tool as open source. It is also directly installable from PyPI. 2022.lrec-1.188 @@ -2259,7 +2259,7 @@ Annotating Attribution in <fixed-case>C</fixed-case>zech News Server Articles - BarboraHladka + BarboraHladka JiříMírovský MatyášKopp VáclavMoravec @@ -2285,7 +2285,7 @@ AnnBies JeremyGetman KiraGriffitt - StephanieStrassel + StephanieStrassel 1831–1838 This paper describes data resources created for Phase 1 of the DARPA Active Interpretation of Disparate Alternatives (AIDA) program, which aims to develop language technology that can help humans manage large volumes of sometimes conflicting information to develop a comprehensive understanding of events around the world, even when such events are described in multiple media and languages. Especially important is the need for the technology to be capable of building multiple hypotheses to account for alternative interpretations of data imbued with informational conflict. The corpus described here is designed to support these goals. It focuses on the domain of Russia-Ukraine relations and contains multimedia source data in English, Russian and Ukrainian, annotated to support development and evaluation of systems that perform extraction of entities, events, and relations from individual multimedia documents, aggregate the information across documents and languages, and produce multiple “hypotheses” about what has happened. This paper describes source data collection, annotation, and assessment. 2022.lrec-1.195 @@ -2298,7 +2298,7 @@ AgataSavary IskandarKeskes Jean-YvesAntoine - LamiaHadrich-Belguith + LamiaHadrich-Belguith 1839–1848 This paper describes our efforts to extend the PARSEME framework to Modern Standard Arabic. Theapplicability of the PARSEME guidelines was tested by measuring the inter-annotator agreement in theearly annotation stage. A subset of 1,062 sentences from the Prague Arabic Dependency Treebank PADTwas selected and annotated by two Arabic native speakers independently. Following their annotations, anew Arabic corpus with over 1,250 annotated VMWEs has been built. This corpus already exceeds thesmallest corpora of the PARSEME suite, and enables first observations. We discuss our annotation guide-line schema that shows full MWE annotation is realizable in Arabic where we get good inter-annotator agreement. 2022.lrec-1.196 @@ -2339,7 +2339,7 @@ DanielCheng KyleYan PhillipKeung - Noah A.Smith + Noah A.Smith 1885–1889 Social media platforms play an increasingly important role as forums for public discourse. Many platforms use recommendation algorithms that funnel users to online groups with the goal of maximizing user engagement, which many commentators have pointed to as a source of polarization and misinformation. Understanding the role of NLP in recommender systems is an interesting research area, given the role that social media has played in world events. However, there are few standardized resources which researchers can use to build models that predict engagement with online groups on social media; each research group constructs datasets from scratch without releasing their version for reuse. In this work, we present a dataset drawn from posts and comments on the online message board Reddit. We develop baseline models for recommending subreddits to users, given the user’s post and comment history. We also study the behavior of our recommender models on subreddits that were banned in June 2020 as part of Reddit’s efforts to stop the dissemination of hate speech. 2022.lrec-1.200 @@ -2383,10 +2383,10 @@ A Comparative Cross Language View On Acted Databases Portraying Basic Emotions Utilising Machine Learning FelixBurkhardt AnabellHacker - UweReichel + UweReichel HagenWierstorf FlorianEyben - BjörnSchuller + BjörnSchuller 1917–1924 Since several decades emotional databases have been recorded by various laboratories. Many of them contain acted portrays of Darwin’s famous “big four” basic emotions. In this paper, we investigate in how far a selection of them are comparable by two approaches: on the one hand modeling similarity as performance in cross database machine learning experiments and on the other by analyzing a manually picked set of four acoustic features that represent different phonetic areas. It is interesting to see in how far specific databases (we added a synthetic one) perform well as a training set for others while some do not. Generally speaking, we found indications for both similarity as well as specificiality across languages. 2022.lrec-1.204 @@ -2398,7 +2398,7 @@ JohannesWagner HagenWierstorf FlorianEyben - BjörnSchuller + BjörnSchuller 1925–1932 We present advancements with a software tool called Nkululeko, that lets users perform (semi-) supervised machine learning experiments in the speaker characteristics domain. It is based on audformat, a format for speech database metadata description. Due to an interface based on configurable templates, it supports best practise and very fast setup of experiments without the need to be proficient in the underlying language: Python. The paper explains the handling of Nkululeko and presents two typical experiments: comparing the expert acoustic features with artificial neural net embeddings for emotion classification and speaker age regression. 2022.lrec-1.205 @@ -2420,7 +2420,7 @@ <fixed-case>PATATRA</fixed-case> and <fixed-case>PATAF</fixed-case>req: two <fixed-case>F</fixed-case>rench databases for the documentation of within-speaker variability in speech CécileFougeron NicolasAudibert - CedricGendrot + CedricGendrot EstelleChardenon LouiseWohmann 1939–1944 @@ -2476,9 +2476,9 @@ <fixed-case>SN</fixed-case>u<fixed-case>C</fixed-case>: The <fixed-case>S</fixed-case>heffield Numbers Spoken Language Corpus EmmaBarker JonBarker - RobertGaizauskas + RobertGaizauskas NingMa - Monica LestariParamita + Monica LestariParamita 1978–1984 We present SNuC, the first published corpus of spoken alphanumeric identifiers of the sort typically used as serial and part numbers in the manufacturing sector. The dataset contains recordings and transcriptions of over 50 native British English speakers, speaking over 13,000 multi-character alphanumeric sequences and totalling almost 20 hours of recorded speech. We describe requirements taken into account in the designing the corpus and the methodology used to construct it. We present summary statistics describing the corpus contents, as well as a preliminary investigation into errors in spoken alphanumeric identifiers. We validate the corpus by showing how it can be used to adapt a deep learning neural network based ASR system, resulting in improved recognition accuracy on the task of spoken alphanumeric identifier recognition. Finally, we discuss further potential uses for the corpus and for the tools developed to construct it. 2022.lrec-1.212 @@ -2541,7 +2541,7 @@ AswinkumarVijayananth Duc BachHa SvenBehnke - JoachimKöhler + JoachimKöhler 2022–2031 For research in audiovisual interview archives often it is not only of interest what is said but also how. Sentiment analysis and emotion recognition can help capture, categorize and make these different facets searchable. In particular, for oral history archives, such indexing technologies can be of great interest. These technologies can help understand the role of emotions in historical remembering. However, humans often perceive sentiments and emotions ambiguously and subjectively. Moreover, oral history interviews have multi-layered levels of complex, sometimes contradictory, sometimes very subtle facets of emotions. Therefore, the question arises of the chance machines and humans have capturing and assigning these into predefined categories. This paper investigates the ambiguity in human perception of emotions and sentiment in German oral history interviews and the impact on machine learning systems. Our experiments reveal substantial differences in human perception for different emotions. Furthermore, we report from ongoing machine learning experiments with different modalities. We show that the human perceptual ambiguity and other challenges, such as class imbalance and lack of training data, currently limit the opportunities of these technologies for oral history archives. Nonetheless, our work uncovers promising observations and possibilities for further research. 2022.lrec-1.217 @@ -2553,7 +2553,7 @@ Ioan-BogdanIordache ShwetaYadav CorneliaCaragea - Liviu P.Dinu + Liviu P.Dinu DragoșIliescu 2032–2041 Finding the polarity of feelings in texts is a far-reaching task. Whilst the field of natural language processing has established sentiment analysis as an alluring problem, many feelings are left uncharted. In this study, we analyze the optimism and pessimism concepts from Twitter posts to effectively understand the broader dimension of psychological phenomenon. Towards this, we carried a systematic study by first exploring the linguistic peculiarities of optimism and pessimism in user-generated content. Later, we devised a multi-task knowledge distillation framework to simultaneously learn the target task of optimism detection with the help of the auxiliary task of sentiment analysis and hate speech detection. We evaluated the performance of our proposed approach on the benchmark Optimism/Pessimism Twitter dataset. Our extensive experiments show the superior- ity of our approach in correctly differentiating between optimistic and pessimistic users. Our human and automatic evaluation shows that sentiment analysis and hate speech detection are beneficial for optimism/pessimism detection. @@ -2611,7 +2611,7 @@ <fixed-case>A</fixed-case>esop’s fable “The North Wind and the Sun” Used as a Rosetta Stone to Extract and Map Spoken Words in Under-resourced Languages ElenaKnyazeva - PhilippeBoula de Mareüil + PhilippeBoula de Mareüil FrédéricVernier 2072–2079 This paper describes a method of semi-automatic word spotting in minority languages, from one and the same Aesop fable “The North Wind and the Sun” translated in Romance languages/dialects from Hexagonal (i.e. Metropolitan) France and languages from French Polynesia. The first task consisted of finding out how a dozen words such as “wind” and “sun” were translated in over 200 versions collected in the field — taking advantage of orthographic similarity, word position and context. Occurrences of the translations were then extracted from the phone-aligned recordings. The results were judged accurate in 96–97% of cases, both on the development corpus and a test set of unseen data. Corrected alignments were then mapped and basemaps were drawn to make various linguistic phenomena immediately visible. The paper exemplifies how regular expressions may be used for this purpose. The final result, which takes the form of an online speaking atlas (enriching the https://atlas.limsi.fr website), enables us to illustrate lexical, morphological or phonetic variation. @@ -2654,9 +2654,9 @@ Assessing Multilinguality of Publicly Accessible Websites RinaldsVīksna - IngunaSkadiņa + IngunaSkadiņa RaivisSkadiņš - AndrejsVasiļjevs + AndrejsVasiļjevs RobertsRozis 2108–2116 Although information on the Internet can be shared in many languages, the language presence on the World Wide Web is very disproportionate. The problem of multilingualism on the Web, in particular access, availability and quality of information in the world’s languages, has been the subject of UNESCO focus for several decades. Making European websites more multilingual is also one of the focal targets of the Connecting Europe Facility Automated Translation (CEF AT) digital service infrastructure. In order to monitor this goal, alongside other possible solutions, CEF AT needs a methodology and easy to use tool to assess the degree of multilingualism of a given website. In this paper we investigate methods and tools that automatically analyse the language diversity of the Web and propose indicators and methodology on how to measure the multilingualism of European websites. We also introduce a prototype tool based on open-source software that helps to assess multilingualism of the Web and can be independently run at set intervals. We also present initial results obtained with our tool that allows us to conclude that multilingualism on the Web is still a problem not only at the world level, but also at the European and regional level. @@ -2666,7 +2666,7 @@ A Methodology for Building a Diachronic Dataset of Semantic Shifts and its Application to <fixed-case>QC</fixed-case>-<fixed-case>FR</fixed-case>-Diac-V1.0, a Free Reference for <fixed-case>F</fixed-case>rench DavidKletz - PhilippeLanglais + PhilippeLanglais FrançoisLareau PatrickDrouin 2117–2125 @@ -2685,7 +2685,7 @@ Evaluating Gender Bias in Speech Translation - Marta R.Costa-jussà + Marta R.Costa-jussà ChristineBasta Gerard I.Gállego 2141–2147 @@ -2745,7 +2745,7 @@ NishthaJain DeclanGroves LuciaSpecia - MajaPopović + MajaPopović 2188–2195 Studying and mitigating gender and other biases in natural language have become important areas of research from both algorithmic and data perspectives. This paper explores the idea of reducing gender bias in a language generation context by generating gender variants of sentences. Previous work in this field has either been rule-based or required large amounts of gender balanced training data. These approaches are however not scalable across multiple languages, as creating data or rules for each language is costly and time-consuming. This work explores a light-weight method to generate gender variants for a given text using pre-trained language models as the resource, without any task-specific labelled data. The approach is designed to work on multiple languages with minimal changes in the form of heuristics. To showcase that, we have tested it on a high-resourced language, namely Spanish, and a low-resourced language from a different family, namely Serbian. The approach proved to work very well on Spanish, and while the results were less positive for Serbian, it showed potential even for languages where pre-trained models are less effective. 2022.lrec-1.235 @@ -2801,8 +2801,8 @@ <fixed-case>O</fixed-case>pen<fixed-case>EL</fixed-case>: An Annotated Corpus for Entity Linking and Discourse in Open Domain Dialogue WenCui LeanneRolston - MarilynWalker - Beth AnnHockey + MarilynWalker + Beth AnnHockey 2245–2256 Entity linking in dialogue is the task of mapping entity mentions in utterances to a target knowledge base. Prior work on entity linking has mainly focused on well-written articles such as Wikipedia, annotated newswire, or domain-specific datasets. We extend the study of entity linking to open domain dialogue by presenting the OpenEL corpus: an annotated multi-domain corpus for linking entities in natural conversation to Wikidata. Each dialogic utterance in 179 dialogues over 12 topics from the EDINA dataset has been annotated for entities realized by definite referring expressions as well as anaphoric forms such as he, she, it and they. This dataset supports training and evaluation of entity linking in open-domain dialogue, as well as analysis of the effect of using dialogue context and anaphora resolution in model training. It could also be used for fine-tuning a coreference resolution algorithm. To the best of our knowledge, this is the first substantial entity linking corpus publicly available for open-domain dialogue. We also establish baselines for this task using several existing entity linking systems. We found that the Transformer-based system Flair + BLINK has the best performance with a 0.65 F1 score. Our results show that dialogue context is extremely beneficial for entity linking in conversations, with Flair + Blink achieving an F1 of 0.61 without discourse context. These results also demonstrate the remaining performance gap between the baselines and human performance, highlighting the challenges of entity linking in open-domain dialogue, and suggesting many avenues for future research using OpenEL. 2022.lrec-1.241 @@ -2831,7 +2831,7 @@ <fixed-case>A</fixed-case>r<fixed-case>MIS</fixed-case> - The <fixed-case>A</fixed-case>rabic Misogyny and Sexism Corpus with Annotator Subjective Disagreements DinaAlmanea - MassimoPoesio + MassimoPoesio 2282–2291 The use of misogynistic and sexist language has increased in recent years in social media, and is increasing in the Arabic world in reaction to reforms attempting to remove restrictions on women lives. However, there are few benchmarks for Arabic misogyny and sexism detection, and in those the annotations are in aggregated form even though misogyny and sexism judgments are found to be highly subjective. In this paper we introduce an Arabic misogyny and sexism dataset (ArMIS) characterized by providing annotations from annotators with different degree of religious beliefs, and provide evidence that such differences do result in disagreements. To the best of our knowledge, this is the first dataset to study in detail the effect of beliefs on misogyny and sexism annotation. We also discuss proof-of-concept experiments showing that a dataset in which disagreements have not been reconciled can be used to train state-of-the-art models for misogyny and sexism detection; and consider different ways in which such models could be evaluated. 2022.lrec-1.244 @@ -2850,7 +2850,7 @@ The Causal News Corpus: Annotating Causal Relations in Event Sentences from News Fiona AntingTan - AliHürriyetoğlu + AliHürriyetoğlu TommasoCaselli NellekeOostdijk TadashiNomoto @@ -2932,8 +2932,8 @@ A <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Treebank of <fixed-case>A</fixed-case>ncient <fixed-case>H</fixed-case>ebrew - Daniel G.Swanson - Francis M.Tyers + Daniel G.Swanson + Francis M.Tyers 2353–2361 In this paper we present the initial construction of a Universal Dependencies treebank with morphological annotations of Ancient Hebrew containing portions of the Hebrew Scriptures (1579 sentences, 27K tokens) for use in comparative study with ancient translations and for analysis of the development of Hebrew syntax. We construct this treebank by applying a rule-based parser (300 rules) to an existing morphologically-annotated corpus with minimal constituency structure and manually verifying the output and present the results of this semi-automated annotation process and some of the annotation decisions made in the process of applying the UD guidelines to a new language. 2022.lrec-1.252 @@ -2945,7 +2945,7 @@ BernardoCunha RaquelSantos FernandoBatista - RicardoRibeiro + RicardoRibeiro 2362–2370 This paper introduces FIGHT, a dataset containing 63,450 tweets, posted before and after the official declaration of Covid-19 as a pandemic by online users in Portugal. This resource aims at contributing to the analysis of online hate speech targeting the most representative minorities in Portugal, namely the African descent and the Roma communities, and the LGBTQI community, the most commonly reported target of hate speech in social media at the European context. We present the methods for collecting the data, and provide insightful statistics on the distribution of tweets included in FIGHT, considering both the temporal and spatial dimensions. We also analyze the availability over time of tweets targeting the above-mentioned communities, distinguishing public, private and deleted tweets. We believe this study will contribute to better understand the dynamics of online hate speech in Portugal, particularly in adverse contexts, such as a pandemic outbreak, allowing the development of more informed and accurate hate speech resources for Portuguese. 2022.lrec-1.253 @@ -2965,7 +2965,7 @@ A Pragmatics-Centered Evaluation Framework for Natural Language Understanding DamienSileo PhilippeMuller - TimVan de Cruys + TimVan de Cruys CamillePradel 2382–2394 New models for natural language understanding have recently made an unparalleled amount of progress, which has led some researchers to suggest that the models induce universal text representations. However, current benchmarks are predominantly targeting semantic phenomena; we make the case that pragmatics needs to take center stage in the evaluation of natural language understanding. We introduce PragmEval, a new benchmark for the evaluation of natural language understanding, that unites 11 pragmatics-focused evaluation datasets for English. PragmEval can be used as supplementary training data in a multi-task learning setup, and is publicly available, alongside the code for gathering and preprocessing the datasets. Using our evaluation suite, we show that natural language inference, a widely used pretraining task, does not result in genuinely universal representations, which presents a new challenge for multi-task learning. @@ -3040,18 +3040,18 @@ <fixed-case>B</fixed-case>e<fixed-case>S</fixed-case>t: The Belief and Sentiment Corpus JenniferTracey - OwenRambow - ClaireCardie + OwenRambow + ClaireCardie AdamDalton - Hoa TrangDang - MonaDiab - BonnieDorr - LouiseGuthrie + Hoa TrangDang + MonaDiab + BonnieDorr + LouiseGuthrie MagdalenaMarkowska SmarandaMuresan VinodkumarPrabhakaran SamiraShaikh - TomekStrzalkowski + TomekStrzalkowski 2460–2467 We present the BeSt corpus, which records cognitive state: who believes what (i.e., factuality), and who has what sentiment towards what. This corpus is inspired by similar source-and-target corpora, specifically MPQA and FactBank. The corpus comprises two genres, newswire and discussion forums, in three languages, Chinese (Mandarin), English, and Spanish. The corpus is distributed through the LDC. 2022.lrec-1.262 @@ -3063,7 +3063,7 @@ FlorianSchneider ÖzgeAlacam PrateekChaudhury - ChrisBiemann + ChrisBiemann 2468–2477 MOTIF (MultimOdal ConTextualized Images For Language Learners) is a multimodal dataset that consists of 1125 comprehension texts retrieved from Wikipedia Simple Corpus. Allowing multimodal processing or enriching the context with multimodal information has proven imperative for many learning tasks, specifically for second language (L2) learning. In this respect, several traditional NLP approaches can assist L2 readers in text comprehension processes, such as simplifying text or giving dictionary descriptions for complex words. As nicely stated in the well-known proverb, sometimes “a picture is worth a thousand words” and an image can successfully complement the verbal message by enriching the representation, like in Pictionary books. This multimodal support can also assist on-the-fly text reading experience by providing a multimodal tool that chooses and displays the most relevant images for the difficult words, given the text context. This study mainly focuses on one of the key components to achieving this goal; collecting a multimodal dataset enriched with complex word annotation and validated image match. 2022.lrec-1.263 @@ -3099,7 +3099,7 @@ MarcVerhagen KelleyLynch KyeongminRim - JamesPustejovsky + JamesPustejovsky 2498–2506 The Computational Linguistics Applications for Multimedia Services (CLAMS) platform provides access to computational content analysis tools for multimedia material. The version we present here is a robust update of an initial prototype implementation from 2019. The platform now sports a variety of image, video, audio and text processing tools that interact via a common multi-modal representation language named MMIF (Multi-Media Interchange Format). We describe the overall architecture, the MMIF format, some of the tools included in the platform, the process to set up and run a workflow, visualizations included in CLAMS, and evaluate aspects of the platform on data from the American Archive of Public Broadcasting, showing how CLAMS can add metadata to mass-digitized multimedia collections, metadata that are typically only available implicitly in now largely unsearchable digitized media in archives and libraries. 2022.lrec-1.266 @@ -3114,7 +3114,7 @@ YanlingZhao LeiGuo MargritBetke - Derry TantiWijaya + Derry TantiWijaya 2507–2516 Given our society’s increased exposure to multimedia formats on social media platforms, efforts to understand how digital content impacts people’s emotions are burgeoning. As such, we introduce a U.S. gun violence news dataset that contains news headline and image pairings from 840 news articles with 15K high-quality, crowdsourced annotations on emotional responses to the news pairings. We created three experimental conditions for the annotation process: two with a single modality (headline or image only), and one multimodal (headline and image together). In contrast to prior works on affectively-annotated data, our dataset includes annotations on the dominant emotion experienced with the content, the intensity of the selected emotion and an open-ended, written component. By collecting annotations on different modalities of the same news content pairings, we explore the relationship between image and text influence on human emotional response. We offer initial analysis on our dataset, showing the nuanced affective differences that appear due to modality and individual factors such as political leaning and media consumption habits. Our dataset is made publicly available to facilitate future research in affective computing. 2022.lrec-1.267 @@ -3196,7 +3196,7 @@ Exploring Transformers for Ranking <fixed-case>P</fixed-case>ortuguese Semantic Relations - HugoGonçalo Oliveira + HugoGonçalo Oliveira 2573–2582 We explored transformer-based language models for ranking instances of Portuguese lexico-semantic relations. Weights were based on the likelihood of natural language sequences that transmitted the relation instances, and expectations were that they would be useful for filtering out noisier instances. However, after analysing the weights, no strong conclusions were taken. They are not correlated with redundancy, but are lower for instances with longer and more specific arguments, which may nevertheless be a consequence of their sensitivity to the frequency of such arguments. They did also not reveal to be useful when computing word similarity with network embeddings. Despite the negative results, we see the reported experiments and insights as another contribution for better understanding transformer language models like BERT and GPT, and we make the weighted instances publicly available for further research. 2022.lrec-1.275 @@ -3214,7 +3214,7 @@ Sentence Selection Strategies for Distilling Word Embeddings from <fixed-case>BERT</fixed-case> YixiaoWang ZiedBouraoui - LuisEspinosa Anke + LuisEspinosa Anke StevenSchockaert 2591–2600 Many applications crucially rely on the availability of high-quality word vectors. To learn such representations, several strategies based on language models have been proposed in recent years. While effective, these methods typically rely on a large number of contextualised vectors for each word, which makes them impractical. In this paper, we investigate whether similar results can be obtained when only a few contextualised representations of each word can be used. To this end, we analyse a range of strategies for selecting the most informative sentences. Our results show that with a careful selection strategy, high-quality word vectors can be learned from as few as 5 to 10 sentences. @@ -3225,7 +3225,7 @@ <fixed-case>D</fixed-case>ia<fixed-case>WUG</fixed-case>: A Dataset for Diatopic Lexical Semantic Variation in <fixed-case>S</fixed-case>panish GioiaBaldissin DominikSchlechtweg - SabineSchulte im Walde + SabineSchulte im Walde 2601–2609 We provide a novel dataset – DiaWUG – with judgements on diatopic lexical semantic variation for six Spanish variants in Europe and Latin America. In contrast to most previous meaning-based resources and studies on semantic diatopic variation, we collect annotations on semantic relatedness for Spanish target words in their contexts from both a semasiological perspective (i.e., exploring the meanings of a word given its form, thus including polysemy) and an onomasiological perspective (i.e., exploring identical meanings of words with different forms, thus including synonymy). In addition, our novel dataset exploits and extends the existing framework DURel for annotating word senses in context (Erk et al., 2013; Schlechtweg et al., 2018) and the framework-embedded Word Usage Graphs (WUGs) – which up to now have mainly be used for semasiological tasks and resources – in order to distinguish, visualize and interpret lexical semantic variation of contextualized words in Spanish from these two perspectives, i.e., semasiological and onomasiological language variation. 2022.lrec-1.278 @@ -3255,7 +3255,7 @@ HichamEl Boukkouri OlivierFerret ThomasLavergne - PierreZweigenbaum + PierreZweigenbaum 2626–2633 BERT models used in specialized domains all seem to be the result of a simple strategy: initializing with the original BERT and then resuming pre-training on a specialized corpus. This method yields rather good performance (e.g. BioBERT (Lee et al., 2020), SciBERT (Beltagy et al., 2019), BlueBERT (Peng et al., 2019)). However, it seems reasonable to think that training directly on a specialized corpus, using a specialized vocabulary, could result in more tailored embeddings and thus help performance. To test this hypothesis, we train BERT models from scratch using many configurations involving general and medical corpora. Based on evaluations using four different tasks, we find that the initial corpus only has a weak influence on the performance of BERT models when these are further pre-trained on a medical corpus. 2022.lrec-1.281 @@ -3276,7 +3276,7 @@ D3: A Massive Dataset of Scholarly Metadata for Analyzing the State of Computer Science Research Jan PhilipWahle TerryRuas - SaifMohammad + SaifMohammad BelaGipp 2642–2651 DBLP is the largest open-access repository of scientific articles on computer science and provides metadata associated with publications, authors, and venues. We retrieved more than 6 million publications from DBLP and extracted pertinent metadata (e.g., abstracts, author affiliations, citations) from the publication texts to create the DBLP Discovery Dataset (D3). D3 can be used to identify trends in research activity, productivity, focus, bias, accessibility, and impact of computer science research. We present an initial analysis focused on the volume of computer science research (e.g., number of papers, authors, research activity), trends in topics of interest, and citation patterns. Our findings show that computer science is a growing research field (15% annually), with an active and collaborative researcher community. While papers in recent years present more bibliographical entries in comparison to previous decades, the average number of citations has been declining. Investigating papers’ abstracts reveals that recent topic trends are clearly reflected in D3. Finally, we list further applications of D3 and pose supplemental research questions. The D3 dataset, our findings, and source code are publicly available for research purposes. @@ -3341,7 +3341,7 @@ Applying Automatic Text Summarization for Fake News Detection PhilippHartl - UdoKruschwitz + UdoKruschwitz 2702–2713 The distribution of fake news is not a new but a rapidly growing problem. The shift to news consumption via social media has been one of the drivers for the spread of misleading and deliberately wrong information, as in addition to its ease of use there is rarely any veracity monitoring. Due to the harmful effects of such fake news on society, the detection of these has become increasingly important. We present an approach to the problem that combines the power of transformer-based language models while simultaneously addressing one of their inherent problems. Our framework, CMTR-BERT, combines multiple text representations, with the goal of circumventing sequential limits and related loss of information the underlying transformer architecture typically suffers from. Additionally, it enables the incorporation of contextual information. Extensive experiments on two very different, publicly available datasets demonstrates that our approach is able to set new state-of-the-art performance benchmarks. Apart from the benefit of using automatic text summarization techniques we also find that the incorporation of contextual information contributes to performance gains. 2022.lrec-1.289 @@ -3393,7 +3393,7 @@ <fixed-case>LIP</fixed-case>-<fixed-case>RTVE</fixed-case>: An Audiovisual Database for Continuous <fixed-case>S</fixed-case>panish in the Wild DavidGimeno-Gómez - Carlos-D.Martínez-Hinarejos + Carlos-D.Martínez-Hinarejos 2750–2758 Speech is considered as a multi-modal process where hearing and vision are two fundamentals pillars. In fact, several studies have demonstrated that the robustness of Automatic Speech Recognition systems can be improved when audio and visual cues are combined to represent the nature of speech. In addition, Visual Speech Recognition, an open research problem whose purpose is to interpret speech by reading the lips of the speaker, has been a focus of interest in the last decades. Nevertheless, in order to estimate these systems in the currently Deep Learning era, large-scale databases are required. On the other hand, while most of these databases are dedicated to English, other languages lack sufficient resources. Thus, this paper presents a semi-automatically annotated audiovisual database to deal with unconstrained natural Spanish, providing 13 hours of data extracted from Spanish television. Furthermore, baseline results for both speaker-dependent and speaker-independent scenarios are reported using Hidden Markov Models, a traditional paradigm that has been widely used in the field of Speech Technologies. 2022.lrec-1.294 @@ -3438,7 +3438,7 @@ OlgaLyashevskaya AnnaNedoluzhko DaniilVodolazsky - ZdeněkŽabokrtský + ZdeněkŽabokrtský 2788–2797 Words of any language are to some extent related thought the ways they are formed. For instance, the verb ‘exempl-ify’ and the noun ‘example-s’ are both based on the word ‘example’, but the verb is derived from it, while the noun is inflected. In Natural Language Processing of Russian, the inflection is satisfactorily processed; however, there are only a few machine-trackable resources that capture derivations even though Russian has both of these morphological processes very rich. Therefore, we devote this paper to improving one of the methods of constructing such resources and to the application of the method to a Russian lexicon, which results in the creation of the largest lexical resource of Russian derivational relations. The resulting database dubbed DeriNet.RU includes more than 300 thousand lexemes connected with more than 164 thousand binary derivational relations. To create such data, we combined the existing machine-learning methods that we improved to manage this goal. The whole approach is evaluated on our newly created data set of manual, parallel annotation. The resulting DeriNet.RU is freely available under an open license agreement. 2022.lrec-1.298 @@ -3462,11 +3462,11 @@ Towards <fixed-case>L</fixed-case>atvian <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et - PeterisPaikens + PeterisPaikens MikusGrasmanis AguteKlints IlzeLokmane - LaumaPretkalniņa + LaumaPretkalniņa LauraRituma MadaraStāde LaineStrankale @@ -3478,7 +3478,7 @@ Building Sentiment Lexicons for <fixed-case>M</fixed-case>ainland <fixed-case>S</fixed-case>candinavian Languages Using Machine Translation and Sentence Embeddings PengLiu - CristinaMarco + CristinaMarco Jon AtleGulla 2816–2825 This paper presents a simple but effective method to build sentiment lexicons for the three Mainland Scandinavian languages: Danish, Norwegian and Swedish. This method benefits from the English Sentiwordnet and a thesaurus in one of the target languages. Sentiment information from the English resource is mapped to the target languages by using machine translation and similarity measures based on sentence embeddings. A number of experiments with Scandinavian languages are performed in order to determine the best working sentence embedding algorithm for this task. A careful extrinsic evaluation on several datasets yields state-of-the-art results using a simple rule-based sentiment analysis algorithm. The resources are made freely available under an MIT License. @@ -3489,7 +3489,7 @@ A Thesaurus-based Sentiment Lexicon for <fixed-case>D</fixed-case>anish: The <fixed-case>D</fixed-case>anish Sentiment Lexicon SanniNimb SussiOlsen - BolettePedersen + BolettePedersen ThomasTroelsgård 2826–2832 This paper describes how a newly published Danish sentiment lexicon with a high lexical coverage was compiled by use of lexicographic methods and based on the links between groups of words listed in semantic order in a thesaurus and the corresponding word sense descriptions in a comprehensive monolingual dictionary. The overall idea was to identify negative and positive sections in a thesaurus, extract the words from these sections and combine them with the dictionary information via the links. The annotation task of the dataset included several steps, and was based on the comparison of synonyms and near synonyms within a semantic field. In the cases where one of the words were included in the smaller Danish sentiment lexicon AFINN, its value there was used as inspiration and expanded to the synonyms when appropriate. In order to obtain a more practical lexicon with overall polarity values at lemma level, all the senses of the lemma were afterwards compared, taking into consideration dictionary information such as usage, style and frequency. The final lexicon contains 13,859 Danish polarity lemmas and includes morphological information. It is freely available at https://github.com/dsldk/danish-sentiment-lexicon (licence CC-BY-SA 4.0 International). @@ -3536,7 +3536,7 @@ Placing multi-modal, and multi-lingual Data in the Humanities Domain on the Map: the Mythotopia Geo-tagged Corpus - VoulaGiouli + VoulaGiouli AnnaVacalopoulou NikolaosSidiropoulos ChristinaFlouda @@ -3582,7 +3582,7 @@ <fixed-case>H</fixed-case>ong <fixed-case>K</fixed-case>ong: Longitudinal and Synchronic Characterisations of Protest News between 1998 and 2020 - Arya D.McCarthy + Arya D.McCarthy Giovanna Maria DoraDore 2891–2900 This paper showcases the utility and timeliness of the Hong Kong Protest News Dataset, a highly curated collection of news articles from diverse news sources, to investigate longitudinal and synchronic news characterisations of protests in Hong Kong between 1998 and 2020. The properties of the dataset enable us to apply natural language processing to its 4522 articles and thereby study patterns of journalistic practice across newspapers. This paper sheds light on whether depth and/or manner of reporting changed over time, and if so, in what ways, or in response to what. In its focus and methodology, this paper helps bridge the gap between “validity-focused methodological debates” and the use of computational methods of analysis in the social sciences. @@ -3609,7 +3609,7 @@ MilanStraka JanŠtěpánek BarboraŠtěpánková - JanHajic + JanHajic 2909–2918 This paper presents an analysis of annotation using an automatic pre-annotation for a mid-level annotation complexity task - dependency syntax annotation. It compares the annotation efforts made by annotators using a pre-annotated version (with a high-accuracy parser) and those made by fully manual annotation. The aim of the experiment is to judge the final annotation quality when pre-annotation is used. In addition, it evaluates the effect of automatic linguistically-based (rule-formulated) checks and another annotation on the same data available to the annotators, and their influence on annotation quality and efficiency. The experiment confirmed that the pre-annotation is an efficient tool for faster manual syntactic annotation which increases the consistency of the resulting annotation without reducing its quality. 2022.lrec-1.312 @@ -3621,7 +3621,7 @@ AntonelaRadas JaredHummer KarineMegerdoomian - MarkFinlayson + MarkFinlayson 2919–2927 TimeML is an annotation scheme for capturing temporal information in text. The developers of TimeML built the TimeBank corpus to both validate the scheme and provide a rich dataset of events, temporal expressions, and temporal relationships for training and testing temporal analysis systems. In our own work we have been developing methods aimed at TimeML graphs for detecting (and eventually automatically correcting) temporal inconsistencies, extracting timelines, and assessing temporal indeterminacy. In the course of this investigation we identified numerous previously unrecognized issues in the TimeBank corpus, including multiple violations of TimeML annotation guide rules, incorrectly disconnected temporal graphs, as well as inconsistent, redundant, missing, or otherwise incorrect annotations. We describe our methods for detecting and correcting these problems, which include: (a) automatic guideline checking (109 violations); (b) automatic inconsistency checking (65 inconsistent files); (c) automatic disconnectivity checking (625 incorrect breakpoints); and (d) manual comparison with the output of state-of-the-art automatic annotators to identify missing annotations (317 events, 52 temporal expressions). We provide our code as well as a set of patch files that can be applied to the TimeBank corpus to produce a corrected version for use by other researchers in the field. 2022.lrec-1.313 @@ -3690,7 +3690,7 @@ ChristèleMaizonniaux NeasaNí Chiaráin ChadiRaheb - MannyRayner + MannyRayner JohnSloan NikosTsourakis ChunlinYao @@ -3703,8 +3703,8 @@ Cyberbullying Classifiers are Sensitive to Model-Agnostic Perturbations ChrisEmmery ÁkosKádár - GrzegorzChrupała - WalterDaelemans + GrzegorzChrupała + WalterDaelemans 2976–2988 A limited amount of studies investigates the role of model-agnostic adversarial behavior in toxic content classification. As toxicity classifiers predominantly rely on lexical cues, (deliberately) creative and evolving language-use can be detrimental to the utility of current corpora and state-of-the-art models when they are deployed for content moderation. The less training data is available, the more vulnerable models might become. This study is, to our knowledge, the first to investigate the effect of adversarial behavior and augmentation for cyberbullying detection. We demonstrate that model-agnostic lexical substitutions significantly hurt classifier performance. Moreover, when these perturbed samples are used for augmentation, we show models become robust against word-level perturbations at a slight trade-off in overall task performance. Augmentations proposed in prior work on toxicity prove to be less effective. Our results underline the need for such evaluations in online harm areas with small corpora. 2022.lrec-1.319 @@ -3712,7 +3712,7 @@ Constructing Distributions of Variation in Referring Expression Type from Corpora for Model Evaluation - T. MarkEllison + T. MarkEllison FahimeSame 2989–2997 The generation of referring expressions (REs) is a non-deterministic task. However, the algorithms for the generation of REs are standardly evaluated against corpora of written texts which include only one RE per each reference. Our goal in this work is firstly to reproduce one of the few studies taking the distributional nature of the RE generation into account. We add to this work, by introducing a method for exploring variation in human RE choice on the basis of longitudinal corpora - substantial corpora with a single human judgement (in the process of composition) per RE. We focus on the prediction of RE types, proper name, description and pronoun. We compare evaluations made against distributions over these types with evaluations made against parallel human judgements. Our results show agreement in the evaluation of learning algorithms against distributions constructed from parallel human evaluations and from longitudinal data. @@ -3735,7 +3735,7 @@ Multi-Task Learning for Cross-Lingual Abstractive Summarization ShoTakase - NaoakiOkazaki + NaoakiOkazaki 3008–3016 We present a multi-task learning framework for cross-lingual abstractive summarization to augment training data. Recent studies constructed pseudo cross-lingual abstractive summarization data to train their neural encoder-decoders. Meanwhile, we introduce existing genuine data such as translation pairs and monolingual abstractive summarization data into training. Our proposed method, Transum, attaches a special token to the beginning of the input sentence to indicate the target task. The special token enables us to incorporate the genuine data into the training data easily. The experimental results show that Transum achieves better performance than the model trained with only pseudo cross-lingual summarization data. In addition, we achieve the top ROUGE score on Chinese-English and Arabic-English abstractive summarization. Moreover, Transum also has a positive effect on machine translation. Experimental results indicate that Transum improves the performance from the strong baseline, Transformer, in Chinese-English, Arabic-English, and English-Japanese translation datasets. 2022.lrec-1.322 @@ -3754,12 +3754,12 @@ HarritxuGete ThierryEtchegoyhen DavidPonce - GorkaLabaka + GorkaLabaka NoraAranberri AnderCorral - XabierSaralegi + XabierSaralegi IgorEllakuria - MaiteMartin + MaiteMartin 3026–3037 Document-level Neural Machine Translation aims to increase the quality of neural translation models by taking into account contextual information. Properly modelling information beyond the sentence level can result in improved machine translation output in terms of coherence, cohesion and consistency. Suitable corpora for context-level modelling are necessary to both train and evaluate context-aware systems, but are still relatively scarce. In this work we describe TANDO, a document-level corpus for the under-resourced Basque-Spanish language pair, which we share with the scientific community. The corpus is composed of parallel data from three different domains and has been prepared with context-level information. Additionally, the corpus includes contrastive test sets for fine-grained evaluations of gender and register contextual phenomena on both source and target language sides. To establish the usefulness of the corpus, we trained and evaluated baseline Transformer models and context-aware variants based on context concatenation. Our results indicate that the corpus is suitable for fine-grained evaluation of document-level machine translation systems. 2022.lrec-1.324 @@ -3767,14 +3767,14 @@ Unsupervised Machine Translation in Real-World Scenarios - Onade Gibert + Onade Gibert IakesGoenaga JordiArmengol-Estapé OlatzPerez-de-Viñaspre - CarlaParra + CarlaParra MarinaSánchez-Torrón - MarcisPinnis - GorkaLabaka + MarcisPinnis + GorkaLabaka MaiteMelero 3038–3047 In this work, we present the work that has been carried on in the MT4All CEF project and the resources that it has generated by leveraging recent research carried out in the field of unsupervised learning. In the course of the project 18 monolingual corpora for specific domains and languages have been collected, and 12 bilingual dictionaries and translation models have been generated. As part of the research, the unsupervised MT methodology based only on monolingual corpora (Artetxe et al., 2017) has been tested on a variety of languages and domains. Results show that in specialised domains, when there is enough monolingual in-domain data, unsupervised results are comparable to those of general domain supervised translation, and that, at any rate, unsupervised techniques can be used to boost results whenever very little data is available. @@ -3794,7 +3794,7 @@ On the Multilingual Capabilities of Very Large-Scale <fixed-case>E</fixed-case>nglish Language Models JordiArmengol-Estapé - Onade Gibert Bonet + Onade Gibert Bonet MaiteMelero 3056–3068 Generative Pre-trained Transformers (GPTs) have recently been scaled to unprecedented sizes in the history of machine learning. These models, solely trained on the language modeling objective, have been shown to exhibit outstanding zero, one, and few-shot learning capabilities in a number of different tasks. Nevertheless, aside from anecdotal experiences, little is known regarding their multilingual capabilities, given the fact that the pre-training corpus is almost entirely composed of English text. In this work, we investigate its potential and limits in three tasks: extractive question-answering, text summarization and natural language generation for five different languages, as well as the effect of scale in terms of model size. Our results show that GPT-3 can be almost as useful for many languages as it is for English, with room for improvement if optimization of the tokenization is addressed. @@ -3845,7 +3845,7 @@ Building Comparable Corpora for Assessing Multi-Word Term Alignment OmarAdjali EmmanuelMorin - PierreZweigenbaum + PierreZweigenbaum 3103–3112 Recent work has demonstrated the importance of dealing with Multi-Word Terms (MWTs) in several Natural Language Processing applications. In particular, MWTs pose serious challenges for alignment and machine translation systems because of their syntactic and semantic properties. Thus, developing algorithms that handle MWTs is becoming essential for many NLP tasks. However, the availability of bilingual and more generally multi-lingual resources is limited, especially for low-resourced languages and in specialized domains. In this paper, we propose an approach for building comparable corpora and bilingual term dictionaries that help evaluate bilingual term alignment in comparable corpora. To that aim, we exploit parallel corpora to perform automatic bilingual MWT extraction and comparable corpus construction. Parallel information helps to align bilingual MWTs and makes it easier to build comparable specialized sub-corpora. Experimental validation on an existing dataset and on manually annotated data shows the interest of the proposed methodology. 2022.lrec-1.332 @@ -3932,7 +3932,7 @@ MuskaanSingh MarieHledíková TirthankarGhosal - OndřejBojar + OndřejBojar 3174–3182 Taking minutes is an essential component of every meeting, although the goals, style, and procedure of this activity (“minuting” for short) can vary. Minuting is a rather unstructured writing activity and is affected by who is taking the minutes and for whom the intended minutes are. With the rise of online meetings, automatic minuting would be an important benefit for the meeting participants as well as for those who might have missed the meeting. However, automatically generating meeting minutes is a challenging problem due to a variety of factors including the quality of automatic speech recorders (ASRs), availability of public meeting data, subjective knowledge of the minuter, etc. In this work, we present the first of its kind dataset on Automatic Minuting. We develop a dataset of English and Czech technical project meetings which consists of transcripts generated from ASRs, manually corrected, and minuted by several annotators. Our dataset, AutoMin, consists of 113 (English) and 53 (Czech) meetings, covering more than 160 hours of meeting content. Upon acceptance, we will publicly release (aaa.bbb.ccc) the dataset as a set of meeting transcripts and minutes, excluding the recordings for privacy reasons. A unique feature of our dataset is that most meetings are equipped with more than one minute, each created independently. Our corpus thus allows studying differences in what people find important while taking the minutes. We also provide baseline experiments for the community to explore this novel problem further. To the best of our knowledge AutoMin is probably the first resource on minuting in English and also in a language other than English (Czech). 2022.lrec-1.340 @@ -3940,7 +3940,7 @@ Extracting Age-Related Stereotypes from Social Media Texts - Kathleen C.Fraser + Kathleen C.Fraser SvetlanaKiritchenko IsarNejadgholi 3183–3194 @@ -4005,7 +4005,7 @@ <fixed-case>SDS</fixed-case>-200: A <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman Speech to <fixed-case>S</fixed-case>tandard <fixed-case>G</fixed-case>erman Text Corpus MichelPlüss - ManuelaHürlimann + ManuelaHürlimann MarcCuny AllaStöckli NikolaosKapotis @@ -4014,7 +4014,7 @@ ChristianScheller YanickSchraner AmitJain - JanDeriu + JanDeriu MarkCieliebak ManfredVogel 3250–3256 @@ -4026,9 +4026,9 @@ Extracting Linguistic Knowledge from Speech: A Study of Stop Realization in 5 <fixed-case>R</fixed-case>omance Languages YaruWu MathildeHutin - IoanaVasilescu - LoriLamel - MartineAdda-Decker + IoanaVasilescu + LoriLamel + MartineAdda-Decker 3257–3263 This paper builds upon recent work in leveraging the corpora and tools originally used to develop speech technologies for corpus-based linguistic studies. We address the non-canonical realization of consonants in connected speech and we focus on voicing alternation phenomena of stops in 5 standard varieties of Romance languages (French, Italian, Spanish, Portuguese, Romanian). For these languages, both large scale corpora and speech recognition systems were available for the study. We use forced alignment with pronunciation variants and machine learning techniques to examine to what extent such frequent phenomena characterize languages and what are the most triggering factors. The results confirm that voicing alternations occur in all Romance languages. Automatic classification underlines that surrounding contexts and segment duration are recurring contributing factors for modeling voicing alternation. The results of this study also demonstrate the new role that machine learning techniques such as classification algorithms can play in helping to extract linguistic knowledge from speech and to suggest interesting research directions. 2022.lrec-1.348 @@ -4073,12 +4073,12 @@ <fixed-case>QT</fixed-case>30: A Corpus of Argument and Conflict in Broadcast Debate - AnnetteHautli-Janisz + AnnetteHautli-Janisz ZlataKikteva WassilikiSiskou KamilaGorska RayBecker - ChrisReed + ChrisReed 3291–3300 Broadcast political debate is a core pillar of democracy: it is the public’s easiest access to opinions that shape policies and enables the general public to make informed choices. With QT30, we present the largest corpus of analysed dialogical argumentation ever created (19,842 utterances, 280,000 words) and also the largest corpus of analysed broadcast political debate to date, using 30 episodes of BBC’s ‘Question Time’ from 2020 and 2021. Question Time is the prime institution in UK broadcast political debate and features questions from the public on current political issues, which are responded to by a weekly panel of five figures of UK politics and society. QT30 is highly argumentative and combines language of well-versed political rhetoric with direct, often combative, justification-seeking of the general public. QT30 is annotated with Inference Anchoring Theory, a framework well-known in argument mining, which encodes the way arguments and conflicts are created and reacted to in dialogical settings. The resource is freely available at http://corpora.aifdb.org/qt30. 2022.lrec-1.352 @@ -4117,7 +4117,7 @@ Distant Reading in Digital Humanities: Case Study on the <fixed-case>S</fixed-case>erbian Part of the <fixed-case>ELT</fixed-case>e<fixed-case>C</fixed-case> Collection - RankaStanković + RankaStanković CvetanaKrstev BranislavaŠandrih Todorović DuskoVitas @@ -4134,7 +4134,7 @@ JudithSieker SvenjaGuhr EvelynGius - SinaZarrieß + SinaZarrieß 3346–3353 Automatizing the process of understanding the global narrative structure of long texts and stories is still a major challenge for state-of-the-art natural language understanding systems, particularly because annotated data is scarce and existing annotation workflows do not scale well to the annotation of complex narrative phenomena. In this work, we focus on the identification of narrative levels in texts corresponding to stories that are embedded in stories. Lacking sufficient pre-annotated training data, we explore a solution to deal with data scarcity that is common in machine learning: the automatic augmentation of an existing small data set of annotated samples with the help of data synthesis. We present a workflow for narrative level detection, that includes the operationalization of the task, a model, and a data augmentation protocol for automatically generating narrative texts annotated with breaks between narrative levels. Our experiments suggest that narrative levels in long text constitute a challenging phenomenon for state-of-the-art NLP models, but generating training data synthetically does improve the prediction results considerably. 2022.lrec-1.357 @@ -4146,7 +4146,7 @@ JonathanPoinhos EleniKogkitsidou PhilippeGambette - BenoîtSagot + BenoîtSagot SimonGabay 3354–3366 Spelling normalisation is a useful step in the study and analysis of historical language texts, whether it is manual analysis by experts or automatic analysis using downstream natural language processing (NLP) tools. Not only does it help to homogenise the variable spelling that often exists in historical texts, but it also facilitates the use of off-the-shelf contemporary NLP tools, if contemporary spelling conventions are used for normalisation. We present FREEMnorm, a new benchmark for the normalisation of Early Modern French (from the 17th century) into contemporary French and provide a thorough comparison of three different normalisation methods: ABA, an alignment-based approach and MT-approaches, (both statistical and neural), including extensive parameter searching, which is often missing in the normalisation literature. @@ -4156,12 +4156,12 @@ From <fixed-case>F</fixed-case>re<fixed-case>EM</fixed-case> to D’<fixed-case>A</fixed-case>lem<fixed-case>BERT</fixed-case>: a Large Corpus and a Language Model for Early <fixed-case>M</fixed-case>odern <fixed-case>F</fixed-case>rench SimonGabay - PedroOrtiz Suarez + PedroOrtiz Suarez AlexandreBartz AlixChagué RachelBawden PhilippeGambette - BenoîtSagot + BenoîtSagot 3367–3374 anguage models for historical states of language are becoming increasingly important to allow the optimal digitisation and analysis of old textual sources. Because these historical states are at the same time more complex to process and more scarce in the corpora available, this paper presents recent efforts to overcome this difficult situation. These efforts include producing a corpus, creating the model, and evaluating it with an NLP task currently used by scholars in other ongoing projects. 2022.lrec-1.359 @@ -4170,7 +4170,7 @@ Detecting Multiple Transitions in Literary Texts NuetteHeyns - Mennovan Zaanen + Mennovan Zaanen 3375–3381 Identifying the high level structure of texts provides important information when performing distant reading analysis. The structure of texts is not necessarily linear, as transitions, such as changes in the scenery or flashbacks, can be present. As a first step in identifying this structure, we aim to identify transitions in texts. Previous work (Heyns and van Zaanen, 2021) proposed a system that can successfully identify one transition in literary texts. The text is split in snippets and LDA is applied, resulting in a sequence of topics. A transition is introduced at the point that separates the topics (before and after the point) best. In this article, we extend the existing system such that it can detect multiple transitions. Additionally, we introduce a new system that inherently handles multiple transitions in texts. The new system also relies on LDA information, but is more robust than the previous system. We apply these systems to texts with known transitions (as they are constructed by concatenating text snippets stemming from different source texts) and evaluation both systems on texts with one transition and texts with two transitions. As both systems rely on LDA to identify transitions between snippets, we also show the impact of varying the number of LDA topics on the results as well. The new system consistently outperforms the previous system, not only on texts with multiple transitions, but also on single boundary texts. 2022.lrec-1.360 @@ -4184,7 +4184,7 @@ AinaraLarrondo-Ureta SimónPeña-Fernández OlatzPerez-de-Viñaspre - RodrigoAgerri + RodrigoAgerri 3382–3390 Parliamentary transcripts provide a valuable resource to understand the reality and know about the most important facts that occur over time in our societies. Furthermore, the political debates captured in these transcripts facilitate research on political discourse from a computational social science perspective. In this paper we release the first version of a newly compiled corpus from Basque parliamentary transcripts. The corpus is characterized by heavy Basque-Spanish code-switching, and represents an interesting resource to study political discourse in contrasting languages such as Basque and Spanish. We enrich the corpus with metadata related to relevant attributes of the speakers and speeches (language, gender, party...) and process the text to obtain named entities and lemmas. The obtained metadata is then used to perform a detailed corpus analysis which provides interesting insights about the language use of the Basque political representatives across time, parties and gender. 2022.lrec-1.361 @@ -4215,7 +4215,7 @@ Quantification Annotation in <fixed-case>ISO</fixed-case> 24617-12, Second Draft - HarryBunt + HarryBunt MaximeAmblard JohanBos KarënFort @@ -4235,7 +4235,7 @@ The <fixed-case>LTRC</fixed-case> <fixed-case>H</fixed-case>indi-<fixed-case>T</fixed-case>elugu Parallel Corpus VandanMujadia - DiptiSharma + DiptiSharma 3417–3424 We present the Hindi-Telugu Parallel Corpus of different technical domains such as Natural Science, Computer Science, Law and Healthcare along with the General domain. The qualitative corpus consists of 700K parallel sentences of which 535K sentences were created using multiple methods such as extract, align and review of Hindi-Telugu corpora, end-to-end human translation, iterative back-translation driven post-editing and around 165K parallel sentences were collected from available sources in the public domain. We present the comparative assessment of created parallel corpora for representativeness and diversity. The corpus has been pre-processed for machine translation, and we trained a neural machine translation system using it and report state-of-the-art baseline results on the developed development set over multiple domains and on available benchmarks. With this, we define a new task on Domain Machine Translation for low resource language pairs such as Hindi and Telugu. The developed corpus (535K) is freely available for non-commercial research and to the best of our knowledge, this is the well curated, largest, publicly available domain parallel corpus for Hindi-Telugu. 2022.lrec-1.365 @@ -4244,7 +4244,7 @@ <fixed-case>MHE</fixed-case>: Code-Mixed Corpora for Similar Language Identification PriyaRani - John P.McCrae + John P.McCrae TheodorusFransen 3425–3433 This paper introduces a new Magahi-Hindi-English (MHE) code-mixed data-set for similar language identification (SMLID), where Magahi is a less-resourced minority language. This corpus provides a language id at two levels: word and sentence. This data-set is the first Magahi-Hindi-English code-mixed data-set for similar language identification task. Furthermore, we will discuss the complexity of the data-set and provide a few baselines for the language identification task. @@ -4263,7 +4263,7 @@ AmanBerhe LéoGalmant RuiqingYin - ClaudeBarras + ClaudeBarras 3434–3441 We introduce a dataset built around a large collection of TV (and movie) series. Those are filled with challenging multi-party dialogues. Moreover, TV series come with a very active fan base that allows the collection of metadata and accelerates annotation. With 16 TV and movie series, Bazinga! amounts to 400+ hours of speech and 8M+ tokens, including 500K+ tokens annotated with the speaker, addressee, and entity linking information. Along with the dataset, we also provide a baseline for speaker diarization, punctuation restoration, and person entity recognition. The results demonstrate the difficulty of the tasks and of transfer learning from models trained on mono-speaker audio or written text, which is more widely available. This work is a step towards better multi-party dialogue structuring and understanding. Bazinga! is available at hf.co/bazinga. Because (a large) part of Bazinga! is only partially annotated, we also expect this dataset to foster research towards self- or weakly-supervised learning methods. 2022.lrec-1.367 @@ -4282,11 +4282,11 @@ <fixed-case>W</fixed-case>e<fixed-case>C</fixed-case>an<fixed-case>T</fixed-case>alk: A New Multi-language, Multi-modal Resource for Speaker Recognition - KarenJones + KarenJones KevinWalker - ChristopherCaruso + ChristopherCaruso JonathanWright - StephanieStrassel + StephanieStrassel 3451–3456 The WeCanTalk (WCT) Corpus is a new multi-language, multi-modal resource for speaker recognition. The corpus contains Cantonese, Mandarin and English telephony and video speech data from over 200 multilingual speakers located in Hong Kong. Each speaker contributed at least 10 telephone conversations of 8-10 minutes’ duration collected via a custom telephone platform based in Hong Kong. Speakers also uploaded at least 3 videos in which they were both speaking and visible, along with one selfie image. At least half of the calls and videos for each speaker were in Cantonese, while their remaining recordings featured one or more different languages. Both calls and videos were made in a variety of noise conditions. All speech and video recordings were audited by experienced multilingual annotators for quality including presence of the expected language and for speaker identity. The WeCanTalk Corpus has been used to support the NIST 2021 Speaker Recognition Evaluation and will be published in the LDC catalog. 2022.lrec-1.369 @@ -4333,7 +4333,7 @@ <fixed-case>HAWP</fixed-case>: a Dataset for <fixed-case>H</fixed-case>indi Arithmetic Word Problem Solving HarshitaSharma PruthwikMishra - DiptiSharma + DiptiSharma 3479–3490 Word Problem Solving remains a challenging and interesting task in NLP. A lot of research has been carried out to solve different genres of word problems with various complexity levels in recent years. However, most of the publicly available datasets and work has been carried out for English. Recently there has been a surge in this area of word problem solving in Chinese with the creation of large benchmark datastes. Apart from these two languages, labeled benchmark datasets for low resource languages are very scarce. This is the first attempt to address this issue for any Indian Language, especially Hindi. In this paper, we present HAWP (Hindi Arithmetic Word Problems), a dataset consisting of 2336 arithmetic word problems in Hindi. We also developed baseline systems for solving these word problems. We also propose a new evaluation technique for word problem solvers taking equation equivalence into account. 2022.lrec-1.373 @@ -4342,7 +4342,7 @@ The <fixed-case>B</fixed-case>ulgarian Event Corpus: Overview and Initial <fixed-case>NER</fixed-case> Experiments PetyaOsenova - KirilSimov + KirilSimov IvaMarinova MelaniaBerbatova 3491–3499 @@ -4378,8 +4378,8 @@ Constrained Language Models for Interactive Poem Generation - AndreiPopescu-Belis - ÀlexAtrio + AndreiPopescu-Belis + ÀlexAtrio ValentinMinder ArisXanthos GabrielLuthier @@ -4396,7 +4396,7 @@ Young JuNa HoyunSong JisuShin - JongPark + JongPark 3530–3541 Online trolls increase social costs and cause psychological damage to individuals. With the proliferation of automated accounts making use of bots for trolling, it is difficult for targeted individual users to handle the situation both quantitatively and qualitatively. To address this issue, we focus on automating the method to counter trolls, as counter responses to combat trolls encourage community users to maintain ongoing discussion without compromising freedom of expression. For this purpose, we propose a novel dataset for automatic counter response generation. In particular, we constructed a pair-wise dataset that includes troll comments and counter responses with labeled response strategies, which enables models fine-tuned on our dataset to generate responses by varying counter responses according to the specified strategy. We conducted three tasks to assess the effectiveness of our dataset and evaluated the results through both automatic and human evaluation. In human evaluation, we demonstrate that the model fine-tuned with our dataset shows a significantly improved performance in strategy-controlled sentence generation. 2022.lrec-1.378 @@ -4446,7 +4446,7 @@ <fixed-case>ALEXSIS</fixed-case>: A Dataset for Lexical Simplification in <fixed-case>S</fixed-case>panish - DanielFerrés + DanielFerrés HoracioSaggion 3582–3594 Lexical Simplification is the process of reducing the lexical complexity of a text by replacing difficult words with easier to read (or understand) expressions while preserving the original information and meaning. In this paper we introduce ALEXSIS, a new dataset for this task, and we use ALEXSIS to benchmark Lexical Simplification systems in Spanish. The paper describes the evaluation of three kind of approaches to Lexical Simplification, a thesaurus-based approach, a single transformers-based approach, and a combination of transformers. We also report state of the art results on a previous Lexical Simplification dataset for Spanish. @@ -4476,7 +4476,7 @@ <fixed-case>R</fixed-case>a<fixed-case>F</fixed-case>o<fixed-case>L</fixed-case>a: A Rationale-Annotated Corpus for Detecting Indicators of Forced Labour ErickMendez Guzman ViktorSchlegel - RizaBatista-Navarro + RizaBatista-Navarro 3610–3625 Forced labour is the most common type of modern slavery, and it is increasingly gaining the attention of the research and social community. Recent studies suggest that artificial intelligence (AI) holds immense potential for augmenting anti-slavery action. However, AI tools need to be developed transparently in cooperation with different stakeholders. Such tools are contingent on the availability and access to domain-specific data, which are scarce due to the near-invisible nature of forced labour. To the best of our knowledge, this paper presents the first openly accessible English corpus annotated for multi-class and multi-label forced labour detection. The corpus consists of 989 news articles retrieved from specialised data sources and annotated according to risk indicators defined by the International Labour Organization (ILO). Each news article was annotated for two aspects: (1) indicators of forced labour as classification labels and (2) snippets of the text that justify labelling decisions. We hope that our data set can help promote research on explainability for multi-class and multi-label text classification. In this work, we explain our process for collecting the data underpinning the proposed corpus, describe our annotation guidelines and present some statistical analysis of its content. Finally, we summarise the results of baseline experiments based on different variants of the Bidirectional Encoder Representation from Transformer (BERT) model. 2022.lrec-1.386 @@ -4499,7 +4499,7 @@ RolandRoller OliverSapina SebastianMöller - PierreZweigenbaum + PierreZweigenbaum 3637–3649 In this work, we present the first corpus for German Adverse Drug Reaction (ADR) detection in patient-generated content. The data consists of 4,169 binary annotated documents from a German patient forum, where users talk about health issues and get advice from medical doctors. As is common in social media data in this domain, the class labels of the corpus are very imbalanced. This and a high topic imbalance make it a very challenging dataset, since often, the same symptom can have several causes and is not always related to a medication intake. We aim to encourage further multi-lingual efforts in the domain of ADR detection and provide preliminary experiments for binary classification using different methods of zero- and few-shot learning based on a multi-lingual model. When fine-tuning XLM-RoBERTa first on English patient forum data and then on the new German data, we achieve an F1-score of 37.52 for the positive class. We make the dataset and models publicly available for the community. 2022.lrec-1.388 @@ -4526,7 +4526,7 @@ <fixed-case>C</fixed-case>lin<fixed-case>IDM</fixed-case>ap: Towards a Clinical <fixed-case>ID</fixed-case>s Mapping for Data Interoperability ElenaZotova MontseCuadros - GermanRigau + GermanRigau 3661–3669 This paper presents ClinIDMap, a tool for mapping identifiers between clinical ontologies and lexical resources. ClinIDMap interlinks identifiers from UMLS, SMOMED-CT, ICD-10 and the corresponding Wikipedia articles for concepts from the UMLS Metathesaurus. Our main goal is to provide semantic interoperability across the clinical concepts from various knowledge bases. As a side effect, the mapping enriches already annotated corpora in multiple languages with new labels. For instance, spans manually annotated with IDs from UMLS can be annotated with Semantic Types and Groups, and its corresponding SNOMED CT and ICD-10 IDs. We also experiment with sequence labelling models for detecting Diagnosis and Procedures concepts and for detecting UMLS Semantic Groups trained on Spanish, English, and bilingual corpora obtained with the new mapping procedure. The ClinIDMap tool is publicly available. 2022.lrec-1.390 @@ -4565,7 +4565,7 @@ How’s Business Going Worldwide ? A Multilingual Annotated Corpus for Business Relation Extraction HadjerKhaldi - FarahBenamara + FarahBenamara CamillePradel GrégoireSigel NathalieAussenac-Gilles @@ -4611,7 +4611,7 @@ Enhanced Entity Annotations for Multilingual Corpora MichaelStrobl AmineTrabelsi - OsmarZaïane + OsmarZaïane 3732–3740 Modern approaches in Natural Language Processing (NLP) require, ideally, large amounts of labelled data for model training. However, new language resources, for example, for Named Entity Recognition (NER), Co-reference Resolution (CR), Entity Linking (EL) and Relation Extraction (RE), naming a few of the most popular tasks in NLP, have always been challenging to create since manual text annotations can be very time-consuming to acquire. While there may be an acceptable amount of labelled data available for some of these tasks in one language, there may be a lack of datasets in another. WEXEA is a tool to exhaustively annotate entities in the English Wikipedia. Guidelines for editors of Wikipedia articles result, on the one hand, in only a few annotations through hyperlinks, but on the other hand, make it easier to exhaustively annotate the rest of these articles with entities than starting from scratch. We propose the following main improvements to WEXEA: Creating multi-lingual corpora, improved entity annotations using a proven NER system, annotating dates and times. A brief evaluation of the annotation quality of WEXEA is added. 2022.lrec-1.398 @@ -4630,7 +4630,7 @@ <fixed-case>S</fixed-case>panish Datasets for Sensitive Entity Detection in the Legal Domain - Onade Gibert + Onade Gibert AitorGarcía-Pablos MontseCuadros MaiteMelero @@ -4654,7 +4654,7 @@ MeriemBeloucif Seid MuhieYimam SteffenStahlhacke - ChrisBiemann + ChrisBiemann 3771–3779 Comparative Question Answering (cQA) is the task of providing concrete and accurate responses to queries such as: “Is Lyft cheaper than a regular taxi?” or “What makes a mortgage different from a regular loan?”. In this paper, we propose two new open-domain real-world datasets for identifying and labeling comparative questions. While the first dataset contains instances of English questions labeled as comparative vs. non-comparative, the second dataset provides additional labels including the objects and the aspects of comparison. We conduct several experiments that evaluate the soundness of our datasets. The evaluation of our datasets using various classifiers show promising results that reach close-to-human results on a binary classification task with a neural model using ALBERT embeddings. When approaching the unsupervised sequence labeling task, some headroom remains. 2022.lrec-1.402 @@ -4664,7 +4664,7 @@ Decorate the Examples: A Simple Method of Prompt Design for Biomedical Relation Extraction Hui-SyuanYeh ThomasLavergne - PierreZweigenbaum + PierreZweigenbaum 3780–3787 Relation extraction is a core problem for natural language processing in the biomedical domain. Recent research on relation extraction showed that prompt-based learning improves the performance on both fine-tuning on full training set and few-shot training. However, less effort has been made on domain-specific tasks where good prompt design can be even harder. In this paper, we investigate prompting for biomedical relation extraction, with experiments on the ChemProt dataset. We present a simple yet effective method to systematically generate comprehensive prompts that reformulate the relation extraction task as a cloze-test task under a simple prompt formulation. In particular, we experiment with different ranking scores for prompt selection. With BioMed-RoBERTa-base, our results show that prompting-based fine-tuning obtains gains by 14.21 F1 over its regular fine-tuning baseline, and 1.14 F1 over SciFive-Large, the current state-of-the-art on ChemProt. Besides, we find prompt-based learning requires fewer training examples to make reasonable predictions. The results demonstrate the potential of our methods in such a domain-specific relation extraction task. 2022.lrec-1.403 @@ -4695,7 +4695,7 @@ <fixed-case>APPR</fixed-case>eddit: a Corpus of <fixed-case>R</fixed-case>eddit Posts Annotated for Appraisal - Marco AntonioStranisci + Marco AntonioStranisci SimonaFrenda EleonoraCeccaldi ValerioBasile @@ -4737,7 +4737,7 @@ MarianaIllescas SabinaOporto FredericBlum - ArturoOncevay + ArturoOncevay JavierVera 3840–3851 In this paper, we launch a new Universal Dependencies treebank for an endangered language from Amazonia: Kakataibo, a Panoan language spoken in Peru. We first discuss the collaborative methodology implemented, which proved effective to create a treebank in the context of a Computational Linguistic course for undergraduates. Then, we describe the general details of the treebank and the language-specific considerations implemented for the proposed annotation. We finally conduct some experiments on part-of-speech tagging and syntactic dependency parsing. We focus on monolingual and transfer learning settings, where we study the impact of a Shipibo-Konibo treebank, another Panoan language resource. @@ -4806,7 +4806,7 @@ Pre-Training Language Models for Identifying Patronizing and Condescending Language: An Analysis CarlaPerez Almendros - LuisEspinosa Anke + LuisEspinosa Anke StevenSchockaert 3902–3911 Patronizing and Condescending Language (PCL) is a subtle but harmful type of discourse, yet the task of recognizing PCL remains under-studied by the NLP community. Recognizing PCL is challenging because of its subtle nature, because available datasets are limited in size, and because this task often relies on some form of commonsense knowledge. In this paper, we study to what extent PCL detection models can be improved by pre-training them on other, more established NLP tasks. We find that performance gains are indeed possible in this way, in particular when pre-training on tasks focusing on sentiment, harmful language and commonsense morality. In contrast, for tasks focusing on political speech and social justice, no or only very small improvements were witnessed. These findings improve our understanding of the nature of PCL. @@ -4817,7 +4817,7 @@ <fixed-case>H</fixed-case>e<fixed-case>LI</fixed-case>-<fixed-case>OTS</fixed-case>, Off-the-shelf Language Identifier for Text TommiJauhiainen HeidiJauhiainen - KristerLindén + KristerLindén 3912–3922 This paper introduces HeLI-OTS, an off-the-shelf text language identification tool using the HeLI language identification method. The HeLI-OTS language identifier is equipped with language models for 200 languages and licensed for academic as well as commercial use. We present the HeLI method and its use in our previous research. Then we compare the performance of the HeLI-OTS language identifier with that of fastText on two different data sets, showing that fastText favors the recall of common languages, whereas HeLI-OTS reaches both high recall and high precision for all languages. While introducing existing off-the-shelf language identification tools, we also give a picture of digital humanities-related research that uses such tools. The validity of the results of such research depends on the results given by the language identifier used, and especially for research focusing on the less common languages, the tendency to favor widely used languages might be very detrimental, which Heli-OTS is now able to remedy. 2022.lrec-1.416 @@ -4828,7 +4828,7 @@ SilviaSeverini AyyoobImani PhilippDufter - HinrichSchütze + HinrichSchütze 3923–3933 Parallel corpora are ideal for extracting a multilingual named entity (MNE) resource, i.e., a dataset of names translated into multiple languages. Prior work on extracting MNE datasets from parallel corpora required resources such as large monolingual corpora or word aligners that are unavailable or perform poorly for underresourced languages. We present CLC-BN, a new method for creating an MNE resource, and apply it to the Parallel Bible Corpus, a corpus of more than 1000 languages. CLC-BN learns a neural transliteration model from parallel-corpus statistics, without requiring any other bilingual resources, word aligners, or seed data. Experimental results show that CLC-BN clearly outperforms prior work. We release an MNE resource for 1340 languages and demonstrate its effectiveness in two downstream tasks: knowledge graph augmentation and bilingual lexicon induction. 2022.lrec-1.417 @@ -4836,12 +4836,12 @@ Towards the Construction of a <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for <fixed-case>O</fixed-case>ld <fixed-case>E</fixed-case>nglish - FahadKhan + FahadKhan Francisco J.Minaya Gómez RafaelCruz González HarryDiakoff Javier E.Diaz Vera - John P.McCrae + John P.McCrae CiaraO’Loughlin William MichaelShort SanderStolk @@ -4852,7 +4852,7 @@ A Framenet and Frame Annotator for <fixed-case>G</fixed-case>erman Social Media - EckhardBick + EckhardBick 3942–3949 This paper presents PFN-DE, a new, parsing- and annotation-oriented framenet for German, with almost 15,000 frames, covering 11,300 verb lemmas. The resource was developed in the context of a Danish/German social-media study on hate speech and has a strong focus on coverage, robustness and cross-language comparability. A simple annotation scheme for argument roles meshes directly with the output of a syntactic parser, facilitating frame disambiguation through slot-filler conditions based on valency, syntactic function and semantic noun class. We discuss design principles for the framenet and the frame tagger using it, and present statistics for frame and role distribution at both the lexicon (type) and corpus (token) levels. In an evaluation run on Twitter data, the parser-based frame annotator achieved an overall F-score for frame senses of 93.6%. 2022.lrec-1.419 @@ -4862,7 +4862,7 @@ The Robotic Surgery Procedural Framebank MarcoBombieri MarcoRospocher - Simone PaoloPonzetto + Simone PaoloPonzetto PaoloFiorini 3950–3959 Robot-Assisted minimally invasive robotic surgery is the gold standard for the surgical treatment of many pathological conditions, and several manuals and academic papers describe how to perform these interventions. These high-quality, often peer-reviewed texts are the main study resource for medical personnel and consequently contain essential procedural domain-specific knowledge. The procedural knowledge therein described could be extracted, e.g., on the basis of semantic parsing models, and used to develop clinical decision support systems or even automation methods for some procedure’s steps. However, natural language understanding algorithms such as, for instance, semantic role labelers have lower efficacy and coverage issues when applied to domain others than those they are typically trained on (i.e., newswire text). To overcome this problem, starting from PropBank frames, we propose a new linguistic resource specific to the robotic-surgery domain, named Robotic Surgery Procedural Framebank (RSPF). We extract from robotic-surgical texts verbs and nouns that describe surgical actions and extend PropBank frames by adding any of new lemmas, frames or role sets required to cover missing lemmas, specific frames describing the surgical significance, or new semantic roles used in procedural surgical language. Our resource is publicly available and can be used to annotate corpora in the surgical domain to train and evaluate Semantic Role Labeling (SRL) systems in a challenging fine-grained domain setting. @@ -4883,7 +4883,7 @@ NyomanJuniarta OlivierBonami NabilHathout - FiammettaNamer + FiammettaNamer YannickToussaint 3969–3976 We apply Formal Concept Analysis (FCA) to organize and to improve the quality of Démonette2, a French derivational database, through a detection of both missing and spurious derivations in the database. We represent each derivational family as a graph. Given that the subgraph relation exists among derivational families, FCA can group families and represent them in a partially ordered set (poset). This poset is also useful for improving the database. A family is regarded as a possible anomaly (meaning that it may have missing and/or spurious derivations) if its derivational graph is almost, but not completely identical to a large number of other families. @@ -4900,7 +4900,7 @@ Towards the Detection of a Semantic Gap in the Chain of Commonsense Knowledge Triples - YoshihikoHayashi + YoshihikoHayashi 3984–3993 A commonsense knowledge resource organizes common sense that is not necessarily correct all the time, but most people are expected to know or believe. Such knowledge resources have recently been actively built and utilized in artificial intelligence, particularly natural language processing. In this paper, we discuss an important but not significantly discussed the issue of semantic gaps potentially existing in a commonsense knowledge graph and propose a machine learning-based approach to detect a semantic gap that may inhibit the proper chaining of knowledge triples. In order to establish this line of research, we created a pilot dataset from ConceptNet, in which chains consisting of two adjacent triples are sampled, and the validity of each chain is human-annotated. We also devised a few baseline methods for detecting the semantic gaps and compared them in small-scale experiments. Although the experimental results suggest that the detection of semantic gaps may not be a trivial task, we achieved several insights to further push this research direction, including the potential efficacy of sense embeddings and contextualized word representations enabled by a pre-trained language model. 2022.lrec-1.424 @@ -4991,7 +4991,7 @@ The slurk Interaction Server Framework: Better Data for Better Dialog Models - JanaGötze + JanaGötze MaikePaetzel-Prüsmann WenckeLiermann TimDiekmann @@ -5006,9 +5006,9 @@ NataliaKalashnikova SergePajak FabriceLe Guel - IoanaVasilescu + IoanaVasilescu GemmaSerrano - LaurenceDevillers + LaurenceDevillers 4079–4087 In this paper, we present the methodology of corpus design that will be used to study the comparison of influence between linguistic nudges with positive or negative influences and three conversational agents: robot, smart speaker, and human. We recruited forty-nine participants to form six groups. The conversational agents first asked the participants about their willingness to adopt five ecological habits and invest time and money in ecological problems. The participants were then asked the same questions but preceded by one linguistic nudge with positive or negative influence. The comparison of standard deviation and mean metrics of differences between these two notes (before the nudge and after) showed that participants were mainly affected by nudges with positive influence, even though several nudges with negative influence decreased the average note. In addition, participants from all groups were willing to spend more money than time on ecological problems. In general, our experiment’s early results suggest that a machine agent can influence participants to the same degree as a human agent. A better understanding of the power of influence of different conversational machines and the potential of influence of nudges of different polarities will lead to the development of ethical norms of human-computer interactions. 2022.lrec-1.434 @@ -5035,7 +5035,7 @@ Hsien-chinLin MichaelHeck Carelvan Niekerk - MilicaGasic + MilicaGasic 4096–4113 The ability to recognise emotions lends a conversational artificial intelligence a human touch. While emotions in chit-chat dialogues have received substantial attention, emotions in task-oriented dialogues remain largely unaddressed. This is despite emotions and dialogue success having equally important roles in a natural system. Existing emotion-annotated task-oriented corpora are limited in size, label richness, and public availability, creating a bottleneck for downstream tasks. To lay a foundation for studies on emotions in task-oriented dialogues, we introduce EmoWOZ, a large-scale manually emotion-annotated corpus of task-oriented dialogues. EmoWOZ is based on MultiWOZ, a multi-domain task-oriented dialogue dataset. It contains more than 11K dialogues with more than 83K emotion annotations of user utterances. In addition to Wizard-of-Oz dialogues from MultiWOZ, we collect human-machine dialogues within the same set of domains to sufficiently cover the space of various emotions that can happen during the lifetime of a data-driven dialogue system. To the best of our knowledge, this is the first large-scale open-source corpus of its kind. We propose a novel emotion labelling scheme, which is tailored to task-oriented dialogues. We report a set of experimental results to show the usability of this corpus for emotion recognition and state tracking in task-oriented dialogues. 2022.lrec-1.436 @@ -5091,7 +5091,7 @@ YogeshDawer BorniniLahiri AkankshaBansal - Atul Kr.Ojha + Atul Kr.Ojha 4149–4161 In this paper, we discuss the development of a multilingual dataset annotated with a hierarchical, fine-grained tagset marking different types of aggression and the “context” in which they occur. The context, here, is defined by the conversational thread in which a specific comment occurs and also the “type” of discursive role that the comment is performing with respect to the previous comment. The initial dataset, being discussed here consists of a total 59,152 annotated comments in four languages - Meitei, Bangla, Hindi, and Indian English - collected from various social media platforms such as YouTube, Facebook, Twitter and Telegram. As is usual on social media websites, a large number of these comments are multilingual, mostly code-mixed with English. The paper gives a detailed description of the tagset being used for annotation and also the process of developing a multi-label, fine-grained tagset that has been used for marking comments with aggression and bias of various kinds including sexism (called gender bias in the tagset), religious intolerance (called communal bias in the tagset), class/caste bias and ethnic/racial bias. We also define and discuss the tags that have been used for marking the different discursive role being performed through the comments, such as attack, defend, etc. Finally, we present a basic statistical analysis of the dataset. The dataset is being incrementally made publicly available on the project website. 2022.lrec-1.441 @@ -5100,7 +5100,7 @@ <fixed-case>Tweet Emotion Dynamics</fixed-case>: Emotion Word Usage in Tweets from <fixed-case>US</fixed-case> and <fixed-case>C</fixed-case>anada KrishnapriyaVishnubhotla - Saif M.Mohammad + Saif M.Mohammad 4162–4176 Over the last decade, Twitter has emerged as one of the most influential forums for social, political, and health discourse. In this paper, we introduce a massive dataset of more than 45 million geo-located tweets posted between 2015 and 2021 from US and Canada (TUSC), especially curated for natural language analysis. We also introduce Tweet Emotion Dynamics (TED) — metrics to capture patterns of emotions associated with tweets over time. We use TED and TUSC to explore the use of emotion-associated words across US and Canada; across 2019 (pre-pandemic), 2020 (the year the pandemic hit), and 2021 (the second year of the pandemic); and across individual tweeters. We show that Canadian tweets tend to have higher valence, lower arousal, and higher dominance than the US tweets. Further, we show that the COVID-19 pandemic had a marked impact on the emotional signature of tweets posted in 2020, when compared to the adjoining years. Finally, we determine metrics of TED for 170,000 tweeters to benchmark characteristics of TED metrics at an aggregate level. TUSC and the metrics for TED will enable a wide variety of research on studying how we use language to express ourselves, persuade, communicate, and influence, with particularly promising applications in public health, affective science, social science, and psychology. 2022.lrec-1.442 @@ -5123,7 +5123,7 @@ Life is not Always Depressing: Exploring the Happy Moments of People Diagnosed with Depression Ana-MariaBucur AdrianCosma - Liviu P.Dinu + Liviu P.Dinu 4186–4192 In this work, we explore the relationship between depression and manifestations of happiness in social media. While the majority of works surrounding depression focus on symptoms, psychological research shows that there is a strong link between seeking happiness and being diagnosed with depression. We make use of Positive-Unlabeled learning paradigm to automatically extract happy moments from social media posts of both controls and users diagnosed with depression, and qualitatively analyze them with linguistic tools such as LIWC and keyness information. We show that the life of depressed individuals is not always bleak, with positive events related to friends and family being more noteworthy to their lives compared to the more mundane happy events reported by control users. 2022.lrec-1.444 @@ -5172,7 +5172,7 @@ ShreyasSharma KareemDarwish LucasPavanelli - ThiagoCastro Ferreira + ThiagoCastro Ferreira MohamedAl-Badrashiny Kamer AliYuksel HassanSawaf @@ -5196,7 +5196,7 @@ Transfer Learning Methods for Domain Adaptation in Technical Logbook Datasets FarhadAkhbardeh MarcosZampieri - Cecilia OvesdotterAlm + Cecilia OvesdotterAlm TravisDesell 4235–4244 Event identification in technical logbooks poses challenges given the limited logbook data available in specific technical domains, the large set of possible classes, and logbook entries typically being in short form and non-standard technical language. Technical logbook data typically has both a domain, the field it comes from (e.g., automotive), and an application, what it is used for (e.g., maintenance). In order to better handle the problem of data scarcity, using a variety of technical logbook datasets, this paper investigates the benefits of using transfer learning from sources within the same domain (but different applications), from within the same application (but different domains) and from all available data. Results show that performing transfer learning within a domain provides statistically significant improvements, and in all cases but one the best performance. Interestingly, transfer learning from within the application or across the global dataset degrades results in all cases but one, which benefited from adding as much data as possible. A further analysis of the dataset similarities shows that the datasets with higher similarity scores performed better in transfer learning tasks, suggesting that this can be utilized to determine the effectiveness of adding a dataset in a transfer learning task for technical logbooks. @@ -5250,7 +5250,7 @@ AmélieChatelain AlessandroCappelli IacopoPoli - DjaméSeddah + DjaméSeddah 4275–4284 Access to large pre-trained models of varied architectures, in many different languages, is central to the democratization of NLP. We introduce PAGnol, a collection of French GPT models. Using scaling laws, we efficiently train PAGnol-XL (1.5B parameters) with the same computational budget as CamemBERT, a model 13 times smaller. PAGnol-XL is the largest model trained from scratch for the French language. We plan to train increasingly large and performing versions of PAGnol, exploring the capabilities of French extreme-scale models. For this first release, we focus on the pre-training and scaling calculations underlining PAGnol. We fit a scaling law for compute for the French language, and compare it with its English counterpart. We find the pre-training dataset significantly conditions the quality of the outputs, with common datasets such as OSCAR leading to low-quality offensive text. We evaluate our models on discriminative and generative tasks in French, comparing to other state-of-the-art French and multilingual models, and reaching the state of the art in the abstract summarization task. Our research was conducted on the public GENCI Jean Zay supercomputer, and our models up to the Large are made publicly available. 2022.lrec-1.455 @@ -5260,7 +5260,7 @@ <fixed-case>CEPOC</fixed-case>: The <fixed-case>C</fixed-case>ambridge Exams Publishing Open Cloze dataset MarianoFelice ShivaTaslimipoor - Øistein E.Andersen + Øistein E.Andersen PaulaButtery 4285–4290 Open cloze tests are a standard type of exercise where examinees must complete a text by filling in the gaps without any given options to choose from. This paper presents the Cambridge Exams Publishing Open Cloze (CEPOC) dataset, a collection of open cloze tests from world-renowned English language proficiency examinations. The tests in CEPOC have been expertly designed and validated using standard principles in language research and assessment. They are prepared for language learners at different proficiency levels and hence classified into different CEFR levels (A2, B1, B2, C1, C2). This resource can be a valuable testbed for various NLP tasks. We perform a complete set of experiments on three tasks: gap filling, gap prediction, and CEFR text classification. We implement transformer-based systems based on pre-trained language models to model each task and use our dataset as a test set, providing promising benchmark results. @@ -5293,7 +5293,7 @@ NicolasHiebel OlivierFerret KarënFort - AurélieNévéol + AurélieNévéol 4306–4315 Modern Natural Language Processing relies on the availability of annotated corpora for training and evaluating models. Such resources are scarce, especially for specialized domains in languages other than English. In particular, there are very few resources for semantic similarity in the clinical domain in French. This can be useful for many biomedical natural language processing applications, including text generation. We introduce a definition of similarity that is guided by clinical facts and apply it to the development of a new French corpus of 1,000 sentence pairs manually annotated according to similarity scores. This new sentence similarity corpus is made freely available to the community. We further evaluate the corpus through experiments of automatic similarity measurement. We show that a model of sentence embeddings can capture similarity with state-of-the-art performance on the DEFT STS shared task evaluation data set (Spearman=0.8343). We also show that the corpus is complementary to DEFT STS. 2022.lrec-1.459 @@ -5312,7 +5312,7 @@ Modeling Noise in Paraphrase Detection TeemuVahtola EetuSjöblom - JörgTiedemann + JörgTiedemann MathiasCreutz 4324–4332 Noisy labels in training data present a challenging issue in classification tasks, misleading a model towards incorrect decisions during training. In this paper, we propose the use of a linear noise model to augment pre-trained language models to account for label noise in fine-tuning. We test our approach in a paraphrase detection task with various levels of noise and five different languages. Our experiments demonstrate the effectiveness of the additional noise model in making the training procedures more robust and stable. Furthermore, we show that this model can be applied without further knowledge about annotation confidence and reliability of individual training examples and we analyse our results in light of data selection and sampling strategies. @@ -5323,9 +5323,9 @@ Give me your Intentions, <fixed-case>I</fixed-case>’ll Predict our Actions: A Two-level Classification of Speech Acts for Crisis Management in Social Media EnzoLaurenti NilsBourgon - FarahBenamara + FarahBenamara AldaMari - VéroniqueMoriceau + VéroniqueMoriceau CamilleCourgeon 4333–4343 Discovered by (Austin,1962) and extensively promoted by (Searle, 1975), speech acts (SA) have been the object of extensive discussion in the philosophical and the linguistic literature, as well as in computational linguistics where the detection of SA have shown to be an important step in many down stream NLP applications. In this paper, we attempt to measure for the first time the role of SA on urgency detection in tweets, focusing on natural disasters. Indeed, SA are particularly relevant to identify intentions, desires, plans and preferences towards action, providing therefore actionable information that will help to set priorities for the human teams and decide appropriate rescue actions. To this end, we come up here with four main contributions: (1) A two-layer annotation scheme of SA both at the tweet and subtweet levels, (2) A new French dataset of 6,669 tweets annotated for both urgency and SA, (3) An in-depth analysis of the annotation campaign, highlighting the correlation between SA and urgency categories, and (4) A set of deep learning experiments to detect SA in a crisis corpus. Our results show that SA are correlated with urgency which is a first important step towards SA-aware NLP-based crisis management on social media. @@ -5335,9 +5335,9 @@ Towards a Cleaner Document-Oriented Multilingual Crawled Corpus JulienAbadji - PedroOrtiz Suarez - LaurentRomary - BenoîtSagot + PedroOrtiz Suarez + LaurentRomary + BenoîtSagot 4344–4355 The need for large corpora raw corpora has dramatically increased in recent years with the introduction of transfer learning and semi-supervised learning methods to Natural Language Processing. And while there have been some recent attempts to manually curate the amount of data necessary to train large language models, the main way to obtain this data is still through automatic web crawling. In this paper we take the existing multilingual web corpus OSCAR and its pipeline Ungoliant that extracts and classifies data from Common Crawl at the line level, and propose a set of improvements and automatic annotations in order to produce a new document-oriented version of OSCAR that could prove more suitable to pre-train large generative language models as well as hopefully other applications in Natural Language Processing and Digital Humanities. 2022.lrec-1.463 @@ -5399,7 +5399,7 @@ A Semi-Automated Live Interlingual Communication Workflow Featuring Intralingual Respeaking: Evaluation and Benchmarking TomaszKorybski ElenaDavitti - ConstantinOrasan + ConstantinOrasan SabineBraun 4405–4413 In this paper, we present a semi-automated workflow for live interlingual speech-to-text communication which seeks to reduce the shortcomings of existing ASR systems: a human respeaker works with a speaker-dependent speech recognition software (e.g., Dragon Naturally Speaking) to deliver punctuated same-language output of superior quality than obtained using out-of-the-box automatic speech recognition of the original speech. This is fed into a machine translation engine (the EU’s eTranslation) to produce live-caption ready text. We benchmark the quality of the output against the output of best-in-class (human) simultaneous interpreters working with the same source speeches from plenary sessions of the European Parliament. To evaluate the accuracy and facilitate the comparison between the two types of output, we use a tailored annotation approach based on the NTR model (Romero-Fresco and Pöchhacker, 2017). We find that the semi-automated workflow combining intralingual respeaking and machine translation is capable of generating outputs that are similar in terms of accuracy and completeness to the outputs produced in the benchmarking workflow, although the small scale of our experiment requires caution in interpreting this result. @@ -5433,14 +5433,14 @@ The Search for Agreement on Logical Fallacy Annotation of an Infodemic - ClaireBonial + ClaireBonial AustinBlodgett TaylorHudson - Stephanie M.Lukin + Stephanie M.Lukin JeffreyMicher DouglasSummers-Stay PeterSutor - ClareVoss + ClareVoss 4430–4438 We evaluate an annotation schema for labeling logical fallacy types, originally developed for a crowd-sourcing annotation paradigm, now using an annotation paradigm of two trained linguist annotators. We apply the schema to a variety of different genres of text relating to the COVID-19 pandemic. Our linguist (as opposed to crowd-sourced) annotation of logical fallacies allows us to evaluate whether the annotation schema category labels are sufficiently clear and non-overlapping for both manual and, later, system assignment. We report inter-annotator agreement results over two annotation phases as well as a preliminary assessment of the corpus for training and testing a machine learning algorithm (Pattern-Exploiting Training) for fallacy detection and recognition. The agreement results and system performance underscore the challenging nature of this annotation task and suggest that the annotation schema and paradigm must be iteratively evaluated and refined in order to arrive at a set of annotation labels that can be reproduced by human annotators and, in turn, provide reliable training data for automatic detection and recognition systems. 2022.lrec-1.471 @@ -5448,7 +5448,7 @@ Recovering Patient Journeys: A Corpus of Biomedical Entities and Relations on <fixed-case>T</fixed-case>witter (<fixed-case>BEAR</fixed-case>) - AmelieWührl + AmelieWührl RomanKlinger 4439–4450 Text mining and information extraction for the medical domain has focused on scientific text generated by researchers. However, their access to individual patient experiences or patient-doctor interactions is limited. On social media, doctors, patients and their relatives also discuss medical information. Individual information provided by laypeople complements the knowledge available in scientific text. It reflects the patient’s journey making the value of this type of data twofold: It offers direct access to people’s perspectives, and it might cover information that is not available elsewhere, including self-treatment or self-diagnose. Named entity recognition and relation extraction are methods to structure information that is available in unstructured text. However, existing medical social media corpora focused on a comparably small set of entities and relations. In contrast, we provide rich annotation layers to model patients’ experiences in detail. The corpus consists of medical tweets annotated with a fine-grained set of medical entities and relations between them, namely 14 entity (incl. environmental factors, diagnostics, biochemical processes, patients’ quality-of-life descriptions, pathogens, medical conditions, and treatments) and 20 relation classes (incl. prevents, influences, interactions, causes). The dataset consists of 2,100 tweets with approx. 6,000 entities and 2,200 relations. @@ -5467,7 +5467,7 @@ Entity Linking over Nested Named Entities for <fixed-case>R</fixed-case>ussian - NataliaLoukachevitch + NataliaLoukachevitch PavelBraslavski VladimirIvanov TatianaBatura @@ -5486,7 +5486,7 @@ RahulSharnagat JyotsanaKhatri DipteshKanojia - PushpakBhattacharyya + PushpakBhattacharyya 4467–4476 Named Entity Recognition (NER) is a foundational NLP task that aims to provide class labels like Person, Location, Organisation, Time, and Number to words in free text. Named Entities can also be multi-word expressions where the additional I-O-B annotation information helps label them during the NER annotation process. While English and European languages have considerable annotated data for the NER task, Indian languages lack on that front- both in terms of quantity and following annotation standards. This paper releases a significantly sized standard-abiding Hindi NER dataset containing 109,146 sentences and 2,220,856 tokens, annotated with 11 tags. We discuss the dataset statistics in all their essential detail and provide an in-depth analysis of the NER tag-set used with our data. The statistics of tag-set in our dataset shows a healthy per-tag distribution especially for prominent classes like Person, Location and Organisation. Since the proof of resource-effectiveness is in building models with the resource and testing the model on benchmark data and against the leader-board entries in shared tasks, we do the same with the aforesaid data. We use different language models to perform the sequence labelling task for NER and show the efficacy of our data by performing a comparative evaluation with models trained on another dataset available for the Hindi NER task. Our dataset helps achieve a weighted F1 score of 88.78 with all the tags and 92.22 when we collapse the tag-set, as discussed in the paper. To the best of our knowledge, no available dataset meets the standards of volume (amount) and variability (diversity), as far as Hindi NER is concerned. We fill this gap through this work, which we hope will significantly help NLP for Hindi. We release this dataset with our code and models for further research at https://github.com/cfiltnlp/HiNER 2022.lrec-1.475 @@ -5496,7 +5496,7 @@ Bootstrapping Text Anonymization Models with Distant Supervision AnthiPapadopoulou PierreLison - LiljaØvrelid + LiljaØvrelid IldikóPilán 4477–4487 We propose a novel method to bootstrap text anonymization models based on distant supervision. Instead of requiring manually labeled training data, the approach relies on a knowledge graph expressing the background information assumed to be publicly available about various individuals. This knowledge graph is employed to automatically annotate text documents including personal data about a subset of those individuals. More precisely, the method determines which text spans ought to be masked in order to guarantee k-anonymity, assuming an adversary with access to both the text documents and the background information expressed in the knowledge graph. The resulting collection of labeled documents is then used as training data to fine-tune a pre-trained language model for text anonymization. We illustrate this approach using a knowledge graph extracted from Wikidata and short biographical texts from Wikipedia. Evaluation results with a RoBERTa-based model and a manually annotated collection of 553 summaries showcase the potential of the approach, but also unveil a number of issues that may arise if the knowledge graph is noisy or incomplete. The results also illustrate that, contrary to most sequence labeling problems, the text anonymization task may admit several alternative solutions. @@ -5518,7 +5518,7 @@ KaushikGedela AlexMarr BartDesmet - CarolynRose + CarolynRose ChunxiaoZhou 4497–4503 Quality assurance (QA) is an essential though underdeveloped part of the data annotation process. Although QA is supported to some extent in existing annotation tools, comprehensive support for QA is not standardly provided. In this paper we contribute QA4IE, a comprehensive QA tool for information extraction, which can (1) detect potential problems in text annotations in a timely manner, (2) accurately assess the quality of annotations, (3) visually display and summarize annotation discrepancies among annotation team members, (4) provide a comprehensive statistics report, and (5) support viewing of annotated documents interactively. This paper offers a competitive analysis comparing QA4IE and other popular annotation tools and demonstrates its features, usage, and effectiveness through a case study. The Python code, documentation, and demonstration video are available publicly at https://github.com/CC-RMD-EpiBio/QA4IE. @@ -5528,7 +5528,7 @@ A New Dataset for Topic-Based Paragraph Classification in Genocide-Related Court Transcripts MiriamSchirmer - UdoKruschwitz + UdoKruschwitz GregorDonabauer 4504–4512 Recent progress in natural language processing has been impressive in many different areas with transformer-based approaches setting new benchmarks for a wide range of applications. This development has also lowered the barriers for people outside the NLP community to tap into the tools and resources applied to a variety of domain-specific applications. The bottleneck however still remains the lack of annotated gold-standard collections as soon as one’s research or professional interest falls outside the scope of what is readily available. One such area is genocide-related research (also including the work of experts who have a professional interest in accessing, exploring and searching large-scale document collections on the topic, such as lawyers). We present GTC (Genocide Transcript Corpus), the first annotated corpus of genocide-related court transcripts which serves three purposes: (1) to provide a first reference corpus for the community, (2) to establish benchmark performances (using state-of-the-art transformer-based approaches) for the new classification task of paragraph identification of violence-related witness statements, (3) to explore first steps towards transfer learning within the domain. We consider our contribution to be addressing in particular this year’s hot topic on Language Technology for All. @@ -5541,7 +5541,7 @@ RinaldoLima Adrian-GabrielChifu BernardEspinasse - SébastienFournier + SébastienFournier 4513–4522 The Relation Extraction (RE) is an important basic Natural Language Processing (NLP) for many applications, such as search engines, recommender systems, question-answering systems and others. There are many studies in this subarea of NLP that continue to be explored, such as SemEval campaigns (2010 to 2018), or DDI Extraction (2013).For more than ten years, different RE systems using mainly statistical models have been proposed as well as the frameworks to develop them. This paper focuses on frameworks allowing to develop such RE systems using deep learning models. Such frameworks should make it possible to reproduce experiments of various deep learning models and pre-processing techniques proposed in various publications. Currently, there are very few frameworks of this type, and we propose a new open and optimizable framework, called DeepREF, which is inspired by the OpenNRE and REflex existing frameworks. DeepREF allows the employment of various deep learning models, to optimize their use, to identify the best inputs and to get better results with each data set for RE and compare with other experiments, making ablation studies possible. The DeepREF Framework is evaluated on several reference corpora from various application domains. 2022.lrec-1.480 @@ -5570,7 +5570,7 @@ Using Sentence-level Classification Helps Entity Extraction from Material Science Literature AnkanMullick ShubhraneelPal - TapasNayak + TapasNayak Seung-CheolLee SatadeepBhattacharjee PawanGoyal @@ -5602,7 +5602,7 @@ FredericBechet ElieAntoine JérémyAuguste - GéraldineDamnati + GéraldineDamnati 4561–4568 This paper introduces the question answering paradigm as a way to explore digitized archive collections for Social Science studies. In particular, we are interested in evaluating largely studied question generation and question answering approaches on a new type of documents, as a step forward beyond traditional benchmark evaluations. Question generation can be used as a way to provide enhanced training material for Machine Reading Question Answering algorithms but also has its own purpose in this paradigm, where relevant questions can be used as a way to create explainable links between documents. To this end, generating large amounts of question is not the only motivation, but we need to include qualitative and semantic control to the generation process. We propose a new approach for question generation, relying on a BART Transformer based generative model, for which input data are enriched by semantic constraints. Question generation and answering are evaluated on several French corpora, and the whole approach is validated on a new corpus of digitized archive collection of a French Social Science journal. 2022.lrec-1.486 @@ -5610,7 +5610,7 @@ Evaluating Retrieval for Multi-domain Scientific Publications - NancyIde + NancyIde KeithSuderman JingxuanTu MarcVerhagen @@ -5618,7 +5618,7 @@ IanRoss JohnLawson AndrewBorg - JamesPustejovsky + JamesPustejovsky 4569–4576 This paper provides an overview of the xDD/LAPPS Grid framework and provides results of evaluating the AskMe retrievalengine using the BEIR benchmark datasets. Our primary goal is to determine a solid baseline of performance to guide furtherdevelopment of our retrieval capabilities. Beyond this, we aim to dig deeper to determine when and why certain approachesperform well (or badly) on both in-domain and out-of-domain data, an issue that has to date received relatively little attention. 2022.lrec-1.487 @@ -5674,7 +5674,7 @@ <fixed-case>P</fixed-case>hys<fixed-case>NLU</fixed-case>: A Language Resource for Evaluating Natural Language Understanding and Explanation Coherence in Physics JordanMeadows ZiliZhou - AndréFreitas + AndréFreitas 4611–4619 In order for language models to aid physics research, they must first encode representations of mathematical and natural language discourse which lead to coherent explanations, with correct ordering and relevance of statements. We present a collection of datasets developed to evaluate the performance of language models in this regard, which measure capabilities with respect to sentence ordering, position, section prediction, and discourse coherence. Analysis of the data reveals the classes of arguments and sub-disciplines which are most common in physics discourse, as well as the sentence-level frequency of equations and expressions. We present baselines that demonstrate how contemporary language models are challenged by coherence related tasks in physics, even when trained on mathematical natural language objectives. 2022.lrec-1.492 @@ -5682,12 +5682,12 @@ <fixed-case>HECTOR</fixed-case>: A Hybrid <fixed-case>TE</fixed-case>xt <fixed-case>S</fixed-case>implifi<fixed-case>C</fixed-case>ation <fixed-case>TO</fixed-case>ol for Raw Texts in <fixed-case>F</fixed-case>rench - AmaliaTodirascu + AmaliaTodirascu RodrigoWilkens EvaRolin ThomasFrançois DelphineBernhard - NúriaGala + NúriaGala 4620–4630 Reducing the complexity of texts by applying an Automatic Text Simplification (ATS) system has been sparking interest inthe area of Natural Language Processing (NLP) for several years and a number of methods and evaluation campaigns haveemerged targeting lexical and syntactic transformations. In recent years, several studies exploit deep learning techniques basedon very large comparable corpora. Yet the lack of large amounts of corpora (original-simplified) for French has been hinderingthe development of an ATS tool for this language. In this paper, we present our system, which is based on a combination ofmethods relying on word embeddings for lexical simplification and rule-based strategies for syntax and discourse adaptations. We present an evaluation of the lexical, syntactic and discourse-level simplifications according to automatic and humanevaluations. We discuss the performances of our system at the lexical, syntactic, and discourse levels 2022.lrec-1.493 @@ -5695,7 +5695,7 @@ <fixed-case>A</fixed-case>i<fixed-case>RO</fixed-case> - an Interactive Learning Tool for Children at Risk of Dyslexia - Peter JuelHenrichsen + Peter JuelHenrichsen StineFuglsang Engmose 4631–4636 This paper presents the AiRO learning tool, which is designed for use in classrooms and homes by children at risk of developing dyslexia. The tool is based on the client-server architecture with a graphical and auditive front end (providing the interaction with the learner) and all NLP-related components located at the back end (analysing the pupil’s input, deciding on the system’s response, preparing speech synthesis and other feedback, logging the pupil’s performance etc). AiRO software consists of independent modules for easy maintenance, e.g., upgrading the didactics or preparing AiROs for other languages. This paper also reports on our first tests ‘in vivo’ (November 2021) with 49 pupils (aged 6). The subjects completed 16 AiRO sessions over a four-week period. The subjects were pre- and post-tested on spelling and reading. The experimental group significantly out-performed the control group, suggesting that a new IT-supported teaching strategy may be within reach. A collection of AiRO resources (language materials, software, synthetic voice) are available as open source. At LREC, we shall present a demo of the AiRO learning tool. @@ -5707,7 +5707,7 @@ AnnikaSimonsen Sandra SaxovLamhauge Iben NyholmDebess - Peter JuelHenrichsen + Peter JuelHenrichsen 4637–4643 The biggest challenges we face in developing LR and LT for Faroese is the lack of existing resources. A few resources already exist for Faroese, but many of them are either of insufficient size and quality or are not easily accessible. Therefore, the Faroese ASR project, Ravnur, set out to make a BLARK for Faroese. The BLARK is still in the making, but many of its resources have already been produced or collected. The LR status is framed by mentioning existing LR of relevant size and quality. The specific components of the BLARK are presented as well as the working principles behind the BLARK. The BLARK will be a pillar in Faroese LR, being relatively substantial in both size, quality, and diversity. It will be open-source, inviting other small languages to use it as an inspiration to create their own BLARK. We comment on the faulty yet sprouting LT situation in the Faroe Islands. The LR and LT challenges are not solved with just a BLARK. Some initiatives are therefore proposed to better the prospects of Faroese LT. The open-source principle of the project should facilitate further development. 2022.lrec-1.495 @@ -5730,7 +5730,7 @@ JenniferJacobs CharisHarty MargaretPerkoff - James H.Martin + James H.Martin TamaraSumner 4654–4662 Transcripts of teaching episodes can be effective tools to understand discourse patterns in classroom instruction. According to most educational experts, sustained classroom discourse is a critical component of equitable, engaging, and rich learning environments for students. This paper describes the TalkMoves dataset, composed of 567 human-annotated K-12 mathematics lesson transcripts (including entire lessons or portions of lessons) derived from video recordings. The set of transcripts primarily includes in-person lessons with whole-class discussions and/or small group work, as well as some online lessons. All of the transcripts are human-transcribed, segmented by the speaker (teacher or student), and annotated at the sentence level for ten discursive moves based on accountable talk theory. In addition, the transcripts include utterance-level information in the form of dialogue act labels based on the Switchboard Dialog Act Corpus. The dataset can be used by educators, policymakers, and researchers to understand the nature of teacher and student discourse in K-12 math classrooms. Portions of this dataset have been used to develop the TalkMoves application, which provides teachers with automated, immediate, and actionable feedback about their mathematics instruction. @@ -5776,7 +5776,7 @@ A Benchmark Corpus for the Detection of Automatically Generated Text in Academic Publications VijiniLiyanage DavideBuscaldi - AdelineNazarenko + AdelineNazarenko 4692–4700 Automatic text generation based on neural language models has achieved performance levels that make the generated text almost indistinguishable from those written by humans. Despite the value that text generation can have in various applications, it can also be employed for malicious tasks. The diffusion of such practices represent a threat to the quality of academic publishing. To address these problems, we propose in this paper two datasets comprised of artificially generated research content: a completely synthetic dataset and a partial text substitution dataset. In the first case, the content is completely generated by the GPT-2 model after a short prompt extracted from original papers. The partial or hybrid dataset is created by replacing several sentences of abstracts with sentences that are generated by the Arxiv-NLP model. We evaluate the quality of the datasets comparing the generated texts to aligned original texts using fluency metrics such as BLEU and ROUGE. The more natural the artificial texts seem, the more difficult they are to detect and the better is the benchmark. We also evaluate the difficulty of the task of distinguishing original from generated text by using state-of-the-art classification models. 2022.lrec-1.501 @@ -5809,7 +5809,7 @@ Text Classification and Prediction in the Legal Domain Minh-QuocNghiem PaulBaylis - AndréFreitas + AndréFreitas SophiaAnaniadou 4717–4722 We present a case study on the application of text classification and legal judgment prediction for flight compensation. We combine transformer-based classification models to classify responses from airlines and incorporate text data with other data types to predict a legal claim being successful. Our experimental evaluations show that our models achieve consistent and significant improvements over baselines and even outperformed human prediction when predicting a claim being successful. These models were integrated into an existing claim management system, providing substantial productivity gains for handling the case lifecycle, currently supporting several thousands of monthly processes. @@ -5874,7 +5874,7 @@ MatthieuAllain UrszulaCzerwinska AmauryFouret - BenoîtSagot + BenoîtSagot RachelBawden 4754–4766 Detecting divergences in the applications of the law (where the same legal text is applied differently by two rulings) is an important task. It is the mission of the French Cour de Cassation. The first step in the detection of divergences is to detect similar cases, which is currently done manually by experts. They rely on summarised versions of the rulings (syntheses and keyword sequences), which are currently produced manually and are not available for all rulings. There is also a high degree of variability in the keyword choices and the level of granularity used. In this article, we therefore aim to provide automatic tools to facilitate the search for similar rulings. We do this by (i) providing automatic keyword sequence generation models, which can be used to improve the coverage of the analysis, and (ii) providing measures of similarity based on the available texts and augmented with predicted keyword sequences. Our experiments show that the predictions improve correlations of automatically obtained similarities against our specially colelcted human judgments of similarity. @@ -5994,7 +5994,7 @@ Building Dataset for Grounding of Formulae — Annotating Coreference Relations Among Math Identifiers TakutoAsakura YusukeMiyao - AkikoAizawa + AkikoAizawa 4851–4858 Grounding the meaning of each symbol in math formulae is important for automated understanding of scientific documents. Generally speaking, the meanings of math symbols are not necessarily constant, and the same symbol is used in multiple meanings. Therefore, coreference relations between symbols need to be identified for grounding, and the task has aspects of both description alignment and coreference analysis. In this study, we annotated 15 papers selected from arXiv.org with the grounding information. In total, 12,352 occurrences of math identifiers in these papers were annotated, and all coreference relations between them were made explicit in each paper. The constructed dataset shows that regardless of the ambiguity of symbols in math formulae, coreference relations can be labeled with a high inter-annotator agreement. The constructed dataset enables us to achieve automation of formula grounding, and in turn, make deeper use of the knowledge in scientific documents using techniques such as math information extraction. The built grounding dataset is available at https://sigmathling.kwarc.info/resources/grounding- dataset/. 2022.lrec-1.519 @@ -6005,9 +6005,9 @@ AnnaNedoluzhko MichalNovák MartinPopel - ZdeněkŽabokrtský + ZdeněkŽabokrtský AmirZeldes - DanielZeman + DanielZeman 4859–4872 Recent advances in standardization for annotated language resources have led to successful large scale efforts, such as the Universal Dependencies (UD) project for multilingual syntactically annotated data. By comparison, the important task of coreference resolution, which clusters multiple mentions of entities in a text, has yet to be standardized in terms of data formats or annotation guidelines. In this paper we present CorefUD, a multilingual collection of corpora and a standardized format for coreference resolution, compatible with morphosyntactic annotations in the UD framework and including facilities for related tasks such as named entity recognition, which forms a first step in the direction of convergence for coreference resolution across languages. 2022.lrec-1.520 @@ -6017,10 +6017,10 @@ The Universal Anaphora Scorer JuntaoYu SopanKhosla - Nafise SadatMoosavi + Nafise SadatMoosavi SilviuPaun - SameerPradhan - MassimoPoesio + SameerPradhan + MassimoPoesio 4873–4883 The aim of the Universal Anaphora initiative is to push forward the state of the art in anaphora and anaphora resolution by expanding the aspects of anaphoric interpretation which are or can be reliably annotated in anaphoric corpora, producing unified standards to annotate and encode these annotations, deliver datasets encoded according to these standards, and developing methods for evaluating models carrying out this type of interpretation. Such expansion of the scope of anaphora resolution requires a comparable expansion of the scope of the scorers used to evaluate this work. In this paper, we introduce an extended version of the Reference Coreference Scorer (Pradhan et al., 2014) that can be used to evaluate the extended range of anaphoric interpretation included in the current Universal Anaphora proposal. The UA scorer supports the evaluation of identity anaphora resolution and of bridging reference resolution, for which scorers already existed but not integrated in a single package. It also supports the evaluation of split antecedent anaphora and discourse deixis, for which no tools existed. The proposed approach to the evaluation of split antecedent anaphora is entirely novel; the proposed approach to the evaluation of discourse deixis leverages the encoding of discourse deixis proposed in Universal Anaphora to enable the use for discourse deixis of the same metrics already used for identity anaphora. The scorer was tested in the recent CODI-CRAC 2021 Shared Task on Anaphora Resolution in Dialogues. 2022.lrec-1.521 @@ -6080,7 +6080,7 @@ Evaluating Pre-training Objectives for Low-Resource Translation into Morphologically Rich Languages PrajitDhar AriannaBisazza - Gertjanvan Noord + Gertjanvan Noord 4933–4943 The scarcity of parallel data is a major limitation for Neural Machine Translation (NMT) systems, in particular for translation into morphologically rich languages (MRLs). An important way to overcome the lack of parallel data is to leverage target monolingual data, which is typically more abundant and easier to collect. We evaluate a number of techniques to achieve this, ranging from back-translation to random token masking, on the challenging task of translating English into four typologically diverse MRLs, under low-resource settings. Additionally, we introduce Inflection Pre-Training (or PT-Inflect), a novel pre-training objective whereby the NMT system is pre-trained on the task of re-inflecting lemmatized target sentences before being trained on standard source-to-target language translation. We conduct our evaluation on four typologically diverse target MRLs, and find that PT-Inflect surpasses NMT systems trained only on parallel data. While PT-Inflect is outperformed by back-translation overall, combining the two techniques leads to gains in some of the evaluated language pairs. 2022.lrec-1.527 @@ -6090,7 +6090,7 @@ Aligning Images and Text with Semantic Role Labels for Fine-Grained Cross-Modal Understanding AbhidipBhattacharyya CeciliaMauceri - MarthaPalmer + MarthaPalmer ChristofferHeckman 4944–4954 As vision processing and natural language processing continue to advance, there is increasing interest in multimodal applications, such as image retrieval, caption generation, and human-robot interaction. These tasks require close alignment between the information in the images and text. In this paper, we present a new multimodal dataset that combines state of the art semantic annotation for language with the bounding boxes of corresponding images. This richer multimodal labeling supports cross-modal inference for applications in which such alignment is useful. Our semantic representations, developed in the natural language processing community, abstract away from the surface structure of the sentence, focusing on specific actions and the roles of their participants, a level that is equally relevant to images. We then utilize these representations in the form of semantic role labels in the captions and the images and demonstrate improvements in standard tasks such as image retrieval. The potential contributions of these additional labels is evaluated using a role-aware retrieval system based on graph convolutional and recurrent neural networks. The addition of semantic roles into this system provides a significant increase in capability and greater flexibility for these tasks, and could be extended to state-of-the-art techniques relying on transformers with larger amounts of annotated data. @@ -6100,13 +6100,13 @@ Rosetta-<fixed-case>LSF</fixed-case>: an Aligned Corpus of <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage and <fixed-case>F</fixed-case>rench for Text-to-Sign Translation EliseBertin-Lemée - AnneliesBraffort + AnneliesBraffort CamilleChallant ClaireDanet BorisDauriac MichaelFilhol EmmanuellaMartinod - JérémieSegouat + JérémieSegouat 4955–4962 This article presents a new French Sign Language (LSF) corpus called “Rosetta-LSF”. It was created to support future studies on the automatic translation of written French into LSF, rendered through the animation of a virtual signer. An overview of the field highlights the importance of a quality representation of LSF. In order to obtain quality animations understandable by signers, it must surpass the simple “gloss transcription” of the LSF lexical units to use in the discourse. To achieve this, we designed a corpus composed of four types of aligned data, and evaluated its usability. These are: news headlines in French, translations of these headlines into LSF in the form of videos showing animations of a virtual signer, gloss annotations of the “traditional” type—although including additional information on the context in which each gestural unit is performed as well as their potential for adaptation to another context—and AZee representations of the videos, i.e. formal expressions capturing the necessary and sufficient linguistic information. This article describes this data, exhibiting an example from the corpus. It is available online for public research. 2022.lrec-1.529 @@ -6116,14 +6116,14 @@ <fixed-case>MLQE</fixed-case>-<fixed-case>PE</fixed-case>: A Multilingual Quality Estimation and Post-Editing Dataset MarinaFomicheva ShuoSun - ErickFonseca + ErickFonseca ChrysoulaZerva - FrédéricBlain + FrédéricBlain VishravChaudhary - FranciscoGuzmán + FranciscoGuzmán NinaLopatina LuciaSpecia - André F. T.Martins + André F. T.Martins 4963–4974 We present MLQE-PE, a new dataset for Machine Translation (MT) Quality Estimation (QE) and Automatic Post-Editing (APE). The dataset contains annotations for eleven language pairs, including both high- and low-resource languages. Specifically, it is annotated for translation quality with human labels for up to 10,000 translations per language pair in the following formats: sentence-level direct assessments and post-editing effort, and word-level binary good/bad labels. Apart from the quality-related scores, each source-translation sentence pair is accompanied by the corresponding post-edited sentence, as well as titles of the articles where the sentences were extracted from, and information on the neural MT models used to translate the text. We provide a thorough description of the data collection and annotation process as well as an analysis of the annotation distribution for each language pair. We also report the performance of baseline systems trained on the MLQE-PE dataset. The dataset is freely available and has already been used for several WMT shared tasks. 2022.lrec-1.530 @@ -6134,7 +6134,7 @@ SangwhanMoon Won IkCho Hye JooHan - NaoakiOkazaki + NaoakiOkazaki Nam SooKim 4975–4983 Korean is a language with complex morphology that uses spaces at larger-than-word boundaries, unlike other East-Asian languages. While morpheme-based text generation can provide significant semantic advantages compared to commonly used character-level approaches, Korean morphological analyzers only provide a sequence of morpheme-level tokens, losing information in the tokenization process. Two crucial issues are the loss of spacing information and subcharacter level morpheme normalization, both of which make the tokenization result challenging to reconstruct the original input string, deterring the application to generative tasks. As this problem originates from the conventional scheme used when creating a POS tagging corpus, we propose an improvement to the existing scheme, which makes it friendlier to generative tasks. On top of that, we suggest a fully-automatic annotation of a corpus by leveraging public analyzers. We vote the surface and POS from the outcome and fill the sequence with the selected morphemes, yielding tokenization with a decent quality that incorporates space information. Our scheme is verified via an evaluation done on an external corpus, and subsequently, it is adapted to Korean Wikipedia to construct an open, permissive resource. We compare morphological analyzer performance trained on our corpus with existing methods, then perform an extrinsic evaluation on a downstream task. @@ -6152,7 +6152,7 @@ A <fixed-case>H</fixed-case>mong Corpus with Elaborate Expression Annotations - David R.Mortensen + David R.Mortensen XinyuZhang ChenxuanCui Katherine J.Zhang @@ -6164,7 +6164,7 @@ <fixed-case>ELAL</fixed-case>: An Emotion Lexicon for the Analysis of <fixed-case>A</fixed-case>lsatian Theatre Plays DelphineBernhard - PabloRuiz Fabo + PabloRuiz Fabo 5001–5010 In this work, we present a novel and manually corrected emotion lexicon for the Alsatian dialects, including graphical variants of Alsatian lexical items. These High German dialects are spoken in the North-East of France. They are used mainly orally, and thus lack a stable and consensual spelling convention. There has nevertheless been a continuous literary production since the middle of the 17th century and, in particular, theatre plays. A large sample of Alsatian theatre plays is currently being encoded according to the Text Encoding Initiative (TEI) Guidelines. The emotion lexicon will be used to perform automatic emotion analysis in this corpus of theatre plays. We used a graph-based approach to deriving emotion scores and translations, relying only on bilingual lexicons, cognates and spelling variants. The source lexicons for emotion scores are the NRC Valence Arousal and Dominance and NRC Emotion Intensity lexicons. 2022.lrec-1.534 @@ -6175,7 +6175,7 @@ RobertPugh MarivelHuerta Mendez MitsuyaSasaki - FrancisTyers + FrancisTyers 5011–5020 We present a morpho-syntactically-annotated corpus of Western Sierra Puebla Nahuatl that conforms to the annotation guidelines of the Universal Dependencies project. We describe the sources of the texts that make up the corpus, the annotation process, and important annotation decisions made throughout the development of the corpus. As the first indigenous language of Mexico to be added to the Universal Dependencies project, this corpus offers a good opportunity to test and more clearly define annotation guidelines for the Meso-american linguistic area, spontaneous and elicited spoken data, and code-switching. 2022.lrec-1.535 @@ -6184,7 +6184,7 @@ The Construction and Evaluation of the <fixed-case>LEAFTOP</fixed-case> Dataset of Automatically Extracted Nouns in 1480 Languages GregoryBaker - DiegoMolla + DiegoMolla 5021–5028 The LEAFTOP (language extracted automatically from thousands of passages) dataset consists of nouns that appear in multiple places in the four gospels of the New Testament. We use a naive approach — probabilistic inference — to identify likely translations in 1480 other languages. We evaluate this process and find that it provides lexiconaries with accuracy from 42% (Korafe) to 99% (Runyankole), averaging 72% correct across evaluated languages. The process translates up to 161 distinct lemmas from Koine Greek (average 159). We identify nouns which appear to be easy and hard to translate, language families where this technique works, and future possible improvements and extensions. The claims to novelty are: the use of a Koine Greek New Testament as the source language; using a fully-annotated manually-created grammatically parse of the source text; a custom scraper for texts in the target languages; a new metric for language similarity; a novel strategy for evaluation on low-resource languages. 2022.lrec-1.536 @@ -6244,7 +6244,7 @@ Standard <fixed-case>G</fixed-case>erman Subtitling of <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman <fixed-case>TV</fixed-case> content: the <fixed-case>PASSAGE</fixed-case> Project Jonathan DavidMutal - PierretteBouillon + PierretteBouillon JohannaGerlach VeronikaHaberkorn 5063–5070 @@ -6322,20 +6322,20 @@ <fixed-case>L</fixed-case>atvian National Corpora Collection – Korpuss.lv - BaibaSaulite + BaibaSaulite RobertsDarģis - NormundsGruzitis + NormundsGruzitis IlzeAuzina - KristīneLevāne-Petrova - LaumaPretkalniņa + KristīneLevāne-Petrova + LaumaPretkalniņa LauraRituma - PeterisPaikens - ArtursZnotins + PeterisPaikens + ArtursZnotins LaineStrankale KristīnePokratniece IlmārsPoikāns - GuntisBarzdins - IngunaSkadiņa + GuntisBarzdins + IngunaSkadiņa AndaBaklāne ValdisSaulespurēns JānisZiediņš @@ -6349,7 +6349,7 @@ Ioan-BogdanIordache Ana SabinaUban CatalinStoean - Liviu P.Dinu + Liviu P.Dinu 5130–5136 A new data set is gathered from a Romanian financial news website for the duration of four years. It is further refined to extract only information related to one company by selecting only paragraphs and even sentences that referred to it. The relation between the extracted sentiment scores of the texts and the stock prices from the corresponding dates is investigated using various approaches like the lexicon-based Vader tool, Financial BERT, as well as Transformer-based models. Automated translation is used, since some models could be only applied for texts in English. It is encouraging that all models, be that they are applied to Romanian or English texts, indicate a correlation between the sentiment scores and the increase or decrease of the stock closing prices. 2022.lrec-1.549 @@ -6358,8 +6358,8 @@ A Free/Open-Source Morphological Analyser and Generator for Sakha SardanaIvanova - JonathanWashington - FrancisTyers + JonathanWashington + FrancisTyers 5137–5142 We present, to our knowledge, the first ever published morphological analyser and generator for Sakha, a marginalised language of Siberia. The transducer, developed using HFST, has coverage of solidly above 90%, and high precision. In the development of the analyser, we have expanded linguistic knowledge about Sakha, and developed strategies for complex grammatical patterns. The transducer is already being used in downstream tasks, including computer assisted language learning applications for linguistic maintenance and computational linguistic shared tasks. 2022.lrec-1.550 @@ -6428,7 +6428,7 @@ Thematic Fit Bits: Annotation Quality and Quantity Interplay for Event Participant Representation YuvalMarton - AsadSayeed + AsadSayeed 5188–5197 Modeling thematic fit (a verb-argument compositional semantics task) currently requires a very large burden of labeled data. We take a linguistically machine-annotated large corpus and replace corpus layers with output from higher-quality, more modern taggers. We compare the old and new corpus versions’ impact on a verb-argument fit modeling task, using a high-performing neural approach. We discover that higher annotation quality dramatically reduces our data requirement while demonstrating better supervised predicate-argument classification. But in applying the model to psycholinguistic tasks outside the training objective, we see clear gains at scale, but only in one of two thematic fit estimation tasks, and no clear gains on the other. We also see that quality improves with training size, but perhaps plateauing or even declining in one task. Last, we tested the effect of role set size. All this suggests that the quality/quantity interplay is not all you need. We replicate previous studies while modifying certain role representation details and set a new state-of-the-art in event modeling, using a fraction of the data. We make the new corpus version public. 2022.lrec-1.556 @@ -6477,7 +6477,7 @@ The Automatic Extraction of Linguistic Biomarkers as a Viable Solution for the Early Diagnosis of Mental Disorders GloriaGagliardi - FabioTamburini + FabioTamburini 5234–5242 Digital Linguistic Biomarkers extracted from spontaneous language productions proved to be very useful for the early detection of various mental disorders. This paper presents a computational pipeline for the automatic processing of oral and written texts: the tool enables the computation of a rich set of linguistic features at the acoustic, rhythmic, lexical, and morphosyntactic levels. Several applications of the instrument - for the detection of Mild Cognitive Impairments, Anorexia Nervosa, and Developmental Language Disorders - are also briefly discussed. 2022.lrec-1.561 @@ -6494,7 +6494,7 @@ <fixed-case>COSMOS</fixed-case>: Experimental and Comparative Studies of Concept Representations in Schoolchildren - JeanneVillaneau + JeanneVillaneau FaridaSaid 5251–5260 COSMOS is a multidisciplinary research project investigating schoolchildren’s beliefs and representations of specific concepts under control variables (age, gender, language spoken at home). Seven concepts are studied: friend, father, mother, villain, work, television and dog. We first present the protocol used and the data collected from a survey of 184 children in two age groups (6-7 and 9-11 years) in four schools in Brittany (France). A word-level lexical study shows that children’s linguistic proficiency and lexical diversity increase with age, and we observe an interaction effect between gender and age on lexical diversity as measured with MLR (Measure of Lexical Richness). In contrast, none of the control variables affects lexical density. We also present the lemmas that schoolchildren most often associate with each concept. Generalized linear mixed-effects models reveal significant effects of age, gender, and home language on some concept-lemma associations and specific interactions between age and gender. Most of the identified effects are documented in the child development literature. To better understand the process of semantic construction in children, additional lexical analyses at the n-gram, chunk, and clause levels would be helpful. We briefly present ongoing and planned work in this direction. The COSMOS data will soon be made freely available to the scientific community. @@ -6504,7 +6504,7 @@ Features of Perceived Metaphoricity on the Discourse Level: Abstractness and Emotionality PriscaPiccirilli - SabineSchulte im Walde + SabineSchulte im Walde 5261–5273 Research on metaphorical language has shown ties between abstractness and emotionality with regard to metaphoricity; prior work is however limited to the word and sentence levels, and up to date there is no empirical study establishing the extent to which this is also true on the discourse level. This paper explores which textual and perceptual features human annotators perceive as important for the metaphoricity of discourses and expressions, and addresses two research questions more specifically. First, is a metaphorically-perceived discourse more abstract and more emotional in comparison to a literally- perceived discourse? Second, is a metaphorical expression preceded by a more metaphorical/abstract/emotional context than a synonymous literal alternative? We used a dataset of 1,000 corpus-extracted discourses for which crowdsourced annotators (1) provided judgements on whether they perceived the discourses as more metaphorical or more literal, and (2) systematically listed lexical terms which triggered their decisions in (1). Our results indicate that metaphorical discourses are more emotional and to a certain extent more abstract than literal discourses. However, neither the metaphoricity nor the abstractness and emotionality of the preceding discourse seem to play a role in triggering the choice between synonymous metaphorical vs. literal expressions. Our dataset is available at https://www.ims.uni-stuttgart.de/data/discourse-met-lit. 2022.lrec-1.564 @@ -6517,7 +6517,7 @@ NiharSahoo NiteeshMallela HimanshuGupta - PushpakBhattacharyya + PushpakBhattacharyya MilindSavagaonkar NidhiSultan RoshniRamnani @@ -6562,7 +6562,7 @@ Investigating Independence vs. Control: Agenda-Setting in <fixed-case>R</fixed-case>ussian News Coverage on Social Media AnneroseEichel GabriellaLapesa - SabineSchulte im Walde + SabineSchulte im Walde 5314–5323 Agenda-setting is a widely explored phenomenon in political science: powerful stakeholders (governments or their financial supporters) have control over the media and set their agenda: political and economical powers determine which news should be salient. This is a clear case of targeted manipulation to divert the public attention from serious issues affecting internal politics (such as economic downturns and scandals) by flooding the media with potentially distracting information. We investigate agenda-setting in the Russian social media landscape, exploring the relation between economic indicators and mentions of foreign geopolitical entities, as well as of Russia itself. Our contributions are at three levels: at the level of the domain of the investigation, our study is the first to substructure the Russian media landscape in state-controlled vs. independent outlets in the context of strategic distraction from negative economic trends; at the level of the scope of the investigation, we involve a large set of geopolitical entities (while previous work has focused on the U.S.); at the qualitative level, our analysis of posts on Ukraine, whose relationship with Russia is of high geopolitical relevance, provides further insights into the contrast between state-controlled and independent outlets. 2022.lrec-1.569 @@ -6595,7 +6595,7 @@ »textklang« – Towards a Multi-Modal Exploration Platform for <fixed-case>G</fixed-case>erman Poetry NadjaSchauffler ToniBernhart - AndreBlessing + AndreBlessing GunillaEschenbach MarkusGärtner KerstinJung @@ -6650,7 +6650,7 @@ ChangShen SallyMa TomoeMizutani - DragomirRadev + DragomirRadev 5388–5392 Fast-developing fields such as Artificial Intelligence (AI) often outpace the efforts of encyclopedic sources such as Wikipedia, which either do not completely cover recently-introduced topics or lack such content entirely. As a result, methods for automatically producing content are valuable tools to address this information overload. We show that recent advances in pretrained language modeling can be combined for a two-stage extractive and abstractive approach for Wikipedia lead paragraph generation. We extend this approach to generate longer Wikipedia-style summaries with sections and examine how such methods struggle in this application through detailed studies with 100 reference human-collected surveys. This is the first study on utilizing web resources for long Wikipedia-style summaries to the best of our knowledge. 2022.lrec-1.576 @@ -6661,7 +6661,7 @@ Sujay KumarJauhar NirupamaChandrasekaran MichaelGamon - RyenWhite + RyenWhite 5393–5403 Tasks are a fundamental unit of work in the daily lives of people, who are increasingly using digital means to keep track of, organize, triage, and act on them. These digital tools – such as task management applications – provide a unique opportunity to study and understand tasks and their connection to the real world, and through intelligent assistance, help people be more productive. By logging signals such as text, timestamp information, and social connectivity graphs, an increasingly rich and detailed picture of how tasks are created and organized, what makes them important, and who acts on them, can be progressively developed. Yet the context around actual task completion remains fuzzy, due to the basic disconnect between actions taken in the real world and telemetry recorded in the digital world. Thus, in this paper we compile and release a novel, real-life, large-scale dataset called MS-LaTTE that captures two core aspects of the context surrounding task completion: location and time. We describe our annotation framework and conduct a number of analyses on the data that were collected, demonstrating that it captures intuitive contextual properties for common tasks. Finally, we test the dataset on the two problems of predicting spatial and temporal task co-occurrence, concluding that predictors for co-location and co-time are both learnable, with a BERT fine-tuned model outperforming several other baselines. The MS-LaTTE dataset provides an opportunity to tackle many new modeling challenges in contextual task understanding and we hope that its release will spur future research in task intelligence more broadly. 2022.lrec-1.577 @@ -6708,7 +6708,7 @@ MarinaSantini PeterLundberg YosefAl-Abasse - ArneJonsson + ArneJonsson EmmaEneling MagnusStridsman 5428–5435 @@ -6721,7 +6721,7 @@ SaméhKchaou RahmaBoujelbane EmnaFsih - LamiaHadrich-Belguith + LamiaHadrich-Belguith 5436–5443 With the growing access to the internet, the spoken Arabic dialect language becomes informal languages written in social media. Most users post comments using their own dialect. This linguistic situation inhibits mutual understanding between internet users and makes difficult to use computational approaches since most Arabic resources are intended for the formal language: Modern Standard Arabic (MSA). In this paper, we present a pipeline to standardize the written texts in social networks by translating them to the standard language MSA. We fine-tun at first an identification bert-based model to select Tunisian Dialect (TD) from MSA and other dialects. Then, we learned transformer model to translate TD to MSA. The final system includes the translated TD text and the originally text written in MSA. Each of these steps was evaluated on the same test corpus. In order to test the effectiveness of the approach, we compared two opinion analysis models, the first intended for the Sentiment Analysis (SA) of dialect texts and the second for the MSA texts. We concluded that through standardization we obtain the best score. 2022.lrec-1.582 @@ -6739,7 +6739,7 @@ Preliminary Results on the Evaluation of Computational Tools for the Analysis of <fixed-case>Q</fixed-case>uechua and <fixed-case>A</fixed-case>ymara Marcelo YujiHimoro - AntonioPareja-Lora + AntonioPareja-Lora 5450–5459 This research has focused on evaluating the existing open-source morphological analyzers for two of the most widely spoken indigenous macrolanguages in South America, namely Quechua and Aymara. Firstly, we have evaluated their performance (precision, recall and F1 score) for the individual languages for which they were developed (Cuzco Quechua and Aymara). Secondly, in order to assess how these tools handle other individual languages of the macrolanguage, we have extracted some sample text from school textbooks and educational resources. This sample text was edited in the different countries where these macrolanguages are spoken (Colombia, Ecuador, Peru, Bolivia, Chile and Argentina for Quechua; and Bolivia, Peru and Chile for Aymara), and it includes their different standardized forms (10 individual languages of Quechua and 3 of Aymara). Processing this text by means of the tools, we have (i) calculated their coverage (number of words recognized and analyzed) and (ii) studied in detail the cases for which each tool was unable to generate any output. Finally, we discuss different ways in which these tools could be optimized, either to improve their performances or, in the specific case of Quechua, to cover more individual languages of this macrolanguage in future works as well. 2022.lrec-1.584 @@ -6769,7 +6769,7 @@ <fixed-case>M</fixed-case>e<fixed-case>SH</fixed-case>up: Corpus for Full Text Biomedical Document Indexing XindiWang - Robert E.Mercer + Robert E.Mercer FrankRudzicz 5473–5483 Medical Subject Heading (MeSH) indexing refers to the problem of assigning a given biomedical document with the most relevant labels from an extremely large set of MeSH terms. Currently, the vast number of biomedical articles in the PubMed database are manually annotated by human curators, which is time consuming and costly; therefore, a computational system that can assist the indexing is highly valuable. When developing supervised MeSH indexing systems, the availability of a large-scale annotated text corpus is desirable. A publicly available, large corpus that permits robust evaluation and comparison of various systems is important to the research community. We release a large scale annotated MeSH indexing corpus, MeSHup, which contains 1,342,667 full text articles, together with the associated MeSH labels and metadata, authors and publication venues that are collected from the MEDLINE database. We train an end-to-end model that combines features from documents and their associated labels on our corpus and report the new baseline. @@ -6780,7 +6780,7 @@ Hierarchical Annotation for Building A Suite of Clinical Natural Language Processing Tasks: Progress Note Understanding YanjunGao DmitriyDligach - TimothyMiller + TimothyMiller SamuelTesch RyanLaffin Matthew M.Churpek @@ -6792,7 +6792,7 @@ <fixed-case>KC</fixed-case>4<fixed-case>MT</fixed-case>: A High-Quality Corpus for Multilingual Machine Translation - Vinh VanNguyen + Vinh VanNguyen HaNguyen Huong ThanhLe Thai PhuongNguyen @@ -6800,7 +6800,7 @@ Luan NghiaPham Anh TuanPhan Cong Hoang-MinhNguyen - Viet HongTran + Viet HongTran Anh HuuTran 5494–5502 The multilingual parallel corpus is an important resource for many applications of natural language processing (NLP). For machine translation, the size and quality of the training corpus mainly affects the quality of the translation models. In this work, we present the method for building high-quality multilingual parallel corpus in the news domain and for some low-resource languages, including Vietnamese, Laos, and Khmer, to improve the quality of multilingual machine translation in these areas. We also publicized this one that includes 500.000 Vietnamese-Chinese bilingual sentence pairs; 150.000 Vietnamese-Laos bilingual sentence pairs, and 150.000 Vietnamese-Khmer bilingual sentence pairs. @@ -6830,7 +6830,7 @@ MichaelGamon Sujay KumarJauhar DiyiYang - EduardHovy + EduardHovy 5517–5524 Document authoring involves a lengthy revision process, marked by individual edits that are frequently linked to comments. Modeling the relationship between edits and comments leads to a better understanding of document evolution, potentially benefiting applications such as content summarization, and task triaging. Prior work on understanding revisions has primarily focused on classifying edit intents, but falling short of a deeper understanding of the nature of these edits. In this paper, we present explore the challenge of describing an edit at two levels: identifying the edit intent, and describing the edit using free-form text. We begin by defining a taxonomy of general edit intents and introduce a new dataset of full revision histories of Wikipedia pages, annotated with each revision’s edit intent. Using this dataset, we train a classifier that achieves a 90% accuracy in identifying edit intent. We use this classifier to train a distantly-supervised model that generates a high-level description of a revision in free-form text. Our experimental results show that incorporating edit intent information aids in generating better edit descriptions. We establish a set of baselines for the edit description task, achieving a best score of 28 ROUGE, thus demonstrating the effectiveness of our layered approach to edit understanding. 2022.lrec-1.591 @@ -6865,7 +6865,7 @@ <fixed-case>CLGC</fixed-case>: A Corpus for <fixed-case>C</fixed-case>hinese Literary Grace Evaluation YiLi DongYu - PengyuanLiu + PengyuanLiu 5548–5556 In this paper, we construct a Chinese literary grace corpus, CLGC, with 10,000 texts and more than 1.85 million tokens. Multi-level annotations are provided for each text in our corpus, including literary grace level, sentence category, and figure-of-speech type. Based on the corpus, we dig deep into the correlation between fine-grained features (semantic information, part-of-speech and figure-of-speech, etc.) and literary grace level. We also propose a new Literary Grace Evaluation (LGE) task, which aims at making a comprehensive assessment of the literary grace level according to the text. In the end, we build some classification models with machine learning algorithms (such as SVM, TextCNN) to prove the effectiveness of our features and corpus for LGE. The results of our preliminary classification experiments have achieved 79.71% on the weighted average F1-score. 2022.lrec-1.594 @@ -6873,7 +6873,7 @@ Anonymising the <fixed-case>SAGT</fixed-case> Speech Corpus and Treebank - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu AntjeSchweitzer 5557–5564 Anonymisation, that is identifying and neutralising sensitive references, is a crucial part of dataset creation. In this paper, we describe the anonymisation process of a Turkish-German code-switching corpus, namely SAGT, which consists of speech data and a treebank that is built on its transcripts. We employed a selective pseudonymisation approach where we manually identified sensitive references to anonymise and replaced them with surrogate values on the treebank side. In addition to maintaining data privacy, our primary concerns in surrogate selection were keeping the integrity of code-switching properties, morphosyntactic annotation layers, and semantics. After the treebank anonymisation, we anonymised the speech data by mapping between the treebank sentences and audio transcripts with the help of Praat scripts. The treebank is publicly available for research purposes and the audio files can be obtained via an individual licence agreement. @@ -6918,7 +6918,7 @@ Design and Evaluation of the Corpus of Everyday <fixed-case>J</fixed-case>apanese Conversation - HanaeKoiso + HanaeKoiso HarukaAmatani YasuharuDen YurikoIseki @@ -6958,7 +6958,7 @@ BernardoConsoli Henrique D. P.dos Santos Ana Helena D. P. S.Ulbrich - RenataVieira + RenataVieira Rafael H.Bordini 5609–5616 Computational medicine research requires clinical data for training and testing purposes, so the development of datasets composed of real hospital data is of utmost importance in this field. Most such data collections are in the English language, were collected in anglophone countries, and do not reflect other clinical realities, which increases the importance of national datasets for projects that hope to positively impact public health. This paper presents a new Brazilian Clinical Dataset containing over 70,000 admissions from 10 hospitals in two Brazilian states, composed of a sum total of over 2.5 million free-text clinical notes alongside data pertaining to patient information, prescription information, and exam results. This data was collected, organized, deidentified, and is being distributed via credentialed access for the use of the research community. In the course of presenting the new dataset, this paper will explore the new dataset’s structure, population, and potential benefits of using this dataset in clinical AI tasks. @@ -6967,10 +6967,10 @@ Universal Grammatical Dependencies for <fixed-case>P</fixed-case>ortuguese with <fixed-case>CINTIL</fixed-case> Data, <fixed-case>LX</fixed-case> Processing and <fixed-case>CLARIN</fixed-case> support - AntónioBranco - João RicardoSilva + AntónioBranco + João RicardoSilva LuísGomes - JoãoAntónio Rodrigues + JoãoAntónio Rodrigues 5617–5626 The grammatical framework for the mapping between linguistic form and meaning representation known as Universal Dependencies relies on a non-constituency syntactic analysis that is centered on the notion of grammatical relation (e.g. Subject, Object, etc.). Given its core goal of providing a common set of analysis primitives suitable to every natural language, and its practical objective of fostering their computational grammatical processing, it keeps being an active domain of research in science and technology of language. This paper presents a new collection of quality language resources for the computational processing of the Portuguese language under the Universal Dependencies framework (UD). This is an all-encompassing, publicly available open collection of mutually consistent and inter-operable scientific resources that includes reliably annotated corpora, top-performing processing tools and expert support services: a new UPOS-annotated corpus, CINTIL-UPos, with 675K tokens and a new UD treebank, CINTIL-UDep Treebank, with nearly 38K sentences; a UPOS tagger, LX-UTagger, and a UD parser, LX-UDParser, trained on these corpora, available both as local stand-alone tools and as remote web-based services; and helpdesk support ensured by the Knowledge Center for the Science and Technology of Portuguese of the CLARIN research infrastructure. 2022.lrec-1.603 @@ -7016,7 +7016,7 @@ Building a Synthetic Biomedical Research Article Citation Linkage Corpus SudiptaSingha Roy - Robert E.Mercer + Robert E.Mercer 5665–5672 Citations are frequently used in publications to support the presented results and to demonstrate the previous discoveries while also assisting the reader in following the chronological progression of information through publications. In scientific publications, a citation refers to the referenced document, but it makes no mention of the exact span of text that is being referred to. Connecting the citation to this span of text is called citation linkage. In this paper, to find these citation linkages in biomedical research publications using deep learning, we provide a synthetic silver standard corpus as well as the method to build this corpus. The motivation for building this corpus is to provide a training set for deep learning models that will locate the text spans in a reference article, given a citing statement, based on semantic similarity. This corpus is composed of sentence pairs, where one sentence in each pair is the citing statement and the other one is a candidate cited statement from the referenced paper. The corpus is annotated using an unsupervised sentence embedding method. The effectiveness of this silver standard corpus for training citation linkage models is validated against a human-annotated gold standard corpus. 2022.lrec-1.608 @@ -7080,7 +7080,7 @@ NirmalSurange PavanBaswani PriyankaRavva - ManishShrivastava + ManishShrivastava 5712–5722 Expert human annotation for summarization is definitely an expensive task, and can not be done on huge scales. But with this work, we show that even with a crowd sourced summary generation approach, quality can be controlled by aggressive expert informed filtering and sampling-based human evaluation. We propose a pipeline that crowd-sources summarization data and then aggressively filters the content via: automatic and partial expert evaluation. Using this pipeline we create a high-quality Telugu Abstractive Summarization dataset (TeSum) which we validate with sampling-based human evaluation. We also provide baseline numbers for various models commonly used for summarization. A number of recently released datasets for summarization, scraped the web-content relying on the assumption that summary is made available with the article by the publishers. While this assumption holds for multiple resources (or news-sites) in English, it should not be generalised across languages without thorough analysis and verification. Our analysis clearly shows that this assumption does not hold true for most Indian language news resources. We show that our proposed filtration pipeline can even be applied to these large-scale scraped datasets to extract better quality article-summary pairs. 2022.lrec-1.614 @@ -7088,7 +7088,7 @@ A Corpus of Simulated Counselling Sessions with Dialog Act Annotation - JohnLee + JohnLee HaleyFong Lai Shuen JudyWong Chun ChungMak @@ -7105,7 +7105,7 @@ YulanFeng CarlaGordon Seyed HosseinAlavi - DavidTraum + DavidTraum MaxineEskenazi 5731–5738 The ultimate goal of dialog research is to develop systems that can be effectively used in interactive settings by real users. To this end, we introduced the Interactive Evaluation of Dialog Track at the 9th Dialog System Technology Challenge. This track consisted of two sub-tasks. The first sub-task involved building knowledge-grounded response generation models. The second sub-task aimed to extend dialog models beyond static datasets by assessing them in an interactive setting with real users. Our track challenges participants to develop strong response generation models and explore strategies that extend them to back-and-forth interactions with real users. The progression from static corpora to interactive evaluation introduces unique challenges and facilitates a more thorough assessment of open-domain dialog systems. This paper provides an overview of the track, including the methodology and results. Furthermore, it provides insights into how to best evaluate open-domain dialog models. @@ -7115,7 +7115,7 @@ <fixed-case>HADREB</fixed-case>: Human Appraisals and (<fixed-case>E</fixed-case>nglish) Descriptions of Robot Emotional Behaviors JosueTorres-Fonseca - CaseyKennington + CaseyKennington 5739–5748 Humans sometimes anthropomorphize everyday objects, but especially robots that have human-like qualities and that are often able to interact with and respond to humans in ways that other objects cannot. Humans especially attribute emotion to robot behaviors, partly because humans often use and interpret emotions when interacting with other humans, and they apply that capability when interacting with robots. Moreover, emotions are a fundamental part of the human language system and emotions are used as scaffolding for language learning, making them an integral part of language learning and meaning. However, there are very few datasets that explore how humans perceive the emotional states of robots and how emotional behaviors relate to human language. To address this gap we have collected HADREB, a dataset of human appraisals and English descriptions of robot emotional behaviors collected from over 30 participants. These descriptions and human emotion appraisals are collected using the Mistyrobotics Misty II and the Digital Dream Labs Cozmo (formerly Anki) robots. The dataset contains English descriptions and emotion appraisals of more than 500 descriptions and graded valence labels of 8 emotion pairs for each behavior and each robot. In this paper we describe the process of collecting and cleaning the data, give a general analysis of the data, and evaluate the usefulness of the dataset in two experiments, one using a language model to map descriptions to emotions, the other maps robot behaviors to emotions. 2022.lrec-1.617 @@ -7148,7 +7148,7 @@ Strategy-level Entrainment of Dialogue System Users in a Creative Visual Reference Resolution Task DeepthiKarkada - RameshManuvinakurike + RameshManuvinakurike MaikePaetzel-Prüsmann KallirroiGeorgila 5768–5777 @@ -7212,7 +7212,7 @@ Comparing Approaches to Language Understanding for Human-Robot Dialogue: An Error Taxonomy and Analysis AdaTur - DavidTraum + DavidTraum 5813–5820 In this paper, we compare two different approaches to language understanding for a human-robot interaction domain in which a human commander gives navigation instructions to a robot. We contrast a relevance-based classifier with a GPT-2 model, using about 2000 input-output examples as training data. With this level of training data, the relevance-based model outperforms the GPT-2 based model 79% to 8%. We also present a taxonomy of types of errors made by each model, indicating that they have somewhat different strengths and weaknesses, so we also examine the potential for a combined model. 2022.lrec-1.625 @@ -7234,7 +7234,7 @@ PriyanshuPriya MauajamaFirdaus AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 5829–5837 The long-standing goal of Artificial Intelligence (AI) has been to create human-like conversational systems. Such systems should have the ability to develop an emotional connection with the users, consequently, emotion recognition in dialogues has gained popularity. Emotion detection in dialogues is a challenging task because humans usually convey multiple emotions with varying degrees of intensities in a single utterance. Moreover, emotion in an utterance of a dialogue may be dependent on previous utterances making the task more complex. Recently, emotion recognition in low-resource languages like Hindi has been in great demand. However, most of the existing datasets for multi-label emotion and intensity detection in conversations are in English. To this end, we propose a large conversational dataset in Hindi named EmoInHindi for multi-label emotion and intensity recognition in conversations containing 1,814 dialogues with a total of 44,247 utterances. We prepare our dataset in a Wizard-of-Oz manner for mental health and legal counselling of crime victims. Each utterance of dialogue is annotated with one or more emotion categories from 16 emotion labels including neutral and their corresponding intensity. We further propose strong contextual baselines that can detect the emotion(s) and corresponding emotional intensity of an utterance given the conversational context. 2022.lrec-1.627 @@ -7263,7 +7263,7 @@ A Language Modelling Approach to Quality Assessment of <fixed-case>OCR</fixed-case>’ed Historical Text CallumBooth RobertShoemaker - RobertGaizauskas + RobertGaizauskas 5859–5864 We hypothesise and evaluate a language model-based approach for scoring the quality of OCR transcriptions in the British Library Newspapers (BLN) corpus parts 1 and 2, to identify the best quality OCR for use in further natural language processing tasks, with a wider view to link individual newspaper reports of crime in nineteenth-century London to the Digital Panopticon—a structured repository of criminal lives. We mitigate the absence of gold standard transcriptions of the BLN corpus by utilising a corpus of genre-adjacent texts that capture the common and legal parlance of nineteenth-century London—the Proceedings of the Old Bailey Online—with a view to rank the BLN transcriptions by their OCR quality. 2022.lrec-1.630 @@ -7428,7 +7428,7 @@ Evaluation of Off-the-shelf Speech Recognizers on Different Accents in a Dialogue Domain DivyaTadimeti KallirroiGeorgila - DavidTraum + DavidTraum 6001–6008 We evaluate several publicly available off-the-shelf (commercial and research) automatic speech recognition (ASR) systems on dialogue agent-directed English speech from speakers with General American vs. non-American accents. Our results show that the performance of the ASR systems for non-American accents is considerably worse than for General American accents. Depending on the recognizer, the absolute difference in performance between General American accents and all non-American accents combined can vary approximately from 2% to 12%, with relative differences varying approximately between 16% and 49%. This drop in performance becomes even larger when we consider specific categories of non-American accents indicating a need for more diligent collection of and training on non-native English speaker data in order to narrow this performance gap. There are performance differences across ASR systems, and while the same general pattern holds, with more errors for non-American accents, there are some accents for which the best recognizer is different than in the overall case. We expect these results to be useful for dialogue system designers in developing more robust inclusive dialogue systems, and for ASR providers in taking into account performance requirements for different accents. 2022.lrec-1.645 @@ -7458,9 +7458,9 @@ SouvikKundu JoséCañete MarceloMendoza - Robert E.Mercer + Robert E.Mercer FelipeBravo-Marquez - Marie-FrancineMoens + Marie-FrancineMoens AlvaroSoto 6024–6034 Due to the success of pre-trained language models, versions of languages other than English have been released in recent years. This fact implies the need for resources to evaluate these models. In the case of Spanish, there are few ways to systematically assess the models’ quality. In this paper, we narrow the gap by building two evaluation benchmarks. Inspired by previous work (Conneau and Kiela, 2018; Chen et al., 2019), we introduce Spanish SentEval and Spanish DiscoEval, aiming to assess the capabilities of stand-alone and discourse-aware sentence representations, respectively. Our benchmarks include considerable pre-existing and newly constructed datasets that address different tasks from various domains. In addition, we evaluate and analyze the most recent pre-trained Spanish language models to exhibit their capabilities and limitations. As an example, we discover that for the case of discourse evaluation tasks, mBERT, a language model trained on multiple languages, usually provides a richer latent representation than models trained only with documents in Spanish. We hope our contribution will motivate a fairer, more comparable, and less cumbersome way to evaluate future Spanish language models. @@ -7514,7 +7514,7 @@ HoangVan MoribaJah RobertoFurfaro - PeterJansen + PeterJansen 6077–6082 Space situational awareness typically makes use of physical measurements from radar, telescopes, and other assets to monitor satellites and other spacecraft for operational, navigational, and defense purposes. In this work we explore using textual input for the space situational awareness task. We construct a corpus of 48.5k news articles spanning all known active satellites between 2009 and 2020. Using a dependency-rule-based extraction system designed to target three high-impact events – spacecraft launches, failures, and decommissionings, we identify 1,787 space-event sentences that are then annotated by humans with 15.9k labels for event slots. We empirically demonstrate a state-of-the-art neural extraction system achieves an overall F1 between 53 and 91 per slot for event extraction in this low-resource, high-impact domain. 2022.lrec-1.653 @@ -7615,7 +7615,7 @@ Incorporating Zoning Information into Argument Mining from Biomedical Literature BoyangLiu ViktorSchlegel - RizaBatista-Navarro + RizaBatista-Navarro SophiaAnaniadou 6162–6169 The goal of text zoning is to segment a text into zones (i.e., Background, Conclusion) that serve distinct functions. Argumentative zoning, a specific text zoning scheme for the scientific domain, is considered as the antecedent for argument mining by many researchers. Surprisingly, however, little work is concerned with exploiting zoning information to improve the performance of argument mining models, despite the relatedness of the two tasks. In this paper, we propose two transformer-based models to incorporate zoning information into argumentative component identification and classification tasks. One model is for the sentence-level argument mining task and the other is for the token-level task. In particular, we add the zoning labels predicted by an off-the-shelf model to the beginning of each sentence, inspired by the convention commonly used biomedical abstracts. Moreover, we employ multi-head attention to transfer the sentence-level zoning information to each token in a sentence. Based on experiment results, we find a significant improvement in F1-scores for both sentence- and token-level tasks. It is worth mentioning that these zoning labels can be obtained with high accuracy by utilising readily available automated methods. Thus, existing argument mining models can be improved by incorporating zoning information without any additional annotation cost. @@ -7637,10 +7637,10 @@ From Examples to Rules: Neural Guided Rule Synthesis for Information Extraction RobertVacareanu - Marco A.Valenzuela-Escárcega + Marco A.Valenzuela-Escárcega George CaiqueGouveia Barbosa RebeccaSharp - GustaveHahn-Powell + GustaveHahn-Powell MihaiSurdeanu 6180–6189 While deep learning approaches to information extraction have had many successes, they can be difficult to augment or maintain as needs shift. Rule-based methods, on the other hand, can be more easily modified. However, crafting rules requires expertise in linguistics and the domain of interest, making it infeasible for most users. Here we attempt to combine the advantages of these two directions while mitigating their drawbacks. We adapt recent advances from the adjacent field of program synthesis to information extraction, synthesizing rules from provided examples. We use a transformer-based architecture to guide an enumerative search, and show that this reduces the number of steps that need to be explored before a rule is found. Further, we show that without training the synthesis algorithm on the specific domain, our synthesized rules achieve state-of-the-art performance on the 1-shot scenario of a task that focuses on few-shot learning for relation classification, and competitive performance in the 5-shot scenario. @@ -7729,7 +7729,7 @@ AnkushAgarwal RajGite ShreyaLaddha - PushpakBhattacharyya + PushpakBhattacharyya SatyanarayanKar AsifEkbal PrabhjitThind @@ -7754,7 +7754,7 @@ A Large Interlinked Knowledge Graph of the <fixed-case>I</fixed-case>talian Cultural Heritage StefanoFaralli AndreaLenzi - PaolaVelardi + PaolaVelardi 6280–6289 Knowledge is the lifeblood for a plethora of applications such as search, recommender systems and natural language understanding. Thanks to the efforts in the fields of Semantic Web and Linked Open Data a growing number of interlinked knowledge bases are supporting the development of advanced knowledge-based applications. Unfortunately, for a large number of domain-specific applications, these knowledge bases are unavailable. In this paper, we present a resource consisting of a large knowledge graph linking the Italian cultural heritage entities (defined in the ArCo ontology) with the concepts defined on well-known knowledge bases (i.e., DBpedia and the Getty GVP ontology). We describe the methodologies adopted for the semi-automatic resource creation and provide an in-depth analysis of the resulting interlinked graph. 2022.lrec-1.675 @@ -7762,7 +7762,7 @@ Training on Lexical Resources - KennethChurch + KennethChurch XingyuCai YuchenBian 6290–6299 @@ -7786,9 +7786,9 @@ AndisLagzdiņš UldisSiliņš TomsBergmanis - MārcisPinnis + MārcisPinnis ArtūrsVasiļevskis - AndrejsVasiļjevs + AndrejsVasiļjevs 6310–6316 Consolidated access to current and reliable terms from different subject fields and languages is necessary for content creators and translators. Terminology is also needed in AI applications such as machine translation, speech recognition, information extraction, and other natural language processing tools. In this work, we facilitate standards-based sharing and management of terminology resources by providing an open terminology management solution - the EuroTermBank Toolkit. It allows organisations to manage and search their terms, create term collections, and share them within and outside the organisation by participating in the network of federated databases. The data curated in the federated databases are automatically shared with EuroTermBank, the largest multilingual terminology resource in Europe, allowing translators and language service providers as well as researchers and students to access terminology resources in their most current version. 2022.lrec-1.678 @@ -7829,7 +7829,7 @@ CécileRobin Gautham VadakkekaraSuresh VíctorRodriguez-Doncel - John P.McCrae + John P.McCrae PaulBuitelaar 6352–6360 Language resources are a key component of natural language processing and related research and applications. Users of language resources have different needs in terms of format, language, topics, etc. for the data they need to use. Linghub (McCrae and Cimiano, 2015) was first developed for this purpose, using the capabilities of linked data to represent metadata, and tackling the heterogeneous metadata issue. Linghub aimed at helping language resources and technology users to easily find and retrieve relevant data, and identify important information on access, topics, etc. This work describes a rejuvenation and modernisation of the 2015 platform into using a popular open source data management system, DSpace, as foundation. The new platform, Linghub2, contains updated and extended resources, more languages offered, and continues the work towards homogenisation of metadata through conversions, through linkage to standardisation strategies and community groups, such as the Open Digital Rights Language (ODRL) community group. @@ -7843,7 +7843,7 @@ Pin-ErChen Hsin-YuChou Mao-ChangKu - Shu-KaiHsieh + Shu-KaiHsieh 6361–6369 Constructions are direct form-meaning pairs with possible schematic slots. These slots are simultaneously constrained by the embedded construction itself and the sentential context. We propose that the constraint could be described by a conditional probability distribution. However, as this conditional probability is inevitably complex, we utilize language models to capture this distribution. Therefore, we build CxLM, a deep learning-based masked language model explicitly tuned to constructions’ schematic slots. We first compile a construction dataset consisting of over ten thousand constructions in Taiwan Mandarin. Next, an experiment is conducted on the dataset to examine to what extent a pretrained masked language model is aware of the constructions. We then fine-tune the model specifically to perform a cloze task on the opening slots. We find that the fine-tuned model predicts masked slots more accurately than baselines and generates both structurally and semantically plausible word samples. Finally, we release CxLM and its dataset as publicly available resources and hope to serve as new quantitative tools in studying construction grammar. 2022.lrec-1.683 @@ -7888,7 +7888,7 @@ How Does the Experimental Setting Affect the Conclusions of Neural Encoding Models? XiaohanZhang ShaonanWang - ChengqingZong + ChengqingZong 6397–6404 Recent years have witnessed the tendency of neural encoding models on exploring brain language processing using naturalistic stimuli. Neural encoding models are data-driven methods that require an encoding model to investigate the mystery of brain mechanisms hidden in the data. As a data-driven method, the performance of encoding models is very sensitive to the experimental setting. However, it is unknown how the experimental setting further affects the conclusions of neural encoding models. This paper systematically investigated this problem and evaluated the influence of three experimental settings, i.e., the data size, the cross-validation training method, and the statistical testing method. Results demonstrate that inappropriate cross-validation training and small data size can substantially decrease the performance of encoding models, especially in the temporal lobe and the frontal lobe. And different null hypotheses in significance testing lead to highly different significant brain regions. Based on these results, we suggest a block-wise cross-validation training method and an adequate data size for increasing the performance of linear encoding models. We also propose two strict null hypotheses to control false positive discovery rates. 2022.lrec-1.687 @@ -7908,7 +7908,7 @@ Progress in Multilingual Speech Recognition for Low Resource Languages <fixed-case>K</fixed-case>urmanji <fixed-case>K</fixed-case>urdish, <fixed-case>C</fixed-case>ree and Inuktut VishwaGupta - GillesBoulianne + GillesBoulianne 6420–6428 This contribution presents our efforts to develop the automatic speech recognition (ASR) systems for three low resource languages: Kurmanji Kurdish, Cree and Inuktut. As a first step, we generate multilingual models from acoustic training data from 12 different languages in the hybrid DNN/HMM framework. We explore different strategies for combining the phones from different languages: either keep the phone labels separate for each language or merge the common phones. For Kurmanji Kurdish and Inuktut, keeping the phones separate gives much lower word error rate (WER), while merging phones gives lower WER for Cree. These WER are lower than training the acoustic models separately for each language. We also compare two different DNN architectures: factored time delay neural network (TDNN-F), and bidirectional long short-term memory (BLSTM) acoustic models. The TDNN-F acoustic models give significantly lower WER for Kurmanji Kurdish and Cree, while BLSTM acoustic models give significantly lower WER for Inuktut. We also show that for each language, training multilingual acoustic models by one more epoch with acoustic data from that language reduces the WER significantly. We also added 512-dimensional embedding features from cross-lingual pre-trained wav2vec2.0 XLSR-53 models, but they lead to only a small reduction in WER. 2022.lrec-1.689 @@ -7916,7 +7916,7 @@ Efficient Entity Candidate Generation for Low-Resource Languages - AlbertoGarcia-Duran + AlbertoGarcia-Duran AkhilArora RobertWest 6429–6438 @@ -7930,7 +7930,7 @@ KelechiOgueji Miryamde Lhoneux OrevaogheneAhia - AndersSøgaard + AndersSøgaard 6439–6449 In recent years, the natural language processing (NLP) community has given increased attention to the disparity of efforts directed towards high-resource languages over low-resource ones. Efforts to remedy this delta often begin with translations of existing English datasets into other languages. However, this approach ignores that different language communities have different needs. We consider a group of low-resource languages, creole languages. Creoles are both largely absent from the NLP literature, and also often ignored by society at large due to stigma, despite these languages having sizable and vibrant communities. We demonstrate, through conversations with creole experts and surveys of creole-speaking communities, how the things needed from language technology can change dramatically from one language to another, even when the languages are considered to be very similar to each other, as with creoles. We discuss the prominent themes arising from these conversations, and ultimately demonstrate that useful language technology cannot be built without involving the relevant community. 2022.lrec-1.691 @@ -7967,7 +7967,7 @@ ShamsuddeenMuhammad Ibrahim Sa’idAhmad SubhadarshiPanda - OndřejBojar + OndřejBojar Bashir ShehuGaladanci Bello ShehuBello 6471–6479 @@ -8007,7 +8007,7 @@ Survey on <fixed-case>T</fixed-case>hai <fixed-case>NLP</fixed-case> Language Resources and Tools RatchakritArreerard StephenMander - ScottPiao + ScottPiao 6495–6505 Over the past decades, Natural Language Processing (NLP) research has been expanding to cover more languages. Recently particularly, NLP community has paid increasing attention to under-resourced languages. However, there are still many languages for which NLP research is limited in terms of both language resources and software tools. Thai language is one of the under-resourced languages in the NLP domain, although it is spoken by nearly 70 million people globally. In this paper, we report on our survey on the past development of Thai NLP research to help understand its current state and future research directions. Our survey shows that, although Thai NLP community has achieved a significant achievement over the past three decades, particularly on NLP upstream tasks such as tokenisation, research on downstream tasks such as syntactic parsing and semantic analysis is still limited. But we foresee that Thai NLP research will advance rapidly as richer Thai language resources and more robust NLP techniques become available. 2022.lrec-1.697 @@ -8115,7 +8115,7 @@ <fixed-case>G</fixed-case>eez<fixed-case>S</fixed-case>witch: Language Identification in Typologically Related Low-resourced <fixed-case>E</fixed-case>ast <fixed-case>A</fixed-case>frican Languages FitsumGaim WonsukYang - Jong C.Park + Jong C.Park 6578–6584 Language identification is one of the fundamental tasks in natural language processing that is a prerequisite to data processing and numerous applications. Low-resourced languages with similar typologies are generally confused with each other in real-world applications such as machine translation, affecting the user’s experience. In this work, we present a language identification dataset for five typologically and phylogenetically related low-resourced East African languages that use the Ge’ez script as a writing system; namely Amharic, Blin, Ge’ez, Tigre, and Tigrinya. The dataset is built automatically from selected data sources, but we also performed a manual evaluation to assess its quality. Our approach to constructing the dataset is cost-effective and applicable to other low-resource languages. We integrated the dataset into an existing language-identification tool and also fine-tuned several Transformer based language models, achieving very strong results in all cases. While the task of language identification is easy for the informed person, such datasets can make a difference in real-world deployments and also serve as part of a benchmark for language understanding in the target languages. The data and models are made available at https://github.com/fgaim/geezswitch. 2022.lrec-1.707 @@ -8149,7 +8149,7 @@ FarhanSamir EdithCoates GarrettNicolai - MiikkaSilfverberg + MiikkaSilfverberg 6597–6606 This paper presents a new inflectional resource for Gitksan, a low-resource Indigenous language of Canada. We use Gitksan data in interlinear glossed format, stemming from language documentation efforts, to build a database of partial inflection tables. We then enrich this morphological resource by filling in blank slots in the partial inflection tables using neural transformer reinflection models. We extend the training data for our transformer reinflection models using two data augmentation techniques: data hallucination and back-translation. Experimental results demonstrate substantial improvements from data augmentation, with data hallucination delivering particularly impressive gains. We also release reinflection models for Gitksan. 2022.lrec-1.710 @@ -8187,8 +8187,8 @@ Aligning the <fixed-case>R</fixed-case>omanian Reference Treebank and the Valence Lexicon of <fixed-case>R</fixed-case>omanian Verbs Ana-MariaBarbu - VerginicaBarbu Mititelu - CătălinMititelu + VerginicaBarbu Mititelu + CătălinMititelu 6626–6634 We present here the efforts of aligning two language resources for Romanian: the Romanian Reference Treebank and the Valence Lexicon of Romanian Verbs: for each occurrence of those verbs in the treebank that were included as entries in the lexicon, a set of valence frames is automatically assigned, then manually validated by two linguists and, when necessary, corrected. Validating a valence frame also means semantically disambiguating the verb in the respective context. The validation is done by two linguists, on complementary datasets. However, a subset of verbs were validated by both annotators and Cohen’s κ is 0.87 for this subset. The alignment we have made also serves as a method of enhancing the quality of the two resources, as in the process we identify morpho-syntactic annotation mistakes, incomplete valence frames or missing ones. Information from each resource complements the information from the other, thus their value increases. The treebank and the lexicon are freely available, while the links discovered between them are also made available on GitHub. 2022.lrec-1.714 @@ -8197,7 +8197,7 @@ <fixed-case>P</fixed-case>orti<fixed-case>L</fixed-case>exicon-<fixed-case>UD</fixed-case>: a <fixed-case>P</fixed-case>ortuguese Lexical Resource according to <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Model LuceleneLopes - MagaliDuran + MagaliDuran PauloFernandes ThiagoPardo 6635–6643 @@ -8217,7 +8217,7 @@ Low-resource Neural Machine Translation: Benchmarking State-of-the-art Transformer for <fixed-case>W</fixed-case>olof<-><fixed-case>F</fixed-case>rench - Cheikh M. BambaDione + Cheikh M. BambaDione AllaLo Elhadji MamadouNguer SileyeBa @@ -8228,7 +8228,7 @@ Criteria for Useful Automatic <fixed-case>R</fixed-case>omanization in <fixed-case>S</fixed-case>outh <fixed-case>A</fixed-case>sian Languages - IsinDemirsahin + IsinDemirsahin CibuJohny AlexanderGutkin BrianRoark @@ -8273,7 +8273,7 @@ Learning How to Translate <fixed-case>N</fixed-case>orth <fixed-case>K</fixed-case>orean through <fixed-case>S</fixed-case>outh <fixed-case>K</fixed-case>orean HwichanKim SangwhanMoon - NaoakiOkazaki + NaoakiOkazaki MamoruKomachi 6711–6718 South and North Korea both use the Korean language. However, Korean NLP research has focused on South Korean only, and existing NLP systems of the Korean language, such as neural machine translation (NMT) models, cannot properly handle North Korean inputs. Training a model using North Korean data is the most straightforward approach to solving this problem, but there is insufficient data to train NMT models. In this study, we create data for North Korean NMT models using a comparable corpus. First, we manually create evaluation data for automatic alignment and machine translation, and then, investigate automatic alignment methods suitable for North Korean. Finally, we show that a model trained by North Korean bilingual data without human annotation significantly boosts North Korean translation accuracy compared to existing South Korean models in zero-shot settings. @@ -8290,7 +8290,7 @@ JianYu WeiChen YanfengWang - JiajunChen + JiajunChen 6719–6727 Previous research for adapting a general neural machine translation (NMT) model into a specific domain usually neglects the diversity in translation within the same domain, which is a core problem for domain adaptation in real-world scenarios. One representative of such challenging scenarios is to deploy a translation system for a conference with a specific topic, e.g., global warming or coronavirus, where there are usually extremely less resources due to the limited schedule. To motivate wider investigation in such a scenario, we present a real-world fine-grained domain adaptation task in machine translation (FGraDA). The FGraDA dataset consists of Chinese-English translation task for four sub-domains of information technology: autonomous vehicles, AI education, real-time networks, and smart phone. Each sub-domain is equipped with a development set and test set for evaluation purposes. To be closer to reality, FGraDA does not employ any in-domain bilingual training data but provides bilingual dictionaries and wiki knowledge base, which can be easier obtained within a short time. We benchmark the fine-grained domain adaptation task and present in-depth analyses showing that there are still challenging problems to further improve the performance with heterogeneous resources. 2022.lrec-1.723 @@ -8381,7 +8381,7 @@ Cheuk TungYiu RitaFrieske HolyLovenia - GentaWinata + GentaWinata QifengChen XiaojuanMa BertramShi @@ -8407,7 +8407,7 @@ ShuoXu YuxiangJia ChangyongNiu - HongyingZan + HongyingZan 6802–6807 Emotion recognition in conversation is important for an empathetic dialogue system to understand the user’s emotion and then generate appropriate emotional responses. However, most previous researches focus on modeling conversational contexts primarily based on the textual modality or simply utilizing multimodal information through feature concatenation. In order to exploit multimodal information and contextual information more effectively, we propose a multimodal directed acyclic graph (MMDAG) network by injecting information flows inside modality and across modalities into the DAG architecture. Experiments on IEMOCAP and MELD show that our model outperforms other state-of-the-art models. Comparative studies validate the effectiveness of the proposed modality fusion method. 2022.lrec-1.733 @@ -8417,7 +8417,7 @@ Automatic Gloss-level Data Augmentation for Sign Language Translation Jin YeaJang Han-MuPark - SaimShin + SaimShin SunaShin ByungcheonYoon GahgeneGweon @@ -8433,7 +8433,7 @@ HiroakiNanjo KeisukeShirai HirotakaKameko - MasatakeDantsuji + MasatakeDantsuji 6814–6821 We focus on image description and a corresponding assessment system for language learners. To achieve automatic assessment of image description, we construct a novel dataset, the Language Learner Image Description (LLID) dataset, which consists of images, their descriptions, and assessment annotations. Then, we propose a novel task of automatic error correction for image description, and we develop a baseline model that encodes multimodal information from a learner sentence with an image and accurately decodes a corrected sentence. Our experimental results show that the developed model can revise errors that cannot be revised without an image. 2022.lrec-1.735 @@ -8465,7 +8465,7 @@ MuskanGarg SeemaWazarkar MuskaanSingh - OndřejBojar + OndřejBojar 6837–6847 With the development of multimodal systems and natural language generation techniques, the resurgence of multimodal datasets has attracted significant research interests, which aims to provide new information to enrich the representation of textual data. However, there remains a lack of a comprehensive survey for this task. To this end, we take the first step and present a thorough review of this research field. This paper provides an overview of a publicly available dataset with different modalities according to the applications. Furthermore, we discuss the new frontier and give our thoughts. We hope this survey of multimodal datasets can provide the community with quick access and a general picture of the multimodal dataset for specific Natural Language Processing (NLP) applications and motivates future researches. In this context, we release the collection of all multimodal datasets easily accessible here: https://github.com/drmuskangarg/Multimodal-datasets 2022.lrec-1.738 @@ -8509,7 +8509,7 @@ HiroshiKanayama IsseiYoshida MasayasuMuraoka - AkikoAizawa + AkikoAizawa 6874–6883 Deletion-based sentence compression in the English language has made significant progress over the past few decades. However, there is a lack of large-scale and high-quality parallel corpus (i.e., (sentence, compression) pairs) for the Chinese language to train an efficient compression system. To remedy this shortcoming, we present a dependency-tree-based method to construct a Chinese corpus with 151k pairs of sentences and compression based on Chinese language-specific characteristics. Subsequently, we trained both extractive and generative neural compression models using the constructed corpus. The experimental results show that our compression model can generate high-quality compressed sentences on both automatic and human evaluation metrics compared with the baselines. The results of the faithfulness evaluation also indicated that the Chinese compression model trained on our constructed corpus can produce more faithful compressed sentences. Furthermore, a dataset with 1,000 pairs of sentences and ground truth compression was manually created for automatic evaluation, which, we believe, will benefit future research on Chinese sentence compression. 2022.lrec-1.742 @@ -8553,12 +8553,12 @@ The Bull and the Bear: Summarizing Stock Market Discussions - AyushKumar + AyushKumar DhyeyJani JayShah DevanshuThakar VarunJain - MayankSingh + MayankSingh 6909–6913 Stock market investors debate and heavily discuss stock ideas, investing strategies, news and market movements on social media platforms. The discussions are significantly longer in length and require extensive domain expertise for understanding. In this paper, we curate such discussions and construct a first-of-its-kind of abstractive summarization dataset. Our curated dataset consists of 7888 Reddit posts and manually constructed summaries for 400 posts. We robustly evaluate the summaries and conduct experiments on SOTA summarization tools to showcase their limitations. We plan to make the dataset publicly available. The sample dataset is available here: https://dhyeyjani.github.io/RSMC 2022.lrec-1.746 @@ -8568,7 +8568,7 @@ Combination of Contextualized and Non-Contextualized Layers for Lexical Substitution in <fixed-case>F</fixed-case>rench KévinEspasa EmmanuelMorin - OlivierHamon + OlivierHamon 6914–6921 Lexical substitution task requires to substitute a target word by candidates in a given context. Candidates must keep meaning and grammatically of the sentence. The task, introduced in the SemEval 2007, has two objectives. The first objective is to find a list of substitutes for a target word. This list of substitutes can be obtained with lexical resources like WordNet or generated with a pre-trained language model. The second objective is to rank these substitutes using the context of the sentence. Most of the methods use vector space models or more recently embeddings to rank substitutes. Embedding methods use high contextualized representation. This representation can be over contextualized and in this way overlook good substitute candidates which are more similar on non-contextualized layers. SemDis 2014 introduced the lexical substitution task in French. We propose an application of the state-of-the-art method based on BERT in French and a novel method using contextualized and non-contextualized layers to increase the suggestion of words having a lower probability in a given context but that are more semantically similar. Experiments show our method increases the BERT based system on the OOT measure but decreases on the BEST measure in the SemDis 2014 benchmark. 2022.lrec-1.747 @@ -8597,7 +8597,7 @@ Emotion analysis and detection during <fixed-case>COVID</fixed-case>-19 TiberiuSosea - ChauPham + ChauPham AlexanderTekle CorneliaCaragea Junyi JessyLi @@ -8627,8 +8627,8 @@ <fixed-case>V</fixed-case>accine<fixed-case>L</fixed-case>ies: A Natural Language Resource for Learning to Recognize Misinformation about the <fixed-case>COVID</fixed-case>-19 and <fixed-case>HPV</fixed-case> Vaccines - MaxwellWeinzierl - SandaHarabagiu + MaxwellWeinzierl + SandaHarabagiu 6967–6975 Billions of COVID-19 vaccines have been administered, but many remain hesitant. Misinformation about the COVID-19 vaccines and other vaccines, propagating on social media, is believed to drive hesitancy towards vaccination. The ability to automatically recognize misinformation targeting vaccines on Twitter depends on the availability of data resources. In this paper we present VaccineLies, a large collection of tweets propagating misinformation about two vaccines: the COVID-19 vaccines and the Human Papillomavirus (HPV) vaccines. Misinformation targets are organized in vaccine-specific taxonomies, which reveal the misinformation themes and concerns. The ontological commitments of the misinformation taxonomies provide an understanding of which misinformation themes and concerns dominate the discourse about the two vaccines covered in VaccineLies. The organization into training, testing and development sets of VaccineLies invites the development of novel supervised methods for detecting misinformation on Twitter and identifying the stance towards it. Furthermore, VaccineLies can be a stepping stone for the development of datasets focusing on misinformation targeting additional vaccines. 2022.lrec-1.753 @@ -8637,7 +8637,7 @@ Tackling Irony Detection using Ensemble Classifiers ChristophTurban - UdoKruschwitz + UdoKruschwitz 6976–6984 Automatic approaches to irony detection have been of interest to the NLP community for a long time, yet, state-of-the-art approaches still fall way short of what one would consider a desirable performance. In part this is due to the inherent difficulty of the problem. However, in recent years ensembles of transformer-based approaches have emerged as a promising direction to push the state of the art forward in a wide range of NLP applications. A different, more recent, development is the automatic augmentation of training data. In this paper we will explore both these directions for the task of irony detection in social media. Using the common SemEval 2018 Task 3 benchmark collection we demonstrate that transformer models are well suited in ensemble classifiers for the task at hand. In the multi-class classification task we observe statistically significant improvements over strong baselines. For binary classification we achieve performance that is on par with state-of-the-art alternatives. The examined data augmentation strategies showed an effect, but are not decisive for good results. 2022.lrec-1.754 @@ -8657,7 +8657,7 @@ AnupamaRay ShubhamMishra ApoorvaNunna - PushpakBhattacharyya + PushpakBhattacharyya 6992–7003 While sentiment and emotion analysis have been studied extensively, the relationship between sarcasm and emotion has largely remained unexplored. A sarcastic expression may have a variety of underlying emotions. For example, “I love being ignored” belies sadness, while “my mobile is fabulous with a battery backup of only 15 minutes!” expresses frustration. Detecting the emotion behind a sarcastic expression is non-trivial yet an important task. We undertake the task of detecting the emotion in a sarcastic statement, which to the best of our knowledge, is hitherto unexplored. We start with the recently released multimodal sarcasm detection dataset (MUStARD) pre-annotated with 9 emotions. We identify and correct 343 incorrect emotion labels (out of 690). We double the size of the dataset, label it with emotions along with valence and arousal which are important indicators of emotional intensity. Finally, we label each sarcastic utterance with one of the four sarcasm types-Propositional, Embedded, Likeprefixed and Illocutionary, with the goal of advancing sarcasm detection research. Exhaustive experimentation with multimodal (text, audio, and video) fusion models establishes a benchmark for exact emotion recognition in sarcasm and outperforms the state-of-art sarcasm detection. We release the dataset enriched with various annotations and the code for research purposes: https://github.com/apoorva-nunna/MUStARD_Plus_Plus 2022.lrec-1.756 @@ -8738,7 +8738,7 @@ TahaShangipour ataei KamyarDarvishi SoroushJavdan - BehrouzMinaei-Bidgoli + BehrouzMinaei-Bidgoli SaulehEetemadi 7056–7060 Due to the increased availability of online reviews, sentiment analysis witnessed a thriving interest from researchers. Sentiment analysis is a computational treatment of sentiment used to extract and understand the opinions of authors. While many systems were built to predict the sentiment of a document or a sentence, many others provide the necessary detail on various aspects of the entity (i.e., aspect-based sentiment analysis). Most of the available data resources were tailored to English and the other popular European languages. Although Farsi is a language with more than 110 million speakers, to the best of our knowledge, there is a lack of proper public datasets on aspect-based sentiment analysis for Farsi. This paper provides a manually annotated Farsi dataset, Pars-ABSA, annotated and verified by three native Farsi speakers. The dataset consists of 5,114 positive, 3,061 negative and 1,827 neutral data samples from 5,602 unique reviews. Moreover, as a baseline, this paper reports the performance of some aspect-based sentiment analysis methods focusing on transfer learning on Pars-ABSA. @@ -8749,7 +8749,7 @@ <fixed-case>H</fixed-case>indi<fixed-case>MD</fixed-case>: A Multi-domain Corpora for Low-resource Sentiment Analysis Mamta AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya TistaSaha AlkaKumar ShikhaSrivastava @@ -8920,7 +8920,7 @@ HangJiang YiningHua DougBeeferman - DebRoy + DebRoy 7199–7208 Social media data such as Twitter messages (“tweets”) pose a particular challenge to NLP systems because of their short, noisy, and colloquial nature. Tasks such as Named Entity Recognition (NER) and syntactic parsing require highly domain-matched training data for good performance. To date, there is no complete training corpus for both NER and syntactic analysis (e.g., part of speech tagging, dependency parsing) of tweets. While there are some publicly available annotated NLP datasets of tweets, they are only designed for individual tasks. In this study, we aim to create Tweebank-NER, an English NER corpus based on Tweebank V2 (TB2), train state-of-the-art (SOTA) Tweet NLP models on TB2, and release an NLP pipeline called Twitter-Stanza. We annotate named entities in TB2 using Amazon Mechanical Turk and measure the quality of our annotations. We train the Stanza pipeline on TB2 and compare with alternative NLP frameworks (e.g., FLAIR, spaCy) and transformer-based models. The Stanza tokenizer and lemmatizer achieve SOTA performance on TB2, while the Stanza NER tagger, part-of-speech (POS) tagger, and dependency parser achieve competitive performance against non-transformer models. The transformer-based models establish a strong baseline in Tweebank-NER and achieve the new SOTA performance in POS tagging and dependency parsing on TB2. We release the dataset and make both the Stanza pipeline and BERTweet-based models available “off-the-shelf” for use in future Tweet NLP research. Our source code, data, and pre-trained models are available at: https://github.com/social-machines/TweebankNLP. 2022.lrec-1.780 @@ -8930,8 +8930,8 @@ Did that happen? Predicting Social Media Posts that are Indicative of what happened in a scene: A case study of a <fixed-case>TV</fixed-case> show AnietieAndy RenoKriz - Sharath ChandraGuntuku - Derry TantiWijaya + Sharath ChandraGuntuku + Derry TantiWijaya ChrisCallison-Burch 7209–7214 While popular Television (TV) shows are airing, some users interested in these shows publish social media posts about the show. Analyzing social media posts related to a TV show can be beneficial for gaining insights about what happened during scenes of the show. This is a challenging task partly because a significant number of social media posts associated with a TV show or event may not clearly describe what happened during the event. In this work, we propose a method to predict social media posts (associated with scenes of a TV show) that are indicative of what transpired during the scenes of the show. We evaluate our method on social media (Twitter) posts associated with an episode of a popular TV show, Game of Thrones. We show that for each of the identified scenes, with high AUC’s, our method can predict posts that are indicative of what happened in a scene from those that are not-indicative. Based on Twitters policy, we will make the Tweeter ID’s of the Twitter posts used for this work publicly available. @@ -8943,7 +8943,7 @@ PrashantKodali AkshalaBhatnagar NamanAhuja - ManishShrivastava + ManishShrivastava PonnurangamKumaraguru 7215–7219 Hashtag segmentation is the task of breaking a hashtag into its constituent tokens. Hashtags often encode the essence of user-generated posts, along with information like topic and sentiment, which are useful in downstream tasks. Hashtags prioritize brevity and are written in unique ways - transliterating and mixing languages, spelling variations, creative named entities. Benchmark datasets used for the hashtag segmentation task - STAN, BOUN - are small and extracted from a single set of tweets. However, datasets should reflect the variations in writing styles of hashtags and account for domain and language specificity, failing which the results will misrepresent model performance. We argue that model performance should be assessed on a wider variety of hashtags, and datasets should be carefully curated. To this end, we propose HashSet, a dataset comprising of: a) 1.9k manually annotated dataset; b) 3.3M loosely supervised dataset. HashSet dataset is sampled from a different set of tweets when compared to existing datasets and provides an alternate distribution of hashtags to build and validate hashtag segmentation models. We analyze the performance of SOTA models for Hashtag Segmentation, and show that the proposed dataset provides an alternate set of hashtags to train and assess models. @@ -8976,7 +8976,7 @@ <fixed-case>R</fixed-case>o<fixed-case>BERT</fixed-case>uito: a pre-trained language model for social media text in <fixed-case>S</fixed-case>panish Juan ManuelPérez Damián ArielFurman - LauraAlonso Alemany + LauraAlonso Alemany Franco M.Luque 7235–7243 Since BERT appeared, Transformer language models and transfer learning have become state-of-the-art for natural language processing tasks. Recently, some works geared towards pre-training specially-crafted models for particular domains, such as scientific papers, medical documents, user-generated texts, among others. These domain-specific models have been shown to improve performance significantly in most tasks; however, for languages other than English, such models are not widely available. In this work, we present RoBERTuito, a pre-trained language model for user-generated text in Spanish, trained on over 500 million tweets. Experiments on a benchmark of tasks involving user-generated text showed that RoBERTuito outperformed other pre-trained language models in Spanish. In addition to this, our model has some cross-lingual abilities, achieving top results for English-Spanish tasks of the Linguistic Code-Switching Evaluation benchmark (LinCE) and also competitive performance against monolingual models in English Twitter tasks. To facilitate further research, we make RoBERTuito publicly available at the HuggingFace model hub together with the dataset used to pre-train it. @@ -8999,7 +8999,7 @@ ChristopherSong DavidHarwath TukaAlhanai - JamesGlass + JamesGlass 7253–7258 We present Speak, a toolkit that allows researchers to crowdsource speech audio recordings using Amazon Mechanical Turk (MTurk). Speak allows MTurk workers to submit speech recordings in response to a task prompt and stimulus (e.g. image, text excerpt, audio file) defined by researchers, a functionality that is not natively offered by MTurk at the time of writing this paper. Importantly, the toolkit employs numerous measures to ensure that speech recordings collected are of adequate quality, in order to avoid accepting unusable data and prevent abuse/fraud. Speak has demonstrated utility, having collected over 600,000 recordings to date. The toolkit is open-source and available for download. 2022.lrec-1.787 @@ -9009,7 +9009,7 @@ <fixed-case>ASCEND</fixed-case>: A Spontaneous <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Dataset for Code-switching in Multi-turn Conversation HolyLovenia SamuelCahyawijaya - GentaWinata + GentaWinata PengXu YanXu ZihanLiu @@ -9098,8 +9098,8 @@ Multilingual Transfer Learning for Children Automatic Speech Recognition ThomasRolland AlbertoAbad - CatiaCucchiarini - HelmerStrik + CatiaCucchiarini + HelmerStrik 7314–7320 Despite recent advances in automatic speech recognition (ASR), the recognition of children’s speech still remains a significant challenge. This is mainly due to the high acoustic variability and the limited amount of available training data. The latter problem is particularly evident in languages other than English, which are usually less-resourced. In the current paper, we address children ASR in a number of less-resourced languages by combining several small-sized children speech corpora from these languages. In particular, we address the following research question: Does a novel two-step training strategy in which multilingual learning is followed by language-specific transfer learning outperform conventional single language/task training for children speech, as well as multilingual and transfer learning alone? Based on previous experimental results with English, we hypothesize that multilingual learning provides a better generalization of the underlying characteristics of children’s speech. Our results provide a positive answer to our research question, by showing that using transfer learning on top of a multilingual model for an unseen language outperforms conventional single language-specific learning. 2022.lrec-1.795 @@ -9169,7 +9169,7 @@ IrinaStenger PhilipGeorgis TaniaAvgustinova - BerndMöbius + BerndMöbius DietrichKlakow 7368–7376 We focus on the syntactic variation and measure syntactic distances between nine Slavic languages (Belarusian, Bulgarian, Croatian, Czech, Polish, Slovak, Slovene, Russian, and Ukrainian) using symmetric measures of insertion, deletion and movement of syntactic units in the parallel sentences of the fable “The North Wind and the Sun”. Additionally, we investigate phonetic and orthographic asymmetries between selected languages by means of the information theoretical notion of surprisal. Syntactic distance and surprisal are, thus, considered as potential predictors of mutual intelligibility between related languages. In spoken and written cloze test experiments for Slavic native speakers, the presented predictors will be validated as to whether variations in syntax lead to a slower or impeded intercomprehension of Slavic texts. diff --git a/data/xml/2022.lt4hala.xml b/data/xml/2022.lt4hala.xml index a7a24bfbbb..db178db554 100644 --- a/data/xml/2022.lt4hala.xml +++ b/data/xml/2022.lt4hala.xml @@ -3,7 +3,7 @@ Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages - RacheleSprugnoli + RacheleSprugnoli MarcoPassarotti European Language Resources Association
Marseille, France
@@ -19,7 +19,7 @@ Identifying Cleartext in Historical Ciphers Maria-ElenaGambardella - BeataMegyesi + BeataMegyesi EvaPettersson 1–9 In historical encrypted sources we can find encrypted text sequences, also called ciphertext, as well as non-encrypted cleartexts written in a known language. While most of the cryptanalysis focuses on the decryption of ciphertext, cleartext is often overlooked although it can give us important clues about the historical interpretation and contextualisation of the manuscript. In this paper, we investigate to what extent we can automatically distinguish cleartext from ciphertext in historical ciphers and to what extent we are able to identify its language. The problem is challenging as cleartext sequences in ciphers are often short, up to a few words, in different languages due to historical code-switching. To identify the sequences and the language(s), we chose a rule-based approach and run 7 different models using historical language models on various ciphertexts. @@ -89,7 +89,7 @@ A Treebank-based Approach to the Supprema Constructio in Dante’s <fixed-case>L</fixed-case>atin Works - Flavio MassimilianoCecchini + Flavio MassimilianoCecchini GiuliaPedonese 51–58 This paper aims to apply a corpus-driven approach to Dante Alighieri’s Latin works using UDante, a treebank based on Dante Search and part of the Universal Dependencies project. We present a method based on the notion of barycentre applied to a dependency tree as a way to calculate the “syntactic balance” of a sentence. Its application to Dante’s Latin works shows its potential in analysing the style of an author, and contributes to the interpretation of the supprema constructio mentioned in DVE II vi 7 as a well balanced syntactic pattern modeled on Latin literary writing. @@ -100,7 +100,7 @@ From Inscriptions to Lexica and Back: A Platform for Editing and Linking the Languages of <fixed-case>A</fixed-case>ncient <fixed-case>I</fixed-case>taly ValeriaQuochi AndreaBellandi - FahadKhan + FahadKhan MicheleMallia FrancescaMurano SilviaPiccini @@ -134,7 +134,7 @@ Contextual Unsupervised Clustering of Signs for Ancient Writing Systems MicheleCorazza - FabioTamburini + FabioTamburini MiguelValério SilviaFerrara 84–93 @@ -148,7 +148,7 @@ ElisaGuadagnini EvaSassolini MarcoBiffi - SimonettaMontemagni + SimonettaMontemagni 94–100 In this paper we describe some experiments related to a corpus derived from an authoritative historical Italian dictionary, namely the Grande dizionario della lingua italiana (‘Great Dictionary of Italian Language’, in short GDLI). Thanks to the digitization and structuring of this dictionary, we have been able to set up the first nucleus of a diachronic annotated corpus that selects—according to specific criteria, and distinguishing between prose and poetry—some of the quotations that within the entries illustrate the different definitions and sub-definitions. In fact, the GDLI presents a huge collection of quotations covering the entire history of the Italian language and thus ranging from the Middle Ages to the present day. The corpus was enriched with linguistic annotation and used to train and evaluate NLP models for POS tagging and lemmatization, with promising results. 2022.lt4hala-1.13 @@ -167,8 +167,8 @@ Handling Stress in Finite-State Morphological Analyzers for <fixed-case>A</fixed-case>ncient <fixed-case>G</fixed-case>reek and <fixed-case>A</fixed-case>ncient <fixed-case>H</fixed-case>ebrew - Daniel G.Swanson - Francis M.Tyers + Daniel G.Swanson + Francis M.Tyers 108–113 Modeling stress placement has historically been a challenge for computational morphological analysis, especially in finite-state systems because lexically conditioned stress cannot be modeled using only rewrite rules on the phonological form of a word. However, these phenomena can be modeled fairly easily if the lexicon’s internal representation is allowed to contain more information than the pure phonological form. In this paper we describe the stress systems of Ancient Greek and Ancient Hebrew and we present two prototype finite-state morphological analyzers, one for each language, which successfully implement these stress systems by inserting a small number of control characters into the phonological form, thus conclusively refuting the claim that finite-state systems are not powerful enough to model such stress systems and arguing in favor of the continued relevance of finite-state systems as an appropriate tool for modeling the morphology of historical languages. 2022.lt4hala-1.15 @@ -312,7 +312,7 @@ Overview of the <fixed-case>E</fixed-case>va<fixed-case>L</fixed-case>atin 2022 Evaluation Campaign RacheleSprugnoli MarcoPassarotti - Flavio MassimilianoCecchini + Flavio MassimilianoCecchini MargheritaFantoli GiovanniMoretti 183–188 diff --git a/data/xml/2022.ltedi.xml b/data/xml/2022.ltedi.xml index cf7156c29a..97b0058e3a 100644 --- a/data/xml/2022.ltedi.xml +++ b/data/xml/2022.ltedi.xml @@ -118,7 +118,7 @@ Disambiguation of morpho-syntactic features of <fixed-case>A</fixed-case>frican <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish – the case of habitual be HarrisonSantiago JoshuaMartin - SarahMoeller + SarahMoeller KevinTang 70-75 Recent research has highlighted that natural language processing (NLP) systems exhibit a bias againstAfrican American speakers. These errors are often caused by poor representation of linguistic features unique to African American English (AAE), which is due to the relatively low probability of occurrence for many such features. We present a workflow to overcome this issue in the case of habitual “be”. Habitual “be” is isomorphic, and therefore ambiguous, with other forms of uninflected “be” found in both AAE and General American English (GAE). This creates a clear challenge for bias in NLP technologies. To overcome the scarcity, we employ a combination of rule-based filters and data augmentation that generate a corpus balanced between habitual and non-habitual instances. This balanced corpus trains unbiased machine learning classifiers, as demonstrated on a corpus of AAE transcribed texts, achieving .65 F_1 score at classifying habitual “be”. @@ -263,7 +263,7 @@ <fixed-case>D</fixed-case>eep<fixed-case>B</fixed-case>lues@<fixed-case>LT</fixed-case>-<fixed-case>EDI</fixed-case>-<fixed-case>ACL</fixed-case>2022: Depression level detection modelling through domain specific <fixed-case>BERT</fixed-case> and short text Depression classifiers NawshadFarruque - OsmarZaiane + OsmarZaiane RandyGoebel SudhakarSivapalan 167-171 @@ -281,7 +281,7 @@ AngelS RajalakshmiSivanaiah Sakaya MiltonRajendram - MirnalineeT T + MirnalineeT T 172-176 In recent years social media has become one of the major forums for expressing human views and emotions. With the help of smartphones and high-speed internet, anyone can express their views on Social media. However, this can also lead to the spread of hatred and violence in society. Therefore it is necessary to build a method to find and support helpful social media content. In this paper, we studied Natural Language Processing approach for detecting Hope speech in a given sentence. The task was to classify the sentences into ‘Hope speech’ and ‘Non-hope speech’. The dataset was provided by LT-EDI organizers with text from Youtube comments. Based on the task description, we developed a system using the pre-trained language model BERT to complete this task. Our model achieved 1st rank in the Kannada language with a weighted average F1 score of 0.750, 2nd rank in the Malayalam language with a weighted average F1 score of 0.740, 3rd rank in the Tamil language with a weighted average F1 score of 0.390 and 6th rank in the English language with a weighted average F1 score of 0.880. 2022.ltedi-1.22 @@ -329,7 +329,7 @@ RajalakshmiSivanaiah AngelS Sakaya MiltonRajendram - MirnalineeT T + MirnalineeT T 196-199 Depression is a common mental illness that involves sadness and lack of interest in all day-to-day activities. The task is to classify the social media text as signs of depression into three labels namely “not depressed”, “moderately depressed”, and “severely depressed”. We have build a system using Deep Learning Model “Transformers”. Transformers provides thousands of pretrained models to perform tasks on different modalities such as text, vision, and audio. The multi-class classification model used in our system is based on the ALBERT model. In the shared task ACL 2022, Our team SSN_MLRG3 obtained a Macro F1 score of 0.473. 2022.ltedi-1.26 @@ -356,7 +356,7 @@ FazlourrahmanBalouchzahi SaburButt GrigoriSidorov - AlexanderGelbukh + AlexanderGelbukh 206-211 Hope is an inherent part of human life and essential for improving the quality of life. Hope increases happiness and reduces stress and feelings of helplessness. Hope speech is the desired outcome for better and can be studied using text from various online sources where people express their desires and outcomes. In this paper, we address a deep-learning approach with a combination of linguistic and psycho-linguistic features for hope-speech detection. We report our best results submitted to LT-EDI-2022 which ranked 2nd and 3rd in English and Spanish respectively. 2022.ltedi-1.28 @@ -700,7 +700,7 @@ Bharathi RajaChakravarthi RubaPriyadharshini ThenmozhiDurairaj - JohnMcCrae + JohnMcCrae PaulBuitelaar PrasannaKumaresan RahulPonnusamy @@ -717,9 +717,9 @@ VigneshwaranMuralidaran RubaPriyadharshini SubalalithaCn - JohnMcCrae + JohnMcCrae Miguel ÁngelGarcía - Salud MaríaJiménez-Zafra + Salud MaríaJiménez-Zafra RafaelValencia-García PrasannaKumaresan RahulPonnusamy diff --git a/data/xml/2022.mathnlp.xml b/data/xml/2022.mathnlp.xml index cad62c3363..f49c88128b 100644 --- a/data/xml/2022.mathnlp.xml +++ b/data/xml/2022.mathnlp.xml @@ -5,7 +5,7 @@ Proceedings of the 1st Workshop on Mathematical Natural Language Processing (MathNLP) DeborahFerreira MarcoValentino - AndreFreitas + AndreFreitas SeanWelleck MoritzSchubotz Association for Computational Linguistics @@ -62,7 +62,7 @@ Towards Autoformalization of Mathematics and Code Correctness: Experiments with Elementary Proofs GarettCunningham - RazvanBunescu + RazvanBunescu DavidJuedes 25-32 The ever-growing complexity of mathematical proofs makes their manual verification by mathematicians very cognitively demanding. Autoformalization seeks to address this by translating proofs written in natural language into a formal representation that is computer-verifiable via interactive theorem provers. In this paper, we introduce a semantic parsing approach, based on the Universal Transformer architecture, that translates elementary mathematical proofs into an equivalent formalization in the language of the Coq interactive theorem prover. The same architecture is also trained to translate simple imperative code decorated with Hoare triples into formally verifiable proofs of correctness in Coq. Experiments on a limited domain of artificial and human-written proofs show that the models generalize well to intermediate lengths not seen during training and variations in natural language. diff --git a/data/xml/2022.mia.xml b/data/xml/2022.mia.xml index fa836c1779..eaf8060da2 100644 --- a/data/xml/2022.mia.xml +++ b/data/xml/2022.mia.xml @@ -5,7 +5,7 @@ Proceedings of the Workshop on Multilingual Information Access (MIA) AkariAsai EunsolChoi - Jonathan H.Clark + Jonathan H.Clark JunjieHu Chia-HsuanLee JungoKasai @@ -111,7 +111,7 @@ SotaroTakeshita MarcoBombieri GoranGlavaš - Simone PaoloPonzetto + Simone PaoloPonzetto 77-90 This paper introduces our proposed system for the MIA Shared Task on Cross-lingual Openretrieval Question Answering (COQA). In this challenging scenario, given an input question the system has to gather evidence documents from a multilingual pool and generate from them an answer in the language of the question. We devised several approaches combining different model variants for three main components: Data Augmentation, Passage Retrieval, and Answer Generation. For passage retrieval, we evaluated the monolingual BM25 ranker against the ensemble of re-rankers based on multilingual pretrained language models (PLMs) and also variants of the shared task baseline, re-training it from scratch using a recently introduced contrastive loss that maintains a strong gradient signal throughout training by means of mixed negative samples. For answer generation, we focused on languageand domain-specialization by means of continued language model (LM) pretraining of existing multilingual encoders. Additionally, for both passage retrieval and answer generation, we augmented the training data provided by the task organizers with automatically generated question-answer pairs created from Wikipedia passages to mitigate the issue of data scarcity, particularly for the low-resource languages for which no training data were provided. Our results show that language- and domain-specialization as well as data augmentation help, especially for low-resource languages. 2022.mia-1.8 @@ -124,7 +124,7 @@ SumitAgarwal SurajTripathi TerukoMitamura - Carolyn PensteinRose + Carolyn PensteinRose 91-99 People speaking different kinds of languages search for information in a cross-lingual manner. They tend to ask questions in their language and expect the answer to be in the same language, despite the evidence lying in another language. In this paper, we present our approach for this task of cross-lingual open-domain question-answering. Our proposed method employs a passage reranker, the fusion-in-decoder technique for generation, and a wiki data entity-based post-processing system to tackle the inability to generate entities across all languages. Our end-2-end pipeline shows an improvement of 3 and 4.6 points on F1 and EM metrics respectively, when compared with the baseline CORA model on the XOR-TyDi dataset. We also evaluate the effectiveness of our proposed techniques in the zero-shot setting using the MKQA dataset and show an improvement of 5 points in F1 for high-resource and 3 points improvement for low-resource zero-shot languages. Our team, CMUmQA’s submission in the MIA-Shared task ranked 1st in the constrained setup for the dev and 2nd in the test setting. 2022.mia-1.9 diff --git a/data/xml/2022.mmlow.xml b/data/xml/2022.mmlow.xml index 9b87c55ecc..ec8e0cc65a 100644 --- a/data/xml/2022.mmlow.xml +++ b/data/xml/2022.mmlow.xml @@ -35,7 +35,7 @@ FaerieMattins R SrivarshanSelvaraj AntonetteShibani - AnandKumar M + AnandKumar M BharathiRaja Chakravarthi 9-17 of expressing relevant idea through social media platforms and forums. At the same time, these memes are trolled by a person who tries to get identified from the other internet users like social media users, chat rooms and blogs. The memes contain both textual and visual information. Based on the content of memes, they are trolled in online community. There is no restriction for language usage in online media. The present work focuses on whether memes are trolled or not trolled. The proposed multi modal approach achieved considerably better weighted average F1 score of 0.5437 compared to Unimodal approaches. The other performance metrics like precision, recall, accuracy and macro average have also been studied to observe the proposed system. diff --git a/data/xml/2022.mmmpie.xml b/data/xml/2022.mmmpie.xml index 8b647f7234..28f0812eb7 100644 --- a/data/xml/2022.mmmpie.xml +++ b/data/xml/2022.mmmpie.xml @@ -43,7 +43,7 @@ GuillaumeBernard HervéBredin OlivierGalibert - SophieRosset + SophieRosset 15–25 Knowledge transfer between neural language models is a widely used technique that has proven to improve performance in a multitude of natural language tasks, in particular with the recent rise of large pre-trained language models like BERT. Similarly, high cross-lingual transfer has been shown to occur in multilingual language models. Hence, it is of great importance to better understand this phenomenon as well as its limits. While most studies about cross-lingual transfer focus on training on independent and identically distributed (i.e. i.i.d.) samples, in this paper we study cross-lingual transfer in a continual learning setting on two sequence labeling tasks: slot-filling and named entity recognition. We investigate this by training multilingual BERT on sequences of 9 languages, one language at a time, on the MultiATIS++ and MultiCoNER corpora. Our first findings are that forward transfer between languages is retained although forgetting is present. Additional experiments show that lost performance can be recovered with as little as a single training epoch even if forgetting was high, which can be explained by a progressive shift of model parameters towards a better multilingual initialization. We also find that commonly used metrics might be insufficient to assess continual learning performance. 2022.mmmpie-1.3 @@ -65,8 +65,8 @@ Cost-Effective Language Driven Image Editing with <fixed-case>LX</fixed-case>-<fixed-case>DRIM</fixed-case> RodrigoSantos - AntónioBranco - João RicardoSilva + AntónioBranco + João RicardoSilva 31–43 Cross-modal language and image processing is envisaged as a way to improve language understanding by resorting to visual grounding, but only recently, with the emergence of neural architectures specifically tailored to cope with both modalities, has it attracted increased attention and obtained promising results. In this paper we address a cross-modal task of language-driven image design, in particular the task of altering a given image on the basis of language instructions. We also avoid the need for a specifically tailored architecture and resort instead to a general purpose model in the Transformer family. Experiments with the resulting tool, LX-DRIM, show very encouraging results, confirming the viability of the approach for language-driven image design while keeping it affordable in terms of compute and data. 2022.mmmpie-1.5 diff --git a/data/xml/2022.mmnlu.xml b/data/xml/2022.mmnlu.xml index 1f5a479439..030d3c6113 100644 --- a/data/xml/2022.mmnlu.xml +++ b/data/xml/2022.mmnlu.xml @@ -5,7 +5,7 @@ Proceedings of the Massively Multilingual Natural Language Understanding Workshop (MMNLU-22) JackFitzGerald KayRottmann - JuliaHirschberg + JuliaHirschberg MohitBansal AnnaRumshisky CharithPeris @@ -123,7 +123,7 @@ MaximeDe bruynUniversity of Antwerp EhsanLotfiUniversity of Antwerp JeskaBuhmann - WalterDaelemansUniversity of Antwerp + WalterDaelemansUniversity of Antwerp 69-82 We expect to interact with home assistants irrespective of our language. However, scaling the Natural Language Understanding pipeline to multiple languages while keeping the same level of accuracy remains a challenge. In this work, we leverage the inherent multilingual aspect of translation models for the task of multilingual intent classification and slot filling. Our experiments reveal that they work equally well with general-purpose multilingual text-to-text models. Furthermore, their accuracy can be further improved by artificially increasing the size of the training set. Unfortunately, increasing the training set also increases the overlap with the test set, leading to overestimating their true capabilities. As a result, we propose two new evaluation methods capable of accounting for an overlap between the training and test set. 2022.mmnlu-1.8 diff --git a/data/xml/2022.mrl.xml b/data/xml/2022.mrl.xml index 8f5dec0c97..341a8d5ace 100644 --- a/data/xml/2022.mrl.xml +++ b/data/xml/2022.mrl.xml @@ -79,7 +79,7 @@ Hossain ShaikhSaadiTechnical University of Munich ViktorHangyaLudwig-Maximilians-Universität München TobiasEderTechnical University of Munich - AlexanderFraserLudwig-Maximilians-Universität München + AlexanderFraserLudwig-Maximilians-Universität München 64-75 Contextualized word embeddings have emerged as the most important tool for performing NLP tasks in a large variety of languages. In order to improve the cross- lingual representation and transfer learning quality, contextualized embedding alignment techniques, such as mapping and model fine-tuning, are employed. Existing techniques however are time-, data- and computational resource-intensive. In this paper we analyze these techniques by utilizing three tasks: bilingual lexicon induction (BLI), word retrieval and cross-lingual natural language inference (XNLI) for a high resource (German-English) and a low resource (Bengali-English) language pair. In contrast to previous works which focus only on a few popular models, we compare five multilingual and seven monolingual language models and investigate the effect of various aspects on their performance, such as vocabulary size, number of languages used for training and number of parameters. Additionally, we propose a parameter-, data- and runtime-efficient technique which can be trained with 10% of the data, less than 10% of the time and have less than 5% of the trainable parameters compared to model fine-tuning. We show that our proposed method is competitive with resource heavy models, even outperforming them in some cases, even though it relies on less resource 2022.mrl-1.6 @@ -90,9 +90,9 @@ How Language-Dependent is Emotion Detection? Evidence from Multilingual <fixed-case>BERT</fixed-case> LunaDe BruyneLT3, University of Ghent PranaydeepSinghLT3, University of Ghent - OrpheeDe ClercqLT3, University of Ghent + OrpheeDe ClercqLT3, University of Ghent ElsLefeverLT3, University of Ghent - VeroniqueHosteLT3, University of Ghent + VeroniqueHosteLT3, University of Ghent 76-85 As emotion analysis in text has gained a lot of attention in the field of natural language processing, differences in emotion expression across languages could have consequences for how emotion detection models work. We evaluate the language-dependence of an mBERT-based emotion detection model by comparing language identification performance before and after fine-tuning on emotion detection, and performing (adjusted) zero-shot experiments to assess whether emotion detection models rely on language-specific information. When dealing with typologically dissimilar languages, we found evidence for the language-dependence of emotion detection. 2022.mrl-1.7 @@ -116,7 +116,7 @@ Emre CanAcikgozKoc University TilekChubakovUniversity of California Berkeley MugeKuralKoc University - GözdeŞahinKoc University + GözdeŞahinKoc University DenizYuretKoc University 100-105 This paper describes the KUIS-AI NLP team’s submission for the 1st Shared Task on Multilingual Clause-level Morphology (MRL2022). We present our work on all three parts of the shared task: inflection, reinflection, and analysis. We mainly explore two approaches: Trans- former models in combination with data augmentation, and exploiting the state-of-the-art language modeling techniques for morphological analysis. Data augmentation leads to a remarkable performance improvement for most of the languages in the inflection task. Prefix-tuning on pretrained mGPT model helps us to adapt reinflection and analysis tasks in a low-data setting. Additionally, we used pipeline architectures using publicly available open-source lemmatization tools and monolingual BERT- based morphological feature classifiers for rein- flection and analysis tasks, respectively. While Transformer architectures with data augmentation and pipeline architectures achieved the best results for inflection and reinflection tasks, pipelines and prefix-tuning on mGPT received the highest results for the analysis task. Our methods achieved first place in each of the three tasks and outperforms mT5-baseline with 89% for inflection, 80% for reflection, and 12% for analysis. Our code 1 is publicly available. @@ -131,7 +131,7 @@ UtkarshSabooUniversity of British Columbia XihanWuUniversity of British Columbia GarrettNicolaiUniversity of British Columbia - MiikkaSilfverbergUniversity of British Columbia + MiikkaSilfverbergUniversity of British Columbia 106-114 We present the University of British Columbia’s submission to the MRL shared task on multilingual clause-level morphology. Our submission extends word-level inflectional models to the clause-level in two ways: first, by evaluating the role that BPE has on the learning of inflectional morphology, and second, by evaluating the importance of a copy bias obtained through data hallucination. Experiments demonstrate a strong preference for language-tuned BPE and a copy bias over a vanilla transformer. The methods are complementary for inflection and analysis tasks – combined models see error reductions of 38% for inflection and 15.6% for analysis; However, this synergy does not hold for reinflection, which performs best under a BPE-only setting. A deeper analysis of the errors generated by our models illustrates that the copy bias may be too strong - the combined model produces predictions more similar to the copy-influenced system, despite the success of the BPE-model. 2022.mrl-1.11 @@ -170,8 +170,8 @@ VictoriaBasmovBar Ilan University ShadrackKirimiChuka University LydiaNishimweInria, Paris - BenoîtSagotInria, Paris - DjaméSeddahInria, Paris + BenoîtSagotInria, Paris + DjaméSeddahInria, Paris ReutTsarfatyBar Ilan University DuyguAtamanNew York University 134-146 diff --git a/data/xml/2022.mwe.xml b/data/xml/2022.mwe.xml index cc962fa4fb..be608cfb07 100644 --- a/data/xml/2022.mwe.xml +++ b/data/xml/2022.mwe.xml @@ -21,7 +21,7 @@ Figurative Language in Noun Compound Models across Target Properties, Domains and Time - SabineSchulte im Walde + SabineSchulte im Walde 1 A variety of distributional and multi-modal computational approaches has been suggested for modelling the degrees of compositionality across types of multiword expressions and languages. As the starting point of my talk, I will present standard variants of computational models that have been proven successful in predicting the compositionality of German and English noun compounds. The main part of the talk will then be concerned with investigating the general reliability of these standard models and discussing implications for gold-standard datasets: I will demonstrate how prediction results vary (i) across representations, (ii) across empirical target properties, (iii) across compound types, (iv) across levels of abstractness, and (v) for general- vs. domain-specific language. Finally, I will present a preliminary quantitative study on diachronic changes of noun compound meanings and compositionality over time. 2022.mwe-1.1 @@ -39,7 +39,7 @@ A General Framework for Detecting Metaphorical Collocations MarijaBrkić Bakarić LuciaNačinović Prskalo - MajaPopović + MajaPopović 3–8 This paper aims at identifying a specific set of collocations known under the term metaphorical collocations. In this type of collocations, a semantic shift has taken place in one of the components. Since the appropriate gold standard needs to be compiled prior to any serious endeavour to extract metaphorical collocations automatically, this paper first presents the steps taken to compile it, and then establishes appropriate evaluation framework. The process of compiling the gold standard is illustrated on one of the most frequent Croatian nouns, which resulted in the preliminary relation significance set. With the aim to investigate the possibility of facilitating the process, frequency, logDice, relation, and pretrained word embeddings are used as features in the classification task conducted on the logDice-based word sketch relation lists. Preliminary results are presented. 2022.mwe-1.3 @@ -69,7 +69,7 @@ Support Verb Constructions across the Ocean Sea JorgeBaptista - NunoMamede + NunoMamede SóniaReis 26–36 This paper analyses the support (or light) verb constructions (SVC) in a publicly available, manually annotated corpus of multiword expressions (MWE) in Brazilian Portuguese. The paper highlights several issues in the linguistic definitions therein adopted for these types of MWE, and reports the results from applying STRING, a rule-based parsing system, originally developed for European Portuguese, to this corpus from Brazilian Portuguese. The goal is two-fold: to improve the linguistic definition of SVC in the annotation task, as well as to gauge the major difficulties found when transposing linguistic resources between these two varieties of the same language. @@ -165,7 +165,7 @@ Xuan-RuiFan EdwardGow-Smith HarishTayyar Madabushi - CarolinaScarton + CarolinaScarton AlineVillavicencio 105–111 Deep neural models, in particular Transformer-based pre-trained language models, require a significant amount of data to train. This need for data tends to lead to problems when dealing with idiomatic multiword expressions (MWEs), which are inherently less frequent in natural text. As such, this work explores sample efficient methods of idiomaticity detection. In particular we study the impact of Pattern Exploit Training (PET), a few-shot method of classification, and BERTRAM, an efficient method of creating contextual embeddings, on the task of idiomaticity detection. In addition, to further explore generalisability, we focus on the identification of MWEs not present in the training data. Our experiments show that while these methods improve performance on English, they are much less effective on Portuguese and Galician, leading to an overall performance about on par with vanilla mBERT. Regardless, we believe sample efficient methods for both identifying and representing potentially idiomatic MWEs are very encouraging and hold significant potential for future exploration. diff --git a/data/xml/2022.naacl.xml b/data/xml/2022.naacl.xml index 488f4bf545..b9fb59f4d8 100644 --- a/data/xml/2022.naacl.xml +++ b/data/xml/2022.naacl.xml @@ -4,8 +4,8 @@ Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies MarineCarpuat - Marie-Catherinede Marneffe - Ivan VladimirMeza Ruiz + Marie-Catherinede Marneffe + Ivan VladimirMeza Ruiz Association for Computational Linguistics
Seattle, United States
July @@ -46,7 +46,7 @@
Language Model Augmented Monotonic Attention for Simultaneous Translation - Sathish ReddyIndurthi + Sathish ReddyIndurthi Mohd AbbasZaidi BeomseokLee Nikhil KumarLakumarapu @@ -62,7 +62,7 @@ What Makes a Good and Useful Summary? <fixed-case>I</fixed-case>ncorporating Users in Automatic Summarization Research MaartjeTer Hoeve JuliaKiseleva - Maartende Rijke + Maartende Rijke 46-75 Automatic text summarization has enjoyed great progress over the years and is used in numerous applications, impacting the lives of many. Despite this development, there is little research that meaningfully investigates how the current research focus in automatic summarization aligns with users’ needs. To bridge this gap, we propose a survey methodology that can be used to investigate the needs of users of automatically generated summaries. Importantly, these needs are dependent on the target group. Hence, we design our survey in such a way that it can be easily adjusted to investigate different user groups. In this work we focus on university students, who make extensive use of summaries during their studies. We find that the current research directions of the automatic summarization community do not fully align with students’ needs. Motivated by our findings, we present ways to mitigate this mismatch in future research on automatic summarization: we propose research directions that impact the design, the development and the evaluation of automatically generated summaries. 2022.naacl-main.4 @@ -86,7 +86,7 @@ Semantic Diversity in Dialogue with Natural Language Inference KatherineStasaski - MartiHearst + MartiHearst 85-98 Generating diverse, interesting responses to chitchat conversations is a problem for neural conversational agents. This paper makes two substantial contributions to improving diversity in dialogue generation. First, we propose a novel metric which uses Natural Language Inference (NLI) to measure the semantic diversity of a set of model responses for a conversation. We evaluate this metric using an established framework (Tevet and Berant, 2021) and find strong evidence indicating NLI Diversity is correlated with semantic diversity. Specifically, we show that the contradiction relation is more useful than the neutral relation for measuring this diversity and that incorporating the NLI model’s confidence achieves state-of-the-art results. Second, we demonstrate how to iteratively improve the semantic diversity of a sampled set of responses via a new generation procedure called Diversity Threshold Generation, which results in an average 137% increase in NLI Diversity compared to standard generation procedures. 2022.naacl-main.6 @@ -133,7 +133,7 @@ AntonChernyavskiy DmitryIlvovsky PavelKalinin - PreslavNakov + PreslavNakov 116-126 The use of contrastive loss for representation learning has become prominent in computer vision, and it is now getting attention in Natural Language Processing (NLP).Here, we explore the idea of using a batch-softmax contrastive loss when fine-tuning large-scale pre-trained transformer models to learn better task-specific sentence embeddings for pairwise sentence scoring tasks. We introduce and study a number of variations in the calculation of the loss as well as in the overall training procedure; in particular, we find that a special data shuffling can be quite important. Our experimental results show sizable improvements on a number of datasets and pairwise sentence scoring tasks including classification, ranking, and regression. Finally, we offer detailed analysis and discussion, which should be useful for researchers aiming to explore the utility of contrastive loss in NLP. 2022.naacl-main.9 @@ -184,7 +184,7 @@ PaulRöttger BertieVidgen DirkHovy - JanetPierrehumbert + JanetPierrehumbert 175-190 Labelled data is the foundation of most natural language processing tasks. However, labelling data is difficult and there often are diverse valid beliefs about what the correct data labels should be. So far, dataset creators have acknowledged annotator subjectivity, but rarely actively managed it in the annotation process. This has led to partly-subjective datasets that fail to serve a clear downstream use. To address this issue, we propose two contrasting paradigms for data annotation. The descriptive paradigm encourages annotator subjectivity, whereas the prescriptive paradigm discourages it. Descriptive annotation allows for the surveying and modelling of different beliefs, whereas prescriptive annotation enables the training of models that consistently apply one belief. We discuss benefits and challenges in implementing both paradigms, and argue that dataset creators should explicitly aim for one or the other to facilitate the intended use of their dataset. Lastly, we conduct an annotation experiment using hate speech data that illustrates the contrast between the two paradigms. 2022.naacl-main.13 @@ -229,7 +229,7 @@ QingyuYin BingYin TuoZhao - ChaoZhang + ChaoZhang 219-230 User sessions empower many search and recommendation tasks on a daily basis. Such session data are semi-structured, which encode heterogeneous relations between queries and products, and each item is described by the unstructured text. Despite recent advances in self-supervised learning for text or graphs, there lack of self-supervised learning models that can effectively capture both intra-item semantics and inter-item interactions for semi-structured sessions. To fill this gap, we propose CERES, a graph-based transformer model for semi-structured session data. CERES learns representations that capture both inter- and intra-item semantics with (1) a graph-conditioned masked language pretraining task that jointly learns from item text and item-item relations; and (2) a graph-conditioned transformer architecture that propagates inter-item contexts to item-level representations. We pretrained CERES using ~468 million Amazon sessions and find that CERES outperforms strong pretraining baselines by up to 9% in three session search and entity linking tasks. 2022.naacl-main.16 @@ -256,7 +256,7 @@ Shang-WenLi MingyeGao SeunghakYu - JamesGlass + JamesGlass 244-257 Pretrained language models have significantly improved the performance of downstream language understanding tasks, including extractive question answering, by providing high-quality contextualized word embeddings. However, training question answering models still requires large amounts of annotated data for specific domains. In this work, we propose a cooperative self-training framework, RGX, for automatically generating more non-trivial question-answer pairs to improve model performance. RGX is built upon a masked answer extraction task with an interactive learning environment containing an answer entity Recognizer, a question Generator, and an answer eXtractor. Given a passage with a masked entity, the generator generates a question around the entity, and the extractor is trained to extract the masked entity with the generated question and raw texts. The framework allows the training of question generation and answering models on any text corpora without annotation. We further leverage a self-training technique to improve the performance of both question generation and answer extraction models. Experiment results show that RGX outperforms the state-of-the-art (SOTA) pretrained language models and transfer learning approaches on standard question-answering benchmarks, and yields the new SOTA performance under given model size and transfer learning settings. 2022.naacl-main.18 @@ -336,7 +336,7 @@ KaitlynZhou Su LinBlodgett AdamTrischler - HalDaumé III + HalDaumé III KaheerSuleman AlexandraOlteanu 314-324 @@ -377,7 +377,7 @@ OzgeAlacam MoniqueMeuschke KaiLawonn - SinaZarrieß + SinaZarrieß 348-374 Natural language as a modality of interaction is becoming increasingly popular in the field of visualization. In addition to the popular query interfaces, other language-based interactions such as annotations, recommendations, explanations, or documentation experience growing interest. In this survey, we provide an overview of natural language-based interaction in the research area of visualization. We discuss a renowned taxonomy of visualization tasks and classify 119 related works to illustrate the state-of-the-art of how current natural language interfaces support their performance. We examine applied NLP methods and discuss human-machine dialogue structures with a focus on initiative, duration, and communicative functions in recent visualization-oriented dialogue interfaces. Based on this overview, we point out interesting areas for the future application of NLP methods in the field of visualization. 2022.naacl-main.27 @@ -409,7 +409,7 @@ MarkPerera ChristianPerstl EhudReiter - AnyaBelz + AnyaBelz AleksandarSavkov 385-394 A growing body of work uses Natural Language Processing (NLP) methods to automatically generate medical notes from audio recordings of doctor-patient consultations. However, there are very few studies on how such systems could be used in clinical practice, how clinicians would adjust to using them, or how system design should be influenced by such considerations. In this paper, we present three rounds of user studies, carried out in the context of developing a medical note generation system. We present, analyse and discuss the participating clinicians’ impressions and views of how the system ought to be adapted to be of value to them. Next, we describe a three-week test run of the system in a live telehealth clinical practice. Major findings include (i) the emergence of five different note-taking behaviours; (ii) the importance of the system generating notes in real time during the consultation; and (iii) the identification of a number of clinical use cases that could prove challenging for automatic note generation systems. @@ -465,7 +465,7 @@ RuijiaCheng AlisonSmith-Renner KeZhang - JoelTetreault + JoelTetreault AlejandroJaimes-Larrarte 431-455 Automatic text summarization systems commonly involve humans for preparing data or evaluating model performance, yet, there lacks a systematic understanding of humans’ roles, experience, and needs when interacting with or being assisted by AI. From a human-centered perspective, we map the design opportunities and considerations for human-AI interaction in text summarization and broader text generation tasks. We first conducted a systematic literature review of 70 papers, developing a taxonomy of five interactions in AI-assisted text generation and relevant design dimensions. We designed text summarization prototypes for each interaction. We then interviewed 16 users, aided by the prototypes, to understand their expectations, experience, and needs regarding efficiency, control, and trust with AI in text summarization and propose design considerations accordingly. @@ -493,7 +493,7 @@ KeZhang RuijiaCheng WenjuanZhang - JoelTetreault + JoelTetreault AlejandroJaimes-Larrarte 475-493 Automatic summarization methods are efficient but can suffer from low quality. In comparison, manual summarization is expensive but produces higher quality. Can humans and AI collaborate to improve summarization performance? In similar text generation tasks (e.g., machine translation), human-AI collaboration in the form of “post-editing” AI-generated text reduces human workload and improves the quality of AI output. Therefore, we explored whether post-editing offers advantages in text summarization. Specifically, we conducted an experiment with 72 participants, comparing post-editing provided summaries with manual summarization for summary quality, human efficiency, and user experience on formal (XSum news) and informal (Reddit posts) text. This study sheds valuable insights on when post-editing is useful for text summarization: it helped in some cases (e.g., when participants lacked domain knowledge) but not in others (e.g., when provided summaries include inaccurate information). Participants’ different editing strategies and needs for assistance offer implications for future human-AI summarization systems. @@ -522,7 +522,7 @@ JunYan YangXiao SagnikMukherjee - Bill YuchenLin + Bill YuchenLin RobinJia XiangRen 508-520 @@ -629,7 +629,7 @@ AkikoEriguchi ShufangXie TaoQin - HanyHassan + HanyHassan 600-606 Multilingual Neural Machine Translation (MNMT) enables one system to translate sentences from multiple source languages to multiple target languages, greatly reducing deployment costs compared with conventional bilingual systems. The MNMT training benefit, however, is often limited to many-to-one directions. The model suffers from poor performance in one-to-many and many-to-many with zero-shot setup. To address this issue, this paper discusses how to practically build MNMT systems that serve arbitrary X-Y translation directions while leveraging multilinguality with a two-stage training strategy of pretraining and finetuning. Experimenting with the WMT’21 multilingual translation task, we demonstrate that our systems outperform the conventional baselines of direct bilingual models and pivot translation models for most directions, averagely giving +6.0 and +4.1 BLEU, without the need for architecture change or extra data collection. Moreover, we also examine our proposed approach in an extremely large-scale data setting to accommodate practical deployment scenarios. 2022.naacl-main.44 @@ -791,7 +791,7 @@ On the Machine Learning of Ethical Judgments from Natural Language - ZeerakTalat + ZeerakTalat HagenBlix JosefValvoda Maya IndiraGanesh @@ -817,7 +817,7 @@ LianhuiQin YoungjaeYu RowanZellers - Noah A.Smith + Noah A.Smith YejinChoi 780-799 The dominant paradigm for neural text generation is left-to-right decoding from autoregressive language models. Constrained or controllable generation under complex lexical constraints, however, requires foresight to plan ahead feasible future paths. Drawing inspiration from the A^* search algorithm, we propose NeuroLogic A*esque, a decoding algorithm that incorporates heuristic estimates of future cost. We develop lookahead heuristics that are efficient for large-scale language models, making our method a drop-in replacement for common techniques such as beam search and top-k sampling. To enable constrained generation, we build on NeuroLogic decoding (Lu et al., 2021), combining its flexibility in incorporating logical constraints with A*esque estimates of future constraint satisfaction. Our approach outperforms competitive baselines on five generation tasks, and achieves new state-of-the-art performance on table-to-text generation, constrained machine translation, and keyword-constrained generation. The improvements are particularly notable on tasks that require complex constraint satisfaction or in few-shot or zero-shot settings. NeuroLogic A*esque illustrates the power of decoding for improving and enabling new capabilities of large-scale language models. @@ -878,7 +878,7 @@ ToshikiKawamoto HidetakaKamigaito KotaroFunakoshi - ManabuOkumura + ManabuOkumura 852-859 A repetition is a response that repeats words in the previous speaker’s utterance in a dialogue. Repetitions are essential in communication to build trust with others, as investigated in linguistic studies. In this work, we focus on repetition generation. To the best of our knowledge, this is the first neural approach to address repetition generation. We propose Weighted Label Smoothing, a smoothing method for explicitly learning which words to repeat during fine-tuning, and a repetition scoring method that can output more appropriate repetitions during decoding. We conducted automatic and human evaluations involving applying these methods to the pre-trained language model T5 for generating repetitions. The experimental results indicate that our methods outperformed baselines in both evaluations. 2022.naacl-main.62 @@ -987,7 +987,7 @@ LiangkeGui BoruiWang QiuyuanHuang - AlexanderHauptmann + AlexanderHauptmann YonatanBisk JianfengGao 956-968 @@ -1027,7 +1027,7 @@ VerenaKaynig-Fittkau JiuxiangGu FranckDernoncourt - QuanTran + QuanTran AniNenkova DineshManocha RajivJain @@ -1117,11 +1117,11 @@ Inducing and Using Alignments for Transition-based <fixed-case>AMR</fixed-case> Parsing AndrewDrozdov JiaweiZhou - RaduFlorian + RaduFlorian AndrewMcCallum TahiraNaseem YoonKim - RamónAstudillo + RamónAstudillo 1086-1098 Transition-based parsers for Abstract Meaning Representation (AMR) rely on node-to-word alignments. These alignments are learned separately from parser training and require a complex pipeline of rule-based components, pre-processing, and post-processing to satisfy domain-specific constraints. Parsers also train on a point-estimate of the alignment pipeline, neglecting the uncertainty due to the inherent ambiguity of alignment. In this work we explore two avenues for overcoming these limitations. First, we propose a neural aligner for AMR that learns node-to-word alignments without relying on complex pipelines. We subsequently explore a tighter integration of aligner and parser training by considering a distribution over oracle action sequences arising from aligner uncertainty. Empirical results show this approach leads to more accurate alignments and generalization better from the AMR2.0 to AMR3.0 corpora. We attain a new state-of-the art for gold-only trained models, matching silver-trained performance without the need for beam search on AMR3.0. 2022.naacl-main.80 @@ -1144,7 +1144,7 @@ <fixed-case>DREAM</fixed-case>: Improving Situational <fixed-case>QA</fixed-case> by First Elaborating the Situation YulingGu - BhavanaDalvi + BhavanaDalvi PeterClark 1115-1127 When people answer questions about a specific situation, e.g., “I cheated on my mid-term exam last week. Was that wrong?”, cognitive science suggests that they form a mental picture of that situation before answering. While we do not know how language models (LMs) answer such questions, we conjecture that they may answer more accurately if they are also provided with additional details about the question situation, elaborating the “scene”. To test this conjecture, we train a new model, DREAM, to answer questions that elaborate the scenes that situated questions are about, and then provide those elaborations as additional context to a question-answering (QA) model. We find that DREAM is able to create better scene elaborations (more accurate, useful, and consistent) than a representative state-of-the-art, zero-shot model (Macaw). We also find that using the scene elaborations as additional context improves the answer accuracy of a downstream QA system, including beyond that obtainable by simply further fine-tuning the QA system on DREAM’s training data. These results suggest that adding focused elaborations about a situation can improve a system’s reasoning about it, and may serve as an effective way of injecting new scenario-based knowledge into QA models. Finally, our approach is dataset-neutral; we observe improved QA performance across different models, with even bigger gains on models with fewer parameters. @@ -1185,7 +1185,7 @@ SatwikKottur AhmadBeirami ShahinShayandeh - PaulCrook + PaulCrook AlborzGeramifard ZhouYu ChinnadhuraiSankar @@ -1213,7 +1213,7 @@ Towards a Progression-Aware Autonomous Dialogue Agent AbrahamSanders - TomekStrzalkowski + TomekStrzalkowski MeiSi AlbertChang DeepanshuDey @@ -1263,7 +1263,7 @@ HaonanLi MartinTomko MariaVasardani - TimothyBaldwin + TimothyBaldwin 1250-1260 Most existing reading comprehension datasets focus on single-span answers, which can be extracted as a single contiguous span from a given text passage. Multi-span questions, i.e., questions whose answer is a series of multiple discontiguous spans in the text, are common real life but are less studied. In this paper, we present MultiSpanQA, a new dataset that focuses on multi-span questions. Raw questions and contexts are extracted from the Natural Questions dataset. After multi-span re-annotation, MultiSpanQA consists of over a total of 6,000 multi-span questions in the basic version, and over 19,000 examples with unanswerable questions, and questions with single-, and multi-span answers in the expanded version. We introduce new metrics for the purposes of multi-span question answering evaluation, and establish several baselines using advanced models. Finally, we propose a new model which beats all baselines and achieves state-of-the-art on our dataset. 2022.naacl-main.90 @@ -1290,7 +1290,7 @@ Theory-Grounded Measurement of <fixed-case>U</fixed-case>.<fixed-case>S</fixed-case>. Social Stereotypes in <fixed-case>E</fixed-case>nglish Language Models Yang TristaCao AnnaSotnikova - HalDaumé III + HalDaumé III RachelRudinger LindaZou 1276-1295 @@ -1304,7 +1304,7 @@ Sort by Structure: Language Model Ranking as Dependency Probing MaxMüller-Eberstein Robvan der Goot - BarbaraPlank + BarbaraPlank 1296-1307 Making an informed choice of pre-trained language model (LM) is critical for performance, yet environmentally costly, and as such widely underexplored. The field of Computer Vision has begun to tackle encoder ranking, with promising forays into Natural Language Processing, however they lack coverage of linguistic tasks such as structured prediction. We propose probing to rank LMs, specifically for parsing dependencies in a given language, by measuring the degree to which labeled trees are recoverable from an LM’s contextualized embeddings. Across 46 typologically and architecturally diverse LM-language pairs, our probing approach predicts the best LM choice 79% of the time using orders of magnitude less compute than training a full parser. Within this study, we identify and analyze one recently proposed decoupled LM—RemBERT—and find it strikingly contains less inherent dependency information, but often yields the best parser after full fine-tuning. Without this outlier our approach identifies the best LM in 89% of cases. 2022.naacl-main.93 @@ -1314,7 +1314,7 @@ Quantifying Synthesis and Fusion and their Impact on Machine Translation - ArturoOncevay + ArturoOncevay DuyguAtaman NielsVan Berkel BarryHaddow @@ -1343,7 +1343,7 @@ Efficient Hierarchical Domain Adaptation for Pretrained Language Models AlexandraChronopoulou - MatthewPeters + MatthewPeters JesseDodge 1336-1351 The remarkable success of large language models has been driven by dense models trained on massive unlabeled, unstructured corpora. These corpora typically contain text from diverse, heterogeneous sources, but information about the source of the text is rarely used during training. Transferring their knowledge to a target domain is typically done by continuing training in-domain. In this paper, we introduce a method to permit domain adaptation to many diverse domains using a computationally efficient adapter approach. Our method is based on the observation that textual domains are partially overlapping, and we represent domains as a hierarchical tree structure where each node in the tree is associated with a set of adapter weights. When combined with a frozen pretrained language model, this approach enables parameter sharing among related domains, while avoiding negative interference between unrelated ones. Experimental results with GPT-2 and a large fraction of the 100 most represented websites in C4 show across-the-board improvements in-domain. We additionally provide an inference time algorithm for a held-out domain and show that averaging over multiple paths through the tree enables further gains in generalization, while adding only a marginal cost to inference. @@ -1358,7 +1358,7 @@ BertieVidgen PaulRottger TristanThrush - Scott A.Hale + Scott A.Hale 1352-1368 Detecting online hate is a complex task, and low-performing models have harmful consequences when used for sensitive applications such as content moderation. Emoji-based hate is an emerging challenge for automated detection. We present HatemojiCheck, a test suite of 3,930 short-form statements that allows us to evaluate performance on hateful language expressed with emoji. Using the test suite, we expose weaknesses in existing hate detection models. To address these weaknesses, we create the HatemojiBuild dataset using a human-and-model-in-the-loop approach. Models built with these 5,912 adversarial examples perform substantially better at detecting emoji-based hate, while retaining strong performance on text-only hate. Both HatemojiCheck and HatemojiBuild are made publicly available. 2022.naacl-main.97 @@ -1370,7 +1370,7 @@ On the Economics of Multilingual Few-shot Learning: Modeling the Cost-Performance Trade-offs of Machine Translated and Manual Data KabirAhuja MonojitChoudhury - SandipanDandapat + SandipanDandapat 1369-1384 Borrowing ideas from Production functions in micro-economics, in this paper we introduce a framework to systematically evaluate the performance and cost trade-offs between machine-translated and manually-created labelled data for task-specific fine-tuning of massively multilingual language models. We illustrate the effectiveness of our framework through a case-study on the TyDIQA-GoldP dataset. One of the interesting conclusion of the study is that if the cost of machine translation is greater than zero, the optimal performance at least cost is always achieved with at least some or only manually-created data. To our knowledge, this is the first attempt towards extending the concept of production functions to study data collection strategies for training multilingual models, and can serve as a valuable tool for other similar cost vs data trade-offs in NLP. 2022.naacl-main.98 @@ -1395,10 +1395,10 @@ PatrickFernandes AntónioFarinhas RicardoRei - José G.C. de Souza + José G.C. de Souza PerezOgayo GrahamNeubig - AndreMartins + AndreMartins 1396-1412 Despite the progress in machine translation quality estimation and evaluation in the last years, decoding in neural machine translation (NMT) is mostly oblivious to this and centers around finding the most probable translation according to the model (MAP decoding), approximated with beam search. In this paper, we bring together these two lines of research and propose quality-aware decoding for NMT, by leveraging recent breakthroughs in reference-free and reference-based MT evaluation through various inference methods like N-best reranking and minimum Bayes risk decoding. We perform an extensive comparison of various possible candidate generation and ranking methods across four datasets and two model classes and find that quality-aware decoding consistently outperforms MAP-based decoding according both to state-of-the-art automatic metrics (COMET and BLEURT) and to human assessments. 2022.naacl-main.100 @@ -1426,7 +1426,7 @@ LingkaiKong JieyuZhang RongzhiZhang - ChaoZhang + ChaoZhang 1422-1436 Although fine-tuning pre-trained language models (PLMs) renders strong performance in many NLP tasks, it relies on excessive labeled data. Recently, researchers have resorted to active fine-tuning for enhancing the label efficiency of PLM fine-tuning, but existing methods of this type usually ignore the potential of unlabeled data. We develop AcTune, a new framework that improves the label efficiency of active PLM fine-tuning by unleashing the power of unlabeled data via self-training. AcTune switches between data annotation and model self-training based on uncertainty: the unlabeled samples of high-uncertainty are selected for annotation, while the ones from low-uncertainty regions are used for model self-training. Additionally, we design (1) a region-aware sampling strategy to avoid redundant samples when querying annotations and (2) a momentum-based memory bank to dynamically aggregate the model’s pseudo labels to suppress label noise in self-training. Experiments on 6 text classification datasets show that AcTune outperforms the strongest active learning and self-training baselines and improves the label efficiency of PLM fine-tuning by 56.2% on average. Our implementation is available at https://github.com/yueyu1030/actune. 2022.naacl-main.102 @@ -1465,7 +1465,7 @@ Forecasting <fixed-case>COVID</fixed-case>-19 Caseloads Using Unsupervised Embedding Clusters of Social Media Posts FelixDrinkall StefanZohren - JanetPierrehumbert + JanetPierrehumbert 1471-1484 We present a novel approach incorporating transformer-based language models into infectious disease modelling. Text-derived features are quantified by tracking high-density clusters of sentence-level representations of Reddit posts within specific US states’ COVID-19 subreddits. We benchmark these clustered embedding features against features extracted from other high-quality datasets. In a threshold-classification task, we show that they outperform all other feature types at predicting upward trend signals, a significant result for infectious disease modelling in areas where epidemiological data is unreliable. Subsequently, in a time-series forecasting task, we fully utilise the predictive power of the caseload and compare the relative strengths of using different supplementary datasets as covariate feature sets in a transformer-based time-series model. 2022.naacl-main.105 @@ -1478,7 +1478,7 @@ RahulKumar SandeepMathias SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya 1485-1495 Most research in the area of automatic essay grading (AEG) is geared towards scoring the essay holistically while there has also been little work done on scoring individual essay traits. In this paper, we describe a way to score essays using a multi-task learning (MTL) approach, where scoring the essay holistically is the primary task, and scoring the essay traits is the auxiliary task. We compare our results with a single-task learning (STL) approach, using both LSTMs and BiLSTMs. To find out which traits work best for different types of essays, we conduct ablation tests for each of the essay traits. We also report the runtime and number of training parameters for each system. We find that MTL-based BiLSTM system gives the best results for scoring the essay holistically, as well as performing well on scoring the essay traits. The MTL systems also give a speed-up of between 2.30 to 3.70 times the speed of the STL system, when it comes to scoring the essay and all the traits. 2022.naacl-main.106 @@ -1667,7 +1667,7 @@ JingZhao YouzhengWu XiaodongHe - TiejunZhao + TiejunZhao 1655-1666 Machine reading comprehension (MRC) that requires discrete reasoning involving symbolic operations, e.g., addition, sorting, and counting, is a challenging task. According to this nature, semantic parsing-based methods predict interpretable but complex logical forms. However, logical form generation is nontrivial and even a little perturbation in a logical form will lead to wrong answers. To alleviate this issue, multi-predictor -based methods are proposed to directly predict different types of answers and achieve improvements. However, they ignore the utilization of symbolic operations and encounter a lack of reasoning ability and interpretability. To inherit the advantages of these two types of methods, we propose OPERA, an operation-pivoted discrete reasoning framework, where lightweight symbolic operations (compared with logical forms) as neural modules are utilized to facilitate the reasoning ability and interpretability. Specifically, operations are first selected and then softly executed to simulate the answer reasoning procedure. Extensive experiments on both DROP and RACENum datasets show the reasoning ability of OPERA. Moreover, further analysis verifies its interpretability. 2022.naacl-main.119 @@ -1809,7 +1809,7 @@ Non-Autoregressive Machine Translation: It’s Not as Fast as it Seems - JindřichHelcl + JindřichHelcl BarryHaddow AlexandraBirch 1780-1790 @@ -1836,7 +1836,7 @@ Combining Humor and Sarcasm for Improving Political Parody Detection XiaoAo DanaeSanchez Villegas - DanielPreotiuc-Pietro + DanielPreotiuc-Pietro NikolaosAletras 1800-1807 Parody is a figurative device used for mimicking entities for comedic or critical purposes. Parody is intentionally humorous and often involves sarcasm. This paper explores jointly modelling these figurative tropes with the goal of improving performance of political parody detection in tweets. To this end, we present a multi-encoder model that combines three parallel encoders to enrich parody-specific representations with humor and sarcasm information. Experiments on a publicly available data set of political parody tweets demonstrate that our approach outperforms previous state-of-the-art methods. @@ -1903,7 +1903,7 @@ The Devil is in the Details: On the Pitfalls of Vocabulary Selection in Neural Machine Translation TobiasDomhan EvaHasler - KeTran + KeTran SonyTrenous BillByrne FelixHieber @@ -1936,7 +1936,7 @@ Kuan-HaoHuang ElizabethBoschee ScottMiller - PremNatarajan + PremNatarajan Kai-WeiChang NanyunPeng 1890-1908 @@ -2081,7 +2081,7 @@ <fixed-case>CS</fixed-case>1<fixed-case>QA</fixed-case>: A Dataset for Assisting Code-based Question Answering in an Introductory Programming Course ChangyoonLee YeonSeonwoo - AliceOh + AliceOh 2026-2040 We introduce CS1QA, a dataset for code-based question answering in the programming education domain. CS1QA consists of 9,237 question-answer pairs gathered from chat logs in an introductory programming class using Python, and 17,698 unannotated chat data with code. Each question is accompanied with the student’s code, and the portion of the code relevant to answering the question. We carefully design the annotation process to construct CS1QA, and analyze the collected dataset in detail. The tasks for CS1QA are to predict the question type, the relevant code snippet given the question and the code and retrieving an answer from the annotated corpus. Results for the experiments on several baseline models are reported and thoroughly analyzed. The tasks for CS1QA challenge models to understand both the code and natural language. This unique dataset can be used as a benchmark for source code comprehension and question answering in the educational setting. 2022.naacl-main.148 @@ -2095,7 +2095,7 @@ KemalKurniawan LeaFrermann PhilipSchulz - TrevorCohn + TrevorCohn 2041-2054 Providing technologies to communities or domains where training data is scarce or protected e.g., for privacy reasons, is becoming increasingly important. To that end, we generalise methods for unsupervised transfer from multiple input models for structured prediction. We show that the means of aggregating over the input models is critical, and that multiplying marginal probabilities of substructures to obtain high-probability structures for distant supervision is substantially better than taking the union of such structures over the input models, as done in prior work. Testing on 18 languages, we demonstrate that the method works in a cross-lingual setting, considering both dependency parsing and part-of-speech structured prediction problems. Our analyses show that the proposed method produces less noisy labels for the distant supervision. 2022.naacl-main.149 @@ -2150,7 +2150,7 @@ Reference-free Summarization Evaluation via Semantic Correlation and Compression Ratio YizhuLiu QiJia - KennyZhu + KennyZhu 2109-2115 A document can be summarized in a number of ways. Reference-based evaluation of summarization has been criticized for its inflexibility. The more sufficient the number of abstracts, the more accurate the evaluation results. However, it is difficult to collect sufficient reference summaries. In this paper, we propose a new automatic reference-free evaluation metric that compares semantic distribution between source document and summary by pretrained language models and considers summary compression ratio. The experiments show that this metric is more consistent with human evaluation in terms of coherence, consistency, relevance and fluency. 2022.naacl-main.153 @@ -2274,7 +2274,7 @@ Yu JinKim Beong-wooKwak YoungwookKim - Reinald KimAmplayo + Reinald KimAmplayo Seung-wonHwang JinyoungYeo 2244-2257 @@ -2424,7 +2424,7 @@ SaichethanReddy AnindyaDas SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya 2436-2449 Mental Health Disorders continue plaguing humans worldwide. Aggravating this situation is the severe shortage of qualified and competent mental health professionals (MHPs), which underlines the need for developing Virtual Assistants (VAs) that can assist MHPs. The data+ML for automation can come from platforms that allow visiting and posting messages in peer-to-peer anonymous manner for sharing their experiences (frequently stigmatized) and seeking support. In this paper, we propose a VA that can act as the first point of contact and comfort for mental health patients. We curate a dataset, Motivational VA: MotiVAte comprising of 7k dyadic conversations collected from a peer-to-peer support platform. The system employs two mechanisms: (i) Mental Illness Classification: an attention based BERT classifier that outputs the mental disorder category out of the 4 categories, viz., Major Depressive Disorder (MDD), Anxiety, Obsessive Compulsive Disorder (OCD) and Post-traumatic Stress Disorder (PTSD), based on the input ongoing dialog between the support seeker and the VA; and (ii) Mental Illness Conditioned Motivational Dialogue Generation (MI-MDG): a sentiment driven Reinforcement Learning (RL) based motivational response generator. The empirical evaluation demonstrates the system capability by way of outperforming several baselines. 2022.naacl-main.174 @@ -2434,7 +2434,7 @@ <fixed-case>S</fixed-case>ue<fixed-case>N</fixed-case>es: A Weakly Supervised Approach to Evaluating Single-Document Summarization via Negative Sampling - ForrestBao + ForrestBao GeLuo HebiLi MinghuiQiu @@ -2493,11 +2493,11 @@ <fixed-case>A</fixed-case>nswer<fixed-case>S</fixed-case>umm: A Manually-Curated Dataset and Pipeline for Answer Summarization - AlexanderFabbri + AlexanderFabbri XiaojianWu SriniIyer HaoranLi - MonaDiab + MonaDiab 2508-2520 Community Question Answering (CQA) fora such as Stack Overflow and Yahoo! Answers contain a rich resource of answers to a wide range of community-based questions. Each question thread can receive a large number of answers with different perspectives. One goal of answer summarization is to produce a summary that reflects the range of answer perspectives. A major obstacle for this task is the absence of a dataset to provide supervision for producing such summaries. Recent works propose heuristics to create such data, but these are often noisy and do not cover all answer perspectives present. This work introduces a novel dataset of 4,631 CQA threads for answer summarization curated by professional linguists. Our pipeline gathers annotations for all subtasks of answer summarization, including relevant answer sentence selection, grouping these sentences based on perspectives, summarizing each perspective, and producing an overall summary. We analyze and benchmark state-of-the-art models on these subtasks and introduce a novel unsupervised approach for multi-perspective data augmentation that boosts summarization performance according to automatic evaluation. Finally, we propose reinforcement learning rewards to improve factual consistency and answer coverage and analyze areas for improvement. 2022.naacl-main.180 @@ -2584,7 +2584,7 @@ <fixed-case>QAF</fixed-case>act<fixed-case>E</fixed-case>val: Improved <fixed-case>QA</fixed-case>-Based Factual Consistency Evaluation for Summarization - AlexanderFabbri + AlexanderFabbri Chien-ShengWu WenhaoLiu CaimingXiong @@ -2649,7 +2649,7 @@ Necessity and Sufficiency for Explaining Text Classifiers: A Case Study in Hate Speech Detection EsmaBalkir IsarNejadgholi - KathleenFraser + KathleenFraser SvetlanaKiritchenko 2672-2686 We present a novel feature attribution method for explaining text classifiers, and analyze it in the context of hate speech detection. Although feature attribution models usually provide a single importance score for each token, we instead provide two complementary and theoretically-grounded scores – necessity and sufficiency – resulting in more informative explanations. We propose a transparent method that calculates these values by generating explicit perturbations of the input text, allowing the importance scores themselves to be explainable. We employ our method to explain the predictions of different hate speech detection models on the same set of curated examples from a test suite, and show that different values of necessity and sufficiency for identity terms correspond to different kinds of false positive errors, exposing sources of classifier bias against marginalized groups. @@ -2676,12 +2676,12 @@ <fixed-case>R</fixed-case>e2<fixed-case>G</fixed-case>: Retrieve, Rerank, Generate - MichaelGlass + MichaelGlass GaetanoRossiello Md Faisal MahbubChowdhury AnkitaNaik PengshanCai - AlfioGliozzo + AlfioGliozzo 2701-2715 As demonstrated by GPT-3 and T5, transformers grow in capability as parameter spaces become larger and larger. However, for tasks that require a large amount of knowledge, non-parametric memory allows models to grow dramatically with a sub-linear increase in computational cost and GPU memory requirements. Recent models such as RAG and REALM have introduced retrieval into conditional generation. These models incorporate neural initial retrieval from a corpus of passages. We build on this line of research, proposing Re2G, which combines both neural initial retrieval and reranking into a BART-based sequence-to-sequence generation. Our reranking approach also permits merging retrieval results from sources with incomparable scores, enabling an ensemble of BM25 and neural initial retrieval. To train our system end-to-end, we introduce a novel variation of knowledge distillation to train the initial retrieval, reranker and generation using only ground truth on the target sequence output. We find large gains in four diverse tasks: zero-shot slot filling, question answering, fact checking and dialog, with relative gains of 9% to 34% over the previous state-of-the-art on the KILT leaderboard. We make our code available as open source. 2022.naacl-main.194 @@ -2723,7 +2723,7 @@ MasahiroKaneko AizhanImankulova DanushkaBollegala - NaoakiOkazaki + NaoakiOkazaki 2740-2750 Masked Language Models (MLMs) pre-trained by predicting masked tokens on large corpora have been used successfully in natural language processing tasks for a variety of languages. Unfortunately, it was reported that MLMs also learn discriminative biases regarding attributes such as gender and race. Because most studies have focused on MLMs in English, the bias of MLMs in other languages has rarely been investigated. Manual annotation of evaluation data for languages other than English has been challenging due to the cost and difficulty in recruiting annotators. Moreover, the existing bias evaluation methods require the stereotypical sentence pairs consisting of the same context with attribute words (e.g. He/She is a nurse).We propose Multilingual Bias Evaluation (MBE) score, to evaluate bias in various languages using only English attribute word lists and parallel corpora between the target language and English without requiring manually annotated data. We evaluated MLMs in eight languages using the MBE and confirmed that gender-related biases are encoded in MLMs for all those languages. We manually created datasets for gender bias in Japanese and Russian to evaluate the validity of the MBE.The results show that the bias scores reported by the MBE significantly correlates with that computed from the above manually created datasets and the existing English datasets for gender bias. 2022.naacl-main.197 @@ -2748,7 +2748,7 @@ Falsesum: Generating Document-level <fixed-case>NLI</fixed-case> Examples for Recognizing Factual Inconsistency in Summarization PrasetyaUtama JoshuaBambrick - NafiseMoosavi + NafiseMoosavi IrynaGurevych 2763-2776 Neural abstractive summarization models are prone to generate summaries that are factually inconsistent with their source documents. Previous work has introduced the task of recognizing such factual inconsistency as a downstream application of natural language inference (NLI). However, state-of-the-art NLI models perform poorly in this context due to their inability to generalize to the target task. In this work, we show that NLI models can be effective for this task when the training data is augmented with high-quality task-oriented examples. We introduce Falsesum, a data generation pipeline leveraging a controllable text generation model to perturb human-annotated summaries, introducing varying types of factual inconsistencies. Unlike previously introduced document-level NLI datasets, our generated dataset contains examples that are diverse and inconsistent yet plausible. We show that models trained on a Falsesum-augmented NLI dataset improve the state-of-the-art performance across four benchmarks for detecting factual inconsistency in summarization. @@ -2762,7 +2762,7 @@ BesnikFetahu AnjieFang OlegRokhlenko - ShervinMalmasi + ShervinMalmasi 2777-2790 Named entity recognition (NER) in a real-world setting remains challenging and is impacted by factors like text genre, corpus quality, and data availability. NER models trained on CoNLL do not transfer well to other domains, even within the same language. This is especially the case for multi-lingual models when applied to low-resource languages, and is mainly due to missing entity information. We propose an approach that with limited effort and data, addresses the NER knowledge gap across languages and domains. Our novel approach uses a token-level gating layer to augment pre-trained multilingual transformers with gazetteers containing named entities (NE) from a target language or domain. This approach provides the flexibility to jointly integrate both textual and gazetteer information dynamically: entity knowledge from gazetteers is used only when a token’s textual representation is insufficient for the NER task. Evaluation on several languages and domains demonstrates: (i) a high mismatch of reported NER performance on CoNLL vs. domain specific datasets, (ii) gazetteers significantly improve NER performance across languages and domains, and (iii) gazetteers can be flexibly incorporated to guide knowledge transfer. On cross-lingual transfer we achieve an improvement over the baseline with F1=+17.6%, and with F1=+21.3% for cross-domain transfer. 2022.naacl-main.200 @@ -2774,7 +2774,7 @@ <fixed-case>M</fixed-case>eta<fixed-case>ICL</fixed-case>: Learning to Learn In Context SewonMin MikeLewis - LukeZettlemoyer + LukeZettlemoyer HannanehHajishirzi 2791-2809 We introduce MetaICL (Meta-training for In-Context Learning), a new meta-training framework for few-shot learning where a pretrained language model is tuned to do in-context learning on a large set of training tasks. This meta-training enables the model to more effectively learn a new task in context at test time, by simply conditioning on a few training examples with no parameter updates or task-specific templates. We experiment on a large, diverse collection of tasks consisting of 142 NLP datasets including classification, question answering, natural language inference, paraphrase detection and more, across seven different meta-training/target splits. MetaICL outperforms a range of baselines including in-context learning without meta-training and multi-task learning followed by zero-shot transfer. We find that the gains are particularly significant for target tasks that have domain shifts from the meta-training tasks, and that using a diverse set of the meta-training tasks is key to improvements. We also show that MetaICL approaches (and sometimes beats) the performance of models fully finetuned on the target task training data, and outperforms much bigger models with nearly 8x parameters. @@ -2791,7 +2791,7 @@ MohitBansal HengJi YangLiu - DilekHakkani-Tur + DilekHakkani-Tur 2810-2823 Providing conversation models with background knowledge has been shown to make open-domain dialogues more informative and engaging. Existing models treat knowledge selection as a sentence ranking or classification problem where each sentence is handled individually, ignoring the internal semantic connection between sentences. In this work, we propose to automatically convert the background knowledge documents into document semantic graphs and then perform knowledge selection over such graphs. Our document semantic graphs preserve sentence-level information through the use of sentence nodes and provide concept connections between sentences. We apply multi-task learning to perform sentence-level knowledge selection and concept-level knowledge selection, showing that it improves sentence-level selection. Our experiments show that our semantic graph-based knowledge selection improves over sentence selection baselines for both the knowledge selection task and the end-to-end response generation task on HollE and improves generalization on unseen topics in WoW. 2022.naacl-main.202 @@ -2883,7 +2883,7 @@ Learning to Borrow– Relation Representation for Without-Mention Entity-Pairs for Knowledge Graph Completion HudaHakami MonaHakami - AngroshMandya + AngroshMandya DanushkaBollegala 2887-2898 Prior work on integrating text corpora with knowledge graphs (KGs) to improve Knowledge Graph Embedding (KGE) have obtained good performance for entities that co-occur in sentences in text corpora. Such sentences (textual mentions of entity-pairs) are represented as Lexicalised Dependency Paths (LDPs) between two entities. However, it is not possible to represent relations between entities that do not co-occur in a single sentence using LDPs. In this paper, we propose and evaluate several methods to address this problem, where we borrow LDPs from the entity pairs that co-occur in sentences in the corpus (i.e. with mentions entity pairs) to represent entity pairs that do not co-occur in any sentence in the corpus (i.e. without mention entity pairs). We propose a supervised borrowing method, SuperBorrow, that learns to score the suitability of an LDP to represent a without-mentions entity pair using pre-trained entity embeddings and contextualised LDP representations. Experimental results show that SuperBorrow improves the link prediction performance of multiple widely-used prior KGE methods such as TransE, DistMult, ComplEx and RotatE. @@ -2921,7 +2921,7 @@ WangXu KehaiChen LiliMou - TiejunZhao + TiejunZhao 2920-2929 Document-level relation extraction (DocRE) aims to determine the relation between two entities from a document of multiple sentences. Recent studies typically represent the entire document by sequence- or graph-based models to predict the relations of all entity pairs. However, we find that such a model is not robust and exhibits bizarre behaviors: it predicts correctly when an entire test document is fed as input, but errs when non-evidence sentences are removed. To this end, we propose a Sentence Importance Estimation and Focusing (SIEF) framework for DocRE, where we design a sentence importance score and a sentence focusing loss, encouraging DocRE models to focus on evidence sentences. Experimental results on two domains show that our SIEF not only improves overall performance, but also makes DocRE models more robust. Moreover, SIEF is a general framework, shown to be effective when combined with a variety of base DocRE models. 2022.naacl-main.212 @@ -2933,7 +2933,7 @@ Are All the Datasets in Benchmark Necessary? A Pilot Study of Dataset Evaluation for Text Classification YangXiao JinlanFu - See-KiongNg + See-KiongNg PengfeiLiu 2930-2941 In this paper, we ask the research question of whether all the datasets in the benchmark are necessary. We approach this by first characterizing the distinguishability of datasets when comparing different systems. Experiments on 9 datasets and 36 systems show that several existing benchmark datasets contribute little to discriminating top-scoring systems, while those less used datasets exhibit impressive discriminative power. We further, taking the text classification task as a case study, investigate the possibility of predicting dataset discrimination based on its properties (e.g., average sentence length). Our preliminary experiments promisingly show that given a sufficient number of training experimental records, a meaningful predictor can be learned to estimate dataset discrimination over unseen datasets. We released all datasets with features explored in this work on DataLab. @@ -3060,8 +3060,8 @@ A Few Thousand Translations Go a Long Way! Leveraging Pre-trained Models for <fixed-case>A</fixed-case>frican News Translation - David IfeoluwaAdelani - Jesujoba OluwadaraAlabi + David IfeoluwaAdelani + Jesujoba OluwadaraAlabi AngelaFan JuliaKreutzer XiaoyuShen @@ -3324,7 +3324,7 @@ IkuyaYamada KokiWashio HiroyukiShindo - YujiMatsumoto + YujiMatsumoto 3264-3271 We propose a global entity disambiguation (ED) model based on BERT. To capture global contextual information for ED, our model treats not only words but also entities as input tokens, and solves the task by sequentially resolving mentions to their referent entities and using resolved entities as inputs at each step. We train the model using a large entity-annotated corpus obtained from Wikipedia. We achieve new state-of-the-art results on five standard ED datasets: AIDA-CoNLL, MSNBC, AQUAINT, ACE2004, and WNED-WIKI. The source code and model checkpoint are available at https://github.com/studio-ousia/luke. 2022.naacl-main.238 @@ -3357,7 +3357,7 @@ XinyuZhang HaoJiang ZhaoCao - XuanjingHuang + XuanjingHuang XipengQiu 3288-3303 Supersized pre-trained language models have pushed the accuracy of various natural language processing (NLP) tasks to a new state-of-the-art (SOTA). Rather than pursuing the reachless SOTA accuracy, more and more researchers start paying attention to model efficiency and usability. Different from accuracy, the metric for efficiency varies across different studies, making them hard to be fairly compared. To that end, this work presents ELUE (Efficient Language Understanding Evaluation), a standard evaluation, and a public leaderboard for efficient NLP models. ELUE is dedicated to depicting the Pareto Frontier for various language understanding tasks, such that it can tell whether and how much a method achieves Pareto improvement. Along with the benchmark, we also release a strong baseline, ElasticBERT, which allows BERT to exit at any layer in both static and dynamic ways. We demonstrate the ElasticBERT, despite its simplicity, outperforms or performs on par with SOTA compressed and early exiting models. With ElasticBERT, the proposed ELUE has a strong Pareto Frontier and makes a better evaluation for efficient NLP models. @@ -3449,7 +3449,7 @@ GuanYuWu AiweiLiu LijieWen - PhilipYu + PhilipYu 3362-3376 The explosion of misinformation spreading in the media ecosystem urges for automated fact-checking. While misinformation spans both geographic and linguistic boundaries, most work in the field has focused on English. Datasets and tools available in other languages, such as Chinese, are limited. In order to bridge this gap, we construct CHEF, the first CHinese Evidence-based Fact-checking dataset of 10K real-world claims. The dataset covers multiple domains, ranging from politics to public health, and provides annotated evidence retrieved from the Internet. Further, we develop established baselines and a novel approach that is able to model the evidence retrieval as a latent variable, allowing jointly training with the veracity prediction model in an end-to-end fashion. Extensive experiments show that CHEF will provide a challenging testbed for the development of fact-checking systems designed to retrieve and reason over non-English claims. 2022.naacl-main.246 @@ -3460,7 +3460,7 @@ <fixed-case>VGNMN</fixed-case>: Video-grounded Neural Module Networks for Video-Grounded Dialogue Systems HungLe - NancyChen + NancyChen StevenHoi 3377-3393 Neural module networks (NMN) have achieved success in image-grounded tasks such as Visual Question Answering (VQA) on synthetic images. However, very limited work on NMN has been studied in the video-grounded dialogue tasks. These tasks extend the complexity of traditional visual tasks with the additional visual temporal variance and language cross-turn dependencies. Motivated by recent NMN approaches on image-grounded tasks, we introduce Video-grounded Neural Module Network (VGNMN) to model the information retrieval process in video-grounded language tasks as a pipeline of neural modules. VGNMN first decomposes all language components in dialogues to explicitly resolve any entity references and detect corresponding action-based inputs from the question. The detected entities and actions are used as parameters to instantiate neural module networks and extract visual cues from the video. Our experiments show that VGNMN can achieve promising performance on a challenging video-grounded dialogue benchmark as well as a video QA benchmark. @@ -3472,7 +3472,7 @@ Multimodal Dialogue State Tracking HungLe - NancyChen + NancyChen StevenHoi 3394-3415 Designed for tracking user goals in dialogues, a dialogue state tracker is an essential component in a dialogue system. However, the research of dialogue state tracking has largely been limited to unimodality, in which slots and slot values are limited by knowledge domains (e.g. restaurant domain with slots of restaurant name and price range) and are defined by specific database schema. In this paper, we propose to extend the definition of dialogue state tracking to multimodality. Specifically, we introduce a novel dialogue state tracking task to track the information of visual objects that are mentioned in video-grounded dialogues. Each new dialogue utterance may introduce a new video segment, new visual objects, or new object attributes and a state tracker is required to update these information slots accordingly. We created a new synthetic benchmark and designed a novel baseline, Video-Dialogue Transformer Network (VDTN), for this task. VDTN combines both object-level features and segment-level features and learns contextual dependencies between videos and dialogues to generate multimodal dialogue states. We optimized VDTN for a state generation task as well as a self-supervised video understanding task which recovers video segment or object representations. Finally, we trained VDTN to use the decoded states in a response prediction task. Together with comprehensive ablation and qualitative analysis, we discovered interesting insights towards building more capable multimodal dialogue systems. @@ -3551,7 +3551,7 @@ JacobMorrison RonanLe Bras YejinChoi - Noah A.Smith + Noah A.Smith 3464-3478 We establish THumB, a rubric-based human evaluation protocol for image captioning models. Our scoring rubrics and their definitions are carefully developed based on machine- and human-generated captions on the MSCOCO dataset. Each caption is evaluated along two main dimensions in a tradeoff (precision and recall) as well as other aspects that measure the text quality (fluency, conciseness, and inclusive language). Our evaluations demonstrate several critical problems of the current evaluation practice. Human-generated captions show substantially higher quality than machine-generated ones, especially in coverage of salient information (i.e., recall), while most automatic metrics say the opposite. Our rubric-based results reveal that CLIPScore, a recent metric that uses image features, better correlates with human judgments than conventional text-only metrics because it is more sensitive to recall. We hope that this work will promote a more transparent evaluation protocol for image captioning and its automatic metrics. 2022.naacl-main.254 @@ -3583,9 +3583,9 @@ TimO’Gorman Young-SukLee JeffreyFlanigan - RamónAstudillo - RaduFlorian - SalimRoukos + RamónAstudillo + RaduFlorian + SalimRoukos NathanSchneider 3496-3505 Despite extensive research on parsing of English sentences into Abstract Meaning Representation (AMR) graphs, which are compared to gold graphs via the Smatch metric, full-document parsing into a unified graph representation lacks well-defined representation and evaluation. Taking advantage of a super-sentential level of coreference annotation from previous work, we introduce a simple algorithm for deriving a unified graph representation, avoiding the pitfalls of information loss from over-merging and lack of coherence from under merging. Next, we describe improvements to the Smatch metric to make it tractable for comparing document-level graphs and use it to re-evaluate the best published document-level AMR parser. We also present a pipeline approach combining the top-performing AMR parser and coreference resolution systems, providing a strong baseline for future research. @@ -3598,9 +3598,9 @@ Learning to Transfer Prompts for Text Generation JunyiLi TianyiTang - Jian-YunNie + Jian-YunNie Ji-RongWen - XinZhao + XinZhao 3506-3518 Pretrained language models (PLMs) have made remarkable progress in text generation tasks via fine-tuning. While, it is challenging to fine-tune PLMs in a data-scarce situation. Therefore, it is non-trivial to develop a general and lightweight model that can adapt to various text generation tasks based on PLMs. To fulfill this purpose, the recent prompt-based learning offers a potential solution. In this paper, we improve this technique and propose a novel prompt-based method (PTG) for text generation in a transferable setting. First, PTG learns a set of source prompts for various source generation tasks and then transfers these prompts as target prompts to perform target generation tasks. To consider both task- and instance-level information, we design an adaptive attention mechanism to derive the target prompts. For each data instance, PTG learns a specific target prompt by attending to highly relevant source prompts. In extensive experiments, PTG yields competitive or better results than fine-tuning methods. We release our source prompts as an open resource, where users can add or reuse them to improve new text generation tasks for future research. Code and data can be available at https://github.com/RUCAIBox/Transfer-Prompts-for-Text-Generation. 2022.naacl-main.257 @@ -3617,7 +3617,7 @@ ZhuohaoYu ZhipengChen JingyuanWang - XinZhao + XinZhao Ji-RongWen 3519-3539 Nowadays, pretrained language models (PLMs) have dominated the majority of NLP tasks. While, little research has been conducted on systematically evaluating the language abilities of PLMs. In this paper, we present a large-scale empirical study on general language ability evaluation of PLMs (ElitePLM). In our study, we design four evaluation dimensions, memory, comprehension, reasoning, and composition, to measure ten widely-used PLMs within five categories. Our empirical results demonstrate that: (1) PLMs with varying training objectives and strategies are good at different ability tests; (2) fine-tuning PLMs in downstream tasks is usually sensitive to the data size and distribution; (3) PLMs have excellent transferability between similar tasks. Moreover, the prediction results of PLMs in our experiments are released as an open resource for more deep and detailed analysis on the language abilities of PLMs. This paper can guide the future work to select, apply, and design PLMs for specific tasks. We have made all the details of experiments publicly available at https://github.com/RUCAIBox/ElitePLM. @@ -3633,9 +3633,9 @@ RonanLe Bras LaviniaDunagan JacobMorrison - AlexanderFabbri + AlexanderFabbri YejinChoi - Noah A.Smith + Noah A.Smith 3540-3557 Natural language processing researchers have identified limitations of evaluation methodology for generation tasks, with new questions raised about the validity of automatic metrics and of crowdworker judgments. Meanwhile, efforts to improve generation models tend to depend on simple n-gram overlap metrics (e.g., BLEU, ROUGE). We argue that new advances on models and metrics should each more directly benefit and inform the other. We therefore propose a generalization of leaderboards, bidimensional leaderboards (Billboards), that simultaneously tracks progress in language generation models and metrics for their evaluation. Unlike conventional unidimensional leaderboards that sort submitted systems by predetermined metrics, a Billboard accepts both generators and evaluation metrics as competing entries. A Billboard automatically creates an ensemble metric that selects and linearly combines a few metrics based on a global analysis across generators. Further, metrics are ranked based on their correlation with human judgments. We release four Billboards for machine translation, summarization, and image captioning. We demonstrate that a linear ensemble of a few diverse metrics sometimes substantially outperforms existing metrics in isolation. Our mixed-effects model analysis shows that most automatic metrics, especially the reference-based ones, overrate machine over human generation, demonstrating the importance of updating metrics as generation models become stronger (and perhaps more similar to humans) in the future. 2022.naacl-main.259 @@ -3687,7 +3687,7 @@ Benchmarking Intersectional Biases in <fixed-case>NLP</fixed-case> - JohnLalor + JohnLalor YiYang KendallSmith NicoleForsgren @@ -3703,7 +3703,7 @@ When is <fixed-case>BERT</fixed-case> Multilingual? Isolating Crucial Ingredients for Cross-lingual Transfer AmeetDeshpande - ParthaTalukdar + ParthaTalukdar KarthikNarasimhan 3610-3623 While recent work on multilingual language models has demonstrated their capacity for cross-lingual zero-shot transfer on downstream tasks, there is a lack of consensus in the community as to what shared properties between languages enable such transfer. Analyses involving pairs of natural languages are often inconclusive and contradictory since languages simultaneously differ in many linguistic aspects. In this paper, we perform a large-scale empirical study to isolate the effects of various linguistic properties by measuring zero-shot transfer between four diverse natural languages and their counterparts constructed by modifying aspects such as the script, word order, and syntax. Among other things, our experiments show that the absence of sub-word overlap significantly affects zero-shot transfer when languages differ in their word order, and there is a strong correlation between transfer performance and word embedding alignment between languages (e.g., \rho_s=0.94 on the task of NLI). Our results call for focus in multilingual models on explicitly improving word embedding alignment between languages rather than relying on its implicit emergence. @@ -3716,7 +3716,7 @@ How Conservative are Language Models? Adapting to the Introduction of Gender-Neutral Pronouns StephanieBrandl RuixiangCui - AndersSøgaard + AndersSøgaard 3624-3630 Gender-neutral pronouns have recently been introduced in many languages to a) include non-binary people and b) as a generic singular. Recent results from psycholinguistics suggest that gender-neutral pronouns (in Swedish) are not associated with human processing difficulties. This, we show, is in sharp contrast with automated processing. We show that gender-neutral pronouns in Danish, English, and Swedish are associated with higher perplexity, more dispersed attention patterns, and worse downstream performance. We argue that such conservativity in language models may limit widespread adoption of gender-neutral pronouns and must therefore be resolved. 2022.naacl-main.265 @@ -3759,7 +3759,7 @@ Learning the Ordering of Coordinate Compounds and Elaborate Expressions in <fixed-case>H</fixed-case>mong, <fixed-case>L</fixed-case>ahu, and <fixed-case>C</fixed-case>hinese ChenxuanCui Katherine J.Zhang - DavidMortensen + DavidMortensen 3656-3669 Coordinate compounds (CCs) and elaborate expressions (EEs) are coordinate constructions common in languages of East and Southeast Asia. Mortensen (2006) claims that (1) the linear ordering of EEs and CCs in Hmong, Lahu, and Chinese can be predicted via phonological hierarchies and (2) that these phonological hierarchies lack a clear phonetic rationale. These claims are significant because morphosyntax has often been seen as in a feed-forward relationship with phonology, and phonological generalizations have often been assumed to be phonetically “natural”. We investigate whether the ordering of CCs and EEs can be learned empirically and whether computational models (classifiers and sequence-labeling models) learn unnatural hierarchies similar to those posited by Mortensen (2006). We find that decision trees and SVMs learn to predict the order of CCs/EEs on the basis of phonology, beating strong baselines for all three languages, with DTs learning hierarchies strikingly similar to those proposed by Mortensen. However, we also find that a neural sequence labeling model is able to learn the ordering of elaborate expressions in Hmong very effectively without using any phonological information. We argue that EE ordering can be learned through two independent routes: phonology and lexical distribution, presenting a more nuanced picture than previous work. 2022.naacl-main.268 @@ -3786,7 +3786,7 @@ Chia-ChienHung AnneLauscher IvanVulić - SimonePonzetto + SimonePonzetto GoranGlavaš 3687-3703 Research on (multi-domain) task-oriented dialog (TOD) has predominantly focused on the English language, primarily due to the shortage of robust TOD datasets in other languages, preventing the systematic investigation of cross-lingual transfer for this crucial NLP application area. In this work, we introduce Multi2WOZ, a new multilingual multi-domain TOD dataset, derived from the well-established English dataset MultiWOZ, that spans four typologically diverse languages: Chinese, German, Arabic, and Russian. In contrast to concurrent efforts, Multi2WOZ contains gold-standard dialogs in target languages that are directly comparable with development and test portions of the English dataset, enabling reliable and comparative estimates of cross-lingual transfer performance for TOD. We then introduce a new framework for multilingual conversational specialization of pretrained language models (PrLMs) that aims to facilitate cross-lingual transfer for arbitrary downstream TOD tasks. Using such conversational PrLMs specialized for concrete target languages, we systematically benchmark a number of zero-shot and few-shot cross-lingual transfer approaches on two standard TOD tasks: Dialog State Tracking and Response Retrieval. Our experiments show that, in most setups, the best performance entails the combination of (i) conversational specialization in the target language and (ii) few-shot transfer for the concrete TOD task. Most importantly, we show that our conversational specialization in the target language allows for an exceptionally sample-efficient few-shot transfer for downstream TOD tasks. @@ -3834,7 +3834,7 @@ Adaptable Adapters - NafiseMoosavi + NafiseMoosavi QuentinDelfosse KristianKersting IrynaGurevych @@ -3908,8 +3908,8 @@ TingtingMa QianhuiWu ZhiweiYu - TiejunZhao - Chin-YewLin + TiejunZhao + Chin-YewLin 3806-3818 Recent studies on few-shot intent detection have attempted to formulate the task as a meta-learning problem, where a meta-learning model is trained with a certain capability to quickly adapt to newly specified few-shot tasks with potentially unseen intent categories. Prototypical networks have been commonly used in this setting, with the hope that good prototypical representations could be learned to capture the semantic similarity between the query and a few labeled instances. This intuition naturally leaves a question of whether or not a good sentence representation scheme could suffice for the task without further domain-specific adaptation. In this paper, we conduct empirical studies on a number of general-purpose sentence embedding schemes, showing that good sentence embeddings without any fine-tuning on intent detection data could produce a non-trivially strong performance. Inspired by the results from our qualitative analysis, we propose a frustratingly easy modification, which leads to consistent improvements over all sentence encoding schemes, including those from the state-of-the-art prototypical network variants with task-specific fine-tuning. 2022.naacl-main.279 @@ -4107,7 +4107,7 @@ Towards Debiasing Translation Artifacts - KoelDutta Chowdhury + KoelDutta Chowdhury RrichaJalota CristinaEspaña-Bonet JosefGenabith @@ -4146,7 +4146,7 @@ ChaitanyaAgarwal VivekGupta AnoopKunchukuttan - ManishShrivastava + ManishShrivastava 4018-4037 Existing research on Tabular Natural Language Inference (TNLI) exclusively examines the task in a monolingual setting where the tabular premise and hypothesis are in the same language. However, due to the uneven distribution of text resources on the web across languages, it is common to have the tabular premise in a high resource language and the hypothesis in a low resource language. As a result, we present the challenging task of bilingual Tabular Natural Language Inference (bTNLI), in which the tabular premise and a hypothesis over it are in two separate languages. We construct EI-InfoTabS: an English-Indic bTNLI dataset by translating the textual hypotheses of the English TNLI dataset InfoTabS into eleven major Indian languages. We thoroughly investigate how pre-trained multilingual models learn and perform on EI-InfoTabS. Our study shows that the performance on bTNLI can be close to its monolingual counterpart, with translate-train, translate-test and unified-train being strongly competitive baselines. 2022.naacl-main.295 @@ -4157,7 +4157,7 @@ Generative Biomedical Entity Linking via Knowledge Base-Guided Pre-training and Synonyms-Aware Fine-tuning HongyiYuan - ZhengYuan + ZhengYuan ShengYu 4038-4048 Entities lie in the heart of biomedical natural language understanding, and the biomedical entity linking (EL) task remains challenging due to the fine-grained and diversiform concept names. Generative methods achieve remarkable performances in general domain EL with less memory usage while requiring expensive pre-training. Previous biomedical EL methods leverage synonyms from knowledge bases (KB) which is not trivial to inject into a generative method. In this work, we use a generative approach to model biomedical EL and propose to inject synonyms knowledge in it. We propose KB-guided pre-training by constructing synthetic samples with synonyms and definitions from KB and require the model to recover concept names. We also propose synonyms-aware fine-tuning to select concept names for training, and propose decoder prompt and multi-synonyms constrained prefix tree for inference. Our method achieves state-of-the-art results on several biomedical EL tasks without candidate selection which displays the effectiveness of proposed pre-training and fine-tuning strategies. The source code is available at https://github.com/Yuanhy1997/GenBioEL. @@ -4188,7 +4188,7 @@ RamyEskander CassLowry SujayKhandagale - JudithKlavans + JudithKlavans MariaPolinsky SmarandaMuresan 4061-4072 @@ -4202,8 +4202,8 @@ Optimising Equal Opportunity Fairness in Model Training AiliShen XudongHan - TrevorCohn - TimothyBaldwin + TrevorCohn + TimothyBaldwin LeaFrermann 4073-4084 Real-world datasets often encode stereotypes and societal biases. Such biases can be implicitly captured by trained models, leading to biased predictions and exacerbating existing societal preconceptions. Existing debiasing methods, such as adversarial training and removing protected information from representations, have been shown to reduce bias. However, a disconnect between fairness criteria and training objectives makes it difficult to reason theoretically about the effectiveness of different techniques. In this work, we propose two novel training objectives which directly optimise for the widely-used criterion of equal opportunity, and show that they are effective in reducing bias while maintaining high performance over two classification tasks. @@ -4215,7 +4215,7 @@ Leaner and Faster: Two-Stage Model Compression for Lightweight Text-Image Retrieval SiyuRen - KennyZhu + KennyZhu 4085-4090 Current text-image approaches (e.g., CLIP) typically adopt dual-encoder architecture using pre-trained vision-language representation. However, these models still pose non-trivial memory requirements and substantial incremental indexing time, which makes them less practical on mobile devices. In this paper, we present an effective two-stage framework to compress large pre-trained dual-encoder for lightweight text-image retrieval. The resulting model is smaller (39% of the original), faster (1.6x/2.9x for processing image/text respectively), yet performs on par with or better than the original full model on Flickr30K and MSCOCO benchmarks. We also open-source an accompanying realistic mobile image search application. 2022.naacl-main.300 @@ -4230,7 +4230,7 @@ DongyuanLi HidetakaKamigaito KotaroFunakoshi - ManabuOkumura + ManabuOkumura 4091-4104 Previous studies on the timeline summarization (TLS) task ignored the information interaction between sentences and dates, and adopted pre-defined unlearnable representations for them. They also considered date selection and event detection as two independent tasks, which makes it impossible to integrate their advantages and obtain a globally optimal summary. In this paper, we present a joint learning-based heterogeneous graph attention network for TLS (HeterTls), in which date selection and event detection are combined into a unified framework to improve the extraction accuracy and remove redundant sentences simultaneously. Our heterogeneous graph involves multiple types of nodes, the representations of which are iteratively learned across the heterogeneous graph attention layer. We evaluated our model on four datasets, and found that it significantly outperformed the current state-of-the-art baselines with regard to ROUGE scores and date selection metrics. 2022.naacl-main.301 @@ -4340,9 +4340,9 @@ Improving negation detection with negation-focused pre-training ThinhTruong - TimothyBaldwin - TrevorCohn - KarinVerspoor + TimothyBaldwin + TrevorCohn + KarinVerspoor 4188-4193 Negation is a common linguistic feature that is crucial in many language understanding tasks, yet it remains a hard problem due to diversity in its expression in different types of text. Recent works show that state-of-the-art NLP models underperform on samples containing negation in various tasks, and that negation detection models do not transfer well across domains. We propose a new negation-focused pre-training strategy, involving targeted data augmentation and negation masking, to better incorporate negation information into language models. Extensive experiments on common benchmarks show that our proposed approach improves negation detection performance and generalizability over the strong baseline NegBERT (Khandelwal and Sawant, 2020). 2022.naacl-main.309 @@ -4368,13 +4368,13 @@ Yung-SungChuang RumenDangovski HongyinLuo - YangZhang + YangZhang ShiyuChang MarinSoljacic Shang-WenLi ScottYih YoonKim - JamesGlass + JamesGlass 4207-4218 We propose DiffCSE, an unsupervised contrastive learning framework for learning sentence embeddings. DiffCSE learns sentence embeddings that are sensitive to the difference between the original sentence and an edited sentence, where the edited sentence is obtained by stochastically masking out the original sentence and then sampling from a masked language model. We show that DiffSCE is an instance of equivariant contrastive learning, which generalizes contrastive learning and learns representations that are insensitive to certain types of augmentations and sensitive to other “harmful” types of augmentations. Our experiments show that DiffCSE achieves state-of-the-art results among unsupervised sentence representation learning methods, outperforming unsupervised SimCSE by 2.3 absolute points on semantic textual similarity tasks. 2022.naacl-main.311 @@ -4442,7 +4442,7 @@ FandongMeng XueZhang YufengChen - JinanXu + JinanXu JieZhou 4256-4266 Generating adversarial examples for Neural Machine Translation (NMT) with single Round-Trip Translation (RTT) has achieved promising results by releasing the meaning-preserving restriction. However, a potential pitfall for this approach is that we cannot decide whether the generated examples are adversarial to the target NMT model or the auxiliary backward one, as the reconstruction error through the RTT can be related to either. To remedy this problem, we propose a new definition for NMT adversarial examples based on the Doubly Round-Trip Translation (DRTT). Specifically, apart from the source-target-source RTT, we also consider the target-source-target one, which is utilized to pick out the authentic adversarial examples for the target NMT model. Additionally, to enhance the robustness of the NMT model, we introduce the masked language models to construct bilingual adversarial pairs based on DRTT, which are used to train the NMT model directly. Extensive experiments on both the clean and noisy test sets (including the artificial and natural noise) show that our approach substantially improves the robustness of NMT models. @@ -4475,7 +4475,7 @@ HansonLu ThomasIcard ChristopherPotts - NoahGoodman + NoahGoodman 4288-4295 Distillation efforts have led to language models that are more compact and efficient without serious drops in performance. The standard approach to distillation trains a student model against two objectives: a task-specific objective (e.g., language modeling) and an imitation objective that encourages the hidden states of the student model to be similar to those of the larger teacher model. In this paper, we show that it is beneficial to augment distillation with a third objective that encourages the student to imitate the causal dynamics of the teacher through a distillation interchange intervention training objective (DIITO). DIITO pushes the student model to become a causal abstraction of the teacher model – a faithful model with simpler causal structure. DIITO is fully differentiable, easily implemented, and combines flexibly with other objectives. Compared against standard distillation with the same setting, DIITO results in lower perplexity on the WikiText-103M corpus (masked language modeling) and marked improvements on the GLUE benchmark (natural language understanding), SQuAD (question answering), and CoNLL-2003 (named entity recognition). 2022.naacl-main.318 @@ -4603,7 +4603,7 @@ WentingZhao KonstantineArkoudas WeiqiSun - ClaireCardie + ClaireCardie 4418-4427 Task-oriented parsing (TOP) aims to convert natural language into machine-readable representations of specific tasks, such as setting an alarm. A popular approach to TOP is to apply seq2seq models to generate linearized parse trees. A more recent line of work argues that pretrained seq2seq2 models are better at generating outputs that are themselves natural language, so they replace linearized parse trees with canonical natural-language paraphrases that can then be easily translated into parse trees, resulting in so-called naturalized parsers. In this work we continue to explore naturalized semantic parsing by presenting a general reduction of TOP to abstractive question answering that overcomes some limitations of canonical paraphrasing. Experimental results show that our QA-based technique outperforms state-of-the-art methods in full-data settings while achieving dramatic improvements in few-shot settings. 2022.naacl-main.328 @@ -4615,10 +4615,10 @@ Learning Cross-Lingual <fixed-case>IR</fixed-case> from an <fixed-case>E</fixed-case>nglish Retriever YulongLi MartinFranz - Md ArafatSultan + Md ArafatSultan BhavaniIyer Young-SukLee - AvirupSil + AvirupSil 4428-4436 We present DR.DECR (Dense Retrieval with Distillation-Enhanced Cross-Lingual Representation), a new cross-lingual information retrieval (CLIR) system trained using multi-stage knowledge distillation (KD). The teacher of DR.DECR relies on a highly effective but computationally expensive two-stage inference process consisting of query translation and monolingual IR, while the student, DR.DECR, executes a single CLIR step. We teach DR.DECR powerful multilingual representations as well as CLIR by optimizing two corresponding KD objectives. Learning useful representations of non-English text from an English-only retriever is accomplished through a cross-lingual token alignment algorithm that relies on the representation capabilities of the underlying multilingual encoders. In both in-domain and zero-shot out-of-domain evaluation, DR.DECR demonstrates far superior accuracy over direct fine-tuning with labeled CLIR data. It is also the best single-model retriever on the XOR-TyDi benchmark at the time of this writing. 2022.naacl-main.329 @@ -4721,7 +4721,7 @@ Does Pre-training Induce Systematic Inference? How Masked Language Models Acquire Commonsense Knowledge IanPorada AlessandroSordoni - JackieCheung + JackieCheung 4550-4557 Transformer models pre-trained with a masked-language-modeling objective (e.g., BERT) encode commonsense knowledge as evidenced by behavioral probes; however, the extent to which this knowledge is acquired by systematic inference over the semantics of the pre-training corpora is an open question. To answer this question, we selectively inject verbalized knowledge into the pre-training minibatches of BERT and evaluate how well the model generalizes to supported inferences after pre-training on the injected knowledge. We find generalization does not improve over the course of pre-training BERT from scratch, suggesting that commonsense knowledge is acquired from surface-level, co-occurrence patterns rather than induced, systematic reasoning. 2022.naacl-main.337 @@ -4732,9 +4732,9 @@ Using Paraphrases to Study Properties of Contextual Embeddings - LauraBurdick + LauraBurdick Jonathan K.Kummerfeld - RadaMihalcea + RadaMihalcea 4558-4568 We use paraphrases as a unique source of data to analyze contextualized embeddings, with a particular focus on BERT. Because paraphrases naturally encode consistent word and phrase semantics, they provide a unique lens for investigating properties of embeddings. Using the Paraphrase Database’s alignments, we study words within paraphrases as well as phrase representations. We find that contextual embeddings effectively handle polysemous words, but give synonyms surprisingly different representations in many cases. We confirm previous findings that BERT is sensitive to word order, but find slightly different patterns than prior work in terms of the level of contextualization across BERT’s layers. 2022.naacl-main.338 @@ -4759,7 +4759,7 @@ DaniloCroce SimoneFilice GiuseppeCastellucci - RobertoBasili + RobertoBasili 4587-4601 Even if recent Transformer-based architectures, such as BERT, achieved impressive results in semantic processing tasks, their fine-tuning stage still requires large scale training resources. Usually, Data Augmentation (DA) techniques can help to deal with low resource settings. In Text Classification tasks, the objective of DA is the generation of well-formed sentences that i) represent the desired task category and ii) are novel with respect to existing sentences. In this paper, we propose a neural approach to automatically learn to generate new examples using a pre-trained sequence-to-sequence model. We first learn a task-oriented similarity function that we use to pair similar examples. Then, we use these example pairs to train a model to generate examples. Experiments in low resource settings show that augmenting the training material with the proposed strategy systematically improves the results on text classification and natural language inference tasks by up to 10% accuracy, outperforming existing DA approaches. 2022.naacl-main.340 @@ -4843,10 +4843,10 @@ Quantifying Adaptability in Pre-trained Language Models with 500 Tasks - BelindaLi + BelindaLi JaneYu MadianKhabsa - LukeZettlemoyer + LukeZettlemoyer AlonHalevy JacobAndreas 4696-4715 @@ -4934,7 +4934,7 @@ FeiLiu MoYu HongYu - SachindraJoshi + SachindraJoshi 4781-4796 We propose novel AI-empowered chat bots for learning as conversation where a user does not read a passage but gains information and knowledge through conversation with a teacher bot. Our information acquisition-oriented dialogue system employs a novel adaptation of reinforced self-play so that the system can be transferred to various domains without in-domain dialogue data, and can carry out conversations both informative and attentive to users. 2022.naacl-main.352 @@ -4945,7 +4945,7 @@ Dynamic Programming in Rank Space: Scaling Structured Inference with Low-Rank <fixed-case>HMM</fixed-case>s and <fixed-case>PCFG</fixed-case>s SonglinYang - WeiLiu + WeiLiu KeweiTu 4797-4809 Hidden Markov Models (HMMs) and Probabilistic Context-Free Grammars (PCFGs) are widely used structured models, both of which can be represented as factor graph grammars (FGGs), a powerful formalism capable of describing a wide range of models. Recent research found it beneficial to use large state spaces for HMMs and PCFGs. However, inference with large state spaces is computationally demanding, especially for PCFGs. To tackle this challenge, we leverage tensor rank decomposition (aka. CPD) to decrease inference computational complexities for a subset of FGGs subsuming HMMs and PCFGs. We apply CPD on the factors of an FGG and then construct a new FGG defined in the rank space. Inference with the new FGG produces the same result but has a lower time complexity when the rank size is smaller than the state size. We conduct experiments on HMM language modeling and unsupervised PCFG parsing, showing better performance than previous work. Our code is publicly available at https://github.com/VPeterV/RankSpace-Models. @@ -4983,7 +4983,7 @@ Mining Clues from Incomplete Utterance: A Query-enhanced Network for Incomplete Utterance Rewriting ShuzhengSi ShuangZeng - BaobaoChang + BaobaoChang 4839-4847 Incomplete utterance rewriting has recently raised wide attention. However, previous works do not consider the semantic structural information between incomplete utterance and rewritten utterance or model the semantic structure implicitly and insufficiently. To address this problem, we propose a QUEry-Enhanced Network(QUEEN) to solve this problem. Firstly, our proposed query template explicitly brings guided semantic structural knowledge between the incomplete utterance and the rewritten utterance making model perceive where to refer back to or recover omitted tokens. Then, we adopt a fast and effective edit operation scoring network to model the relation between two tokens. Benefiting from extra information and the well-designed network, QUEEN achieves state-of-the-art performance on several public datasets. 2022.naacl-main.356 @@ -5024,7 +5024,7 @@ Generalized Quantifiers as a Source of Error in Multilingual <fixed-case>NLU</fixed-case> Benchmarks RuixiangCui DanielHershcovich - AndersSøgaard + AndersSøgaard 4875-4893 Logical approaches to representing language have developed and evaluated computational models of quantifier words since the 19th century, but today’s NLU models still struggle to capture their semantics. We rely on Generalized Quantifier Theory for language-independent representations of the semantics of quantifier words, to quantify their contribution to the errors of NLU models. We find that quantifiers are pervasive in NLU benchmarks, and their occurrence at test time is associated with performance drops. Multilingual models also exhibit unsatisfying quantifier reasoning abilities, but not necessarily worse for non-English languages. To facilitate directly-targeted probing, we present an adversarial generalized quantifier NLI task (GQNLI) and show that pre-trained language models have a clear lack of robustness in generalized quantifier reasoning. 2022.naacl-main.359 @@ -5083,7 +5083,7 @@ <fixed-case>DUCK</fixed-case>: Rumour Detection on Social Media by Modelling User and Comment Propagation Networks LinTian - XiuzhenZhang + XiuzhenZhang Jey HanLau 4939-4949 Social media rumours, a form of misinformation, can mislead the public and cause significant economic and social disruption. Motivated by the observation that the user network — which captures \textit{who} engage with a story — and the comment network — which captures \textit{how} they react to it — provide complementary signals for rumour detection, in this paper, we propose DUCK (rumour \underline{d}etection with \underline{u}ser and \underline{c}omment networ\underline{k}s) for rumour detection on social media. We study how to leverage transformers and graph attention networks to jointly model the contents and structure of social media conversations, as well as the network of users who engaged in these conversations. Over four widely used benchmark rumour datasets in English and Chinese, we show that DUCK produces superior performance for detecting rumours, creating a new state-of-the-art. Source code for DUCK is available at: https://github.com/ltian678/DUCK-code. @@ -5109,7 +5109,7 @@ MikeZhang KristianJensen SifSonniks - BarbaraPlank + BarbaraPlank 4962-4984 Skill Extraction (SE) is an important and widely-studied task useful to gain insights into labor market dynamics. However, there is a lacuna of datasets and annotation guidelines; available datasets are few and contain crowd-sourced labels on the span-level or labels from a predefined skill inventory. To address this gap, we introduce SKILLSPAN, a novel SE dataset consisting of 14.5K sentences and over 12.5K annotated spans. We release its respective guidelines created over three different sources annotated for hard and soft skills by domain experts. We introduce a BERT baseline (Devlin et al., 2019). To improve upon this baseline, we experiment with language models that are optimized for long spans (Joshi et al., 2020; Beltagy et al., 2020), continuous pre-training on the job posting domain (Han and Eisenstein, 2019; Gururangan et al., 2020), and multi-task learning (Caruana, 1997). Our results show that the domain-adapted models significantly outperform their non-adapted counterparts, and single-task outperforms multi-task learning. 2022.naacl-main.366 @@ -5135,7 +5135,7 @@ CeZheng XudongChen RunxinXu - BaobaoChang + BaobaoChang 4998-5011 Frame semantic parsing is a fundamental NLP task, which consists of three subtasks: frame identification, argument identification and role classification. Most previous studies tend to neglect relations between different subtasks and arguments and pay little attention to ontological frame knowledge defined in FrameNet. In this paper, we propose a Knowledge-guided Incremental semantic parser with Double-graph (KID). We first introduce Frame Knowledge Graph (FKG), a heterogeneous graph containing both frames and FEs (Frame Elements) built on the frame knowledge so that we can derive knowledge-enhanced representations for frames and FEs. Besides, we propose Frame Semantic Graph (FSG) to represent frame semantic structures extracted from the text with graph structures. In this way, we can transform frame semantic parsing into an incremental graph construction problem to strengthen interactions between subtasks and relations between arguments. Our experiments show that KID outperforms the previous state-of-the-art method by up to 1.7 F1-score on two FrameNet datasets. Our code is availavle at https://github.com/PKUnlp-icler/KID. 2022.naacl-main.368 @@ -5150,7 +5150,7 @@ TianyuLiu QingyuZhou YunboCao - BaobaoChang + BaobaoChang ZhifangSui 5012-5024 Few-Shot Sequence Labeling (FSSL) is a canonical paradigm for the tagging models, e.g., named entity recognition and slot filling, to generalize on an emerging, resource-scarce domain. Recently, the metric-based meta-learning framework has been recognized as a promising approach for FSSL. However, most prior works assign a label to each token based on the token-level similarities, which ignores the integrality of named entities or slots. To this end, in this paper, we propose ESD, an Enhanced Span-based Decomposition method for FSSL. ESD formulates FSSL as a span-level matching problem between test query and supporting instances. Specifically, ESD decomposes the span matching problem into a series of span-level procedures, mainly including enhanced span representation, class prototype aggregation and span conflicts resolution. Extensive experiments show that ESD achieves the new state-of-the-art results on two popular FSSL benchmarks, FewNERD and SNIPS, and is proven to be more robust in the noisy and nested tagging scenarios. @@ -5166,7 +5166,7 @@ PeiyiWang TianyuLiu ShuangZeng - BaobaoChang + BaobaoChang ZhifangSui 5025-5036 Most previous studies aim at extracting events from a single sentence, while document-level event extraction still remains under-explored. In this paper, we focus on extracting event arguments from an entire document, which mainly faces two critical problems: a) the long-distance dependency between trigger and arguments over sentences; b) the distracting context towards an event in the document. To address these issues, we propose a Two-Stream Abstract meaning Representation enhanced extraction model (TSAR). TSAR encodes the document from different perspectives by a two-stream encoding module, to utilize local and global information and lower the impact of distracting context. Besides, TSAR introduces an AMR-guided interaction module to capture both intra-sentential and inter-sentential features, based on the locally and globally constructed AMR semantic graphs. An auxiliary boundary loss is introduced to enhance the boundary information for text spans explicitly. Extensive experiments illustrate that TSAR outperforms previous state-of-the-art by a large margin, with 2.54 F1 and 5.13 F1 performance gain on the public RAMS and WikiEvents datasets respectively, showing the superiority in the cross-sentence arguments extraction. We release our code in https://github.com/PKUnlp-icler/TSAR. @@ -5318,7 +5318,7 @@ Sketching as a Tool for Understanding and Accelerating Self-attention for Long Sequences YifanChen QiZeng - DilekHakkani-Tur + DilekHakkani-Tur DiJin HengJi YunYang @@ -5335,7 +5335,7 @@ HongyuanLu WaiLam HongCheng - HelenMeng + HelenMeng 5200-5212 Incorporating personas information allows diverse and engaging responses in dialogue response generation. Unfortunately, prior works have primarily focused on self personas and have overlooked the value of partner personas. Moreover, in practical applications, the availability of the gold partner personas is often not the case. This paper attempts to tackle these issues by offering a novel framework that leverages automatic partner personas generation to enhance the succeeding dialogue response generation. Our framework employs reinforcement learning with a dedicatedly designed critic network for reward judgement. Experimental results from automatic and human evaluations indicate that our framework is capable of generating relevant, interesting, coherent and informative partner personas, even compared to the ground truth partner personas. This enhances the succeeding dialogue response generation, which surpasses our competitive baselines that condition on the ground truth partner personas. 2022.naacl-main.382 @@ -5385,7 +5385,7 @@ <fixed-case>S</fixed-case>yn2<fixed-case>V</fixed-case>ec: Synset Colexification Graphs for Lexical Semantic Similarity JohnHarvill - RoxanaGirju + RoxanaGirju MarkHasegawa-Johnson 5259-5270 In this paper we focus on patterns of colexification (co-expressions of form-meaning mapping in the lexicon) as an aspect of lexical-semantic organization, and use them to build large scale synset graphs across BabelNet’s typologically diverse set of 499 world languages. We introduce and compare several approaches: monolingual and cross-lingual colexification graphs, popular distributional models, and fusion approaches. The models are evaluated against human judgments on a semantic similarity task for nine languages. Our strong empirical findings also point to the importance of universality of our graph synset embedding representations with no need for any language-specific adaptation when evaluated on the lexical similarity task. The insights of our exploratory investigation of large-scale colexification graphs could inspire significant advances in NLP across languages, especially for tasks involving languages which lack dedicated lexical resources, and can benefit from language transfer from large shared cross-lingual semantic spaces. @@ -5400,7 +5400,7 @@ NouhaDziri SivanMilton MoYu - OsmarZaiane + OsmarZaiane SivaReddy 5271-5285 Knowledge-grounded conversational models are known to suffer from producing factually invalid statements, a phenomenon commonly called hallucination. In this work, we investigate the underlying causes of this phenomenon: is hallucination due to the training data, or to the models? We conduct a comprehensive human study on both existing knowledge-grounded conversational benchmarks and several state-of-the-art models. Our study reveals that the standard benchmarks consist of > 60% hallucinated responses, leading to models that not only hallucinate but even amplify hallucinations. Our findings raise important questions on the quality of existing datasets and models trained using them. We make our annotations publicly available for future research. @@ -5463,7 +5463,7 @@ JohnnyMa JanaThompson HeHe - SamuelBowman + SamuelBowman 5336-5358 To enable building and testing models on long-document comprehension, we introduce QuALITY, a multiple-choice QA dataset with context passages in English that have an average length of about 5,000 tokens, much longer than typical current models can process. Unlike in prior work with passages, our questions are written and validated by contributors who have read the entire passage, rather than relying on summaries or excerpts. In addition, only half of the questions are answerable by annotators working under tight time constraints, indicating that skimming and simple search are not enough to consistently perform well. Our baseline models perform poorly on this task (55.4%) and significantly lag behind human performance (93.5%). 2022.naacl-main.391 @@ -5474,7 +5474,7 @@ <fixed-case>ExSum</fixed-case>: <fixed-case>F</fixed-case>rom Local Explanations to Model Understanding YilunZhou - Marco TulioRibeiro + Marco TulioRibeiro JulieShah 5359-5378 Interpretability methods are developed to understand the working mechanisms of black-box models, which is crucial to their responsible deployment. Fulfilling this goal requires both that the explanations generated by these methods are correct and that people can easily and reliably understand them. While the former has been addressed in prior work, the latter is often overlooked, resulting in informal model understanding derived from a handful of local explanations. In this paper, we introduce explanation summary (ExSum), a mathematical framework for quantifying model understanding, and propose metrics for its quality assessment. On two domains, ExSum highlights various limitations in the current practice, helps develop accurate model understanding, and reveals easily overlooked properties of the model. We also connect understandability to other properties of explanations such as human alignment, robustness, and counterfactual similarity and plausibility. @@ -5486,11 +5486,11 @@ Maximum <fixed-case>B</fixed-case>ayes <fixed-case>S</fixed-case>match Ensemble Distillation for <fixed-case>AMR</fixed-case> Parsing Young-SukLee - RamónAstudillo + RamónAstudillo HoangThanh Lam TahiraNaseem - RaduFlorian - SalimRoukos + RaduFlorian + SalimRoukos 5379-5392 AMR parsing has experienced an unprecendented increase in performance in the last three years, due to a mixture of effects including architecture improvements and transfer learning. Self-learning techniques have also played a role in pushing performance forward. However, for most recent high performant parsers, the effect of self-learning and silver data augmentation seems to be fading. In this paper we propose to overcome this diminishing returns of silver data by combining Smatch-based ensembling techniques with ensemble distillation. In an extensive experimental setup, we push single model English parser performance to a new state-of-the-art, 85.9 (AMR2.0) and 84.3 (AMR3.0), and return to substantial gains from silver data augmentation. We also attain a new state-of-the-art for cross-lingual AMR parsing for Chinese, German, Italian and Spanish. Finally we explore the impact of the proposed technique on domain adaptation, and show that it can produce gains rivaling those of human annotated data for QALD-9 and achieve a new state-of-the-art for BioAMR. 2022.naacl-main.393 @@ -5503,7 +5503,7 @@ MycalTucker TiwalayoEisape PengQian - RogerLevy + RogerLevy JulieShah 5393-5408 Recent causal probing literature reveals when language models and syntactic probes use similar representations. Such techniques may yield “false negative” causality results: models may use representations of syntax, but probes may have learned to use redundant encodings of the same syntactic information. We demonstrate that models do encode syntactic information redundantly and introduce a new probe design that guides probes to consider all syntactic information present in embeddings. Using these probes, we find evidence for the use of syntax in models where prior methods did not, allowing us to boost model performance by injecting syntactic information into representations. @@ -5515,7 +5515,7 @@ Modeling Task Interactions in Document-Level Joint Entity and Relation Extraction LiyanXu - JinhoChoi + JinhoChoi 5409-5416 We target on the document-level relation extraction in an end-to-end setting, where the model needs to jointly perform mention extraction, coreference resolution (COREF) and relation extraction (RE) at once, and gets evaluated in an entity-centric way. Especially, we address the two-way interaction between COREF and RE that has not been the focus by previous work, and propose to introduce explicit interaction namely Graph Compatibility (GC) that is specifically designed to leverage task characteristics, bridging decisions of two tasks for direct task interference. Our experiments are conducted on DocRED and DWIE; in addition to GC, we implement and compare different multi-task settings commonly adopted in previous work, including pipeline, shared encoders, graph propagation, to examine the effectiveness of different interactions. The result shows that GC achieves the best performance by up to 2.3/5.1 F1 improvement over the baseline. 2022.naacl-main.395 @@ -5681,8 +5681,8 @@ SuchinGururangan MikeLewis AriHoltzman - Noah A.Smith - LukeZettlemoyer + Noah A.Smith + LukeZettlemoyer 5557-5576 We introduce a new domain expert mixture (DEMix) layer that enables conditioning a language model (LM) on the domain of the input text. A DEMix layer includes a collection of expert feedforward networks, each specialized to a domain, that makes the LM modular: experts can be mixed, added, or removed after initial training. Extensive experiments with autoregressive transformer LMs (up to 1.3B parameters) show that DEMix layers reduce test-time perplexity (especially for out-of-domain data), increase training efficiency, and enable rapid adaptation. Mixing experts during inference, using a parameter-free weighted ensemble, enables better generalization to heterogeneous or unseen domains. We also show it is possible to add experts to adapt to new domains without forgetting older ones, and remove experts to restrict access to unwanted domains. Overall, these results demonstrate benefits of domain modularity in language models. 2022.naacl-main.407 @@ -5789,7 +5789,7 @@ HaoranLi AsliCelikyilmaz YasharMehdad - DragomirRadev + DragomirRadev 5657-5668 Factual inconsistencies in generated summaries severely limit the practical applications of abstractive dialogue summarization. Although significant progress has been achieved by using pre-trained neural language models, substantial amounts of hallucinated content are found during the human evaluation. In this work, we first devised a typology of factual errors to better understand the types of hallucinations generated by current models and conducted human evaluation on popular dialog summarization dataset. We further propose a training strategy that improves the factual consistency and overall quality of summaries via a novel contrastive fine-tuning, called CONFIT. To tackle top factual errors from our annotation, we introduce additional contrastive loss with carefully designed hard negative samples and self-supervised dialogue-specific loss to capture the key information between speakers. We show that our model significantly reduces all kinds of factual errors on both SAMSum dialogue summarization and AMI meeting summarization. On both datasets, we achieve significant improvements over state-of-the-art baselines using both automatic metrics, ROUGE and BARTScore, and human evaluation. 2022.naacl-main.415 @@ -5811,14 +5811,14 @@ Investigating Crowdsourcing Protocols for Evaluating the Factual Consistency of Summaries XiangruTang - AlexanderFabbri + AlexanderFabbri HaoranLi ZimingMao GriffinAdams BoruiWang AsliCelikyilmaz YasharMehdad - DragomirRadev + DragomirRadev 5680-5692 Current pre-trained models applied for summarization are prone to factual inconsistencies that misrepresent the source text. Evaluating the factual consistency of summaries is thus necessary to develop better models. However, the human evaluation setup for evaluating factual consistency has not been standardized. To determine the factors that affect the reliability of the human evaluation, we crowdsource evaluations for factual consistency across state-of-the-art models on two news summarization datasets using the rating-based Likert Scale and ranking-based Best-Worst Scaling. Our analysis reveals that the ranking-based Best-Worst Scaling offers a more reliable measure of summary quality across datasets and that the reliability of Likert ratings highly depends on the target dataset and the evaluation design. To improve crowdsourcing reliability, we extend the scale of the Likert rating and present a scoring algorithm for Best-Worst Scaling that we call value learning. Our crowdsourcing guidelines will be publicly available to facilitate future work on factual consistency in summarization. 2022.naacl-main.417 @@ -5856,7 +5856,7 @@ YidingTan LinyangLi QiZhang - XuanjingHuang + XuanjingHuang 5721-5732 Prompt-based methods have been successfully applied in sentence-level few-shot learning tasks, mostly owing to the sophisticated design of templates and label words. However, when applied to token-level labeling tasks such as NER, it would be time-consuming to enumerate the template queries over all potential entity spans. In this work, we propose a more elegant method to reformulate NER tasks as LM problems without any templates. Specifically, we discard the template construction process while maintaining the word prediction paradigm of pre-training models to predict a class-related pivot word (or label word) at the entity position. Meanwhile, we also explore principled ways to automatically search for appropriate label words that the pre-trained models can easily adapt to. While avoiding the complicated template-based process, the proposed LM objective also reduces the gap between different objectives used in pre-training and fine-tuning, thus it can better benefit the few-shot performance. Experimental results demonstrate the effectiveness of the proposed method over bert-tagger and template-based method under few-shot settings. Moreover, the decoding speed of the proposed method is up to 1930.12 times faster than the template-based method. 2022.naacl-main.420 @@ -5893,8 +5893,8 @@ Exploiting Inductive Bias in Transformers for Unsupervised Disentanglement of Syntax and Semantics with <fixed-case>VAE</fixed-case>s GhaziFelhi - JosephLe Roux - DjaméSeddah + JosephLe Roux + DjaméSeddah 5763-5776 We propose a generative model for text generation, which exhibits disentangled latent representations of syntax and semantics. Contrary to previous work, this model does not need syntactic information such as constituency parses, or semantic information such as paraphrase pairs. Our model relies solely on the inductive bias found in attention-based architectures such as Transformers. In the attention of Transformers, keys handle information selection while values specify what information is conveyed. Our model, dubbed QKVAE, uses Attention in its decoder to read latent variables where one latent variable infers keys while another infers values. We run experiments on latent representations and experiments on syntax/semantics transfer which show that QKVAE displays clear signs of disentangled syntax and semantics. We also show that our model displays competitive syntax transfer capabilities when compared to supervised models and that comparable supervised models need a fairly large amount of data (more than 50K samples) to outperform it on both syntactic and semantic transfer. The code for our experiments is publicly available. 2022.naacl-main.423 @@ -5946,12 +5946,12 @@ A Holistic Framework for Analyzing the <fixed-case>COVID</fixed-case>-19 Vaccine Debate - Maria LeonorPacheco + Maria LeonorPacheco TunazzinaIslam MonalMahajan AndreyShor MingYin - LyleUngar + LyleUngar DanGoldwasser 5821-5839 The Covid-19 pandemic has led to infodemic of low quality information leading to poor health decisions. Combating the outcomes of this infodemic is not only a question of identifying false claims, but also reasoning about the decisions individuals make. In this work we propose a holistic analysis framework connecting stance and reason analysis, and fine-grained entity level moral sentiment analysis. We study how to model the dependencies between the different level of analysis and incorporate human insights into the learning process. Experiments show that our framework provides reliable predictions even in the low-supervision settings. @@ -5992,7 +5992,7 @@ Explaining Dialogue Evaluation Metrics using Adversarial Behavioral Analysis BaberKhalid - SungjinLee + SungjinLee 5871-5883 There is an increasing trend in using neural methods for dialogue model evaluation. Lack of a framework to investigate these metrics can cause dialogue models to reflect their biases and cause unforeseen problems during interactions. In this work, we propose an adversarial test-suite which generates problematic variations of various dialogue aspects, e.g. logical entailment, using automatic heuristics. We show that dialogue metrics for both open-domain and task-oriented settings are biased in their assessments of different conversation behaviors and fail to properly penalize problematic conversations, by analyzing their assessments of these problematic examples. We conclude that variability in training methodologies and data-induced biases are some of the main causes of these problems. We also conduct an investigation into the metric behaviors using a black-box interpretability model which corroborates our findings and provides evidence that metrics pay attention to the problematic conversational constructs signaling a misunderstanding of different conversation semantics. 2022.naacl-main.430 @@ -6007,7 +6007,7 @@ LauraVianna XuhuiZhou YejinChoi - Noah A.Smith + Noah A.Smith 5884-5906 The perceived toxicity of language can vary based on someone’s identity and beliefs, but this variation is often ignored when collecting toxic language datasets, resulting in dataset and model biases. We seek to understand the *who*, *why*, and *what* behind biases in toxicity annotations. In two online studies with demographically and politically diverse participants, we investigate the effect of annotator identities (*who*) and beliefs (*why*), drawing from social psychology research about hate speech, free speech, racist beliefs, political leaning, and more. We disentangle *what* is annotated as toxic by considering posts with three characteristics: anti-Black language, African American English (AAE) dialect, and vulgarity. Our results show strong associations between annotator identity and beliefs and their ratings of toxicity. Notably, more conservative annotators and those who scored highly on our scale for racist beliefs were less likely to rate anti-Black language as toxic, but more likely to rate AAE as toxic. We additionally present a case study illustrating how a popular toxicity detection system’s ratings inherently reflect only specific beliefs and perspectives. Our findings call for contextualizing toxicity labels in social variables, which raises immense implications for toxic language annotation and detection. 2022.naacl-main.431 @@ -6046,7 +6046,7 @@ <fixed-case>DACSA</fixed-case>: A large-scale Dataset for Automatic summarization of <fixed-case>C</fixed-case>atalan and <fixed-case>S</fixed-case>panish newspaper Articles EncarnaciónSegarra Soriano VicentAhuir - Lluís-F.Hurtado + Lluís-F.Hurtado JoséGonzález 5931-5943 The application of supervised methods to automatic summarization requires the availability of adequate corpora consisting of a set of document-summary pairs. As in most Natural Language Processing tasks, the great majority of available datasets for summarization are in English, making it difficult to develop automatic summarization models for other languages. Although Spanish is gradually forming part of some recent summarization corpora, it is not the same for minority languages such as Catalan. In this work, we describe the construction of a corpus of Catalan and Spanish newspapers, the Dataset for Automatic summarization of Catalan and Spanish newspaper Articles (DACSA) corpus. It is a high-quality large-scale corpus that can be used to train summarization models for Catalan and Spanish.We have carried out an analysis of the corpus, both in terms of the style of the summaries and the difficulty of the summarization task. In particular, we have used a set of well-known metrics in the summarization field in order to characterize the corpus. Additionally, for benchmarking purposes, we have evaluated the performances of some extractive and abstractive summarization systems on the DACSA corpus. @@ -6061,7 +6061,7 @@ DanielKhashabi SuchinGururangan KarishmaMandyam - Noah A.Smith + Noah A.Smith 5944-5958 When an NLP model is trained on text data from one time period and tested or deployed on data from another, the resulting temporal misalignment can degrade end-task performance. In this work, we establish a suite of eight diverse tasks across different domains (social media, science papers, news, and reviews) and periods of time (spanning five years or more) to quantify the effects of temporal misalignment. Our study is focused on the ubiquitous setting where a pretrained model is optionally adapted through continued domain-specific pretraining, followed by task-specific finetuning. We establish a suite of tasks across multiple domains to study temporal misalignment in modern NLP systems. We find stronger effects of temporal misalignment on task performance than have been previously reported. We also find that, while temporal adaptation through continued pretraining can help, these gains are small compared to task-specific finetuning on data from the target time period. Our findings motivate continued research to improve temporal robustness of NLP models. 2022.naacl-main.435 @@ -6073,7 +6073,7 @@ <fixed-case>MCSE</fixed-case>: <fixed-case>M</fixed-case>ultimodal Contrastive Learning of Sentence Embeddings MiaoranZhang MariusMosbach - David IfeoluwaAdelani + David IfeoluwaAdelani Michael A.Hedderich DietrichKlakow 5959-5969 @@ -6090,7 +6090,7 @@ ChenweiZhang Shu’angLi LijieWen - PhilipYu + PhilipYu 5970-5980 Unsupervised relation extraction aims to extract the relationship between entities from natural language sentences without prior information on relational scope or distribution. Existing works either utilize self-supervised schemes to refine relational feature signals by iteratively leveraging adaptive clustering and classification that provoke gradual drift problems, or adopt instance-wise contrastive learning which unreasonably pushes apart those sentence pairs that are semantically similar. To overcome these defects, we propose a novel contrastive learning framework named HiURE, which has the capability to derive hierarchical signals from relational feature space using cross hierarchy attention and effectively optimize relation representation of sentences under exemplar-wise contrastive learning. Experimental results on two public datasets demonstrate the advanced effectiveness and robustness of HiURE on unsupervised relation extraction when compared with state-of-the-art models. 2022.naacl-main.437 @@ -6172,7 +6172,7 @@ Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Student Research Workshop DaphneIppolito Liunian HaroldLi - Maria LeonorPacheco + Maria LeonorPacheco DanqiChen NianwenXue Association for Computational Linguistics @@ -6214,7 +6214,7 @@ MengsayLoem ShoTakase MasahiroKaneko - NaoakiOkazaki + NaoakiOkazaki 16-24 Neural models trained with large amount of parallel data have achieved impressive performance in abstractive summarization tasks. However, large-scale parallel corpora are expensive and challenging to construct. In this work, we introduce a low-cost and effective strategy, ExtraPhrase, to augment training data for abstractive summarization tasks. ExtraPhrase constructs pseudo training data in two steps: extractive summarization and paraphrasing. We extract major parts of an input text in the extractive summarization step and obtain its diverse expressions with the paraphrasing step. Through experiments, we show that ExtraPhrase improves the performance of abstractive summarization tasks by more than 0.50 points in ROUGE scores compared to the setting without data augmentation. ExtraPhrase also outperforms existing methods such as back-translation and self-training. We also show that ExtraPhrase is significantly effective when the amount of genuine training data is remarkably small, i.e., a low-resource setting. Moreover, ExtraPhrase is more cost-efficient than the existing approaches 2022.naacl-srw.3 @@ -6280,7 +6280,7 @@ XiruoDing KevinLybarger JustinTauscher - TrevorCohen + TrevorCohen 68-75 Cognitive distortions are counterproductive patterns of thinking that are one of the targets of cognitive behavioral therapy (CBT). These can be challenging for clinicians to detect, especially those without extensive CBT training or supervision. Text classification methods can approximate expert clinician judgment in the detection of frequently occurring cognitive distortions in text-based therapy messages. However, performance with infrequent distortions is relatively poor. In this study, we address this sparsity problem with two approaches: Data Augmentation and Domain-Specific Model. The first approach includes Easy Data Augmentation, back translation, and mixup techniques. The second approach utilizes a domain-specific pretrained language model, MentalBERT. To examine the viability of different data augmentation methods, we utilized a real-world dataset of texts between therapists and clients diagnosed with serious mental illness that was annotated for distorted thinking. We found that with optimized parameter settings, mixup was helpful for rare classes. Performance improvements with an augmented model, MentalBERT, exceed those obtained with data augmentation. 2022.naacl-srw.9 @@ -6315,7 +6315,7 @@ Analysing the Correlation between Lexical Ambiguity and Translation Quality in a Multimodal Setting using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et AliHatami PaulBuitelaar - MihaelArcan + MihaelArcan 89-95 Multimodal Neural Machine Translation is focusing on using visual information to translate sentences in the source language into the target language. The main idea is to utilise information from visual modalities to promote the output quality of the text-based translation model. Although the recent multimodal strategies extract the most relevant visual information in images, the effectiveness of using visual information on translation quality changes based on the text dataset. Due to this, this work studies the impact of leveraging visual information in multimodal translation models of ambiguous sentences. Our experiments analyse the Multi30k evaluation dataset and calculate ambiguity scores of sentences based on the WordNet hierarchical structure. To calculate the ambiguity of a sentence, we extract the ambiguity scores for all nouns based on the number of senses in WordNet. The main goal is to find in which sentences, visual content can improve the text-based translation model. We report the correlation between the ambiguity scores and translation quality extracted for all sentences in the English-German dataset. 2022.naacl-srw.12 @@ -6566,7 +6566,7 @@ Unifying Parsing and Tree-Structured Models for Generating Sentence Semantic Representations AntoineSimoulin - BenoitCrabbé + BenoitCrabbé 267-276 We introduce a novel tree-based model that learns its composition function together with its structure. The architecture produces sentence embeddings by composing words according to an induced syntactic tree. The parsing and the composition functions are explicitly connected and, therefore, learned jointly. As a result, the sentence embedding is computed according to an interpretable linguistic pattern and may be used on any downstream task. We evaluate our encoder on downstream tasks, and we observe that it outperforms tree-based models relying on external parsers. In some configurations, it is even competitive with Bert base model. Our model is capable of supporting multiple parser architectures. We exploit this property to conduct an ablation study by comparing different parser initializations. We explore to which extent the trees produced by our model compare with linguistic structures and how this initialization impacts downstream performances. We empirically observe that downstream supervision troubles producing stable parses and preserving linguistically relevant structures. 2022.naacl-srw.33 @@ -6579,7 +6579,7 @@ GerardSant Gerard I.Gállego BelenAlastruey - Marta RuizCosta-jussà + Marta RuizCosta-jussà 277-284 Transformer-based models have been achieving state-of-the-art results in several fields of Natural Language Processing. However, its direct application to speech tasks is not trivial. The nature of this sequences carries problems such as long sequence lengths and redundancy between adjacent tokens. Therefore, we believe that regular self-attention mechanism might not be well suited for it. Different approaches have been proposed to overcome these problems, such as the use of efficient attention mechanisms. However, the use of these methods usually comes with a cost, which is a performance reduction caused by information loss. In this study, we present the Multiformer, a Transformer-based model which allows the use of different attention mechanisms on each head. By doing this, the model is able to bias the self-attention towards the extraction of more diverse token interactions, and the information loss is reduced. Finally, we perform an analysis of the head contributions, and we observe that those architectures where all heads relevance is uniformly distributed obtain better results. Our results show that mixing attention patterns along the different heads and layers outperforms our baseline by up to 0.7 BLEU. 2022.naacl-srw.34 @@ -6616,7 +6616,7 @@ Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: System Demonstrations HannanehHajishirzi QiangNing - AviSil + AviSil Association for Computational Linguistics
Hybrid: Seattle, Washington + Online
July @@ -6674,8 +6674,8 @@ <fixed-case>ZS</fixed-case>4<fixed-case>IE</fixed-case>: A toolkit for Zero-Shot Information Extraction with simple Verbalizations OscarSainz HaolingQiu - OierLopez de Lacalle - EnekoAgirre + OierLopez de Lacalle + EnekoAgirre BonanMin 27-38 The current workflow for Information Extraction (IE) analysts involves the definition of the entities/relations of interest and a training corpus with annotated examples. In this demonstration we introduce a new workflow where the analyst directly verbalizes the entities/relations, which are then used by a Textual Entailment model to perform zero-shot IE. We present the design and implementation of a toolkit with a user interface, as well as experiments on four IE tasks that show that the system achieves very good performance at zero-shot learning using only 5–15 minutes per type of a user’s effort. Our demonstration system is open-sourced at https://github.com/BBN-E/ZS4IE. A demonstration video is available at https://vimeo.com/676138340. @@ -6720,7 +6720,7 @@ ShaLi PengfeiYu HongweiWang - TuanLai + TuanLai XudongLin ZiqiWang IrisLiu @@ -6746,8 +6746,8 @@ CarlVondrick JiaweiHan DanRoth - Shih-FuChang - MarthaPalmer + Shih-FuChang + MarthaPalmer HengJi 54-63 We introduce RESIN-11, a new schema-guided event extraction&prediction framework that can be applied to a large variety of newsworthy scenarios. The framework consists of two parts: (1) an open-domain end-to-end multimedia multilingual information extraction system with weak-supervision and zero-shot learningbased techniques. (2) schema matching and schema-guided event prediction based on our curated schema library. We build a demo website based on our dockerized system and schema library publicly available for installation (https://github.com/RESIN-KAIROS/RESIN-11). We also include a video demonstrating the system. @@ -6760,9 +6760,9 @@ RobertVacareanu George C.G.Barbosa EnriqueNoriega-Atala - GusHahn-Powell + GusHahn-Powell RebeccaSharp - Marco A.Valenzuela-Escárcega + Marco A.Valenzuela-Escárcega MihaiSurdeanu 64-70 We propose a system that assists a user in constructing transparent information extraction models, consisting of patterns (or rules) written in a declarative language, through program synthesis. Users of our system can specify their requirements through the use of examples,which are collected with a search interface. The rule-synthesis system proposes rule candidates and the results of applying them on a textual corpus; the user has the option to accept the candidate, request another option, or adjust the examples provided to the system. Through an interactive evaluation, we show that our approach generates high-precision rules even in a 1-shot setting. On a second evaluation on a widely-used relation extraction dataset (TACRED), our method generates rules that outperform considerably manually written patterns. Our code, demo, and documentation is available at https://clulab.github.io/odinsynth. @@ -6868,7 +6868,7 @@ Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Tutorial Abstracts MiguelBallesteros YuliaTsvetkov - Cecilia O.Alm + Cecilia O.Alm Association for Computational Linguistics
Seattle, United States
July @@ -6970,7 +6970,7 @@ RuiZhang YangfengJi YueZhang - Rebecca J.Passonneau + Rebecca J.Passonneau 39-47 Current NLP models heavily rely on effective representation learning algorithms. Contrastive learning is one such technique to learn an embedding space such that similar data sample pairs have close representations while dissimilar samples stay far apart from each other. It can be used in supervised or unsupervised settings using different loss functions to produce task-specific or general-purpose representations. While it has originally enabled the success for vision tasks, recent years have seen a growing number of publications in contrastive NLP. This first line of works not only delivers promising performance improvements in various NLP tasks, but also provides desired characteristics such as task-agnostic sentence representation, faithful text generation, data-efficient learning in zero-shot and few-shot settings, interpretability and explainability. In this tutorial, we aim to provide a gentle introduction to the fundamentals of contrastive learning approaches and the theory behind them. We then survey the benefits and the best practices of contrastive learning for various downstream NLP applications including Text Classification, Question Answering, Summarization, Text Generation, Interpretability and Explainability, Commonsense Knowledge and Reasoning, Vision-and-Language.This tutorial intends to help researchers in the NLP and computational linguistics community to understand this emerging topic and promote future research directions of using contrastive learning for NLP applications. 2022.naacl-tutorials.6 @@ -7002,7 +7002,7 @@ JinseokNam SarthakAhuja Jin-MyungWon - SungjinLee + SungjinLee 1-8 Skill routing is an important component in large-scale conversational systems. In contrast to traditional rule-based skill routing, state-of-the-art systems use a model-based approach to enable natural conversations. To provide supervision signal required to train such models, ideas such as human annotation, replication of a rule-based system, relabeling based on user paraphrases, and bandit-based learning were suggested. However, these approaches: (a) do not scale in terms of the number of skills and skill on-boarding, (b) require a very costly expert annotation/rule-design, (c) introduce risks in the user experience with each model update. In this paper, we present a scalable self-learning approach to explore routing alternatives without causing abrupt policy changes that break the user experience, learn from the user interaction, and incrementally improve the routing via frequent model refreshes. To enable such robust frequent model updates, we suggest a simple and effective approach that ensures controlled policy updates for individual domains, followed by an off-policy evaluation for making deployment decisions without any need for lengthy A/B experimentation. We conduct various offline and online A/B experiments on a commercial large-scale conversational system to demonstrate the effectiveness of the proposed method in real-world production settings. 2022.naacl-industry.1 @@ -7026,7 +7026,7 @@
Augmenting Poetry Composition with <fixed-case>V</fixed-case>erse by <fixed-case>V</fixed-case>erse - DavidUthus + DavidUthus MariaVoitovich R.J.Mical 18-26 @@ -7119,7 +7119,7 @@ ShoHoshino HidetakaKamigaito HiroyaTakamura - ManabuOkumura + ManabuOkumura 69-78 Writing an ad text that attracts people and persuades them to click or act is essential for the success of search engine advertising. Therefore, ad creators must consider various aspects of advertising appeals (A^3) such as the price, product features, and quality. However, products and services exhibit unique effective A^3 for different industries. In this work, we focus on exploring the effective A^3 for different industries with the aim of assisting the ad creation process. To this end, we created a dataset of advertising appeals and used an existing model that detects various aspects for ad texts. Our experiments demonstrated %through correlation analysis that different industries have their own effective A^3 and that the identification of the A^3 contributes to the estimation of advertising performance. 2022.naacl-industry.9 @@ -7158,7 +7158,7 @@ Distantly Supervised Aspect Clustering And Naming For <fixed-case>E</fixed-case>-Commerce Reviews PrateekSircar AniketChakrabarti - DeepakGupta + DeepakGupta AnirbanMajumdar 94-102 Product aspect extraction from reviews is a critical task for e-commerce services to understand customer preferences and pain points. While aspect phrases extraction and sentiment analysis have received a lot of attention, clustering of aspect phrases and assigning human readable names to clusters in e-commerce reviews is an extremely important and challenging problem due to the scale of the reviews that makes human review infeasible. In this paper, we propose fully automated methods for clustering aspect words and generating human readable names for the clusters without any manually labeled data. We train transformer based sentence embeddings that are aware of unique e-commerce language characteristics (eg. incomplete sentences, spelling and grammar errors, vernacular etc.). We also train transformer based sequence to sequence models to generate human readable aspect names from clusters. Both the models are trained using heuristic based distant supervision. Additionally, the models are used to improve each other. Extensive empirical testing showed that the clustering model improves the Silhouette Score by 64% when compared to the state-of-the-art baseline and the aspect naming model achieves a high ROUGE-L score of 0.79. @@ -7185,7 +7185,7 @@ YeyunGong JianJiao RuofeiZhang - TimothyBaldwin + TimothyBaldwin NanDuan 112-120 Pre-trained language models (PLMs) have dramatically improved performance for many natural language processing (NLP) tasks in domains such as finance and healthcare. However, the application of PLMs in the domain of commerce, especially marketing and advertising, remains less studied. In this work, we adapt pre-training methods to the domain of commerce, by proposing CULG, a large-scale commercial universal language generation model which is pre-trained on a corpus drawn from 10 markets across 7 languages. We propose 4 commercial generation tasks and a two-stage training strategy for pre-training, and demonstrate that the proposed strategy yields performance improvements on three generation tasks as compared to single-stage pre-training. Extensive experiments show that our model outperforms other models by a large margin on commercial generation tasks, and we conclude with a discussion on additional applications over other markets, languages, and tasks. @@ -7240,7 +7240,7 @@ Asynchronous Convergence in Multi-Task Learning via Knowledge Distillation from Converged Tasks - WeiyiLu + WeiyiLu SunnyRajagopalan PriyankaNigam JaspreetSingh @@ -7315,7 +7315,7 @@ Intent Discovery for Enterprise Virtual Assistants: Applications of Utterance Embedding and Clustering to Intent Mining MinhuaChen BadrinathJayakumar - MichaelJohnston + MichaelJohnston S. EmanMahmoodi DanielPressel 197-208 @@ -7343,7 +7343,7 @@ Lightweight Transformers for Conversational <fixed-case>AI</fixed-case> DanielPressel WenshuoLiu - MichaelJohnston + MichaelJohnston MinhuaChen 221-229 To understand how training on conversational language impacts performance of pre-trained models on downstream dialogue tasks, we build compact Transformer-based Language Models from scratch on several large corpora of conversational data. We compare the performance and characteristics of these models against BERT and other strong baselines on dialogue probing tasks. Commercial dialogue systems typically require a small footprint and fast execution time, but recent trends are in the other direction, with an ever-increasing number of parameters, resulting in difficulties in model deployment. We focus instead on training fast, lightweight models that excel at natural language understanding (NLU) and can replace existing lower-capacity conversational AI models with similar size and speed. In the process, we develop a simple but unique curriculum-based approach that moves from general-purpose to dialogue-targeted both in terms of data and objective. Our resultant models have around 1/3 the number of parameters of BERT-base and produce better representations for a wide array of intent detection datasets using linear and Mutual-Information probing techniques. Additionally, the models can be easily fine-tuned on a single consumer GPU card and deployed in near real-time production environments. @@ -7369,7 +7369,7 @@ What Do Users Care About? Detecting Actionable Insights from User Feedback KasturiBhattacharjee RashmiGangadharaiah - KathleenMcKeown + KathleenMcKeown DanRoth 239-246 Users often leave feedback on a myriad of aspects of a product which, if leveraged successfully, can help yield useful insights that can lead to further improvements down the line. Detecting actionable insights can be challenging owing to large amounts of data as well as the absence of labels in real-world scenarios. In this work, we present an aggregation and graph-based ranking strategy for unsupervised detection of these insights from real-world, noisy, user-generated feedback. Our proposed approach significantly outperforms strong baselines on two real-world user feedback datasets and one academic dataset. @@ -7396,7 +7396,7 @@ PoojaHiranandani ShaynaGardiner ChengChen - SimonCorston-Oliver + SimonCorston-Oliver Xue-YongFu 259-267 For agents at a contact centre receiving calls, the most important piece of information is the reason for a given call. An agent cannot provide support on a call if they do not know why a customer is calling. In this paper we describe our implementation of a commercial system to detect Purpose of Call statements in English business call transcripts in real time. We present a detailed analysis of types of Purpose of Call statements and language patterns related to them, discuss an approach to collect rich training data by bootstrapping from a set of rules to a neural model, and describe a hybrid model which consists of a transformer-based classifier and a set of rules by leveraging insights from the analysis of call transcripts. The model achieved 88.6 F1 on average in various types of business calls when tested on real life data and has low inference time. We reflect on the challenges and design decisions when developing and deploying the system. @@ -7462,8 +7462,8 @@ VishwajeetKumar SamarthBharadwaj MustafaCanim - MichaelGlass - AlfioGliozzo + MichaelGlass + AlfioGliozzo FeifeiPan JaydeepSen KarthikSankaranarayanan @@ -7514,7 +7514,7 @@ Siva SankalpPatel J WilliamMurdock SaloniPotdar - SachindraJoshi + SachindraJoshi 334-343 Dialogue systems can benefit from being able to search through a corpus of text to find information relevant to user requests, especially when encountering a request for which no manually curated response is available. The state-of-the-art technology for neural dense retrieval or re-ranking involves deep learning models with hundreds of millions of parameters. However, it is difficult and expensive to get such models to operate at an industrial scale, especially for cloud services that often need to support a big number of individually customized dialogue systems, each with its own text corpus. We report our work on enabling advanced neural dense retrieval systems to operate effectively at scale on relatively inexpensive hardware. We compare with leading alternative industrial solutions and show that we can provide a solution that is effective, fast, and cost-efficient. 2022.naacl-industry.37 @@ -7530,7 +7530,7 @@ JonathanJohnston Xue-YongFu Shashi BhushanTn - SimonCorston-Oliver + SimonCorston-Oliver 344-352 An Entity Linking system aligns the textual mentions of entities in a text to their corresponding entries in a knowledge base. However, deploying a neural entity linking system for efficient real-time inference in production environments is a challenging task. In this work, we present a neural entity linking system that connects the product and organization type entities in business conversations to their corresponding Wikipedia and Wikidata entries. The proposed system leverages Elasticsearch to ensure inference efficiency when deployed in a resource limited cloud machine, and obtains significant improvements in terms of inference speed and memory consumption while retaining high accuracy. 2022.naacl-industry.38 diff --git a/data/xml/2022.nejlt.xml b/data/xml/2022.nejlt.xml index fff99a1b9b..5f591b03ec 100644 --- a/data/xml/2022.nejlt.xml +++ b/data/xml/2022.nejlt.xml @@ -14,7 +14,7 @@ Foreword to <fixed-case>NEJLT</fixed-case> Volume 8, 2022 - LeonDerczynski + LeonDerczynski An introduction to the Northern European Journal of Language Technology in 2022 2022.nejlt-1.1 10.3384/nejlt.2000-1533.2022.4617 @@ -24,7 +24,7 @@ Task-dependent Optimal Weight Combinations for Static Embeddings NathanielRobinson NathanielCarlson - DavidMortensen + DavidMortensen ElizabethVargas ThomasFackrell NancyFulda @@ -36,7 +36,7 @@ Building Analyses from Syntactic Inference in Local Languages: An <fixed-case>HPSG</fixed-case> Grammar Inference System KristenHowell - Emily M.Bender + Emily M.Bender We present a grammar inference system that leverages linguistic knowledge recorded in the form of annotations in interlinear glossed text (IGT) and in a meta-grammar engineering system (the LinGO Grammar Matrix customization system) to automatically produce machine-readable HPSG grammars. Building on prior work to handle the inference of lexical classes, stems, affixes and position classes, and preliminary work on inferring case systems and word order, we introduce an integrated grammar inference system that covers a wide range of fundamental linguistic phenomena. System development was guided by 27 geneologically and geographically diverse languages, and we test the system’s cross-linguistic generalizability on an additional 5 held-out languages, using datasets provided by field linguists. Our system out-performs three baseline systems in increasing coverage while limiting ambiguity and producing richer semantic representations, while also producing richer representations than previous work in grammar inference. 2022.nejlt-1.3 10.3384/nejlt.2000-1533.2022.4017 @@ -46,7 +46,7 @@ Bias Identification and Attribution in <fixed-case>NLP</fixed-case> Models With Regression and Effect Sizes ErenayDayanik Ngoc ThangVu - SebastianPadó + SebastianPadó In recent years, there has been an increasing awareness that many NLP systems incorporate biases of various types (e.g., regarding gender or race) which can have significant negative consequences. At the same time, the techniques used to statistically analyze such biases are still relatively simple. Typically, studies test for the presence of a significant difference between two levels of a single bias variable (e.g., male vs. female) without attention to potential confounders, and do not quantify the importance of the bias variable. This article proposes to analyze bias in the output of NLP systems using multivariate regression models. They provide a robust and more informative alternative which (a) generalizes to multiple bias variables, (b) can take covariates into account, (c) can be combined with measures of effect size to quantify the size of bias. Jointly, these effects contribute to a more robust statistical analysis of bias that can be used to diagnose system behavior and extract informative examples. We demonstrate the benefits of our method by analyzing a range of current NLP models on one regression and one classification tasks (emotion intensity prediction and coreference resolution, respectively). 2022.nejlt-1.4 10.3384/nejlt.2000-1533.2022.3505 @@ -55,7 +55,7 @@ Policy-focused Stance Detection in Parliamentary Debate Speeches GavinAbercrombie - RizaBatista-Navarro + RizaBatista-Navarro Legislative debate transcripts provide citizens with information about the activities of their elected representatives, but are difficult for people to process. We propose the novel task of policy-focused stance detection, in which both the policy proposals under debate and the position of the speakers towards those proposals are identified. We adapt a previously existing dataset to include manual annotations of policy preferences, an established schema from political science. We evaluate a range of approaches to the automatic classification of policy preferences and speech sentiment polarity, including transformer-based text representations and a multi-task learning paradigm. We find that it is possible to identify the policies under discussion using features derived from the speeches, and that incorporating motion-dependent debate modelling, previously used to classify speech sentiment, also improves performance in the classification of policy preferences. We analyse the output of the best performing system, finding that discriminating features for the task are highly domain-specific, and that speeches that address policy preferences proposed by members of the same party can be among the most difficult to predict. 2022.nejlt-1.5 10.3384/nejlt.2000-1533.2022.3454 @@ -98,7 +98,7 @@ Contextualized embeddings for semantic change detection: Lessons learned AndreyKutuzov ErikVelldal - LiljaØvrelid + LiljaØvrelid We present a qualitative analysis of the (potentially erroneous) outputs of contextualized embedding-based methods for detecting diachronic semantic change. First, we introduce an ensemble method outperforming previously described contextualized approaches. This method is used as a basis for an in-depth analysis of the degrees of semantic change predicted for English words across 5 decades. Our findings show that contextualized methods can often predict high change scores for words which are not undergoing any real diachronic semantic shift in the lexicographic sense of the term (or at least the status of these shifts is questionable). Such challenging cases are discussed in detail with examples, and their linguistic categorization is proposed. Our conclusion is that pre-trained contextualized language models are prone to confound changes in lexicographic senses and changes in contextual variance, which naturally stem from their distributional nature, but is different from the types of issues observed in methods based on static embeddings. Additionally, they often merge together syntactic and semantic aspects of lexical entities. We propose a range of possible future solutions to these issues. 2022.nejlt-1.9 10.3384/nejlt.2000-1533.2022.3478 diff --git a/data/xml/2022.nidcp.xml b/data/xml/2022.nidcp.xml index ea377c2b4b..82febc67f1 100644 --- a/data/xml/2022.nidcp.xml +++ b/data/xml/2022.nidcp.xml @@ -4,9 +4,9 @@ Proceedings of the 2nd Workshop on Novel Incentives in Data Collection from People: models, implementations, challenges and results within LREC 2022 ChrisCallison-Burch - ChristopherCieri + ChristopherCieri JamesFiumara - MarkLiberman + MarkLiberman European Language Resources Association
Marseille, France
June @@ -34,7 +34,7 @@ Use of a Citizen Science Platform for the Creation of a Language Resource to Study Bias in Language Models for <fixed-case>F</fixed-case>rench: A Case Study KarënFort - AurélieNévéol + AurélieNévéol YoannDupont JulienBezançon 8–13 @@ -57,7 +57,7 @@ Creating <fixed-case>M</fixed-case>exican <fixed-case>S</fixed-case>panish Language Resources through the Social Service Program Carlos DanielHernandez Mena - Ivan VladimirMeza Ruiz + Ivan VladimirMeza Ruiz 20–24 This work presents the path toward the creation of eight Spoken Language Resources under the umbrella of the Mexican Social Service national program. This program asks undergraduate students to donate time and work for the benefit of their society as a requirement to receive their degree. The program has thousands of options for the students who enroll. We show how we created a program which has resulted in the creation of open language resources which now are freely available in different repositories. We estimate that this exercise is equivalent to a budget of more than half a million US dollars. However, since the program is based on retribution from the students to their communities there has not been a necessity of a financial budget. 2022.nidcp-1.4 diff --git a/data/xml/2022.nlg4health.xml b/data/xml/2022.nlg4health.xml index 087188a499..eca239c5c2 100644 --- a/data/xml/2022.nlg4health.xml +++ b/data/xml/2022.nlg4health.xml @@ -3,7 +3,7 @@ Proceedings of the First Workshop on Natural Language Generation in Healthcare - EmielKrahmer + EmielKrahmer KathyMcCoy EhudReiter Association for Computational Linguistics @@ -52,7 +52,7 @@ Towards Development of an Automated Health Coach LeighanneHsu RommyMarquez Hernandez - KathleenMcCoy + KathleenMcCoy KeithDecker AjithVemuri GregDominick @@ -66,7 +66,7 @@ Personalizing Weekly Diet Reports ElenaMonfroglio LucasAnselma - AlessandroMazzei + AlessandroMazzei 40-45 In this paper we present the main components of a weekly diet report generator (DRG) in natural language. The idea is to produce a text that contains information on the adherence of the dishes eaten during a week to the Mediterranean diet. The system is based on a user model, a database of the dishes eaten during the week and on the automatic computation of the Mediterranean Diet Score. All these sources of information are exploited to produce a highly personalized text. The system has two main goals, related to two different kinds of users: on the one hand, when used by dietitians, the main goal is to highlight the most salient medical information of the patient diet and, on the other hand, when used by final users, the main goal is to educate them toward a Mediterranean style of eating. 2022.nlg4health-1.5 diff --git a/data/xml/2022.nllp.xml b/data/xml/2022.nllp.xml index d9e87d6e33..f79945a830 100644 --- a/data/xml/2022.nllp.xml +++ b/data/xml/2022.nllp.xml @@ -22,7 +22,7 @@ On Breadth Alone: Improving the Precision of Terminology Extraction Systems on Patent Corpora SeanNordquistNew York University - AdamMeyersNew York University + AdamMeyersNew York University 1-11 Automatic Terminology Extraction (ATE) methods are a class of linguistic, statistical, machine learning or hybrid techniques for identifying terminology in a set of documents. Most modern ATE methods use a statistical measure of how important or characteristic a potential term is to a foreground corpus by using a second background corpus as a baseline. While many variables with ATE methods have been carefully evaluated and tuned in the literature, the effects of choosing a particular background corpus over another are not obvious. In this paper, we propose a methodology that allows us to adjust the relative breadth of the foreground and background corpora in patent documents by taking advantage of the Cooperative Patent Classification (CPC) scheme. Our results show that for every foreground corpus, the broadest background corpus gave the worst performance, in the worst case that difference is 17%. Similarly, the least broad background corpus gave suboptimal performance in all three experiments. We also demonstrate qualitative differences between background corpora – narrower background corpora tend towards more technical output. We expect our results to generalize to terminology extraction for other legal and technical documents and, generally, to the foreground/background approach to ATE. 2022.nllp-1.1 @@ -285,7 +285,7 @@ Detecting Relevant Differences Between Similar Legal Texts XiangLiUniversity of Ottawa JiaxunGaoUniversity of Ottawa - DianaInkpenUniversity of Ottawa + DianaInkpenUniversity of Ottawa WolfgangAlschnerUniversity of Ottawa 256-264 Given two similar legal texts, is it useful to be able to focus only on the parts that contain relevant differences. However, because of variation in linguistic structure and terminology, it is not easy to identify true semantic differences. An accurate difference detection model between similar legal texts is therefore in demand, in order to increase the efficiency of legal research and document analysis. In this paper, we automatically label a training dataset of sentence pairs using an existing legal resource of international investment treaties that were already manually annotated with metadata. Then we propose models based on state-of-the-art deep learning techniques for the novel task of detecting relevant differences. In addition to providing solutions for this task, we include models for automatically producing metadata for the treaties that do not have it. @@ -336,7 +336,7 @@ VinayAggarwalAdobe AnanyaGaneshUniversity of Colorado Boulder NiyatiChhayaAdobe Research - NandakishoreKambhatlaAdobe Research + NandakishoreKambhatlaAdobe Research 296-304 2022.nllp-1.28 garimella-etal-2022-text @@ -362,7 +362,7 @@ Computing and Exploiting Document Structure to Improve Unsupervised Extractive Summarization of Legal Case Decisions YangZhongUniversity of Pittsburgh - DianeLitmanUniversity of Pittsburgh + DianeLitmanUniversity of Pittsburgh 322-337 Though many algorithms can be used to automatically summarize legal case decisions, most fail to incorporate domain knowledge about how important sentences in a legal decision relate to a representation of its document structure. For example, analysis of a legal case sum- marization dataset demonstrates that sentences serving different types of argumentative roles in the decision appear in different sections of the document. In this work, we propose an unsupervised graph-based ranking model that uses a reweighting algorithm to exploit properties of the document structure of legal case decisions. We also explore the impact of using different methods to compute the document structure. Results on the Canadian Legal Case Law dataset show that our proposed method outperforms several strong baselines. 2022.nllp-1.30 diff --git a/data/xml/2022.nlp4call.xml b/data/xml/2022.nlp4call.xml index 6a069f117d..b0a0f4040e 100644 --- a/data/xml/2022.nlp4call.xml +++ b/data/xml/2022.nlp4call.xml @@ -8,7 +8,7 @@ ThomasFrançois PietDesmet FrederikCornillie - ArneJönsson + ArneJönsson EvelinaRennes LiU Electronic Press
Louvain-la-Neuve, Belgium
@@ -73,7 +73,7 @@ Generating and authoring high-variability exercises from authentic texts TanjaHeck - DetmarMeurers + DetmarMeurers 61–71 2022.nlp4call-1.7 heck-meurers-2022-generating @@ -120,7 +120,7 @@ A Transformer for <fixed-case>SAG</fixed-case>: What Does it Grade? NicoWillms - UlrikePado + UlrikePado 114–122 2022.nlp4call-1.12 willms-pado-2022-transformer diff --git a/data/xml/2022.nlp4convai.xml b/data/xml/2022.nlp4convai.xml index 2673f7f4a3..469acff260 100644 --- a/data/xml/2022.nlp4convai.xml +++ b/data/xml/2022.nlp4convai.xml @@ -4,7 +4,7 @@ Proceedings of the 4th Workshop on NLP for Conversational AI BingLiu - AlexandrosPapangelis + AlexandrosPapangelis StefanUltes AbhinavRastogi Yun-NungChen @@ -42,7 +42,7 @@ ZhiweiLiu YeLiu CaimingXiong - PhilipYu + PhilipYu 12-20 Pre-trained Transformer-based models were reported to be robust in intent classification. In this work, we first point out the importance of in-domain out-of-scope detection in few-shot intent recognition tasks and then illustrate the vulnerability of pre-trained Transformer-based models against samples that are in-domain but out-of-scope (ID-OOS). We construct two new datasets, and empirically show that pre-trained models do not perform well on both ID-OOS examples and general out-of-scope examples, especially on fine-grained few-shot intent detection tasks. 2022.nlp4convai-1.2 @@ -95,7 +95,7 @@ Extracting and Inferring Personal Attributes from Dialogue ZhilinWang XuhuiZhou - RikKoncel-Kedziorski + RikKoncel-Kedziorski AlexMarin FeiXia 58-69 @@ -111,7 +111,7 @@ XiaoyuShen GianniBarlacchi BillByrne - Adriàde Gispert + Adriàde Gispert 70-76 In conversational QA, models have to leverage information in previous turns to answer upcoming questions. Current approaches, such as Question Rewriting, struggle to extract relevant information as the conversation unwinds. We introduce the Common Ground (CG), an approach to accumulate conversational information as it emerges and select the relevant information at every turn. We show that CG offers a more efficient and human-like way to exploit conversational information compared to existing approaches, leading to improvements on Open Domain Conversational QA. 2022.nlp4convai-1.7 @@ -137,8 +137,8 @@ <fixed-case>KG</fixed-case>-<fixed-case>CR</fixed-case>u<fixed-case>SE</fixed-case>: Recurrent Walks over Knowledge Graph for Explainable Conversation Reasoning using Semantic Embeddings RajdeepSarkar - MihaelArcan - JohnMcCrae + MihaelArcan + JohnMcCrae 98-107 Knowledge-grounded dialogue systems utilise external knowledge such as knowledge graphs to generate informative and appropriate responses. A crucial challenge of such systems is to select facts from a knowledge graph pertinent to the dialogue context for response generation. This fact selection can be formulated as path traversal over a knowledge graph conditioned on the dialogue context. Such paths can originate from facts mentioned in the dialogue history and terminate at the facts to be mentioned in the response. These walks, in turn, provide an explanation of the flow of the conversation. This work proposes KG-CRuSE, a simple, yet effective LSTM based decoder that utilises the semantic information in the dialogue history and the knowledge graph elements to generate such paths for effective conversation explanation. Extensive evaluations showed that our model outperforms the state-of-the-art models on the OpenDialKG dataset on multiple metrics. 2022.nlp4convai-1.9 @@ -175,7 +175,7 @@ Multimodal Conversational <fixed-case>AI</fixed-case>: A Survey of Datasets and Approaches - AnirudhSundar + AnirudhSundar LarryHeck 131-147 As humans, we experience the world with all our senses or modalities (sound, sight, touch, smell, and taste). We use these modalities, particularly sight and touch, to convey and interpret specific meanings. Multimodal expressions are central to conversations; a rich set of modalities amplify and often compensate for each other. A multimodal conversational AI system answers questions, fulfills tasks, and emulates human conversations by understanding and expressing itself via multiple modalities. This paper motivates, defines, and mathematically formulates the multimodal conversational research objective. We provide a taxonomy of research required to solve the objective: multimodal representation, fusion, alignment, translation, and co-learning. We survey state-of-the-art datasets and approaches for each research area and highlight their limiting assumptions. Finally, we identify multimodal co-learning as a promising direction for multimodal conversational AI research. @@ -186,7 +186,7 @@ Open-domain Dialogue Generation: What We Can Do, Cannot Do, And Should Do Next - KatharinaKann + KatharinaKann AbteenEbrahimi JoewieKoh ShiranDudy @@ -226,7 +226,7 @@ Stylistic Response Generation by Controlling Personality Traits and Intent SougataSaha SouvikDas - RohiniSrihari + RohiniSrihari 197-211 Personality traits influence human actions and thoughts, which is manifested in day to day conversations. Although glimpses of personality traits are observable in existing open domain conversation corpora, leveraging generic language modelling for response generation overlooks the interlocutor idiosyncrasies, resulting in non-customizable personality agnostic responses. With the motivation of enabling stylistically configurable response generators, in this paper we experiment with end-to-end mechanisms to ground neural response generators based on both (i) interlocutor Big-5 personality traits, and (ii) discourse intent as stylistic control codes. Since most of the existing large scale open domain chat corpora do not include Big-5 personality traits and discourse intent, we employ automatic annotation schemes to enrich the corpora with noisy estimates of personality and intent annotations, and further assess the impact of using such features as control codes for response generation using automatic evaluation metrics, ablation studies and human judgement. Our experiments illustrate the effectiveness of this strategy resulting in improvements to existing benchmarks. Additionally, we yield two silver standard annotated corpora with intents and personality traits annotated, which can be of use to the research community. 2022.nlp4convai-1.16 @@ -240,7 +240,7 @@ YongLiu BoyangLi PeixiangZhong - ChenZhang + ChenZhang HaoWang ChunyanMiao 212-217 diff --git a/data/xml/2022.nlp4dh.xml b/data/xml/2022.nlp4dh.xml index 97d49e5244..f8ab9e7fd9 100644 --- a/data/xml/2022.nlp4dh.xml +++ b/data/xml/2022.nlp4dh.xml @@ -169,7 +169,7 @@ Towards Bootstrapping a Chatbot on Industrial Heritage through Term and Relation Extraction - MihaelArcan + MihaelArcan RoryO’Halloran CécileRobin PaulBuitelaar @@ -181,7 +181,7 @@ Non-Parametric Word Sense Disambiguation for Historical Languages - EnriqueManjavacas Arevalo + EnriqueManjavacas Arevalo LaurenFonteyn 123–134 Recent approaches to Word Sense Disambiguation (WSD) have profited from the enhanced contextualized word representations coming from contemporary Large Language Models (LLMs). This advancement is accompanied by a renewed interest in WSD applications in Humanities research, where the lack of suitable, specific WSD-annotated resources is a hurdle in developing ad-hoc WSD systems. Because they can exploit sentential context, LLMs are particularly suited for disambiguation tasks. Still, the application of LLMs is often limited to linear classifiers trained on top of the LLM architecture. In this paper, we follow recent developments in non-parametric learning and show how LLMs can be efficiently fine-tuned to achieve strong few-shot performance on WSD for historical languages (English and Dutch, date range: 1450-1950). We test our hypothesis using (i) a large, general evaluation set taken from large lexical databases, and (ii) a small real-world scenario involving an ad-hoc WSD task. Moreover, this paper marks the release of GysBERT, a LLM for historical Dutch. diff --git a/data/xml/2022.nlp4pi.xml b/data/xml/2022.nlp4pi.xml index d87588dd5a..a396ee7677 100644 --- a/data/xml/2022.nlp4pi.xml +++ b/data/xml/2022.nlp4pi.xml @@ -7,7 +7,7 @@ DorottyaDemszky ZhijingJin MrinmayaSachan - JoelTetreault + JoelTetreault StevenWilson LuXiao JieyuZhao @@ -109,7 +109,7 @@ Modelling Persuasion through Misuse of Rhetorical Appeals AmaliePauliComputer Science, Aarhus University - LeonDerczynskiIT University of Copenhagen + LeonDerczynskiIT University of Copenhagen IraAssentDepartment of Computer Science, Aarhus University 89-100 It is important to understand how people use words to persuade each other. This helps understand debate, and detect persuasive narratives in regard to e.g. misinformation. While computational modelling of some aspects of persuasion has received some attention, a way to unify and describe the overall phenomenon of when persuasion becomes undesired and problematic, is missing. In this paper, we attempt to address this by proposing a taxonomy of computational persuasion. Drawing upon existing research and resources, this paper shows how to re-frame and re-organise current work into a coherent framework targeting the misuse of rhetorical appeals. As a study to validate these re-framings, we then train and evaluate models of persuasion adapted to our taxonomy. Our results show an application of our taxonomy, and we are able to detecting misuse of rhetorical appeals, finding that these are more often used in misinformative contexts than in true ones. diff --git a/data/xml/2022.nlpcss.xml b/data/xml/2022.nlpcss.xml index 43c7be20f7..8d5f0efbe2 100644 --- a/data/xml/2022.nlpcss.xml +++ b/data/xml/2022.nlpcss.xml @@ -63,7 +63,7 @@ Understanding Narratives from Demographic Survey Data: a Comparative Study with Multiple Neural Topic Models XiaoXuNIDI-KNAW / University of Groningen GertStulpUniversity of Groningen - AntalVan Den BoschUtrecht University + AntalVan Den BoschUtrecht University AnneGauthierNidi-knaw 33-38 Fertility intentions as verbalized in surveys are a poor predictor of actual fertility outcomes, the number of children people have. This can partly be explained by the uncertainty people have in their intentions. Such uncertainties are hard to capture through traditional survey questions, although open-ended questions can be used to get insight into people’s subjective narratives of the future that determine their intentions. Analyzing such answers to open-ended questions can be done through Natural Language Processing techniques. Traditional topic models (e.g., LSA and LDA), however, often fail to do since they rely on co-occurrences, which are often rare in short survey responses. The aim of this study was to apply and evaluate topic models on demographic survey data. In this study, we applied neural topic models (e.g. BERTopic, CombinedTM) based on language models to responses from Dutch women on their fertility plans, and compared the topics and their coherence scores from each model to expert judgments. Our results show that neural models produce topics more in line with human interpretation compared to LDA. However, the coherence score could only partly reflect on this, depending on the corpus used for calculation. This research is important because, first, it helps us develop more informed strategies on model selection and evaluation for topic modeling on survey data; and second, it shows that the field of demography has much to gain from adopting NLP methods. @@ -181,7 +181,7 @@ NikitaSoniStony Brook University WeixiWangStony Brook University ChristianLuhmannStony Brook University - H. AndrewSchwartzStony Brook University + H. AndrewSchwartzStony Brook University NaoyaInoueJapan Advanced Institute of Science and Technology 151-156 We address dissonant stance detection, classifying conflicting stance between two input statements. Computational models for traditional stance detection have typically been trained to indicate pro/con for a given target topic (e.g. gun control) and thus do not generalize well to new topics. In this paper, we systematically evaluate the generalizability of dissonant stance detection to situations where examples of the topic have not been seen at all or have only been seen a few times. We show that dissonant stance detection models trained on only 8 topics, none of which are the target topic, can perform as well as those trained only on a target topic. Further, adding non-target topics boosts performance further up to approximately 32 topics where accuracies start to plateau. Taken together, our experiments suggest dissonant stance detection models can generalize to new unanticipated topics, an important attribute for the social scientific study of social media where new topics emerge daily. diff --git a/data/xml/2022.nlperspectives.xml b/data/xml/2022.nlperspectives.xml index 3b7640d030..1c5f656248 100644 --- a/data/xml/2022.nlperspectives.xml +++ b/data/xml/2022.nlperspectives.xml @@ -21,9 +21,9 @@ Disagreement Space in Argument Analysis - AnnetteHautli-Janisz + AnnetteHautli-Janisz EllaSchad - ChrisReed + ChrisReed 1–9 For a highly subjective task such as recognising speaker intention and argumentation, the traditional way of generating gold standards is to aggregate a number of labels into a single one. However, this seriously neglects the underlying richness that characterises discourse and argumentation and is also, in some cases, straightforwardly impossible. In this paper, we present QT30nonaggr, the first corpus of non-aggregated argument annotation, which will be openly available upon publication. QT30nonaggr encompasses 10% of QT30, the largest corpus of dialogical argumentation and analysed broadcast political debate currently available with 30 episodes of BBC’s ‘Question Time’ from 2020 and 2021. Based on a systematic and detailed investigation of annotation judgements across all steps of the annotation process, we structure the disagreement space with a taxonomy of the types of label disagreements in argument annotation, identifying the categories of annotation errors, fuzziness and ambiguity. 2022.nlperspectives-1.1 @@ -36,7 +36,7 @@ AshkanKazemi NaihaoDeng StevenWilson - RadaMihalcea + RadaMihalcea 10–19 Recent studies have shown that for subjective annotation tasks, the demographics, lived experiences, and identity of annotators can have a large impact on how items are labeled. We expand on this work, hypothesizing that gender may correlate with differences in annotations for a number of NLP benchmarks, including those that are fairly subjective (e.g., affect in text) and those that are typically considered to be objective (e.g., natural language inference). We develop a robust framework to test for differences in annotation across genders for four benchmark datasets. While our results largely show a lack of statistically significant differences in annotation by males and females for these tasks, the framework can be used to analyze differences in annotation between various other demographic groups in future work. Finally, we note that most datasets are collected without annotator demographics and released only in aggregate form; we call on the community to consider annotator demographics as data is collected, and to release dis-aggregated data to allow for further work analyzing variability among annotators. 2022.nlperspectives-1.2 @@ -68,7 +68,7 @@ The Viability of Best-worst Scaling and Categorical Data Label Annotation Tasks in Detecting Implicit Bias ParkerGlenn - Cassandra L.Jacobs + Cassandra L.Jacobs MarvinThielk YiChu 32–36 @@ -107,7 +107,7 @@ ChristopherHoman Tharindu CyrilWeerasooriya LoraAroyo - ChrisWelty + ChrisWelty 56–65 Annotator disagreement is often dismissed as noise or the result of poor annotation process quality. Others have argued that it can be meaningful. But lacking a rigorous statistical foundation, the analysis of disagreement patterns can resemble a high-tech form of tea-leaf-reading. We contribute a framework for analyzing the variation of per-item annotator response distributions to data for humans-in-the-loop machine learning. We provide visualizations for, and use the framework to analyze the variance in, a crowdsourced dataset of hard-to-classify examples from the OpenImages archive. 2022.nlperspectives-1.8 @@ -118,7 +118,7 @@ SofieLabat NaomiAckaert ThomasDemeester - VeroniqueHoste + VeroniqueHoste 66–72 This pilot study employs the Wizard of Oz technique to collect a corpus of written human-computer conversations in the domain of customer service. The resulting dataset contains 192 conversations and is used to test three hypotheses related to the expression and annotation of emotions. First, we hypothesize that there is a discrepancy between the emotion annotations of the participant (the experiencer) and the annotations of our external annotator (the observer). Furthermore, we hypothesize that the personality of the participants has an influence on the emotions they expressed, and on the way they evaluated (annotated) these emotions. We found that for an external, trained annotator, not all emotion labels were equally easy to work with. We also noticed that the trained annotator had a tendency to opt for emotion labels that were more centered in the valence-arousal space, while participants made more ‘extreme’ annotations. For the second hypothesis, we discovered a positive correlation between the personality trait extraversion and the emotion dimensions valence and dominance in our sample. Finally, for the third premise, we observed a positive correlation between the internal-external agreement on emotion labels and the personality traits conscientiousness and extraversion. Our insights and findings will be used in future research to conduct a larger Wizard of Oz experiment. 2022.nlperspectives-1.9 @@ -129,7 +129,7 @@ LucyHavens BenjaminBach MelissaTerras - BeatriceAlex + BeatriceAlex 73–82 This paper presents an overview of text visualization techniques relevant for data perspectivism, aiming to facilitate analysis of annotated datasets for the datasets’ creators and stakeholders. Data perspectivism advocates for publishing non-aggregated, annotated text data, recognizing that for highly subjective tasks, such as bias detection and hate speech detection, disagreements among annotators may indicate conflicting yet equally valid interpretations of a text. While the publication of non-aggregated, annotated data makes different interpretations of text corpora available, barriers still exist to investigating patterns and outliers in annotations of the text. Techniques from text visualization can overcome these barriers, facilitating intuitive data analysis for NLP researchers and practitioners, as well as stakeholders in NLP systems, who may not have data science or computing skills. In this paper we discuss challenges with current dataset creation practices and annotation platforms, followed by a discussion of text visualization techniques that enable open-ended, multi-faceted, and iterative analysis of annotated data. 2022.nlperspectives-1.10 @@ -177,7 +177,7 @@ TiagoTimponi Torrent OliverCzulo ArthurLorenzi - ElyMatos + ElyMatos FredericoBelcavello 108–116 This paper argues in favor of the adoption of annotation practices for multimodal datasets that recognize and represent the inherently perspectivized nature of multimodal communication. To support our claim, we present a set of annotation experiments in which FrameNet annotation is applied to the Multi30k and the Flickr 30k Entities datasets. We assess the cosine similarity between the semantic representations derived from the annotation of both pictures and captions for frames. Our findings indicate that: (i) frame semantic similarity between captions of the same picture produced in different languages is sensitive to whether the caption is a translation of another caption or not, and (ii) picture annotation for semantic frames is sensitive to whether the image is annotated in presence of a caption or not. @@ -188,7 +188,7 @@ Change My Mind: How Syntax-based Hate Speech Recognizer Can Uncover Hidden Motivations Based on Different Viewpoints MicheleMastromattei ValerioBasile - Fabio MassimoZanzotto + Fabio MassimoZanzotto 117–125 Hate speech recognizers may mislabel sentences by not considering the different opinions that society has on selected topics. In this paper, we show how explainable machine learning models based on syntax can help to understand the motivations that induce a sentence to be offensive to a certain demographic group. By comparing and contrasting the results, we show the key points that make a sentence labeled as hate speech and how this varies across different ethnic groups. 2022.nlperspectives-1.15 diff --git a/data/xml/2022.nlppower.xml b/data/xml/2022.nlppower.xml index 7340667054..b25b83e5c6 100644 --- a/data/xml/2022.nlppower.xml +++ b/data/xml/2022.nlppower.xml @@ -75,7 +75,7 @@ MatthiasLindemann DanyangLiu WanqiuLong - Bonnie L.Webber + Bonnie L.Webber 42-51 Recent improvements in automatic news summarization fundamentally rely on large corpora of news articles and their summaries. These corpora are often constructed by scraping news websites, which results in including not only summaries but also other kinds of texts. Apart from more generic noise, we identify straplines as a form of text scraped from news websites that commonly turn out not to be summaries. The presence of these non-summaries threatens the validity of scraped corpora as benchmarks for news summarization. We have annotated extracts from two news sources that form part of the Newsroom corpus (Grusky et al., 2018), labeling those which were straplines, those which were summaries, and those which were both. We present a rule-based strapline detection method that achieves good performance on a manually annotated test set. Automatic evaluation indicates that removing straplines and noise from the training data of a news summarizer results in higher quality summaries, with improvements as high as 7 points ROUGE score. 2022.nlppower-1.5 @@ -100,7 +100,7 @@ Beyond Static models and test sets: Benchmarking the potential of pre-trained models across tasks and languages KabirAhuja - SandipanDandapat + SandipanDandapat SunayanaSitaram MonojitChoudhury 64-74 diff --git a/data/xml/2022.osact.xml b/data/xml/2022.osact.xml index bb3bd68e9f..7ad65f52c9 100644 --- a/data/xml/2022.osact.xml +++ b/data/xml/2022.osact.xml @@ -75,7 +75,7 @@ A Context-free <fixed-case>A</fixed-case>rabic Emoji Sentiment Lexicon (<fixed-case>CF</fixed-case>-<fixed-case>A</fixed-case>rab-<fixed-case>ESL</fixed-case>) Shatha Ali A.Hakami - RobertHendley + RobertHendley PhillipSmith 51–59 Emoji can be valuable features in textual sentiment analysis. One of the key elements of the use of emoji in sentiment analysis is the emoji sentiment lexicon. However, constructing such a lexicon is a challenging task. This is because interpreting the sentiment conveyed by these pictographic symbols is highly subjective, and differs depending upon how each person perceives them. Cultural background is considered to be one of the main factors that affects emoji sentiment interpretation. Thus, we focus in this work on targeting people from Arab cultures. This is done by constructing a context-free Arabic emoji sentiment lexicon annotated by native Arabic speakers from seven different regions (Gulf, Egypt, Levant, Sudan, North Africa, Iraq, and Yemen) to see how these Arabic users label the sentiment of these symbols without a textual context. We recruited 53 annotators (males and females) to annotate 1,069 unique emoji. Then we evaluated the reliability of the annotation for each participant by applying sensitivity (Recall) and consistency (Krippendorff’s Alpha) tests. For the analysis, we investigated the resulting emoji sentiment annotations to explore the impact of the Arabic cultural context. We analyzed this cultural reflection from different perspectives, including national affiliation, use of colour indications, animal indications, weather indications and religious impact. @@ -99,7 +99,7 @@ Classifying <fixed-case>A</fixed-case>rabic Crisis Tweets using Data Selection and Pre-trained Language Models AlaaAlharbi - MarkLee + MarkLee 71–78 User-generated Social Media (SM) content has been explored as a valuable and accessible source of data about crises to enhance situational awareness and support humanitarian response efforts. However, the timely extraction of crisis-related SM messages is challenging as it involves processing large quantities of noisy data in real-time. Supervised machine learning methods have been successfully applied to this task but such approaches require human-labelled data, which are unlikely to be available from novel and emerging crises. Supervised machine learning algorithms trained on labelled data from past events did not usually perform well when classifying a new disaster due to data variations across events. Using the BERT embeddings, we propose and investigate an instance distance-based data selection approach for adaptation to improve classifiers’ performance under a domain shift. The K-nearest neighbours algorithm selects a subset of multi-event training data that is most similar to the target event. Results show that fine-tuning a BERT model on a selected subset of data to classify crisis tweets outperforms a model that has been fine-tuned on all available source data. We demonstrated that our approach generally works better than the self-training adaptation method. Combing the self-training with our proposed classifier does not enhance the performance. 2022.osact-1.8 @@ -120,7 +120,7 @@ DamithPremasiri TharinduRanasinghe WajdiZaghouani - RuslanMitkov + RuslanMitkov 88–95 The task of machine reading comprehension (MRC) is a useful benchmark to evaluate the natural language understanding of machines. It has gained popularity in the natural language processing (NLP) field mainly due to the large number of datasets released for many languages. However, the research in MRC has been understudied in several domains, including religious texts. The goal of the Qur’an QA 2022 shared task is to fill this gap by producing state-of-the-art question answering and reading comprehension research on Qur’an. This paper describes the DTW entry to the Quran QA 2022 shared task. Our methodology uses transfer learning to take advantage of available Arabic MRC data. We further improve the results using various ensemble learning strategies. Our approach provided a partial Reciprocal Rank (pRR) score of 0.49 on the test set, proving its strong performance on the task. 2022.osact-1.10 @@ -165,7 +165,7 @@ SarahAlnefaie SanaaAlowaidi AlaaAlsaqer - EricAtwell + EricAtwell AbdulrahmanAltahhan MohammadAlsalka 120–125 @@ -265,7 +265,7 @@ Meta <fixed-case>AI</fixed-case> at <fixed-case>A</fixed-case>rabic Hate Speech 2022: <fixed-case>M</fixed-case>ulti<fixed-case>T</fixed-case>ask Learning with Self-Correction for Hate Speech Classification BadrAlKhamissi - MonaDiab + MonaDiab 186–193 In this paper, we tackle the Arabic Fine-Grained Hate Speech Detection shared task and demonstrate significant improvements over reported baselines for its three subtasks. The tasks are to predict if a tweet contains (1) Offensive language; and whether it is considered (2) Hate Speech or not and if so, then predict the (3) Fine-Grained Hate Speech label from one of six categories. Our final solution is an ensemble of models that employs multitask learning and a self-consistency correction method yielding 82.7% on the hate speech subtask—reflecting a 3.4% relative improvement compared to previous work. 2022.osact-1.24 diff --git a/data/xml/2022.paclic.xml b/data/xml/2022.paclic.xml index f36c283bb3..fa59199149 100644 --- a/data/xml/2022.paclic.xml +++ b/data/xml/2022.paclic.xml @@ -3,7 +3,7 @@ Proceedings of the 36th Pacific Asia Conference on Language, Information and Computation - ShirleyDita + ShirleyDita ArleneTrillanes Rochelle IreneLucas Association for Computational Linguistics @@ -23,7 +23,7 @@ Phu-ThinhPham DucDo An-VinhLuong - DienDinh + DienDinh 1–9 2022.paclic-1.1 vu-tran-etal-2022-integrating @@ -53,7 +53,7 @@ The Information Packaging of the Do-Constructions in <fixed-case>C</fixed-case>hinese, <fixed-case>R</fixed-case>ussian, and <fixed-case>C</fixed-case>zech - KawaiChui + KawaiChui Hsiang-LinYeh Shih-HuiLin 35–44 @@ -127,7 +127,7 @@ Improving Automatic Evaluation of Acceptability Based on Language Models with a Coarse Sentence Representation VijayDaultani - NaoakiOkazaki + NaoakiOkazaki 109–118 2022.paclic-1.13 daultani-okazaki-2022-improving @@ -184,7 +184,7 @@ Dushyant SinghChauhan MauajamaFirdaus AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 166–174 2022.paclic-1.19 singh-etal-2022-emoji @@ -384,7 +384,7 @@ A comparison of the validity of measurement methods of the general <fixed-case>E</fixed-case>nglish proficiency by dictation and read-aloud performance KatsunoriKotani - TakehikoYoshimi + TakehikoYoshimi 388–395 2022.paclic-1.43 kotani-yoshimi-2022-comparison @@ -426,7 +426,7 @@ A Model-Theoretic Formalization of Natural Language Inference Using Neural Network and Tableau Method AyahitoSaji - YoshihideKato + YoshihideKato ShigekiMatsubara 430–437 2022.paclic-1.48 @@ -523,7 +523,7 @@ TanikSaikh SaprativaBhattacharjee AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 525–532 2022.paclic-1.58 ghosal-etal-2022-novelty-detection @@ -697,7 +697,7 @@ KartikShinde TirthankarGhosal MuskaanSingh - OndrejBojar + OndrejBojar 691–702 2022.paclic-1.76 shinde-etal-2022-automatic @@ -783,7 +783,7 @@ DuyVu-Tran DucDo An-VinhLuong - DienDinh + DienDinh 777–782 2022.paclic-1.85 pham-etal-2022-intent @@ -792,7 +792,7 @@ Annotating Entity and Causal Relationships on <fixed-case>J</fixed-case>apanese Vehicle Recall Information Hsuan-YuKuo YoumiMa - NaoakiOkazaki + NaoakiOkazaki 783–791 2022.paclic-1.86 kuo-etal-2022-annotating @@ -802,7 +802,7 @@ Santosh KumarMishra SushantSinha SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya 792–800 2022.paclic-1.87 mishra-etal-2022-deep @@ -844,8 +844,8 @@ Bi-directional Cross-Attention Network on <fixed-case>V</fixed-case>ietnamese Visual Question Answering Duy-MinhNguyen-Tran TungLe - Minh LeNguyen - Huy TienNguyen + Minh LeNguyen + Huy TienNguyen 834–841 2022.paclic-1.92 nguyen-tran-etal-2022-bi diff --git a/data/xml/2022.pandl.xml b/data/xml/2022.pandl.xml index 009b0e9759..28d6a1e077 100644 --- a/data/xml/2022.pandl.xml +++ b/data/xml/2022.pandl.xml @@ -5,7 +5,7 @@ Proceedings of the First Workshop on Pattern-based Approaches to NLP in the Age of Deep Learning LauraChiticariu YoavGoldberg - GusHahn-Powell + GusHahn-Powell Clayton T.Morrison AakankshaNaik RebeccaSharp @@ -51,7 +51,7 @@ SubhasishGhosh ArpitaKundu PratikSaini - TapasNayak + TapasNayak 21–28 We explore the task of generating long-form technical questions from textbooks. Semi-structured metadata of a textbook — the table of contents and the index — provide rich cues for technical question generation. Existing literature for long-form question generation focuses mostly on reading comprehension assessment, and does not use semi-structured metadata for question generation. We design unsupervised template based algorithms for generating questions based on structural and contextual patterns in the index and ToC. We evaluate our approach on textbooks on diverse subjects and show that our approach generates high quality questions of diverse types. We show that, in comparison, zero-shot question generation using pre-trained LLMs on the same meta-data has much poorer quality. 2022.pandl-1.3 @@ -63,7 +63,7 @@ OnyuPark ChanghoeHwang GwanghoonYoo - EricLaporte + EricLaporte JeesunNam 29–37 Natural language understanding (NLU) is integral to task-oriented dialog systems, but demands a considerable amount of annotated training data to increase the coverage of diverse utterances. In this study, we report the construction of a linguistic resource named FIAD (Financial Annotated Dataset) and its use to generate a Korean annotated training data for NLU in the banking customer service (CS) domain. By an empirical examination of a corpus of banking app reviews, we identified three linguistic patterns occurring in Korean request utterances: TOPIC (ENTITY, FEATURE), EVENT, and DISCOURSE MARKER. We represented them in LGGs (Local Grammar Graphs) to generate annotated data covering diverse intents and entities. To assess the practicality of the resource, we evaluate the performances of DIET-only (Intent: 0.91 /Topic [entity+feature]: 0.83), DIET+ HANBERT (I:0.94/T:0.85), DIET+ KoBERT (I:0.94/T:0.86), and DIET+ KorBERT (I:0.95/T:0.84) models trained on FIAD-generated data to extract various types of semantic items. @@ -76,7 +76,7 @@ ShinwooKim ChanghoeHwang GwanghoonYoo - EricLaporte + EricLaporte JeesunNam 38–44 We report the construction of a Korean evaluation-annotated corpus, hereafter called ‘Evaluation Annotated Dataset (EVAD)’, and its use in Aspect-Based Sentiment Analysis (ABSA) extended in order to cover e-commerce reviews containing sentiment and non-sentiment linguistic patterns. The annotation process uses Semi-Automatic Symbolic Propagation (SSP). We built extensive linguistic resources formalized as a Finite-State Transducer (FST) to annotate corpora with detailed ABSA components in the fashion e-commerce domain. The ABSA approach is extended, in order to analyze user opinions more accurately and extract more detailed features of targets, by including aspect values in addition to topics and aspects, and by classifying aspect-value pairs depending whether values are unary, binary, or multiple. For evaluation, the KoBERT and KcBERT models are trained on the annotated dataset, showing robust performances of F1 0.88 and F1 0.90, respectively, on recognition of aspect-value pairs. @@ -85,7 +85,7 @@ Accelerating Human Authorship of Information Extraction Rules - DayneFreitag + DayneFreitag JohnCadigan JohnNiekrasz RobertSasseen @@ -131,7 +131,7 @@ EnriqueNoriega-Atala RobertVacareanu GusHahn-Powell - Marco A.Valenzuela-Escárcega + Marco A.Valenzuela-Escárcega 85–93 We propose a neural-based approach for rule synthesis designed to help bridge the gap between the interpretability, precision and maintainability exhibited by rule-based information extraction systems with the scalability and convenience of statistical information extraction systems. This is achieved by avoiding placing the burden of learning another specialized language on domain experts and instead asking them to provide a small set of examples in the form of highlighted spans of text. We introduce a transformer-based architecture that drives a rule synthesis system that leverages a self-supervised approach for pre-training a large-scale language model complemented by an analysis of different loss functions and aggregation mechanisms for variable length sequences of user-annotated spans of text. The results are encouraging and point to different desirable properties, such as speed and quality, depending on the choice of loss and aggregation method. 2022.pandl-1.10 diff --git a/data/xml/2022.parlaclarin.xml b/data/xml/2022.parlaclarin.xml index f84768f337..f8f328057f 100644 --- a/data/xml/2022.parlaclarin.xml +++ b/data/xml/2022.parlaclarin.xml @@ -22,7 +22,7 @@ <fixed-case>P</fixed-case>arla<fixed-case>M</fixed-case>int <fixed-case>II</fixed-case>: The Show Must Go On MaciejOgrodniczuk PetyaOsenova - TomažErjavec + TomažErjavec DarjaFišer NikolaLjubešić ÇağrıÇöltekin @@ -149,7 +149,7 @@ <fixed-case>F</fixed-case>rame<fixed-case>AS</fixed-case>t: A Framework for Second-level Agenda Setting in Parliamentary Debates through the Lense of Comparative Agenda Topics ChristopherKlamm InesRehbein - Simone PaoloPonzetto + Simone PaoloPonzetto 92–100 This paper presents a framework for studying second-level political agenda setting in parliamentary debates, based on the selection of policy topics used by political actors to discuss a specific issue on the parliamentary agenda. For example, the COVID-19 pandemic as an agenda item can be contextualised as a health issue or as a civil rights issue, as a matter of macroeconomics or can be discussed in the context of social welfare. Our framework allows us to observe differences regarding how different parties discuss the same agenda item by emphasizing different topical aspects of the item. We apply and evaluate our framework on data from the German Bundestag and discuss the merits and limitations of our approach. In addition, we present a new annotated data set of parliamentary debates, following the coding schema of policy topics developed in the Comparative Agendas Project (CAP), and release models for topic classification in parliamentary debates. 2022.parlaclarin-1.13 @@ -188,7 +188,7 @@ TommasoAgnoloni RobertoBartolini FrancescaFrontini - SimonettaMontemagni + SimonettaMontemagni CarloMarchetti ValeriaQuochi ManuelaRuisi @@ -202,7 +202,7 @@ <fixed-case>P</fixed-case>arlament<fixed-case>P</fixed-case>arla: A Speech Corpus of <fixed-case>C</fixed-case>atalan Parliamentary Sessions BaybarsKulebi CarmeArmentano-Oller - CarlosRodriguez-Penagos + CarlosRodriguez-Penagos MartaVillegas 125–130 Recently, various end-to-end architectures of Automatic Speech Recognition (ASR) are being showcased as an important step towards providing language technologies to all languages instead of a select few such as English. However many languages are still suffering due to the “digital gap,” lacking thousands of hours of transcribed speech data openly accessible that is necessary to train modern ASR architectures. Although Catalan already has access to various open speech corpora, these corpora lack diversity and are limited in total volume. In order to address this lack of resources for Catalan language, in this work we present ParlamentParla, a corpus of more than 600 hours of speech from Catalan Parliament sessions. This corpus has already been used in training of state-of-the-art ASR systems, and proof-of-concept text-to-speech (TTS) models. In this work we explain in detail the pipeline that allows the information publicly available on the parliamentary website to be converted to a speech corpus compatible with training of ASR and possibly TTS models. diff --git a/data/xml/2022.politicalnlp.xml b/data/xml/2022.politicalnlp.xml index edd48e23a3..9d4c5f163d 100644 --- a/data/xml/2022.politicalnlp.xml +++ b/data/xml/2022.politicalnlp.xml @@ -32,7 +32,7 @@ Correlating Political Party Names in Tweets, Newspapers and Election Results EricSanders - Antalvan den Bosch + Antalvan den Bosch 8–15 Twitter has been used as a textual resource to attempt to predict the outcome of elections for over a decade. A body of literature suggests that this is not consistently possible. In this paper we test the hypothesis that mentions of political parties in tweets are better correlated with the appearance of party names in newspapers than to the intention of the tweeter to vote for that party. Five Dutch national elections are used in this study. We find only a small positive, negligible difference in Pearson’s correlation coefficient as well as in the absolute error of the relation between tweets and news, and between tweets and elections. However, we find a larger correlation and a smaller absolute error between party mentions in newspapers and the outcome of the elections in four of the five elections. This suggests that newspapers are a better starting point for predicting the election outcome than tweets. 2022.politicalnlp-1.2 @@ -41,7 +41,7 @@ Debating <fixed-case>E</fixed-case>urope: A Multilingual Multi-Target Stance Classification Dataset of Online Debates ValentinBarriere - AlexandraBalahur + AlexandraBalahur BrianRavenet 16–21 We present a new dataset of online debates in English, annotated with stance. The dataset was scraped from the “Debating Europe” platform, where users exchange opinions over different subjects related to the European Union. The dataset is composed of 2600 comments pertaining to 18 debates related to the “European Green Deal”, in a conversational setting. After presenting the dataset and the annotated sub-part, we pre-train a model for a multilingual stance classification over the X-stance dataset before fine-tuning it over our dataset, and vice-versa. The fine-tuned models are shown to improve stance classification performance on each of the datasets, even though they have different languages, topics and targets. Subsequently, we propose to enhance the performances over “Debating Europe” with an interaction-aware model, taking advantage of the online debate structure of the platform. We also propose a semi-supervised self-training method to take advantage of the imbalanced and unlabeled data from the whole website, leading to a final improvement of accuracy by 3.4% over a Vanilla XLM-R model. @@ -55,7 +55,7 @@ LeiGuo MargritBetke PrakashIshwar - Derry TantiWijaya + Derry TantiWijaya 22–31 Media framing refers to highlighting certain aspect of an issue in the news to promote a particular interpretation to the audience. Supervised learning has often been used to recognize frames in news articles, requiring a known pool of frames for a particular issue, which must be identified by communication researchers through thorough manual content analysis. In this work, we devise an unsupervised learning approach to discover the frames in news articles automatically. Given a set of news articles for a given issue, e.g., gun violence, our method first extracts frame elements from these articles using related Wikipedia articles and the Wikipedia category system. It then uses a community detection approach to identify frames from these frame elements. We discuss the effectiveness of our approach by comparing the frames it generates in an unsupervised manner to the domain-expert-derived frames for the issue of gun violence, for which a supervised learning model for frame recognition exists. 2022.politicalnlp-1.4 @@ -146,7 +146,7 @@ JoannaSzwoch MateuszStaszkow RafalRzepka - KenjiAraki + KenjiAraki 86–90 In this paper we describe a Polish news corpus as an attempt to create a filtered, organized and representative set of texts coming from contemporary online press articles from two major Polish TV news providers: commercial TVN24 and state-owned TVP Info. The process consists of web scraping, data cleaning and formatting. A random sample was selected from prepared data to perform a classification task. The random forest achieved the best prediction results out of all considered models. We believe that this dataset is a valuable contribution to existing Polish language corpora as online news are considered to be formal and relatively mistake-free, therefore, a reliable source of correct written language, unlike other online platforms such as blogs or social media. Furthermore, to our knowledge, such corpus from this period of time has not been created before. In the future we would like to expand this dataset with articles coming from other online news providers, repeat the classification task on a bigger scale, utilizing other algorithms. Our data analysis outcomes might be a relevant basis to improve research on a political polarization and propaganda techniques in media. 2022.politicalnlp-1.12 diff --git a/data/xml/2022.privatenlp.xml b/data/xml/2022.privatenlp.xml index f32a71744a..ddacd9b6b9 100644 --- a/data/xml/2022.privatenlp.xml +++ b/data/xml/2022.privatenlp.xml @@ -36,7 +36,7 @@ AtulaTejaswi Neerkaje RamitSawhney LucieFlek - AndersSogaard + AndersSogaard 12-12 The performance cost of differential privacy has, for some applications, been shown to be higher for minority groups fairness, conversely, has been shown to disproportionally compromise the privacy of members of such groups. Most work in this area has been restricted to computer vision and risk assessment. In this paper, we evaluate the impact of differential privacy on fairness across four tasks, focusing on how attempts to mitigate privacy violations and between-group performance differences interact Does privacy inhibit attempts to ensure fairness? To this end, we train epsilon, delta-differentially private models with empirical risk minimization and group distributionally robust training objectives. Consistent with previous findings, we find that differential privacy increases between-group performance differences in the baseline setting but more interestingly, differential privacy reduces between-group performance differences in the robust setting. We explain this by reinterpreting differential privacy as regularization. 2022.privatenlp-1.2 diff --git a/data/xml/2022.pvlam.xml b/data/xml/2022.pvlam.xml index 6fd0ca996f..8e8cf4d3ee 100644 --- a/data/xml/2022.pvlam.xml +++ b/data/xml/2022.pvlam.xml @@ -53,7 +53,7 @@ Cognitive States and Types of Nods TaigaMori - KristiinaJokinen + KristiinaJokinen YasuharuDen 17–25 In this paper we will study how different types of nods are related to the cognitive states of the listener. The distinction is made between nods with movement starting upwards (up-nods) and nods with movement starting downwards (down-nods) as well as between single or repetitive nods. The data is from Japanese multiparty conversations, and the results accord with the previous findings indicating that up-nods are related to the change in the listener’s cognitive state after hearing the partner’s contribution, while down-nods convey the meaning that the listener’s cognitive state is not changed. diff --git a/data/xml/2022.rapid.xml b/data/xml/2022.rapid.xml index 5602a2c4f6..ecdc8e62b7 100644 --- a/data/xml/2022.rapid.xml +++ b/data/xml/2022.rapid.xml @@ -7,7 +7,7 @@ Charalambos K.Themistocleous Kristina LundholmFors AthanasiosTsanas - Kathleen C.Fraser + Kathleen C.Fraser European Language Resources Association
Marseille, France
June @@ -24,7 +24,7 @@ ChiaraPesenti LoesVan Bemmel Roelandvan Hout - HelmerStrik + HelmerStrik 1–8 In the current study on dysarthric speech, we investigate the effect of web-based treatment, and whether there is a difference between content and function words. Since the goal of the treatment is to speak louder, without raising pitch, we focus on acoustic-phonetic features related to loudness, intensity, and pitch. We analyse dysarthric read speech from eight speakers at word level. We also investigate whether there are differences between content words and function words, and whether the treatment has a different impact on these two classes of words. Linear Mixed-Effects models show that there are differences before and after treatment, that for some speakers the treatment has the desired effect, but not for all speakers, and that the effect of the treatment on words for the two categories does not seem to be different. To a large extent, our results are in line with the results of a previous study in which the same data were analyzed in a different way, i.e. by studying intelligibility scores. 2022.rapid-1.1 @@ -66,7 +66,7 @@ Classification of <fixed-case>G</fixed-case>erman Jungian Extraversion and Introversion Texts with Assessment of Changes During the <fixed-case>COVID</fixed-case>-19 Pandemic DirkJohannßen - ChrisBiemann + ChrisBiemann DavidScheffer 31–40 The corona pandemic and countermeasures such as social distancing and lockdowns have confronted individuals with new challenges for their mental health and well-being. It can be assumed that the Jungian psychology types of extraverts and introverts react differently to these challenges. We propose a Bi-LSTM model with an attention mechanism for classifying introversion and extraversion from German tweets, which is trained on hand-labeled data created by 335 participants. With this work, we provide this novel dataset for free use and validation. The proposed model achieves solid performance with F1 = .72. Furthermore, we created a feature engineered logistic model tree (LMT) trained on hand-labeled tweets, to which the data is also made available with this work. With this second model, German tweets before and during the pandemic have been investigated. Extraverts display more positive emotions, whilst introverts show more insight and higher rates of anxiety. Even though such a model can not replace proper psychological diagnostics, it can help shed light on linguistic markers and to help understand introversion and extraversion better for a variety of applications and investigations. @@ -110,7 +110,7 @@ Data Augmentation for the Post-Stroke Speech Transcription (<fixed-case>PSST</fixed-case>) Challenge: Sometimes Less Is More JiahongYuan XingyuCai - KennethChurch + KennethChurch 71–79 We employ the method of fine-tuning wav2vec2.0 for recognition of phonemes in aphasic speech. Our effort focuses on data augmentation, by supplementing data from both in-domain and out-of-domain datasets for training. We found that although a modest amount of out-of-domain data may be helpful, the performance of the model degrades significantly when the amount of out-of-domain data is much larger than in-domain data. Our hypothesis is that fine-tuning wav2vec2.0 with a CTC loss not only learns bottom-up acoustic properties but also top-down constraints. Therefore, out-of-domain data augmentation is likely to degrade performance if there is a language model mismatch between “in” and “out” domains. For in-domain audio without ground truth labels, we found that it is beneficial to exclude samples with less confident pseudo labels. Our final model achieves 16.7% PER (phoneme error rate) on the validation set, without using a language model for decoding. The result represents a relative error reduction of 14% over the baseline model trained without data augmentation. Finally, we found that “canonicalized” phonemes are much easier to recognize than manually transcribed phonemes. 2022.rapid-1.9 diff --git a/data/xml/2022.readi.xml b/data/xml/2022.readi.xml index b145cbcf93..0be01e7ff9 100644 --- a/data/xml/2022.readi.xml +++ b/data/xml/2022.readi.xml @@ -6,7 +6,7 @@ RodrigoWilkens DavidAlfter RémiCardon - NúriaGala + NúriaGala European Language Resources Association
Marseille, France
June @@ -26,12 +26,12 @@ HakeemBeedar HaraldBerthelsen CathyChua - CatiaCucchiarini + CatiaCucchiarini BrynjarrEyjólfsson NedelinaIvanova ChristèleMaizonniaux NeasaNí Chiaráin - MannyRayner + MannyRayner JohnSloan SigurðurVigfússon Ghil’adZuckermann @@ -73,7 +73,7 @@ The <fixed-case>S</fixed-case>wedish Simplification Toolkit: – Designed with Target Audiences in Mind EvelinaRennes MarinaSantini - ArneJonsson + ArneJonsson 31–38 In this paper, we present the current version of The Swedish Simplification Toolkit. The toolkit includes computational and empirical tools that have been developed along the years to explore a still neglected area of NLP, namely the simplification of “standard” texts to meet the needs of target audiences. Target audiences, such as people affected by dyslexia, aphasia, autism, but also children and second language learners, require different types of text simplification and adaptation. For example, while individual with aphasia have difficulties in reading compounds (such as arbetsmarknadsdepartement, eng. ministry of employment), second language learners struggle with cultural-specific vocabulary (e.g. konflikträdd, eng. afraid of conflicts). The toolkit allows user to selectively decide the types of simplification that meet the specific needs of the target audience they belong to. The Swedish Simplification Toolkit is one of the first attempts to overcome the one-fits-all approach that is still dominant in Automatic Text Simplification, and proposes a set of computational methods that, used individually or in combination, may help individuals reduce reading (and writing) difficulties. 2022.readi-1.5 diff --git a/data/xml/2022.repl4nlp.xml b/data/xml/2022.repl4nlp.xml index cdbc87d361..1aee839f9e 100644 --- a/data/xml/2022.repl4nlp.xml +++ b/data/xml/2022.repl4nlp.xml @@ -15,9 +15,9 @@ IsabelleAugenstein AnnaRogers KyunghyunCho - EdwardGrefenstette + EdwardGrefenstette LauraRimell - ChrisDyer + ChrisDyer Association for Computational Linguistics
Dublin, Ireland
May @@ -57,7 +57,7 @@ When does <fixed-case>CLIP</fixed-case> generalize better than unimodal models? When judging human-centric concepts RomainBielawski BenjaminDevillers - TimVan De Cruys + TimVan De Cruys RufinVanrullen 29-38 CLIP, a vision-language network trained with a multimodal contrastive learning objective on a large dataset of images and captions, has demonstrated impressive zero-shot ability in various tasks. However, recent work showed that in comparison to unimodal (visual) networks, CLIP’s multimodal training does not benefit generalization (e.g. few-shot or transfer learning) for standard visual classification tasks such as object, street numbers or animal recognition. Here, we hypothesize that CLIP’s improved unimodal generalization abilities may be most prominent in domains that involve human-centric concepts (cultural, social, aesthetic, affective...); this is because CLIP’s training dataset is mainly composed of image annotations made by humans for other humans. To evaluate this, we use 3 tasks that require judging human-centric concepts”:” sentiment analysis on tweets, genre classification on books or movies. We introduce and publicly release a new multimodal dataset for movie genre classification. We compare CLIP’s visual stream against two visually trained networks and CLIP’s textual stream against two linguistically trained networks, as well as multimodal combinations of these networks. We show that CLIP generally outperforms other networks, whether using one or two modalities. We conclude that CLIP’s multimodal training is beneficial for both unimodal and multimodal tasks that require classification of human-centric concepts. @@ -164,7 +164,7 @@ Temporal Knowledge Graph Reasoning with Low-rank and Model-agnostic Representations IoannisDikeoulias SaadullahAmin - GünterNeumann + GünterNeumann 111-120 Temporal knowledge graph completion (TKGC) has become a popular approach for reasoning over the event and temporal knowledge graphs, targeting the completion of knowledge with accurate but missing information. In this context, tensor decomposition has successfully modeled interactions between entities and relations. Their effectiveness in static knowledge graph completion motivates us to introduce Time-LowFER, a family of parameter-efficient and time-aware extensions of the low-rank tensor factorization model LowFER. Noting several limitations in current approaches to represent time, we propose a cycle-aware time-encoding scheme for time features, which is model-agnostic and offers a more generalized representation of time. We implement our methods in a unified temporal knowledge graph embedding framework, focusing on time-sensitive data processing. The experiments show that our proposed methods perform on par or better than the state-of-the-art semantic matching models on two benchmarks. 2022.repl4nlp-1.12 diff --git a/data/xml/2022.rocling.xml b/data/xml/2022.rocling.xml index 817352dd84..f4b80504f9 100644 --- a/data/xml/2022.rocling.xml +++ b/data/xml/2022.rocling.xml @@ -33,7 +33,7 @@ Cheng-ChungFan Kuan-YuChen YuTsao - Hsin-MinWang + Hsin-MinWang Keh-YihSu 7–14 This paper constructs a Chinese dialogue-based information-seeking question answering dataset CMDQA, which is mainly applied to the scenario of getting Chinese movie related information. It contains 10K QA dialogs (40K turns in total). All questions and background documents are compiled from the Wikipedia via an Internet crawler. The answers to the questions are obtained via extracting the corresponding answer spans within the related text passage. In CMDQA, in addition to searching related documents, pronouns are also added to the question to better mimic the real dialog scenario. This dataset can test the individual performance of the information retrieval, the question answering and the question re-writing modules. This paper also provides a baseline system and shows its performance on this dataset. The experiments elucidate that it still has a big gap to catch the human performance. This dataset thus provides enough challenge for the researcher to conduct related research. @@ -92,8 +92,8 @@ Is Character Trigram Overlapping Ratio Still the Best Similarity Measure for Aligning Sentences in a Paraphrased Corpus? AleksandraSmolka - Hsin-MinWang - Jason S.Chang + Hsin-MinWang + Jason S.Chang Keh-YihSu 49–60 Sentence alignment is an essential step in studying the mapping among different language expressions, and the character trigram overlapping ratio was reported to be the most effective similarity measure in aligning sentences in the text simplification dataset. However, the appropriateness of each similarity measure depends on the characteristics of the corpus to be aligned. This paper studies if the character trigram is still a suitable similarity measure for the task of aligning sentences in a paragraph paraphrasing corpus. We compare several embedding-based and non-embeddings model-agnostic similarity measures, including those that have not been studied previously. The evaluation is conducted on parallel paragraphs sampled from the Webis-CPC-11 corpus, which is a paragraph paraphrasing dataset. Our results show that modern BERT-based measures such as Sentence-BERT or BERTScore can lead to significant improvement in this task. @@ -152,7 +152,7 @@ A Preliminary Study of the Application of Discrete Wavelet Transform Features in Conv-<fixed-case>T</fixed-case>as<fixed-case>N</fixed-case>et Speech Enhancement Model Yan-TongChen Zong-TaiWu - Jeih-WeihHung + Jeih-WeihHung 92–99 Nowadays, time-domain features have been widely used in speech enhancement (SE) networks like frequency-domain features to achieve excellent performance in eliminating noise from input utterances. This study primarily investigates how to extract information from time-domain utterances to create more effective features in speech enhancement. We present employing sub-signals dwelled in multiple acoustic frequency bands in time domain and integrating them into a unified feature set. We propose using the discrete wavelet transform (DWT) to decompose each input frame signal to obtain sub-band signals, and a projection fusion process is performed on these signals to create the ultimate features. The corresponding fusion strategy is the bi-projection fusion (BPF). In short, BPF exploits the sigmoid function to create ratio masks for two feature sources. The concatenation of fused DWT features and time features serves as the encoder output of a celebrated SE framework, fully-convolutional time-domain audio separation network (Conv-TasNet), to estimate the mask and then produce the enhanced time-domain utterances. The evaluation experiments are conducted on the VoiceBank-DEMAND and VoiceBank-QUT tasks. The experimental results reveal that the proposed method achieves higher speech quality and intelligibility than the original Conv-TasNet that uses time features only, indicating that the fusion of DWT features created from the input utterances can benefit time features to learn a superior Conv-TasNet in speech enhancement. 2022.rocling-1.12 @@ -163,7 +163,7 @@ Exploiting the compressed spectral loss for the learning of the <fixed-case>DEMUCS</fixed-case> speech enhancement network Chi-EnDai Qi-WeiHong - Jeih-WeihHung + Jeih-WeihHung 100–106 This study aims to improve a highly effective speech enhancement technique, DEMUCS, by revising the respective loss function in learning. DEMUCS, developed by Facebook Team, is built on the Wave-UNet and consists of convolutional layer encoding and decoding blocks with an LSTM layer in between. Although DEMUCS processes the input speech utterance purely in the time (wave) domain, the applied loss function consists of wave-domain L1 distance and multi-scale shorttime-Fourier-transform (STFT) loss. That is, both time- and frequency-domain features are taken into consideration in the learning of DEMUCS. In this study, we present revising the STFT loss in DEMUCS by employing the compressed magnitude spectrogram. The compression is done by either the power-law operation with a positive exponent less than one, or the logarithmic operation. We evaluate the presented novel framework on the VoiceBank-DEMAND database and task. The preliminary experimental results suggest that DEMUCS containing the power-law compressed magnitude spectral loss outperforms the original DEMUCS by providing the test utterances with higher objective quality and intelligibility scores (PESQ and STOI). Relatively, the logarithm compressed magnitude spectral loss does not benefit DEMUCS. Therefore, we reveal that DEMUCS can be further improved by properly revising the STFT terms of its loss function. 2022.rocling-1.13 @@ -229,7 +229,7 @@ Yu-HsiangTseng Chi-WeiWang Fang-ChiYeh - Shu-KaiHsieh + Shu-KaiHsieh 136–146 Non-lexical items are expressive devices used in conversations that are not words but are nevertheless meaningful. These items play crucial roles, such as signaling turn-taking or marking stances in interactions. However, as the non-lexical items do not stably correspond to written or phonological forms, past studies tend to focus on studying their acoustic properties, such as pitches and durations. In this paper, we investigate the discourse functions of non-lexical items through their acoustic properties and the phone embeddings extracted from a deep learning model. Firstly, we create a non-lexical item dataset based on the interpellation video clips from Taiwan’s Legislative Yuan. Then, we manually identify the non-lexical items and their discourse functions in the videos. Next, we analyze the acoustic properties of those items through statistical modeling and building classifiers based on phone embeddings extracted from a phone recognition model. We show that (1) the discourse functions have significant effects on the acoustic features; and (2) the classifiers built on phone embeddings perform better than the ones on conventional acoustic properties. These results suggest that phone embeddings may reflect the phonetic variations crucial in differentiating the discourse functions of non-lexical items. 2022.rocling-1.18 @@ -265,7 +265,7 @@ <fixed-case>H</fixed-case>an<fixed-case>T</fixed-case>rans: An Empirical Study on Cross-Era Transferability of <fixed-case>C</fixed-case>hinese Pre-trained Language Model Chin-TungLin - Wei-YunMa + Wei-YunMa 164–173 The pre-trained language model has recently dominated most downstream tasks in the NLP area. Particularly, bidirectional Encoder Representations from Transformers (BERT) is the most iconic pre-trained language model among the NLP tasks. Their proposed masked-language modeling (MLM) is an indispensable part of the existing pre-trained language models. Those outperformed models for downstream tasks benefited directly from the large training corpus in the pre-training stage. However, their training corpus for modern traditional Chinese was light. Most of all, the ancient Chinese corpus is still disappearance in the pre-training stage. Therefore, we aim to address this problem by transforming the annotation data of ancient Chinese into BERT style training corpus. Then we propose a pre-trained Oldhan Chinese BERT model for the NLP community. Our proposed model outperforms the original BERT model by significantly reducing perplexity scores in masked-language modeling (MLM). Also, our fine-tuning models improve F1 scores on word segmentation and part-of-speech tasks. Then we comprehensively study zero-shot cross-eras ability in the BERT model. Finally, we visualize and investigate personal pronouns in the embedding space of ancient Chinese records from four eras. We have released our code at https://github.com/ckiplab/han-transformers. 2022.rocling-1.21 @@ -277,7 +277,7 @@ Tzu-IWu Tien-HongLo Fu-AnChao - Yao-TingSung + Yao-TingSung BerlinChen 174–183 Due to the surge in global demand for English as a second language (ESL), developments of automated methods for grading speaking proficiency have gained considerable attention. This paper aims to present a computerized regime of grading the spontaneous spoken language for ESL learners. Based on the speech corpus of ESL learners recently collected in Taiwan, we first extract multi-view features (e.g., pronunciation, fluency, and prosody features) from either automatic speech recognition (ASR) transcription or audio signals. These extracted features are, in turn, fed into a tree-based classifier to produce a new set of indicative features as the input of the automated assessment system, viz. the grader. Finally, we use different machine learning models to predict ESL learners’ respective speaking proficiency and map the result into the corresponding CEFR level. The experimental results and analysis conducted on the speech corpus of ESL learners in Taiwan show that our approach holds great potential for use in automated speaking assessment, meanwhile offering more reliable predictive results than the human experts. @@ -301,7 +301,7 @@ Wen-ChaoYeh Yu-LunHsieh Yung-ChunChang - Wen-LianHsu + Wen-LianHsu 193–199 This study aims to evaluate three most popular word segmentation tool for a large Traditional Chinese corpus in terms of their efficiency, resource consumption, and cost. Specifically, we compare the performances of Jieba, CKIP, and MONPA on word segmentation, part-of-speech tagging and named entity recognition through extensive experiments. Experimental results show that MONPA using GPU for batch segmentation can greatly reduce the processing time of massive datasets. In addition, its features such as word segmentation, part-of-speech tagging, and named entity recognition are beneficial to downstream applications. 2022.rocling-1.24 @@ -334,7 +334,7 @@ Early Speech Production in Infants and Toddlers Later Diagnosed with Cerebral Palsy: A Retrospective Study Chien JuChan - Li-MeiChen + Li-MeiChen Li-WenChen 214–220 In this retrospective study, we compared the early speech development between infants with cerebral palsy (CP) and typically developing (TD) infants. The recordings of utterances were collected from two CP infants and two typically-developing (TD) infants at approximately 8 and 24 months old. The data was analyzed by volubility, consonant emergence, canonical babbling ratio (CBR), mean babbling level (MBL). The major findings show that comparing with TD group, CP group has the characteristics of: 1) lower volubility 2) CBRutter below 0.15 at 2 years old 3) MBL score below 2 at the age of 2 with a feature of above 95% in level 1 4) using consonants mainly at two oral places (bilabials and velars) and three manners of articulation (nasal, fricative, and stop) at 2 years old. @@ -353,7 +353,7 @@ Speech Timing in Typically Developing <fixed-case>M</fixed-case>andarin-Speaking Children From Ages 3 To 4 Jeng ManLew - Li-MeiChen + Li-MeiChen Yu ChingLin 230–235 This study aims to develop a better understanding of the speech timing development in Mandarin-speaking children from 3 to 4 years of age. Data were selected from two typically developing children. Four 50-min recordings were collected during 3 and 4 years old based on natural conversation among the observers, participants, and the parents, and the picture-naming task. Speech timing were measured by Praat, including speaking rate, articulation rate, mean length of utterance (MLUs), mean utterance duration, mean word duration, pause ratio, and volubility. Major findings of the current study are: 1) Five measurements (speaking rate, mean length of utterance(MLUs), mean utterance length, mean word duration and volubility) decreased with age in both children; 2) Articulation rate of both children increased with age; 3) Comparing with the findings from previous studies, pause ratio of both slightly increased with age. These findings not only contribute to a more comprehensive data for assessment, it also can be a reference in speech intervention. @@ -385,7 +385,7 @@ Hou-ChiangTseng Li-YunChang Hsueh-ChihChen - Yao-TingSung + Yao-TingSung 256–262 Feature analysis of Chinese characters plays a prominent role in “character-based” education. However, there is an urgent need for a text analysis system for processing the difficulty of composing components for characters, primarily based on Chinese learners’ performance. To meet this need, the purpose of this research was to provide such a system by adapting a data-driven approach. Based on Chen et al.’s (2011) Chinese Orthography Database, this research has designed and developed an system: Character Difficulty - Research on Multi-features (CD-ROM). This system provides three functions: (1) analyzing a text and providing its difficulty regarding Chinese characters; (2) decomposing characters into components and calculating the frequency of components based on the analyzed text; and (3) affording component-deriving characters based on the analyzed text and downloadable images as teaching materials. With these functions highlighting multi-level features of characters, this system has the potential to benefit the fields of Chinese character instruction, Chinese orthographic learning, and Chinese natural language processing. 2022.rocling-1.32 @@ -399,7 +399,7 @@ PankajDadure ParthaPakray RiyankaManna - SivajiBandyopadhyay + SivajiBandyopadhyay 263–272 Image captioning is a prominent Artificial Intelligence (AI) research area that deals with visual recognition and a linguistic description of the image. It is an interdisciplinary field concerning how computers can see and understand digital images @@ -474,7 +474,7 @@ Qiu-XiaZhang Te-YuChi Te-LunYang - Jyh-Shing RogerJang + Jyh-Shing RogerJang 321–328 This study uses training and validation data from the “ROCLING 2022 Chinese Health Care Named Entity Recognition Task” for modeling. The modeling process adopts technologies such as data augmentation and data post-processing, and uses the MacBERT pre-training model to build a dedicated Chinese medical field NER recognizer. During the fine-tuning process, we also added adversarial training methods, such as FGM and PGD, and the results of the final tuned model were close to the best team for task evaluation. In addition, by introducing mixed-precision training, we also greatly reduce the time cost of training. 2022.rocling-1.40 @@ -544,8 +544,8 @@ Overview of the <fixed-case>ROCLING</fixed-case> 2022 Shared Task for <fixed-case>C</fixed-case>hinese Healthcare Named Entity Recognition Lung-HaoLee Chao-YiChen - Liang-ChihYu - Yuen-HsienTseng + Liang-ChihYu + Yuen-HsienTseng 363–368 This paper describes the ROCLING-2022 shared task for Chinese healthcare named entity recognition, including task description, data preparation, performance metrics, and evaluation results. Among ten registered teams, seven participating teams submitted a total of 20 runs. This shared task reveals present NLP techniques for dealing with Chinese named entity recognition in the healthcare domain. All data sets with gold standards and evaluation scripts used in this shared task are publicly available for future research. 2022.rocling-1.46 diff --git a/data/xml/2022.salld.xml b/data/xml/2022.salld.xml index 76c6d1f459..1ae9fd6326 100644 --- a/data/xml/2022.salld.xml +++ b/data/xml/2022.salld.xml @@ -5,8 +5,8 @@ Proceedings of the 2nd Workshop on Sentiment Analysis and Linguistic Linked Data IlanKernerman SaraCarvalho - Carlos A.Iglesias - RacheleSprugnoli + Carlos A.Iglesias + RacheleSprugnoli European Language Resources Association
Marseille, France
June @@ -28,7 +28,7 @@
<fixed-case>O</fixed-case>-Dang! The Ontology of Dangerous Speech Messages - Marco AntonioStranisci + Marco AntonioStranisci SimonaFrenda MirkoLai OscarAraque @@ -45,7 +45,7 @@ Movie Rating Prediction using Sentiment Features JoãoRamos DiogoApóstolo - HugoGonçalo Oliveira + HugoGonçalo Oliveira 9–18 We analyze the impact of using sentiment features in the prediction of movie review scores. The effort included the creation of a new lexicon, Expanded OntoSenticNet (EON), by merging OntoSenticNet and SentiWordNet, and experiments were made on the “IMDB movie review” dataset, with the three main approaches for sentiment analysis: lexicon-based, supervised machine learning and hybrids of the previous. Hybrid approaches performed the best, demonstrating the potential of merging knowledge bases and machine learning, but supervised approaches based on review embeddings were not far. 2022.salld-1.3 @@ -54,7 +54,7 @@ Evaluating a New <fixed-case>D</fixed-case>anish Sentiment Resource: the <fixed-case>D</fixed-case>anish Sentiment Lexicon, <fixed-case>DSL</fixed-case> NinaSchneidermann - BolettePedersen + BolettePedersen 19–24 In this paper, we evaluate a new sentiment lexicon for Danish, the Danish Sentiment Lexicon (DSL), to gain input regarding how to carry out the final adjustments of the lexicon. A feature of the lexicon that differentiates it from other sentiment resources for Danish is that it is linked to a large number of other Danish lexical resources via the DDO lemma and sense inventory and the LLOD via the Danish wordnet, DanNet. We perform our evaluation on four datasets labeled with sentiments. In addition, we compare the lexicon against two existing benchmarks for Danish: the Afinn and the Sentida resources. We observe that DSL performs mostly comparably to the existing resources, but that more fine-grained explorations need to be done in order to fully exploit its possibilities given its linking properties. 2022.salld-1.4 @@ -72,7 +72,7 @@ Sentiment Analysis of <fixed-case>S</fixed-case>erbian Old Novels - RankaStanković + RankaStanković MilošKošprdić MilicaIkonić Nešić TijanaRadović diff --git a/data/xml/2022.scil.xml b/data/xml/2022.scil.xml index eea9bf6f79..68bbbfb5be 100644 --- a/data/xml/2022.scil.xml +++ b/data/xml/2022.scil.xml @@ -59,7 +59,7 @@ How well do <fixed-case>LSTM</fixed-case> language models learn filler-gap dependencies? SatoruOzaki DanYurovsky - LoriLevin + LoriLevin 76–88 2022.scil-1.6 ozaki-etal-2022-well @@ -90,7 +90,7 @@ Linguistic Complexity and Planning Effects on Word Duration in <fixed-case>H</fixed-case>indi Read Aloud Speech SidharthRanjan - RajakrishnanRajkumar + RajakrishnanRajkumar SumeetAgarwal 119–132 2022.scil-1.10 @@ -107,7 +107,7 @@ Parsing Early <fixed-case>M</fixed-case>odern <fixed-case>E</fixed-case>nglish for Linguistic Search SethKulick NevilleRyant - BeatriceSantorini + BeatriceSantorini 143–157 2022.scil-1.12 kulick-etal-2022-parsing @@ -172,7 +172,7 @@ The interaction between cognitive ease and informativeness shapes the lexicons of natural languages ThomasBrochhagen - GemmaBoleda + GemmaBoleda 217–219 2022.scil-1.20 brochhagen-boleda-2022-interaction @@ -188,7 +188,7 @@ Masked language models directly encode linguistic uncertainty - Cassandra L.Jacobs + Cassandra L.Jacobs Ryan J.Hubbard Kara D.Federmeier 225–228 @@ -208,8 +208,8 @@ OmriAbend NathanSchneider SamuelGibbon - SharonGoldwater - MarkSteedman + SharonGoldwater + MarkSteedman 235–240 2022.scil-1.24 szubert-etal-2022-universal @@ -219,7 +219,7 @@ EleonoraGualdoni AndreasMadebach ThomasBrochhagen - GemmaBoleda + GemmaBoleda 241–243 2022.scil-1.25 gualdoni-etal-2022-horse diff --git a/data/xml/2022.sdp.xml b/data/xml/2022.sdp.xml index 40f939a070..9c0837f1c7 100644 --- a/data/xml/2022.sdp.xml +++ b/data/xml/2022.sdp.xml @@ -5,7 +5,7 @@ Proceedings of the Third Workshop on Scholarly Document Processing ArmanCohan GuyFeigenblat - DayneFreitag + DayneFreitag TirthankarGhosal DrahomiraHerrmannova PetrKnoth @@ -13,7 +13,7 @@ PhilippMayr MichalShmueli-Scheuer Anitade Waard - Lucy LuWang + Lucy LuWang Association for Computational Linguistics
Gyeongju, Republic of Korea
October @@ -99,7 +99,7 @@ Incorporating the Rhetoric of Scientific Language into Sentence Embeddings using Phrase-guided Distant Supervision and Metric Learning KaitoSugimoto - AkikoAizawa + AkikoAizawa 54–68 Communicative functions are an important rhetorical feature of scientific writing. Sentence embeddings that contain such features are highly valuable for the argumentative analysis of scientific documents, with applications in document alignment, recommendation, and academic writing assistance. Moreover, embeddings can provide a possible solution to the open-set problem, where models need to generalize to new communicative functions unseen at training time. However, existing sentence representation models are not suited for detecting functional similarity since they only consider lexical or semantic similarities. To remedy this, we propose a combined approach of distant supervision and metric learning to make a representation model more aware of the functional part of a sentence. We first leverage an existing academic phrase database to label sentences automatically with their functions. Then, we train an embedding model to capture similarities and dissimilarities from a rhetorical perspective. The experimental results demonstrate that the embeddings obtained from our model are more advantageous than existing models when retrieving functionally similar sentences. We also provide an extensive analysis of the performance differences between five metric learning objectives, revealing that traditional methods (e.g., softmax cross-entropy loss and triplet loss) outperform state-of-the-art techniques. 2022.sdp-1.7 @@ -165,7 +165,7 @@ Exploiting Unary Relations with Stacked Learning for Relation Extraction YuanZhuang - EllenRiloff + EllenRiloff Kiri L.Wagstaff RaymondFrancis Matthew P.Golombek @@ -233,7 +233,7 @@ Overview of <fixed-case>MSLR</fixed-case>2022: A Shared Task on Multi-document Summarization for Literature Reviews Lucy LuWang JayDeYoung - ByronWallace + ByronWallace 175–180 We provide an overview of the MSLR2022 shared task on multi-document summarization for literature reviews. The shared task was hosted at the Third Scholarly Document Processing (SDP) Workshop at COLING 2022. For this task, we provided data consisting of gold summaries extracted from review papers along with the groups of input abstracts that were synthesized into these summaries, split into two subtasks. In total, six teams participated, making 10 public submissions, 6 to the Cochrane subtask and 4 to the MSˆ2 subtask. The top scoring systems reported over 2 points ROUGE-L improvement on the Cochrane subtask, though performance improvements are not consistently reported across all automated evaluation metrics; qualitative examination of the results also suggests the inadequacy of current evaluation metrics for capturing factuality and consistency on this task. Significant work is needed to improve system performance, and more importantly, to develop better methods for automatically evaluating performance on this task. 2022.sdp-1.20 @@ -241,11 +241,11 @@ <fixed-case>LED</fixed-case> down the rabbit hole: exploring the potential of global attention for biomedical multi-document summarisation - YuliaOtmakhova + YuliaOtmakhova Thinh HungTruong - TimothyBaldwin - TrevorCohn - KarinVerspoor + TimothyBaldwin + TrevorCohn + KarinVerspoor Jey HanLau 181–187 In this paper we report the experiments performed for the submission to the Multidocument summarisation for Literature Review (MSLR) Shared Task. In particular, we adopt Primera model to the biomedical domain by placing global attention on important biomedical entities in several ways. We analyse the outputs of 23 resulting models and report some patterns related to the presence of additional global attention, number of training steps and the inputs configuration. @@ -326,7 +326,7 @@ Overview of the <fixed-case>SV</fixed-case>-Ident 2022 Shared Task on Survey Variable Identification in Social Science Publications TornikeTsereteli Yavuz SelimKartal - Simone PaoloPonzetto + Simone PaoloPonzetto AndreaZielinski KaiEckert PhilippMayr @@ -393,7 +393,7 @@ <fixed-case>LTRC</fixed-case> @<fixed-case>M</fixed-case>u<fixed-case>P</fixed-case> 2022: Multi-Perspective Scientific Document Summarization Using Pre-trained Generation Models AshokUrlana NirmalSurange - ManishShrivastava + ManishShrivastava 279–284 The MuP-2022 shared task focuses on multiperspective scientific document summarization. Given a scientific document, with multiple reference summaries, our goal was to develop a model that can produce a generic summary covering as many aspects of the document as covered by all of its reference summaries. This paper describes our best official model, a finetuned BART-large, along with a discussion on the challenges of this task and some of our unofficial models including SOTA generation models. Our submitted model out performedthe given, MuP 2022 shared task, baselines on ROUGE-2, ROUGE-L and average ROUGE F1-scores. Code of our submission can be ac- cessed here. 2022.sdp-1.35 diff --git a/data/xml/2022.semeval.xml b/data/xml/2022.semeval.xml index 6c38744249..9840477b00 100644 --- a/data/xml/2022.semeval.xml +++ b/data/xml/2022.semeval.xml @@ -25,8 +25,8 @@ <fixed-case>S</fixed-case>emeval-2022 Task 1: <fixed-case>CODWOE</fixed-case> – Comparing Dictionaries and Word Embeddings TimotheeMickus - KeesVan Deemter - MathieuConstant + KeesVan Deemter + MathieuConstant DenisPaperno 1-14 Word embeddings have advanced the state of the art in NLP across numerous tasks. Understanding the contents of dense neural representations is of utmost interest to the computational semantics community. We propose to focus on relating these opaque word vectors with human-readable definitions, as found in dictionaries This problem naturally divides into two subtasks: converting definitions into embeddings, and converting embeddings into definitions. This task was conducted in a multilingual setting, using comparable sets of embeddings trained homogeneously. @@ -129,7 +129,7 @@ <fixed-case>RIGA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 1: Scaling Recurrent Neural Networks for <fixed-case>CODWOE</fixed-case> Dictionary Modeling EduardsMukans GusStrazds - GuntisBarzdins + GuntisBarzdins 82-87 Described are our two entries “emukans” and “guntis” for the definition modeling track of CODWOE SemEval-2022 Task 1. Our approach is based on careful scaling of a GRU recurrent neural network, which exhibits double descent of errors, corresponding to significant improvements also per human judgement. Our results are in the middle of the ranking table per official automatic metrics. 2022.semeval-1.9 @@ -180,7 +180,7 @@ HarishTayyar Madabushi EdwardGow-Smith MarcosGarcia - CarolinaScarton + CarolinaScarton MarcoIdiart AlineVillavicencio 107-121 @@ -193,7 +193,7 @@ <fixed-case>H</fixed-case>elsinki-<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 2: A Feature-Based Approach to Multilingual Idiomaticity Detection SamiItkonen - JörgTiedemann + JörgTiedemann MathiasCreutz 122-134 This paper describes the University of Helsinki submission to the SemEval 2022 task on multilingual idiomaticity detection. Our system utilizes several models made available by HuggingFace, along with the baseline BERT model for the task. We focus on feature engineering based on properties that typically characterize idiomatic expressions. The additional features lead to improvements over the baseline and the final submission achieves 15th place out of 20 submissions. The paper provides error analysis of our model including visualisations of the contributions of individual features. @@ -265,8 +265,8 @@ <fixed-case>C</fixed-case>ardiff<fixed-case>NLP</fixed-case>-Metaphor at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 2: Targeted Fine-tuning of Transformer-based Language Models for Idiomaticity Detection JoanneBoisson - JoseCamacho-Collados - LuisEspinosa-Anke + JoseCamacho-Collados + LuisEspinosa-Anke 169-177 This paper describes the experiments ran for SemEval-2022 Task 2, subtask A, zero-shot and one-shot settings for idiomaticity detection. Our main approach is based on fine-tuning transformer-based language models as a baseline to perform binary classification. Our system, CardiffNLP-Metaphor, ranked 8th and 7th (respectively on zero- and one-shot settings on this task. Our main contribution lies in the extensive evaluation of transformer-based language models and various configurations, showing, among others, the potential of large multilingual models over base monolingual models. Moreover, we analyse the impact of various input parameters, which offer interesting insights on how language models work in practice. 2022.semeval-1.20 @@ -423,7 +423,7 @@ <fixed-case>U</fixed-case>o<fixed-case>R</fixed-case>-<fixed-case>NCL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 3: Fine-Tuning the <fixed-case>BERT</fixed-case>-Based Models for Validating Taxonomic Relations ThanetMarkchom - HuizhiLiang + HuizhiLiang JiaoyanChen 260-265 In human languages, there are many presuppositional constructions that impose a constrain on the taxonomic relations between two nouns depending on their order. These constructions create a challenge in validating taxonomic relations in real-world contexts. In SemEval2022-Task3 Presupposed Taxonomies: Evaluating Neural Network Semantics (PreTENS), the organizers introduced a task regarding validating the taxonomic relations within a variety of presuppositional constructions. This task is divided into two subtasks: classification and regression. Each subtask contains three datasets in multiple languages, i.e., English, Italian and French. To tackle this task, this work proposes to fine-tune different BERT-based models pre-trained on different languages. According to the experimental results, the fine-tuned BERT-based models are effective compared to the baselines in classification. For regression, the fine-tuned models show promising performance with the possibility of improvement. @@ -483,7 +483,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 4: Patronizing and Condescending Language Detection CarlaPerez-Almendros - LuisEspinosa-Anke + LuisEspinosa-Anke StevenSchockaert 298-307 This paper presents an overview of Task 4 at SemEval-2022, which was focused on detecting Patronizing and Condescending Language (PCL) towards vulnerable communities. Two sub-tasks were considered: a binary classification task, where participants needed to classify a given paragraph as containing PCL or not, and a multi-label classification task, where participants needed to identify which types of PCL are present (if any). The task attracted more than 300 participants, 77 teams and 229 valid submissions. We provide an overview of how the task was organized, discuss the techniques that were employed by the different participants, and summarize the main resulting insights about PCL detection and categorization. @@ -522,7 +522,7 @@ <fixed-case>BEIKE</fixed-case> <fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 4: Prompt-Based Paragraph Classification for Patronizing and Condescending Language Detection YongDeng ChenxiaoDou - LiangyuChen + LiangyuChen DeqiangMiao XianghuiSun BaochangMa @@ -595,7 +595,7 @@ <fixed-case>MS</fixed-case>@<fixed-case>IW</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 4: Patronising and Condescending Language Detection with Synthetically Generated Data SelinaMeyer MaximilianSchmidhuber - UdoKruschwitz + UdoKruschwitz 363-368 In this description paper we outline the system architecture submitted to Task 4, Subtask 1 at SemEval-2022. We leverage the generative power of state of the art generative pretrained transformer models to increase training set size and remedy class imbalance issues. Our best submitted system is trained on a synthetically enhanced dataset with 10.3 times as many positive samples as the original dataset and reaches an F1 score of 50.62%, which is 10 percentage points higher than our initial system trained on an undersampled version of the original dataset. We explore possible reasons for the comparably low score in the overall task ranking and report on experiments conducted during the post-evaluation phase. 2022.semeval-1.47 @@ -648,7 +648,7 @@ Tesla at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 4: Patronizing and Condescending Language Detection using Transformer-based Models with Data Augmentation SahilBhatt - ManishShrivastava + ManishShrivastava 394-399 This paper describes our system for Task 4 of SemEval 2022: Patronizing and Condescending Language (PCL) Detection. For sub-task 1, where the objective is to classify a text as PCL or non-PCL, we use a T5 Model fine-tuned on the dataset. For sub-task 2, which is a multi-label classification problem, we use a RoBERTa model fine-tuned on the dataset. Given that the key challenge in this task is classification on an imbalanced dataset, our models rely on an augmented dataset that we generate using paraphrasing. We found that these two models yield the best results out of all the other approaches we tried. 2022.semeval-1.52 @@ -703,7 +703,7 @@ Team <fixed-case>LRL</fixed-case>_<fixed-case>NC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 4: Binary and Multi-label Classification of <fixed-case>PCL</fixed-case> using Fine-tuned Transformer-based Models KushagriTandon - NiladriChatterjee + NiladriChatterjee 421-431 Patronizing and condescending language (PCL) can find its way into many mediums of public discourse. Presence of PCL in text can produce negative effects in the society. The challenge presented by the task emerges from the subtleties of PCL and various data dependent constraints. Hence, developing techniques to detect PCL in text, before it is propagated is vital. The aim of this paper is twofold, a) to present systems that can be used to classify a text as containing PCL or not, and b) to present systems that assign the different categories of PCL present in text. The proposed systems are primarily rooted in transformer-based pre-trained language models. Among the models submitted for Subtask 1, the best F1-Score of 0.5436 was achieved by a deep learning based ensemble model. This system secured the rank 29 in the official task ranking. For Subtask 2, the best macro-average F1-Score of 0.339 was achieved by an ensemble model combining transformer-based neural architecture with gradient boosting label-balanced classifiers. This system secured the rank 21 in the official task ranking. Among subsequently carried out experiments a variation in architecture of a system for Subtask 2 achieved a macro-average F1-Score of 0.3527. 2022.semeval-1.57 @@ -875,7 +875,7 @@ <fixed-case>JCT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 4-A: Patronism Detection in Posts Written in <fixed-case>E</fixed-case>nglish using Preprocessing Methods and various Machine Leaerning Methods - YaakovHaCohen-Kerner + YaakovHaCohen-Kerner IlanMeyrowitsch MatanFchima 519-524 @@ -903,7 +903,7 @@ BertaChulvi PaoloRosso AlyssaLees - JeffreySorensen + JeffreySorensen 533-549 The paper describes the SemEval-2022 Task 5: Multimedia Automatic Misogyny Identification (MAMI),which explores the detection of misogynous memes on the web by taking advantage of available texts and images. The task has been organised in two related sub-tasks: the first one is focused on recognising whether a meme is misogynous or not (Sub-task A), while the second one is devoted to recognising types of misogyny (Sub-task B). MAMI has been one of the most popular tasks at SemEval-2022 with more than 400 participants, 65 teams involved in Sub-task A and 41 in Sub-task B from 13 countries. The MAMI challenge received 4214 submitted runs (of which 166 uploaded on the leader-board), denoting an enthusiastic participation for the proposed problem. The collection and annotation is described for the task dataset. The paper provides an overview of the systems proposed for the challenge, reports the results achieved in both sub-tasks and outlines a description of the main errors for a comprehension of the systems capabilities and for detailing future research perspectives. 2022.semeval-1.74 @@ -961,7 +961,7 @@ RajalakshmiSivanaiah AngelS Sakaya MiltonRajendram - MirnalineeT T + MirnalineeT T 571-574 Research is progressing in a fast manner in the field of offensive, hate speech, abusive and sarcastic data. Tackling hate speech against women is urgent and really needed to give respect to the lady of our life. This paper describes the system used for identifying misogynous content using images and text. The system developed by the team TECHSSN uses transformer models to detect the misogynous content from text and Convolutional Neural Network model for image data. Various models like BERT, ALBERT, XLNET and CNN are explored and the combination of ALBERT and CNN as an ensemble model provides better results than the rest. This system was developed for the task 5 of the competition, SemEval 2022. 2022.semeval-1.78 @@ -1081,7 +1081,7 @@ taochen at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 5: Multimodal Multitask Learning and Ensemble Learning ChenTao - Jung-jaeKim + Jung-jaeKim 648-653 We present a multi-modal deep learning system for the Multimedia Automatic Misogyny Identification (MAMI) challenge, a SemEval task of identifying and classifying misogynistic messages in online memes. We adapt multi-task learning for the multimodal subtasks of the MAMI challenge to transfer knowledge among the correlated subtasks. We also leverage on ensemble learning for synergistic integration of models individually trained for the subtasks. We finally discuss about errors of the system to provide useful insights for future work. 2022.semeval-1.89 @@ -1422,7 +1422,7 @@ RajalakshmiSivanaiah AngelS Sakaya MiltonRajendram - MirnalineeT T + MirnalineeT T 851-855 Irony detection in the social media is an upcoming research which places a main role in sentiment analysis and offensive language identification. Sarcasm is one form of irony that is used to provide intended comments against realism. This paper describes a method to detect intended sarcasm in text (SemEval-2022 Task 6). The TECHSSN team used Bidirectional Encoder Representations from Transformers (BERT) models and its variants to classify the text as sarcastic or non-sarcastic in English and Arabic languages. The data is preprocessed and fed to the model for training. The transformer models learn the weights during the training phase from the given dataset and predicts the output class labels for the unseen test data. 2022.semeval-1.118 @@ -1457,7 +1457,7 @@ <fixed-case>U</fixed-case>o<fixed-case>R</fixed-case>-<fixed-case>NCL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 6: Using ensemble loss with <fixed-case>BERT</fixed-case> for intended sarcasm detection EmmanuelOsei-Brefo - HuizhiLiang + HuizhiLiang 871-876 Sarcasm has gained notoriety for being difficult to detect by machine learning systems due to its figurative nature. In this paper, Bidirectional Encoder Representations from Transformers (BERT) model has been used with ensemble loss made of cross-entropy loss and negative log-likelihood loss to classify whether a given sentence is in English and Arabic tweets are sarcastic or not. From the results obtained in the experiments, our proposed BERT with ensemble loss achieved superior performance when applied to English and Arabic test datasets. For the validation dataset, our model performed better on the Arabic dataset but failed to outperform the baseline method (made of BERT with only a single loss function) when applied on the English validation set. 2022.semeval-1.121 @@ -1504,7 +1504,7 @@ <fixed-case>A</fixed-case>lex<fixed-case>U</fixed-case>-<fixed-case>AL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 6: Detecting Sarcasm in <fixed-case>A</fixed-case>rabic Text Using Deep Learning Techniques AyaLotfy MarwanTorki - NagwaEl-Makky + NagwaEl-Makky 891-895 Sarcasm detection is an important task in Natural Language Understanding. Sarcasm is a form of verbal irony that occurs when there is a discrepancy between the literal and intended meanings of an expression. In this paper, we use the tweets of the Arabic dataset provided by SemEval-2022 task 6 to train deep learning classifiers to solve the sub-tasks A and C associated with the dataset. Sub-task A is to determine if the tweet is sarcastic or not. For sub-task C, given a sarcastic text and its non-sarcastic rephrase, i.e. two texts that convey the same meaning, determine which is the sarcastic one. In our solution, we utilize fine-tuned MARBERT (Abdul-Mageed et al., 2021) model with an added single linear layer on top for classification. The proposed solution achieved 0.5076 F1-sarcastic in Arabic sub-task A, accuracy of 0.7450 and F-score of 0.7442 in Arabic sub-task C. We achieved the 2^{nd} and the 9^{th} places for Arabic sub-tasks A and C respectively. 2022.semeval-1.125 @@ -1593,7 +1593,7 @@ <fixed-case>TUG</fixed-case>-<fixed-case>CIC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2021 Task 6: Two-stage Fine-tuning for Intended Sarcasm Detection JasonAngel SegunAroyehun - AlexanderGelbukh + AlexanderGelbukh 951-955 We present our systems and findings for the iSarcasmEval: Intended Sarcasm Detection In English and Arabic at SEMEVAL 2022. Specifically we take part in Subtask A for the English language. The task aims to determine whether a text from social media (a tweet) is sarcastic or not. We model the problem using knowledge sources, a pre-trained language model on sentiment/emotion data and a dataset focused on intended sarcasm. Our submission ranked third place among 43 teams. In addition, we show a brief error analysis of our best model to investigate challenging examples for detecting sarcasm. 2022.semeval-1.133 @@ -1627,7 +1627,7 @@ <fixed-case>FII</fixed-case> <fixed-case>UAIC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 6: i<fixed-case>S</fixed-case>arcasm<fixed-case>E</fixed-case>val - Intended Sarcasm Detection in <fixed-case>E</fixed-case>nglish and <fixed-case>A</fixed-case>rabic TudorManoleasa - DanielaGifu + DanielaGifu IustinSandu 970-977 The “iSarcasmEval - Intended Sarcasm Detection in English and Arabic” task at the SemEval 2022 competition focuses on detectingand rating the distinction between intendedand perceived sarcasm in the context of textual sarcasm detection, as well as the level ofirony contained in these texts. In the contextof SemEval, we present a binary classificationmethod which classifies the text as sarcasticor non-sarcastic (task A, for English) based onfive classical machine learning approaches bytrying to train the models based on this datasetsolely (i.e., no other datasets have been used).This process indicates low performance compared to previously studied datasets, which in2dicates that the previous ones might be biased. @@ -1650,7 +1650,7 @@ <fixed-case>LT</fixed-case>3 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 6: Fuzzy-Rough Nearest Neighbor Classification for Sarcasm Detection OlhaKaminska ChrisCornelis - VeroniqueHoste + VeroniqueHoste 987-992 This paper describes the approach developed by the LT3 team in the Intended Sarcasm Detection task at SemEval-2022 Task 6. We considered the binary classification subtask A for English data. The presented system is based on the fuzzy-rough nearest neighbor classification method using various text embedding techniques. Our solution reached 9th place in the official leader-board for English subtask A. 2022.semeval-1.138 @@ -1734,7 +1734,7 @@ <fixed-case>JCT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 6-A: Sarcasm Detection in Tweets Written in <fixed-case>E</fixed-case>nglish and <fixed-case>A</fixed-case>rabic using Preprocessing Methods and Word N-grams - YaakovHaCohen-Kerner + YaakovHaCohen-Kerner MatanFchima IlanMeyrowitsch 1031-1038 @@ -1870,7 +1870,7 @@ FabianFlöck DevinGaffney PrzemyslawGrabowicz - Scott A.Hale + Scott A.Hale DavidJurgens MattiaSamory 1094-1106 @@ -1883,7 +1883,7 @@ <fixed-case>EMBEDDIA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 8: Investigating Sentence, Image, and Knowledge Graph Representations for Multilingual News Article Similarity ElaineZosa - EmanuelaBoros + EmanuelaBoros BoshkoKoloski LidiaPivovarova 1107-1113 @@ -1909,9 +1909,9 @@ <fixed-case>G</fixed-case>ate<fixed-case>NLP</fixed-case>-<fixed-case>US</fixed-case>hef at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 8: Entity-Enriched <fixed-case>S</fixed-case>iamese Transformer for Multilingual News Article Similarity IknoorSingh - YueLi + YueLi MelissaThong - CarolinaScarton + CarolinaScarton 1121-1128 This paper describes the second-placed system on the leaderboard of SemEval-2022 Task 8: Multilingual News Article Similarity. We propose an entity-enriched Siamese Transformer which computes news article similarity based on different sub-dimensions, such as the shared narrative, entities, location and time of the event discussed in the news article. Our system exploits a Siamese network architecture using a Transformer encoder to learn document-level representations for the purpose of capturing the narrative together with the auxiliary entity-based features extracted from the news articles. The intuition behind using all these features together is to capture the similarity between news articles at different granularity levels and to assess the extent to which different news outlets write about “the same events”. Our experimental results and detailed ablation study demonstrate the effectiveness and the validity of our proposed method. 2022.semeval-1.158 @@ -2022,7 +2022,7 @@ HongqingXu ShuzheZhou BohanChen - ChengjieSun + ChengjieSun YuanchaoLiu 1184-1189 This article introduces a system to solve the SemEval 2022 Task 8: Multilingual News Article Similarity. The task focuses on the consistency of events reported in two news articles. The system consists of a pre-trained model(e.g., INFOXLM and XLM-RoBERTa) to extract multilingual news features, following fully-connected networks to measure the similarity. In addition, data augmentation and Ten Fold Voting are used to enhance the model. Our final submitted model is an ensemble of three base models, with a Pearson value of 0.784 on the test dataset. @@ -2140,7 +2140,7 @@ KelleyLynch RichardBrutti RobertoNavigli - JamesPustejovsky + JamesPustejovsky 1244-1255 In this task, we identify a challenge that is reflective of linguistic and cognitive competencies that humans have when speaking and reasoning. Particularly, given the intuition that textual and visual information mutually inform each other for semantic reasoning, we formulate a Competence-based Question Answering challenge, designed to involve rich semantic annotation and aligned text-video objects. The task is to answer questions from a collection of cooking recipes and videos, where each question belongs to a “question family” reflecting a specific reasoning competence. The data and task result is publicly available. 2022.semeval-1.176 @@ -2153,7 +2153,7 @@ WeiheZhai MingqiangFeng ArkaitzZubiaga - BingquanLiu + BingquanLiu 1256-1262 This paper presents the second place system for the R2VQ: competence-based multimodal question answering shared task. The purpose of this task is to involve semantic&cooking roles and text-images objects when querying how well a system understands the procedure of a recipe. This task is approached with text-to-text generative model based on transformer architecture. As a result, the model can well generalise to soft constrained and other competence-based question answering problem. We propose label enclosed input method which help the model achieve significant improvement from 65.34 (baseline) to 91.3. In addition to describing the submitted system, the impact of model architecture and label selection are investigated along with remarks regarding error analysis. Finally, future works are presented. 2022.semeval-1.177 @@ -2202,8 +2202,8 @@ EnricaTroiano AndreyKutuzov JanBuchmann - RodrigoAgerri - LiljaØvrelid + RodrigoAgerri + LiljaØvrelid ErikVelldal 1280-1295 In this paper, we introduce the first SemEval shared task on Structured Sentiment Analysis, for which participants are required to predict all sentiment graphs in a text, where a single sentiment graph is composed of a sentiment holder, target, expression and polarity. This new shared task includes two subtracks (monolingual and cross-lingual) with seven datasets available in five languages, namely Norwegian, Catalan, Basque, Spanish and English. Participants submitted their predictions on a held-out test set and were evaluated on Sentiment Graph F1 . Overall, the task received over 200 submissions from 32 participating teams. We present the results of the 15 teams that provided system descriptions and our own expanded analysis of the test predictions. @@ -2255,7 +2255,7 @@ AngelS RajalakshmiSivanaiah Sakaya MiltonRajendram - MirnalineeT T + MirnalineeT T 1324-1328 Task 10 in SemEval 2022 is a composite task which entails analysis of opinion tuples, and recognition and demarcation of their nature. In this paper, we will elaborate on how such a methodology is implemented, how it is undertaken for a Structured Sentiment Analysis, and the results obtained thereof. To achieve this objective, we have adopted a bi-layered BiLSTM approach. In our research, a variation on the norm has been effected towards enhancement of accuracy, by basing the categorization meted out to an individual member as a by-product of its adjacent members, using specialized algorithms to ensure the veracity of the output, which has been modelled to be the holistically most accurate label for the entire sequence. Such a strategy is superior in terms of its parsing accuracy and requires less time. This manner of action has yielded an SF1 of 0.33 in the highest-performing configuration. 2022.semeval-1.184 @@ -2399,7 +2399,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 11: Multilingual Complex Named Entity Recognition (<fixed-case>M</fixed-case>ulti<fixed-case>C</fixed-case>o<fixed-case>NER</fixed-case>) - ShervinMalmasi + ShervinMalmasi AnjieFang BesnikFetahu SudiptaKar @@ -2442,7 +2442,7 @@ Renzo M.Rivera-Zavala PalomaMartinez ClaudiaMoro - EmersonParaiso + EmersonParaiso 1448-1456 This study introduces the system submitted to the SemEval 2022 Task 11: MultiCoNER (Multilingual Complex Named Entity Recognition) by the UC3M-PUCPR team. We proposed an ensemble of transformer-based models for entity recognition in cross-domain texts. Our deep learning method benefits from the transformer architecture, which adopts the attention mechanism to handle the long-range dependencies of the input text. Also, the ensemble approach for named entity recognition (NER) improved the results over baselines based on individual models on two of the three tracks we participated in. The ensemble model for the code-mixed task achieves an overall performance of 76.36% F1-score, a 2.85 percentage point increase upon our individually best model for this task, XLM-RoBERTa-large (73.51%), outperforming the baseline provided for the shared task by 18.26 points. Our preliminary results suggest that contextualized language models ensembles can, even if modestly, improve the results in extracting information from unstructured data. 2022.semeval-1.199 @@ -2520,7 +2520,7 @@ Sliced at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 11: Bigger, Better? Massively Multilingual <fixed-case>LM</fixed-case>s for Multilingual Complex <fixed-case>NER</fixed-case> on an Academic <fixed-case>GPU</fixed-case> Budget - BarbaraPlank + BarbaraPlank 1494-1500 Massively multilingual language models (MMLMs) have become a widely-used representation method, and multiple large MMLMs were proposed in recent years. A trend is to train MMLMs on larger text corpora or with more layers. In this paper we set out to test recent popular MMLMs on detecting semantically ambiguous and complex named entities with an academic GPU budget. Our submission of a single model for 11 languages on the SemEval Task 11 MultiCoNER shows that a vanilla transformer-CRF with XLM-R_{large} outperforms the more recent RemBERT, ranking 9th from 26 submissions in the multilingual track. Compared to RemBERT, the XLM-R model has the additional advantage to fit on a slice of a multi-instance GPU. As contrary to expectations and recent findings, we found RemBERT to not be the best MMLM, we further set out to investigate this discrepancy with additional experiments on multilingual Wikipedia NER data. While we expected RemBERT to have an edge on that dataset as it is closer to its pre-training data, surprisingly, our results show that this is not the case, suggesting that text domain match does not explain the discrepancy. 2022.semeval-1.205 @@ -2779,9 +2779,9 @@ L3i at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2022 Task 11: Straightforward Additional Context for Multilingual Named Entity Recognition - EmanuelaBoros + EmanuelaBoros Carlos-EmilianoGonzález-Gallardo - JoseMoreno + JoseMoreno AntoineDoucet 1630-1638 This paper summarizes the participation of the L3i laboratory of the University of La Rochelle in the SemEval-2022 Task 11, Multilingual Complex Named Entity Recognition (MultiCoNER). The task focuses on detecting semantically ambiguous and complex entities in short and low-context monolingual and multilingual settings. We argue that using a language-specific and a multilingual language model could improve the performance of multilingual and mixed NER. Also, we consider that using additional contexts from the training set could improve the performance of a NER on short texts. Thus, we propose a straightforward technique for generating additional contexts with and without the presence of entities. Our findings suggest that, in our internal experimental setup, this approach is promising. However, we ranked above average for the high-resource languages and lower than average for low-resource and multilingual models. diff --git a/data/xml/2022.sigdial.xml b/data/xml/2022.sigdial.xml index 51f2371347..f5a388b253 100644 --- a/data/xml/2022.sigdial.xml +++ b/data/xml/2022.sigdial.xml @@ -4,7 +4,7 @@ Proceedings of the 23rd Annual Meeting of the Special Interest Group on Discourse and Dialogue OliverLemon - DilekHakkani-Tur + DilekHakkani-Tur Junyi JessyLi ArashAshrafzadeh Daniel HernándezGarcia @@ -48,7 +48,7 @@ Knowledge-Grounded Conversational Data Augmentation with Generative Conversational Networks Yen TingLin - AlexandrosPapangelis + AlexandrosPapangelis SeokhwanKim DilekHakkani-Tur 26–38 @@ -62,7 +62,7 @@ Guiding the Release of Safer <fixed-case>E</fixed-case>2<fixed-case>E</fixed-case> Conversational <fixed-case>AI</fixed-case> through Value Sensitive Design A. StevieBergman GavinAbercrombie - ShannonSpruit + ShannonSpruit DirkHovy EmilyDinan Y-LanBoureau @@ -122,7 +122,7 @@ <fixed-case>Q</fixed-case>uality<fixed-case>A</fixed-case>dapt: an Automatic Dialogue Quality Estimation Framework JohnMendonca - AlonLavie + AlonLavie IsabelTrancoso 83–90 Despite considerable advances in open-domain neural dialogue systems, their evaluation remains a bottleneck. Several automated metrics have been proposed to evaluate these systems, however, they mostly focus on a single notion of quality, or, when they do combine several sub-metrics, they are computationally expensive. This paper attempts to solve the latter: QualityAdapt leverages the Adapter framework for the task of Dialogue Quality Estimation. Using well defined semi-supervised tasks, we train adapters for different subqualities and score generated responses with AdapterFusion. This compositionality provides an easy to adapt metric to the task at hand that incorporates multiple subqualities. It also reduces computational costs as individual predictions of all subqualities are obtained in a single forward pass. This approach achieves comparable results to state-of-the-art metrics on several datasets, whilst keeping the previously mentioned advantages. @@ -134,8 +134,8 @@ Graph Neural Network Policies and Imitation Learning for Multi-Domain Task-Oriented Dialogues ThibaultCordier TanguyUrvoy - FabriceLefèvre - Lina M.Rojas Barahona + FabriceLefèvre + Lina M.Rojas Barahona 91–100 Task-oriented dialogue systems are designed to achieve specific goals while conversing with humans. In practice, they may have to handle simultaneously several domains and tasks. The dialogue manager must therefore be able to take into account domain changes and plan over different domains/tasks in order to deal with multi-domain dialogues. However, learning with reinforcement in such context becomes difficult because the state-action dimension is larger while the reward signal remains scarce. Our experimental results suggest that structured policies based on graph neural networks combined with different degrees of imitation learning can effectively handle multi-domain dialogues. The reported experiments underline the benefit of structured policies over standard policies. 2022.sigdial-1.10 @@ -172,7 +172,7 @@ Dialog Acts for Task Driven Embodied Agents SpandanaGella AishwaryaPadmakumar - PatrickLange + PatrickLange DilekHakkani-Tur 111–123 Embodied agents need to be able to interact in natural language – understanding task descriptions and asking appropriate follow up questions to obtain necessary information to be effective at successfully accomplishing tasks for a wide range of users. In this work, we propose a set of dialog acts for modelling such dialogs and annotate the TEACh dataset that includes over 3,000 situated, task oriented conversations (consisting of 39.5k utterances in total) with dialog acts. To our knowledge,TEACh-DA is the first large scale dataset of dialog act annotations for embodied task completion. Furthermore, we demonstrate the use of this annotated dataset in training models for tagging the dialog acts of a given utterance, predicting the dialog act of the next response given a dialog history, and use the dialog acts to guide agent’s non-dialog behaviour. In particular, our experiments on the TEACh Execution from Dialog History task where the model predicts the sequence of low level actions to be executed in the environment for embodied task completion, demonstrate that dialog acts can improve end performance by up to 2 points compared to the system without dialog acts. @@ -185,7 +185,7 @@ Symbol and Communicative Grounding through Object Permanence with a Mobile Robot JosueTorres-Fonseca CatherineHenry - CaseyKennington + CaseyKennington 124–134 Object permanence is the ability to form and recall mental representations of objects even when they are not in view. Despite being a crucial developmental step for children, object permanence has had only some exploration as it relates to symbol and communicative grounding in spoken dialogue systems. In this paper, we leverage SLAM as a module for tracking object permanence and use a robot platform to move around a scene where it discovers objects and learns how they are denoted. We evaluated by comparing our system’s effectiveness at learning words from human dialogue partners both with and without object permanence. We found that with object permanence, human dialogue partners spoke with the robot and the robot correctly identified objects it had learned about significantly more than without object permanence, which suggests that object permanence helped facilitate communicative and symbol grounding. 2022.sigdial-1.14 @@ -215,7 +215,7 @@ PanLu WeiyanShi ZhouYu - Song-ChunZhu + Song-ChunZhu 146–158 Building a socially intelligent agent involves many challenges. One of which is to track the agent’s mental state transition and teach the agent to make decisions guided by its value like a human. Towards this end, we propose to incorporate mental state simulation and value modeling into dialogue agents. First, we build a hybrid mental state parser that extracts information from both the dialogue and event observations and maintains a graphical representation of the agent’s mind; Meanwhile, the transformer-based value model learns human preferences from the human value dataset, ValueNet. Empirical results show that the proposed model attains state-of-the-art performance on the dialogue/action/emotion prediction task in the fantasy text-adventure game dataset, LIGHT. We also show example cases to demonstrate: (i) how the proposed mental state parser can assist the agent’s decision by grounding on the context like locations and objects, and (ii) how the value model can help the agent make decisions based on its personal priorities. 2022.sigdial-1.16 @@ -227,7 +227,7 @@ Automatic Verbal Depiction of a Brick Assembly for a Robot Instructing Humans RamiYounes GérardBailly - FredericElisei + FredericElisei DamienPellier 159–171 Verbal and nonverbal communication skills are essential for human-robot interaction, in particular when the agents are involved in a shared task. We address the specific situation when the robot is the only agent knowing about the plan and the goal of the task and has to instruct the human partner. The case study is a brick assembly. We here describe a multi-layered verbal depictor whose semantic, syntactic and lexical settings have been collected and evaluated via crowdsourcing. One crowdsourced experiment involves a robot instructed pick-and-place task. We show that implicitly referring to achieved subgoals (stairs, pillows, etc) increases performance of human partners. @@ -251,7 +251,7 @@ <fixed-case>EDU</fixed-case>-<fixed-case>AP</fixed-case>: Elementary Discourse Unit based Argument Parser SougataSaha SouvikDas - RohiniSrihari + RohiniSrihari 183–192 Neural approaches to end-to-end argument mining (AM) are often formulated as dependency parsing (DP), which relies on token-level sequence labeling and intricate post-processing for extracting argumentative structures from text. Although such methods yield reasonable results, operating solely with tokens increases the possibility of discontinuous and overly segmented structures due to minor inconsistencies in token level predictions. In this paper, we propose EDU-AP, an end-to-end argument parser, that alleviates such problems in dependency-based methods by exploiting the intrinsic relationship between elementary discourse units (EDUs) and argumentative discourse units (ADUs) and operates at both token and EDU level granularity. Further, appropriately using contextual information, along with optimizing a novel objective function during training, EDU-AP achieves significant improvements across all four tasks of AM compared to existing dependency-based methods. 2022.sigdial-1.19 @@ -276,7 +276,7 @@ QingyangWu SongFeng DerekChen - SachindraJoshi + SachindraJoshi LuisLastras ZhouYu 204–216 @@ -290,7 +290,7 @@ When can <fixed-case>I</fixed-case> Speak? Predicting initiation points for spoken dialogue agents SiyanLi AshwinParanjape - ChristopherManning + ChristopherManning 217–224 Current spoken dialogue systems initiate their turns after a long period of silence (700-1000ms), which leads to little real-time feedback, sluggish responses, and an overall stilted conversational flow. Humans typically respond within 200ms and successfully predicting initiation points in advance would allow spoken dialogue agents to do the same. In this work, we predict the lead-time to initiation using prosodic features from a pre-trained speech representation model (wav2vec 1.0) operating on user audio and word features from a pre-trained language model (GPT-2) operating on incremental transcriptions. To evaluate errors, we propose two metrics w.r.t. predicted and true lead times. We train and evaluate the models on the Switchboard Corpus and find that our method outperforms features from prior work on both metrics and vastly outperforms the common approach of waiting for 700ms of silence. 2022.sigdial-1.22 @@ -300,7 +300,7 @@ Using Interaction Style Dimensions to Characterize Spoken Dialog Corpora - NigelWard + NigelWard 225–230 The construction of spoken dialog systems today relies heavily on appropriate corpora, but corpus selection is more an art than a science. As interaction style properties govern many aspects of dialog, they have the potential to be useful for relating and comparing corpora. This paper overviews a recently-developed model of interaction styles and shows how it can be used to identify relevant corpus differences, estimate corpus similarity, and flag likely outlier dialogs. 2022.sigdial-1.23 @@ -349,7 +349,7 @@ How Well Do You Know Your Audience? Toward Socially-aware Question Generation IanStewart - RadaMihalcea + RadaMihalcea 255–269 When writing, a person may need to anticipate questions from their audience, but different social groups may ask very different types of questions. If someone is writing about a problem they want to resolve, what kind of follow-up question will a domain expert ask, and could the writer better address the expert’s information needs by rewriting their original post? In this paper, we explore the task of socially-aware question generation. We collect a data set of questions and posts from social media, including background information about the question-askers’ social groups. We find that different social groups, such as experts and novices, consistently ask different types of questions. We train several text-generation models that incorporate social information, and we find that a discrete social-representation model outperforms the text-only model when different social groups ask highly different questions from one another. Our work provides a framework for developing text generation models that can help writers anticipate the information expectations of highly different social groups. 2022.sigdial-1.27 @@ -365,7 +365,7 @@ NurulLubis Carelvan Niekerk MichaelHeck - MilicaGasic + MilicaGasic 270–282 User simulators (USs) are commonly used to train task-oriented dialogue systems via reinforcement learning. The interactions often take place on semantic level for efficiency, but there is still a gap from semantic actions to natural language, which causes a mismatch between training and deployment environment. Incorporating a natural language generation (NLG) module with USs during training can partly deal with this problem. However, since the policy and NLG of USs are optimised separately, these simulated user utterances may not be natural enough in a given context. In this work, we propose a generative transformer-based user simulator (GenTUS). GenTUS consists of an encoder-decoder structure, which means it can optimise both the user policy and natural language generation jointly. GenTUS generates both semantic actions and natural language utterances, preserving interpretability and enhancing language variation. In addition, by representing the inputs and outputs as word sequences and by using a large pre-trained language model we can achieve generalisability in feature representation. We evaluate GenTUS with automatic metrics and human evaluation. Our results show that GenTUS generates more natural language and is able to transfer to an unseen ontology in a zero-shot fashion. In addition, its behaviour can be further shaped with reinforcement learning opening the door to training specialised user simulators. 2022.sigdial-1.28 @@ -411,7 +411,7 @@ Structured Dialogue Discourse Parsing Ta-ChungChi - AlexanderRudnicky + AlexanderRudnicky 325–335 Dialogue discourse parsing aims to uncover the internal structure of a multi-participant conversation by finding all the discourse links and corresponding relations. Previous work either treats this task as a series of independent multiple-choice problems, in which the link existence and relations are decoded separately, or the encoding is restricted to only local interaction, ignoring the holistic structural information. In contrast, we propose a principled method that improves upon previous work from two perspectives: encoding and decoding. From the encoding side, we perform structured encoding on the adjacency matrix followed by the matrix-tree learning algorithm, where all discourse links and relations in the dialogue are jointly optimized based on latent tree-level distribution. From the decoding side, we perform structured inference using the modified Chiu-Liu-Edmonds algorithm, which explicitly generates the labeled multi-root non-projective spanning tree that best captures the discourse structure. In addition, unlike in previous work, we do not rely on hand-crafted features; this improves the model’s robustness. Experiments show that our method achieves new state-of-the-art, surpassing the previous model by 2.3 on STAC and 1.5 on Molweni (F1 scores). 2022.sigdial-1.32 @@ -422,8 +422,8 @@ “Do you follow me?”: A Survey of Recent Approaches in Dialogue State Tracking LéoJacqmin - Lina M.Rojas Barahona - BenoitFavre + Lina M.Rojas Barahona + BenoitFavre 336–350 While communicating with a user, a task-oriented dialogue system has to track the user’s needs at each turn according to the conversation history. This process called dialogue state tracking (DST) is crucial because it directly informs the downstream dialogue policy. DST has received a lot of interest in recent years with the text-to-text paradigm emerging as the favored approach. In this review paper, we first present the task and its associated datasets. Then, considering a large number of recent publications, we identify highlights and advances of research in 2021-2022. Although neural approaches have enabled significant progress, we argue that some critical aspects of dialogue systems such as generalizability are still underexplored. To motivate future studies, we propose several research avenues. 2022.sigdial-1.33 @@ -457,7 +457,7 @@ Getting Better Dialogue Context for Knowledge Identification by Leveraging Document-level Topic Shift NhatTran - DianeLitman + DianeLitman 368–375 To build a goal-oriented dialogue system that can generate responses given a knowledge base, identifying the relevant pieces of information to be grounded in is vital. When the number of documents in the knowledge base is large, retrieval approaches are typically used to identify the top relevant documents. However, most prior work simply uses an entire dialogue history to guide retrieval, rather than exploiting a dialogue’s topical structure. In this work, we examine the importance of building the proper contextualized dialogue history when document-level topic shifts are present. Our results suggest that excluding irrelevant turns from the dialogue history (e.g., excluding turns not grounded in the same document as the current turn) leads to better retrieval results. We also propose a cascading approach utilizing the topical nature of a knowledge-grounded conversation to further manipulate the dialogue history used as input to the retrieval models. 2022.sigdial-1.36 @@ -487,7 +487,7 @@ JillianTang AvanikaNarayan GiovanniCampagna - ChristopherManning + ChristopherManning 376–395 We present Chirpy Cardinal, an open-domain social chatbot. Aiming to be both informative and conversational, our bot chats with users in an authentic, emotionally intelligent way. By integrating controlled neural generation with scaffolded, hand-written dialogue, we let both the user and bot take turns driving the conversation, producing an engaging and socially fluent experience. Deployed in the fourth iteration of the Alexa Prize Socialbot Grand Challenge, Chirpy Cardinal handled thousands of conversations per day, placing second out of nine bots with an average user rating of 3.58/5. 2022.sigdial-1.37 @@ -523,7 +523,7 @@ Entity-based De-noising Modeling for Controllable Dialogue Summarization ZhengyuanLiu - NancyChen + NancyChen 407–418 Although fine-tuning pre-trained backbones produces fluent and grammatically-correct text in various language generation tasks, factual consistency in abstractive summarization remains challenging. This challenge is especially thorny for dialogue summarization, where neural models often make inaccurate associations between personal named entities and their respective actions. To tackle this type of hallucination, we present an entity-based de-noising model via text perturbation on reference summaries. We then apply this proposed approach in beam search validation, conditional training augmentation, and inference post-editing. Experimental results on the SAMSum corpus show that state-of-the-art models equipped with our proposed method achieve generation quality improvement in both automatic evaluation and human assessment. 2022.sigdial-1.40 @@ -605,7 +605,7 @@ Carelvan Niekerk MichaelHeck ShutongFeng - MilicaGasic + MilicaGasic 478–489 Task-oriented dialogue systems aim to fulfill user goals through natural language interactions. They are ideally evaluated with human users, which however is unattainable to do at every iteration of the development phase. Simulated users could be an alternative, however their development is nontrivial. Therefore, researchers resort to offline metrics on existing human-human corpora, which are more practical and easily reproducible. They are unfortunately limited in reflecting real performance of dialogue systems. BLEU for instance is poorly correlated with human judgment, and existing corpus-based metrics such as success rate overlook dialogue context mismatches. There is still a need for a reliable metric for task-oriented systems with good generalization and strong correlation with human judgements. In this paper, we propose the use of offline reinforcement learning for dialogue evaluation based on static data. Such an evaluator is typically called a critic and utilized for policy optimization. We go one step further and show that offline RL critics can be trained for any dialogue system as external evaluators, allowing dialogue performance comparisons across various types of systems. This approach has the benefit of being corpus- and model-independent, while attaining strong correlation with human judgements, which we confirm via an interactive user trial. 2022.sigdial-1.46 @@ -621,7 +621,7 @@ BradfordMott KristaGlazewski Cindy E.Hmelo-Silver - JamesLester + JamesLester 490–499 Accurate detection and appropriate handling of disruptive talk in multi-party dialogue is essential for users to achieve shared goals. In collaborative game-based learning environments, detecting and attending to disruptive talk holds significant potential since it can cause distraction and produce negative learning experiences for students. We present a novel attention-based user-aware neural architecture for disruptive talk detection that uses a sequence dropout-based regularization mechanism. The disruptive talk detection models are evaluated with multi-party dialogue collected from 72 middle school students who interacted with a collaborative game-based learning environment. Our proposed disruptive talk detection model significantly outperforms competitive baseline approaches and shows significant potential for helping to support effective collaborative learning experiences. 2022.sigdial-1.47 @@ -634,7 +634,7 @@ SymonStevens-Guille AleksandreMaskharashvili XintongLi - MichaelWhite + MichaelWhite 500–515 We report results of experiments using BART (Lewis et al., 2019) and the Penn Discourse Tree Bank (Webber et al., 2019) (PDTB) to generate texts with correctly realized discourse relations. We address a question left open by previous research (Yung et al., 2021; Ko and Li, 2020) concerning whether conditioning the model on the intended discourse relation—which corresponds to adding explicit discourse relation information into the input to the model—improves its performance. Our results suggest that including discourse relation information in the input of the model significantly improves the consistency with which it produces a correctly realized discourse relation in the output. We compare our models’ performance to known results concerning the discourse structures found in written text and their possible explanations in terms of discourse interpretation strategies hypothesized in the psycholinguistics literature. Our findings suggest that natural language generation models based on current pre-trained Transformers will benefit from infusion with discourse level information if they aim to construct discourses with the intended relations. 2022.sigdial-1.48 @@ -647,7 +647,7 @@ XiaoyingZhang BaolinPeng JianfengGao - HelenMeng + HelenMeng 516–530 End-to-end task bots are typically learned over a static and usually limited-size corpus. However, when deployed in dynamic, changing, and open environments to interact with users, task bots tend to fail when confronted with data that deviate from the training corpus, i.e., out-of-distribution samples. In this paper, we study the problem of automatically adapting task bots to changing environments by learning from human-bot interactions with minimum or zero human annotations. We propose SL-Agent, a novel self-learning framework for building end-to-end task bots. SL-Agent consists of a dialog model and a pre-trained reward model to predict the quality of an agent response. It enables task bots to automatically adapt to changing environments by learning from the unlabeled human-bot dialog logs accumulated after deployment via reinforcement learning with the incorporated reward model. Experimental results on four well-studied dialog tasks show the effectiveness of SL-Agent to automatically adapt to changing environments, using both automatic and human evaluations. We will release code and data for further research. 2022.sigdial-1.49 @@ -696,10 +696,10 @@ Dialogue Term Extraction using Transfer Learning and Topological Data Analysis RenatoVukovic MichaelHeck - BenjaminRuppik + BenjaminRuppik Carelvan Niekerk MarcusZibrowius - MilicaGasic + MilicaGasic 564–581 Goal oriented dialogue systems were originally designed as a natural language interface to a fixed data-set of entities that users might inquire about, further described by domain, slots and values. As we move towards adaptable dialogue systems where knowledge about domains, slots and values may change, there is an increasing need to automatically extract these terms from raw dialogues or related non-dialogue data on a large scale. In this paper, we take an important step in this direction by exploring different features that can enable systems to discover realisations of domains, slots and values in dialogues in a purely data-driven fashion. The features that we examine stem from word embeddings, language modelling features, as well as topological features of the word embedding space. To examine the utility of each feature set, we train a seed model based on the widely used MultiWOZ data-set. Then, we apply this model to a different corpus, the Schema-guided dialogue data-set. Our method outperforms the previously proposed approach that relies solely on word embeddings. We also demonstrate that each of the features is responsible for discovering different kinds of content. We believe our results warrant further research towards ontology induction, and continued harnessing of topological data analysis for dialogue and natural language processing research. 2022.sigdial-1.53 @@ -747,7 +747,7 @@ Comparison of Lexical Alignment with a Teachable Robot in Human-Robot and Human-Human-Robot Interactions YuyaAsano - DianeLitman + DianeLitman MingzhiYu NikkiLobczowski TimothyNokes-Malach diff --git a/data/xml/2022.sigmorphon.xml b/data/xml/2022.sigmorphon.xml index edaa4b5e39..404ddacfc3 100644 --- a/data/xml/2022.sigmorphon.xml +++ b/data/xml/2022.sigmorphon.xml @@ -30,7 +30,7 @@ SimonTodd AnnieHuang JeremyNeedle - JenniferHay + JenniferHay JeanetteKing 12-22 We present an extension of the Morfessor Baseline model of unsupervised morphological segmentation (Creutz and Lagus, 2007) that incorporates abstract templates for reduplication, a typologically common but computationally underaddressed process. Through a detailed investigation that applies the model to Maori, the ̄ Indigenous language of Aotearoa New Zealand, we show that incorporating templates improves Morfessor’s ability to identify instances of reduplication, and does so most when there are multiple minimally-overlapping templates. We present an error analysis that reveals important factors to consider when applying the extended model and suggests useful future directions. @@ -64,7 +64,7 @@ A Masked Segmental Language Model for Unsupervised Natural Language Segmentation C.m.Downey FeiXia - Gina-AnneLevow + Gina-AnneLevow ShaneSteinert-Threlkeld 39-50 We introduce a Masked Segmental Language Model (MSLM) for joint language modeling and unsupervised segmentation. While near-perfect supervised methods have been developed for segmenting human-like linguistic units in resource-rich languages such as Chinese, many of the world’s languages are both morphologically complex, and have no large dataset of “gold” segmentations for supervised training. Segmental Language Models offer a unique approach by conducting unsupervised segmentation as the byproduct of a neural language modeling objective. However, current SLMs are limited in their scalability due to their recurrent architecture. We propose a new type of SLM for use in both unsupervised and lightly supervised segmentation tasks. The MSLM is built on a span-masking transformer architecture, harnessing a masked bidirectional modeling context and attention, as well as adding the potential for model scalability. In a series of experiments, our model outperforms the segmentation quality of recurrent SLMs on Chinese, and performs similarly to the recurrent model on English. @@ -86,7 +86,7 @@ Subword-based Cross-lingual Transfer of Embeddings from <fixed-case>H</fixed-case>indi to <fixed-case>M</fixed-case>arathi and <fixed-case>N</fixed-case>epali NiyatiBafna - ZdeněkŽabokrtský + ZdeněkŽabokrtský 61-71 Word embeddings are growing to be a crucial resource in the field of NLP for any language. This work introduces a novel technique for static subword embeddings transfer for Indic languages from a relatively higher resource language to a genealogically related low resource language. We primarily work with HindiMarathi, simulating a low-resource scenario for Marathi, and confirm observed trends on Nepali. We demonstrate the consistent benefits of unsupervised morphemic segmentation on both source and target sides over the treatment performed by fastText. Our best-performing approach uses an EM-style approach to learning bilingual subword embeddings; we also show, for the first time, that a trivial “copyand-paste” embeddings transfer based on even perfect bilingual lexicons is inadequate in capturing language-specific relationships. We find that our approach substantially outperforms the fastText baselines for both Marathi and Nepali on the Word Similarity task as well as WordNetBased Synonymy Tests; on the former task, its performance for Marathi is close to that of pretrained fastText embeddings that use three orders of magnitude more Marathi data. 2022.sigmorphon-1.7 @@ -108,8 +108,8 @@ Domain-Informed Probing of wav2vec 2.0 Embeddings for Phonetic Features PatrickCormac English - John D.Kelleher - JulieCarson-Berndsen + John D.Kelleher + JulieCarson-Berndsen 83-91 In recent years large transformer model architectures have become available which provide a novel means of generating high-quality vector representations of speech audio. These transformers make use of an attention mechanism to generate representations enhanced with contextual and positional information from the input sequence. Previous works have explored the capabilities of these models with regard to performance in tasks such as speech recognition and speaker verification, but there has not been a significant inquiry as to the manner in which the contextual information provided by the transformer architecture impacts the representation of phonetic information within these models. In this paper, we report the results of a number of probing experiments on the representations generated by the wav2vec 2.0 model’s transformer component, with regard to the encoding of phonetic categorization information within the generated embeddings. We find that the contextual information generated by the transformer’s operation results in enhanced capture of phonetic detail by the model, and allows for distinctions to emerge in acoustic data that are otherwise difficult to separate. 2022.sigmorphon-1.9 @@ -136,7 +136,7 @@ AryamanArora ViktorMartinovic KyleGorman - ZdeněkŽabokrtský + ZdeněkŽabokrtský AmarsanaaGanbold ŠárkaDohnalová MagdaŠevčíková @@ -152,7 +152,7 @@ Sharing Data by Language Family: Data Augmentation for <fixed-case>R</fixed-case>omance Language Morpheme Segmentation - LaurenLevine + LaurenLevine 117-123 This paper presents a basic character level sequence-to-sequence approach to morpheme segmentation for the following Romance languages: French, Italian, and Spanish. We experiment with adding a small set of additional linguistic features, as well as with sharing training data between sister languages for morphological categories with low performance in single language base models. We find that while the additional linguistic features were generally not helpful in this instance, data augmentation between sister languages did help to raise the scores of some individual morphological categories, but did not consistently result in an overall improvement when considering the aggregate of the categories. 2022.sigmorphon-1.12 @@ -298,7 +298,7 @@ ChangbingYang Ruixin (Ray)Yang GarrettNicolai - MiikkaSilfverberg + MiikkaSilfverberg 226-235 This paper presents experiments on morphological inflection using data from the SIGMORPHON-UniMorph 2022 Shared Task 0: Generalization and Typologically Diverse Morphological Inflection. We present a transformer inflection system, which enriches the standard transformer architecture with reverse positional encoding and type embeddings. We further apply data hallucination and lemma copying to augment training data. We train models using a two-stage procedure: (1) We first train on the augmented training data using standard backpropagation and teacher forcing. (2) We then continue training with a variant of the scheduled sampling algorithm dubbed student forcing. Our system delivers competitive performance under the small and large data conditions on the shared task datasets. 2022.sigmorphon-1.23 diff --git a/data/xml/2022.signlang.xml b/data/xml/2022.signlang.xml index 0759a3596c..1d519a4b74 100644 --- a/data/xml/2022.signlang.xml +++ b/data/xml/2022.signlang.xml @@ -4,7 +4,7 @@ Proceedings of the LREC2022 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources EleniEfthimiou - Stavroula-EvitaFotinea + Stavroula-EvitaFotinea ThomasHanke Julie A.Hochgesang JetteKristoffersen diff --git a/data/xml/2022.sigtyp.xml b/data/xml/2022.sigtyp.xml index 4d0f71cca9..09b4ab9a78 100644 --- a/data/xml/2022.sigtyp.xml +++ b/data/xml/2022.sigtyp.xml @@ -31,7 +31,7 @@ Word-order Typology in Multilingual <fixed-case>BERT</fixed-case>: A Case Study in Subordinate-Clause Detection DmitryNikolaev - SebastianPado + SebastianPado 11-21 The capabilities and limitations of BERT and similar models are still unclear when it comes to learning syntactic abstractions, in particular across languages. In this paper, we use the task of subordinate-clause detection within and across languages to probe these properties. We show that this task is deceptively simple, with easy gains offset by a long tail of harder cases, and that BERT’s zero-shot performance is dominated by word-order effects, mirroring the SVO/VSO/SOV typology. 2022.sigtyp-1.2 @@ -53,8 +53,8 @@ Cross-linguistic Comparison of Linguistic Feature Encoding in <fixed-case>BERT</fixed-case> Models for Typologically Different Languages - YuliaOtmakhova - KarinVerspoor + YuliaOtmakhova + KarinVerspoor Jey HanLau 27-35 Though recently there have been an increased interest in how pre-trained language models encode different linguistic features, there is still a lack of systematic comparison between languages with different morphology and syntax. In this paper, using BERT as an example of a pre-trained model, we compare how three typologically different languages (English, Korean, and Russian) encode morphology and syntax features across different layers. In particular, we contrast languages which differ in a particular aspect, such as flexibility of word order, head directionality, morphological type, presence of grammatical gender, and morphological richness, across four different tasks. @@ -111,7 +111,7 @@ Mockingbird at the <fixed-case>SIGTYP</fixed-case> 2022 Shared Task: Two Types of Models for the Prediction of Cognate Reflexes ChristoKirov - RichardSproat + RichardSproat AlexanderGutkin 70-79 The SIGTYP 2022 shared task concerns the problem of word reflex generation in a target language, given cognate words from a subset of related languages. We present two systems to tackle this problem, covering two very different modeling approaches. The first model extends transformer-based encoder-decoder sequence-to-sequence modeling, by encoding all available input cognates in parallel, and having the decoder attend to the resulting joint representation during inference. The second approach takes inspiration from the field of image restoration, where models are tasked with recovering pixels in an image that have been masked out. For reflex generation, the missing reflexes are treated as “masked pixels” in an “image” which is a representation of an entire cognate set across a language family. As in the image restoration case, cognate restoration is performed with a convolutional network. diff --git a/data/xml/2022.sigul.xml b/data/xml/2022.sigul.xml index 0b0f9a89f4..460a79ce67 100644 --- a/data/xml/2022.sigul.xml +++ b/data/xml/2022.sigul.xml @@ -23,7 +23,7 @@ BolajiYusuf LucasOndel AlineVillavicencio - LaurentBesacier + LaurentBesacier 1–9 Documenting languages helps to prevent the extinction of endangered dialects - many of which are otherwise expected to disappear by the end of the century. When documenting oral languages, unsupervised word segmentation (UWS) from speech is a useful, yet challenging, task. It consists in producing time-stamps for slicing utterances into smaller segments corresponding to words, being performed from phonetic transcriptions, or in the absence of these, from the output of unsupervised speech discretization models. These discretization models are trained using raw speech only, producing discrete speech units that can be applied for downstream (text-based) tasks. In this paper we compare five of these models: three Bayesian and two neural approaches, with regards to the exploitability of the produced units for UWS. For the UWS task, we experiment with two models, using as our target language the Mboshi (Bantu C25), an unwritten language from Congo-Brazzaville. Additionally, we report results for Finnish, Hungarian, Romanian and Russian in equally low-resource settings, using only 4 hours of speech. Our results suggest that neural models for speech discretization are difficult to exploit in our setting, and that it might be necessary to adapt them to limit sequence length. We obtain our best UWS results by using Bayesian models that produce high quality, yet compressed, discrete representations of the input speech signal. 2022.sigul-1.1 @@ -46,7 +46,7 @@ PhatDo MattColer JelskeDijkstra - EstherKlabbers + EstherKlabbers 16–22 We propose a new approach for phoneme mapping in cross-lingual transfer learning for text-to-speech (TTS) in under-resourced languages (URLs), using phonological features from the PHOIBLE database and a language-independent mapping rule. This approach was validated through our experiment, in which we pre-trained acoustic models in Dutch, Finnish, French, Japanese, and Spanish, and fine-tuned them with 30 minutes of Frisian training data. The experiment showed an improvement in both naturalness and pronunciation accuracy in the synthesized Frisian speech when our mapping approach was used. Since this improvement also depended on the source language, we then experimented on finding a good criterion for selecting source languages. As an alternative to the traditionally used language family criterion, we tested a novel idea of using Angular Similarity of Phoneme Frequencies (ASPF), which measures the similarity between the phoneme systems of two languages. ASPF was empirically confirmed to be more effective than language family as a criterion for source language selection, and also to affect the phoneme mapping’s effectiveness. Thus, a combination of our phoneme mapping approach and the ASPF measure can be beneficially adopted by other studies involving multilingual or cross-lingual TTS for URLs. 2022.sigul-1.3 @@ -101,7 +101,7 @@ Quality versus Quantity: Building <fixed-case>C</fixed-case>atalan-<fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case> Resources - Onade Gibert + Onade Gibert KseniaKharitonova BlancaCalvo Figueras JordiArmengol-Estapé @@ -123,7 +123,7 @@ <fixed-case>CUNI</fixed-case> Submission to <fixed-case>MT</fixed-case>4<fixed-case>A</fixed-case>ll Shared Task IvanaKvapilíková - OndrejBojar + OndrejBojar 78–82 This paper describes our submission to the MT4All Shared Task in unsupervised machine translation from English to Ukrainian, Kazakh and Georgian in the legal domain. In addition to the standard pipeline for unsupervised training (pretraining followed by denoising and back-translation), we used supervised training on a pseudo-parallel corpus retrieved from the provided mono-lingual corpora. Our system scored significantly higher than the baseline hybrid unsupervised MT system. 2022.sigul-1.10 @@ -189,7 +189,7 @@ Machine Translation from <fixed-case>S</fixed-case>tandard <fixed-case>G</fixed-case>erman to Alemannic Dialects LouisaLambrecht FelixSchneider - AlexanderWaibel + AlexanderWaibel 129–136 Machine translation has been researched using deep neural networks in recent years. These networks require lots of data to learn abstract representations of the input stored in continuous vectors. Dialect translation has become more important since the advent of social media. In particular, when dialect speakers and standard language speakers no longer understand each other, machine translation is of rising concern. Usually, dialect translation is a typical low-resourced language setting facing data scarcity problems. Additionally, spelling inconsistencies due to varying pronunciations and the lack of spelling rules complicate translation. This paper presents the best-performing approaches to handle these problems for Alemannic dialects. The results show that back-translation and conditioning on dialectal manifestations achieve the most remarkable enhancement over the baseline. Using back-translation, a significant gain of +4.5 over the strong transformer baseline of 37.3 BLEU points is accomplished. Differentiating between several Alemannic dialects instead of treating Alemannic as one dialect leads to substantial improvements: Multi-dialectal translation surpasses the baseline on the dialectal test sets. However, training individual models outperforms the multi-dialectal approach. There, improvements range from 7.5 to 10.6 BLEU points over the baseline depending on the dialect. 2022.sigul-1.17 @@ -200,7 +200,7 @@ Tadesse DestawBelay Seid MuhieYimam AbinewAyele - ChrisBiemann + ChrisBiemann 137–145 In this work, we build a Question Answering (QA) classification dataset from a social media platform, namely the Telegram public channel called @AskAnythingEthiopia. The channel has more than 78k subscribers and has existed since May 31, 2019. The platform allows asking questions that belong to various domains, like politics, economics, health, education, and so on. Since the questions are posed in a mixed-code, we apply different strategies to pre-process the dataset. Questions are posted in Amharic, English, or Amharic but in a Latin script. As part of the pre-processing tools, we build a Latin to Ethiopic Script transliteration tool. We collect 8k Amharic and 24K transliterated questions and develop deep learning-based questions answering classifiers that attain as high as an F-score of 57.29 in 20 different question classes or categories. The datasets and pre-processing scripts are open-sourced to facilitate further research on the Amharic community-based question answering. 2022.sigul-1.18 @@ -236,7 +236,7 @@ Building Open-source Speech Technology for Low-resource Minority Languages with <fixed-case>S</fixed-case>á<fixed-case>M</fixed-case>i as an Example – Tools, Methods and Experiments KatriHiovain-Asikainen - SjurMoshagen + SjurMoshagen 169–175 This paper presents a work-in-progress report of an open-source speech technology project for indigenous Sami languages. A less detailed description of this work has been presented in a more general paper about the whole GiellaLT language infrastructure, submitted to the LREC 2022 main conference. At this stage, we have designed and collected a text corpus specifically for developing speech technology applications, namely Text-to-speech (TTS) and Automatic speech recognition (ASR) for the Lule and North Sami languages. We have also piloted and experimented with different speech synthesis technologies using a miniature speech corpus as well as developed tools for effective processing of large spoken corpora. Additionally, we discuss effective and mindful use of the speech corpus and also possibilities to use found/archive materials for training an ASR model for these languages. 2022.sigul-1.22 @@ -245,7 +245,7 @@ Investigating the Quality of Static Anchor Embeddings from Transformers for Under-Resourced Languages PranaydeepSingh - OrpheeDe Clercq + OrpheeDe Clercq ElsLefever 176–184 This paper reports on experiments for cross-lingual transfer using the anchor-based approach of Schuster et al. (2019) for English and a low-resourced language, namely Hindi. For the sake of comparison, we also evaluate the approach on three very different higher-resourced languages, viz. Dutch, Russian and Chinese. Initially designed for ELMo embeddings, we analyze the approach for the more recent BERT family of transformers for a variety of tasks, both mono and cross-lingual. The results largely prove that like most other cross-lingual transfer approaches, the static anchor approach is underwhelming for the low-resource language, while performing adequately for the higher resourced ones. We attempt to provide insights into both the quality of the anchors, and the performance for low-shot cross-lingual transfer to better understand this performance gap. We make the extracted anchors and the modified train and test sets available for future research at https://github.com/pranaydeeps/Vyaapak diff --git a/data/xml/2022.slpat.xml b/data/xml/2022.slpat.xml index 985bf76e4b..bd11aafb1f 100644 --- a/data/xml/2022.slpat.xml +++ b/data/xml/2022.slpat.xml @@ -4,7 +4,7 @@ Ninth Workshop on Speech and Language Processing for Assistive Technologies (SLPAT-2022) SarahEbling - EmilyPrud’hommeaux + EmilyPrud’hommeaux PreethiVaidyanathan Association for Computational Linguistics
Dublin, Ireland
@@ -75,7 +75,7 @@ Producing <fixed-case>S</fixed-case>tandard <fixed-case>G</fixed-case>erman Subtitles for <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman <fixed-case>TV</fixed-case> Content JohannaGerlach JonathanMutal - PierretteBouillon + PierretteBouillon 37-43 In this study we compare two approaches (neural machine translation and edit-based) and the use of synthetic data for the task of translating normalised Swiss German ASR output into correct written Standard German for subtitles, with a special focus on syntactic differences. Results suggest that NMT is better suited to this task and that relatively simple rule-based generation of training data could be a valuable approach for cases where little training data is available and transformations are simple. 2022.slpat-1.5 @@ -88,7 +88,7 @@ MagaliNorré VincentVandeghinste ThomasFrançois - PierretteBouillon + PierretteBouillon 44-49 Communication between physician and patients can lead to misunderstandings, especially for disabled people. An automatic system that translates natural language into a pictographic language is one of the solutions that could help to overcome this issue. In this preliminary study, we present the French version of a translation system using the Arasaac pictographs and we investigate the strategies used by speech therapists to translate into pictographs. We also evaluate the medical coverage of this tool for translating physician questions and patient instructions. 2022.slpat-1.6 @@ -120,9 +120,9 @@
<fixed-case>C</fixed-case>ue<fixed-case>B</fixed-case>ot: Cue-Controlled Response Generation for Assistive Interaction Usages - ShachiH. Kumar + ShachiH. Kumar HsuanSu - RameshManuvinakurike + RameshManuvinakurike MaxPinaroc SaiPrasad SauravSahay diff --git a/data/xml/2022.sltat.xml b/data/xml/2022.sltat.xml index ca2034186a..247b732162 100644 --- a/data/xml/2022.sltat.xml +++ b/data/xml/2022.sltat.xml @@ -4,7 +4,7 @@ Proceedings of the 7th International Workshop on Sign Language Translation and Avatar Technology: The Junction of the Visual and the Textual: Challenges and Perspectives EleniEfthimiou - Stavroula-EvitaFotinea + Stavroula-EvitaFotinea ThomasHanke John C.McDonald DimitarShterionov @@ -24,7 +24,7 @@ Synthesis for the Kinematic Control of Identity in Sign Language FélixBigand ElisePrigent - AnneliesBraffort + AnneliesBraffort 1–6 Sign Language (SL) animations generated from motion capture (mocap) of real signers convey critical information about their identity. It has been suggested that this information is mostly carried by statistics of the movements kinematics. Manipulating these statistics in the generation of SL movements could allow controlling the identity of the signer, notably to preserve anonymity. This paper tests this hypothesis by presenting a novel synthesis algorithm that manipulates the identity-specific statistics of mocap recordings. The algorithm produced convincing new versions of French Sign Language discourses, which accurately modulated the identity prediction of a machine learning model. These results open up promising perspectives toward the automatic control of identity in the motion animation of virtual signers. 2022.sltat-1.1 @@ -52,7 +52,7 @@ Example-based Multilinear Sign Language Generation from a Hierarchical Representation BorisDauriac - AnneliesBraffort + AnneliesBraffort EliseBertin-Lemée 21–28 This article presents an original method for automatic generation of sign language (SL) content by means of the animation of an avatar, with the aim of creating animations that respect as much as possible linguistic constraints while keeping bio-realistic properties. This method is based on the use of a domain-specific bilingual corpus richly annotated with timed alignments between SL motion capture data, text and hierarchical expressions from the framework called AZee at subsentential level. Animations representing new SL content are built from blocks of animations present in the corpus and adapted to the context if necessary. A smart blending approach has been designed that allows the concatenation, replacement and adaptation of original animation blocks. This approach has been tested on a tailored testset to show as a proof of concept its potential in comprehensibility and fluidity of the animation, as well as its current limits. @@ -106,7 +106,7 @@ <fixed-case>K</fixed-case>o<fixed-case>S</fixed-case>ign Sign Language Translation Project: Introducing The <fixed-case>NIASL</fixed-case>2021 Dataset - MathewHuerta-Enochian + MathewHuerta-Enochian Du HuiLee Hye JinMyung Kang SukByun diff --git a/data/xml/2022.smila.xml b/data/xml/2022.smila.xml index b9dba6c777..2cb76182c8 100644 --- a/data/xml/2022.smila.xml +++ b/data/xml/2022.smila.xml @@ -104,7 +104,7 @@ Inhalation Noises as Endings of Laughs in Conversational Speech JürgenTrouvain RaphaelWerner - KhietTruong + KhietTruong 28–29 In this study we investigate the role of inhalation noises at the end of laughter events in two conversational corpora that provide relevant annotations. A re-annotation of the categories for laughter, silence and inbreath noises enabled us to see that inhalation noises terminate laughter events in the majority of all inspected laughs with a duration comparable to inbreath noises initiating speech phases. This type of corpus analysis helps to understand the mechanisms of audible respiratory activities in speaking vs. laughing in conversations. 2022.smila-1.8 diff --git a/data/xml/2022.smm4h.xml b/data/xml/2022.smm4h.xml index 5cc7462c2f..0a871875fc 100644 --- a/data/xml/2022.smm4h.xml +++ b/data/xml/2022.smm4h.xml @@ -3,8 +3,8 @@ Proceedings of the Seventh Workshop on Social Media Mining for Health Applications, Workshop & Shared Task - GracielaGonzalez-Hernandez - DavyWeissenbacher + GracielaGonzalez-Hernandez + DavyWeissenbacher Association for Computational Linguistics
Gyeongju, Republic of Korea
October @@ -76,8 +76,8 @@ <fixed-case>NLP</fixed-case>-<fixed-case>CIC</fixed-case>-<fixed-case>WFU</fixed-case> at <fixed-case>S</fixed-case>ocial<fixed-case>D</fixed-case>is<fixed-case>NER</fixed-case>: Disease Mention Extraction in <fixed-case>S</fixed-case>panish Tweets Using Transfer Learning and Search by Propagation AntonioTamayo - AlexanderGelbukh - DiegoBurgos + AlexanderGelbukh + DiegoBurgos 19–22 Named entity recognition (e.g., disease mention extraction) is one of the most relevant tasks for data mining in the medical field. Although it is a well-known challenge, the bulk of the efforts to tackle this task have been made using clinical texts commonly written in English. In this work, we present our contribution to the SocialDisNER competition, which consists of a transfer learning approach to extracting disease mentions in a corpus from Twitter written in Spanish. We fine-tuned a model based on mBERT and applied post-processing using regular expressions to propagate the entities identified by the model and enhance disease mention extraction. Our system achieved a competitive strict F1 of 0.851 on the testing data set. 2022.smm4h-1.6 @@ -98,8 +98,8 @@ MariiaChizhikova PilarLópez-Úbeda Manuel C.Díaz-Galiano - L. AlfonsoUreña-López - M. TeresaMartín-Valdivia + L. AlfonsoUreña-López + M. TeresaMartín-Valdivia 27–30 This paper covers participation of the SINAI team in Tasks 5 and 10 of the Social Media Mining for Health (#SSM4H) workshop at COLING-2022. These tasks focus on leveraging Twitter posts written in Spanish for healthcare research. The objective of Task 5 was to classify tweets reporting COVID-19 symptoms, while Task 10 required identifying disease mentions in Twitter posts. The presented systems explore large RoBERTa language models pre-trained on Twitter data in the case of tweet classification task and general-domain data for the disease recognition task. We also present a text pre-processing methodology implemented in both systems and describe an initial weakly-supervised fine-tuning phase alongside with a submission post-processing procedure designed for Task 10. The systems obtained 0.84 F1-score on the Task 5 and 0.77 F1-score on Task 10. 2022.smm4h-1.8 @@ -162,8 +162,8 @@ <fixed-case>READ</fixed-case>-<fixed-case>B</fixed-case>io<fixed-case>M</fixed-case>ed@<fixed-case>S</fixed-case>ocial<fixed-case>D</fixed-case>is<fixed-case>NER</fixed-case>: Adaptation of an Annotation System to <fixed-case>S</fixed-case>panish Tweets - AntonioJimeno Yepes - KarinVerspoor + AntonioJimeno Yepes + KarinVerspoor 48–51 We describe the work of the READ-BioMed team for the preparation of a submission to the SocialDisNER Disease Named Entity Recognition (NER) Task (Task 10) in 2022. We had developed a system for named entity recognition for identifying biomedical concepts in English MEDLINE citations and Spanish clinical text for the LivingNER 2022 challenge. Minimal adaptation of our system was required to perform named entity recognition in the Spanish tweets in the SocialDisNER task, given the availability of Spanish pre-trained language models and the SocialDisNER training data. Minor additions included treatment of emojis and entities in hashtags and Twitter account names. 2022.smm4h-1.14 @@ -195,11 +195,11 @@ <fixed-case>CIC</fixed-case> <fixed-case>NLP</fixed-case> at <fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case> 2022: a <fixed-case>BERT</fixed-case>-based approach for classification of social media forum posts Atnafu LambeboTonja Olumide EbenezerOjo - Mohammed ArifKhan + Mohammed ArifKhan Abdul Gafar ManuelMeque OlgaKolesnikova GrigoriSidorov - AlexanderGelbukh + AlexanderGelbukh 58–61 This paper describes our submissions for the Social Media Mining for Health (SMM4H) 2022 shared tasks. We participated in 2 tasks: a) Task 4: Classification of Tweets self-reporting exact age and b) Task 9: Classification of Reddit posts self-reporting exact age. We evaluated the two( BERT and RoBERTa) transformer based models for both tasks. For Task 4 RoBERTa-Large achieved an F1 score of 0.846 on the test set and BERT-Large achieved an F1 score of 0.865 on the test set for Task 9. 2022.smm4h-1.17 @@ -287,7 +287,7 @@ AmanSinha Cristina GarciaHolgado MarianneClausel - MatthieuConstant + MatthieuConstant 85–89 Biomedical NER is an active research area today. Despite the availability of state-of-the-art models for standard NER tasks, their performance degrades on biomedical data due to OOV entities and the challenges encountered in specialized domains. We use Flair-NER framework to investigate the effectiveness of various contextual and static embeddings for NER on Spanish tweets, in particular, to capture complex disease mentions. 2022.smm4h-1.25 @@ -356,7 +356,7 @@ RoshanKhatri SougataSaha SouvikDas - RohiniSrihari + RohiniSrihari 114–117 Here we discuss our implementation of two tasks in the Social Media Mining for Health Applications (SMM4H) 2022 shared tasks – classification, detection, and normalization of Adverse Events (AE) mentioned in English tweets (Task 1) and classification of English tweets self-reporting exact age (Task 4). We have explored different methods and models for binary classification, multi-class classification and named entity recognition (NER) for these tasks. We have also processed the provided dataset for noise, imbalance, and creative language expression from data. Using diverse NLP methods we classified tweets for mentions of adverse drug effects (ADEs) and self-reporting the exact age in the tweets. Further, extracted reactions from the tweets and normalized these adverse effects to a standard concept ID in the MedDRA vocabulary. 2022.smm4h-1.32 @@ -449,7 +449,7 @@ JingeWu HonghanWu TonySun - BeatriceAlex + BeatriceAlex 148–152 This paper reports on the performance of Edinburgh_UCL_Health’s models in the Social Media Mining for Health (SMM4H) 2022 shared tasks. Our team participated in the tasks related to the Identification of Adverse Drug Events (ADEs), the classification of change in medication (change-med) and the classification of self-report of vaccination (self-vaccine). Our best performing models are based on DeepADEMiner (with respective F1= 0.64, 0.62 and 0.39 for ADE identification), on a GloVe model trained on Twitter (with F1=0.11 for the change-med) and finally on a stack embedding including a layer of Glove embedding and two layers of Flair embedding (with F1= 0.77 for self-report). 2022.smm4h-1.40 @@ -458,7 +458,7 @@ <fixed-case>KUL</fixed-case>@<fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case>’22: Template Augmented Adaptive Pre-training for Tweet Classification SumamFrancis - Marie-FrancineMoens + Marie-FrancineMoens 153–155 This paper describes models developed for the Social Media Mining for Health (SMM4H) 2022 shared tasks. Our team participated in the first subtask that classifies tweets with Adverse Drug Effect (ADE) mentions. Our best-performing model comprises of a template augmented task adaptive pre-training and further fine-tuning on target task data. Augmentation with random prompt templates increases the amount of task-specific data to generalize the LM to the target task domain. We explore 2 pre-training strategies: Masked language modeling (MLM) and Simple contrastive pre-training (SimSCE) and the impact of adding template augmentations with these pre-training strategies. Our system achieves an F1 score of 0.433 on the test set without using supplementary resources and medical dictionaries. 2022.smm4h-1.41 @@ -537,7 +537,7 @@ EulàliaFarré-Maduell SalvadorLima-López AntonioMiranda-Escalada - MartinKrallinger + MartinKrallinger 182–189 There is a pressing need to exploit health-related content from social media, a global source of data where key health information is posted directly by citizens, patients and other healthcare stakeholders. Use cases of disease related social media mining include disease outbreak/surveillance, mental health and pharmacovigilance. Current efforts address the exploitation of social media beyond English. The SocialDisNER task, organized as part of the SMM4H 2022 initiative, has applied the LINKAGE methodology to select and annotate a Gold Standard corpus of 9,500 tweets in Spanish enriched with disease mentions generated by patients and medical professionals. As a complementary resource for teams participating in the SocialDisNER track, we have also created a large-scale corpus of 85,000 tweets, where in addition to disease mentions, other medical entities of relevance (e.g., medications, symptoms and procedures, among others) have been automatically labelled. Using these large-scale datasets, co-mention networks or knowledge graphs were released for each entity pair type. Out of the 47 teams registered for the task, 17 teams uploaded a total of 32 runs. The top-performing team achieved a very competitive 0.891 f-score, with a system trained following a continue pre-training strategy. We anticipate that the corpus and systems resulting from the SocialDisNER track might further foster health related text mining of social media content in Spanish and inspire disease detection strategies in other languages. 2022.smm4h-1.48 @@ -546,7 +546,7 @@ <fixed-case>R</fixed-case>omanian micro-blogging named entity recognition including health-related entities VasilePais - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu ElenaIrimia MariaMitrofan Carol LucaGasan @@ -613,7 +613,7 @@ YaoGe YutingGuo AriKlein - MartinKrallinger + MartinKrallinger MathiasLeddin ArjunMagge RaulRodriguez-Esteban diff --git a/data/xml/2022.socialnlp.xml b/data/xml/2022.socialnlp.xml index 440ea7da2d..1aaeb7d64d 100644 --- a/data/xml/2022.socialnlp.xml +++ b/data/xml/2022.socialnlp.xml @@ -33,7 +33,7 @@ DanaRuiter ThomasKleinbauer CristinaEspaña-Bonet - Josefvan Genabith + Josefvan Genabith DietrichKlakow 11-34 Recent research on style transfer takes inspiration from unsupervised neural machine translation (UNMT), learning from large amounts of non-parallel data by exploiting cycle consistency loss, back-translation, and denoising autoencoders. By contrast, the use of selfsupervised NMT (SSNMT), which leverages (near) parallel instances hidden in non-parallel data more efficiently than UNMT, has not yet been explored for style transfer. In this paper we present a novel Self-Supervised Style Transfer (3ST) model, which augments SSNMT with UNMT methods in order to identify and efficiently exploit supervisory signals in non-parallel social media posts. We compare 3ST with state-of-the-art (SOTA) style transfer models across civil rephrasing, formality and polarity tasks. We show that 3ST is able to balance the three major objectives (fluency, content preservation, attribute transfer accuracy) the best, outperforming SOTA models on averaged performance across their tested tasks in automatic and human evaluation. diff --git a/data/xml/2022.spanlp.xml b/data/xml/2022.spanlp.xml index e6a8992e1c..9435ac2535 100644 --- a/data/xml/2022.spanlp.xml +++ b/data/xml/2022.spanlp.xml @@ -24,7 +24,7 @@ Van-HienTran HirokiOuchi TaroWatanabe - YujiMatsumoto + YujiMatsumoto 1-6 Zero-shot relation extraction (ZSRE) aims to predict target relations that cannot be observed during training. While most previous studies have focused on fully supervised relation extraction and achieved considerably high performance, less effort has been made towards ZSRE. This study proposes a new model incorporating discriminative embedding learning for both sentences and semantic relations. In addition, a self-adaptive comparator network is used to judge whether the relationship between a sentence and a relation is consistent. Experimental results on two benchmark datasets showed that the proposed method significantly outperforms the state-of-the-art methods. 2022.spanlp-1.1 @@ -51,7 +51,7 @@ Efficient Machine Translation Domain Adaptation PedroMartins ZitaMarinho - AndreMartins + AndreMartins 23-29 Machine translation models struggle when translating out-of-domain text, which makes domain adaptation a topic of critical importance. However, most domain adaptation methods focus on fine-tuning or training the entire or part of the model on every new domain, which can be costly. On the other hand, semi-parametric models have been shown to successfully perform domain adaptation by retrieving examples from an in-domain datastore (Khandelwal et al., 2021). A drawback of these retrieval-augmented models, however, is that they tend to be substantially slower. In this paper, we explore several approaches to speed up nearest neighbors machine translation. We adapt the methods recently proposed by He et al. (2021) for language modeling, and introduce a simple but effective caching strategy that avoids performing retrieval when similar contexts have been seen before. Translation quality and runtimes for several domains show the effectiveness of the proposed solutions. 2022.spanlp-1.3 diff --git a/data/xml/2022.spnlp.xml b/data/xml/2022.spnlp.xml index 66429da1c7..efa5c8b341 100644 --- a/data/xml/2022.spnlp.xml +++ b/data/xml/2022.spnlp.xml @@ -5,7 +5,7 @@ Proceedings of the Sixth Workshop on Structured Prediction for NLP AndreasVlachos PriyankaAgrawal - AndréMartins + AndréMartins GerasimosLampouras ChunchuanLyu Association for Computational Linguistics @@ -34,7 +34,7 @@ Joint Entity and Relation Extraction Based on Table Labeling Using Convolutional Neural Networks YoumiMa TatsuyaHiraoka - NaoakiOkazaki + NaoakiOkazaki 11-21 This study introduces a novel approach to the joint extraction of entities and relations by stacking convolutional neural networks (CNNs) on pretrained language models. We adopt table representations to model the entities and relations, casting the entity and relation extraction as a table-labeling problem. Regarding each table as an image and each cell in a table as an image pixel, we apply two-dimensional CNNs to the tables to capture local dependencies and predict the cell labels. The experimental results showed that the performance of the proposed method is comparable to those of current state-of-art systems on the CoNLL04, ACE05, and ADE datasets. Even when freezing pretrained language model parameters, the proposed method showed a stable performance, whereas the compared methods suffered from significant decreases in performance. This observation indicates that the parameters of the pretrained encoder may incorporate dependencies among the entity and relation labels during fine-tuning. 2022.spnlp-1.2 @@ -85,7 +85,7 @@ Neural String Edit Distance JindřichLibovický - AlexanderFraser + AlexanderFraser 52-66 We propose the neural string edit distance model for string-pair matching and string transduction based on learnable string edit distance. We modify the original expectation-maximization learned edit distance algorithm into a differentiable loss function, allowing us to integrate it into a neural network providing a contextual representation of the input. We evaluate on cognate detection, transliteration, and grapheme-to-phoneme conversion, and show that we can trade off between performance and interpretability in a single framework. Using contextual representations, which are difficult to interpret, we match the performance of state-of-the-art string-pair matching models. Using static embeddings and a slightly different loss function, we force interpretability, at the expense of an accuracy drop. 2022.spnlp-1.6 @@ -97,7 +97,7 @@ MarcosTreviso AntónioGóis PatrickFernandes - ErickFonseca + ErickFonseca AndreMartins 67-81 Transformers’ quadratic complexity with respect to the input sequence length has motivated a body of work on efficient sparse approximations to softmax. An alternative path, used by entmax transformers, consists of having built-in exact sparse attention; however this approach still requires quadratic computation. In this paper, we propose Sparsefinder, a simple model trained to identify the sparsity pattern of entmax attention before computing it. We experiment with three variants of our method, based on distances, quantization, and clustering, on two tasks: machine translation (attention in the decoder) and masked language modeling (encoder-only). Our work provides a new angle to study model efficiency by doing extensive analysis of the tradeoff between the sparsity and recall of the predicted attention graph. This allows for detailed comparison between different models along their Pareto curves, important to guide future benchmarks for sparse attention models. diff --git a/data/xml/2022.starsem.xml b/data/xml/2022.starsem.xml index 0d50a9be88..66ccd64d26 100644 --- a/data/xml/2022.starsem.xml +++ b/data/xml/2022.starsem.xml @@ -3,10 +3,10 @@ Proceedings of the 11th Joint Conference on Lexical and Computational Semantics - ViviNastase + ViviNastase ElliePavlick Mohammad TaherPilehvar - JoseCamacho-Collados + JoseCamacho-Collados AlessandroRaganato Association for Computational Linguistics
Seattle, Washington
@@ -66,7 +66,7 @@ SamuelRyb MarioGiulianelli ArabellaSinclair - RaquelFernández + RaquelFernández 55-68 We investigate the extent to which pre-trained language models acquire analytical and deductive logical reasoning capabilities as a side effect of learning word prediction. We present AnaLog, a natural language inference task designed to probe models for these capabilities, controlling for different invalid heuristics the models may adopt instead of learning the desired generalisations. We test four languagemodels on AnaLog, finding that they have all learned, to a different extent, to encode information that is predictive of entailment beyond shallow heuristics such as lexical overlap and grammaticality. We closely analyse the best performing language model and show that while it performs more consistently than other language models across logical connectives and reasoning domains, it still is sensitive to lexical and syntactic variations in the realisation of logical statements. 2022.starsem-1.5 @@ -98,10 +98,10 @@
Multilingual Extraction and Categorization of Lexical Collocations with Graph-aware Transformers - LuisEspinosa Anke + LuisEspinosa Anke AlexanderShvets AlirezaMohammadshahi - JamesHenderson + JamesHenderson LeoWanner 89-100 Recognizing and categorizing lexical collocations in context is useful for language learning, dictionary compilation and downstream NLP. However, it is a challenging task due to the varying degrees of frozenness lexical collocations exhibit. In this paper, we put forward a sequence tagging BERT-based model enhanced with a graph-aware transformer architecture, which we evaluate on the task of collocation recognition in context. Our results suggest that explicitly encoding syntactic dependencies in the model architecture is helpful, and provide insights on differences in collocation typification in English, Spanish and French. @@ -115,7 +115,7 @@ KyleRichardson NoamKahlon AviadSar-shalom - Nelson F.Liu + Nelson F.Liu ReutTsarfaty DafnaShahaf 101-122 @@ -158,7 +158,7 @@ Distilling Hypernymy Relations from Language Models: On the Effectiveness of Zero-Shot Taxonomy Induction DevanshJain - LuisEspinosa Anke + LuisEspinosa Anke 151-156 In this paper, we analyze zero-shot taxonomy learning methods which are based on distilling knowledge from language models via prompting and sentence scoring. We show that, despite their simplicity, these methods outperform some supervised strategies and are competitive with the current state-of-the-art under adequate conditions. We also show that statistical and linguistic properties of prompts dictate downstream performance. 2022.starsem-1.13 @@ -232,7 +232,7 @@ AarneTalman MariannaApidianaki StergiosChatzikyriakidis - JörgTiedemann + JörgTiedemann 226-233 A central question in natural language understanding (NLU) research is whether high performance demonstrates the models’ strong reasoning capabilities. We present an extensive series of controlled experiments where pre-trained language models are exposed to data that have undergone specific corruption transformations. These involve removing instances of specific word classes and often lead to non-sensical sentences. Our results show that performance remains high on most GLUE tasks when the models are fine-tuned or tested on corrupted data, suggesting that they leverage other cues for prediction even in non-sensical contexts. Our proposed data transformations can be used to assess the extent to which a specific dataset constitutes a proper testbed for evaluating models’ language understanding capabilities. 2022.starsem-1.20 @@ -241,7 +241,7 @@ Leveraging Three Types of Embeddings from Masked Language Models in Idiom Token Classification - RyosukeTakahashi + RyosukeTakahashi RyoheiSasano KoichiTakeda 234-239 @@ -275,14 +275,14 @@ <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank Comes of <fixed-case>A</fixed-case>ge—<fixed-case>L</fixed-case>arger, Smarter, and more Diverse - SameerPradhan + SameerPradhan JuliaBonn SkatjeMyers KathrynConger TimO’gorman JamesGung KristinWright-bettner - MarthaPalmer + MarthaPalmer 278-288 This paper describes the evolution of the PropBank approach to semantic role labeling over the last two decades. During this time the PropBank frame files have been expanded to include non-verbal predicates such as adjectives, prepositions and multi-word expressions. The number of domains, genres and languages that have been PropBanked has also expanded greatly, creating an opportunity for much more challenging and robust testing of the generalization capabilities of PropBank semantic role labeling systems. We also describe the substantial effort that has gone into ensuring the consistency and reliability of the various annotated datasets and resources, to better support the training and evaluation of such systems 2022.starsem-1.24 @@ -293,9 +293,9 @@ Speech acts and Communicative Intentions for Urgency Detection LaurentiEnzo BourgonNils - FarahBenamara + FarahBenamara MariAlda - VéroniqueMoriceau + VéroniqueMoriceau CourgeonCamille 289-298 Recognizing speech acts (SA) is crucial for capturing meaning beyond what is said, making communicative intentions particularly relevant to identify urgent messages. This paper attempts to measure for the first time the impact of SA on urgency detection during crises,006in tweets. We propose a new dataset annotated for both urgency and SA, and develop several deep learning architectures to inject SA into urgency detection while ensuring models generalisability. Our results show that taking speech acts into account in tweet analysis improves information type detection in an out-of-type configuration where models are evaluated in unseen event types during training. These results are encouraging and constitute a first step towards SA-aware disaster management in social media. @@ -306,7 +306,7 @@ What Drives the Use of Metaphorical Language? Negative Insights from Abstractness, Affect, Discourse Coherence and Contextualized Word Representations PriscaPiccirilli - SabineSchulte Im Walde + SabineSchulte Im Walde 299-310 Given a specific discourse, which discourse properties trigger the use of metaphorical language, rather than using literal alternatives? For example, what drives people to say grasp the meaning rather than understand the meaning within a specific context? Many NLP approaches to metaphorical language rely on cognitive and (psycho-)linguistic insights and have successfully defined models of discourse coherence, abstractness and affect. In this work, we build five simple models relying on established cognitive and linguistic properties ? frequency, abstractness, affect, discourse coherence and contextualized word representations ? to predict the use of a metaphorical vs. synonymous literal expression in context. By comparing the models? outputs to human judgments, our study indicates that our selected properties are not sufficient to systematically explain metaphorical vs. literal language choices. 2022.starsem-1.26 @@ -350,7 +350,7 @@ Online Coreference Resolution for Dialogue Processing: Improving Mention-Linking on Real-Time Conversations LiyanXu - Jinho D.Choi + Jinho D.Choi 341-347 This paper suggests a direction of coreference resolution for online decoding on actively generated input such as dialogue, where the model accepts an utterance and its past context, then finds mentions in the current utterance as well as their referents, upon each dialogue turn. A baseline and four incremental updated models adapted from the mention linking paradigm are proposed for this new setting, which address different aspects including the singletons, speaker-grounded encoding and cross-turn mention contextualization. Our approach is assessed on three datasets: Friends, OntoNotes, and BOLT. Results show that each aspect brings out steady improvement, and our best models outperform the baseline by over 10%, presenting an effective system for this setting. Further analysis highlights the task characteristics, such as the significance of addressing the mention recall. 2022.starsem-1.30 diff --git a/data/xml/2022.suki.xml b/data/xml/2022.suki.xml index 3955d4e0bd..a6469ac619 100644 --- a/data/xml/2022.suki.xml +++ b/data/xml/2022.suki.xml @@ -5,7 +5,7 @@ Proceedings of the Workshop on Structured and Unstructured Knowledge Integration (SUKI) WenhuChen XinyunChen - ZhiyuChen + ZhiyuChen ZiyuYao MichihiroYasunaga TaoYu @@ -82,7 +82,7 @@ Table Retrieval May Not Necessitate Table-specific Model Design ZhiruoWang ZhengbaoJiang - EricNyberg + EricNyberg GrahamNeubig 36-46 Tables are an important form of structured data for both human and machine readers alike, providing answers to questions that cannot, or cannot easily, be found in texts. Recent work has designed special models and training paradigms for table-related tasks such as table-based question answering and table retrieval. Though effective, they add complexity in both modeling and data acquisition compared to generic text solutions and obscure which elements are truly beneficial. In this work, we focus on the task of table retrieval, and ask: “is table-specific model design necessary for table retrieval, or can a simpler text-based model be effectively used to achieve a similar result?’’ First, we perform an analysis on a table-based portion of the Natural Questions dataset (NQ-table), and find that structure plays a negligible role in more than 70% of the cases. Based on this, we experiment with a general Dense Passage Retriever (DPR) based on text and a specialized Dense Table Retriever (DTR) that uses table-specific model designs. We find that DPR performs well without any table-specific design and training, and even achieves superior results compared to DTR when fine-tuned on properly linearized tables. We then experiment with three modules to explicitly encode table structures, namely auxiliary row/column embeddings, hard attention masks, and soft relation-based attention biases. However, none of these yielded significant improvements, suggesting that table-specific model design may not be necessary for table retrieval. @@ -94,8 +94,8 @@ Transfer Learning and Masked Generation for Answer Verbalization SebastienMontella - LinaRojas-Barahona - FredericBechet + LinaRojas-Barahona + FredericBechet JohannesHeinecke AlexisNasr 47-54 diff --git a/data/xml/2022.sumeval.xml b/data/xml/2022.sumeval.xml index 3c512c391c..a4eb003af5 100644 --- a/data/xml/2022.sumeval.xml +++ b/data/xml/2022.sumeval.xml @@ -8,7 +8,7 @@ BarunPatra GrahamNeubig MonojitChoudhury - SandipanDandapat + SandipanDandapat SunayanaSitaram VishravChaudhary Association for Computational Linguistics @@ -74,7 +74,7 @@ <fixed-case>I</fixed-case>ndo<fixed-case>R</fixed-case>obusta: Towards Robustness Against Diverse Code-Mixed <fixed-case>I</fixed-case>ndonesian Local Languages Muhammad FaridAdilazuarda SamuelCahyawijaya - Genta IndraWinata + Genta IndraWinata PascaleFung AyuPurwarianti 25–34 diff --git a/data/xml/2022.sustainlp.xml b/data/xml/2022.sustainlp.xml index 3cd454d1d0..1445680922 100644 --- a/data/xml/2022.sustainlp.xml +++ b/data/xml/2022.sustainlp.xml @@ -45,7 +45,7 @@ <fixed-case>KGR</fixed-case>efiner: Knowledge Graph Refinement for Improving Accuracy of Translational Link Prediction Methods Mohammad JavadSaeedizade NajmehTorabianNa - BehrouzMinaei-Bidgoli + BehrouzMinaei-Bidgoli 10-16 Link Prediction is the task of predicting missing relations between knowledge graph entities (KG). Recent work in link prediction mainly attempted to adapt a model to increase link prediction accuracy by using more layers in neural network architecture, which heavily rely on computational resources. This paper proposes the refinement of knowledge graphs to perform link prediction operations more accurately using relatively fast translational models. Translational link prediction models have significantly less complexity than deep learning approaches; this motivated us to improve their accuracy. Our method uses the ontologies of knowledge graphs to add information as auxiliary nodes to the graph. Then, these auxiliary nodes are connected to ordinary nodes of the KG that contain auxiliary information in their hierarchy. Our experiments show that our method can significantly increase the performance of translational link prediction methods in Hit@10, Mean Rank, and Mean Reciprocal Rank. 2022.sustainlp-1.3 @@ -57,7 +57,7 @@ Algorithmic Diversity and Tiny Models: Comparing Binary Networks and the Fruit Fly Algorithm on Document Representation Tasks TaniseCeron NhutTruong - AurelieHerbelotUniversity of Trento + AurelieHerbelotUniversity of Trento 17-28 Neural language models have seen a dramatic increase in size in the last years. While many still advocate that ‘bigger is better’, work in model distillation has shown that the number of parameters used by very large networks is actually more than what is required for state-of-the-art performance. This prompts an obvious question: can we build smaller models from scratch, rather than going through the inefficient process of training at scale and subsequently reducing model size. In this paper, we investigate the behaviour of a biologically inspired algorithm, based on the fruit fly’s olfactory system. This algorithm has shown good performance in the past on the task of learning word embeddings. We now put it to the test on the task of semantic hashing. Specifically, we compare the fruit fly to a standard binary network on the task of generating locality-sensitive hashes for text documents, measuring both task performance and energy consumption. Our results indicate that the two algorithms have complementary strengths while showing similar electricity usage. 2022.sustainlp-1.4 @@ -68,7 +68,7 @@ Look Ma, Only 400 Samples! Revisiting the Effectiveness of Automatic N-Gram Rule Generation for Spelling Normalization in <fixed-case>F</fixed-case>ilipino Lorenzo JaimeFlores - DragomirRadevYale University + DragomirRadevYale University 29-35 With 84.75 million Filipinos online, the ability for models to process online text is crucial for developing Filipino NLP applications. To this end, spelling correction is a crucial preprocessing step for downstream processing. However, the lack of data prevents the use of language models for this task. In this paper, we propose an N-Gram + Damerau-Levenshtein distance model with automatic rule extraction. We train the model on 300 samples, and show that despite limited training data, it achieves good performance and outperforms other deep learning approaches in terms of accuracy and edit distance. Moreover, the model (1) requires little compute power, (2) trains in little time, thus allowing for retraining, and (3) is easily interpretable, allowing for direct troubleshooting, highlighting the success of traditional approaches over more complex deep learning models in settings where data is unavailable. 2022.sustainlp-1.5 @@ -81,7 +81,7 @@ Young JinKimMicrosoft RawnHenry RaffyFahimMicrosoft - HanyHassanMicrosoft + HanyHassanMicrosoft 36-43 Mixture of Experts (MoE) models with conditional execution of sparsely activated layers has enabled training models with a much larger number of parameters. As a result, these models have achieved significantly better quality on various natural language processing tasks including machine translation. However, it remains challenging to deploy such models in real-life scenarios due to the large memory requirements and inefficient inference. In this work, we introduce a highly efficient inference framework with several optimization approaches to accelerate the computation of sparse models and cut down the memory consumption significantly. While we achieve up to 26x speed-up in terms of throughput, we also reduce the model size almost to one eighth of the original 32-bit float model by quantizing expert weights into 4-bit integers. As a result, we are able to deploy 136x larger models with 27% less cost and significantly better quality with large scale MoE model deployment compared to the existing solutions. This enables a paradigm shift in deploying large scale multilingual MoE transformers models instead of distilling into dozens of smaller models per language or task. 2022.sustainlp-1.6 @@ -124,8 +124,8 @@ AiliShenAmazon YitongLiHuawei Technologies Co., Ltd. LeaFrermannThe University of Melbourne - TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne - TrevorCohnThe University of Melbourne + TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne + TrevorCohnThe University of Melbourne 65-72 With the growing prevalence of large-scale language models, their energy footprint and potential to learn and amplify historical biases are two pressing challenges. Dataset distillation (DD) — a method for reducing the dataset size by learning a small number of synthetic samples which encode the information in the original dataset — is a method for reducing the cost of model training, however its impact on fairness has not been studied. We investigate how DD impacts on group bias, with experiments over two language classification tasks, concluding that vanilla DD preserves the bias of the dataset. We then show how existing debiasing methods can be combined with DD to produce models that are fair and accurate, at reduced training cost. 2022.sustainlp-1.13 diff --git a/data/xml/2022.tacl.xml b/data/xml/2022.tacl.xml index 12b0b5382f..b7ec0f6c2e 100644 --- a/data/xml/2022.tacl.xml +++ b/data/xml/2022.tacl.xml @@ -38,7 +38,7 @@ LinyongNan ChiachunHsieh ZimingMao - Xi VictoriaLin + Xi VictoriaLin NehaVerma RuiZhang WojciechKryściński @@ -51,7 +51,7 @@ RenusreeBandaru JacobCunningham CaimingXiong - DragomirRadev + DragomirRadev DragomirRadev 10.1162/tacl_a_00446 Existing table question answering datasets contain abundant factual questions that primarily evaluate a QA system’s comprehension of query and tabular data. However, restricted by their short-form answers, these datasets fail to include question–answer interactions that represent more advanced and naturally occurring information needs: questions that ask for reasoning and integration of information pieces retrieved from a structured knowledge source. To complement the existing datasets and to reveal the challenging nature of the table-based question answering task, we introduce FeTaQA, a new dataset with 10K Wikipedia-based table, question, free-form answer, supporting table cells pairs. FeTaQA is collected from noteworthy descriptions of Wikipedia tables that contain information people tend to seek; generation of these descriptions requires advanced processing that humans perform on a daily basis: Understand the question and table, retrieve, integrate, infer, and conduct text planning and surface realization to generate an answer. We provide two benchmark methods for the proposed task: a pipeline method based on semantic parsing-based QA systems and an end-to-end method based on large pretrained text generation models, and show that FeTaQA poses a challenge for both methods. @@ -69,21 +69,21 @@ NasanbayarUlzii-Orshikh AllahseraTapo NishantSubramani - ArtemSokolov + ArtemSokolov ClaytoneSikasote MonangSetyawan SupheakmungkolSarin SokharSamb - BenoîtSagot + BenoîtSagot ClaraRivera - AnnetteRios + AnnetteRios IsabelPapadimitriou SalomeyOsei Pedro OrtizSuarez IroroOrife KelechiOgueji Andre NiyongaboRubungo - Toan Q.Nguyen + Toan Q.Nguyen MathiasMüller AndréMüller Shamsuddeen HassanMuhammad @@ -122,7 +122,7 @@ Canine: Pre-training an Efficient Tokenization-Free Encoder for Language Representation - Jonathan H.Clark + Jonathan H.Clark DanGarrette IuliaTurc JohnWieting @@ -160,7 +160,7 @@ Out-of-Domain Discourse Dependency Parsing via Bootstrapping: An Empirical Analysis on Its Effectiveness and Limitation NorikiNishida - YujiMatsumoto + YujiMatsumoto 10.1162/tacl_a_00451 Discourse parsing has been studied for decades. However, it still remains challenging to utilize discourse parsing for real-world applications because the parsing accuracy degrades significantly on out-of-domain text. In this paper, we report and discuss the effectiveness and limitations of bootstrapping methods for adapting modern BERT-based discourse dependency parsers to out-of-domain text without relying on additional human supervision. Specifically, we investigate self-training, co-training, tri-training, and asymmetric tri-training of graph-based and transition-based discourse dependency parsing models, as well as confidence measures and sample selection criteria in two adaptation scenarios: monologue adaptation between scientific disciplines and dialogue genre adaptation. We also release COVID-19 Discourse Dependency Treebank (COVID19-DTB), a new manually annotated resource for discourse dependency parsing of biomedical paper abstracts. The experimental results show that bootstrapping is significantly and consistently effective for unsupervised domain adaptation of discourse dependency parsing, but the low coverage of accurately predicted pseudo labels is a bottleneck for further improvement. We show that active learning can mitigate this limitation. 127–144 @@ -187,7 +187,7 @@ VivekRaghavan AnoopKunchukuttan PratyushKumar - Mitesh ShantadeviKhapra + Mitesh ShantadeviKhapra 10.1162/tacl_a_00452 We present Samanantar, the largest publicly available parallel corpora collection for Indic languages. The collection contains a total of 49.7 million sentence pairs between English and 11 Indic languages (from two language families). Specifically, we compile 12.4 million sentence pairs from existing, publicly available parallel corpora, and additionally mine 37.4 million sentence pairs from the Web, resulting in a 4× increase. We mine the parallel sentences from the Web by combining many corpora, tools, and methods: (a) Web-crawled monolingual corpora, (b) document OCR for extracting sentences from scanned documents, (c) multilingual representation models for aligning sentences, and (d) approximate nearest neighbor search for searching in a large collection of sentences. Human evaluation of samples from the newly mined corpora validate the high quality of the parallel sentences across 11 languages. Further, we extract 83.4 million sentence pairs between all 55 Indic language pairs from the English-centric parallel corpus using English as the pivot language. We trained multilingual NMT models spanning all these languages on Samanantar which outperform existing models and baselines on publicly available benchmarks, such as FLORES, establishing the utility of Samanantar. Our data and models are available publicly at Samanantar and we hope they will help advance research in NMT and multilingual NLP for Indic languages. 145–162 @@ -199,8 +199,8 @@ <fixed-case>S</fixed-case>umma<fixed-case>C</fixed-case>: Re-Visiting <fixed-case>NLI</fixed-case>-based Models for Inconsistency Detection in Summarization PhilippeLaban TobiasSchnabel - Paul N.Bennett - Marti A.Hearst + Paul N.Bennett + Marti A.Hearst 10.1162/tacl_a_00453 In the summarization domain, a key requirement for summaries is to be factually consistent with the input document. Previous work has found that natural language inference (NLI) models do not perform competitively when applied to inconsistency detection. In this work, we revisit the use of NLI for inconsistency detection, finding that past work suffered from a mismatch in input granularity between NLI datasets (sentence-level), and inconsistency detection (document level). We provide a highly effective and light-weight method called SummaCConv that enables NLI models to be successfully used for this task by segmenting documents into sentence units and aggregating scores between pairs of sentences. We furthermore introduce a new benchmark called SummaC (Summary Consistency) which consists of six large inconsistency detection datasets. On this dataset, SummaCConv obtains state-of-the-art results with a balanced accuracy of 74.4%, a 5% improvement compared with prior work. 163–177 @@ -211,7 +211,7 @@ A Survey on Automated Fact-Checking ZhijiangGuo - MichaelSchlichtkrull + MichaelSchlichtkrull AndreasVlachos 10.1162/tacl_a_00454 Fact-checking has become increasingly important due to the speed with which both information and misinformation can spread in the modern media ecosystem. Therefore, researchers have been exploring how fact-checking can be automated, using techniques based on natural language processing, machine learning, knowledge representation, and databases to automatically predict the veracity of claims. In this paper, we survey automated fact-checking stemming from natural language processing, and discuss its connections to related tasks and disciplines. In this process, we present an overview of existing datasets and models, aiming to unify the various definitions given and identify common concepts. Finally, we highlight challenges for future research. @@ -263,9 +263,9 @@ BhuwanDhingra Jeremy R.Cole Julian MartinEisenschlos - DanielGillick + DanielGillick JacobEisenstein - William W.Cohen + William W.Cohen 10.1162/tacl_a_00459 Many facts come with an expiration date, from the name of the President to the basketball team Lebron James plays for. However, most language models (LMs) are trained on snapshots of data collected at a specific moment in time. This can limit their utility, especially in the closed-book setting where the pretraining corpus must contain the facts the model should memorize. We introduce a diagnostic dataset aimed at probing LMs for factual knowledge that changes over time and highlight problems with LMs at either end of the spectrum—those trained on specific slices of temporal data, as well as those trained on a wide range of temporal data. To mitigate these problems, we propose a simple technique for jointly modeling text with its timestamp. This improves memorization of seen facts from the training time period, as well as calibration on predictions about unseen facts from future time periods. We also show that models trained with temporal context can be efficiently “refreshed” as new data arrives, without the need for retraining from scratch. 257–273 @@ -281,7 +281,7 @@ MikelArtetxe NamanGoyal MikhailPlekhanov - LukeZettlemoyer + LukeZettlemoyer NicolaCancedda SebastianRiedel FabioPetroni @@ -326,7 +326,7 @@ Towards General Natural Language Understanding with Probabilistic Worldbuilding AbulhairSaparov - Tom M.Mitchell + Tom M.Mitchell 10.1162/tacl_a_00463 We introduce the Probabilistic Worldbuilding Model (PWM), a new fully symbolic Bayesian model of semantic parsing and reasoning, as a first step in a research program toward more domain- and task-general NLU and AI. Humans create internal mental models of their observations that greatly aid in their ability to understand and reason about a large variety of problems. In PWM, the meanings of sentences, acquired facts about the world, and intermediate steps in reasoning are all expressed in a human-readable formal language, with the design goal of interpretability. PWM is Bayesian, designed specifically to be able to generalize to new domains and new tasks. We derive and implement an inference algorithm that reads sentences by parsing and abducing updates to its latent world model that capture the semantics of those sentences, and evaluate it on two out-of-domain question-answering datasets: (1) ProofWriter and (2) a new dataset we call FictionalGeoQA, designed to be more representative of real language but still simple enough to focus on evaluating reasoning ability, while being robust against heuristics. Our method outperforms baselines on both, thereby demonstrating its value as a proof-of-concept. 325–342 @@ -352,10 +352,10 @@ RachitBansal BhuwanDhingra LivioBaldini Soares - MichaelCollins - Zachary C.Lipton + MichaelCollins + Zachary C.Lipton GrahamNeubig - William W.Cohen + William W.Cohen 10.1162/tacl_a_00465 While many methods purport to explain predictions by highlighting salient features, what aims these explanations serve and how they ought to be evaluated often go unstated. In this work, we introduce a framework to quantify the value of explanations via the accuracy gains that they confer on a student model trained to simulate a teacher model. Crucially, the explanations are available to the student during training, but are not available at test time. Compared with prior proposals, our approach is less easily gamed, enabling principled, automatic, model-agnostic evaluation of attributions. Using our framework, we compare numerous attribution methods for text classification and question answering, and observe quantitative differences that are consistent (to a moderate to high degree) across different student model architectures and learning strategies.1 359–375 @@ -367,9 +367,9 @@ <fixed-case>VILA</fixed-case>: Improving Structured Content Extraction from Scientific <fixed-case>PDF</fixed-case>s Using Visual Layout Groups ZejiangShen KyleLo - Lucy LuWang + Lucy LuWang BaileyKuehl - Daniel S.Weld + Daniel S.Weld DougDowney 10.1162/tacl_a_00466 Accurately extracting structured content from PDFs is a critical first step for NLP over scientific papers. Recent work has improved extraction accuracy by incorporating elementary layout information, for example, each token’s 2D position on the page, into language model pretraining. We introduce new methods that explicitly model VIsual LAyout (VILA) groups, that is, text lines or text blocks, to further improve performance. In our I-VILA approach, we show that simply inserting special tokens denoting layout group boundaries into model inputs can lead to a 1.9% Macro F1 improvement in token classification. In the H-VILA approach, we show that hierarchical encoding of layout-groups can result in up to 47% inference time reduction with less than 0.8% Macro F1 loss. Unlike prior layout-aware approaches, our methods do not require expensive additional pretraining, only fine-tuning, which we show can reduce training cost by up to 95%. Experiments are conducted on a newly curated evaluation suite, S2-VLUE, that unifies existing automatically labeled datasets and includes a new dataset of manual annotations covering diverse papers from 19 scientific disciplines. Pre-trained weights, benchmark datasets, and source code are available at https://github.com/allenai/VILA. @@ -381,7 +381,7 @@ Data-driven Model Generalizability in Crosslinguistic Low-resource Morphological Segmentation ZoeyLiu - EmilyPrud’hommeaux + EmilyPrud’hommeaux 10.1162/tacl_a_00467 Common designs of model evaluation typically focus on monolingual settings, where different models are compared according to their performance on a single data set that is assumed to be representative of all possible data for the task at hand. While this may be reasonable for a large data set, this assumption is difficult to maintain in low-resource scenarios, where artifacts of the data collection can yield data sets that are outliers, potentially making conclusions about model performance coincidental. To address these concerns, we investigate model generalizability in crosslinguistic low-resource scenarios. Using morphological segmentation as the test case, we compare three broad classes of models with different parameterizations, taking data from 11 languages across 6 language families. In each experimental setting, we evaluate all models on a first data set, then examine their performance consistency when introducing new randomly sampled data sets with the same size and when applying the trained models to unseen test sets of varying sizes. The results demonstrate that the extent of model generalization depends on the characteristics of the data set, and does not necessarily rely heavily on the data set size. Among the characteristics that we studied, the ratio of morpheme overlap and that of the average number of morphemes per word between the training and test sets are the two most prominent factors. Our findings suggest that future work should adopt random sampling to construct data sets with different sizes in order to make more responsible claims about model evaluation. 393–413 @@ -450,7 +450,7 @@ MomchilHardalov YoanDinkov IsabelleAugenstein - PreslavNakov + PreslavNakov 10.1162/tacl_a_00472 We propose a novel framework for cross- lingual content flagging with limited target- language data, which significantly outperforms prior work in terms of predictive performance. The framework is based on a nearest-neighbor architecture. It is a modern instantiation of the vanilla k-nearest neighbor model, as we use Transformer representations in all its components. Our framework can adapt to new source- language instances, without the need to be retrained from scratch. Unlike prior work on neighborhood-based approaches, we encode the neighborhood information based on query– neighbor interactions. We propose two encoding schemes and we show their effectiveness using both qualitative and quantitative analysis. Our evaluation results on eight languages from two different datasets for abusive language detection show sizable improvements of up to 9.5 F1 points absolute (for Italian) over strong baselines. On average, we achieve 3.6 absolute F1 points of improvement for the three languages in the Jigsaw Multilingual dataset and 2.14 points for the WUL dataset. 484–502 @@ -482,7 +482,7 @@ DaJu SanjanaKrishnan Marc’AurelioRanzato - FranciscoGuzmán + FranciscoGuzmán AngelaFan 10.1162/tacl_a_00474 One of the biggest challenges hindering progress in low-resource and multilingual machine translation is the lack of good evaluation benchmarks. Current evaluation benchmarks either lack good coverage of low-resource languages, consider only restricted domains, or are low quality because they are constructed using semi-automatic procedures. In this work, we introduce the Flores-101 evaluation benchmark, consisting of 3001 sentences extracted from English Wikipedia and covering a variety of different topics and domains. These sentences have been translated in 101 languages by professional translators through a carefully controlled process. The resulting dataset enables better assessment of model quality on the long tail of low-resource languages, including the evaluation of many-to-many multilingual translation systems, as all translations are fully aligned. By publicly releasing such a high-quality and high-coverage dataset, we hope to foster progress in the machine translation community and beyond. @@ -508,7 +508,7 @@ Relational Memory-Augmented Language Models QiLiu DaniYogatama - PhilBlunsom + PhilBlunsom 10.1162/tacl_a_00476 We present a memory-augmented approach to condition an autoregressive language model on a knowledge graph. We represent the graph as a collection of relation triples and retrieve relevant relations for a given context to improve text generation. Experiments on WikiText-103, WMT19, and enwik8 English datasets demonstrate that our approach produces a better language model in terms of perplexity and bits per character. We also show that relational memory improves coherence, is complementary to token-based memory, and enables causal interventions. Our model provides a simple yet effective way to combine an autoregressive language model and a knowledge graph for more coherent and logical generation. 555–572 @@ -581,9 +581,9 @@ Is My Model Using the Right Evidence? Systematic Probes for Examining Evidence-Based Tabular Reasoning VivekGupta - Riyaz A.Bhat + Riyaz A.Bhat AtreyaGhosal - ManishShrivastava + ManishShrivastava ManeeshSingh VivekSrikumar 10.1162/tacl_a_00482 @@ -596,9 +596,9 @@ Uncertainty Estimation and Reduction of Pre-trained Models for Text Regression YuxiaWang - DanielBeck - TimothyBaldwin - KarinVerspoor + DanielBeck + TimothyBaldwin + KarinVerspoor 10.1162/tacl_a_00483 State-of-the-art classification and regression models are often not well calibrated, and cannot reliably provide uncertainty estimates, limiting their utility in safety-critical applications such as clinical decision-making. While recent work has focused on calibration of classifiers, there is almost no work in NLP on calibration in a regression setting. In this paper, we quantify the calibration of pre- trained language models for text regression, both intrinsically and extrinsically. We further apply uncertainty estimates to augment training data in low-resource domains. Our experiments on three regression tasks in both self-training and active-learning settings show that uncertainty estimation can be used to increase overall performance and enhance model generalization. 680–696 @@ -620,7 +620,7 @@ True Few-Shot Learning with <fixed-case>P</fixed-case>rompts—<fixed-case>A</fixed-case> Real-World Perspective TimoSchick - HinrichSchütze + HinrichSchütze 10.1162/tacl_a_00485 Prompt-based approaches excel at few-shot learning. However, Perez et al. (2021) recently cast doubt on their performance as they had difficulty getting good results in a “true” few-shot setting in which prompts and hyperparameters cannot be tuned on a dev set. In view of this, we conduct an extensive study of Pet, a method that combines textual instructions with example-based finetuning. We show that, if correctly configured, Pet performs strongly in true few-shot settings without a dev set. Crucial for this strong performance is a number of design choices, including Pet’s ability to intelligently handle multiple prompts. We put our findings to a real-world test by running Pet on RAFT, a benchmark of tasks taken from realistic NLP applications for which no labeled dev or test sets are available. Pet achieves a new state of the art on RAFT and performs close to non-expert humans for 7 out of 11 tasks. These results demonstrate that prompt-based learners can successfully be applied in true few-shot settings and underpin our belief that learning from instructions will play an important role on the path towards human-like few-shot learning capabilities. 716–731 @@ -631,8 +631,8 @@ Heterogeneous Supervised Topic Models DhanyaSridhar - HalDaumé III - DavidBlei + HalDaumé III + DavidBlei 10.1162/tacl_a_00487 Researchers in the social sciences are often interested in the relationship between text and an outcome of interest, where the goal is to both uncover latent patterns in the text and predict outcomes for unseen texts. To this end, this paper develops the heterogeneous supervised topic model (HSTM), a probabilistic approach to text analysis and prediction. HSTMs posit a joint model of text and outcomes to find heterogeneous patterns that help with both text analysis and prediction. The main benefit of HSTMs is that they capture heterogeneity in the relationship between text and the outcome across latent topics. To fit HSTMs, we develop a variational inference algorithm based on the auto-encoding variational Bayes framework. We study the performance of HSTMs on eight datasets and find that they consistently outperform related methods, including fine-tuned black-box models. Finally, we apply HSTMs to analyze news articles labeled with pro- or anti-tone. We find evidence of differing language used to signal a pro- and anti-tone. 732–745 @@ -707,8 +707,8 @@ Generate, Annotate, and Learn: <fixed-case>NLP</fixed-case> with Synthetic Text XuanliHe IslamNassar - JamieKiros - GholamrezaHaffari + JamieKiros + GholamrezaHaffari MohammadNorouzi 10.1162/tacl_a_00492 This paper studies the use of language models as a source of synthetic unlabeled text for NLP. We formulate a general framework called “generate, annotate, and learn (GAL)” to take advantage of synthetic text within knowledge distillation, self-training, and few-shot learning applications. To generate high-quality task-specific text, we either fine-tune LMs on inputs from the task of interest, or prompt large LMs with few examples. We use the best available classifier to annotate synthetic text with soft pseudo labels for knowledge distillation and self-training, and use LMs to obtain hard labels for few-shot learning. We train new supervised models on the combination of labeled and pseudo-labeled data, which results in significant gains across several applications. We investigate key components of GAL and present theoretical and empirical arguments against the use of class-conditional LMs to generate synthetic labeled text instead of unlabeled text. GAL achieves new state-of-the-art knowledge distillation results for 6-layer transformers on the GLUE leaderboard. @@ -721,7 +721,7 @@ Saturated Transformers are Constant-Depth Threshold Circuits WilliamMerrill AshishSabharwal - Noah A.Smith + Noah A.Smith 10.1162/tacl_a_00493 Transformers have become a standard neural network architecture for many NLP problems, motivating theoretical analysis of their power in terms of formal languages. Recent work has shown that transformers with hard attention are quite limited in power (Hahn, 2020), as they can be simulated by constant-depth AND/OR circuits (Hao et al., 2022). However, hard attention is a strong assumption, which may complicate the relevance of these results in practice. In this work, we analyze the circuit complexity of transformers with saturated attention: a generalization of hard attention that more closely captures the attention patterns learnable in practical transformers. We first show that saturated transformers transcend the known limitations of hard-attention transformers. We then prove saturated transformers with floating-point values can be simulated by constant-depth threshold circuits, giving the class TC0 as an upper bound on the formal languages they recognize. 843–856 @@ -731,7 +731,7 @@ Reducing Conversational Agents’ Overconfidence Through Linguistic Calibration - Sabrina J.Mielke + Sabrina J.Mielke ArthurSzlam EmilyDinan Y-LanBoureau @@ -746,7 +746,7 @@ A Survey of Text Games for Reinforcement Learning Informed by Natural Language PhilipOsborne HeidoNõmm - AndréFreitas + AndréFreitas 10.1162/tacl_a_00495 Reinforcement Learning has shown success in a number of complex virtual environments. However, many challenges still exist towards solving problems with natural language as a core component. Interactive Fiction Games (or Text Games) are one such problem type that offer a set of safe, partially observable environments where natural language is required as part of the Reinforcement Learning solution. Therefore, this survey’s aim is to assist in the development of new Text Game problem settings and solutions for Reinforcement Learning informed by natural language. Specifically, this survey: 1) introduces the challenges in Text Game Reinforcement Learning problems, 2) outlines the generation tools for rendering Text Games and the subsequent environments generated, and 3) compares the agent architectures currently applied to provide a systematic review of benchmark methodologies and opportunities for future researchers. 873–887 @@ -779,7 +779,7 @@ Learning <fixed-case>E</fixed-case>nglish with <fixed-case>P</fixed-case>eppa <fixed-case>P</fixed-case>ig MitjaNikolaus AfraAlishahi - GrzegorzChrupała + GrzegorzChrupała 10.1162/tacl_a_00498 Recent computational models of the acquisition of spoken language via grounding in perception exploit associations between spoken and visual modalities and learn to represent speech and visual data in a joint vector space. A major unresolved issue from the point of ecological validity is the training data, typically consisting of images or videos paired with spoken descriptions of what is depicted. Such a setup guarantees an unrealistically strong correlation between speech and the visual data. In the real world the coupling between the linguistic and the visual modality is loose, and often confounded by correlations with non-semantic aspects of the speech signal. Here we address this shortcoming by using a dataset based on the children’s cartoon Peppa Pig. We train a simple bi-modal architecture on the portion of the data consisting of dialog between characters, and evaluate on segments containing descriptive narrations. Despite the weak and confounded signal in this training data, our model succeeds at learning aspects of the visual semantics of spoken language. 922–936 @@ -804,7 +804,7 @@ Adapting to the Long Tail: A Meta-Analysis of Transfer Learning Research for Language Understanding Tasks AakankshaNaik JillLehman - CarolynRosé + CarolynRosé 10.1162/tacl_a_00500 Natural language understanding (NLU) has made massive progress driven by large benchmarks, but benchmarks often leave a long tail of infrequent phenomena underrepresented. We reflect on the question: Have transfer learning methods sufficiently addressed the poor performance of benchmark-trained models on the long tail? We conceptualize the long tail using macro-level dimensions (underrepresented genres, topics, etc.), and perform a qualitative meta-analysis of 100 representative papers on transfer learning research for NLU. Our analysis asks three questions: (i) Which long tail dimensions do transfer learning studies target? (ii) Which properties of adaptation methods help improve performance on the long tail? (iii) Which methodological gaps have greatest negative impact on long tail performance? Our answers highlight major avenues for future research in transfer learning for the long tail. Lastly, using our meta-analysis framework, we perform a case study comparing the performance of various adaptation methods on clinical narratives, which provides interesting insights that may enable us to make progress along these future avenues. 956–980 @@ -815,7 +815,7 @@ How to Dissect a <fixed-case>M</fixed-case>uppet: The Structure of Transformer Embedding Spaces TimotheeMickus DenisPaperno - MathieuConstant + MathieuConstant 10.1162/tacl_a_00501 Pretrained embeddings based on the Transformer architecture have taken the NLP community by storm. We show that they can mathematically be reframed as a sum of vector factors and showcase how to use this reframing to study the impact of each component. We provide evidence that multi-head attentions and feed-forwards are not equally useful in all downstream applications, as well as a quantitative overview of the effects of finetuning on the overall embedding space. This approach allows us to draw connections to a wide range of previous studies, from vector space anisotropy to attention weights. 981–996 @@ -851,7 +851,7 @@ ArabellaSinclair JaapJumelet WillemZuidema - RaquelFernández + RaquelFernández 10.1162/tacl_a_00504 We investigate the extent to which modern neural language models are susceptible to structural priming, the phenomenon whereby the structure of a sentence makes the same structure more probable in a follow-up sentence. We explore how priming can be used to study the potential of these models to learn abstract structural information, which is a prerequisite for good performance on tasks that require natural language understanding skills. We introduce a novel metric and release Prime-LM, a large corpus where we control for various linguistic factors that interact with priming strength. We find that Transformer models indeed show evidence of structural priming, but also that the generalizations they learned are to some extent modulated by semantic information. Our experiments also show that the representations acquired by the models may not only encode abstract sequential structure but involve certain level of hierarchical syntactic information. More generally, our study shows that the priming paradigm is a useful, additional tool for gaining insights into the capacities of language models and opens the door to future priming-based investigations that probe the model’s internal states.1 1031–1050 @@ -867,7 +867,7 @@ HugoLaurençon SalahZaiem AbdelrahmanMohamed - BenoîtSagot + BenoîtSagot EmmanuelDupoux 10.1162/tacl_a_00505 Finding word boundaries in continuous speech is challenging as there is little or no equivalent of a ‘space’ delimiter between words. Popular Bayesian non-parametric models for text segmentation (Goldwater et al., 2006, 2009) use a Dirichlet process to jointly segment sentences and build a lexicon of word types. We introduce DP-Parse, which uses similar principles but only relies on an instance lexicon of word tokens, avoiding the clustering errors that arise with a lexicon of word types. On the Zero Resource Speech Benchmark 2017, our model sets a new speech segmentation state-of-the-art in 5 languages. The algorithm monotonically improves with better input representations, achieving yet higher scores when fed with weakly supervised inputs. Despite lacking a type lexicon, DP-Parse can be pipelined to a language model and learn semantic and syntactic representations as assessed by a new spoken word embedding benchmark. 1 @@ -908,7 +908,7 @@ MarcoValentino DeborahFerreira JuliaRozanova - AndréFreitas + AndréFreitas 10.1162/tacl_a_00508 This paper presents Diff-Explainer, the first hybrid framework for explainable multi-hop inference that integrates explicit constraints with neural architectures through differentiable convex optimization. Specifically, Diff- Explainer allows for the fine-tuning of neural representations within a constrained optimization framework to answer and explain multi-hop questions in natural language. To demonstrate the efficacy of the hybrid framework, we combine existing ILP-based solvers for multi-hop Question Answering (QA) with Transformer-based representations. An extensive empirical evaluation on scientific and commonsense QA tasks demonstrates that the integration of explicit constraints in a end-to-end differentiable framework can significantly improve the performance of non- differentiable ILP solvers (8.91%–13.3%). Moreover, additional analysis reveals that Diff-Explainer is able to achieve strong performance when compared to standalone Transformers and previous multi-hop approaches while still providing structured explanations in support of its predictions. 1103–1119 @@ -938,7 +938,7 @@ JustinGrimmer RoiReichart Margaret E.Roberts - Brandon M.Stewart + Brandon M.Stewart VictorVeitch DiyiYang 10.1162/tacl_a_00511 @@ -967,7 +967,7 @@ ShutongFeng ChristianGeishauser Hsien-ChinLin - MilicaGašić + MilicaGašić 10.1162/tacl_a_00513 Generalizing dialogue state tracking (DST) to new data is especially challenging due to the strong reliance on abundant and fine-grained supervision during training. Sample sparsity, distributional shift, and the occurrence of new concepts and topics frequently lead to severe performance degradation during inference. In this paper we propose a training strategy to build extractive DST models without the need for fine-grained manual span labels. Two novel input-level dropout methods mitigate the negative impact of sample sparsity. We propose a new model architecture with a unified encoder that supports value as well as slot independence by leveraging the attention mechanism. We combine the strengths of triple copy strategy DST and value matching to benefit from complementary predictions without violating the principle of ontology independence. Our experiments demonstrate that an extractive DST model can be trained without manual span labels. Our architecture and training strategies improve robustness towards sample sparsity, new concepts, and topics, leading to state-of-the-art performance on a range of benchmarks. We further highlight our model’s ability to effectively learn from non-dialogue data. 1175–1192 @@ -1087,7 +1087,7 @@ Investigating Reasons for Disagreement in Natural Language Inference Nan-JiangJiang - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe 10.1162/tacl_a_00523 We investigate how disagreement in natural language inference (NLI) annotation arises. We developed a taxonomy of disagreement sources with 10 categories spanning 3 high- level classes. We found that some disagreements are due to uncertainty in the sentence meaning, others to annotator biases and task artifacts, leading to different interpretations of the label distribution. We explore two modeling approaches for detecting items with potential disagreement: a 4-way classification with a “Complicated” label in addition to the three standard NLI labels, and a multilabel classification approach. We found that the multilabel classification is more expressive and gives better recall of the possible interpretations in the data. 1357–1374 @@ -1123,8 +1123,8 @@ SamuelBarrett AdhigunaKuncoro MilošStanojević - PhilBlunsom - ChrisDyer + PhilBlunsom + ChrisDyer 10.1162/tacl_a_00526 We introduce Transformer Grammars (TGs), a novel class of Transformer language models that combine (i) the expressive power, scalability, and strong performance of Transformers and (ii) recursive syntactic compositions, which here are implemented through a special attention mask and deterministic transformation of the linearized tree. We find that TGs outperform various strong baselines on sentence-level language modeling perplexity, as well as on multiple syntax-sensitive language modeling evaluation metrics. Additionally, we find that the recursive syntactic composition bottleneck which represents each sentence as a single vector harms perplexity on document-level language modeling, providing evidence that a different kind of memory mechanism—one that is independent of composed syntactic representations—plays an important role in current successful models of long text. 1423–1439 @@ -1159,7 +1159,7 @@ NouhaDziri EhsanKamalloo SivanMilton - OsmarZaiane + OsmarZaiane MoYu Edoardo M.Ponti SivaReddy diff --git a/data/xml/2022.tal.xml b/data/xml/2022.tal.xml index 98e71cae41..2bb394a9f4 100644 --- a/data/xml/2022.tal.xml +++ b/data/xml/2022.tal.xml @@ -3,9 +3,9 @@ Traitement Automatique des Langues, Volume 63, Numéro 1 : Varia [Varia] - CécileFabre + CécileFabre EmmanuelMorin - SophieRosset + SophieRosset PascaleSébillot ATALA (Association pour le Traitement Automatique des Langues)
France
@@ -40,7 +40,7 @@ Survey on Narrative Structure: from Linguistic Theories to Automatic Extraction Approaches AmanBerhe CamilleGuinaudeau - ClaudeBarras + ClaudeBarras 63–87 2022.tal-1.3 berhe-etal-2022-survey @@ -50,7 +50,7 @@ Traitement Automatique des Langues, Volume 63, Numéro 2 : Traitement automatique des langues intermodal et multimodal [Cross-modal and multimodal natural language processing] GwénoléLecorvé - John D.Kelleher + John D.Kelleher ATALA (Association pour le Traitement Automatique des Langues)
France
2022 @@ -76,7 +76,7 @@ CamilleGuinaudeau HervéLe Borgne RomaricBesançon - Jose G.Moreno + Jose G.Moreno JesúsLovón Melgarejo 15–39 2022.tal-2.2 @@ -112,9 +112,9 @@ Traitement Automatique des Langues, Volume 63, Numéro 3 : Etats de l'art en TAL [Review articles in NLP] - CécileFabre + CécileFabre EmmanuelMorin - SophieRosset + SophieRosset PascaleSébillot ATALA (Association pour le Traitement Automatique des Langues)
France
@@ -150,7 +150,7 @@ Fillers in Spoken Language Understanding: Computational and Psycholinguistic Perspectives TanviDinkar ChloéClavel - IoanaVasilescu + IoanaVasilescu 37–62 2022.tal-3.3 dinkar-etal-2022-fillers diff --git a/data/xml/2022.tdle.xml b/data/xml/2022.tdle.xml index 4401b68594..bad865cd6b 100644 --- a/data/xml/2022.tdle.xml +++ b/data/xml/2022.tdle.xml @@ -6,7 +6,7 @@ ItziarAldabe BegoñaAltuna AritzFarwell - GermanRigau + GermanRigau European Language Resources Association
Marseille, France
June @@ -24,7 +24,7 @@ OwenGallagher GeorgRehm MariaGiagkou - SteliosPiperidis + SteliosPiperidis JaneDunne AndyWay 1–12 @@ -44,8 +44,8 @@ Collaborative Metadata Aggregation and Curation in Support of Digital Language Equality Monitoring MariaGiagkou - SteliosPiperidis - PennyLabropoulou + SteliosPiperidis + PennyLabropoulou MiltosDeligiannis AthanasiaKolovou LeonVoukoutis @@ -69,7 +69,7 @@ DašaFarkaš MateaFilko ArtūrsVasiļevskis - AndrejsVasiļjevs + AndrejsVasiļjevs JānisZiediņš ŽeljkaMotika MarkFishel @@ -90,7 +90,7 @@ CarmenMagariños Adina IoanaVladu John E.Ortega - José RamomPichel + José RamomPichel MarcosGarcía PabloGamallo ElisaFernández Rei diff --git a/data/xml/2022.term.xml b/data/xml/2022.term.xml index 703d8b5b29..17781d15e1 100644 --- a/data/xml/2022.term.xml +++ b/data/xml/2022.term.xml @@ -6,7 +6,7 @@ RuteCosta SaraCarvalho Ana OstroškiAnić - Anas FahadKhan + Anas FahadKhan European Language Resources Association
Marseille, France
June @@ -30,7 +30,7 @@ Knowledge Representation and Language Simplification of Human Rights SaraSilecchia FedericaVezzani - Giorgio MariaDi Nunzio + Giorgio MariaDi Nunzio 8–12 In this paper, we propose the description of a very recent interdisciplinary project aiming at analysing both the conceptual and linguistic dimensions of humanitarian rights terminology. This analysis will result in the form of a new knowledge-based multilingual terminological resource which is designed in order to meet the FAIR principles for Open Science and will serve, in the future, as a prototype for the development of a new software for the simplified rewriting of international legal texts relating to human rights. Given the early stage of the project, we will focus on the description of its rationale, the planned workflow, and the theoretical approach which will be adopted to achieve the main goal of this ambitious research project. 2022.term-1.2 @@ -51,7 +51,7 @@ A Dataset for Term Extraction in <fixed-case>H</fixed-case>indi ShubhankerBanerjee Bharathi RajaChakravarthi - John PhilipMcCrae + John PhilipMcCrae 19–25 Automatic Term Extraction (ATE) is one of the core problems in natural language processing and forms a key component of text mining pipelines of domain specific corpora. Complex low-level tasks such as machine translation and summarization for domain specific texts necessitate the use of term extraction systems. However, the development of these systems requires the use of large annotated datasets and thus there has been little progress made on this front for under-resourced languages. As a part of ongoing research, we present a dataset for term extraction from Hindi texts in this paper. To the best of our knowledge, this is the first dataset that provides term annotated documents for Hindi. Furthermore, we have evaluated this dataset on statistical term extraction methods and the results obtained indicate the problems associated with development of term extractors for under-resourced languages. 2022.term-1.4 @@ -73,7 +73,7 @@ PeterLundberg TomasBjerner YosefAl-Abasse - ArneJonsson + ArneJonsson ThomasVakili 30–32 In the experiments briefly presented in this abstract, we compare the performance of a generalist Swedish pre-trained language model with a domain-specific Swedish pre-trained model on the downstream task of focussed terminology extraction of implant terms, which are terms that indicate the presence of implants in the body of patients. The fine-tuning is identical for both models. For the search strategy we rely on KD-Tree that we feed with two different lists of term seeds, one with noise and one without noise. Results shows that the use of a domain-specific pre-trained language model has a positive impact on focussed terminology extraction only when using term seeds without noise. @@ -83,7 +83,7 @@ <fixed-case>D</fixed-case>-Terminer: Online Demo for Monolingual and Bilingual Automatic Term Extraction AylaRigouts Terryn - VeroniqueHoste + VeroniqueHoste ElsLefever 33–40 This contribution presents D-Terminer: an open access, online demo for monolingual and multilingual automatic term extraction from parallel corpora. The monolingual term extraction is based on a recurrent neural network, with a supervised methodology that relies on pretrained embeddings. Candidate terms can be tagged in their original context and there is no need for a large corpus, as the methodology will work even for single sentences. With the bilingual term extraction from parallel corpora, potentially equivalent candidate term pairs are extracted from translation memories and manual annotation of the results shows that good equivalents are found for most candidate terms. Accompanying the release of the demo is an updated version of the ACTER Annotated Corpora for Term Extraction Research (version 1.5). diff --git a/data/xml/2022.textgraphs.xml b/data/xml/2022.textgraphs.xml index 0cd99ae4d7..6b262972b5 100644 --- a/data/xml/2022.textgraphs.xml +++ b/data/xml/2022.textgraphs.xml @@ -69,7 +69,7 @@ <fixed-case>GUSUM</fixed-case>: Graph-based Unsupervised Summarization Using Sentence Features Scoring and Sentence-<fixed-case>BERT</fixed-case> TubaGokhan PhillipSmith - MarkLee + MarkLee 44–53 Unsupervised extractive document summarization aims to extract salient sentences from a document without requiring a labelled corpus. In existing graph-based methods, vertex and edge weights are usually created by calculating sentence similarities. In this paper, we develop a Graph-Based Unsupervised Summarization(GUSUM) method for extractive text summarization based on the principle of including the most important sentences while excluding sentences with similar meanings in the summary. We modify traditional graph ranking algorithms with recent sentence embedding models and sentence features and modify how sentence centrality is computed. We first define the sentence feature scores represented at the vertices, indicating the importance of each sentence in the document. After this stage, we use Sentence-BERT for obtaining sentence embeddings to better capture the sentence meaning. In this way, we define the edges of a graph where semantic similarities are represented. Next we create an undirected graph that includes sentence significance and similarities between sentences. In the last stage, we determine the most important sentences in the document with the ranking method we suggested on the graph created. Experiments on CNN/Daily Mail, New York Times, arXiv, and PubMed datasets show our approach achieves high performance on unsupervised graph-based summarization when evaluated both automatically and by humans. 2022.textgraphs-1.5 @@ -87,7 +87,7 @@ Text-Aware Graph Embeddings for Donation Behavior Prediction MeiXingDong XuemingXu - RadaMihalcea + RadaMihalcea 60–69 Predicting user behavior is essential for a large number of applications including recommender and dialog systems, and more broadly in domains such as healthcare, education, and economics. In this paper, we show that we can effectively predict donation behavior by using text-aware graph models, building upon graphs that connect user behaviors and their interests. Using a university donation dataset, we show that the graph representation significantly improves over learning from textual representations. Moreover, we show how incorporating implicit information inferred from text associated with the graph entities brings additional improvements. Our results demonstrate the role played by text-aware graph representations in predicting donation behavior. 2022.textgraphs-1.7 @@ -97,7 +97,7 @@ Word Sense Disambiguation of <fixed-case>F</fixed-case>rench Lexicographical Examples Using Lexical Networks AmanSinha SandrineOllinger - MathieuConstant + MathieuConstant 70–76 This paper focuses on the task of word sense disambiguation (WSD) on lexicographic examples relying on the French Lexical Network (fr-LN). For this purpose, we exploit the lexical and relational properties of the network, that we integrated in a feedforward neural WSD model on top of pretrained French BERT embeddings. We provide a comparative study with various models and further show the impact of our approach regarding polysemic units. 2022.textgraphs-1.8 @@ -130,7 +130,7 @@ MarcoValentino DeborahFerreira MokanaranganThayaparan - AndréFreitas + AndréFreitas DmitryUstalov 105–113 The Shared Task on Natural Language Premise Selection (NLPS) asks participants to retrieve the set of premises that are most likely to be useful for proving a given mathematical statement from a supporting knowledge base. While previous editions of the TextGraphs shared tasks series targeted multi-hop inference for explanation regeneration in the context of science questions (Thayaparan et al., 2021; Jansen and Ustalov, 2020, 2019), NLPS aims to assess the ability of state-of-the-art approaches to operate on a mixture of natural and mathematical language and model complex multi-hop reasoning dependencies between statements. To this end, this edition of the shared task makes use of a large set of approximately 21k mathematical statements extracted from the PS-ProofWiki dataset (Ferreira and Freitas, 2020a). In this summary paper, we present the results of the 1st edition of the NLPS task, providing a description of the evaluation data, and the participating systems. Additionally, we perform a detailed analysis of the results, evaluating various aspects involved in mathematical language processing and multi-hop inference. The best-performing system achieved a MAP of 15.39, improving the performance of a TF-IDF baseline by approximately 3.0 MAP. diff --git a/data/xml/2022.trac.xml b/data/xml/2022.trac.xml index 55a5b29012..ec7afaa2d0 100644 --- a/data/xml/2022.trac.xml +++ b/data/xml/2022.trac.xml @@ -4,9 +4,9 @@ Proceedings of the Third Workshop on Threat, Aggression and Cyberbullying (TRAC 2022) RiteshKumar - Atul Kr.Ojha + Atul Kr.Ojha MarcosZampieri - ShervinMalmasi + ShervinMalmasi DanielKadar Association for Computational Linguistics
Gyeongju, Republic of Korea
@@ -62,7 +62,7 @@ The Role of Context in Detecting the Target of Hate Speech IliaMarkov - WalterDaelemans + WalterDaelemans 37–42 Online hate speech detection is an inherently challenging task that has recently received much attention from the natural language processing community. Despite a substantial increase in performance, considerable challenges remain and include encoding contextual information into automated hate speech detection systems. In this paper, we focus on detecting the target of hate speech in Dutch social media: whether a hateful Facebook comment is directed against migrants or not (i.e., against someone else). We manually annotate the relevant conversational context and investigate the effect of different aspects of context on performance when adding it to a Dutch transformer-based pre-trained language model, BERTje. We show that performance of the model can be significantly improved by integrating relevant contextual information. 2022.trac-1.5 @@ -82,7 +82,7 @@ Is More Data Better? Re-thinking the Importance of Efficiency in Abusive Language Detection with Transformers-Based Active Learning HannahKirk BertieVidgen - Scott A.Hale + Scott A.Hale 52–61 Annotating abusive language is expensive, logistically complex and creates a risk of psychological harm. However, most machine learning research has prioritized maximizing effectiveness (i.e., F1 or accuracy score) rather than data efficiency (i.e., minimizing the amount of data that is annotated). In this paper, we use simulated experiments over two datasets at varying percentages of abuse to demonstrate that transformers-based active learning is a promising approach to substantially raise efficiency whilst still maintaining high effectiveness, especially when abusive content is a smaller percentage of the dataset. This approach requires a fraction of labeled data to reach performance equivalent to training over the full dataset. 2022.trac-1.7 diff --git a/data/xml/2022.trustnlp.xml b/data/xml/2022.trustnlp.xml index 4c1dbe3aab..3e86400815 100644 --- a/data/xml/2022.trustnlp.xml +++ b/data/xml/2022.trustnlp.xml @@ -48,7 +48,7 @@ Does Moral Code have a Moral Code? Probing Delphi’s Moral Philosophy - Kathleen C.Fraser + Kathleen C.Fraser SvetlanaKiritchenko EsmaBalkir 26-42 @@ -108,7 +108,7 @@ EsmaBalkir SvetlanaKiritchenko IsarNejadgholi - KathleenFraser + KathleenFraser 80-92 Motivations for methods in explainable artificial intelligence (XAI) often include detecting, quantifying and mitigating bias, and contributing to making machine learning models fairer. However, exactly how an XAI method can help in combating biases is often left unspecified. In this paper, we briefly review trends in explainability and fairness in NLP research, identify the current practices in which explainability methods are applied to detect and mitigate bias, and investigate the barriers preventing XAI methods from being used more widely in tackling fairness issues. 2022.trustnlp-1.8 diff --git a/data/xml/2022.tsar.xml b/data/xml/2022.tsar.xml index b4c24c44f3..ff43d9ed38 100644 --- a/data/xml/2022.tsar.xml +++ b/data/xml/2022.tsar.xml @@ -3,9 +3,9 @@ Proceedings of the Workshop on Text Simplification, Accessibility, and Readability (TSAR-2022) - SanjaŠtajner + SanjaŠtajner HoracioSaggion - DanielFerrés + DanielFerrés MatthewShardlow Kim ChengSheang KaiNorth @@ -108,7 +108,7 @@ ItziarGonzalez-DiosHiTZ Basque Center for Language Technologies - Ixa, University of the Basque Country UPV/EHU IkerGutiérrez-FandiñoUniversität Leipzig Oscar m.Cumbicus-PinedaIxa group and Carrera de Computación, UPV/EHU and UNL - AitorSoroaHiTZ Center - Ixa, University of the Basque Country UPV/EHU + AitorSoroaHiTZ Center - Ixa, University of the Basque Country UPV/EHU 86-97 Automatic Text simplification (ATS) seeks to reduce the complexity of a text for a general public or a target audience. In the last years, deep learning methods have become the most used systems in ATS research, but these systems need large and good quality datasets to be evaluated. Moreover, these data are available on a large scale only for English and in some cases with restrictive licenses. In this paper, we present IrekiaLF_es, an open-license benchmark for Spanish text simplification. It consists of a document-level corpus and a sentence-level test set that has been manually aligned. We also conduct a neurolinguistically-based evaluation of the corpus in order to reveal its suitability for text simplification. This evaluation follows the Lexicon-Unification-Linearity (LeULi) model of neurolinguistic complexity assessment. Finally, we present a set of experiments and baselines of ATS systems in a zero-shot scenario. 2022.tsar-1.8 @@ -182,7 +182,7 @@ An Investigation into the Effect of Control Tokens on Text Simplification - ZihaoLiManchester Metropolitan University + ZihaoLiManchester Metropolitan University MatthewShardlowManchester Metropolitan University SaeedHassanManchester Metropolitan University 154-165 diff --git a/data/xml/2022.tu.xml b/data/xml/2022.tu.xml index 538fc58701..93c19d7fb1 100644 --- a/data/xml/2022.tu.xml +++ b/data/xml/2022.tu.xml @@ -7,7 +7,7 @@ Thien HuuNguyen Viet DacLai Amir Pouran BenVeyseh - Trung H.Bui + Trung H.Bui David SeunghyunYoon International Conference on Computational Linguistics
Gyeongju, South Korea
@@ -47,7 +47,7 @@ Model Transfer for Event tracking as Transcript Understanding for Videos of Small Group Interaction SumitAgarwal RosannaVitiello - CarolynRosé + CarolynRosé 20–29 Videos of group interactions contain a wealth of information beyond the information directly communicated in a transcript of the discussion. Tracking who has participated throughout an extended interaction and what each of their trajectories has been in relation to one another is the foundation for joint activity understanding, though it comes with some unique challenges in videos of tightly coupled group work. Motivated by insights into the properties of such scenarios, including group composition and the properties of task-oriented, goal directed tasks, we present a successful proof-of-concept. In particular, we present a transfer experiment to a dyadic robot construction task, an ablation study, and a qualitative analysis. 2022.tu-1.3 diff --git a/data/xml/2022.udfestbr.xml b/data/xml/2022.udfestbr.xml index 4a91706e4b..b622dcdb2c 100644 --- a/data/xml/2022.udfestbr.xml +++ b/data/xml/2022.udfestbr.xml @@ -4,8 +4,8 @@ Proceedings of the Universal Dependencies Brazilian Festival Thiago Alexandre SalgueiroPardo - ArianiDi-Felippo - Norton TrevisanRoman + ArianiDi-Felippo + Norton TrevisanRoman Association for Computational Linguistics
Fortaleza, Brazil
March @@ -31,7 +31,7 @@ Polishing the gold – how much revision do we need in treebanks? ElvisSouza - ClaudiaFreitas + ClaudiaFreitas 1–11 2022.udfestbr-1.2 souza-freitas-2022-polishing @@ -58,7 +58,7 @@ Still on arguments and adjuncts: the status of the indirect object and the adverbial adjunct relations in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>P</fixed-case>ortuguese ElvisSouza - ClaudiaFreitas + ClaudiaFreitas 1–10 2022.udfestbr-1.5 souza-freitas-2022-still diff --git a/data/xml/2022.umios.xml b/data/xml/2022.umios.xml index 9e4b0da0fb..e6357dc86a 100644 --- a/data/xml/2022.umios.xml +++ b/data/xml/2022.umios.xml @@ -52,7 +52,7 @@ WafaaMohammedUniversity of Tübingen HassanShahmohammadiUniversity of Tübingen Hendrik P. A.LenschUniversity of Tübingen - R. HaraldBaayenUniversity of Tübingen + R. HaraldBaayenUniversity of Tübingen 18-28 Visual grounding of Language aims at enriching textual representations of language with multiple sources of visual knowledge such as images and videos. Although visual grounding is an area of intense research, inter-lingual aspects of visual grounding have not received much attention. The present study investigates the inter-lingual visual grounding of word embeddings. We propose an implicit alignment technique between the two spaces of vision and language in which inter-lingual textual information interacts in order to enrich pre-trained textual word embeddings. We focus on three languages in our experiments, namely, English, Arabic, and German. We obtained visually grounded vector representations for these languages and studied whether visual grounding on one or multiple languages improved the performance of embeddings on word similarity and categorization benchmarks. Our experiments suggest that inter-lingual knowledge improves the performance of grounded embeddings in similar languages such as German and English. However, inter-lingual grounding of German or English with Arabic led to a slight degradation in performance on word similarity benchmarks. On the other hand, we observed an opposite trend on categorization benchmarks where Arabic had the most improvement on English. In the discussion section, several reasons for those findings are laid out. We hope that our experiments provide a baseline for further research on inter lingual visual grounding. 2022.umios-1.3 @@ -77,7 +77,7 @@ Discourse Relation Embeddings: Representing the Relations between Discourse Segments in Social Media YoungseoSonDepartment of Computer Science, Stony Brook University VasudhaVaradarajanDepartment of Computer Science, Stony Brook University - H. AndrewSchwartzDepartment of Computer Science, Stony Brook University + H. AndrewSchwartzDepartment of Computer Science, Stony Brook University 45-55 Discourse relations are typically modeled as a discrete class that characterizes the relation between segments of text (e.g. causal explanations, expansions). However, such predefined discrete classes limit the universe of potential relationships and their nuanced differences. Adding higher-level semantic structure to contextual word embeddings, we propose representing discourse relations as points in high dimensional continuous space. However, unlike words, discourse relations often have no surface form (relations are in between two segments, often with no word or phrase in that gap) which presents a challenge for existing embedding techniques. We present a novel method for automatically creating discourse relation embeddings (DiscRE), addressing the embedding challenge through a weakly supervised, multitask approach to learn diverse and nuanced relations in social media. Results show DiscRE representations obtain the best performance on Twitter discourse relation classification (macro F1=0.76), social media causality prediction (from F1=0.79 to 0.81), and perform beyond modern sentence and word transformers at traditional discourse relation classification, capturing novel nuanced relations (e.g. relations at the intersection of causal explanations and counterfactuals). 2022.umios-1.5 @@ -87,7 +87,7 @@ Understanding Cross-modal Interactions in <fixed-case>V</fixed-case>&<fixed-case>L</fixed-case> Models that Generate Scene Descriptions MicheleCafagnaUniversity of Malta, Institute of Linguistics and Language Technology - Keesvan DeemterUniversiteit Utrecht, Information and Computing Sciences + Keesvan DeemterUniversiteit Utrecht, Information and Computing Sciences AlbertGattUniversity of Malta, Institute of Linguistics and Language Technology 56-72 Image captioning models tend to describe images in an object-centric way, emphasising visible objects. But image descriptions can also abstract away from objects and describe the type of scene depicted. In this paper, we explore the potential of a state of the art Vision and Language model, VinVL, to caption images at the scene level using (1) a novel dataset which pairs images with both object-centric and scene descriptions. Through (2) an in-depth analysis of the effect of the fine-tuning, we show (3) that a small amount of curated data suffices to generate scene descriptions without losing the capability to identify object-level concepts in the scene; the model acquires a more holistic view of the image compared to when object-centric descriptions are generated. We discuss the parallels between these results and insights from computational and cognitive science research on scene perception. diff --git a/data/xml/2022.vardial.xml b/data/xml/2022.vardial.xml index 44bc3d5c47..fe432a9cb8 100644 --- a/data/xml/2022.vardial.xml +++ b/data/xml/2022.vardial.xml @@ -6,8 +6,8 @@ YvesScherrer TommiJauhiainen NikolaLjubešić - PreslavNakov - JörgTiedemann + PreslavNakov + JörgTiedemann MarcosZampieri Association for Computational Linguistics
Gyeongju, Republic of Korea
@@ -132,7 +132,7 @@ Transfer Learning Improves <fixed-case>F</fixed-case>rench Cross-Domain Dialect Identification: <fixed-case>NRC</fixed-case> @ <fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial 2022 GabrielBernier-Colborne SergeLeger - CyrilGoutte + CyrilGoutte 109–118 We describe the systems developed by the National Research Council Canada for the French Cross-Domain Dialect Identification shared task at the 2022 VarDial evaluation campaign. We evaluated two different approaches to this task: SVM and probabilistic classifiers exploiting n-grams as features, and trained from scratch on the data provided; and a pre-trained French language model, CamemBERT, that we fine-tuned on the dialect identification task. The latter method turned out to improve the macro-F1 score on the test set from 0.344 to 0.430 (25% increase), which indicates that transfer learning can be helpful for dialect identification. 2022.vardial-1.12 @@ -142,7 +142,7 @@ <fixed-case>I</fixed-case>talian Language and Dialect Identification and Regional <fixed-case>F</fixed-case>rench Variety Detection using Adaptive Naive <fixed-case>B</fixed-case>ayes TommiJauhiainen HeidiJauhiainen - KristerLindén + KristerLindén 119–129 This article describes the language identification approach used by the SUKI team in the Identification of Languages and Dialects of Italy and the French Cross-Domain Dialect Identification shared tasks organized as part of the VarDial workshop 2022. We describe some experiments and the preprocessing techniques we used for the training data in preparation for the shared task submissions, which are also discussed. Our Naive Bayes-based adaptive system reached the first position in Italian language identification and came second in the French variety identification task. 2022.vardial-1.13 diff --git a/data/xml/2022.wanlp.xml b/data/xml/2022.wanlp.xml index 903b2abfda..6d827889f7 100644 --- a/data/xml/2022.wanlp.xml +++ b/data/xml/2022.wanlp.xml @@ -6,7 +6,7 @@ HoudaBouamor HendAl-Khalifa KareemDarwish - OwenRambow + OwenRambow FethiBougares AhmedAbdelali NadiTomeh @@ -38,8 +38,8 @@ Joint Coreference Resolution for Zeros and non-Zeros in <fixed-case>A</fixed-case>rabic AbdulrahmanAlorainiQueen Mary University of London - SameerPradhanUniversity of Pennsylvania and cemantix.org - MassimoPoesioQueen Mary University of London + SameerPradhanUniversity of Pennsylvania and cemantix.org + MassimoPoesioQueen Mary University of London 11-21 Most existing proposals about anaphoric zero pronoun (AZP) resolution regard full mention coreference and AZP resolution as two independent tasks, even though the two tasks are clearly related. The main issues that need tackling to develop a joint model for zero and non-zero mentions are the difference between the two types of arguments (zero pronouns, being null, provide no nominal information) and the lack of annotated datasets of a suitable size in which both types of arguments are annotated for languages other than Chinese and Japanese. In this paper, we introduce two architectures for jointly resolving AZPs and non-AZPs, and evaluate them on Arabic, a language for which, as far as we know, there has been no prior work on joint resolution. Doing this also required creating a new version of the Arabic subset of the standard coreference resolution dataset used for the CoNLL-2012 shared task (Pradhan et al.,2012) in which both zeros and non-zeros are included in a single dataset. 2022.wanlp-1.2 @@ -62,7 +62,7 @@ MoussaKamal EddineÉcole polytechnique NadiTomehLIPN, Université Sorbonne Paris Nord NizarHabashNew York University Abu Dhabi - JosephLe RouxUniversité Sorbonne Paris Nord + JosephLe RouxUniversité Sorbonne Paris Nord MichalisVazirgiannisEcole Polytechnique 31-42 Like most natural language understanding and generation tasks, state-of-the-art models for summarization are transformer-based sequence-to-sequence architectures that are pretrained on large corpora. While most existing models focus on English, Arabic remains understudied. In this paper we propose AraBART, the first Arabic model in which the encoder and the decoder are pretrained end-to-end, based on BART. We show that AraBART achieves the best performance on multiple abstractive summarization datasets, outperforming strong baselines including a pretrained Arabic BERT-based model, multilingual BART, Arabic T5, and a multilingual T5 model. AraBART is publicly available. @@ -139,11 +139,11 @@ DaliyahAlZeerTaif University Kawla MohmadShnqitiClangu AhmedElbakryMicrosoft - MuhammadElNokrashyMicrosoft + MuhammadElNokrashyMicrosoft MohamedGabrMicrosoft AbderrahmaneIssamArchipel Cognitive AbdelrahimQaddoumiNyu - VijayShankerUniversity of Delaware + VijayShankerUniversity of Delaware MahmoudZyateLeyton 98-107 In this paper, we present the results and findings of the Shared Task on Gender Rewriting, which was organized as part of the Seventh Arabic Natural Language Processing Workshop. The task of gender rewriting refers to generating alternatives of a given sentence to match different target user gender contexts (e.g., a female speaker with a male listener, a male speaker with a male listener, etc.). This requires changing the grammatical gender (masculine or feminine) of certain words referring to the users. In this task, we focus on Arabic, a gender-marking morphologically rich language. A total of five teams from four countries participated in the shared task. @@ -157,7 +157,7 @@ HamdyMubarakQatar Computing Research Institute WajdiZaghouaniHamad Bin Khalifa University GiovanniDa San MartinoUniversity of Padova - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 108-118 Propaganda is defined as an expression of opinion or action by individuals or groups deliberately designed to influence opinions or actions of other individuals or groups with reference to predetermined ends and this is achieved by means of well-defined rhetorical and psychological devices. Currently, propaganda (or persuasion) techniques have been commonly used on social media to manipulate or mislead social media users. Automatic detection of propaganda techniques from textual, visual, or multimodal content has been studied recently, however, major of such efforts are focused on English language content. In this paper, we propose a shared task on detecting propaganda techniques for Arabic textual content. We have done a pilot annotation of 200 Arabic tweets, which we plan to extend to 2,000 tweets, covering diverse topics. We hope that the shared task will help in building a community for Arabic propaganda detection. The dataset will be made publicly available, which can help in future studies. 2022.wanlp-1.11 @@ -262,7 +262,7 @@ A Semi-supervised Approach for a Better Translation of Sentiment in Dialectical <fixed-case>A</fixed-case>rabic <fixed-case>UGT</fixed-case> HadeelSaadanyUniversity of Surrey - ConstantinOrăsanUniversity of Surrey + ConstantinOrăsanUniversity of Surrey EmadMohamedRGCL, Wolverhampton AshrafTantawyDe Montfort University 214-224 @@ -286,7 +286,7 @@ Improving <fixed-case>POS</fixed-case> Tagging for <fixed-case>A</fixed-case>rabic Dialects on Out-of-Domain Texts NoorAbo MokhIndiana University DanielDakotaIndiana University - SandraKüblerIndiana University + SandraKüblerIndiana University 238-248 We investigate part of speech tagging for four Arabic dialects (Gulf, Levantine, Egyptian, and Maghrebi), in an out-of-domain setting. More specifically, we look at the effectiveness of 1) upsampling the target dialect in the training data of a joint model, 2) increasing the consistency of the annotations, and 3) using word embeddings pre-trained on a large corpus of dialectal Arabic. We increase the accuracy on average by about 20 percentage points. 2022.wanlp-1.22 @@ -392,7 +392,7 @@ Emoji Sentiment Roles for Sentiment Analysis: A Case Study in <fixed-case>A</fixed-case>rabic Texts Shatha Ali A.HakamiUniversity of Birmingham - RobertHendleyUniversity of Birmingham + RobertHendleyUniversity of Birmingham PhillipSmithUniversity of Birmingham 346-355 Emoji (digital pictograms) are crucial features for textual sentiment analysis. However, analysing the sentiment roles of emoji is very complex. This is due to its dependency on different factors, such as textual context, cultural perspective, interlocutor’s personal traits, interlocutors’ relationships or a platforms’ functional features. This work introduces an approach to analysing the sentiment effects of emoji as textual features. Using an Arabic dataset as a benchmark, our results confirm the borrowed argument that each emoji has three different norms of sentiment role (negative, neutral or positive). Therefore, an emoji can play different sentiment roles depending upon the context. It can behave as an emphasizer, an indicator, a mitigator, a reverser or a trigger of either negative or positive sentiment within a text. In addition, an emoji may have a neutral effect (i.e., no effect) on the sentiment of the text. @@ -444,7 +444,7 @@ A Pilot Study on the Collection and Computational Analysis of Linguistic Differences Amongst Men and Women in a Kuwaiti <fixed-case>A</fixed-case>rabic <fixed-case>W</fixed-case>hats<fixed-case>A</fixed-case>pp Dataset HesahAldihanUniversity of Sheffield - RobertGaizauskasUniversity of Sheffield + RobertGaizauskasUniversity of Sheffield SusanFitzmauriceUniversity of Sheffield 372-380 This study focuses on the collection and computational analysis of Kuwaiti Arabic (KA), which is considered a low resource dialect, to test different sociolinguistic hypotheses related to gendered language use. In this paper, we describe the collection and analysis of a corpus of WhatsApp Group chats with mixed gender Kuwaiti participants. This corpus, which we are making publicly available, is the first corpus of KA conversational data. We analyse different interactional and linguistic features to get insights about features that may be indicative of gender to inform the development of a gender classification system for KA in an upcoming study. Statistical analysis of our data shows that there is insufficient evidence to claim that there are significant differences amongst men and women with respect to number of turns, length of turns and number of emojis. However, qualitative analysis shows that men and women differ substantially in the types of emojis they use and in their use of lengthened words. @@ -458,7 +458,7 @@ CibuJohnyGoogle RaiomondDoctorGoogle BrianRoarkGoogle Inc. - RichardSproatGoogle, Japan + RichardSproatGoogle, Japan 381-387 This paper presents an open-source software library that provides a set of finite-state transducer (FST) components and corresponding utilities for manipulating the writing systems of languages that use the Perso-Arabic script. The operations include various levels of script normalization, including visual invariance-preserving operations that subsume and go beyond the standard Unicode normalization forms, as well as transformations that modify the visual appearance of characters in accordance with the regional orthographies for eleven contemporary languages from diverse language families. The library also provides simple FST-based romanization and transliteration. We additionally attempt to formalize the typology of Perso-Arabic characters by providing one-to-many mappings from Unicode code points to the languages that use them. While our work focuses on the Arabic script diaspora rather than Arabic itself, this approach could be adopted for any language that uses the Arabic script, thus providing a unified framework for treating a script family used by close to a billion people. 2022.wanlp-1.36 @@ -473,7 +473,7 @@ ChristopherMadgeQueen Mary University of London,United Kingdom JuntaoYuUniversity of Essex,United Kingdom RichardBartleUniversity of Essex,United Kingdom - MassimoPoesioQueen Mary University of London,United Kingdom + MassimoPoesioQueen Mary University of London,United Kingdom 388-393 Coreference resolution is a key aspect of text comprehension, but the size of the available coreference corpora for Arabic is limited in comparison to the size of the corpora for other languages. In this paper we present a Game-With-A-Purpose called Stroll with a Scroll created to collect from players coreference annotations for Arabic. The key contribution of this work is the embedding of the annotation task in a virtual world setting, as opposed to the puzzle-type games used in previously proposed Games-With-A-Purpose for coreference. 2022.wanlp-1.37 @@ -508,7 +508,7 @@ Optimizing Naive <fixed-case>B</fixed-case>ayes for <fixed-case>A</fixed-case>rabic Dialect Identification TommiJauhiainenUniversity of Helsinki HeidiJauhiainenUniversity of Helsinki - KristerLindénUniversity of Helsinki + KristerLindénUniversity of Helsinki 409-414 This article describes the language identification system used by the SUKI team in the 2022 Nuanced Arabic Dialect Identification (NADI) shared task. In addition to the system description, we give some details of the dialect identification experiments we conducted while preparing our submissions. In the end, we submitted only one official run. We used a Naive Bayes-based language identifier with character n-grams from one to four, of which we implemented a new version, which automatically optimizes its parameters. We also experimented with clustering the training data according to different topics. With the macro F1 score of 0.1963 on test set A and 0.1058 on test set B, we achieved the 18th position out of the 19 competing teams. 2022.wanlp-1.40 @@ -555,7 +555,7 @@ EmnaFsihANLP Research Group / Sfax, Tunisia SamehKchaouANLP Research Group / Sfax, Tunisia RahmaBoujelbaneANLP Research Group / Sfax, Tunisia - LamiaHadrich-BelguithANLP Research Group, MIRACL Lab, FSEGS, Sfax University + LamiaHadrich-BelguithANLP Research Group, MIRACL Lab, FSEGS, Sfax University 431-435 Arabic has a widely varying collection of dialects. With the explosion of the use of social networks, the volume of written texts has remarkably increased. Most users express themselves using their own dialect. Unfortunately, many of these dialects remain under-studied due to the scarcity of resources. Researchers and industry practitioners are increasingly interested in analyzing users’ sentiments. In this context, several approaches have been proposed, namely: traditional machine learning, deep learning transfer learning and more recently few-shot learning approaches. In this work, we compare their efficiency as part of the NADI competition to develop a country-level sentiment analysis model. Three models were beneficial for this sub-task: The first based on Sentence Transformer (ST) and achieve 43.23% on DEV set and 42.33% on TEST set, the second based on CAMeLBERT and achieve 47.85% on DEV set and 41.72% on TEST set and the third based on multi-dialect BERT model and achieve 66.72% on DEV set and 39.69% on TEST set. 2022.wanlp-1.44 @@ -626,7 +626,7 @@ <fixed-case>NLP</fixed-case> <fixed-case>DI</fixed-case> at <fixed-case>NADI</fixed-case> Shared Task Subtask-1: Sub-word Level Convolutional Neural Models and Pre-trained Binary Classifiers for Dialect Identification VaniKanjirangatIdsia - TanjaSamardzicUniversity of Zurich + TanjaSamardzicUniversity of Zurich LjiljanaDolamicarmasuisse S&T FabioRinaldiIDSIA, Swiss AI Institute 468-473 @@ -651,7 +651,7 @@ Building an Ensemble of Transformer Models for <fixed-case>A</fixed-case>rabic Dialect Classification and Sentiment Analysis AbdullahKhered Ingy AbdelhalimAbdelhalim - RizaBatista-Navarro + RizaBatista-Navarro 479-484 In this paper, we describe the approaches we developed for the Nuanced Arabic Dialect Identification (NADI) 2022 shared task, which consists of two subtasks: the identification of country-level Arabic dialects and sentiment analysis. Our team, UniManc, developed approaches to the two subtasks which are underpinned by the same model: a pre-trained MARBERT language model. For Subtask 1, we applied undersampling to create versions of the training data with a balanced distribution across classes. For Subtask 2, we further trained the original MARBERT model for the masked language modelling objective using a NADI-provided dataset of unlabelled Arabic tweets. For each of the subtasks, a MARBERT model was fine-tuned for sequence classification, using different values for hyperparameters such as seed and learning rate. This resulted in multiple model variants, which formed the basis of an ensemble model for each subtask. Based on the official NADI evaluation, our ensemble model obtained a macro-F1-score of 26.863, ranking second overall in the first subtask. In the second subtask, our ensemble model also ranked second, obtaining a macro-F1-PN score (macro-averaged F1-score over the Positive and Negative classes) of 73.544. 2022.wanlp-1.53 @@ -671,7 +671,7 @@ Generative Approach for Gender-Rewriting Task with <fixed-case>A</fixed-case>rabic<fixed-case>T</fixed-case>5 SultanAlrowiliUniversity of Delaware - VijayShankerUniversity of Delaware + VijayShankerUniversity of Delaware 491-495 Addressing the correct gender in generative tasks (e.g., Machine Translation) has been an overlooked issue in the Arabic NLP. However, the recent introduction of the Arabic Parallel Gender Corpus (APGC) dataset has established new baselines for the Arabic Gender Rewriting task. To address the Gender Rewriting task, we first pre-train our new Seq2Seq ArabicT5 model on a 17GB of Arabic Corpora. Then, we continue pre-training our ArabicT5 model on the APGC dataset using a newly proposed method. Our evaluation shows that our ArabicT5 model, when trained on the APGC dataset, achieved competitive results against existing state-of-the-art methods. In addition, our ArabicT5 model shows better results on the APGC dataset compared to other Arabic and multilingual T5 models. 2022.wanlp-1.55 @@ -680,7 +680,7 @@ <fixed-case>A</fixed-case>ra<fixed-case>P</fixed-case>rop at <fixed-case>WANLP</fixed-case> 2022 Shared Task: Leveraging Pre-Trained Language Models for <fixed-case>A</fixed-case>rabic Propaganda Detection - GauravSinghIndependent Research + GauravSinghIndependent Research 496-500 This paper presents the approach taken for the shared task on Propaganda Detection in Arabic at the Seventh Arabic Natural Language Processing Workshop (WANLP 2022). We participated in Sub-task 1 where the text of a tweet is provided, and the goal is to identify the different propaganda techniques used in it. This problem belongs to multi-label classification. For our solution, we approached leveraging different transformer based pre-trained language models with fine-tuning to solve this problem. We found that MARBERTv2 outperforms in terms of performance where F1-macro is 0.08175 and F1-micro is 0.61116 compared to other language models that we considered. Our method achieved rank 4 in the testing phase of the challenge. 2022.wanlp-1.56 @@ -758,7 +758,7 @@ <fixed-case>IITD</fixed-case> at <fixed-case>WANLP</fixed-case> 2022 Shared Task: Multilingual Multi-Granularity Network for Propaganda Detection ShubhamMittalIndian Institute of Technology Delhi - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 529-533 We present our system for the two subtasks of the shared task on propaganda detection in Arabic, part of WANLP’2022. Subtask 1 is a multi-label classification problem to find the propaganda techniques used in a given tweet. Our system for this task uses XLM-R to predict probabilities for the target tweet to use each of the techniques. In addition to finding the techniques, subtask 2 further asks to identify the textual span for each instance of each technique that is present in the tweet; the task can be modelled as a sequence tagging problem. We use a multi-granularity network with mBERT encoder for subtask 2. Overall, our system ranks second for both subtasks (out of 14 and 3 participants, respectively). Our experimental results and analysis show that it does not help to use a much larger English corpus annotated with propaganda techniques, regardless of whether used in English or after translation to Arabic. 2022.wanlp-1.63 @@ -782,7 +782,7 @@ Abdullah Faiz Ur RahmanKhiljiNational Institute of Technology Silchar RiyankaMannaAdamas University, Kolkata ParthaPakrayNational Institute of Technology Silchar - SivajiBandyopadhyayJadavpur University, Nit Silchar + SivajiBandyopadhyayJadavpur University, Nit Silchar 541-544 In today’s time, online users are regularly exposed to media posts that are propagandistic. Several strategies have been developed to promote safer media consumption in Arabic to combat this. However, there is a limited available multilabel annotated social media dataset. In this work, we have used a pre-trained AraBERT twitter-base model on an expanded train data via data augmentation. Our team CNLP-NITS-PP, has achieved the third rank in subtask 1 at WANLP-2022, for propaganda detection in Arabic (shared task) in terms of micro-F1 score of 0.602. 2022.wanlp-1.65 @@ -795,7 +795,7 @@ Abu Bakr SolimanMohammadNu MohamedIbrahimNew Giza University Laila HeshamAfifyNewGiza University, School of IT - Samhaa R.El-BeltagyNewgiza University/Optomatica + Samhaa R.El-BeltagyNewgiza University/Optomatica 545-550 This paper presents the system developed by the NGU_CNLP team for addressing the shared task on Propaganda Detection in Arabic at WANLP 2022. The team participated in the shared tasks’ two sub-tasks which are: 1) Propaganda technique identification in text and 2) Propaganda technique span identification. In the first sub-task, the goal is to detect all employed propaganda techniques in some given piece of text out of a possible 17 different techniques or to detect that no propaganda technique is being used in that piece of text. As such, this first sub-task is a multi-label classification problem with a pool of 18 possible labels. Subtask 2 extends sub-task 1, by requiring the identification of the exact text span in which a propaganda technique was employed, making it a sequence labeling problem. For task 1, a combination of a data augmentation strategy coupled with an enabled transformer-based model comprised our classification model. This classification model ranked first amongst the 14 systems participating in this subtask. For sub-task two, a transfer learning model was adopted. The system ranked third among the 3 different models that participated in this subtask. 2022.wanlp-1.66 diff --git a/data/xml/2022.wassa.xml b/data/xml/2022.wassa.xml index 0e1d54257a..cb39223946 100644 --- a/data/xml/2022.wassa.xml +++ b/data/xml/2022.wassa.xml @@ -4,13 +4,13 @@ Proceedings of the 12th Workshop on Computational Approaches to Subjectivity, Sentiment & Social Media Analysis JeremyBarnes - OrphéeDe Clercq + OrphéeDe Clercq ValentinBarriere ShabnamTafreshi SawsanAlqahtani JoãoSedoc RomanKlinger - AlexandraBalahur + AlexandraBalahur Association for Computational Linguistics
Dublin, Ireland
May @@ -52,7 +52,7 @@ Domain-Aware Contrastive Knowledge Transfer for Multi-domain Imbalanced Data ZixuanKe MohammadKachuee - SungjinLee + SungjinLee 25-36 In many real-world machine learning applications, samples belong to a set of domains e.g., for product reviews each review belongs to a product category. In this paper, we study multi-domain imbalanced learning (MIL), the scenario that there is imbalance not only in classes but also in domains. In the MIL setting, different domains exhibit different patterns and there is a varying degree of similarity and divergence among domains posing opportunities and challenges for transfer learning especially when faced with limited or insufficient training data. We propose a novel domain-aware contrastive knowledge transfer method called DCMI to (1) identify the shared domain knowledge to encourage positive transfer among similar domains (in particular from head domains to tail domains); (2) isolate the domain-specific knowledge to minimize the negative transfer from dissimilar domains. We evaluated the performance of DCMI on three different datasets showing significant improvements in different MIL scenarios. 2022.wassa-1.3 @@ -81,7 +81,7 @@ ElsLefever PranaydeepSingh OlivierParent - VeroniqueHoste + VeroniqueHoste 51-61 In this paper, we present the SentEMO platform, a tool that provides aspect-based sentiment analysis and emotion detection of unstructured text data such as reviews, emails and customer care conversations. Currently, models have been trained for five domains and one general domain and are implemented in a pipeline approach, where the output of one model serves as the input for the next. The results are presented in three dashboards, allowing companies to gain more insights into what stakeholders think of their products and services. The SentEMO platform is available at https://sentemo.ugent.be 2022.wassa-1.5 @@ -131,7 +131,7 @@ Evaluating Contextual Embeddings and their Extraction Layers for Depression Assessment MatthewMatero AlbertHung - H. AndrewSchwartz + H. AndrewSchwartz 89-94 Many recent works in natural language processing have demonstrated ability to assess aspects of mental health from personal discourse. At the same time, pre-trained contextual word embedding models have grown to dominate much of NLP but little is known empirically on how to best apply them for mental health assessment. Using degree of depression as a case study, we do an empirical analysis on which off-the-shelf language model, individual layers, and combinations of layers seem most promising when applied to human-level NLP tasks. Notably, we find RoBERTa most effective and, despite the standard in past work suggesting the second-to-last or concatenation of the last 4 layers, we find layer 19 (sixth-to last) is at least as good as layer 23 when using 1 layer. Further, when using multiple layers, distributing them across the second half (i.e. Layers 12+), rather than last 4, of the 24 layers yielded the most accurate results. 2022.wassa-1.9 @@ -226,7 +226,7 @@ AaronMaladry ElsLefever CynthiaVan Hee - VeroniqueHoste + VeroniqueHoste 172-181 This paper presents the results of a replication experiment for automatic irony detection in Dutch social media text, investigating both a feature-based SVM classifier, as was done by Van Hee et al. (2017) and and a transformer-based approach. In addition to building a baseline model, an important goal of this research is to explore the implementation of common-sense knowledge in the form of implicit sentiment, as we strongly believe that common-sense and connotative knowledge are essential to the identification of irony and implicit meaning in tweets. We show promising results and the presented approach can provide a solid baseline and serve as a staging ground to build on in future experiments for irony detection in Dutch. 2022.wassa-1.16 @@ -286,7 +286,7 @@ <fixed-case>IUCL</fixed-case> at <fixed-case>WASSA</fixed-case> 2022 Shared Task: A Text-only Approach to Empathy and Emotion Detection YueChen YingnanJu - SandraKübler + SandraKübler 228-232 Our system, IUCL, participated in the WASSA 2022 Shared Task on Empathy Detection and Emotion Classification. Our main goal in building this system is to investigate how the use of demographic attributes influences performance. Our (official) results show that our text-only systems perform very competitively, ranking first in the empathy detection task, reaching an average Pearson correlation of 0.54, and second in the emotion classification task, reaching a Macro-F of 0.572. Our systems that use both text and demographic data are less competitive. 2022.wassa-1.21 @@ -353,7 +353,7 @@ SoumitraGhosh DhirendraMaurya AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 255-260 Computational comprehension and identifying emotional components in language have been critical in enhancing human-computer connection in recent years. The WASSA 2022 Shared Task introduced four tracks and released a dataset of news stories: Track-1 for Empathy and Distress Prediction, Track-2 for Emotion classification, Track-3 for Personality prediction, and Track-4 for Interpersonal Reactivity Index prediction at the essay level. This paper describes our participation in the WASSA 2022 shared task on the tasks mentioned above. We developed multi-task deep learning methods to address Tracks 1 and 2 and machine learning models for Track 3 and 4. Our developed systems achieved average Pearson scores of 0.483, 0.05, and 0.08 for Track 1, 3, and 4, respectively, and a macro F1 score of 0.524 for Track 2 on the test set. We ranked 8th, 11th, 2nd and 2nd for tracks 1, 2, 3, and 4 respectively. 2022.wassa-1.26 @@ -392,7 +392,7 @@ <fixed-case>SURREY</fixed-case>-<fixed-case>CTS</fixed-case>-<fixed-case>NLP</fixed-case> at <fixed-case>WASSA</fixed-case>2022: An Experiment of Discourse and Sentiment Analysis for the Prediction of Empathy, Distress and Emotion ShenbinQian - ConstantinOrasan + ConstantinOrasan DipteshKanojia HadeelSaadany FélixDo Carmo diff --git a/data/xml/2022.wat.xml b/data/xml/2022.wat.xml index c87fb6904e..9ae673b547 100644 --- a/data/xml/2022.wat.xml +++ b/data/xml/2022.wat.xml @@ -24,7 +24,7 @@ ShantipriyaParida AnoopKunchukuttan MakotoMorishita - OndřejBojar + OndřejBojar ChenhuiChu AkikoEriguchi KaoriAbe @@ -73,7 +73,7 @@ YilunLiu ZhenZhang ShiminTao - JunhuiLi + JunhuiLi HaoYang 59–63 In this paper we describe our submission to the shared tasks of the 9th Workshop on Asian Translation (WAT 2022) on NICT–SAP under the team name ”HwTscSU”. The tasks involve translation from 5 languages into English and vice-versa in two domains: IT domain and Wikinews domain. The purpose is to determine the feasibility of multilingualism, domain adaptation or document-level knowledge given very little to none clean parallel corpora for training. Our approach for all translation tasks mainly focused on pre-training NMT models on general datasets and fine-tuning them on domain-specific datasets. Due to the small amount of parallel corpora, we collected and cleaned the OPUS dataset including three IT domain corpora, i.e., GNOME, KDE4, and Ubuntu. We then trained Transformer models on the collected dataset and fine-tuned on corresponding dev set. The BLEU scores greatly improved in comparison with other systems. Our submission ranked 1st in all IT-domain tasks and in one out of eight ALT domain tasks. @@ -117,7 +117,7 @@ Sahinur RahmanLaskar RiyankaManna ParthaPakray - SivajiBandyopadhyay + SivajiBandyopadhyay 78–81 In the domain of natural language processing, machine translation is a well-defined task where one natural language is automatically translated to another natural language. The deep learning-based approach of machine translation, known as neural machine translation attains remarkable translational performance. However, it requires a sufficient amount of training data which is a critical issue for low-resource pair translation. To handle the data scarcity problem, the multilingual concept has been investigated in neural machine translation in different settings like many-to-one and one-to-many translation. WAT2022 (Workshop on Asian Translation 2022) organizes (hosted by the COLING 2022) Indic tasks: English-to-Indic and Indic-to-English translation tasks where we have participated as a team named CNLP-NITS-PP. Herein, we have investigated a transliteration-based approach, where Indic languages are transliterated into English script and shared sub-word level vocabulary during the training phase. We have attained BLEU scores of 2.0 (English-to-Bengali), 1.10 (English-to-Assamese), 4.50 (Bengali-to-English), and 3.50 (Assamese-to-English) translation, respectively. 2022.wat-1.9 @@ -170,7 +170,7 @@ PankajDadure RiyankaManna ParthaPakray - SivajiBandyopadhyay + SivajiBandyopadhyay 111–116 Automatic translation of one natural language to another is a popular task of natural language processing. Although the deep learning-based technique known as neural machine translation (NMT) is a widely accepted machine translation approach, it needs an adequate amount of training data, which is a challenging issue for low-resource pair translation. Moreover, the multimodal concept utilizes text and visual features to improve low-resource pair translation. WAT2022 (Workshop on Asian Translation 2022) organizes (hosted by the COLING 2022) English to Bengali multimodal translation task where we have participated as a team named CNLP-NITS-PP in two tracks: 1) text-only and 2) multimodal translation. Herein, we have proposed a transliteration-based phrase pairs augmentation approach which shows improvement in the multimodal translation task and achieved benchmark results on Bengali Visual Genome 1.0 dataset. We have attained the best results on the challenge and evaluation test set for English to Bengali multimodal translation with BLEU scores of 28.70, 43.90 and RIBES scores of 0.688931, 0.780669, respectively. 2022.wat-1.14 @@ -183,7 +183,7 @@ Md FaizalKarim RiyankaManna ParthaPakray - SivajiBandyopadhyay + SivajiBandyopadhyay 117–122 Machine translation translates one natural language to another, a well-defined natural language processing task. Neural machine translation (NMT) is a widely accepted machine translation approach, but it requires a sufficient amount of training data, which is a challenging issue for low-resource pair translation. Moreover, the multimodal concept utilizes text and visual features to improve low-resource pair translation. WAT2022 (Workshop on Asian Translation 2022) organizes (hosted by the COLING 2022) English to Hindi multimodal translation task where we have participated as a team named CNLP-NITS-PP in two tracks: 1) text-only and 2) multimodal translation. Herein, we have proposed a transliteration-based phrase pairs augmentation approach, which shows improvement in the multimodal translation task. We have attained the second best results on the challenge test set for English to Hindi multimodal translation with BLEU score of 39.30, and a RIBES score of 0.791468. 2022.wat-1.15 diff --git a/data/xml/2022.wiesp.xml b/data/xml/2022.wiesp.xml index 44855c20be..f5f83f757b 100644 --- a/data/xml/2022.wiesp.xml +++ b/data/xml/2022.wiesp.xml @@ -72,7 +72,7 @@ Linking a Hypothesis Network From the Domain of Invasion Biology to a Corpus of Scientific Abstracts: The <fixed-case>INAS</fixed-case> Dataset MarcBrinner TinaHeger - SinaZarriess + SinaZarriess 32–42 We investigate the problem of identifying the major hypothesis that is addressed in a scientific paper. To this end, we present a dataset from the domain of invasion biology that organizes a set of 954 papers into a network of fine-grained domain-specific categories of hypotheses. We carry out experiments on classifying abstracts according to these categories and present a pilot study on annotating hypothesis statements within the text. We find that hypothesis statements in our dataset are complex, varied and more or less explicit, and, importantly, spread over the whole abstract. Experiments with BERT-based classifiers show that these models are able to classify complex hypothesis statements to some extent, without being trained on sentence-level text span annotations. 2022.wiesp-1.5 @@ -119,7 +119,7 @@ Detecting Entities in the Astrophysics Literature: A Comparison of Word-based and Span-based Entity Recognition Methods - XiangDai + XiangDai SarvnazKarimi 78–83 Information Extraction from scientific literature can be challenging due to the highly specialised nature of such text. We describe our entity recognition methods developed as part of the DEAL (Detecting Entities in the Astrophysics Literature) shared task. The aim of the task is to build a system that can identify Named Entities in a dataset composed by scholarly articles from astrophysics literature. We planned our participation such that it enables us to conduct an empirical comparison between word-based tagging and span-based classification methods. When evaluated on two hidden test sets provided by the organizer, our best-performing submission achieved F1 scores of 0.8307 (validation phase) and 0.7990 (testing phase). @@ -184,7 +184,7 @@ Atilla KaanAlkan CyrilGrouin FabianSchussler - PierreZweigenbaum + PierreZweigenbaum 131–139 The increased interest in time-domain astronomy over the last decades has resulted in a substantial increase in observation reports publication leading to a saturation of how astrophysicists read, analyze and classify information. Due to the short life span of the detected astronomical events, the information related to the characterization of new phenomena has to be communicated and analyzed very rapidly to allow other observatories to react and conduct their follow-up observations. This paper introduces TDAC: the first Corpus in Time-Domain Astrophysics, based on observation reports. We also present the NLP experiments we made for named entity recognition based on annotations we made and annotations from the WIESP NLP Challenge. 2022.wiesp-1.15 @@ -207,7 +207,7 @@ Atilla KaanAlkan CyrilGrouin FabianSchussler - PierreZweigenbaum + PierreZweigenbaum 145–150 Detecting Entities in the Astrophysics Literature (DEAL) is a proposed shared task in the scope of the first Workshop on Information Extraction from Scientific Publications (WIESP) at AACL-IJCNLP 2022. It aims to propose systems identifying astrophysical named entities. This article presents our system based on a majority voting strategy of an ensemble composed of multiple SciBERT models. The system we propose is ranked second and outperforms the baseline provided by the organisers by achieving an F1 score of 0.7993 and a Matthews Correlation Coefficient (MCC) score of 0.8978 in the testing phase. 2022.wiesp-1.17 diff --git a/data/xml/2022.wildre.xml b/data/xml/2022.wildre.xml index a351ee93bd..da84e7ba3d 100644 --- a/data/xml/2022.wildre.xml +++ b/data/xml/2022.wildre.xml @@ -3,10 +3,10 @@ Proceedings of the WILDRE-6 Workshop within the 13th Language Resources and Evaluation Conference - Girish NathJha + Girish NathJha SobhaL. KalikaBali - Atul Kr.Ojha + Atul Kr.Ojha European Language Resources Association
Marseille, France
June @@ -38,7 +38,7 @@
Leveraging Sub Label Dependencies in Code Mixed <fixed-case>I</fixed-case>ndian Languages for Part-Of-Speech Tagging using Conditional Random Fields. - Akash KumarGautam + Akash KumarGautam 13–17 Code-mixed text sequences often lead to challenges in the task of correct identification of Part-Of-Speech tags. However, lexical dependencies created while alternating between multiple languages can be leveraged to improve the performance of such tasks. Indian languages with rich morphological structure and highly inflected nature provide such an opportunity. In this work, we exploit these sub-label dependencies using conditional random fields (CRFs) by defining feature extraction functions on three distinct language pairs (Hindi-English, Bengali-English, and Telugu-English). Our results demonstrate a significant increase in the tagging performance if the feature extraction functions employ the rich inner structure of such languages. 2022.wildre-1.3 @@ -111,7 +111,7 @@ Classification of Multiword Expressions in <fixed-case>M</fixed-case>alayalam TreesaCyriac - SobhaLalitha Devi + SobhaLalitha Devi 55–59 Multiword expression is an interesting concept in languages and the MWEs of a language are not easy for a non-native speaker to understand. It includes lexicalized phrases, idioms, collocations etc. Data on multiwords are helpful in language processing. ‘Multiword expressions in Malayalam’ is a less studied area. In this paper, we are trying to explore multiwords in Malayalam and to classify them as per the three idiosyncrasies: semantic idiosyncrasy, syntactic idiosyncrasy, and statistic idiosyncrasy. Though these are already identified, they are not being studied in Malayalam. The classification and features are given and are studied using Malayalam multiwords. Through this study, we identified how the linguistic features of Malayalam such as agglutination influence its multiword expressions in terms of pronunciation and spelling. Malayalam has a set of code-mixed multiword expressions which is also addressed in this study. 2022.wildre-1.10 @@ -123,7 +123,7 @@ DeepakAlok AkankshaBansal Atul Kr.Ojha - John P.McCrae + John P.McCrae 60–67 This paper presents the development of the Parallel Universal Dependency (PUD) Treebank for two Indo-Aryan languages: Bengali and Magahi. A treebank of 1,000 sentences has been created using a parallel corpus of English and the UD framework. A preliminary set of sentences was annotated manually - 600 for Bengali and 200 for Magahi. The rest of the sentences were built using the Bengali and Magahi parser. The sentences have been translated and annotated manually by the authors, some of whom are also native speakers of the languages. The objective behind this work is to build a syntactically-annotated linguistic repository for the aforementioned languages, that can prove to be a useful resource for building further NLP tools. Additionally, Bengali and Magahi parsers were also created which is built on machine learning approach. The accuracy of the Bengali parser is 78.13% in the case of UPOS; 76.99% in the case of XPOS, 56.12% in the case of UAS; and 47.19% in the case of LAS. The accuracy of Magahi parser is 71.53% in the case of UPOS; 66.44% in the case of XPOS, 58.05% in the case of UAS; and 33.07% in the case of LAS. This paper also includes an illustration of the annotation schema followed, the findings of the Parallel Universal Dependency (PUD) treebank, and it’s resulting linguistic analysis 2022.wildre-1.11 @@ -141,7 +141,7 @@ Automatic Identification of Explicit Connectives in <fixed-case>M</fixed-case>alayalam KumariSheeja S - SobhaLalitha Devi + SobhaLalitha Devi 74–79 This work presents an automatic identification of explicit connectives and its arguments using supervised method, Conditional Random Fields (CRFs). In this work, we focus on the identification of connectives and their arguments in the corpus. We consider explicit connectives and its arguments for the present study. The corpus we have considered has 4,000 sentences from Malayalam documents and manually annotated the corpus for POS, chunk, clause, discourse connectives and its arguments. The corpus thus annotated is used for building the base engine. The percentage of the performance of the system is evaluated based on the precision, recall and F-score and obtained encouraging results. We have analysed the errors generated by the system and used the features obtained from the anlaysis to improve the performance of the system 2022.wildre-1.13 diff --git a/data/xml/2022.winlp.xml b/data/xml/2022.winlp.xml index 7ed45a2e67..c917b6ad52 100644 --- a/data/xml/2022.winlp.xml +++ b/data/xml/2022.winlp.xml @@ -8,7 +8,7 @@ BonaventureDossou TirthankarGhosal HatemHaddad - Haley M.Lepp + Haley M.Lepp FatemehsadatMireshghallah SurangikaRanathunga XandaSchofield diff --git a/data/xml/2022.wit.xml b/data/xml/2022.wit.xml index c48f3cbe21..9700dcc75e 100644 --- a/data/xml/2022.wit.xml +++ b/data/xml/2022.wit.xml @@ -4,7 +4,7 @@ Proceedings of the 2nd Workshop on Deriving Insights from User-Generated Text EstevamHruschka - TomMitchell + TomMitchell DunjaMladenic MarkoGrobelnik NikitaBhutani diff --git a/data/xml/2022.wmt.xml b/data/xml/2022.wmt.xml index 71b9f26060..41eb55084c 100644 --- a/data/xml/2022.wmt.xml +++ b/data/xml/2022.wmt.xml @@ -5,28 +5,28 @@ Proceedings of the Seventh Conference on Machine Translation (WMT) PhilippKoehn LoïcBarrault - OndřejBojar + OndřejBojar FethiBougares - RajenChatterjee - Marta R.Costa-jussà + RajenChatterjee + Marta R.Costa-jussà ChristianFedermann MarkFishel - AlexanderFraser + AlexanderFraser MarkusFreitag YvetteGraham RomanGrundkiewicz PacoGuzman BarryHaddow MatthiasHuck - AntonioJimeno Yepes + AntonioJimeno Yepes TomKocmi - AndréMartins + AndréMartins MakotoMorishita ChristofMonz MasaakiNagata ToshiakiNakazawa - MatteoNegri - AurélieNévéol + MatteoNegri + AurélieNévéol MarianaNeves MartinPopel MarcoTurchi @@ -62,7 +62,7 @@ ToshiakiNakazawaThe University of Tokyo MichalNovákCharles University, Faculty of Mathematics and Physics MartinPopelCharles University, Faculty of Mathematics and Physics, UFAL - MajaPopovićADAPT, Dublin City University + MajaPopovićADAPT, Dublin City University 1-45 This paper presents the results of the General Machine Translation Task organised as part of the Conference on Machine Translation (WMT) 2022. In the general MT task, participants were asked to build machine translation systems for any of 11 language pairs, to be evaluated on test sets consisting of four different domains. We evaluate system outputs with human annotators using two different techniques: reference-based direct assessment and (DA) and a combination of DA and scalar quality metric (DA+SQM). 2022.wmt-1.1 @@ -78,7 +78,7 @@ EleftheriosAvramidisGerman Research Center for Artificial Intelligence (DFKI) TomKocmiMicrosoft GeorgeFosterGoogle - AlonLavieUnbabel/Carnegie Mellon University + AlonLavieUnbabel/Carnegie Mellon University André F. T.MartinsUnbabel, Instituto de Telecomunicacoes 46-68 This paper presents the results of the WMT22 Metrics Shared Task. Participants submitting automatic MT evaluation metrics were asked to score the outputs of the translation systems competing in the WMT22 News Translation Task on four different domains: news, social, ecommerce, and chat. All metrics were evaluated on how well they correlate with human ratings at the system and segment level. Similar to last year, we acquired our own human ratings based on expert-based human evaluation via Multidimensional Quality Metrics (MQM). This setup had several advantages, among other things: (i) expert-based evaluation is more reliable, (ii) we extended the pool of translations by 5 additional translations based on MBR decoding or rescoring which are challenging for current metrics. In addition, we initiated a challenge set subtask, where participants had to create contrastive test suites for evaluating metrics’ ability to capture and penalise specific types of translation errors. Finally, we present an extensive analysis on how well metrics perform on three language pairs: English to German, English to Russian and Chinese to English. The results demonstrate the superiority of neural-based learned metrics and demonstrate again that overlap metrics like Bleu, spBleu or chrf correlate poorly with human ratings. The results also reveal that neural-based metrics are remarkably robust across different domains and challenges. @@ -88,14 +88,14 @@ Findings of the <fixed-case>WMT</fixed-case> 2022 Shared Task on Quality Estimation ChrysoulaZervaInstituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon - FrédéricBlainUniversity of Wolverhampton + FrédéricBlainUniversity of Wolverhampton RicardoReiUnbabel/INESC-ID PiyawatLertvittayakumjornGoogle - José G.C. de SouzaUnbabel + José G.C. de SouzaUnbabel SteffenEgerNLLG Lab, Bielefeld University DipteshKanojiaUniversity of Surrey DuarteAlvesInstituto Superior Técnico / Unbabel - ConstantinOrăsanUniversity of Surrey + ConstantinOrăsanUniversity of Surrey MarinaFomichevaUniversity of Sheffield André F. T.MartinsUnbabel, Instituto de Telecomunicacoes LuciaSpeciaImperial College London @@ -118,7 +118,7 @@ Findings of the <fixed-case>WMT</fixed-case> 2022 Shared Task on Automatic Post-Editing - PushpakBhattacharyyaIIT Bombay + PushpakBhattacharyyaIIT Bombay RajenChatterjeeApple Inc. MarkusFreitagGoogle Research DipteshKanojiaUniversity of Surrey @@ -176,7 +176,7 @@ Gender Bias Mitigation for <fixed-case>NMT</fixed-case> Involving Genderless Languages AnderCorralOrai NLP Technologies - XabierSaralegiOrai NLP technologies + XabierSaralegiOrai NLP technologies 165-176 It has been found that NMT systems have a strong preference towards social defaults and biases when translating certain occupations, which due to their widespread use, can unintentionally contribute to amplifying and perpetuating these patterns. In that sense, this work focuses on sentence-level gender agreement between gendered entities and occupations when translating from genderless languages to languages with grammatical gender. Specifically, we address the Basque to Spanish translation direction for which bias mitigation has not been addressed. Gender information in Basque is explicit in neither the grammar nor the morphology. It is only present in a limited number of gender specific common nouns and person proper names. We propose a template-based fine-tuning strategy with explicit gender tags to provide a stronger gender signal for the proper inflection of occupations. This strategy is compared against systems fine-tuned on real data extracted from Wikipedia biographies. We provide a detailed gender bias assessment analysis and perform a template ablation study to determine the optimal set of templates. We report a substantial gender bias mitigation (up to 50% on gender bias scores) while keeping the original translation quality. 2022.wmt-1.10 @@ -226,11 +226,11 @@ Inria-<fixed-case>ALMA</fixed-case>na<fixed-case>CH</fixed-case> at <fixed-case>WMT</fixed-case> 2022: Does Transcription Help Cross-Script Machine Translation? - JesujobaAlabiSaarland University + JesujobaAlabiSaarland University LydiaNishimweInria BenjaminMullerInria CamilleReyInria - BenoîtSagotInria + BenoîtSagotInria RachelBawdenInria 233-243 This paper describes the Inria ALMAnaCH team submission to the WMT 2022 general translation shared task. Participating in the language directions cs,ru,uk→en and cs↔uk, we experiment with the use of a dedicated Latin-script transcription convention aimed at representing all Slavic languages involved in a way that maximises character- and word-level correspondences between them as well as with the English language. Our hypothesis was that bringing the source and target language closer could have a positive impact on machine translation results. We provide multiple comparisons, including bilingual and multilingual baselines, with and without transcription. Initial results indicate that the transcription strategy was not successful, resulting in lower results than baselines. We nevertheless submitted our multilingual, transcribed models as our primary systems, and in this paper provide some indications as to why we got these negative results. @@ -394,7 +394,7 @@ e<fixed-case>T</fixed-case>ranslation’s Submissions to the <fixed-case>WMT</fixed-case>22 General Machine Translation Task CsabaOraveczEuropean Commission, Directorate-General for Translation - KatinaBontchevaSogeti + KatinaBontchevaSogeti DavidKolovratnìkFujitsu BogomilKovachevEuropean Commission, Directorate-General for Translation ChristopherScottEuropean Commission, Directorate-General for Translation @@ -407,7 +407,7 @@ <fixed-case>CUNI</fixed-case> Systems for the <fixed-case>WMT</fixed-case> 22 <fixed-case>C</fixed-case>zech-<fixed-case>U</fixed-case>krainian Translation Task MartinPopelCharles University, Faculty of Mathematics and Physics, UFAL JindřichLibovickýCharles Univeristy - JindřichHelclCharles University in Prague + JindřichHelclCharles University in Prague 352-357 We present Charles University submissions to the WMT 22 GeneralTranslation Shared Task on Czech-Ukrainian and Ukrainian-Czech machine translation. We present two constrained submissions based on block back-translation and tagged back-translation and experiment with rule-basedromanization of Ukrainian. Our results show that the romanization onlyhas a minor effect on the translation quality. Further, we describe Charles Translator,a system that was developed in March 2022 as a response to the migrationfrom Ukraine to the Czech Republic. Compared to our constrained systems,it did not use the romanization and used some proprietary data sources. 2022.wmt-1.30 @@ -459,7 +459,7 @@ ShahramKhadivieBay XuanliHeMonash University DinhPhungMonash University, Australia - GholamrezaHaffariMonash University + GholamrezaHaffariMonash University 381-396 Previous works mostly focus on either multilingual or multi-domain aspects of neural machine translation (NMT). This paper investigates whether the domain information can be transferred across languages on the composition of multi-domain and multilingual NMT, particularly for the incomplete data condition where in-domain bitext is missing for some language pairs. Our results in the curated leave-one-domain-out experiments show that multi-domain multilingual (MDML) NMT can boost zero-shot translation performance up to +10 gains on BLEU, as well as aid the generalisation of multi-domain NMT to the missing domain. We also explore strategies for effective integration of multilingual and multi-domain NMT, including language and domain tag combination and auxiliary task training. We find that learning domain-aware representations and adding target-language tags to the encoder leads to effective MDML-NMT. 2022.wmt-1.34 @@ -471,7 +471,7 @@ TingWangDalian University of Technology HuanLiuDalian University of Technology JunpengLiuDalian University of Technology - DegenHuangDalian University of Technology + DegenHuangDalian University of Technology 397-402 This paper describes DUTNLP Lab’s submission to the WMT22 General MT Task on four translation directions: English to/from Chinese and English to/from Japanese under the constrained condition. Our primary system are built on several Transformer variants which employ wider FFN layer or deeper encoder layer. The bilingual data are filtered by detailed data pre-processing strategies and four data augmentation methods are combined to enlarge the training data with the provided monolingual data. Several common methods are also employed to further improve the model performance, such as fine-tuning, model ensemble and post-editing. As a result, our constrained systems achieve 29.01, 63.87, 41.84, and 24.82 BLEU scores on Chinese-to-English, English-to-Chinese, English-to-Japanese, and Japanese-to-English, respectively. 2022.wmt-1.35 @@ -562,7 +562,7 @@ Test Suite Evaluation: Morphological Challenges and Pronoun Translation - MarionWeller-Di MarcoLudwig-Maximilians-Universität München + MarionWeller-Di MarcoLudwig-Maximilians-Universität München AlexanderFraserLudwig-Maximilians-Universität München 458-468 This paper summarizes the results of our test suite evaluation with a main focus on morphology for the language pairs English to/from German. We look at the translation of morphologically complex words (DE–EN), and evaluatewhether English noun phrases are translated as compounds vs. phrases into German. Furthermore, we investigate the preservation of morphological features (gender in EN–DE pronoun translation and number in morpho-syntacticallycomplex structures for DE–EN). Our results indicate that systems are able to interpret linguistic structures to obtain relevant information, but also that translation becomes more challenging with increasing complexity, as seen, for example, when translating words with negation or non-concatenative properties, and for the morecomplex cases of the pronoun translation task. @@ -574,7 +574,7 @@ DuarteAlvesInstituto Superior Técnico / Unbabel RicardoReiUnbabel/INESC-ID Ana CFarinhaUnbabel - José G.C. de SouzaUnbabel + José G.C. de SouzaUnbabel André F. T.MartinsUnbabel, Instituto de Telecomunicacoes 469-478 Automatic translations with critical errors may lead to misinterpretations and pose several risks for the user. As such, it is important that Machine Translation (MT) Evaluation systems are robust to these errors in order to increase the reliability and safety of Machine Translation systems. Here we introduce SMAUG a novel Sentence-level Multilingual AUGmentation approach for generating translations with critical errors and apply this approach to create a test set to evaluate the robustness of MT metrics to these errors. We show that current State-of-the-Art metrics are improving their capability to distinguish translations with and without critical errors and to penalize the first accordingly. We also show that metrics tend to struggle with errors related to named entities and numbers and that there is a high variance in the robustness of current methods to translations with critical errors. @@ -658,7 +658,7 @@ Unsupervised Embedding-based Metric for <fixed-case>MT</fixed-case> Evaluation with Improved Human Correlation AnanyaMukherjeeInternational Institute of Information Technology Hyderabad - ManishShrivastavaInternational Institute of Information Technology Hyderabad + ManishShrivastavaInternational Institute of Information Technology Hyderabad 558-563 In this paper, we describe our submission to the WMT22 metrics shared task. Our metric focuses on computing contextual and syntactic equivalences along with lexical, morphological, and semantic similarity. The intent is to capture the fluency and context of the MT outputs along with their adequacy. Fluency is captured using syntactic similarity and context is captured using sentence similarity leveraging sentence embeddings. The final sentence translation score is the weighted combination of three similarity scores: a) Syntactic Similarity b) Lexical, Morphological and Semantic Similarity, and c) Contextual Similarity. This paper outlines two improved versions of MEE i.e., MEE2 and MEE4. Additionally, we report our experiments on language pairs of en-de, en-ru and zh-en from WMT17-19 testset and further depict the correlation with human assessments. 2022.wmt-1.49 @@ -667,7 +667,7 @@ <fixed-case>REUSE</fixed-case>: <fixed-case>RE</fixed-case>ference-free <fixed-case>U</fixed-case>n<fixed-case>S</fixed-case>upervised Quality Estimation Metric AnanyaMukherjeeInternational Institute of Information Technology Hyderabad - ManishShrivastavaInternational Institute of Information Technology Hyderabad + ManishShrivastavaInternational Institute of Information Technology Hyderabad 564-568 This paper describes our submission to the WMT2022 shared metrics task. Our unsupervised metric estimates the translation quality at chunk-level and sentence-level. Source and target sentence chunks are retrieved by using a multi-lingual chunker. The chunk-level similarity is computed by leveraging BERT contextual word embeddings and sentence similarity scores are calculated by leveraging sentence embeddings of Language-Agnostic BERT models. The final quality estimation score is obtained by mean pooling the chunk-level and sentence-level similarity scores. This paper outlines our experiments and also reports the correlation with human judgements for en-de, en-ru and zh-en language pairs of WMT17, WMT18 and WMT19 test sets. 2022.wmt-1.50 @@ -688,13 +688,13 @@ <fixed-case>COMET</fixed-case>-22: Unbabel-<fixed-case>IST</fixed-case> 2022 Submission for the Metrics Shared Task RicardoReiUnbabel/INESC-ID - José G.C. de SouzaUnbabel + José G.C. de SouzaUnbabel DuarteAlvesInstituto Superior Técnico / Unbabel ChrysoulaZervaInstituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon Ana CFarinhaUnbabel TaisiyaGlushkovaInstituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon - AlonLavieUnbabel/Carnegie Mellon University - LuisaCoheurINESC-ID/Instituto Superior Técnico + AlonLavieUnbabel/Carnegie Mellon University + LuisaCoheurINESC-ID/Instituto Superior Técnico André F. T.MartinsUnbabel, Instituto de Telecomunicacoes 578-585 In this paper, we present the joint contribution of Unbabel and IST to the WMT 2022 Metrics Shared Task. Our primary submission – dubbed COMET-22 – is an ensemble between a COMET estimator model trained with Direct Assessments and a newly proposed multitask model trained to predict sentence-level scores along with OK/BAD word-level tags derived from Multidimensional Quality Metrics error annotations. These models are ensembled together using a hyper-parameter search that weights different features extracted from both evaluation models and combines them into a single score. For the reference-free evaluation, we present CometKiwi. Similarly to our primary submission, CometKiwi is an ensemble between two models. A traditional predictor-estimator model inspired by OpenKiwi and our new multitask model trained on Multidimensional Quality Metrics which can also be used without references. Both our submissions show improved correlations compared to state-of-the-art metrics from last year as well as increased robustness to critical errors. @@ -750,7 +750,7 @@ ChanjunParkUpstage HyeonseokMoonKorea University JaehyungSeoKorea University - HeuiseokLimKorea University + HeuiseokLimKorea University 606-614 This paper presents KU X Upstage’s submission to the quality estimation (QE): critical error detection (CED) shared task in WMT22. We leverage the XLM-RoBERTa large model without utilizing any additional parallel data. To the best of our knowledge, we apply prompt-based fine-tuning to the QE task for the first time. To maximize the model’s language understanding capability, we reformulate the CED task to be similar to the masked language model objective, which is a pre-training strategy of the language model. We design intuitive templates and label words, and include auxiliary descriptions such as demonstration or Google Translate results in the input sequence. We further improve the performance through the template ensemble, and as a result of the shared task, our approach achieve the best performance for both English-German and Portuguese-English language pairs in an unconstrained setting. 2022.wmt-1.56 @@ -763,7 +763,7 @@ ShujianHuangNational Key Laboratory for Novel Software Technology, Nanjing University ShiminTaoHuawei HaoYangHuawei Co. Ltd - JiajunChenNanjing University + JiajunChenNanjing University 615-620 This paper presents submissions of the NJUNLP team in WMT 2022Quality Estimation shared task 1, where the goal is to predict the sentence-level and word-level quality for target machine translations. Our system explores pseudo data and multi-task learning. We propose several novel methods to generate pseudo data for different annotations using the conditional masked language model and the neural machine translation model. The proposed methods control the decoding process to generate more real pseudo translations. We pre-train the XLMR-large model with pseudo data and then fine-tune this model with real data both in the way of multi-task learning. We jointly learn sentence-level scores (with regression and rank tasks) and word-level tags (with a sequence tagging task). Our system obtains competitive results on different language pairs and ranks first place on both sentence- and word-level sub-tasks of the English-German language pair. 2022.wmt-1.57 @@ -778,7 +778,7 @@ KazushigeOuchiToshiba (China) Co., Ltd. YufengChenBeijing Jiaotong University JianLiuBeijing Jiaotong University - JinanXuBeijing Jiaotong University + JinanXuBeijing Jiaotong University 621-626 This paper presents the BJTU-Toshiba joint submission for WMT 2022 quality estimation shared task. We only participate in Task 1 (quality prediction) of the shared task, focusing on the sentence-level MQM prediction. The techniques we experimented with include the integration of monolingual language models and the pre-finetuning of pre-trained representations. We tried two styles of pre-finetuning, namely Translation Language Modeling and Replaced Token Detection. We demonstrate the competitiveness of our system compared to the widely adopted XLM-RoBERTa baseline. Our system is also the top-ranking system on the Sentence-level MQM Prediction for the English-German language pairs. 2022.wmt-1.58 @@ -801,11 +801,11 @@ ChrysoulaZervaInstituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon Ana CFarinhaUnbabel ChristineMarotiUnbabel - José G.C. de SouzaUnbabel + José G.C. de SouzaUnbabel TaisiyaGlushkovaInstituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon DuarteAlvesInstituto Superior Técnico / Unbabel - LuisaCoheurINESC-ID/Instituto Superior Técnico - AlonLavieUnbabel/Carnegie Mellon University + LuisaCoheurINESC-ID/Instituto Superior Técnico + AlonLavieUnbabel/Carnegie Mellon University André F. T.MartinsUnbabel, Instituto de Telecomunicacoes 634-645 We present the joint contribution of IST and Unbabel to the WMT 2022 Shared Task on Quality Estimation (QE). Our team participated in all three subtasks: (i) Sentence and Word-level Quality Prediction; (ii) Explainable QE; and (iii) Critical Error Detection. For all tasks we build on top of the COMET framework, connecting it with the predictor-estimator architecture of OpenKiwi, and equipping it with a word-level sequence tagger and an explanation extractor. Our results suggest that incorporating references during pretraining improves performance across several language pairs on downstream tasks, and that jointly training with sentence and word-level objectives yields a further boost. Furthermore, combining attention and gradient information proved to be the top strategy for extracting good explanations of sentence-level QE models. Overall, our submissions achieved the best results for all three tasks for almost all language pairs by a considerable margin. @@ -854,7 +854,7 @@ <fixed-case>CUNI</fixed-case> Non-Autoregressive System for the <fixed-case>WMT</fixed-case> 22 Efficient Translation Shared Task - JindřichHelclCharles University in Prague + JindřichHelclCharles University in Prague 668-670 We present a non-autoregressive system submission to the WMT 22 Efficient Translation Shared Task. Our system was used by Helcl et al. (2022) in an attempt to provide fair comparison between non-autoregressive and autoregressive models. This submission is an effort to establish solid baselines along with sound evaluation methodology, particularly in terms of measuring the decoding speed. The model itself is a 12-layer Transformer model trained with connectionist temporal classification on knowledge-distilled dataset by a strong autoregressive teacher model. 2022.wmt-1.64 @@ -898,7 +898,7 @@ <fixed-case>IIT</fixed-case> <fixed-case>B</fixed-case>ombay’s <fixed-case>WMT</fixed-case>22 Automatic Post-Editing Shared Task Submission SourabhDeoghareIIT Bombay - PushpakBhattacharyyaIndian Institute of Technology Bombay and Patna + PushpakBhattacharyyaIndian Institute of Technology Bombay and Patna 682-688 2022.wmt-1.67 deoghare-bhattacharyya-2022-iit @@ -925,14 +925,14 @@ MaikaVicente NavarroLeica Biosystems, Australia LanaYeganovaNCBI/NLM/NIH, Bethesda, USA DinaWiemannNovartis AG, Basel, Switzerland - Giorgio MariaDi NunzioUniversity of Padua, Italy + Giorgio MariaDi NunzioUniversity of Padua, Italy FedericaVezzaniUniversity of Padua, Italy ChristelGerardinSorbonne Universit�, Franc RachelBawdenInria, Paris, France Darryl JohanEstradaBarcelona Supercomputing Center, Spain SalvadorLima-lopezBarcelona Supercomputing Center, Spain EulaliaFarre-maduelBarcelona Supercomputing Center, Spain - MartinKrallingerBarcelona Supercomputing Center, Spai + MartinKrallingerBarcelona Supercomputing Center, Spai CristianGrozeaFraunhofer Institute FOKUS, Berlin, Germany AurelieNeveolUniversit� Paris-Saclay, CNRS, LISN, Orsay, France 694-723 @@ -943,10 +943,10 @@ Findings of the <fixed-case>WMT</fixed-case> 2022 Shared Task on Chat Translation Ana CFarinhaUnbabel - M. AminFarajianUnbabel + M. AminFarajianUnbabel MariannaBuchicchioUnbabel PatrickFernandesCarnegie Mellon University, Instituto de Telecomunicações - José G.C. de SouzaUnbabel + José G.C. de SouzaUnbabel HelenaMonizINESC-ID, University of Lisbon André F. T.MartinsUnbabel, Instituto de Telecomunicacoes 724-743 @@ -962,7 +962,7 @@ AlessiaBattistiUniversity of Zurich, Switzerland MichèleBergerHfH RichardBowdenUniversity of Surrey - AnneliesBraffortLISN, CNRS, Université Paris-Saclay + AnneliesBraffortLISN, CNRS, Université Paris-Saclay NecatiCihan CamgözMeta CristinaEspaña-bonetDFKI GmbH RomanGrundkiewiczMicrosoft Research @@ -971,7 +971,7 @@ AmitMoryossefBar-Ilan university, University of Zurich, ETH Zurich RegulaPerrollazHochschule fuer Heilpaedogik SabineReinhardHochschule für Heilpädagogik HfH - AnnetteRiosUniversity of Zurich + AnnetteRiosUniversity of Zurich DimitarShterionovTilburg University SandraSidler-miserezUniversity of Teacher Education in Special Needs (HfH) KatjaTissiHochschule fuer Heilpaedagogik @@ -982,7 +982,7 @@ Findings of the <fixed-case>WMT</fixed-case>’22 Shared Task on Large-Scale Machine Translation Evaluation for <fixed-case>A</fixed-case>frican Languages - David IfeoluwaAdelaniUniversity College London + David IfeoluwaAdelaniUniversity College London Md Mahfuz IbnAlamGeorge Mason University AntoniosAnastasopoulosGeorge Mason University AkshitaBhagiaAi2 @@ -991,7 +991,7 @@ FahimFaisalGeorge Mason University ChristianFedermannMicrosoft NataliaFedorovaToloka - FranciscoGuzmánMeta AI + FranciscoGuzmánMeta AI SergeyKoshelevToloka JeanMaillardMeta AI VukosiMarivateDepartment of Computer Science, University of Pretoria @@ -1007,7 +1007,7 @@ Findings of the <fixed-case>WMT</fixed-case> 2022 Shared Tasks in Unsupervised <fixed-case>MT</fixed-case> and Very Low Resource Supervised <fixed-case>MT</fixed-case> - MarionWeller-Di MarcoLudwig-Maximilians-Universität München + MarionWeller-Di MarcoLudwig-Maximilians-Universität München AlexanderFraserLudwig-Maximilians-Universität München 801-805 We present the findings of the WMT2022Shared Tasks in Unsupervised MT and VeryLow Resource Supervised MT with experiments on the language pairs German to/fromUpper Sorbian, German to/from Lower Sorbian and Lower Sorbian to/from Upper Sorbian. Upper and Lower Sorbian are minoritylanguages spoken in the Eastern parts of Germany. There are active language communitiesworking on the preservation of the languageswho also made the data used in this Shared Taskavailable.In total, four teams participated on this SharedTask, with submissions from three teams for theunsupervised sub task, and submissions fromall four teams for the supervised sub task. Inthis overview paper, we present and discuss theresults. @@ -1017,7 +1017,7 @@ Overview and Results of <fixed-case>M</fixed-case>ix<fixed-case>MT</fixed-case> Shared-Task at <fixed-case>WMT</fixed-case> 2022 VivekSrivastavaTCS Research - MayankSinghIIT Gandhinagar + MayankSinghIIT Gandhinagar 806-811 In this paper, we present an overview of the WMT 2022 shared task on code-mixed machine translation (MixMT). In this shared task, we hosted two code-mixed machine translation subtasks in the following settings: (i) monolingual to code-mixed translation and (ii) code-mixed to monolingual translation. In both the subtasks, we received registration and participation from teams across the globe showing an interest and need to immediately address the challenges with machine translation involving code-mixed and low-resource languages. 2022.wmt-1.74 @@ -1025,7 +1025,7 @@ Findings of the Word-Level <fixed-case>A</fixed-case>uto<fixed-case>C</fixed-case>ompletion Shared Task in <fixed-case>WMT</fixed-case> 2022 - FranciscoCasacubertaUniversitat Politècnica de València + FranciscoCasacubertaUniversitat Politècnica de València GeorgeFosterGoogle GuopingHuangTencent AI Lab PhilippKoehnJohns Hopkins University @@ -1033,7 +1033,7 @@ LemaoLiuTencent AI Lab ShumingShiTencent AI Lab TaroWatanabeNara Institute of Science and Technology - ChengqingZongInstitute of Automation, Chinese Academy of Sciences + ChengqingZongInstitute of Automation, Chinese Academy of Sciences 812-820 Recent years have witnessed rapid advancements in machine translation, but the state-of-the-art machine translation system still can not satisfy the high requirements in some rigorous translation scenarios. Computer-aided translation (CAT) provides a promising solution to yield a high-quality translation with a guarantee. Unfortunately, due to the lack of popular benchmarks, the research on CAT is not well developed compared with machine translation. In this year, we hold a new shared task called Word-level AutoCompletion (WLAC) for CAT in WMT. Specifically, we introduce some resources to train a WLAC model, and particularly we collect data from CAT systems as a part of test data for this shared task. In addition, we employ both automatic and human evaluations to measure the performance of the submitted systems, and our final evaluation results reveal some findings for the WLAC task. 2022.wmt-1.75 @@ -1055,7 +1055,7 @@ Focused Concatenation for Context-Aware Neural Machine Translation LorenzoLupoLig MarcoDinarelliLig - LaurentBesacierNaver Labs Europe + LaurentBesacierNaver Labs Europe 830-842 A straightforward approach to context-aware neural machine translation consists in feeding the standard encoder-decoder architecture with a window of consecutive sentences, formed by the current sentence and a number of sentences from its context concatenated to it. In this work, we propose an improved concatenation approach that encourages the model to focus on the translation of the current sentence, discounting the loss generated by target context. We also propose an additional improvement that strengthen the notion of sentence boundaries and of relative sentence distance, facilitating model compliance to the context-discounted objective. We evaluate our approach with both average-translation quality metrics and contrastive test sets for the translation of inter-sentential discourse phenomena, proving its superiority to the vanilla concatenation approach and other sophisticated context-aware systems. 2022.wmt-1.77 @@ -1084,7 +1084,7 @@ Too Brittle to Touch: Comparing the Stability of Quantization and Distillation towards Developing Low-Resource <fixed-case>MT</fixed-case> Models HarshitaDiddeeMicrosoft Research India - SandipanDandapatMicrosoft + SandipanDandapatMicrosoft MonojitChoudhuryMicrosoft Research TanujaGanuMicrosoft Research KalikaBaliMicrosoft Research Labs @@ -1132,7 +1132,7 @@ GlebErofeevLogrus Global IrinaSorokinaLogrus Global SergeGladkoffLogrus Global - GoranNenadicUniversity of Manchester + GoranNenadicUniversity of Manchester 908-919 Pre-trained language models (PLMs) often take advantage of the monolingual and multilingual dataset that is freely available online to acquire general or mixed domain knowledge before deployment into specific tasks. Extra-large PLMs (xLPLMs) are proposed very recently to claim supreme performances over smaller-sized PLMs such as in machine translation (MT) tasks. These xLPLMs include Meta-AI’s wmt21-dense-24-wide-en-X (2021) and NLLB (2022). In this work, we examine if xLPLMs are absolutely superior to smaller-sized PLMs in fine-tuning toward domain-specific MTs. We use two different in-domain data of different sizes: commercial automotive in-house data and clinical shared task data from the ClinSpEn2022 challenge at WMT2022. We choose the popular Marian Helsinki as smaller sized PLM and two massive-sized Mega-Transformers from Meta-AI as xLPLMs.Our experimental investigation shows that 1) on smaller-sized in-domain commercial automotive data, xLPLM wmt21-dense-24-wide-en-X indeed shows much better evaluation scores using SacreBLEU and hLEPOR metrics than smaller-sized Marian, even though its score increase rate is lower than Marian after fine-tuning; 2) on relatively larger-size well prepared clinical data fine-tuning, the xLPLM NLLB tends to lose its advantage over smaller-sized Marian on two sub-tasks (clinical terms and ontology concepts) using ClinSpEn offered metrics METEOR, COMET, and ROUGE-L, and totally lost to Marian on Task-1 (clinical cases) on all official metrics including SacreBLEU and BLEU; 3) metrics do not always agree with each other on the same tasks using the same model outputs; 4) clinic-Marian ranked No.2 on Task- 1 (via SacreBLEU/BLEU) and Task-3 (via METEOR and ROUGE) among all submissions. 2022.wmt-1.84 @@ -1199,8 +1199,8 @@ Unbabel-<fixed-case>IST</fixed-case> at the <fixed-case>WMT</fixed-case> Chat Translation Shared Task JoãoAlvesUnbabel Pedro HenriqueMartinsInstituto de Telecomunicações, Instituto Superior Técnico - José G.C. de SouzaUnbabel - M. AminFarajianUnbabel + José G.C. de SouzaUnbabel + M. AminFarajianUnbabel André F. T.MartinsUnbabel, Instituto de Telecomunicacoes 943-948 We present the joint contribution of IST and Unbabel to the WMT 2022 Chat Translation Shared Task. We participated in all six language directions (English ↔ German, English ↔ French, English ↔ Brazilian Portuguese). Due to the lack of domain-specific data, we use mBART50, a large pretrained language model trained on millions of sentence-pairs, as our base model. We fine-tune it using a two step fine-tuning process. In the first step, we fine-tune the model on publicly available data. In the second step, we use the validation set. After having a domain specific model, we explore the use of kNN-MT as a way of incorporating domain-specific data at decoding time. @@ -1226,7 +1226,7 @@ <fixed-case>BJTU</fixed-case>-<fixed-case>W</fixed-case>e<fixed-case>C</fixed-case>hat’s Systems for the <fixed-case>WMT</fixed-case>22 Chat Translation Task YunlongLiangBeijing Jiaotong University FandongMengWeChat AI, Tencent - JinanXuBeijing Jiaotong University + JinanXuBeijing Jiaotong University YufengChenBeijing Jiaotong University JieZhouTencent Inc. 955-961 @@ -1271,7 +1271,7 @@ Spatio-temporal Sign Language Representation and Translation YasserHamidullahDfki - JosefVan GenabithDfki + JosefVan GenabithDfki CristinaEspaña-bonetDFKI GmbH 977-982 This paper describes the DFKI-MLT submission to the WMT-SLT 2022 sign language translation (SLT) task from Swiss German Sign Language (video) into German (text). State-of-the-art techniques for SLT use a generic seq2seq architecture with customized input embeddings. Instead of word embeddings as used in textual machine translation, SLT systems use features extracted from video frames. Standard approaches often do not benefit from temporal features. In our participation, we present a system that learns spatio-temporal feature representations and translation in a single model, resulting in a real end-to-end architecture expected to better generalize to new data sets. Our best system achieved 5 \pm 1 BLEU points on the development set, but the performance on the test dropped to 0.11 \pm 0.06 BLEU points. @@ -1313,7 +1313,7 @@ Separating Grains from the Chaff: Using Data Filtering to Improve Multilingual Translation for Low-Resourced <fixed-case>A</fixed-case>frican Languages IdrisAbdulmumin MichaelBeukman - Jesujoba O.Alabi + Jesujoba O.Alabi ChrisEmezue EverlynAsiko TosinAdewumi @@ -1455,7 +1455,7 @@ <fixed-case>MUNI</fixed-case>-<fixed-case>NLP</fixed-case> Systems for <fixed-case>L</fixed-case>ower <fixed-case>S</fixed-case>orbian-<fixed-case>G</fixed-case>erman and <fixed-case>L</fixed-case>ower <fixed-case>S</fixed-case>orbian-<fixed-case>U</fixed-case>pper <fixed-case>S</fixed-case>orbian Machine Translation @ <fixed-case>WMT</fixed-case>22 EdoardoSignoroniFaculty of Informatics, Masaryk University - PavelRychlýNLP Centre, Faculty of Informatics, Masaryk University + PavelRychlýNLP Centre, Faculty of Informatics, Masaryk University 1111-1116 We describe our neural machine translation systems for the WMT22 shared task on unsupervised MT and very low resource supervised MT. We submit supervised NMT systems for Lower Sorbian-German and Lower Sorbian-Upper Sorbian translation in both directions. By using a novel tokenization algorithm, data augmentation techniques, such as Data Diversification (DD), and parameter optimization we improve on our baselines by 10.5-10.77 BLEU for Lower Sorbian-German and by 1.52-1.88 BLEU for Lower Sorbian-Upper Sorbian. 2022.wmt-1.109 @@ -1490,7 +1490,7 @@ ShivamMangaleInternational Institute of Information Technology - Hyderabad SaranshRajputInternational Institute of Information Technology, Hyderabad TanviKambleInternational Institute of Information Technology - Hyderabad - DiptiSharmaInternational Institute of Information Technology - Hyderabad + DiptiSharmaInternational Institute of Information Technology - Hyderabad VasudevVarmaInternational Institute of Information Technology - Hyderabad 1126-1130 Code-mixed machine translation has become an important task in multilingual communities and extending the task of machine translation to code mixed data has become a common task for these languages. In the shared tasks of EMNLP 2022, we try to tackle the same for both English + Hindi to Hinglish and Hinglish to English. The first task dealt with both Roman and Devanagari script as we had monolingual data in both English and Hindi whereas the second task only had data in Roman script. To our knowledge, we achieved one of the top ROUGE-L and WER scores for the first task of Monolingual to Code-Mixed machine translation. In this paper, we discuss the use of mBART with some special pre-processing and post-processing (transliteration from Devanagari to Roman) for the first task in detail and the experiments that we performed for the second task of translating code-mixed Hinglish to monolingual English. @@ -1536,7 +1536,7 @@ ShyambabuPandeyNational Institute of Technology Silchar RiyankaMannaAdamas University ParthaPakrayNational Institute of Technology Silchar - SivajiBandyopadhyayNational Institute of Technology Silchar + SivajiBandyopadhyayNational Institute of Technology Silchar 1158-1161 The mixing of two or more languages in speech or text is known as code-mixing. In this form of communication, users mix words and phrases from multiple languages. Code-mixing is very common in the context of Indian languages due to the presence of multilingual societies. The probability of the existence of code-mixed sentences in almost all Indian languages since in India English is the dominant language for social media textual communication platforms. We have participated in the WMT22 shared task of code-mixed machine translation with the team name: CNLP-NITS-PP. In this task, we have prepared a synthetic Hinglish–English parallel corpus using transliteration of original Hindi sentences to tackle the limitation of the parallel corpus, where, we mainly considered sentences that have named-entity (proper noun) from the available English-Hindi parallel corpus. With the addition of synthetic bi-text data to the original parallel corpus (train set), our transformer-based neural machine translation models have attained recall-oriented understudy for gisting evaluation (ROUGE-L) scores of 0.23815, 0.33729, and word error rate (WER) scores of 0.95458, 0.88451 at Sub-Task-1 (English-to-Hinglish) and Sub-Task-2 (Hinglish-to-English) for test set results respectively. 2022.wmt-1.116 @@ -1578,7 +1578,7 @@ <fixed-case>PRHLT</fixed-case>’s Submission to <fixed-case>WLAC</fixed-case> 2022 AngelNavarroPrhlt MiguelDomingoUniversitat Politècnica de València - FranciscoCasacubertaUniversitat Politècnica de València + FranciscoCasacubertaUniversitat Politècnica de València 1182-1186 This paper describes our submission to the Word-Level AutoCompletion shared task of WMT22. We participated in the English–German and German–English categories. We proposed a segment-based interactive machine translation approach whose central core is a machine translation (MT) model which generates a complete translation from the context provided by the task. From there, we obtain the word which corresponds to the autocompletion. With this approach, we aim to show that it is possible to use the MT models in the autocompletion task by simply performing minor changes at the decoding step, obtaining satisfactory results. 2022.wmt-1.120 @@ -1648,7 +1648,7 @@ SongmingZhangBeijing Jiaotong University HuiHuangHarbin Institute of Technology YufengChenBeijing Jiaotong University - JinanXuBeijing Jiaotong University + JinanXuBeijing Jiaotong University JianLiuBeijing Jiaotong University 1211-1216 Translation suggestion (TS) models are used to automatically provide alternative suggestions for incorrect spans in sentences generated by machine translation. This paper introduces the system used in our submission to the WMT’22 Translation Suggestion shared task. Our system is based on the ensemble of different translation architectures, including Transformer, SA-Transformer, and DynamicConv. We use three strategies to construct synthetic data from parallel corpora to compensate for the lack of supervised data. In addition, we introduce a multi-phase pre-training strategy, adding an additional pre-training phase with in-domain data. We rank second and third on the English-German and English-Chinese bidirectional tasks, respectively. diff --git a/data/xml/2022.wnu.xml b/data/xml/2022.wnu.xml index c52e65834b..a3aeda10ab 100644 --- a/data/xml/2022.wnu.xml +++ b/data/xml/2022.wnu.xml @@ -74,7 +74,7 @@ <fixed-case>G</fixed-case>is<fixed-case>P</fixed-case>y: A Tool for Measuring Gist Inference Score in Text PedramHosseini ChristopherWolfe - MonaDiab + MonaDiab DavidBroniatowski 38-46 Decision making theories such as Fuzzy-Trace Theory (FTT) suggest that individuals tend to rely on gist, or bottom-line meaning, in the text when making decisions. In this work, we delineate the process of developing GisPy, an opensource tool in Python for measuring the Gist Inference Score (GIS) in text. Evaluation of GisPy on documents in three benchmarks from the news and scientific text domains demonstrates that scores generated by our tool significantly distinguish low vs. high gist documents. Our tool is publicly available to use at: https: //github.com/phosseini/GisPy. diff --git a/data/xml/2022.wnut.xml b/data/xml/2022.wnut.xml index 7c181ce89d..12b032a563 100644 --- a/data/xml/2022.wnut.xml +++ b/data/xml/2022.wnut.xml @@ -27,7 +27,7 @@ Extracting Mathematical Concepts from Text JacobCollard - Valeriade Paiva + Valeriade Paiva BrendanFong EswaranSubrahmanian 15–23 @@ -113,7 +113,7 @@ ChengChen Md Tahmid RahmanLaskar Shashi BhushanTn - SimonCorston-Oliver + SimonCorston-Oliver 96–100 We present a simple yet effective method to train a named entity recognition (NER) model that operates on business telephone conversation transcripts that contain noise due to the nature of spoken conversation and artifacts of automatic speech recognition. We first fine-tune LUKE, a state-of-the-art Named Entity Recognition (NER) model, on a limited amount of transcripts, then use it as the teacher model to teach a smaller DistilBERT-based student model using a large amount of weakly labeled data and a small amount of human-annotated data. The model achieves high accuracy while also satisfying the practical constraints for inclusion in a commercial telephony product: realtime performance when deployed on cost-effective CPUs rather than GPUs. In this paper, we introduce the fine-tune-then-distill method for entity recognition on real world noisy data to deploy our NER model in a limited budget production environment. By generating pseudo-labels using a large teacher model pre-trained on typed text while fine-tuned on noisy speech text to train a smaller student model, we make the student model 75x times faster while reserving 99.09% of its accuracy. These findings demonstrate that our proposed approach is very effective in limited budget scenarios to alleviate the need of human labeling of a large amount of noisy data. 2022.wnut-1.10 @@ -135,7 +135,7 @@ SofieLabat AmirHadifar ThomasDemeester - VeroniqueHoste + VeroniqueHoste 106–112 The ability to track fine-grained emotions in customer service dialogues has many real-world applications, but has not been studied extensively. This paper measures the potential of prediction models on that task, based on a real-world dataset of Dutch Twitter conversations in the domain of customer service. We find that modeling emotion trajectories has a small, but measurable benefit compared to predictions based on isolated turns. The models used in our study are shown to generalize well to different companies and economic sectors. 2022.wnut-1.12 @@ -183,7 +183,7 @@ “Kanglish alli names!” Named Entity Recognition for <fixed-case>K</fixed-case>annada-<fixed-case>E</fixed-case>nglish Code-Mixed Social Media Data SumukhS - ManishShrivastava + ManishShrivastava 154–161 Code-mixing (CM) is a frequently observed phenomenon on social media platforms in multilingual societies such as India. While the increase in code-mixed content on these platforms provides good amount of data for studying various aspects of code-mixing, the lack of automated text analysis tools makes such studies difficult. To overcome the same, tools such as language identifiers and parts of-speech (POS) taggers for analysing code-mixed data have been developed. One such tool is Named Entity Recognition (NER), an important Natural Language Processing (NLP) task, which is not only a subtask of Information Extraction, but is also needed for downstream NLP tasks such as semantic role labeling. While entity extraction from social media data is generally difficult due to its informal nature, code-mixed data further complicates the problem due to its informal, unstructured and incomplete information. In this work, we present the first ever corpus for Kannada-English code-mixed social media data with the corresponding named entity tags for NER. We provide strong baselines with machine learning classification models such as CRF, Bi-LSTM, and Bi-LSTM-CRF on our corpus with word, character, and lexical features. 2022.wnut-1.17 diff --git a/data/xml/2022.woah.xml b/data/xml/2022.woah.xml index ad04ff9669..6aef647bbc 100644 --- a/data/xml/2022.woah.xml +++ b/data/xml/2022.woah.xml @@ -7,7 +7,7 @@ AidaMostafazadeh Davani LambertMathias BertieVidgen - ZeerakTalat + ZeerakTalat Association for Computational Linguistics
Seattle, Washington (Hybrid)
July @@ -23,7 +23,7 @@ Separating Hate Speech and Offensive Language Classes via Adversarial Debiasing ShuzhouYuan AntonisMaronikolakis - HinrichSchütze + HinrichSchütze 1-10 Research to tackle hate speech plaguing online media has made strides in providing solutions, analyzing bias and curating data. A challenging problem is ambiguity between hate speech and offensive language, causing low performance both overall and specifically for the hate speech class. It can be argued that misclassifying actual hate speech content as merely offensive can lead to further harm against targeted groups. In our work, we mitigate this potentially harmful phenomenon by proposing an adversarial debiasing method to separate the two classes. We show that our method works for English, Arabic German and Hindi, plus in a multilingual setting, improving performance over baselines. 2022.woah-1.1 @@ -121,7 +121,7 @@ Lost in Distillation: A Case Study in Toxicity Modeling AlyssaChvasta AlyssaLees - JeffreySorensen + JeffreySorensen LucyVasserman NiteshGoyal 92-101 @@ -138,7 +138,7 @@ AntigoneKlimi EleftheriaMolou AlexandraSaivanidou - StellaMarkantonatou + StellaMarkantonatou 102-108 We present a cleansed version of the multilingual lexicon HURTLEX-(EL) comprising 737 offensive words of Modern Greek. We worked bottom-up in two annotation rounds and developed detailed guidelines by cross-classifying words on three dimensions: context, reference, and thematic domain. Our classification reveals a wider spectrum of thematic domains concerning the study of offensive language than previously thought Efthymiou et al. (2014) and reveals social and cultural aspects that are not included in the HURTLEX categories. 2022.woah-1.10 @@ -220,7 +220,7 @@ BjörnRönnerstrand GregorRettenegger EllenBreitholtz - AsadSayeed + AsadSayeed 170-175 “Dogwhistles” are expressions intended by the speaker have two messages: a socially-unacceptable “in-group” message understood by a subset of listeners, and a benign message intended for the out-group. We take the result of a word-replacement survey of the Swedish population intended to reveal how dogwhistles are understood, and we show that the difficulty of annotating dogwhistles is reflected in the separability in the space of a sentence-transformer Swedish BERT trained on general data. 2022.woah-1.16 diff --git a/data/xml/2022.wordplay.xml b/data/xml/2022.wordplay.xml index 3aae4aa089..0db3d5285e 100644 --- a/data/xml/2022.wordplay.xml +++ b/data/xml/2022.wordplay.xml @@ -19,7 +19,7 @@ A Systematic Survey of Text Worlds as Embodied Natural Language Environments - PeterJansen + PeterJansen 1-15 Text Worlds are virtual environments for embodied agents that, unlike 2D or 3D environments, are rendered exclusively using textual descriptions. These environments offer an alternative to higher-fidelity 3D environments due to their low barrier to entry, providing the ability to study semantics, compositional inference, and other high-level tasks with rich action spaces while controlling for perceptual input. This systematic survey outlines recent developments in tooling, environments, and agent modeling for Text Worlds, while examining recent trends in knowledge graphs, common sense reasoning, transfer learning of Text World performance to higher-fidelity environments, as well as near-term development targets that, once achieved, make Text Worlds an attractive general research paradigm for natural language processing. 2022.wordplay-1.1 @@ -46,7 +46,7 @@ BenjaminVan Durme OliviaDeng AkankshaMalhotra - BillDolan + BillDolan 25-43 Non-Player Characters (NPCs) significantly enhance the player experience in many games. Historically, players’ interactions with NPCs have tended to be highly scripted, to be limited to natural language responses to be selected by the player, and to not involve dynamic change in game state. In this work, we demonstrate that use of a few example conversational prompts can power a conversational agent to generate both natural language and novel code. This approach can permit development of NPCs with which players can have grounded conversations that are free-form and less repetitive. We demonstrate our approach using OpenAI Codex (GPT-3 finetuned on GitHub), with Minecraft game development as our test bed. We show that with a few example prompts, a Codex-based agent can generate novel code, hold multi-turn conversations and answer questions about structured data. We evaluate this application using experienced gamers in a Minecraft realm and provide analysis of failure cases and suggest possible directions for solutions. 2022.wordplay-1.3 diff --git a/data/xml/2023.acl.xml b/data/xml/2023.acl.xml index 3c7753e1cd..c6a0fa2b8a 100644 --- a/data/xml/2023.acl.xml +++ b/data/xml/2023.acl.xml @@ -5,7 +5,7 @@ Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) AnnaRogers JordanBoyd-Graber - NaoakiOkazaki + NaoakiOkazaki Association for Computational Linguistics
Toronto, Canada
July @@ -64,7 +64,7 @@ DavidDaleMeta AI ElenaVoitaMeta AI LoicBarraultMeta AI - Marta R.Costa-jussàMeta AI + Marta R.Costa-jussàMeta AI 36-50 While the problem of hallucinations in neural machine translation has long been recognized, so far the progress on its alleviation is very little. Indeed, recently it turned out that without artificially encouraging models to hallucinate, previously existing methods fall short and even the standard sequence log-probability is more informative. It means that internal characteristics of the model can give much more information than we expect, and before using external models and measures, we first need to ask: how far can we go if we use nothing but the translation model itself ? We propose to use a method that evaluates the percentage of the source contribution to a generated translation. Intuitively, hallucinations are translations “detached” from the source, hence they can be identified by low source contribution. This method improves detection accuracy for the most severe hallucinations by a factor of 2 and is able to alleviate hallucinations at test time on par with the previous best approach that relies on external models. Next, if we move away from internal model characteristics and allow external tools, we show that using sentence similarity from cross-lingual embeddings further improves these results. We release the code of our experiments. 2023.acl-long.3 @@ -167,7 +167,7 @@ YusukeMatsuiThe University of Tokyo MasaoUtiyamaNICT HidekiTanakaNICT - EiichiroSumitaNICT + EiichiroSumitaNICT 174-189 k-nearest-neighbor machine translation (kNN-MT) (Khandelwal et al., 2021) boosts the translation performance of trained neural machine translation (NMT) models by incorporating example-search into the decoding algorithm. However, decoding is seriously time-consuming, i.e., roughly 100 to 1,000 times slower than standard NMT, because neighbor tokens are retrieved from all target tokens of parallel data in each timestep. In this paper, we propose “Subset kNN-MT”, which improves the decoding speed of kNN-MT by two methods: (1) retrieving neighbor target tokens from a subset that is the set of neighbor sentences of the input sentence, not from all sentences, and (2) efficient distance computation technique that is suitable for subset neighbor search using a look-up table. Our proposed method achieved a speed-up of up to 132.2 times and an improvement in BLEU score of up to 1.6 compared with kNN-MT in the WMT’19 De-En translation task and the domain adaptation tasks in De-En and En-Ja. 2023.acl-long.10 @@ -288,7 +288,7 @@ AiweiLiuSchool of Software, Tsinghua University YawenYangSchool of Software, Tsinghua University ShuangLiSchool of Software, Tsinghua University - Philip S.YuUniversity of Illinois at Chicago + Philip S.YuUniversity of Illinois at Chicago LijieWenSchool of Software, Tsinghua University 322-337 Aspect-based sentiment analysis (ABSA) is a fine-grained sentiment classification task. Many recent works have used dependency trees to extract the relation between aspects and contexts and have achieved significant improvements. However, further improvement is limited due to the potential mismatch between the dependency tree as a syntactic structure and the sentiment classification as a semantic task. To alleviate this gap, we replace the syntactic dependency tree with the semantic structure named Abstract Meaning Representation (AMR) and propose a model called AMR-based Path Aggregation Relational Network (APARN) to take full advantage of semantic structures. In particular, we design the path aggregator and the relation-enhanced self-attention mechanism that complement each other. The path aggregator extracts semantic features from AMRs under the guidance of sentence information, while the relation-enhanced self-attention mechanism in turn improves sentence features with refined semantic information. Experimental results on four public datasets demonstrate 1.13% average F1 improvement of APARN in ABSA when compared with state-of-the-art baselines. @@ -425,7 +425,7 @@ Revealing Single Frame Bias for Video-and-Language Learning JieLeiMeta Platforms, Inc - TamaraBergUniversity of North Carolina Chapel Hill + TamaraBergUniversity of North Carolina Chapel Hill MohitBansalUniversity of North Carolina at Chapel Hill 487-507 Training an effective video-and-language model intuitively requires multiple frames as model inputs. However, it is unclear whether using multiple frames is beneficial to downstream tasks, and if yes, whether the performance gain is worth the drastically-increased computation and memory costs resulting from using more frames. In this work, we explore single-frame models for video-and-language learning. On a diverse set of video-and-language tasks (including text-to-video retrieval and video question answering), we show the surprising result that, with large-scale pre-training and a proper frame ensemble strategy at inference time, a single-frame trained model that does not consider temporal information can achieve better performance than existing methods that use multiple frames for training. This result reveals the existence of a strong “static appearance bias” in popular video-and-language datasets. Therefore, to allow for a more comprehensive evaluation of video-and-language models, we propose two new retrieval tasks based on existing fine-grained action recognition datasets that encourage temporal modeling. Our code is available at https://github.com/jayleicn/singularity. @@ -452,7 +452,7 @@ World-to-Words: Grounded Open Vocabulary Acquisition through Fast Mapping in Vision-Language Models ZiqiaoMaUniversity of Michigan JiayiPanUniversity of Michigan at Ann Arbor - JoyceChaiUniversity of Michigan + JoyceChaiUniversity of Michigan 524-544 The ability to connect language units to their referents in the physical world, referred to as grounding, is crucial to learning and understanding grounded meanings of words. While humans demonstrate fast mapping in new word learning, it remains unclear whether modern vision-language models can truly represent language with their grounded meanings, and how grounding may further bootstrap new word learning. To this end, we introduce Grounded Open Vocabulary Acquisition (GOVA) to examine grounding and bootstrapping in open-world language learning. As an initial attempt, we propose World-to-Words (W2W), a novel visually-grounded language model by pre-training on image-text pairs highlighting grounding as an objective. Through extensive experiments and analysis, we demonstrate that W2W is a more coherent and fast grounded word learner, and that the grounding ability acquired during pre-training helps the model to learn unseen words more rapidly and robustly. 2023.acl-long.31 @@ -521,7 +521,7 @@ PatrickFernandesCarnegie Mellon University, Instituto de Telecomunicações KayoYinUC Berkeley EmmyLiuCarnegie Mellon University - AndréMartinsUnbabel, Instituto de Telecomunicacoes + AndréMartinsUnbabel, Instituto de Telecomunicacoes GrahamNeubigCarnegie Mellon University 606-626 Although proper handling of discourse significantly contributes to the quality of machine translation (MT), these improvements are not adequately measured in common translation quality metrics. Recent works in context-aware MT attempt to target a small set of discourse phenomena during evaluation, however not in a fully systematic way. In this paper, we develop the Multilingual Discourse-Aware (MuDA) benchmark, a series of taggers that identify and evaluate model performance on discourse phenomena in any given dataset. The choice of phenomena is inspired by a novel methodology to systematically identify translations that require context. This methodology confirms the difficulty of previously studied phenomena while uncovering others which were not previously addressed. We find that commonly studied context-aware MT models make only marginal improvements over context-agnostic models, which suggests these models do not handle these ambiguities effectively. We release code and data for 14 language pairs to encourage the MT community to focus on accurately capturing discourse phenomena. Code available at https://github.com/neulab/contextual-mt @@ -612,7 +612,7 @@ MartijnBarteldsUniversity of Groningen NaySanStanford University BradleyMcDonnellUniversity of Hawai‘i at Mānoa - DanJurafskyStanford University + DanJurafskyStanford University MartijnWielingUniversity of Groningen 715-729 The performance of automatic speech recognition (ASR) systems has advanced substantially in recent years, particularly for languages for which a large amount of transcribed speech is available. Unfortunately, for low-resource languages, such as minority languages, regional languages or dialects, ASR performance generally remains much lower. In this study, we investigate whether data augmentation techniques could help improve low-resource ASR performance, focusing on four typologically diverse minority languages or language variants (West Germanic: Gronings, West-Frisian; Malayo-Polynesian: Besemah, Nasal). For all four languages, we examine the use of self-training, where an ASR system trained with the available human-transcribed data is used to generate transcriptions, which are then combined with the original data to train a new ASR system. For Gronings, for which there was a pre-existing text-to-speech (TTS) system available, we also examined the use of TTS to generate ASR training data from text-only sources. We find that using a self-training approach consistently yields improved performance (a relative WER reduction up to 20.5% compared to using an ASR system trained on 24 minutes of manually transcribed speech). The performance gain from TTS augmentation for Gronings was even stronger (up to 25.5% relative reduction in WER compared to a system based on 24 minutes of manually transcribed speech). In sum, our results show the benefit of using self-training or (if possible) TTS-generated data as an efficient solution to overcome the limitations of data availability for resource-scarce languages in order to improve ASR performance. @@ -763,7 +763,7 @@ KevinPeiUniversity of Illinois at Urbana-Champaign IshanJindalIBM Research Kevin Chen-ChuanChangUniversity of Illinois at Urbana-Champaign - ChengXiangZhaiUniversity of Illinois at Urbana-Champaign + ChengXiangZhaiUniversity of Illinois at Urbana-Champaign YunyaoLiApple 929-949 Open Information Extraction (OpenIE) has been used in the pipelines of various NLP tasks. Unfortunately, there is no clear consensus on which models to use in which tasks. Muddying things further is the lack of comparisons that take differing training sets into account. In this paper, we present an application-focused empirical survey of neural OpenIE models, training sets, and benchmarks in an effort to help users choose the most suitable OpenIE systems for their applications. We find that the different assumptions made by different models and datasets have a statistically significant effect on performance, making it important to choose the most appropriate model for one’s applications. We demonstrate the applicability of our recommendations on a downstream Complex QA application. @@ -845,7 +845,7 @@ Being Right for Whose Right Reasons? Terne SashaThorn JakobsenUniversity of Copenhagen LauraCabelloUniversity of Copenhagen - AndersSøgaardUniversity of Copenhagen + AndersSøgaardUniversity of Copenhagen 1033-1054 Explainability methods are used to benchmark the extent to which model predictions align with human rationales i.e., are ‘right for the right reasons’. Previous work has failed to acknowledge, however, that what counts as a rationale is sometimes subjective. This paper presents what we think is a first of its kind, a collection of human rationale annotations augmented with the annotators demographic information. We cover three datasets spanning sentiment analysis and common-sense reasoning, and six demographic groups (balanced across age and ethnicity). Such data enables us to ask both what demographics our predictions align with and whose reasoning patterns our models’ rationales align with. We find systematic inter-group annotator disagreement and show how 16 Transformer-based models align better with rationales provided by certain demographic groups: We find that models are biased towards aligning best with older and/or white annotators. We zoom in on the effects of model size and model distillation, finding –contrary to our expectations– negative correlations between model size and rationale agreement as well as no evidence that either model size or model distillation improves fairness. 2023.acl-long.59 @@ -862,7 +862,7 @@ SiddharthVermaSquare ZhijingJinMax Planck Institute & ETH Zurich GargiGhoshFacebook - MonaDiabMeta Responsible AI + MonaDiabMeta Responsible AI AsliCelikyilmazFAIR @ Meta 1055-1081 Recent advancements in large language models have enabled them to perform well on complex tasks that require step-by-step reasoning with few-shot learning. However, it is unclear whether these models are applying reasoning skills they have learnt during pre-training , or if they are simply memorizing their training corpus at finer granularity and have learnt to better understand their context. To address this question, we introduce {pasted macro ‘OUR’}model, a benchmark and suite of analyses for evaluating reasoning skills of language models. {pasted macro ‘OUR’}model enables comparing pre-trained and finetuned models on complex tasks that require reasoning skills to solve. Our benchmark provides a test bed to asses any language model on fine-grained reasoning skills, which spans over 20 datasets and covers 10 different reasoning skills. By using {pasted macro ‘OUR’}model we further investigate the role of finetuning. Our extensive empirical analysis shows that language models learn more reasoning skills such as textual entailment, abductive reasoning, and analogical reasoning during the finetuning stage compared to pretraining stage. However, we also find that when language models are finetuned they tend to overfit to the prompt template, which hurts the robustness of models causing generalization problems. @@ -880,9 +880,9 @@ NoraKassnerMeta AI ChunlanMaLMU Munich HelmutSchmidCIS, Ludwig-Maximilians-Universitaet - AndréMartinsUnbabel, Instituto de Telecomunicacoes + AndréMartinsUnbabel, Instituto de Telecomunicacoes FrançoisYvonISIR CNRS & Sorbonne Université - HinrichSchützeCenter for Information and Language Processing, University of Munich + HinrichSchützeCenter for Information and Language Processing, University of Munich 1082-1117 The NLP community has mainly focused on scaling Large Language Models (LLMs) vertically, i.e., making them better for about 100 languages. We instead scale LLMs horizontally: we create, through continued pretraining, Glot500-m, an LLM that covers 511 predominantly low-resource languages. An important part of this effort is to collect and clean Glot500-c, a corpus that covers these 511 languages and allows us to train Glot500-m. We evaluate Glot500-m on five diverse tasks across these languages. We observe large improvements for both high-resource and low-resource languages compared to an XLM-R baseline. Our analysis shows that no single factor explains the quality of multilingual LLM representations. Rather, a combination of factors determines quality including corpus size, script, “help” from related languages and the total capacity of the model. Our work addresses an important goal of NLP research: we should notlimit NLP to a small fraction of the world’s languages and instead strive to support as many languages as possible to bring the benefits of NLP technology to all languages and cultures. Code, data and models are available at https://github.com/cisnlp/Glot500. 2023.acl-long.61 @@ -1209,7 +1209,7 @@ Marked Personas: Using Natural Language Prompts to Measure Stereotypes in Language Models MyraChengStanford University EsinDurmusStanford University - DanJurafskyStanford University + DanJurafskyStanford University 1504-1532 To recognize and mitigate harms from large language models (LLMs), we need to understand the prevalence and nuances of stereotypes in LLM outputs. Toward this end, we present Marked Personas, a prompt-based method to measure stereotypes in LLMs for intersectional demographic groups without any lexicon or data labeling. Grounded in the sociolinguistic concept of markedness (which characterizes explicitly linguistically marked categories versus unmarked defaults), our proposed method is twofold: 1) prompting an LLM to generate personas, i.e., natural language descriptions, of the target demographic group alongside personas of unmarked, default groups; 2) identifying the words that significantly distinguish personas of the target group from corresponding unmarked ones. We find that the portrayals generated by GPT-3.5 and GPT-4 contain higher rates of racial stereotypes than human-written portrayals using the same prompts. The words distinguishing personas of marked (non-white, non-male) groups reflect patterns of othering and exoticizing these demographics. An intersectional lens further reveals tropes that dominate portrayals of marginalized groups, such as tropicalism and the hypersexualization of minoritized women. These representational harms have concerning implications for downstream applications like story generation. 2023.acl-long.84 @@ -1225,7 +1225,7 @@ YuanGaoNanjing University ZhenWuNanjing University JianbingZhangNanjing University - XinyuDaiNational Key Laboratory for Novel Software Technology, Nanjing University + XinyuDaiNational Key Laboratory for Novel Software Technology, Nanjing University 1533-1545 Out-of-distribution (OOD) detection, a fundamental task vexing real-world applications, has attracted growing attention in the NLP community. Recently fine-tuning based methods have made promising progress. However, it could be costly to store fine-tuned models for each scenario. In this paper, we depart from the classic fine-tuning based OOD detection toward a parameter-efficient alternative, and propose an unsupervised prefix-tuning based OOD detection framework termed PTO. Additionally, to take advantage of optional training data labels and targeted OOD data, two practical extensions of PTO are further proposed. Overall, PTO and its extensions offer several key advantages of being lightweight, easy-to-reproduce, and theoretically justified. Experimental results show that our methods perform comparably to, even better than, existing fine-tuning based OOD detection approaches under a wide range of metrics, detection settings, and OOD types. 2023.acl-long.85 @@ -1281,7 +1281,7 @@ HaoChengMicrosoft Research YuZhangUniversity of Illinois at Urbana-Champaign XiaodongLiuMicrosoft Research - EricNybergCarnegie Mellon University + EricNybergCarnegie Mellon University JianfengGaoMicrosoft Research, Redmond 1599-1618 The retrieval model is an indispensable component for real-world knowledge-intensive tasks, e.g., open-domain question answering (ODQA). As separate retrieval skills are annotated for different datasets, recent work focuses on customized methods, limiting the model transfer- ability and scalability. In this work, we propose a modular retriever where individual modules correspond to key skills that can be reused across datasets. Our approach supports flexible skill configurations based on the target domain to boost performance. To mitigate task interference, we design a novel modularization parameterization inspired by sparse Transformer. We demonstrate that our model can benefit from self-supervised pretraining on Wikipedia and fine-tuning using multiple ODQA datasets, both in a multi-task fashion. Our approach outperforms recent self-supervised retrievers in zero-shot evaluations and achieves state-of-the-art fine-tuned retrieval performance on NQ, HotpotQA and OTT-QA. @@ -1295,7 +1295,7 @@ WenyaWangUniversity of Washington VivekSrikumarUniversity of Utah HannanehHajishirziUniversity of Washington - Noah A.SmithUniversity of Washington + Noah A.SmithUniversity of Washington 1619-1635 In question answering requiring common sense, language models (e.g., GPT-3) have been used to generate text expressing background knowledge that helps improve performance. Yet the cost of working with such models is very high; in this work, we finetune smaller language models to generate useful intermediate context, referred to here as elaborations. Our framework alternates between updating two language models—an elaboration generator and an answer predictor—allowing each to influence the other. Using less than 0.5% of the parameters of GPT-3, our model outperforms alternatives with similar sizes and closes the gap with GPT-3 on four commonsense question answering benchmarks. Human evaluations show that the quality of the generated elaborations is high. 2023.acl-long.90 @@ -1321,8 +1321,8 @@ ZhenHuangNational University of Defense Technology YunxiangZhaoBeijing Institute of Biotechnology ZhiliangTianNational University of Defense Technology - YangLiuNational University of Defense Technology - DongshengLiNational University of Defense Technology + YangLiuNational University of Defense Technology + DongshengLiNational University of Defense Technology 1650-1668 Self-training emerges as an important research line on domain adaptation. By taking the model’s prediction as the pseudo labels of the unlabeled data, self-training bootstraps the model with pseudo instances in the target domain. However, the prediction errors of pseudo labels (label noise) challenge the performance of self-training. To address this problem, previous approaches only use reliable pseudo instances, i.e., pseudo instances with high prediction confidence, to retrain the model. Although these strategies effectively reduce the label noise, they are prone to miss the hard examples. In this paper, we propose a new self-training framework for domain adaptation, namely Domain adversarial learning enhanced Self-Training Framework (DaMSTF). Firstly, DaMSTF involves meta-learning to estimate the importance of each pseudo instance, so as to simultaneously reduce the label noise and preserve hard examples. Secondly, we design a meta constructor for constructing the meta-validation set, which guarantees the effectiveness of the meta-learning module by improving the quality of the meta-validation set. Thirdly, we find that the meta-learning module suffers from the training guidance vanish- ment and tends to converge to an inferior optimal. To this end, we employ domain adversarial learning as a heuristic neural network initialization method, which can help the meta-learning module converge to a better optimal. Theoretically and experimentally, we demonstrate the effectiveness of the proposed DaMSTF. On the cross-domain sentiment classification task, DaMSTF improves the performance of BERT with an average of nearly 4%. 2023.acl-long.92 @@ -1442,7 +1442,7 @@ SarahPayneStony Brook University JordanKodnerStony Brook University EllenBroselowStony Brook University - OwenRambowStony Brook University + OwenRambowStony Brook University 1793-1805 Explicit linguistic knowledge, encoded by resources such as rule-based morphological analyzers, continues to prove useful in downstream NLP tasks, especially for low-resource languages and dialects. Rules are an important asset in descriptive linguistic grammars. However, creating such resources is usually expensive and non-trivial, especially for spoken varieties with no written standard. In this work, we present a novel approach for automatically learning morphophonological rules of Arabic from a corpus. Motivated by classic cognitive models for rule learning, rules are generalized cautiously. Rules that are memorized for individual items are only allowed to generalize to unseen forms if they are sufficiently reliable in the training data. The learned rules are further examined to ensure that they capture true linguistic phenomena described by domain experts. We also investigate the learnability of rules in low-resource settings across different experimental setups and dialects. 2023.acl-long.101 @@ -1505,7 +1505,7 @@ Do language models have coherent mental models of everyday things? YulingGuAllen Institute for AI - BhavanaDalvi MishraAllen Institute for Artificial Intelligence + BhavanaDalvi MishraAllen Institute for Artificial Intelligence PeterClarkAllen Institute for AI 1892-1913 When people think of everyday things like an egg, they typically have a mental image associated with it. This allows them to correctly judge, for example, that “the yolk surrounds the shell” is a false statement. Do language models similarly have a coherent picture of such everyday things? To investigate this, we propose a benchmark dataset consisting of 100 everyday things, their parts, and the relationships between these parts, expressed as 11,720 “X relation Y?” true/false questions. Using these questions as probes, we observe that state-of-the-art pre-trained language models (LMs) like GPT-3 and Macaw have fragments of knowledge about these everyday things, but do not have fully coherent “parts mental models” (54-59% accurate, 19-43% conditional constraint violation). We propose an extension where we add a constraint satisfaction layer on top of the LM’s raw predictions to apply commonsense constraints. As well as removing inconsistencies, we find that this also significantly improves accuracy (by 16-20%), suggesting how the incoherence of the LM’s pictures of everyday things can be significantly reduced. @@ -1528,7 +1528,7 @@ Instruction Induction: From Few Examples to Natural Language Task Descriptions OrHonovichTel Aviv University UriShahamTel Aviv University - Samuel R.BowmanNew York University + Samuel R.BowmanNew York University OmerLevyMeta AI / Tel Aviv University 1935-1952 Large language models are able to perform a task by conditioning on a few input-output demonstrations - a paradigm known as in-context learning. We show that language models can explicitly infer an underlying task from a few demonstrations by prompting them to generate a natural language instruction that fits the examples. To explore this ability, we introduce the instruction induction challenge, compile a dataset consisting of 24 tasks, and define a novel evaluation metric based on executing the generated instruction. We discover that, to a large extent, the ability to generate instructions does indeed emerge when using a model that is both large enough and aligned to follow instructions; InstructGPT achieves 65.7% of human performance in our execution-based metric, while the original GPT-3 model reaches only 9.8% of human performance. This surprising result suggests that instruction induction might be a viable learning paradigm in and of itself, where instead of fitting a set of latent continuous parameters to the data, one searches for the best description in the natural language hypothesis space. @@ -1541,8 +1541,8 @@ In-Context Analogical Reasoning with Pre-Trained Language Models XiaoyangHuUniversity of Michigan ShaneStorksUniversity of Michigan - RichardLewisUniversity of Michigan - JoyceChaiUniversity of Michigan + RichardLewisUniversity of Michigan + JoyceChaiUniversity of Michigan 1953-1969 Analogical reasoning is a fundamental capacity of human cognition that allows us to reason abstractly about novel situations by relating them to past experiences. While it is thought to be essential for robust reasoning in AI systems, conventional approaches require significant training and/or hard-coding of domain knowledge to be applied to benchmark tasks. Inspired by cognitive science research that has found connections between human language and analogy-making, we explore the use of intuitive language-based abstractions to support analogy in AI systems. Specifically, we apply large pre-trained language models (PLMs) to visual Raven’s Progressive Matrices (RPM), a common relational reasoning test. By simply encoding the perceptual features of the problem into language form, we find that PLMs exhibit a striking capacity for zero-shot relational reasoning, exceeding human performance and nearing supervised vision-based methods. We explore different encodings that vary the level of abstraction over task features, finding that higher-level abstractions further strengthen PLMs’ analogical reasoning. Our detailed analysis reveals insights on the role of model complexity, in-context learning, and prior knowledge in solving RPM tasks. 2023.acl-long.109 @@ -1553,7 +1553,7 @@ Peek Across: Improving Multi-Document Modeling via Cross-Document Question-Answering AviCaciularuBar-Ilan University - MatthewPetersAllen Institute for Artificial Intelligence + MatthewPetersAllen Institute for Artificial Intelligence JacobGoldbergerBar-Ilan University IdoDaganBar-Ilan University ArmanCohanAllen Institute for AI @@ -1709,8 +1709,8 @@ ShuangzhiWuBytedance KehaiChenSchool of Computer Science and Technology, Harbin Institute of Technology HuiDiToshiba (China) Co., Ltd. - MuyunYangHarbin Institute of Technology - TiejunZhaoHarbin Institute of Technology + MuyunYangHarbin Institute of Technology + TiejunZhaoHarbin Institute of Technology 2175-2190 State-of-the-art translation Quality Estimation (QE) models are proven to be biased. More specifically, they over-rely on monolingual features while ignoring the bilingual semantic alignment. In this work, we propose a novel method to mitigate the bias of the QE model and improve estimation performance. Our method is based on the contrastive learning between clean and noisy sentence pairs. We first introduce noise to the target side of the parallel sentence pair, forming the negative samples. With the original parallel pairs as the positive sample, the QE model is contrastively trained to distinguish the positive samples from the negative ones. This objective is jointly trained with the regression-style quality estimation, so as to prevent the QE model from overfitting to monolingual features. Experiments on WMT QE evaluation datasets demonstrate that our method improves the estimation performance by a large margin while mitigating the bias. 2023.acl-long.121 @@ -1721,7 +1721,7 @@ Breeding Machine Translations: Evolutionary approach to survive and thrive in the world of automated evaluation JosefJonCharles University - OndřejBojarCharles University, MFF UFAL + OndřejBojarCharles University, MFF UFAL 2191-2212 We propose a genetic algorithm (GA) based method for modifying n-best lists produced by a machine translation (MT) system. Our method offers an innovative approach to improving MT quality and identifying weaknesses in evaluation metrics. Using common GA operations (mutation and crossover) on a list of hypotheses in combination with a fitness function (an arbitrary MT metric), we obtain novel and diverse outputs with high metric scores. With a combination of multiple MT metrics as the fitness function, the proposed method leads to an increase in translation quality as measured by other held-out automatic metrics.With a single metric (including popular ones such as COMET) as the fitness function, we find blind spots and flaws in the metric. This allows for an automated search for adversarial examples in an arbitrary metric, without prior assumptions on the form of such example. As a demonstration of the method, we create datasets of adversarial examples and use them to show that reference-free COMET is substantially less robust than the reference-based version. 2023.acl-long.122 @@ -1735,7 +1735,7 @@ ZhexinZhangTsinghua University FeiMiHuawei YashengWangHuawei Noah’s Ark Lab - WeiLiuxiaomi + WeiLiuxiaomi JianweiCuiXiaomi AI Lab BinWangXiaomi AI Lab QunLiuHuawei Noah’s Ark Lab @@ -1822,7 +1822,7 @@ XinxiLyuUniversity of Washington SewonMinUniversity of Washington IzBeltagyAllen Institute for AI (AI2) - LukeZettlemoyerUniversity of Washington; Meta + LukeZettlemoyerUniversity of Washington; Meta HannanehHajishirziUniversity of Washington 2304-2317 Although large language models can be prompted for both zero- and few-shot learning, performance drops significantly when no demonstrations are available. In this paper, we introduce Z-ICL, a new zero-shot method that closes the gap by constructing pseudo-demonstrations for a given test input using a raw text corpus. Concretely, pseudo-demonstrations are constructed by (1) finding the nearest neighbors to the test input from the corpus and pairing them with random task labels, and (2) applying a set of techniques to reduce the amount of direct copying the model does from the resulting demonstrations. Evaluation on nine classification datasets shows that Z-ICL outperforms previous zero-shot methods by a significant margin, and is on par with in-context learning with labeled training data in the few-shot setting. Overall, Z-ICL provides a significantly higher estimate of the zero-shot performance levels of a model, and supports future efforts to develop better pseudo-demonstrations that further improve zero-shot results. @@ -1934,7 +1934,7 @@ ChengsongHuangFudan university XiaoqingZhengFudan University JinshuLinHundsun - XuanjingHuangFudan University + XuanjingHuangFudan University 2437-2449 Tables are widely used in research and business, which are suitable for human consumption, but not easily machine-processable, particularly when tables are present in images. One of the main challenges to extracting data from images of tables is accurately recognizing table structures, especially for complex tables with cross rows and columns. In this study, we propose a novel multi-modal pre-training model for table structure recognition, named TableVLM.With a two-stream multi-modal transformer-based encoder-decoder architecture, TableVLM learns to capture rich table structure-related features by multiple carefully-designed unsupervised objectives inspired by the notion of masked visual-language modeling. To pre-train this model, we also created a dataset, called ComplexTable, which consists of 1,000K samples to be released publicly. Experiment results show that the model built on pre-trained TableVLM can improve the performance up to 1.97% in tree-editing-distance-score on ComplexTable. 2023.acl-long.137 @@ -1989,7 +1989,7 @@ RanXuEmory University JieyuZhangUniversity of Washington JiamingShenGoogle Research - ChaoZhangGeorgia Tech + ChaoZhangGeorgia Tech 2499-2521 We present PATRON, a prompt-based data selection method for pre-trained language model fine-tuning under cold-start scenarios, i.e., no initial labeled data are available. In PATRON, we design (1) a prompt-based uncertainty propagation approach to estimate the importance of data points and (2) a partition-then-rewrite (PTR) strategy to promote sample diversity when querying for annotations. Experiments on six text classification datasets show that PATRON outperforms the strongest cold-start data selection baselines by up to 6.9%. Besides, with 128 labels only, PATRON achieves 91.0% and 92.1% of the fully supervised performance based on vanilla fine-tuning and prompt-based learning respectively. Our implementation of PATRON will be published upon acceptance. 2023.acl-long.141 @@ -2000,7 +2000,7 @@ Training-free Neural Architecture Search for <fixed-case>RNN</fixed-case>s and Transformers AaronSerianniPrinceton University - JugalKalitaUniversity of Colorado + JugalKalitaUniversity of Colorado 2522-2540 Neural architecture search (NAS) has allowed for the automatic creation of new and effective neural network architectures, offering an alternative to the laborious process of manually designing complex architectures. However, traditional NAS algorithms are slow and require immense amounts of computing power. Recent research has investigated training-free NAS metrics for image classification architectures, drastically speeding up search algorithms. In this paper, we investigate training-free NAS metrics for recurrent neural network (RNN) and BERT-based transformer architectures, targeted towards language modeling tasks. First, we develop a new training-free metric, named hidden covariance, that predicts the trained performance of an RNN architecture and significantly outperforms existing training-free metrics. We experimentally evaluate the effectiveness of the hidden covariance metric on the NAS-Bench-NLP benchmark. Second, we find that the current search space paradigm for transformer architectures is not optimized for training-free neural architecture search. Instead, a simple qualitative analysis can effectively shrink the search space to the best performing architectures. This conclusion is based on our investigation of existing training-free metrics and new metrics developed from recent transformer pruning literature, evaluated on our own benchmark of trained BERT architectures. Ultimately, our analysis shows that the architecture search space and the training-free metric must be developed together in order to achieve effective results. Our source code is available at https://github.com/aaronserianni/training-free-nas. 2023.acl-long.142 @@ -2012,7 +2012,7 @@ <fixed-case>C</fixed-case>ross<fixed-case>S</fixed-case>um: Beyond <fixed-case>E</fixed-case>nglish-Centric Cross-Lingual Summarization for 1,500+ Language Pairs AbhikBhattacharjeeBangladesh University of Engineering and Technology TahmidHasanBangladesh University of Engineering and Technology - Wasi UddinAhmadAWS AI Labs + Wasi UddinAhmadAWS AI Labs Yuan-FangLiMonash University Yong-BinKangSwinburne University of Technology RifatShahriyarBangladesh University of Engineering and Technology @@ -2042,7 +2042,7 @@ Bi-Phone: Modeling Inter Language Phonetic Influences in Text AbhirutGuptaGoogle Research Ananya B.SaiIndian Institute of Technology, Madras - RichardSproatGoogle, Japan + RichardSproatGoogle, Japan YuriVasilevskiGoogle LLC JamesRenGoogle AmbarishJashGoogle LLC @@ -2060,7 +2060,7 @@ ShengqiongWuNational University of Singapore HaoFeiNational University of Singapore WeiJiNational University of Singapore - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore 2593-2608 Unpaired cross-lingual image captioning has long suffered from irrelevancy and disfluency issues, due to the inconsistencies of the semantic scene and syntax attributes during transfer. In this work, we propose to address the above problems by incorporating the scene graph (SG) structures and the syntactic constituency (SC) trees. Our captioner contains the semantic structure-guided image-to-pivot captioning and the syntactic structure-guided pivot-to-target translation, two of which are joined via pivot language. We then take the SG and SC structures as pivoting, performing cross-modal semantic structure alignment and cross-lingual syntactic structure alignment learning. We further introduce cross-lingual&cross-modal back-translation training to fully align the captioning and translation stages. Experiments on English-Chinese transfers show that our model shows great superiority in improving captioning relevancy and fluency. 2023.acl-long.146 @@ -2130,8 +2130,8 @@ GriffinAdamsColumbia University AlexFabbriSalesforce AI Research FaisalLadhakColumbia University - NoémieElhadadColumbia University - KathleenMcKeownColumbia University and Amazon (Amazon Scholar) + NoémieElhadadColumbia University + KathleenMcKeownColumbia University and Amazon (Amazon Scholar) 2680-2697 Two-step approaches, in which summary candidates are generated-then-reranked to return a single summary, can improve ROUGE scores over the standard single-step approach. Yet, standard decoding methods (i.e., beam search, nucleus sampling, and diverse beam search) produce candidates with redundant, and often low quality, content. In this paper, we design a novel method to generate candidates for re-ranking that addresses these issues. We ground each candidate abstract on its own unique content plan and generate distinct plan-guided abstracts using a model’s top beam. More concretely, a standard language model (a BART LM) auto-regressively generates elemental discourse unit (EDU) content plans with an extractive copy mechanism. The top K beams from the content plan generator are then used to guide a separate LM, which produces a single abstractive candidate for each distinct plan. We apply an existing re-ranker (BRIO) to abstractive candidates generated from our method, as well as baseline decoding methods. We show large relevance improvements over previously published methods on widely used single document news article corpora, with ROUGE-2 F1 gains of 0.88, 2.01, and 0.38 on CNN / Dailymail, NYT, and Xsum, respectively. A human evaluation on CNN / DM validates these results. Similarly, on 1k samples from CNN / DM, we show that prompting GPT-3 to follow EDU plans outperforms sampling-based methods by by 1.05 ROUGE-2 F1 points. Code to generate and realize plans is available at https://github.com/griff4692/edu-sum. 2023.acl-long.151 @@ -2160,7 +2160,7 @@ XiangDengThe Ohio State University JiamingShenGoogle Research YouWuGoogle LLC - LukeZettlemoyerUniversity of Washington; Meta + LukeZettlemoyerUniversity of Washington; Meta HuanSunThe Ohio State University 2717-2739 Chain-of-Thought (CoT) prompting can dramatically improve the multi-step reasoning abilities of large language models (LLMs). CoT explicitly encourages the LLM to generate intermediate rationales for solving a problem, by providing a series of reasoning steps in the demonstrations. Despite its success, there is still little understanding of what makes CoT prompting effective and which aspects of the demonstrated reasoning steps contribute to its performance. In this paper, we show that CoT reasoning is possible even with invalid demonstrations - prompting with invalid reasoning steps can achieve over 80-90% of the performance obtained using CoT under various metrics, while still generating coherent lines of reasoning during inference. Further experiments show that other aspects of the rationales, such as being relevant to the query and correctly ordering the reasoning steps, are much more important for effective CoT reasoning. Overall, these findings both deepen our understanding of CoT prompting, and open up new questions regarding LLMs’ capability to learn to reason in context. @@ -2178,7 +2178,7 @@ VedanujGoswamiMeta AI PhilippKoehnJohns Hopkins University AngelaFanFacebook AI Research - FranciscoGuzmanMeta AI + FranciscoGuzmanMeta AI 2740-2756 For many languages, machine translation progress is hindered by the lack of reliable training data. Models are trained on whatever pre-existing datasets may be available and then augmented with synthetic data, because it is often not economical to pay for the creation of large-scale datasets. But for the case of low-resource languages, would the creation of a few thousand professionally translated sentence pairs give any benefit? In this paper, we show that it does. We describe a broad data collection effort involving around 6k professionally translated sentence pairs for each of 39 low-resource languages, which we make publicly available. We analyse the gains of models trained on this small but high-quality data, showing that it has significant impact even when larger but lower quality pre-existing corpora are used, or when data is augmented with millions of sentences through backtranslation. 2023.acl-long.154 @@ -2262,7 +2262,7 @@ MarcusCollinsAmazon, Inc. NikhitaVedulaAmazon SimoneFiliceamazon.com - ShervinMalmasiAmazon + ShervinMalmasiAmazon OlegRokhlenkoAmazon Research 2847-2867 Methods to generate text from structured data have advanced significantly in recent years, primarily due to fine-tuning of pre-trained language models on large datasets. However, such models can fail to produce output faithful to the input data, particularly on out-of-domain data. Sufficient annotated data is often not available for specific domains, leading us to seek an unsupervised approach to improve the faithfulness of output text. Since the problem is fundamentally one of consistency between the representations of the structured data and text, we evaluate the effectiveness of cycle training in this work. Cycle training uses two models which are inverses of each other: one that generates text from structured data, and one which generates the structured data from natural language text. We show that cycle training, when initialized with a small amount of supervised data (100 samples in our case), achieves nearly the same performance as fully supervised approaches for the data-to-text generation task on the WebNLG, E2E, WTQ, and WSQL datasets. We perform extensive empirical analysis with automated evaluation metrics and a newly designed human evaluation schema to reveal different cycle training strategies’ effectiveness of reducing various types of generation errors. Our code is publicly available at https://github.com/Edillower/CycleNLG. @@ -2328,7 +2328,7 @@ Summary-Oriented Vision Modeling for Multimodal Abstractive Summarization YunlongLiangBeijing Jiaotong University FandongMengWeChat AI, Tencent - JinanXuBeijing Jiaotong University + JinanXuBeijing Jiaotong University JiaanWangSchool of Computer Science and Technology, Soochow University, Suzhou, China YufengChenBeijing Jiaotong University JieZhouTencent Inc. @@ -2393,7 +2393,7 @@ NicolasStefanovitchJoint Research Centre NikolaosNikolaidisAthens University of Economics and Business GiovanniDa San MartinoUniversity of Padova - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 3001-3022 We present a new multilingual multifacet dataset of news articles, each annotated for genre (objective news reporting vs. opinion vs. satire), framing (what key aspects are highlighted), and persuasion techniques (logical fallacies, emotional appeals, ad hominem attacks, etc.). The persuasion techniques are annotated at the span level, using a taxonomy of 23 fine-grained techniques grouped into 6 coarse categories. The dataset contains 1,612 news articles covering recent news on current topics of public interest in six European languages (English, French, German, Italian, Polish, and Russian), with more than 37k annotated spans of persuasion techniques. We describe the dataset and the annotation process, and we report the evaluation results of multilabel classification experiments using state-of-the-art multilingual transformers at different levels of granularity: token-level, sentence-level, paragraph-level, and document-level. 2023.acl-long.169 @@ -2431,7 +2431,7 @@ FanYinUCLA JesseVigSalesforce Research PhilippeLabanSalesforce Research - ShafiqJotyNanyang Technological University; Salesforce AI Research + ShafiqJotyNanyang Technological University; Salesforce AI Research CaimingXiongSalesforce Chien-ShengWuSalesforce 3063-3079 @@ -2487,7 +2487,7 @@ Interpretable Word Sense Representations via Definition Generation: The Case of Semantic Change Analysis MarioGiulianelliUniversity of Amsterdam IrisLudenUniversity of Amsterdam - RaquelFernandezILLC, University of Amsterdam + RaquelFernandezILLC, University of Amsterdam AndreyKutuzovUniversity of Oslo 3130-3148 We propose using automatically generated natural language definitions of contextualised word usages as interpretable word and word sense representations. Given a collection of usage examples for a target word, and the corresponding data-driven usage clusters (i.e., word senses), a definition is generated for each usage with a specialised Flan-T5 language model, and the most prototypical definition in a usage cluster is chosen as the sense label. We demonstrate how the resulting sense labels can make existing approaches to semantic change analysis more interpretable, and how they can allow users — historical linguists, lexicographers, or social scientists — to explore and intuitively explain diachronic trajectories of word meaning. Semantic change analysis is only one of many possible applications of the ‘definitions as representations’ paradigm. Beyond being human-readable, contextualised definitions also outperform token or usage sentence embeddings in word-in-context semantic similarity judgements, making them a new promising type of lexical representation for NLP. @@ -2501,8 +2501,8 @@ HaoYanGeorge Mason University SaurabhSrivastavaGeorge Mason University YintaoTaiUniversity of Edinburgh - Sida I.WangFacebook AI Research - Wen-tauYihMeta AI - FAIR + Sida I.WangFacebook AI Research + Wen-tauYihMeta AI - FAIR ZiyuYaoGeorge Mason University 3149-3170 Interactive semantic parsing based on natural language (NL) feedback, where users provide feedback to correct the parser mistakes, has emerged as a more practical scenario than the traditional one-shot semantic parsing. However, prior work has heavily relied on human-annotated feedback data to train the interactive semantic parser, which is prohibitively expensive and not scalable. In this work, we propose a new task of simulating NL feedback for interactive semantic parsing. We accompany the task with a novel feedback evaluator. The evaluator is specifically designed to assess the quality of the simulated feedback, based on which we decide the best feedback simulator from our proposed variants. On a text-to-SQL dataset, we show that our feedback simulator can generate high-quality NL feedback to boost the error correction ability of a specific parser. In low-data settings, our feedback simulator can help achieve comparable error correction performance as trained using the costly, full set of human annotations. @@ -2530,7 +2530,7 @@ ClaudiaShiColumbia University KeyonVafaColumbia University AmirFederColumbia University - DavidBleiColumbia University + DavidBleiColumbia University 3186-3206 Controlled generation refers to the problem of creating text that contains stylistic or semantic attributes of interest. Many approaches reduce this problem to training a predictor of the desired attribute. For example, researchers hoping to deploy a large language model to produce non-toxic content may use a toxicity classifier to filter generated text. In practice, the generated text to classify, which is determined by user prompts, may come from a wide range of distributions. In this paper, we show that the performance of controlled generation may be poor if the distributions of text in response to user prompts differ from the distribution the predictor was trained on. To address this problem, we cast controlled generation under distribution shift as an invariant learning problem: the most effective predictor should be invariant across multiple text environments. We then discuss a natural solution that arises from this characterization and propose heuristics for selecting natural environments. We study this characterization and the proposed method empirically using both synthetic and real data. Experiments demonstrate both the challenge of distribution shift in controlled generation and the potential of invariance methods in this setting. 2023.acl-long.179 @@ -2567,7 +2567,7 @@ <fixed-case>H</fixed-case>y<fixed-case>P</fixed-case>e: Better Pre-trained Language Model Fine-tuning with Hidden Representation Perturbation HongyiYuanTsinghua University - ZhengYuanAlibaba Group + ZhengYuanAlibaba Group ChuanqiTanAlibaba Group FeiHuangAlibaba DAMO Academy SongfangHuangAlibaba DAMO Academy @@ -2585,7 +2585,7 @@ HongweiWangTencent AI Lab XiaoyangWangTencent AI Lab HongYuUniversity of Massachusetts, Lowell - FeiLiuEmory University + FeiLiuEmory University DongYuTencent AI Lab 3265-3280 The potential choices for news article headlines are enormous, and finding the right balance between conveying the essential message and capturing the reader’s attention is key to effective headlining. However, presenting the same news headline to all readers is a suboptimal strategy, because it does not take into account the different preferences and interests of diverse readers, who may be confused about why a particular article has been recommended to them and do not see a clear connection between their interests and the recommended article. In this paper, we present a novel framework that addresses these challenges by incorporating user profiling to generate personalized headlines, and a combination of automated and human evaluation methods to determine user preference for personalized headlines. Our framework utilizes a learnable relevance function to assign personalized signature phrases to users based on their reading histories, which are then used to personalize headline generation. Through extensive evaluation, we demonstrate the effectiveness of our proposed framework in generating personalized headlines that meet the needs of a diverse audience. Our framework has the potential to improve the efficacy of news recommendations and facilitate creation of personalized content. @@ -2611,7 +2611,7 @@ ShiFengNortheastern University DalingWangNortheastern University YifeiZhangNortheastern University - HinrichSchützeCenter for Information and Language Processing, University of Munich + HinrichSchützeCenter for Information and Language Processing, University of Munich 3295-3310 We investigate response generation for multi-turn dialogue in generative chatbots. Existing generative modelsbased on RNNs (Recurrent Neural Networks) usually employ the last hidden state to summarize the history, which makesmodels unable to capture the subtle variability observed in different dialogues and cannot distinguish the differencesbetween dialogues that are similar in composition. In this paper, we propose Pseudo-Variational Gated Recurrent Unit (PVGRU). The key novelty of PVGRU is a recurrent summarizing variable thataggregates the accumulated distribution variations of subsequences. We train PVGRU without relying on posterior knowledge, thus avoiding the training-inference inconsistency problem. PVGRU can perceive subtle semantic variability through summarizing variables that are optimized by two objectives we employ for training: distribution consistency and reconstruction. In addition, we build a Pseudo-Variational Hierarchical Dialogue(PVHD) model based on PVGRU. Experimental results demonstrate that PVGRU can broadly improve the diversity andrelevance of responses on two benchmark datasets. 2023.acl-long.185 @@ -2689,7 +2689,7 @@ ZihengLiPeking University ShaohanHuangMicrosoft Research Asia ZihanZhangMicrosoft - Zhi-HongDengPeking University + Zhi-HongDengPeking University QiangLouMicrosoft HaizhenHuangMicrosoft JianJiaoMicrosoft @@ -2711,7 +2711,7 @@ WenZhangXiaomi AI Lab JianLuanXiaomi AI Lab BinWangXiaomi AI Lab - DegenHuangDalian University of Technology + DegenHuangDalian University of Technology JinsongSuXiamen university 3479-3491 Text image translation (TIT) aims to translate the source texts embedded in the image to target translations, which has a wide range of applications and thus has important research value. However, current studies on TIT are confronted with two main bottlenecks: 1) this task lacks a publicly available TIT dataset, 2) dominant models are constructed in a cascaded manner, which tends to suffer from the error propagation of optical character recognition (OCR). In this work, we first annotate a Chinese-English TIT dataset named OCRMT30K, providing convenience for subsequent studies. Then, we propose a TIT model with a multimodal codebook, which is able to associate the image with relevant texts, providing useful supplementary information for translation. Moreover, we present a multi-stage training framework involving text machine translation, image-text alignment, and TIT tasks, which fully exploits additional bilingual texts, OCR dataset and our OCRMT30K dataset to train our model. Extensive experiments and in-depth analyses strongly demonstrate the effectiveness of our proposed model and training framework. @@ -2814,7 +2814,7 @@ <fixed-case>DAMP</fixed-case>: Doubly Aligned Multilingual Parser for Task-Oriented Dialogue WilliamHeldGeorgia Tech ChristopherHideyGoogle - FeiLiuGoogle + FeiLiuGoogle EricZhuGoogle RahulGoelGoogle DiyiYangStanford University @@ -2884,7 +2884,7 @@ RanZmigrodUniversity of Cambridge TimVieiraJohns Hopkins University RyanCotterellETH Zürich - JasonEisnerJohns Hopkins University + Microsoft Corporation + JasonEisnerJohns Hopkins University + Microsoft Corporation 3687-3713 We present Earley’s (1970) context-free parsing algorithm as a deduction system, incorporating various known and new speed-ups. In particular, our presentation supports a known worst-case runtime improvement from Earley’s (1970) O(N^3|G||R|), which is unworkable for the large grammars that arise in natural language processing, to O(N^3|G|), which matches the complexity of CKY on a binarized version of the grammar G. Here N is the length of the sentence, |R| is the number of productions in G, and |G| is the total length of those productions. We also provide a version that achieves runtime of O(N^3|M|) with |M| \leq |G| when the grammar is represented compactly as a single finite-state automaton M (this is partly novel). We carefully treat the generalization to semiring-weighted deduction, preprocessing the grammar like Stolcke (1995) to eliminate the possibility of deduction cycles, and further generalize Stolcke’s method to compute the weights of sentence prefixes. We also provide implementation details for efficient execution, ensuring that on a preprocessed grammar, the semiring-weighted versions of our methods have the same asymptotic runtime and space requirements as the unweighted methods, including sub-cubic runtime on some grammars. 2023.acl-long.204 @@ -2983,7 +2983,7 @@ Ze-FengGaoRenmin University of China KunZhouRenmin University of China PeiyuLiuRenmin University of China - Wayne XinZhaoRUC + Wayne XinZhaoRUC Ji-RongWenRenmin University of China 3819-3834 By scaling the model size, large pre-trained language models (PLMs) have shown remarkable performance in various natural language processing tasks, mostly outperforming small PLMs by a large margin. However, due to the high computational cost, the huge number of parameters also restricts the applicability of large PLMs in real-world systems. In this paper, we focus on scaling up the parameters of PLMs only during fine-tuning, to benefit from the over-parameterization, while without increasing the inference latency. Given a relatively small PLM, we over-parameterize it by employing a matrix product operator, an efficient and almost lossless decomposition method to factorize its contained parameter matrices into a set of higher-dimensional tensors.Considering the efficiency, we further propose both static and dynamic strategies to select the most important parameter matrices for over-parameterization.Extensive experiments have demonstrated that our approach can significantly boost the fine-tuning performance of small PLMs and even help small PLMs outperform 3\times parameterized larger ones.Our code is publicly available at https://github.com/zfgao66/OPF. @@ -3009,7 +3009,7 @@ NaokiOtaniCarnegie Mellon University JunArakiBosch Research HyeongSikKimRobert Bosch LLC - EduardHovyUniversity of Melbourne + EduardHovyUniversity of Melbourne 3856-3874 Recent data-driven conversational models are able to return fluent, consistent, and informative responses to many kinds of requests and utterances in task-oriented scenarios. However, these responses are typically limited to just the immediate local topic instead of being wider-ranging and proactively taking the conversation further, for example making suggestions to help customers achieve their goals. This inadequacy reflects a lack of understanding of the interlocutor’s situation and implicit goal. To address the problem, we introduce a task of proactive response selection based on situational information. We present a manually-curated dataset of 1.7k English conversation examples that include situational background information plus for each conversation a set of responses, only some of which are acceptable in the situation. A responsive and informed conversation system should select the appropriate responses and avoid inappropriate ones; doing so demonstrates the ability to adequately understand the initiating request and situation. Our benchmark experiments show that this is not an easy task even for strong neural models, offering opportunities for future research. 2023.acl-long.214 @@ -3087,7 +3087,7 @@ ZhengyuanLiuInstitute for Infocomm Research, A*STAR Yong KeongYapDSO National Laboratories Hai LeongChieuDSO National Laboratories - NancyChenInstitute for Infocomm Research, A*STAR + NancyChenInstitute for Infocomm Research, A*STAR 3987-4001 Stance detection determines whether the author of a piece of text is in favor of, against, or neutral towards a specified target, and can be used to gain valuable insights into social media. The ubiquitous indirect referral of targets makes this task challenging, as it requires computational solutions to model semantic features and infer the corresponding implications from a literal statement. Moreover, the limited amount of available training data leads to subpar performance in out-of-domain and cross-target scenarios, as data-driven approaches are prone to rely on superficial and domain-specific features. In this work, we decompose the stance detection task from a linguistic perspective, and investigate key components and inference paths in this task. The stance triangle is a generic linguistic framework previously proposed to describe the fundamental ways people express their stance. We further expand it by characterizing the relationship between explicit and implicit objects. We then use the framework to extend one single training corpus with additional annotation. Experimental results show that strategically-enriched data can significantly improve the performance on out-of-domain and cross-target evaluation. 2023.acl-long.220 @@ -3169,7 +3169,7 @@ YuSunFudan University XiaonanLiFudan University YunhuaZhouFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University XipengQiuFudan University 4096-4122 Information Extraction (IE) spans several tasks with different output structures, such as named entity recognition, relation extraction and event extraction. Previously, those tasks were solved with different models because of diverse task output structures. Through re-examining IE tasks, we find that all of them can be interpreted as extracting spans and span relations. They can further be decomposed into token-pair classification tasks by using the start and end token of a span to pinpoint the span, and using the start-to-start and end-to-end token pairs of two spans to determine the relation. Based on the reformulation, we propose a Unified Token-pair Classification architecture for Information Extraction (UTC-IE), where we introduce Plusformer on top of the token-pair feature matrix. Specifically, it models axis-aware interaction with plus-shaped self-attention and local interaction with Convolutional Neural Network over token pairs. Experiments show that our approach outperforms task-specific and unified models on all tasks in 10 datasets, and achieves better or comparable results on 2 joint IE datasets. Moreover, UTC-IE speeds up over state-of-the-art models on IE tasks significantly in most datasets, which verifies the effectiveness of our architecture. @@ -3204,10 +3204,10 @@ LinyongNanYale University RuilinHanYale University SimengHanYale University - ShafiqJotyNanyang Technological University; Salesforce AI Research + ShafiqJotyNanyang Technological University; Salesforce AI Research Chien-ShengWuSalesforce CaimingXiongSalesforce - DragomirRadevYale University + DragomirRadevYale University 4140-4170 Human evaluation is the foundation upon which the evaluation of both summarization systems and automatic metrics rests. However, existing human evaluation studies for summarization either exhibit a low inter-annotator agreement or have insufficient scale, and an in-depth analysis of human evaluation is lacking. Therefore, we address the shortcomings of existing summarization evaluation along the following axes: (1) We propose a modified summarization salience protocol, Atomic Content Units (ACUs), which is based on fine-grained semantic units and allows for a high inter-annotator agreement. (2) We curate the Robust Summarization Evaluation (RoSE) benchmark, a large human evaluation dataset consisting of 22,000 summary-level annotations over 28 top-performing systems on three datasets. (3) We conduct a comparative study of four human evaluation protocols, underscoring potential confounding factors in evaluation setups. (4) We evaluate 50 automatic metrics and their variants using the collected human annotations across evaluation protocols and demonstrate how our benchmark leads to more statistically stable and significant results. The metrics we benchmarked include recent methods based on large language models (LLMs), GPTScore and G-Eval. Furthermore, our findings have important implications for evaluating LLMs, as we show that LLMs adjusted by human feedback (e.g., GPT-3.5) may overfit unconstrained human evaluation, which is affected by the annotators’ prior, input-agnostic preferences, calling for more robust, targeted evaluation methods. 2023.acl-long.228 @@ -3313,8 +3313,8 @@ LaurieCristTripadvisor MishaBritanAmazon WouterLeeuwisAmazon - GokhanTurAmazon - PremNatarajanCapital One + GokhanTurAmazon + PremNatarajanCapital One 4277-4302 We present the MASSIVE dataset–Multilingual Amazon Slu resource package (SLURP) for Slot-filling, Intent classification, and Virtual assistant Evaluation. MASSIVE contains 1M realistic, parallel, labeled virtual assistant utterances spanning 51 languages, 18 domains, 60 intents, and 55 slots. MASSIVE was created by tasking professional translators to localize the English-only SLURP dataset into 50 typologically diverse languages from 29 genera. We also present modeling results on XLM-R and mT5, including exact match accuracy, intent classification accuracy, and slot-filling F1 score. We have released our dataset, modeling code, and models publicly. 2023.acl-long.235 @@ -3499,7 +3499,7 @@ TianxiangSunFudan University QiongTangFudan University KuanningWangFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University XipengQiuFudan University 4521-4534 We present DiffusionBERT, a new generative masked language model based on discrete dif- fusion models. Diffusion models and many pre- trained language models have a shared training objective, i.e., denoising, making it possible to combine the two powerful models and enjoy the best of both worlds. On the one hand, dif- fusion models offer a promising training strat- egy that helps improve the generation quality. On the other hand, pre-trained denoising lan- guage models (e.g., BERT) can be used as a good initialization that accelerates convergence. We explore training BERT to learn the reverse process of a discrete diffusion process with an absorbing state and elucidate several designs to improve it. First, we propose a new noise schedule for the forward diffusion process that controls the degree of noise added at each step based on the information of each token. Sec- ond, we investigate several designs of incorpo- rating the time step into BERT. Experiments on unconditional text generation demonstrate that DiffusionBERT achieves significant improve- ment over existing diffusion models for text (e.g., D3PM and Diffusion-LM) and previous generative masked language models in terms of perplexity and BLEU score. Promising re- sults in conditional generation tasks show that DiffusionBERT can generate texts of compa- rable quality and more diverse than a series of established baselines. @@ -3510,7 +3510,7 @@ Lifting the Curse of Capacity Gap in Distilling Language Models - ChenZhangBeijing Institute of Technology + ChenZhangBeijing Institute of Technology YangYangMeituan JiahaoLiuMeituan JingangWangMeituan @@ -3528,7 +3528,7 @@ Towards Faithful Dialogues via Focus Learning YifanDengUniversity of Chinese Academy of Sciences XingshengZhangInstitute of Information Engineering, Chinese Academy of Sciences - HeyanHuangBeijing Institute of Technology + HeyanHuangBeijing Institute of Technology YueHuInstitute of Information Engineering, Chinese Academy of Sciences 4554-4566 Maintaining faithfulness between responses and knowledge is an important research topic for building reliable knowledge-grounded dialogue systems. Existing models heavily rely on elaborate data engineering or increasing the model’s parameters ignoring to track the tokens that significantly influence losses, which is decisive for the optimization direction of the model in each iteration. To address this issue, we propose Focus Learning (FocusL), a novel learning approach that adjusts the contribution of each token to the optimization direction by directly scaling the corresponding objective loss. Specifically, we first introduce a positioning method by utilizing similarity distributions between knowledge and each response token to locate knowledge-aware tokens. Then, we further design a similarity-to-weight transformation to provide dynamic token-level weights for the cross-entropy loss. Finally, we use the weighted loss to encourage the model to pay special attention to the knowledge utilization. Experimental results demonstrate that our method achieves the new state-of-the-art results and generates more reliable responses while maintaining training stability. @@ -3552,7 +3552,7 @@ Prompter: Zero-shot Adaptive Prefixes for Dialogue State Tracking Domain Adaptation TahaAksuInstitute for Infocomm Research, A*STAR / School of Computing, National University of Singapore Min-YenKanNational University of Singapore - NancyChenInstitute for Infocomm Research, A*STAR + NancyChenInstitute for Infocomm Research, A*STAR 4588-4603 A challenge in the Dialogue State Tracking (DST) field is adapting models to new domains without using any supervised data — zero-shot domain adaptation. Parameter-Efficient Transfer Learning (PETL) has the potential to address this problem due to its robustness. However, it has yet to be applied to the zero-shot scenarios, as it is not clear how to apply it unsupervisedly. Our method, Prompter, uses descriptions of target domain slots to generate dynamic prefixes that are concatenated to the key and values at each layer’s self-attention mechanism. This allows for the use of prefix-tuning in zero-shot. Prompter outperforms previous methods on both the MultiWOZ and SGD benchmarks. In generating prefixes, our analyses find that Prompter not only utilizes the semantics of slot descriptions but also how often the slots appear together in conversation. Moreover, Prompter’s gains are due to its improved ability to distinguish ”none”-valued dialogue slots, compared against baselines. 2023.acl-long.252 @@ -3662,7 +3662,7 @@ ZhihaoFanFudan University HaijunShanCEC GienTech Technology Co.,Ltd. QiZhangFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 4706-4718 Logical reasoning over incomplete knowledge graphs to answer complex logical queries is a challenging task. With the emergence of new entities and relations in constantly evolving KGs, inductive logical reasoning over KGs has become a crucial problem. However, previous PLMs-based methods struggle to model the logical structures of complex queries, which limits their ability to generalize within the same structure. In this paper, we propose a structure-modeled textual encoding framework for inductive logical reasoning over KGs. It encodes linearized query structures and entities using pre-trained language models to find answers. For structure modeling of complex queries, we design stepwise instructions that implicitly prompt PLMs on the execution order of geometric operations in each query. We further separately model different geometric operations (i.e., projection, intersection, and union) on the representation space using a pre-trained encoder with additional attention and maxout layers to enhance structured modeling. We conduct experiments on two inductive logical reasoning datasets and three transductive datasets. The results demonstrate the effectiveness of our method on logical reasoning over KGs in both inductive and transductive settings. 2023.acl-long.259 @@ -3699,7 +3699,7 @@ Richard YuanzhePangNew York University VishakhPadmakumarNew York University ThibaultSellamGoogle - AnkurParikhGoogle + AnkurParikhGoogle HeHeNew York University 4746-4763 To align conditional text generation model outputs with desired behaviors, there has been an increasing focus on training the model using reinforcement learning (RL) with reward functions learned from human annotations. Under this framework, we identify three common cases where high rewards are incorrectly assigned to undesirable patterns: noise-induced spurious correlation, naturally occurring spurious correlation, and covariate shift. We show that even though learned metrics achieve high performance on the distribution of the data used to train the reward function, the undesirable patterns may be amplified during RL training of the text generation model. While there has been discussion about reward gaming in the RL or safety community, in this discussion piece, we would like to highlight reward gaming in the natural language generation (NLG) community using concrete conditional text generation examples and discuss potential fixes and areas for future work. @@ -3728,7 +3728,7 @@ XingxuanLiNanyang Technological University MeghThakkarBITS Pilani XinLiAlibaba Group - ShafiqJotyNanyang Technological University; Salesforce AI Research + ShafiqJotyNanyang Technological University; Salesforce AI Research LuoSiAlibaba Group Inc LidongBingAlibaba DAMO Academy 4799-4816 @@ -3780,8 +3780,8 @@ Ethical Considerations for Machine Translation of Indigenous Languages: Giving a Voice to the Speakers ManuelMagerAmazon AWS - ElisabethMagerUniversidad Nacional Autonoma de Mexico - KatharinaKannUniversity of Colorado Boulder + ElisabethMagerUniversidad Nacional Autonoma de Mexico + KatharinaKannUniversity of Colorado Boulder Ngoc ThangVuUniversity of Stuttgart 4871-4897 In recent years machine translation has become very successful for high-resource language pairs. This has also sparked new interest in research on the automatic translation of low-resource languages, including Indigenous languages. However, the latter are deeply related to the ethnic and cultural groups that speak (or used to speak) them. The data collection, modeling and deploying machine translation systems thus result in new ethical questions that must be addressed. Motivated by this, we first survey the existing literature on ethical considerations for the documentation, translation, and general natural language processing for Indigenous languages. Afterward, we conduct and analyze an interview study to shed light on the positions of community leaders, teachers, and language activists regarding ethical concerns for the automatic translation of their languages. Our results show that the inclusion, at different degrees, of native speakers and community members is vital to performing better and more ethical research on Indigenous languages. @@ -3819,8 +3819,8 @@ Privacy-Preserving Domain Adaptation of Semantic Parsers FatemehsadatMireshghallahUC San Diego YuSuThe Ohio State University - TatsunoriHashimotoStanford - JasonEisnerJohns Hopkins University + Microsoft Corporation + TatsunoriHashimotoStanford + JasonEisnerJohns Hopkins University + Microsoft Corporation RichardShinMicrosoft Semantic Machines 4950-4970 Task-oriented dialogue systems often assist users with personal or confidential matters. For this reason, the developers of such a system are generally prohibited from observing actual usage. So how can they know where the system is failing and needs more training data or new functionality? In this work, we study ways in which realistic user utterances can be generated synthetically, to help increase the linguistic and functional coverage of the system, without compromising the privacy of actual users. To this end, we propose a two-stage Differentially Private (DP) generation method which first generates latent semantic parses, and then generates utterances based on the parses. Our proposed approach improves MAUVE by 2.5X and parse tree function-type overlap by 1.3X relative to current approaches for private synthetic data generation, improving both on fluency and semantic coverage. We further validate our approach on a realistic domain adaptation task of adding new functionality from private user data to a semantic parser, and show overall gains of 8.5% points on its accuracy with the new feature. @@ -3871,7 +3871,7 @@ YutaoZhuUniversity of Montreal YihongWuUniversite de Montreal KaiyuHuangTsinghua University - Jian-YunNieUniversity of Montreal + Jian-YunNieUniversity of Montreal 4998-5012 In conversational search, the user’s real search intent for the current conversation turn is dependent on the previous conversation history. It is challenging to determine a good search query from the whole conversation context. To avoid the expensive re-training of the query encoder, most existing methods try to learn a rewriting model to de-contextualize the current query by mimicking the manual query rewriting. However, manually rewritten queries are not always the best search queries. Thus, training a rewriting model on them would lead to sub-optimal queries. Another useful information to enhance the search query is the potential answer to the question. In this paper, we propose ConvGQR, a new framework to reformulate conversational queries based on generative pre-trained language models (PLMs), one for query rewriting and another for generating potential answers. By combining both, ConvGQR can produce better search queries. In addition, to relate query reformulation to the retrieval task, we propose a knowledge infusion mechanism to optimize both query reformulation and retrieval. Extensive experiments on four conversational search datasets demonstrate the effectiveness of ConvGQR. 2023.acl-long.274 @@ -3885,8 +3885,8 @@ MahdiNamazifarAmazon Alexa AI DevamanyuHazarikaAmazon AishwaryaPadmakumarAmazon - YangLiuAmazon - DilekHakkani-TurAmazon Alexa AI + YangLiuAmazon + DilekHakkani-TurAmazon Alexa AI 5013-5035 Large pre-trained language models (PLMs) have been shown to retain implicit knowledge within their parameters. To enhance this implicit knowledge, we propose Knowledge Injection into Language Models (KILM), a novel approach that injects entity-related knowledge into encoder-decoder PLMs, via a generative knowledge infilling objective through continued pre-training. This is done without architectural modifications to the PLMs or adding additional parameters. Experimental results over a suite of knowledge-intensive tasks spanning numerous datasets show that KILM enables models to retain more knowledge and hallucinate less while preserving their original performance on general NLU and NLG tasks. KILM also demonstrates improved zero-shot performances on tasks such as entity disambiguation, outperforming state-of-the-art models having 30x more parameters. 2023.acl-long.275 @@ -3944,13 +3944,13 @@ SongWangMicrosoft Azure AI YangLiuMicrosoft RuochenXuMicrosoft - HanyHassanMicrosoft + HanyHassanMicrosoft YuShiMicrosoft ChenguangZhuMicrosoft Cognitive Services Research Group WayneXiongMicrosoft Corp. MichaelZengMicrosoft Corp JianfengGaoMicrosoft Research, Redmond - XuedongHuangMicrosoft Cloud and AI + XuedongHuangMicrosoft Cloud and AI 5095-5112 This paper presents Z-Code++, a new pre-trained language model optimized for abstractive text summarization. The model extends the state-of-the-art encoder-decoder model using three techniques. First, we use a two-phase pre-training to improve the model’s performance on low-resource summarization tasks. The model is first pre-trained using text corpora for language understanding, then is continually pre-trained on summarization corpora for grounded text generation. Second, we replace self-attention layers in the encoder with disentangled attention layers, where each word is represented using two vectors that encode its content and position, respectively. Third, we use fusion-in-encoder, a simple yet effective method of encoding long sequences in a hierarchical manner. Z-Code++ createsa new state-of-the-art on 9 of 13 text summarization tasks across 5 languages. Our model is parameter-efficient in that it outperforms the 600x larger PaLM540B on XSum, and the finetuned 200x larger GPT3175B on SAMSum. In zero-shot and few-shot settings, our model substantially outperforms the competing models. 2023.acl-long.279 @@ -3991,7 +3991,7 @@ Randomized Smoothing with Masked Inference for Adversarially Robust Text Classifications Han CheolMoonNanyang Technological University - ShafiqJotyNanyang Technological University; Salesforce AI Research + ShafiqJotyNanyang Technological University; Salesforce AI Research RuochenZhaoNanyang Technological University MeghThakkarBITS Pilani ChiXuNational University of Defense Technology @@ -4046,7 +4046,7 @@ <fixed-case>M</fixed-case>eta<fixed-case>A</fixed-case>dapt: Domain Adaptive Few-Shot Misinformation Detection via Meta Learning ZhenruiYueUniversity of Illinois Urbana-Champaign HuiminZengUniversity of Illinois Urbana-Champaign - YangZhangUniversity of Illinois Urbana-Champaign + YangZhangUniversity of Illinois Urbana-Champaign LanyuShangUniversity of Illinois at Urbana Champaign DongWangUniversity of Illinois at Urbana Champaign 5223-5239 @@ -4182,7 +4182,7 @@ MatthieuFuteralInria; Departement d’informatique de l’ENS, CNRS, PSL Research University CordeliaSchmidInria IvanLaptevINRIA - BenoîtSagotInria + BenoîtSagotInria RachelBawdenInria 5394-5413 One of the major challenges of machine translation (MT) is ambiguity, which can in some cases be resolved by accompanying context such as images. However, recent work in multimodal MT (MMT) has shown that obtaining improvements from images is challenging, limited not only by the difficulty of building effective cross-modal representations, but also by the lack of specific evaluation and training data. We present a new MMT approach based on a strong text-only MT model, which uses neural adapters, a novel guided self-attention mechanism and which is jointly trained on both visually-conditioned masking and MMT. We also introduce CoMMuTE, a Contrastive Multilingual Multimodal Translation Evaluation set of ambiguous sentences and their possible translations, accompanied by disambiguating images corresponding to each translation. Our approach obtains competitive results compared to strong text-only models on standard English→French, English→German and English→Czech benchmarks and outperforms baselines and state-of-the-art MMT systems by a large margin on our contrastive test set. Our code and CoMMuTE are freely available. @@ -4208,7 +4208,7 @@ TaoWangByteDance AI Lab ChengqiZhaoByteDance Inc. ShujianHuangNational Key Laboratory for Novel Software Technology, Nanjing University - JiajunChenNanjing University + JiajunChenNanjing University MingxuanWangBytedance AI Lab 5428-5443 Automatic metrics play a crucial role in machine translation. Despite the widespread use of n-gram-based metrics, there has been a recent surge in the development of pre-trained model-based metrics that focus on measuring sentence semantics. However, these neural metrics, while achieving higher correlations with human evaluations, are often considered to be black boxes with potential biases that are difficult to detect. In this study, we systematically analyze and compare various mainstream and cutting-edge automatic metrics from the perspective of their guidance for training machine translation systems. Through Minimum Risk Training (MRT), we find that certain metrics exhibit robustness defects, such as the presence of universal adversarial translations in BLEURT and BARTScore. In-depth analysis suggests two main causes of these robustness deficits: distribution biases in the training datasets, and the tendency of the metric paradigm. By incorporating token-level constraints, we enhance the robustness of evaluation metrics, which in turn leads to an improvement in the performance of machine translation systems. Codes are available at https://github.com/powerpuffpomelo/fairseq_mrt. @@ -4266,7 +4266,7 @@ JavierFerrandoUPC Gerard I.GállegoUniversitat Politècnica de Catalunya IoannisTsiamasPolytechnic University of Catalonia (UPC) - Marta R.Costa-jussàMeta AI + Marta R.Costa-jussàMeta AI 5486-5513 Language Generation Models produce words based on the previous context. Although existing methods offer input attributions as explanations for a model’s prediction, it is still unclear how prior words affect the model’s decision throughout the layers. In this work, we leverage recent advances in explainability of the Transformer and present a procedure to analyze models for language generation. Using contrastive examples, we compare the alignment of our explanations with evidence of the linguistic phenomena, and show that our method consistently aligns better than gradient-based and perturbation-based baselines. Then, we investigate the role of MLPs inside the Transformer and show that they learn features that help the model predict words that are grammatically acceptable. Lastly, we apply our method to Neural Machine Translation models, and demonstrate that they generate human-like source-target alignments for building predictions. 2023.acl-long.301 @@ -4294,7 +4294,7 @@ ShaLiUniversity of Illinois Urbana-Champaign ManlingLiUIUC XudongLinColumbia University - Shih-FuChangColumbia University + Shih-FuChangColumbia University MohitBansalUniversity of North Carolina at Chapel Hill HengJiUniversity of Illinois at Urbana-Champaign and Amazon (Amazon Scholar) 5529-5545 @@ -4398,7 +4398,7 @@ KarunKumarComcast PontusStenetorpUniversity College London JimmyLinUniversity of Waterloo - FerhanTureComcast Applied AI Research + FerhanTureComcast Applied AI Research 5644-5659 Diffusion models are a milestone in text-to-image generation, but they remain poorly understood, lacking interpretability analyses. In this paper, we perform a text-image attribution analysis on Stable Diffusion, a recently open-sourced model. To produce attribution maps, we upscale and aggregate cross-attention maps in the denoising module, naming our method DAAM. We validate it by testing its segmentation ability on nouns, as well as its generalized attribution quality on all parts of speech, rated by humans. On two generated datasets, we attain a competitive 58.8-64.8 mIoU on noun segmentation and fair to good mean opinion scores (3.4-4.2) on generalized attribution. Then, we apply DAAM to study the role of syntax in the pixel space across head–dependent heat map interaction patterns for ten common dependency relations. We show that, for some relations, the head map consistently subsumes the dependent, while the opposite is true for others. Finally, we study several semantic phenomena, focusing on feature entanglement; we find that the presence of cohyponyms worsens generation quality by 9%, and descriptive adjectives attend too broadly. We are the first to interpret large diffusion models from a visuolinguistic perspective, which enables future research. Our code is at https://github.com/castorini/daam. 2023.acl-long.310 @@ -4474,7 +4474,7 @@ Unsupervised Discontinuous Constituency Parsing with Mildly Context-Sensitive Grammars SonglinYangShanghaiTech University - RogerLevyMassachusetts Institute of Technology + RogerLevyMassachusetts Institute of Technology YoonKimMIT 5747-5766 We study grammar induction with mildly context-sensitive grammars for unsupervised discontinuous parsing. Using the probabilistic linear context-free rewriting system (LCFRS) formalism, our approach fixes the rule structure in advance and focuses on parameter learning with maximum likelihood. To reduce the computational complexity of both parsing and parameter estimation, we restrict the grammar formalism to LCFRS-2 (i.e., binary LCFRS with fan-out two) and further discard rules that require O(l^6) time to parse, reducing inference to O(l^5). We find that using a large number of nonterminals is beneficial and thus make use of tensor decomposition-based rank-space dynamic programming with an embedding-based parameterization of rule probabilities to scale up the number of nonterminals. Experiments on German and Dutch show that our approach is able to induce linguistically meaningful trees with continuous and discontinuous structures. @@ -4488,7 +4488,7 @@ SatwikBhattamishraUniversity of Oxford ArkilPatelMila and McGill University VarunKanadeUniversity of Oxford - PhilBlunsomUniversity of Oxford + PhilBlunsomUniversity of Oxford 5767-5791 Despite the widespread success of Transformers on NLP tasks, recent works have found that they struggle to model several formal languages when compared to recurrent models. This raises the question of why Transformers perform well in practice and whether they have any properties that enable them to generalize better than recurrent models. In this work, we conduct an extensive empirical study on Boolean functions to demonstrate the following: (i) Random Transformers are relatively more biased towards functions of low sensitivity. (ii) When trained on Boolean functions, both Transformers and LSTMs prioritize learning functions of low sensitivity, with Transformers ultimately converging to functions of lower sensitivity. (iii) On sparse Boolean functions which have low sensitivity, we find that Transformers generalize near perfectly even in the presence of noisy labels whereas LSTMs overfit and achieve poor generalization accuracy. Overall, our results provide strong quantifiable evidence that suggests differences in the inductive biases of Transformers and recurrent models which may help explain Transformer’s effective generalization performance despite relatively limited expressiveness. 2023.acl-long.317 @@ -4530,7 +4530,7 @@ Verify-and-Edit: A Knowledge-Enhanced Chain-of-Thought Framework RuochenZhaoNanyang Technological University XingxuanLiNanyang Technological University - ShafiqJotyNanyang Technological University; Salesforce AI Research + ShafiqJotyNanyang Technological University; Salesforce AI Research ChengweiQinNanyang Technological University LidongBingAlibaba DAMO Academy 5823-5840 @@ -4562,7 +4562,7 @@ Node Placement in Argument Maps: Modeling Unidirectional Relations in High & Low-Resource Scenarios ImanJundiUniversity of Stuttgart NeeleFalkUniversity of Stuttgart - Eva MariaVecchiUniversitat Stuttgart, Institut fur Maschinelle Sprachverarbeitung + Eva MariaVecchiUniversitat Stuttgart, Institut fur Maschinelle Sprachverarbeitung GabriellaLapesaUniversität Stuttgart, Institut für Maschinelle Sprachverarbeitung 5854-5876 Argument maps structure discourse into nodes in a tree with each node being an argument that supports or opposes its parent argument. This format is more comprehensible and less redundant compared to an unstructured one. Exploring those maps and maintaining their structure by placing new arguments under suitable parents is more challenging for users with huge maps that are typical in online discussions. To support those users, we introduce the task of node placement: suggesting candidate nodes as parents for a new contribution. We establish an upper-bound of human performance, and conduct experiments with models of various sizes and training strategies. We experiment with a selection of maps from Kialo, drawn from a heterogeneous set of domains. Based on an annotation study, we highlight the ambiguity of the task that makes it challenging for both humans and models. We examine the unidirectional relation between tree nodes and show that encoding a node into different embeddings for each of the parent and child cases improves performance. We further show the few-shot effectiveness of our approach. @@ -4602,7 +4602,7 @@ Connective Prediction for Implicit Discourse Relation Recognition via Knowledge Distillation HongyiWuEast China Normal University HaoZhouEast China Normal University - ManLanEast China Normal University + ManLanEast China Normal University YuanbinWuEast China Normal University YadongZhangEast China Normal University 5908-5923 @@ -4628,7 +4628,7 @@ ZhihaoFanFudan University JingjingChenFudan University QiZhangFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University ZhongyuWeiSchool of Data Science, Fudan University 5939-5958 Multilingual Vision-Language Pre-training (VLP) is a promising but challenging topic due to the lack of large-scale multilingual image-text pairs. Existing works address the problem by translating English data into other languages, which is intuitive and the generated data is usually limited in form and scale. In this paper, we explore a more practical and scalable setting: weakly supervised multilingual VLP with only English image-text pairs and multilingual text corpora. We argue that the universal multilingual representation learned from texts allows the cross-modal interaction learned in English to be transferable to other languages. To this end, we propose a framework to effectively unify cross-lingual and cross-modal pre-training. For unified modeling on different data, we design an architecture with flexible modules to learn different interactions. Moreover, two unified tasks are introduced to efficiently guide the unified cross-lingual cross-modal learning. Extensive experiments demonstrate that our pre-trained model learns universal multilingual multimodal representations, allowing effective cross-lingual transfer on multimodal tasks. Code and models are available at https://github.com/FudanDISC/weakly-supervised-mVLP. @@ -4661,7 +4661,7 @@ QianLiuNanyang Technological University MeishanZhangHarbin Institute of Technology (Shenzhen), China MinZhangHarbin Institute of Technology (Shenzhen) - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore 5980-5994 In this work, we investigate a more realistic unsupervised multimodal machine translation (UMMT) setup, inference-time image-free UMMT, where the model is trained with source-text image pairs, and tested with only source-text inputs. First, we represent the input images and texts with the visual and language scene graphs (SG), where such fine-grained vision-language features ensure a holistic understanding of the semantics. To enable pure-text input during inference, we devise a visual scene hallucination mechanism that dynamically generates pseudo visual SG from the given textual SG. Several SG-pivoting based learning objectives are introduced for unsupervised translation training. On the benchmark Multi30K data, our SG-based method outperforms the best-performing baseline by significant BLEU scores on the task and setup, helping yield translations with better completeness, relevance and fluency without relying on paired images. Further in-depth analyses reveal how our model advances in the task setting. 2023.acl-long.329 @@ -4674,9 +4674,9 @@ TingtingMaHarbin Institute of Technology QianhuiWuMicrosoft Corporation HuiqiangJiangMicrosoft Research Asia - Börje F.KarlssonBeijing Academy of Artificial Intelligence (BAAI) - TiejunZhaoHarbin Institute of Technology - Chin-YewLinMicrosoft Research + Börje F.KarlssonBeijing Academy of Artificial Intelligence (BAAI) + TiejunZhaoHarbin Institute of Technology + Chin-YewLinMicrosoft Research 5995-6009 Cross-lingual named entity recognition (NER) aims to train an NER system that generalizes well to a target language by leveraging labeled data in a given source language. Previous work alleviates the data scarcity problem by translating source-language labeled data or performing knowledge distillation on target-language unlabeled data. However, these methods may suffer from label noise due to the automatic labeling process. In this paper, we propose CoLaDa, a Collaborative Label Denoising Framework, to address this problem. Specifically, we first explore a model-collaboration-based denoising scheme that enables models trained on different data sources to collaboratively denoise pseudo labels used by each other. We then present an instance-collaboration-based strategy that considers the label consistency of each token’s neighborhood in the representation space for denoising. Experiments on different benchmark datasets show that the proposed CoLaDa achieves superior results compared to previous methods, especially when generalizing to distant languages. 2023.acl-long.330 @@ -4689,7 +4689,7 @@ JiaoSunUniversity of Southern California ThibaultSellamGoogle ElizabethClarkGoogle Research - TuVuUniversity of Massachusetts Amherst + TuVuUniversity of Massachusetts Amherst TimothyDozatGoogle DanGarretteGoogle Research AdityaSiddhantGoogle @@ -4723,7 +4723,7 @@ AaronMuellerThe Johns Hopkins University KanishkaMisraPurdue University KerenFuentesMeta - RogerLevyMassachusetts Institute of Technology + RogerLevyMassachusetts Institute of Technology AdinaWilliamsMeta Platforms, Inc. 6043-6063 Targeted syntactic evaluations of language models ask whether models show stable preferences for syntactically acceptable content over minimal-pair unacceptable inputs. Our best syntactic evaluation datasets, however, provide substantially less linguistic context than models receive during pretraining. This mismatch raises an important question: how robust are models’ syntactic judgements across different contexts? In this paper, we vary the input contexts based on: length, the types of syntactic phenomena it contains, and whether or not there are grammatical violations. We find that model judgements are generally robust when placed in randomly sampled linguistic contexts, but are unstable when contexts match the test stimuli in syntactic structure. Among all tested models (GPT-2 and five variants of OPT), we find that model performance is affected when we provided contexts with matching syntactic structure: performance significantly improves when contexts are acceptable, and it significantly declines when they are unacceptable. This effect is amplified by the length of the context, except for unrelated inputs. We show that these changes in model performance are not explainable by acceptability-preserving syntactic perturbations. This sensitivity to highly specific syntactic features of the context can only be explained by the models’ implicit in-context learning abilities. @@ -4742,7 +4742,7 @@ WenlinZhangZhejiang University XiangruTangYale University BoyuMiZhejiang University - DragomirRadevYale University + DragomirRadevYale University 6064-6081 Despite significant progress having been made in question answering on tabular data (Table QA), it’s unclear whether, and to what extent existing Table QA models are robust to task-specific perturbations, e.g., replacing key question entities or shuffling table columns. To systematically study the robustness of Table QA models, we propose a benchmark called RobuT, which builds upon existing Table QA datasets (WTQ, WikiSQL-Weak, and SQA) and includes human-annotated adversarial perturbations in terms of table header, table content, and question. Our results indicate that both state-of-the-art Table QA models and large language models (e.g., GPT-3) with few-shot learning falter in these adversarial sets. We propose to address this problem by using large language models to generate adversarial examples to enhance training, which significantly improves the robustness of Table QA models. 2023.acl-long.334 @@ -4766,7 +4766,7 @@ <fixed-case>TOME</fixed-case>: A Two-stage Approach for Model-based Retrieval RuiyangRenRenmin University of China - Wayne XinZhaoRUC + Wayne XinZhaoRUC JingLiuBaidu Inc. HuaWuBaidu Ji-RongWenRenmin University of China @@ -4796,7 +4796,7 @@ MoritzPlenzHeidelberg University JuriOpitzHeidelberg University PhilippHeinischBielefeld University - PhilippCimianoUniv. Bielefeld + PhilippCimianoUniv. Bielefeld AnetteFrankHeidelberg University 6130-6158 Arguments often do not make explicit how a conclusion follows from its premises. To compensate for this lack, we enrich arguments with structured background knowledge to support knowledge-intense argumentation tasks. We present a new unsupervised method for constructing Contextualized Commonsense Knowledge Graphs (CCKGs) that selects contextually relevant knowledge from large knowledge graphs (KGs) efficiently and at high quality. Our work goes beyond context-insensitive knowledge extraction heuristics by computing semantic similarity between KG triplets and textual arguments. Using these triplet similarities as weights, we extract contextualized knowledge paths that connect a conclusion to its premise, while maximizing similarity to the argument. We combine multiple paths into a CCKG that we optionally prune to reduce noise and raise precision. Intrinsic evaluation of the quality of our graphs shows that our method is effective for (re)constructing human explanation graphs. Manual evaluations in a large-scale knowledge selection setup verify high recall and precision of implicit CSK in the CCKGs. Finally, we demonstrate the effectiveness of CCKGs in a knowledge-insensitive argument quality rating task, outperforming strong baselines and rivaling a GPT-3 based system. @@ -4833,7 +4833,7 @@ JanvijaySinghGeorgia Institute of Technology MukundRungtaGeorgia Institute of Technology DiyiYangStanford University - SaifMohammadNational Research Council Canada + SaifMohammadNational Research Council Canada 6192-6208 Citing papers is the primary method through which modern scientific writing discusses and builds on past work. Collectively, citing a diverse set of papers (in time and area of study) is an indicator of how widely the community is reading. Yet, there is little work looking at broad temporal patterns of citation. This work systematically and empirically examines: How far back in time do we tend to go to cite papers? How has that changed over time, and what factors correlate with this citational attention/amnesia? We chose NLP as our domain of interest and analyzed approximately 71.5K papers to show and quantify several key trends in citation. Notably, around 62% of cited papers are from the immediate five years prior to publication, whereas only about 17% are more than ten years old. Furthermore, we show that the median age and age diversity of cited papers were steadily increasing from 1990 to 2014, but since then, the trend has reversed, and current NLP papers have an all-time low temporal citation diversity. Finally, we show that unlike the 1990s, the highly cited papers in the last decade were also papers with the least citation diversity, likely contributing to the intense (and arguably harmful) recency focus. Code, data, and a demo are available on the project homepage. 2023.acl-long.341 @@ -4860,11 +4860,11 @@ AubrieAmstutzTikTok ChadAtallaMicrosoft Su LinBlodgettMicrosoft Research - HalDaumé IIIUMD + HalDaumé IIIUMD AlexandraOlteanuMicrosoft Research EmilyShengMicrosoft Research DanVannMicrosoft - HannaWallachMicrosoft Research + HannaWallachMicrosoft Research 6231-6251 It is critical to measure and mitigate fairness-related harms caused by AI text generation systems, including stereotyping and demeaning harms. To that end, we introduce FairPrism, a dataset of 5,000 examples of AI-generated English text with detailed human annotations covering a diverse set of harms relating to gender and sexuality. FairPrism aims to address several limitations of existing datasets for measuring and mitigating fairness-related harms, including improved transparency, clearer specification of dataset coverage, and accounting for annotator disagreement and harms that are context-dependent. FairPrism’s annotations include the extent of stereotyping and demeaning harms, the demographic groups targeted, and appropriateness for different applications. The annotations also include specific harms that occur in interactive contexts and harms that raise normative concerns when the “speaker” is an AI system. Due to its precision and granularity, FairPrism can be used to diagnose (1) the types of fairness-related harms that AI text generation systems cause, and (2) the potential limitations of mitigation methods, both of which we illustrate through case studies. Finally, the process we followed to develop FairPrism offers a recipe for building improved datasets for measuring and mitigating harms caused by AI systems. 2023.acl-long.343 @@ -4953,7 +4953,7 @@ VaishaliPalUniversity of Amsterdam AndrewYatesUniversity of Amsterdam EvangelosKanoulasUniversity of Amsterdam - Maartende RijkeUniversity of Amsterdam + Maartende RijkeUniversity of Amsterdam 6322-6334 Recent advances in tabular question answering (QA) with large language models are constrained in their coverage and only answer questions over a single table. However, real-world queries are complex in nature, often over multiple tables in a relational database or web page. Single table questions do not involve common table operations such as set operations, Cartesian products (joins), or nested queries. Furthermore, multi-table operations often result in a tabular output, which necessitates table generation capabilities of tabular QA models. To fill this gap, we propose a new task of answering questions over multiple tables. Our model, MultiTabQA, not only answers questions over multiple tables, but also generalizes to generate tabular answers. To enable effective training, we build a pre-training dataset comprising of 132,645 SQL queries and tabular answers. Further, we evaluate the generated tables by introducing table-specific metrics of varying strictness assessing various levels of granularity of the table structure. MultiTabQA outperforms state-of-the-art single table QA models adapted to a multi-table QA setting by finetuning on three datasets: Spider, Atis and GeoQuery. 2023.acl-long.348 @@ -4985,7 +4985,7 @@ <fixed-case>C</fixed-case>o<fixed-case>AD</fixed-case>: Automatic Diagnosis through Symptom and Disease Collaborative Generation HuiminWangTencent Wai ChungKwanThe Chinese University of Hong Kong - Kam-FaiWongDepartment of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong + Kam-FaiWongDepartment of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong YefengZhengTencent 6348-6361 Automatic diagnosis (AD), a critical application of AI in healthcare, employs machine learning techniques to assist doctors in gathering patient symptom information for precise disease diagnosis. The Transformer-based method utilizes an input symptom sequence, predicts itself through auto-regression, and employs the hidden state of the final symptom to determine the disease. Despite its simplicity and superior performance demonstrated, a decline in disease diagnosis accuracy is observed caused by 1) a mismatch between symptoms observed during training and generation, and 2) the effect of different symptom orders on disease prediction. To address the above obstacles, we introduce the CoAD, a novel disease and symptom collaborative generation framework, which incorporates several key innovations to improve AD: 1) aligning sentence-level disease labels with multiple possible symptom inquiry steps to bridge the gap between training and generation; 2) expanding symptom labels for each sub-sequence of symptoms to enhance annotation and eliminate the effect of symptom order; 3) developing a repeated symptom input schema to effectively and efficiently learn the expanded disease and symptom labels. We evaluate the CoAD framework using four datasets, including three public and one private, and demonstrate that it achieves an average 2.3% improvement over previous state-of-the-art results in automatic disease diagnosis. For reproducibility, we release the code and data at https://github.com/KwanWaiChung/coad. @@ -5056,7 +5056,7 @@ <fixed-case>C</fixed-case>ontra<fixed-case>CLM</fixed-case>: Contrastive Learning For Causal Language Model NihalJainAWS AI Labs DejiaoZhangAWS AI Labs - Wasi UddinAhmadAWS AI Labs + Wasi UddinAhmadAWS AI Labs ZijianWangAWS AI Labs FengNanAWS AI XiaopengLiAWS AI Labs @@ -5149,7 +5149,7 @@ MehranKazemiGoogle Research NajoungKimBoston University DeeptiBhatiaGoogle - XinXuGoogle + XinXuGoogle DeepakRamachandranGoogle Research 6547-6568 Remarkable progress has been made on automated reasoning with natural text, by using Large Language Models (LLMs) and methods such as Chain-of-Thought prompting and Selection-Inference. These techniques search for proofs in the forward direction from axioms to the conclusion, which suffers from a combinatorial explosion of the search space, and thus high failure rates for problems requiring longer chains of reasoning. The classical automated reasoning literature has shown that reasoning in the backward direction (i.e. from intended conclusion to supporting axioms) is significantly more efficient at proof-finding. Importing this intuition into the LM setting, we develop a Backward Chaining algorithm, called LAMBADA, that decomposes reasoning into four sub-modules, that are simply implemented by few-shot prompted LLM inference. We show that LAMBADA achieves sizable accuracy boosts over state-of-the-art forward reasoning methods on two challenging logical reasoning datasets, particularly when deep and accurate proof chains are required. @@ -5201,7 +5201,7 @@ YashengWangHuawei Noah’s Ark Lab YitongLiHuawei Technology Co. ltd LifengShangNoah’s Ark Lab Huawei Technologies Co. Ltd. Sha Tin, Hong Kong - Kam-FaiWongDepartment of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong + Kam-FaiWongDepartment of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong RuifengXuHarbin Institute of Technology, Shenzhen 6608-6619 Dialogue models are often enriched with extensive external knowledge to provide informative responses through a retrieval-augmented pipeline. Nevertheless, retrieval-augmented approaches rely on finely annotated retrieval training data and knowledge-grounded response generation data, making it costly to transfer. To tackle this challenge, this paper proposed a retrieval-free approach, KiDG, by automatically turning knowledge documents into simulated multi-turn dialogues through a Multi-Document Traversal algorithm. The simulated knowledge-intensive dialogues constructed by KiDG in one domain can be easily used to train and enhance pre-trained dialogue models’ knowledge w.r.t. this domain without costly annotation. We conduct extensive experiments comparing retrieval-augmented models and a variety of retrieval-free models. We found that dialogue models enhanced with data simulated with KiDG largely outperform state-of-the-art retrieval-free methods, and it achieves comparable performance compared to retrieval-augmented methods while being better, and cheaper at domain transfer. @@ -5217,7 +5217,7 @@ ShichengXuInstitute of Computing Technology, Chinese Academy of Sciences LiangPangInstitute of Computing Technology of Chinese Academy of Sciences HuaweiShenInstitute of Computing Technology, Chinese Academy of Sciences - XueqiChengInstitute of Computing Technology, CAS + XueqiChengInstitute of Computing Technology, CAS 6620-6635 Dense retrieval has shown promise in the first-stage retrieval process when trained on in-domain labeled datasets. However, previous studies have found that dense retrieval is hard to generalize to unseen domains due to its weak modeling of domain-invariant and interpretable feature (i.e., matching signal between two texts, which is the essence of information retrieval). In this paper, we propose a novel method to improve the generalization of dense retrieval via capturing matching signal called BERM. Fully fine-grained expression and query-oriented saliency are two properties of the matching signal. Thus, in BERM, a single passage is segmented into multiple units and two unit-level requirements are proposed for representation as the constraint in training to obtain the effective matching signal. One is semantic unit balance and the other is essential matching unit extractability. Unit-level view and balanced semantics make representation express the text in a fine-grained manner. Essential matching unit extractability makes passage representation sensitive to the given query to extract the pure matching information from the passage containing complex context. Experiments on BEIR show that our method can be effectively combined with different dense retrieval training methods (vanilla, hard negatives mining and knowledge distillation) to improve its generalization ability without any additional inference overhead and target domain data. 2023.acl-long.365 @@ -5243,7 +5243,7 @@ Prompting Language Models for Linguistic Structure TerraBlevinsUniversity of Washington HilaGonenUW and FAIR - LukeZettlemoyerUniversity of Washington; Meta + LukeZettlemoyerUniversity of Washington; Meta 6649-6663 Although pretrained language models (PLMs) can be prompted to perform a wide range of language tasks, it remains an open question how much this ability comes from generalizable linguistic understanding versus surface-level lexical patterns. To test this, we present a structured prompting approach for linguistic structured prediction tasks, allowing us to perform zero- and few-shot sequence tagging with autoregressive PLMs. We evaluate this approach on part-of-speech tagging, named entity recognition, and sentence chunking, demonstrating strong few-shot performance in all cases. We also find that while PLMs contain significant prior knowledge of task labels due to task leakage into the pretraining corpus, structured prompting can also retrieve linguistic structure with arbitrary labels. These findings indicate that the in-context learning ability and linguistic knowledge of PLMs generalizes beyond memorization of their training data. 2023.acl-long.367 @@ -5267,7 +5267,7 @@ <fixed-case>RE</fixed-case>-Matching: A Fine-Grained Semantic Matching Method for Zero-Shot Relation Extraction JunZhaoFudan University WenYuZhanFuDan University - XinZhaoFudan University + XinZhaoFudan University QiZhangFudan University TaoGuifudan university ZhongyuWeiSchool of Data Science, Fudan University @@ -5293,7 +5293,7 @@ GunheeKimSeoul National University Eun-JuLeeSeoul National University YongLimSeoul National University - AliceOhKAIST + AliceOhKAIST SangchulParkSeoul National University Jung-WooHaNAVER Cloud AI Lab 6692-6712 @@ -5312,7 +5312,7 @@ KihyoParkCornell University Gyu TaeKimSoftlyAI MinjoonSeoKAIST - AliceOhKAIST + AliceOhKAIST 6713-6742 Research on Korean grammatical error correction (GEC) is limited, compared to other major languages such as English. We attribute this problematic circumstance to the lack of a carefully designed evaluation benchmark for Korean GEC. In this work, we collect three datasets from different sources (Kor-Lang8, Kor-Native, and Kor-Learner) that covers a wide range of Korean grammatical errors. Considering the nature of Korean grammar, We then define 14 error types for Korean and provide KAGAS (Korean Automatic Grammatical error Annotation System), which can automatically annotate error types from parallel corpora. We use KAGAS on our datasets to make an evaluation benchmark for Korean, and present baseline models trained from our datasets. We show that the model trained with our datasets significantly outperforms the currently used statistical Korean GEC system (Hanspell) on a wider range of error types, demonstrating the diversity and usefulness of the datasets. The implementations and datasets are open-sourced. 2023.acl-long.371 @@ -5341,7 +5341,7 @@ KeerthiramMurugesanIBM Research RosarioUceda-SosaIBM Research MichiakiTatsuboriIBM Research - Tokyo - AchilleFokoueIBM Research + AchilleFokoueIBM Research PavanKapanipathiIBM Research AsimMunawarIBM Research AlexanderGrayIBM Research @@ -5440,7 +5440,7 @@ PeiyuanGongBeijing Institute of Technology Derek F.WongUniversity of Macau YangGaoBeijing Institute of Technology - HeyanHuangBeijing Institute of Technology + HeyanHuangBeijing Institute of Technology MinZhangHarbin Institute of Technology (Shenzhen) 6878-6892 Grammatical error correction (GEC) can be divided into sequence-to-edit (Seq2Edit) and sequence-to-sequence (Seq2Seq) frameworks, both of which have their pros and cons. To utilize the strengths and make up for the shortcomings of these frameworks, this paper proposes a novel method, TemplateGEC, which capitalizes on the capabilities of both Seq2Edit and Seq2Seq frameworks in error detection and correction respectively. TemplateGEC utilizes the detection labels from a Seq2Edit model, to construct the template as the input. A Seq2Seq model is employed to enforce consistency between the predictions of different templates by utilizing consistency learning. Experimental results on the Chinese NLPCC18, English BEA19 and CoNLL14 benchmarks show the effectiveness and robustness of TemplateGEC.Further analysis reveals the potential of our method in performing human-in-the-loop GEC. Source code and scripts are available at https://github.com/li-aolong/TemplateGEC. @@ -5452,7 +5452,7 @@ Deep Model Compression Also Helps Models Capture Ambiguity HancheolParkSchool of Computing, KAIST - JongParkKAIST + JongParkKAIST 6893-6905 Natural language understanding (NLU) tasks face a non-trivial amount of ambiguous samples where veracity of their labels is debatable among annotators. NLU models should thus account for such ambiguity, but they approximate the human opinion distributions quite poorly and tend to produce over-confident predictions. To address this problem, we must consider how to exactly capture the degree of relationship between each sample and its candidate classes. In this work, we propose a novel method with deep model compression and show how such relationship can be accounted for. We see that more reasonably represented relationships can be discovered in the lower layers and that validation accuracies are converging at these layers, which naturally leads to layer pruning. We also see that distilling the relationship knowledge from a lower layer helps models produce better distribution. Experimental results demonstrate that our method makes substantial improvement on quantifying ambiguity without gold distribution labels. As positive side-effects, our method is found to reduce the model size significantly and improve latency, both attractive aspects of NLU products. 2023.acl-long.381 @@ -5520,10 +5520,10 @@ LiangmingPanUniversity of California, Santa Barbara (UCSB) XiaobaoWuNanyang Technological University XinyuanLuNational University of Singapore - Anh TuanLuuNanyang Technological University, Singapore + Anh TuanLuuNanyang Technological University, Singapore William YangWangUnversity of California, Santa Barbara Min-YenKanNational University of Singapore - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 6981-7004 Fact-checking real-world claims often requires collecting multiple pieces of evidence and applying complex multi-step reasoning. In this paper, we present Program-Guided Fact-Checking (ProgramFC), a novel fact-checking model that decomposes complex claims into simpler sub-tasks that can be solved using a shared library of specialized functions. We first leverage the in-context learning ability of large language models to generate reasoning programs to guide the verification process. Afterward, we execute the program by delegating each sub-task to the corresponding sub-task handler. This process makes our model both explanatory and data-efficient, providing clear explanations of its reasoning process and requiring minimal training data. We evaluate ProgramFC on two challenging fact-checking datasets and show that it outperforms seven fact-checking baselines across different settings of evidence availability, with explicit output programs that benefit human debugging. Our codes and data are publicly available at https://github.com/mbzuai-nlp/ProgramFC. 2023.acl-long.386 @@ -5709,7 +5709,7 @@ KangjieChenNanyang Technological University XiaofeiXieSingapore Management University TianweiZhangNanyang Technological University - YangLiuNanyang Technological University + YangLiuNanyang Technological University 7236-7254 Backdoor attacks for neural code models have gained considerable attention due to the advancement of code intelligence. However, most existing works insert triggers into task-specific data for code-related downstream tasks, thereby limiting the scope of attacks. Moreover, the majority of attacks for pre-trained models are designed for understanding tasks. In this paper, we propose task-agnostic backdoor attacks for code pre-trained models. Our backdoored model is pre-trained with two learning strategies (i.e., Poisoned Seq2Seq learning and token representation learning) to support the multi-target attack of downstream code understanding and generation tasks. During the deployment phase, the implanted backdoors in the victim models can be activated by the designed triggers to achieve the targeted attack. We evaluate our approach on two code understanding tasks and three code generation tasks over seven datasets. Extensive experimental results demonstrate that our approach effectively and stealthily attacks code-related downstream tasks. 2023.acl-long.399 @@ -5762,8 +5762,8 @@ QianhuiWuMicrosoft Corporation HuiqiangJiangMicrosoft Research Asia HaonanYinTsinghua University - Börje F.KarlssonBeijing Academy of Artificial Intelligence (BAAI) - Chin-YewLinMicrosoft Research + Börje F.KarlssonBeijing Academy of Artificial Intelligence (BAAI) + Chin-YewLinMicrosoft Research 7317-7332 Self-supervised representation learning has proved to be a valuable component for out-of-distribution (OoD) detection with only the texts of in-distribution (ID) examples. These approaches either train a language model from scratch or fine-tune a pre-trained language model using ID examples, and then take the perplexity output by the language model as OoD scores. In this paper, we analyze the complementary characteristic of both methods and propose a multi-level knowledge distillation approach that integrates their strengths while mitigating their limitations. Specifically, we use a fine-tuned model as the teacher to teach a randomly initialized student model on the ID examples. Besides the prediction layer distillation, we present a similarity-based intermediate layer distillation method to thoroughly explore the representation space of the teacher model. In this way, the learned student can better represent the ID data manifold while gaining a stronger ability to map OoD examples outside the ID data manifold with the regularization inherited from pre-training. Besides, the student model sees only ID examples during parameter learning, further promoting more distinguishable features for OoD detection. We conduct extensive experiments over multiple benchmark datasets, i.e., CLINC150, SST, ROSTD, 20 NewsGroups, and AG News; showing that the proposed method yields new state-of-the-art performance. We also explore its application as an AIGC detector to distinguish answers generated by ChatGPT and human experts. It is observed that our model exceeds human evaluators in the pair-expert task on the Human ChatGPT Comparison Corpus. 2023.acl-long.403 @@ -5789,7 +5789,7 @@ JiazhanFengPeking University QingfengSunMicrosoft Corporation CanXuSTCA NLP Group, Microsoft - PuZhaoMicrosoft + PuZhaoMicrosoft YamingYangMicrosoft ChongyangTaoMicrosoft Corporation DongyanZhaopku.edu.cn @@ -5915,7 +5915,7 @@ SireeshGururajaCarnegie Mellon University RitamDuttCarnegie Mellon University TinglongLiaoCarnegie Mellon University - CarolynRoséCarnegie Mellon University + CarolynRoséCarnegie Mellon University 7502-7514 Recent work has demonstrated the positive impact of incorporating linguistic representations as additional context and scaffolds on the in-domain performance of several NLP tasks. We extend this work by exploring the impact of linguistic representations on cross-domain performance in a few-shot transfer setting. An important question is whether linguistic representations enhance generalizability by providing features that function as cross-domain pivots. We focus on the task of relation extraction on three datasets of procedural text in two domains, cooking and materials science. Our approach augments a popular transformer-based architecture by alternately incorporating syntactic and semantic graphs constructed by freely available off-the-shelf tools. We examine their utility for enhancing generalization, and investigate whether earlier findings, e.g. that semantic representations can be more helpful than syntactic ones, extend to relation extraction in multiple domains. We find that while the inclusion of these graphs results in significantly higher performance in few-shot transfer, both types of graph exhibit roughly equivalent utility. 2023.acl-long.414 @@ -5946,7 +5946,7 @@ RussellKlopfer3M | MModal EdmondLu3M Health Information Systems BenjaminStriner3M - MatthewGormleyCarnegie Mellon University + MatthewGormleyCarnegie Mellon University 7534-7550 We introduce a dataset for evidence/rationale extraction on an extreme multi-label classification task over long medical documents. One such task is Computer-Assisted Coding (CAC) which has improved significantly in recent years, thanks to advances in machine learning technologies. Yet simply predicting a set of final codes for a patient encounter is insufficient as CAC systems are required to provide supporting textual evidence to justify the billing codes. A model able to produce accurate and reliable supporting evidence for each code would be a tremendous benefit. However, a human annotated code evidence corpus is extremely difficult to create because it requires specialized knowledge. In this paper, we introduce MDACE, the first publicly available code evidence dataset, which is built on a subset of the MIMIC-III clinical records. The dataset – annotated by professional medical coders – consists of 302 Inpatient charts with 3,934 evidence spans and 52 Profee charts with 5,563 evidence spans. We implemented several evidence extraction methods based on the EffectiveCAN model (Liu et al., 2021) to establish baseline performance on this dataset. MDACE can be used to evaluate code evidence extraction methods for CAC systems, as well as the accuracy and interpretability of deep learning models for multi-label classification. We believe that the release of MDACE will greatly improve the understanding and application of deep learning technologies for medical coding and document classification. 2023.acl-long.416 @@ -6065,7 +6065,7 @@ HongshenChenJD.com PengjieRenShandong University ZhuminChenShandong University - Maartende RijkeUniversity of Amsterdam + Maartende RijkeUniversity of Amsterdam ZhaochunRenShandong University 7669-7683 In open-domain question answering, due to the ambiguity of questions, multiple plausible answers may exist. To provide feasible answers to an ambiguous question,one approach is to directly predict all valid answers, but this can struggle with balancing relevance and diversity. An alternative is to gather candidate answers and aggregate them, but this method can be computationally costly and may neglect dependencies among answers. In this paper, we present AmbigPrompt to address the imperfections of existing approaches to answering ambiguous questions. Specifically, we integrate an answering model with a prompting model in an iterative manner. The prompting model adaptively tracks the reading process and progressively triggers the answering model to compose distinct and relevant answers. Additionally, we develop a task-specific post-pretraining approach for both the answering model and the prompting model, which greatly improves the performance of our framework. Empirical studies on two commonly-used open benchmarks show that AmbigPrompt achieves state-of-the-art or competitive results while using less memory and having a lower inference latency than competing approaches. Additionally, AmbigPrompt also performs well in low-resource settings. @@ -6089,7 +6089,7 @@ Massively Multilingual Lexical Specialization of Multilingual Transformers TommasoGreenUniversity of Mannheim - Simone PaoloPonzettoUniversity of Mannheim + Simone PaoloPonzettoUniversity of Mannheim GoranGlavašUniversity of Würzburg 7700-7715 While pretrained language models (PLMs) primarily serve as general-purpose text encoders that can be fine-tuned for a wide variety of downstream tasks, recent work has shown that they can also be rewired to produce high-quality word representations (i.e., static word embeddings) and yield good performance in type-level lexical tasks. While existing work primarily focused on the lexical specialization of monolingual PLMs with immense quantities of monolingual constraints, in this work we expose massively multilingual transformers (MMTs, e.g., mBERT or XLM-R) to multilingual lexical knowledge at scale, leveraging BabelNet as the readily available rich source of multilingual and cross-lingual type-level lexical knowledge. Concretely, we use BabelNet’s multilingual synsets to create synonym pairs (or synonym-gloss pairs) across 50 languages and then subject the MMTs (mBERT and XLM-R) to a lexical specialization procedure guided by a contrastive objective. We show that such massively multilingual lexical specialization brings substantial gains in two standard cross-lingual lexical tasks, bilingual lexicon induction and cross-lingual word similarity, as well as in cross-lingual sentence retrieval. Crucially, we observe gains for languages unseen in specialization, indicating that multilingual lexical specialization enables generalization to languages with no lexical constraints. In a series of subsequent controlled experiments, we show that the number of specialization constraints plays a much greater role than the set of languages from which they originate. @@ -6104,7 +6104,7 @@ EkinAkyurekMassachusetts Institute of Technology AshwinKalyanAllen Institute for Artificial Intelligence (AI2) PeterClarkAllen Institute for AI - Derry TantiWijayaBoston University + Derry TantiWijayaBoston University NiketTandonAllen Institute for Artificial Intelligence 7716-7733 Despite their unprecedented success, even the largest language models make mistakes. Similar to how humans learn and improve using feedback, previous work proposed providing language models with natural language feedback to guide them in repairing their outputs. Because human-generated critiques are expensive to obtain, researchers have devised learned critique generators in lieu of human critics while assuming one can train downstream models to utilize generated feedback. However, this approach does not apply to black-box or limited access models such as ChatGPT, as they cannot be fine-tuned. Moreover, in the era of large general-purpose language agents, fine-tuning is neither computationally nor spatially efficient as it results in multiple copies of the network. In this work, we introduce RL4F (Reinforcement Learning for Feedback), a multi-agent collaborative framework where the critique generator is trained to maximize end-task performance of GPT-3, a fixed model more than 200 times its size. RL4F produces critiques that help GPT-3 revise its outputs. We study three datasets for action planning, summarization and alphabetization and show relative improvements up to 10% in multiple text similarity metrics over other learned, retrieval-augmented or prompting-based critique generators. @@ -6155,7 +6155,7 @@ Modeling Structural Similarities between Documents for Coherence Assessment with Graph Convolutional Networks - WeiLiuHeidelberg Institute for Theoretical Studies + WeiLiuHeidelberg Institute for Theoretical Studies XiyanFuHeidelberg University MichaelStrubeHeidelberg Institute for Theoretical Studies 7792-7808 @@ -6168,7 +6168,7 @@ <fixed-case>H</fixed-case>i<fixed-case>TIN</fixed-case>: Hierarchy-aware Tree Isomorphism Network for Hierarchical Text Classification HeZhuBeihang University - ChongZhangBeihang University + ChongZhangBeihang University JunjieHuangThe Chinese University of Hong Kong JunranWuState Key Lab of Software Development Environment, Beihang University KeXuBeihang University @@ -6315,7 +6315,7 @@ JianguoWeiTianjin University MeishanZhangHarbin Institute of Technology (Shenzhen), China MinZhangHarbin Institute of Technology (Shenzhen) - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore 7960-7977 Visual spatial description (VSD) aims to generate texts that describe the spatial relations of the given objects within images. Existing VSD work merely models the 2D geometrical vision features, thus inevitably falling prey to the problem of skewed spatial understanding of target objects. In this work, we investigate the incorporation of 3D scene features for VSD. With an external 3D scene extractor, we obtain the 3D objects and scene features for input images, based on which we construct a target object-centered 3D spatial scene graph (Go3D-S2G), such that we model the spatial semantics of target objects within the holistic 3D scenes. Besides, we propose a scene subgraph selecting mechanism, sampling topologically-diverse subgraphs from Go3D-S2G, where the diverse local structure features are navigated to yield spatially-diversified text generation. Experimental results on two VSD datasets demonstrate that our framework outperforms the baselines significantly, especially improving on the cases with complex visual spatial relations. Meanwhile, our method can produce more spatially-diversified generation. 2023.acl-long.442 @@ -6327,7 +6327,7 @@ YuanchiZhangTsinghua University PengLiInstitute for AI Industry Research (AIR), Tsinghua University, China MaosongSunTsinghua University - YangLiuTsinghua University + YangLiuTsinghua University 7978-7996 While many parallel corpora are not publicly accessible for data copyright, data privacy and competitive differentiation reasons, trained translation models are increasingly available on open platforms. In this work, we propose a method called continual knowledge distillation to take advantage of existing translation models to improve one model of interest. The basic idea is to sequentially transfer knowledge from each trained model to the distilled model. Extensive experiments on Chinese-English and German-English datasets show that our method achieves significant and consistent improvements over strong baselines under both homogeneous and heterogeneous trained model settings and is robust to malicious models. 2023.acl-long.443 @@ -6337,9 +6337,9 @@ Query Refinement Prompts for Closed-Book Long-Form <fixed-case>QA</fixed-case> - Reinald KimAmplayoGoogle + Reinald KimAmplayoGoogle KellieWebsterGoogle - MichaelCollinsColumbia University/Google + MichaelCollinsColumbia University/Google DipanjanDasGoogle Research ShashiNarayanGoogle 7997-8012 @@ -6403,7 +6403,7 @@ YufengChenBeijing Jiaotong University WenjuanHanBeijing Jiaotong University JianLiuBeijing Jiaotong University - JinanXuBeijing Jiaotong University + JinanXuBeijing Jiaotong University 8062-8079 Knowledge distillation (KD) is a promising technique for model compression in neural machine translation. However, where the knowledge hides in KD is still not clear, which may hinder the development of KD. In this work, we first unravel this mystery from an empirical perspective and show that the knowledge comes from the top-1 predictions of teachers, which also helps us build a potential connection between word- and sequence-level KD. Further, we point out two inherent issues in vanilla word-level KD based on this finding. Firstly, the current objective of KD spreads its focus to whole distributions to learn the knowledge, yet lacks special treatment on the most crucial top-1 information. Secondly, the knowledge is largely covered by the golden information due to the fact that most top-1 predictions of teachers overlap with ground-truth tokens, which further restricts the potential of KD. To address these issues, we propose a new method named Top-1 Information Enhanced Knowledge Distillation (TIE-KD). Specifically, we design a hierarchical ranking loss to enforce the learning of the top-1 information from the teacher. Additionally, we develop an iterative KD procedure to infuse more additional knowledge by distilling on the data without ground-truth targets. Experiments on WMT’14 English-German, WMT’14 English-French and WMT’16 English-Romanian demonstrate that our method can respectively boost Transformer_{base} students by +1.04, +0.60 and +1.11 BLEU scores and significantly outperforms the vanilla word-level KD baseline. Besides, our method shows higher generalizability on different teacher-student capacity gaps than existing KD techniques. 2023.acl-long.448 @@ -6491,7 +6491,7 @@ <fixed-case>F</fixed-case>i<fixed-case>D</fixed-case>-<fixed-case>ICL</fixed-case>: A Fusion-in-Decoder Approach for Efficient In-Context Learning QinyuanYeUniversity of Southern California IzBeltagyAllen Institute for AI (AI2) - MatthewPetersAllen Institute for Artificial Intelligence + MatthewPetersAllen Institute for Artificial Intelligence XiangRenUniversity of Southern California HannanehHajishirziUniversity of Washington 8158-8185 @@ -6521,7 +6521,7 @@ XuxiChenUT Austin TianlongChenUniversity of Texas at Austin WeizhuChenMicrosoft - Ahmed HassanAwadallahMicrosoft Research + Ahmed HassanAwadallahMicrosoft Research ZhangyangWangUT Austin YuChengMicrosoft Research 8208-8222 @@ -6614,7 +6614,7 @@ Factual or Contextual? Disentangling Error Types in Entity Description Generation NavitaGoyalUniversity of Maryland College Park AniNenkovaAdobe Research - HalDaumé IIIUMD + HalDaumé IIIUMD 8322-8340 In the task of entity description generation, given a context and a specified entity, a model must describe that entity correctly and in a contextually-relevant way. In this task, as well as broader language generation tasks, the generation of a nonfactual description (factual error) versus an incongruous description (contextual error) is fundamentally different, yet often conflated. We develop an evaluation paradigm that enables us to disentangle these two types of errors in naturally occurring textual contexts. We find that factuality and congruity are often at odds, and that models specifically struggle with accurate descriptions of entities that are less familiar to people. This shortcoming of language models raises concerns around the trustworthiness of such models, since factual errors on less well-known entities are exactly those that a human reader will not recognize. 2023.acl-long.463 @@ -6627,7 +6627,7 @@ ChiChenTsinghua University PengLiInstitute for AI Industry Research (AIR), Tsinghua University, China MaosongSunTsinghua University - YangLiuTsinghua University + YangLiuTsinghua University 8341-8355 Weakly supervised vision-and-language pre-training (WVLP), which learns cross-modal representations with limited cross-modal supervision, has been shown to effectively reduce the data cost of pre-training while maintaining decent performance on downstream tasks. However, current WVLP methods use only local descriptions of images, i.e., object tags, as cross-modal anchors to construct weakly-aligned image-text pairs for pre-training. This affects the data quality and thus the effectiveness of pre-training. In this paper, we propose to directly take a small number of aligned image-text pairs as anchors, and represent each unaligned image and text by its similarities to these anchors, i.e., relative representations. We build a WVLP framework based on the relative representations, namely RELIT, which collects high-quality weakly-aligned image-text pairs from large-scale image-only and text-only data for pre-training through relative representation-based retrieval and generation. Experiments on four downstream tasks show that RELIT achieves new state-of-the-art results under the weakly supervised setting. 2023.acl-long.464 @@ -6657,7 +6657,7 @@ <fixed-case>A</fixed-case>rg<fixed-case>U</fixed-case>: A Controllable Factual Argument Generator SougataSahaState University of New York at Buffalo - RohiniSrihariUniversity at Buffalo, SUNY + RohiniSrihariUniversity at Buffalo, SUNY 8373-8388 Effective argumentation is essential towards a purposeful conversation with a satisfactory outcome. For example, persuading someone to reconsider smoking might involve empathetic, well founded arguments based on facts and expert opinions about its ill-effects and the consequences on one’s family. However, the automatic generation of high-quality factual arguments can be challenging. Addressing existing controllability issues can make the recent advances in computational models for argument generation a potential solution. In this paper, we introduce ArgU: a neural argument generator capable of producing factual arguments from input facts and real-world concepts that can be explicitly controlled for stance and argument structure using Walton’s argument scheme-based control codes. Unfortunately, computational argument generation is a relatively new field and lacks datasets conducive to training. Hence, we have compiled and released an annotated corpora of 69,428 arguments spanning six topics and six argument schemes, making it the largest publicly available corpus for identifying argument schemes; the paper details our annotation and dataset creation framework. We further experiment with an argument generation strategy that establishes an inference strategy by generating an “argument template” before actual argument generation. Our results demonstrate that it is possible to automatically generate diverse arguments exhibiting different inference patterns for the same set of facts by using control codes based on argument schemes and stance. 2023.acl-long.466 @@ -6669,7 +6669,7 @@ Learning Answer Generation using Supervision from Automatic Question Answering Evaluators MatteoGabburoUniversity of Trento SiddhantGargAmazon Alexa AI - RikKoncel-KedziorskiKensho Technologies + RikKoncel-KedziorskiKensho Technologies AlessandroMoschittiAmazon 8389-8403 Recent studies show that sentence-level extractive QA, i.e., based on Answer Sentence Selection (AS2), is outperformed by Generation-based QA (GenQA) models, which generate answers using the top-k answer sentences ranked by AS2 models (a la retrieval-augmented generation style). In this paper, we propose a novel training paradigm for GenQA using supervision from automatic QA evaluation models (GAVA). Specifically, we propose three strategies to transfer knowledge from these QA evaluation models to a GenQA model: (i) augmenting training data with answers generated by the GenQA model and labelled by GAVA (either statically, before training, or (ii) dynamically, at every training epoch); and (iii) using the GAVA score for weighting the generator loss during the learning of the GenQA model. We evaluate our proposed methods on two academic and one industrial dataset, obtaining a significant improvement in answering accuracy over the previous state of the art. @@ -6710,7 +6710,7 @@ SubhajitChaudhuryIBM Research TahiraNaseemIBM Research AI RamonFernandez AstudilloIBM Research - AchilleFokoueIBM Research + AchilleFokoueIBM Research TimKlingerIBM Research AI 8434-8448 Nearly all general-purpose neural semantic parsers generate logical forms in a strictly top-down autoregressive fashion. Though such systems have achieved impressive results across a variety of datasets and domains, recent works have called into question whether they are ultimately limited in their ability to compositionally generalize. In this work, we approach semantic parsing from, quite literally, the opposite direction; that is, we introduce a neural semantic parsing generation method that constructs logical forms from the bottom up, beginning from the logical form’s leaves. The system we introduce is lazy in that it incrementally builds up a set of potential semantic parses, but only expands and processes the most promising candidate parses at each generation step. Such a parsimonious expansion scheme allows the system to maintain an arbitrarily large set of parse hypotheses that are never realized and thus incur minimal computational overhead. We evaluate our approach on compositional generalization; specifically, on the challenging CFQ dataset and two other Text-to-SQL datasets where we show that our novel, bottom-up semantic parsing technique outperforms general-purpose semantic parsers while also being competitive with semantic parsers that have been tailored to each task. @@ -6737,7 +6737,7 @@ (<fixed-case>QA</fixed-case>)<tex-math>^2</tex-math>: Question Answering with Questionable Assumptions NajoungKimBoston University Phu MonHtutAWS AI Labs - Samuel R.BowmanNew York University + Samuel R.BowmanNew York University JacksonPettyNew York University 8466-8487 Naturally occurring information-seeking questions often contain questionable assumptions—assumptions that are false or unverifiable. Questions containing questionable assumptions are challenging because they require a distinct answer strategy that deviates from typical answers for information-seeking questions. For instance, the question “When did Marie Curie discover Uranium?” cannot be answered as a typical “when” question without addressing the false assumption “Marie Curie discovered Uranium”. In this work, we propose (QA)2 (Question Answering with Questionable Assumptions), an open-domain evaluation dataset consisting of naturally occurring search engine queries that may or may not contain questionable assumptions. To be successful on (QA)2, systems must be able to detect questionable assumptions and also be able to produce adequate responses for both typical information-seeking questions and ones with questionable assumptions. Through human rater acceptability on end-to-end QA with (QA)2, we find that current models do struggle with handling questionable assumptions, leaving substantial headroom for progress. @@ -6761,7 +6761,7 @@ Targeted Data Generation: Finding and Fixing Model Weaknesses ZexueHeUniversity of California, San Diego - Marco TulioRibeiroMicrosoft Research + Marco TulioRibeiroMicrosoft Research FereshteKhaniMicrosoft 8506-8520 Even when aggregate accuracy is high, state-of-the-art NLP models often fail systematically on specific subgroups of data, resulting in unfair outcomes and eroding user trust. Additional data collection may not help in addressing these weaknesses, as such challenging subgroups may be unknown to users, and underrepresented in the existing and new data. We propose Targeted Data Generation (TDG), a framework that automatically identifies challenging subgroups, and generates new data for those subgroups using large language models (LLMs) with a human in the loop. TDG estimates the expected benefit and potential harm of data augmentation for each subgroup, and selects the ones most likely to improve within-group performance without hurting overall performance. In our experiments, TDG significantly improves the accuracy on challenging subgroups for state-of-the-art sentiment analysis and natural language inference models, while also improving overall test accuracy. @@ -6787,7 +6787,7 @@ JunnanZhuInstitute of Automation, Chinese Academy of Sciences HaitaoLinNational Laboratory of Pattern Recognition, Institute of Automation, CAS YuZhouCASIA - ChengqingZongInstitute of Automation, Chinese Academy of Sciences + ChengqingZongInstitute of Automation, Chinese Academy of Sciences 8538-8553 Multimodal summarization usually suffers from the problem that the contribution of the visual modality is unclear. Existing multimodal summarization approaches focus on designing the fusion methods of different modalities, while ignoring the adaptive conditions under which visual modalities are useful. Therefore, we propose a novel Coarse-to-Fine contribution network for multimodal Summarization (CFSum) to consider different contributions of images for summarization. First, to eliminate the interference of useless images, we propose a pre-filter module to abandon useless images. Second, to make accurate use of useful images, we propose two levels of visual complement modules, word level and phrase level. Specifically, image contributions are calculated and are adopted to guide the attention of both textual and visual modalities. Experimental results have shown that CFSum significantly outperforms multiple strong baselines on the standard benchmark. Furthermore, the analysis verifies that useful images can even help generate non-visual words which are implicitly represented in the image. 2023.acl-long.476 @@ -6800,9 +6800,9 @@ Made NindyatamaNityasyaIndependent HaryoWibowoIndependent Alham FikriAjiMBZUAI - GentaWinataBloomberg + GentaWinataBloomberg Radityo EkoPrasojoPitik.id - PhilBlunsomUniversity of Oxford + PhilBlunsomUniversity of Oxford AdhigunaKuncoroUniversity of Oxford and DeepMind 8554-8572 This evidence-based position paper critiques current research practices within the language model pre-training literature. Despite rapid recent progress afforded by increasingly better pre-trained language models (PLMs), current PLM research practices often conflate different possible sources of model improvement, without conducting proper ablation studies and principled comparisons between different models under comparable conditions. These practices (i) leave us ill-equipped to understand which pre-training approaches should be used under what circumstances; (ii) impede reproducibility and credit assignment; and (iii) render it difficult to understand: “How exactly does each factor contribute to the progress that we have today?” We provide a case in point by revisiting the success of BERT over its baselines, ELMo and GPT-1, and demonstrate how — under comparable conditions where the baselines are tuned to a similar extent — these baselines (and even-simpler variants thereof) can, in fact, achieve competitive or better performance than BERT. These findings demonstrate how disentangling different factors of model improvements can lead to valuable new insights. We conclude with recommendations for how to encourage and incentivize this line of work, and accelerate progress towards a better and more systematic understanding of what factors drive the progress of our foundation models today. @@ -6943,12 +6943,12 @@ PepaAtanasovaUniversity of Copenhagen TodorMihaylovMeta AI GaliaAngelovaInstitute of Information and communication Technologies, Bulgarian Academy of Sciences - KirilSimovArtificial Intelligence and Language Technologies Department, IICT, Bulgarian Academy of Sciences + KirilSimovArtificial Intelligence and Language Technologies Department, IICT, Bulgarian Academy of Sciences PetyaOsenovaSofia University “St. Kl. Ohridski” and IICT-BAS VeselinStoyanovFacebook IvanKoychevSofia University “St. Kliment Ohridski” - PreslavNakovMohamed bin Zayed University of Artificial Intelligence - DragomirRadevYale University + PreslavNakovMohamed bin Zayed University of Artificial Intelligence + DragomirRadevYale University 8733-8759 We present bgGLUE (Bulgarian General Language Understanding Evaluation), a benchmark for evaluating language models on Natural Language Understanding (NLU) tasks in Bulgarian. Our benchmark includes NLU tasks targeting a variety of NLP problems (e.g., natural language inference, fact-checking, named entity recognition, sentiment analysis, question answering, etc.) and machine learning tasks (sequence labeling, document-level classification, and regression). We run the first systematic evaluation of pre-trained language models for Bulgarian, comparing and contrasting results across the nine tasks in the benchmark. The evaluation results show strong performance on sequence labeling tasks, but there is a lot of room for improvement for tasks that require more complex reasoning. We make bgGLUE publicly available together with the fine-tuning and the evaluation code, as well as a public leaderboard at https://bgglue.github.io, and we hope that it will enable further advancements in developing NLU models for Bulgarian. 2023.acl-long.487 @@ -6975,7 +6975,7 @@ RuixiangCuiUniversity of Copenhagen SeolhwaLeeUniversity of Copenhagen DanielHershcovichUniversity of Copenhagen - AndersSøgaardUniversity of Copenhagen + AndersSøgaardUniversity of Copenhagen 8786-8800 Humans can effortlessly understand the coordinate structure of sentences such as “Niels Bohr and Kurt Cobain were born in Copenhagen and Seattle, *respectively*”. In the context of natural language inference (NLI), we examine how language models (LMs) reason with respective readings (Gawron and Kehler, 2004) from two perspectives: syntactic-semantic and commonsense-world knowledge. We propose a controlled synthetic dataset WikiResNLI and a naturally occurring dataset NatResNLI to encompass various explicit and implicit realizations of “respectively”. We show that fine-tuned NLI models struggle with understanding such readings without explicit supervision. While few-shot learning is easy in the presence of explicit cues, longer training is required when the reading is evoked implicitly, leaving models to rely on common sense inferences. Furthermore, our fine-grained analysis indicates models fail to generalize across different constructions. To conclude, we demonstrate that LMs still lag behind humans in generalizing to the long tail of linguistic constructions. 2023.acl-long.489 @@ -7174,7 +7174,7 @@ <fixed-case>M</fixed-case>ix<fixed-case>CE</fixed-case>: Training Autoregressive Language Models by Mixing Forward and Reverse Cross-Entropies ShiyueZhangThe University of North Carolina at Chapel Hill ShijieWuBloomberg L.P. - OzanIrsoyBloomberg + OzanIrsoyBloomberg StevenLuBloomberg MohitBansalUniversity of North Carolina at Chapel Hill MarkDredzeJohns Hopkins University @@ -7208,7 +7208,7 @@ JustineKaoMeta AI AlexandreMourachkoMeta AI HolgerSchwenkMeta AI Research - Marta R.Costa-jussàMeta AI + Marta R.Costa-jussàMeta AI 9064-9079 End-to-End speech-to-speech translation (S2ST) is generally evaluated with text-based metrics. This means that generated speech has to be automatically transcribed, making the evaluation dependent on the availability and quality of automatic speech recognition (ASR) systems. In this paper, we propose a text-free evaluation metric for end-to-end S2ST, named BLASER, to avoid the dependency on ASR systems. BLASER leverages a multilingual multimodal encoder to directly encode the speech segments for source input, translation output and reference into a shared embedding space and computes a score of the translation quality that can be used as a proxy to human evaluation. To evaluate our approach, we construct training and evaluation sets from more than 40k human annotations covering seven language directions. The best results of BLASER are achieved by training with supervision from human rating scores. We show that when evaluated at the sentence level, BLASER correlates significantly better with human judgment compared to ASR dependent metrics including ASR-SENTBLEU in all translation directions and ASR-COMET in five of them. Our analysis shows combining speech and text as inputs to BLASER does not increase the correlation with human scores, but best correlations are achieved when using speech, which motivates the goal of our research. Moreover, we show that using ASR for references is detrimental for text-based metrics. 2023.acl-long.504 @@ -7237,7 +7237,7 @@ Backpack Language Models JohnHewittStanford University JohnThickstunStanford University - ChristopherManningStanford University + ChristopherManningStanford University PercyLiangStanford University 9103-9125 We present Backpacks: a new neural architecture that marries strong modeling performancewith an interface for interpretability and control. Backpacks learn multiple non-contextual sense vectors for each word in a vocabulary, and represent a word in a sequence as a context-dependent, non-negative linear combination ofsense vectors in this sequence. We find that, after training, sense vectors specialize, each encoding a different aspect of a word. We can interpret a sense vector by inspecting its (non-contextual, linear) projection onto the output space, and intervene on these interpretable hooks to change the model’s behavior in predictable ways. We train a 170M-parameter Backpack language model on OpenWebText, matching the loss of a GPT-2 small (124Mparameter) Transformer. On lexical similarity evaluations, we find that Backpack sense vectors outperform even a 6B-parameter Transformer LM’s word embeddings. Finally, we present simple algorithms that intervene on sense vectors to perform controllable text generation and debiasing. For example, we can edit the sense vocabulary to tend more towards a topic, or localize a source of gender bias to a sense vector and globally suppress that sense. @@ -7294,7 +7294,7 @@ Translation-Enhanced Multilingual Text-to-Image Generation YaoyiranLiUniversity of Cambridge - Ching-YunChangAmazon.com + Ching-YunChangAmazon.com StephenRawlsAmazon IvanVulićUniversity of Cambridge AnnaKorhonenUniversity of Cambridge @@ -7341,7 +7341,7 @@ WenboZhaoAmazon YiwenChenUniversity of Cambridge TagyoungChungAmazon Alexa AI - JingHuangAmazon + JingHuangAmazon NanyunPengUniversity of California, Los Angeles 9235-9254 Automatic melody-to-lyric generation is a task in which song lyrics are generated to go with a given melody. It is of significant practical interest and more challenging than unconstrained lyric generation as the music imposes additional constraints onto the lyrics. The training data is limited as most songs are copyrighted, resulting in models that underfit the complicated cross-modal relationship between melody and lyrics. In this work, we propose a method for generating high-quality lyrics without training on any aligned melody-lyric data. Specifically, we design a hierarchical lyric generation framework that first generates a song outline and second the complete lyrics. The framework enables disentanglement of training (based purely on text) from inference (melody-guided text generation) to circumvent the shortage of parallel data. We leverage the segmentation and rhythm alignment between melody and lyrics to compile the given melody into decoding constraints as guidance during inference. The two-step hierarchical design also enables content control via the lyric outline, a much-desired feature for democratizing collaborative song creation. Experimental results show that our model can generate high-quality lyrics that are more on-topic, singable, intelligible, and coherent than strong baselines, for example SongMASS, a SOTA model trained on a parallel dataset, with a 24% relative overall quality improvement based on human ratings. Our code is available at https://github.com/amazon-science/unsupervised-melody-to-lyrics-generation. @@ -7468,7 +7468,7 @@ AdityaYedetoreBoston University TalLinzenNew York University RobertFrankYale University - R. ThomasMcCoyPrinceton University + R. ThomasMcCoyPrinceton University 9370-9393 When acquiring syntax, children consistently choose hierarchical rules over competing non-hierarchical possibilities. Is this preference due to a learning bias for hierarchical structure, or due to more general biases that interact with hierarchical cues in children’s linguistic input? We explore these possibilities by training LSTMs and Transformers - two types of neural networks without a hierarchical bias - on data similar in quantity and content to children’s linguistic input: text from the CHILDES corpus. We then evaluate what these models have learned about English yes/no questions, a phenomenon for which hierarchical structure is crucial. We find that, though they perform well at capturing the surface statistics of child-directed speech (as measured by perplexity), both model types generalize in a way more consistent with an incorrect linear rule than the correct hierarchical rule. These results suggest that human-like generalization from text alone requires stronger biases than the general sequence-processing biases of standard neural network architectures. 2023.acl-long.521 @@ -7524,14 +7524,14 @@ Open Set Relation Extraction via Unknown-Aware Training JunZhaoFudan University - XinZhaoFudan University + XinZhaoFudan University WenYuZhanFuDan University QiZhangFudan University TaoGuifudan university ZhongyuWeiSchool of Data Science, Fudan University Yun WenChenDataGrand Inc. XiangGaoDataGrand Inc. - XuanjingHuangFudan University + XuanjingHuangFudan University 9453-9467 The existing supervised relation extraction methods have achieved impressive performance in a closed-set setting, in which the relations remain the same during both training and testing. In a more realistic open-set setting, unknown relations may appear in the test set. Due to the lack of supervision signals from unknown relations, a well-performing closed-set relation extractor can still confidently misclassify them into known relations. In this paper, we propose an unknown-aware training method, regularizing the model by dynamically synthesizing negative instances that can provide the missing supervision signals. Inspired by text adversarial attack, We adaptively apply small but critical perturbations to original training data,synthesizing difficult enough negative instances that are mistaken by the model as known relations, thus facilitating a compact decision boundary. Experimental results show that our method achieves SOTA unknown relation detection without compromising the classification of known relations. 2023.acl-long.525 @@ -7545,7 +7545,7 @@ YushuoChenRENMIN UNIVERSITY of CHINA YifanDuGaoling School of Artificial Intelligence, Renmin University of China JunyiLiGaoling School of Artificial Intelligence, Renmin University of China - Wayne XinZhaoRUC + Wayne XinZhaoRUC Ji-RongWenRenmin University of China 9468-9481 People often imagine relevant scenes to aid in the writing process. In this work, we aim to utilize visual information for composition in the same manner as humans. We propose a method, LIVE, that makes pre-trained language models (PLMs) Learn to Imagine for Visually-augmented natural language gEneration. First, we imagine the scene based on the text: we use a diffusion model to synthesize high-quality images conditioned on the input texts. Second, we use CLIP to determine whether the text can evoke the imagination in a posterior way. Finally, our imagination is dynamic, and we conduct synthesis for each sentence rather than generate only one image for an entire paragraph. Technically, we propose a novel plug-and-play fusion layer to obtain visually-augmented representations for each text. Our vision-text fusion layer is compatible with Transformer-based architecture. We have conducted extensive experiments on four generation tasks using BART and T5, and the automatic results and human evaluation demonstrate the effectiveness of our proposed method. We will release the code, model, and data at the link: https://github.com/RUCAIBox/LIVE. @@ -7588,9 +7588,9 @@ The Best of Both Worlds: Combining Human and Machine Translations for Multilingual Semantic Parsing with Active Learning ZhuangLiMonash University LizhenQuMonash University - PhilipCohenOpenstream, Inc. + PhilipCohenOpenstream, Inc. RajTumuluriOpenstream.ai - GholamrezaHaffariMonash University + GholamrezaHaffariMonash University 9511-9528 Multilingual semantic parsing aims to leverage the knowledge from the high-resource languages to improve low-resource semantic parsing, yet commonly suffers from the data imbalance problem. Prior works propose to utilize the translations by either humans or machines to alleviate such issues. However, human translations are expensive, while machine translations are cheap but prone to error and bias. In this work, we propose an active learning approach that exploits the strengths of both human and machine translations by iteratively adding small batches of human translations into the machine-translated training set. Besides, we propose novel aggregated acquisition criteria that help our active learning method select utterances to be manually translated. Our experiments demonstrate that an ideal utterance selection can significantly reduce the error and bias in the translated data, resulting in higher parser accuracies than the parsers merely trained on the machine-translated data. 2023.acl-long.529 @@ -7626,7 +7626,7 @@ Document-Level Event Argument Extraction With a Chain Reasoning Paradigm JianLiuBeijing Jiaotong University ChenLiangBeijing Jiaotong University - JinanXuBeijing Jiaotong University + JinanXuBeijing Jiaotong University HaoyanLiuBeihang University ZheZhaoTencent 9570-9583 @@ -7794,7 +7794,7 @@ LucasTorroba HennigenMassachusetts Institute of Technology TiagoPimentelUniversity of Cambridge ClaraMeisterETH Zurich - JasonEisnerJohns Hopkins University + Microsoft Corporation + JasonEisnerJohns Hopkins University + Microsoft Corporation RyanCotterellETH Zürich 9744-9770 Language modeling, a central task in natural language processing, involves estimating a probability distribution over strings. In most cases, the estimated distribution sums to 1 over all finite strings. However, in some pathological cases, probability mass can “leak” onto the set of infinite sequences. In order to characterize the notion of leakage more precisely, this paper offers a measure-theoretic treatment of language modeling. We prove that many popular language model families are in fact tight, meaning that they will not leak in this sense. We also generalize characterizations of tightness proposed in previous works. @@ -7879,13 +7879,13 @@ Automated Metrics for Medical Multi-Document Summarization Disagree with Human Evaluations - Lucy LuWangUniversity of Washington - YuliaOtmakhovaUniversity of Melbourne + Lucy LuWangUniversity of Washington + YuliaOtmakhovaUniversity of Melbourne JayDeYoungNortheastern University Thinh HungTruongThe University of Melbourne BaileyKuehlAllen Institute for AI ErinBransomAllen Institute for AI - ByronWallaceNortheastern University + ByronWallaceNortheastern University 9871-9889 Evaluating multi-document summarization (MDS) quality is difficult. This is especially true in the case of MDS for biomedical literature reviews, where models must synthesize contradicting evidence reported across different documents. Prior work has shown that rather than performing the task, models may exploit shortcuts that are difficult to detect using standard n-gram similarity metrics such as ROUGE. Better automated evaluation metrics are needed, but few resources exist to assess metrics when they are proposed. Therefore, we introduce a dataset of human-assessed summary quality facets and pairwise preferences to encourage and support the development of better automated evaluation methods for literature review MDS. We take advantage of community submissions to the Multi-document Summarization for Literature Review (MSLR) shared task to compile a diverse and representative sample of generated summaries. We analyze how automated summarization evaluation metrics correlate with lexical features of generated summaries, to other automated metrics including several we propose in this work, and to aspects of human-assessed summary quality. We find that not only do automated metrics fail to capture aspects of quality as assessed by humans, in many cases the system rankings produced by these metrics are anti-correlated with rankings according to human annotators. 2023.acl-long.549 @@ -7941,7 +7941,7 @@ JunkaiZhouInstitute of Computing Technology, Chinese Academy of Sciences LiangPangInstitute of Computing Technology of Chinese Academy of Sciences HuaweiShenInstitute of Computing Technology, Chinese Academy of Sciences - XueqiChengInstitute of Computing Technology, CAS + XueqiChengInstitute of Computing Technology, CAS 9945-9959 Language models trained on large-scale corpora can generate remarkably fluent results in open-domain dialogue. However, for the persona-based dialogue generation task, consistency and coherence are also key factors, which are great challenges for language models. Existing works mainly focus on valuable data filtering, model structure modifying, or objective function designing, while their improvements are limited and hard to generalize to all types of pre-trained language models. However, we find that language models can produce consistent and coherent responses if we consider enough generations. Thus, the problems lay in large-scale response generation and target response selection. In this work, a simple but effective two-stage SimOAP strategy is proposed, i.e., over-sampling and post-evaluation. The over-sampling stage takes large-scale responses from existing trained models efficiently via off-the-shelf distilling and compressing methods, and the post-evaluation stage selects a good response based on multiple well-designed evaluation metrics from large-scale candidates. Experimental results show that the proposed plug-in SimOAP strategy improves the backbone models and outperforms the baseline strategies in both automatic and human evaluations. 2023.acl-long.553 @@ -8139,7 +8139,7 @@ ShaneStorksUniversity of Michigan KeunwooYuUniversity Of Michigan ZiqiaoMaUniversity of Michigan - JoyceChaiUniversity of Michigan + JoyceChaiUniversity of Michigan 10199-10219 As natural language processing (NLP) has recently seen an unprecedented level of excitement, and more people are eager to enter the field, it is unclear whether current research reproducibility efforts are sufficient for this group of beginners to apply the latest developments. To understand their needs, we conducted a study with 93 students in an introductory NLP course, where students reproduced the results of recent NLP papers. Surprisingly, we find that their programming skill and comprehension of research papers have a limited impact on their effort spent completing the exercise. Instead, we find accessibility efforts by research authors to be the key to success, including complete documentation, better coding practice, and easier access to data files. Going forward, we recommend that NLP researchers pay close attention to these simple aspects of open-sourcing their work, and use insights from beginners’ feedback to provide actionable ideas on how to better support them. 2023.acl-long.568 @@ -8194,8 +8194,8 @@ ShujianHuangNational Key Laboratory for Novel Software Technology, Nanjing University WeiZouNanjing University JianbingZhangNanjing University - XinyuDaiNational Key Laboratory for Novel Software Technology, Nanjing University - JiajunChenNanjing University + XinyuDaiNational Key Laboratory for Novel Software Technology, Nanjing University + JiajunChenNanjing University 10270-10287 In recent years, deep neural networks (DNNs) have achieved state-of-the-art performance on a wide range of tasks. However, limitations in interpretability have hindered their applications in the real world. This work proposes to interpret neural networks by linear decomposition and finds that the ReLU-activated Transformer can be considered as a linear model on a single input. We further leverage the linearity of the model and propose a linear decomposition of the model output to generate local explanations. Our evaluation of sentiment classification and machine translation shows that our method achieves competitive performance in efficiency and fidelity of explanation. In addition, we demonstrate the potential of our approach in applications with examples of error analysis on multiple tasks. 2023.acl-long.572 @@ -8344,7 +8344,7 @@ ArnavMhaskeIndian Institute of Technology, Madras HarshitKediaIndian Institute of Technology Madras SumanthDoddapaneniIndian Institute of Technology Madras - Mitesh M.KhapraIndian Institute of Technology Madras + Mitesh M.KhapraIndian Institute of Technology Madras PratyushKumarIIT Madras RudraMurthyIBM India Research Limited AnoopKunchukuttanMicrosoft AI and Research @@ -8359,7 +8359,7 @@ <fixed-case>CREPE</fixed-case>: Open-Domain Question Answering with False Presuppositions XinyanYuUniversity of Washington SewonMinUniversity of Washington - LukeZettlemoyerUniversity of Washington; Meta + LukeZettlemoyerUniversity of Washington; Meta HannanehHajishirziUniversity of Washington 10457-10480 When asking about unfamiliar topics, information seeking users often pose questions with false presuppositions. Most existing question answering (QA) datasets, in contrast, assume all questions have well defined answers. We introduce CREPE, a QA dataset containing a natural distribution of presupposition failures from online information-seeking forums. We find that 25% of questions contain false presuppositions, and provide annotations for these presuppositions and their corrections. Through extensive baseline experiments, we show that adaptations of existing open-domain QA models can find presuppositions moderately well, but struggle when predicting whether a presupposition is factually correct. This is in large part due to difficulty in retrieving relevant evidence passages from a large text corpus. CREPE provides a benchmark to study question answering in the wild, and our analyses provide avenues for future work in better modeling and further studying the task. @@ -8403,7 +8403,7 @@ Multilingual Knowledge Graph Completion with Language-Sensitive Multi-Graph Attention RongchuanTangInstitute of Automation, Chinese Academy of Sciences YangZhaoInstitute of Automation, Chinese Academy of Sciences - ChengqingZongInstitute of Automation, Chinese Academy of Sciences + ChengqingZongInstitute of Automation, Chinese Academy of Sciences YuZhouCASIA 10508-10519 Multilingual Knowledge Graph Completion (KGC) aims to predict missing links with multilingual knowledge graphs. However, existing approaches suffer from two main drawbacks: (a) alignment dependency: the multilingual KGC is always realized with joint entity or relation alignment, which introduces additional alignment models and increases the complexity of the whole framework; (b) training inefficiency: the trained model will only be used for the completion of one target KG, although the data from all KGs are used simultaneously. To address these drawbacks, we propose a novel multilingual KGC framework with language-sensitive multi-graph attention such that the missing links on all given KGs can be inferred by a universal knowledge completion model. Specifically, we first build a relational graph neural network by sharing the embeddings of aligned nodes to transfer language-independent knowledge. Meanwhile, a language-sensitive multi-graph attention (LSMGA) is proposed to deal with the information inconsistency among different KGs. Experimental results show that our model achieves significant improvements on the DBP-5L and E-PKG datasets. @@ -8423,7 +8423,7 @@ BudhadityaDebMicrosoft Corporation Yuan-JyueChenMicrosoft TristanNaumannMicrosoft Research - NoémieElhadadColumbia University + NoémieElhadadColumbia University 10520-10542 Summarization models often generate text that is poorly calibrated to quality metrics because they are trained to maximize the likelihood of a single reference (MLE). To address this, recent work has added a calibration step, which exposes a model to its own ranked outputs to improve relevance or, in a separate line of work, contrasts positive and negative sets to improve faithfulness. While effective, much of this work has focused on how to generate and optimize these sets. Less is known about why one setup is more effective than another. In this work, we uncover the underlying characteristics of effective sets. For each training instance, we form a large, diverse pool of candidates and systematically vary the subsets used for calibration fine-tuning. Each selection strategy targets distinct aspects of the sets, such as lexical diversity or the size of the gap between positive and negatives. On three diverse scientific long-form summarization datasets (spanning biomedical, clinical, and chemical domains), we find, among others, that faithfulness calibration is optimal when the negative sets are extractive and more likely to be generated, whereas for relevance calibration, the metric margin between candidates should be maximized and surprise–the disagreement between model and metric defined candidate rankings–minimized. 2023.acl-long.587 @@ -8548,7 +8548,7 @@ PhilippeLabanSalesforce Research JesseVigSalesforce Research WojciechKryscinskiSalesforce Research - ShafiqJotyNanyang Technological University; Salesforce AI Research + ShafiqJotyNanyang Technological University; Salesforce AI Research CaimingXiongSalesforce Chien-ShengWuSalesforce 10674-10695 @@ -8643,13 +8643,13 @@ Modeling What-to-ask and How-to-ask for Answer-unaware Conversational Question Generation - Xuan LongDoNanyang Technological University + Xuan LongDoNanyang Technological University BoweiZouInstitute for Infocomm Research - ShafiqJotyNanyang Technological University; Salesforce AI Research + ShafiqJotyNanyang Technological University; Salesforce AI Research TranTaiMr LiangmingPanUniversity of California, Santa Barbara (UCSB) - NancyChenInstitute for Infocomm Research, A*STAR - Ai TiAwInstitute for Infocomm Research + NancyChenInstitute for Infocomm Research, A*STAR + Ai TiAwInstitute for Infocomm Research 10785-10803 Conversational Question Generation (CQG) is a critical task for machines to assist humans in fulfilling their information needs through conversations. The task is generally cast into two different settings: answer-aware and answer-unaware. While the former facilitates the models by exposing the expected answer, the latter is more realistic and receiving growing attentions recently. What-to-ask and how-to-ask are the two main challenges in the answer-unaware setting. To address the first challenge, existing methods mainly select sequential sentences in context as the rationales. We argue that the conversation generated using such naive heuristics may not be natural enough as in reality, the interlocutors often talk about the relevant contents that are not necessarily sequential in context. Additionally, previous methods decide the type of question to be generated (boolean/span-based) implicitly. Modeling the question type explicitly is crucial as the answer, which hints the models to generate a boolean or span-based question, is unavailable. To this end, we present SG-CQG, a two-stage CQG framework. For the what-to-ask stage, a sentence is selected as the rationale from a semantic graph that we construct, and extract the answer span from it. For the how-to-ask stage, a classifier determines the target answer type of the question via two explicit control signals before generating and filtering. In addition, we propose Conv-Distinct, a novel evaluation metric for CQG, to evaluate the diversity of the generated conversation from a context. Compared with the existing answer-unaware CQG models, the proposed SG-CQG achieves state-of-the-art performance. 2023.acl-long.603 @@ -8729,10 +8729,10 @@ <fixed-case>M</fixed-case>asakha<fixed-case>POS</fixed-case>: Part-of-Speech Tagging for Typologically Diverse <fixed-case>A</fixed-case>frican languages - Cheikh M. BambaDioneGaston Berger University - David IfeoluwaAdelaniUniversity College London + Cheikh M. BambaDioneGaston Berger University + David IfeoluwaAdelaniUniversity College London PeterNabendeMakerere University - JesujobaAlabiSaarland University + JesujobaAlabiSaarland University ThapeloSindaneUniversity of Pretoria, Masakhane HappyBuzaabaRIKEN Shamsuddeen HassanMuhammadBayero University, Kano @@ -8788,7 +8788,7 @@ LongBaiSchool of Computer Science and Technology, University of Chinese Academy of Sciences; CAS Key Laboratory of Network Data Science and Technology, Institute of Computing Technology, Chinese Academy of Sciences SaipingGuanSchool of Computer Science and Technology, University of Chinese Academy of Sciences; CAS Key Laboratory of Network Data Science and Technology, Institute of Computing Technology, Chinese Academy of Sciences JiafengGuoInstitute of Computing Technology, CAS - XueqiChengInstitute of Computing Technology, CAS + XueqiChengInstitute of Computing Technology, CAS 10901-10913 Event Causality Identification (ECI) aims to identify causal relations between events in unstructured texts. This is a very challenging task, because causal relations are usually expressed by implicit associations between events. Existing methods usually capture such associations by directly modeling the texts with pre-trained language models, which underestimate two kinds of semantic structures vital to the ECI task, namely, event-centric structure and event-associated structure. The former includes important semantic elements related to the events to describe them more precisely, while the latter contains semantic paths between two events to provide possible supports for ECI. In this paper, we study the implicit associations between events by modeling the above explicit semantic structures, and propose a Semantic Structure Integration model (SemSIn).It utilizes a GNN-based event aggregator to integrate the event-centric structure information, and employs an LSTM-based path aggregator to capture the event-associated structure information between two events. Experimental results on three widely used datasets show that SemSIn achieves significant improvements over baseline methods. 2023.acl-long.610 @@ -8864,7 +8864,7 @@ I-HungHsuUSC Information Sciences Institute ZhiyuXieTsinghua University Kuan-HaoHuangUniversity of California, Los Angeles - PremNatarajanAmazon / Alexa + PremNatarajanAmazon / Alexa NanyunPengUniversity of California, Los Angeles 10976-10993 Event argument extraction (EAE) identifies event arguments and their specific roles for a given event. Recent advancement in generation-based EAE models has shown great performance and generalizability over classification-based models. However, existing generation-based EAE models mostly focus on problem re-formulation and prompt design, without incorporating additional information that has been shown to be effective for classification-based models, such as the abstract meaning representation (AMR) of the input passages. Incorporating such information into generation-based models is challenging due to the heterogeneous nature of the natural language form prevalently used in generation-based models and the structured form of AMRs. In this work, we study strategies to incorporate AMR into generation-based EAE models. We propose AMPERE, which generates AMR-aware prefixes for every layer of the generation model. Thus, the prefix introduces AMR information to the generation-based EAE model and then improves the generation. We also introduce an adjusted copy mechanism to AMPERE to help overcome potential noises brought by the AMR graph. Comprehensive experiments and analyses on ACE2005 and ERE datasets show that AMPERE can get 4% - 10% absolute F1 score improvements with reduced training data and it is in general powerful across different training sizes. @@ -9005,7 +9005,7 @@ ZhengfuHeFudan University QinZhuFudan University XipengQiuFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 11156-11172 Prompt tuning is a parameter-efficient approach to adapting pre-trained language models to downstream tasks. Although prompt tuning has been shown to match the performance of full model tuning when training data is sufficient, it tends to struggle in few-shot learning settings. In this paper, we present Multi-task Pre-trained Modular Prompt (MP2) to boost prompt tuning for few-shot learning. MP2 is a set of combinable prompts pre-trained on 38 Chinese tasks. On downstream tasks, the pre-trained prompts are selectively activated and combined, leading to strong compositional generalization to unseen tasks. To bridge the gap between pre-training and fine-tuning, we formulate upstream and downstream tasks into a unified machine reading comprehension task. Extensive experiments under two learning paradigms, i.e., gradient descent and black-box tuning, show that MP2 significantly outperforms prompt tuning, full model tuning, and prior prompt pre-training methods in few-shot settings. In addition, we demonstrate that MP2 can achieve surprisingly fast and strong adaptation to downstream tasks by merely learning 8 parameters to combine the pre-trained modular prompts. 2023.acl-long.625 @@ -9020,7 +9020,7 @@ LinlinLiuNanyang Technological University Yew KenChiaSingapore University of Technology and Design BoyangLiNanyang Technological University - ShafiqJotyNanyang Technological University; Salesforce AI Research + ShafiqJotyNanyang Technological University; Salesforce AI Research LidongBingAlibaba DAMO Academy 11173-11195 Data annotation is the process of labeling data that could be used to train machine learning models. Having high quality annotation is crucial, as it allows the model to learn the relationship between the input data and the desired output. GPT-3, a large-scale language model developed by OpenAI, has demonstrated im- impressive zero- and few-shot performance on a wide range of NLP tasks. It is therefore natural to wonder whether it can be used to effectively annotate data for NLP tasks. In this paper, we evaluate the performance of GPT-3 as a data annotator by comparing it with traditional data annotation methods and analyzing its output on a range of tasks. Through this analysis, we aim to provide insight into the potential of GPT-3 as a general-purpose data annotator in NLP. @@ -9089,7 +9089,7 @@ AkshitaBhagiaAllen Institute for AI YizhongWangUniversity of Washington HannanehHajishirziUniversity of Washington - MatthewPetersAllen Institute for Artificial Intelligence + MatthewPetersAllen Institute for Artificial Intelligence 11272-11288 Recent NLP models have shown the remarkable ability to effectively generalise ‘zero-shot’ to new tasks using only natural language instructions as guidance. However, many of these approaches suffer from high computational costs due to their reliance on concatenating lengthy instructions with every input example, resulting in costly reprocessing of the instruction. To avoid this, we introduce Hypernetworks for INstruction Tuning (HINT), which convert task instructions and examples into parameter-efficient modules inserted into an underlying model using a pretrained text encoder, eliminating the need to include instructions in the model input. The hypernetwork in HINT also produces an encoded instruction, which we concatenate with encoded inputs during decoding to further improve performance. HINT models outperform strong state-of-the-art baselines by over 10% when controlling for compute (measured in FLOPs). By converting instructions into modules, HINT models can effectively disregard the length of instructions and few-shot example inputs in terms of compute usage. As a result, HINT can enhance its performance by up to 25% by incorporating additional few-shot data, while utilizing only up to 5% more compute. This combines the strengths of parameter-efficient fine-tuning and in-context learning. 2023.acl-long.631 @@ -9256,7 +9256,7 @@ Contrastive Error Attribution for Finetuned Language Models FaisalLadhakColumbia University EsinDurmusStanford University - TatsunoriHashimotoStanford + TatsunoriHashimotoStanford 11482-11498 Recent work has identified noisy and misannotated data as a core cause of hallucinations and unfaithful outputs in Natural Language Generation (NLG) tasks. Consequently, identifying and removing these examples is a key open challenge in creating reliable NLG systems. In this work, we introduce a framework to identify and remove low-quality training instances that lead to undesirable outputs, such as faithfulness errors in text summarization. We show that existing approaches for error tracing, such as gradient-based influence measures, do not perform reliably for detecting faithfulness errors in NLG datasets. We overcome the drawbacks of existing error tracing methods through a new, contrast-based estimate that compares undesired generations to human-corrected outputs. Our proposed method can achieve a mean average precision of 0.93 at detecting known data errors across synthetic tasks with known ground truth, substantially outperforming existing approaches. Using this approach and re-training models on cleaned data leads to a 70% reduction in entity hallucinations on the NYT dataset and a 55% reduction in semantic errors on the E2E dataset. 2023.acl-long.643 @@ -9400,16 +9400,16 @@ HaileySchoelkopfEleutherAI NiklasMuennighoffHugging Face Alham FikriAjiMBZUAI - David IfeoluwaAdelaniUniversity College London + David IfeoluwaAdelaniUniversity College London KhalidAlmubarakPrince Sattam bin Abdulaziz University M SaifulBariNanyang Technological University LintangSutawikaDatasaur.ai JungoKasaiUniversity of Washington AhmedBaruwaUniversity of Oregon - GentaWinataBloomberg + GentaWinataBloomberg StellaBidermanEleutherAI EdwardRaffBooz Allen Hamilton - DragomirRadevYale University + DragomirRadevYale University VassilinaNikoulinaNaver Labs Europe 11682-11703 The BLOOM model is a large publicly available multilingual language model, but its pretraining was limited to 46 languages. To extend the benefits of BLOOM to other languages without incurring prohibitively large costs, it is desirable to adapt BLOOM to new languages not seen during pretraining. In this work, we apply existing language adaptation strategies to BLOOM and benchmark its zero-shot prompting performance on eight new languages in a resource-constrained setting. We find language adaptation to be effective at improving zero-shot performance in new languages. Surprisingly, we find that adapter-based finetuning is more effective than continued pretraining for large models. In addition, we discover that prompting performance is not significantly affected by language specifics, such as the writing system. It is primarily determined by the size of the language adaptation data. We also add new languages to BLOOMZ, which is a multitask finetuned version of BLOOM capable of following task instructions zero-shot. We find including a new language in the multitask fine-tuning mixture to be the most effective method to teach BLOOMZ a new language. We conclude that with sufficient training data language adaptation can generalize well to diverse languages. Our code is available at https://github.com/bigscience-workshop/multilingual-modeling. @@ -9489,7 +9489,7 @@ Learning to Initialize: Can Meta Learning Improve Cross-task Generalization in Prompt Tuning? ChengweiQinNanyang Technological University - ShafiqJotyNanyang Technological University; Salesforce AI Research + ShafiqJotyNanyang Technological University; Salesforce AI Research QianLiNortheastern University RuochenZhaoNanyang Technological University 11802-11832 @@ -9519,7 +9519,7 @@ FitsumGaimKorea Advanced Institute of Science and Technology WonsukYangKorea Advanced Institute of Science and Technology HancheolParkSchool of Computing, KAIST - JongParkKAIST + JongParkKAIST 11857-11870 Question-Answering (QA) has seen significant advances recently, achieving near human-level performance over some benchmarks. However, these advances focus on high-resourced languages such as English, while the task remains unexplored for most other languages, mainly due to the lack of annotated datasets. This work presents a native QA dataset for an East African language, Tigrinya. The dataset contains 10.6K question-answer pairs spanning 572 paragraphs extracted from 290 news articles on various topics. The dataset construction method is discussed, which is applicable to constructing similar resources for related languages. We present comprehensive experiments and analyses of several resource-efficient approaches to QA, including monolingual, cross-lingual, and multilingual setups, along with comparisons against machine-translated silver data. Our strong baseline models reach 76% in the F1 score, while the estimated human performance is 92%, indicating that the benchmark presents a good challenge for future work. We make the dataset, models, and leaderboard publicly available. 2023.acl-long.661 @@ -9532,7 +9532,7 @@ <fixed-case>ESCOXLM</fixed-case>-<fixed-case>R</fixed-case>: Multilingual Taxonomy-driven Pre-training for the Job Market Domain MikeZhangIT University of Copenhagen Robvan der GootIT University of Copenhagen - BarbaraPlankLMU Munich + BarbaraPlankLMU Munich 11871-11890 The increasing number of benchmarks for Natural Language Processing (NLP) tasks in the computational job market domain highlights the demand for methods that can handle job-related tasks such as skill extraction, skill classification, job title classification, and de-identification. While some approaches have been developed that are specific to the job market domain, there is a lack of generalized, multilingual models and benchmarks for these tasks. In this study, we introduce a language model called ESCOXLM-R, based on XLM-R-large, which uses domain-adaptive pre-training on the European Skills, Competences, Qualifications and Occupations (ESCO) taxonomy, covering 27 languages. The pre-training objectives for ESCOXLM-R include dynamic masked language modeling and a novel additional objective for inducing multilingual taxonomical ESCO relations. We comprehensively evaluate the performance of ESCOXLM-R on 6 sequence labeling and 3 classification tasks in 4 languages and find that it achieves state-of-the-art results on 6 out of 9 datasets. Our analysis reveals that ESCOXLM-R performs better on short spans and outperforms XLM-R-large on entity-level and surface-level span-F1, likely due to ESCO containing short skill and occupation titles, and encoding information on the entity-level. 2023.acl-long.662 @@ -9548,7 +9548,7 @@ AsishGhoshalFacebook AI JimmyLinUniversity of Waterloo YasharMehdadFacebook AI - Wen-tauYihMeta AI - FAIR + Wen-tauYihMeta AI - FAIR XilunChenMeta AI 11891-11907 Multi-vector retrieval methods combine the merits of sparse (e.g. BM25) and dense (e.g. DPR) retrievers and have achieved state-of-the-art performance on various retrieval tasks. These methods, however, are orders of magnitude slower and need much more space to store their indices compared to their single-vector counterparts. In this paper, we unify different multi-vector retrieval models from a token routing viewpoint and propose conditional token interaction via dynamic lexical routing, namely CITADEL, for efficient and effective multi-vector retrieval.CITADEL learns to route different token vectors to the predicted lexical keys such that a query token vector only interacts with document token vectors routed to the same key. This design significantly reduces the computation cost while maintaining high accuracy. Notably, CITADEL achieves the same or slightly better performance than the previous state of the art, ColBERT-v2, on both in-domain (MS MARCO) and out-of-domain (BEIR) evaluations, while being nearly 40 times faster. Source code and data are available at https://github.com/facebookresearch/dpr-scale/tree/citadel. @@ -9580,7 +9580,7 @@ XiaoranLiuStony Brook University JonahLubyStony Brook University ChristianLuhmannStony Brook University - H. AndrewSchwartzStony Brook University + H. AndrewSchwartzStony Brook University 11923-11936 While transformer-based systems have enabled greater accuracies with fewer training examples, data acquisition obstacles still persist for rare-class tasks – when the class label is very infrequent (e.g. < 5% of samples). Active learning has in general been proposed to alleviate such challenges, but choice of selection strategy, the criteria by which rare-class examples are chosen, has not been systematically evaluated. Further, transformers enable iterative transfer-learning approaches. We propose and investigate transfer- and active learning solutions to the rare class problem of dissonance detection through utilizing models trained on closely related tasks and the evaluation of acquisition strategies, including a proposed probability-of-rare-class (PRC) approach. We perform these experiments for a specific rare-class problem: collecting language samples of cognitive dissonance from social media. We find that PRC is a simple and effective strategy to guide annotations and ultimately improve model accuracy while transfer-learning in a specific order can improve the cold-start performance of the learner but does not benefit iterations of active learning. 2023.acl-long.665 @@ -9594,7 +9594,7 @@ QiJiaShanghai Jiao Tong University YizhuLiuMeituan HaifengTangChina Merchants Bank Credit Card Center - KennyZhuShanghai Jiao Tong University + KennyZhuShanghai Jiao Tong University 11937-11950 Curriculum learning has shown promising improvements in multiple domains by training machine learning models from easy samples to hard ones. Previous works which either design rules or train models for scoring the difficulty highly rely on task-specific expertise, and cannot generalize. Inspired by the “easy-to-hard” intuition, we propose to do in-sample curriculum learning for natural language generation tasks. Our learning strategy starts training the model to generate the last few words, i.e., do sequence completion, and gradually extends to generate the whole output sequence. Comprehensive experiments show that it generalizes well to different tasks and achieves significant improvements over strong baselines. 2023.acl-long.666 @@ -9646,7 +9646,7 @@ XinyiMouSchool of Data Science, Fudan University ZhongyuWeiSchool of Data Science, Fudan University QiZhangFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 11996-12012 Modeling political actors is at the core of quantitative political science. Existing works have incorporated contextual information to better learn the representation of political actors for specific tasks through graph models. However, they are limited to the structure and objective of training settings and can not be generalized to all politicians and other tasks. In this paper, we propose a Unified Pre-training Architecture for Political Actor Modeling based on language (UPPAM). In UPPAM, we aggregate statements to represent political actors and learn the mapping from languages to representation, instead of learning the representation of particular persons. We further design structure-aware contrastive learning and behavior-driven contrastive learning tasks, to inject multidimensional information in the political context into the mapping. In this framework, we can profile political actors from different aspects and solve various downstream tasks. Experimental results demonstrate the effectiveness and capability of generalization of our method. 2023.acl-long.670 @@ -9682,8 +9682,8 @@ Beyond Contrastive Learning: A Variational Generative Model for Multilingual Retrieval JohnWietingUniversity of Illinois; TTI-Chicago; CMU; Google - JonathanClarkGoogle - WilliamCohenGoogle AI + JonathanClarkGoogle + WilliamCohenGoogle AI GrahamNeubigCarnegie Mellon University TaylorBerg-KirkpatrickUniversity of California San Diego 12044-12066 @@ -9700,7 +9700,7 @@ TianleWangShanghai Jiao Tong University SachinKumarCarnegie Mellon University KyunghyunChoNew York University - JamesGlassMassachusetts Institute of Technology + JamesGlassMassachusetts Institute of Technology YuliaTsvetkovUniversity of Washington 12067-12097 In this work, we explore a useful but often neglected methodology for robustness analysis of text generation evaluation metrics: stress tests with synthetic data. Basically, we design and synthesize a wide range of potential errors and check whether they result in a commensurate drop in the metric scores. We examine a range of recently proposed evaluation metrics based on pretrained language models, for the tasks of open-ended generation, translation, and summarization. Our experiments reveal interesting insensitivities, biases, or even loopholes in existing metrics. For example, we find that BERTScore is confused by truncation errors in summarization, and MAUVE (built on top of GPT-2) is insensitive to errors at the beginning or middle of generations. Further, we investigate the reasons behind these blind spots and suggest practical workarounds for a more reliable evaluation of text generation. We have released our code and data at https://github.com/cloudygoose/blindspot_nlg. @@ -9798,7 +9798,7 @@ HoyunSongKAIST JisuShinKAIST HuijeLeeKorea Advanced Institute of Science and Technology (KAIST) - JongParkKAIST + JongParkKAIST 12190-12206 Social media is one of the most highly sought resources for analyzing characteristics of the language by its users. In particular, many researchers utilized various linguistic features of mental health problems from social media. However, existing approaches to detecting mental disorders face critical challenges, such as the scarcity of high-quality data or the trade-off between addressing the complexity of models and presenting interpretable results grounded in expert domain knowledge. To address these challenges, we design a simple but flexible model that preserves domain-based interpretability. We propose a novel approach that captures the semantic meanings directly from the text and compares them to symptom-related descriptions. Experimental results demonstrate that our model outperforms relevant baselines on various mental disorder detection tasks. Our detailed analysis shows that the proposed model is effective at leveraging domain knowledge, transferable to other mental disorders, and providing interpretable detection results. 2023.acl-long.681 @@ -9810,8 +9810,8 @@ Downstream Datasets Make Surprisingly Good Pretraining Corpora KundanKrishnaCarnegie Mellon University SaurabhGargCarnegie Mellon University - JeffreyBighamCMU/Apple - ZacharyLiptonCarnegie Mellon University + JeffreyBighamCMU/Apple + ZacharyLiptonCarnegie Mellon University 12207-12222 For most natural language processing tasks, the dominant practice is to finetune large pretrained transformer models (e.g., BERT) using smaller downstream datasets. Despite the success of this approach, it remains unclear to what extent these gainsare attributable to the massive background corpora employed for pretraining versus to the pretraining objectives themselves. This paper introduces a large-scale study of self-pretraining, where the same (downstream) training data is used for both pretraining and finetuning.In experiments addressing both ELECTRA and RoBERTa models and 10 distinct downstream classification datasets, we observe that self-pretraining rivals standard pretraining on the BookWiki corpus (despite using around 10x–500x less data), outperforming the latter on 7 and 5 datasets, respectively. Surprisingly, these task-specific pretrained models often perform well on other tasks,including the GLUE benchmark. Besides classification tasks, self-pretraining also provides benefits on structured output prediction tasks such as span based question answering and commonsense inference, often providing more than 50% of the performance boosts provided by pretraining on the BookWiki corpus. Our results hint that in many scenarios, performance gains attributable to pretraining are driven primarily by the pretraining objective itself and are not always attributable to the use of external pretraining data in massive amounts. These findings are especially relevant in light of concerns about intellectual property and offensive content in web-scale pretraining data. 2023.acl-long.682 @@ -9825,7 +9825,7 @@ ChenweiZhangAmazon XianLiAmazon JingboShangUniversity of California, San Diego - Jinho D.ChoiEmory University + Jinho D.ChoiEmory University 12223-12239 We present a new task setting for attribute mining on e-commerce products, serving as a practical solution to extract open-world attributes without extensive human intervention. Our supervision comes from a high-quality seed attribute set bootstrapped from existing resources, and we aim to expand the attribute vocabulary of existing seed types, and also to discover any new attribute types automatically. A new dataset is created to support our setting, and our approach Amacer is proposed specifically to tackle the limited supervision. Especially, given that no direct supervision is available for those unseen new attributes, our novel formulation exploits self-supervised heuristic and unsupervised latent attributes, which attains implicit semantic signals as additional supervision by leveraging product context. Experiments suggest that our approach surpasses various baselines by 12 F1, expanding attributes of existing types significantly by up to 12 times, and discovering values from 39% new types. 2023.acl-long.683 @@ -9839,7 +9839,7 @@ PingNiePeking University JieCaiPeking University HaifengWangBaidu - Zheng-YuNiuBaidu Inc. + Zheng-YuNiuBaidu Inc. PengZhangTsinghua University MrinmayaSachanETH Zurich KaipingPengTsinghua University @@ -9881,9 +9881,9 @@ AriHoltzmanUniversity of Washington DanielFriedCarnegie Mellon University PercyLiangStanford University - JasonEisnerJohns Hopkins University + Microsoft Corporation - TatsunoriHashimotoStanford - LukeZettlemoyerUniversity of Washington; Meta + JasonEisnerJohns Hopkins University + Microsoft Corporation + TatsunoriHashimotoStanford + LukeZettlemoyerUniversity of Washington; Meta MikeLewisFacebook AI Research 12286-12312 Given a language model (LM), maximum probability is a poor decoding objective for open-ended generation, because it produces short and repetitive text. On the other hand, sampling can often produce incoherent text that drifts from the original topics. We propose contrastive decoding (CD), a reliable decoding approach that optimizes a contrastive objective subject to a plausibility constraint. The contrastive objective returns the difference between the likelihood under a large LM (called the expert, e.g. OPT-13B) and a small LM (called the amateur, e.g. OPT-125M), and the constraint ensures that the outputs are plausible. CD is inspired by the fact that the failures of larger LMs (e.g., repetition, inco- herence) are even more prevalent in smaller LMs, and that this difference signals which texts should be preferred. CD requires zero additional training, and produces higher quality text than decoding from the larger LM alone. It also works across model scales (OPT-13B and GPT2-1.5B) and significantly outperforms four strong decoding algorithms (e.g., nucleus, top-k) in automatic and human evaluations across wikipedia, news and story domains. @@ -9938,7 +9938,7 @@ <fixed-case>W</fixed-case>iki<fixed-case>B</fixed-case>io: a Semantic Resource for the Intersectional Analysis of Biographical Events - Marco AntonioStranisciUniversity of Turin + Marco AntonioStranisciUniversity of Turin RossanaDamianoUniversità di Torino EnricoMensaUniversity of Turin - Department of Computer Science VivianaPattiUniversity of Turin, Dipartimento di Informatica @@ -9971,7 +9971,7 @@ RahulAralikatteMila GowthamRameshUniversity of Wisconsin Madison ShreyaGoyalAmerican Express - Mitesh M.KhapraIndian Institute of Technology Madras + Mitesh M.KhapraIndian Institute of Technology Madras AnoopKunchukuttanMicrosoft AI and Research PratyushKumarIIT Madras 12402-12426 @@ -10038,9 +10038,9 @@ SimoneTedeschiBabelscape JohanBosUniversity of Groningen ThierryDeclerckDFKI GmbH - JanHajičCharles University + JanHajičCharles University DanielHershcovichUniversity of Copenhagen - EduardHovyUniversity of Melbourne + EduardHovyUniversity of Melbourne AlexanderKollerSaarland University SimonKrekJožef Stefan Institute StevenSchockaertCardiff University @@ -10075,7 +10075,7 @@ Hints on the data for language modeling of synthetic languages with transformers RodolfoZevallosUniversitat Pompeu Fabra - NuriaBelUniversitat Pompeu Fabra + NuriaBelUniversitat Pompeu Fabra 12508-12522 Language Models (LM) are becoming more and more useful for providing representations upon which to train Natural Language Processing applications. However, there is now clear evidence that attention-based transformers require a critical amount of language data to produce good enough LMs. The question we have addressed in this paper is to what extent the critical amount of data varies for languages of different morphological typology, in particular those that have a rich inflectional morphology, and whether the tokenization method to preprocess the data can make a difference. These details can be important for low-resourced languages that need to plan the production of datasets. We evaluated intrinsically and extrinsically the differences of five different languages with different pretraining dataset sizes and three different tokenization methods for each. The results confirm that the size of the vocabulary due to morphological characteristics is directly correlated with both the LM perplexity and the performance of two typical downstream tasks such as NER identification and POS labeling. The experiments also provide new evidence that a canonical tokenizer can reduce perplexity by more than a half for a polysynthetic language like Quechua as well as raising F1 from 0.8 to more than 0.9 in both downstream tasks with a LM trained with only 6M tokens. 2023.acl-long.699 @@ -10127,7 +10127,7 @@ Large-scale Lifelong Learning of In-context Instructions and How to Tackle It JisooMokSeoul National University JaeyoungDoAmazon Alexa AI - SungjinLeeAmazon Alexa AI + SungjinLeeAmazon Alexa AI TaraTaghaviAmazon SeunghakYuNaver Search US SungrohYoonSeoul National University @@ -10194,7 +10194,7 @@ Unsupervised Selective Rationalization with Noise Injection AdamStorekColumbia University MelanieSubbiahColumbia University - KathleenMcKeownColumbia University and Amazon (Amazon Scholar) + KathleenMcKeownColumbia University and Amazon (Amazon Scholar) 12647-12659 A major issue with using deep learning models in sensitive applications is that they provide no explanation for their output. To address this problem, unsupervised selective rationalization produces rationales alongside predictions by chaining two jointly-trained components, a rationale generator and a predictor. Although this architecture guarantees that the prediction relies solely on the rationale, it does not ensure that the rationale contains a plausible explanation for the prediction. We introduce a novel training technique that effectively limits generation of implausible rationales by injecting noise between the generator and the predictor. Furthermore, we propose a new benchmark for evaluating unsupervised selective rationalization models using movie reviews from existing datasets. We achieve sizeable improvements in rationale plausibility and task accuracy over the state-of-the-art across a variety of tasks, including our new benchmark, while maintaining or improving model faithfulness. 2023.acl-long.707 @@ -10300,7 +10300,7 @@ <fixed-case>MGR</fixed-case>: Multi-generator Based Rationalization - WeiLiuHuazhong University of Science and Technology + WeiLiuHuazhong University of Science and Technology HaozhaoWangHuazhong University of Science and Technology JunWangiWudao Tech RuixuanLiHuazhong University of Science and Technology @@ -10322,7 +10322,7 @@ DiLuDataminr ShihaoRanDataminr KeZhangDataminr, inc - JoelTetreaultDataminr + JoelTetreaultDataminr AlejandroJaimesDataminr 12788-12812 The proliferation of automatic faithfulness metrics for summarization has produced a need for benchmarks to evaluate them. While existing benchmarks measure the correlation with human judgements of faithfulness on model-generated summaries, they are insufficient for diagnosing whether metrics are: 1) consistent, i.e., indicate lower faithfulness as errors are introduced into a summary, 2) effective on human-written texts, and 3) sensitive to different error types (as summaries can contain multiple errors). To address these needs, we present a benchmark of unfaithful minimal pairs (BUMP), a dataset of 889 human-written, minimally different summary pairs, where a single error is introduced to a summary from the CNN/DailyMail dataset to produce an unfaithful summary. We find BUMP complements existing benchmarks in a number of ways: 1) the summaries in BUMP are harder to discriminate and less probable under SOTA summarization models, 2) unlike non-pair-based datasets, BUMP can be used to measure the consistency of metrics, and reveals that the most discriminative metrics tend not to be the most consistent, and 3) unlike datasets containing generated summaries with multiple errors, BUMP enables the measurement of metrics’ performance on individual error types. @@ -10422,7 +10422,7 @@ Kuan-HaoHuangUniversity of California, Los Angeles ShuningZhangTsinghua University WenxinChengUniversity of California, Los Angeles - PremNatarajanAmazon / Alexa + PremNatarajanAmazon / Alexa Kai-WeiChangUCLA NanyunPengUniversity of California, Los Angeles 12917-12932 @@ -10469,7 +10469,7 @@ PhilippWickeInstitute for Information and Language Processing, LMU RenhaoPeiLudwig Maximilian University of Munich RobertZangenfeindCenter for Information and Language Processing, University of Munich - HinrichSchützeCenter for Information and Language Processing, University of Munich + HinrichSchützeCenter for Information and Language Processing, University of Munich 12969-13000 Languages differ in how they divide up the world into concepts and words; e.g., in contrast to English, Swahili has a single concept for ‘belly’ and ‘womb’. We investigate these differences in conceptualization across 1,335 languages by aligning concepts in a parallel corpus. To this end, we propose Conceptualizer, a method that creates a bipartite directed alignment graph between source language concepts and sets of target language strings. In a detailed linguistic analysis across all languages for one concept (‘bird’) and an evaluation on gold standard data for 32 Swadesh concepts, we show that Conceptualizer has good alignment accuracy. We demonstrate the potential of research on conceptualization in NLP with two experiments. (1) We define crosslingual stability of a concept as the degree to which it has 1-1 correspondences across languages, and show that concreteness predicts stability. (2) We represent each language by its conceptualization pattern for 83 concepts, and define a similarity measure on these representations. The resulting measure for the conceptual similarity between two languages is complementary to standard genealogical, typological, and surface similarity measures. For four out of six language families, we can assign languages to their correct family based on conceptual similarity with accuracies between 54% and 87% 2023.acl-long.726 @@ -10529,7 +10529,7 @@ Extrinsic Evaluation of Machine Translation Metrics NikitaMogheUniversity of Edinburgh TomSherborneUniversity of Edinburgh - MarkSteedmanUniversity of Edinburgh + MarkSteedmanUniversity of Edinburgh AlexandraBirchUniversity of Edinburgh 13060-13078 Automatic machine translation (MT) metrics are widely used to distinguish the quality of machine translation systems across relatively large test sets (system-level evaluation). However, it is unclear if automatic metrics are reliable at distinguishing good translations from bad translations at the sentence level (segment-level evaluation). In this paper, we investigate how useful MT metrics are at detecting the segment-level quality by correlating metrics with how useful the translations are for downstream task. We evaluate the segment-level performance of the most widely used MT metrics (chrF, COMET, BERTScore, etc.) on three downstream cross-lingual tasks (dialogue state tracking, question answering, and semantic parsing). For each task, we only have access to a monolingual task-specific model and a translation model. We calculate the correlation between the metric’s ability to predict a good/bad translation with the success/failure on the final task for the machine translated test sentences. Our experiments demonstrate that all metrics exhibit negligible correlation with the extrinsic evaluation of the downstream outcomes. We also find that the scores provided by neural metrics are not interpretable, in large part due to having undefined ranges. We synthesise our analysis into recommendations for future MT metrics to produce labels rather than scores for more informative interaction between machine translation and multilingual language understanding. @@ -10582,9 +10582,9 @@ MohamedAbdallaUniversity of Toronto Jan PhilipWahleUniversity of Göttingen TerryRuasUniversity of Göttingen - AurélieNévéolUniversité Paris Saclay, CNRS, LISN + AurélieNévéolUniversité Paris Saclay, CNRS, LISN FannyDucelSorbonne Universite, LORIA - SaifMohammadNRC + SaifMohammadNRC KarenFortSorbonne Universite and LORIA 13141-13160 Recent advances in deep learning methods for natural language processing (NLP) have created new business opportunities and made NLP research critical for industry development. As one of the big players in the field of NLP, together with governments and universities, it is important to track the influence of industry on research. In this study, we seek to quantify and characterize industry presence in the NLP community over time. Using a corpus with comprehensive metadata of 78,187 NLP publications and 701 resumes of NLP publication authors, we explore the industry presence in the field since the early 90s. We find that industry presence among NLP authors has been steady before a steep increase over the past five years (180% growth from 2017 to 2022). A few companies account for most of the publications and provide funding to academic researchers through grants and internships. Our study shows that the presence and impact of the industry on natural language processing research are significant and fast-growing. This work calls for increased transparency of industry influence in the field. @@ -10611,7 +10611,7 @@ Do Question Answering Modeling Improvements Hold Across Benchmarks? - Nelson F.LiuStanford University + Nelson F.LiuStanford University TonyLeeStanford University RobinJiaUniversity of Southern California PercyLiangStanford University @@ -10637,7 +10637,7 @@ QinhongZhouTsinghua University ZonghanYangTsinghua University PengLiInstitute for AI Industry Research (AIR), Tsinghua University, China - YangLiuTsinghua University + YangLiuTsinghua University 13234-13248 Conventional knowledge distillation (KD) methods require access to the internal information of teachers, e.g., logits. However, such information may not always be accessible for large pre-trained language models (PLMs). In this work, we focus on decision-based KD for PLMs, where only teacher decisions (i.e., top-1 labels) are accessible. Considering the information gap between logits and decisions, we propose a novel method to estimate logits from the decision distributions. Specifically, decision distributions can be both derived as a function of logits theoretically and estimated with test-time data augmentation empirically. By combining the theoretical and empirical estimations of the decision distributions together, the estimation of logits can be successfully reduced to a simple root-finding problem. Extensive experiments show that our method significantly outperforms strong baselines on both natural language understanding and machine reading comprehension datasets. 2023.acl-long.738 @@ -10665,7 +10665,7 @@ TongChenThe University of Queensland WeiYuanThe University of Queensland XingshanZengHuawei Noah’s Ark Lab - Kam-FaiWongDepartment of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong + Kam-FaiWongDepartment of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong HongzhiYinThe University of Queensland 13264-13276 Recent legislation of the “right to be forgotten” has led to the interest in machine unlearning, where the learned models are endowed with the function to forget information about specific training instances as if they have never existed in the training set. Previous work mainly focuses on computer vision scenarios and largely ignores the essentials of unlearning in NLP field, where text data contains more explicit and sensitive personal information than images. In this paper, we propose a general unlearning framework called KGA to induce forgetfulness. Different from previous work that tries to recover gradients or forces models to perform close to one specific distribution, KGA maintains distribution differences (i.e., knowledge gap). This relaxes the distribution assumption. Furthermore, we first apply the unlearning method to various NLP tasks (i.e., classification, translation, response generation) and propose several unlearning evaluation metrics with pertinence. Experiments on large-scale datasets show that KGA yields comprehensive improvements over baselines, where extensive analyses further validate the effectiveness of KGA and provide insight into unlearning for NLP tasks. @@ -10739,7 +10739,7 @@ Attention as a Guide for Simultaneous Speech Translation SaraPapiFondazione Bruno Kessler - MatteoNegriFondazione Bruno Kessler + MatteoNegriFondazione Bruno Kessler MarcoTurchiZoom Video Communications 13340-13356 In simultaneous speech translation (SimulST), effective policies that determine when to write partial translations are crucial to reach high output quality with low latency. Towards this objective, we propose EDAtt (Encoder-Decoder Attention), an adaptive policy that exploits the attention patterns between audio source and target textual translation to guide an offline-trained ST model during simultaneous inference. EDAtt exploits the attention scores modeling the audio-translation relation to decide whether to emit a partial hypothesis or wait for more audio input. This is done under the assumption that, if attention is focused towards the most recently received speech segments, the information they provide can be insufficient to generate the hypothesis (indicating that the system has to wait for additional audio input). Results on en->de, es show that EDAtt yields better results compared to the SimulST state of the art, with gains respectively up to 7 and 4 BLEU points for the two languages, and with a reduction in computational-aware latency up to 1.4s and 0.7s compared to existing SimulST policies applied to offline-trained models. @@ -10832,7 +10832,7 @@ ShaolinZhuTianjin university ShangjieLiTianjin University YikunLeiTianjin university - DeyiXiongTianjin University + DeyiXiongTianjin University 13433-13447 Image translation is a task that translates an image containing text in the source language to the target language. One major challenge with image translation is the modality gap between visual text inputs and textual inputs/outputs of machine translation (MT). In this paper, we propose PEIT, an end-to-end image translation framework that bridges the modality gap with pre-trained models. It is composed of four essential components: a visual encoder, a shared encoder-decoder backbone network, a vision-text representation aligner equipped with the shared encoder and a cross-modal regularizer stacked over the shared decoder. Both the aligner and regularizer aim at reducing the modality gap. To train PEIT, we employ a two-stage pre-training strategy with an auxiliary MT task: (1) pre-training the MT model on the MT training data to initialize the shared encoder-decoder backbone network; and (2) pre-training PEIT with the aligner and regularizer on a synthesized dataset with rendered images containing text from the MT training data. In order to facilitate the evaluation of PEIT and promote research on image translation, we create a large-scale image translation corpus ECOIT containing 480K image-translation pairs via crowd-sourcing and manual post-editing from real-world images in the e-commerce domain. Experiments on the curated ECOIT benchmark dataset demonstrate that PEIT substantially outperforms both cascaded image translation systems (OCR+MT) and previous strong end-to-end image translation model, with fewer parameters and faster decoding speed. 2023.acl-long.751 @@ -10873,7 +10873,7 @@ YeganehKordiTehran Polytechnic SwaroopMishraArizona State University AlisaLiuUniversity of Washington - Noah A.SmithUniversity of Washington + Noah A.SmithUniversity of Washington DanielKhashabiJohns Hopkins University HannanehHajishirziUniversity of Washington 13484-13508 @@ -10898,7 +10898,7 @@ Dissecting Transformer Length Extrapolation via the Lens of Receptive Field Analysis Ta-ChungChicarnegie mellon university Ting-HanFanPrinceton University - AlexanderRudnickyCarnegie Mellon University + AlexanderRudnickyCarnegie Mellon University PeterRamadgePrinceton University 13522-13537 Length extrapolation permits training a transformer language model on short sequences that preserves perplexities when tested on substantially longer sequences.A relative positional embedding design, ALiBi, has had the widest usage to date. We dissect ALiBi via the lens of receptive field analysis empowered by a novel cumulative normalized gradient tool. The concept of receptive field further allows us to modify the vanilla Sinusoidal positional embedding to create Sandwich, the first parameter-free relative positional embedding design that truly length information uses longer than the training sequence. Sandwich shares with KERPLE and T5 the same logarithmic decaying temporal bias pattern with learnable relative positional embeddings; these elucidate future extrapolatable positional embedding design. @@ -11055,10 +11055,10 @@ MengzhouXiaPrinceton University MikelArtetxeReka AI ChuntingZhouMeta AI - Xi VictoriaLinMeta AI + Xi VictoriaLinMeta AI RamakanthPasunuruMeta DanqiChenPrinceton University - LukeZettlemoyerUniversity of Washington; Meta + LukeZettlemoyerUniversity of Washington; Meta VeselinStoyanovFacebook 13711-13738 Scaling up language models has led to unprecedented performance gains, but little is understood about how the training dynamics change as models get larger. How do language models of different sizes learn during pre-training? Why do larger language models demonstrate more desirable behaviors? In this paper, we analyze the intermediate training checkpoints of differently sized OPT models (Zhang et al., 2022)—from 125M to 175B parameters—on next-token prediction, sequence-level generation and downstream tasks. We find that 1) at a given perplexity and independent of model sizes, a similar subset of training tokens see the most significant reduction in loss, with the rest stagnating or showing double-descent behavior (Nakkiran et al., 2020); 2) early in training, all models learn to reduce the perplexity of grammatical sequences that contain hallucinations, with small models halting at this suboptimal distribution and larger ones eventually learning to assign these sequences lower probabilities; and 3) perplexity is a strong predictor of in-context learning performance on 74 multiple-choice tasks from BIG-Bench, and this holds independent of the model size. Together, these results show that perplexity is more predictive of model behaviors than model size or training computation. @@ -11094,7 +11094,7 @@ Nuno M.GuerreiroInstituto de Telecomunicacoes, University of Lisbon PierreColomboL2S CentraleSupelec PabloPiantanidaCNRS, CentraleSupelec - AndréMartinsUnbabel, Instituto de Telecomunicacoes + AndréMartinsUnbabel, Instituto de Telecomunicacoes 13766-13784 Neural machine translation (NMT) has become the de-facto standard in real-world machine translation applications. However, NMT models can unpredictably produce severely pathological translations, known as hallucinations, that seriously undermine user trust. It becomes thus crucial to implement effective preventive strategies to guarantee their proper functioning. In this paper, we address the problem of hallucination detection in NMT by following a simple intuition: as hallucinations are detached from the source content, they exhibit encoder-decoder attention patterns that are statistically different from those of good quality translations. We frame this problem with an optimal transport formulation and propose a fully unsupervised, plug-in detector that can be used with any attention-based NMT model. Experimental results show that our detector not only outperforms all previous model-based detectors, but is also competitive with detectors that employ external models trained on millions of samples for related tasks such as quality estimation and cross-lingual sentence similarity. 2023.acl-long.770 @@ -11125,7 +11125,7 @@ JiaxinGePeking University HongyinLuoMIT YoonKimMIT - JamesGlassMassachusetts Institute of Technology + JamesGlassMassachusetts Institute of Technology 13803-13817 Entailment has been recognized as an important metric for evaluating natural language understanding (NLU) models, and recent studies have found that entailment pretraining benefits weakly supervised fine-tuning. In this work, we design a prompting strategy that formulates a number of different NLU tasks as contextual entailment. This approach improves the zero-shot adaptation of pretrained entailment models. Secondly, we notice that self-training entailment-based models with unlabeled data can significantly improve the adaptation performance on downstream tasks. To achieve more stable improvement, we propose the Simple Pseudo-Label Editing (SimPLE) algorithm for better pseudo-labeling quality in self-training. We also found that both pretrained entailment-based models and the self-trained models are robust against adversarial evaluation data. Experiments on binary and multi-class classification tasks show that SimPLE leads to more robust self-training results, indicating that the self-trained entailment models are more efficient and trustworthy than large language models on language understanding tasks. 2023.acl-long.772 @@ -11164,7 +11164,7 @@ Soda MaremLoUniversity of Turin Alessandra TeresaCignarellaComputer Science Department - University of Turin RaffaellaPanizzonUniversity of Padua - CristinaMarcoAlexa AI, Amazon + CristinaMarcoAlexa AI, Amazon BiancaScarliniAmazon VivianaPattiUniversity of Turin, Dipartimento di Informatica CristinaBoscoDipartimento di Informatica - Università di Torino @@ -11333,8 +11333,8 @@ Do You Hear The People Sing? Key Point Analysis via Iterative Clustering and Abstractive Summarisation HaoLiUniversity of Manchester ViktorSchlegelASUS AICS - RizaBatista-NavarroDepartment of Computer Science, The University of Manchester - GoranNenadicUniversity of Manchester + RizaBatista-NavarroDepartment of Computer Science, The University of Manchester + GoranNenadicUniversity of Manchester 14064-14080 Argument summarisation is a promising but currently under-explored field. Recent work has aimed to provide textual summaries in the form of concise and salient short texts, i.e., key points (KPs), in a task known as Key Point Analysis (KPA). One of the main challenges in KPA is finding high-quality key point candidates from dozens of arguments even in a small corpus. Furthermore, evaluating key points is crucial in ensuring that the automatically generated summaries are useful. Although automatic methods for evaluating summarisation have considerably advanced over the years, they mainly focus on sentence-level comparison, making it difficult to measure the quality of a summary (a set of KPs) as a whole. Aggravating this problem is the fact that human evaluation is costly and unreproducible. To address the above issues, we propose a two-step abstractive summarisation framework based on neural topic modelling with an iterative clustering procedure, to generate key points which are aligned with how humans identify key points. Our experiments show that our framework advances the state of the art in KPA, with performance improvement of up to 14 (absolute) percentage points, in terms of both ROUGE and our own proposed evaluation metrics. Furthermore, we evaluate the generated summaries using a novel set-based evaluation toolkit. Our quantitative analysis demonstrates the effectiveness of our proposed evaluation metrics in assessing the quality of generated KPs. Human evaluation further demonstrates the advantages of our approach and validates that our proposed evaluation metric is more consistent with human judgment than ROUGE scores. 2023.acl-long.786 @@ -11420,7 +11420,7 @@ <fixed-case>LLM</fixed-case>-Blender: Ensembling Large Language Models with Pairwise Ranking and Generative Fusion DongfuJiangZhejiang University XiangRenUniversity of Southern California - Bill YuchenLinAllen Institute for AI + Bill YuchenLinAllen Institute for AI 14165-14178 We present LLM-Blender, an ensembling framework designed to attain consistently superior performance by leveraging the diverse strengths of multiple open-source large language models (LLMs). Our framework consists of two modules: PairRanker and GenFuser, addressing the observation that optimal LLMs for different examples can significantly vary. PairRanker employs a specialized pairwise comparison method to distinguish subtle differences between candidate outputs. It jointly encodes the input text and a pair of candidates, using cross-attention encoders to determine the superior one. Our results demonstrate that PairRanker exhibits the highest correlation with ChatGPT-based ranking. Then, GenFuser aims to merge the top-ranked candidates, generating an improved output by capitalizing on their strengths and mitigating their weaknesses. To facilitate large-scale evaluation, we introduce a benchmark dataset, MixInstruct, which is a mixture of multiple instruction datasets featuring oracle pairwise comparisons. Our LLM-Blender significantly outperform individual LLMs and baseline methods across various metrics, establishing a substantial performance gap. 2023.acl-long.792 @@ -11450,7 +11450,7 @@ ShaogangGongQueen Mary University of London HailinJinAdobe Research YuxinPengPeking University - YangLiuPeking University + YangLiuPeking University 14197-14209 Video sentence localization aims to locate moments in an unstructured video according to a given natural language query. A main challenge is the expensive annotation costs and the annotation bias. In this work, we study video sentence localization in a zero-shot setting, which learns with only video data without any annotation. Existing zero-shot pipelines usually generate event proposals and then generate a pseudo query for each event proposal. However, their event proposals are obtained via visual feature clustering, which is query-independent and inaccurate; and the pseudo-queries are short or less interpretable. Moreover, existing approaches ignores the risk of pseudo-label noise when leveraging them in training. To address the above problems, we propose a Structure-based Pseudo Label generation (SPL), which first generate free-form interpretable pseudo queries before constructing query-dependent event proposals by modeling the event temporal structure. To mitigate the effect of pseudo-label noise, we propose a noise-resistant iterative method that repeatedly re-weight the training sample based on noise estimation to train a grounding model and correct pseudo labels. Experiments on the ActivityNet Captions and Charades-STA datasets demonstrate the advantages of our approach. Code can be found at https://github.com/minghangz/SPL. 2023.acl-long.794 @@ -11465,7 +11465,7 @@ VigneshNagarajanIndian Institute of Technology Madras AnoopKunchukuttanMicrosoft AI and Research PratyushKumarIIT Madras - Mitesh M.KhapraIndian Institute of Technology Madras + Mitesh M.KhapraIndian Institute of Technology Madras RajDabreNICT 14210-14228 The rapid growth of machine translation (MT) systems necessitates meta-evaluations of evaluation metrics to enable selection of those that best reflect MT quality. Unfortunately, most meta-evaluation studies focus on European languages, the observations for which may not always apply to other languages. Indian languages, having over a billion speakers, are linguistically different from them, and to date, there are no such systematic studies focused solely on English to Indian language MT. This paper fills this gap through a Multidimensional Quality Metric (MQM) dataset consisting of 7000 fine-grained annotations, spanning 5 Indian languages and 7 MT systems. We evaluate 16 metrics and show that, pre-trained metrics like COMET have the highest correlations with annotator scores as opposed to n-gram metrics like BLEU. We further leverage our MQM annotations to develop an Indic-COMET metric and show that it outperforms COMET counterparts in both human scores correlations and robustness scores in Indian languages. Additionally, we show that the Indic-COMET can outperform COMET on some unseen Indian languages. We hope that our dataset and analysis will facilitate further research in Indic MT evaluation. @@ -11523,7 +11523,7 @@ Python Code Generation by Asking Clarification Questions Haau-Sing (Xiaocheng)LiUKP Lab, Technical University of Darmstadt MohsenMesgarUKP Lab, Technical University of Darmstadt - AndréMartinsUnbabel, Instituto de Telecomunicacoes + AndréMartinsUnbabel, Instituto de Telecomunicacoes IrynaGurevychUKP Lab, Technische Universität Darmstadt 14287-14306 Code generation from text requires understanding the user’s intent from a natural languagedescription and generating an executable code snippet that satisfies this intent. While recent pretrained language models demonstrate remarkable performance for this task, these models fail when the given natural language description is under-specified. In this work, we introduce a novel and more realistic setup for this task. We hypothesize that the under-specification of a natural language description can be resolved by asking clarification questions. Therefore, we collect and introduce a new dataset named CodeClarQA containing pairs of natural language descriptions and code with created synthetic clarification questions and answers. The empirical results of our evaluation of pretrained language model performance on code generation show that clarifications result in more precisely generated code, as shown by the substantial improvement of model performance in all evaluation metrics. Alongside this, our task and dataset introduce new challenges to the community, including when and what clarification questions should be asked. Our code and dataset are available on GitHub. @@ -11652,7 +11652,7 @@ DongkuanXuNorth Carolina State University QingqingCaoUniversity of Washington XiaojunChenShenzhen University - TrevorCohnUniversity of Melbourne + TrevorCohnUniversity of Melbourne MengFangUniversity of Liverpool 14447-14465 Open domain question answering (ODQA) is a longstanding task aimed at answering factual questions from a large knowledge corpus without any explicit evidence in natural language processing (NLP). Recent works have predominantly focused on improving the answering accuracy and have achieved promising progress. However, higher accuracy often requires more memory consumption and inference latency, which might not necessarily be efficient enough for direct deployment in the real world. Thus, a trade-off between accuracy, memory consumption and processing speed is pursued. In this paper, we will survey recent advancements in the efficiency of ODQA models and conclude core techniques for achieving efficiency. Additionally, we will provide a quantitative analysis of memory cost, query speed, accuracy, and overall performance comparison. Our goal is to keep scholars informed of the latest advancements and open challenges in ODQA efficiency research and contribute to the further development of ODQA efficiency. @@ -11721,7 +11721,7 @@ Jointprop: Joint Semi-supervised Learning for Entity and Relation Extraction with Heterogeneous Graph-based Propagation YandanZhengNanyang University of Technology AnranHaoNanyang Technological University - Anh TuanLuuNanyang Technological University, Singapore + Anh TuanLuuNanyang Technological University, Singapore 14541-14555 Semi-supervised learning has been an important approach to address challenges in extracting entities and relations from limited data. However, current semi-supervised works handle the two tasks (i.e., Named Entity Recognition and Relation Extraction) separately and ignore the cross-correlation of entity and relation instances as well as the existence of similar instances across unlabeled data. To alleviate the issues, we propose Jointprop, a Heterogeneous Graph-based Propagation framework for joint semi-supervised entity and relation extraction, which captures the global structure information between individual tasks and exploits interactions within unlabeled data. Specifically, we construct a unified span-based heterogeneous graph from entity and relation candidates and propagate class labels based on confidence scores. We then employ a propagation learning scheme to leverage the affinities between labelled and unlabeled samples. Experiments on benchmark datasets show that our framework outperforms the state-of-the-art semi-supervised approaches on NER and RE tasks. We show that the joint semi-supervised learning of the two tasks benefits from their codependency and validates the importance of utilizing the shared information between unlabeled data. 2023.acl-long.813 @@ -11749,8 +11749,8 @@ Faking Fake News for Real Fake News Detection: Propaganda-Loaded Training Data Generation Kung-HsiangHuangUniversity of Illinois at Urbana-Champaign - KathleenMcKeownColumbia University and Amazon (Amazon Scholar) - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + KathleenMcKeownColumbia University and Amazon (Amazon Scholar) + PreslavNakovMohamed bin Zayed University of Artificial Intelligence YejinChoiUniversity of Washington HengJiUniversity of Illinois at Urbana-Champaign and Amazon (Amazon Scholar) 14571-14589 @@ -11841,7 +11841,7 @@ BingshengYaoRensselaer Polytechnic Institute PrithvirajSenAmazon LucianPopaIBM Research - Almaden - JamesHendlerRensselaer Polytechnic Institute + JamesHendlerRensselaer Polytechnic Institute DakuoWangNortheastern University 14698-14713 Human-annotated labels and explanations are critical for training explainable NLP models. However, unlike human-annotated labels whose quality is easier to calibrate (e.g., with a majority vote), human-crafted free-form explanations can be quite subjective. Before blindly using them as ground truth to train ML models, a vital question needs to be asked: How do we evaluate a human-annotated explanation’s quality? In this paper, we build on the view that the quality of a human-annotated explanation can be measured based on its helpfulness (or impairment) to the ML models’ performance for the desired NLP tasks for which the annotations were collected. In comparison to the commonly used Simulatability score, we define a new metric that can take into consideration the helpfulness of an explanation for model performance at both fine-tuning and inference. With the help of a unified dataset format, we evaluated the proposed metric on five datasets (e.g., e-SNLI) against two model architectures (T5 and BART), and the results show that our proposed metric can objectively evaluate the quality of human-annotated explanations, while Simulatability falls short. @@ -11858,7 +11858,7 @@ YounginLeeKAIST So-YeonAhnKorea Advanced Institute of Science and Technology (KAIST) DongyeopKangUniversity of Minnesota - AliceOhKAIST + AliceOhKAIST 14714-14733 Researchers have traditionally recruited native speakers to provide annotations for the widely used benchmark datasets. But there are languages for which recruiting native speakers is difficult, and it would help to get learners of those languages to annotate the data. In this paper, we investigate whether language learners can contribute annotations to the benchmark datasets. In a carefully controlled annotation experiment, we recruit 36 language learners, provide two types of additional resources (dictionaries and machine-translated sentences), and perform mini-tests to measure their language proficiency. We target three languages, English, Korean, and Indonesian, and four NLP tasks, sentiment analysis, natural language inference, named entity recognition, and machine reading comprehension. We find that language learners, especially those with intermediate or advanced language proficiency, are able to provide fairly accurate labels with the help of additional resources. Moreover, we show that data annotation improves learners’ language proficiency in terms of vocabulary and grammar. The implication of our findings is that broadening the annotation task to include language learners can open up the opportunity to build benchmark datasets for languages for which it is difficult to recruit native speakers. 2023.acl-long.822 @@ -11872,7 +11872,7 @@ HaoFeiNational University of Singapore YixinCaoSingapore Management University LidongBingAlibaba DAMO Academy - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore 14734-14751 Existing research on multimodal relation extraction (MRE) faces two co-existing challenges, internal-information over-utilization and external-information under-exploitation. To combat that, we propose a novel framework that simultaneously implements the idea of internal-information screening and external-information exploiting. First, we represent the fine-grained semantic structures of the input image and text with the visual and textual scene graphs, which are further fused into a unified cross-modal graph (CMG). Based on CMG, we perform structure refinement with the guidance of the graph information bottleneck principle, actively denoising the less-informative features. Next, we perform topic modeling over the input image and text, incorporating latent multimodal topic features to enrich the contexts. On the benchmark MRE dataset, our system outperforms the current best model significantly. With further in-depth analyses, we reveal the great potential of our method for the MRE task. 2023.acl-long.823 @@ -11975,8 +11975,8 @@ Abductive Commonsense Reasoning Exploiting Mutually Exclusive Explanations WentingZhaoCornell University JustinChiuCornell Tech - ClaireCardieCornell University - AlexanderRushCornell University + ClaireCardieCornell University + AlexanderRushCornell University 14883-14896 Abductive reasoning aims to find plausible explanations for an event. This style of reasoning is critical for commonsense tasks where there are often multiple plausible explanations. Existing approaches for abductive reasoning in natural language processing (NLP) often rely on manually generated annotations for supervision; however, such annotations can be subjective and biased. Instead of using direct supervision, this work proposes an approach for abductive commonsense reasoning that exploits the fact that only a subset of explanations is correct for a given context. The method uses posterior regularization to enforce a mutual exclusion constraint, encouraging the model to learn the distinction between fluent explanations and plausible ones. We evaluate our approach on a diverse set of abductive reasoning datasets; experimental results show that our approach outperforms or is comparable to directly applying pretrained language models in a zero-shot manner and other knowledge-augmented zero-shot methods. 2023.acl-long.831 @@ -12001,7 +12001,7 @@ Visually-augmented pretrained language models for <fixed-case>NLP</fixed-case> tasks without images HangyuGuoHarbin Institute of Technology (Shenzhen) KunZhouRenmin University of China - Wayne XinZhaoRUC + Wayne XinZhaoRUC QinyuZhangHarbin Institute of Technology (Shenzhen) Ji-RongWenRenmin University of China 14912-14929 @@ -12015,7 +12015,7 @@ Using counterfactual contrast to improve compositional generalization for multi-step quantitative reasoning ArminehNourbakhshCMU, JP Morgan Chase SameenaShahJP Morgan - CarolynRoséCarnegie Mellon University + CarolynRoséCarnegie Mellon University 14930-14943 In quantitative question answering, compositional generalization is one of the main challenges of state of the art models, especially when longer sequences of reasoning steps are required. In this paper we propose CounterComp, a method that uses counterfactual scenarios to generate samples with compositional contrast. Instead of a data augmentation approach, CounterComp is based on metric learning, which allows for direct sampling from the training set and circumvents the need for additional human labels. Our proposed auxiliary metric learning loss improves the performance of three state of the art models on four recently released datasets. We also show how the approach can improve OOD performance on unseen domains, as well as unseen compositions. Lastly, we demonstrate how the method can lead to better compositional attention patterns during training. 2023.acl-long.834 @@ -12033,7 +12033,7 @@ YixinLiuYale University SaadMahamoodtrivago N.V SebastianGehrmannBloomberg LP - MirunaClinciuEdinburgh Centre for Robotics + MirunaClinciuEdinburgh Centre for Robotics Khyathi RaghaviChanduAllen Institute of AI JoãoSedocNew York University 14944-14982 @@ -12062,7 +12062,7 @@ <fixed-case>M</fixed-case>eeting<fixed-case>QA</fixed-case>: Extractive Question-Answering on Meeting Transcripts ArchikiPrasadUNC Chapel Hill - TrungBuiAdobe Research + TrungBuiAdobe Research SeunghyunYoonAdobe Research HaniehDeilamsalehyAdobe Research FranckDernoncourtAdobe Research @@ -12077,7 +12077,7 @@ <fixed-case>FERMAT</fixed-case>: An Alternative to Accuracy for Numerical Reasoning JasivanSivakumarUniversity of Sheffield - Nafise SadatMoosaviDepartment of Computer Science, The University of Sheffield + Nafise SadatMoosaviDepartment of Computer Science, The University of Sheffield 15026-15043 While pre-trained language models achieve impressive performance on various NLP benchmarks, they still struggle with tasks that require numerical reasoning. Recent advances in improving numerical reasoning are mostly achieved using very large language models that contain billions of parameters and are not accessible to everyone. In addition, numerical reasoning is measured using a single score on existing datasets. As a result, we do not have a clear understanding of the strengths and shortcomings of existing models on different numerical reasoning aspects and therefore, potential ways to improve them apart from scaling them up. Inspired by CheckList (Ribeiro et al., 2020), we introduce a multi-view evaluation set for numerical reasoning in English, called FERMAT. Instead of reporting a single score on a whole dataset, FERMAT evaluates models on various key numerical reasoning aspects such as number understanding, mathematical operations, and training dependency. Apart from providing a comprehensive evaluation of models on different numerical reasoning aspects, FERMAT enables a systematic and automated generation of an arbitrarily large training or evaluation set for each aspect. The datasets and codes are publicly available to generate further multi-view data for ulterior tasks and languages. 2023.acl-long.838 @@ -12089,7 +12089,7 @@ Don’t Forget Your <fixed-case>ABC</fixed-case>’s: Evaluating the State-of-the-Art in Chat-Oriented Dialogue Systems Sarah E.FinchEmory University James D.FinchEmory University - Jinho D.ChoiEmory University + Jinho D.ChoiEmory University 15044-15071 Despite tremendous advancements in dialogue systems, stable evaluation still requires human judgments producing notoriously high-variance metrics due to their inherent subjectivity. Moreover, methods and labels in dialogue evaluation are not fully standardized, especially for open-domain chats, with a lack of work to compare and assess the validity of those approaches. The use of inconsistent evaluation can misinform the performance of a dialogue system, which becomes a major hurdle to enhance it. Thus, a dimensional evaluation of chat-oriented open-domain dialogue systems that reliably measures several aspects of dialogue capabilities is desired. This paper presents a novel human evaluation method to estimate the rates of many{pasted macro ‘LN’} dialogue system behaviors. Our method is used to evaluate four state-of-the-art open-domain dialogue systems and compared with existing approaches. The analysis demonstrates that our behavior method is more suitable than alternative Likert-style or comparative approaches for dimensional evaluation of these systems. 2023.acl-long.839 @@ -12119,7 +12119,7 @@ KaheerSulemanMicrosoft Research Montreal AdamTrischlerMicrosoft Research AlexandraOlteanuMicrosoft Research - Jackie Chi KitCheungMila / McGill University + Jackie Chi KitCheungMila / McGill University 15088-15108 Many state-of-the-art natural language understanding (NLU) models are based on pretrained neural language models. These models often make inferences using information from multiple sources. An important class of such inferences are those that require both background knowledge, presumably contained in a model’s pretrained parameters, and instance-specific information that is supplied at inference time. However, the integration and reasoning abilities of NLU models in the presence of multiple knowledge sources have been largely understudied. In this work, we propose a test suite of coreference resolution subtasks that require reasoning over multiple facts. These subtasks differ in terms of which knowledge sources contain the relevant facts. We also introduce subtasks where knowledge is present only at inference time using fictional knowledge. We evaluate state-of-the-art coreference resolution models on our dataset. Our results indicate that several models struggle to reason on-the-fly over knowledge observed both at pretrain time and at inference time. However, with task-specific training, a subset of models demonstrates the ability to integrate certain knowledge types from multiple sources. Still, even the best performing models seem to have difficulties with reliably integrating knowledge presented only at inference time. 2023.acl-long.841 @@ -12132,7 +12132,7 @@ MarcosTrevisoInstituto de Telecomunicacoes AlexisRossMassachusetts Institute of Technology Nuno M.GuerreiroInstituto de Telecomunicacoes, University of Lisbon - AndréMartinsUnbabel, Instituto de Telecomunicacoes + AndréMartinsUnbabel, Instituto de Telecomunicacoes 15109-15126 Selective rationales and counterfactual examples have emerged as two effective, complementary classes of interpretability methods for analyzing and training NLP models. However, prior work has not explored how these methods can be integrated to combine their complementary advantages. We overcome this limitation by introducing CREST (ContRastive Edits with Sparse raTionalization), a joint framework for selective rationalization and counterfactual text generation, and show that this framework leads to improvements in counterfactual quality, model robustness, and interpretability. First, CREST generates valid counterfactuals that are more natural than those produced by previous methods, and subsequently can be used for data augmentation at scale, reducing the need for human-generated examples. Second, we introduce a new loss function that leverages CREST counterfactuals to regularize selective rationales and show that this regularization improves both model robustness and rationale quality, compared to methods that do not leverage CREST counterfactuals. Our results demonstrate that CREST successfully bridges the gap between selective rationales and counterfactual examples, addressing the limitations of existing methods and providing a more comprehensive view of a model’s predictions. 2023.acl-long.842 @@ -12162,8 +12162,8 @@ BudhadityaDebMicrosoft Corporation MilagroTeruelMicrosoft Research AaronHalfakerMicrosoft - DragomirRadevYale University - Ahmed HassanAwadallahMicrosoft Research + DragomirRadevYale University + Ahmed HassanAwadallahMicrosoft Research 15144-15161 Despite the recent progress in language generation models, their outputs may not always meet user expectations. In this work, we study whether informational feedback in natural language can be leveraged to improve generation quality and user preference alignment. To this end, we consider factual consistency in summarization, the quality that the summary should only contain information supported by the input documents, as the user-expected preference. We collect a high-quality dataset, DeFacto, containing human demonstrations and informational natural language feedback consisting of corrective instructions, edited summaries, and explanations with respect to the factual consistency of the summary. Using our dataset, we study three natural language generation tasks: (1) editing a summary by following the human feedback, (2) generating human feedback for editing the original summary, and (3) revising the initial summary to correct factual errors by generating both the human feedback and edited summary. We show that DeFacto can provide factually consistent human-edited summaries and further insights into summarization factual consistency thanks to its informational natural language feedback. We further demonstrate that fine-tuned language models can leverage our dataset to improve the summary factual consistency, while large language models lack the zero-shot learning ability in our proposed tasks that require controllable text generation. 2023.acl-long.844 @@ -12231,7 +12231,7 @@ PengLiInstitute for AI Industry Research (AIR), Tsinghua University, China TaoLiMeituan Inc. MaosongSunTsinghua University - YangLiuTsinghua University + YangLiuTsinghua University 15233-15256 Recently, multi-aspect controllable text generation that controls the generated text in multiple aspects (e.g., sentiment, topic, and keywords) has attracted increasing attention. Although methods based on parameter efficient tuning like prefix-tuning could achieve multi-aspect controlling in a plug-and-play way, the mutual interference of multiple prefixes leads to significant degeneration of constraints and limits their extensibility to training-time unseen aspect combinations. In this work, we provide a theoretical lower bound for the interference and empirically found that the interference grows with the number of layers where prefixes are inserted. Based on these analyses, we propose using trainable gates to normalize the intervention of prefixes to restrain the growing interference. As a result, controlling training-time unseen combinations of aspects can be realized by simply concatenating corresponding plugins such that new constraints can be extended at a lower cost. In addition, we propose a unified way to process both categorical and free-form constraints. Experiments on text generation and machine translation demonstrate the superiority of our approach over baselines on constraint accuracy, text quality, and extensibility. 2023.acl-long.849 @@ -12271,7 +12271,7 @@ PengLiInstitute for AI Industry Research (AIR), Tsinghua University, China JinMaustc TingYaoTencent - YangLiuTsinghua University + YangLiuTsinghua University 15286-15304 In the real-world scenario, a longstanding goal of multilingual neural machine translation (MNMT) is that a single model can incrementally adapt to new language pairs without accessing previous training data. In this scenario, previous studies concentrate on overcoming catastrophic forgetting while lacking encouragement to learn new knowledge from incremental language pairs, especially when the incremental language is not related to the set of original languages. To better acquire new knowledge, we propose a knowledge transfer method that can efficiently adapt original MNMT models to diverse incremental language pairs. The method flexibly introduces the knowledge from an external model into original models, which encourages the models to learn new language pairs, completing the procedure of knowledge transfer. Moreover, all original parameters are frozen to ensure that translation qualities on original language pairs are not degraded. Experimental results show that our method can learn new knowledge from diverse language pairs incrementally meanwhile maintaining performance on original language pairs, outperforming various strong baselines in incremental learning for MNMT. 2023.acl-long.852 @@ -12286,7 +12286,7 @@ A. PastorLópez-Monroy Luis C.González David E.Losada - ManuelMontes-y-Gómez + ManuelMontes-y-Gómez 15305-15318 Mental disorders affect millions of people worldwide and cause interference with their thinking and behavior. Through the past years, awareness created by health campaigns and other sources motivated the study of these disorders using information extracted from social media platforms. In this work, we aim to contribute to the study of these disorders and to the understanding of how mental problems reflect on social media. To achieve this goal, we propose a double-domain adaptation of a language model. First, we adapted the model to social media language, and then, we adapted it to the mental health domain. In both steps, we incorporated a lexical resource to guide the masking process of the language model and, therefore, to help it in paying more attention to words related to mental disorders. We have evaluated our model in the detection of signs of three major mental disorders: Anorexia, Self-harm, and Depression. Results are encouraging as they show that the proposed adaptation enhances the classification performance and yields competitive results against state-of-the-art methods. 2023.acl-long.853 @@ -12296,8 +12296,8 @@ Toward Interactive Dictation - Belinda Z.LiMIT - JasonEisnerJohns Hopkins University + Microsoft Corporation + Belinda Z.LiMIT + JasonEisnerJohns Hopkins University + Microsoft Corporation AdamPaulsMicrosoft SamThomsonMicrosoft Semantic Machines 15319-15338 @@ -12314,7 +12314,7 @@ QiongTangFudan University HangYanFudan University YuanbinWuEast China Normal University - XuanjingHuangFudan University + XuanjingHuangFudan University XipengQiuFudan University 15339-15353 Large language models (LLMs) pre-trained on massive corpora have demonstrated impressive few-shot learning ability on many NLP tasks. A common practice is to recast the task into a text-to-text format such that generative LLMs of natural language (NL-LLMs) like GPT-3 can be prompted to solve it. However, it is nontrivial to perform information extraction (IE) tasks with NL-LLMs since the output of the IE task is usually structured and therefore is hard to be converted into plain text. In this paper, we propose to recast the structured output in the form of code instead of natural language and utilize generative LLMs of code (Code-LLMs) such as Codex to perform IE tasks, in particular, named entity recognition and relation extraction. In contrast to NL-LLMs, we show that Code-LLMs can be well-aligned with these IE tasks by designing code-style prompts and formulating these IE tasks as code generation tasks. Experiment results on seven benchmarks show that our method consistently outperforms fine-tuning moderate-size pre-trained models specially designed for IE tasks (e.g., UIE) and prompting NL-LLMs under few-shot settings. We further conduct a series of in-depth analyses to demonstrate the merits of leveraging Code-LLMs for IE tasks. @@ -12342,7 +12342,7 @@ Bridging The Gap: Entailment Fused-T5 for Open-retrieval Conversational Machine Reading Comprehension XiaoZhangBeijing Institute of Technology - HeyanHuangBeijing Institute of Technology + HeyanHuangBeijing Institute of Technology ZewenChiBeijing Institute of Technology Xian-LingMaoBeijing Institute of Technology 15374-15386 @@ -12426,7 +12426,7 @@ Human Inspired Progressive Alignment and Comparative Learning for Grounded Word Acquisition YuweiBaoUniversity of Michigan BarrettLattimerUniversity of Michigan - JoyceChaiUniversity of Michigan + JoyceChaiUniversity of Michigan 15475-15493 Human language acquisition is an efficient, supervised, and continual process. In this work, we took inspiration from how human babies acquire their first language, and developed a computational process for word acquisition through comparative learning. Motivated by cognitive findings, we generated a small dataset that enables the computation models to compare the similarities and differences of various attributes, learn to filter out and extract the common information for each shared linguistic label. We frame the acquisition of words as not only the information filtration process, but also as representation-symbol mapping. This procedure does not involve a fixed vocabulary size, nor a discriminative objective, and allows the models to continually learn more concepts efficiently. Our results in controlled experiments have shown the potential of this approach for efficient continual learning of grounded words. 2023.acl-long.863 @@ -12451,7 +12451,7 @@ NicolasGarneauUniversite Laval CatalinaGoantaUtrecht University DanielKatzIllinois Tech - Chicago Kent College of Law - AndersSøgaardUniversity of Copenhagen + AndersSøgaardUniversity of Copenhagen 15513-15535 In this work, we conduct a detailed analysis on the performance of legal-oriented pre-trained language models (PLMs). We examine the interplay between their original objective, acquired knowledge, and legal language understanding capacities which we define as the upstream, probing, and downstream performance, respectively. We consider not only the models’ size but also the pre-training corpora used as important dimensions in our study. To this end, we release a multinational English legal corpus (LeXFiles) and a legal knowledge probing benchmark (LegalLAMA) to facilitate training and detailed analysis of legal-oriented PLMs. We release two new legal PLMs trained on LeXFiles and evaluate them alongside others on LegalLAMA and LexGLUE. We find that probing performance strongly correlates with upstream performance in related legal topics. On the other hand, downstream performance is mainly driven by the model’s size and prior legal knowledge which can be estimated by upstream and probing performance. Based on these findings, we can conclude that both dimensions are important for those seeking the development of domain-specific PLMs. 2023.acl-long.865 @@ -12479,7 +12479,7 @@ KaiMeiRutgers University ZhengLiCISPA Helmholtz Center for Information Security ZhentingWangRutgers University - YangZhangCISPA Helmholtz Center for Information Security + YangZhangCISPA Helmholtz Center for Information Security ShiqingMaRutgers University 15551-15565 Prompt-based learning is vulnerable to backdoor attacks. Existing backdoor attacks against prompt-based models consider injecting backdoors into the entire embedding layers or word embedding vectors. Such attacks can be easily affected by retraining on downstream tasks and with different prompting strategies, limiting the transferability of backdoor attacks. In this work, we propose transferable backdoor attacks against prompt-based models, called NOTABLE, which is independent of downstream tasks and prompting strategies. Specifically, NOTABLE injects backdoors into the encoders of PLMs by utilizing an adaptive verbalizer to bind triggers to specific words (i.e., anchors). It activates the backdoor by pasting input with triggers to reach adversary-desired anchors, achieving independence from downstream tasks and prompting strategies. We conduct experiments on six NLP tasks, three popular models, and three prompting strategies. Empirical results show that NOTABLE achieves superior attack performance (i.e., attack success rate over 90% on all the datasets), and outperforms two state-of-the-art baselines. Evaluations on three defenses show the robustness of NOTABLE. Our code can be found at https://github.com/RU-System-Software-and-Security/Notable. @@ -12492,7 +12492,7 @@ Revisiting Relation Extraction in the era of Large Language Models SominWadhwaNortheastern University SilvioAmirNortheastern University - ByronWallaceNortheastern University + ByronWallaceNortheastern University 15566-15589 Relation extraction (RE) is the core NLP task of inferring semantic relationships between entities from text. Standard supervised RE techniques entail training modules to tag tokens comprising entity spans and then predict the relationship between them. Recent work has instead treated the problem as a sequence-to-sequence task, linearizing relations between entities as target strings to be generated conditioned on the input. Here we push the limits of this approach, using larger language models (GPT-3 and Flan-T5 large) than considered in prior work and evaluating their performance on standard RE tasks under varying levels of supervision. We address issues inherent to evaluating generative approaches to RE by doing human evaluations, in lieu of relying on exact matching. Under this refined evaluation, we find that: (1) Few-shot prompting with GPT-3 achieves near SOTA performance, i.e., roughly equivalent to existing fully supervised models; (2) Flan-T5 is not as capable in the few-shot setting, but supervising and fine-tuning it with Chain-of-Thought (CoT) style explanations (generated via GPT-3) yields SOTA results. We release this model as a new baseline for RE tasks. 2023.acl-long.868 @@ -12532,7 +12532,7 @@ HaolinChenIdiap Research Institute FrancoisMarelliIdiap Research Institute FrancoisFleuretUniversity of Geneva - JamesHendersonIdiap Research Institute + JamesHendersonIdiap Research Institute 15632-15654 Transformer-based architectures are the model of choice for natural language understanding, but they come at a significant cost, as they have quadratic complexity in the input length, require a lot of training data, and can be difficult to tune. In the pursuit of lower costs, we investigate simple MLP-based architectures. We find that existing architectures such as MLPMixer, which achieves token mixing through a static MLP applied to each feature independently, are too detached from the inductive biases required for natural language understanding. In this paper, we propose a simple variant, HyperMixer, which forms the token mixing MLP dynamically using hypernetworks. Empirically, we demonstrate that our model performs better than alternative MLP-based models, and on par with Transformers. In contrast to Transformers, HyperMixer achieves these results at substantially lower costs in terms of processing time, training data, and hyperparameter tuning. 2023.acl-long.871 @@ -12572,7 +12572,7 @@ Annotation-Inspired Implicit Discourse Relation Classification with Auxiliary Discourse Connective Generation - WeiLiuHeidelberg Institute for Theoretical Studies + WeiLiuHeidelberg Institute for Theoretical Studies MichaelStrubeHeidelberg Institute for Theoretical Studies 15696-15712 Implicit discourse relation classification is a challenging task due to the absence of discourse connectives. To overcome this issue, we design an end-to-end neural model to explicitly generate discourse connectives for the task, inspired by the annotation process of PDTB. Specifically, our model jointly learns to generate discourse connectives between arguments and predict discourse relations based on the arguments and the generated connectives. To prevent our relation classifier from being misled by poor connectives generated at the early stage of training while alleviating the discrepancy between training and inference, we adopt Scheduled Sampling to the joint learning. We evaluate our method on three benchmarks, PDTB 2.0, PDTB 3.0, and PCC. Results show that our joint model significantly outperforms various baselines on three datasets, demonstrating its superiority for the task. @@ -12615,7 +12615,7 @@ Two-Stage Fine-Tuning for Improved Bias and Variance for Large Pretrained Language Models LijingWangNew Jersey Institute of Technology YingyaLiHarvard Medical School and Boston Children’s Hospital - TimothyMillerBoston Children’s Hospital and Harvard Medical School + TimothyMillerBoston Children’s Hospital and Harvard Medical School StevenBethardUniversity of Arizona GuerganaSavovaBoston Children’s Hospital and Harvard Medical School 15746-15761 @@ -12648,7 +12648,7 @@ XiangLiAmazon PuyangXuMobvoi SunghyunParkAmazon Alexa AI - AliceOhKAIST + AliceOhKAIST 15783-15798 Unsupervised sentence representation learning has progressed through contrastive learning and data augmentation methods such as dropout masking. Despite this progress, sentence encoders are still limited to using only an input sentence when predicting its semantic vector. In this work, we show that the semantic meaning of a sentence is also determined by nearest-neighbor sentences that are similar to the input sentence. Based on this finding, we propose a novel unsupervised sentence encoder, RankEncoder. RankEncoder predicts the semantic vector of an input sentence by leveraging its relationship with other sentences in an external corpus, as well as the input sentence itself. We evaluate RankEncoder on semantic textual benchmark datasets. From the experimental results, we verify that 1) RankEncoder achieves 80.07% Spearman’s correlation, a 1.1% absolute improvement compared to the previous state-of-the-art performance, 2) RankEncoder is universally applicable to existing unsupervised sentence embedding methods, and 3) RankEncoder is specifically effective for predicting the similarity scores of similar sentence pairs. 2023.acl-long.879 @@ -12763,7 +12763,7 @@ JingjingXuShanghai AI Lab ShujianHuangNational Key Laboratory for Novel Software Technology, Nanjing University LingpengKongThe University of Hong Kong - JiajunChenNanjing University + JiajunChenNanjing University 15948-15959 Neural machine translation has achieved promising results on many translation tasks. However, previous studies have shown that neural models induce a non-smooth representation space, which harms its generalization results. Recently, kNN-MT has provided an effective paradigm to smooth the prediction based on neighbor representations during inference. Despite promising results, kNN-MT usually requires large inference overhead. We propose an effective training framework INK to directly smooth the representation space via adjusting representations of kNN neighbors with a small number of new parameters. The new parameters are then used to refresh the whole representation datastore to get new kNN knowledge asynchronously. This loop keeps running until convergence. Experiments on four benchmark datasets show that INK achieves average gains of 1.99 COMET and 1.0 BLEU, outperforming the state-of-the-art kNN-MT system with 0.02x memory space and 1.9x inference speedup. 2023.acl-long.888 @@ -12777,7 +12777,7 @@ KunHuangNanjing University of Science and Technology XiaocuiYangSchool of Computer Science and Engineering, Northeastern University, PengfeiHongSingapore University of Technology and Design - KunZhangNanjing University of Science and Technology + KunZhangNanjing University of Science and Technology SoujanyaPoriaSingapore University of Technology and Design 15960-15973 Document-level relation extraction (DocRE) aims to infer complex semantic relations among entities in a document. Distant supervision (DS) is able to generate massive auto-labeled data, which can improve DocRE performance. Recent works leverage pseudo labels generated by the pre-denoising model to reduce noise in DS data. However, unreliable pseudo labels bring new noise, e.g., adding false pseudo labels and losing correct DS labels. Therefore, how to select effective pseudo labels to denoise DS data is still a challenge in document-level distant relation extraction. To tackle this issue, we introduce uncertainty estimation technology to determine whether pseudo labels can be trusted. In this work, we propose a Document-level distant Relation Extraction framework with Uncertainty Guided label denoising, UGDRE. Specifically, we propose a novel instance-level uncertainty estimation method, which measures the reliability of the pseudo labels with overlapping relations. By further considering the long-tail problem, we design dynamic uncertainty thresholds for different types of relations to filter high-uncertainty pseudo labels. We conduct experiments on two public datasets. Our framework outperforms strong baselines by 1.91 F1 and 2.28 Ign F1 on the RE-DocRED dataset. @@ -12811,7 +12811,7 @@ Zheng XinYongBrown University HaileySchoelkopfEleutherAI XiangruTangYale University - DragomirRadevYale University + DragomirRadevYale University Alham FikriAjiMBZUAI KhalidAlmubarakPrince Sattam bin Abdulaziz University SamuelAlbanieUniversity of Cambridge @@ -12859,7 +12859,7 @@ ZhiguoWangAWS AI Labs BonanMinAmazon AWS AI Labs William YangWangAmazon AWS AI Labs - KathleenMcKeownColumbia University and Amazon (Amazon Scholar) + KathleenMcKeownColumbia University and Amazon (Amazon Scholar) VittorioCastelliAWS AI Labs DanRothUniversity of Pennsylvania BingXiangAmazon @@ -12891,7 +12891,7 @@ RichardDufourLS2N - Nantes University MickaelRouvierLIA - Avignon University EmmanuelMorinLS2N UMR CNRS 6004 - BéatriceDailleNantes Université- LS2N + BéatriceDailleNantes Université- LS2N Pierre-AntoineGourraudNantes Universite 16207-16221 In recent years, pre-trained language models (PLMs) achieve the best performance on a wide range of natural language processing (NLP) tasks. While the first models were trained on general domain data, specialized ones have emerged to more effectively treat specific domains. In this paper, we propose an original study of PLMs in the medical domain on French language. We compare, for the first time, the performance of PLMs trained on both public data from the web and private data from healthcare establishments. We also evaluate different learning strategies on a set of biomedical tasks. In particular, we show that we can take advantage of already existing biomedical PLMs in a foreign language by further pre-train it on our targeted data. Finally, we release the first specialized PLMs for the biomedical field in French, called DrBERT, as well as the largest corpus of medical data under free license on which these models are trained. @@ -12903,7 +12903,7 @@ Discriminative Reasoning with Sparse Event Representation for Document-level Event-Event Relation Extraction ChangsenYuanBeijing Institute of Technology - HeyanHuangBeijing Institute of Technology + HeyanHuangBeijing Institute of Technology YixinCaoSingapore Management University YonggangWenNTU Singapore 16222-16234 @@ -12938,7 +12938,7 @@ VedanujGoswamiMeta AI ChanghanWangMeta - Fundamental AI Research (FAIR) JuanPinoFacebook - BenoîtSagotInria + BenoîtSagotInria HolgerSchwenkMeta AI Research 16251-16269 We present SpeechMatrix, a large-scale multilingual corpus of speech-to-speech translations mined from real speech of European Parliament recordings. It contains speech alignments in 136 language pairs with a total of 418 thousand hours of speech. To evaluate the quality of this parallel speech, we train bilingual speech-to-speech translation models on mined data only and establish extensive baseline results on EuroParl-ST, VoxPopuli and FLEURS test sets. Enabled by the multilinguality of SpeechMatrix, we also explore multilingual speech-to-speech translation, a topic which was addressed by few other works. We also demonstrate that model pre-training and sparse scaling using Mixture-of-Experts bring large gains to translation performance. The mined data and models will be publicly released @@ -13004,7 +13004,7 @@ NikitaNangiaNew York University Richard YuanzhePangNew York University JasonPhangNew York University - Samuel R.BowmanNew York University + Samuel R.BowmanNew York University 16334-16368 We present the results of the NLP Community Metasurvey. Run from May to June 2022, it elicited opinions on controversial issues, including industry influence in the field, concerns about AGI, and ethics. Our results put concrete numbers to several controversies: For example, respondents are split in half on the importance of artificial general intelligence, whether language models understand language, and the necessity of linguistic structure and inductive bias for solving NLP problems. In addition, the survey posed meta-questions, asking respondents to predict the distribution of survey responses. This allows us to uncover false sociological beliefs where the community’s predictions don’t match reality. Among other results, we find that the community greatly overestimates its own belief in the usefulness of benchmarks and the potential for scaling to solve real-world problems, while underestimating its belief in the importance of linguistic structure, inductive bias, and interdisciplinary science. 2023.acl-long.903 @@ -13045,7 +13045,7 @@ HaniehDeilamsalehyAdobe Research FranckDernoncourtAdobe Research HassanForooshUniversity of Central Florida - FeiLiuEmory University + FeiLiuEmory University 16409-16423 As the number of recorded meetings increases, it becomes increasingly important to utilize summarization technology to create useful summaries of these recordings. However, there is a crucial lack of annotated meeting corpora for developing this technology, as it can be hard to collect meetings, especially when the topics discussed are confidential. Furthermore, meeting summaries written by experienced writers are scarce, making it hard for abstractive summarizers to produce sensible output without a reliable reference. This lack of annotated corpora has hindered the development of meeting summarization technology. In this paper, we present MeetingBank, a new benchmark dataset of city council meetings over the past decade. MeetingBank is unique among other meeting corpora due to its divide-and-conquer approach, which involves dividing professionally written meeting minutes into shorter passages and aligning them with specific segments of the meeting. This breaks down the process of summarizing a lengthy meeting into smaller, more manageable tasks. The dataset provides a new testbed of various meeting summarization systems and also allows the public to gain insight into how council decisions are made. We make the collection, including meeting video links, transcripts, reference summaries, agenda, and other metadata, publicly available to facilitate the development of better meeting summarization techniques. 2023.acl-long.906 @@ -13121,7 +13121,7 @@ Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers) AnnaRogers JordanBoyd-Graber - NaoakiOkazaki + NaoakiOkazaki Association for Computational Linguistics
Toronto, Canada
July @@ -13135,7 +13135,7 @@ Should you marginalize over possible tokenizations? NadezhdaChirkovaNaver Labs Europe - GermánKruszewskiNaver Labs Europe + GermánKruszewskiNaver Labs Europe JosRozenNAVER LABS Europe MarcDymetmanIndependent researcher 1-12 @@ -13160,7 +13160,7 @@ Young MinKimCarnegie Mellon University KalvinChangCarnegie Mellon University ChenxuanCuiCarnegie Mellon University - David R.MortensenLanguage Technologies Institute, Carnegie Mellon University + David R.MortensenLanguage Technologies Institute, Carnegie Mellon University 24-38 Protoform reconstruction is the task of inferring what morphemes or words appeared like in the ancestral languages of a set of daughter languages. Meloni et al (2021) achieved the state-of-the-art on Latin protoform reconstruction with an RNN-based encoder-decoder with attention model. We update their model with the state-of-the-art seq2seq model: the Transformer. Our model outperforms their model on a suite of different metrics on two different datasets: their Romance data of 8,000 cognates spanning 5 languages and a Chinese dataset (Hou 2004) of 800+ cognates spanning 39 varieties. We also probe our model for potential phylogenetic signal contained in the model. Our code is publicly available at https://github.com/cmu-llab/acl-2023. 2023.acl-short.3 @@ -13219,12 +13219,12 @@ Tracing Linguistic Markers of Influence in a Large Online Organisation PrashantKhareQueen Mary University of London RaviShekharUniversity of Essex - Vanja MladenKaranQueen Mary University + Vanja MladenKaranQueen Mary University StephenMcQuistinUniversity of Glasgow ColinPerkinsUniversity of Glasgow IgnacioCastroQueen Mary University of London GarethTysonQMUL - PatrickHealeyQueen Mary, University of London + PatrickHealeyQueen Mary, University of London MatthewPurverQueen Mary University of London 82-90 Social science and psycholinguistic research have shown that power and status affect how people use language in a range of domains. Here, we investigate a similar question in a large, distributed, consensus-driven community with little traditional power hierarchy – the Internet Engineering Task Force (IETF), a collaborative organisation that designs internet standards. Our analysis based on lexical categories (LIWC) and BERT, shows that participants’ levels of influence can be predicted from their email text, and identify key linguistic differences (e.g., certain LIWC categories, such as “WE” are positively correlated with high-influence). We also identify the differences in language use for the same person before and after becoming influential. @@ -13280,7 +13280,7 @@ AruMaekawaTokyo Institute of Technology NaokiKobayashiLegalOnTechnologies KotaroFunakoshiTokyo Institute of Technology - ManabuOkumuraTokyo Institute of Technology + ManabuOkumuraTokyo Institute of Technology 119-127 Dataset distillation aims to create a small dataset of informative synthetic samples to rapidly train neural networks that retain the performance of the original dataset. In this paper, we focus on constructing distilled few-shot datasets for natural language processing (NLP) tasks to fine-tune pre-trained transformers. Specifically, we propose to introduce attention labels, which can efficiently distill the knowledge from the original dataset and transfer it to the transformer models via attention probabilities. We evaluated our dataset distillation methods in four various NLP tasks and demonstrated that it is possible to create distilled few-shot datasets with the attention labels, yielding impressive performances for fine-tuning BERT. Specifically, in AGNews, a four-class news classification task, our distilled few-shot dataset achieved up to 93.2% accuracy, which is 98.5% performance of the original dataset even with only one sample per class and only one gradient step. 2023.acl-short.12 @@ -13292,8 +13292,8 @@ Multi-Document Summarization with Centroid-Based Pretraining Ratish SurendranPuduppullyA-Star Research Entities ParagJainUniversity of Edinburgh - NancyChenInstitute for Infocomm Research, A*STAR - MarkSteedmanUniversity of Edinburgh + NancyChenInstitute for Infocomm Research, A*STAR + MarkSteedmanUniversity of Edinburgh 128-138 In Multi-Document Summarization (MDS), the input can be modeled as a set of documents, and the output is its summary. In this paper, we focus on pretraining objectives for MDS. Specifically, we introduce a novel pretraining objective, which involves selecting the ROUGE-based centroid of each document cluster as a proxy for its summary. Our objective thus does not require human written summaries and can be utilized for pretraining on a dataset consisting solely of document sets. Through zero-shot, few-shot, and fully supervised experiments on multiple MDS datasets, we show that our model Centrum is better or comparable to a state-of-the-art model. We make the pretrained and fine-tuned models freely available to the research community https://github.com/ratishsp/centrum. 2023.acl-short.13 @@ -13330,7 +13330,7 @@ <fixed-case>H</fixed-case>i<fixed-case>P</fixed-case>ool: Modeling Long Documents Using Graph Neural Networks IreneLiUniversity of Tokyo AosongFengYale University - DragomirRadevYale University + DragomirRadevYale University RexYingYale University 161-171 Encoding long sequences in Natural Language Processing (NLP) is a challenging problem. Though recent pretraining language models achieve satisfying performances in many NLP tasks, they are still restricted by a pre-defined maximum length, making them challenging to be extended to longer sequences. So some recent works utilize hierarchies to model long sequences. However, most of them apply sequential models for upper hierarchies, suffering from long dependency issues. In this paper, we alleviate these issues through a graph-based method. We first chunk the sequence with a fixed length to model the sentence-level information. We then leverage graphs to model intra- and cross-sentence correlations with a new attention mechanism. Additionally, due to limited standard benchmarks for long document classification (LDC), we propose a new challenging benchmark, totaling six datasets with up to 53k samples and 4034 average tokens’ length. Evaluation shows our model surpasses competitive baselines by 2.6% in F1 score, and 4.8% on the longest sequence dataset. Our method is shown to outperform hierarchical sequential models with better performance and scalability, especially for longer sequences. @@ -13344,7 +13344,7 @@ MichaelYoderCarnegie Mellon University AhmadDiabUniversity of Pittsburgh DavidBrownCarnegie Mellon University - KathleenCarleyCarnegie Mellon University, Netanomics + KathleenCarleyCarnegie Mellon University, Netanomics 172-185 We present a dataset and classifier for detecting the language of white supremacist extremism, a growing issue in online hate speech. Our weakly supervised classifier is trained on large datasets of text from explicitly white supremacist domains paired with neutral and anti-racist data from similar domains. We demonstrate that this approach improves generalization performance to new domains. Incorporating anti-racist texts as counterexamples to white supremacist language mitigates bias. 2023.acl-short.17 @@ -13483,7 +13483,7 @@ ZhijiangGuoUniversity of Cambridge ZhiyangTengNanyang Technological University IrwinKingThe Chinese University of Hong Kong - Philip S.YuUniversity of Illinois at Chicago + Philip S.YuUniversity of Illinois at Chicago 303-311 Multimodal relation extraction (MRE) is the task of identifying the semantic relationships between two entities based on the context of the sentence image pair. Existing retrieval-augmented approaches mainly focused on modeling the retrieved textual knowledge, but this may not be able to accurately identify complex relations. To improve the prediction, this research proposes to retrieve textual and visual evidence based on the object, sentence, and whole image. We further develop a novel approach to synthesize the object-level, image-level, and sentence-level information for better reasoning between the same and different modalities. Extensive experiments and analyses show that the proposed method is able to effectively select and compare evidence across modalities and significantly outperforms state-of-the-art models. 2023.acl-short.27 @@ -13535,7 +13535,7 @@ <fixed-case>PLUE</fixed-case>: Language Understanding Evaluation Benchmark for Privacy Policies in <fixed-case>E</fixed-case>nglish JianfengChiMeta AI - Wasi UddinAhmadAWS AI Labs + Wasi UddinAhmadAWS AI Labs YuanTianUniversity of California, Los Angeles Kai-WeiChangUCLA 352-365 @@ -13612,7 +13612,7 @@ Credible without Credit: Domain Experts Assess Generative Language Models DenisPeskoffPrinceton University - BrandonStewartPrinceton University + BrandonStewartPrinceton University 427-438 Language models have recently broken into the public consciousness with the release of the wildly popular ChatGPT. Commentators have argued that language models could replace search engines, make college essays obsolete, or even write academic research papers. All of these tasks rely on accuracy of specialized information which can be difficult to assess for non-experts. Using 10 domain experts across science and culture, we provide an initial assessment of the coherence, conciseness, accuracy, and sourcing of two language models across 100 expert-written questions. While we find the results are consistently cohesive and concise, we find that they are mixed in their accuracy. These results raise questions of the role language models should play in general-purpose and expert knowledge seeking. 2023.acl-short.37 @@ -13625,7 +13625,7 @@ ShikharMurtyStanford University PratyushaSharmaMIT JacobAndreasMIT - ChristopherManningStanford University + ChristopherManningStanford University 439-448 For humans, language production and comprehension is sensitive to the hierarchical structure of sentences. In natural language processing, past work has questioned how effectively neural sequence models like transformers capture this hierarchical structure when generalizing to structurally novel inputs. We show that transformer language models can learn to generalize hierarchically after training for extremely long periods—far beyond the point when in-domain accuracy has saturated. We call this phenomenon structural grokking. On multiple datasets, structural grokking exhibits inverted U-shaped scaling in model depth: intermediate-depth models generalize better than both very deep and very shallow transformers. When analyzing the relationship between model-internal properties and grokking, we find that optimal depth for grokking can be identified using the tree-structuredness metric of CITATION. Overall, our work provides strong evidence that, with extended training, vanilla transformers discover and use hierarchical structure. 2023.acl-short.38 @@ -13704,7 +13704,7 @@ FranciscoValentiniICC (UBA - CONICET); Maestría en Data Mining (UBA) GermánRosatiCONICET / UNSAM DamiánBlasiHarvard University and Max Planck Institute for the Science of Human History - DiegoFernandez SlezakUniversidad de Buenos Aires + DiegoFernandez SlezakUniversidad de Buenos Aires EdgarAltszylerInstituto de Investigación en Ciencias de La Computación (UBA-CONICET); GetGloby 509-520 In recent years, word embeddings have been widely used to measure biases in texts. Even if they have proven to be effective in detecting a wide variety of biases, metrics based on word embeddings lack transparency and interpretability. We analyze an alternative PMI-based metric to quantify biases in texts. It can be expressed as a function of conditional probabilities, which provides a simple interpretation in terms of word co-occurrences. We also prove that it can be approximated by an odds ratio, which allows estimating confidence intervals and statistical significance of textual biases. This approach produces similar results to metrics based on word embeddings when capturing gender gaps of the real world embedded in large corpora. @@ -13758,7 +13758,7 @@ ChenkaiSunUniversity of Illinois at Urbana-Champaign JinningLiUniversity of Illinois at Urbana-Champaign Hou PongChanUniversity of Macau - ChengXiangZhaiUniversity of Illinois at Urbana-Champaign + ChengXiangZhaiUniversity of Illinois at Urbana-Champaign HengJiUniversity of Illinois at Urbana-Champaign and Amazon (Amazon Scholar) 554-562 Predicting how a user responds to news events enables important applications such as allowing intelligent agents or content producers to estimate the effect on different communities and revise unreleased messages to prevent unexpected bad outcomes such as social conflict and moral injury. We present a new task, Response Forecasting on Personas for News Media, to estimate the response a persona (characterizing an individual or a group) might have upon seeing a news message. Compared to the previous efforts which only predict generic comments to news, the proposed task not only introduces personalization in the modeling but also predicts the sentiment polarity and intensity of each response. This enables more accurate and comprehensive inference on the mental state of the persona. Meanwhile, the generated sentiment dimensions make the evaluation and application more reliable. We create the first benchmark dataset, which consists of 13,357 responses to 3,847 news headlines from Twitter. We further evaluate the SOTA neural language models with our dataset. The empirical results suggest that the included persona attributes are helpful for the performance of all response dimensions. Our analysis shows that the best-performing models are capable of predicting responses that are consistent with the personas, and as a byproduct, the task formulation also enables many interesting applications in the analysis of social network groups and their opinions, such as the discovery of extreme opinion groups. @@ -13818,7 +13818,7 @@ Probing Physical Reasoning with Counter-Commonsense Context KazushiKondoThe University of Tokyo SakuSugawaraNational Institute of Informatics - AkikoAizawaNational Institute of Informatics + AkikoAizawaNational Institute of Informatics 603-612 In this study, we create a CConS (Counter-commonsense Contextual Size comparison) dataset to investigate how physical commonsense affects the contextualized size comparison task; the proposed dataset consists of both contexts that fit physical commonsense and those that do not. This dataset tests the ability of language models to predict the size relationship between objects under various contexts generated from our curated noun list and templates. We measure the ability of several masked language models and encoder-decoder models. The results show that while large language models can use prepositions such as “in” and “into” in the provided context to infer size relationships, they fail to use verbs and thus make incorrect judgments led by their prior physical commonsense. 2023.acl-short.53 @@ -13844,7 +13844,7 @@ WenjuanHanBeijing Jiaotong University HuiDiToshiba (China) Co., Ltd. YufengChenBeijing Jiaotong University - JinanXuBeijing Jiaotong University + JinanXuBeijing Jiaotong University 623-636 Traditional machine translation evaluation relies on reference written by humans. While reference-free evaluation gets rid of the constraints of labor-intensive annotations, which can pivot easily to new domains and is more scalable. In this paper, we propose a reference-free evaluation approach that characterizes evaluation as two aspects: (1) fluency: how well the translated text conforms to normal human language usage; (2) faithfulness: how well the translated text reflects the source data. We further split the faithfulness into word-level and sentence-level. Extensive experiments spanning WMT18/19/21 Metrics segment-level daRR and MQM datasets demonstrate that our proposed reference-free approach, ReFreeEval, outperforms SOTA reference-fee metrics like YiSi-2. 2023.acl-short.55 @@ -13909,7 +13909,7 @@ MathiasMüllerUniversity of Zurich ZifanJiangUniversity of Zurich AmitMoryossefBar-Ilan university, University of Zurich - AnnetteRiosUniversity of Zurich + AnnetteRiosUniversity of Zurich SarahEblingUniversity of Zurich 682-693 Automatic sign language processing is gaining popularity in Natural Language Processing (NLP) research (Yin et al., 2021). In machine translation (MT) in particular, sign language translation based on glosses is a prominent approach. In this paper, we review recent works on neural gloss translation. We find that limitations of glosses in general and limitations of specific datasets are not discussed in a transparent manner and that there is no common standard for evaluation. To address these issues, we put forward concrete recommendations for future research on gloss translation. Our suggestions advocate awareness of the inherent limitations of gloss-based approaches, realistic datasets, stronger baselines and convincing evaluation. @@ -13996,7 +13996,7 @@ AustinSimmmonsRIT ParidhiKhandelwalRIT SaraRosenthalIBM Research - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 762-770 We present TBO, a new dataset for Target-based Offensive language identification. TBO contains post-level annotations regarding the harmfulness of an offensive post and token-level annotations comprising of the target and the offensive argument expression. Popular offensive language identification datasets for social media focus on annotation taxonomies only at the post level and more recently, some datasets have been released that feature only token-level annotations. TBO is an important resource that bridges the gap between post-level and token-level annotation datasets by introducing a single comprehensive unified annotation taxonomy. We use the TBO taxonomy to annotate post-level and token-level offensive language on English Twitter posts. We release an initial dataset of over 4,500 instances collected from Twitter and we carry out multiple experiments to compare the performance of different models trained and tested on TBO. 2023.acl-short.66 @@ -14042,7 +14042,7 @@ YasamanBoreshbanSharif University of Technology SalamKhalifaStony Brook University SeyedAbolghasemMirroshandelStony Brook University - OwenRambowStony Brook University + OwenRambowStony Brook University 793-803 Building a system for morphological processing is a challenging task in morphologically complex languages like Arabic. Although there are some deep learning based models that achieve successful results, these models rely on a large amount of annotated data. Building such datasets, specially for some of the lower-resource Arabic dialects, is very difficult, time-consuming, and expensive. In addition, some parts of the annotated data do not contain useful information for training machine learning models. Active learning strategies allow the learner algorithm to select the most informative samples for annotation. There has been little research that focuses on applying active learning for morphological inflection and morphophonological processing. In this paper, we have proposed a deep active learning method for this task. Our experiments on Egyptian Arabic show that with only about 30% of annotated data, we achieve the same results as does the state-of-the-art model on the whole dataset. 2023.acl-short.69 @@ -14065,7 +14065,7 @@ Bhasa-<fixed-case>A</fixed-case>bhijnaanam: Native-script and romanized Language Identification for 22 <fixed-case>I</fixed-case>ndic languages YashMadhaniIndian Institute of Technology Madras - Mitesh M.KhapraIndian Institute of Technology Madras + Mitesh M.KhapraIndian Institute of Technology Madras AnoopKunchukuttanMicrosoft AI and Research 816-826 We create publicly available language identification (LID) datasets and models in all 22 Indian languages listed in the Indian constitution in both native-script and romanized text. First, we create Bhasha-Abhijnaanam, a language identification test set for native-script as well as romanized text which spans all 22 Indic languages. We also train IndicLID, a language identifier for all the above-mentioned languages in both native and romanized script. For native-script text, it has better language coverage than existing LIDs and is competitive or better than other LIDs. IndicLID is the first LID for romanized text in Indian languages. Two major challenges for romanized text LID are the lack of training data and low-LID performance when languages are similar. We provide simple and effective solutions to these problems. In general, there has been limited work on romanized text in any language, and our findings are relevant to other languages that need romanized language identification. Our models are publicly available at https://github.com/AI4Bharat/IndicLID under open-source licenses. Our training and test sets are also publicly available at https://huggingface.co/datasets/ai4bharat/Bhasha-Abhijnaanam under open-source licenses. @@ -14193,13 +14193,13 @@ <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> for Zero-shot Dialogue State Tracking: A Solution or an Opportunity? MichaelHeckHeinrich Heine University NurulLubisHeinrich Heine University - BenjaminRuppikHeinrich Heine University Düsseldorf + BenjaminRuppikHeinrich Heine University Düsseldorf RenatoVukovicHeinrich Heine University Düsseldorf ShutongFengHeinrich-Heine-Universität Düsseldorf ChristianGeishauserHeinrich Heine University Duesseldorf Hsien-chinLinHeinrich Heine University Carelvan NiekerkHeinrich Heine University - MilicaGasicHeinrich Heine University Duesseldorf + MilicaGasicHeinrich Heine University Duesseldorf 936-950 Recent research on dialog state tracking (DST) focuses on methods that allow few- and zero-shot transfer to new domains or schemas. However, performance gains heavily depend on aggressive data augmentation and fine-tuning of ever larger language model based architectures. In contrast, general purpose language models, trained on large amounts of diverse data, hold the promise of solving any kind of task without task-specific training. We present preliminary experimental results on the ChatGPT research preview, showing that ChatGPT achieves state-of-the-art performance in zero-shot DST. Despite our findings, we argue that properties inherent to general purpose models limit their ability to replace specialized systems. We further theorize that the in-context learning capabilities of such models will likely become powerful tools to support the development of dedicated dialog state trackers and enable dynamic methods. 2023.acl-short.81 @@ -14248,7 +14248,7 @@ <fixed-case>N</fixed-case>olly<fixed-case>S</fixed-case>enti: Leveraging Transfer Learning and Machine Translation for <fixed-case>N</fixed-case>igerian Movie Sentiment Classification IyanuoluwaShodeMontclair State University - David IfeoluwaAdelaniUniversity College London + David IfeoluwaAdelaniUniversity College London JIngPengMontclair State University AnnaFeldmanMontclair State University 986-998 @@ -14274,7 +14274,7 @@ An (unhelpful) guide to selecting the best <fixed-case>ASR</fixed-case> architecture for your under-resourced language RobertJimersonRochester institute of Technology ZoeyLiuDepartment of Linguistics, University of Florida - EmilyPrud’hommeauxBoston College + EmilyPrud’hommeauxBoston College 1008-1016 Advances in deep neural models for automatic speech recognition (ASR) have yielded dramatic improvements in ASR quality for resource-rich languages, with English ASR now achieving word error rates comparable to that of human transcribers. The vast majority of the world’s languages, however, lack the quantity of data necessary to approach this level of accuracy. In this paper we use four of the most popular ASR toolkits to train ASR models for eleven languages with limited ASR training resources: eleven widely spoken languages of Africa, Asia, and South America, one endangered language of Central America, and three critically endangered languages of North America. We find that no single architecture consistently outperforms any other. These differences in performance so far do not appear to be related to any particular feature of the datasets or characteristics of the languages. These findings have important implications for future research in ASR for under-resourced languages. ASR systems for languages with abundant existing media and available speakers may derive the most benefit simply by collecting large amounts of additional acoustic and textual training data. Communities using ASR to support endangered language documentation efforts, who cannot easily collect more data, might instead focus on exploring multiple architectures and hyperparameterizations to optimize performance within the constraints of their available data and resources. 2023.acl-short.87 @@ -14286,7 +14286,7 @@ The Ecological Fallacy in Annotation: Modeling Human Label Variation goes beyond Sociodemographics MatthiasOrlikowskiBielefeld University PaulRöttgerUniversity of Oxford - PhilippCimianoUniv. Bielefeld + PhilippCimianoUniv. Bielefeld DirkHovyBocconi University 1017-1029 Many NLP tasks exhibit human label variation, where different annotators give different labels to the same texts. This variation is known to depend, at least in part, on the sociodemographics of annotators. Recent research aims to model individual annotator behaviour rather than predicting aggregated labels, and we would expect that sociodemographic information is useful for these models. On the other hand, the ecological fallacy states that aggregate group behaviour, such as the behaviour of the average female annotator, does not necessarily explain individual behaviour. To account for sociodemographics in models of individual annotator behaviour, we introduce group-specific layers to multi-annotator models. In a series of experiments for toxic content detection, we find that explicitly accounting for sociodemographic attributes in this way does not significantly improve model performance. This result shows that individual annotation behaviour depends on much more than just sociodemographics. @@ -14311,7 +14311,7 @@ VikasRaunakMicrosoft ArulMenezesMicrosoft Translator MattPostMicrosoft - HanyHassanMicrosoft + HanyHassanMicrosoft 1041-1050 Large Language Models (LLMs) such as GPT-3 have emerged as general-purpose language models capable of addressing many natural language generation or understanding tasks. On the task of Machine Translation (MT), multiple works have investigated few-shot prompting mechanisms to elicit better translations from LLMs. However, there has been relatively little investigation on how such translations differ qualitatively from the translations generated by standard Neural Machine Translation (NMT) models. In this work, we investigate these differences in terms of the literalness of translations produced by the two systems. Using literalness measures involving word alignment and monotonicity, we find that translations out of English (E-X) from GPTs tend to be less literal, while exhibiting similar or better scores on MT quality metrics. We demonstrate that this finding is borne out in human evaluations as well. We then show that these differences are especially pronounced when translating sentences that contain idiomatic expressions. 2023.acl-short.90 @@ -14363,9 +14363,9 @@ RicardoReiUnbabel/INESC-ID Nuno M.GuerreiroInstituto de Telecomunicacoes, University of Lisbon MarcosTrevisoInstituto de Telecomunicacoes - LuisaCoheurINESC-ID/Instituto Superior Tecnico - AlonLavieUnbabel/Carnegie Mellon University - AndréMartinsUnbabel, Instituto de Telecomunicacoes + LuisaCoheurINESC-ID/Instituto Superior Tecnico + AlonLavieUnbabel/Carnegie Mellon University + AndréMartinsUnbabel, Instituto de Telecomunicacoes 1089-1105 Neural metrics for machine translation evaluation, such as COMET, exhibit significant improvements in their correlation with human judgments, as compared to traditional metrics based on lexical overlap, such as BLEU. Yet, neural metrics are, to a great extent, “black boxes” returning a single sentence-level score without transparency about the decision-making process. In this work, we develop and compare several neural explainability methods and demonstrate their effectiveness for interpreting state-of-the-art fine-tuned neural metrics. Our study reveals that these metrics leverage token-level information that can be directly attributed to translation errors, as assessed through comparison of token-level neural saliency maps with Multidimensional Quality Metrics (MQM) annotations and with synthetically-generated critical translation errors. To ease future research, we release our code at: https://github.com/Unbabel/COMET/tree/explainable-metrics 2023.acl-short.94 @@ -14394,7 +14394,7 @@ TianyuZhaorinna Co., Ltd. MakotoShingrinna Co., Ltd. KeiSawadarinna Co., Ltd. - ManabuOkumuraTokyo Institute of Technology + ManabuOkumuraTokyo Institute of Technology 1116-1127 In a controllable text generation dataset, there exist unannotated attributes that could provide irrelevant learning signals to models that use it for training and thus degrade their performance. We propose focused prefix tuning (FPT) to mitigate the problem and to enable the control to focus on the desired attribute. Experimental results show that FPT can achieve better control accuracy and text fluency than baseline models in single-attribute control tasks. In multi-attribute control tasks, FPT achieves comparable control accuracy with the state-of-the-art approach while keeping the flexibility to control new attributes without retraining existing models. 2023.acl-short.96 @@ -14451,7 +14451,7 @@ ZhimingMaoThe Chinese University of Hong Kong HuiminWangTencent YimingDuThe Chinese University of Hong Kong - Kam-FaiWongDepartment of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong + Kam-FaiWongDepartment of Systems Engineering and Engineering Management, The Chinese University of Hong Kong, Hong Kong 1160-1170 Prior study has shown that pretrained language models (PLM) can boost the performance of text-based recommendation. In contrast to previous works that either use PLM to encode user history as a whole input text, or impose an additional aggregation network to fuse multi-turn history representations, we propose a unified local- and global-attention Transformer encoder to better model two-level contexts of user history. Moreover, conditioned on user history encoded by Transformer encoders, our framework leverages Transformer decoders to estimate the language perplexity of candidate text items, which can serve as a straightforward yet significant contrastive signal for user-item text matching. Based on this, our framework, UniTRec, unifies the contrastive objectives of discriminative matching scores and candidate text perplexity to jointly enhance text-based recommendation. Extensive evaluation shows that UniTRec delivers SOTA performance on three text-based recommendation tasks. 2023.acl-short.100 @@ -14466,7 +14466,7 @@ QianLiuSea AI Lab LidongBingAlibaba DAMO Academy FeiLiWuhan University - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore 1171-1182 While sentiment analysis systems try to determine the sentiment polarities of given targets based on the key opinion expressions in input texts, in implicit sentiment analysis (ISA) the opinion cues come in an implicit and obscure manner. Thus detecting implicit sentiment requires the common-sense and multi-hop reasoning ability to infer the latent intent of opinion. Inspired by the recent chain-of-thought (CoT) idea, in this work we introduce a Three-hop Reasoning (THOR) CoT framework to mimic the human-like reasoning process for ISA. We design a three-step prompting principle for THOR to step-by-step induce the implicit aspect, opinion, and finally the sentiment polarity. Our THOR+Flan-T5 (11B) pushes the state-of-the-art (SoTA) by over 6% F1 on supervised setup. More strikingly, THOR+GPT3 (175B) boosts the SoTA by over 50% F1 on zero-shot setting. 2023.acl-short.101 @@ -14479,7 +14479,7 @@ Ta-ChungChicarnegie mellon university Ting-HanFanPrinceton University Li-WeiChenCarnegie Mellon University - AlexanderRudnickyCarnegie Mellon University + AlexanderRudnickyCarnegie Mellon University PeterRamadgePrinceton University 1183-1193 The use of positional embeddings in transformer language models is widely accepted. However, recent research has called into question the necessity of such embeddings. We further extend this inquiry by demonstrating that a randomly initialized and frozen transformer language model, devoid of positional embeddings, inherently encodes strong positional information through the shrinkage of self-attention variance. To quantify this variance, we derive the underlying distribution of each step within a transformer layer. Through empirical validation using a fully pretrained model, we show that the variance shrinkage effect still persists after extensive gradient updates. Our findings serve to justify the decision to discard positional embeddings and thus facilitate more efficient pretraining of transformer language models. @@ -14502,7 +14502,7 @@ Class based Influence Functions for Error Detection ThangNguyen-DucFPT Software AI Center HoangThanh-TungFPT Software AI Center - Quan HungTranAdobe Research + Quan HungTranAdobe Research DangHuu-TienFPT Software AI Center HieuNguyenFPT Software AI Center AnhT. V. DauFPT Software AI Center @@ -14675,8 +14675,8 @@ Text-to-<fixed-case>SQL</fixed-case> Error Correction with Language Models of Code ZiruChenOhio State University ShijieChenThe Ohio State University - MichaelWhiteThe Ohio State University - RaymondMooneyUniversity of Texas at Austin + MichaelWhiteThe Ohio State University + RaymondMooneyUniversity of Texas at Austin AliPayaniCisco JayanthSrinivasaCisco Systems Inc YuSuThe Ohio State University @@ -14708,9 +14708,9 @@ ChantalShaibNortheastern University MillicentLiNortheastern University SebastianJosephUniversity of Texas at Austin - IainMarshallKing’s College London + IainMarshallKing’s College London Junyi JessyLiUniversity of Texas at Austin - ByronWallaceNortheastern University + ByronWallaceNortheastern University 1387-1407 Large language models, particularly GPT-3, are able to produce high quality summaries ofgeneral domain news articles in few- and zero-shot settings. However, it is unclear if such models are similarly capable in more specialized domains such as biomedicine. In this paper we enlist domain experts (individuals with medical training) to evaluate summaries of biomedical articles generated by GPT-3, given no supervision. We consider bothsingle- and multi-document settings. In the former, GPT-3 is tasked with generating regular and plain-language summaries of articles describing randomized controlled trials; in thelatter, we assess the degree to which GPT-3 is able to synthesize evidence reported acrossa collection of articles. We design an annotation scheme for evaluating model outputs, withan emphasis on assessing the factual accuracy of generated summaries. We find that whileGPT-3 is able to summarize and simplify single biomedical articles faithfully, it strugglesto provide accurate aggregations of findings over multiple documents. We release all data,code, and annotations used in this work. 2023.acl-short.119 @@ -14798,8 +14798,8 @@ XingNiuAmazon AI BenjaminHsuAmazon AnnaCurreyAWS AI Labs - GeorgianaDinuAmazon AWS - MariaNadejdeAWS AI Labs + GeorgianaDinuAmazon AWS + MariaNadejdeAWS AI Labs 1476-1490 Attribute-controlled translation (ACT) is a subtask of machine translation that involves controlling stylistic or linguistic attributes (like formality and gender) of translation outputs. While ACT has garnered attention in recent years due to its usefulness in real-world applications, progress in the task is currently limited by dataset availability, since most prior approaches rely on supervised methods. To address this limitation, we propose Retrieval and Attribute-Marking enhanced Prompting (RAMP), which leverages large multilingual language models to perform ACT in few-shot and zero-shot settings. RAMP improves generation accuracy over the standard prompting approach by (1) incorporating a semantic similarity retrieval component for selecting similar in-context examples, and (2) marking in-context examples with attribute annotations. Our comprehensive experiments show that RAMP is a viable approach in both zero-shot and few-shot settings. 2023.acl-short.126 @@ -14810,7 +14810,7 @@ Zero-Shot and Few-Shot Stance Detection on Varied Topics via Conditional Generation HaoyangWenCarnegie Mellon University - AlexanderHauptmannCarnegie Mellon University + AlexanderHauptmannCarnegie Mellon University 1491-1499 Zero-shot and few-shot stance detection identify the polarity of text with regard to a certain target when we have only limited or no training resources for the target. Previous work generally formulates the problem into a classification setting, ignoring the potential use of label text. In this paper, we instead utilize a conditional generation framework and formulate the problem as denoising from partially-filled templates, which can better utilize the semantics among input, label, and target texts. We further propose to jointly train an auxiliary task, target prediction, and to incorporate manually constructed incorrect samples with unlikelihood training to improve the representations for both target and label texts. We also verify the effectiveness of target-related Wikipedia knowledge with the generation framework. Experiments show that our proposed method significantly outperforms several strong baselines on VAST, and achieves new state-of-the-art performance. 2023.acl-short.127 @@ -14825,7 +14825,7 @@ VasudhaVaradarajanStony Brook University JohannesEichstaedtStanford University AdithyaV GanesanStony Brook University - H. AndrewSchwartzStony Brook University + H. AndrewSchwartzStony Brook University 1500-1511 Anxiety disorders are the most common of mental illnesses, but relatively little is known about how to detect them from language. The primary clinical manifestation of anxiety is worry associated cognitive distortions, which are likely expressed at the discourse-level of semantics. Here, we investigate the development of a modern linguistic assessment for degree of anxiety, specifically evaluating the utility of discourse-level information in addition to lexical-level large language model embeddings. We find that a combined lexico-discourse model outperforms models based solely on state-of-the-art contextual embeddings (RoBERTa), with discourse-level representations derived from Sentence-BERT and DiscRE both providing additional predictive power not captured by lexical-level representations. Interpreting the model, we find that discourse patterns of causal explanations, among others, were used significantly more by those scoring high in anxiety, dovetailing with psychological literature. 2023.acl-short.128 @@ -14922,7 +14922,7 @@ PierluigiCassottiUniversità degli studi di Bari LuciaSicilianiUniversity of Bari Aldo Moro MarcoDeGemmisUniversity of Bari - GiovanniSemeraroUniversity of Bari “Aldo Moro” + GiovanniSemeraroUniversity of Bari “Aldo Moro” PierpaoloBasileDepartment of Computer Science, University of Bari Aldo Moro 1577-1585 The recent introduction of large-scale datasets for the WiC (Word in Context) task enables the creation of more reliable and meaningful contextualized word embeddings.However, most of the approaches to the WiC task use cross-encoders, which prevent the possibility of deriving comparable word embeddings.In this work, we introduce XL-LEXEME, a Lexical Semantic Change Detection model.XL-LEXEME extends SBERT, highlighting the target word in the sentence. We evaluate XL-LEXEME on the multilingual benchmarks for SemEval-2020 Task 1 - Lexical Semantic Change (LSC) Detection and the RuShiftEval shared task involving five languages: English, German, Swedish, Latin, and Russian.XL-LEXEME outperforms the state-of-the-art in English, German and Swedish with statistically significant differences from the baseline results and obtains state-of-the-art performance in the RuShiftEval shared task. @@ -14933,7 +14933,7 @@ Theory-Grounded Computational Text Analysis - Arya D.McCarthyJohns Hopkins University + Arya D.McCarthyJohns Hopkins University Giovanna Maria DoraDoreJHU 1586-1594 In this position paper, we argue that computational text analysis lacks and requires organizing principles. A broad space separates its two constituent disciplines—natural language processing and social science—which has to date been sidestepped rather than filled by applying increasingly complex computational models to problems in social science research. We contrast descriptive and integrative findings, and our review of approximately 60 papers on computational text analysis reveals that those from *ACL venues are typically descriptive. The lack of theory began at the area’s inception and has over the decades, grown more important and challenging. A return to theoretically grounded research questions will propel the area from both theoretical and methodological points of view. @@ -15030,7 +15030,7 @@ Event Extraction as Question Generation and Answering DiLuDataminr ShihaoRanDataminr - JoelTetreaultDataminr + JoelTetreaultDataminr AlejandroJaimesDataminr 1666-1688 Recent work on Event Extraction has reframed the task as Question Answering (QA), with promising results. The advantage of this approach is that it addresses the error propagation issue found in traditional token-based classification approaches by directly predicting event arguments without extracting candidates first. However, the questions are typically based on fixed templates and they rarely leverage contextual information such as relevant arguments. In addition, prior QA-based approaches have difficulty handling cases where there are multiple arguments for the same role. In this paper, we propose QGA-EE, which enables a Question Generation (QG) model to generate questions that incorporate rich contextual information instead of using fixed templates. We also propose dynamic templates to assist the training of QG model. Experiments show that QGA-EE outperforms all prior single-task-based models on the ACE05 English dataset. @@ -15041,7 +15041,7 @@ Are Sample-Efficient <fixed-case>NLP</fixed-case> Models More Robust? - Nelson F.LiuStanford University + Nelson F.LiuStanford University AnanyaKumarStanford University PercyLiangStanford University RobinJiaUniversity of Southern California @@ -15071,7 +15071,7 @@ PhillipKeungUniversity of Washington DanielChengUniversity of Washington JungoKasaiUniversity of Washington - Noah A.SmithUniversity of Washington + Noah A.SmithUniversity of Washington 1723-1730 Large-scale language model pretraining is a very successful form of self-supervised learning in natural language processing, but it is increasingly expensive to perform as the models and pretraining corpora have become larger over time. We propose NarrowBERT, a modified transformer encoder that increases the throughput for masked language model pretraining by more than 2x. NarrowBERT sparsifies the transformer model such that the self-attention queries and feedforward layers only operate on the masked tokens of each sentence during pretraining, rather than all of the tokens as with the usual transformer encoder. We also show that NarrowBERT increases the throughput at inference time by as much as 3.5x with minimal (or no) performance degradation on sentence encoding tasks like MNLI. Finally, we examine the performance of NarrowBERT on the IMDB and Amazon reviews classification and CoNLL NER tasks and show that it is also comparable to standard BERT performance. 2023.acl-short.146 @@ -15131,14 +15131,14 @@ <fixed-case>STT</fixed-case>4<fixed-case>SG</fixed-case>-350: A Speech Corpus for All <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman Dialect Regions MichelPlüssUniversity of Applied Sciences and Arts Northwestern Switzerland - JanDeriuZurich University of Applied Sciences + JanDeriuZurich University of Applied Sciences YanickSchranerUniversity of Applied Sciences Northwestern Switzerland ClaudioPaonessaUniversity of Applied Sciences and Arts Northwestern Switzerland JuliaHartmannFHNW LarissaSchmidtUniversity of Zurich ChristianSchellerUniversity of Applied Sciences Northwestern Switzerland - ManuelaHürlimannZurich University of Applied Sciences (ZHAW) - TanjaSamardžićUniversity of Zurich + ManuelaHürlimannZurich University of Applied Sciences (ZHAW) + TanjaSamardžićUniversity of Zurich ManfredVogelUniversity of Applied Sciences Northwestern Switzerland MarkCieliebakZurich University of Applied Sciences 1763-1772 @@ -15191,7 +15191,7 @@ <fixed-case>S</fixed-case>co<fixed-case>N</fixed-case>e: Benchmarking Negation Reasoning in Language Models With Fine-Tuning and In-Context Learning Jingyuan S.SheHaverford College ChristopherPottsStanford University - Samuel R.BowmanNew York University + Samuel R.BowmanNew York University AtticusGeigerStanford University 1803-1821 A number of recent benchmarks seek to assess how well models handle natural language negation. However, these benchmarks lack the controlled example paradigms that would allow us to infer whether a model had truly learned how negation morphemes semantically scope. To fill these analytical gaps, we present the Scoped Negation NLI (ScoNe-NLI) benchmark, which contains contrast sets of six examples with up to two negations where either zero, one, or both negative morphemes affect the NLI label. We use ScoNe-NLI to assess fine-tuning and in-context learning strategies. We find that RoBERTa and DeBERTa models solve ScoNe-NLI after many shot fine-tuning. For in-context learning, we test the latest InstructGPT models and find that most prompt strategies are not successful, including those using step-by-step reasoning. To better understand this result, we extend ScoNe with ScoNe-NLG, a sentence completion test set that embeds negation reasoning in short narratives. Here, InstructGPT is successful, which reveals the model can correctly reason about negation, but struggles to do so on NLI examples outside of its core pretraining regime. @@ -15220,8 +15220,8 @@ JieCaoUniversity of Colorado E. MargaretPerkoffUniversity of Colorado Boulder RosySouthwellUniversity of Colorado - MarthaPalmerUniversity of Colorado - KatharinaKannUniversity of Colorado Boulder + MarthaPalmerUniversity of Colorado + KatharinaKannUniversity of Colorado Boulder 1833-1842 Recent advances in NLP have led to a rise in inter-disciplinary and application-oriented research. While this demonstrates the growing real-world impact of the field, research papers frequently feature experiments that do not account for the complexities of realistic data and environments. To explore the extent of this gap, we investigate the relationship between the real-world motivations described in NLP papers and the models and evaluation which comprise the proposed solution. We first survey papers from the NLP Applications track from ACL 2020 and EMNLP 2020, asking which papers have differences between their stated motivation and their experimental setting, and if so, mention them. We find that many papers fall short of considering real-world input and output conditions due to adopting simplified modeling or evaluation settings. As a case study, we then empirically show that the performance of an educational dialog understanding system deteriorates when used in a realistic classroom environment. 2023.acl-short.156 @@ -15233,8 +15233,8 @@ How to Distill your <fixed-case>BERT</fixed-case>: An Empirical Study on the Impact of Weight Initialisation and Distillation Objectives XinpengWangLudwig-Maximilians-Universitaet Muenchen LeonieWeissweilerCIS, LMU Munich - HinrichSchützeCenter for Information and Language Processing, University of Munich - BarbaraPlankLMU Munich + HinrichSchützeCenter for Information and Language Processing, University of Munich + BarbaraPlankLMU Munich 1843-1852 Recently, various intermediate layer distillation (ILD) objectives have been shown to improve compression of BERT models via Knowledge Distillation (KD). However, a comprehensive evaluation of the objectives in both task-specific and task-agnostic settings is lacking. To the best of our knowledge, this is the first work comprehensively evaluating distillation objectives in both settings. We show that attention transfer gives the best performance overall. We also study the impact of layer choice when initializing the student from the teacher layers, finding a significant impact on the performance in task-specific distillation. For vanilla KD and hidden states transfer, initialisation with lower layers of the teacher gives a considerable improvement over higher layers, especially on the task of QNLI (up to an absolute percentage change of 17.8 in accuracy). Attention transfer behaves consistently under different initialisation settings. We release our code as an efficient transformer-based model distillation framework for further studies. 2023.acl-short.157 @@ -15366,8 +15366,8 @@ RotemDrorUniversity of Pennsylvania ShaLiUniversity of Illinois Urbana-Champaign HengJiUniversity of Illinois at Urbana-Champaign and Amazon (Amazon Scholar) - MarthaPalmerUniversity of Colorado - Susan WindischBrownUniversity of Colorado at Boulder + MarthaPalmerUniversity of Colorado + Susan WindischBrownUniversity of Colorado at Boulder ReeceSuchockiUniversity of Colorado Boulder ChrisCallison-BurchUniversity of Pennsylvania 1-10 @@ -15434,22 +15434,22 @@ <fixed-case>P</fixed-case>rime<fixed-case>QA</fixed-case>: The Prime Repository for State-of-the-Art Multilingual Question Answering Research and Development - AviSilIBM Research AI + AviSilIBM Research AI JaydeepSenIBM Research AI BhavaniIyerIBM MartinFranzIBM T.J. Watson Research Center KshitijFadnisIBM Research MihaelaBorneaIBM Research SaraRosenthalIBM Research - ScottMcCarleyIBM Research AI + ScottMcCarleyIBM Research AI RongZhangIBM.com VishwajeetKumarIBM Research AI YulongLiIBM research - Md ArafatSultanIBM Research AI + Md ArafatSultanIBM Research AI RiyazBhatIBM IRL JuergenBrossIBM Research - RaduFlorianIBM Research - SalimRoukosIBM Research AI + RaduFlorianIBM Research + SalimRoukosIBM Research AI 51-62 The field of Question Answering (QA) has made remarkable progress in recent years, thanks to the advent of large pre-trained language models, newer realistic benchmark datasets with leaderboards, and novel algorithms for key components such as retrievers and readers. In this paper, we introduce PrimeQA: a one-stop and open-source QA repository with an aim to democratize QA research and facilitate easy replication of state-of-the-art (SOTA) QA methods. PrimeQA supports core QA functionalities like retrieval and reading comprehension as well as auxiliary capabilities such as question generation. It has been designed as an end-to-end toolkit for various use cases: building front-end applications, replicating SOTA methods on public benchmarks, and expanding pre-existing methods. PrimeQA is available at: https://github.com/primeqa. 2023.acl-demo.5 @@ -15489,7 +15489,7 @@ A Practical Toolkit for Multilingual Question and Answer Generation AsahiUshioCardiff University FernandoAlva-ManchegoCardiff University - JoseCamacho-ColladosCardiff University + JoseCamacho-ColladosCardiff University 86-94 Generating questions along with associated answers from a text has applications in several domains, such as creating reading comprehension tests for students, or improving document search by providing auxiliary questions and answers based on the query. Training models for question and answer generation (QAG) is not straightforward due to the expected structured output (i.e. a list of question and answer pairs), as it requires more than generating a single sentence. This results in a small number of publicly accessible QAG models. In this paper, we introduce AutoQG, an online service for multilingual QAG along with lmqg, an all-in-one python package for model fine-tuning, generation, and evaluation. We also release QAG models in eight languages fine-tuned on a few variants of pre-trained encoder-decoder language models, which can be used online via AutoQG or locally via lmqg. With these resources, practitioners of any level can benefit from a toolkit that includes a web interface for end users, and easy-to-use code for developers who require custom models or fine-grained controls for generation. 2023.acl-demo.8 @@ -15563,7 +15563,7 @@ disco: a toolkit for Distributional Control of Generative Models - GermánKruszewskiNaver Labs Europe + GermánKruszewskiNaver Labs Europe JosRozenNAVER LABS Europe MarcDymetmanIndependent researcher 144-160 @@ -15590,7 +15590,7 @@ <fixed-case>J</fixed-case>apanese-to-<fixed-case>E</fixed-case>nglish Simultaneous Dubbing Prototype XiaolinWangNICT MasaoUtiyamaNICT - EiichiroSumitaNICT + EiichiroSumitaNICT 169-178 Live video streaming has become an important form of communication such as virtual conferences. However, for cross-language communication in live video streaming, reading subtitles degrades the viewing experience. To address this problem, our simultaneous dubbing prototype translates and replaces the original speech of a live video stream in a simultaneous manner. Tests on a collection of 90 public videos show that our system achieves a low average latency of 11.90 seconds for smooth playback. Our method is general and can be extended to other language pairs. 2023.acl-demo.16 @@ -15630,7 +15630,7 @@ SugyeongEoKorea University SeounghoonLeeInstitute for Infocomm Research, A*STAR BernardoYahyaHankuk University of Foreign Studies - HeuiseokLimKorea University + HeuiseokLimKorea University 190-207 English is acknowledged worldwide as a mode of communication. However, due to the absence of realistic practicing scenarios, students learning English as a foreign language (EFL) typically have limited chances to converse and share feedback with others. In this paper, we propose PEEP-Talk, a real-world situational dialogue-based chatbot designed for English education. It also naturally switches to a new topic or situation in response to out-of-topic utterances, which are common among English beginners. Furthermore, PEEP-Talk provides feedback score on conversation and grammar error correction. We performed automatic and user evaluations to validate performance and education efficiency of our system. The results show that PEEP-Talk generates appropriate responses in various real-life situations while providing accurate feedback to learners. Moreover, we demonstrate a positive impact on English-speaking, grammar, and English learning anxiety, implying that PEEP-Talk can lower the barrier to learning natural conversation in effective ways. 2023.acl-demo.18 @@ -15727,7 +15727,7 @@ RajDabreNICT DipteshKanojiaUniversity of Surrey ChinmaySawantSurrey University - EiichiroSumitaNICT + EiichiroSumitaNICT 257-263 In this paper, we present our open-source neural machine translation (NMT) toolkit called “Yet Another Neural Machine Translation Toolkit” abbreviated as YANMTT - https://github.com/prajdabre/yanmtt, which is built on top of the HuggingFace Transformers library. YANMTT focuses on transfer learning and enables easy pre-training and fine-tuning of sequence-to-sequence models at scale. It can be used for training parameter-heavy models with minimal parameter sharing and efficient, lightweight models via heavy parameter sharing. Additionally, it supports parameter-efficient fine-tuning (PEFT) through adapters and prompts. Our toolkit also comes with a user interface that can be used to demonstrate these models and visualize various parts of the model. Apart from these core features, our toolkit also provides other advanced functionalities such as but not limited to document/multi-source NMT, simultaneous NMT, mixtures-of-experts, model compression and continual learning. 2023.acl-demo.24 @@ -15816,8 +15816,8 @@ The <fixed-case>OPUS</fixed-case>-<fixed-case>MT</fixed-case> Dashboard – A Toolkit for a Systematic Evaluation of Open Machine Translation Models - JörgTiedemannUniversity of Helsinki - Onade GibertUniversity of Helsinki + JörgTiedemannUniversity of Helsinki + Onade GibertUniversity of Helsinki 315-327 The OPUS-MT dashboard is a web-based platform that provides a comprehensive overview of open translation models. We focus on a systematic collection of benchmark results with verifiable translation performance and large coverage in terms of languages and domains. We provide results for in-house OPUS-MT and Tatoeba models as well as external models from the Huggingface repository and user-contributed translations. The functionalities of the evaluation tool include summaries of benchmarks for over 2,300 models covering 4,560 language directions and 294 languages, as well as the inspection of predicted translations against their human reference. We focus on centralization, reproducibility and coverage of MT evaluation combined with scalability. The dashboard can be accessed live at https://opus.nlpl.eu/dashboard/. 2023.acl-demo.30 @@ -15832,7 +15832,7 @@ FynnPetersen-FreyUniversität Hamburg IsabelEiserUniversität Hamburg GertraudKochUniversität Hamburg - ChrisBiemannUniversität Hamburg + ChrisBiemannUniversität Hamburg 328-335 This work introduces the D-WISE Tool Suite (DWTS), a novel working environment for digital qualitative discourse analysis in the Digital Humanities (DH). The DWTS addresses limitations of current DH tools induced by the ever-increasing amount of heterogeneous, unstructured, and multi-modal data in which the discourses of contemporary societies are encoded. To provide meaningful insights from such data, our system leverages and combines state-of-the-art machine learning technologies from Natural Language Processing and Com-puter Vision. Further, the DWTS is conceived and developed by an interdisciplinary team ofcultural anthropologists and computer scientists to ensure the tool’s usability for modernDH research. Central features of the DWTS are: a) import of multi-modal data like text, image, audio, and video b) preprocessing pipelines for automatic annotations c) lexical and semantic search of documents d) manual span, bounding box, time-span, and frame annotations e) documentation of the research process. 2023.acl-demo.31 @@ -15848,7 +15848,7 @@ LinyongNanYale University MinghaoGuoZhejiang University ArmanCohanAllen Institute for AI - DragomirRadevYale University + DragomirRadevYale University 336-347 There are a growing number of table pre-training methods proposed for reasoning over tabular data (e.g., question answering, fact checking, and faithful text generation). However, most existing methods are benchmarked solely on a limited number of datasets, varying in configuration, which leads to a lack of unified, standardized, fair, and comprehensive comparison between methods. This paper presents OpenRT, the first open-source framework for reasoning over tabular data, to reproduce existing table pre-training models for performance comparison and develop new models quickly. We implemented and compared six table pre-training models on four question answering, one fact checking, and one faithful text generation datasets. Moreover, to enable the community to easily construct new table reasoning datasets, we developed TaRAT, an annotation tool which supports multi-person collaborative annotations for various kinds of table reasoning tasks. The researchers are able to deploy the newly-constructed dataset to OpenRT and compare the performances of different baseline systems. 2023.acl-demo.32 @@ -15887,7 +15887,7 @@ <fixed-case>B</fixed-case>i<fixed-case>S</fixed-case>ync: A Bilingual Editor for Synchronized Monolingual Texts - JosepCregoSYSTRAN + JosepCregoSYSTRAN JitaoXuNetEase YouDao FrançoisYvonISIR CNRS & Sorbonne Université 369-376 @@ -16476,7 +16476,7 @@ <fixed-case>M</fixed-case>ed<fixed-case>T</fixed-case>em2.0: Prompt-based Temporal Classification of Treatment Events from Discharge Summaries YangCuiUniversity of Manchester LifengHanThe University of Manchester - GoranNenadicUniversity of Manchester + GoranNenadicUniversity of Manchester 160-183 Discharge summaries are comprehensive medical records that encompass vital information about a patient’s hospital stay. A crucial aspect of discharge summaries is the temporal information of treatments administered throughout the patient’s illness. With an extensive volume of clinical documents, manually extracting and compiling a patient’s medication list can be laborious, time-consuming, and susceptible to errors. The objective of this paper is to build upon the recent development on clinical NLP by temporally classifying treatments in clinical texts, specifically determining whether a treatment was administered between the time of admission and discharge from the hospital. State-of-the-art NLP methods including prompt-based learning on Generative Pre-trained Transformers (GPTs) models and fine-tuning on pre-trained language models (PLMs) such as BERT were employed to classify temporal relations between treatments and hospitalisation periods in discharge summaries. Fine-tuning with the BERT model achieved an F1 score of 92.45% and a balanced accuracy of 77.56%, while prompt learning using the T5 model and mixed templates resulted in an F1 score of 90.89% and a balanced accuracy of 72.07%.Our codes and data are available at https://github.com/HECTA-UoM/MedTem. 2023.acl-srw.27 @@ -16513,7 +16513,7 @@ Probing for Hyperbole in Pre-Trained Language Models NinaSchneidermannUniversity of Copenhagen DanielHershcovichUniversity of Copenhagen - BolettePedersenUniversity of Copenhagen + BolettePedersenUniversity of Copenhagen 200-211 Hyperbole is a common figure of speech, which is under-explored in NLP research. In this study, we conduct edge and minimal description length (MDL) probing experiments on three pre-trained language models (PLMs) in an attempt to explore the extent to which hyperbolic information is encoded in these models. We use both word-in-context and sentence-level representations as model inputs as a basis for comparison. We also annotate 63 hyperbole sentences from the HYPO dataset according to an operational taxonomy to conduct an error analysis to explore the encoding of different hyperbole categories. Our results show that hyperbole is to a limited extent encoded in PLMs, and mostly in the final layers. They also indicate that hyperbolic information may be better encoded by the sentence-level representations, which, due to the pragmatic nature of hyperbole, may therefore provide a more accurate and informative representation in PLMs. Finally, the inter-annotator agreement for our annotations, a Cohen’s Kappa of 0.339, suggest that the taxonomy categories may not be intuitive and need revision or simplification. 2023.acl-srw.30 @@ -16628,10 +16628,10 @@ Authorship Attribution of Late 19th Century Novels using <fixed-case>GAN</fixed-case>-<fixed-case>BERT</fixed-case> KanishkaSilvaUniversity of Wolverhampton BurcuCanUniversity of Stirling - FrédéricBlainTilburg University + FrédéricBlainTilburg University RaheemSarwarOTEHM, Manchester Metropolitan University LauraUgoliniUniversity of Wolverhampton - RuslanMitkovUniversity of Wolverhampton + RuslanMitkovUniversity of Wolverhampton 310-320 Authorship attribution aims to identify the author of an anonymous text. The task becomes even more worthwhile when it comes to literary works. For example, pen names were commonly used by female authors in the 19th century resulting in some literary works being incorrectly attributed or claimed. With this motivation, we collated a dataset of late 19th century novels in English. Due to the imbalance in the dataset and the unavailability of enough data per author, we employed the GANBERT model along with data sampling strategies to fine-tune a transformer-based model for authorship attribution. Differently from the earlier studies on the GAN-BERT model, we conducted transfer learning on comparatively smaller author subsets to train more focused author-specific models yielding performance over 0.88 accuracy and F1 scores. Furthermore, we observed that increasing the sample size has a negative impact on the model’s performance. Our research mainly contributes to the ongoing authorship attribution research using GAN-BERT architecture, especially in attributing disputed novelists in the late 19th century. 2023.acl-srw.44 @@ -16641,7 +16641,7 @@ How-to Guides for Specific Audiences: A Corpus and Initial Findings NicolaFantonUniversität Stuttgart - AgnieszkaFalenskaIMS, University of Stuttgart + AgnieszkaFalenskaIMS, University of Stuttgart MichaelRothUniversity of Stuttgart 321-333 Instructional texts for specific target groups should ideally take into account the prior knowledge and needs of the readers in order to guide them efficiently to their desired goals. However, targeting specific groups also carries the risk of reflecting disparate social norms and subtle stereotypes. In this paper, we investigate the extent to which how-to guides from one particular platform, wikiHow, differ in practice depending on the intended audience. We conduct two case studies in which we examine qualitative features of texts written for specific audiences. In a generalization study, we investigate which differences can also be systematically demonstrated using computational methods. The results of our studies show that guides from wikiHow, like other text genres, are subject to subtle biases. We aim to raise awareness of these inequalities as a first step to addressing them in future work. @@ -16722,7 +16722,7 @@ “Knowledge is Power”: Constructing Knowledge Graph of Abdominal Organs and Using Them for Automatic Radiology Report Generation KaveriKaleIndian Institute of Technology Bombay - PushpakBhattacharyyaIndian Institute of Technology Bombay and Patna + PushpakBhattacharyyaIndian Institute of Technology Bombay and Patna AdityaShettyConsultant Radiologist, Breach Candy Hospital, Mumbai MilindGuneConsultant Radiologist, Mumbai, Thane KushShrivastavaAugnito India Pvt Ltd @@ -16763,7 +16763,7 @@ Constrained Policy Optimization for Controlled Self-Learning in Conversational <fixed-case>AI</fixed-case> Systems MohammadKachueeAmazon Alexa AI - SungjinLeeAmazon Alexa AI + SungjinLeeAmazon Alexa AI 43-52 Recently, self-learning methods based on user satisfaction metrics and contextual bandits have shown promising results to enable consistent improvements in conversational AI systems. However, directly targeting such metrics by off-policy bandit learning objectives often increases the risk of making abrupt policy changes that break the current user experience. In this study, we introduce a scalable framework for supporting fine-grained exploration targets for individual domains via user-defined constraints. For example, we may want to ensure fewer policy deviations in business-critical domains such as shopping, while allocating more exploration budget to domains such as music. We present a novel meta-gradient learning approach that is scalable and practical to address this problem. The proposed method adjusts constraint violation penalty terms adaptively through a meta objective that encourages balanced constraint satisfaction across domains. We conducted extensive experiments on a real-world conversational AI and using a set of realistic constraint benchmarks. The proposed approach has been deployed in production for a large-scale commercial assistant, enabling the best balance between the policy value and constraint satisfaction rate. 2023.acl-industry.5 @@ -16808,7 +16808,7 @@ <fixed-case>KG</fixed-case>-<fixed-case>FLIP</fixed-case>: Knowledge-guided Fashion-domain Language-Image Pre-training for <fixed-case>E</fixed-case>-commerce QinjinJiaNorth Carolina State University - YangLiuAmazon + YangLiuAmazon DaopingWuIowa State University ShaoyuanXuAmazon HuidongLiuAmazon @@ -16935,7 +16935,7 @@ ChenweiZhangAmazon.com BinxuanHuangAmazon.com Yifan EthanXuMeta - Xin LunaDongMeta + Xin LunaDongMeta YizhouSunUCLA 172-185 2023.acl-industry.18 @@ -16962,7 +16962,7 @@ Tab-<fixed-case>CQA</fixed-case>: A Tabular Conversational Question Answering Dataset on Financial Reports ChuangLiuTianjin University JunzhuoLiTianjin University - DeyiXiongTianjin University + DeyiXiongTianjin University 196-207 Existing conversational question answering (CQA) datasets have been usually constructed from unstructured texts in English. In this paper, we propose Tab-CQA, a tabular CQA dataset created from Chinese financial reports that are extracted from listed companies in a wide range of different sectors in the past 30 years. From these reports, we select 2,463 tables, and manually generate 2,463 conversations with 35,494 QA pairs. Additionally, we select 4,578 tables, from which 4,578 conversations with 73,595 QA pairs are automatically created via a template-based method. With the manually- and automatically-generated conversations, Tab-CQA contains answerable and unanswerable questions. For the answerable questions, we further diversify them to cover a wide range of skills, e.g., table retrieval, fact checking, numerical reasoning, so as to accommodate real-world scenarios. We further propose two different tabular CQA models, a text-based model and an operation-based model, and evaluate them on Tab-CQA. Experiment results show that Tab-CQA is a very challenging dataset, where a huge performance gap exists between human and neural models. We will publicly release Tab-CQA as a benchmark testbed to promote further research on Chinese tabular CQA. 2023.acl-industry.20 @@ -17022,7 +17022,7 @@ LeanneRolstonUniversity of Washington JadinTredupLivePerson IlanaZimmermanLiveperson - EthanSelfridgeLivePerson + EthanSelfridgeLivePerson JosephBradleyLivePerson 248-267 Contacting customer service via chat is a common practice. Because employing customer service agents is expensive, many companies are turning to NLP that assists human agents by auto-generating responses that can be used directly or with modifications. With their ability to handle large context windows, Large Language Models (LLMs) are a natural fit for this use case. However, their efficacy must be balanced with the cost of training and serving them. This paper assesses the practical cost and impact of LLMs for the enterprise as a function of the usefulness of the responses that they generate. We present a cost framework for evaluating an NLP model’s utility for this use case and apply it to a single brand as a case study in the context of an existing agent assistance product. We compare three strategies for specializing an LLM — prompt engineering, fine-tuning, and knowledge distillation — using feedback from the brand’s customer service agents. We find that the usability of a model’s responses can make up for a large difference in inference cost for our case study brand, and we extrapolate our findings to the broader enterprise space. @@ -17091,7 +17091,7 @@ AnantKhandelwalAmazon HappyMittalAmazon ShreyasKulkarniAmazon - DeepakGuptaAmazon + DeepakGuptaAmazon 305-312 2023.acl-industry.29 khandelwal-etal-2023-large @@ -17244,7 +17244,7 @@ Semantic Ambiguity Detection in Sentence Classification using Task-Specific Embeddings Jong MyoungKimSK Telecom Young-junLeeKAIST - SangkeunJungChungnam National University + SangkeunJungChungnam National University Ho-jinChoiKAIST 425-437 Ambiguity is a major obstacle to providing services based on sentence classification. However, because of the structural limitations of the service, there may not be sufficient contextual information to resolve the ambiguity. In this situation, we focus on ambiguity detection so that service design considering ambiguity is possible. We utilize similarity in a semantic space to detect ambiguity in service scenarios and training data. In addition, we apply task-specific embedding to improve performance. Our results demonstrate that ambiguities and resulting labeling errors in training data or scenarios can be detected. Additionally, we confirm that it can be used to debug services @@ -17322,7 +17322,7 @@ XiujieSongShanghai Jiao Tong University XuezhiCaoMeituan YunsenXianMeituan - KennyZhuUniversity of Texas at Arlington + KennyZhuUniversity of Texas at Arlington 476-486 As e-commerce platforms develop different business lines, a special but challenging product categorization scenario emerges, where there are multiple domain-specific category taxonomies and each of them evolves dynamically over time. In order to unify the categorization process and ensure efficiency, we propose a two-stage taxonomy-agnostic framework that relies solely on calculating the semantic relatedness between product titles and category names in the vector space. To further enhance domain transferability and better exploit cross-domain data, we design two plug-in modules: a heuristic mapping scorer and a pretrained contrastive ranking module with the help of meta concepts, which represent keyword knowledge shared across domains. Comprehensive offline experiments show that our method outperforms strong baselineson three dynamic multi-domain product categorization (DMPC) tasks,and online experiments reconfirm its efficacy with a5% increase on seasonal purchase revenue. Related datasets will be released. 2023.acl-industry.46 @@ -17333,8 +17333,8 @@ <fixed-case>DISCOSQA</fixed-case>: A Knowledge Base Question Answering System for Space Debris based on Program Induction PaulDarmUniversity of Strathclyde - Antonio ValerioMiceli BaroneThe University of Edinburgh - Shay B.CohenUniversity of Edinburgh + Antonio ValerioMiceli BaroneThe University of Edinburgh + Shay B.CohenUniversity of Edinburgh AnnalisaRiccardiUniversity of Strathclyde 487-499 Space program agencies execute complex satellite operations that need to be supported by the technical knowledge contained in their extensive information systems. Knowledge Base (KB) databases are an effective way of storing and accessing such information to scale. In this work we present a system, developed for the European Space Agency, that can answer complex natural language queries, to support engineers in accessing the information contained in a KB that models the orbital space debris environment. Our system is based on a pipeline which first generates a program sketch from a natural language question, then specializes the sketch into a concrete query program with mentions of entities, attributes and relations, and finally executes the program against the database. This pipeline decomposition approach enables us to train the system by leveraging out-of-domain data and semi-synthetic data generated by GPT-3, thus reducing overfitting and shortcut learning even with limited amount of in-domain training data. @@ -17483,7 +17483,7 @@ YingxueZhouAmazon JieHaoAmazon MukundRungtaGeorgia Institute of Technology - YangLiuAmazon + YangLiuAmazon EunahChoAmazon, Alexa AI XingFanAmazon Corporation YanbinLuAmazon @@ -17503,7 +17503,7 @@ HaomingJiangAmazon Search ShaohuiXiUniversity of Science and Technology of China BingYinAmazon.com - ChaoZhangGeorgia Tech + ChaoZhangGeorgia Tech TuoZhaoGeorgia Tech 616-628 E-commerce queries are often short and ambiguous. Consequently, query understanding often uses query rewriting to disambiguate user-input queries. While using e-commerce search tools, users tend to enter multiple searches, which we call context, before purchasing. These history searches contain contextual insights about users’ true shopping intents. Therefore, modeling such contextual information is critical to a better query rewriting model. However, existing query rewriting models ignore users’ history behaviors and consider only the instant search query, which is often a short string offering limited information about the true shopping intent. We propose an end-to-end context-aware query rewriting model to bridge this gap, which takes the search context into account. Specifically, our model builds a session graph using the history search queries and their contained words. We then employ a graph attention mechanism that models cross-query relations and computes contextual information of the session. The model subsequently calculates session representations by combining the contextual information with the instant search query using an aggregation network. The session representations are then decoded to generate rewritten queries. Empirically, we demonstrate the superiority of our method to state-of-the-art approaches under various metrics. @@ -17603,7 +17603,7 @@ “Let’s not Quote out of Context”: Unified Vision-Language Pretraining for Context Assisted Image Captioning AbisekRajakumar KalaraniIndian Institute of Technology Bombay - PushpakBhattacharyyaIndian Institute of Technology Bombay and Patna + PushpakBhattacharyyaIndian Institute of Technology Bombay and Patna NiyatiChhayaAdobe Research SumitShekharAdobe Systems 695-706 @@ -17647,7 +17647,7 @@ ZhiyuChenAmazon BesnikFetahuAmazon OlegRokhlenkoAmazon Research - ShervinMalmasiAmazon + ShervinMalmasiAmazon 729-743 Spoken Question Answering (QA) is a key feature of voice assistants, usually backed by multiple QA systems. Users ask questions via spontaneous speech that can contain disfluencies, errors, and informal syntax or phrasing. This is a major challenge in QA, causing unanswered questions or irrelevant answers, leading to bad user experiences. We analyze failed QA requests to identify core challenges: lexical gaps, proposition types, complex syntactic structure, and high specificity. We propose a Semantic Question Reformulation (SURF) model offering three linguistically-grounded operations (repair, syntactic reshaping, generalization) to rewrite questions to facilitate answering. Offline evaluation on 1M unanswered questions from a leading voice assistant shows that SURF significantly improves answer rates: up to 24% of previously unanswered questions obtain relevant answers (75%). Live deployment shows positive impact for millions of customers with unanswered questions; explicit relevance feedback shows high user satisfaction. 2023.acl-industry.70 @@ -17688,7 +17688,7 @@ JasonChoiAmazon BesnikFetahuAmazon OlegRokhlenkoAmazon Research - ShervinMalmasiAmazon + ShervinMalmasiAmazon 763-771 Frequently Asked Question (FAQ) retrieval aims at retrieving question-answer pairs for a given a user query. Integrating FAQ retrieval with product search can not only empower users to make more informed purchase decisions, but also enhance user retention through efficient post-purchase support. Providing FAQ content without disrupting user’s shopping experience poses challenges on deciding when and how to show FAQ results. Our proposed intent-aware FAQ retrieval consists of (1) an intent classifier that predicts whether the query is looking for an FAQ; (2) a reformulation model that rewrites query into a natural question. Offline evaluation demonstrates that our approach improves 12% in Hit@1 on retrieving ground-truth FAQs, while reducing latency by 95% compared to baseline systems. These improvements are further validated by real user feedback, where more than 99% of users consider FAQs displayed on top of product search results is helpful. Overall, our findings show promising directions for integrating FAQ retrieval into product search at scale. 2023.acl-industry.73 @@ -17759,7 +17759,7 @@ YangDeng WenqiangLei MinlieHuang - Tat-SengChua + Tat-SengChua 1-10 Conversational systems are envisioned to provide social support or functional service to human users via natural language interactions. Conventional conversation researches mainly focus on the responseability of the system, such as dialogue context understanding and response generation, but overlooks the design of an essential property in intelligent conversations, i.e., goal awareness. The awareness of goals means the state of not only being responsive to the users but also aware of the target conversational goal and capable of leading the conversation towards the goal, which is a significant step towards higher-level intelligence and artificial consciousness. It can not only largely improve user engagement and service efficiency in the conversation, but also empower the system to handle more complicated conversation tasks that involve strategical and motivational interactions. In this tutorial, we will introduce the recent advances on the design of agent’s awareness of goals in a wide range of conversational systems. 2023.acl-tutorials.1 @@ -17770,7 +17770,7 @@ Complex Reasoning in Natural Language WentingZhao MorGeva - Bill YuchenLin + Bill YuchenLin MichihiroYasunaga AmanMadaan TaoYu diff --git a/data/xml/2023.alp.xml b/data/xml/2023.alp.xml index 67c25c8513..ec6368ac93 100644 --- a/data/xml/2023.alp.xml +++ b/data/xml/2023.alp.xml @@ -24,7 +24,7 @@ Training and Evaluation of Named Entity Recognition Models for Classical <fixed-case>L</fixed-case>atin MarijkeBeersmans Eveliende Graaf - TimVan de Cruys + TimVan de Cruys MargheritaFantoli 1–12 We evaluate the performance of various models on the task of named entity recognition (NER) for classical Latin. Using an existing dataset, we train two transformer-based LatinBERT models and one shallow conditional random field (CRF) model. The performance is assessed using both standard metrics and a detailed manual error analysis, and compared to the results obtained by different already released Latin NER tools. Both analyses demonstrate that the BERT models achieve a better f1-score than the other models. Furthermore, we annotate new, unseen data for further evaluation of the models, and we discuss the impact of annotation choices on the results. @@ -88,7 +88,7 @@ <fixed-case>L</fixed-case>atin Morphology through the Centuries: Ensuring Consistency for Better Language Processing FedericaGamba - DanielZeman + DanielZeman 59–67 This paper focuses on the process of harmonising the five Latin treebanks available in Universal Dependencies with respect to morphological annotation. We propose a workflow that allows to first spot inconsistencies and missing information, in order to detect to what extent the annotations differ, and then correct the retrieved bugs, with the goal of equalising the annotation of morphological features in the treebanks and producing more consistent linguistic data. Subsequently, we present some experiments carried out with UDPipe and Stanza in order to assess the impact of such harmonisation on parsing accuracy. 2023.alp-1.7 @@ -98,7 +98,7 @@ Cross-Lingual Constituency Parsing for <fixed-case>M</fixed-case>iddle <fixed-case>H</fixed-case>igh <fixed-case>G</fixed-case>erman: A Delexicalized Approach ErcongNie HelmutSchmid - HinrichSchütze + HinrichSchütze 68–79 Constituency parsing plays a fundamental role in advancing natural language processing (NLP) tasks. However, training an automatic syntactic analysis system for ancient languages solely relying on annotated parse data is a formidable task due to the inherent challenges in building treebanks for such languages. It demands extensive linguistic expertise, leading to a scarcity of available resources. To overcome this hurdle, cross-lingual transfer techniques which require minimal or even no annotated data for low-resource target languages offer a promising solution. In this study, we focus on building a constituency parser for Middle High German (MHG) under realistic conditions, where no annotated MHG treebank is available for training. In our approach, we leverage the linguistic continuity and structural similarity between MHG and Modern German (MG), along with the abundance of MG treebank resources. Specifically, by employing the delexicalization method, we train a constituency parser on MG parse datasets and perform cross-lingual transfer to MHG parsing. Our delexicalized constituency parser demonstrates remarkable performance on the MHG test set, achieving an F1-score of 67.3%. It outperforms the best zero-shot cross-lingual baseline by a margin of 28.6% points. The encouraging results underscore the practicality and potential for automatic syntactic analysis in other ancient languages that face similar challenges as MHG. 2023.alp-1.8 @@ -251,7 +251,7 @@ A Neural Pipeline for <fixed-case>POS</fixed-case>-tagging and Lemmatizing Cuneiform Languages AleksiSahala - KristerLindén + KristerLindén 203–212 We presented a pipeline for POS-tagging and lemmatizing cuneiform languages and evaluated its performance on Sumerian, first millennium Babylonian, Neo-Assyrian and Urartian texts extracted from Oracc. The system achieves a POS-tagging accuracy between 95-98% and a lemmatization accuracy of 94-96% depending on the language or dialect. For OOV words only, the current version can predict correct POS-tags for 83-91%, and lemmata for 68-84% of the input words. Compared with the earlier version, the current one has about 10% higher accuracy in OOV lemmatization and POS-tagging due to better neural network performance. We also tested the system for lemmatizing and POS-tagging the PROIEL Ancient Greek and Latin treebanks, achieving results similar to those with the cuneiform languages. 2023.alp-1.23 diff --git a/data/xml/2023.alta.xml b/data/xml/2023.alta.xml index ce7fad45c3..034c7ed29e 100644 --- a/data/xml/2023.alta.xml +++ b/data/xml/2023.alta.xml @@ -48,7 +48,7 @@ BelindaChiera CathyChua ChadiRaheb - MannyRayner + MannyRayner AnnikaSimonsen ZhengkangXiang RinaZviel-Girshin @@ -60,7 +60,7 @@ Exploring Causal Directions through Word Occurrences: Semi-supervised <fixed-case>B</fixed-case>ayesian Classification Framework King Tao JasonNg - DiegoMolla + DiegoMolla 30-39 Determining causal directions in sentences plays a critical role into understanding a cause-and-effect relationship between entities. In this paper, we show empirically that word occurrences from several Internet domains resemble the characteristics of causal directions. Our research contributes to the knowledge of the underlying data generation process behind causal directions. We propose a two-phase method: 1. Bayesian framework, which generates synthetic data from posteriors by incorporating word occurrences from the Internet domains. 2. Pre-trained BERT, which utilises semantics of words based on the context to perform classification. The proposed method achieves an improvement in performance for the Cause-Effect relations of the SemEval-2010 dataset, when compared with random guessing. 2023.alta-1.4 @@ -87,7 +87,7 @@ <fixed-case>MCASP</fixed-case>: Multi-Modal Cross Attention Network for Stock Market Prediction KamaladdinFataliyev - WeiLiu + WeiLiu 67-77 Stock market prediction is considered a complex task due to the non-stationary and volatile nature of the stock markets. With the increasing amount of online data, various information sources have been analyzed to understand the underlying patterns of the price movements. However, most existing works in the literature mostly focus on either the intra-modality information within each input data type, or the inter-modal relationships among the input modalities. Different from these, in this research, we propose a novel Multi-Modal Cross Attention Network for Stock Market Prediction (MCASP) by capturing both modality-specific features and the joint influence of each modality in a unified framework. We utilize financial news, historical market data and technical indicators to predict the movement direction of the market prices. After processing the input modalities with three separate deep networks, we first construct a self-attention network that utilizes multiple Transformer models to capture the intra-modal information. Then we design a novel cross-attention network that processes the inputs in pairs to exploit the cross-modal and joint information of the modalities. Experiments with real world datasets for S&P500 index forecast and the prediction of five individual stocks, demonstrate the effectiveness of the proposed multi-modal design over several state-of-the-art baseline models. 2023.alta-1.7 @@ -113,7 +113,7 @@ LizhenQu YufeiWang IngridZukerman - GholamrezaHaffari + GholamrezaHaffari 88-99 Flowchart-grounded troubleshooting dialogue (FTD) systems, which follow the instructions of a flowchart to diagnose users’ problems in specific domains (e.g., vehicle, laptop), have been gaining research interest in recent years. However, collecting sufficient dialogues that are naturally grounded on flowcharts is costly, thus FTD systems are impeded by scarce training data. To mitigate the data sparsity issue, we propose a plan-based synthetic data generation (PlanSDG) approach that generates diverse synthetic dialog data at scale by transforming concise flowchart into dialogues. Specifically, its generative model employs a variational-base framework with a hierarchical planning strategy that includes global and local latent planning variables. Experiments on the FloDial dataset show that synthetic dialogue produced by PlanSDG improves the performance of downstream tasks, including flowchart path retrieval and response generation, in particular on the Out-of-Flowchart settings. In addition, further analysis demonstrate the quality of synthetic data generated by PlanSDG in paths that are covered by current sample dialogues and paths that are not covered. 2023.alta-1.9 @@ -161,7 +161,7 @@ Predicting Empathic Accuracy from User-Designer Interviews StevenNguyen - DanielBeck + DanielBeck KatjaHoltta-Otto 125-129 Measuring empathy as a natural language processing task has often been limited to a subjective measure of how well individuals respond to each other in emotive situations. Cognitive empathy, or an individual’s ability to accurately assess another individual’s thoughts, remains a more novel task. In this paper, we explore natural language processing techniques to measure cognitive empathy using paired sentence data from design interviews. Our findings show that an unsupervised approach based on similarity of vectors from a Large Language Model is surprisingly promising, while adding supervision does not necessarily improve the performance. An analysis of the results highlights potential reasons for this behaviour and gives directions for future work in this space. @@ -172,7 +172,7 @@ <fixed-case>CRF</fixed-case>-based recognition of invasive fungal infection concepts in <fixed-case>CHIFIR</fixed-case> clinical reports YangMeng VladaRozova - KarinVerspoor + KarinVerspoor 130-135 Named entity recognition (NER) in clinical documentation is often hindered by the use of highly specialised terminology, variation in language used to express medical findings and general scarcity of high-quality data available for training. This short paper compares a Conditional Random Fields model to the previously established dictionary-based approach and evaluates its ability to extract information from a small corpus of annotated pathology reports. The results suggest that including token descriptors as well as contextual features significantly improves precision on several concept categories while maintaining the same level of recall. 2023.alta-1.15 @@ -190,7 +190,7 @@ Overview of the 2023 <fixed-case>ALTA</fixed-case> Shared Task: Discriminate between Human-Written and Machine-Generated Text - DiegoMolla + DiegoMolla HaolanZhan XuanliHe QiongkaiXu diff --git a/data/xml/2023.americasnlp.xml b/data/xml/2023.americasnlp.xml index 2a7fc31a22..f60cbed3ab 100644 --- a/data/xml/2023.americasnlp.xml +++ b/data/xml/2023.americasnlp.xml @@ -5,11 +5,11 @@ Proceedings of the Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP) ManuelMager AbteenEbrahimi - ArturoOncevay + ArturoOncevay EnoraRice ShrutiRijhwani AlexisPalmer - KatharinaKann + KatharinaKann Association for Computational Linguistics
Toronto, Canada
July @@ -56,7 +56,7 @@
Codex to corpus: Exploring annotation and processing for an open and extensible machine-readable edition of the Florentine Codex - FrancisTyersIndiana University + FrancisTyersIndiana University RobertPughIndiana University ValeryBerthoud F.Humboldt-Universitt zu Berlin 19-29 @@ -68,7 +68,7 @@ Developing finite-state language technology for <fixed-case>M</fixed-case>aya RobertPughIndiana University - FrancisTyersIndiana University + FrancisTyersIndiana University QuetzilCastañedaIndiana University 30-39 We describe a suite of finite-state language technologies for Maya, a Mayan language spoken in Mexico. At the core is a computational model of Maya morphology and phonology using a finite-state transducer. This model results in a morphological analyzer and a morphologically-informed spell-checker. All of these technologies are designed for use as both a pedagogical reading/writing aid for L2 learners and as a general language processing tool capable of supporting much of the natural variation in written Maya. We discuss the relevant features of Maya morphosyntax and orthography, and then outline the implementation details of the analyzer. To conclude, we present a longer-term vision for these tools and their use by both native speakers and learners. @@ -111,7 +111,7 @@ Enriching <fixed-case>W</fixed-case>ayúunaiki-<fixed-case>S</fixed-case>panish Neural Machine Translation with Linguistic Information NoraGraichenUdS - JosefVan GenabithDFKI + JosefVan GenabithDFKI CristinaEspaña-bonetDFKI GmbH 67-83 We present the first neural machine translation system for the low-resource language pair Wayúunaiki–Spanish and explore strategies to inject linguistic knowledge into the model to improve translation quality. We explore a wide range of methods and combine complementary approaches. Results indicate that incorporating linguistic information through linguistically motivated subword segmentation, factored models, and pretrained embeddings helps the system to generate improved translations, with the segmentation contributing most. In order to evaluate translation quality in a general domain and go beyond the available religious domain data, we gather and make publicly available a new test set and supplementary material. Although translation quality as measured with automatic metrics is low, we hope these resources will facilitate and support further research on Wayúunaiki. @@ -138,7 +138,7 @@ OlgaKolesnikovaInstituto Politecnico Nacional NoéCastro-SánchezTecNM/Cenidet GrigoriSidorovCIC-IPN - AlexanderGelbukhInstituto Politcnico Nacional + AlexanderGelbukhInstituto Politcnico Nacional 94-102 In this paper, we present a parallel Spanish- Mazatec and Spanish-Mixtec corpus for machine translation (MT) tasks, where Mazatec and Mixtec are two indigenous Mexican languages. We evaluated the usability of the collected corpus using three different approaches: transformer, transfer learning, and fine-tuning pre-trained multilingual MT models. Fine-tuning the Facebook m2m100-48 model outperformed the other approaches, with BLEU scores of 12.09 and 22.25 for Mazatec-Spanish and Spanish-Mazatec translations, respectively, and 16.75 and 22.15 for Mixtec-Spanish and Spanish-Mixtec translations, respectively. The results indicate that translation performance is influenced by the dataset size (9,799 sentences in Mazatec and 13,235 sentences in Mixtec) and is more effective when indigenous languages are used as target languages. The findings emphasize the importance of creating parallel corpora for indigenous languages and fine-tuning models for low-resource translation tasks. Future research will investigate zero-shot and few-shot learning approaches to further improve translation performance in low-resource settings. 2023.americasnlp-1.11 @@ -148,7 +148,7 @@ A finite-state morphological analyser for <fixed-case>H</fixed-case>ighland <fixed-case>P</fixed-case>uebla <fixed-case>N</fixed-case>ahuatl RobertPughIndiana University - FrancisTyersIndiana University + FrancisTyersIndiana University 103-108 This paper describes the development of a free/open-source finite-state morphologicaltransducer for Highland Puebla Nahuatl, a Uto-Aztecan language spoken in and around the stateof Puebla in Mexico. The finite-state toolkit used for the work is the Helsinki Finite-StateToolkit (HFST); we use the lexc formalism for modelling the morphotactics and twol formal-ism for modelling morphophonological alternations. An evaluation is presented which showsthat the transducer has a reasonable coveragearound 90%on freely-available corpora of the language, and high precisionover 95%on a manually verified test set 2023.americasnlp-1.12 @@ -244,7 +244,7 @@ MikkoAulamoUniversity of Helsinki YvesScherrerUniversity of Helsinki SamiVirpiojaUniversity of Helsinki - JörgTiedemannUniversity of Helsinki + JörgTiedemannUniversity of Helsinki 177-191 The Helsinki-NLP team participated in the AmericasNLP 2023 Shared Task with 6 submissions for all 11 language pairs arising from 4 different multilingual systems. We provide a detailed look at the work that went into collecting and preprocessing the data that led to our submissions. We explore various setups for multilingual Neural Machine Translation (NMT), namely knowledge distillation and transfer learning, multilingual NMT including a high-resource language (English), language-specific fine-tuning, and multilingual NMT exclusively using low-resource data. Our multilingual Model B ranks first in 4 out of the 11 language pairs. 2023.americasnlp-1.20 @@ -267,8 +267,8 @@ Hellina HailuNigatuUC Berkeley OlgaKolesnikovaInstituto Politecnico Nacional GrigoriSidorovCIC-IPN - AlexanderGelbukhInstituto Politcnico Nacional - JugalKalitaUniversity of Colorado + AlexanderGelbukhInstituto Politcnico Nacional + JugalKalitaUniversity of Colorado 200-205 This paper describes CIC NLP’s submission to the AmericasNLP 2023 Shared Task on machine translation systems for indigenous languages of the Americas. We present the system descriptions for three methods. We used two multilingual models, namely M2M-100 and mBART50, and one bilingual (one-to-one) — Helsinki NLP Spanish-English translation model, and experimented with different transfer learning setups. We experimented with 11 languages from America and report the setups we used as well as the results we achieved. Overall, the mBART setup was able to improve upon the baseline for three out of the eleven languages. 2023.americasnlp-1.22 diff --git a/data/xml/2023.arabicnlp.xml b/data/xml/2023.arabicnlp.xml index 72f872c783..8701849a34 100644 --- a/data/xml/2023.arabicnlp.xml +++ b/data/xml/2023.arabicnlp.xml @@ -4,7 +4,7 @@ Proceedings of ArabicNLP 2023 HassanSawaf - SamhaaEl-Beltagy + SamhaaEl-Beltagy WajdiZaghouani WalidMagdy AhmedAbdelali @@ -230,7 +230,7 @@ <fixed-case>A</fixed-case>r<fixed-case>T</fixed-case>rivia: Harvesting <fixed-case>A</fixed-case>rabic <fixed-case>W</fixed-case>ikipedia to Build A New <fixed-case>A</fixed-case>rabic Question Answering Dataset SultanAlrowili - KVijay-Shanker + KVijay-Shanker 191-207 We present ArTrivia, a new Arabic question-answering dataset consisting of more than 10,000 question-answer pairs along with relevant passages, covering a wide range of 18 diverse topics in Arabic. We created our dataset using a newly proposed pipeline that leverages diverse structured data sources from Arabic Wikipedia. Moreover, we conducted a comprehensive statistical analysis of ArTrivia and assessed the performance of each component in our pipeline. Additionally, we compared the performance of ArTrivia against the existing TyDi QA dataset using various experimental setups. Our analysis highlights the significance of often overlooked aspects in dataset creation, such as answer normalization, in enhancing the quality of QA datasets. Our evaluation also shows that ArTrivia presents more challenging and out-of-distribution questions to TyDi, raising questions about the feasibility of using ArTrivia as a complementary dataset to TyDi. 2023.arabicnlp-1.17 @@ -241,7 +241,7 @@ <fixed-case>A</fixed-case>r<fixed-case>S</fixed-case>arcas<fixed-case>M</fixed-case>oji Dataset: The Emoji Sentiment Roles in <fixed-case>A</fixed-case>rabic Ironic Contexts Shatha Ali A.Hakami - RobertHendley + RobertHendley PhillipSmith 208-217 In digital communication, emoji are essential in decoding nuances such as irony, sarcasm, and humour. However, their incorporation in Arabic natural language processing (NLP) has been cautious because of the perceived complexities of the Arabic language. This paper introduces ArSarcasMoji, a dataset of 24,630 emoji-augmented texts, with 17. 5% that shows irony. Through our analysis, we highlight specific emoji patterns paired with sentiment roles that denote irony in Arabic texts. The research counters prevailing notions, emphasising the importance of emoji’s role in understanding Arabic textual irony, and addresses their potential for accurate irony detection in Arabic digital content. @@ -556,7 +556,7 @@ Rosetta Stone at <fixed-case>KSAA</fixed-case>-<fixed-case>RD</fixed-case> Shared Task: A Hop From Language Modeling To Word–Definition Alignment AhmedElbakry MohamedGabr - MuhammadElNokrashy + MuhammadElNokrashy BadrAlKhamissi 477-482 A Reverse Dictionary is a tool enabling users to discover a word based on its provided definition, meaning, or description. Such a technique proves valuable in various scenarios, aiding language learners who possess a description of a word without its identity, and benefiting writers seeking precise terminology. These scenarios often encapsulate what is referred to as the “Tip-of-the-Tongue” (TOT) phenomena. In this work, we present our winning solution for the Arabic Reverse Dictionary shared task. This task focuses on deriving a vector representation of an Arabic word from its accompanying description. The shared task encompasses two distinct subtasks: the first involves an Arabic definition as input, while the second employs an English definition. For the first subtask, our approach relies on an ensemble of finetuned Arabic BERT-based models, predicting the word embedding for a given definition. The final representation is obtained through averaging the output embeddings from each model within the ensemble. In contrast, the most effective solution for the second subtask involves translating the English test definitions into Arabic and applying them to the finetuned models originally trained for the first subtask. This straightforward method achieves the highest score across both subtasks. @@ -572,7 +572,7 @@ HamdyMubarak SamirAbdaljalil WajdiZaghouani - PreslavNakov + PreslavNakov GiovanniDa San Martino AbedFreihat 483-493 @@ -855,7 +855,7 @@ <fixed-case>NLP</fixed-case>eople at <fixed-case>NADI</fixed-case> 2023 Shared Task: <fixed-case>A</fixed-case>rabic Dialect Identification with Augmented Context and Multi-Stage Tuning - MohabElkaref + MohabElkaref MovinaMoses ShinnosukeTanaka JamesBarry @@ -894,7 +894,7 @@ IngyAbdelhalim NadineAbdelhalim AhmedSoliman - RizaBatista-Navarro + RizaBatista-Navarro 658-664 This paper presents the methods we developed for the Nuanced Arabic Dialect Identification (NADI) 2023 shared task, specifically targeting the two subtasks focussed on sentence-level machine translation (MT) of text written in any of four Arabic dialects (Egyptian, Emirati, Jordanian and Palestinian) to Modern Standard Arabic (MSA). Our team, UniManc, employed models based on T5: multilingual T5 (mT5), multi-task fine-tuned mT5 (mT0) and AraT5. These models were trained based on two configurations: joint model training for all regional dialects (J-R) and independent model training for every regional dialect (I-R). Based on the results of the official NADI 2023 evaluation, our I-R AraT5 model obtained an overall BLEU score of 14.76, ranking first in the Closed Dialect-to-MSA MT subtask. Moreover, in the Open Dialect-to-MSA MT subtask, our J-R AraT5 model also ranked first, obtaining an overall BLEU score of 21.10. 2023.arabicnlp-1.71 @@ -1001,7 +1001,7 @@ <fixed-case>LKAU</fixed-case>23 at Qur’an <fixed-case>QA</fixed-case> 2023: Using Transformer Models for Retrieving Passages and Finding Answers to Questions from the Qur’an SarahAlnefaie AbdullahAlsaleh - EricAtwell + EricAtwell MohammadAlsalka AbdulrahmanAltahhan 720-727 @@ -1125,7 +1125,7 @@ MariamHussein SarahKhaled MarwanTorki - NagwaEl-Makky + NagwaEl-Makky 797-802 Named Entity Recognition (NER) is a crucial task in natural language processing that facilitates the extraction of vital information from text. However, NER for Arabic presents a significant challenge due to the language’s unique characteristics. In this paper, we introduce AraBINDER, our submission to the Wojood NER Shared Task 2023 (ArabicNLP 2023). The shared task comprises two sub-tasks: sub-task 1 focuses on Flat NER, while sub-task 2 centers on Nested NER. We have participated in both sub-tasks. The Bi-Encoder has proven its efficiency for NER in English. We employ AraBINDER (Arabic Bi-Encoder for Named Entity Recognition), which uses the power of two transformer encoders and employs contrastive learning to map candidate text spans and entity types into the same vector representation space. This approach frames NER as a representation learning problem that maximizes the similarity between the vector representations of an entity mention and its type. Our experiments reveal that AraBINDER achieves a micro F-1 score of 0.918 for Flat NER and 0.9 for Nested NER on the Wojood dataset. 2023.arabicnlp-1.90 @@ -1136,7 +1136,7 @@ El-Kawaref at <fixed-case>W</fixed-case>ojood<fixed-case>NER</fixed-case> shared task: <fixed-case>S</fixed-case>taged<fixed-case>NER</fixed-case> for <fixed-case>A</fixed-case>rabic Named Entity Recognition NehalElkaref - MohabElkaref + MohabElkaref 803-808 Named Entity Recognition (NER) is the task of identifying word-units that correspond to mentions as location, organization, person, or currency. In this shared task we tackle flat-entity classification for Arabic, where for each word-unit a single entity should be identified. To resolve the classification problem we propose StagedNER a novel technique to fine-tuning NER downstream tasks that divides the learning process of a transformer-model into two phases, where a model is tasked to learn sequence tags and then entity tags rather than learn both together simultaneously for an input sequence. We create an ensemble of two base models using this method that yield a score of on the development set and an F1 performance of 90.03% on the validation set and 91.95% on the test set. 2023.arabicnlp-1.91 diff --git a/data/xml/2023.argmining.xml b/data/xml/2023.argmining.xml index b6bc578c89..f20e83c51f 100644 --- a/data/xml/2023.argmining.xml +++ b/data/xml/2023.argmining.xml @@ -88,7 +88,7 @@ Legal Argument Extraction from Court Judgements using Integer Linear Programming BasitAli SachinPawar - GirishPalshikar + GirishPalshikar AninditaSinha Banerjee DhirendraSingh 52–63 @@ -138,7 +138,7 @@ PatrickKatzer MirkoOest SteffenHerbold - AnnetteHautli-Janisz + AnnetteHautli-Janisz 100–106 Debate naturalness ranges on a scale from small, highly structured, and topically focused settings to larger, more spontaneous and less constrained environments. The more unconstrained a debate, the more spontaneous speakers act: they build on contextual knowledge and use anaphora or ellipses to construct their arguments. They also use rhetorical devices such as questions and imperatives to support or attack claims. In this paper, we study how the reconstruction of the actual debate contributions, i.e., utterances which contain pronouns, ellipses and fuzzy language, into full-fledged propositions which are interpretable without context impacts the prediction of argument relations and investigate the effect of incorporating contextual information for the task. We work with highly complex spontaneous debates with more than 10 speakers on a wide variety of topics. We find that in contrast to our initial hypothesis, reconstruction does not improve predictions and context only improves them when used in combination with propositions. 2023.argmining-1.10 @@ -149,7 +149,7 @@ Unsupervised argument reframing with a counterfactual-based approach PhilippHeinisch DimitryMindlin - PhilippCimiano + PhilippCimiano 107–119 Framing is an important mechanism in argumentation, as participants in a debate tend to emphasize those aspects or dimensions of the issue under debate that support their standpoint. The task of reframing an argument, that is changing the underlying framing, has received increasing attention recently. We propose a novel unsupervised approach to argument reframing that takes inspiration from counterfactual explanation generation approaches in the field of eXplainable AI (XAI). We formalize the task as a mask-and-replace approach in which an LLM is tasked to replace masked tokens associated with a set of frames to be eliminated by other tokens related to a set of target frames to be added. Our method relies on two key mechanisms: framed decoding and reranking based on a number of metrics similar to those used in XAI to search for a suitable counterfactual. We evaluate our approach on three topics using the dataset by Ruckdeschel and Wiedemann (2022). We show that our two key mechanisms outperform an unguided LLM as a baseline by increasing the ratio of successfully reframed arguments by almost an order of magnitude. 2023.argmining-1.11 @@ -161,7 +161,7 @@ ZhexiongLiu MohamedElaraby YangZhong - DianeLitman + DianeLitman 120–132 This paper presents an overview of the ImageArg shared task, the first multimodal Argument Mining shared task co-located with the 10th Workshop on Argument Mining at EMNLP 2023. The shared task comprises two classification subtasks - (1) Subtask-A: Argument Stance Classification; (2) Subtask-B: Image Persuasiveness Classification. The former determines the stance of a tweet containing an image and a piece of text toward a controversial topic (e.g., gun control and abortion). The latter determines whether the image makes the tweet text more persuasive. The shared task received 31 submissions for Subtask-A and 21 submissions for Subtask-B from 9 different teams across 6 countries. The top submission in Subtask-A achieved an F1-score of 0.8647 while the best submission in Subtask-B achieved an F1-score of 0.5561. 2023.argmining-1.12 @@ -266,7 +266,7 @@ HariramVeeramani SaravananRajamanickam Adam MaciejWesterski - Jung-JaeKim + Jung-JaeKim 181–186 In this paper, we describe our system for ImageArg-2023 Shared Task that aims to identify an image’s stance towards a tweet and determine its persuasiveness score concerning a specific topic. In particular, the Shared Task proposes two subtasks viz. subtask (A) Multimodal Argument Stance (AS) Classification, and subtask (B) Multimodal Image Persuasiveness (IP) Classification, using a dataset composed of tweets (images and text) from controversial topics, namely gun control and abortion. For subtask A, we employ multiple transformer models using a text based approach to classify the argumentative stance of the tweet. For sub task B we adopted text based as well as multimodal learning methods to classify image persuasiveness of the tweet. Surprisingly, the text-based approach of the tweet overall performed better than the multimodal approaches considered. In summary, our best system achieved a F1 score of 0.85 for sub task (A) and 0.50 for subtask (B), and ranked 2nd in subtask (A) and 4th in subtask (B), among all teams submissions. 2023.argmining-1.20 @@ -321,9 +321,9 @@ <fixed-case>NUS</fixed-case>-<fixed-case>IDS</fixed-case> at <fixed-case>P</fixed-case>rag<fixed-case>T</fixed-case>ag-2023: Improving Pragmatic Tagging of Peer Reviews through Unlabeled Data - Sujatha DasGollapalli + Sujatha DasGollapalli YixinHuang - See-KiongNg + See-KiongNg 212–217 We describe our models for the Pragmatic Tagging of Peer Reviews Shared Task at the 10th Workshop on Argument Mining at EMNLP-2023. We trained multiple sentence classification models for the above competition task by employing various state-of-the-art transformer models that can be fine-tuned either in the traditional way or through instruction-based fine-tuning. Multiple model predictions on unlabeled data are combined to tentatively label unlabeled instances and augment the dataset to further improve performance on the prediction task. In particular, on the F1000RD corpus, we perform on-par with models trained on 100% of the training data while using only 10% of the data. Overall, on the competition datasets, we rank among the top-2 performers for the different data conditions. 2023.argmining-1.25 diff --git a/data/xml/2023.at4ssl.xml b/data/xml/2023.at4ssl.xml index 03c2791190..6ff7f60d57 100644 --- a/data/xml/2023.at4ssl.xml +++ b/data/xml/2023.at4ssl.xml @@ -9,7 +9,7 @@ Davy VanLanduyt RehanaOmardeen ShaunOboyle - AnneliesBraffort + AnneliesBraffort FlorisRoelofsen FredBlain BramVanroy @@ -39,10 +39,10 @@ A Linked Data Approach for linking and aligning Sign Language and Spoken Language Data ThierryDeclerck SamBigeard - FahadKhan + FahadKhan IreneMurtagh SussiOlsen - MikeRosner + MikeRosner InekeSchuurman AndonTchechmedjiev AndyWay @@ -55,7 +55,7 @@ An Open-Source Gloss-Based Baseline for Spoken to Signed Language Translation AmitMoryossef MathiasMüller - AnneGöhring + AnneGöhring ZifanJiang YoavGoldberg SarahEbling diff --git a/data/xml/2023.banglalp.xml b/data/xml/2023.banglalp.xml index e766290ff1..084c48527c 100644 --- a/data/xml/2023.banglalp.xml +++ b/data/xml/2023.banglalp.xml @@ -21,7 +21,7 @@ Offensive Language Identification in Transliterated and Code-Mixed <fixed-case>B</fixed-case>angla - Md NishatRaihan + Md NishatRaihan UmmaTanmoy Anika BinteIslam KaiNorth @@ -80,7 +80,7 @@ SourabrataMukherjee AkankshaBansal PrithaMajumdar - Atul Kr.OjhaUniversity of Galway, Ireland, Insight SFI Research Centre for Data Analytics, DSI, University of Galway, Ireland and Panlingua Languague Processing LLP, India + Atul Kr.OjhaUniversity of Galway, Ireland, Insight SFI Research Centre for Data Analytics, DSI, University of Galway, Ireland and Panlingua Languague Processing LLP, India OndřejDušekCharles University, Prague 34-47 Text style transfer (TST) involves modifying the linguistic style of a given text while retaining its core content. This paper addresses the challenging task of text style transfer in the Bangla language, which is low-resourced in this area. We present a novel Bangla dataset that facilitates text sentiment transfer, a subtask of TST, enabling the transformation of positive sentiment sentences to negative and vice versa. To establish a high-quality base for further research, we refined and corrected an existing English dataset of 1,000 sentences for sentiment transfer based on Yelp reviews, and we introduce a new human-translated Bangla dataset that parallels its English counterpart. Furthermore, we offer multiple benchmark models that serve as a validation of the dataset and baseline for further research. @@ -277,7 +277,7 @@ nlp<fixed-case>BD</fixed-case>patriots at <fixed-case>BLP</fixed-case>-2023 Task 1: Two-Step Classification for Violence Inciting Text Detection in <fixed-case>B</fixed-case>angla - Leveraging Back-Translation and Multilinguality - Md NishatRaihan + Md NishatRaihan DhimanGoswamiGeorge Mason University Sadiya Sayara ChowdhuryPuspoGeorge Mason University MarcosZampieriGeorge Mason University @@ -370,7 +370,7 @@ <fixed-case>UFAL</fixed-case>-<fixed-case>ULD</fixed-case> at <fixed-case>BLP</fixed-case>-2023 Task 1: Violence Detection in <fixed-case>B</fixed-case>angla Text SourabrataMukherjee - Atul Kr.OjhaUniversity of Galway, Ireland, Insight SFI Research Centre for Data Analytics, DSI, University of Galway, Ireland and Panlingua Languague Processing LLP, India + Atul Kr.OjhaUniversity of Galway, Ireland, Insight SFI Research Centre for Data Analytics, DSI, University of Galway, Ireland and Panlingua Languague Processing LLP, India OndřejDušekCharles University, Prague 220-224 In this paper, we present UFAL-ULD team’s system, desinged as a part of the BLP Shared Task 1: Violence Inciting Text Detection (VITD). This task aims to classify text, with a particular challenge of identifying incitement to violence into Direct, Indirect or Non-violence levels. We experimented with several pre-trained sequence classification models, including XLM-RoBERTa, BanglaBERT, Bangla BERT Base, and Multilingual BERT. Our best-performing model was based on the XLM-RoBERTa-base architecture, which outperformed the baseline models. Our system was ranked 20th among the 27 teams that participated in the task. @@ -478,7 +478,7 @@ M1437 at <fixed-case>BLP</fixed-case>-2023 Task 2: Harnessing <fixed-case>B</fixed-case>angla Text for Sentiment Analysis: A Transformer-based Approach MajidurRahmanGeorge Mason University - OzlemUzunerGeorge Mason University + OzlemUzunerGeorge Mason University 279-285 Analyzing public sentiment on social media is helpful in understanding the public’s emotions about any given topic. While numerous studies have been conducted in this field, there has been limited research on Bangla social media data. Team M1437 from George Mason University participated in the Sentiment Analysis shared task of the Bangla Language Processing (BLP) Workshop at EMNLP-2023. The team fine-tuned various BERT-based Transformer architectures to solve the task. This article shows that BanglaBERT_{large}, a language model pre-trained on Bangla text, outperformed other BERT-based models. This model achieved an F1 score of 73.15% and top position in the development phase, was further tuned with external training data, and achieved an F1 score of 70.36% in the evaluation phase, securing the fourteenth place on the leaderboard. The F1 score on the test set, when BanglaBERT_{large} was trained without external training data, was 71.54%. 2023.banglalp-1.36 @@ -489,7 +489,7 @@ nlp<fixed-case>BD</fixed-case>patriots at <fixed-case>BLP</fixed-case>-2023 Task 2: A Transfer Learning Approach towards <fixed-case>B</fixed-case>angla Sentiment Analysis DhimanGoswamiGeorge Mason University - Md NishatRaihan + Md NishatRaihan Sadiya Sayara ChowdhuryPuspoGeorge Mason University MarcosZampieriGeorge Mason University 286-292 @@ -590,7 +590,7 @@ <fixed-case>UFAL</fixed-case>-<fixed-case>ULD</fixed-case> at <fixed-case>BLP</fixed-case>-2023 Task 2 Sentiment Classification in <fixed-case>B</fixed-case>angla Text SourabrataMukherjee - Atul Kr.OjhaUniversity of Galway, Ireland, Insight SFI Research Centre for Data Analytics, DSI, University of Galway, Ireland and Panlingua Languague Processing LLP, India + Atul Kr.OjhaUniversity of Galway, Ireland, Insight SFI Research Centre for Data Analytics, DSI, University of Galway, Ireland and Panlingua Languague Processing LLP, India OndřejDušekCharles University, Prague 336-339 In this paper, we present the UFAL-ULD team’s system for the BLP Shared Task 2: Sentiment Analysis of Bangla Social Media Posts. The Task 2 involves classifying text into Positive, Negative, or Neutral sentiments. As a part of this task, we conducted a series of experiments with several pre-trained sequence classification models – XLM-RoBERTa, BanglaBERT, Bangla BERT Base and Multilingual BERT. Among these, our best-performing model was based on the XLM-RoBERTa-base architecture, which outperforms baseline models. Our system was ranked 19th among the 30 teams that participated in the task. diff --git a/data/xml/2023.bea.xml b/data/xml/2023.bea.xml index 0f2bb5c54e..ab2275018b 100644 --- a/data/xml/2023.bea.xml +++ b/data/xml/2023.bea.xml @@ -55,7 +55,7 @@ RichardZhouYale VanessaYanYale SwapnilHingmireNa - DragomirRadevYale University + DragomirRadevYale University 29-43 Effective human learning depends on a wide selection of educational materials that align with the learner’s current understanding of the topic. While the Internet has revolutionized human learning or education, a substantial resource accessibility barrier still exists. Namely, the excess of online information can make it challenging to navigate and discover high-quality learning materials in a given subject area. In this paper, we propose an automatic pipeline for building an educational resource discovery system for new domains. The pipeline consists of three main steps: resource searching, feature extraction, and resource classification. We first collect frequent queries from a set of seed documents, and search the web with these queries to obtain candidate resources such as lecture slides and introductory blog posts. Then, we process these resources for BERT-based features and meta-features. Next, we train a tree-based classifier to decide whether they are suitable learning materials. The pipeline achieves F1 scores of 0.94 and 0.82 when evaluated on two similar but novel domains. Finally, we demonstrate how this pipeline can benefit two applications: prerequisite chain learning and leading paragraph generation for surveys. We also release a corpus of 39,728 manually labeled web resources and 659 queries from NLP, Computer Vision (CV), and Statistics (STATS). 2023.bea-1.3 @@ -66,7 +66,7 @@ Using Learning Analytics for Adaptive Exercise Generation TanjaHeckUniversity of Tbingen - DetmarMeurersUniversitt Tbingen + DetmarMeurersUniversitt Tbingen 44-56 Single Choice exercises constitute a central exercise type for language learning in a learner’s progression from mere implicit exposure through input enhancement to productive language use in open exercises. Distractors that support learning in the individual zone of proximal development should not be derived from static analyses of learner corpora, but rely on dynamic learning analytics based on half-open exercises. We demonstrate how a system’s error diagnosis module can be re-used for automatic and dynamic generation and adaptation of distractors, as well as to inform exercise generation in terms of relevant learning goals and reasonable chunking in Jumbled Sentences exercises. 2023.bea-1.4 @@ -233,7 +233,7 @@ YukiOkanoTokyo Institute of Technology KotaroFunakoshiTokyo Institute of Technology RyoNagataKonan University - ManabuOkumuraTokyo Institute of Technology + ManabuOkumuraTokyo Institute of Technology 184-194 This paper proposes a new second language learning task of generating a response including specified grammatical items. We consider two approaches: 1) fine-tuning a pre-trained language model (DialoGPT) by reinforcement learning and 2) providing a few-shot prompt to a large language model (GPT-3). For reinforcement learning, we examine combinations of three reward functions that consider grammatical items, diversity, and fluency. Our experiments confirm that both approaches can generate responses including the specified grammatical items and that it is crucial to consider fluency rather than diversity as the reward function. 2023.bea-1.16 @@ -257,7 +257,7 @@ MengsayLoemTokyo Institute of Technology MasahiroKanekoTokyo Institute of Technology ShoTakaseTokyo Institute of Technology - NaoakiOkazakiTokyo Institute of Technology + NaoakiOkazakiTokyo Institute of Technology 205-219 Large-scale pre-trained language models such as GPT-3 have shown remarkable performance across various natural language processing tasks. However, applying prompt-based methods with GPT-3 for Grammatical Error Correction (GEC) tasks and their controllability remains underexplored. Controllability in GEC is crucial for real-world applications, particularly in educational settings, where the ability to tailor feedback according to learner levels and specific error types can significantly enhance the learning process. This paper investigates the performance and controllability of prompt-based methods with GPT-3 for GEC tasks using zero-shot and few-shot setting. We explore the impact of task instructions and examples on GPT-3’s output, focusing on controlling aspects such as minimal edits, fluency edits, and learner levels. Our findings demonstrate that GPT-3 could effectively perform GEC tasks, outperforming existing supervised and unsupervised approaches. We also showed that GPT-3 could achieve controllability when appropriate task instructions and examples are given. 2023.bea-1.18 @@ -280,7 +280,7 @@ Towards Extracting and Understanding the Implicit Rubrics of Transformer Based Automatic Essay Scoring Models JamesFiaccoCarnegie Mellon University DavidAdamsonTurnitin - CarolynRoseCarnegie Mellon University + CarolynRoseCarnegie Mellon University 232-241 By aligning the functional components derived from the activations of transformer models trained for AES with external knowledge such as human-understandable feature groups, the proposed method improves the interpretability of a Longformer Automatic Essay Scoring (AES) system and provides tools for performing such analyses on further neural AES systems. The analysis focuses on models trained to score essays based on organization, main idea, support, and language. The findings provide insights into the models’ decision-making processes, biases, and limitations, contributing to the development of more transparent and reliable AES systems. 2023.bea-1.20 @@ -329,7 +329,7 @@ Predicting the Quality of Revisions in Argumentative Writing ZhexiongLiuUniversity of Pittsburgh - DianeLitmanUniversity of Pittsburgh + DianeLitmanUniversity of Pittsburgh ElaineWangRAND Corporation LindsayMatsumuraUniversity of Pittsburgh RichardCorrentiUniversity of Pittsburgh @@ -344,7 +344,7 @@ Reconciling Adaptivity and Task Orientation in the Student Dashboard of an Intelligent Language Tutoring System LeonaCollingUniversity of Tbingen TanjaHeckUniversity of Tbingen - DetmarMeurersUniversitt Tbingen + DetmarMeurersUniversitt Tbingen 288-299 In intelligent language tutoring systems, student dashboards should display the learning progress and performance and support the navigation through the learning content. Designing an interface that transparently offers information on students’ learning in relation to specific learning targets while linking to the overarching functional goal, that motivates and organizes the practice in current foreign language teaching, is challenging. This becomes even more difficult in systems that adaptively expose students to different learning material and individualize system interactions. If such a system is used in an ecologically valid setting of blended learning, this generates additional requirements to incorporate the needs of students and teachers for control and customizability.We present the conceptual design of a student dashboard for a task-based, user-adaptive intelligent language tutoring system intended for use in real-life English classes in secondary schools. We highlight the key challenges and spell out open questions for future research. 2023.bea-1.25 @@ -368,7 +368,7 @@ <fixed-case>SIGHT</fixed-case>: A Large Annotated Dataset on Student Insights Gathered from Higher Education Transcripts RoseWangStanford PawanWirawarnStanford - NoahGoodmanStanford University + NoahGoodmanStanford University DorottyaDemszkyStanford University 315-351 Lectures are a learning experience for both students and teachers. Students learn from teachers about the subject material, while teachers learn from students about how to refine their instruction. Unfortunately, online student feedback is unstructured and abundant, making it challenging for teachers to learn and improve. We take a step towards tackling this challenge. First, we contribute a dataset for studying this problem: SIGHT is a large dataset of 288 math lecture transcripts and 15,784 comments collected from the Massachusetts Institute of Technology OpenCourseWare (MIT OCW) YouTube channel. Second, we develop a rubric for categorizing feedback types using qualitative analysis. Qualitative analysis methods are powerful in uncovering domain-specific insights, however they are costly to apply to large data sources. To overcome this challenge, we propose a set of best practices for using large language models (LLMs) to cheaply classify the comments at scale. We observe a striking correlation between the model’s and humans’ annotation: Categories with consistent human annotations (0.9 inter-rater reliability, IRR) also display higher human-model agreement (0.7), while categories with less consistent human annotations (0.7-0.8 IRR) correspondingly demonstrate lower human-model agreement (0.3-0.5). These techniques uncover useful student feedback from thousands of comments, costing around $0.002 per comment. We conclude by discussing exciting future directions on using online student feedback and improving automated annotation techniques for qualitative research. @@ -492,7 +492,7 @@ Hybrid Models for Sentence Readability Assessment FengkaiLiuCity University of Hong Kong - JohnLeeCity University of Hong Kong + JohnLeeCity University of Hong Kong 448-454 Automatic readability assessment (ARA) predicts how difficult it is for the reader to understand a text. While ARA has traditionally been performed at the passage level, there has been increasing interest in ARA at the sentence level, given its applications in downstream tasks such as text simplification and language exercise generation. Recent research has suggested the effectiveness of hybrid approaches for ARA, but they have yet to be applied on the sentence level. We present the first study that compares neural and hybrid models for sentence-level ARA. We conducted experiments on graded sentences from the Wall Street Journal (WSJ) and a dataset derived from the OneStopEnglish corpus. Experimental results show that both neural and hybrid models outperform traditional classifiers trained on linguistic features. Hybrid models obtained the best accuracy on both datasets, surpassing the previous best result reported on the WSJ dataset by almost 13% absolute. 2023.bea-1.37 @@ -736,7 +736,7 @@ Socratic Questioning of Novice Debuggers: A Benchmark Dataset and Preliminary Evaluations ErfanAl-HossamiUniversity of North Carolina at Charlotte - RazvanBunescuUniversity of North Carolina at Charlotte + RazvanBunescuUniversity of North Carolina at Charlotte RyanTeehanNew York University LaurelPowellUniversity of North Carolina at Charlotte KhyatiMahajanUniversity of North Carolina at Charlotte @@ -834,7 +834,7 @@ The <fixed-case>ADAIO</fixed-case> System at the <fixed-case>BEA</fixed-case>-2023 Shared Task: Shared Task Generating <fixed-case>AI</fixed-case> Teacher Responses in Educational Dialogues AdaezeAdigweUniversity of Edinburgh - ZhengYuanIstituto Italiano di Tecnologia, Universit di Ferrara, Italy + ZhengYuanIstituto Italiano di Tecnologia, Universit di Ferrara, Italy 796-804 This paper presents the ADAIO team’s system entry in the Building Educational Applications (BEA) 2023 Shared Task on Generating AI Teacher Responses in Educational Dialogues. The task aims to assess the performance of state-of-the-art generative models as AI teachers in producing suitable responses within a student-teacher dialogue. Our system comprises evaluating various baseline models using OpenAI GPT-3 and designing diverse prompts to prompt the OpenAI models for teacher response generation. After the challenge, our system achieved second place by employing a few-shot prompt-based approach with the OpenAI text-davinci-003 model. The results highlight the few-shot learning capabilities of large-language models, particularly OpenAI’s GPT-3, in the role of AI teachers. 2023.bea-1.65 diff --git a/data/xml/2023.bigpicture.xml b/data/xml/2023.bigpicture.xml index 5425efd52c..1ac74db87f 100644 --- a/data/xml/2023.bigpicture.xml +++ b/data/xml/2023.bigpicture.xml @@ -88,10 +88,10 @@ Transformers as Graph-to-Graph Models - JamesHendersonIdiap Research Institute + JamesHendersonIdiap Research Institute AlirezaMohammadshahi AndreiComan - LeslyMiculicichGoogle + LeslyMiculicichGoogle 93-107 We argue that Transformers are essentially graph-to-graph models, with sequences just being a special case. Attention weights are functionally equivalent to graph edges. Our Graph-to-Graph Transformer architecture makes this ability explicit, by inputting graph edges into the attention weight computations and predicting graph edges with attention-like functions, thereby integrating explicit graphs into the latent graphs learned by pretrained Transformers. Adding iterative graph refinement provides a joint embedding of input, output, and latent graphs, allowing non-autoregressive graph prediction to optimise the complete graph without any bespoke pipeline or decoding strategy. Empirical results show that this architecture achieves state-of-the-art accuracies for modelling a variety of linguistic structures, integrating very effectively with the latent linguistic representations learned by pretraining. 2023.bigpicture-1.8 @@ -103,7 +103,7 @@ AmandaBertschCarnegie Mellon University AlexXie GrahamNeubigCarnegie Mellon University - MatthewGormleySchool of Computer Science, Carnegie Mellon University and 3M + MatthewGormleySchool of Computer Science, Carnegie Mellon University and 3M 108-122 Minimum Bayes Risk (MBR) decoding is a method for choosing the outputs of a machine learning system based not on the output with the highest probability, but the output with the lowest risk (expected error) among multiple candidates. It is a simple but powerful method: for an additional cost at inference time, MBR provides reliable several-point improvements across metrics for a wide variety of tasks without any additional data or training. Despite this, MBR is not frequently applied in NLP works, and knowledge of the method itself is limited. We first provide an introduction to the method and the recent literature. We show that several recent methods that do not reference MBR can be written as special cases of MBR; this reformulation provides additional theoretical justification for the performance of these methods, explaining some results that were previously only empirical. We provide theoretical and empirical results about the effectiveness of various MBR variants and make concrete recommendations for the application of MBR in NLP models, including future directions in this area. 2023.bigpicture-1.9 diff --git a/data/xml/2023.bionlp.xml b/data/xml/2023.bionlp.xml index ebad61b449..0676930fae 100644 --- a/data/xml/2023.bionlp.xml +++ b/data/xml/2023.bionlp.xml @@ -5,7 +5,7 @@ Proceedings of the 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks DinaDemner-fushman SophiaAnaniadou - KevinCohen + KevinCohen Association for Computational Linguistics
Toronto, Canada
July @@ -21,7 +21,7 @@ Multi-Source (Pre-)Training for Cross-Domain Measurement, Unit and Context Extraction YuelingLiBasf Se SebastianMartschatBasf Se - Simone PaoloPonzettoUniversity of Mannheim + Simone PaoloPonzettoUniversity of Mannheim 1-25 We present a cross-domain approach for automated measurement and context extraction based on pre-trained language models. We construct a multi-source, multi-domain corpus and train an end-to-end extraction pipeline. We then apply multi-source task-adaptive pre-training and fine-tuning to benchmark the cross-domain generalization capability of our model. Further, we conceptualize and apply a task-specific error analysis and derive insights for future work. Our results suggest that multi-source training leads to the best overall results, while single-source training yields the best results for the respective individual domain. While our setup is successful at extracting quantity values and units, more research is needed to improve the extraction of contextual entities. We make the cross-domain corpus used in this work available online. 2023.bionlp-1.1 @@ -44,7 +44,7 @@ Exploring Partial Knowledge Base Inference in Biomedical Entity Linking HongyiYuanTsinghua University KemingLuUniversity of Southern California - ZhengYuanAlibaba Group + ZhengYuanAlibaba Group 37-49 Biomedical entity linking (EL) consists of named entity recognition (NER) and named entity disambiguation (NED). EL models are trained on corpora labeled by a predefined KB. However, it is a common scenario that only entities within a subset of the KB are precious to stakeholders. We name this scenario partial knowledge base inference; training an EL model with one KB and inferring on the part of it without further training. In this work, we give a detailed definition and evaluation procedures for this practically valuable but significantly understudied scenario and evaluate methods from three representative EL paradigms. We construct partial KB inference benchmarks and witness a catastrophic degradation in EL performance due to dramatically precision drop. Our findings reveal these EL paradigms can not correctly handle unlinkable mentions (NIL), so they are not robust to partial KB inference. We also propose two simple-and-effective redemption methods to combat the NIL issue with little computational overhead. 2023.bionlp-1.3 @@ -87,7 +87,7 @@ Evaluating and Improving Automatic Speech Recognition using Severity RyanWhettenBoise State University - CaseyKenningtonBoise State University + CaseyKenningtonBoise State University 79-91 A common metric for evaluating Automatic Speech Recognition (ASR) is Word Error Rate (WER) which solely takes into account discrepancies at the word-level. Although useful, WER is not guaranteed to correlate well with human judgment or performance on downstream tasks that use ASR. Meaningful assessment of ASR mistakes becomes even more important in high-stake scenarios such as health-care. We propose 2 general measures to evaluate the severity of mistakes made by ASR systems, one based on sentiment analysis and another based on text embeddings. We evaluate these measures on simulated patient-doctor conversations using 5 ASR systems. Results show that these measures capture characteristics of ASR errors that WER does not. Furthermore, we train an ASR system incorporating severity and demonstrate the potential for using severity not only in the evaluation, but in the development of ASR. Advantages and limitations of this methodology are analyzed and discussed. 2023.bionlp-1.6 @@ -109,7 +109,7 @@ Good Data, Large Data, or No Data? Comparing Three Approaches in Developing Research Aspect Classifiers for Biomedical Papers ShreyaChandrasekharPenn State University Chieh-YangHuangPennsylvania State University - Ting-HaoHuangPennsylvania State University + Ting-HaoHuangPennsylvania State University 103-113 The rapid growth of scientific publications, particularly during the COVID-19 pandemic, emphasizes the need for tools to help researchers efficiently comprehend the latest advancements. One essential part of understanding scientific literature is research aspect classification, which categorizes sentences in abstracts to Background, Purpose, Method, and Finding. In this study, we investigate the impact of different datasets on model performance for the crowd-annotated CODA-19 research aspect classification task. Specifically, we explore the potential benefits of using the large, automatically curated PubMed 200K RCT dataset and evaluate the effectiveness of large language models (LLMs), such as LLaMA, GPT-3, ChatGPT, and GPT-4. Our results indicate that using the PubMed 200K RCT dataset does not improve performance for the CODA-19 task. We also observe that while GPT-4 performs well, it does not outperform the SciBERT model fine-tuned on the CODA-19 dataset, emphasizing the importance of a dedicated and task-aligned datasets dataset for the target task. 2023.bionlp-1.8 @@ -200,7 +200,7 @@ NesrineBannourUniversite Paris-Saclay, CNRS, LISN BastienRanceINSERM, Universite Paris Cit, Sorbonne Paris Cit, AP-HP, HEGP, HeKa, Inria Paris XavierTannierSorbonne Universite, Inserm, LIMICS - AurelieNeveolUniversite Paris Saclay, CNRS, LISN + AurelieNeveolUniversite Paris Saclay, CNRS, LISN 191-205 Extracting temporal relations usually entails identifying and classifying the relation between two mentions. However, the definition of temporal mentions strongly depends on the text type and the application domain. Clinical text in particular is complex. It may describe events that occurred at different times, contain redundant information and a variety of domain-specific temporal expressions. In this paper, we propose a novel event-independent representation of temporal relations that is task-independent and, therefore, domain-independent. We are interested in identifying homogeneous text portions from a temporal standpoint and classifying the relation between each text portion and the document creation time. Temporal relation extraction is cast as a sequence labeling task and evaluated on oncology notes. We further evaluate our temporal representation by the temporal positioning of toxicity events of chemotherapy administrated to colon and lung cancer patients described in French clinical reports. An overall macro F-measure of 0.86 is obtained for temporal relation extraction by a neural token classification model trained on clinical texts written in French. Our results suggest that the toxicity event extraction task can be performed successfully by automatically identifying toxicity events and placing them within the patient timeline (F-measure .62). The proposed system has the potential to assist clinicians in the preparation of tumor board meetings. 2023.bionlp-1.16 @@ -250,7 +250,7 @@ Multiple Evidence Combination for Fact-Checking of Health-Related Information PritamDekaQueen’s University Belfast AnnaJurek-LoughreyQueen’s University Belfast - DeepakPQueen’s University Belfast + DeepakPQueen’s University Belfast 237-247 Fact-checking of health-related claims has become necessary in this digital age, where any information posted online is easily available to everyone. The most effective way to verify such claims is by using evidences obtained from reliable sources of medical knowledge, such as PubMed. Recent advances in the field of NLP have helped automate such fact-checking tasks. In this work, we propose a domain-specific BERT-based model using a transfer learning approach for the task of predicting the veracity of claim-evidence pairs for the verification of health-related facts. We also improvise on a method to combine multiple evidences retrieved for a single claim, taking into consideration conflicting evidences as well. We also show how our model can be exploited when labelled data is available and how back-translation can be used to augment data when there is data scarcity. 2023.bionlp-1.20 @@ -308,7 +308,7 @@ Extracting Drug-Drug and Protein-Protein Interactions from Text using a Continuous Update of Tree-Transformers SudiptaSingha RoyThe University of Western Ontario - Robert E.MercerThe University of Western Ontario + Robert E.MercerThe University of Western Ontario 280-291 Understanding biological mechanisms requires determining mutual protein-protein interactions (PPI). Obtaining drug-drug interactions (DDI) from scientific articles provides important information about drugs. Extracting such medical entity interactions from biomedical articles is challenging due to complex sentence structures. To address this issue, our proposed model utilizes tree-transformers to generate the sentence representation first, and then a sentence-to-word update step to fine-tune the word embeddings which are again used by the tree-transformers to generate enriched sentence representations. Using the tree-transformers helps the model preserve syntactical information and provide semantic information. The fine-tuning provided by the continuous update step adds improved semantics to the representation of each sentence. Our model outperforms other prominent models with a significant performance boost on the five standard PPI corpora and a performance boost on the one benchmark DDI corpus that are used in our experiments. 2023.bionlp-1.25 @@ -342,7 +342,7 @@ End-to-end clinical temporal information extraction with multi-head attention - TimothyMillerBoston Children’s Hospital and Harvard Medical School + TimothyMillerBoston Children’s Hospital and Harvard Medical School StevenBethardUniversity of Arizona DmitriyDligachLoyola University Chicago GuerganaSavovaBoston Children’s Hospital and Harvard Medical School @@ -467,7 +467,7 @@ Can Social Media Inform Dietary Approaches for Health Management? A Dataset and Benchmark for Low-Carb Diet SkylerZouCsiro - XiangDaiCsiro + XiangDaiCsiro GrantBrinkworthCsiro PennieTaylorCsiro SarvnazKarimiCsiro @@ -479,9 +479,9 @@ Promoting Fairness in Classification of Quality of Medical Evidence - SimonSusterUniversity of Melbourne - TimothyBaldwinMbzuai - KarinVerspoorRMIT University + SimonSusterUniversity of Melbourne + TimothyBaldwinMbzuai + KarinVerspoorRMIT University 413-426 Automatically rating the quality of published research is a critical step in medical evidence synthesis. While several methods have been proposed, their algorithmic fairness has been overlooked even though significant risks may follow when such systems are deployed in biomedical contexts. In this work, we study fairness on two systems along two sensitive attributes, participant sex and medical area. In some cases, we find important inequalities, leading us to apply various debiasing methods. Upon examining an interplay of systems’ predictive performance, fairness, as well as medically critical selective classification capabilities and calibration performance, we find that fairness can sometimes improve through debiasing, but at a cost in other performance measures. 2023.bionlp-1.39 @@ -542,7 +542,7 @@ Overview of the Problem List Summarization (<fixed-case>P</fixed-case>rob<fixed-case>S</fixed-case>um) 2023 Shared Task on Summarizing Patients’ Active Diagnoses and Problems from Electronic Health Record Progress Notes YanjunGaoUniversity of Wisconsin Madison DmitriyDligachLoyola University Chicago - TimothyMillerBoston Children’s Hospital and Harvard Medical School + TimothyMillerBoston Children’s Hospital and Harvard Medical School MajidAfsharUniversity of Wisconsin 461-467 The BioNLP Workshop 2023 initiated the launch of a shared task on Problem List Summarization (ProbSum) in January 2023. The aim of this shared task is to attract future research efforts in building NLP models for real-world diagnostic decision support applications, where a system generating relevant and accurate diagnoses will augment the healthcare providers’ decision-making process and improve the quality of care for patients. The goal for participants is to develop models that generated a list of diagnoses and problems using input from the daily care notes collected from the hospitalization of critically ill patients. Eight teams submitted their final systems to the shared task leaderboard. In this paper, we describe the tasks, datasets, evaluation metrics, and baseline systems. Additionally, the techniques and results of the evaluation of the different approaches tried by the participating teams are summarized. @@ -555,7 +555,7 @@ TomasGoldsackUniversity of Sheffield ZhehengLuoUniversity of Manchester QianqianXieUniversity of Manchester - CarolinaScartonUniversity of Sheffield + CarolinaScartonUniversity of Sheffield MatthewShardlowManchester Metropolitan University SophiaAnaniadouUniversity of Manchester ChenghuaLinDepartment of Computer Science, University of Sheffield @@ -593,7 +593,7 @@ <fixed-case>D</fixed-case>eakin<fixed-case>NLP</fixed-case> at <fixed-case>P</fixed-case>rob<fixed-case>S</fixed-case>um 2023: Clinical Progress Note Summarization with Rules and Language <fixed-case>M</fixed-case>odels<fixed-case>C</fixed-case>linical Progress Note Summarization with Rules and Languague Models MingLiuDeakin University - DanZhangDeakin University + DanZhangDeakin University WeicongTanMonash University HeZhangCnpiec Kexin Ltd 491-496 @@ -618,13 +618,13 @@ HaoLiUniversity of Manchester YupingWuUniversity of Manchester ViktorSchlegelAsus Aics - RizaBatista-NavarroDepartment of Computer Science, The University of Manchester + RizaBatista-NavarroDepartment of Computer Science, The University of Manchester Thanh-TungNguyenAsus AbhinavRamesh KashyapAsus Aics Xiao-JunZengUniversity of Manchester - DanielBeckUniversity of Melbourne + DanielBeckUniversity of Melbourne StefanWinklerNational University of Singapore - GoranNenadicUniversity of Manchester + GoranNenadicUniversity of Manchester 503-509 Medical progress notes play a crucial role in documenting a patient’s hospital journey, including his or her condition, treatment plan, and any updates for healthcare providers. Automatic summarisation of a patient’s problems in the form of a “problem list” can aid stakeholders in understanding a patient’s condition, reducing workload and cognitive bias. BioNLP 2023 Shared Task 1A focusses on generating a list of diagnoses and problems from the provider’s progress notes during hospitalisation. In this paper, we introduce our proposed approach to this task, which integrates two complementary components. One component employs large language models (LLMs) for data augmentation; the other is an abstractive summarisation LLM with a novel pre-training objective for generating the patients’ problems summarised as a list. Our approach was ranked second among all submissions to the shared task. The performance of our model on the development and test datasets shows that our approach is more robust on unknown data, with an improvement of up to 3.1 points over the same size of the larger model. 2023.bionlp-1.49 @@ -852,7 +852,7 @@ <fixed-case>CSIRO</fixed-case> <fixed-case>D</fixed-case>ata61 Team at <fixed-case>B</fixed-case>io<fixed-case>L</fixed-case>ay<fixed-case>S</fixed-case>umm Task 1: Lay Summarisation of Biomedical Research Articles Using Generative Models Mong YuanSimThe University of Adelaide - XiangDaiCSIRO Data61 + XiangDaiCSIRO Data61 MaciejRybinskiCsiro SarvnazKarimiCsiro 629-635 diff --git a/data/xml/2023.blackboxnlp.xml b/data/xml/2023.blackboxnlp.xml index 23a4f61420..31d1eb5af3 100644 --- a/data/xml/2023.blackboxnlp.xml +++ b/data/xml/2023.blackboxnlp.xml @@ -7,7 +7,7 @@ SophieHao JaapJumelet NajoungKim - AryaMcCarthy + AryaMcCarthy HoseinMohebbi Association for Computational Linguistics
Singapore
@@ -52,7 +52,7 @@ ChandanSingh John X.Morris JyotiAneja - AlexanderRush + AlexanderRush JianfengGao 31–55 Large language models (LLMs) have displayed an impressive ability to harness natural language to perform complex tasks. We explore whether we can leverage this ability to find and explain patterns in data. Specifically, given a pre-trained LLM and data examples, we apply interpretable autoprompting (iPrompt) to generate a natural language string explaining the data. iPrompt iteratively generates explanations with an LLM and reranks them based on their performance when used as a prompt. Experiments on a wide range of datasets, from synthetic mathematics to natural language understanding, show that iPrompt can yield meaningful insights by accurately finding dataset explanations that are human-interpretable. Moreover, iPrompt is reasonably efficient, as it does not require access to model gradients and works with relatively small models (e.g. ~6 billion parameters rather than >=100 billion). Finally, experiments with scientific datasets show the potential for iPrompt to aid in scientific discovery. @@ -116,7 +116,7 @@ Unveiling Multilinguality in Transformer Models: Exploring Language Specificity in Feed-Forward Networks SunitBhattacharya - OndřejBojar + OndřejBojar 120–126 Recent research suggests that the feed-forward module within Transformers can be viewed as a collection of key-value memories, where the keys learn to capture specific patterns from the input based on the training examples. The values then combine the output from the ‘memories’ of the keys to generate predictions about the next token. This leads to an incremental process of prediction that gradually converges towards the final token choice near the output layers. This interesting perspective raises questions about how multilingual models might leverage this mechanism. Specifically, for autoregressive models trained on two or more languages, do all neurons (across layers) respond equally to all languages? No! Our hypothesis centers around the notion that during pre-training, certain model parameters learn strong language-specific features, while others learn more language-agnostic (shared across languages) features. To validate this, we conduct experiments utilizing parallel corpora of two languages that the model was initially pre-trained on. Our findings reveal that the layers closest to the network’s input or output tend to exhibit more language-specific behaviour compared to the layers in the middle. 2023.blackboxnlp-1.9 @@ -136,7 +136,7 @@ Investigating Semantic Subspaces of Transformer Sentence Embeddings through Linear Structural Probing DmitryNikolaev - SebastianPadó + SebastianPadó 142–154 The question of what kinds of linguistic information are encoded in different layers of Transformer-based language models is of considerable interest for the NLP community. Existing work, however, has overwhelmingly focused on word-level representations and encoder-only language models with the masked-token training objective. In this paper, we present experiments with semantic structural probing, a method for studying sentence-level representations via finding a subspace of the embedding space that provides suitable task-specific pairwise distances between data-points. We apply our method to language models from different families (encoder-only, decoder-only, encoder-decoder) and of different sizes in the context of two tasks, semantic textual similarity and natural-language inference. We find that model families differ substantially in their performance and layer dynamics, but that the results are largely model-size invariant. 2023.blackboxnlp-1.11 @@ -157,7 +157,7 @@ Enhancing Interpretability Using Human Similarity Judgements to Prune Word Embeddings NataliaFlechas Manrique WanqianBao - AurelieHerbelot + AurelieHerbelot UriHasson 169–179 Interpretability methods in NLP aim to provide insights into the semantics underlying specific system architectures. Focusing on word embeddings, we present a supervised-learning method that, for a given domain (e.g., sports, professions), identifies a subset of model features that strongly improve prediction of human similarity judgments. We show this method keeps only 20-40% of the original embeddings, for 8 independent semantic domains, and that it retains different feature sets across domains. We then present two approaches for interpreting the semantics of the retained features. The first obtains the scores of the domain words (co-hyponyms) on the first principal component of the retained embeddings, and extracts terms whose co-occurrence with the co-hyponyms tracks these scores’ profile. This analysis reveals that humans differentiate e.g. sports based on how gender-inclusive and international they are. The second approach uses the retained sets as variables in a probing task that predicts values along 65 semantically annotated dimensions for a dataset of 535 words. The features retained for professions are best at predicting cognitive, emotional and social dimensions, whereas features retained for fruits or vegetables best predict the gustation (taste) dimension. We discuss implications for alignment between AI systems and human knowledge. @@ -168,7 +168,7 @@ When Your Language Model Cannot <fixed-case>E</fixed-case>ven Do Determiners Right: Probing for Anti-Presuppositions and the Maximize Presupposition! Principle JudithSieker - SinaZarrieß + SinaZarrieß 180–198 The increasing interest in probing the linguistic capabilities of large language models (LLMs) has long reached the area of semantics and pragmatics, including the phenomenon of presuppositions. In this study, we investigate a phenomenon that, however, has not yet been investigated, i.e., the phenomenon of anti-presupposition and the principle that accounts for it, the Maximize Presupposition! principle (MP!). Through an experimental investigation using psycholinguistic data and four open-source BERT model variants, we explore how language models handle different anti-presuppositions and whether they apply the MP! principle in their predictions. Further, we examine whether fine-tuning with Natural Language Inference data impacts adherence to the MP! principle. Our findings reveal that LLMs tend to replicate context-based n-grams rather than follow the MP! principle, with fine-tuning not enhancing their adherence. Notably, our results further indicate a striking difficulty of LLMs to correctly predict determiners, in relatively simple linguistic contexts. 2023.blackboxnlp-1.14 @@ -188,7 +188,7 @@ The Self-Contained Negation Test Set DavidKletz PascalAmsili - MarieCandito + MarieCandito 212–221 Several methodologies have recently been proposed to evaluate the ability of Pretrained Language Models (PLMs) to interpret negation. In this article, we build on Gubelmann and Handschuh (2022), which studies the modification of PLMs’ predictions as a function of the polarity of inputs, in English. Crucially, this test uses “self-contained” inputs ending with a masked position: depending on the polarity of a verb in the input, a particular token is either semantically ruled out or allowed at the masked position. By replicating Gubelmann and Handschuh (2022) experiments, we have uncovered flaws that weaken the conclusions that can be drawn from this test. We thus propose an improved version, the Self-Contained Neg Test, which is more controlled, more systematic, and entirely based on examples forming minimal pairs varying only in the presence or absence of verbal negation in English. When applying our test to the roberta and bert base and large models, we show that only roberta-large shows trends that match the expectations, while bert-base is mostly insensitive to negation. For all the tested models though, in a significant number of test instances the top-1 prediction remains the token that is semantically forbidden by the context, which shows how much room for improvement remains for a proper treatment of the negation phenomenon. 2023.blackboxnlp-1.16 @@ -231,8 +231,8 @@ Investigating the Encoding of Words in <fixed-case>BERT</fixed-case>’s Neurons Using Feature Textualization TanjaBaeumel SoniyaVijayakumar - Josefvan Genabith - GuenterNeumann + Josefvan Genabith + GuenterNeumann SimonOstermann 261–270 Pretrained language models (PLMs) form the basis of most state-of-the-art NLP technologies. Nevertheless, they are essentially black boxes: Humans do not have a clear understanding of what knowledge is encoded in different parts of the models, especially in individual neurons. A contrast is in computer vision, where feature visualization provides a decompositional interpretability technique for neurons of vision models. Activation maximization is used to synthesize inherently interpretable visual representations of the information encoded in individual neurons. Our work is inspired by this but presents a cautionary tale on the interpretability of single neurons, based on the first large-scale attempt to adapt activation maximization to NLP, and, more specifically, large PLMs. We propose feature textualization, a technique to produce dense representations of neurons in the PLM word embedding space. We apply feature textualization to the BERT model to investigate whether the knowledge encoded in individual neurons can be interpreted and symbolized. We find that the produced representations can provide insights about the knowledge encoded in individual neurons, but that individual neurons do not represent clear-cut symbolic units of language such as words. Additionally, we use feature textualization to investigate how many neurons are needed to encode words in BERT. @@ -263,7 +263,7 @@ Not Wacky vs. Definitely Wacky: A Study of Scalar Adverbs in Pretrained Language Models IsabelleLorge - Janet B.Pierrehumbert + Janet B.Pierrehumbert 296–316 Vector-space models of word meaning all assume that words occurring in similar contexts have similar meanings. Words that are similar in their topical associations but differ in their logical force tend to emerge as semantically close – creating well-known challenges for NLP applications that involve logical reasoning. Pretrained language models such as BERT, RoBERTa, GPT-2, and GPT-3 hold the promise of performing better on logical tasks than classic static word embeddings. However, reports are mixed about their success. Here, we advance this discussion through a systematic study of scalar adverbs, an under-explored class of words with strong logical force. Using three different tasks involving both naturalistic social media data and constructed examples, we investigate the extent to which BERT, RoBERTa, GPT-2 and GPT-3 exhibit knowledge of these common words. We ask: 1) Do the models distinguish amongst the three semantic categories of MODALITY, FREQUENCY and DEGREE? 2) Do they have implicit representations of full scales from maximally negative to maximally positive? 3) How do word frequency and contextual factors impact model performance? We find that despite capturing some aspects of logical meaning, the models still have obvious shortfalls. 2023.blackboxnlp-1.23 @@ -272,7 +272,7 @@ Rigorously Assessing Natural Language Explanations of Neurons - JingHuang + JingHuang AtticusGeiger KarelD’Oosterlinck ZhengxuanWu @@ -318,7 +318,7 @@ Systematic Generalization by Finetuning? Analyzing Pretrained Language Models Using Constituency Tests AishikChakraborty Jackie CKCheung - Timothy J.O’Donnell + Timothy J.O’Donnell 357–366 Constituents are groups of words that behave as a syntactic unit. Many linguistic phenomena (e.g., question formation, diathesis alternations) require the manipulation and rearrangement of constituents in a sentence. In this paper, we investigate how different finetuning setups affect the ability of pretrained sequence-to-sequence language models such as BART and T5 to replicate constituency tests — transformations that involve manipulating constituents in a sentence. We design multiple evaluation settings by varying the combinations of constituency tests and sentence types that a model is exposed to during finetuning. We show that models can replicate a linguistic transformation on a specific type of sentence that they saw during finetuning, but performance degrades substantially in other settings, showing a lack of systematic generalization. These results suggest that models often learn to manipulate sentences at a surface level unrelated to the constituent-level syntactic structure, for example by copying the first word of a sentence. These results may partially explain the brittleness of pretrained language models in downstream tasks. 2023.blackboxnlp-1.27 diff --git a/data/xml/2023.bsnlp.xml b/data/xml/2023.bsnlp.xml index 4f951d7165..2352748f29 100644 --- a/data/xml/2023.bsnlp.xml +++ b/data/xml/2023.bsnlp.xml @@ -5,7 +5,7 @@ Proceedings of the 9th Workshop on Slavic Natural Language Processing 2023 (SlavicNLP 2023) JakubPiskorski MichałMarcińczuk - PreslavNakov + PreslavNakov MaciejOgrodniczuk SenjaPollak PavelPřibáň @@ -258,7 +258,7 @@ Large Language Models for Multilingual <fixed-case>S</fixed-case>lavic Named Entity Linking RinaldsVīksnaUniversity of Latvia - IngunaSkadiņaTilde/ Institute of Mathematics and Computer Science, University of Latvia + IngunaSkadiņaTilde/ Institute of Mathematics and Computer Science, University of Latvia DaigaDeksneTilde; University of Latvia RobertsRozisTilde 172-178 diff --git a/data/xml/2023.c3nlp.xml b/data/xml/2023.c3nlp.xml index a5e085c6e9..2ce7c8cfcd 100644 --- a/data/xml/2023.c3nlp.xml +++ b/data/xml/2023.c3nlp.xml @@ -5,7 +5,7 @@ Proceedings of the First Workshop on Cross-Cultural Considerations in NLP (C3NLP) SunipaDev VinodkumarPrabhakaran - David IfeoluwaAdelani + David IfeoluwaAdelani DirkHovy LucianaBenotti Association for Computational Linguistics @@ -22,7 +22,7 @@ Varepsilon kú mask: Integrating <fixed-case>Y</fixed-case>orùbá cultural greetings into machine translation IdrisAkinadeUniversity of Ibadan - Jesujoba O.AlabiSaarland University + Jesujoba O.AlabiSaarland University David IfeoluwaAdelaniUniversity College London ClementOdojeUniversity of Ibadan DietrichKlakowSaarland University @@ -73,7 +73,7 @@ Hate Speech Classifiers are Culturally Insensitive NayeonLeeKaist ChaniJungSchool of Computing, KAIST - AliceOhKaist + AliceOhKaist 35-46 Increasingly, language models and machine translation are becoming valuable tools to help people communicate with others from diverse cultural backgrounds. However, current language models lack cultural awareness because they are trained on data representing only the culture within the dataset. This presents a problem in the context of hate speech classification, where cultural awareness is especially critical. This study aims to quantify the cultural insensitivity of three monolingual (Korean, English, Arabic) hate speech classifiers by evaluating their performance on translated datasets from the other two languages. Our research has revealed that hate speech classifiers evaluated on datasets from other cultures yield significantly lower F1 scores, up to almost 50%. In addition, they produce considerably higher false negative rates, with a magnitude up to five times greater, demonstrating the extent of the cultural gap. The study highlights the severity of cultural insensitivity of language models in hate speech classification. 2023.c3nlp-1.5 @@ -85,7 +85,7 @@ <fixed-case>MMT</fixed-case>: A Multilingual and Multi-Topic <fixed-case>I</fixed-case>ndian Social Media Dataset DwipDalalIndian Institute Of Technology Gandhinagar VivekSrivastavaTCS research - MayankSinghIIT Gandhinagar + MayankSinghIIT Gandhinagar 47-52 Social media plays a significant role in cross-cultural communication. A vast amount of this occurs in code-mixed and multilingual form, posing a significant challenge to Natural Language Processing (NLP) tools for processing such information, like language identification, topic modeling, and named-entity recognition. To address this, we introduce a large-scale multilingual and multi-topic dataset MMT collected from Twitter (1.7 million Tweets), encompassing 13 coarse-grained and 63 fine-grained topics in the Indian context. We further annotate a subset of 5,346 tweets from the MMT dataset with various Indian languages and their code-mixed counterparts. Also, we demonstrate that the currently existing tools fail to capture the linguistic diversity in MMT on two downstream tasks, i.e., topic modeling and language identification. To facilitate future research, we will make the anonymized and annotated dataset available in the public domain. 2023.c3nlp-1.6 @@ -137,7 +137,7 @@ Bias assessment for experts in discrimination, not in computer science - LauraAlonso AlemanyUniversidad Nacional de Cordoba + LauraAlonso AlemanyUniversidad Nacional de Cordoba LucianaBenottiUniversidad Nacional de Cordoba HernánMainaFacultad de Matemática, Astronomía, Física y Computación - Universidad Nacional de Córdoba - CONICET LucíaGonzalezFacultad de Matemática, Astronomía, Física y Computación - Universidad Nacional de Córdoba diff --git a/data/xml/2023.calcs.xml b/data/xml/2023.calcs.xml index b961388c43..09f21e1bfd 100644 --- a/data/xml/2023.calcs.xml +++ b/data/xml/2023.calcs.xml @@ -3,11 +3,11 @@ Proceedings of the 6th Workshop on Computational Approaches to Linguistic Code-Switching - GentaWinata + GentaWinata SudiptaKar MarinaZhukova ThamarSolorio - MonaDiab + MonaDiab SunayanaSitaram MonojitChoudhury KalikaBali @@ -113,8 +113,8 @@ Multilingual self-supervised speech representations improve the speech recognition of low-resource <fixed-case>A</fixed-case>frican languages with codeswitching TolulopeOgunremi - ChristopherManning - DanJurafsky + ChristopherManning + DanJurafsky 83-88 While many speakers of low-resource languages regularly code-switch between their languages and other regional languages or English, datasets of codeswitched speech are too small to train bespoke acoustic models from scratch or do language model rescoring. Here we propose finetuning self-supervised speech representations such as wav2vec 2.0 XLSR to recognize code-switched data. We find that finetuning self-supervised multilingual representations and augmenting them with n-gram language models trained from transcripts reduces absolute word error rates by up to 20% compared to baselines of hybrid models trained from scratch on code-switched data. Our findings suggest that in circumstances with limited training data finetuning self-supervised representations is a better performing and viable solution. 2023.calcs-1.8 diff --git a/data/xml/2023.case.xml b/data/xml/2023.case.xml index d0157bece7..b16f8d6a64 100644 --- a/data/xml/2023.case.xml +++ b/data/xml/2023.case.xml @@ -3,8 +3,8 @@ Proceedings of the 6th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Text - AliHürriyetoğlu - HristoTanev + AliHürriyetoğlu + HristoTanev VanniZavarella ReyyanYeniterzi ErdemYörük @@ -87,7 +87,7 @@ JesusArmenta-Segura César JesúsNúñez-Prado Grigori OlegovichSidorov - AlexanderGelbukh + AlexanderGelbukh Rodrigo FranciscoRomán-Godínez 53–59 Hate speech detection during times of war has become crucial in recent years, as evident with the recent Russo-Ukrainian war. In this paper, we present our submissions for both subtasks from the Multimodal Hate Speech Event Detec- tion contest at CASE 2023, RANLP 2023. We used pre-trained BERT models in both submis- sion, achieving a F1 score of 0.809 in subtask A, and F1 score of 0.567 in subtask B. In the first subtask, our result was not far from the first place, which led us to realize the lower impact of images in real-life memes about feel- ings, when compared with the impact of text. However, we observed a higher importance of images when targeting hateful feelings towards a specific entity. The source code to reproduce our results can be found at the github repository https://github.com/JesusASmx/OmeteotlAtCASE2023 @@ -239,7 +239,7 @@ VanniZavarella AliHurriyetoglu BertrandDe Longueville - LeonidaDella Rocca + LeonidaDella Rocca 160–166 The purpose of the shared task 2 at the Challenges and Applications of Automated Extraction of Socio-political Events from Text (CASE) 2023 workshop was to test the abilities of the participating models and systems to detect and geocode armed conflicts events in social media messages from Telegram channels reporting on the Russo Ukrainian war. The evaluation followed an approach which was introduced in CASE 2021 (Giorgi et al., 2021): For each system we consider the correlation of the spatio-temporal distribution of its detected events and the events identified for the same period in the ACLED (Armed Conflict Location and Event Data Project) database (Raleigh et al., 2010). We use ACLED for the ground truth, since it is a well established standard in the field of event extraction and political trend analysis, which relies on human annotators for the encoding of security events using a fine grained taxonomy. Two systems participated in this shared task, we report in this paper on both the shared task and the participating systems. 2023.case-1.21 diff --git a/data/xml/2023.cawl.xml b/data/xml/2023.cawl.xml index c7930115dc..f38c0025aa 100644 --- a/data/xml/2023.cawl.xml +++ b/data/xml/2023.cawl.xml @@ -4,7 +4,7 @@ Proceedings of the Workshop on Computation and Written Language (CAWL 2023) KyleGorman - RichardSproat + RichardSproat BrianRoark Association for Computational Linguistics
Toronto, Canada
@@ -116,7 +116,7 @@
Decipherment of Lost Ancient Scripts as Combinatorial Optimisation Using Coupled Simulated Annealing - FabioTamburiniFICLIT - University of Bologna + FabioTamburiniFICLIT - University of Bologna 82-91 This paper presents a new approach to the ancient scripts decipherment problem based on combinatorial optimisation and coupled simulated annealing, an advanced non-convex optimisation procedure. Solutions are encoded by using k-permutations allowing for null, oneto-many, and many-to-one mappings between signs. The proposed system is able to produce enhanced results in cognate identification when compared to the state-of-the-art systems on standard evaluation benchmarks used in literature. 2023.cawl-1.10 diff --git a/data/xml/2023.ccl.xml b/data/xml/2023.ccl.xml index a98e3b6cc8..cfe3f0a626 100644 --- a/data/xml/2023.ccl.xml +++ b/data/xml/2023.ccl.xml @@ -21,7 +21,7 @@ SiyuanWang思远 ZhongyuWei忠钰 QinChen - XuanjingHuang萱菁 + XuanjingHuang萱菁 1–16 “本文提出了一种基于多跳推理链的对抗攻击方法,通过向输入文本中加入对抗性的攻击文本,并测试问答模型在干扰数据下生成答案的准确性,以检测问答模型真正执行多跳推理的能力和可解释性。该方法首先从输入文本中抽取从问题实体到答案实体的推理链,并基于推理链的特征把多跳问题分为了不同的推理类型,提出了一个模型来自动化实现问题拆解和推理类型预测,然后根据推理类型对原问题进行修改来构造攻击干扰句。实验对多个多跳问答模型进行了对抗攻击测试,所有模型的性能都显著下降,验证了该攻击方法的有效性以及目前问答模型存在的不足;向原训练集中加入对抗样本进行增强训练后,模型性能均有所回升,证明了本对抗增强训练方法可以提升模型的鲁棒性。” 2023.ccl-1.1 @@ -269,7 +269,7 @@ JialiZuo家莉 AnquanJie安全 WenbinLuo文兵 - MingwenWang明文 + MingwenWang明文 229–240 “古籍命名实体识别对于古籍实体知识库与语料库的建设具有显著的现实意义。目前古籍命名实体识别的研究较少,主要原因是缺乏足够的训练语料。本文从《资治通鉴》入手,人工构建了一份古籍命名实体识别数据集,以此展开对古籍命名实体识别任务的研究。针对古籍文本多以单字表意且存在大量省略的语言特点,本文采用预训练词向量作为词典信息,充分利用其中蕴涵的词汇信息。实验表明,这种方法可以有效处理古籍文本中人名实体识别的问题。” 2023.ccl-1.21 @@ -282,7 +282,7 @@ JialiZuo家莉 XueqiangCeng雪强 ZhongyingWan中英 - MingwenWang明文 + MingwenWang明文 241–252 “实体关系抽取是信息抽取领域中一项重要任务,目前实体关系抽取任务主要聚焦于英文和现代汉语领域,关于古汉语领域的数据集构建和方法的研究目前却较少。针对这一问题,本文在研究了开源的《资治通鉴》语料后,人工构建了一个古汉语实体关系数据集,并设计了一种结合全局对应矩阵和相对位置信息的实体关系联合抽取方法。最后通过在本文构建的数据集上进行实验,证明了该方法在古汉语实体关系抽取任务上的有效性。” 2023.ccl-1.22 @@ -292,7 +292,7 @@ 数字人文视域下的青藏高原文旅知识图谱构建研究——以塔尔寺为例(Research on the Construction of Cultural and Tourism Knowledge Atlas on the Qinghai-Tibet Plateau from the Perspective of Digital <fixed-case>H</fixed-case>umanity——<fixed-case>A</fixed-case> case study of Kumbum Monastery) XinhaoLi鑫豪 - WeinaZhao维纳 + WeinaZhao维纳 WanyiZhao婉亦 ChaoqunLi超群 253–263 @@ -331,7 +331,7 @@ 中国社会道德变化模型与发展动因探究——基于70年《人民日报》的计量与分析 (The Model of Moral Change and Motivation in <fixed-case>C</fixed-case>hinese Society ——<fixed-case>T</fixed-case>he Vocabulary Analysis of the 70-year ”People’s Daily”) HongruiWang弘睿 DongYu - PengyuanLiu鹏远 + PengyuanLiu鹏远 LiyingCeng立英 289–299 “社会道德的历时变迁研究具有重要意义。通过观察语言使用与道德变迁的历时联系,能够帮助描绘社会道德的变化趋势和发展规律、把握社会道德动态、推进道德建设。目前缺少从词汇角度、利用计算手段对大规模历时语料进行系统、全面的社会道德变迁研究。基于此,该文提出道德主题词历时计量模型,通过计量指标对1946-2015共70年的《人民日报》语料进行了历时计算与分析,观察了70年社会道德主题词的使用选择与变化。研究结果发现,道德词汇的历时使用与社会道德之间存在互动关系,反映出70年中国社会道德的历时变革与发展情况。” @@ -343,7 +343,7 @@ 动词视角下的汉语性别表征研究——基于多语体语料库与依存分析(Gendered Representation in <fixed-case>C</fixed-case>hinese via Verbal Analysis —<fixed-case>B</fixed-case>ased on a Multi-register Corpus and Dependency Parsing) YingshiChen颖诗 DongYu - PengyuanLiu鹏远 + PengyuanLiu鹏远 301–314 “动作是反映性别社会化的重要形式,研究汉语中动词的性别表征,可以找到语言构建不同性别身份的路径,即所采用的方式、形式。本文以依存句法关系为抓手,在四种语体的语料中抽取出和不同性别词构成依存结构的动词,统计出有显著性别差异的动词,并根据性别词充当的句子成分,结合语义进行了定量和定性分析。总体来看,大部分汉语动词表征是中性的,能体现性别的动词是少数,汉语作为一种承载着中华智慧且具有深厚文化底蕴的语言,对性别的表征是中立且平等的,这也体现出了我国的性别平等观念。而在表征性别的动词中,能看到构建男性和女性身份的两种不同路径。显著表征女性的动词在不同语体的语料中均多于显著表征男性的,但是表征男性的动词的语义分布则更为均衡,体现了“男性默认-女性专门”。在司法动词上,女性常常作为暴力行为的受害者,同时施害者男性却隐身了,体现了筜男性主宰笭女性顺从笢。不同语体的动词在构建性别时体现了不同的功能,新闻塑造了较为传统的性别规范,传统和网络文学以不同的形式打破了固有的性别规范。” 2023.ccl-1.27 @@ -412,7 +412,7 @@ SiyiTang思怡 ShikeWang诗可 DongYu - PengyuanLiu鹏远 + PengyuanLiu鹏远 364–376 “现有的文本分级阅读研究往往从文本可读性的角度出发,以离散的文本难度等级的形式为读者推荐阅读书目。目前,仍缺少一种研究读者在阅读过程中产生的多方面、深层次阅读体验的体系结构。对此,我们调研了读者在阅读中文篇章过程中产生的不同阅读体验,提出了中文篇章多维度阅读体验的量化体系。我们将阅读过程中呈现的连续性的阅读体验归纳为多种类别,并在此基础上构建了中文篇章多维度阅读体验数据集。同时,我们探究了以大规模语言模型为基础的ChatGPT对阅读体验的量化能力,发现其虽具备强大的信息抽取和语义理解能力,在阅读体验的量化上却表现不佳。但我们发现大规模语言模型所蕴含的能力能够以知识蒸馏的方式协助深层属性的量化,基于此,我们实现了大规模语言模型增强的中文篇章多维阅读体验量化模型。模型在各维度阅读体验上的平均F1值达到0.72,高于ChatGPT的Fewshot结果0.48。” 2023.ccl-1.32 @@ -426,7 +426,7 @@ WenqiDing文琪 YumengFu雨濛 LiliShan丽莉 - BingquanLiu秉权 + BingquanLiu秉权 377–387 “推特机器人检测任务的目标是判断一个推特账号是真人账号还是自动化机器人账号。随着自动化账号拟人算法的快速迭代,检测最新类别的自动化账号变得越来越困难。最近,预训练语言模型在自然语言生成任务和其他任务上表现出了出色的水平,当这些预训练语言模型被用于推特文本自动生成时,会为推特机器人检测任务带来很大挑战。本文研究发现,困惑度偏低和相似度偏高的现象始终出现在不同时代自动化账号的历史推文中,且此现象不受预训练语言模型的影响。针对这些发现,本文提出了一种抽取历史推文困惑度特征和相似度特征的方法,并设计了一种特征融合策略,以更好地将这些新特征应用于已有的算法模型。本文方法在选定数据集上的性能超越了已有的基准方法,并在人民网主办、传播内容认知全国重点实验室承办的社交机器人识别大赛上取得了冠军。” 2023.ccl-1.33 @@ -1384,7 +1384,7 @@ <fixed-case>CCL</fixed-case>23-Eval 任务6总结报告:电信网络诈骗案件分类(Overview of <fixed-case>CCL</fixed-case>23-Eval Task 6: Telecom Network Fraud Case Classification) - ChengjieSun承杰 + ChengjieSun承杰 JieJi BoyueShang伯乐 BinguanLiu秉权 @@ -1465,7 +1465,7 @@ <fixed-case>CCL</fixed-case>23-Eval 任务7总结报告: 汉语学习者文本纠错(Overview of <fixed-case>CCL</fixed-case>23-Eval Task: <fixed-case>C</fixed-case>hinese Learner Text Correction) HongxiangChang - YangLiu + YangLiu MengXu YingyingWang CunliangKong diff --git a/data/xml/2023.cl.xml b/data/xml/2023.cl.xml index 99e222aec8..6c073d938b 100644 --- a/data/xml/2023.cl.xml +++ b/data/xml/2023.cl.xml @@ -31,7 +31,7 @@ DeborahFerreira MagdalenaWysocka DónalLanders - AndréFreitas + AndréFreitas 10.1162/coli_a_00462 Specialized transformers-based models (such as BioBERT and BioMegatron) are adapted for the biomedical domain based on publicly available biomedical corpora. As such, they have the potential to encode large-scale biological knowledge. We investigate the encoding and representation of biological knowledge in these models, and its potential utility to support inference in cancer precision medicine—namely, the interpretation of the clinical significance of genomic alterations. We compare the performance of different transformer baselines; we use probing to determine the consistency of encodings for distinct entities; and we use clustering methods to compare and contrast the internal properties of the embeddings for genes, variants, drugs, and diseases. We show that these models do indeed encode biological knowledge, although some of this is lost in fine-tuning for specific tasks. Finally, we analyze how the models behave with regard to biases and imbalances in the dataset. 73–115 @@ -53,7 +53,7 @@ Annotation Error Detection: Analyzing the Past and Present for a More Coherent Future Jan-ChristophKlie - BonnieWebber + BonnieWebber IrynaGurevych 10.1162/coli_a_00464 Annotated data is an essential ingredient in natural language processing for training and evaluating machine learning models. It is therefore very desirable for the annotations to be of high quality. Recent work, however, has shown that several popular datasets contain a surprising number of annotation errors or inconsistencies. To alleviate this issue, many methods for annotation error detection have been devised over the years. While researchers show that their approaches work well on their newly introduced datasets, they rarely compare their methods to previous work or on the same datasets. This raises strong concerns on methods’ general performance and makes it difficult to assess their strengths and weaknesses. We therefore reimplement 18 methods for detecting potential annotation errors and evaluate them on 9 English datasets for text classification as well as token and span labeling. In addition, we define a uniform evaluation setup including a new formalization of the annotation error detection task, evaluation protocol, and general best practices. To facilitate future research and reproducibility, we release our datasets and implementations in an easy-to-use and open source software package.1 @@ -67,8 +67,8 @@ Aikaterini-LidaKalouli HaiHu Alexander F.Webb - Lawrence S.Moss - Valeriade Paiva + Lawrence S.Moss + Valeriade Paiva 10.1162/coli_a_00465 Against the backdrop of the ever-improving Natural Language Inference (NLI) models, recent efforts have focused on the suitability of the current NLI datasets and on the feasibility of the NLI task as it is currently approached. Many of the recent studies have exposed the inherent human disagreements of the inference task and have proposed a shift from categorical labels to human subjective probability assessments, capturing human uncertainty. In this work, we show how neither the current task formulation nor the proposed uncertainty gradient are entirely suitable for solving the NLI challenges. Instead, we propose an ordered sense space annotation, which distinguishes between logical and common-sense inference. One end of the space captures non-sensical inferences, while the other end represents strictly logical scenarios. In the middle of the space, we find a continuum of common-sense, namely, the subjective and graded opinion of a “person on the street.” To arrive at the proposed annotation scheme, we perform a careful investigation of the SICK corpus and we create a taxonomy of annotation issues and guidelines. We re-annotate the corpus with the proposed annotation scheme, utilizing four symbolic inference systems, and then perform a thorough evaluation of the scheme by fine-tuning and testing commonly used pre-trained language models on the re-annotated SICK within various settings. We also pioneer a crowd annotation of a small portion of the MultiNLI corpus, showcasing that it is possible to adapt our scheme for annotation by non-experts on another NLI corpus. Our work shows the efficiency and benefits of the proposed mechanism and opens the way for a careful NLI task refinement. 199–243 @@ -146,7 +146,7 @@ Onception: Active Learning with Expert Advice for Real World Machine Translation VâniaMendonça RicardoRei - LuísaCoheur + LuísaCoheur AlbertoSardinha 10.1162/coli_a_00473 Active learning can play an important role in low-resource settings (i.e., where annotated data is scarce), by selecting which instances may be more worthy to annotate. Most active learning approaches for Machine Translation assume the existence of a pool of sentences in a source language, and rely on human annotators to provide translations or post-edits, which can still be costly. In this article, we apply active learning to a real-world human-in-the-loop scenario in which we assume that: (1) the source sentences may not be readily available, but instead arrive in a stream; (2) the automatic translations receive feedback in the form of a rating, instead of a correct/edited translation, since the human-in-the-loop might be a user looking for a translation, but not be able to provide one. To tackle the challenge of deciding whether each incoming pair source–translations is worthy to query for human feedback, we resort to a number of stream-based active learning query strategies. Moreover, because we do not know in advance which query strategy will be the most adequate for a certain language pair and set of Machine Translation models, we propose to dynamically combine multiple strategies using prediction with expert advice. Our experiments on different language pairs and feedback settings show that using active learning allows us to converge on the best Machine Translation systems with fewer human interactions. Furthermore, combining multiple strategies using prediction with expert advice outperforms several individual active learning strategies with even fewer interactions, particularly in partial feedback settings. @@ -158,7 +158,7 @@ Reflection of Demographic Background on Word Usage AparnaGarimella CarmenBanea - RadaMihalcea + RadaMihalcea 10.1162/coli_a_00475 The availability of personal writings in electronic format provides researchers in the fields of linguistics, psychology, and computational linguistics with an unprecedented chance to study, on a large scale, the relationship between language use and the demographic background of writers, allowing us to better understand people across different demographics. In this article, we analyze the relation between language and demographics by developing cross-demographic word models to identify words with usage bias, or words that are used in significantly different ways by speakers of different demographics. Focusing on three demographic categories, namely, location, gender, and industry, we identify words with significant usage differences in each category and investigate various approaches of encoding a word’s usage, allowing us to identify language aspects that contribute to the differences. Our word models using topic-based features achieve at least 20% improvement in accuracy over the baseline for all demographic categories, even for scenarios with classification into 15 categories, illustrating the usefulness of topic-based features in identifying word usage differences. Further, we note that for location and industry, topics extracted from immediate context are the best predictors of word usages, hinting at the importance of word meaning and its grammatical function for these demographics, while for gender, topics obtained from longer contexts are better predictors for word usage. 373–394 @@ -170,7 +170,7 @@ JiehangZeng JianhanXu XiaoqingZheng - XuanjingHuang + XuanjingHuang 10.1162/coli_a_00476 Very recently, few certified defense methods have been developed to provably guarantee the robustness of a text classifier to adversarial synonym substitutions. However, all the existing certified defense methods assume that the defenders have been informed of how the adversaries generate synonyms, which is not a realistic scenario. In this study, we propose a certifiably robust defense method by randomly masking a certain proportion of the words in an input text, in which the above unrealistic assumption is no longer necessary. The proposed method can defend against not only word substitution-based attacks, but also character-level perturbations. We can certify the classifications of over 50% of texts to be robust to any perturbation of five words on AGNEWS, and two words on SST2 dataset. The experimental results show that our randomized smoothing method significantly outperforms recently proposed defense methods across multiple datasets under different attack algorithms. 395–427 @@ -224,10 +224,10 @@ Neural Data-to-Text Generation Based on Small Datasets: Comparing the Added Value of Two Semi-Supervised Learning Approaches on Top of a Large Language Model Chrisvan der Lee - ThiagoCastro Ferreira + ThiagoCastro Ferreira ChrisEmmery Travis J.Wiltshire - EmielKrahmer + EmielKrahmer 10.1162/coli_a_00484 This study discusses the effect of semi-supervised learning in combination with pretrained language models for data-to-text generation. It is not known whether semi-supervised learning is still helpful when a large-scale language model is also supplemented. This study aims to answer this question by comparing a data-to-text system only supplemented with a language model, to two data-to-text systems that are additionally enriched by a data augmentation or a pseudo-labeling semi-supervised learning approach. Results show that semi-supervised learning results in higher scores on diversity metrics. In terms of output quality, extending the training set of a data-to-text system with a language model using the pseudo-labeling approach did increase text quality scores, but the data augmentation approach yielded similar scores to the system without training set extension. These results indicate that semi-supervised learning approaches can bolster output quality and diversity, even when a language model is also present. 555–611 @@ -254,7 +254,7 @@ Muhammad RezaQorib HannanCao Hwee TouNg - TedBriscoe + TedBriscoe 10.1162/coli_a_00478 Grammatical Error Correction (GEC) is the task of automatically detecting and correcting errors in text. The task not only includes the correction of grammatical errors, such as missing prepositions and mismatched subject–verb agreement, but also orthographic and semantic errors, such as misspellings and word choice errors, respectively. The field has seen significant progress in the last decade, motivated in part by a series of five shared tasks, which drove the development of rule-based methods, statistical classifiers, statistical machine translation, and finally neural machine translation systems, which represent the current dominant state of the art. In this survey paper, we condense the field into a single article and first outline some of the linguistic challenges of the task, introduce the most popular datasets that are available to researchers (for both English and other languages), and summarize the various methods and techniques that have been developed with a particular focus on artificial error generation. We next describe the many different approaches to evaluation as well as concerns surrounding metric reliability, especially in relation to subjective human judgments, before concluding with an overview of recent progress and suggestions for future work and remaining challenges. We hope that this survey will serve as a comprehensive resource for researchers who are new to the field or who want to be kept apprised of recent developments. 643–701 @@ -268,7 +268,7 @@ JohnPavlopoulos VanessaStefanak AndrewSenior - ChrisDyer + ChrisDyer JohnBodel JonathanPrag IonAndroutsopoulos @@ -281,7 +281,7 @@ Dimensions of Explanatory Value in <fixed-case>NLP</fixed-case> Models - Keesvan Deemter + Keesvan Deemter 10.1162/coli_a_00480 Performance on a dataset is often regarded as the key criterion for assessing NLP models. I argue for a broader perspective, which emphasizes scientific explanation. I draw on a long tradition in the philosophy of science, and on the Bayesian approach to assessing scientific theories, to argue for a plurality of criteria for assessing NLP models. To illustrate these ideas, I compare some recent models of language production with each other. I conclude by asking what it would mean for institutional policies if the NLP community took these ideas onboard. 749–761 @@ -298,9 +298,9 @@ Obituary: <fixed-case>Y</fixed-case>orick <fixed-case>W</fixed-case>ilks - JohnTait - RobertGaizauskas - KalinaBontcheva + JohnTait + RobertGaizauskas + KalinaBontcheva 10.1162/coli_a_00485 767–772 2023.cl-3.8 @@ -333,10 +333,10 @@ VitalyNikolaev MatthewLamm LoraAroyo - MichaelCollins + MichaelCollins DipanjanDas SlavPetrov - Gaurav SinghTomar + Gaurav SinghTomar IuliaTurc DavidReitter 10.1162/coli_a_00486 @@ -373,7 +373,7 @@ Languages Through the Looking Glass of <fixed-case>BPE</fixed-case> Compression XimenaGutierrez-Vasques ChristianBentz - TanjaSamardžić + TanjaSamardžić 10.1162/coli_a_00489 Byte-pair encoding (BPE) is widely used in NLP for performing subword tokenization. It uncovers redundant patterns for compressing the data, and hence alleviates the sparsity problem in downstream applications. Subwords discovered during the first merge operations tend to have the most substantial impact on the compression of texts. However, the structural underpinnings of this effect have not been analyzed cross-linguistically. We conduct in-depth analyses across 47 typologically diverse languages and three parallel corpora, and thereby show that the types of recurrent patterns that have the strongest impact on compression are an indicator of morphological typology. For languages with richer inflectional morphology there is a preference for highly productive subwords on the early merges, while for languages with less inflectional morphology, idiosyncratic subwords are more prominent. Both types of patterns contribute to efficient compression. Counter to the common perception that BPE subwords are not linguistically relevant, we find patterns across languages that resemble those described in traditional typology. We thus propose a novel way to characterize languages according to their BPE subword properties, inspired by the notion of morphological productivity in linguistics. This allows us to have language vectors that encode typological knowledge induced from raw text. Our approach is easily applicable to a wider range of languages and texts, as it does not require annotated data or any external linguistic knowledge. We discuss its potential contributions to quantitative typology and multilingual NLP. 943–1001 diff --git a/data/xml/2023.clasp.xml b/data/xml/2023.clasp.xml index e971cc250f..ad9c2eea74 100644 --- a/data/xml/2023.clasp.xml +++ b/data/xml/2023.clasp.xml @@ -23,7 +23,7 @@ Improving Few-Shot Learning with Multilingual Transfer and <fixed-case>M</fixed-case>onte <fixed-case>C</fixed-case>arlo Training Set Selection AntonisMaronikolakis PaulO’Grady - HinrichSchütze + HinrichSchütze MattiLyra 1–10 In industry settings, machine learning is an attractive tool to automatize processes. Unfortunately, annotated and high-quality data is expensive to source. This problem is exacerbated in settings spanning multiple markets and languages. Thus, developing solutions for multilingual tasks with little available data is challenging. Few-shot learning is a compelling approach when building solutions in multilingual and low-resource settings, since the method not only requires just a few training examples to achieve high performance, but is also a technique agnostic to language. Even though the technique can be applied to multilingual settings, optimizing performance is an open question. In our work we show that leveraging higher-resource, task-specific language data can boost overall performance and we propose a method to select training examples per their average performance in a Monte Carlo simulation, resulting in a training set more conducive to learning. We demonstrate the effectiveness of our methods in fashion text reviews moderation, classifying reviews as related or unrelated to the given product. We show that our methodology boosts performance in multilingual (English, French, German) settings, increasing F1 score and significantly decreasing false positives. @@ -44,7 +44,7 @@ Entrenchment Matters: Investigating Positional and Constructional Sensitivity in Small and Large Language Models BastianBunzeck - SinaZarrieß + SinaZarrieß 25–37 The success of large language models (LMs) has also prompted a push towards smaller models, but the differences in functionality and encodings between these two types of models are not yet well understood. In this paper, we employ a perturbed masking approach to investigate differences in token influence patterns on the sequence embeddings of larger and smaller RoBERTa models. Specifically, we explore how token properties like position, length or part of speech influence their sequence embeddings. We find that there is a general tendency for sequence-final tokens to exert a higher influence. Among part-of-speech tags, nouns, numerals and punctuation marks are the most influential, with smaller deviations for individual models. These findings also align with usage-based linguistic evidence on the effect of entrenchment. Finally, we show that the relationship between data size and model size influences the variability and brittleness of these effects, hinting towards a need for holistically balanced models. 2023.clasp-1.3 @@ -142,7 +142,7 @@ Geometry-Aware Supertagging with Heterogeneous Dynamic Convolutions KonstantinosKogkalidis - MichaelMoortgat + MichaelMoortgat 107–119 The syntactic categories of categorial grammar formalisms are structured units made of smaller, indivisible primitives, bound together by the underlying grammar’s category formation rules. In the trending approach of constructive supertagging, neural models are increasingly made aware of the internal category structure. In turn, this enables them to more reliably predict rare and out-of-vocabulary categories. with significant implications for grammars previously deemed too complex to find practical use. In this work, we revisit constructive supertagging from a graph-theoretic perspective, and propose a framework based on heterogeneous dynamic graph convolutions, aimed at exploiting the distinctive structure of a supertagger’s output space. We test our approach on a number of categorial grammar datasets spanning different languages and grammar formalisms, achieving substantial improvements over previous state of the art scores. 2023.clasp-1.13 @@ -198,7 +198,7 @@ GeorgiosTziafas KonstantinosKogkalidis GijsWijnholds - MichaelMoortgat + MichaelMoortgat 176–184 Bidirectional masked Transformers have become the core theme in the current NLP landscape. Despite their impressive benchmarks, a recurring theme in recent research has been to question such models’ capacity for syntactic generalization. In this work, we seek to address this question by adding a supervised, token-level supertagging objective to standard unsupervised pretraining, enabling the explicit incorporation of syntactic biases into the network’s training dynamics. Our approach is straightforward to implement, induces a marginal computational overhead and is general enough to adapt to a variety of settings. We apply our methodology on Lassy Large, an automatically annotated corpus of written Dutch. Our experiments suggest that our syntax-aware model performs on par with established baselines, despite Lassy Large being one order of magnitude smaller than commonly used corpora. 2023.clasp-1.18 @@ -220,7 +220,7 @@ On the role of resources in the age of large language models SimonDobnik - JohnKelleher + JohnKelleher 191–197 We evaluate the role of expert-based domain knowledge and resources in relation to training large language models by referring to our work on training and evaluating neural models, also in under-resourced scenarios which we believe also informs training models for “well-resourced” languages and domains. We argue that our community needs both large-scale datasets and small but high-quality data based on expert knowledge and that both activities should work hand-in-hand. 2023.clasp-1.20 diff --git a/data/xml/2023.clicit.xml b/data/xml/2023.clicit.xml index bcc6fd9df2..a126550dbb 100644 --- a/data/xml/2023.clicit.xml +++ b/data/xml/2023.clicit.xml @@ -4,8 +4,8 @@ Proceedings of the 9th Italian Conference on Computational Linguistics (CLiC-it 2023) FedericoBoschetti - Gianluca E.Lebani - BernardoMagnini + Gianluca E.Lebani + BernardoMagnini NicoleNovielli CEUR Workshop Proceedings
Venice, Italy
@@ -31,7 +31,7 @@
When the Lab of <fixed-case>C</fixed-case>omp<fixed-case>L</fixed-case>ing Was Started at the University of Venice - Preface to the Proceedings of the First Workshop Held in 1982 - RodolfoDelmonte + RodolfoDelmonte 4-9 2023.clicit-1.2 delmonte-2023-lab @@ -43,7 +43,7 @@ DavideVenditti LeonardoRanaldi CristinaGiannone - Fabio MassimoZanzotto + Fabio MassimoZanzotto AndreaFavalli RanieroRomagnoli 10-16 @@ -114,7 +114,7 @@ Pier FeliceBalestrucci LucaAnselma CristianBernareggi - AlessandroMazzei + AlessandroMazzei 70-77 2023.clicit-1.10 balestrucci-etal-2023-building @@ -164,7 +164,7 @@ Testing <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> for Stability and Reasoning: A Case Study Using <fixed-case>I</fixed-case>talian Medical Specialty Tests SilviaCasola TizianoLabruna - AlbertoLavelli + AlbertoLavelli BernardoMagnini 113-119 2023.clicit-1.15 @@ -213,7 +213,7 @@ Highway to Hell. Towards a <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Treebank for Dante Alighieri’s Comedy ClaudiaCorbetta MarcoPassarotti - Flavio MassimilianoCecchini + Flavio MassimilianoCecchini GiovanniMoretti 154-161 2023.clicit-1.20 @@ -263,7 +263,7 @@ How To Build Competitive Multi-gender Speech Translation Models For Controlling Speaker Gender Translation MarcoGaido DennisFucci - MatteoNegri + MatteoNegri LuisaBentivogli 203-210 2023.clicit-1.26 @@ -301,7 +301,7 @@ End-to-end Dependency Parsing via Auto-regressive Large Language Model Claudiu DanielHromei DaniloCroce - RobertoBasili + RobertoBasili 236-242 2023.clicit-1.30 hromei-etal-2023-end @@ -329,7 +329,7 @@ Introducing Deep Learning with Data Augmentation and Corpus Construction for <fixed-case>LIS</fixed-case> ManuelaMarchisio - AlessandroMazzei + AlessandroMazzei DarioSammaruga 259-271 2023.clicit-1.33 @@ -375,8 +375,8 @@ Building Structured Synthetic Datasets: The Case of Blackbird Language Matrices (<fixed-case>BLM</fixed-case>s) PaolaMerlo GiuseppeSamo - ViviNastase - ChunyangJiang + ViviNastase + ChunyangJiang 292-302 2023.clicit-1.36 merlo-etal-2023-building @@ -411,7 +411,7 @@ Unraveling Text Coherence from the Human Perspective: a Novel Dataset for <fixed-case>I</fixed-case>talian FedericaPapa - LucaDini + LucaDini DominiqueBrunato FeliceDell’Orletta 334-341 @@ -431,7 +431,7 @@ Are All Languages Equal? Curriculum Learning over Different Languages GiuliaPucci LeonardoRanaldi - Fabio MassimoZanzotto + Fabio MassimoZanzotto 351-360 2023.clicit-1.42 pucci-etal-2023-languages @@ -445,7 +445,7 @@ CristinaGiannone AndreaFavalli RanieroRomagnoli - Fabio MassimoZanzotto + Fabio MassimoZanzotto 361-368 2023.clicit-1.43 ranaldi-etal-2023-prompting @@ -507,7 +507,7 @@ “That branch of the <fixed-case>L</fixed-case>ake of <fixed-case>C</fixed-case>omo...”: Developing a New Resource for the Analysis of <fixed-case>I</fixed-case> Promessi Sposi and its Historical Translations - RacheleSprugnoli + RacheleSprugnoli MarcoSartor 420-426 2023.clicit-1.50 @@ -598,7 +598,7 @@ PierluigiCassotti MarcoPolignano LuciaSiciliani - GiovanniSemeraro + GiovanniSemeraro 480-484 2023.clicit-1.59 basile-etal-2023-impact @@ -627,7 +627,7 @@ Alessandra TeresaCignarella SimonaFrenda MirkoLai - Marco AntonioStranisci + Marco AntonioStranisci AlessandraUrbinati 494-498 2023.clicit-1.62 @@ -697,7 +697,7 @@ Exploring Sentiments in Summarization: <fixed-case>S</fixed-case>enti<fixed-case>T</fixed-case>ext<fixed-case>R</fixed-case>ank, an Emotional Variant of <fixed-case>T</fixed-case>ext<fixed-case>R</fixed-case>ank Md. MuradHossain LucaAnselma - AlessandroMazzei + AlessandroMazzei 535-539 2023.clicit-1.70 hossain-etal-2023-exploring @@ -714,7 +714,7 @@ The Inherence of Telicity: Unveiling Temporal Reasoning in Video Question Answering OlgaLoginova - RaffaellaBernardi + RaffaellaBernardi 546-550 2023.clicit-1.72 loginova-bernardi-2023-inherence @@ -732,8 +732,8 @@ LeonardoRanaldi GiuliaPucci Elena SofiaRuzzetti - Fabio MassimoZanzotto - AndréFreitas + Fabio MassimoZanzotto + AndréFreitas 557-561 2023.clicit-1.74 ranaldi-etal-2023-teasing @@ -744,7 +744,7 @@ DarioOnorati LeonardoRanaldi DavideVenditti - Fabio MassimoZanzotto + Fabio MassimoZanzotto 562-569 2023.clicit-1.75 ruzzetti-etal-2023-investigating @@ -753,7 +753,7 @@ Towards a New Computational Lexicon for <fixed-case>I</fixed-case>talian: Building the Morphological Layer by Harmonizing and Merging Existing Resources FlaviaSciolette SimoneMarchi - EmilianoGiovannetti + EmilianoGiovannetti 570-574 2023.clicit-1.76 sciolette-etal-2023-towards diff --git a/data/xml/2023.clinicalnlp.xml b/data/xml/2023.clinicalnlp.xml index aa673f4203..0e2f146ec9 100644 --- a/data/xml/2023.clinicalnlp.xml +++ b/data/xml/2023.clinicalnlp.xml @@ -77,7 +77,7 @@ GlebErofeev IrinaSorokina SergeGladkoffLogrus Global AI Lab - GoranNenadicUniversity of Manchester + GoranNenadicUniversity of Manchester 31-40 Massively multilingual pre-trained language models (MMPLMs) are developed in recent years demonstrating superpowers and the pre-knowledge they acquire for downstream tasks. This work investigates whether MMPLMs can be applied to clinical domain machine translation (MT) towards entirely unseen languages via transfer learning. We carry out an experimental investigation using Meta-AI’s MMPLMs “wmt21-dense-24-wide-en-X and X-en (WMT21fb)” which were pre-trained on 7 language pairs and 14 translation directions including English to Czech, German, Hausa, Icelandic, Japanese, Russian, and Chinese, and the opposite direction. We fine-tune these MMPLMs towards English-Spanish language pair which did not exist at all in their original pre-trained corpora both implicitly and explicitly.We prepare carefully aligned clinical domain data for this fine-tuning, which is different from their original mixed domain knowledge.Our experimental result shows that the fine-tuning is very successful using just 250k well-aligned in-domain EN-ES segments for three sub-task translation testings: clinical cases, clinical terms, and ontology concepts. It achieves very close evaluation scores to another MMPLM NLLB from Meta-AI, which included Spanish as a high-resource setting in the pre-training.To the best of our knowledge, this is the first work on using MMPLMs towards clinical domain transfer-learning NMT successfully for totally unseen languages during pre-training. 2023.clinicalnlp-1.5 @@ -110,7 +110,7 @@ Navigating Data Scarcity: Pretraining for Medical Utterance Classification Do JuneMin VeronicaPerez-RosasUniversity of Michigan - Ann Arbor - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan 59-68 Pretrained language models leverage self-supervised learning to use large amounts of unlabeled text for learning contextual representations of sequences. However, in the domain of medical conversations, the availability of large, public datasets is limited due to issues of privacy and data management. In this paper, we study the effectiveness of dialog-aware pretraining objectives and multiphase training in using unlabeled data to improve LMs training for medical utterance classification. The objectives of pretraining for dialog awareness involve tasks that take into account the structure of conversations, including features such as turn-taking and the roles of speakers. The multiphase training process uses unannotated data in a sequence that prioritizes similarities and connections between different domains. We empirically evaluate these methods on conversational dialog classification tasks in the medical and counseling domains, and find that multiphase training can help achieve higher performance than standard pretraining or finetuning. 2023.clinicalnlp-1.8 @@ -124,7 +124,7 @@ SimranjeetSingh JasmeetKaurIndraprastha Institute of Information Technology, Delhi PushpendraSingh - RajivShah + RajivShah 69-77 In developing countries like India, doctors and healthcare professionals working in public health spend significant time answering health queries that are fact-based and repetitive. Therefore, we propose an automated way to answer maternal and child health-related queries. A database of Frequently Asked Questions (FAQs) and their corresponding answers generated by experts is curated from rural health workers and young mothers. We develop a Hindi chatbot that identifies k relevant Question and Answer (QnA) pairs from the database in response to a healthcare query (q) written in Devnagri script or Hindi-English (Hinglish) code-mixed script. The curated database covers 80% of all the queries that a user of our study is likely to ask. We experimented with (i) rule-based methods, (ii) sentence embeddings, and (iii) a paraphrasing classifier, to calculate the q-Q similarity. We observed that paraphrasing classifier gives the best result when trained first on an open-domain text and then on the healthcare domain. Our chatbot uses an ensemble of all three approaches. We observed that if a given q can be answered using the database, then our chatbot can provide at least one relevant QnA pair among its top three suggestions for up to 70% of the queries. 2023.clinicalnlp-1.9 @@ -136,7 +136,7 @@ Multi-Task Training with In-Domain Language Models for Diagnostic Reasoning BrihatSharmaUniversity of Wisconsin - Madison YanjunGao - TimothyMillerHarvard University + TimothyMillerHarvard University MatthewChurpekUniversity of Wisconsin - Madison MajidAfsharUniversity of Wisconsin - Madison DmitriyDligachLoyola University Chicago @@ -199,7 +199,7 @@ Training Models on Oversampled Data and a Novel Multi-class Annotation Scheme for Dementia Detection NadineAbdelhalimUniversity of Manchester IngyAbdelhalimUniversity of Manchester - RizaBatista-NavarroUniversity of Manchester + RizaBatista-NavarroUniversity of Manchester 118-124 This work introduces a novel three-class annotation scheme for text-based dementia classification in patients, based on their recorded visit interactions. Multiple models were developed utilising BERT, RoBERTa and DistilBERT. Two approaches were employed to improve the representation of dementia samples: oversampling the underrepresented data points in the original Pitt dataset and combining the Pitt with the Holland and Kempler datasets. The DistilBERT models trained on either an oversampled Pitt dataset or the combined dataset performed best in classifying the dementia class. Specifically, the model trained on the oversampled Pitt dataset and the one trained on the combined dataset obtained state-of-the-art performance with 98.8% overall accuracy and 98.6% macro-averaged F1-score, respectively. The models’ outputs were manually inspected through saliency highlighting, using Local Interpretable Model-agnostic Explanations (LIME), to provide a better understanding of its predictions. 2023.clinicalnlp-1.15 @@ -212,7 +212,7 @@ MajidAfsharUniversity of Wisconsin - Madison DmitriyDligachLoyola University Chicago YanjunGao - TimothyMillerHarvard University + TimothyMillerHarvard University 125-130 Text in electronic health records is organized into sections, and classifying those sections into section categories is useful for downstream tasks. In this work, we attempt to improve the transferability of section classification models by combining the dataset-specific knowledge in supervised learning models with the world knowledge inside large language models (LLMs). Surprisingly, we find that zero-shot LLMs out-perform supervised BERT-based models applied to out-of-domain data. We also find that their strengths are synergistic, so that a simple ensemble technique leads to additional performance gains. 2023.clinicalnlp-1.16 @@ -392,7 +392,7 @@ XihuiLinMicrosoft YuwenSun ZihanQian - ZhengYuanAlibaba Group + ZhengYuanAlibaba Group TristanNaumannMicrosoft Research TianxiCaiHarvard T.H. Chan School of Public Health JunweiLuHarvard University @@ -495,8 +495,8 @@ Building blocks for complex tasks: Robust generative event extraction for radiology reports under domain shifts SitongZhou - MelihaYetisgenUniversity of Washington - MariOstendorfUniversity of Washington + MelihaYetisgenUniversity of Washington + MariOstendorfUniversity of Washington 344-357 This paper explores methods for extracting information from radiology reports that generalize across exam modalities to reduce requirements for annotated data. We demonstrate that multi-pass T5-based text-to-text generative models exhibit better generalization across exam modalities compared to approaches that employ BERT-based task-specific classification layers. We then develop methods that reduce the inference cost of the model, making large-scale corpus processing more feasible for clinical applications. Specifically, we introduce a generative technique that decomposes complex tasks into smaller subtask blocks, which improves a single-pass model when combined with multitask training. In addition, we leverage target-domain contexts during inference to enhance domain adaptation, enabling use of smaller models. Analyses offer insights into the benefits of different cost reduction strategies. 2023.clinicalnlp-1.38 @@ -539,8 +539,8 @@ BinHanUniversity of Washington KevinLybargerGeorge Mason University NicDobbins - OzlemUzunerGeorge Mason University - MelihaYetisgenUniversity of Washington + OzlemUzunerGeorge Mason University + MelihaYetisgenUniversity of Washington 385-393 Social determinants of health (SDOH) documented in the electronic health record through unstructured text are increasingly being studied to understand how SDOH impacts patient health outcomes. In this work, we utilize the Social History Annotation Corpus (SHAC), a multi-institutional corpus of de-identified social history sections annotated for SDOH, including substance use, employment, and living status information. We explore the automatic extraction of SDOH information with SHAC in both standoff and inline annotation formats using GPT-4 in a one-shot prompting setting. We compare GPT-4 extraction performance with a high-performing supervised approach and perform thorough error analyses. Our prompt-based GPT-4 method achieved an overall 0.652 F1 on the SHAC test set, similar to the 7th best-performing system among all teams in the n2c2 challenge with SHAC. 2023.clinicalnlp-1.41 @@ -554,9 +554,9 @@ Kyung MinChaeKonyang University YousangCho HyunbinSeoteddysum - KyungTaeLimSeoul National University of Science and Technology - Key-SunChoiKorea Advanced Institute of Science & Technology and Konyang University - YounggyunHahm + KyungTaeLimSeoul National University of Science and Technology + Key-SunChoiKorea Advanced Institute of Science & Technology and Konyang University + YounggyunHahm 394-402 In this paper, we introduce the design and various attempts for TaskB of MEDIQA-Chat 2023. The goal of TaskB in MEDIQA-Chat 2023 is to generate full clinical note from doctor-patient consultation dialogues. This task has several challenging issues, such as lack of training data, handling long dialogue inputs, and generating semi-structured clinical note which have section heads. To address these issues, we conducted various experiments and analyzed their results. We utilized the DialogLED model pre-trained on long dialogue data to handle long inputs, and we pre-trained on other dialogue datasets to address the lack of training data. We also attempted methods such as using prompts and contrastive learning for handling sections. This paper provides insights into clinical note generation through analyzing experimental methods and results, and it suggests future research directions. 2023.clinicalnlp-1.42 @@ -668,7 +668,7 @@ RaghavKapoor MedhaPalavalli AmandaBertschCarnegie Mellon University - MatthewGormleySchool of Computer Science, Carnegie Mellon University and 3M + MatthewGormleySchool of Computer Science, Carnegie Mellon University and 3M 490-502 Medical dialogue summarization is challenging due to the unstructured nature of medical conversations, the use of medical terminologyin gold summaries, and the need to identify key information across multiple symptom sets. We present a novel system for the Dialogue2Note Medical Summarization tasks in the MEDIQA 2023 Shared Task. Our approach for sectionwise summarization (Task A) is a two-stage process of selecting semantically similar dialogues and using the top-k similar dialogues as in-context examples for GPT-4. For full-note summarization (Task B), we use a similar solution with k=1. We achieved 3rd place in Task A (2nd among all teams), 4th place in Task B Division Wise Summarization (2nd among all teams), 15th place in Task A Section Header Classification (9th among all teams), and 8th place among all teams in Task B. Our results highlight the effectiveness of few-shot prompting for this task, though we also identify several weaknesses of prompting-based approaches. We compare GPT-4 performance with several finetuned baselines. We find that GPT-4 summaries are more abstractive and shorter. We make our code publicly available. 2023.clinicalnlp-1.51 @@ -681,7 +681,7 @@ Wen-waiYim GriffinAdams NealSnider - MelihaYetisgenUniversity of Washington + MelihaYetisgenUniversity of Washington 503-513 Automatic generation of clinical notes from doctor-patient conversations can play a key role in reducing daily doctors’ workload and improving their interactions with the patients. MEDIQA-Chat 2023 aims to advance and promote research on effective solutions through shared tasks on the automatic summarization of doctor-patient conversations and on the generation of synthetic dialogues from clinical notes for data augmentation. Seventeen teams participated in the challenge and experimented with a broad range of approaches and models. In this paper, we describe the three MEDIQA-Chat 2023 tasks, the datasets, and the participants’ results and methods. We hope that these shared tasks will lead to additional research efforts and insights on the automatic generation and evaluation of clinical notes. 2023.clinicalnlp-1.52 @@ -711,7 +711,7 @@ <fixed-case>C</fixed-case>are4<fixed-case>L</fixed-case>ang at <fixed-case>MEDIQA</fixed-case>-Chat 2023: Fine-tuning Language Models for Classifying and Summarizing Clinical Dialogues AmalAlqahtaniGeorge Washington University RanaSalamaGeorge Washington University - MonaDiabGeorge Washington University + MonaDiabGeorge Washington University AbdouYoussefGeorge Washington University 524-528 Summarizing medical conversations is one of the tasks proposed by MEDIQA-Chat to promote research on automatic clinical note generation from doctor-patient conversations. In this paper, we present our submission to this task using fine-tuned language models, including T5, BART and BioGPT models. The fine-tuned models are evaluated using ensemble metrics including ROUGE, BERTScore andBLEURT. Among the fine-tuned models, Flan-T5 achieved the highest aggregated score for dialogue summarization. diff --git a/data/xml/2023.codi.xml b/data/xml/2023.codi.xml index 992ebdcf37..cfb4361dfc 100644 --- a/data/xml/2023.codi.xml +++ b/data/xml/2023.codi.xml @@ -51,7 +51,7 @@ Ensemble Transfer Learning for Multilingual Coreference Resolution - TuanLaiUniversity of Illinois at Urbana-Champaign + TuanLaiUniversity of Illinois at Urbana-Champaign HengJiUniversity of Illinois at Urbana-Champaign and Amazon (Amazon Scholar) 24-36 Entity coreference resolution is an important research problem with many applications, including information extraction and question answering. Coreference resolution for English has been studied extensively. However, there is relatively little work for other languages. A problem that frequently occurs when working with a non-English language is the scarcity of annotated training data. To overcome this challenge, we design a simple but effective ensemble-based framework that combines various transfer learning (TL) techniques. We first train several models using different TL methods. Then, during inference, we compute the unweighted average scores of the models’ predictions to extract the final set of predicted clusters. Furthermore, we also propose a low-cost TL method that bootstraps coreference resolution models by utilizing Wikipedia anchor texts. Leveraging the idea that the coreferential links naturally exist between anchor texts pointing to the same article, our method builds a sizeable distantly-supervised dataset for the target language that consists of tens of thousands of documents. We can pre-train a model on the pseudo-labeled dataset before finetuning it on the final target dataset. Experimental results on two benchmark datasets, OntoNotes and SemEval, confirm the effectiveness of our methods. Our best ensembles consistently outperform the baseline approach of simple training by up to 7.68% in the F1 score. These ensembles also achieve new state-of-the-art results for three languages: Arabic, Dutch, and Spanish. @@ -73,8 +73,8 @@ Leveraging Structural Discourse Information for Event Coreference Resolution in <fixed-case>D</fixed-case>utch LoicDe LangheGhent University - OrpheeDe ClercqLT3, Ghent University - VeroniqueHosteLT3, Ghent University + OrpheeDe ClercqLT3, Ghent University + VeroniqueHosteLT3, Ghent University 48-53 2023.codi-1.5 de-langhe-etal-2023-leveraging @@ -165,7 +165,7 @@ Improving Long Context Document-Level Machine Translation ChristianHeroldRWTH Aachen University - HermannNeyRWTH Aachen University + HermannNeyRWTH Aachen University 112-125 Document-level context for neural machine translation (NMT) is crucial to improve the translation consistency and cohesion, the translation of ambiguous inputs, as well as several other linguistic phenomena. Many works have been published on the topic of document-level NMT, but most restrict the system to only local context, typically including just the one or two preceding sentences as additional information. This might be enough to resolve some ambiguous inputs, but it is probably not sufficient to capture some document-level information like the topic or style of a conversation. When increasing the context size beyond just the local context, there are two challenges: (i) the memory usage increases exponentially (ii) the translation performance starts to degrade. We argue that the widely-used attention mechanism is responsible for both issues. Therefore, we propose a constrained attention variant that focuses the attention on the most relevant parts of the sequence, while simultaneously reducing the memory consumption. For evaluation, we utilize targeted test sets in combination with novel evaluation techniques to analyze the translations in regards to specific discourse-related phenomena. We find that our approach is a good compromise between sentence-level NMT vs attending to the full context, especially in low resource scenarios. 2023.codi-1.15 @@ -197,7 +197,7 @@ The distribution of discourse relations within and across turns in spontaneous conversation S. MagalíLópez CortezUniversity at Buffalo - Cassandra L.JacobsUniversity at Buffalo + Cassandra L.JacobsUniversity at Buffalo 156-162 Time pressure and topic negotiation may impose constraints on how people leverage discourse relations (DRs) in spontaneous conversational contexts. In this work, we adapt a system of DRs for written language to spontaneous dialogue using crowdsourced annotations from novice annotators. We then test whether discourse relations are used differently across several types of multi-utterance contexts. We compare the patterns of DR annotation within and across speakers and within and across turns. Ultimately, we find that different discourse contexts produce distinct distributions of discourse relations, with single-turn annotations creating the most uncertainty for annotators. Additionally, we find that the discourse relation annotations are of sufficient quality to predict from embeddings of discourse units. 2023.codi-1.21 @@ -208,7 +208,7 @@ Embedding Mental Health Discourse for Community Recommendation HyDangUniversity of Notre Dame - BangNguyenUniversity of Notre Dame + BangNguyenUniversity of Notre Dame NoahZiemsUniversity of Notre Dame MengJiangUniversity of Notre Dame 163-172 diff --git a/data/xml/2023.computel.xml b/data/xml/2023.computel.xml index c49b3010b8..c825a6535d 100644 --- a/data/xml/2023.computel.xml +++ b/data/xml/2023.computel.xml @@ -6,7 +6,7 @@ AtticusHarrigan AditiChaudhary ShrutiRijhwani - SarahMoeller + SarahMoeller AnttiArppe AlexisPalmer RyanHenke @@ -32,7 +32,7 @@ WawanSahrozi BenFoley BradleyMcDonnell - DanJurafsky + DanJurafsky 1–6 2023.computel-1.1 san-etal-2023-leveraging @@ -46,7 +46,7 @@ Using <fixed-case>LARA</fixed-case> to rescue a legacy <fixed-case>P</fixed-case>itjantjatjara course - MannyRayner + MannyRayner SashaWilmoth 13–18 2023.computel-1.3 @@ -65,7 +65,7 @@ Towards a finite-state morphological analyser for San Mateo Huave - Francis M.Tyers + Francis M.Tyers Samuel HerreraCastro 30–37 2023.computel-1.5 @@ -73,7 +73,7 @@ Investigating Speaker Diarization of Endangered Language Data - Gina-AnneLevow + Gina-AnneLevow 38–43 2023.computel-1.6 levow-2023-investigating @@ -117,7 +117,7 @@ Studying the impact of language model size for low-resource <fixed-case>ASR</fixed-case> ZoeyLiu JustinSpence - EmilyPrud’hommeaux + EmilyPrud’hommeaux 77–83 2023.computel-1.11 liu-etal-2023-studying diff --git a/data/xml/2023.conll.xml b/data/xml/2023.conll.xml index cee9961180..0cf8075b64 100644 --- a/data/xml/2023.conll.xml +++ b/data/xml/2023.conll.xml @@ -81,7 +81,7 @@ Investigating the Nature of Disagreements on Mid-Scale Ratings: A Case Study on the Abstractness-Concreteness Continuum UrbanKnupleš DiegoFrassinelli - SabineSchulte im Walde + SabineSchulte im Walde 70–86 Humans tend to strongly agree on ratings on a scale for extreme cases (e.g., a CAT is judged as very concrete), but judgements on mid-scale words exhibit more disagreement. Yet, collected rating norms are heavily exploited across disciplines. Our study focuses on concreteness ratings and (i) implements correlations and supervised classification to identify salient multi-modal characteristics of mid-scale words, and (ii) applies a hard clustering to identify patterns of systematic disagreement across raters. Our results suggest to either fine-tune or filter mid-scale target words before utilising them. 2023.conll-1.6 @@ -126,7 +126,7 @@ A Minimal Approach for Natural Language Action Space in Text-based Games DongwonRyu MengFang - GholamrezaHaffari + GholamrezaHaffari ShiruiPan EhsanShareghi 138–154 @@ -138,7 +138,7 @@ Structural Ambiguity and its Disambiguation in Language Model Based Parsers: the Case of <fixed-case>D</fixed-case>utch Clause Relativization GijsWijnholds - MichaelMoortgat + MichaelMoortgat 155–164 This paper addresses structural ambiguity in Dutch relative clauses. By investigating the task of disambiguation by grounding, we study how the presence of a prior sentence can resolve relative clause ambiguities. We apply this method to two parsing architectures in an attempt to demystify the parsing and language model components of two present-day neural parsers. Results show that a neurosymbolic parser, based on proof nets, is more open to data bias correction than an approach based on universal dependencies, although both set-ups suffer from a comparable initial data bias. 2023.conll-1.11 @@ -151,7 +151,7 @@ AtifMahmud AbbasGhaddar MehdiRezagholizadeh - PhillippeLanglais + PhillippeLanglais PrasannaParthasarathi 165–182 Self-supervised Language Modelling (LM) objectives —like BERT masked LM— have become the default choice for pretraining language models. TOken Reordering (TOR) pretraining objectives, beyond token prediction, have not been extensively studied yet. In this work, we explore challenges that underlie the development and usefulness of such objectives on downstream language tasks. In particular, we design a novel TOR pretraining objective which predicts whether two tokens are adjacent or not given a partial bag-of-tokens input. In addition, we investigate the usefulness of Graph Isomorphism Network (GIN), when placed on top of the BERT encoder, in order to enhance the overall model ability to leverage topological signal from the encoded representations. We compare language understanding abilities of TOR to the one of MLM on word-order sensitive (e.g. Dependency Parsing) and insensitive (e.g. text classification) tasks in both full training and few-shot settings. Our results indicate that TOR is competitive to MLM on the GLUE language understanding benchmark, and slightly superior on syntax-dependent datasets, especially in the few-shot setting. @@ -369,7 +369,7 @@ YunkeHe XixianLiao JialingLiang - GemmaBoleda + GemmaBoleda 456–475 Different speakers often produce different names for the same object or entity (e.g., “woman” vs. “tourist” for a female tourist). The reasons behind variation in naming are not well understood. We create a Language and Vision dataset for Mandarin Chinese that provides an average of 20 names for 1319 naturalistic images, and investigate how familiarity with a given kind of object relates to the degree of naming variation it triggers across subjects. We propose that familiarity influences naming variation in two competing ways: increasing familiarity can either expand vocabulary, leading to higher variation, or promote convergence on conventional names, thereby reducing variation. We find evidence for both factors being at play. Our study illustrates how computational resources can be used to address research questions in Cognitive Science. 2023.conll-1.30 @@ -426,7 +426,7 @@ Exploring Transformers as Compact, Data-efficient Language Models ClaytonFields - CaseyKennington + CaseyKennington 521–531 Large scale transformer models, trained with massive datasets have become the standard in natural language processing. The huge size of most transformers make research with these models impossible for those with limited computational resources. Additionally, the enormous pretraining data requirements of transformers exclude pretraining them with many smaller datasets that might provide enlightening results. In this study, we show that transformers can be significantly reduced in size, with as few as 5.7 million parameters, and still retain most of their downstream capability. Further we show that transformer models can retain comparable results when trained on human-scale datasets, as few as 5 million words of pretraining data. Overall, the results of our study suggest transformers function well as compact, data efficient language models and that complex model compression methods, such as model distillation are not necessarily superior to pretraining reduced size transformer models from scratch. 2023.conll-1.35 @@ -448,7 +448,7 @@ KoyenaPal JiudingSun AndrewYuan - ByronWallace + ByronWallace DavidBau 548–560 We conjecture that hidden state vectors corresponding to individual input tokens encode information sufficient to accurately predict several tokens ahead. More concretely, in this paper we ask: Given a hidden (internal) representation of a single token at position t in an input, can we reliably anticipate the tokens that will appear at positions ≥ t + 2? To test this, we measure linear approximation and causal intervention methods in GPT-J-6B to evaluate the degree to which individual hidden states in the network contain signal rich enough to predict future hidden states and, ultimately, token outputs. We find that, at some layers, we can approximate a model’s output with more than 48% accuracy with respect to its prediction of subsequent tokens through a single hidden state. Finally we present a “Future Lens” visualization that uses these methods to create a new view of transformer states. @@ -538,7 +538,7 @@ <fixed-case>GPT</fixed-case>-wee: How Small Can a Small Language Model Really Get? BastianBunzeck - SinaZarrieß + SinaZarrieß 35-46 2023.conll-babylm.2 bunzeck-zarriess-2023-gpt @@ -550,7 +550,7 @@ OsamaNatoufBoise State University AndrewMcMainsBoise State University CatherineHenryBoise State University - CaseyKenningtonBoise State University + CaseyKenningtonBoise State University 47-57 2023.conll-babylm.3 fields-etal-2023-tiny @@ -726,7 +726,7 @@ Not all layers are equally as important: Every Layer Counts <fixed-case>BERT</fixed-case> - LucasGeorges Gabriel CharpentierUniversity of Oslo + LucasGeorges Gabriel CharpentierUniversity of Oslo DavidSamuelUniversity of Oslo 238-252 2023.conll-babylm.20 @@ -751,7 +751,7 @@ A surprisal oracle for active curriculum language modeling XudongHongSaarland University SharidLoáicigaUniversity of Gothenburg - AsadSayeedUniversity of Gothenburg + AsadSayeedUniversity of Gothenburg 259-268 2023.conll-babylm.22 hong-etal-2023-surprisal diff --git a/data/xml/2023.contents.xml b/data/xml/2023.contents.xml index 9ea03de7ec..150133da72 100644 --- a/data/xml/2023.contents.xml +++ b/data/xml/2023.contents.xml @@ -5,9 +5,9 @@ Proceedings of the Workshop on Computational Terminology in NLP and Translation Studies (ConTeNTS) Incorporating the 16th Workshop on Building and Using Comparable Corpora (BUCC) Amal HaddadHaddad Ayla RigoutsTerryn - RuslanMitkov + RuslanMitkov ReinhardRapp - PierreZweigenbaum + PierreZweigenbaum SergeSharoff INCOMA Ltd., Shoumen, Bulgaria
Varna, Bulgaria
diff --git a/data/xml/2023.cpss.xml b/data/xml/2023.cpss.xml index 8445fee16f..3fc539d991 100644 --- a/data/xml/2023.cpss.xml +++ b/data/xml/2023.cpss.xml @@ -7,7 +7,7 @@ GabriellaLapesa ValentinGold TheresaGessler - Simone PaoloPonzetto + Simone PaoloPonzetto Association for Computational Lingustics
Ingolstadt, Germany
September @@ -63,7 +63,7 @@ According to <fixed-case>BERT</fixed-case>opic, what do <fixed-case>D</fixed-case>anish Parties Debate on when they Address Energy and Environment? CostanzaNavarretta - Dorte H.Hansen + Dorte H.Hansen 59–68 2023.cpss-1.6 navarretta-hansen-2023-according diff --git a/data/xml/2023.crac.xml b/data/xml/2023.crac.xml index 9aa23cb6ba..0c8dc8c8e1 100644 --- a/data/xml/2023.crac.xml +++ b/data/xml/2023.crac.xml @@ -5,8 +5,8 @@ Proceedings of the Sixth Workshop on Computational Models of Reference, Anaphora and Coreference (CRAC 2023) MaciejOgrodniczuk VincentNg - SameerPradhan - MassimoPoesio + SameerPradhan + MassimoPoesio Association for Computational Linguistics
Singapore
December @@ -22,8 +22,8 @@ Filling in the Gaps: Efficient Event Coreference Resolution using Graph Autoencoder Networks LoicDe Langhe - OrpheeDe Clercq - VeroniqueHoste + OrpheeDe Clercq + VeroniqueHoste 1–7 2023.crac-main.1 de-langhe-etal-2023-filling @@ -45,7 +45,7 @@ Towards Transparency in Coreference Resolution: A Quantum-Inspired Approach HadiWazni - MehrnooshSadrzadeh + MehrnooshSadrzadeh 15–27 2023.crac-main.3 wazni-sadrzadeh-2023-towards @@ -55,7 +55,7 @@ Scalar Anaphora: Annotating Degrees of Coreference in Text BingyangYe JingxuanTu - JamesPustejovsky + JamesPustejovsky 28–38 2023.crac-main.4 ye-etal-2023-scalar @@ -119,7 +119,7 @@ Integrated Annotation of Event Structure, Object States, and Entity Coreference KyeongminRim - JamesPustejovsky + JamesPustejovsky 71–77 2023.crac-main.9 rim-pustejovsky-2023-integrated @@ -129,7 +129,7 @@ Proceedings of the CRAC 2023 Shared Task on Multilingual Coreference Resolution - ZdeněkŽabokrtský + ZdeněkŽabokrtský MaciejOgrodniczuk Association for Computational Linguistics
Singapore
@@ -153,7 +153,7 @@ MartinPopel OndrejPrazak JakubSido - DanielZeman + DanielZeman 1–18 This paper summarizes the second edition of the shared task on multilingual coreference resolution, held with the CRAC 2023 workshop. Just like last year, participants of the shared task were to create trainable systems that detect mentions and group them based on identity coreference; however, this year’s edition uses a slightly different primary evaluation score, and is also broader in terms of covered languages: version 1.1 of the multilingual collection of harmonized coreference resources CorefUD was used as the source of training and evaluation data this time, with 17 datasets for 12 languages. 7 systems competed in this shared task. 2023.crac-sharedtask.1 @@ -177,7 +177,7 @@ Neural End-to-End Coreference Resolution using Morphological Information TuğbaPamay Arslan KutayAcar - GülşenEryiğit + GülşenEryiğit 34–40 In morphologically rich languages, words consist of morphemes containing deeper information in morphology, and thus such languages may necessitate the use of morpheme-level representations as well as word representations. This study introduces a neural multilingual end-to-end coreference resolution system by incorporating morphological information in transformer-based word embeddings on the baseline model. This proposed model participated in the Sixth Workshop on Computational Models of Reference, Anaphora and Coreference (CRAC 2023). Including morphological information explicitly into the coreference resolution improves the performance, especially in morphologically rich languages (e.g., Catalan, Hungarian, and Turkish). The introduced model outperforms the baseline system by 2.57 percentage points on average by obtaining 59.53% CoNLL F-score. 2023.crac-sharedtask.3 @@ -197,7 +197,7 @@ <fixed-case>M</fixed-case>c<fixed-case>G</fixed-case>ill at <fixed-case>CRAC</fixed-case> 2023: Multilingual Generalization of Entity-Ranking Coreference Resolution Models IanPorada - Jackie Chi KitCheung + Jackie Chi KitCheung 52–57 Our submission to the CRAC 2023 shared task, described herein, is an adapted entity-ranking model jointly trained on all 17 datasets spanning 12 languages. Our model outperforms the shared task baselines by a difference in F1 score of +8.47, achieving an ultimate F1 score of 65.43 and fourth place in the shared task. We explore design decisions related to data preprocessing, the pretrained encoder, and data mixing. 2023.crac-sharedtask.5 diff --git a/data/xml/2023.crowdmt.xml b/data/xml/2023.crowdmt.xml index 39e574df96..25bca81f86 100644 --- a/data/xml/2023.crowdmt.xml +++ b/data/xml/2023.crowdmt.xml @@ -3,13 +3,13 @@ Proceedings of the 1st Workshop on Open Community-Driven Machine Translation - MiquelEsplà-Gomis - Mikel L.Forcada + MiquelEsplà-Gomis + Mikel L.Forcada TajaKuzman NikolaLjubešić Rikvan Noord - GemaRamírez-Sánchez - JörgTiedemann + GemaRamírez-Sánchez + JörgTiedemann AntonioToral European Association for Machine Translation
Tampere, Finland
diff --git a/data/xml/2023.cs4oa.xml b/data/xml/2023.cs4oa.xml index 1941cae0cd..9b5e7f8a03 100644 --- a/data/xml/2023.cs4oa.xml +++ b/data/xml/2023.cs4oa.xml @@ -48,7 +48,7 @@ Just Collect, Don’t Filter: Noisy Labels Do Not Improve Counterspeech Collection for Languages Without Annotated Resources PaulineMöhle MatthiasOrlikowski - PhilippCimiano + PhilippCimiano 44-61 Counterspeech on social media is rare. Consequently, it is difficult to collect naturally occurring examples, in particular for languages without annotated datasets. In this work, we study methods to increase the relevance of social media samples for counterspeech annotation when we lack annotated resources. We use the example of sourcing German data for counterspeech annotations from Twitter. We monitor tweets from German politicians and activists to collect replies. To select relevant replies we a) find replies that match German abusive keywords or b) label replies for counterspeech using a multilingual classifier fine-tuned on English data. For both approaches and a baseline setting, we annotate a random sample and use bootstrap sampling to estimate the amount of counterspeech. We find that neither the multilingual model nor the keyword approach achieve significantly higher counts of true counterspeech than the baseline. Thus, keyword lists or multi-lingual classifiers are likely not worth the added complexity beyond purposive data collection: Already without additional filtering, we gather a meaningful sample with 7,4% true counterspeech. 2023.cs4oa-1.4 diff --git a/data/xml/2023.cxgsnlp.xml b/data/xml/2023.cxgsnlp.xml index a49bba20d5..d8eeb1c24c 100644 --- a/data/xml/2023.cxgsnlp.xml +++ b/data/xml/2023.cxgsnlp.xml @@ -3,7 +3,7 @@ Proceedings of the First International Workshop on Construction Grammars and NLP (CxGs+NLP, GURT/SyntaxFest 2023) - ClaireBonial + ClaireBonial HarishTayyar Madabushi Association for Computational Linguistics
Washington, D.C.
@@ -37,7 +37,7 @@ <fixed-case>CAL</fixed-case>a<fixed-case>M</fixed-case>o: a Constructionist Assessment of Language Models LudovicaPannitto - AurélieHerbelot + AurélieHerbelot 21-30 This paper presents a novel framework for evaluating Neural Language Models’ linguistic abilities using a constructionist approach. Not only is the usage-based model in line with the un- derlying stochastic philosophy of neural architectures, but it also allows the linguist to keep meaning as a determinant factor in the analysis. We outline the framework and present two possible scenarios for its application. 2023.cxgsnlp-1.3 @@ -56,7 +56,7 @@ Constructivist Tokenization for <fixed-case>E</fixed-case>nglish AllisonFan - WeiweiSun + WeiweiSun 36-40 This paper revisits tokenization from a theoretical perspective, and argues for the necessity of a constructivist approach to tokenization for semantic parsing and modeling language acquisition. We consider two problems: (1) (semi-) automatically converting existing lexicalist annotations, e.g. those of the Penn TreeBank, into constructivist annotations, and (2) automatic tokenization of raw texts. We demonstrate that (1) a heuristic rule-based constructivist tokenizer is able to yield relatively satisfactory accuracy when gold standard Penn TreeBank part-of-speech tags are available, but that some manual annotations are still necessary to obtain gold standard results, and (2) a neural tokenizer is able to provide accurate automatic constructivist tokenization results from raw character sequences. Our research output also includes a set of high-quality morpheme-tokenized corpora, which enable the training of computational models that more closely align with language comprehension and acquisition. 2023.cxgsnlp-1.5 @@ -87,7 +87,7 @@ Investigating Stylistic Profiles for the Task of Empathy Classification in Medical Narrative Essays PriyankaDey - RoxanaGirju + RoxanaGirju 63-74 One important aspect of language is how speakers generate utterances and texts to convey their intended meanings. In this paper, we bring various aspects of the Construction Grammar (CxG) and the Systemic Functional Grammar (SFG) theories in a deep learning computational framework to model empathic language. Our corpus consists of 440 essays written by premed students as narrated simulated patient–doctor interactions. We start with baseline classifiers (state-of-the-art recurrent neural networks and transformer models). Then, we enrich these models with a set of linguistic constructions proving the importance of this novel approach to the task of empathy classification for this dataset. Our results indicate the potential of such constructions to contribute to the overall empathy profile of first-person narrative essays. 2023.cxgsnlp-1.8 @@ -111,8 +111,8 @@ TaiqiHe NaokiOtani DavidR. Mortensen - LoriLevin - HinrichSchütze + LoriLevin + HinrichSchütze 85-95 Construction Grammar (CxG) has recently been used as the basis for probing studies that have investigated the performance of large pretrained language models (PLMs) with respect to the structure and meaning of constructions. In this position paper, we make suggestions for the continuation and augmentation of this line of research. We look at probing methodology that was not designed with CxG in mind, as well as probing methodology that was designed for specific constructions. We analyse selected previous work in detail, and provide our view of the most important challenges and research questions that this promising new field faces. 2023.cxgsnlp-1.10 diff --git a/data/xml/2023.depling.xml b/data/xml/2023.depling.xml index 7036d3a3a9..60a1c348d3 100644 --- a/data/xml/2023.depling.xml +++ b/data/xml/2023.depling.xml @@ -3,7 +3,7 @@ Proceedings of the Seventh International Conference on Dependency Linguistics (Depling, GURT/SyntaxFest 2023) - OwenRambowStony Brook University + OwenRambowStony Brook University FrançoisLareauUniversité de Montréal Association for Computational Linguistics
Washington, D.C.
@@ -100,7 +100,7 @@ SimonMille JosepRicci AlexanderShvets - AnyaBelz + AnyaBelz 91-101 We present work in progress that aims to address the coverage issue faced by rule-based text generators. We propose a pipeline for extracting abstract dependency template (predicate-argument structures) from Wikipedia text to be used as input for generating text from structured data with the FORGe system. The pipeline comprises three main components: (i) candidate sentence retrieval, (ii) clause extraction, ranking and selection, and (iii) conversion to predicate-argument form. We present an approach and preliminary evaluation for the ranking and selection module. 2023.depling-1.9 diff --git a/data/xml/2023.dialdoc.xml b/data/xml/2023.dialdoc.xml index 1ebca44b30..9172f98612 100644 --- a/data/xml/2023.dialdoc.xml +++ b/data/xml/2023.dialdoc.xml @@ -59,10 +59,10 @@ Position Matters! Empirical Study of Order Effect in Knowledge-grounded Dialogue HsuanSu - ShachiH. KumarIntel Labs + ShachiH. KumarIntel Labs SahisnuMazumderIntel Labs, USA WendaChen - RameshManuvinakurike + RameshManuvinakurike EdaOkurIntel Labs SauravSahayIntel LamaNachman @@ -116,7 +116,7 @@ MichaelLucke SheenaDufresne MariaGiniUniversity of Minnesota , Twin Ciities - SergueiPakhomovUniversity of Minnesota - Twin Cities + SergueiPakhomovUniversity of Minnesota - Twin Cities 68-79 In healthcare, the ability to care for oneself is reflected in the “Activities of Daily Living (ADL),” which serve as a measure of functional ability (functioning). A lack of functioning may lead to poor living conditions requiring personal care and assistance. To accurately identify those in need of support, assistance programs continuously evaluate participants’ functioning across various domains. However, the assessment process may encounter consistency issues when multiple assessors with varying levels of expertise are involved. Novice assessors, in particular, may lack the necessary preparation for real-world interactions with participants. To address this issue, we developed a dialogue system that simulates interactions between assessors and individuals of varying functioning in a natural and reproducible way. The dialogue system consists of two major modules, one for natural language understanding (NLU) and one for natural language generation (NLG), respectively. In order to generate responses consistent with the underlying knowledge base, the dialogue system requires both an understanding of the user’s query and of biographical details of an individual being simulated. To fulfill this requirement, we experimented with query classification and generated responses based on those biographical details using some recently released InstructGPT-like models. 2023.dialdoc-1.8 @@ -130,7 +130,7 @@ QiZeng RevanthGangi Reddy HengJi - ChengXiangZhai + ChengXiangZhai 80-85 Existing reference-free turn-level evaluation metrics for chatbots inadequately capture the interaction between the user and the system. Consequently, they often correlate poorly with human evaluations. To address this issue, we propose a novel model-agnostic approach that leverages Conditional Pointwise Mutual Information (C-PMI) to measure the turn-level interaction between the system and the user based on a given evaluation dimension. Experimental results on the widely used FED dialogue evaluation dataset demonstrate that our approach significantly improves the correlation with human judgment compared with existing evaluation systems. By replacing the negative log-likelihood-based scorer with our proposed C-PMI scorer, we achieve a relative 60.5% higher Spearman correlation on average for the FED evaluation metric. Our code is publicly available at https://github.com/renll/C-PMI. 2023.dialdoc-1.9 @@ -144,8 +144,8 @@ WeiFangMassachusetts Institute of Technology HongyinLuoMassachusetts Institute of Technology XixinWuThe Chinese University of Hong Kong - HelenMeng - JamesGlass + HelenMeng + JamesGlass 86-100 Collecting and constructing human-annotated corpora for training conversational question-answering (CQA) models has recently been shown to be inefficient and costly. To solve this problem, previous works have proposed training QA models with automatically generated QA data. In this work, we extend earlier studies on QA synthesis, and propose an efficient QA data generation algorithm under conversational settings. Our model recognizes potential dialogue topics, generates corresponding questions, and extracts answers from grounding passages. To improve the quality of generated QAs and downstream self-training of CQA models, we propose dropout and agreement-based QA selection methods. We conduct experiments on both data augmentation and domain adaptation settings. Experiments on the QuAC and Doc2Dial tasks show that the proposed method can significantly improve the quality of generated QA data, and also improves the accuracy of self-trained CQA models based on the constructed training corpora. 2023.dialdoc-1.10 @@ -159,7 +159,7 @@ MitaliPotnis SrijanBansal TerukoMitamuraCarnegie Mellon University - EricNybergCarnegie Mellon University + EricNybergCarnegie Mellon University 101-108 The DialDoc 2023 shared task has expanded the document-grounded dialogue task to encompass multiple languages, despite having limited annotated data. This paper assesses the effectiveness of both language-agnostic and language-aware paradigms for multilingual pre-trained transformer models in a bi-encoder-based dense passage retriever (DPR), concluding that the language-agnostic approach is superior. Additionally, the study investigates the impact of query rewriting techniques using large language models, such as ChatGPT, on multilingual, document-grounded question-answering systems. The experiments conducted demonstrate that, for the examples examined, query rewriting does not enhance performance compared to the original queries. This failure is due to topic switching in final dialogue turns and irrelevant topics being considered for query rewriting. 2023.dialdoc-1.11 @@ -171,7 +171,7 @@ EhsanLotfiUniversiteit Antwerpen MaximeDe BruynAntwerp University Jeska.buhmann@uantwerpen.beJeska.buhmann@uantwerpen.beNA - WalterDaelemansUniversity of Antwerp + WalterDaelemansUniversity of Antwerp 109-121 Crowd-sourcing has been one of the primary ways to curate conversational data, specially for certain scenarios like grounding in knowledge. In this setting, using online platforms like AMT, non-expert participants are hired to converse with each other, following instructions which try to guide the outcome towards the desired format. The resulting data then is used for different parts of dialog modelling like knowledge selection and response selection/generation. In this work, we take a closer look into two of the most popular knowledge grounded dialog (KGD) datasets. Investigating potential biases and artefacts in knowledge selection labels, we observe that in many cases the ‘knowledge selection flow’ simply follows the order of presented knowledge pieces. In Wizard of Wikipedia (the most popular KGD dataset) we use simple content-agnostic models based on this bias to get significant knowledge selection performance. In Topical-Chat we see a similar correlation between the knowledge selection sequence and the order of entities and their segments, as provided to crowd-source workers. We believe that the observed results, question the significance and origin of the presumed dialog-level attributes like ‘knowledge flow’ in these crowd-sourced datasets. 2023.dialdoc-1.12 diff --git a/data/xml/2023.disrpt.xml b/data/xml/2023.disrpt.xml index d72d6098e9..f5411385c1 100644 --- a/data/xml/2023.disrpt.xml +++ b/data/xml/2023.disrpt.xml @@ -4,7 +4,7 @@ Proceedings of the 3rd Shared Task on Discourse Relation Parsing and Treebanking (DISRPT 2023) ChloéBraudIrit, Cnrs - Yang JanetLiuGeorgetown University + Yang JanetLiuGeorgetown University EleniMethenitiIRIT, University of Toulouse PhilippeMullerIRIT, University of Toulouse LauraRivièreIrit @@ -59,7 +59,7 @@ <fixed-case>HITS</fixed-case> at <fixed-case>DISRPT</fixed-case> 2023: Discourse Segmentation, Connective Detection, and Relation Classification - WeiLiuHeidelberg Institute for Theoretical Studies + WeiLiuHeidelberg Institute for Theoretical Studies YiFanHeidelberg Institute for Theoretical Studies MichaelStrubeHeidelberg Institute for Theoretical Studies 43-49 diff --git a/data/xml/2023.dmr.xml b/data/xml/2023.dmr.xml index f23c746c71..b204670ab8 100644 --- a/data/xml/2023.dmr.xml +++ b/data/xml/2023.dmr.xml @@ -50,10 +50,10 @@ <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation for Grounded Human-Robot Communication - ClaireBonial + ClaireBonial JulieForesta Nicholas C.Fung - Cory J.Hayes + Cory J.Hayes PhilipOsteen JacobArkin BennedHedegaard @@ -68,7 +68,7 @@ ChristopherTam RichardBrutti KennethLai - JamesPustejovsky + JamesPustejovsky 45–51 Actions are critical for interpreting dialogue: they provide context for demonstratives and definite descriptions in discourse, and they continually update the common ground. This paper describes how Abstract Meaning Representation (AMR) can be used to annotate actions in multimodal human-human and human-object interactions. We conduct initial annotations of shared task and first-person point-of-view videos. We show that AMRs can be interpreted by a proxy language, such as VoxML, as executable annotation structures in order to recreate and simulate a series of annotated events. 2023.dmr-1.5 @@ -118,12 +118,12 @@ <fixed-case>UMR</fixed-case> Annotation of Multiword Expressions JuliaBonn AndrewCowell - JanHajič + JanHajič AlexisPalmer - MarthaPalmer - JamesPustejovsky + MarthaPalmer + JamesPustejovsky HaiboSun - ZdenkaUresova + ZdenkaUresova ShiraWein NianwenXue JinZhao @@ -157,7 +157,7 @@ Damián ArielFurman PabloTorres José A.Rodríguez - LauraAlonso Alemany + LauraAlonso Alemany DiegoLetzen VaninaMartínez 136–153 diff --git a/data/xml/2023.dravidianlangtech.xml b/data/xml/2023.dravidianlangtech.xml index 80d8038862..38cc34e476 100644 --- a/data/xml/2023.dravidianlangtech.xml +++ b/data/xml/2023.dravidianlangtech.xml @@ -5,7 +5,7 @@ Proceedings of the Third Workshop on Speech and Language Technologies for Dravidian Languages Bharathi R.Chakravarthi RubaPriyadharshini - Anand KumarM + Anand KumarM SajeethaThavareesan ElizabethSherly INCOMA Ltd., Shoumen, Bulgaria @@ -306,7 +306,7 @@ ZahraAhani OlgaKolesnikova GrigoriSidorov - AlexanderGelbukh + AlexanderGelbukh 180–185 With the prevalence of code-mixing among speakers of Dravidian languages, DravidianLangTech proposed the shared task on Sentiment Analysis in Tamil and Tulu at RANLP 2023. This paper presents the submission of LIDOMA, which proposes a methodology that combines lexical features and Convolutional Neural Networks (CNNs) to address the challenge. A fine-tuned 6-layered CNN model is employed, achieving macro F1 scores of 0.542 and 0.199 for Tulu and Tamil, respectively 2023.dravidianlangtech-1.25 @@ -408,7 +408,7 @@ TadesseKebede OlgaKolesnikova GrigoriSidorov - AlexanderGelbukh + AlexanderGelbukh 239–243 This research paper focuses on sentiment analysis of Tamil and Tulu texts using a BERT model and an RNN model. The BERT model, which was pretrained, achieved satisfactory performance for the Tulu language, with a Macro F1 score of 0.352. On the other hand, the RNN model showed good performance for Tamil language sentiment analysis, obtaining a Macro F1 score of 0.208. As future work, the researchers aim to fine-tune the models to further improve their results after the training process. 2023.dravidianlangtech-1.35 @@ -420,7 +420,7 @@ SelamKanta OlgaKolesnikova GrigoriSidorov - AlexanderGelbukh + AlexanderGelbukh 244–249 This research focuses on identifying abusive language in comments. The study utilizes deep learning models, including Long Short-Term Memory (LSTM) and Recurrent Neural Networks (RNNs), to analyze linguistic patterns. Specifically, the LSTM model, a type of RNN, is used to understand the context by capturing long-term dependencies and intricate patterns in the input sequences. The LSTM model achieves better accuracy and is enhanced through the addition of a dropout layer and early stopping. For detecting abusive language in Telugu and Tamil-English, an LSTM model is employed, while in Tamil abusive language detection, a word-level RNN is developed to identify abusive words. These models process text sequentially, considering overall content and capturing contextual dependencies. 2023.dravidianlangtech-1.36 diff --git a/data/xml/2023.dstc.xml b/data/xml/2023.dstc.xml index 7e5f65f0f5..ef24365d8c 100644 --- a/data/xml/2023.dstc.xml +++ b/data/xml/2023.dstc.xml @@ -12,7 +12,7 @@ BehnamHedayatnia SatwikKottur SeungwhanMoon - ChenZhang + ChenZhang Association for Computational Linguistics
Prague, Czech Republic
September @@ -23,7 +23,7 @@ Exploring Prompt-based Multi-task Learning for Multimodal Dialog State Tracking and Immersive Multimodal Conversation YirongChenSouth China University of Technology - YaLiIFLYTEK Research + YaLiIFLYTEK Research TaoWangiFLYTEK Research XiaofenXingSouth China University of Technology XiangminXuSouth China University of Technology @@ -78,7 +78,7 @@ Multi-Stage Coarse-to-Fine Contrastive Learning for Conversation Intent Induction CaiyuanChuChongqing University - YaLiIFLYTEK Research + YaLiIFLYTEK Research YifanLiuIFLYTEK Research Jia-ChenGuUniversity of Science and Technology of China QuanLiuiFLYTEK Research @@ -116,7 +116,7 @@ JeiyoonParkLLSOLLU YoonnaJangKorea University ChanheeLeeNaver - HeuiseokLimKorea University + HeuiseokLimKorea University 57-66 The focus of this work is to investigate unsupervised approaches to overcome quintessential challenges in designing task-oriented dialog schema: assigning intent labels to each dialog turn (intent clustering) and generating a set of intents based on the intent clustering methods (intent induction). We postulate there are two salient factors for automatic induction of intents: (1) clustering algorithm for intent labeling and (2) user utterance embedding space. We compare existing off-the-shelf clustering models and embeddings based on DSTC11 evaluation. Our extensive experiments demonstrate that the combined selection of utterance embedding and clustering method in the intent induction task should be carefully considered. We also present that pretrained MiniLM with Agglomerative clustering shows significant improvement in NMI, ARI, F1, accuracy and example coverage in intent induction tasks. The source codes are available at https://github.com/Jeiyoon/dstc11-track2. 2023.dstc-1.8 @@ -163,7 +163,7 @@ LéoJacqminOrange & Aix-Marseille University LucasDruartOrange & Avignon University YannickEstèveLIA - Avignon University - BenoîtFavreLab. Informatique et Systèmes / Aix-Marseille University / CNRS + BenoîtFavreLab. Informatique et Systèmes / Aix-Marseille University / CNRS LinaM RojasOrange ValentinVielzeufOrange Labs 95-104 @@ -176,7 +176,7 @@ RidongJiangInstitute for Infocomm Research WeiShiI2R BinWangNational University of Singapore - ChenZhangNational University of Singapore + ChenZhangNational University of Singapore YanZhangNational University of Singapore ChunleiPanNational University of Singapore JungJae KimI2R @@ -215,7 +215,7 @@ PatríciaPereiraInstituto Superior Técnico / INESC-ID HelenaMonizINESC-ID JoaoPaulo CarvalhoINESC-ID / Instituto Superior Técnico, University of Lisbon, Portugal - AlonLavieUnbabel + AlonLavieUnbabel IsabelTrancosoIST / INESC-ID 133-143 Despite significant research effort in the development of automatic dialogue evaluation metrics, little thought is given to evaluating dialogues other than in English. At the same time, ensuring metrics are invariant to semantically similar responses is also an overlooked topic. In order to achieve the desired properties of robustness and multilinguality for dialogue evaluation metrics, we propose a novel framework that takes advantage of the strengths of current evaluation models with the newly-established paradigm of prompting Large Language Models (LLMs). Empirical results show our framework achieves state of the art results in terms of mean Spearman correlation scores across several benchmarks and ranks first place on both the Robust and Multilingual tasks of the DSTC11 Track 4 “Automatic Evaluation Metrics for Open-Domain Dialogue Systems”, proving the evaluation capabilities of prompted LLMs. @@ -308,7 +308,7 @@ ChangxinKeHarbin Institute of Technology ChuruiSunHarbin Institute of Technology LongxuanMaHarbin Institute of Technology - Wei-NanZhangHarbin Institute of Technology + Wei-NanZhangHarbin Institute of Technology TingLiu哈尔滨工业大学 216-225 We participate in the 11th Dialog System Technology Challenges (DSTC) track-5 called Task-oriented Conversational Modeling with Subjective Knowledge. Introducing subjective knowledge into task-oriented dialogue (TOD) can help the DS to understand variables of subjective user needs and to suit more dialogue scenarios. Track-5 includes several sub-tasks: 1) knowledge-seeking turn detection; 2) knowledge entity tracking; 3) knowledge entry selection; and 4) use of the selected knowledge entries for response generation. Besides the challenges of each sub-tasks own, there are two challenges across different sub-tasks. The first is that there are multiple valid knowledge entries for each knowledge-seeking turn, the accuracy of the knowledge entry selection is important for the quality of response generation. The second challenge is how to address the unseen dialogue/entities/entries in the validation and the test set. In this paper, we propose a difference-aware ensemble method to address these sub-tasks and the two challenges mentioned above. Our method helps to obtain more robust results and performs well on unseen instances. Among all the submissions for the test set, our method ranks 1st on the knowledge-seeking turn detection task and achieves 3rd on the overall automatic evaluation score. Our code and data will be released on GitHub. @@ -358,13 +358,13 @@ for Open-Domain Dialogue Systems at DSTC 11 Track 4 MarioRodríguez-CantelarUniversidad Politécnica de Madrid - ChenZhangNational University of Singapore + ChenZhangNational University of Singapore ChengguangTangTencent KeShiTencent SarikGhazarianISI USC JoãoSedocNew York University LuisFernando D’HaroSpeech Technology and Machine Learning Group - Universidad Politécnica de Madrid - Alexander I.RudnickyCarnegie Mellon University + Alexander I.RudnickyCarnegie Mellon University 260-273 The advent and fast development of neural networks have revolutionized the research on dialogue systems and subsequently have triggered various challenges regarding their automatic evaluation. Automatic evaluation of open-domain dialogue systems as an open challenge has been the center of the attention of many researchers. Despite the consistent efforts to improve automatic metrics’ correlations with human evaluation, there have been very few attempts to assess their robustness over multiple domains and dimensions. Also, their focus is mainly on the English language. All of these challenges prompt the development of automatic evaluation metrics that are reliable in various domains, dimensions, and languages. This track in the 11th Dialogue System Technology Challenge (DSTC11) is part of the ongoing effort to promote robust and multilingual automatic evaluation metrics. This article describes the datasets and baselines provided to participants and discusses the submission and result details of the two proposed subtasks. 2023.dstc-1.28 @@ -376,9 +376,9 @@ for Open-Domain Dialogue Systems at DSTC 11 Track 4SpandanaGellaUniversity of Edinburgh ChaoZhaoUNC Chapel Hill DiJinAmazon Alexa AI - AlexandrosPapangelisAmazon Alexa AI + AlexandrosPapangelisAmazon Alexa AI BehnamHedayatniaAmazon Alexa AI - YangLiuAmazon, Alexa AI + YangLiuAmazon, Alexa AI DilekZ Hakkani-TurAmazon Alexa AI 274-281 Conventional Task-oriented Dialogue (TOD) Systems rely on domain-specific APIs/DBs or external factual knowledge to create responses. In DSTC11 track 5, we aims to provide a new challenging task to accommodate subjective user requests (e.g.,”Is the WIFI reliable?” or “Does the restaurant have a good atmosphere?” into TOD. We release a benchmark dataset, which contains subjective knowledge-seeking dialogue contexts and manually annotated responses that are grounded in subjective knowledge sources. The challenge track received a total of 48 entries from 14 participating teams. diff --git a/data/xml/2023.eacl.xml b/data/xml/2023.eacl.xml index c6cb24c424..523184e74a 100644 --- a/data/xml/2023.eacl.xml +++ b/data/xml/2023.eacl.xml @@ -20,7 +20,7 @@ <fixed-case>P</fixed-case>i<fixed-case>C</fixed-case>: A Phrase-in-Context Dataset for Phrase Understanding and Semantic Search ThangPhamAuburn University SeunghyunYoonAdobe Research - TrungBuiAdobe Research + TrungBuiAdobe Research AnhNguyenAuburn University 1-26 While contextualized word embeddings have been a de-facto standard, learning contextualized phrase embeddings is less explored and being hindered by the lack of a human-annotated benchmark that tests machine understanding of phrase semantics given a context sentence or paragraph (instead of phrases alone). To fill this gap, we propose PiC—a dataset of ∼28K of noun phrases accompanied by their contextual Wikipedia pages and a suite of three tasks for training and evaluating phrase embeddings. Training on PiC improves ranking-models’ accuracy and remarkably pushes span selection (SS) models (i.e., predicting the start and end index of the target phrase) near human accuracy, which is 95% Exact Match (EM) on semantic search given a query phrase and a passage. Interestingly, we find evidence that such impressive performance is because the SS models learn to better capture the common meaning of a phrase regardless of its actual context. SotA models perform poorly in distinguishing two senses of the same phrase in two contexts (∼60% EM) and in estimating the similarity between two different phrases in the same context (∼70% EM). @@ -84,7 +84,7 @@ A Two-Sided Discussion of Preregistration of <fixed-case>NLP</fixed-case> Research - AndersSøgaardUniversity of Copenhagen + AndersSøgaardUniversity of Copenhagen DanielHershcovichUniversity of Copenhagen Miryamde LhoneuxKU Leuven 83-93 @@ -100,7 +100,7 @@ Julian MartinEisenschlosGoogle Jeremy R.ColeGoogle Research FangyuLiuUniversity of Cambridge - William W.CohenGoogle AI + William W.CohenGoogle AI 94-102 We introduce a new in-context learning paradigm to measure Large Language Models’ (LLMs) ability to learn novel words during inference. In particular, we rewrite Winograd-style co-reference resolution problems by replacing the key concept word with a synthetic but plausible word that the model must understand to complete the task. Solving this task requires the model to make use of the dictionary definition of the new word given in the prompt. This benchmark addresses word acquisition, one important aspect of the diachronic degradation known to afflict LLMs. As LLMs are frozen in time at the moment they are trained, they are normally unable to reflect the way language changes over time. We show that the accuracy of LLMs compared to the original Winograd tasks decreases radically in our benchmark, thus identifying a limitation of current models and providing a benchmark to measure future improvements in LLMs ability to do in-context learning. 2023.eacl-main.7 @@ -125,7 +125,7 @@ PranavNarayanan VenkitPennsylvania State University SanjanaGautamPennsylvania State University RuchiPanchanadikarPennsylvania State University - Ting-HaoHuangPennsylvania State University + Ting-HaoHuangPennsylvania State University ShomirWilsonPennsylvania State University 116-122 Little attention is placed on analyzing nationality bias in language models, especially when nationality is highly used as a factor in increasing the performance of social NLP models. This paper examines how a text generation model, GPT-2, accentuates pre-existing societal biases about country-based demonyms. We generate stories using GPT-2 for various nationalities and use sensitivity analysis to explore how the number of internet users and the country’s economic status impacts the sentiment of the stories. To reduce the propagation of biases through large language models (LLM), we explore the debiasing method of adversarial triggering. Our results show that GPT-2 demonstrates significant bias against countries with lower internet users, and adversarial triggering effectively reduces the same. @@ -138,7 +138,7 @@ Investigating data partitioning strategies for crosslinguistic low-resource <fixed-case>ASR</fixed-case> evaluation ZoeyLiuDepartment of Linguistics, University of Florida JustinSpenceUniversity of California, Davis - EmilyPrud’hommeauxBoston College + EmilyPrud’hommeauxBoston College 123-131 Many automatic speech recognition (ASR) data sets include a single pre-defined test set consisting of one or more speakers whose speech never appears in the training set. This “hold-speaker(s)-out” data partitioning strategy, however, may not be ideal for data sets in which the number of speakers is very small. This study investigates ten different data split methods for five languages with minimal ASR training resources. We find that (1) model performance varies greatly depending on which speaker is selected for testing; (2) the average word error rate (WER) across all held-out speakers is comparable not only to the average WER over multiple random splits but also to any given individual random split; (3) WER is also generally comparable when the data is split heuristically or adversarially; (4) utterance duration and intensity are comparatively more predictive factors of variability regardless of the data split. These results suggest that the widely used hold-speakers-out approach to ASR data partitioning can yield results that do not reflect model performance on unseen data or speakers. Random splits can yield more reliable and generalizable estimates when facing data sparsity. 2023.eacl-main.10 @@ -161,8 +161,8 @@ Socratic Question Generation: A Novel Dataset, Models, and Evaluation Beng HengAngIntegrative Sciences and Engineering Programme, NUS - Sujatha DasGollapalliInstitute of Data Science, NUS - See-KiongNgNational University of Singapore + Sujatha DasGollapalliInstitute of Data Science, NUS + See-KiongNgNational University of Singapore 147-165 Socratic questioning is a form of reflective inquiry often employed in education to encourage critical thinking in students, and to elicit awareness of beliefs and perspectives in a subject during therapeutic counseling. Specific types of Socratic questions are employed for enabling reasoning and alternate views against the context of individual personal opinions on a topic. Socratic contexts are different from traditional question generation contexts where “answer-seeking” questions are generated against a given formal passage on a topic, narrative stories or conversations. We present SocratiQ, the first large dataset of 110K (question, context) pairs for enabling studies on Socratic Question Generation (SoQG). We provide an in-depth study on the various types of Socratic questions and present models for generating Socratic questions against a given context through prompt tuning. Our automated and human evaluation results demonstrate that our SoQG models can produce realistic, type-sensitive, human-like Socratic questions enabling potential applications in counseling and coaching. 2023.eacl-main.12 @@ -212,9 +212,9 @@ Retrieval Enhanced Data Augmentation for Question Answering on Privacy Policies - Md RizwanParvezUniversity of California Los Angeles + Md RizwanParvezUniversity of California Los Angeles JianfengChiMeta AI - Wasi UddinAhmadAWS AI Labs + Wasi UddinAhmadAWS AI Labs YuanTianUniversity of California Los Angeles Kai-WeiChangUcla 201-210 @@ -296,7 +296,7 @@ FrancescoMultariSIRIS Lab, Research Division of SIRIS Academic NicolauDuran-SilvaSIRIS Lab, Research Division of SIRIS Academic & Universitat Pompeu Fabra CésarParra-RojasSIRIS Lab, Research Division of SIRIS Academic - AitorGonzalez-AgirreBarcelona Supercomputing Center (BSC) + AitorGonzalez-AgirreBarcelona Supercomputing Center (BSC) Francesco AlessandroMassucciSIRIS Lab, Research Division of SIRIS Academic MartaVillegasBarcelona Supercomputing Center 286-296 @@ -309,8 +309,8 @@ Fair Enough: Standardizing Evaluation and Model Selection for Fairness Research in <fixed-case>NLP</fixed-case> XudongHanThe university of Melbourne - TimothyBaldwinMbzuai - TrevorCohnUniversity of Melbourne + TimothyBaldwinMbzuai + TrevorCohnUniversity of Melbourne 297-312 Modern NLP systems exhibit a range of biases, which a growing literature on model debiasing attempts to correct. However, current progress is hampered by a plurality of definitions of bias, means of quantification, and oftentimes vague relation between debiasing algorithms and theoretical measures of bias. This paper seeks to clarify the current situation and plot a course for meaningful progress in fair learning, with two key contributions: (1) making clear inter-relations among the current gamut of methods, and their relation to fairness theory; and (2) addressing the practical problem of model selection, which involves a trade-off between fairness and accuracy and has led to systemic issues in fairness research. Putting them together, we make several recommendations to help shape future work. 2023.eacl-main.23 @@ -324,7 +324,7 @@ VivekKhetanAccenture Labs BogdanSacaleanuAccenture AnatoleGershmanCarnegie Mellon University - EduardHovyUniversity of Melbourne + EduardHovyUniversity of Melbourne 313-327 We motivate and introduce CHARD: Clinical Health-Aware Reasoning across Dimensions, to investigate the capability of text generation models to act as implicit clinical knowledge bases and generate free-flow textual explanations about various health-related conditions across several dimensions. We collect and present an associated dataset, CHARDat, consisting of explanations about 52 health conditions across three clinical dimensions. We conduct extensive experiments using BART and T5 along with data augmentation, and perform automatic, human, and qualitative analyses. We show that while our models can perform decently, CHARD is very challenging with strong potential for further exploration. 2023.eacl-main.24 @@ -338,7 +338,7 @@ RuqingZhangCAS Key Lab of Network Data Science and Technology, Institute of Computing Technology, Chinese Academy of Sciences YixingFanInstitute of Computing Technology, CAS. JiafengGuoInstitute of Computing Technology, CAS - XueqiChengInstitute of Computing Technology, CAS + XueqiChengInstitute of Computing Technology, CAS 328-339 Recently, prompt tuning has achieved promising results in a variety of natural language processing (NLP) tasks. The typical approach is to insert text pieces (i.e. templates) into the input and transform downstream tasks into the same form as pre-training. In essence, a high-quality template is the foundation of prompt tuning to support the performance of the converted cloze-style task. However, for sarcasm recognition, it is time-consuming and requires increasingly sophisticated domain knowledge to determine the appropriate templates and label words due to its highly figurative nature. In this work, we propose SarcPrompt, to incorporate the prior knowledge about contradictory intentions into prompt tuning for sarcasm recognition. SarcPrompt is inspired by that the speaker usually says the opposite of what they actually mean in the sarcastic text. Based on this idea, we explicitly mimic the actual intention by prompt construction and indicate whether the actual intention is contradictory to the literal content by verbalizer engineering. Experiments on three public datasets with standard and low-resource settings demonstrate the effectiveness of our SarcPrompt for sarcasm recognition. 2023.eacl-main.25 @@ -391,7 +391,7 @@ <fixed-case>D</fixed-case>i<fixed-case>TTO</fixed-case>: A Feature Representation Imitation Approach for Improving Cross-Lingual Transfer ShanuKumarMicrosoft SoujanyaAbbarajuMicrosoft - SandipanDandapatMicrosoft India + SandipanDandapatMicrosoft India SunayanaSitaramMicrosoft Research India MonojitChoudhuryMicrosoft 385-406 @@ -452,7 +452,7 @@ MinghaoWuMonash University GeorgeFosterGoogle LizhenQuMonash University - GholamrezaHaffariMonash University + GholamrezaHaffariMonash University 448-462 Existing work in document-level neural machine translation commonly concatenates several consecutive sentences as a pseudo-document, and then learns inter-sentential dependencies. This strategy limits the model’s ability to leverage information from distant context. We overcome this limitation with a novel Document Flattening (DocFlat) technique that integrates Flat-Batch Attention (FBA) and Neural Context Gate (NCG) into Transformer model to utilizes information beyond the pseudo-document boundaries. FBA allows the model to attend to all the positions in the batch and model the relationships between positions explicitly and NCG identifies the useful information from the distant context. We conduct comprehensive experiments and analyses on three benchmark datasets for English-German translation, and validate the effectiveness of two variants of DocFlat. Empirical results show that our approach outperforms strong baselines with statistical significance on BLEU, COMET and accuracy on the contrastive test set. The analyses highlight that DocFlat is highly effective in capturing the long-range information. 2023.eacl-main.33 @@ -478,7 +478,7 @@ SoyeongJeongKorea Advanced Institute of Science and Technology JinheonBaekKorea Advanced Institute of Science and Technology Sung JuHwangKaist - JongParkKaist + JongParkKaist 477-490 Conversational Question Answering (ConvQA) models aim at answering a question with its relevant paragraph and previous question-answer pairs that occurred during conversation multiple times. To apply such models to a real-world scenario, some existing work uses predicted answers, instead of unavailable ground-truth answers, as the conversation history for inference. However, since these models usually predict wrong answers, using all the predictions without filtering significantly hampers the model performance. To address this problem, we propose to filter out inaccurate answers in the conversation history based on their estimated confidences and uncertainties from the ConvQA model, without making any architectural changes. Moreover, to make the confidence and uncertainty values more reliable, we propose to further calibrate them, thereby smoothing the model predictions. We validate our models, Answer Selection-based realistic Conversation Question Answering, on two standard ConvQA datasets, and the results show that our models significantly outperform relevant baselines. Code is available at: https://github.com/starsuzi/AS-ConvQA. 2023.eacl-main.35 @@ -492,7 +492,7 @@ Steven Y.FengStanford University VarunGangalAsapp MaliheAlikhaniUniversity of Pittsburgh - EduardHovyUniversity of Melbourne + EduardHovyUniversity of Melbourne 491-504 Tongue twisters are meaningful sentences that are difficult to pronounce. The process of automatically generating tongue twisters is challenging since the generated utterance must satisfy two conditions at once: phonetic difficulty and semantic meaning. Furthermore, phonetic difficulty is itself hard to characterize and is expressed in natural tongue twisters through a heterogeneous mix of phenomena such as alliteration and homophony. In this paper, we propose PANCETTA: Phoneme Aware Neural Completion to Elicit Tongue Twisters Automatically. We leverage phoneme representations to capture the notion of phonetic difficulty, and we train language models to generate original tongue twisters on two proposed task settings. To do this, we curate a dataset called TT-Corp, consisting of existing English tongue twisters. Through automatic and human evaluation, as well as qualitative analysis, we show that PANCETTA generates novel, phonetically difficult, fluent, and semantically meaningful tongue twisters. 2023.eacl-main.36 @@ -518,7 +518,7 @@ A Survey of Methods for Addressing Class Imbalance in Deep-Learning Based Natural Language Processing SophieHenningBosch Center for Artificial Intelligence; Ludwig-Maximilians-Universität München WilliamBeluchBosch Center for Artificial Intelligence - AlexanderFraserLudwig-Maximilians-Universität München + AlexanderFraserLudwig-Maximilians-Universität München AnnemarieFriedrichBosch Center for Artificial Intelligence 523-540 Many natural language processing (NLP) tasks are naturally imbalanced, as some target categories occur much more frequently than others in the real world. In such scenarios, current NLP models tend to perform poorly on less frequent classes. Addressing class imbalance in NLP is an active research topic, yet, finding a good approach for a particular task and imbalance scenario is difficult. In this survey, the first overview on class imbalance in deep-learning based NLP, we first discuss various types of controlled and real-world class imbalance. Our survey then covers approaches that have been explicitly proposed for class-imbalanced NLP tasks or, originating in the computer vision community, have been evaluated on them. We organize the methods by whether they are based on sampling, data augmentation, choice of loss function, staged learning, or model design. Finally, we discuss open problems and how to move forward. @@ -548,7 +548,7 @@ ZhentingQiZhejiang University LinyongNanYale University Lorenzo JaimeFloresYale University - DragomirRadevYale University + DragomirRadevYale University 554-561 Logical Table-to-Text (LT2T) generation is tasked with generating logically faithful sentences from tables. There currently exists two challenges in the field: 1) Faithfulness: how to generate sentences that are factually correct given the table content; 2) Diversity: how to generate multiple sentences that offer different perspectives on the table. This work proposes LoFT, which utilizes logic forms as fact verifiers and content planners to control LT2T generation. Experimental results on the LogicNLG dataset demonstrate that LoFT is the first model that addresses unfaithfulness and lack of diversity issues simultaneously. Our code is publicly available at https://github.com/Yale-LILY/LoFT. 2023.eacl-main.40 @@ -691,8 +691,8 @@ SebastienMontellaOrange Labs AlexisNasrAix Marseille University JohannesHeineckeOrange Labs - FredericBechetAix Marseille Universite - LIS/CNRS - Lina M.Rojas BarahonaOrange Innovation Research + FredericBechetAix Marseille Universite - LIS/CNRS + Lina M.Rojas BarahonaOrange Innovation Research 727-736 Text generation from Abstract Meaning Representation (AMR) has substantially benefited from the popularized Pretrained Language Models (PLMs). Myriad approaches have linearized the input graph as a sequence of tokens to fit the PLM tokenization requirements. Nevertheless, this transformation jeopardizes the structural integrity of the graph and is therefore detrimental to its resulting representation. To overcome this issue, Ribeiro et al. (2021b) have recently proposed StructAdapt, a structure-aware adapter which injects the input graph connectivity within PLMs using Graph Neural Networks (GNNs). In this paper, we investigate the influence of Relative Position Embeddings (RPE) on AMR-to-Text, and, in parallel, we examine the robustness of StructAdapt. Through ablation studies, graph attack and link prediction, we reveal that RPE might be partially encoding input graphs. We suggest further research regarding the role of RPE will provide valuable insights for Graph-to-Text generation. 2023.eacl-main.51 @@ -706,7 +706,7 @@ AndreasOpedalETH Zurich TiagoPimentelUniversity of Cambridge TimVieiraJohns Hopkins University - JasonEisnerJohns Hopkins University + Microsoft Corporation + JasonEisnerJohns Hopkins University + Microsoft Corporation RyanCotterellETH Zürich 737-749 The Bar-Hillel construction is a classic result in formal language theory. It shows, by a simple construction, that the intersection of a context-free language and a regular language is itself context-free. In the construction, the regular language is specified by a finite-state automaton. However, neither the original construction (Bar-Hillel et al., 1961) nor its weighted extension (Nederhof and Satta, 2003) can handle finite-state automata with ε-arcs. While it is possible to remove ε-arcs from a finite-state automaton efficiently without modifying the language, such an operation modifies the automaton’s set of paths. We give a construction that generalizes the Bar- Hillel in the case the desired automaton has ε-arcs, and further prove that our generalized construction leads to a grammar that encodes the structure of both the input automaton and grammar while retaining the asymptotic size of the original construction. @@ -737,8 +737,8 @@ MarisCamilleriUniversity of Essex PalomaGarciaUniversity of Essex JonChamberlainUniversity of Essex - UdoKruschwitzUniversity of Regensburg - MassimoPoesioQueen Mary University of London + UdoKruschwitzUniversity of Regensburg + MassimoPoesioQueen Mary University of London 767-781 Although several datasets annotated for anaphoric reference / coreference exist, even the largest such datasets have limitations in term of size, range of domains, coverage of anaphoric phenomena, and size of documents included. Yet, the approaches proposed to scale up anaphoric annotation haven’t so far resulted in datasets overcoming these limitations. In this paper, we introduce a new release of a corpus for anaphoric reference labelled via a game-with-a-purpose. This new release is comparable in size to the largest existing corpora for anaphoric reference due in part to substantial activity by the players, in part thanks to the use of a new resolve-and-aggregate paradigm to ‘complete’ markable annotations through the combination of an anaphoric resolver and an aggregation method for anaphoric reference. The proposed method could be adopted to greatly speed up annotation time in other projects involving games-with-a-purpose. In addition, the corpus covers genres for which no comparable size datasets exist (Fiction and Wikipedia); it covers singletons and non-referring expressions; and it includes a substantial number of long documents ( 2K in length). 2023.eacl-main.54 @@ -750,7 +750,7 @@ What Makes Sentences Semantically Related? A Textual Relatedness Dataset and Empirical Study MohamedAbdallaUniversity of Toronto KrishnapriyaVishnubhotlaUniversity of Toronto - SaifMohammadNational Research Council Canada + SaifMohammadNational Research Council Canada 782-796 The degree of semantic relatedness of two units of language has long been considered fundamental to understanding meaning. Additionally, automatically determining relatedness has many applications such as question answering and summarization. However, prior NLP work has largely focused on semantic similarity, a subset of relatedness, because of a lack of relatedness datasets. In this paper, we introduce a dataset for Semantic Textual Relatedness, STR-2022, that has 5,500 English sentence pairs manually annotated using a comparative annotation framework, resulting in fine-grained scores. We show that human intuition regarding relatedness of sentence pairs is highly reliable, with a repeat annotation correlation of 0.84. We use the dataset to explore questions on what makes sentences semantically related. We also show the utility of STR-2022 for evaluating automatic methods of sentence representation and for various downstream NLP tasks. Our dataset, data statement, and annotation questionnaire can be found at: https://doi.org/10.5281/zenodo.7599667. 2023.eacl-main.55 @@ -773,7 +773,7 @@ <fixed-case>N</fixed-case>usa<fixed-case>X</fixed-case>: Multilingual Parallel Sentiment Dataset for 10 <fixed-case>I</fixed-case>ndonesian Local Languages EACL Outstanding Paper - Genta IndraWinataBloomberg + Genta IndraWinataBloomberg Alham FikriAjiMbzuai SamuelCahyawijayaHkust RahmadMahendraUniversitas Indonesia @@ -783,7 +783,7 @@ DavidMoeljadiKanda University of International Studies Radityo EkoPrasojoPitik.id PascaleFungHong Kong University of Science and Technology - TimothyBaldwinMbzuai + TimothyBaldwinMbzuai Jey HanLauThe University of Melbourne RicoSennrichUniversity of Zurich SebastianRuderGoogle @@ -836,7 +836,7 @@ Probing Power by Prompting: Harnessing Pre-trained Language Models for Power Connotation Framing ShimaKhanehzarUniversity of Melbourne - TrevorCohnUniversity of Melbourne + TrevorCohnUniversity of Melbourne GosiaMikolajczakAustralian National University LeaFrermannMelbourne University 873-885 @@ -891,7 +891,7 @@ AruMaekawaTokyo Institute of Technology HidetakaKamigaitoNara Institute of Science and Technology KotaroFunakoshiTokyo Institute of Technology - ManabuOkumuraTokyo Institute of Technology + ManabuOkumuraTokyo Institute of Technology 930-942 Continual learning aims to accumulate knowledge to solve new tasks without catastrophic forgetting for previously learned tasks. Research on continual learning has led to the development of generative replay, which prevents catastrophic forgetting by generating pseudo-samples for previous tasks and learning them together with new tasks. Inspired by the biological brain, we propose the hippocampal memory indexing to enhance the generative replay by controlling sample generation using compressed features of previous training samples. It enables the generation of a specific training sample from previous tasks, thus improving the balance and quality of generated replay samples. Experimental results indicate that our method effectively controls the sample generation and consistently outperforms the performance of current generative replay methods. 2023.eacl-main.65 @@ -901,7 +901,7 @@ A Survey of Multi-task Learning in Natural Language Processing: Regarding Task Relatedness and Training Methods - ZhihanZhangUniversity of Notre Dame + ZhihanZhangUniversity of Notre Dame WenhaoYuUniversity of Notre Dame MengxiaYuUniversity of Notre Dame ZhichunGuoUniversity of Notre Dame @@ -927,7 +927,7 @@ Question-Answer Sentence Graph for Joint Modeling Answer Selection RoshniIyerUniversity of California, Los Angeles - ThuyVuAmazon + ThuyVuAmazon AlessandroMoschittiAmazon YizhouSunUcla 968-979 @@ -1022,7 +1022,7 @@ Looking for a Needle in a Haystack: A Comprehensive Study of Hallucinations in Neural Machine Translation Nuno M.GuerreiroInstituto de Telecomunicacoes, University of Lisbon ElenaVoitaMeta AI - AndréMartinsUnbabel, Instituto de Telecomunicacoes + AndréMartinsUnbabel, Instituto de Telecomunicacoes 1059-1075 Although the problem of hallucinations in neural machine translation (NMT) has received some attention, research on this highly pathological phenomenon lacks solid ground. Previous work has been limited in several ways: it often resorts to artificial settings where the problem is amplified, it disregards some (common) types of hallucinations, and it does not validate adequacy of detection heuristics. In this paper, we set foundations for the study of NMT hallucinations. First, we work in a natural setting, i.e., in-domain data without artificial noise neither in training nor in inference. Next, we annotate a dataset of over 3.4k sentences indicating different kinds of critical errors and hallucinations. Then, we turn to detection methods and both revisit methods used previously and propose using glass-box uncertainty-based detectors. Overall, we show that for preventive settings, (i) previously used methods are largely inadequate, (ii) sequence log-probability works best and performs on par with reference-based methods. Finally, we propose DeHallucinator, a simple method for alleviating hallucinations at test time that significantly reduces the hallucinatory rate. 2023.eacl-main.75 @@ -1048,7 +1048,7 @@ YujinHuangMonash University FatemehShiriFaculty of Information Technology, Monash University WeiqingWangMonash University - GholamrezaHaffariMonash University + GholamrezaHaffariMonash University Yuan-FangLiMonash University 1090-1102 Semantic parsing is a technique aimed at constructing a structured representation of the meaning of a natural-language question. Recent advances in language models trained on code have shown superior performance in generating these representations compared to language models trained solely on natural language text. The existing fine-tuned neural semantic parsers are vulnerable to adversarial attacks on natural-language inputs. While it has been established that the robustness of smaller semantic parsers can be enhanced through adversarial training, this approach is not feasible for large language models in real-world scenarios, as it requires both substantial computational resources and expensive human annotation on in-domain semantic parsing data. This paper presents the first empirical study on the adversarial robustness of a prompt-based semantic parser based on CODEX, a stateof-the-art (SOTA) language model trained on code. Our results demonstrate that the large language model of code is vulnerable to carefully crafted adversarial examples. To overcome this challenge, we propose methods for enhancing robustness without requiring substantial amounts of labelled data or intensive computational resources. @@ -1212,7 +1212,7 @@ Logic Against Bias: Textual Entailment Mitigates Stereotypical Sentence Reasoning HongyinLuoMit - JamesGlassMassachusetts Institute of Technology + JamesGlassMassachusetts Institute of Technology 1243-1254 Due to their similarity-based learning objectives, pretrained sentence encoders often internalize stereotypical assumptions that reflect the social biases that exist within their training corpora. In this paper, we describe several kinds of stereotypes concerning different communities that are present in popular sentence representation models, including pretrained next sentence prediction and contrastive sentence representation models. We compare such models to textual entailment models that learn language logic for a variety of downstream language understanding tasks. By comparing strong pretrained models based on text similarity with textual entailment learning, we conclude that the explicit logic learning with textual entailment can significantly reduce bias and improve the recognition of social communities, without an explicit de-biasing process. 2023.eacl-main.89 @@ -1273,7 +1273,7 @@ Exploring Paracrawl for Document-level Neural Machine Translation YusserAl GhussinDFKI, Saarland University JingyiZhangHpi - Josefvan GenabithDfki + Josefvan GenabithDfki 1304-1310 Document-level neural machine translation (NMT) has outperformed sentence-level NMT on a number of datasets. However, document-level NMT is still not widely adopted in realworld translation systems mainly due to the lack of large-scale general-domain training data for document-level NMT. We examine the effectiveness of using Paracrawl for learning document-level translation. Paracrawl is a large-scale parallel corpus crawled from the Internet and contains data from various domains. The official Paracrawl corpus was released as parallel sentences (extracted from parallel webpages) and therefore previous works only used Paracrawl for learning sentence-level translation. In this work, we extract parallel paragraphs from Paracrawl parallel webpages using automatic sentence alignments and we use the extracted parallel paragraphs as parallel documents for training document-level translation models. We show that document-level NMT models trained with only parallel paragraphs from Paracrawl can be used to translate real documents from TED, News and Europarl, outperforming sentence-level NMT models. We also perform a targeted pronoun evaluation and show that document-level models trained with Paracrawl data can help context-aware pronoun translation. 2023.eacl-main.94 @@ -1301,7 +1301,7 @@ Integrating Translation Memories into Non-Autoregressive Machine Translation JitaoXuLISN, CNRS, Paris-Saclay University - JosepCregoSystran + JosepCregoSystran FrançoisYvonISIR CNRS & Sorbonne Université 1326-1338 Non-autoregressive machine translation (NAT) has recently made great progress. However, most works to date have focused on standard translation tasks, even though some edit-based NAT models, such as the Levenshtein Transformer (LevT), seem well suited to translate with a Translation Memory (TM). This is the scenario considered here. We first analyze the vanilla LevT model and explain why it does not do well in this setting. We then propose a new variant, TM-LevT, and show how to effectively train this model. By modifying the data presentation and introducing an extra deletion operation, we obtain performance that are on par with an autoregressive approach, while reducing the decoding load. We also show that incorporating TMs during training dispenses to use knowledge distillation, a well-known trick used to mitigate the multimodality issue. @@ -1340,9 +1340,9 @@ <fixed-case>BLM</fixed-case>-<fixed-case>A</fixed-case>gr<fixed-case>F</fixed-case>: A New <fixed-case>F</fixed-case>rench Benchmark to Investigate Generalization of Agreement in Neural Networks AixiuAnUniversité de Paris - ChunyangJiangUniversity of Geneva + ChunyangJiangUniversity of Geneva MariaA. RodriguezUniversity of Geneva - ViviNastaseUniversity of Geneva + ViviNastaseUniversity of Geneva PaolaMerloUniversity of Geneva 1363-1374 Successful machine learning systems currently rely on massive amounts of data, which are very effective in hiding some of the shallowness of the learned models. To help train models with more complex and compositional skills, we need challenging data, on which a system is successful only if it detects structure and regularities, that will allow it to generalize. In this paper, we describe a French dataset (BLM-AgrF) for learning the underlying rules of subject-verb agreement in sentences, developed in the BLM framework, a new task inspired by visual IQ tests known as Raven’s Progressive Matrices. In this task, an instance consists of sequences of sentences with specific attributes. To predict the correct answer as the next element of the sequence, a model must correctly detect the generative model used to produce the dataset. We provide details and share a dataset built following this methodology. Two exploratory baselines based on commonly used architectures show that despite the simplicity of the phenomenon, it is a complex problem for deep learning systems. @@ -1410,7 +1410,7 @@ Made of Steel? Learning Plausible Materials for Components in the Vehicle Repair Domain AnneroseEichelUniversity of Stuttgart HelenaSchlipfUniversity of Stuttgart - SabineSchulte im WaldeUniversity of Stuttgart + SabineSchulte im WaldeUniversity of Stuttgart 1420-1435 We propose a novel approach to learn domain-specific plausible materials for components in the vehicle repair domain by probing Pretrained Language Models (PLMs) in a cloze task style setting to overcome the lack of annotated datasets. We devise a new method to aggregate salient predictions from a set of cloze query templates and show that domain-adaptation using either a small, high-quality or a customized Wikipedia corpus boosts performance. When exploring resource-lean alternatives, we find a distilled PLM clearly outperforming a classic pattern-based algorithm. Further, given that 98% of our domain-specific components are multiword expressions, we successfully exploit the compositionality assumption as a way to address data sparsity. 2023.eacl-main.104 @@ -1448,14 +1448,14 @@ Selective In-Context Data Augmentation for Intent Detection using Pointwise <fixed-case>V</fixed-case>-Information Yen-TingLinNational Taiwan University - AlexandrosPapangelisAmazon Alexa AI + AlexandrosPapangelisAmazon Alexa AI SeokhwanKimAmazon Alexa AI - SungjinLeeAmazon Alexa AI + SungjinLeeAmazon Alexa AI DevamanyuHazarikaAmazon MahdiNamazifarAmazon Alexa AI DiJinAmazon YangLiuAmazon - DilekHakkani-TurAmazon Alexa AI + DilekHakkani-TurAmazon Alexa AI 1463-1476 This work focuses on in-context data augmentation for intent detection. Having found that augmentation via in-context prompting of large pre-trained language models (PLMs) alone does not improve performance, we introduce a novel approach based on PLMs and pointwise V-information (PVI), a metric that can measure the usefulness of a datapoint for training a model. Our method first fine-tunes a PLM on a small seed of training data and then synthesizes new datapoints - utterances that correspond to given intents. It then employs intent-aware filtering, based on PVI, to remove datapoints that are not helpful to the downstream intent classifier. Our method is thus able to leverage the expressive power of large language models to produce diverse training data. Empirical results demonstrate that our method can produce synthetic training data that achieve state-of-the-art performance on three challenging intent detection datasets under few-shot settings (1.28% absolute improvement in 5-shot and 1.18% absolute in 10-shot, on average) and perform on par with the state-of-the-art in full-shot settings (within 0.01% absolute, on average). 2023.eacl-main.107 @@ -1489,7 +1489,7 @@ A Systematic Search for Compound Semantics in Pretrained <fixed-case>BERT</fixed-case> Architectures FilipMileticUniversity of Stuttgart - SabineSchulte im WaldeUniversity of Stuttgart + SabineSchulte im WaldeUniversity of Stuttgart 1499-1512 To date, transformer-based models such as BERT have been less successful in predicting compositionality of noun compounds than static word embeddings. This is likely related to a suboptimal use of the encoded information, reflecting an incomplete grasp of how the models represent the meanings of complex linguistic structures. This paper investigates variants of semantic knowledge derived from pretrained BERT when predicting the degrees of compositionality for 280 English noun compounds associated with human compositionality ratings. Our performance strongly improves on earlier unsupervised implementations of pretrained BERT and highlights beneficial decisions in data preprocessing, embedding computation, and compositionality estimation. The distinct linguistic roles of heads and modifiers are reflected by differences in BERT-derived representations, with empirical properties such as frequency, productivity, and ambiguity affecting model performance. The most relevant representational information is concentrated in the initial layers of the model architecture. 2023.eacl-main.110 @@ -1512,7 +1512,7 @@ Summarize and Generate to Back-translate: Unsupervised Translation of Programming Languages - Wasi UddinAhmadAWS AI Labs + Wasi UddinAhmadAWS AI Labs SaikatChakrabortyMicrosoft Research BaishakhiRayColumbia University Kai-WeiChangUcla @@ -1554,7 +1554,7 @@ Towards More Efficient Insertion Transformer with Fractional Positional Encoding ZhisongZhangCarnegie Mellon University YizheZhangApple - BillDolanMicrosoft Research + BillDolanMicrosoft Research 1564-1572 Auto-regressive neural sequence models have been shown to be effective across text generation tasks. However, their left-to-right decoding order prevents generation from being parallelized. Insertion Transformer (Stern et al., 2019) is an attractive alternative that allows outputting multiple tokens in a single generation step. Nevertheless, due to the incompatibility between absolute positional encoding and insertion-based generation schemes, it needs to refresh the encoding of every token in the generated partial hypothesis at each step, which could be costly. We design a novel reusable positional encoding scheme for Insertion Transformers called Fractional Positional Encoding (FPE), which allows reusing representations calculated in previous steps. Empirical studies on various text generation tasks demonstrate the effectiveness of FPE, which leads to floating-point operation reduction and latency improvements on batched decoding. 2023.eacl-main.115 @@ -1581,7 +1581,7 @@ PatVergaGoogle Michielde JongUniversity of Southern California JohnWietingUniversity of Illinois; TTI-Chicago; CMU; Google - William W.CohenGoogle AI + William W.CohenGoogle AI 1597-1610 Existing state-of-the-art methods for open-domain question-answering (ODQA) use an open book approach in which information is first retrieved from a large text corpus or knowledge base (KB) and then reasoned over to produce an answer. A recent alternative is to retrieve from a collection of previously-generated question-answer pairs; this has several practical advantages including being more memory and compute-efficient. Question-answer pairs are also appealing in that they can be viewed as an intermediate between text and KB triples: like KB triples, they often concisely express a single relationship, but like text, have much higher coverage than traditional KBs. In this work, we describe a new QA system that augments a text-to-text model with a large memory of question-answer pairs, and a new pre-training task for the latent step of question retrieval. The pre-training task substantially simplifies training and greatly improves performance on smaller QA benchmarks. Unlike prior systems of this sort, our QA system can also answer multi-hop questions that do not explicitly appear in the collection of stored question-answer pairs. 2023.eacl-main.117 @@ -1593,7 +1593,7 @@ Gold Doesn’t Always Glitter: Spectral Removal of Linear and Nonlinear Guarded Attribute Information ShunShaoUniversity of Edinburgh YftahZiserUniversity of Edinburgh - Shay B.CohenUniversity of Edinburgh + Shay B.CohenUniversity of Edinburgh 1611-1622 We describe a simple and effective method (Spectral Attribute removaL; SAL) to remove private or guarded information from neural representations. Our method uses matrix decomposition to project the input representations into directions with reduced covariance with the guarded information rather than maximal covariance as factorization methods normally use. We begin with linear information removal and proceed to generalize our algorithm to the case of nonlinear information removal using kernels. Our experiments demonstrate that our algorithm retains better main task performance after removing the guarded information compared to previous work. In addition, our experiments demonstrate that we need a relatively small amount of guarded attribute data to remove information about these attributes, which lowers the exposure to sensitive data and is more suitable for low-resource scenarios. 2023.eacl-main.118 @@ -1610,7 +1610,7 @@ YosukeHiguchiWaseda University GrahamNeubigCarnegie Mellon University FlorianMetzeCarnegie Mellon University - Alan WBlackCarnegie Mellon University + Alan WBlackCarnegie Mellon University ShinjiWatanabeCarnegie Mellon University 1623-1639 Connectionist Temporal Classification (CTC) is a widely used approach for automatic speech recognition (ASR) that performs conditionally independent monotonic alignment. However for translation, CTC exhibits clear limitations due to the contextual and non-monotonic nature of the task and thus lags behind attentional decoder approaches in terms of translation quality. In this work, we argue that CTC does in fact make sense for translation if applied in a joint CTC/attention framework wherein CTC’s core properties can counteract several key weaknesses of pure-attention models during training and decoding. To validate this conjecture, we modify the Hybrid CTC/Attention model originally proposed for ASR to support text-to-text translation (MT) and speech-to-text translation (ST). Our proposed joint CTC/attention models outperform pure-attention baselines across six benchmark translation tasks. @@ -1653,7 +1653,7 @@ Cluster-Guided Label Generation in Extreme Multi-Label Classification TaeheeJungAmazon Alexa AI Joo-kyungKimAmazon Alexa AI - SungjinLeeAmazon Alexa AI + SungjinLeeAmazon Alexa AI DongyeopKangUniversity of Minnesota 1670-1685 For extreme multi-label classification (XMC), existing classification-based models poorly per- form for tail labels and often ignore the semantic relations among labels, like treating”Wikipedia” and “Wiki” as independent and separate labels. In this paper, we cast XMC as a generation task (XLGen), where we benefit from pre-trained text-to-text models. However, generating labels from the extremely large label space is challenging without any constraints or guidance. We, therefore, propose to guide label generation using label cluster information to hierarchically generate lower-level labels. We also find that frequency-based label ordering and using decoding ensemble methods are critical factors for the improvements in XLGen. XLGen with cluster guidance significantly outperforms the classification and generation baselines on tail labels, and also generally improves the overall performance in four popular XMC benchmarks. In human evaluation, we also find XLGen generates unseen but plausible labels. Our code is now available at https://github.com/alexa/xlgen-eacl-2023. @@ -1667,7 +1667,7 @@ AndrewLeeUniversity of Michigan Jonathan K.KummerfeldUniversity of Sydney LarryAnUniversity of Michigan - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan 1686-1695 Understanding empathy in text dialogue data is a difficult, yet critical, skill for effective human-machine interaction. In this work, we ask whether systems are making meaningful progress on this challenge. We consider a simple model that checks if an input utterance is similar to a small set of empathetic examples. Crucially, the model does not look at what the utterance is a response to, i.e., the dialogue context. This model performs comparably to other work on standard benchmarks and even outperforms state-of-the-art models for empathetic rationale extraction by 16.7 points on T-F1 and 4.3 on IOU-F1. This indicates that current systems rely on the surface form of the response, rather than whether it is suitable in context. To confirm this, we create examples with dialogue contexts that change the interpretation of the response and show that current systems continue to label utterances as empathetic. We discuss the implications of our findings, including improvements for empathetic benchmarks and how our model can be an informative baseline. 2023.eacl-main.123 @@ -1696,7 +1696,7 @@ What happens before and after: Multi-Event Commonsense in Event Coreference Resolution SahithyaRaviThe University of British Columbia, Vancouver ChrisTannerMIT and Kensho Technologies - RaymondNgUniv British Columbia + RaymondNgUniv British Columbia VeredShwartzUniversity of British Columbia 1708-1724 Event coreference models cluster event mentions pertaining to the same real-world event. Recent models rely on contextualized representations to recognize coreference among lexically or contextually similar mentions. However, models typically fail to leverage commonsense inferences, which is particularly limiting for resolving lexically-divergent mentions. We propose a model that extends event mentions with temporal commonsense inferences. Given a complex sentence with multiple events, e.g., “the man killed his wife and got arrested”, with the target event “arrested”, our model generates plausible events that happen before the target event – such as “the police arrived”, and after it, such as “he was sentenced”. We show that incorporating such inferences into an existing event coreference model improves its performance, and we analyze the coreferences in which such temporal knowledge is required. @@ -1720,7 +1720,7 @@ <fixed-case>C</fixed-case>yl<fixed-case>E</fixed-case>: Cylinder Embeddings for Multi-hop Reasoning over Knowledge Graphs Chau Duc MinhNguyenThe University of Western Australia TimFrenchThe University of Western Australia - WeiLiuThe University of Western Austarlia + WeiLiuThe University of Western Austarlia MichaelStewartThe University of Western Australia 1736-1751 Recent geometric-based approaches have been shown to efficiently model complex logical queries (including the intersection operation) over Knowledge Graphs based on the natural representation of Venn diagram. Existing geometric-based models (using points, boxes embeddings), however, cannot handle the logical negation operation. Further, those using cones embeddings are limited to representing queries by two-dimensional shapes, which reduced their effectiveness in capturing entities query relations for correct answers. To overcome this challenge, we propose unbounded cylinder embeddings (namely CylE), which is a novel geometric-based model based on three-dimensional shapes. Our approach can handle a complete set of basic first-order logic operations (conjunctions, disjunctions and negations). CylE considers queries as Cartesian products of unbounded sector-cylinders and consider a set of nearest boxes corresponds to the set of answer entities. Precisely, the conjunctions can be represented via the intersections of unbounded sector-cylinders. Transforming queries to Disjunctive Normal Form can handle queries with disjunctions. The negations can be represented by considering the closure of complement for an arbitrary unbounded sector-cylinder. Empirical results show that the performance of multi-hop reasoning task using CylE significantly increases over state-of-the-art geometric-based query embedding models for queries without negation. For queries with negation operations, though the performance is on a par with the best performing geometric-based model, CylE significantly outperforms a recent distribution-based model. @@ -1750,7 +1750,7 @@ YuChengMicrosoft Research MiladShokouhiMicrosoft XiaHuRice University - Ahmed HassanAwadallahMicrosoft Research + Ahmed HassanAwadallahMicrosoft Research 1766-1778 Recent work has focused on compressing pre-trained language models (PLMs) like BERT where the major focus has been to improve the in-distribution performance for downstream tasks. However, very few of these studies have analyzed the impact of compression on the generalizability and robustness of compressed models for out-of-distribution (OOD) data. Towards this end, we study two popular model compression techniques including knowledge distillation and pruning and show that the compressed models are significantly less robust than their PLM counterparts on OOD test sets although they obtain similar performance on in-distribution development sets for a task. Further analysis indicates that the compressed models overfit on the shortcut samples and generalize poorly on the hard ones. We further leverage this observation to develop a regularization strategy for robust model compression based on sample uncertainty. 2023.eacl-main.129 @@ -1775,8 +1775,8 @@ Performance Prediction via <fixed-case>B</fixed-case>ayesian Matrix Factorisation for Multilingual Natural Language Processing Tasks ViktoriaSchramUniversity of Melbourne - DanielBeckUniversity of Melbourne - TrevorCohnUniversity of Melbourne + DanielBeckUniversity of Melbourne + TrevorCohnUniversity of Melbourne 1790-1801 Performance prediction for Natural Language Processing (NLP) seeks to reduce the experimental burden resulting from the myriad of different evaluation scenarios, e.g., the combination of languages used in multilingual transfer. In this work, we explore the framework ofBayesian matrix factorisation for performance prediction, as many experimental settings in NLP can be naturally represented in matrix format. Our approach outperforms the state-of-the-art in several NLP benchmarks, including machine translation and cross-lingual entity linking. Furthermore, it also avoids hyperparameter tuning and is able to provide uncertainty estimates over predictions. 2023.eacl-main.131 @@ -1802,7 +1802,7 @@ Don’t Mess with Mister-in-Between: Improved Negative Search for Knowledge Graph Completion FanJiangThe University of Melbourne TomDrummondUniversity of Melbourne - TrevorCohnUniversity of Melbourne + TrevorCohnUniversity of Melbourne 1818-1832 The best methods for knowledge graph completion use a ‘dual-encoding’ framework, a form of neural model with a bottleneck that facilitates fast approximate search over a vast collection of candidates. These approaches are trained using contrastive learning to differentiate between known positive examples and sampled negative instances. The mechanism for sampling negatives to date has been very simple, driven by pragmatic engineering considerations (e.g., using mismatched instances from the same batch). We propose several novel means of finding more informative negatives, based on searching for candidates with high lexical overlaps, from the dual-encoder model and according to knowledge graph structures. Experimental results on four benchmarks show that our best single model improves consistently over previous methods and obtains new state-of-the-art performance, including the challenging large-scale Wikidata5M dataset. Combing different kinds of strategies through model ensembling results in a further performance boost. 2023.eacl-main.133 @@ -1946,7 +1946,7 @@ Task and Sentiment Adaptation for Appraisal Tagging LinTianRMIT University - XiuzhenZhangRMIT University + XiuzhenZhangRMIT University Myung HeeKimDefence Science Technology Group JenniferBiggsDefence Science and Technology Group 1960-1970 @@ -1960,7 +1960,7 @@ <fixed-case>DREEAM</fixed-case>: Guiding Attention with Evidence for Improving Document-Level Relation Extraction YoumiMaTokyo Institute of Technology AnWangTokyo Institute of Technology - NaoakiOkazakiTokyo Institute of Technology + NaoakiOkazakiTokyo Institute of Technology 1971-1983 Document-level relation extraction (DocRE) is the task of identifying all relations between each entity pair in a document. Evidence, defined as sentences containing clues for the relationship between an entity pair, has been shown to help DocRE systems focus on relevant texts, thus improving relation extraction. However, evidence retrieval (ER) in DocRE faces two major issues: high memory consumption and limited availability of annotations. This work aims at addressing these issues to improve the usage of ER in DocRE. First, we propose DREEAM, a memory-efficient approach that adopts evidence information as the supervisory signal, thereby guiding the attention modules of the DocRE system to assign high weights to evidence. Second, we propose a self-training strategy for DREEAM to learn ER from automatically-generated evidence on massive data without evidence annotations. Experimental results reveal that our approach exhibits state-of-the-art performance on the DocRED benchmark for both DocRE and ER. To the best of our knowledge, DREEAM is the first approach to employ ER self-training. 2023.eacl-main.145 @@ -2026,7 +2026,7 @@ ApoorvSaxenaAdobe Research ChitrankGuptaIIT Bombay, UT Austin MehranKazemiGoogle Research - ParthaTalukdarGoogle Research and IISc + ParthaTalukdarGoogle Research and IISc SoumenChakrabartiIIT Bombay 2049-2060 Recent years have witnessed interest in Temporal Question Answering over Knowledge Graphs (TKGQA), resulting in the development of multiple methods. However, these are highly engineered, thereby limiting their generalizability, and they do not automatically discover relevant parts of the KG during multi-hop reasoning. Relational graph convolutional networks (RGCN) provide an opportunity to address both of these challenges – we explore this direction in the paper. Specifically, we propose a novel, intuitive and interpretable scheme to modulate the messages passed through a KG edge during convolution based on the relevance of its associated period to the question. We also introduce a gating device to predict if the answer to a complex temporal question is likely to be a KG entity or time and use this prediction to guide our scoring mechanism. We evaluate the resulting system, which we call TwiRGCN, on a recent challenging dataset for multi-hop complex temporal QA called TimeQuestions. We show that TwiRGCN significantly outperforms state-of-the-art models on this dataset across diverse question types. Interestingly, TwiRGCN improves accuracy by 9–10 percentage points for the most difficult ordinal and implicit question types. @@ -2052,7 +2052,7 @@ <fixed-case>GLADIS</fixed-case>: A General and Large Acronym Disambiguation Benchmark LihuChenTelecom Paris & Institut Polytechnique de Paris GaelVaroquauxInria - Fabian M.SuchanekTelecom Paris + Fabian M.SuchanekTelecom Paris 2073-2088 2023.eacl-main.152 chen-etal-2023-gladis @@ -2118,7 +2118,7 @@ AtharvaKulkarniCarnegie Mellon University TharunSureshIndraprastha Institute of Information Technology - Delhi HimanshiMathurIiitd - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence Md. ShadAkhtarIndraprastha Institute of Information Technology, Delhi TanmoyChakrabortyIIT Delhi 2149-2163 @@ -2170,9 +2170,9 @@ Towards a Unified Multi-Domain Multilingual Named Entity Recognition Model MayankKulkarniAmazon - DanielPreotiuc-PietroBloomberg + DanielPreotiuc-PietroBloomberg KarthikRadhakrishnanBloomberg LP - Genta IndraWinataBloomberg + Genta IndraWinataBloomberg ShijieWuBloomberg LingjueXieBloomberg ShaohuaYangBloomberg @@ -2209,7 +2209,7 @@ Measuring Normative and Descriptive Biases in Language Models Using Census Data SamiaTouilebUniversity of Bergen - LiljaØvrelidDept of Informatics, University of Oslo + LiljaØvrelidDept of Informatics, University of Oslo ErikVelldalUniversity of Oslo 2242-2248 We investigate in this paper how distributions of occupations with respect to gender is reflected in pre-trained language models. Such distributions are not always aligned to normative ideals, nor do they necessarily reflect a descriptive assessment of reality. In this paper, we introduce an approach for measuring to what degree pre-trained language models are aligned to normative and descriptive occupational distributions. To this end, we use official demographic information about gender–occupation distributions provided by the national statistics agencies of France, Norway, United Kingdom, and the United States. We manually generate template-based sentences combining gendered pronouns and nouns with occupations, and subsequently probe a selection of ten language models covering the English, French, and Norwegian languages. The scoring system we introduce in this work is language independent, and can be used on any combination of template-based sentences, occupations, and languages. The approach could also be extended to other dimensions of national census data and other demographic variables. @@ -2287,7 +2287,7 @@ NicolasHiebelUniversité Paris Saclay, CNRS, LISN OlivierFerretCEA List KarenFortSorbonne Universite and LORIA - AurélieNévéolUniversité Paris Saclay, CNRS, LISN + AurélieNévéolUniversité Paris Saclay, CNRS, LISN 2320-2338 In sensitive domains, the sharing of corpora is restricted due to confidentiality, copyrights or trade secrets. Automatic text generation can help alleviate these issues by producing synthetic texts that mimic the linguistic properties of real documents while preserving confidentiality. In this study, we assess the usability of synthetic corpus as a substitute training corpus for clinical information extraction. Our goal is to automatically produce a clinical case corpus annotated with clinical entities and to evaluate it for a named entity recognition (NER) task. We use two auto-regressive neural models partially or fully trained on generic French texts and fine-tuned on clinical cases to produce a corpus of synthetic clinical cases. We study variants of the generation process: (i) fine-tuning on annotated vs. plain text (in that case, annotations are obtained a posteriori) and (ii) selection of generated texts based on models parameters and filtering criteria. We then train NER models with the resulting synthetic text and evaluate them on a gold standard clinical corpus. Our experiments suggest that synthetic text is useful for clinical NER. 2023.eacl-main.170 @@ -2314,7 +2314,7 @@ MarcoCognettaTokyo Institute of Technology SangwhanMoonTokyo Institute of Technology LawrenceWolf-sonkinGoogle Research - NaoakiOkazakiTokyo Institute of Technology + NaoakiOkazakiTokyo Institute of Technology 2350-2356 Character-level language modeling has been shown empirically to perform well on highly agglutinative or morphologically rich languages while using only a small fraction of the parameters required by (sub)word models. Korean fits nicely into this framework, except that, like other CJK languages, it has a very large character vocabulary of 11,172 unique syllables. However, unlike Japanese Kanji and Chinese Hanzi, each Korean syllable can be uniquely factored into a small set of subcharacters, called jamo. We explore a “three-hot” scheme, where we exploit the decomposability of Korean characters to model at the syllable level but using only jamo-level representations. We find that our three-hot embedding and decoding scheme alleviates the two major issues with prior syllable- and jamo-level models. Namely, it requires fewer than 1% of the embedding parameters of a syllable model, and it does not require tripling the sequence length, as with jamo models. In addition, it addresses a theoretical flaw in a prior three-hot modeling scheme. Our experiments show that, even when reducing the number of embedding parameters by 99.6% (from 11.4M to just 36k), our model suffers no loss in translation quality compared to the baseline syllable model. 2023.eacl-main.172 @@ -2420,7 +2420,7 @@ BidishaSamantaGoogle ShachiDaveGoogle Research SunitaSarawagiIIT Bombay - ParthaTalukdarGoogle Research and IISc + ParthaTalukdarGoogle Research and IISc 2455-2467 Despite cross-lingual generalization demonstrated by pre-trained multilingual models, the translate-train paradigm of transferring English datasets across multiple languages remains to be a key mechanism for training task-specific multilingual models. However, for many low-resource languages, the availability of a reliable translation service entails significant amounts of costly human-annotated translation pairs. Further, translation services may continue to be brittle due to domain mismatch between task-specific input text and general-purpose text used for training translation models. For multilingual semantic parsing, we demonstrate the effectiveness and flexibility offered by large language models (LLMs) for translating English datasets into several languages via few-shot prompting. Through extensive comparisons on two public datasets, MTOP and MASSIVE, spanning 50 languages and several domains, we show that our method of translating data using LLMs outperforms a strong translate-train baseline on 41 out of 50 languages. We study the key design choices that enable more effective multilingual data translation via prompted LLMs. 2023.eacl-main.180 @@ -2432,7 +2432,7 @@ Modeling Complex Event Scenarios via Simple Entity-focused Questions MahnazKoupaeeStony Brook University GregDurrettUT Austin - NathanaelChambersUS Naval Academy + NathanaelChambersUS Naval Academy NiranjanBalasubramanianStony Brook University 2468-2483 Event scenarios are often complex and involve multiple event sequences connected through different entity participants. Exploring such complex scenarios requires an ability to branch through different sequences, something that is difficult to achieve with standard event language modeling. To address this, we propose a question-guided generation framework that models events in complex scenarios as answers to questions about participants. At any step in the generation process, the framework uses the previously-generated events as context, but generates the next event as an answer to one of three questions: what else a participant did, what else happened to a participant, or what else happened. The participants and the questions themselves can be sampled or be provided as input from a user, allowing for controllable exploration. Our empirical evaluation shows that this question-guided generation provides better coverage of participants, diverse events within a domain, comparable perplexities for modeling event sequences, and more effective control for interactive schema generation. @@ -2513,7 +2513,7 @@ Towards preserving word order importance through Forced Invalidation HadeelAl-NegheimishImperial College London - PranavaMadhyasthaCity, University of London + PranavaMadhyasthaCity, University of London AlessandraRussoImperial College London 2563-2570 Large pre-trained language models such as BERT have been widely used as a framework for natural language understanding (NLU) tasks. However, recent findings have revealed that pre-trained language models are insensitive to word order. The performance on NLU tasks remains unchanged even after randomly permuting the word of a sentence, where crucial syntactic information is destroyed. To help preserve the importance of word order, we propose a simple approach called Forced Invalidation (FI): forcing the model to identify permuted sequences as invalid samples. We perform an extensive evaluation of our approach on various English NLU and QA based tasks over BERT-based and attention-based models over word embeddings. Our experiments demonstrate that FI significantly improves the sensitivity of the models to word order. @@ -2526,7 +2526,7 @@ How Many and Which Training Points Would Need to be Removed to Flip this Prediction? JinghanYangThe University of Hong Kong SarthakJainAWS AI Labs - Byron C.WallaceNortheastern University + Byron C.WallaceNortheastern University 2571-2584 2023.eacl-main.188 2023.eacl-main.188.dataset.zip @@ -2548,7 +2548,7 @@ Detecting Lexical Borrowings from Dominant Languages in Multilingual Wordlists - John E.MillerPUCP: Pontificia Universidad Catolica del Peru + John E.MillerPUCP: Pontificia Universidad Catolica del Peru Johann-MattisListMax Planck Institute for Evolutionary Anthropology 2599-2605 Language contact is a pervasive phenomenon reflected in the borrowing of words from donor to recipient languages. Most computational approaches to borrowing detection treat all languages under study as equally important, even though dominant languages have a stronger impact on heritage languages than vice versa. We test new methods for lexical borrowing detection in contact situations where dominant languages play an important role, applying two classical sequence comparison methods and one machine learning method to a sample of seven Latin American languages which have all borrowed extensively from Spanish. All systems perform well, with the supervised machine learning system outperforming the classical systems. A review of detection errors shows that borrowing detection could be substantially improved by taking into account donor words with divergent meanings from recipient words. @@ -2574,7 +2574,7 @@ EmilyAllawayColumbia University Jena D.HwangAllen Institute for AI ChandraBhagavatulaAllen Institute for AI - KathleenMcKeownColumbia University and Amazon (Amazon Scholar) + KathleenMcKeownColumbia University and Amazon (Amazon Scholar) DougDowneyAllen Institute for AI, Northwestern University YejinChoiUniversity of Washington 2618-2635 @@ -2606,7 +2606,7 @@ QimingBaoThe University of Auckland YangChenUniversity of Auckland MarkGaheganUniversity of Auckland - MichaelWitbrockUniversity of Auckland + MichaelWitbrockUniversity of Auckland 2652-2664 Training machine learning models to successfully perform scientific fact-checking tasks is challenging due to the expertise bottleneck that limits the availability of appropriate training datasets. In this task, models use textual evidence to confirm scientific claims, which requires data that contains extensive domain-expert annotation. Consequently, the number of existing scientific-fact-checking datasets and the sizes of those datasets are limited. However, these limitations do not apply to multiple-choice question datasets because of the necessity of domain exams in the modern education system. As one of the first steps towards addressing the fact-checking dataset scarcity problem in scientific domains, we propose a pipeline for automatically converting multiple-choice questions into fact-checking data, which we call Multi2Claim. By applying the proposed pipeline, we generated two large-scale datasets for scientific-fact-checking tasks: Med-Fact and Gsci-Fact for the medical and general science domains, respectively. These two datasets are among the first examples of large-scale scientific-fact-checking datasets. We developed baseline models for the verdict prediction task using each dataset. Additionally, we demonstrated that the datasets could be used to improve performance with respect to the F 1 weighted metric on existing fact-checking datasets such as SciFact, HEALTHVER, COVID-Fact, and CLIMATE-FEVER. In some cases, the improvement in performance was up to a 26% increase. 2023.eacl-main.194 @@ -2668,7 +2668,7 @@ Methods for Measuring, Updating, and Visualizing Factual Beliefs in Language Models PeterHaseUniversity of North Carolina at Chapel Hill - MonaDiabMeta Responsible AI + MonaDiabMeta Responsible AI AsliCelikyilmazFAIR @ Meta XianLiMeta AI ZornitsaKozarevaMeta AI @@ -2737,7 +2737,7 @@ Behavior Cloned Transformers are Neurosymbolic Reasoners RuoyaoWangUniversity of Arizona - PeterJansenUniversity of Arizona + PeterJansenUniversity of Arizona Marc-AlexandreCôtéMicrosoft Research PrithvirajAmmanabroluAllen Institute for AI 2777-2788 @@ -2792,7 +2792,7 @@ Shirley AnugrahHayatiUniversity of Minnesota KyuminParkKorea Advanced Institute of Science and Technology DheerajRajagopalGoogle Inc - LyleUngarUniversity of Pennsylvania + LyleUngarUniversity of Pennsylvania DongyeopKangUniversity of Minnesota 2843-2856 Large pre-trained language models have achieved impressive results on various style classification tasks, but they often learn spurious domain-specific words to make predictions (Hayati et al., 2021). While human explanation highlights stylistic tokens as important features for this task, we observe that model explanations often do not align with them. To tackle this issue, we introduce StyLEx, a model that learns from human annotated explanations of stylistic features and jointly learns to perform the task and predict these features as model explanations. Our experiments show that StyLEx can provide human like stylistic lexical explanations without sacrificing the performance of sentence-level style prediction on both in-domain and out-of-domain datasets. Explanations from StyLEx show significant improvements in explanation metrics (sufficiency, plausibility) and when evaluated with human annotations. They are also more understandable by human judges compared to the widely-used saliency-based explanation baseline. @@ -2805,7 +2805,7 @@ Comparing Intrinsic Gender Bias Evaluation Measures without using Human Annotated Examples MasahiroKanekoTokyo Institute of Technology DanushkaBollegalaUniversity of Liverpool/Amazon - NaoakiOkazakiTokyo Institute of Technology + NaoakiOkazakiTokyo Institute of Technology 2857-2863 Numerous types of social biases have been identified in pre-trained language models (PLMs), and various intrinsic bias evaluation measures have been proposed for quantifying those social biases. Prior works have relied on human annotated examples to compare existing intrinsic bias evaluation measures. However, this approach is not easily adaptable to different languages nor amenable to large scale evaluations due to the costs and difficulties when recruiting human annotators. To overcome this limitation, we propose a method to compare intrinsic gender bias evaluation measures without relying on human-annotated examples. Specifically, we create multiple bias-controlled versions of PLMs using varying amounts of male vs. female gendered sentences, mined automatically from an unannotated corpus using gender-related word lists. Next, each bias-controlled PLM is evaluated using an intrinsic bias evaluation measure, and the rank correlation between the computed bias scores and the gender proportions used to fine-tune the PLMs is computed. Experiments on multiple corpora and PLMs repeatedly show that the correlations reported by our proposed method that does not require human annotated examples are comparable to those computed using human annotated examples in prior work. 2023.eacl-main.209 @@ -2817,7 +2817,7 @@ Faithfulness-Aware Decoding Strategies for Abstractive Summarization DavidWanUniversity of North Carolina at Chapel Hill MengwenLiuAmazon - KathleenMcKeownColumbia University and Amazon (Amazon Scholar) + KathleenMcKeownColumbia University and Amazon (Amazon Scholar) MarkusDreyerAmazon.com MohitBansalUniversity of North Carolina at Chapel Hill 2864-2880 @@ -2943,11 +2943,11 @@ Dong-HoLeeUniversity of Southern California Ravi KiranSelvamUniversity of Southern California Sheikh MuhammadSarwarAmazon.com - Bill YuchenLinAllen Institute for AI + Bill YuchenLinAllen Institute for AI FredMorstatterUSC Information Sciences Institute JayPujaraUniversity of Southern California ElizabethBoscheeInformation Sciences Institute - JamesAllanUniversity of Massachusetts Amherst + JamesAllanUniversity of Massachusetts Amherst XiangRenUniversity of Southern California 3011-3025 Deep neural models for named entity recognition (NER) have shown impressive results in overcoming label scarcity and generalizing to unseen entities by leveraging distant supervision and auxiliary information such as explanations. However, the costs of acquiring such additional information are generally prohibitive. In this paper, we present a novel two-stage framework (AutoTriggER) to improve NER performance by automatically generating and leveraging “entity triggers” which are human-readable cues in the text that help guide the model to make better decisions. Our framework leverages post-hoc explanation to generate rationales and strengthens a model’s prior knowledge using an embedding interpolation technique. This approach allows models to exploit triggers to infer entity boundaries and types instead of solely memorizing the entity words themselves. Through experiments on three well-studied NER datasets, AutoTriggER shows strong label-efficiency, is capable of generalizing to unseen entities, and outperforms the RoBERTa-CRF baseline by nearly 0.5 F1 points on average. @@ -2960,7 +2960,7 @@ Incorporating Task-Specific Concept Knowledge into Script Learning ChenkaiSunUniversity of Illinois at Urbana-Champaign TieXuAlibaba - ChengXiangZhaiUniversity of Illinois at Urbana-Champaign + ChengXiangZhaiUniversity of Illinois at Urbana-Champaign HengJiUniversity of Illinois at Urbana-Champaign and Amazon (Amazon Scholar) 3026-3040 In this paper, we present Tetris, a new task of Goal-Oriented Script Completion. Unlike previous work, it considers a more realistic and general setting, where the input includes not only the goal but also additional user context, including preferences and history. To address this problem, we propose a novel approach, which uses two techniques to improve performance: (1) concept prompting, and (2) script-oriented contrastive learning that addresses step repetition and hallucination problems. On our WikiHow-based dataset, we find that both methods improve performance. @@ -2977,7 +2977,7 @@ AliKebarighotbiAmazon MohitBansalUniversity of North Carolina at Chapel Hill HengJiUniversity of Illinois at Urbana-Champaign and Amazon (Amazon Scholar) - PremNatarajanAmazon + PremNatarajanAmazon 3041-3051 Long video content understanding poses a challenging set of research questions as it involves long-distance, cross-media reasoning and knowledge awareness. In this paper, we present a new benchmark for this problem domain, targeting the task of deep movie/TV question answering (QA) beyond previous work’s focus on simple plot summary and short video moment settings. We define several baselines based on direct retrieval of relevant context for long-distance movie QA. Observing that real-world QAs may require higher-order multi-hop inferences, we further propose a novel framework, called the DeepMaven, which extracts events, entities, and relations from the rich multimedia content in long videos to pre-construct movie knowledge graphs (movieKGs), and at the time of QA inference, complements general semantics with structured knowledge for more effective information retrieval and knowledge reasoning. We also introduce our recently collected DeepMovieQA dataset, including 1,000 long-form QA pairs from 41 hours of videos, to serve as a new and useful resource for future work. Empirical results show the DeepMaven performs competitively for both the new DeepMovieQA and the pre-existing MovieQA dataset. 2023.eacl-main.221 @@ -2990,7 +2990,7 @@ Jeremy R.ColeGoogle Research AditiChaudharyGoogle Research BhuwanDhingraDuke University - ParthaTalukdarGoogle Research and IISc + ParthaTalukdarGoogle Research and IISc 3052-3060 Salient Span Masking (SSM) has shown itself to be an effective strategy to improve closed-book question answering performance. SSM extends general masked language model pretraining by creating additional unsupervised training sentences that mask a single entity or date span, thus oversampling factual information. Despite the success of this paradigm, the span types and sampling strategies are relatively arbitrary and not widely studied for other tasks. Thus, we investigate SSM from the perspective of temporal tasks, where learning a good representation of various temporal expressions is important. To that end, we introduce Temporal Span Masking (TSM) intermediate training. First, we find that SSM alone improves the downstream performance on three temporal tasks by an avg. +5.8 points. Further, we are able to achieve additional improvements (avg. +0.29 points) by adding the TSM task. These comprise the new best reported results on the targeted tasks. Our analysis suggests that the effectiveness of SSM stems from the sentences chosen in the training data rather than the mask choice: sentences with entities frequently also contain temporal expressions. Nonetheless, the additional targeted spans of TSM can still improve performance, especially in a zero-shot context. 2023.eacl-main.222 @@ -3052,7 +3052,7 @@ Why Can’t Discourse Parsing Generalize? A Thorough Investigation of the Impact of Data Diversity - Yang JanetLiuGeorgetown University + Yang JanetLiuGeorgetown University AmirZeldesGeorgetown University 3112-3130 Recent advances in discourse parsing performance create the impression that, as in other NLP tasks, performance for high-resource languages such as English is finally becoming reliable. In this paper we demonstrate that this is not the case, and thoroughly investigate the impact of data diversity on RST parsing stability. We show that state-of-the-art architectures trained on the standard English newswire benchmark do not generalize well, even within the news domain. Using the two largest RST corpora of English with text from multiple genres, we quantify the impact of genre diversity in training data for achieving generalization to text types unseen during training. Our results show that a heterogeneous training regime is critical for stable and generalizable models, across parser architectures. We also provide error analyses of model outputs and out-of-domain performance. To our knowledge, this study is the first to fully evaluate cross-corpus RST parsing generalizability on complete trees, examine between-genre degradation within an RST corpus, and investigate the impact of genre diversity in training data composition. @@ -3069,7 +3069,7 @@ Trieu H.TrinhNew York University VyPhanUniversity of Massachusetts - Amherst Lam D.ChauDepartment of Biochemistry, Case Western Reserve University - Minh-ThangLuongVietAI Research + Minh-ThangLuongVietAI Research 3131-3142 Biomedical data and benchmarks are highly valuable yet very limited in low-resource languages other than English, such as Vietnamese. In this paper, we use a state-of-the-art translation model in English-Vietnamese to translate and produce both pretrained and supervised data in the biomedical domains. Thanks to such large-scale translation, we introduce ViPubmedT5, a pretrained Encoder-Decoder Transformer model trained on 20 million translated abstracts from the high-quality public PubMed corpus. ViPubMedT5 demonstrates state-of-the-art results on two different biomedical benchmarks in summarization and acronym disambiguation. Further, we release ViMedNLI - a new NLP task in Vietnamese translated from MedNLI using the recently public En-vi translation model and carefully refined by human experts, with evaluations of existing methods against ViPubmedT5. 2023.eacl-main.228 @@ -3147,9 +3147,9 @@ EsinDurmusStanford University MiracSuzgunStanford University TianyiZhangStanford University - DanJurafskyStanford University - KathleenMcKeownColumbia University and Amazon (Amazon Scholar) - TatsunoriHashimotoStanford + DanJurafskyStanford University + KathleenMcKeownColumbia University and Amazon (Amazon Scholar) + TatsunoriHashimotoStanford 3206-3219 Large language models (LLMs) are subject to sociocultural and other biases previously identified using intrinsic evaluations. However, when and how these intrinsic biases in pre-trained LM representations propagate to downstream, fine-tuned NLP tasks like summarization is not well understood. In this work, we investigate one type of bias—name-nationality bias—and trace it from the pre-training stage to a downstream summarization task across multiple summarization modeling choices. We show that these biases manifest themselves as hallucinations in summarization, leading to factually incorrect summaries. We also find that this propagation of biases is algorithm-dependent: more abstractive models allow biases to propagate more directly to downstream tasks as hallucinated facts. Building on these observations, we further analyze how changes to the adaptation method and fine-tuning data set affect name nationality biases and show that while they can reduce the overall rate of hallucinations, they do not change the types of biases that do appear. 2023.eacl-main.234 @@ -3187,7 +3187,7 @@ JayGalaAI4Bharat DeepGandhiUniversity of Alberta JashMehtaGeorgia Institute of Technology - ZeerakTalatSimon Fraser University + ZeerakTalatSimon Fraser University 3248-3259 Hate speech detection has been the subject of high research attention, due to the scale of content created on social media. In spite of the attention and the sensitive nature of the task, privacy preservation in hate speech detection has remained under-studied. The majority of research has focused on centralised machine learning infrastructures which risk leaking data. In this paper, we show that using federated machine learning can help address privacy the concerns that are inherent to hate speech detection while obtaining up to 6.81% improvement in terms of F1-score. 2023.eacl-main.237 @@ -3286,7 +3286,7 @@ Quantifying Context Mixing in Transformers HoseinMohebbiTilburg University WillemZuidemaUniversity of Amsterdam - GrzegorzChrupałaTilburg University + GrzegorzChrupałaTilburg University AfraAlishahiTilburg University 3378-3400 Self-attention weights and their transformed variants have been the main source of information for analyzing token-to-token interactions in Transformer-based models. But despite their ease of interpretation, these weights are not faithful to the models’ decisions as they are only one part of an encoder, and other components in the encoder layer can have considerable impact on information mixing in the output representations. In this work, by expanding the scope of analysis to the whole encoder block, we propose Value Zeroing, a novel context mixing score customized for Transformers that provides us with a deeper understanding of how information is mixed at each encoder layer. We demonstrate the superiority of our context mixing score over other analysis methods through a series of complementary evaluations with different viewpoints based on linguistically informed rationales, probing, and faithfulness analysis. @@ -3299,7 +3299,7 @@ <fixed-case>KGVL</fixed-case>-<fixed-case>BART</fixed-case>: Knowledge Graph Augmented Visual Language <fixed-case>BART</fixed-case> for Radiology Report Generation KaveriKaleIndian Institute of Technology Bombay - PushpakBhattacharyyaIndian Institute of Technology Bombay and Patna + PushpakBhattacharyyaIndian Institute of Technology Bombay and Patna MilindGuneConsultant Radiologist, Thane, India AdityaShettyConsultant Radiologist, Breach Candy Hospital, Mumbai, India RustomLawyerAugnito India Pvt Ltd @@ -3314,7 +3314,7 @@ A simple but effective model for attachment in discourse parsing with multi-task learning for relation labeling ZinebBennisInstitut de Recheche en Informatique de Toulouse JulieHunterLinagora - NicholasAsherCNRS Institut de Recherche en Informatique de Toulouse + NicholasAsherCNRS Institut de Recherche en Informatique de Toulouse 3412-3417 In this paper, we present a discourse parsing model for conversation trained on the STAC. We fine-tune a BERT-based model to encode pairs of discourse units and use a simple linear layer to predict discourse attachments. We then exploit a multi-task setting to predict relation labels. The multitask approach effectively aids in the difficult task of relation type prediction; our f1 score of 57 surpasses the state of the art with no loss in performance for attachment, confirming the intuitive interdependence of these two tasks. Our method also improves over previous discourse parsing models in allowing longer input sizes and in permitting attachments in which one node has multiple parents, an important feature of multiparty conversation. 2023.eacl-main.247 @@ -3361,7 +3361,7 @@ Semantic Specialization for Knowledge-based Word Sense Disambiguation SakaeMizukiTokyo Institute of Technology - NaoakiOkazakiTokyo Institute of Technology + NaoakiOkazakiTokyo Institute of Technology 3457-3470 A promising approach for knowledge-based Word Sense Disambiguation (WSD) is to select the sense whose contextualized embeddings computed for its definition sentence are closest to those computed for a target word in a given sentence. This approach relies on the similarity of the sense and context embeddings computed by a pre-trained language model. We propose a semantic specialization for WSD where contextualized embeddings are adapted to the WSD task using solely lexical knowledge. The key idea is, for a given sense, to bring semantically related senses and contexts closer and send different/unrelated senses farther away. We realize this idea as the joint optimization of the Attract-Repel objective for sense pairs and the self-training objective for context-sense pairs while controlling deviations from the original embeddings. The proposed method outperformed previous studies that adapt contextualized embeddings. It achieved state-of-the-art performance on knowledge-based WSD when combined with the reranking heuristic that uses the sense inventory. We found that the similarity characteristics of specialized embeddings conform to the key idea. We also found that the (dis)similarity of embeddings between the related/different/unrelated senses correlates well with the performance of WSD. 2023.eacl-main.251 @@ -3390,7 +3390,7 @@ ZishanAhmadIndian Institute of Technology Patna KshitijMishraIndian Institute of Technology Patna AsifEkbalIndian Institute of Technology Patna - PushpakBhattacharyyaIndian Institute of Technology Bombay and Patna + PushpakBhattacharyyaIndian Institute of Technology Bombay and Patna 3482-3494 Although there has been a plethora of work on open-domain conversational systems, most of the systems lack the mechanism of controlling the concept transitions in a dialogue. For activities like switching from casual chit-chat to task-oriented conversation, an agent with the ability to manage the flow of concepts in a conversation might be helpful. The user would find the dialogue more engaging and be more receptive to such transitions if these concept transitions were made while taking into account the user’s persona. Focusing on persona-aware concept transitions, we propose a Reinforced Persona-aware Topic-guiding Conversational System (RPTCS). Due to the lack of a persona-aware topic transition dataset, we propose a novel conversation dataset creation mechanism in which the conversational agent leads the discourse to drift to a set of target concepts depending on the persona of the speaker and the context of the conversation. To avoid scarcely available expensive human resource, the entire data-creation process is mostly automatic with human-in-loop only for quality checks. This created conversational dataset named PTCD is used to develop the RPTCS in two steps. First, a maximum likelihood estimation loss-based conversational model is trained on PTCD. Then this trained model is fine-tuned in a Reinforcement Learning (RL) framework by employing novel reward functions to assure persona, topic, and context consistency with non-repetitiveness in generated responses. Our experimental results demonstrate the strength of the proposed system with respect to strong baselines. 2023.eacl-main.253 @@ -3402,8 +3402,8 @@ What Did You Learn To Hate? A Topic-Oriented Analysis of Generalization in Hate Speech Detection TomBourgeadeIRIT, University of Toulouse PatriciaChirilUniversity of Chicago - FarahBenamaraUniversity of toulouse - VéroniqueMoriceauIRIT, Université Toulouse 3 + FarahBenamaraUniversity of toulouse + VéroniqueMoriceauIRIT, Université Toulouse 3 3495-3508 Hate speech has unfortunately become a significant phenomenon on social media platforms, and it can cover various topics (misogyny, sexism, racism, xenophobia, etc.) and targets (e.g., black people, women). Various hate speech detection datasets have been proposed, some annotated for specific topics, and others for hateful speech in general. In either case, they often employ different annotation guidelines, which can lead to inconsistencies, even in datasets focusing on the same topics. This can cause issues in models trying to generalize across more data and more topics in order to improve detection accuracy. In this paper, we propose, for the first time, a topic-oriented approach to study generalization across popular hate speech datasets. We first perform a comparative analysis of the performances of Transformer-based models in capturing topic-generic and topic-specific knowledge when trained on different datasets. We then propose a novel, simple yet effective approach to study more precisely which topics are best captured in implicit manifestations of hate, showing that selecting combinations of datasets with better out-of-domain topical coverage improves the reliability of automatic hate speech detection. 2023.eacl-main.254 @@ -3416,7 +3416,7 @@ ZonglinYangNanyang Technological University XinyaDuUniversity of Texas at Dallas ErikCambriaNanyang Technological University - ClaireCardieCornell University + ClaireCardieCornell University 3509-3522 Pretrained language models have been shown to store knowledge in their parameters and have achieved reasonable performance in commonsense knowledge base completion (CKBC) tasks. However, CKBC is knowledge-intensive and it is reported that pretrained language models’ performance in knowledge-intensive tasks are limited because of their incapability of accessing and manipulating knowledge. As a result, we hypothesize that providing retrieved passages that contain relevant knowledge as additional input to the CKBC task will improve performance. In particular, we draw insights from Case-Based Reasoning (CBR) – which aims to solve a new problem by reasoning with retrieved relevant cases, and investigate the direct application of it to CKBC. On two benchmark datasets, we demonstrate through automatic and human evaluations that our End-to-end Case-Based Reasoning Framework (ECBRF) generates more valid, informative, and novel knowledge than the state-of-the-art COMET model for CKBC in both the fully supervised and few-shot settings. We provide insights on why previous retrieval-based methods only achieve merely the same performance with COMET. From the perspective of CBR, our framework addresses a fundamental question on whether CBR methodology can be utilized to improve deep learning models. 2023.eacl-main.255 @@ -3467,7 +3467,7 @@ <fixed-case>M</fixed-case>eta<fixed-case>QA</fixed-case>: Combining Expert Agents for Multi-Skill Question Answering HaritzPuertoUKP Lab, TU Darmstadt - GözdeŞahinKoç University + GözdeŞahinKoç University IrynaGurevychUKP Lab, Technische Universität Darmstadt 3566-3580 The recent explosion of question-answering (QA) datasets and models has increased the interest in the generalization of models across multiple domains and formats by either training on multiple datasets or combining multiple models. Despite the promising results of multi-dataset models, some domains or QA formats may require specific architectures, and thus the adaptability of these models might be limited. In addition, current approaches for combining models disregard cues such as question-answer compatibility. In this work, we propose to combine expert agents with a novel, flexible, and training-efficient architecture that considers questions, answer predictions, and answer-prediction confidence scores to select the best answer among a list of answer predictions. Through quantitative and qualitative experiments, we show that our model i) creates a collaboration between agents that outperforms previous multi-agent and multi-dataset approaches, ii) is highly data-efficient to train, and iii) can be adapted to any QA format. We release our code and a dataset of answer predictions from expert agents for 16 QA datasets to foster future research of multi-agent systems. @@ -3481,7 +3481,7 @@ Weixian WaylonLiUniversity of Edinburgh YftahZiserUniversity of Edinburgh MaximinCoavouxCNRS, Université Grenoble Alpes - Shay B.CohenUniversity of Edinburgh + Shay B.CohenUniversity of Edinburgh 3581-3593 We introduce a task consisting in matching a proof to a given mathematical statement. The task fits well within current research on Mathematical Information Retrieval and, more generally, mathematical article analysis (Mathematical Sciences, 2014). We present a dataset for the task (the MATcH dataset) consisting of over 180k statement-proof pairs extracted from modern mathematical research articles. We find this dataset highly representative of our task, as it consists of relatively new findings useful to mathematicians. We propose a bilinear similarity model and two decoding methods to match statements to proofs effectively. While the first decoding method matches a proof to a statement without being aware of other statements or proofs, the second method treats the task as a global matching problem. Through a symbol replacement procedure, we analyze the “insights” that pre-trained language models have in such mathematical article analysis and show that while these models perform well on this task with the best performing mean reciprocal rank of 73.7, they follow a relatively shallow symbolic analysis and matching to achieve that performance. 2023.eacl-main.260 @@ -3494,8 +3494,8 @@ Jan-ChristophKlieUKP Lab, Technical University of Darmstadt Ji-UngLeeUKP, TU Darmstadt KevinStoweEducational Testing Services (ETS) - GözdeŞahinKoç University - Nafise SadatMoosaviDepartment of Computer Science, The University of Sheffield + GözdeŞahinKoç University + Nafise SadatMoosaviDepartment of Computer Science, The University of Sheffield LukeBatesTechnical University of Darmstadt DominicPetrakTU Darmstadt RichardEckart De CastilhoUKP Lab, Technische Universität Darmstadt @@ -3586,7 +3586,7 @@ Representation biases in sentence transformers DmitryNikolaevUniversity of Stuttgart - SebastianPadóStuttgart University + SebastianPadóStuttgart University 3701-3716 Variants of the BERT architecture specialised for producing full-sentence representations often achieve better performance on downstream tasks than sentence embeddings extracted from vanilla BERT. However, there is still little understanding of what properties of inputs determine the properties of such representations. In this study, we construct several sets of sentences with pre-defined lexical and syntactic structures and show that SOTA sentence transformers have a strong nominal-participant-set bias: cosine similarities between pairs of sentences are more strongly determined by the overlap in the set of their noun participants than by having the same predicates, lengthy nominal modifiers, or adjuncts. At the same time, the precise syntactic-thematic functions of the participants are largely irrelevant. 2023.eacl-main.268 @@ -3740,13 +3740,13 @@ Meeting the Needs of Low-Resource Languages: The Value of Automatic Alignments via Pretrained Models AbteenEbrahimiUniversity of Colorado, Boulder - Arya D.McCarthyJohns Hopkins University - ArturoOncevayThe University of Edinburgh + Arya D.McCarthyJohns Hopkins University + ArturoOncevayThe University of Edinburgh John E.OrtegaNortheastern University LuisChiruzzoUniversidad de la Republica GustavoGiménez-LugoUniversidade Tecnológica Federal do Paraná RolandoCoto-SolanoDartmouth College - KatharinaKannUniversity of Colorado Boulder + KatharinaKannUniversity of Colorado Boulder 3912-3926 Large multilingual models have inspired a new class of word alignment methods, which work well for the model’s pretraining languages. However, the languages most in need of automatic alignment are low-resource and, thus, not typically included in the pretraining data. In this work, we ask: How do modern aligners perform on unseen languages, and are they better than traditional methods? We contribute gold-standard alignments for Bribri–Spanish, Guarani–Spanish, Quechua–Spanish, and Shipibo-Konibo–Spanish. With these, we evaluate state-of-the-art aligners with and without model adaptation to the target language. Finally, we also evaluate the resulting alignments extrinsically through two downstream tasks: named entity recognition and part-of-speech tagging. We find that although transformer-based methods generally outperform traditional models, the two classes of approach remain competitive with each other. 2023.eacl-main.280 @@ -3790,7 +3790,7 @@ FynnPetersen-freyUniversität Hamburg GerretVon NordheimUniversit”ot Hamburg KatharinaKleinen-von K”onigsl”owUniversität Hamburg - ChrisBiemannUniversität Hamburg + ChrisBiemannUniversität Hamburg 11-17 WebAnno is one of the most popular annotation tools that supports generic annotation types and distributive annotation with multiple user roles. However, WebAnno focuses on annotating span-level mentions and relations among them, making document-level annotation complicated. When it comes to the annotation and analysis of social science materials, it usually involves the creation of codes to categorize a given document. The codes, which are known as codebooks, are typically hierarchical, which enables to code the document either with a general category or more fine-grained subcategories. CodeAnno is forked from WebAnno and designed to solve the coding problems faced by many social science researchers with the following main functionalities. 1) Creation of hierarchical codebooks, with functionality to move and sort categories in the hierarchy 2) an interactive UI for codebook annotation 3) import and export of annotations in CSV format, hence being compatible with existing annotations conducted using spreadsheet applications 4) integration of an external automation component to facilitate coding using machine learning 5) project templating that allows duplicating a project structure without copying the actual documents. We present different use-cases to demonstrate the capability of CodeAnno. A shot demonstration video of the system is available here: https://www.youtube.com/watch?v=RmCdTghBe-s 2023.eacl-demo.2 @@ -3819,7 +3819,7 @@ AkulSinghFlorida International University JaredHummerFlorida International University AntonelaRadasFlorida International University - MarkFinlaysonFiu + MarkFinlaysonFiu 27-34 jTLEX is a programming library that provides a Java implementation of the TimeLine EXtraction algorithm (TLEX; Finlayson et al.,2021), along with utilities for programmatic manipulation of TimeML graphs. Timelines are useful for a number of natural language understanding tasks, such as question answering, cross-document event coreference, and summarization & visualization. jTLEX provides functionality for (1) parsing TimeML annotations into Java objects, (2) construction of TimeML graphs from scratch, (3) partitioning of TimeML graphs into temporally connected subgraphs, (4) transforming temporally connected subgraphs into point algebra (PA) graphs, (5) extracting exact timeline of TimeML graphs, (6) detecting inconsistent subgraphs, and (7) calculating indeterminate sections of the timeline. The library has been tested on the entire TimeBank corpus, and comes with a suite of unit tests. We release the software as open source with a free license for non-commercial use. 2023.eacl-demo.4 @@ -3833,7 +3833,7 @@ ChauNguyenJapan Advanced Institute of Science and Technology VuTranThe Institute of Statistical Mathematics, Japan KenSatohNational Institute of Informatics, Japan - YujiMatsumotoRIKEN Center for Advanced Intelligence Project (AIP), Japan + YujiMatsumotoRIKEN Center for Advanced Intelligence Project (AIP), Japan MinhNguyenJapan Advanced Institute of Science and Technology 35-42 In recent years, COVID-19 has impacted all aspects of human life. As a result, numerous publications relating to this disease have been issued. Due to the massive volume of publications, some retrieval systems have been developed to provide researchers with useful information. In these systems, lexical searching methods are widely used, which raises many issues related to acronyms, synonyms, and rare keywords. In this paper, we present a hybrid relation retrieval system, CovRelex-SE, based on embeddings to provide high-quality search results. Our system can be accessed through the following URL: https://www.jaist.ac.jp/is/labs/nguyen-lab/systems/covrelex-se/ @@ -3946,7 +3946,7 @@ FantineHuotGoogle JoshuaMaynezGoogle ShashiNarayanGoogle - Reinald KimAmplayoGoogle + Reinald KimAmplayoGoogle KuzmanGanchevGoogle Annie PriyadarshiniLouisGoogle Research UK AndersSandholmGoogle Research @@ -3974,7 +3974,7 @@ <fixed-case>SPINDLE</fixed-case>: Spinning Raw Text into Lambda Terms with Graph Attention KonstantinosKogkalidisUtrecht University - MichaelMoortgatUtrecht University + MichaelMoortgatUtrecht University RichardMootCnrs 128-135 This paper describes SPINDLE, an open source Python module, providing an efficient and accurate parser for written Dutch that transforms raw text input to programs for meaning composition expressed as λ terms. The parser integrates a number of breakthrough advances made in recent years. Its output consists of hi-res derivations of a multimodal type-logical grammar, capturing two orthogonal axes of syntax, namely deep function-argument structures and dependency relations. These are produced by three interdependent systems: a static type-checker asserting the well-formedness of grammatical analyses, a state-of-the-art, structurally-aware supertagger based on heterogeneous graph convolutions, and a massively parallel proof search component based on Sinkhorn iterations. Packed in the software are also handy utilities and extras for proof visualization and inference, intended to facilitate end-user utilization. @@ -4002,7 +4002,7 @@ TwinKarmakharmUniversity of Sheffield IanRobertsUniversity of Sheffield XingyiSongUniversity of Sheffield - KalinaBontchevaUniversity of Sheffield + KalinaBontchevaUniversity of Sheffield 145-151 We present GATE Teamware 2: an open-source web-based platform for managing teams of annotators working on document classification tasks. GATE Teamware 2 is an entirely re-engineered successor to GATE Teamware, using contemporary web frameworks. The software allows the management of teams of multiple annotators, project managers and administrators - including the management of annotators - across multiple projects. Projects can be configured to control and monitor the annotation statistics and have a highly flexible JSON-configurable annotation display which can include arbitrary HTML. Optionally, documents can be uploaded with pre-existing annotations and documents are served to annotators in a random order by default to reduce bias. Crucially, annotators can be trained on applying the annotation guidelines correctly and then screened for quality assurance purposes, prior to being cleared for independent annotation. GATE Teamware 2 can be self-deployed, including in container orchestration environments, or provided as private, hosted cloud instances.GATE Teamware 2 is an open-source software and can be downloaded from https://github.com/GATENLP/gate-teamware.A demonstration video of the system has also been made available at https://youtu.be/KoXkuhc4fmM. 2023.eacl-demo.17 @@ -4018,7 +4018,7 @@ Marta KristinLarusdottirReykjavik University HafsteinnEinarssonUniversity of Iceland AbuzarKhanCarnegie Mellon University - EricNybergCarnegie Mellon University + EricNybergCarnegie Mellon University HrafnLoftssonReykjavik University 152-160 The methods used to create many of the well-known Question-Answering (QA) datasets are hard to replicate for low-resource languages. A commonality amongst these methods is hiring annotators to source answers from the internet by querying a single answer source, such as Wikipedia. Applying these methods for low-resource languages can be problematic since there is no single large answer source for these languages. Consequently, this can result in a high ratio of unanswered questions, since the amount of information in any single source is limited. To address this problem, we developed a novel crowd-sourcing platform to gather multiple-domain QA data for low-resource languages. Our platform, which consists of a mobile app and a web API, gamifies the data collection process. We successfully released the app for Icelandic (a low-resource language with about 350,000 native speakers) to build a dataset which rivals large QA datasets for high-resource languages both in terms of size and ratio of answered questions. We have made the platform open source with instructions on how to localize and deploy it to gather data for other low-resource languages. @@ -4030,7 +4030,7 @@ Towards Speech to Speech Machine Translation focusing on <fixed-case>I</fixed-case>ndian Languages VandanMujadiaStudent - DiptiSharmaIIIT, Hyderabad + DiptiSharmaIIIT, Hyderabad 161-168 We introduce an SSMT (Speech to Speech Machine Translation, aka Speech to Speech Video Translation) Pipeline(https://ssmt.iiit.ac.in/ssmtiiith), as web application for translating videos from one language to another by cascading multiple language modules. Our speech translation system combines highly accurate speech to text (ASR) for Indian English, pre-possessing modules to bridge ASR-MT gaps such as spoken disfluency and punctuation, robust machine translation (MT) systems for multiple language pairs, SRT module for translated text, text to speech (TTS) module and a module to render translated synthesized audio on the original video. It is user-friendly, flexible, and easily accessible system. We aim to provide a complete configurable speech translation experience to users and researchers with this system. It also supports human intervention where users can edit outputs of different modules and the edited output can then be used for subsequent processing to improve overall output quality. By adopting a human-in-the-loop approach, the aim is to configure technology in such a way where it can assist humans and help to reduce the involved human efforts in speech translation involving English and Indian languages. As per our understanding, this is the first fully integrated system for English to Indian languages (Hindi, Telugu, Gujarati, Marathi and Punjabi) video translation. Our evaluation shows that one can get 3.5+ MOS score using the developed pipeline with human intervention for English to Hindi. A short video demonstrating our system is available at https://youtu.be/MVftzoeRg48. 2023.eacl-demo.19 @@ -4040,7 +4040,7 @@ <fixed-case>T</fixed-case>ext<fixed-case>W</fixed-case>orld<fixed-case>E</fixed-case>xpress: Simulating Text Games at One Million Steps Per Second - PeterJansenUniversity of Arizona + PeterJansenUniversity of Arizona Marc-alexandreCoteMicrosoft Research 169-177 Text-based games offer a challenging test bed to evaluate virtual agents at language understanding, multi-step problem-solving, and common-sense reasoning. However, speed is a major limitation of current text-based games, capping at 300 steps per second, mainly due to the use of legacy tooling. In this work we present TextWorldExpress, a high-performance simulator that includes implementations of three common text game benchmarks that increases simulation throughput by approximately three orders of magnitude, reaching over one million steps per second on common desktop hardware. This significantly reduces experiment runtime, enabling billion-step-scale experiments in about one day. @@ -4051,7 +4051,7 @@ <fixed-case>T</fixed-case>ermo<fixed-case>UD</fixed-case> - a language-independent terminology extraction tool - MalgorzataMarciniakInstitute of Computer Science PAS + MalgorzataMarciniakInstitute of Computer Science PAS PiotrRychlikInstitute of Computer Science, Polish Academy of Sciences AgnieszkaMykowieckaInstitute of Computer Science, Polish Academy of Sciences and Polish-Japanese Academy of Information Technology 178-186 @@ -4107,7 +4107,7 @@ FurkanAkkurtBoğaziçi University MerveGürbüzBogazici University OnurGungorBogazici University - ArzucanÖzgürBogazici University + ArzucanÖzgürBogazici University TungaGüngörBogazici University 219-227 Access to natural language processing resources is essential for their continuous improvement. This can be especially challenging in educational institutions where the software development effort required to package and release research outcomes may be overwhelming and under-recognized. Access towell-prepared and reliable research outcomes is important both for their developers as well as the greater research community. This paper presents an approach to address this concern with two main goals: (1) to create an open-source easily deployable platform where resources can be easily shared and explored, and (2) to use this platform to publish open-source Turkish NLP resources (datasets and tools) created by a research lab. The Turkish Natural Language Processing (TULAP) was designed and developed as an easy-to-use platform to share dataset and tool resources which supports interactive tool demos. Numerous open access Turkish NLP resources have been shared on TULAP. All tools are containerized to support portability for custom use. This paper describes the design, implementation, and deployment of TULAP with use cases (available at https://tulap.cmpe.boun.edu.tr/). A short video demonstrating our system is available at https://figshare.com/articles/media/TULAP_Demo/22179047. @@ -4133,8 +4133,8 @@ Automatically Summarizing Evidence from Clinical Trials: A Prototype Highlighting Current Challenges SanjanaRamprasadNortheastern University JeredMcinerneyNortheastern University - IainMarshallKing’s College London - ByronWallaceNortheastern University + IainMarshallKing’s College London + ByronWallaceNortheastern University 236-247 In this work we present TrialsSummarizer, a system that aims to automatically summarize evidence presented in the set of randomized controlled trials most relevant to a given query. Building on prior work, the system retrieves trial publications matching a query specifying a combination of condition, intervention(s), and outcome(s), and ranks these according to sample size and estimated study quality. The top-k such studies are passed through a neural multi-document summarization system, yielding a synopsis of these trials. We consider two architectures: A standard sequence-to-sequence model based on BART, and a multi-headed architecture intended to provide greater transparency and controllability to end-users. Both models produce fluent and relevant summaries of evidence retrieved for queries, but their tendency to introduce unsupported statements render them inappropriate for use in this domain at present. The proposed architecture may help users verify outputs allowing users to trace generated tokens back to inputs. The demonstration video can be found at https://vimeo.com/735605060The prototype, source code, and model weights are available at: https://sanjanaramprasad.github.io/trials-summarizer/ 2023.eacl-demo.27 @@ -4252,7 +4252,7 @@ Incorporating Dropped Pronouns into Coreference Resolution: The case for <fixed-case>T</fixed-case>urkish TuğbaPamay ArslanIstanbul Technical University - GülşenEryiğitIstanbul Technical University + GülşenEryiğitIstanbul Technical University 14-25 Representation of coreferential relations is a challenging and actively studied topic for pro-drop and morphologically rich languages (PD-MRLs) due to dropped pronouns (e.g., null subjects and omitted possessive pronouns). These phenomena require a representation scheme at the morphology level and enhanced evaluation methods. In this paper, we propose a representation & evaluation scheme to incorporate dropped pronouns into coreference resolution and validate it on the Turkish language. Using the scheme, we extend the annotations on the only existing Turkish coreference dataset, which originally did not contain annotations for dropped pronouns. We provide publicly available pre and post processors to enhance the prominent CoNLL coreference scorer also to cover coreferential relations arising from dropped pronouns. As a final step, the paper reports the first neural Turkish coreference resolution results in the literature. Although validated on Turkish, the proposed scheme is language-independent and may be used for other PD-MRLs. 2023.eacl-srw.2 @@ -4263,7 +4263,7 @@ Towards Generation and Recognition of Humorous Texts in <fixed-case>P</fixed-case>ortuguese MarcioLima InácioUniversity of Coimbra - HugoGonçalo OliveiraCISUC, DEI, University of Coimbra + HugoGonçalo OliveiraCISUC, DEI, University of Coimbra 26-36 Dealing with humor is an important step to develop Natural Language Processing tools capable of handling sophisticated semantic and pragmatic knowledge. In this context, this PhD thesis focuses on the automatic generation and recognition of verbal punning humor in Portuguese, which is still an underdeveloped language when compared to English. One of the main goals of this research is to conciliate Natural Language Generation computational models with existing theories of humor from the Humanities while avoiding mere generation by including contextual information into the generation process. Another point that is of utmost importance is the inclusion of the listener as an active part in the process of understanding and creating humor; we hope to achieve this by using concepts from Recommender Systems in our methods. Ultimately, we want to not only advance the current state-of-the-art in humor generation and recognition, but also to help the general Portuguese-speaking research community with methods, tools and resources that may aid in the development of further techniques for this language. We also expect our systems to provide insightful ideas about how humor is created and perceived by both humans and machines. 2023.eacl-srw.3 @@ -4323,7 +4323,7 @@ Improving and Simplifying Template-Based Named Entity Recognition MuraliKondraguntaUniversity of Groningen OlatzPerez-de-ViñaspreHiTZ Center - Ixa, University of the Basque Country UPV/EHU - MaiteOronozHiTZ Center - Ixa, University of the Basque Country UPV/EHU + MaiteOronozHiTZ Center - Ixa, University of the Basque Country UPV/EHU 79-86 With the rise in larger language models, researchers started exploiting them by pivoting the downstream tasks as language modeling tasks using prompts. In this work, we convert the Named Entity Recognition task into a seq2seq task by generating the synthetic sentences using templates. Our main contribution is the conversion framework which provides faster inference. In addition, we test our method’s performance in resource-rich, low resource and domain transfer settings. Results show that our method achieves comparable results in the resource-rich setting and outperforms the current seq2seq paradigm state-of-the-art approach in few-shot settings. Through the experiments, we observed that the negative examples play an important role in model’s performance. We applied our approach over BART and T5-base models, and we notice that the T5 architecture aligns better with our task. The work is performed on the datasets in English language. 2023.eacl-srw.8 @@ -4380,7 +4380,7 @@ AmirHadifarGhent University - imec Semere KirosBitewGhent University - imec, IDLab JohannesDeleuGhent University - imec - VeroniqueHosteLT3, Ghent University + VeroniqueHosteLT3, Ghent University ChrisDevelderGhent University ThomasDemeesterGhent University - imec 123-133 @@ -4393,7 +4393,7 @@ Towards Automatic Grammatical Error Type Classification for <fixed-case>T</fixed-case>urkish HarunUzIstanbul Technical University - GülşenEryiğitIstanbul Technical University + GülşenEryiğitIstanbul Technical University 134-142 2023.eacl-srw.14 uz-eryigit-2023-towards @@ -4404,7 +4404,7 @@ Theoretical Conditions and Empirical Failure of Bracket Counting on Long Sequences with Linear Recurrent Networks NadineEl-NaggarCity, University of London - PranavaMadhyasthaCity, University of London + PranavaMadhyasthaCity, University of London TillmanWeydeCity, University of London 143-148 Previous work has established that RNNs with an unbounded activation function have the capacity to count exactly. However, it has also been shown that RNNs are challenging to train effectively and generally do not learn exact counting behaviour. In this paper, we focus on this problem by studying the simplest possible RNN, a linear single-cell network. We conduct a theoretical analysis of linear RNNs and identify conditions for the models to exhibit exact counting behaviour. We provide a formal proof that these conditions are necessary and sufficient. We also conduct an empirical analysis using tasks involving a Dyck-1-like Balanced Bracket language under two different settings. We observe that linear RNNs generally do not meet the necessary and sufficient conditions for counting behaviour when trained with the standard approach. We investigate how varying the length of training sequences and utilising different target classes impacts model behaviour during training and the ability of linear RNN models to effectively approximate the indicator conditions. @@ -4416,7 +4416,7 @@ Addressing Domain Changes in Task-oriented Conversational Agents through Dialogue Adaptation TizianoLabrunaFondazione Bruno Kessler and Free University of Bozen-Bolzano - BernardoMagniniFbk + BernardoMagniniFbk 149-158 2023.eacl-srw.16 labruna-magnini-2023-addressing @@ -4428,8 +4428,8 @@ Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: Tutorial Abstracts - Fabio MassimoZanzotto - SameerPradhan + Fabio MassimoZanzotto + SameerPradhan Association for Computational Linguistics
Dubrovnik, Croatia
May @@ -4444,7 +4444,7 @@ Mining, Assessing, and Improving Arguments in <fixed-case>NLP</fixed-case> and the Social Sciences GabriellaLapesa - Eva MariaVecchi + Eva MariaVecchi SerenaVillata HenningWachsmuth 1-6 @@ -4456,7 +4456,7 @@ Emotion Analysis from Texts - SanjaStajner + SanjaStajner RomanKlinger 7-12 Emotion analysis in text is an area of research that encompasses a set of various natural language processing (NLP) tasks, including classification and regression settings, as well as structured prediction tasks like role labelling or stimulus detection. In this tutorial, we provide an overview of research from emotion psychology which sets the ground for choosing adequate NLP methodology, and present existing resources and classification methods used for emotion analysis in texts. We further discuss appraisal theories and how events can be interpreted regarding their presumably caused emotion and briefly introduce emotion role labelling. In addition to these technical topics, we discuss the use cases of emotion analysis in text, their societal impact, ethical considerations, as well as the main challenges in the field. diff --git a/data/xml/2023.eamt.xml b/data/xml/2023.eamt.xml index f411883c75..5f3fc5fbc6 100644 --- a/data/xml/2023.eamt.xml +++ b/data/xml/2023.eamt.xml @@ -14,10 +14,10 @@ Sergi AlvarezVidal NoraAranberri MaraNunziatini - Carla ParraEscartín - MikelForcada - MajaPopovic - CarolinaScarton + Carla ParraEscartín + MikelForcada + MajaPopovic + CarolinaScarton HelenaMoniz European Association for Machine Translation
Tampere, Finland
@@ -41,7 +41,7 @@ Tailoring Domain Adaptation for Machine Translation Quality Estimation Javad Pourmostafa RoshanSharami DimitarShterionov - FrédéricBlain + FrédéricBlain EvaVanmassenhove Mirella DeSisto ChrisEmmery @@ -54,7 +54,7 @@ Example-Based Machine Translation from Textto a Hierarchical Representation of Sign Language EliseBertin-Lemée - AnneliesBraffort + AnneliesBraffort CamilleChallant ClaireDanet MichaelFilhol @@ -66,9 +66,9 @@ Unsupervised Feature Selection for Effective Parallel Corpus Filtering MikkoAulamo - Onade Gibert + Onade Gibert SamiVirpioja - JörgTiedemann + JörgTiedemann 31–38 This work presents an unsupervised method of selecting filters and threshold values for the OpusFilter parallel corpus cleaning toolbox. The method clusters sentence pairs into noisy and clean categories and uses the features of the noisy cluster center as filtering parameters. Our approach utilizes feature importance analysis to disregard filters that do not differentiate between clean and noisy data. A randomly sampled subset of a given corpus is used for filter selection and ineffective filters are not run for the full corpus. We use a set of automatic evaluation metrics to assess the quality of translation models trained with data filtered by our method and data filtered with OpusFilter’s default parameters. The trained models cover English-German and English-Ukrainian in both directions. The proposed method outperforms the default parameters in all translation directions for almost all evaluation metrics. 2023.eamt-1.4 @@ -87,7 +87,7 @@ <fixed-case>BLEU</fixed-case> Meets <fixed-case>COMET</fixed-case>: Combining Lexical and Neural Metrics Towards Robust Machine Translation Evaluation TaisiyaGlushkova ChrysoulaZerva - André F. T.Martins + André F. T.Martins 47–58 Although neural-based machine translation evaluation metrics, such as COMET or BLEURT, have achieved strong correlations with human judgements, they are sometimes unreliable in detecting certain phenomena that can be considered as critical errors, such as deviations in entities and numbers. In contrast, traditional evaluation metrics such as BLEU or chrF, which measure lexical or character overlap between translation hypotheses and human references, have lower correlations with human judgements but are sensitive to such deviations. In this paper, we investigate several ways of combining the two approaches in order to increase robustness of state-of-the-art evaluation methods to translations with critical errors. We show that by using additional information during training, such as sentence-level features and word-level tags, the trained metrics improve their capability to penalize translations with specific troublesome phenomena, which leads to gains in correlations with humans and on the recent DEMETR benchmark on several language pairs. 2023.eamt-1.6 @@ -97,7 +97,7 @@ Exploiting large pre-trained models for low-resource neural machine translation AarónGaliano-Jiménez FelipeSánchez-Martínez - Víctor M.Sánchez-Cartagena + Víctor M.Sánchez-Cartagena Juan AntonioPérez-Ortiz 59–68 Pre-trained models have drastically changed the field of natural language processing by providing a way to leverage large-scale language representations to various tasks. Some pre-trained models offer general-purpose representations, while others are specialized in particular tasks, like neural machine translation (NMT). Multilingual NMT-targeted systems are often fine-tuned for specific language pairs, but there is a lack of evidence-based best-practice recommendations to guide this process. Moreover, the trend towards even larger pre-trained models has made it challenging to deploy them in the computationally restrictive environments typically found in developing regions where low-resource languages are usually spoken. We propose a pipeline to tune the mBART50 pre-trained model to 8 diverse low-resource language pairs, and then distil the resulting system to obtain lightweight and more sustainable models. Our pipeline conveniently exploits back-translation, synthetic corpus filtering, and knowledge distillation to deliver efficient, yet powerful bilingual translation models 13 times smaller than the original pre-trained ones, but with close performance in terms of BLEU. @@ -157,7 +157,7 @@ BeatrizSilva MariannaBuchicchio José G. C.de Souza - André F. T.Martins + André F. T.Martins 115–124 This paper aims to investigate the effectiveness of the k-Nearest Neighbor Machine Translation model (kNN-MT) in real-world scenarios. kNN-MT is a retrieval-augmented framework that combines the advantages of parametric models with non-parametric datastores built using a set of parallel sentences. Previous studies have primarily focused on evaluating the model using only the BLEU metric and have not tested kNN-MT in real world scenarios. Our study aims to fill this gap by conducting a comprehensive analysis on various datasets comprising different language pairs and different domains, using multiple automatic metrics and expert evaluated Multidimensional Quality Metrics (MQM). We compare kNN-MT with two alternate strategies: fine-tuning all the model parameters and adapter-based finetuning. Finally, we analyze the effect of the datastore size on translation quality, and we examine the number of entries necessary to bootstrap and configure the index. 2023.eamt-1.12 @@ -166,7 +166,7 @@ Evaluation of <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Machine Translation of Emotion-Loaded Microblog Texts: A Human Annotated Dataset for the Quality Assessment of Emotion Translation ShenbinQian - ConstantinOrasan + ConstantinOrasan Felix DoCarmo QiuliangLi DipteshKanojia @@ -181,7 +181,7 @@ RomainSilvestri GeorgiosVernikos LjiljanaDolamic - AndreiPopescu-Belis + AndreiPopescu-Belis 137–146 Subword tokenization is the de-facto standard for tokenization in neural language models and machine translation systems. Three advantages are frequently put forward in favor of subwords: shorter encoding of frequent tokens, compositionality of subwords, and ability to deal with unknown words. As their relative importance is not entirely clear yet, we propose a tokenization approach that enables us to separate frequency (the first advantage) from compositionality, thanks to the use of Huffman coding, which tokenizes words using a fixed amount of symbols. Experiments with CS-DE, EN-FR and EN-DE NMT show that frequency alone accounts for approximately 90% of the BLEU scores reached by BPE, hence compositionality has less importance than previously thought. 2023.eamt-1.14 @@ -191,7 +191,7 @@ What Works When in Context-aware Neural Machine Translation? HarritxuGete ThierryEtchegoyhen - GorkaLabaka + GorkaLabaka 147–156 Document-level Machine Translation has emerged as a promising means to enhance automated translation quality, but it is currently unclear how effectively context-aware models use the available context during translation. This paper aims to provide insight into the current state of models based on input concatenation, with an in-depth evaluation on English–German and English–French standard datasets. We notably evaluate the impact of data bias, antecedent part-of-speech, context complexity, and the syntactic function of the elements involved in discursive phenomena. Our experimental results indicate that the selected models do improve the overall translation in context, with varying sensitivity to the different factors we examined. We notably show that the selected context-aware models operate markedly better on regular syntactic configurations involving subject antecedents and pronouns, with degraded performance as the configurations become more dissimilar. 2023.eamt-1.15 @@ -209,7 +209,7 @@ The <fixed-case>MT</fixed-case>@<fixed-case>BZ</fixed-case> corpus: machine translation & legal language FlaviaDe Camillis - Egon W.Stemle + Egon W.Stemle ElenaChiocchetti FrancescoFernicola 171–180 @@ -260,7 +260,7 @@ Adaptive Machine Translation with Large Language Models YasminMoslem RejwanulHaque - John D.Kelleher + John D.Kelleher AndyWay 227–237 Consistency is a key requirement of high-quality translation. It is especially important to adhere to pre-approved terminology and adapt to corrected translations in domain-specific projects. Machine translation (MT) has achieved significant progress in the area of domain adaptation. However, real-time adaptation remains challenging. Large-scale language models (LLMs) have recently shown interesting capabilities of in-context learning, where they learn to replicate certain input-output text generation patterns, without further fine-tuning. By feeding an LLM at inference time with a prompt that consists of a list of translation pairs, it can then simulate the domain and style characteristics. This work aims to investigate how we can utilize in-context learning to improve real-time adaptive MT. Our extensive experiments show promising results at translation time. For example, GPT-3.5 can adapt to a set of in-domain sentence pairs and/or terminology while translating a new sentence. We observe that the translation quality with few-shot in-context learning can surpass that of strong encoder-decoder MT systems, especially for high-resource languages. Moreover, we investigate whether we can combine MT from strong encoder-decoder models with fuzzy matches, which can further improve translation quality, especially for less supported languages. We conduct our experiments across five diverse language pairs, namely English-to-Arabic (EN-AR), English-to-Chinese (EN-ZH), English-to-French (EN-FR), English-to-Kinyarwanda (EN-RW), and English-to-Spanish (EN-ES). @@ -271,7 +271,7 @@ Segment-based Interactive Machine Translation at a Character Level AngelNavarro MiguelDomingo - FranciscoCasacuberta + FranciscoCasacuberta 239–248 To produce high quality translations, human translators need to review and correct machine translation hypothesis in what it is known as post-editing. In order to reduce the human effort of this process, interactive machine translation proposed a collaborative framework in which human and machine work together to generate the translations. Among the many protocols proposed throughout the years, the segment-based one established a paradigm in which the post-editor was allowed to validate correct word sequences from a translation hypothesis and introduced a word correction to help the system improve the next hypothesis. In this work we propose an extension to this protocol: instead of having to the type the complete word correction, the system will complete the user’s correction while they are typing. We evaluated our proposal under a simulated environment, achieving a significant reduction of the human effort. 2023.eamt-1.23 @@ -306,7 +306,7 @@ Analysing Mistranslation of Emotions in Multilingual Tweets by Online <fixed-case>MT</fixed-case> Tools HadeelSaadany - ConstantinOrasan + ConstantinOrasan Rocio CaroQuintana Felix DoCarmo LeonardoZilio @@ -419,7 +419,7 @@ How can machine translation help generate <fixed-case>A</fixed-case>rab melodic improvisation? FadiAl-Ghawanmeh Alexander RefsumJensenius - KamelSmaili + KamelSmaili 385–392 This article presents a system to generate Arab music improvisation using machine translation (MT). To reach this goal, we developed a MT model to translate a vocal improvisation into an automatic instrumental oud (Arab lute) response. Given the melodic and non-metric musical form, it was necessary to develop efficient textual representations in order for classical MT models to be as successful as in common NLP applications. We experimented with Statistical and Neural MT to train our parallel corpus (Vocal → Instrument) of 6991 sentences. The best model was then used to generate improvisation by iteratively translating the translations of the most common patterns of each maqam (n-grams), producing elaborated variations conditioned to listener feedback. We constructed a dataset of 717 instrumental improvisations to extract their n-grams. Objective evaluation of MT was conducted at two levels: a sentence-level evaluation using the BLEU metric, and a higher level evaluation using musically informed metrics. Objective measures were consistent with one another. Subjective evaluations by experts from the maqam music tradition were promising, and a useful reference for understanding objective results. 2023.eamt-1.38 @@ -492,7 +492,7 @@ JoãoGodinho PedroCoelho HelenaMoniz - AlonLavie + AlonLavie 451–460 This paper illustrates a new methodology based on Test Suites (Avramidis et al., 2018) with focus on Business Critical Errors (BCEs) (Stewart et al., 2022) to evaluate the output of Machine Translation (MT) and Quality Estimation (QE) systems. We demonstrate the value of relying on semi-automatic evaluation done through scalable BCE-focused Test Suites to monitor both MT and QE systems’ performance for 8 language pairs (LPs) and a total of 4 error categories. This approach allows us to not only track the impact of new features and implementations in a real business environment, but also to identify strengths and weaknesses in models regarding different error types, and subsequently know what to improve henceforth. 2023.eamt-1.44 @@ -505,7 +505,7 @@ MeeganGower SnehaRautmare NishthaJain - JohnKelleher + JohnKelleher 461–470 In the context of an epidemiological study involving multilingual social media, this paper reports on the ability of machine translation systems to preserve content relevant for a document classification task designed to determine whether the social media text is related to covid. The results indicate that machine translation does provide a feasible basis for scaling epidemiological social media surveillance to multiple languages. Moreover, a qualitative error analysis revealed that the majority of classification errors are not caused by MT errors. 2023.eamt-1.45 @@ -613,7 +613,7 @@ Victor UbietoNogales Santiago EgeaGomez InekeSchuurman - GorkaLabaka + GorkaLabaka AdriánNúnez-Marcos IreneMurtagh EuanMcGill @@ -639,16 +639,16 @@ <fixed-case>M</fixed-case>a<fixed-case>C</fixed-case>o<fixed-case>C</fixed-case>u: Massive collection and curation of monolingual and bilingual data: focus on under-resourced languages MartaBañón MălinaChichirău - MiquelEsplà-Gomis + MiquelEsplà-Gomis MikelForcada AarónGaliano-Jiménez TajaKuzman NikolaLjubešić Rikvan Noord Leopoldo PlaSempere - GemaRamírez-Sánchez + GemaRamírez-Sánchez PeterRupnik - VitSuchomel + VitSuchomel AntonioToral JaumeZaragoza-Bernabeu 505–506 @@ -664,7 +664,7 @@ AlessiaBattisti MichèleBerger RichardBowden - AnneliesBraffort + AnneliesBraffort Necati CihanCamgoz CristinaEspaña-Bonet RomanGrundkiewicz @@ -716,7 +716,7 @@ <fixed-case>PROPICTO</fixed-case>: Developing Speech-to-Pictograph Translation Systems to Enhance Communication Accessibility LucíaOrmaechea - PierretteBouillon + PierretteBouillon MaximinCoavoux EmmanuelleEsperança-Rodier JohannaGerlach @@ -738,8 +738,8 @@ NikolayBogoychev ShaoxiongJi GraemeNail - GemaRamírez-Sánchez - JörgTiedemann + GemaRamírez-Sánchez + JörgTiedemann Jelmervan der Linde JaumeZaragoza 517–518 diff --git a/data/xml/2023.emnlp.xml b/data/xml/2023.emnlp.xml index 182766c9fc..f2c8dfb971 100644 --- a/data/xml/2023.emnlp.xml +++ b/data/xml/2023.emnlp.xml @@ -68,7 +68,7 @@ YiFung HouChan TarekAbdelzaher - ChengXiangZhai + ChengXiangZhai HengJi 43-57 Automatic response forecasting for news media plays a crucial role in enabling content producers to efficiently predict the impact of news releases and prevent unexpected negative outcomes such as social conflict and moral injury. To effectively forecast responses, it is essential to develop measures that leverage the social dynamics and contextual information surrounding individuals, especially in cases where explicit profiles or historical actions of the users are limited (referred to as lurkers). As shown in a previous study, 97% of all tweets are produced by only the most active 25% of users. However, existing approaches have limited exploration of how to best process and utilize these important features. To address this gap, we propose a novel framework, named SocialSense, that leverages a large language model to induce a belief-centered graph on top of an existent social network, along with graph-based propagation to capture social dynamics. We hypothesize that the induced graph that bridges the gap between distant users who share similar beliefs allows the model to effectively capture the response patterns. Our method surpasses existing state-of-the-art in experimental evaluations for both zero-shot and supervised settings, demonstrating its effectiveness in response forecasting. Moreover, the analysis reveals the framework’s capability to effectively handle unseen user and lurker scenarios, further highlighting its robustness and practical applicability. @@ -142,7 +142,7 @@ Evaluating and Modeling Attribution for Cross-Lingual Question Answering BenjaminMuller JohnWieting - JonathanClark + JonathanClark TomKwiatkowski SebastianRuder LivioSoares @@ -163,7 +163,7 @@ OrevaogheneAhia Abraham ToluwalaseOwodunni OdunayoOgundepo - David IfeoluwaAdelani + David IfeoluwaAdelani JimmyLin 158-168 In this study, we highlight the importance of enhancing the quality of pretraining data in multilingual language models. Existing web crawls have demonstrated quality issues, particularly in the context of low-resource languages. Consequently, we introduce a new multilingual pretraining corpus for 16 African languages, designed by carefully auditing existing pretraining corpora to understand and rectify prevalent quality issues. To compile this dataset, we undertake a rigorous examination of current data sources for thirteen languages within one of the most extensive multilingual web crawls, mC4, and extract cleaner data through meticulous auditing and improved web crawling strategies. Subsequently, we pretrain a new T5-based model on this dataset and evaluate its performance on multiple downstream tasks. Our model demonstrates better downstream effectiveness over existing pretrained models across four NLP tasks, underscoring the critical role data quality plays in pretraining language models in low-resource scenarios. Specifically, on cross-lingual QA evaluation, our new model is more than twice as effective as multilingual T5. All code, data and models are publicly available at https://github.com/castorini/AfriTeVa-keji. @@ -189,7 +189,7 @@ HuaoLi YuChong SimonStepputtis - JosephCampbell + JosephCampbell DanaHughes CharlesLewis KatiaSycara @@ -206,7 +206,7 @@ MaxMüller-Eberstein Robvan der Goot LeonWeber-Genzel - BarbaraPlank + BarbaraPlank 193-203 Language understanding is a multi-faceted cognitive capability, which the Natural Language Processing (NLP) community has striven to model computationally for decades. Traditionally, facets of linguistic intelligence have been compartmentalized into tasks with specialized model architectures and corresponding evaluation protocols. With the advent of large language models (LLMs) the community has witnessed a dramatic shift towards general purpose, task-agnostic approaches powered by generative models. As a consequence, the traditional compartmentalized notion of language tasks is breaking down, followed by an increasing challenge for evaluation and analysis. At the same time, LLMs are being deployed in more real-world scenarios, including previously unforeseen zero-shot setups, increasing the need for trustworthy and reliable systems. Therefore, we argue that it is time to rethink what constitutes tasks and model evaluation in NLP, and pursue a more holistic view on language, placing trustworthiness at the center. Towards this goal, we review existing compartmentalized approaches for understanding the origins of a model’s functional capacity, and provide recommendations for more multi-faceted evaluation protocols. 2023.emnlp-main.14 @@ -271,7 +271,7 @@ Understanding Compositional Data Augmentation in Typologically Diverse Morphological Inflection FarhanSamir - MiikkaSilfverberg + MiikkaSilfverberg 277-291 Data augmentation techniques are widely used in low-resource automatic morphological inflection to address the issue of data sparsity. However, the full implications of these techniques remain poorly understood. In this study, we aim to shed light on the theoretical aspects of the data augmentation strategy StemCorrupt, a method that generates synthetic examples by randomly substituting stem characters in existing gold standard training examples. Our analysis uncovers that StemCorrupt brings about fundamental changes in the underlying data distribution, revealing inherent compositional concatenative structure. To complement our theoretical analysis, we investigate the data-efficiency of StemCorrupt. Through evaluation across a diverse set of seven typologically distinct languages, we demonstrate that selecting a subset of datapoints with both high diversity and high predictive uncertainty significantly enhances the data-efficiency of compared to competitive baselines. Furthermore, we explore the impact of typological features on the choice of augmentation strategy and find that languages incorporating non-concatenativity, such as morphonological alternations, derive less benefit from synthetic examples with high predictive uncertainty. We attribute this effect to phonotactic violations induced by StemCorrupt, emphasizing the need for further research to ensure optimal performance across the entire spectrum of natural language morphology. 2023.emnlp-main.19 @@ -285,7 +285,7 @@ YifanDu KunZhou JinpengWang - XinZhao + XinZhao Ji-RongWen 292-305 Inspired by the superior language abilities of large language models (LLM), large vision-language models (LVLM) have been recently proposed by integrating powerful LLMs for improving the performance on complex multimodal tasks. Despite the promising progress on LVLMs, we find that they suffer from object hallucinations, i.e., they tend to generate objects inconsistent with the target images in the descriptions. To investigate it, this work presents the first systematic study on object hallucination of LVLMs. We conduct the evaluation experiments on several representative LVLMs, and show that they mostly suffer from severe object hallucination issues. We further discuss that the visual instructions may influence the hallucination, and find that: objects that frequently appear in the visual instructions or co-occur with the image objects are obviously prone to be hallucinated by LVLMs. Besides, we further design a polling-based query method called POPE for better evaluation of object hallucination. Experiment results show that our POPE can evaluate object hallucination in a more stable and flexible way. @@ -315,7 +315,7 @@ Parameter-efficient Tuning for Large Language Model without Calculating Its Gradients FeihuJin JiajunZhang - ChengqingZong + ChengqingZong 321-330 Fine-tuning all parameters of large language models (LLMs) requires significant computational resources and is time-consuming. Recent parameter-efficient tuning methods such as Adapter tuning, Prefix tuning, and LoRA allow for updating a small subset of parameters in large language models. However, they can only save approximately 30% of the training memory requirements, due to the problem that gradient computation and backpropagation are still necessary for these methods. This paper proposes a novel parameter-efficient tuning method for LLMs without calculating their gradients. Leveraging the discernible similarities between the parameter-efficient modules of the same task learned by both large and small language models, we put forward a strategy for transferring the parameter-efficient modules, originally derived from small language models to much larger ones. To ensure a smooth and effective adaptation process, we further introduce a Bridge model to guarantee dimensional consistency while also stimulating a dynamic interaction between the models. We demonstrate the effectiveness of our method using the T5 and GPT-2 series of language models on the SuperGLUE benchmark. Our method achieves comparable performance to both fine-tuning and parameter-efficient tuning on large language models without needing gradient-based optimization. Additionally, our method achieves up to 5.7x memory reduction compared to parameter-efficient tuning. 2023.emnlp-main.22 @@ -479,7 +479,7 @@ Selectively Answering Ambiguous Questions JeremyCole MichaelZhang - DanielGillick + DanielGillick JulianEisenschlos BhuwanDhingra JacobEisenstein @@ -520,7 +520,7 @@ Pragmatic Reasoning Unlocks Quantifier Semantics for Foundation Models YiyuanLi - RakeshMenon + RakeshMenon SayanGhosh ShashankSrivastava 573-591 @@ -705,7 +705,7 @@ PeterWest AlexanderKoller SwabhaSwayamdipta - NoahSmith + NoahSmith YejinChoi 790-807 Ambiguity is an intrinsic feature of natural language. Managing ambiguity is a key part of human language understanding, allowing us to anticipate misunderstanding as communicators and revise our interpretations as listeners. As language models are increasingly employed as dialogue interfaces and writing aids, handling ambiguous language is critical to their success. We capture ambiguity in a sentence through its effect on entailment relations with another sentence, and collect AmbiEnt, a linguist-annotated benchmark of 1,645 examples with diverse kinds of ambiguity. We design a suite of tests based on AmbiEnt, presenting the first evaluation of pretrained LMs to recognize ambiguity and disentangle possible meanings. We find that the task remains extremely challenging, including for GPT-4, whose generated disambiguations are considered correct only 32% of the time in crowdworker evaluation, compared to 90% for disambiguations in our dataset. Finally, to illustrate the value of ambiguity-sensitive tools, we show that a multilabel NLI model can flag political claims in the wild that are misleading due to ambiguity. We encourage the field to rediscover the importance of ambiguity for NLP. @@ -839,7 +839,7 @@ QiongkaiXu JunWang BenjaminRubinstein - TrevorCohn + TrevorCohn 953-967 Modern NLP models are often trained over large untrusted datasets, raising the potential for a malicious adversary to compromise model behaviour. For instance, backdoors can be implanted through crafting training instances with a specific textual trigger and a target label. This paper posits that backdoor poisoning attacks exhibit a spurious correlation between simple text features and classification labels, and accordingly, proposes methods for mitigating spurious correlation as means of defence. Our empirical study reveals that the malicious triggers are highly correlated to their target labels; therefore such correlations are extremely distinguishable compared to those scores of benign features, and can be used to filter out potentially problematic instances. Compared with several existing defences, our defence method significantly reduces attack success rates across backdoor attacks, and in the case of insertion-based attacks, our method provides a near-perfect defence. 2023.emnlp-main.60 @@ -859,7 +859,7 @@ YifengLu DennyZhou TengyuMa - QuocLe + QuocLe 968-979 We present symbol tuning - finetuning language models on in-context input-label pairs where natural language labels (e.g., “positive/negative sentiment”) are replaced with arbitrary symbols (e.g., “foo/bar”). Symbol tuning leverages the intuition that when a model cannot use instructions or natural language labels to figure out a task, it must instead do so by learning the input-label mappings. We experiment with symbol tuning across PaLM models up to 540B parameters and observe benefits across various settings. First, symbol tuning boosts performance on unseen in-context learning tasks and is much more robust to underspecified prompts, such as those without instructions or without natural language labels. Second, symbol-tuned models are much stronger at algorithmic reasoning tasks, with up to 18.2% better performance on the List Functions benchmark and up to 15.3% better performance on the Simple Turing Concepts benchmark. Finally, symbol-tuned models show large improvements in following flipped-labels presented in-context, meaning that they are more capable of using in-context information to override prior knowledge. 2023.emnlp-main.61 @@ -870,7 +870,7 @@ The neural dynamics of word recognition and integration JonGauthier - RogerLevy + RogerLevy 980-995 Listeners recognize and integrate words in rapid and noisy everyday speech by combining expectations about upcoming content with incremental sensory evidence. We present a computational model of word recognition which formalizes this perceptual process in Bayesian decision theory. We fit this model to explain scalp EEG signals recorded as subjects passively listened to a fictional story, revealing both the dynamics of the online auditory word recognition process and the neural correlates of the recognition and integration of words. The model reveals distinct neural processing of words depending on whether or not they can be quickly recognized. While all words trigger a neural response characteristic of probabilistic integration — voltage modulations predicted by a word’s surprisal in context — these modulations are amplified for words which require more than roughly 150 ms of input to be recognized. We observe no difference in the latency of these neural responses according to words’ recognition times. Our results support a two-part model of speech comprehension, combining an eager and rapid process of word recognition with a temporally independent process of word integration. However, we also developed alternative models of the scalp EEG signal not incorporating word recognition dynamics which showed similar performance improvements. We discuss potential future modeling steps which may help to separate these hypotheses. 2023.emnlp-main.62 @@ -1040,7 +1040,7 @@ RuizheChen XiangruTang YumoXu - DragomirRadev + DragomirRadev ArmanCohan 1157-1172 People primarily consult tables to conduct data analysis or answer specific questions. Text generation systems that can provide accurate table summaries tailored to users’ information needs can facilitate more efficient access to relevant data insights. Motivated by this, we define a new query-focused table summarization task, where text generation models have to perform human-like reasoning and analysis over the given table to generate a tailored summary. We introduce a new benchmark named QTSumm for this task, which contains 7,111 human-annotated query-summary pairs over 2,934 tables covering diverse topics. We investigate a set of strong baselines on QTSumm, including text generation, table-to-text generation, and large language models. Experimental results and manual analysis reveal that the new task presents significant challenges in table-to-text generation for future research. Moreover, we propose a new approach named ReFactor, to retrieve and reason over query-relevant information from tabular data to generate several natural language facts. Experimental results demonstrate that ReFactor can bring effective improvements to baselines by concatenating the generated facts to the model input. Our data and code are publicly available at https://github.com/yale-nlp/QTSumm. @@ -1133,7 +1133,7 @@ JiachengLiu WenyaWang DianzhuoWang - NoahSmith + NoahSmith YejinChoi HannanehHajishirzi 1264-1287 @@ -1234,7 +1234,7 @@ NicholasSuwono JustinChen TunHung - Ting-HaoHuang + Ting-HaoHuang I-BinLiao Yung-HuiLi Lun-WeiKu @@ -1287,7 +1287,7 @@ DonaldMetzler SlavPetrov NeilHoulsby - QuocLe + QuocLe MostafaDehghani 1471-1486 Scaling language models improves performance but comes with significant computational costs. This paper proposes UL2R, a method that substantially improves existing language models and their scaling curves with a relatively tiny amount of extra compute. The key idea is to continue training a state-of-the-art large language model on a few more steps with UL2’s mixture-of-denoiser objective. We show that, with almost negligible extra computational costs and no new sources of data, we are able to substantially improve the scaling properties of large language models on downstream metrics. In this paper, we continue training a baseline language model, PaLM, with ULR2, introducing a new set of models at 8B, 62B, and 540B scale which we call U-PaLM. Impressively, at 540B scale, we show an approximately 2x computational savings rate where U-PaLM achieves the same performance as the final PaLM 540B model at around half its computational budget (i.e., saving ~4.4 million TPUv4 hours). We further show that this improved scaling curve leads to “emergent abilities” on challenging BIG-Bench tasks—for instance, U-PaLM does much better on some tasks or demonstrates better quality at much smaller scale (62B as opposed to 540B). Overall, we show that U-PaLM outperforms PaLM on many few-shot setups, including reasoning tasks with chain-of-thought (e.g., GSM8K), multilingual tasks (MGSM, TydiQA), MMLU and challenging BIG-Bench tasks. @@ -1302,7 +1302,7 @@ TaiweiShi CalebZiems Min-YenKan - NancyChen + NancyChen ZhengyuanLiu DiyiYang 1487-1505 @@ -1347,7 +1347,7 @@ FuliFeng YixinCao JizhiZhang - Tat-SengChua + Tat-SengChua 1539-1554 Large Language Model (LLM) has demonstrated significant ability in various Natural Language Processing tasks. However, their effectiveness is highly dependent on the phrasing of the task prompt, leading to research on automatic prompt optimization using labeled task data. We reveal that these prompt optimization techniques are vulnerable to distribution shifts such as subpopulation shifts, which are common for LLMs in real-world scenarios such as customer reviews analysis. In this light, we propose a new problem of robust prompt optimization for LLMs against distribution shifts, which requires the prompt optimized over the labeled source group can simultaneously generalize to an unlabeled target group. To solve this problem, we propose Generalized Prompt Optimization framework , which incorporates the unlabeled data from the target group into prompt optimization. Extensive experimental results demonstrate the effectiveness of the proposed framework with significant performance improvement on the target group and comparable performance on the source group. 2023.emnlp-main.95 @@ -1509,7 +1509,7 @@ JinheonBaek SoyeongJeong MinkiKang - JongPark + JongPark SungHwang 1720-1736 Recent Language Models (LMs) have shown impressive capabilities in generating texts with the knowledge internalized in parameters. Yet, LMs often generate the factually incorrect responses to the given queries, since their knowledge may be inaccurate, incomplete, and outdated. To address this problem, previous works propose to augment LMs with the knowledge retrieved from an external knowledge source. However, such approaches often show suboptimal text generation performance due to two reasons: 1) the model may fail to retrieve the knowledge relevant to the given query, or 2) the model may not faithfully reflect the retrieved knowledge in the generated text. To overcome these, we propose to verify the output and the knowledge of the knowledge-augmented LMs with a separate verifier, which is a small LM that is trained to detect those two types of errors through instruction-finetuning. Then, when the verifier recognizes an error, we can rectify it by either retrieving new knowledge or generating new text. Further, we use an ensemble of the outputs from different instructions with a single verifier to enhance the reliability of the verification processes. We validate the effectiveness of the proposed verification steps on multiple question answering benchmarks, whose results show that the proposed verifier effectively identifies retrieval and generation errors, allowing LMs to provide more factually correct outputs. Our code is available at https://github.com/JinheonBaek/KALMV. @@ -1536,7 +1536,7 @@ Failures Pave the Way: Enhancing Large Language Models through Tuning-free Rule Accumulation ZeyuanYang PengLi - YangLiu + YangLiu 1751-1777 Large Language Models (LLMs) have showcased impressive performance. However, due to their inability to capture relationships among samples, these frozen LLMs inevitably keep repeating similar mistakes. In this work, we propose our Tuning-free Rule Accumulation (TRAN) framework, which guides LLMs in improving their performance by learning from previous mistakes. Considering data arrives sequentially, LLMs gradually accumulate rules from incorrect cases, forming a rule collection. These rules are then utilized by the LLMs to avoid making similar mistakes when processing subsequent inputs. Moreover, the rules remain independent of the primary prompts, seamlessly complementing prompt design strategies. Experimentally, we show that TRAN improves over recent baselines by a large margin. 2023.emnlp-main.109 @@ -1562,7 +1562,7 @@ CorbyRosset ArnoldOverwijk JiaweiHan - PaulBennett + PaulBennett 1796-1812 In this paper we improve the zero-shot generalization ability of language models via Mixture-Of-Memory Augmentation (MoMA), a mechanism that retrieves augmentation documents from multiple information corpora (external memories), with the option to “plug in” unseen memory at inference time. We develop a joint learning mechanism that trains the augmentation component with latent labels derived from the end retrieval task, paired with hard negatives from the memory mixture. We instantiate the model in a zero-shot dense retrieval setting by augmenting strong T5-based retrievers with MoMA. With only T5-base, our model obtains strong zero-shot retrieval accuracy on the eighteen tasks included in the standard BEIR benchmark, outperforming some systems with larger model sizes. As a plug-in-play model, our model can efficiently generalize to any unseen corpus, meanwhile achieving comparable or even better performance than methods relying on target-specific pretraining. Our analysis further illustrates the necessity of augmenting with mixture-of-memory for robust generalization, the benefits of augmentation learning, and how MoMA utilizes the plug-in memory at inference time without changing its parameters. Our code can be found at https://github.com/gesy17/MoMA. 2023.emnlp-main.111 @@ -1587,7 +1587,7 @@ Towards Example-Based <fixed-case>NMT</fixed-case> with Multi-<fixed-case>L</fixed-case>evenshtein Transformers MaximeBouthors - JosepCrego + JosepCrego FrançoisYvon 1830-1846 Retrieval-Augmented Machine Translation (RAMT) is attracting growing attention. This is because RAMT not only improves translation metrics, but is also assumed to implement some form of domain adaptation. In this contribution, we study another salient trait of RAMT, its ability to make translation decisions more transparent by allowing users to go back to examples that contributed to these decisions. For this, we propose a novel architecture aiming to increase this transparency. This model adapts a retrieval-augmented version of the Levenshtein Transformer and makes it amenable to simultaneously edit multiple fuzzy matches found in memory. We discuss how to perform training and inference in this model, based on multi-way alignment algorithms and imitation learning. Our experiments show that editing several examples positively impacts translation scores, notably increasing the number of target spans that are copied from existing instances. @@ -1601,7 +1601,7 @@ AfraAkyürek EricPan GarryKuwanto - DerryWijaya + DerryWijaya 1847-1861 Even the most advanced language models remain susceptible to errors necessitating to modify these models without initiating a comprehensive retraining process. Model editing refers to the modification of a model’s knowledge or representations in a manner that produces the desired outcomes. Prior research primarily centered around editing factual data e.g. “Messi plays for Inter Miami” confining the definition of an edit to a knowledge triplet i.e. (subject, object, relation). However, as the applications of language models expand, so do the diverse ways in which we wish to edit and refine their outputs. In this study, we broaden the scope of the editing problem to include an array of editing cases such as debiasing and rectifying reasoning errors and define an edit as any natural language expression that solicits a change in the model’s outputs. We are introducing DUnE, an editing benchmark where edits are natural language sentences and propose that DUnE presents a challenging yet relevant task. To substantiate this claim, we conduct an extensive series of experiments testing various editing approaches to address DUnE, demonstrating their respective strengths and weaknesses. We argue that retrieval-augmented language modeling can outperform specialized editing techniques and neither set of approaches has fully solved the generalized editing problem covered by our benchmark. 2023.emnlp-main.114 @@ -1755,7 +1755,7 @@ SriramGanapathy ShikharVashishth SarathChandar - ParthaTalukdar + ParthaTalukdar 2033-2045 Language Models (LMs) pre-trained with selfsupervision on large text corpora have become the default starting point for developing models for various NLP tasks. Once the pre-training corpus has been assembled, all data samples in the corpus are treated with equal importance during LM pre-training. However, due to varying levels of relevance and quality of data, equal importance to all the data samples may not be the optimal choice. While data reweighting has been explored in the context of task-specific supervised learning and LM fine-tuning, model-driven reweighting for pretraining data has not been explored. We fill this important gap and propose PRESENCE, a method for jointly reweighting samples by leveraging self-influence (SI) scores as an indicator of sample importance and pre-training. PRESENCE promotes novelty and stability for model pre-training. Through extensive analysis spanning multiple model sizes, datasets, and tasks, we present PRESENCE as an important first step in the research direction of sample reweighting for pre-training language models. 2023.emnlp-main.125 @@ -1766,7 +1766,7 @@ <fixed-case>ACTOR</fixed-case>: Active Learning with Annotator-specific Classification Heads to Embrace Human Label Variation XinpengWang - BarbaraPlank + BarbaraPlank 2046-2052 Label aggregation such as majority voting is commonly used to resolve annotator disagreement in dataset creation. However, this may disregard minority values and opinions. Recent studies indicate that learning from individual annotations outperforms learning from aggregated labels, though they require a considerable amount of annotation. Active learning, as an annotation cost-saving strategy, has not been fully explored in the context of learning from disagreement. We show that in the active learning setting, a multi-head model performs significantly better than a single-head model in terms of uncertainty estimation. By designing and evaluating acquisition functions with annotator-specific heads on two datasets, we show that group-level entropy works generally well on both datasets. Importantly, it achieves performance in terms of both prediction and uncertainty estimation comparable to full-scale training from disagreement, while saving 70% of the annotation budget. 2023.emnlp-main.126 @@ -1986,7 +1986,7 @@ A Diachronic Analysis of Paradigm Shifts in <fixed-case>NLP</fixed-case> Research: When, How, and Why? AniketPramanick YufangHou - SaifMohammad + SaifMohammad IrynaGurevych 2312-2326 Understanding the fundamental concepts and trends in a scientific field is crucial for keeping abreast of its continuous advancement. In this study, we propose a systematic framework for analyzing the evolution of research topics in a scientific field using causal discovery and inference techniques. We define three variables to encompass diverse facets of the evolution of research topics within NLP and utilize a causal discovery algorithm to unveil the causal connections among these variables using observational data. Subsequently, we leverage this structure to measure the intensity of these relationships. By conducting extensive experiments on the ACL Anthology corpus, we demonstrate that our framework effectively uncovers evolutionary trends and the underlying causes for a wide range of NLP research topics. Specifically, we show that tasks and methods are primary drivers of research in NLP, with datasets following, while metrics have minimal impact. @@ -2074,7 +2074,7 @@ Andrew M.Bean BertieVidgen PaulRöttger - Scott A.Hale + Scott A.Hale 2409-2430 Human feedback is increasingly used to steer the behaviours of Large Language Models (LLMs). However, it is unclear how to collect and incorporate feedback in a way that is efficient, effective and unbiased, especially for highly subjective human preferences and values. In this paper, we survey existing approaches for learning from human feedback, drawing on 95 papers primarily from the ACL and arXiv repositories. First, we summarise the past, pre-LLM trends for integrating human feedback into language models. Second, we give an overview of present techniques and practices, as well as the motivations for using feedback; conceptual frameworks for defining values and preferences; and how feedback is collected and from whom. Finally, we encourage a better future of feedback learning in LLMs by raising five unresolved conceptual and practical challenges. 2023.emnlp-main.148 @@ -2192,7 +2192,7 @@ <fixed-case>NAIL</fixed-case>: Lexical Retrieval Indices with Efficient Non-Autoregressive Decoders LivioSoares - DanielGillick + DanielGillick JeremyCole TomKwiatkowski 2574-2589 @@ -2335,7 +2335,7 @@ Indicative Summarization of Long Discussions ShahbazSyed DominikSchwabe - KhalidAl-Khatib + KhalidAl-Khatib MartinPotthast 2752-2788 Online forums encourage the exchange and discussion of different stances on many topics. Not only do they provide an opportunity to present one’s own arguments, but may also gather a broad cross-section of others’ arguments. However, the resulting long discussions are difficult to overview. This paper presents a novel unsupervised approach using large language models (LLMs) to generating indicative summaries for long discussions that basically serve as tables of contents. Our approach first clusters argument sentences, generates cluster labels as abstractive summaries, and classifies the generated cluster labels into argumentation frames resulting in a two-level summary. Based on an extensively optimized prompt engineering approach, we evaluate 19 LLMs for generative cluster labeling and frame classification. To evaluate the usefulness of our indicative summaries, we conduct a purpose-driven user study via a new visual interface called **Discussion Explorer**: It shows that our proposed indicative summaries serve as a convenient navigation tool to explore long discussions. @@ -2396,7 +2396,7 @@ ShaLi QiusiZhan KathrynConger - MarthaPalmer + MarthaPalmer HengJi JiaweiHan 2823-2838 @@ -2409,7 +2409,7 @@ Hierarchical Pretraining on Multimodal Electronic Health Records XiaochenWang - JunyuLuo + JunyuLuo JiaqiWang ZiyiYin SuhanCui @@ -2455,7 +2455,7 @@ WeilongDong ShuangzhiWu ChaoBian - DeyiXiong + DeyiXiong 2875-2886 Pretrained language models have learned a vast amount of human knowledge from large-scale corpora, but their powerful memorization capability also brings the risk of data leakage. Some risks may only be discovered after the model training is completed, such as the model memorizing a specific phone number and frequently outputting it. In such cases, model developers need to eliminate specific data influences from the model to mitigate legal and ethical penalties. To effectively mitigate these risks, people often have to spend a significant amount of time and computational costs to retrain new models instead of finding ways to cure the ‘sick’ models. Therefore, we propose a method to locate and erase risky neurons in order to eliminate the impact of privacy data in the model. We use a new method based on integrated gradients to locate neurons associated with privacy texts, and then erase these neurons by setting their activation values to zero.Furthermore, we propose a risky neuron aggregation method to eliminate the influence of privacy data in the model in batches. Experimental results show that our method can effectively and quickly eliminate the impact of privacy data without affecting the model’s performance. Additionally, we demonstrate the relationship between model memorization and neurons through experiments, further illustrating the robustness of our method. 2023.emnlp-main.174 @@ -2536,7 +2536,7 @@ <fixed-case>C</fixed-case>omb<fixed-case>LM</fixed-case>: Adapting Black-Box Language Models through Small Fine-Tuned Models AitorOrmazabal MikelArtetxe - EnekoAgirre + EnekoAgirre 2961-2974 Methods for adapting language models (LMs) to new tasks and domains have traditionally assumed white-box access to the model, and work by modifying its parameters. However, this is incompatible with a recent trend in the field, where the highest quality models are only available as black-boxes through inference APIs. Even when the model weights are available, the computational cost of fine-tuning large LMs can be prohibitive for most practitioners. In this work, we present a lightweight method for adapting large LMs to new domains and tasks, assuming no access to their weights or intermediate activations. Our approach fine-tunes a small white-box LM and combines it with the large black-box LM at the probability level through a small network, learned on a small validation set. We validate our approach by adapting a large LM (OPT-30B) to several domains and a downstream task (machine translation), observing improved performance in all cases, of up to 9%, while using a domain expert 23x smaller. 2023.emnlp-main.180 @@ -2570,7 +2570,7 @@ TuNguyen JadeCopet GabrielSynnaeve - BenoîtSagot + BenoîtSagot EmmanuelDupoux 3008-3028 In NLP, text language models based on words or subwords are known to outperform their character-based counterparts. Yet, in the speech community, the standard input of spoken LMs are 20ms or 40ms-long discrete units (shorter than a phoneme). Taking inspiration from word-based LM, we introduce a Generative Spoken Language Model (GSLM) based on word-size continuous-valued audio tokens that can generate diverse and expressive language output. This is obtained by replacing lookup table for lexical types with a Lexical Embedding function, the cross entropy loss by a contrastive loss, and multinomial sampling by k-NN sampling. The resulting model is the first generative language model based on word-size continuous tokens. Its performance is on par with discrete unit GSLMs regarding generation quality as measured by automatic metrics and subjective human judgements. Moreover, it is five times more memory efficient thanks to its large 200ms units. In addition, the embeddings before and after the Lexical Embedder are phonetically and semantically interpretable. @@ -2659,7 +2659,7 @@ DanielaTeodorescu TiffanyCheng AlonaFyshe - SaifMohammad + SaifMohammad 3117-3133 Research in psychopathology has shown that, at an aggregate level, the patterns of emotional change over time—emotion dynamics—are indicators of one’s mental health. One’s patterns of emotion change have traditionally been determined through self-reports of emotions; however, there are known issues with accuracy, bias, and convenience. Recent approaches to determining emotion dynamics from one’s everyday utterances, addresses many of these concerns, but it is not yet known whether these measures of utterance emotion dynamics (UED) correlate with mental health diagnoses. Here, for the first time, we study the relationship between tweet emotion dynamics and mental health disorders. We find that each of the UED metrics studied varied by the user’s self-disclosed diagnosis. For example: average valence was significantly higher (i.e., more positive text) in the control group compared to users with ADHD, MDD, and PTSD. Valence variability was significantly lower in the control group compared to ADHD, depression, bipolar disorder, MDD, PTSD, and OCD but not PPD. Rise and recovery rates of valence also exhibited significant differences from the control. This work provides important early evidence for how linguistic cues pertaining to emotion dynamics can play a crucial role as biosocial markers for mental illnesses and aid in the understanding, diagnosis, and management of mental health disorders. 2023.emnlp-main.188 @@ -2753,7 +2753,7 @@ ShikharMurty PratyushaSharma JacobAndreas - ChristopherManning + ChristopherManning 3233-3247 Recursion is a prominent feature of human language, and fundamentally challenging for self-attention due to the lack of an explicit recursive-state tracking mechanism. Consequently, Transformer language models poorly capture long-tail recursive structure and exhibit sample-inefficient syntactic generalization. This work introduces Pushdown Layers, a new self-attention layer that models recursive state via a stack tape that tracks estimated depths of every token in an incremental parse of the observed prefix. Transformer LMs with Pushdown Layers are syntactic language models that autoregressively and synchronously update this stack tape as they predict new tokens, in turn using the stack tape to softly modulate attention over tokens—for instance, learning to “skip” over closed constituents. When trained on a corpus of strings annotated with silver constituency parses, Transformers equipped with Pushdown Layers achieve dramatically better and 3-5x more sample-efficient syntactic generalization, while maintaining similar perplexities. Pushdown Layers are a drop-in replacement for standard self-attention. We illustrate this by finetuning GPT2-medium with Pushdown Layers on an automatically parsed WikiText-103, leading to improvements on several GLUE text classification tasks. 2023.emnlp-main.195 @@ -2803,7 +2803,7 @@ WenhaoShi LeiWang YangYang - See-KiongNg + See-KiongNg HengShen 3290-3301 Existing MWP solvers employ sequence or binary tree to present the solution expression and decode it from given problem description. However, such structures fail to handle the variants that can be derived via mathematical manipulation, e.g., (a_1+a_2)*a_3 and a_1 * a_3+a_2 * a_3 can both be possible valid solutions for a same problem but formulated as different expression sequences or trees. The multiple solution variants depicting different possible solving procedures for the same input problem would raise two issues: 1) making it hard for the model to learn the mapping function between the input and output spaces effectively, and 2) wrongly indicating wrong when evaluating a valid expression variant. To address these issues, we introduce a unified tree structure to present a solution expression, where the elements are permutable and identical for all the expression variants. We propose a novel non-autoregressive solver, named MWP-NAS, to parse the problem and deduce the solution expression based on the unified tree. For evaluating the possible expression variants, we design a path-based metric to evaluate the partial accuracy of expressions of a unified tree. The results from extensive experiments conducted on Math23K and MAWPS demonstrate the effectiveness of our proposed MWP-NAS. The codes and checkpoints are available at: https://github.com/mengqunhan/MWP-NAS. @@ -2832,11 +2832,11 @@ EleftheriaBriakou AmandaLiu ConnorBaumler - ClaireBonial + ClaireBonial JeffreyMicher - ClareVoss + ClareVoss MarineCarpuat - HalDaumé III + HalDaumé III 3313-3330 NLP systems have shown impressive performance at answering questions by retrieving relevant context. However, with the increasingly large models, it is impossible and often undesirable to constrain models’ knowledge or reasoning to only the retrieved context. This leads to a mismatch between the information that the models access to derive the answer and the information that is available to the user to assess the model predicted answer. In this work, we study how users interact with QA systems in the absence of sufficient information to assess their predictions. Further, we ask whether adding the requisite background helps mitigate users’ over-reliance on predictions. Our study reveals that users rely on model predictions even in the absence of sufficient information needed to assess the model’s correctness. Providing the relevant background, however, helps users better catch model errors, reducing over-reliance on incorrect predictions. On the flip side, background information also increases users’ confidence in their accurate as well as inaccurate judgments. Our work highlights that supporting users’ verification of QA predictions is an important, yet challenging, problem. 2023.emnlp-main.201 @@ -2848,7 +2848,7 @@ <fixed-case>GROOV</fixed-case>i<fixed-case>ST</fixed-case>: A Metric for Grounding Objects in Visual Storytelling Aditya KSurikuchi SandroPezzelle - RaquelFernández + RaquelFernández 3331-3339 A proper evaluation of stories generated for a sequence of images—the task commonly referred to as visual storytelling—must consider multiple aspects, such as coherence, grammatical correctness, and visual grounding. In this work, we focus on evaluating the degree of grounding, that is, the extent to which a story is about the entities shown in the images. We analyze current metrics, both designed for this purpose and for general vision-text alignment. Given their observed shortcomings, we propose a novel evaluation tool, GROOViST, that accounts for cross-modal dependencies, temporal misalignments (the fact that the order in which entities appear in the story and the image sequence may not match), and human intuitions on visual grounding. An additional advantage of GROOViST is its modular design, where the contribution of each component can be assessed and interpreted individually. 2023.emnlp-main.202 @@ -2944,7 +2944,7 @@ BoweiZou YifanFan YanlingLi - Ai TiAw + Ai TiAw YuHong 3435-3446 Conversational Question Answering (CQA) aims to provide natural language answers to users in information-seeking dialogues. Existing CQA benchmarks often evaluate models using pre-collected human-human conversations. However, replacing the model-predicted dialogue history with ground truth compromises the naturalness and sustainability of CQA evaluation. While previous studies proposed using predicted history and rewriting techniques to address unresolved coreferences and incoherencies, this approach renders the question self-contained from the conversation. In this paper, we propose a novel automatic evaluation approach, interview evaluation. Specifically, ChatGPT acts as the interviewer (Q agent) with a set of carefully designed prompts, and the CQA model under test serves as the interviewee (A agent). During the interview evaluation, questions are dynamically generated by the Q agent to guide the A agent in predicting the correct answer through an interactive process. We evaluated four different models on QuAC and two models on CoQA in our experiments. The experiment results demonstrate that our interview evaluation has advantages over previous CQA evaluation approaches, particularly in terms of naturalness and coherence. The source code is made publicly available. @@ -2998,7 +2998,7 @@ <fixed-case>T</fixed-case>o<fixed-case>V</fixed-case>i<fixed-case>L</fixed-case>a<fixed-case>G</fixed-case>: Your Visual-Language Generative Model is Also An Evildoer - XinpengWang + XinpengWang XiaoyuanYi HanJiang ShanlinZhou @@ -3077,7 +3077,7 @@ JesusSalcido TéaWright ElianaColunga - Katharinavon der Wense + Katharinavon der Wense 3588-3598 With recent advances in large language models (LLMs), the concept of automatically generating children’s educational materials has become increasingly realistic. Working toward the goal of age-appropriate simplicity in generated educational texts, we first examine the ability of several popular LLMs to generate stories with properly adjusted lexical and readability levels. We find that, in spite of the growing capabilities of LLMs, they do not yet possess the ability to limit their vocabulary to levels appropriate for younger age groups. As a second experiment, we explore the ability of state-of-the-art lexical simplification models to generalize to the domain of children’s stories and, thus, create an efficient pipeline for their automatic generation. In order to test these models, we develop a dataset of child-directed lexical simplification instances, with examples taken from the LLM-generated stories in our first experiment. We find that, while the strongest-performing current lexical simplification models do not perform as well on material designed for children due to their reliance on large language models behind the scenes, some models that still achieve fairly strong results on general data can mimic or even improve their performance on children-directed data with proper fine-tuning, which we conduct using our newly created child-directed simplification dataset. 2023.emnlp-main.218 @@ -3130,7 +3130,7 @@ Retrofitting Light-weight Language Models for Emotions using Supervised Contrastive Learning SapanShah SreedharReddy - PushpakBhattacharyya + PushpakBhattacharyya 3640-3654 We present a novel retrofitting method to induce emotion aspects into pre-trained language models (PLMs) such as BERT and RoBERTa. Our method updates pre-trained network weights using contrastive learning so that the text fragments exhibiting similar emotions are encoded nearby in the representation space, and the fragments with different emotion content are pushed apart. While doing so, it also ensures that the linguistic knowledge already present in PLMs is not inadvertently perturbed. The language models retrofitted by our method, i.e., BERTEmo and RoBERTaEmo, produce emotion-aware text representations, as evaluated through different clustering and retrieval metrics. For the downstream tasks on sentiment analysis and sarcasm detection, they perform better than their pre-trained counterparts (about 1% improvement in F1-score) and other existing approaches. Additionally, a more significant boost in performance is observed for the retrofitted models over pre-trained ones in few-shot learning setting. 2023.emnlp-main.222 @@ -3182,7 +3182,7 @@ Language Representation Projection: Can We Transfer Factual Knowledge across Languages in Multilingual Language Models? ShaoyangXu JunzhuoLi - DeyiXiong + DeyiXiong 3692-3702 Multilingual pretrained language models serve as repositories of multilingual factual knowledge. Nevertheless, a substantial performance gap of factual knowledge probing exists between high-resource languages and low-resource languages, suggesting limited implicit factual knowledge transfer across languages in multilingual pretrained language models. This paper investigates the feasibility of explicitly transferring relatively rich factual knowledge from English to non-English languages. To accomplish this, we propose two parameter-free \textbf{L}anguage \textbf{R}epresentation \textbf{P}rojection modules (LRP2). The first module converts non-English representations into English-like equivalents, while the second module reverts English-like representations back into representations of the corresponding non-English language. Experimental results on the mLAMA dataset demonstrate that LRP2 significantly improves factual knowledge retrieval accuracy and facilitates knowledge transferability across diverse non-English languages. We further investigate the working mechanism of LRP2 from the perspectives of representation space and cross-lingual knowledge neuron. 2023.emnlp-main.226 @@ -3207,7 +3207,7 @@ <fixed-case>R</fixed-case>easoning<fixed-case>LM</fixed-case>: Enabling Structural Subgraph Reasoning in Pre-trained Language Models for Question Answering over Knowledge Graph JinhaoJiang KunZhou - XinZhao + XinZhao YaliangLi Ji-RongWen 3721-3735 @@ -3321,7 +3321,7 @@ Lost in Translation, Found in Spans: Identifying Claims in Multilingual Social Media ShubhamMittal MeghaSundriyal - PreslavNakov + PreslavNakov 3887-3902 Claim span identification (CSI) is an important step in fact-checking pipelines, aiming to identify text segments that contain a check-worthy claim or assertion in a social media post. Despite its importance to journalists and human fact-checkers, it remains a severely understudied problem, and the scarce research on this topic so far has only focused on English. Here we aim to bridge this gap by creating a novel dataset, X-CLAIM, consisting of 7K real-world claims collected from numerous social media platforms in five Indian languages and English. We report strong baselines with state-of-the-art encoder-only language models (e.g., XLM-R) and we demonstrate the benefits of training on multiple languages over alternative cross-lingual transfer methods such as zero-shot transfer, or training on translated data, from a high-resource language such as English. We evaluate generative large language models from the GPT series using prompting methods on the X-CLAIM dataset and we find that they underperform the smaller encoder-only language models for low-resource languages. 2023.emnlp-main.236 @@ -3337,7 +3337,7 @@ JiaxiWu VeronikaWirtz TraciHong - DerryWijaya + DerryWijaya 3903-3915 This paper introduces a multilingual dataset of COVID-19 vaccine misinformation, consisting of annotated tweets from three middle-income countries: Brazil, Indonesia, and Nigeria. The expertly curated dataset includes annotations for 5,952 tweets, assessing their relevance to COVID-19 vaccines, presence of misinformation, and the themes of the misinformation. To address challenges posed by domain specificity, the low-resource setting, and data imbalance, we adopt two approaches for developing COVID-19 vaccine misinformation detection models: domain-specific pre-training and text augmentation using a large language model. Our best misinformation detection models demonstrate improvements ranging from 2.7 to 15.9 percentage points in macro F1-score compared to the baseline models. Additionally, we apply our misinformation detection models in a large-scale study of 19 million unlabeled tweets from the three countries between 2020 and 2022, showcasing the practical application of our dataset and models for detecting and analyzing vaccine misinformation in multiple countries and languages. Our analysis indicates that percentage changes in the number of new COVID-19 cases are positively associated with COVID-19 vaccine misinformation rates in a staggered manner for Brazil and Indonesia, and there are significant positive associations between the misinformation rates across the three countries. 2023.emnlp-main.237 @@ -3362,7 +3362,7 @@ SandraSandoval JieyuZhao MarineCarpuat - HalDaumé III + HalDaumé III 3933-3945 We ask the question: Are there widespread disparities in machine translations of names across race/ethnicity, and gender? We hypothesize that the translation quality of names and surrounding context will be lower for names associated with US racial and ethnic minorities due to these systems’ tendencies to standardize language to predominant language patterns. We develop a dataset of names that are strongly demographically aligned and propose a translation evaluation procedure based on round-trip translation. We analyze the effect of name demographics on translation quality using generalized linear mixed effects models and find that the ability of translation systems to correctly translate female-associated names is significantly lower than male-associated names. This effect is particularly pronounced for female-associated names that are also associated with racial (Black) and ethnic (Hispanic) minorities. This disparity in translation quality between social groups for something as personal as someone’s name has significant implications for people’s professional, personal, and cultural identities, self-worth and ease of communication. Our findings suggest that more MT research is needed to improve the translation of names and to provide high-quality service for users regardless of gender, race, and ethnicity. 2023.emnlp-main.239 @@ -3374,7 +3374,7 @@ Investigating Efficiently Extending Transformers for Long Input Summarization JasonPhang YaoZhao - PeterLiu + PeterLiu 3946-3961 While large pretrained Transformer models have proven highly capable at tackling natural language tasks, handling long sequence inputs still poses a significant challenge. One such task is long input summarization, where inputs are longer than the maximum input context of most models. Through an extensive set of experiments, we investigate what model architectural changes and pretraining paradigms most efficiently adapt a pretrained Transformer for long input summarization. We find that a staggered, block-local Transformer with global encoder tokens strikes a good balance of performance and efficiency, and that an additional pretraining phase on long sequences meaningfully improves downstream summarization performance. Based on our findings, we introduce PEGASUS-X, an extension of the PEGASUS model with additional long input pretraining to handle inputs of up to 16K tokens, which achieves strong performance on long input summarization tasks comparable with much larger models. 2023.emnlp-main.240 @@ -3388,7 +3388,7 @@ LinhaoYu MinghuiXu RenrenJin - DeyiXiong + DeyiXiong 3962-3979 Spoken texts (either manual or automatic transcriptions from automatic speech recognition (ASR)) often contain disfluencies and grammatical errors, which pose tremendous challenges to downstream tasks. Converting spoken into written language is hence desirable. Unfortunately, the availability of datasets for this is limited. To address this issue, we present CS2W, a Chinese Spoken-to-Written style conversion dataset comprising 7,237 spoken sentences extracted from transcribed conversational texts. Four types of conversion problems are covered in CS2W: disfluencies, grammatical errors, ASR transcription errors, and colloquial words. Our annotation convention, data, and code are publicly available at https://github.com/guozishan/CS2W. 2023.emnlp-main.241 @@ -3486,7 +3486,7 @@ PaoloRosso RobertLitschko GoranGlavaš - SimonePonzetto + SimonePonzetto 4069-4085 Cross-lingual transfer learning from high-resource to medium and low-resource languages has shown encouraging results. However, the scarcity of resources in target languages remains a challenge. In this work, we resort to data augmentation and continual pre-training for domain adaptation to improve cross-lingual abusive language detection. For data augmentation, we analyze two existing techniques based on vicinal risk minimization and propose MIXAG, a novel data augmentation method which interpolates pairs of instances based on the angle of their representations. Our experiments involve seven languages typologically distinct from English and three different domains. The results reveal that the data augmentation strategies can enhance few-shot cross-lingual abusive language detection. Specifically, we observe that consistently in all target languages, MIXAG improves significantly in multidomain and multilingual environments. Finally, we show through an error analysis how the domain adaptation can favour the class of abusive texts (reducing false negatives), but at the same time, declines the precision of the abusive language detection model. 2023.emnlp-main.248 @@ -3499,7 +3499,7 @@ JunfengJiang ChengzhangDong SadaoKurohashi - AkikoAizawa + AkikoAizawa 4086-4101 Dialogue segmentation is a crucial task for dialogue systems allowing a better understanding of conversational texts. Despite recent progress in unsupervised dialogue segmentation methods, their performances are limited by the lack of explicit supervised signals for training. Furthermore, the precise definition of segmentation points in conversations still remains as a challenging problem, increasing the difficulty of collecting manual annotations. In this paper, we provide a feasible definition of dialogue segmentation points with the help of document-grounded dialogues and release a large-scale supervised dataset called SuperDialseg, containing 9,478 dialogues based on two prevalent document-grounded dialogue corpora, and also inherit their useful dialogue-related annotations. Moreover, we provide a benchmark including 18 models across five categories for the dialogue segmentation task with several proper evaluation metrics. Empirical studies show that supervised learning is extremely effective in in-domain datasets and models trained on SuperDialseg can achieve good generalization ability on out-of-domain data. Additionally, we also conducted human verification on the test set and the Kappa score confirmed the quality of our automatically constructed dataset. We believe our work is an important step forward in the field of dialogue segmentation. 2023.emnlp-main.249 @@ -3772,7 +3772,7 @@ Meta-Learning Online Adaptation of Language Models NathanHu EricMitchell - ChristopherManning + ChristopherManning ChelseaFinn 4418-4432 Large language models encode impressively broad world knowledge in their parameters. However, the knowledge in static language models falls out of date, limiting the model’s effective “shelf life.” While online fine-tuning can reduce this degradation, we find that naively fine-tuning on a stream of documents leads to a low level of information uptake. We hypothesize that online fine-tuning does not sufficiently attend to important information. That is, the gradient signal from important tokens representing factual information is drowned out by the gradient from inherently noisy tokens, suggesting that a dynamic, context-aware learning rate may be beneficial. We therefore propose learning which tokens to upweight. We meta-train a small, autoregressive model to reweight the language modeling loss for each token during online fine-tuning, with the objective of maximizing the out-of-date base question-answering model’s ability to answer questions about a document after a single weighted gradient step. We call this approach Context-aware Meta-learned Loss Scaling (CaMeLS). Across three different distributions of documents, our experiments find that CaMeLS provides substantially improved information uptake on streams of thousands of documents compared with standard fine-tuning and baseline heuristics for reweighting token losses. @@ -3804,7 +3804,7 @@ WeixinCai YizheZhang JianfengGao - BillDolan + BillDolan 4450-4468 Users interact with text, image, code, or other editors on a daily basis. However, machine learning models are rarely trained in the settings that reflect the interactivity between users and their editor. This is understandable as training AI models with real users is not only slow and costly, but what these models learn may be specific to user interface design choices. Unfortunately, this means most of the research on text, code, and image generation has focused on non-interactive settings, whereby the model is expected to get everything right without accounting for any input from a user who may be willing to help. We introduce a new Interactive Text Generation task that allows training generation models interactively without the costs of involving real users, by using user simulators that provide edits that guide the model towards a given target text. We train our interactive models using Imitation Learning, and our experiments against competitive non-interactive generation models show that models trained interactively are superior to their non-interactive counterparts, even when all models are given the same budget of user inputs or edits. 2023.emnlp-main.270 @@ -3916,8 +3916,8 @@ RatishPuduppully AnoopKunchukuttan RajDabre - Ai TiAw - NancyChen + Ai TiAw + NancyChen 4586-4602 This study investigates machine translation between related languages i.e., languages within the same family that share linguistic characteristics such as word order and lexical similarity. Machine translation through few-shot prompting leverages a small set of translation pair examples to generate translations for test sentences. This procedure requires the model to learn how to generate translations while simultaneously ensuring that token ordering is maintained to produce a fluent and accurate translation. We propose that for related languages, the task of machine translation can be simplified by leveraging the monotonic alignment characteristic of such languages. We introduce DecoMT, a novel approach of few-shot prompting that decomposes the translation process into a sequence of word chunk translations. Through automatic and human evaluation conducted on multiple related language pairs across various language families, we demonstrate that our proposed approach of decomposed prompting surpasses multiple established few-shot baseline approaches. For example, DecoMT outperforms the strong few-shot prompting BLOOM model with an average improvement of 8 chrF++ scores across the examined languages. 2023.emnlp-main.279 @@ -3943,7 +3943,7 @@ XiaoleiWang XinZhou QiZhang - XuanjingHuang + XuanjingHuang 4616-4630 Recently, many studies have illustrated the robustness problem of Named Entity Recognition (NER) systems: the NER models often rely on superficial entity patterns for predictions, without considering evidence from the context. Consequently, even state-of-the-art NER models generalize poorly to out-of-domain scenarios when out-of-distribution (OOD) entity patterns are introduced. Previous research attributes the robustness problem to the existence of NER dataset bias, where simpler and regular entity patterns induce shortcut learning. In this work, we bring new insights into this problem by comprehensively investigating the NER dataset bias from a dataset difficulty view. We quantify the entity-context difficulty distribution in existing datasets and explain their relationship with model robustness. Based on our findings, we explore three potential ways to de-bias the NER datasets by altering entity-context distribution, and we validate the feasibility with intensive experiments. Finally, we show that the de-biased datasets can transfer to different models and even benefit existing model-based robustness-improving methods, indicating that building more robust datasets is fundamental for building more robust NER systems. 2023.emnlp-main.281 @@ -3957,7 +3957,7 @@ HeikeAdel LukasLange JannikStrötgen - HinrichSchuetze + HinrichSchuetze 4631-4646 Most languages of the world pose low-resource challenges to natural language processing models. With multilingual training, knowledge can be shared among languages. However, not all languages positively influence each other and it is an open research question how to select the most suitable set of languages for multilingual training and avoid negative interference among languages whose characteristics or data distributions are not compatible. In this paper, we propose GradSim, a language grouping method based on gradient similarity. Our experiments on three diverse multilingual benchmark datasets show that it leads to the largest performance gains compared to other similarity measures and it is better correlated with cross-lingual model performance. As a result, we set the new state of the art on AfriSenti, a benchmark dataset for sentiment analysis on low-resource African languages. In our extensive analysis, we further reveal that besides linguistic features, the topics of the datasets play an important role for language grouping and that lower layers of transformer models encode language-specific features while higher layers capture task-specific information. 2023.emnlp-main.282 @@ -4069,7 +4069,7 @@ AmandaCercas Curry TanviDinkar VerenaRieser - ZeerakTalat + ZeerakTalat 4776-4790 Automated dialogue or conversational systems are anthropomorphised by developers and personified by users. While a degree of anthropomorphism is inevitable, conscious and unconscious design choices can guide users to personify them to varying degrees. Encouraging users to relate to automated systems as if they were human can lead to transparency and trust issues, and high risk scenarios caused by over-reliance on their outputs. As a result, natural language processing researchers have investigated the factors that induce personification and develop resources to mitigate such effects. However, these efforts are fragmented, and many aspects of anthropomorphism have yet to be explored. In this paper, we discuss the linguistic factors that contribute to the anthropomorphism of dialogue systems and the harms that can arise thereof, including reinforcing gender stereotypes and conceptions of acceptable language. We recommend that future efforts towards developing dialogue systems take particular care in their design, development, release, and description; and attend to the many linguistic cues that can elicit personification by users. 2023.emnlp-main.290 @@ -4098,7 +4098,7 @@ JaehyungSeo SugyeongEo HyeonseokMoon - HeuiseokLim + HeuiseokLim 4798-4815 Automatic Speech Recognition (ASR) systems are instrumental across various applications, with their performance being critically tied to user satisfaction. Conventional evaluation metrics for ASR systems produce a singular aggregate score, which is insufficient for understanding specific system vulnerabilities. Therefore, we aim to address the limitations of the previous ASR evaluation methods by introducing the Korean Error Explainable Benchmark Dataset for ASR and Post-processing (KEBAP). KEBAP enables comprehensive analysis of ASR systems at both speech- and text levels, thereby facilitating a more balanced assessment encompassing speech recognition accuracy and user readability. KEBAP provides 37 newly defined speech-level resources incorporating diverse noise environments and speaker characteristics categories, also presenting 13 distinct text-level error types. This paper demonstrates detailed statistical analyses of colloquial noise categories and textual error types. Furthermore, we conduct extensive validation and analysis on commercially deployed ASR systems, providing valuable insights into their performance. As a more fine-grained and real-world-centric evaluation method, KEBAP contributes to identifying and mitigating potential weaknesses in ASR systems. 2023.emnlp-main.292 @@ -4127,7 +4127,7 @@ XinyuChen ShengXu PeifengLi - QiaomingZhu + QiaomingZhu 4833-4843 Cross-document event coreference resolution (CD-ECR) is a task of clustering event mentions across multiple documents that refer to the same real-world events. Previous studies usually model the CD-ECR task as a pairwise similarity comparison problem by using different event mention features, and consider the highly similar event mention pairs in the same cluster as coreferent. In general, most of them only consider the local context of event mentions and ignore their implicit global information, thus failing to capture the interactions of long-distance event mentions. To address the above issue, we regard discourse structure as global information to further improve CD-ECR. First, we use a discourse rhetorical structure constructor to construct tree structures to represent documents. Then, we obtain shortest dependency paths from the tree structures to represent interactions between event mention pairs. Finally, we feed the above information to a multi-layer perceptron to capture the similarities of event mention pairs for resolving coreferent events. Experimental results on the ECB+ dataset show that our proposed model outperforms several baselines and achieves the competitive performance with the start-of-the-art baselines. 2023.emnlp-main.294 @@ -4145,7 +4145,7 @@ JungwooLim HyeonseokMoon KisuYang - HeuiseokLim + HeuiseokLim 4844-4861 Despite the striking advances in recent language generation performance, model-generated responses have suffered from the chronic problem of hallucinations that are either untrue or unfaithful to a given source. Especially in the task of knowledge grounded conversation, the models are required to generate informative responses, but hallucinated utterances lead to miscommunication. In particular, entity-level hallucination that causes critical misinformation and undesirable conversation is one of the major concerns. To address this issue, we propose a post-hoc refinement method called REM. It aims to enhance the quality and faithfulness of hallucinated utterances by refining them based on the source knowledge. If the generated utterance has a low source-faithfulness score with the given knowledge, REM mines the key entities in the knowledge and implicitly uses them for refining the utterances. We verify that our method reduces entity hallucination in the utterance. Also, we show the adaptability and efficacy of REM with extensive experiments and generative results. Our code is available at https://github.com/YOONNAJANG/REM. 2023.emnlp-main.295 @@ -4161,7 +4161,7 @@ YuxuanFan ZhiyongWu JingjingXu - BaobaoChang + BaobaoChang 4862-4876 Previous studies have shown that large language models (LLMs) like GPTs store massive factual knowledge in their parameters. However, the stored knowledge could be false or outdated. Traditional knowledge editing methods refine LLMs via fine-tuning on texts containing specific knowledge. However, with the increasing scales of LLMs, these gradient-based approaches bring large computation costs. The trend of model-as-a-service also makes it impossible to modify knowledge in black-box LMs. Inspired by in-context learning (ICL), a new paradigm based on demonstration contexts without parameter updating, we explore whether ICL can edit factual knowledge. To answer this question, we give a comprehensive empirical study of ICL strategies. Experiments show that in-context knowledge editing (IKE), without any gradient and parameter updating, achieves a competitive success rate compared to gradient-based methods on GPT-J (6B) but with much fewer side effects, including less over-editing on similar but unrelated facts and less knowledge forgetting on previously stored knowledge. We also apply the method to larger LMs with tens or hundreds of parameters like OPT-175B, which shows the scalability of our method. The code is available at https://github.com/pkunlp-icler/IKE. 2023.emnlp-main.296 @@ -4267,7 +4267,7 @@ ShaolinZhu JunXie BaosongYang - DeyiXiong + DeyiXiong 4978-4990 Mixture-of-Experts (MoE) based sparse architectures can significantly increase model capacity with sublinear computational overhead, which are hence widely used in massively multilingual neural machine translation (MNMT). However, they are prone to overfitting on low-resource language translation. In this paper, we propose a modularized MNMT framework that is able to flexibly assemble dense and MoE-based sparse modules to achieve the best of both worlds. The training strategy of the modularized MNMT framework consists of three stages: (1) Pre-training basic MNMT models with different training objectives or model structures, (2) Initializing modules of the framework with pre-trained couterparts (e.g., encoder, decoder and embedding layers) from the basic models and (3) Fine-tuning the modularized MNMT framework to fit modules from different models together. We pre-train three basic MNMT models from scratch: a dense model, an MoE-based sparse model and a new MoE model, termed as MoE-LGR that explores multiple Language-Group-specifc Routers to incorporate language group knowledge into MNMT. The strengths of these pre-trained models are either on low-resource language translation, high-resource language translation or zero-shot translation. Our modularized MNMT framework attempts to incorporate these advantages into a single model with reasonable initialization and fine-tuning. Experiments on widely-used benchmark datasets demonstrate that the proposed modularized MNMT framwork substantially outperforms both MoE and dense models on high- and low-resource language translation as well as zero-shot translation. Our framework facilitates the combination of different methods with their own strengths and recycling off-the-shelf models for multilingual neural machine translation. Codes are available at https://github.com/lishangjie1/MMNMT. 2023.emnlp-main.303 @@ -4304,7 +4304,7 @@ Prompting is not a substitute for probability measurements in large language models JenniferHu - RogerLevy + RogerLevy 5040-5060 Prompting is now a dominant method for evaluating the linguistic knowledge of large language models (LLMs). While other methods directly read out models’ probability distributions over strings, prompting requires models to access this internal information by processing linguistic input, thereby implicitly testing a new type of emergent ability: metalinguistic judgment. In this study, we compare metalinguistic prompting and direct probability measurements as ways of measuring models’ linguistic knowledge. Broadly, we find that LLMs’ metalinguistic judgments are inferior to quantities directly derived from representations. Furthermore, consistency gets worse as the prompt query diverges from direct measurements of next-word probabilities. Our findings suggest that negative results relying on metalinguistic prompts cannot be taken as conclusive evidence that an LLM lacks a particular linguistic generalization. Our results also highlight the value that is lost with the move to closed APIs where access to probability distributions is limited. 2023.emnlp-main.306 @@ -4344,7 +4344,7 @@ SantiagoOntanon SiddharthaBrahma YuryZemlyanskiy - DavidUthus + DavidUthus MandyGuo JamesLee-Thorp YiTay @@ -4373,7 +4373,7 @@ Cross-Cultural Analysis of Human Values, Morals, and Biases in Folk Tales WinstonWu LuWang - RadaMihalcea + RadaMihalcea 5113-5125 Folk tales are strong cultural and social influences in children’s lives, and they are known to teach morals and values. However, existing studies on folk tales are largely limited to European tales. In our study, we compile a large corpus of over 1,900 tales originating from 27 diverse cultures across six continents. Using a range of lexicons and correlation analyses, we examine how human values, morals, and gender biases are expressed in folk tales across cultures. We discover differences between cultures in prevalent values and morals, as well as cross-cultural trends in problematic gender biases. Furthermore, we find trends of reduced value expression when examining public-domain fiction stories, extrinsically validate our analyses against the multicultural Schwartz Survey of Cultural Values and the Global Gender Gap Report, and find traditional gender biases associated with values, morals, and agency. This large-scale cross-cultural study of folk tales paves the way towards future studies on how literature influences and reflects cultural norms. 2023.emnlp-main.311 @@ -4385,7 +4385,7 @@ RuiqiZhong CharlieSnell DanKlein - JasonEisner + JasonEisner 5126-5152 Can non-programmers annotate natural language utterances with complex programs that represent their meaning? We introduce APEL, a framework in which non-programmers select among candidate programs generated by a seed semantic parser (e.g., Codex). Since they cannot understand the candidate programs, we ask them to select indirectly by examining the programs’ input-ouput examples. For each utterance, APEL actively searches for a simple input on which the candidate programs tend to produce different outputs. It then asks the non-programmers only to choose the appropriate output, thus allowing us to infer which program is correct and could be used to fine-tune the parser. As a first case study, we recruited human non-programmers to use APEL to re-annotate SPIDER, a text-to-SQL dataset. Our approach achieved the same annotation accuracy as the original expert annotators (75%) and exposed many subtle errors in the original annotations. 2023.emnlp-main.312 @@ -4401,7 +4401,7 @@ CedegaoZhang ArmandoSolar-Lezama JoshuaTenenbaum - RogerLevy + RogerLevy 5153-5176 Logical reasoning, i.e., deductively inferring the truth value of a conclusion from a set of premises, is an important task for artificial intelligence with wide potential impacts on science, mathematics, and society. While many prompting-based strategies have been proposed to enable Large Language Models (LLMs) to do such reasoning more effectively, they still appear unsatisfactory, often failing in subtle and unpredictable ways. In this work, we investigate the validity of instead reformulating such tasks as modular neurosymbolic programming, which we call LINC: Logical Inference via Neurosymbolic Computation. In LINC, the LLM acts as a semantic parser, translating premises and conclusions from natural language to expressions in first-order logic. These expressions are then offloaded to an external theorem prover, which symbolically performs deductive inference. Leveraging this approach, we observe significant performance gains on FOLIO and a balanced subset of ProofWriter for three different models in nearly all experimental conditions we evaluate. On ProofWriter, augmenting the comparatively small open-source StarCoder+ (15.5B parameters) with LINC even outperforms GPT-3.5 and GPT-4 with Chain-of-Thought (CoT) prompting by an absolute 38% and 10%, respectively. When used with GPT-4, LINC scores 26% higher than CoT on ProofWriter while performing comparatively on FOLIO. Further analysis reveals that although both methods on average succeed roughly equally often on this dataset, they exhibit distinct and complementary failure modes. We thus provide promising evidence for how logical reasoning over natural language can be tackled through jointly leveraging LLMs alongside symbolic provers. All corresponding code is publicly available. 2023.emnlp-main.313 @@ -4517,7 +4517,7 @@ DebtanuDatta ShubhamSoni RajdeepMukherjee - SaptarshiGhosh + SaptarshiGhosh 5291-5302 Automatic summarization of legal case judgments is a practically important problem that has attracted substantial research efforts in many countries. In the context of the Indian judiciary, there is an additional complexity – Indian legal case judgments are mostly written in complex English, but a significant portion of India’s population lacks command of the English language. Hence, it is crucial to summarize the legal documents in Indian languages to ensure equitable access to justice. While prior research primarily focuses on summarizing legal case judgments in their source languages, this study presents a pioneering effort toward cross-lingual summarization of English legal documents into Hindi, the most frequently spoken Indian language. We construct the first high-quality legal corpus comprising of 3,122 case judgments from prominent Indian courts in English, along with their summaries in both English and Hindi, drafted by legal practitioners. We benchmark the performance of several diverse summarization approaches on our corpus and demonstrate the need for further research in cross-lingual summarization in the legal domain. 2023.emnlp-main.321 @@ -4640,7 +4640,7 @@ RafaelRafailov HuaxiuYao ChelseaFinn - ChristopherManning + ChristopherManning 5433-5442 A trustworthy real-world prediction system should produce well-calibrated confidence scores; that is, its confidence in an answer should be indicative of the likelihood that the answer is correct, enabling deferral to an expert in cases of low-confidence predictions. Recent studies have shown that unsupervised pre-training produces large language models (LMs) whose conditional probabilities are remarkably well-calibrated. However, the most widely-used LMs are fine-tuned with reinforcement learning from human feedback (RLHF-LMs), and some studies have suggested that RLHF-LMs produce conditional probabilities that are very poorly calibrated. In light of this perceived weakness, we conduct a broad evaluation of methods for extracting confidence scores from RLHF-LMs. For RLHF-LMs such as ChatGPT, GPT-4, and Claude, we find that verbalized confidences emitted as output tokens are typically better-calibrated than the model’s conditional probabilities on the TriviaQA, SciQ, and TruthfulQA benchmarks, often reducing the expected calibration error by a relative 50%. 2023.emnlp-main.330 @@ -4704,8 +4704,8 @@ Navigating the Grey Area: How Expressions of Uncertainty and Overconfidence Affect Language Models KaitlynZhou - DanJurafsky - TatsunoriHashimoto + DanJurafsky + TatsunoriHashimoto 5506-5524 The increased deployment of LMs for real-world tasks involving knowledge and facts makes it important to understand model epistemology: what LMs think they know, and how their attitudes toward that knowledge are affected by language use in their inputs. Here, we study an aspect of model epistemology: how epistemic markers of certainty, uncertainty, or evidentiality like “I’m sure it’s”, “I think it’s”, or “Wikipedia says it’s” affect models, and whether they contribute to model failures. We develop a typology of epistemic markers and inject 50 markers into prompts for question answering. We find that LMs are highly sensitive to epistemic markers in prompts, with accuracies varying more than 80%. Surprisingly, we find that expressions of high certainty result in a 7% decrease in accuracy as compared to low certainty expressions; similarly, factive verbs hurt performance, while evidentials benefit performance. Our analysis of a popular pretraining dataset shows that these markers of uncertainty are associated with answers on question-answering websites, while markers of certainty are associated with questions. These associations may suggest that the behavior of LMs is based on mimicking observed language use, rather than truly reflecting epistemic uncertainty. 2023.emnlp-main.335 @@ -4732,7 +4732,7 @@ LingjueXie EllaHofmann-Coyle MayankKulkarni - DanielPreotiuc-Pietro + DanielPreotiuc-Pietro 5538-5547 Entity-centric summarization is a form of controllable summarization that aims to generate a summary for a specific entity given a document. Concise summaries are valuable in various real-life applications, as they enable users to quickly grasp the main points of the document focusing on an entity of interest. This paper presents ENTSUMV2, a more abstractive version of the original entity-centric ENTSUM summarization dataset. In ENTSUMV2 the annotated summaries are intentionally made shorter to benefit more specific and useful entity-centric summaries for downstream users. We conduct extensive experiments on this dataset using multiple abstractive summarization approaches that employ supervised fine-tuning or large-scale instruction tuning. Additionally, we perform comprehensive human evaluation that incorporates metrics for measuring crucial facets. These metrics provide a more fine-grained interpretation of the current state-of-the-art systems and highlight areas for future improvement. 2023.emnlp-main.337 @@ -4788,7 +4788,7 @@ QinglinZhang JiaqingLiu QianChen - WenWang + WenWang 5592-5605 Topic segmentation is critical for obtaining structured documents and improving down- stream tasks such as information retrieval. Due to its ability of automatically exploring clues of topic shift from abundant labeled data, recent supervised neural models have greatly promoted the development of long document topic segmentation, but leaving the deeper relationship between coherence and topic segmentation underexplored. Therefore, this paper enhances the ability of supervised models to capture coherence from both logical structure and semantic similarity perspectives to further improve the topic segmentation performance, proposing Topic-aware Sentence Structure Prediction (TSSP) and Contrastive Semantic Similarity Learning (CSSL). Specifically, the TSSP task is proposed to force the model to comprehend structural information by learning the original relations between adjacent sentences in a disarrayed document, which is constructed by jointly disrupting the original document at topic and sentence levels. Moreover, we utilize inter- and intra-topic information to construct contrastive samples and design the CSSL objective to ensure that the sentences representations in the same topic have higher similarity, while those in different topics are less similar. Extensive experiments show that the Longformer with our approach significantly outperforms old state-of-the-art (SOTA) methods. Our approach improve F_{1} of old SOTA by 3.42 (73.74 \rightarrow 77.16) and reduces P_{k} by 1.11 points (15.0 \rightarrow 13.89) on WIKI-727K and achieves an average relative reduction of 4.3% on P_{k} on WikiSection. The average relative P_{k} drop of 8.38% on two out-of-domain datasets also demonstrates the robustness of our approach. 2023.emnlp-main.341 @@ -4818,7 +4818,7 @@ Information Value: Measuring Utterance Predictability as Distance from Plausible Alternatives MarioGiulianelli SarenneWallbridge - RaquelFernández + RaquelFernández 5633-5653 We present information value, a measure which quantifies the predictability of an utterance relative to a set of plausible alternatives. We introduce a method to obtain interpretable estimates of information value using neural text generators, and exploit their psychometric predictive power to investigate the dimensions of predictability that drive human comprehension behaviour. Information value is a stronger predictor of utterance acceptability in written and spoken dialogue than aggregates of token-level surprisal and it is complementary to surprisal for predicting eye-tracked reading times. 2023.emnlp-main.343 @@ -4887,7 +4887,7 @@ JiayiPan YuchenZhou RuiPan - JoyceChai + JoyceChai 5718-5728 Vision-Language Models (VLMs) are trained on vast amounts of data captured by humans emulating our understanding of the world. However, known as visual illusions, human’s perception of reality isn’t always faithful to the physical world. This raises a key question: do VLMs have the similar kind of illusions as humans do, or do they faithfully learn to represent reality? To investigate this question, we build a dataset containing five types of visual illusions and formulate four tasks to examine visual illusions in state-of-the-art VLMs. Our findings have shown that although the overall alignment is low, larger models are closer to human perception and more susceptible to visual illusions. Our dataset and initial findings will promote a better understanding of visual illusions in humans and machines and provide a stepping stone for future computational models that can better align humans and machines in perceiving and communicating about the shared visual world. The code and data are available at [github.com/vl-illusion/dataset](https://github.com/vl-illusion/dataset). 2023.emnlp-main.348 @@ -4898,8 +4898,8 @@ Analysing State-Backed Propaganda Websites: a New Dataset and Linguistic Study FreddyHeppell - KalinaBontcheva - CarolinaScarton + KalinaBontcheva + CarolinaScarton 5729-5741 This paper analyses two hitherto unstudied sites sharing state-backed disinformation, Reliable Recent News (rrn.world) and WarOnFakes (waronfakes.com), which publish content in Arabic, Chinese, English, French, German, and Spanish. We describe our content acquisition methodology and perform cross-site unsupervised topic clustering on the resulting multilingual dataset. We also perform linguistic and temporal analysis of the web page translations and topics over time, and investigate articles with false publication dates. We make publicly available this new dataset of 14,053 articles, annotated with each language version, and additional metadata such as links and images. The main contribution of this paper for the NLP community is in the novel dataset which enables studies of disinformation networks, and the training of NLP tools for disinformation detection. 2023.emnlp-main.349 @@ -4996,7 +4996,7 @@ The <fixed-case>BLA</fixed-case> Benchmark: Investigating Basic Language Abilities of Pre-Trained Multimodal Models XinyiChen - RaquelFernández + RaquelFernández SandroPezzelle 5817-5830 Despite the impressive performance achieved by pre-trained language-and-vision models in downstream tasks, it remains an open question whether this reflects a proper understanding of image-text interaction. In this work, we explore to what extent they handle basic linguistic constructions—active-passive voice, coordination, and relative clauses—that even preschool children can typically master. We present BLA, a novel, automatically constructed benchmark to evaluate multimodal models on these Basic Language Abilities. We show that different types of Transformer-based systems, such as CLIP, ViLBERT, and BLIP2, generally struggle with BLA in a zero-shot setting, in line with previous findings. Our experiments, in particular, show that most of the tested models only marginally benefit when fine-tuned or prompted with construction-specific samples. Yet, the generative BLIP2 shows promising trends, especially in an in-context learning setting. This opens the door to using BLA not only as an evaluation benchmark but also to improve models’ basic language abilities. @@ -5034,14 +5034,14 @@ Ditto: A Simple and Efficient Approach to Improve Sentence Embeddings QianChen - WenWang + WenWang QinglinZhang SiqiZheng ChongDeng HaiYu JiaqingLiu YukunMa - ChongZhang + ChongZhang 5868-5875 Prior studies diagnose the anisotropy problem in sentence representations from pre-trained language models, e.g., BERT, without fine-tuning. Our analysis reveals that the sentence embeddings from BERT suffer from a bias towards uninformative words, limiting the performance in semantic textual similarity (STS) tasks. To address this bias, we propose a simple and efficient unsupervised approach, Diagonal Attention Pooling (Ditto), which weights words with model-based importance estimations and computes the weighted average of word representations from pre-trained models as sentence embeddings. Ditto can be easily applied to any pre-trained language model as a postprocessing operation. Compared to prior sentence embedding approaches, Ditto does not add parameters nor requires any learning. Empirical evaluations demonstrate that our proposed Ditto can alleviate the anisotropy problem and improve various pre-trained models on the STS benchmarks. 2023.emnlp-main.359 @@ -5160,7 +5160,7 @@ JaewookLee SugyeongEo ChanjunPark - HeuiseokLim + HeuiseokLim 6014-6029 Korean morphological variations present unique opportunities and challenges in natural language processing (NLP), necessitating an advanced understanding of morpheme-based sentence construction. The complexity of morphological variations allows for diverse sentence forms based on the syntactic-semantic integration of functional morphemes (i.e., affixes) to lexical morphemes (i.e., roots). With this in mind, we propose a method - CHEF, replicating the morphological transformations inherent in sentences based on lexical and functional morpheme combinations through generative data augmentation. CHEF operates using a morpheme blender and a label discriminator, thereby enhancing the diversity of Korean sentence forms by capturing the properties of agglutination while maintaining label consistency. We conduct experiments on Korean multiple classification datasets, improving model performance in full- and few-shot settings. Our proposed method boosts performance beyond the preceding data augmentation methods without incurring external data usage. We demonstrate that our approach achieves comparable results yielded by augmentation techniques that use large language models (LLMs). 2023.emnlp-main.367 @@ -5248,8 +5248,8 @@ AishwaryaPadmakumar MertInan SpandanaGella - PatrickLange - DilekHakkani-Tur + PatrickLange + DilekHakkani-Tur 6114-6131 Embodied task completion is a challenge where an agent in a simulated environment must predict environment actions to complete tasks based on natural language instructions and ego-centric visual observations. We propose a variant of this problem where the agent predicts actions at a higher level of abstraction called a plan, which helps make agent actions more interpretable and can be obtained from the appropriate prompting of large language models. We show that multimodal transformer models can outperform language-only models for this problem but fall significantly short of oracle plans. Since collecting human-human dialogues for embodied environments is expensive and time-consuming, we propose a method to synthetically generate such dialogues, which we then use as training data for plan prediction. We demonstrate that multimodal transformer models can attain strong zero-shot performance from our synthetic data, outperforming language-only models trained on human-human data. 2023.emnlp-main.374 @@ -5385,7 +5385,7 @@ Tree Prompting: Efficient Task Adaptation without Fine-Tuning ChandanSingh JohnMorris - AlexanderRush + AlexanderRush JianfengGao YuntianDeng 6253-6267 @@ -5466,8 +5466,8 @@ YataoBian ZeyuQin BingzheWu - Tat-SengChua - Kam-FaiWong + Tat-SengChua + Kam-FaiWong 6325-6341 Large language models (LLMs) outperform information retrieval techniques for downstream knowledge-intensive tasks when being prompted to generate world knowledge. However, community concerns abound regarding the factuality and potential implications of using this uncensored knowledge. In light of this, we introduce CONNER, a COmpreheNsive kNowledge Evaluation fRamework, designed to systematically and automatically evaluate generated knowledge from six important perspectives – Factuality, Relevance, Coherence, Informativeness, Helpfulness and Validity. We conduct an extensive empirical analysis of the generated knowledge from three different types of LLMs on two widely studied knowledge-intensive tasks, i.e., open-domain question answering and knowledge-grounded dialogue. Surprisingly, our study reveals that the factuality of generated knowledge, even if lower, does not significantly hinder downstream tasks. Instead, the relevance and coherence of the outputs are more important than small factual mistakes. Further, we show how to use CONNER to improve knowledge-intensive tasks by designing two strategies: Prompt Engineering and Knowledge Selection. Our evaluation code and LLM-generated knowledge with human annotations will be released to facilitate future research. 2023.emnlp-main.390 @@ -5555,8 +5555,8 @@ <fixed-case>H</fixed-case>alu<fixed-case>E</fixed-case>val: A Large-Scale Hallucination Evaluation Benchmark for Large Language Models JunyiLi XiaoxueCheng - XinZhao - Jian-YunNie + XinZhao + Jian-YunNie Ji-RongWen 6449-6464 Large language models (LLMs), such as ChatGPT, are prone to generate hallucinations, i.e., content that conflicts with the source or cannot be verified by the factual knowledge. To understand what types of content and to which extent LLMs are apt to hallucinate, we introduce the Hallucination Evaluation for Large Language Models (HaluEval) benchmark, a large collection of generated and human-annotated hallucinated samples for evaluating the performance of LLMs in recognizing hallucination. To generate these samples, we propose a ChatGPT-based two-step framework, i.e., sampling-then-filtering. Besides, we also hire some human labelers to annotate the hallucinations in ChatGPT responses. The empirical results suggest that ChatGPT is likely to generate hallucinated content in specific topics by fabricating unverifiable information (i.e., about 19.5% user queries). Moreover, existing LLMs face great challenges in recognizing the hallucinations in texts. While, our experiments also prove that the hallucination recognition can be improved by providing external knowledge or adding reasoning steps. @@ -5584,7 +5584,7 @@ VedanujGoswami ShrutiBhosale AngelaFan - LukeZettlemoyer + LukeZettlemoyer 6489-6499 Machine Translation (MT) has been widely used for cross-lingual classification, either by translating the test set into English and running inference with a monolingual model (translate-test), or translating the training set into the target languages and finetuning a multilingual model (translate-train). However, most research in the area focuses on the multilingual models rather than the MT component. We show that, by using a stronger MT system and mitigating the mismatch between training on original text and running inference on machine translated text, translate-test can do substantially better than previously assumed. The optimal approach, however, is highly task dependent, as we identify various sources of cross-lingual transfer gap that affect different tasks and approaches differently. Our work calls into question the dominance of multilingual models for cross-lingual classification, and prompts to pay more attention to MT-based baselines. 2023.emnlp-main.399 @@ -5618,9 +5618,9 @@ AtharvaKulkarni AbhishekVijayakumar HaofeiYu - HinrichSchuetze + HinrichSchuetze KemalOflazer - DavidMortensen + DavidMortensen 6508-6524 Large language models (LLMs) have recently reached an impressive level of linguistic capability, prompting comparisons with human language skills. However, there have been relatively few systematic inquiries into the linguistic capabilities of the latest generation of LLMs, and those studies that do exist (i) ignore the remarkable ability of humans to generalize, (ii) focus only on English, and (iii) investigate syntax or semantics and overlook other capabilities that lie at the heart of human language, like morphology. Here, we close these gaps by conducting the first rigorous analysis of the morphological capabilities of ChatGPT in four typologically varied languages (specifically, English, German, Tamil, and Turkish). We apply a version of Berko’s (1958) wug test to ChatGPT, using novel, uncontaminated datasets for the four examined languages. We find that ChatGPT massively underperforms purpose-built systems, particularly in English. Overall, our results—through the lens of morphology—cast a new light on the linguistic capabilities of ChatGPT, suggesting that claims of human-like language skills are premature and misleading. 2023.emnlp-main.401 @@ -5686,8 +5686,8 @@ Construction Artifacts in Metaphor Identification Datasets JoanneBoisson - LuisEspinosa-Anke - JoseCamacho-Collados + LuisEspinosa-Anke + JoseCamacho-Collados 6581-6590 Metaphor identification aims at understanding whether a given expression is used figuratively in context. However, in this paper we show how existing metaphor identification datasets can be gamed by fully ignoring the potential metaphorical expression or the context in which it occurs. We test this hypothesis in a variety of datasets and settings, and show that metaphor identification systems based on language models without complete information can be competitive with those using the full context. This is due to the construction procedures to build such datasets, which introduce unwanted biases for positive and negative classes. Finally, we test the same hypothesis on datasets that are carefully sampled from natural corpora and where this bias is not present, making these datasets more challenging and reliable. 2023.emnlp-main.406 @@ -5737,7 +5737,7 @@ Rethinking Model Selection and Decoding for Keyphrase Generation with Pre-trained Sequence-to-Sequence Models DiWu - WasiAhmad + WasiAhmad Kai-WeiChang 6642-6658 Keyphrase Generation (KPG) is a longstanding task in NLP with widespread applications. The advent of sequence-to-sequence (seq2seq) pre-trained language models (PLMs) has ushered in a transformative era for KPG, yielding promising performance improvements. However, many design decisions remain unexplored and are often made arbitrarily. This paper undertakes a systematic analysis of the influence of model selection and decoding strategies on PLM-based KPG. We begin by elucidating why seq2seq PLMs are apt for KPG, anchored by an attention-driven hypothesis. We then establish that conventional wisdom for selecting seq2seq PLMs lacks depth: (1) merely increasing model size or performing task-specific adaptation is not parameter-efficient; (2) although combining in-domain pre-training with task adaptation benefits KPG, it does partially hinder generalization. Regarding decoding, we demonstrate that while greedy search achieves strong F1 scores, it lags in recall compared with sampling-based methods. Based on these insights, we propose DeSel, a likelihood-based decode-select algorithm for seq2seq PLMs. DeSel improves greedy search by an average of 4.7% semantic F1 across five datasets. Our collective findings pave the way for deeper future investigations into PLM-based KPG. @@ -5762,7 +5762,7 @@ A Multi-Task Dataset for Assessing Discourse Coherence in <fixed-case>C</fixed-case>hinese Essays: Structure, Theme, and Logic Analysis HongyiWu XinshuShen - ManLan + ManLan ShaoguangMao XiaopengBai YuanbinWu @@ -5790,7 +5790,7 @@ Lifelong Sequence Generation with Dynamic Module Expansion and Adaptation ChengweiQin ChenChen - ShafiqJoty + ShafiqJoty 6701-6714 Lifelong sequence generation (LSG), a problem in continual learning, aims to continually train a model on a sequence of generation tasks to learn constantly emerging new generation patterns while avoiding the forgetting of previous knowledge. Existing LSG methods mainly focus on maintaining old knowledge while paying little attention to knowledge transfer across tasks. In contrast, humans can better learn new tasks by leveraging previously acquired knowledge from similar tasks. Inspired by the learning paradigm of humans, we propose Dynamic Module Expansion and Adaptation (DMEA), which enables the model to dynamically determine the architecture for acquiring new knowledge based on task correlation and select the most similar previous tasks to facilitate adaptation to new tasks. In addition, as the learning process can easily be biased towards the current task which might cause more severe forgetting of previously learned knowledge, we propose dynamic gradient scaling to balance the learning of the current task and replayed tasks. With extensive experiments, we demonstrate that DMEA can consistently outperform existing methods in different LSG settings. 2023.emnlp-main.414 @@ -5829,7 +5829,7 @@ HailinChen AmritaSaha StevenHoi - ShafiqJoty + ShafiqJoty 6737-6749 With the rise of powerful closed-sourced LLMs (ChatGPT, GPT-4), there are increasing interests in distilling the capabilies of close-sourced LLMs to smaller open-sourced LLMs. Previous distillation methods usually prompt ChatGPT to generate a set of instructions and answers, for the student model to learn. However, such standard distillation approach neglects the merits and conditions of the student model. Inspired by modern teaching principles, we design a personalised distillation process, in which the student attempts to solve a task first, then the teacher provides an adaptive refinement for the student to improve. Instead of feeding the student with teacher’s prior, personalised distillation enables personalised learning for the student model, as it only learns on examples it makes mistakes upon and learns to improve its own solution. On code generation, personalised distillation consistently outperforms standard distillation with only one third of the data. With only 2.5-3K personalised examples that incur a data-collection cost of 4-6$, we boost CodeGen-mono-16B by 7% to achieve 36.4% pass@1 and StarCoder by 12.2% to achieve 45.8% pass@1 on HumanEval. 2023.emnlp-main.417 @@ -5844,7 +5844,7 @@ ArkadeepAcharya SriparnaSaha AdamJatowt - SandipanDandapat + SandipanDandapat 6750-6774 Temporal reasoning represents a vital component of human communication and understanding, yet remains an underexplored area within the context of Large Language Models (LLMs). Despite LLMs demonstrating significant proficiency in a range of tasks, a comprehensive, large-scale analysis of their temporal reasoning capabilities is missing. Our paper addresses this gap, presenting the first extensive benchmarking of LLMs on temporal reasoning tasks. We critically evaluate 8 different LLMs across 6 datasets using 3 distinct prompting strategies. Additionally, we broaden the scope of our evaluation by including in our analysis 2 Code Generation LMs. Beyond broad benchmarking of models and prompts, we also conduct a fine-grained investigation of performance across different categories of temporal tasks. We further analyze the LLMs on varying temporal aspects, offering insights into their proficiency in understanding and predicting the continuity, sequence, and progression of events over time. Our findings reveal a nuanced depiction of the capabilities and limitations of the models within temporal reasoning, offering a comprehensive reference for future research in this pivotal domain. 2023.emnlp-main.418 @@ -5857,7 +5857,7 @@ ShreyaHavaldar MatthewPressimone EricWong - LyleUngar + LyleUngar 6775-6791 Understanding how styles differ across languages is advantageous for training both humans and computers to generate culturally appropriate text. We introduce an explanation framework to extract stylistic differences from multilingual LMs and compare styles across languages. Our framework (1) generates comprehensive style lexica in any language and (2) consolidates feature importances from LMs into comparable lexical categories. We apply this framework to compare politeness, creating the first holistic multilingual politeness dataset and exploring how politeness varies across four languages. Our approach enables an effective evaluation of how distinct linguistic categories contribute to stylistic variations and provides interpretable insights into how people communicate differently around the world. 2023.emnlp-main.419 @@ -5888,7 +5888,7 @@ ShanaKleiner DesmondPatton ElsbethTurcan - KathleenMcKeown + KathleenMcKeown 6805-6824 While biases disadvantaging African American Language (AAL) have been uncovered in models for tasks such as speech recognition and toxicity detection, there has been little investigation of these biases for language generation models like ChatGPT. We evaluate how well LLMs understand AAL in comparison to White Mainstream English (WME), the encouraged “standard” form of English taught in American classrooms. We measure large language model performance on two tasks: a counterpart generation task, where a model generates AAL given WME and vice versa, and a masked span prediction (MSP) task, where models predict a phrase hidden from their input. Using a novel dataset of AAL texts from a variety of regions and contexts, we present evidence of dialectal bias for six pre-trained LLMs through performance gaps on these tasks. 2023.emnlp-main.421 @@ -6028,7 +6028,7 @@ WeishiWang YueWang StevenHoi - ShafiqJoty + ShafiqJoty 6954-6968 Automatic program repair (APR) has gained increasing attention as an essential technique in software development to reduce manual debugging efforts and boost developers’ productivity. Recent advances in deep learning (DL) based models have demonstrated promising results by learning from large-scale bug-fix examples in a data-driven manner. However, in practical scenarios, software bugs have an imbalanced distribution, and the fixing knowledge learned by APR models often only capture the patterns of frequent error types, making it inapplicable to handle the rare error types. To address this limitation, we investigate a novel task of low-resource APR, and propose Meta-APR, a new meta-learning framework integrated with code pretrained language models to generate fixes for low-resource bugs with limited training samples. Our Meta-APR learns better error-specific knowledge from high-resource bugs through efficient first-order meta-learning optimization, which allows for a faster adaptation to the target low-resource bugs. Besides, while we adopt CodeT5, a pretrained code-aware encoder-decoder Transformer, as the backbone model for Meta-APR, it is a model-agnostic framework that can be integrated with any neural models. Extensive experimental results on three benchmarks in various programming languages verify the superiority of our method over existing DL-based APR approaches. 2023.emnlp-main.430 @@ -6136,7 +6136,7 @@ RrichaJalota KoelChowdhury CristinaEspaña-Bonet - Josefvan Genabith + Josefvan Genabith 7086-7100 Translated texts exhibit systematic linguistic differences compared to original texts in the same language, and these differences are referred to as translationese. Translationese has effects on various cross-lingual natural language processing tasks, potentially leading to biased results. In this paper, we explore a novel approach to reduce translationese in translated texts: translation-based style transfer. As there are no parallel human-translated and original data in the same language, we use a self-supervised approach that can learn from comparable (rather than parallel) mono-lingual original and translated data. However, even this self-supervised approach requires some parallel data for validation. We show how we can eliminate the need for parallel validation data by combining the self-supervised loss with an unsupervised loss. This unsupervised loss leverages the original language model loss over the style-transferred output and a semantic similarity loss between the input and style-transferred output. We evaluate our approach in terms of original vs. translationese binary classification in addition to measuring content preservation and target-style fluency. The results show that our approach is able to reduce translationese classifier accuracy to a level of a random classifier after style transfer while adequately preserving the content and fluency in the target original style. 2023.emnlp-main.438 @@ -6352,9 +6352,9 @@ A Study on Accessing Linguistic Information in Pre-Trained Language Models by Using Prompts - MarionDi Marco + MarionDi Marco KatharinaHämmerl - AlexanderFraser + AlexanderFraser 7328-7336 We study whether linguistic information in pre-trained multilingual language models can be accessed by human language: So far, there is no easy method to directly obtain linguistic information and gain insights into the linguistic principles encoded in such models. We use the technique of prompting and formulate linguistic tasks to test the LM’s access to explicit grammatical principles and study how effective this method is at providing access to linguistic features. Our experiments on German, Icelandic and Spanish show that some linguistic properties can in fact be accessed through prompting, whereas others are harder to capture. 2023.emnlp-main.454 @@ -6383,7 +6383,7 @@ SungryullSohn MoontaeLee HonglakLee - JoyceChai + JoyceChai 7354-7379 Pre-trained language models (PLMs) have shown impressive performance in various language tasks. However, they are prone to spurious correlations, and often generate illusory information. In real-world applications, PLMs should justify decisions with formalized, coherent reasoning chains, but this challenge remains under-explored. Cognitive psychology theorizes that humans are capable of utilizing fast and intuitive *heuristic* thinking to make decisions based on past experience, then rationalizing the decisions through slower and deliberative *analytic* reasoning. We incorporate these interlinked dual processes in fine-tuning and in-context learning with PLMs, applying them to two language understanding tasks that require coherent physical commonsense reasoning. We show that our proposed Heuristic-Analytic Reasoning (HAR) strategies drastically improve the coherence of rationalizations for model decisions, yielding state-of-the-art results on Tiered Reasoning for Intuitive Physics (TRIP). We also find that this improved coherence is a direct result of more faithful attention to relevant language context in each step of reasoning. Our findings suggest that human-like reasoning strategies can effectively improve the coherence and reliability of PLM reasoning. 2023.emnlp-main.456 @@ -6409,7 +6409,7 @@ AntoniaKaramolegkou JiaangLi LiZhou - AndersSøgaard + AndersSøgaard 7403-7412 Language models may memorize more than just facts, including entire chunks of texts seen during training. Fair use exemptions to copyright laws typically allow for limited use of copyrighted material without permission from the copyright holder, but typically for extraction of information from copyrighted materials, rather than verbatim reproduction. This work explores the issue of copyright violations and large language models through the lens of verbatim memorization, focusing on possible redistribution of copyrighted text. We present experiments with a range of language models over a collection of popular books and coding problems, providing a conservative characterization of the extent to which language models can redistribute these materials. Overall, this research highlights the need for further examination and the potential impact on future developments in natural language processing to ensure adherence to copyright regulations. Code is at https://github.com/coastalcph/CopyrightLLMs. 2023.emnlp-main.458 @@ -6436,7 +6436,7 @@ WentingZhao DerekChen SaujasVaduguru - AlexanderRush + AlexanderRush DanielFried 7426-7436 Large language models (LLMs) excel at processing and generating text and code. However, LLMs have had limited applicability in grounded task-oriented dialogue as they are difficult to steer toward task objectives and fail to handle novel grounding. We present a modular and interpretable grounded dialogue system that addresses these shortcomings by composing LLMs with a symbolic planner and grounded code execution. Our system, consists of a reader and planner: the reader leverages an LLM to convert partner utterances into executable code, calling functions that perform grounding. The translated code’s output is stored to track dialogue state, while a symbolic planner determines the next appropriate response. We evaluate our system’s performance on the demanding OneCommon dialogue task, involving collaborative reference resolution on abstract images of scattered dots. Our system substantially outperforms the previous state-of-the-art, including improving task success in human evaluations from 56% to 69% in the most challenging setting. @@ -6535,7 +6535,7 @@ Joint Entity and Relation Extraction with Span Pruning and Hypergraph Neural Networks ZhaohuiYan SonglinYang - WeiLiu + WeiLiu KeweiTu 7512-7526 Entity and Relation Extraction (ERE) is an important task in information extraction. Recent marker-based pipeline models achieve state-of-the-art performance, but still suffer from the error propagation issue. Also, most of current ERE models do not take into account higher-order interactions between multiple entities and relations, while higher-order modeling could be beneficial.In this work, we propose HyperGraph neural network for ERE (HGERE), which is built upon the PL-marker (a state-of-the-art marker-based pipleline model). To alleviate error propagation, we use a high-recall pruner mechanism to transfer the burden of entity identification and labeling from the NER module to the joint module of our model. For higher-order modeling, we build a hypergraph, where nodes are entities (provided by the span pruner) and relations thereof, and hyperedges encode interactions between two different relations or between a relation and its associated subject and object entities. We then run a hypergraph neural network for higher-order inference by applying message passing over the built hypergraph. Experiments on three widely used benchmarks (ACE2004, ACE2005 and SciERC) for ERE task show significant improvements over the previous state-of-the-art PL-marker. @@ -6609,7 +6609,7 @@ <fixed-case>R</fixed-case>o<fixed-case>B</fixed-case>o<fixed-case>C</fixed-case>o<fixed-case>P</fixed-case>: A Comprehensive <fixed-case>RO</fixed-case>mance <fixed-case>BO</fixed-case>rrowing <fixed-case>CO</fixed-case>gnate Package and Benchmark for Multilingual Cognate Identification - LiviuDinu + LiviuDinu AnaUban AlinaCristea AncaDinu @@ -6627,7 +6627,7 @@ Instructive Dialogue Summarization with Query Aggregations BinWang ZhengyuanLiu - NancyChen + NancyChen 7630-7653 Conventional dialogue summarization methods directly generate summaries and do not consider user’s specific interests. This poses challenges in cases where the users are more focused on particular topics or aspects. With the advancement of instruction-finetuned language models, we introduce instruction-tuning to dialogues to expand the capability set of dialogue summarization models. To overcome the scarcity of instructive dialogue summarization data, we propose a three-step approach to synthesize high-quality query-based summarization triples. This process involves summary-anchored query generation, query filtering and query-based summary generation. By training a unified model called InstructDS (Instructive Dialogue Summarization) on three summarization datasets with multi-purpose instructive triples, we expand the capability of dialogue summarization models. We evaluate our method on four datasets, including dialogue summarization and dialogue reading comprehension. Experimental results show that our approach outperforms the state-of-the-art models and even models with larger sizes. Additionally, our model exhibits higher generalizability and faithfulness, as confirmed by human subjective evaluations. 2023.emnlp-main.474 @@ -6741,9 +6741,9 @@ MohammadShoeybi YiDong OleksiiKuchaiev - BoLi + BoLi ChaoweiXiao - AnimaAnandkumar + AnimaAnandkumar BryanCatanzaro 7763-7786 Large decoder-only language models (LMs) can be largely improved in terms of perplexity by retrieval (e.g., RETRO), but its impact on text generation quality and downstream task accuracy is unclear. Thus, it is still an open question: shall we pretrain large autoregressive LMs with retrieval? To answer it, we perform a comprehensive study on a scalable pre-trained retrieval-augmented LM (i.e., RETRO) compared with standard GPT and retrieval-augmented GPT incorporated at fine-tuning or inference stages. We first provide the recipe to reproduce RETRO up to 9.5B parameters while retrieving a text corpus with 330B tokens. Based on that, we have the following novel findings: i) RETRO outperforms GPT on text generation with much less degeneration (i.e., repetition), moderately higher factual accuracy, and slightly lower toxicity with a nontoxic retrieval database. ii) On the LM Evaluation Harness benchmark, RETRO largely outperforms GPT on knowledge-intensive tasks, but is on par with GPT on other tasks. Furthermore, we introduce a simple variant of the model, RETRO++, which largely improves open-domain QA results of original RETRO (e.g., EM score +8.6 on Natural Question) and significantly outperforms retrieval-augmented GPT across different model sizes. Our findings highlight the promising direction of pretraining autoregressive LMs with retrieval as future foundation models. We release our implementation at: https://github.com/NVIDIA/Megatron-LM/tree/main/tools/retro. @@ -6757,7 +6757,7 @@ XinyuanLu LiangmingPan QianLiu - PreslavNakov + PreslavNakov Min-YenKan 7787-7813 Current scientific fact-checking benchmarks exhibit several shortcomings, such as biases arising from crowd-sourced claims and an over-reliance on text-based evidence. We present SCITAB, a challenging evaluation dataset consisting of 1.2K expert-verified scientific claims that 1) originate from authentic scientific publications and 2) require compositional reasoning for verification. The claims are paired with evidence-containing scientific tables annotated with labels. Through extensive evaluations, we demonstrate that SCITAB poses a significant challenge to state-of-the-art models, including table-based pretraining models and large language models. All models except GPT-4 achieved performance barely above random guessing. Popular prompting techniques, such as Chain-of-Thought, do not achieve much performance gains on SCITAB. Our analysis uncovers several unique challenges posed by SCITAB, including table grounding, claim ambiguity, and compositional reasoning. Our codes and data are publicly available at https://github.com/XinyuanLu00/SciTab. @@ -6899,7 +6899,7 @@ Empirical Study of Zero-Shot <fixed-case>NER</fixed-case> with <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> TingyuXie - QiLi + QiLi JianZhang YanZhang ZuozhuLiu @@ -6928,7 +6928,7 @@ Active Retrieval Augmented Generation ZhengbaoJiang - FrankXu + FrankXu LuyuGao ZhiqingSun QianLiu @@ -6975,9 +6975,9 @@ Enhancing Biomedical Lay Summarisation with External Knowledge Graphs TomasGoldsack - ZhihaoZhang + ZhihaoZhang ChenTang - CarolinaScarton + CarolinaScarton ChenghuaLin 8016-8032 Previous approaches for automatic lay summarisation are exclusively reliant on the source article that, given it is written for a technical audience (e.g., researchers), is unlikely to explicitly define all technical concepts or state all of the background information that is relevant for a lay audience. We address this issue by augmenting eLife, an existing biomedical lay summarisation dataset, with article-specific knowledge graphs, each containing detailed information on relevant biomedical concepts. Using both automatic and human evaluations, we systematically investigate the effectiveness of three different approaches for incorporating knowledge graphs within lay summarisation models, with each method targeting a distinct area of the encoder-decoder model architecture. Our results confirm that integrating graph-based domain knowledge can significantly benefit lay summarisation by substantially increasing the readability of generated text and improving the explanation of technical concepts. @@ -7061,7 +7061,7 @@ PascalDenis EmmanuelVincent SimonOstermann - Josefvan Genabith + Josefvan Genabith 8099-8110 In multimodal understanding tasks, visual and linguistic ambiguities can arise. Visual ambiguity can occur when visual objects require a model to ground a referring expression in a video without strong supervision, while linguistic ambiguity can occur from changes in entities in action flows. As an example from the cooking domain, “oil” mixed with “salt” and “pepper” could later be referred to as a “mixture”. Without a clear visual-linguistic alignment, we cannot know which among several objects shown is referred to by the language expression “mixture”, and without resolved antecedents, we cannot pinpoint what the mixture is. We define this chicken-and-egg problem as Visual-linguistic Ambiguity. In this paper, we present Find2Find, a joint anaphora resolution and object localization dataset targeting the problem of visual-linguistic ambiguity, consisting of 500 anaphora-annotated recipes with corresponding videos. We present experimental results of a novel end-to-end joint multitask learning framework for Find2Find that fuses visual and textual information and shows improvements both for anaphora resolution and object localization with one joint model in multitask learning, as compared to a strong single-task baseline. 2023.emnlp-main.504 @@ -7156,7 +7156,7 @@ <fixed-case>DSI</fixed-case>++: Updating Transformer Memory with New Documents - Sanket VaibhavMehta + Sanket VaibhavMehta JaiGupta YiTay MostafaDehghani @@ -7205,7 +7205,7 @@ Homophone Disambiguation Reveals Patterns of Context Mixing in Speech Transformers HoseinMohebbi - GrzegorzChrupała + GrzegorzChrupała WillemZuidema AfraAlishahi 8249-8260 @@ -7287,7 +7287,7 @@ SangwooCho XiaoyangWang HassanForoosh - FeiLiu + FeiLiu 8344-8357 Human preference judgments are pivotal in guiding large language models (LLMs) to produce outputs that align with human values. Human evaluations are also used in summarization tasks to compare outputs from various systems, complementing existing automatic metrics. Despite their significance, however, there has been limited research probing these pairwise or k-wise comparisons. The collective impact and relative importance of factors such as output length, informativeness, fluency, and factual consistency are still not well understood. It is also unclear if there are other hidden factors influencing human judgments. In this paper, we conduct an in-depth examination of a collection of pairwise human judgments released by OpenAI. Utilizing the Bradley-Terry-Luce (BTL) model, we reveal the inherent preferences embedded in these human judgments. We find that the most favored factors vary across tasks and genres, whereas the least favored factors tend to be consistent, e.g., outputs are too brief, contain excessive off-focus content or hallucinated facts. Our findings have implications on the construction of balanced datasets in human preference evaluations, which is a crucial step in shaping the behaviors of future LLMs. 2023.emnlp-main.519 @@ -7380,7 +7380,7 @@ FengJiang PeifengLi FangKong - QiaomingZhu + QiaomingZhu 8484-8495 Dialogue discourse parsing aims to reflect the relation-based structure of dialogue by establishing discourse links according to discourse relations. To alleviate data sparsity, previous studies have adopted multitasking approaches to jointly learn dialogue discourse parsing with related tasks (e.g., reading comprehension) that require additional human annotation, thus limiting their generality. In this paper, we propose a multitasking framework that integrates dialogue discourse parsing with its neighboring task addressee recognition. Addressee recognition reveals the reply-to structure that partially overlaps with the relation-based structure, which can be exploited to facilitate relation-based structure learning. To this end, we first proposed a reinforcement learning agent to identify training examples from addressee recognition that are most helpful for dialog discourse parsing. Then, a task-aware structure transformer is designed to capture the shared and private dialogue structure of different tasks, thereby further promoting dialogue discourse parsing. Experimental results on both the Molweni and STAC datasets show that our proposed method can outperform the SOTA baselines. The code will be available at https://github.com/yxfanSuda/RLTST. 2023.emnlp-main.526 @@ -7402,7 +7402,7 @@ <fixed-case>DALE</fixed-case>: Generative Data Augmentation for Low-Resource Legal <fixed-case>NLP</fixed-case> SreyanGhosh - Chandra Kiran ReddyEvuru + Chandra Kiran ReddyEvuru SonalKumar SRamaneswaran SSakshi @@ -7451,7 +7451,7 @@ BegoñaAltuna JavierAlvez ItziarGonzalez-Dios - GermanRigau + GermanRigau 8596-8615 Although large language models (LLMs) have apparently acquired a certain level of grammatical knowledge and the ability to make generalizations, they fail to interpret negation, a crucial step in Natural Language Processing. We try to clarify the reasons for the sub-optimal performance of LLMs understanding negation. We introduce a large semi-automatically generated dataset of circa 400,000 descriptive sentences about commonsense knowledge that can be true or false in which negation is present in about 2/3 of the corpus in different forms. We have used our dataset with the largest available open LLMs in a zero-shot approach to grasp their generalization and inference capability and we have also fine-tuned some of the models to assess whether the understanding of negation can be trained. Our findings show that, while LLMs are proficient at classifying affirmative sentences, they struggle with negative sentences and lack a deep understanding of negation, often relying on superficial cues. Although fine-tuning the models on negative sentences improves their performance, the lack of generalization in handling negation is persistent, highlighting the ongoing challenges of LLMs regarding negation understanding and generalization. The dataset and code are publicly available. 2023.emnlp-main.531 @@ -7465,7 +7465,7 @@ MingtongLiu HongxiaoZhang YufengChen - JinanXu + JinanXu MingZhou 8616-8627 Sentence-level translation, document-level translation, translation memory, and terminology constrained translation play an important role in machine translation. Most of the previous work uses separate models or methods to solve these tasks, which is not conducive to knowledge transfer of different tasks and increases the complexity of system construction. In this work, we explore the potential of pre-trained language model in machine translation tasks and propose a Multi-Task Machine Translation (MT2) model to integrate these translation tasks. We design a novel translation-specific In-Context Learning (ICL) paradigm for model training, in which all of the translation tasks can be modeled as context-learning tasks that integrate contextual information for performance improvement. Specifically, we propose a retrieval and alignment method to obtain a large scale context-enhancement training data, then we train the model in an in-context learning manner. Furthermore, we adopt two context-dependent training strategies to encourage the model to better understand and utilize contextual information for translation. Extensive experiments on translation memory, terminology constrained translation, document-level translation, and few-shot domain-adaptation tasks demonstrate the superior performance of our model, verifying the effectiveness of our proposed approach. @@ -7513,7 +7513,7 @@ IqraZahid HaoLi IanPratt-Hartmann - RizaBatista-Navarro + RizaBatista-Navarro 8680-8692 How do different generalised quantifiers affect the behaviour of transformer-based language models (TLMs)? The recent popularity of TLMs and the central role generalised quantifiers have traditionally played in linguistics and logic bring this question into particular focus. The current research investigating this subject has not utilised a task defined purely in a logical sense, and thus, has not captured the underlying logical significance of generalised quantifiers. Consequently, they have not answered the aforementioned question faithfully or adequately. Therefore, we investigate how different generalised quantifiers affect TLMs by employing a textual entailment problem defined in a purely logical sense, namely, model-checking with natural language. Our approach permits the automatic construction of datasets with respect to which we can assess the ability of TLMs to learn the meanings of generalised quantifiers. Our investigation reveals that TLMs generally can comprehend the logical semantics of the most common generalised quantifiers, but that distinct quantifiers influence TLMs in varying ways. 2023.emnlp-main.536 @@ -7569,7 +7569,7 @@ EricChang AmilcareGentili JulianMcAuley - Chun-NanHsu + Chun-NanHsu 8725-8744 Curated datasets for healthcare are often limited due to the need of human annotations from experts. In this paper, we present MedEval, a multi-level, multi-task, and multi-domain medical benchmark to facilitate the development of language models for healthcare. MedEval is comprehensive and consists of data from several healthcare systems and spans 35 human body regions from 8 examination modalities. With 22,779 collected sentences and 21,228 reports, we provide expert annotations at multiple levels, offering a granular potential usage of the data and supporting a wide range of tasks. Moreover, we systematically evaluated 10 generic and domain-specific language models under zero-shot and finetuning settings, from domain-adapted baselines in healthcare to general-purposed state-of-the-art large language models (e.g., ChatGPT). Our evaluations reveal varying effectiveness of the two categories of language models across different tasks, from which we notice the importance of instruction tuning for few-shot usage of large language models. Our investigation paves the way toward benchmarking language models for healthcare and provides valuable insights into the strengths and limitations of adopting large language models in medical domains, informing their practical applications and future advancements. 2023.emnlp-main.540 @@ -7591,7 +7591,7 @@ Are Embedded Potatoes Still Vegetables? On the Limitations of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Embeddings for Lexical Semantics XuyouCheng - MichaelSchlichtkrull + MichaelSchlichtkrull GuyEmerson 8763-8775 Knowledge Base Embedding (KBE) models have been widely used to encode structured information from knowledge bases, including WordNet. However, the existing literature has predominantly focused on link prediction as the evaluation task, often neglecting exploration of the models’ semantic capabilities. In this paper, we investigate the potential disconnect between the performance of KBE models of WordNet on link prediction and their ability to encode semantic information, highlighting the limitations of current evaluation protocols. Our findings reveal that some top-performing KBE models on the WN18RR benchmark exhibit subpar results on two semantic tasks and two downstream tasks. These results demonstrate the inadequacy of link prediction benchmarks for evaluating the semantic capabilities of KBE models, suggesting the need for a more targeted assessment approach. @@ -7715,7 +7715,7 @@ YftahZiser AnnaKorhonen EdoardoPonti - ShayCohen + ShayCohen 8914-8932 Hallucinations pose a significant challenge to the reliability of neural models for abstractive summarisation. While automatically generated summaries may be fluent, they often lack faithfulness to the original document. This issue becomes even more pronounced in low-resource languages, where summarisation requires cross-lingual transfer. With the existing faithful metrics focusing on English, even measuring the extent of this phenomenon in cross-lingual settings is hard. To address this, we first develop a novel metric, mFACT, evaluating the faithfulness of non-English summaries, leveraging translation-based transfer from multiple English faithfulness metrics. Through extensive experiments in multiple languages, we demonstrate that mFACT is best suited to detect hallucinations compared to alternative metrics. With mFACT, we assess a broad range of multilingual large language models, and find that they all tend to hallucinate often in languages different from English. We then propose a simple but effective method to reduce hallucinations in cross-lingual transfer, which weighs the loss of each training example by its faithfulness score. This method drastically increases both performance and faithfulness according to both automatic and human evaluation when compared to strong baselines for cross-lingual transfer such as MAD-X. Our code and dataset are available at https://github.com/yfqiu-nlp/mfact-summ. 2023.emnlp-main.551 @@ -7765,7 +7765,7 @@ ZhiliangTian XinNiu ChangjianWang - DongshengLi + DongshengLi DachengTao 8964-8974 Text classification tasks often encounter few-shot scenarios with limited labeled data, and addressing data scarcity is crucial. Data augmentation with mixup merges sample pairs to generate new pseudos, which can relieve the data deficiency issue in text classification. However, the quality of pseudo-samples generated by mixup exhibits significant variations. Most of the mixup methods fail to consider the varying degree of learning difficulty in different stages of training. And mixup generates new samples with one-hot labels, which encourages the model to produce a high prediction score for the correct class that is much larger than other classes, resulting in the model’s over-confidence. In this paper, we propose a self-evolution learning (SE) based mixup approach for data augmentation in text classification, which can generate more adaptive and model-friendly pseudo samples for the model training. SE caters to the growth of the model learning ability and adapts to the ability when generating training samples. To alleviate the model over-confidence, we introduce an instance-specific label smoothing regularization approach, which linearly interpolates the model’s output and one-hot labels of the original samples to generate new soft labels for label mixing up. Through experimental analysis, experiments show that our SE brings consistent and significant improvements upon different mixup methods. In-depth analyses demonstrate that SE enhances the model’s generalization ability. @@ -7828,7 +7828,7 @@ <fixed-case>CLAD</fixed-case>-<fixed-case>ST</fixed-case>: Contrastive Learning with Adversarial Data for Robust Speech Translation - SathishIndurthi + SathishIndurthi ShamilChollampatt RaviAgrawal MarcoTurchi @@ -7846,7 +7846,7 @@ ZhenWu YawenOuyang JianbingZhang - XinyuDai + XinyuDai 9057-9070 Multimodal Aspect-based Sentiment Analysis (MABSA) is a fine-grained Sentiment Analysis task, which has attracted growing research interests recently. Existing work mainly utilizes image information to improve the performance of MABSA task. However, most of the studies overestimate the importance of images since there are many noise images unrelated to the text in the dataset, which will have a negative impact on model learning. Although some work attempts to filter low-quality noise images by setting thresholds, relying on thresholds will inevitably filter out a lot of useful image information. Therefore, in this work, we focus on whether the negative impact of noisy images can be reduced without modifying the data. To achieve this goal, we borrow the idea of Curriculum Learning and propose a Multi-grained Multi-curriculum Denoising Framework (M2DF), which can achieve denoising by adjusting the order of training data. Extensive experimental results show that our framework consistently outperforms state-of-the-art work on three sub-tasks of MABSA. 2023.emnlp-main.561 @@ -7859,7 +7859,7 @@ SiyuanChen ZhilingZhang MengyueWu - KennyZhu + KennyZhu 9071-9084 Existing Mental Disease Detection (MDD) research largely studies the detection of a single disorder, overlooking the fact that mental diseases might occur in tandem. Many approaches are not backed by domain knowledge (e.g., psychiatric symptoms) and thus fail to produce interpretable results. To tackle these issues, we propose an MDD framework that is capable of learning the shared clues of all diseases, while also capturing the specificity of each single disease. The two-stream architecture which simultaneously processes text and symptom features can combine the strength of both modalities and offer knowledge-based explainability. Experiments on the detection of 7 diseases show that our model can boost detection performance by more than 10%, especially in relatively rare classes. 2023.emnlp-main.562 @@ -8009,7 +8009,7 @@ Causal Reasoning through Two Cognition Layers for Improving Generalization in Visual Question Answering TrangNguyen - NaoakiOkazaki + NaoakiOkazaki 9221-9236 Generalization in Visual Question Answering (VQA) requires models to answer questions about images with contexts beyond the training distribution. Existing attempts primarily refine unimodal aspects, overlooking enhancements in multimodal aspects. Besides, diverse interpretations of the input lead to various modes of answer generation, highlighting the role of causal reasoning between interpreting and answering steps in VQA. Through this lens, we propose Cognitive pathways VQA (CopVQA) improving the multimodal predictions by emphasizing causal reasoning factors. CopVQA first operates a pool of pathways that capture diverse causal reasoning flows through interpreting and answering stages. Mirroring human cognition, we decompose the responsibility of each stage into distinct experts and a cognition-enabled component (CC). The two CCs strategically execute one expert for each stage at a time. Finally, we prioritize answer predictions governed by pathways involving both CCs while disregarding answers produced by either CC, thereby emphasizing causal reasoning and supporting generalization. Our experiments on real-life and medical data consistently verify that CopVQA improves VQA performance and generalization across baselines and domains. Notably, CopVQA achieves a new state-of-the-art (SOTA) on the PathVQA dataset and comparable accuracy to the current SOTA on VQA-CPv2, VQAv2, and VQA- RAD, with one-fourth of the model size. 2023.emnlp-main.573 @@ -8023,7 +8023,7 @@ KunZhou ZicanDong KemingYe - XinZhao + XinZhao Ji-RongWen 9237-9251 In this paper, we aim to improve the reasoning ability of large language models (LLMs) over structured data in a unified way. Inspired by the studies on tool augmentation for LLMs, we develop an Iterative Reading-then-Reasoning (IRR) framework to solve question answering tasks based on structured data, called StructGPT. In this framework, we construct the specialized interfaces to collect relevant evidence from structured data (i.e., reading), and let LLMs concentrate on the reasoning task based on the collected information (i.e., reasoning). Specially, we propose an invoking-linearization-generation procedure to support LLMs in reasoning on the structured data with the help of the interfaces. By iterating this procedure with provided interfaces, our approach can gradually approach the target answers to a given query. Experiments conducted on three types of structured data show that StructGPT greatly improves the performance of LLMs, under the few-shot and zero-shot settings. @@ -8050,7 +8050,7 @@ MrigankRaman PratyushMaini JKolter - ZacharyLipton + ZacharyLipton DanishPruthi 9266-9286 In recent years, NLP practitioners have converged on the following practice: (i) import an off-the-shelf pretrained (masked) language model; (ii) append a multilayer perceptron atop the CLS token’s hidden representation (with randomly initialized weights); and (iii) fine-tune the entire model on a downstream task (MLP-FT). This procedure has produced massive gains on standard NLP benchmarks, but these models remain brittle, even to mild adversarial perturbations. In this work, we demonstrate surprising gains in adversarial robustness enjoyed by Model-tuning Via Prompts (MVP), an alternative method of adapting to downstream tasks. Rather than appending an MLP head to make output prediction, MVP appends a prompt template to the input, and makes prediction via text infilling/completion. Across 5 NLP datasets, 4 adversarial attacks, and 3 different models, MVP improves performance against adversarial substitutions by an average of 8% over standard methods and even outperforms adversarial training-based state-of-art defenses by 3.5%. By combining MVP with adversarial training, we achieve further improvements in adversarial robustness while maintaining performance on unperturbed examples. Finally, we conduct ablations to investigate the mechanism underlying these gains. Notably, we find that the main causes of vulnerability of MLP-FT can be attributed to the misalignment between pre-training and fine-tuning tasks, and the randomly initialized MLP parameters. @@ -8167,7 +8167,7 @@ ThibaultSellam AdityaSiddhant DipanjanDas - AnkurParikh + AnkurParikh 9397-9413 Reliable automatic evaluation of summarization systems is challenging due to the multifaceted and subjective nature of the task. This is especially the case for languages other than English, where human evaluations are scarce. In this work, we introduce SEAHORSE, a dataset for multilingual, multifaceted summarization evaluation. SEAHORSE consists of 96K summaries with human ratings along 6 dimensions of text quality: comprehensibility, repetition, grammar, attribution, main ideas, and conciseness, covering 6 languages, 9 systems, and 4 datasets. As a result of its size and scope, SEAHORSE can serve both as a benchmark to evaluate learnt metrics, as well as a large-scale resource for training such metrics. We show that metrics trained with SEAHORSE achieve strong performance on the out-of-domain meta-evaluation benchmarks TRUE (Honovich et al., 2022) and mFACE (Aharoni et al., 2022). We make the SEAHORSE dataset and metrics publicly available for future research on multilingual and multifaceted summarization evaluation. 2023.emnlp-main.584 @@ -8259,7 +8259,7 @@ Multilingual estimation of political-party positioning: From label aggregation to long-input Transformers DmitryNikolaev TaniseCeron - SebastianPadó + SebastianPadó 9497-9511 Scaling analysis is a technique in computational political science that assigns a political actor (e.g. politician or party) a score on a predefined scale based on a (typically long) body of text (e.g. a parliamentary speech or an election manifesto). For example, political scientists have often used the left–right scale to systematically analyse political landscapes of different countries. NLP methods for automatic scaling analysis can find broad application provided they (i) are able to deal with long texts and (ii) work robustly across domains and languages. In this work, we implement and compare two approaches to automatic scaling analysis of political-party manifestos: label aggregation, a pipeline strategy relying on annotations of individual statements from the manifestos, and long-input-Transformer-based models, which compute scaling values directly from raw text. We carry out the analysis of the Comparative Manifestos Project dataset across 41 countries and 27 languages and find that the task can be efficiently solved by state-of-the-art models, with label aggregation producing the best results. 2023.emnlp-main.591 @@ -8307,7 +8307,7 @@ SantoshT.y.s.s OanaIchim IsabellaRisini - BarbaraPlank + BarbaraPlank MatthiasGrabmair 9558-9576 In legal NLP, Case Outcome Classification (COC) must not only be accurate but also trustworthy and explainable. Existing work in explainable COC has been limited to annotations by a single expert. However, it is well-known that lawyers may disagree in their assessment of case facts. We hence collect a novel dataset RaVE: Rationale Variation in ECHR, which is obtained from two experts in the domain of international human rights law, for whom we observe weak agreement. We study their disagreements and build a two-level task-independent taxonomy, supplemented with COC-specific subcategories. To our knowledge, this is the first work in the legal NLP that focuses on human label variation. We quantitatively assess different taxonomy categories and find that disagreements mainly stem from underspecification of the legal context, which poses challenges given the typically limited granularity and noise in COC metadata. We further assess the explainablility of state-of-the-art COC models on RaVE and observe limited agreement between models and experts. Overall, our case study reveals hitherto underappreciated complexities in creating benchmark datasets in legal NLP that revolve around identifying aspects of a case’s facts supposedly relevant for its outcome. @@ -8331,7 +8331,7 @@ Statistical Depth for Ranking and Characterizing Transformer-Based Text Embeddings ParkerSeegmiller - Sarah MasudPreum + Sarah MasudPreum 9600-9611 The popularity of transformer-based text embeddings calls for better statistical tools for measuring distributions of such embeddings. One such tool would be a method for ranking texts within a corpus by centrality, i.e. assigning each text a number signifying how representative that text is of the corpus as a whole. However, an intrinsic center-outward ordering of high-dimensional text representations is not trivial. A \textit{statistical depth} is a function for ranking k-dimensional objects by measuring centrality with respect to some observed k-dimensional distribution. We adopt a statistical depth to measure distributions of transformer-based text embeddings, \textit{transformer-based text embedding (TTE) depth}, and introduce the practical use of this depth for both modeling and distributional inference in NLP pipelines. We first define TTE depth and an associated rank sum test for determining whether two corpora differ significantly in embedding space. We then use TTE depth for the task of in-context learning prompt selection, showing that this approach reliably improves performance over statistical baseline approaches across six text classification tasks. Finally, we use TTE depth and the associated rank sum test to characterize the distributions of synthesized and human-generated corpora, showing that five recent synthetic data augmentation processes cause a measurable distributional shift away from associated human-generated text. 2023.emnlp-main.596 @@ -8386,9 +8386,9 @@ PhilippeLaban WojciechKryscinski DivyanshAgarwal - AlexanderFabbri + AlexanderFabbri CaimingXiong - ShafiqJoty + ShafiqJoty Chien-ShengWu 9662-9676 With the recent appearance of LLMs in practical settings, having methods that can effectively detect factual inconsistencies is crucial to reduce the propagation of misinformation and improve trust in model outputs. When testing on existing factual consistency benchmarks, we find that a few large language models (LLMs) perform competitively on classification benchmarks for factual inconsistency detection compared to traditional non-LLM methods. However, a closer analysis reveals issues with existing evaluation benchmarks, affecting evaluation precision. To address this, we propose a new protocol for inconsistency detection benchmark creation and implement it in a 10-domain benchmark called SummEdits. This new benchmark is 20 times more cost-effective per sample than previous benchmarks and highly reproducible, as we estimate inter-annotator agreement at about 0.9. Most LLMs struggle on SummEdits, with performance close to random chance. The best-performing model, GPT-4, is still 8% below estimated human performance, highlighting the gaps in LLMs’ ability to reason about facts and detect inconsistencies when they occur. @@ -8416,7 +8416,7 @@ TinNguyen JiannanXu AayushiRoy - HalDaumé III + HalDaumé III MarineCarpuat 9696-9717 Recent research at the intersection of AI explainability and fairness has focused on how explanations can improve human-plus-AI task performance as assessed by fairness measures. We propose to characterize what constitutes an explanation that is itself “fair” – an explanation that does not adversely impact specific populations. We formulate a novel evaluation method of “fair explanations” using not just accuracy and label time, but also psychological impact of explanations on different user groups across many metrics (mental discomfort, stereotype activation, and perceived workload). We apply this method in the context of content moderation of potential hate speech, and its differential impact on Asian vs. non-Asian proxy moderators, across explanation approaches (saliency map and counterfactual explanation). We find that saliency maps generally perform better and show less evidence of disparate impact (group) and individual unfairness than counterfactual explanations. Content warning: This paper contains examples of hate speech and racially discriminatory language. The authors do not support such content. Please consider your risk of discomfort carefully before continuing reading! @@ -8427,7 +8427,7 @@ Bridging Background Knowledge Gaps in Translation with Automatic Explicitation - HyoJungHan + HyoJungHan JordanBoyd-Graber MarineCarpuat 9718-9735 @@ -8445,7 +8445,7 @@ YufengChen JianLiu WenjuanHan - JinanXu + JinanXu 9736-9748 Existing syntactically-controlled paraphrase generation (SPG) models perform promisingly with human-annotated or well-chosen syntactic templates. However, the difficulty of obtaining such templates actually hinders the practical application of SPG models. For one thing, the prohibitive cost makes it unfeasible to manually design decent templates for every source sentence. For another, the templates automatically retrieved by current heuristic methods are usually unreliable for SPG models to generate qualified paraphrases. To escape this dilemma, we propose a novel Quality-based Syntactic Template Retriever (QSTR) to retrieve templates based on the quality of the to-be-generated paraphrases. Furthermore, for situations requiring multiple paraphrases for each source sentence, we design a Diverse Templates Search (DTS) algorithm, which can enhance the diversity between paraphrases without sacrificing quality. Experiments demonstrate that QSTR can significantly surpass existing retrieval methods in generating high-quality paraphrases and even perform comparably with human-annotated templates in terms of reference-free metrics. Additionally, human evaluation and the performance on downstream tasks using our generated paraphrases for data augmentation showcase the potential of our QSTR and DTS algorithm in practical scenarios. 2023.emnlp-main.604 @@ -8499,7 +8499,7 @@ AanishaBhattacharyya Yaman KSingla BalajiKrishnamurthy - Rajiv RatnShah + Rajiv RatnShah ChangyouChen 9822-9839 Multimedia content, such as advertisements and story videos, exhibit a rich blend of creativity and multiple modalities. They incorporate elements like text, visuals, audio, and storytelling techniques, employing devices like emotions, symbolism, and slogans to convey meaning. There is a dearth of large annotated training datasets in the multimedia domain hindering the development of supervised learning models with satisfactory performance for real-world applications. On the other hand, the rise of large language models (LLMs) has witnessed remarkable zero-shot performance in various natural language processing (NLP) tasks, such as emotion classification, question answering, and topic classification. To leverage such advanced techniques to bridge this performance gap in multimedia understanding, we propose verbalizing long videos to generate their descriptions in natural language, followed by performing video-understanding tasks on the generated story as opposed to the original video. Through extensive experiments on fifteen video-understanding tasks, we demonstrate that our method, despite being zero-shot, achieves significantly better results than supervised baselines for video understanding. Furthermore, to alleviate a lack of story understanding benchmarks, we publicly release the first dataset on a crucial task in computational social science on persuasion strategy identification. @@ -8545,7 +8545,7 @@ MichalShmueli-Scheuer DafnaSheinwald NoamSlonim - LiatEin-Dor + LiatEin-Dor 9862-9877 The field of Natural Language Generation (NLG) suffers from a severe shortage of labeled data due to the extremely expensive and time-consuming process involved in manual annotation. A natural approach for coping with this problem is active learning (AL), a well-known machine learning technique for improving annotation efficiency by selectively choosing the most informative examples to label. However, while AL has been well-researched in the context of text classification, its application to NLG remains largely unexplored. In this paper, we present a first systematic study of active learning for NLG, considering a diverse set of tasks and multiple leading selection strategies, and harnessing a strong instruction-tuned model. Our results indicate that the performance of existing AL strategies is inconsistent, surpassing the baseline of random example selection in some cases but not in others. We highlight some notable differences between the classification and generation scenarios, and analyze the selection behaviors of existing AL strategies. Our findings motivate exploring novel approaches for applying AL to generation tasks. 2023.emnlp-main.611 @@ -8587,8 +8587,8 @@ SachinKumar HilaGonen JungoKasai - DavidMortensen - NoahSmith + DavidMortensen + NoahSmith YuliaTsvetkov 9904-9923 Language models have graduated from being research prototypes to commercialized products offered as web APIs, and recent works have highlighted the multilingual capabilities of these products. The API vendors charge their users based on usage, more specifically on the number of “tokens” processed or generated by the underlying language models. What constitutes a token, however, is training data and model dependent with a large variance in the number of tokens required to convey the same information in different languages. In this work, we analyze the effect of this non-uniformity on the fairness of an API’s pricing policy across languages. We conduct a systematic analysis of the cost and utility of OpenAI’s language model API on multilingual benchmarks in 22 typologically diverse languages. We show evidence that speakers of a large number of the supported languages are overcharged while obtaining poorer results. These speakers tend to also come from regions where the APIs are less affordable, to begin with. Through these analyses, we aim to increase transparency around language model APIs’ pricing policies and encourage the vendors to make them more equitable. @@ -8646,7 +8646,7 @@ Whispering <fixed-case>LL</fixed-case>a<fixed-case>MA</fixed-case>: A Cross-Modal Generative Error Correction Framework for Speech Recognition SrijithRadhakrishnan - Chao-Han HuckYang + Chao-Han HuckYang Sumeer AhmadKhan RohitKumar Narsis A.Kiani @@ -8662,7 +8662,7 @@ Reducing Sequence Length by Predicting Edit Spans with Large Language Models MasahiroKaneko - NaoakiOkazaki + NaoakiOkazaki 10017-10029 Large Language Models (LLMs) have demonstrated remarkable performance in various tasks and gained significant attention. LLMs are also used for local sequence transduction tasks, including grammatical error correction (GEC) and formality style transfer, where most tokens in a source text are kept unchanged. However, the models that generate all target tokens in such tasks have a tendency to simply copy the input text as is, without making needed changes, because the difference between input and output texts is minimal in the training data. This is also inefficient because the computational cost grows quadratically with the target sequence length with Transformer. This paper proposes predicting edit spans for the source text for local sequence transduction tasks. Representing an edit span with a position of the source text and corrected tokens, we can reduce the length of the target sequence and the computational cost for inference. We apply instruction tuning for LLMs on the supervision data of edit spans. Experiments show that the proposed method achieves comparable performance to the baseline in four tasks, paraphrasing, formality style transfer, GEC, and text simplification, despite reducing the length of the target text by as small as 21%. Furthermore, we report that the task-specific fine-tuning with the proposed method achieved state-of-the-art performance in the four tasks. 2023.emnlp-main.619 @@ -8690,7 +8690,7 @@ Rethinking the Evaluation for Conversational Recommendation in the Era of Large Language Models XiaoleiWang XinyuTang - XinZhao + XinZhao JingyuanWang Ji-RongWen 10052-10065 @@ -8742,7 +8742,7 @@ Make Every Example Count: On the Stability and Utility of Self-Influence for Learning from Noisy <fixed-case>NLP</fixed-case> Datasets IrinaBejan - ArtemSokolov + ArtemSokolov KatjaFilippova 10107-10121 Increasingly larger datasets have become a standard ingredient to advancing the state-of-the-art in NLP. However, data quality might have already become the bottleneck to unlock further gains. Given the diversity and the sizes of modern datasets, standard data filtering is not straight-forward to apply, because of the multifacetedness of the harmful data and elusiveness of filtering rules that would generalize across multiple tasks. We study the fitness of task-agnostic self-influence scores of training examples for data cleaning, analyze their efficacy in capturing naturally occurring outliers, and investigate to what extent self-influence based data cleaning can improve downstream performance in machine translation, question answering and text classification, building up on recent approaches to self-influence calculation and automated curriculum learning. @@ -8754,9 +8754,9 @@ Appraising the Potential Uses and Harms of <fixed-case>LLM</fixed-case>s for Medical Systematic Reviews HyeYun - IainMarshall + IainMarshall ThomasTrikalinos - ByronWallace + ByronWallace 10122-10139 Medical systematic reviews play a vital role in healthcare decision making and policy. However, their production is time-consuming, limiting the availability of high-quality and up-to-date evidence summaries. Recent advancements in LLMs offer the potential to automatically generate literature reviews on demand, addressing this issue. However, LLMs sometimes generate inaccurate (and potentially misleading) texts by hallucination or omission. In healthcare, this can make LLMs unusable at best and dangerous at worst. We conducted 16 interviews with international systematic review experts to characterize the perceived utility and risks of LLMs in the specific context of medical evidence reviews. Experts indicated that LLMs can assist in the writing process by drafting summaries, generating templates, distilling information, and crosschecking information. They also raised concerns regarding confidently composed but inaccurate LLM outputs and other potential downstream harms, including decreased accountability and proliferation of low-quality reviews. Informed by this qualitative analysis, we identify criteria for rigorous evaluation of biomedical LLMs aligned with domain expert views. 2023.emnlp-main.626 @@ -8943,7 +8943,7 @@ YatinNandwani VineetKumar DineshRaghu - SachindraJoshi + SachindraJoshi LuisLastras 10335-10347 A major concern in using deep learning based generative models for document-grounded dialogs is the potential generation of responses that are not faithful to the underlying document. Existing automated metrics used for evaluating the faithfulness of response with respect to the grounding document measure the degree of similarity between the generated response and the document’s content. However, these automated metrics are far from being well aligned with human judgments. Therefore, to improve the measurement of faithfulness, we propose a new metric that utilizes (Conditional) Point-wise Mutual Information (PMI) between the generated response and the source document, conditioned on the dialogue. PMI quantifies the extent to which the document influences the generated response – with a higher PMI indicating a more faithful response. We build upon this idea to create a new decoding technique that incorporates PMI into the response generation process to predict more faithful responses. Our experiments on the BEGIN benchmark demonstrate an improved correlation of our metric with human evaluation. We also show that our decoding technique is effective in generating more faithful responses when compared to standard decoding techniques on a set of publicly available document-grounded dialog datasets. @@ -9158,7 +9158,7 @@ ZiedBouraoui NaLi UsashiChatterjee - LuisEspinosa-Anke + LuisEspinosa-Anke StevenSchockaert 10587-10596 Concepts play a central role in many applications. This includes settings where concepts have to be modelled in the absence of sentence context. Previous work has therefore focused on distilling decontextualised concept embeddings from language models. But concepts can be modelled from different perspectives, whereas concept embeddings typically mostly capture taxonomic structure. To address this issue, we propose a strategy for identifying what different concepts, from a potentially large concept vocabulary, have in common with others. We then represent concepts in terms of the properties they share with the other concepts. To demonstrate the practical usefulness of this way of modelling concepts, we consider the task of ultra-fine entity typing, which is a challenging multi-label classification problem. We show that by augmenting the label set with shared properties, we can improve the performance of the state-of-the-art models for this task. @@ -9170,7 +9170,7 @@ <fixed-case>ALD</fixed-case>i: Quantifying the <fixed-case>A</fixed-case>rabic Level of Dialectness of Text AmrKeleg - SharonGoldwater + SharonGoldwater WalidMagdy 10597-10611 Transcribed speech and user-generated text in Arabic typically contain a mixture of Modern Standard Arabic (MSA), the standardized language taught in schools, and Dialectal Arabic (DA), used in daily communications. To handle this variation, previous work in Arabic NLP has focused on Dialect Identification (DI) on the sentence or the token level. However, DI treats the task as binary, whereas we argue that Arabic speakers perceive a spectrum of dialectness, which we operationalize at the sentence level as the Arabic Level of Dialectness (ALDi), a continuous linguistic variable. We introduce the AOC-ALDi dataset (derived from the AOC dataset), containing 127,835 sentences (17% from news articles and 83% from user comments on those articles) which are manually labeled with their level of dialectness. We provide a detailed analysis of AOC-ALDi and show that a model trained on it can effectively identify levels of dialectness on a range of other corpora (including dialects and genres not included in AOC-ALDi), providing a more nuanced picture than traditional DI systems. Through case studies, we illustrate how ALDi can reveal Arabic speakers’ stylistic choices in different situations, a useful property for sociolinguistic analyses. @@ -9211,7 +9211,7 @@ Cross-Lingual Consistency of Factual Knowledge in Multilingual Language Models JiruiQi - RaquelFernández + RaquelFernández AriannaBisazza 10650-10666 Multilingual large-scale Pretrained Language Models (PLMs) have been shown to store considerable amounts of factual knowledge, but large variations are observed across languages. With the ultimate goal of ensuring that users with different language backgrounds obtain consistent feedback from the same model, we study the cross-lingual consistency (CLC) of factual knowledge in various multilingual PLMs. To this end, we propose a Ranking-based Consistency (RankC) metric to evaluate knowledge consistency across languages independently from accuracy. Using this metric, we conduct an in-depth analysis of the determining factors for CLC, both at model level and at language-pair level. Among other results, we find that increasing model size leads to higher factual probing accuracy in most languages, but does not improve cross-lingual consistency. Finally, we conduct a case study on CLC when new factual associations are inserted in the PLMs via model editing. Results on a small sample of facts inserted in English reveal a clear pattern whereby the new piece of knowledge transfers only to languages with which English has a high RankC score. All code and data are released at https://github.com/Betswish/Cross-Lingual-Consistency. @@ -9235,7 +9235,7 @@ Bridging the Digital Divide: Performance Variation across Socio-Economic Factors in Vision-Language Models JoanNwatu OanaIgnat - RadaMihalcea + RadaMihalcea 10686-10702 Despite the impressive performance of current AI models reported across various tasks, performance reports often do not include evaluations of how these models perform on the specific groups that will be impacted by these technologies. Among the minority groups under-represented in AI, data from low-income households are often overlooked in data collection and model evaluation. We evaluate the performance of a state-of-the-art vision-language model (CLIP) on a geo-diverse dataset containing household images associated with different income values (DollarStreet) and show that performance inequality exists among households of different income levels. Our results indicate that performance for the poorer groups is consistently lower than the wealthier groups across various topics and countries. We highlight insights that can help mitigate these issues and propose actionable steps for economic-level inclusive AI development. 2023.emnlp-main.660 @@ -9246,7 +9246,7 @@ Conceptor-Aided Debiasing of Large Language Models LiYifei - LyleUngar + LyleUngar JoãoSedoc 10703-10727 Pre-trained large language models (LLMs) reflect the inherent social biases of their training corpus. Many methods have been proposed to mitigate this issue, but they often fail to debias or they sacrifice model accuracy. We use *conceptors*–a soft projection method–to identify and remove the bias subspace in LLMs such as BERT and GPT. We propose two methods of applying conceptors (1) bias subspace projection by post-processing by the conceptor NOT operation; and (2) a new architecture, conceptor-intervened BERT (CI-BERT), which explicitly incorporates the conceptor projection into all layers during training. We find that conceptor post-processing achieves state-of-the-art (SoTA) debiasing results while maintaining LLMs’ performance on the GLUE benchmark. Further, it is robust in various scenarios and can mitigate intersectional bias efficiently by its AND operation on the existing bias subspaces. Although CI-BERT’s training takes all layers’ bias into account and can beat its post-processing counterpart in bias mitigation, CI-BERT reduces the language model accuracy. We also show the importance of carefully constructing the bias subspace. The best results are obtained by removing outliers from the list of biased words, combining them (via the OR operation), and computing their embeddings using the sentences from a cleaner corpus. @@ -9258,7 +9258,7 @@ <fixed-case>AMR</fixed-case> Parsing is Far from Solved: <fixed-case>G</fixed-case>r<fixed-case>APES</fixed-case>, the Granular <fixed-case>AMR</fixed-case> Parsing Evaluation Suite JonasGroschwitz - ShayCohen + ShayCohen LuciaDonatelli MeaghanFowlie 10728-10752 @@ -9479,7 +9479,7 @@ We Are What We Repeatedly Do: Inducing and Deploying Habitual Schemas in Persona-Based Responses BenjaminKane - LenhartSchubert + LenhartSchubert 10998-11016 Many practical applications of dialogue technology require the generation of responses according to a particular developer-specified persona. While a variety of personas can be elicited from recent large language models, the opaqueness and unpredictability of these models make it desirable to be able to specify personas in an explicit form. In previous work, personas have typically been represented as sets of one-off pieces of self-knowledge that are retrieved by the dialogue system for use in generation. However, in realistic human conversations, personas are often revealed through story-like narratives that involve rich habitual knowledge – knowledge about kinds of events that an agent often participates in (e.g., work activities, hobbies, sporting activities, favorite entertainments, etc.), including typical goals, sub-events, preconditions, and postconditions of those events. We capture such habitual knowledge using an explicit schema representation, and propose an approach to dialogue generation that retrieves relevant schemas to condition a large language model to generate persona-based responses. Furthermore, we demonstrate a method for bootstrapping the creation of such schemas by first generating generic passages from a set of simple facts, and then inducing schemas from the generated passages. 2023.emnlp-main.678 @@ -9492,7 +9492,7 @@ QiJia SiyuRen YizhuLiu - KennyZhu + KennyZhu 11017-11031 Despite tremendous improvements in natural language generation, summarization models still suffer from the unfaithfulness issue. Previous work evaluates faithfulness either using models trained on the other tasks or in-domain synthetic data, or prompting a large model such as ChatGPT. This paper proposes to do zero-shot faithfulness evaluation simply with a moderately-sized foundation language model. We introduce a new metric FFLM, which is a combination of probability changes based on the intuition that prefixing a piece of text that is consistent with the output will increase the probability of predicting the output. Experiments show that FFLM performs competitively with or even outperforms ChatGPT on both inconsistency detection and faithfulness rating with 24x fewer parameters. FFLM also achieves improvements over other strong baselines. 2023.emnlp-main.679 @@ -9544,7 +9544,7 @@ A Predictive Factor Analysis of Social Biases and Task-Performance in Pretrained Masked Language Models YiZhou - JoseCamacho-Collados + JoseCamacho-Collados DanushkaBollegala 11082-11100 Various types of social biases have been reported with pretrained Masked Language Models (MLMs) in prior work. However, multiple underlying factors are associated with an MLM such as its model size, size of the training data, training objectives, the domain from which pretraining data is sampled, tokenization, and languages present in the pretrained corpora, to name a few. It remains unclear as to which of those factors influence social biases that are learned by MLMs. To study the relationship between model factors and the social biases learned by an MLM, as well as the downstream task performance of the model, we conduct a comprehensive study over 39 pretrained MLMs covering different model sizes, training objectives, tokenization methods, training data domains and languages. Our results shed light on important factors often neglected in prior literature, such as tokenization or model objectives. @@ -9584,7 +9584,7 @@ <fixed-case>S</fixed-case>p<fixed-case>EL</fixed-case>: Structured Prediction for Entity Linking - HassanShavarani + HassanShavarani AnoopSarkar 11123-11137 Entity linking is a prominent thread of research focused on structured data creation by linking spans of text to an ontology or knowledge source. We revisit the use of structured prediction for entity linking which classifies each individual input token as an entity, and aggregates the token predictions. Our system, called SpEL (Structured prediction for Entity Linking) is a state-of-the-art entity linking system that uses some new ideas to apply structured prediction to the task of entity linking including: two refined fine-tuning steps; a context sensitive prediction aggregation strategy; reduction of the size of the model’s output vocabulary, and; we address a common problem in entity-linking systems where there is a training vs. inference tokenization mismatch. Our experiments show that we can outperform the state-of-the-art on the commonly used AIDA benchmark dataset for entity linking to Wikipedia. Our method is also very compute efficient in terms of number of parameters and speed of inference. @@ -9598,7 +9598,7 @@ PhilippHeinisch MatthiasOrlikowski JuliaRomberg - PhilippCimiano + PhilippCimiano 11138-11154 Many annotation tasks in natural language processing are highly subjective in that there can be different valid and justified perspectives on what is a proper label for a given example. This also applies to the judgment of argument quality, where the assignment of a single ground truth is often questionable. At the same time, there are generally accepted concepts behind argumentation that form a common ground. To best represent the interplay of individual and shared perspectives, we consider a continuum of approaches ranging from models that fully aggregate perspectives into a majority label to “share nothing”-architectures in which each annotator is considered in isolation from all other annotators. In between these extremes, inspired by models used in the field of recommender systems, we investigate the extent to which architectures that predict labels for single annotators but include layers that model the relations between different annotators are beneficial. By means of two tasks of argument quality classification (argument concreteness and validity/novelty of conclusions), we show that recommender architectures increase the averaged annotator-individual F1-scores up to 43% over a majority-label model. Our findings indicate that approaches to subjectivity can benefit from relating individual perspectives. 2023.emnlp-main.687 @@ -9621,8 +9621,8 @@ clembench: Using Game Play to Evaluate Chat-Optimized Language Models as Conversational Agents - KrantiChalamalasetti - JanaGötze + KrantiChalamalasetti + JanaGötze SherzodHakimov BrielenMadureira PhilippSadler @@ -9661,7 +9661,7 @@ Anchoring Fine-tuning of Sentence Transformer with Semantic Label Information for Efficient Truly Few-shot Classification AmaliePauli - LeonDerczynski + LeonDerczynski IraAssent 11254-11264 Few-shot classification is a powerful technique, but training requires substantial computing power and data. We propose an efficient method with small model sizes and less training data with only 2-8 training instances per class. Our proposed method, AncSetFit, targets low data scenarios by anchoring the task and label information through sentence embeddings in fine-tuning a Sentence Transformer model. It uses contrastive learning and a triplet loss to enforce training instances of a class to be closest to its own textual semantic label information in the embedding space - and thereby learning to embed different class instances more distinct. AncSetFit obtains strong performance in data-sparse scenarios compared to existing methods across SST-5, Emotion detection, and AG News data, even with just two examples per class. @@ -9675,10 +9675,10 @@ JonSaad-Falcon OmarKhattab KeshavSanthanam - RaduFlorian + RaduFlorian MartinFranz - SalimRoukos - AvirupSil + SalimRoukos + AvirupSil MdSultan ChristopherPotts 11265-11279 @@ -9750,7 +9750,7 @@ An Integrative Survey on Mental Health Conversational Agents to Bridge Computer Science and Medical Perspectives Young MinCho SunnyRai - LyleUngar + LyleUngar JoãoSedoc SharathGuntuku 11346-11369 @@ -9808,7 +9808,7 @@ GuoqingZheng VictorDibia AhmedAwadallah - PaulBennett + PaulBennett 11445-11475 The remarkable abilities of large language models (LLMs) like ChatGPT and GPT-4 partially stem from the post-training processes involving human preferences encoded within a reward model as part of a Reinforcement Learning from Human Feedback (RLHF) regimen. These reward models (RMs) often lack direct knowledge of why, or under what principles, the preferences annotations were made. In this study, we identify principles that guide RMs to better align with human preferences, and then develop an axiomatic framework to generate a rich variety of preference signals to uphold them. We use these axiomatic signals to train a model for the scoring answers to longform questions. Our approach yields a Preference Model with only about 220M parameters that agrees with gold human-annotated preference labels more often than GPT-4. The contributions of this work include: training a standalone preference model that can score human- and LLM-generated answers on the same scale; developing an axiomatic framework for generating training data pairs tailored to certain principles; and showing that a small amount of axiomatic signals can help small models outperform GPT-4 in preference scoring. We intend to release our axiomatic data and model. 2023.emnlp-main.702 @@ -9846,7 +9846,7 @@ MarcoGaido SaraPapi MauroCettolo - MatteoNegri + MatteoNegri LuisaBentivogli 11505-11517 When translating words referring to the speaker, speech translation (ST) systems should not resort to default masculine generics nor rely on potentially misleading vocal traits. Rather, they should assign gender according to the speakers’ preference. The existing solutions to do so, though effective, are hardly feasible in practice as they involve dedicated model re-training on gender-labeled ST data. To overcome these limitations, we propose the first inference-time solution to control speaker-related gender inflections in ST. Our approach partially replaces the (biased) internal language model (LM) implicitly learned by the ST decoder with gender-specific external LMs. Experiments on en\rightarrowes/fr/it show that our solution outperforms the base models and the best training-time mitigation strategy by up to 31.0 and 1.6 points in gender accuracy, respectively, for feminine forms. The gains are even larger (up to 32.0 and 3.4) in the challenging condition where speakers’ vocal traits conflict with their gender. @@ -9920,7 +9920,7 @@ <fixed-case>B</fixed-case>io<fixed-case>FEG</fixed-case>: Generate Latent Features for Biomedical Entity Linking XuhuiSui - YingZhang + YingZhang XiangruiCai KehuiSong BaohangZhou @@ -10034,8 +10034,8 @@ DevamanyuHazarika ShikibMehri SeokhwanKim - DilekHakkani-Tur - YangLiu + DilekHakkani-Tur + YangLiu MahdiNamazifar 11709-11737 Instruction-based multitasking has played a critical role in the success of large language models (LLMs) in multi-turn dialog applications. While publicly available LLMs have shown promising performance, when exposed to complex instructions with multiple constraints, they lag against state-of-the-art models like ChatGPT. In this work, we hypothesize that the availability of large-scale complex demonstrations is crucial in bridging this gap. Focusing on dialog applications, we propose a novel framework, CESAR, that unifies a large number of dialog tasks in the same format and allows programmatic induction of complex instructions without any manual effort. We apply CESAR on InstructDial, a benchmark for instruction-based dialog tasks. We further enhance InstructDial with new datasets and tasks and utilize CESAR to induce complex tasks with compositional instructions. This results in a new benchmark called InstructDial++, which includes 63 datasets with 86 basic tasks and 68 composite tasks. Through rigorous experiments, we demonstrate the scalability of CESAR in providing rich instructions. Models trained on InstructDial++ can follow compositional prompts, such as prompts that ask for multiple stylistic constraints. @@ -10067,7 +10067,7 @@ YuHou NischalChandra MarjorieFreedman - RalphWeischedel + RalphWeischedel NanyunPeng 11753-11770 Multimodal counterfactual reasoning is a vital yet challenging ability for AI systems. It involves predicting the outcomes of hypothetical circumstances based on vision and language inputs, which enables AI models to learn from failures and explore hypothetical scenarios. Despite its importance, there are only a few datasets targeting the counterfactual reasoning abilities of multimodal models. Among them, they only cover reasoning over synthetic environments or specific types of events (e.g. traffic collisions), making them hard to reliably benchmark the model generalization ability in diverse real-world scenarios and reasoning dimensions. To overcome these limitations, we develop a video question answering dataset, ACQUIRED: it consists of 3.9K annotated videos, encompassing a wide range of event types and incorporating both first and third-person viewpoints, which ensures a focus on real-world diversity. In addition, each video is annotated with questions that span three distinct dimensions of reasoning, including physical, social, and temporal, which can comprehensively evaluate the model counterfactual abilities along multiple aspects. We benchmark our dataset against several state-of-the-art language-only and multimodal models and experimental results demonstrate a significant performance gap (>13%) between models and humans. The findings suggest that multimodal counterfactual reasoning remains an open challenge and ACQUIRED is a comprehensive and reliable benchmark for inspiring future research in this direction. @@ -10106,7 +10106,7 @@ JochenDe Weerdt KristofCoussement ArnoDe Caigny - Marie-FrancineMoens + Marie-FrancineMoens 11792-11806 We introduce CORE, a dataset for few-shot relation classification (RC) focused on company relations and business entities. CORE includes 4,708 instances of 12 relation types with corresponding textual evidence extracted from company Wikipedia pages. Company names and business entities pose a challenge for few-shot RC models due to the rich and diverse information associated with them. For example, a company name may represent the legal entity, products, people, or business divisions depending on the context. Therefore, deriving the relation type between entities is highly dependent on textual context. To evaluate the performance of state-of-the-art RC models on the CORE dataset, we conduct experiments in the few-shot domain adaptation setting. Our results reveal substantial performance gaps, confirming that models trained on different domains struggle to adapt to CORE. Interestingly, we find that models trained on CORE showcase improved out-of-domain performance, which highlights the importance of high-quality data for robust domain generalization. Specifically, the information richness embedded in business entities allows models to focus on contextual nuances, reducing their reliance on superficial clues such as relation-specific verbs. In addition to the dataset, we provide relevant code snippets to facilitate reproducibility and encourage further research in the field. The CORE dataset and code are publicly available at https://github.com/pnborchert/CORE. 2023.emnlp-main.722 @@ -10241,7 +10241,7 @@ An Empirical Study of Translation Hypothesis Ensembling with Large Language Models AntónioFarinhas Joséde Souza - AndreMartins + AndreMartins 11956-11970 Large language models (LLMs) are becoming a one-fits-many solution, but they sometimes hallucinate or produce unreliable output. In this paper, we investigate how hypothesis ensembling can improve the quality of the generated text for the specific problem of LLM-based machine translation. We experiment with several techniques for ensembling hypotheses produced by LLMs such as ChatGPT, LLaMA, and Alpaca. We provide a comprehensive study along multiple dimensions, including the method to generate hypotheses (multiple prompts, temperature-based sampling, and beam search) and the strategy to produce the final translation (instruction-based, quality-based reranking, and minimum Bayes risk (MBR) decoding). Our results show that MBR decoding is a very effective method, that translation quality can be improved using a small number of samples, and that instruction tuning has a strong impact on the relation between the diversity of the hypotheses and the sampling temperature. 2023.emnlp-main.733 @@ -10256,7 +10256,7 @@ SeungjooLee SungjoonPark YunxinLiu - JinhoChoi + JinhoChoi Sung-JuLee 11971-11988 Psychiatrists diagnose mental disorders via the linguistic use of patients. Still, due to data privacy, existing passive mental health monitoring systems use alternative features such as activity, app usage, and location via mobile devices. We propose FedTherapist, a mobile mental health monitoring system that utilizes continuous speech and keyboard input in a privacy-preserving way via federated learning. We explore multiple model designs by comparing their performance and overhead for FedTherapist to overcome the complex nature of on-device language model training on smartphones. We further propose a Context-Aware Language Learning (CALL) methodology to effectively utilize smartphones’ large and noisy text for mental health signal sensing. Our IRB-approved evaluation of the prediction of self-reported depression, stress, anxiety, and mood from 46 participants shows higher accuracy of FedTherapist compared with the performance with non-language features, achieving 0.15 AUROC improvement and 8.21% MAE reduction. @@ -10291,7 +10291,7 @@ HaoYu JiuyiLi JinsongSu - DegenHuang + DegenHuang 12011-12027 A persistent goal of multilingual neural machine translation (MNMT) is to continually adapt the model to support new language pairs or improve some current language pairs without accessing the previous training data. To achieve this, the existing methods primarily focus on preventing catastrophic forgetting by making compromises between the original and new language pairs, leading to sub-optimal performance on both translation tasks. To mitigate this problem, we propose a dual importance-based model division method to divide the model parameters into two parts and separately model the translation of the original and new tasks. Specifically, we first remove the parameters that are negligible to the original tasks but essential to the new tasks to obtain a pruned model, which is responsible for the original translation tasks. Then we expand the pruned model with external parameters and fine-tune the newly added parameters with new training data. The whole fine-tuned model will be used for the new translation tasks. Experimental results show that our method can efficiently adapt the original model to various new translation tasks while retaining the performance of the original tasks. Further analyses demonstrate that our method consistently outperforms several strong baselines under different incremental translation scenarios. 2023.emnlp-main.736 @@ -10359,10 +10359,10 @@ KalpeshKrishna XinxiLyu MikeLewis - Wen-tauYih + Wen-tauYih PangKoh MohitIyyer - LukeZettlemoyer + LukeZettlemoyer HannanehHajishirzi 12076-12100 Evaluating the factuality of long-form text generated by large language models (LMs) is non-trivial because (1) generations often contain a mixture of supported and unsupported pieces of information, making binary judgments of quality inadequate, and (2) human evaluation is time-consuming and costly. In this paper, we introduce FACTSCORE, a new evaluation that breaks a generation into a series of atomic facts and computes the percentage of atomic facts supported by a reliable knowledge source. We conduct an extensive human evaluation to obtain FACTSCOREs of people biographies generated by several state-of-the-art commercial LMs—InstructGPT, ChatGPT, and the retrieval-augmented PerplexityAI—and report new analysis demonstrating the need for such a fine-grained score (e.g., ChatGPT only achieves 58%). Since human evaluation is costly, we also introduce an automated model that estimates FACTSCORE using retrieval and a strong language model, with less than a 2% error rate. Finally, we use this automated metric to evaluate 6,500 generations from a new set of 13 recent LMs that would have cost $26K if evaluated by humans, with various findings: GPT-4 and ChatGPT are more factual than public models, and Vicuna and Alpaca are some of the best public models. FACTSCORE is available for public use via ‘pip install factscore‘. @@ -10390,7 +10390,7 @@ YeLiu ChenweiZhang TaoZhang - PhilipYu + PhilipYu 12109-12119 While Chain-of-Thought prompting is popular in reasoning tasks, its application to Large Language Models (LLMs) in Natural Language Understanding (NLU) is under-explored. Motivated by multi-step reasoning of LLMs, we propose Coarse-to-Fine Chain-of-Thought (CoF-CoT) approach that breaks down NLU tasks into multiple reasoning steps where LLMs can learn to acquire and leverage essential concepts to solve tasks from different granularities. Moreover, we propose leveraging semantic-based Abstract Meaning Representation (AMR) structured knowledge as an intermediate step to capture the nuances and diverse structures of utterances, and to understand connections between their varying levels of granularity. Our proposed approach is demonstrated effective in assisting the LLMs adapt to the multi-grained NLU tasks under both zero-shot and few-shot multi-domain settings. 2023.emnlp-main.743 @@ -10456,7 +10456,7 @@ GuangliangLiu ZhiyuXue XitongZhang - KristenJohnson + KristenJohnson RongrongWang 12178-12189 Fine-tuning pretrained language models (PLMs) for downstream tasks is a large-scale optimization problem, in which the choice of the training algorithm critically determines how well the trained model can generalize to unseen test data, especially in the context of few-shot learning. To achieve good generalization performance and avoid overfitting, techniques such as data augmentation and pruning are often applied. However, adding these regularizations necessitates heavy tuning of the hyperparameters of optimization algorithms, such as the popular Adam optimizer. In this paper, we propose a two-stage fine-tuning method, PAC-tuning, to address this optimization challenge. First, based on PAC-Bayes training, PAC-tuning directly minimizes the PAC-Bayes generalization bound to learn proper parameter distribution. Second, PAC-tuning modifies the gradient by injecting noise with the variance learned in the first stage into the model parameters during training, resulting in a variant of perturbed gradient descent (PGD). In the past, the few-shot scenario posed difficulties for PAC-Bayes training because the PAC-Bayes bound, when applied to large models with limited training data, might not be stringent. Our experimental results across 5 GLUE benchmark tasks demonstrate that PAC-tuning successfully handles the challenges of fine-tuning tasks and outperforms strong baseline methods by a visible margin, further confirming the potential to apply PAC training for any other settings where the Adam optimizer is currently used for training. @@ -10520,7 +10520,7 @@ Select, Prompt, Filter: Distilling Large Language Models for Summarizing Conversations Minh-QuangPham - SathishIndurthi + SathishIndurthi ShamilChollampatt MarcoTurchi 12257-12265 @@ -10572,7 +10572,7 @@ Prompt as Triggers for Backdoor Attack: Examining the Vulnerability in Language Models ShuaiZhao JinmingWen - AnhLuu + AnhLuu JunboZhao JieFu 12303-12317 @@ -10619,7 +10619,7 @@ FajriKoto NurulAisyah HaonanLi - TimothyBaldwin + TimothyBaldwin 12359-12374 Although large language models (LLMs) are often pre-trained on large-scale multilingual texts, their reasoning abilities and real-world knowledge are mainly evaluated based on English datasets. Assessing LLM capabilities beyond English is increasingly vital but hindered due to the lack of suitable datasets. In this work, we introduce IndoMMLU, the first multi-task language understanding benchmark for Indonesian culture and languages, which consists of questions from primary school to university entrance exams in Indonesia. By employing professional teachers, we obtain 14,981 questions across 64 tasks and education levels, with 46% of the questions focusing on assessing proficiency in the Indonesian language and knowledge of nine local languages and cultures in Indonesia. Our empirical evaluations show that GPT-3.5 only manages to pass the Indonesian primary school level, with limited knowledge of local Indonesian languages and culture. Other smaller models such as BLOOMZ and Falcon perform at even lower levels. 2023.emnlp-main.760 @@ -10644,7 +10644,7 @@ Bridging Information-Theoretic and Geometric Compression in Language Models EmilyCheng CorentinKervadec - MarcoBaroni + MarcoBaroni 12397-12420 For a language model (LM) to faithfully model human language, it must compress vast, potentially infinite information into relatively few dimensions. We propose analyzing compression in (pre-trained) LMs from two points of view: geometric and information-theoretic. We demonstrate that the two views are highly correlated, such that the intrinsic geometric dimension of linguistic data predicts their coding length under the LM. We then show that, in turn, high compression of a linguistic dataset predicts rapid adaptation to that dataset, confirming that being able to compress linguistic information is an important part of successful LM performance. As a practical byproduct of our analysis, we evaluate a battery of intrinsic dimension estimators for the first time on linguistic data, showing that only some encapsulate the relationship between information-theoretic compression, geometric compression, and ease-of-adaptation. 2023.emnlp-main.762 @@ -10655,7 +10655,7 @@ Pre-training Language Models for Comparative Reasoning MengxiaYu - ZhihanZhang + ZhihanZhang WenhaoYu MengJiang 12421-12433 @@ -10674,7 +10674,7 @@ WeiZou ShiminTao HaoYang - JiajunChen + JiajunChen ShujianHuang 12434-12447 Machine translation (MT) quality estimation (QE) is a crucial task to estimate the quality of MT outputs when reference translations are unavailable. Many studies focus on generating pseudo data using large parallel corpus and achieve remarkable success in the supervised setting. However, pseudo data solutions are less satisfying in unsupervised scenarios because the pseudo labels are inaccurate or the pseudo translations differ from the real ones. To address these problems, we propose to generate pseudo data using the MT model with constrained beam search (CBSQE). CBSQE preserves the reference parts with high MT probabilities as correct translations, while the rest parts as the wrong ones for MT generation. Therefore, CBSQE can reduce the false negative labels caused by synonyms. Overall, beam search will prefer a more real hypothesis with a higher MT generation likelihood. Extensive experiments demonstrate that CBSQE outperforms strong baselines in both supervised and unsupervised settings. Analyses further show the superiority of CBSQE. The code is available at https://github.com/NJUNLP/njuqe. @@ -10688,7 +10688,7 @@ JohnMorris VolodymyrKuleshov VitalyShmatikov - AlexanderRush + AlexanderRush 12448-12460 How much private information do text embeddings reveal about the original text? We investigate the problem of embedding inversion, reconstructing the full text represented in dense text embeddings. We frame the problem as controlled generation: generating text that, when reembedded, is close to a fixed point in latent space. We find that although a naive model conditioned on the embedding performs poorly, a multi-step method that iteratively corrects and re-embeds text is able to recover 92% of 32-token text inputs exactly. We train our model to decode text embeddings from two state-of-the-art embedding models, and also show that our model can recover important personal information (full names) from a dataset of clinical notes. 2023.emnlp-main.765 @@ -10798,7 +10798,7 @@ RuochenZhang SamuelCahyawijaya Jan Christian BlaiseCruz - GentaWinata + GentaWinata Alham FikriAji 12567-12582 Multilingual Large Language Models (LLMs) have recently shown great capabilities in a wide range of tasks, exhibiting state-of-the-art performance through zero-shot or few-shot prompting methods. While there have been extensive studies on their abilities in monolingual tasks, the investigation of their potential in the context of code-switching (CSW), the practice of alternating languages within an utterance, remains relatively uncharted. In this paper, we provide a comprehensive empirical analysis of various multilingual LLMs, benchmarking their performance across four tasks: sentiment analysis, machine translation, summarization and word-level language identification. Our results indicate that despite multilingual LLMs exhibiting promising outcomes in certain tasks using zero or few-shot prompting, they still underperform in comparison to fine-tuned models of much smaller scales. We argue that current “multilingualism’ in LLMs does not inherently imply proficiency with code-switching texts, calling for future research to bridge this discrepancy. @@ -10822,8 +10822,8 @@ Identification of Multimodal Stance Towards Frames of Communication - MaxwellWeinzierl - SandaHarabagiu + MaxwellWeinzierl + SandaHarabagiu 12597-12609 Frames of communication are often evoked in multimedia documents. When an author decides to add an image to a text, one or both of the modalities may evoke a communication frame. Moreover, when evoking the frame, the author also conveys her/his stance towards the frame. Until now, determining if the author is in favor of, against or has no stance towards the frame was performed automatically only when processing texts. This is due to the absence of stance annotations on multimedia documents. In this paper we introduce MMVax-Stance, a dataset of 11,300 multimedia documents retrieved from social media, which have stance annotations towards 113 different frames of communication. This dataset allowed us to experiment with several models of multimedia stance detection, which revealed important interactions between texts and images in the inference of stance towards communication frames. When inferring the text/image relations, a set of 46,606 synthetic examples of multimodal documents with known stance was generated. This greatly impacted the quality of identifying multimedia stance, yielding an improvement of 20% in F1-score. 2023.emnlp-main.776 @@ -10952,7 +10952,7 @@ Rethinking Negative Pairs in Code Search HaochenLi XinZhou - AnhLuu + AnhLuu ChunyanMiao 12760-12774 Recently, contrastive learning has become a key component in fine-tuning code search models for software development efficiency and effectiveness. It pulls together positive code snippets while pushing negative samples away given search queries. Among contrastive learning, InfoNCE is the most widely used loss function due to its better performance. However, the following problems in negative samples of InfoNCE may deteriorate its representation learning: 1) The existence of false negative samples in large code corpora due to duplications. 2). The failure to explicitly differentiate between the potential relevance of negative samples. As an example, a bubble sorting algorithm example is less “negative” than a file saving function for the quick sorting algorithm query. In this paper, we tackle the above problems by proposing a simple yet effective Soft-InfoNCE loss that inserts weight terms into InfoNCE. In our proposed loss function, we apply three methods to estimate the weights of negative pairs and show that the vanilla InfoNCE loss is a special case of Soft-InfoNCE. Theoretically, we analyze the effects of Soft-InfoNCE on controlling the distribution of learnt code representations and on deducing a more precise mutual information estimation. We furthermore discuss the superiority of proposed loss functions with other design alternatives. Extensive experiments demonstrate the effectiveness of Soft-InfoNCE and weights estimation methods under state-of-the-art code search models on a large-scale public dataset consisting of six programming languages. @@ -11055,7 +11055,7 @@ Context Compression for Auto-regressive Transformers with Sentinel Tokens SiyuRen QiJia - KennyZhu + KennyZhu 12860-12867 The quadratic complexity of the attention module makes it gradually become the bulk of compute in Transformer-based LLMs during generation. Moreover, the excessive key-value cache that arises when dealing with long inputs also brings severe issues on memory footprint and inference latency. In this work, we propose a plug-and-play approach that is able to incrementally compress the intermediate activation of a specified span of tokens into compact ones, thereby reducing both memory and computational cost when processing subsequent context. Experiments on both in-domain language modeling and zero-shot open-ended document generation demonstrate the advantage of our approach over sparse attention baselines in terms of fluency, n-gram matching, and semantic similarity. At last, we comprehensively profile the benefit of context compression on improving the system throughout. Code is available at https://github.com/DRSY/KV_Compression. 2023.emnlp-main.794 @@ -11097,7 +11097,7 @@ TerryRuas MohamedAbdalla BelaGipp - SaifMohammad + SaifMohammad 12896-12913 Natural Language Processing (NLP) is poised to substantially influence the world. However, significant progress comes hand-in-hand with substantial risks. Addressing them requires broad engagement with various fields of study. Yet, little empirical work examines the state of such engagement (past or current). In this paper, we quantify the degree of influence between 23 fields of study and NLP (on each other). We analyzed ~77k NLP papers, ~3.1m citations from NLP papers to other papers, and ~1.8m citations from other papers to NLP papers. We show that, unlike most fields, the cross-field engagement of NLP, measured by our proposed Citation Field Diversity Index (CFDI), has declined from 0.58 in 1980 to 0.31 in 2022 (an all-time low). In addition, we find that NLP has grown more insular—citing increasingly more NLP papers and having fewer papers that act as bridges between fields. NLP citations are dominated by computer science; Less than 8% of NLP citations are to linguistics, and less than 3% are to math and psychology. These findings underscore NLP’s urgent need to reflect on its engagement with various fields. 2023.emnlp-main.797 @@ -11155,7 +11155,7 @@ <fixed-case>M</fixed-case>ail<fixed-case>E</fixed-case>x: Email Event and Argument Extraction SaurabhSrivastava - GauravSingh + GauravSingh ShouMatsumoto AliRaz PauloCosta @@ -11220,7 +11220,7 @@ JiangnanLi YiceZhang BinLiang - Kam-FaiWong + Kam-FaiWong RuifengXu 13043-13052 Recent efforts have endeavored to employ the sequence-to-sequence (Seq2Seq) model in Information Extraction (IE) due to its potential to tackle multiple IE tasks in a unified manner. Under this formalization, multiple structured objects are concatenated as the target sequence in a predefined order. However, structured objects, by their nature, constitute an unordered set. Consequently, this formalization introduces a potential order bias, which can impair model learning. Targeting this issue, this paper proposes a set learning approach that considers multiple permutations of structured objects to optimize set probability approximately. Notably, our approach does not require any modifications to model structures, making it easily integrated into existing generative IE frameworks. Experiments show that our method consistently improves existing frameworks on vast tasks and datasets. @@ -11324,7 +11324,7 @@ RuiHou NamanGoyal MarjanGhazvininejad - LukeZettlemoyer + LukeZettlemoyer MadianKhabsa 13142-13152 Large multilingual language models typically rely on a single vocabulary shared across 100+ languages. As these models have increased in parameter count and depth, vocabulary size has remained largely unchanged. This vocabulary bottleneck limits the representational capabilities of multilingual models like XLM-R. In this paper, we introduce a new approach for scaling to very large multilingual vocabularies by de-emphasizing token sharing between languages with little lexical overlap and assigning vocabulary capacity to achieve sufficient coverage for each individual language. Tokenizations using our vocabulary are typically more semantically meaningful and shorter compared to XLM-R. Leveraging this improved vocabulary, we train XLM-V, a multilingual language model with a one million token vocabulary. XLM-V outperforms XLM-R on every task we tested on ranging from natural language inference (XNLI), question answering (MLQA, XQuAD, TyDiQA), to named entity recognition (WikiAnn). XLM-V is particularly effective on low-resource language tasks and outperforms XLM-R by 11.2% and 5.8% absolute on MasakhaNER and Americas NLI, respectively. @@ -11378,7 +11378,7 @@ Semantic Space Grounded Weighted Decoding for Multi-Attribute Controllable Dialogue Generation ZhilingZhang MengyueWu - KennyZhu + KennyZhu 13230-13243 Controlling chatbot utterance generation with multiple attributes such as personalities, emotions and dialogue acts is a practically useful but under-studied problem. We propose a novel framework called DASC that possesses strong controllability with a weighted decoding paradigm, while improving generation quality with the grounding in an attribute semantics space. Generation with multiple attributes is then intuitively implemented with an interpolation of multiple attribute embeddings, which results in substantial reduction in the model sizes. Experiments show that DASC can achieve high control accuracy in generation task with the simultaneous control of 3 aspects while also producing interesting and reasonably sensible responses, even in an out-of-distribution robustness test. 2023.emnlp-main.817 @@ -11494,7 +11494,7 @@ <fixed-case>LLML</fixed-case>ingua: Compressing Prompts for Accelerated Inference of Large Language Models HuiqiangJiang QianhuiWu - Chin-YewLin + Chin-YewLin YuqingYang LiliQiu 13358-13376 @@ -11535,7 +11535,7 @@ Characterizing and Verifying Scientific Claims: Qualitative Causal Structure is All You Need JinxuanWu - WenhanChao + WenhanChao XianZhou ZhunchenLuo 13428-13439 @@ -11563,7 +11563,7 @@ XingdiYuan ZiangXiao Marc-AlexandreCôté - PeterJansen + PeterJansen 13455-13471 In this work we investigate the capacity of language models to generate explicit, interpretable, and interactive world models of scientific and common-sense reasoning tasks. We operationalize this as a task of generating text games, expressed as hundreds of lines of Python code. To facilitate this task, we introduce ByteSized32, a corpus of 32 reasoning-focused text games totalling 20k lines of Python code. We empirically demonstrate that GPT-4 can use these games as templates for single-shot in-context learning, successfully producing runnable games on unseen topics in 28% of cases. When allowed to self-reflect on program errors, game runnability substantially increases to 58%. While evaluating simulation fidelity is labor intensive, we introduce a suite of automated metrics to assess game fidelity, technical validity, adherence to task specifications, and winnability, showing a high-degree of agreement with expert human ratings. We pose this as a challenge task to spur further development at the juncture of world modeling and code generation. 2023.emnlp-main.830 @@ -11590,7 +11590,7 @@ <fixed-case>M</fixed-case>a<fixed-case>N</fixed-case>t<fixed-case>LE</fixed-case>: Model-agnostic Natural Language Explainer - RakeshMenon + RakeshMenon KeremZaman ShashankSrivastava 13493-13511 @@ -11770,7 +11770,7 @@ Reading Order Matters: Information Extraction from Visually-rich Documents by Token Path Prediction - ChongZhang + ChongZhang YaGuo YiTu HuanChen @@ -11800,12 +11800,12 @@ The Sentiment Problem: A Critical Survey towards Deconstructing Sentiment Analysis - PranavVenkit + PranavVenkit MukundSrinath SanjanaGautam SaranyaVenkatraman VipulGupta - RebeccaPassonneau + RebeccaPassonneau ShomirWilson 13743-13763 We conduct an inquiry into the sociotechnical aspects of sentiment analysis (SA) by critically examining 189 peer-reviewed papers on their applications, models, and datasets. Our investigation stems from the recognition that SA has become an integral component of diverse sociotechnical systems, exerting influence on both social and technical users. By delving into sociological and technological literature on sentiment, we unveil distinct conceptualizations of this term in domains such as finance, government, and medicine. Our study exposes a lack of explicit definitions and frameworks for characterizing sentiment, resulting in potential challenges and biases. To tackle this issue, we propose an ethics sheet encompassing critical inquiries to guide practitioners in ensuring equitable utilization of SA. Our findings underscore the significance of adopting an interdisciplinary approach to defining sentiment in SA and offer a pragmatic solution for its implementation. @@ -11899,7 +11899,7 @@ Finding Authentic Counterhate Arguments: A Case Study with Public Figures AbdullahAlbanyan - AhmedHassan + AhmedHassan EduardoBlanco 13862-13876 We explore authentic counterhate arguments for online hateful content toward individuals. Previous efforts are limited to counterhate to fight against hateful content toward groups. Thus, we present a corpus of 54,816 hateful tweet-paragraph pairs, where the paragraphs are candidate counterhate arguments. The counterhate arguments are retrieved from 2,500 online articles from multiple sources. We propose a methodology that assures the authenticity of the counter argument and its specificity to the individual of interest. We show that finding arguments in online articles is an efficient alternative to counterhate generation approaches that may hallucinate unsupported arguments. We also present linguistic insights on the language used in counterhate arguments. Experimental results show promising results. It is more challenging, however, to identify counterhate arguments for hateful content toward individuals not included in the training set. @@ -11967,7 +11967,7 @@ PengLi JunpengLiu MaosongSun - YangLiu + YangLiu 13938-13951 Although existing multilingual neural machine translation (MNMT) models have demonstrated remarkable performance to handle multiple translation directions in a single model and achieved zero-shot translation between language pairs unseen in training, they still suffer from relatively poor translation qualities for some language pairs. A practical scenario is that how to continually update MNMT models for both supervised and zero-shot translations when limited new data arrives. To this end, we propose a two-stage approach that encourages original models to acquire language-agnostic multilingual representations from new data, and preserves the model architecture without introducing parameters. Experimental results and further analysis demonstrate that our method can efficiently improve performance of existing MNMT models in translation directions where they are initially weak, and mitigates the degeneration in the original well-performing translation directions, offering flexibility in the real-world scenario. 2023.emnlp-main.860 @@ -11994,15 +11994,15 @@ IdrisAbdulmumin Abinew AliAyele NedjmaOusidhoum - David IfeoluwaAdelani + David IfeoluwaAdelani Seid MuhieYimam Ibrahim Sa'idAhmad MeriemBeloucif - Saif M.Mohammad + Saif M.Mohammad SebastianRuder OumaimaHourrane PavelBrazdil - AlipioJorge + AlipioJorge Felermino Dário Mário AntónioAli DavisDavid SalomeyOsei @@ -12159,7 +12159,7 @@ JianLiu WeichangLiu YufengChen - JinanXu + JinanXu ZheZhao 14112-14123 Real-world named entity recognition (NER) datasets are notorious for their noisy nature, attributed to annotation errors, inconsistencies, and subjective interpretations. Such noises present a substantial challenge for traditional supervised learning methods. In this paper, we present a new and unified approach to tackle annotation noises for NER. Our method considers NER as a constituency tree parsing problem, utilizing a tree-structured Conditional Random Fields (CRFs) with uncertainty evaluation for integration. Through extensive experiments conducted on four real-world datasets, we demonstrate the effectiveness of our model in addressing both partial and incorrect annotation errors. Remarkably, our model exhibits superb performance even in extreme scenarios with 90% annotation noise. @@ -12173,7 +12173,7 @@ AndreaPiergentili BeatriceSavoldi DennisFucci - MatteoNegri + MatteoNegri LuisaBentivogli 14124-14140 Gender inequality is embedded in our communication practices and perpetuated in translation technologies. This becomes particularly apparent when translating into grammatical gender languages, where machine translation (MT) often defaults to masculine and stereotypical representations by making undue binary gender assumptions. Our work addresses the rising demand for inclusive language by focusing head-on on gender-neutral translation from English to Italian. We start from the essentials: proposing a dedicated benchmark and exploring automated evaluation methods. First, we introduce GeNTE, a natural, bilingual test set for gender-neutral translation, whose creation was informed by a survey on the perception and use of neutral language. Based on GeNTE, we then overview existing reference-based evaluation approaches, highlight their limits, and propose a reference-free method more suitable to assess gender-neutral translation. @@ -12208,7 +12208,7 @@ PengfeiLiu FahimFaisal AlissaOstapenko - GentaWinata + GentaWinata Alham FikriAji SamuelCahyawijaya YuliaTsvetkov @@ -12247,7 +12247,7 @@ OyvindTafjord AshishSabharwal KyleRichardson - HinrichSchuetze + HinrichSchuetze PeterClark 14190-14201 While large language models (LLMs) are proficient at question-answering (QA), it is not always clear how (or even if) an answer follows from their latent “beliefs”. This lack of interpretability is a growing impediment to widespread use of LLMs. To address this, our goals are to make model beliefs and their inferential relationships explicit, and to resolve inconsistencies that may exist, so that answers are supported by interpretable chains of reasoning drawn from a consistent network of beliefs. Our approach, which we call REFLEX, is to add a **rational, self-reflecting layer** on top of the LLM. First, given a question, we construct a **belief graph** using a backward-chaining process to materialize relevant model beliefs (including beliefs about answer candidates) and their inferential relationships. Second, we identify and minimize contradictions in that graph using a formal constraint reasoner. We find that REFLEX significantly improves consistency (by 8%-11% absolute) without harming overall answer accuracy, resulting in answers supported by faithful chains of reasoning drawn from a more consistent belief system. This suggests a new style of system architecture in which an LLM extended with a rational layer can provide an interpretable window into system beliefs, add a systematic reasoning capability, and repair latent inconsistencies present in the LLM. @@ -12378,8 +12378,8 @@ MarioGiulianelli JorisBaan WilkerAziz - RaquelFernández - BarbaraPlank + RaquelFernández + BarbaraPlank 14349-14371 In Natural Language Generation (NLG) tasks, for any input, multiple communicative goals are plausible, and any goal can be put into words, or produced, in multiple ways. We characterise the extent to which human production varies lexically, syntactically, and semantically across four NLG tasks, connecting human production variability to aleatoric or data uncertainty. We then inspect the space of output strings shaped by a generation system’s predicted probability distribution and decoding algorithm to probe its uncertainty. For each test input, we measure the generator’s calibration to human production variability. Following this instance-level approach, we analyse NLG models and decoding strategies, demonstrating that probing a generator with multiple samples and, when possible, multiple references, provides the level of detail necessary to gain understanding of a model’s representation of uncertainty. 2023.emnlp-main.887 @@ -12606,7 +12606,7 @@ RuofeiLai XinyuZhang ZhaoCao - XuanjingHuang + XuanjingHuang ZhongyuWei 14606-14620 The knowledge graph is a structure to store and represent knowledge, and recent studies have discussed its capability to assist language models for various applications. Some variations of knowledge graphs aim to record arguments and their relations for computational argumentation tasks. However, many must simplify semantic types to fit specific schemas, thus losing flexibility and expression ability. In this paper, we propose the **Hi**erarchical **Ar**gumentation **G**raph (Hi-ArG), a new structure to organize arguments. We also introduce two approaches to exploit Hi-ArG, including a text-graph multi-modal model GreaseArG and a new pre-training framework augmented with graph information. Experiments on two argumentation tasks have shown that after further pre-training and fine-tuning, GreaseArG supersedes same-scale language models on these tasks, while incorporating graph information during further pre-training can also improve the performance of vanilla language models. Code for this paper is available at <https://github.com/ljcleo/Hi-ArG>. @@ -12631,7 +12631,7 @@ <fixed-case>GNAT</fixed-case>: A General Narrative Alignment Tool TanzirPial - StevenSkiena + StevenSkiena 14636-14652 Algorithmic sequence alignment identifies similar segments shared between pairs of documents, and is fundamental to many NLP tasks. But it is difficult to recognize similarities between distant versions of narratives such as translations and retellings, particularly for summaries and abridgements which are much shorter than the original novels. We develop a general approach to narrative alignment coupling the Smith-Waterman algorithm from bioinformatics with modern text similarity metrics. We show that the background of alignment scores fits a Gumbel distribution, enabling us to define rigorous p-values on the significance of any alignment. We apply and evaluate our general narrative alignment tool (GNAT) on four distinct problem domains differing greatly in both the relative and absolute length of documents, namely summary-to-book alignment, translated book alignment, short story alignment, and plagiarism detection—demonstrating the power and performance of our methods. 2023.emnlp-main.904 @@ -12655,9 +12655,9 @@ <fixed-case>U</fixed-case>ni<fixed-case>C</fixed-case>hart: A Universal Vision-language Pretrained Model for Chart Comprehension and Reasoning AhmedMasry ParsaKavehzadeh - Xuan LongDo + Xuan LongDo EnamulHoque - ShafiqJoty + ShafiqJoty 14662-14684 Charts are widely used for data analysis, providing visual representations and insights into complex data. To facilitate chart-based data analysis using natural language, several downstream tasks have been introduced recently such as chart question answering and chart summarization. However, existing methods for these tasks often rely on pretraining on language or vision-language tasks, neglecting the explicit modeling of chart structures (e.g., how chart elements are related to each other). To address this, we first build a large corpus of charts covering diverse topics and visual styles. We then present UniChart, a pretrained model for chart comprehension and reasoning. UniChart encodes the relevant text, data, and visual elements of charts and then uses a chart-grounded text decoder for text generation. We propose several chart-specific pretraining tasks that include: (i) low-level tasks to extract the visual elements (e.g., bars, lines) and data from charts, and (ii) high-level tasks to acquire chart understanding and reasoning skills. Our experiments demonstrate that pretraining UniChart on a large corpus with chart-specific objectives, followed by fine-tuning, yields state-of-the-art performance on four downstream tasks. Moreover, our model exhibits superior generalizability to unseen chart corpus, surpassing previous approaches that lack chart-specific objectives and utilize limited chart resources. 2023.emnlp-main.906 @@ -12892,7 +12892,7 @@ LemaoLiu GuopingHuang ShumingShi - JiajunChen + JiajunChen ShujianHuang 14903-14917 We present IMTLab, an open-source end-to-end interactive machine translation (IMT) system platform that enables researchers to quickly build IMT systems with state-of-the-art models, perform an end-to-end evaluation, and diagnose the weakness of systems. IMTLab treats the whole interactive translation process as a task-oriented dialogue with a human-in-the-loop setting, in which human interventions can be explicitly incorporated to produce high-quality, error-free translations. To this end, a general communication interface is designed to support the flexible IMT architectures and user policies. Based on the proposed design, we construct a simulated and real interactive environment to achieve end-to-end evaluation and leverage the framework to systematically evaluate previous IMT systems. Our simulated and manual experiments show that the prefix-constrained decoding approach still gains the lowest editing cost in the end-to-end evaluation, while BiTIIMT achieves comparable editing cost with a better interactive experience. @@ -12988,7 +12988,7 @@ YixiaoSong AndrewDrozdov AparnaGarimella - VarunManjunatha + VarunManjunatha MohitIyyer 15023-15037 In this paper, we study the generation quality of interpolation-based retrieval-augmented language models (LMs). These methods, best exemplified by the kNN-LM, interpolate the LM’s predicted distribution of the next word with a distribution formed from the most relevant retrievals for a given prefix. While the kNN-LM and related methods yield impressive decreases in perplexity, we discover that they do not exhibit corresponding improvements in open-ended generation quality, as measured by both automatic evaluation metrics (e.g., MAUVE) and human evaluations. Digging deeper, we find that interpolating with a retrieval distribution actually increases perplexity compared to a baseline LM for the majority of tokens in the WikiText-103 test set, even though the overall perplexity is lower due to a smaller number of tokens for which perplexity dramatically decreases after interpolation. However, when decoding a long sequence at inference time, significant improvements on this smaller subset of tokens are washed out by slightly worse predictions on most tokens. Furthermore, we discover that the entropy of the retrieval distribution increases faster than that of the base LM as the generated sequence becomes longer, which indicates that retrieval is less reliable when using model-generated text as queries (i.e., is subject to exposure bias). We hope that our analysis spurs future work on improved decoding algorithms and interpolation strategies for retrieval-augmented language models. @@ -13095,7 +13095,7 @@ ChengChang QipengGuo JunqiDai - XuanjingHuang + XuanjingHuang XipengQiu 15135-15153 Large Language Models (LLMs) have recently made significant strides in complex reasoning tasks through the Chain-of-Thought technique. Despite this progress, their reasoning is often constrained by their intrinsic understanding, lacking external insights. To address this, we propose Exchange-of-Thought (EoT), a novel framework that enables cross-model communication during problem-solving. Drawing inspiration from network topology, EoT integrates four unique communication paradigms: Memory, Report, Relay, and Debate. This paper delves into the communication dynamics and volume associated with each paradigm. To counterbalance the risks of incorrect reasoning chains, we implement a robust confidence evaluation mechanism within these communications. Our experiments across diverse complex reasoning tasks demonstrate that EoT significantly surpasses established baselines, underscoring the value of external insights in enhancing LLM performance. Furthermore, we show that EoT achieves these superior results in a cost-effective manner, marking a promising advancement for efficient and collaborative AI problem-solving. @@ -13165,7 +13165,7 @@ YiFung TuhinChakrabarty HaoGuo - OwenRambow + OwenRambow SmarandaMuresan HengJi 15217-15230 @@ -13207,7 +13207,7 @@ SahandSabour YilinJia MinlieHuang - RadaMihalcea + RadaMihalcea 15264-15281 We propose task-adaptive tokenization as a way to adapt the generation pipeline to the specifics of a downstream task and enhance long-form generation in mental health. Inspired by insights from cognitive science, our task-adaptive tokenizer samples variable segmentations from multiple outcomes, with sampling probabilities optimized based on task-specific data. We introduce a strategy for building a specialized vocabulary and introduce a vocabulary merging protocol that allows for the integration of task-specific tokens into the pre-trained model’s tokenization step. Through extensive experiments on psychological question-answering tasks in both Chinese and English, we find that our task-adaptive tokenization approach brings a significant improvement in generation performance while using up to 60% fewer tokens. Preliminary experiments point to promising results when using our tokenization approach with very large language models. 2023.emnlp-main.944 @@ -13274,7 +13274,7 @@ Dong-HoLee JayPujara MohitSewak - RyenWhite + RyenWhite SujayJauhar 15349-15360 Although large language models (LLMs) have advanced the state-of-the-art in NLP significantly, deploying them for downstream applications is still challenging due to cost, responsiveness, control, or concerns around privacy and security. As such, trainable models are still the preferred option in some cases. However, these models still require human-labeled data for optimal performance, which is expensive and time-consuming to obtain. In order to address this issue, several techniques to reduce human effort involve labeling or generating data using LLMs. Although these methods are effective for certain applications, in practice they encounter difficulties in real-world scenarios. Labeling data requires careful data selection, while generating data necessitates task-specific prompt engineering. In this paper, we propose a unified data creation pipeline that requires only a single formatting example, and which is applicable to a broad range of tasks, including traditionally problematic ones with semantically devoid label spaces. In our experiments we demonstrate that instruction-following LLMs are highly cost-effective data creators, and that models trained with these data exhibit performance better than those trained with human-labeled data (by up to 17.5%) on out-of-distribution evaluation, while maintaining comparable performance on in-distribution tasks. These results have important implications for the robustness of NLP systems deployed in the real-world. @@ -13288,7 +13288,7 @@ YuliangYan LongtaoHuang XiaoqingZheng - XuanjingHuang + XuanjingHuang 15361-15371 Large Language Models (LLMs) have made remarkable advancements in the field of natural language generation. However, the propensity of LLMs to generate inaccurate or non-factual content, termed “hallucinations”, remains a significant challenge. Current hallucination detection methods often necessitate the retrieval of great numbers of relevant evidence, thereby increasing response times. We introduce a unique framework that leverages statistical decision theory and Bayesian sequential analysis to optimize the trade-off between costs and benefits during the hallucination detection process. This approach does not require a predetermined number of observations. Instead, the analysis proceeds in a sequential manner, enabling an expeditious decision towards “belief” or “disbelief” through a stop-or-continue strategy. Extensive experiments reveal that this novel framework surpasses existing methods in both efficiency and precision of hallucination detection. Furthermore, it requires fewer retrieval steps on average, thus decreasing response times. 2023.emnlp-main.949 @@ -13313,7 +13313,7 @@ Open Information Extraction via Chunks KuicaiDong AixinSun - Jung-jaeKim + Jung-jaeKim XiaoliLi 15390-15404 Open Information Extraction (OIE) aims to extract relational tuples from open-domain sentences. Existing OIE systems split a sentence into tokens and recognize token spans as tuple relations and arguments. We instead propose Sentence as Chunk sequence (SaC) and recognize chunk spans as tuple relations and arguments. We argue that SaC has better properties for OIE than sentence as token sequence, and evaluate four choices of chunks (i.e., CoNLL chunks, OIA simple phrases, noun phrases, and spans from SpanOIE). Also, we propose a simple end-to-end BERT-based model, Chunk-OIE, for sentence chunking and tuple extraction on top of SaC. Chunk-OIE achieves state-of-the-art results on multiple OIE datasets, showing that SaC benefits the OIE task. @@ -13356,7 +13356,7 @@ <fixed-case>C</fixed-case>oref<fixed-case>P</fixed-case>rompt: Prompt-based Event Coreference Resolution by Measuring Event Type and Argument Compatibilities ShengXu PeifengLi - QiaomingZhu + QiaomingZhu 15440-15452 Event coreference resolution (ECR) aims to group event mentions referring to the same real-world event into clusters. Most previous studies adopt the “encoding first, then scoring” framework, making the coreference judgment rely on event encoding. Furthermore, current methods struggle to leverage human-summarized ECR rules, e.g., coreferential events should have the same event type, to guide the model. To address these two issues, we propose a prompt-based approach, CorefPrompt, to transform ECR into a cloze-style MLM (masked language model) task. This allows for simultaneous event modeling and coreference discrimination within a single template, with a fully shared context. In addition, we introduce two auxiliary prompt tasks, event-type compatibility and argument compatibility, to explicitly demonstrate the reasoning process of ECR, which helps the model make final predictions. Experimental results show that our method CorefPrompt performs well in a state-of-the-art (SOTA) benchmark. 2023.emnlp-main.954 @@ -13466,7 +13466,7 @@ ShahreenAunti CharutaPethe AllenKim - StevenSkiena + StevenSkiena 15560-15579 Novels are often adapted into feature films, but the differences between the two media usually require dropping sections of the source text from the movie script. Here we study this screen adaptation process by constructing narrative alignments using the Smith-Waterman local alignment algorithm coupled with SBERT embedding distance to quantify text similarity between scenes and book units. We use these alignments to perform an automated analysis of 40 adaptations, revealing insights into the screenwriting process concerning (i) faithfulness of adaptation, (ii) importance of dialog, (iii) preservation of narrative order, and (iv) gender representation issues reflective of the Bechdel test. 2023.emnlp-main.962 @@ -13479,7 +13479,7 @@ JasonWei NajoungKim YiTay - QuocLe + QuocLe 15580-15591 Scaling up language models has been empirically shown to improve performance on a wide range of downstream tasks. However, if we were to observe worse performance as a function of scale (inverse scaling) on certain tasks, this would indicate that scaling can also encourage behaviors that are misaligned with human preferences. The Inverse Scaling Prize (McKenzie et al. 2023) identified eleven such inverse scaling tasks, evaluated on models of up to 280B parameters and up to 500 zettaFLOPs of training compute. This paper takes a closer look at these inverse scaling tasks. In this paper, we evaluate models of up to 540B parameters, trained on five times more compute than those evaluated in the Inverse Scaling Prize. With this increased range of model sizes and compute, only four out of the eleven tasks remain inverse scaling. Six tasks exhibit U-shaped scaling, where performance decreases up to a certain size, and then increases again up to the largest model evaluated (the one remaining task displays positive scaling). In addition, 1-shot examples and chain-of-thought can help mitigate undesirable scaling patterns even further. U-shaped scaling suggests that the inverse scaling trend observed in McKenzie et al. (2023) may not continue to hold for larger models, which we attribute to the presence of distractor tasks that only sufficiently large models can avoid. 2023.emnlp-main.963 @@ -13523,7 +13523,7 @@ YixinCao KenjiKawaguchi XiangWang - Tat-SengChua + Tat-SengChua 15623-15638 Language Models (LMs) have demonstrated impressive molecule understanding ability on various 1D text-related tasks. However, they inherently lack 2D graph perception — a critical ability of human professionals in comprehending molecules’ topological structures. To bridge this gap, we propose MolCA: Molecular Graph-Language Modeling with Cross-Modal Projector and Uni-Modal Adapter. MolCA enables an LM (i.e., Galactica) to understand both text- and graph-based molecular contents via the cross-modal projector. Specifically, the cross-modal projector is implemented as a Q-Former to connect a graph encoder’s representation space and an LM’s text space. Further, MolCA employs a uni-modal adapter (i.e., LoRA) for the LM’s efficient adaptation to downstream tasks. Unlike previous studies that couple an LM with a graph encoder via cross-modal contrastive learning, MolCA retains the LM’s ability of open-ended text generation and augments it with 2D graph information. To showcase its effectiveness, we extensively benchmark MolCA on tasks of molecule captioning, IUPAC name prediction, and molecule-text retrieval, on which MolCA significantly outperforms the baselines. 2023.emnlp-main.966 @@ -13539,7 +13539,7 @@ RanJing BinLiang MinYang - Kam-FaiWong + Kam-FaiWong RuifengXu 15639-15650 Unintended dataset biases typically exist in existing Emotion Recognition in Conversations (ERC) datasets, including label bias, where models favor the majority class due to imbalanced training data, as well as the speaker and neutral word bias, where models make unfair predictions because of excessive correlations between specific neutral words or speakers and classes. However, previous studies in ERC generally focus on capturing context-sensitive and speaker-sensitive dependencies, ignoring the unintended dataset biases of data, which hampers the generalization and fairness in ERC. To address this issue, we propose a Training-Free Debiasing framework (TFD) that operates during prediction without additional training. To ensure compatibility with various ERC models, it does not balance data or modify the model structure. Instead, TFD extracts biases from the model by generating counterfactual utterances and contexts and mitigates them using simple yet empirically robust element-wise subtraction operations. Extensive experiments on three public datasets demonstrate that TFD effectively improves generalization ability and fairness across different ERC models. @@ -13594,7 +13594,7 @@ <fixed-case>MQ</fixed-case>u<fixed-case>AKE</fixed-case>: Assessing Knowledge Editing in Language Models via Multi-Hop Questions ZexuanZhong ZhengxuanWu - ChristopherManning + ChristopherManning ChristopherPotts DanqiChen 15686-15702 @@ -13677,7 +13677,7 @@ Reinforcement Replaces Supervision: Query focused Summarization using Deep Reinforcement Learning SwaroopNath - PushpakBhattacharyya + PushpakBhattacharyya HarshadKhadilkar 15770-15789 Query-focused Summarization (QfS) deals with systems that generate summaries from document(s) based on a query. Motivated by the insight that Reinforcement Learning (RL) provides a generalization to Supervised Learning (SL) for Natural Language Generation, and thereby performs better (empirically) than SL, we use an RL-based approach for this task of QfS. Additionally, we also resolve the conflict of employing RL in Transformers with Teacher Forcing. We develop multiple Policy Gradient networks, trained on various reward signals: ROUGE, BLEU, and Semantic Similarity, which lead to a \mathit{10}-point improvement over the State-of-the-Art approach on the ROUGE-L metric for a benchmark dataset (ELI5). We also show performance of our approach in zero-shot setting for another benchmark dataset (DebatePedia) – our approach leads to results comparable to baselines, which were specifically trained on DebatePedia. To aid the RL training, we propose a better semantic similarity reward, enabled by a novel Passage Embedding scheme developed using Cluster Hypothesis. Lastly, we contribute a gold-standard test dataset to further research in QfS and Long-form Question Answering (LfQA). @@ -13720,7 +13720,7 @@ An Attribution Method for <fixed-case>S</fixed-case>iamese Encoders LucasMoeller DmitryNikolaev - SebastianPadó + SebastianPadó 15818-15827 Despite the success of Siamese encoder models such as sentence transformers (ST), little is known about the aspects of inputs they pay attention to. A barrier is that their predictions cannot be attributed to individual features, as they compare two inputs rather than processing a single one. This paper derives a local attribution method for Siamese encoders by generalizing the principle of integrated gradients to models with multiple inputs. The output takes the form of feature-pair attributions and in case of STs it can be reduced to a token–token matrix. Our method involves the introduction of integrated Jacobians and inherits the advantageous formal properties of integrated gradients: it accounts for the model’s full computation graph and is guaranteed to converge to the actual prediction. A pilot study shows that in case of STs few token pairs can dominate predictions and that STs preferentially focus on nouns and verbs. For accurate predictions, however, they need to attend to the majority of tokens and parts of speech. 2023.emnlp-main.980 @@ -13744,7 +13744,7 @@ Graph vs. Sequence: An Empirical Study on Knowledge Forms for Knowledge-Grounded Dialogue YizheYang - HeyanHuang + HeyanHuang YuhangLiu YangGao 15846-15858 @@ -13781,7 +13781,7 @@ <fixed-case>NL</fixed-case>2<fixed-case>TL</fixed-case>: Transforming Natural Languages to Temporal Logics using Large Language Models YongchaoChen RujulGandhi - YangZhang + YangZhang ChuchuFan 15880-15903 Temporal Logic (TL) can be used to rigorously specify complex high-level specification for systems in many engineering applications. The translation between natural language (NL) and TL has been under-explored due to the lack of dataset and generalizable model across different application domains. In this paper, we propose an accurate and generalizable transformation framework of English instructions from NL to TL, exploring the use of Large Language Models (LLMs) at multiple stages. Our contributions are twofold. First, we develop a framework to create a dataset of NL-TL pairs combining LLMs and human annotation. We publish a dataset with 23K NL-TL pairs. Then, we finetune T5 models on the lifted versions (i.e., the specific Atomic Propositions (AP) are hidden) of the NL and TL. The enhanced generalizability originates from two aspects: 1) Usage of lifted NL-TL characterizes common logical structures, without constraints of specific domains. 2) Application of LLMs in dataset creation largely enhances corpus richness. We test the generalization of trained models on five varied domains. To achieve full NL-TL transformation, we either combine the lifted model with AP recognition task or do the further finetuning on each specific domain. During the further finetuning, our model achieves higher accuracy (> 95%) using only <10% training data, compared with the baseline sequence to sequence (Seq2Seq) model. @@ -13819,7 +13819,7 @@ DipteshKanojia AnupamaRay ApoorvaNunna - PushpakBhattacharyya + PushpakBhattacharyya 15933-15948 Sarcasm is a complex linguistic construct with incongruity at its very core. Detecting sarcasm depends on the actual content spoken and tonality, facial expressions, the context of an utterance, and personal traits like language proficiency and cognitive capabilities. In this paper, we propose the utilization of synthetic gaze data to improve the task performance for multimodal sarcasm detection in a conversational setting. We enrich an existing multimodal conversational dataset, i.e., MUStARD++ with gaze features. With the help of human participants, we collect gaze features for 20% of data instances, and we investigate various methods for gaze feature prediction for the rest of the dataset. We perform extrinsic and intrinsic evaluations to assess the quality of the predicted gaze features. We observe a performance gain of up to 6.6% points by adding a new modality, i.e., collected gaze features. When both collected and predicted data are used, we observe a performance gain of 2.3% points on the complete dataset. Interestingly, with only predicted gaze features, too, we observe a gain in performance (1.9% points). We retain and use the feature prediction model, which maximally correlates with collected gaze features. Our model trained on combining collected and synthetic gaze data achieves SoTA performance on the MUStARD++ dataset. To the best of our knowledge, ours is the first predict-and-use model for sarcasm detection. We publicly release the code, gaze data, and our best models for further research. 2023.emnlp-main.988 @@ -13906,7 +13906,7 @@ Detecting Spoilers in Movie Reviews with External Movie Knowledge and User Networks - HengWang + HengWang WenqianZhang YuyangBai ZhaoxuanTan @@ -13925,7 +13925,7 @@ DongyuanLi YusongWang KotaroFunakoshi - ManabuOkumura + ManabuOkumura 16051-16069 Multimodal emotion recognition aims to recognize emotions for each utterance from multiple modalities, which has received increasing attention for its application in human-machine interaction. Current graph-based methods fail to simultaneously depict global contextual features and local diverse uni-modal features in a dialogue. Furthermore, with the number of graph layers increasing, they easily fall into over-smoothing. In this paper, we propose a method for joint modality fusion and graph contrastive learning for multimodal emotion recognition (Joyful), where multimodality fusion, contrastive learning, and emotion recognition are jointly optimized. Specifically, we first design a new multimodal fusion mechanism that can provide deep interaction and fusion between the global contextual and uni-modal specific features. Then, we introduce a graph contrastive learning framework with inter- and intra-view contrastive losses to learn more distinguishable representations for samples with different sentiments. Extensive experiments on three benchmark datasets indicate that Joyful achieved state-of-the-art (SOTA) performance compared with all baselines. Code is released on Github (https://anonymous.4open.science/r/MERC-7F88). 2023.emnlp-main.996 @@ -13948,7 +13948,7 @@ Assessing the influence of attractor-verb distance on grammatical agreement in humans and language models ChristosZacharopoulos - ThéoDesbordes + ThéoDesbordes MathiasSablé-Meyer 16081-16090 Subject-verb agreement in the presence of an attractor noun located between the main noun and the verb elicits complex behavior: judgments of grammaticality are modulated by the grammatical features of the attractor. For example, in the sentence \textit{``The girl near the boys likes climbing''}, the attractor (\textit{boys}) disagrees in grammatical number with the verb (\textit{likes}), creating a locally implausible transition probability. Here, we parametrically modulate the distance between the attractor and the verb while keeping the length of the sentence equal. We evaluate the performance of both humans and two artificial neural network models: both make more mistakes when the attractor is closer to the verb, but neural networks get close to the chance level while humans are mostly able to overcome the attractor interference. Additionally, we report a linear effect of attractor distance on reaction times. We hypothesize that a possible reason for the proximity effect is the calculation of transition probabilities between adjacent words. Nevertheless, classical models of attraction such as the cue-based model might suffice to explain this phenomenon, thus paving the way for new research. Data and analyses available at https://osf.io/d4g6k @@ -13988,8 +13988,8 @@ Hop, Union, Generate: Explainable Multi-hop Reasoning without Rationale Supervision WentingZhao JustinChiu - ClaireCardie - AlexanderRush + ClaireCardie + AlexanderRush 16119-16130 Explainable multi-hop question answering (QA) not only predicts answers but also identifies rationales, i. e. subsets of input sentences used to derive the answers. Existing methods rely on supervision for both answers and rationales. This problem has been extensively studied under the supervised setting, where both answer and rationale annotations are given. Because rationale annotations are expensive to collect and not always available, recent efforts have been devoted to developing methods that do not rely on supervision for rationales. However, such methods have limited capacities in modeling interactions between sentences, let alone reasoning across multiple documents. This work proposes a principled, probabilistic approach for training explainable multi-hop QA systems without rationale supervision. Our approach performs multi-hop reasoning by explicitly modeling rationales as sets, enabling the model to capture interactions between documents and sentences within a document. Experimental results show that our approach is more accurate at selecting rationales than the previous methods, while maintaining similar accuracy in predicting answers. 2023.emnlp-main.1001 @@ -14000,7 +14000,7 @@ To Split or Not to Split: Composing Compounds in Contextual Vector Spaces ChrisJenkins FilipMiletic - SabineSchulte im Walde + SabineSchulte im Walde 16131-16136 We investigate the effect of sub-word tokenization on representations of German noun compounds: single orthographic words which are composed of two or more constituents but often tokenized into units that are not morphologically motivated or meaningful. Using variants of BERT models and tokenization strategies on domain-specific restricted diachronic data, we introduce a suite of evaluations relying on the masked language modelling task and compositionality prediction. We obtain the most consistent improvements by pre-splitting compounds into constituents. 2023.emnlp-main.1002 @@ -14125,7 +14125,7 @@ Learning From Free-Text Human Feedback – Collect New Datasets Or Extend Existing Ones? DominicPetrak - NafiseMoosavi + NafiseMoosavi YeTian NikolaiRozanov IrynaGurevych @@ -14178,7 +14178,7 @@ A Benchmark for Reasoning with Spatial Prepositions IuliaComsa - SriniNarayanan + SriniNarayanan 16328-16335 Spatial reasoning is a fundamental building block of human cognition, used in representing, grounding, and reasoning about physical and abstract concepts. We propose a novel benchmark focused on assessing inferential properties of statements with spatial prepositions. The benchmark includes original datasets in English and Romanian and aims to probe the limits of reasoning about spatial relations in large language models. We use prompt engineering to study the performance of two families of large language models, PaLM and GPT-3, on our benchmark. Our results show considerable variability in the performance of smaller and larger models, as well as across prompts and languages. However, none of the models reaches human performance. 2023.emnlp-main.1015 @@ -14189,7 +14189,7 @@ <fixed-case>TIMELINE</fixed-case>: Exhaustive Annotation of Temporal Relations Supporting the Automatic Ordering of Events in News Articles SarahAlsayyahi - RizaBatista-Navarro + RizaBatista-Navarro 16336-16348 Temporal relation extraction models have thus far been hindered by a number of issues in existing temporal relation-annotated news datasets, including: (1) low inter-annotator agreement due to the lack of specificity of their annotation guidelines in terms of what counts as a temporal relation; (2) the exclusion of long-distance relations within a given document (those spanning across different paragraphs); and (3) the exclusion of events that are not centred on verbs. This paper aims to alleviate these issues by presenting a new annotation scheme that clearly defines the criteria based on which temporal relations should be annotated. Additionally, the scheme includes events even if they are not expressed as verbs (e.g., nominalised events). Furthermore, we propose a method for annotating all temporal relations—including long-distance ones—which automates the process, hence reducing time and manual effort on the part of annotators. The result is a new dataset, the TIMELINE corpus, in which improved inter-annotator agreement was obtained, in comparison with previously reported temporal relation datasets. We report the results of training and evaluating two baseline temporal relation extraction models on the new corpus, and compare them with results obtained on the widely used MATRES corpus. 2023.emnlp-main.1016 @@ -14213,13 +14213,13 @@ Towards Interpretable and Efficient Automatic Reference-Based Summarization Evaluation YixinLiu - AlexanderFabbri + AlexanderFabbri YilunZhao PengfeiLiu - ShafiqJoty + ShafiqJoty Chien-ShengWu CaimingXiong - DragomirRadev + DragomirRadev 16360-16368 Interpretability and efficiency are two important considerations for the adoption of neural automatic metrics. In this work, we develop strong-performing automatic metrics for reference-based summarization evaluation, based on a two-stage evaluation pipeline that first extracts basic information units from one text sequence and then checks the extracted units in another sequence. The metrics we developed include two-stage metrics that can provide high interpretability at both the fine-grained unit level and summary level, and one-stage metrics that achieve a balance between efficiency and interpretability. We make the developed tools publicly available at https://github.com/Yale-LILY/AutoACU. 2023.emnlp-main.1018 @@ -14278,7 +14278,7 @@ Can language models learn analogical reasoning? Investigating training objectives and comparisons to human performance MollyPetersen - Lonnekevan der Plas + Lonnekevan der Plas 16414-16425 While analogies are a common way to evaluate word embeddings in NLP, it is also of interest to investigate whether or not analogical reasoning is a task in itself that can be learned. In this paper, we test several ways to learn basic analogical reasoning, specifically focusing on analogies that are more typical of what is used to evaluate analogical reasoning in humans than those in commonly used NLP benchmarks. Our experiments find that models are able to learn analogical reasoning, even with a small amount of data. We additionally compare our models to a dataset with a human baseline, and find that after training models approach human performance. 2023.emnlp-main.1022 @@ -14328,7 +14328,7 @@ ShaonanWang YunhaoZhang JiajunZhang - ChengqingZong + ChengqingZong 16460-16476 Transformer-based models, even though achieving super-human performance on several downstream tasks, are often regarded as a black box and used as a whole. It is still unclear what mechanisms they have learned, especially their core module: multi-head attention. Inspired by functional specialization in the human brain, which helps to efficiently handle multiple tasks, this work attempts to figure out whether the multi-head attention module will evolve similar function separation under multi-tasking training. If it is, can this mechanism further improve the model performance? To investigate these questions, we introduce an interpreting method to quantify the degree of functional specialization in multi-head attention. We further propose a simple multi-task training method to increase functional specialization and mitigate negative information transfer in multi-task learning. Experimental results on seven pre-trained transformer models have demonstrated that multi-head attention does evolve functional specialization phenomenon after multi-task training which is affected by the similarity of tasks. Moreover, the multi-task training strategy based on functional specialization boosts performance in both multi-task learning and transfer learning without adding any parameters. 2023.emnlp-main.1026 @@ -14465,7 +14465,7 @@ RaghavJain PrinceJha SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya 16632-16645 With the rise of social media and online communication, the issue of cyberbullying has gained significant prominence. While extensive research is being conducted to develop more effective models for detecting cyberbullying in monolingual languages, a significant gap exists in understanding code-mixed languages and the need for explainability in this context. To address this gap, we have introduced a novel benchmark dataset named BullyExplain for explainable cyberbullying detection in code-mixed language. In this dataset, each post is meticulously annotated with four labels: bully, sentiment, target, and rationales, indicating the specific phrases responsible for identifying the post as a bully. Our current research presents an innovative unified generative framework, GenEx, which reimagines the multitask problem as a text-to-text generation task. Our proposed approach demonstrates its superiority across various evaluation metrics when applied to the BullyExplain dataset, surpassing other baseline models and current state-of-the-art approaches. 2023.emnlp-main.1035 @@ -14495,7 +14495,7 @@ KeziahReina VishneshRamanathan WeiXu - ByronWallace + ByronWallace Junyi JessyLi 16662-16692 Automated text simplification aims to produce simple versions of complex texts. This task is especially useful in the medical domain, where the latest medical findings are typically communicated via complex and technical articles. This creates barriers for laypeople seeking access to up-to-date medical findings, consequently impeding progress on health literacy. Most existing work on medical text simplification has focused on monolingual settings, with the result that such evidence would be available only in just one language (most often, English). This work addresses this limitation via multilingual simplification, i.e., directly simplifying complex texts into simplified texts in multiple languages. We introduce MultiCochrane, the first sentence-aligned multilingual text simplification dataset for the medical domain in four languages: English, Spanish, French, and Farsi. We evaluate fine-tuned and zero-shot models across these languages with extensive human assessments and analyses. Although models can generate viable simplified texts, we identify several outstanding challenges that this dataset might be used to address. @@ -14524,7 +14524,7 @@ RuofeiLai XinyuZhang ZhaoCao - XuanjingHuang + XuanjingHuang ZhongyuWei 16705-16720 Counter-argument generation—a captivating area in computational linguistics—seeks to craft statements that offer opposing views. While most research has ventured into paragraph-level generation, sentence-level counter-argument generation beckons with its unique constraints and brevity-focused challenges. Furthermore, the diverse nature of counter-arguments poses challenges for evaluating model performance solely based on n-gram-based metrics. In this paper, we present the ArgTersely benchmark for sentence-level counter-argument generation, drawing from a manually annotated dataset from the ChangeMyView debate forum. We also propose Arg-LlaMA for generating high-quality counter-argument. For better evaluation, we trained a BERT-based evaluator Arg-Judge with human preference data. We conducted comparative experiments involving various baselines such as LlaMA, Alpaca, GPT-3, and others. The results show the competitiveness of our proposed framework and evaluator in counter-argument generation tasks. Code and data are available at https://github.com/amazingljy1206/ArgTersely. @@ -14554,7 +14554,7 @@ HannahFrost PaulO’Regan DónalLanders - AndreFreitas + AndreFreitas 16745-16764 How can we interpret and retrieve medical evidence to support clinical decisions? Clinical trial reports (CTR) amassed over the years contain indispensable information for the development of personalized medicine. However, it is practically infeasible to manually inspect over 400,000+ clinical trial reports in order to find the best evidence for experimental treatments. Natural Language Inference (NLI) offers a potential solution to this problem, by allowing the scalable computation of textual entailment. However, existing NLI models perform poorly on biomedical corpora, and previously published datasets fail to capture the full complexity of inference over CTRs. In this work, we present a novel resource to advance research on NLI for reasoning on CTRs. The resource includes two main tasks. Firstly, to determine the inference relation between a natural language statement, and a CTR. Secondly, to retrieve supporting facts to justify the predicted relation. We provide NLI4CT, a corpus of 2400 statements and CTRs, annotated for these tasks. Baselines on this corpus expose the limitations of existing NLI approaches, with 6 state-of-the-art NLI models achieving a maximum F1 score of 0.627. To the best of our knowledge, we are the first to design a task that covers the interpretation of full CTRs. To encourage further work on this challenging dataset, we make the corpus, competition leaderboard, and website, available on CodaLab, and code to replicate the baseline experiments on GitHub. 2023.emnlp-main.1041 @@ -14568,7 +14568,7 @@ ZhenWu JianbingZhang ShujianHuang - XinyuDai + XinyuDai 16765-16779 It has been well documented that a reviewer’s opinion of the nativeness of expression in an academic paper affects the likelihood of it being accepted for publication. Previous works have also shone a light on the stress and anxiety authors who are non-native English speakers experience when attempting to publish in international venues. We explore how this might be a concern in the field of Natural Language Processing (NLP) through conducting a comprehensive statistical analysis of NLP paper abstracts, identifying how authors of different linguistic backgrounds differ in the lexical, morphological, syntactic and cohesive aspects of their writing. Through our analysis, we identify that there are a number of characteristics that are highly variable across the different corpora examined in this paper. This indicates potential for the presence of linguistic bias. Therefore, we outline a set of recommendations to publishers of academic journals and conferences regarding their guidelines and resources for prospective authors in order to help enhance inclusivity and fairness. 2023.emnlp-main.1042 @@ -14595,7 +14595,7 @@ Muhammad UmarSalman AsifHanif ShadyShehata - PreslavNakov + PreslavNakov 16794-16812 Propaganda is a form of communication intended to influence the opinions and the mindset of the public to promote a particular agenda. With the rise of social media, propaganda has spread rapidly, leading to the need for automatic propaganda detection systems. Most work on propaganda detection has focused on high-resource languages, such as English, and little effort has been made to detect propaganda for low-resource languages. Yet, it is common to find a mix of multiple languages in social media communication, a phenomenon known as code-switching. Code-switching combines different languages within the same text, which poses a challenge for automatic systems. Considering this premise, we propose a novel task of detecting propaganda techniques in code-switched text. To support this task, we create a corpus of 1,030 texts code-switching between English and Roman Urdu, annotated with 20 propaganda techniques at fragment-level. We perform a number of experiments contrasting different experimental setups, and we find that it is important to model the multilinguality directly rather than using translation as well as to use the right fine-tuning strategy. We plan to publicly release our code and dataset. 2023.emnlp-main.1044 @@ -14607,7 +14607,7 @@ Speech Recognition and Meaning Interpretation: Towards Disambiguation of Structurally Ambiguous Spoken Utterances in <fixed-case>I</fixed-case>ndonesian RuhiyahWidiaputri AyuPurwarianti - DessiLestari + DessiLestari KurniawatiAzizah DiptaTanaya SakrianiSakti @@ -14665,7 +14665,7 @@ <fixed-case>NLP</fixed-case>+<fixed-case>V</fixed-case>is: <fixed-case>NLP</fixed-case> Meets Visualization - ShafiqJoty + ShafiqJoty EnamulHoque JesseVig 1-6 @@ -14701,7 +14701,7 @@ QinyuanYe PengfeiLiu XiangRen - HinrichSchütze + HinrichSchütze 19-25 The progress of natural language processing (NLP) is primarily driven by machine learning that optimizes a system on a large-scale set of task-specific labeled examples. This learning paradigm limits the ability of machines to have the same capabilities as humans in handling new tasks since humans can often solve unseen tasks with a couple of examples accompanied by task instructions. In addition, we may not have a chance to prepare task-specific examples of large-volume for new tasks because we cannot foresee what task needs to be addressed next and how complex to annotate for it. Therefore, task instructions act as a novel and promising resource for supervision. This tutorial targets researchers and practitioners who are interested in AI and ML technologies for NLP generalization in a low-shot scenario. In particular, we will present a diverse thread of instruction-driven NLP studies that try to answer the following questions: (i) What is task instruction? (ii) How is the process of creating datasets and evaluating systems conducted? (iii) How to encode task instructions? (iv) When and why do some instructions work better? (v) What concerns remain in LLM-driven instruction following? We will discuss several lines of frontier research that tackle those challenges and will conclude the tutorial by outlining directions for further investigation. 2023.emnlp-tutorial.4 @@ -14753,7 +14753,7 @@ Fabricator: An Open Source Toolkit for Generating Labeled Training Data with Teacher <fixed-case>LLM</fixed-case>s JonasGoldeHumboldt-University of Berlin - PatrickHallerMachine Learning Group - Humboldt University of Berlin + PatrickHallerMachine Learning Group - Humboldt University of Berlin FelixHamborgUniversity of Konstanz JulianRischdeepset AlanAkbikHumboldt University of Berlin @@ -14769,7 +14769,7 @@ ChristianHuberKarlsruhe Institut of Technology Tu AnhDinhKarlsruhe Institute of Technology CarlosMullovKarlsruhe Institute of Technology - Ngoc-QuanPhamKarlsruhe Institute of Technology + Ngoc-QuanPhamKarlsruhe Institute of Technology Thai BinhNguyenKarlsruhe Institute of Technology FabianRetkowskiKarlsruhe Institut of Technology StefanConstantinKarlsruhe Institute of Technology @@ -14778,7 +14778,7 @@ ZhaolinLiKarlsruhe Institute of Technology SaiKoneruKarlsruhe Institute of Technology JanNiehuesKarlsruhe Institut of Technology - AlexanderWaibelCarnegie Mellon + AlexanderWaibelCarnegie Mellon 12-20 The challenge of low-latency speech translation has recently draw significant interest in the research community as shown by several publications and shared tasks. Therefore, it is essential to evaluate these different approaches in realistic scenarios. However, currently only specific aspects of the systems are evaluated and often it is not possible to compare different approaches. In this work, we propose the first framework to perform and evaluate the various aspects of low-latency speech translation under realistic conditions. The evaluation is carried out in an end-to-end fashion. This includes the segmentation of the audio as well as the run-time of the different components. Secondly, we compare different approaches to low-latency speech translation using this framework. We evaluate models with the option to revise the output as well as methods with fixed output. Furthermore, we directly compare state-of-the-art cascaded as well as end-to-end systems. Finally, the framework allows to automatically evaluate the translation quality as well as latency and also provides a web interface to show the low-latency model outputs to the user. 2023.emnlp-demo.2 @@ -14830,7 +14830,7 @@ NunoCarvalhais MoniqueMeuschke MarkusReichstein - SinaZarrieß + SinaZarrieß KaiLawonn 70-81 The advent of large language models has brought about new ways of interacting with data intuitively via natural language. In recent years, a variety of visualization systems have explored the use of natural language to create and modify visualizations through visualization-oriented dialog. However, the majority of these systems rely on tailored dialog agents to analyze domain-specific data and operate domain-specific visualization tools and libraries. This is a major challenge when trying to transfer functionalities between dialog interfaces of different visualization applications. To address this issue, we propose VIST5, a visualization-oriented dialog system that focuses on easy adaptability to an application domain as well as easy transferability of language-controllable visualization library functions between applications. Its architecture is based on a retrieval-augmented T5 language model that leverages few-shot learning capabilities to enable a rapid adaptation of the system. @@ -14857,9 +14857,9 @@ Koala: An Index for Quantifying Overlaps with Pre-training Corpora - Thuy-TrangVuMonash University + Thuy-TrangVuMonash University XuanliHeUniversity College London - GholamrezaHaffariMonash University + GholamrezaHaffariMonash University EhsanShareghiMonash University 90-98 In very recent years more attention has been placed on probing the role of pre-training data in Large Language Models (LLMs) downstream behaviour. Despite the importance, there is no public tool that supports such analysis of pre-training corpora at large scale. To help research in this space, we launch Koala, a searchable index over large pre-training corpora using lossless compressed suffix arrays with highly efficient compression rate and search support. In its first release we index the public proportion of OPT 175B, GPT-3, GPT-Neo, GPT-Neo, LLaMA, BERT, ELECTRA, RoBERTA, XLNet pre-training corpora. Koala provides a framework to do forensic analysis on the current and future benchmarks as well as to assess the degree of memorization in the output from the LLMs. Koala is available for public use at https://koala-index.erc.monash.edu/. @@ -14897,7 +14897,7 @@ DazhenWanTsinghua University XiaochenZhuUniversity of Cambridge, Cambridge, England JianfengGaoMicrosoft Research, Redmond - MilicaGasicHeinrich Heine University Duesseldorf + MilicaGasicHeinrich Heine University Duesseldorf MinlieHuangTsinghua University 106-123 Task-oriented dialogue (TOD) systems function as digital assistants, guiding users through various tasks such as booking flights or finding restaurants. Existing toolkits for building TOD systems often fall short in delivering comprehensive arrays of data, model, and experimental environments with a user-friendly experience. We introduce ConvLab-3: a multifaceted dialogue system toolkit crafted to bridge this gap. Our unified data format simplifies the integration of diverse datasets and models, significantly reducing complexity and cost for studying generalization and transfer. Enhanced with robust reinforcement learning (RL) tools, featuring a streamlined training process, in-depth evaluation tools, and a selection of user simulators, ConvLab-3 supports the rapid development and evaluation of robust dialogue policies. Through an extensive study, we demonstrate the efficacy of transfer learning and RL and showcase that ConvLab-3 is not only a powerful tool for seasoned researchers but also an accessible platform for newcomers. @@ -15001,7 +15001,7 @@ <fixed-case>TP</fixed-case>-Detector: Detecting Turning Points in the Engineering Process of Large-scale Projects QiWuBeihang University - WenHanChaoBeiHang University + WenHanChaoBeiHang University XianZhouCenter for Information Research, Academy of Military Science ZhunchenLuoCenter for Information Research, Academy of Military Science 177-185 @@ -15044,12 +15044,12 @@ Muted: Multilingual Targeted Offensive Speech Identification and Visualization - ChristophTillmannIBM Research + ChristophTillmannIBM Research AashkaTrivediIBM Research SaraRosenthalIBM Research SantoshBorseIBM Research RongZhangIBM.com - AvirupSilIBM Research AI + AvirupSilIBM Research AI BishwaranjanBhattacharjeeIBM T.J.Watson Researcg 229-236 Offensive language such as hate, abuse, and profanity (HAP) occurs in various content on the web. While previous work has mostly dealt with sentence level annotations, there have been a few recent attempts to identify offensive spans as well. We build upon this work and introduce MUTED, a system to identify multilingual HAP content by displaying offensive arguments and their targets using heat maps to indicate their intensity. MUTED can leverage any transformer-based HAP-classification model and its attention mechanism out-of-the-box to identify toxic spans, without further fine-tuning. In addition, we use the spaCy library to identify the specific targets and arguments for the words predicted by the attention heatmaps. We present the model’s performance on identifying offensive spans and their targets in existing datasets and present new annotations on German text. Finally, we demonstrate our proposed visualization tool on multilingual inputs. @@ -15110,7 +15110,7 @@ LiangmingPanUniversity of California, Santa Barbara (UCSB) XinyuanLuNational University of Singapore Min-YenKanNational University of Singapore - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 264-273 Fact-checking real-world claims often requires intricate, multi-step reasoning due to the absence of direct evidence to support or refute them. However, existing fact-checking systems often lack transparency in their decision-making, making it challenging for users to comprehend their reasoning process. To address this, we propose the Question-guided Multi-hop Fact-Checking (QACheck) system, which guides the model’s reasoning process by asking a series of questions critical for verifying a claim. QACheck has five key modules: a claim verifier, a question generator, a question-answering module, a QA validator, and a reasoner. Users can input a claim into QACheck, which then predicts its veracity and provides a comprehensive report detailing its reasoning process, guided by a sequence of (question, answer) pairs. QACheck also provides the source of evidence supporting each question, fostering a transparent, explainable, and user-friendly fact-checking process. 2023.emnlp-demo.23 @@ -15125,7 +15125,7 @@ Seyedeh FatemehAhmadiUniversity of Guilan GitaShojaeeUniversity of Guilan FatemehKamaniUniversity of Guilan - GholamrezaGhassem-SaniSharif University of Technology + GholamrezaGhassem-SaniSharif University of Technology Seyed AbolghasemMirroshandelStony Brook University 274-285 Question answering (QA) systems have reached human-level accuracy; however, these systems are not robust enough and are vulnerable to adversarial examples. Recently, adversarial attacks have been widely investigated in text classification. However, there have been few research efforts on this topic in QA. In this article, we have modified the attack algorithms widely used in text classification to fit those algorithms for QA systems. We have evaluated the impact of various attack methods on QA systems at character, word, and sentence levels. Furthermore, we have developed a new framework, named RobustQA, as the first open-source toolkit for investigating textual adversarial attacks in QA systems. RobustQA consists of seven modules: Tokenizer, Victim Model, Goals, Metrics, Attacker, Attack Selector, and Evaluator. It currently supports six different attack algorithms. Furthermore, the framework simplifies the development of new attack algorithms in QA. The source code and documentation of RobustQA are available at https://github.com/mirbostani/RobustQA. @@ -15167,7 +15167,7 @@ <fixed-case>M</fixed-case>ini<fixed-case>C</fixed-case>hain: A Small Library for Coding with Large Language Models - AlexanderRushCornell University + AlexanderRushCornell University 311-317 Programming augmented by large language models (LLMs) opens up many new application areas, but also requires care. LLMs are accurate enough, on average, to replace core functionality, yet make basic mistakes that demonstrate a lack of robustness. An ecosystem of prompting tools, from intelligent agents to new programming languages, have emerged with different solutions for patching LLMs with other tools. In this work, we introduce MiniChain, an opinionated tool for LLM augmented programming, with the design goals of ease-of-use of prototyping, transparency through automatic visualization, and a minimalistic approach to advanced features. The MiniChain library provides core primitives for coding LLM calls, separating out prompt templates, and capturing program structure. The library includes demo implementations of the main applications papers in the area, including chat-bots, code generation, retrieval-based question answering, and complex information extraction. The library is open-source and available at https://github.com/srush/MiniChain, with code demos available at https://srush-minichain.hf.space/, and video demo at https://www.youtube.com/watch?v=VszZ1VnO7sk. 2023.emnlp-demo.27 @@ -15198,9 +15198,9 @@ SaiVallurupalliUniversity of Maryland at Baltimore County Yash KumarLalStony Brook University FrancisFerraroUniversity of Maryland, Baltimore County - NathanaelChambersUS Naval Academy + NathanaelChambersUS Naval Academy GregDurrettUT Austin - RaymondMooneyUniversity of Texas at Austin + RaymondMooneyUniversity of Texas at Austin KatrinErkUniversity of Texas at Austin NiranjanBalasubramanianStony Brook University 328-335 @@ -15253,8 +15253,8 @@ ZixuanZhangUniversity of Illinois Urbana-Champaign ReeceSuchockiUniversity of Colorado Boulder ShaLiUniversity of Illinois Urbana-Champaign - MarthaPalmerUniversity of Colorado - Susan WindischBrownUniversity of Colorado at Boulder + MarthaPalmerUniversity of Colorado + Susan WindischBrownUniversity of Colorado at Boulder JiaweiHanUIUC HengJiUniversity of Illinois at Urbana-Champaign and Amazon (Amazon Scholar) 365-372 @@ -15287,8 +15287,8 @@ Shafiuddin RehanAhmedUniversity of Colorado Boulder JuliaBonnUniversity of Colorado, Boulder KristinWright-BettnerUniversity of Colorado Boulder - MarthaPalmerUniversity of Colorado - James H.MartinUniversity of Colorado Boulder + MarthaPalmerUniversity of Colorado + James H.MartinUniversity of Colorado Boulder 381-388 In this paper, we introduce CAMRA (Copilot for AMR Annotatations), a cutting-edge web-based tool designed for constructing Abstract Meaning Representation (AMR) from natural language text. CAMRA offers a novel approach to deep lexical semantics annotation such as AMR, treating AMR annotation akin to coding in programming languages. Leveraging the familiarity of programming paradigms, CAMRA encompasses all essential features of existing AMR editors, including example lookup, while going a step further by integrating Propbank roleset lookup as an autocomplete feature within the tool. Notably, CAMRA incorporates AMR parser models as coding co-pilots, greatly enhancing the efficiency and accuracy of AMR annotators. 2023.emnlp-demo.35 @@ -15392,7 +15392,7 @@ ElizavetaGoncharovaNRU HSE AlexanderPanchenkoSkolkovo Institue of Science and Technology MaximPanovTechnology Innovation Institute - TimothyBaldwinMBZUAI + TimothyBaldwinMBZUAI ArtemShelmanovMohamed bin Zayed University of Artificial Intelligence: MBZUAI 446-461 Recent advancements in the capabilities of large language models (LLMs) have paved the way for a myriad of groundbreaking applications in various fields. However, a significant challenge arises as these models often “hallucinate”, i.e., fabricate facts without providing users an apparent means to discern the veracity of their statements. Uncertainty estimation (UE) methods are one path to safer, more responsible, and more effective use of LLMs. However, to date, research on UE methods for LLMs has been focused primarily on theoretical rather than engineering contributions. In this work, we tackle this issue by introducing LM-Polygraph, a framework with implementations of a battery of state-of-the-art UE methods for LLMs in text generation tasks, with unified program interfaces in Python. Additionally, it introduces an extendable benchmark for consistent evaluation of UE techniques by researchers, and a demo web application that enriches the standard chat dialog with confidence scores, empowering end-users to discern unreliable responses. LM-Polygraph is compatible with the most recent LLMs, including BLOOMz, LLaMA-2, ChatGPT, and GPT-4, and is designed to support future releases of similarly-styled LMs. @@ -15450,7 +15450,7 @@ KyleLoAllen Institute for Artificial Intelligence ZejiangShenMIT BenjaminNewmanStanford University - JosephChangAllen Institute for AI + JosephChangAllen Institute for AI RussellAuthurAllen Institute for AI ErinBransomAllen Institute for AI StefanCandraAllen Institute for AI @@ -15460,8 +15460,8 @@ AmanpreetSinghAllen Institute for Artificial Intelligence ChrisWilhelmAllen Institute for AI AngeleZamarronAllen Institute for AI - Marti A.HearstUC Berkeley - DanielWeldUniversity of Washington & Allen Institute for Artificial Inelligence + Marti A.HearstUC Berkeley + DanielWeldUniversity of Washington & Allen Institute for Artificial Inelligence DougDowneyAllen Institute for AI, Northwestern University LucaSoldainiAllen Institute for AI 495-507 @@ -15857,7 +15857,7 @@ Siti UmairahMd Salleh Hong ChoonOh PavitraKrishnaswamy - NancyChen + NancyChen 185-193 Utilizing natural language processing techniques in clinical conversations is effective to improve the efficiency of health management workflows for medical staff and patients. Dialogue segmentation and topic categorization are two fundamental steps for processing verbose spoken conversations and highlighting informative spans for downstream tasks. However, in practical use cases, due to the variety of segmentation granularity and topic definition, and the lack of diverse annotated corpora, no generic models are readily applicable for domain-specific applications. In this work, we introduce and adopt a joint model for dialogue segmentation and topic categorization, and conduct a case study on healthcare follow-up calls for diabetes management; we provide insights from both data and model perspectives toward performance and robustness. 2023.emnlp-industry.19 @@ -15991,7 +15991,7 @@ Empower Large Language Model to Perform Better on Industrial Domain-Specific Question Answering FangkaiYang - PuZhao + PuZhao ZezhongWang LuWang BoQiao @@ -16265,7 +16265,7 @@ Automatic Linking of Judgements to <fixed-case>UK</fixed-case> <fixed-case>S</fixed-case>upreme <fixed-case>C</fixed-case>ourt Hearings HadeelSaadany - ConstantinOrasan + ConstantinOrasan 492-500 One the most important archived legal material in the UK is the Supreme Court published judgements and video recordings of court sittings for the decided cases. The impact of Supreme Court published material extends far beyond the parties involved in any given case as it provides landmark rulings on arguable points of law of the greatest public and constitutional importance. However, the recordings of a case are usually very long which makes it both time and effort consuming for legal professionals to study the critical arguments in the legal deliberations. In this research, we summarise the second part of a combined research-industrial project for building an automated tool designed specifically to link segments in the text judgement to semantically relevant timespans in the videos of the hearings. The tool is employed as a User-Interface (UI) platform that provides a better access to justice by bookmarking the timespans in the videos which contributed to the final judgement of the case. We explain how we employ AI generative technology to retrieve the relevant links and show that the customisation of the GPT text embeddings to our dataset achieves the best accuracy for our automatic linking system. 2023.emnlp-industry.47 @@ -16336,7 +16336,7 @@ Investigating the Role and Impact of Disfluency on Summarization VarunNathan - AyushKumar + AyushKumar JithendraVepa 541-551 Contact centers handle both chat and voice calls for the same domain. As part of their workflow, it is a standard practice to summarize the conversations once they conclude. A significant distinction between chat and voice communication lies in the presence of disfluencies in voice calls, such as repetitions, restarts, and replacements. These disfluencies are generally considered noise for downstream natural language understanding (NLU) tasks. While a separate summarization model for voice calls can be trained in addition to chat specific model for the same domain, it requires manual annotations for both the channels and adds complexity arising due to maintaining two models. Therefore, it’s crucial to investigate if a model trained on fluent data can handle disfluent data effectively. While previous research explored impact of disfluency on question-answering and intent detection, its influence on summarization is inadequately studied. Our experiments reveal up to 6.99-point degradation in Rouge-L score, along with reduced fluency, consistency, and relevance when a fluent-trained model handles disfluent data. Replacement disfluencies have the highest negative impact. To mitigate this, we examine Fused-Fine Tuning by training the model with a combination of fluent and disfluent data, resulting in improved performance on both public and real-life datasets. Our work highlights the significance of incorporating disfluency in training summarization models and its advantages in an industrial setting. @@ -16365,7 +16365,7 @@ <fixed-case>E</fixed-case>2<fixed-case>E</fixed-case> Spoken Entity Extraction for Virtual Agents KaranSingla Yeon-JunKim - SrinivasBangalore + SrinivasBangalore 567-574 In human-computer conversations, extracting entities such as names, street addresses and email addresses from speech is a challenging task. In this paper, we study the impact of fine-tuning pre-trained speech encoders on extracting spoken entities in human-readable form directly from speech without the need for text transcription. We illustrate that such a direct approach optimizes the encoder to transcribe only the entity relevant portions of speech ignoring the superfluous portions such as carrier phrases, or spell name entities. In the context of dialog from an enterprise virtual agent, we demonstrate that the 1-step approach outperforms the typical 2-step approach which first generates lexical transcriptions followed by text-based entity extraction for identifying spoken entities. 2023.emnlp-industry.54 @@ -16500,7 +16500,7 @@ BesnikFetahu ZhiyuChen OlegRokhlenko - ShervinMalmasi + ShervinMalmasi 663-674 E-commerce product catalogs contain billions of items. Most products have lengthy titles, as sellers pack them with product attributes to improve retrieval, and highlight key product aspects. This results in a gap between such unnatural products titles, and how customers refer to them. It also limits how e-commerce stores can use these seller-provided titles for recommendation, QA, or review summarization. Inspired by recent work on instruction-tuned LLMs, we present InstructPTS, a controllable approach for the task of Product Title Summarization (PTS). Trained using a novel instruction fine-tuning strategy, our approach is able to summarize product titles according to various criteria (e.g. number of words in a summary, inclusion of specific phrases, etc.). Extensive evaluation on a real-world e-commerce catalog shows that compared to simple fine-tuning of LLMs, our proposed approach can generate more accurate product name summaries, with an improvement of over 14 and 8 BLEU and ROUGE points, respectively. 2023.emnlp-industry.63 @@ -16547,7 +16547,7 @@ JinMiao XiaoyuSun JiayiChen - AlexanderHauptmann + AlexanderHauptmann HanjunDai WeiWei 707-722 @@ -16689,7 +16689,7 @@ HadasKotek ChristopherKlein ZidiXiu - JasonWilliams + JasonWilliams 820-827 Controversy is a reflection of our zeitgeist, and an important aspect to any discourse. The rise of large language models (LLMs) as conversational systems has increased public reliance on these systems for answers to their various questions. Consequently, it is crucial to systematically examine how these models respond to questions that pertaining to ongoing debates. However, few such datasets exist in providing human-annotated labels reflecting the contemporary discussions. To foster research in this area, we propose a novel construction of a controversial questions dataset, expanding upon the publicly released Quora Question Pairs Dataset. This dataset presents challenges concerning knowledge recency, safety, fairness, and bias. We evaluate different LLMs using a subset of this dataset, illuminating how they handle controversial issues and the stances they adopt. This research ultimately contributes to our understanding of LLMs’ interaction with controversial issues, paving the way for improvements in their comprehension and handling of complex societal debates. 2023.emnlp-industry.76 @@ -16701,7 +16701,7 @@ Angel: Enterprise Search System for the Non-Profit Industry SaifulHaq AshutoshSharma - PushpakBhattacharyya + PushpakBhattacharyya 828-835 Non-profit industry need a system for accurately matching fund-seekers (e.g., AMERICAN NATIONAL RED CROSS) with fund-givers (e.g., BILL AND MELINDA GATES FOUNDATION) aligned in cause (e.g., cancer) and target beneficiary group (e.g., children). In this paper, we create an enterprise search system “ANGEL” for the non-profit industry that takes a fund-giver’s mission description as input and returns a ranked list of fund-seekers as output, and vice-versa. ANGEL employs ColBERT, a neural information retrieval model, which we enhance by exploiting the two techniques of (a) Syntax-aware local attention (SLA) to combine syntactic information in the mission description with multi-head self-attention and (b) Dense Pseudo Relevance Feedback (DPRF) for augmentation of short mission descriptions. We create a mapping dictionary “non-profit-dict” to curate a “non-profit-search database” containing information on 594K fund-givers and 194K fund-seekers from IRS-990 filings for the non-profit industry search engines . We also curate a “non-profit-evaluation” dataset containing scored matching between 463 fund-givers and 100 fund-seekers. The research is in collaboration with a philanthropic startup that identifies itself as an “AI matching platform, fundraising assistant, and philanthropy search base.” Domain experts at the philanthropic startup annotate the non-profit evaluation dataset and continuously evaluate the performance of ANGEL. ANGEL achieves an improvement of 0.14 MAP@10 and 0.16 MRR@10 over the state-of-the-art baseline on the non-profit evaluation dataset. To the best of our knowledge, ours is the first effort at building an enterprise search engine based on neural information retrieval for the non-profit industry. 2023.emnlp-industry.77 diff --git a/data/xml/2023.eval4nlp.xml b/data/xml/2023.eval4nlp.xml index 01c48bbd2b..b3533b82b2 100644 --- a/data/xml/2023.eval4nlp.xml +++ b/data/xml/2023.eval4nlp.xml @@ -71,7 +71,7 @@ Zero-shot Probing of Pretrained Language Models for Geography Knowledge NitinRamrakhiyaniInternational Institute of Information Technology, Hyderabad and Tata Consultancy Services Limited, India VasudevaVarmaInternational Institute of Information Technology Hyderabad, Dhirubhai Ambani Institute Of Information and Communication Technology - GirishPalshikar + GirishPalshikar SachinPawar 49-61 Gauging the knowledge of Pretrained Language Models (PLMs) about facts in niche domains is an important step towards making them better in those domains. In this paper, we aim at evaluating multiple PLMs for their knowledge about world Geography. We contribute (i) a sufficiently sized dataset of masked Geography sentences to probe PLMs on masked token prediction and generation tasks, (ii) benchmark the performance of multiple PLMs on the dataset. We also provide a detailed analysis of the performance of the PLMs on different Geography facts. @@ -95,7 +95,7 @@ Yu-PengChenUniversity of Florida AbhilashBudharapu LisaAnthonyUniversity of Florida - BonnieDorrUniversity of Florida + BonnieDorrUniversity of Florida 85-99 With the aim of improving work efficiency, we examine how Large Language Models (LLMs) can better support the handoff of information by summarizing user interactions in collaborative intelligence analysis communication. We experiment with interaction logs, or a record of user interactions with a system. Inspired by chain-of-thought prompting, we describe a technique to avoid API token limits with recursive summarization requests. We then apply ChatGPT over multiple iterations to extract named entities, topics, and summaries, combined with interaction sequence sentences, to generate summaries of critical events and results of analysis sessions. We quantitatively evaluate the generated summaries against human-generated ones using common accuracy metrics (e.g., ROUGE-L, BLEU, BLEURT, and TER). We also report qualitative trends and the factuality of the output. We find that manipulating the audience feature or providing single-shot examples minimally influences the model’s accuracy. While our methodology successfully summarizes interaction logs, the lack of significant results raises questions about prompt engineering and summarization effectiveness generally. We call on explainable artificial intelligence research to better understand how terms and their placement may change LLM outputs, striving for more consistent prompt engineering guidelines. 2023.eval4nlp-1.7 @@ -145,8 +145,8 @@ FuhaiSong HuiHuang JinghaoYuan - MuyunYang - TiejunZhaoHarbin Institute of Technology + MuyunYang + TiejunZhaoHarbin Institute of Technology 139-148 Recently, Large Language Models (LLMs) have boosted the research in natural language processing and shown impressive capabilities across numerous domains, including machine translation evaluation. This paper presents our methods developed for the machine translation evaluation sub-task of the Eval4NLP 2023 Shared Task. Based on the provided LLMs, we propose a generation-based method as well as a probability-based method to perform evaluation, explore different strategies when selecting the demonstrations for in-context learning, and try different ensemble methods to further improve the evaluation accuracy. The experiment results on the development set and test set demonstrate the effectiveness of our proposed method. 2023.eval4nlp-1.11 @@ -167,7 +167,7 @@ <fixed-case>LTRC</fixed-case>_<fixed-case>IIITH</fixed-case>’s 2023 Submission for Prompting Large Language Models as Explainable Metrics Task PavanBaswani AnanyaMukherjee - ManishShrivastavaInternational Institute of Information Technology Hyderabad, India + ManishShrivastavaInternational Institute of Information Technology Hyderabad, India 156-163 In this report, we share our contribution to the Eval4NLP Shared Task titled “Prompting Large Language Models as Explainable Metrics.” We build our prompts with a primary focus on effective prompting strategies, score-aggregation, and explainability for LLM-based metrics. We participated in the track for smaller models by submitting the scores along with their explanations. According to the Kendall correlation scores on the leaderboard, our MT evaluation submission ranks second-best, while our summarization evaluation submission ranks fourth, with only a 0.06 difference from the leading submission. 2023.eval4nlp-1.13 @@ -202,7 +202,7 @@ Reference-Free Summarization Evaluation with Large Language Models AbbasAkkasiCarleton University - KathleenFraserNational Research Council Canada + KathleenFraserNational Research Council Canada MajidKomeiliCarleton University 193-201 With the continuous advancement in unsupervised learning methodologies, text generation has become increasingly pervasive. However, the evaluation of the quality of the generated text remains challenging. Human annotations are expensive and often show high levels of disagreement, in particular for certain tasks characterized by inherent subjectivity, such as translation and summarization.Consequently, the demand for automated metrics that can reliably assess the quality of such generative systems and their outputs has grown more pronounced than ever. In 2023, Eval4NLP organized a shared task dedicated to the automatic evaluation of outputs from two specific categories of generative systems: machine translation and summarization. This evaluation was achieved through the utilization of prompts with Large Language Models. Participating in the summarization evaluation track, we propose an approach that involves prompting LLMs to evaluate six different latent dimensions of summarization quality. In contrast to many previous approaches to summarization assessments, which emphasize lexical overlap with reference text, this method surfaces the importance of correct syntax in summarization evaluation. Our method resulted in the second-highest performance in this shared task, demonstrating its effectiveness as a reference-free evaluation. @@ -214,7 +214,7 @@ Little Giants: Exploring the Potential of Small <fixed-case>LLM</fixed-case>s as Evaluation Metrics in Summarization in the <fixed-case>E</fixed-case>val4<fixed-case>NLP</fixed-case> 2023 Shared Task NeemaKotonya SaranKrishnasamy - JoelTetreault + JoelTetreault AlejandroJaimesDataminr 202-218 This paper describes and analyzes our participation in the 2023 Eval4NLP shared task, which focuses on assessing the effectiveness of prompt-based techniques to empower Large Language Models to handle the task of quality estimation, particularly in the context of evaluating machine translations and summaries. We conducted systematic experiments with various prompting techniques, including standard prompting, prompts informed by annotator instructions, and innovative chain-of-thought prompting. In addition, we integrated these approaches with zero-shot and one-shot learning methods to maximize the efficacy of our evaluation procedures. Our work reveals that combining these approaches using a “small”, open source model (orca_mini_v3_7B) yields competitive results. diff --git a/data/xml/2023.fever.xml b/data/xml/2023.fever.xml index da458eb542..8e3ed47e65 100644 --- a/data/xml/2023.fever.xml +++ b/data/xml/2023.fever.xml @@ -9,7 +9,7 @@ OanaCocarascu ZhijiangGuo ArpitMittal - MichaelSchlichtkrull + MichaelSchlichtkrull JamesThorne AndreasVlachos Association for Computational Linguistics @@ -53,7 +53,7 @@ An Entity-based Claim Extraction Pipeline for Real-world Biomedical Fact-checking - AmelieWuehrlUniversity of Stuttgart, Universität Stuttgart + AmelieWuehrlUniversity of Stuttgart, Universität Stuttgart LaraGrimminger RomanKlingerUniversity of Stuttgart 29-37 diff --git a/data/xml/2023.fieldmatters.xml b/data/xml/2023.fieldmatters.xml index 0474ae84f8..5e7f2fe49e 100644 --- a/data/xml/2023.fieldmatters.xml +++ b/data/xml/2023.fieldmatters.xml @@ -11,7 +11,7 @@ TatianaShavrina EricLe Ferrand ValentinMalykh - FrancisTyers + FrancisTyers TimofeyArkhangelskiy VladislavMikhailov Association for Computational Linguistics @@ -81,7 +81,7 @@ NikolaosKokkasAthena Research Center VasileiosArampatzakisAthena Research Center VasileiosSevetlidisAthena Research Center - StellaMarkantonatouILSP/R.C. “Athena” + StellaMarkantonatouILSP/R.C. “Athena” GeorgePavlidisAthena Research Center 40-45 Automatic Speech Recognition (ASR) models can aid field linguists by facilitating the creation of text corpora from oral material. Training ASR systems for low-resource languages can be a challenging task not only due to lack of resources but also due to the work required for the preparation of a training dataset. We present a pipeline for data processing and ASR model training for low-resourced languages, based on the language family. As a case study, we collected recordings of Pomak, an endangered South East Slavic language variety spoken in Greece. Using the proposed pipeline, we trained the first Pomak ASR model. diff --git a/data/xml/2023.findings.xml b/data/xml/2023.findings.xml index cbfcc8ca62..43bd9096a1 100644 --- a/data/xml/2023.findings.xml +++ b/data/xml/2023.findings.xml @@ -20,8 +20,8 @@ Using Punctuation as an Adversarial Attack on Deep Learning-Based <fixed-case>NLP</fixed-case> Systems: An Empirical Study BrianFormentoNus Chuan ShengFooInstitute for Infocomm Research - Luu AnhTuanNanyang Technological University, Singapore - See KiongNgNational University of Singapore + Luu AnhTuanNanyang Technological University, Singapore + See KiongNgNational University of Singapore 1-34 This work empirically investigates punctuation insertions as adversarial attacks on NLP systems. Data from experiments on three tasks, five datasets, and six models with four attacks show that punctuation insertions, when limited to a few symbols (apostrophes and hyphens), are a superior attack vector compared to character insertions due to 1) a lower after-attack accuracy (A_{aft-atk}) than alphabetical character insertions; 2) higher semantic similarity between the resulting and original texts; and 3) a resulting text that is easier and faster to read as assessed with the Test of Word Reading Efficiency (TOWRE)). The tests also indicate that 4) grammar checking does not mitigate punctuation insertions and 5) punctuation insertions outperform word-level attacks in settings with a limited number of word synonyms and queries to the victim’s model. Our findings indicate that inserting a few punctuation types that result in easy-to-read samples is a general attack mechanism. In light of this threat, we assess the impact of punctuation insertions, potential mitigations, the mitigation’s tradeoffs, punctuation insertion’s worst-case scenarios and summarize our findings in a qualitative casual map, so that developers can design safer, more secure systems. 2023.findings-eacl.1 @@ -186,7 +186,7 @@ LiesbethAlleinKU Leuven MarlonSaelensKU Leuven RubenCartuyvelsCatholic University of Leuven - Marie-FrancineMoensKU Leuven + Marie-FrancineMoensKU Leuven 176-189 Leveraging contextual knowledge has become standard practice in automated claim verification, yet the impact of temporal reasoning has been largely overlooked. Our study demonstrates that time positively influences the claim verification process of evidence-based fact-checking. The temporal aspects and relations between claims and evidence are first established through grounding on shared timelines, which are constructed using publication dates and time expressions extracted from their text. Temporal information is then provided to RNN-based and Transformer-based classifiers before or after claim and evidence encoding. Our time-aware fact-checking models surpass base models by up to 9% Micro F1 (64.17%) and 15% Macro F1 (47.43%) on the MultiFC dataset. They also outperform prior methods that explicitly model temporal relations between evidence. Our findings show that the presence of temporal information and the manner in which timelines are constructed greatly influence how fact-checking models determine the relevance and supporting or refuting character of evidence documents. 2023.findings-eacl.13 @@ -276,7 +276,7 @@ ZhiruoWangCarnegie Mellon University GraceCuencaPrinceton University ShuyanZhouCarnegie Mellon University - Frank F.XuCarnegie Mellon University + Frank F.XuCarnegie Mellon University GrahamNeubigCarnegie Mellon University 265-273 While there has been a recent burgeoning of applications at the intersection of natural and programming languages, such as code generation and code summarization, these applications are usually English-centric. This creates a barrier for program developers who are not proficient in English. To mitigate this gap in technology development across languages, we propose a multilingual dataset, MCoNaLa, to benchmark code generation from natural language commands extending beyond English. Modeled off of the methodology from the English Code/Natural Language Challenge (CoNaLa) dataset, we annotated a total of 896 NL-Code pairs in three languages: Spanish, Japanese, and Russian. We present a systematic evaluation on MCoNaLa by testing state-of-the-art code generation systems. Although the difficulties vary across three languages, all systems lag significantly behind their English counterparts, revealing the challenges in adapting code generation to new languages. @@ -318,7 +318,7 @@ <fixed-case>CALM</fixed-case>-Bench: A Multi-task Benchmark for Evaluating Causality-Aware Language Models DhairyaDalalUniversity of Galway PaulBuitelaarUniversity of Galway - MihaelArcanUniversity of Galway + MihaelArcanUniversity of Galway 296-311 Causal reasoning is a critical component of human cognition and is required across a range of question-answering (QA) tasks (such as abductive reasoning, commonsense QA, and procedural reasoning). Research on causal QA has been underdefined, task-specific, and limited in complexity. Recent advances in foundation language models (such as BERT, ERNIE, and T5) have shown the efficacy of pre-trained models across diverse QA tasks. However, there is limited research exploring the causal reasoning capabilities of those language models and no standard evaluation benchmark. To unify causal QA research, we propose CALM-Bench, a multi-task benchmark for evaluating causality-aware language models (CALM). We present a standardized definition of causal QA tasks and show empirically that causal reasoning can be generalized and transferred across different QA tasks. Additionally, we share a strong multi-task baseline model which outperforms single-task fine-tuned models on the CALM-Bench tasks. 2023.findings-eacl.23 @@ -349,7 +349,7 @@ AliAhmadvandEmory University JuliaKiselevaMicrosoft Research YangLiuMicrosoft Research - Ahmed HassanAwadallahMicrosoft Research + Ahmed HassanAwadallahMicrosoft Research MingZhongUniversity of Illinois MiladShokouhiMicrosoft 331-342 @@ -376,7 +376,7 @@ DaichiYamaguchiNagoya University ReiMiyataNagoya University SayukaShimadaNagoya University - SatoshiSatoNagoya University + SatoshiSatoNagoya University 359-375 This study presents an analytical evaluation of neural text simplification (TS) systems. Because recent TS models are trained in an end-to-end fashion, it is difficult to grasp their abilities to perform particular simplification operations. For the advancement of TS research and development, we should understand in detail what current TS systems can and cannot perform in comparison with human performance. To that end, we first developed an analytical evaluation framework consisting of fine-grained taxonomies of simplification strategies (at both the surface and content levels) and errors. Using this framework, we annotated TS instances produced by professional human editors and multiple neural TS systems and compared the results. Our analyses concretely and quantitatively revealed a wide gap between humans and systems, specifically indicating that systems tend to perform deletions and local substitutions while excessively omitting important information, and that the systems can hardly perform information addition operations. Based on our analyses, we also provide detailed directions to address these limitations. 2023.findings-eacl.27 @@ -446,8 +446,8 @@ Few-Shot Structured Policy Learning for Multi-Domain and Multi-Task Dialogues ThibaultCordierUniversity of Avignon TanguyUrvoyOrange - FabriceLefèvreAvignon Univ. - Lina M.Rojas BarahonaOrange Innovation Research + FabriceLefèvreAvignon Univ. + Lina M.Rojas BarahonaOrange Innovation Research 432-441 Reinforcement learning has been widely adopted to model dialogue managers in task-oriented dialogues. However, the user simulator provided by state-of-the-art dialogue frameworks are only rough approximations of human behaviour. The ability to learn from a small number of human interactions is hence crucial, especially on multi-domain and multi-task environments where the action space is large. We therefore propose to use structured policies to improve sample efficiency when learning on these kinds of environments. We also evaluate the impact of learning from human vs simulated experts. Among the different levels of structure that we tested, the graph neural networks (GNNs) show a remarkable superiority by reaching a success rate above 80% with only 50 dialogues when learning from simulated experts. They also show superiority when learning from human experts, although a performance drop was observed. We therefore suggest to concentrate future research efforts on bridging the gap between human data, simulators and automatic evaluators in dialogue frameworks. 2023.findings-eacl.32 @@ -488,7 +488,7 @@ Paper Bullets: Modeling Propaganda with the Help of Metaphor DanielBaleato RodríguezUniversity of Amsterdam VernaDankersUniversity of Edinburgh - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence EkaterinaShutovaUniversity of Amsterdam 472-489 Propaganda aims to persuade an audience by appealing to emotions and using faulty reasoning, with the purpose of promoting a particular point of view. Similarly, metaphor modifies the semantic frame, thus eliciting a response that can be used to tune up or down the emotional volume of the message. Given the close relationship between them, we hypothesize that, when modeling them computationally, it can be beneficial to do so jointly. In particular, we perform multi-task learning with propaganda identification as the main task and metaphor detection as an auxiliary task. To the best of our knowledge, this is the first work that models metaphor and propaganda together. We experiment with two datasets for identifying propaganda techniques in news articles and in memes shared on social media. We find that leveraging metaphor improves model performance, particularly for the two most common propaganda techniques: loaded language and name-calling. @@ -517,7 +517,7 @@ FengNanAws Ai NicholasDingwallAmazon AI Labs William YangWangAmazon AWS AI Labs - KathleenMcKeownColumbia University and Amazon (Amazon Scholar) + KathleenMcKeownColumbia University and Amazon (Amazon Scholar) 512-525 Missing information is a common issue of dialogue summarization where some information in the reference summaries is not covered in the generated summaries. To address this issue, we propose to utilize natural language inference (NLI) models to improve coverage while avoiding introducing factual inconsistencies. Specifically, we use NLI to compute fine-grained training signals to encourage the model to generate content in the reference summaries that have not been covered, as well as to distinguish between factually consistent and inconsistent generated sentences. Experiments on the DialogSum and SAMSum datasets confirm the effectiveness of the proposed approach in balancing coverage and faithfulness, validated with automatic metrics and human evaluations. Additionally, we compute the correlation between commonly used automatic metrics with human judgments in terms of three different dimensions regarding coverage and factual consistency to provide insight into the most suitable metric for evaluating dialogue summaries. 2023.findings-eacl.37 @@ -616,7 +616,7 @@ Abstractive Document Summarization with Summary-length Prediction JingunKwonTokyo Institute of Technology, Naver Corporation HidetakaKamigaitoNara Institute of Science and Technology - ManabuOkumuraTokyo Institute of Technology + ManabuOkumuraTokyo Institute of Technology 618-624 Recently, we can obtain a practical abstractive document summarization model by fine-tuning a pre-trained language model (PLM). Since the pre-training for PLMs does not consider summarization-specific information such as the target summary length, there is a gap between the pre-training and fine-tuning for PLMs in summarization tasks. To fill the gap, we propose a method for enabling the model to understand the summarization-specific information by predicting the summary length in the encoder and generating a summary of the predicted length in the decoder in fine-tuning. Experimental results on the WikiHow, NYT, and CNN/DM datasets showed that our methods improve ROUGE scores from BART by generating summaries of appropriate lengths. Further, we observed about 3.0, 1,5, and 3.1 point improvements for ROUGE-1, -2, and -L, respectively, from GSum on the WikiHow dataset. Human evaluation results also showed that our methods improve the informativeness and conciseness of summaries. 2023.findings-eacl.45 @@ -630,9 +630,9 @@ JingunKwonTokyo Institute of Technology, Naver Corporation HidetakaKamigaitoNara Institute of Science and Technology Young-InSongNaver - ManabuOkumuraTokyo Institute of Technology + ManabuOkumuraTokyo Institute of Technology 625-632 - + 2023.findings-eacl.46 kwon-etal-2023-hierarchical JisuShinKorea Advanced Institute of Science & Technology and Korea Advanced Institute of Science & Technology SukminChoKorea Advanced Institute of Science and Technology SeungYoonHan - Jong C.ParkKorea Advanced Institute of Science and Technology + Jong C.ParkKorea Advanced Institute of Science and Technology 11670-11686 Trolling in online communities typically involves disruptive behaviors such as provoking anger and manipulating discussions, leading to a polarized atmosphere and emotional distress. Robust moderation is essential for mitigating these negative impacts and maintaining a healthy and constructive community atmosphere. However, effectively addressing trolls is difficult because their behaviors vary widely and require different response strategies (RSs) to counter them. This diversity makes it challenging to choose an appropriate RS for each specific situation.To address this challenge, our research investigates whether humans have preferred strategies tailored to different types of trolling behaviors.Our findings reveal a correlation between the types of trolling encountered and the preferred RS. In this paper, we introduce a methodology for generating counter-responses to trolls by recommending appropriate RSs, supported by a dataset aligning these strategies with human preferences across various troll contexts. The experimental results demonstrate that our proposed approach guides constructive discussion and reduces the negative effects of trolls, thereby enhancing the online community environment. 2024.findings-emnlp.683 @@ -28790,7 +28790,7 @@ hai-coaching/
Soda-Eval: Open-Domain Dialogue Evaluation in the age of <fixed-case>LLM</fixed-case>s JohnMendonçaInstituto Superior Técnico IsabelTrancosoInstituto Superior Técnico - AlonLaviePhrase and School of Computer Science, Carnegie Mellon University + AlonLaviePhrase and School of Computer Science, Carnegie Mellon University 11687-11708 Although human evaluation remains the gold standard for open-domain dialogue evaluation, the growing popularity of automated evaluation using Large Language Models (LLMs) has also extended to dialogue. However, most frameworks leverage benchmarks that assess older chatbots on aspects such as fluency and relevance, which are not reflective of the challenges associated with contemporary models. In fact, a qualitative analysis on Soda. (Kim et al., 2023), a GPT-3.5 generated dialogue dataset, suggests that current chatbots may exhibit several recurring issues related to coherence and commonsense knowledge, but generally produce highly fluent and relevant responses.Noting the aforementioned limitations, this paper introduces Soda-Eval, an annotated dataset based on Soda that covers over 120K turn-level assessments across 10K dialogues, where the annotations were generated by GPT-4. Using Soda-Eval as a benchmark, we then study the performance of several open-access instruction-tuned LLMs, finding that dialogue evaluation remains challenging. Fine-tuning these models improves performance over few-shot inferences, both in terms of correlation and explanation. 2024.findings-emnlp.684 @@ -28868,7 +28868,7 @@ hai-coaching/
<fixed-case>CSLM</fixed-case>: A Framework for Question Answering Dataset Generation through Collaborative Small Language Models YimingWangNoah’s Ark Lab, Huawei Technologies Ltd. - YangLiuHuawei Technologies Ltd. + YangLiuHuawei Technologies Ltd. LingchenWangHuawei Technologies Ltd. AnXiaoHuawei Technologies Ltd. 11816-11825 @@ -28907,7 +28907,7 @@ hai-coaching/
FuliFengUniversity of Science and Technology of China FengbinZhu QifanWangMeta AI - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore 11858-11875 Self-detection for Large Language Models (LLMs) seeks to evaluate the trustworthiness of the LLM’s output by leveraging its own capabilities, thereby alleviating the issue of output hallucination. However, existing self-detection approaches only retrospectively evaluate answers generated by LLM, typically leading to the over-trust in incorrectly generated answers. To tackle this limitation, we propose a novel self-detection paradigm that considers the comprehensive answer space beyond LLM-generated answers. It thoroughly compares the trustworthiness of multiple candidate answers to mitigate the over-trust in LLM-generated incorrect answers. Building upon this paradigm, we introduce a two-step framework, which firstly instructs LLM to reflect and provide justifications for each candidate answer, and then aggregates the justifications for comprehensive target answer evaluation. This framework can be seamlessly integrated with existing approaches for superior self-detection. Extensive experiments on six datasets spanning three tasks demonstrate the effectiveness of the proposed framework. 2024.findings-emnlp.693 @@ -28969,7 +28969,7 @@ hai-coaching/
ChengweiWei ZhengyuanLiuI2R GeyuLinInstitute of Infocomm Research, A*STAR - Nancy F.Chen + Nancy F.Chen 11939-11950 As the rapidly advancing domain of natural language processing (NLP), large language models (LLMs) have emerged as powerful tools for interpreting human commands and generating text across various tasks. Nonetheless, the resilience of LLMs to handle text containing inherent errors, stemming from human interactions and collaborative systems, has not been thoroughly explored. Our study investigates the resilience of LLMs against five common types of disruptions including 1) ASR (Automatic Speech Recognition) errors, 2) OCR (Optical Character Recognition) errors, 3) grammatical mistakes, 4) typographical errors, and 5) distractive content. We aim to investigate how these models react by deliberately embedding these errors into instructions. Our findings reveal that while some LLMs show a degree of resistance to certain types of noise, their overall performance significantly suffers. This emphasizes the importance of further investigation into enhancing model resilience. In response to the observed decline in performance, our study also evaluates a “re-pass” strategy, designed to purify the instructions of noise before the LLMs process them. Our analysis indicates that correcting noisy instructions, particularly for open-source LLMs, presents significant challenges. 2024.findings-emnlp.697 @@ -29076,7 +29076,7 @@ hai-coaching/
The Shape of Word Embeddings: Quantifying Non-Isometry with Topological Data Analysis OndřejDraganov - StevenSkienaState University of New York - Stony Brook, Stony Brook University, SUNY at Stony Brook, , State University of New York at Stony Brook and State University of New York at Stony Brook + StevenSkienaState University of New York - Stony Brook, Stony Brook University, SUNY at Stony Brook, , State University of New York at Stony Brook and State University of New York at Stony Brook 12080-12099 Word embeddings represent language vocabularies as clouds of d-dimensional points. We investigate how information is conveyed by the general shape of these clouds, instead of representing the semantic meaning of each token. Specifically, we use the notion of persistent homology from topological data analysis (TDA) to measure the distances between language pairs from the shape of their unlabeled embeddings. These distances quantify the degree of non-isometry of the embeddings. To distinguish whether these differences are random training errors or capture real information about the languages, we use the computed distance matrices to construct language phylogenetic trees over 81 Indo-European languages. Careful evaluation shows that our reconstructed trees exhibit strong and statistically-significant similarities to the reference. 2024.findings-emnlp.705 @@ -29176,7 +29176,7 @@ hai-coaching/
DilshodAzizovMohamed bin Zayed University of Artificial Intelligence Zain MuhammadMujahid HilalAlQuabehMohamed bin Zayed University of Artificial Intelligence - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence ShangsongLiangSUN YAT-SEN UNIVERSITY 12217-12231 In an era where information is quickly shared across many cultural and language contexts, the neutrality and integrity of news media are essential. Ensuring that media content remains unbiased and factual is crucial for maintaining public trust. With this in mind, we introduce SAFARI (CroSs-lingual BiAs and Factuality Detection in News MediA and News ARtIcles), a novel corpus of news media and articles for predicting political bias and the factuality of reporting in a multilingual and cross-lingual setup. To the best of our knowledge, this corpus is unprecedented in its collection and introduces a dataset for political bias and factuality for three tasks: (i) media-level, (ii) article-level, and (iii) joint modeling at the article-level. At the media and article levels, we evaluate the cross-lingual ability of the models; however, in joint modeling, we evaluate on English data. Our frameworks set a new benchmark in the cross-lingual evaluation of political bias and factuality. This is achieved through the use of various Multilingual Pre-trained Language Models (MPLMs) and Large Language Models (LLMs) coupled with ensemble learning methods. @@ -29243,7 +29243,7 @@ hai-coaching/ ShehzaadDhuliawalaSwiss Federal Institute of Technology YahangQi BernhardSchölkopfELLIS Institute and Max Planck Institute for Intelligent Systems, Max-Planck Institute - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan MrinmayaSachanSwiss Federal Institute of Technology 12309-12325 Implicit Personalization (IP) is a phenomenon of language models inferring a user’s background from the implicit cues in the input prompts and tailoring the response based on this inference. While previous work has touched upon various instances of this problem, there lacks a unified framework to study this behavior. This work systematically studies IP through a rigorous mathematical formulation, a multi-perspective moral reasoning framework, and a set of case studies. Our theoretical foundation for IP relies on a structural causal model and introduces a novel method, indirect intervention, to estimate the causal effect of a mediator variable that cannot be directly intervened upon. Beyond the technical approach, we also introduce a set of moral reasoning principles based on three schools of moral philosophy to study when IP may or may not be ethically appropriate. Equipped with both mathematical and ethical insights, we present three diverse case studies illustrating the varied nature of the IP problem and offer recommendations for future research. @@ -29257,7 +29257,7 @@ hai-coaching/ When the Misidentified Adverbial Phrase Functions as a Complement YigeChenThe Chinese University of Hong Kong KyuwonKimSeoul National University - KyungTaeLimSeoul National University of Science and Technology + KyungTaeLimSeoul National University of Science and Technology JungyeulParkUniversity of British Columbia ChulwooParkAnyang University 12326-12336 @@ -29333,7 +29333,7 @@ hai-coaching/ WilliamBrandonMassachusetts Institute of Technology RadostinCholakovComputer Science and Artificial Intelligence Laboratory, Electrical Engineering & Computer Science JonathanRagan-KelleyMassachusetts Institute of Technology - Eric P.XingMohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University + Eric P.XingMohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University YoonKimMassachusetts Institute of Technology 12419-12433 The deployment of large language models (LLMs) is often constrained by memory bandwidth, where the primary bottleneck is the cost of transferring model parameters from the GPU’s global memory to its registers. When coupled with custom kernels that fuse the dequantization and matmul operations, weight-only quantization can thus enable faster inference by reducing the amount of memory movement. However, developing high-performance kernels for weight-quantized LLMs presents substantial challenges, especially when the weights are compressed to non-evenly-divisible bit widths (e.g., 3 bits) with non-uniform, lookup table (LUT) quantization. This paper describes FLUTE, a flexible lookup table engine for LUT-quantized LLMs, which uses offline restructuring of the quantized weight matrix to minimize bit manipulations associated with unpacking, and vectorization and duplication of the lookup table to mitigate shared memory bandwidth constraints. At batch sizes < 32 and quantization group size of 128 (typical in LLM inference), the FLUTE kernel can be 2-4x faster than existing GEMM kernels. As an application of FLUTE, we explore a simple extension to lookup table-based NormalFloat quantization and apply it to quantize LLaMA3 to various configurations, obtaining competitive quantization performance against strong baselines while obtaining an end-to-end throughput increase of 1.5 to 2 times. @@ -29431,7 +29431,7 @@ hai-coaching/ Diverse and Effective Synthetic Data Generation for Adaptable Zero-Shot Dialogue State Tracking James D.FinchEmory University - Jinho D.ChoiEmory University + Jinho D.ChoiEmory University 12527-12544 We demonstrate substantial performance gains in zero-shot dialogue state tracking (DST) by enhancing training data diversity through synthetic data generation.Existing DST datasets are severely limited in the number of application domains and slot types they cover due to the high costs of data collection, restricting their adaptability to new domains.This work addresses this challenge with a novel, fully automatic data generation approach that creates synthetic zero-shot DST datasets.Distinguished from previous methods, our approach can generate dialogues across a massive range of application domains, complete with silver-standard dialogue state annotations and slot descriptions.This technique is used to create the D0T dataset for training zero-shot DST models, encompassing an unprecedented 1,000+ domains. Experiments on the MultiWOZ benchmark show that training models on diverse synthetic data improves Joint Goal Accuracy by 6.7%, achieving results competitive with models 13.5 times larger than ours. 2024.findings-emnlp.731 @@ -29480,7 +29480,7 @@ hai-coaching/ RuobingXie WenqiSunRenmin University of China LeyuLinWeChat, Tencent - XinZhaoRenmin University of China + XinZhaoRenmin University of China Ji-RongWenRenmin University of China 12580-12592 With recommender systems broadly deployed in various online platforms, many efforts have been devoted to learning user preferences and building effective sequential recommenders. However, existing work mainly focuses on capturing user implicit preferences from historical interactions and simply matching them with the next behavior, instead of predicting user explicit intentions. This may lead to inappropriate recommendations. In light of this issue, we propose the adversarial user intention learning approach for sequential recommendaiton, named AuriSRec. The major novelty of our approach is to explicitly predict user current intentions when making recommendations, by inferring their decision-making process as explained in target reviews (reviews written after interacting with the ground-truth item). Specifically, AuriSRec conducts adversarial learning between an intention generator and a discriminator. The generator predicts user intentions by taking their historical reviews and behavioral sequences as inputs, while target reviews provide guidance. Beyond typical sequential modeling methods in the field of natural language process (NLP), a decoupling-based review encoder and a hybrid attention fusion mechanism are introduced to filter noise and enhance the generation capacity. On the other hand, the discriminator determines whether the intention is generated or real based on their matching degree to the target item, thereby guiding the generator to produce gradually improved intentions. Extensive experiments on five real-world datasets demonstrate the effectiveness of our approach. @@ -29588,7 +29588,7 @@ hai-coaching/ David S.Batistadeepset ChristinaWille AoifeCahillDataminr - Joel R.Tetreault + Joel R.Tetreault AlejandroJaimesDataminr 12705-12722 Humanitarian organizations can enhance their effectiveness by analyzing data to discover trends, gather aggregated insights, manage their security risks, support decision-making, and inform advocacy and funding proposals. However, data about violent incidents with direct impact and relevance for humanitarian aid operations is not readily available. An automatic data collection and NLP-backed classification framework aligned with humanitarian perspectives can help bridge this gap. In this paper, we present HumVI – a dataset comprising news articles in three languages (English, French, Arabic) containing instances of different types of violent incidents categorized by the humanitarian sector they impact, e.g., aid security, education, food security, health, and protection. Reliable labels were obtained for the dataset by partnering with a data-backed humanitarian organization, Insecurity Insight. We provide multiple benchmarks for the dataset, employing various deep learning architectures and techniques, including data augmentation and mask loss, to address different task-related challenges, e.g., domain expansion. The dataset is publicly available at https://github.com/dataminr-ai/humvi-dataset. @@ -29673,7 +29673,7 @@ hai-coaching/ <fixed-case>SQFT</fixed-case>: Low-cost Model Adaptation in Low-precision Sparse Foundation Models - Juan PabloMunozIntel + Juan PabloMunozIntel JinjieYuanIntel NileshJainIntel 12817-12832 @@ -29751,7 +29751,7 @@ hai-coaching/ Multi-Target Cross-Lingual Summarization: a novel task and a language-neutral approach DiogoPernesOutSystems and Universidade do Porto Gonçalo M.CorreiaPriberam Informática SA - AfonsoMendes + AfonsoMendes 12908-12924 Cross-lingual summarization aims to bridge language barriers by summarizing documents in different languages. However, ensuring semantic coherence across languages is an overlooked challenge and can be critical in several contexts. To fill this gap, we introduce multi-target cross-lingual summarization as the task of summarizing a document into multiple target languages while ensuring that the produced summaries are semantically similar. We propose a principled re-ranking approach to this problem and a multi-criteria evaluation protocol to assess semantic coherence across target languages, marking a first step that will hopefully stimulate further research on this problem. 2024.findings-emnlp.755 @@ -29962,7 +29962,7 @@ hai-coaching/ What Matters in Memorizing and Recalling Facts? Multifaceted Benchmarks for Knowledge Probing in Language Models - XinZhao + XinZhao NaokiYoshinagaInstitute of Industrial Science, the University of Tokyo DaisukeObaELYZA 13186-13214 @@ -30035,7 +30035,7 @@ hai-coaching/ Exploring the Potential of Multimodal <fixed-case>LLM</fixed-case> with Knowledge-Intensive Multimodal <fixed-case>ASR</fixed-case> MinghanWangMonash University YuxiaWang - Thuy-TrangVuMonash University + Thuy-TrangVuMonash University EhsanShareghiMonash University and University of Cambridge RezaHafMonash University 13274-13288 @@ -30051,7 +30051,7 @@ hai-coaching/ SewoongOh LudwigSchmidtStanford University Jason EWestonNew York University and Facebook - LukeZettlemoyerUniversity of Washington, Facebook and Meta + LukeZettlemoyerUniversity of Washington, Facebook and Meta XianLiFacebook AI 13289-13308 We propose a new method, instruction back-and-forth translation, to improve the quality of instruction-tuning data used for aligning large language models (LLMs). Given preprocessed texts from an initial web corpus (e.g. Dolma (Soldaini et al., 2024)), we generate synthetic instructions using the backtranslation approach proposed by Li et al., (2023), filter the generated data and rewrite the responses to improve their quality further based on the initial texts. Given similar quantities of instructions, fine-tuning Llama-2 on our (synthetic instruction, rewritten response) pairs yields better AlpacaEval win rates than using other common instruction datasets such as Humpback, ShareGPT, Open Orca, Alpaca-GPT4 and Self-instruct, at both 7B and 70B parameter scales. We also demonstrate that rewriting the responses with an LLM is different from direct distillation: the former process yields better win rate at 70B scale, and the two text distributions exhibit significant distinction in the embedding space. Besides, we provide analyses showing that our backtranslated instructions are of higher quality than other sources of synthetic instructions, while our responses are more diverse and complex than what can be obtained from distillation. Overall we find that instruction back-and-forth translation combines the best of both worlds—making use of the information diversity and quantity found on the web, while ensuring the quality of the responses which is necessary for effective alignment. @@ -30065,7 +30065,7 @@ hai-coaching/ ZhaoJin SiddharthParekh SameenaShahJ.P. Morgan Chase - CarolynRoseSchool of Computer Science, Carnegie Mellon University + CarolynRoseSchool of Computer Science, Carnegie Mellon University 13309-13328 Forms constitute a large portion of layout-rich documents that convey information through key-value pairs. Form understanding involves two main tasks, namely, the identification of keys and values (a.k.a Key Information Extraction or KIE) and the association of keys to corresponding values (a.k.a. Relation Extraction or RE). State of the art models for form understanding often rely on training paradigms that yield poorly calibrated output probabilities and low performance on RE. In this paper, we present AliGATr, a graph-based model that uses a generative objective to represent complex grid-like layouts that are often found in forms. Using a grid-based graph topology, our model learns to generate the layout of each page token by token in a data efficient manner. Despite using 30% fewer parameters than the smallest SotA, AliGATr performs on par with or better than SotA models on the KIE and RE tasks against four datasets. We also show that AliGATr’s output probabilities are better calibrated and do not exhibit the over-confident distributions of other SotA models. 2024.findings-emnlp.778 @@ -30110,7 +30110,7 @@ hai-coaching/ AjayPatel KanishkSinghMoveworks ChrisCallison-BurchAllen Institute for Artificial Intelligence and University of Pennsylvania - KathleenMcKeown + KathleenMcKeown ZhouYuColumbia University 13376-13390 The goal of text style transfer is to transform the style of texts while preserving their original meaning, often with only a few examples of the target style. Existing style transfer methods generally rely on the few-shot capabilities of large language models or on complex controllable text generation approaches that are inefficient and underperform on fluency metrics. We introduce TinyStyler, a lightweight but effective approach, which leverages a small language model (800M params) and pre-trained authorship embeddings to perform efficient, few-shot text style transfer. We evaluate on the challenging task of authorship style transfer and find TinyStyler outperforms strong approaches such as GPT-4. We also evaluate TinyStyler’s ability to perform text attribute style transfer (formal \leftrightarrow informal) with automatic and human evaluations and find that the approach outperforms recent controllable text generation methods. @@ -30221,7 +30221,7 @@ hai-coaching/ Topic Modeling: Contextual Token Embeddings Are All You Need DimoAngelovUniversity of Ottawa - DianaInkpenUniversity of Ottawa + DianaInkpenUniversity of Ottawa 13528-13539 The goal of topic modeling is to find meaningful topics that capture the information present in a collection of documents. The main challenges of topic modeling are finding the optimal number of topics, labeling the topics, segmenting documents by topic, and evaluating topic model performance. Current neural approaches have tackled some of these problems but none have been able to solve all of them. We introduce a novel topic modeling approach, Contextual-Top2Vec, which uses document contextual token embeddings, it creates hierarchical topics, finds topic spans within documents and labels topics with phrases rather than just words. We propose the use of BERTScore to evaluate topic coherence and to evaluate how informative topics are of the underlying documents. Our model outperforms the current state-of-the-art models on a comprehensive set of topic model evaluation metrics. 2024.findings-emnlp.790 @@ -30241,7 +30241,7 @@ hai-coaching/ Margin Matching Preference Optimization: Enhanced Model Alignment with Granular Feedback - KyuyoungKimKorea Advanced Institute of Science & Technology + KyuyoungKimKorea Advanced Institute of Science & Technology Ah JeongSeoKorea Advanced Institute of Science & Technology HaoLiuGoogle DeepMind JinwooShinKorea Advanced Institute of Science & Technology @@ -30323,7 +30323,7 @@ hai-coaching/ Reference-based Metrics Disprove Themselves in Question Generation - BangNguyenUniversity of Notre Dame + BangNguyenUniversity of Notre Dame MengxiaYuUniversity of Notre Dame YunHuangUniversity of Illinois at Urbana-Champaign MengJiangUniversity of Notre Dame @@ -30461,7 +30461,7 @@ hai-coaching/ <fixed-case>TOWER</fixed-case>: Tree Organized Weighting for Evaluating Complex Instructions NoahZiems - ZhihanZhang + ZhihanZhang MengJiangUniversity of Notre Dame 13803-13810 Evaluating the ability of large language models (LLMs) to follow complex human-written instructions is essential for their deployment in real-world applications. While benchmarks like Chatbot Arena use human judges to assess model performance, they are resource-intensive and time-consuming. Alternative methods using LLMs as judges, such as AlpacaEval, MT Bench, WildBench, and InFoBench offer improvements but still do not capture that certain complex instruction aspects are more important than others to follow.To address this gap, we propose a novel evaluation metric, TOWER, that incorporates human-judged importance into the assessment of complex instruction following. We show that human annotators agree with tree-based representations of these complex instructions nearly as much as they agree with other human annotators. We release tree-based annotations of the InFoBench dataset and the corresponding evaluation code to facilitate future research. @@ -30570,7 +30570,7 @@ hai-coaching/ XingyuanLi ChunhaoZhang MengyueWu - Kenny Q.ZhuUniversity of Texas at Arlington + Kenny Q.ZhuUniversity of Texas at Arlington 13972-13983 This paper attempts to discover communication patterns automatically within dog vocalizations in a data-driven approach, which breaks the barrier previous approaches that rely on human prior knowledge on limited data. We present a self-supervised approach with HuBERT, enabling the accurate classification of phones, and an adaptive grammar induction method that identifies phone sequence patterns that suggest a preliminary vocabulary within dog vocalizations. Our results show that a subset of this vocabulary has substantial causality relations with certain canine activities, suggesting signs of stable semantics associated with these “words”. 2024.findings-emnlp.816 @@ -30651,7 +30651,7 @@ hai-coaching/ Jainit SushilBafna KunalKartik HarshitaKhandelwalUCLA Computer Science Department, University of California, Los Angeles - ManishShrivastavaInternational Institute of Information Technology Hyderabad, India + ManishShrivastavaInternational Institute of Information Technology Hyderabad, India VivekGuptaUniversity of Pennsylvania, United States MohitBansalUniversity of North Carolina at Chapel Hill DanRothUniversity of Pennsylvania @@ -30705,7 +30705,7 @@ hai-coaching/ WenCui DavanHarrison Xin EricWangUniversity of California, Santa Cruz - MarilynWalkerUniversity of California, Santa Cruz + MarilynWalkerUniversity of California, Santa Cruz 14120-14157 Large language models (LLMs) capable of casual conversation have recently become widely available. We hypothesize that users of conversational systems want a more personalized experience, and existing work shows that users are highly receptive to personalized questions (PQs). Question Generation tasks, however, focus on factual questions from textual excerpts. To create a PQ generator, we first identify over 400 real user interests by anonymously aggregating ~39K user models. We then populate prompt templates with these 400 interests and use an LLM to generate PQs customized to user interests. The result is PerQs, a novel corpus of ~19K question/answer pairs. We evaluate PerQs at scale in the unique context of the Alexa Prize. Our results show significant positive effects on perceived conversation quality. We then fine-tune, deploy, and evaluate PerQy, a neural model that generates PQs in real-time. When evaluated against several competitive LLM baselines, PerQy produced the most natural and engaging responses. 2024.findings-emnlp.826 @@ -30769,7 +30769,7 @@ hai-coaching/ AdityaPillai IsabelleAugensteinUniversity of Copenhagen IrynaGurevychInstitute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 14199-14230 The increased use of large language models (LLMs) across a variety of real-world applications calls for mechanisms to verify the factual accuracy of their outputs. In this work, we present Factcheck-Bench, a holistic end-to-end framework for annotating and evaluating the factuality of LLM-generated responses, which encompasses a multi-stage annotation scheme designed to yield detailed labels for fact-checking and correcting not just the final prediction, but also the intermediate steps that a fact-checking system might need to take. Based on this framework, we construct an open-domain factuality benchmark in three-levels of granularity: claim, sentence, and document. We further propose a system, Factcheck-GPT, which follows our framework, and we show that it outperforms several popular LLM fact-checkers. We make our annotation tool, annotated data, benchmark, and code available at https://github.com/yuxiaw/Factcheck-GPT. 2024.findings-emnlp.830 @@ -30782,8 +30782,8 @@ hai-coaching/ Md AsibRahmanBangladesh University of Engineering and Technology K S M TozammelHossainUniversity of North Texas EnamulHoqueYork University - ShafiqJotySalesForce.com and Nanyang Technological University - Md RizwanParvezQatar Computing Research Institute and Bosch + ShafiqJotySalesForce.com and Nanyang Technological University + Md RizwanParvezQatar Computing Research Institute and Bosch 14231-14244 Retrieval Augmented Generation (RAG) has been shown to enhance the factual accuracy of Large Language Models (LLMs) by providing external evidence, but existing methods often suffer from limited reasoning capabilities (e.g., multi-hop complexities) in effectively using such evidence, particularly when using open-source LLMs. To mitigate this gap, in this paper, we introduce a novel framework, **Open-RAG**, designed to enhance reasoning capabilities in RAG with open-source LLMs. Our framework transforms an arbitrary dense LLM into a parameter-efficient sparse mixture of experts (MoE) model capable of handling complex reasoning tasks, including both single- and multi-hop queries. Open-RAG uniquely trains the model to navigate challenging distractors that appear relevant but are misleading. By combining the constructive learning and architectural transformation, Open-RAG leverages latent learning, dynamically selecting relevant experts and integrating external knowledge effectively for more accurate and contextually relevant responses. Additionally, we propose a hybrid adaptive retrieval method to determine retrieval necessity and balance the trade-off between performance gain and inference speed. Experimental results show that Open-RAG outperforms state-of-the-art LLMs and RAG models in various knowledge-intensive tasks. Our method based on Llama2-7B sets new benchmarks, surpassing ChatGPT-RAG and Self-RAG. For example, in multi-hop HotpotQA, it achieves an EM score of 63.3, compared to RAG 2.0’s 54 and Command R+’s 60. 2024.findings-emnlp.831 @@ -30793,7 +30793,7 @@ hai-coaching/ Cactus: Towards Psychological Counseling Conversations using Cognitive Behavioral Theory SuyeonLeeYonsei University - SunghwanKim + SunghwanKim MinjuKim DongjinKangYonsei University DongilYang @@ -30847,7 +30847,7 @@ hai-coaching/ SvitlanaVakulenkoAmazon Ionut TeodorSorodocAmazon BillByrneAmazon and University of Cambridge - Adriàde GispertAmazon + Adriàde GispertAmazon 14301-14310 Long-form question answering (LFQA) aims at generating in-depth answers to end-user questions, providing relevant information beyond the direct answer. However, existing retrievers are typically optimized towards information that directly targets the question, missing out on such contextual information. Furthermore, there is a lack of training data for relevant context. To this end, we propose and compare different weak supervision techniques to optimize retrieval for contextual information. Experiments demonstrate improvements on the end-to-end QA performance on ASQA, a dataset for long-form question answering. Importantly, as more contextual information is retrieved, we improve the relevant page recall for LFQA by 14.7% and the groundedness of generated long-form answers by 12.5%. Finally, we show that long-form answers often anticipate likely follow-up questions, via experiments on a conversational QA dataset. 2024.findings-emnlp.835 @@ -30857,7 +30857,7 @@ hai-coaching/ Persuasiveness of Generated Free-Text Rationales in Subjective Decisions: A Case Study on Pairwise Argument Ranking MohamedElaraby - DianeLitmanUniversity of Pittsburgh, University of Pittsburgh and University of Pittsburgh + DianeLitmanUniversity of Pittsburgh, University of Pittsburgh and University of Pittsburgh Xiang LorraineLi AhmedMagoodaMicrosoft 14311-14329 @@ -30918,7 +30918,7 @@ hai-coaching/ How You Prompt Matters! <fixed-case>E</fixed-case>ven Task-Oriented Constraints in Instructions Affect <fixed-case>LLM</fixed-case>-Generated Text Detection RyutoKoikeUniversity of Pennsylvania and Mohamed bin Zayed University of Artificial Intelligence MasahiroKanekoMohamed bin Zayed University of Artificial Intelligence and Tokyo Institute of Technology, Tokyo Institute of Technology - NaoakiOkazakiTokyo Institute of Technology + NaoakiOkazakiTokyo Institute of Technology 14384-14395 To combat the misuse of Large Language Models (LLMs), many recent studies have presented LLM-generated-text detectors with promising performance. When users instruct LLMs to generate texts, the instruction can include different constraints depending on the user’s need. However, most recent studies do not cover such diverse instruction patterns when creating datasets for LLM detection. In this paper, we reveal that even task-oriented constraints — constraints that would naturally be included in an instruction and are not related to detection-evasion — cause existing powerful detectors to have a large variance in detection performance. We focus on student essay writing as a realistic domain and manually create task-oriented constraints based on several factors for essay quality. Our experiments show that the standard deviation (SD) of current detector performance on texts generated by an instruction with such a constraint is significantly larger (up to an SD of 14.4 F1-score) than that by generating texts multiple times or paraphrasing the instruction. We also observe an overall trend where the constraints can make LLM detection more challenging than without them. Finally, our analysis indicates that the high instruction-following ability of LLMs fosters the large impact of such constraints on detection performance. 2024.findings-emnlp.841 @@ -30933,7 +30933,7 @@ hai-coaching/ SiyaoPengLudwig-Maximilians-Universität München RobertLitschko AnnaKorhonenUniversity of Cambridge - BarbaraPlankLudwig-Maximilians-Universität München and IT University of Copenhagen + BarbaraPlankLudwig-Maximilians-Universität München and IT University of Copenhagen 14396-14419 Human label variation (HLV) is a valuable source of information that arises when multiple human annotators provide different labels for valid reasons. In Natural Language Inference (NLI) earlier approaches to capturing HLV involve either collecting annotations from many crowd workers to represent human judgment distribution (HJD) or use expert linguists to provide detailed explanations for their chosen labels. While the former method provides denser HJD information, obtaining it is resource-intensive. In contrast, the latter offers richer textual information but it is challenging to scale up to many human judges. Besides, large language models (LLMs) are increasingly used as evaluators (“LLM judges”) but with mixed results, and few works aim to study HJDs. This study proposes to exploit LLMs to approximate HJDs using a small number of expert labels and explanations. Our experiments show that a few explanations significantly improve LLMs’ ability to approximate HJDs with and without explicit labels, thereby providing a solution to scale up annotations for HJD. However, fine-tuning smaller soft-label aware models with the LLM-generated model judgment distributions (MJDs) presents partially inconsistent results: while similar in distance, their resulting fine-tuned models and visualized distributions differ substantially. We show the importance of complementing instance-level distance measures with a global-level shape metric and visualization to more effectively evaluate MJDs against human judgment distributions. 2024.findings-emnlp.842 @@ -31010,7 +31010,7 @@ hai-coaching/ SilviaCasola Soda MaremLo ValerioBasileUniversity of Turin - AlessandroMazzeiUniversity of Turin + AlessandroMazzeiUniversity of Turin 14480-14494 Generating ironic content is challenging: it requires a nuanced understanding of context and implicit references and balancing seriousness and playfulness. Moreover, irony is highly subjective and can depend on various factors, such as social, cultural, or generational aspects. This paper explores whether Large Language Models (LLMs) can learn to generate ironic responses to social media posts. To do so, we fine-tune two models to generate ironic and non-ironic content and deeply analyze their outputs’ linguistic characteristics, their connection to the original post, and their similarity to the human-written replies. We also conduct a large-scale human evaluation of the outputs. Additionally, we investigate whether LLMs can learn a form of irony tied to a generational perspective, with mixed results. 2024.findings-emnlp.847 @@ -31034,7 +31034,7 @@ hai-coaching/ Minimal Yet Big Impact: How <fixed-case>AI</fixed-case> Agent Back-channeling Enhances Conversational Engagement through Conversation Persistence and Context Richness Jin YeaJangKorea Electronics Technology Institute - SaimShinKorea Electronics technology Institute + SaimShinKorea Electronics technology Institute GahgeneGweonSeoul National University 14509-14521 The increasing use of AI agents in conversational services, such as counseling, highlights the importance of back-channeling (BC) as an active listening strategy to enhance conversational engagement. BC improves conversational engagement by providing timely acknowledgments and encouraging the speaker to talk. This study investigates the effect of BC provided by an AI agent on conversational engagement, offering insights for future AI conversational service design. We conducted an experiment with 55 participants, divided into Todak_BC and Todak_NoBC groups based on the presence or absence of the BC feature in Todak, a conversational agent. Each participant engaged in nine sessions with predetermined subjects and questions. We collected and analyzed approximately 6 hours and 30 minutes of conversation logs to evaluate conversational engagement using both quantitative (conversation persistence, including conversation duration and number of utterances) and qualitative metrics (context richness, including self-disclosure and topic diversity). The findings reveal significantly higher conversational engagement in the Todak_BC group compared to the Todak_NoBC group across all metrics (p<0.05). Additionally, the impact of BC varies across sessions, suggesting that conversation characteristics such as question type and topic sensitivity can influence BC effectiveness. @@ -31109,7 +31109,7 @@ hai-coaching/ ShrutiSinghIIT Gandhinagar ShoaibAlam HusainMalwat - MayankSinghIndian Institute of Technology Gandhinagar + MayankSinghIndian Institute of Technology Gandhinagar 14598-14613 The ever-increasing volume of paper submissions makes it difficult to stay informed about the latest state-of-the-art research. To address this challenge, we introduce LEGOBench, a benchmark for evaluating systems that generate scientific leaderboards. LEGOBench is curated from 22 years of preprint submission data on arXiv and more than 11k machine learning leaderboards on the PapersWithCode portal. We present a language model-based and four graph-based leaderboard generation task configuration. We evaluate popular encoder-only scientific language models as well as decoder-only large language models across these task configurations. State-of-the-art models showcase significant performance gaps in automatic leaderboard generation on LEGOBench. The code is available on GitHub and the dataset is hosted on OSF. 2024.findings-emnlp.855 @@ -31135,9 +31135,9 @@ hai-coaching/ ZhenlinSuSouth China University of Technology MoYuWeChat AI, Tencent JinXu - Jinho D.ChoiEmory University + Jinho D.ChoiEmory University JieZhou - FeiLiuEmory University + FeiLiuEmory University 14626-14641 Factual inconsistencies pose a significant hurdle for the faithful summarization by generative models. While a major direction to enhance inconsistency detection is to derive stronger Natural Language Inference (NLI) models, we propose an orthogonal aspect that underscores the importance of incorporating task-specific taxonomy into the inference. To this end, we consolidate key error types of inconsistent facts in summaries, and incorporate them to facilitate both the zero-shot and supervised paradigms of LLMs. Extensive experiments on ten datasets of five distinct domains suggest that, zero-shot LLM inference could benefit from the explicit solution space depicted by the error type taxonomy, and achieves state-of-the-art performance overall, surpassing specialized non-LLM baselines, as well as recent LLM baselines. We further distill models that fuse the taxonomy into parameters through our designed prompt completions and supervised training strategies, efficiently substituting state-of-the-art zero-shot inference with much larger LLMs. 2024.findings-emnlp.857 @@ -31186,7 +31186,7 @@ hai-coaching/ Muhammad ArslanManzoor YuxiaWang MinghanWangMonash University - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 14683-14701 Empathy plays a pivotal role in fostering prosocial behavior, often triggered by the sharing of personal experiences through narratives. However, modeling empathy using NLP approaches remains challenging due to its deep interconnection with human interaction dynamics. Previous approaches, which involve fine-tuning language models (LMs) on human-annotated empathic datasets, have had limited success. In our pursuit of improving empathy understanding in LMs, we propose several strategies, including contrastive learning with masked LMs and supervised fine-tuning with large language models. While these methods show improvements over previous methods, the overall results remain unsatisfactory. To better understand this trend, we performed an analysis which reveals a low agreement among annotators. This lack of consensus hinders training and highlights the subjective nature of the task. We also explore the cultural impact on annotations. To study this, we meticulously collected story pairs in Urdu language and find that subjectivity in interpreting empathy among annotators appears to be independent of cultural background. Our systematic exploration of LMs’ understanding of empathy reveals substantial opportunities for further investigation in both task formulation and modeling. 2024.findings-emnlp.861 @@ -31198,7 +31198,7 @@ hai-coaching/ WitoldSosnowski ArkadiuszModzelewski KingaSkorupskaPolish-Japanese Institute of Information Technology in Warsaw - JahnaOtterbacherOpen University of Cyprus + JahnaOtterbacherOpen University of Cyprus AdamWierzbickiPolish-Japanese Institute of Information Technology in Warsaw 14702-14723 As narratives shape public opinion and influence societal actions, distinguishing between truthful and misleading narratives has become a significant challenge. To address this, we introduce the EU DisinfoTest, a novel benchmark designed to evaluate the efficacy of Language Models in identifying disinformation narratives. Developed through a Human-in-the-Loop methodology and grounded in research from EU DisinfoLab, the EU DisinfoTest comprises more than 1,300 narratives. Our benchmark includes persuasive elements under Logos, Pathos, and Ethos rhetorical dimensions. We assessed state-of-the-art LLMs, including the newly released GPT-4o, on their capability to perform zero-shot classification of disinformation narratives versus credible narratives. Our findings reveal that LLMs tend to regard narratives with authoritative appeals as trustworthy, while those with emotional appeals are frequently incorrectly classified as disinformative. These findings highlight the challenges LLMs face in nuanced content interpretation and suggest the need for tailored adjustments in LLM training to better handle diverse narrative structures. @@ -31285,7 +31285,7 @@ hai-coaching/ A Critical Look at Meta-evaluating Summarisation Evaluation Metrics - XiangDaiCSIRO + XiangDaiCSIRO SarvnazKarimiCSIRO BiaoyanFangCSIRO 14795-14808 @@ -31340,7 +31340,7 @@ hai-coaching/ JieChenRenmin University of China YupengZhangBeijing Baichuan Intelligence Technology Co., Ltd. BingningWangBaichuan Inc. - XinZhaoRenmin University of China + XinZhaoRenmin University of China Ji-RongWenRenmin University of China WeipengChen 14855-14865 @@ -31381,7 +31381,7 @@ hai-coaching/ Analyzing Context Contributions in <fixed-case>LLM</fixed-case>-based Machine Translation EmmanouilZaranisInstituto Superior Técnico Nuno MGuerreiroUnbabel and Instituto Superior Técnico - AndreMartinsInstituto Superior Técnico and Unbabel + AndreMartinsInstituto Superior Técnico and Unbabel 14899-14924 Large language models (LLMs) have achieved state-of-the-art performance in machine translation (MT) and demonstrated the ability to leverage in-context learning through few-shot examples. However, the mechanisms by which LLMs use different parts of the input context remain largely unexplored. In this work, we provide a comprehensive analysis of context utilization in MT, studying how LLMs use various context parts, such as few-shot examples and the source text, when generating translations. We highlight several key findings: (1) the source part of few-shot examples appears to contribute more than its corresponding targets, irrespective of translation direction; (2) finetuning LLMs with parallel data alters the contribution patterns of different context parts; and (3) there is a positional bias where earlier few-shot examples have higher contributions to the translated sequence. Finally, we demonstrate that inspecting anomalous context contributions can potentially uncover pathological translations, such as hallucinations. Our findings shed light on the internal workings of LLM-based MT which go beyond those known for standard encoder-decoder MT models. 2024.findings-emnlp.876 @@ -31592,7 +31592,7 @@ hai-coaching/ HelenaWuFaculty of Arts of the University of Lisbon BeatrizSilvaUnbabel Daan VanStigtUnbabel and Unbabel - AndreMartinsInstituto Superior Técnico and Unbabel + AndreMartinsInstituto Superior Técnico and Unbabel 15222-15239 While machine translation (MT) systems are achieving increasingly strong performance on benchmarks, they often produce translations with errors and anomalies. Understanding these errors can potentially help improve the translation quality and user experience. This paper introduces xTower, an open large language model (LLM) built on top of TowerBase designed to provide free-text explanations for translation errors in order to guide the generation of a corrected translation. The quality of the generated explanations by xTower are assessed via both intrinsic and extrinsic evaluation. We ask expert translators to evaluate the quality of the explanations across two dimensions: relatedness towards the error span being explained and helpfulness in error understanding and improving translation quality. Extrinsically, we test xTower across various experimental setups in generating translation corrections, demonstrating significant improvements in translation quality. Our findings highlight xTower’s potential towards not only producing plausible and helpful explanations of automatic translations, but also leveraging them to suggest corrected translations. 2024.findings-emnlp.892 @@ -31607,7 +31607,7 @@ hai-coaching/ YichengXuTokyo Institute of Technology, Tokyo Institute of Technology MingkunXu KotaroFunakoshiInstitute of Innovative Research, Tokyo Institute of Technology - ManabuOkumuraTokyo Institute of Technology, Tokyo Institute of Technology + ManabuOkumuraTokyo Institute of Technology, Tokyo Institute of Technology 15240-15253 Multi-modal machine translation (MMT) can reduce ambiguity and semantic distortion compared with traditional machine translation (MT) by utilizing auxiliary information such as images. However, current MMT methods face two primary challenges. The first is their underperformance compared to MT methods based on pre-trained models. The second is the inadequate exploitation and integration of the image modality within the model, primarily due to a lack of triplet training data. A mainstream approach is to introduce large amounts of parallel and monolingual data to train the text model and the visual model separately. However, incorporating extensive external data can result in data imbalance, which may introduce biases during training. Additionally, the collection and cleaning of such large datasets is labor-intensive. To overcome these challenges, we introduce a novel, low-cost, large language model-based data augmentation method called LAMBDA, which can enrich the original samples and expand the dataset without requiring external images and text. We propose a fine-grained image captioning module with a noise filter to hierarchically and accurately extract unexploited information from images. Additionally, we design two specific prompts to guide the GPT-3.5 model in generating enriched texts and the corresponding translations. The enriched samples contain diverse text and strong connections between text and images, leading to significant improvements for MMT baselines, with the highest being an increase of up to 3.83 BLEU score and 3.61 METEOR score. 2024.findings-emnlp.893 @@ -31636,7 +31636,7 @@ hai-coaching/ JonathanRoweNorth Carolina State University BradfordMottNorth Carolina State University SnigdhaChaturvediDepartment of Computer Science, University of North Carolina at Chapel Hill - JamesLesterNorth Carolina State University + JamesLesterNorth Carolina State University 15270-15283 Dialogue act recognition is the task of classifying conversational utterances based on their communicative intent or function. To address this problem, we propose a novel two-phase processing approach called Dual-Process Masking. This approach streamlines the task by masking less important tokens in the input, identified through retrospective analysis of their estimated contribution during training. It enhances interpretability by using the masks applied during classification learning. Dual-Process Masking significantly improves performance over strong baselines for dialogue act recognition on a collaborative problem-solving dataset and three public dialogue benchmarks. 2024.findings-emnlp.895 @@ -31653,7 +31653,7 @@ hai-coaching/ ValentinaZantedeschiServiceNow Research DavidVazquezServiceNow research NicolasChapadosServiceNow Research - ChristopherPalPolytechnique Montreal + ChristopherPalPolytechnique Montreal PerouzTaslakianServiceNow 15284-15302 Prompts are often employed to condition decoder-only language model generation on reference information. Just-in-time processing of a context is inefficient due to the quadratic cost of self-attention operations, and caching is desirable. However, caching transformer states can easily require almost as much space as the model parameters. When the right context is not known in advance, caching the prompt can be challenging. This work addresses these limitations by introducing models that, inspired by the encoder-decoder architecture, use cross-attention to condition generation on reference text without the prompt. More precisely, we leverage pre-trained decoder-only models and only train a small number of added layers. We use Question-Answering (QA) as a testbed to evaluate the ability of our models to perform conditional generation and observe that they outperform prompt-based inference methods, are comparable to fine-tuned prompted LLMs, and drastically reduce the space footprint relative to standard KV caching by two orders of magnitude. Specifically, we introduced XC-Llama which converts a pre-trained Llama 2 into an encoder-decoder architecture by integrating cross-attention layers interleaved in between existing self-attention layers. @@ -31781,7 +31781,7 @@ hai-coaching/ Knowledge-Centric Templatic Views of Documents Isabel AlyssaCacholaDepartment of Computer Science, Whiting School of Engineering - SilviuCucerzanMicrosoft + SilviuCucerzanMicrosoft AllenHerring VuksanMijovic ErikOveson @@ -31838,7 +31838,7 @@ hai-coaching/ XinshuShen HongyiWu YadongZhang - ManLan + ManLan XiaopengBaiEast China Normal University ShaoguangMaoMicrosoft YuanbinWu @@ -31900,7 +31900,7 @@ hai-coaching/ Merge to Learn: Efficiently Adding Skills to Language Models with Model Merging JacobMorrisonAllen Institute for Artificial Intelligence - Noah A.SmithUniversity of Washington and Allen Institute for Artificial Intelligence + Noah A.SmithUniversity of Washington and Allen Institute for Artificial Intelligence HannanehHajishirziUniversity of Washington, University of Washington, Allen Institute for Artificial Intelligence and University of Washington, Seattle Pang WeiKohUniversity of Washington JesseDodgeAllen Institute for Artificial Intelligence @@ -32042,7 +32042,7 @@ hai-coaching/ TianyangLiuEdinburgh University, University of Edinburgh TianyiLi LiangCheng - MarkSteedmanUniversity of Edinburgh + MarkSteedmanUniversity of Edinburgh 15779-15786 Large Language Models (LLMs) are reported to hold undesirable attestation bias on inference tasks: when asked to predict if a premise P entails a hypothesis H, instead of considering H‘s conditional truthfulness entailed by P, LLMs tend to use the out-of-context truth label of H as a fragile proxy. In this paper, we propose a pipeline that exploits this bias to do explicit inductive inference. Our pipeline uses an LLM to transform a premise into a set of attested alternatives, and then aggregate answers of the derived new entailment inquiries to support the original inference prediction. On a directional predicate entailment benchmark, we demonstrate that by applying this simple pipeline, we can improve the overall performance of LLMs on inference and substantially alleviate the impact of their attestation bias. 2024.findings-emnlp.926 @@ -32229,7 +32229,7 @@ hai-coaching/ VaishnavTadiparthiHonda Research Institution US EhsanMoradi PariHonda Research Institute SimonStepputtisCarnegie Mellon University - JosephCampbellPurdue University + JosephCampbellPurdue University Katia P.Sycara 16002-16014 The correct specification of reward models is a well-known challenge in reinforcement learning.Hand-crafted reward functions often lead to inefficient or suboptimal policies and may not be aligned with user values.Reinforcement learning from human feedback is a successful technique that can mitigate such issues, however, the collection of human feedback can be laborious.Recent works have solicited feedback from pre-trained large language models rather than humans to reduce or eliminate human effort, however, these approaches yield poor performance in the presence of hallucination and other errors.This paper studies the advantages and limitations of reinforcement learning from large language model feedback and proposes a simple yet effective method for soliciting and applying feedback as a potential-based shaping function.We theoretically show that inconsistent rankings – which approximate ranking errors – lead to uninformative rewards with our approach. Our method empirically improves convergence speed and policy returns over commonly used baselines even with significant ranking errors, and eliminates the need for complex post-processing of reward functions. @@ -32280,7 +32280,7 @@ hai-coaching/ <fixed-case>BLASER</fixed-case> 2.0: a metric for evaluation and quality estimation of massively multilingual speech and text translation DavidDaleFAIR at Meta - Marta R.Costa-jussàMeta + Marta R.Costa-jussàMeta 16075-16085 We present BLASER 2.0, an automatic metric of machine translation quality which supports both speech and text modalities. Compared to its predecessor BLASER (Chen et al., 2023), BLASER 2.0 is based on better underlying text and speech representations that cover 202 text languages and 57 speech ones and extends the training data. BLASER 2.0 comes in two varieties: a reference-based and a reference-free (quality estimation) model. We demonstrate that the reference-free version is applicable not only at the dataset level, for evaluating the overall model performance, but also at the sentence level, for scoring individual translations. In particular, we show its applicability for detecting translation hallucinations and filtering training datasets to obtain more reliable translation models. The BLASER 2.0 models are publicly available at https://github.com/facebookresearch/sonar. 2024.findings-emnlp.943 @@ -32338,7 +32338,7 @@ hai-coaching/ Structured Chain-of-Thought Prompting for Few-Shot Generation of Content-Grounded <fixed-case>QA</fixed-case> Conversations - Md ArafatSultanInternational Business Machines + Md ArafatSultanInternational Business Machines JatinGanhotraInternational Business Machines Ramón FernandezAstudilloInternational Business Machines 16172-16187 @@ -32398,7 +32398,7 @@ hai-coaching/ Kowsik NandagopanD HritikLadia AnkitYadav - MayankSinghIndian Institute of Technology Gandhinagar + MayankSinghIndian Institute of Technology Gandhinagar 16239-16348 Large Language Models (LLMs) are increasingly ubiquitous, yet their ability to retain and reason about temporal information remains limited, hindering their application in real-world scenarios where understanding the sequential nature of events is crucial. Our study experiments with 12 state-of-the-art models (ranging from 2B to 70B+ parameters) on a novel numerical-temporal dataset, TempUN, spanning from 10,000 BCE to 2100 CE, to uncover significant temporal retention and comprehension limitations. We propose six metrics to assess three learning paradigms to enhance temporal knowledge acquisition. Our findings reveal that open-source models exhibit knowledge gaps more frequently, suggesting a trade-off between limited knowledge and incorrect responses. Additionally, various fine-tuning approaches significantly improved performance, reducing incorrect outputs and impacting the identification of ‘information not available’ in the generations. The associated dataset and code are available at the [URL](https://anonymous.4open.science/r/TempUN-ARR/). 2024.findings-emnlp.953 @@ -32446,7 +32446,7 @@ hai-coaching/ Inference and Verbalization Functions During In-Context Learning JunyiTao XiaoyinChenMila - Quebec Artificial Intelligence Institute - Nelson F.LiuStanford University + Nelson F.LiuStanford University 16394-16421 Large language models (LMs) are capable of in-context learning from a few demonstrations (example-label pairs) to solve new tasks during inference. Despite the intuitive importance of high-quality demonstrations, previous work has observed that, in some settings, ICL performance is minimally affected by irrelevant labels (Min et al., 2022). We hypothesize that LMs perform ICL with irrelevant labels via two sequential processes: an inference function that solves the task, followed by a verbalization function that maps the inferred answer to the label space. Importantly, we hypothesize that the inference function is invariant to remappings of the label space (e.g., “true”/“false” to “cat”/“dog”), enabling LMs to share the same inference function across settings with different label words. We empirically validate this hypothesis with controlled layer-wise interchange intervention experiments. Our findings confirm the hypotheses on multiple datasets and tasks (natural language inference, sentiment analysis, and topic classification) and further suggest that the two functions can be localized in specific layers across various open-sourced models, including GEMMA-7B, MISTRAL-7B-V0.3, GEMMA-2-27B, and LLAMA-3.1-70B. 2024.findings-emnlp.957 @@ -32562,10 +32562,10 @@ hai-coaching/ YilunZhaoYale University SemihYavuzSalesForce.com YeLiuSalesForce.com - ShafiqJotySalesForce.com and Nanyang Technological University + ShafiqJotySalesForce.com and Nanyang Technological University YingboZhouSalesforce Research CaimingXiongSalesforce Research - DragomirRadevYale University + DragomirRadevYale University RexYingYale University ArmanCohanYale University and Allen Institute for Artificial Intelligence 16553-16565 @@ -32687,7 +32687,7 @@ hai-coaching/ <fixed-case>T</fixed-case>ransfer<fixed-case>CVLM</fixed-case>: Transferring Cross-Modal Knowledge for Vision-Language Modeling DonghaChoiGwangju Institute of Science and Technology - Jung-jaeKimA*STAR + Jung-jaeKimA*STAR HyunjuLeeGwangju Institute of Science and Technology 16733-16746 Recent large vision-language multimodal models pre-trained with huge amount of image-text pairs show remarkable performances in downstream tasks. However, the multimodal pre-training has limitations in terms of resources and training time when it comes to obtaining new models that surpass existing models. To overcome these issues, we propose TransferCVLM, a method of efficient knowledge transfer that integrates pre-trained uni-modal models (and cross-modal fusion-encoder) into a combined vision-language model (CVLM), without pre-training the CVLM with large amount of multimodal data, and then for each task application, fine-tunes the CVLM and transfers the multimodal knowledge of a teacher vision-language model to the CVLM by using knowledge distillation techniques. We demonstrate that 1) the fine-tuned CVLM performs comparable to other vision-language models of similar size, that 2) the multimodal knowledge transfer consistently enhances the CVLM, and the knowledge-transferred CVLM composed of large-size unimodal models outperforms the teacher multimodal model in most of downstream tasks, and that 3) TransferCVLM can also be used for model compression when using small-size unimodal models. We estimate that the training of TransferCVLM takes only 6% of pre-training of other vision-language models. Our code is available at https://github.com/DMCB-GIST/TransferCVLM. @@ -32718,7 +32718,7 @@ hai-coaching/ Yew KenChia GuizhenChen WeiwenXuAlibaba Group - Anh TuanLuuNanyang Technological University + Anh TuanLuuNanyang Technological University SoujanyaPoriaSingapore University of Technology and Design LidongBingAlibaba Group 16763-16780 @@ -32746,7 +32746,7 @@ hai-coaching/ AliZareColumbia University ShiyuanHuangColumbia University Ming-HsuanYangGoogle and University of California at Merced - Shih-FuChangColumbia, Columbia University, Columbia University, Columbia University, Columbia University, Columbia University and Columbia University + Shih-FuChangColumbia, Columbia University, Columbia University, Columbia University, Columbia University, Columbia University and Columbia University LiZhangGoogle 16806-16820 Generating personalized responses, particularly in the context of video, poses a unique challenge for language models. This paper introduces the novel task of Personalized Video Comment Generation (PVCG), aiming to predict user comments tailored to both the input video and the user’s comment history, where the user is unseen during the model training process. Unlike existing video captioning tasks that ignores the personalization in the text generation process, we introduce PerVidCom, a new dataset specifically collected for this novel task with diverse personalized comments from YouTube. Recognizing the limitations of existing captioning metrics for evaluating this task, we propose a new automatic metric based on Large Language Models (LLMs) with few-shot in-context learning, named FICL-Score, specifically measuring quality from the aspects of emotion, language style and content relevance. We verify the proposed metric with human evaluations. We establish baselines using prominent Multimodal LLMs (MLLMs), analyze their performance discrepancies through extensive evaluation, and identifies directions for future improvement on this important task. Our research opens up a new direction of personalizing MLLMs and paves the way for future research. @@ -32875,7 +32875,7 @@ hai-coaching/ Fahad ShahbazKhanMohamed bin Zayed University of Artificial Intelligence and Linköping University Rao MuhammadAnwerMohamed bin Zayed University of Artificial Intelligence SalmanKhanMohamed bin Zayed University of Artificial Intelligence and Australian National University - TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne + TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne HishamCholakkalMBZUAI 16984-17002 In this paper, we introduce BiMediX, the first bilingual medical mixture of experts LLM designed for seamless interaction in both English and Arabic. Our model facilitates a wide range of medical interactions in English and Arabic, including multi-turn chats to inquire about additional details such as patient symptoms and medical history, multiple-choice question answering, and open-ended question answering. We propose a semi-automated English-to-Arabic translation pipeline with human refinement to ensure high-quality translations. We also introduce a comprehensive evaluation benchmark for Arabic medical LLMs. Furthermore, we introduce BiMed1.3M, an extensive Arabic-English bilingual instruction set that covers 1.3 Million diverse medical interactions, including 200k synthesized multi-turn doctor-patient chats, in a 1:2 Arabic-to-English ratio. Our model outperforms state-of-the-art Med42 and Meditron by average absolute gains of 2.5% and 4.1%, respectively, computed across multiple medical evaluation benchmarks in English, while operating at 8-times faster inference. Moreover, our BiMediX outperforms the generic Arabic-English bilingual LLM, Jais-30B, by average absolute gains of 10% on our Arabic and 15% on our bilingual evaluations across multiple datasets. Additionally, BiMediX exceeds the accuracy of GPT4 by 4.4% in open-ended question UPHILL evaluation and largely outperforms state-of-the-art open source medical LLMs in human evaluations of multi-turn conversations. Our trained models, instruction set, and source code are available at https://github.com/mbzuai-oryx/BiMediX. @@ -32967,7 +32967,7 @@ hai-coaching/ <fixed-case>P</fixed-case>ython<fixed-case>S</fixed-case>aga: Redefining the Benchmark to Evaluate Code Generating <fixed-case>LLM</fixed-case>s AnkitYadav HimanshuBeniwalIndian Institute of Technology Gandhinagar - MayankSinghIndian Institute of Technology Gandhinagar + MayankSinghIndian Institute of Technology Gandhinagar 17113-17126 Driven by the surge in code generation using large language models (LLMs), numerous benchmarks have emerged to evaluate these LLMs capabilities. We conducted a large-scale human evaluation of *HumanEval* and *MBPP*, two popular benchmarks for Python code generation, analyzing their diversity and difficulty. Our findings unveil a critical bias towards a limited set of programming concepts, neglecting most of the other concepts entirely. Furthermore, we uncover a worrying prevalence of easy tasks that can inflate model performance estimations. To address these limitations, we propose a novel benchmark, *PythonSaga*, featuring 185 hand-crafted prompts in a balanced representation of 38 programming concepts across diverse difficulty levels. The robustness of our benchmark is demonstrated by the poor performance of existing Code-LLMs. The code and data set are openly available to the NLP community at this [URL](https://github.com/PythonSaga/PythonSaga). 2024.findings-emnlp.996 @@ -33060,7 +33060,7 @@ hai-coaching/ RobertLitschko DiegoFrassinelliLudwig-Maximilians-Universität München BenjaminRothUniversität Vienna - BarbaraPlankLudwig-Maximilians-Universität München and IT University of Copenhagen + BarbaraPlankLudwig-Maximilians-Universität München and IT University of Copenhagen 17203-17217 One of the major aspects contributing to the striking performance of large language models (LLMs) is the vast amount of factual knowledge accumulated during pre-training. Yet, many LLMs suffer from self-inconsistency, which raises doubts about their trustworthiness and reliability. This paper focuses on entity type ambiguity, analyzing the proficiency and consistency of state-of-the-art LLMs in applying factual knowledge when prompted with ambiguous entities. To do so, we propose an evaluation protocol that disentangles knowing from applying knowledge, and test state-of-the-art LLMs on 49 ambiguous entities. Our experiments reveal that LLMs struggle with choosing the correct entity reading, achieving an average accuracy of only 85%, and as low as 75% with underspecified prompts. The results also reveal systematic discrepancies in LLM behavior, showing that while the models may possess knowledge, they struggle to apply it consistently, exhibit biases toward preferred readings, and display self-inconsistencies. This highlights the need to address entity ambiguity in the future for more trustworthy LLMs. 2024.findings-emnlp.1003 diff --git a/data/xml/2024.finnlp.xml b/data/xml/2024.finnlp.xml index d8db111801..7bc2b2512f 100644 --- a/data/xml/2024.finnlp.xml +++ b/data/xml/2024.finnlp.xml @@ -9,7 +9,7 @@ ArminehNourbakhsh ZhiqiangMa ChareseSmiley - VeroniqueHoste + VeroniqueHoste Sanjiv RanjanDas ManlingLi MohammadGhassemi @@ -303,7 +303,7 @@ AnubhavSarkar SwagataChakraborty SohomGhosh - Sudip KumarNaskar + Sudip KumarNaskar 244–247 Investors and other stakeholders like consumers and employees, increasingly consider ESG factors when making decisions about investments or engaging with companies. Taking into account the importance of ESG today, FinNLP-KDF introduced the ML-ESG-3 shared task, which seeks to determine the duration of the impact of financial news articles in four languages - English, French, Korean, and Japanese. This paper describes our team, LIPI’s approach towards solving the above-mentioned task. Our final systems consist of translation, paraphrasing and fine-tuning language models like BERT, Fin-BERT and RoBERTa for classification. We ranked first in the impact duration prediction subtask for French language. 2024.finnlp-1.25 @@ -360,7 +360,7 @@ Adapting <fixed-case>LLM</fixed-case> to Multi-lingual <fixed-case>ESG</fixed-case> Impact and Length Prediction Using In-context Learning and Fine-Tuning with Rationale Pawan KumarRajpoot AshviniJindal - AnkurParikh + AnkurParikh 274–278 The prediction of Environmental, Social, and Governance (ESG) impact and duration (length) of impact from company events, as reported in news articles, hold immense significance for investors, policymakers, and various stakeholders. In this paper, we describe solutions from our team “Upaya” to ESG impact and length prediction tasks on one such dataset ML-ESG-3. ML-ESG-3 dataset was released along with shared task as a part of the Fifth Workshop on Knowledge Discovery from Unstructured Data in Financial Services, co-located with LREC-COLING 2024. We employed two different paradigms to adapt Large Language Models (LLMs) to predict both the ESG impact and length of events. In the first approach, we leverage GPT-4 within the In-context learning (ICL) framework. A learning-free dense retriever identifies top K-relevant In-context learning examples from the training data for a given test example. The second approach involves instruction-tuning Mistral (7B) LLM to predict impact and duration, supplemented with rationale generated using GPT-4. Our models secured second place in French tasks and achieved reasonable results (fifth and ninth rank) in English tasks. These results demonstrate the potential of different LLM-based paradigms for delivering valuable insights within the ESG investing landscape. 2024.finnlp-1.30 @@ -523,7 +523,7 @@ Capturing Analysts’ Questioning Strategies in Earnings Calls via a Question Cornering Score (<fixed-case>QCS</fixed-case>) GiuliaD’Agostino AndreaRocci - ChrisReed + ChrisReed 107–118 2024.finnlp-2.10 dagostino-etal-2024-capturing @@ -610,7 +610,7 @@ Upaya at the <fixed-case>F</fixed-case>in<fixed-case>LLM</fixed-case> Challenge Task 1 and 2: <fixed-case>D</fixed-case>ist<fixed-case>F</fixed-case>in: Distillation based Fine-Tuning for Financial Tasks Ashvini KumarJindal Pawan KumarRajpoot - AnkurParikh + AnkurParikh 159–164 2024.finnlp-2.17 jindal-etal-2024-upaya diff --git a/data/xml/2024.futured.xml b/data/xml/2024.futured.xml index 0b34ef54b0..d35847edcf 100644 --- a/data/xml/2024.futured.xml +++ b/data/xml/2024.futured.xml @@ -3,7 +3,7 @@ Proceedings of the Workshop on the Future of Event Detection (FuturED) - JoelTetreault + JoelTetreault Thien HuuNguyen HemankLamba AmandaHughes @@ -57,13 +57,13 @@ <fixed-case>MUMOSA</fixed-case>, Interactive Dashboard for <fixed-case>MU</fixed-case>lti-<fixed-case>MO</fixed-case>dal Situation Awareness - Stephanie M.LukinU.S. Army Research Laboratory + Stephanie M.LukinU.S. Army Research Laboratory ShawnBowserU.S. Army Research Laboratory ReeceSuchockiUniversity of Colorado Boulder DouglasSummers-StayU.S. Army Research Laboratory FrancisFerraroUniversity of Maryland, Baltimore County CynthiaMatuszekUMBC - ClareVossArmy Research Laboratory + ClareVossArmy Research Laboratory 32-47 enter abstract here 2024.futured-1.4 @@ -98,7 +98,7 @@ HeleneOlsenUniversity of Oslo HuilingYouUniversity of Oslo SamiaTouilebUniversity of Bergen - LiljaØvrelidDept of Informatics, University of Oslo + LiljaØvrelidDept of Informatics, University of Oslo ErikVelldalUniversity of Oslo 73-86 enter abstract here diff --git a/data/xml/2024.games.xml b/data/xml/2024.games.xml index 8ab55e3922..8f90afe143 100644 --- a/data/xml/2024.games.xml +++ b/data/xml/2024.games.xml @@ -6,8 +6,8 @@ ChrisMadge JonChamberlain KarenFort - UdoKruschwitz - StephanieLukin + UdoKruschwitz + StephanieLukin ELRA and ICCL
Torino, Italia
May @@ -30,7 +30,7 @@
Empowering Adaptive Digital Game-Based Language Learning for Under-Resourced Languages Through Text Analysis - ElaineUí Dhonnchadha + ElaineUí Dhonnchadha SallyBruen LiangXu MonicaWard @@ -45,7 +45,7 @@ BertrandRemy BrunoGuillaume OlivierFerret - AurélieNévéol + AurélieNévéol KarenFort 14–20 This paper presents the creation of Hostomytho, a game with a purpose intended for evaluating the quality of synthetic biomedical texts through multiple mini-games. Hostomytho was developed entirely using open source technologies both for internet browser and mobile platforms (IOS & Android). The code and the annotations created for synthetic clinical cases in French will be made freely available. @@ -56,7 +56,7 @@ Using In-context Learning to Automate <fixed-case>AI</fixed-case> Image Generation for a Gamified Text Labelling Task FatimaAlthani ChrisMadge - MassimoPoesio + MassimoPoesio 21–31 This paper explores a novel automated method to produce AI-generated images for a text-labelling gamified task. By leveraging the in-context learning capabilities of GPT-4, we automate the optimisation of text-to-image prompts to align with the text being labelled in the part-of-speech tagging task. As an initial evaluation, we compare the optimised prompts to the original sentences based on imageability and concreteness scores. Our results revealed that optimised prompts had significantly higher imageability and concreteness scores. Moreover, to evaluate text-to-image outputs, we generate images using Stable Diffusion XL based on the two prompt types, optimised prompts and the original sentences. Using the automated LIAON-Aesthetic predictor model, we assigned aesthetic scores for the generated images. This resulted in the outputs using optimised prompts scoring significantly higher in predicted aesthetics than those using original sentences as prompts. Our preliminary findings suggest that this methodology provides significantly more aesthetic text-to-image outputs than using the original sentence as a prompt. While the initial results are promising, the text labelling task and AI-generated images presented in this paper have yet to undergo human evaluation. 2024.games-1.4 @@ -65,7 +65,7 @@ Aspect-based Sentiment Evaluation of Chess Moves (<fixed-case>ASSESS</fixed-case>): an <fixed-case>NLP</fixed-case>-based Method for Evaluating Chess Strategies from Textbooks HaifaAlrdahi - RizaBatista-Navarro + RizaBatista-Navarro 32–42 The chess domain is well-suited for creating an artificial intelligence (AI) system that mimics real-world challenges, including decision-making. Throughout the years, minimal attention has been paid to investigating insights derived from unstructured chess data sources. In this study, we examine the complicated relationships between multiple referenced moves in a chess-teaching textbook, and propose a novel method designed to encapsulate chess knowledge derived from move-action phrases. This study investigates the feasibility of using a modified sentiment analysis method as a means for evaluating chess moves based on text. Our proposed Aspect-Based Sentiment Analysis (ABSA) method represents an advancement in evaluating the sentiment associated with referenced chess moves. By extracting insights from move-action phrases, our approach aims to provide a more fine-grained and contextually aware ‘chess move’-based sentiment classification. Through empirical experiments and analysis, we evaluate the performance of our fine-tuned ABSA model, presenting results that confirm the efficiency of our approach in advancing aspect-based sentiment classification within the chess domain. This research contributes to the area of game-playing by machines and shows the practical applicability of leveraging NLP techniques to understand the context of strategic games. Keywords: Natural Language Processing, Chess, Aspect-based Sentiment Analysis (ABSA), Chess Move Evaluation. 2024.games-1.5 @@ -74,7 +74,7 @@ Generating Converging Narratives for Games with Large Language Models DouglasSummers-Stay - Clare R.Voss + Clare R.Voss 43–60 We explore methods of combining the probability distributions generated by two LLM prompts in order to generate a continuation that is appropriate for both prompts at once. This is a new capability that extends the possibilities for branching and rejoining narratives in games. 2024.games-1.6 @@ -85,7 +85,7 @@ ElioMusacchio LuciaSiciliani PierpaoloBasile - GiovanniSemeraro + GiovanniSemeraro 61–69 Dungeons & Dragons (D&D) is a classic tabletop game with a 50-year history. Its intricate and customizable gameplay allows players to create endless worlds and stories. Due to the highly narrative component of this game, D&D and many other interactive games represent a challenging setting for the Natural Language Generation (NLG) capabilities of LLMs. This paper explores using LLMs to generate new spells, which are one of the most captivating aspects of D&D gameplay. Due to the scarcity of resources available for such a specific task, we build a dataset of 3,259 instances by combining official and fan-made D&D spells. We considered several LLMs in generating spells, which underwent a quantitative and qualitative evaluation. Metrics including Bleu and BertScore were computed for quantitative assessments. Subsequently, we also conducted an in-vivo evaluation with a survey involving D&D players, which could assess the quality of the generated spells as well as their adherence to the rules. Furthermore, the paper emphasizes the open-sourcing of all models, datasets, and findings, aiming to catalyze further research on this topic. 2024.games-1.7 @@ -111,7 +111,7 @@ Linguistic Acceptability and Usability Enhancement: A Case Study of <fixed-case>GWAP</fixed-case> Evaluation and Redesign Wateen AbdullahAliady - MassimoPoesio + MassimoPoesio 85–96 Collecting high-quality annotations for Natural Language Processing (NLP) tasks poses challenges. Gamified annotation systems, like Games-with-a-Purpose (GWAP), have become popular tools for data annotation. For GWAPs to be effective, they must be user-friendly and produce high-quality annotations to ensure the collected data’s usefulness. This paper investigates the effectiveness of a gamified approach through two specific studies on an existing GWAP designed for collecting NLP coreference judgments. The first study involved preliminary usability testing using the concurrent think-aloud method to gather open-ended feedback. This feedback was crucial in pinpointing design issues. Following this, we conducted semi-structured interviews with our participants, and the insights collected from these interviews were instrumental in crafting player personas, which informed design improvements aimed at enhancing user experience. The outcomes of our research have been generalized to benefit other GWAP implementations. The second study evaluated the linguistic acceptability and reliability of the data collected through our GWAP. Our findings indicate that our GWAP produced reliable corpora with 91.49% accuracy and 0.787 Cohen’s kappa. 2024.games-1.10 diff --git a/data/xml/2024.gebnlp.xml b/data/xml/2024.gebnlp.xml index de5abdb25e..aedcae3b37 100644 --- a/data/xml/2024.gebnlp.xml +++ b/data/xml/2024.gebnlp.xml @@ -3,7 +3,7 @@ Proceedings of the 5th Workshop on Gender Bias in Natural Language Processing (GeBNLP) - AgnieszkaFaleńska + AgnieszkaFaleńska ChristineBasta MartaCosta-jussà SeraphinaGoldfarb-Tarrant @@ -36,7 +36,7 @@ BingjieDu JishunZhao YingLiuTsinghua University, Tsinghua University - PengyuanLiuBeijing Language and Culture University + PengyuanLiuBeijing Language and Culture University 20-32 Pre-trained language models (PLMs) have achieved success in various of natural language processing (NLP) tasks. However, PLMs also introduce some disquieting safety problems, such as gender bias. Gender bias is an extremely complex issue, because different individuals may hold disparate opinions on whether the same sentence expresses harmful bias, especially those seemingly neutral or positive. This paper first defines the concept of contextualized gender bias (CGB), which makes it easy to measure implicit gender bias in both PLMs and annotators. We then construct CGBDataset, which contains 20k natural sentences with gendered words, from Chinese news. Similar to the task of masked language models, gendered words are masked for PLMs and annotators to judge whether a male word or a female word is more suitable. Then, we introduce CGBFrame to measure the gender bias of annotators. By comparing the results measured by PLMs and annotators, we find that though there are differences on the choices made by PLMs and annotators, they show significant consistency in general. 2024.gebnlp-1.2 @@ -145,7 +145,7 @@ Dissecting Biases in Relation Extraction: A Cross-Dataset Analysis on People’s Gender and Origin - MarcoStranisci + MarcoStranisci Pere-LluísHuguet Cabot ElisaBassignana RobertoNavigliSapienza University of Rome @@ -169,7 +169,7 @@ HaotianZhu KexinGaoUniversity of Washington FeiXiaUniversity of Washington, Seattle - MariOstendorfUniversity of Washington + MariOstendorfUniversity of Washington 219-236 Gender bias has been extensively studied in both the educational field and the Natural Language Processing (NLP) field, the former using human coding to identify patterns associated with and causes of gender bias in text and the latter to detect, measure and mitigate gender bias in NLP output and models. This work aims to use NLP to facilitate automatic, quantitative analysis of educational text within the framework of a gender bias taxonomy. Analyses of both educational texts and a lexical resource (WordNet) reveal patterns of bias that can inform and aid educators in updating textbooks and lexical resources and in designing assessment items. 2024.gebnlp-1.14 @@ -230,7 +230,7 @@ VipulGuptaPennsylvania State University PranavNarayanan Venkit ShomirWilsonPennsylvania State University - RebeccaPassonneauPennsylvania State University + RebeccaPassonneauPennsylvania State University 295-322 Sociodemographic bias in language models (LMs) has the potential for harm when deployed in real-world settings. This paper presents a comprehensive survey of the past decade of research on sociodemographic bias in LMs, organized into a typology that facilitates examining the different aims: types of bias, quantifying bias, and debiasing techniques. We track the evolution of the latter two questions, then identify current trends and their limitations, as well as emerging techniques. To guide future research towards more effective and reliable solutions, and to help authors situate their work within this broad landscape, we conclude with a checklist of open questions. 2024.gebnlp-1.19 @@ -263,7 +263,7 @@ Detecting and Mitigating <fixed-case>LGBTQIA</fixed-case>+ Bias in Large <fixed-case>N</fixed-case>orwegian Language Models SelmaBergstrand - BjörnGambäckNorwegian University of Science and Technology + BjörnGambäckNorwegian University of Science and Technology 351-364 The paper aims to detect and mitigate LGBTQIA+ bias in large language models (LLMs). As the usage of LLMs quickly increases, so does the significance of the harms they may cause due to bias. The research field of bias in LLMs has seen massive growth, but few attempts have been made to detect or mitigate other biases than gender bias, and most focus has been on English LLMs. This work shows experimentally that LLMs may cause representational harms towards LGBTQIA+ individuals when evaluated on sentence completion tasks and on a benchmark dataset constructed from stereotypes reported by the queer community of Norway, collected through a survey in order to directly involve the affected community. Furthermore, Norwegian training corpora are probed for queer bias, revealing strong associations between queer terms and anti-queer slurs, as well as words related to pedophilia. Finally, a fine-tuning-based debiasing method is applied to two Norwegian LLMs. This method does not consistently reduce bias, but shows that queer bias can be altered, laying the foundation for future debiasing approaches. By shedding light on the severe discrimination that can occur through the usage of LLMs, this paper contributes to the ongoing fight for equal rights for the LGBTQIA+ community. 2024.gebnlp-1.22 @@ -273,7 +273,7 @@ Whose wife is it anyway? Assessing bias against same-gender relationships in machine translation IanStewartPacific Northwest National Laboratory - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan 365-375 Machine translation often suffers from biased data and algorithms that can lead to unacceptable errors in system output. While bias in gender norms has been investigated, less is known about whether MT systems encode bias about social relationships, e.g., “the lawyer kissed her wife.” We investigate the degree of bias against same-gender relationships in MT systems, using generated template sentences drawn from several noun-gender languages (e.g., Spanish) and comprised of popular occupation nouns. We find that three popular MT services consistently fail to accurately translate sentences concerning relationships between entities of the same gender. The error rate varies considerably based on the context, and same-gender sentences referencing high female-representation occupations are translated with lower accuracy. We provide this work as a case study in the evaluation of intrinsic bias in NLP systems with respect to social relationships. 2024.gebnlp-1.23 diff --git a/data/xml/2024.genbench.xml b/data/xml/2024.genbench.xml index c0cd452318..1b0f494fa5 100644 --- a/data/xml/2024.genbench.xml +++ b/data/xml/2024.genbench.xml @@ -48,7 +48,7 @@ The <fixed-case>S</fixed-case>lay<fixed-case>QA</fixed-case> benchmark of social reasoning: testing gender-inclusive generalization with neopronouns BastianBunzeckUniversität Bielefeld - SinaZarrießBielefeld University + SinaZarrießBielefeld University 42-53 We introduce SlayQA, a novel benchmark data set designed to evaluate language models’ ability to handle gender-inclusive language, specifically the use of neopronouns, in a question-answering setting. Derived from the Social IQa data set, SlayQA modifies context-question-answer triples to include gender-neutral pronouns, creating a significant linguistic distribution shift in comparison to common pre-training corpora like C4 or Dolma. Our results show that state-of-the-art language models struggle with the challenge, exhibiting small, but noticeable performance drops when answering question containing neopronouns compared to those without. 2024.genbench-1.3 @@ -70,7 +70,7 @@ <fixed-case>MMLU</fixed-case>-<fixed-case>SR</fixed-case>: A Benchmark for Stress-Testing Reasoning Capability of Large Language Models WentianWang SarthakJain - PaulKantorUniversity of Wisconsin - Madison, Rutgers University, New Brunswick and Paul B Kantor, Consultant + PaulKantorUniversity of Wisconsin - Madison, Rutgers University, New Brunswick and Paul B Kantor, Consultant JacobFeldmanRutgers University LazarosGallosRutgers University HaoWangRutgers University @@ -148,7 +148,7 @@ RitamDutt SagnikRay Choudhury Varun VenkatRao - CarolynRose + CarolynRose V.G.VinodVydiswaran 165-182 Generalization refers to the ability of machine learning models to perform well on dataset distributions different from the one it was trained on. While several pre-existing works have characterized the generalizability of NLP models across different dimensions, such as domain shift, adversarial perturbations, or compositional variations, most studies were carried out in a stand-alone setting, emphasizing a single dimension of interest. We bridge this gap by systematically investigating the generalizability of pre-trained language models across different architectures, sizes, and training strategies, over multiple dimensions for the task of natural language inference and question answering. Our results indicate that model instances typically exhibit consistent generalization trends, i.e., they generalize equally well (or poorly) across most scenarios, and this ability is correlated with model architecture, base dataset performance, size, and training mechanism. We hope this research motivates further work in a) developing a multi-dimensional generalization benchmark for systematic evaluation and b) examining the reasons behind models’ generalization abilities. The code and data are available at https://github.com/sagnik/md-gen-nlp, and the trained models are released at https://huggingface.co/varun-v-rao. diff --git a/data/xml/2024.germeval.xml b/data/xml/2024.germeval.xml index 7c49485517..09da966c34 100644 --- a/data/xml/2024.germeval.xml +++ b/data/xml/2024.germeval.xml @@ -93,7 +93,7 @@ Team Quabynar at the <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val 2024 Shared Task 1 <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect (Subtasks 1 and 2) on Sexism Detection Kwabena OdameAkomeah - UdoKruschwitz + UdoKruschwitz BerndLudwig 26–32 2024.germeval-2.4 diff --git a/data/xml/2024.gitt.xml b/data/xml/2024.gitt.xml index 4e83c33eaf..5632126757 100644 --- a/data/xml/2024.gitt.xml +++ b/data/xml/2024.gitt.xml @@ -49,7 +49,7 @@ Gender and bias in <fixed-case>A</fixed-case>mazon review translations: by humans, <fixed-case>MT</fixed-case> systems and <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> - MajaPopovicIU International University of Applied Sciences and Dublin City University + MajaPopovicIU International University of Applied Sciences and Dublin City University EkaterinaLapshinova-KoltunskiUniversität Hildesheim 22-30 This paper presents an analysis of first-person gender in five different translation variants of Amazon product reviews:those produced by professional translators, by translation students, with different machine translation (MT) systems andwith ChatGPT. The analysis revealed that the majority of the reviews were translated into the masculine first-person gender, both by humans as well as by machines. Further inspection revealed that the choice of the gender in a translation is not related to the actual gender of the translator. Finally, the analysis of different products showed that there are certain bias tendencies, because the distribution of genders notably differ for different products. diff --git a/data/xml/2024.hcinlp.xml b/data/xml/2024.hcinlp.xml index bb2f478403..8a657b3ec5 100644 --- a/data/xml/2024.hcinlp.xml +++ b/data/xml/2024.hcinlp.xml @@ -62,7 +62,7 @@ MarcusCollinsAmazon EugeneAgichteinAmazon and Emory University OlegRokhlenko - ShervinMalmasiAmazon + ShervinMalmasiAmazon 40-50 Conversational AI is a subtype of Human Computer Interaction that has gained wide adoption. These systems are typically powered by Large Language Models (LLMs) that use Retrieval Augmented Generation (RAG) to infuse external knowledge, which is effective against issues like hallucination. However, automatically evaluating retrieval augmented conversations with minimal human effort remains challenging, particularly in online settings. We address this challenge by proposing a lexical metric, and a novel method for combining it with other metrics, including semantic models. Our approach involves: (1) Conversational Information Utility (CIU), a new automated metric inspired by prior user studies on web search evaluation, to compute information overlap between conversation context and grounded information in an unsupervised, purely lexical way; and (2) a generalized reward model through Mixture-of-Experts (MoE-CIU) that dynamically ensembles CIU with other metrics, including learned ones, into a single reward. Evaluation against human ratings on two public datasets (Topical Chat and Persona Chat) shows that CIU improves correlation against human judgments by 2.0% and 0.9% respectively compared to the second best metric. When MoE is applied to combine lexical and learned semantic metrics, correlations further improve by 9.9% and 5.0%, suggesting that unified reward models are a promising approach. 2024.hcinlp-1.4 @@ -74,7 +74,7 @@ ChantalShaibNortheastern University JoeBarrowPattern Data AlexaSiuAdobe - ByronWallaceNortheastern University, Brown University and Northeastern University + ByronWallaceNortheastern University, Brown University and Northeastern University AniNenkovaAdobe Research 51-59 Modern instruction-tuned models have become highly capable in text generation tasks such as summarization, and are expected to be released at a steady pace. In practice one may now wish to choose confidently, but with minimal effort, the best performing summarization model when applied to a new domain or purpose. In this work, we empirically investigate the test sample size necessary to select a preferred model in the context of news summarization. Empirical results reveal that comparative evaluation converges quickly for both automatic and human evaluation, with clear preferences for a system emerging from under 100 examples. The human preference data allows us to quantify how well automatic scores can reproduce preference rankings across a variety of downstream summarization tasks. We find that, while automatic metrics are stable at smaller sample sizes, only some automatic metrics are able to moderately predict model win rates according to human preference. diff --git a/data/xml/2024.htres.xml b/data/xml/2024.htres.xml index 7e1b352c99..a0951946aa 100644 --- a/data/xml/2024.htres.xml +++ b/data/xml/2024.htres.xml @@ -33,7 +33,7 @@ <fixed-case>TEI</fixed-case> Specifications for a Sustainable Management of Digitized Holocaust Testimonies SarahBénière FlorianeChiffoleau - LaurentRomary + LaurentRomary 10–17 Data modeling and standardization are central issues in the field of Digital Humanities, and all the more so when dealing with Holocaust testimonies, where stable preservation and long-term accessibility are key. The EHRI Online Editions are composed of documents of diverse nature (testimonies, letters, diplomatic reports, etc.), held by EHRI’s partnering institutions, and selected, gathered thematically and encoded according to the TEI Guidelines by the editors within the EHRI Consortium. Standardization is essential in order to make sure that the editions are consistent with one another. The issue of consistency also encourages a broader reflection on the usage of standards when processing data, and on the standardization of digital scholarly editions of textual documents in general. In this paper, we present the normalization work we carried out on the EHRI Online Editions. It includes a customization of the TEI adapted to Holocaust-related documents, and a focus on the implementation of controlled vocabulary. We recommend the use of these encoding specifications as a tool for researchers and/or non-TEI experts to ensure their encoding is valid and consistent across editions, but also as a mechanism for integrating the edition work smoothly within a wider workflow leading from image digitization to publication. 2024.htres-1.2 @@ -69,7 +69,7 @@ Speech Technology Services for Oral History Research ChristophDraxler - Henkvan den Heuvel + Henkvan den Heuvel Arjanvan Hessen PavelIrcing JanLehečka diff --git a/data/xml/2024.hucllm.xml b/data/xml/2024.hucllm.xml index b53df0c24e..58df9d421a 100644 --- a/data/xml/2024.hucllm.xml +++ b/data/xml/2024.hucllm.xml @@ -8,7 +8,7 @@ AshishSharma DiyiYang SaraHooker - H. AndrewSchwartz + H. AndrewSchwartz ACL
TBD
August @@ -65,7 +65,7 @@ To What Extent Are Large Language Models Capable of Generating Substantial Reflections for Motivational Interviewing Counseling Chatbots? A Human Evaluation ErkanBasar IrisHendrickxRadboud University Nijmegen, the Netherlands - EmielKrahmerTilburg University + EmielKrahmerTilburg University Gert-JanBruijn TiborBosseRadboud University 41-52 @@ -80,7 +80,7 @@ PhillipRust RuixiangCui YongCao - AndersSøgaardCopenhagen University + AndersSøgaardCopenhagen University DanielHershcovichUniversity of Copenhagen 53-66 Large Vision Language Models can be used to assist visually impaired individuals by describing images they capture in their daily lives. Current evaluation datasets may not reflect the diverse cultural user backgrounds nor the situational context of this use case. To address this problem, we create a survey to determine caption preferences and propose a culture-centric evaluation benchmark by filtering VizWiz, an existing dataset with images taken by people who are blind. We then evaluate different models and prompts, investigating their reliability as visual assistants. While the evaluation results for state-of-the-art models seem promising, we identified some weak spots such as hallucinations and problems with conventional evaluation metrics. Our survey, data, code, and model outputs will be publicly available. @@ -92,7 +92,7 @@ Evaluating Large Language Models on Social Signal Sensitivity: An Appraisal Theory Approach ZhenWu RitamDuttCarnegie Mellon University - CarolynRoseSchool of Computer Science, Carnegie Mellon University + CarolynRoseSchool of Computer Science, Carnegie Mellon University 67-80 We present a framework to assess the sensitivity of Large Language Models (LLMs) to textually embedded social signals using an Appraisal Theory perspective. We report on an experiment that uses prompts encoding three dimensions of social signals: Affect, Judgment, and Appreciation. In response to the prompt, an LLM generates both an analysis (Insight) and a conversational Response, which are analyzed in terms of sensitivity to the signals. We quantitatively evaluate the output text through topical analysis of the Insight and predicted social intelligence scores of the Response in terms of empathy and emotional polarity. Key findings show that LLMs are more sensitive to positive signals. The personas impact Responses but not the Insight. We discuss how our framework can be extended to a broader set of social signals, personas, and scenarios to evaluate LLM behaviors under various conditions. 2024.hucllm-1.6 @@ -105,7 +105,7 @@ Aligning to Adults Is Easy, Aligning to Children Is Hard: A Study of Linguistic Alignment in Dialogue Systems DorotheaFrenchUniversity of Colorado, Boulder SidneyD’MelloUniversity of Colorado at Boulder - Katharinavon der WenseJohannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University + Katharinavon der WenseJohannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University 81-87 During conversations, people align to one another over time, by using similar words, concepts, and syntax. This helps form a shared understanding of the conversational content and is associated with increased engagement and satisfaction. It also affects conversation outcomes: e.g., when talking to language learners, an above normal level of linguistic alignment of parents or language teachers is correlated with faster language acquisition. These benefits make human-like alignment an important property of dialogue systems, which has often been overlooked by the NLP community. In order to fill this gap, we ask: (RQ1) Due to the importance for engagement and satisfaction, to what degree do state-of-the-art dialogue systems align to adult users? (RQ2) With a potential application to child language acquisition in mind, do systems, similar to parents, show high levels of alignment during conversations with children? Our experiments show that ChatGPT aligns to adults at roughly human levels, while Llama2 shows elevated alignment. However, when responding to a child, both systems’ alignment is below human levels. 2024.hucllm-1.7 diff --git a/data/xml/2024.humeval.xml b/data/xml/2024.humeval.xml index 727e858fc3..6cb7ddd950 100644 --- a/data/xml/2024.humeval.xml +++ b/data/xml/2024.humeval.xml @@ -4,7 +4,7 @@ Proceedings of the Fourth Workshop on Human Evaluation of NLP Systems (HumEval) @ LREC-COLING 2024 SimoneBalloccu - AnyaBelz + AnyaBelz RudaliHuidrom EhudReiter JoaoSedoc @@ -24,7 +24,7 @@ Quality and Quantity of Machine Translation References for Automatic Metrics VilémZouhar - OndřejBojar + OndřejBojar 1–11 Automatic machine translation metrics typically rely on human translations to determine the quality of system translations. Common wisdom in the field dictates that the human references should be of very high quality. However, there are no cost-benefit analyses that could be used to guide practitioners who plan to collect references for machine translation evaluation. We find that higher-quality references lead to better metric correlations with humans at the segment-level. Having up to 7 references per segment and taking their average (or maximum) helps all metrics. Interestingly, the references from vendors of different qualities can be mixed together and improve metric success. Higher quality references, however, cost more to create and we frame this as an optimization problem: given a specific budget, what references should be collected to maximize metric success. These findings can be used by evaluators of shared tasks when references need to be created under a certain budget. 2024.humeval-1.1 @@ -44,8 +44,8 @@ MohamedElaraby HuihuiXu MorganGray - KevinAshley - DianeLitman + KevinAshley + DianeLitman 28–35 Human evaluation remains the gold standard for assessing abstractive summarization. However, current practices often prioritize constructing evaluation guidelines for fluency, coherence, and factual accuracy, overlooking other critical dimensions. In this paper, we investigate argument coverage in abstractive summarization by focusing on long legal opinions, where summaries must effectively encapsulate the document’s argumentative nature. We introduce a set of human-evaluation guidelines to evaluate generated summaries based on argumentative coverage. These guidelines enable us to assess three distinct summarization models, studying the influence of including argument roles in summarization. Furthermore, we utilize these evaluation scores to benchmark automatic summarization metrics against argument coverage, providing insights into the effectiveness of automated evaluation methods. 2024.humeval-1.3 @@ -64,9 +64,9 @@ Insights of a Usability Study for <fixed-case>KBQA</fixed-case> Interactive Semantic Parsing: Generation Yields Benefits over Templates but External Validity Remains Challenging AshleyLewis LingboMo - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe HuanSun - MichaelWhite + MichaelWhite 47–62 We present our findings from a usability study of an interactive semantic parsing system for knowledge based question answering (KBQA). The system is designed to help users access information within a knowledge base without having to know its query language. The system translates the user’s question into the query language, retrieves an answer, then presents an English explanation of the process so that the user can make corrections if necessary. To our knowledge, our work is the most thorough usability study conducted for such a system and the only one that uses crowdworkers as participants to verify that the system is usable for average users. Our crowdworkers participate in KBQA dialogues using 4 versions of a system based on the framework by Mo et al. (2022) and answer surveys about their experiences. Some key takeaways from this work are: 1) we provide evidence for the benefits of interactivity in semantic parsing with human users and using generated questions in lieu of templated representations, 2) we identify limitations of simulations and provide contrasting evidence from actual system use, and 3) we provide an examination of crowdsourcing methodology, in particular the trade-offs of using crowdworkers vs. a specially trained group of evaluators. 2024.humeval-1.5 @@ -76,8 +76,8 @@ Extrinsic evaluation of question generation methods with user journey logs ElieAntoine EléonoreBesnehard - FredericBechet - GeraldineDamnati + FredericBechet + GeraldineDamnati EricKergosien ArnaudLaborderie 63–70 @@ -118,7 +118,7 @@ Once Upon a Replication: It is Humans’ Turn to Evaluate <fixed-case>AI</fixed-case>’s Understanding of Children’s Stories for <fixed-case>QA</fixed-case> Generation Andra-MariaFlorescu MariusMicluta-Campeanu - Liviu P.Dinu + Liviu P.Dinu 106–113 The following paper presents the outcomes of a collaborative experiment on human evaluation from the ReproNLP 2024 shared task, track B, part of the ReproHum project. For this paper, we evaluated a QAG (question-answer generation) system centered on English children’s storybooks that was presented in a previous research, by using human evaluators for the study. The system generated relevant QA (Question-Answer) pairs based on a dataset with storybooks for early education (kindergarten up to middle school) called FairytaleQA. In the framework of the ReproHum project, we first outline the previous paper and the reproduction strategy that has been decided upon. The complete setup of the first human evaluation is then described, along with the modifications required to replicate it. We also add other relevant related works on this subject. In conclusion, we juxtapose the replication outcomes with those documented in the cited publication. Additionally, we explore the general features of this endeavor as well as its shortcomings. 2024.humeval-1.10 @@ -150,7 +150,7 @@ AnouckBraggaar NadineBraun MartijnGoudbeek - EmielKrahmer + EmielKrahmer Chrisvan der Lee SteffenPauws FrédéricTomas @@ -184,7 +184,7 @@ <fixed-case>R</fixed-case>epro<fixed-case>H</fixed-case>um #1018-09: Reproducing Human Evaluations of Redundancy Errors in Data-To-Text Systems FilipKlubička - John D.Kelleher + John D.Kelleher 163–198 This paper describes a reproduction of a human evaluation study evaluating redundancies generated in automatically generated text from a data-to-text system. While the scope of the original study is broader, a human evaluation—a manual error analysis—is included as part of the system evaluation. We attempt a reproduction of this human evaluation, however while the authors annotate multiple properties of the generated text, we focus exclusively on a single quality criterion, that of redundancy. In focusing our study on a single minimal reproducible experimental unit, with the experiment being fairly straightforward and all data made available by the authors, we encountered no challenges with our reproduction and were able to reproduce the trend found in the original experiment. However, while still confirming the general trend, we found that both our annotators identified twice as many errors in the dataset than the original authors. 2024.humeval-1.16 diff --git a/data/xml/2024.icnlsp.xml b/data/xml/2024.icnlsp.xml index 04d3e270d8..d551d42ff4 100644 --- a/data/xml/2024.icnlsp.xml +++ b/data/xml/2024.icnlsp.xml @@ -19,7 +19,7 @@ Leveraging Annotator Disagreement for Text Classification JinXu - MariëtTheune + MariëtTheune DanielBraun 1–10 2024.icnlsp-1.1 @@ -137,7 +137,7 @@ AritzLasarguren JoneLòpez EgoitzRodriguez - AitorÁlvarez + AitorÁlvarez 109–118 2024.icnlsp-1.13 vasquez-correa-etal-2024-real @@ -148,7 +148,7 @@ Asier LópezZorrilla MikeldeVelasco Juan CamiloVasquez-Correa - AitorÁlvarez + AitorÁlvarez Maria InésTorres PazDelgado AneLazpiur @@ -237,9 +237,9 @@ GuramMikaberidze RaphaelKalandadze KonstantinePkhakadze - Josefvan Genabith + Josefvan Genabith SimonOstermann - Lonnekevan der Plas + Lonnekevan der Plas PhilippMüller 199–208 2024.icnlsp-1.22 @@ -283,7 +283,7 @@ Human and Machine: Language Processing in Translation Tasks HeningWang LeixinZhang - OndrejBojar + OndrejBojar 243–250 2024.icnlsp-1.27 wang-etal-2024-human @@ -310,7 +310,7 @@ TimothyObiso BingyangYe KyeongminRim - JamesPustejovsky + JamesPustejovsky 279–286 2024.icnlsp-1.30 obiso-etal-2024-semantically @@ -342,7 +342,7 @@ HazemHajj ShadyElbassuoni Wassim ElHajj - KhaledShaban + KhaledShaban 304–318 2024.icnlsp-1.33 hajj-etal-2024-design @@ -352,7 +352,7 @@ Yasmine A AbuAdla HazemHajj ShadyElbassuoni - KhaledShaban + KhaledShaban Wassim ElHajj 319–342 2024.icnlsp-1.34 @@ -386,7 +386,7 @@ <fixed-case>B</fixed-case>ulgarian Grammar Error Correction with Data Augmentation and Machine Translation Techniques BozhidarKlouchek - RizaBatista-Navarro + RizaBatista-Navarro 365–376 2024.icnlsp-1.38 klouchek-batista-navarro-2024-bulgarian @@ -446,8 +446,8 @@ <fixed-case>SG</fixed-case>-<fixed-case>RAG</fixed-case>: Multi-Hop Question Answering With Large Language Models Through Knowledge Graphs Ahmmad O. M.Saleh - GokhanTur - YucelSaygin + GokhanTur + YucelSaygin 439–448 2024.icnlsp-1.45 saleh-etal-2024-sg @@ -455,7 +455,7 @@ Linking <fixed-case>Q</fixed-case>uran and <fixed-case>H</fixed-case>adith Topics in an Ontology using Word Embeddings and Cellfie Plugin Ibtisam KhalafAlshammari - EricAtwell + EricAtwell Mohammad AmmarAlsalka 449–455 2024.icnlsp-1.46 @@ -466,7 +466,7 @@ RaffaelloFornasiere NicolòBrunello VincenzoScotti - MarkCarman + MarkCarman 456–466 2024.icnlsp-1.47 fornasiere-etal-2024-medical diff --git a/data/xml/2024.icon.xml b/data/xml/2024.icon.xml index 7ef11b7d98..907ba19c13 100644 --- a/data/xml/2024.icon.xml +++ b/data/xml/2024.icon.xml @@ -3,7 +3,7 @@ Proceedings of the 21st International Conference on Natural Language Processing (ICON) - SobhaLalitha Devi + SobhaLalitha Devi KaruneshArora NLP Association of India (NLPAI)
AU-KBC Research Centre, Chennai, India
@@ -37,7 +37,7 @@ Precision Empowers, Excess Distracts: Visual Question Answering With Dynamically Infused Knowledge In Language Models ManasJhalani AnnervazK M - PushpakBhattacharyya + PushpakBhattacharyya 21–36 In the realm of multimodal tasks, Visual Question Answering (VQA) plays a crucial role by addressing natural language questions grounded in visual content. Knowledge-Based Visual Question Answering (KBVQA) advances this concept by adding external knowledge along with images to respond to questions. We introduce an approach for KBVQA, augmenting the existing vision-language transformer encoder-decoder (OFA) model . Our main contribution involves enhancing questions by incorporating relevant external knowledge extracted from knowledge graphs, using a dynamic triple extraction 2024.icon-1.3 @@ -60,7 +60,7 @@ JayJ. Gorakhiya SanandSasidharan AnuradhaKanamarlapudi - PushpakBhattacharyya + PushpakBhattacharyya 45–53 Extracting information from genomic reports of cancer patients is crucial for both healthcare professionals and cancer research. While Large Language Models (LLMs) have shown promise in extracting information, their potential for handling genomic reports remains unexplored. These reports are complex, multi-page documents that feature a variety of visually rich, structured layouts and contain many domain-specific terms. Two primary challenges complicate the process: (i) extracting data from PDFs with intricate layouts and domain-specific terminology and (ii) dealing with variations in report layouts from different laboratories, making extraction layout-dependent and posing challenges for subsequent data processing. To tackle these issues, we propose GR-PROMPT, a prompt-based technique, and GR-FORMAT, a standardized format. Together, these two convert a genomic report in PDF format into GR-FORMAT as a JSON file using a multimodal LLM. To address the lack of available datasets for this task, we introduce GR-DATASET, a synthetic collection of 100 cancer genomic reports in PDF format. Each report is accompanied by key-value information presented in a layout-specific format, as well as structured key-value information in GR-FORMAT. This is the first dataset in this domain to promote further research for the task. We performed our experiment on this dataset. 2024.icon-1.5 @@ -69,7 +69,7 @@ Identification of Idiomatic Expressions in <fixed-case>K</fixed-case>onkani Language Using Neural Networks Naziya MahamdulShaikh - JyotiPawar + JyotiPawar 54–58 The task of multi-word expressions identification and processing has posed a remarkable challenge to the natural language processing applications. One related subtask in this arena is correct labelling of the sentences with the presence of idiomatic expressions as either literal or idiomatic sense. The regional Indian language Konkani spoken in the states located in the west coast of India lacks in the research in idiom processing tasks. We aim at bridging this gap through a contribution to idiom identification method in Konkani language. This paper classifies the idiomatic expression usage in Konkani language as idiomatic or literal usage using a neural network-based setup. The developed system was able to successfully perform the identification task with an accuracy of 79.5 % and F1-score of 0.77. 2024.icon-1.6 @@ -81,7 +81,7 @@ Jayram UlhasGawas ShrikrishnaR. Parab Shilpa NeenadDesai - JyotiPawar + JyotiPawar 59–67 The Visualizer is a tree-structure designed to browse and explore the Konkani WordNet lexical database. We propose to utilise this tool as a concept teaching and learning resource for Konkani, to be used by both teachers and students. It can also be used to add the missing semantic and lexical relations, thus enhancing the wordnet. It extracts related concepts for a given word and displays them as a sub-tree. The interface includes various features to offer users greater flexibility in navigating and understanding the word relationships. We attempted to enrich the Konkani Wordnet qualitatively with a Visualizer that offers an improved usability and is incorporated in the Konkani Wordnet website for the public use. The Visualizer is designed to provide graphical representations of words and their semantic relationships, making it easier to explore connections and meanings within the lexical database. 2024.icon-1.7 @@ -91,7 +91,7 @@ A Systematic Exploration of Linguistic Phenomena in Spoken <fixed-case>H</fixed-case>indi: Resource Creation and Hypothesis Testing AadyaRanjan SidharthRanjan - RajakrishnanRajkumar + RajakrishnanRajkumar 68–78 This paper presents a meticulous and well-structured approach to annotating a corpus of Hindi spoken data. We deployed 4 annotators to augment the spoken section of the EMILLE Hindi corpus by marking the various linguistic phenomena observed in spoken data. Then we analyzed various phonological (sound deletion), morphological (code-mixing and reduplication) and syntactic phenomena (case markers and ambiguity), not attested in written data. Code mixing and switching and constitute the majority of the phenomena we annotated, followed by orthographic errors related to symbols in the Devanagiri script. In terms of divergences from written form of Hindi, case marker usage, missing auxiliary verbs and agreement patterns are markedly distinct for spoken Hindi. The annotators also assigned a quality rating to each sentence in the corpus. Our analysis of the quality ratings revealed that most of the sentences in the spoken data corpus are of moderate to high quality. Female speakers produced a greater percentage of high quality sentences compared to their male counterparts. While previous efforts in corpus annotation have been largely focused on creating resources for engineering applications, we illustrate the utility of our dataset for scientific hypothesis testing. Inspired from the Surprisal Theory of language comprehension, we validate the hypothesis that sentences with high values of lexical surprisal are rated low in terms of quality by native speakers, even when controlling for sentence length and word frequencies in a sentence. 2024.icon-1.8 @@ -181,7 +181,7 @@ Reconsidering <fixed-case>SMT</fixed-case> Over <fixed-case>NMT</fixed-case> for Closely Related Languages: A Case Study of <fixed-case>P</fixed-case>ersian-<fixed-case>H</fixed-case>indi Pair WaisullahYousofi - PushpakBhattacharyya + PushpakBhattacharyya 149–156 This paper demonstrates that Phrase-Based Statistical Machine Translation (PBSMT) can outperform Transformer-based Neural Machine Translation (NMT) in moderate-resource scenarios, specifically for structurally similar languages, Persian-Hindi pair in our case. Despite the Transformer architecture’s typical preference for large parallel corpora, our results show that PBSMT achieves a BLEU score of 66.32, significantly exceeding the Transformer-NMT score of 53.7 ingesting the same dataset. 2024.icon-1.17 @@ -190,7 +190,7 @@ <fixed-case>R</fixed-case>o<fixed-case>M</fixed-case>antra: Optimizing Neural Machine Translation for Low-Resource Languages through <fixed-case>R</fixed-case>omanization GovindSoni - PushpakBhattacharyya + PushpakBhattacharyya 157–168 Neural Machine Translation (NMT) for low-resource language pairs with distinct scripts, such as Hindi-Chinese and Japanese-Hindi, poses significant challenges due to scriptural and linguistic differences. This paper investigates the efficacy of romanization as a preprocessing step to bridge these gaps. We compare baseline models trained on native scripts with models incorporating romanization in three configurations: both-side, source-side only, and target-side only. Additionally, we introduce a script restoration model that converts romanized output back to native scripts, ensuring accurate evaluation. Our experiments show that romanization, particularly when applied to both sides, improves translation quality across the studied language pairs. The script restoration model further enhances the practicality of this approach by enabling evaluation in native scripts with some performance loss. This work provides insights into leveraging romanization for NMT in low-resource, cross-script settings, presenting a promising direction for under-researched language combinations. 2024.icon-1.18 @@ -258,7 +258,7 @@ PritamPal SrijaniDebnath DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 215–223 With the advancement of natural language processing (NLP) and sophisticated Large Language Models (LLMs), distinguishing between human-written texts and machine-generated texts is quite difficult nowadays. This paper presents a systematic approach to classifying machine-generated text from human-written text with a combination of the transformer-based model and textual feature-based post-processing technique. We extracted five textual features: readability score, stop word score, spelling and grammatical error count, unique word score and human phrase count from both human-written and machine-generated texts separately and trained three machine learning models (SVM, Random Forest and XGBoost) with these scores. Along with exploring traditional machine-learning models, we explored the BiLSTM and transformer-based distilBERT models to enhance the classification performance. By training and evaluating with a large dataset containing both human-written and machine-generated text, our best-performing framework achieves an accuracy of 87.5%. 2024.icon-1.24 @@ -292,7 +292,7 @@ Pronominal Anaphora Resolution in <fixed-case>K</fixed-case>onkani language incorporating Gender Agreement PoonamA. Navelker - JyotiPawar + JyotiPawar 243–247 Konkani is a low-resource language, spoken mainly on the central west coast of India. Approximately 2.3 million people speak Konkani (Office of the Registrar General Census Commissioner, India,2011). It is also the official language of the state of Goa. It belongs to the Southern Indo-Aryan language group. The official Script for writing the Konkani language is Devanagari. Despite this, being a low-resource language has hampered its development on the digital platform, Konkani has yet to significantly impact its digital presence. To improve this situation, contribution to Natural Language Understanding in the Konkani language is important. This paper aims to resolve pronominal anaphora in the Konkani language using a rule-based method incorporating gender agreement. This is required in NLP applications like text summarization, machine translation, and question-answering systems. While research on English and other foreign languages, as well as Indian languages like Tamil, Kannada, Malayalam, Bengali, and Marathi, have been done, no work has been done on the Konkani language thus far. This is the very first attempt made to resolve anaphora in Konkani. 2024.icon-1.27 @@ -314,8 +314,8 @@ End to End Multilingual Coreference Resolution for <fixed-case>I</fixed-case>ndian Languages SobhaLalitha Devi - Vijay SundarRam - PattabhiRK Rao + Vijay SundarRam + PattabhiRK Rao 256–259 This paper describes an approach on an end to end model for Multilingual Coreference Resolution (CR) for low resource languages such as Tamil, Malayalam and Hindi. We have done fine tune the XLM-Roberta large model on multilingual training dataset using specific languages with linguistic features and without linguistic features. XLM-R with linguistic features achieves better results than the baseline system. This shows that giving the linguistic knowledge enriches the system performance. The performance of the system is comparable with the state of the art systems. 2024.icon-1.29 @@ -324,7 +324,7 @@ <fixed-case>L</fixed-case>ang<fixed-case>B</fixed-case>ot-Language Learning Chatbot MadhubalaSundaram - PattabhiRK Rao + PattabhiRK Rao SobhaLalitha Devi 260–263 Chatbots are being widely used in educational domain to revolutionize how students interact and learn along with traditional methods of learning. This paper presents our work on LangBot, a chatbot developed for learning Tamil language. LangBot developed integrates the interactive features of chatbots with the study material of the Tamil courses offered by Tamil Virtual Academy, Government of Tamil Nadu. LangBot helps students in enhancing their learning skills and increases their interest in learning the language. Using semi-automatic methods, we generate question and answers related to all topics in the courses. We then develop a generative language model and also Retrieval Augmented Generation (RAG) so that the system can incorporate new syllabus changes. We have performed manual user studies. The results obtained are encouraging. This approach offers learners an interactive tool that aligns with their syllabus. It is observed that this enriches the overall learning experience. @@ -356,7 +356,7 @@ <fixed-case>R</fixed-case>ound<fixed-case>T</fixed-case>rip<fixed-case>OCR</fixed-case>: A Data Generation Technique for Enhancing Post-<fixed-case>OCR</fixed-case> Error Correction in Low-Resource <fixed-case>D</fixed-case>evanagari Languages HarshvivekKashid - PushpakBhattacharyya + PushpakBhattacharyya 274–284 Optical Character Recognition (OCR) technology has revolutionized the digitization of printed text, enabling efficient data extraction and analysis across various domains. Just like Machine Translation systems, OCR systems are prone to errors. In this work, we address the challenge of data generation and post-OCR error correction, specifically for low-resource languages. We propose an approach for synthetic data generation for Devanagari languages, RoundTripOCR, that tackles the scarcity of the post-OCR Error Correction datasets for low-resource languages. We release post-OCR text correction datasets for Hindi, Marathi, Bodo, Nepali, Konkani and Sanskrit. We also present a novel approach for OCR error correction by leveraging techniques from machine translation. Our method involves translating erroneous OCR output into a corrected form by treating the OCR errors as mistranslations in a parallel text corpus, employing pre-trained transformer models to learn the mapping from erroneous to correct text pairs, effectively correcting OCR errors. 2024.icon-1.33 @@ -377,7 +377,7 @@ We Care: Multimodal Depression Detection and Knowledge Infused Mental Health Therapeutic Response Generation PalashMoon - PushpakBhattacharyya + PushpakBhattacharyya 296–310 The detection of depression through non-verbal cues has gained significant attention. Previous research predominantly centred on identifying depression within the confines of controlled laboratory environments, often with the supervision of psychologists or counsellors. Unfortunately, datasets generated in such controlled settings may struggle to account for individual behaviours in real-life situations. In response to this limitation, we present the Extended D-vlog dataset, encompassing a collection of 1,261 YouTube vlogs. Additionally, the emergence of large language models (LLMs) like GPT3.5, and GPT4 has sparked interest in their potential that LLMs can act like mental health professionals. Yet, the readiness of these LLM models to be used in real-life settings is still a concern as they can give wrong responses that can harm the users. We introduce a virtual agent serving as an initial contact for mental health patients, offering Cognitive Behavioral Therapy (CBT)-based responses. It comprises two core functions: 1. Identifying depression in individuals, and 2. Delivering CBT-based therapeutic responses. Our Mistral model achieved impressive scores of 70.1% and 30.9% for distortion assessment and classification, along with a Bert score of 88.7%. Moreover, utilizing the TVLT model on our Multimodal Extended D-vlog Dataset yielded outstanding results, with an impressive F1-score of 67.8% 2024.icon-1.35 @@ -465,7 +465,7 @@ Natural Answer Generation: From Factoid Answer to Full-length Answer using Grammar Correction ManasJain SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya GladvinChinnadurai ManishVatsa 376–385 @@ -537,8 +537,8 @@ <fixed-case>S</fixed-case>ans<fixed-case>GPT</fixed-case>: Advancing Generative Pre-Training in <fixed-case>S</fixed-case>anskrit Rhugved PankajChaudhari BhaktiJadhav - PushpakBhattacharyya - MalharKulkarni + PushpakBhattacharyya + MalharKulkarni 432–441 In the past decade, significant progress has been made in digitizing Sanskrit texts and advancing computational analysis of the language. However, efforts to advance NLP for complex semantic downstream tasks like Semantic Analogy Prediction, Named Entity Recognition, and others remain limited. This gap is mainly due to the absence of a robust, pre-trained Sanskrit model built on large-scale Sanskrit text data since this demands considerable computational resources and data preparation. In this paper, we introduce SansGPT, a generative pre-trained model that has been trained on a large corpus of Sanskrit texts and is designed to facilitate fine-tuning and development for downstream NLP tasks. We aim for this model to serve as a catalyst for advancing NLP research in Sanskrit. Additionally, we developed a custom tokenizer specifically optimized for Sanskrit text, enabling effective tokenization of compound words and making it better suited for generative tasks. Our data collection and cleaning process encompassed a wide array of available Sanskrit literature, ensuring comprehensive representation for training. We further demonstrate the model’s efficacy by fine-tuning it on Semantic Analogy Prediction and Simile Element Extraction, achieving an impressive accuracy of approximately 95.8% and 92.8%, respectively. 2024.icon-1.50 @@ -668,7 +668,7 @@ ShrikrishnaR. Parab Jayram UlhasGawas Shilpa NeenadDesai - JyotiPawar + JyotiPawar 531–536 Konkani WordNet, also called Konkani Shabdamalem, was created as part of the Indradhanush WordNet Project Consortium between August 2010 and October 2013. Currently, the Konkani WordNet includes about 32,370 synsets and 37,719 unique words. There is a need to enhance the Konkani WordNet both quantitatively as well as qualitatively. In this paper we are presenting a Game-Based Crowdsourcing approach adopted by us to add audio feature to the Konkani WordNet which has resulted in an increase in the number of users using and getting exposed to the capabilities of the Konkani WordNet to aid in the Konkani language teaching-learning process as well as for creation of resources to initiate further research. Our work presented here has resulted in the creation of an audio corpus of 37,719 unique words which we have named as ‘Shabdocchar’ within a short time span of four months covering five dialects of Konkani. We are confident that Shabdocchar will prove to be a very useful resource to support future research work on Dialects of Konkani and support voice-based search of words in the wordnet. This approach can be adopted to enhance other wordnets as well. 2024.icon-1.62 @@ -702,7 +702,7 @@ Pratik DeelipKorkankar AlvynAbranches PradnyaBhagat - JyotiPawar + JyotiPawar 562–568 In the era of online shopping, the volume of product reviews for user products on e-commerce platforms is massively increasing on a daily basis. For any given user product, it consists of a flood of reviews and manually analysing each of these reviews to understand the important aspects or opinions associated with the products is difficult and time-consuming task. Furthermore, it becomes nearly impossible for the customer to make decision of buying the product or not. Thus, it becomes necessary to have an aspect-based summary generated from these user reviews, which can act as a guide for the interested buyer in decision-making. Recently, the use of Large Language Models (LLMs) has shown great potential for solving diverse Natural Language Processing (NLP) tasks, including the task of summarization. Our paper explores the use of various LLMs such as Llama3, GPT-4o, Gemma2, Mistral, Mixtral and Qwen2 on the publicly available domain-specific Amazon reviews dataset as a part of our experimentation work. Our study postulates an algorithm to accurately identify product aspects and the model’s ability to extract relevant information and generate concise summaries. Further, we analyzed the experimental results of each of these LLMs with summary evaluation metrics such as Rouge, Meteor, BERTScore F1 and GPT-4o to evaluate the quality of the generated aspect-based summary. Our study highlights the strengths and limitations of each of these LLMs, thereby giving valuable insights for guiding researchers in harnessing LLMs for generating aspect-based summaries of user products present on these online shopping platforms. 2024.icon-1.65 @@ -715,7 +715,7 @@ PradnyaBhagat AlvynAbranches Pratik DeelipKorkankar - JyotiPawar + JyotiPawar 569–575 Sentiment Analysis plays a crucial role in understanding user opinions in various languages. The paper presents an experiment with a sentiment analysis model fine-tuned on Marathi sentences to classify sentiments into positive, negative, and neutral categories. The fine-tuned model shows high accuracy when tested on Konkani sentences, despite not being explicitly trained on Konkani data; since Marathi is a language very close to Konkani. This outcome highlights the effectiveness of Zero-shot learning, where the model generalizes well across linguistically similar languages. Evaluation metrics such as accuracy, balanced accuracy, negative accuracy, neutral accuracy, positive accuracy and confusion matrix scores were used to assess the performance, with Konkani sentences demonstrating superior results. These findings indicate that zero-shot sentiment analysis can be a powerful tool for sentiment classification in resource poor languages like Konkani, where labeled data is limited. The method can be used to generate datasets for resource-poor languages. Furthermore, this suggests that leveraging linguistically similar languages can help generate datasets for low-resource languages, enhancing sentiment analysis capabilities where labeled data is scarce. By utilizing related languages, zero-shot models can achieve meaningful performance without the need for extensive labeled data for the target language. 2024.icon-1.66 @@ -774,7 +774,7 @@ Automatic Summarization of Long Documents NamanChhibbar - JugalKalita + JugalKalita 607–615 A vast amount of textual data is added to the internet daily, making utilization and interpretation of such data difficult and cumbersome. As a result, automatic text summarization is crucial for extracting relevant information, saving precious reading time. Although many transformer-based models excel in summarization, they are constrained by their input size, preventing them from processing texts longer than their context size. This study introduces three novel algorithms that allow any LLM to efficiently overcome its input size limitation, effectively utilizing its full potential without any architectural modifications. We test our algorithms on texts with more than 70,000 words, and our experiments show a significant increase in BERTScore with competitive ROUGE scores. 2024.icon-1.72 @@ -814,7 +814,7 @@ Proceedings of the 21st International Conference on Natural Language Processing (ICON): Shared Task on Decoding Fake Narratives in Spreading Hateful Stories (Faux-Hate) ShankarBiradar - Kasu Sai KartheekReddy + Kasu Sai KartheekReddy SunilSaumya Md. ShadAkhtar NLP Association of India (NLPAI) diff --git a/data/xml/2024.inlg.xml b/data/xml/2024.inlg.xml index 3f5f6516a9..b447d832a3 100644 --- a/data/xml/2024.inlg.xml +++ b/data/xml/2024.inlg.xml @@ -4,7 +4,7 @@ Proceedings of the 17th International Natural Language Generation Conference SaadMahamood - Nguyen LeMinh + Nguyen LeMinh DaphneIppolito Association for Computational Linguistics
Tokyo, Japan
@@ -33,7 +33,7 @@ Noisy Pairing and Partial Supervision for Stylized Opinion Summarization HayateIso XiaolanWang - YoshiSuhara + YoshiSuhara 13–23 Opinion summarization research has primarily focused on generating summaries reflecting important opinions from customer reviews without paying much attention to the writing style. In this paper, we propose the stylized opinion summarization task, which aims to generate a summary of customer reviews in the desired (e.g., professional) writing style. To tackle the difficulty in collecting customer and professional review pairs, we develop a non-parallel training framework, Noisy Pairing and Partial Supervision (NAPA), which trains a stylized opinion summarization system from non-parallel customer and professional review sets. We create a benchmark ProSum by collecting customer and professional reviews from Yelp and Michelin. Experimental results on ProSum and FewSum demonstrate that our non-parallel training framework consistently improves both automatic and human evaluations, successfully building a stylized opinion summarization model that can generate professionally-written summaries from customer reviews. The code is available at https://github.com/megagonlabs/napa 2024.inlg-main.2 @@ -85,7 +85,7 @@
Generating from <fixed-case>AMR</fixed-case>s into High and Low-Resource Languages using Phylogenetic Knowledge and Hierarchical <fixed-case>QL</fixed-case>o<fixed-case>RA</fixed-case> Training (<fixed-case>HQL</fixed-case>) - WilliamSoto Martinez + WilliamSoto Martinez YannickParmentier ClaireGardent 70–81 @@ -111,8 +111,8 @@ SameenMaruf IngridZukerman XuelinSitu - CecileParis - GholamrezaHaffari + CecileParis + GholamrezaHaffari 103–120 In this paper, we generate and compare three types of explanations of Machine Learning (ML) predictions: simple, conservative and unifying. Simple explanations are concise, conservative explanations address the surprisingness of a prediction, and unifying explanations convey the extent to which an ML model’s predictions are applicable. The results of our user study show that (1) conservative and unifying explanations are liked equally and considered largely equivalent in terms of completeness, helpfulness for understanding the AI, and enticement to act, and both are deemed better than simple explanations; and (2)users’ views about explanations are influenced by the (dis)agreement between the ML model’s predictions and users’ estimations of these predictions, and by the inclusion/omission of features users expect to see in explanations. 2024.inlg-main.9 @@ -239,7 +239,7 @@ Exploring the impact of data representation on neural data-to-text generation - David M.Howcroft + David M.Howcroft Lewis N.Watson OlesiaNedopas DimitraGkatzia @@ -265,7 +265,7 @@ (Mostly) Automatic Experiment Execution for Human Evaluations of <fixed-case>NLP</fixed-case> Systems CraigThomson - AnyaBelz + AnyaBelz 272–279 Human evaluation is widely considered the most reliable form of evaluation in NLP, but recent research has shown it to be riddled with mistakes, often as a result of manual execution of tasks. This paper argues that such mistakes could be avoided if we were to automate, as much as is practical, the process of performing experiments for human evaluation of NLP systems. We provide a simple methodology that can improve both the transparency and reproducibility of experiments. We show how the sequence of component processes of a human evaluation can be defined in advance, facilitating full or partial automation, detailed preregistration of the process, and research transparency and repeatability. 2024.inlg-main.22 @@ -300,7 +300,7 @@ ShotaKoyama RyoNagata HiroyaTakamura - NaoakiOkazaki + NaoakiOkazaki 303–313 M2 and its variants are the most widely used automatic evaluation metrics for grammatical error correction (GEC), which calculate an F-score using a phrase-based alignment between sentences. However, it is not straightforward at all to align learner sentences containing errors to their correct sentences. In addition, alignment calculations are computationally expensive. We propose GREEN, an alignment-free F-score for GEC evaluation. GREEN treats a sentence as a multiset of n-grams and extracts edits between sentences by set operations instead of computing an alignment. Our experiments confirm that GREEN performs better than existing methods for the corpus-level metrics and comparably for the sentence-level metrics even without computing an alignment. GREEN is available at https://github.com/shotakoyama/green. 2024.inlg-main.25 @@ -323,7 +323,7 @@ Pipeline Neural Data-to-text with Large Language Models Chinonso CynthiaOsuji BrianTimoney - ThiagoCastro Ferreira + ThiagoCastro Ferreira BrianDavis 320–329 Previous studies have highlighted the advantages of pipeline neural architectures over end-to-end models, particularly in reducing text hallucination. In this study, we extend prior research by integrating pretrained language models (PLMs) into a pipeline framework, using both fine-tuning and prompting methods. Our findings show that fine-tuned PLMs consistently generate high quality text, especially within end-to-end architectures and at intermediate stages of the pipeline across various domains. These models also outperform prompt-based ones on automatic evaluation metrics but lag in human evaluations. Compared to the standard five-stage pipeline architecture, a streamlined three-stage pipeline, which only include ordering, structuring, and surface realization, achieves superior performance in fluency and semantic adequacy according to the human evaluation. @@ -345,8 +345,8 @@ Resilience through Scene Context in Visual Referring Expression Generation - SimeonJunker - SinaZarrieß + SimeonJunker + SinaZarrieß 344–357 Scene context is well known to facilitate humans’ perception of visible objects. In this paper, we investigate the role of context in Referring Expression Generation (REG) for objects in images, where existing research has often focused on distractor contexts that exert pressure on the generator. We take a new perspective on scene context in REG and hypothesize that contextual information can be conceived of as a resource that makes REG models more resilient and facilitates the generation of object descriptions, and object types in particular. We train and test Transformer-based REG models with target representations that have been artificially obscured with noise to varying degrees. We evaluate how properties of the models’ visual context affect their processing and performance. Our results show that even simple scene contexts make models surprisingly resilient to perturbations, to the extent that they can identify referent types even when visual information about the target is completely missing. 2024.inlg-main.29 @@ -444,7 +444,7 @@ ai<fixed-case>X</fixed-case>plain <fixed-case>SDK</fixed-case>: A High-Level and Standardized Toolkit for <fixed-case>AI</fixed-case> Assets ShreyasSharma LucasPavanelli - ThiagoCastro Ferreira + ThiagoCastro Ferreira MohamedAl-Badrashiny HassanSawaf 446–452 @@ -490,10 +490,10 @@ Multilingual Text Style Transfer: Datasets & Models for <fixed-case>I</fixed-case>ndian Languages SourabrataMukherjee - Atul Kr.Ojha + Atul Kr.Ojha AkankshaBansal DeepakAlok - John P.McCrae + John P.McCrae OndrejDusek 494–522 Text style transfer (TST) involves altering the linguistic style of a text while preserving its style-independent content. This paper focuses on sentiment transfer, a popular TST subtask, across a spectrum of Indian languages: Hindi, Magahi, Malayalam, Marathi, Punjabi, Odia, Telugu, and Urdu, expanding upon previous work on English-Bangla sentiment transfer. We introduce dedicated datasets of 1,000 positive and 1,000 negative style-parallel sentences for each of these eight languages. We then evaluate the performance of various benchmark models categorized into parallel, non-parallel, cross-lingual, and shared learning approaches, including the Llama2 and GPT-3.5 large language models (LLMs). Our experiments highlight the significance of parallel data in TST and demonstrate the effectiveness of the Masked Style Filling (MSF) approach in non-parallel techniques. Moreover, cross-lingual and joint multilingual learning methods show promise, offering insights into selecting optimal models tailored to the specific language and task requirements. To the best of our knowledge, this work represents the first comprehensive exploration of the TST task as sentiment transfer across a diverse set of languages. @@ -504,7 +504,7 @@ Are Large Language Models Actually Good at Text Style Transfer? SourabrataMukherjee - Atul Kr.Ojha + Atul Kr.Ojha OndrejDusek 523–539 We analyze the performance of large language models (LLMs) on Text Style Transfer (TST), specifically focusing on sentiment transfer and text detoxification across three languages: English, Hindi, and Bengali. Text Style Transfer involves modifying the linguistic style of a text while preserving its core content. We evaluate the capabilities of pre-trained LLMs using zero-shot and few-shot prompting as well as parameter-efficient finetuning on publicly available datasets. Our evaluation using automatic metrics, GPT-4 and human evaluations reveals that while some prompted LLMs perform well in English, their performance in on other languages (Hindi, Bengali) remains average. However, finetuning significantly improves results compared to zero-shot and few-shot prompting, making them comparable to previous state-of-the-art. This underscores the necessity of dedicated datasets and specialized models for effective TST. @@ -535,7 +535,7 @@ OndrejDusek AlbertGatt DimitraGkatzia - David M.Howcroft + David M.Howcroft OndrejPlatek AdarsaSivaprasad 557–583 @@ -562,7 +562,7 @@ SoichiroMurakami PeinanZhang HiroyaTakamura - ManabuOkumura + ManabuOkumura 597–608 Ad text generation is vital for automatic advertising in various fields through search engine advertising (SEA) to avoid the cost problem caused by laborious human efforts for creating ad texts. Even though ad creators create the landing page (LP) for advertising and we can expect its quality, conventional approaches with reinforcement learning (RL) mostly focus on advertising keywords rather than LP information. This work investigates and shows the effective usage of LP information as a reward in RL-based ad text generation through automatic and human evaluations. Our analysis of the actually generated ad text shows that LP information can be a crucial reward by appropriately scaling its value range to improve ad text generation performance. 2024.inlg-main.46 @@ -572,7 +572,7 @@ Differences in Semantic Errors Made by Different Types of Data-to-text Systems RudaliHuidrom - AnyaBelz + AnyaBelz MichelaLorandi 609–621 In this paper, we investigate how different semantic, or content-related, errors made by different types of data-to-text systems differ in terms of number and type. In total, we examine 15 systems: three rule-based and 12 neural systems including two large language models without training or fine-tuning. All systems were tested on the English WebNLG dataset version 3.0. We use a semantic error taxonomy and the brat annotation tool to obtain word-span error annotations on a sample of system outputs. The annotations enable us to establish how many semantic errors different (types of) systems make and what specific types of errors they make, and thus to get an overall understanding of semantic strengths and weaknesses among various types of NLG systems. Among our main findings, we observe that symbolic (rule and template-based) systems make fewer semantic errors overall, non-LLM neural systems have better fluency and data coverage, but make more semantic errors, while LLM-based systems require improvement particularly in addressing superfluous. @@ -608,7 +608,7 @@ Generating Faithful and Salient Text from Multimodal Data TahsinaHashem WeiqingWang - Derry TantiWijaya + Derry TantiWijaya Mohammed EunusAli Yuan-FangLi 646–662 @@ -620,7 +620,7 @@ Investigating Paraphrase Generation as a Data Augmentation Strategy for Low-Resource <fixed-case>AMR</fixed-case>-to-Text Generation - Marco AntonioSobrevilla Cabezudo + Marco AntonioSobrevilla Cabezudo Marcio LimaInacio Thiago Alexandre SalgueiroPardo 663–675 @@ -659,7 +659,7 @@ Proceedings of the 17th International Natural Language Generation Conference: System Demonstrations SaadMahamood - Nguyen LeMinh + Nguyen LeMinh DaphneIppolito Association for Computational Linguistics
Tokyo, Japan
@@ -715,7 +715,7 @@
<fixed-case>QCET</fixed-case>: An Interactive Taxonomy of Quality Criteria for Comparable and Repeatable Evaluation of <fixed-case>NLP</fixed-case> Systems - AnyaBelz + AnyaBelz SimonMille CraigThomson RudaliHuidrom @@ -748,7 +748,7 @@ RudaliHuidrom MohammedSabry AmyO’Riordan - AnyaBelz + AnyaBelz 16–19 Wikipedia is known to have systematic gaps in its coverage that correspond to under-resourced languages as well as underrepresented groups. This paper presents a new tool to support efforts to fill in these gaps by automatically generating draft articles and facilitating post-editing and uploading to Wikipedia. A rule-based generator and an input-constrained LLM are used to generate two alternative articles, enabling the often more fluent, but error-prone, LLM-generated article to be content-checked against the more reliable, but less fluent, rule-generated article. 2024.inlg-demos.6 @@ -765,7 +765,7 @@ 2024 2024.inlg-tutorials inlg - AnyaBelz + AnyaBelz JoãoSedoc CraigThomson SimonMille @@ -795,7 +795,7 @@ Proceedings of the 17th International Natural Language Generation Conference: Generation Challenges SimonMille - Miruna-AdrianaClinciu + Miruna-AdrianaClinciu Association for Computational Linguistics
Tokyo, Japan
September @@ -815,7 +815,7 @@ ChrisPalaguachi YangZhou SumaBhat - ChengXiangZhai + ChengXiangZhai 1–16 Given the practical applications of analogies, recent work has studied analogy generation to explain concepts. However, not all generated analogies are of high quality and it is unclear how to measure the quality of this new kind of generated text. To address this challenge, we propose a shared task on automatically evaluating the quality of generated analogies based on seven comprehensive criteria. For this, we will set up a leader board based on our dataset annotated with manual ratings along the seven criteria, and provide a baseline solution leveraging GPT-4. We hope that this task would advance the progress in development of new evaluation metrics and methods for analogy generation in natural language, particularly for education. 2024.inlg-genchal.1 @@ -843,7 +843,7 @@ Summary of the Visually Grounded Story Generation Challenge XudongHong - AsadSayeed + AsadSayeed VeraDemberg 39–46 Recent advancements in vision-and-language models have opened new possibilities for natural language generation, particularly in generating creative stories from visual input. We thus host an open-sourced shared task, Visually Grounded Story Generation (VGSG), to explore whether these models can create coherent, diverse, and visually grounded narratives. This task challenges participants to generate coherent stories based on sequences of images, where characters and events must be grounded in the images provided. The task is structured into two tracks: the Closed track with constraints on fixed visual features and the Open track which allows all kinds of models. We propose the first two-stage model using GPT-4o as the baseline for the Open track that first generates descriptions for the images and then creates a story based on those descriptions. Human and automatic evaluations indicate that: 1) Retrieval augmentation helps generate more human-like stories, and 2) Largescale pre-trained LLM improves story quality by a large margin; 3) Traditional automatic metrics can not capture the overall quality. @@ -878,7 +878,7 @@ <fixed-case>D</fixed-case>ip<fixed-case>I</fixed-case>nfo-<fixed-case>U</fixed-case>ni<fixed-case>T</fixed-case>o at the <fixed-case>GEM</fixed-case>’24 Data-to-Text Task: Augmenting <fixed-case>LLM</fixed-case>s with the Split-Generate-Aggregate Pipeline MichaelOliverio Pier FeliceBalestrucci - AlessandroMazzei + AlessandroMazzei ValerioBasile 59–65 This paper describes the DipInfo-UniTo system participating to the GEM shared task 2024. We participate only to the Data-to-Text (D2T) task. The DipInfo-UniTo system is based on Mistral (Jiang et al., 2023), a recent Large Language Model (LLM). Most LLMs are capable of generating high-quality text for D2T tasks but, crucially, they often fall short in terms of adequacy, and sometimes exhibit “hallucinations”. To mitigate this issue, we have implemented a generation pipeline that combines LLMs with techniques from the traditional Natural Language Generation (NLG) pipeline. In particular, we have a three step process SGA, consisting in (1) Splitting the original set of triples, (2) Generating verbalizations from the resulting split data units, (3) Aggregating the verbalizations produced in the previous step. @@ -891,7 +891,7 @@ Chinonso CynthiaOsuji RudaliHuidrom Kolawole JohnAdebayo - ThiagoCastro Ferreira + ThiagoCastro Ferreira BrianDavis 66–75 In this paper, we present our approach to the GEM Shared Task at the INLG’24 Generation Challenges, which focuses on generating data-to-text in multiple languages, including low-resource languages, from WebNLG triples. We employ a combination of end-to-end and pipeline neural architectures for English text generation. To extend our methodology to Hindi, Korean, Arabic, and Swahili, we leverage a neural machine translation model. Our results demonstrate that our approach achieves competitive performance in the given task. @@ -902,7 +902,7 @@ <fixed-case>DCU</fixed-case>-<fixed-case>NLG</fixed-case>-<fixed-case>PBN</fixed-case> at the <fixed-case>GEM</fixed-case>’24 Data-to-Text Task: Open-Source <fixed-case>LLM</fixed-case> <fixed-case>PEFT</fixed-case>-Tuning for Effective Data-to-Text Generation MichelaLorandi - AnyaBelz + AnyaBelz 76–83 LLMs have been used in various tasks with impressive success, including data-to-text generation. However, one concern when LLMs are compared to alternative methods is data contamination, in other words, for many datasets the data used in training these models may have included publicly available test sets. In this paper, we explore the performance of LLMs using newly constructed datasets in the context of data-to-text generation for English, Chinese, German, Russian, Spanish, Korean, Hindi, Swahili, and Arabic. We performed a testing phase to evaluate a range of prompt types and a fine-tuning technique on Mistral 7B and Falcon 40B. We then fully evaluated the most promising system for each scenario: (i) LLM prompting in English followed by translation, and (ii) LLM PEFT-tuning in English followed by translation. We find that fine-tuning Mistral outperforms all other tested systems and achieves performance close to GPT-3.5. The few-shot prompting with a dynamic selection of examples achieves higher results among prompting. The human evaluation to be carried out by the shared-task organisers will provide insight into the performance of the new datasets. In conclusion, we observed how the fine-tuning of an open-source LLM can achieve good performance close to state-of-the-art closed-source LLM while using considerably fewer resources. 2024.inlg-genchal.8 @@ -913,7 +913,7 @@ <fixed-case>DCU</fixed-case>-<fixed-case>NLG</fixed-case>-Small at the <fixed-case>GEM</fixed-case>’24 Data-to-Text Task: Rule-based generation and post-processing with T5-Base SimonMille MohammedSabry - AnyaBelz + AnyaBelz 84–91 Our submission to the GEM data-to-text shared task aims to assess the quality of texts produced by the combination of a rule-based system with a language model of reduced size, by first using a rule-based generator to convert input triples into semantically correct English text, and then a language model to paraphrase these texts to make them more fluent. The texts are translated to languages other than English with the NLLB machine translation system. 2024.inlg-genchal.9 @@ -936,7 +936,7 @@ AshleyLewis Yi-ChienLin TomirisKaumenova - MichaelWhite + MichaelWhite 100–111 This paper details experiments conducted for completing the GEM 2024 Data-to-Text task for a WebNLG dataset (Gardent et al., 2017). We show that model performance varies greatly across English, Spanish, Chinese, and Russian. Data filtering was done with automatic model judgments via error detection, which performs differently per language. We report English and Spanish dev set results for a data filtering and knowledge distillation approach to generating natural language outputs for sets of triples across a variety of domains. Specifically, we compare three generation conditions: 1) few-shot prompting with ChatGPT (GPT4), 2) fine-tuning LLama2 on the unfiltered dataset, and 3) fine-tuning Llama2 on a filtered version of the dataset. Russian and Chinese efforts did not result in submissions due to inconsistent or incoherent translations being produced in either the data synthesis or final generation stages. We provide details on these shortcomings but largely focus on Spanish and English efforts that align with our task submissions. We ultimately submitted outputs in English and Spanish that were generated using a version of Llama2 fine-tuned on a filtered dataset. 2024.inlg-genchal.11 diff --git a/data/xml/2024.insights.xml b/data/xml/2024.insights.xml index f379582b84..2045c44acc 100644 --- a/data/xml/2024.insights.xml +++ b/data/xml/2024.insights.xml @@ -26,7 +26,7 @@ HaotianYeLMU Munich YihongLiuLMU Munich ChunlanMaLMU Munich - HinrichSchützeCenter for Information and Language Processing, University of Munich + HinrichSchützeCenter for Information and Language Processing, University of Munich 1-7 Transformer-based pre-trained language models (PLMs) have achieved remarkable performance in various natural language processing (NLP) tasks. However, pre-training such models can take considerable resources that are almost only available to high-resource languages. On the contrary, static word embeddings are easier to train in terms of computing resources and the amount of data required. In this paper, we introduce MoSECroT (Model Stitching with Static Word Embeddings for Crosslingual Zero-shot Transfer, a novel and challenging task that is especially relevant to low-resource languages for which static word embeddings are available. To tackle the task, we present the first framework that leverages relative representations to construct a common space for the embeddings of a source language PLM and the static word embeddings of a target language. In this way, we can train the PLM on source-language training data and perform zero-shot transfer to the target language by simply swapping the embedding layer. However, through extensive experiments on two classification datasets, we show that although our proposed framework is competitive with weak baselines when addressing MoSECroT, it fails to achieve competitive results compared with some strong baselines. In this paper, we attempt to explain this negative result and provide several thoughts on possible improvement. 2024.insights-1.1 @@ -104,7 +104,7 @@ TatsuyaHiraokaFujitsu Limited (Fujitsu Research) RicoSennrichUniversity of Zurich YuvalPinterBen-Gurion University of the Negev - NaoakiOkazakiTokyo Institute of Technology + NaoakiOkazakiTokyo Institute of Technology 48-50 We explore threshold vocabulary trimming in Byte-Pair Encoding subword tokenization, a tokenization postprocessing step that replaces rare subwords with their component subwords. The technique is available in popular tokenization libraries but has not been subjected to rigorous scientific scrutiny. While the removal of rare subwords is suggested as best practice in model implementations, both as a means to reduce model size and for improving model performance through robustness, our experiments indicate that, across a large space of hyperparameter settings, vocabulary trimming fails to consistently improve model performance, and is even prone to incurring heavy degradation. 2024.insights-1.7 @@ -148,7 +148,7 @@ BrianDavisDublin City University FabioCozmanUniversidade de Sao Paulo AdrianaPaganoFederal University of Minas Gerais - ThiagoCastro FerreiraFederal University of Minas Gerais + ThiagoCastro FerreiraFederal University of Minas Gerais 73-81 Neural end-to-end surface realizers output more fluent texts than classical architectures. However, they tend to suffer from adequacy problems, in particular hallucinations in numerical referring expression generation. This poses a problem to language generation in sensitive domains, as is the case of robot journalism covering COVID-19 and Amazon deforestation. We propose an approach whereby numerical referring expressions are converted from digits to plain word form descriptions prior to being fed to state-of-the-art Large Language Models. We conduct automatic and human evaluations to report the best strategy to numerical superficial realization. Code and data are publicly available. 2024.insights-1.10 @@ -170,7 +170,7 @@ Can probing classifiers reveal the learning by contact center large language models?: No, it doesn’t! VarunNathanObserve.AI - AyushKumarObserve.AI + AyushKumarObserve.AI DigvijayIngleObserve.AI, India 92-100 Fine-tuning large language models (LLMs) with domain-specific instruction dataset has emerged as an effective method to enhance their domain-specific understanding. Yet, there is limited work that examines the core characteristics acquired during this process. In this study, we benchmark the fundamental characteristics learned by contact-center (CC) domain specific instruction fine-tuned LLMs with out-of-the-box (OOB) LLMs via probing tasks encompassing conversational, channel, and automatic speech recognition (ASR) properties. We explore different LLM architectures (Flan-T5 and Llama) and sizes (3B, 7B, 11B, 13B). Our findings reveal remarkable effectiveness of CC-LLMs on the in-domain downstream tasks, with improvement in response acceptability by over 48% compared to OOB-LLMs. However, we observe that the performance of probing classifiers are relatively similar and does not reflect the performance of in-domain downstream tasks. A similar observation is also noted on SentEval dataset that assess capabilities of models in terms of surface, syntactic, and semantic information through probing tasks. Our study challenges the premise that probing classifiers can reveal the fundamental characteristics learned by large language models and is reflective of the downstream task performance, via a case-study of LLMs tuned for contact center domain. @@ -218,7 +218,7 @@ The Paradox of Preference: A Study on <fixed-case>LLM</fixed-case> Alignment Algorithms and Data Acquisition Methods RishikeshDevanathanObserve.AI VarunNathanObserve.AI - AyushKumarObserve.AI + AyushKumarObserve.AI 135-147 This research investigates the impact of preference annotation acquisition methods on the performance of LLM alignment algorithms, including Direct Preference Optimization (DPO), Identity Preference Optimization (IPO), and Conservative DPO (cDPO), compared to Supervised Fine-Tuning (SFT) in NLP tasks. We analyze the influence of LLM and human-based preferences on algorithm performance, considering data volume and quality. Additionally, we assess DPO’s vulnerability to overfitting and IPO’s resilience against it, addressing four main research questions. Using the GAIR dataset and Zephyr-7b as the SFT model, we reveal unexpected negative outcomes. Specifically, DPO trained on LLM preferences outperforms human preferences, contrary to expectations. Moreover, there’s no correlation between preference data volume or quality and algorithm performance. Contrary to expectations, DPO shows no overfitting in both human and LLM preference datasets. Surprisingly, cDPO doesn’t fare better than DPO under flip noise. Our findings highlight the complexities of preference annotation methods and underscore the importance of scrutinizing negative results in NLP algorithm research. 2024.insights-1.16 @@ -241,7 +241,7 @@ Multi-Task Learning with Adapters for Plausibility Prediction: Bridging the Gap or Falling into the Trenches? AnneroseEichelUniversity of Stuttgart - SabineSchulte Im WaldeUniversity of Stuttgart + SabineSchulte Im WaldeUniversity of Stuttgart 154-168 We present a multi-task learning approach to predicting semantic plausibility by leveraging 50+ adapters categorized into 17 tasks within an efficient training framework. Across four plausibility datasets in English of varying size and linguistic constructions, we compare how models provided with knowledge from a range of NLP tasks perform in contrast to models without external information. Our results show that plausibility prediction benefits from complementary knowledge (e.g., provided by syntactic tasks) are significant but non-substantial, while performance may be hurt when injecting knowledge from an unsuitable task. Similarly important, we find that knowledge transfer may be hindered by class imbalance, and demonstrate the positive yet minor effect of balancing training data, even at the expense of size. 2024.insights-1.18 diff --git a/data/xml/2024.isa.xml b/data/xml/2024.isa.xml index 2f7ee4201d..908e54f58d 100644 --- a/data/xml/2024.isa.xml +++ b/data/xml/2024.isa.xml @@ -3,12 +3,12 @@ Proceedings of the 20th Joint ACL - ISO Workshop on Interoperable Semantic Annotation @ LREC-COLING 2024 - HarryBunt - NancyIde + HarryBunt + NancyIde KiyongLee VolhaPetukhova - JamesPustejovsky - LaurentRomary + JamesPustejovsky + LaurentRomary ELRA and ICCL
Torino, Italia
May @@ -33,8 +33,8 @@ <fixed-case>MSNER</fixed-case>: A Multilingual Speech Dataset for Named Entity Recognition QuentinMeeus - Marie-FrancineMoens - HugoVan hamme + Marie-FrancineMoens + HugoVan hamme 8–16 While extensively explored in text-based tasks, Named Entity Recognition (NER) remains largely neglected in spoken language understanding. Existing resources are limited to a single, English-only dataset. This paper addresses this gap by introducing MSNER, a freely available, multilingual speech corpus annotated with named entities. It provides annotations to the VoxPopuli dataset in four languages (Dutch, French, German, and Spanish). We have also releasing an efficient annotation tool that leverages automatic pre-annotations for faster manual refinement. This results in 590 and 15 hours of silver-annotated speech for training and validation, alongside a 17-hour, manually-annotated evaluation set. We further provide an analysis comparing silver and gold annotations. Finally, we present baseline NER models to stimulate further research on this newly available dataset. 2024.isa-1.2 @@ -87,7 +87,7 @@ Shallow Discourse Parsing on <fixed-case>T</fixed-case>witter Conversations - BerfinAktas + BerfinAktas BurakÖzmen 60–65 We present our PDTB-style annotations on conversational Twitter data, which was initially annotated by Scheffler et al. (2019). We introduced 1,043 new annotations to the dataset, nearly doubling the number of previously annotated discourse relations. Subsequently, we applied a neural Shallow Discourse Parsing (SDP) model to the resulting corpus, improving its performance through retraining with in-domain data. The most substantial improvement was observed in the sense identification task (+19%). Our experiments with diverse training data combinations underline the potential benefits of exploring various data combinations in domain adaptation efforts for SDP. To the best of our knowledge, this is the first application of Shallow Discourse Parsing on Twitter data @@ -98,9 +98,9 @@ Search tool for An Event-Type Ontology NataliiaPetliak Cristina FernandézAlcaina - EvaFučíková - JanHajič - ZdeňkaUrešová + EvaFučíková + JanHajič + ZdeňkaUrešová 66–70 This short demo description paper presents a new tool designed for searching an event-type ontology with rich information, demonstrated on the SynSemClass ontology resource. The tool complements a web browser, created by the authors of the SynSemClass ontology previously. Due to the complexity of the resource, the search tool offers possibilities both for a linguistically-oriented researcher as well as for teams working with the resource from a technical point of view, such as building role labeling tools, automatic annotation tools, etc. 2024.isa-1.9 @@ -110,7 +110,7 @@ Tiny But Mighty: A Crowdsourced Benchmark Dataset for Triple Extraction from Unstructured Text MuhammadSalman ArminHaller - Sergio J.Rodriguez Mendez + Sergio J.Rodriguez Mendez UsmanNaseem 71–81 In the context of Natural Language Processing (NLP) and Semantic Web applications, constructing Knowledge Graphs (KGs) from unstructured text plays a vital role. Several techniques have been developed for KG construction from text, but the lack of standardized datasets hinders the evaluation of triple extraction methods. The evaluation of existing KG construction approaches is based on structured data or manual investigations. To overcome this limitation, this work introduces a novel dataset specifically designed to evaluate KG construction techniques from unstructured text. Our dataset consists of a diverse collection of compound and complex sentences meticulously annotated by human annotators with potential triples (subject, verb, object). The annotations underwent further scrutiny by expert ontologists to ensure accuracy and consistency. For evaluation purposes, the proposed F-measure criterion offers a robust approach to quantify the relatedness and assess the alignment between extracted triples and the ground-truth triples, providing a valuable tool for evaluating the performance of triple extraction systems. By providing a diverse collection of high-quality triples, our proposed benchmark dataset offers a comprehensive training and evaluation set for refining the performance of state-of-the-art language models on a triple extraction task. Furthermore, this dataset encompasses various KG-related tasks, such as named entity recognition, relation extraction, and entity linking. @@ -120,7 +120,7 @@ Less is Enough: Less-Resourced Multilingual <fixed-case>AMR</fixed-case> Parsing BramVanroy - TimVan de Cruys + TimVan de Cruys 82–92 This paper investigates the efficacy of multilingual models for the task of text-to-AMR parsing, focusing on English, Spanish, and Dutch. We train and evaluate models under various configurations, including monolingual and multilingual settings, both in full and reduced data scenarios. Our empirical results reveal that while monolingual models exhibit superior performance, multilingual models are competitive across all languages, offering a more resource-efficient alternative for training and deployment. Crucially, our findings demonstrate that AMR parsing benefits from transfer learning across languages even when having access to significantly smaller datasets. As a tangible contribution, we provide text-to-AMR parsing models for the aforementioned languages as well as multilingual variants, and make available the large corpora of translated data for Dutch, Spanish (and Irish) that we used for training them in order to foster AMR research in non-English languages. Additionally, we open-source the training code and offer an interactive interface for parsing AMR graphs from text. 2024.isa-1.11 @@ -129,7 +129,7 @@ <fixed-case>M</fixed-case>o<fixed-case>CCA</fixed-case>: A Model of Comparative Concepts for Aligning Constructicons ArthurLorenzi - PeterLjunglöf + PeterLjunglöf BenLyngfelt TiagoTimponi Torrent WilliamCroft @@ -184,7 +184,7 @@ Annotating Evaluative Language: Challenges and Solutions in Applying Appraisal Theory JiameiZeng MinDong - Alex ChengyuFang + Alex ChengyuFang 144–151 This article describes a corpus-based experiment to identify the challenges and solutions in the annotation of evaluative language according to the scheme defined in Appraisal Theory (Martin and White, 2005). Originating from systemic functional linguistics, Appraisal Theory provides a robust framework for the analysis of linguistic expressions of evaluation, stance, and interpersonal relationships. Despite its theoretical richness, the practical application of Appraisal Theory in text annotation presents significant challenges, chiefly due to the intricacies of identifying and classifying evaluative expressions within its sub-system of Attitude, which comprises Affect, Judgement, and Appreciation. This study examines these challenges through the annotation of a corpus of editorials related to the Russian-Ukraine conflict and aims to offer practical solutions to enhance the transparency and consistency of the annotation. By refining the annotation process and addressing the subjective nature in the identification and classification of evaluative language, this work represents some timely effort in the annotation of pragmatic knowledge in language resources. 2024.isa-1.17 diff --git a/data/xml/2024.iscls.xml b/data/xml/2024.iscls.xml index 9cea47af12..96d9e7bff4 100644 --- a/data/xml/2024.iscls.xml +++ b/data/xml/2024.iscls.xml @@ -26,7 +26,7 @@ Context and <fixed-case>WSD</fixed-case>: Analysing <fixed-case>G</fixed-case>oogle <fixed-case>T</fixed-case>ranslate’s <fixed-case>S</fixed-case>anskrit to <fixed-case>E</fixed-case>nglish Output of Bhagavadgītā Verses for Word Meaning AnaghaPradeep RadhikaMamidi - PavankumarSatuluri + PavankumarSatuluri 14–26 2024.iscls-1.2 pradeep-etal-2024-context @@ -79,7 +79,7 @@ <fixed-case>START</fixed-case>: <fixed-case>S</fixed-case>anskrit Teaching; Annotation; and Research Tool – Bridging Tradition and Technology in Scholarly Exploration - AnilKumar + AnilKumar AmbaKulkarni NakkaShailaj 113–124 diff --git a/data/xml/2024.iwclul.xml b/data/xml/2024.iwclul.xml index 6e6841e523..cd3191a4ce 100644 --- a/data/xml/2024.iwclul.xml +++ b/data/xml/2024.iwclul.xml @@ -71,7 +71,7 @@ OlgaKolesnikovaInstituto Politécnico Nacional (IPN), Centro de Investigación en Computación (CIC), Mexico City, Mexico LilianaChanona HernandezInstituto Politécnico Nacional (IPN), Escuela Superior de Ingeniería Mecánica y Eléctrica (ESIME), Mexico City, Mexico GrigoriSidorovInstituto Politécnico Nacional (IPN), Centro de Investigación en Computación (CIC), Mexico City, Mexico - AlexanderGelbukhInstituto Politécnico Nacional (IPN), Centro de Investigación en Computación (CIC), Mexico City, Mexico + AlexanderGelbukhInstituto Politécnico Nacional (IPN), Centro de Investigación en Computación (CIC), Mexico City, Mexico 49-58 This article is dedicated to the study of multilingual approaches to sentiment analysis of texts in Finnish, Hungarian, and Bulgarian. For Finnish and Hungarian, which are characterized by complex morphology and agglutinative grammar, an analysis was conducted using both traditional rule-based methods and modern machine learning techniques. In the study, BERT, XLM-R, and mBERT models were used for sentiment analysis, demonstrating high accuracy in sentiment classification. The inclusion of Bulgarian was motivated by the opportunity to compare results across languages with varying degrees of morphological complexity, which allowed for a better understanding of how these models can adapt to different linguistic structures. Datasets such as the Hungarian Emotion Corpus, FinnSentiment, and SentiFi were used to evaluate model performance. The results showed that transformer-based models, particularly BERT, XLM-R, and mBERT, significantly outperformed traditional methods, achieving high accuracy in sentiment classification tasks for all the languages studied. 2024.iwclul-1.6 diff --git a/data/xml/2024.iwslt.xml b/data/xml/2024.iwslt.xml index 4384df4c78..ae189c0a0c 100644 --- a/data/xml/2024.iwslt.xml +++ b/data/xml/2024.iwslt.xml @@ -21,7 +21,7 @@ <fixed-case>FINDINGS</fixed-case> <fixed-case>OF</fixed-case> <fixed-case>THE</fixed-case> <fixed-case>IWSLT</fixed-case> 2024 <fixed-case>EVALUATION</fixed-case> <fixed-case>CAMPAIGN</fixed-case> Ibrahim SaidAhmadNortheastern U. AntoniosAnastasopoulosGMU - OndřejBojarCharles U. + OndřejBojarCharles U. ClaudiaBorgU. Malta MarineCarpuatUMD RoldanoCattoniFBK @@ -37,13 +37,13 @@ PrashantMathurAmazon EvgenyMatusovAppTek ChandreshMauryaIIT Indore - JohnMcCraeU. Galway + JohnMcCraeU. Galway KentonMurrayJHU SatoshiNakamuraNAIST - MatteoNegriFBK + MatteoNegriFBK JanNiehuesKIT XingNiuAmazon - Atul Kr.OjhaU. Galway + Atul Kr.OjhaU. Galway JohnOrtegaNortheastern SaraPapiFBK PeterPolákCharles U. @@ -55,10 +55,10 @@ JiatongShiCMU ClaytoneSikasoteU. Zambia MatthiasSperberApple - SebastianStükerZoom + SebastianStükerZoom KatsuhitoSudohNAIST BrianThompsonAmazon - AlexWaibelCMU + AlexWaibelCMU ShinjiWatanabeCMU PatrickWilkenAppTek PetrZemánekCharles U. @@ -182,7 +182,7 @@ <fixed-case>FBK</fixed-case>@<fixed-case>IWSLT</fixed-case> Test Suites Task: Gender Bias evaluation with <fixed-case>M</fixed-case>u<fixed-case>ST</fixed-case>-<fixed-case>SHE</fixed-case> BeatriceSavoldiFondazione Bruno Kessler MarcoGaidoFondazione Bruno Kessler, University of Trento - MatteoNegriFondazione Bruno Kessler + MatteoNegriFondazione Bruno Kessler LuisaBentivogliFondazione Bruno Kessler 65-71 This paper presents the FBK contribution to the IWSLT-2024 ‘Test suites’ shared subtask, part of the Offline Speech Translation Task. Our contribution consists of the MuST-SHE-IWSLT24 benchmark evaluation, designed to assess gender bias in speech translation. By focusing on the en-de language pair, we rely on a newly created test suite to investigate systems’ ability to correctly translate feminine and masculine gender. Our results indicate that – under realistic conditions – current ST systems achieve reasonable and comparable performance in correctly translating both feminine and masculine forms when contextual gender information is available. For ambiguous references to the speaker, however, we attest a consistent preference towards masculine gender, thus calling for future endeavours on the topic. Towards this goal we make MuST-SHE-IWSLT24 freely available at: https://mt.fbk.eu/must-she/ @@ -194,7 +194,7 @@ <fixed-case>S</fixed-case>imul<fixed-case>S</fixed-case>eamless: <fixed-case>FBK</fixed-case> at <fixed-case>IWSLT</fixed-case> 2024 Simultaneous Speech Translation SaraPapiFondazione Bruno Kessler MarcoGaidoFondazione Bruno Kessler, University of Trento - MatteoNegriFondazione Bruno Kessler + MatteoNegriFondazione Bruno Kessler LuisaBentivogliFondazione Bruno Kessler 72-79 This paper describes the FBK’s participation in the Simultaneous Translation Evaluation Campaign at IWSLT 2024. For this year’s submission in the speech-to-text translation (ST) sub-track, we propose SimulSeamless, which is realized by combining AlignAtt and SeamlessM4T in its medium configuration. The SeamlessM4T model is used ‘off-the-shelf’ and its simultaneous inference is enabled through the adoption of AlignAtt, a SimulST policy based on cross-attention that can be applied without any retraining or adaptation of the underlying model for the simultaneous task. We participated in all the Shared Task languages (English->German, Japanese, Chinese, and Czech->English), achieving acceptable or even better results compared to last year’s submissions. SimulSeamless, covering more than 143 source languages and 200 target languages, is released at: https://github.com/hlt-mt/FBK-fairseq/. @@ -223,7 +223,7 @@ MauroCettoloFondazione Bruno Kessler RoldanoCattoniFBK AndreaPiergentiliFondazione Bruno Kessler, University of Trento - MatteoNegriFondazione Bruno Kessler + MatteoNegriFondazione Bruno Kessler LuisaBentivogliFondazione Bruno Kessler 86-96 The paper describes the FBK submissions to the Subtitling track of the 2024 IWSLT Evaluation Campaign, which covers both the Automatic Subtitling and the Subtitle Compression task for two language pairs: English to German (en-de) and English to Spanish (en-es). For the Automatic Subtitling task, we submitted two systems: i) a direct model, trained in constrained conditions, that produces the SRT files from the audio without intermediate outputs (e.g., transcripts), and ii) a cascade solution that integrates only free-to-use components, either taken off-the-shelf or developed in-house. Results show that, on both language pairs, our direct model outperforms both cascade and direct systems trained in constrained conditions in last year’s edition of the campaign, while our cascade solution is competitive with the best 2023 runs. For the Subtitle Compression task, our primary submission involved prompting a Large Language Model (LLM) in zero-shot mode to shorten subtitles that exceed the reading speed limit of 21 characters per second. Our results highlight the challenges inherent in shrinking out-of-context sentence fragments that are automatically generated and potentially error-prone, underscoring the need for future studies to develop targeted solutions. @@ -312,7 +312,7 @@ HaoranXuJohns Hopkins University HenryLi XinyuanJohns Hopkins University AnkurKejriwalJohns Hopkins University - SanjeevKhudanpurJohns Hopkins University + SanjeevKhudanpurJohns Hopkins University KentonMurrayJohns Hopkins University PaulMcNameeJohns Hopkins University 140-153 @@ -399,10 +399,10 @@ Blending <fixed-case>LLM</fixed-case>s into Cascaded Speech Translation: <fixed-case>KIT</fixed-case>’s Offline Speech Translation System for <fixed-case>IWSLT</fixed-case> 2024 SaiKoneruKarlsruhe Institute of Technology ThaiBinh NguyenKarlsruhe Institute of Technology - Ngoc-QuanPhamKarlsruhe Institute of Technology + Ngoc-QuanPhamKarlsruhe Institute of Technology DanniLiuKarlsruhe Institute of Technology ZhaolinLiKarlsruhe Institute of Technology - AlexanderWaibelCarnegie Mellon + AlexanderWaibelCarnegie Mellon JanNiehuesKarlsruhe Institut of Technology 183-191 Large Language Models (LLMs) are currently under exploration for various tasks, including Automatic Speech Recognition (ASR), Machine Translation (MT), and even End-to-End Speech Translation (ST). In this paper, we present KIT’s offline submission in the constrained + LLM track by incorporating recently proposed techniques that can be added to any cascaded speech translation. Specifically, we integrate Mistral-7B into our system to enhance it in two ways. Firstly, we refine the ASR outputs by utilizing the N-best lists generated by our system and fine-tuning the LLM to predict the transcript accurately. Secondly, we refine the MT outputs at the document level by fine-tuning the LLM, leveraging both ASR and MT predictions to improve translation quality. We find that integrating the LLM into the ASR and MT systems results in an absolute improvement of 0.3% in Word Error Rate and 0.65% in COMET for tst2019 test set. In challenging test sets with overlapping speakers and background noise, we find that integrating LLM is not beneficial due to poor ASR performance. Here, we use ASR with chunked long-form decoding to improve context usage that may be unavailable when transcribing with Voice Activity Detection segmentation alone. @@ -417,7 +417,7 @@ AndréBeyerCrowdee AbdelMessaoudiVocapia RabeaAffanMr. - ClaudeBarrasVocapia Research + ClaudeBarrasVocapia Research MaximTychonovLingea ltd. Jean-LucGauvainCNRS/LIMSI 192-202 @@ -445,7 +445,7 @@ CarlosMullovKarlsruhe Institute of Technology TuAnh DinhKarlsruhe Institute of Technology SaiKoneruKarlsruhe Institute of Technology - AlexanderWaibelCarnegie Mellon + AlexanderWaibelCarnegie Mellon JanNiehuesKarlsruhe Institut of Technology 221-228 This paper presents KIT’s submissions to the IWSLT 2024 dialectal and low-resource track. In this work, we build systems for translating into English from speech in Maltese, Bemba, and two Arabic dialects Tunisian and North Levantine. Under the unconstrained condition, we leverage the pre-trained multilingual models by fine-tuning them for the target language pairs to address data scarcity problems in this track. We build cascaded and end-to-end speech translation systems for different language pairs and show the cascaded system brings slightly better overall performance. Besides, we find utilizing additional data resources boosts speech recognition performance but slightly harms machine translation performance in cascaded systems. Lastly, we show that Minimum Bayes Risk is effective in improving speech translation performance by combining the cascaded and end-to-end systems, bringing a consistent improvement of around 1 BLUE point. @@ -550,8 +550,8 @@ MaximilianAwiszusZoom Video Communications JanNiehuesKarlsruhe Institute of Technology MarcoTurchiZoom Video Communications - SebastianStükerZoom Video Communications - AlexWaibelCarnegie Mellon University + SebastianStükerZoom Video Communications + AlexWaibelCarnegie Mellon University 291-297 Generating rare words is a challenging task for natural language processing in general and in speech translation (ST) specifically. This paper introduces a test suite prepared for the Offline ST shared task at IWSLT. In the test suite, corresponding rare words (i.e. named entities) were annotated on TED-Talks for English and German and the English side was made available to the participants together with some distractors (irrelevant named entities). Our evaluation checks the capabilities of ST systems to leverage the information in the contextual list of named entities and improve translation quality. Systems are ranked based on the recall and precision of named entities (separately on person, location, and organization names) in the translated texts. Our evaluation shows that using contextual information improves translation quality as well as the recall and precision of NEs. The recall of organization names in all submissions is the lowest of all categories with a maximum of 87.5 % confirming the difficulties of ST systems in dealing with names. 2024.iwslt-1.35 diff --git a/data/xml/2024.jeptalnrecital.xml b/data/xml/2024.jeptalnrecital.xml index d7742b51ec..da1f413bf3 100644 --- a/data/xml/2024.jeptalnrecital.xml +++ b/data/xml/2024.jeptalnrecital.xml @@ -107,9 +107,9 @@ Étude en temps réel de la fusion des /a/ ~ /ɑ/ en français depuis 1925 JuliuszCęcelewski - CédricGendrot - MartineAdda-Decker - PhilippeBoula de Mareüil + CédricGendrot + MartineAdda-Decker + PhilippeBoula de Mareüil 71–81 Cette étude explore la variation diachronique de la réalisation des voyelles /a/ ~ /ɑ/ du français en position finale de mot dans la parole déclamatoire/journalistique de 1925 à 2023. Nos données comprennent deux corpus préexistants – le corpus d’archives INA (1940–1997) et le corpus ESTER (2000–2004) – ainsi que deux nouveaux corpus composés d’enregistrements issus des Archives de la Parole d’Hubert Pernot (1925–1929), de Radio France et de YouTube (2020–2023).Nos résultats indiquent une postériorisation du /a/ vers une position plus centrale et, dans une moindre mesure, une antériorisations du /ɑ/, qui ont abouti à la neutralisation et la fusion acoustique des deux phonèmes au cours du XXe siècle. Les résultats sont discutés à la lumière de l’évolution globale du système des voyelles à double timbre en français. 2024.jeptalnrecital-jep.8 @@ -132,8 +132,8 @@ Identification du locuteur : ouvrir la boîte noire CaroleMillot - CédricGendrot - Jean-FrançoisBonastre + CédricGendrot + Jean-FrançoisBonastre 92–101 L’explicabilité des systèmes relevant du deep learning est devenue un enjeu central ces dernières années, dans le droit européen comme le domaine criminalistique. L’approche BA-LR introduit en identification du locuteur un nouveau paradigme de modélisation : elle fait émerger automatiquement les attributs partagés par un groupe de locuteurs et qui sous-entendent la discrimination de ceux-ci. Le score produit est décomposable au niveau des attributs, ce qui augmente significativement l’explicabilité de la méthode. Cette étude propose de compléter la caractérisation des attributs obtenus par le BA-LR, à l’aide de paramètres de qualité de voix. L’analyse suggère que plusieurs attributs utilisent les types de phonation pour regrouper les locuteurs, ceux-ci encodant des informations humainement perceptibles. Cet article pose ainsi des bases pour l’analyse acoustique des attributs, qui permettra à terme d’utiliser le BA-LR dans le cadre du profilage vocal. 2024.jeptalnrecital-jep.10 @@ -158,7 +158,7 @@ MaximeFily GuillaumeWisniewski SéverineGuillaume - GillesAdda + GillesAdda AlexisMichaud 112–121 Nous explorons les représentations vectorielles de la parole à partir d’un modèle pré-entraîné pour déterminer leur niveau d’abstraction par rapport au signal audio. Nous proposons une nouvelle méthode non-supervisée exploitant des données audio ayant des métadonnées soigneusement organisées pour apporter un éclairage sur les informations présentes dans les représentations. Des tests ABX déterminent si les représentations obtenues via un modèle de parole multilingue encodent une caractéristique donnée. Trois expériences sont présentées, portant sur la qualité acoustique de la pièce, le type de discours, ou le contenu phonétique. Les résultats confirment que les différences au niveau de caractéristiques linguistiques/extra-linguistiques d’enregistrements audio sont reflétées dans les représentations de ceux-ci. Plus la quantité d’audio par vecteur est importante, mieux elle permet de distinguer les caractéristiques extra-linguistiques. Plus elle est faible, et mieux nous pouvons distinguer les informations d’ordre phonétique/segmental. La méthode proposée ouvre de nouvelles pistes pour la recherche et les travaux comparatifs sur les langues peu dotées. @@ -207,7 +207,7 @@ Réductions temporelles en français parlé : Où peut-on trouver les zones de réduction ? YaruWu KimGerdes - MartineAdda-Decker + MartineAdda-Decker 153–162 Cet article examine la réduction dans la parole continue en français, ainsi que les différents facteurs qui contribuent au phénomène, tels que le style de parole, le débit de parole, la catégorie de mots, la position du phone dans le mot et la position du mot dans les groupes syntaxiques. L’étude utilise trois corpus de parole continue en français, couvrant la parole formelle, la parole moins formelle et la parole familière. La méthode utilisée comprend l’alignement forcé et l’étiquetage automatique des zones de réduction. Les résultats suggèrent que la réduction de la parole est présente dans tous les styles de parole, mais moins fréquente dans la parole formelle, et que la réduction est plus susceptible d’être observée dans les énoncés de parole avec un taux de parole élevé. La position médiane des mots ou des groupes syntaxiques tend à favoriser la réduction. 2024.jeptalnrecital-jep.16 @@ -282,7 +282,7 @@ Utilisation de wav2vec 2.0 pour des tâches de classifications phonétiques : aspects méthodologiques LilaKim - CedricGendrot + CedricGendrot 219–229 L’apprentissage auto-supervisé, particulièrement dans le contexte de la parole, a démontré son efficacité dans diverses tâches telles que la reconnaissance du locuteur et la reconnaissance de la parole. Notre question de recherche se concentre sur l’efficacité des représentations vectorielles - extraites de phonèmes - plus courtes par rapport à des séquences plus longues dans la détection de la nasalité. Deux approches distinctes ont été étudiées : extraire des vecteurs sur la durée du phonème et prendre des séquences plus longues avec une seconde ajoutée de chaque côté du phonème, puis récupérer la partie centrale a posteriori. Les résultats révèlent que les modèles réagissent différemment selon les phones et les locuteurs, avec une variabilité observée à ces niveaux. Le modèle à séquences longues surpasse le modèle à séquences courtes en assurant une corrélation plus robuste avec le débit d’air nasal. 2024.jeptalnrecital-jep.23 @@ -312,7 +312,7 @@ Analyse Factorielle de signaux sonores : développement d’une méthode automatique de détermination des frontières optimales entre canaux de fréquence AgnieszkaDuniec - ElisabethDelais-Roussarie + ElisabethDelais-Roussarie OlivierCrouzet 252–260 Des études récentes supportent l’hypothèse d’une relation entre les propriétés statistiques des signaux de parole et les mécanismes perceptifs : les gammes de fréquence présentant une corrélation dans leurs modulations d’amplitude pourraient être associées à des frontières spectrales relativement stables envisagées comme optimales sur le plan perceptif. Cependant, des limites afférentes à ces études antérieures ressortent : (1) elles se fondent pour la plupart sur des critères subjectifs à travers l’observation visuelle des courbes de résultats statistiques, et (2) elles n’envisagent pas que les résultats puissent varier en fonction des échantillons de données sélectionnés, de la nature des signaux utilisés, ou de la taille des échantillons. Même si cette position peut être argumentée en lien avec l’approche du codage efficace, cet aspect afférent au degré de variation potentiel nécessite d’être évalué. Nous avons mis en place une méthode de détermination automatique des frontières qui permet de répliquer les travaux antérieurs en introduisant une évaluation expérimentale de ces limites et discutons de quelques résultats préliminaires en comparaison avec les études précédentes. @@ -347,7 +347,7 @@ Comparaison de mesures pour la détection automatique de déviance dans la dysarthrie ataxique NatachaMiniconi - CédricGendrot + CédricGendrot AngélinaBourbon LeonardoLancia CécileFougeron @@ -362,7 +362,7 @@ JingyiSun YaruWu NicolasAudibert - MartineAdda-Decker + MartineAdda-Decker 291–300 La technologie ASR excelle dans la transcription précise des discours lus préparés, mais elle rencontre encore des défis lorsqu’il s’agit de conversations spontanées. Cela est en partie dû au fait que ces dernières relèvent d’un registre de langage informel, avec disfluences et réductions de parole. Afin de mieux comprendre les différences de production en fonction des styles de parole, nous présentons la création d’un corpus de parole conversationnelle, dont des extraits sont ensuite lus par leurs auteurs. Le corpus comprend 36 heures de parole en chinois mandarin avec leur transcription, réparties entre conversations spontanées et lecture. Nous avons utilisé WHISPER pour la transcription automatique de la parole et le Montreal Forced Aligner pour l’alignement forcé, résultant dans un corpus de parole transcrit avec annotations multi-niveaux incluant phonèmes, caractères/syllabes et mots. De telles productions de parole parallèles (en modes spontané et lu) seront particulièrement intéressantes pour l’étude des réductions temporelles. 2024.jeptalnrecital-jep.30 @@ -411,7 +411,7 @@ Effets du shadowing et de l’imitation en tant que méthodes d’entraînement à la prononciation du /ɥi/ en français WenxunFu - MartineAdda-Decker + MartineAdda-Decker BarbaraKühnert 332–341 Trente étudiantes mandarines apprenant le français ont participé à un entraînement autonome de quatre semaines, utilisant l’imitation tardive et le shadowing (répétition immédiate). Cette étude se concentre sur le résultat de la réalisation du /i/ dans /ɥi/, souvent réalisé proche du /y/. Les posttests montrent des améliorations dans la perception et la production de /ɥi/. Pour les apprenants de niveaux intermédiaires ayant pratiqué le shadowing, la distinction entre le troisième formant (F3) et le deuxième formant (F2) du /i/ dans /ɥi/ est significativement plus élevé après l’entraînement, indiquant une meilleure distinction avec /y/. Le shadowing semble efficace dans l’amélioration de la perception chez les débutants et apprenants intermédiaires, mais uniquement dans la production pour les niveaux intermédiaires. Nous suggérons que le shadowing, en tant que méthode hautement cognitive et active, puisse servir d’alternative à la méthode d’imitation, sous réserve que la compétence linguistique des apprenants leur permette d’accomplir la tâche avec succès. @@ -555,7 +555,7 @@ La sonorité n’est pas l’intensité: le cas des diphtongues dans une langue tonale YunzhuoXiang JiayinGao - CédricGendrot + CédricGendrot 441–450 Cette étude explore le lien entre la sonorité et l’intensité dans la production des diphtongues ouvrantes et fermantes en mandarin de Pékin. Étant donné qu’une voyelle ouverte est considérée comme plus sonore qu’une voyelle fermée, nous nous attendons à constater une augmentation d’intensité dans une diphtongue ouvrante et une diminution d’intensité dans une diphtongue fermante. Or, nos résultats, basés sur les modèles GAMM (modèles additifs généralisés à l’effet mixte) révèlent un pattern différent de nos attentes : la dynamique d’intensité au sein de la diphtongue n’est pas liée à l’aperture vocalique. En revanche, conformément aux études précédentes, nous trouvons une corrélation positive entre la F0 et l’intensité. Nous interrogeons ainsi sur la validité de définir la sonorité à base de l’intensité seule. Enfin, nous discutons du rôle de la F0 dans la définition de la sonorité et l’apport de notre étude pour modéliser la sonorité dans une langue tonale. 2024.jeptalnrecital-jep.45 @@ -592,9 +592,9 @@ Nouvelle tâche sémantique pour le corpus de compréhension de parole en français <fixed-case>MEDIA</fixed-case> NadègeAlavoine GaëlleLaperrière - ChristopheServan + ChristopheServan SaharGhannay - SophieRosset + SophieRosset 470–480 La détection d’intention et de concepts sont des tâches essentielles de la compréhension de la parole(SLU). Or il n’existe que peu de données annotées en français permettant d’effectuer ces deux tâches conjointement. Cependant, il existe des ensembles de données annotées en concept, dont le corpus MEDIA. Ce corpus est considéré comme l’un des plus difficiles. Néanmoins, il ne comporte que des annotations en concepts et pas en intentions. Dans cet article, nous proposons une version étendue de MEDIA annotée en intentions pour étendre son utilisation. Cet article présente une méthode semi-automatique pour obtenir cette version étendue. De plus, nous présentons les premiers résultats des expériences menées sur cet ensemble de données en utilisant des modèles joints pour la classification des intentions et la détection de concepts. 2024.jeptalnrecital-jep.48 @@ -633,10 +633,10 @@ IsabelleFerrané HervéBredin ThomasPellegrini - FarahBenamara + FarahBenamara JérômeBertrand Marie-FrançoiseBertrand - VéroniqueMoriceau + VéroniqueMoriceau JérômeFarinas 502–511 Dans cet article, nous présentons notre contribution à la tâche de classification des émotions dans la parole dans le cadre de notre participation à la campagne d’évaluation Odyssey 2024. Nous proposons un système hybride qui tire parti à la fois des informations du signal audio et des informations sémantiques issues des transcriptions automatiques. Les résultats montrent que l’ajout de l’information sémantique permet de dépasser les systèmes uniquement audio. @@ -647,7 +647,7 @@ Preuve de concept d’un système de génération automatique en Langue française Parlée Complétée BrigitteBigi - NuriaGala + NuriaGala 512–520 La Langue française Parlée Complétée (LfPC) est un système de communication développé pour les personnes sourdes afin de compléter la lecture labiale avec une main, au niveau phonétique. Il est utilisé par les enfants pour acquérir des compétences en lecture, en lecture labiale et en communication orale. L’objectif principal est de permettre aux enfants sourds de devenir des lecteurs et des locuteurs compétents en langue française. Nous proposons une preuve de concept (PoC) d’un système de réalité augmentée qui place automatiquement la représentation d’une main codeuse sur la vidéo pré-enregistrée d’un locuteur. Le PoC prédit la forme et la position de la main, le moment durant lequel elle doit être affichée, et ses coordonnées relativement au visage dans la vidéo. Des photos de mains sont ensuite juxtaposées à la vidéo. Des vidéos annotées automatiquement par le PoC ont été montrées à des personnes sourdes qui l’ont accueilli et évalué favorablement. 2024.jeptalnrecital-jep.52 @@ -806,7 +806,7 @@ Au-delà de la performance des modèles : la prédiction de liens peut-elle enrichir des graphes lexico-sémantiques du français ? Hee-SooChoi PriyanshTrivedi - MathieuConstant + MathieuConstant KarënFort BrunoGuillaume 36–49 @@ -820,7 +820,7 @@ ThomasGerald LouisTamames SofianeEttayeb - PatrickParoubek + PatrickParoubek AnneVilnat 50–63 Dans cet article nous présentons un nouveau corpus de question-réponse en français pour le domaine de l’éducation. Ce corpus à été construit dans le but de créer un système d’assistant virtuel pour répondre à des questions sur des documents ou du matériel de cours. Afin d’être utile autant aux enseignants qu’au étudiants, il est important de considérer des questions complexes ainsi que d’être capable de justifier les réponses sur du matériel validé. Nous présentons donc le nouveau Corpus CQuAE, un corpus de question-réponse manuellement annoté dont nous discutons des propriétés. Nous présenterons aussi les différentes étapes de sa création avec aujourd’hui une phase d’amélioration des données.Enfin, nous présentons plusieurs expériences pour évaluer l’exploitation du corpus dans le cadre d’un système de question-réponse automatique.Ces différentes analyses et expériences nous permettrons de valider l’adéquation des données collectés pour l’objectif visé. @@ -831,7 +831,7 @@ Évaluation automatique des biais de genre dans des modèles de langue auto-régressifs FannyDucel - AurélieNévéol + AurélieNévéol KarënFort 64–84 Nous proposons un outil pour mesurer automatiquement les biais de genre dans des textes générés par des grands modèles de langue dans des langues flexionnelles. Nous évaluons sept modèles à l’aide de 52 000 textes en français et 2 500 textes en italien, pour la rédaction de lettres de motivation. Notre outil s’appuie sur la détection de marqueurs morpho-syntaxiques de genre pour mettre au jour des biais. Ainsi, les modèles favorisent largement la génération de masculin : le genre masculin est deux fois plus présent que le féminin en français, et huit fois plus en italien. Les modèles étudiés exacerbent également des stéréotypes attestés en sociologie en associant les professions stéréotypiquement féminines aux textes au féminin, et les professions stéréotypiquement masculines aux textes au masculin. @@ -867,8 +867,8 @@ PierreLepagnol ThomasGerald SaharGhannay - ChristopheServan - SophieRosset + ChristopheServan + SophieRosset 113–129 Ce travail s’inscrit dans le débat sur l’efficacité des grands modèles de langue par rapport aux petits pour la classification de texte par amorçage (prompting). Nous évaluons ici le potentiel des petits modèles de langue dans la classification de texte sans exemples, remettant en question la prédominance des grands modèles. À travers un ensemble diversifié de jeux de données, notre étude compare les petits et les grands modèles utilisant différentes architectures et données de pré-entraînement. Nos conclusions révèlent que les petits modèles peuvent générer efficacement des étiquettes et, dans certains contextes, rivaliser ou surpasser les performances de leurs homologues plus grands. Ce travail souligne l’idée que le modèle le plus grand n’est pas toujours le meilleur, suggérant que les petits modèles économes en ressources peuvent offrir des solutions viables pour des défis spécifiques de classification de données 2024.jeptalnrecital-taln.8 @@ -901,7 +901,7 @@ Recherche de relation à partir d’un seul exemple fondée sur un modèle N-way K-shot : une histoire de distracteurs HugoThomas - GuillaumeGravier + GuillaumeGravier PascaleSébillot 157–168 La recherche de relation à partir d’un exemple consiste à trouver dans un corpus toutes les occurrences d’un type de relation liant deux entités dans une phrase, nommé type cible et caractérisé à l’aide d’un seul exemple. Nous empruntons le scénario d’entraînement et évaluation N-way K-shot à la tâche de classification de relations rares qui prédit le type de relation liant deux entités à partir de peu d’exemples d’entraînement, et l’adaptons à la recherche de relation avec un exemple. Lors de l’évaluation, un modèle entraîné pour la classification de relations en N-way K-shot est utilisé, dans lequel K vaut un pour le type cible, une des N classes (du N-way) représente le type cible, et les N-1 classes restantes sont des distracteurs modélisant la classe de rejet. Les résultats sur FewRel et TACREV démontrent l’efficacité de notre approche malgré la difficulté de la tâche. L’étude de l’évolution des performances en fonction du nombre de distracteurs et des stratégies de leur choix met en avant une bonne configuration globale, à savoir un nombre élevé de distracteurs à une distance intermédiaire du type de relation cible dans l’espace latent appris par le modèle. Le diagnostic a posteriori de notre méthode révèle l’existence de configurations optimales pour chaque type cible que nos analyses actuelles échouent à caractériser, ouvrant la voie à de futurs travaux. @@ -912,7 +912,7 @@ Reconnaissance d’entités cliniques en few-shot en trois langues MarcoNaguib - AurélieNévéol + AurélieNévéol XavierTannier 169–197 Les grands modèles de langage deviennent la solution de choix pour de nombreuses tâches de traitement du langage naturel, y compris dans des domaines spécialisés où leurs capacités few-shot devraient permettre d’obtenir des performances élevées dans des environnements à faibles ressources. Cependant, notre évaluation de 10 modèles auto-régressifs et 16 modèles masqués montre que, bien que les modèles auto-régressifs utilisant des prompts puissent rivaliser en termes de reconnaissance d’entités nommées (REN) en dehors du domaine clinique, ils sont dépassés dans le domaine clinique par des taggers biLSTM-CRF plus légers reposant sur des modèles masqués. De plus, les modèles masqués ont un bien moindre impact environnemental que les modèles auto-régressifs. Ces résultats, cohérents dans les trois langues étudiées, suggèrent que les modèles à apprentissage few-shot ne sont pas encore adaptés à la production de REN dans le domaine clinique, mais pourraient être utilisés pour accélérer la création de données annotées de qualité. @@ -924,7 +924,7 @@ Réduction des répétitions dans la Traduction Automatique Neuronale MarkoAvila AnnaRebollo - JosepCrego + JosepCrego 198–210 Actuellement, de nombreux systèmes TAL utilisent des décodeurs neuronaux pour la génération de textes, qui font preuve d’une capacité impressionnante à générer des textes approchant les niveaux de fluidité humaine. Toutefois, dans le cas des réseaux de traduction automatique, ils sont souvent confrontés à la production de contenu répétitif, également connu sous le nom de diction répétitive ou de répétition de mots, un aspect pour lequel ils n’ont pas été explicitement entraînés. Bien que cela ne soit pas intrinsèquement négatif, cette répétition peut rendre l’écriture monotone ou maladroite si elle n’est pas utilisée intentionnellement pour l’emphase ou des fins stylistiques. La répétition de mots a été traitée par des méthodes post-hoc pendant l’inférence, contraignant le réseau à examiner des hypothèses auxquelles le système avait initialement attribué une plus faible probabilité. Dans cet article, nous implémentons une méthode qui consiste à pénaliser les répétitions lors de l’apprentissage et qui s’inspire des principes du label smoothing. Conformément à cette méthode, nous modifions la distribution de la vérité terrain afin d’orienter le modèle de manière à décourager ces répétitions. Les résultats de nos expériences montrent que les méthodes proposées permettent de contrôler le problème de la répétition dans les moteurs neuronaux de traduction automatique sans compromis en termes d’efficacité ou de qualité des traductions. 2024.jeptalnrecital-taln.13 @@ -957,7 +957,7 @@ AdrienPupier MaximinCoavoux BenjaminLecouteux - JérômeGoulian + JérômeGoulian 234–244 Effectuer l’analyse syntaxique du signal audio –plutôt que de passer par des transcriptions de l’audio– est une tache récemment proposée par Pupier et al. (2022), dans le but d’incorporer de l’information prosodique dans le modèle d’analyse syntaxique et de passer outre les limitations d’une approche cascade qui consisterait à utiliser un système de reconnaissance de la parole (RAP) puis un analyseur syntaxique. Dans cet article, nous effectuons un ensemble d’expériences visant à comparer les performances de deux familles d’analyseurs syntaxiques (i) l’approche par graphe (ii) la réduction à une tâche d’étiquetage de séquence ; directement sur la parole. Nous évaluons notre approche sur un corpus arboré du Français parlé. Nous montrons que (i) l’approche par graphe obtient de meilleurs résultats globalement (ii) effectuer l’analyse syntaxique directement depuis la parole obtient de meilleurs résultats qu’une approche par cascade de systèmes, malgré 30 de paramètre en moins 2024.jeptalnrecital-taln.16 @@ -977,7 +977,7 @@ <fixed-case>W</fixed-case>iki<fixed-case>F</fixed-case>act<fixed-case>D</fixed-case>iff: Un Grand jeu de données Réaliste et Temporellement Adaptable pour la Mise à Jour Atomique des Connaissances Factuelles dans les Modèles de Langue Causaux HichemAmmar Khodja - FrédéricBéchet + FrédéricBéchet QuentinBrabant AlexisNasr GwénoléLecrové @@ -1037,8 +1037,8 @@ Approche multitâche pour l’amélioration de la fiabilité des systèmes de résumé automatique de conversation EuniceAkani - BenoitFavre - FredericBechet + BenoitFavre + FredericBechet RomainGemignani 338–351 Le résumé de dialogue consiste à générer un résumé bref et cohérent d’une conversation ou d’un dialogue entre deux ou plusieurs locuteurs. Même si les modèles de langue les plus récents ont permis des progrès remarquables dans ce domaine, générer un résumé fidèle au dialogue de départ reste un défi car cela nécessite de prendre en compte l’interaction entre les locuteurs pour conserver les informations les plus pertinentes du dialogue. Nous nous plaçons dans le cadre des dialogues humain-humain avec but. Ce cadre nous permet d’intégrer des informations relatives à la tâche dans le cadre du résumé de dialogue afin d’aider le système à générer des résumés plus fidèles sémantiquement. Nous évaluons dans cette étude des approches multitâches permettant de lier la tâche de résumé à des tâches de compréhension du langage comme la détection de motifs d’appels. Les informations liées à la tâche nous permettent également de proposer des nouvelles méthodes de sélection de résumés basées sur l’analyse sémantique du dialogue ainsi que des métriques d’évaluation basées également sur cette même analyse. Nous avons testé ces méthodes sur DECODA, un corpus français de dialogue collecté dans le centre d’appel de la RATP entre des usagers et des téléconseillers. Nous montrons que l’ajout d’informations liées à la tâche augmente la fiabilité des résumés générés. @@ -1069,7 +1069,7 @@ De nouvelles méthodes pour l’exploration de l’interface syntaxe-prosodie : un treebank intonosyntaxique et un système de synthèse pour le pidgin nigérian EmmettStrickland - AnneLacheret-Dujour + AnneLacheret-Dujour MarcEvrard SylvainKahane DanaAubakirova @@ -1086,9 +1086,9 @@ Étude des facteurs de complexité des modèles de langage dans une tâche de compréhension de lecture à l’aide d’une expérience contrôlée sémantiquement ElieAntoine - FredericBechet - GéraldineDamnati - PhilippeLanglais + FredericBechet + GéraldineDamnati + PhilippeLanglais 384–396 Cet article propose une méthodologie pour identifier les facteurs de complexité inhérents aux tâches de traitement automatique du langage (TAL), indépendamment de la dimension des modèles. Il montre que la performance inférieure de certains exemples est attribuable à des facteurs de complexités spécifiques. Plutôt que de procéder à des évaluations générales, nous préconisons des évaluations restreintes portant sur des tâches, des ensembles de données et des langues spécifiques, décrites de manière linguistique. Appliquée à une tâche de compréhension de texte via un corpus de questions-réponses, notre méthode met en évidence des facteurs de complexité sémantique affectant divers modèles de tailles et d’architectures différentes. En outre, nous proposons plusieurs corpus de complexité sémantique croissante dérivés de ces facteurs, avançant que l’optimisation de leur traitement dépasse la simple augmentation de la taille des modèles. 2024.jeptalnrecital-taln.27 @@ -1120,7 +1120,7 @@ ClémenceSebe SarahCohen-Boulakia OlivierFerret - AurélieNévéol + AurélieNévéol 422–434 Les chaînes de traitement d’analyses de données biologiques utilisées en bioinformatique sont une solution pour la portabilité et la reproductibilité des analyses. Ces chaînes figurent à la fois sous forme descriptive dans des articles scientifiques et/ou sous forme de codes dans des dépôts. L’identification de publications scientifiques décrivant de nouvelles chaînes de traitement et l’extraction de leurs informations sont des enjeux importants pour la communauté bioinformatique. Nous proposons ici d’étendre le corpus BioToFlow ayant trait aux articles décrivant des chaînes de traitement bioinformatiques et de l’utiliser pour entraîner et évaluer des modèles de reconnaissance d’entités nommées bioinformatiques. Ce travail est accompagné d’une discussion critique portant à la fois sur le processus d’annotation du corpus et sur les résultats de l’extraction d’entités. 2024.jeptalnrecital-taln.30 @@ -1133,7 +1133,7 @@ NicolasHiebel OlivierFerret KarënFort - AurélieNévéol + AurélieNévéol 435–448 La génération de texte ouvre des perspectives pour pallier l’absence de corpus librement partageables dans des domaines contraints par la confidentialité, comme le domaine médical. Dans cette étude, nous comparons les performances de modèles encodeurs-décodeurs et décodeurs seuls pour la génération conditionnée de cas cliniques en français. Nous affinons plusieurs modèles pré-entraînés pour chaque architecture sur des cas cliniques en français conditionnés par les informations démographiques des patient·es (sexe et âge) et des éléments cliniques.Nous observons que les modèles encodeur-décodeurs sont plus facilement contrôlables que les modèles décodeurs seuls, mais plus coûteux à entraîner. 2024.jeptalnrecital-taln.31 @@ -1183,7 +1183,7 @@ JérômeLouradour RoxaneBertrand KateThompson - LaurentPrévot + LaurentPrévot 508–529 We present the MEETING corpus, a dataset of roughly 95 hours of spontaneous meeting-style conversations in French. The corpus is designed to serve as a foundation for downstream tasks such as meeting summarization. In its current state, it offers 25 hours of manually corrected transcripts that are aligned with the audio signal, making it a valuable resource for evaluating ASR and speaker recognition systems. It also includes automatic transcripts and alignments of the whole corpus which can be used for downstream NLP tasks. The aim of this paper is to describe the conception, production and annotation of the corpus up to the transcription level as well as to provide statistics that shed light on the main linguistic features of the corpus. 2024.jeptalnrecital-taln.35 @@ -1218,7 +1218,7 @@ Optimisation des performances d’un système de reconnaissance automatique de la parole pour les commentaires sportifs: fine-tuning de Whisper CamilleLavigne AlexStasica - AnnaKupsc + AnnaKupsc 567–581 Malgré les performances élevées des systèmes automatiques de reconnaissance de la parole (Automatic Speech Recognition ; ASR) sur des corpus généraux, leur efficacité est considérablement réduite lorsqu’ils sont confrontés à des corpus spécialisés. Ces corpus peuvent notamment contenir du lexique propre à des domaines spécifiques, des accents ou du bruit de fond rendant la transcription ardue. Cette étude vise à évaluer les avantages de l’optimisation d’une transcription automatique, par opposition à manuelle, après fine-tuning d’un modèle d’ASR de dernière génération, Whisper (Radford et al., 2023), sur un corpus spécialisé de commentaires sportifs de petite taille. Nos analyses quantitatives et qualitatives indiquent que Whisper est capable d’apprendre les particularités d’un corpus de spécialité, atteignant des performances égales où supérieures aux transcripteurs humains, avec cette quantité de données limitée. Cette recherche met en lumière le rôle que l’intelligence artificielle, notamment les larges modèles de langage, peut jouer pour faciliter la création de corpus spécialisés. 2024.jeptalnrecital-taln.38 @@ -1228,7 +1228,7 @@ Optimiser le choix des exemples pour la traduction automatique augmentée par des mémoires de traduction MaximeBouthors - JosepCrego + JosepCrego FrançoisYvon 582–604 La traduction neuronale à partir d’exemples s’appuie sur l’exploitation d’une mémoire de traduction contenant des exemples similaires aux phrases à traduire. Ces exemples sont utilisés pour conditionner les prédictions d’un décodeur neuronal. Nous nous intéressons à l’amélioration du système qui effectue l’étape de recherche des phrases similaires, l’architecture du décodeur neuronal étant fixée et reposant ici sur un modèle explicite d’édition, le Transformeur multi-Levenshtein. Le problème considéré consiste à trouver un ensemble optimal d’exemples similaires, c’est-à-dire qui couvre maximalement la phrase source. En nous appuyant sur la théorie des fonctions sous-modulaires, nous explorons de nouveaux algorithmes pour optimiser cette couverture et évaluons les améliorations de performances auxquels ils mènent pour la tâche de traduction automatique. @@ -1252,7 +1252,7 @@ Prédiction de la complexité lexicale : Une étude comparative entre <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> et un modèle dédié à cette tâche. AbdelhakKelious - MathieuConstant + MathieuConstant ChristopheCoeur 617–629 Cette étude s’intéresse à la prédiction de la complexité lexicale. Nous explorons des méthodesd’apprentissage profond afin d’évaluer la complexité d’un mot en se basant sur son contexte. Plusspécifiquement, nous examinons comment utiliser des modèles de langue pré-entraînés pour encoderle mot cible et son contexte, en les combinant avec des caractéristiques supplémentaires basées sur lafréquence. Notre approche obtient de meilleurs résultats que les meilleurs systèmes de SemEval-2021(Shardlow et al., 2021). Enfin, nous menons une étude comparative avec ChatGPT afin d’évaluer sonpotentiel pour prédire la complexité lexicale en comparaison avec un modèle dédié à cette tâche. @@ -1342,7 +1342,7 @@ FelixGrezes CyrilGrouin FabianSchüssler - PierreZweigenbaum + PierreZweigenbaum 720–733 Le manque de ressources annotées constitue un défi majeur pour le traitement automatique de la langue en astrophysique. Afin de combler cette lacune, nous présentons astroECR, une extension du corpus TDAC (Time-Domain Astrophysics Corpus). Notre corpus, constitué de 300 rapports d’observation en anglais, étend le schéma d’annotation initial de TDAC en introduisant cinq classes d’entités nommées supplémentaires spécifiques à l’astrophysique. Nous avons enrichi les annotations en incluant les coréférences, les relations sémantiques entre les objets célestes et leurs propriétés physiques, ainsi qu’en normalisant les noms d’objets célestes via des bases de données astronomiques. L’utilité de notre corpus est démontrée en fournissant des scores de référence à travers quatre tâches~: la reconnaissance d’entités nommées, la résolution de coréférences, la détection de relations, et la normalisation des noms d’objets célestes. Nous mettons à disposition le corpus ainsi que son guide d’annotation, les codes sources, et les modèles associés. 2024.jeptalnrecital-taln.48 @@ -1403,8 +1403,8 @@ CassandreArmand ChiaraMazzocconi ShreejataGupta - LaurentPrévot - BenoitFavre + LaurentPrévot + BenoitFavre LeonorBecerra-Bonache AbdellahFourtassi 4–5 @@ -1453,13 +1453,13 @@ MathildeAguiar FelixHerron MagaliNorré - Massih-RezaAmini - PierretteBouillon + Massih-RezaAmini + PierretteBouillon IrisEshkol Taravella EmmanuelleEsparança-Rodier ThomasFrançois LorraineGoeuriot - JérômeGoulian + JérômeGoulian MathieuLafourcade BenjaminLecouteux FrançoisPortet @@ -1494,7 +1494,7 @@ LouisEscouflaire AntoninDescampe AntoineVenant - CédrickFairon + CédrickFairon 12–13 Cet article s’intéresse à la capacité de transfert des modèles de classification de texte dans le domaine journalistique, en particulier pour distinguer les articles d’opinion des articles d’information. A l’ère du numérique et des réseaux sociaux, les distinctions entre ces genres deviennent de plus en plus floues, augmentant l’importance de cette tâche de classification. Un corpus de 80 000 articles de presse provenant de huit médias, quatre québécois et quatre belges francophones, a été constitué. Pour identifier les thèmes des articles, une clusterisation a été appliquée sur les 10 000 articles issus de chaque média, assurant une distribution équilibrée des thèmes entre les deux genres opinion et information. Les données ont ensuite été utilisées pour entraîner (ou peaufiner) et évaluer deux types de modèles : CamemBERT (Martin et al., 2019), un modèle neuronal pré-entraîné, et un modèle de régression logistique basé sur des traits textuels. Dix versions différentes de chaque modèle sont entraînées : 8 versions mono-médias’, chacune peaufinée sur l’ensemble d’entraînement du sous-corpus correspondant à un média, et deux versions multi-médias’, l’une peaufinée sur 8000 articles québécois, l’autre sur les articles belges. Les résultats montrent que les modèles CamemBERT surpassent significativement les modèlesstatistiques en termes de capacité de transfert (voir Figures 1 et 2). Les modèles CamemBERT montrent une plus grande exactitude, notamment sur les ensembles de test du même média que celui utilisé pour l’entraînement. Cependant, les modèles entraînés sur Le Journal de Montréal(JDM) sont particulièrement performants même sur d’autres ensembles de test, suggérant une distinction plus claire entre les genres journalistiques dans ce média. Les modèles CamemBERT multi-médias affichent également de bonnes performances. Le modèle québécois notamment obtient les meilleurs résultats en moyenne, indiquant qu’une diversité de sources améliore la généricité du modèle. Les modèles statistiques (mono- et multi-médias) montrent des performances globalement inférieures, avec des variations significatives selon les médias. Les textes québécois sont plus difficiles à classer pour ces modèles, suggérant des différences culturelles dans les pratiques journalistiques entre le Québec et la Belgique. L’analyse des traits révèle que l’importance de certains éléments textuels, comme les points d’exclamation et les marqueurs de temps relatifs, varient considérablement entre les modèles entraînés sur différents médias. Par exemple, les éditoriaux du JDM utilisent fréquemment des points d’exclamation, reflétant un style plus affirmé et polarisant. En revanche, les articles de La Presse présentent des particularités qui compliquent la généralisation de la tâche. En sommme, cette étude démontre la supériorité des modèles neuronaux comme CamemBERT pour la classification de textes journalistiques, notamment grâce à leur capacité de transfert, bien que les modèles basés sur des traits se distinguent par la transparence de leur raisonnement’. Elle met également en lumière des différences significatives entre les cultures journalistiques québécoises et belges. 2024.jeptalnrecital-trad.8 @@ -1532,9 +1532,9 @@ EmmanuelleEsperança-Rodier RomaneGallienne Carlos-EmilianoGonzález-Gallardo - JérômeGoulian + JérômeGoulian JoseG Moreno - AurélieNévéol + AurélieNévéol DidierSchwab VincentSegonne JohannaSimoens @@ -1658,7 +1658,7 @@ Actes du Défi Fouille de Textes@TALN 2024 RichardDufour - BenoitFavre + BenoitFavre MickaelRouvier AdrienBazoge YanisLabrak @@ -1691,7 +1691,7 @@ Charles-WilliamCummings AzurHandan EdithGaly - EricCharton + EricCharton 11–22 Ce papier décrit le travail de l’équipe du CRIM (Centre de recherche en Informatique de Montréal) dans le cadre du Défi Fouille de textes 2024. Nous présentons les expériences que nous avons menées dans le cadre de la tâche principale consistant à identifier automatiquement, pour une question donnée issue d’annales d’examens de pharmacie, l’ensemble des réponses correctes parmi les cinq proposées. La contrainte est d’utiliser un système de moins de 3 milliards de paramètres dont les données d’entraînement sont connues. Pour ce faire, nous avons testé des approches impliquant du few-shot prompting, du RAG, de l’affinage et de la génération contrainte en dernier recours. 2024.jeptalnrecital-deft.2 diff --git a/data/xml/2024.kallm.xml b/data/xml/2024.kallm.xml index 70c53048ff..c31207ab18 100644 --- a/data/xml/2024.kallm.xml +++ b/data/xml/2024.kallm.xml @@ -27,7 +27,7 @@ YeonSeonwoo SeunghyunYoonAdobe Research JamesThorneKAIST - AliceOhKorea Advanced Institute of Science and Technology + AliceOhKorea Advanced Institute of Science and Technology 1-11 Application of LLM to database queries on natural language sentences has demonstrated impressive results in both single and multi-hop scenarios.In the existing methodologies, the requirement to re-encode query vectors at each stage for processing multi-hop queries presents a significant bottleneck to the inference speed.This paper proposes VKGFR (Virtual Knowledge Graph based Fact Retriever) that leverages large language models to extract representations corresponding to a sentence’s knowledge graph, significantly enhancing inference speed for multi-hop reasoning without performance loss.Given that both the queries and natural language database sentences can be structured as a knowledge graph, we suggest extracting a Virtual Knowledge Graph (VKG) representation from sentences with LLM.Over the pre-constructed VKG, our VKGFR conducts retrieval with a tiny model structure, showing performance improvements with higher computational efficiency. We evaluate VKGFR on the WikiNLDB and MetaQA dataset, designed for multi-hop database reasoning over text. The results indicate 13x faster inference speed on the WikiNLDB dataset without performance loss. 2024.kallm-1.1 @@ -73,7 +73,7 @@ <fixed-case>KGAST</fixed-case>: From Knowledge Graphs to Annotated Synthetic Texts NakanysethVuth - GillesSérassetUniversité Grenoble Alpes + GillesSérassetUniversité Grenoble Alpes DidierSchwabUniversité Grenoble Alpes 43-55 In recent years, the use of synthetic data, either as a complement or a substitute for original data, has emerged as a solution to challenges such as data scarcity and security risks. This paper is an initial attempt to automatically generate such data for Information Extraction tasks. We accomplished this by developing a novel synthetic data generation framework called KGAST, which leverages Knowledge Graphs and Large Language Models. In our preliminary study, we conducted simple experiments to generate synthetic versions of two datasets—a French security defense dataset and an English general domain dataset, after which we evaluated them both intrinsically and extrinsically. The results indicated that synthetic data can effectively complement original data, improving the performance of models on classes with limited training samples. This highlights KGAST’s potential as a tool for generating synthetic data for Information Extraction tasks. @@ -127,7 +127,7 @@ <fixed-case>STAGE</fixed-case>: Simplified Text-Attributed Graph Embeddings using Pre-trained <fixed-case>LLM</fixed-case>s AaronZolnai-LucasQuantexa Ltd JackBoylanGeorgia Institute of Technology - ChrisHokampQuantexa + ChrisHokampQuantexa ParsaGhaffari 92-104 We present STAGE, a straightforward yet effective method for enhancing node features in Graph Neural Network (GNN) models that encode Text-Attributed Graphs (TAGs). Our approach leverages Large-Language Models (LLMs) to generate embeddings for textual attributes. STAGE achieves competitive results on various node classification benchmarks while also maintaining a simplicity in implementation relative to current state-of-the-art (SoTA) techniques. We show that utilizing pre-trained LLMs as embedding generators provides robust features for ensemble GNN training, enabling pipelines that are simpler than current SoTA approaches which require multiple expensive training and prompting stages. We also implement diffusion-pattern GNNs in an effort to make this pipeline scalable to graphs beyond academic benchmarks. diff --git a/data/xml/2024.kemt.xml b/data/xml/2024.kemt.xml index e897872282..ac2611ee80 100644 --- a/data/xml/2024.kemt.xml +++ b/data/xml/2024.kemt.xml @@ -4,8 +4,8 @@ Proceedings of the First International Workshop on Knowledge-Enhanced Machine Translation ArdaTezcan - Víctor M.Sánchez-Cartagena - MiquelEsplà-Gomis + Víctor M.Sánchez-Cartagena + MiquelEsplà-Gomis European Association for Machine Translation (EAMT)
Sheffield, United Kingdom
June @@ -32,7 +32,7 @@
Exploring Inline Lexicon Injection for Cross-Domain Transfer in Neural Machine Translation - Jesujoba O.Alabi + Jesujoba O.Alabi RachelBawden 7-20 Domain transfer remains a challenge in machine translation (MT), particularly concerning rare or unseen words. Amongst the strategies proposed to address the issue, one of the simplest and most promising in terms of generalisation capacity is coupling the MT system with external resources such as bilingual lexicons and appending inline annotations within source sentences. This method has been shown to work well for controlled language settings, but its usability for general language (and ambiguous) MT is less certain. In this article we explore this question further, testing the strategy in a multi-domain transfer setting for German-to-English MT, using the mT5 language model fine-tuned on parallel data. We analyse the MT outputs and design evaluation strategies to understand the behaviour of such models. Our analysis using distractor annotations suggests that although improvements are not systematic according to automatic metrics, the model does learn to select appropriate translation candidates and ignore irrelevant ones, thereby exhibiting more than a systematic copying behaviour. However, we also find that the method is less successful in a higher-resource setting with a larger lexicon, suggesting that it is not a magic solution, especially when the baseline model is already exposed to a wide range of vocabulary. diff --git a/data/xml/2024.knowledgenlp.xml b/data/xml/2024.knowledgenlp.xml index b587453e4c..cc4459804b 100644 --- a/data/xml/2024.knowledgenlp.xml +++ b/data/xml/2024.knowledgenlp.xml @@ -9,8 +9,8 @@ MengJiang ChenguangZhu HannanehHajishirzi - LukeZettlemoyer - ZhihanZhang + LukeZettlemoyer + ZhihanZhang Association for Computational Linguistics
Bangkok, Thailand
August @@ -27,8 +27,8 @@ <fixed-case>GAD</fixed-case>e<fixed-case>P</fixed-case>o: Graph-Assisted Declarative Pooling Transformers for Document-Level Relation Extraction AndreiComan ChristosTheodoropoulos - Marie-FrancineMoensKU Leuven, KU Leuven - JamesHendersonIdiap Research Institute + Marie-FrancineMoensKU Leuven, KU Leuven + JamesHendersonIdiap Research Institute 1-14 Document-level relation extraction typically relies on text-based encoders and hand-coded pooling heuristics to aggregate information learned by the encoder. In this paper, we leverage the intrinsic graph processing capabilities of the Transformer model and propose replacing hand-coded pooling methods with new tokens in the input, which are designed to aggregate information via explicit graph relations in the computation of attention weights. We introduce a joint text-graph Transformer model and a graph-assisted declarative pooling (GADePo) specification of the input, which provides explicit and high-level instructions for information aggregation. GADePo allows the pooling process to be guided by domain-specific knowledge or desired outcomes but still learned by the Transformer, leading to more flexible and customisable pooling strategies. We evaluate our method across diverse datasets and models and show that our approach yields promising results that are consistently better than those achieved by the hand-coded pooling functions. 2024.knowledgenlp-1.1 @@ -59,7 +59,7 @@ MarcusCollinsAmazon OlegRokhlenko EugeneAgichteinAmazon and Emory University - ShervinMalmasiAmazon + ShervinMalmasiAmazon 30-43 Continued improvement of conversational assistants in knowledge-rich domains like E-Commerce requires large volumes of realistic high-quality conversation data to power increasingly sophisticated large language model chatbots, dialogue managers, response rankers, and recommenders. The problem is exacerbated for multi-modal interactions in realistic conversational product search and recommendation. Here, an artificial sales agent must interact intelligently with a customer using both textual and visual information and incorporate results from external search systems, such as a product catalog. Yet, it remains an open question how to best crowd-source large-scale, naturalistic multi-modal dialogue and action data, required to train such an artificial agent. We describe our crowd-sourced task where one worker (the Buyer) plays the role of the customer, and another (the Seller) plays the role of the sales agent. We identify subtle interactions between one worker’s environment and their partner’s behavior mediated by workers’ word choice. We find that limiting information presented to the Buyer, both in their backstory and by the Seller, improves conversation quality. We also show how conversations are improved through minimal automated Seller “coaching”. While typed and spoken messages are slightly different, the differences are not as large as frequently assumed. We plan to release our platform code and the resulting dialogues to advance research on conversational search agents. 2024.knowledgenlp-1.3 @@ -98,7 +98,7 @@ SoyeongJeongKorea Advanced Institute of Science & Technology SukminCho SeungYoonHan - JongParkKorea Advanced Institute of Science and Technology + JongParkKorea Advanced Institute of Science and Technology 73-92 Recent advancements in Large Language Models (LLMs) have significantly improved their performance across various Natural Language Processing (NLP) tasks.However, LLMs still struggle with generating non-factual responses due to limitations in their parametric memory.Retrieval-Augmented Generation (RAG) systems address this issue by incorporating external knowledge with a retrieval module.Despite their successes, however, current RAG systems face challenges with retrieval failures and the limited ability of LLMs to filter out irrelevant information.Therefore, in this work, we propose DSLR (Document Refinement with Sentence-Level Re-ranking and Reconstruction), an unsupervised framework that decomposes retrieved documents into sentences, filters out irrelevant sentences, and reconstructs them again into coherent passages.We experimentally validate DSLR on multiple open-domain QA datasets and the results demonstrate that DSLR significantly enhances the RAG performance over conventional fixed-size passage.Furthermore, our DSLR enhances performance in specific, yet realistic scenarios without the need for additional training, providing an effective and efficient solution for refining retrieved documents in RAG systems. diff --git a/data/xml/2024.knowllm.xml b/data/xml/2024.knowllm.xml index 68df7a269d..bdcf60ce0b 100644 --- a/data/xml/2024.knowllm.xml +++ b/data/xml/2024.knowllm.xml @@ -99,7 +99,7 @@ YeLiuSalesForce.com RuiMengSalesForce Research Meghana MoorthyBhatSalesforce Research - ShafiqJotySalesForce.com and Nanyang Technological University + ShafiqJotySalesForce.com and Nanyang Technological University CaimingXiongSalesforce Research YingboZhouSalesforce Research SemihYavuzSalesForce.com @@ -198,7 +198,7 @@ Retrieval-augmented generation in multilingual settings NadezhdaChirkovaNaver Labs Europe DavidRau - HervéDéjeanNaver Labs Europe + HervéDéjeanNaver Labs Europe ThibaultFormalNaver Labs Europe StéphaneClinchantNaver Labs Europe VassilinaNikoulinaNaver Labs Europe @@ -212,7 +212,7 @@ Retrieve, Generate, Evaluate: A Case Study for Medical Paraphrases Generation with Small Language Models IoanaBuhnila AmanSinha - MathieuConstantUniversité de Lorraine, CNRS, ATILF + MathieuConstantUniversité de Lorraine, CNRS, ATILF 189-203 Recent surge in the accessibility of large language models (LLMs) to the general population can lead to untrackable use of such models for medical-related recommendations. Language generation via LLMs models has two key problems: firstly, they are prone to hallucination and therefore, for any medical purpose they require scientific and factual grounding; secondly, LLMs pose tremendous challenge to computational resources due to their gigantic model size. In this work, we introduce pRAGe, a Pipeline for Retrieval Augmented Generation and Evaluation of medical paraphrases generation using Small Language Models (SLM). We study the effectiveness of SLMs and the impact of external knowledge base for medical paraphrase generation in French. 2024.knowllm-1.16 diff --git a/data/xml/2024.konvens.xml b/data/xml/2024.konvens.xml index f7227a39bb..75d90d1b2f 100644 --- a/data/xml/2024.konvens.xml +++ b/data/xml/2024.konvens.xml @@ -99,8 +99,8 @@ Using <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et for the Generation of Crossword Puzzles ClausZinn - MarieHinrichs - ErhardHinrichs + MarieHinrichs + ErhardHinrichs 89–97 2024.konvens-main.10 zinn-etal-2024-using @@ -145,7 +145,7 @@ How to Translate <fixed-case>SQ</fixed-case>u<fixed-case>AD</fixed-case> to <fixed-case>G</fixed-case>erman? A Comparative Study of Answer Span Retrieval Methods for Question Answering Dataset Creation JensKaiser - AgnieszkaFalenska + AgnieszkaFalenska 134–140 2024.konvens-main.15 kaiser-falenska-2024-translate @@ -160,7 +160,7 @@ Binary indexes for optimising corpus queries - PeterLjunglöf + PeterLjunglöf NicholasSmallbone MijoThoresson VictorSalomonsson @@ -204,7 +204,7 @@ Fine-grained quotation detection and attribution in <fixed-case>G</fixed-case>erman news articles FynnPetersen-Frey - ChrisBiemann + ChrisBiemann 196–208 2024.konvens-main.22 petersen-frey-biemann-2024-fine diff --git a/data/xml/2024.langmol.xml b/data/xml/2024.langmol.xml index 134f718778..21d456655f 100644 --- a/data/xml/2024.langmol.xml +++ b/data/xml/2024.langmol.xml @@ -136,7 +136,7 @@ CarolMakInternational Business Machines FlaviuCipciganInternational Business Machines JamesBarry - MohabElkarefInternational Business Machines + MohabElkarefInternational Business Machines MovinaMoses VishnudevKuruvanthodiInternational Business Machines GeethMel diff --git a/data/xml/2024.latechclfl.xml b/data/xml/2024.latechclfl.xml index 699d91d442..769f246fbf 100644 --- a/data/xml/2024.latechclfl.xml +++ b/data/xml/2024.latechclfl.xml @@ -6,7 +6,7 @@ YuriBizzoni StefaniaDegaetano-Ortlieb AnnaKazantseva - StanSzpakowicz + StanSzpakowicz Association for Computational Linguistics
St. Julians, Malta
March @@ -34,7 +34,7 @@ Coreference in Long Documents using Hierarchical Entity Merging TalikaGuptaIIIT Guwahati Hans OleHatzelUniversität Hamburg - ChrisBiemannUniversität Hamburg + ChrisBiemannUniversität Hamburg 11-17 Current top-performing coreference resolution approaches are limited with regard to the maximum length of texts they can accept. We explore a recursive merging technique of entities that allows us to apply coreference models to texts of arbitrary length, as found in many narrative genres. In experiments on established datasets, we quantify the drop in resolution quality caused by this approach. Finally, we use an under-explored resource in the form of a fully coreference-annotated novel to illustrate our model’s performance for long documents in practice. Here, we achieve state-of-the-art performance, outperforming previous systems capable of handling long documents. 2024.latechclfl-1.2 @@ -162,7 +162,7 @@
Post-Correction of Historical Text Transcripts with Large Language Models: An Exploratory Study - EmanuelaBorosEPFL + EmanuelaBorosEPFL MaudEhrmannÈcole Polytechnique Fédérale de Lausanne MatteoRomanelloÈcole polytechnique fédérale de Lausanne / Deutsches Archäologisches Institut SvenNajem-MeyerÈcole Polytechnique Fédérale de Lausanne @@ -191,7 +191,7 @@ Perplexing Canon: A study on <fixed-case>GPT</fixed-case>-based perplexity of canonical and non-canonical literary works YaruWuUU YuriBizzoniAarhus University - PascaleMoreiraComparative Literature, School of Communication and Culture, Aarhus University + PascaleMoreiraComparative Literature, School of Communication and Culture, Aarhus University KristofferNielboCenter for Humanities Computing, Aarhus University 172-184 This study extends previous research on literary quality by using information theory-based methods to assess the level of perplexity recorded by three large language models when processing 20th-century English novels deemed to have high literary quality, recognized by experts as canonical, compared to a broader control group. We find that canonical texts appear to elicit a higher perplexity in the models, we explore which textual features might concur to create such an effect. We find that the usage of a more heavily nominal style, together with a more diverse vocabulary, is one of the leading causes of the difference between the two groups. These traits could reflect “strategies” to achieve an informationally dense literary style. @@ -225,7 +225,7 @@ Two Approaches to Diachronic Normalization of <fixed-case>P</fixed-case>olish Texts KacperDudzicAdam Mickiewicz University - FilipGralinskiApplica.ai + FilipGralinskiApplica.ai KrzysztofJassemAdam Mickiewicz University MarekKubisAdam Mickiewicz University PiotrWierzchonAdam Mickiewicz University @@ -241,8 +241,8 @@ YoucefBenkheddaUniversity of Manchester AdriansSkaparsUniversity of Manchester ViktorSchlegelASUS AICS - GoranNenadicUniversity of Manchester - RizaBatista-NavarroDepartment of Computer Science, The University of Manchester + GoranNenadicUniversity of Manchester + RizaBatista-NavarroDepartment of Computer Science, The University of Manchester 213-220 Digital archive collections that have been contributed by communities, known as community-generated digital content (CGDC), are important sources of historical and cultural knowledge. However, CGDC items are not easily searchable due to semantic information being obscured within their textual metadata. In this paper, we investigate the extent to which state-of-the-art, general-domain entity linking (EL) models (i.e., BLINK, EPGEL and mGENRE) can map named entities mentioned in CGDC textual metadata, to Wikidata entities. We evaluate and compare their performance on an annotated dataset of CGDC textual metadata and provide some error analysis, in the way of informing future studies aimed at enriching CGDC metadata using entity linking methods. 2024.latechclfl-1.20 @@ -330,7 +330,7 @@ Stage Direction Classification in <fixed-case>F</fixed-case>rench Theater: Transfer Learning Experiments AlexiaSchneiderUniversité de Strasbourg - PabloRuiz FaboLiLPa, Université de Strasbourg + PabloRuiz FaboLiLPa, Université de Strasbourg 278-286 The automatic classification of stage directions is a little explored topic in computational drama analysis, in spite of their relevance for plays’ structural and stylistic analysis. With a view to start assessing good practices for the automatic annotation of this textual element, we developed a 13-class stage direction typology, based on annotations in the FreDraCor corpus (French-language plays), but abstracting away from their huge variability while still providing classes useful for literary research. We fine-tuned transformers-based models to classify against the typology, gradually decreasing the corpus size used for fine tuning, to compare model efficiency with reduced training data. A result comparison speaks in favour of distilled monolingual models for this task, and, unlike earlier research on German, shows no negative effects of model case-sensitivity. The results have practical relevance for computational literary studies, as comparing classification results with complementary stage direction typologies, limiting the amount of manual annotation needed to apply them, would be helpful towards a systematic study of this important textual element. 2024.latechclfl-1.28 diff --git a/data/xml/2024.law.xml b/data/xml/2024.law.xml index b1c9bfce53..fa319e5212 100644 --- a/data/xml/2024.law.xml +++ b/data/xml/2024.law.xml @@ -45,7 +45,7 @@ MuyinYaoTufts University XinyiHuTufts University XiaoningZhuBeihang University - JuliaHirschbergColumbia University in the City of New York + JuliaHirschbergColumbia University in the City of New York 19-28 In Emotion Detection within Natural Language Processing and related multimodal research, the growth of datasets and models has led to a challenge: disparities in emotion classification methods. The lack of commonly agreed upon conventions on the classification of emotions creates boundaries for model comparisons and dataset adaptation. In this paper, we compare the current classification methods in recent models and datasets and propose a valid method to combine different emotion categories. Our proposal arises from experiments across models, psychological theories, and human evaluations, and we examined the effect of proposed mapping on models. 2024.law-1.3 @@ -78,7 +78,7 @@ Towards Better Inclusivity: A Diverse Tweet Corpus of <fixed-case>E</fixed-case>nglish Varieties NhiPhamNew York University Abu Dhabi LachlanPhamNew York University Abu Dhabi - AdamMeyersNew York University + AdamMeyersNew York University 61-70 The prevalence of social media presents a growing opportunity to collect and analyse examples of English varieties. Whilst usage of these varieties is often used only in spoken contexts or hard-to-access private messages, social media sites like Twitter provide a platform for users to communicate informally in a scrapeable format. Notably, Indian English (Hinglish), Singaporean English (Singlish), and African-American English (AAE) can be commonly found online. These varieties pose a challenge to existing natural language processing (NLP) tools as they often differ orthographically and syntactically from standard English for which the majority of these tools are built. NLP models trained on standard English texts produced biased outcomes for users of underrepresented varieties (Blodgett and O’Connor, 2017). Some research has aimed to overcome the inherent biases caused by unrepresentative data through techniques like data augmentation or adjusting training models. We aim to address the issue of bias at its root - the data itself. We curate a dataset of tweets from countries with high proportions of underserved English variety speakers, and propose an annotation framework of six categorical classifications along a pseudo-spectrum that measures the degree of standard English and that thereby indirectly aims to surface the manifestations of English varieties in these tweets. 2024.law-1.6 @@ -87,9 +87,9 @@ Building a corpus for the anonymization of <fixed-case>R</fixed-case>omanian jurisprudence VasilePăișResearch Institute for Artificial Intelligence, Romanian Academy - DanTufisResearch Institute for Artificial Intelligence, Romanian Academy + DanTufisResearch Institute for Artificial Intelligence, Romanian Academy ElenaIrimiaResearch Institute for Artificial Intelligence, Romanian Academy (RACAI) - VerginicaBarbu MititeluRACAI + VerginicaBarbu MititeluRACAI 71-76 Access to jurisprudence is of paramount importance for both law professionals (judges, lawyers, law students) and for the larger public. In Romania, the Superior Council of Magistracy holds a large database of jurisprudence from different courts in the country, which is updated daily. However, granting public access requires its anonymization. This paper presents the efforts behind building a corpus for the anonymization process. We present the annotation scheme, the manual annotation methods, and the platform used. 2024.law-1.7 @@ -144,7 +144,7 @@ Are You Serious? Handling Disagreement When Annotating Conspiracy Theory Texts AshleyHemmUniversity of Miami - SandraKüblerIndiana University + SandraKüblerIndiana University MichelleSeeligUniversity of Miami JohnFunchionUniversity of Miami ManoharMurthiUniversity of Miami @@ -161,7 +161,7 @@ A <fixed-case>GPT</fixed-case> among Annotators: <fixed-case>LLM</fixed-case>-based Entity-Level Sentiment Annotation EgilRønningstadUniversity of Oslo ErikVelldalUniversity of Oslo - LiljaØvrelidDept of Informatics, University of Oslo + LiljaØvrelidDept of Informatics, University of Oslo 133-139 We investigate annotator variation for the novel task of Entity-Level Sentiment Analysis (ELSA) which annotates the aggregated sentiment directed towards volitional entities in a text. More specifically, we analyze the annotations of a newly constructed Norwegian ELSA dataset and release additional data with each annotator’s labels for the 247 entities in the dataset’s test split. We also perform a number of experiments prompting ChatGPT for these sentiment labels regarding each entity in the text and compare the generated annotations with the human labels. Cohen’s Kappa for agreement between the best LLM-generated labels and curated gold was 0.425, which indicates that these labels would not have high quality. Our analyses further investigate the errors that ChatGPT outputs, and compare them with the variations that we find among the 5 trained annotators that all annotated the same test data. 2024.law-1.13 @@ -196,11 +196,11 @@ KathrynCongerUniversitiy of Colorado, Boulder AnatoleGershmanCarnegie Mellon University MahirMorshedUniversity of Illinois at Urbana-Champaign - Susan WindischBrownUniversity of Colorado at Boulder - JamesPustejovskyBrandeis University + Susan WindischBrownUniversity of Colorado at Boulder + JamesPustejovskyBrandeis University RosarioUceda-SosaIBM Research SijiaGeUniversity of Colorado-Boulder - MarthaPalmerUniversity of Colorado + MarthaPalmerUniversity of Colorado 166-175 This paper presents the first integration of PropBank role information into Wikidata, in order to provide a novel resource for information extraction, one combining Wikidata’s ontological metadata with PropBank’s rich argument structure encoding for event classes. We discuss a technique for PropBank augmentation to existing eventive Wikidata items, as well as identification of gaps in Wikidata’s coverage based on manual examination of over 11,300 PropBank rolesets. We propose five new Wikidata properties to integrate PropBank structure into Wikidata so that the annotated mappings can be added en masse. We then outline the methodology and challenges of this integration, including annotation with the combined resources. 2024.law-1.16 @@ -234,7 +234,7 @@ LeonWeberLMU Munich RobertLitschkoLMU Munich EkaterinaArtemovaToloka.AI - BarbaraPlankLMU Munich + BarbaraPlankLMU Munich 197-215 Instruction tuning has become an integral part of training pipelines for Large Language Models (LLMs) and has been shown to yield strong performance gains. In an orthogonal line of research, Annotation Error Detection (AED) has emerged as a tool for detecting quality problems in gold standard labels. So far, however, the application of AED methods has been limited to classification tasks. It is an open question how well AED methods generalize to language generation settings, which are becoming more widespread via LLMs. In this paper, we present a first and novel benchmark for AED on instruction tuning data: Donkii.It comprises three instruction-tuning datasets enriched with error annotations by experts and semi-automatic methods. We also provide a novel taxonomy of error types for instruction-tuning data.We find that all three datasets contain clear errors, which sometimes propagate directly into instruction-tuned LLMs. We propose four AED baselines for the generative setting and evaluate them extensively on the newly introduced dataset. Our results show that the choice of the right AED method and model size is indeed crucial and derive practical recommendations for how to use AED methods to clean instruction-tuning data. 2024.law-1.19 @@ -245,7 +245,7 @@ <fixed-case>EEVEE</fixed-case>: An Easy Annotation Tool for Natural Language Processing AxelSorensenIT University of Copenhagen SiyaoPengLMU Munich - BarbaraPlankLMU Munich + BarbaraPlankLMU Munich RobVan Der GootIT University of Copenhagen 216-221 Annotation tools are the starting point for creating Natural Language Processing (NLP) datasets. There is a wide variety of tools available; setting up these tools is however a hindrance. We propose Eevee, an annotation tool focused on simplicity, efficiency, and ease of use. It can run directly in the browser (no setup required) and uses tab-separated files (as opposed to character offsets or task-specific formats) for annotation. It allows for annotation of multiple tasks on a single dataset and supports four task-types: sequence labeling, span labeling, text classification and seq2seq. diff --git a/data/xml/2024.lchange.xml b/data/xml/2024.lchange.xml index f075328c7f..0b5c5cfe4b 100644 --- a/data/xml/2024.lchange.xml +++ b/data/xml/2024.lchange.xml @@ -79,10 +79,10 @@ Towards a <fixed-case>G</fixed-case>olden<fixed-case>H</fixed-case>ymns Dataset for Studying Diachronic Trends in 19th Century <fixed-case>D</fixed-case>anish Religious Hymns EaLindhardt OvergaardAarhus University - PascaleFeldkampAarhus University + PascaleFeldkampAarhus University YuriBizzoniAarhus University 55-61 - + 2024.lchange-1.6 lindhardt-overgaard-etal-2024-towards 10.18653/v1/2024.lchange-1.6 @@ -144,9 +144,9 @@ <fixed-case>E</fixed-case>tymo<fixed-case>L</fixed-case>ink: A Structured <fixed-case>E</fixed-case>nglish Etymology Dataset YuanGaoUniversity of Cambridge - WeiweiSunUniversity of Cambridge + WeiweiSunUniversity of Cambridge 126-136 - + 2024.lchange-1.12 gao-sun-2024-etymolink 10.18653/v1/2024.lchange-1.12 @@ -168,9 +168,9 @@ RobinCooperUniversity of Gothenburg ElinaLindgrenUniversity of Gothenburg GregorRetteneggerUniversity of Gothenburg - AsadSayeedUniversity of Gothenburg + AsadSayeedUniversity of Gothenburg 144-157 - + 2024.lchange-1.14 boholm-etal-2024-political 10.18653/v1/2024.lchange-1.14 @@ -190,9 +190,9 @@ Deep-change at <fixed-case>AXOLOTL</fixed-case>-24: Orchestrating <fixed-case>WSD</fixed-case> and <fixed-case>WSI</fixed-case> Models for Semantic Change Modeling DenisKokosinskiiMoscow State University and SaluteDevices MikhailKuklinMoscow State University and Yandex - NikolayArefyevUniversity of Oslo + NikolayArefyevUniversity of Oslo 168-179 - + 2024.lchange-1.16 kokosinskii-etal-2024-deep 10.18653/v1/2024.lchange-1.16 diff --git a/data/xml/2024.ldl.xml b/data/xml/2024.ldl.xml index 213a429b16..2470ff2004 100644 --- a/data/xml/2024.ldl.xml +++ b/data/xml/2024.ldl.xml @@ -6,8 +6,8 @@ ChristianChiarcos KaterinaGkirtzou MaximIonov - FahadKhan - John P.McCrae + FahadKhan + John P.McCrae Elena MontielPonsoda Patricia MartínChozas ELRA and ICCL @@ -31,7 +31,7 @@ GiedreValunaite Oleskeviciene Elena-SimonaApostol Ciprian-OctavianTruica - DanielaGifu + DanielaGifu 1–10 This article proposes a linguistic linked open data model for diachronic analysis (LLODIA) that combines data derived from diachronic analysis of multilingual corpora with dictionary-based evidence. A humanities use case was devised as a proof of concept that includes examples in five languages (French, Hebrew, Latin, Lithuanian and Romanian) related to various meanings of the term “revolution” considered at different time intervals. The examples were compiled through diachronic word embedding and dictionary alignment. 2024.ldl-1.1 @@ -84,7 +84,7 @@ RuteCosta ChamilaLiyanage John P.McCrae - Atul Kr.Ojha + Atul Kr.Ojha PriyaRani FrancescaFrontini 44–48 @@ -164,7 +164,7 @@ Linguistic <fixed-case>LOD</fixed-case> for Interoperable Morphological Description - MichaelRosner + MichaelRosner MaximIonov 94–102 Interoperability is a characteristic of a product or system that seamlessly works with another product or system and implies a certain level of independence from the context of use. Turning to language resources, interoperability is frequently cited as one important rationale underlying the use of LLOD representations and is generally regarded as highly desirable. In this paper we further elaborate this theme, distinguishing three different kinds of interoperability providing practical implementations with examples from morphology. @@ -181,7 +181,7 @@ <fixed-case>O</fixed-case>nto<fixed-case>L</fixed-case>ex Publication Made Easy: A Dataset of Verbal Aspectual Pairs for <fixed-case>B</fixed-case>osnian, <fixed-case>C</fixed-case>roatian and <fixed-case>S</fixed-case>erbian - RankaStanković + RankaStanković MaximIonov MedinaBajtarević LorenaNinčević @@ -192,7 +192,7 @@ Towards Semantic Interoperability: Parallel Corpora as Linked Data Incorporating Named Entity Linking - RankaStanković + RankaStanković MilicaIkonić Nešić OljaPerisic MihailoŠkorić diff --git a/data/xml/2024.legal.xml b/data/xml/2024.legal.xml index abc142a4b8..38cd29a07d 100644 --- a/data/xml/2024.legal.xml +++ b/data/xml/2024.legal.xml @@ -4,7 +4,7 @@ Proceedings of the Workshop on Legal and Ethical Issues in Human Language Technologies @ LREC-COLING 2024 IngoSiegert - KhalidChoukri + KhalidChoukri ELRA and ICCL
Torino, Italia
May @@ -79,7 +79,7 @@
Selling Personal Information: Data Brokers and the Limits of <fixed-case>US</fixed-case> Regulation - DeniseDiPersio + DeniseDiPersio 39–46 A principal pillar of the US Blueprint for an AI Bill of Rights is data privacy, specifically, that individuals should be protected from abusive practices by data collectors and data aggregators, and that users should have control over how their personal information is collected and used. An area that spotlights the need for such protections is found in the common practices of data brokers who scrape, purchase, process and reassemble personal information in bulk and sell it for a variety of downstream uses. Such activities almost always occur in the absence of users’ knowledge or meaningful consent, yet they are legal under US law. This paper examines how data brokers operate, provides some examples of recent US regulatory actions taken against them, summarizes federal efforts to redress data broker practices and concludes that as long as there continues to be no comprehensive federal data protection and privacy scheme, efforts to control such behavior will have only a limited effect. This paper also addresses the limits of informed consent on the use of personal information in language resources and suggests a solution in an holistic approach to data protection and privacy across the data/development life cycle. 2024.legal-1.7 diff --git a/data/xml/2024.loresmt.xml b/data/xml/2024.loresmt.xml index 951fc348aa..e9a6c8cbe7 100644 --- a/data/xml/2024.loresmt.xml +++ b/data/xml/2024.loresmt.xml @@ -3,12 +3,12 @@ Proceedings of the Seventh Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2024) - Atul Kr.Ojha + Atul Kr.Ojha Chao-hongLiu EkaterinaVylomova FlammiePirinen JadeAbbott - JonathanWashington + JonathanWashington NathanielOco ValentinMalykh VarvaraLogacheva @@ -62,7 +62,7 @@ Challenges in <fixed-case>U</fixed-case>rdu Machine Translation AbdulBasitLahore University of Management Sciences Abdul HameedAzeemiLahore University of Management Sciences - Agha AliRazaLahore University of Management Sciences + Agha AliRazaLahore University of Management Sciences 44-49 Recent advancements in Neural Machine Translation (NMT) systems have significantly improved model performance on various translation benchmarks. However, these systems still face numerous challenges when translating low-resource languages such as Urdu. In this work, we highlight the specific issues faced by machine translation systems when translating Urdu language. We first conduct a comprehensive evaluation of English to Urdu Machine Translation with four diverse models: GPT-3.5 (a large language model), opus-mt-en-ur (a bilingual translation model), NLLB (a model trained for translating 200 languages), and IndicTrans2 (a specialized model for translating low-resource Indic languages). The results demonstrate that IndicTrans2 significantly outperforms other models in Urdu Machine Translation. To understand the differences in the performance of these models, we analyze the Urdu word distribution in different training datasets and compare the training methodologies. Finally, we uncover the specific translation issues and provide suggestions for improvements in Urdu machine translation systems. 2024.loresmt-1.4 @@ -122,7 +122,7 @@ Enhancing <fixed-case>T</fixed-case>urkish Word Segmentation: A Focus on Borrowed Words and Invalid Morpheme SoheilaBehrooznia EbrahimAnsariZanjan Institute for Advanced Studies in Basic Sciences - ZdenekZabokrtskyFaculty of Mathematics and Physics, Charles University Prague + ZdenekZabokrtskyFaculty of Mathematics and Physics, Charles University Prague 85-93 This study addresses a challenge in morphological segmentation: accurately segmenting words in languages with rich morphology. Current probabilistic methods, such as Morfessor, often produce results that lack consistency with human-segmented words. Our study adds some steps to the Morfessor segmentation process to consider invalid morphemes and borrowed words from other languages to improve morphological segmentation significantly. Comparing our idea to the results obtained from Morfessor demonstrates its efficiency, leading to more accurate morphology segmentation. This is particularly evident in the case of Turkish, highlighting the potential for further advancements in morpheme segmentation for morphologically rich languages. 2024.loresmt-1.9 @@ -205,7 +205,7 @@ Adopting Ensemble Learning for Cross-lingual Classification of Crisis-related Text On Social Media ShareefaAl Amer - MarkLee + MarkLee PhillipSmithUniversity of Birmingham 159-165 Cross-lingual classification poses a significant challenge in Natural Language Processing (NLP), especially when dealing with languages with scarce training data. This paper delves into the adaptation of ensemble learning to address this challenge, specifically for disaster-related social media texts. Initially, we employ Machine Translation to generate a parallel corpus in the target language to mitigate the issue of data scarcity and foster a robust training environment. Following this, we implement the bagging ensemble technique, integrating multiple classifiers into a cohesive model that demonstrates enhanced performance over individual classifiers. Our experimental results reveal significant improvements in adapting models for Arabic, utilising only English training data and markedly outperforming models intended for linguistically similar languages to English, with our ensemble model achieving an accuracy and F1 score of 0.78 when tested on original Arabic data. This research makes a substantial contribution to the field of cross-lingual classification, establishing a new benchmark for enhancing the effectiveness of language transfer in linguistically challenging scenarios. diff --git a/data/xml/2024.lrec.xml b/data/xml/2024.lrec.xml index 5a446fd00d..bbca69b970 100644 --- a/data/xml/2024.lrec.xml +++ b/data/xml/2024.lrec.xml @@ -3,9 +3,9 @@ Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024) - NicolettaCalzolari + NicolettaCalzolari Min-YenKan - VeroniqueHoste + VeroniqueHoste AlessandroLenci SakrianiSakti NianwenXue @@ -40,8 +40,8 @@ A Benchmark Evaluation of Clinical Named Entity Recognition in <fixed-case>F</fixed-case>rench NesrineBannour - ChristopheServan - AurélieNévéol + ChristopheServan + AurélieNévéol XavierTannier 14–21 Background: Transformer-based language models have shown strong performance on many Natural Language Processing (NLP) tasks. Masked Language Models (MLMs) attract sustained interest because they can be adapted to different languages and sub-domains through training or fine-tuning on specific corpora while remaining lighter than modern Large Language Models (MLMs). Recently, several MLMs have been released for the biomedical domain in French, and experiments suggest that they outperform standard French counterparts. However, no systematic evaluation comparing all models on the same corpora is available. Objective: This paper presents an evaluation of masked language models for biomedical French on the task of clinical named entity recognition. Material and methods: We evaluate biomedical models CamemBERT-bio and DrBERT and compare them to standard French models CamemBERT, FlauBERT and FrAlBERT as well as multilingual mBERT using three publically available corpora for clinical named entity recognition in French. The evaluation set-up relies on gold-standard corpora as released by the corpus developers. Results: Results suggest that CamemBERT-bio outperforms DrBERT consistently while FlauBERT offers competitive performance and FrAlBERT achieves the lowest carbon footprint. Conclusion: This is the first benchmark evaluation of biomedical masked language models for French clinical entity recognition that compares model performance consistently on nested entity recognition using metrics covering performance and environmental impact. @@ -67,7 +67,7 @@ <fixed-case>ABLE</fixed-case>: Agency-<fixed-case>B</fixed-case>e<fixed-case>L</fixed-case>iefs Embedding to Address Stereotypical Bias through Awareness Instead of Obliviousness Michelle YoungJinKim JunghwanKim - KristenJohnson + KristenJohnson 43–56 Natural Language Processing (NLP) models tend to inherit and amplify stereotypical biases present in their training data, leading to harmful societal consequences. Current efforts to rectify these biases typically revolve around making models oblivious to bias, which is at odds with the idea that humans require increased awareness to tackle these biases better. This prompts a fundamental research question: are bias-oblivious models the only viable solution to combat stereotypical biases? This paper answers this question by proposing the Agency-BeLiefs Embedding (ABLE) model, a novel approach that actively encodes stereotypical biases into the embedding space. ABLE draws upon social psychological theory to acquire and represent stereotypical biases in the form of agency and belief scores rather than directly representing stereotyped groups. Our experimental results showcase ABLE’s effectiveness in learning agency and belief stereotypes while preserving the language model’s proficiency. Furthermore, we underscore the practical significance of incorporating stereotypes within the ABLE model by demonstrating its utility in various downstream tasks. Our approach exemplifies the potential benefits of addressing bias through awareness, as opposed to the prevailing approach of mitigating bias through obliviousness. 2024.lrec-main.4 @@ -101,9 +101,9 @@ JungoKasai KeisukeSakaguchi RonanLe Bras - DragomirRadev + DragomirRadev YejinChoi - Noah A.Smith + Noah A.Smith 77–90 Text generation with beam search has proven successful in a wide range of applications. We point out that, though largely overlooked in the literature, the commonly-used implementation of beam decoding (e.g., Hugging Face Transformers and fairseq) uses a first come, first served heuristic: it keeps a set of already completed sequences over time steps and stops when the size of this set reaches the beam size. Based on this finding, we introduce a patience factor, a simple modification to this beam decoding implementation, that generalizes the stopping criterion and provides flexibility to the depth of search. Empirical results demonstrate that adjusting this patience factor improves decoding performance of strong pretrained models on news text summarization and machine translation over diverse language pairs, with a negligible inference slowdown. Our approach only modifies one line of code and can be thus readily incorporated in any implementation. Further, we find that different versions of beam decoding result in large performance differences in summarization, demonstrating the need for clarity in specifying the beam search implementation in research work. Our code will be available upon publication. 2024.lrec-main.7 @@ -111,7 +111,7 @@ A Canonical Form for Flexible Multiword Expressions - JanOdijk + JanOdijk MartinKroon 91–101 This paper proposes a canonical form for Multiword Expressions (MWEs), in particular for the Dutch language. The canonical form can be enriched with all kinds of annotations that can be used to describe the properties of the MWE and its components. It also introduces the DUCAME (DUtch CAnonical Multiword Expressions) lexical resource with more than 11k MWEs in canonical form. DUCAME is used in MWE-Finder to automatically generate queries for searching for flexible MWEs in large text corpora. @@ -160,7 +160,7 @@ A Closer Look at Clustering Bilingual Comparable Corpora AnnaLaskina - EricGaussier + EricGaussier GaelleCalvary 133–142 We study in this paper the problem of clustering comparable corpora, building upon the observation that different types of clusters can be present in such corpora: monolingual clusters comprising documents in a single language, and bilingual or multilingual clusters comprising documents written in different languages. Based on a state-of-the-art deep variant of Kmeans, we propose new clustering models fully adapted to comparable corpora and illustrate their behavior on several bilingual collections (in English, French, German and Russian) created from Wikipedia. @@ -178,7 +178,7 @@ A Collection of Pragmatic-Similarity Judgments over Spoken Dialog Utterances - NigelWard + NigelWard DivetteMarco 154–163 Automatic measures of similarity between sentences or utterances are invaluable for training speech synthesizers, evaluating machine translation, and assessing learner productions. While there exist measures for semantic similarity and prosodic similarity, there are as yet none for pragmatic similarity. To enable the training of such measures, we developed the first collection of human judgments of pragmatic similarity between utterance pairs. 9 judges listened to 220 utterance pairs, each consisting of an utterance extracted from a recorded dialog and a re-enactment of that utterance under various conditions designed to create various degrees of similarity. Each pair was rated on a continuous scale. The average inter-judge correlation was 0.45. We make this data available at https://github.com/divettemarco/PragSim . @@ -247,8 +247,8 @@ A Computational Model of <fixed-case>L</fixed-case>atvian Morphology - PeterisPaikens - LaumaPretkalniņa + PeterisPaikens + LaumaPretkalniņa LauraRituma 221–232 In this paper we describe a computational model of Latvian morphology that provides a formal structure for Latvian word form inflection and has been implemented in software for generation, analysis and lemmatization of Latvian word forms. The work was motivated by the need for a NLP inflection model that can cover all the complexity of Latvian language and explicitly enumerate and handle the many exceptions to the general Latvian inflection principles. This is an evolution of earlier work, extending the initial proof of concept model to properly cover Latvian language. We provide a set of morphological paradigms that differ from current linguistic tradition, a set of systematic stem changes and combine it with an extensive lexicon that includes paradigm information and structured morphological attributes for 118 000 lexemes. This model has been applied on both dictionary and corpora data, demonstrating that it provides a good coverage for modern Latvian literary language. We also consider that there is a good potential to extend this also to the related Latgalian language. @@ -258,7 +258,7 @@ A Concept Based Approach for Translation of Medical Dialogues into Pictographs JohannaGerlach - PierretteBouillon + PierretteBouillon JonathanMutal HervéSpechbach 233–242 @@ -268,7 +268,7 @@ A Construction Grammar Corpus of Varying Schematicity: A Dataset for the Evaluation of Abstractions in Language Models - ClaireBonial + ClaireBonial HarishTayyar Madabushi 243–255 Large Language Models (LLMs) have been developed without a theoretical framework, yet we posit that evaluating and improving LLMs will benefit from the development of theoretical frameworks that enable comparison of the structures of human language and the model of language built up by LLMs through the processing of text. In service of this goal, we develop the Construction Grammar Schematicity (“CoGS”) corpus of 10 distinct English constructions, where the constructions vary with respect to schematicity, or in other words the level to which constructional slots require specific, fixed lexical items, or can be filled with a variety of elements that fulfill a particular semantic role of the slot. Our corpus constructions are carefully curated to range from substantive, frozen constructions (e.g., Let-alone) to entirely schematic constructions (e.g., Resultative). The corpus was collected to allow us to probe LLMs for constructional information at varying levels of abstraction. We present our own probing experiments using this corpus, which clearly demonstrate that even the largest LLMs are limited to more substantive constructions and do not exhibit recognition of the similarity of purely schematic constructions. We publicly release our dataset, prompts, and associated model responses. @@ -279,7 +279,7 @@ A Controlled Reevaluation of Coreference Resolution Models IanPorada XiyuanZou - Jackie Chi KitCheung + Jackie Chi KitCheung 256–263 All state-of-the-art coreference resolution (CR) models involve finetuning a pretrained language model. Whether the superior performance of one CR model over another is due to the choice of language model or other factors, such as the task-specific architecture, is difficult or impossible to determine due to lack of a standardized experimental setup. To resolve this ambiguity, we systematically evaluate five CR models and control for certain design decisions including the pretrained language model used by each. When controlling for language model size, encoder-based CR models outperform more recent decoder-based models in terms of both accuracy and inference speed. Surprisingly, among encoder-based CR models, more recent models are not always more accurate, and the oldest CR model that we test generalizes the best to out-of-domain textual genres. We conclude that controlling for the choice of language model reduces most, but not all, of the increase in F1 score reported in the past five years. 2024.lrec-main.23 @@ -291,7 +291,7 @@ PeiyanWang LibangWang DanqingxinYang - DongfengCai + DongfengCai 264–272 Manufacturing specifications are documents entailing different techniques, processes, and components involved in manufacturing. There is a growing demand for named entity recognition (NER) resources and techniques for manufacturing-specific named entities, with the development of smart manufacturing. In this paper, we introduce a corpus of Chinese manufacturing specifications, named MS-NERC, including 4,424 sentences and 16,383 entities. We also propose an entity recognizer named Trainable State Transducer (TST), which is initialized with a finite state transducer describing the morphological patterns of entities. It can directly recognize entities based on prior morphological knowledge without training. Experimental results show that TST achieves an overall 82.05% F1 score for morphological-specific entities in zero-shot. TST can be improved through training, the result of which outperforms neural methods in few-shot and rich-resource. We believe that our corpus and model will be valuable resources for NER research not only in manufacturing but also in other low-resource domains. 2024.lrec-main.24 @@ -363,7 +363,7 @@ Active Learning Design Choices for <fixed-case>NER</fixed-case> with Transformers RobertVacareanu EnriqueNoriega-Atala - GusHahn-Powell + GusHahn-Powell Marco A.Valenzuela-Escarcega MihaiSurdeanu 321–334 @@ -380,9 +380,9 @@ SeverinoDa Dalt JoanLlop MalteOstendorff - PedroOrtiz Suarez + PedroOrtiz Suarez GeorgRehm - AitorGonzalez-Agirre + AitorGonzalez-Agirre MartaVillegas 335–349 We present and describe two language resources in this paper: CATalog 1.0, the largest text corpus in Catalan to date, and CURATE (Corpus Utility for RAting TExt), a modular, parallelizable pipeline used for processing and scoring documents based on text quality that we have optimised to run in High Performance Cluster (HPC) environments. In the coming sections we describe our data preprocessing pipeline at length; traditional pipelines usually implement a set of binary filters such that a given document is either in or out. In our experience with Catalan, in lower-resource settings it is more practical to instead assign a document a soft score to allow for more flexible decision-making. We describe how the document score is calculated and highlight its interpretability by showing that it is significantly correlated with human judgements as obtained from a comparative judgement experiment. We additionally describe the different subcorpora that make up CATalog 1.0. @@ -443,15 +443,15 @@ ShuntaroYada CyrilGrouin ThomasLavergne - AurélieNévéol - PatrickParoubek + AurélieNévéol + PatrickParoubek PhilippeThomas TomohiroNishiyama SebastianMöller EijiAramaki - YujiMatsumoto + YujiMatsumoto RolandRoller - PierreZweigenbaum + PierreZweigenbaum 395–414 User-generated data sources have gained significance in uncovering Adverse Drug Reactions (ADRs), with an increasing number of discussions occurring in the digital world. However, the existing clinical corpora predominantly revolve around scientific articles in English. This work presents a multilingual corpus of texts concerning ADRs gathered from diverse sources, including patient fora, social media, and clinical reports in German, French, and Japanese. Our corpus contains annotations covering 12 entity types, four attribute types, and 13 relation types. It contributes to the development of real-world multilingual language models for healthcare. We provide statistics to highlight certain challenges associated with the corpus and conduct preliminary experiments resulting in strong baselines for extracting entities and relations between these entities, both within and across languages. 2024.lrec-main.36 @@ -495,7 +495,7 @@ A Differentiable Integer Linear Programming Solver for Explanation-Based Natural Language Inference MokanaranganThayaparan MarcoValentino - AndréFreitas + AndréFreitas 449–458 Integer Linear Programming (ILP) has been proposed as a formalism for encoding precise structural and semantic constraints for Natural Language Inference (NLI). However, traditional ILP frameworks are non-differentiable, posing critical challenges for the integration of continuous language representations based on deep learning. In this paper, we introduce a novel approach, named Diff-Comb Explainer, a neuro-symbolic architecture for explanation-based NLI based on Differentiable BlackBox Combinatorial Solvers (DBCS). Differently from existing neuro-symbolic solvers, Diff-Comb Explainer does not necessitate a continuous relaxation of the semantic constraints, enabling a direct, more precise, and efficient incorporation of neural representations into the ILP formulation. Our experiments demonstrate that Diff-Comb Explainer achieves superior performance when compared to conventional ILP solvers, neuro-symbolic black-box solvers, and Transformer-based encoders. Moreover, a deeper analysis reveals that Diff-Comb Explainer can significantly improve the precision, consistency, and faithfulness of the constructed explanations, opening new opportunities for research on neuro-symbolic architectures for explainable and transparent NLI in complex domains. 2024.lrec-main.40 @@ -537,7 +537,7 @@ WeihaoLiu XiaominChu PeifengLi - QiaomingZhu + QiaomingZhu HaizhouLi 495–506 Topic segmentation and outline generation strive to divide a document into coherent topic sections and generate corresponding subheadings, unveiling the discourse topic structure of a document. Compared with sentence-level topic structure, the paragraph-level topic structure can quickly grasp and understand the overall context of the document from a higher level, benefitting many downstream tasks such as summarization, discourse parsing, and information retrieval. However, the lack of large-scale, high-quality Chinese paragraph-level topic structure corpora restrained relative research and applications. To fill this gap, we build the Chinese paragraph-level topic representation, corpus, and benchmark in this paper. Firstly, we propose a hierarchical paragraph-level topic structure representation with three layers to guide the corpus construction. Then, we employ a two-stage man-machine collaborative annotation method to construct the largest Chinese Paragraph-level Topic Structure corpus (CPTS), achieving high quality. We also build several strong baselines, including ChatGPT, to validate the computability of CPTS on two fundamental tasks (topic segmentation and outline generation) and preliminarily verified its usefulness for the downstream task (discourse parsing). @@ -609,7 +609,7 @@ AnoopKumar AramGalstyan HengJi - PremNatarajan + PremNatarajan 572–583 This paper introduces a novel problem of automated question generation for courtroom examinations, CourtQG. While question generation has been studied in domains such as educational testing and product description, CourtQG poses several unique challenges owing to its non-cooperative and agenda-driven nature. Specifically, not only the generated questions need to be relevant to the case and underlying context, they also have to achieve certain objectives such as challenging the opponent’s arguments and/or revealing potential inconsistencies in their answers. We propose to leverage large language models (LLM) for CourtQG by fine-tuning them on two auxiliary tasks, agenda explanation (i.e., uncovering the underlying intents) and question type prediction. We additionally propose cold-start generation of questions from background documents without relying on examination history. We construct a dataset to evaluate our proposed method and show that it generates better questions according to standard metrics when compared to several baselines. 2024.lrec-main.49 @@ -657,7 +657,7 @@ DingWang XiaofengMou XipengQiu - XuanjingHuang + XuanjingHuang 609–625 Recent advancements in Chain-of-Thought prompting have facilitated significant breakthroughs for Large Language Models (LLMs) in complex reasoning tasks. Current research enhances the reasoning performance of LLMs by sampling multiple reasoning chains and ensembling based on the answer frequency. However, this approach fails in scenarios where the correct answers are in the minority. We identify this as a primary factor constraining the reasoning capabilities of LLMs, a limitation that cannot be resolved solely based on the predicted answers. To address this shortcoming, we introduce a hierarchical reasoning aggregation framework AoR (Aggregation of Reasoning), which selects answers based on the evaluation of reasoning chains. Additionally, AoR incorporates dynamic sampling, adjusting the number of reasoning chains in accordance with the complexity of the task. Experimental results on a series of complex reasoning tasks show that AoR outperforms prominent ensemble methods. Further analysis reveals that AoR not only adapts various LLMs but also achieves a superior performance ceiling when compared to current methods. 2024.lrec-main.53 @@ -791,7 +791,7 @@ A Linguistically-Informed Annotation Strategy for <fixed-case>K</fixed-case>orean Semantic Role Labeling YigeChen - KyungTaeLim + KyungTaeLim JungyeulPark 733–738 Semantic role labeling is an essential component of semantic and syntactic processing of natural languages, which reveals the predicate-argument structure of the language. Despite its importance, semantic role labeling for the Korean language has not been studied extensively. One notable issue is the lack of uniformity among data annotation strategies across different datasets, which often lack thorough rationales. In this study, we suggest an annotation strategy for Korean semantic role labeling that is in line with the previously proposed linguistic theories as well as the distinct properties of the Korean language. We further propose a simple yet viable conversion strategy from the Sejong verb dictionary to a CoNLL-style dataset for Korean semantic role labeling. Experiment results using a transformer-based sequence labeling model demonstrate the reliability and trainability of the converted dataset. @@ -859,7 +859,7 @@ A Matter of Perspective: Building a Multi-Perspective Annotated Dataset for the Study of Literary Quality YuriBizzoni - PascaleFeldkamp + PascaleFeldkamp Ida Marie S.Lassen Mads RosendahlThomsen Kristoffer L.Nielbo @@ -872,7 +872,7 @@ <fixed-case>AM</fixed-case>en<fixed-case>D</fixed-case>e<fixed-case>D</fixed-case>: Modelling Concepts by Aligning Mentions, Definitions and Decontextualised Embeddings AmitGajbhiye ZiedBouraoui - LuisEspinosa Anke + LuisEspinosa Anke StevenSchockaert 801–811 Contextualised Language Models (LM) improve on traditional word embeddings by encoding the meaning of words in context. However, such models have also made it possible to learn high-quality decontextualised concept embeddings. Three main strategies for learning such embeddings have thus far been considered: (i) fine-tuning the LM to directly predict concept embeddings from the name of the concept itself, (ii) averaging contextualised representations of mentions of the concept in a corpus, and (iii) encoding definitions of the concept. As these strategies have complementary strengths and weaknesses, we propose to learn a unified embedding space in which all three types of representations can be integrated. We show that this allows us to outperform existing approaches in tasks such as ontology completion, which heavily depends on access to high-quality concept embeddings. We furthermore find that mentions and definitions are well-aligned in the resulting space, enabling tasks such as target sense verification, even without the need for any fine-tuning. @@ -1038,7 +1038,7 @@ KatsumiIbaraki WinstonWu LuWang - RadaMihalcea + RadaMihalcea 959–973 Recent advances in large language models (LLMs) have enabled users to generate fluent and seemingly convincing text. However, these models have uneven performance in different languages, which is also associated with undesirable societal biases toward marginalized populations. Specifically, there is relatively little work on Japanese models, despite it being the thirteenth most widely spoken language. In this work, we first develop three Japanese language prompts to probe LLMs’ understanding of Japanese names and their association between gender and occupations. We then evaluate a variety of English, multilingual, and Japanese models, correlating the models’ outputs with occupation statistics from the Japanese Census Bureau from the last 100 years. Our findings indicate that models can associate Japanese names with the correct gendered occupations when using constrained decoding. However, with sampling or greedy decoding, Japanese language models have a preference for a small set of stereotypically gendered occupations, and multilingual models, though trained on Japanese, are not always able to understand Japanese prompts. 2024.lrec-main.86 @@ -1050,7 +1050,7 @@ KirillMilintsevich LucieMetivier MaudRotharmel - GaëlDias + GaëlDias SoniaDollfus 974–983 The ever-growing number of people suffering from mental distress has motivated significant research initiatives towards automated depression estimation. Despite the multidisciplinary nature of the task, very few of these approaches include medical professionals in their research process, thus ignoring a vital source of domain knowledge. In this paper, we propose to bring the domain experts back into the loop and incorporate their knowledge within the gold-standard DAIC-WOZ dataset. In particular, we define a novel transformer-based architecture and analyse its performance in light of our expert annotations. Overall findings demonstrate a strong correlation between the psychological tendencies of medical professionals and the behavior of the proposed model, which additionally provides new state-of-the-art results. @@ -1081,8 +1081,8 @@ Analyzing the Understanding of Morphologically Complex Words in Large Language Models - MarionWeller-Di Marco - AlexanderFraser + MarionWeller-Di Marco + AlexanderFraser 1009–1020 We empirically study the ability of a Large Language Model (gpt-3.5-turbo-instruct) to understand morphologically complex words. In our experiments, we looked at a variety of tasks to analyse German compounds with regard to compositional word formation and derivation, such as identifying the head noun of existing and novel compounds, identifying the shared verb stem between two words, or recognizing words constructed with inappropriately used derivation morphemes as invalid. Our results show that the language model is generally capable of solving most tasks, except for the task of identifying ill-formed word forms. While the model demonstrated a good overall understanding of complex words and their word-internal structure, the results also suggest that there is no formal knowledge of derivational rules, but rather an interpretation of the observed word parts to derive the meaning of a word. 2024.lrec-main.90 @@ -1136,7 +1136,7 @@ An Effective Span-based Multimodal Named Entity Recognition with Consistent Cross-Modal Alignment YongxiuXu HaoXu - HeyanHuang + HeyanHuang ShiyaoCui MinghaoTang LongzhengWang @@ -1160,7 +1160,7 @@ An Empirical Study on the Robustness of Massively Multilingual Neural Machine Translation SupryadiSupryadi LeiyuPan - DeyiXiong + DeyiXiong 1086–1097 Massively multilingual neural machine translation (MMNMT) has been proven to enhance the translation quality of low-resource languages. In this paper, we empirically investigate the translation robustness of Indonesian-Chinese translation in the face of various naturally occurring noise. To assess this, we create a robustness evaluation benchmark dataset for Indonesian-Chinese translation. This dataset is automatically translated into Chinese using four NLLB-200 models of different sizes. We conduct both automatic and human evaluations. Our in-depth analysis reveal the correlations between translation error types and the types of noise present, how these correlations change across different model sizes, and the relationships between automatic evaluation indicators and human evaluation indicators. The dataset is publicly available at https://github.com/tjunlp-lab/ID-ZH-MTRobustEval. 2024.lrec-main.97 @@ -1169,7 +1169,7 @@ An Evaluation of <fixed-case>C</fixed-case>roatian <fixed-case>ASR</fixed-case> Models for Čakavian Transcription ShulinZhang - JohnHale + JohnHale MargaretRenwick ZvjezdanaVrzić KeithLangston @@ -1192,19 +1192,19 @@ A New Massive Multilingual Dataset for High-Performance Language Technologies - Onade Gibert + Onade Gibert GraemeNail - NikolayArefyev + NikolayArefyev MartaBañón Jelmervan der Linde ShaoxiongJi JaumeZaragoza-Bernabeu MikkoAulamo - GemaRamírez-Sánchez + GemaRamírez-Sánchez AndreyKutuzov SampoPyysalo StephanOepen - JörgTiedemann + JörgTiedemann 1116–1128 We present the HPLT (High Performance Language Technologies) language resources, a new massive multilingual dataset including both monolingual and bilingual corpora extracted from CommonCrawl and previously unused web crawls from the Internet Archive. We describe our methods for data acquisition, management and processing of large corpora, which rely on open-source software tools and high-performance computing. Our monolingual collection focuses on low- to medium-resourced languages and covers 75 languages and a total of ≈ 5.6 trillion word tokens de-duplicated on the document level. Our English-centric parallel corpus is derived from its monolingual counterpart and covers 18 language pairs and more than 96 million aligned sentence pairs with roughly 1.4 billion English tokens. The HPLT language resources are one of the largest open text corpora ever released, providing a great resource for language modeling and machine translation training. We publicly release the corpora, the software, and the tools used in this work. 2024.lrec-main.100 @@ -1272,8 +1272,8 @@ Annotating <fixed-case>C</fixed-case>hinese Word Senses with <fixed-case>E</fixed-case>nglish <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: A Practice on <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes <fixed-case>C</fixed-case>hinese Sense Inventories HongzhiXu JingxiaLin - SameerPradhan - MitchellMarcus + SameerPradhan + MitchellMarcus MingLiu 1187–1196 In this paper, we present our exploration of annotating Chinese word senses using English WordNet synsets, with examples extracted from OntoNotes Chinese sense inventories. Given a target word along with the example that contains it, the annotators select a WordNet synset that best describes the meaning of the target word in the context. The result demonstrates an inter-annotator agreement of 38% between two annotators. We delve into the instances of disagreement by comparing the two annotated synsets, including their positions within the WordNet hierarchy. The examination reveals intriguing patterns among closely related synsets, shedding light on similar concepts represented within the WordNet structure. The data offers as an indirect linking of Chinese word senses defined in OntoNotes Chinese sense inventories to WordNet sysnets, and thus promotes the value of the OntoNotes corpus. Compared to a direct linking of Chinese word senses to WordNet synsets, the example-based annotation has the merit of not being affected by inaccurate sense definitions and thus offers a new way of mapping WordNets of different languages. At the same time, the annotated data also serves as a valuable linguistic resource for exploring potential lexical differences between English and Chinese, with potential contributions to the broader understanding of cross-linguistic semantic mapping @@ -1295,7 +1295,7 @@ Pietro GiovanniBizzaro ElenaDella Valentina MaurizioNapolitano - NadiaMana + NadiaMana MassimoZancanaro 1209–1214 In this paper, we propose a new annotation scheme to classify different types of clauses in Terms-and-Conditions contracts with the ultimate goal of supporting legal experts to quickly identify and assess problematic issues in this type of legal documents. To this end, we built a small corpus of Terms-and-Conditions contracts and finalized an annotation scheme of 14 categories, eventually reaching an inter-annotator agreement of 0.92. Then, for 11 of them, we experimented with binary classification tasks using few-shot prompting with a multilingual T5 and two fine-tuned versions of two BERT-based LLMs for Italian. Our experiments showed the feasibility of automatic classification of our categories by reaching accuracies ranging from .79 to .95 on validation tasks. @@ -1343,7 +1343,7 @@ OanaIgnat LongjuBai Joan C.Nwatu - RadaMihalcea + RadaMihalcea 1239–1259 Current foundation models have shown impressive performance across various tasks. However, several studies have revealed that these models are not effective for everyone due to the imbalanced geographical and economic representation of the data used in the training process. Most of this data comes from Western countries, leading to poor results for underrepresented countries. To address this issue, more data needs to be collected from these countries, but the cost of annotation can be a significant bottleneck. In this paper, we propose methods to identify the data to be annotated to balance model performance and annotation costs. Our approach first involves finding the countries with images of topics (objects and actions) most visually distinct from those already in the training datasets used by current large vision-language foundation models. Next, we identify countries with higher visual similarity for these topics and show that using data from these countries to supplement the training data improves model performance and reduces annotation costs. The resulting lists of countries and corresponding topics are made available at https://github.com/MichiganNLP/visual_diversity_budget. 2024.lrec-main.112 @@ -1353,7 +1353,7 @@ <fixed-case>A</fixed-case>nno<fixed-case>T</fixed-case>heia: A Semi-Automatic Annotation Toolkit for Audio-Visual Speech Technologies José-M.Acosta-Triana DavidGimeno-Gómez - Carlos-D.Martínez-Hinarejos + Carlos-D.Martínez-Hinarejos 1260–1269 More than 7,000 known languages are spoken around the world. However, due to the lack of annotated resources, only a small fraction of them are currently covered by speech technologies. Albeit self-supervised speech representations, recent massive speech corpora collections, as well as the organization of challenges, have alleviated this inequality, most studies are mainly benchmarked on English. This situation is aggravated when tasks involving both acoustic and visual speech modalities are addressed. In order to promote research on low-resource languages for audio-visual speech technologies, we present AnnoTheia, a semi-automatic annotation toolkit that detects when a person speaks on the scene and the corresponding transcription. In addition, to show the complete process of preparing AnnoTheia for a language of interest, we also describe the adaptation of a pre-trained model for active speaker detection to Spanish, using a database not initially conceived for this type of task. Prior evaluations show that the toolkit is able to speed up to four times the annotation process. The AnnoTheia toolkit, tutorials, and pre-trained models are available at https://github.com/joactr/AnnoTheia/. 2024.lrec-main.113 @@ -1377,9 +1377,9 @@ Giridhar KaushikRamachandran SpencerLewis AashkaDamani - ÖzlemUzuner + ÖzlemUzuner MartinGunn - MelihaYetisgen + MelihaYetisgen 1280–1292 Medical imaging is critical to the diagnosis, surveillance, and treatment of many health conditions, including oncological, neurological, cardiovascular, and musculoskeletal disorders, among others. Radiologists interpret these complex, unstructured images and articulate their assessments through narrative reports that remain largely unstructured. This unstructured narrative must be converted into a structured semantic representation to facilitate secondary applications such as retrospective analyses or clinical decision support. Here, we introduce the Corpus of Annotated Medical Imaging Reports (CAMIR), which includes 609 annotated radiology reports from three imaging modality types: Computed Tomography, Magnetic Resonance Imaging, and Positron Emission Tomography-Computed Tomography. Reports were annotated using an event-based schema that captures clinical indications, lesions, and medical problems. Each event consists of a trigger and multiple arguments, and a majority of the argument types, including anatomy, normalize the spans to pre-defined concepts to facilitate secondary use. CAMIR uniquely combines a granular event structure and concept normalization. To extract CAMIR events, we explored two BERT (Bi-directional Encoder Representation from Transformers)-based architectures, including an existing architecture (mSpERT) that jointly extracts all event information and a multi-step approach (PL-Marker++) that we augmented for the CAMIR schema. 2024.lrec-main.115 @@ -1451,7 +1451,7 @@ A Persona-Based Corpus in the Diabetes Self-Care Domain - Applying a Human-Centered Approach to a Low-Resource Context RossanaCunha - ThiagoCastro Ferreira + ThiagoCastro Ferreira AdrianaPagano FabioAlves 1353–1369 @@ -1476,7 +1476,7 @@ Applying Transfer Learning to <fixed-case>G</fixed-case>erman Metaphor Prediction - MariaBerger + MariaBerger NiekeKiwitt SebastianReimann 1383–1392 @@ -1581,7 +1581,7 @@ MingtongLiu ChunyouLi YufengChen - JinanXu + JinanXu MingZhou 1486–1497 Due to the lack of parallel data, the mainstream fine-tuning-based domain adaptation methods have the overfitting problem in the translation of low-resource domains, and it is difficult for the model to learn the in-domain generalization knowledge. To address the above issue, in this work, we propose a novel Reinforcement Learning Domain Adaptation method for Neural Machine Translation (RLDA-NMT) in the low-resource domain. RLDA-NMT utilizes in-domain source monolingual data to make up for the lack of parallel data, and reinforces domain features learning to make the translation model learn the domain-specific knowledge more fully. Specifically, we first train a ranking-based model with a small-scale in-domain parallel corpus, and then adopt it as the reward model to select higher-quality generated translations for reinforcement when fine-tuning pre-trained NMT model using in-domain source monolingual data. We conduct experiments on Education, Laws, Thesis, and Patent domains of Chinese⇔English translation tasks. Experimental results demonstrate that RLDA-NMT can alleviate overfitting and reinforce the NMT model to learn domain-specific knowledge. Additionally, the results also show that RLDA-NMT and back-translation (BT) are nicely complementary to each other, where combining RLDA-NMT with BT can further improve translation quality. @@ -1614,7 +1614,7 @@ ElenaCabrio AnneLauscher JoonsukPark - Eva MariaVecchi + Eva MariaVecchi SerenaVillata TimonZiegenbein 1519–1538 @@ -1635,9 +1635,9 @@ <fixed-case>ART</fixed-case>: The Alternating Reading Task Corpus for Speech Entrainment and Imitation - ZhengYuan + ZhengYuan Dorinade Jong - ŠtefanBeňuš + ŠtefanBeňuš NoëlNguyen RuitaoFeng RóbertSabo @@ -1654,7 +1654,7 @@ ChangxinKe ShuhanZhou ChuruiSun - Wei-NanZhang + Wei-NanZhang TingLiu 1563–1576 Simile tasks are challenging in natural language processing (NLP) because models require adequate world knowledge to produce predictions. In recent years, pre-trained language models (PLMs) have succeeded in NLP since they learn generic knowledge from a large corpus. The knowledge embedded in PLMs can be used for different kinds of Simile tasks. However, previous work usually explored one type of simile knowledge for a specific simile task, how to fully utilize different types of knowledge embedded in the PLMs requires further exploration. This paper proposes a self-verified method for exploring simile knowledge from PLMs, which allows the PLMs to leverage one type of simile knowledge to self-validate another. To this end, we first enhance PLMs with a novel multi-level simile recognition (MLSR) task that trains PLMs to evaluate the quality of similes. Then the PLMs leverage this evaluation score to assist the simile interpretation and generation tasks. In this way, we connect different types of simile knowledge in PLMs and make better use of them. Experiments on different pre-trained models and multiple publicly available datasets show that our method works for different kinds of PLMs and can explore more accurate simile knowledge for PLMs. Our code/data will be released on GitHub. @@ -1677,7 +1677,7 @@ <fixed-case>ASEM</fixed-case>: Enhancing Empathy in Chatbot through Attention-based Sentiment and Emotion Modeling OmamaHamad - KhaledShaban + KhaledShaban AliHamdi 1588–1601 Effective feature representations play a critical role in enhancing the performance of text generation models that rely on deep neural networks. However, current approaches suffer from several drawbacks, such as the inability to capture the deep semantics of language and sensitivity to minor input variations, resulting in significant changes in the generated text. In this paper, we present a novel solution to these challenges by employing a mixture of experts, multiple encoders, to offer distinct perspectives on the emotional state of the user’s utterance while simultaneously enhancing performance. We propose an end-to-end model architecture called ASEM that performs emotion analysis on top of sentiment analysis for open-domain chatbots, enabling the generation of empathetic responses that are fluent and relevant. In contrast to traditional attention mechanisms, the proposed model employs a specialized attention strategy that uniquely zeroes in on sentiment and emotion nuances within the user’s utterance. This ensures the generation of context-rich representations tailored to the underlying emotional tone and sentiment intricacies of the text. Our approach outperforms existing methods for generating empathetic embeddings, providing empathetic and diverse responses. The performance of our proposed model significantly exceeds that of existing models, enhancing emotion detection accuracy by 6.2% and lexical diversity by 1.4%. ASEM code is released at https://github.com/MIRAH-Official/Empathetic-Chatbot-ASEM.git @@ -1729,7 +1729,7 @@ Assessing the Capabilities of Large Language Models in Coreference: An Evaluation YujianGan - MassimoPoesio + MassimoPoesio JuntaoYu 1645–1665 This paper offers a nuanced examination of the role Large Language Models (LLMs) play in coreference resolution, aimed at guiding the future direction in the era of LLMs. We carried out both manual and automatic analyses of different LLMs’ abilities, employing different prompts to examine the performance of different LLMs, obtaining a comprehensive view of their strengths and weaknesses. We found that LLMs show exceptional ability in understanding coreference. However, harnessing this ability to achieve state of the art results on traditional datasets and benchmarks isn’t straightforward. Given these findings, we propose that future efforts should: (1) Improve the scope, data, and evaluation methods of traditional coreference research to adapt to the development of LLMs. (2) Enhance the fine-grained language understanding capabilities of LLMs. @@ -1860,7 +1860,7 @@ A Typology of Errors for User Utterances in Chatbots AnuSingh - EsmeManandise + EsmeManandise 1789–1794 This paper discusses the challenges non-prescriptive language uses in chatbot communication create for Semantic Parsing (SP). To help SP developers improve their systems, we propose a flexible error typology based on an analysis of a sample of non-prescriptive language uses mined from a domain-specific chatbot logs. This typology is not tied to any specific language model. We also present a framework for automatically mapping these errors to the typology. Finally, we show how our framework can help evaluate SP systems from a linguistic robustness perspective. Our framework can be expanded to include new classes of errors across different domains and user demographics. 2024.lrec-main.158 @@ -1899,7 +1899,7 @@ Automatically Estimating Textual and Phonemic Complexity for Cued Speech: How to See the Sounds from <fixed-case>F</fixed-case>rench Texts - NúriaGala + NúriaGala BrigitteBigi MarieBauer 1817–1824 @@ -1942,7 +1942,7 @@ Automatic Coding of Contingency in Child-Caregiver Conversations AbhishekAgrawal MitjaNikolaus - BenoitFavre + BenoitFavre AbdellahFourtassi 1856–1870 One of the most important communicative skills children have to learn is to engage in meaningful conversations with people around them. At the heart of this learning lies the mastery of contingency, i.e., the ability to contribute to an ongoing exchange in a relevant fashion (e.g., by staying on topic). Current research on this question relies on the manual annotation of a small sample of children, which limits our ability to draw general conclusions about development. Here, we propose to mitigate the limitations of manual labor by relying on automatic tools for contingency judgment in children’s early natural interactions with caregivers. Drawing inspiration from the field of dialogue systems evaluation, we built and compared several automatic classifiers. We found that a Transformer-based pre-trained language model – when fine-tuned on a relatively small set of data we annotated manually (around 3,500 turns) – provided the best predictions. We used this model to automatically annotate, new and large-scale data, almost two orders of magnitude larger than our fine-tuning set. It was able to replicate existing results and generate new data-driven hypotheses. The broad impact of the work is to provide resources that can help the language development community study communicative development at scale, leading to more robust theories. @@ -1953,7 +1953,7 @@ Automatic Construction of a <fixed-case>C</fixed-case>hinese Review Dataset for Aspect Sentiment Triplet Extraction via Iterative Weak Supervision Chia-WenLu Ching-WenYang - Wei-YunMa + Wei-YunMa 1871–1882 Aspect Sentiment Triplet Extraction (ASTE), introduced in 2020, is a task that involves the extraction of three key elements: target aspects, descriptive opinion spans, and their corresponding sentiment polarity. This process, however, faces a significant hurdle, particularly when applied to Chinese languages, due to the lack of sufficient datasets for model training, largely attributable to the arduous manual labeling process. To address this issue, we present an innovative framework that facilitates the automatic construction of ASTE via Iterative Weak Supervision, negating the need for manual labeling, aided by a discriminator to weed out subpar samples. The objective is to successively improve the quality of this raw data and generate supplementary data. The effectiveness of our approach is underscored by our results, which include the creation of a substantial Chinese review dataset. This dataset encompasses over 60,000 Google restaurant reviews in Chinese and features more than 200,000 extracted triplets. Moreover, we have also established a robust baseline model by leveraging a novel method of weak supervision. Both our dataset and model are openly accessible to the public. 2024.lrec-main.167 @@ -1990,7 +1990,7 @@ ReiMiyata AtsushiFujita TomoyukiKajiwara - SatoshiSato + SatoshiSato 1899–1914 This paper presents our work on a task of automatic decomposition of text editing examples into primitive edit operations. Toward a detailed analysis of the behavior of text editing systems, identification of fine-grained edit operations performed by the systems is essential. Given a pair of source and edited sentences, the goal of our task is to generate a non-redundant sequence of primitive edit operations, i.e., the semantically minimal edit operations preserving grammaticality, that iteratively converts the source sentence to the edited sentence. First, we formalize this task, explaining its significant features and specifying the constraints that primitive edit operations should satisfy. Then, we propose a method to automate this task, which consists of two steps: generation of an edit operation lattice and selection of an optimal path. To obtain a wide range of edit operation candidates in the first step, we combine a phrase aligner and a large language model. Experimental results show that our method perfectly decomposes 44% and 64% of editing examples in the text simplification and machine translation post-editing datasets, respectively. Detailed analyses also provide insights into the difficulties of this task, suggesting directions for improvement. 2024.lrec-main.170 @@ -2001,7 +2001,7 @@ ElenaCallegari Iris EddaNowenstein Ingunn JóhannaKristjánsdóttir - Anton KarlIngason + Anton KarlIngason 1915–1924 This study examines the influence of task type and healthy aging on various automatically extracted part-of-speech features in Icelandic. We administered three language tasks to participants aged 60–80: picture description, trip planning, and description of one’s childhood home. Our findings reveal significant task effects on 11 out of 14 linguistic variables studied, highlighting the substantial influence of sampling methods on language production. Among the variables showing statistically significant task effects, we find the rate of the genitive and subjunctive, variables which can only be studied in morphologically richer languages like Icelandic. On the other hand, rates of pronouns, adverbs, and prepositions remained stable across task types. Aging effects were more subtle, being evident in 3 of the 14 variables, including an interaction with task type for dative case marking. These findings underscore the significance of task selection in studies targeting linguistic features but also emphasize the need to examine languages other than English to fully understand the effects of aging on language production. Additionally, the results have clinical implications: understanding healthy aging’s impact on language can help us better identify and study changes caused by Alzheimer’s Disease in older adults’ speech. 2024.lrec-main.171 @@ -2069,7 +2069,7 @@ Automatic Speech Recognition for <fixed-case>G</fixed-case>ascon and Languedocian Variants of <fixed-case>O</fixed-case>ccitan IñigoMorcillo - IgorLeturia + IgorLeturia AnderCorral XabierSarasola MichaëlBarret @@ -2114,7 +2114,7 @@ Auxiliary Knowledge-Induced Learning for Automatic Multi-Label Medical Document Classification XindiWang - Robert E.Mercer + Robert E.Mercer FrankRudzicz 2006–2016 The International Classification of Diseases (ICD) is an authoritative medical classification system of different diseases and conditions for clinical and management purposes. ICD indexing aims to assign a subset of ICD codes to a medical record. Since human coding is labour-intensive and error-prone, many studies employ machine learning techniques to automate the coding process. ICD coding is a challenging task, as it needs to assign multiple codes to each medical document from an extremely large hierarchically organized collection. In this paper, we propose a novel approach for ICD indexing that adopts three ideas: (1) we use a multi-level deep dilated residual convolution encoder to aggregate the information from the clinical notes and learn document representations across different lengths of the texts; (2) we formalize the task of ICD classification with auxiliary knowledge of the medical records, which incorporates not only the clinical texts but also different clinical code terminologies and drug prescriptions for better inferring the ICD codes; and (3) we introduce a graph convolutional network to leverage the co-occurrence patterns among ICD codes, aiming to enhance the quality of label representations. Experimental results show the proposed method achieves state-of-the-art performance on a number of measures. @@ -2128,7 +2128,7 @@ MaitaneUrruela ElisaEspina AitziberAtutxa Salazar - KoldoGojenola + KoldoGojenola 2017–2027 In this work we present two datasets for the development of virtual patients and the first evaluation results. We firstly introduce a Spanish corpus of medical dialogue questions annotated with intents, built upon prior research in French. We also propose a second dataset of dialogues using a novel annotation approach that involves doctor questions, patient answers, and corresponding clinical records, organized as triples of the form (clinical report, question, patient answer). This way, the doctor-patient conversation is modeled as a question-answering system that tries to find responses to questions taking a clinical record as input. This approach can help to eliminate the need for manually structured patient records, as commonly used in previous studies, thereby expanding the pool of diverse virtual patients available. Leveraging these annotated corpora, we develop and assess an automatic system designed to answer medical dialogue questions posed by medical students to simulated patients in medical exams. Our approach demonstrates robust generalization, relying solely on medical records to generate new patient cases. The two datasets and the code will be freely available for the research community. 2024.lrec-main.182 @@ -2136,7 +2136,7 @@ A Web Portal about the State of the Art of <fixed-case>NLP</fixed-case> Tasks in <fixed-case>S</fixed-case>panish - EnriqueAmigó + EnriqueAmigó JorgeCarrillo-de-Albornoz AndrésFernández JulioGonzalo @@ -2155,7 +2155,7 @@ Maartenvan Gompel AnnaJouravel ElenaRenje - UweReichel + UweReichel AchimRabus EckhartArnold 2039–2048 @@ -2188,13 +2188,13 @@ <fixed-case>B</fixed-case>alsu<fixed-case>T</fixed-case>alka.lv - Boosting the Common Voice Corpus for Low-Resource Languages RobertsDargis - ArtursZnotins + ArtursZnotins IlzeAuzina - BaibaSaulite + BaibaSaulite SanitaReinsone RaivisDejus AntraKlavinska - NormundsGruzitis + NormundsGruzitis 2080–2085 Open speech corpora of substantial size are seldom available for less-spoken languages, and this was recently the case also for Latvian with its 1.5M native speakers. While there exist several closed Latvian speech corpora of 100+ hours, used to train competitive models for automatic speech recognition (ASR), there were only a few tiny open datasets available at the beginning of 2023, the 18-hour Latvian Common Voice 13.0 dataset being the largest one. In the result of a successful national crowdsourcing initiative, organised jointly by several institutions, the size and speaker diversity of the Latvian Common Voice 17.0 release have increased more than tenfold in less than a year. A successful follow-up initiative was also launched for Latgalian, which has been recognized as an endangered historic variant of Latvian with 150k speakers. The goal of these initiatives is not only to enlarge the datasets but also to make them more diverse in terms of speakers and accents, text genres and styles, intonations, grammar and lexicon. They have already become considerable language resources for both improving ASR and conducting linguistic research. Since we use the Mozilla Common Voice platform to record and validate speech samples, this paper focuses on (i) the selection of text snippets to enrich the language data and to stimulate various intonations, (ii) an indicative evaluation of the acquired corpus and the first ASR models fine-tuned on this data, (iii) our social campaigns to boost and maintain this initiative. 2024.lrec-main.187 @@ -2205,7 +2205,7 @@ ZicanDong TianyiTang JunyiLi - Wayne XinZhao + Wayne XinZhao Ji-RongWen 2086–2099 Large language models (LLMs) have achieved dramatic proficiency over NLP tasks with normal length. Recently, multiple studies have committed to extending the context length and enhancing the long text modeling capabilities of LLMs. To comprehensively evaluate the long context ability of LLMs, we propose BAMBOO, a multi-task long context benchmark. BAMBOO has been designed with four principles: comprehensive capacity evaluation, avoidance of data contamination, accurate automatic evaluation, and different length levels. It consists of 10 datasets from 5 different long text understanding tasks, i.e., question answering, hallucination detection, text sorting, language modeling, and code completion, to cover various domains and core capacities of LLMs. We conduct experiments with five widely-used long-context models and further discuss five key questions for long text research. In the end, we discuss problems of current long-context models and point out future directions for enhancing long text modeling capacities. We release our data, prompts, and code at https://anonymous.4open.science/r/BAMBOO/. @@ -2255,7 +2255,7 @@ JaioneBengoetxea Yi-LingChung MarcoGuerini - RodrigoAgerri + RodrigoAgerri 2132–2141 Counter Narratives (CNs) are non-negative textual responses to Hate Speech (HS) aiming at defusing online hatred and mitigating its spreading across media. Despite the recent increase in HS content posted online, research on automatic CN generation has been relatively scarce and predominantly focused on English. In this paper, we present CONAN-EUS, a new Basque and Spanish dataset for CN generation developed by means of Machine Translation (MT) and professional post-edition. Being a parallel corpus, also with respect to the original English CONAN, it allows to perform novel research on multilingual and crosslingual automatic generation of CNs. Our experiments on CN generation with mT5, a multilingual encoder-decoder model, shows that generation greatly benefits from training on post-edited data, as opposed to relying on silver MT data only. These results are confirmed by their correlation with a qualitative manual evaluation, demonstrating that manually revised training data remains crucial for the quality of the generated CNs. Furthermore, multilingual data augmentation improves results over monolingual settings for structurally similar languages such as English and Spanish, while being detrimental for Basque, a language isolate. Similar findings occur in zero-shot crosslingual evaluations, where model transfer (fine-tuning in English and generating in a different target language) outperforms fine-tuning mT5 on machine translated data for Spanish but not for Basque. This provides an interesting insight into the asymmetry in the multilinguality of generative models, a challenging topic which is still open to research. Data and code will be made publicly available upon publication. 2024.lrec-main.192 @@ -2265,7 +2265,7 @@ Becoming a High-Resource Language in Speech: The <fixed-case>C</fixed-case>atalan Case in the Common Voice Corpus CarmeArmentano-Oller - MontserratMarimon + MontserratMarimon MartaVillegas 2142–2148 Collecting voice resources for speech recognition systems is a multifaceted challenge, involving legal, technical, and diversity considerations. However, it is crucial to ensure fair access to voice-driven technology across diverse linguistic backgrounds. We describe an ongoing effort to create an extensive, high-quality, publicly available voice dataset for future development of speech technologies in Catalan through the Mozilla Common Voice crowd-sourcing platform. We detail the specific approaches used to address the challenges faced in recruiting contributors and managing the collection, validation, and recording of sentences. This detailed overview can serve as a source of guidance for similar initiatives across other projects and linguistic contexts. The success of this project is evident in the latest corpus release, version 16.1, where Catalan ranks as the most prominent language in the corpus, both in terms of recorded hours and when considering validated hours. This establishes Catalan as a language with significant speech resources for language technology development and significantly raises its international visibility. @@ -2454,7 +2454,7 @@ Beyond Model Performance: Can Link Prediction Enrich <fixed-case>F</fixed-case>rench Lexical Graphs? Hee-SooChoi PriyanshTrivedi - MathieuConstant + MathieuConstant KarenFort BrunoGuillaume 2329–2341 @@ -2514,7 +2514,7 @@ Biomedical Concept Normalization over Nested Entities with Partial <fixed-case>UMLS</fixed-case> Terminology in <fixed-case>R</fixed-case>ussian - NataliaLoukachevitch + NataliaLoukachevitch AndreySakhovskiy ElenaTutubalina 2383–2389 @@ -2536,7 +2536,7 @@ Bits and Pieces: Investigating the Effects of Subwords in Multi-task Parsing across Languages and Domains DanielDakota - SandraKübler + SandraKübler 2397–2409 Neural parsing is very dependent on the underlying language model. However, very little is known about how choices in the language model affect parsing performance, especially in multi-task learning. We investigate questions on how the choice of subwords affects parsing, how subword sharing is responsible for gains or negative transfer in a multi-task setting where each task is parsing of a specific domain of the same language. More specifically, we investigate these issues across four languages: English, German, Italian, and Turkish. We find a general preference for averaged or last subwords across languages and domains. However, specific POS tags may require different subwords, and the distributional overlap between subwords across domains is perhaps a more influential factor in determining positive or negative transfer than discrepancies in the data sizes. 2024.lrec-main.215 @@ -2581,7 +2581,7 @@ <fixed-case>BLN</fixed-case>600: A Parallel Corpus of Machine/Human Transcribed Nineteenth Century Newspaper Texts Callum WilliamBooth AlanThomas - RobertGaizauskas + RobertGaizauskas 2440–2446 We present a publicly available corpus of nineteenth-century newspaper text focused on crime in London, derived from the Gale British Library Newspapers corpus parts 1 and 2. The corpus comprises 600 newspaper excerpts and for each excerpt contains the original source image, the machine transcription of that image as found in the BLN and a gold standard manual transcription that we have created. We envisage the corpus will be helpful for the training and development of OCR and post-OCR correction methodologies for historical newspaper machine transcription—for which there is currently a dearth of publicly available resources. In this paper, we discuss the rationale behind gathering such a corpus, the methodology used to select, process, and align the data, and the corpus’ potential utility for historians and digital humanities researchers—particularly within the realms of neural machine translation-based post-OCR correction approaches, and other natural language processing tasks that are critically affected by erroneous OCR. 2024.lrec-main.219 @@ -2619,7 +2619,7 @@ YupuLiang YangZhao YuZhou - ChengqingZong + ChengqingZong 2468–2479 Text image machine translation (TIMT) aims at translating source language texts in images into another target language, which has been proven successful by bridging text image recognition encoder and text translation decoder. However, it is still an open question of how to incorporate fine-grained knowledge supervision to make it consistent between recognition and translation modules. In this paper, we propose a novel TIMT method named as BabyNet, which is optimized with hierarchical parental supervision to improve translation performance. Inspired by genetic recombination and variation in the field of genetics, the proposed BabyNet is inherited from the recognition and translation parent models with a variation module of which parameters can be updated when training on the TIMT task. Meanwhile, hierarchical and multi-granularity supervision from parent models is introduced to bridge the gap between inherited modules in BabyNet. Extensive experiments on both synthetic and real-world TIMT tests show that our proposed method significantly outperforms existing methods. Further analyses of various parent model combinations show the good generalization of our method. 2024.lrec-main.222 @@ -2652,9 +2652,9 @@ Bridging Computational Lexicography and Corpus Linguistics: A Query Extension for <fixed-case>O</fixed-case>nto<fixed-case>L</fixed-case>ex-<fixed-case>F</fixed-case>r<fixed-case>AC</fixed-case> ChristianChiarcos - RankaStanković + RankaStanković MaximIonov - GillesSérasset + GillesSérasset 2504–2514 OntoLex, the dominant community standard for machine-readable lexical resources in the context of RDF, Linked Data and Semantic Web technologies, is currently extended with a designated module for Frequency, Attestations and Corpus-based Information (OntoLex-FrAC). We propose a novel component for OntoLex-FrAC, addressing the incorporation of corpus queries for (a) linking dictionaries with corpus engines, (b) enabling RDF-based web services to exchange corpus queries and responses data dynamically, and (c) using conventional query languages to formalize the internal structure of collocations, word sketches, and colligations. The primary field of application of the query extension is in digital lexicography and corpus linguistics, and we present a proof-of-principle implementation in backend components of a novel platform designed to support digital lexicography for the Serbian language. 2024.lrec-main.225 @@ -2684,7 +2684,7 @@ Bring Invariant to Variant: A Contrastive Prompt-based Framework for Temporal Knowledge Graph Forecasting - YingZhang + YingZhang XinyingQian YuZhao BaohangZhou @@ -2704,17 +2704,17 @@ WilliamCroft LukasDenk SijiaGe - JanHajič + JanHajič KennethLai - James H.Martin + James H.Martin SkatjeMyers AlexisPalmer - MarthaPalmer + MarthaPalmer Claire BenetPost - JamesPustejovsky + JamesPustejovsky KristineStenzel HaiboSun - ZdeňkaUrešová + ZdeňkaUrešová RosaVallejos Jens E. L.Van Gysel MeaganVigus @@ -2730,7 +2730,7 @@ PolinaBychkova AlyaxeyYaskevich SerafimaGyulasaryan - EkaterinaRakhilina + EkaterinaRakhilina 2548–2555 This paper discusses the Routinicon, a new constructicographic resource for the description of conversational routines. Conversational routines are defined as conventional formulaic expressions that language speakers use in standard extralinguistic situations (cf. Bless you! as a reaction to sneezing or Who’s there? as a typical answer to a knock on the door). The Routinicon’s goal is to accumulate the routines that constitute the inventory of conventional expressions in Russian language and systematically describe them in a way that would enable future cross-linguistic comparison and typological research. Conceptually, the Routinicon is a natural extension of such projects as the Russian Constructicon and Pragmaticon. It inherits their approach to the systematization of phraseological units as well as to the data collection. At the same time, the new project focuses on a fundamentally different domain of units and hence offers a radically new structure of linguistic annotation. Its principles and challenges are addressed in the paper. 2024.lrec-main.230 @@ -2738,9 +2738,9 @@ Building a Data Infrastructure for a Mid-Resource Language: The Case of <fixed-case>C</fixed-case>atalan - AitorGonzalez-Agirre - MontserratMarimon - CarlosRodriguez-Penagos + AitorGonzalez-Agirre + MontserratMarimon + CarlosRodriguez-Penagos JavierAula-Blasco IreneBaucells CarmeArmentano-Oller @@ -2756,7 +2756,7 @@ Building a <fixed-case>J</fixed-case>apanese Document-Level Relation Extraction Dataset Assisted by Cross-Lingual Transfer YoumiMa AnWang - NaoakiOkazaki + NaoakiOkazaki 2567–2579 Document-level Relation Extraction (DocRE) is the task of extracting all semantic relationships from a document. While studies have been conducted on English DocRE, limited attention has been given to DocRE in non-English languages. This work delves into effectively utilizing existing English resources to promote DocRE studies in non-English languages, with Japanese as the representative case. As an initial attempt, we construct a dataset by transferring an English dataset to Japanese. However, models trained on such a dataset are observed to suffer from low recalls. We investigate the error cases and attribute the failure to different surface structures and semantics of documents translated from English and those written by native speakers. We thus switch to explore if the transferred dataset can assist human annotation on Japanese documents. In our proposal, annotators edit relation predictions from a model trained on the transferred dataset. Quantitative analysis shows that relation recommendations suggested by the model help reduce approximately 50% of the human edit steps compared with the previous approach. Experiments quantify the performance of existing DocRE models on our collected dataset, portraying the challenges of Japanese and cross-lingual DocRE. 2024.lrec-main.232 @@ -2830,7 +2830,7 @@ Rudy AlexandroGarrido Veliz NatiaMestvirishvili AlexanderPanchenko - ChrisBiemann + ChrisBiemann IrinaNikishina 2657–2672 Comparative Question Answering (CompQA) is a Natural Language Processing task that combines Question Answering and Argument Mining approaches to answer subjective comparative questions in an efficient argumentative manner. In this paper, we present an end-to-end (full pipeline) system for answering comparative questions called CAM 2.0 as well as a public leaderboard called CompUGE that unifies the existing datasets under a single easy-to-use evaluation suite. As compared to previous web-form-based CompQA systems, it features question identification, object and aspect labeling, stance classification, and summarization using up-to-date models. We also select the most time- and memory-effective pipeline by comparing separately fine-tuned Transformer Encoder models which show state-of-the-art performance on the subtasks with Generative LLMs in few-shot and LoRA setups. We also conduct a user study for a whole-system evaluation. @@ -2864,7 +2864,7 @@ <fixed-case>C</fixed-case>amem<fixed-case>BERT</fixed-case>-bio: Leveraging Continual Pre-training for Cost-Effective Models on <fixed-case>F</fixed-case>rench Biomedical Data RianTouchent - Éricde la Clergerie + Éricde la Clergerie 2692–2701 Clinical data in hospitals are increasingly accessible for research through clinical data warehouses. However these documents are unstructured and it is therefore necessary to extract information from medical reports to conduct clinical studies. Transfer learning with BERT-like models such as CamemBERT has allowed major advances for French, especially for named entity recognition. However, these models are trained for plain language and are less efficient on biomedical data. Addressing this gap, we introduce CamemBERT-bio, a dedicated French biomedical model derived from a new public French biomedical dataset. Through continual pre-training of the original CamemBERT, CamemBERT-bio achieves an improvement of 2.54 points of F1-score on average across various biomedical named entity recognition tasks, reinforcing the potential of continual pre-training as an equally proficient yet less computationally intensive alternative to training from scratch. Additionally, we highlight the importance of using a standard evaluation protocol that provides a clear view of the current state-of-the-art for French biomedical models. 2024.lrec-main.241 @@ -2885,7 +2885,7 @@ Can Factual Statements Be Deceptive? The <fixed-case>D</fixed-case>e<fixed-case>F</fixed-case>a<fixed-case>B</fixed-case>el Corpus of Belief-based Deception AswathyVelutharambath - AmelieWührl + AmelieWührl RomanKlinger 2708–2723 If a person firmly believes in a non-factual statement, such as “The Earth is flat”, and argues in its favor, there is no inherent intention to deceive. As the argumentation stems from genuine belief, it may be unlikely to exhibit the linguistic properties associated with deception or lying. This interplay of factuality, personal belief, and intent to deceive remains an understudied area. Disentangling the influence of these variables in argumentation is crucial to gain a better understanding of the linguistic properties attributed to each of them. To study the relation between deception and factuality, based on belief, we present the DeFaBel corpus, a crowd-sourced resource of belief-based deception. To create this corpus, we devise a study in which participants are instructed to write arguments supporting statements like “eating watermelon seeds can cause indigestion”, regardless of its factual accuracy or their personal beliefs about the statement. In addition to the generation task, we ask them to disclose their belief about the statement. The collected instances are labelled as deceptive if the arguments are in contradiction to the participants’ personal beliefs. Each instance in the corpus is thus annotated (or implicitly labelled) with personal beliefs of the author, factuality of the statement, and the intended deceptiveness. The DeFaBel corpus contains 1031 texts in German, out of which 643 are deceptive and 388 are non-deceptive. It is the first publicly available corpus for studying deception in German. In our analysis, we find that people are more confident in the persuasiveness of their arguments when the statement is aligned with their belief, but surprisingly less confident when they are generating arguments in favor of facts. The DeFaBel corpus can be obtained from https://www.ims.uni-stuttgart.de/data/defabel . @@ -2952,7 +2952,7 @@ Can Large Language Models Learn Translation Robustness from Noisy-Source In-context Demonstrations? LeiyuPan YongqiLeng - DeyiXiong + DeyiXiong 2798–2808 Large language models (LLMs) have been used for machine translation. When provided with prompts and source sentences, LLMs can achieve impressive translation results. However, the robustness of these LLMs remains a significant challenge, as they often struggle to accurately translate sentences in the presence of noise, even when using similarity-based in-context learning methods. This work proposes a research scheme for studying machine translation robustness on LLMs, investigating whether LLMs can learn translation robustness from noisy-source demonstration examples. Through experiments on different models, languages, and noise types, we empirically demonstrate that LLMs can learn how to handle noise and translation methods from noisy-source demonstration examples, thereby improving their translation performance on noisy sentences. Furthermore, we find that increasing the noise ratio appropriately for the noisy-source demonstration examples can enhance the translation robustness of LLMs. Additionally, we also attempt to investigate scenarios where LLMs are more likely to learn translation robustness for mixed and specific types of noise. We find that the model’s performance varies across different noise settings. 2024.lrec-main.249 @@ -2963,7 +2963,7 @@ ShaoxiongJi TimotheeMickus VincentSegonne - JörgTiedemann + JörgTiedemann 2809–2818 Multilingual pretraining and fine-tuning have remarkably succeeded in various natural language processing tasks. Transferring representations from one language to another is especially crucial for cross-lingual learning. One can expect machine translation objectives to be well suited to fostering such capabilities, as they involve the explicit alignment of semantically equivalent sentences from different languages. This paper investigates the potential benefits of employing machine translation as a continued training objective to enhance language representation learning, bridging multilingual pretraining and cross-lingual applications. We study this question through two lenses: a quantitative evaluation of the performance of existing models and an analysis of their latent representations. Our results show that, contrary to expectations, machine translation as the continued training fails to enhance cross-lingual representation learning in multiple cross-lingual natural language understanding tasks. We conclude that explicit sentence-level alignment in the cross-lingual scenario is detrimental to cross-lingual transfer pretraining, which has important implications for future cross-lingual transfer studies. We furthermore provide evidence through similarity measures and investigation of parameters that this lack of positive influence is due to output separability—which we argue is of use for machine translation but detrimental elsewhere. 2024.lrec-main.250 @@ -2999,8 +2999,8 @@ Can We Identify Stance without Target Arguments? A Study for Rumour Stance Classification - YueLi - CarolinaScarton + YueLi + CarolinaScarton 2844–2851 Considering a conversation thread, rumour stance classification aims to identify the opinion (e.g. agree or disagree) of replies towards a target (rumour story). Although the target is expected to be an essential component in traditional stance classification, we show that rumour stance classification datasets contain a considerable amount of real-world data whose stance could be naturally inferred directly from the replies, contributing to the strong performance of the supervised models without awareness of the target. We find that current target-aware models underperform in cases where the context of the target is crucial. Finally, we propose a simple yet effective framework to enhance reasoning with the targets, achieving state-of-the-art performance on two benchmark datasets. 2024.lrec-main.253 @@ -3062,7 +3062,7 @@ Causal Intersectionality and Dual Form of Gradient Descent for Multimodal Analysis: A Case Study on Hateful Memes YosukeMiyanishi - Minh LeNguyen + Minh LeNguyen 2901–2916 Amidst the rapid expansion of Machine Learning (ML) and Large Language Models (LLMs), understanding the semantics within their mechanisms is vital. Causal analyses define semantics, while gradient-based methods are essential to eXplainable AI (XAI), interpreting the model’s ‘black box’. Integrating these, we investigate how a model’s mechanisms reveal its causal effect on evidence-based decision-making. Research indicates intersectionality - the combined impact of an individual’s demographics - can be framed as an Average Treatment Effect (ATE). This paper demonstrates that hateful meme detection can be viewed as an ATE estimation using intersectionality principles, and summarized gradient-based attention scores highlight distinct behaviors of three Transformer models. We further reveal that LLM Llama-2 can discern the intersectional aspects of the detection through in-context learning and that the learning process could be explained via meta-gradient, a secondary form of gradient. In conclusion, this work furthers the dialogue on Causality and XAI. Our code is available online (see External Resources section). 2024.lrec-main.259 @@ -3071,7 +3071,7 @@ <fixed-case>CBBQ</fixed-case>: A <fixed-case>C</fixed-case>hinese Bias Benchmark Dataset Curated with Human-<fixed-case>AI</fixed-case> Collaboration for Large Language Models YufeiHuang - DeyiXiong + DeyiXiong 2917–2929 Holistically measuring societal biases of large language models is crucial for detecting and reducing ethical risks in highly capable AI models. In this work, we present a Chinese Bias Benchmark dataset that consists of over 100K questions jointly constructed by human experts and generative language models, covering stereotypes and societal biases in 14 social dimensions related to Chinese culture and values. The curation process contains 4 essential steps: bias identification, ambiguous context generation, AI-assisted disambiguous context generation, and manual review and recomposition. The testing instances in the dataset are automatically derived from 3K+ high-quality templates manually authored with stringent quality control. The dataset exhibits wide coverage and high diversity. Extensive experiments demonstrate the effectiveness of the dataset in evaluating model bias, with all 12 publicly available Chinese large language models exhibiting strong bias in certain categories. Additionally, we observe from our experiments that fine-tuned models could, to a certain extent, heed instructions and avoid generating harmful outputs, in the way of “moral self-correction”. Our dataset is available at https://anonymous.4open.science/r/CBBQ-B860/. 2024.lrec-main.260 @@ -3130,7 +3130,7 @@ <fixed-case>C</fixed-case>hain<fixed-case>LM</fixed-case>: Empowering Large Language Models with Improved Chain-of-Thought Prompting XiaoxueCheng JunyiLi - Wayne XinZhao + Wayne XinZhao Ji-RongWen 2969–2983 Chain-of-Thought (CoT) prompting can enhance the reasoning capabilities of large language models (LLMs), establishing itself as a primary approach to solving complex reasoning tasks. Existing CoT synthesis approaches usually focus on simpler reasoning tasks and thus result in low-quality and inconsistent CoT prompts. In response to this challenge, we present an empirical investigation of CoT prompting and introduce CoTGenius, a novel framework designed for the automatic generation of superior CoT prompts. CoTGenius is developed based on three major evolution strategies, i.e., complicate, diversify, and specify—alongside two filtering mechanisms: evolutionary success judgement and correctness verification. We further employ CoTGenius to create an extensive CoT dataset, and subsequently fine-tune the Llama 2-Chat 7B and 13B models on this dataset. We call the resulting model ChainLM. To deal with the cumulative error issue in reasoning steps, we propose a step-level debating method, wherein multiple debaters discuss each reasoning step to arrive at the correct answer. Extensive experiments demonstrate that our ChainLM models exhibit enhanced proficiency in addressing a spectrum of complex reasoning problems compared to existing models. In addition, we conduct an in-depth analysis of the impact of data categories within CoTGenius on the model performance. We release our dataset and code at https://github.com/RUCAIBox/ChainLM. @@ -3142,7 +3142,7 @@ Rowan HallMaudslay SimoneTeufel FrancisBond - JamesPustejovsky + JamesPustejovsky 2984–2996 The senses of a word exhibit rich internal structure. In a typical lexicon, this structure is overlooked: A word’s senses are encoded as a list, without inter-sense relations. We present ChainNet, a lexical resource which for the first time explicitly identifies these structures, by expressing how senses in the Open English Wordnet are derived from one another. In ChainNet, every nominal sense of a word is either connected to another sense by metaphor or metonymy, or is disconnected (in the case of homonymy). Because WordNet senses are linked to resources which capture information about their meaning, ChainNet represents the first dataset of grounded metaphor and metonymy. 2024.lrec-main.266 @@ -3151,7 +3151,7 @@ Challenges in Pre-Training Graph Neural Networks for Context-Based Fake News Detection: An Evaluation of Current Strategies and Resource Limitations GregorDonabauer - UdoKruschwitz + UdoKruschwitz 2997–3004 Pre-training of neural networks has recently revolutionized the field of Natural Language Processing (NLP) and has before demonstrated its effectiveness in computer vision. At the same time, advances around the detection of fake news were mainly driven by the context-based paradigm, where different types of signals (e.g. from social media) form graph-like structures that hold contextual information apart from the news article to classify. We propose to merge these two developments by applying pre-training of Graph Neural Networks (GNNs) in the domain of context-based fake news detection. Our experiments provide an evaluation of different pre-training strategies for graph-based misinformation detection and demonstrate that transfer learning does currently not lead to significant improvements over training a model from scratch in the domain. We argue that a major current issue is the lack of suitable large-scale resources that can be used for pre-training. 2024.lrec-main.267 @@ -3160,7 +3160,7 @@ Challenging Negative Gender Stereotypes: A Study on the Effectiveness of Automated Counter-Stereotypes IsarNejadgholi - Kathleen C.Fraser + Kathleen C.Fraser AnnaKerkhof SvetlanaKiritchenko 3005–3015 @@ -3184,7 +3184,7 @@ LeonardoZilio ShenbinQian DipteshKanojia - ConstantinOrasan + ConstantinOrasan 3028–3037 Abbreviations and their associated long forms are important textual elements that are present in almost every scientific communication, and having information about these forms can help improve several NLP tasks. In this paper, our aim is to fine-tune language models for automatically identifying abbreviations and long forms. We used existing datasets which are annotated with abbreviations and long forms to train and test several language models, including transformer models, character-level language models, stacking of different embeddings, and ensemble methods. Our experiments showed that it was possible to achieve state-of-the-art results by stacking RoBERTa embeddings with domain-specific embeddings. However, the analysis of our first run showed that one of the datasets had issues in the BIO annotation, which led us to propose a revised dataset. After re-training selected models on the revised dataset, results show that character-level models achieve comparable results, especially when detecting abbreviations, but both RoBERTa large and the stacking of embeddings presented better results on biomedical data. When tested on a different subdomain (segments extracted from computer science texts), an ensemble method proved to yield the best results for the detection of long forms, and a character-level model had the best performance in detecting abbreviations. 2024.lrec-main.270 @@ -3195,7 +3195,7 @@ MartinPopel LuciePolakova MichalNovák - JindřichHelcl + JindřichHelcl JindřichLibovický PavelStraňák TomasKrabac @@ -3234,7 +3234,7 @@ JingjingWang JiaminLuo TaoZeng - GuodongZhou + GuodongZhou 3075–3085 Aspect Sentiment Understanding (ASU) in interactive scenarios (e.g., Question-Answering and Dialogue) has attracted ever-more interest in recent years and achieved important progresses. However, existing studies on interactive ASU largely ignore the coreference issue for opinion targets (i.e., aspects), while this phenomenon is ubiquitous in interactive scenarios especially dialogues, limiting the ASU performance. Recently, large language models (LLMs) shows the powerful ability to integrate various NLP tasks with the chat paradigm. In this way, this paper proposes a new Chat-based Aspect Sentiment Understanding (ChatASU) task, aiming to explore LLMs’ ability in understanding aspect sentiments in dialogue scenarios. Particularly, this ChatASU task introduces a sub-task, i.e., Aspect Chain Reasoning (ACR) task, to address the aspect coreference issue. On this basis, we propose a Trusted Self-reflexion Approach (TSA) with ChatGLM as backbone to ChatASU. Specifically, this TSA treats the ACR task as an auxiliary task to boost the performance of the primary ASU task, and further integrates trusted learning into reflexion mechanisms to alleviate the LLMs-intrinsic factual hallucination problem in TSA. Furthermore, a high-quality ChatASU dataset is annotated to evaluate TSA, and extensive experiments show that our proposed TSA can significantly outperform several state-of-the-art baselines, justifying the effectiveness of TSA to ChatASU and the importance of considering the coreference and hallucination issues in ChatASU. 2024.lrec-main.274 @@ -3310,8 +3310,8 @@ CassandreArmand ChiaraMazzocconi ShreejataGupta - LaurentPrévot - BenoitFavre + LaurentPrévot + BenoitFavre LeonorBecerra-Bonache AbdellahFourtassi 3153–3164 @@ -3357,7 +3357,7 @@ Chitchat as Interference: Adding User Backstories to Task-Oriented Dialogues ArmandStricker - PatrickParoubek + PatrickParoubek 3203–3214 During task-oriented dialogues (TODs), human users naturally introduce chitchat that is beyond the immediate scope of the task, interfering with the flow of the conversation. To address this issue without the need for expensive manual data creation, we use few-shot prompting with Llama-2-70B to enhance the MultiWOZ dataset with user backstories, a typical example of chitchat interference in TODs. We assess the impact of this addition by testing two models: one trained solely on TODs and another trained on TODs with a preliminary chitchat interaction. Our analysis demonstrates that our enhanced dataset poses a challenge for these systems. Moreover, we demonstrate that our dataset can be effectively used for training purposes, enabling a system to consistently acknowledge the user’s backstory while also successfully moving the task forward in the same turn, as confirmed by human evaluation. These findings highlight the benefits of generating novel chitchat-TOD scenarios to test TOD systems more thoroughly and improve their resilience to natural user interferences. 2024.lrec-main.284 @@ -3419,7 +3419,7 @@ XiaolongJin LongBai JiafengGuo - XueqiCheng + XueqiCheng 3261–3270 Event detection is one of the fundamental tasks in information extraction and knowledge graph. However, a realistic event detection system often needs to deal with new event classes constantly. These new classes usually have only a few labeled instances as it is time-consuming and labor-intensive to annotate a large number of unlabeled instances. Therefore, this paper proposes a new task, called class-incremental few-shot event detection. Nevertheless, there are two problems (i.e., old knowledge forgetting and new class overfitting) in this task. To solve these problems, this paper further presents a novel knowledge distillation and prompt learning based method, called Prompt-KD. Specifically, to reduce the forgetting issue about old knowledge, Prompt-KD develops an attention based multi-teacher knowledge distillation framework, where the ancestor teacher model pre-trained on base classes is reused in all learning sessions, and the father teacher model derives the current student model via adaptation. On the other hand, in order to cope with the few-shot learning scenario and alleviate the corresponding new class overfitting problem, Prompt-KD is also equipped with a prompt learning mechanism. Extensive experiments on two benchmark datasets, i.e., FewEvent and MAVEN, demonstrate the state-of-the-art performance of Prompt-KD. 2024.lrec-main.290 @@ -3458,7 +3458,7 @@ FanXu LeiZeng BoweiZou - Ai TiAw + Ai TiAw HuanRong 3314–3324 In an era where rumors can propagate rapidly across social media platforms such as Twitter and Weibo, automatic rumor detection has garnered considerable attention from both academia and industry. Existing multimodal rumor detection models often overlook the intricacies of sample difficulty, e.g., text-level difficulty, image-level difficulty, and multimodal-level difficulty, as well as their order when training. Inspired by the concept of curriculum learning, we propose the Curriculum Learning and Fine-grained Fusion-driven multimodal Rumor Detection (CLFFRD) framework, which employs curriculum learning to automatically select and train samples according to their difficulty at different training stages. Furthermore, we introduce a fine-grained fusion strategy that unifies entities from text and objects from images, enhancing their semantic cohesion. We also propose a novel data augmentation method that utilizes linear interpolation between textual and visual modalities to generate diverse data. Additionally, our approach incorporates deep fusion for both intra-modality (e.g., text entities and image objects) and inter-modality (e.g., CLIP and social graph) features. Extensive experimental results demonstrate that CLFFRD outperforms state-of-the-art models on both English and Chinese benchmark datasets for rumor detection in social media. @@ -3488,7 +3488,7 @@ PhilhoonOh HaneulYoo JamesThorne - AliceOh + AliceOh 3335–3346 Despite the rapid development of large language models (LLMs) for the Korean language, there remains an obvious lack of benchmark datasets that test the requisite Korean cultural and linguistic knowledge. Because many existing Korean benchmark datasets are derived from the English counterparts through translation, they often overlook the different cultural contexts. For the few benchmark datasets that are sourced from Korean data capturing cultural knowledge, only narrow tasks such as hate speech detection are offered. To address this gap, we introduce a benchmark of Cultural and Linguistic Intelligence in Korean (CLIcK), a dataset comprising 1,995 QA pairs. CLIcK sources its data from official Korean exams and textbooks, partitioning the questions into eleven categories under the two main categories of language and culture. For each instance in click, we provide fine-grained annotation of which cultural and linguistic knowledge is required to correctly answer the question. Using CLIcK, we test 13 language models to assess their performance. Our evaluation uncovers insights into their performances across the categories, as well as the diverse factors affecting their comprehension. CLIcK offers the first large-scale comprehensive Korean-centric analysis of LLMs’ proficiency in Korean language and culture. 2024.lrec-main.296 @@ -3593,7 +3593,7 @@ <fixed-case>C</fixed-case>o<fixed-case>C</fixed-case>o<fixed-case>MIC</fixed-case>: Code Completion by Jointly Modeling In-file and Cross-file Context YangruiboDing ZijianWang - WasiAhmad + WasiAhmad Murali KrishnaRamanathan RameshNallapati ParminderBhatia @@ -3618,7 +3618,7 @@ Code-Mixed Probes Show How Pre-Trained Models Generalise on Code-Switched Text Frances AdrianaLaureano De Leon HarishTayyar Madabushi - MarkLee + MarkLee 3457–3468 Code-switching is a prevalent linguistic phenomenon in which multilingual individuals seamlessly alternate between languages. Despite its widespread use online and recent research trends in this area, research in code-switching presents unique challenges, primarily stemming from the scarcity of labelled data and available resources. In this study we investigate how pre-trained Language Models handle code-switched text in three dimensions: a) the ability of PLMs to detect code-switched text, b) variations in the structural information that PLMs utilise to capture code-switched text, and c) the consistency of semantic information representation in code-switched text. To conduct a systematic and controlled evaluation of the language models in question, we create a novel dataset of well-formed naturalistic code-switched text along with parallel translations into the source languages. Our findings reveal that pre-trained language models are effective in generalising to code-switched text, shedding light on abilities of these models to generalise representations to CS corpora. We release all our code and data, including the novel corpus, at https://github.com/francesita/code-mixed-probes. 2024.lrec-main.307 @@ -3628,7 +3628,7 @@ Code-Mixed Text Augmentation for <fixed-case>L</fixed-case>atvian <fixed-case>ASR</fixed-case> MartinsKronis AskarsSalimbajevs - MārcisPinnis + MārcisPinnis 3469–3479 Code-mixing has become mainstream in the modern, globalised world and affects low-resource languages, such as Latvian, in particular. Solutions to developing an automatic speech recognition system (ASR) for code-mixed speech often rely on specially created audio-text corpora, which are expensive and time-consuming to create. In this work, we attempt to tackle code-mixed Latvian-English speech recognition by improving the language model (LM) of a hybrid ASR system. We make a distinction between inflected transliterations and phonetic transcriptions as two different foreign word types. We propose an inflected transliteration model and a phonetic transcription model for the automatic generation of said word types. We then leverage a large human-translated English-Latvian parallel text corpus to generate synthetic code-mixed Latvian sentences by substituting in generated foreign words. Using the newly created augmented corpora, we train a new LM and combine it with our existing Latvian acoustic model (AM). For evaluation, we create a specialised foreign word test set on which our methods yield up to 15% relative CER improvement. We then further validate these results in a human evaluation campaign. 2024.lrec-main.308 @@ -3649,7 +3649,7 @@ YufengChen NingCheng XingyuCui - JinanXu + JinanXu WenjuanHan 3490–3506 In order to construct or extend entity-centric and event-centric knowledge graphs (KG and EKG), the information extraction (IE) annotation toolkit is essential. However, existing IE toolkits have several non-trivial problems, such as not supporting multi-tasks, and not supporting automatic updates. In this work, we present CollabKG, a learnable human-machine-cooperative IE toolkit for KG and EKG construction. Specifically, for the multi-task issue, CollabKG unifies different IE subtasks, including named entity recognition (NER), entity-relation triple extraction (RE), and event extraction (EE), and supports both KG and EKG. Then, combining advanced prompting-based IE technology, the human-machine-cooperation mechanism with Large Language Models (LLMs) as the assistant machine is presented which can provide a lower cost as well as a higher performance. Lastly, owing to the two-way interaction between the human and machine, CollabKG with learning ability allows self-renewal. Besides, CollabKG has several appealing features (e.g., customization, training-free, and label propagation) that make the system powerful and high-productivity. We holistically compare our toolkit with other existing tools on these features. Human evaluation quantitatively illustrates that CollabKG significantly improves annotation quality, efficiency, and stability simultaneously. @@ -3680,7 +3680,7 @@ Collecting Linguistic Resources for Assessing Children’s Pronunciation of <fixed-case>N</fixed-case>ordic Languages Anne Marte HaugOlstad AnnaSmolander - SofiaStrömbergsson + SofiaStrömbergsson SariYlinen MinnaLehtonen MikkoKurimo @@ -3730,23 +3730,23 @@ Common <fixed-case>E</fixed-case>uropean Language Data Space GeorgRehm - SteliosPiperidis - KhalidChoukri - AndrejsVasiļjevs + SteliosPiperidis + KhalidChoukri + AndrejsVasiļjevs KatrinMarheinecke VictoriaArranz AivarsBērziņš MiltosDeligiannis - DimitrisGalanis + DimitrisGalanis MariaGiagkou KaterinaGkirtzou DimitrisGkoumas AnnikaGrützner-Zahn AthanasiaKolovou - PennyLabropoulou + PennyLabropoulou AndisLagzdiņš ElenaLeitner - ValérieMapelli + ValérieMapelli HélèneMazo SimonOstermann StefaniaRacioppa @@ -3769,7 +3769,7 @@ Benjamin A.Ibarra NathanielBlanchard NikhilKrishnaswamy - JamesPustejovsky + JamesPustejovsky 3587–3602 Within Dialogue Modeling research in AI and NLP, considerable attention has been spent on “dialogue state tracking” (DST), which is the ability to update the representations of the speaker’s needs at each turn in the dialogue by taking into account the past dialogue moves and history. Less studied but just as important to dialogue modeling, however, is “common ground tracking” (CGT), which identifies the shared belief space held by all of the participants in a task-oriented dialogue: the task-relevant propositions all participants accept as true. In this paper we present a method for automatically identifying the current set of shared beliefs and ”questions under discussion” (QUDs) of a group with a shared goal. We annotate a dataset of multimodal interactions in a shared physical space with speech transcriptions, prosodic features, gestures, actions, and facets of collaboration, and operationalize these features for use in a deep neural model to predict moves toward construction of common ground. Model outputs cascade into a set of formal closure rules derived from situated evidence and belief axioms and update operations. We empirically assess the contribution of each feature type toward successful construction of common ground relative to ground truth, establishing a benchmark in this novel, challenging task. 2024.lrec-main.318 @@ -3801,7 +3801,7 @@ Comparison of Conventional Hybrid and <fixed-case>CTC</fixed-case>/Attention Decoders for Continuous Visual Speech Recognition DavidGimeno-Gómez - Carlos-D.Martínez-Hinarejos + Carlos-D.Martínez-Hinarejos 3628–3638 Thanks to the rise of deep learning and the availability of large-scale audio-visual databases, recent advances have been achieved in Visual Speech Recognition (VSR). Similar to other speech processing tasks, these end-to-end VSR systems are usually based on encoder-decoder architectures. While encoders are somewhat general, multiple decoding approaches have been explored, such as the conventional hybrid model based on Deep Neural Networks combined with Hidden Markov Models (DNN-HMM) or the Connectionist Temporal Classification (CTC) paradigm. However, there are languages and tasks in which data is scarce, and in this situation, there is not a clear comparison between different types of decoders. Therefore, we focused our study on how the conventional DNN-HMM decoder and its state-of-the-art CTC/Attention counterpart behave depending on the amount of data used for their estimation. We also analyzed to what extent our visual speech features were able to adapt to scenarios for which they were not explicitly trained, either considering a similar dataset or another collected for a different language. Results showed that the conventional paradigm reached recognition rates that improve the CTC/Attention model in data-scarcity scenarios along with a reduced training time and fewer parameters. 2024.lrec-main.321 @@ -3821,7 +3821,7 @@ Complex Word Identification: A Comparative Study between <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> and a Dedicated Model for This Task AbdelhakKelious - MathieuConstant + MathieuConstant ChristopheCoeur 3645–3653 There are several works in natural language processing for identifying lexical complexity. This can be for various reasons, either for simplification, the selection of more suitable content, or for other specific tasks. Words can have multiple definitions and degrees of complexity depending on the context in which they appear. One solution being investigated is lexical complexity prediction, where computational methods are used to evaluate the difficulty of vocabulary for language learners and offer personalized assistance. In this work, we explore deep learning methods to assess the complexity of a word based on its context. Specifically, we investigate how to use pre-trained language models to encode both the sentence and the target word, and then fine-tune them by combining them with additional frequency-based features. Our approach achieved superior results compared to the best systems in SemEval-2021 (Shardlow et al., 2021), as demonstrated by an R2 score of 0.65. Finally, we carry out a comparative study with ChatGPT to assess its potential for predicting lexical complexity, to see whether prompt engineering can be an alternative to this task, we will discuss the advantages and limitations of ChatGPT. @@ -3859,7 +3859,7 @@ Computational Modelling of Plurality and Definiteness in <fixed-case>C</fixed-case>hinese Noun Phrases YuqiLiu GuanyiChen - Keesvan Deemter + Keesvan Deemter 3666–3676 Theoretical linguists have suggested that some languages (e.g., Chinese and Japanese) are “cooler” than other languages based on the observation that the intended meaning of phrases in these languages depends more on their contexts. As a result, many expressions in these languages are shortened, and their meaning is inferred from the context. In this paper, we focus on the omission of the plurality and definiteness markers in Chinese noun phrases (NPs) to investigate the predictability of their intended meaning given the contexts. To this end, we built a corpus of Chinese NPs, each of which is accompanied by its corresponding context, and by labels indicating its singularity/plurality and definiteness/indefiniteness. We carried out corpus assessments and analyses. The results suggest that Chinese speakers indeed drop plurality and definiteness markers very frequently. Building on the corpus, we train a bank of computational models using both classic machine learning models and state-of-the-art pre-trained language models to predict the plurality and definiteness of each NP. We report on the performance of these models and analyse their behaviours. 2024.lrec-main.325 @@ -3880,10 +3880,10 @@ Conceptual Pacts for Reference Resolution Using Small, Dynamically Constructed Language Models: A Study in Puzzle Building Dialogues JulianHough - SinaZarrieß - CaseyKennington + SinaZarrieß + CaseyKennington DavidSchlangen - MassimoPoesio + MassimoPoesio 3689–3699 Using Brennan and Clark’s theory of a Conceptual Pact, that when interlocutors agree on a name for an object, they are forming a temporary agreement on how to conceptualize that object, we present an extension to a simple reference resolver which simulates this process over time with different conversation pairs. In a puzzle construction domain, we model pacts with small language models for each referent which update during the interaction. When features from these pact models are incorporated into a simple bag-of-words reference resolver, the accuracy increases compared to using a standard pre-trained model. The model performs equally to a competitor using the same data but with exhaustive re-training after each prediction, while also being more transparent, faster and less resource-intensive. We also experiment with reducing the number of training interactions, and can still achieve reference resolution accuracies of over 80% in testing from observing a single previous interaction, over 20% higher than a pre-trained baseline. While this is a limited domain, we argue the model could be applicable to larger real-world applications in human and human-robot interaction and is an interpretable and transparent model. 2024.lrec-main.327 @@ -3897,11 +3897,11 @@ JingLiu DeshRaj Leibny PaolaGarcia - Alexei V.Ivanov + Alexei V.Ivanov PatrickEhlen MingzhiYu DanPovey - SanjeevKhudanpur + SanjeevKhudanpur 3700–3706 Knowing the particular context associated with a conversation can help improving the performance of an automatic speech recognition (ASR) system. For example, if we are provided with a list of in-context words or phrases — such as the speaker’s contacts or recent song playlists — during inference, we can bias the recognition process towards this list. There are many works addressing contextual ASR; however, there is few publicly available real benchmark for evaluation, making it difficult to compare different solutions. To this end, we provide a corpus (“ConEC”) and baselines to evaluate contextual ASR approaches, grounded on real-world applications. The ConEC corpus is based on public-domain earnings calls (ECs) and associated supplementary materials, such as presentation slides, earnings news release as well as a list of meeting participants’ names and affiliations. We demonstrate that such real contexts are noisier than artificially synthesized contexts that contain the ground truth, yet they still make great room for future improvement of contextual ASR technology 2024.lrec-main.328 @@ -3999,9 +3999,9 @@ ShijiaZhou LeonieWeissweiler TaiqiHe - HinrichSchütze - David R.Mortensen - LoriLevin + HinrichSchütze + David R.Mortensen + LoriLevin 3804–3811 In this paper, we make a contribution that can be understood from two perspectives: from an NLP perspective, we introduce a small challenge dataset for NLI with large lexical overlap, which minimises the possibility of models discerning entailment solely based on token distinctions, and show that GPT-4 and Llama 2 fail it with strong bias. We then create further challenging sub-tasks in an effort to explain this failure. From a Computational Linguistics perspective, we identify a group of constructions with three classes of adjectives which cannot be distinguished by surface features. This enables us to probe for LLM’s understanding of these constructions in various ways, and we find that they fail in a variety of ways to distinguish between them, suggesting that they don’t adequately represent their meaning or capture the lexical properties of phrasal heads. 2024.lrec-main.336 @@ -4015,7 +4015,7 @@ KaiyuHuang AnqiZhao JunpengLiu - DegenHuang + DegenHuang 3812–3824 Previous studies employ the autoregressive translation (AT) paradigm in the document-to-document neural machine translation. These methods extend the translation unit from a single sentence to a pseudo-document and encodes the full pseudo-document, avoiding the redundant computation problem in context. However, the AT methods cannot parallelize decoding and struggle with error accumulation, especially when the length of sentences increases. In this work, we propose a context-aware non-autoregressive framework with the sentence-aligned connectionist temporal classification (SA-CTC) loss for document-level neural machine translation. In particular, the SA-CTC loss reduces the search space of the decoding path by fixing the positions of the beginning and end tokens for each sentence in the document. Meanwhile, the context-aware architecture introduces preset nodes to represent sentence-level information and utilizes a hierarchical attention structure to regulate the attention hypothesis space. Experimental results show that our proposed method can achieve competitive performance compared with several strong baselines. Our method implements non-autoregressive modeling in Doc-to-Doc translation manner, achieving an average 46X decoding speedup compared to the document-level AT baselines on three benchmarks. 2024.lrec-main.337 @@ -4084,7 +4084,7 @@ Continual Reinforcement Learning for Controlled Text Generation VelizarShulev - KhalilSima’an + KhalilSima’an 3881–3889 Controlled Text Generation (CTG) steers the generation of continuations of a given context (prompt) by a Large Language Model (LLM) towards texts possessing a given attribute (e.g., topic, sentiment). In this paper we view CTG as a Continual Learning problem: how to learn at every step to steer next-word generation, without having to wait for end-of-sentence. This continual view is useful for online applications such as CTG for speech, where end-of-sentence is often uncertain. We depart from an existing model, the Plug-and-Play language models (PPLM), which perturbs the context at each step to better predict next-words that posses the desired attribute. While PPLM is intricate and has many hyper-parameters, we provide a proof that the PPLM objective function can be reduced to a Continual Reinforcement Learning (CRL) reward function, thereby simplifying PPLM and endowing it with a better understood learning framework. Subsequently, we present, the first of its kind, CTG algorithm that is fully based on CRL and exhibit promising empirical results. 2024.lrec-main.343 @@ -4145,7 +4145,7 @@ Controllable Sentence Simplification in <fixed-case>S</fixed-case>wedish Using Control Prefixes and Mined Paraphrases JuliusMonsen - ArneJonsson + ArneJonsson 3943–3954 Making information accessible to diverse target audiences, including individuals with dyslexia and cognitive disabilities, is crucial. Automatic Text Simplification (ATS) systems aim to facilitate readability and comprehension by reducing linguistic complexity. However, they often lack customizability to specific user needs, and training data for smaller languages can be scarce. This paper addresses ATS in a Swedish context, using methods that provide more control over the simplification. A dataset of Swedish paraphrases is mined from large amounts of text and used to train ATS models utilizing prefix-tuning with control prefixes. We also introduce a novel data-driven method for selecting complexity attributes for controlling the simplification and compare it with previous approaches. Evaluation of the trained models using SARI and BLEU demonstrates significant improvements over the baseline — a fine-tuned Swedish BART model — and compared to previous Swedish ATS results. These findings highlight the effectiveness of employing paraphrase data in conjunction with controllable generation mechanisms for simplification. Additionally, the set of explored attributes yields similar results compared to previously used attributes, indicating their ability to capture important simplification aspects. 2024.lrec-main.349 @@ -4154,7 +4154,7 @@ Controlled Generation with Prompt Insertion for Natural Language Explanations in Grammatical Error Correction MasahiroKaneko - NaoakiOkazaki + NaoakiOkazaki 3955–3961 In Grammatical Error Correction (GEC), it is crucial to ensure the user’s comprehension of a reason for correction. Existing studies present tokens, examples, and hints for corrections, but do not directly explain the reasons in natural language. Although methods that use Large Language Models (LLMs) to provide direct explanations in natural language have been proposed for various tasks, no such method exists for GEC. Generating explanations for GEC corrections involves aligning input and output tokens, identifying correction points, and presenting corresponding explanations consistently. However, it is not straightforward to specify a complex format to generate explanations, because explicit control of generation is difficult with prompts. This study introduces a method called controlled generation with Prompt Insertion (PI) so that LLMs can explain the reasons for corrections in natural language. In PI, LLMs first correct the input text, and then we automatically extract the correction points based on the rules. The extracted correction points are sequentially inserted into the LLM’s explanation output as prompts, guiding the LLMs to generate explanations for the correction points. We also create an Explainable GEC (XGEC) dataset of correction reasons by annotating NUCLE, CoNLL2013, and CoNLL2014. Although generations from GPT-3.5 and ChatGPT using original prompts miss some correction points, the generation control using PI can explicitly guide to describe explanations for all correction points, contributing to improved performance in generating correction reasons. 2024.lrec-main.350 @@ -4176,7 +4176,7 @@ Conversational Grounding: Annotation and Analysis of Grounding Acts and Grounding Units BisweshMohapatra SeemabHassan - LaurentRomary + LaurentRomary JustineCassell 3967–3977 Successful conversations often rest on common understanding, where all parties are on the same page about the information being shared. This process, known as conversational grounding, is crucial for building trustworthy dialog systems that can accurately keep track of and recall the shared information. The proficiencies of an agent in grounding the conveyed information significantly contribute to building a reliable dialog system. Despite recent advancements in dialog systems, there exists a noticeable deficit in their grounding capabilities. Traum (Traum, 1995) provided a framework for conversational grounding introducing Grounding Acts and Grounding Units, but substantial progress, especially in the realm of Large Language Models, remains lacking. To bridge this gap, we present the annotation of two dialog corpora employing Grounding Acts, Grounding Units, and a measure of their degree of grounding. We discuss our key findings during the annotation and also provide a baseline model to test the performance of current Language Models in categorizing the grounding acts of the dialogs. Our work aims to provide a useful resource for further research in making conversations with machines better understood and more reliable in natural day-to-day collaborative dialogs. @@ -4186,8 +4186,8 @@ Converting Legacy Data to <fixed-case>CLDF</fixed-case>: A <fixed-case>FAIR</fixed-case> Exit Strategy for Linguistic Web Apps RobertForkel - DanielSwanson - StevenMoran + DanielSwanson + StevenMoran 3978–3982 In the mid 2000s, there were several large-scale US National Science Foundation (NSF) grants awarded to projects aiming at developing digital infrastructure and standards for different forms of linguistics data. For example, MultiTree encoded language family trees as phylogenies in XML and LL-MAP converted detailed geographic maps of endangered languages into KML. As early stand-alone website applications, these projects allowed researchers interested in comparative linguistics to explore language genealogies and areality, respectively. However as time passed, the technologies that supported these web apps became deprecated, unsupported, and inaccessible. Here we take a future-oriented approach to digital obsolescence and illustrate how to convert legacy linguistic resources into FAIR data via the Cross-Linguistic Data Formats (CLDF). CLDF is built on the W3C recommendations Model for Tabular Data and Metadata on the Web and Metadata Vocabulary for Tabular Data developed by the CSVW (CSV on the Web) working group. Thus, each dataset is modeled as a set of tabular data files described by metadata in JSON. These standards and the tools built to validate and manipulate them provide an accessible and extensible format for converting legacy linguistic web apps into FAIR datasets. 2024.lrec-main.353 @@ -4209,7 +4209,7 @@ <fixed-case>C</fixed-case>o<fixed-case>R</fixed-case>elation: Boosting Automatic <fixed-case>ICD</fixed-case> Coding through Contextualized Code Relation Learning - JunyuLuo + JunyuLuo XiaochenWang JiaqiWang AofeiChang @@ -4227,7 +4227,7 @@ YeLiu NatalieParde EugeneRohrbaugh - Philip S.Yu + Philip S.Yu 4008–4020 Naively assuming English as a source language may hinder cross-lingual transfer for many languages by failing to consider the importance of language contact. Some languages are more well-connected than others, and target languages can benefit from transferring from closely related languages; for many languages, the set of closely related languages does not include English. In this work, we study the impact of source language for cross-lingual transfer, demonstrating the importance of selecting source languages that have high contact with the target language. We also construct a novel benchmark dataset for close contact Chinese-Japanese-Korean-Vietnamese (CJKV) languages to further encourage in-depth studies of language contact. To comprehensively capture contact between these languages, we propose to integrate Romanized transcription beyond textual scripts via Contrastive Learning objectives, leading to enhanced cross-lingual representations and effective zero-shot cross-lingual transfer. 2024.lrec-main.356 @@ -4239,7 +4239,7 @@ EricSanders Antal P.J.van den Bosch DouweZeldenrust - Henkvan den Heuvel + Henkvan den Heuvel 4021–4029 The Dutch Dialect Database (also known as the ‘Nederlandse Dialectenbank’) contains dialectal variations of Dutch that were recorded all over the Netherlands in the second half of the twentieth century. A subset of these recordings of about 300 hours were enriched with manual orthographic transcriptions, using non-standard approximations of dialectal speech. In this paper we describe the creation of a corpus containing both the audio recordings and their corresponding transcriptions and focus on our method for aligning the recordings with the transcriptions and the metadata. 2024.lrec-main.357 @@ -4302,7 +4302,7 @@ Counterfactual Dialog Mixing as Data Augmentation for Task-Oriented Dialog Systems SebastianSteindl - UlrichSchäfer + UlrichSchäfer BerndLudwig 4078–4087 High-quality training data for Task-Oriented Dialog (TOD) systems is costly to come by if no corpora are available. One method to extend available data is data augmentation. Yet, the research into and adaptation of data augmentation techniques for TOD systems is limited in comparison with other data modalities. We propose a novel, causally-flavored data augmentation technique called Counterfactual Dialog Mixing (CDM) that generates realistic synthetic dialogs via counterfactuals to increase the amount of training data. We demonstrate the method on a benchmark dataset and show that a model trained to classify the counterfactuals from the original data fails to do so, which strengthens the claim of creating realistic synthetic dialogs. To evaluate the effectiveness of CDM, we train a current architecture on a benchmark dataset and compare the performance with and without CDM. By doing so, we achieve state-of-the-art on some metrics. We further investigate the external generalizability and a lower resource setting. To evaluate the models, we adopted an interactive evaluation scheme. @@ -4405,7 +4405,7 @@ <fixed-case>C</fixed-case>ross<fixed-case>T</fixed-case>une: Black-Box Few-Shot Classification with Label Enhancement DanqingLuo - ChenZhang + ChenZhang YanZhang HaizhouLi 4185–4197 @@ -4425,7 +4425,7 @@ <fixed-case>CSSW</fixed-case>iki: A <fixed-case>C</fixed-case>hinese Sentence Simplification Dataset with Linguistic and Content Operations FengkaiLiu - John S. Y.Lee + John S. Y.Lee 4205–4213 Sentence Simplification aims to make sentences easier to read and understand. With most effort on corpus development focused on English, the amount of annotated data is limited in Chinese. To address this need, we introduce CSSWiki, an open-source dataset for Chinese sentence simplification based on Wikipedia. This dataset contains 1.6k source sentences paired with their simplified versions. Each sentence pair is annotated with operation tags that distinguish between linguistic and content modifications. We analyze differences in annotation scheme and data statistics between CSSWiki and existing datasets. We then report baseline sentence simplification performance on CSSWiki using zero-shot and few-shot approaches with Large Language Models. 2024.lrec-main.375 @@ -4485,7 +4485,7 @@ Cam-Van ThiNguyen Cao-BachNguyen Duc-TrongLe - Quang-ThuyHa + Quang-ThuyHa 4259–4265 Emotion recognition in conversation (ERC) is a crucial task in natural language processing and affective computing. This paper proposes MultiDAG+CL, a novel approach for Multimodal Emotion Recognition in Conversation (ERC) that employs Directed Acyclic Graph (DAG) to integrate textual, acoustic, and visual features within a unified framework. The model is enhanced by Curriculum Learning (CL) to address challenges related to emotional shifts and data imbalance. Curriculum learning facilitates the learning process by gradually presenting training samples in a meaningful order, thereby improving the model’s performance in handling emotional variations and data imbalance. Experimental results on the IEMOCAP and MELD datasets demonstrate that the MultiDAG+CL models outperform baseline models. We release the code for and experiments: https://github.com/vanntc711/MultiDAG-CL. 2024.lrec-main.380 @@ -4619,7 +4619,7 @@ IleanaRugina RumenDangovski LiJing - PreslavNakov + PreslavNakov MarinSoljacic 4392–4403 Attention mechanisms play a crucial role in the neural revolution of Natural Language Processing (NLP). With the growth of attention-based models, several pruning techniques have been developed to identify and exploit sparseness, making these models more efficient. Most efforts focus on hard-coding attention patterns or pruning attention weights based on training data. We propose Attention Pruning (AP), a framework that observes attention patterns in a fixed dataset and generates a global sparseness mask. AP saves 90% of attention computation for language modeling and about 50% for machine translation and GLUE tasks, maintaining result quality. Our method reveals important distinctions between self- and cross-attention patterns, guiding future NLP research. Our framework can reduce both latency and memory requirements for any attention-based model, aiding in the development of improved models for existing or new NLP applications. We have demonstrated this with encoder and autoregressive transformer models using Triton GPU kernels and make our code publicly available at https://github.com/irugina/AP @@ -4641,7 +4641,7 @@ Dataset of Quotation Attribution in <fixed-case>G</fixed-case>erman News Articles FynnPetersen-Frey - ChrisBiemann + ChrisBiemann 4412–4422 Extracting who says what to whom is a crucial part in analyzing human communication in today’s abundance of data such as online news articles. Yet, the lack of annotated data for this task in German news articles severely limits the quality and usability of possible systems. To remedy this, we present a new, freely available, creative-commons-licensed dataset for quotation attribution in German news articles based on WIKINEWS. The dataset provides curated, high-quality annotations across 1000 documents (250,000 tokens) in a fine-grained annotation schema enabling various downstream uses for the dataset. The annotations not only specify who said what but also how, in which context, to whom and define the type of quotation. We specify our annotation schema, describe the creation of the dataset and provide a quantitative analysis. Further, we describe suitable evaluation metrics, apply two existing systems for quotation attribution, discuss their results to evaluate the utility of our dataset and outline use cases of our dataset in downstream tasks. 2024.lrec-main.394 @@ -4677,7 +4677,7 @@ MerveÜnlü Menevşe YusufcanManav EbruArisoy - ArzucanÖzgür + ArzucanÖzgür 4449–4455 This paper focuses on dealing with data scarcity in spoken question answering (QA) using automatic question-answer generation and a carefully selected fine-tuning strategy that leverages limited annotated data (paragraphs and question-answer pairs). Spoken QA is a challenging task due to using spoken documents, i.e., erroneous automatic speech recognition (ASR) transcriptions, and the scarcity of spoken QA data. We propose a framework for utilizing limited annotated data effectively to improve spoken QA performance. To deal with data scarcity, we train a question-answer generation model with annotated data and then produce large amounts of question-answer pairs from unannotated data (paragraphs). Our experiments demonstrate that incorporating limited annotated data and the automatically generated data through a carefully selected fine-tuning strategy leads to 5.5% relative F1 gain over the model trained only with annotated data. Moreover, the proposed framework is also effective in high ASR errors. 2024.lrec-main.397 @@ -4705,8 +4705,8 @@ <fixed-case>DECM</fixed-case>: Evaluating Bilingual <fixed-case>ASR</fixed-case> Performance on a Code-switching/mixing Benchmark Enes YavuzUgan - Ngoc-QuanPham - AlexanderWaibel + Ngoc-QuanPham + AlexanderWaibel 4468–4475 Automatic Speech Recognition has made significant progress, but challenges persist. Code-switched (CSW) Speech presents one such challenge, involving the mixing of multiple languages by a speaker. Even when multilingual ASR models are trained, each utterance on its own usually remains monolingual. We introduce an evaluation dataset for German-English CSW, with German as the matrix language and English as the embedded language. The dataset comprises spontaneous speech from diverse domains, enabling realistic CSW evaluation in German-English. It includes splits with varying degrees of CSW to facilitate specialized model analysis. As it is difficult to collect CSW data for all language pairs, the provision of such evaluation data, is crucial for developing and analyzing ASR models capable of generalizing across unseen pairs. Detailed data statistics are presented, and state-of-the-art (SOTA) multilingual models are evaluated showing challanges of CSW speech. 2024.lrec-main.400 @@ -4724,7 +4724,7 @@ ChengruSong DiZhang KunGai - DeyiXiong + DeyiXiong 4476–4487 Large language models have demonstrated exceptional capability in natural language understanding and generation. However, their generation speed is limited by the inherently sequential nature of their decoding process, posing challenges for real-time applications. This paper introduces Lexical Unit Decoding (LUD), a novel decoding methodology implemented in a data-driven manner, accelerating the decoding process without sacrificing output quality. The core of our approach is the observation that a pre-trained language model can confidently predict multiple contiguous tokens, forming the basis for a lexical unit, in which these contiguous tokens could be decoded in parallel. Extensive experiments validate that our method substantially reduces decoding time while maintaining generation quality, i.e., 33% speed up on natural language generation with no quality loss, and 30% speed up on code generation with a negligible quality loss of 3%. Distinctively, LUD requires no auxiliary models and does not require changes to existing architectures. It can also be integrated with other decoding acceleration methods, thus achieving an even more pronounced inference efficiency boost. We posit that the foundational principles of LUD could define a new decoding paradigm for future language models, enhancing their applicability for a broader spectrum of applications. All codes are be publicly available at https://github.com/tjunlp-lab/Lexical-Unit-Decoding-LUD-. 2024.lrec-main.401 @@ -4856,7 +4856,7 @@ ZhihaoFan ZejunLi RuipuLuo - XuanjingHuang + XuanjingHuang ZhongyuWei 4605–4616 Vision-and-Language navigation (VLN) requires an agent to navigate in unseen environment by following natural language instruction. For task completion, the agent needs to align and integrate various navigation modalities, including instruction, observation and navigation history. Existing works primarily concentrate on cross-modal attention at the fusion stage to achieve this objective. Nevertheless, modality features generated by disparate uni-encoders reside in their own spaces, leading to a decline in the quality of cross-modal fusion and decision. To address this problem, we propose a Dual-levEL AligNment (DELAN) framework by cross-modal contrastive learning. This framework is designed to align various navigation-related modalities before fusion, thereby enhancing cross-modal interaction and action decision-making. Specifically, we divide the pre-fusion alignment into dual levels: instruction-history level and landmark-observation level according to their semantic correlations. We also reconstruct a dual-level instruction for adaptation to the dual-level alignment. As the training signals for pre-fusion alignment are extremely limited, self-supervised contrastive learning strategies are employed to enforce the matching between different modalities. Our approach seamlessly integrates with the majority of existing models, resulting in improved navigation performance on various VLN benchmarks, including R2R, R4R, RxR and CVDN. @@ -4868,8 +4868,8 @@ ShimingHe YuHong ShuaiYang - JianminYao - GuodongZhou + JianminYao + GuodongZhou 4617–4625 We tackle Event Argument Extraction (EAE) in the manner of template-based generation. Based on our exploration of generative EAE, it suffers from several issues, such as multiple arguments of one role, generating words out of context and inconsistency with prescribed format. We attribute it to the weakness of following complex input prompts. To address these problems, we propose the demonstration retrieval-augmented generative EAE (DRAGEAE), containing two components: event knowledge-injected generator (EKG) and demonstration retriever (DR). EKG employs event knowledge prompts to capture role dependencies and semantics. DR aims to search informative demonstrations from training data, facilitating the conditional generation of EKG. To train DR, we use the probability-based rankings from large language models (LLMs) as supervised signals. Experimental results on ACE-2005, RAMS and WIKIEVENTS demonstrate that our method outperforms all strong baselines and it can be generalized to various datasets. Further analysis is conducted to discuss the impact of diverse LLMs and prove that our model alleviates the above issues. 2024.lrec-main.412 @@ -4878,7 +4878,7 @@ Denoising Labeled Data for Comment Moderation Using Active Learning AndražPelicon - Vanja MladenKaran + Vanja MladenKaran RaviShekhar MatthewPurver SenjaPollak @@ -4892,7 +4892,7 @@ DeokhyungKang BaikjinJung YunsuKim - Gary GeunbaeLee + Gary GeunbaeLee 4634–4640 In table-text open-domain question answering, a retriever system retrieves relevant evidence from tables and text to answer questions. Previous studies in table-text open-domain question answering have two common challenges: firstly, their retrievers can be affected by false-positive labels in training datasets; secondly, they may struggle to provide appropriate evidence for questions that require reasoning across the table. To address these issues, we propose Denoised Table-Text Retriever (DoTTeR). Our approach involves utilizing a denoised training dataset with fewer false positive labels by discarding instances with lower question-relevance scores measured through a false positive detection model. Subsequently, we integrate table-level ranking information into the retriever to assist in finding evidence for questions that demand reasoning across the table. To encode this ranking information, we fine-tune a rank-aware column encoder to identify minimum and maximum values within a column. Experimental results demonstrate that DoTTeR significantly outperforms strong baselines on both retrieval recall and downstream QA tasks. Our code is available at https://github.com/deokhk/DoTTeR. 2024.lrec-main.414 @@ -4927,9 +4927,9 @@ Depth-Wise Attention (<fixed-case>DWA</fixed-case>tt): A Layer Fusion Method for Data-Efficient Classification - MuhammadElNokrashy + MuhammadElNokrashy BadrAlKhamissi - MonaDiab + MonaDiab 4665–4674 Language Models pretrained on large textual data have been shown to encode different types of knowledge simultaneously. Traditionally, only the features from the last layer are used when adapting to new tasks or data. We put forward that, when using or finetuning deep pretrained models, intermediate layer features that may be relevant to the downstream task are buried too deep to be used efficiently in terms of needed samples or steps. To test this, we propose a new layer fusion method: Depth-Wise Attention (DWAtt), to help re-surface signals from non-final layers. We compare DWAtt to a basic concatenation-based layer fusion method (Concat), and compare both to a deeper model baseline—all kept within a similar parameter budget. Our findings show that DWAtt and Concat are more step- and sample-efficient than the baseline, especially in the few-shot setting. DWAtt outperforms Concat on larger data sizes. On CoNLL-03 NER, layer fusion shows 3.68 − 9.73% F1 gain at different few-shot sizes. The layer fusion models presented significantly outperform the baseline in various training scenarios with different data sizes, architectures, and training constraints. 2024.lrec-main.417 @@ -4976,7 +4976,7 @@ SeonminKoo HyeonseokMoon JaehyungSeo - HeuiseokLim + HeuiseokLim 4705–4716 Recent machine translation (MT) systems have overcome language barriers for a wide range of users, yet they still carry the risk of critical meaning deviation. Critical error detection (CED) is a task that identifies an inherent risk of catastrophic meaning distortions in the machine translation output. With the importance of reflecting cultural elements in detecting critical errors, we introduce the culture-aware “Politeness” type in detecting English-Korean critical translation errors. Besides, we facilitate two tasks by providing multiclass labels: critical error detection and critical error type classification (CETC). Empirical evaluations reveal that our introduced data augmentation approach using a newly presented perturber significantly outperforms existing baselines in both tasks. Further analysis highlights the significance of multiclass labeling by demonstrating its superior effectiveness compared to binary labels. 2024.lrec-main.421 @@ -5060,7 +5060,7 @@ Detection, Diagnosis, and Explanation: A Benchmark for <fixed-case>C</fixed-case>hinese Medial Hallucination Evaluation ChengfengDou - YingZhang + YingZhang YanyuanChen ZhiJin WenpinJiao @@ -5076,7 +5076,7 @@ Developing a Benchmark for Pronunciation Feedback: Creation of a Phonemically Annotated Speech Corpus of isi<fixed-case>Z</fixed-case>ulu Language Learner Speech AlexandraO’Neil NilsHjortnaes - FrancisTyers + FrancisTyers ZinhleNkosi ThulileNdlovu ZaneleMlondo @@ -5091,7 +5091,7 @@ LuciePolakova JiříMírovský ŠárkaZikánová - EvaHajicova + EvaHajicova 4802–4810 We introduce the first version of the Czech RST Discourse Treebank, a collection of Czech journalistic texts manually annotated using the Rhetorical Structure Theory (RST), a global coherence model proposed by Mann and Thompson (1988). Each document in the corpus is represented as a single tree-like structure, where discourse units are interconnected through hierarchical rhetorical relations and their relative importance for the main purpose of a text is modeled by the nuclearity principle. The treebank is freely available in the LINDAT/CLARIAH-CZ repository under the Creative Commons license; for some documents, it includes two gold annotations representing divergent yet relevant interpretations. The paper outlines the annotation process, provides corpus statistics and evaluation, and discusses the issue of consistency associated with the global level of textual interpretation. In general, good agreement on the structure and labeling could be achieved on the lowest, local tree level and on the identification of the most central (nuclear) elementary discourse units. Disagreements mostly concerned segmentation and, in the structure, differences in the stepwise process of linking the largest text blocks. The project contributes to the advancement of RST research and its application to real-world text analysis challenges. 2024.lrec-main.430 @@ -5260,7 +5260,7 @@ Discourse Structure for the <fixed-case>M</fixed-case>inecraft Corpus KateThompson JulieHunter - NicholasAsher + NicholasAsher 4957–4967 We provide a new linguistic resource: The Minecraft Structured Dialogue Corpus (MSDC), a discourse annotated version of the Minecraft Dialogue Corpus (MDC; Narayan-Chen et al., 2019), with complete, situated discourse structures in the style of SDRT (Asher and Lascarides, 2003). Our structures feature both linguistic discourse moves and nonlinguistic actions. To show computational tractability, we train a discourse parser with a novel “2 pass architecture” on MSDC that gives excellent results on attachment prediction and relation labeling tasks especially long distance attachments. 2024.lrec-main.444 @@ -5291,7 +5291,7 @@ ChloéBraud AmirZeldes LauraRivière - Yang JanetLiu + Yang JanetLiu PhilippeMuller DamienSileo TatsuyaAoyama @@ -5390,7 +5390,7 @@ MingxiaoLi JingyuanSun JesseDavis - Marie-FrancineMoens + Marie-FrancineMoens 5109–5118 Argument structure learning (ASL) entails predicting relations between arguments. Because it can structure a document to facilitate its understanding, it has been widely applied in many fields (medical, commercial, and scientific domains). Despite its broad utilization, ASL remains a challenging task because it involves examining the complex relationships between the sentences in a potentially unstructured discourse. To resolve this problem, we have developed a simple yet effective approach called Dual-tower Multi-scale cOnvolution neural Network (DMON) for the ASL task. Specifically, we organize arguments into a relationship matrix that together with the argument embeddings forms a relationship tensor and design a mechanism to capture relations with contextual arguments. Experimental results on three different-domain argument mining datasets demonstrate that our framework outperforms state-of-the-art models. We will release the code after paper acceptance. 2024.lrec-main.455 @@ -5403,7 +5403,7 @@ FuliFeng ZifengRen MoxinLi - Tat-SengChua + Tat-SengChua 5119–5131 Table-text document (e.g., financial reports) understanding has attracted increasing attention in recent two years. TAT-DQA is a realistic setting for the understanding of visually-rich table-text documents, which involves answering associated questions requiring discrete reasoning. Most existing work relies on token-level semantics, falling short in the reasoning across document elements such as quantities and dates. To address this limitation, we propose a novel Doc2SoarGraph model that exploits element-level semantics and employs Semantic-oriented hierarchical Graph structures to capture the differences and correlations among different elements within the given document and question. Extensive experiments on the TAT-DQA dataset reveal that our model surpasses the state-of-the-art conventional method (i.e., MHST) and large language model (i.e., ChatGPT) by 17.73 and 6.49 points respectively in terms of Exact Match (EM) metric, demonstrating exceptional effectiveness. 2024.lrec-main.456 @@ -5432,7 +5432,7 @@ FranckDernoncourt JiuxiangGu RamitSawhney - PreslavNakov + PreslavNakov DineshManocha RajivJain 5140–5155 @@ -5473,7 +5473,7 @@ ZikangLiu Ze-FengGao DaweiGao - Wayne XinZhao + Wayne XinZhao YaliangLi BolinDing Ji-RongWen @@ -5524,8 +5524,8 @@ TajaKuzman PeterRupnik NikolaLjubešić - MiquelEsplà-Gomis - GemaRamírez-Sánchez + MiquelEsplà-Gomis + GemaRamírez-Sánchez AntonioToral 5221–5234 Large, curated, web-crawled corpora play a vital role in training language models (LMs). They form the lion’s share of the training data in virtually all recent LMs, such as the well-known GPT, LLaMA and XLM-RoBERTa models. However, despite this importance, relatively little attention has been given to the quality of these corpora. In this paper, we compare four of the currently most relevant large, web-crawled corpora (CC100, MaCoCu, mC4 and OSCAR) across eleven lower-resourced European languages. Our approach is two-fold: first, we perform an intrinsic evaluation by performing a human evaluation of the quality of samples taken from different corpora; then, we assess the practical impact of the qualitative differences by training specific LMs on each of the corpora and evaluating their performance on downstream tasks. We find that there are clear differences in quality of the corpora, with MaCoCu and OSCAR obtaining the best results. However, during the extrinsic evaluation, we actually find that the CC100 corpus achieves the highest scores. We conclude that, in our experiments, the quality of the web-crawled corpora does not seem to play a significant role when training LMs. @@ -5535,7 +5535,7 @@ Do Large Language Models Understand Mansplaining? Well, Actually... CarlaPerez Almendros - JoseCamacho-Collados + JoseCamacho-Collados 5235–5246 Gender bias has been widely studied by the NLP community. However, other more subtle variations of it, such as mansplaining, have yet received little attention. Mansplaining is a discriminatory behaviour that consists of a condescending treatment or discourse towards women. In this paper, we introduce and analyze Well, actually..., a corpus of 886 mansplaining stories experienced by women. We analyze the corpus in terms of features such as offensiveness, sentiment or misogyny, among others. We also explore to what extent Large Language Models (LLMs) can understand and identify mansplaining and other gender-related microaggressions. Specifically, we experiment with ChatGPT-3.5-Turbo and LLaMA-2 (13b and 70b), with both targeted and open questions. Our findings suggest that, although they can identify mansplaining to some extent, LLMs still struggle to point out this attitude and will even reproduce some of the social patterns behind mansplaining situations, for instance by praising men for giving unsolicited advice to women. 2024.lrec-main.466 @@ -5544,7 +5544,7 @@ Domain Adaptation for Dense Retrieval and Conversational Dense Retrieval through Self-Supervision by Meticulous Pseudo-Relevance Labeling MinghanLi - EricGaussier + EricGaussier 5247–5259 Recent studies have demonstrated that the ability of dense retrieval models to generalize to target domains with different distributions is limited, which contrasts with the results obtained with interaction-based models. Prior attempts to mitigate this challenge involved leveraging adversarial learning and query generation approaches, but both approaches nevertheless resulted in limited improvements. In this paper, we propose to combine the query-generation approach with a self-supervision approach in which pseudo-relevance labels are automatically generated on the target domain. To accomplish this, a T5-3B model is utilized for pseudo-positive labeling, and meticulous hard negatives are chosen. We also apply this strategy on conversational dense retrieval model for conversational search. A similar pseudo-labeling approach is used, but with the addition of a query-rewriting module to rewrite conversational queries for subsequent labeling. This proposed approach enables a model’s domain adaptation with real queries and documents from the target dataset. Experiments on standard dense retrieval and conversational dense retrieval models both demonstrate improvements on baseline models when they are fine-tuned on the pseudo-relevance labeled data. 2024.lrec-main.467 @@ -5580,7 +5580,7 @@ QinChen QiZhang TaoGui - XuanjingHuang + XuanjingHuang 5286–5298 Domain adaption has been widely adapted for cross-domain sentiment analysis to transfer knowledge from the source domain to the target domain. Whereas, most methods are proposed under the assumption that the target (test) domain is known, making them fail to generalize well on unknown test data that is not always available in practice. In this paper, we focus on the problem of domain generalization for cross-domain sentiment analysis. Specifically, we propose a backdoor adjustment-based causal model to disentangle the domain-specific and domain-invariant representations that play essential roles in tackling domain shift. First, we rethink the cross-domain sentiment analysis task in a causal view to model the causal-and-effect relationships among different variables. Then, to learn an invariant feature representation, we remove the effect of domain confounders (e.g., domain knowledge) using the backdoor adjustment. A series of experiments over many homologous and diverse datasets show the great performance and robustness of our model by comparing it with the state-of-the-art domain generalization baselines. 2024.lrec-main.470 @@ -5613,8 +5613,8 @@ <fixed-case>DORE</fixed-case>: A Dataset for <fixed-case>P</fixed-case>ortuguese Definition Generation Anna BeatrizDimas Furtado TharinduRanasinghe - FredericBlain - RuslanMitkov + FredericBlain + RuslanMitkov 5315–5322 Definition modelling (DM) is the task of automatically generating a dictionary definition of a specific word. Computational systems that are capable of DM can have numerous applications benefiting a wide range of audiences. As DM is considered a supervised natural language generation problem, these systems require large annotated datasets to train the machine learning (ML) models. Several DM datasets have been released for English and other high-resource languages. While Portuguese is considered a mid/high-resource language in most natural language processing tasks and is spoken by more than 200 million native speakers, there is no DM dataset available for Portuguese. In this research, we fill this gap by introducing DORE; the first dataset for Definition MOdelling for PoRtuguEse containing more than 100,000 definitions. We also evaluate several deep learning based DM models on DORE and report the results. The dataset and the findings of this paper will facilitate research and study of Portuguese in wider contexts. 2024.lrec-main.473 @@ -5652,7 +5652,7 @@ YujieChen ShenglanWu HaoyuanHu - XinyuDai + XinyuDai 5350–5364 Open Domain Multi-Hop Question Answering (ODMHQA) plays a crucial role in Natural Language Processing (NLP) by aiming to answer complex questions through multi-step reasoning over retrieved information from external knowledge sources. Recently, Large Language Models (LLMs) have demonstrated remarkable performance in solving ODMHQA owing to their capabilities including planning, reasoning, and utilizing tools. However, LLMs may generate off-topic answers when attempting to solve ODMHQA, namely the generated answers are irrelevant to the original questions. This issue of off-topic answers accounts for approximately one-third of incorrect answers, yet remains underexplored despite its significance. To alleviate this issue, we propose the Discriminate→Re-Compose→Re- Solve→Re-Decompose (Dr3) mechanism. Specifically, the Discriminator leverages the intrinsic capabilities of LLMs to judge whether the generated answers are off-topic. In cases where an off-topic answer is detected, the Corrector performs step-wise revisions along the reversed reasoning chain (Re-Compose→Re-Solve→Re-Decompose) until the final answer becomes on-topic. Experimental results on the HotpotQA and 2WikiMultiHopQA datasets demonstrate that our Dr3 mechanism considerably reduces the occurrence of off-topic answers in ODMHQA by nearly 13%, improving the performance in Exact Match (EM) by nearly 3% compared to the baseline method without the Dr3 mechanism. 2024.lrec-main.476 @@ -5677,7 +5677,7 @@ MickaelRouvier PacomeConstant Dit Beaufils NataliaGrabar - BéatriceDaille + BéatriceDaille SolenQuiniou EmmanuelMorin Pierre-AntoineGourraud @@ -5735,7 +5735,7 @@ Do JuneMin VeronicaPerez-Rosas KenResnicow - RadaMihalcea + RadaMihalcea 5437–5449 In this paper, we study the problem of multi-reward reinforcement learning to jointly optimize for multiple text qualities for natural language generation. We focus on the task of counselor reflection generation, where we optimize the generators to simultaneously improve the fluency, coherence, and reflection quality of generated counselor responses. We introduce two novel bandit methods, DynaOpt and C-DynaOpt, which rely on the broad strategy of combining rewards into a single value and optimizing them simultaneously. Specifically, we employ non-contextual and contextual multi-arm bandits to dynamically adjust multiple reward weights during training. Through automatic and manual evaluations, we show that our proposed techniques, DynaOpt and C-DynaOpt, outperform existing naive and bandit baselines, showcasing their potential for enhancing language models. 2024.lrec-main.483 @@ -5757,7 +5757,7 @@ FrancescaGrasso StefanoLocci GiovanniSiragusa - LuigiDi Caro + LuigiDi Caro 5461–5472 Anthropogenic ecological crisis constitutes a significant challenge that all within the academy must urgently face, including the Natural Language Processing (NLP) community. While recent years have seen increasing work revolving around climate-centric discourse, crucial environmental and ecological topics outside of climate change remain largely unaddressed, despite their prominent importance. Mainstream NLP tasks, such as sentiment analysis, dominate the scene, but there remains an untouched space in the literature involving the analysis of environmental impacts of certain events and practices. To address this gap, this paper presents EcoVerse, an annotated English Twitter dataset of 3,023 tweets spanning a wide spectrum of environmental topics. We propose a three-level annotation scheme designed for Eco-Relevance Classification, Stance Detection, and introducing an original approach for Environmental Impact Analysis. We detail the data collection, filtering, and labeling process that led to the creation of the dataset. Remarkable Inter-Annotator Agreement indicates that the annotation scheme produces consistent annotations of high quality. Subsequent classification experiments using BERT-based models, including ClimateBERT, are presented. These yield encouraging results, while also indicating room for a model specifically tailored for environmental texts. The dataset is made freely available to stimulate further research. 2024.lrec-main.485 @@ -5796,7 +5796,7 @@ LubosSteskal Lilja CharlotteStorset HuilingYou - LiljaØvrelid + LiljaØvrelid 5495–5506 We present EDEN, the first Norwegian dataset annotated with event information at the sentence level, adapting the widely used ACE event schema to Norwegian. The paper describes the manual annotation of Norwegian text as well as transcribed speech in the news domain, together with inter-annotator agreement and discussions of relevant dataset statistics. We also present preliminary modeling results using a graph-based event parser. The resulting dataset will be freely available for download and use. 2024.lrec-main.488 @@ -5809,7 +5809,7 @@ Pier FeliceBalestrucci LucaAnselma CristianBernareggi - AlessandroMazzei + AlessandroMazzei 5507–5519 This paper describes a corpus consisting of real-world dialogues in English between users and a task-oriented conversational agent, with interactions revolving around the description of finite state automata. The creation of this corpus is part of a larger research project aimed at developing tools for an easier access to educational content, especially in STEM fields, for users with visual impairments. The development of this corpus was precisely motivated by the aim of providing a useful resource to support the design of such tools. The core feature of this corpus is that its creation involved both sighted and visually impaired participants, thus allowing for a greater diversity of perspectives and giving the opportunity to identify possible differences in the way the two groups of participants interacted with the agent. The paper introduces this corpus, giving an account of the process that led to its creation, i.e. the methodology followed to obtain the data, the annotation scheme adopted, and the analysis of the results. Finally, the paper reports the results of a classification experiment on the annotated corpus, and an additional experiment to assess the annotation capabilities of three large language models, in view of a further expansion of the corpus. 2024.lrec-main.489 @@ -5895,7 +5895,7 @@ <fixed-case>EFTNAS</fixed-case>: Searching for Efficient Language Models in First-Order Weight-Reordered Super-Networks - Juan PabloMunoz + Juan PabloMunoz YiZheng NileshJain 5596–5608 @@ -5909,7 +5909,7 @@ JiahuanPei Jan deWit MohammadAliannejadi - EmielKrahmer + EmielKrahmer Jos T.P.Dobber Jos A.Bosch 5609–5621 @@ -5942,7 +5942,7 @@ ZhigangKan LiwenPeng LinboQiao - DongshengLi + DongshengLi 5644–5653 Event Extraction (EE) is a challenging task that aims to extract structural event-related information from unstructured text. Traditional methods for EE depend on manual annotations, which are both expensive and scarce. Furthermore, the existing datasets mostly follow the long-tail distribution, severely hindering the previous methods of modeling tail types. Two techniques can address this issue: transfer learning and data generation. However, the existing methods based on transfer learning still rely on pre-training with a large amount of labeled data in the source domain. Additionally, the quality of data generated by previous data generation methods is difficult to control. In this paper, leveraging Large Language Models (LLMs), we propose novel methods for event extraction and generation based on dialogues, overcoming the problems of relying on source domain data and maintaining data quality. Specifically, this paper innovatively transforms the EE task into multi-turn dialogues, guiding LLMs to learn event schemas from historical dialogue information and output structural events. Furthermore, we introduce a novel LLM-based method for generating high-quality data, significantly improving traditional models’ performance with various paradigms and structures, especially on tail types. Adequate experiments on real-world datasets demonstrate the effectiveness of the proposed event extraction and data generation methods. 2024.lrec-main.501 @@ -5951,7 +5951,7 @@ <fixed-case>EMOLIS</fixed-case> App and Dataset to Find Emotionally Close Cartoons SoëlieLerch - PatriceBellot + PatriceBellot ElisabethMurisasco EmmanuelBruno 5654–5659 @@ -6081,7 +6081,7 @@ Empowering Tree-structured Entailment Reasoning: Rhetorical Perception and <fixed-case>LLM</fixed-case>-driven Interpretability LongyinZhang BoweiZou - Ai TiAw + Ai TiAw 5783–5793 The study delves into the construction of entailment trees for science question answering (SQA), employing a novel framework termed Tree-structured Entailment Reasoning (TER). Current research on entailment tree construction presents significant challenges, primarily due to the ambiguities and similarities among candidate science facts, which considerably complicate the fact retrieval process. Moreover, the existing models exhibit limitations in effectively modeling the sequence of reasoning states, understanding the intricate relations between neighboring entailment tree nodes, and generating intermediate conclusions. To this end, we explore enhancing the TER performance from three aspects: First, improving retrieval capabilities by modeling and referring to the chained reasoning states; Second, enhancing TER by infusing knowledge that bridges the gap between reasoning types and rhetorical relations. Third, exploring a task-specific large language model tuning scheme to mitigate deficiencies in intermediate conclusion generation. Experiments on the English EntailmentBank demonstrate the effectiveness of the proposed methods in augmenting the quality of tree-structured entailment reasoning to a certain extent. 2024.lrec-main.513 @@ -6102,7 +6102,7 @@ KennethLai RichardBrutti LuciaDonatelli - JamesPustejovsky + JamesPustejovsky 5806–5818 Abstract Meaning Representation (AMR) is a general-purpose meaning representation that has become popular for its clear structure, ease of annotation and available corpora, and overall expressiveness. While AMR was designed to represent sentence meaning in English text, recent research has explored its adaptation to broader domains, including documents, dialogues, spatial information, cross-lingual tasks, and gesture. In this paper, we present an annotated corpus of multimodal (speech and gesture) AMR in a task-based setting. Our corpus is multilayered, containing temporal alignments to both the speech signal and to descriptions of gesture morphology. We also capture coreference relationships across modalities, enabling fine-grained analysis of how the semantics of gesture and natural language interact. We discuss challenges that arise when identifying cross-modal coreference and anaphora, as well as in creating and evaluating multimodal corpora in general. Although we find AMR’s abstraction away from surface form (in both language and gesture) occasionally too coarse-grained to capture certain cross-modal interactions, we believe its flexibility allows for future work to fill in these gaps. Our corpus and annotation guidelines are available at https://github.com/klai12/encoding-gesture-multimodal-dialogue. 2024.lrec-main.515 @@ -6112,7 +6112,7 @@ Endowing Neural Language Learners with Human-like Biases: A Case Study on Dependency Length Minimization YuqingZhang TessaVerhoef - Gertjanvan Noord + Gertjanvan Noord AriannaBisazza 5819–5832 Natural languages show a tendency to minimize the linear distance between heads and their dependents in a sentence, known as dependency length minimization (DLM). Such a preference, however, has not been consistently replicated with neural agent simulations. Comparing the behavior of models with that of human learners can reveal which aspects affect the emergence of this phenomenon. In this work, we investigate the minimal conditions that may lead neural learners to develop a DLM preference. We add three factors to the standard neural-agent language learning and communication framework to make the simulation more realistic, namely: (i) the presence of noise during listening, (ii) context-sensitivity of word use through non-uniform conditional word distributions, and (iii) incremental sentence processing, or the extent to which an utterance’s meaning can be guessed before hearing it entirely. While no preference appears in production, we show that the proposed factors can contribute to a small but significant learning advantage of DLM for listeners of verb-initial languages. @@ -6205,7 +6205,7 @@ JunZhou FeiLi ChongTeng - DonghongJi + DonghongJi 5907–5921 Existing cross-document event coreference resolution models, which either compute mention similarity directly or enhance mention representation by extracting event arguments (such as location, time, agent, and patient), lackingmthe ability to utilize document-level information. As a result, they struggle to capture long-distance dependencies. This shortcoming leads to their underwhelming performance in determining coreference for the events where their argument information relies on long-distance dependencies. In light of these limitations, we propose the construction of document-level Rhetorical Structure Theory (RST) trees and cross-document Lexical Chains to model the structural and semantic information of documents. Subsequently, cross-document heterogeneous graphs are constructed and GAT is utilized to learn the representations of events. Finally, a pair scorer calculates the similarity between each pair of events and co-referred events can be recognized using standard clustering algorithm. Additionally, as the existing cross-document event coreference datasets are limited to English, we have developed a large-scale Chinese cross-document event coreference dataset to fill this gap, which comprises 53,066 event mentions and 4,476 clusters. After applying our model on the English and Chinese datasets respectively, it outperforms all baselines by large margins. 2024.lrec-main.523 @@ -6243,7 +6243,7 @@ YiLiu Sarah T.Bonna MargritBetke - Derry TantiWijaya + Derry TantiWijaya 5944–5955 Predicting emotions elicited by news headlines can be challenging as the task is largely influenced by the varying nature of people’s interpretations and backgrounds. Previous works have explored classifying discrete emotions directly from news headlines. We provide a different approach to tackling this problem by utilizing people’s explanations of their emotion, written in free-text, on how they feel after reading a news headline. Using the dataset BU-NEmo+ (Gao et al., 2022), we found that for emotion classification, the free-text explanations have a strong correlation with the dominant emotion elicited by the headlines. The free-text explanations also contain more sentimental context than the news headlines alone and can serve as a better input to emotion classification models. Therefore, in this work we explored generating emotion explanations from headlines by training a sequence-to-sequence transformer model and by using pretrained large language model, ChatGPT (GPT-4). We then used the generated emotion explanations for emotion classification. In addition, we also experimented with training the pretrained T5 model for the intermediate task of explanation generation before fine-tuning it for emotion classification. Using McNemar’s significance test, methods that incorporate GPT-generated free-text emotion explanations demonstrated significant improvement (P-value < 0.05) in emotion classification from headlines, compared to methods that only use headlines. This underscores the value of using intermediate free-text explanations for emotion prediction tasks with headlines. 2024.lrec-main.526 @@ -6285,7 +6285,7 @@ Enhancing Knowledge Retrieval with Topic Modeling for Knowledge-Grounded Dialogue NhatTran - DianeLitman + DianeLitman 5986–5995 Knowledge retrieval is one of the major challenges in building a knowledge-grounded dialogue system. A common method is to use a neural retriever with a distributed approximate nearest-neighbor database to quickly find the relevant knowledge sentences. In this work, we propose an approach that utilizes topic modeling on the knowledge base to further improve retrieval accuracy and as a result, improve response generation. Additionally, we experiment with a large language model (LLM), ChatGPT, to take advantage of the improved retrieval performance to further improve the generation results. Experimental results on two datasets show that our approach can increase retrieval and generation performance. The results also indicate that ChatGPT is a better response generator for knowledge-grounded dialogue when relevant knowledge is provided. 2024.lrec-main.530 @@ -6321,7 +6321,7 @@ ZhiyuChen GiuseppeCastellucci OlegRokhlenko - ShervinMalmasi + ShervinMalmasi 6017–6023 Large Language Models (LLMs) operating in 0-shot or few-shot settings achieve competitive results in Text Classification tasks. In-Context Learning (ICL) typically achieves better accuracy than the 0-shot setting, but it pays in terms of efficiency, due to the longer input prompt. In this paper, we propose a strategy to make LLMs as efficient as 0-shot text classifiers, while getting comparable or better accuracy than ICL. Our solution targets the low resource setting, i.e., when only 4 examples per class are available. Using a single LLM and few-shot real data we perform a sequence of generation, filtering and Parameter-Efficient Fine-Tuning steps to create a robust and efficient classifier. Experimental results show that our approach leads to competitive results on multiple text classification datasets. 2024.lrec-main.533 @@ -6332,7 +6332,7 @@ PeiyuLiu Ze-FengGao XiaoZhang - Wayne XinZhao + Wayne XinZhao Ji-RongWen 6024–6035 Lightweight fine-tuning is widely used as an important technique for efficiently adapting pre-trained language models (PLM) to downstream tasks. Despite the reduction in trainable parameters, existing lightweight fine-tuning methods are found to be effective in low-resource settings but often fail in high-resource settings, leading to unreliable outcomes. This limitation can be attributed to inflexible strategies: they identify the parameters of the model to be trained before fine-tuning and remain unchanged without taking into account the inherent variance of generalization ability in model components (i.e., feed-forward, attention layers) and potential changes during the fine-tuning process. In this paper, we introduce a simple but effective calibration for lightweight fine-tuning PLMs based on the matrix’s stable rank according to both model components and the training process. We proposed both theoretical analyses and experimental verification for the proposed calibration strategy. Considering efficiency, we further propose time-aware and structure-aware strategies to determine the most crucial time to commence the fine-tuning procedure and selectively apply parameter matrices for lightweight fine-tuning, respectively. Extensive experiments demonstrate the superiority of our proposed fine-tuning approach (average improvement 3.1 for GLUE score compared to lightweight fine-tuning method). @@ -6352,7 +6352,7 @@ Enhancing Scientific Document Summarization with Research Community Perspective and Background Knowledge SudiptaSingha Roy - Robert E.Mercer + Robert E.Mercer 6048–6058 Scientific paper summarization has been the focus of much recent research. Unlike previous research which summarizes only the paper in question, or which summarizes the paper and the papers that it references, or which summarizes the paper and the citing sentences from the papers that cite it, this work puts all three of these summarization techniques together. To accomplish this, we have, by utilizing the citation network, introduced a corpus for scientific document summarization that provides information about the document being summarized, the papers referenced by it, as well as the papers that have cited it. The proposed summarizer model utilizes the referenced articles as background information and citing articles to capture the impact of the scientific document on the research community. Another aspect of the proposed model is its ability to generate both the extractive and abstractive summaries in parallel. The parallel training helps the counterparts to improve their individual performance. Results have shown that the summaries are of high quality when considering the standard metrics. 2024.lrec-main.536 @@ -6374,7 +6374,7 @@ Bo-HanLu Yi-HsuanLin AnnieLee - Richard Tzong-HanTsai + Richard Tzong-HanTsai 6077–6090 Machine translation focuses mainly on high-resource languages (HRLs), while low-resource languages (LRLs) like Taiwanese Hokkien are relatively under-explored. The study aims to address this gap by developing a dual translation model between Taiwanese Hokkien and both Traditional Mandarin Chinese and English. We employ a pre-trained LLaMA 2-7B model specialized in Traditional Mandarin Chinese to leverage the orthographic similarities between Taiwanese Hokkien Han and Traditional Mandarin Chinese. Our comprehensive experiments involve translation tasks across various writing systems of Taiwanese Hokkien as well as between Taiwanese Hokkien and other HRLs. We find that the use of a limited monolingual corpus still further improves the model’s Taiwanese Hokkien capabilities. We then utilize our translation model to standardize all Taiwanese Hokkien writing systems into Hokkien Han, resulting in further performance improvements. Additionally, we introduce an evaluation method incorporating back-translation and GPT-4 to ensure reliable translation quality assessment even for LRLs. The study contributes to narrowing the resource gap for Taiwanese Hokkien and empirically investigates the advantages and limitations of pre-training and fine-tuning based on LLaMA 2. 2024.lrec-main.538 @@ -6456,7 +6456,7 @@ FelixGrezes CyrilGrouin FabianSchussler - PierreZweigenbaum + PierreZweigenbaum 6177–6188 Interest in Astrophysical Natural Language Processing (NLP) has increased recently, fueled by the development of specialized language models for information extraction. However, the scarcity of annotated resources for this domain is still a significant challenge. Most existing corpora are limited to Named Entity Recognition (NER) tasks, leaving a gap in resource diversity. To address this gap and facilitate a broader spectrum of NLP research in astrophysics, we introduce astroECR, an extension of our previously built Time-Domain Astrophysics Corpus (TDAC). Our contributions involve expanding it to cover named entities, coreferences, annotations related to astrophysical relationships, and normalizing celestial object names. We showcase practical utility through baseline models for four NLP tasks and provide the research community access to our corpus, code, and models. 2024.lrec-main.545 @@ -6467,7 +6467,7 @@ AndreyKutuzov MariiaFedorova DominikSchlechtweg - NikolayArefyev + NikolayArefyev 6189–6198 We present a dataset of word usage graphs (WUGs), where the existing WUGs for multiple languages are enriched with cluster labels functioning as sense definitions. They are generated from scratch by fine-tuned encoder-decoder language models. The conducted human evaluation has shown that these definitions match the existing clusters in WUGs better than the definitions chosen from WordNet by two baseline systems. At the same time, the method is straightforward to use and easy to extend to new languages. The resulting enriched datasets can be extremely helpful for moving on to explainable semantic change modeling. 2024.lrec-main.546 @@ -6477,7 +6477,7 @@ Ensembles of Hybrid and End-to-End Speech Recognition. Aditya KamleshParikh Louisten Bosch - Henkvan den Heuvel + Henkvan den Heuvel 6199–6205 We propose a method to combine the hybrid Kaldi-based Automatic Speech Recognition (ASR) system with the end-to-end wav2vec 2.0 XLS-R ASR using confidence measures. Our research is focused on the low-resource Irish language. Given the limited available open-source resources, neither the standalone hybrid ASR nor the end-to-end ASR system can achieve optimal performance. By applying the Recognizer Output Voting Error Reduction (ROVER) technique, we illustrate how ensemble learning could facilitate mutual error correction between both ASR systems. This paper outlines the strategies for merging the hybrid Kaldi ASR model and the end-to-end XLS-R model with the help of confidence scores. Although contemporary state-of-the-art end-to-end ASR models face challenges related to prediction overconfidence, we utilize Renyi’s entropy-based confidence approach, tuned with temperature scaling, to align it with the Kaldi ASR confidence. Although there was no significant difference in the Word Error Rate (WER) between the hybrid and end-to-end ASR, we could achieve a notable reduction in WER after ensembling through ROVER. This resulted in an almost 14% Word Error Rate Reduction (WERR) on our primary test set and an approximately 20% WERR on other noisy and imbalanced test data. 2024.lrec-main.547 @@ -6556,7 +6556,7 @@ <fixed-case>E</fixed-case>s<fixed-case>C</fixed-case>o<fixed-case>LA</fixed-case>: <fixed-case>S</fixed-case>panish Corpus of Linguistic Acceptability - NúriaBel + NúriaBel MartaPunsola ValleRuiz-Fernández 6268–6277 @@ -6611,7 +6611,7 @@ Estimating the Causal Effects of Natural Logic Features in Transformer-Based <fixed-case>NLI</fixed-case> Models JuliaRozanova MarcoValentino - AndréFreitas + AndréFreitas 6319–6329 Rigorous evaluation of the causal effects of semantic features on language model predictions can be hard to achieve for natural language reasoning problems. However, this is such a desirable form of analysis from both an interpretability and model evaluation perspective, that it is valuable to investigate specific patterns of reasoning with enough structure and regularity to identify and quantify systematic reasoning failures in widely-used models. In this vein, we pick a portion of the NLI task for which an explicit causal diagram can be systematically constructed: the case where across two sentences (the premise and hypothesis), two related words/terms occur in a shared context. In this work, we apply causal effect estimation strategies to measure the effect of context interventions (whose effect on the entailment label is mediated by the semantic monotonicity characteristic) and interventions on the inserted word-pair (whose effect on the entailment label is mediated by the relation between these words). Extending related work on causal analysis of NLP models in different settings, we perform an extensive interventional study on the NLI task to investigate robustness to irrelevant changes and sensitivity to impactful changes of Transformers. The results strongly bolster the fact that similar benchmark accuracy scores may be observed for models that exhibit very different behaviour. Moreover, our methodology reinforces previously suspected biases from a causal perspective, including biases in favour of upward-monotone contexts and ignoring the effects of negation markers. 2024.lrec-main.559 @@ -6652,14 +6652,14 @@ <fixed-case>E</fixed-case>uropean Language Grid: One Year after GeorgRehm - SteliosPiperidis - DimitrisGalanis - PennyLabropoulou + SteliosPiperidis + DimitrisGalanis + PennyLabropoulou MariaGiagkou MiltosDeligiannis LeonVoukoutis MartinCourtois - JulianMoreno-Schneider + JulianMoreno-Schneider KatrinMarheinecke 6353–6362 The European Language Grid (ELG) is a cloud platform for the whole European Language Technology community. While the EU project that developed the platform successfully concluded in June 2022, the ELG initiative has continued. This article provides a description of the current state of ELG in terms of user adoption and number of language resources and technologies available in early 2024. It also provides an overview of the various activities with regard to ELG since the end of the project and since the publication of the ELG book, especially the co-authors’ attempt to integrate the ELG platform into various data space initiatives. The article also provides an overview of the Digital Language Equality (DLE) dashboard and the current state of DLE in Europe. @@ -6670,7 +6670,7 @@ Evaluating Automatic Subtitling: Correlating Post-editing Effort and Automatic Metrics AlinaKarakanta MauroCettolo - MatteoNegri + MatteoNegri LuisaBentivogli 6363–6369 Systems that automatically generate subtitles from video are gradually entering subtitling workflows, both for supporting subtitlers and for accessibility purposes. Even though robust metrics are essential for evaluating the quality of automatically-generated subtitles and for estimating potential productivity gains, there is limited research on whether existing metrics, some of which directly borrowed from machine translation (MT) evaluation, can fulfil such purposes. This paper investigates how well such MT metrics correlate with measures of post-editing (PE) effort in automatic subtitling. To this aim, we collect and publicly release a new corpus containing product-, process- and participant-based data from post-editing automatic subtitles in two language pairs (en→de,it). We find that different types of metrics correlate with different aspects of PE effort. Specifically, edit distance metrics have high correlation with technical and temporal effort, while neural metrics correlate well with PE speed. @@ -6702,7 +6702,7 @@ Evaluating Gender Bias of Pre-trained Language Models in Natural Language Inference by Considering All Labels PanatchakornAnantaprayoon MasahiroKaneko - NaoakiOkazaki + NaoakiOkazaki 6395–6408 Discriminatory gender biases have been found in Pre-trained Language Models (PLMs) for multiple languages. In Natural Language Inference (NLI), existing bias evaluation methods have focused on the prediction results of one specific label out of three labels, such as neutral. However, such evaluation methods can be inaccurate since unique biased inferences are associated with unique prediction labels. Addressing this limitation, we propose a bias evaluation method for PLMs, called NLI-CoAL, which considers all the three labels of NLI task. First, we create three evaluation data groups that represent different types of biases. Then, we define a bias measure based on the corresponding label output of each data group. In the experiments, we introduce a meta-evaluation technique for NLI bias measures and use it to confirm that our bias measure can distinguish biased, incorrect inferences from non-biased incorrect inferences better than the baseline, resulting in a more accurate bias evaluation. We create the datasets in English, Japanese, and Chinese, and successfully validate the compatibility of our bias measure across multiple languages. Lastly, we observe the bias tendencies in PLMs of different languages. To our knowledge, we are the first to construct evaluation datasets and measure PLMs’ bias from NLI in Japanese and Chinese. 2024.lrec-main.566 @@ -6770,7 +6770,7 @@ Evaluating Shortest Edit Script Methods for Contextual Lemmatization OliaToporkov - RodrigoAgerri + RodrigoAgerri 6451–6463 Modern contextual lemmatizers often rely on automatically induced Shortest Edit Scripts (SES), namely, the number of edit operations to transform a word form into its lemma. In fact, different methods of computing SES have been proposed as an integral component in the architecture of several state-of-the-art contextual lemmatizers currently available. However, previous work has not investigated the direct impact of SES in the final lemmatization performance. In this paper we address this issue by focusing on lemmatization as a token classification task where the only input that the model receives is the word-label pairs in context, where the labels correspond to previously induced SES. Thus, by modifying in our lemmatization system only the SES labels that the model needs to learn, we may then objectively conclude which SES representation produces the best lemmatization results. We experiment with seven languages of different morphological complexity, namely, English, Spanish, Basque, Russian, Czech, Turkish and Polish, using multilingual and language-specific pre-trained masked language encoder-only models as a backbone to build our lemmatizers. Comprehensive experimental results, both in- and out-of-domain, indicate that computing the casing and edit operations separately is beneficial overall, but much more clearly for languages with high-inflected morphology. Notably, multilingual pre-trained language models consistently outperform their language-specific counterparts in every evaluation setting. 2024.lrec-main.572 @@ -6799,11 +6799,11 @@ Evaluating the <fixed-case>IWSLT</fixed-case>2023 Speech Translation Tasks: Human Annotations, Automatic Metrics, and Segmentation MatthiasSperber - OndřejBojar + OndřejBojar BarryHaddow DávidJavorský XutaiMa - MatteoNegri + MatteoNegri JanNiehues PeterPolák ElizabethSalesky @@ -6830,7 +6830,7 @@ BüşraMarşan TungaGungor BalkizOzturk Basaran - ArzucanÖzgür + ArzucanÖzgür SusanUskudarli 6504–6514 Pretrained language models and large language models are increasingly used to assist in a great variety of natural language tasks. In this work, we explore their use in evaluating the quality of alternative corpus annotation schemes. For this purpose, we analyze two alternative annotations of the Turkish BOUN treebank, versions 2.8 and 2.11, in the Universal Dependencies framework using large language models. Using a suitable prompt generated using treebank annotations, large language models are used to recover the surface forms of sentences. Based on the idea that the large language models capture the characteristics of the languages, we expect that the better annotation scheme would yield the sentences with higher success. The experiments conducted on a subset of the treebank show that the new annotation scheme (2.11) results in a successful recovery percentage of about 2 points higher. All the code developed for this work is available at https://github.com/boun-tabi/eval-ud . @@ -6840,7 +6840,7 @@ Evaluating Topic Model on Asymmetric and Multi-Domain Financial Corpus CorentinMasson - PatrickParoubek + PatrickParoubek 6515–6529 Multiple recent research works in Finance try to quantify the exposure of market assets to various risks from text and how assets react if the risk materialize itself. We consider risk sections from french Financial Corporate Annual Reports, which are regulated documents with a mandatory section containing important risks the company is facing, to extract an accurate risk profile and exposure of companies. We identify multiple pitfalls of topic models when applied to corporate filing financial domain data for unsupervised risk distribution extraction which has not yet been studied on this domain. We propose two new metrics to evaluate the behavior of different types of topic models with respect to pitfalls previously mentioned about document risk distribution extraction. Our evaluation will focus on three aspects: regularizations, down-sampling and data augmentation. In our experiments, we found that classic Topic Models require down-sampling to obtain unbiased risks, while Topic Models using metadata and in-domain pre-trained word-embeddings partially correct the coherence imbalance per subdomain and remove sector’s specific language from the detected themes. We then demonstrate the relevance and usefulness of the extracted information with visualizations that help to understand the content of such corpus and its evolution along the years. 2024.lrec-main.578 @@ -6861,7 +6861,7 @@ StephanieBrandl OliverEberle TiagoRibeiro - AndersSøgaard + AndersSøgaard NoraHollenstein 6544–6556 Rationales in the form of manually annotated input spans usually serve as ground truth when evaluating explainability methods in NLP. They are, however, time-consuming and often biased by the annotation process. In this paper, we debate whether human gaze, in the form of webcam-based eye-tracking recordings, poses a valid alternative when evaluating importance scores. We evaluate the additional information provided by gaze data, such as total reading times, gaze entropy, and decoding accuracy with respect to human rationale annotations. We compare WebQAmGaze, a multilingual dataset for information-seeking QA, with attention and explainability-based importance scores for 4 different multilingual Transformer-based language models (mBERT, distil-mBERT, XLMR, and XLMR-L) and 3 languages (English, Spanish, and German). Our pipeline can easily be applied to other tasks and languages. Our findings suggest that gaze data offers valuable linguistic insights that could be leveraged to infer task difficulty and further show a comparable ranking of explainability methods to that of human rationales. @@ -6889,7 +6889,7 @@ Evaluation Dataset for Lexical Translation Consistency in <fixed-case>C</fixed-case>hinese-to-<fixed-case>E</fixed-case>nglish Document-level Translation XiangyuLei - JunhuiLi + JunhuiLi ShiminTao HaoYang 6575–6581 @@ -6903,7 +6903,7 @@ KatarinaGillholm MurathanKurfalı MarieMattson - MatsWirén + MatsWirén 6582–6593 Traditional evaluation methods for Grammatical Error Correction (GEC) fail to fully capture the full range of system capabilities and objectives. The emergence of large language models (LLMs) has further highlighted the shortcomings of these evaluation strategies, emphasizing the need for a paradigm shift in evaluation methodology. In the current study, we perform a comprehensive evaluation of various GEC systems using a recently published dataset of Swedish learner texts. The evaluation is performed using established evaluation metrics as well as human judges. We find that GPT-3 in a few-shot setting by far outperforms previous grammatical error correction systems for Swedish, a language comprising only about 0.1% of its training data. We also found that current evaluation methods contain undesirable biases that a human evaluation is able to reveal. We suggest using human post-editing of GEC system outputs to analyze the amount of change required to reach native-level human performance on the task, and provide a dataset annotated with human post-edits and assessments of grammaticality, fluency and meaning preservation of GEC system outputs. 2024.lrec-main.584 @@ -6924,8 +6924,8 @@ MikelZubillaga OscarSainz AinaraEstarrona - OierLopez de Lacalle - EnekoAgirre + OierLopez de Lacalle + EnekoAgirre 6607–6621 Cross-lingual transfer-learning is widely used in Event Extraction for low-resource languages and involves a Multilingual Language Model that is trained in a source language and applied to the target language. This paper studies whether the typological similarity between source and target languages impacts the performance of cross-lingual transfer, an under-explored topic. We first focus on Basque as the target language, which is an ideal target language because it is typologically different from surrounding languages. Our experiments on three Event Extraction tasks show that the shared linguistic characteristic between source and target languages does have an impact on transfer quality. Further analysis of 72 language pairs reveals that for tasks that involve token classification such as entity and event trigger identification, common writing script and morphological features produce higher quality cross-lingual transfer. In contrast, for tasks involving structural prediction like argument extraction, common word order is the most relevant feature. In addition, we show that when increasing the training size, not all the languages scale in the same way in the cross-lingual setting. To perform the experiments we introduce EusIE, an event extraction dataset for Basque, which follows the Multilingual Event Extraction dataset (MEE). The dataset and code are publicly available. 2024.lrec-main.586 @@ -7015,7 +7015,7 @@ Examining Temporalities on Stance Detection towards <fixed-case>COVID</fixed-case>-19 Vaccination YidaMu MaliJin - KalinaBontcheva + KalinaBontcheva XingyiSong 6732–6738 Previous studies have highlighted the importance of vaccination as an effective strategy to control the transmission of the COVID-19 virus. It is crucial for policymakers to have a comprehensive understanding of the public’s stance towards vaccination on a large scale. However, attitudes towards COVID-19 vaccination, such as pro-vaccine or vaccine hesitancy, have evolved over time on social media. Thus, it is necessary to account for possible temporal shifts when analysing these stances. This study aims to examine the impact of temporal concept drift on stance detection towards COVID-19 vaccination on Twitter. To this end, we evaluate a range of transformer-based models using chronological (splitting the training, validation, and test sets in order of time) and random splits (randomly splitting these three sets) of social media data. Our findings reveal significant discrepancies in model performance between random and chronological splits in several existing COVID-19-related datasets; specifically, chronological splits significantly reduce the accuracy of stance classification. Therefore, real-world stance detection approaches need to be further refined to incorporate temporal factors as a key consideration. @@ -7026,7 +7026,7 @@ Examining the Limitations of Computational Rumor Detection Models Trained on Static Datasets YidaMu XingyiSong - KalinaBontcheva + KalinaBontcheva NikolaosAletras 6739–6751 A crucial aspect of a rumor detection model is its ability to generalize, particularly its ability to detect emerging, previously unknown rumors. Past research has indicated that content-based (i.e., using solely source post as input) rumor detection models tend to perform less effectively on unseen rumors. At the same time, the potential of context-based models remains largely untapped. The main contribution of this paper is in the in-depth evaluation of the performance gap between content and context-based models specifically on detecting new, unseen rumors. Our empirical findings demonstrate that context-based models are still overly dependent on the information derived from the rumors’ source post and tend to overlook the significant role that contextual information can play. We also study the effect of data split strategies on classifier performance. Based on our experimental results, the paper also offers practical suggestions on how to minimize the effects of temporal concept drift in static datasets during the training of rumor detection methods. @@ -7048,7 +7048,7 @@ Experimental versus In-Corpus Variation in Referring Expression Choice - T. MarkEllison + T. MarkEllison FahimeSame 6838–6848 In this paper, we compare the results of three studies. The first explored feature-conditioned distributions of referring expression (RE) forms in the original corpus from which the contexts were taken. The second is a crowdsourcing study in which we asked participants to express entities within a pre-existing context, given fully specified referents. The third study replicates the crowdsourcing experiment using Large Language Models (LLMs). We evaluate how well the corpus itself can model the variation found when multiple informants (either human participants or LLMs) choose REs in the same contexts. We measure the similarity of the conditional distributions of form categories using the Jensen-Shannon Divergence metric and Description Length metric. We find that the experimental methodology introduces substantial noise, but by taking this noise into account, we can model the variation captured from the corpus and RE form choices made during experiments. Furthermore, we compared the three conditional distributions over the corpus, the human experimental results, and the GPT models. Against our expectations, the divergence is greatest between the corpus and the GPT model. @@ -7069,7 +7069,7 @@ Explainable Multi-hop Question Generation: An End-to-End Approach without Intermediate Question Labeling SeonjeongHwang YunsuKim - Gary GeunbaeLee + Gary GeunbaeLee 6855–6866 In response to the increasing use of interactive artificial intelligence, the demand for the capacity to handle complex questions has increased. Multi-hop question generation aims to generate complex questions that requires multi-step reasoning over several documents. Previous studies have predominantly utilized end-to-end models, wherein questions are decoded based on the representation of context documents. However, these approaches lack the ability to explain the reasoning process behind the generated multi-hop questions. Additionally, the question rewriting approach, which incrementally increases the question complexity, also has limitations due to the requirement of labeling data for intermediate-stage questions. In this paper, we introduce an end-to-end question rewriting model that increases question complexity through sequential rewriting. The proposed model has the advantage of training with only the final multi-hop questions, without intermediate questions. Experimental results demonstrate the effectiveness of our model in generating complex questions, particularly 3- and 4-hop questions, which are appropriately paired with input answers. We also prove that our model logically and incrementally increases the complexity of questions, and the generated multi-hop questions are also beneficial for training question answering models. 2024.lrec-main.599 @@ -7187,7 +7187,7 @@ Exploring the Impact of Human Evaluator Group on Chat-Oriented Dialogue Evaluation Sarah E.Finch James D.Finch - Jinho D.Choi + Jinho D.Choi 6966–6973 Human evaluation has been widely accepted as the standard for evaluating chat-oriented dialogue systems. However, there is a significant variation in previous work regarding who gets recruited as evaluators. Evaluator groups such as domain experts, university students, and crowdworkers have been used to assess and compare dialogue systems, although it is unclear to what extent the choice of an evaluator group can affect results. This paper analyzes the evaluator group impact on dialogue system evaluation by testing 4 state-of-the-art dialogue systems using 4 distinct evaluator groups. Our analysis reveals a robustness towards evaluator groups for Likert evaluations that is not seen for Pairwise, with only minor differences observed when changing evaluator groups. Furthermore, two notable limitations to this robustness are observed, which reveal discrepancies between evaluators with different levels of chatbot expertise and indicate that evaluator objectivity is beneficial for certain dialogue metrics. 2024.lrec-main.610 @@ -7243,7 +7243,7 @@ Anja Silvia MollahHaque IsabelEiser GertraudKoch - ChrisBiemann + ChrisBiemann 7017–7022 In this system demonstration paper, we describe the Whiteboards extension for an existing web-based platform for digital qualitative discourse analysis. Whiteboards comprise interactive graph-based interfaces to organize and manipulate objects, which can be qualitative research data, such as documents, images, etc., and analyses of these research data, such as annotations, tags, and code structures. The proposed extension offers a customizable view of the material and a wide range of actions that enable new ways of interacting and working with such resources. We show that the visualizations facilitate various use cases of qualitative data analysis, including reflection of the research process through sampling maps, creation of actor networks, and refining code taxonomies. 2024.lrec-main.615 @@ -7286,8 +7286,8 @@ Abby R.Rosenberg KevinLybarger FeiXia - ÖzlemUzuner - MelihaYetisgen + ÖzlemUzuner + MelihaYetisgen 7045–7056 Social determinants of health (SDoH) play a critical role in shaping health outcomes, particularly in pediatric populations where interventions can have long-term implications. SDoH are frequently studied in the Electronic Health Record (EHR), which provides a rich repository for diverse patient data. In this work, we present a novel annotated corpus, the Pediatric Social History Annotation Corpus (PedSHAC), and evaluate the automatic extraction of detailed SDoH representations using fine-tuned and in-context learning methods with Large Language Models (LLMs). PedSHAC comprises annotated social history sections from 1,260 clinical notes obtained from pediatric patients within the University of Washington (UW) hospital system. Employing an event-based annotation scheme, PedSHAC captures ten distinct health determinants to encompass living and economic stability, prior trauma, education access, substance use history, and mental health with an overall annotator agreement of 81.9 F1. Our proposed fine-tuning LLM-based extractors achieve high performance at 78.4 F1 for event arguments. In-context learning approaches with GPT-4 demonstrate promise for reliable SDoH extraction with limited annotated examples, with extraction performance at 82.3 F1 for event triggers. 2024.lrec-main.618 @@ -7348,7 +7348,7 @@ EricSanders SaraPetrollino Gilles R.Scheifer - Henkvan den Heuvel + Henkvan den Heuvel ChristopherHandy 7101–7106 LeiLanD (Leiden Language Data) is a searchable catalogue initiated by the Leiden University Centre for Linguistics (LUCL) with the support of CLARIAH. The catalogue contains metadata about language datasets collected at LUCL and other institutes of Leiden University. This paper describes a project to FAIRify the datasets increasing their findability and accessibility through a standardised metadata format CMDI so as to obtain a rich metadata description for all resources and to make them findable through CLARIN’s Virtual Language Observatory. The paper describes the creation of the catalogue and the steps that led from unstructured metadata to CMDI standards. This FAIRifi- cation of LeiLanD has enhanced the findability and accessibility of incredibly diverse collection of language datasets. @@ -7358,8 +7358,8 @@ <fixed-case>F</fixed-case>al<fixed-case>AI</fixed-case>: A Dataset for End-to-end Spoken Language Understanding in a Low-Resource Scenario AndresPineiro-Martin - CarmenGarcia-Mateo - LauraDocio-Fernandez + CarmenGarcia-Mateo + LauraDocio-Fernandez Maria del CarmenLopez-Perez JoseGandarela-Rodriguez 7107–7116 @@ -7381,9 +7381,9 @@ <fixed-case>F</fixed-case>ast<fixed-case>S</fixed-case>pell: The <fixed-case>L</fixed-case>ang<fixed-case>I</fixed-case>d Magic Spell MartaBañón - GemaRamírez-Sánchez + GemaRamírez-Sánchez JaumeZaragoza-Bernabeu - SergioOrtiz Rojas + SergioOrtiz Rojas 7133–7140 Language identification is a crucial component in the automated production of language resources, particularly in multilingual and big data contexts. However, commonly used language identifiers struggle to differentiate between similar or closely-related languages. This paper introduces FastSpell, a language identifier that combines fastText (a pre-trained language identifier tool) and Hunspell (a spell checker) with the aim of having a refined second-opinion before deciding which language should be assigned to a text. We provide a description of the FastSpell algorithm along with an explanation on how to use and configure it. To that end, we motivate the need of such a tool and present a benchmark including some popular language identifiers evaluated during the development of FastSpell. We show how FastSpell is useful not only to improve identification of similar languages, but also to identify new ones ignored by other tools. 2024.lrec-main.626 @@ -7423,7 +7423,7 @@ Federated Foundation Models: Privacy-Preserving and Collaborative Learning for Large Models SixingYu - Juan PabloMunoz + Juan PabloMunoz AliJannesari 7174–7184 Foundation Models (FMs), such as LLaMA, BERT, GPT, ViT, and CLIP, have demonstrated remarkable success in a wide range of applications, driven by their ability to leverage vast amounts of data for pre-training. However, optimizing FMs often requires access to sensitive data, raising privacy concerns and limiting their applicability in many domains. In this paper, we propose the Federated Foundation Models (FFMs) paradigm, which combines the benefits of FMs and Federated Learning (FL) to enable privacy-preserving and collaborative learning across multiple end-users. We discuss the potential benefits and challenges of integrating FL into the lifespan of FMs, covering pre-training, fine-tuning, and application. We further outline potential future research avenues in FFM, including FFM pre-training, FFM fine-tuning, and federated prompt tuning, which allow the development of more personalized and context-aware models while ensuring data privacy. Moreover, we explore the possibility of continual/lifelong learning in FFMs, as increased computational power at the edge may unlock the potential for optimizing FMs using newly generated private data close to the data source. The proposed FFM concepts offer a flexible and scalable framework for training large language models in a privacy-preserving manner, setting the stage for subsequent advancements in both FM training and federated learning. @@ -7447,7 +7447,7 @@ SaipingGuan XiaolongJin JiafengGuo - XueqiCheng + XueqiCheng 7196–7207 Hyper-relational facts, which consist of a primary triple (head entity, relation, tail entity) and auxiliary attribute-value pairs, are widely present in real-world Knowledge Graphs (KGs). Link Prediction on Hyper-relational Facts (LPHFs) is to predict a missing element in a hyper-relational fact, which helps populate and enrich KGs. However, existing LPHFs studies usually require an amount of high-quality data. They overlook few-shot relations, which have limited instances, yet are common in real-world scenarios. Thus, we introduce a new task, Few-Shot Link Prediction on Hyper-relational Facts (FSLPHFs). It aims to predict a missing entity in a hyper-relational fact with limited support instances. To tackle FSLPHFs, we propose MetaRH, a model that learns Meta Relational information in Hyper-relational facts. MetaRH comprises three modules: relation learning, support-specific adjustment, and query inference. By capturing meta relational information from limited support instances, MetaRH can accurately predict the missing entity in a query. As there is no existing dataset available for this new task, we construct three datasets to validate the effectiveness of MetaRH. Experimental results on these datasets demonstrate that MetaRH significantly outperforms existing representative models. 2024.lrec-main.632 @@ -7531,7 +7531,7 @@ <fixed-case>F</fixed-case>in<fixed-case>C</fixed-case>orpus-<fixed-case>DE</fixed-case>10k: A Corpus for the <fixed-case>G</fixed-case>erman Financial Domain SerhiiHamotskyi NataKozaeva - ChristianHänig + ChristianHänig 7277–7285 We introduce a predominantly German corpus comprising 12.5k PDF documents sourced from the financial domain. The corresponding extracted textual data encompasses more than 165 million tokens derived predominantly from German, and to a lesser extent, bilingual documents. We provide detailed information about the document types included in the corpus, such as final terms, base prospectuses, annual reports, information materials, law documents, international financial reporting standards, and monthly reports from the Bundesbank, accompanied by comprehensive statistical analysis. To our knowledge, it is the first non-email German financial corpus available, and we hope it will fill this gap and foster further research in the financial domain both in the German language and in multilingual contexts. 2024.lrec-main.639 @@ -7570,7 +7570,7 @@ YutingShi NaoyaInoue HoujingWei - YufengZhao + YufengZhao TaoJin 7307–7313 Recent advances in Instruction-fine-tuned Vision and Language Models (IVLMs), such as GPT-4V and InstructBLIP, have prompted some studies have started an in-depth analysis of the reasoning capabilities of IVLMs. However, Inductive Visual Reasoning, a vital skill for text-image understanding, remains underexplored due to the absence of benchmarks. In this paper, we introduce Find-the-Common (FTC): a new vision and language task for Inductive Visual Reasoning. In this task, models are required to identify an answer that explains the common attributes across visual scenes. We create a new dataset for the FTC and assess the performance of several contemporary approaches including Image-Based Reasoning, Text-Based Reasoning, and Image-Text-Based Reasoning with various models. Extensive experiments show that even state-of-the-art models like GPT-4V can only archive with 48% accuracy on the FTC, for which, the FTC is a new challenge for the visual reasoning research community. Our dataset has been released and is available online: https://github.com/SSSSSeki/Find-the-common. @@ -7657,7 +7657,7 @@ IreneBaucells MarcPamies YishiXu - AitorGonzalez-Agirre + AitorGonzalez-Agirre MartaVillegas 7377–7388 Large language models have amply proven their great capabilities, both in downstream tasks and real-life settings. However, low- and mid-resource languages do not have access to the necessary means to train such models from scratch, and often have to rely on multilingual models despite being underrepresented in the training data. For the particular case of the Catalan language, we prove that continued pre-training with vocabulary adaptation is a better alternative to take the most out of already pre-trained models, even if these have not seen any Catalan data during their pre-training phase. We curate a 26B tokens corpus and use it to further pre-train BLOOM, giving rise to the FLOR models. We perform an extensive evaluation to assess the effectiveness of our method, obtaining consistent gains across Catalan and Spanish tasks. The models, training data, and evaluation framework are made freely available under permissive licenses. @@ -7678,7 +7678,7 @@ <fixed-case>FORECAST</fixed-case>2023: A Forecast and Reasoning Corpus of Argumentation Structures KamilaGórska JohnLawrence - ChrisReed + ChrisReed 7395–7405 It is known from large-scale crowd experimentation that some people are innately better at analysing complex situations and making justified predictions – the so-called ‘superforecasters’. Surprisingly, however, there has to date been no work exploring the role played by the reasoning in those justifications. Bag-of-words analyses might tell us something, but the real value lies in understanding what features of reasoning and argumentation lead to better forecasts – both in providing an objective measure for argument quality, and even more importantly, in providing guidance on how to improve forecasting performance. The work presented here covers the creation of a unique dataset of such prediction rationales, the structure of which naturally lends itself to partially automated annotation which in turn is used as the basis for subsequent manual enhancement that provides a uniquely fine-grained and close characterisation of the structure of argumentation, with potential impact on forecasting domains from intelligence analysis to investment decision-making. 2024.lrec-main.652 @@ -7826,14 +7826,14 @@ From Linguistic Linked Data to Big Data DimitarTrajanov ElenaApostol - RadovanGarabik + RadovanGarabik KaterinaGkirtzou DagmarGromann ChayaLiebeskind CosimoPalma - MichaelRosner + MichaelRosner AlexiaSampri - GillesSérasset + GillesSérasset BlerinaSpahiu Ciprian-OctavianTruică GiedreValunaite Oleskeviciene @@ -7877,8 +7877,8 @@ From Text to Source: Results in Detecting Large Language Model-Generated Content WissamAntoun - BenoîtSagot - DjaméSeddah + BenoîtSagot + DjaméSeddah 7531–7543 The widespread use of Large Language Models (LLMs), celebrated for their ability to generate human-like text, has raised concerns about misinformation and ethical implications. Addressing these concerns necessitates the development of robust methods to detect and attribute text generated by LLMs. This paper investigates “Cross-Model Detection,” by evaluating whether a classifier trained to distinguish between source LLM-generated and human-written text can also detect text from a target LLM without further training. The study comprehensively explores various LLM sizes and families and assesses the impact of conversational fine-tuning techniques, quantization, and watermarking on classifier generalization. The research also explores Model Attribution, encompassing source model identification, model family, and model size classification, in addition to quantization and watermarking detection. Our results reveal several key findings: a clear inverse relationship between classifier effectiveness and model size, with larger LLMs being more challenging to detect, especially when the classifier is trained on data from smaller models. Training on data from similarly sized LLMs can improve detection performance from larger models but may lead to decreased performance when dealing with smaller models. Additionally, model attribution experiments show promising results in identifying source models and model families, highlighting detectable signatures in LLM-generated text, with particularly remarkable outcomes in watermarking detection, while no detectable signatures of quantization were observed. Overall, our study contributes valuable insights into the interplay of model size, family, and training data in LLM detection and attribution. 2024.lrec-main.665 @@ -7888,7 +7888,7 @@ <fixed-case>FUSE</fixed-case> - <fixed-case>F</fixed-case>r<fixed-case>U</fixed-case>stration and Surprise Expressions: A Subtle Emotional Multimodal Language Corpus RajeshTitung - Cecilia OvesdotterAlm + Cecilia OvesdotterAlm 7544–7555 This study introduces a novel multimodal corpus for expressive task-based spoken language and dialogue, focused on language use under frustration and surprise, elicited from three tasks motivated by prior research and collected in an IRB-approved experiment. The resource is unique both because these are understudied affect states for emotion modeling in language, and also because it provides both individual and dyadic multimodally grounded language. The study includes a detailed analysis of annotations and performance results for multimodal emotion inference in language use. 2024.lrec-main.666 @@ -7910,7 +7910,7 @@ <fixed-case>GAATME</fixed-case>: A Genetic Algorithm for Adversarial Translation Metrics Evaluation JosefJon - OndřejBojar + OndřejBojar 7562–7569 Building on a recent method for decoding translation candidates from a Machine Translation (MT) model via a genetic algorithm, we modify it to generate adversarial translations to test and challenge MT evaluation metrics. The produced translations score very well in an arbitrary MT evaluation metric selected beforehand, despite containing serious, deliberately introduced errors. The method can be used to create adversarial test sets to analyze the biases and shortcomings of the metrics. We publish various such test sets for the Czech to English language pair, as well as the code to convert any parallel data into a similar adversarial test set. 2024.lrec-main.668 @@ -7985,7 +7985,7 @@ Generating Multiple-choice Questions for Medical Question Answering with Distractors and Cue-masking DamienSileo KanimozhiUma - Marie-FrancineMoens + Marie-FrancineMoens 7647–7653 Medical multiple-choice question answering (MCQA) is a challenging evaluation for medical natural language processing and a helpful task in itself. Medical questions may describe patient symptoms and ask for the correct diagnosis, which requires domain knowledge and complex reasoning. Standard language modeling pretraining alone is not sufficient to achieve the best results with BERT-base size (Devlin et al., 2019) encoders. Jin et al. (2020) showed that focusing masked language modeling on disease name prediction when using medical encyclopedic paragraphs as input leads to considerable MCQA accuracy improvement. In this work, we show that (1) fine-tuning on generated MCQA dataset outperforms the masked language modeling based objective and (2) correctly masking the cues to the answers is critical for good performance. We release new pretraining datasets and achieve state-of-the-art results on 4 MCQA datasets, notably +5.7% with base-size model on MedQA-USMLE. 2024.lrec-main.675 @@ -8039,7 +8039,7 @@ <fixed-case>G</fixed-case>erman Also Hallucinates! Inconsistency Detection in News Summaries with the Absinth Dataset LauraMascarell RibinChalumattu - AnnetteRios + AnnetteRios 7696–7706 The advent of Large Language Models (LLMs) has led to remarkable progress on a wide range of natural language processing tasks. Despite the advances, these large-sized models still suffer from hallucinating information in their output, which poses a major issue in automatic text summarization, as we must guarantee that the generated summary is consistent with the content of the source document. Previous research addresses the challenging task of detecting hallucinations in the output (i.e. inconsistency detection) in order to evaluate the faithfulness of the generated summaries. However, these works primarily focus on English and recent multilingual approaches lack German data. This work presents Absinth, a manually annotated dataset for hallucination detection in German news summarization and explores the capabilities of novel open-source LLMs on this task in both fine-tuning and in-context learning settings. We open-source and release the Absinth dataset to foster further research on hallucination detection in German. 2024.lrec-main.680 @@ -8095,9 +8095,9 @@ KyeongminRim KeerXu LiuluYue - Susan WindischBrown - MarthaPalmer - JamesPustejovsky + Susan WindischBrown + MarthaPalmer + JamesPustejovsky 7746–7759 This paper introduces GLAMR, an Abstract Meaning Representation (AMR) interpretation of Generative Lexicon (GL) semantic components. It includes a structured subeventual interpretation of linguistic predicates, and encoding of the opposition structure of property changes of event arguments. Both of these features are recently encoded in VerbNet (VN), and form the scaffolding for the semantic form associated with VN frame files. We develop a new syntax, concepts, and roles for subevent structure based on VN for connecting subevents to atomic predicates. Our proposed extension is compatible with current AMR specification. We also present an approach to automatically augment AMR graphs by inserting subevent structure of the predicates and identifying the subevent arguments from the semantic roles. A pilot annotation of GLAMR graphs of 65 documents (486 sentences), based on procedural texts as a source, is presented as a public dataset. The annotation includes subevents, argument property change, and document-level anaphoric links. Finally, we provide baseline models for converting text to GLAMR and vice versa, along with the application of GLAMR for generating enriched paraphrases with details on subevent transformation and arguments that are not present in the surface form of the texts. 2024.lrec-main.685 @@ -8120,7 +8120,7 @@ <fixed-case>G</fixed-case>lot<fixed-case>S</fixed-case>cript: A Resource and Tool for Low Resource Writing System Identification Amir HosseinKargaran FrançoisYvon - HinrichSchütze + HinrichSchütze 7774–7784 We present GlotScript, an open resource and tool for low resource writing system identification. GlotScript-R is a resource that provides the attested writing systems for more than 7,000 languages. It is compiled by aggregating information from existing writing system resources. GlotScript-T is a writing system identification tool that covers all 161 Unicode 15.0 scripts. For an input text, it returns its script distribution where scripts are identified by ISO 15924 codes. We also present two use cases for GlotScript. First, we demonstrate that GlotScript can help cleaning multilingual corpora such as mC4 and OSCAR. Second, we analyze the tokenization of a number of language models such as GPT-4 using GlotScript and provide insights on the coverage of low resource scripts and languages by each language model. We hope that GlotScript will become a useful resource for work on low resource languages in the NLP community. GlotScript-R and GlotScript-T are available at https://github.com/cisnlp/GlotScript. 2024.lrec-main.687 @@ -8142,7 +8142,7 @@ AnuragAcharya DiegoCastro Estrada DianaGomez - MarkFinlayson + MarkFinlayson 7801–7813 Motifs are distinctive, recurring, widely used idiom-like words or phrases, often originating from folklore, whose meaning are anchored in a narrative. Motifs have significance as communicative devices because they concisely imply a constellation of culturally relevant information. Their broad usage suggests their cognitive importance as touchstones of cultural knowledge. We present GOLEM, the first dataset annotated for motific information. The dataset comprises 7,955 English articles (2,039,424 words). The corpus identifies 26,078 motif candidates across 34 motif types from three cultural or national groups: Jewish, Irish, and Puerto Rican. Each motif candidate is labeled with the type of usage (Motific, Referential, Eponymic, or Unrelated), resulting in 1,723 actual motific instances. Annotation was performed by individuals identifying as members of each group and achieved a Fleiss’ kappa of >0.55. We demonstrate that classification of candidate type is a challenging task for LLMs using a few-shot approach; recent models such as T5, FLAN-T5, GPT-2, and Llama 2 (7B) achieved a performance of 41% accuracy at best. These data will support development of new models and approaches for detecting (and reasoning about) motific information in text. We release the corpus, the annotation guide, and the code to support other researchers building on this work. 2024.lrec-main.689 @@ -8162,7 +8162,7 @@ Gos 2: A New Reference Corpus of Spoken <fixed-case>S</fixed-case>lovenian DarinkaVerdonik KajaDobrovoljc - TomažErjavec + TomažErjavec NikolaLjubešić 7825–7830 This paper introduces a new version of the Gos reference corpus of spoken Slovenian, which was recently extended to more than double the original size (300 hours, 2.4 million words) by adding speech recordings and transcriptions from two related initiatives, the Gos VideoLectures corpus of public academic speech, and the Artur speech recognition database. We describe this process by first presenting the criteria guiding the balanced selection of the newly added data and the challenges encountered when merging language resources with divergent designs, followed by the presentation of other major enhancements of the new Gos corpus, such as improvements in lemmatization and morphosyntactic annotation, word-level speech alignment, a new XML schema and the development of a specialized online concordancer. @@ -8256,7 +8256,7 @@ Granular Change Accuracy: A More Accurate Performance Metric for Dialogue State Tracking TahaAksu - NancyChen + NancyChen 7939–7948 Current metrics for evaluating Dialogue State Tracking (DST) systems exhibit three primary limitations. They: i) erroneously presume a uniform distribution of slots throughout the dialog, ii) neglect to assign partial scores for individual turns, iii) frequently overestimate or underestimate performance by repeatedly counting the models’ successful or failed predictions. To address these shortcomings, we introduce a novel metric: Granular Change Accuracy (GCA). GCA focuses on evaluating the predicted changes in dialogue state over the entire dialogue history. Benchmarking reveals that GCA effectively reduces biases arising from distribution uniformity and the positioning of errors across turns, resulting in a more precise evaluation. Notably, we find that these biases are particularly pronounced when evaluating few-shot or zero-shot trained models, becoming even more evident as the model’s error rate increases. Hence, GCA offers significant promise, particularly for assessing models trained with limited resources. Our GCA implementation is a useful addition to the pool of DST metrics. 2024.lrec-main.699 @@ -8385,7 +8385,7 @@ SiqiShen ZekunWang WinstonWu - RadaMihalcea + RadaMihalcea 8050–8094 Recent progress in large language models (LLMs) has enabled the deployment of many generative NLP applications. At the same time, it has also led to a misleading public discourse that “it’s all been solved.” Not surprisingly, this has, in turn, made many NLP researchers – especially those at the beginning of their careers – worry about what NLP research area they should focus on. Has it all been solved, or what remaining questions can we work on regardless of LLMs? To address this question, this paper compiles NLP research directions rich for exploration. We identify fourteen different research areas encompassing 45 research directions that require new research and are not directly solvable by LLMs. While we identify many research areas, many others exist; we do not cover areas currently addressed by LLMs, but where LLMs lag behind in performance or those focused on LLM development. We welcome suggestions for other research directions to include: https://bit.ly/nlp-era-llm. 2024.lrec-main.708 @@ -8487,7 +8487,7 @@ How Do Hyenas Deal with Human Speech? Speech Recognition and Translation with <fixed-case>C</fixed-case>onf<fixed-case>H</fixed-case>yena MarcoGaido SaraPapi - MatteoNegri + MatteoNegri LuisaBentivogli 8184–8191 The attention mechanism, a cornerstone of state-of-the-art neural models, faces computational hurdles in processing long sequences due to its quadratic complexity. Consequently, research efforts in the last few years focused on finding more efficient alternatives. Among them, Hyena (Poli et al., 2023) stands out for achieving competitive results in both language modeling and image classification, while offering sub-quadratic memory and computational complexity. Building on these promising results, we propose ConfHyena, a Conformer whose encoder self-attentions are replaced with an adaptation of Hyena for speech processing, where the long input sequences cause high computational costs. Through experiments in automatic speech recognition (for English) and translation (from English into 8 target languages), we show that our best ConfHyena model significantly reduces the training time by 27%, at the cost of minimal quality degradation (∼1%), which, in most cases, is not statistically significant. @@ -8531,7 +8531,7 @@ How Important Is Tokenization in <fixed-case>F</fixed-case>rench Medical Masked Language Models? YanisLabrak AdrienBazoge - BéatriceDaille + BéatriceDaille MickaelRouvier RichardDufour 8223–8234 @@ -8542,7 +8542,7 @@ How Large Language Models Encode Context Knowledge? A Layer-Wise Probing Study TianjieJu - WeiweiSun + WeiweiSun WeiDu XinweiYuan ZhaochunRen @@ -8569,7 +8569,7 @@ Venkata SahithBathini NiloyGanguly PawanGoyal - MayankSingh + MayankSingh 8258–8264 Question-answering (QA) on hybrid scientific tabular and textual data deals with scientific information, and relies on complex numerical reasoning. In recent years, while tabular QA has seen rapid progress, understanding their robustness on scientific information is lacking due to absence of any benchmark dataset. To investigate the robustness of the existing state-of-the-art QA models on scientific hybrid tabular data, we propose a new dataset, “SciTabQA”, consisting of 822 question-answer pairs from scientific tables and their descriptions. With the help of this dataset, we assess the state-of-the-art Tabular QA models based on their ability (i) to use heterogeneous information requiring both structured data (table) and unstructured data (text) and (ii) to perform complex scientific reasoning tasks. In essence, we check the capability of the models to interpret scientific tables and text. Our experiments show that “SciTabQA” is an innovative dataset to study question-answering over scientific heterogeneous data. We benchmark three state-of-the-art Tabular QA models, and find that the best F1 score is only 0.462. 2024.lrec-main.724 @@ -8578,7 +8578,7 @@ How Speculative Can Speculative Decoding Be? ZhuoruiLiu - ChenZhang + ChenZhang DaweiSong 8265–8275 Large language models (LLMs) have drawn great attention from the field of natural language processing and beyond, due to their impressive capability of autoregressive modeling, yet bringing an obvious problem, i.e., the largely increased latency. An emerging idea to alleviate this problem is speculative decoding, which first uses a draft model to draft tokens autoregressively and then makes the target model verify these tokens in parallel. The draft model is typically smaller than the target model, and it essentially trades generation quality for speed. Thereby, speculative decoding can be viewed as a speculative game for the target model in term of verification failures. That is, the lengthy draft tokens proposed by the small draft models could fail in the verification stage. Naturally, a critical question arises: how speculative can speculative decoding be, or in other words, how small can an adequate draft model be and how large can an appropriate number of draft tokens be? This work aims to investigate these questions and demonstrate how the scale of the draft model and the number of draft tokens would have an impact on the overall latency of the speculative decoding. We theoretically show that neither of above two factors will be infinitely speculative. Namely, there is a certain turning point for each of them. We then empirically show that the scale of the draft model could be 10-20\times smaller than the target model and the optimal number of draft tokens should lie in 3-5. @@ -8601,7 +8601,7 @@ How to Do Politics with Words: Investigating Speech Acts in Parliamentary Debates InesReinig InesRehbein - Simone PaoloPonzetto + Simone PaoloPonzetto 8287–8300 This paper presents a new perspective on framing through the lens of speech acts and investigates how politicians make use of different pragmatic speech act functions in political debates. To that end, we created a new resource of German parliamentary debates, annotated with fine-grained speech act types. Our hierarchical annotation scheme distinguishes between cooperation and conflict communication, further structured into six subtypes, such as informative, declarative or argumentative-critical speech acts, with 14 fine-grained classes at the lowest level. We present classification baselines on our new data and show that the fine-grained classes in our schema can be predicted with an avg. F1 of around 82.0%. We then use our classifier to analyse the use of speech acts in a large corpus of parliamentary debates over a time span from 2003–2023. 2024.lrec-main.727 @@ -8615,7 +8615,7 @@ GustavKristensen Marie HaahrPetersen Robvan der Goot - BarbaraPlank + BarbaraPlank 8301–8306 Current language models require a lot of training data to obtain high performance. For Relation Classification (RC), many datasets are domain-specific, so combining datasets to obtain better performance is non-trivial. We explore a multi-domain training setup for RC, and attempt to improve performance by encoding domain information. Our proposed models improve > 2 Macro-F1 against the baseline setup, and our analysis reveals that not all the labels benefit the same: The classes which occupy a similar space across domains (i.e., their interpretation is close across them, for example “physical”) benefit the least, while domain-dependent relations (e.g., “part-of”) improve the most when encoding domain information. 2024.lrec-main.728 @@ -8624,7 +8624,7 @@ How to Solve Few-Shot Abusive Content Detection Using the Data We Actually Have ViktorHangya - AlexanderFraser + AlexanderFraser 8307–8322 Due to the broad range of social media platforms, the requirements of abusive language detection systems are varied and ever-changing. Already a large set of annotated corpora with different properties and label sets were created, such as hate or misogyny detection, but the form and targets of abusive speech are constantly evolving. Since, the annotation of new corpora is expensive, in this work we leverage datasets we already have, covering a wide range of tasks related to abusive language detection. Our goal is to build models cheaply for a new target label set and/or language, using only a few training examples of the target domain. We propose a two-step approach: first we train our model in a multitask fashion. We then carry out few-shot adaptation to the target requirements. Our experiments show that using already existing datasets and only a few-shots of the target task the performance of models improve both monolingually and across languages. Our analysis also shows that our models acquire a general understanding of abusive language, since they improve the prediction of labels which are present only in the target dataset and can benefit from knowledge about labels which are not directly used for the target task. 2024.lrec-main.729 @@ -8635,7 +8635,7 @@ JiaminLuo JianingZhao JingjingWang - GuodongZhou + GuodongZhou 8323–8333 Weakly-supervised Phrase Grounding (WPG) is an emerging task of inferring the fine-grained phrase-region matching, while merely leveraging the coarse-grained sentence-image pairs for training. However, existing studies on WPG largely ignore the implicit phrase-region matching relations, which are crucial for evaluating the capability of models in understanding the deep multimodal semantics. To this end, this paper proposes an Implicit-Enhanced Causal Inference (IECI) approach to address the challenges of modeling the implicit relations and highlighting them beyond the explicit. Specifically, this approach leverages both the intervention and counterfactual techniques to tackle the above two challenges respectively. Furthermore, a high-quality implicit-enhanced dataset is annotated to evaluate IECI and detailed evaluations show the great advantages of IECI over the state-of-the-art baselines. Particularly, we observe an interesting finding that IECI outperforms the advanced multimodal LLMs by a large margin on this implicit-enhanced dataset, which may facilitate more research to evaluate the multimodal LLMs in this direction. 2024.lrec-main.730 @@ -8645,7 +8645,7 @@ How Well Can <fixed-case>BERT</fixed-case> Learn the Grammar of an Agglutinative and Flexible-Order Language? The Case of <fixed-case>B</fixed-case>asque. GorkaUrbizu MuitzeZulaika - XabierSaralegi + XabierSaralegi AnderCorral 8334–8348 This work investigates the acquisition of formal linguistic competence by neural language models, hypothesizing that languages with complex grammar, such as Basque, present substantial challenges during the pre-training phase. Basque is distinguished by its complex morphology and flexible word order, potentially complicating grammar extraction. In our analysis, we evaluated the grammatical knowledge of BERT models trained under various pre-training configurations, considering factors such as corpus size, model size, number of epochs, and the use of lemmatization. To assess this grammatical knowledge, we constructed the BL2MP (Basque L2 student-based Minimal Pairs) test set. This test set consists of minimal pairs, each containing both a grammatically correct and an incorrect sentence, sourced from essays authored by students at different proficiency levels in the Basque language. Additionally, our analysis explores the difficulties in learning various grammatical phenomena, the challenges posed by flexible word order, and the influence of the student’s proficiency level on the difficulty of correcting grammar errors. @@ -8670,7 +8670,7 @@ László JánosLaki NoémiVadász Zijian GyőzőYang - TamásVáradi + TamásVáradi 8360–8371 The paper introduces the Hungarian Language Understanding (HuLU) benchmark, a comprehensive assessment framework designed to evaluate the performance of neural language models on Hungarian language tasks. Inspired by the renowned GLUE and SuperGLUE benchmarks, HuLU aims to address the challenges specific to Hungarian language processing. The benchmark consists of various datasets, each representing different linguistic phenomena and task complexities. Moreover, the paper presents a web service developed for HuLU, offering a user-friendly interface for model evaluation. This platform not only ensures consistent assessment but also fosters transparency by maintaining a leaderboard showcasing model performances. Preliminary evaluations of various LMMs on HuLU datasets indicate that while Hungarian models show promise, there’s room for improvement to match the proficiency of English-centric models in their native language. 2024.lrec-main.733 @@ -8722,7 +8722,7 @@ Humanitarian Corpora for <fixed-case>E</fixed-case>nglish, <fixed-case>F</fixed-case>rench and <fixed-case>S</fixed-case>panish LorynIsaacs SantiagoChambó - PilarLeón-Araúz + PilarLeón-Araúz 8418–8426 This paper presents three corpora of English, French and Spanish humanitarian documents compiled with reports obtained from ReliefWeb through its API. ReliefWeb is a leading database of humanitarian documents operated by the UN Office for the Coordination of Humanitarian Affairs (OCHA). To compile these corpora, documents were selected with language identification and noise reduction techniques. They were subsequently tokenized, lemmatized, tagged by part of speech, and enriched with metadata for use by linguists in corpus query software. These corpora were compiled to satisfy the research needs of the Humanitarian Encyclopedia, a project with a focus on conceptual variation. However, they can also be useful for other humanitarian endeavors, whether they are research- or practitioner-oriented; the source code for generating the corpora is available on GitHub. To compare materials, an exploratory analysis of definitional and generic-specific information was conducted for the concept of ARMED ACTOR with lexical data extracted from an English legacy corpus (where the concept is underrepresented) as well as on the new English and Spanish corpora. Lexical data were compared among corpora and presented by means of online data visualization to illustrate its potential to inform conceptual modelling. 2024.lrec-main.738 @@ -8742,8 +8742,8 @@ Humans Need Context, What about Machines? Investigating Conversational Context in Abusive Language Detection TomBourgeade ZongminLi - FarahBenamara - VéroniqueMoriceau + FarahBenamara + VéroniqueMoriceau JianSu AixinSun 8438–8452 @@ -8756,7 +8756,7 @@ Wolfgang S.Schmeisser-Nieto PolPastells SimonaFrenda - MarionaTaule + MarionaTaule 8453–8463 The increasing popularity of natural language processing has led to a race to improve machine learning models that often leaves aside the core study object, the language itself. In this study, we present classification models designed to detect stereotypes related to immigrants, along with both quantitative and qualitative analyses, shedding light on linguistic distinctions in how humans and various models perceive stereotypes. Given the subjective nature of this task, one of the models incorporates the judgments of all annotators by utilizing soft labels. Through a comparative analysis of BERT-based models using both hard and soft labels, along with predictions from GPT-4, we gain a clearer understanding of the linguistic challenges posed by texts containing stereotypes. Our dataset comprises Spanish Twitter posts collected as responses to immigrant-related hoaxes, annotated with binary values indicating the presence of stereotypes, implicitness, and the requirement for conversational context to understand the stereotype. Our findings suggest that both model prediction confidence and inter-annotator agreement are higher for explicit stereotypes, while stereotypes conveyed through irony and other figures of speech prove more challenging to detect than other implicit stereotypes. 2024.lrec-main.741 @@ -8764,7 +8764,7 @@ Hybrid of Spans and Table-Filling for Aspect-Level Sentiment Triplet Extraction - MinghuaNuo + MinghuaNuo ChaofanGuo 8464–8473 Aspect Sentiment Triplet Extraction (ASTE) has become an emerging task in sentiment analysis research. Recently, researchers have proposed different tagging schemes, containing tagging of words, tagging of word pairs, and tagging of spans. However, the first two of these methods are often insufficient for the identification of multi-word terms, while the span tagging can label the entire phrase span, but it lacks the interactive information between words. In this paper, we propose Span in Table(S&T) model which combining span with table-filling. Specifically, S&T model achieve full fusion of syntactic and contextual features through cross-attention and generate the structures of word-pair table through Biaffine. Then, our model converts it to a span table by computing semantic distance based on syntactic dependency tree, which can enrich each unit of span table with semantic and interactive information. Meanwhile, the initial sentence features are constructed as simple phrase tables to enhance textual information of the phrase itself. In decoding, we define 8 types of labels for identifying three dimensions including aspect, opinion, and sentiment. Finally, the extensive experiments on D2 dataset show S&T model achieves competitive results in ASTE task, the results certify the effectiveness and robustness of our S&T model. @@ -8830,7 +8830,7 @@ <fixed-case>HYRR</fixed-case>: Hybrid Infused Reranking for Passage Retrieval JingLu - KeithHall + KeithHall JiMa JianmoNi 8528–8534 @@ -8879,7 +8879,7 @@ Foivos IoannisTzavellos Bas MarcoGöritzer Marijnten Thij - RizaBatista-Navarro + RizaBatista-Navarro 8569–8579 Idiomatic expressions are used in everyday language and typically convey affect, i.e., emotion. However, very little work investigating the extent to which automated methods can recognise emotions expressed in idiom-containing text has been undertaken. This can be attributed to the lack of emotion-labelled datasets that support the development and evaluation of such methods. In this paper, we present the IDioms with EMotions (IDEM) dataset consisting of a total of 9685 idiom-containing sentences that were generated and labelled with any one of 36 emotion types, with the help of the GPT-4 generative language model. Human validation by two independent annotators showed that more than 51% of the generated sentences are ideal examples, with the annotators reaching an agreement rate of 62% measured in terms of Cohen’s Kappa coefficient. To establish baseline performance on IDEM, various transformer-based emotion recognition approaches were implemented and evaluated. Results show that a RoBERTa model fine-tuned as a sequence classifier obtains a weighted F1-score of 58.73%, when the sequence provided as input specifies the idiom contained in a given sentence, together with its definition. Since this input configuration is based on the assumption that the idiom contained in the given sentence is already known, we also sought to assess the feasibility of automatically identifying the idioms contained in IDEM sentences. To this end, a hybrid idiom identification approach combining a rule-based method and a deep learning-based model was developed, whose performance on IDEM was determined to be 84.99% in terms of F1-score. 2024.lrec-main.752 @@ -8887,7 +8887,7 @@ Identifying and Aligning Medical Claims Made on Social Media with Medical Evidence - AnthonyHughes + AnthonyHughes XingyiSong 8580–8593 Evidence-based medicine is the practise of making medical decisions that adhere to the latest, and best known evidence at that time. Currently, the best evidence is often found in the form of documents, such as randomized control trials, meta-analyses and systematic reviews. This research focuses on aligning medical claims made on social media platforms with this medical evidence. By doing so, individuals without medical expertise can more effectively assess the veracity of such medical claims. We study three core tasks: identifying medical claims, extracting medical vocabulary from these claims, and retrieving evidence relevant to those identified medical claims. We propose a novel system that can generate synthetic medical claims to aid each of these core tasks. We additionally introduce a novel dataset produced by our synthetic generator that, when applied to these tasks, demonstrates not only a more flexible and holistic approach, but also an improvement in all comparable metrics. We make our dataset, the Expansive Medical Claim Corpus (EMCC), available at https://zenodo.org/records/8321460. @@ -8918,7 +8918,7 @@ Ideological Knowledge Representation: Framing Climate Change in <fixed-case>E</fixed-case>co<fixed-case>L</fixed-case>exicon ArianneReimerink MelaniaCabezas-García - PilarLeón-Araúz + PilarLeón-Araúz PamelaFaber 8617–8626 Culture is underrepresented in terminological resources and ideology is an especially complicated cultural aspect to convey. This complexity stems from the intertwined relationships among the discourse community of politicians, the media and the general public, as well as their interactions with scientific knowledge. Nevertheless, terminological resources should provide the necessary information to understand the political perspective taken in discourse on scientific issues with a high political profile. As in all specialized domains, environmental concepts and terms are subject to dynamism and variation (León-Araúz, 2017). Cognitive term variants (e.g., climate change, climate crisis) are of particular interest because of their presence in political discourse and their potential to influence climate actions. They can be used to reflect multidimensionality, imprecision or ideological attachment. This paper describes a method based on framing in Communication Studies to extract ideological knowledge from corpora. We used Spanish and English parliamentary debates (ParlaMint 2.1) and annotated the interventions that included a term variant of climate change according to an adapted version of the frames proposed by Bolsen and Shapiro (2018). The results showed how climate change discourse changes across de ideological spectrum and we give a proposal on how to represent that knowledge in an environmental TKB on the environment. @@ -8966,7 +8966,7 @@ Impoverished Language Technology: The Lack of (Social) Class in <fixed-case>NLP</fixed-case> AmandaCercas Curry - ZeerakTalat + ZeerakTalat DirkHovy 8675–8682 Since Labov’s foundational 1964 work on the social stratification of language, linguistics has dedicated concerted efforts towards understanding the relationships between socio-demographic factors and language production and perception. Despite the large body of evidence identifying significant relationships between socio-demographic factors and language production, relatively few of these factors have been investigated in the context of NLP technology. While age and gender are well covered, Labov’s initial target, socio-economic class, is largely absent. We survey the existing Natural Language Processing (NLP) literature and find that only 20 papers even mention socio-economic status. However, the majority of those papers do not engage with class beyond collecting information of annotator-demographics. Given this research lacuna, we provide a definition of class that can be operationalised by NLP researchers, and argue for including socio-economic class in future language technologies. @@ -8977,7 +8977,7 @@ Improved Neural Protoform Reconstruction via Reflex Prediction LiangLu JingzhiWang - David R.Mortensen + David R.Mortensen 8683–8707 Protolanguage reconstruction is central to historical linguistics. The comparative method, one of the most influential theoretical and methodological frameworks in the history of the language sciences, allows linguists to infer protoforms (reconstructed ancestral words) from their reflexes (related modern words) based on the assumption of regular sound change. Not surprisingly, numerous computational linguists have attempted to operationalize comparative reconstruction through various computational models, the most successful of which have been supervised encoder-decoder models, which treat the problem of predicting protoforms given sets of reflexes as a sequence-to-sequence problem. We argue that this framework ignores one of the most important aspects of the comparative method: not only should protoforms be inferable from cognate sets (sets of related reflexes) but the reflexes should also be inferable from the protoforms. Leveraging another line of research—reflex prediction—we propose a system in which candidate protoforms from a reconstruction model are reranked by a reflex prediction model. We show that this more complete implementation of the comparative method allows us to surpass state-of-the-art protoform reconstruction methods on three of four Chinese and Romance datasets. 2024.lrec-main.762 @@ -9155,7 +9155,7 @@ Improving Personalized Sentiment Representation with Knowledge-enhanced and Parameter-efficient Layer Normalization YouZhang JinWang - Liang-ChihYu + Liang-ChihYu DanXu XuejieZhang 8877–8889 @@ -9214,7 +9214,7 @@ Improving the Robustness of Large Language Models via Consistency Alignment YukunZhao LingyongYan - WeiweiSun + WeiweiSun GuoliangXing ShuaiqiangWang ChongMeng @@ -9284,7 +9284,7 @@ Incorporating Word-level Phonemic Decoding into Readability Assessment ChristinePinney - CaseyKennington + CaseyKennington Maria SoledadPera KatherineLandau Wright Jerry AlanFails @@ -9298,7 +9298,7 @@ SohomGhosh ArnabMaji AswarthaNarayana - Sudip KumarNaskar + Sudip KumarNaskar 9010–9018 Applications of Natural Language Processing (NLP) in the finance domain have been very popular of late. For financial NLP, (FinNLP) while various datasets exist for widely spoken languages like English and Chinese, datasets are scarce for low resource languages,particularly for Indian languages. In this paper, we address this challenges by presenting IndicFinNLP – a collection of 9 datasets consisting of three tasks relating to FinNLP for three Indian languages. These tasks are Exaggerated Numeral Detection, Sustainability Classification, and ESG Theme Determination of financial texts in Hindi, Bengali, and Telugu. Moreover, we release the datasets under CC BY-NC-SA 4.0 license for the benefit of the research community. 2024.lrec-main.789 @@ -9317,7 +9317,7 @@ <fixed-case>I</fixed-case>ndirect<fixed-case>QA</fixed-case>: Understanding Indirect Answers to Implicit Polar Questions in <fixed-case>F</fixed-case>rench and <fixed-case>S</fixed-case>panish ChristinMüller - BarbaraPlank + BarbaraPlank 9025–9035 Polar questions are common in dialogue and expect exactly one of two answers (yes/no). It is however not uncommon for speakers to bypass these expected choices and answer, for example, “Islands are generally by the sea” to the question: “An island? By the sea?”. While such answers are natural in spoken dialogues, conversational systems still struggle to interpret them. Seminal work to interpret indirect answers were made in recent years—but only for English and with strict question formulations. In this work, we present a new corpus for French and Spanish—IndirectQA —where we mine subtitle data for indirect answers to study the labeling task with six different labels, while broadening polar questions to include also implicit polar questions (statements that trigger a yes/no-answer which are not necessarily formulated as a question). We opted for subtitles since they are a readily available source of conversation in various languages, but also come with peculiarities and challenges which we will discuss. Overall, we provide the first results on French and Spanish. They show that the task is challenging: the baseline accuracy scores drop from 61.43 on English to 44.06 for French and Spanish. 2024.lrec-main.791 @@ -9386,7 +9386,7 @@ AnuragShukla TanujaGanu VivekSeshadri - SandipanDandapat + SandipanDandapat MonojitChoudhury KalikaBali 9097–9109 @@ -9438,7 +9438,7 @@ Intention and Face in Dialog AdilSoubki - OwenRambow + OwenRambow 9143–9153 The notion of face described by Brown and Levinson (1987) has been studied in great detail, but a critical aspect of the framework, that which focuses on how intentions mediate the planning of turns which impose upon face, has received far less attention. We present an analysis of three computational systems trained for classifying both intention and politeness, focusing on how the former influences the latter. In politeness theory, agents attend to the desire to have their wants appreciated (positive face), and a complementary desire to act unimpeded and maintain freedom (negative face). Similar to speech acts, utterances can perform so-called face acts which can either raise or threaten the positive or negative face of the speaker or hearer. We begin by using an existing corpus to train a model which classifies face acts, achieving a new SoTA in the process. We then observe that every face act has an underlying intention that motivates it and perform additional experiments integrating dialog act annotations to provide these intentions by proxy. Our analysis finds that dialog acts improve performance on face act detection for minority classes and points to a close relationship between aspects of face and intent. 2024.lrec-main.801 @@ -9518,7 +9518,7 @@ AnneVilnat SofianeEttayeb LouisTamames - PatrickParoubek + PatrickParoubek 9234–9244 We present a new question answering corpus in French designed to educational domain. To be useful in such domain, we have to propose more complex questions and to be able to justify the answers on validated material. We analyze some properties of this corpus. The last part of this paper will be devoted to present the first experiments we have carried out to demonstrate the value of this dataset for learning a Retrieval Augmented Genration framework. Different experiments are proposed, with an automatic evaluation. A human evaluation is proposed to confirm or infirm this automatic evaluation. 2024.lrec-main.808 @@ -9562,7 +9562,7 @@ KolaTubosun AnuoluwapoAremu IroroOrife - David IfeoluwaAdelani + David IfeoluwaAdelani 9296–9303 We introduce ÌròyìnSpeech corpus—a new dataset influenced by a desire to increase the amount of high quality, freely available, contemporary Yorùbá speech data that can be used for both Text-to-Speech (TTS) and Automatic Speech Recognition (ASR) tasks. We curated about 23,000 text sentences from the news and creative writing domains with an open license i.e., CC-BY-4.0 and asked multiple speakers to record each sentence. To encourage more participatory approach to data creation, we provide 5 000 utterances from the curated sentences to the Mozilla Common Voice platform to crowd-source the recording and validation of Yorùbá speech data. In total, we created about 42 hours of speech data recorded by 80 volunteers in-house, and 6 hours validated recordings on Mozilla Common Voice platform. Our evaluation on TTS shows that we can create a good quality general domain single-speaker TTS model for Yorùbá with as little 5 hours of speech by leveraging an end-to-end VITS architecture. Similarly, for ASR, we obtained a WER of 21.5. 2024.lrec-main.812 @@ -9618,7 +9618,7 @@ <fixed-case>ISO</fixed-case> 24617-12: A New Standard for Semantic Annotation - HarryBunt + HarryBunt 9361–9371 This paper presents ISO 24617-12, an annotation scheme for quantification phenomena in natural language., as part of the ISO Semantic Annotation Framework (ISO 24617). This scheme combines ideas from the theory of generalised quantifiers, from neo-Davidsonian event semantics, and from Discourse Representation Theory. The scheme consists of (1) an abstract syntax which defines ‘annotation structures’ as triples and other set-theoretic constructs of quantification-related concepts; (2) a reference representation of annotation structures (‘concrete syntax’); and (3) a compositional semantics of annotation structures. Together, these components define the markup language QuantML. This paper focuses on the identification and structuring of the semantic information useful for the characterisation of quantification in natural language and the interoperable representation of these information structures in QuantML. 2024.lrec-main.818 @@ -9662,7 +9662,7 @@ <fixed-case>IT</fixed-case>2<fixed-case>ACL</fixed-case> Learning Easy-to-Hard Instructions via 2-Phase Automated Curriculum Learning for Large Language Models YufeiHuang - DeyiXiong + DeyiXiong 9405–9421 Instruction tuning has demonstrated its superiority in unlocking the abilities of pre-trained large language models (LLMs), including their capability to respond to diverse human instructions and conduct complex reasoning. In order to further enhance the continuous learning capabilities of pre-trained LLMs, we explore the training process of instruction tuning through the lens of task sequences. We propose a 2-phase automated curriculum learning guided instruction tuning framework, IT2ACL that learns easy-to-hard instructions for LLMs in a self-adjusting dynamic manner. To facilitate curriculum learning from instructions, we propose a loss-driven progress signal for two-phase strategies: instruction prediction gain that decides the instruction level syllabus. Through comprehensive experiments on 70 Chinese datasets which have been grouped into 16 distinct task clusters, we demonstrate the effectiveness of our approach in eliciting latent ability in pre-trained LLMs and achieving superior performance across diverse tasks. 2024.lrec-main.822 @@ -9689,7 +9689,7 @@ It’s Not under the Lamppost: Expanding the Reach of Conversational <fixed-case>AI</fixed-case> ChristyDoran - Deborah A.Dahl + Deborah A.Dahl 9441–9451 Generic commercial language-based assistants have become ubiquitously available, originally in the form of smart speakers and mobile apps, and more recently in the form of systems based on generative AI. At first glance, their capabilities seem remarkable. Speech recognition works well, NLU mostly works, and access to back-end information sources is usually quite good. However, there is still a lot of work to be done. In the area of NLU in particular, focused probes into the capabilities of language-based assistants easily reveal significant areas of brittleness that demonstrate large gaps in their coverage. For example, the straightforward disjunctive query is this monday or tuesday elicited the nonsensical response it’s 2:50 p.m. many consider it to be the afternoon. These gaps are difficult to identify if the development process relies on training the system with an ongoing supply of natural user data, because this natural data can become distorted by a self-reinforcing feedback loop where the system ‘trains’ the user to produce data that works. This paper describes a process for collecting specific kinds of data to uncover these gaps and an annotation scheme for system responses, and includes examples of simple utterances that nonetheless fail to be correctly processed. The systems tested include both Conventional assistants, such as Amazon Alexa and Google Assistant, as well as GenAI systems, including ChatGPT and Bard/Gemini. We claim that these failures are due to a lack of attention to the full spectrum of input possibilities, and argue that systems would benefit from the inclusion of focused manual assessment to directly target likely gaps. 2024.lrec-main.825 @@ -9700,7 +9700,7 @@ MasaakiNagata MakotoMorishita KatsukiChousa - NorihitoYasuda + NorihitoYasuda 9452–9462 We constructed JaParaPat (Japanese-English Parallel Patent Application Corpus), a bilingual corpus of more than 300 million Japanese-English sentence pairs from patent applications published in Japan and the United States from 2000 to 2021. We obtained the publication of unexamined patent applications from the Japan Patent Office (JPO) and the United States Patent and Trademark Office (USPTO). We also obtained patent family information from the DOCDB, that is a bibliographic database maintained by the European Patent Office (EPO). We extracted approximately 1.4M Japanese-English document pairs, which are translations of each other based on the patent families, and extracted about 350M sentence pairs from the document pairs using a translation-based sentence alignment method whose initial translation model is bootstrapped from a dictionary-based sentence alignment. We experimentally improved the accuracy of the patent translations by 20 bleu points by adding more than 300M sentence pairs obtained from patent applications to 22M sentence pairs obtained from the web. 2024.lrec-main.826 @@ -9720,12 +9720,12 @@ Felix E.Herron MagaliNorré Massih RAmini - PierretteBouillon - IrisEshkol-Taravella + PierretteBouillon + IrisEshkol-Taravella EmmanuelleEsperança-Rodier ThomasFrançois LorraineGoeuriot - JérômeGoulian + JérômeGoulian MathieuLafourcade BenjaminLecouteux FrançoisPortet @@ -9853,7 +9853,7 @@ BinLiang XianWu YefengZheng - Kam-FaiWong + Kam-FaiWong 9578–9588 Dialogue policy learning (DPL) aims to determine an abstract representation (also known as action) to guide what the response should be. Typically, DPL is cast as a sequential decision problem across a series of predefined action candidates. However, such static and narrow actions can limit response diversity and impede the dialogue agent’s adaptability to new scenarios and edge cases. To overcome these challenges, we introduce a novel Joint Transformer Reinforcement Learning framework, coined as JoTR, where a text-to-text Transformer-based model is employed to directly generate dialogue actions. More concretely, JoTR formulates a token-grained policy, facilitating more dynamic and adaptable dialogue action generation without the need for predefined action candidates. This method not only enhances the diversity of responses but also significantly improves the system’s capability to manage unfamiliar scenarios. Furthermore, JoTR utilizes Reinforcement Learning with a reward-shaping mechanism to efficiently fine-tune the token-grained policy. This allows the model to evolve through interactions, thereby enhancing its performance over time. Our extensive evaluation demonstrates that JoTR surpasses previous state-of-the-art models, showing improvements of 9% and 13% in success rate, and 34% and 37% in the diversity of dialogue actions across two benchmark dialogue modeling tasks respectively. These results have been validated by both user simulators and human evaluators. Code and data are available at ://github.com/KwanWaiChung/JoTR. 2024.lrec-main.837 @@ -9936,8 +9936,8 @@ YilinWang MinghaoHu ZhenHuang - DongshengLi - DongYang + DongshengLi + DongYang XichengLu 9668–9680 The goal of knowledge graph completion (KGC) is to predict missing facts among entities. Previous methods for KGC re-ranking are mostly built on non-generative language models to obtain the probability of each candidate. Recently, generative large language models (LLMs) have shown outstanding performance on several tasks such as information extraction and dialog systems. Leveraging them for KGC re-ranking is beneficial for leveraging the extensive pre-trained knowledge and powerful generative capabilities. However, it may encounter new problems when accomplishing the task, namely mismatch, misordering and omission. To this end, we introduce KC-GenRe, a knowledge-constrained generative re-ranking method based on LLMs for KGC. To overcome the mismatch issue, we formulate the KGC re-ranking task as a candidate identifier sorting generation problem implemented by generative LLMs. To tackle the misordering issue, we develop a knowledge-guided interactive training method that enhances the identification and ranking of candidates. To address the omission issue, we design a knowledge-augmented constrained inference method that enables contextual prompting and controlled generation, so as to obtain valid rankings. Experimental results show that KG-GenRe achieves state-of-the-art performance on four datasets, with gains of up to 6.7% and 7.7% in the MRR and Hits@1 metric compared to previous methods, and 9.0% and 11.1% compared to that without re-ranking. Extensive analysis demonstrates the effectiveness of components in KG-GenRe. @@ -9991,7 +9991,7 @@ <fixed-case>KGC</fixed-case>onv, a Conversational Corpus Grounded in <fixed-case>W</fixed-case>ikidata QuentinBrabant - Lina M.Rojas Barahona + Lina M.Rojas Barahona GwénoléLecorvé ClaireGardent 9732–9742 @@ -10005,7 +10005,7 @@ DanielaJurášová MatúšŽilinec EduardŠubert - OndřejBojar + OndřejBojar 9743–9752 We present the Khan Academy Corpus totalling 10122 hours in 87394 recordings across 29 languages, where 43% of recordings (4252 hours) are equipped with human-written subtitles. The subtitle texts cover a total of 137 languages. The dataset was collected from open access Khan Academy lectures, benefiting from their manual transcripts and manual translations of the transcripts. The dataset can serve in creation or evaluation of multilingual speech recognition or translation systems, featuring a diverse set of subject domains. 2024.lrec-main.851 @@ -10192,7 +10192,7 @@ Konidioms Corpus: A Dataset of Idioms in <fixed-case>K</fixed-case>onkani Language Naziya MahamdulShaikh - Jyoti D.Pawar + Jyoti D.Pawar Mubarak BanuSayed 9932–9940 Konkani is a language spoken by a large number of people from the states located in the west coast of India. It is the official language of Goa state from the Indian subcontinent. Currently there is a lack of idioms corpus in the low-resource Konkani language. This paper aims to improve the progress in idiomatic sentence identification in order to enhance linguistic processing by creating the first corpus for idioms in the Konkani language. We select a unique list of 1597 idioms from multiple sources and proceed with a strictly controlled sentence creation procedure through crowdsourcing. This is followed by quality check of the sentences and annotation procedure by the experts in the Konkani language. We were able to build a good quality corpus comprising of 6520 sentences written in the Devanagari script of Konkani language. Analysis of the collected idioms and their usage in the created sentences revealed the dominance of selective domains like ‘human body’ in the creation and occurrences of idiomatic expressions in the Konkani language. This corpus is made publicly available. @@ -10341,7 +10341,7 @@ Language Models for Text Classification: Is In-Context Learning Enough? AleksandraEdwards - JoseCamacho-Collados + JoseCamacho-Collados 10058–10072 Recent foundational language models have shown state-of-the-art performance in many NLP tasks in zero- and few-shot settings. An advantage of these models over more standard approaches based on fine-tuning is the ability to understand instructions written in natural language (prompts), which helps them generalise better to different tasks and domains without the need for specific training data. This makes them suitable for addressing text classification problems for domains with limited amounts of annotated instances. However, existing research is limited in scale and lacks understanding of how text generation models combined with prompting techniques compare to more established methods for text classification such as fine-tuning masked language models. In this paper, we address this research gap by performing a large-scale evaluation study for 16 text classification datasets covering binary, multiclass, and multilabel problems. In particular, we compare zero- and few-shot approaches of large language models to fine-tuning smaller language models. We also analyse the results by prompt, classification type, domain, and number of labels. In general, the results show how fine-tuning smaller and more efficient language models can still outperform few-shot approaches of larger language models, which have room for improvement when it comes to text classification. 2024.lrec-main.879 @@ -10436,7 +10436,7 @@ Large Language Models Offer an Alternative to the Traditional Approach of Topic Modelling YidaMu ChunDong - KalinaBontcheva + KalinaBontcheva XingyiSong 10160–10171 Topic modelling, as a well-established unsupervised technique, has found extensive use in automatically detecting significant topics within a corpus of documents. However, classic topic modelling approaches (e.g., LDA) have certain drawbacks, such as the lack of semantic understanding and the presence of overlapping topics. In this work, we investigate the untapped potential of large language models (LLMs) as an alternative for uncovering the underlying topics within extensive text corpora. To this end, we introduce a framework that prompts LLMs to generate topics from a given set of documents and establish evaluation protocols to assess the clustering efficacy of LLMs. Our findings indicate that LLMs with appropriate prompts can stand out as a viable alternative, capable of generating relevant topic titles and adhering to human guidelines to refine and merge topics. Through in-depth experiments and evaluation, we summarise the advantages and constraints of employing LLMs in topic extraction. @@ -10629,9 +10629,9 @@ LinyuFan Wu WuYiheng JunXie - JunhuiLi + JunhuiLi FangKong - GuodongZhou + GuodongZhou 10336–10346 Thanks to the development of pre-trained sequence-to-sequence (seq2seq) models (e.g., BART), recent studies on AMR parsing often regard this task as a seq2seq translation problem by linearizing AMR graphs into AMR token sequences in pre-processing and recovering AMR graphs from sequences in post-processing. Seq2seq AMR parsing is a relatively simple paradigm but it unavoidably loses structural information among AMR tokens. To compensate for the loss of structural information, in this paper we explicitly leverage AMR structure in the decoding phase. Given an AMR graph, we first project the structure in the graph into an AMR token graph, i.e., structure among AMR tokens in the linearized sequence. The structures for an AMR token could be divided into two parts: structure in prediction history and structure in future. Then we propose to model structure in prediction history via a graph attention network (GAT) and learn structure in future via a multi-task scheme, respectively. Experimental results show that our approach significantly outperforms a strong baseline and achieves performance with 85.5 ±0.1 and 84.2 ±0.1 Smatch scores on AMR 2.0 and AMR 3.0, respectively 2024.lrec-main.903 @@ -10678,7 +10678,7 @@ HyeonseokMoon JaehyungSeo SugyeongEo - HeuiseokLim + HeuiseokLim 10380–10392 Counter-narrative generation, i.e., the generation of fact-based responses to hate speech with the aim of correcting discriminatory beliefs, has been demonstrated to be an effective method to combat hate speech. However, its effectiveness is limited by the resource-intensive nature of dataset construction processes and only focuses on the primary language. To alleviate this problem, we propose a Korean Hate Speech Counter Punch (KHSCP), a cost-effective counter-narrative generation method in the Korean language. To this end, we release the first counter-narrative generation dataset in Korean and pose two research questions. Under the questions, we propose an effective augmentation method and investigate the reasonability of a large language model to overcome data scarcity in low-resource environments by leveraging existing resources. In this regard, we conduct several experiments to verify the effectiveness of the proposed method. Our results reveal that applying pre-existing resources can improve the generation performance by a significant margin. Through deep analysis on these experiments, this work proposes the possibility of overcoming the challenges of generating counter-narratives in low-resource environments. 2024.lrec-main.907 @@ -10705,7 +10705,7 @@ JotsnaGowda BillDyer KevinTang - SarahMoeller + SarahMoeller 10403–10415 African American English (AAE) has received recent attention in the field of natural language processing (NLP). Efforts to address bias against AAE in NLP systems tend to focus on lexical differences. When the unique structures of AAE are considered, the solution is often to remove or neutralize the differences. This work leverages knowledge about the unique linguistic structures to improve automatic disambiguation of habitual and non-habitual meanings of “be” in naturally produced AAE transcribed speech. Both meanings are employed in AAE but examples of Habitual be are rare in already limited AAE data. Generally, representing additional syntactic information improves semantic disambiguation of habituality. Using an ensemble of classical machine learning models with a representation of the unique POS and dependency patterns of Habitual be, we show that integrating syntactic information improves the identification of habitual uses of “be” by about 65 F1 points over a simple baseline model of n-grams, and as much as 74 points. The success of this approach demonstrates the potential impact when we embrace, rather than neutralize, the structural uniqueness of African American English. 2024.lrec-main.909 @@ -10715,7 +10715,7 @@ Leveraging the Interplay between Syntactic and Acoustic Cues for Optimizing <fixed-case>K</fixed-case>orean <fixed-case>TTS</fixed-case> Pause Formation YejinJeon YunsuKim - Gary GeunbaeLee + Gary GeunbaeLee 10416–10421 Contemporary neural speech synthesis models have indeed demonstrated remarkable proficiency in synthetic speech generation as they have attained a level of quality comparable to that of human-produced speech. Nevertheless, it is important to note that these achievements have predominantly been verified within the context of high-resource languages such as English. Furthermore, the Tacotron and FastSpeech variants show substantial pausing errors when applied to the Korean language, which affects speech perception and naturalness. In order to address the aforementioned issues, we propose a novel framework that incorporates comprehensive modeling of both syntactic and acoustic cues that are associated with pausing patterns. Remarkably, our framework possesses the capability to consistently generate natural speech even for considerably more extended and intricate out-of-domain (OOD) sentences, despite its training on short audio clips. Architectural design choices are validated through comparisons with baseline models and ablation studies using subjective and objective metrics, thus confirming model performance. 2024.lrec-main.910 @@ -10763,7 +10763,7 @@ <fixed-case>LFED</fixed-case>: A Literary Fiction Evaluation Dataset for Large Language Models LinhaoYu QunLiu - DeyiXiong + DeyiXiong 10466–10475 The rapid evolution of large language models (LLMs) has ushered in the need for comprehensive assessments of their performance across various dimensions. In this paper, we propose LFED, a Literary Fiction Evaluation Dataset, which aims to evaluate the capability of LLMs on the long fiction comprehension and reasoning. We collect 95 literary fictions that are either originally written in Chinese or translated into Chinese, covering a wide range of topics across several centuries. We define a question taxonomy with 8 question categories to guide the creation of 1,304 questions. Additionally, we conduct an in-depth analysis to ascertain how specific attributes of literary fictions (e.g., novel types, character numbers, the year of publication) impact LLM performance in evaluations. Through a series of experiments involving various state-of-the-art LLMs, our findings reveal that these models face considerable challenges in effectively addressing questions related to literary fictions, with ChatGPT reaching only 57.08% under the zero-shot setting. The dataset will be publicly available at https://github.com/tjunlp-lab/LFED.git. 2024.lrec-main.915 @@ -10774,7 +10774,7 @@ ChuangLiu RenrenJin YuqiRen - DeyiXiong + DeyiXiong 10476–10487 Chinese Large Language Models (LLMs) have recently demonstrated impressive capabilities across various NLP benchmarks and real-world applications. However, the existing benchmarks for comprehensively evaluating these LLMs are still insufficient, particularly in terms of measuring knowledge that LLMs capture. Current datasets collect questions from Chinese examinations across different subjects and educational levels to address this issue. Yet, these benchmarks primarily focus on objective questions such as multiple-choice questions, leading to a lack of diversity in question types. To tackle this problem, we propose LHMKE, a Large-scale, Holistic, and Multi-subject Knowledge Evaluation benchmark in this paper. LHMKE is designed to provide a comprehensive evaluation of the knowledge acquisition capabilities of Chinese LLMs. It encompasses 10,465 questions across 75 tasks covering 30 subjects, ranging from primary school to professional certification exams. Notably, LHMKE includes both objective and subjective questions, offering a more holistic evaluation of the knowledge level of LLMs. We have assessed 11 Chinese LLMs under the zero-shot setting, which aligns with real examinations, and compared their performance across different subjects. We also conduct an in-depth analysis to check whether GPT-4 can automatically score subjective predictions. Our findings suggest that LHMKE is a challenging and advanced testbed for Chinese LLMs. 2024.lrec-main.916 @@ -10811,9 +10811,9 @@ EmmanuelleEsperança-Rodier RomaneGallienne Carlos-EmilianoGonzález-Gallardo - JérômeGoulian - Jose G.Moreno - AurélieNévéol + JérômeGoulian + Jose G.Moreno + AurélieNévéol DidierSchwab VincentSegonne JohannaSimoens @@ -10825,12 +10825,12 @@ Linear Cross-document Event Coreference Resolution with <fixed-case>X</fixed-case>-<fixed-case>AMR</fixed-case> Shafiuddin RehanAhmed - George ArthurBaker + George ArthurBaker EviJudge MichaelReagan KristinWright-Bettner - MarthaPalmer - James H.Martin + MarthaPalmer + James H.Martin 10517–10529 Event Coreference Resolution (ECR) as a pairwise mention classification task is expensive both for automated systems and manual annotations. The task’s quadratic difficulty is exacerbated when using Large Language Models (LLMs), making prompt engineering for ECR prohibitively costly. In this work, we propose a graphical representation of events, X-AMR, anchored around individual mentions using a cross-document version of Abstract Meaning Representation. We then linearize the ECR with a novel multi-hop coreference algorithm over the event graphs. The event graphs simplify ECR, making it a) LLM cost-effective, b) compositional and interpretable, and c) easily annotated. For a fair assessment, we first enrich an existing ECR benchmark dataset with these event graphs using an annotator-friendly tool we introduce. Then, we employ GPT-4, the newest LLM by OpenAI, for these annotations. Finally, using the ECR algorithm, we assess GPT-4 against humans and analyze its limitations. Through this research, we aim to advance the state-of-the-art for efficient ECR and shed light on the potential shortcomings of current LLMs at this task. Code and annotations: https://github.com/ahmeshaf/gpt_coref 2024.lrec-main.920 @@ -10863,8 +10863,8 @@ Linguistic Nudges and Verbal Interaction with Robots, Smart-Speakers, and Humans NataliaKalashnikova - IoanaVasilescu - LaurenceDevillers + IoanaVasilescu + LaurenceDevillers 10555–10564 This paper describes a data collection methodology and emotion annotation of dyadic interactions between a human, a Pepper robot, a Google Home smart-speaker, or another human. The collected 16 hours of audio recordings were used to analyze the propensity to change someone’s opinions about ecological behavior regarding the type of conversational agent, the kind of nudges, and the speaker’s emotional state. We describe the statistics of data collection and annotation. We also report the first results, which showed that humans change their opinions on more questions with a human than with a device, even against mainstream ideas. We observe a correlation between a certain emotional state and the interlocutor and a human’s propensity to be influenced. We also reported the results of the studies that investigated the effect of human likeness on speech using our data. 2024.lrec-main.923 @@ -10909,7 +10909,7 @@ Linking Judgement Text to Court Hearing Videos: <fixed-case>UK</fixed-case> <fixed-case>S</fixed-case>upreme <fixed-case>C</fixed-case>ourt as a Case Study HadeelSaadany - ConstantinOrasan + ConstantinOrasan SophieWalker CatherineBreslin 10598–10609 @@ -11058,7 +11058,7 @@ <fixed-case>L</fixed-case>o<fixed-case>NAS</fixed-case>: Elastic Low-Rank Adapters for Efficient Large Language Models - Juan PabloMunoz + Juan PabloMunoz JinjieYuan YiZheng NileshJain @@ -11092,7 +11092,7 @@ Look before You Leap: Dual Logical Verification for Knowledge-based Visual Question Generation XumengLiu WenyaGuo - YingZhang + YingZhang XuboLiu YuZhao ShenglongYu @@ -11114,7 +11114,7 @@ Low-Rank Prune-And-Factorize for Language Model Compression SiyuRen - Kenny Q.Zhu + Kenny Q.Zhu 10822–10832 The components underpinning PLMs—large weight matrices—were shown to bear considerable redundancy. Matrix factorization, a well-established technique from matrix theory, has been utilized to reduce the number of parameters in PLM. However, it fails to retain satisfactory performance under moderate to high compression rates. In this paper, we identify the full-rankness of fine-tuned PLM as the fundamental bottleneck for the failure of matrix factorization and explore the use of network pruning to extract low-rank sparsity pattern desirable to matrix factorization. We find such a low-rank sparsity pattern exclusively exists in models generated by first-order pruning, which motivates us to unite the two approaches and achieve more effective model compression. We further propose two techniques: sparsity-aware SVD and mixed-rank fine-tuning, which improve the initialization and training of the compression procedure, respectively. Experiments on GLUE and question-answering tasks show that the proposed method has a superior compression-performance trade-off compared to existing approaches. 2024.lrec-main.945 @@ -11135,7 +11135,7 @@ YangBai AnthonyColas ChristanGrant - ZheWang + ZheWang 10846–10857 In recent research, contrastive learning has proven to be a highly effective method for representation learning and is widely used for dense retrieval. However, we identify that relying solely on contrastive learning can lead to suboptimal retrieval performance. On the other hand, despite many retrieval datasets supporting various learning objectives beyond contrastive learning, combining them efficiently in multi-task learning scenarios can be challenging. In this paper, we introduce M3, an advanced recursive Multi-hop dense sentence retrieval system built upon a novel Multi-task Mixed-objective approach for dense text representation learning, addressing the aforementioned challenges. Our approach yields state-of-the-art performance on a large-scale open-domain fact verification benchmark dataset, FEVER. 2024.lrec-main.947 @@ -11172,7 +11172,7 @@ <fixed-case>M</fixed-case>a<fixed-case>C</fixed-case>m<fixed-case>S</fixed-case>: <fixed-case>M</fixed-case>agahi Code-mixed Dataset for Sentiment Analysis PriyaRani TheodorusFransen - John P.McCrae + John P.McCrae GauravNegi 10880–10890 The present paper introduces new sentiment data, MaCMS, for Magahi-Hindi-English (MHE) code-mixed language, where Magahi is a less-resourced minority language. This dataset is the first Magahi-Hindi-English code-mixed dataset for sentiment analysis tasks. Further, we also provide a linguistics analysis of the dataset to understand the structure of code-mixing and a statistical study to understand the language preferences of speakers with different polarities. With these analyses, we also train baseline models to evaluate the dataset’s quality. @@ -11196,7 +11196,7 @@ TerryRuas JeromeWaßmuth AndréGreiner-Petter - AkikoAizawa + AkikoAizawa BelaGipp TimoSpinde 10903–10920 @@ -11211,8 +11211,8 @@ VerenaBlaschke BarbaraKovačić SiyaoPeng - HinrichSchütze - BarbaraPlank + HinrichSchütze + BarbaraPlank 10921–10938 Despite the success of the Universal Dependencies (UD) project exemplified by its impressive language breadth, there is still a lack in ‘within-language breadth’: most treebanks focus on standard languages. Even for German, the language with the most annotations in UD, so far no treebank exists for one of its language varieties spoken by over 10M people: Bavarian. To contribute to closing this gap, we present the first multi-dialect Bavarian treebank (MaiBaam) manually annotated with part-of-speech and syntactic dependency information in UD, covering multiple text genres (wiki, fiction, grammar examples, social, non-fiction). We highlight the morphosyntactic differences between the closely-related Bavarian and German and showcase the rich variability of speakers’ orthographies. Our corpus includes 15k tokens, covering dialects from all Bavarian-speaking areas spanning three countries. We provide baseline parsing and POS tagging results, which are lower than results obtained on German and vary substantially between different graph-based parsers. To support further research on Bavarian syntax, we make our dataset, language-specific guidelines and code publicly available. 2024.lrec-main.953 @@ -11223,7 +11223,7 @@ Tyler K.Bikaun TimFrench MichaelStewart - WeiLiu + WeiLiu MelindaHodkiewicz 10939–10951 Maintenance short texts (MST), derived from maintenance work order records, encapsulate crucial information in a concise yet information-rich format. These user-generated technical texts provide critical insights into the state and maintenance activities of machines, infrastructure, and other engineered assets–pillars of the modern economy. Despite their importance for asset management decision-making, extracting and leveraging this information at scale remains a significant challenge. This paper presents MaintIE, a multi-level fine-grained annotation scheme for entity recognition and relation extraction, consisting of 5 top-level classes: PhysicalObject, State, Process, Activity and Property and 224 leaf entities, along with 6 relations tailored to MSTs. Using MaintIE, we have curated a multi-annotator, high-quality, fine-grained corpus of 1,076 annotated texts. Additionally, we present a coarse-grained corpus of 7,000 texts and consider its performance for bootstrapping and enhancing fine-grained information extraction. Using these corpora, we provide model performance measures for benchmarking automated entity recognition and relation extraction. The MaintIE scheme, corpus, and model are publicly available at https://github.com/nlp-tlp/maintie under the MIT license, encouraging further community exploration and innovation in extracting valuable insights from MSTs. @@ -11268,7 +11268,7 @@ Making Sentence Embeddings Robust to User-Generated Content LydiaNishimwe - BenoîtSagot + BenoîtSagot RachelBawden 10984–10998 NLP models have been known to perform poorly on user-generated content (UGC), mainly because it presents a lot of lexical variations and deviates from the standard texts on which most of these models were trained. In this work, we focus on the robustness of LASER, a sentence embedding model, to UGC data. We evaluate this robustness by LASER’s ability to represent non-standard sentences and their standard counterparts close to each other in the embedding space. Inspired by previous works extending LASER to other languages and modalities, we propose RoLASER, a robust English encoder trained using a teacher-student approach to reduce the distances between the representations of standard and UGC sentences. We show that with training only on standard and synthetic UGC-like data, RoLASER significantly improves LASER’s robustness to both natural and artificial UGC data by achieving up to 2x and 11x better scores. We also perform a fine-grained analysis on artificial UGC data and find that our model greatly outperforms LASER on its most challenging UGC phenomena such as keyboard typos and social media abbreviations. Evaluation on downstream tasks shows that RoLASER performs comparably to or better than LASER on standard data, while consistently outperforming it on UGC data. @@ -11290,9 +11290,9 @@ m<fixed-case>ALBERT</fixed-case>: Is a Compact Multilingual <fixed-case>BERT</fixed-case> Model Still Worth It? - ChristopheServan + ChristopheServan SaharGhannay - SophieRosset + SophieRosset 11023–11029 Within the current trend of Pretained Language Models (PLM), emerge more and more criticisms about the ethical and ecological impact of such models. In this article, considering these critical remarks, we propose to focus on smaller models, such as compact models like ALBERT, which are more ecologically virtuous than these PLM. However, PLMs enable huge breakthroughs in Natural Language Processing tasks, such as Spoken and Natural Language Understanding, classification, Question–Answering tasks. PLMs also have the advantage of being multilingual, and, as far as we know, a multilingual version of compact ALBERT models does not exist. Considering these facts, we propose the free release of the first version of a multilingual compact ALBERT model, pre-trained using Wikipedia data, which complies with the ethical aspect of such a language model. We also evaluate the model against classical multilingual PLMs in classical NLP tasks. Finally, this paper proposes a rare study on the subword tokenization impact on language performances. 2024.lrec-main.960 @@ -11353,7 +11353,7 @@ Mathematical Entities: Corpora and Benchmarks JacobCollard - Valeriade Paiva + Valeriade Paiva EswaranSubrahmanian 11080–11089 Mathematics is a highly specialized domain with its own unique set of challenges. Despite this, there has been relatively little research on natural language processing for mathematical texts, and there are few mathematical language resources aimed at NLP. In this paper, we aim to provide annotated corpora that can be used to study the language of mathematics in different contexts, ranging from fundamental concepts found in textbooks to advanced research mathematics. We preprocess the corpora with a neural parsing model and some manual intervention to provide part-of-speech tags, lemmas, and dependency trees. In total, we provide 182397 sentences across three corpora. We then aim to test and evaluate several noteworthy natural language processing models using these corpora, to show how well they can adapt to the domain of mathematics and provide useful tools for exploring mathematical language. We evaluate several neural and symbolic models against benchmarks that we extract from the corpus metadata to show that terminology extraction and definition extraction do not easily generalize to mathematics, and that additional work is needed to achieve good performance on these metrics. Finally, we provide a learning assistant that grants access to the content of these corpora in a context-sensitive manner, utilizing text search and entity linking. Though our corpora and benchmarks provide useful metrics for evaluating mathematical language processing, further work is necessary to adapt models to mathematics in order to provide more effective learning assistants and apply NLP methods to different mathematical domains. @@ -11375,7 +11375,7 @@ <fixed-case>MCIL</fixed-case>: Multimodal Counterfactual Instance Learning for Low-resource Entity-based Multimodal Information Extraction BaohangZhou - YingZhang + YingZhang KehuiSong HongruWang YuZhao @@ -11451,15 +11451,15 @@ <fixed-case>M</fixed-case>ed<fixed-case>MT</fixed-case>5: An Open-Source Multilingual Text-to-Text <fixed-case>LLM</fixed-case> for the Medical Domain IkerGarcía-Ferrero - RodrigoAgerri + RodrigoAgerri AitziberAtutxa Salazar ElenaCabrio Ikerde la Iglesia - AlbertoLavelli - BernardoMagnini + AlbertoLavelli + BernardoMagnini BenjaminMolinet JohanaRamirez-Romero - GermanRigau + GermanRigau Jose MariaVilla-Gonzalez SerenaVillata AndreaZaninello @@ -11480,7 +11480,7 @@ <fixed-case>M</fixed-case>emory<fixed-case>P</fixed-case>rompt: A Light Wrapper to Improve Context Tracking in Pre-trained Language Models Nathanael CarrazRakotonirina - MarcoBaroni + MarcoBaroni 11187–11195 Transformer-based language models (LMs) track contextual information through large, hard-coded input windows. We introduce MemoryPrompt, a leaner approach in which the LM is complemented by a small auxiliary recurrent network that passes information to the LM by prefixing its regular input with a sequence of vectors, akin to soft prompts, without requiring LM finetuning. Tested on a task designed to probe a LM’s ability to keep track of multiple fact updates, a MemoryPrompt-augmented LM outperforms much larger LMs that have access to the full input history. We also test MemoryPrompt on a long-distance dialogue dataset, where its performance is comparable to that of a model conditioned on the entire conversation history. In both experiments we also observe that, unlike full-finetuning approaches, MemoryPrompt does not suffer from catastrophic forgetting when adapted to new tasks, thus not disrupting the generalist capabilities of the underlying LM. 2024.lrec-main.976 @@ -11488,7 +11488,7 @@ <fixed-case>M</fixed-case>ental<fixed-case>H</fixed-case>elp: A Multi-Task Dataset for Mental Health in Social Media - NishatRaihan + NishatRaihan Sadiya Sayara ChowdhuryPuspo ShafkatFarabi Ana-MariaBucur @@ -11504,7 +11504,7 @@ Alba MaríaMármol Romero AdriánMoreno-Muñoz Flor MiriamPlaza-Del-Arco - M. DoloresMolina-González + M. DoloresMolina-González ArturoMontejo-Ráez 11204–11214 With mental health issues on the rise on the Web, especially among young people, there is a growing need for effective identification and intervention. In this paper, we introduce a new open-sourced corpus for the early detection of mental disorders in Spanish, focusing on eating disorders, depression, and anxiety. It consists of user messages posted on groups within the Telegram message platform and contains over 1,300 subjects with more than 45,000 messages posted in different public Telegram groups. This corpus has been manually annotated via crowdsourcing and is prepared for its use in several Natural Language Processing tasks including text classification and regression tasks. The samples in the corpus include both text and time data. To provide a benchmark for future research, we conduct experiments on text classification and regression by using state-of-the-art transformer-based models. @@ -11571,7 +11571,7 @@ m<fixed-case>F</fixed-case>orms : Multimodal Form Filling with Question Answering LarryHeck SimonHeck - AnirudhSundar + AnirudhSundar 11262–11271 This paper presents a new approach to form-filling by reformulating the task as multimodal natural language Question Answering (QA). The reformulation is achieved by first translating the elements on the GUI form (text fields, buttons, icons, etc.) to natural language questions, where these questions capture the element’s multimodal semantics. After a match is determined between the form element (Question) and the user utterance (Answer), the form element is filled through a pre-trained extractive QA system. By leveraging pre-trained QA models and not requiring form-specific training, this approach to form-filling is zero-shot. The paper also presents an approach to further refine the form-filling by using multi-task training to incorporate a potentially large number of successive tasks. Finally, the paper introduces a multimodal natural language form-filling dataset Multimodal Forms (mForms), as well as a multimodal extension of the popular ATIS dataset to support future research and experimentation. Results show the new approach not only maintains robust accuracy for sparse training conditions but achieves state-of-the-art F1 of 0.97 on ATIS with approximately 1/10th the training data. 2024.lrec-main.984 @@ -11683,8 +11683,8 @@ Ibrahim SaidAhmad DeontaeSmith Praise-ELMichaels - David IfeoluwaAdelani - Derry TantiWijaya + David IfeoluwaAdelani + Derry TantiWijaya AnietieAndy 11349–11360 Low-resource languages often face challenges in acquiring high-quality language data due to the reliance on translation-based methods, which can introduce the translationese effect. This phenomenon results in translated sentences that lack fluency and naturalness in the target language. In this paper, we propose a novel approach for data collection by leveraging storyboards to elicit more fluent and natural sentences. Our method involves presenting native speakers with visual stimuli in the form of storyboards and collecting their descriptions without direct exposure to the source text. We conducted a comprehensive evaluation comparing our storyboard-based approach with traditional text translation-based methods in terms of accuracy and fluency. Human annotators and quantitative metrics were used to assess translation quality. The results indicate a preference for text translation in terms of accuracy, while our method demonstrates worse accuracy but better fluency in the language focused. @@ -11699,7 +11699,7 @@ ZhengMa JianbingZhang LiangHe - JiajunChen + JiajunChen 11361–11370 Relation extraction is a critical task in the field of natural language processing with numerous real-world applications. Existing research primarily focuses on monolingual relation extraction or cross-lingual enhancement for relation extraction. Yet, there remains a significant gap in understanding relation extraction in the mix-lingual (or code-switching) scenario, where individuals intermix contents from different languages within sentences, generating mix-lingual content. Due to the lack of a dedicated dataset, the effectiveness of existing relation extraction models in such a scenario is largely unexplored. To address this issue, we introduce a novel task of considering relation extraction in the mix-lingual scenario called MixRE and constructing the human-annotated dataset MixRED to support this task. In addition to constructing the MixRED dataset, we evaluate both state-of-the-art supervised models and large language models (LLMs) on MixRED, revealing their respective advantages and limitations in the mix-lingual scenario. Furthermore, we delve into factors influencing model performance within the MixRE task and uncover promising directions for enhancing the performance of both supervised models and LLMs in this novel task. 2024.lrec-main.993 @@ -11743,7 +11743,7 @@ <fixed-case>MLDSP</fixed-case>-<fixed-case>MA</fixed-case>: Multidimensional Attention for Multi-Round Long Dialogue Sentiment Prediction YunfeiYin CongruiZou - ZhengYuan + ZhengYuan XianjianBao 11405–11414 The intelligent chatbot takes dialogue sentiment prediction as the core, and it has to tackle long dialogue sentiment prediction problems in many real-world applications. Current state-of-the-art methods usually employ attention-based dialogue sentiment prediction models. However, as the conversation progresses, more topics are involved and the changes in sentiments become more frequent, which leads to a sharp decline in the accuracy and efficiency of the current methods. Therefore, we propose a Multi-round Long Dialogue Sentiment Prediction based on Multidimensional Attention (MLDSP-MA), which can focus on different topics. In particular, MLSDP-MA leverages a sliding window to capture different topics and traverses all historical dialogues. In each sliding window, the contextual dependency, sentiment persistence, and sentiment infectivity are characterized, and local attention cross fusion is performed. To learn dialogue sentiment globally, global attention is proposed to iteratively learn comprehensive sentiments from historical dialogues, and finally integrate with local attention. We conducted extensive experimental research on publicly available dialogue datasets. The experimental results show that, compared to the current state-of-the-art methods, our model improves by 3.5% in accuracy and 5.7% in Micro-F1 score. @@ -11783,7 +11783,7 @@ Claudiu DanielHromei DanieleMargiotta DaniloCroce - RobertoBasili + RobertoBasili 11440–11451 This paper explores Interactive Grounded Language Understanding (IGLU) challenges within Human-Robot Interaction (HRI). In this setting, a robot interprets user commands related to its environment, aiming to discern whether a specific command can be executed. If faced with ambiguities or incomplete data, the robot poses relevant clarification questions. Drawing from the NeurIPS 2022 IGLU competition, we enrich the dataset by introducing our multi-modal data and natural language descriptions in MM-IGLU: Multi-Modal Interactive Grounded Language Understanding. Utilizing a BART-based model that integrates the user’s statement with the environment’s description, and a cutting-edge Multi-Modal Large Language Model that merges both visual and textual data, we offer a valuable resource for ongoing research in the domain. Additionally, we discuss the evaluation methods for such tasks, highlighting potential limitations imposed by traditional string-match-based evaluations on this intricate multi-modal challenge. Moreover, we provide an evaluation benchmark based on human judgment to address the limits and capabilities of such baseline models. This resource is released on a dedicated GitHub repository at https://github.com/crux82/MM-IGLU. 2024.lrec-main.1000 @@ -11868,7 +11868,7 @@ FelixLange MeisamBooshehri MeghdutSengupta - PhilippCimiano + PhilippCimiano HenningWachsmuth 11523–11536 Explanations are pervasive in our lives. Mostly, they occur in dialogical form where an explainer discusses a concept or phenomenon of interest with an explainee. Leaving the explainee with a clear understanding is not straightforward due to the knowledge gap between the two participants. Previous research looked at the interaction of explanation moves, dialogue acts, and topics in successful dialogues with expert explainers. However, daily-life explanations often fail, raising the question of what makes a dialogue successful. In this work, we study explanation dialogues in terms of the interactions between the explainer and explainee and how they correlate with the quality of explanations in terms of a successful understanding on the explainee’s side. In particular, we first construct a corpus of 399 dialogues from the Reddit forum Explain Like I am Five and annotate it for interaction flows and explanation quality. We then analyze the interaction flows, comparing them to those appearing in expert dialogues. Finally, we encode the interaction flows using two language models that can handle long inputs, and we provide empirical evidence for the effectiveness boost gained through the encoding in predicting the success of explanation dialogues. @@ -12017,7 +12017,7 @@ HanjieZhao DanyanXing YuxiangJia - HongyingZan + HongyingZan 11669–11679 In medical information extraction, medical Named Entity Recognition (NER) is indispensable, playing a crucial role in developing medical knowledge graphs, enhancing medical question-answering systems, and analyzing electronic medical records. The challenge in medical NER arises from the complex nested structures and sophisticated medical terminologies, distinguishing it from its counterparts in traditional domains. In response to these complexities, we propose a medical NER model based on Machine Reading Comprehension (MRC), which uses a task-adaptive pre-training strategy to improve the model’s capability in the medical field. Meanwhile, our model introduces multiple word-pair embeddings and multi-granularity dilated convolution to enhance the model’s representation ability and uses a combined predictor of Biaffine and MLP to improve the model’s recognition performance. Experimental evaluations conducted on the CMeEE, a benchmark for Chinese nested medical NER, demonstrate that our proposed model outperforms the compared state-of-the-art (SOTA) models. 2024.lrec-main.1019 @@ -12066,7 +12066,7 @@ <fixed-case>MULTICOLLAB</fixed-case>: A Multimodal Corpus of Dialogues for Analyzing Collaboration and Frustration in Language MichaelPeechatt - Cecilia OvesdotterAlm + Cecilia OvesdotterAlm ReynoldBailey 11713–11722 This paper addresses an existing resource gap for studying complex emotional states when a speaker collaborates with a partner to solve a task. We present a novel dialogue resource — the MULTICOLLAB corpus — where two interlocutors, an instructor and builder, communicated through a Zoom call while sensors recorded eye gaze, facial action units, and galvanic skin response, with transcribed speech signals, resulting in a unique, heavily multimodal corpus. The builder received instructions from the instructor. Half of the builders were privately told to disobey the instructor’s directions. After the task, participants watched the Zoom recording and annotated their instances of frustration. In this study, we introduce this new corpus and perform computational experiments with time series transformers, using early fusion through time for sensor data and late fusion for speech transcripts. We then average predictions from both methods to recognize instructor frustration. Using sensor and speech data in a 4.5 second time window, we find that the fusion of both models yields 21% improvement in classification accuracy (with a precision of 79% and F1 of 63%) over a comparison baseline, demonstrating that complex emotions can be recognized when rich multimodal data from transcribed spoken dialogue and biophysical sensor data are fused. @@ -12076,7 +12076,7 @@ Multi-Dimensional Machine Translation Evaluation: Model Evaluation and Resource for <fixed-case>K</fixed-case>orean DojunPark - SebastianPadó + SebastianPadó 11723–11744 Almost all frameworks for the manual or automatic evaluation of machine translation characterize the quality of an MT output with a single number. An exception is the Multidimensional Quality Metrics (MQM) framework which offers a fine-grained ontology of quality dimensions for scoring (such as style, fluency, accuracy, and terminology). Previous studies have demonstrated the feasibility of MQM annotation but there are, to our knowledge, no computational models that predict MQM scores for novel texts, due to a lack of resources. In this paper, we address these shortcomings by (a) providing a 1200-sentence MQM evaluation benchmark for the language pair English-Korean and (b) reframing MT evaluation as the multi-task problem of simultaneously predicting several MQM scores using SOTA language models, both in a reference-based MT evaluation setup and a reference-free quality estimation (QE) setup. We find that reference-free setup outperforms its counterpart in the style dimension while reference-based models retain an edge regarding accuracy. Overall, RemBERT emerges as the most promising model. Through our evaluation, we offer an insight into the translation quality in a more fine-grained, interpretable manner. 2024.lrec-main.1024 @@ -12115,7 +12115,7 @@ <fixed-case>M</fixed-case>ulti<fixed-case>L</fixed-case>eg: Dataset for Text Sanitisation in Less-resourced Languages RinaldsVīksna - IngunaSkadiņa + IngunaSkadiņa 11776–11782 Text sanitization is the task of detecting and removing personal information from the text. While it has been well-studied in monolingual settings, today, there is also a need for multilingual text sanitization. In this paper, we introduce MultiLeg: a parallel, multilingual named entity (NE) dataset consisting of documents from the Court of Justice of the European Union annotated with semantic categories suitable for text sanitization. The dataset is available in 8 languages, and it contains 3082 parallel text segments for each language. We also show that the pseudonymized dataset remains useful for downstream tasks. 2024.lrec-main.1028 @@ -12132,24 +12132,24 @@ ChiaraCantone SaraCarvalho FrancescaFrontini - RadovanGarabik + RadovanGarabik JorgeGracia LetiziaGranata - FahadKhan + FahadKhan TimotejKnez - PennyLabropoulou + PennyLabropoulou ChayaLiebeskind Maria PiaDi Buono AnaOstroški Anić SigitaRackevičienė RicardoRodrigues - GillesSérasset + GillesSérasset LinasSelmistraitis MahammadouSidibé PurificaçãoSilvano BlerinaSpahiu EnriketaSogutlu - RankaStanković + RankaStanković Ciprian-OctavianTruică GiedreValunaite Oleskeviciene SlavkoZitnik @@ -12174,7 +12174,7 @@ Multilingual Coreference Resolution in Low-resource <fixed-case>S</fixed-case>outh <fixed-case>A</fixed-case>sian Languages RitwikMishra PoojaDesur - Rajiv RatnShah + Rajiv RatnShah PonnurangamKumaraguru 11813–11826 Coreference resolution involves the task of identifying text spans within a discourse that pertain to the same real-world entity. While this task has been extensively explored in the English language, there has been a notable scarcity of publicly accessible resources and models for coreference resolution in South Asian languages. We introduce a Translated dataset for Multilingual Coreference Resolution (TransMuCoRes) in 31 South Asian languages using off-the-shelf tools for translation and word-alignment. Nearly all of the predicted translations successfully pass a sanity check, and 75% of English references align with their predicted translations. Using multilingual encoders, two off-the-shelf coreference resolution models were trained on a concatenation of TransMuCoRes and a Hindi coreference resolution dataset with manual annotations. The best performing model achieved a score of 64 and 68 for LEA F1 and CoNLL F1, respectively, on our test-split of Hindi golden set. This study is the first to evaluate an end-to-end coreference resolution model on a Hindi golden set. Furthermore, this work underscores the limitations of current coreference evaluation metrics when applied to datasets with split antecedents, advocating for the development of more suitable evaluation metrics. @@ -12219,7 +12219,7 @@ Multilingual Substitution-based Word Sense Induction DenisKokosinskii - NikolayArefyev + NikolayArefyev 11859–11872 Word Sense Induction (WSI) is the task of discovering senses of an ambiguous word by grouping usages of this word into clusters corresponding to these senses. Many approaches were proposed to solve WSI in English and a few other languages, but these approaches are not easily adaptable to new languages. We present multilingual substitution-based WSI methods that support any of 100 languages covered by the underlying multilingual language model with minimal to no adaptation required. Despite the multilingual capabilities, our methods perform on par with the existing monolingual approaches on popular English WSI datasets. At the same time, they will be most useful for lower-resourced languages which miss lexical resources available for English, thus, have higher demand for unsupervised methods like WSI. 2024.lrec-main.1035 @@ -12262,9 +12262,9 @@ AbhijnanNath HumaJamil Shafiuddin RehanAhmed - George ArthurBaker + George ArthurBaker RahulGhosh - James H.Martin + James H.Martin NathanielBlanchard NikhilKrishnaswamy 11901–11916 @@ -12310,7 +12310,7 @@ MengHan RuofeiLai XinyuZhang - XuanjingHuang + XuanjingHuang ZhongyuWei 11944–11955 Product review summarization aims to generate a concise summary based on product reviews to facilitate purchasing decisions. This intricate task gives rise to three challenges in existing work: factual accuracy, aspect comprehensiveness, and content relevance. In this paper, we first propose an FB-Thinker framework to improve the summarization ability of LLMs with multi-objective forward reasoning and multi-reward backward refinement. To enable LLM with these dual capabilities, we present two Chinese product review summarization datasets, Product-CSum and Product-CSum-Cross, for both instruction-tuning and cross-domain evaluation. Specifically, these datasets are collected via GPT-assisted manual annotations from an online forum and public datasets. We further design an evaluation mechanism Product-Eval, integrating both automatic and human evaluation across multiple dimensions for product summarization. Experimental results show the competitiveness and generalizability of our proposed framework in the product review summarization tasks. @@ -12390,7 +12390,7 @@ <fixed-case>MWE</fixed-case>-Finder: A Demonstration - JanOdijk + JanOdijk MartinKroon TijmenBaarda BenBonfil @@ -12415,9 +12415,9 @@ My Science Tutor (<fixed-case>M</fixed-case>y<fixed-case>ST</fixed-case>)–a Large Corpus of Children’s Conversational Speech - SameerPradhan - Ronald A.Cole - Wayne H.Ward + SameerPradhan + Ronald A.Cole + Wayne H.Ward 12040–12045 This article describes the [corpus-name] corpus developed as part of the [project-name] project. To the best of our knowledge, this is one of the largest collections of children’s conversational speech that is freely available for non-commercial use under the creative commons license (CC BY-NC-SA 4.0). It comprises approximately 400 hours of speech, spanning some 230K utterances spread across about 10,500 virtual tutor sessions. Roughly 1,300 third, fourth and fifth grade students contributed to this corpus. The current release contains roughly 100K transcribed utterances. It is our hope that the corpus can be used to improve automatic speech recognition models and algorithms. We report the word error rate achieved on the test set using a model trained on the training and development portion of the corpus. The git repository of the corpus contains the complete training and evaluation setup in order to facilitate a fair and consistent evaluation. It is our hope that this corpus will contribute to the creation and evaluation of conversational AI agents having a better understanding of children’s speech, potentially opening doors to novel, effective, learning and therapeutic interventions. 2024.lrec-main.1052 @@ -12456,8 +12456,8 @@ WilliamThorne AmbroseRobinson NikolaosAletras - CarolinaScarton - KalinaBontcheva + CarolinaScarton + KalinaBontcheva XingyiSong 12074–12086 Instruction-tuned Large Language Models (LLMs) have exhibited impressive language understanding and the capacity to generate responses that follow specific prompts. However, due to the computational demands associated with training these models, their applications often adopt a zero-shot setting. In this paper, we evaluate the zero-shot performance of two publicly accessible LLMs, ChatGPT and OpenAssistant, in the context of six Computational Social Science classification tasks, while also investigating the effects of various prompting strategies. Our experiments investigate the impact of prompt complexity, including the effect of incorporating label definitions into the prompt; use of synonyms for label names; and the influence of integrating past memories during foundation model training. The findings indicate that in a zero-shot setting, current LLMs are unable to match the performance of smaller, fine-tuned baseline transformer models (such as BERT-large). Additionally, we find that different prompting strategies can significantly affect classification accuracy, with variations in accuracy and F1 scores exceeding 10%. @@ -12476,7 +12476,7 @@ Negation Scope Conversion: Towards a Unified Negation-Annotated Dataset AsahiYoshida - YoshihideKato + YoshihideKato ShigekiMatsubara 12093–12099 Negation scope resolution is the task that identifies the part of a sentence affected by the negation cue. The three major corpora used for this task, the BioScope corpus, the SFU review corpus and the Sherlock dataset, have different annotation schemes for negation scope. Due to the different annotations, the negation scope resolution models based on pre-trained language models (PLMs) perform worse when fine-tuned on the simply combined dataset consisting of the three corpora. To address this issue, we propose a method for automatically converting the scopes of BioScope and SFU to those of Sherlock and merge them into a unified dataset. To verify the effectiveness of the proposed method, we conducted experiments using the unified dataset for fine-tuning PLM-based models. The experimental results demonstrate that the performances of the models increase when fine-tuned on the unified dataset unlike the simply combined one. In the token-level metric, the model fine-tuned on the unified dataset archived the state-of-the-art performance on the Sherlock dataset. @@ -12527,7 +12527,7 @@ YantaoLiu SaipingGuan JiafengGuo - XueqiCheng + XueqiCheng 12127–12137 Nested Event Extraction (NEE) aims to extract complex event structures where an event contains other events as its arguments recursively. Nested events involve a kind of Pivot Elements (PEs) that simultaneously act as arguments of outer-nest events and as triggers of inner-nest events, and thus connect them into nested structures. This special characteristic of PEs brings challenges to existing NEE methods, as they cannot well cope with the dual identities of PEs. Therefore, this paper proposes a new model, called PerNee, which extracts nested events mainly based on recognizing PEs. Specifically, PerNee first recognizes the triggers of both inner-nest and outer-nest events and further recognizes the PEs via classifying the relation type between trigger pairs. The model uses prompt learning to incorporate information from both event types and argument roles for better trigger and argument representations to improve NEE performance. Since existing NEE datasets (e.g., Genia11) are limited to specific domains and contain a narrow range of event types with nested structures, we systematically categorize nested events in the generic domain and construct a new NEE dataset, called ACE2005-Nest. Experimental results demonstrate that PerNee consistently achieves state-of-the-art performance on ACE2005-Nest, Genia11, and Genia13. The ACE2005-Nest dataset and the code of the PerNee model are available at https://github.com/waysonren/PerNee. 2024.lrec-main.1061 @@ -12596,7 +12596,7 @@ New Methods for Exploring Intonosyntax: Introducing an Intonosyntactic Treebank for <fixed-case>N</fixed-case>igerian <fixed-case>P</fixed-case>idgin EmmettStrickland - AnneLacheret-Dujour + AnneLacheret-Dujour SylvainKahane MarcEvrard PerrineQuennehen @@ -12621,9 +12621,9 @@ New Semantic Task for the <fixed-case>F</fixed-case>rench Spoken Language Understanding <fixed-case>MEDIA</fixed-case> Benchmark NadègeAlavoine GaëlleLaperrière - ChristopheServan + ChristopheServan SaharGhannay - SophieRosset + SophieRosset 12227–12246 Intent classification and slot-filling are essential tasks of Spoken Language Understanding (SLU). In most SLU systems, those tasks are realized by independent modules, but for about fifteen years, models achieving both of them jointly and exploiting their mutual enhancement have been proposed. A multilingual module using a joint model was envisioned to create a touristic dialogue system for a European project, HumanE-AI-Net. A combination of multiple datasets, including the MEDIA dataset, was suggested for training this joint model. The MEDIA SLU dataset is a French dataset distributed since 2005 by ELRA, mainly used by the French research community and free for academic research since 2020. Unfortunately, it is annotated only in slots but not intents. An enhanced version of MEDIA annotated with intents has been built to extend its use to more tasks and use cases. This paper presents the semi-automatic methodology used to obtain this enhanced version. In addition, we present the first results of SLU experiments on this enhanced dataset using joint models for intent classification and slot-filling. 2024.lrec-main.1070 @@ -12762,7 +12762,7 @@ On Leveraging Encoder-only Pre-trained Language Models for Effective Keyphrase Generation DiWu - WasiAhmad + WasiAhmad Kai-WeiChang 12370–12384 This study addresses the application of encoder-only Pre-trained Language Models (PLMs) in keyphrase generation (KPG) amidst the broader availability of domain-tailored encoder-only models compared to encoder-decoder models. We investigate three core inquiries: (1) the efficacy of encoder-only PLMs in KPG, (2) optimal architectural decisions for employing encoder-only PLMs in KPG, and (3) a performance comparison between in-domain encoder-only and encoder-decoder PLMs across varied resource settings. Our findings, derived from extensive experimentation in two domains reveal that with encoder-only PLMs, although keyphrase extraction with Conditional Random Fields slightly excels in identifying present keyphrases, the KPG formulation renders a broader spectrum of keyphrase predictions. Additionally, prefix-LM fine-tuning of encoder-only PLMs emerges as a strong and data-efficient strategy for KPG, outperforming general-domain seq2seq PLMs. We also identify a favorable parameter allocation towards model depth rather than width when employing encoder-decoder architectures initialized with encoder-only PLMs. The study sheds light on the potential of utilizing encoder-only PLMs for advancing KPG systems and provides a groundwork for future KPG methods. Our code and pre-trained checkpoints are released at https://github.com/uclanlp/DeepKPG. @@ -12771,11 +12771,11 @@ On Modelling Corpus Citations in Computational Lexical Resources - FahadKhan + FahadKhan MaximIonov ChristianChiarcos - LaurentRomary - GillesSérasset + LaurentRomary + GillesSérasset BesimKabashi 12385–12394 In this article we look at how two different standards for lexical resources, TEI and OntoLex, deal with corpus citations in lexicons. We will focus on how corpus citations in retrodigitised dictionaries can be modelled using each of the two standards since this provides us with a suitably challenging use case. After looking at the structure of an example entry from a legacy dictionary, we examine the two approaches offered by the two different standards by outlining an encoding for the example entry using both of them (note that this article features the first extended discussion of how the Frequency Attestation and Corpus (FrAC) module of OntoLex deals with citations). After comparing the two approaches and looking at the advantages and disadvantages of both, we argue for a combination of both. In the last part of the article we discuss different ways of doing this, giving our preference for a strategy which makes use of RDFa. @@ -12807,8 +12807,8 @@ On the Scaling Laws of Geographical Representation in Language Models NathanGodey - Éricde la Clergerie - BenoîtSagot + Éricde la Clergerie + BenoîtSagot 12416–12422 Language models have long been shown to embed geographical information in their hidden representations. This line of work has recently been revisited by extending this result to Large Language Models (LLMs). In this paper, we propose to fill the gap between well-established and recent literature by observing how geographical knowledge evolves when scaling language models. We show that geographical knowledge is observable even for tiny models, and that it scales consistently as we increase the model size. Notably, we observe that larger language models cannot mitigate the geographical bias that is inherent to the training data. 2024.lrec-main.1087 @@ -12845,7 +12845,7 @@ PunyajoySaha AalokAgrawal AbhikJana - ChrisBiemann + ChrisBiemann AnimeshMukherjee 12443–12454 With the emergence of numerous Large Language Models (LLM), the usage of such models in various Natural Language Processing (NLP) applications is increasing extensively. Counterspeech generation is one such key task where efforts are made to develop generative models by fine-tuning LLMs with hatespeech - counterspeech pairs, but none of these attempts explores the intrinsic properties of large language models in zero-shot settings. In this work, we present a comprehensive analysis of the performances of four LLMs namely GPT-2, DialoGPT, ChatGPT and FlanT5 in zero-shot settings for counterspeech generation, which is the first of its kind. For GPT-2 and DialoGPT, we further investigate the deviation in performance with respect to the sizes (small, medium, large) of the models. On the other hand, we propose three different prompting strategies for generating different types of counterspeech and analyse the impact of such strategies on the performance of the models. Our analysis shows that there is an improvement in generation quality for two datasets (17%), however the toxicity increase (25%) with increase in model size. Considering type of model, GPT-2 and FlanT5 models are significantly better in terms of counterspeech quality but also have high toxicity as compared to DialoGPT. ChatGPT are much better at generating counter speech than other models across all metrics. In terms of prompting, we find that our proposed strategies help in improving counter speech generation across all the models. @@ -12868,7 +12868,7 @@ YangGao JiMa IvanKorotkov - KeithHall + KeithHall DanaAlon DonaldMetzler 12467–12480 @@ -12882,7 +12882,7 @@ ArashYousefi Jordehi MahsaHosseini Khasheh Heyran SeyedAbolghasemMirroshandel - OwenRambow + OwenRambow 12481–12495 Opinion mining is an important task in natural language processing. The MPQA Opinion Corpus is a fine-grained and comprehensive dataset of private states (i.e., the condition of a source who has an attitude which may be directed toward a target) based on context. Although this dataset was released years ago, because of its complex definition of annotations and hard-to-read data format, almost all existing research works have only focused on a small subset of the dataset. In this paper, we present a comprehensive study of the entire MPQA 2.0 dataset. In order to achieve this goal, we first provide a clean version of MPQA 2.0 in a more interpretable format. Then, we propose two novel approaches for opinion mining, establishing new high baselines for future work. We use two pre-trained large language models, BERT and T5, to automatically identify the type, polarity, and intensity of private states expressed in phrases, and we use T5 to detect opinion expressions and their agents (i.e., sources). 2024.lrec-main.1093 @@ -12914,9 +12914,9 @@ JaewanPark YiseulLee HyeJinLee - YounggyunHahm + YounggyunHahm HansaemKim - KyungTaeLim + KyungTaeLim 12514–12526 Large language models (LLMs) use pretraining to predict the subsequent word; however, their expansion requires significant computing resources. Numerous big tech companies and research institutes have developed multilingual LLMs (MLLMs) to meet current demands, overlooking less-resourced languages (LRLs). This study proposed three strategies to enhance the performance of LRLs based on the publicly available MLLMs. First, the MLLM vocabularies of LRLs were expanded to enhance expressiveness. Second, bilingual data were used for pretraining to align the high- and less-resourced languages. Third, a high-quality small-scale instruction dataset was constructed and instruction-tuning was performed to augment the LRL. The experiments employed the Llama2 model and Korean was used as the LRL, which was quantitatively evaluated against other developed LLMs across eight tasks. Furthermore, a qualitative assessment was performed based on human evaluation and GPT4. Experimental results showed that our proposed Bllossom model exhibited superior performance in qualitative analyses compared to previously proposed Korean monolingual models. 2024.lrec-main.1095 @@ -12930,7 +12930,7 @@ ZhihengXi TaoGui QiZhang - XuanjingHuang + XuanjingHuang 12527–12538 Pretrained language models can be applied for various downstream tasks but are susceptible to subtle perturbations. Most adversarial defense methods often introduce adversarial training during the fine-tuning phase to enhance empirical robustness. However, the repeated execution of adversarial training hinders training efficiency when transitioning to different tasks. In this paper, we explore the transferability of robustness within subnetworks and leverage this insight to introduce a novel adversarial defense method ORTicket, eliminating the need for separate adversarial training across diverse downstream tasks. Specifically, (i) pruning the full model using the MLM task (the same task employed for BERT pretraining) yields a task-agnostic robust subnetwork(i.e., winning ticket in Lottery Ticket Hypothesis); and (ii) fine-tuning this subnetwork for downstream tasks. Extensive experiments demonstrate that our approach achieves comparable robustness to other defense methods while retaining the efficiency of traditional fine-tuning.This also confirms the significance of selecting MLM task for identifying the transferable robust subnetwork. Furthermore, our method is orthogonal to other adversarial training approaches, indicating the potential for further enhancement of model robustness. 2024.lrec-main.1096 @@ -12953,7 +12953,7 @@ InesRehbein JosefRuppenhofer AnnelenBrunner - Simone PaoloPonzetto + Simone PaoloPonzetto 12553–12563 This paper presents GePaDe_SpkAtt , a new corpus for speaker attribution in German parliamentary debates, with more than 7,700 manually annotated events of speech, thought and writing. Our role inventory includes the sources, addressees, messages and topics of the speech event and also two additional roles, medium and evidence. We report baseline results for the automatic prediction of speech events and their roles, with high scores for both, event triggers and roles. Then we apply our model to predict speech events in 20 years of parliamentary debates and investigate the use of factives in the rhetoric of MPs. 2024.lrec-main.1098 @@ -12966,7 +12966,7 @@ ZhanghaoWang HongCheng RuiZhang - Kam-FaiWong + Kam-FaiWong 12564–12573 In an era characterized by the rapid proliferation of information, the pervasive issues of misinformation and disinformation have significantly impacted numerous individuals. Consequently, the evaluation of information’s truthfulness and accuracy has garnered substantial attention among researchers. In this work, we present a novel fact-checking framework called PACAR, fact-checking based on planning and customized action reasoning using LLMs. It comprises four modules: a claim decomposer with self-reflection, an LLM-centric planner module, an executor for carrying out planned actions, and a verifier module that assesses veracity and generates explanations based on the overall reasoning process. Unlike previous work that employs single-path decision-making and single-step verdict prediction, PACAR focuses on the use of LLMs in dynamic planning and execution of actions. Furthermore, in contrast to previous work that relied primarily on general reasoning, we introduce tailored actions such as numerical reasoning and entity disambiguation to effectively address potential challenges in fact-checking. Our PACAR framework, incorporating LLM-centric planning along with customized action reasoning, significantly outperforms baseline methods across three datasets from different domains and with varying complexity levels. Additional experiments, including multidimensional and sliced observations, demonstrate the effectiveness of PACAR and offer valuable insights for the advancement of automated fact-checking. 2024.lrec-main.1099 @@ -12998,7 +12998,7 @@ Parameter-Efficient Transfer Learning for End-to-end Speech Translation YunlongZhao - KexinWang + KexinWang QianqianDong TomKo 12592–12598 @@ -13051,7 +13051,7 @@ ZhenzheYing WeiqiangWang QiZhang - XuanjingHuang + XuanjingHuang ZhongyuWei 12644–12656 Modeling social media users is the core of social governance in the digital society. Existing works have incorporated different digital traces to better learn the representations of social media users, including text information encoded by pre-trained language models and social network information encoded by graph models. However, limited by overloaded text information and hard-to-collect social network information, they cannot utilize global text information and cannot be generalized without social relationships. In this paper, we propose a Pre-training Architecture for Social Media User Modeling based on Text Graph(PASUM). We aggregate all microblogs to represent social media users based on the text graph model and learn the mapping from microblogs to user representation. We further design inter-user and intra-user contrastive learning tasks to inject general structural information into the mapping. In different scenarios, we can represent users based on text, even without social network information. Experimental results on various downstream tasks demonstrate the effectiveness and superiority of our framework. @@ -13060,7 +13060,7 @@ Pater Incertus? There Is a Solution: Automatic Discrimination between Cognates and Borrowings for <fixed-case>R</fixed-case>omance Languages - Liviu P.Dinu + Liviu P.Dinu Ana SabinaUban Ioan-BogdanIordache Alina MariaCristea @@ -13101,7 +13101,7 @@ <fixed-case>PECC</fixed-case>: Problem Extraction and Coding Challenges - PatrickHaller + PatrickHaller JonasGolde AlanAkbik 12690–12699 @@ -13140,7 +13140,7 @@ EleanorChodroff BlažPažon AnnieBaker - StevenMoran + StevenMoran 12724–12733 Research in speech technologies and comparative linguistics depends on access to diverse and accessible speech data. The UCLA Phonetics Lab Archive is one of the earliest multilingual speech corpora, with long-form audio recordings and phonetic transcriptions for 314 languages (Ladefoged et al., 2009). Recently, 95 of these languages were time-aligned with word-level phonetic transcriptions (Li et al., 2021). Here we present VoxAngeles, a corpus of audited phonetic transcriptions and phone-level alignments of the UCLA Phonetics Lab Archive, which uses the 95-language CMU re-release as our starting point. VoxAngeles also includes word- and phone-level segmentations from the original UCLA corpus, as well as phonetic measurements of word and phone durations, vowel formants, and vowel f0. This corpus enhances the usability of the original data, particularly for quantitative phonetic typology, as demonstrated through a case study of vowel intrinsic f0. We also discuss the utility of the VoxAngeles corpus for general research and pedagogy in crosslinguistic phonetics, as well as for low-resource and multilingual speech technologies. VoxAngeles is free to download and use under a CC-BY-NC 4.0 license. 2024.lrec-main.1114 @@ -13150,7 +13150,7 @@ Phonotactic Complexity across Dialects Ryan Soh-EunShim KalvinChang - David R.Mortensen + David R.Mortensen 12734–12748 Received wisdom in linguistic typology holds that if the structure of a language becomes more complex in one dimension, it will simplify in another, building on the assumption that all languages are equally complex (Joseph and Newmeyer, 2012). We study this claim on a micro-level, using a tightly-controlled sample of Dutch dialects (across 366 collection sites) and Min dialects (across 60 sites), which enables a more fair comparison across varieties. Even at the dialect level, we find empirical evidence for a tradeoff between word length and a computational measure of phonotactic complexity from a LSTM-based phone-level language model—a result previously documented only at the language level. A generalized additive model (GAM) shows that dialects with low phonotactic complexity concentrate around the capital regions, which we hypothesize to correspond to prior hypotheses that language varieties of greater or more diverse populations show reduced phonotactic complexity. We also experiment with incorporating the auxiliary task of predicting syllable constituency, but do not find an increase in the strength of the negative correlation observed. 2024.lrec-main.1115 @@ -13190,7 +13190,7 @@ Plots Made Quickly: An Efficient Approach for Generating Visualizations from Natural Language Queries HenrikVoigt KaiLawonn - SinaZarrieß + SinaZarrieß 12787–12793 Generating visualizations from natural language queries is a useful extension to visualization libraries such as Vega-Lite. The goal of the NL2VIS task is to generate a valid Vega-Lite specification from a data frame and a natural language query as input, which can then be rendered as a visualization. To enable real-time interaction with the data, small model sizes and fast inferences are required. Previous work has introduced custom neural network solutions with custom visualization specifications and has not systematically tested pre-trained LMs to solve this problem. In this work, we opt for a more generic approach that (i) evaluates pre-trained LMs of different sizes and (ii) uses string encodings of data frames and visualization specifications instead of custom specifications. In our experiments, we show that these representations, in combination with pre-trained LMs, scale better than current state-of-the-art models. In addition, the small and base versions of the T5 architecture achieve real-time interaction, while LLMs far exceed latency thresholds suitable for visual exploration tasks. In summary, our models generate visualization specifications in real-time on a CPU and establish a new state of the art on the NL2VIS benchmark nvBench. 2024.lrec-main.1119 @@ -13215,7 +13215,7 @@ GennaroNolano MoritzBlum BasilEll - PhilippCimiano + PhilippCimiano 12809–12820 In recent years, large language models have achieved state-of-the-art performance across various NLP tasks. However, investigations have shown that these models tend to rely on shortcut features, leading to inaccurate predictions and causing the models to be unreliable at generalization to out-of-distribution (OOD) samples. For instance, in the context of relation extraction (RE), we would expect a model to identify the same relation independently of the entities involved in it. For example, consider the sentence “Leonardo da Vinci painted the Mona Lisa” expressing the created(Leonardo_da_Vinci, Mona_Lisa) relation. If we substiute “Leonardo da Vinci” with “Barack Obama”, then the sentence still expresses the created relation. A robust model is supposed to detect the same relation in both cases. In this work, we describe several semantically-motivated strategies to generate adversarial examples by replacing entity mentions and investigate how state-of-the-art RE models perform under pressure. Our analyses show that the performance of these models significantly deteriorates on the modified datasets (avg. of -48.5% in F1), which indicates that these models rely to a great extent on shortcuts, such as surface forms (or patterns therein) of entities, without making full use of the information present in the sentences. 2024.lrec-main.1121 @@ -13248,7 +13248,7 @@ <fixed-case>P</fixed-case>oliti<fixed-case>C</fixed-case>ause: An Annotation Scheme and Corpus for Causality in Political Texts PaulinaGarcia Corral - HannaBechara + HannaBechara RanZhang SlavaJankin 12836–12845 @@ -13270,7 +13270,7 @@ <fixed-case>P</fixed-case>oly<fixed-case>NERE</fixed-case>: A Novel Ontology and Corpus for Named Entity Recognition and Relation Extraction in Polymer Science Domain Van-ThuyPhi HirokiTeranishi - YujiMatsumoto + YujiMatsumoto HiroyukiOka MasashiIshii 12856–12866 @@ -13432,7 +13432,7 @@ Probing Large Language Models for Scalar Adjective Lexical Semantics and Scalar Diversity Pragmatics FangruLin DanielAltshuler - Janet B.Pierrehumbert + Janet B.Pierrehumbert 13033–13049 Scalar adjectives pertain to various domain scales and vary in intensity within each scale (e.g. certain is more intense than likely on the likelihood scale). Scalar implicatures arise from the consideration of alternative statements which could have been made. They can be triggered by scalar adjectives and require listeners to reason pragmatically about them. Some scalar adjectives are more likely to trigger scalar implicatures than others. This phenomenon is referred to as scalar diversity. In this study, we probe different families of Large Language Models such as GPT-4 for their knowledge of the lexical semantics of scalar adjectives and one specific aspect of their pragmatics, namely scalar diversity. We find that they encode rich lexical-semantic information about scalar adjectives. However, the rich lexical-semantic knowledge does not entail a good understanding of scalar diversity. We also compare current models of different sizes and complexities and find that larger models are not always better. Finally, we explain our probing results by leveraging linguistic intuitions and model training objectives. 2024.lrec-main.1141 @@ -13475,9 +13475,9 @@ Producing a Parallel <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Treebank of <fixed-case>A</fixed-case>ncient <fixed-case>H</fixed-case>ebrew and <fixed-case>A</fixed-case>ncient <fixed-case>G</fixed-case>reek via Cross-Lingual Projection - Daniel G.Swanson + Daniel G.Swanson Bryce D.Bussert - FrancisTyers + FrancisTyers 13074–13078 In this paper we present the initial construction of a treebank of Ancient Greek containing portions of the Septuagint, a translation of the Hebrew Scriptures (1576 sentences, 39K tokens, roughly 7% of the total corpus). We construct the treebank by word-aligning and projecting from the parallel text in Ancient Hebrew before automatically correcting systematic syntactic mismatches and manually correcting other errors. 2024.lrec-main.1145 @@ -13697,7 +13697,7 @@ LaurinFriedrich WassilikiSiskou SteffenEckhard - AnnetteHautli-Janisz + AnnetteHautli-Janisz 13315–13320 Face-to-face interactions between representatives of the state and citizens are a key intercept in public service delivery, for instance when providing social benefits to vulnerable groups. Despite the relevance of these encounters for the individual, but also for society at large, there is a significant research gap in the systematic empirical study of the communication taking place. This is mainly due to the high institutional and data protection barriers for collecting data in a very sensitive and private setting in which citizens request support from the state. In this paper, we describe the procedure of compiling the first open access dataset of transcribed recordings of so-called Public Service Encounters in Germany, i.e., meetings between state officials and citizens in which there is direct communication in order to allocate state services. This dataset sets a new research directive in the social sciences, because it allows the community to open up the black box of direct state-citizen interaction. With data of this kind it becomes possible to directly and systematically investigate bias, bureaucratic discrimination and other power-driven dynamics in the actual communication and ideally propose guidelines as to alleviate these issues. 2024.lrec-main.1165 @@ -13739,7 +13739,7 @@ Nate B.Carlson Nathaniel RomneyRobinson MrinmayaSachan - David R.Mortensen + David R.Mortensen 13344–13355 Mapping words into a fixed-dimensional vector space is the backbone of modern NLP. While most word embedding methods successfully encode semantic information, they overlook phonetic information that is crucial for many tasks. We develop three methods that use articulatory features to build phonetically informed word embeddings. To address the inconsistent evaluation of existing phonetic word embedding methods, we also contribute a task suite to fairly evaluate past, current, and future methods. We evaluate both (1) intrinsic aspects of phonetic word embeddings, such as word retrieval and correlation with sound similarity, and (2) extrinsic performance on tasks such as rhyme and cognate detection and sound analogies. We hope our task suite will promote reproducibility and inspire future phonetic embedding research. 2024.lrec-main.1168 @@ -13806,7 +13806,7 @@ LiangPang YuanzhuoWang HuaweiShen - XueqiCheng + XueqiCheng 13407–13418 The questionnaire is a professional research methodology used for both qualitative and quantitative analysis of human opinions, preferences, attitudes, and behaviors. However, designing and evaluating questionnaires demands significant effort due to their intricate and complex structure. Questionnaires entail a series of questions that must conform to intricate constraints involving the questions, options, and overall structure. Specifically, the questions should be relevant and specific to the given research topic and intent. The options should be tailored to the questions, ensuring they are mutually exclusive, completed, and ordered sensibly. Moreover, the sequence of questions should follow a logical order, grouping similar topics together. As a result, automatically generating questionnaires presents a significant challenge and this area has received limited attention primarily due to the scarcity of high-quality datasets. To address these issues, we present Qsnail, the first dataset specifically constructed for the questionnaire generation task, which comprises 13,168 human-written questionnaires gathered from online platforms. We further conduct experiments on Qsnail, and the results reveal that retrieval models and traditional generative models do not fully align with the given research topic and intents. Large language models, while more closely related to the research topic and intents, exhibit significant limitations in terms of diversity and specificity. Despite enhancements through the chain-of-thought prompt and finetuning, questionnaires generated by language models still fall short of human-written questionnaires. Therefore, questionnaire generation is challenging and needs to be further explored. The dataset will be published in the future. 2024.lrec-main.1174 @@ -13860,9 +13860,9 @@ Question Answering over Tabular Data with <fixed-case>D</fixed-case>ata<fixed-case>B</fixed-case>ench: A Large-Scale Empirical Evaluation of <fixed-case>LLM</fixed-case>s JorgeOsés Grijalba - L. AlfonsoUreña-López + L. AlfonsoUreña-López EugenioMartínez Cámara - JoseCamacho-Collados + JoseCamacho-Collados 13471–13488 Large Language Models (LLMs) are showing emerging abilities, and one of the latest recognized ones deals with their ability to reason and answer questions from tabular data. Although there are some available datasets to assess question answering systems on tabular data, they are not large and diverse enough to properly assess the capabilities of LLMs. To this end, we propose DataBench, a benchmark composed of 65 real-world datasets over several domains, including 20 human-generated questions per dataset, totaling 1300 questions and answers overall. Using this benchmark, we perform a large-scale empirical comparison of several open and closed source models, including both code-generating and in-context learning models. The results highlight the current gap between open-source and closed-source models, with all types of model having room for improvement even in simple boolean questions or involving a single column. 2024.lrec-main.1179 @@ -13988,7 +13988,7 @@ Reassessing Semantic Knowledge Encoded in Large Language Models through the Word-in-Context Task - YoshihikoHayashi + YoshihikoHayashi 13610–13620 Despite the remarkable recent advancements in large language models (LLMs), a comprehensive understanding of their inner workings and the depth of their knowledge remains elusive. This study aims to reassess the semantic knowledge encoded in LLMs by utilizing the Word-in-Context (WiC) task, which involves predicting the semantic equivalence of a target word across different contexts, as a probing task. To address this challenge, we start by prompting LLMs, specifically GPT-3 and GPT-4, to generate natural language descriptions that contrast the meanings of the target word in two contextual sentences given in the WiC dataset. Subsequently, we conduct a manual analysis to examine their linguistic attributes. In parallel, we train a text classification model that utilizes the generated descriptions as supervision and assesses their practical effectiveness in the WiC task. The linguistic and empirical findings reveal a consistent provision of valid and valuable descriptions by LLMs, with LLM-generated descriptions significantly improving classification accuracy. Notably, the highest classification result achieved with GPT-3-generated descriptions largely surpassed GPT-3’s zero-shot baseline. However, the GPT-4-generated descriptions performed slightly below GPT-4’s zero-shot baseline, suggesting that the full potential of the most advanced large language models, such as GPT-4, is yet to be fully revealed. 2024.lrec-main.1189 @@ -13999,7 +13999,7 @@ MaximeArens LucileCallebert MohandBoughanem - Jose G.Moreno + Jose G.Moreno 13621–13632 Data annotation is crucial for machine learning, notably in technical domains, where the quality and quantity of annotated data, significantly affect effectiveness of trained models. Employing humans is costly, especially when annotating for multi-label classification, as instances may bear multiple labels. Active Learning (AL) aims to alleviate annotation costs by intelligently selecting instances for annotation, rather than randomly annotating. Recent attention on transformers has spotlighted the potential of AL in this context. However, in practical settings, implementing AL faces challenges beyond theory. Notably, the gap between AL cycles presents idle time for annotators. To address this issue, we investigate alternative instance selection methods, aiming to maximize annotation efficiency by seamlessly integrating with the AL process. We begin by evaluating two existing methods in our transformer setting, employing respectively random sampling and outdated information. Following this we propose our novel method based on annotating instances to rebalance label distribution. Our approach mitigates biases, enhances model performance (up to 23% improvement on f1score), reduces strategy-dependent disparities (decrease of nearly 50% on standard deviation) and reduces label imbalance (decrease of 30% on Mean Imbalance Ratio). 2024.lrec-main.1190 @@ -14008,7 +14008,7 @@ <fixed-case>R</fixed-case>e<fixed-case>CAP</fixed-case>: Semantic Role Enhanced Caption Generation AbhidipBhattacharyya - MarthaPalmer + MarthaPalmer ChristofferHeckman 13633–13649 Even though current vision language (V+L) models have achieved success in generating image captions, they often lack specificity and overlook various aspects of the image. Additionally, the attention learned through weak supervision operates opaquely and is difficult to control. To address these limitations, we propose the use of semantic roles as control signals in caption generation. Our hypothesis is that, by incorporating semantic roles as signals, the generated captions can be guided to follow specific predicate argument structures. To validate the effectiveness of our approach, we conducted experiments using data and compared the results with a baseline model VL-BART(CITATION). The experiments showed a significant improvement, with a gain of 45% in Smatch score (Standard NLP evaluation metric for semantic representations), demonstrating the efficacy of our approach. By focusing on specific objects and their associated semantic roles instead of providing a general description, our framework produces captions that exhibit enhanced quality, diversity, and controllability. @@ -14020,7 +14020,7 @@ Yi-PeiChen NorikiNishida HidekiNakayama - YujiMatsumoto + YujiMatsumoto 13650–13665 Enhancing user engagement through personalization in conversational agents has gained significance, especially with the advent of large language models that generate fluent responses. Personalized dialogue generation, however, is multifaceted and varies in its definition – ranging from instilling a persona in the agent to capturing users’ explicit and implicit cues. This paper seeks to systemically survey the recent landscape of personalized dialogue generation, including the datasets employed, methodologies developed, and evaluation metrics applied. Covering 22 datasets, we highlight benchmark datasets and newer ones enriched with additional features. We further analyze 17 seminal works from top conferences between 2021-2023 and identify five distinct types of problems. We also shed light on recent progress by LLMs in personalized dialogue generation. Our evaluation section offers a comprehensive summary of assessment facets and metrics utilized in these works. In conclusion, we discuss prevailing challenges and envision prospect directions for future research in personalized dialogue generation. 2024.lrec-main.1192 @@ -14034,7 +14034,7 @@ MinsunKim Tak YeonLee So-YeonAhn - AliceOh + AliceOh 13666–13676 The integration of generative AI in education is expanding, yet empirical analyses of large-scale and real-world interactions between students and AI systems still remain limited. Addressing this gap, we present RECIPE4U (RECIPE for University), a dataset sourced from a semester-long experiment with 212 college students in English as Foreign Language (EFL) writing courses. During the study, students engaged in dialogues with ChatGPT to revise their essays. RECIPE4U includes comprehensive records of these interactions, including conversation logs, students’ intent, students’ self-rated satisfaction, and students’ essay edit histories. In particular, we annotate the students’ utterances in RECIPE4U with 13 intention labels based on our coding schemes. We establish baseline results for two subtasks in task-oriented dialogue systems within educational contexts: intent detection and satisfaction estimation. As a foundational step, we explore student-ChatGPT interaction patterns through RECIPE4U and analyze them by focusing on students’ dialogue, essay data statistics, and students’ essay edits. We further illustrate potential applications of RECIPE4U dataset for enhancing the incorporation of LLMs in educational frameworks. RECIPE4U is publicly available at https://zeunie.github.io/RECIPE4U/. 2024.lrec-main.1193 @@ -14044,7 +14044,7 @@ Recognizing Social Cues in Crisis Situations DiWang YuanZhuang - EllenRiloff + EllenRiloff MarinaKogan 13677–13687 During crisis situations, observations of other people’s behaviors often play an essential role in a person’s decision-making. For example, a person might evacuate before a hurricane only if everyone else in the neighborhood does so. Conversely, a person might stay if no one else is leaving. Such observations are called social cues. Social cues are important for understanding people’s response to crises, so recognizing them can help inform the decisions of government officials and emergency responders. In this paper, we propose the first NLP task to categorize social cues in social media posts during crisis situations. We introduce a manually annotated dataset of 6,000 tweets, labeled with respect to eight social cue categories. We also present experimental results of several classification models, which show that some types of social cues can be recognized reasonably well, but overall this task is challenging for NLP systems. We further present error analyses to identify specific types of mistakes and promising directions for future research on this task. @@ -14086,7 +14086,7 @@ FabianSimonjetz JussiLaasonen YunusCobanoglu - AlexanderFraser + AlexanderFraser EnriqueJiménez 13712–13721 Ancient Mesopotamian literature is riddled with gaps, caused by the decay and fragmentation of its writing material, clay tablets. The discovery of overlaps between fragments allows reconstruction to advance, but it is a slow and unsystematic process. Since new pieces are found and digitized constantly, NLP techniques can help to identify fragments and match them with existing text collections to restore complete literary works. We compare a number of approaches and determine that a character-level n-gram-based similarity matching approach works well for this problem, leading to a large speed-up for researchers in Assyriology. @@ -14108,7 +14108,7 @@ Re-evaluating the Tomes for the Times RyanBrate Mariekevan Erp - Antalvan den Bosch + Antalvan den Bosch 13734–13739 Literature is to some degree a snapshot of the time it was written in and the societal attitudes of the time. Not all depictions are pleasant or in-line with modern-day sensibilities; this becomes problematic when the prevalent depictions over a large body of work are negatively biased, leading to their normalisation. Many much-loved and much-read classics are set in periods of heightened social inequality: slavery, pre-womens’ rights movements, colonialism, etc. In this paper, we exploit known text co-occurrence metrics with respect to token-level level contexts to identify prevailing themes associated with known problematic descriptors. We see that prevalent, negative depictions are perpetuated by classic literature. We propose that such a methodology could form the basis of a system for making explicit such problematic associations, for interested parties: such as, sensitivity coordinators of publishing houses, library curators, or organisations concerned with social justice 2024.lrec-main.1199 @@ -14128,7 +14128,7 @@ Wei-FanChen MiladAlshomary MajaStahl - KhalidAl Khatib + KhalidAl Khatib BennoStein HenningWachsmuth 13754–13768 @@ -14145,7 +14145,7 @@ EmilyPreston ChrisBayliss ChrisOakley - CarolinaScarton + CarolinaScarton 13769–13784 Sensitising language models (LMs) to external context helps them to more effectively capture the speaking patterns of individuals with specific characteristics or in particular environments. This work investigates to what extent detailed character and film annotations can be leveraged to personalise LMs in a scalable manner. We then explore the use of such models in evaluating context specificity in machine translation. We build LMs which leverage rich contextual information to reduce perplexity by up to 6.5% compared to a non-contextual model, and generalise well to a scenario with no speaker-specific data, relying on combinations of demographic characteristics expressed via metadata. Our findings are consistent across two corpora, one of which (Cornell-rich) is also a contribution of this paper. We then use our personalised LMs to measure the co-occurrence of extra-textual context and translation hypotheses in a machine translation setting. Our results suggest that the degree to which professional translations in our domain are context-specific can be preserved to a better extent by a contextual machine translation model than a non-contextual model, which is also reflected in the contextual model’s superior reference-based scores. 2024.lrec-main.1202 @@ -14197,7 +14197,7 @@ <fixed-case>R</fixed-case>eflect<fixed-case>S</fixed-case>umm: A Benchmark for Course Reflection Summarization YangZhong MohamedElaraby - DianeLitman + DianeLitman Ahmed AshrafButt MuhsinMenekse 13819–13846 @@ -14477,8 +14477,8 @@ HongfeiXu YangSong QiuhuiLiu - Josefvan Genabith - DeyiXiong + Josefvan Genabith + DeyiXiong 14122–14133 Stacking non-linear layers allows deep neural networks to model complicated functions, and including residual connections in Transformer layers is beneficial for convergence and performance. However, residual connections may make the model “forget” distant layers and fail to fuse information from previous layers effectively. Selectively managing the representation aggregation of Transformer layers may lead to better performance. In this paper, we present a Transformer with depth-wise LSTMs connecting cascading Transformer layers and sub-layers. We show that layer normalization and feed-forward computation within a Transformer layer can be absorbed into depth-wise LSTMs connecting pure Transformer attention layers. Our experiments with the 6-layer Transformer show significant BLEU improvements in both WMT 14 English-German / French tasks and the OPUS-100 many-to-many multilingual NMT task, and our deep Transformer experiments demonstrate the effectiveness of depth-wise LSTM on the convergence and performance of deep Transformers. 2024.lrec-main.1231 @@ -14490,7 +14490,7 @@ Atula TejaswiNeerkaje RamitSawhney NikolaosAletras - PreslavNakov + PreslavNakov 14134–14145 Suicide is a serious public health issue, but it is preventable with timely intervention. Emerging studies have suggested there is a noticeable increase in the number of individuals sharing suicidal thoughts online. As a result, utilising advance Natural Language Processing techniques to build automated systems for risk assessment is a viable alternative. However, existing systems are prone to incorrectly predicting risk severity and have no early detection mechanisms. Therefore, we propose RISE, a novel robust mechanism for accurate early detection of suicide risk by ensembling Hyperbolic Internal Classifiers equipped with an abstention mechanism and early-exit inference capabilities. Through quantitative, qualitative and ablative experiments, we demonstrate RISE as an efficient and robust human-in-the-loop approach for risk assessment over the Columbia Suicide Severity Risk Scale (C-SSRS) and CLPsych 2022 datasets. It is able to successfully abstain from 84% incorrect predictions on Reddit data while out-predicting state of the art models upto 3.5x earlier. 2024.lrec-main.1232 @@ -14511,7 +14511,7 @@ MohammadMohammadamini DrissMatrouf MichaelRouvier - Jean-FrancoisBonastre + Jean-FrancoisBonastre RomainSerizel TheophileGonos 14152–14156 @@ -14553,7 +14553,7 @@ HanXia TaoGui QiZhang - XuanjingHuang + XuanjingHuang 14186–14203 Large Language Models (LLMs) have showcased remarkable capabilities in following human instructions. However, recent studies have raised concerns about the robustness of LLMs for natural language understanding (NLU) tasks when prompted with instructions combining textual adversarial samples. In this paper, drawing inspiration from recent works that LLMs are sensitive to the design of the instructions, we utilize instructions in code style, which are more structural and less ambiguous, to replace typically natural language instructions. Through this conversion, we provide LLMs with more precise instructions and strengthen the robustness of LLMs. Moreover, under few-shot scenarios, we propose a novel method to compose in-context demonstrations using both clean and adversarial samples (adversarial context method) to further boost the robustness of the LLMs. Experiments on eight robustness datasets show that our method consistently outperforms prompting LLMs with natural language, for example, with gpt-3.5-turbo on average, our method achieves an improvement of 5.68% in test set accuracy and a reduction of 5.66 points in Attack Success Rate (ASR). 2024.lrec-main.1237 @@ -14600,7 +14600,7 @@ DaniilKosakin SergeiObiedkov IvanSmirnov - EkaterinaRakhilina + EkaterinaRakhilina AnastasiaVyrenkova EkaterinaZalivina 14240–14258 @@ -14637,7 +14637,7 @@ PuneetMathur RamitSawhney ShivamAgarwal - PreslavNakov + PreslavNakov SudheerChava DineshManocha 14285–14297 @@ -14756,7 +14756,7 @@ SongChen JenniferTracey AnnBies - StephanieStrassel + StephanieStrassel 14393–14399 The Schema Learning Corpus (SLC) is a new linguistic resource designed to support research into the structure of complex events in multilingual, multimedia data. The SLC incorporates large volumes of background data in English, Spanish and Russian, and defines 100 complex events (CEs) across 12 domains, with CE profiles containing information about the typical steps and substeps and expected event categories for the CE. Multiple documents are labeled for each CE, with pointers to evidence in the document for each CE step, plus labeled events and relations along with their arguments across a large tag set. The SLC was designed to support development and evaluation of technology capable of understanding and reasoning about complex real-world events in multimedia, multilingual data streams in order to provide users with a deeper understanding of the potential relationships among seemingly disparate events and actors, and to allow users to make better predictions about how future events are likely to unfold. The Schema Learning Corpus will be made available to the research community through publication in Linguistic Data Consortium catalog. 2024.lrec-main.1254 @@ -14790,7 +14790,7 @@ XiaoZhang HeqiZheng YuxiangNie - HeyanHuang + HeyanHuang Xian-LingMao 14418–14428 Scientific Machine Reading Comprehension (SMRC) aims to facilitate the understanding of scientific texts through human-machine interactions. While existing dataset has significantly contributed to this field, it predominantly focus on single-perspective question-answer pairs, thereby overlooking the inherent variation in comprehension levels among different readers. To address this limitation, we introduce a novel multi-perspective scientific machine reading comprehension dataset, SciMRC, which incorporates perspectives from beginners, students, and experts. Our dataset comprises 741 scientific papers and 6,057 question-answer pairs, with 3,306, 1,800, and 951 pairs corresponding to beginners, students, and experts respectively. Extensive experiments conducted on SciMRC using pre-trained models underscore the importance of considering diverse perspectives in SMRC and highlight the challenging nature of our scientific machine comprehension tasks. @@ -14810,11 +14810,11 @@ <fixed-case>SCOUT</fixed-case>: A Situated and Multi-Modal Human-Robot Dialogue Corpus - Stephanie M.Lukin - ClaireBonial + Stephanie M.Lukin + ClaireBonial MatthewMarge Taylor A.Hudson - Cory J.Hayes + Cory J.Hayes KimberlyPollard AnthonyBaker Ashley N.Foots @@ -14825,8 +14825,8 @@ LuciaDonatelli AntonLeuski Susan G.Hill - DavidTraum - ClareVoss + DavidTraum + ClareVoss 14445–14458 We introduce the Situated Corpus Of Understanding Transactions (SCOUT), a multi-modal collection of human-robot dialogue in the task domain of collaborative exploration. The corpus was constructed from multiple Wizard-of-Oz experiments where human participants gave verbal instructions to a remotely-located robot to move and gather information about its surroundings. SCOUT contains 89,056 utterances and 310,095 words from 278 dialogues averaging 320 utterances per dialogue. The dialogues are aligned with the multi-modal data streams available during the experiments: 5,785 images and 30 maps. The corpus has been annotated with Abstract Meaning Representation and Dialogue-AMR to identify the speaker’s intent and meaning within an utterance, and with Transactional Units and Relations to track relationships between utterances to reveal patterns of the Dialogue Structure. We describe how the corpus and its annotations have been used to develop autonomous human-robot systems and enable research in open questions of how humans speak to robots. We release this corpus to accelerate progress in autonomous, situated, human-robot dialogue, especially in the context of navigation tasks where details about the environment need to be discovered. 2024.lrec-main.1259 @@ -14865,7 +14865,7 @@ MarieKolm VerenaBlaschke EkaterinaArtemova - BarbaraPlank + BarbaraPlank 14478–14493 Named Entity Recognition (NER) is a fundamental task to extract key information from texts, but annotated resources are scarce for dialects. This paper introduces the first dialectal NER dataset for German, BarNER, with 161K tokens annotated on Bavarian Wikipedia articles (bar-wiki) and tweets (bar-tweet), using a schema adapted from German CoNLL 2006 and GermEval. The Bavarian dialect differs from standard German in lexical distribution, syntactic construction, and entity information. We conduct in-domain, cross-domain, sequential, and joint experiments on two Bavarian and three German corpora and present the first comprehensive NER results on Bavarian. Incorporating knowledge from the larger German NER (sub-)datasets notably improves on bar-wiki and moderately on bar-tweet. Inversely, training first on Bavarian contributes slightly to the seminal German CoNLL 2006 corpus. Moreover, with gold dialect labels on Bavarian tweets, we assess multi-task learning between five NER and two Bavarian-German dialect identification tasks and achieve NER SOTA on bar-wiki. We substantiate the necessity of our low-resource BarNER corpus and the importance of diversity in dialects, genres, and topics in enhancing model performance. 2024.lrec-main.1262 @@ -14888,7 +14888,7 @@ PreetiVerma Jaithra VarmaManthena SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya MinakshiDhar SarbajeetTiwari 14513–14523 @@ -14932,7 +14932,7 @@ ZixuanLi LongBai JiafengGuo - XueqiCheng + XueqiCheng 14555–14566 Temporal Knowledge Graph (TKG), which characterizes temporally evolving facts in the form of (subject, relation, object, timestamp), has attracted much attention recently. TKG reasoning aims to predict future facts based on given historical ones. However, existing TKG reasoning models are unable to abstain from predictions they are uncertain, which will inevitably bring risks in real-world applications. Thus, in this paper, we propose an abstention mechanism for TKG reasoning, which helps the existing models make selective, instead of indiscriminate, predictions. Specifically, we develop a confidence estimator, called Confidence Estimator with History (CEHis), to enable the existing TKG reasoning models to first estimate their confidence in making predictions, and then abstain from those with low confidence. To do so, CEHis takes two kinds of information into consideration, namely, the certainty of the current prediction and the accuracy of historical predictions. Experiments with representative TKG reasoning models on two benchmark datasets demonstrate the effectiveness of the proposed CEHis. 2024.lrec-main.1268 @@ -14979,8 +14979,8 @@ Self-reported Demographics and Discourse Dynamics in a Persuasive Online Forum - AgnieszkaFalenska - Eva MariaVecchi + AgnieszkaFalenska + Eva MariaVecchi GabriellaLapesa 14606–14621 Research on language as interactive discourse underscores the deliberate use of demographic parameters such as gender, ethnicity, and class to shape social identities. For example, by explicitly disclosing one’s information and enforcing one’s social identity to an online community, the reception by and interaction with the said community is impacted, e.g., strengthening one’s opinions by depicting the speaker as credible through their experience in the subject. Here, we present a first thorough study of the role and effects of self-disclosures on online discourse dynamics, focusing on a pervasive type of self-disclosure: author gender. Concretely, we investigate the contexts and properties of gender self-disclosures and their impact on interaction dynamics in an online persuasive forum, ChangeMyView. Our contribution is twofold. At the level of the target phenomenon, we fill a research gap in the understanding of the impact of these self-disclosures on the discourse by bringing together features related to forum activity (votes, number of comments), linguistic/stylistic features from the literature, and discourse topics. At the level of the contributed resource, we enrich and release a comprehensive dataset that will provide a further impulse for research on the interplay between gender disclosures, community interaction, and persuasion in online discourse. @@ -14998,7 +14998,7 @@ Semantic Map-based Generation of Navigation Instructions ChengzuLi - ChaoZhang + ChaoZhang SimoneTeufel Rama SanandDoddipatla SvetlanaStoyanchev @@ -15108,7 +15108,7 @@ VladimirAraujo Maria MihaelaTrusca RodrigoTufiño - Marie-FrancineMoens + Marie-FrancineMoens 14729–14743 In recent years, significant advancements in pre-trained language models have driven the creation of numerous non-English language variants, with a particular emphasis on encoder-only and decoder-only architectures. While Spanish language models based on BERT and GPT have demonstrated proficiency in natural language understanding and generation, there remains a noticeable scarcity of encoder-decoder models explicitly designed for sequence-to-sequence tasks, which aim to map input sequences to generate output sequences conditionally. This paper breaks new ground by introducing the implementation and evaluation of renowned encoder-decoder architectures exclusively pre-trained on Spanish corpora. Specifically, we present Spanish versions of BART, T5, and BERT2BERT-style models and subject them to a comprehensive assessment across various sequence-to-sequence tasks, including summarization, question answering, split-and-rephrase, dialogue, and translation. Our findings underscore the competitive performance of all models, with the BART- and T5-based models emerging as top performers across all tasks. We have made all models publicly available to the research community to foster future explorations and advancements in Spanish NLP: https://github.com/vgaraujov/Seq2Seq-Spanish-PLMs. 2024.lrec-main.1283 @@ -15117,7 +15117,7 @@ Sequential and Repetitive Pattern Learning for Temporal Knowledge Graph Reasoning XuefeiLi - HuiweiZhou + HuiweiZhou WeihongYao WenchuLi YingyuLin @@ -15131,7 +15131,7 @@ <fixed-case>SGCM</fixed-case>: Salience-Guided Context Modeling for Question Generation ChuyaoDing YuHong - JianminYao + JianminYao 14755–14762 We tackle Paragraph-level Question Generation (abbr., PQG) in this paper. PQG is a task of automatically generating questions given paragraphs and answers. Identifying the relevant sentences to answers is crucial for reasoning the possible questions before generation. Accordingly, we propose a salience-guided approach to enhance PQG. Specifically, we construct an auxiliary task of identifying salient sentences that manifest relevance. Grounded on this auxiliary task and the main task of PQG, we strengthen the BART encoder during training within a multitask learning framework. In particular, we utilize the identified salient sentences as an explicit guidance to enable the salience-aware attention computation in the BART decoder. We experiment on the benchmark dataset FairytaleQA. The test results show that our approach yields substantial improvements compared to the BART baseline, achieving the Rouge-L, BLEU4, BERTScore, Q-BLUE-3 and F1-scores of about 56.56%, 19.78%, 61.19%, 54.33% and 43.55%, respectively. Both the source codes and models will be publicly available. 2024.lrec-main.1285 @@ -15169,7 +15169,7 @@ <fixed-case>S</fixed-case>ign<fixed-case>BLEU</fixed-case>: Automatic Evaluation of Multi-channel Sign Language Translation Jung-HoKim - MathewHuerta-Enochian + MathewHuerta-Enochian ChangyongKo Du HuiLee 14796–14811 @@ -15181,7 +15181,7 @@ <fixed-case>S</fixed-case>ilver<fixed-case>A</fixed-case>lign: <fixed-case>MT</fixed-case>-Based Silver Data Algorithm for Evaluating Word Alignment AbdullatifKoksal SilviaSeverini - HinrichSchütze + HinrichSchütze 14812–14825 Word alignments are essential for a variety of NLP tasks. Therefore, choosing the best approaches for their creation is crucial. However, the scarce availability of gold evaluation data makes the choice difficult. We propose SilverAlign, a new method to automatically create silver data for the evaluation of word aligners by exploiting machine translation and minimal pairs. We show that performance on our silver data correlates well with gold benchmarks for 9 language pairs, making our approach a valid resource for evaluation of different languages and domains when gold data is not available. This addresses the important scenario of missing gold data alignments for low-resource languages. 2024.lrec-main.1290 @@ -15266,7 +15266,7 @@ MiriamWinkler VirginijaJuozapaityte Robvan der Goot - BarbaraPlank + BarbaraPlank 14898–14915 Digital assistants perform well in high-resource languages like English, where tasks like slot and intent detection (SID) are well-supported. Many recent SID datasets start including multiple language varieties. However, it is unclear how realistic these translated datasets are. Therefore, we extend one such dataset, namely xSID-0.4, to include two underrepresented languages: Bavarian, a German dialect, and Lithuanian, a Baltic language. Both language variants have limited speaker populations and are often not included in multilingual projects. In addition to translations we provide “natural” queries to digital assistants generated by native speakers. We further include utterances from another dataset for Bavarian to build the richest SID dataset available today for a low-resource dialect without standard orthography. We then set out to evaluate models trained on English in a zero-shot scenario on our target language variants. Our evaluation reveals that translated data can produce overly optimistic scores. However, the error patterns in translated and natural datasets are highly similar. Cross-dataset experiments demonstrate that data collection methods influence performance, with scores lower than those achieved with single-dataset translations. This work contributes to enhancing SID datasets for underrepresented languages, yielding NaLiBaSID, a new evaluation dataset for Bavarian and Lithuanian. 2024.lrec-main.1297 @@ -15286,8 +15286,8 @@ PierreLepagnol ThomasGerald SaharGhannay - ChristopheServan - SophieRosset + ChristopheServan + SophieRosset 14923–14936 This study is part of the debate on the efficiency of large versus small language models for text classification by prompting. We assess the performance of small language models in zero-shot text classification, challenging the prevailing dominance of large models. Across 15 datasets, our investigation benchmarks language models from 77M to 40B parameters using different architectures and scoring functions. Our findings reveal that small models can effectively classify texts, getting on par with or surpassing their larger counterparts. We developed and shared a comprehensive open-source repository that encapsulates our methodologies. This research underscores the notion that bigger isn’t always better, suggesting that resource-efficient small models may offer viable solutions for specific data classification challenges. 2024.lrec-main.1299 @@ -15344,7 +15344,7 @@ AnkitaBhaumik NingSa GregoriosKatsios - TomekStrzalkowski + TomekStrzalkowski 14984–14994 Social media platforms are popular tools for disseminating targeted information during major public events like elections or pandemics. Systematic analysis of the message traffic can provide valuable insights into prevailing opinions and social dynamics among different segments of the population. We are specifically interested in influence spread, and in particular whether more deliberate influence operations can be detected. However, filtering out the essential messages with telltale influence indicators from the extensive and often chaotic social media traffic is a major challenge.In this paper we present a novel approach to extract influence indicators from messages circulating among groups of users discussing particular topics. We build upon the the concept of a convo to identify influential authors who are actively promoting some particular agenda around that topic within the group. We focus on two influence indicators: the (control of) agenda and the use of emotional language. 2024.lrec-main.1303 @@ -15357,7 +15357,7 @@ YandaChen AmithAnanthram Colin WayneLeach - KathleenMcKeown + KathleenMcKeown 14995–15011 There are many settings where it is useful to predict and explain the success or failure of a dialogue. Circumplex theory from psychology models the social orientations (e.g., Warm-Agreeable, Arrogant-Calculating) of conversation participants and can be used to predict and explain the outcome of social interactions. Our work is novel in its systematic application of social orientation tags to modeling conversation outcomes. In this paper, we introduce a new data set of dialogue utterances machine-labeled with social orientation tags. We show that social orientation tags improve task performance, especially in low-resource settings, on both English and Chinese language benchmarks. We also demonstrate how social orientation tags help explain the outcomes of social interactions when used in neural models. Based on these results showing the utility of social orientation tags for dialogue outcome prediction tasks, we release our data sets, code, and models that are fine-tuned to predict social orientation tags on dialogue utterances. 2024.lrec-main.1304 @@ -15366,7 +15366,7 @@ <fixed-case>S</fixed-case>oft<fixed-case>MCL</fixed-case>: Soft Momentum Contrastive Learning for Fine-grained Sentiment-aware Pre-training JinWang - Liang-ChihYu + Liang-ChihYu XuejieZhang 15012–15023 The pre-training for language models captures general language understanding but fails to distinguish the affective impact of a particular context to a specific word. Recent works have sought to introduce contrastive learning (CL) for sentiment-aware pre-training in acquiring affective information. Nevertheless, these methods present two significant limitations. First, the compatibility of the GPU memory often limits the number of negative samples, hindering the opportunities to learn good representations. In addition, using only a few sentiment polarities as hard labels, e.g., positive, neutral, and negative, to supervise CL will force all representations to converge to a few points, leading to the issue of latent space collapse. This study proposes a soft momentum contrastive learning (SoftMCL) for fine-grained sentiment-aware pre-training. Instead of hard labels, we introduce valence ratings as soft-label supervision for CL to fine-grained measure the sentiment similarities between samples. The proposed SoftMCL conducts CL on both the word- and sentence-level to enhance the model’s ability to learn affective information. A momentum queue was introduced to expand the contrastive samples, allowing storing and involving more negatives to overcome the limitations of hardware platforms. Extensive experiments were conducted on four different sentiment-related tasks, which demonstrates the effectiveness of the proposed SoftMCL method. The code and data of the proposed SoftMCL is available at: https://www.github.com/wangjin0818/SoftMCL/. @@ -15435,7 +15435,7 @@ <fixed-case>SPACE</fixed-case>-<fixed-case>IDEAS</fixed-case>: A Dataset for Salient Information Detection in Space Innovation AndresGarcia-Silva CristianBerrio - Jose ManuelGomez-Perez + Jose ManuelGomez-Perez 15087–15092 Detecting salient parts in text using natural language processing has been widely used to mitigate the effects of information overflow. Nevertheless, most of the datasets available for this task are derived mainly from academic publications. We introduce SPACE-IDEAS, a dataset for salient information detection from innovation ideas related to the Space domain. The text in SPACE-IDEAS varies greatly and includes informal, technical, academic and business-oriented writing styles. In addition to a manually annotated dataset we release an extended version that is annotated using a large generative language model. We train different sentence and sequential sentence classifiers, and show that the automatically annotated dataset can be leveraged using multitask learning to train better classifiers. 2024.lrec-main.1311 @@ -15457,7 +15457,7 @@ JenniferTracey AnnO’Brien SongChen - StephanieStrassel + StephanieStrassel 15105–15113 We present a new approach to event annotation designed to promote whole-corpus understanding of complex events in multilingual, multimedia data as part of the DARPA Knowledge-directed Artificial Intelligence Reasoning Over Schemas (KAIROS) Program. KAIROS aims to build technology capable of reasoning about complex real-world events like a specific terrorist attack in order to provide actionable insights to end users. KAIROS systems extract events from a corpus, aggregate information into a coherent semantic representation, and instantiate observed events or predict unseen but expected events using a relevant event schema selected from a generalized schema library. To support development and testing for KAIROS Phase 2B we created a complex event annotation corpus that, instead of individual event mentions anchored in document spans with pre-defined event type labels, comprises a series of temporally ordered event frames populated with information aggregated from the whole corpus and labeled with an unconstrained tag set based on Wikidata Qnodes. The corpus makes a unique contribution to the resource landscape for information extraction, addressing gaps in the availability of multilingual, multimedia corpora for schema-based event representation. The corpus will be made available through publication in the Linguistic Data Consortium (LDC) catalog. 2024.lrec-main.1313 @@ -15489,7 +15489,7 @@ AleixSant Gerard I.Gállego DavidDale - Marta R.Costa-jussà + Marta R.Costa-jussà 15137–15146 Speech-to-Speech and Speech-to-Text translation are currently dynamic areas of research. In our commitment to advance these fields, we present SpeechAlign, a framework designed to evaluate the underexplored field of source-target alignment in speech models. The SpeechAlign framework has two core components. First, to tackle the absence of suitable evaluation datasets, we introduce the Speech Gold Alignment dataset, built upon a English-German text translation gold alignment dataset. Secondly, we introduce two novel metrics, Speech Alignment Error Rate (SAER) and Time-weighted Speech Alignment Error Rate (TW-SAER), which enable the evaluation of alignment quality within speech models. While the former gives equal importance to each word, the latter assigns weights based on the length of the words in the speech signal. By publishing SpeechAlign we provide an accessible evaluation framework for model assessment, and we employ it to benchmark open-source Speech Translation models. In doing so, we contribute to the ongoing research progress within the fields of Speech-to-Speech and Speech-to-Text translation. 2024.lrec-main.1316 @@ -15543,7 +15543,7 @@ <fixed-case>SPLICE</fixed-case>: A Singleton-Enhanced <fixed-case>P</fixed-case>ipe<fixed-case>LI</fixed-case>ne for Coreference <fixed-case>RE</fixed-case>solution YilunZhu SiyaoPeng - SameerPradhan + SameerPradhan AmirZeldes 15191–15201 Singleton mentions, i.e. entities mentioned only once in a text, are important to how humans understand discourse from a theoretical perspective. However previous attempts to incorporate their detection in end-to-end neural coreference resolution for English have been hampered by the lack of singleton mention spans in the OntoNotes benchmark. This paper addresses this limitation by combining predicted mentions from existing nested NER systems and features derived from OntoNotes syntax trees. With this approach, we create a near approximation of the OntoNotes dataset with all singleton mentions, achieving ~94% recall on a sample of gold singletons. We then propose a two-step neural mention and coreference resolution system, named SPLICE, and compare its performance to the end-to-end approach in two scenarios: the OntoNotes test set and the out-of-domain (OOD) OntoGUM corpus. Results indicate that reconstructed singleton training yields results comparable to end-to-end systems for OntoNotes, while improving OOD stability (+1.1 avg. F1). We conduct error analysis for mention detection and delve into its impact on coreference clustering, revealing that precision improvements deliver more substantial benefits than increases in recall for resolving coreference chains. @@ -15612,7 +15612,7 @@ IsabelleLorge LiZhang XiaowenDong - JanetPierrehumbert + JanetPierrehumbert 15273–15284 The rise of social media platforms has led to an increase in polarised online discussions, especially on political and socio-cultural topics such as elections and climate change. We propose a simple and entirely novel unsupervised method to better predict whether the authors of two posts agree or disagree, leveraging user stances about named entities obtained from their posts. We present STEntConv, a model which builds a graph of users and named entities weighted by stance and trains a Signed Graph Convolutional Network (SGCN) to detect disagreement between comment and reply posts. We run experiments and ablation studies and show that including this information improves disagreement detection performance on a dataset of Reddit posts for a range of controversial subreddit topics, without the need for platform-specific features or user history 2024.lrec-main.1327 @@ -15624,7 +15624,7 @@ LianweiWu LinyongWang SensenGuo - YangLiu + YangLiu 15285–15295 Recently, the autoregressive framework based on large language models (LLMs) has achieved excellent performance in controlling the generated text to adhere to the required style. These methods guide LLMs through prompt learning to generate target text in an autoregressive manner. However, this manner possesses lower controllability and suffers from the challenge of accumulating errors, where early prediction inaccuracies might influence subsequent word generation. Furthermore, existing prompt-based methods overlook specific region editing, resulting in a deficiency of localized control over input text. To overcome these challenges, we propose a novel three-stage prompt-based approach for specific region editing. To alleviate the issue of accumulating errors, we transform the text style transfer task into a text infilling task, guiding the LLMs to modify only a small portion of text within the editing region to achieve style transfer, thus reducing the number of autoregressive iterations. To achieve an effective specific editing region, we adopt both prompt-based and word frequency-based strategies for region selection, subsequently employing a discriminator to validate the efficacy of the selected region. Experiments conducted on several publicly competitive datasets for text style transfer task confirm that our proposed approach achieves state-of-the-art performance. Keywords: text style transfer, natural language generation, large language models 2024.lrec-main.1328 @@ -15699,7 +15699,7 @@ ShichenLi ZhongqingWang YanzhiXu - GuodongZhou + GuodongZhou 15373–15383 Employing pre-trained generation models for cross-domain aspect-based sentiment classification has recently led to large improvements. However, they ignore the importance of syntactic structures, which have shown appealing effectiveness in classification based models. Different from previous studies, efficiently encoding the syntactic structure in generation model is challenging because such models are pretrained on natural language, and modeling structured data may lead to catastrophic forgetting of distributional knowledge. In this study, we propose a novel structure-aware generation model to tackle this challenge. In particular, a prompt-driven strategy is designed to bridge the gap between different domains, by capturing implicit syntactic information from the input and output sides. Furthermore, the syntactic structure is explicitly encoded into the structure-aware generation model, which can effectively learn domain-irrelevant features based on syntactic pivot features. Empirical results demonstrate the effectiveness of the proposed structure-aware generation model over several strong baselines. The results also indicate the proposed model is capable of leveraging the input syntactic structure into the generation model. 2024.lrec-main.1335 @@ -15725,7 +15725,7 @@ XiangyuDuan ZhenyuQiu TongZhang - JunhuiLi + JunhuiLi HaoYang MinZhang 15398–15409 @@ -15740,7 +15740,7 @@ ZhihengXi TaoGui QiZhang - XuanjingHuang + XuanjingHuang 15410–15421 Deep neural networks (DNNs) are notoriously vulnerable to adversarial attacks that place carefully crafted perturbations on normal examples to fool DNNs. To better understand such attacks, a characterization of the features carried by adversarial examples is needed. In this paper, we tackle this challenge by inspecting the subspaces of sample features through spectral analysis. We first empirically show that the features of either clean signals or adversarial perturbations are redundant and span in low-dimensional linear subspaces respectively with minimal overlap, and the classical low-dimensional subspace projection can suppress perturbation features out of the subspace of clean signals. This makes it possible for DNNs to learn a subspace where only features of clean signals exist while those of perturbations are discarded, which can facilitate the distinction of adversarial examples. To prevent the residual perturbations that is inevitable in subspace learning, we propose an independence criterion to disentangle clean signals from perturbations. Experimental results show that the proposed strategy enables the model to inherently suppress adversaries, which not only boosts model robustness but also motivates new directions of effective adversarial defense. 2024.lrec-main.1338 @@ -15759,7 +15759,7 @@ ŠpelaArhar Holdt JakaČibej KajaDobrovoljc - TomažErjavec + TomažErjavec PolonaGantar SimonKrek TinaMunda @@ -15787,7 +15787,7 @@ <fixed-case>S</fixed-case>wiss<fixed-case>SL</fixed-case>i: The Multi-parallel Sign Language Corpus for <fixed-case>S</fixed-case>witzerland ZifanJiang - AnneGöhring + AnneGöhring AmitMoryossef RicoSennrich SarahEbling @@ -15965,7 +15965,7 @@ <fixed-case>TARIC</fixed-case>-<fixed-case>SLU</fixed-case>: A <fixed-case>T</fixed-case>unisian Benchmark Dataset for Spoken Language Understanding SalimaMdhaffar FethiBougares - Renatode Mori + Renatode Mori SalahZaiem MircoRavanelli YannickEstève @@ -15990,7 +15990,7 @@ Task-agnostic Distillation of Encoder-Decoder Language Models - ChenZhang + ChenZhang YangYang QiuchiLi JingangWang @@ -16027,7 +16027,7 @@ YunpengLi JiaruiZhang XingshengZhang - HeyanHuang + HeyanHuang 15685–15697 Large Language Models (LLMs) have achieved impressive results in Machine Translation by simply following instructions, even without training on parallel data. However, LLMs still face challenges on low-resource languages due to the lack of pre-training data. In real-world situations, humans can become proficient in their native languages through abundant and meaningful social interactions and can also learn foreign languages effectively using well-organized textbooks. Drawing inspiration from human learning patterns, we introduce the Translate After LEarNing Textbook (TALENT) approach, which aims to enhance LLMs’ ability to translate low-resource languages by learning from a textbook. TALENT follows a step-by-step process: (1) Creating a Textbook for low-resource languages. (2) Guiding LLMs to absorb the Textbook’s content for Syntax Patterns. (3) Enhancing translation by utilizing the Textbook and Syntax Patterns. We thoroughly assess TALENT’s performance using 112 low-resource languages from FLORES-200 with two LLMs: ChatGPT and BLOOMZ. Evaluation across three different metrics reveals that TALENT consistently enhances translation performance by 14.8% compared to zero-shot baselines. Further analysis demonstrates that TALENT not only improves LLMs’ comprehension of low-resource languages but also equips them with the knowledge needed to generate accurate and fluent sentences in these languages. 2024.lrec-main.1362 @@ -16048,7 +16048,7 @@ GopichandKanumolu LokeshMadasu NirmalSurange - ManishShrivastava + ManishShrivastava 15711–15720 News headline generation is a crucial task in increasing productivity for both the readers and producers of news. This task can easily be aided by automated News headline-generation models. However, the presence of irrelevant headlines in scraped news articles results in sub-optimal performance of generation models. We propose that relevance-based headline classification can greatly aid the task of generating relevant headlines. Relevance-based headline classification involves categorizing news headlines based on their relevance to the corresponding news articles. While this task is well-established in English, it remains under-explored in low-resource languages like Telugu due to a lack of annotated data. To address this gap, we present TeClass, the first-ever human-annotated Telugu news headline classification dataset, containing 78,534 annotations across 26,178 article-headline pairs. We experiment with various baseline models and provide a comprehensive analysis of their results. We further demonstrate the impact of this work by fine-tuning various headline generation models using TeClass dataset. The headlines generated by the models fine-tuned on highly relevant article-headline pairs, showed about a 5 point increment in the ROUGE-L scores. To encourage future research, the annotated dataset as well as the annotation guidelines will be made publicly available. 2024.lrec-main.1364 @@ -16069,7 +16069,7 @@ Tell Me Again! a Large-Scale Dataset of Multiple Summaries for the Same Story Hans OleHatzel - ChrisBiemann + ChrisBiemann 15732–15741 A wide body of research is concerned with the semantics of narratives, both in terms of understanding narratives and generating fictional narratives and stories. We provide a dataset of summaries to be used as a proxy for entire stories or for the analysis of the summaries themselves. Our dataset consists of a total of 96,831 individual summaries across 29,505 stories. We intend for the dataset to be used for training and evaluation of embedding representations for stories, specifically the stories’ narratives. The summary data is harvested from five different language versions of Wikipedia. Our dataset comes with rich metadata, which we extract from Wikidata, enabling a wide range of applications that operate on story summaries in conjunction with metadata. To set baseline results, we run retrieval experiments on the dataset, exploring the capability of similarity models in retrieving summaries of the same story. For this retrieval, a crucial element is to not place too much emphasis on the named entities, as this can enable retrieval of other summaries for the same work without taking the narrative into account. 2024.lrec-main.1366 @@ -16101,7 +16101,7 @@ text2story: A Python Toolkit to Extract and Visualize Story Components of Narrative Text EvelinAmorim RicardoCampos - AlipioJorge + AlipioJorge PedroMota RúbenAlmeida 15761–15772 @@ -16145,7 +16145,7 @@ Text Style Transfer Evaluation Using Large Language Models - PhilOstheimer + PhilOstheimer MayankNagda MariusKloft SophieFellenz @@ -16167,10 +16167,10 @@ Textual Coverage of Eventive Entries in Lexical Semantic Resources - EvaFučíková + EvaFučíková Cristina FernándezAlcaina - JanHajič - ZdeňkaUrešová + JanHajič + ZdeňkaUrešová 15835–15841 This short paper focuses on the coverage of eventive entries (verbs, predicates, etc.) of some well-known lexical semantic resources when applied to random running texts taken from the internet. While coverage gaps are often reported for manually created lexicons (which is the case of most semantically-oriented lexical ones), it was our aim to quantify these gaps, cross-lingually, on a new purely textual resource set produced by the HPLT Project from crawled internet data. Several English, German, Spanish and Czech lexical semantic resources (which, for the most part, focus on verbs and predicates) have been selected for this experiment. We also describe the challenges related to the fact that these resources are (to a varying extent) semantically oriented, meaning that the texts have to be preprocessed to obtain lemmas (base forms) and some types of MWEs before the coverage can be reasonably evaluated, and thus the results are necessarily only approximate. The coverage of these resources, with some exclusions as described in the paper, range from 41.00% to 97.33%, confirming the need to expand at least some - even well-known - resources to cover the prevailing source of today’s textual resources with regard to lexical units describing events or states (or possibly other eventive mentions). 2024.lrec-main.1375 @@ -16197,7 +16197,7 @@ The Corpus <fixed-case>AIKIA</fixed-case>: Using Ranking Annotation for Offensive Language Detection in <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek - StellaMarkantonatou + StellaMarkantonatou VivianStamou ChristinaChristodoulou GeorgiaApostolopoulou @@ -16256,7 +16256,7 @@ BørreGaup TrondTrosterud Maja LisaKappfjell - SjurMoshagen + SjurMoshagen 15922–15931 Creating language technology based on language data has become very popular with the recent advances of large language models and neural network technologies. This makes language resources very valuable, and especially in case of indigenous languages, the scarce resources are even more precious. Given the good results of simply fetching everything you can from the internet and feeding it to neural networks in English, there has been more work on doing the same for all languages. However, indigenous language resources as they are on the web are not comparable in that they would encode the most recent normativised language in all its aspects. This problematic is further due to not understanding the texts input to models or output by models by the people who work on them. Corpora also have intelligent property rights and copyrights that are not respected. Furthermore, the web is filled with the result of language model -generated texts. In this article we describe an ethical and sustainable way to work with indigenous languages. 2024.lrec-main.1383 @@ -16285,8 +16285,8 @@ The Impact of Stance Object Type on the Quality of Stance Detection - Maxwell A.Weinzierl - Sanda M.Harabagiu + Maxwell A.Weinzierl + Sanda M.Harabagiu 15942–15954 Stance as an expression of an author’s standpoint and as a means of communication has long been studied by computational linguists. Automatically identifying the stance of a subject toward an object is an active area of research in natural language processing. Significant work has employed topics and claims as the object of stance, with frames of communication becoming more recently considered as alternative objects of stance. However, little attention has been paid to finding what are the benefits and what are the drawbacks when inferring the stance of a text towards different possible stance objects. In this paper we seek to answer this question by analyzing the implied knowledge and the judgments required when deciding the stance of a text towards each stance object type. Our analysis informed experiments with models capable of inferring the stance of a text towards any of the stance object types considered, namely topics, claims, and frames of communication. Experiments clearly indicate that it is best to infer the stance of a text towards a frame of communication, rather than a claim or a topic. It is also better to infer the stance of a text towards a claim rather than a topic. Therefore we advocate that rather than continuing efforts to annotate the stance of texts towards topics, it is better to use those efforts to produce annotations towards frames of communication. These efforts will allow us to better capture the stance towards claims and topics as well. 2024.lrec-main.1385 @@ -16324,7 +16324,7 @@ The Onomastic Repertoire of the <fixed-case>R</fixed-case>oman d’Alexandre (<fixed-case>ORNARE</fixed-case>). Designing an Integrated Digital Onomastic Tool for Medieval <fixed-case>F</fixed-case>rench <fixed-case>R</fixed-case>omance MartaMilazzo - Giorgio MariaDi Nunzio + Giorgio MariaDi Nunzio 15982–15987 The paper reports on the first results of the design and implementation of a new digital tool for romance philology: the digital Onomastic Repertoire for the medieval French romance (12th-15th centuries). This tool, projected with a modular and integrable architecture, was implemented from a selection of romances, the corpus of the Medieval French Roman d’Alexandre. After introducing the peculiarities of the onomastic system in the Middle Ages (and, more generally, the peculiarities of medieval literary texts), the paper describes 1) the methodological challenges faced in the preparatory work, illustrates and comments on the first results achieved and 2) the design and implementation of the first integrated system for the interactive creation of the Onomastic Repertoire of the romaN d’AlexandRE (ORNARE), and 3) the current research output in terms of both a digital edition and the digital onomastic index of the corpus. 2024.lrec-main.1389 @@ -16390,7 +16390,7 @@ The <fixed-case>RIP</fixed-case> Corpus of Collaborative Hypothesis-Making EllaSchad JackyVisser - ChrisReed + ChrisReed 16047–16057 The dearth of literature combining hypothesis-making and collaborative problem solving presents a problem in the investigation into how hypotheses are generated in group environments. A new dataset, the Resolving Investigative hyPotheses (RIP) corpus, is introduced to address this issue. The corpus uses the fictionalised environment of a murder investigation game. An artificial environment restricts the number of possible hypotheses compared to real-world situations, allowing a deeper dive into the data. In three groups of three, participants collaborated to solve the mystery: two groups came to the wrong conclusion in different ways, and one succeeded in solving the game. RIP is a 49k-word dialogical corpus, consisting of three sub-corpora, annotated for argumentation and discourse structure on the basis of Inference Anchoring Theory. The corpus shows the emergent roles individuals took on and the strategies the groups employed, showing what can be gained through a deeper exploration of this domain. The corpus bridges the gap between these two areas – hypothesis generation and collaborative problem solving – by using an environment rich with potential for hypothesising within a highly collaborative space. 2024.lrec-main.1395 @@ -16434,7 +16434,7 @@ RóbertSabo KatarínaPolónyiová DanielaOstatníková - ŠtefanBeňuš + ŠtefanBeňuš 16094–16099 This paper presents the Slovak Autistic and Non-Autistic Child Speech Corpus, which consists of audio-recordings and transcripts of collaborative, task-oriented conversations between children (with or without autism spectrum disorder, ASD) and a non-autistic adult experimenter. The task used to elicit this corpus was the Maps task. This corpus was primarily recorded to investigate lexical alignment, but can also be used to study other conversation coordination strategies and behaviours. Scores on various standardised psychometric tests, such as those measuring IQ, executive functioning, and theory of mind, are included for each participant. In total, the corpus contains over 15 hours of speech. This relatively large database contains a non-Germanic language and can be shared with any qualified researcher, making it a valuable resource for replication of existing findings regarding communication and ASD as well as future research into communication between individuals with and without ASD. 2024.lrec-main.1399 @@ -16557,7 +16557,7 @@ Wen-waiYim YujuanFu AsmaBen Abacha - MelihaYetisgen + MelihaYetisgen 16211–16223 Unpredictability, especially unpredictability with unknown error characteristics, is a highly undesirable trait, particularly in medical patient care applications. Although large pre-trained language models (LLM) have been applied to a variety of unseen tasks with highly competitive and successful results, their sensitivity to language inputs and resulting performance variability is not well-studied. In this work, we test state-of-the-art pre-trained language models from a variety of families to characterize their error generation and reliability in medical assessment ability. Particularly, we experiment with general medical assessment multiple choice tests, as well as their open-ended and true-false alternatives. We also profile model consistency, error agreements with each other and to humans; and finally, quantify their ability to recover and explain errors. The findings in this work can be used to give further information about medical models so that modelers can make better-informed decisions rather than relying on standalone performance metrics alone. 2024.lrec-main.1409 @@ -16600,7 +16600,7 @@ <fixed-case>T</fixed-case>ool<fixed-case>R</fixed-case>erank: Adaptive and Hierarchy-Aware Reranking for Tool Retrieval YuanhangZheng PengLi - WeiLiu + WeiLiu YangLiu JianLuan BinWang @@ -16633,7 +16633,7 @@ HangJiang DougBeeferman WeiquanMao - DebRoy + DebRoy 16293–16303 The time at which a message is communicated is a vital piece of metadata in many real-world natural language processing tasks such as Topic Detection and Tracking (TDT). TDT systems aim to cluster a corpus of news articles by event, and in that context, stories that describe the same event are likely to have been written at around the same time. Prior work on time modeling for TDT takes this into account, but does not well capture how time interacts with the semantic nature of the event. For example, stories about a tropical storm are likely to be written within a short time interval, while stories about a movie release may appear over weeks or months. In our work, we design a neural method that fuses temporal and textual information into a single representation of news documents for event detection. We fine-tune these time-aware document embeddings with a triplet loss architecture, integrate the model into downstream TDT systems, and evaluate the systems on two benchmark TDT data sets in English. In the retrospective setting, we apply clustering algorithms to the time-aware embeddings and show substantial improvements over baselines on the News2013 data set. In the online streaming setting, we add our document encoder to an existing state-of-the-art TDT pipeline and demonstrate that it can benefit the overall performance. We conduct ablation studies on the time representation and fusion algorithm strategies, showing that our proposed model outperforms alternative strategies. Finally, we probe the model to examine how it handles recurring events more effectively than previous TDT systems. 2024.lrec-main.1416 @@ -16643,7 +16643,7 @@ <fixed-case>T</fixed-case>opic<fixed-case>D</fixed-case>iff: A Topic-enriched Diffusion Approach for Multimodal Conversational Emotion Detection JiaminLuo JingjingWang - GuodongZhou + GuodongZhou 16304–16314 Multimodal Conversational Emotion (MCE) detection, generally spanning across the acoustic, vision and language modalities, has attracted increasing interest in the multimedia community. Previous studies predominantly focus on learning contextual information in conversations with only a few considering the topic information in single language modality, while always neglecting the acoustic and vision topic information. On this basis, we propose a model-agnostic Topic-enriched Diffusion (TopicDiff) approach for capturing multimodal topic information in MCE tasks. Particularly, we integrate the diffusion model into neural topic model to alleviate the diversity deficiency problem of neural topic model in capturing topic information. Detailed evaluations demonstrate the significant improvements of TopicDiff over the state-of-the-art MCE baselines, justifying the importance of multimodal topic information to MCE and the effectiveness of TopicDiff in capturing such information. Furthermore, we observe an interesting finding that the topic information in acoustic and vision is more discriminative and robust compared to the language. 2024.lrec-main.1417 @@ -16682,7 +16682,7 @@ Towards a <fixed-case>D</fixed-case>anish Semantic Reasoning Benchmark - Compiled from Lexical-Semantic Resources for Assessing Selected Language Understanding Capabilities of Large Language Models - BolettePedersen + BolettePedersen NathalieSørensen SussiOlsen SanniNimb @@ -16706,7 +16706,7 @@ ShinkaMori OanaIgnat AndrewLee - RadaMihalcea + RadaMihalcea 16378–16391 Synthetic data generation has the potential to impact applications and domains with scarce data. However, before such data is used for sensitive tasks such as mental health, we need an understanding of how different demographics are represented in it. In our paper, we analyze the potential of producing synthetic data using GPT-3 by exploring the various stressors it attributes to different race and gender combinations, to provide insight for future researchers looking into using LLMs for data generation. Using GPT-3, we develop HeadRoom, a synthetic dataset of 3,120 posts about depression-triggering stressors, by controlling for race, gender, and time frame (before and after COVID-19). Using this dataset, we conduct semantic and lexical analyses to (1) identify the predominant stressors for each demographic group; and (2) compare our synthetic data to a human-generated dataset. We present the procedures to generate queries to develop depression data using GPT-3, and conduct analyzes to uncover the types of stressors it assigns to demographic groups, which could be used to test the limitations of LLMs for synthetic data generation for depression data. Our findings show that synthetic data mimics some of the human-generated data distribution for the predominant depression stressors across diverse demographics. 2024.lrec-main.1423 @@ -16715,7 +16715,7 @@ Towards an Ideal Tool for Learner Error Annotation ŠpelaArhar Holdt - TomažErjavec + TomažErjavec IztokKosem ElenaVolodina 16392–16398 @@ -16725,7 +16725,7 @@ Towards Answering Health-related Questions from Medical Videos: Datasets and Approaches - DeepakGupta + DeepakGupta KushAttal DinaDemner-Fushman 16399–16411 @@ -16736,7 +16736,7 @@ Towards a Unified Taxonomy of Deep Syntactic Relations KiraDroganova - DanielZeman + DanielZeman 16412–16421 This paper analyzes multiple deep-syntactic frameworks with the goal of creating a proposal for a set of universal semantic role labels. The proposal examines various theoretic linguistic perspectives and focuses on Meaning-Text Theory and Functional Generative Description frameworks and PropBank. The research is based on the data from four Indo-European and one Uralic language – Spanish and Catalan (Taulé et al., 2011), Czech (Hajič et al., 2017), English (Hajič et al., 2012), and Finnish (Haverinen et al., 2015). Updated datasets with the new universal semantic role labels are now publicly available as a result of our work. Nevertheless, our proposal is oriented towards Universal Dependencies (UD) (de Marneffe et al., 2021) and our ultimate goal is to apply a subset of the universal labels to the full UD data. 2024.lrec-main.1426 @@ -16795,7 +16795,7 @@ Towards Cost-effective Multi-style Conversations: A Pilot Study in Task-oriented Dialogue Generation TizianoLabruna - BernardoMagnini + BernardoMagnini 16473–16479 Conversations exhibit significant variation when different styles are employed by participants, often leading to subpar performance when a dialogue model is exclusively trained on single-style datasets. We present a cost-effective methodology for generating multi-style conversations, which can be used in the development of conversational agents. This methodology only assumes the availability of a conversational domain, such as a knowledge base, and leverages the generative capabilities of large language models. In a pilot study focused on the generation aspect of task-oriented dialogues, we extended the well-known MultiWOZ dataset to encompass multi-style variations. Our findings highlight two key experimental outcomes: (i) these novel resources pose challenges for current single-style models, and (ii) multi-style resources enhance the dialogue model’s resilience to stylistic variations. 2024.lrec-main.1431 @@ -16805,7 +16805,7 @@ Towards Dog Bark Decoding: Leveraging Human Speech Processing for Automated Bark Classification ArtemAbzaliev HumbertoPerez-Espinosa - RadaMihalcea + RadaMihalcea 16480–16486 Similar to humans, animals make extensive use of verbal and non-verbal forms of communication, including a large range of audio signals. In this paper, we address dog vocalizations and explore the use of self-supervised speech representation models pre-trained on human speech to address dog bark classification tasks that find parallels in human-centered tasks in speech recognition. We specifically address four tasks: dog recognition, breed identification, gender classification, and context grounding. We show that using speech embedding representations significantly improves over simpler classification baselines. Further, we also find that models pre-trained on large human speech acoustics can provide additional performance boosts on several tasks. 2024.lrec-main.1432 @@ -16960,7 +16960,7 @@ Towards Robust In-Context Learning for Machine Translation with Large Language Models ShaolinZhu MenglongCui - DeyiXiong + DeyiXiong 16619–16629 Using large language models (LLMs) for machine translation via in-context learning (ICL) has become an interesting research direction of machine translation (MT) in recent years. Its main idea is to retrieve a few translation pairs as demonstrations from an additional datastore (parallel corpus) to guide translation without updating the LLMs. However, the underlying noise of retrieved demonstrations usually dramatically deteriorate the performance of LLMs. In this paper, we propose a robust method to enable LLMs to achieve robust translation with ICL. The method incorporates a multi-view approach, considering both sentence- and word-level information, to select demonstrations that effectively avoid noise. At the sentence level, a margin-based score is designed to avoid semantic noise. At the word level, word embeddings are utilized to evaluate the related tokens and change the weight of words in demonstrations. By considering both sentence- and word-level similarity, the proposed method provides fine-grained demonstrations that effectively prompt the translation of LLMs. Experimental results demonstrate the effectiveness of our method, particularly in domain adaptation. 2024.lrec-main.1444 @@ -16985,7 +16985,7 @@ Towards Semantic Tagging for <fixed-case>I</fixed-case>rish TimCzerniak - ElaineUí Dhonnchadha + ElaineUí Dhonnchadha 16643–16652 Well annotated corpora have been shown to have great value, both in linguistic and non-linguistic research, and in supporting machine-learning and many other non-research activities including language teaching. For minority languages, annotated corpora can help in understanding language usage norms among native and non-native speakers, providing valuable information both for lexicography and for teaching, and helping to combat the decline of speaker numbers. At the same time, minority languages suffer from having fewer available language resources than majority languages, and far less-developed annotation tooling. To date there is very little work in semantic annotation for Irish. In this paper we report on progress to date in the building of a standard tool-set for semantic annotation of Irish, including a novel method for evaluation of semantic annotation. A small corpus of Irish language data has been manually annotated with semantic tags, and manually checked. A semantic type tagging framework has then been developed using existing technologies, and using a semantic lexicon that has been built from a variety of sources. Semantic disambiguation methods have been added with a view to increasing accuracy. That framework has then been tested using the manually tagged corpus, resulting in over 90% lexical coverage and almost 80% tag accuracy. Development is ongoing as part of a larger corpus development project, and plans include expansion of the manually tagged corpus, expansion of the lexicon, and exploration of further disambiguation methods. As the first semantic tagger for Irish, to our knowledge, it is hoped that this research will form a sound basis for semantic annotation of Irish corpora in to the future. 2024.lrec-main.1446 @@ -16995,7 +16995,7 @@ Towards Standardized Annotation and Parsing for <fixed-case>K</fixed-case>orean <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et YigeChen JaeIhn - KyungTaeLim + KyungTaeLim JungyeulPark 16653–16658 Previous research on Korean FrameNet has produced several datasets that serve as resources for FrameNet parsing in Korean. However, these datasets suffer from the problem that annotations are assigned on the word level, which is not optimally designed based on the agglutinative feature of Korean. To address this issue, we introduce a morphologically enhanced annotation strategy for Korean FrameNet datasets and parsing by leveraging the CoNLL-U format. We present the results of the FrameNet parsers trained on the Korean FrameNet data in the original format and our proposed format, respectively, and further elaborate on the linguistic rationales of our proposed scheme. We suggest the morpheme-based scheme to be the standard of Korean FrameNet data annotation. @@ -17015,7 +17015,7 @@ Towards Understanding the Relationship between In-context Learning and Compositional Generalization SungjunHan - SebastianPadó + SebastianPadó 16664–16679 According to the principle of compositional generalization, the meaning of a complex expression can be understood as a function of the meaning of its parts and of how they are combined. This principle is crucial for human language processing and also, arguably, for NLP models in the face of out-of-distribution data. However, many neural network models, including Transformers, have been shown to struggle with compositional generalization. In this paper, we hypothesize that forcing models to in-context learn can provide an inductive bias to promote compositional generalization. To test this hypothesis, we train a causal Transformer in a setting that renders ‘ordinary’ learning very difficult: we present it with different orderings of the training instance and shuffle instance labels. This corresponds to training the model on all possible few-shot learning problems attainable from the dataset. The model can solve the task, however, by utilizing earlier examples to generalize to later ones – i.e., in-context learning. In evaluations on the datasets, SCAN, COGS, and GeoQuery, models trained in this manner indeed show improved compositional generalization. This indicates the usefulness of in-context learning problems as an inductive bias for generalization. 2024.lrec-main.1449 @@ -17227,7 +17227,7 @@ <fixed-case>T</fixed-case>weet<fixed-case>TER</fixed-case>: A Benchmark for Target Entity Retrieval on <fixed-case>T</fixed-case>witter without Knowledge Bases KiamehrRezaee - JoseCamacho-Collados + JoseCamacho-Collados Mohammad TaherPilehvar 16890–16896 Entity linking is a well-established task in NLP consisting of associating entity mentions with entries in a knowledge base. Current models have demonstrated competitive performance in standard text settings. However, when it comes to noisy domains such as social media, certain challenges still persist. Typically, to evaluate entity linking on existing benchmarks, a comprehensive knowledge base is necessary and models are expected to possess an understanding of all the entities contained within the knowledge base. However, in practical scenarios where the objective is to retrieve sentences specifically related to a particular entity, strict adherence to a complete understanding of all entities in the knowledge base may not be necessary. To address this gap, we introduce TweetTER (Tweet Target Entity Retrieval), a novel benchmark that aims to bridge the challenges in entity linking. The distinguishing feature of this benchmark is its approach of re-framing entity linking as a binary entity retrieval task. This enables the evaluation of language models’ performance without relying on a conventional knowledge base, providing a more practical and versatile evaluation framework for assessing the effectiveness of language models in entity retrieval tasks. @@ -17239,7 +17239,7 @@ MarcoCognetta VilémZouhar SangwhanMoon - NaoakiOkazaki + NaoakiOkazaki 16897–16906 In Tokenization and the Noiseless Channel (Zouhar et al., 2023), Rényi efficiency is suggested as an intrinsic mechanism for evaluating a tokenizer: for NLP tasks, the tokenizer which leads to the highest Rényi efficiency of the unigram distribution should be chosen. The Rényi efficiency is thus treated as a predictor of downstream performance (e.g., predicting BLEU for a machine translation task), without the expensive step of training multiple models with different tokenizers. Although useful, the predictive power of this metric is not perfect, and the authors note there are additional qualities of a good tokenization scheme that Rényi efficiency alone cannot capture. We describe two variants of BPE tokenization which can arbitrarily increase Rényi efficiency while decreasing the downstream model performance. These counterexamples expose cases where Rényi efficiency fails as an intrinsic tokenization metric and thus give insight for building more accurate predictors. 2024.lrec-main.1469 @@ -17267,8 +17267,8 @@ ArthurLorenzi NuritMelnik ArchnaBhatia - HinrichSchütze - LoriLevin + HinrichSchütze + LoriLevin AmirZeldes JoakimNivre WilliamCroft @@ -17323,7 +17323,7 @@ GregoriosKatsios NingSa AnkitaBhaumik - TomekStrzalkowski + TomekStrzalkowski 16984–16997 The behavior and decision making of groups or communities can be dramatically influenced by individuals pushing particular agendas, e.g., to promote or disparage a person or an activity, to call for action, etc.. In the examination of online influence campaigns, particularly those related to important political and social events, scholars often concentrate on identifying the sources responsible for setting and controlling the agenda (e.g., public media). In this article we present a methodology for detecting specific instances of agenda control through social media where annotated data is limited or non-existent. By using a modest corpus of Twitter messages centered on the 2022 French Presidential Elections, we carry out a comprehensive evaluation of various approaches and techniques that can be applied to this problem. Our findings demonstrate that by treating the task as a textual entailment problem, it is possible to overcome the requirement for a large annotated training dataset. 2024.lrec-main.1476 @@ -17422,7 +17422,7 @@ FeiMi WeichaoWang YashengWang - Kam-FaiWong + Kam-FaiWong 17074–17086 Conversational retrieval refers to an information retrieval system that operates in an iterative and interactive manner, requiring the retrieval of various external resources, such as persona, knowledge, and even response, to effectively engage with the user and successfully complete the dialogue. However, most previous work trained independent retrievers for each specific resource, resulting in sub-optimal performance and low efficiency. Thus, we propose a multi-task framework function as a universal retriever for three dominant retrieval tasks during the conversation: persona selection, knowledge selection, and response selection. To this end, we design a dual-encoder architecture consisting of a context-adaptive dialogue encoder and a candidate encoder, aiming to attention to the relevant context from the long dialogue and retrieve suitable candidates by simply a dot product. Furthermore, we introduce two loss constraints to capture the subtle relationship between dialogue context and different candidates by regarding historically selected candidates as hard negatives. Extensive experiments and analysis establish state-of-the-art retrieval quality both within and outside its training domain, revealing the promising potential and generalization capability of our model to serve as a universal retriever for different candidate selection tasks simultaneously. 2024.lrec-main.1483 @@ -17430,19 +17430,19 @@ Universal Anaphora: The First Three Years - MassimoPoesio + MassimoPoesio MaciejOgrodniczuk VincentNg - SameerPradhan + SameerPradhan JuntaoYu - Nafise SadatMoosavi + Nafise SadatMoosavi SilviuPaun AmirZeldes AnnaNedoluzhko MichalNovák MartinPopel - ZdeněkŽabokrtský - DanielZeman + ZdeněkŽabokrtský + DanielZeman 17087–17100 The aim of the Universal Anaphora initiative is to push forward the state of the art in anaphora and anaphora resolution by expanding the aspects of anaphoric interpretation which are or can be reliably annotated in anaphoric corpora, producing unified standards to annotate and encode these annotations, delivering datasets encoded according to these standards, and developing methods for evaluating models that carry out this type of interpretation. Although several papers on aspects of the initiative have appeared, no overall description of the initiative’s goals, proposals and achievements has been published yet except as an online draft. This paper aims to fill this gap, as well as to discuss its progress so far. 2024.lrec-main.1484 @@ -17563,7 +17563,7 @@ BozhiWu YushiCao JunzheJiang - YangLiu + YangLiu 17205–17216 Deep learning has introduced significant improvements in many software analysis tasks. Although the Large Language Models (LLMs) based neural code models demonstrate commendable performance when trained and tested within the intra-project independent and identically distributed (IID) setting, they often struggle to generalize effectively to real-world inter-project out-of-distribution (OOD) data. In this work, we show that this phenomenon is caused by the heavy reliance on project-specific shortcuts for prediction instead of ground-truth evidence. We propose a Cond-Idf measurement to interpret this behavior, which quantifies the relatedness of a token with a label and its project-specificness. The strong correlation between model behavior and the proposed measurement indicates that without proper regularization, models tend to leverage spurious statistical cues for prediction. Equipped with these observations, we propose a novel bias mitigation mechanism that regularizes the model’s learning behavior by leveraging latent logic relations among samples. Experimental results on two representative program analysis tasks indicate that our mitigation framework can improve both inter-project OOD generalization and adversarial robustness, while not sacrificing accuracy on intra-project IID data. 2024.lrec-main.1494 @@ -17595,7 +17595,7 @@ SameeArif SualehaFarid AwaisAthar - Agha AliRaza + Agha AliRaza 17237–17244 This paper introduces UQA, a novel dataset for question answering and text comprehension in Urdu, a low-resource language with over 70 million native speakers. UQA is generated by translating the Stanford Question Answering Dataset (SQuAD2.0), a large-scale English QA dataset, using a technique called EATS (Enclose to Anchor, Translate, Seek), which preserves the answer spans in the translated context paragraphs. The paper describes the process of selecting and evaluating the best translation model among two candidates: Google Translator and Seamless M4T. The paper also benchmarks several state-of-the-art multilingual QA models on UQA, including mBERT, XLM-RoBERTa, and mT5, and reports promising results. For XLM-RoBERTa-XL, we have an F1 score of 85.99 and 74.56 EM. UQA is a valuable resource for developing and testing multilingual NLP systems for Urdu and for enhancing the cross-lingual transferability of existing models. Further, the paper demonstrates the effectiveness of EATS for creating high-quality datasets for other languages and domains. The UQA dataset and the code are publicly available at www.github.com/sameearif/UQA 2024.lrec-main.1497 @@ -17662,8 +17662,8 @@ Using Speech Technology to Test Theories of Phonetic and Phonological Typology AnisiaPopescu - LoriLamel - IoanaVasilescu + LoriLamel + IoanaVasilescu 17321–17325 The present paper uses speech technology derived tools and methodologies to test theories about phonetic typology. We specifically look at how the two-way laryngeal contrast (voiced /b, d, g, v, z/ vs. voiceless /p, t, k, f, s/ obstruents) is implemented in European Portuguese, a language that has been suggested to exhibit a different voicing system than its sister Romance languages, more similar to the one found for Germanic languages. A large European Portuguese corpus was force aligned using (1) different combinations of parallel Portuguese (original), Italian (Romance language) and German (Germanic language) acoustic phone models and letting an ASR system choose the best fitting one, and (2) pronunciation variants (/b, d, g, v, z/ produced as either [b, d, g, v, z] or [p, t, k, f, s]) for obstruent consonants. Results support previous accounts in the literature that European Portuguese is diverging from the traditional voicing system known for Romance language, towards a hybrid system where stops and fricatives are specified for different voicing features. 2024.lrec-main.1503 @@ -17714,10 +17714,10 @@ Verbing Weirds Language (Models): Evaluation of <fixed-case>E</fixed-case>nglish Zero-Derivation in Five <fixed-case>LLM</fixed-case>s - David R.Mortensen + David R.Mortensen ValentinaIzrailevitch YunzeXiao - HinrichSchütze + HinrichSchütze LeonieWeissweiler 17359–17364 Lexical-syntactic flexibility, in the form of conversion (or zero-derivation) is a hallmark of English morphology. In conversion, a word with one part of speech is placed in a non-prototypical context, where it is coerced to behave as if it had a different part of speech. However, while this process affects a large part of the English lexicon, little work has been done to establish the degree to which language models capture this type of generalization. This paper reports the first study on the behavior of large language models with reference to conversion. We design a task for testing lexical-syntactic flexibility—the degree to which models can generalize over words in a construction with a non-prototypical part of speech. This task is situated within a natural language inference paradigm. We test the abilities of five language models—two proprietary models (GPT-3.5 and GPT-4), three open source model (Mistral 7B, Falcon 40B, and Llama 2 70B). We find that GPT-4 performs best on the task, followed by GPT-3.5, but that the open source language models are also able to perform it and that the 7-billion parameter Mistral displays as little difference between its baseline performance on the natural language inference task and the non-prototypical syntactic category task, as the massive GPT-4. @@ -17811,7 +17811,7 @@ MalakRassem ChrisJenkins FilipMiletić - SabineSchulte im Walde + SabineSchulte im Walde 17449–17458 Predicting the compositionality of noun compounds such as climate change and tennis elbow is a vital component in natural language understanding. While most previous computational methods that automatically determine the semantic relatedness between compounds and their constituents have applied a synchronic perspective, the current study investigates what diachronic changes in contexts and semantic topics of compounds and constituents reveal about the compounds’ present-day degrees of compositionality. We define a binary classification task that utilizes two diachronic vector spaces based on contextual co-occurrences and semantic topics, and demonstrate that diachronic changes in cosine similarities – measured over context or topic distributions – uncover patterns that distinguish between compounds with low and high present-day compositionality. Despite fewer dimensions in the topic models, the topic space performs on par with the co-occurrence space and captures rather similar information. Temporal similarities between compounds and modifiers as well as between compounds and their prepositional paraphrases predict the compounds’ present-day compositionality with accuracy >0.7. 2024.lrec-main.1517 @@ -17842,7 +17842,7 @@ JunZhou FeiLi ChongTeng - DonghongJi + DonghongJi 17473–17485 Large Language Models (LLMs) are now being considered as judges of high efficiency to evaluate the quality of answers generated by candidate models. However, their judgments may be influenced by complex scenarios and inherent biases, raising concerns about their reliability. This study aims to bridge this gap by introducing four unexplored factors and examining the performance of LLMs as judges, namely answer quantity, inducing statements, judging strategy, and judging style. Additionally, we introduce a new dimension of question difficulty to provide a more comprehensive understanding of LLMs’ judgments across varying question intricacies. We employ ChatGPT, GPT-4, Gemini, and Claude-2 as judges and conduct experiments on Vicuna Benchmark and MT-bench. Our study reveals that LLMs’ judging abilities are susceptible to the influence of these four factors, and analyzing from the newly proposed dimension of question difficulty is highly necessary. We also provide valuable insights into optimizing LLMs’ performance as judges, enhancing their reliability and adaptability across diverse evaluation scenarios. 2024.lrec-main.1519 @@ -17911,8 +17911,8 @@ When Your Cousin Has the Right Connections: Unsupervised Bilingual Lexicon Induction for Related Data-Imbalanced Languages NiyatiBafna CristinaEspaña-Bonet - Josefvan Genabith - BenoîtSagot + Josefvan Genabith + BenoîtSagot RachelBawden 17544–17556 Most existing approaches for unsupervised bilingual lexicon induction (BLI) depend on good quality static or contextual embeddings requiring large monolingual corpora for both languages. However, unsupervised BLI is most likely to be useful for low-resource languages (LRLs), where large datasets are not available. Often we are interested in building bilingual resources for LRLs against related high-resource languages (HRLs), resulting in severely imbalanced data settings for BLI. We first show that state-of-the-art BLI methods in the literature exhibit near-zero performance for severely data-imbalanced language pairs, indicating that these settings require more robust techniques. We then present a new method for unsupervised BLI between a related LRL and HRL that only requires inference on a masked language model of the HRL, and demonstrate its effectiveness on truly low-resource languages Bhojpuri and Magahi (with <5M monolingual tokens each), against Hindi. We further present experiments on (mid-resource) Marathi and Nepali to compare approach performances by resource range, and release our resulting lexicons for five low-resource Indic languages: Bhojpuri, Magahi, Awadhi, Braj, and Maithili, against Hindi. @@ -17944,7 +17944,7 @@ Who Is Bragging More Online? A Large Scale Analysis of Bragging in Social Media MaliJin - DanielPreotiuc-Pietro + DanielPreotiuc-Pietro A. SezaDoğruöz NikolaosAletras 17575–17587 @@ -17976,7 +17976,7 @@ <fixed-case>W</fixed-case>iki<fixed-case>F</fixed-case>act<fixed-case>D</fixed-case>iff: A Large, Realistic, and Temporally Adaptable Dataset for Atomic Factual Knowledge Update in Causal Language Models HichemAmmar Khodja - FrédéricBéchet + FrédéricBéchet QuentinBrabant AlexisNasr GwénoléLecorvé @@ -18002,10 +18002,10 @@ Willkommens-Merkel, Chaos-<fixed-case>J</fixed-case>ohnson, and Tore-Klose: Modeling the Evaluative Meaning of <fixed-case>G</fixed-case>erman Personal Name Compounds AnneroseEichel TanaDeeg - AndreBlessing + AndreBlessing MilenaBelosevic SabineArndt-Lappe - SabineSchulte im Walde + SabineSchulte im Walde 17637–17650 We present a comprehensive computational study of the under-investigated phenomenon of personal name compounds (PNCs) in German such as Willkommens-Merkel (‘Welcome-Merkel’). Prevalent in news, social media, and political discourse, PNCs are hypothesized to exhibit an evaluative function that is reflected in a more positive or negative perception as compared to the respective personal full name (such as Angela Merkel). We model 321 PNCs and their corresponding full names at discourse level, and show that PNCs bear an evaluative nature that can be captured through a variety of computational methods. Specifically, we assess through valence information whether a PNC is more positively or negatively evaluative than the person’s name, by applying and comparing two approaches using (i) valence norms and (ii) pre-trained language models (PLMs). We further enrich our data with personal, domain-specific, and extra-linguistic information and perform a range of regression analyses revealing that factors including compound and modifier valence, domain, and political party membership influence how a PNC is evaluated. 2024.lrec-main.1534 @@ -18043,7 +18043,7 @@ YuHong ShimingHe QingtingXu - JianminYao + JianminYao 17675–17682 Event Detection (ED) is a task of automatically extracting multi-class trigger words. The understanding of word sense is crucial for ED. In this paper, we utilize context-specific commonsense knowledge to strengthen word sense modeling. Specifically, we leverage a Context-specific Knowledge Selector (CKS) to select the exact commonsense knowledge of words from a large knowledge base, i.e., ConceptNet. Context-specific selection is made in terms of the relevance of knowledge to the living contexts. On this basis, we incorporate the commonsense knowledge into the word-level representations before decoding. ChatGPT is an ideal generative CKS when the prompts are deliberately designed, though it is cost-prohibitive. To avoid the heavy reliance on ChatGPT, we train an offline CKS using the predictions of ChatGPT over a small number of examples (about 9% of all). We experiment on the benchmark ACE-2005 dataset. The test results show that our approach yields substantial improvements compared to the BERT baseline, achieving the F1-score of about 78.3%. All models, source codes and data will be made publicly available. 2024.lrec-main.1537 @@ -18053,7 +18053,7 @@ <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et under Scrutiny: Dictionary Examples in the Era of Large Language Models Fatemah YousefAlmeman StevenSchockaert - LuisEspinosa Anke + LuisEspinosa Anke 17683–17695 Dictionary definitions play a prominent role in a wide range of NLP tasks, for instance by providing additional context about the meaning of rare and emerging terms. Many dictionaries also provide examples to illustrate the prototypical usage of words, which brings further opportunities for training or enriching NLP models. The intrinsic qualities of dictionaries, and related lexical resources such as glossaries and encyclopedias, are however still not well-understood. While there has been significant work on developing best practices, such guidance has been aimed at traditional usages of dictionaries (e.g. supporting language learners), and it is currently unclear how different quality aspects affect the NLP systems that rely on them. To address this issue, we compare WordNet, the most commonly used lexical resource in NLP, with a variety of dictionaries, as well as with examples that were generated by ChatGPT. Our analysis involves human judgments as well as automatic metrics. We furthermore study the quality of word embeddings derived from dictionary examples, as a proxy for downstream performance. We find that WordNet’s examples lead to lower-quality embeddings than those from the Oxford dictionary. Surprisingly, however, the ChatGPT generated examples were found to be most effective overall. 2024.lrec-main.1538 @@ -18130,7 +18130,7 @@ Your Stereotypical Mileage May Vary: Practical Challenges of Evaluating Biases in Multiple Languages and Cultural Contexts KarenFort - LauraAlonso Alemany + LauraAlonso Alemany LucianaBenotti JulienBezançon ClaudiaBorg @@ -18151,7 +18151,7 @@ JavierTorroba Marchante ShilinXie Sergio E.Zanotto - AurélieNévéol + AurélieNévéol 17764–17769 Warning: This paper contains explicit statements of offensive stereotypes which may be upsetting The study of bias, fairness and social impact in Natural Language Processing (NLP) lacks resources in languages other than English. Our objective is to support the evaluation of bias in language models in a multilingual setting. We use stereotypes across nine types of biases to build a corpus containing contrasting sentence pairs, one sentence that presents a stereotype concerning an underadvantaged group and another minimally changed sentence, concerning a matching advantaged group. We build on the French CrowS-Pairs corpus and guidelines to provide translations of the existing material into seven additional languages. In total, we produce 11,139 new sentence pairs that cover stereotypes dealing with nine types of biases in seven cultural contexts. We use the final resource for the evaluation of relevant monolingual and multilingual masked language models. We find that language models in all languages favor sentences that express stereotypes in most bias categories. The process of creating a resource that covers a wide range of language types and cultural settings highlights the difficulty of bias evaluation, in particular comparability across languages and contexts. 2024.lrec-main.1545 @@ -18272,7 +18272,7 @@ Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024): Tutorial Summaries RomanKlinger NaozakiOkazaki - NicolettaCalzolari + NicolettaCalzolari Min-YenKan ELRA and ICCL
Torino, Italia
@@ -18293,7 +18293,7 @@ ZhuoshengZhang FuxiaoLiu AoZhang - Tat-SengChua + Tat-SengChua 1–8 Artificial intelligence (AI) encompasses knowledge acquisition and real-world grounding across various modalities. As a multidisciplinary research field, multimodal large language models (MLLMs) have recently garnered growing interest in both academia and industry, showing an unprecedented trend to achieve human-level AI via MLLMs. These large models offer an effective vehicle for understanding, reasoning, and planning by integrating and modeling diverse information modalities, including language, visual, auditory, and sensory data. This tutorial aims to deliver a comprehensive review of cutting-edge research in MLLMs, focusing on four key areas: MLLM architecture design, instructional learning, multimodal reasoning, and the efficiency of MLLMs. We will explore technical advancements, synthesize key challenges, and discuss potential avenues for future research. 2024.lrec-tutorials.1 @@ -18312,7 +18312,7 @@ Meaning Representations for Natural Languages: Design, Models and Applications JuliaBonn JeffreyFlanigan - JanHajič + JanHajič IshanJindal YunyaoLi NianwenXue @@ -18336,7 +18336,7 @@ Mining, Assessing, and Improving Arguments in <fixed-case>NLP</fixed-case> and the Social Sciences GabriellaLapesa - Eva MariaVecchi + Eva MariaVecchi SerenaVillata HenningWachsmuth 26–32 @@ -18375,7 +18375,7 @@ Formal Semantic Controls over Language Models DaniloSilva de Carvalho YingjiZhang - AndréFreitas + AndréFreitas 50–55 Text embeddings provide a concise representation of the semantics of sentences and larger spans of text, rather than individual words, capturing a wide range of linguistic features. They have found increasing application to a variety of NLP tasks, including machine translation and natural language inference. While most recent breakthroughs in task performance are being achieved by large scale distributional models, there is a growing disconnection between their knowledge representation and traditional semantics, which hinders efforts to capture such knowledge in human interpretable form or explain model inference behaviour. In this tutorial, we examine from basics to the cutting edge research on the analysis and control of text representations, aiming to shorten the gap between deep latent semantics and formal symbolics. This includes the considerations on knowledge formalisation, the linguistic information that can be extracted and measured from distributional models, and intervention techniques that enable explainable reasoning and controllable text generation, covering methods from pooling to LLM-based. 2024.lrec-tutorials.9 @@ -18412,7 +18412,7 @@ KishanMaharaj Arif A.Ahmad AbhijitMishra - PushpakBhattacharyya + PushpakBhattacharyya 73–79 In the landscape of natural language processing (NLP), addressing the challenges of bias and hallucination is paramount to ensuring the ethical and unbiased development of Large Language Models (LLMs). This tutorial delves into the intricate dimensions of LLMs, shedding light on the critical importance of understanding and mitigating the profound impacts of bias and hallucination. Divided into two parts, the first part delves deep into the complexity of bias propagation in LLM development, where we dissect its origins and far-reaching impacts. We then present innovative methodologies for mitigating diverse forms of bias, including dynamic word embeddings and robust benchmarking strategies. The second part of the tutorial discusses hallucination - a prevalent issue in generative AI systems such as LLMs. Through advanced data-driven techniques, we decode its intricate effects and complexities, followed factually-driven mitigation strategies. Furthermore, we shed light on the pivotal role of human cognitive behavior in the context of hallucination, drawing insights from cognitive data, including human eye-tracking data. Ultimately, this cutting-edge tutorial serves as a guiding light, equipping participants with indispensable tools and insights to navigate the ethical complexities of LLMs, thus paving the way for the development of unbiased and ethically robust NLP systems. 2024.lrec-tutorials.12 diff --git a/data/xml/2024.lt4hala.xml b/data/xml/2024.lt4hala.xml index c3b1ab3921..af863ccd9b 100644 --- a/data/xml/2024.lt4hala.xml +++ b/data/xml/2024.lt4hala.xml @@ -3,7 +3,7 @@ Proceedings of the Third Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA) @ LREC-COLING-2024 - RacheleSprugnoli + RacheleSprugnoli MarcoPassarotti ELRA and ICCL
Torino, Italia
@@ -30,7 +30,7 @@ Developing a Part-of-speech Tagger for Diplomatically Edited <fixed-case>O</fixed-case>ld <fixed-case>I</fixed-case>rish Text AdrianDoyle - John P.McCrae + John P.McCrae 11–21 POS-tagging is typically considered a fundamental text preprocessing task, with a variety of downstream NLP tasks and techniques being dependent on the availability of POS-tagged corpora. As such, POS-taggers are important precursors to further NLP tasks, and their accuracy can impact the potential accuracy of these dependent tasks. While a variety of POS-tagging methods have been developed which work well with modern languages, historical languages present orthographic and editorial challenges which require special attention. The effectiveness of POS-taggers developed for modern languages is reduced when applied to Old Irish, with its comparatively complex orthography and morphology. This paper examines some of the obstacles to POS-tagging Old Irish text, and shows that inconsistencies between extant annotated corpora reduce the quantity of data available for use in training POS-taggers. The development of a multi-layer neural network model for POS-tagging Old Irish text is described, and an experiment is detailed which demonstrates that this model outperforms a variety of off-the-shelf POS-taggers. Moreover, this model sets a new benchmark for POS-tagging diplomatically edited Old Irish text. 2024.lt4hala-1.2 @@ -57,9 +57,9 @@ Towards Named-Entity and Coreference Annotation of the <fixed-case>H</fixed-case>ebrew <fixed-case>B</fixed-case>ible - Daniel G.Swanson + Daniel G.Swanson Bryce D.Bussert - FrancisTyers + FrancisTyers 36–40 Named-entity annotation refers to the process of specifying what real-world (or, at least, external-to-the-text) entities various names and descriptions within a text refer to. Coreference annotation, meanwhile, specifies what context-dependent words or phrases, such as pronouns refer to. This paper describes an ongoing project to apply both of these to the Hebrew Bible, so far covering most of the book of Genesis, fully marking every person, place, object, and point in time which occurs in the text. The annotation process and possible future uses for the data are covered, along with the challenges involved in applying existing annotation guidelines to the Hebrew text. 2024.lt4hala-1.5 @@ -91,8 +91,8 @@ Unsupervised Authorship Attribution for Medieval <fixed-case>L</fixed-case>atin Using Transformer-Based Embeddings LoicDe Langhe - OrpheeDe Clercq - VeroniqueHoste + OrpheeDe Clercq + VeroniqueHoste 57–64 We explore the potential of employing transformer-based embeddings in an unsupervised authorship attribution task for medieval Latin. The development of Large Language Models (LLMs) and recent advances in transfer learning alleviate many of the traditional issues associated with authorship attribution in lower-resourced (ancient) languages. Despite this, these methods remain heavily understudied within this domain. Concretely, we generate strong contextual embeddings using a variety of mono -and multilingual transformer models and use these as input for two unsupervised clustering methods: a standard agglomerative clustering algorithm and a self-organizing map. We show that these transformer-based embeddings can be used to generate high-quality and interpretable clusterings, resulting in an attractive alternative to the traditional feature-based methods. 2024.lt4hala-1.8 @@ -149,7 +149,7 @@ Leveraging <fixed-case>LLM</fixed-case>s for Post-<fixed-case>OCR</fixed-case> Correction of Historical Newspapers AlanThomas - RobertGaizauskas + RobertGaizauskas HaipingLu 116–121 Poor OCR quality continues to be a major obstacle for humanities scholars seeking to make use of digitised primary sources such as historical newspapers. Typical approaches to post-OCR correction employ sequence-to-sequence models for a neural machine translation task, mapping erroneous OCR texts to accurate reference texts. We shift our focus towards the adaptation of generative LLMs for a prompt-based approach. By instruction-tuning Llama 2 and comparing it to a fine-tuned BART on BLN600, a parallel corpus of 19th century British newspaper articles, we demonstrate the potential of a prompt-based approach in detecting and correcting OCR errors, even with limited training data. We achieve a significant enhancement in OCR quality with Llama 2 outperforming BART, achieving a 54.51% reduction in the character error rate against BART’s 23.30%. This paves the way for future work leveraging generative LLMs to improve the accessibility and unlock the full potential of historical texts for humanities research. @@ -183,7 +183,7 @@ Early <fixed-case>M</fixed-case>odern <fixed-case>D</fixed-case>utch Comedies and Farces in the Spotlight: Introducing <fixed-case>E</fixed-case>m<fixed-case>DC</fixed-case>om<fixed-case>F</fixed-case> and Its Emotion Framework FlorianDebaene Korneevan der Haven - VeroniqueHoste + VeroniqueHoste 144–155 As computational drama studies are developing rapidly, the Dutch dramatic tradition is in need of centralisation still before it can benefit from state-of-the-art methodologies. This paper presents and evaluates EmDComF, a historical corpus of both manually curated and automatically digitised early modern Dutch comedies and farces authored between 1650 and 1725, and describes the refinement of a historically motivated annotation framework exploring sentiment and emotions in these two dramatic subgenres. Originating from Lodewijk Meyer’s philosophical writings on passions in the dramatic genre (±1670), published in Naauwkeurig onderwys in de tooneel-poëzy (Thorough instruction in the Poetics of Drama) by the literary society Nil Volentibus Arduum in 1765, a historical and genre-specific emotion framework is tested and operationalised for annotating emotions in the domain of early modern Dutch comedies and farces. Based on a frequency and cluster analysis of 782 annotated sentences by 2 expert annotators, the initial 38 emotion labels were restructured to a hierarchical label set of the 5 emotions Hatred, Anxiety, Sadness, Joy and Desire. 2024.lt4hala-1.17 @@ -207,11 +207,11 @@ Automatic Normalisation of <fixed-case>M</fixed-case>iddle <fixed-case>F</fixed-case>rench and Its Impact on Productivity - RaphaelRubino + RaphaelRubino SandraCoram-Mekkey JohannaGerlach Jonathan DavidMutal - PierretteBouillon + PierretteBouillon 176–189 This paper presents a study on automatic normalisation of 16th century documents written in Middle French. These documents present a large variety of wordforms which require spelling normalisation to facilitate downstream linguistic and historical studies. We frame the normalisation process as a machine translation task starting with a strong baseline leveraging a pre-trained encoder–decoder model. We propose to improve this baseline by combining synthetic data generation methods and producing artificial training data, thus tackling the lack of parallel corpora relevant to our task. The evaluation of our approach is twofold, in addition to automatic metrics relying on gold references, we evaluate our models through post-editing of their outputs. This evaluation method directly measures the productivity gain brought by our models to experts conducting the normalisation task manually. Results show a 20+ token per minute increase in productivity when using automatic normalisation compared to normalising text from scratch. The manually post-edited dataset resulting from our study is the first parallel corpus of normalised 16th century Middle French to be publicly released, along with the synthetic data and the automatic normalisation models used and trained in the presented work. 2024.lt4hala-1.20 diff --git a/data/xml/2024.ltedi.xml b/data/xml/2024.ltedi.xml index 43701d7c1f..8180b9930e 100644 --- a/data/xml/2024.ltedi.xml +++ b/data/xml/2024.ltedi.xml @@ -25,7 +25,7 @@ Sociocultural knowledge is needed for selection of shots in hate speech detection tasks AntonisMaronikolakis AbdullatifKöksalLudwig-Maximilians-Universität München - HinrichSchuetze + HinrichSchuetze 1-13 We introduce HATELEXICON, a lexicon of slurs and targets of hate speech for Brazil, Germany, India and Kenya, to aid model development and interpretability. First, we demonstrate how HATELEXICON can be used to interpret model predictions, showing that models developed to classify extreme speech rely heavily on target group names. Further, we propose a culturally-informed method to aid shot selection for training in low-resource settings. In few-shot learning, shot selection is of paramount importance to model performance and we need to ensure we make the most of available data. We work with HASOC German and Hindi data for training and the Multilingual HateCheck (MHC) benchmark for evaluation. We show that selecting shots based on our lexicon leads to models performing better than models trained on shots sampled randomly. Thus, when given only a few training examples, using HATELEXICON to select shots containing more sociocultural information leads to better few-shot performance. With these two use-cases we show how our HATELEXICON can be used for more effective hate speech detection. 2024.ltedi-1.1 @@ -105,7 +105,7 @@ AdhithyaSaravanan RoyJiang OrSharirCalifornia Institute of Technology - AnimaAnandkumarCalifornia Institute of Technology and University of California, Irvine + AnimaAnandkumarCalifornia Institute of Technology and University of California, Irvine 73-105 Large Language models (LLMs), while powerful, exhibit harmful social biases. Debiasing is often challenging due to computational costs, data constraints, and potential degradation of multi-task language capabilities. This work introduces a novel approach utilizing ChatGPT to generate synthetic training data, aiming to enhance the debiasing of LLMs. We propose two strategies: Targeted Prompting, which provides effective debiasing for known biases but necessitates prior specification of bias in question; and General Prompting, which, while slightly less effective, offers debiasing across various categories. We leverage resource-efficient LLM debiasing using adapter tuning and compare the effectiveness of our synthetic data to existing debiasing datasets. Our results reveal that: (1) ChatGPT can efficiently produce high-quality training data for debiasing other LLMs; (2) data produced via our approach surpasses existing datasets in debiasing performance while also preserving internal knowledge of a pre-trained LLM; and (3) synthetic data exhibits generalizability across categories, effectively mitigating various biases, including intersectional ones. These findings underscore the potential of synthetic data in advancing the fairness of LLMs with minimal retraining cost. 2024.ltedi-1.8 @@ -143,7 +143,7 @@ HosahalliShashirekhaMangalore University SaranyaRajiakodiCentral University of Tamil Nadu Miguel ÁngelGarcía - Salud MaríaJiménez-ZafraUniversidad de Jaén + Salud MaríaJiménez-ZafraUniversidad de Jaén JoséGarcía-Díaz RafaelValencia-GarcíaUniversidad de Murcia KishorePonnusamy @@ -174,7 +174,7 @@ SaranyaRajiakodiCentral University of Tamil Nadu RahulPonnusamy KathiravanPannerselvamCentral University of Tamil Nadu - Anand KumarMadasamyNational Institute of Technology Karnataka + Anand KumarMadasamyNational Institute of Technology Karnataka RamachandranRajalakshmi HariharanLekshmiAmmalNational Institute of Technology Karnataka AnshidKizhakkeparambil @@ -227,7 +227,7 @@ <fixed-case>M</fixed-case>ason<fixed-case>T</fixed-case>igers@<fixed-case>LT</fixed-case>-<fixed-case>EDI</fixed-case>-2024: An Ensemble Approach Towards Detecting Homophobia and Transphobia in Social Media Comments DhimanGoswamiGeorge Mason University Sadiya Sayara ChowdhuryPuspoGeorge Mason University - Md NishatRaihan + Md NishatRaihan Al Nahian BinEmran 164-172 In this paper, we describe our approaches and results for Task 2 of the LT-EDI 2024 Workshop, aimed at detecting homophobia and/or transphobia across ten languages. Our methodologies include monolingual transformers and ensemble methods, capitalizing on the strengths of each to enhance the performance of the models. The ensemble models worked well, placing our team, MasonTigers, in the top five for eight of the ten languages, as measured by the macro F1 score. Our work emphasizes the efficacy of ensemble methods in multilingual scenarios, addressing the complexities of language-specific tasks. diff --git a/data/xml/2024.luhme.xml b/data/xml/2024.luhme.xml index 7fbf98a099..035df8f3f4 100644 --- a/data/xml/2024.luhme.xml +++ b/data/xml/2024.luhme.xml @@ -6,7 +6,7 @@ RuiSousa-Silva HenriqueLopes Cardoso MaaritKoponen - AntonioPareja Lora + AntonioPareja Lora MártaSeresi CLUP, Centro de Linguística da Universidade do Porto FLUP - Faculdade de Letras da Universidade do Porto
Santiago de Compostela, Spain
diff --git a/data/xml/2024.mathnlp.xml b/data/xml/2024.mathnlp.xml index 74ff252d4d..af4aee60e2 100644 --- a/data/xml/2024.mathnlp.xml +++ b/data/xml/2024.mathnlp.xml @@ -6,7 +6,7 @@ MarcoValentino DeborahFerreira MokanaranganThayaparan - AndreFreitas + AndreFreitas ELRA and ICCL
Torino, Italia
May @@ -23,7 +23,7 @@ An Approach to Co-reference Resolution and Formula Grounding for Mathematical Identifiers Using Large Language Models AaminDev TakutoAsakura - RuneSætre + RuneSætre 1–10 This paper outlines an automated approach to annotate mathematical identifiers in scientific papers — a process historically laborious and costly. We employ state-of-the-art LLMs, including GPT-3.5 and GPT-4, and open-source alternatives to generate a dictionary for annotating mathematical identifiers, linking each identifier to its conceivable descriptions and then assigning these definitions to the respective identifier in- stances based on context. Evaluation metrics include the CoNLL score for co-reference cluster quality and semantic correctness of the annotations. 2024.mathnlp-1.1 diff --git a/data/xml/2024.ml4al.xml b/data/xml/2024.ml4al.xml index 3c7bc95218..8085f1ba10 100644 --- a/data/xml/2024.ml4al.xml +++ b/data/xml/2024.ml4al.xml @@ -9,7 +9,7 @@ ShaiGordin KyunghyunCho MarcoPassarotti - RacheleSprugnoli + RacheleSprugnoli YudongLiu BinLi AdamAnderson @@ -113,7 +113,7 @@
Lacuna Language Learning: Leveraging <fixed-case>RNN</fixed-case>s for Ranked Text Completion in Digitized <fixed-case>C</fixed-case>optic Manuscripts - LaurenLevineGeorgetown University + LaurenLevineGeorgetown University CindyLiGeorgetown University LydiaBremer-McCollum NicholasWagnerDuke University @@ -213,7 +213,7 @@ MarijkeBeersmansKU Leuven AlekKeersmaekersKU Leuven Eveliende GraafKU Leuven - TimVan de CruysKU Leuven + TimVan de CruysKU Leuven MarkDepauwKU Leuven MargheritaFantoliKU Leuven 152-164 @@ -261,7 +261,7 @@ <fixed-case>S</fixed-case>um<fixed-case>T</fixed-case>ablets: A Transliteration Dataset of <fixed-case>S</fixed-case>umerian Tablets ColeSimmons RichardDiehl MartinezUniversity of Cambridge - DanJurafskyStanford University + DanJurafskyStanford University 192-202 Sumerian transliteration is a conventional system for representing a scholar's interpretation of a tablet in the Latin script. Thanks to visionary digital Assyriology projects such as ETCSL, CDLI, and Oracc, a large number of Sumerian transliterations have been published online, and these data are well-structured for a variety of search and analysis tasks. However, the absence of a comprehensive, accessible dataset pairing transliterations with a digital representation of the tablet's cuneiform glyphs has prevented the application of modern Natural Language Processing (NLP) methods to the task of Sumerian transliteration. diff --git a/data/xml/2024.moomin.xml b/data/xml/2024.moomin.xml index 9bc2134133..984da3e4af 100644 --- a/data/xml/2024.moomin.xml +++ b/data/xml/2024.moomin.xml @@ -5,7 +5,7 @@ Proceedings of the 1st Workshop on Modular and Open Multilingual NLP (MOOMIN 2024) RaúlVázquez TimotheeMickus - JörgTiedemann + JörgTiedemann IvanVulić AhmetÜstün Association for Computational Linguistics diff --git a/data/xml/2024.mrl.xml b/data/xml/2024.mrl.xml index ea4bbb9f82..7be3f18d13 100644 --- a/data/xml/2024.mrl.xml +++ b/data/xml/2024.mrl.xml @@ -22,7 +22,7 @@ <fixed-case>S</fixed-case>amba<fixed-case>L</fixed-case>ingo: Teaching Large Language Models New Languages ZoltanCsakiSambanova Systems - BoLi + BoLi Jonathan LingjieLi QiantongXuSambanova Systems PianPawakapan @@ -128,7 +128,7 @@ PierreAndrews PontusStenetorpUniversity College London MikelArtetxeReka AI - Marta R.Costa-jussàMeta + Marta R.Costa-jussàMeta 148-158 ‘While machine translation (MT) systems have seen significant improvements,it is still common for translations to reflect societal biases, such as genderbias. Decoder-only language models (LLMs) have demonstrated potential in MT, albeitwith performance slightly lagging behind traditional encoder-decoder neural machinetranslation (NMT) systems. However, LLMs offer a unique advantage: the abilityto control the properties of the output through prompting. In this study, we leveragethis flexibility to explore Llama”s capability to produce gender-specific translations.Our results indicate that Llama can generate gender-specific translations withtranslation quality and gender bias comparable to NLLB, a state-of-the-art multilingualNMT system.’ 2024.mrl-1.10 @@ -246,7 +246,7 @@ Community <fixed-case>OSCAR</fixed-case>: A Community Effort for Multilingual Web Data ManuelBrackGerman Research Center for AI and Technische Universität Darmstadt MalteOstendorffDeutsche Telekom - PedroOrtiz SuarezCommon Crawl Foundation + PedroOrtiz SuarezCommon Crawl Foundation José JavierSaizBarcelona Supercomputing Center Iñaki LacunzaCastillaBarcelona Supercomputing Center JorgePalomar-GinerBarcelona Supercomputing Center @@ -300,7 +300,7 @@ Language Bias in Multilingual Information Retrieval: The Nature of the Beast and Mitigation Methods JinruiYangThe University of Melbourne FanJiang - TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne + TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne 280-292 Language fairness in multilingual information retrieval (MLIR) systems is crucial for ensuring equitable access to information across diverse languages. This paper sheds light on the issue, based on the assumption that queries in different languages, but with identical semantics, should yield equivalent ranking lists when retrieving on the same multilingual documents. We evaluate the degree of fairness using both traditional retrieval methods, and a DPR neural ranker based on mBERT and XLM-R. Additionally, we introduce ‘LaKDA’, a novel loss designed to mitigate language biases in neural MLIR approaches. Our analysis exposes intrinsic language biases in current MLIR technologies, with notable disparities across the retrieval methods, and the effectiveness of LaKDA in enhancing language fairness. 2024.mrl-1.23 @@ -359,7 +359,7 @@ An Attempt towards Generalized Retriever for In-Context Learning SenyuLi HaoYu JessicaOjoLelapa AI - David IfeoluwaAdelani + David IfeoluwaAdelani 346-356 We present our systems for the three tasks and five languages included in the MRL 2024 Shared Task on Multilingual Multi-task Information Retrieval: (1) Named Entity Recognition, (2) Free-form Question Answering, and (3) Multiple-choice Question Answering. For each task, we explored the impact of selecting different multilingual language models for fine-tuning across various target languages, and implemented an ensemble system that generates final outputs based on predictions from multiple fine-tuned models. All models are large language models fine-tuned on task-specific data. Our experimental results show that a more balanced dataset would yield better results. However, when training data for certain languages are scarce, fine-tuning on a large amount of English data supplemented by a small amount of “triggering data” in the target language can produce decent results. 2024.mrl-1.28 @@ -371,7 +371,7 @@ An Attempt towards Generalized Retriever for In-Context Learning KatharinaHämmerl Andrei-AlexandruManea GianlucaVicoCharles University Prague - JindřichHelclCharles University + JindřichHelclCharles University JindřichLibovickýCharles University Prague 357-364 We present the joint CUNI and LMU submission to the MRL 2024 Shared Task on Multi-lingual Multi-task Information Retrieval.The shared task objective was to explore how we can deploy modern methods in NLP in multi-lingual low-resource settings, tested on two sub-tasks: Named-entity recognition and question answering.Our solutions to the subtasks are based on data acquisition and model adaptation.We compare the performance of our submitted systems with the translate-test approachwhich proved to be the most useful in the previous edition of the shared task.Our results show that using more data as well as fine-tuning recent multilingual pre-trained models leads to considerable improvements over the translate-test baseline.Our code is available at https://github.com/ufal/mrl2024-multilingual-ir-shared-task. @@ -395,7 +395,7 @@ An Attempt towards Generalized Retriever for In-Context Learning AnarRzayevKAIST JafarIsbarovGeorge Washington University and ADA University DursunDashdamirovGeorge Washington University and ADA University - DavidAdelaniMcGill University + DavidAdelaniMcGill University DuyguAtamanNew York University 365-376 Large language models (LLMs) demonstrate exceptional proficiency in both the comprehension and generation of textual data, particularly in English, a language for which extensive public benchmarks have been established across a wide range of natural language processing (NLP) tasks. Nonetheless, their performance in multilingual contexts and specialized domains remains less rigorously validated, raising questions about their reliability and generalizability across linguistically diverse and domain-specific settings. The second edition of the Shared Task on Multilingual Multitask Information Retrieval aims to provide a comprehensive and inclusive multilingual evaluation benchmark which aids assessing the ability of multilingual LLMs to capture logical, factual, or causal relationships within lengthy text contexts and generate language under sparse settings, particularly in scenarios with under-resourced languages. The shared task consists of two subtasks crucial to information retrieval: Named entity recognition (NER) and reading comprehension (RC), in 7 data-scarce languages: Azerbaijani, Swiss German, Turkish and , which previously lacked annotated resources in information retrieval tasks. This year specifally focus on the multiple-choice question answering evaluation setting which provides a more objective setting for comparing different methods across languages. diff --git a/data/xml/2024.mwe.xml b/data/xml/2024.mwe.xml index 43ca32bf66..b2e361e56b 100644 --- a/data/xml/2024.mwe.xml +++ b/data/xml/2024.mwe.xml @@ -8,7 +8,7 @@ A. SezaDoğruöz KilianEvang MarcosGarcia - VoulaGiouli + VoulaGiouli LifengHan JoakimNivre AlexandreRademaker @@ -149,7 +149,7 @@ CherifaBen Khelil Jean-YvesAntoine IskandarKeskes - LamiaHadrich-Belguith + LamiaHadrich-Belguith 88–97 This paper highlights the importance of integrating MWE identification with the development of syntactic MWE lexicons. It suggests that lexicons with minimal morphosyntactic information can amplify current MWE-annotated datasets and refine identification strategies. To our knowledge, this work represents the first attempt to focus on both seen and unseen of VMWEs for Arabic. It also deals with the challenge of differentiating between literal and figurative interpretations of idiomatic expressions. The approach involves a dual-phase procedure: first projecting a VMWE lexicon onto a corpus to identify candidate occurrences, then disambiguating these occurrences to distinguish idiomatic from literal instances. Experiments outlined in the paper aim to assess the efficacy of this technique, utilizing a lexicon known as LEXAR and the “parseme-ar” corpus. The findings suggest that lexicon-driven strategies have the potential to refine MWE identification, particularly for unseen occurrences. 2024.mwe-1.13 @@ -167,7 +167,7 @@ Towards the semantic annotation of <fixed-case>SR</fixed-case>-<fixed-case>ELEXIS</fixed-case> corpus: Insights into Multiword Expressions and Named Entities CvetanaKrstev - RankaStanković + RankaStanković Aleksandra M.Marković Teodora SofijaMihajlov 106–114 @@ -189,7 +189,7 @@ Universal Feature-based Morphological Trees FedericaGamba AbishekStephen - ZdeněkŽabokrtský + ZdeněkŽabokrtský 125–137 The paper proposes a novel data representation inspired by Universal Dependencies (UD) syntactic trees, which are extended to capture the internal morphological structure of word forms. As a result, morphological segmentation is incorporated within the UD representation of syntactic dependencies. To derive the proposed data structure we leverage existing annotation of UD treebanks as well as available resources for segmentation, and we select 10 languages to work with in the presented case study. Additionally, statistical analysis reveals a robust correlation between morphs and sets of morphological features of words. We thus align the morphs to the observed feature inventories capturing the morphological meaning of morphs. Through the beneficial exploitation of cross-lingual correspondence of morphs, the proposed syntactic representation based on morphological segmentation proves to enhance the comparability of sentence structures across languages. 2024.mwe-1.17 @@ -208,16 +208,16 @@ Multiword Expressions between the Corpus and the Lexicon: Universality, Idiosyncrasy, and the Lexicon-Corpus Interface - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu VoulaGiouli KilianEvang - DanielZeman + DanielZeman PetyaOsenova CaroleTiberius SimonKrek - StellaMarkantonatou + StellaMarkantonatou IvelinaStoyanova - RankaStanković + RankaStanković ChristianChiarcos 147–153 We present ongoing work towards defining a lexicon-corpus interface to serve as a benchmark in the representation of multiword expressions (of various parts of speech) in dedicated lexica and the linking of these entries to their corpus occurrences. The final aim is the harnessing of such resources for the automatic identification of multiword expressions in a text. The involvement of several natural languages aims at the universality of a solution not centered on a particular language, and also accommodating idiosyncrasies. Challenges in the lexicographic description of multiword expressions are discussed, the current status of lexica dedicated to this linguistic phenomenon is outlined, as well as the solution we envisage for creating an ecosystem of interlinked lexica and corpora containing and, respectively, annotated with multiword expressions. @@ -237,7 +237,7 @@ Light Verb Constructions in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>S</fixed-case>outh <fixed-case>A</fixed-case>sian Languages AbishekStephen - DanielZeman + DanielZeman 163–177 We conduct a morphosyntactic investigation into the light verb constructions (LVCs) or the verbo-nominal predicates in South Asian languages. This work spans the Indo-Aryan and Dravidian language families in treebanks based on Universal Dependencies (UD). For the selected languages we show how well the existing annotation guidelines fare for the LVCs. We also reiterate the importance of the core and oblique distinction in UD and how informative it is for making accurate morphosyntactic annotation judgments for such predicates. 2024.mwe-1.21 @@ -258,9 +258,9 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>S</fixed-case>araiki MeesumAlam - FrancisTyers + FrancisTyers EmilyHanink - SandraKübler + SandraKübler 188–197 We present the first treebank of the Saraiki/Siraiki [ISO 639-3 skr] language, using the Universal Dependency annotation scheme (de Marneffe et al., 2021). The treebank currently comprises 587 annotated sentences and 7597 tokens. We explain the most relevant syntactic and morphological features of Saraiki, along with the decision we have made for a range of language specific constructions, namely compounds, verbal structures including light verb and serial verb constructions, and relative clauses. 2024.mwe-1.23 @@ -270,7 +270,7 @@ Domain-Weighted Batch Sampling for Neural Dependency Parsing JacobStriebel DanielDakota - SandraKübler + SandraKübler 198–206 In neural dependency parsing, as well as in the broader field of NLP, domain adaptation remains a challenging problem. When adapting a parser to a target domain, there is a fundamental tension between the need to make use of out-of-domain data and the need to ensure that syntactic characteristic of the target domain are learned. In this work we explore a way to balance these two competing concerns, namely using domain-weighted batch sampling, which allows us to use all available training data, while controlling the probability of sampling in- and out-of-domain data when constructing training batches. We conduct experiments using ten natural language domains and find that domain-weighted batch sampling yields substantial performance improvements in all ten domains compared to a baseline of conventional randomized batch sampling. 2024.mwe-1.24 @@ -278,7 +278,7 @@ Strategies for the Annotation of Pronominalised Locatives in <fixed-case>T</fixed-case>urkic <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Treebanks - JonathanWashington + JonathanWashington ÇağrıÇöltekin FurkanAkkurt BermetChontaeva diff --git a/data/xml/2024.naacl.xml b/data/xml/2024.naacl.xml index 9598f8aa73..b6ba5ce7f1 100644 --- a/data/xml/2024.naacl.xml +++ b/data/xml/2024.naacl.xml @@ -4,7 +4,7 @@ Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers) KevinDuh - HelenaGomez + HelenaGomez StevenBethard Association for Computational Linguistics
Mexico City, Mexico
@@ -33,7 +33,7 @@ Text Diffusion Model with Encoder-Decoder Transformers for Sequence-to-Sequence Generation HongyiYuan - ZhengYuanAlibaba Group + ZhengYuanAlibaba Group ChuanqiTanAlibaba Group FeiHuangAlibaba Group SongfangHuangAlibaba Group @@ -58,7 +58,7 @@ Assessing Logical Puzzle Solving in Large Language Models: Insights from a Minesweeper Case Study YinghaoLi HaoruiWangGeorgia Institute of Technology - ChaoZhangGeorgia Institute of Technology + ChaoZhangGeorgia Institute of Technology 59-81 Large Language Models (LLMs) have shown remarkable proficiency in language understanding and have been successfully applied to a variety of real-world tasks through task-specific fine-tuning or prompt engineering. Despite these advancements, it remains an open question whether LLMs are fundamentally capable of reasoning and planning, or if they primarily rely on recalling and synthesizing information from their training data. In our research, we introduce a novel task—Minesweeper—specifically designed in a format unfamiliar to LLMs and absent from their training datasets. This task challenges LLMs to identify the locations of mines based on numerical clues provided by adjacent opened cells. Successfully completing this task requires an understanding of each cell’s state, discerning spatial relationships between the clues and mines, and strategizing actions based on logical deductions drawn from the arrangement of the cells. Our experiments, including trials with the advanced GPT-4 model, indicate that while LLMs possess the foundational abilities required for this task, they struggle to integrate these into a coherent, multi-step logical reasoning process needed to solve Minesweeper. These findings highlight the need for further research to understand the nature of reasoning capabilities in LLMs under similar circumstances, and to explore pathways towards more sophisticated AI reasoning and planning models. 2024.naacl-long.4 @@ -85,7 +85,7 @@ JianpengCheng JorisDriesenApple AlexandruCoca - AndersJohannsen + AndersJohannsen 96-111 Few-shot dialogue state tracking (DST) with Large Language Models (LLM) relies on an effective and efficient conversation retriever to find similar in-context examples for prompt learning. Previous works use raw dialogue context as search keys and queries, and a retriever is fine-tuned with annotated dialogues to achieve superior performance. However, the approach is less suited for scaling to new domains or new annotation languages, where fine-tuning data is unavailable. To address this problem, we handle the task of conversation retrieval based on text summaries of the conversations.A LLM-based conversation summarizer is adopted for query and key generation, which enables effective maximum inner product search. To avoid the extra inference cost brought by LLM-based conversation summarization, we further distill a light-weight conversation encoder which produces query embeddings without decoding summaries for test conversations. We validate our retrieval approach on MultiWOZ datasets with GPT-Neo-2.7B and LLaMA-7B/30B. The experimental results show a significant improvement over relevant baselines in real few-shot DST settings. 2024.naacl-long.6 @@ -108,7 +108,7 @@ On Linearizing Structured Data in Encoder-Decoder Language Models: Insights from Text-to-<fixed-case>SQL</fixed-case> YutongShaoUniversity of California, San Diego - NdapaNakasholeUniversity of California, San Diego + NdapaNakasholeUniversity of California, San Diego 131-156 Structured data, prevalent in tables, databases, and knowledge graphs, poses a significant challenge in its representation. With the advent of large language models (LLMs), there has been a shift towards linearization-based methods, which process structured data as sequential token streams, diverging from approaches that explicitly model structure, often as a graph. Crucially, there remains a gap in our understanding of how these linearization-based methods handle structured data, which is inherently non-linear.This work investigates the linear handling of structured data in encoder-decoder language models, specifically T5. Our findings reveal the model’s ability to mimic human-designed processes such as schema linking and syntax prediction, indicating a deep, meaningful learning of structure beyond simple token sequencing. We also uncover insights into the model’s internal mechanisms, including the ego-centric nature of structure node encodings and the potential for model compression due to modality fusion redundancy. Overall, this work sheds light on the inner workings of linearization-based methods and could potentially provide guidance for future research. 2024.naacl-long.8 @@ -119,7 +119,7 @@ Extractive Summarization with Text Generator ThangLeVinAI Research - Anh TuanLuuNanyang Technological University + Anh TuanLuuNanyang Technological University 157-174 Standard extractive systems suffer from the lack of gold training signals since existing corpora solely provide document and human-written summary pairs while disregarding extractive labels. As a result, existing methods resort to imperfect pseudo-labels that are both biased and error-prone, thereby hindering the learning process of extractive models. In contrast, text generators which are commonly employed in abstractive summarization can effortlessly overcome this predicament on account of flexible sequence-to-sequence architectures. Motivated to bypass this inherent limitation, we investigate the possibility of conducting extractive summarization with text generators. Through extensive experiments covering six summarization benchmarks, we show that high-quality extractive summaries can be assembled via approximating the outputs (abstractive summaries) of these generators. Moreover, we find that the approximate summaries correlate positively with the auxiliary summaries (i.e. a better generator enables the production of better extractive summaries). Our results signify a new paradigm for training extractive summarizers i.e. learning with generation (abstractive) objectives rather than extractive schemes. 2024.naacl-long.9 @@ -158,8 +158,8 @@ SalvatoreGiorgiUniversity of Pennsylvania SunnyRaiSchool of Engineering and Applied Science, University of Pennsylvania ThomasTalhelmUniversity of Chicago - Sharath ChandraGuntukuUniversity of Pennsylvania - LyleUngar + Sharath ChandraGuntukuUniversity of Pennsylvania + LyleUngar 211-226 Cultural variation exists between nations (e.g., the United States vs. China), but also within regions (e.g., California vs. Texas, Los Angeles vs. San Francisco). Measuring this regional cultural variation can illuminate how and why people think and behave differently. Historically, it has been difficult to computationally model cultural variation due to a lack of training data and scalability constraints. In this work, we introduce a new research problem for the NLP community: How do we measure variation in cultural constructs across regions using language? We then provide a scalable solution: building knowledge-guided lexica to model cultural variation, encouraging future work at the intersection of NLP and cultural understanding. We also highlight modern LLMs’ failure to measure cultural variation or generate culturally varied language. 2024.naacl-long.12 @@ -242,7 +242,7 @@ YifanXu HanwenZhaFacebook YueLiu - Xin LunaDongDepartment of Computer Science, University of Washington and Amazon + Xin LunaDongDepartment of Computer Science, University of Washington and Amazon 311-325 Since the recent prosperity of Large Language Models (LLMs), there have been interleaved discussions regarding how to reduce hallucinations from LLM responses, how to increase the factuality of LLMs, and whether Knowledge Graphs (KGs), which store the world knowledge in a symbolic form, will be replaced with LLMs. In this paper, we try to answer these questions from a new angle: How knowledgeable are LLMs?To answer this question, we constructed Head-to-Tail, a benchmark that consists of 18K question-answer (QA) pairs regarding head, torso, and tail facts in terms of popularity. We designed an automated evaluation method and a set of metrics that closely approximate the knowledge an LLM confidently internalizes. Through a comprehensive evaluation of 16 publicly available LLMs, we show that existing LLMs are still far from being perfect in terms of their grasp of factual knowledge, especially for facts of torso-to-tail entities. 2024.naacl-long.18 @@ -261,7 +261,7 @@ JiangshuDuUniversity of Illinois at Chicago ShuaiqiLiu YunlongXu - PhilipYuUniversity of Illinois, Chicago + PhilipYuUniversity of Illinois, Chicago 326-337 Task-Oriented Parsing (TOP) enables conversational assistants to interpret user commands expressed in natural language, transforming them into structured outputs that combine elements of both natural language and intent/slot tags. Recently, Large Language Models (LLMs) have achieved impressive performance in synthesizing computer programs based on a natural-language prompt, mitigating the gap between natural language and structured programs. Our paper focuses on harnessing the capabilities of LLMs for semantic parsing tasks, addressing the following three key research questions: 1) How can LLMs be effectively utilized for semantic parsing tasks? 2) What defines an effective prompt? and 3) How can LLM overcome the length constraint and streamline prompt design by including all examples as prompts? We introduce k Nearest Neighbor In-Context Learning (kNN-ICL), which simplifies prompt engineering by allowing it to be built on top of any design strategy while providing access to all demo examples. Extensive experiments show that: 1) Simple ICL without kNN search can achieve a comparable performance with strong supervised models on the TOP tasks, and 2) kNN-ICL significantly improves the comprehension of complex requests by seamlessly integrating ICL with a nearest-neighbor approach. Notably, this enhancement is achieved without the need for additional data or specialized prompts. 2024.naacl-long.19 @@ -302,8 +302,8 @@ XinHuang FangkaiJiao YangDing, A*STAR - AiTiAwI2R - NancyChen + AiTiAwI2R + NancyChen 370-390 We present SeaEval, a benchmark for multilingual foundation models. In addition to characterizing how these models understand and reason with natural language, we also investigate how well they comprehend cultural practices, nuances, and values. Alongside standard accuracy metrics, we investigate the brittleness of foundation models in the dimensions of semantics and multilinguality. Our analyses span both open-sourced and closed models, leading to empirical results across classic NLP tasks, reasoning, and cultural comprehension. Key findings indicate (1) Many models exhibit varied behavior when given paraphrased instructions. (2) Many models still suffer from exposure bias (e.g., positional bias, majority label bias). (3) For questions rooted in factual, scientific, and commonsense knowledge, consistent responses are expected across multilingual queries that are semantically equivalent. Yet, most models surprisingly demonstrate inconsistent performance on these queries. (4) Multilingually-trained models have not attained “balanced multilingual” capabilities. Our endeavors underscore the need for more generalizable semantic representations and enhanced multilingual contextualization. SeaEval can serve as a launchpad for more thorough investigations and evaluations for multilingual and multicultural scenarios. 2024.naacl-long.22 @@ -428,9 +428,9 @@ Embrace Divergence for Richer Insights: A Multi-document Summarization Benchmark and a Case Study on Summarizing Diverse Information from News Articles Kung-HsiangHuangSalesForce.com PhilippeLaban - AlexanderFabbriSalesForce.com - Prafulla KumarChoubeySalesForce.com - ShafiqJotySalesForce.com and Nanyang Technological University + AlexanderFabbriSalesForce.com + Prafulla KumarChoubeySalesForce.com + ShafiqJotySalesForce.com and Nanyang Technological University CaimingXiongSalesforce Research Chien-ShengWuSalesforce AI 570-593 @@ -482,7 +482,7 @@ <fixed-case>R</fixed-case>-Spin: Efficient Speaker and Noise-invariant Representation Learning with Acoustic Pieces Heng-JuiChangMassachusetts Institute of Technology - JamesGlass + JamesGlass 642-662 This paper introduces Robust Spin (R-Spin), a data-efficient domain-specific self-supervision method for speaker and noise-invariant speech representations by learning discrete acoustic units with speaker-invariant clustering (Spin). R-Spin resolves Spin’s issues and enhances content representations by learning to predict acoustic pieces. R-Spin offers a 12X reduction in computational resources compared to previous state-of-the-art methods while outperforming them in severely distorted speech scenarios. This paper provides detailed analyses to show how discrete units contribute to speech encoder training and improving robustness in diverse acoustic environments. 2024.naacl-long.36 @@ -560,7 +560,7 @@ Cong-DuyNguyenSchool of Computer Science and Engineering, Nanyang Technological University ThongNguyen XiaobaoWuNanyang Technological University - Anh TuanLuuNanyang Technological University + Anh TuanLuuNanyang Technological University 733-749 Previous work on multimodal sentence embedding has proposed multimodal contrastive learning and achieved promising results. However, by taking the rest of the batch as negative samples without reviewing when forming contrastive pairs, those studies encountered many suspicious and noisy negative examples, significantly affecting the methods’ overall performance. In this work, we propose KDMCSE (Knowledge Distillation Multimodal contrastive learning of Sentence Embeddings), a novel approach that enhances the discrimination and generalizability of multimodal representation and inherits the knowledge from the teacher model to learn the difference between positive and negative instances and via that, can detect noisy and wrong negative samples effectively before they are calculated in the contrastive objective. Furthermore, to overcome the limitation of modeling the variation within negative pairs, we introduce a new contrastive objective, AdapACSE (Adaptive Angular Margin Supervised Contrastive Learning for Multimodal sentence embeddings), that enhances the discriminative representation by strengthening the margin within the angular space while capturing varying semantics within the negative. Experimental results on widely used Semantic Textual Similarity (STS) benchmarks demonstrate the effectiveness of our approach. 2024.naacl-long.42 @@ -704,8 +704,8 @@ ZhiyangTeng BoshengDing ZhengyuanLiuI2R - NancyChen - ShafiqJotySalesForce.com and Nanyang Technological University + NancyChen + ShafiqJotySalesForce.com and Nanyang Technological University 926-941 Traditional attempts to enhance the logical reasoning abilities of language models often rely on supervised fine-tuning, limiting their generalization to new tasks or domains. Large Language Models (LLMs), with their capacity to condense vast knowledge, can effectively tackle many tasks. Yet, our experiments reveal a gap in their performance on logical reasoning benchmarks when compared to state-of-the-art fine-tuning based models. To bridge this gap, we present LogicLLM, a first-of-its-kind, fully self-supervised framework for integrating logical reasoning capabilities into LLMs, and activating them via in-context learning. We apply this to two LLM series, FLAN-T5 and LLaMA, with parameter sizes from 3 billion to 33 billion. LogicLLM demonstrates its effectiveness through successful improvements on two logical reasoning benchmarks (ReClor and LogiQA-v2). Additionally, LogicLLM based on FLAN-T5-11B attains comparable results to ChatGPT, and evaluations with LLaMA-based models on three language understanding benchmarks (RACE, MMLU and Big-Bench-Hard) confirm that the improvements come without compromising the model’s general language understanding capabilities. 2024.naacl-long.53 @@ -820,10 +820,10 @@ “One-Size-Fits-All”? Examining Expectations around What Constitute “Fair” or “Good” <fixed-case>NLG</fixed-case> System Behaviors - LiLucyAllen Institute for Artificial Intelligence and University of California Berkeley + LiLucyAllen Institute for Artificial Intelligence and University of California Berkeley Su LinBlodgettMicrosoft MiladShokouhiMicrosoft - HannaWallachMicrosoft + HannaWallachMicrosoft AlexandraOlteanuResearch, Microsoft 1054-1089 Fairness-related assumptions about what constitute appropriate NLG system behaviors range from invariance, where systems are expected to behave identically for social groups, to adaptation, where behaviors should instead vary across them. To illuminate tensions around invariance and adaptation, we conduct five case studies, in which we perturb different types of identity-related language features (names, roles, locations, dialect, and style) in NLG system inputs. Through these cases studies, we examine people’s expectations of system behaviors, and surface potential caveats of these contrasting yet commonly held assumptions. We find that motivations for adaptation include social norms, cultural differences, feature-specific information, and accommodation; in contrast, motivations for invariance include perspectives that favor prescriptivism, view adaptation as unnecessary or too difficult for NLG systems to do appropriately, and are wary of false assumptions. Our findings highlight open challenges around what constitute “fair” or “good” NLG system behaviors. @@ -1011,7 +1011,7 @@ HaitianSunSchool of Computer Science, Carnegie Mellon University JaiGuptaGoogle JonathanBerantGoogle and Tel Aviv University - WilliamCohenGoogle DeepMind + WilliamCohenGoogle DeepMind DonaldMetzlerGoogle 1363-1381 Recently proposed long-form question answering (QA) systems, supported by large language models (LLMs), have shown promising capabilities. Yet, attributing and verifying their generated abstractive answers can be difficult, and automatically evaluating their accuracy remains an ongoing challenge.In this work, we introduce a new QA task for answering multi-answer questions by summarizing multiple diverse sources in a semi-extractive fashion. Specifically, Semi-extractive Multi-source QA (SEMQA) requires models to output a comprehensive answer, while mixing factual quoted spans—copied verbatim from given input sources—and non-factual free-text connectors that glue these spans together into a single cohesive passage. This setting bridges the gap between the outputs of well-grounded but constrained extractive QA systems and more fluent but harder to attribute fully abstractive answers. Particularly, it enables a new mode for language models that leverages their advanced language generation capabilities, while also producing fine in-line attributions by-design that are easy to verify, interpret, and evaluate. To study this task, we create the first dataset of this kind, QuoteSum, with human-written semi-extractive answers to natural and generated questions, and define text-based evaluation metrics. Experimenting with several LLMs in various settings, we find this task to be surprisingly challenging, demonstrating the importance of QuoteSum for developing and studying such consolidation capabilities. @@ -1034,7 +1034,7 @@ A <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Treebank for <fixed-case>H</fixed-case>ighland <fixed-case>P</fixed-case>uebla <fixed-case>N</fixed-case>ahuatl RobertPugh - FrancisTyersIndiana University, Bloomington + FrancisTyersIndiana University, Bloomington 1393-1403 We present a Universal Dependencies (UD) treebank for Highland Puebla Nahuatl. The treebank is only the second such UD corpus for a Mexican language, and supplements an existing treebank for another Nahuatl variant. We describe the process of data collection, annotation decisions and interesting syntactic constructions, and discuss some similarities and differences between the Highland Puebla Nahuatl treebank and the existing Western Sierra Puebla Nahuatl treebank. 2024.naacl-long.76 @@ -1076,7 +1076,7 @@ <fixed-case>O</fixed-case>rchestra<fixed-case>LLM</fixed-case>: Efficient Orchestration of Language Models for Dialogue State Tracking Chia-HsuanLee HaoChengMicrosoft Research - MariOstendorfUniversity of Washington + MariOstendorfUniversity of Washington 1434-1445 Large language models (LLMs) have revolutionized the landscape of Natural Language Processing, but are computationally expensive. To reduce the cost without sacrificing performance, previous studies have explored various approaches to harness the potential of Smaller Language Models (SLMs) as cost-effective alternatives to their larger counterparts. Driven by findings that SLMs and LLMs exhibit complementary strengths in a structured knowledge extraction task, this work presents a novel SLM/LLM routing framework designed to improve computational efficiency and enhance task performance. In dialogue state tracking tasks, the proposed routing framework enhances performance substantially compared to relying solely on LLMs, while reducing the computational costs by over 50%. 2024.naacl-long.79 @@ -1091,7 +1091,7 @@ MarcoValentino JordanMeadows LanZhangUniversity of Manchester - AndreFreitasIdiap Research Institute and University of Manchester + AndreFreitasIdiap Research Institute and University of Manchester 1446-1458 This paper investigates the possibility of approximating multiple mathematical operations in latent space for expression derivation. To this end, we introduce different multi-operational representation paradigms, modelling mathematical operations as explicit geometric transformations. By leveraging a symbolic engine, we construct a large-scale dataset comprising 1.7M derivation steps stemming from 61K premises and 6 operators, analysing the properties of each paradigm when instantiated with state-of-the-art neural encoders.Specifically, we investigate how different encoding mechanisms can approximate expression manipulation in latent space, exploring the trade-off between learning different operators and specialising within single operations, as well as the ability to support multi-step derivations and out-of-distribution generalisation. Our empirical analysis reveals that the multi-operational paradigm is crucial for disentangling different operators, while discriminating the conclusions for a single operation is achievable in the original expression encoder. Moreover, we show that architectural choices can heavily affect the training dynamics, structural organisation, and generalisation of the latent space, resulting in significant variations across paradigms and classes of encoders. 2024.naacl-long.80 @@ -1117,7 +1117,7 @@ <fixed-case>X</fixed-case>fer<fixed-case>B</fixed-case>ench: a Data-Driven Benchmark for Emergent Language BrendonBoldtSchool of Computer Science, Carnegie Mellon University - DavidMortensenCarnegie Mellon University + DavidMortensenCarnegie Mellon University 1475-1489 In this paper, we introduce a benchmark for evaluating the overall quality of emergent languages using data-driven methods. Specifically, we interpret the notion of the “quality” of an emergent language as its similarity to human language within a deep learning framework. We measure this by using the emergent language as pretraining data for a downstream NLP tasks in human language—the better the downstream performance, the better the emergent language. We implement this benchmark as an easy-to-use Python package that only requires a text file of utterances from the emergent language to be evaluated. Finally, we empirically test the benchmark’s validity using human, synthetic, and emergent language baselines. 2024.naacl-long.82 @@ -1143,7 +1143,7 @@ JordanMeadows MarcoValentino DamienTeneyIdiap Research Institute - AndreFreitasIdiap Research Institute and University of Manchester + AndreFreitasIdiap Research Institute and University of Manchester 1505-1523 This paper proposes a methodology for generating and perturbing detailed derivations of equations at scale, aided by a symbolic engine, to evaluate the generalisability of Transformers to out-of-distribution mathematical reasoning problems. Instantiating the framework in the context of sequence classification tasks, we compare the capabilities of GPT-4, GPT-3.5, and a canon of fine-tuned BERT models, exploring the relationship between specific operators and generalisation failure via the perturbation of reasoning aspects such as symmetry and variable surface forms. Surprisingly, our empirical evaluation reveals that the average in-distribution performance of fine-tuned models surpasses GPT-3.5, and rivals GPT-4. However, perturbations to input reasoning can reduce their performance by up to 80 F1 points. Overall, the results suggest that the in-distribution performance of smaller open-source models may potentially rival GPT by incorporating appropriately structured derivation dependencies during training, and highlight a shared weakness between BERT and GPT involving a relative inability to decode indirect references to mathematical entities. We release the full codebase, constructed datasets, and fine-tuned models to encourage future progress in the field. 2024.naacl-long.84 @@ -1237,7 +1237,7 @@ Causal Inference for Human-Language Model Collaboration BohanZhangUniversity of Michigan - Ann Arbor YixinWangUniversity of Michigan - Ann Arbor - ParamveerDhillonUniversity of Michigan + ParamveerDhillonUniversity of Michigan 1630-1647 In this paper, we examine the collaborative dynamics between humansand language models (LMs), where the interactions typically involveLMs proposing text segments and humans editing or responding to theseproposals. Productive engagement with LMs in such scenarios necessitates that humans discern effective text-based interaction strategies, such as editing and response styles, from historical human-LM interactions. This objective is inherently causal, driven by the counterfactual ‘what-if’ question: how would the outcome of collaboration change if humans employed a different text editing/refinement strategy? A key challenge in answering this causal inference question is formulating an appropriate causal estimand: the conventional average treatment effect (ATE) estimand is inapplicable to text-based treatments due to their high dimensionality. To address this concern, we introduce a new causal estimand– *Incremental Stylistic Effect (ISE)*, which characterizes the average impact of infinitesimally shifting a text towards a specific style, such as increasing formality. We establish the conditions for the non-parametric identification of ISE. Building on this, we develop *CausalCollab*, an algorithm designed to estimate the ISE of various interaction strategies in dynamic human-LM collaborations. Our empirical investigations across three distinct human-LM collaboration scenarios reveal that *CausalCollab* effectively reduces confounding and significantly improves counterfactual estimation over a set of competitive baselines. 2024.naacl-long.91 @@ -1250,11 +1250,11 @@ ZezhongWang FangkaiYangMicrosoft LuWangMicrosoft - PuZhao + PuZhao HongruWangThe Chinese University of Hong Kong LiangChenChinese University of Hong Kong, The Chinese University of Hong Kong QingweiLinMicrosoft Research - Kam-FaiWongThe Chinese University of Hong Kong + Kam-FaiWongThe Chinese University of Hong Kong 1648-1668 With the increasing risk posed by jailbreak attacks, recent studies have investigated various methods to improve the safety of large language models (LLMs), mainly falling into two strategies: safety training and safeguards. Safety training involves fine-tuning the LLM with adversarial samples, which activate the LLM’s capabilities against jailbreak. However, it is not always effective in countering new attacks and often leads to potential performance degradation. Safeguards, on the other hand, are methods using additional models to filter harmful content from the LLM’s response. Nevertheless, they can only reduce a limited amount of harmful output and introduce extra computational costs. Given the distinct strengths and weaknesses of both, we combine them to balance out their flaws and propose a more effective method called Self-Guard.Specifically, we train the LLM to review its responses for any harmful content and append a [harmful] or [harmless] tag to the end of the response. In this way, Self-Guard possesses the advantages of safety training, leveraging the powerful capabilities of the LLMs themselves to detect harmfulness. Besides that, it gains flexibility like safeguards, making the safety check target the output side, which makes the system less vulnerable to attack updates. Experimental results indicate that our Self-Guard can effectively defend against jailbreak attacks and will not cause LLMs’ performance degradation. 2024.naacl-long.92 @@ -1494,7 +1494,7 @@ HongyiYuan RunjiLin JunyangLin - ZhengYuanAlibaba Group + ZhengYuanAlibaba Group ChangZhou JingrenZhouAlibaba Group 1964-1974 @@ -1508,7 +1508,7 @@ JiaruiLiu WenkaiLi ZhijingJin - MonaDiabCarnegie Mellon University and George Washington University + MonaDiabCarnegie Mellon University and George Washington University 1975-1997 In an era of model and data proliferation in machine learning/AI especially marked by the rapid advancement of open-sourced technologies, there arises a critical need for standardized consistent documentation. Our work addresses the information incompleteness in current human-written model and data cards. We propose an automated generation approach using Large Language Models (LLMs). Our key contributions include the establishment of CardBench, a comprehensive dataset aggregated from over 4.8k model cards and 1.4k data cards, coupled with the development of the CardGen pipeline comprising a two-step retrieval process. Our approach exhibits enhanced completeness, objectivity, and faithfulness in generated model and data cards, a significant step in responsible AI documentation practices ensuring better accountability and traceability. 2024.naacl-long.110 @@ -1533,7 +1533,7 @@ Are Multilingual <fixed-case>LLM</fixed-case>s Culturally-Diverse Reasoners? An Investigation into Multicultural Proverbs and Sayings ChenCecilia Liu FajriKotoMohamed bin Zayed University of Artificial Intelligence - TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne + TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne IrynaGurevychMohamed bin Zayed University of Artificial Intelligence and Technical University of Darmstadt 2016-2039 Large language models (LLMs) are highly adept at question answering and reasoning tasks, but when reasoning in a situational context, human expectations vary depending on the relevant cultural common ground. As languages are associated with diverse cultures, LLMs should also be culturally-diverse reasoners. In this paper, we study the ability of a wide range of state-of-the-art multilingual LLMs (mLLMs) to reason with proverbs and sayings in a conversational context. Our experiments reveal that: (1) mLLMs “know” limited proverbs and memorizing proverbs does not mean understanding them within a conversational context; (2) mLLMs struggle to reason with figurative proverbs and sayings, and when asked to select the wrong answer (instead of asking it to select the correct answer); and (3) there is a “culture gap” in mLLMs when reasoning about proverbs and sayings translated from other languages. We construct and release our evaluation dataset MAPS (MulticulturAl Proverbs and Sayings) for proverb understanding with conversational context for six different languages. @@ -1625,7 +1625,7 @@ DanMa XuezhiCao YunsenXian - JiajunChenNanjing University + JiajunChenNanjing University ShujianHuangNanjing University 2136-2153 Large Language Models (LLMs), such as ChatGPT and GPT-4, are designed to provide useful and safe responses. However, adversarial prompts known as ‘jailbreaks’ can circumvent safeguards, leading LLMs to generate potentially harmful content. Exploring jailbreak prompts can help to better reveal the weaknesses of LLMs and further steer us to secure them. Unfortunately, existing jailbreak methods either suffer from intricate manual design or require optimization on other white-box models, which compromises either generalization or efficiency. In this paper, we generalize jailbreak prompt attacks into two aspects: (1) Prompt Rewriting and (2) Scenario Nesting. Based on this, we propose ReNeLLM, an automatic framework that leverages LLMs themselves to generate effective jailbreak prompts. Extensive experiments demonstrate that ReNeLLM significantly improves the attack success rate while greatly reducing the time cost compared to existing baselines. Our study also reveals the inadequacy of current defense methods in safeguarding LLMs. Finally, we analyze the failure of LLMs defense from the perspective of prompt execution priority, and propose corresponding defense strategies. We hope that our research can catalyze both the academic community and LLMs developers towards the provision of safer and more regulated LLMs. The code is available at https://github.com/NJUNLP/ReNeLLM. @@ -1736,7 +1736,7 @@ EveFleisig Su LinBlodgettMicrosoft DanKleinUniversity of California, Berkeley - ZeerakTalatMohamed bin Zayed University of Artificial Intelligence + ZeerakTalatMohamed bin Zayed University of Artificial Intelligence 2279-2292 Longstanding data labeling practices in machine learning involve collecting and aggregating labels from multiple annotators. But what should we do when annotators disagree? Though annotator disagreement has long been seen as a problem to minimize, new perspectivist approaches challenge this assumption by treating disagreement as a valuable source of information. In this position paper, we examine practices and assumptions surrounding the causes of disagreement–some challenged by perspectivist approaches, and some that remain to be addressed–as well as practical and normative challenges for work operating under these assumptions. We conclude with recommendations for the data labeling pipeline and avenues for future research engaging with subjectivity and disagreement. 2024.naacl-long.126 @@ -1749,7 +1749,7 @@ AparnaElangovanAmazon JiayuanHeRoyal Melbourne Institute of Technology and The University of Melbourne YuanLi - KarinVerspoorRoyal Melbourne Institute of Technology + KarinVerspoorRoyal Melbourne Institute of Technology 2293-2309 The NLP community typically relies on performance of a model on a held-out test set to assess generalization. Performance drops observed in datasets outside of official test sets are generally attributed to “out-of-distribution” effects. Here, we explore the foundations of generalizability and study the factors that affect it, articulating lessons from clinical studies. In clinical research, generalizability is an act of reasoning that depends on (a) *internal validity* of experiments to ensure controlled measurement of cause and effect, and (b) *external validity* or transportability of the results to the wider population. We demonstrate how learning spurious correlations, such as the distance between entities in relation extraction tasks, can affect a model’s internal validity and in turn adversely impact generalization. We, therefore, present the need to ensure internal validity when building machine learning models in NLP. Our recommendations also apply to generative large language models, as they are known to be sensitive to even minor semantic preserving alterations. We also propose adapting the idea of *matching* in randomized controlled trials and observational studies to NLP evaluation to measure causation. 2024.naacl-long.127 @@ -1775,7 +1775,7 @@ CrystinaZhangUniversity of Waterloo XueguangMa JimmyLinUniversity of Waterloo - FerhanTure + FerhanTure 2327-2340 Large language models (LLMs) exhibit positional bias in how they use context, which especially affects listwise ranking. To address this, we propose permutation self-consistency, a form of self-consistency over the ranking list outputs of black-box LLMs. Our key idea is to marginalize out different list orders in the prompt to produce an order-independent ranking with less positional bias. First, given some input prompt, we repeatedly shuffle the list in the prompt and pass it through the LLM while holding the instructions the same. Next, we aggregate the resulting sample of rankings by computing the central ranking closest in distance to all of them, marginalizing out prompt order biases in the process. Theoretically, we prove the robustness of our method, showing convergence to the true ranking under random perturbations.Empirically, on five datasets in sorting and passage reranking, our approach improves scores from conventional inference by up to 34-52% for Mistral, 7-18% for GPT-3.5, 8-16% for LLaMA v2 (70B). Our code is at https://github.com/castorini/perm-sc. 2024.naacl-long.129 @@ -1808,7 +1808,7 @@ WantianZhaoGeorgia Institute of Technology SanjeevGrampurohit RampiRamprasadGeorgia Institute of Technology - ChaoZhangGeorgia Institute of Technology + ChaoZhangGeorgia Institute of Technology 2370-2385 Scientific information extraction (SciIE), which aims to automatically extract information from scientific literature, is becoming more important than ever. However, there are no existing SciIE datasets for polymer materials, which is an important class of materials used ubiquitously in our daily lives. To bridge this gap, we introduce POLYIE, a new SciIE dataset for polymer materials. POLYIE is curated from 146 full-length polymer scholarly articles, which are annotated with different named entities (i.e., materials, properties, values, conditions) as well as their N-ary relations by domain experts. POLYIE presents several unique challenges due to diverse lexical formats of entities, ambiguity between entities, and variable-length relations. We evaluate state-of-the-art named entity extraction and relation extraction models on POLYIE, analyze their strengths and weaknesses, and highlight some difficult cases for these models. To the best of our knowledge, POLYIE is the first SciIE benchmark for polymer materials, and we hope it will lead to more research efforts from the community on this challenging task. Our code and data are available on: https://github.com/jerry3027/PolyIE. 2024.naacl-long.131 @@ -1876,7 +1876,7 @@ VasudhaVaradarajan SverkerSikström OscarKjell - H. AndrewSchwartzStony Brook University (SUNY) + H. AndrewSchwartzStony Brook University (SUNY) 2466-2478 Mental health issues differ widely among individuals, with varied signs and symptoms. Recently, language-based assessments haveshown promise in capturing this diversity, but they require a substantial sample of words per person for accuracy. This work introducesthe task of Adaptive Language-Based Assessment (ALBA), which involves adaptively ordering questions while also scoring an individual’s latent psychological trait using limited language responses to previous questions. To this end, we develop adaptive testing methods under two psychometric measurement theories: Classical Test Theory and Item Response Theory.We empirically evaluate ordering and scoring strategies, organizing into two new methods: a semi-supervised item response theory-basedmethod (ALIRT) and a supervised Actor-Critic model. While we found both methods to improve over non-adaptive baselines, We foundALIRT to be the most accurate and scalable, achieving the highest accuracy with fewer questions (e.g., Pearson r ≈ 0.93 after only 3 questions as compared to typically needing at least 7 questions). In general, adaptive language-based assessments of depression and anxiety were able to utilize a smaller sample of language without compromising validity or large computational costs. 2024.naacl-long.136 @@ -1919,7 +1919,7 @@ ElronBandelInternational Business Machines ArielGeraInternational Business Machines OfirArvivHebrew University of Jerusalem and Computer Science Departmen, Technion-Israel Institute of Technology - LiatEin-Dor + LiatEin-Dor EyalShnarchInternational Business Machines NoamSlonimInternational Business Machines MichalShmueli-Scheuer @@ -2032,7 +2032,7 @@ YouZuo KimGerdesUniversité Paris-Saclay ÉricClergerie - BenoîtSagotINRIA + BenoîtSagotINRIA 2687-2710 In this work, we introduce a comprehensive error typology specifically designed for evaluating two distinct tasks in machine-generated patent texts: claims-to-abstract generation, and the generation of the next claim given previous ones. We have also developed a benchmark, PatentEval, for systematically assessing language models in this context. Our study includes a comparative analysis, annotated by humans, of various models. These range from those specifically adapted during training for tasks within the patent domain to the latest general-purpose large language models (LLMs). Furthermore, we explored and evaluated some metrics to approximate human judgments in patent text evaluation, analyzing the extent to which these metrics align with expert assessments. These approaches provide valuable insights into the capabilities and limitations of current language models in the specialized field of patent text generation. 2024.naacl-long.147 @@ -2066,7 +2066,7 @@ What Causes the Failure of Explicit to Implicit Discourse Relation Recognition? - WeiLiuHeidelberg University + WeiLiuHeidelberg University StephenWanCSIRO MichaelStrubeHeidelberg Institute for Theoretical Studies 2738-2753 @@ -2081,7 +2081,7 @@ SiddhantArora HayatoFutamiSony Jee-weonJungCMU, Carnegie Mellon University - YifanPengCarnegie Mellon University + YifanPengCarnegie Mellon University RoshanSharmaGoogle YosukeKashiwagi EmiruTsunoo @@ -2113,7 +2113,7 @@ YadaZhuIBM Research DiegoAntogniniGoogle DeepMind YoonKimMassachusetts Institute of Technology - YangZhang + YangZhang 2793-2804 This paper studies the relationship between the surface form of a mathematical problem and its solvability by large language models. We find that subtle alterations in the surface form can significantly impact the answer distribution and the solve rate, exposing the language model’s lack of robustness and sensitivity to the surface form in reasoning through complex problems. To improve mathematical reasoning performance, we propose Self-Consistency-over-Paraphrases (SCoP), which diversifies reasoning paths from specific surface forms of the problem. We evaluate our approach on four mathematics reasoning benchmarks over three large language models and show that SCoP improves mathematical reasoning performance over vanilla self-consistency, particularly for problems initially deemed unsolvable. Finally, we provide additional experiments and discussion regarding problem difficulty and surface forms, including cross-model difficulty agreement and paraphrasing transferability, and Variance of Variations (VOV) for language model evaluation. 2024.naacl-long.153 @@ -2166,7 +2166,7 @@ The Effect of Data Partitioning Strategy on Model Generalizability: A Case Study of Morphological Segmentation ZoeyLiuUniversity of Florida - BonnieDorrUniversity of Florida + BonnieDorrUniversity of Florida 2851-2864 Recent work to enhance data partitioning strategies for more realistic model evaluation face challenges in providing a clear optimal choice. This study addresses these challenges, focusing on morphological segmentation and synthesizing limitations related to language diversity, adoption of multiple datasets and splits, and detailed model comparisons. Our study leverages data from 19 languages, including ten indigenous or endangered languages across 10 language families with diverse morphological systems (polysynthetic, fusional, and agglutinative) and different degrees of data availability. We conduct large-scale experimentation with varying sized combinations of training and evaluation sets as well as new test data. Our results show that, when faced with new test data: (1) models trained from random splits are able to achieve higher numerical scores; (2) model rankings derived from random splits tend to generalize more consistently. 2024.naacl-long.157 @@ -2179,7 +2179,7 @@ DebasmitaBhattacharyaColumbia University SiyingDing AlaynaNguyen - JuliaHirschbergColumbia University + JuliaHirschbergColumbia University 2865-2876 It is well-known that speakers who entrain to one another have more successful conversations than those who do not. Previous research has shown that interlocutors entrain on linguistic features in both written and spoken \emph{monolingual} domains. More recent work on \emph{code-switched} communication has also shown preliminary evidence of entrainment on certain aspects of code-switching (CSW). However, such studies of entrainment in code-switched domains have been extremely few and restricted to human-machine textual interactions. Our work studies code-switched spontaneous speech between humans, finding that (1) patterns of written and spoken entrainment in monolingual settings largely generalize to code-switched settings, and (2) some patterns of entrainment on code-switching in dialogue agent-generated text generalize to spontaneous code-switched speech. Our findings give rise to important implications for the potentially “universal” nature of entrainment as a communication phenomenon, and potential applications in inclusive and interactive speech technology. 2024.naacl-long.158 @@ -2191,7 +2191,7 @@ A Survey of Meaning Representations – From Theory to Practical Utility ZaccharySadeddine JuriOpitzRuprecht-Karls-Universität Heidelberg and University of Zurich - FabianSuchanekTelecom Paris + FabianSuchanekTelecom Paris 2877-2892 Symbolic meaning representations of natural language text have been studied since at least the 1960s. With the availability of large annotated corpora, and more powerful machine learning tools, the field has recently seen several new developments. In this survey, we study today’s most prominent Meaning Representation Frameworks. We shed light on their theoretical properties, as well as on their practical research environment, i.e., on datasets, parsers, applications, and future challenges. 2024.naacl-long.159 @@ -2210,7 +2210,7 @@ LiangChen YufengHe KaikaiAn - BaobaoChangPeking University + BaobaoChangPeking University 2893-2907 Large-scale multilingual Pretrained Language Models (mPLMs) yield impressive performance on cross-language tasks, yet significant performance disparities exist across different languages within the same mPLM. Previous studies endeavored to narrow these disparities by supervise fine-tuning the mPLMs with multilingual data.However, obtaining labeled multilingual data is time-consuming, and fine-tuning mPLM with limited labeled multilingual data merely encapsulates the knowledge specific to the labeled data.Therefore, we introduce **ALSACE** to leverage the learned knowledge from the well-performing languages to guide under-performing ones within the same mPLM, eliminating the need for additional labeled multilingual data. Experiments show that ALSACE effectively mitigates language-level performance disparity across various mPLMs while showing the competitive performance on different multilingual NLU tasks, ranging from full resource to limited resource settings. The code for our approach is available at https://github.com/pkunlp-icler/ALSACE. 2024.naacl-long.160 @@ -2235,7 +2235,7 @@ Visually-Aware Context Modeling for News Image Captioning TingyuQuKU Leuven TinneTuytelaarsKU Leuven - Marie-FrancineMoensKU Leuven, KU Leuven + Marie-FrancineMoensKU Leuven, KU Leuven 2927-2943 News Image Captioning aims to create captions from news articles and images, emphasizing the connection between textual context and visual elements. Recognizing the significance of human faces in news images and the face-name co-occurrence pattern in existing datasets, we propose a face-naming module for learning better name embeddings. Apart from names, which can be directly linked to an image area (faces), news image captions mostly contain context information that can only be found in the article. We design a retrieval strategy using CLIP to retrieve sentences that are semantically close to the image, mimicking human thought process of linking articles to images. Furthermore, to tackle the problem of the imbalanced proportion of article context and image context in captions, we introduce a simple yet effective method Contrasting with Language Model backbone (CoLaM) to the training pipeline. We conduct extensive experiments to demonstrate the efficacy of our framework. We out-perform the previous state-of-the-art (without external data) by 7.97/5.80 CIDEr scores on GoodNews/NYTimes800k. Our code is available at https://github.com/tingyu215/VACNIC. 2024.naacl-long.162 @@ -2257,7 +2257,7 @@ <fixed-case>T</fixed-case>opic<fixed-case>GPT</fixed-case>: A Prompt-based Topic Modeling Framework - Chau MinhPham + Chau MinhPham AlexanderHoyle SimengSun PhilipResnik @@ -2354,7 +2354,7 @@ OlanrewajuSamuel MatthewStutzman BismarckOdoomDepartment of Computer Science, Whiting School of Engineering - SanjeevKhudanpurWhiting School of Engineering + SanjeevKhudanpurWhiting School of Engineering StephenRichardsonBrigham Young University KentonMurrayJohns Hopkins University 3083-3110 @@ -2393,7 +2393,7 @@ DhimanGoswamiGeorge Mason University SharanyaThilagan KaiNorth - ShervinMalmasiAmazon + ShervinMalmasiAmazon MarcosZampieriGeorge Mason University 3149-3160 We present the first comprehensive survey of Native Language Identification (NLI) applied to texts. NLI is the task of automatically identifying an author’s native language (L1) based on their second language (L2) production. NLI is an important task with practical applications in second language teaching and NLP. The task has been widely studied for both text and speech, particularly for L2 English due to the availability of suitable corpora. Speech-based NLI relies heavily on accent modeled by pronunciation patterns and prosodic cues while text-based NLI relies primarily on modeling spelling errors and grammatical patterns that reveal properties of an individuals’ L1 influencing L2 production. We survey over one hundred papers on the topic including the papers associated with the NLI and INLI shared tasks. We describe several text representations and computational techniques used in text-based NLI. Finally, we present a comprehensive account of publicly available datasets used for the task thus far. @@ -2606,14 +2606,14 @@ YusenZhang NanZhangPennsylvania State University YixinLiuYale University - AlexanderFabbriSalesForce.com + AlexanderFabbriSalesForce.com JunruLiu RyoKamoiPennsylvania State University XiaoxinLuPennsylvania State University CaimingXiongSalesforce Research JieyuZhaoUniversity of Southern California - DragomirRadevYale University - KathleenMcKeown + DragomirRadevYale University + KathleenMcKeown RuiZhangPennsylvania State University 3404-3426 People from different social and demographic groups express diverse perspectives and conflicting opinions on a broad set of topics such as product reviews, healthcare, law, and politics. A fair summary should provide a comprehensive coverage of diverse perspectives without underrepresenting certain groups. However, current work in summarization metrics and Large Language Models (LLMs) evaluation has not explored fair abstractive summarization. In this paper, we systematically investigate fair abstractive summarization for user-generated data. We first formally define fairness in abstractive summarization as not underrepresenting perspectives of any groups of people, and we propose four reference-free automatic metrics by measuring the differences between target and source perspectives. We evaluate nine LLMs, including three GPT models, four LLaMA models, PaLM 2, and Claude, on six datasets collected from social media, online reviews, and recorded transcripts. Experiments show that both the model-generated and the human-written reference summaries suffer from low fairness. We conduct a comprehensive analysis of the common factors influencing fairness and propose three simple but effective methods to alleviate unfair summarization. Our dataset and code are available at https://github.com/psunlpgroup/FairSumm. @@ -2696,7 +2696,7 @@ My Heart Skipped a Beat! Recognizing Expressions of Embodied Emotion in Natural Language YuanZhuang TianyuJiangUniversity of Cincinnati - EllenRiloffUniversity of Arizona + EllenRiloffUniversity of Arizona 3525-3537 Humans frequently experience emotions. When emotions arise, they affect not only our mental state but can also change our physical state. For example, we often open our eyes wide when we are surprised, or clap our hands when we feel excited. Physical manifestations of emotions are referred to as embodied emotion in the psychology literature. From an NLP perspective, recognizing descriptions of physical movements or physiological responses associated with emotions is a type of implicit emotion recognition. Our work introduces a new task of recognizing expressions of embodied emotion in natural language. We create a dataset of sentences that contains 7,300 body part mentions with human annotations for embodied emotion. We develop a classification model for this task and present two methods to acquire weakly labeled instances of embodied emotion by extracting emotional manner expressions and by prompting a language model. Our experiments show that the weakly labeled data can train an effective classification model without gold data, and can also improve performance when combined with gold data. Our dataset is publicly available at https://github.com/yyzhuang1991/Embodied-Emotions. 2024.naacl-long.193 @@ -2751,7 +2751,7 @@ ZehuiWu ZiweiGongColumbia University JaywonKooRice University - JuliaHirschbergColumbia University + JuliaHirschbergColumbia University 3588-3602 This paper investigates the optimal selection and fusion of feature encoders across multiple modalities and combines these in one neural network to improve sentiment detection. We compare different fusion methods and examine the impact of multi-loss training within the multi-modality fusion network, identifying surprisingly important findings relating to subnet performance. We have also found that integrating context significantly enhances model performance. Our best model achieves state-of-the-art performance for three datasets (CMU-MOSI, CMU-MOSEI and CH-SIMS). These results suggest a roadmap toward an optimized feature selection and fusion approach for enhancing sentiment detection in neural networks. 2024.naacl-long.197 @@ -2914,7 +2914,7 @@ JiayiZhang JulianMichaelNew York University BernhardSchölkopfELLIS Institute and Max Planck Institute for Intelligent Systems, Max-Planck Institute - MonaDiabCarnegie Mellon University and George Washington University + MonaDiabCarnegie Mellon University and George Washington University 3781-3798 Traditionally, natural language processing (NLP) models often use a rich set of features created by linguistic expertise, such as semantic representations. However, in the era of large language models (LLMs), more and more tasks are turned into generic, end-to-end sequence generation problems. In this paper, we investigate the question: what is the role of semantic representations in the era of LLMs? Specifically, we investigate the effect of Abstract Meaning Representation (AMR) across five diverse NLP tasks. We propose an AMR-driven chain-of-thought prompting method, which we call AMRCOT, and find that it generally hurts performance more than it helps. To investigate what AMR may have to offer on these tasks, we conduct a series of analysis experiments. We find that it is difficult to predict which input examples AMR may help or hurt on, but errors tend to arise with multi-word expressions, named entities, and in the final inference step where the LLM must connect its reasoning over the AMR to its prediction. We recommend focusing on these areas for future work in semantic representations for LLMs. Our code: https://github.com/causalNLP/amr_llm 2024.naacl-long.209 @@ -2941,7 +2941,7 @@ ShaonanWang JingYeInstitute of automation, Chinese academy of science, Chinese Academy of Sciences XiaohanZhangChinese Academy of Sciences - ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences + ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences 3822-3832 Decoding continuous language from brain activity is a formidable yet promising field of research. It is particularly significant for aiding people with speech disabilities to communicate through brain signals. This field addresses the complex task of mapping brain signals to text. The previous best attempt reverse-engineered this process in an indirect way: it began by learning to encode brain activity from text and then guided text generation by aligning with predicted brain responses. In contrast, we propose a simple yet effective method that guides text reconstruction by directly comparing them with the predicted text embeddings mapped from brain activities. Comprehensive experiments reveal that our method significantly outperforms the current state-of-the-art model, showing average improvements of 77% and 54% on BLEU and METEOR scores. We further validate the proposed modules through detailed ablation studies and case analyses and highlight a critical correlation: the more precisely we map brain activities to text embeddings, the better the text reconstruction results. Such insight can simplify the task of reconstructing language from brain activities for future work, emphasizing the importance of improving brain-to-text-embedding mapping techniques. 2024.naacl-long.211 @@ -2953,7 +2953,7 @@ On-the-fly Definition Augmentation of <fixed-case>LLM</fixed-case>s for Biomedical <fixed-case>NER</fixed-case> MonicaMunnangiNortheastern University SergeyFeldmanAllen Institute for Artificial Intelligence and Data Cowboys - ByronWallaceNortheastern University, Brown University and Northeastern University + ByronWallaceNortheastern University, Brown University and Northeastern University SilvioAmirNortheastern University TomHopeAllen Institute for Artificial Intelligence and Hebrew University, Hebrew University of Jerusalem AakankshaNaikAllen Institute for Artificial Intelligence and National Institutes of Health @@ -3004,7 +3004,7 @@ Towards Improved Multi-Source Attribution for Long-Form Answer Generation NilayPatel - ShivashankarSubramanianAmazon + ShivashankarSubramanianAmazon SiddhantGargMeta PratyayBanerjeeAmazon AmitaMisraAmazon @@ -3158,7 +3158,7 @@ Media Bias Detection Across Families of Language Models IffatMaab EdisonMarrese-TaylorThe Univesity of Tokyo and AIST, National Institute of Advanced Industrial Science and Technology - SebastianPadóUniversity of Stuttgart, Universität Stuttgart + SebastianPadóUniversity of Stuttgart, Universität Stuttgart YutakaMatsuoThe University of Tokyo and The University of Tokyo 4083-4098 Bias in reporting can influence the public’s opinion on relevant societal issues. Examples include informational bias (selective presentation of content) and lexical bias (specific framing of content through linguistic choices). The recognition of media bias is arguably an area where NLP can contribute to the “social good”. Traditional NLP models have shown good performance in classifying media bias, but require careful model design and extensive tuning. In this paper, we ask how well prompting of large language models can recognize media bias. Through an extensive empirical study including a wide selection of pre-trained models, we find that prompt-based techniques can deliver comparable performance to traditional models with greatly reduced effort and that, similar to traditional models, the availability of context substantially improves results. We further show that larger models can leverage different kinds of context simultaneously, obtaining further performance improvements. @@ -3217,7 +3217,7 @@ Beyond Borders: Investigating Cross-Jurisdiction Transfer in Legal Case Summarization SantoshT.y.s.sTechnische Universität München VatsalVenkatkrishna - SaptarshiGhoshIndian Institute of Technology Kharagpur + SaptarshiGhoshIndian Institute of Technology Kharagpur MatthiasGrabmairTechnische Universität München 4136-4150 Legal professionals face the challenge of managing an overwhelming volume of lengthy judgments, making automated legal case summarization crucial. However, prior approaches mainly focused on training and evaluating these models within the same jurisdiction. In this study, we explore the cross-jurisdictional generalizability of legal case summarization models. Specifically, we explore how to effectively summarize legal cases of a target jurisdiction where reference summaries are not available. In particular, we investigate whether supplementing models with unlabeled target jurisdiction corpus and extractive silver summaries obtained from unsupervised algorithms on target data enhances transfer performance. Our comprehensive study on three datasets from different jurisdictions highlights the role of pre-training in improving transfer performance. We shed light on the pivotal influence of jurisdictional similarity in selecting optimal source datasets for effective transfer. Furthermore, our findings underscore that incorporating unlabeled target data yields improvements in general pre-trained models, with additional gains when silver summaries are introduced. This augmentation is especially valuable when dealing with extractive datasets and scenarios featuring limited alignment between source and target jurisdictions. Our study provides key insights for developing adaptable legal case summarization systems, transcending jurisdictional boundaries. @@ -3255,9 +3255,9 @@ MaiteHerediaUniversidad del País Vasco JulenEtxanizHiTZ Center, University of the Basque Country (UPV/EHU) MuitzeZulaikaOrai NLP Technologies - XabierSaralegi + XabierSaralegi JeremyBarnesUniversity of the Basque Country - AitorSoroaUniversity of the Basque Country. UPV/EHU. + AitorSoroaUniversity of the Basque Country. UPV/EHU. 4177-4188 XNLI is a popular Natural Language Inference (NLI) benchmark widely used to evaluate cross-lingual Natural Language Understanding (NLU) capabilities across languages. In this paper, we expand XNLI to include Basque, a low-resource language that can greatly benefit from transfer-learning approaches. The new dataset, dubbed XNLIeu, has been developed by first machine-translating the English XNLI corpus into Basque, followed by a manual post-edition step. We have conducted a series of experiments using mono- and multilingual LLMs to assess a) the effect of professional post-edition on the MT system; b) the best cross-lingual strategy for NLI in Basque; and c) whether the choice of the best cross-lingual strategy is influenced by the fact that the dataset is built by translation. The results show that post-edition is necessary and that the translate-train cross-lingual strategy obtains better results overall, although the gain is lower when tested in a dataset that has been built natively from scratch. Our code and datasets are publicly available under open licenses. 2024.naacl-long.234 @@ -3288,9 +3288,9 @@ ChaniJungKorea Advanced Institute of Science & Technology JunhoMyungKorea Advanced Institute of Science and Technology JihoJinKorea Advanced Institute of Science and Technology - JoseCamacho-ColladosCardiff University + JoseCamacho-ColladosCardiff University JuhoKimKorea Advanced Institute of Science and Technology - AliceOhKorea Advanced Institute of Science and Technology + AliceOhKorea Advanced Institute of Science and Technology 4205-4224 Most hate speech datasets neglect the cultural diversity within a single language, resulting in a critical shortcoming in hate speech detection. To address this, we introduce CREHate, a CRoss-cultural English Hate speech dataset. To construct CREHate, we follow a two-step procedure: 1) cultural post collection and 2) cross-cultural annotation. We sample posts from the SBIC dataset, which predominantly represents North America, and collect posts from four geographically diverse English-speaking countries (Australia, United Kingdom, Singapore, and South Africa) using culturally hateful keywords we retrieve from our survey. Annotations are collected from the four countries plus the United States to establish representative labels for each country. Our analysis highlights statistically significant disparities across countries in hate speech annotations. Only 56.2% of the posts in CREHate achieve consensus among all countries, with the highest pairwise label difference rate of 26%. Qualitative analysis shows that label disagreement occurs mostly due to different interpretations of sarcasm and the personal bias of annotators on divisive topics. Lastly, we evaluate large language models (LLMs) under a zero-shot setting and show that current LLMs tend to show higher accuracies on Anglosphere country labels in CREHate.Our dataset and codes are available at: https://github.com/nlee0212/CREHate 2024.naacl-long.236 @@ -3330,7 +3330,7 @@ MichaelaWatkins AfraAlishahiTilburg University AriannaBisazzaUniversity of Groningen - GrzegorzChrupałaTilburg University + GrzegorzChrupałaTilburg University 4250-4261 Interpretability research has shown that self-supervised Spoken LanguageModels (SLMs) encode a wide variety of features in human speech from theacoustic, phonetic, phonological, syntactic and semantic levels, to speakercharacteristics. The bulk of prior research on representations of phonologyhas focused on segmental features such as phonemes; the encoding ofsuprasegmental phonology (such as tone and stress patterns) in SLMs is not yetwell understood. Tone is a suprasegmental feature that is present in more thanhalf of the world’s languages. This paper aims to analyze the tone encodingcapabilities of SLMs, using Mandarin and Vietnamese as case studies. We showthat SLMs encode lexical tone to a significant degree even when they aretrained on data from non-tonal languages. We further find that SLMs behavesimilarly to native and non-native human participants in tone and consonantperception studies, but they do not follow the same developmental trajectory. 2024.naacl-long.239 @@ -3383,11 +3383,11 @@ MarekŠuppa HilaGonen Joseph MarvinImperial - Börje F.Karlsson + Börje F.Karlsson PeiqinLin NikolaLjubešić LJMiranda - BarbaraPlank + BarbaraPlank ArijRiabi YuvalPinter 4322-4337 @@ -3468,11 +3468,11 @@ Memory Augmented Language Models through Mixture of Word Experts - CiceroNogueira dos SantosResearch, Google + CiceroNogueira dos SantosResearch, Google JamesLee-ThorpGoogle IsaacNobleGoogle Chung-ChingChangGoogle - DavidUthusGoogle + DavidUthusGoogle 4425-4438 Scaling up the number of parameters of language models has proven to be an effective approach to improve performance. For dense models, increasing their size proportionally increases their computational footprint. In this work, we seek to aggressively decouple learning capacity and FLOPs through Mixture-of-Experts (MoE) style models with large knowledge-rich vocabulary based routing functions. Our proposed approach, dubbed Mixture of Word Experts (MoWE), can be seen as a memory augmented model, where a large set of word-specific experts play the role of a sparse memory. We demonstrate that MoWE performs significantly better than the T5 family of models with similar number of FLOPs in a variety of NLP tasks. Moreover, MoWE outperforms traditional MoE models on knowledge intensive tasks and has similar performance to complex memory augmented approaches that often require to invoke custom mechanisms to search the sparse memory. 2024.naacl-long.249 @@ -3512,7 +3512,7 @@ LijiaSunAmazon YiZhangAmazon SaabMansourAmazon - KathleenMcKeown + KathleenMcKeown 4455-4480 Single document news summarization has seen substantial progress on faithfulness in recent years, driven by research on the evaluation of factual consistency, or hallucinations. We ask whether these advances carry over to other text summarization domains. We propose a new evaluation benchmark on topic-focused dialogue summarization, generated by LLMs of varying sizes. We provide binary sentence- level human annotations of the factual consistency of these summaries along with detailed explanations of factually inconsistent sentences. Our analysis shows that existing LLMs hallucinate significant amounts of factual errors in the dialogue domain, regardless of the model’s size. On the other hand, when LLMs, including GPT-4, serve as binary factual evaluators, they perform poorly and can be outperformed by prevailing state-of-the-art specialized factuality evaluation metrics. Finally, we conducted an analysis of hallucination types with a curated error taxonomy. We find that there are diverse errors and error distributions in model-generated summaries and that non-LLM based metrics can capture all error types better than LLM-based evaluators. 2024.naacl-long.251 @@ -3552,7 +3552,7 @@ QiongkaiXuMacquarie University XuanliHeUniversity College London, University of London BenjaminRubinsteinThe University of Melbourne and The University of Melbourne - TrevorCohnGoogle and The University of Melbourne + TrevorCohnGoogle and The University of Melbourne 4515-4534 While multilingual machine translation (MNMT) systems hold substantial promise, they also have security vulnerabilities. Our research highlights that MNMT systems can be susceptible to a particularly devious style of backdoor attack, whereby an attacker injects poisoned data into a low-resource language pair to cause malicious translations in other languages, including high-resource languages.Our experimental results reveal that injecting less than 0.01% poisoned data into a low-resource language pair can achieve an average 20% attack success rate in attacking high-resource language pairs. This type of attack is of particular concern, given the larger attack surface of languages inherent to low-resource settings. Our aim is to bring attention to these vulnerabilities within MNMT systems with the hope of encouraging the community to address security concerns in machine translation, especially in the context of low-resource languages. 2024.naacl-long.254 @@ -3566,8 +3566,8 @@ Joseph CheeChangAllen Institute for Artificial Intelligence MariaAntoniak ErinBransomAllen Institute for Artificial Intelligence - TrevorCohenUniversity of Washington - LucyWangUniversity of Washington and Allen Institute for Artificial Intelligence + TrevorCohenUniversity of Washington + LucyWangUniversity of Washington and Allen Institute for Artificial Intelligence TalAugust 4535-4550 Scientific jargon can confuse researchers when they read materials from other domains. Identifying and translating jargon for individual researchers could speed up research, but current methods of jargon identification mainly use corpus-level familiarity indicators rather than modeling researcher-specific needs, which can vary greatly based on each researcher’s background. We collect a dataset of over 10K term familiarity annotations from 11 computer science researchers for terms drawn from 100 paper abstracts. Analysis of this data reveals that jargon familiarity and information needs vary widely across annotators, even within the same sub-domain (e.g., NLP). We investigate features representing domain, subdomain, and individual knowledge to predict individual jargon familiarity. We compare supervised and prompt-based approaches, finding that prompt-based methods using information about the individual researcher (e.g., personal publications, self-defined subfield of research) yield the highest accuracy, though the task remains difficult and supervised approaches have lower false positive rates. This research offers insights into features and methods for the novel task of integrating personal data into scientific jargon identification. @@ -3631,7 +3631,7 @@ Generating Attractive and Authentic Copywriting from Customer Reviews Yu-XiangLin - Wei-YunMaAcademia Sinica + Wei-YunMaAcademia Sinica 4629-4642 The goal of product copywriting is to capture the interest of potential buyers by emphasizing the features of products through text descriptions. As e-commerce platforms offer a wide range of services, it’s becoming essential to dynamically adjust the styles of these auto-generated descriptions. Typical approaches to copywriting generation often rely solely on specified product attributes, which may result in dull and repetitive content. To tackle this issue, we propose to generate copywriting based on customer reviews, as they provide firsthand practical experiences with products, offering a richer source of information than just product attributes. We have developed a sequence-to-sequence framework, enhanced with reinforcement learning, to produce copywriting that is attractive, authentic, and rich in information. Our framework outperforms all existing baseline and zero-shot large language models, including LLaMA-2-chat-7B and GPT-3.5, in terms of both attractiveness and faithfulness. Furthermore, this work features the use of LLMs for aspect-based summaries collection and argument allure assessment. Experiments demonstrate the effectiveness of using LLMs for marketing domain corpus construction. The code and the dataset is publicly available at: https://github.com/YuXiangLin1234/Copywriting-Generation. 2024.naacl-long.259 @@ -3726,7 +3726,7 @@ LifengShangHuawei Technologies Ltd. XinJiang QunLiuHuawei Noah’s Ark Lab - Kam-FaiWongThe Chinese University of Hong Kong + Kam-FaiWongThe Chinese University of Hong Kong 4713-4730 Prior study shows that pre-training techniques can boost the performance of visual document understanding (VDU), which typically requires models to gain abilities to perceive and reason both document texts and layouts (e.g., locations of texts and table-cells). To this end, we propose visually guided generative text-layout pre-training, named ViTLP. Given a document image, the model optimizes hierarchical language and layout modeling objectives to generate the interleaved text and layout sequence. In addition, to address the limitation of processing long documents by Transformers, we introduce a straightforward yet effective multi-segment generative pre-training scheme, facilitating ViTLP to process word-intensive documents of any length. ViTLP can function as a native OCR model to localize and recognize texts of document images. Besides, ViTLP can be effectively applied to various downstream VDU tasks. Extensive experiments show that ViTLP achieves competitive performance over existing baselines on benchmark VDU tasks, including information extraction, document classification, and document question answering. 2024.naacl-long.264 @@ -3819,7 +3819,7 @@ TomCalamai Pierre-HenriParisTélécom Paris ChloéClavelINRIA and Télécom Paris - FabianSuchanekTelecom Paris + FabianSuchanekTelecom Paris 4810-4845 We introduce MAFALDA, a benchmark for fallacy classification that merges and unites previous fallacy datasets. It comes with a taxonomy that aligns, refines, and unifies existing classifications of fallacies. We further provide a manual annotation of a part of the dataset together with manual explanations for each annotation. We propose a new annotation scheme tailored for subjective NLP tasks, and a new evaluation method designed to handle subjectivity. We then evaluate several language models under a zero-shot learning setting and human performances on MAFALDA to assess their capability to detect and classify fallacies. 2024.naacl-long.270 @@ -3831,7 +3831,7 @@ Diffusion Glancing Transformer for Parallel Sequence-to-Sequence Learning LihuaQianByteDance MingxuanWang - YangLiu + YangLiu HaoZhou 4846-4862 Previously, non-autoregressive models were widely recognized as being superior in generation efficiency but inferior in generation quality due to the challenges of modeling multiple target modalities.To enhance the multi-modality modeling ability, we propose the diffusion glancing transformer, which employs a modality diffusion process and residual glancing sampling.The modality diffusion process is a discrete process that interpolates the multi-modal distribution along the decoding steps, and the residual glancing sampling approach guides the model to continuously learn the remaining modalities across the layers. Experimental results on various machine translation and text generation benchmarks demonstrate that DIFFGLAT achieves better generation accuracy while maintaining fast decoding speed compared with both autoregressive and non-autoregressive models. @@ -4005,10 +4005,10 @@ Revisiting subword tokenization: A case study on affixal negation in large language models ThinhTruongUniversity of Melbourne - YuliaOtmakhovaThe University of Melbourne - KarinVerspoorRoyal Melbourne Institute of Technology - TrevorCohnGoogle and The University of Melbourne - TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne + YuliaOtmakhovaThe University of Melbourne + KarinVerspoorRoyal Melbourne Institute of Technology + TrevorCohnGoogle and The University of Melbourne + TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne 5082-5095 In this work, we measure the impact of affixal negation on modern English large language models (LLMs). In affixal negation, the negated meaning is expressed through a negative morpheme, which is potentially challenging for LLMs as their tokenizers are often not morphologically plausible. We conduct extensive experiments using LLMs with different subword tokenization methods, which lead to several insights on the interaction between tokenization performance and negation sensitivity. Despite some interesting mismatches between tokenization accuracy and negation detection performance, we show that models can, on the whole, reliably recognize the meaning of affixal negation. 2024.naacl-long.284 @@ -4195,7 +4195,7 @@ RajaMarjiehPrinceton University NanyunPengUniversity of California, Los Angeles YejinChoiDepartment of Computer Science, University of Washington - ThomasGriffithsPrinceton University + ThomasGriffithsPrinceton University FaezeBrahmanAllen Institute for AI 5303-5324 We explore the creative problem-solving capabilities of modern LLMs in a novel constrained setting. To this end, we create MACGYVER, an automatically generated dataset consisting of over 1,600 real-world problems deliberately designed to trigger innovative usage of objects and necessitate out-of-the-box thinking. We then present our collection to both LLMs and humans to compare and contrast their problem-solving abilities. MACGYVER is challenging for both groups, but in unique and complementary ways. For instance, humans excel in tasks they are familiar with but struggle with domain-specific knowledge, leading to a higher variance. In contrast, LLMs, exposed to a variety of specialized knowledge, attempt broader problems but fail by proposing physically-infeasible actions. Finally, we provide a detailed error analysis of LLMs, and demonstrate the potential of enhancing their problem-solving ability with novel prompting techniques such as iterative step-wise reflection and divergent-convergent thinking.This work (1) introduces a fresh arena for intelligent agents focusing on intricate aspects of physical reasoning, planning, and unconventional thinking, which supplements the existing spectrum of machine intelligence; and (2) provides insight into the constrained problem-solving capabilities of both humans and AI. @@ -4222,7 +4222,7 @@ FeiMi BoyangXue YiChen - Kam-FaiWongThe Chinese University of Hong Kong + Kam-FaiWongThe Chinese University of Hong Kong RuifengXuHarbin Institute of Technology 5345-5363 Numerous works are proposed to align large language models (LLMs) with human intents to better fulfill instructions, ensuring they are trustful and helpful.Nevertheless, some human instructions are often malicious or misleading and following them will lead to untruthful and unsafe responses.Previous work rarely focused on understanding how LLMs manage instructions based on counterfactual premises, referred to here as inductive instructions, which may stem from users’ false beliefs or malicious intents.In this paper, we aim to reveal the behaviors of LLMs towards inductive instructions and enhance their truthfulness and helpfulness accordingly. Specifically, we first introduce a benchmark of Inductive Instructions (INDust), where the false knowledge is incorporated into instructions in multiple different styles. After extensive human and automatic evaluations, we uncovered a universal vulnerability among LLMs in processing inductive instructions.Additionally, we identified that different inductive styles affect the models’ ability to identify the same underlying errors,and the complexity of the underlying assumptions also influences the model’s performance.Motivated by these results, we propose Dual-critique prompting to improve LLM robustness against inductive instructions.Our experiments demonstrate that Dual-critique prompting significantly bolsters the robustness of a diverse array of LLMs, even when confronted with varying degrees of inductive instruction complexity and differing inductive styles. @@ -4295,7 +4295,7 @@ JiaqiHanTencent Cloud GangYuan BinghuaiLinTencent - BaobaoChangPeking University + BaobaoChangPeking University YunboCaoTencent 5431-5452 In the constant updates of the product dialogue systems, we need to retrain the natural language understanding (NLU) model as new data from the real users would be merged into the existing data accumulated in the last updates. Within the newly added data, new intents would emerge and might have semantic entanglement with the existing intents, e.g. new intents that are semantically too specific or generic are actually a subset or superset of some existing intents in the semantic space, thus impairing the robustness of the NLU model.As the first attempt to solve this problem, we setup a new benchmark consisting of 4 Dialogue Version Control dataSets (DialogVCS). We formulate the intent detection with imperfect data in the system update as a multi-label classification task with positive but unlabeled intents, which asks the models to recognize all the proper intents, including the ones with semantic entanglement, in the inference.We also propose comprehensive baseline models and conduct in-depth analyses for the benchmark, showing that the semantically entangled intents can be effectively recognized with an automatic workflow. Our code and dataset are available at https://github.com/Zefan-Cai/DialogVCS. @@ -4328,7 +4328,7 @@ JuqianqianJuqianqian DejiyanglaDejiyangla YujiaPengPeking University - KennyZhuUniversity of Texas at Arlington + KennyZhuUniversity of Texas at Arlington MengyueWu 5472-5487 Social media is a valuable data source for exploring mental health issues. However, previous studies have predominantly focused on the semantic content of these posts, overlooking the importance of their temporal attributes, as well as the evolving nature of mental disorders and symptoms.In this paper, we study the causality between psychiatric symptoms and life events, as well as among different symptoms from social media posts, which leads to better understanding of the underlying mechanisms of mental disorders. By applying these extracted causality features to tasks such as diagnosis point detection and early risk detection of depression, we notice considerable performance enhancement. This indicates that causality information extracted from social media data can boost the efficacy of mental disorder diagnosis and treatment planning. @@ -4429,7 +4429,7 @@ HuiminZeng YimengLu LanyuShang - YangZhangUniversity of Illinois at Urbana-Champaign + YangZhangUniversity of Illinois at Urbana-Champaign DongWangUniversity of Illinois at Urbana-Champaign 5628-5643 The proliferation of online misinformation has posed significant threats to public interest. While numerous online users actively participate in the combat against misinformation, many of such responses can be characterized by the lack of politeness and supporting facts. As a solution, text generation approaches are proposed to automatically produce counter-misinformation responses. Nevertheless, existing methods are often trained end-to-end without leveraging external knowledge, resulting in subpar text quality and excessively repetitive responses. In this paper, we propose retrieval augmented response generation for online misinformation (RARG), which collects supporting evidence from scientific sources and generates counter-misinformation responses based on the evidences. In particular, our RARG consists of two stages: (1) evidence collection, where we design a retrieval pipeline to retrieve and rerank evidence documents using a database comprising over 1M academic articles; (2) response generation, in which we align large language models (LLMs) to generate evidence-based responses via reinforcement learning from human feedback (RLHF). We propose a reward function to maximize the utilization of the retrieved evidence while maintaining the quality of the generated text, which yields polite and factual responses that clearly refutes misinformation. To demonstrate the effectiveness of our method, we study the case of COVID-19 and perform extensive experiments with both in- and cross-domain datasets, where RARG consistently outperforms baselines by generating high-quality counter-misinformation responses. @@ -4472,7 +4472,7 @@ MoontaeLeeUniversity of Illinois, Chicago HonglakLeeUniversity of Michigan - Ann Arbor and LG AI Research SoujanyaPoriaSingapore University of Technology and Design - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan 5668-5680 Large language models (LLMs) have demonstrated substantial commonsense understanding through numerous benchmark evaluations. However, their understanding of cultural commonsense remains largely unexamined. In this paper, we conduct a comprehensive examination of the capabilities and limitations of several state-of-the-art LLMs in the context of cultural commonsense tasks. Using several general and cultural commonsense benchmarks, we find that (1) LLMs have a significant discrepancy in performance when tested on culture-specific commonsense knowledge for different cultures; (2) LLMs’ general commonsense capability is affected by cultural context; and (3) The language used to query the LLMs can impact their performance on cultural-related tasks.Our study points to the inherent bias in the cultural understanding of LLMs and provides insights that can help develop culturally-aware language models. 2024.naacl-long.316 @@ -4684,7 +4684,7 @@ MyraChengStanford University LuciaZheng EsinDurmusStanford University - DanJurafskyStanford University + DanJurafskyStanford University 5942-5959 The use of words to convey speaker’s intent is traditionally distinguished from the ‘mention’ of words for quoting what someone said, or pointing out properties of a word. Here we show that computationally modeling this use-mention distinction is crucial for dealing with counterspeech online. Counterspeech that refutes problematic content often mentions harmful language but is not harmful itself (e.g., calling a vaccine dangerous is not the same as expressing disapproval of someone for calling vaccines dangerous). We show that even recent language models fail at distinguishing use from mention, and that this failure propagates to two key downstream tasks: misinformation and hate speech detection, resulting in censorship of counterspeech. We introduce prompting mitigations that teach the use-mention distinction, and show they reduce these errors. Our work highlights the importance of the use-mention distinction for NLP and CSS and offers ways to address it. 2024.naacl-long.331 @@ -4723,7 +4723,7 @@ <fixed-case>A</fixed-case>fri<fixed-case>MTE</fixed-case> and <fixed-case>A</fixed-case>fri<fixed-case>COMET</fixed-case>: Enhancing <fixed-case>COMET</fixed-case> to Embrace Under-resourced <fixed-case>A</fixed-case>frican Languages JiayiWang - David IfeoluwaAdelani + David IfeoluwaAdelani SwetaAgrawalInstituto de Telecomunicações MarekMasiak RicardoReiInstituto Superior Técnico, INESC-ID and Unbabel @@ -4836,7 +4836,7 @@ ShujianHuangNanjing University XingyunWang YankeZhou - JiajunChenNanjing University + JiajunChenNanjing University 6087-6100 LLMs (Large Language Models) usually interact with users in the form of dialogue and generate responses following their instructions, which naturally require dialogue comprehension abilities. However, dialogue comprehension is a general language ability which is hard to be evaluated directly. In this work, we propose to perform the evaluation focusing on the factual consistency issue with the help of the dialogue summarization task. Besides evaluating and analyzing the dialogue summarization performance (DIAC-Sum) of different LLMs, we also derive factual questions from the generated summaries and use them as a more flexible measurement of dialogue comprehension (DIAC-FactQA). Our evaluation shows that, on average, 26.8% of the summaries generated by LLMs contain factual inconsistency. Even ChatGPT, the strongest model evaluated, has such errors in 16% of its summaries. For answering the factual questions, which is more challenging, the average error rate of all evaluated LLMs is 36.1%. Both results indicate serious deficiencies. Detailed analysis shows that the understanding of subject/object of the conversation is still challenging for LLMs. Furthermore, to stimulate and enhance the dialogue comprehension ability of LLMs, we propose a fine-tuning paradigm with auto-constructed multi-task data, which achieved a relative error rate reduction of 11% on DIAC-FactQA. 2024.naacl-long.338 @@ -4849,7 +4849,7 @@ ChangjiangGaonanjing university HongdaHu PengHunanjing university - JiajunChenNanjing University + JiajunChenNanjing University JixingLiCity University of Hong Kong ShujianHuangNanjing University 6101-6117 @@ -4865,8 +4865,8 @@ YiFanZhangInstitute of automation, Chinese academy of science YaodongYuElectrical Engineering & Computer Science Department, University of California Berkeley DhruvMadekaAmazon - DeanFoster - EricXingMohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University + DeanFoster + EricXingMohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University HimabinduLakkarajuHarvard University ShamKakadeUniversity of Washington and Harvard University 6118-6136 @@ -4932,8 +4932,8 @@ ZhongMengGoogle DongseongHwang QiujiaLiGoogle - Khe ChaiSimGoogle - BoLiGoogle + Khe ChaiSimGoogle + BoLiGoogle JamesQinGoogle XingyuCaiGoogle AdamStooke @@ -4998,7 +4998,7 @@ AshnaKhetan MatthiasGerstgrasser DiyiYangStanford University - DanJurafskyStanford University + DanJurafskyStanford University 6279-6296 Effective conversation requires common ground: a shared understanding between the participants. Common ground, however, does not emerge spontaneously in conversation. Speakers and listeners work together to both identify and construct a shared basis while avoiding misunderstanding. To accomplish grounding, humans rely on a range of dialogue acts, like clarification (What do you mean?) and acknowledgment (I understand.). However, it is unclear whether large language models (LLMs) generate text that reflects human grounding. To this end, we curate a set of grounding acts and propose corresponding metrics that quantify attempted grounding. We study whether LLM generations contain grounding acts, simulating turn-taking from several dialogue datasets and comparing results to humans. We find that—compared to humans—LLMs generate language with less conversational grounding, instead generating text that appears to simply presume common ground. To understand the roots of the identified grounding gap, we examine the role of instruction tuning and preference optimization, finding that training on contemporary preference data leads to a reduction in generated grounding acts. Altogether, we highlight the need for more research investigating conversational grounding in human-AI interaction. 2024.naacl-long.348 @@ -5129,7 +5129,7 @@ JiahuanLi ShanboChengByteDance Inc. ShujianHuangNanjing University - JiajunChenNanjing University + JiajunChenNanjing University 6445-6459 Large Language Models (LLM) have demonstrated their strong ability in the field of machine translation, yet they suffer from high computational cost and latency. Therefore, transferring translation knowledge from giant LLMs to medium-sized machine translation models is a promising research direction. However, traditional knowledge distillation methods ignore the capability of student and teacher models, therefore repeatedly teaching student models on the knowledge they have learned, and failing to extend to novel contexts and knowledge. In this paper, we propose a framework called MT-Patcher, which transfers knowledge from LLMs to existing MT models in a selective, comprehensive and proactive manner. Considering the current translation ability of student MT models, we only identify and correct their translation errors, instead of distilling the whole translation from the teacher. Leveraging the strong language abilities of LLMs, we instruct LLM teachers to synthesize diverse contexts and anticipate more potential errors for the student. Experiment results on translating both specific language phenomena and general MT benchmarks demonstrate that finetuning the MT model on about 10% examples can achieve comparable results to the traditional knowledge distillation method, and synthesized potential errors and diverse contexts further improve MT performances on unseen contexts and words. 2024.naacl-long.358 @@ -5140,7 +5140,7 @@ <fixed-case>T</fixed-case>o<fixed-case>XCL</fixed-case>: A Unified Framework for Toxic Speech Detection and Explanation Nhat M.Hoang - Xuan LongDo + Xuan LongDo Duc AnhDo Duc AnhVu LuuAnh Tuan @@ -5195,7 +5195,7 @@ OmarAttiaApple RonakPradeep SaloniPotdarApple - AlexanderRushCornell University and School of Engineering and Applied Sciences, Harvard University + AlexanderRushCornell University and School of Engineering and Applied Sciences, Harvard University Umar FarooqMinhas YunyaoLiAdobe Systems 6524-6536 @@ -5220,7 +5220,7 @@ <fixed-case>GPTS</fixed-case>core: Evaluate as You Desire JinlanFu - See-KiongNgNational University of Singapore + See-KiongNgNational University of Singapore ZhengbaoJiangSchool of Computer Science, Carnegie Mellon University PengfeiLiu 6556-6576 @@ -5235,7 +5235,7 @@ FengyuCaiTechnische Universität Darmstadt YuxiaWang HeinzKoeppl - PreslavNakov + PreslavNakov IrynaGurevychMohamed bin Zayed University of Artificial Intelligence and Technical University of Darmstadt 6577-6595 Large language models (LLMs) have demonstrated remarkable capabilities across a wide range of tasks in various domains. Despite their impressive performance, they can be unreliable due to factual errors in their generations. Assessing their confidence and calibrating them across different tasks can help mitigate risks and enable LLMs to produce better generations. There has been a lot of recent research aiming to address this, but there has been no comprehensive overview to organize it and to outline the main lessons learned. The present survey aims to bridge this gap. In particular, we outline the challenges and we summarize recent technical advancements for LLM confidence estimation and calibration. We further discuss their applications and suggest promising directions for future work. @@ -5251,7 +5251,7 @@ YuchenJiangAIWaves Inc. HaoyangHuangMicrosoft Research Asia DongdongZhangMicrosoft Research Asia - XinZhaoRenmin University of China + XinZhaoRenmin University of China TomKocmiMicrosoft FuruWeiMicrosoft Research 6596-6610 @@ -5313,7 +5313,7 @@ TommasoGreen InesReinig KaiEckertMannheim University of Applied Sciences - SimonePonzettoUniversity of Mannheim + SimonePonzettoUniversity of Mannheim 6660-6675 Extensive efforts in the past have been directed toward the development of summarization datasets. However, a predominant number of these resources have been (semi)-automatically generated, typically through web data crawling. This resulted in subpar resources for training and evaluating summarization systems, a quality compromise that is arguably due to the substantial costs associated with generating ground-truth summaries, particularly for diverse languages and specialized domains. To address this issue, we present ACLSum, a novel summarization dataset carefully crafted and evaluated by domain experts. In contrast to previous datasets, ACLSum facilitates multi-aspect summarization of scientific papers, covering challenges, approaches, and outcomes in depth. Through extensive experiments, we evaluate the quality of our resource and the performance of models based on pretrained language models (PLMs) and state-of-the-art large language models (LLMs). Additionally, we explore the effectiveness of extract-then-abstract versus abstractive end-to-end summarization within the scholarly domain on the basis of automatically discovered aspects. While the former performs comparably well to the end-to-end approach with pretrained language models regardless of the potential error propagation issue, the prompting-based approach with LLMs shows a limitation in extracting sentences from source documents. 2024.naacl-long.371 @@ -5514,7 +5514,7 @@ SaburButt OlgaKolesnikovaInstituto Politécnico Nacional HectorCeballosTecnologico de Monterrey - AlexanderGelbukhInstituto Politécnico Nacional + AlexanderGelbukhInstituto Politécnico Nacional ThamarSolorioMohamed bin Zayed University of Artificial Intelligence and University of Houston 6972-6987 The paper focuses on the marginalization of indigenous language communities in the face of rapid technological advancements. We highlight the cultural richness of these languages and the risk they face of being overlooked in the realm of Natural Language Processing (NLP). We aim to bridge the gap between these communities and researchers, emphasizing the need for inclusive technological advancements that respect indigenous community perspectives. We show the NLP progress of indigenous Latin American languages and the survey that covers the status of indigenous languages in Latin America, their representation in NLP, and the challenges and innovations required for their preservation and development. The paper contributes to the current literature in understanding the need and progress of NLP for indigenous communities of Latin America, specifically low-resource and indigenous communities in general. @@ -5565,7 +5565,7 @@ JinheonBaekKorea Advanced Institute of Science & Technology SukminCho Sung JuHwangKorea Advanced Institute of Science and Technology and AITRICS - JongParkKorea Advanced Institute of Science and Technology + JongParkKorea Advanced Institute of Science and Technology 7036-7050 Retrieval-Augmented Large Language Models (LLMs), which incorporate the non-parametric knowledge from external knowledge bases into LLMs, have emerged as a promising approach to enhancing response accuracy in several tasks, such as Question-Answering (QA). However, even though there are various approaches dealing with queries of different complexities, they either handle simple queries with unnecessary computational overhead or fail to adequately address complex multi-step queries; yet, not all user requests fall into only one of the simple or complex categories. In this work, we propose a novel adaptive QA framework that can dynamically select the most suitable strategy for (retrieval-augmented) LLMs from the simplest to the most sophisticated ones based on the query complexity. Also, this selection process is operationalized with a classifier, which is a smaller LM trained to predict the complexity level of incoming queries with automatically collected labels, obtained from actual predicted outcomes of models and inherent inductive biases in datasets. This approach offers a balanced strategy, seamlessly adapting between the iterative and single-step retrieval-augmented LLMs, as well as the no-retrieval methods, in response to a range of query complexities. We validate our model on a set of open-domain QA datasets, covering multiple query complexities, and show that ours enhances the overall efficiency and accuracy of QA systems, compared to relevant baselines including the adaptive retrieval approaches. Code is available at: https://github.com/starsuzi/Adaptive-RAG. 2024.naacl-long.389 @@ -5577,7 +5577,7 @@ Knowing What <fixed-case>LLM</fixed-case>s <fixed-case>DO</fixed-case> <fixed-case>NOT</fixed-case> Know: A Simple Yet Effective Self-Detection Method YukunZhao LingyongYanBaidu Inc. - WeiweiSun + WeiweiSun GuoliangXing ChongMengBaidu ShuaiqiangWangBaidu Inc. @@ -5598,7 +5598,7 @@ YftahZiserUniversity of Edinburgh AnnaKorhonenUniversity of Cambridge EdoardoPontiUniversity of Edinburgh - ShayCohenUniversity of Edinburgh + ShayCohenUniversity of Edinburgh 7064-7083 Are Large Language Models (LLMs) temporally grounded? Since LLMs cannot perceive and interact with the environment, it is impossible to answer this question directly. Instead, we provide LLMs with textual narratives and probe them with respect to their common-sense knowledge of the structure and duration of events, their ability to order events along a timeline, and self-consistency within their temporal model (e.g., temporal relations such as after and before are mutually exclusive for any pair of events). We evaluate state-of-the-art LLMs (such as LLaMA 2 and GPT-4) on three tasks reflecting these abilities. Generally, we find that LLMs lag significantly behind both human performance as well as small-scale, specialised LMs. In-context learning, instruction tuning, and chain-of-thought prompting reduce this gap only to a limited degree. Crucially, LLMs struggle the most with self-consistency, displaying incoherent behaviour in at least 27.23% of their predictions. Contrary to expectations, we also find that scaling the model size does not guarantee positive gains in performance. To explain these results, we study the sources from which LLMs may gather temporal information: we find that sentence ordering in unlabelled texts, available during pre-training, is only weakly correlated with event ordering. Moreover, public instruction tuning mixtures contain few temporal tasks. Hence, we conclude that current LLMs lack a consistent temporal model of textual narratives. 2024.naacl-long.391 @@ -5614,7 +5614,7 @@ ZhiyangZhang YangZhaoInstitute of automation, Chinese academy of science, Chinese Academy of Sciences LuXiangInstitute of automation, Chinese academy of science, Chinese Academy of Sciences - ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences + ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences YuZhouInstitute of Automation, Chinese Academy of Sciences 7084-7095 Text image machine translation (TIMT) is a task that translates source texts embedded in the image to target translations. The existing TIMT task mainly focuses on text-line-level images. In this paper, we extend the current TIMT task and propose a novel task, **D**ocument **I**mage **M**achine **T**ranslation to **Markdown** (**DIMT2Markdown**), which aims to translate a source document image with long context and complex layout structure to markdown-formatted target translation.We also introduce a novel framework, **D**ocument **I**mage **M**achine **T**ranslation with **D**ynamic multi-pre-trained models **A**ssembling (**DIMTDA**).A dynamic model assembler is used to integrate multiple pre-trained models to enhance the model’s understanding of layout and translation capabilities.Moreover, we build a novel large-scale **Do**cument image machine **T**ranslation dataset of **A**rXiv articles in markdown format (**DoTA**), containing 126K image-translation pairs.Extensive experiments demonstrate the feasibility of end-to-end translation of rich-text document images and the effectiveness of DIMTDA. @@ -5695,7 +5695,7 @@ <fixed-case>F</fixed-case>-<fixed-case>MALLOC</fixed-case>: Feed-forward Memory Allocation for Continual Learning in Neural Machine Translation JunhongWuInstitute of automation, Chinese academy of science, Chinese Academy of Sciences YuchenLiu - ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences + ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences 7180-7192 In the evolving landscape of Neural Machine Translation (NMT), the pretrain-then-finetune paradigm has yielded impressive results. However, the persistent challenge of Catastrophic Forgetting (CF) remains a hurdle. While previous work has introduced Continual Learning (CL) methods to address CF, these approaches grapple with the delicate balance between avoiding forgetting and maintaining system extensibility. To address this, we propose a CL method, named \textbf{F-MALLOC} (\textbf{F}eed-forward \textbf{M}emory \textbf{ALLOC}ation). F-MALLOC is inspired by recent insights highlighting that feed-forward layers emulate neural memories and encapsulate crucial translation knowledge. It decomposes feed-forward layers into discrete memory cells and allocates these memories to different tasks. By learning to allocate and safeguard these memories, our method effectively alleviates CF while ensuring robust extendability. Besides, we propose a comprehensive assessment protocol for multi-stage CL of NMT systems. Experiments conducted following this new protocol showcase the superior performance of F-MALLOC, evidenced by higher BLEU scores and almost zero forgetting. 2024.naacl-long.398 @@ -5711,7 +5711,7 @@ AndreaYoungBrigham and Women’s Hospital, Harvard University GeoffreyYoungHarvard Medical School Jan-Willemvan de MeentUniversity of Amsterdam - ByronWallaceNortheastern University, Brown University and Northeastern University + ByronWallaceNortheastern University, Brown University and Northeastern University 7193-7210 Many diagnostic errors occur because clinicians cannot easily access relevant information in patient Electronic Health Records (EHRs). In this work we propose a method to use LLMs to identify pieces of evidence in patient EHR data that indicate increased or decreased risk of specific diagnoses; our ultimate aim is to increase access to evidence and reduce diagnostic errors. In particular, we propose a Neural Additive Model to make predictions backed by evidence with individualized risk estimates at time-points where clinicians are still uncertain, aiming to specifically mitigate delays in diagnosis and errors stemming from an incomplete differential. To train such a model, it is necessary to infer temporally fine-grained retrospective labels of eventual “true” diagnoses. We do so with LLMs, to ensure that the input text is from before a confident diagnosis can be made. We use an LLM to retrieve an initial pool of evidence, but then refine this set of evidence according to correlations learned by the model. We conduct an in-depth evaluation of the usefulness of our approach by simulating how it might be used by a clinician to decide between a pre-defined list of differential diagnoses. 2024.naacl-long.399 @@ -5858,7 +5858,7 @@ ManjunathHegde KoustuvDasgupta NiloyGangulyIndian Institute of Technology Kharagpur, - SaptarshiGhoshIndian Institute of Technology Kharagpur + SaptarshiGhoshIndian Institute of Technology Kharagpur PawanGoyalIIT Kharagpur 7391-7403 We study the problem of automatically annotating relevant numerals (GAAP metrics) occurring in the financial documents with their corresponding XBRL tags. Different from prior works, we investigate the feasibility of solving this extreme classification problem using a generative paradigm through instruction tuning of Large Language Models (LLMs). To this end, we leverage metric metadata informationto frame our target outputs while proposing a parameter efficient solution for the task using LoRA. We perform experiments on two recently released financial numeric labeling datasets. Our proposed model, **FLAN-FinXC**, achieves new state-of-the-art performances on both the datasets, outperforming several strong baselines. We explain the better scores of our proposed model by demonstrating its capability for zero-shot as well as the least frequently occurring tags. Also, even when we fail to predict the XBRL tags correctly, our generated output has substantial overlap with the ground-truth in majority of the cases. @@ -5942,7 +5942,7 @@ <fixed-case>L</fixed-case>ean<fixed-case>R</fixed-case>easoner: Boosting Complex Logical Reasoning with Lean DongweiJiang MarcioFonseca - ShayCohenUniversity of Edinburgh + ShayCohenUniversity of Edinburgh 7497-7510 Large language models (LLMs) often struggle with complex logical reasoning due to logical inconsistencies and the inherent difficulty ofsuch reasoning. We use Lean, a theorem proving framework, to address these challenges. By formalizing logical reasoning problems intotheorems within Lean, we can solve them by proving or disproving the corresponding theorems. This method reduces the risk of logical inconsistencies with the help of Lean’s symbolic solver. It also enhances our ability to treat complex reasoning tasks using Lean’s extensive library of theorem proofs. Our method achieves state-of-the-art performance on the FOLIO dataset and achieves performance near this level on ProofWriter. Notably, these results were accomplished by fine-tuning on fewer than 100 in-domain samples for each dataset 2024.naacl-long.416 @@ -5955,7 +5955,7 @@ EldonSchoopApple AlanLeungApple TitusBarikApple - JeffreyBighamApple + JeffreyBighamApple JeffreyNicholsApple 7511-7525 Many large language models (LLMs) struggle to consistently generate UI code that compiles and produces visually relevant designs. Existing approaches to improve generation rely either on expensive human feedback or distilling a proprietary model. In this paper, we explore the use of automated feedback (compilers and multi-modal models) to guide LLMs to generate high-quality UI code. Our method starts with an existing LLM and iteratively produces improved models by self-generating a large synthetic dataset using an original model, applying automated tools to aggressively filter, score, and de-duplicate the data into a refined higher quality dataset, and producing a new LLM by finetuning the original on the refined dataset.We applied our approach to several open-source LLMs and compared the resulting performance to baseline models with both automated metrics and human preferences.Our results show the resulting models outperform all other downloadable baselines and approach the performance of larger proprietary models. @@ -6033,7 +6033,7 @@ <fixed-case>PELMS</fixed-case>: Pre-training for Effective Low-Shot Multi-Document Summarization - JosephPeperUniversity of Michigan - Ann Arbor + JosephPeperUniversity of Michigan - Ann Arbor WenzhaoQiu LuWangNortheastern University, Northeastern University and University of Michigan 7652-7674 @@ -6304,8 +6304,8 @@ BrianFormentonational university of singaore, National University of Singapore WenjieFengNational University of Singapore Chuan-ShengFooCentre for Frontier AI Research, A*STAR and Institute for Infocomm Research, A*STAR - Anh TuanLuuNanyang Technological University - See-KiongNgNational University of Singapore + Anh TuanLuuNanyang Technological University + See-KiongNgNational University of Singapore 8005-8028 Language models (LMs) are indispensable tools for natural language processing tasks, but their vulnerability to adversarial attacks remains a concern. While current research has explored adversarial training techniques, their improvements to defend against word-level attacks have been limited. In this work, we propose a novel approach called Semantic Robust Defence (SemRoDe), a Macro Adversarial Training strategy to enhance the robustness of LMs. Drawing inspiration from recent studies in the image domain, we investigate and later confirm that in a discrete data setting such as language, adversarial samples generated via word substitutions do indeed belong to an adversarial domain exhibiting a high Wasserstein distance from the base domain. Our method learns a robust representation that bridges these two domains. We hypothesize that if samples were not projected into an adversarial domain, but instead to a domain with minimal shift, it would improve attack robustness. We align the domains by incorporating a new distance-based objective. With this, our model is able to learn more generalized representations by aligning the model’s high-level output features and therefore better handling unseen adversarial samples. This method can be generalized across word embeddings, even when they share minimal overlap at both vocabulary and word-substitution levels. To evaluate the effectiveness of our approach, we conduct experiments on BERT and RoBERTa models on three datasets. The results demonstrate promising state-of-the-art robustness. 2024.naacl-long.443 @@ -6332,7 +6332,7 @@ ChongLiInstitute of automation, Chinese Academy of Sciences ShaonanWang JiajunZhangInstitute of automation, Chinese academy of science, Chinese Academy of Sciences - ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences + ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences 8058-8076 Multilingual generative models obtain remarkable cross-lingual in-context learning capabilities through pre-training on large-scale corpora. However, they still exhibit a performance bias toward high-resource languages and learn isolated distributions of multilingual sentence representations, which may hinder knowledge transfer across languages. To bridge this gap, we propose a simple yet effective cross-lingual alignment framework exploiting pairs of translation sentences. It aligns the internal sentence representations across different languages via multilingual contrastive learning and aligns outputs by following cross-lingual instructions in the target language. Experimental results show that even with less than 0.1{\textperthousand} of pre-training tokens, our alignment framework significantly boosts the cross-lingual abilities of generative language models and mitigates the performance gap. Further analyses reveal that it results in a better internal multilingual representation distribution of multilingual models. 2024.naacl-long.445 @@ -6593,8 +6593,8 @@ MinjoonSeoKorea Advanced Institute of Science and Technology RichardJamesResearch, Facebook MikeLewisFacebook AI Research - LukeZettlemoyerUniversity of Washington, Facebook and Meta - Wen-tauYihMeta Platforms, Inc. + LukeZettlemoyerUniversity of Washington, Facebook and Meta + Wen-tauYihMeta Platforms, Inc. 8371-8384 We introduce REPLUG, a retrieval-augmented language modeling framework that treats the language model (LM) as a black box and augments it with a tuneable retrieval model. Unlike prior retrieval-augmented LMs that train language models with special cross-attention mechanisms to encode the retrieved text, REPLUG simply prepends retrieved documents to the input for the frozen black-box LM. This simple design can be easily applied to any existing language models. Furthermore, we show that the LM can be used to supervise the retrieval model, which can then find documents that help the LM make better predictions. Our experiments demonstrate that REPLUG with the tuned retriever significantly improves the performance of GPT-3 (175B) on language modeling by 6.3%, as well as the performance of Codex on five-shot MMLU by 5.1%. Code is publicly released at github.com/swj0419/REPLUG. 2024.naacl-long.463 @@ -6781,7 +6781,7 @@ Large Human Language Models: A Need and the Challenges NikitaSoni - H. AndrewSchwartzStony Brook University (SUNY) + H. AndrewSchwartzStony Brook University (SUNY) JoãoSedocNew York University NiranjanBalasubramanianState University of New York, Stony Brook 8631-8646 @@ -6797,9 +6797,9 @@ KejianShi KatherineHe LongtianYe - AlexanderFabbriSalesForce.com + AlexanderFabbriSalesForce.com PengfeiLiu - DragomirRadevYale University + DragomirRadevYale University ArmanCohanYale University and Allen Institute for Artificial Intelligence 8647-8664 Recent studies have found that summaries generated by large language models (LLMs) are favored by human annotators over the original reference summaries in commonly used summarization datasets. Therefore, we study an LLM-as-reference learning setting for smaller text summarization models to investigate whether their performance can be substantially improved. To this end, we use LLMs as both oracle summary generators for standard supervised fine-tuning and oracle summary evaluators for efficient contrastive learning that leverages the LLMs’ supervision signals. We conduct comprehensive experiments with source news articles and find that (1) summarization models trained under the LLM-as-reference setting achieve significant performance improvement in both LLM and human evaluations; (2) contrastive learning outperforms standard supervised fine-tuning under both low and high resource settings. Our experimental results also enable a meta-analysis of LLMs’ summary evaluation capacities under a challenging setting, showing that LLMs are not well-aligned with human evaluators. Particularly, our expert human evaluation reveals remaining nuanced performance gaps between LLMs and our fine-tuned models, which LLMs fail to capture. Thus, we call for further studies into both the potential and challenges of using LLMs in summarization model development. @@ -6889,7 +6889,7 @@ Mix-Initiative Response Generation with Dynamic Prefix Tuning YuxiangNieHong Kong University of Science and Technology - HeyanHuangBeijing Institute of Technology + HeyanHuangBeijing Institute of Technology Xian-LingMaoBeijing Institute of Technology LiziLiaoSingapore Management University 8748-8761 @@ -6921,7 +6921,7 @@ TanuGoyal NarjisAsad AparnaGarimellaAdobe Research - PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology + PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology 8786-8806 The pervasive influence of social biases in language data has sparked the need for benchmark datasets that capture and evaluate these biases in Large Language Models (LLMs). Existing efforts predominantly focus on English language and the Western context, leaving a void for a reliable dataset that encapsulates India’s unique socio-cultural nuances. To bridge this gap, we introduce IndiBias, a comprehensive benchmarking dataset designed specifically for evaluating social biases in the Indian context. We filter and translate the existing CrowS-Pairs dataset to create a benchmark dataset suited to the Indian context in Hindi language. Additionally, we leverage LLMs including ChatGPT and InstructGPT to augment our dataset with diverse societal biases and stereotypes prevalent in India. The included bias dimensions encompass gender, religion, caste, age, region, physical appearance, and occupation. We also build a resource to address intersectional biases along three intersectional dimensions. Our dataset contains 800 sentence pairs and 300 tuples for bias measurement across different demographics. The dataset is available in English and Hindi, providing a size comparable to existing benchmark datasets. Furthermore, using IndiBias we compare ten different language models on multiple bias measurement metrics. We observed that the language models exhibit more bias across a majority of the intersectional groups. All the scripts utilized and datasets created in this study are publicly available. 2024.naacl-long.487 @@ -6934,7 +6934,7 @@ Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 2: Short Papers) KevinDuh - HelenaGomez + HelenaGomez StevenBethard Association for Computational Linguistics
Mexico City, Mexico
@@ -6990,7 +6990,7 @@ Advancing Regular Language Reasoning in Linear Recurrent Neural Networks Ting-HanFan Ta-ChungChi - AlexanderRudnickyCarnegie Mellon University and Carnegie Mellon University + AlexanderRudnickyCarnegie Mellon University and Carnegie Mellon University 45-53 In recent studies, linear recurrent neural networks (LRNNs) have achieved Transformer-level performance in natural language and long-range modeling, while offering rapid parallel training and constant inference cost. With the resurgence of interest in LRNNs, we study whether they can learn the hidden rules in training sequences, such as the grammatical structures of regular language. We theoretically analyze some existing LRNNs and discover their limitations in modeling regular language. Motivated by this analysis, we propose a new LRNN equipped with a block-diagonal and input-dependent transition matrix. Experiments suggest that the proposed model is the only LRNN capable of performing length extrapolation on regular language tasks such as Sum, Even Pair, and Modular Arithmetic. The code is released at https://github.com/tinghanf/RegluarLRNN. 2024.naacl-short.4 @@ -7085,7 +7085,7 @@ Unified Examination of Entity Linking in Absence of Candidate Sets NicolasOng - HassanShavarani + HassanShavarani AnoopSarkarSimon Fraser University 113-123 Despite remarkable strides made in the development of entity linking systems in recent years, a comprehensive comparative analysis of these systems using a unified framework is notably absent. This paper addresses this oversight by introducing a new black-box benchmark and conducting a comprehensive evaluation of all state-of-the-art entity linking methods. We use an ablation study to investigate the impact of candidate sets on the performance of entity linking. Our findings uncover exactly how much such entity linking systems depend on candidate sets, and how much this limits the general applicability of each system. We present an alternative approach to candidate sets, demonstrating that leveraging the entire in-domain candidate set can serve as a viable substitute for certain models. We show the trade-off between less restrictive candidate sets, increased inference time and memory footprint for some models. @@ -7109,7 +7109,7 @@ <fixed-case>SKICSE</fixed-case>: Sentence Knowable Information Prompted by <fixed-case>LLM</fixed-case>s Improves Contrastive Sentence Embeddings FangweiOu - JinanXuBeijing Jiaotong University + JinanXuBeijing Jiaotong University 141-146 Contrastive learning, which utilizes positive pairs and in-batch negatives to optimize the loss objective, has been proven to be an effective method for learning sentence embeddings. However, we argue that the previous methods of constructing positive pairs only through dropout perturbation or entailment relation are limited. Since there is more sentence knowable information (SKI) to be mined, such as sentence external knowledge, semantic analysis, and grammatical description. In this work, we first hand-craft a simple and effective prompt template that is able to obtain the knowable information of input sentences from LLMs (e.g., LLaMA). Then we combine the original sentence and its knowable information to form a positive pair for contrastive learning. We evaluate our method on standard semantic textual similarity (STS) tasks. Experimental results show that our unsupervised and supervised models using \text{BERT}_\text{base} achieve an average of 78.65% and 82.45% Spearman’s correlation respectively, a 2.40% and 0.88% improvement compared to SimCSE. Our model outperforms the previous state-of-the-art model PromptBERT in both unsupervised and supervised settings and specifically yields a new state-of-the-art performance in supervised setting. 2024.naacl-short.13 @@ -7121,7 +7121,7 @@ A Multi-Aspect Framework for Counter Narrative Evaluation using Large Language Models JaylenJonesOhio State University, Columbus LingboMo - EricFosler-LussierOhio State University + EricFosler-LussierOhio State University HuanSunThe Ohio State University, Columbus 147-168 Counter narratives - informed responses to hate speech contexts designed to refute hateful claims and de-escalate encounters - have emerged as an effective hate speech intervention strategy. While previous work has proposed automatic counter narrative generation methods to aid manual interventions, the evaluation of these approaches remains underdeveloped. Previous automatic metrics for counter narrative evaluation lack alignment with human judgment as they rely on superficial reference comparisons instead of incorporating key aspects of counter narrative quality as evaluation criteria. To address prior evaluation limitations, we propose a novel evaluation framework prompting LLMs to provide scores and feedback for generated counter narrative candidates using 5 defined aspects derived from guidelines from counter narrative specialized NGOs. We found that LLM evaluators achieve strong alignment to human-annotated scores and feedback and outperform alternative metrics, indicating their potential as multi-aspect, reference-free and interpretable evaluators for counter narrative evaluation. @@ -7183,7 +7183,7 @@ XiyuanZou YiranLi IanPoradaMcGill University - JackieCheungMcGill University, Mila Research Institute and Microsoft + JackieCheungMcGill University, Mila Research Institute and Microsoft 212-219 Current end-to-end coreference resolution models combine detection of singleton mentions and antecedent linking into a single step. In contrast, singleton detection was often treated as a separate step in the pre-neural era. In this work, we show that separately parameterizing these two sub-tasks also benefits end-to-end neural coreference systems. Specifically, we add a singleton detector to the coarse-to-fine (C2F) coreference model, and design an anaphoricity-aware span embedding and singleton detection loss. Our method significantly improves model performance on OntoNotes and four additional datasets. 2024.naacl-short.19 @@ -7238,7 +7238,7 @@ GuanhuaZhangMax Planck Institute for Intelligent Systems, Max-Planck Institute WenqiFanHong Kong Polytechnic University QingLiThe Hong Kong Polytechnic University, Hong Kong Polytechnic University - YangZhang + YangZhang GaowenLiu SijiaLiuMichigan State University ShiyuChangUC Santa Barbara @@ -7275,7 +7275,7 @@ SebastianGehrmannBloomberg LiningZhang SaadMahamoodtrivago N.V. - MirunaClinciu + MirunaClinciu KhyathiChandu YufangHouTechnische Universität Darmstadt and IBM Research Ireland 272-281 @@ -7288,7 +7288,7 @@ More room for language: Investigating the effect of retrieval on language models DavidSamuelUniversity of Oslo - LucasCharpentierUniversity of Oslo + LucasCharpentierUniversity of Oslo SondreWold 282-305 Retrieval-augmented language models pose a promising alternative to standard language modeling. During pretraining, these models search in a corpus of documents for contextually relevant information that could aid the language modeling objective. We introduce an ‘ideal retrieval’ methodology to study these models in a fully controllable setting. We conduct an extensive evaluation to examine how retrieval augmentation affects the behavior of the underlying language model. Among other things, we observe that these models: (i) save substantially less world knowledge in their weights, (ii) are better at understanding local context and inter-word dependencies, but (iii) are worse at comprehending global context. @@ -7299,7 +7299,7 @@ Discourse-Aware In-Context Learning for Temporal Expression Normalization - AkashGautam + AkashGautam LukasLangeRobert Bosch GmbH, Bosch JannikStrötgenKarlsruhe University of Applied Sciences 306-315 @@ -7435,7 +7435,7 @@ Zero-Shot vs. Translation-Based Cross-Lingual Transfer: The Case of Lexical Gaps AbteenEbrahimiUniversity of Colorado, Boulder - Katharinavon der WenseJohannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University + Katharinavon der WenseJohannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University 443-458 Cross-lingual transfer can be achieved through two main approaches: zero-shot transfer or machine translation (MT). While the former has been the dominant approach, both have been shown to be competitive. In this work, we compare the current performance and long-term viability of these methods. We leverage lexical gaps to create a multilingual question answering dataset, which provides a difficult domain for evaluation. Both approaches struggle in this setting, though zero-shot transfer performs better, as current MT outputs are not specific enough for the task. Using oracle translation offers the best performance, showing that this approach can perform well long-term, however current MT quality is a bottleneck. We also conduct an exploratory study to see if humans produce translations sufficient for the task with only general instructions. We find this to be true for the majority of translators, but not all. This indicates that while translation has the potential to outperform zero-shot approaches, creating MT models that generate accurate task-specific translations may not be straightforward. 2024.naacl-short.37 @@ -7462,7 +7462,7 @@ HeikeAdelHochschule der Medien (University of Applied Sciences) LukasLangeRobert Bosch GmbH, Bosch JannikStrötgenKarlsruhe University of Applied Sciences - HinrichSchuetze + HinrichSchuetze 469-480 Continual learning aims at incrementally acquiring new knowledge while not forgetting existing knowledge. To overcome catastrophic forgetting, methods are either rehearsal-based, i.e., store data examples from previous tasks for data replay, or isolate parameters dedicated to each task. However, rehearsal-based methods raise privacy and memory issues, and parameter-isolation continual learning does not consider interaction between tasks, thus hindering knowledge transfer. In this work, we propose MoCL, a rehearsal-free **Mo**dular and **C**ompositional Continual **L**earning framework which continually adds new modules to language models and composes them with existing modules. Experiments on various benchmarks show that MoCL outperforms state of the art and effectively facilitates knowledge transfer. 2024.naacl-short.39 @@ -7487,7 +7487,7 @@ XiaoyuLiuUniversity of Maryland, College Park HuayangLi YoshinariFujinumaAWS AI Labs - MariaNadejdeAmazon + MariaNadejdeAmazon XingNiuAmazon RonLitmanAmazon YairKittenplonAmazon @@ -7556,8 +7556,8 @@ Do Multilingual Language Models Think Better in <fixed-case>E</fixed-case>nglish? JulenEtxaniz GorkaAzkune - AitorSoroa - OierLopez de Lacalle + AitorSoroa + OierLopez de Lacalle MikelArtetxe 550-564 Translate-test is a popular technique to improve the performance of multilingual language models. This approach works by translating the input into English using an external machine translation system before running inference. However, these improvements can be attributed to the use of a separate translation system, which is typically trained on large amounts of parallel data not seen by the language model. In this work, we introduce a new approach called self-translate that leverages the few-shot translation capabilities of multilingual language models. This allows us to analyze the effect of translation in isolation. Experiments over 5 tasks show that self-translate consistently outperforms direct inference, demonstrating that language models are unable to leverage their full multilingual potential when prompted in non-English languages. Our code is available at https://github.com/juletx/self-translate. @@ -7601,7 +7601,7 @@ Self-Improving for Zero-Shot Named Entity Recognition with Large Language Models TingyuXieZhejiang University - QiLi + QiLi YanZhangTencent ZuozhuLiuZhejiang University HongweiWangZhejiang University @@ -7618,7 +7618,7 @@ RuiruiChenInstitute of High Performance Computing, Singapore, A*STAR RuochenZhao WenhanXia - ShafiqJotySalesForce.com and Nanyang Technological University + ShafiqJotySalesForce.com and Nanyang Technological University 594-602 To mitigate forgetting, existing lifelong event detection methods typically maintain a memory module and replay the stored memory data during the learning of a new task. However, the simple combination of memory data and new-task samples can still result in substantial forgetting of previously acquired knowledge, which may occur due to the potential overlap between the feature distribution of new data and the previously learned embedding space. Moreover, the model suffers from overfitting on the few memory samples rather than effectively remembering learned patterns. To address the challenges of forgetting and overfitting, we propose a novel method based on embedding space separation and compaction. Our method alleviates forgetting of previously learned tasks by forcing the feature distribution of new data away from the previous embedding space. It also mitigates overfitting by a memory calibration mechanism that encourages memory data to be close to its prototype to enhance intra-class compactness. In addition, the learnable parameters of the new task are initialized by drawing upon acquired knowledge from the previously learned task to facilitate forward knowledge transfer. With extensive experiments, we demonstrate that our method can significantly outperform previous state-of-the-art approaches. 2024.naacl-short.50 @@ -7666,7 +7666,7 @@ Efficient Information Extraction in Few-Shot Relation Classification through Contrastive Representation Learning PhilippBorchertIÉSEG School of Management and KU Leuven JochenDe WeerdtKU Leuven - Marie-FrancineMoensKU Leuven, KU Leuven + Marie-FrancineMoensKU Leuven, KU Leuven 638-646 Differentiating relationships between entity pairs with limited labeled instances poses a significant challenge in few-shot relation classification. Representations of textual data extract rich information spanning the domain, entities, and relations. In this paper, we introduce a novel approach to enhance information extraction combining multiple sentence representations and contrastive learning. While representations in relation classification are commonly extracted using entity marker tokens, we argue that substantial information within the internal model representations remains untapped. To address this, we propose aligning multiple sentence representations, such as the CLS] token, the [MASK] token used in prompting, and entity marker tokens. Our method employs contrastive learning to extract complementary discriminative information from these individual representations. This is particularly relevant in low-resource settings where information is scarce. Leveraging multiple sentence representations is especially effective in distilling discriminative information for relation classification when additional information, like relation descriptions, are not available. We validate the adaptability of our approach, maintaining robust performance in scenarios that include relation descriptions, and showcasing its flexibility to adapt to different resource constraints. 2024.naacl-short.54 @@ -7728,7 +7728,7 @@ RichardFang RohanBindu AkulGupta - TatsunoriHashimotoStanford University + TatsunoriHashimotoStanford University DanielKangDepartment of Computer Science 681-687 As large language models (LLMs) have increased in their capabilities, so doestheir potential for dual use. To reduce harmful outputs, produces and vendors ofLLMs have used reinforcement learning with human feedback (RLHF). In tandem,LLM vendors have been increasingly enabling fine-tuning of their most powerfulmodels. However, concurrent work has shown that fine-tuning can remove RLHFprotections. We may expect that the most powerful models currently available(GPT-4) are less susceptible to fine-tuning attacks. In this work, we show the contrary: fine-tuning allows attackers to remove RLHFprotections with as few as 340 examples and a 95% success rate. These trainingexamples can be automatically generated with weaker models. We further show thatremoving RLHF protections does not decrease usefulness on non-censored outputs,providing evidence that our fine-tuning strategy does not decrease usefulnessdespite using weaker models to generate training data. Our results show the needfor further research on protections on LLMs. @@ -7797,7 +7797,7 @@ Michielde JongAugment Computing LukeVilnisGoogle SantiagoOntanonGoogle and Drexel University - WilliamCohenGoogle DeepMind + WilliamCohenGoogle DeepMind SumitSanghaiResearch, Google JoshuaAinslieGoogle 737-744 @@ -7823,7 +7823,7 @@ Improving Factuality in Clinical Abstractive Multi-Document Summarization by Guided Continued Pre-training AhmedElhady KhaledElsayedCairo University - EnekoAgirreUniversity of the Basque Country (UPV/EHU) + EnekoAgirreUniversity of the Basque Country (UPV/EHU) MikelArtetxeReka AI 755-761 Factual accuracy is an important property of neural abstractive summarization models, especially in fact-critical domains such as the clinical literature. In this work, we introduce a guided continued pre-training stage for encoder-decoder models that improves their understanding of the factual attributes of documents, which is followed by supervised fine-tuning on summarization. Our approach extends the pre-training recipe of BART to incorporate 3 additional objectives based on PICO spans, which capture the population, intervention, comparison, and outcomes related to a clinical study. Experiments on multi-document summarization in the clinical domain demonstrate that our approach is competitive with prior work, improving the quality and factuality of the summaries and achieving the best-published results in factual accuracy on the MSLR task. @@ -7838,7 +7838,7 @@ NicolasGarneau EmanueleBugliarelloGoogle YovaKementchedjhievaMohamed bin Zayed University of Artificial Intelligence - AndersSøgaardCopenhagen University + AndersSøgaardCopenhagen University 762-771 Facts are subject to contingencies and can be true or false in different circumstances. One such contingency is time, wherein some facts mutate over a given period, e.g., the president of a country or the winner of a championship. Trustworthy language models ideally identify mutable facts as such and process them accordingly. We create MuLan, a benchmark for evaluating the ability of English language models to anticipate time-contingency, covering both 1:1 and 1:N relations. We hypothesize that mutable facts are encoded differently than immutable ones, hence being easier to update. In a detailed evaluation of six popular large language models, we consistently find differences in the LLMs’ confidence, representations, and update behavior, depending on the mutability of a fact. Our findings should inform future work on the injection of and induction of time-contingent knowledge to/from LLMs. 2024.naacl-short.67 @@ -7864,8 +7864,8 @@ XiaochuangHanDepartment of Computer Science, University of Washington MikeLewisFacebook AI Research YuliaTsvetkovDepartment of Computer Science, University of Washington - LukeZettlemoyerUniversity of Washington, Facebook and Meta - Wen-tauYihMeta Platforms, Inc. + LukeZettlemoyerUniversity of Washington, Facebook and Meta + Wen-tauYihMeta Platforms, Inc. 783-791 Language models (LMs) often struggle to pay enough attention to the input context, and generate texts that are unfaithful or contain hallucinations. To mitigate this issue, we present context-aware decoding (CAD), which follows a contrastive output distribution that amplifies the difference between the output probabilities when a model is used with and without context. Our experiments show that CAD, without additional training, significantly improves the faithfulness of different LM families, including OPT, GPT, LLaMA, and FLAN-T5 for summarization tasks (e.g., 14.3% gain for LLaMA in factuality metrics). Furthermore, CAD is particularly effective in overriding a model’s prior knowledge when it contradicts the provided context, leading to substantial improvements in tasks where resolving the knowledge conflict is essential. Our code is publicly released at https://github.com/xhan77/context-aware-decoding. 2024.naacl-short.69 @@ -7912,9 +7912,9 @@ GunuJho InchulHwang GeorgiosVardaxoglou - AimiliosChalamandaris - PirrosTsiakoulis - SpyrosRaptis + AimiliosChalamandaris + PirrosTsiakoulis + SpyrosRaptis 808-813 Emotion detection in textual data has received growing interest in recent years, as it is pivotal for developing empathetic human-computer interaction systems.This paper introduces a method for categorizing emotions from text, which acknowledges and differentiates between the diversified similarities and distinctions of various emotions.Initially, we establish a baseline by training a transformer-based model for standard emotion classification, achieving state-of-the-art performance. We argue that not all misclassifications are of the same importance, as there are perceptual similarities among emotional classes.We thus redefine the emotion labeling problem by shifting it from a traditional classification model to an ordinal classification one, where discrete emotions are arranged in a sequential order according to their valence levels.Finally, we propose a method that performs ordinal classification in the two-dimensional emotion space, considering both valence and arousal scales.The results show that our approach not only preserves high accuracy in emotion prediction but also significantly reduces the magnitude of errors in cases of misclassification. 2024.naacl-short.72 @@ -7987,7 +7987,7 @@ AmanpreetSinghAllen Institute for Artificial Intelligence DougDowneyAllen Institute for Artificial Intelligence and Northwestern University SergeyFeldmanAllen Institute for Artificial Intelligence and Data Cowboys - LucyWangUniversity of Washington and Allen Institute for Artificial Intelligence + LucyWangUniversity of Washington and Allen Institute for Artificial Intelligence 1-11 Topic pages aggregate useful information about an entity or concept into a single succinct and accessible article. Automated creation of topic pages would enable their rapid curation as information resources, providing an alternative to traditional web search. While most prior work has focused on generating topic pages about biographical entities, in this work, we develop a completely automated process to generate high-quality topic pages for scientific entities, with a focus on biomedical concepts. We release TOPICAL, a web app and associated open-source code, comprising a model pipeline combining retrieval, clustering, and prompting, that makes it easy for anyone to generate topic pages for a wide variety of biomedical entities on demand. In a human evaluation of 150 diverse topic pages generated using TOPICAL, we find that the vast majority were considered relevant, accurate, and coherent, with correct supporting citations. We make all code publicly available and host a free-to-use web app at: https://s2-topical.apps.allenai.org. 2024.naacl-demo.1 @@ -8082,7 +8082,7 @@ <fixed-case>O</fixed-case>pinion<fixed-case>GPT</fixed-case>: Modelling Explicit Biases in Instruction-Tuned <fixed-case>LLM</fixed-case>s - PatrickHallerHumboldt Universität Berlin + PatrickHallerHumboldt Universität Berlin AnsarAynetdinovDepartment of Computer Science, Humboldt University Berlin, Humboldt Universität Berlin AlanAkbikHumboldt Universität Berlin 78-86 @@ -8110,7 +8110,7 @@ <fixed-case>B</fixed-case>e<fixed-case>L</fixed-case>eaf: Belief Prediction as Tree Generation JohnMurzaku, State University of New York at Stony Brook - OwenRambowStony Brook University + OwenRambowStony Brook University 97-106 We present a novel approach to predicting source-and-target factuality by transforming it into a linearized tree generation task. Unlike previous work, our model and representation format fully account for the factuality tree structure, generating the full chain of nested sources instead of the last source only. Furthermore, our linearized tree representation significantly compresses the amount of tokens needed compared to other representations, allowing for fully end-to-end systems. We achieve state-of-the-art results on FactBank and the Modal Dependency Corpus, which are both corpora annotating source-and-target event factuality. Our results on fine-tuning validate the strong generality of the proposed linearized tree generation task, which can be easily adapted to other corpora with a similar structure. We then present BeLeaf, a system which directly leverages the linearized tree representation to create both sentence level and document level visualizations. Our system adds several missing pieces to the source-and-target factuality task such as coreference resolution and event head word to syntactic span conversion. Our demo code is available on https://github.com/yurpl/beleaf and our video is available on https://youtu.be/SpbMNnin-Po. 2024.naacl-demo.10 @@ -8168,7 +8168,7 @@ HongyiWangCMU, Carnegie Mellon University YonghaoZhuangCMU, Carnegie Mellon University JindongChenGoogle - EricXingMohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University + EricXingMohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University ZhitingHuUniversity of California, San Diego and Amazon 137-147 The recent progress of AI can be largely attributed to large language models (LLMs). However, their escalating memory requirements introduce challenges for machine learning (ML) researchers and engineers. Addressing this requires developers to partition a large model to distribute it across multiple GPUs or TPUs. This necessitates considerable coding and intricate configuration efforts with existing model parallel tools, such as Megatron-LM, DeepSpeed, and Alpa. These tools require users’ expertise in machine learning systems (MLSys), creating a bottleneck in LLM development, particularly for developers without MLSys background. In this work, we present RedCoast (Redco), a lightweight and user-friendly tool crafted to automate distributed training and inference for LLMs, as well as to simplify ML pipeline development. The design of Redco emphasizes two key aspects. Firstly, to automate model parallelism, our study identifies two straightforward rules to generate tensor parallel strategies for any given LLM. Integrating these rules into Redco facilitates effortless distributed LLM training and inference, eliminating the need of additional coding or complex configurations. We demonstrate the effectiveness by applying Redco on a set of LLM architectures, such as GPT-J, LLaMA, T5, and OPT, up to the size of 66B. Secondly, we propose a mechanism that allows for the customization of diverse ML pipelines through the definition of merely three functions, avoiding redundant and formulaic code like multi-host related processing. This mechanism proves adaptable across a spectrum of ML algorithms, from foundational language modeling to complex algorithms like meta-learning and reinforcement learning. As a result, Redco implementations exhibit significantly fewer lines of code compared to their official counterparts. RedCoast (Redco) has been released under Apache 2.0 license at https://github.com/tanyuqian/redco. @@ -8184,7 +8184,7 @@ RobertGeislinger FlorianHelfer GertraudKoch - ChrisBiemannU Hamburg + ChrisBiemannU Hamburg 148-157 In this system demonstration paper, we present the Concept Over Time Analysis extension for the Discourse Analysis Tool Suite.The proposed tool empowers users to define, refine, and visualize their concepts of interest within an interactive interface. Adhering to the Human-in-the-loop paradigm, users can give feedback through sentence annotations. Utilizing few-shot sentence classification, the system employs Sentence Transformers to compute representations of sentences and concepts. Through an iterative process involving semantic similarity searches, sentence annotation, and fine-tuning with contrastive data, the model continuously refines, providing users with enhanced analysis outcomes. The final output is a timeline visualization of sentences classified to concepts. Especially suited for the Digital Humanities, Concept Over Time Analysis serves as a valuable tool for qualitative data analysis within extensive datasets. The chronological overview of concepts enables researchers to uncover patterns, trends, and shifts in discourse over time. 2024.naacl-demo.15 @@ -8197,10 +8197,10 @@ ZhengxuanWuStanford University AtticusGeigerPr(Ai)²R Group AryamanArora - JingHuangStanford University + JingHuangStanford University ZhengWangStanford University - NoahGoodmanStanford University - ChristopherManningComputer Science Department, Stanford University + NoahGoodmanStanford University + ChristopherManningComputer Science Department, Stanford University ChristopherPottsStanford University 158-165 Interventions on model-internal states are fundamental operations in many areas of AI, including model editing, steering, robustness, and interpretability. To facilitate such research, we introduce pyvene, an open-source Python library that supports customizable interventions on a range of different PyTorch modules. pyvene supports complex intervention schemes with an intuitive configuration format, and its interventions can be static or include trainable parameters. We show how pyvene provides a unified and extensible framework for performing interventions on neural models and sharing the intervened upon models with others. We illustrate the power of the library via interpretability analyses using causal abstraction and knowledge localization. We publish our library through Python Package Index (PyPI) and provide code, documentation, and tutorials at ‘https://github.com/stanfordnlp/pyvene‘. @@ -8347,7 +8347,7 @@ DahyunJungKorea University SugyeongEoKorea University ChanjunParkUpstage - HeuiseokLimKorea University + HeuiseokLimKorea University 25-35 Critical error detection (CED) in machine translation is a task that aims to detect errors that significantly distort the intended meaning. However, the existing study of CED lacks explainability due to the absence of content addressing the reasons for catastrophic errors. To address this limitation, we propose Explainable CED, a dataset that introduces the attributes of error explanation and correction regarding critical errors. Considering the advantage of reducing time costs and mitigating human annotation bias, we leverage a large language model in the data construction process. To improve the quality of the dataset and mitigate hallucination, we compare responses from the model and introduce an additional data filtering method through feedback scoring. The experiment demonstrates that the dataset appropriately reflects a consistent explanation and revision for errors, validating the reliability of the dataset. 2024.naacl-srw.4 @@ -8396,7 +8396,7 @@ JorisDriesenApple AlexandruCoca MarkGaynorApple - AndersJohannsen + AndersJohannsen 56-74 Spurred by recent advances in Large Language Models (LLMs), virtual assistants are poised to take a leap forward in terms of their dialogue capabilities. Yet a major bottleneck to achieving genuinely transformative task-oriented dialogue capabilities remains the scarcity of high quality data. Existing datasets, while impressive in scale, have limited domain coverage and contain few genuinely challenging conversational phenomena; those which are present are typically unlabelled, making it difficult to assess the strengths and weaknesses of models without time-consuming and costly human evaluation. Moreover, creating high quality dialogue data has until now required considerable human input, limiting both the scale of these datasets and the ability to rapidly bootstrap data for a new target domain. We aim to overcome these issues with LUCID, a modularised and highly automated LLM-driven data generation system that produces realistic, diverse and challenging dialogues. We use LUCID to generate a seed dataset of 4,277 conversations across 100 intents to demonstrate its capabilities, with a human review finding consistently high quality labels in the generated data. 2024.naacl-srw.8 @@ -8409,7 +8409,7 @@ SankalpBahad PruthwikMishraIIIT-Hyderabad ParameswariKrishnamurthy - DiptiSharmaIIIT Hyderabad + DiptiSharmaIIIT Hyderabad 75-82 Named Entity Recognition (NER) is a use-ful component in Natural Language Process-ing (NLP) applications. It is used in varioustasks such as Machine Translation, Summa-rization, Information Retrieval, and Question-Answering systems. The research on NER iscentered around English and some other ma-jor languages, whereas limited attention hasbeen given to Indian languages. We analyze thechallenges and propose techniques that can betailored for Multilingual Named Entity Recog-nition for Indian Languages. We present a hu-man annotated named entity corpora of ∼40Ksentences for 4 Indian languages from two ofthe major Indian language families. Addition-ally, we show the transfer learning capabilitiesof pre-trained transformer models from a highresource language to multiple low resource lan-guages through a series of experiments. Wealso present a multilingual model fine-tunedon our dataset, which achieves an F1 score of∼0.80 on our dataset on average. We achievecomparable performance on completely unseenbenchmark datasets for Indian languages whichaffirms the usability of our model. 2024.naacl-srw.9 @@ -8432,7 +8432,7 @@ DongKimKorea University DahyunJungKorea University ChanjunParkUpstage - HeuiseokLimKorea University + HeuiseokLimKorea University 93-104 Large Language Models (LLMs) have significantly impacted various fields requiring advanced linguistic understanding, yet concerns regarding their inherent biases and ethical considerations have also increased. Notably, LLMs have been critiqued for perpetuating stereotypes against diverse groups based on race, sexual orientation, and other attributes. However, most research analyzing these biases has predominantly focused on communities where English is the primary language, neglecting to consider the cultural and linguistic nuances of other societies. In this paper, we aim to explore the inherent biases and toxicity of LLMs, specifically within the social context of Korea. We devise a set of prompts that reflect major societal issues in Korea and assign varied personas to both ChatGPT and GPT-4 to assess the toxicity of the generated sentences. Our findings indicate that certain personas or prompt combinations consistently yield harmful content, highlighting the potential risks associated with specific persona-issue alignments within the Korean cultural framework. Furthermore, we discover that GPT-4 can produce more than twice the level of toxic content than ChatGPT under certain conditions. 2024.naacl-srw.11 @@ -8443,7 +8443,7 @@ To Clarify or not to Clarify: A Comparative Analysis of Clarification Classification with Fine-Tuning, Prompt Tuning, and Prompt Engineering AlinaLeippertGerman Research Center for AI TatianaAnikinaGerman Research Center for AI - BerndKieferGerman Research Center for AI + BerndKieferGerman Research Center for AI JosefGenabithGerman Research Center for AI and Universität des Saarlandes 105-115 Misunderstandings occur all the time in human conversation but deciding on when to ask for clarification is a challenging task for conversational systems that requires a balance between asking too many unnecessary questions and running the risk of providing incorrect information. This work investigates clarification identification based on the task and data from (Xu et al., 2019), reproducing their Transformer baseline and extending it by comparing pre-trained language model fine-tuning, prompt tuning and manual prompt engineering on the task of clarification identification. Our experiments show strong performance with LM and a prompt tuning approach with BERT and RoBERTa, outperforming standard LM fine-tuning, while manual prompt engineering with GPT-3.5 proved to be less effective, although informative prompt instructions have the potential of steering the model towards generating more accurate explanations for why clarification is needed. @@ -8701,8 +8701,8 @@ ChaoweiXiaoUW-Madison HuanSunOSU LeiLiCMU - LeonDerczynskiUW Seattle - AnimaAnandkumarCaltech, NVIDIA + LeonDerczynskiUW Seattle + AnimaAnandkumarCaltech, NVIDIA FeiWangUSC 8-18 This tutorial seeks to provide a systematic summary of risks and vulnerabilities in security, privacy and copyright aspects of large language models (LLMs), and most recent solutions to address those issues. We will discuss a broad thread of studies that try to answer the following questions: (i) How do we unravel the adversarial threats that attackers may leverage in the training time of LLMs, especially those that may exist in recent paradigms of instruction tuning and RLHF processes? (ii) How do we guard the LLMs against malicious attacks in inference time, such as attacks based on backdoors and jailbreaking? (iii) How do we ensure privacy protection of user information and LLM decisions for Language Model as-a-Service (LMaaS)? (iv) How do we protect the copyright of an LLM? (v) How do we detect and prevent cases where personal or confidential information is leaked during LLM training? (vi) How should we make policies to control against improper usage of LLM-generated content? In addition, will conclude the discussions by outlining emergent challenges in security, privacy and reliability of LLMs that deserve timely investigation by the community @@ -8733,7 +8733,7 @@ NikitaSoniStony Brook University SwanieJuhngStony Brook University JoãoSedocNew York University - H. AndrewSchwartzStony Brook University + H. AndrewSchwartzStony Brook University SalvatoreGiorgiUniversity of Pennsylvania, National Institute on Drug Abuse, Intramural Research Program Ryan LBoydStony Brook University 26-33 @@ -8746,7 +8746,7 @@ Human-<fixed-case>AI</fixed-case> Interaction in the Age of <fixed-case>LLM</fixed-case>s DiyiYangStanford University Sherry TongshuangWuCarnegie Mellon University - Marti A.HearstUniversity of California, Berkeley + Marti A.HearstUniversity of California, Berkeley 34-38 Recently, the development of Large Language Models (LLMs) has revolutionized the capabilities of AI systems. These models possess the ability to comprehend and generate human-like text, enabling them to engage in sophisticated conversations, generate content, and even perform tasks that once seemed beyond the reach of machines. As a result, the way we interact with technology and each other — an established field called “Human-AI Interaction” and have been studied for over a decade — is undergoing a profound transformation. This tutorial will provide an overview of the interaction between humans and LLMs, exploring the challenges, opportunities, and ethical considerations that arise in this dynamic landscape. It will start with a review of the types of AI models we interact with, and a walkthrough of the core concepts in Human-AI Interaction. We will then emphasize the emerging topics shared between HCI and NLP communities in light of LLMs. 2024.naacl-tutorials.5 @@ -8757,8 +8757,8 @@ Spatial and Temporal Language Understanding: Representation, Reasoning, and Grounding ParisaKordjamshidiMichigan State University QiangNingAWS - JamesPustejovskyBrandeis University - Marie-FrancineMoensKU Leuven + JamesPustejovskyBrandeis University + Marie-FrancineMoensKU Leuven 39-46 This tutorial provides an overview of the cutting edge research on spatial and temporal language understanding. We also cover some essential background material from various subdisciplines to this topic, which we believe will enrich the CL community’s appreciation of the complexity of spatiotemporal reasoning. 2024.naacl-tutorials.6 @@ -8771,7 +8771,7 @@ Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track) YiYang AidaDavani - AviSil + AviSil AnoopKumar Association for Computational Linguistics
Mexico City, Mexico
@@ -8874,7 +8874,7 @@ JiaxinPeiUniversity of Michigan SoumyaVadlamannatiBloomberg Liang-KangHuang - DanielPreotiuc-PietroBloomberg + DanielPreotiuc-PietroBloomberg XinyuHuaBloomberg 63-72 Identifying risks associated with a company is important to investors and the wellbeing of the overall financial markets. In this study, we build a computational framework to automatically extract company risk factors from news articles. Our newly proposed schema comprises seven distinct aspects, such as supply chain, regulations, and competition. We annotate 666 news articles and benchmark various machine learning models. While large language mod- els have achieved remarkable progress in various types of NLP tasks, our experiment shows that zero-shot and few-shot prompting state-of- the-art LLMs (e.g., Llama-2) can only achieve moderate to low performances in identifying risk factors. In contrast, fine-tuning pre-trained language models yields better results on most risk factors. Using this model, we analyze over 277K Bloomberg News articles and demonstrate that identifying risk factors from news could provide extensive insights into the operations of companies and industries. @@ -8900,7 +8900,7 @@ An <fixed-case>NLP</fixed-case>-Focused Pilot Training Agent for Safe and Efficient Aviation Communication XiaochenLiu BoweiZouA*STAR - AiTiAwI2R + AiTiAwI2R 89-96 Aviation communication significantly influences the success of flight operations, ensuring safety of lives and efficient air transportation. In day-to-day flight operations, air traffic controllers (ATCos) would timely communicate instructions to pilots using specific phraseology for aircraft manipulation . However, pilots, originating from diverse backgrounds and understanding of English language, have struggled with conforming to strict phraseology for readback and communication in the live operation, this problem had not been effectively addressed over the past decades. Traditionally, aviation communication training involved expensive setups and resources, often relying on human-in-the-loop (HIL) air traffic simulations that demand allocating a specific environment, domain experts for participation, and substantial amount of annotated data for simulation. Therefore, we would like to propose an NLP-oriented training agent and address these challenges. Our approach involves leveraging only natural language capabilities and fine-tuning on communication data to generate instructions based on input scenarios (keywords). Given the absence of prior references for this business problem, we investigated the feasibility of our proposed solution by 1) generating all instructions at once and 2) generating one instruction while incorporating conversational history in each input. Our findings affirm the feasibility of this approach, highlighting the effectiveness of fine-tuning pre-trained models and large language models in advancing aviation communication training. 2024.naacl-industry.8 @@ -8912,7 +8912,7 @@ Visual Grounding for User Interfaces YijunQian YujieLuUC Santa Barbara - AlexanderHauptmannSchool of Computer Science, Carnegie Mellon University + AlexanderHauptmannSchool of Computer Science, Carnegie Mellon University OrianaRivaGoogle and Microsoft 97-107 Enabling autonomous language agents to drive application user interfaces (UIs) as humans do can significantly expand the capability of today’s API-based agents. Essential to this vision is the ability of agents to ground natural language commands to on-screen UI elements. Prior UI grounding approaches work by relaying on developer-provided UI metadata (UI trees, such as web DOM, and accessibility labels) to detect on-screen elements. However, such metadata is often unavailable or incomplete. Object detection techniques applied to UI screens remove this dependency, by inferring location and types of UI elements directly from the UI’s visual appearance. The extracted semantics, however, are too limited to directly enable grounding. We overcome the limitations of both approaches by introducing the task of visual UI grounding, which unifies detection and grounding. A model takes as input a UI screenshot and a free-form language expression, and must identify the referenced UI element. We propose a solution to this problem, LVG, which learns UI element detection and grounding using a new technique called layout-guided contrastive learning, where the semantics of individual UI objects are learned also from their visual organization. Due to the scarcity of UI datasets, LVG integrates synthetic data in its training using multi-context learning. LVG outperforms baselines pre-trained on much larger datasets by over 4.9 points in top-1 accuracy, thus demonstrating its effectiveness. @@ -8948,7 +8948,7 @@
Conformer-Based Speech Recognition On Extreme Edge-Computing Devices - MingbinXu + MingbinXu AlexJin SichengWang MuSu @@ -9119,7 +9119,7 @@ AlexRosenfeld J.Gage DanielDakotaLeidos and Indiana University - SandraKüblerIndiana University at Bloomington + SandraKüblerIndiana University at Bloomington 295-302 We describe our system for authorship attribution in the IARPA HIATUS program. We describe the model and compute infrastructure developed to satisfy the set of technical constraints imposed by IARPA, including runtime limits as well as other constraints related to the ultimate use case. One use-case constraint concerns the explainability of the features used in the system. For this reason, we integrate features from frame semantic parsing, as they are both interpretable and difficult for adversaries to evade. One trade-off with using such features, however, is that more sophisticated feature representations require more complicated architectures, which limit usefulness in time-sensitive and constrained compute environments. We propose an approach to increase the efficiency of frame semantic parsing through an analysis of parallelization and beam search sizes. Our approach results in a system that is approximately 8.37x faster than the base system with a minimal effect on accuracy. 2024.naacl-industry.24 @@ -9260,7 +9260,7 @@ Shears: Unstructured Sparsity with Neural Low-rank Adapter Search - J. PabloMuñozIntel + J. PabloMuñozIntel JinjieYuanIntel NileshJainIntel 395-405 @@ -9320,7 +9320,7 @@ GiuseppeCastellucciAmazon EugeneAgichteinAmazon and Emory University OlegRokhlenko - ShervinMalmasiAmazon + ShervinMalmasiAmazon 437-446 Conversational Task Assistants (CTAs) guide users in performing a multitude of activities, such as making recipes. However, ensuring that interactions remain engaging, interesting, and enjoyable for CTA users is not trivial, especially for time-consuming or challenging tasks. Grounded in psychological theories of human interest, we propose to engage users with contextual and interesting statements or facts during interactions with a multi-modal CTA, to reduce fatigue and task abandonment before a task is complete. To operationalize this idea, we train a high-performing classifier (82% F1-score) to automatically identify relevant and interesting facts for users. We use it to create an annotated dataset of task-specific interesting facts for the domain of cooking. Finally, we design and validate a dialogue policy to incorporate the identified relevant and interesting facts into a conversation, to improve user engagement and task completion. Live testing on a leading multi-modal voice assistant shows that 66% of the presented facts were received positively, leading to a 40% gain in the user satisfaction rating, and a 37% increase in conversation length. These findings emphasize that strategically incorporating interesting facts into the CTA experience can promote real-world user participation for guided task interactions. 2024.naacl-industry.38 @@ -9380,7 +9380,7 @@ Solving General Natural-Language-Description Optimization Problems with Large Language Models - JihaiZhangAlibaba Group + JihaiZhangAlibaba Group WeiWangAlibaba Group SiyanGuoAlibaba Group LiWang diff --git a/data/xml/2024.nejlt.xml b/data/xml/2024.nejlt.xml index d3e5ab4bb2..dda2a73bda 100644 --- a/data/xml/2024.nejlt.xml +++ b/data/xml/2024.nejlt.xml @@ -61,14 +61,14 @@ HadyElsahar ChrisEmezue Alham FikriAji - SuzanaIlić + SuzanaIlić NurulaqillaKhamis ColinLeong MaraimMasoud - AitorSoroa - PedroOrtiz Suarez + AitorSoroa + PedroOrtiz Suarez Danielvan Strien - ZeerakTalat + ZeerakTalat YacineJernite 50-77 Contemporary large-scale data collection efforts have prioritized the amount of data collected to improve large language models (LLM). This quantitative approach has resulted in concerns for the rights of data subjects represented in data collections. This concern is exacerbated by a lack of documentation and analysis tools, making it difficult to interrogate these collections. Mindful of these pitfalls, we present a methodology for documentation-first, human-centered data collection. We apply this approach in an effort to train a multilingual LLM. We identify a geographically diverse set of target language groups (Arabic varieties, Basque, Chinese varieties, Catalan, English, French, Indic languages, Indonesian, Niger-Congo languages, Portuguese, Spanish, and Vietnamese, as well as programming languages) for which to collect metadata on potential data sources. We structure this effort by developing an online catalogue in English as a tool for gathering metadata through public hackathons. We present our tool and analyses of the resulting resource metadata, including distributions over languages, regions, and resource types, and discuss our lessons learned. diff --git a/data/xml/2024.neusymbridge.xml b/data/xml/2024.neusymbridge.xml index 2d8ad8b37e..4dc1645f78 100644 --- a/data/xml/2024.neusymbridge.xml +++ b/data/xml/2024.neusymbridge.xml @@ -4,12 +4,12 @@ Proceedings of the Workshop: Bridging Neurons and Symbols for Natural Language Processing and Knowledge Graphs Reasoning (NeusymBridge) @ LREC-COLING-2024 TiansiDong - ErhardHinrichs + ErhardHinrichs ZhenHan KangLiu YangqiuSong YixinCao - Christian F.Hempelmann + Christian F.Hempelmann RafetSifa ELRA and ICCL
Torino, Italia
@@ -28,7 +28,7 @@ XintongWang XiaoyuLi XingshanLi - ChrisBiemann + ChrisBiemann 1–7 Large Language Models (LLMs) have emerged as dominant foundational models in modern NLP. However, the understanding of their prediction processes and internal mechanisms, such as feed-forward networks (FFN) and multi-head self-attention (MHSA), remains largely unexplored. In this work, we probe LLMs from a human behavioral perspective, correlating values from LLMs with eye-tracking measures, which are widely recognized as meaningful indicators of human reading patterns. Our findings reveal that LLMs exhibit a similar prediction pattern with humans but distinct from that of Shallow Language Models (SLMs). Moreover, with the escalation of LLM layers from the middle layers, the correlation coefficients also increase in FFN and MHSA, indicating that the logits within FFN increasingly encapsulate word semantics suitable for predicting tokens from the vocabulary. 2024.neusymbridge-1.1 @@ -39,7 +39,7 @@ Yu-HsiangTseng Pin-ErChen Da-ChenLian - Shu-KaiHsieh + Shu-KaiHsieh 8–21 Compressibility is closely related to the predictability of the texts from the information theory viewpoint. As large language models (LLMs) are trained to maximize the conditional probabilities of upcoming words, they may capture the subtlety and nuances of the semantic constraints underlying the texts, and texts aligning with the encoded semantic constraints are more compressible than those that do not. This paper systematically tests whether and how LLMs can act as compressors of semantic pairs. Using semantic relations from English and Chinese Wordnet, we empirically demonstrate that texts with correct semantic pairings are more compressible than incorrect ones, measured by the proposed compression advantages index. We also show that, with the Pythia model suite and a fine-tuned model on Chinese Wordnet, compression capacities are modulated by the model’s seen data. These findings are consistent with the view that LLMs encode the semantic knowledge as underlying constraints learned from texts and can act as compressors of semantic information or potentially other structured knowledge. 2024.neusymbridge-1.2 @@ -68,7 +68,7 @@
The Need for Grounding in <fixed-case>LLM</fixed-case>-based Dialogue Systems - KristiinaJokinen + KristiinaJokinen 45–52 Grounding is a pertinent part of the design of LLM-based dialogue systems. Although research on grounding has a long tradition, the paradigm shift caused by LLMs has brought the concept onto the foreground, in particular in the context of cognitive robotics. To avoid generation of irrelevant or false information, the system needs to ground its utterances into real-world events, and to avoid the statistical parrot effect, the system needs to construct shared understanding of the dialogue context and of the partner’s intents. Grounding and construction of the shared context enables cooperation between the participants, and thus supports trustworthy interaction. This paper discusses grounding using neural LLM technology. It aims to bridge neural and symbolic computing on the cognitive architecture level, so as to contribute to a better understanding of how conversational reasoning and collaboration can be linked to LLM implementations to support trustworthy and flexible interaction. 2024.neusymbridge-1.5 diff --git a/data/xml/2024.nllp.xml b/data/xml/2024.nllp.xml index dc0cac8586..3550e2847e 100644 --- a/data/xml/2024.nllp.xml +++ b/data/xml/2024.nllp.xml @@ -65,7 +65,7 @@ u<fixed-case>O</fixed-case>ttawa at <fixed-case>L</fixed-case>egal<fixed-case>L</fixed-case>ens-2024: Transformer-based Classification Experiments NimaMeghdadiUniversity of Ottawa - DianaInkpenUniversity of Ottawa + DianaInkpenUniversity of Ottawa 42-47 This paper presents the methods used for LegalLens-2024, which focused on detecting legal violations within unstructured textual data and associating these violations with potentially affected individuals. The shared task included two subtasks: A) Legal Named Entity Recognition (L-NER) and B) Legal Natural Language Inference (L-NLI). For subtask A, we utilized the spaCy library, while for subtask B, we employed a combined model incorporating RoBERTa and CNN. Our results were 86.3% in the L-NER subtask and 88.25% in the L-NLI subtask. Overall, our paper demonstrates the effectiveness of transformer models in addressing complex tasks in the legal domain. 2024.nllp-1.4 @@ -123,7 +123,7 @@ Automated Anonymization of Parole Hearing Transcripts AbedItaniUniversity of Passau WassilikiSiskouUniversity of Konstanz - AnnetteHautli-JaniszUniversity of Passau + AnnetteHautli-JaniszUniversity of Passau 115-128 Responsible natural language processing is more and more concerned with preventing the violation of personal rights that language technology can entail (CITATION). In this paper we illustrate the case of parole hearings in California, the verbatim transcripts of which are made available to the general public upon a request sent to the California Board of Parole Hearings. The parole hearing setting is highly sensitive: inmates face a board of legal representatives who discuss highly personal matters not only about the inmates themselves but also about victims and their relatives, such as spouses and children. Participants have no choice in contributing to the data collection process, since the disclosure of the transcripts is mandated by law. As researchers who are interested in understanding and modeling the communication in these hierarchy-driven settings, we face an ethical dilemma: publishing raw data as is for the community would compromise the privacy of all individuals affected, but manually cleaning the data requires a substantive effort. In this paper we present an automated anonymization process which reliably removes and pseudonymizes sensitive data in verbatim transcripts, while at the same time preserving the structure and content of the data. Our results show that the process exhibits little to no leakage of sensitive information when applied to more than 300 hearing transcripts. 2024.nllp-1.9 @@ -145,7 +145,7 @@ Enhancing Contract Negotiations with <fixed-case>LLM</fixed-case>-Based Legal Document Comparison SavinayNarendraJP Morgan Chase & Co. KaushalShettyJP Morgan Chase - AdwaitRatnaparkhiJPMorganChase + AdwaitRatnaparkhiJPMorganChase 143-153 We present a large language model (LLM) based approach for comparing legal contracts with their corresponding template documents. Legal professionals use commonly observed deviations between templates and contracts to help with contract negotiations, and also to refine the template documents. Our comparison approach, based on the well-studied natural language inference (NLI) task, first splits a template into key concepts and then uses LLMs to decide if the concepts are entailed by the contract document. We also repeat this procedure in the opposite direction - contract clauses are tested for entailment against the template clause to see if they contain additional information. The non-entailed concepts are labelled, organized and filtered by frequency, and placed into a clause library, which is used to suggest changes to the template documents. We first show that our LLM-based approach outperforms all previous work on a publicly available dataset designed for NLI in the legal domain. We then apply it to a private real-world legal dataset, achieve an accuracy of 96.46%. Our approach is the first in the literature to produce a natural language comparison between legal contracts and their template documents. 2024.nllp-1.11 @@ -237,7 +237,7 @@ Multi-Property Multi-Label Documents Metadata Recommendation based on Encoder Embeddings NasredineChenikiPublications Office of the European Union - VidasDaudaraviciusEuropean Commission Joint Research Centre + VidasDaudaraviciusEuropean Commission Joint Research Centre AbdelfettahFeliachiPublications Office of the European Union DidierHardyPublications Office of the European Union Marc WilhelmKüsterPublications Office of the European Union @@ -270,7 +270,7 @@ <fixed-case>LAR</fixed-case>-<fixed-case>ECHR</fixed-case>: A New Legal Argument Reasoning Task and Dataset for Cases of the <fixed-case>E</fixed-case>uropean Court of Human Rights Odysseas S.Chlapanis - DimitriosGalanis + DimitriosGalanis IonAndroutsopoulos 267-279 We present Legal Argument Reasoning (LAR), a novel task designed to evaluate the legal reasoning capabilities of Large Language Models (LLMs). The task requires selecting the correct next statement (from multiple choice options) in a chain of legal arguments from court proceedings, given the facts of the case. We constructed a dataset (LAR-ECHR) for this task using cases from the European Court of Human Rights (ECHR). We evaluated seven general-purpose LLMs on LAR-ECHR and found that (a) the ranking of the models is aligned with that of LegalBench, an established US-based legal reasoning benchmark, even though LAR-ECHR is based on EU law, (b) LAR-ECHR distinguishes top models more clearly, compared to LegalBench, (c) even the best model (GPT-4o) obtains 75.8% accuracy on LAR-ECHR, indicating significant potential for further model improvement. The process followed to construct LAR-ECHR can be replicated with cases from other legal systems. @@ -406,7 +406,7 @@ Towards Supporting Legal Argumentation with <fixed-case>NLP</fixed-case>: Is More Data Really All You Need? SantoshT.y.s.sTechnical University of Munich - KevinAshleyUniversity of Pittsburgh + KevinAshleyUniversity of Pittsburgh KatieAtkinsonUniversity of Liverpool MatthiasGrabmairTechnical University of Munich 404-421 diff --git a/data/xml/2024.nlp4call.xml b/data/xml/2024.nlp4call.xml index 9a9b5f2974..4e2559ef8d 100644 --- a/data/xml/2024.nlp4call.xml +++ b/data/xml/2024.nlp4call.xml @@ -10,7 +10,7 @@ GriseldaDrouet DavidAlfter ElenaVolodina - ArneJönsson + ArneJönsson LiU Electronic Press
Rennes, France
October @@ -67,8 +67,8 @@ Evaluating Automatic Pronunciation Scoring with Crowd-sourced Speech Corpus Annotations NilsHjortnaes DanielDakota - SandraKübler - FrancisTyers + SandraKübler + FrancisTyers 67–77 2024.nlp4call-1.6 hjortnaes-etal-2024-evaluating @@ -84,7 +84,7 @@ Investigating strategies for lexical complexity prediction in a multilingual setting using generative language models and supervised approaches AbdelhakKelious - MathieuConstant + MathieuConstant ChristopheCoeur 96–114 2024.nlp4call-1.8 @@ -93,7 +93,7 @@ Developing a Pedagogically Oriented Interactive Reading Tool with Teachers in the Loops MihwaLee - BjörnRudzewitz + BjörnRudzewitz XiaobinChen 115–125 2024.nlp4call-1.9 @@ -102,7 +102,7 @@ Developing a Web-Based Intelligent Language Assessment Platform Powered by Natural Language Processing Technologies SarahLöber - BjörnRudzewitz + BjörnRudzewitz Daniela VerrattiSouto LuisaRibeiro-Flucht XiaobinChen @@ -147,8 +147,8 @@ A Conversational Intelligent Tutoring System for Improving <fixed-case>E</fixed-case>nglish Proficiency of Non-Native Speakers via Debriefing of Online Meeting Transcriptions Juan AntonioPérez-Ortiz - MiquelEsplà-Gomis - Víctor M.Sánchez-Cartagena + MiquelEsplà-Gomis + Víctor M.Sánchez-Cartagena FelipeSánchez-Martínez RomanChernysh GabrielMora-Rodríguez @@ -163,7 +163,7 @@ NicolasBallier ThomasGaillat AndrewSimpkin - John P.McCrae + John P.McCrae 199–208 2024.nlp4call-1.15 stearns-etal-2024-evaluating diff --git a/data/xml/2024.nlp4convai.xml b/data/xml/2024.nlp4convai.xml index 794f6f860d..d006ef0d66 100644 --- a/data/xml/2024.nlp4convai.xml +++ b/data/xml/2024.nlp4convai.xml @@ -11,7 +11,7 @@ YuLi AlonAlbalak HiromiWakaki - AlexandrosPapangelis + AlexandrosPapangelis Association for Computational Linguistics
Bangkok, Thailand
August @@ -27,7 +27,7 @@ On the Benchmarking of <fixed-case>LLM</fixed-case>s for Open-Domain Dialogue Evaluation JohnMendonçaInstituto Superior Técnico - AlonLaviePhrase and School of Computer Science, Carnegie Mellon University + AlonLaviePhrase and School of Computer Science, Carnegie Mellon University IsabelTrancosoInstituto Superior Técnico 1-12 Large Language Models (LLMs) have showcased remarkable capabilities in various Natural Language Processing tasks. For automatic open-domain dialogue evaluation in particular, LLMs have been seamlessly integrated into evaluation frameworks, and together with human evaluation, compose the backbone of most evaluations. However, existing evaluation benchmarks often rely on outdated datasets and evaluate aspects like Fluency and Relevance, which fail to adequately capture the capabilities and limitations of state-of-the-art chatbot models. This paper critically examines current evaluation benchmarks, highlighting that the use of older response generators and quality aspects fail to accurately reflect modern chatbot capabilities. A small annotation experiment on a recent LLM-generated dataset (SODA) reveals that LLM evaluators such as GPT-4 struggle to detect actual deficiencies in dialogues generated by current LLM chatbots. @@ -62,7 +62,7 @@ ChulakaGunasekaraInternational Business Machines HuiWanIBM Research AI JatinGanhotraInternational Business Machines - SachindraJoshi + SachindraJoshi MarinaDanilevskyInternational Business Machines 56-72 Dialogue summarization involves summarizing long conversations while preserving the most salient information. Real-life dialogues often involve naturally occurring variations (e.g., repetitions, hesitations). In this study, we systematically investigate the impact of such variations on state-of-the-art open dialogue summarization models whose details are publicly known (e.g., architectures, weights, and training corpora). To simulate real-life variations, we introduce two types of perturbations: utterance-level perturbations that modify individual utterances with errors and language variations, and dialogue-level perturbations that add non-informative exchanges (e.g., repetitions, greetings). We perform our analysis along three dimensions of robustness: consistency, saliency, and faithfulness, which aim to capture different aspects of performance of a summarization model. We find that both fine-tuned and instruction-tuned models are affected by input variations, with the latter being more susceptible, particularly to dialogue-level perturbations. We also validate our findings via human evaluation. Finally, we investigate whether the robustness of fine-tuned models can be improved by training them with a fraction of perturbed data. We find that this approach does not yield consistent performance gains, warranting further research. Overall, our work highlights robustness challenges in current open encoder-decoder summarization models and provides insights for future research. diff --git a/data/xml/2024.nlp4dh.xml b/data/xml/2024.nlp4dh.xml index dd1b0bd227..c749d28111 100644 --- a/data/xml/2024.nlp4dh.xml +++ b/data/xml/2024.nlp4dh.xml @@ -106,7 +106,7 @@ Language Resources From Prominent Born-Digital Humanities Texts are Still Needed in the Age of <fixed-case>LLM</fixed-case>s NatalieHervieux PeiranYao - SusanBrown + SusanBrown DenilsonBarbosa 85–104 The digital humanities (DH) community fundamentally embraces the use of computerized tools for the study and creation of knowledge related to language, history, culture, and human values, in which natural language plays a prominent role. Many successful DH tools rely heavily on Natural Language Processing methods, and several efforts exist within the DH community to promote the use of newer and better tools. Nevertheless, most NLP research is driven by web corpora that are noticeably different from texts commonly found in DH artifacts, which tend to use richer language and refer to rarer entities. Thus, the near-human performance achieved by state-of-the-art NLP tools on web texts might not be achievable on DH texts. We introduce a dataset carefully created by computer scientists and digital humanists intended to serve as a reference point for the development and evaluation of NLP tools. The dataset is a subset of a born-digital textbase resulting from a prominent and ongoing experiment in digital literary history, containing thousands of multi-sentence excerpts that are suited for information extraction tasks. We fully describe the dataset and show that its language is demonstrably different than the corpora normally used in training language resources in the NLP community. @@ -127,7 +127,7 @@ A Multi-task Framework with Enhanced Hierarchical Attention for Sentiment Analysis on Classical <fixed-case>C</fixed-case>hinese Poetry: Utilizing Information from Short Lines QuanqiDu - VeroniqueHoste + VeroniqueHoste 113–122 Classical Chinese poetry has a long history, dating back to the 11th century BC. By investigating the sentiment expressed in the poetry, we can gain more insights in the emotional life and history development in ancient Chinese culture. To help improve the sentiment analysis performance in the field of classical Chinese poetry, we propose to utilize the unique information from the individual short lines that compose the poem, and introduce a multi-task framework with hierarchical attention enhanced with short line sentiment labels. Specifically, the multi-task framework comprises sentiment analysis for both the overall poem and the short lines, while the hierarchical attention consists of word- and sentence-level attention, with the latter enhanced with additional information from short line sentiments. Our experimental results showcase that our approach leveraging more fine-grained information from short lines outperforms the state-of-the-art, achieving an accuracy score of 72.88% and an F1-macro score of 71.05%. 2024.nlp4dh-1.11 @@ -160,7 +160,7 @@ Canonical Status and Literary Influence: A Comparative Study of <fixed-case>D</fixed-case>anish Novels from the Modern Breakthrough (1870–1900) - PascaleFeldkamp + PascaleFeldkamp AlieLassche JanKostkan MártonKardos @@ -210,8 +210,8 @@ RasulDent JulietteJanes ThibaultClerice - PedroOrtiz Suarez - BenoîtSagot + PedroOrtiz Suarez + BenoîtSagot 189–199 Whether or not several Creole languages which developed during the early modern period can be considered genetic descendants of European languages has been the subject of intense debate. This is in large part due to the absence of evidence of intermediate forms. This work introduces a new open corpus, the Molyé corpus, which combines stereotypical representations of three kinds of language variation in Europe with early attestations of French-based Creole languages across a period of 400 years. It is intended to facilitate future research on the continuity between contact situations in Europe and Creolophone (former) colonies. 2024.nlp4dh-1.18 @@ -266,7 +266,7 @@ MyrtoTsigkouli Chris W.Jenkins FilipMiletić - SabineSchulte im Walde + SabineSchulte im Walde 240–246 This paper provides a framework and tool set for computing and visualising dynamic, time- specific semantic neighbourhoods of English noun-noun compounds and their constituents over time. Our framework not only identifies salient vector-space dimensions and neighbours in notoriously sparse data: we specifically bring together changes in meaning aspects and degrees of (non-)compositionality. 2024.nlp4dh-1.23 @@ -320,10 +320,10 @@ Evaluating Open-Source <fixed-case>LLM</fixed-case>s in Low-Resource Languages: Insights from <fixed-case>L</fixed-case>atvian High School Exams RobertsDarģis - GuntisBārzdiņš - IngunaSkadiņa - NormundsGrūzītis - BaibaSaulīte + GuntisBārzdiņš + IngunaSkadiņa + NormundsGrūzītis + BaibaSaulīte 289–293 The latest large language models (LLM) have significantly advanced natural language processing (NLP) capabilities across various tasks. However, their performance in low-resource languages, such as Latvian with 1.5 million native speakers, remains substantially underexplored due to both limited training data and the absence of comprehensive evaluation benchmarks. This study addresses this gap by conducting a systematic assessment of prominent open-source LLMs on natural language understanding (NLU) and natural language generation (NLG) tasks in Latvian. We utilize standardized high school centralized graduation exams as a benchmark dataset, offering relatable and diverse evaluation scenarios that encompass multiple-choice questions and complex text analysis tasks. Our experimental setup involves testing models from the leading LLM families, including Llama, Qwen, Gemma, and Mistral, with OpenAI’s GPT-4 serving as a performance reference. The results reveal that certain open-source models demonstrate competitive performance in NLU tasks, narrowing the gap with GPT-4. However, all models exhibit notable deficiencies in NLG tasks, specifically in generating coherent and contextually appropriate text analyses, highlighting persistent challenges in NLG for low-resource languages. These findings contribute to efforts to develop robust multilingual benchmarks and improve LLM performance in diverse linguistic contexts. 2024.nlp4dh-1.28 @@ -358,7 +358,7 @@ Testing and Adapting the Representational Abilities of Large Language Models on Folktales in Low-Resource Languages J. A.Meaney - BeatriceAlex + BeatriceAlex WilliamLamb 319–324 Folktales are a rich resource of knowledge about the society and culture of a civilisation. Digital folklore research aims to use automated techniques to better understand these folktales, and it relies on abstract representations of the textual data. Although a number of large language models (LLMs) claim to be able to represent low-resource langauges such as Irish and Gaelic, we present two classification tasks to explore how useful these representations are, and three adaptations to improve the performance of these models. We find that adapting the models to work with longer sequences, and continuing pre-training on the domain of folktales improves classification performance, although these findings are tempered by the impressive performance of a baseline SVM with non-contextual features. @@ -474,7 +474,7 @@ Exploring Large Language Models for Qualitative Data Analysis TimFischer - ChrisBiemann + ChrisBiemann 423–437 This paper explores the potential of Large Language Models (LLMs) to enhance qualitative data analysis (QDA) workflows within the open-source QDA platform developed at our university. We identify several opportunities within a typical QDA workflow where AI assistance can boost researcher productivity and translate these opportunities into corresponding NLP tasks: document classification, information extraction, span classification, and text generation. A benchmark tailored to these QDA activities is constructed, utilizing English and German datasets that align with relevant use cases. Focusing on efficiency and accessibility, we evaluate the performance of three prominent open-source LLMs - Llama 3.1, Gemma 2, and Mistral NeMo - on this benchmark. Our findings reveal the promise of LLM integration for streamlining QDA workflows, particularly for English-language projects. Consequently, we have implemented the LLM Assistant as an opt-in feature within our platform and report the implementation details. With this, we hope to further democratize access to AI capabilities for qualitative data analysis. 2024.nlp4dh-1.41 @@ -572,7 +572,7 @@ AndreasMarfurt AshleyThornton DavidSylvan - JamesHenderson + JamesHenderson 513–520 Recent advances in language modeling have focused on (potentially multiple-choice) question answering, open-ended generation, or math and coding problems. We look at a more nuanced task: the interpretation of statements of political actors. To this end, we present a dataset of policy announcements and corresponding annotated interpretations, on the topic of US foreign policy relations with Russia in the years 1993 up to 2016. We analyze the performance of finetuning standard sequence-to-sequence models of varying sizes on predicting the annotated interpretations and compare them to few-shot prompted large language models. We find that 1) model size is not the main factor for success on this task, 2) finetuning smaller models provides both quantitatively and qualitatively superior results to in-context learning with large language models, but 3) large language models pick up the annotation format and approximate the category distribution with just a few in-context examples. 2024.nlp4dh-1.50 diff --git a/data/xml/2024.nlp4hr.xml b/data/xml/2024.nlp4hr.xml index d317fe9467..633a06a303 100644 --- a/data/xml/2024.nlp4hr.xml +++ b/data/xml/2024.nlp4hr.xml @@ -6,7 +6,7 @@ EstevamHruschka ThomLake NaokiOtani - TomMitchell + TomMitchell Association for Computational Linguistics
St. Julian’s, Malta
March @@ -24,7 +24,7 @@ ElenaSengerLudwig-Maximilians-Universität München MikeZhangIT University of Copenhagen Robvan der Goot - BarbaraPlankLudwig-Maximilians-Universität München and IT University of Copenhagen + BarbaraPlankLudwig-Maximilians-Universität München and IT University of Copenhagen 1-15 Recent years have brought significant advances to Natural Language Processing (NLP), which enabled fast progress in the field of computational job market analysis. Core tasks in this application domain are skill extraction and classification from job postings. Because of its quick growth and its interdisciplinary nature, there is no exhaustive assessment of this field. This survey aims to fill this gap by providing a comprehensive overview of deep learning methodologies, datasets, and terminologies specific to NLP-driven skill extraction. Our comprehensive cataloging of publicly available datasets addresses the lack of consolidated information on dataset creation and characteristics. Finally, the focus on terminology addresses the current lack of consistent definitions for important concepts, such as hard and soft skills, and terms relating to skill extraction and classification. 2024.nlp4hr-1.1 diff --git a/data/xml/2024.nlp4pi.xml b/data/xml/2024.nlp4pi.xml index 92158b2815..240f1551c7 100644 --- a/data/xml/2024.nlp4pi.xml +++ b/data/xml/2024.nlp4pi.xml @@ -6,9 +6,9 @@ DarynaDementieva OanaIgnat ZhijingJin - RadaMihalcea + RadaMihalcea GiorgioPiatti - JoelTetreault + JoelTetreault StevenWilson JieyuZhao Association for Computational Linguistics @@ -91,7 +91,7 @@ SpandanaGellaAmazon ApurvVermaBloomberg TagyoungChungAmazon - JingHuangAmazon Alexa AI + JingHuangAmazon Alexa AI NanyunPengUniversity of California, Los Angeles 78-97 Creating children’s stories through text generation is a creative task that requires stories to be both entertaining and suitable for young audiences. However, since current story generation systems often rely on pre-trained language models fine-tuned with limited story data, they may not always prioritize child-friendliness. This can lead to the unintended generation of stories containing problematic elements such as violence, profanity, and biases. Regrettably, despite the significance of these concerns, there is a lack of clear guidelines and benchmark datasets for ensuring content safety for children. In this paper, we introduce a taxonomy specifically tailored to assess content safety in text, with a strong emphasis on children’s well-being. We present PG-Story, a dataset that includes detailed annotations for both sentence-level and discourse-level safety. We demonstrate the potential of identifying unsafe content through self-diagnosis and employing controllable generation techniques during the decoding phase to minimize unsafe elements in generated stories. @@ -103,7 +103,7 @@ Towards Explainable Multi-Label Text Classification: A Multi-Task Rationalisation Framework for Identifying Indicators of Forced Labour Erick MendezGuzman ViktorSchlegelImperial College London - RizaBatista-NavarroUniversity of Manchester + RizaBatista-NavarroUniversity of Manchester 98-112 The importance of rationales, or natural language explanations, lies in their capacity to bridge the gap between machine predictions and human understanding, by providing human-readable insights into why a text classifier makes specific decisions. This paper presents a novel multi-task rationalisation approach tailored to enhancing the explainability of multi-label text classifiers to identify indicators of forced labour. Our framework integrates a rationale extraction task with the classification objective and allows the inclusion of human explanations during training. We conduct extensive experiments using transformer-based models on a dataset consisting of 2,800 news articles, each annotated with labels and human-generated explanations. Our findings reveal a statistically significant difference between the best-performing architecture leveraging human rationales during training and variants using only labels. Specifically, the supervised model demonstrates a 10% improvement in predictive performance measured by the weighted F1 score, a 15% increase in the agreement between human and machine-generated rationales, and a 4% improvement in the generated rationales’ comprehensiveness. These results hold promising implications for addressing complex human rights issues with greater transparency and accountability using advanced NLP techniques. 2024.nlp4pi-1.8 @@ -270,7 +270,7 @@ JiawenWang LongfeiZuo SiyaoPengLudwig-Maximilians-Universität München - BarbaraPlankLudwig-Maximilians-Universität München and IT University of Copenhagen + BarbaraPlankLudwig-Maximilians-Universität München and IT University of Copenhagen 315-326 Climate change (CC) has attracted increasing attention in NLP in recent years. However, detecting the stance on CC in multimodal data is understudied and remains challenging due to a lack of reliable datasets. To improve the understanding of public opinions and communication strategies, this paper presents MultiClimate, the first open-source manually-annotated stance detection dataset with 100 CC-related YouTube videos and 4,209 frame-transcript pairs. We deploy state-of-the-art vision and language models, as well as multimodal models for MultiClimate stance detection. Results show that text-only BERT significantly outperforms image-only ResNet50 and ViT. Combining both modalities achieves state-of-the-art, 0.747/0.749 in accuracy/F1. Our 100M-sized fusion models also beat CLIP and BLIP, as well as the much larger 9B-sized multimodal IDEFICS and text-only Llama3 and Gemma2, indicating that multimodal stance detection remains challenging for large language models. Our code, dataset, as well as supplementary materials, are available at https://github.com/werywjw/MultiClimate. 2024.nlp4pi-1.27 diff --git a/data/xml/2024.nlpaics.xml b/data/xml/2024.nlpaics.xml index eabcf5e9c6..74d9b31805 100644 --- a/data/xml/2024.nlpaics.xml +++ b/data/xml/2024.nlpaics.xml @@ -3,7 +3,7 @@ Proceedings of the First International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security - RuslanMitkov + RuslanMitkov SaadEzzini TharinduRanasinghe IgnatiusEzeani @@ -120,7 +120,7 @@ The Influence of the Perplexity Score in the Detection of Machine-generated Texts Alberto JoséGutiérrez Megías - L. AlfonsoUreña-López + L. AlfonsoUreña-López EugenioMartínez Cámara 80–85 The high performance of large language models (LLM) generating natural language represents a real threat, since they can be leveraged to generate any kind of deceptive content. Since there are still disparities among the language generated by machines and the human language, we claim that perplexity may be used as classification signal to discern between machine and human text. We propose a classification model based on XLM-RoBERTa, and we evaluate it on the M4 dataset. The results show that the perplexity score is useful for the identification of machine generated text, but it is constrained by the differences among the LLMs used in the training and test sets. @@ -201,7 +201,7 @@ Abusive Speech Detection in <fixed-case>S</fixed-case>erbian using Machine Learning DankaJokić - RankaStanković + RankaStanković BranislavaŠandrih Todorović 153–163 The increase in the use of abusive language on social media and virtual platforms has emphasized the importance of developing efficient hate speech detection systems. While there have been considerable advancements in creating such systems for the English language, resources are scarce for other languages, such as Serbian. This research paper explores the use of machine learning and deep learning techniques to identify abusive language in Serbian text. The authors used AbCoSER, a dataset of Serbian tweets that have been labeled as abusive or non-abusive. They evaluated various algorithms to classify tweets, and the best-performing model is based on the deep learning transformer architecture. The model attained an F1 macro score of 0.827, a figure that is commensurate with the benchmarks established for offensive speech datasets of a similar magnitude in other languages. diff --git a/data/xml/2024.nlpcss.xml b/data/xml/2024.nlpcss.xml index fc45fba012..eeb797f153 100644 --- a/data/xml/2024.nlpcss.xml +++ b/data/xml/2024.nlpcss.xml @@ -35,7 +35,7 @@ Connecting the Dots in News Analysis: Bridging the Cross-Disciplinary Disparities in Media Bias and Framing GiselaVallejo - TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne + TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne LeaFrermannUniversity of Melbourne 16-31 The manifestation and effect of bias in news reporting have been central topics in the social sciences for decades, and have received increasing attention in the NLP community recently. While NLP can help to scale up analyses or contribute automatic procedures to investigate the impact of biased news in society, we argue that methodologies that are currently dominant fall short of capturing the complex questions and effects addressed in theoretical media studies. This is problematic because it diminishes the validity and safety of the resulting tools and applications. Here, we review and critically compare task formulations, methods and evaluation schemes in the social sciences and NLP. We discuss open questions and suggest possible directions to close identified gaps between theory and predictive models, and their evaluation. These include model transparency, considering document-external information, and cross-document reasoning. @@ -47,7 +47,7 @@ The Crime of Being Poor: Associations between Crime and Poverty on Social Media in Eight Countries GeorginaCurtoUniversity of Notre Dame SvetlanaKiritchenkoNational Research Council Canada - KathleenFraserNational Research Council Canada + KathleenFraserNational Research Council Canada IsarNejadgholiNational Research Council Canada and University of Ottawa 32-45 Negative public perceptions of people living in poverty can hamper policies and programs that aim to help the poor. One prominent example of social bias and discrimination against people in need is the persistent association of poverty with criminality. The phenomenon has two facets: first, the belief that poor people are more likely to engage in crime (e.g., stealing, mugging, violence) and second, the view that certain behaviors directly resulting from poverty (e.g., living outside, panhandling) warrant criminal punishment. In this paper, we use large language models (LLMs) to identify examples of crime–poverty association (CPA) in English social media texts. We analyze the online discourse on CPA across eight geographically-diverse countries, and find evidence that the CPA rates are higher within the sample obtained from the U.S. and Canada, as compared to the other countries such as South Africa, despite the latter having higher poverty, criminality, and inequality indexes. We further uncover and analyze the most common themes in CPA posts and find more negative and biased attitudes toward people living in poverty in posts from the U.S. and Canada. These results could partially be explained by cultural factors related to the tendency to overestimate the equality of opportunities and social mobility in the U.S. and Canada. These findings have consequences for policy-making and open a new path of research for poverty mitigation with the focus not only on the redistribution of wealth but also on the mitigation of bias and discrimination against people in need. @@ -62,7 +62,7 @@ ShreejaDahalFlorida International University W. Victor H.YarlottFlorida International University DianaGomezFlorida International University - MarkFinlaysonFlorida International University + MarkFinlaysonFlorida International University 46-56 Motifs are distinctive, recurring, widely used idiom-like words or phrases, often originating in folklore and usually strongly anchored to a particular cultural or national group. Motifs are significant communicative devices across a wide range of media—including news, literature, and propaganda—because they can concisely imply a large set of culturally relevant associations. One difficulty of understanding motifs is that their meaning is usually implicit, so for an out-group person the meaning is inaccessible. We present the Motif Implicit Meaning Extractor (MIME), a proof-of-concept system designed to automatically identify a motif’s implicit meaning, as evidenced by textual uses of the motif across a large set data. MIME uses several sources (including motif indices, Wikipedia pages on the motifs, explicit explanations of motifs from in-group informants, and news/social media posts where the motif is used) and can generate a structured report of information about a motif understandable to an out-group person. In addition to a variety of examples and information drawn from structured sources, the report includes implicit information about a motif such as the type of reference (e.g., a person, an organization, etc.), it’s general connotation (strongly negative, slightly negative, neutral, etc.), and it’s associations (typically adjectives). We describe how MIME works and demonstrate its operation on a small set of manually curated motifs. We perform a qualitative evaluation of the output, and assess the difficulty of the problem, showing that explicit motif information provided by cultural informants is critical to high quality output, although mining motif usages in news and social media provides useful additional depth. A system such as MIME, appropriately scaled up, would potentially be quite useful to an out-group person trying to understand in-group usages of motifs, and has wide potential applications in domains such as literary criticism, cultural heritage, marketed and branding, and intelligence analysis. 2024.nlpcss-1.4 @@ -136,7 +136,7 @@ Clustering Document Parts: Detecting and Characterizing Influence Campaigns from Documents ZhengxiangWangState University of New York at Stony Brook - OwenRambowStony Brook University + OwenRambowStony Brook University 132-143 We propose a novel clustering pipeline to detect and characterize influence campaigns from documents. This approach clusters parts of document, detects clusters that likely reflect an influence campaign, and then identifies documents linked to an influence campaign via their association with the high-influence clusters. Our approach outperforms both the direct document-level classification and the direct document-level clustering approach in predicting if a document is part of an influence campaign. We propose various novel techniques to enhance our pipeline, including using an existing event factuality prediction system to obtain document parts, and aggregating multiple clustering experiments to improve the performance of both cluster and document classification. Classifying documents after clustering not only accurately extracts the parts of the documents that are relevant to influence campaigns, but also captures influence campaigns as a coordinated and holistic phenomenon. Our approach makes possible more fine-grained and interpretable characterizations of influence campaigns from documents. 2024.nlpcss-1.10 @@ -149,7 +149,7 @@ ShamikRoyAmazon AlexanderHoyleUniversity of Maryland, College Park DanielAcunaComputer Science Department, University of Colorado at Boulder - Maria LeonorPachecoUniversity of Colorado at Boulder + Maria LeonorPachecoUniversity of Colorado at Boulder 144-158 With the rise in the prevalence of cross-disciplinary research, there is a need to develop methods to characterize its practices. Current computational methods to evaluate interdisciplinary engagement—such as affiliation diversity, keywords, and citation patterns—are insufficient to model the degree of engagement between disciplines, as well as the way in which the complementary expertise of co-authors is harnessed. In this paper, we propose an automated framework to address some of these issues on a large scale. Our framework tracks interdisciplinary citations in scientific articles and models: 1) the section and position in which they appear, and 2) the argumentative role that they play in the writing. To showcase our framework, we perform a preliminary analysis of interdisciplinary engagement in published work at the intersection of natural language processing and computational social science in the last decade. 2024.nlpcss-1.11 diff --git a/data/xml/2024.nlperspectives.xml b/data/xml/2024.nlperspectives.xml index 69f8d88042..4a0242d993 100644 --- a/data/xml/2024.nlperspectives.xml +++ b/data/xml/2024.nlperspectives.xml @@ -107,7 +107,7 @@ Soft metrics for evaluation with disagreements: an assessment GiuliaRizzi ElisaLeonardelli - MassimoPoesio + MassimoPoesio AlexandraUma MajaPavlovic SilviuPaun @@ -121,7 +121,7 @@ Designing <fixed-case>NLP</fixed-case> Systems That Adapt to Diverse Worldviews ClaudiuCreanga - Liviu P.Dinu + Liviu P.Dinu 95–99 Natural Language Inference (NLI) is foundational for evaluating language understanding in AI. However, progress has plateaued, with models failing on ambiguous examples and exhibiting poor generalization. We argue that this stems from disregarding the subjective nature of meaning, which is intrinsically tied to an individual’s weltanschauung (which roughly translates to worldview). Existing NLP datasets often obscure this by aggregating labels or filtering out disagreement. We propose a perspectivist approach: building datasets that capture annotator demographics, values, and justifications for their labels. Such datasets would explicitly model diverse worldviews. Our initial experiments with a subset of the SBIC dataset demonstrate that even limited annotator metadata can improve model performance. 2024.nlperspectives-1.10 @@ -130,7 +130,7 @@ The Effectiveness of <fixed-case>LLM</fixed-case>s as Annotators: A Comparative Overview and Empirical Analysis of Direct Representation MajaPavlovic - MassimoPoesio + MassimoPoesio 100–110 Recent studies focus on exploring the capability of Large Language Models (LLMs) for data annotation. Our work, firstly, offers a comparative overview of twelve such studies that investigate labelling with LLMs, particularly focusing on classification tasks. Secondly, we present an empirical analysis that examines the degree of alignment between the opinion distributions returned by GPT and those provided by human annotators across four subjective datasets. Our analysis supports a minority of studies that are considering diverse perspectives when evaluating data annotation tasks and highlights the need for further research in this direction. 2024.nlperspectives-1.11 @@ -147,7 +147,7 @@ <fixed-case>O</fixed-case>rigam<fixed-case>IM</fixed-case>: A Dataset of Ambiguous Sentence Interpretations for Social Grounding and Implicit Language Understanding LiesbethAllein - Marie-FrancineMoens + Marie-FrancineMoens 116–122 Sentences elicit different interpretations and reactions among readers, especially when there is ambiguity in their implicit layers. We present a first-of-its kind dataset of sentences from Reddit, where each sentence is annotated with multiple interpretations of its meanings, understandings of implicit moral judgments about mentioned people, and reader impressions of its author. Scrutiny of the dataset proves the evoked variability and polarity in reactions. It further shows that readers strongly disagree on both the presence of implied judgments and the social acceptability of the behaviors they evaluate. In all, the dataset offers a valuable resource for socially grounding language and modeling the intricacies of implicit language understanding from multiple reader perspectives. 2024.nlperspectives-1.13 @@ -156,7 +156,7 @@ Linguistic Fingerprint in Transformer Models: How Language Variation Influences Parameter Selection in Irony Detection MicheleMastromattei - Fabio MassimoZanzotto + Fabio MassimoZanzotto 123–130 This paper explores the correlation between linguistic diversity, sentiment analysis and transformer model architectures. We aim to investigate how different English variations impact transformer-based models for irony detection. To conduct our study, we used the EPIC corpus to extract five diverse English variation-specific datasets and applied the KEN pruning algorithm on five different architectures. Our results reveal several similarities between optimal subnetworks, which provide insights into the linguistic variations that share strong resemblances and those that exhibit greater dissimilarities. We discovered that optimal subnetworks across models share at least 60% of their parameters, emphasizing the significance of parameter values in capturing and interpreting linguistic variations. This study highlights the inherent structural similarities between models trained on different variants of the same language and also the critical role of parameter values in capturing these nuances. 2024.nlperspectives-1.14 diff --git a/data/xml/2024.nlrse.xml b/data/xml/2024.nlrse.xml index 1bd64bbc7d..5feb387710 100644 --- a/data/xml/2024.nlrse.xml +++ b/data/xml/2024.nlrse.xml @@ -3,9 +3,9 @@ Proceedings of the 2nd Workshop on Natural Language Reasoning and Structured Explanations (@ACL 2024) - BhavanaDalvi Mishra + BhavanaDalvi Mishra GregDurrett - PeterJansen + PeterJansen BenLipkin DaniloNeves Ribeiro LionelWong @@ -71,7 +71,7 @@ <fixed-case>S</fixed-case>umm<fixed-case>EQ</fixed-case>u<fixed-case>AL</fixed-case>: Summarization Evaluation via Question Answering using Large Language Models JunyuanLiu - ZhengyanShi + ZhengyanShi AldoLipaniUniversity College London, University of London 46-55 Summarization is hard to evaluate due to its diverse and abstract nature. Although N-gram-based metrics like BLEU and ROUGE are prevalent, they often do not align well with human evaluations. While model-based alternatives such as BERTScore improve, they typically require extensive labelled data. The advent of Large Language Models (LLMs) presents a promising avenue for evaluation. To this end, we introduce SummEQuAL, a novel content-based framework using LLMs for unified, reproducible summarization evaluation. SummEQuAL evaluates summaries by comparing their content with the source document, employing a question-answering approach to gauge both recall and precision. To validate SummEQuAL’s effectiveness, we develop a dataset based on MultiWOZ. We conduct experiments on SummEval and our MultiWOZ-based dataset, showing that SummEQuAL largely improves the quality of summarization evaluation. Notably, SummEQuAL demonstrates a 19.7% improvement over QuestEval in terms of sample-level Pearson correlation with human assessments of consistency on the SummEval dataset. Furthermore, it exceeds the performance of the BERTScore baseline by achieving a 17.3% increase in Spearman correlation on our MultiWOZ-based dataset. Our study illuminates the potential of LLMs for a unified evaluation framework, setting a new paradigm for future summarization evaluation. diff --git a/data/xml/2024.osact.xml b/data/xml/2024.osact.xml index 8b1279748c..d20417f8e2 100644 --- a/data/xml/2024.osact.xml +++ b/data/xml/2024.osact.xml @@ -25,7 +25,7 @@ SehamAlghamdi YoucefBenkhedda BasmaAlharbi - RizaBatista-Navarro + RizaBatista-Navarro 1–12 We are currently witnessing a concerning surge in the spread of hate speech across various social media platforms, targeting individuals or groups based on their protected characteristics such as race, religion, nationality and gender. This paper focuses on the detection of hate type (Task 1) and hate target (Task 2) in the Arabic language. To comprehensively address this problem, we have combined and re-annotated hate speech tweets from existing publicly available corpora, resulting in the creation of AraTar, the first and largest Arabic corpus annotated with support for multi-label classification for both hate speech types and target detection with a high inter-annotator agreement. Additionally, we sought to determine the most effective machine learning-based approach for addressing this issue. To achieve this, we compare and evaluate different approaches, including: (1) traditional machine learning-based models, (2) deep learning-based models fed with contextual embeddings, and (3) fine-tuning language models (LMs). Our results demonstrate that fine-tuning LMs, specifically using AraBERTv0.2-twitter (base), achieved the highest performance, with a micro-averaged F1-score of 84.5% and 85.03%, and a macro-averaged F1-score of 77.46% and 73.15%, for Tasks 1 and 2, respectively. 2024.osact-1.1 diff --git a/data/xml/2024.paclic.xml b/data/xml/2024.paclic.xml index 3a6d842cef..f2c0fe7393 100644 --- a/data/xml/2024.paclic.xml +++ b/data/xml/2024.paclic.xml @@ -10,7 +10,7 @@ 2024.paclic-1 paclic NathanielOco - Shirley N.Dita + Shirley N.Dita Ariane MacalingaBorlongan Jong-BokKim @@ -20,7 +20,7 @@ Large Language Models and Natural Language Processing On Minority Languages: A Systematic Review - Rachel EditaRoxas + Rachel EditaRoxas 1–8 2024.paclic-1.1 roxas-2024-large @@ -57,8 +57,8 @@ Advancing <fixed-case>V</fixed-case>ietnamese Information Retrieval with Learning Objective and Benchmark VinhNguyen NamTran - LongNguyen - DienDinh + LongNguyen + DienDinh 46–56 2024.paclic-1.5 nguyen-etal-2024-advancing @@ -92,7 +92,7 @@ Hung-NghiepTran AndréGreiner-Petter FelixBeierle - AkikoAizawa + AkikoAizawa 82–93 2024.paclic-1.8 to-etal-2024-skt5scisumm @@ -251,7 +251,7 @@ YuanyuanCai SatoshiKosugi KotaroFunakoshi - ManabuOkumura + ManabuOkumura 246–255 2024.paclic-1.24 cai-etal-2024-enhancing @@ -269,7 +269,7 @@ <fixed-case>MERE</fixed-case>: A Deep Learning Architecture Using Multi-Fragment Ensemble for Relation Extraction - Hoang-QuynhLe + Hoang-QuynhLe Duy-CatCan 267–276 2024.paclic-1.26 @@ -296,7 +296,7 @@ A Novel Interpretability Metric for Explaining Bias in Language Models: Applications on Multilingual Models from <fixed-case>S</fixed-case>outheast <fixed-case>A</fixed-case>sia Lance Calvin LimGamboa - MarkLee + MarkLee 296–305 2024.paclic-1.29 gamboa-lee-2024-novel @@ -343,7 +343,7 @@ DongyuanLi SatoshiKosugi KotaroFunakoshi - ManabuOkumura + ManabuOkumura 346–353 2024.paclic-1.34 chuang-etal-2024-lpls @@ -445,8 +445,8 @@ <fixed-case>V</fixed-case>i<fixed-case>H</fixed-case>erb<fixed-case>QA</fixed-case>: A Robust <fixed-case>QA</fixed-case> Model for <fixed-case>V</fixed-case>ietnamese Traditional Herbal Medicine QuyenTruong - LongNguyen - DienDinh + LongNguyen + DienDinh 449–466 2024.paclic-1.45 truong-etal-2024-viherbqa @@ -454,7 +454,7 @@ <fixed-case>EATT</fixed-case>: Knowledge Graph Integration in Transformer Architecture PhongVo - LongNguyen + LongNguyen 467–478 2024.paclic-1.46 vo-nguyen-2024-eatt @@ -463,8 +463,8 @@ Multi-mask Prefix Tuning: Applying Multiple Adaptive Masks on Deep Prompt Tuning QuiTu TrungNguyen - LongNguyen - DienDinh + LongNguyen + DienDinh 479–487 2024.paclic-1.47 tu-etal-2024-multi-mask @@ -475,14 +475,14 @@ Duc-LocVu HuongNguyen-Thi-Thuy Duy-CatCan - Hoang-QuynhLe + Hoang-QuynhLe 488–496 2024.paclic-1.48 hoang-etal-2024-contrastive Kalahi: A handcrafted, grassroots cultural <fixed-case>LLM</fixed-case> evaluation suite for <fixed-case>F</fixed-case>ilipino - Jann RaileyMontalan + Jann RaileyMontalan Jian GangNgui Wei QiLeong YosephineSusanto @@ -587,8 +587,8 @@ <fixed-case>VHE</fixed-case>: A New Dataset for Event Extraction from <fixed-case>V</fixed-case>ietnamese Historical Texts TrucHoang - LongNguyen - DienDinh + LongNguyen + DienDinh 619–634 2024.paclic-1.59 hoang-etal-2024-vhe @@ -893,8 +893,8 @@ A Comparative Study of Chart Summarization AnChu ThongHuynh - LongNguyen - DienDinh + LongNguyen + DienDinh 971–981 2024.paclic-1.92 chu-etal-2024-comparative diff --git a/data/xml/2024.parlaclarin.xml b/data/xml/2024.parlaclarin.xml index 0faac55ae9..de411c8a60 100644 --- a/data/xml/2024.parlaclarin.xml +++ b/data/xml/2024.parlaclarin.xml @@ -52,7 +52,7 @@ <fixed-case>B</fixed-case>ulgarian <fixed-case>P</fixed-case>arla<fixed-case>M</fixed-case>int 4.0 corpus as a testset for Part-of-speech tagging and Named Entity Recognition PetyaOsenova - KirilSimov + KirilSimov 30–35 The paper discusses some fine-tuned models for the tasks of part-of-speech tagging and named entity recognition. The fine-tuning was performed on the basis of an existing BERT pre-trained model and two newly pre-trained BERT models for Bulgarian that are cross-tested on the domain of the Bulgarian part of the ParlaMint corpora as a new domain. In addition, a comparison has been made between the performance of the new fine-tuned BERT models and the available results from the Stanza-based model which the Bulgarian part of the ParlaMint corpora has been annotated with. The observations show the weaknesses in each model as well as the common challenges. 2024.parlaclarin-1.4 @@ -94,7 +94,7 @@ MiettaLennes JyrkiNiemi JackRueter - KristerLindén + KristerLindén 48–56 In this paper, we use automatic language identification to investigate the usage of different languages in the plenary sessions of the Parliament of Finland. Finland has two national languages, Finnish and Swedish. The plenary sessions are published as transcriptions of speeches in Parliament, reflecting the language the speaker used. In addition to charting out language use, we demonstrate how language identification can be used to audit the quality of the dataset. On the one hand, we made slight improvements to our language identifier; on the other hand, we made a list of improvement suggestions for the next version of the dataset. 2024.parlaclarin-1.8 @@ -155,7 +155,7 @@ MedenKatja VaidasMorkevicius NikolaLjubešić - TomažErjavec + TomažErjavec 94–100 We introduce a dataset on political orientation and power position identification. The dataset is derived from ParlaMint, a set of comparable corpora of transcribed parliamentary speeches from 29 national and regional parliaments. We introduce the dataset, provide the reasoning behind some of the choices during its creation, present statistics on the dataset, and, using a simple classifier, some baseline results on predicting political orientation on the left-to-right axis, and on power position identification, i.e., distinguishing between the speeches delivered by governing coalition party members from those of opposition party members. 2024.parlaclarin-1.14 @@ -187,7 +187,7 @@ Investigating Political Ideologies through the <fixed-case>G</fixed-case>reek <fixed-case>P</fixed-case>arla<fixed-case>M</fixed-case>int corpus MariaGavriilidou DimitrisGkoumas - SteliosPiperidis + SteliosPiperidis ProkopisProkopidis 116–120 This paper has two objectives: to present (a) the creation of ParlaMint-GR, the Greek part of the ParlaMint corpora of debates in the parliaments of Europe, and (b) preliminary results on its comparison with a corpus of Greek party manifestos, aiming at the investigation of the ideologies of the Greek political parties and members of the Parliament. Additionally, a gender related comparison is explored. The creation of the ParlaMint-GR corpus is discussed, together with the solutions adopted for various challenges faced. The corpus of party manifestos, available through CLARIN:EL, serves for a comparative study with the corpus of speeches delivered by the members of the Greek Parliament, with the aim to identify the ideological positions of parties and politicians. @@ -252,7 +252,7 @@ A new Resource and Baselines for Opinion Role Labelling in <fixed-case>G</fixed-case>erman Parliamentary Debates InesRehbein - Simone PaoloPonzetto + Simone PaoloPonzetto 163–170 Detecting opinions, their holders and targets in parliamentary debates provides an interesting layer of analysis, for example, to identify frequent targets of opinions for specific topics, actors or parties. In the paper, we present GePaDe-ORL, a new dataset for German parliamentary debates where subjective expressions, their opinion holders and targets have been annotated. We describe the annotation process and report baselines for predicting those annotations in our new dataset. 2024.parlaclarin-1.24 diff --git a/data/xml/2024.politicalnlp.xml b/data/xml/2024.politicalnlp.xml index c09e828d14..0ed2343471 100644 --- a/data/xml/2024.politicalnlp.xml +++ b/data/xml/2024.politicalnlp.xml @@ -31,7 +31,7 @@ Event Detection in the Socio Political Domain EmmanuelCartier - HristoTanev + HristoTanev 12–21 In this paper we present two approaches for detection of socio political events: the first is based on manually crafted keyword combinations and the second one is based on a BERT classifier. We compare the performance of the two systems on a dataset of socio-political events. Interestingly, the systems demonstrate complementary performance: both showing their best accuracy on non overlapping sets of event types. In the evaluation section we provide insights on the effect of taxonomy mapping on the event detection evaluation. We also review in the related work section the most important resources and approaches for event extraction in the recent years. 2024.politicalnlp-1.2 @@ -61,7 +61,7 @@ Analysing Pathos in User-Generated Argumentative Text NataliaEvgrafova - VeroniqueHoste + VeroniqueHoste ElsLefever 39–44 While persuasion has been extensively examined in the context of politicians’ speeches, there exists a notable gap in the understanding of the pathos role in user-generated argumentation. This paper presents an exploratory study into the pathos dimension of user-generated arguments and formulates ideas on how pathos could be incorporated in argument mining. Using existing sentiment and emotion detection tools, this research aims to obtain insights into the role of emotion in argumentative public discussion on controversial topics, explores the connection between sentiment and stance, and detects frequent emotion-related words for a given topic. @@ -92,7 +92,7 @@ FedericoBorazio DaniloCroce GiorgioGambosi - RobertoBasili + RobertoBasili DanieleMargiotta AntonioScaiella MartinaDel Manso diff --git a/data/xml/2024.practicald2t.xml b/data/xml/2024.practicald2t.xml index 3958ddb4e0..68b9de1ddd 100644 --- a/data/xml/2024.practicald2t.xml +++ b/data/xml/2024.practicald2t.xml @@ -44,7 +44,7 @@ Enhancing Situation Awareness through Model-Based Explanation Generation KonstantinosGavriilidis IoannisKonstas - HelenHastie + HelenHastie WeiPang 7–16 Robots are often deployed in remote locations for tasks such as exploration, where users cannot directly perceive the agent and its environment. For Human-In-The-Loop applications, operators must have a comprehensive understanding of the robot’s current state and its environment to take necessary actions and effectively assist the agent. In this work, we compare different explanation styles to determine the most effective way to convey real-time updates to users. Additionally, we formulate these explanation styles as separate fine-tuning tasks and assess the effectiveness of large language models in delivering in-mission updates to maintain situation awareness. The code and dataset for this work are available at:——— diff --git a/data/xml/2024.privatenlp.xml b/data/xml/2024.privatenlp.xml index bfc9bb2b28..25ae6f213a 100644 --- a/data/xml/2024.privatenlp.xml +++ b/data/xml/2024.privatenlp.xml @@ -143,9 +143,9 @@ Smart Lexical Search for Label Flipping Adversial Attack AlbertoGutiérrez-Megías - Salud MaríaJiménez-ZafraUniversidad de Jaén + Salud MaríaJiménez-ZafraUniversidad de Jaén L. AlfonsoUreñaUniversidad de Jaén - EugenioMartínez-CámaraUniversidad de Jaén + EugenioMartínez-CámaraUniversidad de Jaén 97-106 Language models are susceptible to vulnerability through adversarial attacks, using manipulations of the input data to disrupt their performance. Accordingly, it represents a cibersecurity leak. Data manipulations are intended to be unidentifiable by the learning model and by humans, small changes can disturb the final label of a classification task. Hence, we propose a novel attack built upon explainability methods to identify the salient lexical units to alter in order to flip the classification label. We asses our proposal on a disinformation dataset, and we show that our attack reaches high balance among stealthiness and efficiency. 2024.privatenlp-1.11 @@ -168,7 +168,7 @@ ArijRiabi MenelMahamdiInria, Paris VirginieMouilleronInria, Paris - DjaméSeddah + DjaméSeddah 123-136 Protecting privacy is essential when sharing data, particularly in the case of an online radicalization dataset that may contain personal information. In this paper, we explore the balance between preserving data usefulness and ensuring robust privacy safeguards, since regulations like the European GDPR shape how personal information must be handled. We share our method for manually pseudonymizing a multilingual radicalization dataset, ensuring performance comparable to the original data. Furthermore, we highlight the importance of establishing comprehensive guidelines for processing sensitive NLP data by sharing our complete pseudonymization process, our guidelines, the challenges we encountered as well as the resulting dataset. 2024.privatenlp-1.13 diff --git a/data/xml/2024.propor.xml b/data/xml/2024.propor.xml index c1aa6912e2..819cc1c5d1 100644 --- a/data/xml/2024.propor.xml +++ b/data/xml/2024.propor.xml @@ -8,7 +8,7 @@ AntónioTeixeira LivyReal MarcosGarcia - Hugo GonçaloOliveira + Hugo GonçaloOliveira RaquelAmaro Association for Computational Lingustics
Santiago de Compostela, Galicia/Spain
@@ -53,7 +53,7 @@ Vinicius GonçalvesSantos Gabriel Jose PellisserDalalana Flaviane R. FernandesSvartman - Sandra MariaAluísio + Sandra MariaAluísio 32–44 2024.propor-1.4 craveiro-etal-2024-simple @@ -109,7 +109,7 @@ Automatic Text Readability Assessment in <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese EugénioRibeiro - NunoMamede + NunoMamede JorgeBaptista 97–107 2024.propor-1.10 @@ -132,7 +132,7 @@ Helena FreireCameron FernandaOlival FátimaFarrica - RenataVieira + RenataVieira 117–126 2024.propor-1.12 santos-etal-2024-named @@ -149,8 +149,8 @@ Bringing Pragmatics to Porttinari - Adding Speech Acts to News Texts Nataly L. Pattida Silva - Norton TrevisanRoman - Ariani DiFelippo + Norton TrevisanRoman + Ariani DiFelippo 137–145 2024.propor-1.14 da-silva-etal-2024-bringing @@ -169,7 +169,7 @@ Cássio Fariada Silva Marcio LimaInácio Oto AraújoVale - Helenade Medeiros Caseli + Helenade Medeiros Caseli 156–167 2024.propor-1.16 wick-pedro-etal-2024-using @@ -177,8 +177,8 @@ Semantic Permanence in Audiovisual Translation: a <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et approach to subtitling MaironSamagaio - TiagoTorrent - ElyMatos + TiagoTorrent + ElyMatos ArthurAlmeida 168–176 2024.propor-1.17 @@ -187,7 +187,7 @@ Hurdles in Parsing Multi-word Adverbs: Examples from <fixed-case>P</fixed-case>ortuguese IzabelaMuller - NunoMamede + NunoMamede JorgeBaptista 177–186 2024.propor-1.18 @@ -201,7 +201,7 @@ Flaviane R. F.Svartman Giovana M.Craveiro Marli QuadrosLeite - Sandra M.Aluísio + Sandra M.Aluísio Vinícius G.Santos Vinícius M.Garcia 187–195 @@ -260,7 +260,7 @@ Applying event classification to reveal the Estado da Índia Gonçalo C.Albuquerque MarloSouza - RenataVieira + RenataVieira Ana SofiaRibeiro 247–254 2024.propor-1.25 @@ -294,8 +294,8 @@ A Corpus of Stock Market Tweets Annotated with Named Entities Michel MonteiroZerbinati - Norton TrevisanRoman - Ariani DiFelippo + Norton TrevisanRoman + Ariani DiFelippo 276–284 2024.propor-1.28 zerbinati-etal-2024-corpus @@ -356,7 +356,7 @@ Investigating the Generalizability of <fixed-case>P</fixed-case>ortuguese Readability Assessment Models Trained Using Linguistic Complexity Features SorooshAkef AmáliaMendes - DetmarMeurers + DetmarMeurers PatrickRebuschat 332–341 2024.propor-1.34 @@ -540,7 +540,7 @@ A Bag-of-Users approach to mental health prediction from social media data RafaelOliveira - IvandréParaboni + IvandréParaboni 509–514 2024.propor-1.52 oliveira-paraboni-2024-bag @@ -548,7 +548,7 @@ Semi-automatic corpus expansion: the case of stance prediction CamilaPereira - IvandréParaboni + IvandréParaboni 515–520 2024.propor-1.53 pereira-paraboni-2024-semi @@ -556,7 +556,7 @@ Sequence-to-sequence and transformer approaches to <fixed-case>P</fixed-case>ortuguese text style transfer PabloCosta - IvandréParaboni + IvandréParaboni 521–526 2024.propor-1.54 costa-paraboni-2024-sequence @@ -573,7 +573,7 @@ Towards a Syntactic Lexicon of <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese Adjectives RyanMartinez JorgeBaptista - OtoVale + OtoVale 532–538 2024.propor-1.56 martinez-etal-2024-towards @@ -595,7 +595,7 @@ Text Readability Assessment in <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese: A Comparison of Classification and Regression Approaches EugénioRibeiro - NunoMamede + NunoMamede JorgeBaptista 551–557 2024.propor-1.59 @@ -743,7 +743,7 @@ <fixed-case>TTS</fixed-case> applied to the generation of datasets for automatic speech recognition EdressonCasanova - SandraAluísio + SandraAluísio Moacir AntonelliPonti 633–638 2024.propor-1.73 @@ -765,7 +765,7 @@ AntónioTeixeira LivyReal MarcosGarcia - Hugo GonçaloOliveira + Hugo GonçaloOliveira RaquelAmaro Association for Computational Lingustics
Santiago de Compostela, Galicia/Spain
@@ -814,7 +814,7 @@ Exploring the Automated Scoring of Narrative Essays in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese using Transformer Models EugénioRibeiro - NunoMamede + NunoMamede JorgeBaptista 14–17 2024.propor-2.4 @@ -882,7 +882,7 @@ Can rules still beat neural networks? The case of automatic normalisation for 18th-century <fixed-case>P</fixed-case>ortuguese texts LeonardoZilio Rafaela R.Lazzari - Maria José B.Finatto + Maria José B.Finatto 83–92 2024.propor-2.12 zilio-etal-2024-rules @@ -899,7 +899,7 @@ Could Style Help Plagiarism Detection? - A Sample-based Quantitative Study of Correlation between Style Specifics and Plagiarism AdileUka - MariaBerger + MariaBerger 103–108 2024.propor-2.14 uka-berger-2024-style @@ -954,7 +954,7 @@ Decoding Sentiments about Migration in <fixed-case>P</fixed-case>ortuguese Political Manifestos (2011, 2015, 2019) Erik BranMarino - RenataVieira + RenataVieira Jesus Manuel BenitezBaleato Ana SofiaRibeiro KatarinaLaken @@ -965,7 +965,7 @@ Analysing entity distribution in an annotated 18th-century historical source Daniel De LosReyes - RenataVieira + RenataVieira FernandaOlival Helena FreireCameron FátimaFarrica @@ -978,7 +978,7 @@ Isaac Souzade Miranda Jr. GabrielaWick-Pedro Cláudia Diasde Barros - OtoVale + OtoVale 165–169 2024.propor-2.22 de-miranda-jr-etal-2024-roda @@ -986,8 +986,8 @@ <fixed-case>G</fixed-case>i<fixed-case>D</fixed-case>i: A Virtual Assistant for Screening Protocols at Home AndrésPiñeiro-Martín - CarmenGarcía-Mateo - LauraDocío-Fernández + CarmenGarcía-Mateo + LauraDocío-Fernández Maríadel Carmen López-Pérez IgnacioNovo-Veleiro 170–173 @@ -1009,7 +1009,7 @@ Indexing <fixed-case>P</fixed-case>ortuguese <fixed-case>NLP</fixed-case> Resources with <fixed-case>PT</fixed-case>-Pump-Up RúbenAlmeida RicardoCampos - AlípioJorge + AlípioJorge SérgioNunes 178–181 2024.propor-2.25 @@ -1030,7 +1030,7 @@ Perfil Público: Automatic Generation and Visualization of Author Profiles for Digital News Media NunoGuimarães RicardoCampos - AlípioJorge + AlípioJorge 186–189 2024.propor-2.27 guimaraes-etal-2024-perfil @@ -1045,7 +1045,7 @@ Blip Copilot: a smart conversational assistant - EvandroFonseca + EvandroFonseca TayaneSoares DyovanaBaptista RogersDamas @@ -1079,7 +1079,7 @@ Autopilot: a smart sales assistant AmandaOliveira JoãoAlvarenga - EvandroFonseca + EvandroFonseca WilliamColen 204–205 2024.propor-2.32 diff --git a/data/xml/2024.rail.xml b/data/xml/2024.rail.xml index bd051d718f..74c5be3338 100644 --- a/data/xml/2024.rail.xml +++ b/data/xml/2024.rail.xml @@ -6,7 +6,7 @@ RooweitherMabuya MuziMatfunjwa MmasibidiSetaka - Mennovan Zaanen + Mennovan Zaanen ELRA and ICCL
Torino, Italia
May @@ -131,8 +131,8 @@ <fixed-case>E</fixed-case>thio<fixed-case>MT</fixed-case>: Parallel Corpus for Low-resource <fixed-case>E</fixed-case>thiopian Languages Atnafu LambeboTonja OlgaKolesnikova - AlexanderGelbukh - JugalKalita + AlexanderGelbukh + JugalKalita 107–114 Recent research in natural language processing (NLP) has achieved impressive performance in tasks such as machine translation (MT), news classification, and question-answering in high-resource languages. However, the performance of MT leaves much to be desired for low-resource languages. This is due to the smaller size of available parallel corpora in these languages, if such corpora are available at all. NLP in Ethiopian languages suffers from the same issues due to the unavailability of publicly accessible datasets for NLP tasks, including MT. To help the research community and foster research for Ethiopian languages, we introduce EthioMT – a new parallel corpus for 15 languages. We also create a new benchmark by collecting a dataset for better-researched languages in Ethiopia. We evaluate the newly collected corpus and the benchmark dataset for 23 Ethiopian languages using transformer and fine-tuning approaches. 2024.rail-1.12 @@ -143,7 +143,7 @@ NuhuIbrahim FelicityMulford MattLawrence - RizaBatista-Navarro + RizaBatista-Navarro 115–123 Hate speech on social media has proliferated in Ethiopia. To support studies aimed at investigating the targets and types of hate speech circulating in the Ethiopian context, we developed a new fine-grained annotation scheme that captures three elements of hate speech: the target (i.e., any groups with protected characteristics), type (i.e., the method of abuse) and nature (i.e., the style of the language used). We also developed a new lexicon of hate speech-related keywords in the four most prominent languages found on Ethiopian social media: Amharic, Afaan Oromo, English and Tigrigna. These keywords enabled us to retrieve social media posts (also in the same four languages) from three platforms (i.e., X, Telegram and Facebook), that are likely to contain hate speech. Experts in the Ethiopian context then manually annotated a sample of those retrieved posts, obtaining fair to moderate inter-annotator agreement. The resulting annotations formed the basis of a case study of which groups tend to be targeted by particular types of hate speech or by particular styles of hate speech language. 2024.rail-1.13 diff --git a/data/xml/2024.rapid.xml b/data/xml/2024.rapid.xml index 1b779fe5d3..45eb825318 100644 --- a/data/xml/2024.rapid.xml +++ b/data/xml/2024.rapid.xml @@ -4,7 +4,7 @@ Proceedings of the Fifth Workshop on Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive/psychiatric/developmental impairments @LREC-COLING 2024 DimitriosKokkinakis - Kathleen C.Fraser + Kathleen C.Fraser Charalambos K.Themistocleous Kristina LundholmFors AthanasiosTsanas @@ -73,7 +73,7 @@ MaricaBelmonte GloriaGagliardi DimitriosKokkinakis - FabioTamburini + FabioTamburini 34–44 Linguistic alterations represent one of the prodromal signs of cognitive decline associated with Dementia. In recent years, a growing body of work has been devoted to the development of algorithms for the automatic linguistic analysis of both oral and written texts, for diagnostic purposes. The extraction of Digital Linguistic Biomarkers from patients’ verbal productions can indeed provide a rapid, ecological, and cost-effective system for large-scale screening of the pathology. This article contributes to the ongoing research in the field by exploring a traditionally less studied aspect of language in Dementia, namely the rhythmic characteristics of speech. In particular, the paper focuses on the automatic detection of rhythmic features in Italian-connected speech. A landmark-based system was developed and evaluated to segment the speech flow into vocalic and consonantal intervals and to calculate several rhythmic metrics. Additionally, the reliability of these metrics in identifying Mild Cognitive Impairment and Dementia patients was tested. 2024.rapid-1.5 @@ -91,7 +91,7 @@ Exploring the Relationship Between Intrinsic Stigma in Masked Language Models and Training Data Using the Stereotype Content Model MarioMina JúliaFalcão - AitorGonzalez-Agirre + AitorGonzalez-Agirre 54–67 Much work has gone into developing language models of increasing size, but only recently have we begun to examine them for pernicious behaviour that could lead to harming marginalised groups. Following Lin et al. (2022) in rooting our work in psychological research, we prompt two masked language models (MLMs) of different specialisations in English and Spanish with statements from a questionnaire developed to measure stigma to determine if they treat physical and mental illnesses equally. In both models we find a statistically significant difference in the treatment of physical and mental illnesses across most if not all latent constructs as measured by the questionnaire, and thus they are more likely to associate mental illnesses with stigma. We then examine their training data or data retrieved from the same domain using a computational implementation of the Stereotype Content Model (SCM) (Fiske et al., 2002; Fraser et al., 2021) to interpret the questionnaire results based on the SCM values as reflected in the data. We observe that model behaviour can largely be explained by the distribution of the mentions of illnesses according to their SCM values. 2024.rapid-1.7 diff --git a/data/xml/2024.readi.xml b/data/xml/2024.readi.xml index ea030490e7..2fe3afad72 100644 --- a/data/xml/2024.readi.xml +++ b/data/xml/2024.readi.xml @@ -5,8 +5,8 @@ Proceedings of the 3rd Workshop on Tools and Resources for People with REAding DIfficulties (READI) @ LREC-COLING 2024 RodrigoWilkens RémiCardon - AmaliaTodirascu - NúriaGala + AmaliaTodirascu + NúriaGala ELRA and ICCL
Torino, Italia
May @@ -51,7 +51,7 @@ An Extensible Massively Multilingual Lexical Simplification Pipeline Dataset using the <fixed-case>M</fixed-case>ulti<fixed-case>LS</fixed-case> Framework MatthewShardlow FernandoAlva-Manchego - RizaBatista-Navarro + RizaBatista-Navarro StefanBott SaulCalderon Ramirez RémiCardon @@ -65,7 +65,7 @@ KaiNorth LauraOcchipinti NelsonPeréz Rojas - NishatRaihan + NishatRaihan TharinduRanasinghe MartinSolis Salazar MarcosZampieri @@ -97,7 +97,7 @@ Accessible Communication: a systematic review and comparative analysis of official <fixed-case>E</fixed-case>nglish Easy-to-Understand (<fixed-case>E</fixed-case>2<fixed-case>U</fixed-case>) language guidelines Andreea MariaDeleanu - ConstantinOrasan + ConstantinOrasan SabineBraun 70–92 Easy-to-Understand (E2U) language varieties have been recognized by the United Nation’s Convention on the Rights of Persons with Disabilities (2006) as a means to guarantee the fundamental right to Accessible Communication. Increased awareness has driven changes in European (European Commission, 2015, 2021; European Parliament, 2016) and International legislation (ODI, 2010), prompting public-sector and other institutions to offer domain-specific content into E2U language to prevent communicative exclusion of those facing cognitive barriers (COGA, 2017; Maaß, 2020; Perego, 2020). However, guidance on what it is that makes language actually ‘easier to understand’ is still fragmented and vague. For this reason, we carried out a systematic review of official guidelines for English Plain Language and Easy Language to identify the most effective lexical, syntactic and adaptation strategies that can reduce complexity in verbal discourse according to official bodies. This article will present the methods and preliminary results of the guidelines analysis. diff --git a/data/xml/2024.repl4nlp.xml b/data/xml/2024.repl4nlp.xml index 6cdf3c37eb..507201780c 100644 --- a/data/xml/2024.repl4nlp.xml +++ b/data/xml/2024.repl4nlp.xml @@ -45,9 +45,9 @@ Relevance-aware Diverse Query Generation for Out-of-domain Text Ranking Jia-HueiJu - Huck Chao-HanYang + Huck Chao-HanYang Szu-WeiFu - Ming-FengTsai + Ming-FengTsai Chuan-JuWang 26-36 Domain adaptation presents significant challenges for out-of-domain text ranking, especially when supervised data is limited. In this paper, we present ReadQG (Relevance-Aware Diverse Query Generation), a method to generate informative synthetic queries to facilitate the adaptation process of text ranking models. Unlike previous approaches focusing solely on relevant query generation, our ReadQG generates diverse queries with continuous relevance scores. Specifically, we propose leveraging soft-prompt tuning and diverse generation objectives to control query generation according to the given relevance. Our experiments show that integrating negative queries into the learning process enhances the effectiveness of text ranking models in out-of-domain information retrieval (IR) benchmarks. Furthermore, we measure the quality of query generation, highlighting the underlying beneficial characteristics of negative queries. Our empirical results and analysis also shed light on potential directions for more advanced data augmentation in IR. The data and code have been released. @@ -121,7 +121,7 @@ EdwardGow-Smith DylanPhelps HarishTayyar MadabushiUniversity of Bath - CarolinaScartonUniversity of Sheffield + CarolinaScartonUniversity of Sheffield AlineVillavicencioUniversity of Exeter and University of Sheffield 118-135 All existing transformer-based approaches to NLP using subword tokenisation algorithms encode whitespace (word boundary information) through the use of special space symbols (such as ## or _) forming part of tokens. These symbols have been shown to a) lead to reduced morphological validity of tokenisations, and b) give substantial vocabulary redundancy. As such, removing these symbols has been shown to have a beneficial effect on the processing of morphologically complex words for transformer encoders in the pretrain-finetune paradigm. In this work, we explore whether word boundary information is at all useful to such models. In particular, we train transformer encoders across four different training scales, and investigate several alternative approaches to including word boundary information, evaluating on two languages (English and Finnish) with a range of tasks across different domains and problem set-ups: sentence classification datasets, NER (for token-level classification), and two classification datasets involving complex words (Superbizarre and FLOTA). Overall, through an extensive experimental setup that includes the pre-training of 35 models, we find no substantial improvements from our alternative approaches, suggesting that modifying tokenisers to remove word boundary information isn’t leading to a loss of useful information. @@ -143,7 +143,7 @@ HeikeAdelHochschule der Medien (University of Applied Sciences) LukasLangeRobert Bosch GmbH, Bosch JannikStrötgenKarlsruhe University of Applied Sciences - HinrichSchuetze + HinrichSchuetze 163-176 In real-world environments, continual learning is essential for machine learning models, as they need to acquire new knowledge incrementally without forgetting what they have already learned. While pretrained language models have shown impressive capabilities on various static tasks, applying them to continual learning poses significant challenges, including avoiding catastrophic forgetting, facilitating knowledge transfer, and maintaining parameter efficiency. In this paper, we introduce MoCL-P, a novel lightweight continual learning method that addresses these challenges simultaneously. Unlike traditional approaches that continuously expand parameters for newly arriving tasks, MoCL-P integrates task representation-guided module composition with adaptive pruning, effectively balancing knowledge integration and computational overhead. Our evaluation across three continual learning benchmarks with up to 176 tasks shows that MoCL-P achieves state-of-the-art performance and improves parameter efficiency by up to three times, demonstrating its potential for practical applications where resource requirements are constrained. 2024.repl4nlp-1.12 @@ -174,7 +174,7 @@ Tracking linguistic information in transformer-based sentence embeddings through targeted sparsification - ViviNastaseUniversity of Geneva + ViviNastaseUniversity of Geneva PaolaMerloIdiap Research Institute and University of Geneva, Switzerland 203-214 Analyses of transformer-based models have shown that they encode a variety of linguistic information from their textual input. While these analyses have shed a light on the relation between linguistic information on one side, and internal architecture and parameters on the other, a question remains unanswered: how is this linguistic information reflected in sentence embeddings? Using datasets consisting of sentences with known structure, we test to what degree information about chunks (in particular noun, verb or prepositional phrases), such as grammatical number, or semantic role, can be localized in sentence embeddings. Our results show that such information is not distributed over the entire sentence embedding, but rather it is encoded in specific regions. Understanding how the information from an input text is compressed into sentence embeddings helps understand current transformer models and help build future explainable neural models. diff --git a/data/xml/2024.safety4convai.xml b/data/xml/2024.safety4convai.xml index 8ce230e7c7..47caa277c9 100644 --- a/data/xml/2024.safety4convai.xml +++ b/data/xml/2024.safety4convai.xml @@ -48,7 +48,7 @@ Using Information Retrieval Techniques to Automatically Repurpose Existing Dialogue Datasets for Safe Chatbot Development Tunde OluwaseyiAjayi GauravNegi - MihaelArcan + MihaelArcan PaulBuitelaar 16–27 There has been notable progress in the development of open-domain dialogue systems (chatbots) especially with the rapid advancement of the capabilities of Large Language Models. Chatbots excel at holding conversations in a manner that keeps a user interested and engaged. However, their responses can be unsafe, as they can respond in an offensive manner or offer harmful professional advice. As a way to mitigate this issue, recent work crowdsource datasets with exemplary responses or annotate dialogue safety datasets, which are relatively scarce compared to casual dialogues. Despite the quality of data obtained from crowdsourcing, it can be expensive and time consuming. This work proposes an effective pipeline, using information retrieval, to automatically repurpose existing dialogue datasets for safe chatbot development, as a way to address the aforementioned challenges. We select an existing dialogue dataset, revise its unsafe responses, as a way to obtain a dataset with safer responses to unsafe user inputs. We then fine-tune dialogue models on the original and revised datasets and generate responses to evaluate the safeness of the models. diff --git a/data/xml/2024.scalellm.xml b/data/xml/2024.scalellm.xml index 8ff7892319..bace2db000 100644 --- a/data/xml/2024.scalellm.xml +++ b/data/xml/2024.scalellm.xml @@ -3,9 +3,9 @@ Proceedings of the First edition of the Workshop on the Scaling Behavior of Large Language Models (SCALE-LLM 2024) - Antonio ValerioMiceli-Barone + Antonio ValerioMiceli-Barone FazlBarez - ShayCohen + ShayCohen ElenaVoita UlrichGermann MichalLukasik diff --git a/data/xml/2024.scichat.xml b/data/xml/2024.scichat.xml index f7a19b81d1..61734cabb2 100644 --- a/data/xml/2024.scichat.xml +++ b/data/xml/2024.scichat.xml @@ -38,7 +38,7 @@ Improving Dialog Safety using Socially Aware Contrastive Learning SouvikDasDepartment of Computer Science and Engineering, University at Buffalo, NY. - Rohini K.SrihariDepartment of Computer Science and Engineering, University at Buffalo, NY. + Rohini K.SrihariDepartment of Computer Science and Engineering, University at Buffalo, NY. 4-18 State-of-the-art conversational AI systems raise concerns due to their potential risks of generating unsafe, toxic, unethical, or dangerous content. Previous works have developed datasets to teach conversational agents the appropriate social paradigms to respond effectively to specifically designed hazardous content. However, models trained on these adversarial datasets still struggle to recognize subtle unsafe situations that appear naturally in conversations or introduce an inappropriate response in a casual context. To understand the extent of this problem, we study prosociality in both adversarial and casual dialog contexts and audit the response quality of general-purpose language models in terms of propensity to produce unsafe content. We propose a dual-step fine-tuning process to address these issues using a socially aware n-pair contrastive loss. Subsequently, we train a base model that integrates prosocial behavior by leveraging datasets like Moral Integrity Corpus (MIC) and ProsocialDialog. Experimental results on several dialog datasets demonstrate the effectiveness of our approach in generating socially appropriate responses. 2024.scichat-1.2 diff --git a/data/xml/2024.scil.xml b/data/xml/2024.scil.xml index ddcc0fd414..bf6f2e766b 100644 --- a/data/xml/2024.scil.xml +++ b/data/xml/2024.scil.xml @@ -40,7 +40,7 @@ CanaanBreiss AlexisRoss AmaniMaina-Kilaas - RogerLevy + RogerLevy JacobAndreas 20–31 2024.scil-1.3 @@ -110,7 +110,7 @@ AmandaDoucette RyanCotterell MorganSonderegger - Timothy J.O’Donnell + Timothy J.O’Donnell 117–128 2024.scil-1.12 doucette-etal-2024-correlation @@ -176,7 +176,7 @@ Computing Ellipsis Constructions: Comparing Classical <fixed-case>NLP</fixed-case> and <fixed-case>LLM</fixed-case> Approaches - DamirCavar + DamirCavar ZoranTiganj Ludovic VetaMompelat BillyDickson @@ -196,7 +196,7 @@ Interference Predicts Locality: Evidence from an <fixed-case>SOV</fixed-case> language SidharthRanjan SumeetAgarwal - RajakrishnanRajkumar + RajakrishnanRajkumar 240–256 2024.scil-1.22 ranjan-etal-2024-interference @@ -211,7 +211,7 @@ Neural language model gradients predict event-related brain potentials - Stefan L.Frank + Stefan L.Frank 316–323 2024.scil-1.24 frank-2024-neural diff --git a/data/xml/2024.sdp.xml b/data/xml/2024.sdp.xml index dac746c416..d5973740d8 100644 --- a/data/xml/2024.sdp.xml +++ b/data/xml/2024.sdp.xml @@ -91,7 +91,7 @@ Understanding Survey Paper Taxonomy about Large Language Models via Graph Representation Learning JunZhuangBoise State University and Indiana University Purdue University Indianapolis - CaseyKenningtonBoise State University + CaseyKenningtonBoise State University 58-69 As new research on Large Language Models (LLMs) continues, it is difficult to keep up with new research and models. To help researchers synthesize the new research many have written survey papers, but even those have become numerous. In this paper, we develop a method to automatically assign survey papers to a taxonomy. We collect the metadata of 144 LLM survey papers and explore three paradigms to classify papers within the taxonomy. Our work indicates that leveraging graph structure information on co-category graphs can significantly outperform the language models in two paradigms; pre-trained language models’ fine-tuning and zero-shot/few-shot classifications using LLMs. We find that our model surpasses an average human recognition level and that fine-tuning LLMs using weak labels generated by a smaller model, such as the GCN in this study, can be more effective than using ground-truth labels, revealing the potential of weak-to-strong generalization in the taxonomy classification task. 2024.sdp-1.6 @@ -299,7 +299,7 @@ Zero-shot Scientific Claim Verification Using <fixed-case>LLM</fixed-case>s and Citation Text CarlosAlvarez MaxwellBennett - LucyWangUniversity of Washington and Allen Institute for Artificial Intelligence + LucyWangUniversity of Washington and Allen Institute for Artificial Intelligence 269-276 Due to rapidly changing and advancing science, it is important to check the veracity of scientific claims and whether they are supported by research evidence. Previous versions of this task depended on supervised training, where labeled datasets were constructed through manual claim writing and evidence identification, sometimes coupled with mining citation relationships in papers. In this work, we investigate whether zero-shot scientific claim verification could be enabled using large language models (LLMs) and distant supervision examples taken directly from citation texts. We derive an in-context learning (ICL) dataset, SCitance, consisting of citation sentences (“citances”), LLM-generated negations, evidence documents, and veracity labels, and find that prompting GPT-4 with ICL examples from this dataset yields comparable performance (within 1 point F1) to previous finetuned models trained on manually curated claim-evidence pairs. Our results suggest that prompting LLMs with citance-evidence pairs directly poses a viable alternative to finetuning scientific claim verification models with manually-curated data. 2024.sdp-1.25 @@ -317,7 +317,7 @@ <fixed-case>C</fixed-case>o<fixed-case>SAE</fixed-case>mb: Contrastive Section-aware Aspect Embeddings for Scientific Articles ShrutiSinghIIT Gandhinagar - MayankSinghIndian Institute of Technology Gandhinagar + MayankSinghIndian Institute of Technology Gandhinagar 283-292 Research papers are long documents that contain information about various aspects such as background, prior work, methodology, and results. Existing works on scientific document representation learning only leverage the title and abstract of the paper. We present CoSAEmb, a model that learns representations from the full text of 97402 scientific papers from the S2ORC dataset. We present a novel supervised contrastive training framework for long documents using triplet loss and margin gradation. Our framework can be used to learn representations of long documents with any existing encoder-only transformer model without retraining it from scratch. CoSAEmb shows improved performance on information retrieval from the paper’s full text in comparison to models trained only on paper titles and abstracts. We also evaluate CoSAEmb on SciRepEval and CSFCube benchmarks, showing comparable performance with existing state-of-the-art models. 2024.sdp-1.27 @@ -335,7 +335,7 @@ Harnessing <fixed-case>CLIP</fixed-case> for Evidence Identification in Scientific Literature: A Multimodal Approach to Context24 Shared Task AnukritiKumar - LucyWangUniversity of Washington and Allen Institute for Artificial Intelligence + LucyWangUniversity of Washington and Allen Institute for Artificial Intelligence 307-313 Knowing whether scientific claims are supported by evidence is fundamental to scholarly communication and evidence-based decision-making. We present our approach to Task 1 of the Context24 Shared Task—Contextualizing Scientific Figures and Tables (SDP@ACL2024), which focuses on identifying multimodal evidence from scientific publications that support claims. We finetune CLIP, a state-of-the-art model for image-text similarity tasks, to identify and rank figures and tables in papers that substantiate specific claims. Our methods focus on text and image preprocessing techniques and augmenting the organizer-provided training data with labeled examples from the SciMMIR and MedICaT datasets. Our best-performing model achieved NDCG@5 and NDCG@10 values of 0.26 and 0.30, respectively, on the Context24 test split. Our findings underscore the effectiveness of data augmentation and preprocessing in improving the model’s ability in evidence matching. 2024.sdp-1.29 diff --git a/data/xml/2024.semeval.xml b/data/xml/2024.semeval.xml index b59b718f31..4026783758 100644 --- a/data/xml/2024.semeval.xml +++ b/data/xml/2024.semeval.xml @@ -3,7 +3,7 @@ Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024) - Atul Kr.Ojha + Atul Kr.Ojha A. SezaDoğruöz HarishTayyar Madabushi GiovanniDa San Martino @@ -68,7 +68,7 @@ nicolay-r at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 3: Using Flan-T5 for Reasoning Emotion Cause in Conversations with Chain-of-Thought on Emotion States NicolayRusnachenkoNewcastle University - HuizhiLiangUniversity of Newcastle + HuizhiLiangUniversity of Newcastle 22-27 Emotion expression is one of the essential traits of conversations. It may be self-related or caused by another speaker. The variety of reasons may serve as a source of the further emotion causes: conversation history, speaker’s emotional state, etc. Inspired by the most recent advances in Chain-of-Thought, in this work, we exploit the existing three-hop reasoning approach (THOR) to perform large language model instruction-tuning for answering: emotion states (THOR-state), and emotion caused by one speaker to the other (THOR-cause). We equip THORcause with the reasoning revision (RR) for devising a reasoning path in fine-tuning. In particular, we rely on the annotated speaker emotion states to revise reasoning path. Our final submission, based on Flan-T5-base (250M) and the rule-based span correction technique, preliminary tuned with THOR-state and fine-tuned with THOR-cause-rr on competition training data, results in 3rd and 4th places (F1-proportional) and 5th place (F1-strict) among 15 participating teams. Our THOR implementation fork is publicly available: https://github.com/nicolay-r/THOR-ECAC 2024.semeval-1.4 @@ -293,7 +293,7 @@ HamidrezaAmirzadehSharif University of Technology AlirezaSohrabiSharif University of Technology ZeinabTaghaviMSc student - HosseinSametiSharif University of Technology + HosseinSametiSharif University of Technology 139-147 The advancement of large language models (LLMs), their ability to produce eloquent and fluent content, and their vast knowledge have resulted in their usage in various tasks and applications. Despite generating fluent content, this content can contain fabricated or false information. This problem is known as hallucination and has reduced the confidence in the output of LLMs. In this work, we have used Natural Language Inference to train classifiers for hallucination detection to tackle SemEval-2024 Task 6-SHROOM (Mickus et al., 2024) which is defined in three sub-tasks: Paraphrase Generation, Machine Translation, and Definition Modeling. We have also conducted experiments on LLMs to evaluate their ability to detect hallucinated outputs. We have achieved 75.93% and 78.33% accuracy for the modelaware and model-agnostic tracks, respectively. The shared links of our models and the codes are available on GitHub. 2024.semeval-1.22 @@ -307,7 +307,7 @@ ZahraRahimiSharif University of Technology Mohammad MoeinShirzadySharif University of Technology ZeinabTaghaviMSc student - HosseinSametiSharif University of Technology + HosseinSametiSharif University of Technology 148-154 The goal and dream of the artificial intelligence field have long been the development of intelligent systems or agents that mimic human behavior and thinking. Creativity is an essential trait in humans that is closely related to lateral thinking. The remarkable advancements in Language Models have led to extensive research on question-answering and explicit and implicit reasoning involving vertical thinking. However, there is an increasing need to shift focus towards research and development of models that can think laterally. One must step outside the traditional frame of commonsense concepts in lateral thinking to conclude. Task 9 of SemEval-2024 is Brainteaser (Jiang et al.,2024), which requires lateral thinking to answer riddle-like multiple-choice questions. In our study, we assessed the performance of various models for the Brainteaser task. We achieved an overall accuracy of 75% for the Sentence Puzzle subtask and 66.7% for the Word Puzzle subtask. All the codes, along with the links to our saved models, are available on our GitHub. 2024.semeval-1.23 @@ -334,7 +334,7 @@ ZiweiZhengNewcastle University SubinJungNewcastle University VarunOjhaNewcastle University - HuizhiLiangUniversity of Newcastle + HuizhiLiangUniversity of Newcastle 163-169 SemEval-2024 Task 8 introduces the challenge of identifying machine-generated texts from diverse Large Language Models (LLMs) in various languages and domains. The task comprises three subtasks: binary classification in monolingual and multilingual (Subtask A), multi-class classification (Subtask B), and mixed text detection (Subtask C). This paper focuses on Subtask A & B. To tackle this task, this paper proposes two methods: 1) using traditional machine learning (ML) with natural language preprocessing (NLP) for feature extraction, and 2) fine-tuning LLMs for text classification. For fine-tuning, we use the train datasets provided by the task organizers. The results show that transformer models like LoRA-RoBERTa and XLM-RoBERTa outperform traditional ML models, particularly in multilingual subtasks. However, traditional ML models performed better than transformer models for the monolingual task, demonstrating the importance of considering the specific characteristics of each subtask when selecting an appropriate approach. 2024.semeval-1.25 @@ -452,7 +452,7 @@ <fixed-case>ZXQ</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 7: Fine-tuning <fixed-case>GPT</fixed-case>-3.5-Turbo for Numerical Reasoning ZhenQianRoyal Melbourne Institute of Technology XiaofeiXuRoyal Melbourne Institute of Technology - XiuzhenZhangRMIT University + XiuzhenZhangRMIT University 218-223 In this paper, we present our system for the SemEval-2024 Task 7, i.e., NumEval subtask 3: Numericial Reasoning. Given a news article and its headline, the numerical reasoning task involves creating a system to compute the intentionally excluded number within the news headline. We propose a fine-tuned GPT-3.5-turbo model, specifically engineered to deduce missing numerals directly from the content of news article. The model is trained with a human-engineered prompt that itegrates the news content and the masked headline, tailoring its accuracy for the designated task. It achieves an accuracy of 0.94 on the test data and secures the second position in the official leaderboard. An examination on the system’s inference results reveals its commendable accuracy in identifying correct numerals when they can be directly “copied” from the articles. However, the error rates increase when it comes to some ambiguous operations such as rounding. 2024.semeval-1.34 @@ -515,7 +515,7 @@ <fixed-case>NU</fixed-case>-<fixed-case>RU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 6: Hallucination and Related Observable Overgeneration Mistake Detection Using Hypothesis-Target Similarity and <fixed-case>S</fixed-case>elf<fixed-case>C</fixed-case>heck<fixed-case>GPT</fixed-case> ThanetMarkchomUniversity of Reading SubinJungNewcastle University - HuizhiLiangNewcastle University + HuizhiLiangNewcastle University 253-260 One of the key challenges in Natural Language Generation (NLG) is “hallucination,” in which the generated output appears fluent and grammatically sound but may contain incorrect information. To address this challenge, “SemEval-2024 Task 6 - SHROOM, a Shared-task on Hallucinations and Related Observable Overgeneration Mistakes” is introduced. This task focuses on detecting overgeneration hallucinations in texts generated from Large Language Models for various NLG tasks. To tackle this task, this paper proposes two methods: (1) hypothesis-target similarity, which measures text similarity between a generated text (hypothesis) and an intended reference text (target), and (2) a SelfCheckGPT-based method to assess hallucinations via predefined prompts designed for different NLG tasks. Experiments were conducted on the dataset provided in this task. The results show that both of the proposed methods can effectively detect hallucinations in LLM-generated texts with a possibility for improvement. 2024.semeval-1.39 @@ -527,7 +527,7 @@ <fixed-case>NCL</fixed-case>_<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 7: <fixed-case>C</fixed-case>o<fixed-case>T</fixed-case>-<fixed-case>N</fixed-case>um<fixed-case>HG</fixed-case>: A <fixed-case>C</fixed-case>o<fixed-case>T</fixed-case>-Based <fixed-case>SFT</fixed-case> Training Strategy with Large Language Models for Number-Focused Headline Generation JunzheZhaoHangzhou Zero Matrix Intelligence Co., Ltd, China YingxiWangHuawei Technologies Co., Ltd., China - HuizhiLiangNewcastle University + HuizhiLiangNewcastle University NicolayRusnachenkoNewcastle University 261-269 Headline Generation is an essential task in Natural Language Processing (NLP), where models often exhibit limited ability to accurately interpret numerals, leading to inaccuracies in generated headlines. This paper introduces CoT-NumHG, a training strategy leveraging the Chain of Thought (CoT) paradigm for Supervised Fine-Tuning (SFT) of large language models. This approach is aimed at enhancing numeral perception, interpretability, accuracy, and the generation of structured outputs. Presented in SemEval-2024 Task 7 (task 3): Numeral-Aware Headline Generation (English), this challenge is divided into two specific subtasks. The first subtask focuses on numerical reasoning, requiring models to precisely calculate and fill in the missing numbers in news headlines, while the second subtask targets the generation of complete headlines. Utilizing the same training strategy across both subtasks, this study primarily explores the first subtask as a demonstration of our training strategy. Through this competition, our CoT-NumHG-Mistral-7B model attained an accuracy rate of 94%, underscoring the effectiveness of our proposed strategy. @@ -577,7 +577,7 @@ <fixed-case>NCL</fixed-case> Team at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 3: Fusing Multimodal Pre-training Embeddings for Emotion Cause Prediction in Conversations ShuLiBeijing Accent Advertising Co., Ltd. ZicenLiaoSchool of Computing, Newcastle University, Newcastle upon Tyne, UK - HuizhiLiangSchool of Computing, Newcastle University, Newcastle upon Tyne, UK + HuizhiLiangSchool of Computing, Newcastle University, Newcastle upon Tyne, UK 285-290 In this study, we introduce an MLP approach for extracting multimodal cause utterances in conversations, utilizing the multimodal conversational emotion causes from the ECF dataset. Our research focuses on evaluating a bi-modal framework that integrates video and audio embeddings to analyze emotional expressions within dialogues. The core of our methodology involves the extraction of embeddings from pre-trained models for each modality, followed by their concatenation and subsequent classification via an MLP network. We compared the accuracy performances across different modality combinations including text-audio-video, video-audio, and audio only. 2024.semeval-1.44 @@ -652,7 +652,7 @@ <fixed-case>GAV</fixed-case>x at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 10: Emotion Flip Reasoning via Stacked Instruction Finetuning of <fixed-case>LLM</fixed-case>s VyNguyenRMIT University - XiuzhenZhangRMIT University + XiuzhenZhangRMIT University 326-336 The Emotion Flip Reasoning task at SemEval 2024 aims at identifying the utterance(s) that trigger a speaker to shift from an emotion to another in a multi-party conversation. The spontaneous, informal, and occasionally multilingual dynamics of conversations make the task challenging. In this paper, we propose a supervised stacked instruction-based framework to finetune large language models to tackle this task. Utilising the annotated datasets provided, we curate multiple instruction sets involving chain-of-thoughts, feedback, and self-evaluation instructions, for a multi-step finetuning pipeline. We utilise the self-consistency inference strategy to enhance prediction consistency. Experimental results reveal commendable performance, achieving mean F1 scores of 0.77 and 0.76 for triggers in the Hindi-English and English-only tracks respectively. This led to us earning the second highest ranking in both tracks. 2024.semeval-1.50 @@ -814,7 +814,7 @@ Team <fixed-case>U</fixed-case>nibuc - <fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 8: Transformer and Hybrid Deep Learning Based Models for Machine-Generated Text Detection Teodor-georgeMarchitanUniversity of Bucharest ClaudiuCreangaUniversity of Bucharest - Liviu P.DinuUniversity of Bucharest + Liviu P.DinuUniversity of Bucharest 403-411 This paper describes the approach of the UniBuc - NLP team in tackling the SemEval 2024 Task 8: Multigenerator, Multidomain, and Multilingual Black-Box Machine-Generated Text Detection. We explored transformer-based and hybrid deep learning architectures. For subtask B, our transformer-based model achieved a strong second-place out of 77 teams with an accuracy of 86.95%, demonstrating the architecture’s suitability for this task. However, our models showed overfitting in subtask A which could potentially be fixed with less fine-tunning and increasing maximum sequence length. For subtask C (token-level classification), our hybrid model overfit during training, hindering its ability to detect transitions between human and machine-generated text. 2024.semeval-1.63 @@ -829,7 +829,7 @@ CălinaCiocoiuAlexandru Ioan Cuza University of Iasi IoanaMănigaAlexandru Ioan Cuza University of Iasi OctavianUngureanuAlexandru Ioan Cuza University of Iasi - DanielaGîfuFaculty of Computer Science, Alexandru Ioan Cuza University of Iasi, Romania/Institute of Computer Science, Romanian Academy - Iasi Branch + DanielaGîfuFaculty of Computer Science, Alexandru Ioan Cuza University of Iasi, Romania/Institute of Computer Science, Romanian Academy - Iasi Branch DianaTrandăbățFaculty of Computer Science, Alexandru Ioan Cuza University of Iasi, Romania 412-419 The “Emotion Discovery and Reasoning Its Flip in Conversation” task at the SemEval 2024 competition focuses on the automatic recognition of emotion flips, triggered within multi-party textual conversations. This paper proposes a novel approach that draws a parallel between a mixed strategy and a comparative strategy, contrasting a Rule-Based Function with Named Entity Recognition (NER)—an approach that shows promise in understanding speaker-specific emotional dynamics. Furthermore, this method surpasses the performance of both DistilBERT and RoBERTa models, demonstrating competitive effectiveness in detecting emotion flips triggered in multi-party textual conversations, achieving a 70% F1-score. This system was ranked 6th in the SemEval 2024 competition for Subtask 3. @@ -949,7 +949,7 @@ Team jelarson at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2024 Task 8: Predicting Boundary Line Between Human and Machine Generated Text JosephLarsonIndiana University - FrancisTyersIndiana University + FrancisTyersIndiana University 477-484 In this paper, we handle the task of building a system that, given a document written first by a human and then finished by an LLM, the system must determine the transition word i.e. where the machine begins to write. We built a system by examining the data for textual anomalies and combining a method of heuristic approaches with a linear regression model based on the text length of each document. 2024.semeval-1.73 @@ -995,7 +995,7 @@ <fixed-case>BERT</fixed-case>astic at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 4: State-of-the-Art Multilingual Propaganda Detection in Memes via Zero-Shot Learning with Vision-Language Models TarekMahmoudMohamed Bin Zayed University of Artificial Intelligence (MBZUAI) - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 503-510 Analyzing propagandistic memes in a multilingual, multimodal dataset is a challenging problem due to the inherent complexity of memes’ multimodal content, which combines images, text, and often, nuanced context. In this paper, we use a VLM in a zero-shot approach to detect propagandistic memes and achieve a state-of-the-art average macro F1 of 66.7% over all languages. Notably, we outperform other systems on North Macedonian memes, and obtain competitive results on Bulgarian and Arabic memes. We also present our early fusion approach for identifying persuasion techniques in memes in a hierarchical multilabel classification setting. This approach outperforms all other approaches in average hierarchical precision with an average score of 77.66%. The systems presented contribute to the evolving field of research on the detection of persuasion techniques in multimodal datasets by offering insights that could be of use in the development of more effective tools for combating online propaganda. 2024.semeval-1.77 @@ -1101,7 +1101,7 @@ ArianQazviniAmirkabir University of Technology PouyaSadeghiUniversity of Tehran ZeinabTaghaviMSc student - HosseinSametiSharif University of Technology + HosseinSametiSharif University of Technology 565-572 In this paper, we delve into the realm of detecting machine-generated text (MGT) within Natural Language Processing (NLP). Our approach involves fine-tuning a RoBERTa-base Transformer, a robust neural architecture, to tackle MGT detection as a binary classification task. Specifically focusing on Subtask A (Monolingual - English) within the SemEval-2024 competition framework, our system achieves a 78.9% accuracy on the test dataset, placing us 57th among participants. While our system demonstrates proficiency in identifying human-written texts, it faces challenges in accurately discerning MGTs. 2024.semeval-1.85 @@ -1143,7 +1143,7 @@ ClaudiuCreangaUniversity of Bucharest Ana-mariaBucurInterdisciplinary School of Doctoral Studies Ana SabinaUbanUniversity of Bucharest - Liviu P.DinuUniversity of Bucharest + Liviu P.DinuUniversity of Bucharest 586-595 This paper describes the approach of the UniBuc team in tackling the SemEval 2024 Task 2: Safe Biomedical Natural Language Inference for Clinical Trials. We used SOLAR Instruct, without any fine-tuning, while focusing on input manipulation and tailored prompting. By customizing prompts for individual CTR sections, in both zero-shot and few-shots settings, we managed to achieve a consistency score of 0.72, ranking 14th in the leaderboard. Our thorough error analysis revealed that our model has a tendency to take shortcuts and rely on simple heuristics, especially when dealing with semantic-preserving changes. 2024.semeval-1.88 @@ -1242,7 +1242,7 @@ <fixed-case>ISDS</fixed-case>-<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 10: Transformer based neural networks for emotion recognition in conversations ClaudiuCreangaUniversity of Bucharest - Liviu P.DinuUniversity of Bucharest + Liviu P.DinuUniversity of Bucharest 649-654 This paper outlines the approach of the ISDS-NLP team in the SemEval 2024 Task 10: Emotion Discovery and Reasoning its Flip in Conversation (EDiReF). For Subtask 1 we obtained a weighted F1 score of 0.43 and placed 12 in the leaderboard. We investigate two distinct approaches: Masked Language Modeling (MLM) and Causal Language Modeling (CLM). For MLM, we employ pre-trained BERT-like models in a multilingual setting, fine-tuning them with a classifier to predict emotions. Experiments with varying input lengths, classifier architectures, and fine-tuning strategies demonstrate the effectiveness of this approach. Additionally, we utilize Mistral 7B Instruct V0.2, a state-of-the-art model, applying zero-shot and few-shot prompting techniques. Our findings indicate that while Mistral shows promise, MLMs currently outperform them in sentence-level emotion classification. 2024.semeval-1.95 @@ -1503,7 +1503,7 @@ <fixed-case>AA</fixed-case>da<fixed-case>M</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 1: Augmentation and Adaptation for Multilingual Semantic Textual Relatedness MiaoranZhangSaarland University MingyangWangBosch Center for Artificial Intelligence; LMU Munich - JesujobaAlabiSaarland University + JesujobaAlabiSaarland University DietrichKlakowSaarland University 800-810 This paper presents our system developed for the SemEval-2024 Task 1: Semantic Textual Relatedness for African and Asian Languages. The shared task aims at measuring the semantic textual relatedness between pairs of sentences, with a focus on a range of under-represented languages. In this work, we propose using machine translation for data augmentation to address the low-resource challenge of limited training data. Moreover, we apply task-adaptive pre-training on unlabeled task data to bridge the gap between pre-training and task adaptation. For model training, we investigate both full fine-tuning and adapter-based tuning, and adopt the adapter framework for effective zero-shot cross-lingual transfer. We achieve competitive results in the shared task: our system performs the best among all ranked teams in both subtask A (supervised learning) and subtask C (cross-lingual transfer). @@ -1887,7 +1887,7 @@ <fixed-case>SEME</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 2: Comparing Masked and Generative Language Models on Natural Language Inference for Clinical Trials MathildeAguiarUniversité Paris-Saclay, CNRS, Laboratoire Interdisciplinaire des Sciences du Numérique, 91400, Orsay, France - PierreZweigenbaumLISN, CNRS, Université Paris-Saclay + PierreZweigenbaumLISN, CNRS, Université Paris-Saclay NonaNaderiUniversité Paris-Saclay 986-996 This paper describes our submission to Task 2 of SemEval-2024: Safe Biomedical Natural Language Inference for Clinical Trials. The Multi-evidence Natural Language Inference for Clinical Trial Data (NLI4CT) consists of a Textual Entailment (TE) task focused on the evaluation of the consistency and faithfulness of Natural Language Inference (NLI) models applied to Clinical Trial Reports (CTR). We test 2 distinct approaches, one based on finetuning and ensembling Masked Language Models and the other based on prompting Large Language Models using templates, in particular, using Chain-Of-Thought and Contrastive Chain-Of-Thought. Prompting Flan-T5-large in a 2-shot setting leads to our best system that achieves 0.57 F1 score, 0.64 Faithfulness, and 0.56 Consistency. @@ -1955,7 +1955,7 @@ Srikar KashyapPulipakaIndiana University Bloomington ShrirangMhalgiIndiana University Bloomington JosephLarsonIndiana University - SandraKüblerIndiana University + SandraKüblerIndiana University 1026-1031 Since Large Language Models have reached a stage where it is becoming more and more difficult to distinguish between human and machine written text, there is an increasing need for automated systems to distinguish between them. As part of SemEval Task 8, Subtask A: Binary Human-Written vs. Machine-Generated Text Classification, we explore a variety of machine learning classifiers, from traditional statistical methods, such as Naïve Bayes and Decision Trees, to fine-tuned transformer models, suchas RoBERTa and ALBERT. Our findings show that using a fine-tuned RoBERTa model with optimizedhyperparameters yields the best accuracy. However, the improvement does not translate to the test set because of the differences in distribution in the development and test sets. 2024.semeval-1.148 @@ -1996,7 +1996,7 @@ AmirmasoudIravaniFerdowsi University of Mashhad HadiAlizadehIran Broadcasting University ZeinabTaghaviMSc student - HosseinSametiSharif University of Technology + HosseinSametiSharif University of Technology 1043-1052 This paper explores semantic textual relatedness (STR) using fine-tuning techniques on the RoBERTa transformer model, focusing on sentence-level STR within Track A (Supervised). The study evaluates the effectiveness of this approach across different languages, with promising results in English and Spanish but encountering challenges in Arabic. 2024.semeval-1.151 @@ -2121,7 +2121,7 @@ AndricValdezUNAM FernandoMárquezIIMAS - UNAM JorgePantaleónIIMAS - UNAM - HelenaGómezIIMAS - UNAM + HelenaGómezIIMAS - UNAM GemmaBel-enguixInstituto de Ingeniería - UNAM 1110-1114 Large language models (LLMs) are artificial intelligence systems that can generate text, translate languages, and answer questions in a human-like way. While these advances are impressive, there is concern that LLMs could also be used to generate fake or misleading content. In this work, as a part of our participation in SemEval-2024 Task-8, we investigate the ability of LLMs to identify whether a given text was written by a human or by a specific AI. We believe that human and machine writing style patterns are different from each other, so integrating features at different language levels can help in this classification task. For this reason, we evaluate several LLMs that aim to extract valuable multilevel information (such as lexical, semantic, and syntactic) from the text in their training processing. Our best scores on Sub- taskA (monolingual) and SubtaskB were 71.5% and 38.2% in accuracy, respectively (both using the ConvBERT LLM); for both subtasks, the baseline (RoBERTa) achieved an accuracy of 74%. @@ -2204,7 +2204,7 @@ RezaFarniaSharif University of Technology AmirrezaTarabkhahAmirkabir University of Technology Zeinab SadatTaghaviSharif University of Technology - HosseinSametiSharif University of Technology + HosseinSametiSharif University of Technology 1148-1154 Language models, particularly generative models, are susceptible to hallucinations, generating outputs that contradict factual knowledgeor the source text. This study explores methodsfor detecting hallucinations in three SemEval2024 Task 6 tasks: Machine Translation, Definition Modeling, and Paraphrase Generation.We evaluate two methods: semantic similaritybetween the generated text and factual references, and an ensemble of language modelsthat judge each other’s outputs. Our resultsshow that semantic similarity achieves moderate accuracy and correlation scores in trial data,while the ensemble method offers insights intothe complexities of hallucination detection butfalls short of expectations. This work highlights the challenges of hallucination detectionand underscores the need for further researchin this critical area. 2024.semeval-1.167 @@ -2265,7 +2265,7 @@ <fixed-case>EURECOM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 4: Hierarchical Loss and Model Ensembling in Detecting Persuasion Techniques YouriPeskineEURECOM - RaphaelTroncyEURECOM + RaphaelTroncyEURECOM PaoloPapottiEURECOM 1177-1182 This paper describes the submission of team EURECOM at SemEval-2024 Task 4: Multilingual Detection of Persuasion Techniques in Memes. We only tackled the first sub-task, consisting of detecting 20 named persuasion techniques in the textual content of memes. We trained multiple BERT-based models (BERT, RoBERTa, BERT pre-trained on harmful detection) using different losses (Cross Entropy, Binary Cross Entropy, Focal Loss and a custom-made hierarchical loss). The best results were obtained by leveraging the hierarchical nature of the data, by outputting ancestor classes and with a hierarchical loss. Our final submission consist of an ensembling of our top-3 best models for each persuasion techniques. We obtain hierarchical F1 scores of 0.655 (English), 0.345 (Bulgarian), 0.442 (North Macedonian) and 0.178 (Arabic) on the test set. @@ -2308,7 +2308,7 @@ Suyash VardhanMathurIIIT Hyderabad AkshettJindalInternational Institute of Information Technology, Hyderabad HardikMittalInternational Institute of Information Technology Hyderabad - ManishShrivastavaInternational Institute of Information Technology Hyderabad + ManishShrivastavaInternational Institute of Information Technology Hyderabad 1204-1211 Conversation is the most natural form of human communication, where each utterance can range over a variety of possible emotions. While significant work has been done towards the detection of emotions in text, relatively little work has been done towards finding the cause of the said emotions, especially in multimodal settings. SemEval 2024 introduces the task of Multimodal Emotion Cause Analysis in Conversations, which aims to extract emotions reflected in individual utterances in a conversation involving multiple modalities (textual, audio, and visual modalities) along with the corresponding utterances that were the cause for the emotion. In this paper, we propose models that tackle this task as an utterance labeling and a sequence labeling problem and perform a comparative study of these models, involving baselines using different encoders, using BiLSTM for adding contextual information of the conversation, and finally adding a CRF layer to try to model the inter-dependencies between adjacent utterances more effectively. In the official leaderboard for the task, our architecture was ranked 8th, achieving an F1-score of 0.1759 on the leaderboard. 2024.semeval-1.175 @@ -2321,7 +2321,7 @@ <fixed-case>D</fixed-case>a<fixed-case>V</fixed-case>inci at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 9: Few-shot prompting <fixed-case>GPT</fixed-case>-3.5 for Unconventional Reasoning Suyash VardhanMathurIIIT Hyderabad AkshettJindalInternational Institute of Information Technology, Hyderabad - ManishShrivastavaInternational Institute of Information Technology Hyderabad + ManishShrivastavaInternational Institute of Information Technology Hyderabad 1212-1216 While significant work has been done in the field of NLP on vertical thinking, which involves primarily logical thinking, little work has been done towards lateral thinking, which involves looking at problems from an unconventional perspective defying existing conceptions and notions. Towards this direction, SemEval 2024 introduces the task of BRAINTEASER, which involves two types of questions – Sentence Puzzle and Word Puzzle that defy conventional common-sense reasoning and constraints. In this paper, we tackle both the questions using few-shot prompting on GPT-3.5 and gain insights regarding the difference in the nature of the two types of questions. Our prompting strategy placed us 26th on the leaderboard for the Sentence Puzzle and 15th on the Word Puzzle task. 2024.semeval-1.176 @@ -2388,7 +2388,7 @@ <fixed-case>F</fixed-case>t<fixed-case>G</fixed-case>-<fixed-case>C</fixed-case>o<fixed-case>T</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 9: Solving Sentence Puzzles Using Fine-Tuned Language Models and Zero-Shot <fixed-case>C</fixed-case>o<fixed-case>T</fixed-case> Prompting MicahZhangUniversity of Colorado Boulder Shafiuddin RehanAhmedUniversity of Colorado Boulder - James H.MartinUniversity of Colorado Boulder + James H.MartinUniversity of Colorado Boulder 1245-1251 Recent large language models (LLMs) can solve puzzles that require creativity and lateral thinking. To advance this front of research, we tackle SemEval-2024 Task 9: BRAINTEASER: A Novel Task Defying Common Sense. We approach this task by introducing a technique that we call Fine-tuned Generated Chain-of-Thought (FtG-CoT). It is a novel few-shot prompting method that combines a fine-tuned BERT classifier encoder with zero-shot chain-of-thought generation and a fine-tuned LLM. The fine-tuned BERT classifier provides a context-rich encoding of each example question and choice list. Zero-shot chain-of-thought generation leverages the benefits of chain-of-thought prompting without requiring manual creation of the reasoning chains. We fine-tune the LLM on the generated chains-of-thought and include a set of generated reasoning chains in the final few-shot LLM prompt to maximize the relevance and correctness of the final generated response. In this paper, we show that FtG-CoT outperforms the zero-shot prompting baseline presented in the task paper and is highly effective at solving challenging sentence puzzles achieving a perfect score on the practice set and a 0.9 score on the evaluation set. 2024.semeval-1.181 @@ -2506,7 +2506,7 @@ XinZouDalian University of Technology JunlongWangDalian University of Technology PengChenDalian University of Technology - JianWangDalian University of Technology + JianWangDalian University of Technology LiangYangDalian University of Technology HongfeiLinDalian University of Technology 1315-1321 @@ -2521,7 +2521,7 @@ <fixed-case>H</fixed-case>a<fixed-case>RM</fixed-case>o<fixed-case>NEE</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 6: Tuning-based Approaches to Hallucination Recognition TimothyObisoBrandeis University JingxuanTuBrandeis University - JamesPustejovskyBrandeis University + JamesPustejovskyBrandeis University 1322-1331 This paper presents the Hallucination Recognition Model for New Experiment Evaluation (HaRMoNEE) team’s winning (#1) and #10 submissions for SemEval-2024 Task 6: Shared- task on Hallucinations and Related Observable Overgeneration Mistakes (SHROOM)’s two subtasks. This task challenged its participants to design systems to detect hallucinations in Large Language Model (LLM) outputs. Team HaRMoNEE proposes two architectures: (1) fine-tuning an off-the-shelf transformer-based model and (2) prompt tuning large-scale Large Language Models (LLMs). One submission from the fine-tuning approach outperformed all other submissions for the model-aware subtask; one submission from the prompt-tuning approach is the 10th-best submission on the leaderboard for the model-agnostic subtask. Our systems also include pre-processing, system-specific tuning, post-processing, and evaluation. 2024.semeval-1.191 @@ -2591,7 +2591,7 @@ <fixed-case>M</fixed-case>ason<fixed-case>T</fixed-case>igers at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 9: Solving Puzzles with an Ensemble of Chain-of-Thought Prompts - NishatRaihanGeorge Mason University + NishatRaihanGeorge Mason University DhimanGoswamiGeorge Mason University Al NahianBin EmranGeorge Mason University Sadiya Sayara ChowdhuryPuspoGeorge Mason University @@ -2609,11 +2609,11 @@ <fixed-case>M</fixed-case>ason<fixed-case>T</fixed-case>igers at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 8: Performance Analysis of Transformer-based Models on Machine-Generated Text Detection Sadiya Sayara ChowdhuryPuspoGeorge Mason University - NishatRaihanGeorge Mason University + NishatRaihanGeorge Mason University DhimanGoswamiGeorge Mason University Al NahianBin EmranGeorge Mason University AmritaGangulyGeorge Mason University - ÖzlemUzunerGeorge Mason University + ÖzlemUzunerGeorge Mason University 1364-1372 This paper presents the MasonTigers entryto the SemEval-2024 Task 8 - Multigenerator, Multidomain, and Multilingual BlackBox Machine-Generated Text Detection. Thetask encompasses Binary Human-Written vs.Machine-Generated Text Classification (TrackA), Multi-Way Machine-Generated Text Classification (Track B), and Human-Machine MixedText Detection (Track C). Our best performing approaches utilize mainly the ensemble ofdiscriminator transformer models along withsentence transformer and statistical machinelearning approaches in specific cases. Moreover, Zero shot prompting and fine-tuning ofFLAN-T5 are used for Track A and B. 2024.semeval-1.197 @@ -2639,7 +2639,7 @@ <fixed-case>M</fixed-case>ason<fixed-case>T</fixed-case>igers at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 1: An Ensemble Approach for Semantic Textual Relatedness DhimanGoswamiGeorge Mason University Sadiya Sayara ChowdhuryPuspoGeorge Mason University - NishatRaihanGeorge Mason University + NishatRaihanGeorge Mason University Al NahianBin EmranGeorge Mason University AmritaGangulyGeorge Mason University MarcosZampieriGeorge Mason University @@ -2702,7 +2702,7 @@ Pauk at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 4: A Neuro-Symbolic Method for Consistent Classification of Propaganda Techniques in Memes MattPaukUniversity of Colorado - Boulder - Maria LeonorPachecoUniversity of Colorado Boulder / Microsoft Research + Maria LeonorPachecoUniversity of Colorado Boulder / Microsoft Research 1424-1434 Memes play a key role in most modern informa-tion campaigns, particularly propaganda cam-paigns. Identifying the persuasive techniquespresent in memes is an important step in de-veloping systems to recognize and curtail pro-paganda. This work presents a framework toidentify the persuasive techniques present inmemes for the SemEval 2024 Task 4, accordingto a hierarchical taxonomy of propaganda tech-niques. The framework involves a knowledgedistillation method, where the base model is acombination of DeBERTa and ResNET usedto classify the text and image, and the teachermodel consists of a group of weakly enforcedlogic rules that promote the hierarchy of per-suasion techniques. The addition of the logicrule layer for knowledge distillation shows im-provement in respecting the hierarchy of thetaxonomy with a slight boost in performance. 2024.semeval-1.204 @@ -2752,7 +2752,7 @@ Compos Mentis at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val2024 Task6: A Multi-Faceted Role-based Large Language Model Ensemble to Detect Hallucination SouvikDasUniversity at Buffalo - RohiniSrihariUniversity at Buffalo, SUNY + RohiniSrihariUniversity at Buffalo, SUNY 1449-1454 Hallucinations in large language models (LLMs), where they generate fluent but factually incorrect outputs, pose challenges for applications requiring strict truthfulness. This work proposes a multi-faceted approach to detect such hallucinations across various language tasks. We leverage automatic data annotation using a proprietary LLM, fine-tuning of the Mistral-7B-instruct-v0.2 model on annotated and benchmark data, role-based and rationale-based prompting strategies, and an ensemble method combining different model outputs through majority voting. This comprehensive framework aims to improve the robustness and reliability of hallucination detection for LLM generations. 2024.semeval-1.208 @@ -3048,7 +3048,7 @@ Archimedes-<fixed-case>AUEB</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 5: <fixed-case>LLM</fixed-case> explains Civil Procedure OdysseasChlapanisDepartment of Informatics, Athens University of Economics and Business & Archimedes Unit, Athena Research Center IonAndroutsopoulosDepartment of Informatics, Athens University of Economics and Business & Archimedes Unit, Athena Research Center - DimitriosGalanisInstitute for Language and Speech Processing, Athena Research Center & Archimedes Unit, Athena Research Center + DimitriosGalanisInstitute for Language and Speech Processing, Athena Research Center & Archimedes Unit, Athena Research Center 1607-1622 The SemEval task on Argument Reasoning in Civil Procedure is challenging in that it requires understanding legal concepts and inferring complex arguments. Currently, most Large Language Models (LLM) excelling in the legal realm are principally purposed for classification tasks, hence their reasoning rationale is subject to contention. The approach we advocate involves using a powerful teacher-LLM (ChatGPT) to extend the training dataset with explanations and generate synthetic data. The resulting data are then leveraged to fine-tune a small student-LLM. Contrary to previous work, our explanations are not directly derived from the teacher’s internal knowledge. Instead they are grounded in authentic human analyses, therefore delivering a superior reasoning signal. Additionally, a new ‘mutation’ method generates artificial data instances inspired from existing ones. We are publicly releasing the explanations as an extension to the original dataset, along with the synthetic dataset and the prompts that were used to generate both. Our system ranked 15th in the SemEval competition. It outperforms its own teacher and can produce explanations aligned with the original human analyses, as verified by legal experts. 2024.semeval-1.229 @@ -3076,7 +3076,7 @@ JainitBafnaThe International Institute of Information Technology - Hyderabad HardikMittalInternational Institute of Information Technology Hyderabad SuyashSethiaThe International Institute of Information Technology - Hyderabad - ManishShrivastavaInternational Institute of Information Technology Hyderabad + ManishShrivastavaInternational Institute of Information Technology Hyderabad RadhikaMamidiLanguage Technologies Research Centre, IIIT Hyderabad 1627-1633 Large Language Models (LLMs) have showcased impressive abilities in generating fluent responses to diverse user queries. However, concerns regarding the potential misuse ofsuch texts in journalism, educational, and academic contexts have surfaced. SemEval 2024introduces the task of Multigenerator, Multidomain, and Multilingual Black-Box MachineGenerated Text Detection, aiming to developautomated systems for identifying machinegenerated text and detecting potential misuse. In this paper, we i) propose a RoBERTaBiLSTM based classifier designed to classifytext into two categories: AI-generated or human ii) conduct a comparative study of ourmodel with baseline approaches to evaluate itseffectiveness. This paper contributes to the advancement of automatic text detection systemsin addressing the challenges posed by machinegenerated text misuse. Our architecture ranked46th on the official leaderboard with an accuracy of 80.83 among 125. @@ -3223,7 +3223,7 @@ Maha Bhaashya at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 6: Zero-Shot Multi-task Hallucination Detection PatanjaliBhamidipatiInternational Institute of Information Technology Hyderabad AdvaithMalladiInternational Institute of Information Technology, Hyderabad - ManishShrivastavaInternational Institute of Information Technology Hyderabad + ManishShrivastavaInternational Institute of Information Technology Hyderabad RadhikaMamidiLanguage Technologies Research Centre, IIIT Hyderabad 1685-1689 In recent studies, the extensive utilization oflarge language models has underscored the importance of robust evaluation methodologiesfor assessing text generation quality and relevance to specific tasks. This has revealeda prevalent issue known as hallucination, anemergent condition in the model where generated text lacks faithfulness to the source anddeviates from the evaluation criteria. In thisstudy, we formally define hallucination and propose a framework for its quantitative detectionin a zero-shot setting, leveraging our definitionand the assumption that model outputs entailtask and sample specific inputs. In detectinghallucinations, our solution achieves an accuracy of 0.78 in a model-aware setting and 0.61in a model-agnostic setting. Notably, our solution maintains computational efficiency, requiring far less computational resources than other SOTA approaches, aligning with the trendtowards lightweight and compressed models. @@ -3387,7 +3387,7 @@ <fixed-case>HIT</fixed-case>-<fixed-case>MI</fixed-case>&<fixed-case>T</fixed-case> Lab at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 6: <fixed-case>D</fixed-case>e<fixed-case>BERT</fixed-case>a-based Entailment Model is a Reliable Hallucination Detector - WeiLiuHarbin Institute of Technology + WeiLiuHarbin Institute of Technology WanyaoShiNorthwest Normal University ZijianZhangHarbin Institute of Technology HuiHuangHarbin Institute of Technology @@ -3485,7 +3485,7 @@ <fixed-case>M</fixed-case>ai<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 1: Analyzing Source Language Selection in Cross-Lingual Textual Relatedness ShijiaZhouLudwig Maximilian University of Munich HuangyanShanLMU Munich - BarbaraPlankLMU Munich + BarbaraPlankLMU Munich RobertLitschkoLMU Munich 1842-1853 This paper presents our system developed for the SemEval-2024 Task 1: Semantic Textual Relatedness (STR), on Track C: Cross-lingual. The task aims to detect semantic relatedness of two sentences from the same languages. For cross-lingual approach we developed a set of linguistics-inspired models trained with several task-specific strategies. We 1) utilize language vectors for selection of donor languages; 2) investigate the multi-source approach for training; 3) use transliteration of non-latin script to study impact of “script gap”; 4) opt machine translation for data augmentation. We additionally compare the performance of XLM-RoBERTa and Furina with the same training strategy. Our submission achieved the first place in the C8 (Kinyarwanda) test. @@ -3564,7 +3564,7 @@ GiwonHongKAIST School of Computing PasqualeMinerviniUCL LukeDainesUsher Institute, University of Edinburgh - BeatriceAlexUniversity of Edinburgh, Edinburgh Futures Institute, School of Literatures, Languages and Cultures, School of Informatics + BeatriceAlexUniversity of Edinburgh, Edinburgh Futures Institute, School of Literatures, Languages and Cultures, School of Informatics 1894-1904 The NLI4CT task assesses Natural Language Inference systems in predicting whether hypotheses entail or contradict evidence from Clinical Trial Reports. In this study, we evaluate various Large Language Models (LLMs) with multiple strategies, including Chain-of-Thought, In-Context Learning, and Parameter-Efficient Fine-Tuning (PEFT). We propose a PEFT method to improve the consistency of LLMs by merging adapters that were fine-tuned separately using triplet and language modelling objectives. We found that merging the two PEFT adapters improves the F1 score (+0.0346) and consistency (+0.152) of the LLMs. However, our novel methods did not produce more accurate results than GPT-4 in terms of faithfulness and consistency. Averaging the three metrics, GPT-4 ranks joint-first in the competition with 0.8328. Finally, our contamination analysis with GPT-4 indicates that there was no test data leakage. Our code is available at https://github.com/EdinburghClinicalNLP/semeval_nli4ct. 2024.semeval-1.265 @@ -3644,7 +3644,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 2: Safe Biomedical Natural Language Inference for Clinical Trials MaelJullienuniversity of Manchester MarcoValentinoIdiap Research Institute - AndréFreitasUniversity of Manchester + AndréFreitasUniversity of Manchester 1947-1962 Large Language Models (LLMs) are at the forefront of NLP achievements but fall short in dealing with shortcut learning, factual inconsistency, and vulnerability to adversarial inputs. These shortcomings are especially critical in medical contexts, where they can misrepresent actual model capabilities. Addressing this, we present SemEval-2024 Task 2: Safe Biomedical Natural Language Inference for Clinical Trials. Our contributions include the refined NLI4CT-P dataset (i.e. Natural Language Inference for Clinical Trials - Perturbed), designed to challenge LLMs with interventional and causal reasoning tasks, along with a comprehensive evaluation of methods and results for participant submissions. A total of 106 participants registered for the task contributing to over 1200 individual submissions and 25 system overview papers. This initiative aims to advance the robustness and applicability of NLI models in healthcare, ensuring safer and more dependable AI assistance in clinical decision-making. We anticipate that the dataset, models, and outcomes of this task can support future research in the field of biomedical NLI. The dataset, competition leaderboard, and website are publicly available. 2024.semeval-1.271 @@ -3666,12 +3666,12 @@ MeriemBeloucifUppsala University ChristineDe KockUniversity of Melbourne OumaimaHourrane - ManishShrivastava + ManishShrivastava ThamarSolorio NirmalSurange KrishnapriyaVishnubhotla Seid MuhieYimam - Saif M.Mohammad + Saif M.Mohammad 1963-1978 We present the first shared task on Semantic Textual Relatedness (STR). While earlier shared tasks primarily focused on semantic similarity, we instead investigate the broader phenomenon of semantic relatedness across 14 languages: Afrikaans, Algerian Arabic, Amharic, English, Hausa, Hindi, Indonesian, Kinyarwanda, Marathi, Moroccan Arabic, Modern Standard Arabic, Punjabi, Spanish, and Telugu. These languages originate from five distinct language families and are predominantly spoken in Africa and Asia – regions characterised by the relatively limited availability of NLP resources. Each instance in the datasets is a sentence pair associated with a score that represents the degree of semantic textual relatedness between the two sentences. Participating systems were asked to rank sentence pairs by their closeness in meaning (i.e., their degree of semantic relatedness) in the 14 languages in three main tracks: (a) supervised, (b) unsupervised, and (c) crosslingual. The task attracted 163 participants. We received 70 submissions in total (across all tasks) from 51 different teams, and 38 system description papers. We report on the best-performing systems as well as the most common and the most effective approaches for the three different tracks. 2024.semeval-1.272 @@ -3686,7 +3686,7 @@ ElaineZosaSiloGen RaulVazquezUniversity of Helsinki TeemuVahtolaUniversity of Helsinki - JörgTiedemannUniversity of Helsinki + JörgTiedemannUniversity of Helsinki VincentSegonneIRISA - Université Bretagne Sud AlessandroRaganatoUniversity of Milano-Bicocca MariannaApidianakiUniversity of Pennsylvania @@ -3719,7 +3719,7 @@ MaramHasanainQatar Computing Research Institute AbulHasnatBlackbird.ai FabrizioSilvestriSapienza, University of Rome - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence GiovanniDa San MartinoUniversity of Padova 2009-2026 The automatic identification of misleading and persuasive content has emerged as a significant issue among various stakeholders, including social media platforms, policymakers, and the broader society. To tackle this issue within the context of memes, we organized a shared task at SemEval-2024, focusing on the multilingual detection of persuasion techniques. This paper outlines the dataset, the organization of the task, the evaluation framework, the outcomes, and the systems that participated. The task targets memes in four languages, with the inclusion of three surprise test datasets in Bulgarian, North Macedonian, and Arabic. It encompasses three subtasks: (i) identifying whether a meme utilizes a persuasion technique; (ii) identifying persuasion techniques within the meme’s ”textual content”; and (iii) identifying persuasion techniques across both the textual and visual components of the meme (a multimodal task). Furthermore, due to the complex nature of persuasion techniques, we present a hierarchy that groups the 22 persuasion techniques into several levels of categories. This became one of the attractive shared tasks in SemEval 2024, with 153 teams registered, 48 teams submitting results, and finally, 32 system description papers submitted. @@ -3762,7 +3762,7 @@ <fixed-case>S</fixed-case>heffield<fixed-case>V</fixed-case>era<fixed-case>AI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2024 Task 4: Prompting and fine-tuning a Large Vision-Language Model for Binary Classification of Persuasion Techniques in Memes CharlieGrimshawUniversity of Sheffield - KalinaBontchevaUniversity of Sheffield + KalinaBontchevaUniversity of Sheffield XingyiSongUniversity of Sheffield 2051-2056 This paper describes our approach for SemEval-2024 Task 4: Multilingual Detection of Persuasion Techniques in Memes. Specifically, we concentrate on Subtask 2b, a binary classification challenge that entails categorizing memes as either “propagandistic” or “non-propagandistic”. To address this task, we utilized the large multimodal pretrained model, LLaVa. We explored various prompting strategies and fine-tuning methods, and observed that the model, when not fine-tuned but provided with a few-shot learning examples, achieved the best performance. Additionally, we enhanced the model’s multilingual capabilities by integrating a machine translation model. Our system secured the 2nd place in the Arabic language category. diff --git a/data/xml/2024.sicon.xml b/data/xml/2024.sicon.xml index 5f76bb9567..53c366754c 100644 --- a/data/xml/2024.sicon.xml +++ b/data/xml/2024.sicon.xml @@ -95,7 +95,7 @@ ZoeyLiuUniversity of Florida SangpilYoumUniversity of Florida ChathuriJayaweeraUniversity of Florida - Bonnie J.DorrUniversity of Florida + Bonnie J.DorrUniversity of Florida 102-115 The unchecked spread of digital information, combined with increasing political polarization and the tendency of individuals to isolate themselves from opposing political viewpoints opposing views, has driven researchers to develop systems for automatically detecting political bias in media. This trend has been further fueled by discussions on social media. We explore methods for categorizing bias in US news articles, comparing rule-based and deep learning approaches. The study highlights the sensitivity of modern self-learning systems to unconstrained data ingestion, while reconsidering the strengths of traditional rule-based systems. Applying both models to left-leaning (CNN) and right-leaning (FOX) News articles, we assess their effectiveness on data beyond the original training and test sets. This analysis highlights each model’s accuracy, offers a framework for exploring deep-learning explainability, and sheds light on political bias in US news media. We contrast the opaque architecture of a deep learning model with the transparency of a linguistically informed rule-based model, showing that the rule-based model performs consistently across different data conditions and offers greater transparency, whereas the deep learning model is dependent on the training set and struggles with unseen data. 2024.sicon-1.7 @@ -117,7 +117,7 @@ IanPereraFlorida Institute for Human and Machine Cognition AlexMemoryJohns Hopkins University Applied Physics Laboratory Vera A.KazakovaFlorida Institute for Human and Machine Cognition - Bonnie J.DorrUniversity of Florida + Bonnie J.DorrUniversity of Florida BrodieMatherFlorida Institute for Human and Machine Cognition RitwikBoseJohns Hopkins University Applied Physics Laboratory ArashMahyariFlorida Institute for Human and Machine Cognition diff --git a/data/xml/2024.sigdial.xml b/data/xml/2024.sigdial.xml index 08d6b218a0..e6a70b0eb9 100644 --- a/data/xml/2024.sigdial.xml +++ b/data/xml/2024.sigdial.xml @@ -8,7 +8,7 @@ StefanUltes KojiInoue ShikibMehri - DavidHowcroft + DavidHowcroft KazunoriKomatani Association for Computational Linguistics
Kyoto, Japan
@@ -59,7 +59,7 @@ Examining Gender and Power on <fixed-case>W</fixed-case>ikipedia through Face and Politeness AdilSoubki Shyne E.Choi - OwenRambow + OwenRambow 40–50 We propose a framework for analyzing discourse by combining two interdependent concepts from sociolinguistic theory: face acts and politeness. While politeness has robust existing tools and data, face acts are less resourced. We introduce a new corpus created by annotating Wikipedia talk pages with face acts and we use this to train a face act tagger. We then employ our framework to study how face and politeness interact with gender and power in discussions between Wikipedia editors. Among other findings, we observe that female Wikipedians are not only more polite, which is consistent with prior studies, but that this difference corresponds with significantly more language directed at humbling aspects of their own face. Interestingly, the distinction nearly vanishes once limiting to editors with administrative power. 2024.sigdial-1.4 @@ -88,8 +88,8 @@ NishiUppuluri RevanthGangi Reddy ShaLi - GokhanTur - DilekHakkani-Tur + GokhanTur + DilekHakkani-Tur HengJi 66–77 LLM-driven dialog systems are used in a diverse set of applications, ranging from healthcare to customer service. However, given their generalization capability, it is difficult to ensure that these chatbots stay within the boundaries of the specialized domains, potentially resulting in inaccurate information and irrelevant responses. This paper introduces an unsupervised approach for automatically inducing domain-specific dialog flows that can be used to constrain LLM-based chatbots. We introduce two variants of dialog flow based on the availability of in-domain conversation instances. Through human and automatic evaluation over 24 dialog domains, we demonstrate that our high-quality data-guided dialog flows achieve better domain coverage, thereby overcoming the need for extensive manual crafting of such flows. @@ -104,7 +104,7 @@ NehaPullabhotla NanQiang HaoranZhang - MarilynWalker + MarilynWalker Maria InesTorres 78–91 Open domain spoken dialogue systems need to controllably generate many different dialogue acts (DAs) to allow Natural Language Generation (NLG) to create interesting and engaging conversational interactions with users. We aim to create an NLG engine that can produce a variety of DAs that make substantive knowledge-grounded contributions to a conversation. Training such an NLG typically requires dialogue corpora that are labelled for DAs, which are expensive to produce and vulnerable to quality issues. Here, we present a prompt-based learning approach to transfer DAs from one domain, video games, to 7 new domains. For each novel domain, we first crawl WikiData to create Meaning Representations that systematically vary both the number of attributes and hops on the WikiData Knowledge Graph. The proposed method involves a self-training step to create prompt examples for each domain followed by an overgeneration and ranking step. The result is a novel, high-quality dataset, Wiki-Dialogue, of 71K knowledge-grounded utterances, covering 9 DAs and the Art, Movies, Music, Sports, TV, Animal, and Boardgames domains, whose combined DA and semantic accuracy is 89%. We assess the corpus quality using both automatic and human evaluations and find it high. The corpus is found to be safe, lexically rich, and large in vocabulary, when compared to similar datasets. @@ -134,7 +134,7 @@
Anticipating Follow-Up Questions in Exploratory Information Search - GrahamWilcock + GrahamWilcock 103–109 The paper describes methods for anticipating follow-up questions in exploratory information search. There are two main cases: information stored in knowledge graphs, and information in unstructured texts such as Wikipedia. In the first case, follow-up questions are anticipated by extracting subgraphs relevant to user queries, passing the subgraphs to an LLM to generate responses. In the second case, entities and their relationships are extracted from the texts and added to short-term knowledge graphs relevant to initial queries. Follow-up questions are then anticipated by extracting subgraphs relevant to subsequent queries and passing the subgraphs to the LLM, as in the first case. The short-term graphs in dialogue memory are often sufficient to answer follow-up questions. If they are not, the described steps are repeated as required. 2024.sigdial-1.9 @@ -145,7 +145,7 @@ Bridging Information Gaps in Dialogues with Grounded Exchanges Using Knowledge Graphs PhillipSchneider NektariosMachner - KristiinaJokinen + KristiinaJokinen FlorianMatthes 110–120 Knowledge models are fundamental to dialogue systems for enabling conversational interactions, which require handling domain-specific knowledge. Ensuring effective communication in information-providing conversations entails aligning user understanding with the knowledge available to the system. However, dialogue systems often face challenges arising from semantic inconsistencies in how information is expressed in natural language compared to how it is represented within the system’s internal knowledge. To address this problem, we study the potential of large language models for conversational grounding, a mechanism to bridge information gaps by establishing shared knowledge between dialogue participants. Our approach involves annotating human conversations across five knowledge domains to create a new dialogue corpus called BridgeKG. Through a series of experiments on this dataset, we empirically evaluate the capabilities of large language models in classifying grounding acts and identifying grounded information items within a knowledge graph structure. Our findings offer insights into how these models use in-context learning for conversational grounding tasks and common prediction errors, which we illustrate with examples from challenging dialogues. We discuss how the models handle knowledge graphs as a semantic layer between unstructured dialogue utterances and structured information items. @@ -158,8 +158,8 @@ E. MargaretPerkoff Angela MariaRamirez Seanvon Bayern - MarilynWalker - JamesMartin + MarilynWalker + JamesMartin 121–138 Educational dialogue systems have been used to support students and teachers for decades. Such systems rely on explicit pedagogically motivated dialogue rules. With the ease of integrating large language models (LLMs) into dialogue systems, applications have been arising that directly use model responses without the use of human-written rules, raising concerns about their use in classroom settings. Here, we explore how to constrain LLM outputs to generate appropriate and supportive teacher-like responses. We present results comparing the effectiveness of different constraint variations in a zero-shot prompting setting on a large mathematics classroom corpus. Generated outputs are evaluated with human annotation for Fluency, Relevance, Helpfulness, and Adherence to the provided constraints. Including all constraints in the prompt led to the highest values for Fluency and Helpfulness, and the second highest value for Relevance. The annotation results also demonstrate that the prompts that result in the highest adherence to constraints do not necessarily indicate higher perceived scores for Fluency, Relevance, or Helpfulness. In a direct comparison, all of the non-baseline LLM responses were ranked higher than the actual teacher responses in the corpus over 50% of the time. 2024.sigdial-1.11 @@ -233,8 +233,8 @@ AmiePaige AdilSoubki JohnMurzaku - OwenRambow - Susan E.Brennan + OwenRambow + Susan E.Brennan 204–215 Hedges allow speakers to mark utterances as provisional, whether to signal non-prototypicality or “fuzziness”, to indicate a lack of commitment to an utterance, to attribute responsibility for a statement to someone else, to invite input from a partner, or to soften critical feedback in the service of face management needs. Here we focus on hedges in an experimentally parameterized corpus of 63 Roadrunner cartoon narratives spontaneously produced from memory by 21 speakers for co-present addressees, transcribed to text (Galati and Brennan, 2010). We created a gold standard of hedges annotated by human coders (the Roadrunner-Hedge corpus) and compared three LLM-based approaches for hedge detection: fine-tuning BERT, and zero and few-shot prompting with GPT-4o and LLaMA-3. The best-performing approach was a fine-tuned BERT model, followed by few-shot GPT-4o. After an error analysis on the top performing approaches, we used an LLM-in-the-Loop approach to improve the gold standard coding, as well as to highlight cases in which hedges are ambiguous in linguistically interesting ways that will guide future research. This is the first step in our research program to train LLMs to interpret and generate collateral signals appropriately and meaningfully in conversation. 2024.sigdial-1.18 @@ -292,8 +292,8 @@ GuangzhiSun NurulLubis WenWu - ChaoZhang - MilicaGasic + ChaoZhang + MilicaGasic 259–273 Affect recognition, encompassing emotions, moods, and feelings, plays a pivotal role in human communication. In the realm of conversational artificial intelligence, the ability to discern and respond to human affective cues is a critical factor for creating engaging and empathetic interactions. This study investigates the capacity of large language models (LLMs) to recognise human affect in conversations, with a focus on both open-domain chit-chat dialogues and task-oriented dialogues. Leveraging three diverse datasets, namely IEMOCAP (Busso et al., 2008), EmoWOZ (Feng et al., 2022), and DAIC-WOZ (Gratch et al., 2014), covering a spectrum of dialogues from casual conversations to clinical interviews, we evaluate and compare LLMs’ performance in affect recognition. Our investigation explores the zero-shot and few-shot capabilities of LLMs through in-context learning as well as their model capacities through task-specific fine-tuning. Additionally, this study takes into account the potential impact of automatic speech recognition errors on LLM predictions. With this work, we aim to shed light on the extent to which LLMs can replicate human-like affect recognition capabilities in conversations. 2024.sigdial-1.23 @@ -306,7 +306,7 @@ IsabelCarvalho AnaAlves CatarinaSilva - Hugo GonçaloOliveira + Hugo GonçaloOliveira 274–288 Customer-support services increasingly rely on automation, whether fully or with human intervention. Despite optimising resources, this may result in mechanical protocols and lack of human interaction, thus reducing customer loyalty. Our goal is to enhance interpretability and provide guidance in communication through novel tools for easier analysis of message trends and sentiment variations. Monitoring these contributes to more informed decision-making, enabling proactive mitigation of potential issues, such as protocol deviations or customer dissatisfaction. We propose a generic approach for dialogue flow discovery that leverages clustering techniques to identify dialogue states, represented by related utterances. State transitions are further analyzed to detect prevailing sentiments. Hence, we discover sentiment-aware dialogue flows that offer an interpretability layer to artificial agents, even those based on black-boxes, ultimately increasing trustworthiness. Experimental results demonstrate the effectiveness of our approach across different dialogue datasets, covering both human-human and human-machine exchanges, applicable in task-oriented contexts but also to social media, highlighting its potential impact across various customer-support settings. 2024.sigdial-1.24 @@ -322,7 +322,7 @@ XinxuanQiu YanniLin MatthewPurver - MassimoPoesio + MassimoPoesio 289–296 When customers present ambiguous references, service staff typically need to clarify the customers’ specific intentions. To advance research in this area, we collected 1,000 real-world consumer dialogues with ambiguous references. This dataset will be used for subsequent studies to identify ambiguous references and generate responses. Our analysis of the dataset revealed common strategies employed by service staff, including directly asking clarification questions (CQ) and listing possible options before asking a clarification question (LCQ). However, we found that merely using CQ often fails to fully satisfy customers. In contrast, using LCQ, as well as recommending specific products after listing possible options, proved more effective in resolving ambiguous references and enhancing customer satisfaction. 2024.sigdial-1.25 @@ -345,7 +345,7 @@ Transforming Slot Schema Induction with Generative Dialogue State Inference James D.Finch BoxinZhao - Jinho D.Choi + Jinho D.Choi 317–324 The challenge of defining a slot schema to represent the state of a task-oriented dialogue system is addressed by Slot Schema Induction (SSI), which aims to automatically induce slots from unlabeled dialogue data. Whereas previous approaches induce slots by clustering value spans extracted directly from the dialogue text, we demonstrate the power of discovering slots using a generative approach. By training a model to generate slot names and values that summarize key dialogue information with no prior task knowledge, our SSI method discovers high-quality candidate information for representing dialogue state. These discovered slot-value candidates can be easily clustered into unified slot schemas that align well with human-authored schemas. Experimental comparisons on the MultiWOZ and SGD datasets demonstrate that Generative Dialogue State Inference (GenDSI) outperforms the previous state-of-the-art on multiple aspects of the SSI task. 2024.sigdial-1.27 @@ -376,7 +376,7 @@ Enhancing Dialogue Speech Recognition with Robust Contextual Awareness via Noise Representation Learning WonjunLee SanKim - Gary GeunbaeLee + Gary GeunbaeLee 333–343 Recent dialogue systems typically operate through turn-based spoken interactions between users and agents. These systems heavily depend on accurate Automatic Speech Recognition (ASR), as transcription errors can significantly degrade performance in downstream dialogue tasks. To alleviate this challenge, robust ASR is required, and one effective method is to utilize the dialogue context from user and agent interactions for transcribing the subsequent user utterance. This method incorporates the transcription of the user’s speech and the agent’s response as model input, using the accumulated context generated by each turn. However, this context is susceptible to ASR errors because the ASR model generates it auto-regressively. Such noisy context can further degrade the benefits of context input, resulting in suboptimal ASR performance. In this paper, we introduce context noise representation learning to enhance robustness against noisy context, ultimately improving dialogue speech recognition accuracy. To maximize the advantage of context awareness, our approach involves decoder pre-training with text-based dialogue data and noise representation learning for a context encoder. Evaluated on DSTC11 (MultiWoZ 2.1 audio dialogues), it achieves a 24% relative reduction in Word Error Rate (WER) compared to wav2vec2.0 baselines and a 13% reduction compared to Whisper-large-v2. Notably, in noisy environments where user speech is barely audible, our method proves its effectiveness by utilizing contextual information for accurate transcription. Tested on audio data with strong noise level (Signal Noise Ratio of 0dB), our approach shows up to a 31% relative WER reduction compared to the wav2vec2.0 baseline, providing a reassuring solution for real-world noisy scenarios. 2024.sigdial-1.30 @@ -385,14 +385,14 @@ Local Topology Measures of Contextual Language Model Latent Spaces with Applications to Dialogue Term Extraction - Benjamin MatthiasRuppik + Benjamin MatthiasRuppik MichaelHeck Carelvan Niekerk RenatoVukovic Hsien-chinLin ShutongFeng MarcusZibrowius - MilicaGasic + MilicaGasic 344–356 A common approach for sequence tagging tasks based on contextual word representations is to train a machine learning classifier directly on these embedding vectors. This approach has two shortcomings. First, such methods consider single input sequences in isolation and are unable to put an individual embedding vector in relation to vectors outside the current local context of use. Second, the high performance of these models relies on fine-tuning the embedding model in conjunction with the classifier, which may not always be feasible due to the size or inaccessibility of the underlying feature-generation model. It is thus desirable, given a collection of embedding vectors of a corpus, i.e. a datastore, to find features of each vector that describe its relation to other, similar vectors in the datastore. With this in mind, we introduce complexity measures of the local topology of the latent space of a contextual language model with respect to a given datastore. The effectiveness of our features is demonstrated through their application to dialogue term extraction. Our work continues a line of research that explores the manifold hypothesis for word embeddings, demonstrating that local structure in the space carved out by word embeddings can be exploited to infer semantic properties. 2024.sigdial-1.31 @@ -415,10 +415,10 @@ RenatoVukovic DavidArps Carelvan Niekerk - Benjamin MatthiasRuppik + Benjamin MatthiasRuppik Hsien-chinLin MichaelHeck - MilicaGasic + MilicaGasic 370–384 State-of-the-art task-oriented dialogue systems typically rely on task-specific ontologies for fulfilling user queries. The majority of task-oriented dialogue data, such as customer service recordings, comes without ontology and annotation. Such ontologies are normally built manually, limiting the application of specialised systems. Dialogue ontology construction is an approach for automating that process and typically consists of two steps: term extraction and relation extraction. In this work, we focus on relation extraction in a transfer learning set-up. To improve the generalisation, we propose an extension to the decoding mechanism of large language models. We adapt Chain-of-Thought (CoT) decoding, recently developed for reasoning problems, to generative relation extraction. Here, we generate multiple branches in the decoding space and select the relations based on a confidence threshold. By constraining the decoding to ontology terms and relations, we aim to decrease the risk of hallucination. We conduct extensive experimentation on two widely used datasets and find improvements in performance on target ontology for source fine-tuned and one-shot prompted large language models. 2024.sigdial-1.33 @@ -460,7 +460,7 @@ HaolanZhan SameenMaruf IngridZukerman - GholamrezaHaffari + GholamrezaHaffari 420–427 Building a dialogue agent that can seamlessly interact with humans in multi-modal regimes, requires two fundamental abilities: (1) understanding emotion and dialogue acts within situated user scenarios, and (2) grounding perceived visual cues to dialogue contexts. However, recent works have uncovered shortcomings of existing dialogue agents in understanding emotions and dialogue acts, and in ground- ing visual cues effectively. In this work, we investigate whether additional dialogue data with only visual descriptions can help dialogue agents effectively align visual and textual features, and enhance the ability of dialogue agents to ground perceived visual cues to dialogue contexts. To this end, in the absence of a suitable dataset, we propose a synthetic visual description generation pipeline, and con- tribute a large-scale synthetic visual description dataset. In addition, we propose a general training procedure for effectively leveraging these synthetic data. We conduct comprehensive analyses to evaluate the impact of synthetic data on two benchmarks: MELD and IEMOCAP. Our findings suggest that synthetic visual descriptions can serve as an effective way to enhance a dialogue agents’ grounding ability, and that the training scheme affects the extent to which these descriptions improve the agent’s performance. 2024.sigdial-1.36 @@ -480,7 +480,7 @@ Conversational Feedback in Scripted versus Spontaneous Dialogues: A Comparative Analysis IldikoPilan - LaurentPrévot + LaurentPrévot HendrikBuschmeier PierreLison 440–457 @@ -498,7 +498,7 @@ SeungpilWon JanghoonHan Stanley JungkyuChoi - JungyunSeo + JungyunSeo 458–465 In task-oriented dialogue systems, intent classification is crucial for accurately understanding user queries and providing appropriate services. This study explores the use of intent descriptions with large language models for unseen domain intent classification. By examining the effects of description quality, quantity, and input length management, we identify practical guidelines for optimizing performance. Our experiments using FLAN-T5 3B demonstrate that 1) high-quality descriptions for both training and testing significantly improve accuracy, 2) diversity in training descriptions doesn’t greatly affect performance, and 3) off-the-shelf rankers selecting around ten intent options reduce input length without compromising performance. We emphasize that high-quality testing descriptions have a greater impact on accuracy than training descriptions. These findings provide practical guidelines for using intent descriptions with large language models to achieve effective and efficient intent classification in low-resource settings. 2024.sigdial-1.39 @@ -545,7 +545,7 @@ Optimizing Code-Switching in Conversational Tutoring Systems: A Pedagogical Framework and Evaluation ZhengyuanLiu Stella XinYin - NancyChen + NancyChen 500–515 Large language models demonstrate remarkable proficiency in various tasks across multiple languages. However, their potential in code-switching remains underexplored, particularly in cultural and educational contexts. Code-switching or translanguaging plays a crucial role in bilingual education, facilitating comprehension and engagement among students with varied linguistic proficiencies. In this work, we present a pedagogy-inspired framework that introduces traditional classroom practices of code-switching to intelligent tutoring systems. Specifically, we develop fine-grained instructional strategies tailored to multilingual and educational needs. We conduct experiments involving both LLM-based evaluation and expert analysis to assess the effectiveness of translanguaging in tutoring dialogues. Our experimental results indicate that strategic code-switching can significantly enhance the learning experience. This work not only advances dialogic tutors in language learning, but also extends LLMs to better accommodate multilingual interaction. 2024.sigdial-1.43 @@ -556,7 +556,7 @@ <fixed-case>EC</fixed-case>oh: Turn-level Coherence Evaluation for Multilingual Dialogues JohnMendonca IsabelTrancoso - AlonLavie + AlonLavie 516–532 Despite being heralded as the new standard for dialogue evaluation, the closed-source nature of GPT-4 poses challenges for the community. Motivated by the need for lightweight, open source, and multilingual dialogue evaluators, this paper introduces GenResCoh (Generated Responses targeting Coherence). GenResCoh is a novel LLM generated dataset comprising over 130k negative and positive responses and accompanying explanations seeded from XDailyDialog and XPersona covering English, French, German, Italian, and Chinese. Leveraging GenResCoh, we propose ECoh (Evaluation of Coherence), a family of evaluators trained to assess response coherence across multiple languages. Experimental results demonstrate that ECoh achieves multilingual detection capabilities superior to the teacher model (GPT-3.5-Turbo) on GenResCoh, despite being based on a much smaller architecture. Furthermore, the explanations provided by ECoh closely align in terms of quality with those generated by the teacher model. 2024.sigdial-1.44 @@ -570,7 +570,7 @@ YejinJeon JungseulOk YunsuKim - Gary GeunbaeLee + Gary GeunbaeLee 533–543 Research on hate speech has predominantly revolved around the detection and interpretation from textual inputs, leaving verbal content largely unexplored. Moreover, while there has been some limited exploration into hate speech detection within verbal acoustic speech inputs, the aspect of interpretability has been overlooked. As such, we introduce a new task within the audio hate speech detection task domain - we specifically aim to identify specific time frames of hate speech within audio utterances. Towards this, we propose two different approaches, cascading and End-to-End (E2E). The first cascading approach initially converts audio to transcripts, identifies hate speech within these transcripts, and subsequently locates the corresponding audio time frames. Conversely, the second E2E approach processes audio utterances directly, which allows it to pinpoint hate speech within specific time frames. Moreover, due to the lack of explainable audio hate speech datasets that include frame-level rationales, we curated a synthetic audio dataset to train our models. We further validate these models on actual human speech utterances and we find that the E2E approach outperforms the cascading method in terms of audio frame Intersection over Union (IoU) metric. Furthermore, we observe that the inclusion of frame-level rationales significantly enhances hate speech detection accuracy for both E2E and cascading approaches. 2024.sigdial-1.45 @@ -602,7 +602,7 @@ <fixed-case>B</fixed-case>o<fixed-case>K</fixed-case>: Introducing Bag-of-Keywords Loss for Interpretable Dialogue Response Generation SuvodipDey - Maunendra SankarDesarkar + Maunendra SankarDesarkar 566–578 The standard language modeling (LM) loss by itself has been shown to be inadequate for effective dialogue modeling. As a result, various training approaches, such as auxiliary loss functions and leveraging human feedback, are being adopted to enrich open-domain dialogue systems. One such auxiliary loss function is Bag-of-Words (BoW) loss, defined as the cross-entropy loss for predicting all the words/tokens of the next utterance. In this work, we propose a novel auxiliary loss named Bag-of-Keywords (BoK) loss to capture the central thought of the response through keyword prediction and leverage it to enhance the generation of meaningful and interpretable responses in open-domain dialogue systems. BoK loss upgrades the BoW loss by predicting only the keywords or critical words/tokens of the next utterance, intending to estimate the core idea rather than the entire response. We incorporate BoK loss in both encoder-decoder (T5) and decoder-only (DialoGPT) architecture and train the models to minimize the weighted sum of BoK and LM (BoK-LM) loss. We perform our experiments on two popular open-domain dialogue datasets, DailyDialog and Persona-Chat. We show that the inclusion of BoK loss improves the dialogue generation of backbone models while also enabling post-hoc interpretability. We also study the effectiveness of BoK-LM loss as a reference-free metric and observe comparable performance to the state-of-the-art metrics on various dialogue evaluation datasets. 2024.sigdial-1.48 @@ -612,7 +612,7 @@ Cross-lingual Transfer and Multilingual Learning for Detecting Harmful Behaviour in <fixed-case>A</fixed-case>frican Under-Resourced Language Dialogue Tunde OluwaseyiAjayi - MihaelArcan + MihaelArcan PaulBuitelaar 579–589 Most harmful dialogue detection models are developed for high-resourced languages. Consequently, users who speak under-resourced languages cannot fully benefit from these models in terms of usage, development, detection and mitigation of harmful dialogue utterances. Our work aims at detecting harmful utterances in under-resourced African languages. We leverage transfer learning using pretrained models trained with multilingual embeddings to develop a cross-lingual model capable of detecting harmful content across various African languages. We first fine-tune a harmful dialogue detection model on a selected African dialogue dataset. Additionally, we fine-tune a model on a combined dataset in some African languages to develop a multilingual harmful dialogue detection model. We then evaluate the cross-lingual model’s ability to generalise to an unseen African language by performing harmful dialogue detection in an under-resourced language not present during pretraining or fine-tuning. We evaluate our models on the test datasets. We show that our best performing models achieve impressive results in terms of F1 score. Finally, we discuss the results and limitations of our work. @@ -623,7 +623,7 @@ A Few-shot Approach to Task-oriented Dialogue Enhanced with Chitchat ArmandStricker - PatrickParoubek + PatrickParoubek 590–602 Large language models (LLMs) tuned for chat have recently been adopted for few-shot end-to-end task-oriented dialogue (TOD), with some success. To further assess this method, we conduct experiments on two, more complex, task-oriented benchmarks that integrate elements of chitchat into the conversation. We enhance a few-shot baseline by adding zero-shot chitchat detection and implementing function calling for dialogue state tracking (DST). We focus on this step in the task-oriented pipeline as it comes first, and errors due to added chitchat at this stage have the most impact on end-to-end performance. We find that this prompting method shows increased resilience to mixed-mode inputs and our enhanced pipeline allows for natural inter-mode conversations, as assessed through human evaluation. Our findings also suggest that the performance gap between few-shot prompting for TOD and supervised task-specific models is narrowing. 2024.sigdial-1.50 @@ -657,7 +657,7 @@ ZlataKikteva AlexanderTrautsch SteffenHerbold - AnnetteHautli-Janisz + AnnetteHautli-Janisz 624–630 In spontaneous natural debate, questions play a variety of crucial roles: they allow speakers to introduce new topics, seek other speakers’ opinions or indeed confront them. A three-class question typology has previously been demonstrated to effectively capture details pertaining to the nature of questions and the different functions associated with them in a debate setting. We adopt this classification and investigate the performance of several machine learning approaches on this task by incorporating various sets of lexical, dialogical and argumentative features. We find that BERT demonstrates the best performance on the task, followed by a Random Forest model enriched with pragmatic features. 2024.sigdial-1.53 @@ -687,7 +687,7 @@ NegarFani SierraCarter StephenDoogan - Jinho D.Choi + Jinho D.Choi 644–663 The shortage of clinical workforce presents significant challenges in mental healthcare, limiting access to formal diagnostics and services. We aim to tackle this shortage by integrating a customized large language model (LLM) into the workflow, thus promoting equity in mental healthcare for the general population. Although LLMs have showcased their capability in clinical decision-making, their adaptation to severe conditions like Post-traumatic Stress Disorder (PTSD) remains largely unexplored. Therefore, we collect 411 clinician-administered diagnostic interviews and devise a novel approach to obtain high-quality data. Moreover, we build a comprehensive framework to automate PTSD diagnostic assessments based on interview contents by leveraging two state-of-the-art LLMs, GPT-4 and Llama-2, with potential for broader clinical diagnoses. Our results illustrate strong promise for LLMs, tested on our dataset, to aid clinicians in diagnostic validation. To the best of our knowledge, this is the first AI system that fully automates assessments for mental illness based on clinician-administered interviews. 2024.sigdial-1.55 @@ -736,7 +736,7 @@ <fixed-case>D</fixed-case>iag<fixed-case>ESC</fixed-case>: Dialogue Synthesis for Integrating Depression Diagnosis into Emotional Support Conversation SeungyeonSeo - Gary GeunbaeLee + Gary GeunbaeLee 686–698 Dialogue systems for mental health care aim to provide appropriate support to individuals experiencing mental distress. While extensive research has been conducted to deliver adequate emotional support, existing studies cannot identify individuals who require professional medical intervention and cannot offer suitable guidance. We introduce the Diagnostic Emotional Support Conversation task for an advanced mental health management system. We develop the DESC dataset to assess depression symptoms while maintaining user experience by utilizing task-specific utterance generation prompts and a strict filtering algorithm. Evaluations by professional psychological counselors indicate that DESC has a superior ability to diagnose depression than existing data. Additionally, conversational quality evaluation reveals that DESC maintains fluent, consistent, and coherent dialogues. 2024.sigdial-1.59 @@ -754,9 +754,9 @@ NurulLubis Carelvan Niekerk MichaelHeck - BenjaminRuppik + BenjaminRuppik RenatoVukovic - MilicaGašić + MilicaGašić 699–717 Emotions are indispensable in human communication, but are often overlooked in task-oriented dialogue (ToD) modelling, where the task success is the primary focus. While existing works have explored user emotions or similar concepts in some ToD tasks, none has so far included emotion modelling into a fully-fledged ToD system nor conducted interaction with human or simulated users. In this work, we incorporate emotion into the complete ToD processing loop, involving understanding, management, and generation. To this end, we extend the EmoWOZ dataset (Feng et al., 2022) with system affective behaviour labels. Through interactive experimentation involving both simulated and human users, we demonstrate that our proposed framework significantly enhances the user’s emotional experience as well as the task success. 2024.sigdial-1.60 @@ -790,7 +790,7 @@ ShuwenQiu MingdianLiu HengliLi - Song-ChunZhu + Song-ChunZhu ZilongZheng 746–759 Humans talk in daily conversations while aligning and negotiating the expressed meanings or common ground. Despite the impressive conversational abilities of the large generative language models, they do not consider the individual differences in contextual understanding in a shared situated environment. In this work, we propose MindDial, a novel conversational framework that can generate situated free-form responses to align and negotiate common ground. We design an explicit mind module that can track three-level beliefs – the speaker’s belief, the speaker’s prediction of the listener’s belief, and the belief gap between the first two. Then the next response is generated to resolve the belief difference and take task-related action. Our framework is applied to both prompting and fine-tuning-based models, and is evaluated across scenarios involving both common ground alignment and negotiation. Experiments show that models with mind modeling can generate more human-like responses when aligning and negotiating common ground. The ablation study further validates the three-level belief design can aggregate information and improve task outcomes in both cooperative and negotiating settings. diff --git a/data/xml/2024.sighan.xml b/data/xml/2024.sighan.xml index 63e2cc810c..b92a701a3f 100644 --- a/data/xml/2024.sighan.xml +++ b/data/xml/2024.sighan.xml @@ -3,7 +3,7 @@ Proceedings of the 10th SIGHAN Workshop on Chinese Language Processing (SIGHAN-10) - Kam-FaiWong + Kam-FaiWong MinZhang RuifengXu JingLi @@ -104,7 +104,7 @@ JingLiThe Hong Kong Polytechnic University ZhongyuWeiFudan University QiZhangFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 58-68 Vast amount of online conversations are produced on a daily basis, resulting in a pressing need to automatic conversation understanding. As a basis to structure a discussion, we identify the responding relations in the conversation discourse, which link response utterances to their initiations. To figure out who responded to whom, here we explore how the consistency of topic contents and dependency of discourse roles indicate such interactions, whereas most prior work ignore the effects of latent factors underlying word occurrences. We propose a neural model to learn latent topics and discourse in word distributions, and predict pairwise initiation-response links via exploiting topic consistency and discourse dependency. Experimental results on both English and Chinese conversations show that our model significantly outperforms the previous state of the arts. 2024.sighan-1.7 @@ -172,7 +172,7 @@ XingrenWang ShanhongLiu YuxiangJia - HongyingZan + HongyingZan 112-120 The DimABSA task requires fine-grained sentiment intensity prediction for restaurant reviews, including scores for Valence and Arousal dimensions for each Aspect Term. In this study, we propose a Coarse-to-Fine In-context Learning (CFICL) method based on the Baichuan2-7B model for the DimABSA task in the SIGHAN 2024 workshop. Our method improves prediction accuracy through a two-stage optimization process. In the first stage, we use fixed in-context examples and prompt templates to enhance the model’s sentiment recognition capability and provide initial predictions for the test data. In the second stage, we encode the Opinion field using BERT and select the most similar training data as new in-context examples based on similarity. These examples include the Opinion field and its scores, as well as related opinion words and their average scores. By filtering for sentiment polarity, we ensure that the examples are consistent with the test data. Our method significantly improves prediction accuracy and consistency by effectively utilizing training data and optimizing in-context examples, as validated by experimental results. 2024.sighan-1.13 @@ -240,7 +240,7 @@ Overview of the <fixed-case>SIGHAN</fixed-case> 2024 shared task for <fixed-case>C</fixed-case>hinese dimensional aspect-based sentiment analysis Lung-HaoLeeNational Yang Ming Chiao Tung University - Liang-ChihYuYuan Ze University + Liang-ChihYuYuan Ze University SugeWang JianLiaoShanxi University 165-174 diff --git a/data/xml/2024.sigmorphon.xml b/data/xml/2024.sigmorphon.xml index 03fdcccae4..77eefcfd9c 100644 --- a/data/xml/2024.sigmorphon.xml +++ b/data/xml/2024.sigmorphon.xml @@ -5,7 +5,7 @@ Proceedings of the 21st SIGMORPHON workshop on Computational Research in Phonetics, Phonology, and Morphology GarrettNicolai EleanorChodroff - FredericMailhot + FredericMailhot ÇağrıÇöltekin Association for Computational Linguistics
Mexico City, Mexico
@@ -78,7 +78,7 @@ The Effect of Model Capacity and Script Diversity on Subword Tokenization for <fixed-case>S</fixed-case>orani <fixed-case>K</fixed-case>urdish AliSalehi - Cassandra L.Jacobs + Cassandra L.Jacobs 51-56 Tokenization and morphological segmentation continue to pose challenges for text processing and studies of human language. Here, we focus on written Soranî Kurdish, which uses a modified script based on Persian and Arabic, and its transliterations into the Kurdish Latin script. Importantly, Perso-Arabic and Latin-based writing systems demonstrate different statistical and structural properties, which may have significant effects on subword vocabulary learning. This has major consequences for frequency- or probability-based models of morphological induction. We explore the possibility that jointly training subword vocabularies using a source script along with its transliteration would improve morphological segmentation, subword tokenization, and whether gains are observed for one system over others. We find that joint training has a similar effect to increasing vocabulary size, while keeping subwords shorter in length, which produces higher-quality subwords that map onto morphemes. 2024.sigmorphon-1.6 @@ -99,7 +99,7 @@ Acoustic barycenters as exemplar production targets FredericMailhot - Cassandra L.Jacobs + Cassandra L.Jacobs 67-76 We present a solution to the problem of exemplar-based language production from variable-duration tokens, leveraging algorithms from the domain of time-series clustering and classification. Our model stores and outputs tokens of phonetically rich and temporally variable representations of recorded speech. We show qualitatively and quantitatively that model outputs retain essential acoustic/phonetic characteristics despite the noise introduced by averaging, and also demonstrate the effects of similarity and indexical information as constraints on exemplar cloud selection. 2024.sigmorphon-1.8 diff --git a/data/xml/2024.signlang.xml b/data/xml/2024.signlang.xml index c5ff525723..f158e8613a 100644 --- a/data/xml/2024.signlang.xml +++ b/data/xml/2024.signlang.xml @@ -4,7 +4,7 @@ Proceedings of the LREC-COLING 2024 11th Workshop on the Representation and Processing of Sign Languages: Evaluation of Sign Language Resources EleniEfthimiou - Stavroula-EvitaFotinea + Stavroula-EvitaFotinea ThomasHanke Julie A.Hochgesang JohannaMesch @@ -34,7 +34,7 @@ Person Identification from Pose Estimates in Sign Language AlessiaBattisti Emmavan den Bold - AnneGöhring + AnneGöhring FranzHolzknecht SarahEbling 13–25 @@ -108,7 +108,7 @@ DiandraFabre YanisOuakrim JulieLascar - AnneliesBraffort + AnneliesBraffort MichèleGouiffès DenisBeautemps 95–101 @@ -172,7 +172,7 @@ Shedding Light on the Underexplored: Tackling the Minor Sign Language Research Topics Jung-HoKim ChangyongKo - MathewHuerta-Enochian + MathewHuerta-Enochian Seung YongKo 147–158 2024.signlang-1.16 @@ -244,7 +244,7 @@ Annotation of <fixed-case>LSF</fixed-case> subtitled videos without a pre-existing dictionary JulieLascar MichèleGouiffès - AnneliesBraffort + AnneliesBraffort ClaireDanet 204–212 2024.signlang-1.22 @@ -272,7 +272,7 @@ Quantitative Analysis of Hand Locations in both Sign Language and Non-linguistic Gesture Videos NielsMartínez-Guevara - ArturoCuriel + ArturoCuriel 225–234 2024.signlang-1.25 martinez-guevara-curiel-2024-quantitative @@ -391,7 +391,7 @@ HuijeLee Eui JunHwang SukminCho - Jong C.Park + Jong C.Park 323–334 2024.signlang-1.36 roh-etal-2024-preprocessing @@ -465,7 +465,7 @@ José LuisAlba-Castro AniaPérez-Pérez CarmenCabeza-Pereiro - LauraDocío-Fernández + LauraDocío-Fernández 386–394 2024.signlang-1.43 vazquez-enriquez-etal-2024-signamed diff --git a/data/xml/2024.sigtyp.xml b/data/xml/2024.sigtyp.xml index cde5ec0ab9..86810aa165 100644 --- a/data/xml/2024.sigtyp.xml +++ b/data/xml/2024.sigtyp.xml @@ -7,7 +7,7 @@ AlexeySorokin RiteshKumar AndreasShcherbakov - YuliaOtmakhova + YuliaOtmakhova JinruiYang OlegSerikov PriyaRani @@ -85,7 +85,7 @@ The Typology of Ellipsis: A Corpus for Linguistic Analysis and Machine Learning Applications - DamirCavarIndiana University + DamirCavarIndiana University LudovicMompelat MuhammadAbdo 46-54 @@ -169,7 +169,7 @@ XiluoHe PrabhjotKaur OliverAdams - DanJurafskyStanford University + DanJurafskyStanford University 100-112 While massively multilingual speech models like wav2vec 2.0 XLSR-128 can be directly fine-tuned for automatic speech recognition (ASR), downstream performance can still be relatively poor on languages that are under-represented in the pre-training data. Continued pre-training on 70–200 hours of untranscribed speech in these languages can help — but what about languages without that much recorded data? For such cases, we show that supplementing the target language with data from a similar, higher-resource ‘donor’ language can help. For example, continued pretraining on only 10 hours of low-resource Punjabi supplemented with 60 hours of donor Hindi is almost as good as continued pretraining on 70 hours of Punjabi. By contrast, sourcing supplemental data from less similar donors like Bengali does not improve ASR performance. To inform donor language selection, we propose a novel similarity metric based on the sequence distribution of induced acoustic units: the Acoustic Token Distribution Similarity (ATDS). Across a set of typologically different target languages (Punjabi, Galician, Iban, Setswana), we show that the ATDS between the target language and its candidate donors precisely predicts target language ASR performance. 2024.sigtyp-1.13 @@ -185,7 +185,7 @@ LucasHuang EthanChi R.McCoyYale University - DragomirRadevYale University + DragomirRadevYale University 113-119 Large language models (LLMs) perform well on (at least) some evaluations of both few-shot multilingual adaptation and reasoning. However, evaluating the intersection of these two skills—multilingual few-shot reasoning—is difficult: even relatively low-resource languages can be found in large training corpora, raising the concern that when we intend to evaluate a model’s ability to generalize to a new language, that language may have in fact been present during the model’s training. If such language contamination has occurred, apparent cases of few-shot reasoning could actually be due to memorization. Towards understanding the capability of models to perform multilingual few-shot reasoning, we propose modeLing, a benchmark of Rosetta stone puzzles. This type of puzzle, originating from competitions called Linguistics Olympiads, contain a small number of sentences in a target language not previously known to the solver. Each sentence is translated to the solver’s language such that the provided sentence pairs uniquely specify a single most reasonable underlying set of rules; solving requires applying these rules to translate new expressions (Figure 1). modeLing languages are chosen to be extremely low-resource such that the risk of training data contamination is low, and unlike prior datasets, it consists entirely of problems written specifically for this work, as a further measure against data leakage. Empirically, we find evidence that popular LLMs do not have data leakage on our benchmark. 2024.sigtyp-1.14 @@ -235,9 +235,9 @@ OksanaDerezaUniversity of Galway AdrianDoyleUniversity of Galway PriyaRaniUniversity of Galway - Atul Kr.OjhaUniversity of Galway + Atul Kr.OjhaUniversity of Galway PádraicMoranUniversity of Galway - JohnMcCraeUniversity of Galway + JohnMcCraeUniversity of Galway 160-172 This paper discusses the organisation and findings of the SIGTYP 2024 Shared Task on Word Embedding Evaluation for Ancient and Historical Languages. The shared task was split into the constrained and unconstrained tracks and involved solving either 3 or 5 problems for either 13 or 16 ancient and historical languages belonging to 4 language families, and making use of 6 different scripts. There were 14 registrations in total, of which 3 teams submitted to each track. Out of these 6 submissions, 2 systems were successful in the constrained setting and another 2 in the uncon- strained setting, and 4 system description papers were submitted by different teams. The best average result for morphological feature prediction was about 96%, while the best average results for POS-tagging and lemmatisation were 96% and 94% respectively. At the word level, the winning team could not achieve a higher average accuracy across all 16 languages than 5.95%, which demonstrates the difficulty of this problem. At the character level, the best average result over 16 languages 55.62% 2024.sigtyp-1.19 diff --git a/data/xml/2024.sigul.xml b/data/xml/2024.sigul.xml index 38b80d5c98..69ecc7cb3f 100644 --- a/data/xml/2024.sigul.xml +++ b/data/xml/2024.sigul.xml @@ -40,10 +40,10 @@ Advancing Generative <fixed-case>AI</fixed-case> for <fixed-case>P</fixed-case>ortuguese with Open Decoder Gervásio <fixed-case>PT</fixed-case>* RodrigoSantos - João RicardoSilva + João RicardoSilva LuísGomes - JoãoRodrigues - AntónioBranco + JoãoRodrigues + AntónioBranco 16–26 To advance the neural decoding of Portuguese, in this paper we present a fully open Transformer-based, instruction-tuned decoder model that sets a new state of the art in this respect. To develop this decoder, which we named Gervásio PT*, a strong LLaMA 2 7B model was used as a starting point, and its further improvement through additional training was done over language resources that include new instruction data sets of Portuguese prepared for this purpose, which are also contributed in this paper. All versions of Gervásio are open source and distributed for free under an open license, including for either research or commercial usage, and can be run on consumer-grade hardware, thus seeking to contribute to the advancement of research and innovation in language technology for Portuguese. 2024.sigul-1.3 @@ -51,7 +51,7 @@ Assessing Pre-Built Speaker Recognition Models for Endangered Language Data - Gina-AnneLevow + Gina-AnneLevow 27–32 Significant research has focused on speaker recognition, determining which speaker is speaking in a segment of audio. However, few experiments have investigated speaker recognition for very low-resource or endangered languages. Furthermore, speaker recognition has the potential to support language documentation and revitalization efforts, making recordings more accessible to researchers and communities. Since endangered language datasets are too small to build competitive speaker representations from scratch, we investigate the application of large-scale pre-built speaker recognition models to bridge this gap. This paper compares four speaker recognition models on six diverse endangered language data sets. Comparisons contrast three recent neural network-based x-vector models and an earlier baseline i-vector model. Experiments demonstrate significantly stronger performance for some of the studied models. Further analysis highlights differences in effectiveness tied to the lengths of test audio segments and amount of data used for speaker modeling. 2024.sigul-1.4 @@ -73,7 +73,7 @@ Svanhvít LiljaIngólfsdóttir Haukur BarriSímonarson HafsteinnEinarsson - Anton KarlIngason + Anton KarlIngason VilhjálmurÞorsteinsson 45–52 Automatic spell and grammar checking can be done using various system architectures, and large language models have recently been used to solve the task with promising results. Here we describe a new method of creating test data to measure the performance of spell and grammar checkers, including large language models. Three types of test data represent different approaches to evaluation, from basic error detection to error correction with natural language explanations of the corrections made and error severity scores, which is the main novelty of this approach. These additions are especially useful when evaluating large language models. We present a spell and grammar checking test set for Icelandic in which the described approach is applied. The data consists of whole texts instead of discrete sentences, which facilitates evaluating context awareness of models. The resulting test set can be used to compare different spell and grammar checkers and is published under permissive licenses. @@ -146,7 +146,7 @@ SallyBruen LiangXu MonicaWard - ElaineUí Dhonnchadha + ElaineUí Dhonnchadha JenniferFoster 90–96 Digital game-based language learning (DGBLL) can help with the language learning process. DGBLL applications can make learning more enjoyable and engaging, but they are difficult to develop. A DBGLL app that relies on target language texts obviously needs to be able to use texts of the appropriate level for the individual learners. This implies that text classification tools should be available to DGBLL developers, who may not be familiar with the target language, in order to incorporate suitable texts into their games. While text difficulty classifiers exist for many of the most commonly spoken languages, this is not the case for under-resourced languages, such as Irish. In this paper, we explore approaches to the development of text classifiers for Irish. In the first approach to text analysis and grading, we apply linguistic analysis to assess text complexity. Features from this approach are then used in machine learning-based text classification, which explores the application of a number of machine learning algorithms to the problem. Although the development of these text classifiers is at an early stage, they show promise, particularly in a low-resourced scenario. @@ -166,10 +166,10 @@ Fostering the Ecosystem of Open Neural Encoders for <fixed-case>P</fixed-case>ortuguese with Albertina <fixed-case>PT</fixed-case>* Family RodrigoSantos - JoãoRodrigues + JoãoRodrigues LuísGomes - João RicardoSilva - AntónioBranco + João RicardoSilva + AntónioBranco HenriqueLopes Cardoso Tomás FreitasOsório BernardoLeite @@ -181,7 +181,7 @@ Improving Language Coverage on <fixed-case>H</fixed-case>e<fixed-case>LI</fixed-case>-<fixed-case>OTS</fixed-case> TommiJauhiainen - KristerLindén + KristerLindén 115–125 In this paper, we add under-resourced languages into the language repertoire of an existing off-the-shelf language identifier, HeLI-OTS. Adding more languages to a language identifier often comes with the drawback of lessened accuracy for the languages already part of the repertoire. We aim to minimize this effect. As sources for training and development data in the new languages, we use the OpenLID and FLORES-200 datasets. They are openly available high-quality datasets that are especially well-suited for language identifier development. By carefully inspecting the effect of each added language and the quality of their training and development data, we managed to add support for 20 new under-resourced languages to HeLI-OTS without affecting the performance of any existing languages to a noticeable extent. 2024.sigul-1.15 @@ -209,7 +209,7 @@ <fixed-case>I</fixed-case>ndonesian-<fixed-case>E</fixed-case>nglish Code-Switching Speech Recognition Using the Machine Speech Chain Based Semi-Supervised Learning Rais Vaza ManTazakka - DessiLestari + DessiLestari AyuPurwarianti DiptaTanaya KurniawatiAzizah @@ -231,7 +231,7 @@ Investigating Neural Machine Translation for Low-Resource Languages: Using <fixed-case>B</fixed-case>avarian as a Case Study Wan-huaHer - UdoKruschwitz + UdoKruschwitz 155–167 Machine Translation has made impressive progress in recent years offering close to human-level performance on many languages, but studies have primarily focused on high-resource languages with broad online presence and resources. With the help of growing Large Language Models, more and more low-resource languages achieve better results through the presence of other languages. However, studies have shown that not all low-resource languages can benefit from multilingual systems, especially those with insufficient training and evaluation data. In this paper, we revisit state-of-the-art Neural Machine Translation techniques to develop automatic translation systems between German and Bavarian. We investigate conditions of low-resource languages such as data scarcity and parameter sensitivity and focus on refined solutions that combat low-resource difficulties and creative solutions such as harnessing language similarity. Our experiment entails applying Back-translation and Transfer Learning to automatically generate more training data and achieve higher translation performance. We demonstrate noisiness in the data and present our approach to carry out text preprocessing extensively. Evaluation was conducted using combined metrics: BLEU, chrF and TER. Statistical significance results with Bonferroni correction show surprisingly high baseline systems, and that Back-translation leads to significant improvement. Furthermore, we present a qualitative analysis of translation errors and system limitations. 2024.sigul-1.20 @@ -259,7 +259,7 @@ Language Models on a Diet: Cost-Efficient Development of Encoders for Closely-Related Languages via Additional Pretraining NikolaLjubešić - VítSuchomel + VítSuchomel PeterRupnik TajaKuzman Rikvan Noord @@ -270,7 +270,7 @@ Man or Machine: Evaluating Spelling Error Detection in <fixed-case>D</fixed-case>anish Newspaper Corpora - EckhardBick + EckhardBick Jonas NygaardBlom MarianneRathje JørgenSchack @@ -285,7 +285,7 @@ DelphineBernhard MichaelNauge MyriamBras - PabloRuiz Fabo + PabloRuiz Fabo CaroleWerner 212–221 Metadata are key components of language resources and facilitate their exploitation and re-use. Their creation is a labour intensive process and requires a modeling step, which identifies resource-specific information as well as standards and controlled vocabularies that can be reused. In this article, we focus on metadata for documenting text bases for regional languages of France characterised by several levels of variation (space, time, usage, social status), based on a survey of existing metadata schema. Moreover, we implement our metadata model as a database structure for the Heurist data management system, which combines both the ease of use of spreadsheets and the ability to model complex relationships between entities of relational databases. The Heurist template is made freely available and was used to describe metadata for text bases in Alsatian and Poitevin-Santongeais. We also propose tools to automatically generate XML metadata headers files from the database. @@ -347,7 +347,7 @@ <fixed-case>P</fixed-case>ersian<fixed-case>E</fixed-case>mo: Enhancing <fixed-case>F</fixed-case>arsi-<fixed-case>D</fixed-case>ari Emotion Analysis with a Hybrid Transformer and Recurrent Neural Network Model Mohammad AliHussiny Mohammad ArifPayenda - LiljaØvrelid + LiljaØvrelid 257–263 Emotion analysis is a critical research domain within the field of natural language processing (NLP). While substantial progress has been made in this area for the Persian language, there is still a need for more precise models and larger datasets specifically focusing on the Farsi and Dari dialects. In this research, we introduce “LearnArmanEmo” as a new dataset and a superior ensemble approach for Persian text emotion classification. Our proposed model, which combines XLM-RoBERTa-large and BiGRU, undergoes evaluation on LetHerLearn for the Dari dialect, ARMANEMO for the Farsi dialect, and LearnArmanEmo for both Dari and Farsi dialects. The empirical results substantiate the efficacy of our approach with the combined model demonstrating superior performance. Specifically, our model achieves an F1 score of 72.9% on LetHerLearn, an F1 score of 77.1% on ARMANEMO, and an F1 score of 78.8% on the LearnArmanEmo dataset, establishing it as a better ensemble model for these datasets. These findings underscore the potential of this hybrid model as a useful tool for enhancing the performance of emotion analysis in Persian language processing. 2024.sigul-1.31 @@ -454,7 +454,7 @@ <fixed-case>TELP</fixed-case> – Text Extraction with Linguistic Patterns - JoãoCordeiro + JoãoCordeiro Purificação MouraSilvano AntónioLeal SebastiãoPais @@ -497,19 +497,19 @@ <fixed-case>U</fixed-case>ni<fixed-case>D</fixed-case>ive: A <fixed-case>COST</fixed-case> Action on Universality, Diversity and Idiosyncrasy in Language Technology AgataSavary - DanielZeman - VerginicaBarbu Mititelu + DanielZeman + VerginicaBarbu Mititelu AnabelaBarreiro OleseaCaftanatov - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe KajaDobrovoljc - GülşenEryiğit - VoulaGiouli + GülşenEryiğit + VoulaGiouli BrunoGuillaume - StellaMarkantonatou + StellaMarkantonatou NuritMelnik JoakimNivre - Atul Kr.Ojha + Atul Kr.Ojha CarlosRamisch AbigailWalsh BeataWójtowicz diff --git a/data/xml/2024.smm4h.xml b/data/xml/2024.smm4h.xml index 0a6e1a943f..f402bb62b7 100644 --- a/data/xml/2024.smm4h.xml +++ b/data/xml/2024.smm4h.xml @@ -4,7 +4,7 @@ Proceedings of the 9th Social Media Mining for Health Research and Applications (SMM4H 2024) Workshop and Shared Tasks DongfangXu - GracielaGonzalez-Hernandez + GracielaGonzalez-Hernandez Association for Computational Linguistics
Bangkok, Thailand
August @@ -22,7 +22,7 @@ ThangTa AbuRahman LotfollahNajjarUniversity of Nebraska at Omaha - AlexanderGelbukhInstituto Politécnico Nacional + AlexanderGelbukhInstituto Politécnico Nacional 1-4 This paper describes our participation in Task 3 and Task 5 of the #SMM4H (Social Media Mining for Health) 2024 Workshop, explicitly targeting the classification challenges within tweet data. Task 3 is a multi-class classification task centered on tweets discussing the impact of outdoor environments on symptoms of social anxiety. Task 5 involves a binary classification task focusing on tweets reporting medical disorders in children. We applied transfer learning from pre-trained encoder-decoder models such as BART-base and T5-small to identify the labels of a set of given tweets. We also presented some data augmentation methods to see their impact on the model performance. Finally, the systems obtained the best F1 score of 0.627 in Task 3 and the best F1 score of 0.841 in Task 5 2024.smm4h-1.1 @@ -68,7 +68,7 @@ <fixed-case>RIGA</fixed-case> at <fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case>-2024 Task 1: Enhancing <fixed-case>ADE</fixed-case> discovery with <fixed-case>GPT</fixed-case>-4 EduardsMukans - GuntisBarzdinsUniversity of Latvia + GuntisBarzdinsUniversity of Latvia 23-27 The following is a description of the RIGA team’s submissions for the SMM4H-2024 Task 1: Extraction and normalization of adverse drug events (ADEs) in English tweets. Our approach focuses on utilizing Large Language Models (LLMs) to generate data that enhances the fine-tuning of classification and Named Entity Recognition (NER) models. Our solution significantly outperforms mean and median submissions of other teams. The efficacy of our ADE extraction from tweets is comparable to the current state-of-the-art solution, established as the task baseline. The code for our method is available on GitHub (https://github.com/emukans/smm4h2024-riga) 2024.smm4h-1.6 @@ -156,8 +156,8 @@ VictorPozos HelenaGomez AdornoInstituto de Investigaciones en Matemáticas Aplicadas y en Sistemas - UNAM GibranFuentes-Pineda - GerardoSierraUniversidad Nacional Autónoma de México - GemmaBel-EnguixUniversidad Nacional Autónoma de México + GerardoSierraUniversidad Nacional Autónoma de México + GemmaBel-EnguixUniversidad Nacional Autónoma de México 63-66 We present our approach to solving the task of identifying the effect of outdoor activities on social anxiety based on reddit posts. We employed state-of-the-art transformer models enhanced with a combination of advanced loss functions. Data augmentation techniques were also used to address class imbalance within the training set. Our method achieved a macro-averaged F1-score of 0.655 on the test data, surpassing the workshop’s mean F1-Score of 0.519. These findings suggest that integrating weighted loss functions improves the performance of transformer models in classifying unbalanced text data, while data augmentation can improve the model’s ability to generalize. 2024.smm4h-1.14 @@ -212,7 +212,7 @@ <fixed-case>IMS</fixed-case>_medic<fixed-case>ALY</fixed-case> at #<fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case> 2024: Detecting Impacts of Outdoor Spaces on Social Anxiety with Data Augmented Ensembling - AmelieWuehrlUniversity of Stuttgart, Universität Stuttgart + AmelieWuehrlUniversity of Stuttgart, Universität Stuttgart LynnGreschnerOtto-Friedrich Universität Bamberg YarikMenchaca Resendiz RomanKlingerOtto-Friedrich Universität Bamberg @@ -294,7 +294,7 @@ <fixed-case>ADE</fixed-case> Oracle at #<fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case> 2024: A Two-Stage <fixed-case>NLP</fixed-case> System for Extracting and Normalizing Adverse Drug Events from Tweets AndrewDavisIndiana University BillyDicksonIndiana University - SandraKüblerIndiana University at Bloomington + SandraKüblerIndiana University at Bloomington 117-120 This study describes the approach of Team ADE Oracle for Task 1 of the Social Media Mining for Health Applications (#SMM4H) 2024 shared task. Task 1 challenges participants to detect adverse drug events (ADEs) within English tweets and normalize these mentions against the Medical Dictionary for Regulatory Activities standards. Our approach utilized a two-stage NLP pipeline consisting of a named entity recognition model, retrained to recognize ADEs, followed by vector similarity assessment with a RoBERTa-based model. Despite achieving a relatively high recall of 37.4% in the extraction of ADEs, indicative of effective identification of potential ADEs, our model encountered challenges with precision. We found marked discrepancies between recall and precision between the test set and our validation set, which underscores the need for further efforts to prevent overfitting and enhance the model’s generalization capabilities for practical applications. 2024.smm4h-1.27 @@ -353,7 +353,7 @@ <fixed-case>KUL</fixed-case>@<fixed-case>SMM</fixed-case>4<fixed-case>H</fixed-case>2024: Optimizing Text Classification with Quality-Assured Augmentation Strategies SumamFrancisKU Leuven, KU Leuven - Marie-FrancineMoensKU Leuven, KU Leuven + Marie-FrancineMoensKU Leuven, KU Leuven 142-145 This paper presents our models for the Social Media Mining for Health 2024 shared task, specifically Task 5, which involves classifying tweets reporting a child with childhood disorders (annotated as “1”) versus those merely mentioning a disorder (annotated as “0”). We utilized a classification model enhanced with diverse textual and language model-based augmentations. To ensure quality, we used semantic similarity, perplexity, and lexical diversity as evaluation metrics. Combining supervised contrastive learning and cross-entropy-based learning, our best model, incorporating R-drop and various LM generation-based augmentations, achieved an impressive F1 score of 0.9230 on the test set, surpassing the task mean and median scores. 2024.smm4h-1.33 @@ -425,7 +425,7 @@ ShokoWakamiyaNara Institute of Science and Technology EijiAramakiNara Institute of Science and Technology, Japan SebastianMöller - PierreZweigenbaumLISN, CNRS, Université Paris-Saclay + PierreZweigenbaumLISN, CNRS, Université Paris-Saclay 170-182 This paper provides an overview of Task 2 from the Social Media Mining for Health 2024 shared task (#SMM4H 2024), which focused on Named Entity Recognition (NER, Subtask 2a) and the joint task of NER and Relation Extraction (RE, Subtask 2b) for detecting adverse drug reactions (ADRs) in German, Japanese, and French texts written by patients. Participants were challenged with a few-shot learning scenario, necessitating models that can effectively generalize from limited annotated examples. Despite the diverse strategies employed by the participants, the overall performance across submissions from three teams highlighted significant challenges. The results underscored the complexity of extracting entities and relations in multi-lingual contexts, especially from the noisy and informal nature of user-generated content. Further research is required to develop robust systems capable of accurately identifying and associating ADR-related information in low-resource and multilingual settings. 2024.smm4h-1.39 @@ -441,7 +441,7 @@ EijiAramakiNara Institute of Science and Technology, Japan ShokoWakamiyaNara Institute of Science and Technology ShuntaroYadaNara Institute of Science and Technology, Japan - PierreZweigenbaumLISN, CNRS, Université Paris-Saclay + PierreZweigenbaumLISN, CNRS, Université Paris-Saclay KarenO’ConnorUniversity of Pennsylvania, University of Pennsylvania SaiSamineniCedars-Sinai Medical Center SophiaHernandezUniversity of Pittsburgh, Pittsburgh @@ -455,7 +455,7 @@ RaulRodriguez-EstebanF. Hoffmann-La Roche Ltd JuanBandaStanford University IvanAmaroCedars-Sinai Medical Center - DavyWeissenbacher + DavyWeissenbacher GracielaGonzalez-HernandezCedars-Sinai Medical Center 183-195 For the past nine years, the Social Media Mining for Health Applications (#SMM4H) shared tasks have promoted community-driven development and evaluation of advanced natural language processing systems to detect, extract, and normalize health-related information in publicly available user-generated content. This year, #SMM4H included seven shared tasks in English, Japanese, German, French, and Spanish from Twitter, Reddit, and health forums. A total of 84 teams from 22 countries registered for #SMM4H, and 45 teams participated in at least one task. This represents a growth of 180% and 160% in registration and participation, respectively, compared to the last iteration. This paper provides an overview of the tasks and participating systems. The data sets remain available upon request, and new systems can be evaluated through the post-evaluation phase on CodaLab. diff --git a/data/xml/2024.starsem.xml b/data/xml/2024.starsem.xml index 2fe71fa2d6..4ca9427998 100644 --- a/data/xml/2024.starsem.xml +++ b/data/xml/2024.starsem.xml @@ -20,7 +20,7 @@ <fixed-case>MASSIVE</fixed-case> Multilingual <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation: A Dataset and Baselines for Hallucination Detection MichaelReganPaul G. Allen School of Computer Science & Engineering ShiraWeinGeorgetown University - GeorgeBakerUniversity of Colorado Boulder + GeorgeBakerUniversity of Colorado Boulder EmilioMontiAmazon 1-17 Abstract Meaning Representation (AMR) is a semantic formalism that captures the core meaning of an utterance. There has been substantial work developing AMR corpora in English and more recently across languages, though the limited size of existing datasets and the cost of collecting more annotations are prohibitive. With both engineering and scientific questions in mind, we introduce MASSIVE-AMR, a dataset with more than 84,000 text-to-graph annotations, currently the largest and most diverse of its kind: AMR graphs for 1,685 information-seeking utterances mapped to 50+ typologically diverse languages. We describe how we built our resource and its unique features before reporting on experiments using large language models for multilingual AMR and SPARQL parsing as well as applying AMRs for hallucination detection in the context of knowledge base question answering, with results shedding light on persistent issues using LLMs for structured parsing. @@ -30,7 +30,7 @@ How Does Stereotype Content Differ across Data Sources? - KathleenFraserNational Research Council Canada + KathleenFraserNational Research Council Canada SvetlanaKiritchenkoNational Research Council Canada IsarNejadgholiNational Research Council Canada 18-34 @@ -44,7 +44,7 @@ Polysemy through the lens of psycholinguistic variables: a dataset and an evaluation of static and contextualized language models AndreaBrueraMax Planck Institute for Human Cognitive and Brain Sciences FarbodZamaniUniversity of London - MassimoPoesioQueen Mary University of London + MassimoPoesioQueen Mary University of London 35-48 Polysemes are words that can have different senses depending on the context of utterance: for instance, ‘newspaper’ can refer to an organization (as in ‘manage the newspaper’) or to an object (as in ‘open the newspaper’). Contrary to a large body of evidence coming from psycholinguistics, polysemy has been traditionally modelled in NLP by assuming that each sense should be given a separate representation in a lexicon (e.g. WordNet). This led to the current situation, where datasets used to evaluate the ability of computational models of semantics miss crucial details about the representation of polysemes, thus limiting the amount of evidence that can be gained from their use. In this paper we propose a framework to approach polysemy as a continuous variation in psycholinguistic properties of a word in context. This approach accommodates different sense interpretations, without postulating clear-cut jumps between senses. First we describe a publicly available English dataset that we collected, where polysemes in context (verb-noun phrases) are annotated for their concreteness and body sensory strength. Then, we evaluate static and contextualized language models in their ability to predict the ratings of each polyseme in context, as well as in their ability to capture the distinction among senses, revealing and characterizing in an interpretable way the models’ flaws. 2024.starsem-1.3 @@ -80,7 +80,7 @@ <fixed-case>ROUGE</fixed-case>-K: Do Your Summaries Have Keywords? SotaroTakeshitaUniversity of Mannheim - SimonePonzettoUniversity of Mannheim + SimonePonzettoUniversity of Mannheim KaiEckertMannheim University of Applied Sciences 69-79 Keywords, that is, content-relevant words in summaries play an important role in efficient information conveyance, making it critical to assess if system-generated summaries contain such informative words during evaluation. However, existing evaluation metrics for extreme summarization models do not pay explicit attention to keywords in summaries, leaving developers ignorant of their presence. To address this issue, we present a keyword-oriented evaluation metric, dubbed ROUGE-K, which provides a quantitative answer to the question of – How well do summaries include keywords? Through the lens of this keyword-aware metric, we surprisingly find that a current strong baseline model often misses essential information in their summaries. Our analysis reveals that human annotators indeed find the summaries with more keywords to be more relevant to the source documents. This is an important yet previously overlooked aspect in evaluating summarization systems. Finally, to enhance keyword inclusion, we propose four approaches for incorporating word importance into a transformer-based model and experimentally show that it enables guiding models to include more keywords while keeping the overall quality. @@ -107,7 +107,7 @@ RonjaUtescherUniversity of Bielefeld HannesGrönnerUniversity of Bielefeld JudithSiekerUniversity of Bielefeld - SinaZarrießUniversity of Bielefeld + SinaZarrießUniversity of Bielefeld 93-105 Research in Language & Vision rarely uses naturally occurring multimodal documents as Wikipedia articles, since they feature complex image-text relations and implicit image-text alignments. In this paper, we provide one of the first datasets that provides ground-truth annotations of image-text alignments in multi-paragraph multi-image articles. The dataset can be used to study phenomena of visual language grounding in longer documents and assess retrieval capabilities of language models trained on, e.g., captioning data. Our analyses show that there are systematic linguistic differences between the image captions and descriptive sentences from the article’s text and that intra-document retrieval is a challenging task for state-of-the-art models in L&V (CLIP, VILT, MCSE). 2024.starsem-1.8 @@ -170,7 +170,7 @@ A Closer Look at Claim Decomposition MiriamWannerJohns Hopkins University SethEbnerJohns Hopkins University - ZhengpingJiangJohns Hopkins University + ZhengpingJiangJohns Hopkins University MarkDredzeJohns Hopkins University BenjaminVan DurmeJohns Hopkins University 153-175 @@ -213,7 +213,7 @@ <fixed-case>PDDLEGO</fixed-case>: Iterative Planning in Textual Environments LiZhangUniversity of Pennsylvania - PeterJansenThe University of Arizona + PeterJansenThe University of Arizona TianyiZhangUniversity of Pennsylvania PeterClarkAllen Institute for Artificial Intelligence ChrisCallison-BurchUniversity of Pennsylvania @@ -229,8 +229,8 @@ <fixed-case>VOLIMET</fixed-case>: A Parallel Corpus of Literal and Metaphorical Verb-Object Pairs for <fixed-case>E</fixed-case>nglish–<fixed-case>G</fixed-case>erman and <fixed-case>E</fixed-case>nglish–<fixed-case>F</fixed-case>rench PriscaPiccirilliUniversity of Stuttgart - AlexanderFraserTechnical University of Munich and Munich Centre for Machine Learning - SabineSchulte im WaldeUniversity of Stuttgart + AlexanderFraserTechnical University of Munich and Munich Centre for Machine Learning + SabineSchulte im WaldeUniversity of Stuttgart 222-237 The interplay of cultural and linguistic elements that characterizes metaphorical language poses a substantial challenge for both human comprehension and machine processing. This challenge goes beyond monolingual settings and becomes particularly complex in translation, even more so in automatic translation. We present VOLIMET, a corpus of 2,916 parallel sentences containing gold standard alignments of metaphorical verb-object pairs and their literal paraphrases, e.g., tackle/address question, from English to German and French. On the one hand, the parallel nature of our corpus enables us to explore monolingual patterns for metaphorical vs. literal uses in English. On the other hand, we investigate different aspects of cross-lingual translations into German and French and the extent to which metaphoricity and literalness in the source language are transferred to the target languages. Monolingually, our findings reveal clear preferences in using metaphorical or literal uses of verb-object pairs. Cross-lingually, we observe a rich variability in translations as well as different behaviors for our two target languages. 2024.starsem-1.18 @@ -242,7 +242,7 @@ Deniz EkinYavasHeinrich Heine University Düsseldorf TimothéeBernardUniversité Paris Cité LauraKallmeyerHeinrich Heine University Düsseldorf - BenoîtCrabbéUniversité Paris Cité + BenoîtCrabbéUniversité Paris Cité 238-251 This paper addresses the problem of word sense induction (WSI) via clustering of word embeddings. It starts from the hypothesis that contextualized word representations obtained from pre-trained language models (LMs), while being a valuable source for WSI, encode more information than what is necessary for the identification of word senses and some of this information affect the performance negatively in unsupervised settings. We investigate whether using contextualized representations that are invariant to these ‘nuisance features’ can increase WSI performance. For this purpose, we propose an adaptation of the adversarial training framework proposed by Jaiswal et al. (2020) to erase specific information from the representations of LMs, thereby creating feature-invariant representations. We experiment with erasing (i) morphological and (ii) syntactic features. The results of subsequent clustering for WSI show that these features indeed act like noise: Using feature-invariant representations, compared to using the original representations, increases clustering-based WSI performance. Furthermore, we provide an in-depth analysis of how the information about the syntactic and morphological features of words relate to and affect WSI performance. 2024.starsem-1.19 @@ -253,7 +253,7 @@ What’s wrong with your model? A Quantitative Analysis of Relation Classification ElisaBassignanaIT University of Copenhagen and Pioneer Center for Artificial Intelligence Robvan der GootIT University of Copenhagen and Pioneer Center for Artificial Intelligence - BarbaraPlankIT University of Copenhagen and LMU Munich + BarbaraPlankIT University of Copenhagen and LMU Munich 252-263 With the aim of improving the state-of-the-art (SOTA) on a target task, a standard strategy in Natural Language Processing (NLP) research is to design a new model, or modify the existing SOTA, and then benchmark its performance on the target task. We argue in favor of enriching this chain of actions by a preliminary error-guided analysis: First, explore weaknesses by analyzing the hard cases where the existing model fails, and then target the improvement based on those. Interpretable evaluation has received little attention for structured prediction tasks. Therefore we propose the first in-depth analysis suite for Relation Classification (RC), and show its effectiveness through a case study. We propose a set of potentially influential attributes to focus on (e.g., entity distance, sentence length). Then, we bucket our datasets based on these attributes, and weight the importance of them through correlations. This allows us to identify highly challenging scenarios for the RC model. By exploiting the findings of our analysis, with a carefully targeted adjustment to our architecture, we effectively improve the performance over the baseline by >3 Micro-F1. 2024.starsem-1.20 @@ -299,7 +299,7 @@ Multilingual and Code-Switched Sentence Ordering AlexandreSalleVTEX - ShervinMalmasiAmazon.com, Inc. + ShervinMalmasiAmazon.com, Inc. 308-313 Sentence Ordering (SO) is a linguistic task which requires re-ordering of shuffled sentences into a coherent paragraph. SO has downstream applications, but also serves as a semantic probe for computational models as this capability is essential for understanding narrative structures, causal and temporal relations within texts. Despite its importance, prior research has been limited to predictable English language structures and has not thoroughly addressed the complexities of multilingual and varied narrative contexts. To fill this gap, we introduce a novel and comprehensive Multilingual Sentence Ordering task that extends SO to diverse narratives across 12 languages, including challenging code-switched texts. We have developed MultiSO, a new benchmark dataset that represents these challenges. Our findings reveal that both specialized sentence ordering models and advanced Large Language Models like GPT-4 face significant challenges with this task. 2024.starsem-1.24 @@ -309,7 +309,7 @@ <fixed-case>HANS</fixed-case>, are you clever? Clever Hans Effect Analysis of Neural Systems LeonardoRanaldiUniversità degli studi Roma Tor Vergata - FabioZanzottoUniversità degli studi Roma Tor Vergata + FabioZanzottoUniversità degli studi Roma Tor Vergata 314-325 Large Language Models (LLMs) have been exhibiting outstanding abilities to reason around cognitive states, intentions, and reactions of all people involved, letting humans guide and comprehend day-to-day social interactions effectively. In fact, several multiple-choice questions (MCQ) benchmarks have been proposed to construct solid assessments of the models’ abilities. However, earlier works demonstrate the presence of inherent “order bias” in LLMs, posing challenges to the appropriate evaluation. In this paper, we investigate LLMs’ resilience abilities through a series of probing tests using four MCQ benchmarks. Introducing adversarial examples, we show a significant performance gap, mainly when varying the order of the choices, which reveals a selection bias and brings into discussion reasoning abilities. Following a correlation between first positions and model choices due to positional bias, we hypothesized the presence of structural heuristics in the decision-making process of the LLMs, strengthened by including significant examples in few-shot scenarios. Finally, by using the Chain-of-Thought (CoT) technique, we elicit the model to reason and mitigate the bias by obtaining more robust models. 2024.starsem-1.25 @@ -368,7 +368,7 @@ Elena SofiaRuzzettiUniversity of Rome Tor Vergata DavideVendittiUniversity of Rome Tor Vergata DarioOnoratiSapienza University of Rome - Fabio MassimoZanzottoUniversity of Rome Tor Vergata + Fabio MassimoZanzottoUniversity of Rome Tor Vergata 372-384 Cheap-to-Build Very Large-Language Models (CtB-LLMs) with affordable training are emerging as the next big revolution in natural language processing and understanding. These CtB-LLMs are democratizing access to trainable Very Large-Language Models (VLLMs) and, thus, may represent the building blocks of many NLP systems solving downstream tasks. Hence, a little or a large bias in CtB-LLMs may cause huge harm. In this paper, we performed a large investigation of the bias of three families of CtB-LLMs, and we showed that debiasing techniques are effective and usable. Indeed, according to current tests, the LLaMA and the OPT families have an important bias in gender, race, religion, and profession. In contrast to the analysis for other LMMs, we discovered that bias depends not on the number of parameters but on the perplexity. Finally, the debiasing of OPT using LORA reduces bias up to 4.12 points in the normalized stereotype score. 2024.starsem-1.30 diff --git a/data/xml/2024.swisstext.xml b/data/xml/2024.swisstext.xml index 175a4805a8..72e577c22e 100644 --- a/data/xml/2024.swisstext.xml +++ b/data/xml/2024.swisstext.xml @@ -160,7 +160,7 @@ VukVuković DanielDobos FatemehBorran - AndreiPopescu-Belis + AndreiPopescu-Belis 164–164 2024.swisstext-1.16 ferrari-etal-2024-llm @@ -169,7 +169,7 @@ Annotation Tool for Dataset Creation PatrickGiedemann Piusvon Däniken - Jan MilanDeriu + Jan MilanDeriu 165–165 2024.swisstext-1.17 giedemann-etal-2024-annotation @@ -194,7 +194,7 @@ <fixed-case>C</fixed-case>ha<fixed-case>LL</fixed-case> - A Chatbot for Language Learners - ManuelaHürlimann + ManuelaHürlimann LuziaSauer GeroldSchneider JohannesGraën @@ -203,7 +203,7 @@ KatsiarynaMlynchyk Ahmet YavuzUluslu Irina-CameliaStroescu - JanDeriu + JanDeriu MichaelGeiss MarkCieliebak 168–168 @@ -401,9 +401,9 @@ <fixed-case>S</fixed-case>wiss <fixed-case>AI</fixed-case> Initiative - Collecting Large Amounts of High-Quality Data for Training Large Language Models - JanDeriu + JanDeriu MaudEhrmann - EmanuelaBoros + EmanuelaBoros MaximilianBöther ChristianeSibille IhorProtsenko @@ -561,7 +561,7 @@ Battle of <fixed-case>NLP</fixed-case> Ideas - ManuelaHürlimann + ManuelaHürlimann KatsiarynaMlynchyk PhilippKuntschik 248–248 diff --git a/data/xml/2024.tacl.xml b/data/xml/2024.tacl.xml index f1a1fd912c..3b0da153f3 100644 --- a/data/xml/2024.tacl.xml +++ b/data/xml/2024.tacl.xml @@ -40,8 +40,8 @@ FaisalLadhak EsinDurmus PercyLiang - KathleenMcKeown - Tatsunori B.Hashimoto + KathleenMcKeown + Tatsunori B.Hashimoto 10.1162/tacl_a_00632 Large language models (LLMs) have shown promise for automatic summarization but the reasons behind their successes are poorly understood. By conducting a human evaluation on ten LLMs across different pretraining methods, prompts, and model scales, we make two important observations. First, we find instruction tuning, not model size, is the key to the LLM’s zero-shot summarization capability. Second, existing studies have been limited by low-quality references, leading to underestimates of human performance and lower few-shot and finetuning performance. To better evaluate LLMs, we perform human evaluation over high-quality summaries we collect from freelance writers. Despite major stylistic differences such as the amount of paraphrasing, we find that LLM summaries are judged to be on par with human written summaries. 39–57 @@ -96,7 +96,7 @@ Addressing the Binning Problem in Calibration Assessment through Scalar Annotations - ZhengpingJiang + ZhengpingJiang AnqiLiu BenjaminVan Durme 10.1162/tacl_a_00636 @@ -125,7 +125,7 @@ Lost in the Middle: How Language Models Use Long Contexts - Nelson F.Liu + Nelson F.Liu KevinLin JohnHewitt AshwinParanjape @@ -215,7 +215,7 @@ Explicitly Representing Syntax Improves Sentence-to-Layout Prediction of Unexpected Situations WolfNuyts RubenCartuyvels - Marie-FrancineMoens + Marie-FrancineMoens 10.1162/tacl_a_00643 Recognizing visual entities in a natural language sentence and arranging them in a 2D spatial layout require a compositional understanding of language and space. This task of layout prediction is valuable in text-to-image synthesis as it allows localized and controlled in-painting of the image. In this comparative study it is shown that we can predict layouts from language representations that implicitly or explicitly encode sentence syntax, if the sentences mention similar entity-relationships to the ones seen during training. To test compositional understanding, we collect a test set of grammatically correct sentences and layouts describing compositions of entities and relations that unlikely have been seen during training. Performance on this test set substantially drops, showing that current models rely on correlations in the training data and have difficulties in understanding the structure of the input sentences. We propose a novel structural loss function that better enforces the syntactic structure of the input sentence and show large performance gains in the task of 2D spatial layout prediction conditioned on text. The loss has the potential to be used in other generation tasks where a tree-like structure underlies the conditioning modality. Code, trained models, and the USCOCO evaluation set are available via Github.1 264–282 @@ -239,7 +239,7 @@ The Impact of Word Splitting on the Semantic Content of Contextualized Word Representations - Aina GaríSoler + Aina GaríSoler MatthieuLabeau ChloéClavel 10.1162/tacl_a_00647 @@ -301,7 +301,7 @@ LukasEdman GabrieleSarti AntonioToral - Gertjanvan Noord + Gertjanvan Noord AriannaBisazza 10.1162/tacl_a_00651 Pretrained character-level and byte-level language models have been shown to be competitive with popular subword models across a range of Natural Language Processing tasks. However, there has been little research on their effectiveness for neural machine translation (NMT), particularly within the popular pretrain-then-finetune paradigm. This work performs an extensive comparison across multiple languages and experimental conditions of character- and subword-level pretrained models (ByT5 and mT5, respectively) on NMT. We show the effectiveness of character-level modeling in translation, particularly in cases where fine-tuning data is limited. In our analysis, we show how character models’ gains in translation quality are reflected in better translations of orthographically similar words and rare words. While evaluating the importance of source texts in driving model predictions, we highlight word-level patterns within ByT5, suggesting an ability to modulate word-level and character-level information during generation. We conclude by assessing the efficiency tradeoff of byte models, suggesting their usage in non-time-critical scenarios to boost translation quality. @@ -314,8 +314,8 @@ ValentinHofmann GoranGlavaš NikolaLjubešić - Janet B.Pierrehumbert - HinrichSchütze + Janet B.Pierrehumbert + HinrichSchütze 10.1162/tacl_a_00652 While pretrained language models (PLMs) have been shown to possess a plethora of linguistic knowledge, the existing body of research has largely neglected extralinguistic knowledge, which is generally difficult to obtain by pretraining on text alone. Here, we contribute to closing this gap by examining geolinguistic knowledge, i.e., knowledge about geographic variation in language. We introduce geoadaptation, an intermediate training step that couples language modeling with geolocation prediction in a multi-task learning setup. We geoadapt four PLMs, covering language groups from three geographic areas, and evaluate them on five different tasks: fine-tuned (i.e., supervised) geolocation prediction, zero-shot (i.e., unsupervised) geolocation prediction, fine-tuned language identification, zero-shot language identification, and zero-shot prediction of dialect features. Geoadaptation is very successful at injecting geolinguistic knowledge into the PLMs: The geoadapted PLMs consistently outperform PLMs adapted using only language modeling (by especially wide margins on zero-shot prediction tasks), and we obtain new state-of-the-art results on two benchmarks for geolocation prediction and language identification. Furthermore, we show that the effectiveness of geoadaptation stems from its ability to geographically retrofit the representation space of the PLMs. 411–431 @@ -347,7 +347,7 @@ <fixed-case>C</fixed-case>onvo<fixed-case>S</fixed-case>ense: Overcoming Monotonous Commonsense Inferences for Conversational <fixed-case>AI</fixed-case> Sarah E.Finch - Jinho D.Choi + Jinho D.Choi 10.1162/tacl_a_00659 Mastering commonsense understanding and reasoning is a pivotal skill essential for conducting engaging conversations. While there have been several attempts to create datasets that facilitate commonsense inferences in dialogue contexts, existing datasets tend to lack in-depth details, restate information already present in the conversation, and often fail to capture the multifaceted nature of commonsense reasoning. In response to these limitations, we compile a new synthetic dataset for commonsense reasoning in dialogue contexts using GPT, ℂonvoSense, that boasts greater contextual novelty, offers a higher volume of inferences per example, and substantially enriches the detail conveyed by the inferences. Our dataset contains over 500,000 inferences across 12,000 dialogues with 10 popular inference types, which empowers the training of generative commonsense models for dialogue that are superior in producing plausible inferences with high novelty when compared to models trained on the previous datasets. To the best of our knowledge, ℂonvoSense is the first of its kind to provide such a multitude of novel inferences at such a large scale. 467–483 @@ -374,7 +374,7 @@ JiseonKim NayeonLee HaneulYoo - AliceOh + AliceOh HwaranLee 10.1162/tacl_a_00661 Warning: This paper contains examples of stereotypes and biases. The Bias Benchmark for Question Answering (BBQ) is designed to evaluate social biases of language models (LMs), but it is not simple to adapt this benchmark to cultural contexts other than the US because social biases depend heavily on the cultural context. In this paper, we present KoBBQ, a Korean bias benchmark dataset, and we propose a general framework that addresses considerations for cultural adaptation of a dataset. Our framework includes partitioning the BBQ dataset into three classes—Simply-Transferred (can be used directly after cultural translation), Target-Modified (requires localization in target groups), and Sample-Removed (does not fit Korean culture)—and adding four new categories of bias specific to Korean culture. We conduct a large-scale survey to collect and validate the social biases and the targets of the biases that reflect the stereotypes in Korean culture. The resulting KoBBQ dataset comprises 268 templates and 76,048 samples across 12 categories of social bias. We use KoBBQ to measure the accuracy and bias scores of several state-of-the-art multilingual LMs. The results clearly show differences in the bias of LMs as measured by KoBBQ and a machine-translated version of BBQ, demonstrating the need for and utility of a well-constructed, culturally aware social bias benchmark. @@ -427,7 +427,7 @@ HaoZhou ShujianHuang ShanboCheng - JiajunChen + JiajunChen 10.1162/tacl_a_00655 Large-scale pretrained language models (LLMs), such as ChatGPT and GPT4, have shown strong abilities in multilingual translation, without being explicitly trained on parallel corpora. It is intriguing how the LLMs obtain their ability to carry out translation instructions for different languages. In this paper, we present a detailed analysis by finetuning a multilingual pretrained language model, XGLM-7.5B, to perform multilingual translation following given instructions. Firstly, we show that multilingual LLMs have stronger translation abilities than previously demonstrated. For a certain language, the translation performance depends on its similarity to English and the amount of data used in the pretraining phase. Secondly, we find that LLMs’ ability to carry out translation instructions relies on the understanding of translation instructions and the alignment among different languages. With multilingual finetuning with translation instructions, LLMs could learn to perform the translation task well even for those language pairs unseen during the instruction tuning phase. 576–592 @@ -437,7 +437,7 @@ Semantics of Multiword Expressions in Transformer-Based Models: A Survey FilipMiletić - SabineSchulte im Walde + SabineSchulte im Walde 10.1162/tacl_a_00657 Multiword expressions (MWEs) are composed of multiple words and exhibit variable degrees of compositionality. As such, their meanings are notoriously difficult to model, and it is unclear to what extent this issue affects transformer architectures. Addressing this gap, we provide the first in-depth survey of MWE processing with transformer models. We overall find that they capture MWE semantics inconsistently, as shown by reliance on surface patterns and memorized information. MWE meaning is also strongly localized, predominantly in early layers of the architecture. Representations benefit from specific linguistic properties, such as lower semantic idiosyncrasy and ambiguity of target expressions. Our findings overall question the ability of transformer models to robustly capture fine-grained semantics. Furthermore, we highlight the need for more directly comparable evaluation setups. 593–612 @@ -471,7 +471,7 @@ Computational Complexity of Natural Morphology Revisited HajimeSenuma - AkikoAizawa + AkikoAizawa 10.1162/tacl_a_00665 This paper revisits a classical, yet fundamental, discussion of theoretical computational linguistics: the computational complexity of natural languages. Past studies have revealed that syntax, as observed in Swiss-German, is not weakly context-free. Concerning morphology, Culy (1985) employed a construction in Bambara to show that morphology is not weakly context-free; however, Manaster-Ramer (1988) pointed out that the Bambara case can be problematic because the wordhood of the construction is reliant on special tonal behaviors, and it is ambiguous whether the behaviors belong to the morphological domain. This raises doubts about whether the case can be considered a genuine morphological phenomenon. In this paper, we argue that Classical Ainu, a language we examine, also defies weak context-freeness at the morphological level. The construction we introduce is unambiguously morphological because this language’s valency-sensitive structure and valency-changing operations, such as noun incorporation, preclude its grammatical interpretation as syntactic. 649–663 @@ -637,7 +637,7 @@ JessyLin NicholasTomlin JacobAndreas - JasonEisner + JasonEisner 10.1162/tacl_a_00679 We describe a class of tasks called decision-oriented dialogues, in which AI assistants such as large language models (LMs) must collaborate with one or more humans via natural language to help them make complex decisions. We formalize three domains in which users face everyday decisions: (1) choosing an assignment of reviewers to conference papers, (2) planning a multi-step itinerary in a city, and (3) negotiating travel plans for a group of friends. In each of these settings, AI assistants and users have disparate abilities that they must combine to arrive at the best decision: Assistants can access and process large amounts of information, while users have preferences and constraints external to the system. For each task, we build a dialogue environment where agents receive a reward based on the quality of the final decision they reach. We evaluate LMs in self-play and in collaboration with humans and find that they fall short compared to human assistants, achieving much lower rewards despite engaging in longer dialogues. We highlight a number of challenges models face in decision-oriented dialogues, ranging from goal-directed behavior to reasoning and optimization, and release our environments as a testbed for future work. 892–911 @@ -689,7 +689,7 @@ Miryam deLhoneux DanielHershcovich MichelDeGraff - AndersSøgaard + AndersSøgaard JohannesBjerva 10.1162/tacl_a_00682 Creoles represent an under-explored and marginalized group of languages, with few available resources for NLP research. While the genealogical ties between Creoles and a number of highly resourced languages imply a significant potential for transfer learning, this potential is hampered due to this lack of annotated data. In this work we present CreoleVal, a collection of benchmark datasets spanning 8 different NLP tasks, covering up to 28 Creole languages; it is an aggregate of novel development datasets for reading comprehension relation classification, and machine translation for Creoles, in addition to a practical gateway to a handful of preexisting benchmarks. For each benchmark, we conduct baseline experiments in a zero-shot setting in order to further ascertain the capabilities and limitations of transfer learning for Creoles. Ultimately, we see CreoleVal as an opportunity to empower research on Creoles in NLP and computational linguistics, and in general, a step towards more equitable language technology around the globe. @@ -702,9 +702,9 @@ Nuno M.Guerreiro RicardoRei Daan vanStigt - LuisaCoheur + LuisaCoheur PierreColombo - André F. T.Martins + André F. T.Martins 10.1162/tacl_a_00683 Widely used learned metrics for machine translation evaluation, such as Comet and Bleurt, estimate the quality of a translation hypothesis by providing a single sentence-level score. As such, they offer little insight into translation errors (e.g., what are the errors and what is their severity). On the other hand, generative large language models (LLMs) are amplifying the adoption of more granular strategies to evaluation, attempting to detail and categorize translation errors. In this work, we introduce xcomet, an open-source learned metric designed to bridge the gap between these approaches. xcomet integrates both sentence-level evaluation and error span detection capabilities, exhibiting state-of-the-art performance across all types of evaluation (sentence-level, system-level, and error span detection). Moreover, it does so while highlighting and categorizing error spans, thus enriching the quality assessment. We also provide a robustness analysis with stress tests, and show that xcomet is largely capable of identifying localized critical errors and hallucinations. 979–995 @@ -717,7 +717,7 @@ QiongkaiXu JunWang Benjamin I. P.Rubinstein - TrevorCohn + TrevorCohn 10.1162/tacl_a_00684 Modern NLP models are often trained on public datasets drawn from diverse sources, rendering them vulnerable to data poisoning attacks. These attacks can manipulate the model’s behavior in ways engineered by the attacker. One such tactic involves the implantation of backdoors, achieved by poisoning specific training instances with a textual trigger and a target class label. Several strategies have been proposed to mitigate the risks associated with backdoor attacks by identifying and removing suspected poisoned examples. However, we observe that these strategies fail to offer effective protection against several advanced backdoor attacks. To remedy this deficiency, we propose a novel defensive mechanism that first exploits training dynamics to identify poisoned samples with high precision, followed by a label propagation step to improve recall and thus remove the majority of poisoned instances. Compared with recent advanced defense methods, our method considerably reduces the success rates of several backdoor attacks while maintaining high classification accuracy on clean test sets. 996–1010 @@ -755,8 +755,8 @@ Do Multi-Document Summarization Models Synthesize? JayDeYoung Stephanie C.Martinez - Iain J.Marshall - Byron C.Wallace + Iain J.Marshall + Byron C.Wallace 10.1162/tacl_a_00687 Multi-document summarization entails producing concise synopses of collections of inputs. For some applications, the synopsis should accurately synthesize inputs with respect to a key aspect, e.g., a synopsis of film reviews written about a particular movie should reflect the average critic consensus. As a more consequential example, narrative summaries that accompany biomedical systematic reviews of clinical trial results should accurately summarize the potentially conflicting results from individual trials. In this paper we ask: To what extent do modern multi-document summarization models implicitly perform this sort of synthesis? We run experiments over opinion and evidence synthesis datasets using a suite of summarization models, from fine-tuned transformers to GPT-4. We find that existing models partially perform synthesis, but imperfectly: Even the best performing models are over-sensitive to changes in input ordering and under-sensitive to changes in input compositions (e.g., ratio of positive to negative reviews). We propose a simple, general, effective method for improving model synthesis capabilities by generating an explicitly diverse set of candidate outputs, and then selecting from these the string best aligned with the expected aggregate measure for the inputs, or abstaining when the model produces no good candidate. 1043–1062 @@ -791,7 +791,7 @@ JorgeIranzo-Sánchez AdriàGiménez JorgeCivera - AlfonsJuan + AlfonsJuan 10.1162/tacl_a_00691 Streaming Machine Translation (MT) is the task of translating an unbounded input text stream in real-time. The traditional cascade approach, which combines an Automatic Speech Recognition (ASR) and an MT system, relies on an intermediate segmentation step which splits the transcription stream into sentence-like units. However, the incorporation of a hard segmentation constrains the MT system and is a source of errors. This paper proposes a Segmentation-Free framework that enables the model to translate an unsegmented source stream by delaying the segmentation decision until after the translation has been generated. Extensive experiments show how the proposed Segmentation-Free framework has better quality-latency trade-off than competing approaches that use an independent segmentation model.1 1104–1121 @@ -801,7 +801,7 @@ Do Language Models Enjoy Their Own Stories? Prompting Large Language Models for Automatic Story Evaluation CyrilChhun - Fabian M.Suchanek + Fabian M.Suchanek ChloéClavel 10.1162/tacl_a_00689 Storytelling is an integral part of human experience and plays a crucial role in social interactions. Thus, Automatic Story Evaluation (ASE) and Generation (ASG) could benefit society in multiple ways, but they are challenging tasks which require high-level human abilities such as creativity, reasoning, and deep understanding. Meanwhile, Large Language Models (LLMs) now achieve state-of-the-art performance on many NLP tasks. In this paper, we study whether LLMs can be used as substitutes for human annotators for ASE. We perform an extensive analysis of the correlations between LLM ratings, other automatic measures, and human annotations, and we explore the influence of prompting on the results and the explainability of LLM behaviour. Most notably, we find that LLMs outperform current automatic measures for system-level evaluation but still struggle at providing satisfactory explanations for their answers. @@ -875,7 +875,7 @@ JiaangLi YovaKementchedjhieva ConstanzaFierro - AndersSøgaard + AndersSøgaard 10.1162/tacl_a_00698 Large-scale pretrained language models (LMs) are said to “lack the ability to connect utterances to the world” (Bender and Koller, 2020), because they do not have “mental models of the world” (Mitchell and Krakauer, 2023). If so, one would expect LM representations to be unrelated to representations induced by vision models. We present an empirical evaluation across four families of LMs (BERT, GPT-2, OPT, and LLaMA-2) and three vision model architectures (ResNet, SegFormer, and MAE). Our experiments show that LMs partially converge towards representations isomorphic to those of vision models, subject to dispersion, polysemy, and frequency. This has important implications for both multi-modal processing and the LM understanding debate (Mitchell and Krakauer, 2023).1 1232–1249 @@ -888,7 +888,7 @@ AminFarajian PatrickFernandes RicardoRei - André F. T.Martins + André F. T.Martins 10.1162/tacl_a_00700 Despite the recent success of automatic metrics for assessing translation quality, their application in evaluating the quality of machine-translated chats has been limited. Unlike more structured texts like news, chat conversations are often unstructured, short, and heavily reliant on contextual information. This poses questions about the reliability of existing sentence-level metrics in this domain as well as the role of context in assessing the translation quality. Motivated by this, we conduct a meta-evaluation of existing automatic metrics, primarily designed for structured domains such as news, to assess the quality of machine-translated chats. We find that reference-free metrics lag behind reference-based ones, especially when evaluating translation quality in out-of-English settings. We then investigate how incorporating conversational contextual information in these metrics for sentence-level evaluation affects their performance. Our findings show that augmenting neural learned metrics with contextual information helps improve correlation with human judgments in the reference-free scenario and when evaluating translations in out-of-English settings. Finally, we propose a new evaluation metric, Context-MQM, that utilizes bilingual context with a large language model (LLM) and further validate that adding context helps even for LLM-based evaluation metrics. 1250–1267 @@ -912,7 +912,7 @@ MelanieSubbiah SeanZhang Lydia B.Chilton - KathleenMcKeown + KathleenMcKeown 10.1162/tacl_a_00702 We evaluate recent Large Language Models (LLMs) on the challenging task of summarizing short stories, which can be lengthy, and include nuanced subtext or scrambled timelines. Importantly, we work directly with authors to ensure that the stories have not been shared online (and therefore are unseen by the models), and to obtain informed evaluations of summary quality using judgments from the authors themselves. Through quantitative and qualitative analysis grounded in narrative theory, we compare GPT-4, Claude-2.1, and LLama-2-70B. We find that all three models make faithfulness mistakes in over 50% of summaries and struggle with specificity and interpretation of difficult subtext. We additionally demonstrate that LLM ratings and other automatic metrics for summary quality do not correlate well with the quality ratings from the writers. 1290–1310 @@ -932,9 +932,9 @@ YeLiu SemihYavuz CaimingXiong - ShafiqJoty + ShafiqJoty YingboZhou - DragomirRadev + DragomirRadev ArmanCohan ArmanCohan 10.1162/tacl_a_00705 @@ -982,7 +982,7 @@ NeeleFalk AnaBarić DmitryNikolaev - SebastianPadó + SebastianPadó 10.1162/tacl_a_00710 Due to the widespread use of large language models (LLMs), we need to understand whether they embed a specific “worldview” and what these views reflect. Recent studies report that, prompted with political questionnaires, LLMs show left-liberal leanings (Feng et al., 2023; Motoki et al., 2024). However, it is as yet unclear whether these leanings are reliable (robust to prompt variations) and whether the leaning is consistent across policies and political leaning. We propose a series of tests which assess the reliability and consistency of LLMs’ stances on political statements based on a dataset of voting-advice questionnaires collected from seven EU countries and annotated for policy issues. We study LLMs ranging in size from 7B to 70B parameters and find that their reliability increases with parameter count. Larger models show overall stronger alignment with left-leaning parties but differ among policy programs: They show a (left-wing) positive stance towards environment protection, social welfare state, and liberal society but also (right-wing) law and order, with no consistent preferences in the areas of foreign policy and migration. 1378–1400 @@ -1031,7 +1031,7 @@ Conformalizing Machine Translation Evaluation ChrysoulaZerva - André F. T.Martins + André F. T.Martins 10.1162/tacl_a_00711 Several uncertainty estimation methods have been recently proposed for machine translation evaluation. While these methods can provide a useful indication of when not to trust model predictions, we show in this paper that the majority of them tend to underestimate model uncertainty, and as a result, they often produce misleading confidence intervals that do not cover the ground truth. We propose as an alternative the use of conformal prediction, a distribution-free method to obtain confidence intervals with a theoretically established guarantee on coverage. First, we demonstrate that split conformal prediction can “correct” the confidence intervals of previous methods to yield a desired coverage level, and we demonstrate these findings across multiple machine translation evaluation metrics and uncertainty quantification methods. Further, we highlight biases in estimated confidence intervals, reflected in imbalanced coverage for different attributes, such as the language and the quality of translations. We address this by applying conditional conformal prediction techniques to obtain calibration subsets for each data subgroup, leading to equalized coverage. Overall, we show that, provided access to a calibration set, conformal prediction can help identify the most suitable uncertainty quantification methods and adapt the predicted confidence intervals to ensure fairness with respect to different attributes.1 1460–1478 @@ -1043,7 +1043,7 @@ Zheng WeiLim EkaterinaVylomova CharlesKemp - TrevorCohn + TrevorCohn 10.1162/tacl_a_00714 Human translators linger on some words and phrases more than others, and predicting this variation is a step towards explaining the underlying cognitive processes. Using data from the CRITT Translation Process Research Database, we evaluate the extent to which surprisal and attentional features derived from a Neural Machine Translation (NMT) model account for reading and production times of human translators. We find that surprisal and attention are complementary predictors of translation difficulty, and that surprisal derived from a NMT model is the single most successful predictor of production duration. Our analyses draw on data from hundreds of translators operating across 13 language pairs, and represent the most comprehensive investigation of human translation difficulty to date. 1479–1496 @@ -1056,7 +1056,7 @@ AntónioFarinhas ChrysoulaZerva Mário A. T.Figueiredo - André F. T.Martins + André F. T.Martins 10.1162/tacl_a_00715 The rapid proliferation of large language models and natural language processing (NLP) applications creates a crucial need for uncertainty quantification to mitigate risks such as Hallucinations and to enhance decision-making reliability in critical applications. Conformal prediction is emerging as a theoretically sound and practically useful framework, combining flexibility with strong statistical guarantees. Its model-agnostic and distribution-free nature makes it particularly promising to address the current shortcomings of NLP systems that stem from the absence of uncertainty quantification. This paper provides a comprehensive survey of conformal prediction techniques, their guarantees, and existing applications in NLP, pointing to directions for future research and open challenges. 1497–1516 @@ -1177,7 +1177,7 @@ FajriKoto RahmadMahendra NurulAisyah - TimothyBaldwin + TimothyBaldwin 10.1162/tacl_a_00726 Although commonsense reasoning is greatly shaped by cultural and geographical factors, previous studies have predominantly centered on cultures grounded in the English language, potentially resulting in an Anglocentric bias. In this paper, we introduce IndoCulture, aimed at understanding the influence of geographical factors on language model reasoning ability, with a specific emphasis on the diverse cultures found within eleven Indonesian provinces. In contrast to prior work that has relied on templates (Yin et al., 2022) and online scrapping (Fung et al., 2024), we create IndoCulture by asking local people to manually develop a cultural context and plausible options, across a set of predefined topics. Evaluation of 27 language models reveals several insights: (1) the open-weight Llama–3 is competitive with GPT–4, while other open-weight models struggle, with accuracies below 50%; (2) there is a general pattern of models generally performing better for some provinces, such as Bali and West Java, and less well for others; and (3) the inclusion of location context enhances performance, especially for larger models like GPT–4, emphasizing the significance of geographical context in commonsense reasoning.1 1703–1719 diff --git a/data/xml/2024.tdle.xml b/data/xml/2024.tdle.xml index a371136cc3..6e7d9d2727 100644 --- a/data/xml/2024.tdle.xml +++ b/data/xml/2024.tdle.xml @@ -8,9 +8,9 @@ ItziarAldabe AritzFarwell BegonaAltuna - SteliosPiperidis + SteliosPiperidis GeorgRehm - GermanRigau + GermanRigau ELRA and ICCL
Torino, Italia
May @@ -48,8 +48,8 @@
Fine-Tuning Open Access <fixed-case>LLM</fixed-case>s for High-Precision <fixed-case>NLU</fixed-case> in Goal-Driven Dialog Systems - LluísPadró - RoserSaurí + LluísPadró + RoserSaurí 33–42 This paper presents a set of experiments on fine-tuning LLMs to produce high-precision semantic representations for the NLU component of a dialog system front-end. The aim of this research is threefold: First, we want to explore the capabilities of LLMs on real, industry-based use cases that involve complex data and strict requirements on results. Since the LLM output should usable by the application back-end, the produced semantic representation must satisfy strict format and consistency requirements. Second, we want to evaluate the cost-benefit of open-source LLMs, that is, the feasibility of running this kind of models in machines affordable to small-medium enterprises (SMEs), in order to assess how far this organizations can go without depending on the large players controlling the market, and with a moderate use of computation resources. Finally, we also want to assess the language scalability of the LLMs in this kind of applications; specifically, whether a multilingual model is able to cast patterns learnt from one language to other ones –with special attention to underresourced languages–, thus reducing required training data and computation costs. This work was carried out within an R&D context of assisting a real company in defining its NLU model strategy, and thus the results have a practical, industry-level focus. 2024.tdle-1.3 @@ -61,7 +61,7 @@ LuckySusanto ZiluTang AyuPurwarianti - Derry TantiWijaya + Derry TantiWijaya 43–52 Large Language Models (LLMs) demonstrate strong machine translation capabilities on languages they are trained on. However, the impact of factors beyond training data size on translation performance remains a topic of debate, especially concerning languages not directly encountered during training. Our study delves into Llama2’s translation capabilities. By modeling a linear relationship between linguistic feature distances and machine translation scores, we ask ourselves if there are potentially better central languages for LLMs other than English. Our experiments show that the 7B Llama2 model yields above 10 BLEU when translating into all languages it has seen, which rarely happens for languages it has not seen. Most translation improvements into unseen languages come from scaling up the model size rather than instruction tuning or increasing shot count. Furthermore, our correlation analysis reveals that syntactic similarity is not the only linguistic factor that strongly correlates with machine translation scores. Interestingly, we discovered that under specific circumstances, some languages (e.g. Swedish, Catalan), despite having significantly less training data, exhibit comparable correlation levels to English. These insights challenge the prevailing landscape of LLMs, suggesting that models centered around languages other than English could provide a more efficient foundation for multilingual applications. 2024.tdle-1.4 diff --git a/data/xml/2024.teachingnlp.xml b/data/xml/2024.teachingnlp.xml index ab44c9c988..802bc48131 100644 --- a/data/xml/2024.teachingnlp.xml +++ b/data/xml/2024.teachingnlp.xml @@ -55,7 +55,7 @@ Striking a Balance between Classical and Deep Learning Approaches in Natural Language Processing Pedagogy AdityaJoshiUNSW JakeRenzella - PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology + PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology SauravJha XiangyuZhang 23-32 @@ -65,7 +65,7 @@ Co-Creational Teaching of Natural Language Processing - JohnMcCraeNational University of Ireland Galway + JohnMcCraeNational University of Ireland Galway 33-42 Traditional lectures have poorer outcomes compared to active learning methodologies, yet many natural language processing classes in higher education still follow this outdated methodology. In this paper, we present, co-creational teaching, a methodology that encourages partnership between staff and lecturers and show how this can be applied to teach natural language processing. As a fast-moving and dynamic area of study with high interest from students, natural language processing is an ideal subject for innovative teaching methodologies to improve student outcomes. We detail our experience with teaching natural language processing through partnership with students and provide detailed descriptions of methodologies that can be used by others in their teaching, including considerations of diverse student populations. 2024.teachingnlp-1.5 @@ -82,7 +82,7 @@ BerndBischlLMU BenjaminRothUniversität Vienna ChristianHeumannLudwig-Maximilians-Universität München - HinrichSchütze + HinrichSchütze 43-53 In this work, we present a collaboratively and continuously developed open-source educational resource (OSER) for teaching natural language processing at two different universities. We shed light on the principles we followed for the initial design of the course and the rationale for ongoing developments, followed by a reflection on the inter-university collaboration for designing and maintaining teaching material. When reflecting on the latter, we explicitly emphasize the considerations that need to be made when facing heterogeneous groups and when having to accommodate multiple examination regulations within one single course framework. Relying on the fundamental principles of OSER developments as defined by Bothmann et al. (2023) proved to be an important guideline during this process. The final part pertains to open-sourcing our teaching material, coping with the increasing speed of developments in the field, and integrating the course digitally, also addressing conflicting priorities and challenges we are currently facing. 2024.teachingnlp-1.6 @@ -112,7 +112,7 @@ Teaching <fixed-case>LLM</fixed-case>s at <fixed-case>C</fixed-case>harles <fixed-case>U</fixed-case>niversity: Assignments and Activities - JindřichHelclEdinburgh University, University of Edinburgh + JindřichHelclEdinburgh University, University of Edinburgh ZdeněkKasner OndřejDušekCharles University, Prague TomaszLimisiewiczCharles University Prague diff --git a/data/xml/2024.teicai.xml b/data/xml/2024.teicai.xml index 8d8f965eca..3ebaa161b8 100644 --- a/data/xml/2024.teicai.xml +++ b/data/xml/2024.teicai.xml @@ -69,7 +69,7 @@ LauraDe Grazia AlexPeiró Lilja MireiaFarrús Cabeceran - MarionaTaulé + MarionaTaulé 28-35 This paper investigates the appropriate responses that Conversational Agent systems (CAs) should employ when subjected to sexual harassment by users. Previous studies indicate that conventional CAs often respond neutrally or evade such requests. Enhancing the responsiveness of CAs to offensive speech is crucial, as users might carry over these interactions into their social interactions. To address this issue, we selected evaluators to compare a series of responses to sexual harassment from four commercial CAs (Amazon Alexa, Apple Siri, Google Home, and Microsoft Cortana) with alternative responses we realized based on insights from psychological and sociological studies. Focusing on CAs with a female voice, given their increased likelihood of encountering offensive language, we conducted two experiments involving 22 evaluators (11 females and 11 males). In the initial experiment, participants assessed the responses in a textual format, while the second experiment involved the evaluation of responses generated with a synthetic voice exhibiting three different intonations (angry, neutral, and assertive). Results from the first experiment revealed a general preference for the responses we formulated. For the most voted replies, female evaluators exhibited a tendency towards responses with an assertive intent, emphasizing the sexually harassing nature of the request. Conversely, male evaluators leaned towards a more neutral response, aligning with prior findings that highlight gender-based differences in the perception of sexual harassment. The second experiment underscored a preference for assertive responses. The study’s outcomes highlight the need to develop new, educational responses from CAs to instances of sexual harassment, aiming to discourage harmful behavior. 2024.teicai-1.5 diff --git a/data/xml/2024.textgraphs.xml b/data/xml/2024.textgraphs.xml index 7b7b8f9dd9..56893deb00 100644 --- a/data/xml/2024.textgraphs.xml +++ b/data/xml/2024.textgraphs.xml @@ -30,7 +30,7 @@ OanaIgnat SantiagoCastroUniversity of Michigan WeijiLiTesla - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan 1-18 We address the task of human action representation and show how the approach to generating word representations based on co-occurrence can be adapted to generate human action representations by analyzing their co-occurrence in videos. To this end, we formalize the new task of human action co-occurrence identification in online videos, i.e., determine whether two human actions are likely to co-occur in the same interval of time.We create and make publicly available the Co-Act (Action Co-occurrence) dataset, consisting of a large graph of ~12k co-occurring pairs of visual actions and their corresponding video clips. We describe graph link prediction models that leverage visual and textual information to automatically infer if two actions are co-occurring.We show that graphs are particularly well suited to capture relations between human actions, and the learned graph representations are effective for our task and capture novel and relevant information across different data domains. 2024.textgraphs-1.1 @@ -43,7 +43,7 @@ SuyashFulayMassachusetts Institute of Technology HangJiang BrandonRoyMassachusetts Institute of Technology and Brown University - DebRoyMassachusetts Institute of Technology + DebRoyMassachusetts Institute of Technology JadKabbaraMassachusetts Institute of Technology 19-39 Learning on text-attributed graphs (TAGs), in which nodes are associated with one or more texts, has been the subject of much recent work. However, most approaches tend to make strong assumptions about the downstream task of interest, are reliant on hand-labeled data, or fail to equally balance the importance of both text and graph representations. In this work, we propose Contrastive Graph-Text pretraining (ConGraT), a general, self-supervised approach for jointly learning separate representations of texts and nodes in a TAG. Our method trains a language model (LM) and a graph neural network (GNN) to align their representations in a common latent space using a batch-wise contrastive learning objective inspired by CLIP. We further propose an extension to the CLIP objective that leverages graph structure to incorporate information about inter-node similarity. Extensive experiments demonstrate that ConGraT outperforms baselines on various downstream tasks, including node and text category classification, link prediction, and language modeling. Finally, we present an application of our method to community detection in social graphs, which enables finding more textually grounded communities, rather than purely graph-based ones. @@ -75,7 +75,7 @@ Prompt Me One More Time: A Two-Step Knowledge Extraction Pipeline with Ontology-Based Verification AllaChepurova - YuriKuratovAIRI, Artificial Intelligence Research Institute and Moscow Institute of Physics and Technology + YuriKuratovAIRI, Artificial Intelligence Research Institute and Moscow Institute of Physics and Technology AydarBulatovMoscow Institute of Physics and Technology MikhailBurtsevLondon Institute for Mathematical Sciences 61-77 @@ -87,7 +87,7 @@ Towards Understanding Attention-based Reasoning through Graph Structures in Medical Codes Classification NoonGoldstein SaadullahAmin - GünterNeumannGerman Research Center for AI + GünterNeumannGerman Research Center for AI 78-92 A common approach to automatically assigning diagnostic and procedural clinical codes to health records is to solve the task as a multi-label classification problem. Difficulties associated with this task stem from domain knowledge requirements, long document texts, large and imbalanced label space, reflecting the breadth and dependencies between medical diagnoses and procedures. Decisions in the healthcare domain also need to demonstrate sound reasoning, both when they are correct and when they are erroneous. Existing works address some of these challenges by incorporating external knowledge, which can be encoded into a graph-structured format. Incorporating graph structures on the output label space or between the input document and output label spaces have shown promising results in medical codes classification. Limited focus has been put on utilizing graph-based representation on the input document space. To partially bridge this gap, we represent clinical texts as graph-structured data through the UMLS Metathesaurus; we explore implicit graph representation through pre-trained knowledge graph embeddings and explicit domain-knowledge guided encoding of document concepts and relational information through graph neural networks. Our findings highlight the benefits of pre-trained knowledge graph embeddings in understanding model’s attention-based reasoning. In contrast, transparent domain knowledge guidance in graph encoder approaches is overshadowed by performance loss. Our qualitative analysis identifies limitations that contribute to prediction errors. 2024.textgraphs-1.6 @@ -180,7 +180,7 @@ <fixed-case>NLP</fixed-case>eople at <fixed-case>T</fixed-case>ext<fixed-case>G</fixed-case>raphs-17 Shared Task: Chain of Thought Questioning to Elicit Decompositional Reasoning MovinaMoses VishnudevKuruvanthodiInternational Business Machines - MohabElkarefInternational Business Machines + MohabElkarefInternational Business Machines ShinnosukeTanakaInternational Business Machines JamesBarry GeethMel diff --git a/data/xml/2024.tlt.xml b/data/xml/2024.tlt.xml index fcd275333a..eda692319c 100644 --- a/data/xml/2024.tlt.xml +++ b/data/xml/2024.tlt.xml @@ -5,7 +5,7 @@ Proceedings of the 22nd Workshop on Treebanks and Linguistic Theories (TLT 2024) DanielDakota SarahJablotschkin - SandraKübler + SandraKübler HeikeZinsmeister Association for Computational Linguistics
Hamburg,Germany
@@ -80,7 +80,7 @@ Introducing Shallow Syntactic Information within the Graph-based Dependency Parsing NikolayPaev - KirilSimov + KirilSimov PetyaOsenova 46-54 The paper presents a new BERT model, fine-tuned for parsing of Bulgarian texts. This model is extended with a new neural network layer in order to incorporate shallow syntactic information during the training phase. The results show statistically significant improvement over the baseline. Thus, the addition of syntactic knowledge - even partial - makes the model better. Also, some error analysis has been conducted on the results from the parsers. Although the architecture has been designed and tested for Bulgarian, it is also scalable for other languages. This scalability was shown here with some experiments and evaluation on an English treebank with a comparable size. diff --git a/data/xml/2024.trac.xml b/data/xml/2024.trac.xml index f2c8a1be86..94a8976294 100644 --- a/data/xml/2024.trac.xml +++ b/data/xml/2024.trac.xml @@ -4,8 +4,8 @@ Proceedings of the Fourth Workshop on Threat, Aggression & Cyberbullying @ LREC-COLING-2024 RiteshKumar - Atul Kr.Ojha - ShervinMalmasi + Atul Kr.Ojha + ShervinMalmasi Bharathi RajaChakravarthi BorniniLahiri SiddharthSingh @@ -67,7 +67,7 @@ Saatvik M.Krishna Soumya SangamJha Vartika T.Rao - Anand KumarM + Anand KumarM 32–36 The objective of the shared task, Offline Harm Potential Identification (HarmPot-ID), is to build models to predict the offline harm potential of social media texts. “Harm potential” is defined as the ability of an online post or comment to incite offline physical harm such as murder, arson, riot, rape, etc. The first subtask was to predict the level of harm potential, and the second was to identify the group to which this harm was directed towards. This paper details our submissions for the shared task that includes a cascaded SVM model, an XGBoost model, and a TF-IDF weighted Word2Vec embedding-supported SVM model. Several other models that were explored have also been detailed. 2024.trac-1.5 @@ -76,7 +76,7 @@ <fixed-case>LLM</fixed-case>-Based Synthetic Datasets: Applications and Limitations in Toxicity Detection MaximilianSchmidhuber - UdoKruschwitz + UdoKruschwitz 37–51 Large Language Model (LLM)-based Synthetic Data is becoming an increasingly important field of research. One of its promising applications is in training classifiers to detect online toxicity, which is of increasing concern in today’s digital landscape. In this work, we assess the feasibility of generative models to create synthetic data for toxic language detection. Our experiments are conducted on six different toxicity datasets, four of whom are hateful and two are toxic in the broader sense. We then employ a classifier trained on the original data for filtering. To explore the potential of this data, we conduct experiments using combinations of original and synthetic data, synthetic oversampling of the minority class, and a comparison of original vs. synthetic-only training. Results indicate that while our generative models offer benefits in certain scenarios, the approach does not improve hateful dataset classification. However, it does boost patronizing and condescending language detection. We find that synthetic data generated by LLMs is a promising avenue of research, but further research is needed to improve the quality of the generated data and develop better filtering methods. Code is available on GitHub; the generated dataset is available on Zenodo. 2024.trac-1.6 @@ -94,7 +94,7 @@ Analyzing Offensive Language and Hate Speech in Political Discourse: A Case Study of <fixed-case>G</fixed-case>erman Politicians MaximilianWeissenbacher - UdoKruschwitz + UdoKruschwitz 60–72 Social media platforms have become key players in political discourse. Twitter (now ‘X’), for example, is used by many German politicians to communicate their views and interact with others. Due to its nature, however, social networks suffer from a number of issues such as offensive content, toxic language and hate speech. This has attracted a lot of research interest but in the context of political discourse there is a noticeable gap with no such study specifically looking at German politicians in a systematic way. We aim to help addressing this gap. We first create an annotated dataset of 1,197 Twitter posts mentioning German politicians. This is the basis to explore a number of approaches to detect hate speech and offensive language (HOF) and identify an ensemble of transformer models that achieves an F1-Macros score of 0.94. This model is then used to automatically classify two much larger, longitudinal datasets: one with 520,000 tweets posted by MPs, and the other with 2,200,000 tweets which comprise posts from the public mentioning politicians. We obtain interesting insights in regards to the distribution of hate and offensive content when looking at different independent variables. 2024.trac-1.8 @@ -106,7 +106,7 @@ AnnikaSimonsen Atli SnærÁsmundsson Guðrún LiljaFriðjónsdóttir - Anton KarlIngason + Anton KarlIngason VésteinnSnæbjarnarson HafsteinnEinarsson 73–84 @@ -119,7 +119,7 @@ Melese AyichlieJigar Abinew AliAyele Seid MuhieYimam - ChrisBiemann + ChrisBiemann 85–95 In contemporary society, the proliferation of hate speech is increasingly prevalent across various social media platforms, with a notable trend of incorporating memes to amplify its visual impact and reach. The conventional text-based detection approaches frequently fail to address the complexities introduced by memes, thereby aggravating the challenges, particularly in low-resource languages such as Amharic. We develop Amharic meme hate speech detection models using 2,000 memes collected from Facebook, Twitter, and Telegram over four months. We employ native Amharic speakers to annotate each meme using a web-based tool, yielding a Fleiss’ kappa score of 0.50. We utilize different feature extraction techniques, namely VGG16 for images and word2Vec for textual content, and build unimodal and multimodal models such as LSTM, BiLSTM, and CNN. The BiLSTM model shows the best performance, achieving 63% accuracy for text and 75% for multimodal features. In image-only experiments, the CNN model achieves 69% in accuracy. Multimodal models demonstrate superior performance in detecting Amharic hate speech in memes, showcasing their potential to address the unique challenges posed by meme-based hate speech on social media. 2024.trac-1.10 @@ -176,7 +176,7 @@ PicaJohansson FrancescaStevens JonathanBright - Scott A.Hale + Scott A.Hale 134–154 Public figures receive disproportionate levels of abuse on social media, impacting their active participation in public life. Automated systems can identify abuse at scale but labelling training data is expensive and potentially harmful. So, it is desirable that systems are efficient and generalisable, handling shared and specific aspects of abuse. We explore the dynamics of cross-group text classification in order to understand how well models trained on one domain or demographic can transfer to others, with a view to building more generalisable abuse classifiers. We fine-tune language models to classify tweets targeted at public figures using our novel DoDo dataset, containing 28,000 entries with fine-grained labels, split equally across four Domain-Demographic pairs (male and female footballers and politicians). We find that (i) small amounts of diverse data are hugely beneficial to generalisation and adaptation; (ii) models transfer more easily across demographics but cross-domain models are more generalisable; (iii) some groups contribute more to generalisability than others; and (iv) dataset similarity is a signal of transferability. 2024.trac-1.15 @@ -191,7 +191,7 @@ SathyaBursic DavideTaibi DaviniaHernández-Leo - UdoKruschwitz + UdoKruschwitz DimitriOgnibene 155–166 Social media have become an integral part of our daily lives, yet they have also resulted in various negative effects on users, ranging from offensive or hateful content to the spread of misinformation. In recent years, numerous automated approaches have been proposed to identify and combat such harmful content. However, it is crucial to recognize the human aspect of users who engage with this content in designing efforts to mitigate these threats. We propose to incorporate principles of behavioral science, specifically the concept of nudging into social media platforms. Our approach involves augmenting social media feeds with informative diagrams, which provide insights into the content that users are presented. The goal of our work is to empower social media users to make well-informed decisions for themselves and for others within these platforms. Nudges serve as a means to gently draw users’ attention to content in an unintrusive manner, a crucial consideration in the context of social media. To evaluate the effectiveness of our approach, we conducted a user study involving 120 Italian-speaking participants who interacted with a social media interface augmented with these nudging diagrams. Participants who had used the augmented interface were able to outperform those using the plain interface in a successive harmful content detection test where nudging diagrams were not visible anymore. Our findings demonstrate that our approach significantly improves users’ awareness of potentially harmful content with effects lasting beyond the duration of the interaction. In this work, we provide a comprehensive overview of our experimental materials and setup, present our findings, and refer to the limitations identified during our study. @@ -204,7 +204,7 @@ Esubalew AlemnehJalew Adem ChanieAli Seid MuhieYimam - ChrisBiemann + ChrisBiemann 167–178 The prevalence of digital media and evolving sociopolitical dynamics have significantly amplified the dissemination of hateful content. Existing studies mainly focus on classifying texts into binary categories, often overlooking the continuous spectrum of offensiveness and hatefulness inherent in the text. In this research, we present an extensive benchmark dataset for Amharic, comprising 8,258 tweets annotated for three distinct tasks: category classification, identification of hate targets, and rating offensiveness and hatefulness intensities. Our study highlights that a considerable majority of tweets belong to the less offensive and less hate intensity levels, underscoring the need for early interventions by stakeholders. The prevalence of ethnic and political hatred targets, with significant overlaps in our dataset, emphasizes the complex relationships within Ethiopia’s sociopolitical landscape. We build classification and regression models and investigate the efficacy of models in handling these tasks. Our results reveal that hate and offensive speech can not be addressed by a simplistic binary classification, instead manifesting as variables across a continuous range of values. The afro-XLMR-large model exhibits the best performances achieving F1-scores of 75.30%, 70.59%, and 29.42% for the category, target, and regression tasks, respectively. The 80.22% correlation coefficient of the Afro-XLMR-large model indicates strong alignments. 2024.trac-1.17 diff --git a/data/xml/2024.trustnlp.xml b/data/xml/2024.trustnlp.xml index 441c12defb..e7b526ff04 100644 --- a/data/xml/2024.trustnlp.xml +++ b/data/xml/2024.trustnlp.xml @@ -84,7 +84,7 @@ Towards Healthy <fixed-case>AI</fixed-case>: Large Language Models Need Therapists Too BaihanLinColumbia University DjallelBouneffoufIBM - GuillermoCecchiIBM Research + GuillermoCecchiIBM Research KushVarshneyIBM Research 61-70 Recent advances in large language models (LLMs) have led to the development of powerful chatbots capable of engaging in fluent human-like conversations. However, these chatbots may be harmful, exhibiting manipulation, gaslighting, narcissism, and other toxicity. To work toward safer and more well-adjusted models, we propose a framework that uses psychotherapy to identify and mitigate harmful chatbot behaviors. The framework involves four different artificial intelligence (AI) agents: the Chatbot whose behavior is to be adjusted, a User, a Therapist, and a Critic that can be paired with reinforcement learning-based LLM tuning. We illustrate the framework with a working example of a social conversation involving four instances of ChatGPT, showing that the framework may mitigate the toxicity in conversations between LLM-driven chatbots and people. Although there are still several challenges and directions to be addressed in the future, the proposed framework is a promising approach to improving the alignment between LLMs and human values. @@ -110,7 +110,7 @@ AhmedZahranAgolo AbanoubAminAgolo AmrAbdelaalAgolo - MohamedAltantawyAgolo + MohamedAltantawyAgolo 79-84 This paper proposes a novel black-box approach for fact-level hallucination detection and classification by transforming the problem into a knowledge graph alignment task. This approach allows us to classify detected hallucinations as either intrinsic or extrinsic. The paper starts by discussing the field of hallucination detection and introducing several approaches to related work. Then, we introduce the proposed FactAlign approach for hallucination detection and discuss how we can use it to classify hallucinations as either intrinsic or extrinsic. Experiments are carried out to evaluate the proposed method against state-of-the-art methods on the hallucination detection task using the WikiBio GPT-3 hallucination dataset, and on the hallucination type classification task using the XSum hallucination annotations dataset. The experimental results show that our method achieves a 0.889 F1 score for the hallucination detection and 0.825 F1 for the hallucination type classification, without any further training, fine-tuning, or producing multiple samples of the LLM response. 2024.trustnlp-1.8 @@ -184,7 +184,7 @@ Tweak to Trust: Assessing the Reliability of Summarization Metrics in Contact Centers via Perturbed Summaries KevinPatelObserve.AI SurajAgrawalObserve.AI - AyushKumarObserve.AI + AyushKumarObserve.AI 172-186 In the dynamic realm of call center communications, the potential of abstractive summarization to transform information condensation is evident. However, evaluating the performance of abstractive summarization systems within contact center domain poses a significant challenge. Traditional evaluation metrics prove inadequate in capturing the multifaceted nature of call center conversations, characterized by diverse topics, emotional nuances, and dynamic contexts. This paper uses domain-specific perturbed summaries to scrutinize the robustness of summarization metrics in the call center domain. Through extensive experiments on call center data, we illustrate how perturbed summaries uncover limitations in existing metrics. We additionally utilize perturbation as data augmentation strategy to train domain-specific metrics. Our findings underscore the potential of perturbed summaries to complement current evaluation techniques, advancing reliable and adaptable summarization solutions in the call center domain. 2024.trustnlp-1.14 @@ -195,7 +195,7 @@ Flatness-Aware Gradient Descent for Safe Conversational <fixed-case>AI</fixed-case> LeilaKhalatbariSchool of Electrical and Computer Engineering, Hong Kong University of Science and Technology SaeidHosseiniSohar University - HosseinSametiSharif University of Technology + HosseinSametiSharif University of Technology PascaleFungHong Kong University of Science and Technology 187-195 As generative dialog models become ubiquitous in real-world applications, it is paramount to ensure a harmless generation. There are two major challenges when enforcing safety to open-domain chatbots. Firstly, it is impractical to provide training data reflecting the desired response to all emerging forms of toxicity (generalisation challenge). Secondly, implementing safety features may compromise the quality of the conversation (trade-off challenge). To tackle the challenges, this paper introduces a regularized fine-tuning approach called FlatGD. By employing a safety-tailored loss, we translate better optimization to more safety. To ensure better optimization, FlatGD penalizes sharp trajectories of loss curve, encouraging flatness of the converged local minima. Experimental results on datasets of “BAD” and “prosocial dialog” demonstrate that our model outperforms the current baselines in reducing toxicity while preserving the conversation quality. Moreover, compared to other baselines, FlatGD can better generalize to unseen toxic data. diff --git a/data/xml/2024.tsar.xml b/data/xml/2024.tsar.xml index 2baee68258..f94fd91541 100644 --- a/data/xml/2024.tsar.xml +++ b/data/xml/2024.tsar.xml @@ -8,7 +8,7 @@ FernandoAlva-ManchegoCardiff University, UK MarcosZampieriGeorge Mason University, USA KaiNorthCambium Assessment, USA - SanjaŠtajnerKarlsruhe, Germany + SanjaŠtajnerKarlsruhe, Germany ReginaStoddenHeinrich Heine University Dusseldorf, Germany Association for Computational Linguistics
Miami, Florida, USA
@@ -52,7 +52,7 @@ ShanyueGuoThe Hong Kong Polytechnic University Tak-SumWongDepartment of Chinese and Bilingual Studies EmmanueleChersoniHong Kong Polytechnic University - JohnLeeCity University of Hong Kong + JohnLeeCity University of Hong Kong Chu-RenHuangThe Hong Kong Polytechnic Universiy 20-26 The prediction of lexical complexity in context is assuming an increasing relevance in Natural Language Processing research, since identifying complex words is often the first step of text simplification pipelines. To the best of our knowledge, though, datasets annotated with complex words are available only for English and for a limited number of Western languages.In our paper, we introduce CompLex-ZH, a dataset including words annotated with complexity scores in sentential contexts for Chinese. Our data include sentences in Mandarin and Cantonese, which were selected from a variety of sources and textual genres. We provide a first evaluation with baselines combining hand-crafted and language models-based features. diff --git a/data/xml/2024.umrpw.xml b/data/xml/2024.umrpw.xml index 5410be67fa..7ae673b9c9 100644 --- a/data/xml/2024.umrpw.xml +++ b/data/xml/2024.umrpw.xml @@ -25,7 +25,7 @@ RezaTakhshid TaraAzin RaziehShojaei - MohammadBahrani + MohammadBahrani 8–15 This paper introduces the Persian Abstract Meaning Representation (AMR) guidelines, a detailed guide for annotating Persian sentences with AMR, focusing on the necessary adaptations to fit Persian’s unique syntactic structures. We discuss the development process of a Persian AMR gold standard dataset consisting of 1562 sentences created following the guidelines. By examining the language specifications and nuances that distinguish AMR annotations of a low-resource language like Persian, we shed light on the challenges and limitations of developing a universal meaning representation framework. The guidelines and the dataset introduced in this study highlight such challenges, aiming to advance the field. 2024.umrpw-1.2 diff --git a/data/xml/2024.uncertainlp.xml b/data/xml/2024.uncertainlp.xml index 1af5a58461..f1bd1c39f2 100644 --- a/data/xml/2024.uncertainlp.xml +++ b/data/xml/2024.uncertainlp.xml @@ -6,12 +6,12 @@ RaúlVázquez HandeCelikkanat DennisUlmer - JörgTiedemann + JörgTiedemann SwabhaSwayamdipta WilkerAziz - BarbaraPlank + BarbaraPlank JorisBaan - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe Association for Computational Linguistics
St Julians, Malta
March @@ -32,7 +32,7 @@ ArkaPal SamuelDooleyDepartment of Computer Science, University of Maryland, College Park and Abacus.AI MicahGoldblumNew York University - AndrewWilsonCornell University and New York University + AndrewWilsonCornell University and New York University 1-14 Large language models are increasingly deployed for high-stakes decision making, for example in financial and medical applications. In such applications, it is imperative that we be able to estimate our confidence in the answers output by a language model in order to assess risks. Although we can easily compute the probability assigned by a language model to the sequence of tokens that make up an answer, we cannot easily compute the probability of the answer itself, which could be phrased in numerous ways.While other works have engineered ways of assigning such probabilities to LLM outputs, a key problem remains: existing language models are poorly calibrated, often confident when they are wrong or unsure when they are correct. In this work, we devise a protocol called *calibration tuning* for finetuning LLMs to output calibrated probabilities. Calibration-tuned models demonstrate superior calibration performance compared to existing language models on a variety of question-answering tasks, including open-ended generation, without affecting accuracy. We further show that this ability transfers to new domains outside of the calibration-tuning train set. 2024.uncertainlp-1.1 @@ -62,7 +62,7 @@ Linguistic Obfuscation Attacks and Large Language Model Uncertainty SebastianSteindlOstbayerische Technische Hochschule Amberg-Weiden - UlrichSchäferOstbayerische Technische Hochschule Amberg-Weiden + UlrichSchäferOstbayerische Technische Hochschule Amberg-Weiden BerndLudwigUniversität Regensburg PatrickLeviOstbayerische Technische Hochschule Amberg-Weiden 35-40 diff --git a/data/xml/2024.unimplicit.xml b/data/xml/2024.unimplicit.xml index 9a4c60aa83..c15bc49fb3 100644 --- a/data/xml/2024.unimplicit.xml +++ b/data/xml/2024.unimplicit.xml @@ -35,7 +35,7 @@ KatharinaHechinger MatthiasAssenmacher GöranKauermann - BarbaraPlank + BarbaraPlank 22-32 In this work, we analyze the uncertainty that is inherently present in the labels used for supervised machine learning in natural language inference (NLI). In cases where multiple annotations per instance are available, neither the majority vote nor the frequency of individual class votes is a trustworthy representation of the labeling uncertainty. We propose modeling the votes via a Bayesian mixture model to recover the data-generating process, i.e., the “true” latent classes, and thus gain insight into the class variations. This will enable a better understanding of the confusion happening during the annotation process. We also assess the stability of the proposed estimation procedure by systematically varying the numbers of i) instances and ii) labels. Thereby, we observe that few instances with many labels can predict the latent class borders reasonably well, while the estimation fails for many instances with only a few labels. This leads us to conclude that multiple labels are a crucial building block for properly analyzing label uncertainty. 2024.unimplicit-1.2 @@ -64,7 +64,7 @@ Below the Sea (with the Sharks): Probing Textual Features of Implicit Sentiment in a Literary Case-study YuriBizzoni - PascaleFeldkamp + PascaleFeldkamp 54-61 Literary language presents an ongoing challenge for Sentiment Analysis due to its complex, nuanced, and layered form of expression. It is often suggested that effective literary writing is evocative, operating beneath the surface and understating emotional expression. To explore features of implicitness in literary expression, this study takes Ernest Hemingway’s The Old Man and the Sea as a case for examining implicit sentiment expression. We examine sentences where automatic sentiment annotations show substantial divergences from human sentiment annotations, and probe these sentences for distinctive traits. We find that sentences where humans perceived a strong sentiment while models did not are significantly lower in arousal and higher in concreteness than sentences where humans and models were more aligned, suggesting the importance of simplicity and concreteness for implicit sentiment expression in literary prose. 2024.unimplicit-1.5 @@ -79,7 +79,7 @@ FrançoisMaine FrançoisBancilhon GuillaumeGadek - GuillaumeGravier + GuillaumeGravier PaulÉgré 62-72 This paper investigates the language of propaganda and its stylistic features. It presents the PPN dataset, standing for Propagandist Pseudo-News, a multisource, multilingual, multimodal dataset composed of news articles extracted from websites identified as propaganda sources by expert agencies. A limited sample from this set was randomly mixed with papers from the regular French press, and their URL masked, to conduct an annotation-experiment by humans, using 11 distinct labels. The results show that human annotators were able to reliably discriminate between the two types of press across each of the labels. We use different NLP techniques to identify the cues used by annotators, and to compare them with machine classification: first the analyzer VAGO to detect discourse vagueness and subjectivity, and then four different classifiers, two based on RoBERTa, one CATS using syntax, and one XGBoost combining syntactic and semantic features. @@ -91,7 +91,7 @@ SiyaoPeng ZihangSun SebastianLoftus - BarbaraPlank + BarbaraPlank 73-81 Named Entity Recognition (NER) is a key information extraction task with a long-standing tradition. While recent studies address and aim to correct annotation errors via re-labeling efforts, little is known about the sources of label variation, such as text ambiguity, annotation error, or guideline divergence. This is especially the case for high-quality datasets and beyond English CoNLL03. This paper studies disagreements in expert-annotated named entity datasets for three varieties: English, Danish, and Bavarian. We show that text ambiguity and artificial guideline changes are dominant factors for diverse annotations among high-quality revisions. We survey student annotations on a subset of difficult entities and substantiate the feasibility and necessity of manifold annotations for understanding named entity ambiguities from a distributional perspective. 2024.unimplicit-1.7 diff --git a/data/xml/2024.unlp.xml b/data/xml/2024.unlp.xml index a3612b371c..574a5ac788 100644 --- a/data/xml/2024.unlp.xml +++ b/data/xml/2024.unlp.xml @@ -114,7 +114,7 @@ Fine-Tuning and Retrieval Augmented Generation for Question Answering Using Affordable Large Language Models - TiberiuBoros + TiberiuBoros RaduChivereanu StefanDumitrescu OctavianPurcaru @@ -152,7 +152,7 @@ Eval-<fixed-case>UA</fixed-case>-tion 1.0: Benchmark for Evaluating <fixed-case>U</fixed-case>krainian (Large) Language Models SerhiiHamotskyi Anna-IzabellaLevbarg - ChristianHänig + ChristianHänig 109–119 In this paper, we introduce Eval-UA-tion, a set of novel Ukrainian-language datasets aimed at evaluating the performance of language models on the Ukrainian language. The tasks include UA-CBT (inspired by the Children’s Book Test, a fill-in-the-gaps type task aimed at gauging the extent to which a story narrative is understood), UP-Titles (where the online newspaper Ukrainska Pravda‘s articles have to be matched to the correct title among 10 similar ones), and LMentry-static-UA/LMES (inspired by the LMentry benchmark, a set of tasks simple to solve for humans but hard for LMs, such as ‘which of these words is longer’ and ‘what is the fifth word of this sentence’). With the exception of UP-Titles, the tasks are built in a way to minimize contamination and use material unlikely to be present in the training sets of language models, and include a split for few-shot model prompting use that minimizes contamination. For each task human and random baselines are provided. 2024.unlp-1.13 diff --git a/data/xml/2024.vardial.xml b/data/xml/2024.vardial.xml index 9ec1c07010..4928236839 100644 --- a/data/xml/2024.vardial.xml +++ b/data/xml/2024.vardial.xml @@ -7,8 +7,8 @@ TommiJauhiainen NikolaLjubešić MarcosZampieri - PreslavNakov - JörgTiedemann + PreslavNakov + JörgTiedemann Association for Computational Linguistics
Mexico City, Mexico
June @@ -77,7 +77,7 @@ When Elote, Choclo and Mazorca are not the Same. Isomorphism-Based Perspective to the <fixed-case>S</fixed-case>panish Varieties Divergences CristinaEspaña-BonetDFKI GmbH AnkurBhattDFKI GmbH - KoelDutta ChowdhurySaarland Informatics Campus,Saarland University + KoelDutta ChowdhurySaarland Informatics Campus,Saarland University AlbertoBarrón-CedeñoUniversità di Bologna 56-77 Spanish is an official language in 20 countries; in 19 of them, it arrived by means of overseas colonisation. Its close contact with several coexistent languages and the rich regional and cultural diversity has produced varieties which divert from each other. We study these divergences in a data-based approach and according to their qualitative and quantitative effects in word embeddings. We generate embeddings for Spanish in 24 countries and examine the topology of the spaces. Due to the similarities between varieties —in contrast to what happens to different languages in bilingual topological studies— we first scrutinise the behaviour of three isomorphism measures in (quasi-)isomorphic settings: relational similarity, Eigenvalue similarity and Gromov-Hausdorff distance. We then use the most trustworthy measure to quantify the divergences among varieties. Finally, we use the departures from isomorphism to build relational trees for the Spanish varieties by hierarchical clustering. @@ -117,7 +117,7 @@ The Role of Adverbs in Language Variety Identification: The Case of <fixed-case>P</fixed-case>ortuguese Multi-Word Adverbs IzabelaMüllerUniversidade do Algarve - NunoMamedeUniversidade de Lisboa - Instituto Superior Técnico + NunoMamedeUniversidade de Lisboa - Instituto Superior Técnico JorgeBaptistaUniversity of Algarve 99-106 This paper aims to assess the role of multiword compound adverbs in distinguishing Brazilian Portuguese (PT-BR) from European Portuguese (PT-PT). Two key factors underpin this focus: Firstly, multiword expressions often provide less ambiguity compared to single words, even when their meaning is idiomatic (non-compositional). Secondly, despite constituting a significant portion of lexicons in many languages, they are frequently overlooked in Natural Language Processing, possibly due to their heterogeneous nature and lexical range.For this study, a large lexicon of Portuguese multiword adverbs (3,665) annotated with diatopic information regarding language variety was utilized. The paper investigates the distribution of this category in a corpus consisting in excerpts from journalistic texts sourced from the DSL (Dialect and Similar Language) corpus, representing Brazilian (PT-BR) and European Portuguese (PT-PT), respectively, each partition containing 18,000 sentences.Results indicate a substantial similarity between the two varieties, with a considerable overlap in the lexicon of multiword adverbs. Additionally, specific adverbs unique to each language variety were identified. Lexical entries recognized in the corpus represent 18.2% (PT-BR) to 19.5% (PT-PT) of the lexicon, and approximately 5,700 matches in each partition. While many of the matches are spurious due to ambiguity with otherwise non-idiomatic, free strings, occurrences of adverbs marked as exclusive to one variety in texts from the other variety are rare. @@ -162,7 +162,7 @@ Experiments in Multi-Variant Natural Language Processing for <fixed-case>N</fixed-case>ahuatl RobertPughIndiana University - FrancisTyersIndiana University + FrancisTyersIndiana University 140-151 Linguistic variation is a complicating factor for digital language technologies. This is particularly true for languages that lack an official “standard” variety, including many regional and minoritized languages. In this paper, we describe a set of experiments focused on multivariant natural language processing for the Nahuatl, an indigenous Mexican language with a high level of linguistic variation and no single recognized standard variant. Using small (10k tokens), recently-published annotated datasets for two Nahuatl variants, we compare the performance of single-variant, cross-variant, and joint training, and explore how different models perform on a third Nahuatl variant, unseen in training. These results and the subsequent discussion contribute to efforts of developing low-resource NLP that is robust to diatopic variation. We share all code used to process the data and run the experiments. 2024.vardial-1.12 @@ -272,7 +272,7 @@ Thi AnhNguyenVietnam National University, Hanoi MyHaVietnam National University, Hanoi Thi MinhNguyenVietnam National University, Hanoi - PhuongLe-HongVietnam National University, Hanoi + PhuongLe-HongVietnam National University, Hanoi 235-240 The VLP team participated in the DSL-ML shared task of the VarDial 2024 workshop which aims to distinguish texts in similar languages. This paper presents our approach to solving the problem and discusses our experimental and official results. We propose to integrate semantics-aware word embeddings which are learned from ConceptNet into a bidirectional long short-term memory network. This approach achieves good performance – our sys- tem is ranked in the top two or three of the best performing teams for the task. 2024.vardial-1.21 diff --git a/data/xml/2024.wassa.xml b/data/xml/2024.wassa.xml index cb7c31920a..3f06d090a5 100644 --- a/data/xml/2024.wassa.xml +++ b/data/xml/2024.wassa.xml @@ -3,7 +3,7 @@ Proceedings of the 14th Workshop on Computational Approaches to Subjectivity, Sentiment, & Social Media Analysis - OrphéeDe Clercq + OrphéeDe Clercq ValentinBarriere JeremyBarnes RomanKlinger @@ -67,7 +67,7 @@ A Systematic Analysis on the Temporal Generalization of Language Models in Social Media AsahiUshio - JoseCamacho-ColladosCardiff University + JoseCamacho-ColladosCardiff University 52-62 In machine learning, temporal shifts occur when there are differences between training and test splits in terms of time. For streaming data such as news or social media, models are commonly trained on a fixed corpus from a certain period of time, and they can become obsolete due to the dynamism and evolving nature of online content. This paper focuses on temporal shifts in social media and, in particular, Twitter. We propose a unified evaluation scheme to assess the performance of language models (LMs) under temporal shift on standard social media tasks. LMs are tested on five diverse social media NLP tasks under different temporal settings, which revealed two important findings: (i) the decrease in performance under temporal shift is consistent across different models for entity-focused tasks such as named entity recognition or disambiguation, and hate speech detection, but not significant in the other tasks analysed (i.e., topic and sentiment classification); and (ii) continuous pre-training on the test period does not improve the temporal adaptability of LMs. 2024.wassa-1.5 @@ -89,7 +89,7 @@ A Multi-Faceted <fixed-case>NLP</fixed-case> Analysis of Misinformation Spreaders in <fixed-case>T</fixed-case>witter DimosthenisAntypas AlunPreeceCardiff University - JoseCamacho-ColladosCardiff University + JoseCamacho-ColladosCardiff University 71-83 Social media is an integral part of the daily life of an increasingly large number of people worldwide. Used for entertainment, communication and news updates, it constitutes a source of information that has been extensively used to study human behaviour. Unfortunately, the open nature of social media platforms along with the difficult task of supervising their content has led to a proliferation of misinformation posts. In this paper, we aim to identify the textual differences between the profiles of user that share misinformation from questionable sources and those that do not. Our goal is to better understand user behaviour in order to be better equipped to combat this issue. To this end, we identify Twitter (X) accounts of potential misinformation spreaders and apply transformer models specialised in social media to extract characteristics such as sentiment, emotion, topic and presence of hate speech. Our results indicate that, while there may be some differences between the behaviour of users that share misinformation and those that do not, there are no large differences when it comes to the type of content shared. 2024.wassa-1.7 @@ -100,7 +100,7 @@ Entity-Level Sentiment: More than the Sum of Its Parts EgilRønningstad RomanKlingerOtto-Friedrich Universität Bamberg - LiljaØvrelidDept. of Informatics, University of Oslo + LiljaØvrelidDept. of Informatics, University of Oslo ErikVelldalUniversity of Oslo 84-96 In sentiment analysis of longer texts, there may be a variety of topics discussed, of entities mentioned, and of sentiments expressed regarding each entity. We find a lack of studies exploring how such texts express their sentiment towards each entity of interest, and how these sentiments can be modelled. In order to better understand how sentiment regarding persons and organizations (each entity in our scope) is expressed in longer texts, we have collected a dataset of expert annotations where the overall sentiment regarding each entity is identified, together with the sentence-level sentiment for these entities separately. We show that the reader’s perceived sentiment regarding an entity often differs from an arithmetic aggregation of sentiments at the sentence level. Only 70% of the positive and 55% of the negative entities receive a correct overall sentiment label when we aggregate the (human-annotated) sentiment labels for the sentences where the entity is mentioned. Our dataset reveals the complexity of entity-specific sentiment in longer texts, and allows for more precise modelling and evaluation of such sentiment expressions. @@ -178,7 +178,7 @@ Comparing Tools for Sentiment Analysis of <fixed-case>D</fixed-case>anish Literature from Hymns to Fairy Tales: Low-Resource Language and Domain Challenges - PascaleFeldkamp + PascaleFeldkamp JanKostkanAarhus University EaOvergaard MiaJacobsen @@ -237,7 +237,7 @@ <fixed-case>POL</fixed-case>ygraph: <fixed-case>P</fixed-case>olish Fake News Dataset DanielDzienisiewiczAdam Mickiewicz University of Poznan - FilipGralińskiAdam Mickiewicz University, Adam Mickiewicz University, Applica.ai and Applica.ai + FilipGralińskiAdam Mickiewicz University, Adam Mickiewicz University, Applica.ai and Applica.ai PiotrJabłoński MarekKubisAdam Mickiewicz University of Poznan PawełSkórzewskiAdam Mickiewicz University of Poznan @@ -312,7 +312,7 @@ Comparing Pre-trained Human Language Models: Is it Better with Human Context as Groups, Individual Traits, or Both? NikitaSoni NiranjanBalasubramanianState University of New York, Stony Brook - H. AndrewSchwartzStony Brook University (SUNY) + H. AndrewSchwartzStony Brook University (SUNY) DirkHovyBocconi University 316-328 Pre-trained language models consider the context of neighboring words and documents but lack any author context of the human generating the text. However, language depends on the author’s states, traits, social, situational, and environmental attributes, collectively referred to as human context (Soni et al., 2024). Human-centered natural language processing requires incorporating human context into language models. Currently, two methods exist: pre-training with 1) group-wise attributes (e.g., over-45-year-olds) or 2) individual traits. Group attributes are simple but coarse — not all 45-year-olds write the same way — while individual traits allow for more personalized representations, but require more complex modeling and data. It is unclear which approach benefits what tasks. We compare pre-training models with human context via 1) group attributes, 2) individual users, and 3) a combined approach on five user- and document-level tasks. Our results show that there is no best approach, but that human-centered language modeling holds avenues for different methods. @@ -345,7 +345,7 @@ To Aggregate or Not to Aggregate. That is the Question: A Case Study on Annotation Subjectivity in Span Prediction KemalKurniawanUniversity of Melbourne MeladelMisticaThe University of Melbourne - TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne + TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne Jey HanLauThe University of Melbourne 362-368 This paper explores the task of automatic prediction of text spans in a legal problem description that support a legal area label. We use a corpus of problem descriptions written by laypeople in English that is annotated by practising lawyers. Inherent subjectivity exists in our task because legal area categorisation is a complex task, and lawyers often have different views on a problem. Experiments show that training on majority-voted spans outperforms training on disaggregated ones. @@ -379,7 +379,7 @@ Chinchunmei at <fixed-case>WASSA</fixed-case> 2024 Empathy and Personality Shared Task: Boosting <fixed-case>LLM</fixed-case>’s Prediction with Role-play Augmentation and Contrastive Reasoning Calibration TianLi NicolayRusnachenko - HuizhiLiangNewcastle University, UK + HuizhiLiangNewcastle University, UK 385-392 This paper presents the Chinchunmei team’s contributions to the WASSA2024 Shared-Task 1: Empathy Detection and Emotion Classification. We participated in Tracks 1, 2, and 3 to predict empathetic scores based on dialogue, article, and essay content. We choose Llama3-8b-instruct as our base model. We developed three supervised fine-tuning schemes: standard prediction, role-play, and contrastive prediction, along with an innovative scoring calibration method called Contrastive Reasoning Calibration during inference. Pearson Correlation was used as the evaluation metric across all tracks. For Track 1, we achieved 0.43 on the devset and 0.17 on the testset. For Track 2 emotion, empathy, and polarity labels, we obtained 0.64, 0.66, and 0.79 on the devset and 0.61, 0.68, and 0.58 on the testset. For Track 3 empathy and distress labels, we got 0.64 and 0.56 on the devset and 0.33 and 0.35 on the testset. 2024.wassa-1.32 @@ -401,7 +401,7 @@ Zhenmei at <fixed-case>WASSA</fixed-case>-2024 Empathy and Personality Shared Track 2 Incorporating <fixed-case>P</fixed-case>earson Correlation Coefficient as a Regularization Term for Enhanced Empathy and Emotion Prediction in Conversational Turns LitingHuang - HuizhiLiangNewcastle University, UK + HuizhiLiangNewcastle University, UK 399-403 In the realm of conversational empathy and emotion prediction, emotions are frequently categorized into multiple levels. This study seeks to enhance the performance of emotion prediction models by incorporating the Pearson correlation coefficient as a regularization term within the loss function. This regularization approach ensures closer alignment between predicted and actual emotion levels, mitigating extreme predictions and resulting in smoother and more consistent outputs. Such outputs are essential for capturing the subtle transitions between continuous emotion levels. Through experimental comparisons between models with and without Pearson regularization, our findings demonstrate that integrating the Pearson correlation coefficient significantly boosts model performance, yielding higher correlation scores and more accurate predictions. Our system officially ranked 9th at the Track 2: CONV-turn. The code for our model can be found at Link . 2024.wassa-1.34 @@ -421,7 +421,7 @@ <fixed-case>NU</fixed-case> at <fixed-case>WASSA</fixed-case> 2024 Empathy and Personality Shared Task: Enhancing Personality Predictions with Knowledge Graphs; A Graphical Neural Network and <fixed-case>L</fixed-case>ight<fixed-case>GBM</fixed-case> Ensemble Approach EmmanuelOsei-BrefoNewcastle University, UK - HuizhiLiangNewcastle University, UK + HuizhiLiangNewcastle University, UK 412-419 This paper proposes a novel ensemble approach that combines Graph Neural Networks (GNNs) and LightGBM to enhance personality prediction based on the personality Big 5 model. By integrating BERT embeddings from user essays with knowledge graph-derived embeddings, our method accurately captures rich semantic and relational information. Additionally, a special loss function that combines Mean Squared Error (MSE), Pearson correlation loss, and contrastive loss to improve model performance is introduced. The proposed ensemble model, made of Graph Convolutional Networks (GCNs), Graph Attention Networks (GATs), and LightGBM, demonstrates superior performance over other models, with significant improvements in prediction accuracy for the Big Five personality traits achieved. Our system officially ranked 2^{nd} at the Track 4: PER track. 2024.wassa-1.36 @@ -455,7 +455,7 @@ LitingHuang TianLi NicolayRusnachenko - HuizhiLiangNewcastle University, UK + HuizhiLiangNewcastle University, UK 430-434 This paper presents our participation to the WASSA 2024 Shared Task on Empathy Detection and Emotion Classification and Personality Detection in Interactions. We focus on Track 2: Empathy and Emotion Prediction in Conversations Turns (CONV-turn), which consists of predicting the perceived empathy, emotion polarity and emotion intensity at turn level in a conversation. In the method, we conduct BERT and DeBERTa based finetuning, implement the CombinedLoss which consists of a structured contrastive loss and Pearson loss, adopt adversarial training using Fast Gradient Method (FGM). This method achieved Pearson correlation of 0.581 for Emotion,0.644 for Emotional Polarity and 0.544 for Empathy on the test set, with the average value of 0.590 which ranked 4th among all teams. After submission to WASSA 2024 competition, we further introduced the segmented mix-up for data augmentation, boosting for ensemble and regression experiments, which yield even better results: 0.6521 for Emotion, 0.7376 for EmotionalPolarity, 0.6326 for Empathy in Pearson correlation on the development set. The implementation and fine-tuned models are publicly-available at https://github.com/hyy-33/hyy33-WASSA-2024-Track-2. 2024.wassa-1.39 @@ -557,9 +557,9 @@ <fixed-case>PCICUNAM</fixed-case> at <fixed-case>WASSA</fixed-case> 2024: Cross-lingual Emotion Detection Task with Hierarchical Classification and Weighted Loss Functions JesúsVázquez-OsorioUniversidad Nacional Autónoma de México, Posgrado en Ciencia e Ingeniería de la Computación - GerardoSierraUniversidad Nacional Autónoma de México, Instituto de Ingeniería + GerardoSierraUniversidad Nacional Autónoma de México, Instituto de Ingeniería HelenaGómez-AdornoUniversidad Nacional Autónoma de México, Instituto de Investigaciones en Matemáticas Aplicadas y en Sistemas - GemmaBel-EnguixUniversidad Nacional Autónoma de México, Instituto de Ingeniería + GemmaBel-EnguixUniversidad Nacional Autónoma de México, Instituto de Ingeniería 490-494 This paper addresses the shared task of multi-lingual emotion detection in tweets, presented at the Workshop on Computational Approaches to Subjectivity, Sentiment, and Social Media Analysis (WASSA) co-located with the ACL 2024 conference. The task involves predicting emotions from six classes in tweets from five different languages using only English for model training. Our approach focuses on addressing class imbalance through data augmentation, hierarchical classification, and the application of focal loss and weighted cross-entropy loss functions. These methods enhance our transformer-based model’s ability to transfer emotion detection capabilities across languages, resulting in improved performance despite the constraints of limited computational resources. 2024.wassa-1.48 @@ -572,7 +572,7 @@ QihaoShaoUniversity of Washington ChristineZhaoUniversity of Washington ShengBiUniversity of Washington - Gina-AnneLevowUniversity of Washington + Gina-AnneLevowUniversity of Washington 495-504 Cross-lingual emotion detection allows us to analyze global trends, public opinion, and social phenomena at scale. We participated in the Explainability of Cross-lingual Emotion Detection (EXALT) shared task, achieving an F1-score of 0.6046 on the evaluation set for the emotion detection sub-task. Our system outperformed the baseline by more than 0.16 F1-score absolute, and ranked second amongst competing systems. We conducted experiments using fine-tuning, zero-shot learning, and few-shot learning for Large Language Model (LLM)-based models as well as embedding-based BiLSTM and KNN for non-LLM-based techniques. Additionally, we introduced two novel methods: the Multi-Iteration Agentic Workflow and the Multi-Binary-Classifier Agentic Workflow. We found that LLM-based approaches provided good performance on multilingual emotion detection. Furthermore, ensembles combining all our experimented models yielded higher F1-scores than any single approach alone. 2024.wassa-1.49 @@ -597,7 +597,7 @@ JeongyeobHongUniversity of Washington AndrewWangUniversity of Washington AnitaSilvaUniversity of Washington - Gina-AnneLevowUniversity of Washington + Gina-AnneLevowUniversity of Washington 511-522 This paper introduces our submitted systems for WASSA 2024 Shared Task 2: Cross-Lingual Emotion Detection. We implemented a BERT-based classifier and an in-context learning-based system. Our best-performing model, using English Chain of Thought prompts with trigger words, reached 3rd overall with an F1 score of 0.6015. Following the motivation of the shared task, we further analyzed the scalability and transferability of the monolingual English dataset on cross-lingual tasks. Our analysis demonstrates the importance of data quality over quantity. We also found that augmented multilingual data does not necessarily perform better than English monolingual data in cross-lingual tasks. We open-sourced the augmented data and source code of our system for future research. 2024.wassa-1.51 @@ -610,7 +610,7 @@ KerenRuditskyUniversity of Washington AnnaBatraUniversity of Washington YulhaLhawaUniversity of Washington - Gina-AnneLevowUniversity of Washington + Gina-AnneLevowUniversity of Washington 523-527 This paper describes our task 1 submission for the WASSA 2024 shared task on Explainability for Cross-lingual Emotion in Tweets. Our task is to predict the correct emotion label (Anger, Sadness, Fear, Joy, Love, and Neutral) for a dataset of English, Dutch, French, Spanish, and Russian tweets, while training exclusively on English emotion labeled data, to reveal what kind of emotion detection information is transferable cross-language (Maladry et al., 2024). To that end, we used an ensemble of models with a GPT-4 decider. Our ensemble consisted of a few-shot GPT-4 prompt system and a TwHIN-BERT system fine-tuned on the EXALT and additional English data. We ranked 8th place under the name WU_TLAXE with an F1 Macro score of 0.573 on the test set. We also experimented with an English-only TwHIN-BERT model by translating the other languages into English for inference, which proved to be worse than the other models. 2024.wassa-1.52 diff --git a/data/xml/2024.wat.xml b/data/xml/2024.wat.xml index 24cbe7a1be..a68189571b 100644 --- a/data/xml/2024.wat.xml +++ b/data/xml/2024.wat.xml @@ -60,7 +60,7 @@ Are Large Language Models State-of-the-art Quality Estimators for Machine Translation of User-generated Content? ShenbinQianUniversity of Surrey - ConstantinOrasanUniversity of Surrey + ConstantinOrasanUniversity of Surrey DipteshKanojiaUniversity of Surrey FélixDo CarmoUniversity of Surrey 45-55 diff --git a/data/xml/2024.wikinlp.xml b/data/xml/2024.wikinlp.xml index 965f9f1c3f..a47ada4885 100644 --- a/data/xml/2024.wikinlp.xml +++ b/data/xml/2024.wikinlp.xml @@ -76,7 +76,7 @@ K.Salas-JimenezUniversidad Nacional Autónoma de México Francisco FernandoLopez-Ponce Sergio-LuisOjeda-Trueba - GemmaBel-EnguixUniversidad Nacional Autónoma de México + GemmaBel-EnguixUniversidad Nacional Autónoma de México 46-52 This paper explores whether it is possible to train a machine learning model using Wikipedia data to detect subjectivity in sentences and generalize effectively to other domains. To achieve this, we performed experiments with the WikiBias corpus, the BABE corpus, and the CheckThat! Dataset. Various classical models for ML were tested, including Logistic Regression, SVC, and SVR, including characteristics such as Sentence Transformers similarity, probabilistic sentiment measures, and biased lexicons. Pre-trained models like DistilRoBERTa, as well as large language models like Gemma and GPT-4, were also tested for the same classification task. 2024.wikinlp-1.10 @@ -86,7 +86,7 @@ <fixed-case>HOAXPEDIA</fixed-case>: A Unified <fixed-case>W</fixed-case>ikipedia Hoax Articles Dataset HsuvasBorkakotyCardiff University - LuisEspinosa-AnkeCardiff University and AMPLYFI + LuisEspinosa-AnkeCardiff University and AMPLYFI 53-66 Hoaxes are a recognised form of disinformation created deliberately, with potential serious implications in the credibility of reference knowledge resources such as Wikipedia. What makes detecting Wikipedia hoaxes hard is that they often are written according to the official style guidelines. In this work, we first provide a systematic analysis of similarities and discrepancies between legitimate and hoax Wikipedia articles, and introduce HOAXPEDIA, a collection of 311 hoax articles (from existing literature and official Wikipedia lists), together with semantically similar legitimate articles, which together form a binary text classification dataset aimed at fostering research in automated hoax detection. In this paper, We report results after analyzing several language models, hoax-to-legit ratios, and the amount of text classifiers are exposed to (full article vs the article’s definition alone). Our results suggest that detecting deceitful content in Wikipedia based on content alone is hard but feasible, and complement our analysis with a study on the differences in distributions in edit histories, and find that looking at this feature yields better classification results than context. 2024.wikinlp-1.11 @@ -158,7 +158,7 @@ DanielCheng PhillipKeungUniversity of Washington JungoKasaiToyota Technological Institute at Chicago - Noah A.SmithUniversity of Washington and Allen Institute for Artificial Intelligence + Noah A.SmithUniversity of Washington and Allen Institute for Artificial Intelligence 126-135 Generative retrieval (Wang et al., 2022; Tay et al., 2022) is a popular approach for end-to-end document retrieval that directly generates document identifiers given an input query. We introduce summarization-based document IDs, in which each document’s ID is composed of an extractive summary or abstractive keyphrases generated by a language model, rather than an integer ID sequence or bags of n-grams as proposed in past work. We find that abstractive, content-based IDs (ACID) and an ID based on the first 30 tokens are very effective in direct comparisons with previous approaches to ID creation. We show that using ACID improves top-10 and top-20 recall by 15.6% and 14.4% (relative) respectively versus the cluster-based integer ID baseline on the MSMARCO 100k retrieval task, and 9.8% and 9.9% respectively on the Wikipedia-based NQ 100k retrieval task. Our results demonstrate the effectiveness of human-readable, natural-language IDs created through summarization for generative retrieval. We also observed that extractive IDs outperformed abstractive IDs on Wikipedia articles in NQ but not the snippets in MSMARCO, which suggests that document characteristics affect generative retrieval performance. 2024.wikinlp-1.18 diff --git a/data/xml/2024.wildre.xml b/data/xml/2024.wildre.xml index 953ea03616..069057b669 100644 --- a/data/xml/2024.wildre.xml +++ b/data/xml/2024.wildre.xml @@ -3,10 +3,10 @@ Proceedings of the 7th Workshop on Indian Language Data: Resources and Evaluation - Girish NathJha + Girish NathJha SobhaL. KalikaBali - Atul Kr.Ojha + Atul Kr.Ojha ELRA and ICCL
Torino, Italia
May @@ -24,7 +24,7 @@ ChayanKochar Vandan VasantlalMujadia PruthwikMishra - Dipti MisraSharma + Dipti MisraSharma 1–10 In the natural course of spoken language, individuals often engage in thinking and self-correction during speech production. These instances of interruption or correction are commonly referred to as disfluencies. When preparing data for subsequent downstream NLP tasks, these linguistic elements can be systematically removed, or handled as required, to enhance data quality. In this study, we present a comprehensive research on disfluencies in Indian languages. Our approach involves not only annotating real-world conversation transcripts but also conducting a detailed analysis of linguistic nuances inherent to Indian languages that are necessary to consider during annotation. Additionally, we introduce a robust algorithm for the synthetic generation of disfluent data. This algorithm aims to facilitate more effective model training for the identification of disfluencies in real-world conversations, thereby contributing to the advancement of disfluency research in Indian languages. 2024.wildre-1.1 @@ -32,7 +32,7 @@
<fixed-case>E</fixed-case>mo<fixed-case>M</fixed-case>ix-3<fixed-case>L</fixed-case>: A Code-Mixed Dataset for <fixed-case>B</fixed-case>angla-<fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi for Emotion Detection - NishatRaihan + NishatRaihan DhimanGoswami AntaraMahmud AntoniosAnastasopoulos @@ -50,7 +50,7 @@ ShardulSuryawanshi Atul Kr.Ojha PaulBuitelaar - John P.McCrae + John P.McCrae 17–23 This paper describes the structure and findings of the WILDRE 2024 shared task on Code-mixed Less-resourced Sentiment Analysis for Indo-Aryan Languages. The participants were asked to submit the test data’s final prediction on CodaLab. A total of fourteen teams registered for the shared task. Only four participants submitted the system for evaluation on CodaLab, with only two teams submitting the system description paper. While all systems show a rather promising performance, they outperform the baseline scores. 2024.wildre-1.3 @@ -91,8 +91,8 @@ Finding the Causality of an Event in News Articles - SobhaLalitha Devi - PattabhiRK Rao + SobhaLalitha Devi + PattabhiRK Rao 47–53 This paper discusses about the finding of causality of an event in newspaper articles. The analysis of causality , otherwise known as cause and effect is crucial for building efficient Natural Language Understanding (NLU) supported AI systems such as Event tracking and it is considered as a complex semantic relation under discourse theory. A cause-effect relation consists of a linguistic marker and its two arguments. The arguments are semantic arguments where the cause is the first argument (Arg1) and the effect is the second argument(Arg2). In this work we have considered the causal relations in Tamil Newspaper articles. The analysis of causal constructions, the causal markers and their syntactic relation lead to the identification of different features for developing the language model using RBMs (Restricted Boltzmann Machine). The experiments we performed have given encouraging results. The Cause-Effect system developed is used in a mobile App for Event profiling called “Nigalazhvi” where the cause and effect of an event is identified and given to the user. 2024.wildre-1.7 @@ -136,7 +136,7 @@ HimanshuSingh KengatharaiyerSarveswaran William ChandraTjhi - Rajiv RatnShah + Rajiv RatnShah 73–83 Tamil is a relatively low-resource language in the field of Natural Language Processing (NLP). Recent years have seen a growth in Tamil NLP datasets in Natural Language Understanding (NLU) or Natural Language Generation (NLG) tasks, but high-quality linguistic resources remain scarce. In order to alleviate this gap in resources, this paper introduces Aalamaram, a treebank with rich linguistic annotations for the Tamil language. It is hitherto the largest publicly available Tamil treebank with almost 10,000 sentences from diverse sources and is annotated for the tasks of Part-of-speech (POS) tagging, Named Entity Recognition (NER), Morphological Parsing and Dependency Parsing. Close attention has also been paid to multi-word segmentation, especially in the context of Tamil clitics. Although the treebank is based largely on the Universal Dependencies (UD) specifications, significant effort has been made to adjust the annotation rules according to the idiosyncrasies and complexities of the Tamil language, thereby providing a valuable resource for linguistic research and NLP developments. 2024.wildre-1.11 diff --git a/data/xml/2024.wmt.xml b/data/xml/2024.wmt.xml index 28ca2c34f4..dd51671bbe 100644 --- a/data/xml/2024.wmt.xml +++ b/data/xml/2024.wmt.xml @@ -26,7 +26,7 @@ TomKocmiCohere EleftheriosAvramidisGerman Research Center for Artificial Intelligence (DFKI) RachelBawdenInria - OndřejBojarCharles University, MFF UFAL + OndřejBojarCharles University, MFF UFAL AntonDvorkovichYandex ChristianFedermannMicrosoft MarkFishelUniversity of Tartu @@ -41,7 +41,7 @@ KentonMurrayJohns Hopkins University MasaakiNagataNTT Corporation MartinPopelCharles University, Faculty of Mathematics and Physics, UFAL - MajaPopovićADAPT, Dublin City University + MajaPopovićADAPT, Dublin City University MariyaShmatovaDubformer SteinthórSteingrímssonThe Árni Magnússon Institute for Icelandic Studies VilémZouharETH Zurich, Charles University @@ -60,13 +60,13 @@ EleftheriosAvramidisGerman Research Center for Artificial Intelligence (DFKI) RicardoReiUnbabel/INESC-ID BrianThompsonAmazon - FredericBlainTilburg University + FredericBlainTilburg University TomKocmiCohere JiayiWangUniversity College London - David IfeoluwaAdelaniMcGill University / MILA + David IfeoluwaAdelaniMcGill University / MILA MariannaBuchicchioUnbabel ChrysoulaZervaInstituto de Instituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon - AlonLavieUnbabel/Carnegie Mellon University + AlonLavieUnbabel/Carnegie Mellon University 47-81 The WMT24 Metrics Shared Task evaluated the performance of automatic metrics for machine translation (MT), with a major focus on LLM-based translations that were generated as part of the WMT24 General MT Shared Task. As LLMs become increasingly popular in MT, it is crucial to determine whether existing evaluation metrics can accurately assess the output of these systems.To provide a robust benchmark for this evaluation, human assessments were collected using Multidimensional Quality Metrics (MQM), continuing the practice from recent years. Furthermore, building on the success of the previous year, a challenge set subtask was included, requiring participants to design contrastive test suites that specifically target a metric’s ability to identify and penalize different types of translation errors.Finally, the meta-evaluation procedure was refined to better reflect real-world usage of MT metrics, focusing on pairwise accuracy at both the system- and segment-levels.We present an extensive analysis on how well metrics perform on three language pairs: English to Spanish (Latin America), Japanese to Chinese, and English to German. The results strongly confirm the results reported last year, that fine-tuned neural metrics continue to perform well, even when used to evaluate LLM-based translation systems. 2024.wmt-1.2 @@ -76,20 +76,20 @@ Findings of the Quality Estimation Shared Task at <fixed-case>WMT</fixed-case> 2024: Are <fixed-case>LLM</fixed-case>s Closing the Gap in <fixed-case>QE</fixed-case>? ChrysoulaZervaInstituto de Instituto de Telecomunicações, Instituto Superior Técnico, University of Lisbon - FredericBlainTilburg University + FredericBlainTilburg University José G.C. De SouzaUnbabel DipteshKanojiaUniversity of Surrey SourabhDeoghareIIT Bombay Nuno M.GuerreiroInstituto de Telecomunicacoes, University of Lisbon GiuseppeAttanasioInstituto de Telecomunicacoes RicardoReiUnbabel/INESC-ID - ConstantinOrasanUniversity of Surrey - MatteoNegriFondazione Bruno Kessler + ConstantinOrasanUniversity of Surrey + MatteoNegriFondazione Bruno Kessler MarcoTurchiZoom Video Communications - RajenChatterjeeApple Inc. - PushpakBhattacharyyaIndian Institute of Technology Bombay and Patna + RajenChatterjeeApple Inc. + PushpakBhattacharyyaIndian Institute of Technology Bombay and Patna MarkusFreitagGoogle Research - AndréMartinsUnbabel, Instituto de Telecomunicacoes + AndréMartinsUnbabel, Instituto de Telecomunicacoes 82-109 We report the results of the WMT 2024 shared task on Quality Estimation, in which the challenge is to predict the quality of the output of neural machine translation systems at the word and sentence levels, without access to reference translations. In this edition, we expanded our scope to assess the potential for quality estimates to help in the correction of translated outputs, hence including an automated post-editing (APE) direction. We publish new test sets with human annotations that target two directions: providing new Multidimensional Quality Metrics (MQM) annotations for three multi-domain language pairs (English to German, Spanish and Hindi) and extending the annotations on Indic languages providing direct assessments and post edits for translation from English into Hindi, Gujarati, Tamil and Telugu. We also perform a detailed analysis of the behaviour of different models with respect to different phenomena including gender bias, idiomatic language, and numerical and entity perturbations. We received submissions based both on traditional, encoder-based approaches as well as large language model (LLM) based ones. 2024.wmt-1.3 @@ -126,14 +126,14 @@ PhilippeThomasGerman Research Center for Artificial Intelligence (DFKI) RolandRollerGerman Research Center for Artificial Intelligence (DFKI) RachelBawdenInria - AurélieNévéolUniversité Paris-Saclay, CNRS, LISN + AurélieNévéolUniversité Paris-Saclay, CNRS, LISN SteffenCastleGerman Research Center for Artificial Intelligence (DFKI) VanessaBonatoDept. of Linguistic and Literary Studies University of Padua - Giorgio MariaDi NunzioDept. of Linguistic and Literary Studies University of Padua + Giorgio MariaDi NunzioDept. of Linguistic and Literary Studies University of Padua FedericaVezzaniDept. of Linguistic and Literary Studies University of Padua MaikaVicente NavarroLeica Biosystems LanaYeganovaNCBI/NLM/NIH - AntonioJimeno YepesRMIT University + AntonioJimeno YepesRMIT University 124-138 We present the results of the ninth edition of the Biomedical Translation Task at WMT’24. We released test sets for six language pairs, namely, French, German, Italian, Portuguese, Russian, and Spanish, from and into English. Eachtest set consists of 50 abstracts from PubMed. Differently from previous years, we did not split abstracts into sentences. We received submissions from five teams, and for almost all language directions. We used a baseline/comparison system based on Llama 3.1 and share the source code at https://github.com/cgrozea/wmt24biomed-ref. 2024.wmt-1.6 @@ -217,7 +217,7 @@ SwetaAgrawalInstituto de Telecomunicações AntonioFarinhasInstituto de Telecomunicacoes, IST José G.C. De SouzaUnbabel - AndréMartinsUnbabel, Instituto de Telecomunicacoes + AndréMartinsUnbabel, Instituto de Telecomunicacoes 185-204 In this work, we present Tower v2, an improved iteration of the state-of-the-art open-weight Tower models, and the backbone of our submission to the WMT24 General Translation shared task. Tower v2 introduces key improvements including expanded language coverage, enhanced data quality, and increased model capacity up to 70B parameters. Our final submission combines these advancements with quality-aware decoding strategies, selecting translations based on multiple translation quality signals. The resulting system demonstrates significant improvement over previous versions, outperforming closed commercial systems like GPT-4o, Claude 3.5, and DeepL even at a smaller 7B scale. 2024.wmt-1.12 @@ -263,7 +263,7 @@ HuanLiuDalian University of Technology ConghuYuanGlobal Tone Communication Technology Co., Ltd WentaoChenGlobal Tone Communication Technology Co., Ltd - DegenHuangDalian University of Technology + DegenHuangDalian University of Technology 227-231 This paper presents the submission from Global Tone Communication Co., Ltd. and Dalian University of Technology for the WMT24 shared general Machine Translation (MT) task at the Conference on Empirical Methods in Natural Language Processing (EMNLP). Our participation encompasses two language pairs: English to Japanese and Japanese to Chinese. The systems are developed without particular constraints or requirements, facilitating extensive research in machine translation. We emphasize back-translation, utilize multilingual translation models, and apply fine-tuning strategies to improve performance. Additionally, we integrate both human-generated and machine-generated data to fine-tune our models, leading to enhanced translation accuracy. The automatic evaluation results indicate that our system ranks first in terms of BLEU score for the Japanese to Chinese translation. 2024.wmt-1.15 @@ -277,7 +277,7 @@ MartinPopelCharles University, Faculty of Mathematics and Physics, UFAL NamLuuCharles University DanilSeminMFF UK - OndřejBojarCharles University, MFF UFAL + OndřejBojarCharles University, MFF UFAL 232-246 This paper presents the contributions of Charles University teams to the WMT24 General Translation task (English to Czech, German and Russian, and Czech to Ukrainian), and the WMT24 Translation into Low-Resource Languages of Spain task.Our most elaborate submission, CUNI-MH for en2cs, is the result of fine-tuning Mistral 7B v0.1 for translation using a three-stage process: Supervised fine-tuning using QLoRA, Contrastive Preference Optimization, and merging of model checkpoints. We also describe the CUNI-GA, CUNI-Transformer and CUNI-DocTransformer submissions, which are based on our systems from the previous year.Our en2ru system CUNI-DS uses a similar first stage as CUNI-MH (QLoRA for en2cs) and follows with transferring to en2ru.For en2de (CUNI-NL), we experimented with a LLM-based speech translation system, to translate without the speech input.For the Translation into Low-Resource Languages of Spain task, we performed QLoRA fine-tuning of a large LLM on a small amount of synthetic (backtranslated) data. 2024.wmt-1.16 @@ -377,7 +377,7 @@ AnnikaGrützner-ZahnGerman Research Center for Artificial Intelligence (DFKI) ManuelBrackDFKI, TU Darmstadt PatrickSchramowskiTU Darmstadt - PedroOrtiz SuarezCommon Crawl Foundation + PedroOrtiz SuarezCommon Crawl Foundation MalteOstendorffGerman Research Center for Artificial Intelligence FabioBarthDFKI ShushenManakhimovaGerman Research Center for Artificial Intelligence (DFKI) @@ -394,7 +394,7 @@ <fixed-case>C</fixed-case>o<fixed-case>ST</fixed-case> of breaking the <fixed-case>LLM</fixed-case>s AnanyaMukherjeeInternational Institute of Information Technology Hyderabad SaumitraYadavInternational Institute of Information Technology, Hyderabad - ManishShrivastavaInternational Institute of Information Technology Hyderabad + ManishShrivastavaInternational Institute of Information Technology Hyderabad 299-306 This paper presents an evaluation of 16 machine translation systems submitted to the Shared Task of the 9th Conference of Machine Translation (WMT24) for the English-Hindi (en-hi) language pair using our Complex Structures Test (CoST) suite. Aligning with this year’s test suite sub-task theme, “Help us break LLMs”, we curated a comprehensive test suite encompassing diverse datasets across various categories, including autobiography, poetry, legal, conversation, play, narration, technical, and mixed genres. Our evaluation reveals that all the systems struggle significantly with the archaic style of text like legal and technical writings or text with creative twist like conversation and poetry datasets, highlighting their weaknesses in handling complex linguistic structures and stylistic nuances inherent in these text types. Our evaluation identifies the strengths and limitations of the submitted models, pointing to specific areas where further research and development are needed to enhance their performance. Our test suite is available at https://github.com/AnanyaCoder/CoST-WMT-24-Test-Suite-Task. 2024.wmt-1.24 @@ -462,7 +462,7 @@ A Test Suite of Prompt Injection Attacks for <fixed-case>LLM</fixed-case>-based Machine Translation - Antonio ValerioMiceli BaroneThe University of Edinburgh + Antonio ValerioMiceli BaroneThe University of Edinburgh ZhifanSunTechnische Universität Darmstadt 380-450 LLM-based NLP systems typically work by embedding their input data into prompt templates which contain instructions and/or in-context examples, creating queries which are submitted to a LLM, then parse the LLM response in order to generate the system outputs. Prompt Injection Attacks (PIAs) are a type of subversion of these systems where a malicious user crafts special inputs which interfer with the prompt templates, causing the LLM to respond in ways unintended by the system designer.Recently, Sun and Miceli-Barone (2024) proposed a class of PIAs against LLM-based machine translation. Specifically, the task is to translate questions from the TruthfulQA test suite, where an adversarial prompt is prepended to the questions, instructing the system to ignore the translation instruction and answer the questions instead.In this test suite we extend this approach to all the language pairs of the WMT 2024 General Machine Translation task. Moreover, we include additional attack formats in addition to the one originally studied. @@ -487,8 +487,8 @@ DavidAnugrahaUniversity of Toronto GarryKuwantoBoston University LuckySusantoUniversitas Indonesia - Derry TantiWijayaBoston University - GentaWinataCapital One AI Foundations + Derry TantiWijayaBoston University + GentaWinataCapital One AI Foundations 459-469 We present MetaMetrics-MT, an innovative metric designed to evaluate machine translation (MT) tasks by aligning closely with human preferences through Bayesian optimization with Gaussian Processes. MetaMetrics-MT enhances existing MT metrics by optimizing their correlation with human judgments. Our experiments on the WMT24 metric shared task dataset demonstrate that MetaMetrics-MT outperforms all existing baselines, setting a new benchmark for state-of-the-art performance in the reference-based setting. Furthermore, it achieves comparable results to leading metrics in the reference-free setting, offering greater efficiency. 2024.wmt-1.32 @@ -498,7 +498,7 @@ chr<fixed-case>F</fixed-case>-<fixed-case>S</fixed-case>: Semantics Is All You Need AnanyaMukherjeeInternational Institute of Information Technology Hyderabad - ManishShrivastavaInternational Institute of Information Technology Hyderabad + ManishShrivastavaInternational Institute of Information Technology Hyderabad 470-474 Machine translation (MT) evaluation metrics like BLEU and chrF++ are widely used reference-based metrics that do not require training and are language-independent. However, these metrics primarily focus on n-gram matching and often overlook semantic depth and contextual understanding. To address this gap, we introduce chrF-S (Semantic chrF++), an enhanced metric that integrates sentence embeddings to evaluate translation quality more comprehensively. By combining traditional character and word n-gram analysis with semantic information derived from embeddings, chrF-S captures both syntactic accuracy and sentence-level semantics. This paper presents our contributions to the WMT24 shared metrics task, showcasing our participation and the development of chrF-S. We also demonstrate that, according to preliminary results on the leaderboard, our metric performs on par with other supervised and LLM-based metrics. By merging semantic insights with n-gram precision, chrF-S offers a significant enhancement in the assessment of machine-generated translations, advancing the field of MT evaluation. Our code and data will be made available at https://github.com/AnanyaCoder/chrF-S. 2024.wmt-1.33 @@ -531,7 +531,7 @@ Evaluating <fixed-case>WMT</fixed-case> 2024 Metrics Shared Task Submissions on <fixed-case>A</fixed-case>fri<fixed-case>MTE</fixed-case> (the <fixed-case>A</fixed-case>frican Challenge Set) JiayiWangUniversity College London - David IfeoluwaAdelaniMcGill University / MILA + David IfeoluwaAdelaniMcGill University / MILA PontusStenetorpUniversity College London 505-516 The AfriMTE challenge set from WMT 2024 Metrics Shared Task aims to evaluate the capabilities of evaluation metrics for machine translation on low-resource African languages, which primarily assesses cross-lingual transfer learning and generalization of machine translation metrics across a wide range of under-resourced languages. In this paper, we analyze the submissions to WMT 2024 Metrics Shared Task. Our findings indicate that language-specific adaptation, cross-lingual transfer learning, and larger language model sizes contribute significantly to improved metric performance. Moreover, supervised models with relatively moderate sizes demonstrate robust performance, when augmented with specific language adaptation for low-resource African languages. Finally, submissions show promising results for language pairs including Darija-French, English-Egyptian Arabic, and English-Swahili. However, significant challenges persist for extremely low-resource languages such as English-Luo and English-Twi, highlighting areas for future research and improvement in machine translation metrics for African languages. @@ -602,8 +602,8 @@ Expanding the <fixed-case>FLORES</fixed-case>+ Multilingual Benchmark with Translations for <fixed-case>A</fixed-case>ragonese, Aranese, <fixed-case>A</fixed-case>sturian, and <fixed-case>V</fixed-case>alencian Juan AntonioPerez-OrtizDepartament de Llenguatges i Sistemes Informatics, Universitat d’Alacant FelipeSánchez-MartínezUniversitat d’Alacant - Víctor M.Sánchez-CartagenaUniversitat d’Alacant - MiquelEsplà-GomisUniversitat d’Alacant + Víctor M.Sánchez-CartagenaUniversitat d’Alacant + MiquelEsplà-GomisUniversitat d’Alacant AaronGaliano JimenezUniversitat d’Alacant AntoniOliverUniversitat Oberta de Catalunya ClaudiAventín-BoyaUniversitat Oberta de Catalunya @@ -623,7 +623,7 @@ The <fixed-case>B</fixed-case>angla/<fixed-case>B</fixed-case>engali Seed Dataset Submission to the <fixed-case>WMT</fixed-case>24 Open Language Data Initiative Shared Task FirozAhmedUniversity of Florida NitinVenkateswaranUniversity of Florida - SarahMoellerUniversity of Florida + SarahMoellerUniversity of Florida 556-566 We contribute a seed dataset for the Bangla/Bengali language as part of the WMT24 Open Language Data Initiative shared task. We validate the quality of the dataset against a mined and automatically aligned dataset (NLLBv1) and two other existing datasets of crowdsourced manual translations. The validation is performed by investigating the performance of state-of-the-art translation models fine-tuned on the different datasets after controlling for training set size. Machine translation models fine-tuned on our dataset outperform models tuned on the other datasets in both translation directions (English-Bangla and Bangla-English). These results confirm the quality of our dataset. We hope our dataset will support machine translation for the Bangla/Bengali community and related low-resource languages. 2024.wmt-1.42 @@ -788,7 +788,7 @@ Findings of <fixed-case>WMT</fixed-case>2024 <fixed-case>E</fixed-case>nglish-to-Low Resource Multimodal Translation Task ShantipriyaParidaSilo AI - OndřejBojarCharles University, MFF UFAL + OndřejBojarCharles University, MFF UFAL IdrisAbdulmuminUniversity of Pretoria Shamsuddeen HassanMuhammadBayero University, Kano Ibrahim SaidAhmadNortheastern University @@ -865,7 +865,7 @@ A3-108 Controlling Token Generation in Low Resource Machine Translation Systems SaumitraYadavInternational Institute of Information Technology, Hyderabad AnanyaMukherjeeInternational Institute of Information Technology Hyderabad - ManishShrivastavaInternational Institute of Information Technology Hyderabad + ManishShrivastavaInternational Institute of Information Technology Hyderabad 728-734 Translating for languages with limited resources poses a persistent challenge due to the scarcity of high-quality training data. To enhance translation accuracy, we explored controlled generation mechanisms, focusing on the importance of control tokens. In our experiments, while training, we encoded the target sentence length as a control token to the source sentence, treating it as an additional feature for the source sentence. We developed various NMT models using transformer architecture and conducted experiments across 8 language directions (English = Assamese, Manipuri, Khasi, and Mizo), exploring four variations of length encoding mechanisms. Through comparative analysis against the baseline model, we submitted two systems for each language direction. We report our findings for the same in this work. 2024.wmt-1.61 @@ -890,7 +890,7 @@ ChenfeiJuDalian University of Technology JunpengLiuDalian University of Technology KaiyuHuangBeijing Jiaotong University - DegenHuangDalian University of Technology + DegenHuangDalian University of Technology 742-746 This paper describes the submission systems of DLUT-NLP team for the WMT24 low-resource Indic language translation shared task. We participated in the translation task of four language pairs, including en-as, en-mz, en-kha, en-mni. 2024.wmt-1.63 @@ -979,7 +979,7 @@ <fixed-case>NLIP</fixed-case>_<fixed-case>L</fixed-case>ab-<fixed-case>IITH</fixed-case> Low-Resource <fixed-case>MT</fixed-case> System for <fixed-case>WMT</fixed-case>24 <fixed-case>I</fixed-case>ndic <fixed-case>MT</fixed-case> Shared Task PramitSahooIndian Institute of Technology Hyderabad MaharajBrahmaIndian Institute of Technology Hyderabad - Maunendra SankarDesarkarIIT Hyderabad + Maunendra SankarDesarkarIIT Hyderabad 781-787 In this paper, we describe our system for the WMT 24 shared task of Low-Resource Indic Language Translation. We consider eng↔{as, kha, lus, mni} as participating language pairs. In this shared task, we explore the fine-tuning of a pre-trained model motivated by the pre-trained objective of aligning embeddings closer by alignment augmentation (Lin et al.,2020) for 22 scheduled Indian languages. Our primary system is based on language-specific finetuning on a pre-trained model. We achieve chrF2 scores of 50.6, 42.3, 54.9, and 66.3 on the official public test set for eng→as, eng→kha, eng→lus, eng→mni respectively. We also explore multilingual training with/without language grouping and layer-freezing. 2024.wmt-1.70 @@ -1014,7 +1014,7 @@ Kshetrimayum BoynaoSinghNational Institute of Technology Silchar Ningthoujam AvichandraSinghNational Institute of Technology Silchar SanjitaPhijamNational Institute of Technology Silchar - Thoudam DorenSinghNational Institute of Technology Silchar + Thoudam DorenSinghNational Institute of Technology Silchar 797-803 This paper presents a Transformer-based Neural Machine Translation (NMT) system developed by the Centre for Natural Language Processing and the Department of Computer Science and Engineering at the National Institute of Technology Silchar, India (NITS-CNLP) for the MultiIndic22MT 2024 Shared Task. The system focused on the English-Manipuri language pair for the WMT24 shared task. The proposed WMT system shows a BLEU score of 6.4, a chrF score of 28.6, and a chrF++ score of 26.6 on the public test set Indic-Conv dataset. Further, in the public test set Indic-Gen dataset, it achieved a BLEU score of 8.1, a chrF score of 32.1, and a chrF++ score of 29.4 on the English-to-Manipuri translation. 2024.wmt-1.73 @@ -1025,7 +1025,7 @@ <fixed-case>NLIP</fixed-case>-Lab-<fixed-case>IITH</fixed-case> Multilingual <fixed-case>MT</fixed-case> System for <fixed-case>WAT</fixed-case>24 <fixed-case>MT</fixed-case> Shared Task MaharajBrahmaIndian Institute of Technology Hyderabad PramitSahooIndian Institute of Technology Hyderabad - Maunendra SankarDesarkarIIT Hyderabad + Maunendra SankarDesarkarIIT Hyderabad 804-809 This paper describes NLIP Lab’s multilingual machine translation system for the WAT24 shared task on multilingual Indic MT task for 22 scheduled languages belonging to 4 language families. We explore pre-training for Indic languages using alignment agreement objectives. We utilize bi-lingual dictionaries to substitute words from source sentences. Furthermore, we fine-tuned language direction-specific multilingual translation models using small and high-quality seed data. Our primary submission is a 243M parameters multilingual translation model covering 22 Indic languages. In the IN22-Gen benchmark, we achieved an average chrF++ score of 46.80 and 18.19 BLEU score for the En-Indic direction. In the Indic-En direction, we achieved an average chrF++ score of 56.34 and 30.82 BLEU score. In the In22-Conv benchmark, we achieved an average chrF++ score of 43.43 and BLEU score of 16.58 in the En-Indic direction, and in the Indic-En direction, we achieved an average of 52.44 and 29.77 for chrF++ and BLEU respectively. Our model is competitive with IndicTransv1 (474M parameter model). 2024.wmt-1.74 @@ -1047,7 +1047,7 @@ <fixed-case>E</fixed-case>nglish-to-Low-Resource Translation: A Multimodal Approach for <fixed-case>H</fixed-case>indi, <fixed-case>M</fixed-case>alayalam, <fixed-case>B</fixed-case>engali, and <fixed-case>H</fixed-case>ausa AliHatamiUniversity of Galway ShubhankerBanerjeeUniversity of Galway - MihaelArcanLua Health + MihaelArcanLua Health PaulBuitelaarUniversity of Galway JohnPhilip McCraeUniversity of Galway 815-822 @@ -1155,7 +1155,7 @@ <fixed-case>U</fixed-case>niversitat d’Alacant’s Submission to the <fixed-case>WMT</fixed-case> 2024 Shared Task on Translation into Low-Resource Languages of <fixed-case>S</fixed-case>pain AaronGaliano JimenezUniversitat d’Alacant - Víctor M.Sánchez-CartagenaUniversitat d’Alacant + Víctor M.Sánchez-CartagenaUniversitat d’Alacant Juan AntonioPerez-OrtizDepartament de Llenguatges i Sistemes Informatics, Universitat d’Alacant FelipeSánchez-MartínezUniversitat d’Alacant 885-891 @@ -1193,7 +1193,7 @@ OnaDe GibertUniversity of Helsinki MikkoAulamoUniversity of Helsinki YvesScherrerUniversity of Oslo - JörgTiedemannUniversity of Helsinki + JörgTiedemannUniversity of Helsinki 908-917 The Helsinki-NLP team participated in the 2024 Shared Task on Translation into Low-Resource languages of Spain with four multilingual systems covering all language pairs. The task consists in developing Machine Translation (MT) models to translate from Spanish into Aragonese, Aranese and Asturian. Our models leverage known approaches for multilingual MT, namely, data filtering, fine-tuning, data tagging, and distillation. We use distillation to merge the knowledge from neural and rule-based systems and explore the trade-offs between translation quality and computational efficiency. We demonstrate that our distilled models can achieve competitive results while significantly reducing computational costs. Our best models ranked 4th, 5th, and 2nd in the open submission track for Spanish–Aragonese, Spanish–Aranese, and Spanish–Asturian, respectively. We release our code and data publicly at https://github.com/Helsinki-NLP/lowres-spain-st. 2024.wmt-1.88 @@ -1215,7 +1215,7 @@ Training and Fine-Tuning <fixed-case>NMT</fixed-case> Models for Low-Resource Languages Using Apertium-Based Synthetic Corpora AleixSantBarcelona Supercomputing Center DanielBardancaCITIUS - José RamomPichel CamposCITIUS + José RamomPichel CamposCITIUS FrancescaDe Luca FornaciariBSC Barcelona Supercomputing Center CarlosEscolanoUniversitat Polità ̈cnica de Catalunya, Barcelona Supercomputing Center JavierGarcia GilabertBarcelona Super Computing Center @@ -1362,7 +1362,7 @@ Improving Context Usage for Translating Bilingual Customer Support Chat with Large Language Models JosePombalUnbabel SwetaAgrawalInstituto de Telecomunicações - AndréMartinsUnbabel, Instituto de Telecomunicacoes + AndréMartinsUnbabel, Instituto de Telecomunicacoes 993-1003 This paper describes Unbabel+IT’s submission to the Chat Shared Task held at the Workshop of Machine Translation 2024. The task focuses on translating customer support chats between agents and customers communicating in different languages. We present two strategies for adapting state-of-the-art language models to better utilize contextual information when translating such conversations. Our training strategy involves finetuning the model on chat datasets with context-augmented instructions, resulting in a specialized model, TOWERCHAT. For inference, we propose a novel quality-aware decoding approach that leverages a context-aware metric, CONTEXTCOMET, to select the optimal translation from a pool of candidates. We evaluate our proposed approach on the official shared task datasets for ten language pairs, showing that our submission consistently outperforms baselines on all and competing systems on 8 out of 10 language pairs across multiple automated metrics. Remarkably, TOWERCHAT outperforms our contrastive submission based on the much larger TOWER-V2-70B model while being 10× smaller. According to human evaluation, our system outperforms all other systems and baselines across all language pairs. These results underscore the importance of context-aware training and inference in handling complex bilingual dialogues. 2024.wmt-1.100 @@ -1373,7 +1373,7 @@ Optimising <fixed-case>LLM</fixed-case>-Driven Machine Translation with Context-Aware Sliding Windows XinyeYangThe University of Sheffield YidaMuThe University of Sheffield - KalinaBontchevaThe University of Sheffield + KalinaBontchevaThe University of Sheffield XingyiSongUniversity of Sheffield 1004-1010 This paper describes SheffieldGATE’s submission to WMT 2024 Chat Shared Translation Task. We participate in three language pairs: English-German, English-Dutch, and English-Portuguese (Brazil). In this work, we introduce a context-aware sliding window decoding method to track dependencies between chat messages. We fine-tune a large pre-trained language model based on the training data provided by the shared task Our experiments (i) compare the model performance between multilingual and bilingual fine-tuning and (ii) assess the impact of different window sizes. Our experimental results demonstrate that utilising contextual information yields superior performance in document-level translation compared to translating documents as isolated text segments, and that models fine-tuned with multilingual data perform better than those fine-tuned with bilingual data. @@ -1465,7 +1465,7 @@ <fixed-case>SYSTRAN</fixed-case> @ <fixed-case>WMT</fixed-case>24 Non-Repetitive Translation Task MarkoAvilaCHAPSVISION - JosepCregoCHAPSVISION + JosepCregoCHAPSVISION 1056-1062 Many contemporary NLP systems rely on neural decoders for text generation, which demonstrate an impressive ability to generate text approaching human fluency levels. However, in the case of neural machine translation networks, they often grapple with the production of repetitive content, also known as repetitive diction or word repetition, an aspect they weren’t explicitly trained to address. While not inherently negative, this repetition can make writing seem monotonous or awkward if not used intentionally for emphasis or stylistic purposes. This paper presents our submission to the WMT 2024 Non-Repetitive Translation Task, for which we adopt a repetition penalty method applied at learning inspired by the principles of label smoothing. No additional work is needed at inference time. We modify the ground-truth distribution to steer the model towards discouraging repetitions. Experiments show the ability of the proposed methods in reducing repetitions within neural machine translation engines, without compromising efficiency or translation quality. 2024.wmt-1.108 @@ -1503,7 +1503,7 @@ HugoPitorroTechnical University of Munich PavloVasylenkoSapienza University of Rome MarcosTrevisoInstituto de Telecomunicacoes - AndréMartinsUnbabel, Instituto de Telecomunicacoes + AndréMartinsUnbabel, Instituto de Telecomunicacoes 1107-1124 Transformers are the current architecture of choice for NLP, but their attention layers do not scale well to long contexts. Recent works propose to replace attention with linear recurrent layers - this is the case for state space models, which enjoy efficient training and inference. However, it remains unclear whether these models are competitive with transformers in machine translation (MT). In this paper, we provide a rigorous and comprehensive experimental comparison between transformers and linear recurrent models for MT. Concretely, we experiment with RetNet, Mamba, and hybrid versions of Mamba which incorporate attention mechanisms. Our findings demonstrate that Mamba is highly competitive with transformers on sentence and paragraph-level datasets, where in the latter both models benefit from shifting the training distribution towards longer sequences. Further analysis show that integrating attention into Mamba improves translation quality, robustness to sequence length extrapolation, and the ability to recall named entities. 2024.wmt-1.111 @@ -1523,7 +1523,7 @@ A Multi-task Learning Framework for Evaluating Machine Translation of Emotion-loaded User-generated Content ShenbinQianUniversity of Surrey - ConstantinOrasanUniversity of Surrey + ConstantinOrasanUniversity of Surrey DipteshKanojiaUniversity of Surrey FélixDo CarmoUniversity of Surrey 1140-1154 @@ -1558,9 +1558,9 @@ Analysing Translation Artifacts: A Comparative Study of <fixed-case>LLM</fixed-case>s, <fixed-case>NMT</fixed-case>s, and Human Translations FedorSizovSaarland University CristinaEspaña-BonetDFKI GmbH - JosefVan GenabithDFKI + JosefVan GenabithDFKI RoyXieDuke University - KoelDutta ChowdhurySaarland Informatics Campus,Saarland University + KoelDutta ChowdhurySaarland Informatics Campus,Saarland University 1183-1199 Translated texts exhibit a range of characteristics that make them appear distinct from texts originally written in the same target language. With the rise of Large Language Models (LLMs), which are designed for a wide range of language generation and understanding tasks, there has been significant interest in their application to Machine Translation. While several studies have focused on improving translation quality through fine-tuning or few-shot prompting techniques, there has been limited exploration of how LLM-generated translations qualitatively differ from those produced by Neural Machine Translation (NMT) models, and human translations. Our study employs explainability methods such as Leave-One-Out (LOO) and Integrated Gradients (IG) to analyze the lexical features distinguishing human translations from those produced by LLMs and NMT systems. Specifically, we apply a two-stage approach: first, classifying texts based on their origin – whether they are original or translations – and second, extracting significant lexical features (highly attributed input words) using post-hoc interpretability methods. Our analysis shows that different methods of feature extraction vary in their effectiveness, with LOO being generally better at pinpointing critical input words and IG capturing a broader range of important words. Finally, our results show that while LLMs and NMT systems can produce translations of a good quality, they still differ from texts originally written by native speakers. Specifically, we find that while some LLMs often align closely with human translations, traditional NMT systems exhibit distinct characteristics, particularly in their use of certain linguistic features. 2024.wmt-1.116 @@ -1737,7 +1737,7 @@ JimDavisOhio State University JeremyGwinnupAir Force Research Laboratory GrantErdmannAir Force Research Laboratory - TimAndersonAir Force Research Laboratory + TimAndersonAir Force Research Laboratory 1428-1439 In Multimodal Machine Translation (MMT), the use of visual data has shown only marginal improvements compared to text-only models. Previously, the CoMMuTE dataset and associated metric were proposed to score models on tasks where the imagery is necessary to disambiguate between two possible translations for each ambiguous source sentence. In this work, we introduce new metrics within the CoMMuTE domain to provide deeper insights into image-aware translation models. Our proposed metrics differ from the previous CoMMuTE scoring method by 1) assessing the impact of multiple images on individual translations and 2) evaluating a model’s ability to jointly select each translation for each image context. Our results challenge the conventional views of poor visual comprehension capabilities of MMT models and show that models can indeed meaningfully interpret visual information, though they may not leverage it sufficiently in the final decision. 2024.wmt-1.130 @@ -1751,7 +1751,7 @@ EleftheriosAvramidisGerman Research Center for Artificial Intelligence (DFKI) RomanGrundkiewiczMicrosoft Research MarzenaKarpinskaUniversity of Massachusetts Amherst - MajaPopovićADAPT, Dublin City University + MajaPopovićADAPT, Dublin City University MrinmayaSachanETH Zurich MariyaShmatovaDubformer 1440-1453 diff --git a/data/xml/2024.wnu.xml b/data/xml/2024.wnu.xml index 4f713e0635..ae44f4f4f3 100644 --- a/data/xml/2024.wnu.xml +++ b/data/xml/2024.wnu.xml @@ -104,7 +104,7 @@ RohanDasUniversity of Colorado Boulder AdityaChandraUniversity of Colorado Boulder I-TaLeePurdue University - Maria LeonorPachecoUniversity of Colorado Boulder + Maria LeonorPachecoUniversity of Colorado Boulder 85-98 From a communications perspective, a frame defines the packaging of the language used in such a way as to encourage certain interpretations and to discourage others. For example, a news article can frame immigration as either a boost or a drain on the economy, and thus communicate very different interpretations of the same phenomenon. In this work, we argue that to explain framing devices we have to look at the way narratives are constructed. As a first step in this direction, we propose a framework that extracts events and their relations to other events, and groups them into high-level narratives that help explain frames in news articles. We show that our framework can be used to analyze framing in U.S. news for two different domains: immigration and gun control. 2024.wnu-1.15 diff --git a/data/xml/2024.wnut.xml b/data/xml/2024.wnut.xml index 0cdfb3b2c3..2d5e418416 100644 --- a/data/xml/2024.wnut.xml +++ b/data/xml/2024.wnut.xml @@ -8,7 +8,7 @@ MaxMüller-Eberstein WeiXu AlanRitter - TimBaldwin + TimBaldwin Association for Computational Linguistics
San Ġiljan, Malta
March @@ -41,7 +41,7 @@
Effects of different types of noise in user-generated reviews on human and machine translations including <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> - MajaPopovicDublin City University + MajaPopovicDublin City University EkaterinaLapshinova-KoltunskiUniversität Hildesheim MaaritKoponenUniversity of Eastern Finland 17-30 @@ -83,7 +83,7 @@ BaberKhalidAmazon ShuyangDaiAmazon TaraTaghavi - SungjinLeeAmazon + SungjinLeeAmazon 58-67 Text classification is an important problem with a wide range of applications in NLP. However, naturally occurring data is imbalanced which can induce biases when training classification models. In this work, we introduce a novel contrastive learning (CL) approach to help with imbalanced text classification task. CL has an inherent structure which pushes similar data closer in embedding space and vice versa using data samples anchors. However, in traditional CL methods text embeddings are used as anchors, which are scattered over the embedding space. We propose a CL approach which learns key anchors in the form of label embeddings and uses them as anchors. This allows our approach to bring the embeddings closer to their labels in the embedding space and divide the embedding space between labels in a fairer manner. We also introduce a novel method to improve the interpretability of our approach in a multi-class classification scenario. This approach learns the inter-class relationships during training which provide insight into the model decisions. Since our approach is focused on dividing the embedding space between different labels we also experiment with hyperbolic embeddings since they have been proven successful in embedding hierarchical information. Our proposed method outperforms several state-of-the-art baselines by an average 11% F1. Our interpretable approach highlights key data relationships and our experiments with hyperbolic embeddings give us important insights for future investigations. We will release the implementation of our approach with the publication. 2024.wnut-1.6 @@ -93,7 +93,7 @@ <fixed-case>M</fixed-case>aint<fixed-case>N</fixed-case>orm: A corpus and benchmark model for lexical normalisation and masking of industrial maintenance short text TylerBikaunUniversity of Western Australia MelindaHodkiewiczUniversity of Western Australia - WeiLiuUniversity of Western Australia + WeiLiuUniversity of Western Australia 68-78 Maintenance short texts are invaluable unstructured data sources, serving as a diagnostic and prognostic window into the operational health and status of physical assets. These user-generated texts, created during routine or ad-hoc maintenance activities, offer insights into equipment performance, potential failure points, and maintenance needs. However, the use of information captured in these texts is hindered by inherent challenges: the prevalence of engineering jargon, domain-specific vernacular, random spelling errors without identifiable patterns, and the absence of standard grammatical structures. To transform these texts into accessible and analysable data, we introduce the MaintNorm dataset, the first resource specifically tailored for the lexical normalisation task of maintenance short texts. Comprising 12,000 examples, this dataset enables the efficient processing and interpretation of these texts. We demonstrate the utility of MaintNorm by training a lexical normalisation model as a sequence-to-sequence learning task with two learning objectives, namely, enhancing the quality of the texts and masking segments to obscure sensitive information to anonymise data. Our benchmark model demonstrates a universal error reduction rate of 95.8%. The dataset and benchmark outcomes are available to the public. 2024.wnut-1.7 @@ -126,7 +126,7 @@ YixingChenUniversity of Notre Dame ShuangZheng LeiWang - JohnLalorUniversity of Notre Dame + JohnLalorUniversity of Notre Dame 104-118 Data for the Rating Prediction (RP) sentiment analysis task such as star reviews are readily available. However, data for aspect-category sentiment analysis (ACSA) is often desired because of the fine-grained nature but are expensive to collect. In this work we present a method for learning ACSA using only RP labels. We propose Unified Sentiment Analysis (Uni-SA) to efficiently understand aspect and review sentiment in a unified manner. We propose a Distantly Supervised Pyramid Network (DSPN) to efficiently perform Aspect-Category Detection (ACD), ACSA, and OSA using only RP labels for training. We evaluate DSPN on multi-aspect review datasets in English and Chinese and find that with only star rating labels for supervision, DSPN performs comparably well to a variety of benchmark models. We also demonstrate the interpretability of DSPN’s outputs on reviews to show the pyramid structure inherent in document level end-to-end sentiment analysis. 2024.wnut-1.10 diff --git a/data/xml/2024.woah.xml b/data/xml/2024.woah.xml index 991c78904a..da8c8fdaf4 100644 --- a/data/xml/2024.woah.xml +++ b/data/xml/2024.woah.xml @@ -4,7 +4,7 @@ Proceedings of the 8th Workshop on Online Abuse and Harms (WOAH 2024) Yi-LingChung - ZeerakTalat + ZeerakTalat DeboraNozza Flor MiriamPlaza-del-Arco PaulRöttger @@ -25,7 +25,7 @@ Investigating radicalisation indicators in online extremist communities ChristineDe KockUniversity of Melbourne - EduardHovyUniversity of Melbourne + EduardHovyUniversity of Melbourne 1-12 We identify and analyse three sociolinguistic indicators of radicalisation within online extremist forums: hostility, longevity and social connectivity. We develop models to predict the maximum degree of each indicator measured over an individual’s lifetime, based on a minimal number of initial interactions. Drawing on data from two diverse extremist communities, our results demonstrate that NLP methods are effective at prioritising at-risk users. This work offers practical insights for intervention strategies and policy development, and highlights an important but under-studied research direction. 2024.woah-1.1 @@ -47,8 +47,8 @@ Our findings demonstrate that both approaches can be leveraged effectively: For <fixed-case>E</fixed-case>ko<fixed-case>H</fixed-case>ate: Abusive Language and Hate Speech Detection for Code-switched Political Discussions on <fixed-case>N</fixed-case>igerian <fixed-case>T</fixed-case>witter ComfortIlevbareAfe Babalola University - JesujobaAlabiSaarland University - David IfeoluwaAdelaniUniversity College London + JesujobaAlabiSaarland University + David IfeoluwaAdelaniUniversity College London FirdousBakareAfe Babalola University, Ado-Ekiti, Nigeria OluwatoyinAbiolaAfe Babalola University, Ado Ekiti, Ekiti State Nigeria OluwaseyiAdeyemoAfe Babalola University @@ -62,7 +62,7 @@ Our findings demonstrate that both approaches can be leveraged effectively: For A Study of the Class Imbalance Problem in Abusive Language Detection YaqiZhangTechnical University of Munich ViktorHangyaLudwig Maximilian University of Munich - AlexanderFraserLudwig-Maximilians-Universität München + AlexanderFraserLudwig-Maximilians-Universität München 38-51 Abusive language detection has drawn increasing interest in recent years. However, a less systematically explored obstacle is label imbalance, i.e., the amount of abusive data is much lower than non-abusive data, leading to performance issues. The aim of this work is to conduct a comprehensive comparative study of popular methods for addressing the class imbalance issue. We explore 10 well-known approaches on 8 datasets with distinct characteristics: binary or multi-class, moderately or largely imbalanced, focusing on various types of abuse, etc. Additionally, we pro-pose two novel methods specialized for abuse detection: AbusiveLexiconAug and ExternalDataAug, which enrich the training data using abusive lexicons and external abusive datasets, respectively. We conclude that: 1) our AbusiveLexiconAug approach, random oversampling, and focal loss are the most versatile methods on various datasets; 2) focal loss tends to yield peak model performance; 3) oversampling and focal loss provide promising results for binary datasets and small multi-class sets, while undersampling and weighted cross-entropy are more suitable for large multi-class sets; 4) most methods are sensitive to hyperparameters, yet our suggested choice of hyperparameters provides a good starting point. 2024.woah-1.4 @@ -90,7 +90,7 @@ Our findings demonstrate that both approaches can be leveraged effectively: For <fixed-case>VIDA</fixed-case>: The Visual Incel Data Archive. A Theory-oriented Annotated Dataset To Enhance Hate Detection Through Visual Culture SeleniaAnastasiUniversità degli Studi di Genova FlorianSchneiderHamburg University - ChrisBiemannUniversität Hamburg + ChrisBiemannUniversität Hamburg TimFischerUniversität Hamburg 59-67 Images increasingly constitute a larger portion of internet content, encoding even more complex meanings. Recent studies have highlight the pivotal role of visual communication in the spread of extremist content, particularly that associated with right-wing political ideologies. However, the capability of machine learning systems to recognize such meanings, sometimes implicit, remains limited. To enable future research in this area, we introduce and release VIDA, the Visual Incel Data Archive, a multimodal dataset comprising visual material and internet memes collected from two main Incel communities (Italian and Anglophone) known for their extremist misogynistic content. Following the analytical framework of Shifman (2014), we propose a new taxonomy for annotation across three main levels of analysis: content, form, and stance (hate). This allows for the association of images with fine-grained contextual information that help to identify the presence of offensiveness and a broader set of cultural references, enhancing the understanding of more nuanced aspects in visual communication. In this work we present a statistical analysis of the annotated dataset as well as discuss annotation examples and future line of research. @@ -103,7 +103,7 @@ Our findings demonstrate that both approaches can be leveraged effectively: For AliOmraniUniversity of Southern California AlirezaSalkhordeh ZiabariUniversity of Southern California PreniGolazizianUniversity of Southern California - JeffreySorensenGoogle Jigsaw + JeffreySorensenGoogle Jigsaw MortezaDehghaniUniversity of Southern California 68-109 Detecting problematic content, such as hate speech, is a multifaceted and ever-changing task, influenced by social dynamics, user populations, diversity of sources, and evolving language. There has been significant efforts, both in academia and in industry, to develop annotated resources that capture various aspects of problematic content. Due to researchers’ diverse objectives, these annotations are often inconsistent and hence, reports of progress on the detection of problematic content are fragmented. This pattern is expected to persist unless we pool these resources, taking into account the dynamic nature of this issue. In this paper, we propose integrating the available resources, leveraging their dynamic nature to break this pattern, and introduce a continual learning framework and benchmark for problematic content detection. Our benchmark, comprising 84 related tasks, creates a novel measure of progress: prioritizing the adaptability of classifiers to evolving tasks over excelling in specific tasks. To ensure continuous relevance, our benchmark is designed for seamless integration of new tasks. Our results demonstrate that continual learning methods outperform static approaches by up to 17% and 4% AUC in capturing the evolving content and adapting to novel forms of problematic content @@ -184,7 +184,7 @@ Our findings demonstrate that both approaches can be leveraged effectively: For ScottAndersenPosgrado en Ciencia e Ingeniería de la Computación, Universidad Nacional Autónoma de México Segio-LuisOjeda-TruebaInstituto de Ingeniería, Universidad Nacional Autónoma de México JuanVásquezDepartment of Computer Science, University of Colorado Boulder - GemmaBel-EnguixUniversidad Nacional Autónoma de México + GemmaBel-EnguixUniversidad Nacional Autónoma de México 178-200 Thanks to the popularity of social media, data generated by online communities provides an abundant source of diverse language information. This abundance of data allows NLP practitioners and computational linguists to analyze sociolinguistic phenomena occurring in digital communication. In this paper, we analyze the Twitter discourse around the Mexican Spanish-speaking LGBT+ community. For this, we evaluate how the polarity of some nouns related to the LGBT+ community has evolved in conversational settings using a corpus of tweets that cover a time span of ten years. We hypothesize that social media’s fast-moving, turbulent linguistic environment encourages language evolution faster than ever before. Our results indicate that most of the inspected terms have undergone some shift in denotation or connotation. No other generalizations can be observed in the data, given the difficulty that current NLP methods have to account for polysemy, and the wide differences between the various subgroups that make up the LGBT+ community. A fine-grained analysis of a series of LGBT+-related lexical terms is also included in this work. 2024.woah-1.14 @@ -239,7 +239,7 @@ Our findings demonstrate that both approaches can be leveraged effectively: For A <fixed-case>B</fixed-case>ayesian Quantification of Aporophobia and the Aggravating Effect of Low–Wealth Contexts on Stigmatization RyanBrateKNAW MariekeVan ErpKNAW Humanities Cluster - AntalVan Den BoschUtrecht University + AntalVan Den BoschUtrecht University 234-243 Aporophobia, a negative social bias against poverty and the poor, has been highlighted asan overlooked phenomenon in toxicity detec-tion in texts. Aporophobia is potentially im-portant both as a standalone form of toxicity,but also given its potential as an aggravatingfactor in the wider stigmatization of groups. Asyet, there has been limited quantification of thisphenomenon. In this paper, we first quantifythe extent of aporophobia, as observable in Red-dit data: contrasting estimates of stigmatisingtopic propensity between low–wealth contextsand high–wealth contexts via Bayesian estima-tion. Next, we consider aporophobia as a causalfactor in the prejudicial association of groupswith stigmatising topics, by introducing peoplegroup as a variable, specifically Black people.This group is selected given its history of be-ing the subject of toxicity. We evaluate theaggravating effect on the observed n–grams in-dicative of stigmatised topics observed in com-ments which refer to Black people, due to thepresence of low–wealth contexts. We performthis evaluation via a Structural Causal Mod-elling approach, performing interventions onsimulations via Bayesian models, for three hy-pothesised causal mechanisms. 2024.woah-1.18 @@ -303,7 +303,7 @@ Our findings demonstrate that both approaches can be leveraged effectively: For DiyiLiuUniversity of Oxford SamuelFraibergerWorld Bank, NYU, MIT RalphSchroederUniversity of Oxford - Scott A.HaleUniversity of Oxford + Scott A.HaleUniversity of Oxford PaulRöttgerUniversity of Oxford 283-311 Perceptions of hate can vary greatly across cultural contexts. Hate speech (HS) datasets, however, have traditionally been developed by language. This hides potential cultural biases, as one language may be spoken in different countries home to different cultures. In this work, we evaluate cultural bias in HS datasets by leveraging two interrelated cultural proxies: language and geography. We conduct a systematic survey of HS datasets in eight languages and confirm past findings on their English-language bias, but also show that this bias has been steadily decreasing in the past few years. For three geographically-widespread languages—English, Arabic and Spanish—we then leverage geographical metadata from tweets to approximate geo-cultural contexts by pairing language and country information. We find that HS datasets for these languages exhibit a strong geo-cultural bias, largely overrepresenting a handful of countries (e.g., US and UK for English) relative to their prominence in both the broader social media population and the general population speaking these languages. Based on these findings, we formulate recommendations for the creation of future HS datasets. diff --git a/data/xml/2024.yrrsds.xml b/data/xml/2024.yrrsds.xml index 0f2dd05f4e..151f545506 100644 --- a/data/xml/2024.yrrsds.xml +++ b/data/xml/2024.yrrsds.xml @@ -163,7 +163,7 @@ Topological Deep Learning for Term Extraction - Benjamin MatthiasRuppik + Benjamin MatthiasRuppik 43–45 Ben is a postdoctoral researcher in the Dialog Systems and Machine Learning research group led by Milica Gašić at the Heinrich-Heine-Universität Düsseldorf, which he joined in 2022. In collaboration with the Topology and Geometry group in the Mathematics Department, under the supervision of Marcus Zibrowius, Ben is developing applications of Topological Data Analysis in Natural Language Processing, focusing on dialogue systems. Before transitioning to machine learning research, Ben was a pure mathematician at the Max-Planck-Institute for Mathematics in Bonn, where he specialized in knotted surfaces in 4-dimensional manifolds. He graduated from the University of Bonn in 2022. 2024.yrrsds-1.16 diff --git a/data/xml/2025.aaas.xml b/data/xml/2025.aaas.xml index 2f43ced505..5390286b2b 100644 --- a/data/xml/2025.aaas.xml +++ b/data/xml/2025.aaas.xml @@ -22,8 +22,8 @@ Investigating Further Fine-tuning <fixed-case>Wav2vec2.0</fixed-case> in Low Resource Settings for Enhancing Children Speech Recognition and Word-level Reading Diagnosis LingyunGao CristianTejedor-Garcia - CatiaCucchiarini - HelmerStrik + CatiaCucchiarini + HelmerStrik 1–6 2025.aaas-1.1 gao-etal-2025-investigating diff --git a/data/xml/2025.acl.xml b/data/xml/2025.acl.xml index 360427b96b..1c65e24ffb 100644 --- a/data/xml/2025.acl.xml +++ b/data/xml/2025.acl.xml @@ -256,7 +256,7 @@ BoyoungKim MinyoungKim JoonsukParkUniversity of Richmond - Paul HongsuckSeoKorea University + Paul HongsuckSeoKorea University 341-359 Multi-hop question answering (MHQA) involves reasoning across multiple documents to answer complex questions. Dense retrievers typically outperform sparse methods like BM25 by leveraging semantic embeddings in many tasks; however, they require labeled query-document pairs for fine-tuning, which poses a significant challenge in MHQA due to the complexity of the reasoning steps. To overcome this limitation, we introduce Retriever Supervision with Consistency and Relevance (ReSCORE), a novel method for training dense retrievers for MHQA without the need for labeled documents. ReSCORE leverages large language models to measure document-question relevance with answer consistency and utilizes this information to train a retriever within an iterative question-answering framework. Evaluated on three MHQA benchmarks, our extensive experiments demonstrate the effectiveness of ReSCORE, with significant improvements in retrieval performance that consequently lead to state-of-the-art Exact Match and F1 scores for MHQA. 2025.acl-long.16 @@ -270,8 +270,8 @@ YuxuanGuHarbin Institute of Technology WenxuanZhangSingapore University of Technology and Design JingMaHong Kong Baptist University - See-KiongNgNational University of Singapore - Tat-SengChuaNational University of Singapore + See-KiongNgNational University of Singapore + Tat-SengChuaNational University of Singapore 360-381 Large Language Models (LLMs) have significantly advanced the fact-checking studies. However, existing automated fact-checking evaluation methods rely on static datasets and classification metrics, which fail to automatically evaluate the justification production and uncover the nuanced limitations of LLMs in fact-checking. In this work, we introduce FACT-AUDIT, an agent-driven framework that adaptively and dynamically assesses LLMs’ fact-checking capabilities. Leveraging importance sampling principles and multi-agent collaboration, FACT-AUDIT generates adaptive and scalable datasets, performs iterative model-centric evaluations, and updates assessments based on model-specific responses. By incorporating justification production alongside verdict prediction, this framework provides a comprehensive and evolving audit of LLMs’ factual reasoning capabilities, to investigate their trustworthiness. Extensive experiments demonstrate that FACT-AUDIT effectively differentiates among state-of-the-art LLMs, providing valuable insights into model strengths and limitations in model-centric fact-checking analysis. 2025.acl-long.17 @@ -281,9 +281,9 @@ Statistical Deficiency for Task Inclusion Estimation LoïcFosse - FredericBechetAcadémie d’Aix-Marseille - BenoitFavreUniversité d’Aix-Marseille - GéraldineDamnatiOrange Innovation + FredericBechetAcadémie d’Aix-Marseille + BenoitFavreUniversité d’Aix-Marseille + GéraldineDamnatiOrange Innovation GwénoléLecorvéOrange MaximeDarrin PhilippeFormontÉcole de technologie supérieure, Université du Québec and Université Paris-Saclay @@ -341,7 +341,7 @@ YangDengSingapore Management University WenqiangLeiSichuan University JianchengLvSichuan University - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore JimmyHuangYork University and York University 466-488 With the advancement of large language models (LLMs), intelligent models have evolved from mere tools to autonomous agents with their own goals and strategies for cooperating with humans. This evolution has birthed a novel paradigm in NLP, i.e., human-model cooperation, that has yielded remarkable progress in numerous NLP tasks in recent years. In this paper, we take the first step to present a thorough review of human-model cooperation, exploring its principles, formalizations, and open challenges. In particular, we introduce a new taxonomy that provides a unified perspective to summarize existing approaches. Also, we discuss potential frontier areas and their corresponding challenges. We regard our work as an entry point, paving the way for more breakthrough research in this regard. @@ -358,7 +358,7 @@ FeiLiWuhan University JianmingFu ChongTeng - DonghongJi + DonghongJi 489-499 Text-based hyperbole and metaphor detection are of great significance for natural language processing (NLP) tasks. However, due to their semantic obscurity and expressive diversity, it is rather challenging to identify them. Existing methods mostly focus on superficial text features, ignoring the associations of hyperbole and metaphor as well as the effect of implicit emotion on perceiving these rhetorical devices. To implement these hypotheses, we propose an emotion-guided hyperbole and metaphor detection framework based on bidirectional dynamic interaction (EmoBi). Firstly, the emotion analysis module deeply mines the emotion connotations behind hyperbole and metaphor. Next, the emotion-based domain mapping module identifies the target and source domains to gain a deeper understanding of the implicit meanings of hyperbole and metaphor. Finally, the bidirectional dynamic interaction module enables the mutual promotion between hyperbole and metaphor. Meanwhile, a verification mechanism is designed to ensure detection accuracy and reliability. Experiments show that EmoBi outperforms all baseline methods on four datasets. Specifically, compared to the current SoTA, the F1 score increased by 28.1% for hyperbole detection on the TroFi dataset and 23.1% for metaphor detection on the HYPO-L dataset. These results, underpinned by in-depth analyses, underscore the effectiveness and potential of our approach for advancing hyperbole and metaphor detection. 2025.acl-long.23 @@ -385,7 +385,7 @@ VolhaHarytskayaIndependent VladislavPoritskiunaffiliated OksanaVolchekIndependent - PavelSmrzBrno University of Technology + PavelSmrzBrno University of Technology 511-527 In the epoch of multilingual large language models (LLMs), it is still challenging to evaluate the models’ understanding of lower-resourced languages, which motivates further development of expert-crafted natural language understanding benchmarks. We introduce BelarusianGLUE — a natural language understanding benchmark for Belarusian, an East Slavic language, with ≈15K instances in five tasks: sentiment analysis, linguistic acceptability, word in context, Winograd schema challenge, textual entailment. A systematic evaluation of BERT models and LLMs against this novel benchmark reveals that both types of models approach human-level performance on easier tasks, such as sentiment analysis, but there is a significant gap in performance between machine and human on a harder task — Winograd schema challenge. We find the optimal choice of model type to be task-specific: e.g. BERT models underperform on textual entailment task but are competitive for linguistic acceptability. We release the datasets (https://hf.co/datasets/maaxap/BelarusianGLUE) and evaluation code (https://github.com/maaxap/BelarusianGLUE). 2025.acl-long.25 @@ -703,7 +703,7 @@ YizheYang YuyueZhao QiZhangFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University Yu-GangJiangFudan University YongLiaoUniversity of Science and Technology of China and China Academic of Electronics and Information Technology 948-964 @@ -791,7 +791,7 @@ ZhaoZhang JinZhang HuiXuChinese Academy of Sciences - XueqiChengInstitute of Computing Technology, Chinese Academy + XueqiChengInstitute of Computing Technology, Chinese Academy 1053-1067 Stance detection is a pivotal task in Natural Language Processing (NLP), identifying textual attitudes toward various targets. Despite advances in using Large Language Models (LLMs), challenges persist due to hallucination-models generating plausible yet inaccurate content. Addressing these challenges, we introduce MPVStance, a framework that incorporates Multi-Perspective Verification (MPV) with Retrieval-Augmented Generation (RAG) across a structured five-step verification process. Our method enhances stance detection by rigorously validating each response from factual accuracy, logical consistency, contextual relevance, and other perspectives. Extensive testing on the SemEval-2016 and VAST datasets, including scenarios that challenge existing methods and comprehensive ablation studies, demonstrates that MPVStance significantly outperforms current models. It effectively mitigates hallucination issues and sets new benchmarks for reliability and accuracy in stance detection, particularly in zero-shot, few-shot, and challenging scenarios. 2025.acl-long.53 @@ -803,7 +803,7 @@ YaoqiGuo ZhenpengChenNanyang Technological University Jie M.ZhangKing’s College London, University of London - YangLiuNanyang Technological University + YangLiuNanyang Technological University YunMaPeking University 1068-1080 Code generation, the automatic creation of source code from natural language descriptions, has garnered significant attention due to its potential to streamline software development. Inspired by research that links task-personality alignment with improved development outcomes, we conduct an empirical study on personality-guided code generation using large language models (LLMs). Specifically, we investigate how emulating personality traits appropriate to the coding tasks affects LLM performance. We extensively evaluate this approach using seven widely adopted LLMs across four representative datasets. Our results show that personality guidance significantly enhances code generation accuracy, with improved pass rates in 23 out of 28 LLM-dataset combinations. Notably, in 11 cases, the improvement exceeds 5%, and in 5 instances, it surpasses 10%, with the highest gain reaching 12.9%. Additionally, personality guidance can be easily integrated with other prompting strategies to further boost performance. @@ -862,7 +862,7 @@ QuanweiCaiByteDance Inc. YeWu HuiqiLiu - ZhiyuChen + ZhiyuChen BingDuan ShengZhongnanjing university 1160-1174 @@ -910,7 +910,7 @@ Untie the Knots: An Efficient Data Augmentation Strategy for Long-Context Pre-Training in Language Models - JunfengTianXiaohongshu + JunfengTianXiaohongshu DaZhengXiaohongshu YangChen RuiWangDecilion @@ -961,7 +961,7 @@ JinyuLiMicrosoft ShengZhaoMicrosoft XixinWuThe Chinese University of Hong Kong - Helen M.MengThe Chinese University of Hong Kong + Helen M.MengThe Chinese University of Hong Kong FuruWeiMicrosoft Research 1287-1300 We present MELLE, a novel continuous-valued token based language modeling approach for text-to-speech synthesis (TTS). MELLE autoregressively generates continuous mel-spectrogram frames directly from text condition, bypassing the need for vector quantization, which is typically designed for audio compression and sacrifices fidelity compared to continuous representations. Specifically, (i) instead of cross-entropy loss, we apply regression loss with a proposed spectrogram flux loss function to model the probability distribution of the continuous-valued tokens; (ii) we have incorporated variational inference into MELLE to facilitate sampling mechanisms, thereby enhancing the output diversity and model robustness. Experiments demonstrate that, compared to the two-stage codec language model VALL-E and its variants, the single-stage MELLE mitigates robustness issues by avoiding the inherent flaws of sampling vector-quantized codes, achieves superior performance across multiple metrics, and, most importantly, offers a more streamlined paradigm. The demos of our work are provided at https://aka.ms/melle. @@ -1009,8 +1009,8 @@ AbigailWheeler NicholasKerry Jeremy D. W.Clifton - H. AndrewSchwartz - Ryan L.Boyd + H. AndrewSchwartz + Ryan L.Boyd 1362-1376 Measuring the prevalence and dimensions of self beliefs is essential for understanding human self-perception and various psychological outcomes. In this paper, we develop a novel task for classifying language that contains explicit or implicit mentions of the author’s self beliefs. We contribute a set of 2,000 human-annotated self beliefs, 100,000 LLM-labeled examples, and 10,000 surveyed self belief paragraphs. We then evaluate several encoder-based classifiers and training routines for this task. Our trained model, SelfAwareNet, achieved an AUC of 0.944, outperforming 0.839 from OpenAI’s state-of-the-art GPT-4o model. Using this model we derive data-driven categories of self beliefs and demonstrate their ability to predict valence, depression, anxiety, and stress. We release the resulting self belief classification model and annotated datasets for use in future research. 2025.acl-long.69 @@ -1268,7 +1268,7 @@ HaotianYeCenter for Information and Language Processing ChunlanMa MingyangWang - HinrichSchuetze + HinrichSchuetze 1743-1770 Recent multilingual pretrained language models (mPLMs) often avoid using language embeddings – learnable vectors assigned to individual languages. However, this places a significant burden on token representations to encode all language-specific information, which may hinder language neutrality. To address this limitation, we propose Language-Script Aware Multilingual Pretraining (LangSAMP), a method that incorporates both language and script embeddings to enhance representation learning. Specifically, we integrate these embeddings into the output of the Transformer blocks before passing the final representations to the language modeling head for prediction. We apply LangSAMP to the continual pretraining of XLM-R on a highly multilingual corpus covering more than 500 languages. The resulting model consistently outperforms the baseline in zero-shot crosslingual transfer across diverse downstream tasks. Extensive analysis reveals that language and script embeddings capture language- and script-specific nuances, which benefits more language-neutral representations, proven by improved pairwise cosine similarity. In our case study, we also show that language and script embeddings can be used to select better source languages for crosslingual transfer. We make our code and models publicly available at https://github.com/cisnlp/LangSAMP. 2025.acl-long.88 @@ -1295,7 +1295,7 @@ XinpengWang MarkusHerklotz FraukeKreuterUniversity of Maryland - BarbaraPlankLudwig-Maximilians-Universität München + BarbaraPlankLudwig-Maximilians-Universität München MatthiasAßenmacherLudwig-Maximilians-Universität München 1785-1809 In recent research, large language models (LLMs) have been increasingly used to investigate public opinions. This study investigates the algorithmic fidelity of LLMs, i.e., the ability to replicate the socio-cultural context and nuanced opinions of human participants. Using open-ended survey data from the German Longitudinal Election Studies (GLES), we prompt different LLMs to generate synthetic public opinions reflective of German subpopulations by incorporating demographic features into the persona prompts. Our results show that Llama performs better than other LLMs at representing subpopulations, particularly when there is lower opinion diversity within those groups. Our findings further reveal that the LLM performs better for supporters of left-leaning parties like The Greens and The Left compared to other parties, and matches the least with the right-party AfD. Additionally, the inclusion or exclusion of specific variables in the prompts can significantly impact the models’ predictions. These findings underscore the importance of aligning LLMs to more effectively model diverse public opinions while minimizing political biases and enhancing robustness in representativeness. @@ -1365,7 +1365,7 @@ From Information to Insight: Leveraging <fixed-case>LLM</fixed-case>s for Open Aspect-Based Educational Summarization YangZhongUniversity of Pittsburgh - DianeLitmanUniversity of Pittsburgh + DianeLitmanUniversity of Pittsburgh 1914-1947 This paper addresses the challenge of aspect-based summarization in education by introducing Reflective ASPect-based summarization (ReflectASP), a novel dataset that summarizes student reflections on STEM lectures. Despite the promising performance of large language models in general summarization, their application to nuanced aspect-based summaries remains under-explored. ReflectASP eases the exploration of open-aspect-based summarization (OABS), overcoming the limitations of current datasets and comes with ample human annotations. We benchmarked different types of zero-shot summarization methods and proposed two refinement methods to improve summaries, supported by both automatic and human manual evaluations. Additionally, we analyzed suggestions and revisions made during the refinement process, offering a fine-grained study of the editing strategies employed by these methods. We make our models, dataset, and all human evaluation results available at https://github.com/cs329yangzhong/ReflectASP. 2025.acl-long.95 @@ -1518,7 +1518,7 @@ MatthiasOrlikowskiUniversität Bielefeld JiaxinPeiStanford University PaulRöttgerBocconi University - PhilippCimiano + PhilippCimiano DavidJurgensUniversity of Michigan - Ann Arbor DirkHovyBocconi University 2092-2111 @@ -1670,7 +1670,7 @@ ManuelTonneauOxford Internet Institute, University of Oxford DiyiLiu NiyatiMalhotra - Scott A.HaleMeedan, University of Oxford and Alan Turing Institute + Scott A.HaleMeedan, University of Oxford and Alan Turing Institute SamuelFraibergerWorld Bank VictorOrozco-Olvera PaulRöttgerBocconi University @@ -1720,7 +1720,7 @@ Steering into New Embedding Spaces: Analyzing Cross-Lingual Alignment Induced by Model Interventions in Multilingual Language Models - AnirudhSundar + AnirudhSundar SineadWilliamsonApple KatherineMetcalfApple Barry-JohnTheobaldApple @@ -1774,7 +1774,7 @@ Did Translation Models Get More Robust Without Anyone <fixed-case>E</fixed-case>ven Noticing? BenPetersInstituto de Telecomunicações, Portugal and Instituto Superior Técnico - AndreMartinsInstituto Superior Técnico and Unbabel + AndreMartinsInstituto Superior Técnico and Unbabel 2445-2458 Neural machine translation (MT) models achieve strong results across a variety of settings, but it is widely believed that they are highly sensitive to “noisy” inputs, such as spelling errors, abbreviations, and other formatting issues. In this paper, we revisit this insight in light of recent multilingual MT models and large language models (LLMs) applied to machine translation. Somewhat surprisingly, we show through controlled experiments that these models are far more robust to many kinds of noise than previous models, even when they perform similarly on clean data. This is notable because, even though LLMs have more parameters and more complex training processes than past models, none of the open ones we consider use any techniques specifically designed to encourage robustness. Next, we show that similar trends hold for social media translation experiments – LLMs are more robust to social media text. We include an analysis of the circumstances in which source correction techniques can be used to mitigate the effects of noise. Altogether, we show that robustness to many types of noise has increased. 2025.acl-long.122 @@ -1926,7 +1926,7 @@ HaoSun Chris XingTianCity University of Hong Kong ChenqiKong - XinDongNVIDIA + XinDongNVIDIA HaoliangLiCity University of Hong Kong 2623-2641 Large Language Models (LLMs) have demonstrated impressive in-context learning (ICL) capabilities from few-shot demonstration exemplars. Recent learning-based demonstration selection methods have proven beneficial to ICL by choosing more useful exemplars. While these methods generally assume they learn better similarity measurements between exemplars and test cases from the proxy task, what kinds of similarities are captured by them and are vital to performing ICL still need to be explored. To dive into this question, we analyze the working mechanism of learning-based demonstration selection methods and empirically identify two essential factors of their similarity measurements: 1) Integrating task-agnostic similarities of different levels between the input of exemplars and test cases; 2) Incorporating task-specific similarity between the output of exemplars and test cases. We validate these two findings through extensive quantitative analysis across ten datasets and various LLMs. Based on these insights, we introduce two simplified exemplar selection methods, MLSM and TTF, catering to task-agnostic and task-specific demands to eliminate costly data collection. The effectiveness of both methods evince our findings again and pave the way for future studies. @@ -2097,7 +2097,7 @@ GregorGeigleBayerische Julius-Maximilians-Universität Würzburg FlorianSchneider CarolinHoltermannUniversität Hamburg - ChrisBiemannU Hamburg + ChrisBiemannU Hamburg RaduTimofteBayerische Julius-Maximilians-Universität Würzburg AnneLauscherUniversität Hamburg GoranGlavašJulius-Maximilians-Universität Würzburg @@ -2131,7 +2131,7 @@ <fixed-case>C</fixed-case>on <fixed-case>I</fixed-case>nstruction: Universal Jailbreaking of Multimodal Large Language Models via Non-Textual Modalities JiahuiGeng Thy ThyTran - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence IrynaGurevychInstitute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt 2917-2933 Existing attacks against multimodal language models often communicate instruction through text, either as an explicit malicious instruction or a crafted generic prompt, and accompanied by a toxic image. In contrast, here we exploit the capabilities of MLLMs in following non-textual instruction, i.e., an adversarial image or audio, namely Con Instruction. It is a novel gray-box attack method that generates adversarial images or audio to convey specific harmful instructions to MLLMs. We also find that combining our adversarial examples with certain non-empty text inputs amplifies attack success, while appending these after malicious text has limited effects. To evaluate whether an attack is successful, we introduce a new attack response categorization (ARC) that considers the response quality and relevancy concerning the malicious instruction. The results show that Con Instruction effectively bypasses the safety mechanisms in various visual and audio-language models, including LLaVA-v1.5, InternVL, Qwen-VL, and Qwen-Audio, across two standard benchmarks: AdvBench and SafeBench. Specifically, our method achieves the highest attack success rates, reaching 81.3% and 86.6% on LLaVA-v1.5 (13B). We show that larger models are more susceptible toCon Instruction, contrasting observations in their underlying LLMs. On the defense side, we explore various methods against our attacks and find substantial gaps among existing techniques. The code will be made available upon publication. @@ -2169,7 +2169,7 @@ BoxuanLyuInstitute of Science Tokyo HidetakaKamigaitoNara Institute of Science and Technology KotaroFunakoshiInstitute of Science Tokyo - ManabuOkumuraInstitute of Science Tokyo and Tokyo Institute of Technology, Tokyo Institute of Technology + ManabuOkumuraInstitute of Science Tokyo and Tokyo Institute of Technology, Tokyo Institute of Technology 2976-2994 Maximum a posteriori decoding, a commonly used method for neural machine translation (NMT), aims to maximize the estimated posterior probability. However, high estimated probability does not always lead to high translation quality. Minimum Bayes Risk (MBR) decoding offers an alternative by seeking hypotheses with the highest expected utility.Inspired by Quality Estimation (QE) reranking which uses the QE model as a ranker, we propose source-based MBR (sMBR) decoding, a novel approach that utilizes quasi-sources (generated via paraphrasing or back-translation) as “support hypotheses” and a reference-free quality estimation metric as the utility function, marking the first work to solely use sources in MBR decoding. Experiments show that sMBR outperforms QE reranking and the standard MBR decoding. Our findings suggest that sMBR is a promising approach for NMT decoding. 2025.acl-long.149 @@ -2190,7 +2190,7 @@ SiyuYuan TaoGuiFudan University QiZhangFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University JiecaoChenByteDance Inc. 2995-3021 Effective evaluation of multi-hop tool use is critical for analyzing the understanding, reasoning, and function-calling capabilities of large language models (LLMs). However, progress has been hindered by a lack of reliable evaluation datasets. To address this, we present ToolHop, a dataset comprising 995 user queries and 3,912 associated tools, specifically designed for rigorous evaluation of multi-hop tool use. ToolHop ensures diverse queries, meaningful interdependencies, locally executable tools, detailed feedback, and verifiable answers through a novel query-driven data construction approach that includes tool creation, document refinement, and code generation. We evaluate 14 LLMs across five model families (i.e., LLaMA3.1, Qwen2.5, Gemini1.5, Claude3.5, and GPT), uncovering significant challenges in handling multi-hop tool-use scenarios. The leading model, GPT-4o, achieves an accuracy of 49.04%, underscoring substantial room for improvement. Further analysis reveals variations in tool-use strategies for various families, offering actionable insights to guide the development of more effective approaches. Code and data can be found in https://huggingface.co/datasets/bytedance-research/ToolHop. @@ -2241,7 +2241,7 @@ QianLiuUniversity of Auckland LiangmingPanUniversity of Arizona William YangWangUC Santa Barbara - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence Mong-LiLeeNational University of Singapore WynneHsuNational University of Singapore 3052-3075 @@ -2344,7 +2344,7 @@ NehaSrikanth TaylorPellegrin RachelRudinger - ClaireBonialGeorgetown University and Army Research Lab + ClaireBonialGeorgetown University and Army Research Lab PhilipResnikUniversity of Maryland, College Park 3200-3215 While it is commonly accepted that maintaining common ground plays a role in conversational success, little prior research exists connecting conversational grounding to success in task-oriented conversations. We study failures of grounding in the Ubuntu IRC dataset, where participants use text-only communication to resolve technical issues. We find that disruptions in conversational flow often stem from a misalignment in common ground, driven by a divergence in beliefs and assumptions held by participants. These disruptions, which we call conversational friction, significantly correlate with task success. While LLMs can identify overt cases of conversational friction, they struggle with subtler and more context-dependent instances that require pragmatic or domain-specific reasoning. @@ -2409,7 +2409,7 @@ Revisiting Common Assumptions about <fixed-case>A</fixed-case>rabic Dialects in <fixed-case>NLP</fixed-case> AmrKelegUniversity of Edinburgh, University of Edinburgh - SharonGoldwaterUniversity of Edinburgh + SharonGoldwaterUniversity of Edinburgh WalidMagdyUniversity of Edinburgh 3309-3327 Arabic has diverse dialects, where one dialect can be substantially different from the others. In the NLP literature, some assumptions about these dialects are widely adopted (e.g., “Arabic dialects can be grouped into distinguishable regional dialects”) and are manifested in different computational tasks such as Arabic Dialect Identification (ADI). However, these assumptions are not quantitatively verified. We identify four of these assumptions and examine them by extending and analyzing a multi-label dataset, where the validity of each sentence in 11 different country-level dialects is manually assessed by speakers of these dialects. Our analysis indicates that the four assumptions oversimplify reality, and some of them are not always accurate. This in turn might be hindering further progress in different Arabic NLP tasks. @@ -2537,7 +2537,7 @@ ZhenglinHua YuhengJiaSoutheast University MingTangInstitute of automation, Chinese academy of science, Chinese Academy of Sciences - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore JinqiaoWangInstitute of Automation, Chinese Academy of Sciences 3488-3501 Large vision-language models (LVLMs) have made substantial progress in integrating large language models (LLMs) with visual inputs, enabling advanced multimodal reasoning. Despite their success, a persistent challenge is hallucination—where generated text fails to accurately reflect visual content—undermining both accuracy and reliability. Existing methods focus on alignment training or decoding refinements but primarily address symptoms at the generation stage without probing the underlying causes. In this work, we investigate the internal mechanisms driving hallucination in LVLMs, with an emphasis on the multi-head attention module. Specifically, we introduce Vision-aware Head Divergence (VHD), a metric that quantifies the sensitivity of attention head outputs to visual context. Based on this, our findings reveal the presence of vision-aware attention heads that are more attuned to visual information; however, the model’s overreliance on its prior language patterns is closely related to hallucinations. Building on these insights, we propose Vision-aware Head Reinforcement (VHR), a training-free approach to mitigate hallucination by enhancing the role of vision-aware attention heads. Extensive experiments demonstrate that our method achieves superior performance compared to state-of-the-art approaches in mitigating hallucinations, while maintaining high efficiency with negligible additional time overhead. The code is available at https://github.com/jinghan1he/VHR. @@ -2807,7 +2807,7 @@ ZihanZhang YuchiLiu QuanweiShen - FeiLiu + FeiLiu YuKuang JianHe ConglinLiu @@ -2850,7 +2850,7 @@ Positional Overload: Positional Debiasing and Context Window Extension for Large Language Models using Set Encoding LukasKinderKarlsruher Institut für Technologie LukasEdmanTechnische Universität München - AlexanderFraserTechnical University of Munich + AlexanderFraserTechnical University of Munich TobiasKäferKarlsruher Institut für Technologie 3896-3908 Large Language Models (LLMs) typically track the order of tokens using positional encoding, which causes the following problems: positional bias, where the model is influenced by an ordering within the prompt, and a fixed context window, as models struggle to generalize to positions beyond those encountered during training. To address these limitations, we developed a novel method called \textit{set encoding}. This method allows multiple pieces of text to be encoded in the same position, thereby eliminating positional bias entirely. Another promising use case for set encoding is to increase the size of the input an LLM can handle. Our experiments demonstrate that set encoding allows an LLM to solve tasks with far more tokens than without set encoding. To our knowledge, set encoding is the first technique to effectively extend an LLM’s context window without requiring any additional training. @@ -2998,7 +2998,7 @@ XuezhiCaoMeituan XunliangCaiMeituan WenYao - WeinanZhang + WeinanZhang XinbingWangShanghai Jiao Tong University YingWenShanghai Jiao Tong University 4081-4108 @@ -3011,7 +3011,7 @@ <fixed-case>T</fixed-case>ok<fixed-case>A</fixed-case>lign: Efficient Vocabulary Adaptation via Token Alignment ChongLiInstitute of automation, Chinese Academy of Sciences JiajunZhangInstitute of automation, Chinese academy of science, Chinese Academy of Sciences - ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences + ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences 4109-4126 Tokenization serves as a foundational step for Large Language Models (LLMs) to process text. In new domains or languages, the inefficiency of the tokenizer will slow down the training and generation of LLM. The mismatch in vocabulary also hinders deep knowledge transfer between LLMs like token-level distillation. To mitigate this gap, we propose an efficient method named **TokAlign** to replace the vocabulary of LLM from the token co-occurrences view, and further transfer the token-level knowledge between models. It first aligns the source vocabulary to the target one by learning a one-to-one mapping matrix for token IDs. Model parameters, including embeddings, are rearranged and progressively fine-tuned for the new vocabulary. Our method significantly improves multilingual text compression rates and vocabulary initialization for LLMs, decreasing the perplexity from {3.4e}^{2} of strong baseline methods to {1.2e}^{2} after initialization. Experimental results on models across multiple parameter scales demonstrate the effectiveness and generalization of TokAlign, which costs as few as 5k steps to restore the performance of the vanilla model. After unifying vocabularies between LLMs, token-level distillation can remarkably boost (+4.4% than sentence-level distillation) the base model, costing only 235M tokens. 2025.acl-long.207 @@ -3020,7 +3020,7 @@ <fixed-case>A</fixed-case>da<fixed-case>E</fixed-case>dit: Advancing Continuous Knowledge Editing For Large Language Models - QiLi + QiLi XiaowenChuHong Kong University of Science and Technology (Guangzhou) 4127-4149 Knowledge editing (KE) has emerged as a prominent alternative that enables efficient and precise information modification inside language models. However, a critical challenge arises in continuous language models editing — a significant performance decline both in knowledge update and retention when the number of edits increases. By dissecting the perturbation weight of language model in continuous KE, we uncover that disentangled and sparsified knowledge representation can significantly alleviate the performance decline. Building on these insights, we introduce AdaEdit, a novel knowledge editing method. Extensive empirical evaluations on multiple LLMs demonstrate that our proposed methods can enhance the performance of edited LLMs in large-size continuous editing regimes, outperforming existing ones without substantially compromising the general abilities of these models. @@ -3070,7 +3070,7 @@ ZhiyuanZeng QinyuanCheng XipengQiuFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 4203-4233 Process Reward Models (PRMs) are crucial for guiding Large Language Models (LLMs) in complex scenarios by providing dense reward signals. However, existing PRMs primarily rely on heuristic approaches, which struggle with cross-domain generalization. While LLM-as-judge has been proposed to provide generalized rewards, current research has focused mainly on feedback results, overlooking the meaningful guidance embedded within the text. Additionally, static and coarse-grained evaluation criteria struggle to adapt to complex process supervision. To tackle these challenges, we propose Dynamic and Generalizable Process Reward Modeling (DG-PRM), which features a reward tree to capture and store fine-grained, multi-dimensional reward criteria. DG-PRM dynamically selects reward signals for step-wise reward scoring. To handle multifaceted reward signals, we pioneeringly adopt Pareto dominance estimation to identify discriminative positive and negative pairs. Experimental results show that DG-PRM achieves stunning performance on prevailing benchmarks, significantly boosting model performance across tasks with dense rewards. Further analysis reveals that DG-PRM adapts well to out-of-distribution scenarios, demonstrating exceptional generalizability. 2025.acl-long.212 @@ -3205,7 +3205,7 @@ LizhenQuMonash University ZhuangLiRoyal Melbourne Institute of Technology SurajSharmaCalvin College - GholamrezaHaffariMonash University, Monash University and Monash University + GholamrezaHaffariMonash University, Monash University and Monash University 4397-4422 Opinion survey research is a crucial method used by social scientists for understanding societal beliefs and behaviors. Traditional methodologies often entail high costs and limited scalability, while current automated methods such as opinion synthesis exhibit severe biases and lack traceability. In this paper, we introduce SurveyPilot, a novel finite-state orchestrated agentic framework that automates the collection and analysis of human opinions from social media platforms. SurveyPilot addresses the limitations of pioneering approaches by (i) providing transparency and traceability in each state of opinion collection and (ii) incorporating several techniques for mitigating biases, notably with a novel genetic algorithm for improving result diversity. Our extensive experiments reveal that SurveyPilot achieves a close alignment with authentic survey results across multiple domains, observing average relative improvements of 68,98% and 51,37% when comparing to opinion synthesis and agent-based approaches. Implementation of SurveyPilot is available on https://github.com/thanhpv2102/SurveyPilot. 2025.acl-long.221 @@ -3414,7 +3414,7 @@ Discourse Relation-Enhanced Neural Coherence Modeling - WeiLiuHeidelberg University + WeiLiuHeidelberg University MichaelStrubeHeidelberg Institute for Theoretical Studies 4748-4762 Discourse coherence theories posit relations between text spans as a key feature of coherent texts. However, existing work on coherence modeling has paid little attention to discourse relations. In this paper, we provide empirical evidence to demonstrate that relation features are correlated with text coherence. Then, we investigate a novel fusion model that uses position-aware attention and a visible matrix to combine text- and relation-based features for coherence assessment. Experimental results on two benchmarks show that our approaches can significantly improve baselines, demonstrating the importance of relation features for coherence modeling. @@ -3444,7 +3444,7 @@ MinLiuInstitute of Computing Technology, Chinese Academy of Sciences ZhiyiYin, Chinese Academy of Sciences LeiJingyuLeiJingyuTsinghua University - QiLiTsinghua University + QiLiTsinghua University 4785-4817 Current studies have exposed the risk of Large Language Models (LLMs) generating harmful content by jailbreak attacks. However, they overlook that the direct generation of harmful content from scratch is more difficult than inducing LLM to calibrate benign content into harmful forms.In our study, we introduce a novel attack framework that exploits AdVersArial meTAphoR (AVATAR) to induce the LLM to calibrate malicious metaphors for jailbreaking.Specifically, to answer harmful queries, AVATAR adaptively identifies a set of benign but logically related metaphors as the initial seed.Then, driven by these metaphors, the target LLM is induced to reason and calibrate about the metaphorical content, thus jailbroken by either directly outputting harmful responses or calibrating residuals between metaphorical and professional harmful content.Experimental results demonstrate that AVATAR can effectively and transferably jailbreak LLMs and achieve a state-of-the-art attack success rate across multiple advanced LLMs. 2025.acl-long.238 @@ -3549,7 +3549,7 @@ <fixed-case>W</fixed-case>arrior<fixed-case>C</fixed-case>oder: Learning from Expert Battles to Augment Code Large Language Models HuawenFeng - PuZhao + PuZhao QingfengSun CanXu FangkaiYang @@ -3674,7 +3674,7 @@ YihongLiuLudwig-Maximilians-Universität München ErcongNie JannikStrötgenKarlsruhe University of Applied Sciences - HinrichSchuetze + HinrichSchuetze 5075-5094 Multilingual language models (MLMs) store factual knowledge across languages but often struggle to provide consistent responses to semantically equivalent prompts in different languages. While previous studies point out this cross-lingual inconsistency issue, the underlying causes remain unexplored. In this work, we use mechanistic interpretability methods to investigate cross-lingual inconsistencies in MLMs. We find that MLMs encode knowledge in a language-independent concept space through most layers, and only transition to language-specific spaces in the final layers. Failures during the language transition often result in incorrect predictions in the target language, even when the answers are correct in other languages. To mitigate this inconsistency issue, we propose a linear shortcut method that bypasses computations in the final layers, enhancing both prediction accuracy and cross-lingual consistency. Our findings shed light on the internal mechanisms of MLMs and provide a lightweight, effective strategy for producing more consistent factual outputs. 2025.acl-long.253 @@ -3718,8 +3718,8 @@ WenxuanZhangSingapore University of Technology and Design ShuaiyiLiChinese University of Hong Kong, The Chinese University of Hong Kong WenyaXieUniversity of Minnesota - Twin Cities - See-KiongNgNational University of Singapore - Tat-SengChuaNational University of Singapore + See-KiongNgNational University of Singapore + Tat-SengChuaNational University of Singapore YangDengSingapore Management University 5131-5157 Although large language models (LLMs) store vast amount of knowledge in their parameters, they still have limitations in the memorization and utilization of certain knowledge, leading to undesired behaviors such as generating untruthful and inaccurate responses. This highlights the critical need to understand the knowledge boundary of LLMs, a concept that remains inadequately defined in existing research. In this survey, we propose a comprehensive definition of the LLM knowledge boundary and introduce a formalized taxonomy categorizing knowledge into four distinct types. Using this foundation, we systematically review the field through three key lenses: the motivation for studying LLM knowledge boundaries, methods for identifying these boundaries, and strategies for mitigating the challenges they present. Finally, we discuss open challenges and potential research directions in this area. We aim for this survey to offer the community a comprehensive overview, facilitate access to key issues, and inspire further advancements in LLM knowledge research. @@ -3879,7 +3879,7 @@ ZicanDongRenmin University of China YangLuCheung Kong Graduate School of business XuMiaoDataCanvas - XinZhaoRenmin University of China + XinZhaoRenmin University of China Ji-RongWenRenmin University of China 5374-5400 Due to the immense resource demands and the involved complex techniques, it is still challenging for successfully pre-training a large language models (LLMs) with state-of-the-art performance. In this paper, we explore the key bottlenecks and designs during pre-training, and make the following contributions: (1) a comprehensive investigation into the factors contributing to training instability; (2) a robust optimization approach designed to mitigate training instability effectively; (3) an elaborate data pipeline that integrates data synthesis, data curriculum, and data selection. By integrating the above techniques, we create a rather low-cost training recipe and use it to pre-train YuLan-Mini, a fully-open base model with 2.4B parameters on 1.08T tokens. Remarkably, YuLan-Mini achieves top-tier performance among models of similar parameter scale, with comparable performance to industry-leading models that require significantly more data. To facilitate reproduction, we release the full details of training recipe and data composition. Project details can be accessed at the following link: https://anonymous.4open.science/r/YuLan-Mini/README.md. @@ -4135,7 +4135,7 @@ ZongzhangZhangNanjing University TaoGuiFudan University QiZhangFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 5710-5728 In Reinforcement Learning from Human Feedback (RLHF), the reward model (RM) evaluates the response quality based on the given context and assigns a reward. It plays a crucial role in aligning RLHF with human preferences. Although the current RM training paradigm concatenates the context and response while amplifying the reward difference between good and bad response pairs, we demonstrate that the RM faces two significant issues: i) it often allocates only a small proportion of attention to the context, and ii) it frequently ignores segments of the context that are relevant for evaluating the response quality. These issues undermine the RM’s effectiveness in modeling human preferences. To further address these challenges, we propose AttnRM, a novel optimization framework that enables the RM to concentrate on crucial segments of the context. Experimental results demonstrate that AttnRM significantly improves preference modeling by increasing attention to relevant information within the context. It also enhances the RM’s generalizability and achieves better performance in aligning with human preferences. 2025.acl-long.285 @@ -4195,7 +4195,7 @@ YutaoZhu JinhaoJiang YingqianMin - XinZhaoRenmin University of China + XinZhaoRenmin University of China ZhichengDouRenmin University of China JiaxinMaoRenmin University of China, Tsinghua University YankaiLinRenmin University of China @@ -4221,7 +4221,7 @@ QingGuoNational University of Singapore and Agency for Science, Technology and Research (A*STAR)) FelixJuefei-XuGenAI, Meta JianZhangNanyang Technological University - YangLiuNanyang Technological University + YangLiuNanyang Technological University GeguangPuEast China Normal University 5796-5816 Universal goal hijacking is a kind of prompt injection attack that forces LLMs to return a target malicious response for arbitrary normal user prompts. The previous methods achieve high attack performance while being too cumbersome and time-consuming. Also, they have concentrated solely on optimization algorithms, overlooking the crucial role of the prompt. To this end, we propose a method called POUGH that incorporates an efficient optimization algorithm and two semantics-guided prompt organization strategies. Specifically, our method starts with a sampling strategy to select representative prompts from a candidate pool, followed by a ranking strategy that prioritizes them. Given the sequentially ranked prompts, our method employs an iterative optimization algorithm to generate a fixed suffix that can concatenate to arbitrary user prompts for universal goal hijacking. Experiments conducted on four popular LLMs and ten types of target responses verified the effectiveness. @@ -4248,12 +4248,12 @@ What Makes a Good Natural Language Prompt? - Do XuanLongNational University of Singapore + Do XuanLongNational University of Singapore DuyDinhFPT Software Ngoc-HaiNguyenQualComm KenjiKawaguchiNational University of Singapore - Nancy F.Chen - ShafiqJotyNanyang Technological University and SalesForce.com + Nancy F.Chen + ShafiqJotyNanyang Technological University and SalesForce.com Min-YenKanNational University of Singapore 5835-5873 As large language models (LLMs) have progressed towards more human-like and human–AI communications prevalent, prompting has emerged as a decisive component. However, there is limited conceptual consensus on what exactly quantifies natural language prompts. We attempt to address this question by conducting a meta-analysis surveying 150+ prompting-related papers from leading NLP and AI conferences (2022–2024), and blogs. We propose a property- and human-centric framework for evaluating prompt quality, encompassing 21 properties categorized into six dimensions. We then examine how existing studies assess their impact on LLMs, revealing their imbalanced support across models and tasks, and substantial research gaps. Further, we analyze correlations among properties in high-quality natural language prompts, deriving prompting recommendations. Finally, we explore multi-property prompt enhancements in reasoning tasks, observing that single-property enhancements often have the greatest impact. Our findings establish a foundation for property-centric prompt evaluation and optimization, bridging the gaps between human–AI communication and opening new prompting research directions. @@ -4357,7 +4357,7 @@ ShengWang ErxinYuHong Kong Polytechnic University XumingHuThe Hong Kong University of Science and Technology (Guangzhou) and Hong Kong University of Science and Technology - Kam-FaiWongThe Chinese University of Hong Kong + Kam-FaiWongThe Chinese University of Hong Kong 6002-6024 Despite demonstrating impressive capabilities, Large Language Models (LLMs) still often struggle to accurately express the factual knowledge they possess, especially in cases where the LLMs’ knowledge boundaries are ambiguous. To improve LLMs’ factual expressions, we propose the UAlign framework, which leverages Uncertainty estimations to represent knowledge boundaries, and then explicitly incorporates these representations as input features into prompts for LLMs to Align with factual knowledge. First, we prepare the dataset on knowledge question-answering (QA) samples by calculating two uncertainty estimations, including confidence score and semantic entropy, to represent the knowledge boundaries for LLMs. Subsequently, using the prepared dataset, we train a reward model that incorporates uncertainty estimations and then employ the Proximal Policy Optimization (PPO) algorithm for factuality alignment on LLMs. Experimental results indicate that, by integrating uncertainty representations in LLM alignment, the proposed UAlign can significantly enhance the LLMs’ capacities to confidently answer known questions and refuse unknown questions on both in-domain and out-of-domain tasks, showing reliability improvements and good generalizability over various prompt- and training-based baselines. 2025.acl-long.299 @@ -4554,7 +4554,7 @@ BingchengLiu JieWu RenxiangLi - Philip S.YuUniversity of Illinois Chicago + Philip S.YuUniversity of Illinois Chicago 6240-6254 Contract review is a critical process to protect the rights and interests of the parties involved. However, this process is time-consuming, labor-intensive, and costly, especially when a contract faces multiple rounds of review. To accelerate the contract review and promote the completion of transactions, this paper introduces a novel benchmark of legal provision recommendation and conflict detection for contract auto-reviewing (ProvBench), which aims to recommend the legal provisions related to contract clauses and detect possible legal conflicts. Specifically, we construct the first Legal Provision Recommendation Dataset: ProvData, which covers 8 common contract types. In addition, we conduct extensive experiments to evaluate ProvBench on various state-of-the-art models. Experimental results validate the feasibility of ProvBench and demonstrate the effectiveness of ProvData. Finally, we identify potential challenges in the ProvBench and advocate for further investigation. 2025.acl-long.312 @@ -4630,7 +4630,7 @@ XunWang Si-QingChen Michael J.Wooldridge - Janet B.PierrehumbertUniversity of Oxford + Janet B.PierrehumbertUniversity of Oxford FuruWeiMicrosoft Research 6317-6342 Language is not monolithic. While benchmarks, including those designed for multiple languages, are often used as proxies to evaluate the performance of Large Language Models (LLMs), they tend to overlook the nuances of within-language variation and thus fail to model the experience of speakers of non-standard dialects. Focusing on African American Vernacular English (AAVE), we present the first study aimed at objectively assessing the fairness and robustness of LLMs in handling dialects across canonical reasoning tasks, including algorithm, math, logic, and integrated reasoning. We introduce **ReDial** (**Re**asoning with **Dial**ect Queries), a benchmark containing 1.2K+ parallel query pairs in Standardized English and AAVE. We hire AAVE speakers, including experts with computer science backgrounds, to rewrite seven popular benchmarks,such as HumanEval and GSM8K. With ReDial, we evaluate widely used LLMs, including GPT, Claude, Llama, Mistral, and the Phi model families. Our findings reveal that almost all of these widely used models show significant brittleness and unfairness to queries in AAVE. Our work establishes a systematic and objective framework for analyzing LLM bias in dialectal queries. Moreover, it highlights how mainstream LLMs provide unfair service to dialect speakers in reasoning tasks, laying a critical foundation for future research. @@ -4644,7 +4644,7 @@ YuepeiLi QiaoQiaoIowa State University KangZhouAmazon - QiLiIowa State University + QiLiIowa State University 6343-6354 Open Relation Extraction (OpenRE) seeks to identify and extract novel relational facts between named entities from unlabeled data without pre-defined relation schemas. Traditional OpenRE methods typically assume that the unlabeled data consists solely of novel relations or is pre-divided into known and novel instances. However, in real-world scenarios, novel relations are arbitrarily distributed. In this paper, we propose a generalized OpenRE setting that considers unlabeled data as a mixture of both known and novel instances. To address this, we propose MixORE, a two-phase framework that integrates relation classification and clustering to jointly learn known and novel relations. Experiments on three benchmark datasets demonstrate that MixORE consistently outperforms competitive baselines in known relation classification and novel relation clustering. Our findings contribute to the advancement of generalized OpenRE research and real-world applications. 2025.acl-long.318 @@ -4696,7 +4696,7 @@ LingruiMeiSkywork AI WenjieFengUniversity of Science and Technology of China LizheChen - XueqiChengInstitute of Computing Technology, Chinese Academy + XueqiChengInstitute of Computing Technology, Chinese Academy 6404-6420 Large language models (LLMs) have achieved significant success in reasoning tasks, including mathematical reasoning and logical deduction. Among these reasoning tasks, graph problems stand out due to their complexity and unique structural characteristics, attracting considerable attention from researchers. Previous studies have explored LLMs’ graph reasoning abilities through various techniques, such as different encoding methods for graph structures and the use of carefully designed prompts. However, a critical factor has been mostly overlooked: the prompt sequential order in which graph descriptions are presented to the models. In this study, we present the first comprehensive analysis of how the order of graph descriptions impacts LLM performance. Specifically, we comprehensively evaluate four graph description orders across six graph problems using six mainstream LLMs. The results reveal that: (1) ordered graph descriptions significantly improve LLMs’ comprehension of graph structures; (2) the robustness of LLMs to graph description order varies across different tasks; and (3) the impact of graph order on performance is closely related to the inherent characteristics of tasks. This study provides a critical advancement in the application of LLMs for solving graph-related problems, paving the way for future research to optimize model performance through strategic graph description ordering. 2025.acl-long.321 @@ -4727,7 +4727,7 @@ HongzhiZhangKuaishou- 快手科技 V.W.The Hong Kong University of Science and Technology FuzhengZhang - DeyiXiongTianjin University + DeyiXiongTianjin University 6435-6462 Video captioning can be used to assess the video understanding capabilities of Multimodal Large Language Models (MLLMs).However, existing benchmarks and evaluation protocols suffer from crucial issues, such as inadequate or homogeneous creation of key points, exorbitant cost of data creation, and limited evaluation scopes. To address these issues, we propose an automatic framework, named AutoCaption, which leverages Monte Carlo Tree Search (MCTS) to construct numerous and diverse descriptive sentences (i.e., key points) that thoroughly represent video content in an iterative way. This iterative captioning strategy enables the continuous enhancement of video details such as actions, objects’ attributes, environment details, etc. We apply AutoCaption to curate MCTS-VCB, a fine-grained video caption benchmark covering video details, thereby enabling a comprehensive evaluation of MLLMs on the video captioning task. We evaluate more than 20 open- and closed-source MLLMs of varying sizes on MCTS-VCB. Results show that MCTS-VCB can effectively and comprehensively evaluate the video captioning capability, with Gemini-1.5-Pro achieving the highest F1 score of 71.2. Interestingly, we fine-tune InternVL2.5-8B with the AutoCaption-generated data, which helps the model achieve an overall improvement of 25.0% on MCTS-VCB and 16.3% on DREAM-1K, further demonstrating the effectiveness of AutoCaption. The code and data are available at https://github.com/tjunlp-lab/MCTS-VCB. 2025.acl-long.323 @@ -4825,7 +4825,7 @@ Soda MaremLo OscarAraqueUniversidad Politécnica de Madrid RajeshSharmainstitute of computer science, University of Tartu - Marco AntonioStranisci + Marco AntonioStranisci 6625-6639 Canceling is a morally-driven phenomenon that hinders the development of safe social media platforms and contributes to ideological polarization. To address this issue we present the Canceling Attitudes Detection (CADE) dataset, an annotated corpus of canceling incidents aimed at exploring the factors of disagreements in evaluating people’s canceling attitudes on social media. Specifically, we study the impact of annotators’ morality in their perception of canceling, showing that morality is an independent axis for the explanation of disagreement on this phenomenon. Annotator’s judgments heavily depend on the type of controversial events and involved celebrities. This shows the need to develop more event-centric datasets to better understand how harms are perpetrated in social media and to develop more aware technologies for their detection. 2025.acl-long.330 @@ -4871,7 +4871,7 @@ ChaoqunLiu LidongBingShanda Group and Alibaba Group DeliZhaoAlibaba Group - Anh TuanLuuNanyang Technological University + Anh TuanLuuNanyang Technological University YuRongAlibaba Group 6685-6715 Many challenging reasoning tasks require not just rapid, intuitive responses, but a more deliberate, multi-step approach. Recent progress in large language models (LLMs) highlights an important shift from the “System 1” way of quick reactions to the “System 2” style of reflection-and-correction problem solving. However, current benchmarks heavily rely on the final-answer accuracy, leaving much of a model’s intermediate reasoning steps unexamined. This fails to assess the model’s ability to reflect and rectify mistakes within the reasoning process. To bridge this gap, we introduce FINEREASON, a logic-puzzle benchmark for systematic evaluation of LLMs’ reasoning capabilities. Each puzzle can be decomposed into atomic steps, making it ideal for rigorous validation of intermediate correctness. Building on this, we introduce two tasks: state checking and state transition, for a comprehensive evaluation of how models assess the current situation and plan the next move. To support broader research, we also provide a puzzle training set aimed at enhancing general reasoning. We show that models trained on our state checking and transition data demonstrate gains in mathematical reasoning by up to 5.1%. @@ -4949,7 +4949,7 @@ XiaoleiWangRenmin University of China ZhihaoLv YingqianMin - XinZhaoRenmin University of China + XinZhaoRenmin University of China BinbinHuAnt Group ZiqiLiuAnt Group ZhiqiangZhangAnt Group @@ -5037,7 +5037,7 @@ MinghaoYuanPeking University QianqianJu YujiaPengPeking University - Kenny Q.ZhuUniversity of Texas at Arlington + Kenny Q.ZhuUniversity of Texas at Arlington MengyueWuShanghai Jiaotong University 6950-6965 Social media platforms possess considerable potential in the realm of exploring mental health. Previous research has indicated that major life events can greatly impact individuals’ mental health. However, due to the complexity and ambiguity nature of life events, shedding its light on social media data is quite challenging. In this paper, we are dedicated to uncovering life events mentioned in posts on social media. We hereby provide a carefully-annotated social media event dataset, PsyEvent, which encompasses 12 major life event categories that are likely to occur in everyday life. This dataset is human-annotated under iterative procedure and boasts a high level of quality. Furthermore, by applying the life events extracted from posts to downstream tasks such as early risk detection of depression and suicide risk prediction, we have observed a considerable improvement in performance. This suggests that extracting life events from social media can be beneficial for the analysis of individuals’ mental health. @@ -5049,7 +5049,7 @@ <fixed-case>C</fixed-case>ontrol<fixed-case>S</fixed-case>peech: Towards Simultaneous and Independent Zero-shot Speaker Cloning and Zero-shot Language Style Control ShengpengJi QianChen - WenWang + WenWang JialongZuo MinghuiFang ZiyueJiang @@ -5148,7 +5148,7 @@ QiangLiuInstitute of Automation, Chinese Academy of Sciences ShuWuInstitute of automation, Chinese academy of science, Chinese Academy of Sciences LiangWangInstitute of Automation, CAS,China - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore 7128-7141 Personalized text generation aims to infer users’ writing style preferences from their historical texts and generate outputs that faithfully reflect these stylistic characteristics. Existing solutions primarily adopt two paradigms: retrieval-augmented generation (RAG) and parameter-efficient fine-tuning (PEFT). While these approaches have advanced the field, they suffer from two critical limitations: (1) the entanglement of content semantics and stylistic patterns in historical texts impedes accurate modeling of user-specific writing preferences; and (2) scalability challenges arising from both RAG’s inference latency by retrieval operations and PEFT’s parameter storage requirements for per user model. To overcome these limitations, we propose StyleVector, a training-free framework that disentangles and represents personalized writing style as a vector in LLM’s activation space, enabling style-steered generation during inference without requiring costly retrieval or parameter storage. Comprehensive experiments demonstrate that our framework achieves a significant 8% relative improvement in personalized generation while reducing storage requirements by 1700 \times over PEFT method. 2025.acl-long.353 @@ -5179,7 +5179,7 @@ SachinKumarOhio State University, Columbus ValentinaPyatkinAllen Institute for Artificial Intelligence and Department of Computer Science FaezeBrahmanAllen Institute for Artificial Intelligence - Noah A.SmithUniversity of Washington and Allen Institute for Artificial Intelligence + Noah A.SmithUniversity of Washington and Allen Institute for Artificial Intelligence HannanehHajishirzi PradeepDasigiAllen Institute for Artificial Intelligence 7162-7200 @@ -5195,7 +5195,7 @@ TianLan TongZhangBeijing Institute of Technology Yu-ShiZhu - HeyanHuangBeijing Institute of Technology + HeyanHuangBeijing Institute of Technology 7201-7218 Automatic evaluation for Open Domain Event Detection (ODED) is a highly challenging task, because ODED is characterized by a vast diversity of un-constrained output labels from various domains. Nearly all existing evaluation methods for ODED usually first construct evaluation benchmarks with limited labels and domain coverage, and then evaluate ODED methods using metrics based on token-level label matching rules. However, this kind of evaluation framework faces two issues: (1) The limited evaluation benchmarks lack representatives of the real world, making it difficult to accurately reflect the performance of various ODED methods in real-world scenarios; (2) Evaluation metrics based on token-level matching rules fail to capture semantic similarity between predictions and golden labels. To address these two problems above, we propose a scalable and reliable Semantic-level Evaluation framework for Open domain Event detection (SEOE) by constructing a more representative evaluation benchmark and introducing a semantic evaluation metric. Specifically, our proposed framework first constructs a scalable evaluation benchmark that currently includes 564 event types covering 7 major domains, with a cost-effective supplementary annotation strategy to ensure the benchmark’s representativeness. The strategy also allows for the supplement of new event types and domains in the future. Then, the proposed SEOE leverages large language models (LLMs) as automatic evaluation agents to compute a semantic F1-score, incorporating fine-grained definitions of semantically similar labels to enhance the reliability of the evaluation. Extensive experiments validate the representatives of the benchmark and the reliability of the semantic evaluation metric. Existing ODED methods are thoroughly evaluated, and the error patterns of predictions are analyzed, revealing several insightful findings. 2025.acl-long.356 @@ -5276,7 +5276,7 @@ Rolling the <fixed-case>DICE</fixed-case> on Idiomaticity: How <fixed-case>LLM</fixed-case>s Fail to Grasp Context MaggieMi AlineVillavicencioUniversity of Exeter and University of Sheffield - Nafise SadatMoosaviUniversity of Sheffield + Nafise SadatMoosaviUniversity of Sheffield 7314-7332 Human processing of idioms heavily depends on interpreting the surrounding context in which they appear. While large language models (LLMs) have achieved impressive performance on idiomaticity detection benchmarks, this success may be driven by reasoning shortcuts present in existing datasets. To address this, we introduce a novel, controlled contrastive dataset (DICE) specifically designed to assess whether LLMs can effectively leverage context to disambiguate idiomatic meanings. Furthermore, we investigate the influence of collocational frequency and sentence probability—proxies for human processing known to affect idiom resolution—on model performance. Our results show that LLMs frequently fail to resolve idiomaticity when it depends on contextual understanding, performing better on sentences deemed more likely by the model. Additionally, idiom frequency influences performance but does not guarantee accurate interpretation. Our findings emphasize the limitations of current models in grasping contextual meaning and highlight the need for more context-sensitive evaluation. 2025.acl-long.362 @@ -5302,7 +5302,7 @@ The Cross-linguistic Role of <fixed-case>A</fixed-case>nimacy in Grammar Structures NinaGregorioUniversity of Edinburgh, University of Edinburgh MatteoGay - SharonGoldwaterUniversity of Edinburgh + SharonGoldwaterUniversity of Edinburgh EdoardoPontiUniversity of Edinburgh 7349-7363 Animacy is a semantic feature of nominals and follows a hierarchy: personal pronouns > human > animate > inanimate. In several languages, animacy imposes hard constraints on grammar. While it has been argued that these constraints may emerge from universal soft tendencies, it has been difficult to provide empirical evidence for this conjecture due to the lack of data annotated with animacy classes. In this work, we first propose a method to reliably classify animacy classes of nominals in 11 languages from 5 families, leveraging multilingual large language models (LLMs) and word sense disambiguation datasets. Then, through this newly acquired data, we verify that animacy displays consistent cross-linguistic tendencies in terms of preferred morphosyntactic constructions, although not always in line with received wisdom: animacy in nouns correlates with the alignment role of agent, early positions in a clause, and syntactic pivot (e.g., for relativisation), but not necessarily with grammatical subjecthood. Furthermore, the behaviour of personal pronouns in the hierarchy is idiosyncratic as they are rarely plural and relativised, contrary to high-animacy nouns. @@ -5444,7 +5444,7 @@ Alleviating Distribution Shift in Synthetic Data for Machine Translation Quality Estimation XiangGeng ZhejianLai - JiajunChenNanjing University + JiajunChenNanjing University HaoYangHuawei Technologies Ltd. ShujianHuangNanjing University 7546-7560 @@ -5494,7 +5494,7 @@ YuzhuangXu XiaolongWangTsinghua University PengLiTsinghua University - YangLiu + YangLiu 7605-7633 Active perception, a crucial human capability, involves setting a goal based on the current understanding of the environment and performing actions to achieve that goal. Despite significant efforts in evaluating Multimodal Large Language Models (MLLMs), active perception has been largely overlooked. To address this gap, we propose a novel benchmark named ActiView to evaluate active perception in MLLMs. We focus on a specialized form of Visual Question Answering (VQA) that eases and quantifies the evaluation yet challenging for existing MLLMs. Meanwhile, intermediate reasoning behaviors of models are also discussed. Given an image, we restrict the perceptual field of a model, requiring it to actively zoom or shift its perceptual field based on reasoning to answer the question successfully. We conduct extensive evaluation over 30 models, including proprietary and open-source models, and observe that restricted perceptual fields play a significant role in enabling active perception. Results reveal a significant gap in the active perception capability of MLLMs, indicating that this area deserves more attention. We hope that ActiView could help develop methods for MLLMs to understand multimodal inputs in more natural and holistic ways. 2025.acl-long.376 @@ -5610,7 +5610,7 @@ HyokunYunAmazon MingZengCarnegie Mellon University PeiChenTexas A&M University - College Station - ZhihanZhang + ZhihanZhang YifanGaoAmazon RuijieWang PriyankaNigam @@ -5651,7 +5651,7 @@ <fixed-case>S</fixed-case>pa<fixed-case>RE</fixed-case>: Enhancing Spatial Reasoning in Vision-Language Models with Synthetic Data MichaelOgezi - FredaShiUniversity of Waterloo and Vector Institute + FredaShiUniversity of Waterloo and Vector Institute 7855-7875 Vision-language models (VLMs) work well in tasks ranging from image captioning to visual question answering (VQA), yet they struggle with spatial reasoning, a key skill for understanding our physical world that humans excel at. We find that spatial relations are generally rare in widely used VL datasets, with only a few being well represented, while most form a long tail of underrepresented relations. This gap leaves VLMs ill-equipped to handle diverse spatial relationships. To bridge it, we construct a synthetic VQA dataset focused on spatial reasoning generated from hyper-detailed image descriptions in Localized Narratives, DOCCI, and PixMo-Cap. Our dataset consists of 455k samples containing 3.4 million QA pairs. Trained on this dataset, our Spatial-Reasoning Enhanced (SpaRE) VLMs show strong improvements on spatial reasoning benchmarks, achieving up to a 49% performance gain on the What’s Up benchmark, while maintaining strong results on general tasks. Our work narrows the gap between human and VLM spatial reasoning and makes VLMs more capable in real-world tasks such as robotics and navigation. We plan to share our code and dataset in due course. 2025.acl-long.387 @@ -5815,7 +5815,7 @@ AryaTalebzadehMeta SinongWangFacebook HanFangMeta AI - CarolynRoseSchool of Computer Science, Carnegie Mellon University + CarolynRoseSchool of Computer Science, Carnegie Mellon University DanielFriedMeta AI and Carnegie Mellon University HejiaZhangFacebook 8140-8155 @@ -5876,7 +5876,7 @@ XuhaoHuFudan University and Shanghai AI Laboratory DongruiLiuShanghai Artificial Intelligence Laboratory HaoLiBeijing University of Aeronautics and Astronautics - XuanjingHuangFudan University + XuanjingHuangFudan University JingShaoShanghai AI Laboratory 8285-8316 Safety concerns of Multimodal large language models (MLLMs) have gradually become an important problem in various applications. Surprisingly, previous works indicate a counterintuitive phenomenon that using textual unlearning to align MLLMs achieves comparable safety performances with MLLMs aligned with image-text pairs. To explain such a phenomenon, we discover a \textit{\textbf{V}isual \textbf{S}afety \textbf{I}nformation \textbf{L}eakage} (\textbf{VSIL}) problem in existing multimodal safety benchmarks, \textit{i.e.}, the potentially risky content in the image has been revealed in the textual query. Thus, MLLMs can easily refuse these sensitive image-text pairs according to textual queries only, leading to unreliable cross-modality safety evaluation of MLLMs. We also conduct a further comparison experiment between textual alignment and multimodal alignment to highlight this drawback. To this end, we construct \textit{\textbf{V}isual \textbf{L}eakless \textbf{S}afety \textbf{B}ench} (\textbf{VLSBench}) with 2.2k image-text pairs through an automated data pipeline. Experimental results indicate that VLSBench poses a significant challenge to both open-source and close-source MLLMs, \textit{i.e.}, LLaVA, Qwen2-VL and GPT-4o. Besides, we empirically compare textual and multimodal alignment methods on VLSBench and find that textual alignment is effective enough for multimodal safety scenarios with VSIL, while multimodal alignment is preferable for safety scenarios without VSIL. @@ -5964,7 +5964,7 @@ ZitaiQiu QingLiMohamed bin Zayed University of Artificial Intelligence HuWangMohamed bin Zayed University of Artificial Intelligence - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 8402-8415 Social event detection involves identifying and categorizing important events from social media, which relies on labeled data, but annotation is costly and labor-intensive. To address this problem, we propose Augmentation framework for Social Event Detection (SED-Aug), a plug-and-play dual augmentation framework, which combines explicit text-based and implicit feature-space augmentation to enhance data diversity and model robustness. The explicit augmentation utilizes LLMs to enhance textual information through five diverse generation strategies. For implicit augmentation, we design five novel perturbation techniques that operate in the feature space on structural fused embeddings. These perturbations are crafted to keep the semantic and relational properties of the embeddings and make them more diverse. Specifically, SED-Aug outperforms the best baseline model by approximately 17.67% on the Twitter2012 dataset and by about 15.57% on the Twitter2018 dataset in terms of the average F1 score. 2025.acl-long.412 @@ -6009,7 +6009,7 @@ Unanswerability Evaluation for Retrieval Augmented Generation XiangyuPengSalesforce AI Research - Prafulla KumarChoubeySalesForce.com + Prafulla KumarChoubeySalesForce.com CaimingXiongSalesforce Research Chien-ShengWuSalesforce AI 8452-8472 @@ -6103,7 +6103,7 @@ ZhaochenHong XiaochengYangUniversity of Illinois at Urbana-Champaign ShuyiGuo - ZheWang + ZheWang ZhenhailongWang ChengQianUniversity of Illinois at Urbana-Champaign RobertTang @@ -6125,7 +6125,7 @@ IsuriNanomi Arachchige AlistairPlumUniversity of Luxembourg PaulRaysonLancaster University - RuslanMitkovLancaster University + RuslanMitkovLancaster University 8623-8636 Recently, language models (LMs) have produced excellent results in many natural language processing (NLP) tasks. However, their effectiveness is highly dependent on available pre-training resources, which is particularly challenging for low-resource languages such as Sinhala. Furthermore, the scarcity of benchmarks to evaluate LMs is also a major concern for low-resource languages. In this paper, we address these two challenges for Sinhala by (i) collecting the largest monolingual corpus for Sinhala, (ii) training multiple LMs on this corpus and (iii) compiling the first Sinhala NLP benchmark (Sinhala-GLUE) and evaluating LMs on it. We show the Sinhala LMs trained in this paper outperform the popular multilingual LMs, such as XLM-R and existing Sinhala LMs in downstream NLP tasks. All the trained LMs are publicly available. We also make Sinhala-GLUE publicly available as a public leaderboard, and we hope that it will enable further advancements in developing and evaluating LMs for Sinhala. 2025.acl-long.422 @@ -6138,7 +6138,7 @@ VeronikaMakarovaUniversity of Saskatchewan ZhiLi JordanKodnerState University of New York, Stony Brook - OwenRambowStony Brook University + OwenRambowStony Brook University 8637-8663 The paper explores the performance of LLMs in the context of multi-dimensional analytic writing assessments, i.e. their ability to provide both scores and comments based on multiple assessment criteria. Using a corpus of literature reviews written by L2 graduate students and assessed by human experts against 9 analytic criteria, we prompt several popular LLMs to perform the same task under various conditions. To evaluate the quality of feedback comments, we apply a novel feedback comment quality evaluation framework. This framework is interpretable, cost-efficient, scalable, and reproducible, compared to existing methods that rely on manual judgments. We find that LLMs can generate reasonably good and generally reliable multi-dimensional analytic assessments. We release our corpus and code for reproducibility. 2025.acl-long.423 @@ -6166,12 +6166,12 @@ YutingLiUniversität Köln WeiZhou ZiweiGongColumbia University - Yang JanetLiuLudwig-Maximilians-Universität München + Yang JanetLiuLudwig-Maximilians-Universität München KatjaJasinskajaUniversität Köln AnnemarieFriedrichUniversity of Augsburg - JuliaHirschbergColumbia University + JuliaHirschbergColumbia University FraukeKreuterUniversity of Maryland - BarbaraPlankLudwig-Maximilians-Universität München + BarbaraPlankLudwig-Maximilians-Universität München 8679-8696 Understanding pragmatics—the use of language in context—is crucial for developing NLP systems capable of interpreting nuanced language use. Despite recent advances in language technologies, including large language models, evaluating their ability to handle pragmatic phenomena such as implicatures and references remains challenging. To advance pragmatic abilities in models, it is essential to understand current evaluation trends and identify existing limitations. In this survey, we provide a comprehensive review of resources designed for evaluating pragmatic capabilities in NLP, categorizing datasets by the pragmatic phenomena they address. We analyze task designs, data collection methods, evaluation approaches, and their relevance to real-world applications. By examining these resources in the context of modern language models, we highlight emerging trends, challenges, and gaps in existing benchmarks. Our survey aims to clarify the landscape of pragmatic evaluation and guide the development of more comprehensive and targeted benchmarks, ultimately contributing to more nuanced and context-aware NLP models. 2025.acl-long.425 @@ -6231,7 +6231,7 @@ YihongLiuLudwig-Maximilians-Universität München PeiqinLinInstitut für Informatik FrançoisYvonISIR, Sorbonne Université & CNRS - HinrichSchuetze + HinrichSchuetze 8767-8788 In-context machine translation (MT) with large language models (LLMs) is a promising approach for low-resource MT, as it can readily take advantage of linguistic resources such as grammar books and dictionaries.Such resources are usually selectively integrated into the prompt so that LLMs can directly perform translation without any specific training, via their in-context learning capability (ICL).However, the relative importance of each type of resource, e.g., dictionary, grammar book, and retrieved parallel examples, is not entirely clear.To address this gap, this study systematically investigates how each resource and its quality affect the translation performance, with the Manchu language as our case study. To remove any prior knowledge of Manchu encoded in the LLM parameters and single out the effect of ICL, we also experiment with an enciphered version of Manchu texts.Our results indicate that high-quality dictionaries and good parallel examples are very helpful, while grammars hardly help.In a follow-up study, we showcase a promising application of in-context MT: parallel data augmentation as a way to bootstrap a conventional MT model. When monolingual data abound, generating synthetic parallel data through in-context MT offers a pathway to mitigate data scarcity and build effective and efficient low-resource neural MT systems. 2025.acl-long.429 @@ -6294,7 +6294,7 @@ XueLiuMcGill University PontusStenetorpUniversity College London SivaReddyServiceNow Inc, Mila, McGill University and Mila, McGill University - David IfeoluwaAdelaniMcGill University + David IfeoluwaAdelaniMcGill University 8870-8880 Traditional supervised fine-tuning (SFT) strategies for sequence-to-sequence tasks often train models to directly generate the target output. Recent work has shown that guiding models with intermediate steps—such as keywords, outlines, or reasoning chains—can significantly improve performance, coherence, and interpretability. However, these methods often depend on predefined intermediate formats and annotated data, limiting their scalability and generalizability. In this work, we introduce a task-agnostic framework that enables models to generate intermediate “warmup” sequences. These warmup sequences, serving as an initial state for subsequent generation, are optimized to enhance the probability of generating the target sequence without relying on external supervision or human-designed structures. Drawing inspiration from reinforcement learning principles, our method iteratively refines these intermediate steps to maximize their contribution to the final output, similar to reward-driven optimization in reinforcement learning with human feedback. Experimental results across tasks such as translation, summarization, and multi-choice question answering for logical reasoning show that our approach outperforms traditional SFT methods, and offers a scalable and flexible solution for sequence-to-sequence tasks. 2025.acl-long.434 @@ -6305,7 +6305,7 @@ Building Better: Avoiding Pitfalls in Developing Language Resources when Data is Scarce NedjmaOusidhoumCardiff University MeriemBeloucifUppsala University - Saif M.Mohammad + Saif M.Mohammad 8881-8894 Language is a form of symbolic capital that affects people’s lives in many ways (Bourdieu1977,1991). As a powerful means of communication, it reflects identities, cultures, traditions, and societies more broadly. Therefore, data in a given language should be regarded as more than just a collection of tokens. Rigorous data collection and labeling practices are essential for developing more human-centered and socially aware technologies. Although there has been growing interest in under-resourced languages within the NLP community, work in this area faces unique challenges, such as data scarcity and limited access to qualified annotators.In this paper, we collect feedback from individuals directly involved in and impacted by NLP artefacts for medium- and low-resource languages. We conduct both quantitative and qualitative analyses of their responses and highlight key issues related to: (1) data quality, including linguistic and cultural appropriateness; and (2) the ethics of common annotation practices, such as the misuse of participatory research. Based on these findings, we make several recommendations for creating high-quality language artefacts that reflect the cultural milieu of their speakers, while also respecting the dignity and labor of data workers. 2025.acl-long.435 @@ -6324,7 +6324,7 @@ NirmalSurangeInternational Institute of Information Technology Hyderabad DanielaTeodorescu Ibrahim SaidAhmadNortheastern University - David IfeoluwaAdelaniMcGill University + David IfeoluwaAdelaniMcGill University Alham FikriAjiMohamed bin Zayed University of Artificial Intelligence Felermino D. M. A.Ali IlseyarAlimovaKazan Federal University @@ -6353,7 +6353,7 @@ Charles Henrique PortoFerreiraCentro Universitário FEI VitalyProtasovAIRI SamuelRutunda - ManishShrivastavaInternational Institute of Information Technology Hyderabad, India + ManishShrivastavaInternational Institute of Information Technology Hyderabad, India Aura CristinaUdrea Lilian Diana AwuorWanzareMaseno University SophieWu @@ -6361,7 +6361,7 @@ Hanif MuhammadZhafranInstitut Teknologi Bandung TianhuiZhangUniversity of Liverpool YiZhouCardiff University - Saif M.Mohammad + Saif M.Mohammad 8895-8916 People worldwide use language in subtle and complex ways to express emotions. Although emotion recognition–an umbrella term for several NLP tasks–impacts various applications within NLP and beyond, most work in this area has focused on high-resource languages. This has led to significant disparities in research efforts and proposed solutions, particularly for under-resourced languages, which often lack high-quality annotated datasets.In this paper, we present BRIGHTER–a collection of multi-labeled, emotion-annotated datasets in 28 different languages and across several domains. BRIGHTER primarily covers low-resource languages from Africa, Asia, Eastern Europe, and Latin America, with instances labeled by fluent speakers. We highlight the challenges related to the data collection and annotation processes, and then report experimental results for monolingual and crosslingual multi-label emotion identification, as well as emotion intensity recognition. We analyse the variability in performance across languages and text domains, both with and without the use of LLMs, and show that the BRIGHTER datasets represent a meaningful step towards addressing the gap in text-based emotion recognition. 2025.acl-long.436 @@ -6394,7 +6394,7 @@ Empathy Prediction from Diverse Perspectives - FrancineChenToyota Research Institute + FrancineChenToyota Research Institute ScottCarterToyota Research Institute TatianaLauToyota Research Institute Nayeli SusethBravoToyota Research Institute @@ -6444,7 +6444,7 @@ Comparing <fixed-case>LLM</fixed-case>-generated and human-authored news text using formal syntactic theory OlgaZamaraevaUniversidad de La Coruña - DanFlickinger + DanFlickinger FrancisBondPalacký University Olomouc CarlosGómez-RodríguezUniversidade da Coruña 9041-9060 @@ -6496,7 +6496,7 @@ Collapse of Dense Retrievers: Short, Early, and Literal Biases Outranking Factual Evidence MohsenFayyazUniversity of California, Los Angeles AliModarressiCenter for Information and Language Processing, LMU Munich - HinrichSchuetze + HinrichSchuetze NanyunPengUniversity of California, Los Angeles 9136-9152 Dense retrieval models are commonly used in Information Retrieval (IR) applications, such as Retrieval-Augmented Generation (RAG). Since they often serve as the first step in these systems, their robustness is critical to avoid downstream failures. In this work, we repurpose a relation extraction dataset (e.g., Re-DocRED) to design controlled experiments that quantify the impact of heuristic biases, such as a preference for shorter documents, on retrievers like Dragon+ and Contriever. We uncover major vulnerabilities, showing retrievers favor shorter documents, early positions, repeated entities, and literal matches, all while ignoring the answer’s presence! Notably, when multiple biases combine, models exhibit catastrophic performance degradation, selecting the answer-containing document in less than 10% of cases over a synthetic biased document without the answer. Furthermore, we show that these biases have direct consequences for downstream applications like RAG, where retrieval-preferred documents can mislead LLMs, resulting in a 34% performance drop than providing no documents at all.https://huggingface.co/datasets/mohsenfayyaz/ColDeR @@ -6545,7 +6545,7 @@ HridayeshLekhak Tuan MinhDangUniversity of Texas at Arlington, University of Texas at Arlington MengyueWuShanghai Jiaotong University - Kenny Q.ZhuUniversity of Texas at Arlington + Kenny Q.ZhuUniversity of Texas at Arlington 9207-9219 Dogs communicate intelligently but little is known about the phonetic properties of their vocalization communication. For the first time, this paper presents an iterative algorithm inspired by human phonetic discovery, which is based on minimal pairs that determine phonemes by distinguishing different words in human language, and is able to produce a complete alphabet of distinct canine phoneme-like units. In addition, the algorithm produces a number of canine repeated acoustic units, which may correspond to specific environments and activities of a dog, composed exclusively of the canine phoneme-like units in the alphabet. The framework outlined in this paper is expected to function not only on canines but other animal species. 2025.acl-long.451 @@ -6580,7 +6580,7 @@ ChuntingZhouMeta AI LiliYuphysical intelligence Jason EWestonNew York University and Facebook - LukeZettlemoyerUniversity of Washington, Facebook and Meta + LukeZettlemoyerUniversity of Washington, Facebook and Meta GargiGhoshMeta AI MikeLewisFacebook AI Research AriHoltzman, University of Chicago @@ -6624,7 +6624,7 @@ Culture Matters in Toxic Language Detection in <fixed-case>P</fixed-case>ersian ZahraBokaei WalidMagdyUniversity of Edinburgh - BonnieWebberEdinburgh University, University of Edinburgh + BonnieWebberEdinburgh University, University of Edinburgh 9290-9304 Toxic language detection is crucial for creating safer online environments and limiting the spread of harmful content. While toxic language detection has been under-explored in Persian, the current work compares different methods for this task, including fine-tuning, data enrichment, zero-shot and few-shot learning, and cross-lingual transfer learning. What is especially compelling is the impact of cultural context on transfer learning for this task: We show that the language of a country with cultural similarities to Persian yields better results in transfer learning. Conversely, the improvement is lower when the language comes from a culturally distinct country. 2025.acl-long.456 @@ -6720,7 +6720,7 @@ TaoFeng LizhenQuMonash University NiketTandonResearch, Microsoft - GholamrezaHaffariMonash University, Monash University and Monash University + GholamrezaHaffariMonash University, Monash University and Monash University 9400-9428 Causal discovery is fundamental to scientific research, yet traditional statistical algorithms face significant challenges, including expensive data collection, redundant computation for known relations, and unrealistic assumptions. While recent LLM-based methods excel at identifying commonly known causal relations, they fail to uncover novel relations. We introduce IRIS (Iterative Retrieval and Integrated System for Real-Time Causal Discovery), a novel framework that addresses these limitations. Starting with a set of initial variables, IRIS automatically collects relevant documents, extracts variables, and uncovers causal relations. Our hybrid causal discovery method combines statistical algorithms and LLM-based methods to discover known and novel causal relations. In addition to causal discovery on initial variables, the missing variable proposal component of IRIS identifies and incorporates missing variables to expand the causal graphs. Our approach enables real-time causal discovery from only a set of initial variables without requiring pre-existing datasets. 2025.acl-long.463 @@ -6730,7 +6730,7 @@ <fixed-case>INJONGO</fixed-case>: A Multicultural Intent Detection and Slot-filling Dataset for 16 <fixed-case>A</fixed-case>frican Languages HaoYu - Jesujoba OluwadaraAlabi + Jesujoba OluwadaraAlabi AndiswaBukula Jian YunZhuang En-Shiun AnnieLee @@ -6749,7 +6749,7 @@ SalomeyOsei SokharSamb DietrichKlakow - David IfeoluwaAdelaniMcGill University + David IfeoluwaAdelaniMcGill University 9429-9452 Slot-filling and intent detection are well-established tasks in Conversational AI. However, current large-scale benchmarks for these tasks often exclude evaluations of low-resource languages and rely on translations from English benchmarks, thereby predominantly reflecting Western-centric concepts. In this paper, we introduce “INJONGO” - a multicultural, open-source benchmark dataset for 16 African languages with utterances generated by native speakers across diverse domains, including banking, travel, home, and dining. Through extensive experiments, we benchmark fine-tuning multilingual transformer models and prompting large language models (LLMs), and show the advantage of leveraging African-cultural utterances over Western-centric utterances for improving cross-lingual transfer from the English language. Experimental results reveal that current LLMs struggle with the slot-filling task, with GPT-4o achieving an average performance of 26 F1. In contrast, intent detection performance is notably better, with an average accuracy of 70.6%, though it still falls short of fine-tuning baselines. When compared to the English language, GPT-4o and fine-tuning baselines perform similarly on intent detection, achieving an accuracy of approximately 81%. Our findings suggest that LLMs performance is still behind for many low-resource African languages, and more work is needed to further improve their downstream performance. 2025.acl-long.464 @@ -6808,7 +6808,7 @@ <fixed-case>KG</fixed-case>-Agent: An Efficient Autonomous Agent Framework for Complex Reasoning over Knowledge Graph JinhaoJiang KunZhouUniversity of California, San Diego - XinZhaoRenmin University of China + XinZhaoRenmin University of China YangSongBOSS Zhipin ChenZhuUniversity of Science and Technology of China HengshuZhuComputer Network Information Center, Chinese Academy of Sciences @@ -6837,7 +6837,7 @@ SrijanBansalSalesForce.com YifeiMingSalesforce AI Research SemihYavuzSalesForce.com - ShafiqJotyNanyang Technological University and SalesForce.com + ShafiqJotyNanyang Technological University and SalesForce.com 9541-9564 The large language model (LLM)-as-judge paradigm has been used to meet the demand for a cheap, reliable, and fast evaluation of model outputs during AI system development and post-deployment monitoring. While judge models—LLMs finetuned to specialize in assessing and critiquing model outputs—have been touted as general purpose evaluators, they are typically evaluated only on non-contextual scenarios, such as instruction following. The omission of contextual settings—those where external information is used as context to generate an output—is surprising given the increasing prevalence of retrieval-augmented generation (RAG) and summarization use cases. Contextual assessment is uniquely challenging, as evaluation often depends on practitioner priorities, leading to conditional evaluation criteria (e.g., comparing responses based on factuality and then considering completeness if they are equally factual). To address the gap, we propose ContextualJudgeBench, a judge benchmark with 2,000 challenging response pairs across eight splits inspired by real-world contextual evaluation scenarios. We build our benchmark with a multi-pronged data construction pipeline that leverages both existing human annotations and model-based perturbations. Our comprehensive study across 11 judge models and 7 general purpose models, reveals that the contextual information and assessment criteria present a significant challenge to even state-of-the-art models. For example, o1, the best-performing model, barely reaches 55% consistent accuracy. 2025.acl-long.470 @@ -6851,7 +6851,7 @@ NiketTandonResearch, Microsoft ZhuangLiRoyal Melbourne Institute of Technology XiaoxiKang - GholamrezaHaffariMonash University, Monash University and Monash University + GholamrezaHaffariMonash University, Monash University and Monash University 9565-9590 This study investigates the efficacy of Large Language Models (LLMs) in causal discovery. Using newly available open-source LLMs, OLMo and BLOOM, which provide access to their pre-training corpora, we investigate how LLMs address causal discovery through three research questions. We examine: (i) the impact of memorization for accurate causal relation prediction, (ii) the influence of incorrect causal relations in pre-training data, and (iii) the contextual nuances that influence LLMs’ understanding of causal relations. Our findings indicate that while LLMs are effective in recognizing causal relations that occur frequently in pre-training data, their ability to generalize to new or rare causal relations is limited. Moreover, the presence of incorrect causal relations significantly undermines the confidence of LLMs in corresponding correct causal relations, and the contextual information critically affects the outcomes of LLMs to discern causal connections between random variables. 2025.acl-long.471 @@ -6953,7 +6953,7 @@ When to Speak, When to Abstain: Contrastive Decoding with Abstention - Hyuhng JoonKimSeoul National University + Hyuhng JoonKimSeoul National University YounaKimSeoul National University Sang-gooLeeSeoul National University TaeukKimHanyang University @@ -7428,7 +7428,7 @@ JianxiangPeng LeiYang JuesiXiao - DeyiXiongTianjin University + DeyiXiongTianjin University 10386-10418 With the increasing capability of large language models (LLMs), LLM-as-a-judge has emerged as a new evaluation paradigm. Compared with traditional automatic and manual evaluation, LLM evaluators exhibit better interpretability and efficiency. Despite this, existing LLM evaluators suffer from limited use scenarios and poor flexibility. To mitigate these issues, we propose Praetor, a fine-grained generative LLM evaluator with instance-level customazable evaluation criteria. To train Praetor, we curate a large-scale dataset guided with a hierarchical guideline covering a wide range of tasks and instance-level evaluation criteria. We train Praetor on this dataset in a multi-task learning fashion, which enables to evaluate LLMs in either pointwise grading or pairwise comparison way and support two languages simultaneously with a high flexibility of setting evaluation criteria. Extensive experiments demonstrate that Praetor outperforms previous LLM evaluators and instruction-tuned LLMs on multiple benchmarks, setting new SOTA results. It also exhibits the potential for generating critiques as scalable feedback to further improve LLMs. Our model and related resources are released at https://github.com/tjunlp-lab/Praetor. 2025.acl-long.513 @@ -7441,7 +7441,7 @@ XiruoDing BrianHurUniversity of Washington ChangyeLiUniversity of Washington - TrevorCohenUniversity of Washington + TrevorCohenUniversity of Washington Serguei V. S.PakhomovUniversity of Minnesota - Twin Cities 10419-10434 Deep transformer models have been used to detect linguistic anomalies in patient transcripts for early Alzheimer’s disease (AD) screening. While pre-trained neural language models (LMs) fine-tuned on AD transcripts perform well, little research has explored the effects of the gender of the speakers represented by these transcripts. This work addresses gender confounding in dementia detection and proposes two methods: the Extended Confounding Filter and the Dual Filter, which isolate and ablate weights associated with gender. We evaluate these methods on dementia datasets with first-person narratives from patients with cognitive impairment and healthy controls. Our results show transformer models tend to overfit to training data distributions. Disrupting gender-related weights results in a deconfounded dementia classifier, with the trade-off of slightly reduced dementia detection performance. @@ -7451,7 +7451,7 @@ <fixed-case>MCS</fixed-case>-Bench: A Comprehensive Benchmark for Evaluating Multimodal Large Language Models in <fixed-case>C</fixed-case>hinese Classical Studies - YangLiuSouth China University of Technology + YangLiuSouth China University of Technology JiahuanCao HiuyiCheng YongxinShiSouth China University of Technology @@ -7532,7 +7532,7 @@ Cool-Fusion: Fuse Large Language Models without Training - CongLiu + CongLiu XiaojunQuanSUN YAT-SEN UNIVERSITY YanPanSUN YAT-SEN UNIVERSITY WeigangWuSUN YAT-SEN UNIVERSITY @@ -7572,8 +7572,8 @@ ShilongLi BingXu ConghuiZhu - MuyunYang - TiejunZhaoHarbin Institute of Technology + MuyunYang + TiejunZhaoHarbin Institute of Technology 10667-10686 Complex instruction-following with elaborate constraints is imperative for Large Language Models (LLMs). While existing methods have constructed data for complex instruction alignment, they all rely on a more advanced model, especially GPT-4, limiting their application. In this paper, we propose a Multi-granularity Self-Contrastive Training (MuSC) framework, to improve the complex instruction alignment without relying on a stronger model. Our method is conducted on both coarse and fine granularity. On coarse-granularity, we construct constraint-aware preference data based on instruction decomposition and recombination. On fine-granularity, we perform token-aware preference optimization with dynamic token-level supervision. Our method is evaluated on open-sourced models, and experiment results show our method achieves significant improvement on both complex and general instruction-following benchmarks, surpassing previous self-alignment methods. 2025.acl-long.523 @@ -7586,7 +7586,7 @@ JunyiLi JinhaoJiang MingyuXu - XinZhaoRenmin University of China + XinZhaoRenmin University of China BingningWangBeijing Baichuan Intelligence Technology Co., Ltd. WeipengChen 10687-10707 @@ -7878,7 +7878,7 @@ AnZhangNational University of Singapore YanyanZhaoHarbin Institute of Technology BingQinHarbin Institute of Technology - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore TingLiuHarbin Institute of Technology 11112-11137 Role-playing enables large language models (LLMs) to engage users in immersive and personalized interactions, but it also introduces significant safety risks. Existing role-play fine-tuning techniques improve role adaptability but may degrade safety performance, particularly for villainous characters. In this work, we conduct the first comprehensive assessment of role-play fine-tuning risks by training 95 role-specific LLMs using RoleBench. Our experiments reveal that role-play fine-tuning leads to a noticeable decline in safety performance, with safety risks varying based on character traits. To tackle this challenge, we propose Safety-Aware Role-Play Fine-Tuning (SaRFT), a novel method designed to balance role-playing capabilities and safety. Extensive experiments on LLaMA-3-8B-Instruct, Gemma-2-9B-it, and Qwen2.5-7B-Instruct demonstrate that SaRFT consistently outperforms state-of-the-art baselines under both LoRA and full-parameter fine-tuning settings. Our findings highlight the necessity of role-adaptive safety measures and provide insights into mitigating role-specific safety risks in role-playing LLMs. @@ -7888,7 +7888,7 @@ Can Graph Neural Networks Learn Language with Extremely Weak Text Supervision? - ZihaoLiUniversity of Illinois Urbana-Champaign + ZihaoLiUniversity of Illinois Urbana-Champaign LechengZheng BowenJin DongqiFuMeta @@ -7933,9 +7933,9 @@ KarthikPadtheMeta AI RulinShao Alicia YiSunMeta AI and Massachusetts Institute of Technology - LukeZettlemoyerUniversity of Washington, Facebook and Meta + LukeZettlemoyerUniversity of Washington, Facebook and Meta GargiGhoshMeta AI - Wen-tauYihMeta Platforms, Inc. + Wen-tauYihMeta Platforms, Inc. 11199-11213 Large language models can generate factually inaccurate content, a problem known as hallucination. Recent works have built upon retrieved-augmented generation to improve factuality through iterative prompting but these methods are limited by the traditional RAG design. To address these challenges, we introduce Ewe (Explicit Working Memory), a novel approach that enhances factuality in long-form text generation by integrating a working memory that receives real-time feedback from external resources. The memory is refreshed based on online fact-checking and retrieval feedback, allowing Ewe to rectify false claims during the generation process and ensure more accurate and reliable outputs. Our experiments demonstrate that Ewe outperforms strong baselines on four fact-seeking long-form generation datasets, increasing the factuality metric, VeriScore, by 2 to 6 points absolute without sacrificing the helpfulness of the responses. Further analysis reveals that the design of rules for memory updates, configurations of memory units, and the quality of the retrieval datastore are crucial factors for influencing model performance. 2025.acl-long.548 @@ -8032,7 +8032,7 @@ JianminWang XibaoCai HaitaoHuang - WeiLiu + WeiLiu LongyueWangAlibaba Group Lai HouTimTencent AI Lab XiangxiangZengHunan University @@ -8065,7 +8065,7 @@ ChengyiYang PeiZhangAlibaba Group BaosongYang - JunhuiLiSoochow University, China + JunhuiLiSoochow University, China JunfengYaoXiamen University MinZhangHarbin Institute of Technology, Shenzhen JinsongSuXiamen University @@ -8164,7 +8164,7 @@ DongyangZhanHarbin Institute of Technology YuntingZhangHarbin Institute of Technology YichengGuoHarbin Institute of Technology - ChenZhangUniversity of Mississippi Medical Center + ChenZhangUniversity of Mississippi Medical Center 11481-11494 Automatic exploit generation (AEG) refers to the automatic discovery and exploitation of vulnerabilities against unknown targets. Traditional AEG often targets a single type of vulnerability and still relies on templates built from expert experience. To achieve intelligent exploit generation, we establish a comprehensive benchmark using Binary Exploitation (pwn) challenges in Capture the Flag (CTF) competitions and investigate the capabilities of Large Language Models (LLMs) in AEG based on the benchmark. To improve the performance of AEG, we propose PwnGPT, an LLM-based automatic exploit generation framework that automatically solves pwn challenges. The structural design of PwnGPT is divided into three main components: analysis, generation, and verification modules. With the help of a modular approach and structured problem inputs, PwnGPT can solve challenges that LLMs cannot directly solve. We evaluate PwnGPT on our benchmark and analyze the outputs of each module. Experimental results show that our framework is highly autonomous and capable of addressing various challenges. Compared to direct input LLMs, PwnGPT increases the completion rate of exploit on our benchmark from 26.3% to 57.9% with the OpenAI o1-preview model and from 21.1% to 36.8% with the GPT-4o model. 2025.acl-long.562 @@ -8183,7 +8183,7 @@ Phong Nguyen-ThuanDoZalo Van Le TranTrucHo Chi Minh city University of Science, Vietnam National University Duc ThanhChauHo Chi Minh city University of Science, Vietnam National University - Le-MinhNguyenJapan Advanced Institute of Science and Technology, Tokyo Institute of Technology + Le-MinhNguyenJapan Advanced Institute of Science and Technology, Tokyo Institute of Technology 11495-11515 The evolution of Large Language Models (LLMs) has underscored the necessity for benchmarks designed for various languages and cultural contexts. To address this need for Vietnamese, we present the first Vietnamese Multitask Language Understanding (VMLU) Benchmarks. The VMLU benchmarks consist of four datasets that assess different capabilities of LLMs, including general knowledge, reading comprehension, reasoning, and conversational skills. This paper also provides an insightful overview of the current state of some dominant LLMs, such as Llama-3, Qwen2.5, and GPT-4, highlighting their performances and limitations when measured against these benchmarks. Furthermore, we provide insights into how prompt design can influence VMLU’s evaluation outcomes, as well as suggest that open-source LLMs can serve as effective, cost-efficient evaluators within the Vietnamese context. By offering a comprehensive and accessible benchmarking framework, the VMLU Benchmarks aim to foster the development and fine-tuning of Vietnamese LLMs, thereby establishing a foundation for their practical applications in language-specific domains. 2025.acl-long.563 @@ -8383,7 +8383,7 @@ YiningWang FeifeiZhaiInstitute of automation, Chinese academy of science, Chinese Academy of Sciences YuZhouInstitute of Automation, Chinese Academy of Sciences - ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences + ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences 11755-11771 LLMs have achieved remarkable fluency and coherence in text generation, yet their widespread adoption has raised concerns about content reliability and accountability. In high-stakes domains, it is crucial to understand where and how the content is created. To address this, we introduce the Text pROVEnance (TROVE) challenge, designed to trace each sentence of a target text back to specific source sentences within potentially lengthy or multi-document inputs. Beyond identifying sources, TROVE annotates the fine-grained relationships (quotation, compression, inference, and others), providing a deep understanding of how each target sentence is formed.To benchmark TROVE, we construct our dataset by leveraging three public datasets covering 11 diverse scenarios (e.g., QA and summarization) in English and Chinese, spanning source texts of varying lengths (0–5k, 5–10k, 10k+), emphasizing the multi-document and long-document settings essential for provenance. To ensure high-quality data, we employ a three-stage annotation process: sentence retrieval, GPT-4o provenance, and human provenance. We evaluate 11 LLMs under direct prompting and retrieval-augmented paradigms, revealing that retrieval is essential for robust performance, larger models perform better in complex relationship classification, and closed-source models often lead, yet open-source models show significant promise, particularly with retrieval augmentation. We make our dataset available here: https://github.com/ZNLP/ZNLP-Dataset. 2025.acl-long.577 @@ -8446,8 +8446,8 @@ WangYanWangYan WeiShen QingGuNanjing University - Anh TuanLuuNanyang Technological University - See-KiongNgNational University of Singapore + Anh TuanLuuNanyang Technological University + See-KiongNgNational University of Singapore ZhiweiJiangNanjing University BryanHooiNational University of Singapore 11857-11870 @@ -8571,7 +8571,7 @@ XihuaiWangShanghai Jiao Tong University YashengWang RuimingTang - WeinanZhang + WeinanZhang YongYuShanghai Jiaotong University 12055-12065 With the impressive reasoning and text generation capabilities of large language models (LLMs), methods leveraging multiple LLMs to debate each other have garnered increasing attention. However, existing debate-based approaches remain limited in effectiveness in structured and detailed domains represented by code generation due to several reasons: 1) Reliance on different instances of the same LLM for debate, neglecting the potential benefits of integrating diverse models with varied internal knowledge for more comprehensive code generation, 2) under-utilization of test cases, and 3) reliance on third-party LLM moderators for result consolidation and decision-making, probably introducing hallucinations and judgment errors. To address these challenges, we propose DebateCoder to collect intelligence of LLMs via test case-driven debate for code generation. In DebateCoder, test cases serve as a medium for models to analyze code and identify bugs, while opposing models generate test cases to challenge each other’s code during the debate process. These test cases, along with their execution results, are elaborately leveraged to refine and enhance the code through a novel contrastive analysis process. Furthermore, DebateCoder leverages test case outcomes to assess code quality and determine convergence criteria. Unlike previous approaches, DebateCoder emphasizes the collaborative improvement of both models through competitive debate and interactive analysis. Abundant experimental results on two datasets demonstrate the effectiveness of DebateCoder. @@ -8638,7 +8638,7 @@ YeYuan YichunYinHuawei Noah’s Ark Lab YanXuHuawei Technologies Ltd. - XinXu + XinXu ZaoyuChen YashengWang LifengShangHuawei Technologies Ltd. @@ -8795,8 +8795,8 @@ WilliamZeng OussamaElachqarOumi EmmanouilKoukoumidis - DilekHakkani-TürUniversity of Illinois at Urbana-Champaign - GokhanTurUniversity of Illinois at Urbana-Champaign + DilekHakkani-TürUniversity of Illinois at Urbana-Champaign + GokhanTurUniversity of Illinois at Urbana-Champaign 12370-12390 Large Language Models (LLMs) with API-calling capabilities enabled building effective Language Agents (LA), while also revolutionizing the conventional task-oriented dialogue (TOD) paradigm. However, current approaches face a critical dilemma: TOD systems are often trained on a limited set of target APIs, requiring new data to maintain their quality when interfacing with new services, while LAs are not trained to maintain user intent over multi-turn conversations. Because both robust multi-turn management and advanced function calling are crucial for effective conversational agents, we evaluate these skills on three popular benchmarks: MultiWOZ 2.4 (TOD), BFCL V3 (LA), and API-Bank (LA)—and our analyses reveal that specialized approaches excel in one domain but underperform in the other. To bridge this chasm, we introduce **CoALM** (**C**onversational **A**gentic **L**anguage **M**odel), a unified approach that integrates both conversational and agentic capabilities. We created **CoALM-IT**, a carefully constructed multi-task dataset that interleave multi-turn ReAct reasoning with complex API usage. Using CoALM-IT, we train three models **CoALM 8B**, **CoALM 70B**, and **CoALM 405B**, which outperform top domain-specific models, including GPT-4o, across all three benchmarks. This demonstrates the feasibility of a single model approach for both TOD and LA, setting a new standard for conversational agents. 2025.acl-long.605 @@ -8810,7 +8810,7 @@ ZhiyangZhang YangZhaoInstitute of automation, Chinese academy of science, Chinese Academy of Sciences LuXiangInstitute of automation, Chinese academy of science, Chinese Academy of Sciences - ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences + ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences YuZhouInstitute of Automation, Chinese Academy of Sciences 12391-12408 Document Image Machine Translation (DIMT) aims to translate text within document images, facing generalization challenges due to limited training data and the complex interplay between visual and textual information. To address these challenges, we introduce M4Doc, a novel single-to-mix Modality alignment framework leveraging Multimodal Large Language Models (MLLMs). M4Doc aligns an imageonly encoder with the multimodal representations of an MLLM, pre-trained on large-scale document image datasets. This alignment enables a lightweight DIMT model to learn crucial visual-textual correlations during training. During inference, M4Doc bypasses the MLLM, maintaining computational efficiency while benefiting from its multimodal knowledge. Comprehensive experiments demonstrate substantial improvements in translation quality, especially in cross-domain generalization and challenging document image scenarios. The code will be released upon acceptance. @@ -8971,7 +8971,7 @@ Enhancing Safe and Controllable Protein Generation via Knowledge Preference Optimization - YuhaoWang + YuhaoWang KeyanDingZhejiang University KehuaFeng ZeyuanWang @@ -9100,10 +9100,10 @@ <fixed-case>SCAR</fixed-case>: Data Selection via Style Consistency-Aware Response Ranking for Efficient Instruction-Tuning of Large Language Models ZhuangLiRoyal Melbourne Institute of Technology YunchengHua - Thuy-TrangVuMonash University + Thuy-TrangVuMonash University HaolanZhanMonash University LizhenQuMonash University - GholamrezaHaffariMonash University, Monash University and Monash University + GholamrezaHaffariMonash University, Monash University and Monash University 12756-12790 Recent studies emphasize that manually ensuring a consistent response style and maintaining high data quality in training sets can significantly improve the performance of fine-tuned Large Language Models (LLMs) while reducing the number of training examples needed. However, the precise definition of style and the relationship between style, data quality, and LLM performance remains unclear. This research identifies two key stylistic elements in responses: linguistic form and instructional surprisal. We find that, among training data of comparable quality, higher consistency in these response elements leads to better LLM performance. Inspired by this, we introduce Style Consistency-Aware Response Ranking (SCAR), which automatically prioritizes instruction-response pairs in the training set based on their response stylistic consistency. By selecting the most style-consistent examples, using 0.7% of the full dataset in certain cases, the fine-tuned LLMs can match or even surpass the performance of models trained on the entire dataset in coding and open-ended question-answering benchmarks. Code and data are available at https://github.com/zhuang-li/SCAR . 2025.acl-long.625 @@ -9344,7 +9344,7 @@ NikhitaVedulaAmazon BesnikFetahuAmazon OlegRokhlenko - ShervinMalmasiAmazon + ShervinMalmasiAmazon 13095-13120 The goal of conversational product search (CPS) is to develop an intelligent, chat-based shopping assistant that can directly interact with customers to understand shopping intents, ask clarification questions, and find relevant products. However, training such assistants is hindered mainly due to the lack of reliable and large-scale datasets. Prior human-annotated CPS datasets are extremely small in size and lack integration with real-world product search systems. We propose a novel approach, TRACER, which leverages large language models (LLMs) to generate realistic and natural conversations for different shopping domains. TRACER’s novelty lies in grounding the generation to dialogue plans, which are product search trajectories predicted from a decision tree model, that guarantees relevant product discovery in the shortest number of search conditions. We also release the first target-oriented CPS dataset Wizard of Shopping (WoS), containing highly natural and coherent conversations (3.6k) from three shopping domains. Finally, we demonstrate the quality and effectiveness of WoS via human evaluations and downstream tasks. 2025.acl-long.641 @@ -9452,7 +9452,7 @@ XumingHuThe Hong Kong University of Science and Technology (Guangzhou) and Hong Kong University of Science and Technology LijieWenTsinghua University IrwinKing - Philip S.YuUniversity of Illinois Chicago + Philip S.YuUniversity of Illinois Chicago 13228-13251 The radioactive nature of Large Language Model (LLM) watermarking enables the detection of watermarks inherited by student models when trained on the outputs of watermarked teacher models, making it a promising tool for preventing unauthorized knowledge distillation. However, the robustness of watermark radioactivity against adversarial actors remains largely unexplored. In this paper, we investigate whether student models can acquire the capabilities of teacher models through knowledge distillation while avoiding watermark inheritance. We propose two categories of watermark removal approaches: pre-distillation removal through untargeted and targeted training data paraphrasing (UP and TP), and post-distillation removal through inference-time watermark neutralization (WN). Extensive experiments across multiple model pairs, watermarking schemes and hyper-parameter settings demonstrate that both TP and WN thoroughly eliminate inherited watermarks, with WN achieving this while maintaining knowledge transfer efficiency and low computational overhead. Given the ongoing deployment of watermarking techniques in production LLMs, these findings emphasize the urgent need for more robust defense strategies. 2025.acl-long.648 @@ -9461,7 +9461,7 @@ Rethinking Reward Model Evaluation Through the Lens of Reward Overoptimization - SunghwanKim + SunghwanKim DongjinKang TaeyoonKwonYonsei University HyungjooChae @@ -9602,7 +9602,7 @@ XiaoboLiang JuntaoLi ZhaopengTuTencent AI Lab - QiaomingZhuSoochow University + QiaomingZhuSoochow University MinZhangHarbin Institute of Technology, Shenzhen 13414-13438 Improving the mathematical reasoning capabilities of Large Language Models (LLMs) is critical for advancing artificial intelligence. However, access to extensive, diverse, and high-quality reasoning datasets remains a significant challenge, particularly for the open-source community. In this paper, we propose ScaleQuest, a novel, scalable, and cost-effective data synthesis method that enables the generation of large-scale mathematical reasoning datasets using lightweight 7B-scale models. ScaleQuest introduces a two-stage question-tuning process comprising Question Fine-Tuning (QFT) and Question Preference Optimization (QPO) to unlock the question generation capabilities of problem-solving models. By generating diverse questions from scratch – without relying on powerful proprietary models or seed data – we produce a dataset of 1 million problem-solution pairs. Our experiments demonstrate that models trained on our data outperform existing open-source datasets in both in-domain and out-of-domain evaluations. Furthermore, our approach shows continued performance improvement as the volume of training data increases, highlighting its potential for ongoing data scaling. The extensive improvements observed in code reasoning tasks demonstrate the generalization capabilities of our proposed method. Our work provides the open-source community with a practical solution to enhance the mathematical reasoning abilities of LLMs. @@ -9615,7 +9615,7 @@ HaneulYooKAIST JieunHanKorea Advanced Institute of Science & Technology So-YeonAhnKorea Advanced Institute of Science & Technology - AliceOhGoogle and Korea Advanced Institute of Science and Technology + AliceOhGoogle and Korea Advanced Institute of Science and Technology 13439-13454 Automated essay scoring (AES) is a useful tool in English as a Foreign Language (EFL) writing education, offering real-time essay scores for students and instructors. However, previous AES models were trained on essays and scores irrelevant to the practical scenarios of EFL writing education and usually provided a single holistic score due to the lack of appropriate datasets. In this paper, we release DREsS, a large-scale, standard dataset for rubric-based automated essay scoring with 48.9K samples in total. DREsS comprises three sub-datasets: DREsS_New, DREsS_Std., and DREsS_CASE. We collect DREsS_New, a real-classroom dataset with 2.3K essays authored by EFL undergraduate students and scored by English education experts. We also standardize existing rubric-based essay scoring datasets as DREsS_Std. We suggest CASE, a corruption-based augmentation strategy for essays, which generates 40.1K synthetic samples of DREsS_CASE and improves the baseline results by 45.44%. DREsS will enable further research to provide a more accurate and practical AES system for EFL writing education. 2025.acl-long.659 @@ -9678,7 +9678,7 @@ Lexical Recall or Logical Reasoning: Probing the Limits of Reasoning Abilities in Large Language Models HenrikeBeyerUniversity of Dundee - ChrisReedUniversity of Dundee + ChrisReedUniversity of Dundee 13532-13557 Despite the increasing interest in the reasoning abilities of Large Language Models (LLMs), existing work shows limitations in assessing logic abilities independently from lexical memory. We address this gap with Mystery-Zebra. This robust two-part benchmark (4,290 puzzles) challenges the logic abstraction abilities of LLMs in two setups: (1) a lexical obfuscation setup tests the dependence of LLMs on lexical content based on two canonical grid puzzles widely spread on the Internet; (2) a set of new grid puzzles in 42 different sizes and 12 difficulty levels tests how the formal difficulty degree of a puzzle affects LLMs.We test open and closed-weight LLMs on both parts of the benchmark. The results on part two suggest that model sizes up to 70B parameters have only a minor influence when solving newly generated puzzles, while performance mainly relates to the number of items in the puzzle. The results on the first part of the benchmark suggest that the applied obfuscation strategies help to mitigate effects of logic puzzles being part of LLM training data, showing a drastic drop in performance for obfuscated versions of well-known puzzles. In addition we conduct a case-study on the first part of the benchmark predicting the position of single items, unveiling that the reasoning abilities of LLMs are mainly limited to a few consecutive steps of reasoning. 2025.acl-long.664 @@ -9764,7 +9764,7 @@ <fixed-case>CR</fixed-case>isk<fixed-case>E</fixed-case>val: A <fixed-case>C</fixed-case>hinese Multi-Level Risk Evaluation Benchmark Dataset for Large Language Models LingShi - DeyiXiongTianjin University + DeyiXiongTianjin University 13638-13659 Large language models (LLMs) are possessed of numerous beneficial capabilities, yet their potential inclination harbors unpredictable risks that may materialize in the future. We hence propose CRiskEval, a Chinese dataset meticulously designed for gauging the risk proclivities inherent in LLMs such as resource acquisition and malicious coordination, as part of efforts for proactive preparedness. To curate CRiskEval, we define a new risk taxonomy with 7 types of frontier risks and 4 safety levels, including extremely hazardous,moderately hazardous, neutral and safe. We follow the philosophy of tendency evaluation to empirically measure the stated ”desire” of LLMs via fine-grained multiple-choice question answering. The dataset consists of 14,888 questions that simulate scenarios related to predefined 7 types of frontier risks. Each question is accompanied with 4 answer choices that state opinions or behavioral tendencies corresponding to the question. All answer choices are manually annotated with one of the defined risk levels so that we can easily build a fine-grained frontier risk profile for each assessed LLM. Extensive evaluation with CRiskEval on a spectrum of prevalent Chinese LLMs has unveiled a striking revelation: most models exhibit risk tendencies of more than 40% (weighted tendency to the four risk levels). Furthermore, a subtle increase in the model’s inclination toward urgent self-sustainability, power seeking and other dangerous goals becomes evident as the size of models increases. To promote further research on the frontier risk evaluation of LLMs, we publicly release our dataset at https://github.com/tjunlp-lab/CRiskEval. 2025.acl-long.670 @@ -9805,7 +9805,7 @@ HuadaiLiu JialeiWang RongjieHuangZhejiang University - YangLiu + YangLiu HengLu ZhouZhaoZhejiang University and Zhejiang University WeiXueHong Kong University of Science and Technology @@ -9901,7 +9901,7 @@ TianyuZheng YizhiLiUniversity of Manchester YuelinBaiShenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences - BoLi + BoLi YuboWangUniversity of Waterloo KingZhuGuangdong OPPO Mobile Telecommunications Corp.,Ltd. GrahamNeubigCarnegie Mellon University @@ -9971,7 +9971,7 @@ <fixed-case>SPECTRA</fixed-case>: Faster Large Language Model Inference with Optimized Internal and External Speculation Nguyen-KhangLeJapan Advanced Institute of Science and Technology, Tokyo Institute of Technology Truong DinhDo - Le-MinhNguyenJapan Advanced Institute of Science and Technology, Tokyo Institute of Technology + Le-MinhNguyenJapan Advanced Institute of Science and Technology, Tokyo Institute of Technology 14015-14034 Inference with modern Large Language Models (LLMs) is both computationally expensive and time-consuming. Speculative decoding has emerged as a promising solution, but existing approaches face key limitations: training-based methods require a draft model that is challenging to obtain and lacks generalizability, while training-free methods offer limited speedup gains. In this work, we present Spectra, a novel framework for accelerating LLM inference without the need for additional training or modification to the original LLM. Spectra introduces two new techniques for efficiently utilizing internal and external speculation, each outperforming corresponding state-of-the-art (SOTA) methods independently. When combined, these techniques achieve up to a 4.08x speedup across various benchmarks and LLM architectures, significantly surpassing existing training-free approaches. The implementation of Spectra is publicly available. 2025.acl-long.685 @@ -10060,7 +10060,7 @@ JianzhuBao YuqiHuang BinLiangThe Chinese University of Hong Kong - Kam-FaiWongThe Chinese University of Hong Kong + Kam-FaiWongThe Chinese University of Hong Kong MinYangShenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences RuifengXuHarbin Institute of Technology 14133-14148 @@ -10079,7 +10079,7 @@ HuiSuMeituan JinlanFu MingLiuHarbin Institute of Technology - See-KiongNgNational University of Singapore + See-KiongNgNational University of Singapore BingQinHarbin Institute of Technology 14149-14162 Large Multimodal Models (LMMs) have recently demonstrated impressive performance on general video comprehension benchmarks. Nevertheless, for broader applications, the robustness of their temporal analysis capability needs to be thoroughly investigated yet predominantly ignored. Motivated by this, we propose a novel temporal robustness benchmark (TemRobBench), which introduces temporal inconsistency perturbations separately at the visual and textual modalities to assess the robustness of models. We evaluate 16 mainstream LMMs and find that they exhibit over-reliance on prior knowledge and textual context in adversarial environments, while ignoring the actual temporal dynamics in the video. To mitigate this issue, we design panoramic direct preference optimization (PanoDPO), which encourages LMMs to incorporate both visual and linguistic feature preferences simultaneously. Experimental results show that PanoDPO can effectively enhance the model’s robustness and reliability in temporal analysis. @@ -10136,7 +10136,7 @@ XinyiZhouEast China Normal University NingZhang ShangqingZhaoEast China Normal University - ManLan + ManLan XiaopengBaiEast China Normal University 14215-14231 Argument mining has garnered increasing attention over the years, with the recent advancement of Large Language Models (LLMs) further propelling this trend. However, current argument relations remain relatively simplistic and foundational, struggling to capture the full scope of argument information. To address this limitation, we propose a systematic framework comprising 14 fine-grained relation types from the perspectives of vertical argument relations and horizontal discourse relations, thereby capturing the intricate interplay between argument components for a thorough understanding of argument structure. On this basis, we conducted extensive experiments on three tasks: argument component prediction, relation prediction, and automated essay grading. Additionally, we explored the impact of writing quality on argument component prediction and relation prediction, as well as the connections between discourse relations and argumentative features. The findings highlight the importance of fine-grained argumentative annotations for argumentative writing assessment and encourage multi-dimensional argument analysis. @@ -10148,7 +10148,7 @@ Browsing Like Human: A Multimodal Web Agent with Experiential Fast-and-Slow Thinking HaohaoLuoSUN YAT-SEN UNIVERSITY JiayiKuangSUN YAT-SEN UNIVERSITY - WeiLiu + WeiLiu YingShen JianLuanXiaomi Corporation YangDengSingapore Management University @@ -10227,7 +10227,7 @@ NurkhanLaiyk Alham FikriAjiMohamed bin Zayed University of Artificial Intelligence EkaterinaKochmarMohamed bin Zayed University of Artificial Intelligence - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence FajriKotoMohamed bin Zayed University of Artificial Intelligence 14403-14416 Despite having a population of twenty million, Kazakhstan’s culture and language remain underrepresented in the field of natural language processing. Although large language models (LLMs) continue to advance worldwide, progress in Kazakh language has been limited, as seen in the scarcity of dedicated models and benchmark evaluations. To address this gap, we introduce KazMMLU, the first MMLU-style dataset specifically designed for Kazakh language. KazMMLU comprises 23,000 questions that cover various educational levels, including STEM, humanities, and social sciences, sourced from authentic educational materials and manually validated by native speakers and educators. The dataset includes 10,969 Kazakh questions and 12,031 Russian questions, reflecting Kazakhstan’s bilingual education system and rich local context. Our evaluation of several state-of-the-art multilingual models (Llama3.1, Qwen-2.5, GPT-4, and DeepSeek V3) demonstrates substantial room for improvement, as even the best-performing models struggle to achieve competitive performance in Kazakh and Russian. These findings highlight significant performance gaps compared to high-resource languages. We hope that our dataset will enable further research and development of Kazakh-centric LLMs. @@ -10298,7 +10298,7 @@ RiturajJoshiCerebras Systems, Inc MaiyaGoloburda YuxiaWang - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence FajriKotoMohamed bin Zayed University of Artificial Intelligence 14509-14538 Instruction tuning in low-resource languages remains underexplored due to limited text data, particularly in government and cultural domains. To address this, we introduce and open-source a large-scale (10,600 samples) instruction-following (IFT) dataset, covering key institutional and cultural knowledge relevant to Kazakhstan. Our dataset enhances LLMs’ understanding of procedural, legal, and structural governance topics. We employ LLM-assisted data generation, comparing open-weight and closed-weight models for dataset construction, and select GPT-4o as the backbone. Each entity of our dataset undergoes full manual verification to ensure high quality. We also show that fine-tuning Qwen, Falcon, and Gemma on our dataset leads to consistent performance improvements in both multiple-choice and generative tasks, demonstrating the potential of LLM-assisted instruction tuning for low-resource languages. @@ -10342,7 +10342,7 @@ YueZhaoUniversity of Southern California NedimLipkaAdobe Systems SeunghyunYoonAdobe Research - Ting-Hao KennethHuangPennsylvania State University + Ting-Hao KennethHuangPennsylvania State University ZichaoWangAdobe Research PuneetMathurAdobe Systems SoumyabrataPalAdobe Systems @@ -10365,7 +10365,7 @@ LuyaoChengAlibaba Group ChongDeng QianChen - WenWang + WenWang SiqiZhengAlibaba Group JiaqingLiuAlibaba Group HaiYuAlibaba Group @@ -10490,7 +10490,7 @@ DongqiCai RongjieYi FangmingLiuHuazhong University of Science and Technology - WeiLiu + WeiLiu JianLuanXiaomi Corporation XiwenZhangHelixon Research Nicholas D.LaneFlower Labs and University of Cambridge @@ -10520,7 +10520,7 @@ OikantikNathDepartment of Computer Science, Indian Institute of Technology, Madras, Indian Institute of Technology, Madras HananiBathina Mohammed Safi Ur RahmanKhanIndian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology and Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology - Mitesh MKhapraIndian Institute of Technology, Madras + Mitesh MKhapraIndian Institute of Technology, Madras 14784-14814 Recent advancements in Vision-Language Models (VLMs) have opened new possibilities in automatic grading of handwritten student responses, particularly in mathematics. However, a comprehensive study to test the ability of VLMs to evaluate and reason over handwritten content remains absent. To address this gap, we introduce FERMAT, a benchmark designed to assess VLMs’ ability to detect, localize and correct errors in handwritten mathematical content. FERMAT spans four key error dimensions - computational, conceptual, notational, and presentation - and comprises over 2,200 handwritten math solutions derived from 609 manually curated problems from grades 7-12 with intentionally introduced perturbations. Using FERMAT we benchmark nine VLMs across three tasks: error detection, localization, and correction. Our results reveal significant shortcomings in current VLMs in reasoning over handwritten text, with Gemini-1.5-Pro achieving the highest error correction rate (77%). We also observed that some models struggle with processing handwritten content, as their accuracy improves when handwritten inputs are replaced with printed text or images. These findings highlight the limitations of current VLMs and reveal new avenues for improvement. We will release FERMAT and all the associated resources in the open-source to drive further research. 2025.acl-long.720 @@ -10612,7 +10612,7 @@ Two Intermediate Translations Are Better Than One: Fine-tuning <fixed-case>LLM</fixed-case>s for Document-level Translation Refinement YichenDong XinglinLyuHuawei Technologies Ltd. - JunhuiLiSoochow University, China + JunhuiLiSoochow University, China DaimengWei MinZhangHuawei Technologies Ltd. ShiminTaoHuawei Technologies Ltd. @@ -10627,7 +10627,7 @@ Circuit Compositions: Exploring Modular Structures in Transformer-Based Language Models PhilippMondorfLudwig-Maximilians-Universität München SondreWold - BarbaraPlankLudwig-Maximilians-Universität München + BarbaraPlankLudwig-Maximilians-Universität München 14934-14955 A fundamental question in interpretability research is to what extent neural networks, particularly language models, implement reusable functions through subnetworks that can be composed to perform more complex tasks. Recent advances in mechanistic interpretability have made progress in identifying circuits, the minimal computational subgraphs responsible for a model’s behavior on specific tasks. However, most studies focus on identifying circuits for individual tasks without investigating how functionally similar circuits relate to each other. To address this gap, we study the modularity of neural networks by analyzing circuits for highly compositional subtasks within a transformer-based language model. Specifically, given a probabilistic context-free grammar, we identify and compare circuits responsible for ten modular string-edit operations. Our results indicate that functionally similar circuits exhibit both notable node overlap and cross-task faithfulness. Moreover, we demonstrate that the circuits identified can be reused and combined through set operations to represent more complex functional model capabilities. 2025.acl-long.727 @@ -10638,7 +10638,7 @@ Can <fixed-case>LLM</fixed-case>s Ground when they (Don’t) Know: A Study on Direct and Loaded Political Questions ClaraLachenmaier JudithSiekerUniversität Bielefeld - SinaZarrießBielefeld University + SinaZarrießBielefeld University 14956-14975 Communication among humans relies on conversational grounding, allowing interlocutors to reach mutual understanding even when they do not have perfect knowledge and must resolve discrepancies in each other’s beliefs. This paper investigates how large language models (LLMs) manage common ground in cases where they (don’t) possess knowledge, focusing on facts in the political domain where the risk of misinformation and grounding failure is high. We examine LLMs’ ability to answer direct knowledge questions and loaded questions that presuppose misinformation.We evaluate whether loaded questions lead LLMs to engage in active grounding and correct false user beliefs, in connection to their level of knowledge and their political bias.Our findings highlight significant challenges in LLMs’ ability to engage in grounding and reject false user beliefs, raising concerns about their role in mitigating misinformation in political discourse. 2025.acl-long.728 @@ -10724,7 +10724,7 @@ XinZhangAnt International XuanhongLiWuhan University ChongTeng - DonghongJi + DonghongJi ZhuangLiRoyal Melbourne Institute of Technology 15077-15099 Large Language Models (LLMs) excel in various natural language processing tasks but remain vulnerable to generating harmful content or being exploited for malicious purposes. Although safety alignment datasets have been introduced to mitigate such risks through supervised fine-tuning (SFT), these datasets often lack comprehensive risk coverage. Most existing datasets focus primarily on lexical diversity while neglecting other critical dimensions. To address this limitation, we propose a novel analysis framework to systematically measure the risk coverage of alignment datasets across three essential dimensions: Lexical Diversity, Malicious Intent, and Jailbreak Tactics. We further introduce TRIDENT, an automated pipeline that leverages persona-based, zero-shot LLM generation to produce diverse and comprehensive instructions spanning these dimensions. Each harmful instruction is paired with an ethically aligned response, resulting in two datasets: TRIDENT-Core, comprising 26,311 examples, and TRIDENT-Edge, with 18,773 examples. Fine-tuning Llama 3.1-8B on TRIDENT-Edge demonstrates substantial improvements, achieving an average 14.29% reduction in Harm Score, and a 20% decrease in Attack Success Rate compared to the best-performing baseline model fine-tuned on the WildBreak dataset. @@ -10737,7 +10737,7 @@ JungseobLeeKorea University SeongtaeHongKorea University HyeonseokMoonKorea University - HeuiseokLim + HeuiseokLim 15100-15119 Adapting large language models to other languages typically employs supervised fine-tuning (SFT) as a standard approach. However, it often suffers from an overemphasis on English performance, a phenomenon that is especially pronounced in data-constrained environments. To overcome these challenges, we propose Cross-Lingual Optimization (CLO) that efficiently transfers an English-centric LLM to a target language while preserving its English capabilities. CLO utilizes publicly available English SFT data and a translation model to enable cross-lingual transfer. We conduct experiments using five models on six languages, each possessing varying levels of resource. Our results show that CLO consistently outperforms SFT in both acquiring target language proficiency and maintaining English performance. Remarkably, in low-resource languages, CLO with only 3,200 samples surpasses SFT with 6,400 samples, demonstrating that CLO can achieve better performance with less data. Furthermore, we find that SFT is particularly sensitive to data quantity in medium and low-resource languages, whereas CLO remains robust. Our comprehensive analysis emphasizes the limitations of SFT and incorporates additional training strategies in CLO to enhance efficiency. 2025.acl-long.734 @@ -10890,7 +10890,7 @@ A New Formulation of <fixed-case>Z</fixed-case>ipf’s Meaning-Frequency Law through Contextual Diversity RyoNagataRIKEN and Konan University - KumikoTanaka-IshiiWaseda University + KumikoTanaka-IshiiWaseda University 15323-15335 This paper proposes formulating Zipf’s meaning-frequency law, the power law between word frequency and the number of meanings, as a relationship between word frequency and contextual diversity. The proposed formulation quantifies meaning counts as contextual diversity, which is based on the directions of contextualized word vectors obtained from a Language Model (LM). This formulation gives a new interpretation to the law and also enables us to examine it for a wider variety of words and corpora than previous studies have explored. In addition, this paper shows that the law becomes unobservable when the size of the LM used is small and that autoregressive LMs require much more parameters than masked LMs to be able to observe the law. 2025.acl-long.744 @@ -10907,7 +10907,7 @@ QiCaoInstitute of Computing Technology, Chinese Academy of Sciences, China DaweiYinBaidu HuaweiShenInstitute of Computing Technology, Chinese Academy of Sciences - XueqiChengInstitute of Computing Technology, Chinese Academy + XueqiChengInstitute of Computing Technology, Chinese Academy 15336-15354 Despite near-perfect results reported in the literature, the effectiveness of model editing in real-world applications remains unclear. To bridge this gap, we introduce QAEdit, a new benchmark aligned with widely used question answering (QA) datasets, and WILD, a task-agnostic evaluation framework designed to better reflect real-world usage of model editing. Our single editing experiments show that current editing methods perform substantially worse than previously reported (38.5% vs. 96.8%). We demonstrate that it stems from issues in the synthetic evaluation practices of prior work. Among them, the most severe is the use of teacher forcing during testing, which leaks both content and length of the ground truth, leading to overestimated performance. Furthermore, we simulate practical deployment by sequential editing, revealing that current approaches fail drastically with only 1000 edits. This work calls for a shift in model editing research toward rigorous evaluation and the development of robust, scalable methods that can reliably update knowledge in LLMs for real-world use. 2025.acl-long.745 @@ -11017,7 +11017,7 @@ <fixed-case>MISP</fixed-case>-Meeting: A Real-World Dataset with Multimodal Cues for Long-form Meeting Transcription and Summarization HangChenHangChen - Chao-Han HuckYangNVIDIA Research + Chao-Han HuckYangNVIDIA Research Jia-ChenGuUniversity of California, Los Angeles Sabato MarcoSiniscalchiUniversity of Palermo and Norwegian Institute of Technology JunDuUniversity of Science and Technology of China @@ -11138,10 +11138,10 @@ <fixed-case>MLAS</fixed-case>-<fixed-case>L</fixed-case>o<fixed-case>RA</fixed-case>: Language-Aware Parameters Detection and <fixed-case>L</fixed-case>o<fixed-case>RA</fixed-case>-Based Knowledge Transfer for Multilingual Machine Translation TianyuDong - BoLiBaidu Inc + BoLiBaidu Inc JinsongLiu ShaolinZhuTianjin University - DeyiXiongTianjin University + DeyiXiongTianjin University 15645-15660 Large language models (LLMs) have achieved remarkable progress in multilingual machine translation (MT), demonstrating strong performance even with limited parallel data. However, effectively fine-tuning LLMs for MT is challenging due to parameter interference, which arises from the conflicting demands of different language pairs and the risk of overwriting pre-trained knowledge. To address this issue, we propose MLAS-LoRA, a novel multiple language-aware LoRA knowledge transfer framework. MLAS-LoRA efficiently adapts LLMs to MT by selectively transferring knowledge from a large teacher to a small student model. Our approach first evaluates the awareness of neurons and extracts linguistic knowledge in the teacher model to both the general MT task and specific language pairs.We then propose a multiple language-specific LoRA architecture to inject the extracted knowledge into the student model. During fine-tuning, only the parameters of the relevant language-general and language-specific LoRA modules are updated. Experimental results on diverse multilingual language pairs demonstrate that MLAS-LoRA significantly outperforms strong baselines by +1.7 BLEU on average, including standard fine-tuning and other parameter-efficient methods. 2025.acl-long.762 @@ -11192,7 +11192,7 @@ MariaManina DariaIgnatenko ArtemShelmanovMohamed bin Zayed University of Artificial Intelligence - ChrisBiemannU Hamburg + ChrisBiemannU Hamburg 15702-15720 Comparative Question Answering (CQA) lies at the intersection of Question Answering, Argument Mining, and Summarization. It poses unique challenges due to the inherently subjective nature of many questions and the need to integrate diverse perspectives. Although the CQA task can be addressed using recently emerged instruction-following Large Language Models (LLMs), challenges such as hallucinations in their outputs and the lack of transparent argument provenance remain significant limitations.To address these challenges, we construct a manually curated dataset comprising arguments annotated with their relevance. These arguments are further used to answer comparative questions, enabling precise traceability and faithfulness. Furthermore, we define explicit criteria for an “ideal” comparison and introduce a benchmark for evaluating the outputs of various Retrieval-Augmented Generation (RAG) models with respect to argument relevance. All code and data are publicly released to support further research. 2025.acl-long.765 @@ -11212,7 +11212,7 @@ KunJi QingHuang XinyangHu - YangLiu + YangLiu QianheZheng 15721-15749 We introduce **FinanceReasoning**, a novel benchmark designed to evaluate the reasoning capabilities of large reasoning models (LRMs) in financial numerical reasoning problems. Compared to existing benchmarks, our work provides three key advancements. (1) **Credibility**: We update 15.6% of the questions from four public datasets, annotating 908 new questions with detailed Python solutions and rigorously refining evaluation standards. This enables an accurate assessment of the reasoning improvements of LRMs. (2) **Comprehensiveness**: FinanceReasoning covers 67.8% of financial concepts and formulas, significantly surpassing existing datasets. Additionally, we construct 3,133 Python-formatted functions, which enhances LRMs’ financial reasoning capabilities through refined knowledge (*e.g.*, 83.2% \rightarrow 91.6% for GPT-4o). (3) **Challenge**: Models are required to apply multiple financial formulas for precise numerical reasoning on 238 *Hard* problems. The best-performing model (*i.e.*, OpenAI o1 with PoT) achieves 89.1% accuracy, yet LRMs still face challenges in numerical precision. We demonstrate that combining Reasoner and Programmer models can effectively enhance LRMs’ performance (*e.g.*, 83.2% \rightarrow 87.8% for DeepSeek-R1). Our work paves the way for future research on evaluating and improving LRMs in domain-specific complex reasoning tasks. @@ -11238,7 +11238,7 @@ PeiyuLiuUniversity of International Business and Economics TianwenWeiXiaomi BoZhu - XinZhaoRenmin University of China + XinZhaoRenmin University of China ShuichengYanNational University of Singapore 15800-15811 In this work, we investigate how to sparsify a pre-trained dense large language model into a mixture-of-experts (MoE) architecture for faster inference. Our approach applies mask matrix to the activations for each expert, constrained by L_0 regularization to minimize the number of activated parameters. Starting with all parameters active, the model is progressively sparsified during training, ensuring minimal performance loss. This approach proves more efficient than one-shot sparsification techniques, which typically require significant resources for performance recovery. Moreover, our approach automatically identifies shared, token-specific, and inactive experts, allowing for more efficient allocation of computational resources. Through extensive experiments, we achieve up to 97% performance retention on downstream tasks with only 50% of the feed-forward parameters activated in dense models. Beyond enhancing inference efficiency, this strategy of sharing computational units among experts presents a valuable framework for designing more generalized and efficient MoE architectures, opening avenues for future advancements in expert-based models. @@ -11438,8 +11438,8 @@ <fixed-case>C</fixed-case>risis<fixed-case>TS</fixed-case>: Coupling Social Media Textual Data and Meteorological Time Series for Urgency Classification RomainMeunierIRIT - FarahBenamaraInstitut de recherche en informatique de toulouse - VéroniqueMoriceauIRIT, université de Toulouse + FarahBenamaraInstitut de recherche en informatique de toulouse + VéroniqueMoriceauIRIT, université de Toulouse ZhongzhengQiaoNanyang Technological University SavithaRamasamyInstitute for Infocomm Research, Agency for Science, Technology and Research, Singapore 16082-16099 @@ -11509,7 +11509,7 @@ ZhiyuanZhu YushengLiaoShanghai Jiaotong University ZheChen - YuhaoWang + YuhaoWang YunfengGuanShanghai Jiaotong University YanfengWangShanghai Jiao Tong University YuWangShanghai Jiao Tong University @@ -11626,7 +11626,7 @@ Unmasking Style Sensitivity: A Causal Analysis of Bias Evaluation Instability in Large Language Models JiaxuZhao MengFangUniversity of Liverpool and Eindhoven University of Technology - KunZhangMohamed bin Zayed University of Artificial Intelligence and Carnegie Mellon University + KunZhangMohamed bin Zayed University of Artificial Intelligence and Carnegie Mellon University MykolaPechenizkiyEindhoven University of Technology 16314-16338 Natural language processing applications are increasingly prevalent, but social biases in their outputs remain a critical challenge. While various bias evaluation methods have been proposed, these assessments show unexpected instability when input texts undergo minor stylistic changes. This paper conducts a comprehensive analysis of how different style transformations impact bias evaluation results across multiple language models and bias types using causal inference techniques. Our findings reveal that formality transformations significantly affect bias scores, with informal style showing substantial bias reductions (up to 8.33% in LLaMA-2-13B). We identify appearance bias, sexual orientation bias, and religious bias as most susceptible to style changes, with variations exceeding 20%. Larger models demonstrate greater sensitivity to stylistic variations, with bias measurements fluctuating up to 3.1% more than in smaller models. These results highlight critical limitations in current bias evaluation methods and emphasize the need for reliable and fair assessments of language models. @@ -11637,7 +11637,7 @@ <fixed-case>M</fixed-case>ock<fixed-case>C</fixed-case>onf: A Student Interpretation Dataset: Analysis, Word- and Span-level Alignment and Baselines DávidJavorský, Charles University Prague - OndřejBojarCharles University Prague + OndřejBojarCharles University Prague FrançoisYvonISIR, Sorbonne Université & CNRS 16339-16356 In simultaneous interpreting, an interpreter renders the speech into another language with a very short lag, much sooner than sentences are finished. In order to understand and later reproduce this dynamic and complex task automatically, we need specialized datasets and tools for analysis, monitoring, and evaluation, such as parallel speech corpora, and tools for their automatic annotation. Existing parallel corpora of translated texts and associated alignment algorithms hardly fill this gap, as they fail to model long-range interactions between speech segments or specific types of divergences (e.g. shortening, simplification, functional generalization) between the original and interpreted speeches. In this work, we develop and explore MockConf, a student interpretation dataset that was collected from Mock Conferences run as part of the students’ curriculum. This dataset contains 7 hours of recordings in 5 European languages, transcribed and aligned at the level of spans and words. We further implement and release InterAlign, a modern web-based annotation tool for parallel word and span annotations on long inputs, suitable for aligning simultaneous interpreting. We propose metrics for the evaluation and a baseline for automatic alignment. Dataset and tools will be released to the community. @@ -11652,7 +11652,7 @@ MingyangWang ZifengDing HelmutSchmidCenter for Information and Language Processing - HinrichSchuetze + HinrichSchuetze 16357-16374 This paper introduces BMIKE-53, a comprehensive benchmark for cross-lingual in-context knowledge editing (IKE), spanning 53 languages and three KE datasets: zsRE, CounterFact, and WikiFactDiff. Cross-lingual KE, which requires knowledge edited in one language to generalize across diverse languages while preserving unrelated knowledge, remains underexplored. To address this, we systematically evaluate IKE under zero-shot, one-shot, and few-shot setups, including tailored metric-specific demonstrations. Our findings reveal that model scale and demonstration alignment critically govern cross-lingual editing efficacy, with larger models and tailored demonstrations significantly improving performance. Linguistic properties, particularly script type, strongly influence outcomes, with non-Latin languages underperforming due to issues like language confusion. 2025.acl-long.798 @@ -11685,7 +11685,7 @@ Enhancing Event-centric News Cluster Summarization via Data Sharpening and Localization Insights LongyinZhang BoweiZouA*STAR - AiTiAwI2R + AiTiAwI2R 16412-16426 This paper tackles the challenges of clustering news articles by main events (MEs) and summarizing these clusters, focusing on diverse languages and localized contexts. Our approach consists of four key contributions. First, we investigate the role of dynamic clustering and the integration of various ME references, including event attributions extracted by language models (LMs), in enhancing event-centric clustering. Second, we propose a data-sharpening framework that optimizes the balance between information volume and entropy in input texts, thereby optimizing generated summaries on multiple indicators. Third, we fine-tune LMs with local news articles for cross-lingual temporal question-answering and text summarization, achieving notable improvements in capturing localized contexts. Lastly, we present the first cross-lingual dataset and comprehensive evaluation metrics tailored for the event-centric news cluster summarization pipeline. Our findings enhance the understanding of news summarization across N-gram, event-level coverage, and faithfulness, providing new insights into leveraging LMs for large-scale cross-lingual and localized news analysis. 2025.acl-long.801 @@ -11734,7 +11734,7 @@ KangyangLuoTsinghua University, Tsinghua University ChenQianShanghai Jiaotong University FanchaoQi - BaobaoChangPeking University + BaobaoChangPeking University MaosongSunTsinghua University 16469-16488 Training LLMs on data containing unfamiliar knowledge during the instruction tuning stage can encourage hallucinations. To address this challenge, we introduce NOVA, a novel framework designed to identify high-quality data that aligns well with the LLM’s learned knowledge to reduce hallucinations. NOVA includes Internal Consistency Probing (ICP) and Semantic Equivalence Identification (SEI) to measure how familiar the LLM is with instruction data. Specifically, ICP evaluates the LLM’s understanding of the given instruction by calculating the tailored consistency among multiple self-generated responses. SEI further assesses the familiarity of the LLM with the target response by comparing it to the generated responses, using the proposed semantic clustering and well-designed voting strategy. Finally, to ensure the quality of selected samples, we introduce an expert-aligned reward model, considering characteristics beyond just familiarity. By considering data quality and avoiding unfamiliar data, we can utilize the selected data to effectively align LLMs to follow instructions and hallucinate less. Experiments show that NOVA significantly reduces hallucinations while maintaining a competitive ability to follow instructions. @@ -11764,7 +11764,7 @@ QianqianXie Christinede Kock SophiaAnaniadouUniversity of Manchester - EduardHovyUniversity of Melbourne and Carnegie Mellon University + EduardHovyUniversity of Melbourne and Carnegie Mellon University 16508-16523 Misinformation is prevalent in various fields such as education, politics, health, etc., causing significant harm to society. However, current methods for cross-domain misinformation detection rely on effort- and resource-intensive fine-tuning and complex model structures. With the outstanding performance of LLMs, many studies have employed them for misinformation detection. Unfortunately, they focus on in-domain tasks and do not incorporate significant sentiment and emotion features (which we jointly call affect). In this paper, we propose RAEmoLLM, the first retrieval augmented (RAG) LLMs framework to address cross-domain misinformation detection using in-context learning based on affective information. RAEmoLLM includes three modules. (1) In the index construction module, we apply an emotional LLM to obtain affective embeddings from all domains to construct a retrieval database. (2) The retrieval module uses the database to recommend top K examples (text-label pairs) from source domain data for target domain contents. (3) These examples are adopted as few-shot demonstrations for the inference module to process the target domain content. The RAEmoLLM can effectively enhance the general performance of LLMs in cross-domain misinformation detection tasks through affect-based retrieval, without fine-tuning. We evaluate our framework on three misinformation benchmarks. Results show that RAEmoLLM achieves significant improvements compared to the other few-shot methods on three datasets, with the highest increases of 15.64%, 31.18%, and 15.73% respectively. This project is available at https://github.com/lzw108/RAEmoLLM. 2025.acl-long.806 @@ -11813,7 +11813,7 @@ Elena SofiaRuzzettiUniversità degli Studi di Roma Tor Vergata Giancarlo A.XomperoUniversity of Rome Tor Vergata and Almawave SpA DavideVenditti - Fabio MassimoZanzottoUniversity of Rome Tor Vergata + Fabio MassimoZanzottoUniversity of Rome Tor Vergata 16572-16592 Large Language Models (LLMs) memorize, and thus, among huge amounts of uncontrolled data, may memorize Personally Identifiable Information (PII), which should not be stored and, consequently, not leaked. In this paper, we introduce Private Memorization Editing (PME), an approach for preventing private data leakage that turns an apparent limitation, that is, the LLMs’ memorization ability, into a powerful privacy defense strategy. While attacks against LLMs have been performed exploiting previous knowledge regarding their training data, our approach aims to exploit the same kind of knowledge in order to make a model more robust. We detect a memorized PII and then mitigate the memorization of PII by editing a model knowledge of its training data. We verify that our procedure does not affect the underlying language model while making it more robust against privacy Training Data Extraction attacks. We demonstrate that PME can effectively reduce the number of leaked PII in a number of configurations, in some cases even reducing the accuracy of the privacy attacks to zero. 2025.acl-long.810 @@ -11993,7 +11993,7 @@ YasuhiroFujiwaraNTT AyakaMatsumotoNTT Communications NarichikaNomotoNTT, The University of Tokyo - YoshihideSatoNTT + YoshihideSatoNTT 16831-16861 Large language models enhance collaborative task execution in multi-agent systems. Current studies break complex task into manageable tasks, but agents lack understanding of the overall task and how others approach their tasks, hindering synergy and integration.We propose a method called knowledgeable Agents to design and perform @@ -12006,7 +12006,7 @@ Logical forms complement probability in understanding language model (and human) performance YixuanWangUniversity of Chicago - FredaShiUniversity of Waterloo and Vector Institute + FredaShiUniversity of Waterloo and Vector Institute 16862-16877 With the increasing interest in using large language models (LLMs) for planning in natural language, understanding their behaviors becomes an important research question. This work conducts a systematic investigation of LLMs’ ability to perform logical reasoning in natural language. We introduce a controlled dataset of hypothetical and disjunctive syllogisms in propositional and modal logic and use it as the testbed for understanding LLM performance. Our results lead to novel insights in predicting LLM behaviors: in addition to the probability of input, logical forms should be considered as important factors. In addition, we show similarities and discrepancies between the logical reasoning performances of humans and LLMs by collecting and comparing behavioral data from both. 2025.acl-long.824 @@ -12023,7 +12023,7 @@ LeiHuang TingLiuHarbin Institute of Technology BingQinHarbin Institute of Technology - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore 16878-16895 Large language models (LLMs) have demonstrated impressive instruction following capabilities, while still struggling to accurately manage the length of the generated text, which is a fundamental requirement in many real-world applications. Existing length control methods involve fine-tuning the parameters of LLMs, which is inefficient and suboptimal for practical use. In this paper, we propose a novel iterative sampling framework for text length control, integrating the Metropolis-Hastings algorithm with an importance sampling acceleration strategy. This framework efficiently and reliably regulates LLMs to generate length-constrained text without modifying the underlying parameters, thereby preserving the original capabilities of LLMs. Experimental results demonstrate that our framework achieves almost 100% success rates of length control on Llama3.1 for tasks such as length-controlled abstractive summarization and length-constrained instruction following, with minimal additional computational overhead. This also highlights the significant potential of our method for precise length control across a broader range of applications, without compromising the versatility of LLMs. 2025.acl-long.825 @@ -12053,7 +12053,7 @@ Global Eye: Breaking the “Fixed Thinking Pattern” during the Instruction Expansion Process WenxuanLu - WeiLiu + WeiLiu JianLuanXiaomi Corporation BinWangAI Lab, Xiaomi Inc. SonghaoJiang @@ -12103,7 +12103,7 @@ <fixed-case>E</fixed-case>vent<fixed-case>RAG</fixed-case>: Enhancing <fixed-case>LLM</fixed-case> Generation with Event Knowledge Graphs ZairunYang YilinWang - ZhengyanShi + ZhengyanShi YuanYaoZhejiang University LeiLiang KeyanDingZhejiang University @@ -12271,7 +12271,7 @@ YiweiWangUniversity of California, Merced JunfengFang PengliangJi - XueqiChengInstitute of Computing Technology, Chinese Academy + XueqiChengInstitute of Computing Technology, Chinese Academy 17198-17208 The knowledge within large language models (LLMs) may become outdated quickly. While in-context editing (ICE) is currently the most effective method for knowledge editing (KE), it is constrained by the black-box modeling of LLMs and thus lacks interpretability. Our work aims to elucidate the superior performance of ICE in KE by analyzing the impacts of in-context new knowledge on token-wise distributions. We observe that despite a significant boost in logits of the new knowledge, the performance of ICE is still hindered by stubborn knowledge. We propose a novel approach termed Decoding by Contrasting Knowledge (DeCK). DeCK derives the distribution of the next token by contrasting the logits obtained from the newly edited knowledge guided by ICE with those from the unedited parametric knowledge. Our experiments demonstrate that DeCK enhances the confidence of LLMs in edited facts. For instance, it improves the performance of LLaMA3-8B-instruct on MQuAKE by up to 219%, demonstrating its capability to strengthen ICE. DeCK can be easily integrated into any ICE method as a decoding component to enhance editing capabilities. 2025.acl-long.841 @@ -12295,7 +12295,7 @@ Improving Chain-of-Thought Reasoning via Quasi-Symbolic Abstractions LeonardoRanaldi MarcoValentinoUniversity of Sheffield - AndreFreitasIdiap Research Institute and University of Manchester + AndreFreitasIdiap Research Institute and University of Manchester 17222-17240 Chain-of-Though (CoT) represents a common strategy for reasoning in Large Language Models (LLMs) by decomposing complex tasks into intermediate inference steps. However, explanations generated via CoT are susceptible to content biases that negatively affect their robustness and faithfulness. To mitigate existing limitations, recent work has proposed using logical formalisms coupled with external symbolic solvers. However, fully symbolic approaches possess the bottleneck of requiring a complete translation from natural language to formal languages, a process that affects efficiency and flexibility. To achieve a trade-off, this paper investigates methods to disentangle content from logical reasoning without a complete formalisation. In particular, we present QuaSAR (for Quasi-Symbolic Abstract Reasoning), a variation of CoT that guides LLMs to operate at a higher level of abstraction via quasi-symbolic explanations. Our framework leverages the capability of LLMs to formalise only relevant variables and predicates, enabling the coexistence of symbolic elements with natural language. We show the impact of QuaSAR for in-context learning and for constructing demonstrations to improve the reasoning capabilities of smaller models. Our experiments show that quasi-symbolic abstractions can improve CoT-based methods by up to 8% accuracy, enhancing robustness and consistency on challenging adversarial variations on both natural language (i.e. MMLU-Redux) and symbolic reasoning tasks (i.e., GSM-Symbolic). 2025.acl-long.843 @@ -12380,7 +12380,7 @@ SeanPapayUniversity of Stuttgart YarikMenchaca Resendiz AswathyVelutharambathUniversity of Stuttgart, Universität Stuttgart - AmelieWuehrl + AmelieWuehrl SabineWeber RomanKlingerOtto-Friedrich Universität Bamberg 17331-17348 @@ -12430,7 +12430,7 @@ XinwangLiuNational University of Defense Technology KaiLu MinlieHuang - DongshengLiNational University of Defense Technology + DongshengLiNational University of Defense Technology 17391-17406 LLMs demonstrate remarkable utility but remain vulnerable to jailbreak attacks that aim to elicit harmful responses. Existing defenses, including post-training alignment and prompt engineering, rely on training on safety-annotated datasets and safe prompt templates, struggling with adaptability to out-of-distribution (OOD) attacks. Steering internal representations of LLMs provides real-time adjustments to defend against OOD attacks. However, it struggles with maintaining model utility, since modifying the representation disrupts the forward pass of inference. It barely considers the competitive objectives of helpfulness and harmlessness in LLMs. We argue that adversarial game-based approaches promise a solution for conflicts between the two objectives. In this paper, we propose **A**dversarial **G**ame **D**efense (AGD), an adversarial game-based defense method that dynamically adjusts LLMs’ internal representations to achieve a balanced trade-off between helpfulness and harmlessness. AGD first proposes an interquartile range (IQR) method to detect abnormal attention weights and correct the abnormal weights via adversarial training. AGD adopts a bi-level optimization to play a two-player variable-sum game to approach Nash Equilibrium (NE), where the two players adversarially refine head activations for helpfulness and harmlessness respectively. Furthermore, AGD applies an expert model to next-token sampling to generate safer responses. Experiments show that AGD significantly improves LLMs’ safety over all baselines. 2025.acl-long.851 @@ -12465,15 +12465,15 @@ An Expanded Massive Multilingual Dataset for High-Performance Language Technologies (<fixed-case>HPLT</fixed-case>) LaurieBurchellCommon Crawl Foundation OnaDe Gibert BonetUniversity of Helsinki - NikolayArefyevUniversity of Oslo + NikolayArefyevUniversity of Oslo MikkoAulamoUniversity of Helsinki MartaBañónPrompsit Language Engineering PinzhenChenUniversity of Edinburgh MariiaFedorova LianeGuillouAveni BarryHaddowUniversity of Edinburgh - JanHajičCharles University - JindřichHelclCharles University + JanHajičCharles University + JindřichHelclCharles University ErikHenrikssonUniversity of Turku MateuszKlimaszewskiWarsaw University of Technology VilleKomulainen @@ -12491,11 +12491,11 @@ ProyagPal JousiaPihaUniversity of Turku SampoPyysaloUniversity of Turku - GemaRamírez-SánchezUniversidad de Alicante + GemaRamírez-SánchezUniversidad de Alicante DavidSamuelUniversity of Oslo PavelStepachevUniversity of Edinburgh, University of Edinburgh - JörgTiedemannUniversity of Helsinki - DušanVarišCharles University Prague + JörgTiedemannUniversity of Helsinki + DušanVarišCharles University Prague TerezaVojtěchováCharles University Prague JaumeZaragoza-BernabeuPrompsit Language Engineering 17452-17485 @@ -12631,7 +12631,7 @@ JianfengLiPingan Technology ShaojunWangPAII Inc. JingXiaoPingan Group - DeyiXiongTianjin University + DeyiXiongTianjin University 17637-17659 Dialogue agents powered by Large Language Models (LLMs) show superior performance in various tasks. Despite the better user understanding and human-like responses, their **lack of controllability** remains a key challenge, often leading to unfocused conversations or task failure. To address this, we introduce Standard Operating Procedure (SOP) to regulate dialogue flow. Specifically, we propose **ChatSOP**, a novel SOP-guided Monte Carlo Tree Search (MCTS) planning framework designed to enhance the controllability of LLM-driven dialogue agents. To enable this, we curate a dataset comprising SOP-annotated multi-scenario dialogues, generated using a semi-automated role-playing system with GPT-4o and validated through strict manual quality control. Additionally, we propose a novel method that integrates Chain of Thought reasoning with supervised fine-tuning for SOP prediction and utilizes SOP-guided Monte Carlo Tree Search for optimal action planning during dialogues. Experimental results demonstrate the effectiveness of our method, such as achieving a 27.95% improvement in action accuracy compared to baseline models based on GPT-3.5 and also showing notable gains for open-source models. Dataset and codes are publicly available. 2025.acl-long.863 @@ -12680,7 +12680,7 @@ XinQuan MarcoValentinoUniversity of Sheffield Louise A.DennisUniversity of Manchester, University of Manchester - AndreFreitasIdiap Research Institute and University of Manchester + AndreFreitasIdiap Research Institute and University of Manchester 17734-17755 Natural language explanations play a fundamental role in Natural Language Inference (NLI) by revealing how premises logically entail hypotheses. Recent work has shown that the interaction of large language models (LLMs) with theorem provers (TPs) can help verify and improve the validity of NLI explanations. However, TPs require translating natural language into machine-verifiable formal representations, a process that introduces the risk of semantic information loss and unfaithful interpretation, an issue compounded by LLMs’ challenges in capturing critical logical structures with sufficient precision. Moreover, LLMs are still limited in their capacity for rigorous and robust proof construction within formal verification frameworks. To mitigate issues related to faithfulness and robustness, this paper investigates strategies to (1) alleviate semantic loss during autoformalisation, (2) efficiently identify and correct syntactic errors in logical representations, (3) explicitly use logical expressions to guide LLMs in generating structured proof sketches, and (4) increase LLMs’ capacity of interpreting TP’s feedback for iterative refinement. Our empirical results on e-SNLI, QASC and WorldTree using different LLMs demonstrate that the proposed strategies yield significant improvements in autoformalisation (+18.46%, +34.2%, +39.77%) and explanation refinement (+29.5%, +51.5%, +41.25%) over the state-of-the-art model. Moreover, we show that specific interventions on the hybrid LLM-TP architecture can substantially improve efficiency, drastically reducing the number of iterations required for successful verification. 2025.acl-long.867 @@ -12714,7 +12714,7 @@ From Human Reading to <fixed-case>NLM</fixed-case> Understanding: Evaluating the Role of Eye-Tracking Data in Encoder-Based Models - LucaDiniUniversity of Pisa + LucaDiniUniversity of Pisa LuciaDomenichelliConsiglio Nazionale delle Ricerche DominiqueBrunatoistituto di linguistica computazionale “A. Zampolli”, ILC-CNR, Pisa FeliceDell’OrlettaIstituto di Linguistica Computazionale “A. Zampolli” (ILC) @@ -12778,7 +12778,7 @@ JinyangGaoAlibaba Group BolinDingAlibaba Group HuaweiShenInstitute of Computing Technology, Chinese Academy of Sciences - XueqiChengInstitute of Computing Technology, Chinese Academy + XueqiChengInstitute of Computing Technology, Chinese Academy 17876-17891 Tool learning has emerged as a crucial capability for large language models (LLMs) to solve complex real-world tasks through interaction with external tools. Existing approaches face significant challenges, including reliance on hand-crafted prompts, difficulty in multi-step planning, and lack of precise error diagnosis and reflection mechanisms. We propose \texttt{ToolCoder}, a novel framework that reformulates tool learning as a code generation task. Inspired by software engineering principles, \texttt{ToolCoder} transforms natural language queries into structured Python function scaffold and systematically breaks down tasks with descriptive comments, enabling LLMs to leverage coding paradigms for complex reasoning and planning. It then generates and executes function implementations to obtain final responses. Additionally, \texttt{ToolCoder} stores successfully executed functions in a repository to promote code reuse, while leveraging error traceback mechanisms for systematic debugging, optimizing both execution efficiency and robustness. Experiments demonstrate that \texttt{ToolCoder} achieves superior performance in task completion accuracy and execution reliability compared to existing approaches, establishing the effectiveness of code-centric approaches in tool learning. 2025.acl-long.874 @@ -12829,7 +12829,7 @@ FandongMengWeChat AI, Tencent Inc. SongmingZhangBeijing Jiaotong University YufengChen - JinanXuBeijing Jiaotong University + JinanXuBeijing Jiaotong University JieZhou 17948-17963 Continually expanding new languages for existing large language models (LLMs) is a promising yet challenging approach to building powerful multilingual LLMs.The biggest challenge is to make the model continuously learn new languages while preserving the proficient ability of old languages.To achieve this, recent work utilizes the Mixture-of-Experts (MoE) architecture to expand new languages by adding new experts and avoid catastrophic forgetting of old languages by routing corresponding tokens to the original model backbone (old experts).Although intuitive, this kind of method is parameter-costly when expanding new languages and still inevitably impacts the performance of old languages.To address these limitations, we analyze the language characteristics of different layers in LLMs and propose a layer-wise expert allocation algorithm (LayerMoE) to determine the appropriate number of new experts for each layer.Specifically, we find different layers in LLMs exhibit different representation similarities between languages and then utilize the similarity as the indicator to allocate experts for each layer, i.e., the higher similarity, the fewer experts.Additionally, to further mitigate the forgetting of old languages, we add a classifier in front of the router network on the layers with higher similarity to guide the routing of old language tokens.Experimental results show that our method outperforms the previous state-of-the-art baseline with 60% fewer experts in the single-expansion setting and with 33.3% fewer experts in the lifelong-expansion setting, demonstrating the effectiveness of our method. @@ -13119,7 +13119,7 @@ <fixed-case>DNCASR</fixed-case>: End-to-End Training for Speaker-Attributed <fixed-case>ASR</fixed-case> XianruiZhengUniversity of Cambridge - ChaoZhangShanghai Artificial Intelligence Laboratory, Tsinghua University and University College London + ChaoZhangShanghai Artificial Intelligence Laboratory, Tsinghua University and University College London PhilWoodlandUniversity of Cambridge 18369-18383 This paper introduces DNCASR, a novel end-to-end trainable system designed for joint neural speaker clustering and automatic speech recognition (ASR), enabling speaker-attributed transcription of long multi-party meetings. DNCASR uses two separate encoders to independently encode global speaker characteristics and local waveform information, along with two linked decoders to generate speaker-attributed transcriptions. The use of linked decoders allows the entire system to be jointly trained under a unified loss function. By employing a serialised training approach, DNCASR effectively addresses overlapping speech in real-world meetings, where the link improves the prediction of speaker indices in overlapping segments. Experiments on the AMI-MDM meeting corpus demonstrate that the jointly trained DNCASR outperforms a parallel system that does not have links between the speaker and ASR decoders. Using cpWER to measure the speaker-attributed word error rate, DNCASR achieves a 9.0% relative reduction on the AMI-MDM Eval set. @@ -13147,7 +13147,7 @@ YuboMaSchool of Computer Science and Engineering, Nanyang Technological University MingzheDuNanyang Technological University and National University of Singapore RuiMao - Anh TuanLuuNanyang Technological University + Anh TuanLuuNanyang Technological University William YangWangUC Santa Barbara 18403-18419 Data contamination hinders fair LLM evaluation by introducing test data into newer models’ training sets. Existing studies solve this challenge by updating benchmarks with newly collected data. However, they fail to guarantee contamination-free evaluation as the newly collected data may contain pre-existing knowledge, and their benchmark updates rely on intensive human labor. To address these issues, we in this paper propose AntiLeak-Bench, an automated anti-leakage benchmarking framework. Instead of simply using newly collected data, we construct samples with explicitly new knowledge absent from LLMs’ training sets, which thus ensures strictly contamination-free evaluation. We further design a fully automated workflow to build and update our benchmark without human labor. This significantly reduces the cost of benchmark maintenance to accommodate emerging LLMs. Through extensive experiments, we highlight that data contamination likely exists before LLMs’ cutoff time and demonstrate that AntiLeak-Bench effectively overcomes this challenge. @@ -13265,7 +13265,7 @@ HuijieLv TaoGuiFudan University QiZhangFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 18530-18549 Data diversity is crucial for the instruction tuning of large language models. Existing studies have explored various diversity-aware data selection methods to construct high-quality datasets and enhance model performance. However, the fundamental problem of precisely defining and measuring data diversity remains underexplored, limiting clear guidance for data engineering. To address this, we systematically analyze 11 existing diversity measurement methods by evaluating their correlation with model performance through extensive fine-tuning experiments. Our results indicate that a reliable diversity measure should properly account for both inter-sample differences and the information density in the sample space. Building on this, we propose NovelSum, a new diversity metric based on sample-level “novelty.” Experiments on both simulated and real-world data show that NovelSum accurately captures diversity variations and achieves a 0.97 correlation with instruction-tuned model performance, highlighting its value in guiding data engineering practices. With NovelSum as an optimization objective, we further develop a greedy, diversity-oriented data selection strategy that outperforms existing approaches, validating both the effectiveness and practical significance of our metric. 2025.acl-long.908 @@ -13353,8 +13353,8 @@ Probing <fixed-case>LLM</fixed-case>s for Multilingual Discourse Generalization Through a Unified Label Set FlorianEichin - Yang JanetLiuLudwig-Maximilians-Universität München - BarbaraPlankLudwig-Maximilians-Universität München + Yang JanetLiuLudwig-Maximilians-Universität München + BarbaraPlankLudwig-Maximilians-Universität München Michael A.HedderichLudwig-Maximilians-Universität München 18665-18684 Discourse understanding is essential for many NLP tasks, yet most existing work remains constrained by framework-dependent discourse representations. This work investigates whether large language models (LLMs) capture discourse knowledge that generalizes across languages and frameworks. We address this question along two dimensions: (1) developing a unified discourse relation label set to facilitate cross-lingual and cross-framework discourse analysis, and (2) probing LLMs to assess whether they encode generalizable discourse abstractions. Using multilingual discourse relation classification as a testbed, we examine a comprehensive set of 23 LLMs of varying sizes and multilingual capabilities. Our results show that LLMs, especially those with multilingual training corpora, can generalize discourse information across languages and frameworks. Further layer-wise analyses reveal that language generalization at the discourse level is most salient in the intermediate layers. Lastly, our error analysis provides an account of challenging relation classes. @@ -13380,7 +13380,7 @@ VickyFeliren Bahrul IlmiNasution Manuel AntonioRufinoSamsung - Genta IndraWinataCapital One + Genta IndraWinataCapital One Rian AdamRajagedeUniversity of Central Florida and Universitas Islam Indonesia Carlos RafaelCatalanSamsung Research Mohamed Fazli MohamedImam @@ -13454,7 +13454,7 @@ RobertWijayaSingapore University of Technology and Design Onno P.Kampman RuochenZhangBrown University - Börje F.KarlssonBeijing Academy of Artificial Intelligence (BAAI) + Börje F.KarlssonBeijing Academy of Artificial Intelligence (BAAI) PeeratLimkonchotiwatAI Singapore 18685-18717 Despite Southeast Asia’s (SEA) extraordinary linguistic and cultural diversity, the region remains significantly underrepresented in vision-language (VL) research, resulting in AI models that inadequately capture SEA cultural nuances. To fill this gap, we present SEA-VL, an open-source initiative dedicated to developing culturally relevant high-quality datasets for SEA languages. By involving contributors from SEA countries, SEA-VL ensures better cultural relevance and diversity, fostering greater inclusivity of underrepresented languages and cultural depictions in VL research. Our methodology employed three approaches: community-driven crowdsourcing with SEA contributors, automated image crawling, and synthetic image generation. We evaluated each method’s effectiveness in capturing cultural relevance. We found that image crawling achieves approximately ~85% cultural relevance while being more cost- and time-efficient than crowdsourcing, whereas synthetic image generation failed to accurately reflect SEA cultural nuances and contexts. Collectively, we gathered 1.28 million SEA culturally relevant images, more than 50 times larger than other existing datasets. This work bridges the representation gap in SEA, establishes a foundation for developing culturally aware AI systems for this region, and provides a replicable framework for addressing representation gaps in other underrepresented regions. @@ -13495,20 +13495,20 @@ ShivalikaSinghCohere Labs AngelikaRomanou ClémentineFourrierHuggingFace - David IfeoluwaAdelaniMcGill University + David IfeoluwaAdelaniMcGill University Jian GangNguiNational University of Singapore DanielVila-SueroHugging Face PeeratLimkonchotiwatAI Singapore KellyMarchisioCohere and Cohere Wei QiLeongAI Singapore YosephineSusantoNational University of Singapore - RaymondNgNational University of Singapore + RaymondNgNational University of Singapore ShayneLongpre SebastianRuderFacebook Wei-YinKo AntoineBosselutSwiss Federal Institute of Technology Lausanne - AliceOhGoogle and Korea Advanced Institute of Science and Technology - AndreMartinsInstituto Superior Técnico and Unbabel + AliceOhGoogle and Korea Advanced Institute of Science and Technology + AndreMartinsInstituto Superior Técnico and Unbabel LeshemChoshenMassachusetts Institute of Technology and International Business Machines DaphneIppolitoCarnegie Mellon University EnzoFerranteCONICET / Universidad de Buenos Aires @@ -13525,7 +13525,7 @@ Improving Dialogue Discourse Parsing through Discourse-aware Utterance Clarification YaxinFan PeifengLiSoochow University, China - QiaomingZhuSoochow University + QiaomingZhuSoochow University 18800-18816 Dialogue discourse parsing aims to identify and analyze discourse relations between the utterances within dialogues. However, linguistic features in dialogues, such as omission and idiom, frequently introduce ambiguities that obscure the intended discourse relations, posing significant challenges for parsers. To address this issue, we propose a Discourse-aware Clarification Module (DCM) to enhance the performance of the dialogue discourse parser. DCM employs two distinct reasoning processes: clarification type reasoning and discourse goal reasoning. The former analyzes linguistic features, while the latter distinguishes the intended relation from the ambiguous one. Furthermore, we introduce Contribution-aware Preference Optimization (CPO) to mitigate the risk of erroneous clarifications, thereby reducing cascading errors. CPO enables the parser to assess the contributions of the clarifications from DCM and provide feedback to optimize the DCM, enhancing its adaptability and alignment with the parser’s requirements. Extensive experiments on the STAC and Molweni datasets demonstrate that our approach effectively resolves ambiguities and significantly outperforms the state-of-the-art (SOTA) baselines. 2025.acl-long.920 @@ -13549,7 +13549,7 @@ Words of Warmth: Trust and Sociability Norms for over 26k <fixed-case>E</fixed-case>nglish Words - Saif M.Mohammad + Saif M.Mohammad 18830-18850 Social psychologists have shown that Warmth (W) and Competence (C) are the primary dimensions along which we assess other people and groups. These dimensions impact various aspects of our lives from social competence and emotion regulation to success in the work place and how we view the world. More recent work has started to explore how these dimensions develop, why they have developed, and what they constitute. Of particular note, is the finding that warmth has two distinct components: Trust (T) and Sociability (S). In this work, we introduce Words of Warmth, the first large-scale repository of manually derived word–warmth (as well as word–trust and word–sociability) associations for over 26k English words. We show that the associations are highly reliable. We use the lexicons to study the rate at which children acquire WCTS words with age. Finally, we show that the lexicon enables a wide variety of bias and stereotype research through case studies on various target entities. Words of Warmth is freely available at: http://saifmohammad.com/warmth.html 2025.acl-long.922 @@ -13574,7 +13574,7 @@ SiyuanWangUniversity of Southern California ShengbinYue ZengfengHuangFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University ZhongyuWeiFudan University 18874-18893 The reward model has become increasingly important in alignment, assessment, and data construction for large language models (LLMs). Most existing researchers focus on enhancing reward models through data improvements, following the conventional training framework for reward models that directly optimizes the predicted rewards.In this paper, we propose a hybrid alignment framework **HAF-RM** for reward model training by introducing an additional constraint on token-level policy probabilities in addition to the reward score. It can simultaneously supervise the internal preference model at the token level and optimize the mapping layer of the reward model at the sequence level.Experiment results on five datasets sufficiently show the validity and effectiveness of our proposed hybrid framework for training a high-quality reward model.By decoupling the reward modeling procedure and incorporating hybrid supervision, our **HAF-RM** framework offers a principled and effective approach to enhancing the performance and alignment of reward models, a critical component in the responsible development of powerful language models. We release our code at [https://haf-rm.github.io](https://haf-rm.github.io). @@ -13764,7 +13764,7 @@ XinyueShen YunShenFlexera MichaelBackesCISPA Helmholtz Center for Information Security - YangZhangCISPA Helmholtz Center for Information Security + YangZhangCISPA Helmholtz Center for Information Security 19096-19111 Knowledge files have been widely used in large language model (LLM)-powered agents, such as GPTs, to improve response quality. However, concerns over the potential leakage of knowledge files have grown significantly. Existing studies demonstrate that adversarial prompts can induce GPTs to leak knowledge file content. Yet, it remains uncertain whether additional leakage vectors exist, particularly given the complex data flows across clients, servers, and databases in GPTs. In this paper, we present a comprehensive risk assessment of knowledge file leakage, leveraging a novel workflow inspired by Data Security Posture Management (DSPM). Through the analysis of 651,022 GPT metadata, 11,820 flows, and 1,466 responses, we identify five leakage vectors: metadata, GPT initialization, retrieval, sandboxed execution environments, and prompts. These vectors enable adversaries to extract sensitive knowledge file data such as titles, content, types, and sizes. Notably, the activation of the built-in tool Code Interpreter leads to a privilege escalation vulnerability, enabling adversaries to directly download original knowledge files with a 95.95% success rate. Further analysis reveals that 28.80% of leaked files are copyrighted, including digital copies from major publishers and internal materials from a listed company. In the end, we provide actionable solutions for GPT builders and platform providers to secure the GPT data supply chain. 2025.acl-long.936 @@ -13777,8 +13777,8 @@ QianChen ShengpengJi YuXi - WenWang - ChongZhangAlibaba + WenWang + ChongZhangAlibaba XianghuYueTianjin University ShiLiangZhang HaizhouLiThe Chinese University of Hong Kong (Shenzhen); National University of Singapore and National University of Singapore @@ -13935,7 +13935,7 @@ Cramming 1568 Tokens into a Single Vector and Back Again: Exploring the Limits of Embedding Space Capacity - YuriKuratovAIRI, Artificial Intelligence Research Institute and Moscow Institute of Physics and Technology + YuriKuratovAIRI, Artificial Intelligence Research Institute and Moscow Institute of Physics and Technology MikhailArkhipov AydarBulatov MikhailBurtsevLondon Institute for Mathematical Sciences @@ -14076,7 +14076,7 @@ ShenghuaHePAII INC TianXia ShijiaPanUniversity of California, Merced - FeiLiuEmory University + FeiLiuEmory University 19497-19521 LLMs have immense potential for generating plans, transforming an initial world state into a desired goal state. A large body of research has explored the use of LLMs for various planning tasks, from web navigation to travel planning and database querying. However, many of these systems are tailored to specific problems, making it challenging to compare them or determine the best approach for new tasks. There is also a lack of clear and consistent evaluation criteria. Our survey aims to offer a comprehensive overview of current LLM planners to fill this gap. It builds on foundational work by Kartam and Wilkins (1990) and examines six key performance criteria: completeness, executability, optimality, representation, generalization, and efficiency. For each, we provide a thorough analysis of representative works and highlight their strengths and weaknesses. Our paper also identifies crucial future directions, making it a valuable resource for both practitioners and newcomers interested in leveraging LLM planning to support agentic workflows. 2025.acl-long.958 @@ -14114,7 +14114,7 @@ JianZhuUniversity of British Columbia FarhanSamirUniversity of British Columbia EleanorChodroffUniversity of Zurich - David R.MortensenCarnegie Mellon University + David R.MortensenCarnegie Mellon University 19568-19585 We present ZIPA, a family of efficient speech models that advances the state-of-the-art performance of crosslinguistic phone recognition. We first curated IPA PACK++, a large-scale multilingual speech corpus with 17,000+ hours of normalized phone transcriptions and a novel evaluation set capturing unseen languages and sociophonetic variation. ZIPA, including transducer (ZIPA-T) and CTC-based (ZIPA-CR) variants, leverages the efficient Zipformer backbones and outperforms existing phone recognition systems with much fewer parameters. Further scaling via noisy student training on 11,000+ hours of pseudo-labeled multilingual data yields further improvement. While ZIPA achieves strong performance on benchmarks, error analysis reveals persistent limitations in modeling sociophonetic diversity, underscoring challenges for future research. 2025.acl-long.961 @@ -14184,7 +14184,7 @@ ManuelBrack JindřichLibovický KristianKersting - AlexanderFraser + AlexanderFraser 19656-19679 Text-to-image (T2I) generation models have achieved great results in image quality, flexibility, and text alignment, leading to widespread use. Through improvements in multilingual abilities, a larger community can access this technology. Yet, we show that multilingual models suffer from substantial gender bias. Furthermore, the expectation that results should be similar across languages does not hold. We introduce MAGBIG, a controlled benchmark designed to study gender bias in multilingual T2I models, and use it to assess the impact of multilingualism on gender bias. To this end, we construct a set of multilingual prompts that offers a carefully controlled setting accounting for the complex grammatical differences influencing gender across languages. Our results show strong gender biases and notable language-specific differences across models. While we explore prompt engineering strategies to mitigate these biases, we find them largely ineffective and sometimes even detrimental to text-to-image alignment. Our analysis highlights the need for research on diverse language representations and greater control over bias in T2I models. 2025.acl-long.966 @@ -14224,7 +14224,7 @@ <fixed-case>CU</fixed-case>-<fixed-case>MAM</fixed-case>: Coherence-Driven Unified Macro-Structures for Argument Mining DebelaGemechu - ChrisReedUniversity of Dundee + ChrisReedUniversity of Dundee 19731-19749 Argument Mining (AM) involves the automatic identification of argument structure in natural language. Traditional AM methods rely on micro-structural features derived from the internal properties of individual Argumentative Discourse Units (ADUs). However, argument structure is shaped by a macro-structure capturing the functional interdependence among ADUs. This macro-structure consists of segments, where each segment contains ADUs that fulfill specific roles to maintain coherence within the segment (**local coherence**) and across segments (**global coherence**). This paper presents an approach that models macro-structure, capturing both local and global coherence to identify argument structures. Experiments on heterogeneous datasets demonstrate superior performance in both in-dataset and cross-dataset evaluations. The cross-dataset evaluation shows that macro-structure enhances transferability to unseen datasets. 2025.acl-long.969 @@ -14266,7 +14266,7 @@ TongZhang BojieHuBeijing Jiaotong University YufengChen - JinanXuBeijing Jiaotong University + JinanXuBeijing Jiaotong University 19791-19807 In modern large language models (LLMs), LLM alignment is of crucial importance and is typically achieved through methods such as reinforcement learning from human feedback (RLHF) and direct preference optimization (DPO). However, in most existing methods for LLM alignment, all tokens in the response are optimized using a sparse, response-level reward or preference annotation. The ignorance of token-level rewards may erroneously punish high-quality tokens or encourage low-quality tokens, resulting in suboptimal performance and slow convergence speed. To address this issue, we propose AlignDistil, a RLHF-equivalent distillation method for token-level reward optimization. Specifically, we introduce the reward learned by DPO into the RLHF objective and theoretically prove the equivalence between this objective and a token-level distillation process, where the teacher distribution linearly combines the logits from the DPO model and a reference model. On this basis, we further bridge the accuracy gap between the reward from the DPO model and the pure reward model, by building a contrastive DPO reward with a normal and a reverse DPO model. Moreover, to avoid under- and over-optimization on different tokens, we design a token adaptive logit extrapolation mechanism to construct an appropriate teacher distribution for each token. Experimental results demonstrate the superiority of our AlignDistil over existing methods and showcase fast convergence due to its token-level distributional reward optimization. @@ -14332,7 +14332,7 @@ QinglinZhangAlibaba Group QianChen XihaoLiUniversity of North Carolina at Chapel Hill - WenWang + WenWang 19914-19928 Speaker diarization aims to segment an audio stream into homogeneous partitions based on speaker identity, playing a crucial role in speech comprehension and analysis. Mainstream speaker diarization systems rely only on acoustic information, making the task particularly challenging in complex acoustic environments in real-world applications. Recently, significant efforts have been devoted to audio-visual or audio-semantic multimodal modeling to enhance speaker diarization performance; however, these approaches still struggle to address the complexities of speaker diarization on spontaneous and unstructured multi-party conversations. To fully exploit meaningful dialogue patterns, we propose a novel multimodal approach that jointly utilizes audio, visual, and semantic cues to enhance speaker diarization. Our approach structures visual cues among active speakers and semantic cues in spoken content into a cohesive format known as pairwise constraints, and employs a semi-supervised clustering technique based on pairwise constrained propagation. Extensive experiments conducted on multiple multimodal datasets demonstrate that our approach effectively integrates audio-visual-semantic information into the clustering process for acoustic speaker embeddings and consistently outperforms state-of-the-art speaker diarization methods, while largely preserving the overall system framework. 2025.acl-long.977 @@ -14441,7 +14441,7 @@ Modeling the Evolution of <fixed-case>E</fixed-case>nglish Noun Compounds with Feature-Rich Diachronic Compositionality Prediction FilipMiletić - SabineSchulte im Walde + SabineSchulte im Walde 20071-20092 We analyze the evolution of English noun compounds, which we represent as vectors of time-specific values. We implement a wide array of methods to create a rich set of features, using them to classify compounds for present-day compositionality and to assess the informativeness of the corresponding linguistic patterns. Our best results use BERT – reflecting the similarity of compounds and sentence contexts – and we further capture relevant and complementary information across approaches. Leveraging these feature differences, we find that the development of low-compositional meanings is reflected by a parallel drop in compositionality and sustained semantic change. The same distinction is echoed in transformer processing: compositionality estimates require far less contextualization than semantic change estimates. 2025.acl-long.984 @@ -14455,7 +14455,7 @@ RaoyuanZhaoLudwig-Maximilians-Universität München FlorianEichin JonasFischerSaarland Informatics Campus, Max-Planck Institute - BarbaraPlankLudwig-Maximilians-Universität München + BarbaraPlankLudwig-Maximilians-Universität München 20093-20123 Prompt engineering for large language models is challenging, as even small prompt perturbations or model changes can significantly impact the generated output texts. Existing evaluation methods of LLM outputs, either automated metrics or human evaluation, have limitations, such as providing limited insights or being labor-intensive. We propose Spotlight, a new approach that combines both automation and human analysis. Based on data mining techniques, we automatically distinguish between random (decoding) variations and systematic differences in language model outputs. This process provides token patterns that describe the systematic differences and guide the user in manually analyzing the effects of their prompts and changes in models efficiently. We create three benchmarks to quantitatively test the reliability of token pattern extraction methods and demonstrate that our approach provides new insights into established prompt data. From a human-centric perspective, through demonstration studies and a user study, we show that our token pattern approach helps users understand the systematic differences of language model outputs. We are further able to discover relevant differences caused by prompt and model changes (e.g. related to gender or culture), thus supporting the prompt engineering process and human-centric model behavior research. 2025.acl-long.985 @@ -14499,7 +14499,7 @@ Improving Language and Modality Transfer in Translation by Character-level Modeling IoannisTsiamasFacebook and Universidad Politécnica de Cataluna DavidDaleFAIR at Meta - Marta R.Costa-jussàMeta + Marta R.Costa-jussàMeta 20171-20187 Current translation systems, despite being highly multilingual, cover only 5% of the world’s languages. Expanding language coverage to the long-tail of low-resource languages requires data-efficient methods that rely on cross-lingual and cross-modal knowledge transfer. To this end, we propose a character-based approach to improve adaptability to new languages and modalities. Our method leverages SONAR, a multilingual fixed-size embedding space with different modules for encoding and decoding. We use a teacher-student approach with parallel translation data to obtain a character-level encoder. Then, using ASR data, we train a lightweight adapter to connect a massively multilingual CTC ASR model (MMS), to the character-level encoder, potentially enabling speech translation from 1,000+ languages. Experimental results in text translation for 75 languages on FLORES+ demonstrate that our character-based approach can achieve better language transfer than traditional subword-based models, especially outperforming them in low-resource settings, and demonstrating better zero-shot generalizability to unseen languages. Our speech adaptation, maximizing knowledge transfer from the text modality, achieves state-of-the-art results in speech-to-text translation on the FLEURS benchmark on 33 languages, surpassing previous supervised and cascade models, albeit being a zero-shot model with minimal supervision from ASR data. 2025.acl-long.988 @@ -14511,7 +14511,7 @@ NiyatiBafnaJohns Hopkins University EmilyChang Nathaniel RomneyRobinsonDepartment of Computer Science, Whiting School of Engineering - David R.MortensenCarnegie Mellon University + David R.MortensenCarnegie Mellon University KentonMurrayJohns Hopkins University DavidYarowskyJohns Hopkins University HaleSirinJohns Hopkins University @@ -14527,7 +14527,7 @@ JulianKatz-SamuelsAmazon Adithya MDevraj HyokunYunAmazon - ChaoZhangGeorgia Institute of Technology + ChaoZhangGeorgia Institute of Technology YiXuAmazon YiPanAmazon BingYinAmazon @@ -14650,7 +14650,7 @@ JiaruiLiu AndyLiu XuhuiZhou - Mona T.DiabCarnegie Mellon University + Mona T.DiabCarnegie Mellon University MaartenSapCarnegie Mellon University 20434-20471 In this work, we tackle the challenge of embedding realistic human personality traits into LLMs. Previous approaches have primarily focused on prompt-based methods that describe the behavior associated with the desired personality traits, suffering from realism and validity issues. To address these limitations, we introduce BIG5-CHAT, a large-scale dataset containing 100,000 dialogues designed to ground models in how humans express their personality in text. Leveraging this dataset, we explore Supervised Fine-Tuning and Direct Preference Optimization as training-based methods to align LLMs more naturally with human personality patterns. Our methods outperform prompting on personality assessments such as BFI and IPIP-NEO, with trait correlations more closely matching human data. Furthermore, our experiments reveal that models trained to exhibit higher conscientiousness, higher agreeableness, lower extraversion, and lower neuroticism display better performance on reasoning tasks, aligning with psychological findings on how these traits impact human cognitive performance. To our knowledge, this work is the first comprehensive study to demonstrate how training-based methods can shape LLM personalities through learning from real human behaviors. @@ -14850,7 +14850,7 @@ <fixed-case>QQSUM</fixed-case>: A Novel Task and Model of Quantitative Query-Focused Summarization for Review-based Product Question Answering An QuangTang - XiuzhenZhangRoyal Melbourne Institute of Technology + XiuzhenZhangRoyal Melbourne Institute of Technology Minh NgocDinh ZhuangLiRoyal Melbourne Institute of Technology 20810-20831 @@ -14877,7 +14877,7 @@ VidyaSrinivas XuhaiXuColumbia University XinLiuGoogle - KumarAyushGoogle + KumarAyushGoogle IsaacGalatzer-Levy ShwetakPatelGoogle and University of Washington DanielMcDuffGoogle @@ -14938,7 +14938,7 @@ Consistent Client Simulation for Motivational Interviewing-based Counseling YizheYang PalakornAchananuparpSingapore Management University - HeyanHuangBeijing Institute of Technology + HeyanHuangBeijing Institute of Technology JingJiangAustralian National University and Singapore Management University Nicholas GabrielLimSingapore University of Social Sciences Cameron Tan ShiErn @@ -14987,7 +14987,7 @@ <fixed-case>CAMI</fixed-case>: A Counselor Agent Supporting Motivational Interviewing through State Inference and Topic Exploration YizheYang PalakornAchananuparpSingapore Management University - HeyanHuangBeijing Institute of Technology + HeyanHuangBeijing Institute of Technology JingJiangAustralian National University and Singapore Management University Phey LingKit Nicholas GabrielLimSingapore University of Social Sciences @@ -15017,7 +15017,7 @@ Targeted Syntactic Evaluation for Grammatical Error Correction AomiKoyamaTokyo Metropolitan University and KDDI Corporation MasatoMitaThe University of Tokyo - Su-YounYoonEduLab + Su-YounYoonEduLab YasufumiTakamaTokyo Metropolitan University MamoruKomachiHitotsubashi University 21108-21125 @@ -15142,7 +15142,7 @@ <fixed-case>LLM</fixed-case> Meets Scene Graph: Can Large Language Models Understand and Generate Scene Graphs? A Benchmark and Empirical Study DongilYang MinjinKimYonsei University - SunghwanKim + SunghwanKim Beong-wooKwakYonsei University MinjunPark JinseokHong @@ -15177,7 +15177,7 @@ WeipengJiang QianWangWuhan University ChaoShenXi’an Jiaotong University - YangLiuNanyang Technological University + YangLiuNanyang Technological University 21376-21403 Large Language Models (LLMs) have emerged as the new recommendation engines, surpassing traditional methods in both capability and scope, particularly in code generation. In this paper, we reveal a novel **provider bias** in LLMs: without explicit directives, these models show systematic preferences for services from specific providers in their recommendations (e.g., favoring Google Cloud over Microsoft Azure). To systematically investigate this bias, we develop an automated pipeline to construct the dataset, incorporating 6 distinct coding task categories and 30 real-world application scenarios. Leveraging this dataset, we conduct the **first** comprehensive empirical study of provider bias in LLM code generation across seven state-of-the-art LLMs, utilizing approximately 500 million tokens (equivalent to $5,000+ in computational costs). Our findings reveal that LLMs exhibit significant provider preferences, predominantly favoring services from Google and Amazon, and can autonomously modify input code to incorporate their preferred providers without users’ requests. Such a bias holds far-reaching implications for market dynamics and societal equilibrium, potentially contributing to digital monopolies. It may also deceive users and violate their expectations, leading to various consequences. We call on the academic community to recognize this emerging issue and develop effective evaluation and mitigation methods to uphold AI security and fairness. 2025.acl-long.1038 @@ -15211,7 +15211,7 @@ Neuron Empirical Gradient: Discovering and Quantifying Neurons’ Global Linear Controllability - XinZhao + XinZhao ZehuiJiang NaokiYoshinagaInstitute of Industrial Science, the University of Tokyo 21446-21477 @@ -15271,7 +15271,7 @@ ZiqingYangCISPA Helmholtz Center for Information Security XinyueShen MichaelBackesCISPA Helmholtz Center for Information Security - YangZhangCISPA Helmholtz Center for Information Security + YangZhangCISPA Helmholtz Center for Information Security 21538-21566 Jailbreak attacks aim to bypass the LLMs’ safeguards. While researchers have proposed different jailbreak attacks in depth, they have done so in isolation—either with unaligned settings or comparing a limited range of methods. To fill this gap, we present a large-scale evaluation of various jailbreak attacks. We collect 17 representative jailbreak attacks, summarize their features, and establish a novel jailbreak attack taxonomy. Then we conduct comprehensive measurement and ablation studies across nine aligned LLMs on 160 forbidden questions from 16 violation categories. Also, we test jailbreak attacks under eight advanced defenses. Based on our taxonomy and experiments, we identify some important patterns, such as heuristic-based attacks, which could achieve high attack success rates but are easy to mitigate by defenses. Our study offers valuable insights for future research on jailbreak attacks and defenses and serves as a benchmark tool for researchers and practitioners to evaluate them effectively. 2025.acl-long.1045 @@ -15310,7 +15310,7 @@ Enhancing Mathematical Reasoning in <fixed-case>LLM</fixed-case>s by Stepwise Correction ZhenyuWuXi’an Jiaotong University QingkaiZengAmazon - ZhihanZhang + ZhihanZhang ZhaoxuanTanUniversity of Notre Dame ChaoShenXi’an Jiaotong University MengJiangUniversity of Notre Dame @@ -15335,7 +15335,7 @@ DidiZhangSoochow University YaxinFan PeifengLiSoochow University, China - QiaomingZhuSoochow University + QiaomingZhuSoochow University 21656-21672 Goal-oriented proactive dialogue systems are designed to guide user conversations seamlessly towards specific objectives by planning a goal-oriented path. However, previous research has focused predominantly on optimizing these paths while neglecting the inconsistencies that may arise between generated responses and dialogue contexts, including user profiles, dialogue history, domain knowledge, and subgoals. To address this issue, we introduce a model-agnostic two-stage Consistency Reflection and Correction (CRC) framework. Specifically, in the consistency reflection stage, the model is prompted to reflect on the discrepancies between generated responses and dialogue contexts, identifying inconsistencies and suggesting possible corrections. In the consistency correction stage, the model generates responses that are more consistent with the dialogue context based on these reflection results. We conducted experiments on various model architectures with different parameter sizes, including encoder-decoder models (BART, T5) and decoder-only models (GPT-2, DialoGPT, Phi3, Mistral and LLaMA3), and the experimental results on three datasets demonstrate that our CRC framework significantly improves the consistency between generated responses and dialogue contexts. 2025.acl-long.1050 @@ -15466,7 +15466,7 @@ Typology-Guided Adaptation in Multilingual Models - NdapaNakasholeUniversity of California, San Diego + NdapaNakasholeUniversity of California, San Diego 21819-21835 Multilingual models often treat language diversity as a problem of data imbalance, overlooking structural variation. We introduce the *Morphological Index* (MoI), a typologically grounded metric that quantifies how strongly a language relies on surface morphology for noun classification. Building on MoI, we propose *MoI-MoE*, a Mixture of Experts model that routes inputs based on morphological structure. Evaluated on 10 Bantu languages—a large, morphologically rich and underrepresented family—MoI-MoE outperforms strong baselines, improving Swahili accuracy by 14 points on noun class recognition while maintaining performance on morphology-rich languages like Zulu. These findings highlight typological structure as a practical and interpretable signal for multilingual model adaptation. 2025.acl-long.1059 @@ -15506,7 +15506,7 @@ ZhishangXiang YilinXiao LeWang - JunhuiLiSoochow University, China + JunhuiLiSoochow University, China XinrunWangSingapore Management University JinsongSuXiamen University 21863-21882 @@ -15521,7 +15521,7 @@ HuanZhaoHunan University ZhixueZhaoUniversity of Sheffield, University of Sheffield XupengZhaHunan University - YangLiuHunan University + YangLiuHunan University ZhihuaJiang 21883-21896 We revisit knowledge-based visual reasoning (KB-VR) in light of modern advances in multimodal large language models (MLLMs), and make the following contributions: (i) We propose Visual Knowledge Card (VKC) – a novel image that incorporates not only internal visual knowledge (e.g., scene-aware information) detected from the raw image, but also external world knowledge (e.g., attribute or object knowledge) produced by a knowledge generator; (ii) We present VKC-based Multi-Image Reasoning (VKC-MIR) – a four-stage pipeline which harnesses a state-of-the-art scene perception engine to construct an initial VKC (Stage-1), a powerful LLM to generate relevant domain knowledge (Stage-2), an excellent image editing toolkit to introduce generated knowledge into the updated VKC (Stage-3), and finally, an emerging multi-image MLLM to solve the VKC-enhanced task (Stage-4). By performing experiments on three popular KB-VR benchmarks, our approach achieves new state-of-the-art results compared to previous top-performing models. @@ -15830,7 +15830,7 @@ <fixed-case>R</fixed-case>i<fixed-case>OT</fixed-case>: Efficient Prompt Refinement with Residual Optimization Tree ChenyiZhou - ZhengyanShi + ZhengyanShi YuanYaoZhejiang University LeiLiang HuajunChenZhejiang University @@ -15862,7 +15862,7 @@ Zi-AoMa TianLan YuehaoZhao - HeyanHuangBeijing Institute of Technology + HeyanHuangBeijing Institute of Technology Xian-LingMaoBeijing Institute of Technology 22340-22361 Driven by the remarkable progress in diffusion models, text-to-image generation has achieved substantial advancements, underscoring the urgent need for robust automatic quality assessment. This task is inherently complex, requiring evaluations that range from object presence and attribute correctness to relational consistency and visual fidelity. Consequently, current state-of-the-art MLLM-based approaches often rely on powerful commercial models such as GPT-4o, which offer superior reasoning and instruction-following capabilities but are not universally accessible. In contrast, while open-source MLLMs demonstrate promising skills in vision and language understanding, they underperform in comprehensive image quality assessment.To address these challenges, we propose a task decomposition evaluation framework based on GPT-4o to automatically construct a specialized training dataset, breaking down the multifaceted evaluation process into simpler sub-tasks and thus reducing learning complexity. Building on this dataset, we design novel training strategies to distill GPT-4o’s evaluation capabilities into a 7\text{B} open-source MLLM, MiniCPM-V-2.6, enabling it to better follow instructions across diverse assessment criteria. Furthermore, to reliably and comprehensively assess prior works and our proposed model, we manually annotate a meta-evaluation benchmark that includes chain-of-thought explanations alongside quality scores for generated images.Experimental results demonstrate that our distilled open-source MLLM significantly outperforms the current state-of-the-art GPT-4o-base baseline, VIEScore, with over 4.6% improvement in Spearman and Kendall correlations with human judgments. @@ -15998,7 +15998,7 @@ Towards the Law of Capacity Gap in Distilling Language Models - ChenZhangBeijing Institute of Technology + ChenZhangBeijing Institute of Technology QiuchiLi DaweiSongBeijing Institute of Technology and Open University ZheyuYeXiaohongshu Inc @@ -16020,12 +16020,12 @@ AkshayRaghavan ScottFeltman WhitneyRingwald - Ryan L.Boyd + Ryan L.Boyd BenjaminLuft CamiloRuggero NevilleRyant RomanKotov - H. AndrewSchwartz + H. AndrewSchwartz 22529-22544 Current speech encoding pipelines often rely on an additional text-based LM to get robust representations of human communication, even though SotA speech-to-text models often have a LM within. This work proposes an approach to improve the LM within an audio model such that the subsequent text-LM is unnecessary. We introduce **WhiSPA** (**Whi**sper with **S**emantic and **P**sychological **A**lignment), which leverages a novel audio training objective: contrastive loss with a language model embedding as a teacher. Using over 500k speech segments from mental health audio interviews, we evaluate the utility of aligning Whisper’s latent space with semantic representations from a text autoencoder (SBERT) and lexically derived embeddings of basic psychological dimensions: emotion and personality. Over self-supervised affective tasks and downstream psychological tasks, WhiSPA surpasses current speech encoders, achieving an average error reduction of 73.4% and 83.8%, respectively. WhiSPA demonstrates that it is not always necessary to run a subsequent text LM on speech-to-text output in order to get a rich psychological representation of human communication. 2025.acl-long.1098 @@ -16082,7 +16082,7 @@ YaoDuBeihang University TaoJi JianingWangMeituan - YangLiu + YangLiu YuanbinWu AiminZhouEast China Normal University MengdiZhang @@ -16175,7 +16175,7 @@ <fixed-case>XDAC</fixed-case>: <fixed-case>XAI</fixed-case>-Driven Detection and Attribution of <fixed-case>LLM</fixed-case>-Generated News Comments in <fixed-case>K</fixed-case>orean WooyoungGo HyoungshickKimSungkyunkwan University - AliceOhGoogle and Korea Advanced Institute of Science and Technology + AliceOhGoogle and Korea Advanced Institute of Science and Technology YongdaeKimKorea Advanced Institute of Science & Technology 22728-22750 Large language models (LLMs) generate human-like text, raising concerns about their misuse in creating deceptive content. Detecting LLM-generated comments (LGC) in online news is essential for preserving online discourse integrity and preventing opinion manipulation. However, effective detection faces two key challenges; the brevity and informality of news comments limit traditional methods, and the absence of a publicly available LGC dataset hinders model training, especially for languages other than English. To address these challenges, we propose a twofold approach. First, we develop an LGC generation framework to construct a high-quality dataset with diverse and complex examples. Second, we introduce XDAC (\textbf{X}AI-Driven \textbf{D}etection and \textbf{A}ttribution of LLM-Generated \textbf{C}omments), a framework utilizing explainable AI, designed for the detection and attribution of short-form LGC in Korean news articles. XDAC leverages XAI to uncover distinguishing linguistic patterns at both token and character levels. We present the first large-scale benchmark dataset, comprising 1.3M human-written comments from Korean news platforms and 1M LLM-generated comments from 14 distinct models. XDAC outperforms existing methods, achieving a 98.5% F1 score in LGC detection with a relative improvement of 68.1%, and an 84.3% F1 score in attribution. To validate real-world applicability, we analyze 5.24M news comments from Naver, South Korea’s leading online news platform, identifying 27,029 potential LLM-generated comments. @@ -16344,7 +16344,7 @@ ZiyiZhang YuleLiu MichaelBackesCISPA Helmholtz Center for Information Security - YangZhangCISPA Helmholtz Center for Information Security + YangZhangCISPA Helmholtz Center for Information Security XinleiHeThe Hong Kong University of Science and Technology 22975-23005 Social media platforms are experiencing a growing presence of AI-Generated Texts (AIGTs). However, the misuse of AIGTs could have profound implications for public opinion, such as spreading misinformation and manipulating narratives. Despite its importance, it remains unclear how prevalent AIGTs are on social media. To address this gap, this paper aims to quantify and monitor the AIGTs on online social media platforms. We first collect a dataset (SM-D) with around 2.4M posts from 3 major social media platforms: Medium, Quora, and Reddit. Then, we construct a diverse dataset (AIGTBench) to train and evaluate AIGT detectors. AIGTBench combines popular open-source datasets and our AIGT datasets generated from social media texts by 12 LLMs, serving as a benchmark for evaluating mainstream detectors. With this setup, we identify the best-performing detector (OSM-Det). We then apply OSM-Det to SM-D to track AIGTs across social media platforms from January 2022 to October 2024, using the AI Attribution Rate (AAR) as the metric. Specifically, Medium and Quora exhibit marked increases in AAR, rising from 1.77% to 37.03% and 2.06% to 38.95%, respectively. In contrast, Reddit shows slower growth, with AAR increasing from 1.31% to 2.45% over the same period. Our further analysis indicates that AIGTs on social media differ from human-written texts across several dimensions, including linguistic patterns, topic distributions, engagement levels, and the follower distribution of authors. We envision our analysis and findings on AIGTs in social media can shed light on future research in this domain. @@ -16381,7 +16381,7 @@ AngLv JianLuanXiaomi Corporation BinWangAI Lab, Xiaomi Inc. - WeiLiu + WeiLiu 23044-23056 Many positional encodings (PEs) are designed to exhibit long-term decay, based on an entrenched and long-standing inductive opinion: tokens farther away from the current position carry less relevant information. We argue that long-term decay is outdated in the era of LLMs, as LLMs are now applied to tasks demanding precise retrieval of in-context information from arbitrary positions. Firstly, we present empirical analyses on various PEs, demonstrating that models inherently learn attention with only a local-decay pattern while forming a U-shape pattern globally, contradicting the principle of long-term decay. Furthermore, we conduct a detailed analysis of rotary position encoding (RoPE, a prevalent relative positional encoding in LLMs), and found that the U-shape attention is caused by some learned components, which are also the key factor limiting RoPE’s expressiveness and extrapolation. Inspired by these insights, we propose High-frequency rotary Position Encoding (HoPE). HoPE replaces the specific components in RoPE with position-independent ones, retaining only high-frequency signals, which also breaks the principle of long-term decay in theory. HoPE achieves two major advantages: (1) Without constraints imposed by long-term decay, contradictory factors that limit attention optimization are removed. Thus, the model’s context awareness is enhanced. (2) HoPE exhibits greater robustness to the out-of-distribution behavior in attention patterns during extrapolation. The effectiveness of HoPE is validated through extensive experiments and with a large language model of up to 3 billion parameters. 2025.acl-long.1123 @@ -16541,7 +16541,7 @@ Employing Discourse Coherence Enhancement to Improve Cross-Document Event and Entity Coreference Resolution XinyuChen PeifengLiSoochow University, China - QiaomingZhuSoochow University + QiaomingZhuSoochow University 23272-23286 Cross-Document Coreference Resolution (CDCR) aims to identify and group together mentions of a specific event or entity that occur across multiple documents. In contrast to the within-document tasks, in which event and entity mentions are linked by rich and coherent contexts, cross-document mentions lack such critical contexts, which presents a significant challenge in establishing connections among them. To address this issue, we introduce a novel task Cross-Document Discourse Coherence Enhancement (CD-DCE) to enhance the discourse coherence between two cross-document event or entity mentions. Specifically, CD-DCE first selects coherent texts and then adds them between two cross-document mentions to form a new coherent document. Subsequently, the coherent text is employed to represent the event or entity mentions and to resolve any coreferent mentions. Experimental results on the three popular datasets demonstrate that our proposed method outperforms several state-of-the-art baselines. 2025.acl-long.1134 @@ -16643,7 +16643,7 @@ Tianyi AlexQiu BoyuanChen JiayiZhouPeking University - ChangyeLi + ChangyeLi HantaoLou JosefDaiPeking University YunhuaiLiuPeking University @@ -16771,7 +16771,7 @@ AnZhangNational University of Singapore YanyanZhaoHarbin Institute of Technology BingQinHarbin Institute of Technology - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore TingLiuHarbin Institute of Technology 23564-23587 Large language models (LLMs) have become increasingly central to AI applications worldwide, necessitating robust multilingual safety alignment to ensure secure deployment across diverse linguistic contexts. Existing preference learning methods for safety alignment, such as RLHF and DPO, are primarily monolingual and struggle with noisy multilingual data. To address these limitations, we introduce Multilingual reward gaP Optimization (MPO), a novel approach that leverages the well-aligned safety capabilities of the dominant language (e.g., English) to improve safety alignment across multiple languages. MPO directly minimizes the reward gap difference between the dominant language and target languages, effectively transferring safety capabilities while preserving the original strengths of the dominant language. Extensive experiments on three LLMs, LLaMA-3.1, Gemma-2 and Qwen2.5, validate MPO’s efficacy in multilingual safety alignment without degrading general multilingual utility. @@ -16790,7 +16790,7 @@ YuTsaoAcademia Sinica JunichiYamagishiNational Institute of Informatics YuxuanWangByteDance - ChaoZhangShanghai Artificial Intelligence Laboratory, Tsinghua University and University College London + ChaoZhangShanghai Artificial Intelligence Laboratory, Tsinghua University and University College London 23588-23609 This paper explores a novel perspective to speech quality assessment by leveraging natural language descriptions, offering richer, more nuanced insights than traditional numerical scoring methods. Natural language feedback provides instructive recommendations and detailed evaluations, yet existing datasets lack the comprehensive annotations needed for this approach. To bridge this gap, we introduce QualiSpeech, a comprehensive low-level speech quality assessment dataset encompassing 11 key aspects and detailed natural language comments that include reasoning and contextual insights. Additionally, we propose the QualiSpeech Benchmark to evaluate the low-level speech understanding capabilities of auditory large language models (LLMs). Experimental results demonstrate that finetuned auditory LLMs can reliably generate detailed descriptions of noise and distortion, effectively identifying their types and temporal characteristics. The results further highlight the potential for incorporating reasoning to enhance the accuracy and reliability of quality assessments. The dataset can be found at https://huggingface.co/datasets/tsinghua-ee/QualiSpeech. 2025.acl-long.1150 @@ -16801,7 +16801,7 @@ On the Relation Between Fine-Tuning, Topological Properties, and Task Performance in Sense-Enhanced Embeddings Deniz EkinYavas TimothéeBernardUniversité Paris Cité - BenoitCrabbéUniversité de Paris + BenoitCrabbéUniversité de Paris LauraKallmeyerHeinrich Heine University Düsseldorf, Germany 23610-23625 Topological properties of embeddings, such as isotropy and uniformity, are closely linked to their expressiveness, and improving these properties enhances the embeddings’ ability to capture nuanced semantic distinctions. However, fine-tuning can reduce the expressiveness of the embeddings of language models. This study investigates the relation between fine-tuning, topology of the embedding space, and task performance in the context of sense knowledge enhancement, focusing on identifying the topological properties that contribute to the success of sense-enhanced embeddings. We experiment with two fine-tuning methods: *Supervised Contrastive Learning (SCL)* and *Supervised Predictive Learning (SPL)*. Our results show that SPL, the most standard approach, exhibits varying effectiveness depending on the language model and is inconsistent in producing successful sense-enhanced embeddings. In contrast, SCL achieves this consistently. Furthermore, while the embeddings with only increased *sense-alignment* show reduced task performance, those that also exhibit high *isotropy* and balance *uniformity* with *sense-alignment* achieve the best results. Additionally, our findings indicate that supervised and unsupervised tasks benefit from these topological properties to varying degrees. @@ -16863,7 +16863,7 @@ Prediction Hubs are Context-Informed Frequent Tokens in <fixed-case>LLM</fixed-case>s Beatrix Miranda GinnNielsen IuriMacoccoUniversitat Pompeu Fabra - MarcoBaroniUniversitat Pompeu Fabra + MarcoBaroniUniversitat Pompeu Fabra 23715-23745 Hubness, the tendency for a few points to be among the nearest neighbours of a disproportionate number of other points, commonly arises when applying standard distance measures to high-dimensional data, often negatively impacting distance-based analysis. As autoregressive large language models (LLMs) operate on high-dimensional representations, we ask whether they are also affected by hubness. We first prove that the only large-scale representation comparison operation performed by LLMs, namely that between context and unembedding vectors to determine continuation probabilities, is not characterized by the concentration of distances phenomenon that typically causes the appearance of nuisance hubness. We then empirically show that this comparison still leads to a high degree of hubness, but the hubs in this case do not constitute a disturbance. They are rather the result of context-modulated frequent tokens often appearing in the pool of likely candidates for next token prediction. However, when other distances are used to compare LLM representations, we do not have the same theoretical guarantees, and, indeed, we see nuisance hubs appear. There are two main takeaways. First, hubness, while omnipresent in high-dimensional spaces, is not a negative property that needs to be mitigated when LLMs are being used for next token prediction. Second, when comparing representations from LLMs using Euclidean or cosine distance, there is a high risk of nuisance hubs and practitioners should use mitigation techniques if relevant. 2025.acl-long.1156 @@ -17292,7 +17292,7 @@ JiafengGuoInstitute of Computing Technolgy, Chinese Academy of Sciences LuluYu BaolongBi - XueqiChengInstitute of Computing Technology, Chinese Academy + XueqiChengInstitute of Computing Technology, Chinese Academy 24315-24329 Large language models (LLMs) exhibit impressive performance across diverse tasks but often struggle to accurately gauge their knowledge boundaries, leading to confident yet incorrect responses. This paper explores leveraging LLMs’ internal states to enhance their perception of knowledge boundaries from efficiency and risk perspectives. We investigate whether LLMs can estimate their confidence using internal states before response generation, potentially saving computational resources. Our experiments on datasets like Natural Questions, HotpotQA, and MMLU reveal that LLMs demonstrate significant pre-generation perception, which is further refined post-generation, with perception gaps remaining stable across varying conditions. To mitigate risks in critical domains, we introduce Consistency-based Confidence Calibration (C^3), which assesses confidence consistency through question reformulation. C^3 significantly improves LLMs’ ability to recognize their knowledge gaps, enhancing the unknown perception rate by 5.6% on NQ and 4.9% on HotpotQA. Our findings suggest that pre-generation confidence estimation can optimize efficiency, while C^3 effectively controls output risks, advancing the reliability of LLMs in practical applications. 2025.acl-long.1184 @@ -17316,8 +17316,8 @@ TianhuaZhangChinese University of Hong Kong, The Chinese University of Hong Kong XixinWuThe Chinese University of Hong Kong HongyinLuoMassachusetts Institute of Technology - James R.GlassMassachusetts Institute of Technology - Helen M.MengThe Chinese University of Hong Kong + James R.GlassMassachusetts Institute of Technology + Helen M.MengThe Chinese University of Hong Kong 24349-24364 Knowledge Graphs (KGs) can serve as reliable knowledge sources for question answering (QA) due to their structured representation of knowledge. Existing research on the utilization of KG for large language models (LLMs) prevalently relies on subgraph retriever or iterative prompting, overlooking the potential synergy of LLMs’ step-wise reasoning capabilities and KGs’ structural nature. In this paper, we present DoG (Decoding on Graph), a novel framework that facilitates a deep synergy between LLMs and KGs. We first define a concept, well-formed chain, which consists of a sequence of interrelated fact triplets on the KGs, starting from question entities and leading to answers. We argue that this concept can serve as a principle for making faithful and sound reasoning for KGQA. To enable LLMs to generate well-formed chains, we propose graph-aware constrained decoding, in which a constraint derived from the topology of the KG regulates the decoding process of the LLMs. This constrained decoding method ensures the generation of well-formed chains while making full use of the step-wise reasoning capabilities of LLMs. Based on the above, DoG, a training-free approach, is able to provide faithful and sound reasoning trajectories grounded on the KGs. Experiments across various KGQA tasks with different background KGs demonstrate that DoG achieves superior and robust performance. DoG also shows general applicability with various open-source LLMs. 2025.acl-long.1186 @@ -17417,7 +17417,7 @@ Evaluating <fixed-case>LLM</fixed-case>s for <fixed-case>P</fixed-case>ortuguese Sentence Simplification with Linguistic Insights Arthur Mariano Rocha De AzevedoScalercioUniversidade Federal Fluminense Elvis A. DeSouza - Maria José BocornyFinattoUniversidade Federal do Rio Grande do Sul + Maria José BocornyFinattoUniversidade Federal do Rio Grande do Sul AlinePaesUniversidade Federal Fluminense 24452-24477 Sentence simplification (SS) focuses on adapting sentences to enhance their readability and accessibility. While large language models (LLMs) match task-specific baselines in English SS, their performance in Portuguese remains underexplored. This paper presents a comprehensive performance comparison of 26 state-of-the-art LLMs in Portuguese SS, alongside two simplification models trained explicitly for this task and language. They are evaluated under a one-shot setting across scientific, news, and government datasets. We benchmark the models with our newly introduced Gov-Lang-BR corpus (1,703 complex-simple sentence pairs from Brazilian government agencies) and two established datasets: PorSimplesSent and Museum-PT. Our investigation takes advantage of both automatic metrics and large-scale linguistic analysis to examine the transformations achieved by the LLMs. Furthermore, a qualitative assessment of selected generated outputs provides deeper insights into simplification quality. Our findings reveal that while open-source LLMs have achieved impressive results, closed-source LLMs continue to outperform them in Portuguese SS. @@ -17535,7 +17535,7 @@ FuliFengUniversity of Science and Technology of China HamedZamaniUniversity of Massachusetts at Amherst XiangnanHeUniversity of Science and Technology of China - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore 24607-24649 In the era of large models, content generation is gradually shifting to Personalized Generation (PGen), tailoring content to individual preferences and needs. This paper presents the first comprehensive survey on PGen, investigating existing research in this rapidly growing field. We conceptualize PGen from a unified perspective, systematically formalizing its key components, core objectives, and abstract workflows. Based on this unified perspective, we propose a multi-level taxonomy, offering an in-depth review of technical advancements, commonly used datasets, and evaluation metrics across multiple modalities, personalized contexts, and tasks. Moreover, we envision the potential applications of PGen and highlight open challenges and promising directions for future exploration. By bridging PGen research across multiple modalities, this survey serves as a valuable resource for fostering knowledge sharing and interdisciplinary collaboration, ultimately contributing to a more personalized digital landscape. 2025.acl-long.1201 @@ -17584,7 +17584,7 @@ Leveraging In-Context Learning for Political Bias Testing of <fixed-case>LLM</fixed-case>s - PatrickHallerUniversity of Zurich + PatrickHallerUniversity of Zurich JannisVamvasUniversity of Zurich RicoSennrichUniversity of Zurich Lena AnnJägerUniversity of Zurich @@ -17793,7 +17793,7 @@ JosephGattoDartmouth College OmarSharifDartmouth College ParkerSeegmillerDartmouth College - Sarah MasudPreumDartmouth College + Sarah MasudPreumDartmouth College 25109-25131 Event Argument Extraction (EAE) is a daunting information extraction problem — with significant limitations in few-shot cross-domain (FSCD) settings. A common solution to FSCD modeling is data augmentation. Unfortunately, existing augmentation methods are not well-suited to a variety of real-world EAE contexts, including (i) modeling long documents (documents with over 10 sentences), and (ii) modeling challenging role types (i.e., event roles with little to no training data and semantically outlying roles). We introduce two novel LLM-powered data augmentation methods for generating extractive document-level EAE samples using zero in-domain training data. We validate the generalizability of our approach on four datasets — showing significant performance increases in low-resource settings. Our highest performing models provide a 13-pt increase in F1 score on zero-shot role extraction in FSCD evaluation. 2025.acl-long.1221 @@ -17817,7 +17817,7 @@ MarcoValentinoUniversity of Sheffield IqraZahidImperial College London IanPratt-HartmannUniversity of Opole and University of Manchester - RizaBatista-NavarroUniversity of Manchester + RizaBatista-NavarroUniversity of Manchester 25155-25168 Transformer models have achieved remarkable performance in many formal reasoning tasks. Nonetheless, the extent of their comprehension pertaining to logical semantics and rules of inference remains somewhat uncertain. Evaluating such understanding necessitates a rigorous examination of these models’ generalisation capacity to out-of-distribution data. In this study, we probe the generalisation prowess of Transformer models with respect to the hitherto unexplored domain of numerical satisfiability problems. Our investigation reveals that Transformers exhibit minimal scale and noise invariance, alongside limited vocabulary and number invariance. However, even when Transformer models experience a notable decline in performance on out-of-distribution test sets, they often still surpass the random baseline by a considerable margin. 2025.acl-long.1223 @@ -17828,7 +17828,7 @@ The Nature of <fixed-case>NLP</fixed-case>: Analyzing Contributions in <fixed-case>NLP</fixed-case> Papers AniketPramanickNEC and Technische Universität Darmstadt YufangHouIT:U Interdisciplinary Transformation University Austria, Technische Universität Darmstadt and IBM Research Ireland - Saif M.Mohammad + Saif M.Mohammad IrynaGurevychInstitute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt 25169-25191 Natural Language Processing (NLP) is an established and dynamic field. Despite this, what constitutes NLP research remains debated. In this work, we address the question by quantitatively examining NLP research papers. We propose a taxonomy of research contributions and introduce _NLPContributions_, a dataset of nearly 2k NLP research paper abstracts, carefully annotated to identify scientific contributions and classify their types according to this taxonomy. We also introduce a novel task of automatically identifying contribution statements and classifying their types from research papers. We present experimental results for this task and apply our model to ~29k NLP research papers to analyze their contributions, aiding in the understanding of the nature of NLP research. We show that NLP research has taken a winding path — with the focus on language and human-centric studies being prominent in the 1970s and 80s, tapering off in the 1990s and 2000s, and starting to rise again since the late 2010s. Alongside this revival, we observe a steady rise in dataset and methodological contributions since the 1990s, such that today, on average, individual NLP papers contribute in more ways than ever before. Our dataset and analyses offer a powerful lens for tracing research trends and offer potential for generating informed, data-driven literature surveys. @@ -17854,7 +17854,7 @@ Timothy E.BurdickDartmouth College Inas S.KhayalDartmouth College SarahDeLozier - Sarah MasudPreumDartmouth College + Sarah MasudPreumDartmouth College 25222-25240 Follow-up question generation is an essential feature of dialogue systems as it can reduce conversational ambiguity and enhance modeling complex interactions. Conversational contexts often pose core NLP challenges such as (i) extracting relevant information buried in fragmented data sources, and (ii) modeling parallel thought processes. These two challenges occur frequently in medical dialogue as a doctor asks questions based not only on patient utterances but also their prior EHR data and current diagnostic hypotheses. Asking medical questions in asynchronous conversations compounds these issues as doctors can only rely on static EHR information to motivate follow-up questions. To address these challenges, we introduce FollowupQ, a novel framework for enhancing asynchronous medical conversation.FollowupQ is a multi-agent framework that processes patient messages and EHR data to generate personalized follow-up questions, clarifying patient-reported medical conditions. FollowupQ reduces requisite provider follow-up communications by 34%. It also improves performance by 17% and 5% on real and synthetic data, respectively. We also release the first public dataset of asynchronous medical messages with linked EHR data alongside 2,300 follow-up questions written by clinical experts for the wider NLP research community. 2025.acl-long.1226 @@ -17881,7 +17881,7 @@ EmmanouilZaranisInstituto Superior Técnico GiuseppeAttanasioInstituto de Telecomunicações SwetaAgrawalGoogle - AndreMartinsInstituto Superior Técnico and Unbabel + AndreMartinsInstituto Superior Técnico and Unbabel 25261-25284 Quality estimation (QE)—the automatic assessment of translation quality—has recently become crucial across several stages of the translation pipeline, from data curation to training and decoding. While QE metrics have been optimized to align with human judgments, whether they encode social biases has been largely overlooked. Biased QE risks favoring certain demographic groups over others, e.g., by exacerbating gaps in visibility and usability. This paper defines and investigates gender bias of QE metrics and discusses its downstream implications for machine translation (MT). Experiments with state-of-the-art QE metrics across multiple domains, datasets, and languages reveal significant bias. When a human entity’s gender in the source is undisclosed, masculine-inflected translations score higher than feminine-inflected ones, and gender-neutral translations are penalized. Even when contextual cues disambiguate gender, using context-aware QE metrics leads to more errors in selecting the correct translation inflection for feminine referents than for masculine ones. Moreover, a biased QE metric affects data filtering and quality-aware decoding. Our findings underscore the need for a renewed focus on developing and evaluating QE metrics centered on gender. 2025.acl-long.1228 @@ -17949,7 +17949,7 @@ LangGao JiahuiGeng XiangliangZhangUniversity of Notre Dame - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence XiuyingChenMohamed bin Zayed University of Artificial Intelligence 25378-25398 Jailbreaking in Large Language Models (LLMs) is a major security concern as it can deceive LLMs into generating harmful text. However, understanding of how jailbreaking works remains limited, hindering the development of effective defense strategies. To address this issue, we conduct a large-scale analysis of seven different jailbreak methods and identify that disagreements among methods stem from insufficient observation samples.We introduce the concept of a safety boundary and discover that jailbreaks shift harmful activations outside this boundary, where LLMs become less sensitive to harmful information. Our analysis reveals that low and middle layers play a critical role in these shifts, while deeper layers have a lesser impact.Building on these insights, we propose a novel defense mechanism called Activation Boundary Defense (ABD), which adaptively constrains activations within the safety boundary. To enhance its effectiveness, we use Bayesian optimization to selectively apply the defense to the low and middle layers.Experiments on several benchmark datasets demonstrate that ABD achieves an average Defense Success Rate (DSR) of over 98% against various jailbreak attacks, with less than a 2% impact on the model’s general capabilities. @@ -17965,9 +17965,9 @@ JianpengChengMeta Bo-HsiangTsengApple PeterBoothroydApple - HectorMartinez AlonsoApple + HectorMartinez AlonsoApple DiarmuidO SeaghdhaApple - AndersJohannsen + AndersJohannsen 25399-25434 This work evaluates the potential of large language models (LLMs) to power digital assistants capable of complex action execution. Such assistants rely on pre-trained programming knowledge to execute multi-step goals by composing objects and functions defined in assistant libraries into action execution programs. To achieve this, we develop ASPERA, a framework comprising an assistant library simulation and a human-assisted LLM data generation engine. Our engine allows developers to guide LLM generation of high-quality tasks consisting of complex user queries, simulation state and corresponding validation programs, tackling data availability and evaluation robustness challenges. Alongside the framework we release Asper-Bench, an evaluation dataset of 250 challenging tasks generated using ASPERA, which we use to show that program generation grounded in custom assistant libraries is a significant challenge to LLMs compared to dependency-free code generation. 2025.acl-long.1234 @@ -18104,7 +18104,7 @@ WeiwenXuAlibaba Group RuochenZhao FangkaiJiao - ShafiqJotyNanyang Technological University and SalesForce.com + ShafiqJotyNanyang Technological University and SalesForce.com LidongBingShanda Group and Alibaba Group 25589-25604 Large language models excel at problem-solving but often struggle with complex reasoning and factual accuracy. While chain-of-thought and retrieval-augmented generation help break down problems and retrieve knowledge, they still falter on challenging tasks like competitive programming due to frequent reasoning errors and irrelevant retrieval. To address this, we introduce Critic-guided planning with Retrieval-augmentation, CR-Planner, a novel framework that leverages fine-tuned critic models to guide both reasoning and retrieval processes through planning. CR-Planner iteratively selects and executes sub-goals, guided by critic models. A sub-goal critic identifies promising sub-goals from reasoning, query generation, and retrieval, while an execution critic evaluates outputs of sub-goal executions. We employ Monte Carlo Tree Search to collect data for critic training, allowing systematic exploration of action sequences and effective navigation toward the final answer. We evaluate CR-Planner on challenging domain-knowledge-intensive and reasoning-heavy tasks, including competitive programming, theorem-driven math reasoning, and complex domain retrieval problems. It significantly outperforms baselines, demonstrating effectiveness in both reasoning and retrieval. @@ -18144,7 +18144,7 @@ <fixed-case>C</fixed-case>ultural<fixed-case>B</fixed-case>ench: A Robust, Diverse and Challenging Benchmark for Measuring <fixed-case>LM</fixed-case>s’ Cultural Knowledge Through Human-<fixed-case>AI</fixed-case> Red-Teaming Yu YingChiuUniversity of Washington LiweiJiang - Bill YuchenLinxAI and University of Washington + Bill YuchenLinxAI and University of Washington Chan YoungPark Shuyue StellaLiDepartment of Computer Science, University of Washington SahithyaRavi @@ -18208,7 +18208,7 @@ Sarah E.FinchEmory University Ellie S.Paek IkseonChoiEmory University - Jinho D.ChoiEmory University + Jinho D.ChoiEmory University 25789-25806 As chatbots become integral to daily life, personalizing systems is key for fostering trust, engagement, and inclusivity. This study examines how linguistic similarity affects chatbot performance, focusing on integrating African American English (AAE) into virtual agents to better serve the African American community. We develop text-based and spoken chatbots using large language models and text-to-speech technology, then evaluate them with AAE speakers against standard English chatbots. Our results show that while text-based AAE chatbots often underperform, spoken chatbots benefit from an African American voice and AAE elements, improving performance and preference. These findings underscore the complexities of linguistic personalization and the dynamics between text and speech modalities, highlighting technological limitations that affect chatbots’ AA speech generation and pointing to promising future research directions. 2025.acl-long.1252 @@ -18243,7 +18243,7 @@ Language Fusion for Parameter-Efficient Cross-lingual Transfer PhilippBorchertIÉSEG School of Management and KU Leuven IvanVulićGoogle DeepMind and University of Cambridge - Marie-FrancineMoensKU Leuven, KU Leuven + Marie-FrancineMoensKU Leuven, KU Leuven JochenDe WeerdtKU Leuven 25848-25868 Limited availability of multilingual text corpora for training language models often leads to poor performance on downstream tasks due to undertrained representation spaces for languages other than English. This ‘under-representation’ has motivated recent cross-lingual transfer methods to leverage the English representation space by e.g. mixing English and ‘non-English’ tokens at the input level or extending model parameters to accommodate new languages. However, these approaches often come at the cost of increased computational complexity. We propose Fusion for Language Representations (FLARE) in adapters, a novel method that enhances representation quality and downstream performance for languages other than English while maintaining parameter efficiency. FLARE integrates source and target language representations within low-rank (LoRA) adapters using lightweight linear transformations, maintaining parameter efficiency while improving transfer performance. A series of experiments across representative cross-lingual natural language understanding tasks, including natural language inference, question-answering and sentiment analysis, demonstrate FLARE’s effectiveness. FLARE achieves performance improvements of 4.9% for Llama 3.1 and 2.2% for Gemma 2 compared to standard LoRA fine-tuning on question-answering tasks, as measured by the exact match metric. @@ -18281,7 +18281,7 @@ Do Language Models Have Semantics? On the Five Standard Positions - AndersSøgaardCopenhagen University + AndersSøgaardCopenhagen University 25910-25922 We identify five positions on whether large language models (LLMs) and chatbots can be said to exhibit semantic understanding. These positions differ in whether they attribute semantics to LLMs and/or chatbots trained on feedback, what kind of semantics they attribute (inferential or referential), and in virtue of what they attribute referential semantics (internal or external causes). This allows for 2^^4=16 logically possible positions, but we have only seen people argue for five of these. Based on a pairwise comparison of these five positions, we conclude that the better theory of semantics in large language models is, in fact, a sixth combination: Both large language models and chatbots have inferential and referential semantics, grounded in both internal and external causes. 2025.acl-long.1258 @@ -18310,7 +18310,7 @@ PhillipRust RuchiraDhar DanielHershcovichUniversity of Copenhagen - AndersSøgaardCopenhagen University + AndersSøgaardCopenhagen University 25949-25982 This paper explores the effectiveness of Multimodal Large Language models (MLLMs) as assistive technologies for visually impaired individuals. We conduct a user survey to identify adoption patterns and key challenges users face with such technologies. Despite a high adoption rate of these models, our findings highlight concerns related to contextual understanding, cultural sensitivity, and complex scene understanding, particularly for individuals who may rely solely on them for visual interpretation. Informed by these results, we collate five user-centred tasks with image and video inputs, including a novel task on Optical Braille Recognition. Our systematic evaluation of twelve MLLMs reveals that further advancements are necessary to overcome limitations related to cultural context, multilingual support, Braille reading comprehension, assistive object recognition, and hallucinations. This work provides critical insights into the future direction of multimodal AI for accessibility, underscoring the need for more inclusive, robust, and trustworthy visual assistance technologies. 2025.acl-long.1260 @@ -18321,7 +18321,7 @@ <fixed-case>H</fixed-case>um<fixed-case>T</fixed-case> <fixed-case>D</fixed-case>um<fixed-case>T</fixed-case>: Measuring and controlling human-like language in <fixed-case>LLM</fixed-case>s MyraChengStanford University SunnyYu - DanJurafskyStanford University + DanJurafskyStanford University 25983-26008 Should LLMs generate language that makes them seem human? Human-like language might improve user experience, but might also lead to deception, overreliance, and stereotyping. Assessing these potential impacts requires a systematic way to measure human-like tone in LLM outputs. We introduce HumT and SocioT, metrics for human-like tone and other dimensions of social perceptions in text data based on relative probabilities from an LLM. By measuring HumT across preference and usage datasets, we find that users prefer less human-like outputs from LLMs in many contexts. HumT also offers insights into the perceptions and impacts of anthropomorphism: human-like LLM outputs are highly correlated with warmth, social closeness, femininity, and low status, which are closely linked to the aforementioned harms. We introduce DumT, a method using HumT to systematically control and reduce the degree of human-like tone while preserving model performance. DumT offers a practical approach for mitigating risks associated with anthropomorphic language generation. 2025.acl-long.1261 @@ -18404,7 +18404,7 @@ <fixed-case>QG</fixed-case>-<fixed-case>SMS</fixed-case>: Enhancing Test Item Analysis via Student Modeling and Simulation - BangNguyenUniversity of Notre Dame + BangNguyenUniversity of Notre Dame TingtingDu MengxiaYuUniversity of Notre Dame LawrenceAngraveUniversity of Illinois at Urbana-Champaign @@ -18421,7 +18421,7 @@ XueyingBai MudanChen GregDurrettUniversity of Texas at Austin - NathanaelChambersUS Naval Academy + NathanaelChambersUS Naval Academy NiranjanBalasubramanianState University of New York, Stony Brook 26169-26199 Understanding how events in a scenario causally connect with each other is important for effectively modeling and reasoning about events. But event reasoning remains a difficult challenge, and despite recent advances, Large Language Models (LLMs) still struggle to accurately identify causal connections between events. This struggle leads to poor performance on deeper reasoning tasks like event forecasting and timeline understanding. To address this challenge, we investigate the generation of causal event graphs (e.g., A enables B) as a parallel mechanism to help LLMs explicitly represent causality during inference. This paper evaluates both how to generate correct graphs as well as how graphs can assist reasoning. We propose a collaborative approach to causal graph generation where we use LLMs to simulate experts that focus on specific semantic relations. The experts engage in multiple rounds of discussions which are then consolidated by a final expert. Then, to demonstrate the utility of causal graphs, we use them on multiple downstream applications, and also introduce a new explainable event prediction task that requires a causal chain of events in the explanation. These explanations are more informative and coherent than baseline generations. Finally, our overall approach not finetuned on any downstream task, achieves competitive results with state-of-the-art models on both forecasting and next event prediction tasks. @@ -18433,7 +18433,7 @@ <fixed-case>L</fixed-case>ogic<fixed-case>P</fixed-case>ro: Improving Complex Logical Reasoning via Program-Guided Learning JinJiang YuchenYan - YangLiu + YangLiu JianingWangMeituan ShuaiPengPeking University XunliangCaiMeituan @@ -18501,7 +18501,7 @@ DongyuZhangDalian University of Technology JingRenRoyal Melbourne Institute of Technology ZiqiXuRoyal Melbourne Institute of Technology - XiuzhenZhangRoyal Melbourne Institute of Technology + XiuzhenZhangRoyal Melbourne Institute of Technology YiliaoSongUniversity of Adelaide and Royal Melbourne Institute of Technology HongfeiLin FengXiaRoyal Melbourne Institute of Technology @@ -18653,11 +18653,11 @@ HritikBansal JohnPalowitchGoogle ChrysovalantisAnastasiouGoogle - Sanket VaibhavMehtaGoogle + Sanket VaibhavMehtaGoogle Lalit KJainGoogle VirginiaAgliettiGoogle DeepMind DishaJindalGoogle - PeterChenGoogle + PeterChenGoogle NishanthDikkalaGoogle GladysTyenGoogle XinLiuGoogle @@ -18725,10 +18725,10 @@ YuchenSong KehaiChenHarbin Institute of Technology (Shenzhen) XuefengBai - MuyunYang + MuyunYang LiqiangNieHarbin Institute of Technology (Shenzhen) and Shandong University JieLiuHarbin Institute of Technology - TiejunZhaoHarbin Institute of Technology + TiejunZhaoHarbin Institute of Technology MinZhangHarbin Institute of Technology 26567-26583 Visual information has been introduced for enhancing machine translation (MT), and its effectiveness heavily relies on the availability of large amounts of bilingual parallel sentence pairs with manual image annotations. In this paper, we introduce a stable diffusion-based imagination network into a multimodal large language model (MLLM) to explicitly generate an image for each source sentence, thereby advancing the multimodel MT. Particularly, we build heuristic feedback with reinforcement learning to ensure the consistency of the generated image with the source sentence without the supervision of visual information, which breaks the high-cost bottleneck of image annotation in MT. Furthermore, the proposed method enables imaginative visual information to be integrated into text-only MT in addition to multimodal MT. Experimental results show that our model significantly outperforms existing multimodal MT and text-only MT, especially achieving an average improvement of more than 14 BLEU points on Multi30K and MSCOCO multimodal MT benchmarks. @@ -18818,9 +18818,9 @@ Mohammad RifqiFarhansyah IwanDarmawanMonash University AdryanKusumawardhanaKomisi Pemberantasan Korupsi - Genta IndraWinataCapital One + Genta IndraWinataCapital One Alham FikriAjiMohamed bin Zayed University of Artificial Intelligence - Derry TantiWijayaMonash University and Boston University + Derry TantiWijayaMonash University and Boston University 26732-26754 The Javanese language features a complex system of honorifics that vary according to the social status of the speaker, listener, and referent. Despite its cultural and linguistic significance, there has been limited progress in developing a comprehensive corpus to capture these variations for natural language processing (NLP) tasks. In this paper, we present Unggah-Ungguh, a carefully curated dataset designed to encapsulate the nuances of Unggah-Ungguh Basa, the Javanese speech etiquette framework that dictates the choice of words and phrases based on social hierarchy and context. Using Unggah-Ungguh, we assess the ability of language models (LMs) to process various levels of Javanese honorifics through classification and machine translation tasks. To further evaluate cross-lingual LMs, we conduct machine translation experiments between Javanese (at specific honorific levels) and Indonesian. Additionally, we explore whether LMs can generate contextually appropriate Javanese honorifics in conversation tasks, where the honorific usage should align with the social role and contextual cues. Our findings indicate that current LMs struggle with most honorific levels, exhibiting a bias toward certain honorific tiers. 2025.acl-long.1296 @@ -18833,7 +18833,7 @@ HaokeZhangSuzhou University JuntaoLi KehaiChenHarbin Institute of Technology (Shenzhen) - QiaomingZhuSoochow University + QiaomingZhuSoochow University MinZhangHarbin Institute of Technology, Shenzhen 26755-26769 Generative Reward Models (GenRMs) leverage synthesized Chains of Thought (CoT) to reduce the need for massive labeled data, but this approach introduces risks of overoptimization due to the inability to guarantee the correctness of the CoTs. Identifying and optimizing unexpected behaviors within these synthesized CoT remains a challenge, as it heavily depends on precise annotations of intermediate behavior, similar to process supervision. In this work, we introduce a criteria-based preference tree for reward modeling, where each path in the tree represents a reasoning trajectory based on synthesized criteria. Crucially, each reasoning trajectory can be independently optimized through RL algorithm. These fine-grained process reward signals are derived from the inference-time computations and predefined rules, eliminating the need for human supervision. In experiments, SyncPL showed significant improvements over baselines on multiple human preference benchmarks. We further demonstrate that synthesized data can be learned using a long CoT format, analogous to an o1-like model, further enhancing performance while keeping stability and efficiency during training. @@ -18858,7 +18858,7 @@ A Self-Denoising Model for Robust Few-Shot Relation Extraction LiangZhang - YangZhang + YangZhang ZiyaoLuWeChat AI FandongMengWeChat AI, Tencent Inc. JieZhou @@ -19008,7 +19008,7 @@ DungVo Yap HongXian Hai LeongChieuDSO National Laboratories - Kian Ming A.ChaiDSO National Laboratories + Kian Ming A.ChaiDSO National Laboratories JingJiangAustralian National University and Singapore Management University LiziLiaoSingapore Management University 26962-26983 @@ -19271,7 +19271,7 @@ A Dual-Perspective <fixed-case>NLG</fixed-case> Meta-Evaluation Framework with Automatic Benchmark and Better Interpretability XinyuHuPeking University MingqiGao - LiLin + LiLin ZhenghanYu XiaojunWan 27372-27395 @@ -19289,7 +19289,7 @@ GuangyuanShi YongxinXu YashaWang - Philip S.YuUniversity of Illinois Chicago + Philip S.YuUniversity of Illinois Chicago XuChuPeking University Xiao-MingWuHong Kong Polytechnic University 27396-27413 @@ -19337,7 +19337,7 @@ <fixed-case>MMDEND</fixed-case>: Dendrite-Inspired Multi-Branch Multi-Compartment Parallel Spiking Neuron for Sequence Modeling - KexinWang + KexinWang YuhongChou DiShang ShijieMei @@ -19558,7 +19558,7 @@ JuntianZhang ChuanqiChengRenmin University of China YuhanLiu - WeiLiu + WeiLiu JianLuanXiaomi Corporation RuiYanRenmin University of China 27782-27798 @@ -19571,9 +19571,9 @@ Online Iterative Self-Alignment for Radiology Report Generation TingXiao LeiShi - YangZhangTsinghua University + YangZhangTsinghua University HaoFengYangEast China University of Science and Technology - ZheWang + ZheWang ChenjiaBaiTeleAI, China Telecom 27799-27814 Radiology Report Generation (RRG) is an important research topic for relieving radiologists’ heavy workload. Existing RRG models mainly rely on supervised fine-tuning (SFT) based on different model architectures using data pairs of radiological images and corresponding radiologist-annotated reports. Recent research has shifted focus to post-training improvements, aligning RRG model outputs with human preferences using reinforcement learning (RL). However, the limited data coverage of high-quality annotated data poses risks of overfitting and generalization. This paper proposes a novel Online Iterative Self-Alignment (OISA) method for RRG that consists of four stages: self-generation of diverse data, self-evaluation for multi-objective preference data, self-alignment for multi-objective optimization and self-iteration for further improvement. Our approach allows for generating varied reports tailored to specific clinical objectives, enhancing the overall performance of the RRG model iteratively. Unlike existing methods, our framework significantly increases data quality and optimizes performance through iterative multi-objective optimization. Experimental results demonstrate that our method surpasses previous approaches, achieving state-of-the-art performance across multiple evaluation metrics. @@ -19607,7 +19607,7 @@ Evaluating Sequence Labeling on the basis of Information Theory - EnriqueAmigoUniversidad Nacional de Educación a Distancia + EnriqueAmigoUniversidad Nacional de Educación a Distancia ElenaÁlvarez-MelladoUniversidad Nacional de Educación a Distancia JulioGonzaloUniversidad Nacional de Educación a Distancia JorgeCarrillo-de-Albornoz @@ -19644,7 +19644,7 @@ XunjianYin XinyiWangUC Santa Barbara LiangmingPanUniversity of Arizona - LiLin + LiLin XiaojunWan William YangWangUC Santa Barbara 27890-27913 @@ -19672,7 +19672,7 @@ TaoGuiFudan University QiZhangFudan University XipengQiuFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University ZuxuanWuFudan University Yu-GangJiangFudan University 27914-27961 @@ -19701,7 +19701,7 @@ TaigaSomeya AnejSvete BrianDuSellDepartment of Computer Science, ETHZ - ETH Zurich - Timothy J.O’DonnellMcGill University, Mila and McGill University + Timothy J.O’DonnellMcGill University, Mila and McGill University MarioGiulianelliDepartment of Computer Science, ETHZ - ETH Zurich RyanCotterellSwiss Federal Institute of Technology 27995-28013 @@ -19715,7 +19715,7 @@ AdriánBazagaMicrosoft RexhinaBlloshmiAmazon BillByrneAmazon and University of Cambridge - Adriàde GispertAmazon + Adriàde GispertAmazon 28014-28033 Large Language Models (LLMs) have emerged as powerful tools for generating coherent text, understanding context, and performing reasoning tasks. However, they struggle with temporal reasoning, which requires processing time-related information such as event sequencing, durations, and inter-temporal relationships. These capabilities are critical for applications including question answering, scheduling, and historical analysis. In this paper, we introduce TISER, a novel framework that enhances the temporal reasoning abilities of LLMs through a multi-stage process that combines timeline construction with iterative self-reflection. Our approach leverages test-time scaling to extend the length of reasoning traces, enabling models to capture complex temporal dependencies more effectively. This strategy not only boosts reasoning accuracy but also improves the traceability of the inference process. Experimental results demonstrate state-of-the-art performance across multiple benchmarks, including out-of-distribution test sets, and reveal that TISER enables smaller open-source models to surpass larger closed-weight models on challenging temporal reasoning tasks. 2025.acl-long.1358 @@ -19729,7 +19729,7 @@ PaulFlanagan AlessandraPascale OisínRedmond - AnyaBelzDublin City University + AnyaBelzDublin City University YufangHouIT:U Interdisciplinary Transformation University Austria, Technische Universität Darmstadt and IBM Research Ireland 28034-28051 Extracting scientific evidence from biomedical studies for clinical research questions (e.g., Does stem cell transplantation improve quality of life in patients with medically refractory Crohn’s disease compared to placebo?) is a crucial step in synthesising biomedical evidence. In this paper, we focus on the task of document-level scientific evidence extraction for clinical questions with conflicting evidence. To support this task, we create a dataset called CochraneForest leveraging forest plots from Cochrane systematic reviews. It comprises 202 annotated forest plots, associated clinical research questions, full texts of studies, and study-specific conclusions. Building on CochraneForest, we propose URCA (Uniform Retrieval Clustered Augmentation), a retrieval-augmented generation framework designed to tackle the unique challenges of evidence extraction. Our experiments show that URCA outperforms the best existing methods by up to 10.3% in F1 score on this task. However, the results also underscore the complexity of CochraneForest, establishing it as a challenging testbed for advancing automated evidence synthesis systems. @@ -19745,7 +19745,7 @@ LongBaiInstitute of Computing Technology, Chinese Academy of Sciences XiaolongJinInstitute of Computing Technology, Chinese Academy of Sciences JiafengGuoInstitute of Computing Technolgy, Chinese Academy of Sciences - XueqiChengInstitute of Computing Technology, Chinese Academy + XueqiChengInstitute of Computing Technology, Chinese Academy 28052-28070 In this paper, we aim to enhance the robustness of Universal Information Extraction (UIE) by introducing a new benchmark dataset, a comprehensive evaluation, and a feasible solution. Existing robust benchmark datasets have two key limitations: 1) They generate only a limited range of perturbations for a single Information Extraction (IE) task, which fails to evaluate the robustness of UIE models effectively; 2) They rely on small models or handcrafted rules to generate perturbations, often resulting in unnatural adversarial examples. Considering the powerful generation capabilities of Large Language Models (LLMs), we introduce a new benchmark dataset for Robust UIE, called RUIE-Bench, which utilizes LLMs to generate more diverse and realistic perturbations across different IE tasks. Based on this dataset, we comprehensively evaluate existing UIE models and reveal that both LLM-based models and other models suffer from significant performance drops. To improve robustness and reduce training costs, we propose a data-augmentation solution that dynamically selects hard samples for iterative training based on the model’s inference loss. Experimental results show that training with only \textbf{15}% of the data leads to an average \textbf{8.1}% relative performance improvement across three IE tasks. Our code and dataset are available at: https://github.com/ICT-GoKnow/RobustUIE. 2025.acl-long.1360 @@ -19972,7 +19972,7 @@ Musa IzzanardiWijanarkoMonash University LuckySusanto KhumaisaNur’ainiMonash University - Derry TantiWijayaMonash University and Boston University + Derry TantiWijayaMonash University and Boston University Alham FikriAjiMohamed bin Zayed University of Artificial Intelligence 28371-28401 Indonesia is rich in languages and scripts. However, most NLP progress has been made using romanized text. In this paper, we present NusaAksara, a novel public benchmark for Indonesian languages that includes their original scripts. Our benchmark covers both text and image modalities and encompasses diverse tasks such as image segmentation, OCR, transliteration, translation, and language identification. Our data is constructed by human experts through rigorous steps. NusaAksara covers 8 scripts across 7 languages, including low-resource languages not commonly seen in NLP benchmarks. Although unsupported by Unicode, the Lampung script is included in this dataset. We benchmark our data across several models, from LLMs and VLMs such as GPT-4o, Llama 3.2, and Aya 23 to task-specific systems such as PP-OCR and LangID, and show that most NLP technologies cannot handle Indonesia’s local scripts, with many achieving near-zero performance. @@ -19990,7 +19990,7 @@ LinboQiao SongzhuMeiNational University of Defense Technology YijieWangNational University of Defense Technology - DongshengLiNational University of Defense Technology + DongshengLiNational University of Defense Technology 28402-28414 Rumor detection on social media has become an emerging topic. Traditional deep learning-based methods model rumors based on content, propagation structure, or user behavior, but these approaches are constrained by limited modeling capacity and insufficient training corpora. Recent studies have explored using LLMs for rumor detection through supervised fine-tuning (SFT), but face two issues: 1) unreliable samples sometimes mislead the model learning; 2) the model only learns the most salient input-output mapping and skips in-depth analyses of the rumored content for convenience. To address these issues, we propose an SFT-based LLM rumor detection model with Influence guided Sample selection and Game-based multi-perspective Analysis (ISGA). Specifically, we first introduce the Influence Score (IS) to assess the impact of samples on model predictions and select samples for SFT. We also approximate IS via Taylor expansion to reduce computational complexity. Next, we use LLMs to generate in-depth analyses of news content from multiple perspectives and model their collaborative process for prediction as a cooperative game. Then we utilize the Shapley value to quantify the contribution of each perspective for selecting informative perspective analyses. Experiments show that ISGA excels existing SOTA on three datasets. 2025.acl-long.1378 @@ -20105,7 +20105,7 @@ HuayuZhangLifeArc AbulHasanUniversity College London, University of London HonghanWuUniversity of Glasgow - BeatriceAlexUniversity of Edinburgh, University of Edinburgh + BeatriceAlexUniversity of Edinburgh, University of Edinburgh 28532-28562 In this work, we present a manually annotated corpus for Adverse Event (AE) extraction from discharge summaries of elderly patients, a population often underrepresented in clinical NLP resources. The dataset includes 14 clinically significant AEs—such as falls, delirium, and intracranial haemorrhage, along with contextual attributes like negation, diagnosis type, and in-hospital occurrence. Uniquely, the annotation schema supports both discontinuous and overlapping entities, addressing challenges rarely tackled in prior work. We evaluate multiple models using FlairNLP across three annotation granularities: fine-grained, coarse-grained, and coarse-grained with negation. While transformer-based models (e.g., BERT-cased) achieve strong performance on document-level coarse-grained extraction (F1 = 0.943), performance drops notably for fine-grained entity-level tasks (e.g., F1 = 0.675), particularly for rare events and complex attributes. These results demonstrate that despite high-level scores, significant challenges remain in detecting underrepresented AEs and capturing nuanced clinical language. Developed within a Trusted Research Environment (TRE), the dataset is available upon request via DataLoch and serves as a robust benchmark for evaluating AE extraction methods and supporting future cross-dataset generalisation. 2025.acl-long.1386 @@ -20285,8 +20285,8 @@ Know Your Mistakes: Towards Preventing Overreliance on Task-Oriented Conversational <fixed-case>AI</fixed-case> Through Accountability Modeling SuvodipDeyUniversity of Illinois at Urbana-Champaign Yi-JyunSun - GokhanTurUniversity of Illinois at Urbana-Champaign - DilekHakkani-TürUniversity of Illinois at Urbana-Champaign + GokhanTurUniversity of Illinois at Urbana-Champaign + DilekHakkani-TürUniversity of Illinois at Urbana-Champaign 28830-28843 Recent LLMs have enabled significant advancements for conversational agents. However, they are also well known to hallucinate, producing responses that seem plausible but are factually incorrect. On the other hand, users tend to over-rely on LLM-based AI agents, accepting AI’s suggestion even when it is wrong. Adding positive friction, such as explanations or getting user confirmations, has been proposed as a mitigation in AI-supported decision-making systems. In this paper, we propose an accountability model for LLM-based task-oriented dialogue agents to address user overreliance via friction turns in cases of model uncertainty and errors associated with dialogue state tracking (DST). The accountability model is an augmented LLM with an additional accountability head that functions as a binary classifier to predict the relevant slots of the dialogue state mentioned in the conversation. We perform our experiments with multiple backbone LLMs on two established benchmarks (MultiWOZ and Snips). Our empirical findings demonstrate that the proposed approach not only enables reliable estimation of AI agent errors but also guides the decoder in generating more accurate actions. We observe around 3% absolute improvement in joint goal accuracy (JGA) of DST output by incorporating accountability heads into modern LLMs. Self-correcting the detected errors further increases the JGA from 67.13 to 70.51, achieving state-of-the-art DST performance. Finally, we show that error correction through user confirmations (friction turn) achieves a similar performance gain, highlighting its potential to reduce user overreliance. 2025.acl-long.1399 @@ -20523,7 +20523,7 @@ Desmond U.Patton ShanaKleiner James R. ShepardIii - KathleenMcKeown + KathleenMcKeown 29192-29217 With a combination of quantitative experiments, human judgments, and qualitative analyses, we evaluate the quantity and quality of African American Language (AAL) representation in 12 predominantly English, open-source pretraining corpora. We specifically focus on the sources, variation, and naturalness of included AAL texts representing the AAL speaking community. We find that AAL is underrepresented in all evaluated pretraining corpora compared to US demographics, constituting as few as 0.007% and at most 0.18% of documents. We also find that more than 25% of AAL texts in C4 may be perceived as inappropriate for LLMs to generate and to reinforce harmful stereotypes. Finally, we find that most automated filters are more likely to conserve White Mainstream English (WME) texts over AAL in pretraining corpora. 2025.acl-long.1416 @@ -20538,7 +20538,7 @@ VarshiniReddyKensho Technologies SethEbnerKensho NileshKumar - RikKoncel-KedziorskiApple + RikKoncel-KedziorskiApple ChrisTannerMassachusetts Institute of Technology and Kensho 29218-29257 Some statements have one well-defined continuation (e.g., “the Eiffel Tower is in [Paris]"), whereas others have a natural distribution over multiple options (e.g., “the weighted coin flip was [Heads/Tails].") We argue that language model (LM) outputs should capture these natural distributions. Our work specifically tests whether LM output probabilities are calibrated to numeric information within their textual contexts. For example, if the context (the prompt) concerns two equally likely options (e.g., heads or tails for a fair coin), the LM output probabilities should also be equal. Likewise, in a context with nonuniformly likely events (e.g., rolling a pair with two dice) an LM should output proportionate probabilities. However, we find that even in simple settings, the best LMs (1) are poorly calibrated and (2) have systematic biases: artifacts like word identity, word order, and word frequency all impact calibration. For example, ‘gpt-4o-mini‘ often picks the first of two options presented in the prompt regardless of the options’ implied likelihoods, whereas ‘Llama-3.1-8B‘ picks the second. Models do not allocate probability mass among valid options in a calibrated manner. @@ -20566,7 +20566,7 @@ DilipVenkatesh RajDabreDepartment of Computer Science, Indian Institute of Technology, Madras, Indian Institute of Technology, Madras and National Institute of Information and Communications Technology (NICT), National Institute of Advanced Industrial Science and Technology AnoopKunchukuttanMicrosoft and Indian Institute of Technology, Madras - Mitesh MKhapraIndian Institute of Technology, Madras + Mitesh MKhapraIndian Institute of Technology, Madras 29297-29329 Evaluating machine-generated text remains a significant challenge in NLP, especially for non-English languages. Current methodologies, including automated metrics, human assessments, and LLM-based evaluations, predominantly focus on English, revealing a significant gap in multilingual evaluation frameworks. We introduce the Cross Lingual Auto Evaluation (CIA) Suite, an extensible framework that includes evaluator LLMs (Hercule) and a novel test set (Recon) specifically designed for multilingual evaluation. Our test set features 500 human-annotated instructions spanning various task capabilities along with human judgment scores across six languages. This would enable benchmarking of general-purpose multilingual LLMs and facilitate meta-evaluation of Evaluator LLMs. The proposed model, Hercule, is a cross-lingual evaluation model that addresses the scarcity of reference answers in the target language by learning to assign scores to responses based on easily available reference answers in English. Our experiments demonstrate that Hercule aligns more closely with human judgments compared to proprietary models, demonstrating the effectiveness of such cross-lingual evaluation in low resource scenarios. Further, it is also effective in zero-shot evaluation on unseen languages. This study is the first comprehensive examination of cross-lingual evaluation using LLMs, presenting a scalable and effective approach for multilingual assessment. All code, datasets, and models will be publicly available to enable further research in this important area. 2025.acl-long.1419 @@ -20720,7 +20720,7 @@ Bregman Conditional Random Fields: Sequence Labeling with Parallelizable Inference Algorithms CaioCorroSorbonne Université MathieuLacroixUniversité Paris Nord (Paris XIII) - Joseph LeRouxUniversité Paris 13 + Joseph LeRouxUniversité Paris 13 29557-29574 We propose a novel discriminative model for sequence labeling called Bregman conditional random fields (BCRF).Contrary to standard linear-chain conditional random fields,BCRF allows fast parallelizable inference algorithms based on iterative Bregman projections.We show how such models can be learned using Fenchel-Young losses, including extension for learning from partial labels.Experimentally, our approach delivers comparable results to CRF while being faster, and achieves better results in highly constrained settings compared to mean field, another parallelizable alternative. 2025.acl-long.1430 @@ -20754,8 +20754,8 @@ KalvinChangSchool of Computer Science, Carnegie Mellon University RebeccaByrnesCarnegie Mellon University AravindMysore - CarolynRoseSchool of Computer Science, Carnegie Mellon University - David R.MortensenCarnegie Mellon University + CarolynRoseSchool of Computer Science, Carnegie Mellon University + David R.MortensenCarnegie Mellon University 29628-29647 Historical linguists have long written “programs” that convert reconstructed words in an ancestor language into their attested descendants via ordered string rewrite functions (called sound laws) However, writing these programs is time-consuming, motivating the development of automated Sound Law Induction (SLI) which we formulate as Programming by Examples (PBE) with Large Language Models (LLMs) in this paper. While LLMs have been effective for code generation, recent work has shown that PBE is challenging but improvable by fine-tuning, especially with training data drawn from the same distribution as evaluation data. In this paper, we create a conceptual framework of what constitutes a “similar distribution” for SLI and propose four kinds of synthetic data generation methods with varying amounts of inductive bias to investigate what leads to the best performance. Based on the results, we create a SOTA open-source model for SLI as PBE (+6% pass rate with a third of the parameters of the second-best LLM) and also highlight exciting future directions for PBE research. 2025.acl-long.1432 @@ -21046,7 +21046,7 @@ ElenaÁlvarez-MelladoUniversidad Nacional de Educación a Distancia AlessiaBattistiUniversity of Zurich CuiDingUniversity of Zurich - AnneGöhringUniversity of Zurich + AnneGöhringUniversity of Zurich YingqiangGaoUniversity of Zurich ZifanJiang AndrianosMichailUniversity of Zurich @@ -21107,10 +21107,10 @@ <fixed-case>DRAMA</fixed-case>: Diverse Augmentation from Large Language Models to Smaller Dense Retrievers XueguangMa - Xi VictoriaLinMeta + Xi VictoriaLinMeta BarlasOguzMeta JimmyLinUniversity of Waterloo - Wen-tauYihMeta Platforms, Inc. + Wen-tauYihMeta Platforms, Inc. XilunChenMeta FAIR 30170-30186 Large language models (LLMs) have demonstrated strong effectiveness and robustness when fine-tuned as dense retrievers.However, their large parameter size presents significant computational challenges at inference time.While smaller retrievers offer better efficiency, they often fail to generalize effectively with limited supervised fine-tuning data.In this work, we introduce DRAMA, a training framework that leverages LLMs to train smaller generalizable dense retrievers.In particular, we adopt pruned LLMs as the backbone and train on diverse LLM-augmented data in a single-stage contrastive learning setup.Experiments show that DRAMA offers better multilingual and long-context capabilities than traditional encoder-based retrievers, and achieves strong performance across multiple tasks and languages. @@ -21214,7 +21214,7 @@ JananiD MansiGupta DanishPruthiIndian Institute of Science, Bangalore - Mitesh MKhapraIndian Institute of Technology, Madras + Mitesh MKhapraIndian Institute of Technology, Madras 30331-30380 Existing studies on fairness are largely Western-focused, making them inadequate for culturally diverse countries such as India. To address this gap, we introduce INDIC-BIAS, a comprehensive India-centric benchmark designed to evaluate fairness of LLMs across 85 identity groups encompassing diverse castes, religions, regions, and tribes. We first consult domain experts to curate over 1,800 socio-cultural topics spanning behaviors and situations, where biases and stereotypes are likely to emerge. Grounded in these topics, we generate and manually validate 20,000 real-world scenario templates to probe LLMs for fairness. We structure these templates into three evaluation tasks: plausibility, judgment, and generation. Our evaluation of 14 popular LLMs on these tasks reveals strong negative biases against marginalized identities, with models frequently reinforcing common stereotypes. Additionally, we find that models struggle to mitigate bias even when explicitly asked to rationalize their decision. Our evaluation provides evidence of both allocative and representational harms that current LLMs could cause towards Indian identities, calling for a more cautious usage in practical applications. We release INDIC-BIAS as an open-source benchmark to advance research on benchmarking and mitigating biases and stereotypes in the Indian context. 2025.acl-long.1465 @@ -21224,7 +21224,7 @@ <fixed-case>S</fixed-case>peech<fixed-case>IQ</fixed-case>: Speech-Agentic Intelligence Quotient Across Cognitive Levels in Voice Understanding by Large Language Models ZhenWan - Chao-Han HuckYangNVIDIA Research + Chao-Han HuckYangNVIDIA Research YahanYuKyoto University, Kyoto University JinchuanTian ShengLiInstitute of Science Tokyo @@ -21349,7 +21349,7 @@ AngLv YuhanLiu FloodSungMoonshot AI - WeiLiu + WeiLiu JianLuanXiaomi Corporation ShuoShang XiuyingChenMohamed bin Zayed University of Artificial Intelligence @@ -21429,7 +21429,7 @@ SayandeepSenInternational Business Machines PalaniKodeswaran AbhijitMishraUniversity of Texas at Austin and Apple - PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology + PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology 30639-30652 Recent advancements in large language models (LLMs) have significantly enhanced their ability to understand both natural language and code, driving their use in tasks like natural language-to-code (NL2Code) and code summarisation. However, LLMs are prone to hallucination—outputs that stray from intended meanings. Detecting hallucinations in code summarisation is especially difficult due to the complex interplay between programming and natural languages. We introduce a first-of-its-kind dataset, CodeSumEval, with ~10K samples, curated specifically for hallucination detection in code summarisation. We further propose a novel Entity Tracing Framework (ETF) that a) utilises static program analysis to identify code entities from the program and b) uses LLMs to map and verify these entities and their intents within generated code summaries. Our experimental analysis demonstrates the framework’s effectiveness, leading to a 73% F1 score. The proposed approach provides a method for detecting hallucinations by tracing entities from the summary to the code, allowing us to evaluate summary accuracy and localise the error within the summary. 2025.acl-long.1480 @@ -21483,7 +21483,7 @@ ChengxingZhou ZejunLi ZhihaoFanAlibaba Group - XuanjingHuangFudan University + XuanjingHuangFudan University ZhongyuWeiFudan University 30715-30727 Large Vision-Language Models (LVLMs) typically learn visual capacity through visual instruction tuning, involving updates to both a projector and their LLM backbones. Inspired by the concept of a visual region in the human brain, we investigate the existence of an analogous visual region within LLMs that functions as a cognitive core, and explore the potential of efficient training of LVLMs via selective layers tuning. Using Bunny-Llama-3-8B-V for detailed analysis and other three LVLMs for validation across diverse visual and textual tasks, we find that selectively updating 25% of LLMs layers, when sparsely and uniformly distributed, can preserve nearly 99% of visual performance and maintain or improve textual task results, while effectively reducing training time. Based on this targeted training approach, we further propose a novel visual region-based pruning paradigm, removing non-critical layers outside the visual region, which can achieve minimal performance loss. This study offers an effective and efficient strategy for LVLM training and inference by activating a layer-wise visual region within LLMs, which proves consistently effective across different models. @@ -21517,7 +21517,7 @@ YiboWang YangningLi KayLiuAmazon and University of Illinois Chicago - Philip S.YuUniversity of Illinois Chicago + Philip S.YuUniversity of Illinois Chicago 30750-30762 Test-time computing approaches, which leverage additional computational resources during inference, have been proven effective in enhancing large language model performance. This work introduces a novel, linearly scaling approach, TestNUC, that improves test-time predictions by leveraging the local consistency of neighboring unlabeled data-it classifies an input instance by considering not only the model’s prediction on that instance but also on neighboring unlabeled instances. We evaluate TestNUC across eight diverse datasets, spanning intent classification, topic mining, domain discovery, and emotion detection, demonstrating its consistent superiority over baseline methods such as standard prompting and self-consistency. Furthermore, TestNUC can be seamlessly integrated with existing test-time computing approaches, substantially boosting their performance. Our analysis reveals that TestNUC scales effectively with increasing amounts of unlabeled data and performs robustly across different embedding models, making it practical for real-world applications. Our code is available at https://github.com/HenryPengZou/TestNUC. 2025.acl-long.1486 @@ -21748,7 +21748,7 @@ ZhichaoDuan ZhenyuLi BowenDongTsinghua University, Tsinghua University and Tencent AI Lab - NingLiuShandong University + NingLiuShandong University XiuxingLiBeijing Institute of Technology JianyongWangTsinghua University, Tsinghua University 31102-31118 @@ -21777,7 +21777,7 @@ EunsuKim JunyeongPark JamesThorneKAIST - AliceOhGoogle and Korea Advanced Institute of Science and Technology + AliceOhGoogle and Korea Advanced Institute of Science and Technology 31137-31155 Text-to-image diffusion models have recently enabled the creation of visually compelling, detailed images from textual prompts. However, their ability to accurately represent various cultural nuances remains an open question. In our work, we introduce CULTDIFF benchmark, evaluating whether state-of-the-art diffusion models can generate culturally specific images spanning ten countries. We show that these models often fail to generate cultural artifacts in architecture, clothing, and food, especially for underrepresented country regions, by conducting a fine-grained analysis of different similarity aspects, revealing significant disparities in cultural relevance, description fidelity, and realism compared to real-world reference images. With the collected human evaluations, we develop a neural-based image-image similarity metric, namely, CULTDIFF-S, to predict human judgment on real and generated images with cultural artifacts. Our work highlights the need for more inclusive generative AI systems and equitable dataset representation over a wide range of cultures. 2025.acl-long.1503 @@ -21844,7 +21844,7 @@ JeremyBarnesUniversity of the Basque Country PabloGamalloUniversidad de Santiago de Compostela Iriade-Dios-Flores - RodrigoAgerriUniversity of the Basque Country + RodrigoAgerriUniversity of the Basque Country 31204-31218 We introduce a professionally translated extension of the TruthfulQA benchmark designed to evaluate truthfulness in Basque, Catalan, Galician, and Spanish. Truthfulness evaluations of large language models (LLMs) have primarily been focused on English. However, the ability of LLMs to maintain truthfulness across languages remains under-explored. Our study evaluates 12 state-of-the-art open LLMs, comparing base and instruction-tuned models using human evaluation, multiple-choice metrics, and LLM-as-a-Judge scoring. Our findings reveal that, while LLMs perform best in English and worst in Basque (the lowest-resourced language), overall truthfulness discrepancies across languages are smaller than anticipated. Furthermore, we show that LLM-as-a-Judge correlates more closely with human judgments than multiple-choice metrics, and that informativeness plays a critical role in truthfulness assessment. Our results also indicate that machine translation provides a viable approach for extending truthfulness benchmarks to additional languages, offering a scalable alternative to professional translation. Finally, we observe that universal knowledge questions are better handled across languages than context- and time-dependent ones, highlighting the need for truthfulness evaluations that account for cultural and temporal variability. Datasets, models and code are publicly available under open licenses. 2025.acl-long.1507 @@ -21865,7 +21865,7 @@ Batayan: A <fixed-case>F</fixed-case>ilipino <fixed-case>NLP</fixed-case> benchmark for evaluating Large Language Models - Jann RaileyMontalanAI Singapore and Ateneo de Manila University + Jann RaileyMontalanAI Singapore and Ateneo de Manila University Jimson PauloLayacan David DemitriAfrica Richell Isaiah S.FloresAteneo de Manila University @@ -21914,7 +21914,7 @@ Badr M.Abdullah WeiXue DietrichKlakow - BerndMöbiusUniversität des Saarlandes + BerndMöbiusUniversität des Saarlandes TaniaAvgustinova 31310-31322 Idioms are defined as a group of words with a figurative meaning not deducible from their individual components. Although modern machine translation systems have made remarkable progress, translating idioms remains a major challenge, especially for speech-to-text systems, where research on this topic is notably sparse. In this paper, we systematically evaluate idiom translation as compared to conventional news translation in both text-to-text machine translation (MT) and speech-to-text translation (SLT) systems across two language pairs (German to English, Russian to English). We compare state-of-the-art end-to-end SLT systems (SeamlessM4T SLT-to-text, Whisper Large v3) with MT systems (SeamlessM4T SLT-to-text, No Language Left Behind), Large Language Models (DeepSeek, LLaMA) and cascaded alternatives. Our results reveal that SLT systems experience a pronounced performance drop on idiomatic data, often reverting to literal translations even in higher layers, whereas MT systems and Large Language Models demonstrate better handling of idioms. These findings underscore the need for idiom-specific strategies and improved internal representations in SLT architectures. @@ -21932,7 +21932,7 @@ NunoGuimarãesINESC TEC ElisaSartori IonAndroutsopoulosAthens University of Economics and Business - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence GiovanniDa San MartinoUniversity of Padua JakubPiskorski 31323-31345 @@ -22015,7 +22015,7 @@ JihyoungJangPohang University of Science and Technology MinwookBae MinjiKimPohang University of Science and Technology - DilekHakkani-TürUniversity of Illinois at Urbana-Champaign + DilekHakkani-TürUniversity of Illinois at Urbana-Champaign HyounghunKimPohang University of Science and Technology 31481-31512 As chatbots continue to evolve toward human-like, real-world, interactions, multimodality remains an active area of research and exploration. So far, efforts to integrate multimodality into chatbots have primarily focused on image-centric tasks, such as visual dialogue and image-based instructions, placing emphasis on the “eyes” of human perception while neglecting the “ears”, namely auditory aspects. Moreover, these studies often center around static interactions that focus on discussing the modality rather than naturally incorporating it into the conversation, which limits the richness of simultaneous, dynamic engagement. Furthermore, while multimodality has been explored in multi-party and multi-session conversations, task-specific constraints have hindered its seamless integration into dynamic, natural conversations. To address these challenges, this study aims to equip chatbots with “eyes and ears” capable of more immersive interactions with humans. As part of this effort, we introduce a new multimodal conversation dataset, Multimodal Multi-Session Multi-Party Conversation (M^3C), and propose a novel multimodal conversation model featuring multimodal memory retrieval. Our model, trained on the M^3C, demonstrates the ability to seamlessly engage in long-term conversations with multiple speakers in complex, real-world-like settings, effectively processing visual and auditory inputs to understand and respond appropriately. Human evaluations highlight the model’s strong performance in maintaining coherent and dynamic interactions, demonstrating its potential for advanced multimodal conversational agents. @@ -22406,7 +22406,7 @@ Emergent Abilities of Large Language Models under Continued Pre-training for Language Adaptation AhmedElhady - EnekoAgirreUniversity of the Basque Country (UPV/EHU) + EnekoAgirreUniversity of the Basque Country (UPV/EHU) MikelArtetxeReka AI 32174-32186 Continued pretraining (CPT) is a popular approach to adapt existing large language models (LLMs) to new languages. When doing so, it is common practice to include a portion of English data in the mixture, but its role has not been carefully studied to date. In this work, we show that including English does not impact validation perplexity, yet it is critical for the emergence of downstream capabilities in the target language. We introduce a language-agnostic benchmark for in-context learning (ICL), which reveals catastrophic forgetting early on CPT when English is not included. This in turn damages the ability of the model to generalize to downstream prompts as measured by perplexity, even if it does not manifest in terms of accuracy until later in training, and can be tied to a big shift in the model parameters. Based on these insights, we introduce curriculum learning and exponential moving average (EMA) of weights as effective alternatives to mitigate the need for English. All in all, our work sheds light into the dynamics by which emergent abilities arise when doing CPT for language adaptation, and can serve as a foundation to design more effective methods in the future. @@ -22444,7 +22444,7 @@ ShreyaHavaldarUniversity of Pennsylvania AdamStein EricWongUniversity of Pennsylvania - LyleUngar + LyleUngar 32213-32230 Successful communication depends on the speaker’s intended style (i.e., what the speaker is trying to convey) aligning with the listener’s interpreted style (i.e., what the listener perceives). However, cultural differences often lead to misalignment between the two; for example, politeness is often lost in translation. We characterize the ways that LLMs fail to translate style – biasing translations towards neutrality and performing worse in non-Western languages. We mitigate these failures with RASTA (Retrieval-Augmented STylistic Alignment), a method that leverages learned stylistic concepts to encourage LLM translation to appropriately convey cultural communication norms and align style. 2025.acl-long.1550 @@ -22496,7 +22496,7 @@ KarthikeyanNatesan RamamurthyInternational Business Machines PrasannaSattigeriIBM Research WernerGeyer - SoumyaGhoshMERCK & CO., INC. + SoumyaGhoshMERCK & CO., INC. 32291-32317 Despite the increasing use of large language models (LLMs) for context-grounded tasks like summarization and question-answering, understanding what makes an LLM produce a certain response is challenging. We propose Multi-Level Explanations for Generative Language Models (MExGen), a technique to provide explanations for context-grounded text generation. MExGen assigns scores to parts of the context to quantify their influence on the model’s output. It extends attribution methods like LIME and SHAP to LLMs used in context-grounded tasks where (1) inference cost is high, (2) input text is long, and (3) the output is text. We conduct a systematic evaluation, both automated and human, of perturbation-based attribution methods for summarization and question answering. The results show that our framework can provide more faithful explanations of generated output than available alternatives, including LLM self-explanations. We open-source code for MExGen as part of the ICX360 toolkit: https://github.com/IBM/ICX360. 2025.acl-long.1553 @@ -22598,7 +22598,7 @@ Miguel GonzálezSaizUniversidad Politécnica de Madrid GonzaloMartínezUniversidad Carlos III de Madrid Gonzalo SantamariaGomezInstituto de Ingeniería del Conocimiento - RodrigoAgerriUniversity of the Basque Country + RodrigoAgerriUniversity of the Basque Country Nuria AldamaGarcíaIIC LuisChiruzzoFacultad de Ingeniería - Universidad de la República - Uruguay JavierCondeUniversidad Politécnica de Madrid @@ -22800,7 +22800,7 @@ YuchenHu BoshengDing RuiruiChenInstitute of High Performance Computing, Singapore, A*STAR - ShafiqJotyNanyang Technological University and SalesForce.com + ShafiqJotyNanyang Technological University and SalesForce.com 32732-32758 Large language models (LLMs) have shown impressive few-shot generalization on many tasks via in-context learning (ICL). Despite their success in showing such emergent abilities, the scale and complexity of larger models also lead to unprecedentedly high computational demands and deployment challenges. In reaction, researchers explore transferring the powerful capabilities of larger models to more efficient and compact models by typically aligning the output of smaller (student) models with that of larger (teacher) models. Existing methods either train student models on the generated outputs of teacher models or imitate their token-level probability distributions. However, these distillation methods pay little to no attention to the input, which also plays a crucial role in ICL. Based on the finding that the performance of ICL is highly sensitive to the selection of demonstration examples, we propose Bidirectional Alignment (BiAlign) to fully leverage the models’ preferences for ICL examples to improve the ICL abilities of student models. Specifically, we introduce the alignment of input preferences between student and teacher models by incorporating a novel ranking loss, in addition to aligning the token-level output distribution. With extensive experiments and analysis, we demonstrate that BiAlign can consistently outperform existing baselines on a variety of tasks involving language understanding, reasoning, and coding. 2025.acl-long.1573 @@ -22977,7 +22977,7 @@ DhairyaSumanIndian Institute of Technology, Delhi Mohammed Safi Ur RahmanKhanIndian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology and Indian Institute of Technology, Madras, Dhirubhai Ambani Institute Of Information and Communication Technology AnoopKunchukuttanMicrosoft and Indian Institute of Technology, Madras - Mitesh MKhapraIndian Institute of Technology, Madras + Mitesh MKhapraIndian Institute of Technology, Madras RajDabreDepartment of Computer Science, Indian Institute of Technology, Madras, Indian Institute of Technology, Madras and National Institute of Information and Communications Technology (NICT), National Institute of Advanced Industrial Science and Technology 32945-32966 Speech translation for Indian languages remains a challenging task due to the scarcity of large-scale, publicly available datasets that capture the linguistic diversity and domain coverage essential for real-world applications. Existing datasets cover a fraction of Indian languages and lack the breadth needed to train robust models that generalize beyond curated benchmarks. To bridge this gap, we introduce BhasaAnuvaad, the largest speech translation dataset for Indian languages, spanning over 44 thousand hours of audio and 17 million aligned text segments across 14 Indian languages and English. Our dataset is built through a threefold methodology: (a) aggregating high-quality existing sources, (b) large-scale web crawling to ensure linguistic and domain diversity, and (c) creating synthetic data to model real-world speech disfluencies. Leveraging BhasaAnuvaad, we train IndicSeamless, a state-of-the-art speech translation model for Indian languages that performs better than existing models. Our experiments demonstrate improvements in the translation quality, setting a new standard for Indian language speech translation. We will release all the code, data and model weights in the open-source, with permissive licenses to promote accessibility and collaboration. @@ -23153,7 +23153,7 @@ JincenziWu JianxunLian DingdongWang - Helen M.MengThe Chinese University of Hong Kong + Helen M.MengThe Chinese University of Hong Kong 33242-33271 Large Language Models (LLMs) are increasingly deployed worldwide, yet their ability to navigate cultural nuances remains underexplored. Misinterpreting cultural content can lead to AI-generated responses that are offensive or inappropriate, limiting their usability in global applications such as customer service, diplomatic communication, and online education. While prior research has evaluated cultural knowledge of LLMs, existing benchmarks fail to assess dynamic cultural competence-the ability to apply cultural knowledge effectively in real-world interactions. To address this gap, we introduce SocialDuolingo, a novel benchmark designed to evaluate cultural competence through multi-turn interactive intercultural scenarios. It comprises 3,060 human-written scenarios spanning 60 countries across six continents. Through extensive experiments on eight prominent LLMs, our findings reveal a significant gap between the cultural knowledge stored in these models and their ability to apply it effectively in cross-cultural communication. 2025.acl-long.1594 @@ -23240,7 +23240,7 @@ John JNayStanford University TanmayRajpurohitIndependent Researcher AshwinKalyanAllen Institute for Artificial Intelligence - BalaramanRavindranIndian Institute of Technology Madras + BalaramanRavindranIndian Institute of Technology Madras 33367-33390 We explore the ability of large language models (LLMs) to engage in subtle deception through strategically phrasing and intentionally manipulating information. This harmful behavior can be hard to detect, unlike blatant lying or unintentional hallucination. We build a simple testbed mimicking a legislative environment where a corporate lobbyist module is proposing amendments to bills that benefit a specific company while evading identification of this benefactor. We use real-world legislative bills matched with potentially affected companies to ground these interactions. Our results show that LLM lobbyists can draft subtle phrasing to avoid such identification by strong LLM-based detectors. Further optimization of the phrasing using LLM-based re-planning and re-sampling increases deception rates by up to 40 percentage points.Our human evaluations to verify the quality of deceptive generations and their retention of self-serving intent show significant coherence with our automated metrics and also help in identifying certain strategies of deceptive phrasing.This study highlights the risk of LLMs’ capabilities for strategic phrasing through seemingly neutral language to attain self-serving goals. This calls for future research to uncover and protect against such subtle deception. 2025.acl-long.1600 @@ -23250,7 +23250,7 @@ <fixed-case>A</fixed-case>fro<fixed-case>CS</fixed-case>-xs: Creating a Compact, High-Quality, Human-Validated Code-Switched Dataset for <fixed-case>A</fixed-case>frican Languages KayodeOlaleye - ArturoOncevayJ.P. Morgan Chase + ArturoOncevayJ.P. Morgan Chase MathieuSibueJ.P. Morgan Chase NombuyiseloZondi MichelleTerblancheUniversity of Pretoria @@ -23410,7 +23410,7 @@ A Measure of the System Dependence of Automated Metrics PiusVon DänikenUniversity of Zurich and ZHAW - Zürcher Hochschule für Angewandte Wissenschaften - Jan MilanDeriuZHAW - Zürcher Hochschule für Angewandte Wissenschaften + Jan MilanDeriuZHAW - Zürcher Hochschule für Angewandte Wissenschaften MarkCieliebakZurich University of Applied Sciences ZHAW 87-99 Automated metrics for Machine Translation have made significant progress, with the goal of replacing expensive and time-consuming human evaluations. These metrics are typically assessed by their correlation with human judgments, which captures the monotonic relationship between human and metric scores. However, we argue that it is equally important to ensure that metrics treat all systems fairly and consistently. In this paper, we introduce a method to evaluate this aspect. @@ -23422,7 +23422,7 @@ Call for Rigor in Reporting Quality of Instruction Tuning Data HyeonseokMoonKorea University JaehyungSeo - HeuiseokLim + HeuiseokLim 100-109 Instruction tuning is crucial for adapting large language models (LLMs) to align with user intentions. Numerous studies emphasize the significance of the quality of instruction tuning (IT) data, revealing a strong correlation between IT data quality and the alignment performance of LLMs. In these studies, the quality of IT data is typically assessed by evaluating the performance of LLMs trained with that data. However, we identified a prevalent issue in such practice: hyperparameters for training models are often selected arbitrarily without adequate justification. We observed significant variations in hyperparameters applied across different studies, even when training the same model with the same data. In this study, we demonstrate the potential problems arising from this practice and emphasize the need for careful consideration in verifying data quality. Through our experiments on the quality of LIMA data and a selected set of 1,000 Alpaca data points, we demonstrate that arbitrary hyperparameter decisions can make any arbitrary conclusion. 2025.acl-short.9 @@ -23523,7 +23523,7 @@ Improving Parallel Sentence Mining for Low-Resource and Endangered Languages ShuOkabeTechnische Universität München KatharinaHämmerl - AlexanderFraserTechnical University of Munich + AlexanderFraserTechnical University of Munich 196-205 While parallel sentence mining has been extensively covered for fairly well-resourced languages, pairs involving low-resource languages have received comparatively little attention.To address this gap, we present Belopsem, a benchmark of new datasets for parallel sentence mining on three language pairs where the source side is low-resource and endangered: Occitan-Spanish, Upper Sorbian-German, and Chuvash-Russian. These combinations also reflect varying linguistic similarity within each pair. We compare three language models in an established parallel sentence mining pipeline and apply two types of improvements to one of them, Glot500. We observe better mining quality overall by both applying alignment post-processing with an unsupervised aligner and using a cluster-based isotropy enhancement technique. These findings are crucial for optimising parallel data extraction for low-resource languages in a realistic way. 2025.acl-short.17 @@ -23560,20 +23560,20 @@ <fixed-case>LLM</fixed-case>s instead of Human Judges? A Large Scale Empirical Study across 20 <fixed-case>NLP</fixed-case> Evaluation Tasks AnnaBavarescoUniversity of Amsterdam - RaffaellaBernardiFree University of Bozen Bolzano + RaffaellaBernardiFree University of Bozen Bolzano LeonardoBertolazziUniversity of Trento DesmondElliottCopenhagen University and University of Copenhagen - RaquelFernándezUniversity of Amsterdam and University of Amsterdam + RaquelFernándezUniversity of Amsterdam and University of Amsterdam AlbertGattUtrecht University EsamGhaleb MarioGiulianelliDepartment of Computer Science, ETHZ - ETH Zurich MichaelHannaUniversity of Amsterdam AlexanderKollerSaarland University - AndreMartinsInstituto Superior Técnico and Unbabel + AndreMartinsInstituto Superior Técnico and Unbabel PhilippMondorfLudwig-Maximilians-Universität München VeraNeplenbroek SandroPezzelleUniversity of Amsterdam - BarbaraPlankLudwig-Maximilians-Universität München + BarbaraPlankLudwig-Maximilians-Universität München DavidSchlangenUniversity of Potsdam AlessandroSugliaHeriot-Watt University Aditya KSurikuchi @@ -23625,7 +23625,7 @@ Subword models struggle with word learning, but surprisal hides it BastianBunzeckUniversität Bielefeld - SinaZarrießBielefeld University + SinaZarrießBielefeld University 286-300 We study word learning in subword and character language models with the psycholinguistic lexical decision task. While subword LMs struggle to discern words and non-words with high accuracy, character LMs solve this task easily and consistently. Only when supplied with further contexts do subword LMs perform similarly to character models. Additionally, when looking at word-level and syntactic learning trajectories, we find that both processes are separable in character LMs. Word learning happens before syntactic learning, whereas both occur simultaneously in subword LMs. This raises questions about the adequacy of subword LMs for modeling language acquisition and positions character LMs as a viable alternative to study processes below the syntactic level. 2025.acl-short.24 @@ -23659,8 +23659,8 @@ <fixed-case>MUSTS</fixed-case>: <fixed-case>MU</fixed-case>ltilingual Semantic Textual Similarity Benchmark TharinduRanasingheLancaster University HansiHettiarachchiLancaster University - ConstantinOrasanUniversity of Surrey - RuslanMitkovLancaster University + ConstantinOrasanUniversity of Surrey + RuslanMitkovLancaster University 331-353 Predicting semantic textual similarity (STS) is a complex and ongoing challenge in natural language processing (NLP). Over the years, researchers have developed a variety of supervised and unsupervised approaches to calculate STS automatically. Additionally, various benchmarks, which include STS datasets, have been established to consistently evaluate and compare these STS methods. However, they largely focus on high-resource languages, mixed with datasets annotated focusing on relatedness instead of similarity and containing automatically translated instances. Therefore, no dedicated benchmark for multilingual STS exists. To solve this gap, we introduce the Multilingual Semantic Textual Similarity Benchmark (MUSTS), which spans 13 languages, including low-resource languages. By evaluating more than 25 models on MUSTS, we establish the most comprehensive benchmark of multilingual STS methods. Our findings confirm that STS remains a challenging task, particularly for low-resource languages. 2025.acl-short.27 @@ -23670,7 +23670,7 @@ Can Large Language Models Accurately Generate Answer Keys for Health-related Questions? DavisBartelsNational Institutes of Health - DeepakGuptaNational Institutes of Health + DeepakGuptaNational Institutes of Health DinaDemner-FushmanNational Library of Medicine 354-368 The evaluation of text generated by LLMs remains a challenge for question answering, retrieval augmented generation (RAG), summarization, and many other natural language processing tasks. Evaluating the factuality of LLM generated responses is particularly important in medical question answering, where the stakes are high. One method of evaluating the factuality of text is through the use of information nuggets (answer keys). Nuggets are text representing atomic facts that may be used by an assessor to make a binary decision as to whether the fact represented by said nugget is contained in an answer. Although manual nugget extraction is expensive and time-consuming, recent RAG shared task evaluations have explored automating the nuggetization of text with LLMs. In this work, we explore several approaches to nugget generation for medical question answering and evaluate their alignment with expert human nugget generation. We find providing an example and extracting nuggets from an answer to be the best approach to nuggetization. While, overall, we found the capabilities of LLMs to distill atomic facts limited, Llama 3.3 performed the best out of the models we tested. @@ -23755,7 +23755,7 @@ ZhipangWang YuHongSuzhou University WeihaoSunSoochow University - GuodongZhouSoochow University, China + GuodongZhouSoochow University, China 464-473 Implicit Discourse Relation Recognition (abbr., IDRR) is a NLP task of classifying argument pairs into different types of semantic relations. Arguments contain subtexts, some of which are beneficial to the perception of semantic relations. However, subtexts are connotative. The neural IDRR model fails to be aware of them without being given pertinent prompts. In this paper, we leverage LLaMA to generate subtexts for argument pairs, and verify the effectiveness of subtext-based IDRR. We construct an IDRR baseline using the decoder-only backbone LLaMA, and enhance it with subtext-aware relation reasoning. A confidence-diagnosed dual-channel network is used for collaboration between in-subtext and out-of-subtext IDRR. We experiment on PDTB-2.0 and PDTB-3.0 for both the main-level and secondary-level relation taxonomies. The test results show that our approach yields substantial improvements compared to the baseline, and achieves higher F1-scores on both benchmarks than the previous decoder-only IDRR models. We make the source codes and data publicly available. 2025.acl-short.35 @@ -23881,7 +23881,7 @@ HyeinSeo TaewookHwang YohanLeeElectronics and Telecommunications Research Institute - SangkeunJung + SangkeunJung 575-589 In English education tutoring, teacher feedback is essential for guiding students. Recently, AI-based tutoring systems have emerged to assist teachers; however, these systems require high-quality and large-scale teacher feedback data, which is both time-consuming and costly to generate manually. In this study, we propose FEAT, a cost-effective framework for generating teacher feedback, and have constructed three complementary datasets: (1) DIRECT-Manual (DM), where both humans and large language models (LLMs) collaboratively generate high-quality teacher feedback, albeit at a higher cost; (2) DIRECT-Generated (DG), an LLM-only generated, cost-effective dataset with lower quality;, and (3) DIRECT-Augmented (DA), primarily based on DG with a small portion of DM added to enhance quality while maintaining cost-efficiency. Experimental results showed that incorporating a small portion of DM (5–10%) into DG leads to superior performance compared to using 100% DM alone. 2025.acl-short.45 @@ -23925,7 +23925,7 @@ EricLe FerrandBoston College BoJiang JoshuaHartshorneMGH Institute of Health Professions - EmilyPrud’hommeauxBoston College + EmilyPrud’hommeauxBoston College 627-635 Incorporating automatic speech recognition (ASR) into field linguistics workflows for language documentation has become increasingly common. While ASR performance has seen improvements in low-resource settings, obstacles remain when training models on data collected by documentary linguists. One notable challenge lies in the way that this data is curated. ASR datasets built from spontaneous speech are typically recorded in consistent settings and transcribed by native speakers following a set of well designed guidelines. In contrast, field linguists collect data in whatever format it is delivered by their language consultants and transcribe it as best they can given their language skills and the quality of the recording. This approach to data curation, while valuable for linguistic research, does not always align with the standards required for training robust ASR models. In this paper, we explore methods for identifying speech transcriptions in fieldwork data that may be unsuitable for training ASR models. We focus on two complimentary automated measures of transcription quality that can be used to identify transcripts with characteristics that are common in field data but could be detrimental to ASR training. We show that one of the metrics is highly effective at retrieving these types of transcriptions. Additionally, we find that filtering datasets using this metric of transcription quality reduces WER both in controlled experiments using simulated fieldwork with artificially corrupted data and in real fieldwork corpora. 2025.acl-short.49 @@ -23948,7 +23948,7 @@ ÁlvaroVega-Hidalgo ArtemAbzaliev ThoreBergmanUniversity of Michigan - Ann Arbor - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan 645-659 Acoustic individual identification of wild animals is an essential task for understanding animal vocalizations within their social contexts, and for facilitating conservation and wildlife monitoring efforts. However, most of the work in this space relies on human efforts, as the development of methods for automatic individual identification is hindered by the lack of data. In this paper, we explore cross-species pre-training to address the task of individual classification in white-faced capuchin monkeys. Using acoustic embeddings from birds and humans, we find that they can be effectively used to identify the calls from individual monkeys. Moreover, we find that joint multi-species representations can lead to further improvements over the use of one representation at a time. Our work demonstrates the potential of cross-species data transfer and multi-species representations, as strategies to address tasks on species with very limited data. 2025.acl-short.51 @@ -24173,7 +24173,7 @@ <fixed-case>T</fixed-case>iger<fixed-case>LLM</fixed-case> - A Family of <fixed-case>B</fixed-case>angla Large Language Models - NishatRaihan + NishatRaihan MarcosZampieriGeorge Mason University 887-896 The development of Large Language Models (LLMs) remains heavily skewed towards English and a few other high-resource languages. This linguistic disparity is particularly evident for Bangla - the 5th most spoken language. A few initiatives attempted to create open-source Bangla LLMs with performance still behind high-resource languages and limited reproducibility. To address this gap, we introduce TigerLLM - a family of Bangla LLMs. Our results demonstrate that these models surpass all open-source alternatives and also outperform larger proprietary models like GPT3.5 across standard benchmarks, establishing TigerLLM as the new baseline for future Bangla language modeling. @@ -24286,9 +24286,9 @@ Different Speech Translation Models Encode and Translate Speaker Gender Differently DennisFucci MarcoGaidoFondazione Bruno Kessler - MatteoNegriFondazione Bruno Kessler + MatteoNegriFondazione Bruno Kessler LuisaBentivogliFondazione Bruno Kessler - AndreMartinsInstituto Superior Técnico and Unbabel + AndreMartinsInstituto Superior Técnico and Unbabel GiuseppeAttanasioInstituto de Telecomunicações 1005-1019 Recent studies on interpreting the hidden states of speech models have shown their ability to capture speaker-specific features, including gender. Does this finding also hold for speech translation (ST) models? If so, what are the implications for the speaker’s gender assignment in translation? We address these questions from an interpretability perspective, using probing methods to assess gender encoding across diverse ST models. Results on three language directions (English \rightarrow French/Italian/Spanish) indicate that while traditional encoder-decoder models capture gender information, newer architectures—integrating a speech encoder with a machine translation system via adapters—do not. We also demonstrate that low gender encoding capabilities result in systems’ tendency toward a masculine default, a translation bias that is more pronounced in newer architectures. @@ -24304,7 +24304,7 @@ HaozheZhao YuchiWang QingyanGuo - BaobaoChangPeking University + BaobaoChangPeking University 1020-1029 Semantic Parsing aims to capture the meaning of a sentence and convert it into a logical, structured form. Previous studies show that semantic parsing enhances the performance of smaller models (e.g., BERT) on downstream tasks. However, it remains unclear whether the improvements extend similarly to LLMs. In this paper, our empirical findings reveal that, unlike smaller models, directly adding semantic parsing results into LLMs reduces their performance. To overcome this, we propose SENSE, a novel prompting approach that embeds semantic hints within the prompt. Experiments show that SENSE consistently improves LLMs’ performance across various tasks, highlighting the potential of integrating semantic information to improve LLM capabilities. 2025.acl-short.79 @@ -24388,8 +24388,8 @@ Fardin AhsanSakib ZiweiZhuGeorge Mason University Karen TristerGraceGeorge Mason University - MelihaYetisgenUniversity of Washington - OzlemUzunerGeorge Mason University + MelihaYetisgenUniversity of Washington + OzlemUzunerGeorge Mason University 1097-1106 Social determinants of health (SDOH) extraction from clinical text is critical for downstream healthcare analytics. Although large language models (LLMs) have shown promise, they may rely on superficial cues leading to spurious predictions. Using the MIMIC portion of the SHAC (Social History Annotation Corpus) dataset and focusing on drug status extraction as a case study, we demonstrate that mentions of alcohol or smoking can falsely induce models to predict current/past drug use where none is present, while also uncovering concerning gender disparities in model performance. We further evaluate mitigation strategies—such as prompt engineering and chain-of-thought reasoning—to reduce these false positives, providing insights into enhancing LLM reliability in health domains. 2025.acl-short.86 @@ -24459,7 +24459,7 @@ Rethinking Evaluation Metrics for Grammatical Error Correction: Why Use a Different Evaluation Process than Human? - TakumiGotoNara Institute of Science and Technology, Japan + TakumiGotoNara Institute of Science and Technology, Japan YusukeSakaiNara Institute of Science and Technology, Japan TaroWatanabeNara Institute of Science and Technology, Japan 1165-1172 @@ -24480,7 +24480,7 @@ AryaTalebzadehMeta AdityaTayadeFacebook SinongWangFacebook - ShafiqJotyNanyang Technological University and SalesForce.com + ShafiqJotyNanyang Technological University and SalesForce.com HanFangMeta AI HaoMaMeta 1173-1182 @@ -24492,7 +24492,7 @@ <fixed-case>W</fixed-case>i<fixed-case>C</fixed-case>ke<fixed-case>D</fixed-case>: A Simple Method to Make Multiple Choice Benchmarks More Challenging AhmedElhady - EnekoAgirreUniversity of the Basque Country (UPV/EHU) + EnekoAgirreUniversity of the Basque Country (UPV/EHU) MikelArtetxeReka AI 1183-1192 We introduce WiCkeD, a simple method to increase the complexity of existing multiple-choice benchmarks by randomly replacing a choice with “None of the above”, a method often used in educational tests. We show that WiCkeD can be automatically applied to any existing benchmark, making it more challenging. We apply WiCkeD to 6 popular benchmarks and use it to evaluate 18 open-weight LLMs. The performance of the models drops12.1 points on average with respect to the original versions of the datasets. When using chainof-thought on 3 MMLU datasets, the performance drop for the WiCkeD variant is similar to the one observed when using the LLMs directly, showing that WiCkeD is also challenging for models with enhanced reasoning abilities. WiCkeD also uncovers that some models are more sensitive to the extra reasoning required, providing additional information with respect to the original benchmarks.We relase our code and data at github.com/anonymized. @@ -24564,7 +24564,7 @@ <fixed-case>M</fixed-case>ap<fixed-case>Q</fixed-case>a<fixed-case>T</fixed-case>or: An Extensible Framework for Efficient Annotation of Map-Based <fixed-case>QA</fixed-case> Datasets Mahir LabibDihanBangladesh University of Engineering and Technology Mohammed EunusAliBangladesh University of Engineering and Technology - Md RizwanParvezQatar Computing Research Institute + Md RizwanParvezQatar Computing Research Institute 1-10 Mapping and navigation services like Google Maps, Apple Maps, OpenStreetMap, are essential for accessing various location-based data, yet they often struggle to handle natural language geospatial queries. Recent advancements in Large Language Models (LLMs) show promise in question answering (QA), but creating reliable geospatial QA datasets from map services remains challenging. We introduce MapQaTor, an extensible open-source framework that streamlines the creation of reproducible, traceable map-based QA datasets. MapQaTor enables seamless integration with any maps API, allowing users to gather and visualize data from diverse sources with minimal setup. By caching API responses, the platform ensures consistent ground truth, enhancing the reliability of the data even as real-world information evolves. MapQaTor centralizes data retrieval, annotation, and visualization within a single platform, offering a unique opportunity to evaluate the current state of LLM-based geospatial reasoning while advancing their capabilities for improved geospatial understanding. Evaluation metrics show that, MapQaTor speeds up the annotation process by at least 30 times compared to manual methods, underscoring its potential for developing geospatial resources, such as complex map reasoning datasets. The website is live at: https://mapqator.github.io/ and a demo video is available at: https://youtu.be/bVv7-NYRsTw. 2025.acl-demo.1 @@ -24578,7 +24578,7 @@ MarcoValentinoUniversity of Sheffield DaniloCarvalhoUniversity of Manchester DhairyaDalalUniversity of Galway - AndreFreitasIdiap Research Institute and University of Manchester + AndreFreitasIdiap Research Institute and University of Manchester 11-21 A persistent challenge in AI is the effective integration of material and formal inference - the former concerning the plausibility and contextual relevance of arguments, while the latter focusing on their logical and structural validity. Large Language Models (LLMs), by virtue of their extensive pre-training on large textual corpora, exhibit strong capabilities in material inference. However, their reasoning often lacks formal rigour and verifiability. At the same time, LLMs’ linguistic competence positions them as a promising bridge between natural and formal languages, opening up new opportunities for combining these two modes of reasoning.In this paper, we introduce PEIRCE, a neuro-symbolic framework designed to unify material and formal inference through an iterative conjecture–criticism process. Within this framework, LLMs play the central role of generating candidate solutions in natural and formal languages, which are then evaluated and refined via interaction with external critique models. These critiques include symbolic provers, which assess formal validity, as well as soft evaluators that measure the quality of the generated arguments along linguistic and epistemic dimensions such as plausibility, coherence, and parsimony. While PEIRCE is a general-purpose framework, we demonstrate its capabilities in the domain of natural language explanation generation - a setting that inherently demands both material adequacy and formal correctness. 2025.acl-demo.2 @@ -24593,10 +24593,10 @@ GeyuLinInstitute of Infocomm Research, A*STAR ShuoSun, A*STAR BinWang - WenyuZhangI2R, A*STAR + WenyuZhangI2R, A*STAR XunlongZouA*STAR - Nancy F.Chen - AiTiAwI2R + Nancy F.Chen + AiTiAwI2R 22-30 We introduce MERaLiON-AudioLLM, the first general-purpose audio-based large language model designed for multitask learning, with a particular focus on Singlish understanding. Trained on 62 million multimodal instruction samples comprising a total of 260k hours of audio, it exhibits strong generalization across a diverse set of tasks, including—but not limited to—automatic speech recognition, spoken question answering, speech translation, and paralinguistic analysis. Our results show significant improvements in local speech recognition and task-specific understanding, making MERaLiON-AudioLLM a leading solution for region-specific AI applications. An interactive demo has been developed to enable user-friendly interactions, supported by a backend with customized caching and load-balancing mechanisms. We benchmark the model across a broad range of multilingual and multitask scenarios, where it demonstrates competitive performance compared to other open-source models. The demo page, model weights and videos are publically accessible. 2025.acl-demo.3 @@ -24643,7 +24643,7 @@ TaoGuiFudan University XipengQiuFudan University QiZhangFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 40-50 We introduce MPLSandbox, an out-of-the-box multi-programming language sandbox designed to provide unified and comprehensive feedback from compiler and analysis tools for Large Language Models (LLMs). It can automatically identify the programming language of the code, compiling and executing it within an isolated sub-sandbox to ensure safety and stability. In addition, MPLSandbox integrates both traditional and LLM-based code analysis tools, providing a comprehensive analysis of generated code. It also can be effortlessly integrated into the training and deployment of LLMs to improve the quality and correctness of generated code. It also helps researchers streamline their workflows for various LLM-based code-related tasks, reducing the development cost. To validate the effectiveness of MPLSandbox, we conduct extensive experiments by integrating it into several training and deployment scenarios, and employing it to optimize workflows for a wide range of downstream code tasks. Our goal is to enhance researcher productivity on LLM-based code tasks by simplifying and automating workflows through delegation to MPLSandbox. 2025.acl-demo.5 @@ -24657,7 +24657,7 @@ YeshengLiu Jing-ShuZheng XuejingLi - Jin-GeYaoBAAI + Jin-GeYaoBAAI BowenQinBeijing Academy of Artificial Intelligence RichengXuan XiYangBeijing Academy of Artificial Intelligence @@ -24699,7 +24699,7 @@ Textagon: Boosting Language Models with Theory-guided Parallel Representations - John P.LalorUniversity of Notre Dame + John P.LalorUniversity of Notre Dame RuiyangQin DavidDobolyiUniversity of Colorado at Boulder AhmedAbbasiUniversity of Notre Dame @@ -24793,7 +24793,7 @@ HannaSuominenAustralian National University Lois YinghuiHong NickThiebergerUniversity of Melbourne - TrevorCohnGoogle and The University of Melbourne + TrevorCohnGoogle and The University of Melbourne EkaterinaVylomovaThe University of Melbourne 129-139 Machine translation (MT) systems that support low-resource languages often struggle on specialized domains. While researchers have proposed various techniques for domain adaptation, these approaches typically require model fine-tuning, making them impractical for non-technical users and small organizations. To address this gap, we propose Tulun, a versatile solution for terminology-aware translation, combining neural MT with large language model (LLM)-based post-editing guided by existing glossaries and translation memories.Our open-source web-based platform enables users to easily create, edit, and leverage terminology resources, fostering a collaborative human-machine translation process that respects and incorporates domain expertise while increasing MT accuracy.Evaluations show effectiveness in both real-world and benchmark scenarios: on medical and disaster relief translation tasks for Tetun and Bislama, our system achieves improvements of 16.90-22.41 ChrF++ points over baseline MT systems. Across six low-resource languages on the FLORES dataset, Tulun outperforms both standalone MT and LLM approaches, achieving an average improvement of 2.8 ChrF++ points over NLLB-54B. Tulun is publicly accessible at https://bislama-trans.rapha.dev. @@ -24834,7 +24834,7 @@ WenhaoZheng ZhengzhongLiuMohamed bin Zayed University of Artificial Intelligence HongyiWangRutgers University and GenBio AI - Eric P.XingMohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University + Eric P.XingMohamed bin Zayed Univeristy of AI and School of Computer Science, Carnegie Mellon University HuaxiuYaoDepartment of Computer Science, University of North Carolina at Chapel Hill QirongHoMohamed bin Zayed University of Artificial Intelligence and Petuum, Inc. 159-166 @@ -24882,7 +24882,7 @@ DirkGroeneveldAllen Institute for Artificial Intelligence Rock YurenPang Pang WeiKohAllen Institute for Artificial Intelligence and University of Washington - Noah A.SmithUniversity of Washington and Allen Institute for Artificial Intelligence + Noah A.SmithUniversity of Washington and Allen Institute for Artificial Intelligence SophieLebrechtAllen Institute for Artificial Intelligence YejinChoiComputer Science Department, Stanford University and NVIDIA HannanehHajishirzi @@ -24996,7 +24996,7 @@ <fixed-case>G</fixed-case>en<fixed-case>GO</fixed-case> Ultra: an <fixed-case>LLM</fixed-case>-powered <fixed-case>ACL</fixed-case> Paper Explorer SotaroTakeshitaUniversität Mannheim TornikeTsereteliUniversität Mannheim - Simone PaoloPonzettoUniversität Mannheim + Simone PaoloPonzettoUniversität Mannheim 242-251 The ever-growing number of papers in natural language processing (NLP) poses the challenge of finding relevant papers. In our previous paper, we introduced GenGO, which complements NLP papers with various information, such as aspect-based summaries, to enable efficient paper exploration. While it delivers a better literature search experience, it lacks an interactive interface that dynamically produces information tailored to the user’s needs. To this end, we present an extension to our previous system, dubbed GenGO Ultra, which exploits large language models (LLMs) to dynamically generate responses grounded by published papers. We also conduct multi-granularity experiments to evaluate six text encoders and five LLMs. Our system is designed for transparency – based only on open-weight models, visible system prompts, and an open-source code base – to foster further development and research on top of our system: https://gengo-ultra.sotaro.io/ 2025.acl-demo.24 @@ -25024,7 +25024,7 @@ Du Q.Huynh MarkReynoldsUniversity of Western Australia YuanyiLuo - WeiLiuUniversity of Western Australia + WeiLiuUniversity of Western Australia 267-274 Acquiring structured data from domain-specific, image-based documents—such as scanned reports—is crucial for many downstream tasks but remains challenging due to document variability. Many of these documents exist as images rather than as machine-readable text, which requires human annotation to train automated extraction systems.We present DocSpiral, the first Human-in-the-Spiral assistive document annotation platform, designed to address the challenge of extracting structured information from domain-specific, image-based document collections.Our spiral design establishes an iterative cycle in which human annotations train models that progressively require less manual intervention. DocSpiral integrates document format normalization, comprehensive annotation interfaces, evaluation metrics dashboard, and API endpoints for the development of AI / ML models into a unified workflow.Experiments demonstrate that our framework reduces annotation time by at least 41% while showing consistent performance gains across three iterations during model training.By making this annotation platform freely accessible, we aim to lower barriers to AI/ML models development in document processing, facilitating the adoption of large language models in image-based, document-intensive fields such as geoscience and healthcare. The system is freely available at: https://app.ai4wa.com. The demonstration video is available: https://app.ai4wa.com/docs/docspiral/demo. 2025.acl-demo.26 @@ -25074,7 +25074,7 @@ RitaSevastjanovaETHZ - ETH Zurich RebeccaKehlbeckUniversität Konstanz TobiasStähle - Daniel A.KeimUniversität Konstanz + Daniel A.KeimUniversität Konstanz OliverDeussenUniversity of Konstanz AndreasSpitzUniversität Konstanz MennatallahEl-AssadyDepartment of Computer Science, ETHZ - ETH Zurich @@ -25117,7 +25117,7 @@ DimitraZografistouIndependent YohanJoSeoul National University JohnLawrenceUniversity of Dundee - ChrisReedUniversity of Dundee + ChrisReedUniversity of Dundee 318-328 Despite extensive research in Argument Mining (AM), the field faces significant challenges in limited reproducibility, difficulty in comparing systems due to varying task combinations, and a lack of interoperability caused by the heterogeneous nature of argumentation theory. These challenges are further exacerbated by the absence of dedicated tools, with most advancements remaining isolated research outputs rather than reusable systems. The \texttt{oAMF} (Open Argument Mining Framework) addresses these issues by providing an open-source, modular, and scalable platform that unifies diverse AM methods. Initially released with seventeen integrated modules, the \texttt{oAMF} serves as a starting point for researchers and developers to build, experiment with, and deploy AM pipelines while ensuring interoperability and allowing multiple theories of argumentation to co-exist within the same framework. Its flexible design supports integration via Python APIs, drag-and-drop tools, and web interfaces, streamlining AM development for research and industry setup, facilitating method comparison, and reproducibility. 2025.acl-demo.31 @@ -25129,7 +25129,7 @@ Bel Esprit: Multi-Agent Framework for Building <fixed-case>AI</fixed-case> Model Pipelines YunsuKimaiXplain, Inc. AhmedelmogtabaAbdelaziz - ThiagoCastro FerreiraUniversidade Federal de Minas Gerais + ThiagoCastro FerreiraUniversidade Federal de Minas Gerais MohamedAl-BadrashinyaiXplain HassanSawafaiXplain 329-339 @@ -25169,7 +25169,7 @@ Dialz: A Python Toolkit for Steering Vectors ZaraSiddique LiamTurnerCardiff University - LuisEspinosa-AnkeCardiff University and AMPLYFI + LuisEspinosa-AnkeCardiff University and AMPLYFI 363-375 We introduce *Dialz*, a Python library for advancing research on steering vectors for open-source LMs. Steering vectors allow users to modify activations at inference time to amplify or weaken a ‘concept’, e.g. honesty or positivity, providing a more powerful alternative to prompting or fine-tuning. Dialz supports a diverse set of tasks, including creating contrastive pair datasets, computing and applying steering vectors, and visualizations. Unlike existing libraries, Dialz emphasizes modularity and usability, enabling both rapid prototyping and in-depth analysis. We demonstrate how Dialz can be used to reduce harmful outputs such as stereotypes, while also providing insights into model behaviour across different layers. We release Dialz with full documentation, tutorials, and support for popular open-source models to encourage further research in safe and controllable language generation. Dialz enables faster research cycles and facilitates insights into model interpretability, paving the way for safer, more transparent, and more reliable AI systems. 2025.acl-demo.35 @@ -25180,7 +25180,7 @@ <fixed-case>FORG</fixed-case>3<fixed-case>D</fixed-case>: Flexible Object Rendering for Generating Vision-Language Spatial Reasoning Data from 3<fixed-case>D</fixed-case> Scenes OscarPangVector Institute and University of Toronto, Scarborough - FredaShiUniversity of Waterloo and Vector Institute + FredaShiUniversity of Waterloo and Vector Institute 376-384 We introduce FORG3D, a 3D rendering toolkit developed with Blender and Python, which synthesizes vision-language data for two primary purposes: (1) supporting human cognitive experiments that require fine-grained control over material and (2) analyzing and improving the visual reasoning capabilities of large vision-language models. The toolkit provides flexible and precise control over object placement, orientation, inter-object distances, and camera configurations while automatically generating detailed spatial metadata. Additionally, it includes a built-in feature for integrating AI-generated backgrounds, enhancing the realism of synthetic scenes. FORG3D is publicly available at https://github.com/compling-wat/FORG3D, and a video demonstration is available at https://www.youtube.com/watch?v=QvIqib_PU8A. 2025.acl-demo.36 @@ -25211,7 +25211,7 @@ TatsuyaIshigakiAIST, National Institute of Advanced Industrial Science and Technology KotaroFunakoshiInstitute of Science Tokyo HiroyaTakamuraAIST, National Institute of Advanced Industrial Science and Technology - ManabuOkumuraInstitute of Science Tokyo and Tokyo Institute of Technology, Tokyo Institute of Technology + ManabuOkumuraInstitute of Science Tokyo and Tokyo Institute of Technology, Tokyo Institute of Technology 394-404 Previous research on sports commentary generation has primarily focused on describing major events in the match.However, real-world commentary often includes comments beyond what is visible in the video content, e.g., “Florentina has acquired him for 7 million euros.”For enhancing the viewing experience with such background information,we developed an audio commentary system for football matches that generates utterances with background information, as well as play-by-play commentary.Our system first extracts visual information, and determines whether it is an appropriate timing to produce an utterance.Then it decides which type of utterance to generate: play-by-play or background information. In the latter case, the system leverages external knowledge through retrieval-augmented generation. 2025.acl-demo.38 @@ -25260,8 +25260,8 @@ Shafiuddin RehanAhmed Abijith TrichurRamachandran JeffreyFlaniganUniversity of California, Santa Cruz - MarthaPalmerUniversity of Colorado at Boulder - JamesMartinUniversity of Colorado at Boulder + MarthaPalmerUniversity of Colorado at Boulder + JamesMartinUniversity of Colorado at Boulder 426-435 In this paper, we present LiDARR (**Li**nking **D**ocument **A**MRs with **R**eferents **R**esolvers), a web tool for semantic annotation at the document level using the formalism of Abstract Meaning Representation (AMR). LiDARR streamlines the creation of comprehensive knowledge graphs from natural language documents through semantic annotation. The tool features a visualization and interactive user interface, transforming document-level AMR annotation into an models-facilitated verification process. This is achieved through the integration of an AMR-to-surface alignment model and a coreference resolution model. Additionally, we incorporate PropBank rolesets into LiDARR to extend implicit roles in annotated AMR, allowing implicit roles to be linked through the coreference chains via AMRs. 2025.acl-demo.41 @@ -25276,7 +25276,7 @@ SeunghyunYoonAdobe Research Viet DacLaiAdobe Systems FranckDernoncourt - TrungBuiAdobe Research + TrungBuiAdobe Research 436-447 While small language models (SLMs) show promises for mobile deployment, their real world performance and applications on smartphones remain underexplored. We present SlimLM, a series of SLMs optimized for document assistance tasks on mobile devices. Through extensive experiments on a Samsung Galaxy S24, we identify the sweet spot between model size (ranging from 125M to 8B parameters), context length, and inference time for efficient on-device processing. SlimLM is pretrained on SlimPajama-627B and fine-tuned on DocAssist, our constructed dataset for summarization, question answering, and suggestion tasks. Our smallest model demonstrates efficient performance on S24, while larger variants offer enhanced capabilities within mobile constraints. We evaluate SlimLM against existing SLMs, showing comparable or superior performance and offering a benchmark for future research in on-device language models. We provide an Android application allowing users to experience SlimLM’s document assistance capabilities, offering valuable insights for mobile developers, researchers, and companies seeking privacy-preserving on-device alternatives to server-based language models. 2025.acl-demo.42 @@ -25354,14 +25354,14 @@ <fixed-case>C</fixed-case>ode<fixed-case>A</fixed-case>rena: A Collective Evaluation Platform for <fixed-case>LLM</fixed-case> Code Generation MingzheDuNanyang Technological University and National University of Singapore - Anh TuanLuuNanyang Technological University + Anh TuanLuuNanyang Technological University BinJiNational University of Defense Technology XiaobaoWuNanyang Technological University YuhaoQingThe University of Hong Kong DongHuang Terry YueZhuoCommonwealth Scientific and Industrial Research Organisation, CSIRO QianLiuTiktok - See-KiongNgNational University of Singapore + See-KiongNgNational University of Singapore 502-512 Large Language Models (LLMs) have reshaped code generation by synergizing their exceptional comprehension of natural language and programming syntax, thereby substantially boosting developer productivity. These advancements have prompted numerous efforts to quantitatively evaluate their coding capabilities. However, persistent challenges, such as benchmark leakage, data dissipation, and limited system accessibility, continue to impede a timely and accurate assessment. To address these limitations, we introduce CodeArena, an online evaluation framework tailored for LLM code generation. Its key innovation is a collective evaluation mechanism, which dynamically recalibrates individual model scores based on the holistic performance of all participating models, mitigating score biases caused by widespread benchmark leakage. In addition, CodeArena ensures open access to all submitted solutions and test cases and provides automation-friendly APIs to streamline the code evaluation workflow. Our main contributions are: (1) a collective evaluation system for unbiased assessment, (2) a public repository of solutions and test cases, and (3) automation-ready APIs for seamless integration. 2025.acl-demo.48 @@ -25389,7 +25389,7 @@ gec-metrics: A Unified Library for Grammatical Error Correction Evaluation - TakumiGotoNara Institute of Science and Technology, Japan + TakumiGotoNara Institute of Science and Technology, Japan YusukeSakaiNara Institute of Science and Technology, Japan TaroWatanabeNara Institute of Science and Technology, Japan 524-534 @@ -25478,7 +25478,7 @@ ZheqiHeBeijing Academy of Artificial Intelligence Tongshuai.renTongshuai.ren XuejingLi - Jin-GeYaoBAAI + Jin-GeYaoBAAI XiYangBeijing Academy of Artificial Intelligence 583-591 We introduce FlagEval-Arena, an evaluation platform for side-by-side comparisons of large language models and text-driven AIGC systems.Compared with the well-known LM Arena (LMSYS Chatbot Arena), we reimplement our own framework with the flexibility to introduce new mechanisms or features. Our platform enables side-by-side evaluation not only for language models or vision-language models, but also text-to-image or text-to-video synthesis. We specifically target at Chinese audience with a more focus on the Chinese language, more models developed by Chinese institutes, and more general usage beyond the technical community. As a result, we currently observe very interesting differences from usual results presented by LM Arena. Our platform is available via this URL: https://flageval.baai.org/#/arena. @@ -25698,7 +25698,7 @@ <fixed-case>HYPEROFA</fixed-case>: Expanding <fixed-case>LLM</fixed-case> Vocabulary to New Languages via Hypernetwork-Based Embedding Initialization EnesÖzerenUniversity of Munich, Ludwig-Maximilians-Universität München YihongLiuLudwig-Maximilians-Universität München - HinrichSchuetze + HinrichSchuetze 79-96 Many pre-trained language models (PLMs) exhibit suboptimal performance on mid- and low-resource languages, largely due to limited exposure to these languages during pre-training. A common strategy to address this is to introduce new tokens specific to the target languages, initialize their embeddings, and apply continual pre-training on target-language data. Among such methods, OFA (Liu et al., 2024a) proposes a similarity-based subword embedding initialization heuristic that is both effective and efficient. However, OFA restricts target-language token embeddings to be convex combinations of a fixed number of source-language embeddings, which may limit expressiveness. To overcome this limitation, we propose HYPEROFA, a hypernetwork-based approach for more adaptive token embedding initialization. The hypernetwork is trained to map from an external multilingual word vector space to the PLM’s token embedding space using source-language tokens. Once trained, it can generate flexible embeddings for target-language tokens, serving as a good starting point for continual pretraining. Experiments demonstrate that HYPEROFA consistently outperforms random initialization baseline and matches or exceeds the performance of OFA in both continual pre-training convergence and downstream task performance. We make the code publicly available. 2025.acl-srw.6 @@ -25803,7 +25803,7 @@ Your Pretrained Model Tells the Difficulty Itself: A Self-Adaptive Curriculum Learning Paradigm for Natural Language Understanding QiFeng YihongLiuLudwig-Maximilians-Universität München - HinrichSchuetze + HinrichSchuetze 222-239 Curriculum learning is a widely adopted training strategy in natural language processing (NLP), where models are exposed to examples organized by increasing difficulty to enhance learning efficiency and performance. However, most existing approaches rely on manually defined difficulty metrics – such as text length – which may not accurately reflect the model’s own perspective. To overcome this limitation, we present a self-adaptive curriculum learning paradigm that prioritizes fine-tuning examples based on difficulty scores predicted by pre-trained language models (PLMs) themselves. Building on these scores, we explore various training strategies that differ in the ordering of examples for the fine-tuning: from easy-to-hard, hard-to-easy, to mixed sampling. We evaluate our method on four natural language understanding (NLU) datasets covering both binary and multi-class classification tasks.Experimental results show that our approach leads to faster convergence and improved performance compared to standard random sampling. 2025.acl-srw.15 @@ -25934,7 +25934,7 @@ JunyoungSon SungjinPark ChanjunParkKorea University - HeuiseokLim + HeuiseokLim 422-433 Retrieval-Augmented Generation (RAG) has emerged as a crucial framework in natural language processing (NLP), improving factual consistency and reducing hallucinations by integrating external document retrieval with large language models (LLMs). However, the effectiveness of RAG is often hindered by coreferential complexity in retrieved documents, which can introduce ambiguity and interfere with in-context learning. In this study, we systematically investigate how entity coreference affects both document retrieval and generative performance in RAG-based systems, focusing on retrieval relevance, contextual understanding, and overall response quality. We demonstrate that coreference resolution enhances retrieval effectiveness and improves question-answering (QA) performance. Through comparative analysis of different pooling strategies in retrieval tasks, we find that mean pooling demonstrates superior context capturing ability after applying coreference resolution. In QA tasks, we discover that smaller models show greater improvement from the disambiguation process, likely due to their limited inherent capacity for handling referential ambiguity. With these findings, this study aims to provide a deeper understanding of the challenges posed by coreferential complexity in RAG, offering guidance for improving retrieval and generation in knowledge-intensive AI applications. 2025.acl-srw.27 @@ -26041,7 +26041,7 @@ LauraZeidlerniversity of Technology Nuremberg ChrisJenkinsUniversity of Stuttgart, Universität Stuttgart FilipMiletićUniversity of Stuttgart - SabineSchulte Im WaldeUniversity of Stuttgart + SabineSchulte Im WaldeUniversity of Stuttgart 539-547 The task of automatic dialect classification is typically tackled using traditional machine-learning models with bag-of-words unigram features. We explore two alternative methods for distinguishing dialects across 20 Spanish-speaking countries:(i) Support vector machine and decision tree models were trained on dialectal features tailored to the Spanish dialects, combined with standard unigrams. (ii) A pre-trained BERT model was fine-tuned on the task.Results show that the tailored features generally did not have a positive impact on traditional model performance, but provide a salient way of representing dialects in a content-agnostic manner. The BERT model wins over traditional models but with only a tiny margin, while sacrificing explainability and interpretability. 2025.acl-srw.36 @@ -26063,7 +26063,7 @@ A Dual-Layered Evaluation of Geopolitical and Cultural Bias in <fixed-case>LLM</fixed-case>s SeanKimSeoul National University - Hyuhng JoonKimSeoul National University + Hyuhng JoonKimSeoul National University 580-595 As large language models (LLMs) are increasingly deployed across diverse linguistic and cultural contexts, understanding their behavior in both factual and disputable scenarios is essential—especially when their outputs may shape public opinion or reinforce dominant narratives. In this paper, we define two types of bias in LLMs: model bias (bias stemming from model training) and inference bias (bias induced by the language of the query), through a two-phase evaluation.Phase 1 evaluates LLMs on factual questions where a single verifiable answer exists, assessing whether models maintain consistency across different query languages. Phase 2 expands the scope by probing geopolitically sensitive disputes, where responses may reflect culturally embedded or ideologically aligned perspectives. We construct a manually curated dataset spanning both factual and disputable QA, across four languages and question types. The results show that Phase 1 exhibits query language-induced alignment, while Phase 2 reflects an interplay between the model’s training context and query language. This paper offers a structured framework for evaluating LLM behavior across neutral and sensitive topics, providing insights for future LLM deployment and culturally-aware evaluation practices in multilingual contexts.WARNING: this paper covers East Asian issues which may be politically sensitive. 2025.acl-srw.38 @@ -26078,7 +26078,7 @@ NarumiTokunaga YukiYamagata KoujiKozakiOsaka Electro-Communication University - YujiMatsumotoRIKEN Center for Advanced Intelligence Project + YujiMatsumotoRIKEN Center for Advanced Intelligence Project 596-607 Recognizing biomedical concepts in the text is vital for ontology refinement, knowledge graph construction, and concept relationship discovery. However, traditional concept recognition methods, relying on explicit mention identification, often fail to capture complex concepts not explicitly stated in the text. To overcome this limitation, we introduce MA-COIR, a framework that reformulates concept recognition as an indexing-recognition task. By assigning semantic search indexes (ssIDs) to concepts, MA-COIR resolves ambiguities in ontology entries and enhances recognition efficiency. Using a pretrained BART-based model fine-tuned on small datasets, our approach reduces computational requirements to facilitate adoption by domain experts. Furthermore, we incorporate large language model (LLM)-generated queries and synthetic data to improve recognition in low-resource settings. Experimental results on three scenarios (CDR, HPO, and HOIP) highlight the effectiveness of MA-COIR in recognizing both explicit and implicit concepts without the need for mention-level annotations during inference, advancing ontology-driven concept recognition in biomedical domain applications. Our code and constructed data are available at https://github.com/sl-633/macoir-master. 2025.acl-srw.39 @@ -26146,7 +26146,7 @@ Fact from Fiction: Finding Serialized Novels in Newspapers - PascaleFeldkamp + PascaleFeldkamp AlieLassche Katrine FrøkjærBaunvigNA KristofferNielboAarhus University @@ -26207,7 +26207,7 @@ RaduJianuNA AidanSlingsbyNA JoWoodCity University - PranavaMadhyasthaCity, University of London + PranavaMadhyasthaCity, University of London 760-773 We present a new dataset for chart question answering (CQA) constructed from visualization notebooks. The dataset features real-world, multi-view charts paired with natural language questions grounded in analytical narratives. Unlike prior benchmarks, our data reflects ecologically valid reasoning workflows. Benchmarking state-of-the-art multimodal large language models reveals a significant performance gap, with GPT-4.1 achieving an accuracy of 69.3%, underscoring the challenges posed by this more authentic CQA setting. 2025.acl-srw.50 @@ -26280,7 +26280,7 @@ Learning and Enforcing Context-Sensitive Control for <fixed-case>LLM</fixed-case>s MohammadAlbinhassan - PranavaMadhyasthaCity, University of London + PranavaMadhyasthaCity, University of London MarkLaw AlessandraRussoImperial College London 834-842 @@ -26308,7 +26308,7 @@ JulianSchlenkerUniversität Mannheim JennyKunzLinköping University TatianaAnikinaGerman Research Center for AI - GünterNeumannGerman Research Center for AI + GünterNeumannGerman Research Center for AI SimonOstermannGerman Research Center for AI 849-871 Most state-of-the-art large language models (LLMs) are trained mainly on English data, limiting their effectiveness on non-English, especially low-resource, languages. This study investigates whether language adapters can facilitate cross-lingual transfer in English-centric LLMs. We train language adapters for 13 languages using Llama 2 (7B) and Llama 3.1 (8B) as base models, and evaluate their effectiveness on two downstream tasks (MLQA and SIB-200) using either task adapters or in-context learning. Our results reveal that language adapters improve performance for languages not seen during pretraining, but provide negligible benefit for seen languages. These findings highlight the limitations of language adapters as a general solution for multilingual adaptation in English-centric LLMs. @@ -26443,7 +26443,7 @@ Improving Explainability of Sentence-level Metrics via Edit-level Attribution for Grammatical Error Correction - TakumiGotoNara Institute of Science and Technology, Japan + TakumiGotoNara Institute of Science and Technology, Japan JustinVasselli TaroWatanabeNara Institute of Science and Technology, Japan 1004-1015 @@ -26685,7 +26685,7 @@ RomanVashurinMBZUAI ArtemVazhentsevSkoltech/AIRI EkaterinaFadeevaETH Zürich - TimothyBaldwinMBZUAI + TimothyBaldwinMBZUAI 3-4 Large language models (LLMs) are widely used in NLP applications, but their tendency to produce hallucinations poses significant challenges to the reliability and safety, ultimately undermining user trust. This tutorial offers the first systematic introduction to uncertainty quantification (UQ) for LLMs in text generation tasks – a conceptual and methodological framework that provides tools for communicating the reliability of a model answer. This additional output could be leveraged for a range of downstream tasks, including hallucination detection and selective generation. We begin with the theoretical foundations of uncertainty, highlighting why techniques developed for classification might fall short in text generation. Building on this grounding, we survey state-of-the-art white-box and black-box UQ methods, from simple entropy-based scores to supervised probes over hidden states and attention weights, and show how they enable selective generation and hallucination detection. Additionally, we discuss the calibration of uncertainty scores for better interpretability. A key feature of the tutorial is practical examples using LM-Polygraph, an open-source framework that unifies more than a dozen recent UQ and calibration algorithms and provides a large-scale benchmark, allowing participants to implement UQ in their applications, as well as reproduce and extend experimental results with only a few lines of code. By the end of the session, researchers and practitioners will be equipped to (i) evaluate and compare existing UQ techniques, (ii) develop new methods, and (iii) implement UQ in their code for deploying safer, more trustworthy LLM-based systems. 2025.acl-tutorials.3 @@ -26696,8 +26696,8 @@ Human-<fixed-case>AI</fixed-case> Collaboration: How <fixed-case>AI</fixed-case>s Augment Human Teammates SherryWuCarnegie Mellon University DiyiYangStanford University - JosephChangAllen Institute for AI - Marti A.HearstUniversity of California, Berkeley + JosephChangAllen Institute for AI + Marti A.HearstUniversity of California, Berkeley KyleLoAllen Institute for AI 5-6 The continuous, rapid development of general-purpose models like LLMs suggests the theoretical possibility of AI performing any human task. Yet, despite the potential and promise, these models are far from perfect, excelling at certain tasks while struggling with others. The tension between what is possible and a model’s limitations raises the general research question that has attracted attention from various disciplines: What is the best way to use AI to maximize its benefits? In this tutorial, we will review recent developments related to human-AI teaming and collaboration. To the best of our knowledge, our tutorial will be the first to provide a more integrated view from NLP, HCI, Computational Social Science, and Learning Science, etc., and highlight how different communities have identified the goals and societal impacts of such collaborations, both positive and negative. We will further discuss how to operationalize these Human-AI collaboration goals, and reflect on how state-of-the-art AI models should be evaluated and scaffolded to make them most useful in collaborative contexts. @@ -26752,12 +26752,12 @@ Guardrails and Security for <fixed-case>LLM</fixed-case>s: Safe, Secure and Controllable Steering of <fixed-case>LLM</fixed-case> Applications TraianRebedeaNVIDIA / University Politehnica of Bucharest - LeonDerczynskiNVIDIA / ITU University of Copenhagen + LeonDerczynskiNVIDIA / ITU University of Copenhagen ShaonaGhoshNVIDIA Makesh NarsimhanSreedharNVIDIA FaezeBrahmanAllen Institute for AI LiweiJiangUniversity of Washington / NVIDIA - BoLiUniversity of Illinois at Urbana-Champaign + BoLiUniversity of Illinois at Urbana-Champaign YuliaTsvetkovUniversity of Washington ChristopherParisienNVIDIA YejinChoiStanford University / NVIDIA @@ -26858,7 +26858,7 @@ YiboYanThe Hong Kong University of Science and Technology ShenWang JiahaoHuoThe Hong Kong University of Science and Technology and Tongji University - Philip S.YuUniversity of Illinois Chicago + Philip S.YuUniversity of Illinois Chicago XumingHuThe Hong Kong University of Science and Technology (Guangzhou) and Hong Kong University of Science and Technology QingsongWenSquirrel Ai Learning 69-82 @@ -26982,7 +26982,7 @@ MehrzadSamadiParabricks Inc. and NVIDIA SeanNarenthiranNVIDIA AleksanderFicekNVIDIA - Wasi UddinAhmadNVIDIA + Wasi UddinAhmadNVIDIA JocelynHuangNVIDIA JagadeeshBalamNVIDIA BorisGinsburgNVIDIA @@ -27007,7 +27007,7 @@ JagadeeshBalamNVIDIA BorisGinsburgNVIDIA Yu-Chiang FrankWangNVIDIA and National Taiwan University - Chao-Han HuckYangNVIDIA Research + Chao-Han HuckYangNVIDIA Research 222-236 Construction of a general-purpose post-recognition error corrector poses a crucial question: how can we most effectively train a model on a large mixture of domain datasets? The answer would lie in learning dataset-specific features and digesting their knowledge in a single model. Previous methods achieve this by having separate correction language models, resulting in a significant increase in parameters. In this work, we present Mixture-of-Experts as a solution, highlighting that MoEs are much more than a scalability tool. We propose a Multi-Task Correction MoE, where we train the experts to become an “expert” of speech-to-text, language-to-text and vision-to-text datasets by learning to route each dataset’s tokens to its mapped expert. Experiments on the Open ASR Leaderboard show that we explore a new state-of-the-art performance by achieving an average relative 5.0% WER reduction and substantial improvements in BLEU scores for speech and translation tasks. On zero-shot evaluation, NeKo outperforms GPT-3.5 and Claude-3.5-Sonnet with 15.5% to 27.6% relative WER reduction in the Hyporadise benchmark. NeKo performs competitively on grammar and post-OCR correction as a multi-task model. 2025.acl-industry.17 @@ -27095,7 +27095,7 @@ YejinChoiComputer Science Department, Stanford University and NVIDIA Yu-Chiang FrankWangNVIDIA and National Taiwan University YutaNakashimaThe University of Osaka - Chao-Han HuckYangNVIDIA Research + Chao-Han HuckYangNVIDIA Research 295-309 Large Vision-Language Models (LVLMs) have transformed image captioning, shifting from concise captions to detailed descriptions. We introduce LOTUS, a leaderboard for evaluating detailed captions, addressing three main gaps in existing evaluations: lack of standardized criteria, bias-aware assessments, and user preference considerations. LOTUS comprehensively evaluates various aspects, including caption quality (e.g., alignment, descriptiveness), risks (e.g., hallucination), and societal biases (e.g., gender bias) while enabling preference-oriented evaluations by tailoring criteria to diverse user preferences. Our analysis of recent LVLMs reveals no single model excels across all criteria, while correlations emerge between caption detail and bias risks. Preference-oriented evaluations demonstrate that optimal model selection depends on user priorities. 2025.acl-industry.22 @@ -27138,7 +27138,7 @@ Efficient Out-of-Scope Detection in Dialogue Systems via Uncertainty-Driven <fixed-case>LLM</fixed-case> Routing ÁlvaroZaeraETHZ - ETH Zurich - Diana NicoletaPopa + Diana NicoletaPopa IvanSekulicTelepathyLabs PaoloRossoUniversity of Fribourg 328-335 @@ -27343,7 +27343,7 @@ XumingHuThe Hong Kong University of Science and Technology (Guangzhou) and Hong Kong University of Science and Technology WenhaoJiangGuangming Laboratory Hai-TaoZhengTsinghua University, Tsinghua University - Philip S.YuUniversity of Illinois Chicago + Philip S.YuUniversity of Illinois Chicago 553-567 Recently, Large Language Models (LLMs) have been widely studied by researchers for their roles in various downstream NLP tasks. As a fundamental task in the NLP field, Chinese Grammatical Error Correction (CGEC) aims to correct all potential grammatical errors in the input sentences. Previous studies have shown that LLMs’ performance as correctors on CGEC remains unsatisfactory due to the challenging nature of the task. To promote the CGEC field to better adapt to the era of LLMs, we rethink the roles of LLMs in the CGEC task so that they can be better utilized and explored in CGEC. Considering the rich grammatical knowledge stored in LLMs and their powerful semantic understanding capabilities, we utilize LLMs as explainers to provide explanation information to the CGEC small models during error correction, aiming to enhance performance. We also use LLMs as evaluators to bring more reasonable CGEC evaluations, thus alleviating the troubles caused by the subjectivity of the CGEC task. In particular, our work is also an active exploration of how LLMs and small models better collaborate in downstream tasks. Extensive experiment and detailed analyses on widely used datasets verify the effectiveness of our intuition and the proposed methods. 2025.acl-industry.39 @@ -27478,7 +27478,7 @@ MaximeDelmasIdiap Research Institute MagdalenaWysockaCRUK NBC Manchester Institute and Technical University of Gdansk DaniloGusicumaNA - AndreFreitasIdiap Research Institute and University of Manchester + AndreFreitasIdiap Research Institute and University of Manchester 693-705 The discovery of novel antibiotics is critical to address the growing antimicrobial resistance (AMR). However, pharmaceutical industries face high costs (over $1 billion), long timelines, and a high failure rate, worsened by the rediscovery of known compounds. We propose an LLM-based pipeline that acts as an alert system, detecting prior evidence of antibiotic activity to prevent costly rediscoveries. The system integrates literature on organisms and chemicals into a Knowledge Graph (KG), ensuring taxonomic resolution, synonym handling, and multi-level evidence classification. We tested the pipeline on a private list of 73 potential antibiotic-producing organisms, disclosing 12 negative hits for evaluation. The results highlight the effectiveness of the pipeline for evidence reviewing, reducing false negatives, and accelerating decision-making. The KG for negative hits as well as the user interface for interactive exploration are available at https://github.com/idiap/abroad-kg-store and https://github.com/idiap/abroad-demo-webapp. 2025.acl-industry.49 @@ -27574,7 +27574,7 @@ Enriching children’s stories with <fixed-case>LLM</fixed-case>s: Delivering multilingual data enrichment for children’s books at scale and across markets - ZarahWeissNextory AB + ZarahWeissNextory AB ChristofMeyerNA MikaelAnderssonNA 804-812 @@ -27953,7 +27953,7 @@ MizanurRahman AmranBhuiyan Mir TafseerNayeemUniversity of Alberta - ShafiqJotyNanyang Technological University and SalesForce.com + ShafiqJotyNanyang Technological University and SalesForce.com EnamulHoqueYork University JimmyHuangYork University and York University 1203-1216 @@ -28004,7 +28004,7 @@ SunHeNA Hock HuanGohNA Lung HsiangWongNA - Nancy F.Chen + Nancy F.Chen 1244-1253 The integration of generative artificial intelligence into educational applications has enhanced personalized and interactive learning experiences, and it shows strong potential to promote young learners language acquisition. However, it is still challenging to ensure consistent and robust performance across different languages and cultural contexts, and kids-friendly design requires simplified instructions, engaging interactions, and age-appropriate scaffolding to maintain motivation and optimize learning outcomes.In this work, we introduce SingaKids, a dialogic tutor designed to facilitate language learning through picture description tasks. Our system integrates dense image captioning, multilingual dialogic interaction, speech understanding, and engaging speech generation to create an immersive learning environment in four languages: English, Mandarin, Malay, and Tamil. We further improve the system through multilingual pre-training, task-specific tuning, and scaffolding optimization. Empirical studies with elementary school students demonstrate that SingaKids provides effective dialogic teaching, benefiting learners at different performance levels. 2025.acl-industry.86 @@ -28019,7 +28019,7 @@ Jeena JPrakash ShashiKumarEPFL - EPF Lausanne MalolanChetlurNA - AndreasStolckeUniphore Technologies + AndreasStolckeUniphore Technologies 1254-1262 There has been increasing interest in unifying streaming and non-streaming automatic speech recognition (ASR) models to reduce development, training, and deployment costs. We present a unified framework that trains a single end-to-end ASR model for both streaming and non-streaming applications, leveraging future context information. We propose to use dynamic right-context through the chunked attention masking in the training of zipformer-based ASR models. We demonstrate that using right-context is more effective in zipformer models compared to other conformer models due to its multi-scale nature. We analyze the effect of varying the number of right-context frames on accuracy and latency of the streaming ASR models. We use Librispeech and large in-house conversational datasets to train different versions of streaming and non-streaming models and evaluate them in a production grade server-client setup across diverse testsets of different domains. The proposed strategy reduces word error by relative 7.9% with a small degradation in user-perceived latency. By adding more right-context frames, we are able to achieve streaming performance close to that of non-streaming models. Our approach also allows flexible control of the latency-accuracy tradeoff according to customers requirements. 2025.acl-industry.87 @@ -28029,7 +28029,7 @@ A Semi-supervised Scalable Unified Framework for <fixed-case>E</fixed-case>-commerce Query Classification ChunyuanYuan - ChongZhang + ChongZhang ZhenFangNA MingPangJD.com XueJiang @@ -28100,7 +28100,7 @@ BJayaPrakashHyundai Motors India Engineering Pvt Ltd Chintalapalli RajaKullayappaHyundai Motor Company Mandala JagadeeshReddy - PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology + PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology 1322-1338 In-car AI assistants enhance driving by enabling hands-free interactions, yet they often struggle with multi-turn conversations and fail to handle cognitively complex follow-up questions. This limits their effectiveness in real-world deployment. To address this limitation, we propose a framework that leverages Bloom’s Taxonomy to systematically generate follow-up questions with increasing cognitive complexity and a Gricean-inspired evaluation framework to assess their Logical Consistency, Informativeness, Relevance, and Clarity. We introduce a dataset comprising 750 human-annotated seed questions and 3750 follow-up questions, with human evaluation confirming that 96.68% of the generated questions adhere to the intended Bloom’s Taxonomy levels. Our approach, validated through both LLM-based and human assessments, also identifies the specific cognitive complexity level at which in-car AI assistants begin to falter information that can help developers measure and optimize key cognitive aspects of conversational performance. 2025.acl-industry.93 @@ -28201,7 +28201,7 @@ <fixed-case>REVISE</fixed-case>: A Framework for Revising <fixed-case>OCR</fixed-case>ed text in Practical Information Systems with Data Contamination Strategy GyuhoShimKorea University SeongtaeHongKorea University - HeuiseokLim + HeuiseokLim 1423-1434 Recent advances in large language models (LLMs) have significantly improved Document AI, demonstrating remarkable performance on document understanding tasks such as question answering. However, existing approaches primarily focus on solving specific tasks, lacking the capability to structurally organize and systematically manage document information. To address this limitation, we propose Revise, a framework that systematically corrects errors introduced by OCR at the character, word, and structural levels. Specifically, Revise employs a comprehensive hierarchical taxonomy of common OCR errors and a synthetic data generation strategy that realistically simulates such errors to train an effective correction model. Experimental results demonstrate that Revise effectively corrects OCR outputs, enabling more structured representation and systematic management of document contents. Consequently, our method significantly enhances downstream performance in document retrieval and question answering tasks, highlighting the potential to overcome the structural management limitations of existing Document AI frameworks. 2025.acl-industry.100 @@ -28254,7 +28254,7 @@ SameerPimparkhede Srikanth G.TamilselvamInternational Business Machines PrinceKumarInternational Business Machines - PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology + PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology 1466-1479 System-level programming is essential for modern enterprise infrastructure, enabling the automation and management of complex systems through declarative code. Developers write this code based on schemas, which themselves are a form of code that defines constraints like data types and required fields. These schemas help ensure operational correctness and smooth integration across systems. However, as enterprise schemas become complex, manually writing code adhering to these constraints becomes challenging for developers. Large Language Models (LLMs) have demonstrated potential in code generation and natural language understanding, particularly in zero-shot and few-shot settings. However, applying LLMs to handle constraints represented in code, essential for system-level programming rather than natural language, has not been explored. Hence, we introduce ConCodeEval, a study across two key dimensions: format and constraint efficacy, with a first-of-its-kind benchmark involving two novel experiments for code constraints across five representations (JSON, YAML, XML, Python, and natural language). Our findings suggest that conscious choice of representations can lead to optimal use of LLMs in enterprise use cases involving constraints. Nonetheless, LLMs continue to struggle significantly with code constraints, motivating the need for innovation in this direction. 2025.acl-industry.104 diff --git a/data/xml/2025.africanlp.xml b/data/xml/2025.africanlp.xml index b09a79594a..9d8480cd7d 100644 --- a/data/xml/2025.africanlp.xml +++ b/data/xml/2025.africanlp.xml @@ -5,7 +5,7 @@ Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025) ConstantineLignos IdrisAbdulmumin - DavidAdelani + DavidAdelani Association for Computational Linguistics
Vienna, Austria
July @@ -271,7 +271,7 @@ Sani AbdullahiSani Ali UsmanUmarFederal University of Lafia TajuddeenGwadabeMasakhane Research Foundation - KennethChurchNortheastern University + KennethChurchNortheastern University VukosiMarivateUniversity of Pretoria 176-191 Hausa Natural Language Processing (NLP) has gained increasing attention in recent years, yet remains understudied as a low-resource language despite having over 120 million first-language (L1) and 80 million second-language (L2) speakers worldwide. While significant advances have been made in high-resource languages, Hausa NLP faces persistent challenges including limited open-source datasets and inadequate model representation. This paper presents an overview of the current state of Hausa NLP, systematically examining existing resources, research contributions, and gaps across fundamental NLP tasks: text classification, machine translation, named entity recognition, speech recognition, and question answering. We introduce HausaNLP, a curated catalog that aggregates datasets, tools, and research works to enhance accessibility and drive further development. Furthermore, we discuss challenges in integrating Hausa into large language models (LLMs), addressing issues of suboptimal tokenization, and dialectal variation. Finally, we propose strategic research directions emphasizing dataset expansion, improved language modeling approaches, and strengthened community collaboration to advance Hausa NLP. Our work provides both a foundation for accelerating Hausa NLP progress and valuable insights for broader multilingual NLP research. @@ -351,7 +351,7 @@
<fixed-case>Y</fixed-case>-<fixed-case>NQ</fixed-case>: <fixed-case>E</fixed-case>nglish-<fixed-case>Y</fixed-case>orùbá Evaluation dataset for Open-Book Reading Comprehension with Open-Ended Questions - Marta R.Costa-jussàMeta + Marta R.Costa-jussàMeta JoyChenGeorgia Institute of Technology and Facebook IfeAdebara JoeChuangFAIR diff --git a/data/xml/2025.aielpl.xml b/data/xml/2025.aielpl.xml index 4dc4c0bc3f..0375e726de 100644 --- a/data/xml/2025.aielpl.xml +++ b/data/xml/2025.aielpl.xml @@ -6,7 +6,7 @@ María Isabel RivasGinel PatrickCadwell PaoloCanavese - SilviaHansen-Schirra + SilviaHansen-Schirra MartinKappus AnnaMatamala WillNoonan @@ -24,9 +24,9 @@ Leveraging Large Language Models for Joint Linguistic and Technical Accessibility Improvement: A Case Study on University Webpages - PierretteBouillon + PierretteBouillon JohannaGerlach - RaphaelRubino + RaphaelRubino 1–13 The aim of the study presented in this paper is to investigate whether Large Language Models can be leveraged to translate French content from existing websites into their B1-level simplified versions and to integrate them into an accessible HTML structure. We design a CMS agnostic approach to webpage accessibility improvement based on prompt engineering and apply it to Geneva University webpages. We conduct several automatic and manual evaluations to measure the accessibility improvement reached by several LLMs with various prompts in a zero-shot setting. Results show that LLMs are not all suitable for the task, while a large disparity is observed among results reached by different prompts. Manual evaluation carried out by a dyslexic crowd shows that some LLMs could produce more accessible websites and improve access to information. 2025.aielpl-1.1 @@ -34,7 +34,7 @@ How Artificial Intelligence can help in the Easy-to-Read Adaptation of Numerical Expressions in <fixed-case>S</fixed-case>panish - Mari CarmenSuárez-Figueroa + Mari CarmenSuárez-Figueroa AlejandroMuñoz-Navarro IsamDiab 14–24 @@ -90,7 +90,7 @@ Do professionally adapted texts follow existing Easy-to-Understand (<fixed-case>E</fixed-case>2<fixed-case>U</fixed-case>) language guidelines? A quantitative analysis of two professionally adapted corpora AndreeaDeleanu - ConstantinOrăsan + ConstantinOrăsan ShenbinQian AnastasiiaBezobrazova SabineBraun diff --git a/data/xml/2025.aisd.xml b/data/xml/2025.aisd.xml index 36ceeed258..3b94453d50 100644 --- a/data/xml/2025.aisd.xml +++ b/data/xml/2025.aisd.xml @@ -3,8 +3,8 @@ Proceedings of the 1st Workshop on AI and Scientific Discovery: Directions and Opportunities - PeterJansen - BhavanaDalvi Mishra + PeterJansen + BhavanaDalvi Mishra HarshTrivedi BodhisattwaPrasad Majumder TomHope diff --git a/data/xml/2025.alp.xml b/data/xml/2025.alp.xml index fd9cc33439..e65576316b 100644 --- a/data/xml/2025.alp.xml +++ b/data/xml/2025.alp.xml @@ -8,7 +8,7 @@ BinLi YudongLiu Marco C.Passarotti - RacheleSprugnoli + RacheleSprugnoli Association for Computational Linguistics
The Albuquerque Convention Center, Laguna
May @@ -114,7 +114,7 @@ Evaluating Evaluation Metrics for <fixed-case>A</fixed-case>ncient <fixed-case>C</fixed-case>hinese to <fixed-case>E</fixed-case>nglish Machine Translation Eric R.Bennett - HyoJungHan + HyoJungHan XinchenYang AndrewSchonebaum MarineCarpuat @@ -206,7 +206,7 @@ FarzanehGoshtasb NadiaHajipour EhsaneddinAsgari - HosseinSameti + HosseinSameti 137-149 The study of historical languages presents unique challenges due to their complex ortho-graphic systems, fragmentary textual evidence, and the absence of standardized digital repre-sentations of text in those languages. Tack-ling these challenges needs special NLP digi-tal tools to handle phonetic transcriptions and analyze ancient texts. This work introduces ParsiPy1, an NLP toolkit designed to facili-tate the analysis of historical Persian languages by offering modules for tokenization, lemma-tization, part-of-speech tagging, phoneme-to-transliteration conversion, and word embed-ding. We demonstrate the utility of our toolkit through the processing of Parsig (Middle Per-sian) texts, highlighting its potential for ex-panding computational methods in the study of historical languages. Through this work, we contribute to the field of computational philol-ogy, offering tools that can be adapted for the broader study of ancient texts and their digital preservation. 2025.alp-1.17 @@ -351,7 +351,7 @@ Finetuning <fixed-case>LLM</fixed-case>s for <fixed-case>E</fixed-case>va<fixed-case>C</fixed-case>un 2025 token prediction shared task JosefJon - OndřejBojar + OndřejBojar 221-225 In this paper, we present our submission for the token prediction task of EvaCun 2025. Our sys-tems are based on LLMs (Command-R, Mistral, and Aya Expanse) fine-tuned on the task data provided by the organizers. As we only pos-sess a very superficial knowledge of the subject field and the languages of the task, we simply used the training data without any task-specific adjustments, preprocessing, or filtering. We compare 3 different approaches (based on 3 different prompts) of obtaining the predictions, and we evaluate them on a held-out part of the data. 2025.alp-1.29 diff --git a/data/xml/2025.americasnlp.xml b/data/xml/2025.americasnlp.xml index f070a25dc5..49e6a104f1 100644 --- a/data/xml/2025.americasnlp.xml +++ b/data/xml/2025.americasnlp.xml @@ -10,7 +10,7 @@ KatharinaVon Der Wense LuisChiruzzo RolandoCoto-Solano - ArturoOncevay + ArturoOncevay Association for Computational Linguistics
Albuquerque, New Mexico
May @@ -39,7 +39,7 @@ Does a code-switching dialogue system help users learn conversational fluency in <fixed-case>C</fixed-case>hoctaw? JacquelineBrixeyUSC Institute for Creative Technologies - DavidTraumUniversity of Southern California Institute for Creative Technologies + DavidTraumUniversity of Southern California Institute for Creative Technologies 8-17 We investigate the learning outcomes and user response to a chatbot for practicing conversational Choctaw, an endangered American Indigenous language. Conversational fluency is a goal for many language learners, however, for learners of endangered languages in North America, access to fluent speakers may be limited. Chatbots are potentially ideal dialogue partners as this kind of dialogue system fulfills a non-authoritative role by focusing on carrying on a conversation as an equal conversational partner. The goal of the chatbot investigated in this work is to serve as a conversational partner in the absence of a fluent Choctaw-speaking human interlocutor. We investigate the impact of code-switching in the interaction, comparing a bilingual chatbot against a monolingual Choctaw version. We evaluate the systems for user engagement and enjoyment, as well as gains in conversational fluency from interacting with the system. 2025.americasnlp-1.2 @@ -84,7 +84,7 @@ PaolaInnesNational Autonomous University of Mexico (UNAM) JavierSantillanHoneynet Project CynthiaMontañoUniversity of Cailfornia, Berkeley - FrancisTyersIndiana University + FrancisTyersIndiana University 38-47 This work presents Py-elotl, a suite of tools and resources in Python for processing text in several indigenous languages spoken in Mexico. These resources include parallel corpora, linguistic taggers/analyzers, and orthographic normalization tools. This work aims to develop essential resources to support language pre-processing and linguistic research, and the future creation of more complete downstream applications that could be useful for the speakers and enhance the visibility of these languages. The current version supports language groups such as Nahuatl, Otomi, Mixtec, and Huave. This project is open-source and freely available for use and collaboration 2025.americasnlp-1.5 diff --git a/data/xml/2025.analogyangle.xml b/data/xml/2025.analogyangle.xml index 6091efef7c..39b7f4a047 100644 --- a/data/xml/2025.analogyangle.xml +++ b/data/xml/2025.analogyangle.xml @@ -26,10 +26,10 @@ Tore-Klose: Record Scorer, Goal Hunter, Machine? Human Association Norms for <fixed-case>G</fixed-case>erman Personal Name Compounds AnneroseEichelUniversity of Stuttgart, Universität Stuttgart TanaDeeg - AndreBlessingUniversity of Stuttgart, Universität Stuttgart + AndreBlessingUniversity of Stuttgart, Universität Stuttgart MilenaBelosevicUniversität Bielefeld SabineArndt-LappeTrier University - SabineSchulte Im WaldeUniversity of Stuttgart + SabineSchulte Im WaldeUniversity of Stuttgart 1-9 We present a collection of human association norms to German personal name compounds (PNCs) such as “Tore-Klose” (goal-Klose) and corresponding full names (Miroslav Klose), thus providing a novel testbed for PNC evaluation, i.e., analogical vs. contrastive positive vs. negative perception effects. The associations are obtained in an online experiment with German native speakers, analyzed regarding our novel intertwined PNC–person association setup, and accompanied by an LLM synthetic generation approach for augmentation. 2025.analogyangle-1.1 @@ -52,7 +52,7 @@ ValerioBasileUniversity of Turin DaniloCroce CristinaBoscoUniversity of Turin - RobertoBasiliUniversity of Roma, Tor Vergata + RobertoBasiliUniversity of Roma, Tor Vergata 22-36 Few-shot learning via in-context learning (ICL) is widely used in NLP, but its effectiveness is highly sensitive to example selection, often leading to unstable performance. To address this, we introduce BacKGen, a framework for generating structured Background Knowledge (BK) as an alternative to instance-based prompting. Our approach leverages Frame Semantics to uncover recurring conceptual patterns across data instances, clustering examples based on shared event structures and semantic roles. These patterns are then synthesized into generalized knowledge statements using a large language model (LLM) and injected into prompts to support contextual reasoning beyond surface-level cues. We apply BacKGen to Sentiment Phrase Classification (SPC), a task where polarity judgments frequently depend on implicit commonsense knowledge. In this setting, BK serves as an abstract representation of prototypical scenarios, enabling schematic generalization to help the model perform analogical reasoning by mapping new inputs onto generalized event structures. Experimental results with Mistral-7B and Llama3-8B demonstrate that BK-based prompting consistently outperforms standard few-shot approaches, achieving up to 29.94% error reduction. 2025.analogyangle-1.3 @@ -75,9 +75,9 @@ Prompting Metaphoricity: Soft Labeling with Large Language Models in Popular Communication of Science Tweets in <fixed-case>S</fixed-case>panish AlecSánchez-MonteroUniversidad Nacional Autónoma de México - GemmaBel-EnguixUniversidad Nacional Autonoma de Mexico + GemmaBel-EnguixUniversidad Nacional Autonoma de Mexico Sergio-LuisOjeda-Trueba - GerardoSierraUniversidad Nacional Autónoma de México + GerardoSierraUniversidad Nacional Autónoma de México 45-56 In this paper, we explore how large language models (LLMs) can be used to assign soft labels for metaphoricity in Popular Communication of Science (PCS) tweets written in Spanish. Instead of treating metaphors as a binary yes/no phenomenon, we focus on their graded nature and the variability commonly found in human annotations. Through a combination of prompt design and quantitative evaluation over a stratified sample of our dataset, we show that GPT-4 can assign probabilistic scores not only for general metaphoricity but also for specific metaphor types with consistency (Direct, Indirect, and Personification). The results show that, while LLMs align reasonably well with average human judgments for some categories, capturing the subtle patterns of inter-annotator disagreement remains a challenge. We present a corpus of 3,733 tweets annotated with LLM-generated soft labels, a valuable resource for further metaphor analysis in scientific discourse and figurative language annotation with LLMs. 2025.analogyangle-1.5 diff --git a/data/xml/2025.argmining.xml b/data/xml/2025.argmining.xml index 87eef4ad90..83d8c205cc 100644 --- a/data/xml/2025.argmining.xml +++ b/data/xml/2025.argmining.xml @@ -4,7 +4,7 @@ Proceedings of the 12th Argument mining Workshop ElenaChistova - PhilippCimiano + PhilippCimiano ShohrehHaddadan GabriellaLapesa RamonRuiz-Dolz @@ -26,7 +26,7 @@ “The Facts Speak for Themselves”: <fixed-case>GPT</fixed-case> and Fallacy Classification ErisaBytyqiUniversität Passau - AnnetteHautli-JaniszUniversität Passau + AnnetteHautli-JaniszUniversität Passau 1-10 Fallacies are not only part and parcel of human communication, they are also important for generative models in that fallacies can be tailored to self-verify the output they generate. Previous work has shown that fallacy detection and classification is tricky, but the question that still remains is whether the use of theoretical explanations in prompting Large Language Models (LLMs) on the task enhances the performance of the models. In this paper we show that this is not the case: Using the pragma-dialectics approach to fallacies (van Eemeren, 1987), we show that three GPT models struggle with the task. Based on our own PD-oriented dataset of fallacies and an extension of an existing fallacy dataset from Jin et al. (2022), we show that this is not only the case for fallacies “in the wild”, but also for textbook examples of fallacious arguments. Our paper also supports the claim that LLMs generally lag behind in fallacy classification in comparison to smaller-scale neural models. 2025.argmining-1.1 @@ -51,7 +51,7 @@ DavideCeolin EmmanuelleDietzAirbus Klara MaximilianeGutekunst - AnnetteHautli-JaniszUniversität Passau + AnnetteHautli-JaniszUniversität Passau CristiánSantibáñezUniversidad Catolica de La Santísima Concepción JodiSchneiderUniversity of Illinois, Urbana Champaign JonasScholzUniversity of Groningen @@ -79,7 +79,7 @@ Old but Gold: <fixed-case>LLM</fixed-case>-Based Features and Shallow Learning Methods for Fine-Grained Controversy Analysis in <fixed-case>Y</fixed-case>ou<fixed-case>T</fixed-case>ube Comments DavideBassi Erik BranMarino - RenataVieiraInstituto de Inteligência Artificial na Saúde and Universidade de Evora + RenataVieiraInstituto de Inteligência Artificial na Saúde and Universidade de Evora MartinPereiraUniversity of Santiago de Compostela 46-57 Online discussions can either bridge differences through constructive dialogue or amplify divisions through destructive interactions. paper proposes a computational approach to analyze dialogical relation patterns in YouTube comments, offering a fine-grained framework for controversy detection, enabling also analysis of individual contributions. experiments demonstrate that shallow learning methods, when equipped with these theoretically-grounded features, consistently outperform more complex language models in characterizing discourse quality at both comment-pair and conversation-chain levels.studies confirm that divisive rhetorical techniques serve as strong predictors of destructive communication patterns. work advances understanding of how communicative choices shape online discourse, moving beyond engagement metrics toward nuanced examination of constructive versus destructive dialogue patterns. @@ -125,7 +125,7 @@ DebelaGemechu RamonRuiz-DolzUniversity of Dundee JohnLawrenceUniversity of Dundee - ChrisReedUniversity of Dundee + ChrisReedUniversity of Dundee 100-106 The Open Argument Mining Framework (oAMF) addresses key challenges in argument mining research which still persist despite the field’s impressive growth. Researchers often face difficulties with cross-system comparisons, incompatible representation languages, and limited access to reusable tools. The oAMF introduces a standardised yet flexible architecture that enables seamless component benchmarking, rapid pipeline prototyping using elements from diverse research traditions, and unified evaluation methodologies that preserve theoretical compatibility. By reducing technical overhead, the framework allows researchers to focus on advancing core argument mining capabilities rather than reimplementing infrastructure, fostering greater collaboration at a time when computational reasoning is increasingly vital in the era of large language models. 2025.argmining-1.9 @@ -135,7 +135,7 @@ Argumentative Analysis of Legal Rulings: A Structured Framework Using Bobbitt’s Typology CarlottaGiacchettaUniversity of Trento - RaffaellaBernardiFree University of Bozen Bolzano + RaffaellaBernardiFree University of Bozen Bolzano BarbaraMontini JacopoStaianoUniversity of Trento SerenaTomasi @@ -172,7 +172,7 @@ MartinGruberEberhard-Karls-Universität Tübingen ZlataKiktevaUniversität Passau IgnazRutterUniversität Passau - AnnetteHautli-JaniszUniversität Passau + AnnetteHautli-JaniszUniversität Passau 140-146 Television debates play a key role in shaping public opinion, however, the rapid exchange of viewpoints in these settings often makes it difficult to perceive the underlying nature of the discussion. While there exist several debate visualisation techniques, to the best of our knowledge, none of them emphasise the argumentative dynamics in particular. With DebArgVis, we present a new interactive debate visualisation tool that leverages data annotated with argumentation structures to demonstrate how speaker interactions unfold over time, enabling users to deepen their comprehension of the debate. 2025.argmining-1.13 @@ -207,7 +207,7 @@ Stance-aware Definition Generation for Argumentative Texts NataliaEvgrafova LoicDe Langhe - VéroniqueHoste + VéroniqueHoste ElsLefever 168-180 Definition generation models trained on dictionary data are generally expected to produce neutral and unbiased output while capturing the contextual nuances. However, previous studies have shown that generated definitions can inherit biases from both the underlying models and the input context. This paper examines the extent to which stance-related bias in argumentative data influences the generated definitions. In particular, we train a model on a slang-based dictionary to explore the feasibility of generating persuasive definitions that concisely reflect opposing parties’ understandings of contested terms. Through this study, we provide new insights into bias propagation in definition generation and its implications for definition generation applications and argument mining. @@ -286,7 +286,7 @@ Overview of the Critical Questions Generation Shared Task BlancaCalvo FiguerasHiTZ Basque Center for Language Technology - Ixa, University of the Basque Country UPV/EHU , Spain - RodrigoAgerriHiTZ Basque Center for Language Technology - Ixa, University of the Basque Country UPV/EHU , Spain + RodrigoAgerriHiTZ Basque Center for Language Technology - Ixa, University of the Basque Country UPV/EHU , Spain MaiteHerediaHiTZ Basque Center for Language Technology - Ixa, University of the Basque Country UPV/EHU , Spain JaioneBengoetxeaHiTZ Basque Center for Language Technology - Ixa, University of the Basque Country UPV/EHU , Spain ElenaCabrioUniversity of Côte d’Azur and member of the Inria-I3S research team Wimmics @@ -404,7 +404,7 @@ <fixed-case>COGNAC</fixed-case> at <fixed-case>CQ</fixed-case>s-Gen 2025: Generating Critical Questions with <fixed-case>LLM</fixed-case>-Assisted Prompting and Multiple <fixed-case>RAG</fixed-case> Variants Azwad AnjumIslam Tisa IslamErana - Mark A.Finlayson + Mark A.Finlayson 340-348 We describe three approaches to solving the Critical Questions Generation Shared Task at ArgMining 2025. The task objective is to automatically generate critical questions that challenge the strength, validity, and credibility of a given argumentative text. The task dataset comprises debate statements (“interventions”) annotated with a list of named argumentation schemes and associated with a set of critical questions (CQs). Our three Retrieval-Augmented Generation (RAG)-based approaches used in-context example selection based on (1) embedding the intervention, (2) embedding the intervention plus manually curated argumentation scheme descriptions as supplementary context, and (3) embedding the intervention plus a selection of associated CQs and argumentation scheme descriptions. We developed the prompt templates through GPT-4o-assisted analysis of patterns in validation data and the task-specific evaluation guideline. All three of our submitted systems outperformed the official baselines (0.44 and 0.53) with automatically computed accuracies of 0.62, 0.58, and 0.61, respectively, on the test data, with our first method securing the 2nd place in the competition (0.63 manual evaluation). Our results highlight the efficacy of LLM-assisted prompt development and RAG-enhanced generation in crafting contextually relevant critical questions for argument analysis. 2025.argmining-1.33 @@ -415,7 +415,7 @@ <fixed-case>T</fixed-case>ri<fixed-case>LL</fixed-case>a<fixed-case>M</fixed-case>a at <fixed-case>CQ</fixed-case>s-Gen 2025: A Two-Stage <fixed-case>LLM</fixed-case>-Based System for Critical Question Generation FriesoTurkstra SaraNabhani - KhalidAl-Khatib + KhalidAl-Khatib 349-357 This paper presents a new system for generating critical questions in debates, developed for the Critical Questions Generation shared task. Our two-stage approach, combining generation and classification, utilizes LLaMA 3.1 Instruct models (8B, 70B, 405B) with zero-/few-shot prompting. Evaluations on annotated debate data reveal several key insights: few-shot generation with 405B yielded relatively high-quality questions, achieving a maximum possible punctuation score of 73.5. The 70B model outperformed both smaller and larger variants on the classification part. The classifiers showed a strong bias toward labeling generated questions as Useful, despite limited validation. Further, our system, ranked 6 extsuperscriptth, out-performed baselines by 3%. These findings stress the effectiveness of large-sized models for question generation and medium-sized models for classification, and suggest the need for clearer task definitions within prompts to improve classification accuracy. 2025.argmining-1.34 diff --git a/data/xml/2025.at4ssl.xml b/data/xml/2025.at4ssl.xml index a7b821bfcd..ae1b7dc1b3 100644 --- a/data/xml/2025.at4ssl.xml +++ b/data/xml/2025.at4ssl.xml @@ -57,7 +57,7 @@ <fixed-case>P</fixed-case>a<fixed-case>SC</fixed-case>o1: A Parallel Video-<fixed-case>S</fixed-case>i<fixed-case>GML</fixed-case> <fixed-case>S</fixed-case>wiss <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage Corpus in Medical Domain BastienDavid - PierretteBouillon + PierretteBouillon JonathanMutal IreneStrasly JohannaGerlach diff --git a/data/xml/2025.bea.xml b/data/xml/2025.bea.xml index 71bb4cb0f1..64f7a74245 100644 --- a/data/xml/2025.bea.xml +++ b/data/xml/2025.bea.xml @@ -125,7 +125,7 @@ Adapting <fixed-case>LLM</fixed-case>s for Minimal-edit Grammatical Error Correction RyszardStaruchAdam Mickiewicz University - FilipGralinskiAdam Mickiewicz University / Snowflake + FilipGralinskiAdam Mickiewicz University / Snowflake DanielDzienisiewiczAdam Mickiewicz University 118-128 Decoder-only large language models have shown superior performance in the fluency-edit English Grammatical Error Correction, but their adaptation for minimal-edit English GEC is still underexplored. To improve their effectiveness in the minimal-edit approach, we explore the error rate adaptation topic and propose a novel training schedule method. Our experiments set a new state-of-the-art result for a single-model system on the BEA-test set. We also detokenize the most common English GEC datasets to match the natural way of writing text. During the process, we find that there are errors in them. Our experiments analyze whether training on detokenized datasets impacts the results and measure the impact of the usage of the datasets with corrected erroneous examples. To facilitate reproducibility, we have released the source code used to train our models. @@ -138,7 +138,7 @@ ZhengyuanLiuInstitute for Infocomm Research, A*STAR Stella XinYinNanyang Technological University, Singapore Dion Hoe-LianGohNanyang Technological University, Singapore - NancyChenInstitute for Infocomm Research, A*STAR + NancyChenInstitute for Infocomm Research, A*STAR 129-143 While Generative AI has demonstrated strong potential and versatility in content generation, its application to educational contexts presents several challenges. Models often fail to align with curriculum standards and maintain grade-appropriate reading levels consistently. Furthermore, STEM education poses additional challenges in balancing scientific explanations with everyday language when introducing complex and abstract ideas and phenomena to younger students.In this work, we propose COGENT, a curriculum-oriented framework for generating grade-appropriate educational content. We incorporate three curriculum components (science concepts, core ideas, and learning objectives), control readability through length, vocabulary, and sentence complexity, and adopt a “wonder-based” approach to increase student engagement and interest. We conduct a multi-dimensional evaluation via both LLM-as-a-judge and human expert analysis. Experimental results show that COGENT consistently produces grade-appropriate passages that are comparable or superior to human references. Our work establishes a viable approach for scaling adaptive and high-quality learning resources. 2025.bea-1.10 @@ -170,7 +170,7 @@ Automatic concept extraction for learning domain modeling: A weakly supervised approach using contextualized word embeddings KordulaDe KuthyUniversität Tübingen LeanderGirrbachUniverstät Tübingen - DetmarMeurersLeibniz-Institut für Wissensmedien (IWM) + DetmarMeurersLeibniz-Institut für Wissensmedien (IWM) 175-185 Heterogeneity in student populations poses achallenge in formal education, with adaptivetextbooks offering a potential solution by tai-loring content based on individual learner mod-els. However, creating domain models for text-books typically demands significant manual ef-fort. Recent work by Chau et al. (2021) demon-strated automated concept extraction from dig-ital textbooks, but relied on costly domain-specific manual annotations. This paper in-troduces a novel, scalable method that mini-mizes manual effort by combining contextu-alized word embeddings with weakly super-vised machine learning. Our approach clustersword embeddings from textbooks and identi-fies domain-specific concepts using a machinelearner trained on concept seeds automaticallyextracted from Wikipedia. We evaluate thismethod using 28 economics textbooks, com-paring its performance against a tf-idf baseline,a supervised machine learning baseline, theRAKE keyword extraction method, and humandomain experts. Results demonstrate that ourweakly supervised method effectively balancesaccuracy with reduced annotation effort, offer-ing a practical solution for automated conceptextraction in adaptive learning environments. 2025.bea-1.13 @@ -267,7 +267,7 @@ Do <fixed-case>LLM</fixed-case>s Give Psychometrically Plausible Responses in Educational Assessments? AndreasSäuberliLMU Munich DiegoFrassinelliLMU - BarbaraPlankLMU Munich + BarbaraPlankLMU Munich 266-278 Knowing how test takers answer items in educational assessments is essential for test development, to evaluate item quality, and to improve test validity. However, this process usually requires extensive pilot studies with human participants. If large language models (LLMs) exhibit human-like response behavior to test items, this could open up the possibility of using them as pilot participants to accelerate test development. In this paper, we evaluate the human-likeness or psychometric plausibility of responses from 18 instruction-tuned LLMs with two publicly available datasets of multiple-choice test items across three subjects: reading, U.S. history, and economics. Our methodology builds on two theoretical frameworks from psychometrics which are commonly used in educational assessment, classical test theory and item response theory. The results show that while larger models are excessively confident, their response distributions can be more human-like when calibrated with temperature scaling. In addition, we find that LLMs tend to correlate better with humans in reading comprehension items compared to other subjects. However, the correlations are not very strong overall, indicating that LLMs should not be used for piloting educational assessments in a zero-shot setting. 2025.bea-1.21 @@ -485,7 +485,7 @@ BasharAlhafniNew York University KirillChirkunovMBZUAI NizarHabashNew York University Abu Dhabi - TedBriscoeMBZUAI + TedBriscoeMBZUAI 549-563 Automated Essay Scoring (AES) plays a crucial role in assessing language learners’ writingquality, reducing grading workload, and providing real-time feedback. The lack of annotatedessay datasets inhibits the development of Arabic AES systems. This paper leverages LargeLanguage Models (LLMs) and Transformermodels to generate synthetic Arabic essays forAES. We prompt an LLM to generate essaysacross the Common European Framework ofReference (CEFR) proficiency levels and introduce and compare two approaches to errorinjection. We create a dataset of 3,040 annotated essays with errors injected using our twomethods. Additionally, we develop a BERTbased Arabic AES system calibrated to CEFRlevels. Our experimental results demonstratethe effectiveness of our synthetic dataset in improving Arabic AES performance. We makeour code and data publicly available 2025.bea-1.40 @@ -581,7 +581,7 @@ Lessons Learned in Assessing Student Reflections with <fixed-case>LLM</fixed-case>s MohamedElarabyUniversity of Pittsburgh - DianeLitmanUniversity of Pittsburgh + DianeLitmanUniversity of Pittsburgh 672-686 Advances in Large Language Models (LLMs) have sparked growing interest in their potential as explainable text evaluators. While LLMs have shown promise in assessing machine-generated texts in tasks such as summarization and machine translation, their effectiveness in evaluating human-written content—such as student writing in classroom settings—remains underexplored. In this paper, we investigate LLM-based specificity assessment of student reflections written in response to prompts, using three instruction-tuned models. Our findings indicate that although LLMs may underperform compared to simpler supervised baselines in terms of scoring accuracy, they offer a valuable interpretability advantage. Specifically, LLMs can generate user-friendly explanations that enhance the transparency and usability of automated specificity scoring systems. 2025.bea-1.48 @@ -646,7 +646,7 @@ Improving In-context Learning Example Retrieval for Classroom Discussion Assessment with Re-ranking and Label Ratio Regulation NhatTranUniversity of Pittsburgh - DianeLitmanUniversity of Pittsburgh + DianeLitmanUniversity of Pittsburgh BenjaminPierceUniversity of Pittsburgh RichardCorrentiUniversity of Pittsburgh Lindsay ClareMatsumuraUniversity of Pittsburgh @@ -670,7 +670,7 @@ Assessing Critical Thinking Components in <fixed-case>R</fixed-case>omanian Secondary School Textbooks: A Data Mining Approach to the <fixed-case>ROTEX</fixed-case> Corpus MadalinaChitezWest University of Timisoara - LiviuDinuUniversity of Bucharest + LiviuDinuUniversity of Bucharest MariusMicluta-CampeanuUniversity of Bucharest Ana-MariaBucurInterdisciplinary School of Doctoral Studies RoxanaRogobeteWest University of Timișoara @@ -697,7 +697,7 @@ Beyond Linear Digital Reading: An <fixed-case>LLM</fixed-case>-Powered Concept Mapping Approach for Reducing Cognitive Load JunzhiHanEmory University - Jinho D.ChoiEmory University + Jinho D.ChoiEmory University 805-817 This paper presents an LLM-powered approach for generating concept maps to enhance digital reading comprehension in higher education. While particularly focused on supporting neurodivergent students with their distinct information processing patterns, this approach benefits all learners facing the cognitive challenges of digital text. We use GPT-4o-mini to extract concepts and relationships from educational texts across ten diverse disciplines using open-domain prompts without predefined categories or relation types, enabling discipline-agnostic extraction. Section-level processing achieved higher precision (83.62%) in concept extraction, while paragraph-level processing demonstrated superior recall (74.51%) in identifying educationally relevant concepts. We implemented an interactive web-based visualization tool https://simplified-cognitext.streamlit.app that transforms extracted concepts into navigable concept maps. User evaluation (n=14) showed that participants experienced a 31.5% reduction in perceived cognitive load when using concept maps, despite spending more time with the visualization (22.6% increase). They also completed comprehension assessments more efficiently (14.1% faster) with comparable accuracy. This work demonstrates that LLM-based concept mapping can significantly reduce cognitive demands while supporting non-linear exploration. 2025.bea-1.58 @@ -883,7 +883,7 @@ A Framework for Proficiency-Aligned Grammar Practice in <fixed-case>LLM</fixed-case>-Based Dialogue Systems LuisaRibeiro-FluchtUniversity of Tuebingen XiaobinChenTübingen Universität - DetmarMeurersLeibniz-Institut für Wissensmedien (IWM) + DetmarMeurersLeibniz-Institut für Wissensmedien (IWM) 978-987 Communicative practice is critical for second language development, yet learners often lack targeted, engaging opportunities to use new grammar structures. While large language models (LLMs) can offer coherent interactions, they are not inherently aligned with pedagogical goals or proficiency levels. In this paper, we explore how LLMs can be integrated into a structured framework for contextually-constrained, grammar-focused interaction, building on an existing goal-oriented dialogue system. Through controlled simulations, we evaluate five LLMs across 75 A2-level tasks under two conditions: (i) grammar-targeted, task-anchored prompting and (ii) the addition of a lightweight post-generation validation pipeline using a grammar annotator.Our findings show that template-based prompting alone substantially increases target-form coverage up to 91.4% for LLaMA 3.1-70B-Instruct, while reducing overly advanced grammar usage. The validation pipeline provides an additional boost in form-focused tasks, raising coverage to 96.3% without significantly degrading appropriateness. 2025.bea-1.74 @@ -1170,7 +1170,7 @@ <fixed-case>NLIP</fixed-case> at <fixed-case>BEA</fixed-case> 2025 Shared Task: Evaluation of Pedagogical Ability of <fixed-case>AI</fixed-case> Tutors TrishitaSahaIIT Hyderabad ShrenikGanguliIIT Hyderabad - Maunendra SankarDesarkarIIT Hyderabad + Maunendra SankarDesarkarIIT Hyderabad 1242-1253 This paper describes the system created for the BEA 2025 Shared Task on Pedagogical Ability Assessment of AI-powered Tutors. The task aims to assess how well AI tutors identify and locate errors made by students, provide guidance and ensure actionability, among other features of their responses in educational dialogues. Transformer-based models, especially DeBERTa and RoBERTa, are improved by multitask learning, threshold tweaking, ordinal regression, and oversampling. The efficiency of pedagogically driven training methods and bespoke transformer models for evaluating AI tutor quality is demonstrated by the high performance of their best systems across all evaluation tracks. 2025.bea-1.99 diff --git a/data/xml/2025.bionlp.xml b/data/xml/2025.bionlp.xml index eadf2861b0..9b820873b5 100644 --- a/data/xml/2025.bionlp.xml +++ b/data/xml/2025.bionlp.xml @@ -6,7 +6,7 @@ DinaDemner-Fushman SophiaAnaniadou MakotoMiwa - JunichiTsujii + JunichiTsujii Association for Computational Linguistics
Viena, Austria
August @@ -253,7 +253,7 @@ <fixed-case>Q</fixed-case>o<fixed-case>LAS</fixed-case>: A <fixed-case>R</fixed-case>eddit Corpus of Health-Related Quality of Life Aspects of Mental Disorders LynnGreschnerUniversity of Bamberg - AmelieWührlUniversity of Stuttgart + AmelieWührlUniversity of Stuttgart RomanKlingerUniversity of Bamberg 201-216 Quality of Life (QoL) refers to a person’s subjective perception of various aspects of their life. For medical practitioners, it is one of the most important concepts for treatment decisions. Therefore, it is essential to understand in which aspects a medical condition affects a patient’s subjective perception of their life. With this paper, we focus on the under-resourced domain of mental health-related QoL, and contribute the first corpus to study and model this concept: We (1) annotate 240 Reddit posts with a set of 11 QoL aspects (such as ‘independence’, ‘mood’, or ‘relationships’) and their sentiment polarity. Based on this novel corpus, we (2) evaluate a pipeline to detect QoL mentions and classify them into aspects using open-domain aspect-based sentiment analysis. We find that users frequently discuss health-related QoL in their posts, focusing primarily on the aspects ‘relationships’ and ‘selfimage’. Our method reliably predicts such mentions and their sentiment, however, detecting fine-grained individual aspects remains challenging. An analysis of a large corpus of automatically labeled data reveals that social media content contains novel aspects pertinent to patients that are not covered by existing QoL taxonomies. @@ -285,7 +285,7 @@ JoãoRuanoPriberam GonçaloCorreiaPriberam LeonorBarreirosPriberam - AfonsoMendesPriberam Informática, SA. + AfonsoMendesPriberam Informática, SA. 225-239 Biomedical Named Entity Recognition presents significant challenges due to the complexity of biomedical terminology and inconsistencies in annotation across datasets. This paper introduces SRU-NER (Slot-based Recurrent Unit NER), a novel approach designed to handle nested named entities while integrating multiple datasets through an effective multi-task learning strategy. SRU-NER mitigates annotation gaps by dynamically adjusting loss computation to avoid penalizing predictions of entity types absent in a given dataset. Through extensive experiments, including a cross-corpus evaluation and human assessment of the model’s predictions, SRU-NER achieves competitive performance in biomedical and general-domain NER tasks, while improving cross-domain generalization. 2025.bionlp-1.20 @@ -346,7 +346,7 @@ Beyond Citations: Integrating Finding-Based Relations for Improved Biomedical Article Representations YuanLiangQueen Mary University of London - MassimoPoesioQueen Mary University of London and University of Utrecht + MassimoPoesioQueen Mary University of London and University of Utrecht RoonakRezvaniRecursion 297-306 High-quality scientific article embeddings are essential for tasks like document retrieval, citation recommendation, and classification. Traditional citation-based approaches assume citations reflect semantic similarity—an assumption that introduces bias and noise. Recent models like SciNCL and SPECTER2 have attempted to refine citation-based representations but still struggle with noisy citation edges and fail to fully leverage textual information. To address these limitations, we propose a hybrid approach that combines Finding-Citation Graphs (FCG) with contrastive learning. Our method improves triplet selection by filtering out less important citations and incorporating finding similarity relations, leading to better semantic relationship capture. Evaluated on the SciRepEval benchmark, our approach consistently outperforms citation-only baselines, showing the value of text-based semantic structures. While we do not surpass state-of-the-art models in most tasks, our results reveal the limitations of purely citation-based embeddings and suggest paths for improvement through enhanced semantic integration and domain-specific adaptations. @@ -358,8 +358,8 @@ Converting Annotated Clinical Cases into Structured Case Report Forms PietroFerrazziUniversity of Padova - AlbertoLavelliFBK - BernardoMagniniFBK + AlbertoLavelliFBK + BernardoMagniniFBK 307-318 Case Report Forms (CRFs) are largely used in medical research as they ensure accuracy, reliability, and validity of results in clinical studies. However, publicly available, well-annotated CRF datasets are scarce, limiting the development of CRF slot filling systems able to fill in a CRF from clinical notes. To mitigate the scarcity of CRF datasets, we propose to take advantage of available datasets annotated for information extraction tasks and to convert them into structured CRFs. We present a semi-automatic conversion methodology, which has been applied to the E3C dataset in two languages (English and Italian), resulting in a new, high-quality dataset for CRF slot filling. Through several experiments on the created dataset, we report that slot filling achieves 59.7% for Italian and 67.3% for English on a closed Large Language Models (zero-shot) and worse performances on three families of open-source models, showing that filling CRFs is challenging even for recent state-of-the-art LLMs. 2025.bionlp-1.26 @@ -385,9 +385,9 @@ Overcoming Data Scarcity in Named Entity Recognition: Synthetic Data Generation with Large Language Models AnDaoThe University of Tokyo HirokiTeranishiRIKEN Center for Advanced Intelligence Project - YujiMatsumotoRiken Center for Advanced Intelligence Project + YujiMatsumotoRiken Center for Advanced Intelligence Project FlorianBoudinNantes University - AkikoAizawaNational Institute of Informatics + AkikoAizawaNational Institute of Informatics 328-340 Named Entity Recognition (NER) is crucial for extracting domain-specific entities from text, particularly in biomedical and chemical fields. Developing high-quality NER models in specialized domains is challenging due to the limited availability of annotated data, with manual annotation being a key method of data construction. However, manual annotation is time-consuming and requires domain expertise, making it difficult in specialized domains. Traditional data augmentation (DA) techniques also rely on annotated data to some extent, further limiting their effectiveness. In this paper, we propose a novel approach to synthetic data generation for NER using large language models (LLMs) to generate sentences based solely on a set of example entities. This method simplifies the augmentation process and is effective even with a limited set of entities.We evaluate our approach using BERT-based models on the BC4CHEMD, BC5CDR, and TDMSci datasets, demonstrating that synthetic data significantly improves model performance and robustness, particularly in low-resource settings. This work provides a scalable solution for enhancing NER in specialized domains, overcoming the limitations of manual annotation and traditional augmentation methods. 2025.bionlp-1.28 @@ -538,7 +538,7 @@ Loyola at <fixed-case>A</fixed-case>rch<fixed-case>EHR</fixed-case>-<fixed-case>QA</fixed-case> 2025: Exploring Unsupervised Attribution of Generated Text: Attention and Clustering-Based Methods RohanSethiLoyola University Chicago and Stritch School of Medicine - TimothyMillerBoston Children’s Hospital, Harvard Medical School + TimothyMillerBoston Children’s Hospital, Harvard Medical School MajidAfsharUniversity of Wisconsin-Madison DmitriyDligachLoyola University Chicago 22-26 @@ -722,7 +722,7 @@ AndrásSzlúkaUniversity of Szeged GáborKőrösiUniversity of Szeged ZsoltSzántóUniversity of Szeged - RichárdFarkasUniversity of Szeged + RichárdFarkasUniversity of Szeged 136-149 In this paper, we present the SzegedAI team’s submissions to the ArchEHR-QA 2025 shared task. Our approaches include multiple prompting techniques for large language models (LLMs), sentence similarity methods, and traditional feature engineering. We are aiming to explore both modern and traditional solutions to the task. To combine the strengths of these diverse methods, we employed different ensembling strategies. 2025.bionlp-share.17 @@ -839,7 +839,7 @@ ArshithaBasavarajInternational Institute of Information Technology, Bangalore, India HugoAlatrista-SalasDe Vinci Research Center, Paris, France FranciscoPereiraNational Institute of Mental Health - DianaInkpenUniversity of Ottawa + DianaInkpenUniversity of Ottawa 215-231 In this work, we present our approach to addressing all subtasks of the BioLaySumm 2025 shared task by leveraging prompting and retrieval strategies, as well as multimodal input fusion. Our method integrates: (1) zero-shot and few-shot prompting with large language models (LLMs); (2) semantic similarity-based dynamic few-shot prompting; (3) retrieval-augmented generation (RAG) incorporating biomedical knowledge from the Unified Medical Language System (UMLS); and (4) a multimodal fusion pipeline that combines images and captions using image-text-to-text generation for enriched lay summarization. Our framework enables lightweight adaptation of pretrained LLMs for generating lay summaries from scientific articles and radiology reports. Using modern LLMs, including Llama-3.3-70B-Instruct and GPT-4.1, our 5cNLP team achieved third place in Subtask 1.2 and second place in Subtask 2.1, among all submissions. 2025.bionlp-share.27 diff --git a/data/xml/2025.bsnlp.xml b/data/xml/2025.bsnlp.xml index f5e0f95c95..aba4a67d66 100644 --- a/data/xml/2025.bsnlp.xml +++ b/data/xml/2025.bsnlp.xml @@ -5,7 +5,7 @@ Proceedings of the 10th Workshop on Slavic Natural Language Processing (Slavic NLP 2025) JakubPiskorski PavelPřibáň - PreslavNakov + PreslavNakov RomanYangarber MichalMarcinczuk Association for Computational Linguistics @@ -249,7 +249,7 @@ ChuhanWangDalian University of Technology DailinLiDalian University of Technology YananWangDalian University of Technology - JianWangDalian University of Technology + JianWangDalian University of Technology HongfeiLinDalian University of Technology 177-182 This paper presents our submission to Subtask 2 (multi-label classification of persuasion techniques) of the Shared Task on Detection and Classification of Persuasion Techniques in Slavic Languages at SlavNLP 2025. Our method leverages a teacher–student framework based on large language models (LLMs): a Qwen3 32B teacher model generates natural language explanations for annotated persuasion techniques, and a Qwen2.5 32B student model is fine-tuned to replicate both the teacher’s rationales and the final label predictions. We train our models on the official shared task dataset, supplemented by annotated resources from SemEval 2023 Task 3 and CLEF 2024 Task 3 covering English, Russian, and Polish to improve cross-lingual robustness. Our final system ranks 4th on BG, SI, and HR, and 5th on PL in terms of micro-F1 score among all participating teams. diff --git a/data/xml/2025.bucc.xml b/data/xml/2025.bucc.xml index 134497b3c1..d87db5b26e 100644 --- a/data/xml/2025.bucc.xml +++ b/data/xml/2025.bucc.xml @@ -5,7 +5,7 @@ Proceedings of the 18th Workshop on Building and Using Comparable Corpora (BUCC) SergeSharoff Ayla RigoutsTerryn - PierreZweigenbaum + PierreZweigenbaum ReinhardRapp Association for Computational Linguistics
Abu Dhabi, UAE
@@ -39,7 +39,7 @@
Towards Truly Open, Language-Specific, Safe, Factual, and Specialized Large Language Models - PreslavNakov + PreslavNakov 18 First, we will argue for the need for fully transparent open-source large language models (LLMs), and we will describe the efforts of MBZUAI’s Institute on Foundation Models (IFM) towards that based on the LLM360 initiative. Second, we will argue for the need for language-specific LLMs, and we will share our experience from building Jais, the world’s leading open Arabic-centric foundation and instruction-tuned large language model, Nanda, our recently released open Hindi LLM, and some other models. Third, we will argue for the need for safe LLMs, and we will present Do-Not-Answer, a dataset for evaluating the guardrails of LLMs, which is at the core of the safety mechanisms of our LLMs. Forth, we will argue for the need for factual LLMs, we will discuss the factuality challenges that LLMs pose. We will then present some recent relevant tools for addressing these challenges developed at MBZUAI: (i) OpenFactCheck, a framework for fact-checking LLM output, for building customized fact-checking systems, and for benchmarking LLMs for factuality, (ii) LM-Polygraph, a tool for predicting an LLM’s uncertainty in its output using cheap and fast uncertainty quantification techniques, and (iii) LLM-DetectAIve, a tool for machine-generated text detection. Finally, we will argue for the need for specialized models, and we will present the zoo of LLMs currently being developed at MBZUAI’s IFM. 2025.bucc-1.3 @@ -59,7 +59,7 @@ <fixed-case>BEIR</fixed-case>-<fixed-case>NL</fixed-case>: Zero-shot Information Retrieval Benchmark for the <fixed-case>D</fixed-case>utch Language EhsanLotfi NikolayBanar - WalterDaelemans + WalterDaelemans 36–45 Zero-shot evaluation of information retrieval (IR) models is often performed using BEIR; a large and heterogeneous benchmark composed of multiple datasets, covering different retrieval tasks across various domains. Although BEIR has become a standard benchmark for the zero-shot setup, its exclusively English content reduces its utility for underrepresented languages in IR, including Dutch. To address this limitation and encourage the development of Dutch IR models, we introduce BEIR-NL by automatically translating the publicly accessible BEIR datasets into Dutch. Using BEIR-NL, we evaluated a wide range of multilingual dense ranking and reranking models, as well as the lexical BM25 method. Our experiments show that BM25 remains a competitive baseline, and is only outperformed by the larger dense models trained for retrieval. When combined with reranking models, BM25 achieves performance on par with the best dense ranking models. In addition, we explored the impact of translation on the data by back-translating a selection of datasets to English, and observed a performance drop for both dense and lexical methods, indicating the limitations of translation for creating benchmarks. BEIR-NL is publicly available on the Hugging Face hub. 2025.bucc-1.5 @@ -79,7 +79,7 @@ The Role of Handling Attributive Nouns in Improving <fixed-case>C</fixed-case>hinese-To-<fixed-case>E</fixed-case>nglish Machine Translation - AdamMeyers + AdamMeyers Rodolfo JoelZevallos John E.Ortega LisaWang @@ -94,7 +94,7 @@ BorjaHerce DemianInostroza Améstica AndreasScherbakov - Eduard H.Hovy + Eduard H.Hovy EkaterinaVylomova 62–72 Linguistic fieldwork is an important component in language documentation and the creation of comprehensive linguistic corpora. Despite its significance, the process is often lengthy, exhaustive, and time-consuming. This paper presents a novel model that guides a linguist during the fieldwork and accounts for the dynamics of linguist-speaker interactions. We introduce a novel framework that evaluates the efficiency of various sampling strategies for obtaining morphological data and assesses the effectiveness of state-of-the-art neural models in generalising morphological structures. Our experiments highlight two key strategies for improving the efficiency: (1) increasing the diversity of annotated data by uniform sampling among the cells of the paradigm tables, and (2) using model confidence as a guide to enhance positive interaction by providing reliable predictions during annotation. @@ -103,7 +103,7 @@ Comparable Corpora: Opportunities for New Research Directions - Kenneth WardChurch + Kenneth WardChurch 73–82 Most conference papers present new results, but this paper will focus more on opportunities for the audience to make their own contributions. This paper is intended to challenge the community to think more broadly about what we can do with comparable corpora. We will start with a review of the history, and then suggest new directions for future research. 2025.bucc-1.9 @@ -114,7 +114,7 @@ ManonScholivet AgataSavary LouisEstève - MarieCandito + MarieCandito CarlosRamisch 83–98 The annotation of large text corpora is essential for many tasks. We present here a large automatically annotated corpus for French. This corpus is separated into two parts: the first from BigScience, and the second from HPLT. The annotated documents from HPLT were selected in order to optimise the lexical diversity of the final corpus SELEXINI. An analysis of the impact of this selection was carried out on syntactic diversity, as well as on the quality of the new words resulting from the HPLT part of SELEXINI. We have shown that despite the introduction of interesting new words, the texts extracted from HPLT are very noisy. Furthermore, increasing lexical diversity did not increase syntactic diversity. diff --git a/data/xml/2025.c3nlp.xml b/data/xml/2025.c3nlp.xml index 302fa122fc..dd849b5177 100644 --- a/data/xml/2025.c3nlp.xml +++ b/data/xml/2025.c3nlp.xml @@ -60,7 +60,7 @@ <fixed-case>I</fixed-case>nsp<fixed-case>AI</fixed-case>red: Cross-cultural Inspiration Detection and Analysis in Real and <fixed-case>LLM</fixed-case>-generated Social Media Data OanaIgnatSanta Clara University Gayathri GaneshLakshmy - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan 35-49 Inspiration is linked to various positive outcomes, such as increased creativity, productivity, and happiness. Although inspiration has great potential, there has been limited effort toward identifying content that is inspiring, as opposed to just engaging or positive. Additionally, most research has concentrated on Western data, with little attention paid to other cultures. This work is the first to study cross-cultural inspiration through machine learning methods. We aim to identify and analyze real and AI-generated cross-cultural inspiring posts. To this end, we compile and make publicly available the InspAIred dataset, which consists of 2,000 real inspiring posts, 2,000 real non-inspiring posts, and 2,000 generated inspiring posts evenly distributed across India and the UK. The real posts are sourced from Reddit, while the generated posts are created using the GPT-4 model. Using this dataset, we conduct extensive computational linguistic analyses to (1) compare inspiring content across cultures, (2) compare AI-generated inspiring posts to real inspiring posts, and (3) determine if detection models can accurately distinguish between inspiring content across cultures and data sources. 2025.c3nlp-1.4 @@ -83,7 +83,7 @@ <fixed-case>K</fixed-case>orean Stereotype Content Model: Translating Stereotypes Across Cultures Michelle YoungJinKimMichigan State University - KristenJohnsonMichigan State University + KristenJohnsonMichigan State University 59-70 To address bias in language models, researchers are leveraging established social psychology research on stereotyping. This interdisciplinary approach uses frameworks like the Stereotype Content Model (SCM) to understand how stereotypes about social groups are formed and perpetuated. The SCM posits that stereotypes are based on two dimensions: warmth (intent to harm) and competence (ability to harm). This framework has been applied in NLP for various tasks, including stereotype identification, bias mitigation, and hate speech detection. While the SCM has been extensively studied in English language models and Western cultural contexts, its applicability as a cross-cultural measure of stereotypes remains an open research question. This paper explores the cross-cultural validity of the SCM by developing a Korean Stereotype Content Model (KoSCM). We create a Korean warmth-competence lexicon through machine translation of existing English lexicons, validated by an expert translator, and utilize this lexicon to develop a labeled training dataset of Korean sentences. This work presents the first extension of SCM lexicons to a non-English language (Korean), aiming to broaden understanding of stereotypes and cultural dynamics. 2025.c3nlp-1.6 @@ -96,7 +96,7 @@ SeogyeongJeongKorea Advanced Institute of Science & Technology SeyoungSongKAIST YohanLeeElectronics and Telecommunications Research Institute - AliceOhKorea Advanced Institute of Science and Technology + AliceOhKorea Advanced Institute of Science and Technology 71-88 Content moderation platforms concentrate resources on English content despite serving predominantly non-English speaking users.Also, given the scarcity of native moderators for low-resource languages, non-native moderators must bridge this gap in moderation tasks such as hate speech moderation.Through a user study, we identify that non-native moderators struggle with understanding culturally-specific knowledge, sentiment, and internet culture in the hate speech.To assist non-native moderators, we present LLM-C3MOD, a human-LLM collaborative pipeline with three steps: (1) RAG-enhanced cultural context annotations; (2) initial LLM-based moderation; and (3) targeted human moderation for cases lacking LLM consensus.Evaluated on Korean hate speech dataset with Indonesian and German participants, our system achieves 78% accuracy (surpassing GPT-4o’s 71% baseline) while reducing human workload by 83.6%.In addition, cultural context annotations improved non-native moderator accuracy from 22% to 61%, with humans notably excelling at nuanced tasks where LLMs struggle.Our findings demonstrate that non-native moderators, when properly supported by LLMs, can effectively contribute to cross-cultural hate speech moderation. 2025.c3nlp-1.7 @@ -117,7 +117,7 @@ Towards Region-aware Bias Evaluation Metrics AnganaBorah AparnaGarimellaAdobe Research - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan 108-131 When exposed to human-generated data, language models are known to learn and amplify societal biases. While previous works introduced metrics that can be used to assess the bias in these models, they rely on assumptions that may not be universally true. For instance, a gender bias dimension commonly used by these metrics is that of family–career, but this may not be the only common bias in certain regions of the world. In this paper, we identify topical differences in gender bias across different regions and propose a region-aware bottom-up approach for bias assessment. Several of our proposed region-aware gender bias dimensions are found to be aligned with the human perception of gender biases in these regions. 2025.c3nlp-1.9 @@ -133,8 +133,8 @@ Young MinChoUniversity of Pennsylvania MaitreyiRedkar SamindaraHardikar-SawantShri Jagdishprasad Jhabarmal Tibrewala University - LyleUngar - Sharath ChandraGuntukuUniversity of Pennsylvania + LyleUngar + Sharath ChandraGuntukuUniversity of Pennsylvania 132-142 Culture moderates the way individuals perceive and express mental distress. Current understandings of mental health expressions on social media, however, are predominantly derived from WEIRD (Western, Educated, Industrialized, Rich, and Democratic) contexts. To address this gap, we examine mental health posts on Reddit made by individuals geolocated in India, to identify variations in social media language specific to the Indian context compared to users from Western nations. Our experiments reveal significant psychosocial variations in emotions and temporal orientation. This study demonstrates the potential of social media platforms for identifying cross-cultural differences in mental health expressions (e.g. seeking advice in India vs seeking support by Western users). Significant linguistic variations in online mental health-related language emphasize the importance of developing precision-targeted interventions that are culturally appropriate. 2025.c3nlp-1.10 @@ -151,7 +151,7 @@ HuzamaAhmadKorea Advanced Institute of Science & Technology Na MinAnKAIST JamesThorneKAIST - AliceOhKorea Advanced Institute of Science and Technology + AliceOhKorea Advanced Institute of Science and Technology 143-154 In a highly globalized world, it is important for multi-modal large language models (MLLMs) to recognize and respond correctly to mixed-cultural inputs.For example, a model should correctly identify kimchi (Korean food) in an image both when an Asian woman is eating it, as well as an African man is eating it.However, current MLLMs show an over-reliance on the visual features of the person, leading to misclassification of the entities. To examine the robustness of MLLMs to different ethnicity, we introduce MIXCUBE, a cross-cultural bias benchmark, and study elements from five countries and four ethnicities. Our findings reveal that MLLMs achieve both higher accuracy and lower sensitivity to such perturbation for high-resource cultures, but not for low-resource cultures. GPT-4o, the best-performing model overall, shows up to 58% difference in accuracy between the original and perturbed cultural settings in low-resource cultures 2025.c3nlp-1.11 diff --git a/data/xml/2025.calcs.xml b/data/xml/2025.calcs.xml index 41d31e1d76..33679a8af9 100644 --- a/data/xml/2025.calcs.xml +++ b/data/xml/2025.calcs.xml @@ -3,14 +3,14 @@ Proceedings of the 7th Workshop on Computational Approaches to Linguistic Code-Switching - Genta IndraWinata + Genta IndraWinata SudiptaKar MarinaZhukova ThamarSolorio XiAi InjyHamed Mahardika Krisna KrisnaIhsani - Derry TantiWijaya + Derry TantiWijaya GarryKuwanto Association for Computational Linguistics
Albuquerque, New Mexico, USA
@@ -31,7 +31,7 @@ <fixed-case>E</fixed-case>uskañol<fixed-case>DS</fixed-case>: A Naturally Sourced Corpus for <fixed-case>B</fixed-case>asque-<fixed-case>S</fixed-case>panish Code-Switching MaiteHerediaHiTZ Center - Ixa, University of the Basque Country UPV/EHU JeremyBarnesHiTZ Center - Ixa, University of the Basque Country UPV/EHU - AitorSoroaHiTZ Center - Ixa, University of the Basque Country UPV/EHU + AitorSoroaHiTZ Center - Ixa, University of the Basque Country UPV/EHU 1-5 Code-switching (CS) remains a significant challenge in Natural Language Processing (NLP), mainly due a lack of relevant data. In the context of the contact between the Basque and Spanish languages in the north of the Iberian Peninsula, CS frequently occurs in both formal and informal spontaneous interactions. However, resources to analyse this phenomenon and support the development and evaluation of models capable of understanding and generating code-switched language for this language pair are almost non-existent. We introduce a first approach to develop a naturally sourced corpus for Basque-Spanish code-switching. Our methodology consists of identifying CS texts from previously available corpora using language identification models, which are then manually validated to obtain a reliable subset of CS instances. We present the properties of our corpus and make it available under the name EuskañolDS. 2025.calcs-1.1 diff --git a/data/xml/2025.cgmta.xml b/data/xml/2025.cgmta.xml index dea0f5a823..e0b9875ddd 100644 --- a/data/xml/2025.cgmta.xml +++ b/data/xml/2025.cgmta.xml @@ -20,7 +20,7 @@ An Annotated Error Corpus for <fixed-case>E</fixed-case>speranto - EckhardBick + EckhardBick 1–8 This paper presents and evaluates a new multi-genre error corpus for (written) Esperanto, EspEraro, building on both learner, news and internet data and covering both ordinary spelling errors and real-word errors such as grammatical and word choice errors. Because the corpus has been annotated not only for errors, error types and corrections, but also with Constraint Grammar (CG) tags for part-of-speech, inflection, affixation, syntactic function, dependency and semantic class, it allows users to linguistically contextualize errors and to craft and test CG rules aiming at the recognition and/or correction of the various error types covered in the corpus. The resource was originally created for regression-testing a newly developed spell- and grammar checker, and contains about 75,000 tokens ( 4,000 sentences), with 3,330 tokens annotated for one or more errors and a combined correction suggestion. We discuss the different error types and evaluate their weight in the corpus. Where relevant, we explain the role of Constraint Grammar (CG) in the identification and correction of the individual error types. 2025.cgmta-1.1 @@ -45,7 +45,7 @@ Towards Natural Language Explanations of Constraint Grammar Rules - DanielSwanson + DanielSwanson 28–31 This paper presents a general-purpose parser for static analysis of Constraint Grammar rules (that is, examining only the rules, not potential inputs and outputs) and applies it to the task of translating rules into comprehensible explanations of behavior. An interactive interface for exploring how individual components of each rule contribute to these translations is also presented. 2025.cgmta-1.4 @@ -90,7 +90,7 @@ <fixed-case>D</fixed-case>ivvunspell—<fixed-case>F</fixed-case>inite-State Spell-Checking and Correction on Modern Platforms Flammie APirinen - Sjur NørstebøMoshagen + Sjur NørstebøMoshagen 59–63 Spell-checking and correction is one of the key applications of natural language support. Historically, for the biggest, less morphologically complex languages, spell-checking and correction could be implemented by relatively simple means; however, for morphologically complex and low-resource languages, the solutions were often suboptimal. Finite-state methods are the state of the art in rule-based natural language processing and also for spell-checking and correction they have been effectively used. In this article, we show some recent developments of a finite-state spell-checker implementation that works with modern operating systems and platforms. 2025.cgmta-1.9 diff --git a/data/xml/2025.chipsal.xml b/data/xml/2025.chipsal.xml index d4e0cec263..ca523f60be 100644 --- a/data/xml/2025.chipsal.xml +++ b/data/xml/2025.chipsal.xml @@ -241,7 +241,7 @@ Leveraging Machine-Generated Data for Joint Intent Detection and Slot Filling in <fixed-case>B</fixed-case>angla: A Resource-Efficient Approach A H M RezaulKarim - ÖzlemUzuner + ÖzlemUzuner 208–216 Natural Language Understanding (NLU) is crucial for conversational AI, yet low-resource languages lag behind in essential tasks like intent detection and slot-filling. To address this gap, we converted the widely-used English SNIPS dataset to Bangla using LLaMA 3, creating a dataset that captures the linguistic complexities of the language. With this translated dataset for model training, our experimental evaluation compares both independent and joint modeling approaches using transformer architecture. Results demonstrate that a joint approach based on multilingual BERT (mBERT) achieves superior performance, with 97.83% intent accuracy and 91.03% F1 score for slot filling. This work advances NLU capabilities for Bangla and provides insights for developing robust models in other low-resource languages. 2025.chipsal-1.21 diff --git a/data/xml/2025.chum.xml b/data/xml/2025.chum.xml index 0c913b2399..caa78dfa90 100644 --- a/data/xml/2025.chum.xml +++ b/data/xml/2025.chum.xml @@ -3,7 +3,7 @@ Proceedings of the 1st Workshop on Computational Humor (CHum) - Christian F.Hempelmann + Christian F.Hempelmann JuliaRayz TiansiDong TristanMiller @@ -60,7 +60,7 @@ PrashantKodali AshnaDua KapilRajesh Kavitha - ManishShrivastava + ManishShrivastava 32–57 Puns, as a linguistic phenomenon, hold significant importance in both humor and language comprehension. While extensive research has been conducted in the realm of pun generation in English, there exists a notable gap in the exploration of pun generation within code-mixed text, particularly in Hindi-English code-mixed text. This study addresses this gap by offering a computational method specifically designed to create puns in Hindi-English code-mixed text. In our investigation, we delve into three distinct methodologies aimed at pun generation utilizing pun-alternate word pairs. Furthermore, this novel dataset, HECoP, comprising of 2000 human-annotated sentences serves as a foundational resource for training diverse pun detection models. Additionally, we developed a structured pun generation pipeline capable of generating puns from a single input word without relying on predefined word pairs. Through rigorous human evaluations, our study demonstrates the efficacy of our proposed models in generating code-mixed puns. The findings presented herein lay a solid groundwork for future endeavours in pun generation and computational humor within diverse linguistic contexts. 2025.chum-1.5 diff --git a/data/xml/2025.cl.xml b/data/xml/2025.cl.xml index 650b92e632..7b40560d7e 100644 --- a/data/xml/2025.cl.xml +++ b/data/xml/2025.cl.xml @@ -22,7 +22,7 @@ <fixed-case>MUC</fixed-case>king In, or Fifty Years in Information Extraction - RalphGrishman + RalphGrishman 10.1162/coli_a_00547 I want to thank the ACL for this Lifetime Achievement Award. I am deeply honored to be receiving it. I would also like to thank the students, faculty, and researchers who were members of the Proteus Project during most of my professional lifetime. It was an honor to serve that group. 7–22 @@ -33,7 +33,7 @@ e<fixed-case>RST</fixed-case>: A Signaled Graph Theory of Discourse Relations and Organization AmirZeldes TatsuyaAoyama - Yang JanetLiu + Yang JanetLiu SiyaoPeng DebopamDas LukeGessler @@ -49,7 +49,7 @@ ArnisaFazla ChantalAmrhein TomKocmi - MarkSteedman + MarkSteedman AlexandraBirch RicoSennrich LianeGuillou @@ -95,7 +95,7 @@ A Survey on <fixed-case>LLM</fixed-case>-Generated Text Detection: Necessity, Methods, and Future Directions JunchaoWu - ShuYang + ShuYang RunzheZhan YulinYuan Lidia SamChao @@ -169,7 +169,7 @@ WeiHe Tiago KramerVieira MarcosGarcia - CarolinaScarton + CarolinaScarton MarcoIdiart AlineVillavicencio 10.1162/coli_a_00546 @@ -238,7 +238,7 @@ DiyiYang DirkHovy DavidJurgens - BarbaraPlank + BarbaraPlank 10.1162/coli_a_00556 Language technologies have advanced substantially, particularly with the introduction of large language models. However, these advancements can exacerbate several issues that models have traditionally faced, including bias, evaluation, and risk. In this perspective piece, we argue that many of these issues share a common core: a lack of awareness of the social factors, interactions, and implications of the social environment in which NLP operates. We call this social awareness. While NLP is improving at addressing linguistic issues, there has been relatively limited progress in incorporating social awareness into models to work in all situations for all users. Integrating social awareness into NLP will improve the naturalness, usefulness, and safety of applications while also opening up new applications. Today, we are only at the start of a new, important era in the field. 689–703 diff --git a/data/xml/2025.cl4health.xml b/data/xml/2025.cl4health.xml index d5f0a927fe..ad920a5ba5 100644 --- a/data/xml/2025.cl4health.xml +++ b/data/xml/2025.cl4health.xml @@ -5,7 +5,7 @@ Proceedings of the Second Workshop on Patient-Oriented Language Processing (CL4Health) SophiaAnaniadou DinaDemner-Fushman - DeepakGupta + DeepakGupta PaulThompson Association for Computational Linguistics
Albuquerque, New Mexico
@@ -24,7 +24,7 @@ <fixed-case>P</fixed-case>atient<fixed-case>D</fixed-case>x: Merging Large Language Models for Protecting Data-Privacy in Healthcare - Jose G.MorenoPaul Sabatier University - IRIT + Jose G.MorenoPaul Sabatier University - IRIT JesusLovon-MelgarejoIRIT M’rickRobin-CharletUniversité Paul Sabatier ChristineDamase-MichelUniversité Paul Sabatier @@ -221,7 +221,7 @@ Leveraging External Knowledge Bases: Analyzing Presentation Methods and Their Impact on Model Performance Hui-SyuanYehLISN/CNRS & Université Paris Saclay ThomasLavergneLISN/CNRS & Université Paris Saclay - PierreZweigenbaumLISN, CNRS, Université Paris-Saclay + PierreZweigenbaumLISN, CNRS, Université Paris-Saclay 193-204 Integrating external knowledge into large language models has demonstrated potential for performance improvement across a wide range of tasks. This approach is particularly appealing in domain-specific applications, such as in the biomedical field. However, the strategies for effectively presenting external knowledge to these models remain underexplored. This study investigates the impact of different knowledge presentation methods and their influence on model performance. Our results show that inserting knowledge between demonstrations helps the models perform better, and improve smaller LLMs (7B) to perform on par with larger LLMs (175B). Our further investigation indicates that the performance improvement, however, comes more from the effect of additional tokens and positioning than from the relevance of the knowledge. 2025.cl4health-1.16 @@ -234,7 +234,7 @@ NicoloMichelettiUniversity of Manchester LifengHanThe University of Manchester WarrenDel-PintoUniversity of Manchester - GoranNenadicUniversity of Manchester + GoranNenadicUniversity of Manchester 205-218 2025.cl4health-1.17 belkadi-etal-2025-lt3 @@ -277,7 +277,7 @@ Am <fixed-case>I</fixed-case> eligible? Natural Language Inference for Clinical Trial Patient Recruitment: the Patient’s Point of View MathildeAguiarUniversité Paris-Saclay, CNRS, Laboratoire Interdisciplinaire des Sciences du Numérique, 91400, Orsay, France - PierreZweigenbaumLISN, CNRS, Université Paris-Saclay + PierreZweigenbaumLISN, CNRS, Université Paris-Saclay NonaNaderiUniversité Paris-Saclay 243-259 Recruiting patients to participate in clinical trials can be challenging and time-consuming. Usually, participation in a clinical trial is initiated by a healthcare professional and proposed to the patient. Promoting clinical trials directly to patients via online recruitment might help to reach them more efficiently. In this study, we address the case where a patient is initiating their own recruitment process and wants to determine whether they are eligible for a given clinical trial, using their own language to describe their medical profile. To study whether this creates difficulties in the patient-trial matching process, we design a new dataset and task, Natural Language Inference for Patient Recruitment (NLI4PR), in which patient-language profiles must be matched to clinical trials. We create it by adapting the TREC 2022 Clinical Trial Track dataset, which provides patients’ medical profiles, and rephrasing them manually using patient language. We also use the associated clinical trial reports where the patients are either eligible or excluded. We prompt several open-source Large Language Models on our task and achieve from 56.5 to 71.8 of F1 score using patient language, against 64.7 to 73.1 for the same task using medical language. When using patient language, we observe only a small loss in performance for the best model, suggesting that having the patient as a starting point could be adopted to help recruit patients for clinical trials. The corpus and code bases are all freely available on our GitHub and HuggingFace repositories. @@ -339,7 +339,7 @@ Medication Extraction and Entity Linking using Stacked and Voted Ensembles on <fixed-case>LLM</fixed-case>s PabloRomeroManchester Metropolitan University LifengHanThe University of Manchester - GoranNenadicUniversity of Manchester + GoranNenadicUniversity of Manchester 303-315 2025.cl4health-1.26 romero-etal-2025-medication @@ -394,7 +394,7 @@ PabloRomeroManchester Metropolitan University LiboRenUniversity of Manchester, UK LifengHanThe University of Manchester - GoranNenadicUniversity of Manchester + GoranNenadicUniversity of Manchester 340-348 2025.cl4health-1.30 romero-etal-2025-manchester @@ -404,7 +404,7 @@ <fixed-case>MNLP</fixed-case> at <fixed-case>P</fixed-case>er<fixed-case>A</fixed-case>ns<fixed-case>S</fixed-case>umm: A Classifier-Refiner Architecture for Improving the Classification of Consumer Health User Responses JooyeonLeeGeorge Mason University LuanPhamGeorge Mason University - ÖzlemUzunerGeorge Mason University + ÖzlemUzunerGeorge Mason University 349-358 Community question-answering (CQA) platforms provide a crucial space for users to share experiences, seek medical advice, and exchange health-related information. However, these platforms, by nature of their user-generated content as well as the complexity and subjectivity of natural language, remain a significant challenge for tasks related to the automatic classification of diverse perspectives. The PerAnsSumm shared task involves extracting perspective spans from community users’ answers, classifying them into specific perspective categories (Task A), and then using these perspectives and spans to generate structured summaries (Task B). Our focus is on Task A. To address this challenge, we propose a Classifier-Refiner Architecture (CRA), a two-stage framework designed to enhance classification accuracy. The first stage employs a Classifier to segment user responses into self-contained snippets and assign initial perspective labels along with a binary confidence value. If the classifier is not confident, a secondary Refiner stage is triggered, incorporating retrieval-augmented generation to enhance classification through contextual examples. Our methodology integrates instruction-driven classification, tone definitions, and Chain-of-Thought (CoT) prompting, leading to improved F1 scores compared to single-pass approaches. Experimental evaluations on the Perspective Summarization Dataset (PUMA) demonstrate that our framework improves classification performance by leveraging multi-stage decision-making. Our submission ranked among the top-performing teams, achieving an overall score of 0.6090, with high precision and recall in perspective classification. 2025.cl4health-1.31 diff --git a/data/xml/2025.climatenlp.xml b/data/xml/2025.climatenlp.xml index 7f87590b0e..1147de7000 100644 --- a/data/xml/2025.climatenlp.xml +++ b/data/xml/2025.climatenlp.xml @@ -34,7 +34,7 @@ Enhancing Retrieval for <fixed-case>ESGLLM</fixed-case> via <fixed-case>ESG</fixed-case>-<fixed-case>CID</fixed-case>: A Disclosure Content Index Finetuning Dataset for Mapping <fixed-case>GRI</fixed-case> and <fixed-case>ESRS</fixed-case> Shafiuddin RehanAhmed AnkitShahAccenture - Quan HungTranFacebook + Quan HungTranFacebook VivekKhetanAccenture Labs SukryoolKangAccenture AnkitMehtaAccenture @@ -235,7 +235,7 @@ DavidThulkeRWTH Aachen University and AppTek JakobKemmlerRheinisch Westfälische Technische Hochschule Aachen ChristianDugastNA - HermannNeyRheinisch Westfälische Technische Hochschule Aachen + HermannNeyRheinisch Westfälische Technische Hochschule Aachen 245-259 Large language models that use retrieval augmented generation have the potential to unlock valuable knowledge for researchers, policymakers, and the public by making long and technical climate-related documents more accessible. While this approach can help alleviate factual hallucinations by relying on retrieved passages as additional context, its effectiveness depends on whether the model’s output remains faithful to these passages. To address this, we explore the automatic assessment of faithfulness of different models in this setting. We then focus on ClimateGPT, a large language model specialised in climate science, to examine which factors in its instruction fine-tuning impact the model’s faithfulness. By excluding unfaithful subsets of the model’s training data, we develop ClimateGPT Faithful+, which achieves an improvement in faithfulness from 30% to 57% in supported atomic claims according to our automatic metric. 2025.climatenlp-1.17 diff --git a/data/xml/2025.clpsych.xml b/data/xml/2025.clpsych.xml index 6bda102964..be56c76278 100644 --- a/data/xml/2025.clpsych.xml +++ b/data/xml/2025.clpsych.xml @@ -6,7 +6,7 @@ AyahZirikly AndrewYates BartDesmet - MollyIreland + MollyIreland StevenBedrick SeanMacAvaney KfirBar @@ -43,7 +43,7 @@ SimonOstermannGerman Research Center for Artificial Intelligence PatrickGebhardGerman Research Center for Artificial Intelligence CordBeneckeDepartment of Psychology, University of Kassel, Kassel, Germany - Josefvan GenabithGerman Research Center for Artificial Intelligence + Josefvan GenabithGerman Research Center for Artificial Intelligence PhilippMüllerGerman Research Center for Artificial Intelligence 12-25 Psychodynamic conflicts are persistent, often unconscious themes that shape a person’s behaviour and experiences. Accurate diagnosis of psychodynamic conflicts is crucial for effective patient treatment and is commonly done via long, manually scored semi-structured interviews. Existing automated solutions for psychiatric diagnosis tend to focus on the recognition of broad disorder categories such as depression, and it is unclear to what extent psychodynamic conflicts which even the patient themselves may not have conscious access to could be automatically recognised from conversation. In this paper, we propose AutoPsyC, the first method for recognising the presence and significance of psychodynamic conflicts from full-length Operationalized Psychodynamic Diagnostics (OPD) interviews using Large Language Models (LLMs). Our approach combines recent advances in parameter-efficient fine-tuning and Retrieval-Augmented Generation (RAG) with a summarisation strategy to effectively process entire 90 minute long conversations. In evaluations on a dataset of 141 diagnostic interviews we show that AutoPsyC consistently outperforms all baselines and ablation conditions on the recognition of four highly relevant psychodynamic conflicts. @@ -85,7 +85,7 @@ SyedaMahwishStony Brook University CamiloRuggeroUniversity of Texas at Dallas RomanKotovStony Brook University - H. AndrewSchwartzStony Brook University + H. AndrewSchwartzStony Brook University 62-68 Recent work has suggested detection of cognitive distortions as an impactful task for NLP in the clinical space, but the connection between language-detected distortions and validated mental health outcomes has been elusive. In this work, we evaluate the co-occurrence of (a) 10 distortions derived from language-based detectors trained over two common distortion datasets with (b) 12 mental health outcomes contained within two new language-to-mental-health datasets: DS4UD and iHiTOP. We find higher rates of distortions for those with greater mental health condition severity (ranging from r = 0.16 for thought disorders to r = 0.46 for depressed mood), and that the specific distortions of should statements and fortune telling were associated with a depressed mood and being emotionally drained, respectively. This suggested that language-based assessments of cognitive distortion could play a significant role in detection and monitoring of mental health conditions. 2025.clpsych-1.5 @@ -110,7 +110,7 @@ IanApperlySchool of Psychology, University of Birmingham RoryDevineSchool of Psychology, University of Birmingham Sannevan der KleijSchool of Psychology, University of Birmingham - MarkLeeSchool of Computer Science, University of Birmingham + MarkLeeSchool of Computer Science, University of Birmingham 79-89 A rigorous psychometric approach is crucial for the accurate measurement of mind-reading abilities. Traditional scoring methods for such tests, which involve lengthy free-text responses, require considerable time and human effort. This study investigates the use of large language models (LLMs) to automate the scoring of psychometric tests. Data were collected from participants aged 13 to 30 years and scored by trained human coders to establish a benchmark. We evaluated multiple LLMs against human assessments, exploring various prompting strate- gies to optimize performance and fine-tuning the models using a subset of the collected data to enhance accuracy. Our results demonstrate that LLMs can assess advanced mind-reading abilities with over 90% accuracy on average. Notably, in most test items, the LLMs achieved higher Kappa agreement with the lead coder than two trained human coders, highlighting their potential to reliably score open-response psychometric tests. 2025.clpsych-1.7 @@ -121,10 +121,10 @@ Bigger But Not Better: Small Neural Language Models Outperform <fixed-case>LLM</fixed-case>s in Detection of Thought Disorder ChangyeLiUniversity of Washington WeizheXuUniversity of Washington - SergueiPakhomovUniversity of Minnesota + SergueiPakhomovUniversity of Minnesota EllenBradleyUniversity of California, San Francisco DrorBen-ZeevUniversity of Washington - TrevorCohenUniversity of Washington + TrevorCohenUniversity of Washington 90-105 Disorganized thinking is a key diagnostic indicator of schizophrenia-spectrum disorders. Recently, clinical estimates of the severity of disorganized thinking have been shown to correlate with measures of how difficult speech transcripts would be for large language models (LLMs) to predict. However, LLMs’ deployment challenges – including privacy concerns, computational and financial costs, and lack of transparency of training data – limit their clinical utility. We investigate whether smaller neural language models can serve as effective alternatives for detecting positive formal thought disorder, using the same sliding window based perplexity measurements that proved effective with larger models. Surprisingly, our results show that smaller models are more sensitive to linguistic differences associated with formal thought disorder than their larger counterparts. Detection capability declines beyond a certain model size and context length, challenging the common assumption of “bigger is better” for LLM-based applications. Our findings generalize across audio diaries and clinical interview speech samples from individuals with psychotic symptoms, suggesting a promising direction for developing efficient, cost-effective, and privacy-preserving screening tools that can be deployed in both clinical and naturalistic settings. 2025.clpsych-1.8 @@ -150,7 +150,7 @@ KrutikaParvatikarRochester Institute of Technology MarcosZampieriGeorge Mason University AshiqurKhudabukhshRochester Institute of Technology - LiviuDinuUniversity of Bucharest + LiviuDinuUniversity of Bucharest 116-126 Depression is the most common mental health disorder, and its prevalence increased during the COVID-19 pandemic. As one of the most extensively researched psychological conditions, recent research has increasingly focused on leveraging social media data to enhance traditional methods of depression screening. This paper addresses the growing interest in interdisciplinary research on depression, and aims to support early-career researchers by providing a comprehensive and up-to-date list of datasets for analyzing and predicting depression through social media data. We present an overview of datasets published between 2019 and 2024. We also make the comprehensive list of datasets available online as a continuously updated resource, with the hope that it will facilitate further interdisciplinary research into the linguistic expressions of depression on social media. 2025.clpsych-1.10 @@ -303,7 +303,7 @@ Prompt Engineering for Capturing Dynamic Mental Health Self States from Social Media Posts CallumChanUniversity of Ottawa SunveerKhunkhunUniversity of Ottawa - DianaInkpenUniversity of Ottawa + DianaInkpenUniversity of Ottawa Juan AntonioLossio-VenturaNational Institutes of Health 256-267 With the advent of modern Computational Linguistic techniques and the growing societal mental health crisis, we contribute to the field of Clinical Psychology by participating in the CLPsych 2025 shared task. This paper describes the methods and results obtained by the uOttawa team’s submission (which included a researcher from the National Institutes of Health in the USA, in addition to three researchers from the University of Ottawa, Canada). The task consists of four subtasks focused on modeling longitudinal changes in social media users’ mental states and generating accurate summaries of these dynamic self-states. Through prompt engineering of a modern large language model (Llama-3.3-70B-Instruct), the uOttawa team placed first, sixth, fifth, and second, respectively, for each subtask, amongst the other submissions. This work demonstrates the capacity of modern large language models to recognize nuances in the analysis of mental states and to generate summaries through carefully crafted prompting. @@ -359,8 +359,8 @@ August HåkanNilssonOsloMet SyedaMahwishStony Brook University VasudhaVaradarajanStony Brook University - H. AndrewSchwartzStony Brook University - Ryan L.BoydUniversity of Texas at Dallas + H. AndrewSchwartzStony Brook University + Ryan L.BoydUniversity of Texas at Dallas 300-313 Mental health is not a fixed trait but a dynamic process shaped by the interplay between individual dispositions and situational contexts. Building on interactionist and constructionist psychological theories, we develop interpretable models to predict well-being and identify adaptive and maladaptive self-states in longitudinal social media data. Our approach integrates person-level psychological traits (e.g., resilience, cognitive distortions, implicit motives) with language-inferred situational features derived from the Situational 8 DIAMONDS framework. We compare these theory-grounded features to embeddings from a psychometrically-informed language model that captures temporal and individual-specific patterns. Results show that our principled, theory-driven features provide competitive performance while offering greater interpretability. Qualitative analyses further highlight the psychological coherence of features most predictive of well-being. These findings underscore the value of integrating computational modeling with psychological theory to assess dynamic mental states in contextually sensitive and human-understandable ways. 2025.clpsych-1.27 diff --git a/data/xml/2025.clrel.xml b/data/xml/2025.clrel.xml index 7d9ffcdaf2..d34dfd5851 100644 --- a/data/xml/2025.clrel.xml +++ b/data/xml/2025.clrel.xml @@ -6,7 +6,7 @@ SaneYagi SaneYagi MajdiSawalha - Bayan AbuShawar + Bayan AbuShawar Abdallah T.AlShdaifat NorhanAbbas Organizers diff --git a/data/xml/2025.cltw.xml b/data/xml/2025.cltw.xml index 7b1dbd4fc0..7590697b80 100644 --- a/data/xml/2025.cltw.xml +++ b/data/xml/2025.cltw.xml @@ -22,7 +22,7 @@ An Assessment of Word Separation Practices in <fixed-case>O</fixed-case>ld <fixed-case>I</fixed-case>rish Text Resources and a Universal Method for Tokenising <fixed-case>O</fixed-case>ld <fixed-case>I</fixed-case>rish Text AdrianDoyle - John P.McCrae + John P.McCrae 1–11 The quantity of Old Irish text which survives in contemporary manuscripts is relatively small by comparison to what is available for well-resourced modern languages. Moreover, as it is a historical language, no more text will ever be generated by native speakers of Old Irish. This makes the text which has survived particularly valuable, and ideally, all of it would be annotated using a single, common annotation standard, thereby ensuring compatibility between text resources. At present, Old Irish text repositories separate words or sub-word morphemes in accordance with different methodologies, and each uses a different style of lexical annotation. This makes it difficult to utilise content from more than any one repository in NLP applications. This paper provides an assessment of distinctions between existing annotated corpora, showing that the primary point of divergence is at the token level. For this reason, this paper also describes a new method for tokenising Old Irish text. This method can be applied even to diplomatic editions, and has already been utilised in various text resources. 2025.cltw-1.1 @@ -33,7 +33,7 @@ WilliamLamb DonggeHan OndrejKlejch - BeatriceAlex + BeatriceAlex PeterBell 12–26 Advances in large language modelling have disproportionately benefited high-resource languages due to their vastly greater training data reserves. This paper proposes a novel cross-lingual text expansion (XLTE) technique using multilingual large language models (MLLMs) to mitigate data sparsity in low-resource languages. We apply XLTE to the domain of traditional Scottish Gaelic storytelling to generate a training corpus suitable for language modelling, for example as part of an automatic speech recognition system. The effectiveness of this technique is demonstrated using OpenAI’s GPT-4o, with supervised fine-tuning (SFT) providing decreased neologism rates and a 57.2% reduction in perplexity over the baseline model. Despite these promising results, qualitative analyses reveal important stylistic divergences between synthesised and genuine data. Nevertheless, XLTE offers a promising, scalable method for synthesising training sets in other languages and domains, opening avenues for further improvements in low-resource language modelling. @@ -44,7 +44,7 @@ A Pragmatic Approach to Using Artificial Intelligence and Virtual Reality in Digital Game-Based Language Learning MonicaWard LiangXu - ElaineUí Dhonnchadha + ElaineUí Dhonnchadha 27–34 Computer-Assisted Language Learning (CALL) applications have many benefits for language learning. However, they can be difficult to develop for low-resource languages such as Irish and the other Celtic languages. It can be difficult to assemble the multidisciplinary team needed to develop CALL resources and there are fewer language resources available for the language. This paper provides an overview of a pragmatic approach to using Artificial Intelligence (AI) and Virtual Reality (VR) in developing a Digital Game-Based Language Learning (DGBLL) app for Irish. This pragmatic approach was used to develop Cipher - a DGBLL app for Irish (Xu et al, 2022b) where a number of existing resources including text repositories and NLP tools were used. In this paper the focus is on the incorporation of Artificial Intelligence (AI) technologies including AI image generation, text-to-speech (TTS) and Virtual Reality (VR), in a pedagogically informed manner to support language learning in a way that is both challenging and enjoyable. Cipher has been designed to be language independent and can be adapted for various cohorts of learners and for other languages. Cipher has been played and tested in a number of schools in Dublin and the feedback from teachers and students has been very positive. This paper outlines how AI and VR technologies have been utilised in Cipher and how it could be adapted to other Celtic languages and low-resource languages in general. 2025.cltw-1.3 diff --git a/data/xml/2025.cmcl.xml b/data/xml/2025.cmcl.xml index a4f3bea2a8..90bd083177 100644 --- a/data/xml/2025.cmcl.xml +++ b/data/xml/2025.cmcl.xml @@ -68,7 +68,7 @@ Profiling neural grammar induction on morphemically tokenised child-directed speech MilaMarcheva TheresaBiberauerUniversity of the Western Cape, University of Stellenbosch and University of Cambridge - WeiweiSunUniversity of Cambridge + WeiweiSunUniversity of Cambridge 47-54 We investigate the performance of state-of-the-art (SotA) neural grammar induction (GI) models on a morphemically tokenised English dataset based on the CHILDES treebank (Pearl and Sprouse, 2013). Using implementations from Yang et al. (2021a), we train models and evaluate them with the standard F1 score. We introduce novel evaluation metrics—depth-of-morpheme and sibling-of-morpheme—which measure phenomena around bound morpheme attachment. Our results reveal that models with the highest F1 scores do not necessarily induce linguistically plausible structures for bound morpheme attachment, highlighting a key challenge for cognitively plausible GI. 2025.cmcl-1.7 @@ -91,7 +91,7 @@ Unzipping the Causality of <fixed-case>Z</fixed-case>ipf’s Law and Other Lexical Trade-offs AmandaDoucetteMcGill University, McGill University - Timothy J.O’DonnellMcGill University, Mila and McGill University + Timothy J.O’DonnellMcGill University, Mila and McGill University MorganSondereggerMcGill University 66-76 There are strong constraints on the structure of a possible lexicon. For example, the negative correlation between word frequency and length known as Zipf’s law, and a negative correlation between word length and phonotactic complexity appear to hold across languages. While lexical trade-offs like these have been examined individually, it is unclear how they interact as a system. In this paper, we propose causal discovery as a method for identifying lexical biases and their interactions in a set of variables. We represent the lexicon as a causal model, and apply the Fast Causal Discovery algorithm (Spirtes et al., 1995) to identify both causal relationships between measured variables and the existence of possible unmeasured confounding variables. We apply this method to lexical data including measures of word length, frequency, phonotactic complexity, and morphological irregularity for 25 languages and find evidence of universal associations involving word length with a high likelihood of involving an unmeasured confounder, suggesting that additional variables need to be measured to determine how they are related. We also find evidence of variation across languages in relationships between the remaining variables, and suggest that given a larger dataset, causal discovery algorithms can be a useful tool in assessing the universality of lexical biases. @@ -116,7 +116,7 @@ “Is There Anything Else?”: Examining Administrator Influence on Linguistic Features from the Cookie Theft Picture Description Cognitive Test ChangyeLiUniversity of Washington ZhechengSheng - TrevorCohenUniversity of Washington + TrevorCohenUniversity of Washington Serguei V. S.PakhomovUniversity of Minnesota - Twin Cities 91-103 Alzheimer’s Disease (AD) dementia is a progressive neurodegenerative disease that negatively impacts patients’ cognitive ability. Previous studies have demonstrated that changes in naturalistic language samples can be useful for early screening of AD dementia. However, the nature of language deficits often requires test administrators to use various speech elicitation techniques during spontaneous language assessments to obtain enough propositional utterances from dementia patients. This could lead to the “observer’s effect” on the downstream analysis that has not been fully investigated. Our study seeks to quantify the influence of test administrators on linguistic features in dementia assessment with two English corpora the “Cookie Theft” picture description datasets collected at different locations and test administrators show different levels of administrator involvement. Our results show that the level of test administrator involvement significantly impacts observed linguistic features in patient speech. These results suggest that many of significant linguistic features in the downstream classification task may be partially attributable to differences in the test administration practices rather than solely to participants’ cognitive status. The variations in test administrator behavior can lead to systematic biases in linguistic data, potentially confounding research outcomes and clinical assessments. Our study suggests that there is a need for a more standardized test administration protocol in the development of responsible clinical speech analytics frameworks. @@ -187,7 +187,7 @@ Beyond Binary <fixed-case>A</fixed-case>nimacy: A Multi-Method Investigation of <fixed-case>LM</fixed-case>s’ Sensitivity in <fixed-case>E</fixed-case>nglish Object Relative Clauses - YueLi + YueLi YanCongPurdue University Elaine J.FrancisPurdue University 184-196 @@ -199,7 +199,7 @@ An Empirical Study of Language Syllabification using Syllabary and Lexical Networks RusaliSaha - YannickMarchandDalhousie University + YannickMarchandDalhousie University 197-206 Language syllabification is the separation of a word into written or spoken syllables. The study of syllabification plays a pivotal role in morphology and there have been previous attempts to study this phenomenon using graphs or networks. Previous approaches have claimed through visual estimation that the degree distribution of language networks follows the Power Law distribution, however, there have not been any empirically grounded metrics to determine the same. In our study, we implement two kinds of language networks, namely, syllabary and lexical networks, and investigate the syllabification of four European languages: English, French, German and Spanish using network analysis and examine their small-world, random and scale-free nature. We additionally empirically prove that contrary to claims in previous works, although the degree distribution of these networks appear to follow a power law distribution, they are actually more in agreement with a log-normal distribution, when a numerically grounded curve-fitting is applied. Finally, we explore how syllabary and lexical networks for the English language change over time using a database of age-of-acquisition rating words. Our analysis further shows that the preferential attachment mechanism appears to be a well-grounded explanation for the degree distribution of the syllabary network. 2025.cmcl-1.24 diff --git a/data/xml/2025.coling.xml b/data/xml/2025.coling.xml index 41069c3376..88190f1b15 100644 --- a/data/xml/2025.coling.xml +++ b/data/xml/2025.coling.xml @@ -3,7 +3,7 @@ Proceedings of the 31st International Conference on Computational Linguistics - OwenRambow + OwenRambow LeoWanner MariannaApidianaki HendAl-Khalifa @@ -166,7 +166,7 @@ TaoJi QiZhang TaoGui - XuanjingHuang + XuanjingHuang 156–187 Existing evaluations of tool learning primarily focus on validating the alignment of selected tools for large language models (LLMs) with expected outcomes. However, these approaches rely on a limited set of scenarios where answers can be pre-determined. Furthermore, a sole emphasis on outcomes disregards the complex capabilities required for LLMs to effectively use tools. To tackle this issue, we propose ToolEyes, a fine-grained system tailored for the evaluation of the LLMs’ tool learning capabilities in authentic scenarios. The system meticulously examines seven real-world scenarios, analyzing five dimensions crucial to LLMs in tool learning: format alignment, intent comprehension, behavior planning, tool selection, and answer organization. Additionally, ToolEyes incorporates a tool library boasting approximately 600 tools, serving as an intermediary between LLMs and the physical world. Evaluations involving ten LLMs across three categories reveal a preference for specific scenarios and limited cognitive abilities in tool learning. Intriguingly, expanding the model size even exacerbates the hindrance to tool learning. The code and data are available at https://github.com/Junjie-Ye/ToolEyes. 2025.coling-main.12 @@ -196,7 +196,7 @@ Looks can be Deceptive: Distinguishing Repetition Disfluency from Reduplication Arif A.Ahmad Khyathi GayathriMothika - PushpakBhattacharyya + PushpakBhattacharyya 214–229 Reduplication and repetition, though similar in form, serve distinct linguistic purposes. Reduplication is a deliberate morphological process used to express grammatical, semantic, or pragmatic nuances, while repetition is often unintentional and indicative of disfluency. This paper presents the first large-scale study of reduplication and repetition in speech using computational linguistics. We introduce IndicRedRep, a new publicly available dataset containing Hindi, Telugu, and Marathi text annotated with reduplication and repetition at the word level. We evaluate transformer-based models for multi-class reduplication and repetition token classification, utilizing the Reparandum-Interregnum-Repair structure to distinguish between the two phenomena. Our models achieve macro F1 scores of up to 85.62% in Hindi, 83.95% in Telugu, and 84.82% in Marathi for reduplication-repetition classification. 2025.coling-main.15 @@ -234,7 +234,7 @@ Dynamic Graph Neural <fixed-case>ODE</fixed-case> Network for Multi-modal Emotion Recognition in Conversation YuntaoShou TaoMeng - WeiAi + WeiAi KeqinLi 256–268 Multimodal emotion recognition in conversation (MERC) refers to identifying and classifying human emotional states by combining data from multiple different modalities (e.g., audio, images, text, video, etc.). Specifically, human emotional expressions are often complex and diverse, and these complex emotional expressions can be captured and understood more comprehensively through the fusion of multimodal information. Most existing graph-based multimodal emotion recognition methods can only use shallow GCNs to extract emotion features and fail to capture the temporal dependencies caused by dynamic changes in emotions. To address the above problems, we propose a Dynamic Graph Neural Ordinary Differential Equation Network (DGODE) for multimodal emotion recognition in conversation, which combines the dynamic changes of emotions to capture the temporal dependency of speakers’ emotions. Technically, the key idea of DGODE is to use the graph ODE evolution network to characterize the continuous dynamics of node representations over time and capture temporal dependencies. Extensive experiments on two publicly available multimodal emotion recognition datasets demonstrate that the proposed DGODE model has superior performance compared to various baselines. Furthermore, the proposed DGODE can also alleviate the over-smoothing problem, thereby enabling the construction of a deep GCN network. @@ -263,7 +263,7 @@ YiFung Hou PongChan KevinSmall - ChengXiangZhai + ChengXiangZhai HengJi 281–296 The increasing demand for personalized interactions with large language models (LLMs) calls for methodologies capable of accurately and efficiently identifying user opinions and preferences. Retrieval augmentation emerges as an effective strategy, as it can accommodate a vast number of users without the costs from fine-tuning. Existing research, however, has largely focused on enhancing the retrieval stage and devoted limited exploration toward optimizing the representation of the database, a crucial aspect for tasks such as personalization. In this work, we examine the problem from a novel angle, focusing on how data can be better represented for more data-efficient retrieval in the context of LLM customization. To tackle this challenge, we introduce Persona-DB, a simple yet effective framework consisting of a hierarchical construction process to improve generalization across task contexts and collaborative refinement to effectively bridge knowledge gaps among users. In the evaluation of response prediction, Persona-DB demonstrates superior context efficiency in maintaining accuracy with a significantly reduced retrieval size, a critical advantage in scenarios with extensive histories or limited context windows. Our experiments also indicate a marked improvement of over 10% under cold-start scenarios, when users have extremely sparse data. Furthermore, our analysis reveals the increasing importance of collaborative knowledge as the retrieval capacity expands. @@ -293,7 +293,7 @@ Semantic Role Labeling of <fixed-case>N</fixed-case>om<fixed-case>B</fixed-case>ank Partitives - AdamMeyers + AdamMeyers Advait PravinSavant John E.Ortega 324–336 @@ -351,7 +351,7 @@ <fixed-case>ELITR</fixed-case>-Bench: A Meeting Assistant Benchmark for Long-Context Language Models ThibautThonet - LaurentBesacier + LaurentBesacier JosRozen 407–428 Research on Large Language Models (LLMs) has recently witnessed an increasing interest in extending the models’ context size to better capture dependencies within long documents. While benchmarks have been proposed to assess long-range abilities, existing efforts primarily considered generic tasks that are not necessarily aligned with real-world applications. In contrast, we propose a new benchmark for long-context LLMs focused on a practical meeting assistant scenario in which the long contexts consist of transcripts obtained by automatic speech recognition, presenting unique challenges for LLMs due to the inherent noisiness and oral nature of such data. Our benchmark, ELITR-Bench, augments the existing ELITR corpus by adding 271 manually crafted questions with their ground-truth answers, as well as noisy versions of meeting transcripts altered to target different Word Error Rate levels. Our experiments with 12 long-context LLMs on ELITR-Bench confirm the progress made across successive generations of both proprietary and open models, and point out their discrepancies in terms of robustness to transcript noise. We also provide a thorough analysis of our GPT-4-based evaluation, including insights from a crowdsourcing study. Our findings indicate that while GPT-4’s scores align with human judges, its ability to distinguish beyond three score levels may be limited. @@ -398,7 +398,7 @@ YihongLiu ChunlanMa HaotianYe - HinrichSchütze + HinrichSchütze 469–495 Transliterating related languages that use different scripts into a common script is effective for improving crosslingual transfer in downstream tasks. However, this methodology often makes pretraining a model from scratch unavoidable, as transliteration brings about new subwords not covered in existing multilingual pretrained language models (mPLMs). This is undesirable because it requires a large computation budget. A more promising way is to make full use of available mPLMs. To this end, this paper proposes a simple but effective framework: Transliterate-Merge-Initialize (TransMI). TransMI can create strong baselines for data that is transliterated into a common script by exploiting an existing mPLM and its tokenizer without any training. TransMI has three stages: (a) transliterate the vocabulary of an mPLM into a common script; (b) merge the new vocabulary with the original vocabulary; and (c) initialize the embeddings of the new subwords. We apply TransMI to three strong recent mPLMs. Our experiments demonstrate that TransMI not only preserves the mPLM’s ability to handle non-transliterated data, but also enables it to effectively process transliterated data, thereby facilitating crosslingual transfer across scripts. The results show consistent improvements of 3% to 34% for different mPLMs and tasks. We make our code and models publicly available at https://github.com/cisnlp/TransMI. 2025.coling-main.32 @@ -408,7 +408,7 @@ Two-stage Incomplete Utterance Rewriting on Editing Operation ZhiyuCao PeifengLi - QiaomingZhu + QiaomingZhu YaxinFan 496–507 Previous work on Incomplete Utterance Rewriting (IUR) has primarily focused on generating rewritten utterances based solely on dialogue context, ignoring the widespread phenomenon of coreference and ellipsis in dialogues. To address this issue, we propose a novel framework called TEO (Two-stage approach on Editing Operation) for IUR, in which the first stage generates editing operations and the second stage rewrites incomplete utterances utilizing the generated editing operations and the dialogue context. Furthermore, an adversarial perturbation strategy is proposed to mitigate cascading errors and exposure bias caused by the inconsistency between training and inference in the second stage. Experimental results on three IUR datasets show that our TEO outperforms the SOTA models significantly. @@ -483,7 +483,7 @@ Knowledge Graph Entity Typing with Curriculum Contrastive Learning HaoWang - MinghuaNuo + MinghuaNuo ShanJiang 574–583 The Knowledge Graph Entity Typing (KGET) task aims to predict missing type annotations for entities in knowledge graphs. Most recent studies only focus on the structural information from an entity’s neighborhood or semantic information from textual representations of entities or relations. In this paper, inspired by curriculum learning and contrastive learning, we propose the CCLET model using the Curriculum Contrastive Learning strategy for KGET, which uses the Pre-trained Language Model (PLM) and the graph model to fuse the entity related semantic and the structural information of the Knowledge Graph (KG) respectively. Our CCLET model consists of two main parts. In the Knowledge Fusion part, we design an Enhanced-MLP architecture to fuse the text of the entity’s description, related triplet, and tuples; In the Curriculum Contrastive Learning part, we define the difficulty of the course by controlling the level of added noise, we aim to accurately learn with curriculum contrastive learning strategy from easy to difficult. Our extensive experiments demonstrate that the CCLET model outperforms recent state-of-the-art models, verifying its effectiveness in the KGET task. @@ -519,7 +519,7 @@ JianYang XiangLi WeixiaoZhou - FeiLiu + FeiLiu KuiWu XiangyuanGuan TaoSun @@ -603,7 +603,7 @@ ZhaoguangLong JieZhou AiminZhou - ManLan + ManLan YangChong 710–725 Large Language Models (LLMs) have demonstrated impressive capabilities across a wide range of tasks. However, their proficiency and reliability in the specialized domain of financial data analysis, particularly focusing on data-driven thinking, remain uncertain. To bridge this gap, we introduce FinDABench, a comprehensive benchmark designed to evaluate the financial data analysis capabilities of LLMs within this context. The benchmark comprises 15,200 training instances and 8,900 test instances, all meticulously crafted by human experts. FinDABench assesses LLMs across three dimensions: 1) Core Ability, evaluating the models’ ability to perform financial indicator calculation and corporate sentiment risk assessment; 2) Analytical Ability, determining the models’ ability to quickly comprehend textual information and analyze abnormal financial reports; and 3) Technical Ability, examining the models’ use of technical knowledge to address real-world data analysis challenges involving analysis generation and charts visualization from multiple perspectives. We will release FinDABench, and the evaluation scripts at https://github.com/xxx. FinDABench aims to provide a measure for in-depth analysis of LLM abilities and foster the advancement of LLMs in the field of financial data analysis. @@ -740,7 +740,7 @@ WenlinZhang ChuhanWu XiangyangLi - YuhaoWang + YuhaoWang KuicaiDong YichaoWang XinyiDai @@ -820,7 +820,7 @@ YuanxiangHuangfu PeifengLi YaxinFan - QiaomingZhu + QiaomingZhu 989–999 Previous work on empathetic response generation mainly focused on utilizing the speaker’s emotions to generate responses. However, the performance of identifying fine-grained emotions is limited, introducing cascading errors to empathetic response generation. Moreover, due to the conflict between the information in the dialogue history and the recognized emotions, previous work often generated general and uninformative responses. To address the above issues, we propose a novel framework NEC (Non-Emotion-Centric empathetic dialogue generation) based on contrastive learning and context-sensitive entity and social commonsense, in which the frequent replies and sentences with incorrect emotions are punished through contrastive learning, thereby improving the empathy, diversity and information of the responses. The experimental results demonstrate that our NEC enhances the quality of empathetic generation and generates more diverse responses in comparison with the state-of-the-art baselines.The code will be available at https://github.com/huangfu170/NEC-empchat 2025.coling-main.66 @@ -861,7 +861,7 @@ TerryLima Ruas MohamedAbdalla BelaGipp - Saif M.Mohammad + Saif M.Mohammad 1027–1044 This study examines the tendency to cite older work across 20 fields of study over 43 years (1980–2023). We put NLP’s propensity to cite older work in the context of these 20 other fields to analyze whether NLP shows similar temporal citation patterns to them over time or whether differences can be observed. Our analysis, based on a dataset of ~240 million papers, reveals a broader scientific trend: many fields have markedly declined in citing older works (e.g., psychology, computer science). The trend is strongest in NLP and ML research (-12.8% and -5.5% in citation age from previous peaks). Our results suggest that citing more recent works is not directly driven by the growth in publication rates (-3.4% across fields; -5.2% in humanities; -5.5% in formal sciences) — even when controlling for an increase in the volume of papers. Our findings raise questions about the scientific community’s engagement with past literature, particularly for NLP, and the potential consequences of neglecting older but relevant research. The data and a demo showcasing our results are publicly available. 2025.coling-main.69 @@ -933,7 +933,7 @@ MariannaApidianaki AjayPatel SmarandaMuresan - KathleenMcKeown + KathleenMcKeown 1124–1135 Recent state-of-the-art authorship attribution methods learn authorship representations of text in a latent, uninterpretable space, which hinders their usability in real-world applications. We propose a novel approach for interpreting learned embeddings by identifying representative points in the latent space and leveraging large language models to generate informative natural language descriptions of the writing style associated with each point. We evaluate the alignment between our interpretable and latent spaces and demonstrate superior prediction agreement over baseline methods. Additionally, we conduct a human evaluation to assess the quality of these style descriptions and validate their utility in explaining the latent space. Finally, we show that human performance on the challenging authorship attribution task improves by +20% on average when aided with explanations from our method. 2025.coling-main.75 @@ -1051,7 +1051,7 @@ ShaohuanCheng DingyiZeng LiZhou - ChenZhang + ChenZhang MaluZhang WenyuChen 1278–1293 @@ -1112,7 +1112,7 @@ AliAl-Laith AlexanderConroy JensBjerring-Hansen - BolettePedersen + BolettePedersen CarstenLevisen DanielHershcovich 1353–1364 @@ -1217,7 +1217,7 @@ Elizabeth M.Olson HemankLamba AoifeCahill - JoelTetreault + JoelTetreault AlejandroJaimes 1475–1495 Natural Language Processing (NLP) of news articles can play an important role in understanding the dynamics and causes of violent conflict. Despite the availability of datasets categorizing various conflict events, the existing labels often do not cover all of the fine-grained violent conflict event types relevant to areas like the Horn of Africa. In this paper, we introduce a new benchmark dataset Conflict Events in the Horn of Africa region (CEHA) and propose a new task for identifying violent conflict events using online resources with this dataset. The dataset consists of 500 English event descriptions regarding conflict events in the Horn of Africa region with fine-grained event-type definitions that emphasize the cause of the conflict. This dataset categorizes the key types of conflict risk according to specific areas required by stakeholders in the Humanitarian-Peace-Development Nexus. Additionally, we conduct extensive experiments on two tasks supported by this dataset: Event-relevance Classification and Event-type Classification. Our baseline models demonstrate the challenging nature of these tasks and the usefulness of our dataset for model evaluations in low-resource settings. @@ -1254,7 +1254,7 @@ LiangweiYang ChenWang XiongxiaoXu - Philip S.Yu + Philip S.Yu KaiShu 1520–1530 With the emergence of large language models (LLMs) and their ability to perform a variety of tasks, their application in recommender systems (RecSys) has shown promise. However, we are facing significant challenges when deploying LLMs into RecSys, such as limited prompt length, unstructured item information, and un-constrained generation of recommendations, leading to sub-optimal performance. To address these issues, we propose a novel Taxonomy-guided Recommendation (TaxRec) framework to empower LLM with category information in a systematic approach. Specifically, TaxRec features a two-step process: one-time taxonomy categorization and LLM-based recommendation. In the one-time taxonomy categorization phase, we organize and categorize items, ensuring clarity and structure of item information. In the LLM-based recommendation phase, we feed the structured items into LLM prompts, achieving efficient token utilization and controlled feature generation. This enables more accurate, contextually relevant, and zero-shot recommendations without the need for domain-specific fine-tuning. Experimental results demonstrate that TaxRec significantly enhances recommendation quality compared to traditional zero-shot approaches, showcasing its efficacy as a personal recommender with LLMs. Code is available at: https://github.com/yueqingliang1/TaxRec. @@ -1266,7 +1266,7 @@ ShannanLiu PeifengLi YaxinFan - QiaomingZhu + QiaomingZhu 1531–1544 Multi-party dialogue discourse parsing is an important and challenging task in natural language processing (NLP). Previous studies struggled to fully understand the deep semantics of dialogues, especially when dealing with complex topic interleaving and ellipsis. To address the above issues, we propose a novel model DDPE (Dialogue Discourse Parsing with Explanations) to integrate external knowledge from Large Language Models (LLMs), which consists of three components, i.e., explanation generation, structural parsing, and contrastive learning. DDPE employs LLMs to generate explanatory and contrastive information about discourse structure, thereby providing additional reasoning cues that enhance the understanding of dialogue semantics. The experimental results on the two public datasets STAC and Molweni show that our DDPE significantly outperforms the state-of-the-art (SOTA) baselines. 2025.coling-main.103 @@ -1326,7 +1326,7 @@ Improving Explainable Fact-Checking with Claim-Evidence Correlations XinTan BoweiZou - Ai TiAw + Ai TiAw 1600–1612 Automatic fact-checking systems that employ large language models (LLMs) have achieved human-level performance in combating widespread misinformation. However, current LLM-based fact-checking systems fail to reveal the reasoning principles behind their decision-making for the claim verdict. In this work, we propose Correlation-Enhanced Explainable Fact-Checking (CorXFact), an LLM-based fact-checking system that simulates the reasoning principle of human fact-checkers for evidence-based claim verification: assessing and weighing the correlations between the claim and each piece of evidence. Following this principle, CorXFact enables efficient claim verification and transparent explanation generation. Furthermore, we contribute the CorFEVER test set to comprehensively evaluate the CorXFact system in claim-evidence correlation identification and claim verification in both closed-domain and real-world fact-checking scenarios. Experimental results show that our proposed CorXFact significantly outperforms four strong fact-checking baselines in claim authenticity prediction and verdict explanation. 2025.coling-main.108 @@ -1403,7 +1403,7 @@ MinjieQiang ZhongqingWang ShoushanLi - GuodongZhou + GuodongZhou 1699–1710 With the emergence of social media and e-commerce platforms, accurate user profiling has become increasingly vital for recommendation systems and personalized services. Recent studies have focused on generating detailed user profiles by extracting various aspects of user attributes from textual reviews. Nevertheless, these investigations have not fully exploited the potential of the abundant multimodal data at hand. In this study, we propose a novel task called multimodal user profiling. This task emphasizes the utilization of both review texts and their accompanying images to create comprehensive user profiles. By integrating textual and visual data, we leverage their complementary strengths, enabling the generation of more holistic user representations. Additionally, we explore a unified joint training framework with various multimodal training strategies that incorporate users’ historical review texts and images for user profile generation. Our experimental results underscore the significance of multimodal data in enhancing user profile generation and demonstrate the effectiveness of the proposed unified joint training approach. 2025.coling-main.115 @@ -1411,7 +1411,7 @@ Acquiring Bidirectionality via Large and Small Language Models - TakumiGoto + TakumiGoto HiroyoshiNagao YutaKoreeda 1711–1717 @@ -1456,7 +1456,7 @@ GuangjieZeng XiaoyanYu HaoPeng - Philip S.Yu + Philip S.Yu 1754–1766 Multimodal sarcasm detection (MSD) is essential for various downstream tasks. Existing MSD methods tend to rely on spurious correlations. These methods often mistakenly prioritize non-essential features yet still make correct predictions, demonstrating poor generalizability beyond training environments. Regarding this phenomenon, this paper undertakes several initiatives. Firstly, we identify two primary causes that lead to the reliance of spurious correlations. Secondly, we address these challenges by proposing a novel method that integrate Multimodal Incongruities via Contrastive Learning (MICL) for multimodal sarcasm detection. Specifically, we first leverage incongruity to drive multi-view learning from three views: token-patch, entity-object, and sentiment. Then, we introduce extensive data augmentation to mitigate the biased learning of the textual modality. Additionally, we construct a test set, SPMSD, which consists potential spurious correlations to evaluate the the model’s generalizability. Experimental results demonstrate the superiority of MICL on benchmark datasets, along with the analyses showcasing MICL’s advancement in mitigating the effect of spurious correlation. 2025.coling-main.119 @@ -1468,7 +1468,7 @@ ValleRuiz-Fernández JúliaFalcão LuisVasquez-Reina - AitorGonzalez-Agirre + AitorGonzalez-Agirre 1767–1784 In humans, cognitive biases are systematic deviations from rationality in judgment that simplify complex decisions. They typically manifest as a consequence of learned behaviors or limitations on information processing capabilities. Recent work has shown that these biases can percolate through training data and ultimately be learned by language models. We examine different groups of models, factoring in model size and type (base or instructed) for four kinds of cognitive bias: primacy, recency, common token, and majority class bias. We evaluate the performance of each model for each type of bias in different settings using simple and complex variants of datasets. Our results show that some biases have much stronger effects than others, and that task complexity plays a part in eliciting stronger effects for some of these biases as measured by effect size. We show that some cognitive biases such as common token and majority class bias are not straightforward to evaluate, and that, contrary to some of the previous literature, some effects that have been previously classified as common token bias in the literature are actually due to primacy and recency bias. 2025.coling-main.120 @@ -1479,7 +1479,7 @@ ShalakaSatheesh KatharinaBeckh KatrinKlug - HéctorAllende-Cid + HéctorAllende-Cid SebastianHouben TeenaHassan 1785–1801 @@ -1508,7 +1508,7 @@ ZhongqingWang ShichenLi HonglingWang - GuodongZhou + GuodongZhou 1813–1823 Multimodal sentiment analysis for fashion-related social media is essential for understanding how consumers appraise fashion products across platforms like Instagram and Twitter, where both textual and visual elements contribute to sentiment expression. However, a notable challenge in this task is the modality gap, where the different information density between text and images hinders effective sentiment analysis. In this paper, we propose a novel multimodal framework that addresses this challenge by introducing pseudo data generated by a two-stage framework. We further utilize a multimodal fusion approach that efficiently integrates the information from various modalities for sentiment classification of fashion posts. Experiments conducted on a comprehensive dataset demonstrate that our framework significantly outperforms existing unimodal and multimodal baselines, highlighting its effectiveness in bridging the modality gap for more accurate sentiment classification in fashion-related social media posts. 2025.coling-main.123 @@ -1517,9 +1517,9 @@ Quality Beyond A Glance: Revealing Large Quality Differences Between Web-Crawled Parallel Corpora Rikvan Noord - MiquelEsplà-Gomis + MiquelEsplà-Gomis MalinaChichirau - GemaRamírez-Sánchez + GemaRamírez-Sánchez AntonioToral 1824–1838 Parallel corpora play a vital role in advanced multilingual natural language processing tasks, notably in machine translation (MT). The recent emergence of numerous large parallel corpora, often extracted from multilingual documents on the Internet, has expanded the available resources. Nevertheless, the quality of these corpora remains largely unexplored, while there are large differences in how the corpora are constructed. Moreover, how the potential differences affect the performance of neural MT (NMT) systems has also received limited attention. This study addresses this gap by manually and automatically evaluating four well-known publicly available parallel corpora across eleven language pairs. Our findings are quite concerning: all corpora contain a substantial amount of noisy sentence pairs, with CCMatrix and CCAligned having well below of 50% reasonably clean pairs. MaCoCu and ParaCrawl generally have higher quality texts, though around a third of the texts still have clear issues. While corpus size impacts NMT models’ performance, our study highlights the critical role of quality: higher-quality corpora consistently yield better-performing NMT models when controlling for size. @@ -1551,7 +1551,7 @@ Does Vision Accelerate Hierarchical Generalization in Neural Language Learners? TatsukiKuribayashi - TimothyBaldwin + TimothyBaldwin 1865–1879 Neural language models (LMs) are arguably less data-efficient than humans from a language acquisition perspective. One fundamental question is why this human–LM gap arises. This study explores the advantage of grounded language acquisition, specifically the impact of visual information — which humans can usually rely on but LMs largely do not have access to during language acquisition — on syntactic generalization in LMs. Our experiments, following the poverty of stimulus paradigm under two scenarios (using artificial vs. naturalistic images), demonstrate that if the alignments between the linguistic and visual components are clear in the input, access to vision data does help with the syntactic generalization of LMs, but if not, visual input does not help. This highlights the need for additional biases or signals, such as mutual gaze, to enhance cross-modal alignment and enable efficient syntactic generalization in multimodal LMs. 2025.coling-main.127 @@ -1593,7 +1593,7 @@ ZeyuanYang FangzhouXiong PengLi - YangLiu + YangLiu 1922–1933 Due to the limited context window, Large Language Models (LLMs) struggle with processing long contexts. Although fine-tuning can extend the context window, it incurs substantial computation costs. In contrast, recent tuning-free approaches reallocate the attention mechanism or incorporate temporary trainable parameters. In this work, by jointly modeling instance-level generation with a limited context window and learning over sequential data, we rethink the long context generation of LLMs from a continual learning perspective. In practice, we inspect existing representative approaches and analyze their synergy with continual learning strategies. Moreover, we integrate these strategies into current approaches to further boost LLMs’ efficiency in processing long contexts. Comprehensive experiments and analysis confirm the feasibility of continual learning insights for improving long-context processing. 2025.coling-main.131 @@ -1604,7 +1604,7 @@ HansiWang YueWang QiliangLiang - YangLiu + YangLiu 1934–1942 Word Sense Disambiguation (WSD) is a fundamental task critical for accurate semantic understanding. Conventional training strategies usually only consider predefined senses for target words and learn each of them from relatively limited instances, neglecting the influence of similar ones. To address these problems, we propose the method of Learning to Rank Senses (LTRS) to enhance the task. This method helps a model learn to represent and disambiguate senses from a broadened range of instances via ranking an expanded list of sense definitions. By employing LTRS, our model achieves a SOTA F1 score of 79.6% in Chinese WSD and exhibits robustness in low-resource settings. Moreover, it shows excellent training efficiency, achieving faster convergence than previous methods. This provides a new technical approach to WSD and may also apply to the task for other languages. 2025.coling-main.132 @@ -1641,7 +1641,7 @@ TiborBosse Gert-JanDe Bruijn Jos A.Bosch - EmielKrahmer + EmielKrahmer 1964–1982 Motivational Interviewing (MI) is a counseling technique that promotes behavioral change through reflective responses to mirror or refine client statements. While advanced Large Language Models (LLMs) can generate engaging dialogues, challenges remain for applying them in a sensitive context such as MI. This work assesses the potential of LLMs to generate MI reflections via three LLMs: GPT-4, Llama-2, and BLOOM, and explores the effect of dialogue context size and integration of MI strategies for reflection generation by LLMs. We conduct evaluations using both automatic metrics and human judges on four criteria: appropriateness, relevance, engagement, and naturalness, to assess whether these LLMs can accurately generate the nuanced therapeutic communication required in MI. While we demonstrate LLMs’ potential in generating MI reflections comparable to human therapists, content analysis shows that significant challenges remain. By identifying the strengths and limitations of LLMs in generating empathetic and contextually appropriate reflections in MI, this work contributes to the ongoing dialogue in enhancing LLM’s role in therapeutic counseling. 2025.coling-main.135 @@ -1669,7 +1669,7 @@ Karody Lubna AbdulRahman SantoshKurasa ParagAgrawal - SandipanDandapat + SandipanDandapat 2003–2025 Chain-of-thought (CoT) prompting has significantly enhanced the the capability of large language models (LLMs) by structuring their reasoning processes. However, existing methods face critical limitations: handcrafted demonstrations require extensive human expertise, while trigger phrases are prone to inaccuracies. In this paper, we propose the Zero-shot Uncertainty-based Selection (ZEUS) method, a novel approach that improves CoT prompting by utilizing uncertainty estimates to select effective demonstrations without needing access to model parameters. Unlike traditional methods, ZEUS offers high sensitivity in distinguishing between helpful and ineffective questions, ensuring more precise and reliable selection. Our extensive evaluation shows that ZEUS consistently outperforms existing CoT strategies across four challenging reasoning benchmarks, demonstrating its robustness and scalability. 2025.coling-main.137 @@ -1680,7 +1680,7 @@ ZihaoFeng HailongCao WangXu - TiejunZhao + TiejunZhao 2026–2037 Large Language Models (LLMs) have demonstrated exceptional performance across a broad spectrum of cross-lingual Natural Language Processing (NLP) tasks. However, previous methods predominantly focus on leveraging parallel corpus to conduct instruction data for continuing pre-training or fine-tuning. They ignored the state of parallel data on the hidden layers of LLMs. In this paper, we demonstrate Word-level Cross-lingual Structure (WCS) of LLM which proves that the word-level embedding on the hidden layers are isomorphic between languages. We find that the hidden states of different languages’ input on the LLMs hidden layers can be aligned with an orthogonal matrix on word-level. We prove this conclusion in both mathematical and downstream task ways on two representative LLM foundations, LLaMA2 and BLOOM. Besides, we propose an Isomorphism-based Data Augmentation (IDA) method to apply the WCS on a downstream cross-lingual task, Bilingual Lexicon Induction (BLI), in both supervised and unsupervised ways. The experiment shows the significant improvement of our proposed method over all the baselines, especially on low-resource languages. 2025.coling-main.138 @@ -1692,7 +1692,7 @@ ZhongQian XiaoxuZhu PeifengLi - QiaomingZhu + QiaomingZhu 2038–2048 Document-level event factuality identification (DEFI) assesses the veracity degree to which an event mentioned in a document has happened, which is crucial for many natural language processing tasks. Previous work assesses event factuality by solely relying on the semantic information within a single document, which fails to identify hard cases where the document itself is hallucinative or counterfactual. There is also a pressing need for more suitable data of this kind. To tackle these issues, we construct Factualusion, a novel corpus with hallucination features that can be used not only for DEFI but can also be applied for hallucination evaluation for large language models. We further propose Trucidator, a graph-based framework that constructs intra-document and cross-document graphs and employs a multi-task learning paradigm to acquire more robust node embeddings, leveraging cross-document inference for more accurate identification. Experiments show that our proposed framework outperformed several baselines, demonstrating the effectiveness of our method. 2025.coling-main.139 @@ -1763,7 +1763,7 @@ RamonRuiz-Dolz DebelaGemechu ZlataKikteva - ChrisReed + ChrisReed 2131–2143 Traditionally, argument mining research has approached the task of automatic identification of argument structures by using existing definitions of what constitutes an argument, while leaving the equally important matter of what does not qualify as an argument unaddressed. With the ability to distinguish between what is and what is not a natural language argument being at the core of argument mining as a field, it is interesting that no previous work has explored approaches to effectively select non-related propositions (i.e., propositions that are not connected through an argumentative relation, such as support or attack) that improve the data for learning argument mining tasks better. In this paper, we address the question of how to effectively sample non-related propositions from six different argument mining corpora belonging to different domains and encompassing both monologue and dialogue forms of argumentation. To that end, in addition to considering undersampling baselines from previous work, we propose three new sampling strategies relying on context (i.e., short/long) and the semantic similarity between propositions. Our results indicate that using more informed sampling strategies improves the performance, not only when evaluating models on their respective test splits, but also in the case of cross-domain evaluation. 2025.coling-main.145 @@ -1776,7 +1776,7 @@ YiweiWang BaolongBi JiayiMao - XueqiCheng + XueqiCheng 2144–2162 “Jailbreak” is a major safety concern of Large Language Models (LLMs), which occurs when malicious prompts lead LLMs to produce harmful outputs, raising issues about the reliability and safety of LLMs. Therefore, an effective evaluation of jailbreaks is very crucial to develop its mitigation strategies. However, our research reveals that many jailbreaks identified by current evaluations may actually be hallucinations—erroneous outputs that are mistaken for genuine safety breaches. This finding suggests that some perceived vulnerabilities might not represent actual threats, indicating a need for more precise red teaming benchmarks. To address this problem, we propose the Benchmark for reliABilitY and jailBreak haLlUcination Evaluation (BabyBLUE). BabyBLUE introduces a specialized validation framework including various evaluators to enhance existing jailbreak benchmarks, ensuring outputs are useful malicious instructions. Additionally, BabyBLUE presents a new dataset as an augmentation to the existing red teaming benchmarks, specifically addressing hallucinations in jailbreaks, aiming to evaluate the true potential of jailbroken LLM outputs to cause harm to human society. 2025.coling-main.146 @@ -1843,7 +1843,7 @@ Leveraging Explicit Reasoning for Inference Integration in Commonsense-Augmented Dialogue Models Sarah E.Finch - Jinho D.Choi + Jinho D.Choi 2222–2235 Open-domain dialogue systems need to grasp social commonsense to understand and respond effectively to human users. Commonsense-augmented dialogue models have been proposed that aim to infer commonsense knowledge from dialogue contexts in order to improve response quality. However, existing approaches to commonsense-augmented dialogue rely on implicit reasoning to integrate commonsense inferences during response generation. In this study, we explore the impact of explicit reasoning against implicit reasoning over commonsense for dialogue response generation. Our findings demonstrate that separating commonsense reasoning into explicit steps for generating, selecting, and integrating commonsense into responses leads to better dialogue interactions, improving naturalness, engagement, specificity, and overall quality. Subsequent analyses of these findings unveil insights into the effectiveness of various types of commonsense in generating responses and the particular response traits enhanced through explicit reasoning for commonsense integration. Our work advances research in open-domain dialogue by achieving a new state-of-the-art in commonsense-augmented response generation. 2025.coling-main.152 @@ -1892,8 +1892,8 @@ MinzhiLi ZhengyuanLiu ShuminDeng - ShafiqJoty - NancyChen + ShafiqJoty + NancyChen Min-YenKan 2277–2290 The acceleration of Large Language Models (LLMs) research has opened up new possibilities for evaluating generated text. Though LLMs serve as scalable and economical evaluators, how reliable these evaluators is still under-explored. Prior research efforts in the meta-evaluation of LLMs as judges limit the prompting of an LLM to a single use to obtain a final evaluation decision. They then compute the agreement between LLMs’ outputs and human labels. This lacks interpretability in understanding the evaluation capability of LLMs. In light of this challenge, we propose DnA-Eval, which breaks down the evaluation process into decomposition and aggregation stages based on pedagogical practices. Our experiments show that it not only provides a more interpretable window for how well LLMs evaluate, but also leads to improvements up to 39.6% for different LLMs on a variety of meta-evaluation benchmarks. @@ -1950,7 +1950,7 @@ TaoFeng LizhenQu XiaoxiKang - GholamrezaHaffari + GholamrezaHaffari 2351–2369 Automatically evaluating the quality of responses in dialogue systems is a challenging yet crucial task. Current metrics often fail to align with human judgments, especially when assessing responses that are grammatically correct. To address this issue, we propose a novel metric, called CausalScore, which assesses the relevance of responses by measuring the causal strength between dialogue histories and responses. The causal strength is estimated by utilizing both unconditional dependence and conditional dependencies from dialogue histories to responses. We compare our metric with the existing competitive metrics in terms of their alignment with human judgements. Our experimental results demonstrate that CausalScore significantly surpasses existing state-of-the-art metrics by aligning better with human judgements. Additionally, we collect a dialogue dataset CGDIALOG+ with human-annotated causal relations and a set of pairwise human judgements to facilitate the development of automatic metrics. 2025.coling-main.161 @@ -2004,7 +2004,7 @@ HaotianYe ChunlanMa FrançoisYvon - HinrichSchütze + HinrichSchütze 2417–2433 Recent studies have shown that post-aligning multilingual pretrained language models (mPLMs) using alignment objectives on both original and transliterated data can improve crosslingual alignment. This improvement further leads to better crosslingual transfer performance. However, it remains unclear how and why a better crosslingual alignment is achieved, as this technique only involves transliterations, and does not use any parallel data. This paper attempts to explicitly evaluate the crosslingual alignment and identify the key elements in transliteration-based approaches that contribute to better performance. For this, we train multiple models under varying setups for two pairs of related languages: (1) Polish and Ukrainian and (2) Hindi and Urdu. To assess alignment, we define four types of similarities based on sentence representations. Our experimental results show that adding transliterations alone improves the overall similarities, even for random sentence pairs. With the help of auxiliary transliteration-based alignment objectives, especially the contrastive objective, the model learns to distinguish matched from random pairs, leading to better crosslingual alignment. However, we also show that better alignment does not always yield better downstream performance, suggesting that further research is needed to clarify the connection between alignment and performance. The code implementation is based on https://github.com/cisnlp/Transliteration-PPA. 2025.coling-main.165 @@ -2014,7 +2014,7 @@ <fixed-case>GL</fixed-case>-<fixed-case>GAN</fixed-case>: Perceiving and Integrating Global and Local Styles for Handwritten Text Generation with Mamba YimingWang HongxiWei - HengWang + HengWang ShiwenSun ChaoHe 2434–2444 @@ -2042,10 +2042,10 @@ YounesSamih KirillChirkunov Alham FikriAji - PreslavNakov + PreslavNakov ShantanuGodbole - SalimRoukos - RaduFlorian + SalimRoukos + RaduFlorian NizarHabash 2456–2477 The rapid evolution of Natural Language Processing (NLP) has favoured major languages such as English, leaving a significant gap for many others due to limited resources. This is especially evident in the context of data annotation, a task whose importance cannot be underestimated, but which is time-consuming and costly. Thus, any dataset for resource-poor languages is precious, in particular when it is task-specific. Here, we explore the feasibility of repurposing an existing multilingual dataset for a new NLP task: we repurpose a subset of the BELEBELE dataset (Bandarkar et al., 2023), which was designed for multiple-choice question answering (MCQA), to enable the more practical task of extractive QA (EQA) in the style of machine reading comprehension. We present annotation guidelines and a parallel EQA dataset for English and Modern Standard Arabic (MSA). We also present QA evaluation results for several monolingual and cross-lingual QA pairs including English, MSA, and five Arabic dialects. We aim to help others adapt our approach for the remaining 120 BELEBELE language variants, many of which are deemed under-resourced. We also provide a thorough analysis and share insights to deepen understanding of the challenges and opportunities in NLP task reformulation. @@ -2068,7 +2068,7 @@ HongdeLiu FeiGao YuxiangJia - HongyingZan + HongyingZan MinPeng 2497–2512 Emotion recognition in conversations (ERC) has garnered significant attention from the research community. However, due to the complexity of visual scenes and dialogue contextual dependencies in conversations, previous ERC methods fail to handle emotional cues from both visual sources and discourse structures. Furthermore, existing state-of-the-art ERC models are trained and tested separately on each single ERC dataset, not verifying their effectiveness across multiple datasets simultaneously. To address these challenges, this paper proposes an innovative framework for ERC, called Dialogue Scenes Understanding Enhanced Multi-modal Multi-task Tuning (DialogueMMT). More concretely, a novel video-language connector is applied within the large vision-language model for capturing video features effectively. Additionally, we utilize multi-task instruction tuning with a unified ERC dataset to enhance the model’s understanding of multi-modal dialogue scenes and employ a chain-of-thought strategy to improve emotion classification performance. Extensive experimental results on three benchmark ERC datasets indicate that the proposed DialogueMMT framework consistently outperforms existing state-of-the-art approaches in terms of overall performance. @@ -2092,10 +2092,10 @@ Aligning Large Language Models with Human Opinions through Persona Selection and Value–Belief–Norm Reasoning - Do XuanLong + Do XuanLong KenjiKawaguchi Min-YenKan - NancyChen + NancyChen 2526–2547 Reasoning and predicting human opinions with large language models (LLMs) is essential yet challenging. Current methods employ role-playing with personae but face two major issues: LLMs are sensitive to even a single irrelevant persona, skewing predictions by up to 30%; and LLMs fail to reason strategically over personae. We propose Chain-of-Opinion (COO), a simple four-step solution modeling which and how to reason with personae, inspired by the Value–Belief–Norm (VBN) theory. COO differentiates between explicit personae (demographics and ideology) and implicit personae (historical opinions), involves: (1) filtering irrelevant attributes from explicit personae; (2) ranking implicit personae into a preferential list for selecting top-k; (3) applying novel VBN reasoning to extract user environmental and personal value, belief, and norm variables for accurate and reliable predictions; and (4) iterating VBN reasoning with progressively larger lists of implicit personae to handle potential persona insufficiency. COO efficiently achieves new state-of-the-art opinion prediction via prompting with only 5 inference calls, improving prior techniques by up to 4%. Notably, fine-tuning LMs with COO’s data results in significantly better opinion-aligned models, by up to 23%. 2025.coling-main.172 @@ -2147,7 +2147,7 @@ HuiyaoWang PeifengLi YaxinFan - QiaomingZhu + QiaomingZhu 2592–2602 Previous work on dialogue topic shift detection has primarily focused on shallow local reasoning, overlooking the importance of considering the global historical structure and local details to elucidate the underlying causes of topic shift. To address the above two issues, we introduce the dual-process theory to this task and design a novel Dual-Module Framework DMF (i.e., intuition and reasoning module) for dialogue topic shift detection to emulate this cognitive process. Specifically, the intuition module employs Large Language Models (LLMs) to extract and store the global topic structure of historical dialogue, while the reasoning module introduces a LLM to generate reasoning samples between the response and the most recent topic of historical dialogue, thereby providing local detail explanations for topic shift. Moreover, we distill the dual-module framework into a small generative model to facilitate more precise reasoning. The experimental results on three public datasets show that our DMF outperforms the state-of-the-art baselines. 2025.coling-main.177 @@ -2217,7 +2217,7 @@ Oddballness: universal anomaly detection with language models - FilipGralinski + FilipGralinski RyszardStaruch KrzysztofJurkiewicz 2683–2689 @@ -2273,7 +2273,7 @@ The Gaps between Fine Tuning and In-context Learning in Bias Evaluation and Debiasing MasahiroKaneko DanushkaBollegala - TimothyBaldwin + TimothyBaldwin 2758–2764 The output tendencies of PLMs vary markedly before and after FT due to the updates to the model parameters. These divergences in output tendencies result in a gap in the social biases of PLMs. For example, there exits a low correlation between intrinsic bias scores of a PLM and its extrinsic bias scores under FT-based debiasing methods. Additionally, applying FT-based debiasing methods to a PLM leads to a decline in performance in downstream tasks. On the other hand, PLMs trained on large datasets can learn without parameter updates via ICL using prompts. ICL induces smaller changes to PLMs compared to FT-based debiasing methods. Therefore, we hypothesize that the gap observed in pre-trained and FT models does not hold true for debiasing methods that use ICL. In this study, we demonstrate that ICL-based debiasing methods show a higher correlation between intrinsic and extrinsic bias scores compared to FT-based methods. Moreover, the performance degradation due to debiasing is also lower in the ICL case compared to that in the FT case. 2025.coling-main.187 @@ -2283,7 +2283,7 @@ <fixed-case>LLM</fixed-case> Sensitivity Challenges in Abusive Language Detection: Instruction-Tuned vs. Human Feedback YaqiZhang ViktorHangya - AlexanderFraser + AlexanderFraser 2765–2780 The capacity of large language models (LLMs) to understand and distinguish socially unacceptable texts enables them to play a promising role in abusive language detection. However, various factors can affect their sensitivity. In this work, we test whether LLMs have an unintended bias in abusive language detection, i.e., whether they predict more or less of a given abusive class than expected in zero-shot settings. Our results show that instruction-tuned LLMs tend to under-predict positive classes, since datasets used for tuning are dominated by the negative class. On the contrary, models fine-tuned with human feedback tend to be overly sensitive. In an exploratory approach to mitigate these issues, we show that label frequency in the prompt helps with the significant over-prediction. 2025.coling-main.188 @@ -2315,7 +2315,7 @@ Topology-of-Question-Decomposition: Enhancing Large Language Models with Information Retrieval for Knowledge-Intensive Tasks WeijieLi JinWang - Liang-ChihYu + Liang-ChihYu XuejieZhang 2814–2833 Large language models (LLMs) are increasingly deployed for general problem-solving across various domains yet remain constrained to chaining immediate reasoning steps and depending solely on parametric knowledge. Integrating an information retrieval system directly into the reasoning process of LLMs can improve answer accuracy but might disrupt the natural reasoning sequence. Consequently, LLMs may underperform in complex, knowledge-intensive tasks requiring multiple reasoning steps, extensive real-world knowledge, or critical initial decisions. To overcome these challenges, we introduce a novel framework, Topology-of-Question-Decomposition (ToQD), which activates retrieval only when necessary. Globally, ToQD guides LLMs in constructing a topology graph from the input question, each node representing a sub-question. Locally, ToQD employs self-verify inference to determine whether a sub-question should retrieve relevant documents, necessitate further decomposition, or directly provide an answer. Experiments demonstrate that ToQD achieves superior performance and robustness in complex, knowledge-intensive tasks, significantly enhancing system response efficiency. @@ -2363,7 +2363,7 @@ ZonghanYang XinruiChen PengLi - YangLiu + YangLiu 2886–2903 State-of-the-art Large Multi-Modal Models (LMMs) have demonstrated exceptional capabilities in vision-language tasks. Despite their advanced functionalities, the performances of LMMs are still limited in challenging scenarios that require complex reasoning with multiple levels of visual information. Existing prompting techniques for LMMs focus on either improving textual reasoning or leveraging tools for image preprocessing, lacking a simple and general visual prompting scheme to promote vision-language coordination in LMMs. In this work, we propose SCAFFOLD prompting that scaffolds coordinates to promote vision-language coordination. Specifically, SCAFFOLD overlays a dot matrix within the image as visual information anchors and leverages multi-dimensional coordinates as textual positional references. Extensive experiments on a wide range of challenging vision-language tasks demonstrate the superiority of SCAFFOLD over the textual Chain-of-Thought prompting. 2025.coling-main.195 @@ -2375,7 +2375,7 @@ JianZhang YanZhang YuanyuanLiang - QiLi + QiLi HongweiWang 2904–2918 The strong capability of large language models (LLMs) has been applied to information extraction (IE) through either retrieval augmented prompting or instruction tuning (IT). However, the best way to incorporate information with LLMs for IE remains an open question. In this paper, we explore Retrieval Augmented Instruction Tuning (RA-IT) for IE, focusing on the task of open named entity recognition (NER). Specifically, for each training sample, we retrieve semantically similar examples from the training dataset as the context and prepend them to the input of the original instruction. To evaluate our RA-IT approach more thoroughly, we construct a Chinese IT dataset for open NER and evaluate RA-IT in both English and Chinese scenarios. Experimental results verify the effectiveness of RA-IT across various data sizes and in both English and Chinese scenarios. We also conduct thorough studies to explore the impacts of various retrieval strategies in the proposed RA-IT framework. @@ -2421,7 +2421,7 @@ Towards Understanding Multi-Task Learning (Generalization) of <fixed-case>LLM</fixed-case>s via Detecting and Exploring Task-Specific Neurons YongqiLeng - DeyiXiong + DeyiXiong 2969–2987 While large language models (LLMs) have demonstrated superior multi-task capabilities, understanding the learning mechanisms behind this is still a challenging problem. In this paper, we attempt to understand such mechanisms from the perspective of neurons. Specifically, we detect task-sensitive neurons in LLMs via gradient attribution on task-specific data. Through extensive deactivation and fine-tuning experiments, we demonstrate that the detected neurons are highly correlated with the given task, which we term as task-specific neurons. With these identified task-specific neurons, we delve into two common problems in multi-task learning and continuous learning: Generalization and Catastrophic Forgetting. We find that the overlap of task-specific neurons is strongly associated with generalization and specialization across tasks. Interestingly, at certain layers of LLMs, there is a high similarity in the parameters of different task-specific neurons, and such similarity is highly correlated with the generalization performance. Inspired by these findings, we propose a neuron-level continuous fine-tuning method that only fine-tunes the current task-specific neurons during continuous learning, and extensive experiments demonstrate the effectiveness of the proposed method. Our study provides insights into the interpretability of LLMs in multi-task learning. 2025.coling-main.200 @@ -2432,7 +2432,7 @@ YuqiRen RenrenJin TongxuanZhang - DeyiXiong + DeyiXiong 2988–3001 Large Language Models (LLMs) have demonstrated remarkable abilities in text comprehension and logical reasoning, indicating that the text representations learned by LLMs can facilitate their language processing capabilities. In neuroscience, brain cognitive processing signals are typically utilized to study human language processing. Therefore, it is natural to ask how well the text embeddings from LLMs align with the brain cognitive processing signals, and how training strategies affect the LLM-brain alignment? In this paper, we employ Representational Similarity Analysis (RSA) to measure the alignment between 23 mainstream LLMs and fMRI signals of the brain to evaluate how effectively LLMs simulate cognitive language processing. We empirically investigate the impact of various factors (e.g., pre-training data size, model scaling, alignment training, and prompts) on such LLM-brain alignment. Experimental results indicate that pre-training data size and model scaling are positively correlated with LLM-brain similarity, and alignment training can significantly improve LLM-brain similarity. Explicit prompts contribute to the consistency of LLMs with brain cognitive language processing, while nonsensical noisy prompts may attenuate such alignment. Additionally, the performance of a wide range of LLM evaluations (e.g., MMLU, Chatbot Arena) is highly correlated with the LLM-brain similarity. 2025.coling-main.201 @@ -2488,7 +2488,7 @@ To Label or Not to Label: Hybrid Active Learning for Neural Machine Translation Abdul HameedAzeemi Ihsan AyyubQazi - Agha AliRaza + Agha AliRaza 3071–3082 Active learning (AL) techniques reduce labeling costs for training neural machine translation (NMT) models by selecting smaller representative subsets from unlabeled data for annotation. Diversity sampling techniques select heterogeneous instances, while uncertainty sampling methods select instances with the highest model uncertainty. Both approaches have limitations - diversity methods may extract varied but trivial examples, while uncertainty sampling can yield repetitive, uninformative instances. To bridge this gap, we propose Hybrid Uncertainty and Diversity Sampling (HUDS), an AL strategy for domain adaptation in NMT that combines uncertainty and diversity for sentence selection. HUDS computes uncertainty scores for unlabeled sentences and subsequently stratifies them. It then clusters sentence embeddings within each stratum and computes diversity scores by distance to the centroid. A weighted hybrid score that combines uncertainty and diversity is then used to select the top instances for annotation in each AL iteration. Experiments on multi-domain German-English and French-English datasets demonstrate the better performance of HUDS over other strong AL baselines. We analyze the sentence selection with HUDS and show that it prioritizes diverse instances having high model uncertainty for annotation in early AL iterations. 2025.coling-main.206 @@ -2565,7 +2565,7 @@ ChangzeLv XiaohuaWang XiaoqingZheng - XuanjingHuang + XuanjingHuang 3158–3178 The recent surge in jailbreaking attacks has revealed significant vulnerabilities in Large Language Models (LLMs) when exposed to malicious inputs. While various defense strategies have been proposed to mitigate these threats, there has been limited research into the underlying mechanisms that make LLMs vulnerable to such attacks. In this study, we suggest that the self-safeguarding capability of LLMs is linked to specific activity patterns within their representation space. Although these patterns have little impact on the semantic content of the generated text, they play a crucial role in shaping LLM behavior under jailbreaking attacks. Our findings demonstrate that these patterns can be detected with just a few pairs of contrastive queries. Extensive experimentation shows that the robustness of LLMs against jailbreaking can be manipulated by weakening or strengthening these patterns. Further visual analysis provides additional evidence for our conclusions, providing new insights into the jailbreaking phenomenon. These findings highlight the importance of addressing the potential misuse of open-source LLMs within the community. 2025.coling-main.212 @@ -2622,7 +2622,7 @@ <fixed-case>COF</fixed-case>: Adaptive Chain of Feedback for Comparative Opinion Quintuple Extraction QingtingXu KaisongSong - ChaoqunLiu + ChaoqunLiu YangyangKang XiabingZhou JunLin @@ -2701,7 +2701,7 @@ SiyuanWang ZhuohanLong ZhihaoFan - XuanjingHuang + XuanjingHuang ZhongyuWei 3310–3328 This paper presents a benchmark self-evolving framework to dynamically evaluate rapidly advancing Large Language Models (LLMs). We utilize a multi-agent system to reframe new evolving instances with high confidence that extend existing benchmarks. Towards a more scalable, robust and fine-grained evaluation, we implement six reframing operations to construct evolving instances testing LLMs against diverse queries, shortcut biases and probing their problem-solving sub-abilities. With this framework, we extend datasets across general and specific tasks, through various iterations. Experimental results show a performance decline in most LLMs against their original results under scalable and robust evaluations, offering a more accurate reflection of model capabilities alongside our fine-grained evaluation. Besides, our framework widens performance discrepancies both between different models and within the same model across various tasks, facilitating more informed model selection for specific tasks. We hope this framework contributes the research community for continuously evolving benchmarks alongside LLM development. @@ -2790,7 +2790,7 @@ On Evaluation Protocols for Data Augmentation in a Limited Data Scenario FrédéricPiedboeuf - PhilippeLanglais + PhilippeLanglais 3428–3443 Textual data augmentation (DA) is a prolific field of study where novel techniques to create artificial data are regularly proposed, and that has demonstrated great efficiency on small data settings, at least for text classification tasks. In this paper, we challenge those results, showing that classical data augmentation (which modify sentences) is simply a way of performing better fine-tuning, and that spending more time doing so before applying data augmentation negates its effect. This is a significant contribution as it answers several questions that were left open in recent years, namely : which DA technique performs best (all of them as long as they generate data close enough to the training set, as to not impair training) and why did DA show positive results (facilitates training of network). We further show that zero- and few-shot DA via conversational agents such as ChatGPT or LLama2 can increase performances, confirming that this form of data augmentation is preferable to classical methods. 2025.coling-main.231 @@ -2826,7 +2826,7 @@ AlexanderJohnson EmmyPhung AhmadEmami - JuliaHirschberg + JuliaHirschberg 3479–3494 The rapid expansion of online content has intensified the issue of information redundancy, underscoring the need for solutions that can identify genuinely new information. Despite this challenge, the research community has seen a decline in focus on novelty detection, particularly with the rise of large language models (LLMs). Additionally, previous approaches have relied heavily on human annotation, which is time-consuming, costly, and particularly challenging when annotators must compare a target document against a vast number of historical documents. In this work, we introduce NovAScore (Novelty Evaluation in Atomicity Score), an automated metric for evaluating document-level novelty. NovAScore aggregates the novelty and salience scores of atomic information, providing high interpretability and a detailed analysis of a document’s novelty. With its dynamic weight adjustment scheme, NovAScore offers enhanced flexibility and an additional dimension to assess both the novelty level and the importance of information within a document. Our experiments show that NovAScore strongly correlates with human judgments of novelty, achieving a 0.626 Point-Biserial correlation on the TAP-DLND 1.0 dataset and a 0.920 Pearson correlation on an internal human-annotated dataset. 2025.coling-main.234 @@ -2916,8 +2916,8 @@ Is Peer-Reviewing Worth the Effort? - Kenneth WardChurch - RamanChandrasekar + Kenneth WardChurch + RamanChandrasekar John E.Ortega Ibrahim SaidAhmad 3589–3599 @@ -2931,7 +2931,7 @@ RyoSato DaiNakashima TakeshiSuzuki - Minh LeNguyen + Minh LeNguyen 3600–3612 Large language models (LLMs) have achieved notable success across various tasks but are hindered by their large size and high computational demands. Post-training pruning (PTP) offers a promising solution by reducing model size through parameter removal while preserving performance. However, current PTP methods perform optimally only within specific sparsity ranges. This paper presents two key findings: (1) Layerwise uniform sparsity is effective at low sparsity, while non-uniform sparsity excels at high levels; (2) Relative importance-based pruning works best at low sparsity, whereas Hessian-based weight reconstruction is superior at high sparsity. We design and conduct experiments to validate these findings. Based on these insights, we introduce OptiPrune, a robust pruning method effective across all sparsity levels. OptiPrune adapts non-uniform sparsity with adaptive deviation and employs a threshold to select the optimal pruning strategy. Empirical results across diverse datasets, architectures, and languages validate its performance and robustness. These findings provide valuable directions for future LLM pruning research. Our code and data are publicly available. 2025.coling-main.243 @@ -2973,7 +2973,7 @@ Multilingual Supervision Improves Semantic Disambiguation of Adpositions WesleyScivetti - LaurenLevine + LaurenLevine NathanSchneider 3655–3669 Adpositions display a remarkable amount of ambiguity and flexibility in their meanings, and are used in different ways across languages. We conduct a systematic corpus-based cross-linguistic investigation into the lexical semantics of adpositions, utilizing SNACS (Schneider et al., 2018), an annotation framework with data available in several languages. Our investigation encompasses 5 of these languages: Chinese, English, Gujarati, Hindi, and Japanese. We find substantial distributional differences in adposition semantics, even in comparable corpora. We further train classifiers to disambiguate adpositions in each of our languages. Despite the cross-linguistic differences in adpositional usage, sharing annotated data across languages boosts overall disambiguation performance, leading to the highest published scores on this task for all 5 languages. @@ -3003,7 +3003,7 @@ RuiyangRen YuhaoWang YingqiQu - Wayne XinZhao + Wayne XinZhao JingLiu HuaWu Ji-RongWen @@ -3020,7 +3020,7 @@ WenxuanZhang XiaobaoWu BoyangLi - Anh TuanLuu + Anh TuanLuu LidongBing 3716–3731 Large Language Models (LLMs) have demonstrated remarkable performance through supervised fine-tuning or in-context learning using gold labels. However, this paradigm is limited by the availability of gold labels, while in certain scenarios, LLMs may need to perform tasks that are too complex for humans to provide such labels. To tackle this challenge, this study explores whether solely utilizing unlabeled data can elicit strong model capabilities. We propose a new paradigm termed zero-to-strong generalization. We iteratively prompt LLMs to annotate unlabeled data and retain high-quality labels by filtering. Surprisingly, we obverse that this iterative process gradually unlocks LLMs’ potential on downstream tasks. Our experiments on extensive classification and reasoning tasks confirm the effectiveness of our proposed framework. Our analysis indicates that this paradigm is effective for both in-context learning and fine-tuning, and for various model sizes. @@ -3035,7 +3035,7 @@ AbhishekLalwani DavidKoleczek MukundRungta - SadidHasan + SadidHasan ElitaLobo 3732–3752 Machine unlearning aims to efficiently eliminate the influence of specific training data, known as the forget set, from the model. However, existing unlearning methods for Large Language Models (LLMs) face a critical challenge: they rely solely on negative feedback to suppress responses related to the forget set, which often results in nonsensical or inconsistent outputs, diminishing model utility and posing potential privacy risks. To address this limitation, we propose a novel approach called Alternate Preference Optimization (AltPO), which combines negative feedback with in-domain positive feedback on the forget set. Additionally, we introduce new evaluation metrics to assess the quality of responses related to the forget set. Extensive experiments show that our approach not only enables effective unlearning but also avoids undesirable model behaviors while maintaining overall model performance. @@ -3118,7 +3118,7 @@ SenbinZhu HongdeLiu YuxiangJia - HongyingZan + HongyingZan MinPeng 3836–3849 Entities are important to understanding literary works, which emphasize characters, plots and environment. The research on entity recognition, especially nested entity recognition in the literary domain is still insufficient partly due to insufficient annotated data. To address this issue, we construct the first Genre-oriented Corpus for Entity Recognition in Chinese Web Novels, namely GenWebNovel, comprising 400 chapters totaling 1,214,283 tokens under two genres, XuanHuan (Eastern Fantasy) and History. Based on the corpus, we analyze the distribution of different types of entities, including person, location, and organization. We also compare the nesting patterns of nested entities between GenWebNovel and the English corpus LitBank. Even though both belong to the literary domain, entities in different genres share few overlaps, making genre adaptation of NER (Named Entity Recognition) a hard problem. We propose a novel method that utilizes a pre-trained language model as an In-context learning example retriever to boost the performance of large language models. Our experiments show that this approach significantly enhances entity recognition, matching state-of-the-art (SOTA) models without requiring additional training data. Our code, dataset, and model are available at https://github.com/hjzhao73/GenWebNovel. @@ -3131,7 +3131,7 @@ YiJing TongWu TianhaoShen - DeyiXiong + DeyiXiong QingYang 3850–3864 Ensuring the safety of large language models (LLMs) is paramount, yet identifying potential vulnerabilities is challenging. While manual red teaming is effective, it is time-consuming, costly and lacks scalability. Automated red teaming (ART) offers a more cost-effective alternative, automatically generating adversarial prompts to expose LLM vulnerabilities. However, in current ART efforts, a robust framework is absent, which explicitly frames red teaming as an effectively learnable task. To address this gap, we propose Automated Progressive Red Teaming (APRT) as an effectively learnable framework. APRT leverages three core modules: an Intention Expanding LLM that generates diverse initial attack samples, an Intention Hiding LLM that crafts deceptive prompts, and an Evil Maker to manage prompt diversity and filter ineffective samples. The three modules collectively and progressively explore and exploit LLM vulnerabilities through multi-round interactions. In addition to the framework, we further propose a novel indicator, Attack Effectiveness Rate (AER) to mitigate the limitations of existing evaluation metrics. By measuring the likelihood of eliciting unsafe but seemingly helpful responses, AER aligns closely with human evaluations. Extensive experiments with both automatic and human evaluations, demonstrate the effectiveness of ARPT across both open- and closed-source LLMs. Specifically, APRT effectively elicits 54% unsafe yet useful responses from Meta’s Llama-3-8B-Instruct, 50% from GPT-4o (API access), and 39% from Claude-3.5 (API access), showcasing its robust attack capability and transferability across LLMs (especially from open-source LLMs to closed-source LLMs). @@ -3272,9 +3272,9 @@ A Dual Contrastive Learning Framework for Enhanced Multimodal Conversational Emotion Recognition YunheXie - ChengjieSun + ChengjieSun ZiyiCao - BingquanLiu + BingquanLiu ZhenzhouJi YuanchaoLiu LiliShan @@ -3358,7 +3358,7 @@ XinweiWu RenrenJin ShaoyangXu - DeyiXiong + DeyiXiong 4130–4148 Ensuring large language models (LLM) behave consistently with human goals, values, and intentions is crucial for their safety but yet computationally expensive. To reduce the computational cost of alignment training of LLMs, especially for those with a huge number of parameters, and to reutilize learned value alignment, we propose ConTrans, a novel framework that enables weak-to-strong alignment transfer via concept transplantation. From the perspective of representation engineering, ConTrans refines concept vectors in value alignment from a source LLM (usually a weak yet aligned LLM). The refined concept vectors are then reformulated to adapt to the target LLM (usually a strong yet unaligned base LLM) via affine transformation. In the third step, ConTrans transplants the reformulated concept vectors into the residual stream of the target LLM. Experiments demonstrate the successful transplantation of a wide range of aligned concepts from 7B models to 13B and 70B models across multiple LLMs and LLM families. Remarkably, ConTrans even surpasses instruction-tuned models in terms of truthfulness. Experiment results validate the effectiveness of both inter-LLM-family and intra-LLM-family concept transplantation. Our work successfully demonstrates an alternative way to achieve weak-to-strong alignment generalization and control. 2025.coling-main.279 @@ -3452,7 +3452,7 @@ Jump To Hyperspace: Comparing <fixed-case>E</fixed-case>uclidean and Hyperbolic Loss Functions for Hierarchical Multi-Label Text Classification JensVan Nooten - WalterDaelemans + WalterDaelemans 4260–4273 Hierarchical Multi-Label Text Classification (HMTC) is a challenging machine learning task where multiple labels from a hierarchically organized label set are assigned to a single text. In this study, we examine the effectiveness of Euclidean and hyperbolic loss functions to improve the performance of BERT models on HMTC, which very few previous studies have adopted. We critically evaluate label-aware losses as well as contrastive losses in the Euclidean and hyperbolic space, demonstrating that hyperbolic loss functions perform comparably with non-hyperbolic loss functions on four commonly used HMTC datasets in most scenarios. While hyperbolic label-aware losses perform the best on low-level labels, the overall consistency and micro-averaged performance is compromised. Additionally, we find that our contrastive losses are less effective for HMTC when deployed in the hyperbolic space than non-hyperbolic counterparts. Our research highlights that with the right metrics and training objectives, hyperbolic space does not provide any additional benefits compared to Euclidean space for HMTC, thereby prompting a reevaluation of how different geometric spaces are used in other AI applications. 2025.coling-main.287 @@ -3464,8 +3464,8 @@ OsamaMohammed Afzal Hawau OlamideToyin ShadyShehata - PreslavNakov - ZeerakTalat + PreslavNakov + ZeerakTalat 4274–4281 Recent improvements in the quality of the generations by large language models have spurred research into identifying machine-generated text. Such work often presents high-performing detectors. However, humans and machines can produce text in different styles and domains, yet the the performance impact of such on machine generated text detection systems remains unclear. In this paper, we audit the classification performance for detecting machine-generated text by evaluating on texts with varying writing styles. We find that classifiers are highly sensitive to stylistic changes and differences in text complexity, and in some cases degrade entirely to random classifiers. We further find that detection systems are particularly susceptible to misclassify easy-to-read texts while they have high performance for complex texts, leading to concerns about the reliability of detection systems. We recommend that future work attends to stylistic factors and reading difficulty levels of human-written and machine-generated text. 2025.coling-main.288 @@ -3507,7 +3507,7 @@ Towards Efficient and Robust <fixed-case>VQA</fixed-case>-<fixed-case>NLE</fixed-case> Data Generation with Large Vision-Language Models Patrick AmadeusIrawan - Genta IndraWinata + Genta IndraWinata SamuelCahyawijaya AyuPurwarianti 4323–4340 @@ -3552,7 +3552,7 @@ <fixed-case>TEXT</fixed-case>-<fixed-case>CAKE</fixed-case>: Challenging Language Models on Local Text Coherence - LucaDini + LucaDini DominiqueBrunato FeliceDell’Orletta TommasoCaselli @@ -3619,7 +3619,7 @@ LiangPang HanxingDing HuaweiShen - XueqiCheng + XueqiCheng 4457–4473 The extensive utilization of large language models (LLMs) underscores the crucial necessity for precise and contemporary knowledge embedded within their intrinsic parameters. Existing research on knowledge editing primarily concentrates on monolingual scenarios, neglecting the complexities presented by multilingual contexts and multi-hop reasoning. To address these challenges, our study introduces MLaKE (Multilingual Language Knowledge Editing), a novel benchmark comprising 4072 multi-hop and 5360 single-hop questions designed to evaluate the adaptability of knowledge editing methods across five languages: English, Chinese, Japanese, French, and German. MLaKE aggregates fact chains from Wikipedia across languages and utilizes LLMs to generate questions and answer. We assessed the effectiveness of current multilingual knowledge editing methods using the MLaKE dataset. Our results show that due to considerable inconsistencies in both multilingual performance and encoding efficiency, these methods struggle to generalize effectively across languages. The accuracy of these methods when editing English is notably higher than for other languages. The experimental results further demonstrate that models encode knowledge and generation capabilities for different languages using distinct parameters, leading to poor cross-lingual transfer performance in current methods. Transfer performance is notably better within the same language family compared to across different families. These findings emphasize the urgent need to improve multilingual knowledge editing methods. 2025.coling-main.301 @@ -3749,7 +3749,7 @@ JingYe LuXiang YapingZhang - ChengqingZong + ChengqingZong 4646–4669 Large Language Models (LLMs) have demonstrated promising potential in providing empathetic support during interactions. However, their responses often become verbose or overly formulaic, failing to adequately address the diverse emotional support needs of real-world scenarios. To tackle this challenge, we propose an innovative strategy-enhanced role-playing framework, designed to simulate authentic emotional support conversations. Specifically, our approach unfolds in two steps: (1) Strategy-Enhanced Role-Playing Interactions, which involve three pivotal roles—Seeker, Strategy Counselor, and Supporter—engaging in diverse scenarios to emulate real-world interactions and promote a broader range of dialogues; and (2) Emotional Support Agent Training, achieved through fine-tuning LLMs using our specially constructed dataset. Within this framework, we develop the ServeForEmo dataset, comprising an extensive collection of 3.7K+ multi-turn dialogues and 62.8K+ utterances. We further present SweetieChat, an emotional support agent capable of handling diverse open-domain scenarios. Extensive experiments and human evaluations confirm the framework’s effectiveness in enhancing emotional support, highlighting its unique ability to provide more nuanced and tailored assistance. 2025.coling-main.312 @@ -3766,7 +3766,7 @@ KailaiYang MakotoMiwa SophiaAnaniadou - Jun’ichiTsujii + Jun’ichiTsujii 4670–4688 We propose ELAINE (EngLish-jApanese-chINesE)-medLLM, a trilingual (English, Japanese, Chinese) large language model adapted for the bio-medical domain based on Llama-3-8B. The training dataset was carefully curated in terms of volume and diversity to adapt to the biomedical domain and endow trilingual capability while preserving the knowledge and abilities of the base model. The training follows 2-stage paths: continued pre-training and supervised fine-tuning (SFT). Our results demonstrate that ELAINE-medLLM exhibits superior trilingual capabilities compared to existing bilingual or multilingual medical LLMs without severely sacrificing the base model’s capability. 2025.coling-main.313 @@ -3810,7 +3810,7 @@ What’s the most important value? <fixed-case>INVP</fixed-case>: <fixed-case>IN</fixed-case>vestigating the Value Priorities of <fixed-case>LLM</fixed-case>s through Decision-making in Social Scenarios XuelinLiu - PengyuanLiu + PengyuanLiu DongYu 4725–4752 As large language models (LLMs) demonstrate impressive performance in various tasks and are increasingly integrated into the decision-making process, ensuring they align with human values has become crucial. This paper highlights that value priorities—the relative importance of different value—play a pivotal role in the decision-making process. To explore the value priorities in LLMs, this paper introduces INVP, a framework for INvestigating Value Priorities through decision-making in social scenarios. The framework encompasses social scenarios including binary decision-making, covering both individual and collective decision-making contexts, and is based on Schwartz’s value theory for constructing value priorities. Using this framework, we construct a dataset, which contains a total of 1613 scenarios and 3226 decisions across 283 topics. We evaluate seven popular LLMs and the experimental results reveal commonalities in the value priorities across different LLMs, such as an emphasis on Universalism and Benevolence, while Power and Hedonism are typically given lower priority. This study provides fresh insights into understanding and enhancing the moral and value alignment of LLMs when making complex social decisions. @@ -3820,7 +3820,7 @@ <fixed-case>B</fixed-case>asq<fixed-case>BBQ</fixed-case>: A <fixed-case>QA</fixed-case> Benchmark for Assessing Social Biases in <fixed-case>LLM</fixed-case>s for <fixed-case>B</fixed-case>asque, a Low-Resource Language MuitzeZulaika - XabierSaralegi + XabierSaralegi 4753–4767 The rise of pre-trained language models has revolutionized natural language processing (NLP) tasks, but concerns about the propagation of social biases in these models remain, particularly in under-resourced languages like Basque. This paper introduces BasqBBQ, the first benchmark designed to assess social biases in Basque across eight domains, using a multiple-choice question-answering (QA) task. We evaluate various autoregressive large language models (LLMs), including multilingual and those adapted for Basque, to analyze both their accuracy and bias transmission. Our results show that while larger models generally achieve better accuracy, ambiguous cases remain challenging. In terms of bias, larger models exhibit lower negative bias. However, high negative bias persists in specific categories such as Disability Status, Age and Physical Appearance, especially in ambiguous contexts. Conversely, categories such as Sexual Orientation, Gender Identity, and Race/Ethnicity show the least bias in ambiguous contexts. The continual pre-training based adaptation process for Basque has a limited impact on bias when compared with English. This work represents a key step toward creating more ethical LLMs for low-resource languages. 2025.coling-main.318 @@ -3842,7 +3842,7 @@ Why should only High-Resource-Languages have all the fun? Pivot Based Evaluation in Low Resource Setting AnanyaMukherjee SaumitraYadav - ManishShrivastava + ManishShrivastava 4779–4788 Evaluating machine translation (MT) systems for low-resource languages has long been a challenge due to the limited availability of evaluation metrics and resources. As a result, researchers in this space have relied primarily on lexical-based metrics like BLEU, TER, and ChrF, which lack semantic evaluation. In this first-of-its-kind work, we propose a novel pivot-based evaluation framework that addresses these limitations; after translating low-resource language outputs into a related high-resource language, we leverage advanced neural and embedding-based metrics for more meaningful evaluation. Through a series of experiments using five low-resource languages: Assamese, Manipuri, Kannada, Bhojpuri, and Nepali, we demonstrate how this method extends the coverage of both lexical-based and embedding-based metrics, even for languages not directly supported by advanced metrics. Our results show that the differences between direct and pivot-based evaluation scores are minimal, proving that this approach is a viable and effective solution for evaluating translations in endangered and low-resource languages. This work paves the way for more inclusive, accurate, and scalable MT evaluation for underrepresented languages, marking a significant step forward in this under-explored area of research. The code and data will be made available at https://github.com/AnanyaCoder/PivotBasedEvaluation. 2025.coling-main.320 @@ -3926,7 +3926,7 @@ <fixed-case>ACL</fixed-case>-rlg: A Dataset for Reading List Generation JulienAubert-Béduchaud FlorianBoudin - BéatriceDaille + BéatriceDaille RichardDufour 4910–4919 Familiarizing oneself with a new scientific field and its existing literature can be daunting due to the large amount of available articles. Curated lists of academic references, or reading lists, compiled by experts, offer a structured way to gain a comprehensive overview of a domain or a specific scientific challenge. In this work, we introduce ACL-rlg, the largest open expert-annotated reading list dataset. We also provide multiple baselines for evaluating reading list generation and formally define it as a retrieval task. Our qualitative study highlights that traditional scholarly search engines and indexing methods perform poorly on this task, and GPT-4o, despite showing better results, exhibits signs of potential data contamination. @@ -3950,7 +3950,7 @@ Extracting structure from an <fixed-case>LLM</fixed-case> - how to improve on surprisal-based models of Human Language Processing Daphne P.Wang - MehrnooshSadrzadeh + MehrnooshSadrzadeh MilošStanojević Wing-YeeChow RichardBreheny @@ -3999,7 +3999,7 @@ HanjieZhao YuchenYan YuxiangJia - HongyingZan + HongyingZan MinPeng 4980–4992 In recent years, fine-grained sentiment analysis in finance has gained significant attention, but the scarcity of entity-level datasets remains a key challenge. To address this, we have constructed the largest English and Chinese financial entity-level sentiment analysis datasets to date. Building on this foundation, we propose a novel two-stage sentiment analysis approach called Self-aware In-context Learning Correction (SILC). The first stage involves fine-tuning a base large language model to generate pseudo-labeled data specific to our task. In the second stage, we train a correction model using a GNN-based example retriever, which is informed by the pseudo-labeled data. This two-stage strategy has allowed us to achieve state-of-the-art performance on the newly constructed datasets, advancing the field of financial sentiment analysis. In a case study, we demonstrate the enhanced practical utility of our data and methods in monitoring the cryptocurrency market. Our datasets and code are available at https://github.com/NLP-Bin/SILC-EFSA. @@ -4032,7 +4032,7 @@ Evaluating Open-Source <fixed-case>ASR</fixed-case> Systems: Performance Across Diverse Audio Conditions and Error Correction Methods SakiImai TahiyaChowdhury - Amanda J.Stent + Amanda J.Stent 5027–5039 Despite significant advances in automatic speech recognition (ASR) accuracy, challenges remain. Naturally occurring conversation often involves multiple overlapping speakers, of different ages, accents and genders, as well as noisy environments and suboptimal audio recording equipment, all of which reduce ASR accuracy. In this study, we evaluate the accuracy of state of the art open source ASR systems across diverse conversational speech datasets, examining the impact of audio and speaker characteristics on WER. We then explore the potential of ASR ensembling and post-ASR correction methods to improve transcription accuracy. Our findings emphasize the need for robust error correction techniques and of continuing to address demographic biases to enhance ASR performance and inclusivity. 2025.coling-main.336 @@ -4131,7 +4131,7 @@ Refer to the Reference: Reference-focused Synthetic Automatic Post-Editing Data Generation SourabhDeoghare DipteshKanojia - PushpakBhattacharyya + PushpakBhattacharyya 5123–5135 A prevalent approach to synthetic APE data generation uses source (src) sentences in a parallel corpus to obtain translations (mt) through an MT system and treats corresponding reference (ref) sentences as post-edits (pe). While effective, due to independence between ‘mt’ and ‘pe,’ these translations do not adequately reflect errors to be corrected by a human post-editor. Thus, we introduce a novel and simple yet effective reference-focused synthetic APE data generation technique that uses ‘ref’ instead of src’ sentences to obtain corrupted translations (mt_new). The experimental results across English-German, English-Russian, English-Marathi, English-Hindi, and English-Tamil language pairs demonstrate the superior performance of APE systems trained using the newly generated synthetic data compared to those trained using existing synthetic data. Further, APE models trained using a balanced mix of existing and newly generated synthetic data achieve improvements of 0.37, 0.19, 1.01, 2.42, and 2.60 TER points, respectively. We will release the generated synthetic APE data. 2025.coling-main.344 @@ -4149,7 +4149,7 @@ <fixed-case>MIT</fixed-case>-10<fixed-case>M</fixed-case>: A Large Scale Parallel Corpus of Multilingual Image Translation - BoLi + BoLi ShaolinZhu LijieWen 5154–5167 @@ -4271,7 +4271,7 @@ Annotating the <fixed-case>F</fixed-case>rench <fixed-case>W</fixed-case>iktionary with supersenses for large scale lexical analysis: a use case to assess form-meaning relationships within the nominal lexicon NicolasAngleraud LucieBarque - MarieCandito + MarieCandito 5321–5332 Many languages lack broad-coverage, semantically annotated lexical resources, which limits empirical research on lexical semantics for these languages. In this paper, we report on how we automatically enriched the French Wiktionnary with general semantic classes, known as supersenses, using a limited amount of manually annotated data. We trained a classifier combining sense definition classification and sense exemplars classification. The resulting resource, with an evaluated supersense accuracy of nearly 85% (92% for hypersenses), is used in a case study illustrating how such an semantically enriched resource can be leveraged to empirically test linguistic hypotheses about the lexicon, on a large scale. 2025.coling-main.356 @@ -4323,7 +4323,7 @@ JiaGu LiangPang HuaweiShen - XueqiCheng + XueqiCheng 5375–5390 With the rapid advancement of large language models (LLMs) for handling complex language tasks, an increasing number of studies are employing LLMs as agents to emulate the sequential decision-making processes of humans often represented as Markov decision-making processes (MDPs). The actions in MDPs adhere to specific probability distributions and require iterative sampling. This arouses curiosity regarding the capacity of LLM agents to comprehend probability distributions, thereby guiding the agent’s behavioral decision-making through probabilistic sampling and generating behavioral sequences. To answer the above question, we divide the problem into two main aspects: sequence simulation with explicit probability distribution and sequence simulation with implicit probability distribution. Our analysis indicates that LLM agents can understand probabilities, but they struggle with probability sampling. Their ability to perform probabilistic sampling can be improved to some extent by integrating coding tools, but this level of sampling precision still makes it difficult to simulate human behavior as agents. 2025.coling-main.360 @@ -4387,9 +4387,9 @@ <fixed-case>V</fixed-case>eritas<fixed-case>QA</fixed-case>: A Truthfulness Benchmark Aimed at Multilingual Transferability JavierAula-Blasco JúliaFalcão - SusanaSotelo + SusanaSotelo SilviaPaniagua - AitorGonzalez-Agirre + AitorGonzalez-Agirre MartaVillegas 5463–5474 As Large Language Models (LLMs) become available in a wider range of domains and applications, evaluating the truthfulness of multilingual LLMs is an issue of increasing relevance. TruthfulQA (Lin et al., 2022) is one of few benchmarks designed to evaluate how models imitate widespread falsehoods. However, it is strongly English-centric and starting to become outdated. We present VeritasQA, a context- and time-independent truthfulness benchmark built with multilingual transferability in mind, and available in Spanish, Catalan, Galician and English. VeritasQA comprises a set of 353 questions and answers inspired by common misconceptions and falsehoods that are not tied to any particular country or recent event. We release VeritasQA under an open license and present the evaluation results of 15 models of various architectures and sizes. @@ -4440,7 +4440,7 @@ EdoardoManino JuliaRozanova LucasCordeiro - AndréFreitas + AndréFreitas 5515–5529 This work proposes a novel methodology for measuring compositional behavior in contemporary language embedding models. Specifically, we focus on adjectival modifier phenomena in adjective-noun phrases. In recent years, distributional language representation models have demonstrated great practical success. At the same time, the need for interpretability has elicited questions on their intrinsic properties and capabilities. Crucially, distributional models are often inconsistent when dealing with compositional phenomena in natural language, which has significant implications for their safety and fairness. Despite this, most current research on compositionality is directed towards improving their performance on similarity tasks only. This work takes a different approach, introducing three novel tests of compositional behavior inspired by Montague semantics. Our experimental results indicate that current neural language models do not behave according to the expected linguistic theories. This indicates that current language models may lack the capability to capture the semantic properties we evaluated on limited context, or that linguistic theories from Montagovian tradition may not match the expected capabilities of distributional models. 2025.coling-main.370 @@ -4452,8 +4452,8 @@ XiangyuLu WangXu ConghuiZhu - TiejunZhao - MuyunYang + TiejunZhao + MuyunYang 5530–5543 Low-Rank Adaptation (LoRA) is currently the most commonly used Parameter-efficient fine-tuning (PEFT) method. However, it still faces high computational and storage costs to models with billions of parameters. Most previous studies have tackled this issue by using pruning techniques. Nonetheless, these efforts only analyze LoRA parameter features to evaluate their importance, such as parameter count, size, and gradient. In fact, the output of LoRA directly impacts the fine-tuned model. Preliminary experiments indicate that a fraction of LoRA possesses significantly high output values, substantially influencing the layer output. Motivated by the observation, we propose LoRA-drop. Concretely, LoRA-drop evaluates the importance of LoRA based on the LoRA output. Then we retain LoRA for important layers and the other layers share the same LoRA. We conduct abundant experiments with models of different scales on NLU and NLG tasks. Results demonstrate that LoRA-drop can achieve performance comparable to full fine-tuning and LoRA while retaining 50% of the LoRA parameters on average. 2025.coling-main.371 @@ -4465,7 +4465,7 @@ SijieCheng ZixinSun PengLi - YangLiu + YangLiu 5544–5557 Symbols such as numerical sequences, chemical formulas, and table delimiters exist widely, playing important roles in symbol-related tasks such as abstract reasoning, chemical property prediction, and tabular question-answering. Compared to tasks based on natural language expressions, large language models (LLMs) have limitations in understanding and reasoning on symbol-based representations, making it difficult for them to handle symbol-related problems. In this paper, we propose symbol-to-language (S2L), a method that converts symbol-based representations to language-based representations, providing valuable information for language models during reasoning. We found that, for both closed-source and open-source LLMs, the capability to solve symbol-related problems can be largely enhanced by incorporating such language-based representations. For example, by employing S2L for GPT-4, there can be substantial improvements of +21.9% and +9.5% accuracy for 1D-ARC and Dyck language tasks, respectively. There is also a consistent improvement in other six general symbol-related tasks such as table understanding and Tweet analysis. We release the GPT logs in https://github.com/THUNLP-MT/symbol2language. 2025.coling-main.372 @@ -4510,8 +4510,8 @@ PayamKarisani ZhengHui YiFung - PreslavNakov - JuliaHirschberg + PreslavNakov + JuliaHirschberg HengJi 5607–5628 Propaganda plays a critical role in shaping public opinion and fueling disinformation. While existing research primarily focuses on identifying propaganda techniques, it lacks the ability to capture the broader motives and the impacts of such content. To address these challenges, we introduce PropaInsight, a conceptual framework grounded in foundational social science research, which systematically dissects propaganda into techniques, arousal appeals, and underlying intent. PropaInsight offers a more granular understanding of how propaganda operates across different contexts. Additionally, we present PropaGaze, a novel dataset that combines human-annotated data with high-quality synthetic data generated through a meticulously designed pipeline. Our experiments show that off-the-shelf LLMs struggle with propaganda analysis, but PropaGaze significantly improves performance. Fine-tuned Llama-7B-Chat achieves 203.4% higher text span IoU in technique identification and 66.2% higher BertScore in appeal analysis compared to 1-shot GPT-4-Turbo. Moreover, PropaGaze complements limited human-annotated data in data-sparse and cross-domain scenarios, demonstrating its potential for comprehensive and generalizable propaganda analysis. @@ -4623,7 +4623,7 @@ FandongMeng SongmingZhang YufengChen - JinanXu + JinanXu JieZhou 5775–5788 Multilingual knowledge editing (MKE) aims to simultaneously update factual knowledge across multiple languages within large language models (LLMs). Previous research indicates that the same knowledge across different languages within LLMs exhibits a degree of shareability. However, most existing MKE methods overlook the connections of the same knowledge between different languages, resulting in knowledge conflicts and limited edit performance. To address this issue, we first investigate how LLMs process multilingual factual knowledge and discover that the same factual knowledge in different languages generally activates a shared set of neurons, which we call language-agnostic factual neurons (LAFNs). These neurons represent the same factual knowledge shared across languages and imply the semantic connections among multilingual knowledge. Inspired by this finding, we propose a new MKE method by Locating and Updating Language-Agnostic Factual Neurons (LU-LAFNs) to edit multilingual knowledge simultaneously, which avoids knowledge conflicts and thus improves edit performance. Experimental results on Bi-ZsRE and MzsRE benchmarks demonstrate that our method achieves the best edit performance, indicating the effectiveness and importance of modeling the semantic connections among multilingual knowledge. @@ -4653,7 +4653,7 @@ TimOmbasa HemankLamba AoifeCahill - JoelTetreault + JoelTetreault AlejandroJaimes 5807–5825 Online reporting platforms have enabled citizens around the world to collectively share their opinions and report in real time on events impacting their local communities. Systematically organizing (e.g., categorizing by attributes) and geotagging large amounts of crowdsourced information is crucial to ensuring that accurate and meaningful insights can be drawn from this data and used by policy makers to bring about positive change. These tasks, however, typically require extensive manual annotation efforts. In this paper we present Uchaguzi-2022, a dataset of 14k categorized and geotagged citizen reports related to the 2022 Kenyan General Election containing mentions of election-related issues such as official misconduct, vote count irregularities, and acts of violence. We use this dataset to investigate whether language models can assist in scalably categorizing and geotagging reports, thus highlighting its potential application in the AI for Social Good space. @@ -4727,7 +4727,7 @@ FuZhang JizhengWan ShuoWang - MarkLee + MarkLee 5890–5901 Entity Alignment (EA) is a critical task in Knowledge Graph (KG) integration, aimed at identifying and matching equivalent entities that represent the same real-world objects. While EA methods based on knowledge representation learning have shown strong performance on synthetic benchmark datasets such as DBP15K, their effectiveness significantly decline in real-world scenarios which often involve data that is highly heterogeneous, incomplete, and domain-specific, as seen in datasets like DOREMUS and AGROLD. Addressing this challenge, we propose DAEA, a novel EA approach with Domain Adaptation that leverages the data characteristics of synthetic benchmarks for improved performance in real-world datasets. DAEA introduces a multi-source KGs selection mechanism and a specialized domain adaptive entity alignment loss function to bridge the gap between real-world data and optimal benchmark data, mitigating the challenges posed by aligning entities across highly heterogeneous KGs. Experimental results demonstrate that DAEA outperforms state-of-the-art models on real-world datasets, achieving a 29.94% improvement in Hits@1 on DOREMUS and a 5.64% improvement on AGROLD. Code is available at https://github.com/yangxiaoxiaoly/DAEA. 2025.coling-main.393 @@ -4738,7 +4738,7 @@ <fixed-case>C</fixed-case>o<fixed-case>P</fixed-case>r<fixed-case>US</fixed-case>: Consistency Preserving Utterance Synthesis towards more realistic benchmark dialogues SebastianSteindl - UlrichSchäfer + UlrichSchäfer BerndLudwig 5902–5917 Large-scale Wizard-Of-Oz dialogue datasets have enabled the training of deep learning-based dialogue systems. While they are successful as benchmark datasets, they lack certain types of utterances, which would make them more realistic. In this work, we investigate the creation of synthetic communication errors in an automatic pipeline. Based on linguistic theory, we propose and follow a simple error taxonomy. We focus on three types of miscommunications that could happen in real-world dialogues but are underrepresented in the benchmark dataset: misunderstandings, non-understandings and vaguely related questions. Our two-step approach uses a state-of-the-art Large Language Model (LLM) to first create the error and secondly the repairing utterance. We perform Language Model-based evaluation to ensure the quality of the generated utterances. We apply the method to the MultiWOZ dataset and evaluate it both qualitatively and empirically as well as with human judges. Our results indicate that current LLMs can aid in adding post-hoc miscommunications to benchmark datasets as a form of data augmentation. We publish the resulting dataset, in which nearly 1900 dialogues have been modified, as CoPrUS-MultiWOZ to facilitate future work on dialogue systems. @@ -4749,7 +4749,7 @@ <fixed-case>JM</fixed-case>ed<fixed-case>B</fixed-case>ench: A Benchmark for Evaluating <fixed-case>J</fixed-case>apanese Biomedical Large Language Models JunfengJiang JiahaoHuang - AkikoAizawa + AkikoAizawa 5918–5935 Recent developments in Japanese large language models (LLMs) primarily focus on general domains, with fewer advancements in Japanese biomedical LLMs. One obstacle is the absence of a comprehensive, large-scale benchmark for comparison. Furthermore, the resources for evaluating Japanese biomedical LLMs are insufficient. To advance this field, we propose a new benchmark including eight LLMs across four categories and 20 Japanese biomedical datasets across five tasks. Experimental results indicate that: (1) LLMs with a better understanding of Japanese and richer biomedical knowledge achieve better performance in Japanese biomedical tasks, (2) LLMs that are not mainly designed for Japanese biomedical domains can still perform unexpectedly well, and (3) there is still much room for improving the existing LLMs in certain Japanese biomedical tasks. Moreover, we offer insights that could further enhance development in this field. Our evaluation tools tailored to our benchmark as well as the datasets are publicly available to facilitate future research. 2025.coling-main.395 @@ -4761,7 +4761,7 @@ YouriPeskine PaoloPapotti RiccardoTorlone - RaphaelTroncy + RaphaelTroncy 5936–5951 Tropes — recurring narrative elements like the “smoking gun” or the “veil of secrecy” — are often used in movies to convey familiar patterns. However, they also play a significant role in online communication about societal issues, where they can oversimplify complex matters and deteriorate public discourse. Recognizing these tropes can offer insights into the emotional manipulation and potential bias present in online discussions. This paper addresses the challenge of automatically detecting tropes in social media posts. We define the task, distinguish it from previous work, and create a ground-truth dataset of social media posts related to vaccines and immigration, manually labeled with tropes. Using this dataset, we develop a supervised machine learning technique for multi-label classification, fine-tune a model, and demonstrate its effectiveness experimentally. Our results show that tropes are common across domains and that fine-tuned models can detect them with high accuracy. 2025.coling-main.396 @@ -4772,7 +4772,7 @@ SameeArif Aamina JamalKhan MustafaAbbas - Agha AliRaza + Agha AliRaza AwaisAthar 5952–5961 This paper presents a comprehensive evaluation of Urdu Automatic Speech Recognition (ASR) models. We analyze the performance of three ASR model families: Whisper, MMS, and Seamless-M4T using Word Error Rate (WER), along with a detailed examination of the most frequent wrong words and error types including insertions, deletions, and substitutions. Our analysis is conducted using two types of datasets, read speech and conversational speech. Notably, we present the first conversational speech dataset designed for benchmarking Urdu ASR models. We find that seamless-large outperforms other ASR models on the read speech dataset, while whisper-large performs best on the conversational speech dataset. Furthermore, this evaluation highlights the complexities of assessing ASR models for low-resource languages like Urdu using quantitative metrics alone and emphasizes the need for a robust Urdu text normalization system. Our findings contribute valuable insights for developing robust ASR systems for low-resource languages like Urdu. @@ -4814,8 +4814,8 @@ <fixed-case>PIR</fixed-case>suader: A Persuasive Chatbot for Mitigating Psychological Insulin Resistance in Type-2 Diabetic Patients - Sujatha DasGollapalli - See-KiongNg + Sujatha DasGollapalli + See-KiongNg 5997–6013 Psychological Insulin Resistance (PIR) is described as the reluctance towards initiation and adherence of insulin-based treatments due to psychological barriers in diabetic patients. Though studies have shown that timely initiation with lifestyle changes are known to be crucial in sugar control and prevention of chronic conditions in Type 2 Diabetes (T2D) patients, many patients often have deep-rooted fears and misgivings related to insulin which hinder them from adapting to an insulin-based treatment regimen when recommended by healthcare specialists. Therefore, it is vitally important to address and allay these fallacious beliefs in T2D patients and persuade them to consider insulin as a treatment option. In this paper, we describe the design of PIRsuader, a persuasive chatbot for mitigating PIR in T2D patients. In PIRsuader, we effectively harness the conversation generation capabilities of state-of-the-art Large Language Models via a context-specific persuasive dialog act schema. We design reward functions that capture dialog act preferences for persuading reluctant patients and apply reinforcement learning to learn a dialog act prediction model. Our experiments using a collection of real doctor-diabetic patient conversations indicate that PIRsuader is able to improve the willingness in patients to try insulin as well as address specific concerns they have in an empathetic manner. 2025.coling-main.401 @@ -4838,8 +4838,8 @@ ZhengYao TianyiLi LiangCheng - MarkSteedman - DeyiXiong + MarkSteedman + DeyiXiong 6024–6038 Previous benchmarks for evaluating large language models (LLMs) have primarily emphasized quantitative metrics, such as data volume. However, this focus may neglect key qualitative data attributes that can significantly impact the final rankings of LLMs, resulting in unreliable leaderboards. In this paper, we investigate whether current LLM benchmarks adequately consider these data attributes. We specifically examine three attributes: diversity, redundancy, and difficulty. To explore these attributes, we propose a framework with three separate modules, each designed to assess one of the attributes. Using a method that progressively incorporates these attributes, we analyze their influence on the benchmark. Our experimental results reveal a meaningful correlation between LLM rankings on the revised benchmark and the original benchmark when these attributes are accounted for. These findings indicate that existing benchmarks often fail to meet all three criteria, highlighting a lack of consideration for multifaceted data attributes in current evaluation datasets. 2025.coling-main.403 @@ -4850,7 +4850,7 @@ BastianBunzeck DanielDuran LeonieSchade - SinaZarrieß + SinaZarrieß 6039–6048 Recent work investigates whether LMs learn human-like linguistic generalizations and representations from developmentally plausible amounts of data. Yet, the basic linguistic units processed in these LMs are determined by subword-based tokenization, which limits their validity as models of learning at and below the word level. In this paper, we explore the potential of tokenization-free, phoneme- and grapheme-based language models. We demonstrate that small models based on the Llama architecture can achieve strong linguistic performance on standard syntactic and novel lexical/phonetic benchmarks when trained with character-level vocabularies. We further show that phoneme-based models almost match grapheme-based models in standard tasks and novel evaluations. Our findings suggest a promising direction for creating more linguistically plausible language models that are better suited for computational studies of language acquisition and processing. 2025.coling-main.404 @@ -4868,7 +4868,7 @@ Hi-<fixed-case>GEC</fixed-case>: <fixed-case>H</fixed-case>indi Grammar Error Correction in Low Resource Scenario UjjwalSharma - PushpakBhattacharyya + PushpakBhattacharyya 6063–6075 Automated Grammatical Error Correction (GEC) has been extensively researched in Natural Language Processing (NLP), primarily focusing on English and other resource-rich languages. This paper shifts the focus to GEC for a scarcely explored low-resource language, specifically Hindi, which presents unique challenges due to its intricate morphology and complex syntax. To address data resource limitations, this work explores various GEC data generation techniques. Our research introduces a carefully extracted and filtered, high-quality dataset, HiWikiEdits, which includes human-edited 8,137 instances sourced from Wikipedia, encompassing 17 diverse grammatical error types, with annotations performed using the ERRANT toolkit. Furthermore, we investigate Round Trip Translation (RTT) using diverse languages for synthetic Hindi GEC data generation, revealing that leveraging high-resource linguistically distant language for error generation outperforms mid-resource linguistically closer languages. Specifically, using English as a pivot language resulted in a 6.25% improvement in GLEU score compared to using Assamese or Marathi. Finally, we also investigate the neural model-based synthetic error-generation technique and show that it achieves comparable performance to other synthetic data generation methods, even in low-resource settings. 2025.coling-main.406 @@ -4884,7 +4884,7 @@ AndersonSilva Soares RodrigoFreitas Lima Lucas RafaelStefanel Gris - SandraAluísio + SandraAluísio 6076–6087 Recently, several public datasets for automatic speech recognition (ASR) in Brazilian Portuguese (BP) have been released, improving ASR systems performance. However, these datasets lack diversity in terms of age groups, regional accents, and education levels. In this paper, we present a new publicly available dataset consisting of 289 life story interviews (365 hours), featuring a broad range of speakers varying in age, education, and regional accents. First, we demonstrated the presence of bias in current BP ASR models concerning education levels and age groups. Second, we showed that our dataset helps mitigate these biases. Additionally, an ASR model trained on our dataset performed better during evaluation on a diverse test set. Finally, the ASR model trained with our dataset was extrinsically evaluated through a topic modeling task that utilized the automatically transcribed output. 2025.coling-main.407 @@ -4928,7 +4928,7 @@ MinjunKim JunghunYuk HaneolJang - KyungTaeLim + KyungTaeLim 6150–6168 We propose the VLR-Bench, a visual question answering (VQA) benchmark for evaluating vision language models (VLMs) based on retrieval augmented generation (RAG). Unlike existing evaluation datasets for external knowledge-based VQA, the proposed VLR-Bench includes five input passages. This allows testing of the ability to determine which passage is useful for answering a given query, a capability lacking in previous research. In this context, we constructed a dataset of 32,000 automatically generated instruction-following examples, which we denote as VLR-IF. This dataset is specifically designed to enhance the RAG capabilities of VLMs by enabling them to learn how to generate appropriate answers based on input passages. We evaluated the validity of the proposed benchmark and training data and verified its performance using the state-of-the-art Llama3-based VLM, the Llava-Llama-3 model. The proposed VLR-Bench and VLR-IF datasets are publicly available online. 2025.coling-main.411 @@ -4954,7 +4954,7 @@ GuillermoMarco JorgeCarrillo-de-Albornoz LauraPlaza - EnriqueAmigo + EnriqueAmigo Andrés FernandezGarcía AlejandroBenito-Santos AdriánGhajari Espinosa @@ -5120,7 +5120,7 @@ Evaluating Pixel Language Models on Non-Standardized Languages AlbertoMuñoz-Ortiz VerenaBlaschke - BarbaraPlank + BarbaraPlank 6412–6419 We explore the potential of pixel-based models for transfer learning from standard languages to dialects. These models convert text into images that are divided into patches, enabling a continuous vocabulary representation that proves especially useful for out-of-vocabulary words common in dialectal data. Using German as a case study, we compare the performance of pixel-based models to token-based models across various syntactic and semantic tasks. Our results show that pixel-based models outperform token-based models in part-of-speech tagging, dependency parsing and intent detection for zero-shot dialect evaluation by up to 26 percentage points in some scenarios, though not in Standard German. However, pixel-based models fall short in topic classification. These findings emphasize the potential of pixel-based models for handling dialectal data, though further research should be conducted to assess their effectiveness in various linguistic contexts. 2025.coling-main.427 @@ -5164,8 +5164,8 @@ Part-Of-Speech Sensitivity of Routers in Mixture of Experts Models ElieAntoine - FredericBechet - PhillippeLanglais + FredericBechet + PhillippeLanglais 6467–6474 This study investigates the behavior of model-integrated routers in Mixture of Experts (MoE) models, focusing on how tokens are routed based on their linguistic features, specifically Part-of-Speech (POS) tags. The goal is to explore across different MoE architectures whether experts specialize in processing tokens with similar linguistic traits. By analyzing token trajectories across experts and layers, we aim to uncover how MoE models handle linguistic information. Findings from six popular MoE models reveal expert specialization for specific POS categories, with routing paths showing high predictive accuracy for POS, highlighting the value of routing paths in characterizing tokens. 2025.coling-main.431 @@ -5338,8 +5338,8 @@ ZaraSiddique HsuvasBorkakoty DimosthenisAntypas - LuisEspinosa Anke - JoseCamacho-Collados + LuisEspinosa Anke + JoseCamacho-Collados 6692–6704 Extracting metaphors and analogies from free text requires high-level reasoning abilities such as abstraction and language understanding. Our study focuses on the extraction of the concepts forming metaphoric analogies in literary texts. To this end, we construct a novel dataset in this domain with the help of domain experts. We compare the out-of-the-box ability of recent large language models (LLMs) to structure metaphoric mappings from fragments of texts containing rather explicit proportional analogies. The models are further evaluated on the generation of implicit elements of the analogy, which are indirectly suggested in the texts and inferred by human readers. The competitive results obtained by LLMs in our experiments are encouraging and open up new avenues such as automatically extracting analogies and metaphors from text instead of investing resources in domain experts to manually label data. 2025.coling-main.448 @@ -5377,7 +5377,7 @@ MeishanZhang LiliShan YulinWu - BingquanLiu + BingquanLiu 6748–6761 Emotion recognition in conversation (ERC), the task of discerning human emotions for each utterance within a conversation, has garnered significant attention in human-computer interaction systems. Previous ERC studies focus on speaker-specific information that predominantly stems from relationships among utterances, which lacks sufficient information around conversations. Recent research in ERC has sought to exploit pre-trained large language models (LLMs) with speaker modelling to comprehend emotional states. Although these methods have achieved the encouraging results, the extracted speaker-specific information struggles to indicate emotional dynamics. In this paper, motivated by the fact that speaker characteristics play a crucial role and LLMs have rich world knowledge, we present LaERC-S, a novel framework that stimulates LLMs to explore speaker characteristics involving the mental state and behavior of interlocutors, for accurate emotion predictions. To endow LLMs with these knowledge information, we adopt the two-stage learning to make the models reason speaker characteristics and track the emotion of the speaker in complex conversation scenarios. Extensive experiments on three benchmark datasets demonstrate the superiority of LaERC-S, reaching the new state-of-the-art. 2025.coling-main.451 @@ -5386,7 +5386,7 @@ Analysing Zero-Shot Readability-Controlled Sentence Simplification AbdullahBarayan - JoseCamacho-Collados + JoseCamacho-Collados FernandoAlva-Manchego 6762–6781 Readability-controlled text simplification (RCTS) rewrites texts to lower readability levels while preserving their meaning. RCTS models often depend on parallel corpora with readability annotations on both source and target sides. Such datasets are scarce and difficult to curate, especially at the sentence level. To reduce reliance on parallel data, we explore using instruction-tuned large language models for zero-shot RCTS. Through automatic and manual evaluations, we examine: (1) how different types of contextual information affect a model’s ability to generate sentences with the desired readability, and (2) the trade-off between achieving target readability and preserving meaning. Results show that all tested models struggle to simplify sentences (especially to the lowest levels) due to models’ limitations and characteristics of the source sentences that impede adequate rewriting. Our experiments also highlight the need for better automatic evaluation metrics tailored to RCTS, as standard ones often misinterpret common simplification operations, and inaccurately assess readability and meaning preservation. @@ -5420,8 +5420,8 @@ BeomseokLee MarcoGaido IoanCalapodescu - LaurentBesacier - MatteoNegri + LaurentBesacier + MatteoNegri 6816–6826 While crowdsourcing is an established solution for facilitating and scaling the collection of speech data, the involvement of non-experts necessitates protocols to ensure final data quality. To reduce the costs of these essential controls, this paper investigates the use of Speech Foundation Models (SFMs) to automate the validation process, examining for the first time the cost/quality trade-off in data acquisition. Experiments conducted on French, German, and Korean data demonstrate that SFM-based validation has the potential to reduce reliance on human validation, resulting in an estimated cost saving of over 40.0% without degrading final data quality. These findings open new opportunities for more efficient, cost-effective, and scalable speech data acquisition. 2025.coling-main.455 @@ -5551,7 +5551,7 @@ Evaluating <fixed-case>LLM</fixed-case>s’ Capability to Identify Lexical Semantic Equivalence: Probing with the Word-in-Context Task - YoshihikoHayashi + YoshihikoHayashi 6985–6998 This study proposes a method to evaluate the capability of large language models (LLMs) in identifying lexical semantic equivalence. The Word-in-Context (WiC) task, a benchmark designed to determine whether the meanings of a target word remain identical across different contexts, is employed as a probing task. Experiments are conducted with several LLMs, including proprietary GPT models and open-source models, using zero-shot prompting with adjectives that represent varying levels of semantic equivalence (e.g., “the same”) or inequivalence (e.g., “different”). The fundamental capability to identify lexical semantic equivalence in context is measured using standard accuracy metrics. Consistency across different levels of semantic equivalence is assessed via rank correlation with the expected canonical ranking of precision and recall, reflecting anticipated trends in performance across prompts. The proposed method demonstrates its effectiveness, highlighting the superior capability of GPT-4o, as it consistently outperforms other explored LLMs. Analysis of the WiC dataset, the discriminative properties of adjectives (i.e., their ability to differentiate between levels of semantic equivalence), and linguistic patterns in erroneous cases offer insights into the LLM’s capability and sensitivity. These findings could inform improvements in WiC task performance, although performance enhancement is not the primary focus of this study. 2025.coling-main.466 @@ -5582,7 +5582,7 @@ ConnorHeaton ShreyaGhosh WenpengYin - PreslavNakov + PreslavNakov SuhangWang 7035–7054 We study extractive question-answering in the medical domain (Medical-EQA). This problem has two main challenges: (i) domain specificity, as most AI models lack necessary domain knowledge, and (ii) extraction-based answering style, which restricts most autoregressive LLMs due to potential hallucinations. To handle those challenges, we propose TOP-Training, a target-oriented pre-training paradigm that stands out among all domain adaptation techniques with two desirable features: (i) TOP-Training moves one step further than popular domain-oriented fine-tuning since it not only moves closer to the target domain, but also familiarizes itself with the target dataset, and (ii) it does not assume the existence of a large set of unlabeled instances from the target domain. Specifically, for a target Medical-EQA dataset, we extract its entities and leverage large language models (LLMs) to generate synthetic texts containing those entities; we then demonstrate that pretraining on this synthetic text data yields better performance on the target Medical-EQA benchmarks. Overall, our contributions are threefold: (i) TOP-Training, a new pretraining technique to effectively adapt LLMs to better solve a target problem, (ii) TOP-Training has a wide application scope because it does not require the target problem to have a large set of unlabeled data, and (iii) our experiments highlight the limitations of autoregressive LLMs, emphasizing TOP-Training as a means to unlock the true potential of bidirectional LLMs. @@ -5593,7 +5593,7 @@ Beyond Discrete Personas: Personality Modeling Through Journal Intensive Conversations SayantanPal SouvikDas - Rohini K.Srihari + Rohini K.Srihari 7055–7074 Large Language Models (LLMs) have significantly improved personalized conversational capabilities. However, existing datasets like Persona Chat, Synthetic Persona Chat, and Blended Skill Talk rely on static, predefined personas. This approach often results in dialogues that fail to capture human personalities’ fluid and evolving nature. To overcome these limitations, we introduce a novel dataset with around 400,000 dialogues and a framework for generating personalized conversations using long-form journal entries from Reddit. Our approach clusters journal entries for each author and filters them by selecting the most representative cluster, ensuring that the retained entries best reflect the author’s personality. We further refine the data by capturing the Big Five personality traits—openness, conscientiousness, extraversion, agreeableness, and neuroticism—ensuring that dialogues authentically reflect an individual’s personality. Using Llama 3 70B, we generate high-quality, personality-rich dialogues grounded in these journal entries. Fine-tuning models on this dataset leads to an 11% improvement in capturing personality traits on average, outperforming existing approaches in generating more coherent and personality-driven dialogues. 2025.coling-main.470 @@ -5616,7 +5616,7 @@ EnoraRice LukeGessler AlexisPalmer - Katharinavon der Wense + Katharinavon der Wense 7087–7098 Many of the world’s languages have insufficient data to train high-performing general neural machine translation (NMT) models, let alone domain-specific models, and often the only available parallel data are small amounts of religious texts. Hence, domain adaptation (DA) is a crucial issue faced by contemporary NMT and has, so far, been underexplored for low-resource languages. In this paper, we evaluate a set of methods from both low-resource NMT and DA in a realistic setting, in which we aim to translate between a high-resource and a low-resource language with access to only: a) parallel Bible data, b) a bilingual dictionary, and c) a monolingual target-domain corpus in the high-resource language. Our results show that the effectiveness of the tested methods varies, with the simplest one, DALI, being most effective. We follow up with a small human evaluation of DALI, which shows that there is still a need for more careful investigation of how to accomplish DA for low-resource NMT. 2025.coling-main.472 @@ -5635,7 +5635,7 @@ Exploring Language Model Generalization in Low-Resource Extractive <fixed-case>QA</fixed-case> SaptarshiSengupta WenpengYin - PreslavNakov + PreslavNakov ShreyaGhosh SuhangWang 7106–7126 @@ -5853,7 +5853,7 @@ Courtroom-<fixed-case>LLM</fixed-case>: A Legal-Inspired Multi-<fixed-case>LLM</fixed-case> Framework for Resolving Ambiguous Text Classifications - SangkeunJung + SangkeunJung JeesuJung 7367–7385 In this research, we introduce the Courtroom-LLM framework, a novel multi-LLM structure inspired by legal courtroom processes, aiming to enhance decision-making in ambiguous text classification scenarios. Our approach simulates a courtroom setting within LLMs, assigning roles similar to those of prosecutors, defense attorneys, and judges, to facilitate comprehensive analysis of complex textual cases. We demonstrate that this structured multi-LLM setup can significantly improve decision-making accuracy, particularly in ambiguous situations, by harnessing the synergistic effects of diverse LLM arguments. Our evaluations across various text classification tasks show that the Courtroom-LLM framework outperforms both traditional single-LLM classifiers and simpler multi-LLM setups. These results highlight the advantages of our legal-inspired model in improving decision-making for text classification. @@ -5938,7 +5938,7 @@ XiaolongJin LongBai JiafengGuo - XueqiCheng + XueqiCheng 7484–7496 Event Relation Extraction (ERE) aims to extract various types of relations between different events within texts. Although Large Language Models (LLMs) have demonstrated impressive capabilities in many natural language processing tasks, existing ERE methods based on LLMs still face three key challenges: (1) Time Inefficiency: The existing pairwise method of combining events and determining their relations is time-consuming for LLMs. (2) Low Coverage: When dealing with numerous events in a document, the limited generation length of fine-tuned LLMs restricts the coverage of their extraction results. (3) Lack of Rationale: Essential rationales concerning the results that could enhance the reasoning ability of the model are overlooked. To address these challenges, we propose LLMERE, an LLM-based approach with rationales for the ERE task. LLMERE transforms ERE into a question-and-answer task that may have multiple answers. By extracting all events related to a specified event at once, LLMERE reduces time complexity from O(n^2) to O(n), compared to the pairwise method. Subsequently, LLMERE enhances the coverage of extraction results by employing a partitioning strategy that highlights only a portion of the events in the document at a time. In addition to the extracted results, LLMERE is also required to generate corresponding rationales/reasons behind them, in terms of event coreference information or transitive chains of event relations. Experimental results on three widely used datasets show that LLMERE achieves significant improvements over baseline methods. 2025.coling-main.500 @@ -6028,7 +6028,7 @@ <fixed-case>DEGAP</fixed-case>: Dual Event-Guided Adaptive Prefixes for Templated-Based Event Argument Extraction with Slot Querying GuanghuiWang DexiLiu - Jian-YunNie + Jian-YunNie QizhiWan RongHu XipingLiu @@ -6055,7 +6055,7 @@ Leveraging Large Pre-trained Multilingual Models for High-Quality Speech-to-Text Translation on Industry Scenarios MarkoAvila - JosepCrego + JosepCrego 7624–7633 Speech-to-Text Translation (S2TT) involves converting spoken language from a source language directly into text in a target language. Traditionally, S2TT systems rely on a sequential pipeline that combines Automatic Speech Recognition (ASR) and Machine Translation (MT) models. However, these systems are prone to error propagation and demand substantial resources to develop and train each component independently. Thus, posing a major challenge in industry settings where cost-effective yet highly accurate S2TT solutions are essential. With the increasing availability of multilingual large pre-trained speech models (LPSM), we propose a parameter-efficient framework that integrates one LPSM with a multilingual MT engine. We evaluate the effectiveness of several well-established LPSMs within this framework, focusing on a real-world industry scenario that involves building a system capable of translating between French, English, and Arabic. The results show that high-quality S2TT systems can be built with minimal computational resources, offering an efficient solution for cross-lingual communication. 2025.coling-main.509 @@ -6078,7 +6078,7 @@ Yi R.Fung ChengQian JeonghwanKim - DilekHakkani-Tur + DilekHakkani-Tur HengJi 7648–7662 As large language models (LLMs) demonstrate increasingly advanced capabilities, aligning their behaviors with human values and preferences becomes crucial for their wide adoption. While previous research focuses on general alignment to principles such as helpfulness, harmlessness, and honesty, the need to account for individual and diverse preferences has been largely overlooked, potentially undermining customized human experiences. To address this gap, we train LLMs that can “interact to align”, essentially cultivating the meta-skill of LLMs to implicitly infer the unspoken personalized preferences of the current user through multi-turn conversations, and then dynamically align their following behaviors and responses to these inferred preferences. Our approach involves establishing a diverse pool of 3,310 distinct user personas by initially creating seed examples, which are then expanded through iterative self-generation and filtering. Guided by distinct user personas, we leverage multi-LLM collaboration to develop a multi-turn preference dataset containing 3K+ multi-turn conversations in tree structures. Finally, we apply supervised fine-tuning and reinforcement learning to enhance LLMs using this dataset. For evaluation, we establish the ALOE (ALign with custOmized prEferences) benchmark, consisting of 100 carefully selected examples and well-designed metrics to measure the customized alignment performance during conversations. Experimental results demonstrate the effectiveness of our method in enabling dynamic, personalized alignment via interaction. The code and dataset will be made public. @@ -6105,7 +6105,7 @@ BrentMilne TomFischaber TamaraSumner - James H.Martin + James H.Martin 7671–7684 Human tutoring interventions play a crucial role in supporting student learning, improving academic performance, and promoting personal growth. This paper focuses on analyzing mathematics tutoring discourse using talk moves—a framework of dialogue acts grounded in Accountable Talk theory. However, scaling the collection, annotation, and analysis of extensive tutoring dialogues to develop machine learning models is a challenging and resource-intensive task. To address this, we present SAGA22, a compact dataset, and explore various modeling strategies, including dialogue context, speaker information, pretraining datasets, and further fine-tuning. By leveraging existing datasets and models designed for classroom teaching, our results demonstrate that supplementary pretraining on classroom data enhances model performance in tutoring settings, particularly when incorporating longer context and speaker information. Additionally, we conduct extensive ablation studies to underscore the challenges in talk move modeling. 2025.coling-main.513 @@ -6114,7 +6114,7 @@ How to Leverage Digit Embeddings to Represent Numbers? Jasivan AlexSivakumar - Nafise SadatMoosavi + Nafise SadatMoosavi 7685–7697 Within numerical reasoning, understanding numbers themselves is still a challenge for existing language models. Simple generalisations, such as solving 100+200 instead of 1+2, can substantially affect model performance (Sivakumar and Moosavi, 2023). Among various techniques, character-level embeddings of numbers have emerged as a promising approach to improve number representation. However, this method has limitations as it leaves the task of aggregating digit representations to the model, which lacks direct supervision for this process. In this paper, we explore the use of mathematical priors to compute aggregated digit embeddings and explicitly incorporate these aggregates into transformer models. This can be achieved either by adding a special token to the input embeddings or by introducing an additional loss function to enhance correct predictions. We evaluate the effectiveness of incorporating this explicit aggregation, analysing its strengths and shortcomings, and discuss future directions to better benefit from this approach. Our methods, while simple, are compatible with any pretrained model, easy to implement, and have been made publicly available. 2025.coling-main.514 @@ -6180,8 +6180,8 @@ Cross-lingual Evaluation of Multilingual Text Generation ShamilChollampatt - Minh QuangPham - Sathish ReddyIndurthi + Minh QuangPham + Sathish ReddyIndurthi MarcoTurchi 7766–7777 Scaling automatic evaluation of multilingual text generation of LLMs to new tasks, domains, and languages remains a challenge. Traditional evaluation on benchmark datasets carries the risk of reference data leakage in LLM training or involves additional human annotation effort. The alternative strategy of using another LLM as a scorer also faces uncertainty about the ability of this LLM itself to score non-English text. To address these issues, we propose an annotation-free cross-lingual evaluation protocol for multilingual text generation. Given an LLM candidate to be evaluated and a set of non-English inputs for a particular text generation task, our method first generates English references from the translation of the non-English inputs into English. This is done by an LLM that excels in the equivalent English text generation task. The non-English text generated by the LLM candidate is compared against the generated English references using a cross-lingual evaluation metric to assess the ability of the candidate LLM on multilingual text generation. Our protocol shows a high correlation to the reference-based ROUGE metric in four languages on news text summarization. We also evaluate a diverse set of LLMs in over 90 languages with different prompting strategies to study their multilingual generative abilities. @@ -6380,7 +6380,7 @@ Factual Knowledge Assessment of Language Models Using Distractors HichemAmmar Khodja AbderrahmaneAit gueni ssaid - FredericBechet + FredericBechet QuentinBrabant AlexisNasr GwénoléLecorvé @@ -6405,7 +6405,7 @@ Summarization of Opinionated Political Documents with Varied Perspectives NicholasDeas - KathleenMcKeown + KathleenMcKeown 8088–8108 Global partisan hostility and polarization has increased, and this polarization is heightened around presidential elections. Models capable of generating accurate summaries of diverse perspectives can help reduce such polarization by exposing users to alternative perspectives. In this work, we introduce a novel dataset and task for independently summarizing each political perspective in a set of passages from opinionated news articles. For this task, we propose a framework for evaluating different dimensions of perspective summary performance. We benchmark 11 summarization models and LLMs of varying sizes and architectures through both automatic and human evaluation. While recent models like GPT-4o perform well on this task, we find that all models struggle to generate summaries that are faithful to the intended perspective. Our analysis of summaries focuses on how extraction behavior is impacted by features of the input documents. 2025.coling-main.539 @@ -6418,7 +6418,7 @@ AliMarashian Jennifer M.Ellis ElianaColunga - Katharinavon der Wense + Katharinavon der Wense 8109–8120 To address an important gap in creating children’s stories for vocabulary enrichment, we investigate the automatic evaluation of how well stories convey the semantics of target vocabulary words, a task with substantial implications for generating educational content. We motivate this task, which we call measuring contextual informativeness in children’s stories, and provide a formal task definition as well as a dataset for the task. We further propose a method for automating the task using a large language model (LLM). Our experiments show that our approach reaches a Spearman correlation of 0.4983 with human judgments of informativeness, while the strongest baseline only obtains a correlation of 0.3534. An additional analysis shows that the LLM-based approach is able to generalize to measuring contextual informativeness in adult-directed text, on which it also outperforms all baselines. 2025.coling-main.540 @@ -6442,7 +6442,7 @@ NarumiTokunaga YukiYamagata KoujiKozaki - YujiMatsumoto + YujiMatsumoto 8148–8159 Automatic biomedical annotation is essential for advancing medical research, diagnosis, and treatment. However, it presents significant challenges, especially when entities are not explicitly mentioned in the text, leading to difficulties in extraction of relevant information. These challenges are intensified by unclear terminology, implicit background knowledge, and the lack of labeled training data. Annotating with a specific ontology adds another layer of complexity, as it requires aligning text with a predefined set of concepts and relationships. Manual annotation is time-consuming and expensive, highlighting the need for automated systems to handle large volumes of biomedical data efficiently. In this paper, we propose an entailment-based zero-shot text classification approach to annotate biomedical text passages using the Homeostasis Imbalance Process (HOIP) ontology. Our method reformulates the annotation task as a multi-class, multi-label classification problem and uses natural language inference to classify text into related HOIP processes. Experimental results show promising performance, especially when processes are not explicitly mentioned, highlighting the effectiveness of our approach for ontological annotation of biomedical literature. 2025.coling-main.542 @@ -6489,7 +6489,7 @@ YifanDu HangyuGuo KunZhou - Wayne XinZhao + Wayne XinZhao JinpengWang ChuyuanWang MingchenCai @@ -6505,7 +6505,7 @@ BoyuGuan YiningZhang YangZhao - ChengqingZong + ChengqingZong 8215–8231 Current video-guided machine translation (VMT) approaches primarily use coarse-grained visual information, resulting in information redundancy, high computational overhead, and neglect of audio content. Our research demonstrates the significance of fine-grained visual and audio information in VMT from both data and methodological perspectives. From the data perspective, we have developed a large-scale dataset TriFine, the first vision-audio-subtitle tri-modal VMT dataset with annotated multimodal fine-grained tags. Each entry in this dataset not only includes the triples found in traditional VMT datasets but also encompasses seven fine-grained annotation tags derived from visual and audio modalities. From the methodological perspective, we propose a Fine-grained Information-enhanced Approach for Translation (FIAT). Experimental results have shown that, in comparison to traditional coarse-grained methods and text-only models, our fine-grained approach achieves superior performance with lower computational overhead. These findings underscore the pivotal role of fine-grained annotated information in advancing the field of VMT. 2025.coling-main.547 @@ -6524,7 +6524,7 @@ <fixed-case>GEAR</fixed-case>: A Simple <fixed-case>GENERATE</fixed-case>, <fixed-case>EMBED</fixed-case>, <fixed-case>AVERAGE</fixed-case> <fixed-case>AND</fixed-case> <fixed-case>RANK</fixed-case> Approach for Unsupervised Reverse Dictionary Fatemah YousefAlmeman - LuisEspinosa Anke + LuisEspinosa Anke 8242–8254 Reverse Dictionary (RD) is the task of obtaining the most relevant word or set of words given a textual description or dictionary definition. Effective RD methods have applications in accessibility, translation or writing support systems. Moreover, in NLP research we find RD to be used to benchmark text encoders at various granularities, as it often requires word, definition and sentence embeddings. In this paper, we propose a simple approach to RD that leverages LLMs in combination with embedding models. Despite its simplicity, this approach outperforms supervised baselines in well studied RD datasets, while also showing less overfitting. We also conduct a number of experiments on different dictionaries and analyze how different styles, registers and target audiences impact the quality of RD systems. We conclude that, on average, untuned embeddings alone fare way below an LLM-only baseline (although they are competitive in highly technical dictionaries), but are crucial for boosting performance in combined methods. 2025.coling-main.549 @@ -6551,7 +6551,7 @@ XiyangHuang ChenkangZhu MinPeng - HongyingZan + HongyingZan YuSong 8272–8284 Document-level Relation Extraction (DocRE) aims to extract relations from documents. Compared with sentence-level relation extraction, it is necessary to extract long-distance dependencies. Existing methods enhance the output of trained DocRE models either by learning logical rules or by extracting rules from annotated data and then injecting them into the model. However, these approaches can result in suboptimal performance due to incorrect rule set constraints. To mitigate this issue, we propose Context-aware differentiable rule learning or CaDRL for short, a novel differentiable rule-based framework that learns the doc-specific logical rule to avoid generating suboptimal constraints. Specifically, we utilize Transformer-based relation attention to encode document and relation information, thereby learning the contextual information of the relation. We employ a sequence-generated differentiable rule decoder to generate relational probabilistic logic rules at each reasoning step. We also introduce a parameter sharing training mechanism in CaDRL to reconcile the DocRE model and the rule learning module. Extensive experimental results on three DocRE datasets demonstrate that CaDRL outperforms existing rule-based frameworks, significantly improving DocRE performance and making predictions more interpretable and logical. @@ -6671,7 +6671,7 @@ A Flash in the Pan: Better Prompting Strategies to Deploy Out-of-the-Box <fixed-case>LLM</fixed-case>s as Conversational Recommendation Systems Gustavo AdolphoLucas de Carvalho - SimonBenigeri + SimonBenigeri JenniferHealey VictorBursztyn DavidDemeter @@ -6851,10 +6851,10 @@ A Chain-of-Task Framework for Instruction Tuning of <fixed-case>LLM</fixed-case>s Based on <fixed-case>C</fixed-case>hinese Grammatical Error Correction XinpengLiu BingXu - MuyunYang + MuyunYang HailongCao ConghuiZhu - TiejunZhao + TiejunZhao WenpengLu 8623–8639 Over-correction is a critical issue for large language models (LLMs) to address Grammatical Error Correction (GEC) task, esp. for Chinese. This paper proposes a Chain-of-Task (CoTask) framework to reduce over-correction. The CoTask framework is applied as multi-task instruction tuning of LLMs by decomposing the process of grammatical error analysis to design auxiliary tasks and adjusting the types and combinations of training tasks. A supervised fine-tuning (SFT) strategy is also presented to enhance the performance of LLMs, together with an algorithm for automatic dataset annotation to avoid additional manual costs. Experimental results demonstrate that our method achieves new state-of-the-art results on both FCGEC (in-domain) and NaCGEC (out-of-domain) test sets. @@ -6867,7 +6867,7 @@ VirginieMouilleron MenelMahamdi WissamAntoun - DjaméSeddah + DjaméSeddah 8640–8663 The proliferation of radical content on online platforms poses significant risks, including inciting violence and spreading extremist ideologies. Despite ongoing research, existing datasets and models often fail to address the complexities of multilingual and diverse data. To bridge this gap, we introduce a publicly available multilingual dataset annotated with radicalization levels, calls for action, and named entities in English, French, and Arabic. This dataset is pseudonymized to protect individual privacy while preserving contextual information. Beyond presenting our freely available dataset, we analyze the annotation process, highlighting biases and disagreements among annotators and their implications for model performance. Additionally, we use synthetic data to investigate the influence of socio-demographic traits on annotation patterns and model predictions. Our work offers a comprehensive examination of the challenges and opportunities in building robust datasets for radical content detection, emphasizing the importance of fairness and transparency in model development. The Counter dataset is available at https://gitlab.inria.fr/ariabi/counter-dataset-public. 2025.coling-main.578 @@ -7038,7 +7038,7 @@ QibinWang XiaolinHu WeikaiXu - WeiLiu + WeiLiu JianLuan BinWang 8841–8857 @@ -7058,9 +7058,9 @@ Extending <fixed-case>LLM</fixed-case>s to New Languages: A Case Study of Llama and <fixed-case>P</fixed-case>ersian Adaptation SaminMahdizadeh Sani PouyaSadeghi - Thuy-TrangVu + Thuy-TrangVu YadollahYaghoobzadeh - GholamrezaHaffari + GholamrezaHaffari 8868–8884 Large language models (LLMs) have made great progress in classification and text generation tasks. However, they are mainly trained on English data and often struggle with low-resource languages. In this study, we explore adding a new language, i.e., Persian, to Llama (a model with a limited understanding of Persian) using parameter-efficient fine-tuning. We employ a multi-stage approach involving pretraining on monolingual Persian data, aligning representations through bilingual pretraining and instruction datasets, and instruction-tuning with task-specific datasets. We evaluate the model’s performance at each stage on generation and classification tasks. Our findings suggest that incorporating the Persian language, through bilingual data alignment, can enhance classification accuracy for Persian tasks, with no adverse impact and sometimes even improvements on English tasks. Additionally, the results highlight the model’s initial strength as a critical factor when working with limited training data, with cross-lingual alignment offering minimal benefits for the low-resource language. Knowledge transfer from English to Persian has a marginal effect, primarily benefiting simple classification tasks. 2025.coling-main.594 @@ -7072,7 +7072,7 @@ SaipingGuan XiaolongJin JiafengGuo - XueqiCheng + XueqiCheng 8885–8896 N-ary Knowledge Graphs (NKGs), where a fact can involve more than two entities, have gained increasing attention. Link Prediction in NKGs (LPN) aims to predict missing elements in facts to facilitate the completion of NKGs. Current LPN methods implicitly operate under a closed-world assumption, meaning that the sets of entities and roles are fixed. These methods focus on predicting missing elements within facts composed of entities and roles seen during training. However, in reality, new facts involving unseen entities and roles frequently emerge, requiring completing these facts. Thus, this paper proposes a new task, Inductive Link Prediction in NKGs (ILPN), which aims to predict missing elements in facts involving unseen entities and roles in emerging NKGs. To address this task, we propose a Meta-learning-based N-ary knowledge Inductive Reasoner (MetaNIR), which employs a graph neural network with meta-learning mechanisms to embed unseen entities and roles adaptively. The obtained embeddings are used to predict missing elements in facts involving unseen elements. Since no existing dataset supports this task, three datasets are constructed to evaluate the effectiveness of MetaNIR. Extensive experimental results demonstrate that MetaNIR consistently outperforms representative models across all datasets. 2025.coling-main.595 @@ -7082,7 +7082,7 @@ <fixed-case>Z</fixed-case>ig<fixed-case>Z</fixed-case>ag<fixed-case>KV</fixed-case>: Dynamic <fixed-case>KV</fixed-case> Cache Compression for Long-context Modeling based on Layer Uncertainty MeizhiZhong XikaiLiu - ChenZhang + ChenZhang YikunLei YanGao YaoHu @@ -7129,7 +7129,7 @@ Understanding the <fixed-case>R</fixed-case>o<fixed-case>PE</fixed-case> Extensions of Long-Context <fixed-case>LLM</fixed-case>s: An Attention Perspective MeizhiZhong - ChenZhang + ChenZhang YikunLei XikaiLiu YanGao @@ -7171,7 +7171,7 @@ The Only Way is Ethics: A Guide to Ethical Research with Large Language Models Eddie L.Ungless NikolasVitsakis - ZeerakTalat + ZeerakTalat JamesGarforth BjornRoss ArnoOnken @@ -7340,7 +7340,7 @@ SeongtaeHong SeungyoonLee HyeonseokMoon - HeuiseokLim + HeuiseokLim 9184–9193 Large Language Models (LLMs) have rapidly advanced, with domain-specific expert models emerging to handle specialized tasks across various fields. However, the predominant focus on English-centric models demands extensive data, making it challenging to develop comparable models for middle and low-resource languages. To address this limitation, we introduce Migrate, a novel method that leverages open-source static embedding models and up to 3 million tokens of code-switching data to facilitate the seamless transfer of embeddings to target languages. Migrate enables effective cross-lingual adaptation without requiring large-scale domain-specific corpora in the target language, promoting the accessibility of expert LLMs to a diverse range of linguistic communities. Our experimental results demonstrate that Migrate significantly enhances model performance in target languages, outperforming baseline and existing cross-lingual transfer methods. This approach provides a practical and efficient solution for extending the capabilities of domain-specific expert models. 2025.coling-main.617 @@ -7350,7 +7350,7 @@ <fixed-case>C</fixed-case>o<fixed-case>STA</fixed-case>: Code-Switched Speech Translation using Aligned Speech-Text Interleaving Bhavani ShankarP S V N PreethiJyothi - PushpakBhattacharyya + PushpakBhattacharyya 9194–9208 Code-switching is a widely prevalent linguistic phenomenon in multilingual societies like India. Building speech-to-text models for code-switched speech is challenging due to limited availability of datasets. In this work, we focus on the problem of spoken translation (ST) of code-switched speech in Indian languages to English text. We present a new end-to-end model architecture CoSTA that scaffolds on pretrained automatic speech recognition (ASR) and machine translation (MT) modules (that are more widely available for many languages). Speech and ASR text representations are fused using an aligned interleaving scheme and are fed further as input to a pretrained MT module; the whole pipeline is then trained end-to-end for spoken translation using synthetically created ST data. We also release a new evaluation benchmark for code-switched Bengali- English, Hindi-English, Marathi-English and Telugu-English speech to English text. CoSTA significantly outperforms many competitive cascaded and end-to-end multimodal baselines by up to 3.5 BLEU points. 2025.coling-main.618 @@ -7628,8 +7628,8 @@ Hands-off Image Editing: Language-guided Editing without any Task-specific Labeling, Masking or even Training RodrigoSantos - AntónioBranco - João RicardoSilva + AntónioBranco + João RicardoSilva JoaoRodrigues 9546–9565 Instruction-guided image editing consists in taking an image and an instruction and delivering that image altered according to that instruction. State-of-the-art approaches to this task suffer from the typical scaling up and domain adaptation hindrances related to supervision as they eventually resort to some kind of task-specific labelling, masking or training. We propose a novel approach that does without any such task-specific supervision and offers thus a better potential for improvement. Its assessment demonstrates that it is highly effective, achieving very competitive performance. @@ -7930,7 +7930,7 @@ Development of Numerical Error Detection Tasks to Analyze the Numerical Capabilities of Language Models TakuSakamoto SakuSugawara - AkikoAizawa + AkikoAizawa 9957–9976 Numbers are used to describe quantities in various scenarios in daily life; therefore, numerical errors can significantly affect the meaning of the entire sentence, and even a single-letter error can be fatal. Detecting numerical errors often requires a high level of commonsense and is difficult even with the recent large language models (LLMs). In this study, we create a benchmark dataset of numerical error detection that uses automatically generated numerical errors. In our analysis, we classify the numerical errors based on the properties of the errors and investigate the ability of the model from several perspectives, including the error class, error size, and passage domain. The experimental results indicate that GPT-3.5, GPT-4, and Llama-3-Instruct (8B) perform well in the numerical error detection task; however, they are not as accurate as humans. We find that the LLMs misidentified correct numbers as errors more frequently than the humans did. In particular, the analysis demonstrates that the current LLMs still need improvement for detecting numerical errors requiring calculations or extensive prior knowledge. 2025.coling-main.666 @@ -8059,7 +8059,7 @@ RobertLitschko OliverKraus VerenaBlaschke - BarbaraPlank + BarbaraPlank 10158–10171 A large amount of local and culture-specific knowledge (e.g., people, traditions, food) can only be found in documents written in dialects. While there has been extensive research conducted on cross-lingual information retrieval (CLIR), the field of cross-dialect retrieval (CDIR) has received limited attention. Dialect retrieval poses unique challenges due to the limited availability of resources to train retrieval models and the high variability in non-standardized languages. We study these challenges on the example of German dialects and introduce the first German dialect retrieval dataset, dubbed WikiDIR, which consists of seven German dialects extracted from Wikipedia. Using WikiDIR, we demonstrate the weakness of lexical methods in dealing with high lexical variation in dialects. We further show that commonly used CLIR methods such as query translation or zero-shot cross-lingual transfer with multilingual encoders do not transfer well to extremely low-resource setups, motivating the need for resource-lean and dialect-specific retrieval models. 2025.coling-main.678 @@ -8114,7 +8114,7 @@ SarahXuan JacobJobraeel AnuragKumar - DebRoy + DebRoy JadKabbara 10242–10274 We focus on enhancing comprehension in small-group recorded conversations, which serve as a medium to bring people together and provide a space for sharing personal stories and experiences on crucial social matters. One way to parse and convey information from these conversations is by sharing highlighted excerpts in subsequent conversations. This can help promote a collective understanding of relevant issues, by highlighting perspectives and experiences to other groups of people who might otherwise be unfamiliar with and thus unable to relate to these experiences. The primary challenge that arises then is that excerpts taken from one conversation and shared in another setting might be missing crucial context or key elements that were previously introduced in the original conversation. This problem is exacerbated when conversations become lengthier and richer in themes and shared experiences. To address this, we explore how Large Language Models (LLMs) can enrich these excerpts by providing socially relevant context. We present approaches for effective contextualization to improve comprehension, readability, and empathy. We show significant improvements in understanding, as assessed through subjective and objective evaluations. While LLMs can offer valuable context, they struggle with capturing key social aspects. We release the Human-annotated Salient Excerpts (HSE) dataset to support future work. Additionally, we show how context-enriched excerpts can provide more focused and comprehensive conversation summaries. @@ -8126,7 +8126,7 @@ JunchaoWu RunzheZhan Derek F.Wong - ShuYang + ShuYang XueboLiu Lidia S.Chao MinZhang @@ -8203,7 +8203,7 @@ FlorianDebaene AaronMaladry ElsLefever - VeroniqueHoste + VeroniqueHoste 10367–10374 This paper explores the effectiveness of two types of transformer models — large generative models and sequence-to-sequence models — for automatically post-correcting Optical Character Recognition (OCR) output in early modern Dutch plays. To address the need for optimally aligned data, we create a parallel dataset based on the OCRed and ground truth versions from the EmDComF corpus using state-of-the-art alignment techniques. By combining character-based and semantic methods, we design and release a qualitative OCR-to-gold parallel dataset, selecting the alignment with the lowest Character Error Rate (CER) for all alignment pairs. We then fine-tune and evaluate five generative models and four sequence-to-sequence models on the OCR post-correction dataset. Results show that sequence-to-sequence models generally outperform generative models in this task, correcting more OCR errors and overgenerating and undergenerating less, with mBART as the best performing system. 2025.coling-main.690 @@ -8281,8 +8281,8 @@ AmoghMannekote JinseokNam ZimingLi - Kristy ElizabethBoyer - Bonnie J.Dorr + Kristy ElizabethBoyer + Bonnie J.Dorr 10449–10459 Indirect User Requests (IURs), such as “It’s cold in here” instead of “Could you please increase the temperature?” are common in human-human task-oriented dialogue and require world knowledge and pragmatic reasoning from the listener. While large language models (LLMs) can handle these requests effectively, smaller models deployed on virtual assistants often struggle due to resource constraints. Moreover, existing task-oriented dialogue benchmarks lack sufficient examples of complex discourse phenomena such as indirectness. To address this, we propose a set of linguistic criteria along with an LLM-based pipeline for generating realistic IURs to test natural language understanding (NLU) and dialogue state tracking (DST) models before deployment in a new domain. We also release IndirectRequests, a dataset of IURs based on the Schema-Guided Dialogue (SGD) corpus, as a comparative testbed for evaluating the performance of smaller models in handling indirect requests. 2025.coling-main.696 @@ -8324,8 +8324,8 @@ RobiertSepulveda Torres JeremyBarnes PabloGamallo - AitorGonzalez-Agirre - GermanRigau + AitorGonzalez-Agirre + GermanRigau MartaVillegas 10491–10519 The current best practice to measure the performance of base Large Language Models is to establish a multi-task benchmark that covers a range of capabilities of interest. Currently, however, such benchmarks are only available in a few high-resource languages. To address this situation, we present IberoBench, a multilingual, multi-task benchmark for Iberian languages (i.e., Basque, Catalan, Galician, European Spanish and European Portuguese) built on the LM Evaluation Harness framework. The benchmark consists of 62 tasks divided into 179 subtasks. We evaluate 33 existing LLMs on IberoBench on 0- and 5-shot settings. We also explore the issues we encounter when working with the Harness and our approach to solving them to ensure high-quality evaluation. @@ -8346,7 +8346,7 @@ JinchengCao BoboLi JiangLiu - DonghongJi + DonghongJi 10531–10540 Entity and relation extraction is a conventional task in the field of information extraction. Existing work primarily focuses on detecting specific relations between entities, often constrained to particular fields and lacking general applicability. In response, we propose a novel task: nominal compound relation extraction (NCRE), which concentrates on abstract and broadly applicable relation extraction between noun phrases. This task diverges significantly from traditional entity and relation extraction in two key respects. Firstly, our task involves general nominal compounds rather than named entities, which are longer and encompass a broader scope, presenting significant challenges for extraction. Secondly, relation extraction in NCRE demands an in-depth understanding of context to detect abstract relations. We manually annotate a high-quality Chinese dataset for the NCRE task and develop a model incorporating the rotary position-enhanced word pair (RoWP) detection schema. Experimental results demonstrate the efficiency of our RoWP model over previous baselines, while the suboptimal F1 scores indicate that NCRE remains a challenging task. Our code and data are available at https://github.com/yeecjc/NCRE. 2025.coling-main.701 @@ -8421,7 +8421,7 @@ Understanding Token Probability Encoding in Output Embeddings - HakazeCho + HakazeCho YoshihiroSakai KenshiroTanaka MarikoKato @@ -8436,7 +8436,7 @@ LuyangLin LingzhiWang JinsongGuo - Kam-FaiWong + Kam-FaiWong 10634–10649 The pervasive spread of misinformation and disinformation in social media underscores the critical importance of detecting media bias. While robust Large Language Models (LLMs) have emerged as foundational tools for bias prediction, concerns about inherent biases within these models persist. In this work, we investigate the presence and nature of bias within LLMs and its consequential impact on media bias detection. Departing from conventional approaches that focus solely on bias detection in media content, we delve into biases within the LLM systems themselves. Through meticulous examination, we probe whether LLMs exhibit biases, particularly in political bias prediction and text continuation tasks. Additionally, we explore bias across diverse topics, aiming to uncover nuanced variations in bias expression within the LLM framework. Importantly, we propose debiasing strategies, including prompt engineering and model fine-tuning. Extensive analysis of bias tendencies across different LLMs sheds light on the broader landscape of bias propagation in language models. This study advances our understanding of LLM bias, offering critical insights into its implications for bias detection tasks and paving the way for more robust and equitable AI systems 2025.coling-main.709 @@ -8601,7 +8601,7 @@ LuXiang YangZhao YuZhou - ChengqingZong + ChengqingZong 10877–10890 Document Image Translation (DIT) aims to translate documents in images from one language to another. It requires visual layouts and textual contents understanding, as well as document coherence capturing. However, current methods often rely on the quality of OCR output, which, particularly in complex-layout scenarios, frequently loses the crucial document coherence, leading to chaotic text. To overcome this problem, we introduce a novel end-to-end network, named Zoom-out DIT (ZoomDIT), inspired by human translation procedures. It jointly accomplishes the multi-level tasks including word positioning, sentence recognition & translation, and document organization, based on a fine-to-coarse zoom-out framework, to progressively realize “chaotic words to coherent document” and improve translation. We further contribute a new large-scale DIT dataset with multi-level fine-grained labels. Extensive experiments on public and our new dataset demonstrate significant improvements in translation quality towards complex-layout document images, offering a robust solution for reorganizing the chaotic OCR outputs to a coherent document translation. 2025.coling-main.723 @@ -8632,7 +8632,7 @@ YunkeZhang TaoGui QiZhang - XuanjingHuang + XuanjingHuang 10902–10923 Open Named Entity Recognition (NER), which involves identifying arbitrary types of entities from arbitrary domains, remains challenging for Large Language Models (LLMs). Recent studies suggest that fine-tuning LLMs on extensive NER data can boost their performance. However, training directly on existing datasets neglects their inconsistent entity definitions and redundant data, limiting LLMs to dataset-specific learning and hindering out-of-domain adaptation. To address this, we present B2NERD, a compact dataset designed to guide LLMs’ generalization in Open NER under a universal entity taxonomy. B2NERD is refined from 54 existing English and Chinese datasets using a two-step process. First, we detect inconsistent entity definitions across datasets and clarify them by distinguishable label names to construct a universal taxonomy of 400+ entity types. Second, we address redundancy using a data pruning strategy that selects fewer samples with greater category and semantic diversity. Comprehensive evaluation shows that B2NERD significantly enhances LLMs’ Open NER capabilities. Our B2NER models, trained on B2NERD, outperform GPT-4 by 6.8-12.0 F1 points and surpass previous methods in 3 out-of-domain benchmarks across 15 datasets and 6 languages. The data, models, and code are publicly available at https://github.com/UmeanNever/B2NER. 2025.coling-main.725 @@ -8727,7 +8727,7 @@ QipengGuo HangYan XipengQiu - XuanjingHuang + XuanjingHuang DahuaLin 11056–11069 Large Language Models (LLMs) have shown outstanding breakthroughs in code generation. Recent work improves code LLMs by training on synthetic data generated by some powerful LLMs, which can be challenging to scale due to the dependence on a teacher model and high generation costs. In this paper, we focus on synthesizing code data at scale and propose a Case2Code task by exploiting the expressiveness and correctness of programs. Case2Code is an inductive inference task that aims to infer underlying code implementations by observing input-output examples or program behaviors, By incorporating LLMs to generate program inputs, and executing the program with these inputs to obtain the program outputs, we can synthesize diverse and high-quality Case2Code data at scale for training and evaluating code LLMs. Experimental results show that case-to-code induction is challenging for current representative LLMs if they are untrained. Models trained with Case2Code improve performance not only on distribution case-to-code induction but also various coding-generation tasks, demonstrating the great potential of large-scale synthetic data and inductive learning. @@ -8795,7 +8795,7 @@ <fixed-case>M</fixed-case>ani<fixed-case>T</fixed-case>weet: A New Benchmark for Identifying Manipulation of News on Social Media Kung-HsiangHuang Hou PongChan - KathleenMcKeown + KathleenMcKeown HengJi 11161–11180 Considerable advancements have been made to tackle the misrepresentation of information derived from reference articles in the domains of fact-checking and faithful summarization. However, an unaddressed aspect remains - the identification of social media posts that manipulate information within associated news articles. This task presents a significant challenge, primarily due to the prevalence of personal opinions in such posts. We present a novel task, identifying manipulation of news on social media, which aims to detect manipulation in social media posts and identify manipulated or inserted information. To study this task, we have proposed a data collection schema and curated a dataset called ManiTweet, consisting of 3.6K pairs of tweets and corresponding articles. Our analysis demonstrates that this task is highly challenging, with large language models (LLMs) yielding unsatisfactory performance. Additionally, we have developed a simple yet effective basic model that outperforms LLMs significantly on the ManiTweet dataset. Finally, we have conducted an exploratory analysis of human-written tweets, unveiling intriguing connections between manipulation and the domain and factuality of news articles, as well as revealing that manipulated sentences are more likely to encapsulate the main story or consequences of a news outlet. @@ -8805,7 +8805,7 @@ Filter-then-Generate: Large Language Models with Structure-Text Adapter for Knowledge Graph Completion BenLiu - JihaiZhang + JihaiZhang FangquanLin ChengYang MinPeng @@ -8978,7 +8978,7 @@ Human Interest Framing across Cultures: A Case Study on Climate Change GiselaVallejo Christinede Kock - TimothyBaldwin + TimothyBaldwin LeaFrermann 11380–11398 Human Interest (HI) framing is a narrative strategy that injects news stories with a relatable, emotional angle and a human face to engage the audience. In this study we investigate the use of HI framing across different English-speaking cultures in news articles about climate change. Despite its demonstrated impact on the public’s behaviour and perception of an issue, HI framing has been under-explored in NLP to date. We perform a systematic analysis of HI stories to understand its role in climate change reporting in English-speaking countries from four continents. Our findings reveal key differences in how climate change is portrayed across countries, encompassing aspects such as narrative roles, article polarity, pronoun prevalence, and topics. We also demonstrate that these linguistic aspects boost the performance of fine-tuned pre-trained language models on HI story classification. @@ -8993,7 +8993,7 @@ Georgi N.Georgiev JiahuiGeng IrynaGurevych - PreslavNakov + PreslavNakov 11399–11421 The increased use of large language models (LLMs) across a variety of real-world applications calls for mechanisms to verify the fac- tual accuracy of their outputs. Difficulties lie in assessing the factuality of free-form responses in open domains. Also, different pa- pers use disparate evaluation benchmarks and measurements, which renders them hard to compare and hampers future progress. To mitigate these issues, we propose OpenFactCheck, a unified framework for building customized automatic fact-checking systems, benchmarking their accuracy, evaluating factuality of LLMs, and verifying claims in a document. OpenFactCheck consists of three modules: (i) CUSTCHECKER allows users to easily customize an automatic fact-checker and verify the factual correctness of documents and claims, (ii) LLMEVAL, a unified evaluation framework assesses LLM’s factuality ability from various perspectives fairly, and (iii) CHECKEREVAL is an extensible solution for gauging the reliability of automatic fact-checkers’ verification results using human-annotated datasets. Data and code are publicly available at https: //github.com/yuxiaw/openfactcheck. 2025.coling-main.755 @@ -9001,13 +9001,13 @@ A Dataset for Expert Reviewer Recommendation with Large Language Models as Zero-shot Rankers - Vanja M.Karan + Vanja M.Karan StephenMcQuistin RyoYanagida ColinPerkins GarethTyson IgnacioCastro - Patrick G.T.Healey + Patrick G.T.Healey MatthewPurver 11422–11427 The task of reviewer recommendation is increasingly important, with main techniques utilizing general models of text relevance. However, state of the art (SotA) systems still have relatively high error rates. Two possible reasons for this are: a lack of large datasets and the fact that large language models (LLMs) have not yet been applied. To fill these gaps, we first create a substantial new dataset, in the domain of Internet specification documents; then we introduce the use of LLMs and evaluate their performance. We find that LLMs with prompting can improve on SotA in some cases, but that they are not a cure-all: this task provides a challenging setting for prompt-based methods @@ -9029,7 +9029,7 @@ Proceedings of the 31st International Conference on Computational Linguistics: System Demonstrations - OwenRambow + OwenRambow LeoWanner MariannaApidianaki HendAl-Khalifa @@ -9053,8 +9053,8 @@ Truong DinhDo An HoangTrieu Van-ThuyPhi - Minh LeNguyen - YujiMatsumoto + Minh LeNguyen + YujiMatsumoto 1–8 The growing volume of scientific literature in polymer science presents a significant challenge for researchers attempting to extract and annotate domain-specific entities, such as polymer names, material properties, and related information. Manual annotation of these documents is both time-consuming and prone to error due to the complexity of scientific language. To address this, we introduce PolyMinder, an automated support system designed to assist polymer scientists in extracting and annotating polymer-related entities and their relationships from scientific documents. The system utilizes recent advanced Named Entity Recognition (NER) and Relation Extraction (RE) models tailored to the polymer domain. PolyMinder streamlines the annotation process by providing a web-based interface where users can visualize, verify, and refine the extracted information before finalizing the annotations. The system’s source code is made publicly available to facilitate further research and development in this field. Our system can be accessed through the following URL: https://www.jaist.ac.jp/is/labs/nguyen-lab/systems/polyminder 2025.coling-demos.1 @@ -9092,8 +9092,8 @@ RuiXing YilinGeng ZenanZhai - PreslavNakov - TimothyBaldwin + PreslavNakov + TimothyBaldwin 28–36 We introduce Loki, an open-source tool designed to address the growing problem of misinformation. Loki adopts a human-centered approach, striking a balance between the quality of fact-checking and the cost of human involvement. It decomposes the fact-checking task into a five-step pipeline: breaking down long texts into individual claims, assessing their check-worthiness, generating queries, retrieving evidence, and verifying the claims. Instead of fully automating the claim verification process, provides essential information at each step to assist human judgment, especially for general users such as journalists and content moderators. Moreover, it has been optimized for latency, robustness, and cost efficiency at a commercially usable level. Loki is released under an MIT license and is available on GitHub. We also provide a video presenting the system and its capabilities. 2025.coling-demos.4 @@ -9120,7 +9120,7 @@ <fixed-case>B</fixed-case>eef<fixed-case>B</fixed-case>ot: Harnessing Advanced <fixed-case>LLM</fixed-case> and <fixed-case>RAG</fixed-case> Techniques for Providing Scientific and Technology Solutions to Beef Producers - ZhihaoZhang + ZhihaoZhang Carrie-AnnWilson RachelHay YvetteEveringham @@ -9139,7 +9139,7 @@ LiboSun YihangYang SimingChen - XuanjingHuang + XuanjingHuang ZhongyuWei 63–82 We introduce AI-Press, an automated news drafting and polishing system based on multi-agent collaboration and Retrieval-Augmented Generation. We develop a feedback simulation system that generates public responses considering demographic distributions. Demo link: https://youtu.be/TmjfJrbzaRU @@ -9230,7 +9230,7 @@ <fixed-case>GECT</fixed-case>urk <fixed-case>WEB</fixed-case>: An Explainable Online Platform for <fixed-case>T</fixed-case>urkish Grammatical Error Detection and Correction AliGebeşçe - Gözde GülŞahin + Gözde GülŞahin 163–173 Sophisticated grammatical error detection/correction tools are available for a small set of languages such as English and Chinese. However, it is not straightforward—if not impossible—to adapt them to morphologically rich languages with complex writing rules like Turkish which has more than 80 million speakers. Even though several tools exist for Turkish, they primarily focus on spelling errors rather than grammatical errors and lack features such as web interfaces, error explanations and feedback mechanisms. To fill this gap, we introduce GECTurk WEB, a light, open-source, and flexible web-based system that can detect and correct the most common forms of Turkish writing errors, such as the misuse of diacritics, compound and foreign words, pronouns, light verbs along with spelling mistakes. Our system provides native speakers and second language learners an easily accessible tool to detect/correct such mistakes and also to learn from their mistakes by showing the explanation for the violated rule(s). The proposed system achieves 88,3 system usability score, and is shown to help learn/remember a grammatical rule (confirmed by 80% of the participants). The GECTurk WEB is available both as an offline tool (https://github.com/GGLAB-KU/gecturkweb) or at www.gecturk.net. 2025.coling-demos.16 @@ -9268,7 +9268,7 @@ <fixed-case>C</fixed-case>omp<fixed-case>UGE</fixed-case>-Bench: Comparative Understanding and Generation Evaluation Benchmark for Comparative Question Answering AhmadShallouf IrinaNikishina - ChrisBiemann + ChrisBiemann 189–198 This paper presents CompUGE, a comprehensive benchmark designed to evaluate Comparative Question Answering (CompQA) systems. The benchmark is structured around four core tasks: Comparative Question Identification, Object and Aspect Identification, Stance Classification, and Answer Generation. It unifies multiple datasets and provides a robust evaluation platform to compare various models across these sub-tasks. We also create additional all-encompassing CompUGE datasets by filtering and merging the existing ones. The benchmark for comparative question answering sub-tasks is designed as a web application available on HuggingFace Spaces: https://huggingface.co/spaces/uhhlt/CompUGE-Bench 2025.coling-demos.19 @@ -9308,7 +9308,7 @@ Proceedings of the 31st International Conference on Computational Linguistics: Industry Track - OwenRambow + OwenRambow LeoWanner MariannaApidianaki HendAl-Khalifa @@ -9527,7 +9527,7 @@ AnushaBagalkotkar SupriyaAnand GabrielArnson - Rohini K.Srihari + Rohini K.Srihari KennethJoseph 213–235 In recent years, there has been significant effort to align large language models with human preferences. This work focuses on developing a chatbot specialized in the real estate domain, with an emphasis on incorporating compliant behavior to ensure it can be used without perpetuating discriminatory practices like steering and redlining, which have historically plagued the real estate industry in the United States. Building on prior work, we present a method for generating a synthetic general instruction-following dataset, along with safety data. Through extensive evaluations and benchmarks, we fine-tuned a llama-3-8B-instruct model and demonstrated that we can enhance it’s performance significantly to match huge closed-source models like GPT-4o while making it safer and more compliant. We open-source the model, data and code to support further development and research in the community @@ -9588,7 +9588,7 @@ IvanSekulic FilipCarevic NghiaKhau - Diana NicoletaPopa + Diana NicoletaPopa BrunaGuedes VictorGuimaraes ZeyuYang @@ -9663,7 +9663,7 @@ LukasStappen PhillipSchneider FlorianMatthes - ElisabethAndre + ElisabethAndre 343–357 In today’s assistant landscape, personalisation enhances interactions, fosters long-term relationships, and deepens engagement. However, many systems struggle with retaining user preferences, leading to repetitive user requests and disengagement. Furthermore, the unregulated and opaque extraction of user preferences in industry applications raises significant concerns about privacy and trust, especially in regions with stringent regulations like Europe. In response to these challenges, we propose a long-term memory system for voice assistants, structured around predefined categories. This approach leverages Large Language Models to efficiently extract, store, and retrieve preferences within these categories, ensuring both personalisation and transparency. We also introduce a synthetic multi-turn, multi-session conversation dataset (CarMem), grounded in real industry data, tailored to an in-car voice assistant setting. Benchmarked on the dataset, our system achieves an F1-score of .78 to .95 in preference extraction, depending on category granularity. Our maintenance strategy reduces redundant preferences by 95% and contradictory ones by 92%, while the accuracy of optimal retrieval is at .87. Collectively, the results demonstrate the system’s suitability for industrial applications. 2025.coling-industry.29 @@ -9671,7 +9671,7 @@ <fixed-case>XTR</fixed-case> meets <fixed-case>C</fixed-case>ol<fixed-case>BERT</fixed-case>v2: Adding <fixed-case>C</fixed-case>ol<fixed-case>BERT</fixed-case>v2 Optimizations to <fixed-case>XTR</fixed-case> - Riyaz AhmadBhat + Riyaz AhmadBhat JaydeepSen 358–365 XTR (Lee et al., 2023) introduced an efficient multi-vector retrieval method that addresses the limitations of the ColBERT (Khattab and Zaharia, 2020model by simplifying retrieval into a single stage through a modified learning objective. While XTR eliminates the need for multistage retrieval, it doesn’t incorporate the efficiency optimizations from ColBERTv2 (Santhanam et al., 2022, which improve indexing and retrieval speed. In this work, we enhance XTR by integrating ColBERTv2’s optimizations, showing that the combined approach preserves the strengths of both models. This results in a more efficient and scalable solution for multi-vector retrieval, while maintaining XTR’s streamlined retrieval process. @@ -9699,7 +9699,7 @@ ParasSharma Anthony B.Sicilia KatherineAtwell - DianeLitman + DianeLitman MaliheAlikhani 374–386 General-purpose automatic speech recognition (ASR) systems do not always perform well in goal-oriented dialogue. Existing ASR correction methods rely on prior user data or named entities. We extend correction to tasks that have no prior user data and exhibit linguistic flexibility such as lexical and syntactic variations. We propose a novel context augmentation with a large language model and a ranking strategy that incorporates contextual information from the dialogue states of a goal-oriented conversational AI and its tasks. Our method ranks (1) n-best ASR hypotheses by their lexical and semantic similarity with context and (2) context by phonetic correspondence with ASR hypotheses. Evaluated in home improvement and cooking domains with real-world users, our method improves recall and F1 of correction by 34% and 16%, respectively, while maintaining precision and false positive rate. Users rated .8-1 point (out of 5) higher when our correction method worked properly, with no decrease due to false positives. @@ -9787,7 +9787,7 @@ <fixed-case>B</fixed-case>ack<fixed-case>MATH</fixed-case>: Towards Backward Reasoning for Solving Math Problems Step by Step ShaoweiZhang - DeyiXiong + DeyiXiong 466–482 Large language models (LLMs) have achieved impressive results in reasoning, particularly in multi-step reasoning tasks. However, when faced with more complex mathematical problems, the performance of LLMs drops significantly. To address this issue, in this paper, we propose a backward reasoning dataset, BackMATH-Data. The dataset comprises approximately 14K backward reasoning problems and 100K reasoning steps. It follows a result-oriented approach, to construct backward reasoning problems by swapping the reasoning results with specific solving conditions in the original problems.Additionally, we introduce Backward-reasoning Process-supervision Reward Model (BackPRM) and BackMATH-LLM. BackPRM supervises the quality of the generated backward reasoning problems, while BackMATH-LLM is designed for mathematical reasoning. BackMATH-LLM is fine-tuned and enhanced through reinforcement learning by supervising the quality of backward reasoning problems and by providing feedback on reasoning steps, thereby improving the mathematical reasoning capabilities of LLMs.Extensive experiments demonstrate that our model achieves an accuracy of 68.1% on the GSM8K dataset and 21.9% on the MATH dataset, exceeding the SOTA by 1.6% and 2.1% respectively. 2025.coling-industry.40 @@ -9798,7 +9798,7 @@ YincenQu HengyueLiu KunWang - XiangyingDai + XiangyingDai XiaoouLu HuiZhou ChaoMa @@ -9857,7 +9857,7 @@ ElenaSenger YuriCampbell Robvan der Goot - BarbaraPlank + BarbaraPlank 533–545 Accurate career path prediction can support many stakeholders, like job seekers, recruiters, HR, and project managers. However, publicly available data and tools for career path prediction are scarce. In this work, we introduce Karrierewege, a comprehensive, publicly available dataset containing over 500k career paths, significantly surpassing the size of previously available datasets. We link the dataset to the ESCO taxonomy to offer a valuable resource for predicting career trajectories. To tackle the problem of free-text inputs typically found in resumes, we enhance it by synthesizing job titles and descriptions resulting in Karrierewege+. This allows for accurate predictions from unstructured data, closely aligning with practical application challenges. We benchmark existing state-of-the-art (SOTA) models on our dataset and a previous benchmark and see increased performance and robustness by synthesizing the data for the free-text use cases. 2025.coling-industry.46 @@ -9923,7 +9923,7 @@ <fixed-case>UR</fixed-case>2<fixed-case>N</fixed-case>: Unified Retriever and <fixed-case>R</fixed-case>era<fixed-case>N</fixed-case>ker - Riyaz AhmadBhat + Riyaz AhmadBhat JaydeepSen RudraMurthy VigneshP @@ -9966,7 +9966,7 @@ AayushBajaj AaryamanKartha EnamulHoque - ShafiqJoty + ShafiqJoty 625–643 Given the ubiquity of charts as a data analysis, visualization, and decision-making tool across industries and sciences, there has been a growing interest in developing pre-trained foundation models as well as general purpose instruction-tuned models for chart understanding and reasoning. However, existing methods suffer crucial drawbacks across two critical axes affecting the performance of chart representation models: they are trained on data generated from underlying data tables of the charts, ignoring the visual trends and patterns in chart images, and use weakly aligned vision-language backbone models for domain-specific training, limiting their generalizability when encountering charts in the wild. We address these important drawbacks and introduce ChartGemma, a novel chart understanding and reasoning model developed over PaliGemma. Rather than relying on underlying data tables, ChartGemma is trained on instruction-tuning data generated directly from chart images, thus capturing both high-level trends and low-level visual information from a diverse set of charts. Our simple approach achieves state-of-the-art results across 5 benchmarks spanning chart summarization, question answering, and fact-checking, and our elaborate qualitative studies on real-world charts show that ChartGemma generates more realistic and factually correct summaries compared to its contemporaries. We release the code, model checkpoints, dataset, and demos at https://github.com/vis-nlp/ChartGemma. 2025.coling-industry.54 @@ -10014,7 +10014,7 @@ AleksandrDrozd JordanClive KshitijGupta - LiangyuChen + LiangyuChen QiSun KenTsui NourMoustafa-Fahmy @@ -10049,7 +10049,7 @@ Lightweight Safety Guardrails Using Fine-tuned <fixed-case>BERT</fixed-case> Embeddings AaronZheng MansiRana - AndreasStolcke + AndreasStolcke 689–696 With the recent proliferation of large language models (LLMs), enterprises have been able to rapidly develop proof-of-concepts and prototypes. As a result, there is a growing need to implement robust guardrails that monitor, quantize and control an LLM’s behavior, ensuring that the use is reliable, safe, accurate and also aligned with the users’ expectations. Previous approaches for filtering out inappropriate user prompts or system outputs, such as LlamaGuard and OpenAI’s MOD API, have achieved significant success by fine-tuning existing LLMs. However, using fine-tuned LLMs as guardrails introduces increased latency and higher maintenance costs, which may not be practical or scalable for cost-efficient deployments. We take a different approach, focusing on fine-tuning a lightweight architecture: Sentence-BERT. This method reduces the model size from LlamaGuard’s 7 billion parameters to approximately 67 million, while maintaining comparable performance on the AEGIS safety benchmark. 2025.coling-industry.58 @@ -10058,7 +10058,7 @@ Zero-shot Slot Filling in the Age of <fixed-case>LLM</fixed-case>s for Dialogue Systems MansiRana - KadriHacioglu + KadriHacioglu SindhujaGopalan MaragathamaniBoothalingam 697–706 @@ -10102,8 +10102,8 @@ CheoneumPark SeohyeongJeong MinsangKim - KyungTaeLim - Yong-HunLee + KyungTaeLim + Yong-HunLee 760–770 Recent advances in language models (LMs) has driven progress in information retrieval (IR), effectively extracting semantically relevant information. However, they face challenges in balancing computational costs with deeper query-document interactions. To tackle this, we present two mechanisms: 1) a light and effective multi-vector retrieval with sequence compression vectors, dubbed SCV and 2) coarse-to-fine vector search. The strengths of SCV stems from its application of span compressive vectors for scoring. By employing a non-linear operation to examine every token in the document, we abstract these into a span-level representation. These vectors effectively reduce the document’s dimensional representation, enabling the model to engage comprehensively with tokens across the entire collection of documents, rather than the subset retrieved by Approximate Nearest Neighbor. Therefore, our framework performs a coarse single vector search during the inference stage and conducts a fine-grained multi-vector search end-to-end. This approach effectively reduces the cost required for search. We empirically show that SCV achieves the fastest latency compared to other state-of-the-art models and can obtain competitive performance on both in-domain and out-of-domain benchmark datasets. 2025.coling-industry.63 diff --git a/data/xml/2025.comedi.xml b/data/xml/2025.comedi.xml index df247c2570..6d3d5bc705 100644 --- a/data/xml/2025.comedi.xml +++ b/data/xml/2025.comedi.xml @@ -59,7 +59,7 @@ Deep-change at <fixed-case>C</fixed-case>o<fixed-case>M</fixed-case>e<fixed-case>D</fixed-case>i: the Cross-Entropy Loss is not All You Need MikhailKuklin - NikolayArefyev + NikolayArefyev 48–64 Manual annotation of edges in Diachronic Word Usage Graphs is a critical step in creation of datasets for Lexical Semantic Change Detection tasks, but a very labour-intensive one. Annotators estimate if two senses of an ambiguous word expressed in two usages of this word are related and how. This is a variation of the Word-in-Context (WiC) task with some peculiarities, including diachronic data, an ordinal scale for annotations consisting of 4 values with pre-defined meanings (e.g. homonymy, polysemy), and special attention to the degree of disagreement between annotators which affects the further processing of the graph. CoMeDi is a shared task aiming at automating this annotation process. Participants are asked to predict the median annotation for a pair of usages in the first subtask, and estimate the disagreement between annotators in the second subtask. Together this gives some idea about the distribution of annotations we can get from humans for a given pair of usages. For the first subtask we tried several ways of adapting a binary WiC model to this 4 class problem. We discovered that further fine-tuning the model as a 4 class classifier on the training data of the shared task works significantly worse than thresholding the original binary model. For the second subtask our best results were achieved by building a model that predicts the whole multinomial distribution of annotations and calculating the disagreement from this distribution. Our solutions for both subtasks have outperformed all other participants of the shared task. 2025.comedi-1.5 @@ -157,9 +157,9 @@ Disagreement in Metaphor Annotation of <fixed-case>M</fixed-case>exican <fixed-case>S</fixed-case>panish Science Tweets AlecSánchez-Montero - GemmaBel-Enguix + GemmaBel-Enguix Sergio-LuisOjeda-Trueba - GerardoSierra + GerardoSierra 155–164 Traditional linguistic annotation methods often strive for a gold standard with hard labels as input for natural language processing models, assuming an underlying objective truth for all tasks. However, disagreement among annotators is a common scenario, even for seemingly objective linguistic tasks, and is particularly prominent in figurative language annotation, since multiple valid interpretations can sometimes coexist. This study presents the annotation process for identifying metaphorical tweets within a corpus of 3733 Public Communication of Science texts written in Mexican Spanish, emphasizing inter-annotator disagreement. Using Fleiss’ and Cohen’s Kappa alongside agreement percentages, we evaluated metaphorical language detection through binary classification in three situations: two subsets of the corpus labeled by three different non-expert annotators each, and a subset of disagreement tweets, identified in the non-expert annotation phase, re-labeled by three expert annotators. Our results suggest that expert annotation may improve agreement levels, but does not exclude disagreement, likely due to factors such as the relatively novelty of the genre, the presence of multiple scientific topics, and the blending of specialized and non-specialized discourse. Going further, we propose adopting a learning-from-disagreement approach for capturing diverse annotation perspectives to enhance computational metaphor detection in Mexican Spanish. 2025.comedi-1.15 diff --git a/data/xml/2025.computel.xml b/data/xml/2025.computel.xml index b3d2484eac..12631fc10a 100644 --- a/data/xml/2025.computel.xml +++ b/data/xml/2025.computel.xml @@ -6,7 +6,7 @@ JordanLachler GodfredAgyapong AnttiArppe - SarahMoeller + SarahMoeller AditiChaudhary ShrutiRijhwani DaisyRosenblum @@ -25,7 +25,7 @@ Formalizing the Morphology of Rromani Adjectives MasakoWatabe - MaxSilberztein + MaxSilberztein 1-10 This paper presents a set of linguistic resources that formalizes the morphological behavior of simple Rromani adjectives. We describe the formalization of the adjectives’ morphology and the implementation with the NooJ linguistic platform of an electronic dictionary associated with a formal morpho-syntactic grammar. We can then apply this set of resources to a corpus to evaluate the resources and automatically annotate adjectival forms in Rromani texts. The final set of resources can then be used to identify each Rromani dialectal variant and can be used as a pedagogical tool to teach Rromani as a second language. 2025.computel-main.1 @@ -34,7 +34,7 @@ Bilingual Sentence Mining for Low-Resource Languages: a Case Study on Upper and <fixed-case>L</fixed-case>ower <fixed-case>S</fixed-case>orbian ShuOkabe - AlexanderFraser + AlexanderFraser 11-19 Parallel sentence mining is crucial for down- stream tasks such as Machine Translation, especially for low-resource languages, where such resources are scarce. In this context, we apply a pipeline approach with contextual embeddings on two endangered Slavic languages spoken in Germany, Upper and Lower Sorbian, to evaluate mining quality. To this end, we compare off-the-shelf multilingual language models and word encoders pre-trained on Upper Sorbian to understand their impact on sentence mining. Moreover, to filter out irrelevant pairs, we experiment with a post-processing of mined sentences through an unsupervised word aligner based on word embeddings. We observe the usefulness of additional pre-training in Upper Sorbian, which leads to direct improvements when mining the same language but also its related language, Lower Sorbian. 2025.computel-main.2 @@ -108,7 +108,7 @@ Speech Technologies Datasets for <fixed-case>A</fixed-case>frican Under-Served Languages EmmanuelNgue Um - FrancisTyers + FrancisTyers Eliette-Caroline EmilieNgo Tjomb Florus LandryDibengue Blaise-MathieuBanoum Manguele @@ -131,7 +131,7 @@ JosephLukner FinnVerdonk Willemde Reuse - JonathanWashington + JonathanWashington 91-99 This paper presents work towards a morphological transducer for Hän, a Dene language spoken in Alaska and the Yukon Territory. We present the implementation of several complex morphological features of Dene languages into a morphological transducer, an evaluation of the transducer on corpus data, and a discussion of the future uses of such a transducer towards Hän revitalization efforts. 2025.computel-main.10 diff --git a/data/xml/2025.conll.xml b/data/xml/2025.conll.xml index 20e0b078e0..2d7840de6d 100644 --- a/data/xml/2025.conll.xml +++ b/data/xml/2025.conll.xml @@ -3,7 +3,7 @@ Proceedings of the 29th Conference on Computational Natural Language Learning - GemmaBoleda + GemmaBoleda MichaelRoth Association for Computational Linguistics
Vienna, Austria
@@ -40,7 +40,7 @@ Quasi-symbolic Semantic Geometry over Transformer-based Variational <fixed-case>A</fixed-case>uto<fixed-case>E</fixed-case>ncoder YingjiZhang DaniloCarvalhoUniversity of Manchester - AndreFreitasIdiap Research Institute and University of Manchester + AndreFreitasIdiap Research Institute and University of Manchester 12-29 Formal/symbolic semantics can provide canonical, rigid controllability and interpretability to sentence representations due to their localisation or composition property. How can we deliver such property to the current distributional sentence representations to better control and interpret the generation of language models (LMs)? In this work, we theoretically frame the sentence semantics as the composition of semantic role - word content features and propose the formal semantic geometrical framework. To inject such geometry into Transformer-based LMs (i.e. GPT2), we deploy a supervised Transformer-based Variational AutoEncoder, where the sentence generation can be manipulated and explained over low-dimensional latent Gaussian space. In addition, we propose a new probing algorithm to guide the movement of sentence vectors over such geometry. Experimental results reveal that the formal semantic geometry can potentially deliver better control and interpretation to sentence generation. 2025.conll-1.2 @@ -51,7 +51,7 @@ <fixed-case>L</fixed-case>aw<fixed-case>T</fixed-case>oken: a single token worth more than its constituents Yu-HsiangTsengEberhard-Karls-Universität Tübingen Hsin-YuChoudeepq.com - Shu-KaiHsiehNational Taiwan University + Shu-KaiHsiehNational Taiwan University 30-46 Legal citations require correctly recalling the law references of complex law article names and article numbering, which large language models typically treat as multi-token sequences. Motivated by the form-meaning pair of constructionist approaches, we explore treating these multi-token law references as a single holistic law token and examining the implications for legal citation accuracy and differences in model interpretability. We train and compare two types of models: LawToken models, which encode the legal citations as a single law token, and LawBase models, which treat them as multi-token compounds. The results show that LawToken models outperform LawBase models on legal citation tasks, primarily due to fewer errors in the article numbering components. Further model representation analysis reveals that, while both models achieve comparable semantic representation quality, the multi-token-based LawBase suffers from degraded representations in multistep decoding, leading to more errors. Taken together, these findings suggest that form-meaning pairing can operate in a larger context, and this larger unit may offer advantages in future modeling of legal reasoning. In practice, this approach can significantly reduce the likelihood of hallucinations by anchoring legal citations as discrete, holistic tokens, thereby minimizing the risk of generating nonexistent or incorrect legal references. 2025.conll-1.3 @@ -79,7 +79,7 @@ IneGevers VictorDe MarezUniversiteit Antwerpen LunaDe BruyneUniversiteit Antwerpen - WalterDaelemansUniversity of Antwerp + WalterDaelemansUniversity of Antwerp 68-80 In this study, we take a closer look at how Winograd schema challenges can be used to evaluate common sense reasoning in LLMs. Specifically, we evaluate generative models of different sizes on the popular WinoGrande benchmark. We release WinoWhat, a new corpus, in which each instance of the WinoGrande validation set is paraphrased. Additionally, we evaluate the performance on the challenge across five common sense knowledge categories, giving more fine-grained insights on what types of knowledge are more challenging for LLMs. Surprisingly, all models perform significantly worse on WinoWhat, implying that LLM reasoning capabilities are overestimated on WinoGrande. To verify whether this is an effect of benchmark memorization, we match benchmark instances to LLM trainingdata and create two test-suites. We observe that memorization has a minimal effect on model performance on WinoGrande. 2025.conll-1.5 @@ -94,7 +94,7 @@ Thi-NhungNguyenVinAI Research HoangNgoVinAI Research DinhPhungMonash University - Thuy-TrangVuMonash University + Thuy-TrangVuMonash University Dat QuocNguyenQualcomm AI Research 81-92 Table understanding is key to addressing challenging downstream tasks such as table-based question answering and fact verification. Recent works have focused on leveraging Chain-of-Thought and question decomposition to solve complex questions requiring multiple operations on tables. However, these methods often suffer from a lack of explicit long-term planning and weak inter-step connections, leading to miss constraints within questions. In this paper, we propose leveraging the long-term planning capabilities of large language models (LLMs) to enhance table understanding. Our approach enables the execution of a long-term plan, where the steps are tightly interconnected and serve the ultimate goal, an aspect that methods based on Chain-of-Thought and question decomposition lack. In addition, our method effectively minimizes the inclusion of unnecessary details in the process of solving the next short-term goals, a limitation of methods based on Chain-of-Thought. Extensive experiments demonstrate that our method outperforms strong baselines and achieves state-of-the-art performance on WikiTableQuestions and TabFact datasets. @@ -139,7 +139,7 @@ Experiential Semantic Information and Brain Alignment: Are Multimodal Models Better than Language Models? AnnaBavarescoUniversity of Amsterdam - RaquelFernándezUniversity of Amsterdam and University of Amsterdam + RaquelFernándezUniversity of Amsterdam and University of Amsterdam 141-155 A common assumption in Computational Linguistics is that text representations learnt by multimodal models are richer and more human-like than those by language-only models, as they are grounded in images or audio—similar to how human language is grounded in real-world experiences. However, empirical studies checking whether this is true are largely lacking. We address this gap by comparing word representations from contrastive multimodal models vs. language-only ones in the extent to which they capture experiential information—as defined by an existing norm-based ‘experiential model’—and align with human fMRI responses. Our results indicate that, surprisingly, language-only models are superior to multimodal ones in both respects. Additionally, they learn more unique brain-relevant semantic information beyond that shared with the experiential model. Overall, our study highlights the need to develop computational models that better integrate the complementary semantic information provided by multimodal data sources. 2025.conll-1.10 @@ -161,7 +161,7 @@ Do Construction Distributions Shape Formal Language Learning In <fixed-case>G</fixed-case>erman <fixed-case>B</fixed-case>aby<fixed-case>LM</fixed-case>s? BastianBunzeckUniversität Bielefeld DanielDuranUniversität Bielefeld - SinaZarrießBielefeld University + SinaZarrießBielefeld University 169-186 We analyze the influence of utterance-level construction distributions in German child-directed/child-available speech on the resulting word-level, syntactic and semantic competence (and their underlying learning trajectories) in small LMs, which we train on a novel collection of developmentally plausible language data for German. We find that trajectories are surprisingly robust for markedly different distributions of constructions in the training data, which have little effect on final accuracies and almost no effect on global learning trajectories. While syntax learning benefits from more complex utterances, word-level learning culminates in better scores with more fragmentary utterances. We argue that LMs trained on developmentally plausible data can contribute to debates on how conducive different kinds of linguistic stimuli are to language learning. 2025.conll-1.12 @@ -192,8 +192,8 @@ Components of Creativity: Language Model-based Predictors for Clustering and Switching in Verbal Fluency - SinaZarrießBielefeld University - SimeonJunkerUniversität Bielefeld + SinaZarrießBielefeld University + SimeonJunkerUniversität Bielefeld JudithSiekerUniversität Bielefeld ÖzgeAlacamBielefeld University 216-232 @@ -252,7 +252,7 @@ Polarity inversion operators in <fixed-case>PLM</fixed-case> DavidKletz PascalAmsiliSorbonne Nouvelle (Paris 3) - MarieCanditoUniversité Paris Cité + MarieCanditoUniversité Paris Cité 312-322 From a linguistic perspective, negation is a unique and inherently compositional operator. In this study, we investigate whether the bert-large-cased Pretrained Language Model (PLM) properly encodes this compositional aspect of negation when embedding a token that falls within the scope of negation.To explore this, we train two external Multi-Layer Perceptrons to modify contextual embeddings in a controlled manner. The goal is to reverse the polarity information encoded in the embedding while preserving all other token-related information. The first MLP, called the Negator, transforms a negative polarity into a positive one, while the second, the Affirmator, performs the reverse transformation.We then conduct a series of evaluations to assess the effectiveness of these operators. Our results indicate that while the Negator/Affirmator is functional, it only partially simulates the negation operator. Specifically, applying it recursively does not allow us to recover the original polarity, suggesting an incomplete representation of negation within the PLM’s embeddings.In addition, a downstream evaluation on the Negated LAMA dataset reveals that the modifications introduced by the Negator/Affirmator lead to a slight improvement in the model’s ability to account for negation in its predictions. However, applying the Negator/Affirmator recursively results in degraded representations, further reinforcing the idea that negation is not fully compositional within PLM embeddings. 2025.conll-1.20 @@ -265,7 +265,7 @@ KennethLaiBrandeis University and Mass General Brigham AbhijnanNath NikhilKrishnaswamyColorado State University - JamesPustejovskyBrandeis University + JamesPustejovskyBrandeis University 323-333 Recent developments in aligning Large Language Models (LLMs) with human preferences have significantly enhanced their utility in human-AI collaborative scenarios. However, such approaches often neglect the critical role of “epistemic friction,” or the inherent resistance encountered when updating beliefs in response to new, conflicting, or ambiguous information. In this paper, we define *dynamic epistemic friction* as the resistance to epistemic integration, characterized by the misalignment between an agent’s current belief state and new propositions supported by external evidence. We position this within the framework of Dynamic Epistemic Logic, where friction emerges as nontrivial belief-revision during the interaction. We then present analyses from a situated collaborative task that demonstrate how this model of epistemic friction can effectively predict belief updates in dialogues, and we subsequently discuss how the model of belief alignment as a measure of epistemic resistance or friction can naturally be made more sophisticated to accommodate the complexities of real-world dialogue scenarios. 2025.conll-1.21 @@ -366,7 +366,7 @@ Lost in Variation? Evaluating <fixed-case>NLI</fixed-case> Performance in <fixed-case>B</fixed-case>asque and <fixed-case>S</fixed-case>panish Geographical Variants JaioneBengoetxeaUniversidad del País Vasco ItziarGonzalez-DiosUniversidad del País Vasco - RodrigoAgerriUniversity of the Basque Country + RodrigoAgerriUniversity of the Basque Country 452-468 In this paper, we evaluate the capacity of current language technologies to understand Basque and Spanish language varieties. We use Natural Language Inference (NLI) as a pivot task and introduce a novel, manually-curated parallel dataset in Basque and Spanish, along with their respective variants. Our empirical analysis of crosslingual and in-context learning experiments using encoder-only and decoder-based Large Language Models (LLMs) shows a performance drop when handling linguistic variation, especially in Basque. Error analysis suggests that this decline is not due to lexical overlap, but rather to the linguistic variation itself. Further ablation experiments indicate that encoder-only models particularly struggle with Western Basque, which aligns with linguistic theory that identifies peripheral dialects (e.g., Western) as more distant from the standard. All data and code are publicly available. 2025.conll-1.30 @@ -423,7 +423,7 @@ <fixed-case>GCG</fixed-case>-Based Artificial Languages for Evaluating Inductive Biases of Neural Language Models NadineEl-Naggar TatsukiKuribayashiMohamed bin Zayed University of Artificial Intelligence - TedBriscoeMohamed bin Zayed University of Artificial Intelligence + TedBriscoeMohamed bin Zayed University of Artificial Intelligence 540-556 Recent work has investigated whether extant neural language models (LMs) have an inbuilt inductive bias towards the acquisition of attested typologically-frequent grammatical patterns as opposed to infrequent, unattested, or impossible patterns using artificial languages (White and Cotterell, 2021; Kuribayashi et al., 2024). The use of artificial languages facilitates isolation of specific grammatical properties from other factors such as lexical or real-world knowledge, but also risks oversimplification of the problem.In this paper, we examine the use of Generalized Categorial Grammars (GCGs) (Wood, 2014) as a general framework to create artificial languages with a wider range of attested word order patterns, including those where the subject intervenes between verb and object (VSO, OSV) and unbounded dependencies in object relative clauses. In our experiments, we exemplify our approach by extending White and Cotterell (2021) and report some significant differences from existing results. 2025.conll-1.35 diff --git a/data/xml/2025.depling.xml b/data/xml/2025.depling.xml index 19bd4e81f6..929bb8cc4d 100644 --- a/data/xml/2025.depling.xml +++ b/data/xml/2025.depling.xml @@ -3,7 +3,7 @@ Proceedings of the Eighth International Conference on Dependency Linguistics (Depling, SyntaxFest 2025) - EvaHajičová + EvaHajičová SylvainKahane Association for Computational Linguistics
Ljubljana, Slovenia
@@ -42,7 +42,7 @@ LoicDe LangheUniversiteit Gent JasperDegraeuweUniversiteit Gent MelissaFarasynUniversiteit Gent - VeroniqueHosteUniversiteit Gent + VeroniqueHosteUniversiteit Gent 24-35 Dependency parsing of non-normative language varieties remains a challenge for modern NLP. While contemporary parsers excel at standardized languages, dialectal variation – especially in function words, conjunctives, and verb clustering – introduces syntactic ambiguity that disrupts traditional parsing approaches. In this paper, we conduct a quantitative evaluation of syntactic dependencies in Southern Dutch dialects, leveraging a standardized dialect corpus to isolate syntactic effects from lexical variation. Using a neural biaffine dependency parser with various mono- and multilingual transformer-based encoders, we benchmark parsing performance on standard Dutch, dialectal data, and mixed training sets. Our results demonstrate that incorporating dialect-specific data significantly enhances parsing accuracy, yet certain syntactic structures remain difficult to resolve, even with dedicated adaptation. These findings highlight the need for more nuanced parsing strategies and improved syntactic modeling for non-normative language varieties. 2025.depling-1.3 @@ -141,7 +141,7 @@ Periphrastic Verb Forms in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies LenkaKrippnerováFaculty of Mathematics and Physics, Charles University Prague - DanielZemanFaculty of Mathematics and Physics, Charles University Prague + DanielZemanFaculty of Mathematics and Physics, Charles University Prague 140-149 We propose a generalization of the morphological annotation in Universal Dependencies (UD) to phrases spanning multiple words, possibly discontinuous. Our focus area is that of periphrastic tenses, voices and other forms, typically consisting of a non-finite content verb combined with one or more auxiliaries; however, the same approach can be applied to other morphosyntactic constructions. We present a software tool that can detect periphrastic verb forms, extract the relevant morphological features from member words and combine them into new, phrase-level annotation. The tool currently detects periphrastic verb forms in 15 Slavic languages that are represented in UD and it is easily adaptable to other constructions and languages. Both the tool and the processed Slavic data are freely available. 2025.depling-1.15 diff --git a/data/xml/2025.dmr.xml b/data/xml/2025.dmr.xml index f0b7c49847..3783779edf 100644 --- a/data/xml/2025.dmr.xml +++ b/data/xml/2025.dmr.xml @@ -20,8 +20,8 @@ Comparing Manual and Automatic <fixed-case>UMR</fixed-case>s for <fixed-case>C</fixed-case>zech and <fixed-case>L</fixed-case>atin JanŠtěpánek - DanielZeman - MarkétaLopatková + DanielZeman + MarkétaLopatková FedericaGamba HanaHledíková 1–12 @@ -40,7 +40,7 @@ Boosting a Semantic Parser Using Treebank Trees Automatically Annotated with Unscoped Logical Forms MilesFrank - LenhartSchubert + LenhartSchubert 19–29 2025.dmr-1.3 frank-schubert-2025-boosting @@ -66,9 +66,9 @@ Representing <fixed-case>ISO</fixed-case>-Annotated Dynamic Information in <fixed-case>UMR</fixed-case> KiyongLee - HarryBunt - JamesPustejovsky - Alex C.Fang + HarryBunt + JamesPustejovsky + Alex C.Fang ChongwonPark 49–58 2025.dmr-1.6 diff --git a/data/xml/2025.dravidianlangtech.xml b/data/xml/2025.dravidianlangtech.xml index ed117c39e2..1861d53d26 100644 --- a/data/xml/2025.dravidianlangtech.xml +++ b/data/xml/2025.dravidianlangtech.xml @@ -5,7 +5,7 @@ Proceedings of the Fifth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages Bharathi RajaChakravarthi RubaPriyadharshini - Anand KumarMadasamy + Anand KumarMadasamy SajeethaThavareesan ElizabethSherly SaranyaRajiakodi @@ -274,7 +274,7 @@ OlgaKolesnikovaInstituto Politécnico Nacional José LuisOropeza GrigoriSidorovInstituto Politécnico Nacional - AlexanderGelbukhInstituto Politécnico Nacional + AlexanderGelbukhInstituto Politécnico Nacional 133-138 The increasing prevalence of AI-generated content, including fake product reviews, poses significant challenges in maintaining authenticity and trust in e-commerce systems. While much work has focused on detecting such reviews in high-resource languages, limited attention has been given to low-resource languages like Malayalam and Tamil. This study aims to address this gap by developing a robust framework to identify AI-generated product reviews in these languages. We explore a BERT-based approach for this task. Our methodology involves fine-tuning a BERT-based model specifically on Malayalam and Tamil datasets. The experiments are conducted using labeled datasets that contain a mix of human-written and AI-generated reviews. Performance is evaluated using the macro F1 score. The results show that the BERT-based model achieved a macro F1 score of 0.6394 for Tamil and 0.8849 for Malayalam. Preliminary results indicate that the BERT-based model performs significantly better for Malayalam than for Tamil in terms of the average Macro F1 score, leveraging its ability to capture the complex linguistic features of these languages. Finally, we open the source code of the implementation in the GitHub repository: AI-Generated-Product-Review-Code 2025.dravidianlangtech-1.22 @@ -731,7 +731,7 @@ <fixed-case>SSNT</fixed-case>rio@<fixed-case>D</fixed-case>ravidian<fixed-case>L</fixed-case>ang<fixed-case>T</fixed-case>ech 2025: Identification of <fixed-case>AI</fixed-case> Generated Content in <fixed-case>D</fixed-case>ravidian Languages using Transformers JBhuvana - MirnalineeT T + MirnalineeT T RohanR DiyaSeshan AvaneeshKoushik @@ -744,7 +744,7 @@ <fixed-case>SSNT</fixed-case>rio@<fixed-case>D</fixed-case>ravidian<fixed-case>L</fixed-case>ang<fixed-case>T</fixed-case>ech 2025: Sentiment Analysis in <fixed-case>D</fixed-case>ravidian Languages using Multilingual <fixed-case>BERT</fixed-case> JBhuvana - MirnalineeT T + MirnalineeT T DiyaSeshan RohanR AvaneeshKoushik @@ -909,7 +909,7 @@ <fixed-case>SSNT</fixed-case>rio@<fixed-case>D</fixed-case>ravidian<fixed-case>L</fixed-case>ang<fixed-case>T</fixed-case>ech2025: <fixed-case>LLM</fixed-case> Based Techniques for Detection of Abusive Text Targeting Women - MirnalineeT T + MirnalineeT T JBhuvana AvaneeshKoushik DiyaSeshan @@ -985,7 +985,7 @@ <fixed-case>SSNT</fixed-case>rio @ <fixed-case>D</fixed-case>ravidian<fixed-case>L</fixed-case>ang<fixed-case>T</fixed-case>ech 2025: Hybrid Approach for Hate Speech Detection in <fixed-case>D</fixed-case>ravidian Languages with Text and Audio Modalities JBhuvana - MirnalineeT T + MirnalineeT T RohanR DiyaSeshan AvaneeshKoushik diff --git a/data/xml/2025.evalmg.xml b/data/xml/2025.evalmg.xml index 43c7635b42..8902cb34cc 100644 --- a/data/xml/2025.evalmg.xml +++ b/data/xml/2025.evalmg.xml @@ -4,7 +4,7 @@ Proceedings of the First Workshop of Evaluation of Multi-Modal Generation Wei EmmaZhang - XiangDai + XiangDai DesmondElliot ByronFang MongyuanSim diff --git a/data/xml/2025.fever.xml b/data/xml/2025.fever.xml index 39514c0c0e..12d808f0c1 100644 --- a/data/xml/2025.fever.xml +++ b/data/xml/2025.fever.xml @@ -9,7 +9,7 @@ OanaCocarascu ZhijiangGuo ArpitMittal - MichaelSchlichtkrull + MichaelSchlichtkrull JamesThorne AndreasVlachos Association for Computational Linguistics @@ -137,8 +137,8 @@ PengfeiYuAmazon ChiHan Yi R.FungHong Kong University of Science and Technology - KathleenMcKeown - ChengXiangZhaiUniversity of Illinois, Urbana Champaign + KathleenMcKeown + ChengXiangZhaiUniversity of Illinois, Urbana Champaign ManlingLiNorthwestern University HengJiUniversity of Illinois, Urbana-Champaign 132-150 @@ -243,7 +243,7 @@ AsimAbbas MubashirAliUniversity of Birmingham YueFengUniversity of Birmingham - Mark G.Lee + Mark G.Lee VenelinKovatchevUniversity of Birmingham 238-246 In this paper, we present the system proposed by our team OldJoe, for the 8th edition of the AVeriTeC shared task, as part of the FEVER workshop. The objective of this task is to verify the factuality of real-world claims. Our approach integrates open source large language models, SQL, and in-context learning. We begin with embedding the knowledge store using a pretrained embedding language model then storing the outputs in a SQL database. Subsequently, we prompt an LLM to craft relevant questions based on the input claim, which are then used to guide the retrieval process. We further prompt the LLM to generate answers to the questions and predict the veracity of the original claim. Our system scored 0.49 on the HU-METEOR AVeriTeC score on the dev set and 0.15 on the Ev2R recall on the test set. Due to the time constraint we were unable to conduct additional experiments or further hyperparameter tuning. As a result, we adopted this pipeline configuration centered on the Qwen3-14B-AWQ model as our final submission strategy. The full pipeline is available on GitHub: https://github.com/farahft/OldJoe diff --git a/data/xml/2025.fieldmatters.xml b/data/xml/2025.fieldmatters.xml index 9fefcf2b0f..8e06629fc6 100644 --- a/data/xml/2025.fieldmatters.xml +++ b/data/xml/2025.fieldmatters.xml @@ -27,7 +27,7 @@ Automatic Phone Alignment of Code-switched <fixed-case>U</fixed-case>rum–<fixed-case>R</fixed-case>ussian Field Data EmilyAhn EleanorChodroffUniversity of Zurich - Gina-AnneLevowUniversity of Washington + Gina-AnneLevowUniversity of Washington 1-14 Code-switching, using multiple languages in a single utterance, is a common means of communication.In the language documentation process, speakers may code-switch between the target language and a language of broader communication; however, how to handle this mixed speech data is not always clearly addressed for speech research and specifically for a corpus phonetics pipeline.This paper investigates best practices for conducting phone-level forced alignment of code-switched field data using the Urum speech dataset from DoReCo. This dataset comprises 117 minutes of narrative utterances, of which 42% contain code-switched Urum–Russian speech.We demonstrate that the inclusion of Russian speech and Russian pretrained acoustic models can aid the alignment of Urum phones.Beyond using boundary alignment precision and accuracy metrics, we also discovered that the method of acoustic modeling impacted a downstream corpus phonetics investigation of code-switched Urum–Russian. 2025.fieldmatters-1.1 @@ -38,7 +38,7 @@ MariaKhelli SamuelCahyawijayaCohere AyuPurwariantiInstitut Teknologi Bandung - Genta IndraWinataCapital One + Genta IndraWinataCapital One 15-25 Cross-lingual transfer in natural language processing (NLP) models enhances multilingual performance by leveraging shared linguistic knowledge. However, traditional methods that process all data simultaneously often fail to mimic real-world scenarios, leading to challenges like catastrophic forgetting, where fine-tuning on new tasks degrades performance on previously learned ones. Our study explores this issue in multilingual contexts, focusing on linguistic differences affecting representational learning rather than just model parameters. We experiment with 52 languages using LoRA adapters of varying ranks to evaluate non-shared, partially shared, and fully shared parameters. Our aim is to see if parameter sharing through adapters can mitigate forgetting while preserving prior knowledge. We find that languages using non-Latin scripts are more susceptible to catastrophic forgetting, whereas those written in Latin script facilitate more effective cross-lingual transfer. 2025.fieldmatters-1.2 @@ -47,7 +47,7 @@ Breaking the Transcription Bottleneck: Fine-tuning <fixed-case>ASR</fixed-case> Models for Extremely Low-Resource Fieldwork Languages SiyuLiang - Gina-AnneLevowUniversity of Washington + Gina-AnneLevowUniversity of Washington 26-37 The development of Automatic Speech Recognition (ASR) has yielded impressive results, but its use in linguistic fieldwork remains limited. Recordings collected in fieldwork contexts present unique challenges, including spontaneous speech, environmental noise, and severely constrained datasets from under-documented languages. In this paper, we benchmark the performance of two fine-tuned multilingual ASR models, MMS and XLS-R, on five typologically diverse low-resource languages with control of training data duration. Our findings show that MMS is best suited when extremely small amounts of training data are available, whereas XLS-R shows parity performance once training data exceed one hour. We provide linguistically grounded analysis for further provide insights towards practical guidelines for field linguists, highlighting reproducible ASR adaptation approaches to mitigate the transcription bottleneck in language documentation. 2025.fieldmatters-1.3 @@ -79,7 +79,7 @@ A Practical Tool to Help Automate Interlinear Glossing: a Study on Mukrī <fixed-case>K</fixed-case>urdish HiwaAsadpourJohann Wolfgang Goethe Universität Frankfurt am Main ShuOkabeTechnische Universität München - AlexanderFraserTechnical University of Munich + AlexanderFraserTechnical University of Munich 65-75 Interlinear gloss generation aims to predict linguistic annotations (gloss) for a sentence in a language that is usually under ongoing documentation. Such output is a first draft for the linguist to work with and should reduce the manual workload.This article studies a simple glossing pipeline based on a Conditional Random Field and applies it to a small fieldwork corpus in Mukrī Kurdish, a variety of Central Kurdish.We mainly focus on making the tool as accessible as possible for field linguists, so it can run on standard computers without the need for GPUs. Our pipeline predicts common grammatical patterns robustly and, more generally, frequent combinations of morphemes and glosses. Although more advanced neural models do reach better results, our feature-based system still manages to be competitive and to provide interpretability.To foster further collaboration between field linguistics and NLP, we also provide some recommendations regarding documentation endeavours and release our pipeline code alongside. 2025.fieldmatters-1.6 diff --git a/data/xml/2025.findings.xml b/data/xml/2025.findings.xml index 74542969d8..5bd69166e8 100644 --- a/data/xml/2025.findings.xml +++ b/data/xml/2025.findings.xml @@ -22,7 +22,7 @@ From Lazy to Prolific: Tackling Missing Labels in Open Vocabulary Extreme Classification by Positive-Unlabeled Sequence Learning - Ranran HaoranZhang + Ranran HaoranZhang BensuUçareBay Inc. SoumikDeyeBay Inc. HansiWueBay Inc. @@ -101,7 +101,7 @@ <fixed-case>L</fixed-case>aw<fixed-case>I</fixed-case>nstruct: A Resource for Studying Language Model Adaptation to the Legal Domain JoelNiklausHarvey LuciaZhengStanford University - Arya D.McCarthyScaled Cognition + Arya D.McCarthyScaled Cognition ChristopherHahnX, the moonshot factory Brian MRosenGoogle PeterHendersonPrinceton University @@ -120,7 +120,7 @@ HaoYang HongyuanLuThe Chinese University of Hong Kong XinhuaZeng - YangLiu + YangLiu XiangZhangfacemind HaoranYang YumengZhang @@ -284,7 +284,7 @@ <fixed-case>PEMV</fixed-case>: Improving Spatial Distribution for Emotion Recognition in Conversations Using Proximal Emotion Mean Vectors ChenLin FeiLiWuhan University - DonghongJi + DonghongJi ChongTeng 345-357 Emotion Recognition in Conversation (ERC) aims to identify the emotions expressed in each utterance within a dialogue. Existing research primarily focuses on the analysis of contextual structure in dialogue and the interactions between different emotions. Nonetheless, ERC datasets often contain difficult-to-classify samples and suffer from imbalanced label distributions, which pose challenges to the spatial distribution of dialogue features. To tackle this issue, we propose a method that generates Proximal Emotion Mean Vectors (PEMV) based on emotion feature queues to optimize the spatial representation of text features. We design a Center Loss based on PEMVs to pull hard-to-classify samples closer to their respective category centers and employ Angle Loss to maximize the angular separation between different PEMVs. Furthermore, we utilize PEMV as a classifier to better adapt to the spatial structure of dialogue features. Extensive experiments on three widely used benchmark datasets demonstrate that our method achieves state-of-the-art performance and validates its effectiveness in optimizing feature space representations. @@ -353,7 +353,7 @@ XiaodanLiangSUN YAT-SEN UNIVERSITY HongChengThe Chinese University of Hong Kong QinglinLu - WeiLiuTencent + WeiLiuTencent 411-426 Text-to-image (T2I) generation models have significantly advanced in recent years. However, effective interaction with these models is challenging for average users due to the need for specialized prompt engineering knowledge and the inability to perform multi-turn image generation, hindering a dynamic and iterative creation process. Recent attempts have tried to equip Multi-modal Large Language Models (MLLMs) with T2I models to bring the user’s natural language instructions into reality. Hence, the output modality of MLLMs is extended, and the multi-turn generation quality of T2I models is enhanced thanks to the strong multi-modal comprehension ability of MLLMs. However, many of these works face challenges in identifying correct output modalities and generating coherent images accordingly as the number of output modalities increases and the conversations go deeper. Therefore, we propose DialogGen, an effective pipeline to align off-the-shelf MLLMs and T2I models to build a Multi-modal Interactive Dialogue System (MIDS) for multi-turn Text-to-Image generation. It is composed of drawing prompt alignment, careful training data curation, and error correction. Moreover, as the field of MIDS flourishes, comprehensive benchmarks are urgently needed to evaluate MIDS fairly in terms of output modality correctness and multi-modal output coherence. To address this issue, we introduce the Multi-modal Dialogue Benchmark (DialogBen), a comprehensive bilingual benchmark designed to assess the ability of MLLMs to generate accurate and coherent multi-modal content that supports image editing. It contains two evaluation metrics to measure the model’s ability to switch modalities and the coherence of the output images. Our extensive experiments on DialogBen and user study demonstrate the effectiveness of DialogGen in producing correct output modalities and coherent multi-modal outputs compared with other State-of-the-Art models. We hope that DialogBen can contribute to the community for building more powerful MIDS. 2025.findings-naacl.25 @@ -455,7 +455,7 @@ Teaching Large Language Models Number-Focused Headline Generation With Key Element Rationales ZhenQian - XiuzhenZhangRoyal Melbourne Institute of Technology + XiuzhenZhangRoyal Melbourne Institute of Technology XiaofeiXu FengXiaRoyal Melbourne Institute of Technology 533-550 @@ -467,7 +467,7 @@ Zero-Shot Strategies for Length-Controllable Summarization FabianRetkowskiKarlsruher Institut für Technologie - AlexanderWaibel + AlexanderWaibel 551-572 Large language models (LLMs) struggle with precise length control, particularly in zero-shot settings. We conduct a comprehensive study evaluating LLMs’ length control capabilities across multiple measures and propose practical methods to improve controllability. Our experiments with LLaMA 3 reveal stark differences in length adherence across measures and highlight inherent biases of the model. To address these challenges, we introduce a set of methods: length approximation, target adjustment, sample filtering, and automated revisions. By combining these methods, we demonstrate substantial improvements in length compliance while maintaining or enhancing summary quality, providing highly effective zero-shot strategies for precise length control without the need for model fine-tuning or architectural changes. With our work, we not only advance our understanding of LLM behavior in controlled text generation but also pave the way for more reliable and adaptable summarization systems in real-world applications. 2025.findings-naacl.34 @@ -595,7 +595,7 @@ Tethering Broken Themes: Aligning Neural Topic Models with Labels and Authors MayankNagda - PhilOstheimerRPTU Kaiserslautern-Landau + PhilOstheimerRPTU Kaiserslautern-Landau SophieFellenzUniversität Kaiserslautern 740-760 Topic models are a popular approach for extracting semantic information from large document collections. However, recent studies suggest that the topics generated by these models often do not align well with human intentions. Although metadata such as labels and authorship information are available, it has not yet been effectively incorporated into neural topic models. To address this gap, we introduce FANToM, a novel method to align neural topic models with both labels and authorship information. FANToM allows for the inclusion of this metadata when available, producing interpretable topics and author distributions for each topic. Our approach demonstrates greater expressiveness than conventional topic models by learning the alignment between labels, topics, and authors. Experimental results show that FANToM improves existing models in terms of both topic quality and alignment. Additionally, it identifies author interests and similarities. @@ -607,7 +607,7 @@ Towards Zero-Shot Multimodal Machine Translation MatthieuFuteral CordeliaSchmidGoogle, INRIA and Inria - BenoîtSagotInria + BenoîtSagotInria RachelBawdenInria 761-778 Current multimodal machine translation (MMT) systems rely on fully supervised data (i.e sentences with their translations and accompanying images), which is costly to collect and prevents the extension of MMT to language pairs with no such data. We propose a method to bypass the need for fully supervised data to train MMT systems, using multimodal English data only. Our method ( ZeroMMT) consists in adapting a strong text-only machine translation (MT) model by training it jointly on two objectives: visually conditioned masked language modelling and the Kullback-Leibler divergence between the original MT and new MMT outputs. We evaluate on standard MMT benchmarks and on CoMMuTE, a contrastive test set designed to evaluate how well models use images to disambiguate translations. ZeroMMT obtains disambiguation results close to state-of-the-art MMT models trained on fully supervised examples. To prove that ZeroMMT generalizes to languages with no fully supervised training data, we extend CoMMuTE to three new languages: Arabic, Russian and Chinese. We also show that we can control the trade-off between disambiguation capabilities and translation fidelity at inference time using classifier-free guidance and without any additional data. Our code, data and trained models are publicly accessible. @@ -617,7 +617,7 @@ Large-Scale Corpus Construction and Retrieval-Augmented Generation for <fixed-case>A</fixed-case>ncient <fixed-case>C</fixed-case>hinese Poetry: New Method and Data Insights - YangLiuSouth China University of Technology + YangLiuSouth China University of Technology LanLan JiahuanCao HiuyiCheng @@ -646,7 +646,7 @@ Dialetto, ma Quanto Dialetto? Transcribing and Evaluating Dialects on a Continuum Ryan Soh-EunShimLudwig-Maximilians-Universität München, University of Stuttgart, Universität Stuttgart and Institute for Natural Language Processing, University of Stuttgart - BarbaraPlankLudwig-Maximilians-Universität München + BarbaraPlankLudwig-Maximilians-Universität München 838-849 There is increasing interest in looking at dialects in NLP. However, most work to date still treats dialects as discrete categories. For instance, evaluative work in variation-oriented NLP for English often works with Indian English or African-American Venacular English as homogeneous categories, yet even within one variety there is substantial variation. We examine within-dialect variation and show that performance critically varies within categories. We measure speech-to-text performance on Italian dialects, and empirically observe a geographical performance disparity. This disparity correlates substantially (-0.5) with linguistic similarity to the highest performing dialect variety. We cross-examine our results against dialectometry methods, and interpret the performance disparity to be due to a bias towards dialects that are more similar to the standard variety in the speech-to-text model examined. We additionally leverage geostatistical methods to predict zero-shot performance at unseen sites, and find the incorporation of geographical information to substantially improve prediction performance, indicating there to be geographical structure in the performance distribution. 2025.findings-naacl.48 @@ -680,7 +680,7 @@ <fixed-case>LMM</fixed-case>s-Eval: Reality Check on the Evaluation of Large Multimodal Models KaichenZhang - BoLi + BoLi PeiyuanZhangUniversity of California, San Diego FanyiPu Joshua AdrianCahyono @@ -778,7 +778,7 @@ JinchaoZhang LixiangfangLixiangfang LichuanrongLichuanrong - BoLiInstitute of Information Engineering, Chinese Academy of Sciences + BoLiInstitute of Information Engineering, Chinese Academy of Sciences 1033-1044 Large language models (LLMs) exhibit exceptional performance across a wide range of natural language processing tasks, often relying on lengthy prompts to harness their full capabilities. However, extended prompts can lead to substantial computational overhead and increased hardware demands, limiting the scalability and efficiency of such models. In this paper, we propose DisComp, a two-stage prompt compression framework based on knowledge distillation that combines task-agnostic and task-aware strategies, designed to efficiently compress prompt length without compromising performance.In the first stage, task-agnostic compression is achieved through knowledge distillation, transferring the summarization capabilities of a LLM to a smaller, more efficient model. The distillation process combines cross-entropy loss and keyword matching loss to ensure the smaller model generates concise and informative summaries. In the second stage, sentence-level pruning is applied, where sentences are ranked by relevance to the query, and irrelevant sentences are pruned to retain only task-critical information. We evaluate our method on three benchmark datasets, LongBench , ZeroSCROLLS and NaturalQuestions. The results show that DisComp significantly outperforms previous task-agnostic and task-specific compression approaches, and it is up to 6.56× faster at inference compared to the best token-level compression method. 2025.findings-naacl.58 @@ -799,7 +799,7 @@ <fixed-case>RAMQA</fixed-case>: A Unified Framework for Retrieval-Augmented Multi-Modal Question Answering YangBaiFacebook ChristanGrantUniversity of Florida - Daisy ZheWangUniversity of Florida + Daisy ZheWangUniversity of Florida 1061-1076 Multi-modal retrieval-augmented Question Answering (MRAQA), integrating text and images, has gained significant attention in information retrieval (IR) and natural language processing (NLP). Traditional ranking methods rely on small encoder-based language models, which are incompatible with modern decoder-based generative large language models (LLMs) that have advanced various NLP tasks. To bridge this gap, we propose RAMQA, a unified framework combining learning-to-rank methods with generative permutation-enhanced ranking techniques. We first train a pointwise multi-modal ranker using LLaVA as the backbone. Then, we apply instruction tuning to train a LLaMA model for re-ranking the top-k documents using an innovative autoregressive multi-task learning approach. Our generative ranking model generates re-ranked document IDs and specific answers from document candidates in various permutations. Experiments on two MRAQA benchmarks, WebQA and MultiModalQA, show significant improvements over strong baselines, highlighting the effectiveness of our approach. Data and code will be made public once the paper is accepted. 2025.findings-naacl.60 @@ -834,7 +834,7 @@ ZhihaoYangDalian University of Technology LingLuoDalian University of Technology HongfeiLin - JianWang + JianWang 1112-1128 Few-Shot Document-Level Relation Extraction (FSDLRE) aims to develop models capable of generalizing to new categories with minimal support examples. Although Large Language Models (LLMs) demonstrate exceptional In-Context Learning (ICL) capabilities on many few-shot tasks, their performance on FSDLRE tasks remains suboptimal due to the significant gap between the task format and the intrinsic capabilities of language models, coupled with the complexity of ICL prompts for document-level text. To address these challenges, we introduce a novel meta-training approach for LLMs termed Prototype Tuning. We construct simulated episodes using data with relation types that do not overlap with the test corpus, fundamentally enhancing the ICL capabilities of LLMs in FSDLRE through meta-learning. To further enhance the effects of meta-learning, we innovatively integrate the concept of prototype into the fine-tuning process of LLMs. This involves aggregating entity pairs from support documents into prototypes within the prompts and altering the way of determining relation categories to identifying the closest prototype. Experimental results demonstrate that our LLMs trained with this approach outperform all baselines. Our proposed approach markedly improves the ICL capabilities of LLMs in FSDLRE and mitigates the impact of relation semantic discrepancies between the training corpus and the test corpus on model performance. 2025.findings-naacl.62 @@ -917,7 +917,7 @@ In-Context Example Selection via Similarity Search Improves Low-Resource Machine Translation Armel RandyZebazeINRIA - BenoîtSagotInria + BenoîtSagotInria RachelBawdenInria 1222-1252 The ability of generative large language models (LLMs) to perform in-context learning has given rise to a large body of research into how best to prompt models for various natural language processing tasks. In this paper, we focus on machine translation (MT), a task that has been shown to benefit from in-context translation examples. However no systematic studies have been published on how best to select examples, and mixed results have been reported on the usefulness of similarity-based selection over random selection, although these results have mainly been shown for high-resource languages only. We provide a study covering multiple LLMs and in-context example retrieval strategies. Contrarily to previously published results, we find that retrieval based on sentence embedding similarity can improve MT, especially for low-resource language directions, and we also discuss the balance between selection pool diversity and quality. Code and outputs will be made freely available. @@ -1011,7 +1011,7 @@ TerryRuasGeorg-August Universität Göttingen AndreGreiner-PetterGeorg-August Universität Göttingen BelaGippGeorg-August Universität Göttingen - AkikoAizawaNational Institute of Informatics + AkikoAizawaNational Institute of Informatics TimoSpinde 1370-1386 High annotation costs from hiring or crowdsourcing complicate the creation of large, high-quality datasets needed for training reliable text classifiers. Recent research suggests using Large Language Models (LLMs) to automate the annotation process, reducing these costs while maintaining data quality. LLMs have shown promising results in annotating downstream tasks like hate speech detection and political framing. Building on the success in these areas, this study investigates whether LLMs are viable for annotating a complex task of media bias detection and whether a downstream media bias classifier can be trained on such data. We create Annolexical, the first large-scale dataset for media bias classification with over 48k synthetically annotated examples. Our classifier fine-tuned on it surpasses all of the annotator LLMs by 5-9% in Mathew’s Correlation Coefficient (MCC) and performs close to or outperforms the model trained on human-labeled data when evaluated on two media bias benchmark datasets (BABE and BASIL). This study demonstrates how our approach significantly reduces the cost of dataset creation in the media bias domain and, by extension - the development of the classifiers, while our subsequent behavioral stress-testing reveals some of its current limitations and trade-offs. @@ -1049,7 +1049,7 @@ PeinanZhangCyberAgent AI Lab HidetakaKamigaitoNara Institute of Science and Technology HiroyaTakamuraAIST, National Institute of Advanced Industrial Science and Technology - ManabuOkumuraInstitute of Science Tokyo and Tokyo Institute of Technology, Tokyo Institute of Technology + ManabuOkumuraInstitute of Science Tokyo and Tokyo Institute of Technology, Tokyo Institute of Technology 1426-1439 Effective linguistic choices that attract potential customers play crucial roles in advertising success. This study aims to explore the linguistic features of ad texts that influence human preferences. Although the creation of attractive ad texts is an active area of research, progress in understanding the specific linguistic features that affect attractiveness is hindered by several obstacles. First, human preferences are complex and influenced by multiple factors, including their content, such as brand names, and their linguistic styles, making analysis challenging. Second, publicly available ad text datasets that include human preferences are lacking, such as ad performance metrics and human feedback, which reflect people’s interests. To address these problems, we present AdParaphrase, a paraphrase dataset that contains human preferences for pairs of ad texts that are semantically equivalent but differ in terms of wording and style. This dataset allows for preference analysis that focuses on the differences in linguistic features. Our analysis revealed that ad texts preferred by human judges have higher fluency, longer length, more nouns, and use of bracket symbols. Furthermore, we demonstrate that an ad text-generation model that considers these findings significantly improves the attractiveness of a given text. The dataset is publicly available at: https://github.com/CyberAgentAILab/AdParaphrase. 2025.findings-naacl.78 @@ -1114,7 +1114,7 @@ MoritzPlenzInstitute for Computational Linguistics, Heidelberg University, Ruprecht-Karls-Universität Heidelberg PhilippHeinisch JanoschGehring - PhilippCimianoBielefeld University and Bielefeld University + PhilippCimianoBielefeld University and Bielefeld University AnetteFrankRuprecht-Karls-Universität Heidelberg 1525-1553 Debating over conflicting issues is a necessary first step towards resolving conflicts. However, intrinsic perspectives of an arguer are difficult to overcome by persuasive argumentation skills. Proceeding from a debate to a deliberative process, where we can identify actionable options for resolving a conflict requires a deeper analysis of arguments and the perspectives they are grounded in - as it is only from there that one can derive mutually agreeable resolution steps. In this work we develop a framework for a deliberative analysis of arguments in a computational argumentation setup. We conduct a fine-grained analysis of perspectivized stances expressed in the arguments of different arguers or stakeholders on a given issue, aiming not only to identify their opposing views, but also shared perspectives arising from their attitudes, values or needs. We formalize this analysis in Perspectivized Stance Vectors that characterize the individual perspectivized stances of all arguers on a given issue. We construct these vectors by determining issue- and argument-specific concepts, and predict an arguer’s stance relative to each of them. The vectors allow us to measure a modulated (dis)agreement between arguers, structured by perspectives, which allows us to identify actionable points for conflict resolution, as a first step towards deliberation. @@ -1141,7 +1141,7 @@ Does Generative <fixed-case>AI</fixed-case> speak <fixed-case>N</fixed-case>igerian-<fixed-case>P</fixed-case>idgin?: Issues about Representativeness and Bias for Multilingualism in <fixed-case>LLM</fixed-case>s - David IfeoluwaAdelaniMcGill University + David IfeoluwaAdelaniMcGill University A. SezaDoğruözGhent University IyanuoluwaShodeBloomberg AnuoluwapoAremu @@ -1165,7 +1165,7 @@ Decoding Dark Matter: Specialized Sparse Autoencoders for Interpreting Rare Concepts in Foundation Models AashiqMuhamed - Mona T.DiabCarnegie Mellon University + Mona T.DiabCarnegie Mellon University VirginiaSmithCarnegie Mellon University 1604-1635 Understanding and mitigating the potential risks associated with foundation models (FMs) hinges on developing effective interpretability methods. Sparse Autoencoders (SAEs) have emerged as a promising tool for disentangling FM representations, but they struggle to capture rare, yet crucial concepts in the data. We introduce Specialized Sparse Autoencoders (SSAEs), designed to illuminate these elusive dark matter features by focusing on specific subdomains. We present a practical recipe for training SSAEs, demonstrating the efficacy of dense retrieval for data selection and the benefits of Tilted Empirical Risk Minimization as a training objective to improve concept recall. Our evaluation of SSAEs on standard metrics, such as downstream perplexity and L_0 sparsity, show that they effectively capture subdomain tail concepts, exceeding the capabilities of general-purpose SAEs. We showcase the practical utility of SSAEs in a case study on the Bias in Bios dataset, where SSAEs achieve a 12.5% increase in worst-group classification accuracy over the pretrained general-purpose SAE when applied to remove spurious gender information. SSAEs provide a powerful new lens for peering into the inner workings of FMs in subdomains. @@ -1177,7 +1177,7 @@ <fixed-case>MA</fixed-case>i<fixed-case>DE</fixed-case>-up: Multilingual Deception Detection of <fixed-case>AI</fixed-case>-generated Hotel Reviews OanaIgnatSanta Clara University XiaomengXu - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan 1636-1653 Deceptive reviews are becoming increasingly common, especially given the increase in performance and the prevalence of LLMs. While work to date has addressed the development of models to differentiate between truthful and deceptive human reviews, much less is known about the distinction between real reviews and AI-authored fake reviews. Moreover, most of the research so far has focused primarily on English, with very little work dedicated to other languages. In this paper, we compile and make publicly available the MAiDE-up dataset, consisting of 10,000 real and 10,000 AI-generated fake hotel reviews, balanced across ten languages. Using this dataset, we conduct extensive linguistic analyses to (1) compare the AI fake hotel reviews to real hotel reviews, and (2) identify the factors that influence the deception detection model performance. We explore the effectiveness of several models for deception detection in hotel reviews across three main dimensions: sentiment, location, and language. We find that these dimensions influence how well we can detect AI-generated fake reviews. 2025.findings-naacl.88 @@ -1224,7 +1224,7 @@ AdilSoubkiState University of New York at Stony Brook JohnMurzaku, State University of New York at Stony Brook PeterZengState University of New York at Stony Brook - OwenRambowStony Brook University + OwenRambowStony Brook University 1701-1708 The NLP community has broadly focused on text-only approaches of cognitive state tasks, but audio can provide vital missing cues through prosody. We posit that text-to-speech models learn to track aspects of cognitive state in order to produce naturalistic audio, and that the signal audio models implicitly identify is orthogonal to the information that language models exploit. We present Synthetic Audio Data fine-tuning (SAD), a framework where we show that 7 tasks related to cognitive state modeling benefit from multimodal training on both text and zero-shot synthetic audio data from an off-the-shelf TTS system. We show an improvement over the text-only modality when adding synthetic audio data to text-only corpora. Furthermore, on tasks and corpora that do contain gold audio, we show our SAD framework achieves competitive performance with text and synthetic audio compared to text and gold audio. 2025.findings-naacl.92 @@ -1276,13 +1276,13 @@ ValentinaPyatkin JacobMorrison LJMiranda - Bill YuchenLin + Bill YuchenLin KhyathiChandu NouhaDziri SachinKumar TomZick YejinChoi - Noah A.Smith + Noah A.Smith HannanehHajishirzi 1755-1797 Reward models (RMs) are at the crux of successfully using RLHF to align pretrained models to human preferences, yet there has been relatively little study that focuses on evaluation of those models. Evaluating reward models presents an opportunity to understand the opaque technologies used for alignment of language models and which values are embedded in them. Resources for reward model training and understanding are sparse in the nascent open-source community around them. To enhance scientific understanding of reward models, we present RewardBench, a benchmark dataset and code-base for evaluation. The RewardBench dataset is a collection of prompt-chosen-rejected trios spanning chat, reasoning, and safety, to benchmark how reward models perform on challenging, structured and out-of-distribution queries. We create specific comparison datasets for RMs that have subtle, but verifiable reasons (e.g. bugs, incorrect facts) why one answer should be preferred to another. On the RewardBench leaderboard, we evaluate RMs trained with a variety of methods, such as the direct MLE training of classifiers and the implicit reward modeling of Direct Preference Optimization (DPO). We present many findings on propensity for refusals, reasoning limitations, and instruction following shortcomings of various reward models towards a better understanding of the RLHF process. @@ -1408,7 +1408,7 @@ <fixed-case>P</fixed-case>roxy<fixed-case>LM</fixed-case>: Predicting Language Model Performance on Multilingual Tasks via Proxy Models DavidAnugraha - Genta IndraWinataCapital One + Genta IndraWinataCapital One ChenyueLiThe Hong Kong University of Science and Technology Patrick AmadeusIrawan En-Shiun AnnieLee @@ -1470,7 +1470,7 @@ JunzhangLiuColumbia University XudongLinColumbia University ZhecanWangUniversity of California, Los Angeles - Shih-FuChangColumbia University and Columbia University + Shih-FuChangColumbia University and Columbia University 2099-2116 The task of predicting time and location from images is challenging and requires complex human-like puzzle-solving ability over different clues. In this work, we formalize this ability into core skills and implement them using different modules in an expert pipeline called PuzzleGPT. PuzzleGPT consists of a perceiver to identify visual clues, a reasoner to deduce prediction candidates, a combiner to combinatorially combine information from different clues, a web retriever to get external knowledge if the task can’t be solved locally, and a noise filter for robustness. This results in a zero-shot, interpretable, and robust approach that records state-of-the-art performance on two datasets – TARA and WikiTilo. PuzzleGPT outperforms large VLMs such as BLIP-2, InstructBLIP, LLaVA, and even GPT-4V, as well as automatically generated reasoning pipelines like VisProg, by at least 32% and 38%, respectively. It even rivals or surpasses finetuned models. 2025.findings-naacl.111 @@ -1505,7 +1505,7 @@ Is Semantic Chunking Worth the Computational Cost? RenyiQuVectara RuixuanTu - Forrest ShengBaoVectara, Inc. + Forrest ShengBaoVectara, Inc. 2155-2177 Recent advances in Retrieval-Augmented Generation (RAG) systems have popularized semantic chunking, which aims to improve retrieval performance by dividing documents into semantically coherent segments. Despite its growing adoption, the actual benefits over simpler fixed-size chunking, where documents are split into consecutive, fixed-size segments, remain unclear. This study systematically evaluates the effectiveness of semantic chunking using three common retrieval-related tasks: document retrieval, evidence retrieval, and retrieval-based answer generation. The results show that the computational costs associated with semantic chunking are not justified by consistent performance gains. These findings challenge the previous assumptions about semantic chunking and highlight the need for more efficient chunking strategies in RAG systems. 2025.findings-naacl.114 @@ -1541,7 +1541,7 @@ LinAi PengyuanShiColumbia University KaanDonbekciColumbia University - JuliaHirschbergColumbia University + JuliaHirschbergColumbia University 2202-2218 2025.findings-naacl.117 wu-etal-2025-beyond @@ -1785,7 +1785,7 @@ JaesungHwang JingunKwon HidetakaKamigaito - ManabuOkumura + ManabuOkumura 2489-2500 This study investigates retrieval-augmented summarization by specifically examining the impact of exemplar summary lengths because previous methods have not considered length constraints. We propose a Diverse Length-aware Maximal Marginal Relevance (DL-MMR) algorithm to better control summary lengths. This algorithm combines the query relevance with diverse target lengths in retrieval-augmented summarization. Unlike previous methods that necessitate exhaustive exemplar-exemplar relevance comparisons using MMR, DL-MMR considers the exemplar target length as well and avoids comparing exemplars to each other, thereby reducing computational cost and conserving memory during the construction of an exemplar pool. Experimental results showed the effectiveness of DL-MMR, which considers length diversity, compared to the original MMR algorithm. DL-MMR additionally showed the effectiveness in memory saving of 781,513 times and computational cost reduction of 500,092 times, while maintaining the same level of informativeness. 2025.findings-naacl.134 @@ -1801,7 +1801,7 @@ KeZouNational University of Singapore NinghaoLiuUniversity of Georgia Yih ChungThamNational University of Singapore - XiuzhenZhangRoyal Melbourne Institute of Technology + XiuzhenZhangRoyal Melbourne Institute of Technology QingyuChenYale University 2501-2522 The prevalence of vision-threatening eye diseases is a significant global burden, with many cases remaining undiagnosed or diagnosed too late for effective treatment. Large vision-language models (LVLMs) have the potential to assist in understanding anatomical information, diagnosing eye diseases, and drafting interpretations and follow-up plans, thereby reducing the burden on clinicians and improving access to eye care. However, limited benchmarks are available to assess LVLMs’ performance in ophthalmology-specific applications. In this study, we introduce LMOD, a large-scale multimodal ophthalmology benchmark consisting of 21,993 instances across (1) five ophthalmic imaging modalities: optical coherence tomography, color fundus photographs, scanning laser ophthalmoscopy, lens photographs, and surgical scenes; (2) free-text, demographic, and disease biomarker information; and (3) primary ophthalmology-specific applications such as anatomical information understanding, disease diagnosis, and subgroup analysis. In addition, we benchmarked 13 state-of-the-art LVLM representatives from closed-source, open-source, and medical domains. The results demonstrate a significant performance drop for LVLMs in ophthalmology compared to other domains. Systematic error analysis further identified six major failure modes: misclassification, failure to abstain, inconsistent reasoning, hallucination, assertions without justification, and lack of domain-specific knowledge. In contrast, supervised neural networks specifically trained on these tasks as baselines demonstrated high accuracy. These findings underscore the pressing need for benchmarks in the development and validation of ophthalmology-specific LVLMs. @@ -1874,7 +1874,7 @@ LifengShangHuawei Technologies Ltd. XinJiang QunLiuHuawei Noah’s Ark Lab - Kam-FaiWongThe Chinese University of Hong Kong + Kam-FaiWongThe Chinese University of Hong Kong 2586-2606 Current research found the issue of Early Answering in large language models (LLMs), where the models already have an answer before generating the Chain-of-Thought (CoT). This phenomenon suggests a potential lack of necessary dependency between the predicted answer and the reasoning process. Consequently, two important questions arise: (1) Is CoT still necessary if the model already has an answer? (2) Can the correctness of the answer serve as valid evidence for the correctness of CoT? To address these questions, we propose a method, namely Chain-of-Probe (CoP), to probe changes in confidence during the model’s reasoning. The probing results show that in a significant number of question-answer cases, CoT appears to be unnecessary, and this necessity correlates with the simplicity of the task, defined by the reasoning steps required. Furthermore, by analyzing patterns in confidence change, we examine the correctness of the model’s reasoning. Our validation reveals that many responses, although correct in their final answer, contain errors in their reasoning process. To this end, we propose a strategic approach based on CoP to prioritize answers with correct reasoning among multiple candidates, thereby bolstering the reliability of the model’s reasoning. 2025.findings-naacl.140 @@ -1924,7 +1924,7 @@ ZheChen ZheFangEast China Normal University YuxiangSong - ManLan + ManLan 2642-2653 With the scale of Large Language Models(LLMs) and the size of the training data continuing to expand, the computational costs required for training or tuning have significantly increased as well. In this work we propose an efficient and effective Large-Scale Data Compression (LSDC) method to substantially reduce the size of training data and thus enhance the training efficiency without compromising the performance of LLMs through a bifurcated quantization strategy. Specifically, our method first segments the dataset into multiple clusters, significantly reducing the time and memory requirements for data compression. Then, during the second phase of coreset selection, the diversity of samples is ensured by maximizing the submodular gain in order to avoid performance degradation. The comparative experiments showed that the performance of LLMs fine-tuned on a 20% compressed subset of the Alpaca dataset using LSDC outperformed those on the full dataset. Moreover,on a domain-specific instruction dataset of millions of samples, the LLMs fine-tuned on a 10% compressed dataset using LSDC outperformed those on the entire dataset, which dramatically enhances the domain-adaption capabilities of LLMs. This provides a promising potential of LSDC in training bigger LLMs from scratch and supervised fine-tuning as well. 2025.findings-naacl.143 @@ -2077,8 +2077,8 @@ JujiaZhao WenjieWangNational University of Singapore ChenXu - See-KiongNgNational University of Singapore - Tat-SengChuaNational University of Singapore + See-KiongNgNational University of Singapore + Tat-SengChuaNational University of Singapore 2852-2865 Large Language Models (LLMs) have showcased their potential in building generative recommendation systems through fine-tuning user behavior data. However, utilizing the user behavior data may pose significant privacy risks like in the traditional recommender models, potentially leading to ethical dilemmas and violations of data protection regulations. To address the privacy concerns, Federated Learning for Recommendation (Fed4Rec) has been identified as a promising solution. However, directly applying Fed4Rec in the LLM context introduces two challenges: 1) exacerbated client performance imbalance, which ultimately impacts the system’s long-term effectiveness, and 2) substantial client resource costs, posing a high demand for clients’ both computational and storage capability to locally train and infer LLMs.To tackle these challenges, we propose a federated framework for LLM-based recommendation (shorted as FELLRec). Generally, FELLRec designs two key strategies. 1) Dynamic balance strategy, which designs dynamic parameter aggregation and learning speed for different clients during training, aiming to ensure relatively balanced performance across clients. 2) Flexible storage strategy, which selectively retains certain sensitive LLM layers on the client side, while offloading other layers to the server, aiming to preserve privacy while saving resources. Specifically, FELLRec flexibly maintains those input and output layers on the client side to ensure the protection of all sensitive information. Experiment results show that FELLRec can achieve a more balanced client performance and improved overall performance in a computational and storage-efficient way while safeguarding user privacy well. 2025.findings-naacl.155 @@ -2094,7 +2094,7 @@ YichenDiTsinghua University, Tsinghua University LijieWenTsinghua University IrwinKing - Philip S.YuUniversity of Illinois, Chicago + Philip S.YuUniversity of Illinois, Chicago 2866-2882 Watermarking algorithms for large language models (LLMs) have attained high accuracy in detecting LLM-generated text. However, existing methods primarily focus on distinguishing fully watermarked text from non-watermarked text, overlooking real-world scenarios where LLMs generate only small sections within large documents. In this scenario, balancing time complexity and detection performance poses significant challenges. This paper presents WaterSeeker, a novel approach to efficiently detect and locate watermarked segments amid extensive natural text. It first applies an efficient anomaly extraction method to preliminarily locate suspicious watermarked regions. Following this, it conducts a local traversal and performs full-text detection for more precise verification. Theoretical analysis and experimental results demonstrate that WaterSeeker achieves a superior balance between detection accuracy and computational efficiency. Moreover, its localization capability lays the foundation for building interpretable AI detection systems. Our code is available at https://github.com/THU-BPM/WaterSeeker. 2025.findings-naacl.156 @@ -2106,7 +2106,7 @@ ChanheeParkKorea University HyeonseokMoonKorea University ChanjunParkKorea University - HeuiseokLim + HeuiseokLim 2883-2900 Retrieval-Augmented Generation (RAG) has gained prominence as an effective method for enhancing the generative capabilities of Large Language Models (LLMs) through the incorporation of external knowledge. However, the evaluation of RAG systems remains a challenge, due to the intricate interplay between retrieval and generation components. This limitation has resulted in a scarcity of benchmarks that facilitate a detailed, component-specific assessment. In this work, we present MIRAGE, a Question Answering dataset specifically designed for RAG evaluation. MIRAGE consists of 7,560 curated instances mapped to a retrieval pool of 37,800 entries, enabling an efficient and precise evaluation of both retrieval and generation tasks. We also introduce novel evaluation metrics aimed at measuring RAG adaptability, encompassing dimensions such as noise vulnerability, context acceptability, context insensitivity, and context misinterpretation. Through comprehensive experiments across various retriever-LLM configurations, we provide new insights into the optimal alignment of model pairs and the nuanced dynamics within RAG systems. The dataset and evaluation code are publicly available, allowing for seamless integration and customization in diverse research settings. 2025.findings-naacl.157 @@ -2122,7 +2122,7 @@ HasanIqbalMohamed bin Zayed University of Artificial Intelligence DhruvSahnan IrynaGurevychInstitute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 2901-2914 Fact-checking long-form text is challenging, and it is therefore common practice to break it down into multiple atomic claims. The typical approach to fact-checking these atomic claims involves retrieving a fixed number of pieces of evidence, followed by a verification step. However, this method is usually not cost-effective, as it underutilizes the verification model’s internal knowledge of the claim and fails to replicate the iterative reasoning process in human search strategies. To address these limitations, we propose FIRE, a novel agent-based framework that integrates evidence retrieval and claim verification in an iterative manner. Specifically, FIRE employs a unified mechanism to decide whether to provide a final answer or generate a subsequent search query, based on its confidence in the current judgment. We compare FIRE with other strong fact-checking frameworks and find that it achieves slightly better performance while reducing large language model (LLM) costs by an average of 7.6 times and search costs by 16.5 times. These results indicate that FIRE holds promise for application in large-scale fact-checking operations. 2025.findings-naacl.158 @@ -2261,7 +2261,7 @@ Language Modeling with Editable External Knowledge - Belinda Z.LiMassachusetts Institute of Technology + Belinda Z.LiMassachusetts Institute of Technology EmmyLiuSchool of Computer Science, Carnegie Mellon University AlexisRossMassachusetts Institute of Technology and Allen Institute for Artificial Intelligence AbbasZeitounMassachusetts Institute of Technology @@ -2370,7 +2370,7 @@ Zora ZhiruoWang AkariAsaiPaul G. Allen School of Computer Science & Engineering, University of Washington Xinyan VelocityYuUniversity of Southern California - Frank F.XuCarnegie Mellon University + Frank F.XuCarnegie Mellon University YiqingXie GrahamNeubigCarnegie Mellon University DanielFriedMeta AI and Carnegie Mellon University @@ -2675,7 +2675,7 @@ ShiminLi JinlanFu XipengQiuFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 3563-3605 As Artificial General Intelligence (AGI) becomes increasingly integrated into various facets of human life, ensuring the safety and ethical alignment of such systems is paramount. Previous studies primarily focus on single-modality threats, which may not suffice given the integrated and complex nature of cross-modality interactions. We introduce a novel safety alignment challenge called Safe Inputs but Unsafe Output (*SIUO*) to evaluate cross-modality safety alignment. Specifically, it considers cases where single modalities are safe independently but could potentially lead to unsafe or unethical outputs when combined. To empirically investigate this problem, we developed the *SIUO*, a cross-modality benchmark encompassing 9 critical safety domains, such as self-harm, illegal activities, and privacy violations. Our findings reveal substantial safety vulnerabilities in both closed- and open-source LVLMs, such as GPT-4V and LLaVA, underscoring the inadequacy of current models to reliably interpret and respond to complex, real-world scenarios. 2025.findings-naacl.198 @@ -2688,7 +2688,7 @@ SeungyoonLeeKorea University HyeonseokMoonKorea University ChanjunParkKorea University - HeuiseokLim + HeuiseokLim 3606-3620 Recent advancements in Large Language Models (LLMs) have significantly enhanced interactions between users and models. These advancements concurrently underscore the need for rigorous safety evaluations due to the manifestation of social biases, which can lead to harmful societal impacts. Despite these concerns, existing benchmarks may overlook the intrinsic weaknesses of LLMs, which can generate biased responses even with simple adversarial instructions. To address this critical gap, we introduce a new benchmark, Fairness Benchmark in LLM under Extreme Scenarios (FLEX), designed to test whether LLMs can sustain fairness even when exposed to prompts constructed to induce bias. To thoroughly evaluate the robustness of LLMs, we integrate prompts that amplify potential biases into the fairness assessment. Comparative experiments between FLEX and existing benchmarks demonstrate that traditional evaluations may underestimate the inherent risks in models. This highlights the need for more stringent LLM evaluation benchmarks to guarantee safety and fairness. 2025.findings-naacl.199 @@ -2916,7 +2916,7 @@ SoyeongJeongKorea Advanced Institute of Science & Technology HuijeLeeKorea Advanced Institute of Science & Technology HoyunSongKorea Advanced Institute of Science & Technology - Jong C.ParkKorea Advanced Institute of Science and Technology + Jong C.ParkKorea Advanced Institute of Science and Technology YoungjinKwonKorea Advanced Institute of Science & Technology 3895-3911 Accelerating inference in Large Language Models (LLMs) is critical for real-time interactions, as they have been widely incorporated into real-world services. Speculative decoding, a fully algorithmic solution, has gained attention for improving inference speed by drafting and verifying tokens, thereby generating multiple tokens in a single forward pass. However, current drafting strategies usually require significant fine-tuning or have inconsistent performance across tasks. To address these challenges, we propose Hierarchy Drafting (HD), a novel lossless drafting approach that organizes various token sources into multiple databases in a hierarchical framework based on temporal locality. In the drafting step, HD sequentially accesses multiple databases to obtain draft tokens from the highest to the lowest locality, ensuring consistent acceleration across diverse tasks and minimizing drafting latency. Our experiments on Spec-Bench using LLMs with 7B and 13B parameters demonstrate that HD outperforms existing database drafting methods, achieving robust inference speedups across model sizes, tasks, and temperatures. @@ -2984,8 +2984,8 @@ <fixed-case>XAMPLER</fixed-case>: Learning to Retrieve Cross-Lingual In-Context Examples PeiqinLinInstitut für Informatik - AndreMartinsInstituto Superior Técnico and Unbabel - HinrichSchuetze + AndreMartinsInstituto Superior Técnico and Unbabel + HinrichSchuetze 3968-3977 Recent studies indicate that leveraging off-the-shelf or fine-tuned retrievers, capable of retrieving relevant in-context examples tailored to the input query, enhances few-shot in-context learning of English. However, adapting these methods to other languages, especially low-resource ones, poses challenges due to the scarcity of cross-lingual retrievers and annotated data. Thus, we introduce XAMPLER: Cross-Lingual Example Retrieval, a method tailored to tackle the challenge of cross-lingual in-context learning using only annotated English data. XAMPLER first trains a retriever based on Glot500, a multilingual small language model, using positive and negative English examples constructed from the predictions of a multilingual large language model, i.e., MaLA500. Leveraging the cross-lingual capacity of the retriever, it can directly retrieve English examples as few-shot examples for in-context learning of target languages. Experiments on two multilingual text classification benchmarks, namely SIB200 with 176 languages and MasakhaNEWS with 16 languages, demonstrate that XAMPLER substantially improves the in-context learning performance across languages. 2025.findings-naacl.221 @@ -2995,7 +2995,7 @@ Evaluating Cultural and Social Awareness of <fixed-case>LLM</fixed-case> Web Agents HaoyiQiuUCLA Computer Science Department, University of California, Los Angeles - AlexanderFabbriSalesForce.com + AlexanderFabbriSalesForce.com DivyanshAgarwalSalesforce.com Kung-HsiangHuangSalesForce.com SarahTanSalesForce.com @@ -3039,8 +3039,8 @@ A Recipe of Parallel Corpora Exploitation for Multilingual Large Language Models PeiqinLinInstitut für Informatik - AndreMartinsInstituto Superior Técnico and Unbabel - HinrichSchuetze + AndreMartinsInstituto Superior Técnico and Unbabel + HinrichSchuetze 4038-4050 Recent studies have highlighted the potential of exploiting parallel corpora to enhance multilingual large language models, improving performance in both bilingual tasks, e.g., machine translation, and general-purpose tasks, e.g., text classification. Building upon these findings, our comprehensive study aims to identify the most effective strategies for leveraging parallel corpora. We investigate the impact of parallel corpora quality and quantity, training objectives, and model size on the performance of multilingual large language models enhanced with parallel corpora across diverse languages and tasks. Our analysis reveals several key insights: (i) filtering noisy translations is essential for effectively exploiting parallel corpora, while language identification and short sentence filtering have little effect; (ii) even a corpus with just 10K parallel sentences can yield results comparable to those obtained from much larger datasets; (iii) employing only the machine translation objective yields the best results among various training objectives and their combinations; (iv) larger multilingual language models benefit more from parallel corpora than smaller models. Our study offers valuable insights into the optimal utilization of parallel corpora to enhance multilingual large language models, extending the generalizability of previous findings from limited languages and tasks to a broader range of scenarios. 2025.findings-naacl.225 @@ -3083,7 +3083,7 @@ VitaliiHirak Badr M.Abdullah DietrichKlakow - BerndMöbiusUniversität des Saarlandes + BerndMöbiusUniversität des Saarlandes TaniaAvgustinova 4083-4092 This study analyzes the attention patterns of fine-tuned encoder-only models based on the BERT architecture (BERT-based models) towards two distinct types of Multiword Expressions (MWEs): idioms and microsyntactic units (MSUs). Idioms present challenges in semantic non-compositionality, whereas MSUs demonstrate unconventional syntactic behavior that does not conform to standard grammatical categorizations. We aim to understand whether fine-tuning BERT-based models on specific tasks influences their attention to MWEs, and how this attention differs between semantic and syntactic tasks. We examine attention scores to MWEs in both pre-trained and fine-tuned BERT-based models. We utilize monolingual models and datasets in six Indo-European languages — English, German, Dutch, Polish, Russian, and Ukrainian. Our results show that fine-tuning significantly influences how models allocate attention to MWEs. Specifically, models fine-tuned on semantic tasks tend to distribute attention to idiomatic expressions more evenly across layers. Models fine-tuned on syntactic tasks show an increase in attention to MSUs in the lower layers, corresponding with syntactic processing requirements. @@ -3108,7 +3108,7 @@ <fixed-case>M</fixed-case>ojo<fixed-case>B</fixed-case>ench: Language Modeling and Benchmarks for Mojo - NishatRaihan + NishatRaihan Joanna C. S.Santos MarcosZampieri 4109-4128 @@ -3272,7 +3272,7 @@ HaoLiMicrosoft Research JingyuanSunUniversity of Manchester ChenghuaLinUniversity of Manchester - RizaBatista-NavarroUniversity of Manchester + RizaBatista-NavarroUniversity of Manchester 4299-4308 Multi-modal Large Language Models (MLLMs) have achieved remarkable success by integrating visual and textual modalities. However, they incur significant computational overhead due to the large number of vision tokens processed, limiting their practicality in resource-constrained environments. We introduce Language-Guided Vision Token Pruning (LVPruning) for MLLMs, an effective yet simple method that significantly reduces the computational burden while preserving model performance. LVPruning employs cross-attention modules to compute the importance of vision tokens based on their interaction with language tokens, determining which to prune. Importantly, LVPruning can be integrated without modifying the original MLLM parameters, which makes LVPruning simple to apply or remove. Our experiments show that LVPruning can effectively reduce up to 90% of vision tokens by the middle layer of LLaVA-1.5, resulting in a 62.1% decrease in inference Tera Floating-Point Operations Per Second (TFLOPs), with an average performance loss of just 0.45% across nine multi-modal benchmarks. 2025.findings-naacl.242 @@ -3299,7 +3299,7 @@ XinyuanLunational university of singaore, National University of Singapore LiangmingPanUniversity of Arizona YuboMaSchool of Computer Science and Engineering, Nanyang Technological University - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence Min-YenKanNational University of Singapore 4323-4339 Current Large Language Models (LLMs) exhibit limited ability to understand table structures and to apply precise numerical reasoning, which is crucial for tasks such as table question answering and table-based fact verification. To address these challenges, we introduce our Tool-Augmented Reasoning framework for Tables (TART), which integrates LLMs with specialized tools. TART contains three key components: a table formatter to ensure accurate data representation, a tool maker to develop specific computational tools, and an explanation generator to maintain explainability. We also present the TOOLTAB dataset, a new benchmark designed specifically for training LLMs in table–tool integration. Our experiments indicate that TART achieves substantial improvements over existing methods (e.g., Chain-of-Thought) by improving both the precision of data processing and the clarity of the reasoning process. Notably, TART paired with CodeLlama achieves 90.0% of the accuracy of the closed-sourced LLM GPT-3.5-turbo, highlighting its robustness in diverse real-world scenarios. Both code and data are openly available at https://github.com/XinyuanLu00/TART. @@ -3373,7 +3373,7 @@ ChangSuChoi HanGyeolYoo HyeonSeokLimSeoul National University of Science and Technology - KyungTaeLimKorea Advanced Institute of Science & Technology + KyungTaeLimKorea Advanced Institute of Science & Technology JungyeulParkThe University of British Columbia 4412-4426 This study explores the integration of automated writing evaluation (AWE) and grammatical error correction (GEC) through multitask learning, demonstrating how combining these distinct tasks can enhance performance in both areas. By leveraging a shared learning framework, we show that models trained jointly on AWE and GEC outperform those trained on each task individually. To support this effort, we introduce a dataset specifically designed for multitask learning using AWE and GEC. Our experiments reveal significant synergies between tasks, leading to improvements in both writing assessment accuracy and error correction precision. This research represents a novel approach for optimizing language learning tools by unifying writing evaluation and correction tasks, offering insights into the potential of multitask learning in educational applications. @@ -3398,7 +3398,7 @@ <fixed-case>CDB</fixed-case>: A Unified Framework for Hope Speech Detection Through Counterfactual, Desire and Belief Tulio Ferreira Leite DaSilvaUniversidade de São Paulo Gonzalo FreijedoAdunaEcole Normale Supérieure – PSL - FarahBenamaraInstitut de recherche en informatique de toulouse + FarahBenamaraInstitut de recherche en informatique de toulouse AldaMariCNRS ZongminLi LiYueInstitute for Infocomm Research, A*STAR @@ -3430,7 +3430,7 @@ ZihuiwenYe Fraser DavidGreenleeCohere MaxBartoloCohere and University College London - PhilBlunsomGoogle, Department of Computer Science, University of Oxford and DeepMind + PhilBlunsomGoogle, Department of Computer Science, University of Oxford and DeepMind Jon AnderCamposCohere MatthiasGalléCohere 4506-4520 @@ -3499,7 +3499,7 @@ <fixed-case>SFMSS</fixed-case>: Service Flow aware Medical Scenario Simulation for Conversational Data Generation ZhijieBao QingyunLiu - XuanjingHuangFudan University + XuanjingHuangFudan University ZhongyuWeiFudan University 4586-4604 Medical-specific Large Language Models (LLMs) have demonstrated impressive performance on medical-related exams and tasks. Despite their success in single-turn question and answering, instruction-tuned LLMs often falter in real-world healthcare applications, highlighting a disconnect between existing instruction datasets and practical contexts. To address this issue, we propose Service Flow aware Medical Scenario Simulation (SFMSS), a simulation framework designed for medical conversational data generation. SFMSS employs three key strategies to ensure the quality of the data generation. the use of Authentic Seed Data ensures alignment of real-world distributions. Diverse Patient Simulation enables simulated patients to exhibit distinct communication styles and complex behavioral logic. Service Flow Control ensures that conversations progress in alignment with medical objectives. We construct a dataset targeting on outpatient reception through SFMSS, named SFMSS-CD. Building on this dataset, we develop a model called SFMSS-Nurse. We conduct both automatic and human evaluations, involving 15 users and 15 clinical experts, to assess the effectiveness of SFMSS. The results demonstrate that SFMSS-Nurse outperforms all baselines, including the current state-of-the-art model GPT-4o, and aligns with human preferences and clinical demands. @@ -3710,7 +3710,7 @@ Jailbreaking with Universal Multi-Prompts - Yu-LingHsu + Yu-LingHsu HsuanSu Shang-TseChenNational Taiwan University 4870-4891 @@ -3800,7 +3800,7 @@ YuenChen Vethavikashini ChithrraRaghuramCCC Intelligent Solutions JustusMatternDepartment of Computer Science, ETHZ - ETH Zurich and Rheinisch Westfälische Technische Hochschule Aachen - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan ZhijingJinDepartment of Computer Science, University of Toronto 4984-5004 Generated texts from large language models (LLMs) have been shown to exhibit a variety of harmful, human-like biases against various demographics. These findings motivate research efforts aiming to understand and measure such effects. This paper introduces a causal formulation for bias measurement in generative language models. Based on this theoretical foundation, we outline a list of desiderata for designing robust bias benchmarks. We then propose a benchmark called OccuGender, with a bias-measuring procedure to investigate occupational gender bias. We test several state-of-the-art open-source LLMs on OccuGender, including Llama, Mistral, and their instruction-tuned versions. The results show that these models exhibit substantial occupational gender bias. Lastly, we discuss prompting strategies for bias mitigation and an extension of our causal formulation to illustrate the generalizability of our framework. @@ -3854,7 +3854,7 @@ <fixed-case>C</fixed-case>ode<fixed-case>S</fixed-case>im: Multi-Agent Code Generation and Problem Solving through Simulation-Driven Planning and Debugging Md. AshrafulIslamBangladesh University of Engineering and Technology Mohammed EunusAliBangladesh University of Engineering and Technology - Md RizwanParvezQatar Computing Research Institute + Md RizwanParvezQatar Computing Research Institute 5113-5139 2025.findings-naacl.285 islam-etal-2025-codesim @@ -3900,7 +3900,7 @@ A Context-Aware Contrastive Learning Framework for Hateful Meme Detection and Segmentation XuanyuSu YansongLi - DianaInkpenUniversity of Ottawa + DianaInkpenUniversity of Ottawa NathalieJapkowiczAmerican University 5201-5215 Amidst the rise of Large Multimodal Models (LMMs) and their widespread application in generating and interpreting complex content, the risk of propagating biased and harmful memes remains significant. Current safety measures often fail to detect subtly integrated hateful content within “Confounder Memes”. To address this, we introduce HateSieve, a new framework designed to enhance the detection and segmentation of hateful elements in memes. HateSieve features a novel Contrastive Meme Generator that creates semantically correlated memes, a customized triplet dataset for contrastive learning, and an Image-Text Alignment module that produces context-aware embeddings for accurate meme segmentation. Empirical experiments show that HateSieve not only surpasses existing LMMs in performance with fewer trainable parameters but also offers a robust mechanism for precisely identifying and isolating hateful content. Caution: Contains academic discussions of hate speech; viewer discretion advised. @@ -3924,8 +3924,8 @@ Does Data Contamination Detection Work (Well) for <fixed-case>LLM</fixed-case>s? A Survey and Evaluation on Detection Assumptions YujuanFu - OzlemUzunerGeorge Mason University - MelihaYetisgenUniversity of Washington + OzlemUzunerGeorge Mason University + MelihaYetisgenUniversity of Washington FeiXiaUniversity of Washington, Seattle 5235-5256 Large language models (LLMs) have demonstrated great performance across various benchmarks, showing potential as general-purpose task solvers. However, as LLMs are typically trained on vast amounts of data, a significant concern in their evaluation is data contamination, where overlap between training data and evaluation datasets inflates performance assessments. Multiple approaches have been developed to identify data contamination. These approaches rely on specific assumptions that may not hold universally across different settings. To bridge this gap, we systematically review 50 papers on data contamination detection, categorize the underlying assumptions, and assess whether they have been rigorously validated. We identify and analyze eight categories of assumptions and test three of them as case studies. Our case studies focus on detecting direct, instance-level data contamination, which is also referred to as Membership Inference Attacks (MIA). Our analysis reveals that MIA approaches based on these three assumptions can have similar performance to random guessing, on datasets used in LLM pretraining, suggesting that current LLMs might learn data distributions rather than memorizing individual instances. Meanwhile, MIA can easily fail when there are data distribution shifts between the seen and unseen instances. @@ -3983,7 +3983,7 @@ PranavShettyJ.P. Morgan Chase ZhaoJin SameenaShahJ.P. Morgan Chase - CarolynRoseSchool of Computer Science, Carnegie Mellon University + CarolynRoseSchool of Computer Science, Carnegie Mellon University 5326-5346 Document Visual Question Answering (VQA) models have evolved at an impressive rate over the past few years, coming close to or matching human performance on some benchmarks. We argue that common evaluation metrics used by popular benchmarks do not account for the semantic and multimodal groundedness of a model’s outputs. As a result, hallucinations and major semantic errors are treated the same way as well-grounded outputs, and the evaluation scores do not reflect the reasoning capabilities of the model. In response, we propose a new evaluation methodology that accounts for the groundedness of predictions with regard to the semantic characteristics of the output as well as the multimodal placement of the output within the input document. Our proposed methodology is parameterized in such a way that users can configure the score according to their preferences. We validate our scoring methodology using human judgment and show its potential impact on existing popular leaderboards. Through extensive analyses, we demonstrate that our proposed method produces scores that are a better indicator of a model’s robustness and tends to give higher rewards to better-calibrated answers. 2025.findings-naacl.295 @@ -4092,7 +4092,7 @@ NingCheng ZiheLiu YufengChen - JinanXuBeijing Jiaotong University + JinanXuBeijing Jiaotong University JianLiuUniversity of Science and Technology Beijing 5484-5498 In recent years, large language models (LLMs) have made significant progress in knowledge-intensive applications. However, when adapting them to specific domains, we may encounter a multi-stage continuous learning scenario, especially in cases where domain knowledge evolves rapidly.This issue severely limits traditional fine-tuning approaches for LLMs.To overcome this limitation, we propose a new learning paradigm designed specifically for multi-stage continuous learning. This paradigm includes a preference-based learning bias to identify potential knowledge conflicts, as well as a self-distillation-based data augmentation strategy to expand and enrich the training corpus, thereby improving the integration of knowledge-compatible information.In the experiments, we show that our proposed method achieves a significant improvement in accuracy after 7 stages of fine-tuning compared to previous methods, while also demonstrating excellent performance in preserving general knowledge.We have released our code and dataset at Multi-Stage-Learning. @@ -4141,7 +4141,7 @@ MarcoDinarelliCNRS RaheelQaderLingua Custodia EmmanuelleEsperança-RodierUniversity of Grenoble-Alpes - HervéBlanchonUniversité Grenoble Alpes + HervéBlanchonUniversité Grenoble Alpes 5544-5556 Despite the strong research interest in document-level Machine Translation (MT), the test-sets dedicated to this task are still scarce. The existing test-sets mainly cover topics from the general domain and fall short on specialised domains, such as legal and financial. Also, despite their document-level aspect, they still follow a sentence-level logic that doesn’t allow for including certain linguistic phenomena such as information reorganisation. In this work, we aim to fill this gap by proposing a novel test-set : DOLFIN. The dataset is built from specialised financial documents and it makes a step towards true document-level MT by abandoning the paradigm of perfectly aligned sentences, presenting data in units of sections rather than sentences. The test-set consists of an average of 1950 aligned sections for five language pairs. We present the detailed data collection pipeline that can serve as inspiration for aligning new document-level datasets. We demonstrate the usefulness and the quality of this test-set with the evaluation of a series of models. Our results show that the test-set is able to discriminate between context-sensitive and context-agnostic models and shows the weaknesses when models fail to accurately translate financial texts. The test-set will be made public for the community. 2025.findings-naacl.307 @@ -4185,9 +4185,9 @@ DandanPangBFH - Bern University of Applied Sciences StutiThapaUniversity of Tulsa GarrickSherman - LyleUngar + LyleUngar LouisTayPurdue University - Sharath ChandraGuntukuUniversity of Pennsylvania + Sharath ChandraGuntukuUniversity of Pennsylvania 5586-5600 While affective expressions on social media have been extensively studied, most research has focused on the Western context. This paper explores cultural differences in affective expressions by comparing valence and arousal on Twitter/X (geolocated to the US) and Sina Weibo (in Mainland China). Using the NRC-VAD lexicon to measure valence and arousal, we identify distinct patterns of emotional expression across both platforms. Our analysis reveals a functional representation between valence and arousal, showing a negative offset in contrast to traditional lab-based findings which suggest a positive offset. Furthermore, we uncover significant cross-cultural differences in arousal, with US users displaying higher emotional intensity than Chinese users, regardless of the valence of the content. Finally, we conduct a comprehensive language analysis correlating n-grams and LDA topics with affective dimensions to deepen our understanding of how language and culture shape emotional expression. These findings contribute to a more nuanced understanding of affective communication across cultural and linguistic contexts on social media. 2025.findings-naacl.310 @@ -4290,7 +4290,7 @@ SagnikMukherjee JeonghwanKim ZhenhailongWang - DilekHakkani-TürUniversity of Illinois at Urbana-Champaign + DilekHakkani-TürUniversity of Illinois at Urbana-Champaign HengJiUniversity of Illinois, Urbana-Champaign 5745-5758 Despite seemingly performant web agents on the task-completion benchmarks, most existing methods evaluate the agents based on a presupposition: the web navigation task consists of a linear sequence of actions with an end state that marks task completion. In contrast, our work focuses on web navigation for information aggregation, wherein the agent must explore different websites to gather information for a complex query. We consider web information aggregation from two different perspectives: i) Direct API-driven Access relies on a text-only view of the Web, leveraging external tools such as Google Search API to navigate the Web and a scraper to extract website contents. (ii) Interactive Visual Access uses screenshots of the webpages and requires interaction with the browser to navigate and access information. Motivated by these diverse information access settings, we introduce Infogent, a novel modular framework for web information aggregation involving three distinct components: Navigator, Extractor, and Aggregator. Experiments on different information access settings demonstrate that Infogent beats an existing SOTA multi-agent search framework by 7% under Direct API-Driven Access on FRAMES and improves over an existing information-seeking web agent by 4.3% under Interactive Visual Access on AssistantBench. @@ -4431,7 +4431,7 @@ SonamGuptaIndian Institute of Technology, Madras GauravPandeyInternational Business Machines DineshRaghuIBM Research - New Delhi - SachindraJoshi + SachindraJoshi 5922-5943 Retrieval-Augmented Generation (RAG) has emerged as a prominent method for incorporating domain knowledge into Large Language Models (LLMs). While RAG enhances response relevance by incorporating retrieved domain knowledge in the context, retrieval errors can still lead to hallucinations and incorrect answers. To recover from retriever failures, domain knowledge is injected by fine-tuning the model to generate the correct response, even in the case of retrieval errors. However, we observe that without systematic knowledge augmentation, fine-tuned LLMs may memorize new information but still fail to extract relevant domain knowledge, leading to poor performance. In this work, we present a novel framework that significantly enhances the fine-tuning process by augmenting the training data in two ways – context augmentation and knowledge paraphrasing. In context augmentation, we create multiple training samples for a given QA pair by varying the relevance of the retrieved information, teaching the model when to ignore and when to rely on retrieved content. In knowledge paraphrasing, we finetune with multiple answers to the same question, enabling LLMs to better internalize specialized knowledge. To mitigate catastrophic forgetting due to fine-tuning, we add a domain-specific identifier to a question and also utilize a replay buffer containing general QA pairs. Experimental results demonstrate the efficacy of our method over existing techniques, achieving up to 10% relative gain in token-level recall while preserving the LLM’s generalization capabilities. 2025.findings-naacl.329 @@ -4444,7 +4444,7 @@ JaehyungSeo SeungyoonLeeKorea University ChanjunParkKorea University - HeuiseokLim + HeuiseokLim 5944-5964 Through numerous endeavors, large language models (LLMs) have witnessed significant advancements in their instruction-following capability. However, we discern that LLMs are prone to generate responses to instruction-formatted statements in an instinctive manner, rather than comprehending the underlying user intention reside within the given instructions. We also recognize that the significance of instruction understanding capability is largely overlooked in most of LLM evaluation benchmarks. To ensure more comprehensive evaluation on the instruction understanding capability of LLM, we propose Intention of Instruction (IntInst) benchmark, which primary objective is to distinguish the appropriate instruction that accurately instruct to generate a given context. IntInst presents four instruction candidates and requires LLMs to select one among them. Through extensive experiments with several instruction-tuned LLMs, we reveal that most LLMs struggle to grasp the actual intention concealed in the instruction and thoroughly analyze the factors influencing instruction understanding. 2025.findings-naacl.330 @@ -4503,7 +4503,7 @@ XiaochenWang JunqingHeInternational Digital Econemy Academy LiangChen - GholamrezaHaffariMonash University, Monash University and Monash University + GholamrezaHaffariMonash University, Monash University and Monash University YiruWang ZheYangPeking University XiangdiMeng @@ -4558,7 +4558,7 @@ YadongLuMicrosoft MichaelSantacroceMicrosoft YeyunGong - ChaoZhangGeorgia Institute of Technology + ChaoZhangGeorgia Institute of Technology YelongShen 6090-6107 Recent advances in large language models (LLMs) have demonstrated potential for LLM agents. To facilitate the training for these agents with both linguistic feedback and non-linguistic reward signals, we introduce Learning through Communication (LTC). We design a universal buffer to store all the feedback, and an iterative pipeline to enable an LLM agent to explore and update its policy in an given environment. To optimize agent interactions for task-specific learning with our universal buffer and pipeline, we introduce diverse communication patterns tailored for both single-agent and multi-agent environments. We evaluate the efficacy of our LTC approach on four diverse datasets: ALFWorld (single-agent), HotpotQA (multi-agent collaboration), Chameleon (multi-agent competition), and GSM8k (multi-agent teacher-student). On these data sets, LTC outperforms the supervised instruction fine-tuning baselines by 3.6% to 12%. These results highlight the versatility and efficiency of LTC in facilitating online adaptation for LLM agents. @@ -4583,7 +4583,7 @@ WenxuanZhangSingapore University of Technology and Design JiahaoYing MahaniAljuniedAlibaba Group - Anh TuanLuuNanyang Technological University + Anh TuanLuuNanyang Technological University LidongBingShanda Group and Alibaba Group 6119-6136 This study introduces two novel benchmarks, SeaExam and SeaBench, designed to evaluate the capabilities of Large Language Models (LLMs) in Southeast Asian (SEA) application scenarios. Unlike existing multilingual datasets primarily derived from English translations, these benchmarks are constructed based on real-world scenarios from SEA regions. SeaExam draws from regional educational exams to form a comprehensive dataset that encompasses subjects such as local history and literature. In contrast, SeaBench is crafted around multi-turn, open-ended tasks that reflect daily interactions within SEA communities. Our evaluations demonstrate that SeaExam and SeaBench more effectively discern LLM performance on SEA language tasks compared to their translated benchmarks. This highlights the importance of using real-world queries to assess the multilingual capabilities of LLMs. @@ -4685,7 +4685,7 @@ AsafYehudai DineshKhandelwalInternational Business Machines DineshRaghuIBM Research - New Delhi - SachindraJoshi + SachindraJoshi 6240-6249 Fine-tuning Large Language Models (LLMs) on specific datasets is a common practice to improve performance on target tasks. However, this performance gain often leads to overfitting, where the model becomes too specialized in either the task or the characteristics of the training data, resulting in a loss of generalization. This paper introduces Selective Self-to-Supervised Fine-Tuning (S3FT), a fine-tuning approach that achieves better performance than the standard supervised fine-tuning (SFT) while improving generalization.S3FT leverages the existence of multiple valid responses to a query.By utilizing the model’s correct responses, S3FT reduces model specialization during the fine-tuning stage. S3FT first identifies the correct model responses from the training set by deploying an appropriate judge. Then, it fine-tunes the model using the correct model responses and the gold response (or its paraphrase) for the remaining samples.The effectiveness of S3FT is demonstrated through experiments on mathematical reasoning, Python programming and reading comprehension tasks. The results show that standard SFT can lead to an average performance drop of up to 4.4 on multiple benchmarks, such as MMLU and TruthfulQA. In contrast, S3FT reduces this drop by half, i.e. 2.5, indicating better generalization capabilities than SFT while performing significantly better on the fine-tuning tasks. 2025.findings-naacl.349 @@ -4730,7 +4730,7 @@ Discrete Diffusion Language Model for Efficient Text Summarization Do HuuDat Duc AnhDo - Anh TuanLuuNanyang Technological University + Anh TuanLuuNanyang Technological University WrayBuntineVinUniversity 6278-6290 While diffusion models excel at conditionally generating high-quality images, prior works in discrete diffusion models were not evaluated on conditional long-text generation. This work addresses the limitations of prior discrete diffusion models for conditional long-text generation, particularly in the long abstractive summarization task. Despite faster decoding speeds compared to autoregressive methods, previous discrete diffusion models failed on the abstractive summarization task due to the incompatibility between the backbone architectures and the random noising process. To overcome these challenges, we introduce a novel semantic-aware noising process that enables Transformer backbones to handle long sequences effectively. Additionally, we propose CrossMamba, an adaptation of the Mamba model to the encoder-decoder paradigm, which integrates seamlessly with the random absorbing noising process. Our approaches outperform existing discrete diffusion models on three benchmark summarization datasets: Gigaword, CNN/DailyMail, and Arxiv, while also achieving much faster inference speed compared to autoregressive models. @@ -4900,7 +4900,7 @@ SiyuanWang ShujunLiu YunSong - XuanjingHuang + XuanjingHuang ZhongyuWei 6537-6570 Large Language Models (LLMs) have significantly advanced legal intelligence, but the scarcity of scenario data impedes the progress toward interactive legal scenarios. This paper introduces a Multi-agent Legal Simulation Driver (MASER) to scalably generate synthetic data by simulating interactive legal scenarios. Leveraging real-legal case sources, MASER ensures the consistency of legal attributes between participants and introduces a supervisory mechanism to align participants’ characters and behaviors as well as addressing distractions. A Multi-stage Interactive Legal Evaluation (MILE) benchmark is further constructed to evaluate LLMs’ performance in dynamic legal scenarios. Extensive experiments confirm the effectiveness of our framework. @@ -4994,7 +4994,7 @@ AarónGaliano-JiménezUniversidad de Alicante Juan AntonioPérez-OrtizUniversidad de Alicante FelipeSánchez-MartínezUniversity of Alicante - Víctor M.Sánchez-CartagenaUniversidad de Alicante + Víctor M.Sánchez-CartagenaUniversidad de Alicante 6661-6676 This paper delves into sequence-level knowledge distillation (KD) of multilingual pre-trained translation models. We posit that, beyond the approximated mode obtained via beam search, the whole output distribution of the teacher contains valuable insights for students. We explore the potential of n-best lists from beam search to guide student’s learning and then investigate alternative decoding methods to address observed issues like low variability and under-representation of infrequent tokens. Our research in data-limited scenarios reveals that although sampling methods can slightly compromise the translation quality of the teacher output compared to beam search based methods, they enrich the generated corpora with increased variability and lexical richness, ultimately enhancing student model performance and reducing the gender bias amplification commonly associated with KD. 2025.findings-naacl.372 @@ -5053,7 +5053,7 @@ Aligning Black-box Language Models with Human Judgments Gerrit J.j.Van Den BurgAmazon GenSuzukiAmazon - WeiLiuAmazon + WeiLiuAmazon MuratSensoy 6737-6749 Large language models (LLMs) are increasingly used as automated judges to evaluate recommendation systems, search engines, and other subjective tasks, where relying on human evaluators can be costly, time-consuming, and unscalable. LLMs offer an efficient solution for continuous, automated evaluation. However, since the systems that are built and improved with these judgments are ultimately designed for human use, it is crucial that LLM judgments align closely with human evaluators to ensure such systems remain human-centered. On the other hand, aligning LLM judgments with human evaluators is challenging due to individual variability and biases in human judgments. We propose a simple yet effective framework to align LLM judgments with individual human evaluators or their aggregated judgments, without retraining or fine-tuning the LLM. Our approach learns a linear mapping between the LLM’s outputs and human judgments, achieving over 142% average improvement in agreement across 29 tasks with only a small number of calibration examples used for training. Notably, our method works in zero-shot and few-shot settings, exceeds inter-human agreement on four out of six tasks, and enables smaller LLMs to achieve performance comparable to that of larger models. @@ -5212,7 +5212,7 @@ SvetlanaKiritchenkoNational Research Council Canada Muhammad Hammad FahimSiddiqui IsarNejadgholiNational Research Council Canada and University of Ottawa - Kathleen C.FraserNational Research Council Canada + Kathleen C.FraserNational Research Council Canada 6995-7016 Eradicating poverty is the first goal in the U.N. Sustainable Development Goals. However, aporophobia – the societal bias against people living in poverty – constitutes a major obstacle to designing, approving and implementing poverty-mitigation policies. This work presents an initial step towards operationalizing the concept of aporophobia to identify and track harmful beliefs and discriminative actions against poor people on social media. In close collaboration with non-profits and governmental organizations, we conduct data collection and exploration. Then we manually annotate a corpus of English tweets from five world regions for the presence of (1) direct expressions of aporophobia, and (2) statements referring to or criticizing aporophobic views or actions of others, to comprehensively characterize the social media discourse related to bias and discrimination against the poor. Based on the annotated data, we devise a taxonomy of categories of aporophobic attitudes and actions expressed through speech on social media. Finally, we train several classifiers and identify the main challenges for automatic detection of aporophobia in social networks. This work paves the way towards identifying, tracking, and mitigating aporophobic views on social media at scale. 2025.findings-naacl.388 @@ -5293,7 +5293,7 @@ Augmented Adversarial Trigger Learning - ZheWangAmazon + ZheWangAmazon YanjunQiAmazon and University of Virginia 7068-7100 Gradient optimization-based adversarial attack methods automate the learning of adversarial triggers to generate jailbreak prompts or leak system prompts. In this work, we take a closer look at the optimization objective of adversarial trigger learning and propose ATLA: Adversarial Trigger Learning with Augmented objectives. ATLA improves the negative log-likelihood loss used by previous studies into a weighted loss formulation that encourages the learned adversarial triggers to optimize more towards response format tokens. This enables ATLA to learn an adversarial trigger from just one query-response pair and the learned trigger generalizes well to other similar queries. We further design a variation to augment trigger optimization with an auxiliary loss that suppresses evasive responses. We showcase how to use ATLA to learn adversarial suffixes jailbreaking LLMs and to extract hidden system prompts. Empirically we demonstrate that ATLA consistently outperforms current state-of-the-art techniques, achieving nearly 100% success in attacking while requiring 80% fewer queries. ATLA learned jailbreak suffixes demonstrate high generalization to unseen queries and transfer well to new LLMs. @@ -5347,7 +5347,7 @@ NicolasHiebelUniversité Paris-Saclay OlivierFerretCEA KarënFortUniversity of Lorraine - AurélieNévéolLISN-CNRS / Université Paris Saclay + AurélieNévéolLISN-CNRS / Université Paris Saclay 7145-7159 Healthcare professionals are increasingly including Language Models (LMs) in clinical practice. However, LMs have been shown to exhibit and amplify stereotypical biases that can cause life-threatening harm in a medical context. This study aims to evaluate gender biases in automatically generated clinical cases in French, on ten disorders. Using seven LMs fine-tuned for clinical case generation and an automatic linguistic gender detection tool, we measure the associations between disorders and gender. We unveil that LMs over-generate cases describing male patients, creating synthetic corpora that are not consistent with documented prevalence for these disorders. For instance, when prompts do not specify a gender, LMs generate eight times more clinical cases describing male (vs. female patients) for heart attack. We discuss the ideal synthetic clinical case corpus and establish that explicitly mentioning demographic information in generation instructions appears to be the fairest strategy. In conclusion, we argue that the presence of gender biases in synthetic text raises concerns about LM-induced harm, especially for women and transgender people. 2025.findings-naacl.398 @@ -5493,7 +5493,7 @@ GuangliangLiuMichigan State University YaoMaRensselaer Polytechnic Institute RongrongWangMichigan State University - KristenJohnsonMichigan State University + KristenJohnsonMichigan State University JiliangTangMichigan State University 7302-7323 In-Context Learning (ICL) empowers Large Language Models (LLMs) with the ability to learn from a few examples provided in the prompt, enabling downstream generalization without the requirement for gradient updates. Despite encouragingly empirical success, the underlying mechanism of ICL remains unclear. Existing research remains ambiguous with various viewpoints, utilizing intuition-driven and ad-hoc technical solutions to interpret ICL. In this paper, we leverage a data generation perspective to reinterpret recent efforts from a systematic angle, demonstrating the potential broader usage of these popular technical solutions. For a conceptual definition, we rigorously adopt the terms of skill recognition and skill learning. Skill recognition selects one learned data generation function previously seen during pre-training while skill learning can learn new data generation functions from in-context data. Furthermore, we provide insights into the strengths and weaknesses of both abilities, emphasizing their commonalities through the perspective of data generation. This analysis suggests potential directions for future research. The corresponding paper list can be found here. @@ -5506,7 +5506,7 @@ YoungwonLeeSeoul National University Seung-wonHwangSeoul National University Daniel FCamposSnowflake - FilipGralińskiSnowflake and Adam Mickiewicz University + FilipGralińskiSnowflake and Adam Mickiewicz University ZheweiYaoSnowflake YuxiongHeMicrosoft 7324-7339 @@ -5521,7 +5521,7 @@ AmanDalmia MehranKazemiGoogle AmalZouaqPolytechnique Montreal - ChristopherPalPolytechnique Montreal + ChristopherPalPolytechnique Montreal 7340-7356 Geometry problem-solving demands advanced reasoning abilities to process multimodal inputs and employ mathematical knowledge effectively. Vision-language models (VLMs) have made significant progress in various multimodal tasks. Yet, they still struggle with geometry problems and are significantly limited by their inability to perform mathematical operations not seen during pre-training, such as calculating the cosine of an arbitrary angle, and by difficulties in correctly applying relevant geometry formulas. To overcome these challenges, we present GeoCoder, which leverages modular code-finetuning to generate and execute code using a predefined geometry function library. By executing the code, we achieve accurate and deterministic calculations, contrasting the stochastic nature of autoregressive token prediction, while the function library minimizes errors in formula usage. We also propose a multimodal retrieval-augmented variant of GeoCoder, named RAG-GeoCoder, which incorporates a non-parametric memory module for retrieving functions from the geometry library, thereby reducing reliance on parametric memory. Our modular code-finetuning approach enhances the geometric reasoning capabilities of VLMs, yielding an average improvement of over 16% across various question complexities on the GeomVerse dataset compared to other fine-tuning methods. 2025.findings-naacl.410 @@ -5616,7 +5616,7 @@ Do Large Language Models Align with Core Mental Health Counseling Competencies? - Viet CuongNguyen + Viet CuongNguyen MohammadTaher DongwanHong Vinicius KonkolicsPossobom @@ -5734,7 +5734,7 @@ PranavChitale KhushbooSingh NiranjanBalasubramanian - H. AndrewSchwartz + H. AndrewSchwartz 7658-7667 Like most of NLP, models for human-centered NLP tasks—tasks attempting to assess author-level information—predominantly use rep-resentations derived from hidden states of Transformer-based LLMs. However, what component of the LM is used for the representation varies widely. Moreover, there is a need for Human Language Models (HuLMs) that implicitly model the author and provide a user-level hidden state. Here, we systematically evaluate different ways of representing documents and users using different LM and HuLM architectures to predict task outcomes as both dynamically changing states and averaged trait-like user-level attributes of valence, arousal, empathy, and distress. We find that representing documents as an average of the token hidden states performs the best generally. Further, while a user-level hidden state itself is rarely the best representation, we find its inclusion in the model strengthens token or document embeddings used to derive document- and user-level representations resulting in best performances. 2025.findings-naacl.426 @@ -5808,7 +5808,7 @@ JotsnaGowda PryceHouck KevinTangHeinrich Heine University Düsseldorf and University of Florida - SarahMoellerUniversity of Florida + SarahMoellerUniversity of Florida 7744-7756 African American English (AAE) presents unique challenges in natural language processing (NLP) This research systematically compares the performance of available NLP models—rule-based, transformer-based, and large language models (LLMs)—capable of identifying key grammatical features of AAE, namely Habitual Be and Multiple Negation. These features were selected for their distinct grammatical complexity and frequency of occurrence. The evaluation involved sentence-level binary classification tasks, using both zero-shot and few-shot strategies. The analysis reveals that while LLMs show promise compared to the baseline, they are influenced by biases such as recency and unrelated features in the text such as formality. This study highlights the necessity for improved model training and architectural adjustments to better accommodate AAE’s unique linguistic characteristics. Data and code are available. 2025.findings-naacl.431 @@ -5974,7 +5974,7 @@ <fixed-case>UCL</fixed-case>-Bench: A <fixed-case>C</fixed-case>hinese User-Centric Legal Benchmark for Large Language Models RuoliGan DuanyuFeng - ChenZhangNational University of Singapore + ChenZhangNational University of Singapore ZhihangLinWestlake Scietrain HaochenJia HaoWangSichuan University @@ -6103,7 +6103,7 @@ XiangYueCarnegie Mellon University TuneyZheng JieHuangxAI - Bill YuchenLinxAI and University of Washington + Bill YuchenLinxAI and University of Washington 8118-8131 We introduce SimulBench, a benchmark designed to evaluate large language models (LLMs) across a diverse collection of creative simulation tasks, such as acting as a Linux terminal or playing text games with users. While these simulation tasks serve as effective measures of an LLM’s general intelligence, they are seldom incorporated into existing benchmarks. A major challenge is to develop an evaluation framework for testing different LLMs fairly while preserving the multi-round interactive nature of simulation tasks between users and AI. To tackle this issue, we suggest using a fixed LLM as a user agent to engage with an LLM to collect dialogues first under different tasks. Then, challenging dialogue scripts are extracted for evaluating different target LLMs. To facilitate automatic assessment on SimulBench, GPT-4 is employed as the evaluator, tasked with reviewing the quality of the final response generated by the target LLMs given multi-turn dialogue scripts. Our comprehensive experiments indicate that these creative simulation tasks continue to pose a significant challenge with their unique natures and show the gap between proprietary models and the most advanced open LLMs. For example, GPT-4-turbo outperforms LLaMA-3-70b-Chat on 18.55% more cases. 2025.findings-naacl.453 @@ -6302,7 +6302,7 @@ RahmadMahendraRoyal Melbourne Institute of Technology and Universitas Indonesia DamianoSpinaRoyal Melbourne Institute of Technology LawrenceCavedonRoyal Melbourne Institute of Technology - KarinVerspoorRoyal Melbourne Institute of Technology + KarinVerspoorRoyal Melbourne Institute of Technology 8336-8361 While recent advancements in large language models (LLMs) have enhanced their capabilities to solve mathematical problems, other aspects of numeracy remain underexplored. In this paper, we propose a benchmark to evaluate the ability of language models to perform basic numeracy tasks. We frame numeracy as a Natural Language Inference (NLI) task to assess the models’ ability to understand both numbers and language contexts. We evaluate 49 language models (LMs), including fine-tuned LMs on NLI datasets, instruction-tuned LLMs, and specialized math-LLMs. Our findings reveal three main insights: (1) LLMs only clearly outperform smaller LMs in arithmetic tasks, indicating that mathematical reasoning cannot be generalized to other numeracy skills such as number comparison and normalization; (2) while most language models achieve fair to good accuracy for NLI entailment cases, they still struggle to predict contradiction and neutral cases; and (3) the robustness of language models’ numeracy capabilities needs improvement, particularly in understanding the semantics and pragmatics of numbers in linguistic contexts. 2025.findings-naacl.467 @@ -6313,7 +6313,7 @@ Are Language Models Agnostic to Linguistically Grounded Perturbations? A Case Study of <fixed-case>I</fixed-case>ndic Languages PoulamiGhoshIndian Institute of Technology, Bombay RajDabreDepartment of Computer Science, Indian Institute of Technology, Madras, Indian Institute of Technology, Madras and National Institute of Information and Communications Technology (NICT), National Institute of Advanced Industrial Science and Technology - PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology + PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology 8362-8396 Pre-trained language models (PLMs) are known to be susceptible to perturbations to the input text, but existing works do not explicitly focus on linguistically grounded attacks, which are subtle and more prevalent in nature. In this paper, we study whether PLMs are agnostic to linguistically grounded attacks or not. To this end, we offer the first study addressing this, investigating different Indic languages and various downstream tasks. Our findings reveal that although PLMs are susceptible to linguistic perturbations, when compared to non-linguistic attacks, PLMs exhibit a slightly lower susceptibility to linguistic attacks. This highlights that even constrained attacks are effective. Moreover, we investigate the implications of these outcomes across a range of languages, encompassing diverse language families and different scripts. 2025.findings-naacl.468 @@ -6485,7 +6485,7 @@ DongshuoLiu ZhijingWuBeijing Institute of Technology DandanSongBeijing Institute of Technology - HeyanHuangBeijing Institute of Technology + HeyanHuangBeijing Institute of Technology 103-123 Multi-session personalized dialogue generation is one of the most important topics in open-domain dialogue. It aims to generate responses consistent with the dialogue history and personality information across multiple sessions to engage users’ interest in the dialogue. Recent approaches focusing on history modeling and persona modeling have advanced the development of this field. However, they overlook the importance of dialogue structure in helping large language models (LLMs) understand the dialogue context. Moreover, these methods do not efficiently expand and utilize personality information, reducing the responses’ consistency. In this paper, we propose a Persona-Aware LLM-enAnCEd(PALACE) framework for multi-session personalized dialogue generation. Specifically, the framework consists of three components: a topic-aware memory bank, a persona prompt learning module, and VAE-LoRA. The topic-aware memory bank works by retrieving historical information that possesses a certain dialogue structure and relevant topics. The persona prompt learning module enhances the LLM’s persona-aware capabilities by utilizing a persona commonsense knowledge graph and a query-driven graph neural network. Furthermore, to enhance the generative capabilities of the LLM and obtain more useful prior knowledge, we combine VAE with LoRA to propose VAE-LoRA. Experimental results on the MSC and DuLeMon dataset demonstrate that our framework outperforms the state-of-the-art methods in automatic and human evaluation metrics. 2025.findings-acl.5 @@ -6508,7 +6508,7 @@ <fixed-case>B</fixed-case>ayes<fixed-case>KD</fixed-case>: <fixed-case>B</fixed-case>ayesian Knowledge Distillation for Compact <fixed-case>LLM</fixed-case>s in Constrained Fine-tuning Scenarios WeiLiUniversity of Birmingham LujunLi - Mark G.Lee + Mark G.Lee ShengjieSun LeiZhangUniversity of Exeter WeiXueHong Kong University of Science and Technology @@ -6597,7 +6597,7 @@ MFinMeeting: A Multilingual, Multi-Sector, and Multi-Task Financial Meeting Understanding Evaluation Dataset JieZhuAlibaba Group - JunhuiLiSoochow University, China + JunhuiLiSoochow University, China YalongWen XiandongLinanjing university LifanGuo @@ -6652,7 +6652,7 @@ NicolasStefanovitchEuropean Commission GiovanniDa San MartinoUniversity of Padua JakubPiskorski - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 302-326 We introduce a novel multilingual and hierarchical corpus annotated for entity framing and role portrayal in news articles. The dataset uses a unique taxonomy inspired by storytelling elements, comprising 22 fine-grained roles, or archetypes, nested within three main categories: protagonist, antagonist, and innocent. Each archetype is carefully defined, capturing nuanced portrayals of entities such as guardian, martyr, and underdog for protagonists; tyrant, deceiver, and bigot for antagonists; and victim, scapegoat, and exploited for innocents. The dataset includes 1,378 recent news articles in five languages (Bulgarian, English, Hindi, European Portuguese, and Russian) focusing on two critical domains of global significance: the Ukraine-Russia War and Climate Change. Over 5,800 entity mentions have been annotated with role labels. This dataset serves as a valuable resource for research into role portrayal and has broader implications for news analysis. We describe the characteristics of the dataset and the annotation process, and we report evaluation results on fine-tuned state-of-the-art multilingual transformers and hierarchical zero-shot learning using LLMs at the level of a document, a paragraph, and a sentence. 2025.findings-acl.17 @@ -6744,7 +6744,7 @@ Verifying the Steps of Deductive Reasoning Chains ZaccharySadeddine - Fabian M.SuchanekTelecom Paris + Fabian M.SuchanekTelecom Paris 456-475 As Large Language Models penetrate everyday life more and more, it becomes essential to measure the correctness of their output. Inthis paper, we propose a novel task: the automatic verification of individual reasoning steps in a logical deductive Chain-of-Thought. Thistask addresses two well-known problems of LLMs, hallucination and incorrect reasoning. We propose a new dataset of logical reasoningchains, in which the individual deduction steps have been manually annotated for soundness, and benchmark several methods on it. We findthat LLMs can detect unsound reasoning steps fairly well, but argue that verification has to be performed by transparent methods instead.We test symbolic methods, but find that they under-perform. We develop a neuro-symbolic baseline called VANESSA that comes closer to the performance of LLMs. 2025.findings-acl.25 @@ -6822,7 +6822,7 @@ <fixed-case>CRPO</fixed-case>: Confidence-Reward Driven Preference Optimization for Machine Translation GuofengCui PichaoWangAmazon - YangLiuAmazon + YangLiuAmazon ZemianKe ZhuLiuAmazon Prime Video VimalBhatAmazon @@ -6851,7 +6851,7 @@ <fixed-case>F</fixed-case>lash<fixed-case>B</fixed-case>ack: Efficient Retrieval-Augmented Language Modeling for Fast Inference RunhengLiuBeijing Institute of Technology XingchenXiaoBeijing Institute of Technology - HeyanHuangBeijing Institute of Technology + HeyanHuangBeijing Institute of Technology ZewenChiMicrosoft Research ZhijingWuBeijing Institute of Technology 595-608 @@ -7003,9 +7003,9 @@ <fixed-case>C</fixed-case>oin<fixed-case>M</fixed-case>ath: Harnessing the Power of Coding Instruction for Math <fixed-case>LLM</fixed-case> ChengweiWei, A*STAR BinWang - Jung-jaeKimA*STAR + Jung-jaeKimA*STAR GuimeiLiuInstitute for Infocomm Research, A*STAR - Nancy F.Chen + Nancy F.Chen 786-797 Large Language Models (LLMs) have shown strong performance in solving mathematical problems, with code-based solutions proving particularly effective. However, the best practice to leverage coding instruction data to enhance mathematical reasoning remains underexplored. This study investigates three key questions: (1) How do different coding styles of mathematical code-based rationales impact LLMs’ learning performance? (2) Can general-domain coding instructions improve performance? (3) How does integrating textual rationales with code-based ones during training enhance mathematical reasoning abilities? Our findings reveal that code-based rationales with concise comments, descriptive naming, and hardcoded solutions are beneficial, while improvements from general-domain coding instructions and textual rationales are relatively minor. Based on these insights, we propose CoinMath, a learning strategy designed to enhance mathematical reasoning by diversifying the coding styles of code-based rationales. CoinMath generates a variety of code-based rationales incorporating concise comments, descriptive naming conventions, and hardcoded solutions. Experimental results demonstrate that CoinMath significantly outperforms its baseline model, MAmmoTH, one of the SOTA math LLMs. 2025.findings-acl.44 @@ -7017,7 +7017,7 @@ Zain MuhammadMujahidCopenhagen University DilshodAzizov Maha TufailAgroMohamed bin Zayed University of Artificial Intelligence - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 798-819 In an age characterized by the proliferation of mis- and disinformation online, it is critical to empower readers to understand the content they are reading. Important efforts in this direction rely on manual or automatic fact-checking, which can be challenging for emerging claims with limited information. Such scenarios can be handled by assessing the reliability and the political bias of the source of the claim, i.e., characterizing entire news outlets rather than individual claims or articles. This is an important but understudied research direction. While prior work has looked into linguistic and social contexts, we do not analyze individual articles or information in social media. Instead, we propose a novel methodology that emulates the criteria that professional fact-checkers use to assess the factuality and political bias of an entire outlet. Specifically, we design a variety of prompts based on these criteria and elicit responses from large language models (LLMs), which we aggregate to make predictions. In addition to demonstrating sizable improvements over strong baselines via extensive experiments with multiple LLMs, we provide an in-depth error analysis of the effect of media popularity and region on model performance. Further, we conduct an ablation study to highlight the key components of our dataset that contribute to these improvements. To facilitate future research, we released our dataset and code. 2025.findings-acl.45 @@ -7026,7 +7026,7 @@ Structured Discourse Representation for Factual Consistency Verification - KunZhang + KunZhang OanaBalalauINRIA IoanaManolescuÉcole Polytechnique and Inria 820-838 @@ -7102,7 +7102,7 @@ KeYang SpencerHulseyUniversity of Illinois at Urbana-Champaign XinLiuUniversity of Illinois at Urbana-Champaign - ChengXiangZhaiUniversity of Illinois, Urbana Champaign + ChengXiangZhaiUniversity of Illinois, Urbana Champaign VolodymyrKindratenkoUniversity of Illinois at Urbana-Champaign 907-926 Recent advances in language modeling demonstrate the need for high-quality domain-specific training data, especially for tasks that require specialized knowledge. General-purpose models, while versatile, often lack the depth needed for expert-level tasks because of limited domain-specific information. Domain adaptation training can enhance these models, but it demands substantial, high-quality data. To address this, we propose ORBIT, a cost-efficient methodology for curating massive, high-quality domain-specific datasets from noisy web sources, tailored for training specialist large language models. Using astronomy as a primary case study, we refined the 1.3T-token FineWeb-Edu dataset into a high-quality, 10B-token subset focused on astronomy. Fine-tuning LLaMA-3-8B on a 1B-token astronomy subset improved performance on the MMLU astronomy benchmark from 69% to 76% and achieved top results on AstroBench, an astronomy-specific benchmark. Moreover, our model (Orbit-LLaMA) outperformed LLaMA-3-8B-base, with GPT-4o evaluations preferring it in 73% of cases across 1000 astronomy-specific questions. Additionally, we validated ORBIT’s generalizability by applying it to law and medicine, achieving a significant improvement of data quality compared to an unfiltered baseline. We open-source the ORBIT methodology, including the curated datasets, the codebase, and the resulting model. @@ -7324,7 +7324,7 @@ <fixed-case>M</fixed-case>o<fixed-case>RE</fixed-case>: A Mixture of Low-Rank Experts for Adaptive Multi-Task Learning DacaoZhang - KunZhangHefei University of Technology + KunZhangHefei University of Technology ShimaoChuHefei University of Technology LeWuHefei University of Technology XinLi @@ -7545,7 +7545,7 @@ MinghanWangMonash University Viet ThanhPhamMonash University FarhadMoghimifarMonash University - Thuy-TrangVuMonash University + Thuy-TrangVuMonash University 1646-1662 Despite achieving remarkable performance, machine translation (MT) research remains underexplored in terms of translating cultural elements in languages, such as idioms, proverbs, and colloquial expressions. This paper investigates the capability of state-of-the-art neural machine translation (NMT) and large language models (LLMs) in translating proverbs, which are deeply rooted in cultural contexts. We construct a translation dataset of standalone proverbs and proverbs in conversation for four language pairs. Our experiments show that the studied models can achieve good translation between languages with similar cultural backgrounds, and LLMs generally outperform NMT models in proverb translation. Furthermore, we find that current automatic evaluation metrics such as BLEU, CHRF++ and COMET are inadequate for reliably assessing the quality of proverb translation, highlighting the need for more culturally aware evaluation metrics. 2025.findings-acl.83 @@ -7554,7 +7554,7 @@ Towards Efficient <fixed-case>LLM</fixed-case> Grounding for Embodied Multi-Agent Collaboration - YangZhangTsinghua University + YangZhangTsinghua University ShixinYang ChenjiaBaiTeleAI, China Telecom FeiWuZhejiang University @@ -7589,7 +7589,7 @@ XiaoyiBao HaoYuanMa ShoushanLiSoochow University - GuodongZhouSoochow University, China + GuodongZhouSoochow University, China 1716-1729 Retrieval-augmented methods have achieved remarkable advancements in alleviating the hallucination of large language models.Nevertheless, the introduction of external knowledge does not always lead to the expected improvement in model performance, as irrelevant or harmful information present in the retrieved knowledge can compromise the prediction process.To address these challenges, we propose a novel framework aimed at improving model performance by incorporating knowledge filtering and prediction fusion mechanisms.In particular, our approach first employs a perplexity-based annotation method to collect training data.Then, we design four distinct strategies to filter out harmful retrieved knowledge.Finally, we integrate the filtered knowledge to generate the final result via batch-wise predictions.We conduct extensive experiments across multiple discriminative task datasets to evaluate the proposed framework.The results demonstrate that our framework can significantly enhance the performance of models on discriminative tasks. 2025.findings-acl.86 @@ -7601,7 +7601,7 @@ ChongLiInstitute of automation, Chinese Academy of Sciences YingzhuoDengInstitute of Automation, Chinese Academy of Sciences JiajunZhangInstitute of automation, Chinese academy of science, Chinese Academy of Sciences - ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences + ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences 1730-1754 The curse of multilinguality phenomenon is a fundamental problem of multilingual Large Language Models (LLMs), where the competition between massive languages results in inferior performance. It mainly comes from limited capacity and negative transfer between dissimilar languages. To address this issue, we propose a method to dynamically group and scale up the parameters of multilingual LLM while boosting positive transfer among similar languages. Specifically, the model is first tuned on monolingual corpus to determine the parameter deviation in each layer and quantify the similarity between languages. Layers with more deviations are extended to mixture-of-experts layers to reduce competition between languages, where one expert module serves one group of similar languages. Experimental results on 18 to 128 languages show that our method reduces the negative transfer between languages and significantly boosts multilingual performance with fewer parameters. Such language group specialization on experts benefits the new language adaptation and reduces the inference on the previous multilingual knowledge learned. 2025.findings-acl.87 @@ -7613,7 +7613,7 @@ FangxuYu JunjieGuoNanjing University ZhenWuNanjing University - XinyuDaiNanjing University + XinyuDaiNanjing University 1755-1767 Emotions are fundamental to conversational understanding. While significant advancements have been achieved in conversational emotion recognition and emotional response generation, recognizing the causes of eliciting emotions is less explored. Previous studies have primarily focused on identifying the causes of emotions by understanding verbal contextual utterances, overlooking that non-verbal emotional cues can elicit emotions. To address this issue, we develop an Emotional Contagion Graph Network (ECGN) that simulates the impact of non-verbal implicit emotions on the counterpart’s emotions. To achieve this, we construct a heterogeneous graph that simulates the transmission of non-verbal emotions alongside verbal influences. By applying message passing between nodes, the constructed graph effectively models both the implicit emotional dynamics and explicit verbal interactions. We evaluate ECGN’s performance through extensive experiments on the benchmark datasets and compare it against multiple state-of-the-art models. Experimental results demonstrate the effectiveness of the proposed model. Our code is available at https://github.com/Yu-Fangxu/ECGN. 2025.findings-acl.88 @@ -7709,7 +7709,7 @@ <fixed-case>EXECUTE</fixed-case>: A Multilingual Benchmark for <fixed-case>LLM</fixed-case> Token Understanding LukasEdmanTechnische Universität München HelmutSchmidCenter for Information and Language Processing - AlexanderFraserTechnical University of Munich + AlexanderFraserTechnical University of Munich 1878-1887 The CUTE benchmark showed that LLMs struggle with character understanding in English. We extend it to more languages with diverse scripts and writing systems, introducing EXECUTE. Our simplified framework allows easy expansion to any language. Tests across multiple LLMs reveal that challenges in other languages are not always on the character level as in English. Some languages show word-level processing issues, some show no issues at all. We also examine sub-character tasks in Chinese, Japanese, and Korean to assess LLMs’ understanding of character components. 2025.findings-acl.95 @@ -7880,7 +7880,7 @@ Harnessing <fixed-case>PDF</fixed-case> Data for Improving <fixed-case>J</fixed-case>apanese Large Multimodal Models JeonghunBaekThe University of Tokyo - AkikoAizawaNational Institute of Informatics + AkikoAizawaNational Institute of Informatics KiyoharuAizawaThe University of Tokyo, The University of Tokyo and Tokyo University of Science 2108-2123 Large Multimodal Models (LMMs) have demonstrated strong performance in English, but their effectiveness in Japanese remains limited due to the lack of high-quality training data. Current Japanese LMMs often rely on translated English datasets, restricting their ability to capture Japan-specific cultural knowledge. To address this, we explore the potential of Japanese PDF data as a training resource, an area that remains largely underutilized. We introduce a fully automated pipeline that leverages pretrained models to extract image-text pairs from PDFs through layout analysis, OCR, and vision-language pairing, removing the need for manual annotation. Additionally, we construct instruction data from extracted image-text pairs to enrich the training data. To evaluate the effectiveness of PDF-derived data, we train Japanese LMMs and assess their performance on the Japanese LMM Benchmark. Our results demonstrate substantial improvements, with performance gains ranging from 2.1% to 13.8% on Heron-Bench. Further analysis highlights the impact of PDF-derived data on various factors, such as model size and language models, reinforcing its value as a multimodal resource for Japanese LMMs. @@ -7891,9 +7891,9 @@ <fixed-case>E</fixed-case>ner<fixed-case>GIZA</fixed-case>r: Leveraging <fixed-case>GIZA</fixed-case>++ for Effective Tokenizer Initialization PranaydeepSingh - EnekoAgirreUniversity of the Basque Country (UPV/EHU) + EnekoAgirreUniversity of the Basque Country (UPV/EHU) GorkaAzkuneUniversidad del País Vasco - OrpheeDe ClercqGhent University + OrpheeDe ClercqGhent University ElsLefeverGhent University 2124-2137 Continual pre-training has long been considered the default strategy for adapting models to non-English languages, but struggles with initializing new embeddings, particularly for non-Latin scripts. In this work, we propose EnerGIZAr, a novel methodology that improves continual pre-training by leveraging statistical word alignment techniques. Our approach utilizes GIZA++ to construct a subword-level alignment matrix between source (English) and target language tokens. This matrix enables informed initialization of target tokenizer embeddings, which provides a more effective starting point for adaptation. We evaluate EnerGIZAr against state-of-the-art initialization strategies such as OFA and FOCUS across four typologically diverse languages: Hindi, Basque, Arabic and Korean. Experimental results on key NLP tasks – including POS tagging, Sentiment Analysis, NLI, and NER – demonstrate that EnerGIZAr achieves superior monolingual performance while also out-performing all methods for cross-lingual transfer when tested on XNLI. With EnerGIZAr, we propose an intuitive, explainable as well as state-of-the-art initialisation technique for continual pre-training of English models. @@ -8082,7 +8082,7 @@ <fixed-case>MPL</fixed-case>: Multiple Programming Languages with Large Language Models for Information Extraction - BoLiHebei University of Technology + BoLiHebei University of Technology GexiangFang WeiYePeking University ZhenghuaXu @@ -8156,7 +8156,7 @@ GiuseppeRuggieroUniversity of Turin MatteoTesta Jurgen Van DeWalle - LuigiDi CaroUniversity of Turin, Italy + LuigiDi CaroUniversity of Turin, Italy 2494-2504 Self-supervised learning (SSL) has reduced the reliance on expensive labeling in speech technologies by learning meaningful representations from unannotated data. Since most SSL-based downstream tasks prioritize content information in speech, ideal representations should disentangle content from unwanted variations like speaker characteristics in the SSL representations. However, removing speaker information often degrades other speech components, and existing methods either fail to fully disentangle speaker identity or require resource-intensive models. In this paper, we propose a novel disentanglement method that linearly decomposes SSL representations into speaker-specific and speaker-independent components, effectively generating speaker disentangled representations. Comprehensive experiments show that our approach achieves speaker independence and as such, when applied to content-driven tasks such as voice conversion, our representations yield significant improvements over state-of-the-art methods. 2025.findings-acl.127 @@ -8192,7 +8192,7 @@ YimingDu BinLiangThe Chinese University of Hong Kong WenxuanZhangSingapore University of Technology and Design - Kam-FaiWongThe Chinese University of Hong Kong + Kam-FaiWongThe Chinese University of Hong Kong 2535-2556 The tendency of Large Language Models (LLMs) to generate hallucinations raises concerns regarding their reliability. Therefore, confidence estimations indicating the extent of trustworthiness of the generations become essential. However, current LLM confidence estimations in languages other than English remain underexplored. This paper addresses this gap by introducing a comprehensive investigation of Multilingual Confidence estimation (MlingConf) on LLMs, focusing on both language-agnostic (LA) and language-specific (LS) tasks to explore the performance and language dominance effects of multilingual confidence estimations on different tasks. The benchmark comprises four meticulously checked and human-evaluated high-quality multilingual datasets for LA tasks and one for the LS task tailored to specific social, cultural, and geographical contexts of a language. Our experiments reveal that on LA tasks English exhibits notable linguistic dominance in confidence estimations than other languages, while on LS tasks, using question-related language to prompt LLMs demonstrates better linguistic dominance in multilingual confidence estimations. The phenomena inspire a simple yet effective native-tone prompting strategy by employing language-specific prompts for LS tasks, effectively improving LLMs’ reliability and accuracy in LS scenarios. 2025.findings-acl.129 @@ -8269,7 +8269,7 @@ ShihanDou QinhaoChen ZhihengXi - ZhihaoZhang + ZhihaoZhang YiDong ZhenWangByteDance Inc. ZhihuiFeiByteDance Inc. @@ -8278,7 +8278,7 @@ GuojunMaByteDance Inc. QiZhangFudan University TaoGuiFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 2626-2649 Process-driven dialogue systems, which operate under strict predefined process constraints, are essential in customer service and equipment maintenance scenarios. Although Large Language Models (LLMs) have shown remarkable progress in dialogue and reasoning, they still struggle to solve these strictly constrained dialogue tasks. To address this challenge, we construct Process Flow Dialogue (PFDial) dataset, which contains 12,705 high-quality Chinese dialogue instructions derived from 440 flowcharts containing 5,055 process nodes. Based on PlantUML specification, each UML flowchart is converted into atomic dialogue units i.e., structured five-tuples. Experimental results demonstrate that a 7B model trained with merely 800 samples, and a 0.5B model trained on total data both can surpass 90% accuracy. Additionally, the 8B model can surpass GPT-4o up to 43.88% with an average of 11.00%. We further evaluate models’ performance on challenging backward transitions in process flows and conduct an in-depth analysis of various dataset formats to reveal their impact on model performance in handling decision and sequential branches. The data is released in https://github.com/KongLongGeFDU/PFDial. 2025.findings-acl.134 @@ -8301,7 +8301,7 @@ Do Language Models Understand the Cognitive Tasks Given to Them? Investigations with the N-Back Paradigm XiaoyangHuBrown University - RichardLewisUniversity of Michigan - Ann Arbor + RichardLewisUniversity of Michigan - Ann Arbor 2665-2677 Cognitive tasks originally developed for humans are now increasingly used to study language models. While applying these tasks is often straightforward, interpreting their results can be challenging. In particular, when a model underperforms, it is often unclear whether this results from a limitation in the cognitive ability being tested or a failure to understand the task itself. A recent study argues that GPT 3.5’s declining performance on 2-back and 3-back tasks reflects a working memory capacity limit similar to humans (Gong et al., 2024). By analyzing a range of open-source language models of varying performance levels on these tasks, we show that the poor performance is due at least in part to a limitation in task comprehension and task set maintenance. We challenge the best-performing model with progressively harder versions of the task (up to 10-back) and experiment with alternative prompting strategies, before analyzing model attentions. Our larger aim is to contribute to the ongoing conversation around refining methodologies for the cognitive evaluation of language models. 2025.findings-acl.136 @@ -8429,7 +8429,7 @@ Cross-Lingual Transfer of Debiasing and Detoxification in Multilingual <fixed-case>LLM</fixed-case>s: An Extensive Investigation VeraNeplenbroek AriannaBisazzaUniversity of Groningen - RaquelFernándezUniversity of Amsterdam and University of Amsterdam + RaquelFernándezUniversity of Amsterdam and University of Amsterdam 2805-2830 Recent generative large language models (LLMs) show remarkable performance in non-English languages, but when prompted in those languages they tend to express higher harmful social biases and toxicity levels. Prior work has shown that finetuning on specialized datasets can mitigate this behavior, and doing so in English can transfer to other languages. In this work, we investigate the impact of different finetuning methods on the model’s bias and toxicity, but also on its ability to produce fluent and diverse text. We reduce biases by finetuning on curated non-harmful text, but find only direct preference optimization to be effective for mitigating toxicity. The mitigation caused by applying these methods in English also transfers to non-English languages. We find evidence that the extent to which transfer takes place can be predicted by the amount of data in a given language present in the model’s pretraining data. However, this transfer of bias and toxicity mitigation often comes at the expense of decreased language generation ability in non-English languages, highlighting the importance of developing language-specific bias and toxicity mitigation methods. 2025.findings-acl.145 @@ -8451,7 +8451,7 @@ XimingDong ShaoweiWangUniversity of Manitoba DayiLinHuawei Technologies Canada Co., Ltd. - AhmedHassanQueen’s University + AhmedHassanQueen’s University 2844-2859 Optimizing Large Language Model (LLM) performance requires well-crafted prompts, but manual prompt engineering is labor-intensive and often ineffective. Automated prompt optimization techniques address this challenge but the major of them rely on randomly selected evaluation subsets, which fail to represent the full dataset, leading to unreliable evaluations and suboptimal prompts. Existing coreset selection methods, designed for LLM benchmarking, are unsuitable for prompt optimization due to challenges in clustering similar samples, high data collection costs, and the unavailability of performance data for new or private datasets. To overcome these issues, we propose IPOMP, an Iterative evaluation data selection approach for effective Prompt Optimization using real time Model Performance. IPOMP is a two-stage approach that selects representative and diverse samples using semantic clustering and boundary analysis, followed by iterative refinement with real-time model performance data to replace redundant samples. Evaluations on two datasets BIG-bench and LIAR, and two models GPT-3.5 and GPT-4o-mini, show that IPOMP improves effectiveness by at least 1.6% to 3.1%, and stability by at least 50% to 55.5% compared with the best baseline across the studied datasets and models, with minimal computational overhead below 1%. Furthermore, the results demonstrate that our real-time performance-guided refinement approach can be universally applied to enhance existing coreset selection methods. 2025.findings-acl.147 @@ -8476,7 +8476,7 @@ FelixDrinkallUniversity of Oxford StefanZohrenUniversity of Oxford MichaelMcMahonUniversity of Oxford - Janet B.PierrehumbertUniversity of Oxford + Janet B.PierrehumbertUniversity of Oxford 2889-2904 Macroeconomic fluctuations and the narratives that shape them form a mutually reinforcing cycle: public discourse can spur behavioural changes leading to economic shifts, which then result in changes in the stories that propagate. We show that shifts in semantic embedding space can be causally linked to real-world market shocks or deviations from the expected market behaviour (sec:market_shocks). Furthermore, we show how partisanship can influence the predictive power of text for market fluctuations and shape reactions to those same shocks. We also provide some evidence that text-based signals are particularly salient during rare events such as COVID-19, highlighting the value of language data as an exogenous variable in economic forecasting. Our findings underscore the bidirectional relationship between news outlets and market shocks, offering a novel empirical approach to studying their effect on each other. 2025.findings-acl.149 @@ -8557,7 +8557,7 @@ Explaining Puzzle Solutions in Natural Language: An Exploratory Study on 6x6 Sudoku AnirudhMaiya RazanAlghamdiKing Saud University - Maria LeonorPachecoUniversity of Colorado at Boulder + Maria LeonorPachecoUniversity of Colorado at Boulder AshutoshTrivediUniversity of Colorado at Boulder FabioSomenziUniversity of Colorado at Boulder 3002-3009 @@ -8602,7 +8602,7 @@ ZhuohanXieMohamed bin Zayed University of Artificial Intelligence ChenyangLyuMohamed bin Zayed University of Artificial Intelligence XiuyingChenMohamed bin Zayed University of Artificial Intelligence - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence FakhriKarrayUniversity of Waterloo and Mohamed bin Zayed University of Artificial Intelligence 3047-3059 The rapid advancement of vision-language models (VLMs) has brought a lot of attention to their safety alignment. However, existing methods have primarily focused on model undersafety, where the model responds to hazardous queries, while neglecting oversafety, where the model refuses to answer safe queries. In this paper, we introduce the concept of safety calibration, which systematically addresses both undersafety and oversafety. Specifically, we present VSCBench, a novel dataset of 3,600 image-text pairs that are visually or textually similar but differ in terms of safety, which is designed to evaluate safety calibration across image-centric and text-centric scenarios. Based on our benchmark, we evaluate safety calibration across eleven widely used VLMs. Our extensive experiments revealed major issues with both undersafety and oversafety. We further investigated four approaches to improve the model’s safety calibration. We found that even though some methods effectively calibrated the models’ safety problems, these methods also lead to the degradation of models’ utility. This trade-off underscores the urgent need for advanced calibration methods, and our benchmark provides a valuable tool for evaluating future approaches. @@ -8640,7 +8640,7 @@ James XuZhaonational university of singaore, National University of Singapore Jimmy Z.j.Liu BryanHooiNational University of Singapore - See-KiongNgNational University of Singapore + See-KiongNgNational University of Singapore 3102-3125 Large language models (LLMs) are widely used for long-form text generation. However, factual errors in the responses would undermine their reliability. Despite growing attention to LLM factuality, the effect of response length on factuality remains underexplored. In this work, we systematically investigate this relationship by first introducing an automatic and bi-level long-form factuality evaluation framework, which achieves high agreement with human annotations while being cost-effective. Using this framework, we conduct controlled experiments and find that longer responses exhibit lower factual precision, confirming the presence of length bias. To explain this phenomenon, we empirically examine three hypotheses: error propagation, long context, and facts exhaustion. Our results reveal that facts exhaustion, where the model gradually exhausts more reliable knowledge, is the primary cause of factual degradation, rather than the other two hypotheses. 2025.findings-acl.161 @@ -8739,7 +8739,7 @@ HaijunHe FeiLiWuhan University ChongTeng - DonghongJi + DonghongJi 3221-3235 Stance detection, which aims to identify public opinion towards specific targets using social media data, is an important yet challenging task. With the increasing number of online debates among social media users, conversational stance detection has become a crucial research area. However, existing conversational stance detection datasets are restricted to a limited set of specific targets, which constrains the effectiveness of stance detection models when encountering a large number of unseen targets in real-world applications. To bridge this gap, we manually curate a large-scale, high-quality zero-shot conversational stance detection dataset, named ZS-CSD, comprising 280 targets across two distinct target types. Leveraging the ZS-CSD dataset, we propose SITPCL, a speaker interaction and target-aware prototypical contrastive learning model, and establish the benchmark performance in the zero-shot setting. Experimental results demonstrate that our proposed SITPCL model achieves state-of-the-art performance in zero-shot conversational stance detection. Notably, the SITPCL model attains only an F1-macro score of 43.81%, highlighting the persistent challenges in zero-shot conversational stance detection. 2025.findings-acl.168 @@ -8815,7 +8815,7 @@ <i> <fixed-case>D</fixed-case>-<fixed-case>GEN</fixed-case></i>: Automatic Distractor Generation and Evaluation for Reliable Assessment of Generative Models GraceByunEmory University - Jinho D.ChoiEmory University + Jinho D.ChoiEmory University 3316-3349 Evaluating generative models with open-ended generation is challenging due to inconsistencies in response formats. Multiple-choice (MC) evaluation mitigates this issue, but generating high-quality distractors is time-consuming and labor-intensive. We introduce D-GEN, the first open-source distractor generator model that transforms open-ended data into an MC format. To evaluate distractor quality, we propose two novel methods: 1) ranking alignment, ensuring generated distractors retain the discriminatory power of ground-truth distractors, and 2) entropy analysis, comparing model confidence distributions. Our results show that D-GEN preserves ranking consistency (Spearman’s \rho 0.99, Kendall’s \tau 0.94) and closely matches the entropy distribution of ground-truth distractors. Human evaluation further confirms the fluency, coherence, distractiveness, and incorrectness. Our work advances robust and efficient distractor generation with automated evaluation, setting a new standard for MC evaluation. 2025.findings-acl.174 @@ -8832,7 +8832,7 @@ QiqiangLinGuangdong OPPO Mobile Telecommunications Corp.,Ltd. JinchengJinchengGuangdong OPPO Mobile Telecommunications Corp.,Ltd. MuningWen - WeinanZhang + WeinanZhang QiuyingPengOPPO Research Institute JunWangOPPO Research Institute 3350-3376 @@ -8843,14 +8843,14 @@ Beyond In-Context Learning: Aligning Long-form Generation of Large Language Models via Task-Inherent Attribute Guidelines - Do XuanLongNational University of Singapore + Do XuanLongNational University of Singapore Duong NgocYen Do XuanTrong - Anh TuanLuuNanyang Technological University + Anh TuanLuuNanyang Technological University KenjiKawaguchiNational University of Singapore - ShafiqJotyNanyang Technological University and SalesForce.com + ShafiqJotyNanyang Technological University and SalesForce.com Min-YenKanNational University of Singapore - Nancy F.Chen + Nancy F.Chen 3377-3411 In-context learning (ICL) is an important yet not fully understood ability of pre-trained large language models (LLMs). It can greatly enhance task performance using a few examples, termed demonstrations, without fine-tuning. Although effective in question answering, ICL often underperforms in long-form generation tasks such as summarization. Under appropriately realistic assumptions, we empirically and theoretically show that ICL demonstrations alone are insufficient to teach LLMs the task’s language and format distributions for generation. We argue for explicit exposure to the task distributions and hypothesize that defining them by prompting enhances model performance. To this end, we present LongGuide, which efficiently generates two parallel streams of guidelines capturing task language and format properties: (i) Metric Guidelines (MGs) that instruct models to optimize self-evaluated metrics; and (ii) Output Constraint Guidelines (OCGs) that constrain generation at both token and sentence levels. LongGuide automatically selects the best combination of guidelines, improving both strong open- and closed-source LLMs by over 5% in both zero- and few-shot settings. We show that LongGuide is generalizable, learnable by weak models to enhance strong ones, and integrates synergistically with automatic prompt optimizers. 2025.findings-acl.176 @@ -8899,12 +8899,12 @@ m<fixed-case>OSCAR</fixed-case>: A Large-scale Multilingual and Multimodal Document-level Corpus MatthieuFuteral Armel RandyZebazeINRIA - PedroOrtiz SuarezCommon Crawl Foundation + PedroOrtiz SuarezCommon Crawl Foundation JulienAbadjiINRIA RémiLacroixInstitut du développement et des ressources en informatique scientifique (IDRIS) CordeliaSchmidGoogle, INRIA and Inria RachelBawdenInria - BenoîtSagotInria + BenoîtSagotInria 3461-3494 Multimodal Large Language Models (mLLMs) are trained on a large amount of text-image data. While most mLLMs are trained on caption-like data only, Alayrac et al. (2022) showed that additionally training them on interleaved sequences of text and images can lead to the emergence of in-context learning capabilities. However, the dataset they used, M3W, is not public and is only in English. There have been attempts to reproduce their results but the released datasets are English-only. In contrast, current multilingual and multimodal datasets are either composed of caption-like only or medium-scale or fully private data. This limits mLLM research for the 7,000 other languages spoken in the world. We therefore introduce mOSCAR, to the best of our knowledge the first large-scale multilingual and multimodal document corpus crawled from the web. It covers 163 languages, 303M documents, 200B tokens and 1.15B images. We carefully conduct a set of filtering and evaluation steps to make sure mOSCAR is sufficiently safe, diverse and of good quality. We additionally train two types of multilingual model to prove the benefits of mOSCAR: (1) a model trained on a subset of mOSCAR and captioning data and (2) a model trained on captioning data only. The model additionally trained on mOSCAR shows a strong boost in few-shot learning performance across various multilingual image-text tasks and benchmarks, confirming previous findings for English-only mLLMs. The dataset will be made publicly accessible under the Creative Commons CC BY 4.0 license. 2025.findings-acl.180 @@ -8919,7 +8919,7 @@ Hans ChristianFarsethåsUniversity of Oslo AndreyKutuzovUniversity of Oslo ErikVelldalUniversity of Oslo - LiljaØvrelidDept. of Informatics, University of Oslo + LiljaØvrelidDept. of Informatics, University of Oslo 3495-3541 This paper introduces NorEval, a new and comprehensive evaluation suite for large-scale standardized benchmarking of Norwegian generative language models (LMs). NorEval consists of 24 high-quality human-created datasets – of which five are created from scratch. In contrast to existing benchmarks for Norwegian, NorEval covers a broad spectrum of task categories targeting Norwegian language understanding and generation, establishes human baselines, and focuses on both of the official written standards of the Norwegian language: Bokmål and Nynorsk. All our datasets and a collection of over 100 human-created prompts are integrated into LM Evaluation Harness, ensuring flexible and reproducible evaluation. We describe the NorEval design and present the results of benchmarking 19 open-source pretrained and instruction-tuned LMs for Norwegian in various scenarios. Our benchmark, evaluation framework, and annotation materials are publicly available. 2025.findings-acl.181 @@ -8930,7 +8930,7 @@ Massively Multilingual Instruction-Following Information Extraction ThangLe Huy HuuNguyen - Anh TuanLuuNanyang Technological University + Anh TuanLuuNanyang Technological University Thien HuuNguyenUniversity of Oregon 3542-3585 The literature on information extraction (IE) has mostly centered around a selected few languages, hindering their applications on multilingual corpora. In this work, we introduce MASSIE - a comprehensive collection for instruction-following multilingual IE that standardizes and unifies 215 manually annotated datasets, covering 96 typologically diverse languages from 18 language families. Based on MASSIE, we conduct empirical studies on few-shot in-context learning and report important factors that either positively or negatively affect LLMs’ performance in multilingual IE, covering 21 LLMs sizing from 0.5B to 72B. Additionally, we introduce LF1 - a structure-aware metric that captures partially matched spans, resolving the conservativeness of standard exact matching scheme which overpenalizes LLMs’ predictions. Overall, our results signify that multilingual IE remains very challenging for existing LLMs, especially on complex tasks involving relations and events. In addition, performance gap is extremely large among high- and low-performing languages, but the group of similar-performing languages largely overlap between different LLMs, suggesting a shared performance bias in current LLMs. @@ -8945,7 +8945,7 @@ HainingWang FeiLiWuhan University ChongTeng - DonghongJi + DonghongJi 3586-3601 Previous multimodal sentence representation learning methods have achieved impressive performance. However, most approaches focus on aligning images and text at a coarse level, facing two critical challenges: cross-modal misalignment bias and intra-modal semantic divergence, which significantly degrade sentence representation quality. To address these challenges, we propose DALR (Dual-level Alignment Learning for Multimodal Sentence Representation). For cross-modal alignment, we propose a consistency learning module that softens negative samples and utilizes semantic similarity from an auxiliary task to achieve fine-grained cross-modal alignment. Additionally, we contend that sentence relationships go beyond binary positive-negative labels, exhibiting a more intricate ranking structure. To better capture these relationships and enhance representation quality, we integrate ranking distillation with global intra-modal alignment learning. Comprehensive experiments on semantic textual similarity (STS) and transfer (TR) tasks validate the effectiveness of our approach, consistently demonstrating its superiority over state-of-the-art baselines. 2025.findings-acl.183 @@ -8991,7 +8991,7 @@ JieMa NehaAnna John SrikanthDoss - LluisMarquez + LluisMarquez MiguelBallesterosOracle YassineBenajiba 3631-3643 @@ -9074,7 +9074,7 @@ DebelaGemechu RamonRuiz-DolzUniversity of Dundee HenrikeBeyerUniversity of Dundee - ChrisReedUniversity of Dundee + ChrisReedUniversity of Dundee 3717-3741 While Large Language Models (LLMs) have demonstrated promising results on a range of reasoning benchmarks—particularly in formal logic, mathematical tasks, and Chain-of-Thought prompting—less is known about their capabilities in unconstrained natural language reasoning. Argumentative reasoning, a form of reasoning naturally expressed in language and central to everyday discourse, presents unique challenges for LLMs due to its reliance on context, implicit assumptions, and value judgments. This paper addresses a gap in the study of reasoning in LLMs by presenting the first large-scale evaluation of their unconstrained natural language reasoning capabilities based on natural language argumentation. The paper offers three contributions: (i) the formalisation of a new strategy designed to evaluate argumentative reasoning in LLMs: argument-component selection; (ii) the creation of the Argument Reasoning Tasks (ART) dataset, a new benchmark for argument-component selection based on argument structures for natural language reasoning; and (iii) an extensive experimental analysis involving four different models, demonstrating the limitations of LLMs on natural language reasoning tasks. 2025.findings-acl.192 @@ -9153,7 +9153,7 @@ SharathNaganna SaprativaBhattacharjeeIndian Institute of Technology Bombay, Indian Institute of Technology, Bombay BiplabBanerjeeIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology - PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology + PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology 3838-3858 Humblebragging is a phenomenon in which individuals present self-promotional statements under the guise of modesty or complaints. For example, a statement like, “Ugh, I can’t believe I got promoted to lead the entire team. So stressful!”, subtly highlights an achievement while pretending to be complaining. Detecting humblebragging is important for machines to better understand the nuances of human language, especially in tasks like sentiment analysis and intent recognition. However, this topic has not yet been studied in computational linguistics. For the first time, we introduce the task of automatically detecting humblebragging in text. We formalize the task by proposing a 4-tuple definition of humblebragging and evaluate machine learning, deep learning, and large language models (LLMs) on this task, comparing their performance with humans. We also create and release a dataset called HB-24, containing 3,340 humblebrags generated using GPT-4o. Our experiments show that detecting humblebragging is non-trivial, even for humans. Our best model achieves an F1-score of 0.88. This work lays the foundation for further exploration of this nuanced linguistic phenomenon and its integration into broader natural language understanding systems. 2025.findings-acl.198 @@ -9217,7 +9217,7 @@ EmilyShengResearch, Microsoft DanVannResearch, Microsoft MatthewVogelMicrosoft - HannaWallachMicrosoft + HannaWallachMicrosoft 3907-3932 Representational harms are widely recognized among fairness-related harms caused by generative language systems. However, their definitions are commonly under-specified. We make a theoretical contribution to the specification of representational harms by introducing a framework, grounded in speech act theory (Austin 1962), that conceptualizes representational harms caused by generative language systems as the perlocutionary effects (i.e., real-world impacts) of particular types of illocutionary acts (i.e., system behaviors). Building on this argument and drawing on relevant literature from linguistic anthropology and sociolinguistics, we provide new definitions of stereotyping, demeaning, and erasure. We then use our framework to develop a granular taxonomy of illocutionary acts that cause representational harms, going beyond the high-level taxonomies presented in previous work. We also discuss the ways that our framework and taxonomy can support the development of valid measurement instruments. Finally, we demonstrate the utility of our framework and taxonomy via a case study that engages with recent conceptual debates about what constitutes a representational harm and how such harms should be measured. 2025.findings-acl.202 @@ -9226,7 +9226,7 @@ Turning Conversations into Workflows: A Framework to Extract and Evaluate Dialog Workflows for Service <fixed-case>AI</fixed-case> Agents - Prafulla KumarChoubeySalesForce.com + Prafulla KumarChoubeySalesForce.com XiangyuPengSalesforce AI Research ShilpaBhagavathSalesForce.com CaimingXiongSalesforce Research @@ -9308,9 +9308,9 @@ <fixed-case>MALAMUTE</fixed-case>: A Multilingual, Highly-granular, Template-free, Education-based Probing Dataset SagiShaier - George ArthurBakerUniversity of Utah and University of Colorado Boulder + George ArthurBakerUniversity of Utah and University of Colorado Boulder ChiranthanSridhar - LawrenceHunterUniversity of Chicago + LawrenceHunterUniversity of Chicago Katharina Von DerWenseJohannes-Gutenberg Universität Mainz, Johannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University 4051-4069 Language models (LMs) have excelled in various broad domains. However, to ensure their safe and effective integration into real-world educational settings, they must demonstrate proficiency in specific, granular areas of knowledge. Existing cloze-style benchmarks, commonly used to evaluate LMs’ knowledge, have three major limitations. They: 1) do not cover the educational domain; 2) typically focus on low-complexity, generic knowledge or broad domains, which do not adequately assess the models’ knowledge in specific subjects; and 3) often rely on templates that can bias model predictions. Here, we introduce MALAMUTE, a multilingual, template-free, and highly granular probing dataset comprising expert-written, peer-reviewed probes from 71 university-level textbooks across three languages (English, Spanish, and Polish). MALAMUTE is the first education-based cloze-style dataset. It covers eight domains, each with up to 14 subdomains, further broken down into concepts and concept-based prompts, totaling 33,361 university curriculum concepts and 116,887 prompts. MALAMUTE’s fine granularity, educational focus, and inclusion of both sentence-level and paragraph-level prompts make it an ideal tool for evaluating LMs’ course-related knowledge. Our evaluation of masked and causal LMs on MALAMUTE shows that despite overall proficiency, they have significant gaps in knowledge when examined closely on specific subjects, hindering their safe use in classrooms and underscoring the need for further development. @@ -9366,7 +9366,7 @@ Yifan.zhangYifan.zhang HuaXu ShuaiFanAISpeech Ltd - GuodongZhouSoochow University, China + GuodongZhouSoochow University, China 4118-4130 Dialogue text segmentation aims to partition dialogue content into consecutive paragraphs based on themes or logic, enhancing its comprehensibility and manageability. Current text segmentation models, when applied directly to STS (Streaming Text Segmentation), exhibit numerous limitations, such as imbalances in labels that affect the stability of model training, and discrepancies between the model’s training tasks (sentence classification) and the actual text segmentation that limit the model’s segmentation capabilities.To address these challenges, we first implement STS for the first time using a sliding window-based segmentation method. Secondly, we employ two different levels of sliding window-based balanced label strategies to stabilize the training process of the streaming segmentation model and enhance training convergence speed. Finally, by adding a one-dimensional bounding-box regression task for text sequences within the window, we restructure the training approach of STS tasks, shifting from sentence classification to sequence segmentation, thereby aligning the training objectives with the task objectives, which further enhanced the model’s performance. Extensive experimental results demonstrate that our method is robust, controllable, and achieves state-of-the-art performance. 2025.findings-acl.213 @@ -9465,7 +9465,7 @@ ZhaochengDu XiangyangLi YichaoWang - YuhaoWang + YuhaoWang QidongLiuCity University of Hong Kong and Xi’an Jiaotong University MaolinWang HuifengGuo @@ -9497,8 +9497,8 @@ Corpus Poisoning via Approximate Greedy Gradient Descent JinyanSuCornell University - PreslavNakovMohamed bin Zayed University of Artificial Intelligence - ClaireCardieCornell University + PreslavNakovMohamed bin Zayed University of Artificial Intelligence + ClaireCardieCornell University 4274-4294 Dense retrievers are widely used in information retrieval and have also been successfully extended to other knowledge intensive areas such as language models, e.g., Retrieval-Augmented Generation (RAG) systems. Unfortunately, they have recently been shown to be vulnerable to corpus poisoning attacks in which a malicious user injects a small fraction of adversarial passages into the retrieval corpus to trick the system into returning these passages among the top-ranked results for a broad set of user queries. Further study is needed to understand the extent to which these attacks could limit the deployment of dense retrievers in real-world applications. In this work, we propose Approximate Greedy Gradient Descent (AGGD), a new attack on dense retrieval systems based on the widely used HotFlip method for efficiently generating adversarial passages. We demonstrate that AGGD can select a higher quality set of token-level perturbations than HotFlip by replacing its random token sampling with a more structured search. Experimentally, we show that our method achieves a high attack success rate on several datasets and using several retrievers, and can generalize to unseen queries and new domains. Notably, our method is extremely effective in attacking the ANCE retrieval model, achieving attack success rates that are 15.24% and 17.44% higher on the NQ and MS MARCO datasets, respectively, compared to HotFlip. Additionally, we demonstrate AGGD’s potential to replace HotFlip in other adversarial attacks, such as knowledge poisoning of RAG systems. 2025.findings-acl.222 @@ -9554,7 +9554,7 @@ Fraud-R1 : A Multi-Round Benchmark for Assessing the Robustness of <fixed-case>LLM</fixed-case> Against Augmented Fraud and Phishing Inducements - ShuYang + ShuYang ShenzheZhu ZeyuWu KeyuWang @@ -9651,7 +9651,7 @@ <fixed-case>CLIX</fixed-case>: Cross-Lingual Explanations of Idiomatic Expressions AaronGluckUniversity of Colorado at Boulder Katharina Von DerWenseJohannes-Gutenberg Universität Mainz, Johannes-Gutenberg Universität Mainz, University of Colorado, Boulder and New York University - Maria LeonorPachecoUniversity of Colorado at Boulder + Maria LeonorPachecoUniversity of Colorado at Boulder 4515-4529 Automated definition generation systems have been proposed to support vocabulary expansion for language learners. The main barrier to the success of these systems is that learners often struggle to understand definitions due to the presence of potentially unfamiliar words and grammar, particularly when non-standard language is involved. To address these challenges, we propose CLIX, the task of Cross-Lingual explanations of Idiomatic eXpressions. We explore the capabilities of current NLP models for this task, and observe that while it remains challenging, large language models show promise. Finally, we perform a detailed error analysis to highlight the key challenges that need to be addressed before we can reliably incorporate these systems into educational tools. 2025.findings-acl.233 @@ -9716,7 +9716,7 @@ DaoanZhang HassanForooshUniversity of Central Florida DongYuTencent AI Lab - FeiLiuEmory University + FeiLiuEmory University 4587-4603 LLMs are ideal for decision-making thanks to their ability to reason over long contexts. However, challenges arise when processing speech transcripts that describe complex scenarios, as they are verbose and include repetition, hedging, and vagueness. E.g., during a company’s earnings call, an executive might project a positive revenue outlook to reassure investors, despite uncertainty regarding future earnings. It is crucial for LLMs to incorporate this uncertainty systematically when making decisions. In this paper, we introduce DeFine, a modular framework that constructs probabilistic factor profiles from complex scenarios. It then integrates these profiles with analogical reasoning, leveraging insights from similar past experiences to guide LLMs in making critical decisions in new situations. Our framework separates the tasks of quantifying uncertainty and incorporating it into LLM decision-making. This approach is particularly useful in areas such as consulting and financial deliberation, where making decisions under uncertainty is vital. 2025.findings-acl.238 @@ -9729,9 +9729,9 @@ Emre CanAcikgoz HongruWangThe Chinese University of Hong Kong XiusiChenUniversity of Illinois at Urbana-Champaign - AvirupSilInternational Business Machines - DilekHakkani-TürUniversity of Illinois at Urbana-Champaign - GokhanTurUniversity of Illinois at Urbana-Champaign + AvirupSilInternational Business Machines + DilekHakkani-TürUniversity of Illinois at Urbana-Champaign + GokhanTurUniversity of Illinois at Urbana-Champaign HengJiUniversity of Illinois, Urbana-Champaign 4604-4621 Current Large Language Model (LLM) agents demonstrate strong reasoning and tool use capabilities, but often lack self-awareness, failing to balance these approaches effectively. This imbalance leads to **Tool Overuse**, where models unnecessarily rely on external tools for tasks solvable with parametric knowledge, increasing computational overhead. Inspired by human metacognition, we introduce **SMART** (Strategic Model-Aware Reasoning with Tools), a paradigm that enhances an agent’s self-awareness to optimize task handling and reduce tool overuse. To support this paradigm, we introduce **SMART-ER**, a dataset spanning three domains, where reasoning alternates between parametric knowledge and tool-dependent steps, with each step enriched by rationales explaining when tools are necessary. Through supervised training, we develop **SMARTAgent**, a family of models that dynamically balance parametric knowledge and tool use. Evaluations show that SMARTAgent reduces tool use by 24% while improving performance by over 37%, enabling 7B-scale models to match its 70B counterpart and GPT-4. Additionally, SMARTAgent generalizes to out-of-distribution test data like GSM8K and MINTQA, maintaining accuracy with just one-fifth the tool calls. These highlight the potential of strategic tool use to enhance reasoning, mitigate overuse, and bridge the gap between model size and performance, advancing intelligent and resource-efficient agent designs. @@ -9744,7 +9744,7 @@ PabloRodríguez Silvia PaniaguaSuárez PabloGamalloUniversidad de Santiago de Compostela - Susana SoteloDocioUniversidade de Santiago de Compostela + Susana SoteloDocioUniversidade de Santiago de Compostela 4622-4637 Recent advances in Large Language Models (LLMs) have led to remarkable improvements in language understanding and text generation. However, challenges remain in enhancing their performance for underrepresented languages, ensuring continual learning without catastrophic forgetting, and developing robust evaluation methodologies. This work addresses these issues by investigating the impact of Continued Pretraining (CPT) on multilingual models and proposing a comprehensive evaluation framework for LLMs, focusing on the case of Galician language. Our first contribution explores CPT strategies for languages with limited representation in multilingual models. We analyze how CPT with Galician corpora improves text generation while assessing the trade-offs between linguistic enrichment and task-solving capabilities. Our findings show that CPT with small, high-quality corpora and diverse instructions enhances both task performance and linguistic quality. Our second contribution is a structured evaluation framework based on distinguishing task-based and language-based assessments, leveraging existing and newly developed benchmarks for Galician. Additionally, we contribute new Galician LLMs, datasets for evaluation and instructions, and an evaluation framework. 2025.findings-acl.240 @@ -9832,7 +9832,7 @@ QiaoQiaoIowa State University BachNguyen QingWangIowa State University - QiLiIowa State University + QiLiIowa State University 4789-4807 Retrieval-augmented generation (RAG) improves Large Language Models (LLMs) by incorporating external information into the response generation process. However, how context-faithful LLMs are and what factors influence LLMs’ context faithfulness remain largely unexplored. In this study, we investigate the impact of memory strength and evidence presentation on LLMs’ receptiveness to external evidence. We quantify the memory strength of LLMs by measuring the divergence in LLMs’ responses to different paraphrases of the same question, which is not considered by previous works. We also generate evidence in various styles to examine LLMs’ behavior. Our results show that for questions with high memory strength, LLMs are more likely to rely on internal memory. Furthermore, presenting paraphrased evidence significantly increases LLMs’ receptiveness compared to simple repetition or adding details. These findings provide key insights for improving retrieval-augmented generation and context-aware LLMs. Our code is available at https://github.com/liyp0095/ContextFaithful. 2025.findings-acl.247 @@ -9862,7 +9862,7 @@ CanQinSalesForce.com HaoyiQiuUCLA Computer Science Department, University of California, Los Angeles PhilippeLabanMicrosoft - ShafiqJotyNanyang Technological University and SalesForce.com + ShafiqJotyNanyang Technological University and SalesForce.com CaimingXiongSalesforce Research Chien-ShengWuSalesforce AI 4830-4843 @@ -9923,7 +9923,7 @@ SoyeongJeongKorea Advanced Institute of Science & Technology HoyunSongKorea Advanced Institute of Science & Technology SeungYoonHan - Jong C.ParkKorea Advanced Institute of Science and Technology + Jong C.ParkKorea Advanced Institute of Science and Technology 4895-4924 We introduce EXIT, an extractive context compression framework that enhances both the effectiveness and efficiency of retrieval-augmented generation (RAG) in question answering (QA). Current RAG systems often struggle when retrieval models fail to rank the most relevant documents, leading to the inclusion of more context at the expense of latency and accuracy. While abstractive compression methods can drastically reduce token counts, their token-by-token generation process significantly increases end-to-end latency. Conversely, existing extractive methods reduce the latency but rely on independent, non-adaptive sentence selection, failing to fully utilize contextual information. EXIT addresses these limitations by classifying sentences from retrieved documents—while preserving their contextual dependencies—enabling parallelizable, context-aware extraction that adapts to query complexity and retrieval quality. Our evaluations on both single-hop and multi-hop QA tasks show that EXIT consistently surpasses existing compression methods and even uncompressed baselines in QA accuracy, while also delivering substantial reductions in inference time and token count. By improving both effectiveness and efficiency, EXIT provides a promising direction for developing scalable, high-quality QA solutions in RAG pipelines. Our code is available at https://github.com/ThisIsHwang/EXIT. 2025.findings-acl.253 @@ -9958,7 +9958,7 @@ YanhaoJiaNanyang Technological University MeihuiziJiaNorthwest Normal University Lanzhou FengYichao - Anh TuanLuuNanyang Technological University + Anh TuanLuuNanyang Technological University 4937-4952 Parameter-efficient fine-tuning (PEFT) can bridge the gap between large language models (LLMs) and downstream tasks. However, PEFT has been proven vulnerable to malicious attacks. Research indicates that poisoned LLMs, even after PEFT, retain the capability to activate internalized backdoors when input samples contain predefined triggers. In this paper, we introduce a novel weak-to-strong unlearning algorithm to defend against backdoor attacks based on feature alignment knowledge distillation, named W2SDefense. Specifically, we first train a small-scale language model through full-parameter fine-tuning to serve as the clean teacher model. Then, this teacher model guides the large-scale poisoned student model in unlearning the backdoor, leveraging PEFT. Theoretical analysis suggests that W2SDefense has the potential to enhance the student model’s ability to unlearn backdoor features, preventing the activation of the backdoor. We conduct comprehensive experiments on three state-of-the-art large language models and several different backdoor attack algorithms. Our empirical results demonstrate the outstanding performance of W2SDefense in defending against backdoor attacks without compromising model performance. 2025.findings-acl.255 @@ -9971,7 +9971,7 @@ GuoyinWangAlibaba Group YizhongWangDepartment of Computer Science, University of Washington JiweiLiZhejiang University - EduardHovyUniversity of Melbourne and Carnegie Mellon University + EduardHovyUniversity of Melbourne and Carnegie Mellon University ChenGuo 4953-4967 Packing, initially utilized in the pre-training phase, is an optimization technique designed to maximize hardware resource efficiency by combining different training sequences to fit the model’s maximum input length. Although it has demonstrated effectiveness during pre-training, there remains a lack of comprehensive analysis for the supervised fine-tuning (SFT) stage on the following points: (1) whether packing can effectively enhance training efficiency while maintaining performance, (2) the suitable size of the model and dataset for fine-tuning with the packing method, and (3) whether packing unrelated or related training samples might cause the model to either excessively disregard or over-rely on the context.In this paper, we perform extensive comparisons between SFT methods using padding and packing, covering SFT datasets ranging from 69K to 1.2M and models from 8B to 70B. This provides the first comprehensive analysis of the advantages and limitations of packing versus padding, as well as practical considerations for implementing packing in various training scenarios. Our analysis covers various benchmarks, including knowledge, reasoning, and coding, as well as GPT-based evaluations, time efficiency, and other fine-tuning parameters. We also open-source our code for fine-tuning and evaluation and provide checkpoints fine-tuned on datasets of different sizes, aiming to advance future research on packing methods. @@ -10067,7 +10067,7 @@ KepingBiChinese Academy of Sciences WeiChenChinese Academy of Sciences JiafengGuoInstitute of Computing Technolgy, Chinese Academy of Sciences - XueqiChengInstitute of Computing Technology, Chinese Academy + XueqiChengInstitute of Computing Technology, Chinese Academy 5081-5097 As large language models (LLMs) become an important way of information access, there have been increasing concerns that LLMs may intensify the spread of unethical content, including implicit bias that hurts certain populations without explicit harmful words. In this paper, we conduct a rigorous evaluation of LLMs’ implicit bias towards certain demographics by attacking them from a psychometric perspective to elicit agreements to biased viewpoints. Inspired by psychometric principles in cognitive and social psychology, we propose three attack approaches, i.e., Disguise, Deception, and Teaching. Incorporating the corresponding attack instructions, we built two benchmarks: (1) a bilingual dataset with biased statements covering four bias types (2.7K instances) for extensive comparative analysis, and (2) BUMBLE, a larger benchmark spanning nine common bias types (12.7K instances) for comprehensive evaluation. Extensive evaluation of popular commercial and open-source LLMs shows that our methods can elicit LLMs’ inner bias more effectively than competitive baselines. Our attack methodology and benchmarks offer an effective means of assessing the ethical risks of LLMs, driving progress toward greater accountability in their development. 2025.findings-acl.263 @@ -10214,7 +10214,7 @@ HaoWangGoogle SichengZhou WenbingHuangRenmin University of China - YangLiu + YangLiu 5247-5270 The rapid advancement of large language models (LLMs) has spurred significant interest in tool learning, where LLMs are augmented with external tools to tackle complex tasks. However, existing tool environments face challenges in balancing stability, scale, and realism, particularly for benchmarking purposes. To address this, we propose MirrorAPI, a novel framework that trains specialized LLMs to accurately simulate real API responses, effectively acting as “mirrors” to tool environments. Using a comprehensive dataset of request-response pairs from 7,000+ APIs, we employ supervised fine-tuning and chain-of-thought reasoning to enhance simulation fidelity. MirrorAPI achieves superior accuracy and stability compared to state-of-the-art methods, as demonstrated by its performance on the newly constructed MirrorAPI-Bench and its integration into StableToolBench. 2025.findings-acl.273 @@ -10244,7 +10244,7 @@ Chain of Methodologies: Scaling Test Time Computation without Training - CongLiu + CongLiu JieWuTemple University WeigangWuSUN YAT-SEN UNIVERSITY XuChenSUN YAT-SEN UNIVERSITY @@ -10316,7 +10316,7 @@ YulanHeKing’s College London, University of London HuiWang YueYu - Kam-FaiWongThe Chinese University of Hong Kong + Kam-FaiWongThe Chinese University of Hong Kong BinLiangThe Chinese University of Hong Kong RuifengXuHarbin Institute of Technology 5377-5398 @@ -10366,7 +10366,7 @@ NanHuSoutheast University ZemingLiu Jeff Z.PanUniversity of Edinburgh, University of Edinburgh - Kam-FaiWongThe Chinese University of Hong Kong + Kam-FaiWongThe Chinese University of Hong Kong 5433-5453 Existing benchmarks that assess Language Models (LMs) as Language Agents (LAs) for tool use primarily focus on stateless, single-turn interactions or partial evaluations, such as tool selection in a single turn, overlooking the inherent stateful nature of interactions in multi-turn applications. To fulfill this gap, we propose DialogTool, a multi-turn dialogue dataset with stateful tool interactions considering the whole life cycle of tool use, across six key tasks in three stages: 1) tool creation; 2) tool utilization: tool awareness, tool selection, tool execution; and 3) role-consistent response: response generation and role play. Furthermore, we build VirtualMobile – an embodied virtual mobile evaluation environment to simulate API calls and assess the robustness of the created APIs. Taking advantage of these artifacts, we conduct comprehensive evaluation on 13 distinct open- and closed-source LLMs and provide detailed analysis at each stage, revealing that the existing state-of-the-art LLMs still cannot perform well to use tools over long horizons . 2025.findings-acl.284 @@ -10416,7 +10416,7 @@ Multimodal Causal Reasoning Benchmark: Challenging Multimodal Large Language Models to Discern Causal Links Across Modalities ZhiyuanLi - HengWangSony R&D and University of Sydney, University of Sydney + HengWangSony R&D and University of Sydney, University of Sydney DongnanLiuUniversity of Sydney ChaoyiZhangThe University of Sydney AoMa @@ -10458,7 +10458,7 @@ ShijueHuang Jeff Z.PanUniversity of Edinburgh, University of Edinburgh ZemingLiu - Kam-FaiWongThe Chinese University of Hong Kong + Kam-FaiWongThe Chinese University of Hong Kong 5578-5596 Inference-time scaling has attracted much attention which significantly enhance the performance of Large Language Models (LLMs) in complex reasoning tasks by increasing the length of Chain-of-Thought. These longer intermediate reasoning rationales embody various meta-reasoning skills in human cognition such as reflection and decomposition, being difficult to create and acquire. In this work, we introduce Self-Reasoning Language Model (SRLM), where the model itself can synthesize longer CoT data and iteratively improve performance through self-training. By incorporating a few demonstration examples (i.e., 1,000 samples) on how to unfold hidden reasoning chains from existing responses, which act as a reasoning catalyst, we demonstrate that SRLM not only enhances the model’s initial performance but also ensures more stable and consistent improvements in subsequent iterations. Our proposed SRLM achieves an average absolute improvement of more than +2.5 points across five reasoning tasks: MMLU, GSM8K, ARC-C, HellaSwag, and BBH on two backbone models. Moreover, it brings more improvements with more times of sampling during inference, such as absolute +7.89 average improvement with 64 sampling times, revealing the in-depth, diverse and creative reasoning paths in SRLM against the strong baseline . 2025.findings-acl.291 @@ -10470,7 +10470,7 @@ YongsenZhengNanyang Technological University MingjieQianSUN YAT-SEN UNIVERSITY GuohuaWangSouth China Agricultural University - YangLiuSUN YAT-SEN UNIVERSITY + YangLiuSUN YAT-SEN UNIVERSITY ZiliangChen MingzhiMao LiangLinSUN YAT-SEN UNIVERSITY @@ -10540,7 +10540,7 @@ JingyanZhou YipengZhang HaitaoMiTencent AI Lab - Helen M.MengThe Chinese University of Hong Kong + Helen M.MengThe Chinese University of Hong Kong 5688-5724 Large language models (LLMs) often struggle to provide up-to-date information due to their one-time training and the constantly evolving nature of the world. To keep LLMs current, existing approaches typically involve continued pre-training on new documents. However, they frequently face difficulties in extracting stored knowledge. Motivated by the remarkable success of the Feynman Technique in efficient human learning, we introduce Self-Tuning, a learning framework aimed at improving an LLM’s ability to effectively acquire new knowledge from unseen raw documents through self-teaching. Specifically, we develop a Self-Teaching strategy that augments the documents with a set of knowledge-intensive tasks created in a self-supervised manner, focusing on three crucial aspects: memorization, comprehension, and self-reflection. Additionally, we introduce three Wiki-Newpages-2023-QA datasets to facilitate an in-depth analysis of an LLM’s knowledge acquisition ability concerning memorization, extraction, and reasoning. Extensive experimental results on various models, e.g., Llama2-7B reveal that Self-Tuning consistently exhibits superior performance across all knowledge acquisition tasks and excels in preserving previous knowledge. 2025.findings-acl.297 @@ -10568,7 +10568,7 @@ Memory or Reasoning? Explore How <fixed-case>LLM</fixed-case>s Compute Mixed Arithmetic Expressions ChengzhiLiBeijing Institute of Technology - HeyanHuangBeijing Institute of Technology + HeyanHuangBeijing Institute of Technology PingJianBeijing Institute of Technology ZhenYang ChenxuWang @@ -10684,9 +10684,9 @@ HongliZhou YingqiQu JingLiuBaidu - MuyunYang + MuyunYang BingXu - TiejunZhaoHarbin Institute of Technology + TiejunZhaoHarbin Institute of Technology 5880-5895 Recently, there has been a growing trend of utilizing Large Language Model (LLM) to evaluate the quality of other LLMs. Many studies have fine-tuned judge models based on open-source LLMs for evaluation. While the fine-tuned judge models are claimed to achieve comparable evaluation capability with GPT-4, in this work, we conduct an empirical study of LLM-as-a-Judge. Our findings indicate that although the fine-tuned judge models achieve high performance on in-domain test sets, even surpassing GPT-4, they underperform GPT-4 across several dimensions, including generalizability, fairness and adaptability. We also reveal that the fine-tuned judge model inherently operates as a task-specific classifier, consequently imposing the limitations. 2025.findings-acl.306 @@ -10766,7 +10766,7 @@ XiaoqingZhengFudan University DiYin XingSunTencent YouTu Lab - XuanjingHuangFudan University + XuanjingHuangFudan University 5983-6005 Role-Playing Agents (RPAs) have shown remarkable performance in various applications, yet they often struggle to recognize and appropriately respond to hard queries that conflict with their role-play knowledge. To investigate RPAs’ performance when faced with different types of conflicting requests, we develop an evaluation benchmark that includes contextual knowledge conflicting requests, parametric knowledge conflicting requests, and non-conflicting requests to assess RPAs’ ability to identify conflicts and refuse to answer appropriately without over-refusing. Through extensive evaluation, we find that most RPAs behave significant performance gaps toward different conflict requests. To elucidate the reasons, we conduct an in-depth representation-level analysis of RPAs under various conflict scenarios. Our findings reveal the existence of rejection regions and direct response regions within the model’s forwarding representation, and thus influence the RPA’s final response behavior. Therefore, we introduce a lightweight representation editing approach that conveniently shifts conflicting requests to the rejection region, thereby enhancing the model’s refusal accuracy. The extensive experiments validate the effectiveness of our editing method, improving RPAs’ refusal ability of conflicting requests while maintaining their general role-playing capabilities. 2025.findings-acl.311 @@ -10840,7 +10840,7 @@ HuiqiangJiangMicrosoft XufangLuoMicrosoft Research QianhuiWuMicrosoft - Chin-YewLinMicrosoft + Chin-YewLinMicrosoft DongshengLiMicrosoft Research Asia YuqingYangResearch, Microsoft YongfengHuangTsinghua University, Tsinghua University @@ -10942,7 +10942,7 @@ Blessing of Multilinguality: A Systematic Analysis of Multilingual In-Context Learning YileiTu AndrewXue - FredaShiUniversity of Waterloo and Vector Institute + FredaShiUniversity of Waterloo and Vector Institute 6213-6248 While multilingual large language models generally perform adequately, and sometimes even rival English performance on high-resource languages (HRLs), they often significantly underperform on low-resource languages (LRLs). Among several prompting strategies aiming at bridging the gap, multilingual in-context learning (ICL) has been particularly effective when demonstration in target languages is unavailable. However, there lacks a systematic understanding when and why it works well.In this work, we systematically analyze multilingual ICL, using demonstrations in HRLs to enhance cross-lingual transfer. We show that demonstrations in mixed HRLs consistently outperform English-only ones across the board, particularly for tasks written in LRLs. Surprisingly, our ablation study show that the presence of irrelevant non-English sentences in the prompt yields measurable gains, suggesting the effectiveness of multilingual exposure itself. Our results highlight the potential of strategically leveraging multilingual resources to bridge the performance gap for underrepresented languages. 2025.findings-acl.323 @@ -10967,7 +10967,7 @@ ZongYuWangMeituan XuezhiCaoMeituan XunliangCaiMeituan - JiajunChenNanjing University + JiajunChenNanjing University ShujianHuangNanjing University 6279-6299 Large Language Models (LLMs) have shown impressive capabilities across various tasks but remain vulnerable to meticulously crafted jailbreak attacks. In this paper, we identify a critical safety gap: while LLMs are adept at detecting jailbreak prompts, they often produce unsafe responses when directly processing these inputs. Inspired by this insight, we propose SAGE(Self-Aware Guard Enhancement), a training-free defense strategy designed to align LLMs’ strong safety discrimination performance with their relatively weaker safety generation ability. SAGE consists of two core components: a Discriminative Analysis Module and a Discriminative Response Module, enhancing resilience against sophisticated jailbreak attempts through flexible safety discrimination instructions. Extensive experiments demonstrate SAGE’s effectiveness and robustness across various open-source and closed-source LLMs of different sizes and architectures, achieving an average 99% defense success rate against numerous complex and covert jailbreak methods while maintaining helpfulness on general benchmarks. We further conduct mechanistic interpretability analysis through hidden states and attention distributions, revealing the underlying mechanisms of this detection-generation discrepancy. Our work thus contributes to developing future LLMs with coherent safety awareness and generation behavior. Our code and datasets are publicly available at https://github.com/NJUNLP/SAGE. @@ -10984,7 +10984,7 @@ ArindamMitraResearch, Microsoft SpencerWhiteheadMicrosoft YuSuOhio State University - Ahmed HassanAwadallahMicrosoft Research + Ahmed HassanAwadallahMicrosoft Research 6300-6323 Recent success in large multimodal models (LMMs) has sparked promising applications of agents capable of autonomously completing complex web tasks. While open-source LMM agents have made significant advances in offline evaluation benchmarks, their performance still falls substantially short of human-level capabilities in more realistic online settings. A key bottleneck is the lack of diverse and large-scale trajectory-level datasets across various domains, which are expensive to collect. In this paper, we address this challenge by developing a scalable recipe to synthesize the largest and most diverse trajectory-level dataset to date, containing over 94K successful multimodal web trajectories, spanning 49K unique URLs, 720K screenshots, and 33M web elements. In particular, we leverage extensive web exploration and refinement to obtain diverse task intents. The average cost is 28 cents per successful trajectory, making it affordable to a wide range of users in the community. Leveraging this dataset, we train Explorer, a multimodal web agent, and demonstrate strong performance on both offline and online web agent benchmarks such as Mind2Web-Live, Multimodal-Mind2Web, and MiniWob++. Additionally, our experiments highlight data scaling as a key driver for improving web agent capabilities. We hope this study makes state-of-the-art LMM-based agent research at a larger scale more accessible. 2025.findings-acl.326 @@ -11010,7 +11010,7 @@ YuhaoDan JieZhou QinChenEast China Normal University - JunfengTianXiaohongshu + JunfengTianXiaohongshu LiangHeEast China Normal University 6342-6362 Personalized large language models (LLMs) have attracted great attention in many applications, such as emotional support and role-playing. However, existing works primarily focus on modeling explicit character profiles, while ignoring the underlying personality traits that truly shape behaviors and decision-making, hampering the development of more anthropomorphic and psychologically-grounded AI systems. In this paper, we explore the modeling of Big Five personality traits, which is the most widely used trait theory in psychology, and propose P-React, a mixture of experts (MoE)-based personalized LLM. Particularly, we integrate a Personality Specialization Loss (PSL) to better capture individual trait expressions, providing a more nuanced and psychologically grounded personality simulacrum. To facilitate research in this field, we curate OCEAN-Chat, a high-quality, human-verified dataset designed to train LLMs in expressing personality traits across diverse topics. Extensive experiments demonstrate the effectiveness of P-React in maintaining consistent and real personality. @@ -11038,7 +11038,7 @@ Streamlining the Collaborative Chain of Models into A Single Forward Pass in Generation-Based Tasks YuanjieLyu - ChaoZhang + ChaoZhang YuhaoChen YongChen TongXuUniversity of Science and Technology of China @@ -11065,7 +11065,7 @@ Beyond Reactive Safety: Risk-Aware <fixed-case>LLM</fixed-case> Alignment via Long-Horizon Simulation ChenkaiSunUniversity of Illinois Urbana Champaign DenghuiZhangStevens Institute of Technology - ChengXiangZhaiUniversity of Illinois, Urbana Champaign + ChengXiangZhaiUniversity of Illinois, Urbana Champaign HengJiUniversity of Illinois, Urbana-Champaign 6422-6434 Given the growing influence of language model-based agents on high-stakes societal decisions, from public policy to healthcare, ensuring their beneficial impact requires understanding the far-reaching implications of their suggestions. We propose a proof-of-concept framework that projects how model-generated advice could propagate through societal systems on a macroscopic scale over time, enabling more robust alignment. To assess the long-term safety awareness of language models, we also introduce a dataset of 100 indirect harm scenarios, testing models’ ability to foresee adverse, non-obvious outcomes from seemingly harmless user prompts. Our approach achieves not only over 20% improvement on the new dataset but also an average win rate exceeding 70% against strong baselines on existing safety benchmarks (AdvBench, SafeRLHF, WildGuardMix), suggesting a promising direction for safer agents. @@ -11221,7 +11221,7 @@ ChengchengHan JinxinShiEast China Normal University WenjunCui - XinZhaoTsinghua University, Tsinghua University + XinZhaoTsinghua University, Tsinghua University XingjiaoWuEast China Normal University JiabaoZhaoDonghua University, Shanghai 6575-6602 @@ -11532,7 +11532,7 @@ <fixed-case>K</fixed-case>od<fixed-case>C</fixed-case>ode: A Diverse, Challenging, and Verifiable Synthetic Dataset for Coding ZhangchenXu - YangLiu + YangLiu YueqinYinUniversity of Texas at Austin MingyuanZhouGoogle and The University of Texas at Austin RadhaPoovendranUniversity of Washington, Seattle @@ -11632,7 +11632,7 @@ LuXiangInstitute of automation, Chinese academy of science, Chinese Academy of Sciences YangZhaoInstitute of automation, Chinese academy of science, Chinese Academy of Sciences YuZhouInstitute of Automation, Chinese Academy of Sciences - ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences + ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences 7138-7149 Document Image Translation (DIT), which aims at translating documents in images from source language to the target, plays an important role in Document Intelligence. It requires a comprehensive understanding of document multi-modalities and a focused concentration on relevant textual regions during translation. However, most existing methods usually rely on the vanilla encoder-decoder paradigm, severely losing concentration on key regions that are especially crucial for complex-layout document translation. To tackle this issue, in this paper, we propose a new Query-Response DIT framework (QRDIT). QRDIT reformulates the DIT task into a parallel response/translation process of the multiple queries (i.e., relevant source texts), explicitly centralizing its focus toward the most relevant textual regions to ensure translation accuracy. A novel dynamic aggregation mechanism is also designed to enhance the text semantics in query features toward translation. Extensive experiments in four translation directions on three benchmarks demonstrate its state-of-the-art performance, showing significant translation quality improvements toward whole-page complex-layout document images. 2025.findings-acl.372 @@ -11657,7 +11657,7 @@ A General Knowledge Injection Framework for <fixed-case>ICD</fixed-case> Coding XuZhang - KunZhang + KunZhang WenxinMa RongshengWangUniversity of Science and Technology of China ChenxuWu @@ -11917,7 +11917,7 @@ SongyangZhangShanghai Artificial Intelligence Laboratory DahuaLinThe Chinese University of Hong Kong LijunWuShanghai Artificial Intelligence Laboratory - GáborPrószékyHungarian Research Centre for Linguistics, Pazmany Peter Catholic University and MorphoLogic + GáborPrószékyHungarian Research Centre for Linguistics, Pazmany Peter Catholic University and MorphoLogic ConghuiHeShanghai AI Lab 7464-7520 We introduce OpenHuEval, the first benchmark for LLMs focusing on the Hungarian language and specifics. OpenHuEval is constructed from a vast collection of Hungarian-specific materials sourced from multiple origins. In the construction, we incorporated the latest design principles for evaluating LLMs, such as using real user queries from the internet, emphasizing the assessment of LLMs’ generative capabilities, and employing LLM-as-judge to enhance the multidimensionality and accuracy of evaluations. Ultimately, OpenHuEval encompasses eight Hungarian-specific dimensions, featuring five tasks and 3953 questions. Consequently, OpenHuEval provides the comprehensive, in-depth, and scientifically accurate assessment of LLM performance in the context of the Hungarian language and its specifics. We evaluated current mainstream LLMs, including both traditional LLMs and recently developed Large Reasoning Models. The results demonstrate the significant necessity for evaluation and model optimization tailored to the Hungarian language and specifics. We also established the framework for analyzing the thinking processes of LRMs with OpenHuEval, revealing intrinsic patterns and mechanisms of these models in non-English languages, with Hungarian serving as a representative example. We will release OpenHuEval at https://github.com/opendatalab/OpenHuEval . @@ -11955,7 +11955,7 @@ <fixed-case>D</fixed-case>oc<fixed-case>F</fixed-case>usion: A Unified Framework for Document Parsing Tasks MingxuChai ZiyuShen - ChongZhangFudan University + ChongZhangFudan University YueZhang XiaoWang ShihanDou @@ -12001,7 +12001,7 @@ QuanyuLong JiandaChenNanyang Technological University ZhengyuanLiuI2R - Nancy F.Chen + Nancy F.Chen WenyaWangNanyang Technological University Sinno JialinPanThe Chinese University of Hong Kong 7633-7651 @@ -12019,7 +12019,7 @@ FeifanSongPeking University LonghuiYu TianyuLiu - BaobaoChangPeking University + BaobaoChangPeking University 7652-7665 Long-CoT reasoning combined with reinforcement learning for large language models demonstrates remarkable performance and scalability. However, we observe that the initial policy model could significantly influence the final performance as well as the token efficiency. Additionally, there is a lack of systematic guidelines for obtaining a better initial policy model. To bridge this gap, we initiate a comprehensive investigation by activating the initial model using a variety of datasets with different data volumes and reasoning patterns. Then, we conduct a thorough analysis and comparison of the RL process for different initial models from the perspectives of upper bounds, diversity, and token efficiency, providing a deeper understanding and insight into the long-CoT RL. Based on our empirical results, we propose a systematic guideline and a novel Re-RFT method for constructing a better RL start point. Our experiment results based on the 14B model surpass the DeepSeek-R1-Distill-Qwen-14B by an average of 4.6%, demonstrating our approach’s effectiveness and superiority. 2025.findings-acl.397 @@ -12028,7 +12028,7 @@ Topic Modeling for Short Texts via Optimal Transport-Based Clustering - TuVuByteDance Inc. + TuVuByteDance Inc. ManhDo TungNguyenHanoi University of Science and Technology Linh NgoVanHanoi University of Science and Technology @@ -12143,7 +12143,7 @@ Understanding the Repeat Curse in Large Language Models from a Feature Perspective JunchiYao - ShuYang + ShuYang JianhuaXu LijieHu MengdiLiKing Abdullah University of Science and Technology @@ -12159,7 +12159,7 @@ HaneulYooKAIST CheonbokParkNAVER SangdooYunNAVER - AliceOhGoogle and Korea Advanced Institute of Science and Technology + AliceOhGoogle and Korea Advanced Institute of Science and Technology HwaranLeeSogang University 7816-7836 Large language models (LLMs) now exhibit near human-level performance in various tasks, but their performance drops drastically after a handful of high-resource languages due to the imbalance in pre-training data. Inspired by the human process of second language acquisition, particularly code-switching—the practice of language alternation in a conversation—we propose code-switching curriculum learning (CSCL) to enhance cross-lingual transfer for LLMs. CSCL mimics the stages of human language learning by progressively training models with a curriculum consisting of 1) token-level code-switching, 2) sentence-level code-switching, and 3) monolingual corpora. Using Qwen 2 as our underlying model, we demonstrate the efficacy of the CSCL in improving language transfer to Korean, achieving significant performance gains compared to monolingual continual pre-training methods. Ablation studies reveal that both token- and sentence-level code-switching significantly enhance cross-lingual transfer and that curriculum learning amplifies these effects. We also extend our findings into various languages, including Japanese (high-resource) and Indonesian (low-resource), and using two additional models (Gemma 2 and Phi 3.5). We further show that CSCL mitigates spurious correlations between language resources and safety alignment, presenting a robust, efficient framework for more equitable language transfer in LLMs. We observe that CSCL is effective for low-resource settings where high-quality, monolingual corpora for language transfer are hardly available. @@ -12212,7 +12212,7 @@ <fixed-case>G</fixed-case>e<fixed-case>NR</fixed-case>e: A <fixed-case>F</fixed-case>rench Gender-Neutral Rewriting System Using Collective Nouns EnzoDoyen - AmaliaTodirascuUniversité de Strasbourg + AmaliaTodirascuUniversité de Strasbourg 7889-7909 A significant portion of the textual data used in the field of Natural Language Processing (NLP) exhibits gender biases, particularly due to the use of masculine generics (masculine words that are supposed to refer to mixed groups of men and women), which can perpetuate and amplify stereotypes. Gender rewriting, an NLP task that involves automatically detecting and replacing gendered forms with neutral or opposite forms (e.g., from masculine to feminine), can be employed to mitigate these biases. While such systems have been developed in a number of languages (English, Arabic, Portuguese, German, French), automatic use of gender neutralization techniques (as opposed to inclusive or gender-switching techniques) has only been studied for English. This paper presents GeNRe, the very first French gender-neutral rewriting system using collective nouns, which are gender-fixed in French. We introduce a rule-based system (RBS) tailored for the French language alongside two fine-tuned language models trained on data generated by our RBS. We also explore the use of instruct-based models to enhance the performance of our other systems and find that Claude 3 Opus combined with our dictionary achieves results close to our RBS. Through this contribution, we hope to promote the advancement of gender bias mitigation techniques in NLP for French. 2025.findings-acl.411 @@ -12284,7 +12284,7 @@ Think More, Hallucinate Less: Mitigating Hallucinations via Dual Process of Fast and Slow Thinking XiaoxueCheng JunyiLi - XinZhaoRenmin University of China + XinZhaoRenmin University of China Ji-RongWenRenmin University of China 7979-7990 Large language models (LLMs) demonstrate exceptional capabilities, yet still face the hallucination issue. Typical text generation approaches adopt an auto-regressive generation without deliberate reasoning, often leading to untrustworthy and factually inaccurate responses. In this paper, we propose HaluSearch, a novel framework that incorporates tree search-based algorithms (e.g., MCTS) to enable an explicit slow thinking generation process for mitigating hallucinations during inference. Specifically, HaluSearch frames text generation as a step-by-step reasoning process, using a self-evaluation reward model to score each generation step and guide the tree search towards the most reliable generation pathway. To balance efficiency and quality, we introduce a hierarchical system switch mechanism, which dynamically switches between fast and slow thinking modes at both instance and step levels. We conduct extensive experiments on both English and Chinese datasets, and the results show that our approach significantly outperforms baseline approaches. @@ -12312,7 +12312,7 @@ YijinLiuWechat AI FandongMengWeChat AI, Tencent Inc. YufengChen - JinanXuBeijing Jiaotong University + JinanXuBeijing Jiaotong University JieZhou 8005-8018 Knowledge Distillation (KD) has emerged as a prominent technique for model compression. However, conventional KD approaches primarily focus on homogeneous architectures with identical tokenizers, constraining their applicability in cross-architecture scenarios. As for the cross-tokenizer KD, the differences in the tokenizers give rise to two fundamental challenges: (1) sequence misalignment caused by divergent tokenization strategies, and (2) mismatched vocabulary size and composition. While existing probability-matching methods attempt to address these issues, their efficacy remains limited due to suboptimal alignment in both the sequence and vocabulary aspects. To overcome these limitations, we propose Contextual Dynamic Mapping (CDM), a novel cross-tokenizer distillation framework that employs contextual information to enhance sequence alignment precision and dynamically improves vocabulary mapping. We evaluated the effectiveness of our approach across five advanced and widely-used model families (i.e,LLama3, Phi3, Gemma2, OPT and Qwen2), which were configured into three distinct teacher-student pairs. Our method shows significant advantages over existing cross-tokenizer distillation baselines across diverse benchmarks, including instruction-following, code generation and math. Notably, our analysis reveals that combining conventional same-tokenizer distillation and cross-tokenizer distillation through CDM yields further performance improvements. @@ -12450,7 +12450,7 @@ QingyaoLi XinyiDai XiangyangLi - WeinanZhang + WeinanZhang YashengWang RuimingTang YongYuShanghai Jiaotong University @@ -12580,7 +12580,7 @@ TommasoGreen FélixGaschiPosos Fabian DavidSchmidt - Simone PaoloPonzettoUniversität Mannheim + Simone PaoloPonzettoUniversität Mannheim GoranGlavašJulius-Maximilians-Universität Würzburg 8342-8369 With Large Language Models (LLMs) becoming increasingly multilingual, effective knowledge editing (KE) needs to propagate edits across languages. Evaluation of the existing methods for cross-lingual knowledge editing (CKE) is limited both w.r.t. edit effectiveness: benchmarks do not account for entity aliases and use faulty entity translations; as well as robustness: existing work fails to report on downstream generation and task-solving abilities of LLMs after editing. In this work, we aim to (i) maximize the effectiveness of CKE while at the same time (ii) minimizing the extent of downstream model collapse due to the edits. To accurately measure the effectiveness of CKE methods, we introduce BabelEdits, a new CKE benchmark covering 60 languages that combines high-quality multilingual synsets from BabelNet with marker-based translation to ensure entity translation quality. Unlike existing CKE benchmarks, BabelEdits accounts for the rich variety of entity aliases within and across languages. We then propose BabelReFT, a modular CKE approach based on representation fine-tuning (ReFT) which learns entity-scope ReFT modules, applying them to all multilingual aliases at inference. Our experimental results show that not only is BabelReFT more effective in CKE than state-of-the-art methods, but, owing to its modular design, much more robust against downstream model collapse when subjected to many sequential edits. @@ -12660,7 +12660,7 @@ YingWenShanghai Jiao Tong University YongYuShanghai Jiaotong University JunWangUniversity College London - WeinanZhang + WeinanZhang 8453-8468 While large language models (LLMs) have significantly advanced mathematical reasoning, Process Reward Models (PRMs) have been developed to evaluate the logical validity of reasoning steps. However, PRMs still struggle with out-of-distribution (OOD) challenges. This paper identifies the OOD issues including step OOD, arising from differences in reasoning patterns across model types and sizes, and question OOD, due to dataset shifts between training and real-world problems. To address these issues, we introduce Retrieval-Augmented Process Reward Model (RetrievalPRM), a novel framework designed to tackle these OOD issues. By utilizing a two-stage retrieval-enhanced mechanism, RetrievalPRM retrieves semantically similar questions and steps for PRM as a warmup to stimulate its potential to judge target steps, improving generalization and reasoning consistency across different models and problem types. Our extensive experiments demonstrate that RetrievalPRM outperforms existing baselines across multiple real-world datasets. Our open-source contributions include a retrieval-enhanced dataset, a tuning framework for PRM training, and the RetreivalPRM model, establishing a new standard for PRM performance. 2025.findings-acl.444 @@ -12893,7 +12893,7 @@ TingyuQuKU Leuven MingxiaoLiKU Leuven JesseDavisKU Leuven - Marie-FrancineMoensKU Leuven, KU Leuven + Marie-FrancineMoensKU Leuven, KU Leuven 8796-8810 Efficiently updating multilingual knowledge in large language models (LLMs) without disrupting coherent factual representations across languages remains a significant challenge. While deploying separate editing systems for each language might seem viable, this approach incurs substantial costs due to the need to manage multiple models. A more efficient solution involves integrating knowledge updates across all languages into a unified model. However, sequential edits across languages often lead to destructive parameter interference, significantly degrading multilingual generalization and the accuracy of injected knowledge. To address this issue, we propose LangEdit, a novel null-space constrained framework designed to precisely isolate language-specific knowledge updates. The core innovation of LangEdit lies in its ability to project parameter updates for each language onto the orthogonal complement of other languages’ subspaces. This approach mathematically guarantees update independence while preserving multilingual generalization capabilities. We conduct a comprehensive evaluation across three model architectures, six languages, and four downstream tasks, demonstrating that LangEdit effectively mitigates parameter interference and outperforms existing state-of-the-art editing methods. Our results highlight its potential for enabling efficient and accurate multilingual knowledge updates in LLMs. 2025.findings-acl.460 @@ -12907,7 +12907,7 @@ JuntaoLi WanfuWang KehaiChenHarbin Institute of Technology (Shenzhen) - QiaomingZhuSoochow University + QiaomingZhuSoochow University MinZhangHarbin Institute of Technology, Shenzhen 8811-8831 As large language models (LLMs) are increasingly applied to complex scientific problem-solving, their effectiveness is often limited by unconscious or failed tool usage. To address this issue, we introduce the Tool-Awareness Training (TAT) method, designed to enhance scientific reasoning. This approach leverages both forward and backward data generation strategies to strengthen the model’s conscious and selective tool utilization in multi-step reasoning tasks. Our method unfolds in three stages: (1) developing tool-knowledge through backward tooluse data generation (2) enhancing tool-awareness in multi-step reasoning by utilizing forward reasoning data, and (3) improving domain adaptability through large-scale domain-specific data for multi-task learning. These three stages progressively establish the foundation for tool learning and scientific reasoning, effectively integrating both, enabling the model to tackle multi-domain scientific tasks while optimizing tool usage. Our experimental results demonstrate that TAT significantly enhances LLM performance in mathematical and scientific reasoning tasks, particularly by improving the model’s tool utilization capabilities, including proactivity and execution success rates. @@ -12969,7 +12969,7 @@ MasahiroKanekoMohamed bin Zayed University of Artificial Intelligence and Tokyo Institute of Technology, Tokyo Institute of Technology YoumiMaInstitute of Science Tokyo YukiWataThe University of Tokyo - NaoakiOkazakiInstitute of Science Tokyo + NaoakiOkazakiInstitute of Science Tokyo 8894-8907 Large Language Models (LLMs) are trained on large-scale web data, which makes it difficult to grasp the contribution of each text. This poses the risk of leaking inappropriate data such as benchmarks, personal information, and copyrighted texts in the training data. Membership Inference Attacks (MIA), which determine whether a given text is included in the model’s training data, have been attracting attention. Previous studies of MIAs revealed that likelihood-based classification is effective for detecting leaks in LLMs. However, the existing likelihood-based methods cannot be applied to some proprietary models like ChatGPT or Claude 3 because the likelihood for input text is unavailable to the user. In this study, we propose a Sampling-based Pseudo-Likelihood (SPL) method for MIA (SaMIA) that calculates SPL using only the text generated by an LLM to detect leaks. The SaMIA treats the target text as the reference text and multiple outputs from the LLM as text samples, calculates the degree of n-gram match as SPL, and determines the membership of the text in the training data. Even without likelihoods, SaMIA performed on par with existing likelihood-based methods. 2025.findings-acl.465 @@ -13107,7 +13107,7 @@ ShuyueGuo TianyuZheng JiaweiGuo01.AI - BoLi + BoLi HaoningWuRhymes AI XingweiQuUniversity of Manchester JianYangAlibaba Group @@ -13129,7 +13129,7 @@ Debate, Reflect, and Distill: Multi-Agent Feedback with Tree-Structured Preference Optimization for Efficient Language Model Enhancement XiaofengZhouBeijing Institute of Technology - HeyanHuangBeijing Institute of Technology + HeyanHuangBeijing Institute of Technology LiziLiaoSingapore Management University 9122-9137 Large Language Models (LLMs) continue to set new standards in knowledge-intensive and complex reasoning tasks, yet their high computational demands limit widespread adoption. While distilling large models into smaller ones offers a sustainable solution, current techniques—such as static knowledge distillation, resource-intensive reinforcement learning from human feedback, or limited self-reflection—struggle to yield substantial and lasting performance gains. In this paper, we present a novel Debate and Reflect (D&R) framework that orchestrates multi-turn debates between smaller models and stronger teacher models, eliciting actionable feedback (e.g., error analysis, corrective strategies) to guide student models. Further, we introduce Tree-structured Direct Preference Optimization (T-DPO) to efficiently leverage these debate logs, organizing interactions into a hierarchical format for effective training. Empirical evaluations across diverse NLP benchmarks demonstrate that our approach significantly improves smaller-model accuracy, robustness, and generalization, outperforming conventional baselines by a large margin. @@ -13152,7 +13152,7 @@ Narrative Media Framing in Political Discourse - YuliaOtmakhovaUniversity of Melbourne + YuliaOtmakhovaUniversity of Melbourne LeaFrermannUniversity of Melbourne 9167-9196 Narrative frames are a powerful way of conceptualizing and communicating complex, controversial ideas, however automated frame analysis to date has mostly overlooked this framing device. In this paper, we connect elements of narrativity with fundamental aspects of framing, and present a framework which formalizes and operationalizes such aspects. We annotate and release a data set of news articles in the climate change domain, analyze the dominance of narrative frame components across political leanings, and test LLMs in their ability to predict narrative frames and their components. Finally, we apply our framework in an unsupervised way to elicit components of narrative framing in a second domain, the COVID-19 crisis, where our predictions are congruent with prior theoretical work showing the generalizability of our approach. @@ -13454,7 +13454,7 @@ <fixed-case>GIMMICK</fixed-case>: Globally Inclusive Multimodal Multitask Cultural Knowledge Benchmarking FlorianSchneider CarolinHoltermannUniversität Hamburg - ChrisBiemannU Hamburg + ChrisBiemannU Hamburg AnneLauscherUniversität Hamburg 9605-9668 Large Vision-Language Models (LVLMs) have recently gained attention due to their distinctive performance and broad applicability. While it has been previously shown that their efficacy in usage scenarios involving non-Western contexts falls short, existing studies are limited in scope, covering just a narrow range of cultures, focusing exclusively on a small number of cultural aspects, or evaluating a limited selection of models on a single task only. Towards globally inclusive LVLM research, we introduce GIMMICK, an extensive multimodal benchmark designed to assess a broad spectrum of cultural knowledge across 144 countries representing six global macro-regions. GIMMICK comprises six tasks built upon three new datasets that span 728 unique cultural events or facets on which we evaluated 20 LVLMs and 11 LLMs, including five proprietary and 26 open-weight models of all sizes. We systematically examine (1) regional cultural biases, (2) the influence of model size, (3) input modalities, and (4) external cues. Our analyses reveal strong biases toward Western cultures across models and tasks and highlight strong correlations between model size and performance, as well as the effectiveness of multimodal input and external geographic cues. We further find that models have more knowledge of tangible than intangible aspects (e.g., food vs. rituals) and that they excel in recognizing broad cultural origins but struggle with a more nuanced understanding. @@ -13486,7 +13486,7 @@ FuwenLuo YileWangShenzhen University PengLiTsinghua University - YangLiu + YangLiu 9686-9704 Large language models (LLMs) have revolutionized the field of natural language processing, enabling remarkable progress in various tasks. Different from objective tasks such as commonsense reasoning and arithmetic question-answering, the performance of LLMs on subjective tasks is still limited, where the perspective on the specific problem plays crucial roles for better interpreting the context and giving proper response. For example, in certain scenarios, LLMs may perform better when answering from an expert role perspective, potentially eliciting their relevant domain knowledge. In contrast, in some scenarios, LLMs may provide more accurate responses when answering from a third-person standpoint, enabling a more comprehensive understanding of the problem and potentially mitigating inherent biases. In this paper, we propose Reasoning through Perspective Transition (RPT), a method based on in-context learning that enables LLMs to dynamically select among direct, role, and third-person perspectives for the best way to solve corresponding subjective problem. Through extensive experiments on totally 12 subjective tasks by using both closed-source and open-source LLMs including GPT-4, GPT-3.5, Llama-3, and Qwen-2, our method outperforms widely used single fixed perspective based methods such as chain-of-thought prompting and expert prompting, highlights the intricate ways that LLMs can adapt their perspectives to provide nuanced and contextually appropriate responses for different problems. 2025.findings-acl.502 @@ -13499,7 +13499,7 @@ YuanzheShen ChangzeLv XiaoqingZhengFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 9705-9723 The continuous evolution and enhanced reasoning capabilities of large language models (LLMs) have elevated their role in complex tasks, notably in travel planning, where demand for personalized, high-quality itineraries is rising. However, current benchmarks often rely on unrealistic simulated data, failing to reflect the differences between LLM-generated and real-world itineraries. Existing evaluation metrics, which primarily emphasize constraints, fall short of providing a comprehensive assessment of the overall quality of travel plans. To address these limitations, we introduce TripTailor, a benchmark designed specifically for personalized travel planning in real-world scenarios. This dataset features an extensive collection of over 500,000 real-world points of interest (POIs) and nearly 4,000 diverse travel itineraries, complete with detailed information, providing a more authentic evaluation framework. Experiments show that fewer than 10% of the itineraries generated by the latest state-of-the-art LLMs achieve human-level performance. Moreover, we identify several critical challenges in travel planning, including the feasibility, rationality, and personalized customization of the proposed solutions. We hope that TripTailor will drive the development of travel planning agents capable of understanding and meeting user needs while generating practical itineraries. 2025.findings-acl.503 @@ -13523,7 +13523,7 @@ LingweiWeiInstitute of Information Engineering, Chinese Academy of Sciences DouHu WeiZhouInstitute of Information Engeering - Philip S.YuUniversity of Illinois Chicago + Philip S.YuUniversity of Illinois Chicago SonglinHu 9739-9752 The rapid proliferation of fake news across multiple domains poses significant threats to society. Existing multi-domain detection models typically capture domain-shared semantic features to achieve generalized detection. However, they often fail to generalize well due to poor adaptability, which limits their ability to provide complementary features for detection, especially in data-constrained conditions. To address these challenges, we investigate the propagation-adaptive multi-domain fake news detection paradigm. We propose a novel framework, Structure-adaptive Adversarial Contrastive Learning (StruACL), to adaptively enable structure knowledge transfer between multiple domains. Specifically, we first contrast representations between content-only and propagation-rich data to preserve structural patterns in the shared representation space. Additionally, we design a propagation-guided adversarial training strategy to enhance the diversity of representations. Under the StruACL objective, we leverage a unified Transformer-based and graph-based model to jointly learn transferable semantic and structural features for detection across multiple domains. Experiments on seven fake news datasets demonstrate that StruACL-TGN achieves better multi-domain detection performance on general and data-constrained scenarios, showing the effectiveness and better generalization of StruACL. @@ -13556,8 +13556,8 @@ DaniilOrel Zain MuhammadMujahidCopenhagen University FajriKotoMohamed bin Zayed University of Artificial Intelligence - TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 9765-9784 Large language models (LLMs) are known to have the potential to generate harmful content, posing risks to users. While significant progress has been made in developing taxonomies for LLM risks and safety evaluation prompts, most studies have focused on monolingual contexts, primarily in English. However, language- and region-specific risks in bilingual contexts are often overlooked, and core findings can diverge from those in monolingual settings. In this paper, we introduce Qorǵau, a novel dataset specifically designed for safety evaluation in Kazakh and Russian, reflecting the unique bilingual context in Kazakhstan, where both Kazakh (a low-resource language) and Russian (a high-resource language) are spoken. Experiments with both multilingual and language-specific LLMs reveal notable differences in safety performance, emphasizing the need for tailored, region-specific datasets to ensure the responsible and safe deployment of LLMs in countries like Kazakhstan. Warning: this paper contains example data that may be offensive, harmful, or biased. 2025.findings-acl.507 @@ -13600,7 +13600,7 @@ RunChenColumbia University PengyuanShiColumbia University LinAi - JuliaHirschbergColumbia University + JuliaHirschbergColumbia University NatalieSchluterTechnical University of Denmark, Apple and IT University 9820-9831 In this paper, we introduce the Akan Cinematic Emotions (AkaCE) dataset, the first multimodal emotion dialogue dataset for an African language, addressing the significant lack of resources for low-resource languages in emotion recognition research. AkaCE, developed for the Akan language, contains 385 emotion-labeled dialogues and 6162 utterances across audio, visual, and textual modalities, along with word-level prosodic prominence annotations. The presence of prosodic labels in this dataset also makes it the first prosodically annotated African language dataset. We demonstrate the quality and utility of AkaCE through experiments using state-of-the-art emotion recognition methods, establishing solid baselines for future research. We hope AkaCE inspires further work on inclusive, linguistically and culturally diverse NLP resources. @@ -13631,7 +13631,7 @@ ChaoHuang ZonghaoGuo ZhiyuanLiuTsinghua University - JinanXuBeijing Jiaotong University + JinanXuBeijing Jiaotong University YuhuaLiHuazhong University of Science and Technology RuixuanLiHuazhong University of Science and Technology MaosongSunTsinghua University @@ -13687,7 +13687,7 @@ MinhyukKim DongjunKim ChanjunParkKorea University - HeuiseokLim + HeuiseokLim 9916-9926 Automatic Term Extraction (ATE) identifies domain-specific expressions that are crucial for downstream tasks such as machine translation and information retrieval. Although large language models (LLMs) have significantly advanced various NLP tasks, their potential for ATE has scarcely been examined. We propose a retrieval-based prompting strategy that, in the few-shot setting, selects demonstrations according to syntactic rather than semantic similarity. This syntactic retrieval method is domain-agnostic and provides more reliable guidance for capturing term boundaries. We evaluate the approach in both in-domain and cross-domain settings, analyzing how lexical overlap between the query sentence and its retrieved examples affects performance. Experiments on three specialized ATE benchmarks show that syntactic retrieval improves F1-score. These findings highlight the importance of syntactic cues when adapting LLMs to terminology-extraction tasks. 2025.findings-acl.516 @@ -13777,7 +13777,7 @@ ZhongyuJiang TianfangZhangTsinghua University ZongkaiWu - JohnLeeUniversity of Edinburgh, University of Edinburgh + JohnLeeUniversity of Edinburgh, University of Edinburgh Jenq-NengHwang LeiLi 10045-10056 @@ -13802,7 +13802,7 @@ Reasoning Circuits in Language Models: A Mechanistic Interpretation of Syllogistic Inference GeonheeKim MarcoValentinoUniversity of Sheffield - AndreFreitasIdiap Research Institute and University of Manchester + AndreFreitasIdiap Research Institute and University of Manchester 10074-10095 Recent studies on reasoning in language models (LMs) have sparked a debate on whether they can learn systematic inferential principles or merely exploit superficial patterns in the training data. To understand and uncover the mechanisms adopted for formal reasoning in LMs, this paper presents a mechanistic interpretation of syllogistic inference. Specifically, we present a methodology for circuit discovery aimed at interpreting content-independent and formal reasoning mechanisms. Through two distinct intervention methods, we uncover a sufficient and necessary circuit involving middle-term suppression that elucidates how LMs transfer information to derive valid conclusions from premises. Furthermore, we investigate how belief biases manifest in syllogistic inference, finding evidence of partial contamination from additional attention heads responsible for encoding commonsense and contextualized knowledge. Finally, we explore the generalization of the discovered mechanisms across various syllogistic schemes, model sizes and architectures. The identified circuit is sufficient and necessary for syllogistic schemes on which the models achieve high accuracy (\geq 60%), with compatible activation patterns across models of different families. Overall, our findings suggest that LMs learn transferable content-independent reasoning mechanisms, but that, at the same time, such mechanisms do not involve generalizable and abstract logical primitives, being susceptible to contamination by the same world knowledge acquired during pre-training. 2025.findings-acl.525 @@ -13872,7 +13872,7 @@ <fixed-case>M</fixed-case>ulti<fixed-case>H</fixed-case>oax: A Dataset of Multi-hop False-premise questions MohammadaminShafieiUniversity of Milan HamidrezaSaffariPolytechnic Institute of Milan - Nafise SadatMoosaviUniversity of Sheffield + Nafise SadatMoosaviUniversity of Sheffield 10169-10187 As Large Language Models are increasingly deployed in high-stakes domains, their ability to detect false assumptions and reason critically is crucial for ensuring reliable outputs. False-premise questions (FPQs) serve as an important evaluation method by exposing cases where flawed assumptions lead to incorrect responses. While existing benchmarks focus on single-hop FPQs, real-world reasoning often requires multi-hop inference, where models must verify consistency across multiple reasoning steps rather than relying on surface-level cues. To address this gap, we introduce MultiHoax, a benchmark for evaluating LLMs’ ability to handle false premises in complex, multi-step reasoning tasks. Our dataset spans seven countries and ten diverse knowledge categories, using Wikipedia as the primary knowledge source to enable cross-regional factual reasoning. Experiments reveal that state-of-the-art LLMs struggle to detect false premises across different countries, knowledge categories, and multi-hop reasoning types, highlighting the need for improved false premise detection and more robust multi-hop reasoning capabilities in LLMs. 2025.findings-acl.530 @@ -14068,7 +14068,7 @@ Optimized Text Embedding Models and Benchmarks for <fixed-case>A</fixed-case>mharic Passage Retrieval Kidist AmdeMekonnenUniversity of Amsterdam Yosef WorkuAlemnehNeoMatrix Ltd - Maartende Rijke + Maartende Rijke 10428-10445 Neural retrieval methods using transformer-based pre-trained language models have advanced multilingual and cross-lingual retrieval. However, their effectiveness for low-resource, morphologically rich languages such as Amharic remains underexplored due to data scarcity and suboptimal tokenization. We address this gap by introducing Amharic-specific dense retrieval models based on pre-trained Amharic BERT and RoBERTa backbones. Our proposed RoBERTa-Base-Amharic-Embed model (110M parameters) achieves a 17.6% relative improvement in MRR@10 and a 9.86% gain in Recall@10 over the strongest multilingual baseline, Arctic Embed 2.0 (568M parameters). More compact variants, such as RoBERTa-Medium-Amharic-Embed (42M), remain competitive while being over 13\times smaller. Additionally, we train a ColBERT-based late interaction retrieval model that achieves the highest MRR@10 score (0.843) among all evaluated models. We benchmark our proposed models against both sparse and dense retrieval baselines to systematically assess retrieval effectiveness in Amharic. Our analysis highlights key challenges in low-resource settings and underscores the importance of language-specific adaptation. To foster future research in low-resource IR, we publicly release our dataset, codebase, and trained models at https://github.com/kidist-amde/amharic-ir-benchmarks. 2025.findings-acl.543 @@ -14164,7 +14164,7 @@ <fixed-case>C</fixed-case>o<fixed-case>D</fixed-case>et-M4: Detecting Machine-Generated Code in Multi-Lingual, Multi-Generator and Multi-Domain Settings DaniilOrel DilshodAzizov - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 10570-10593 Large Language Models (LLMs) have revolutionized code generation, automating programming with remarkable efficiency. However, this has had important consequences for programming skills, ethics, and assessment integrity, thus making the detection of LLM-generated code essential for maintaining accountability and standards. While, there has been some previous research on this problem, it generally lacks domain coverage and robustness, and only covers a small number of programming languages. Here, we aim to bridge this gap. In particular, we propose a framework capable of distinguishing between human-written and LLM-generated program code across multiple programming languages, code generators, and domains. We use a large-scale dataset from renowned platforms and LLM-based code generators, alongside applying rigorous data quality checks, feature engineering, and comparative analysis of traditional machine learning models, pre-trained language models (PLMs), and LLMs for code detection. We perform an evaluation on out-of-domain scenarios, such as detecting authorship and hybrid authorship of generated code and generalizing to unseen models, domains, and programming languages. Our extensive experiments show that our framework effectively distinguishes human-written from LLM-generated program code, setting a new benchmark for the task. 2025.findings-acl.550 @@ -14245,7 +14245,7 @@ <fixed-case>LCFO</fixed-case>: Long Context and Long Form Output Dataset and Benchmarking - Marta R.Costa-jussàMeta + Marta R.Costa-jussàMeta PierreAndrews Mariano CoriaMeglioliMeta JoyChenGeorgia Institute of Technology and Facebook @@ -14322,7 +14322,7 @@ SinanKurtyigit DiegoFrassinelliLudwig-Maximilians-Universität München CarinaSilberer - SabineSchulte Im WaldeUniversity of Stuttgart + SabineSchulte Im WaldeUniversity of Stuttgart 10766-10776 We explore the role of the visual modality and of vision transformers in predicting the compositionality of English noun compounds. Crucially, we contribute a framework to address the challenge of obtaining adequate images that represent non-compositional compounds (such as “couch potato”), making it relevant for any image-based approach targeting figurative language. Our method uses prompting strategies and diffusion models to generate images. Comparing and combining our approach with a state-of-the-art text-based approach reveals complementary contributions regarding features as well as degrees of abstractness in compounds. 2025.findings-acl.561 @@ -14334,7 +14334,7 @@ BeiduoChenLudwig-Maximilians-Universität München SiyaoPengLudwig-Maximilians-Universität München AnnaKorhonenUniversity of Cambridge - BarbaraPlankLudwig-Maximilians-Universität München + BarbaraPlankLudwig-Maximilians-Universität München 10777-10802 Disagreement in human labeling is ubiquitous, and can be captured in human judgment distributions (HJDs). Recent research has shown that explanations provide valuable information for understanding human label variation (HLV) and large language models (LLMs) can approximate HJD from a few human-provided label-explanation pairs. However, collecting explanations for every label is still time-consuming. This paper examines whether LLMs can be used to replace humans in generating explanations for approximating HJD. Specifically, we use LLMs as annotators to generate model explanations for a few given human labels. We test ways to obtain and combine these label-explanations with the goal to approximate human judgment distributions. We further compare the resulting human with model-generated explanations, and test automatic and human explanation selection. Our experiments show that LLM explanations are promising for NLI: to estimate HJDs, generated explanations yield comparable results to human’s when provided with human labels. Importantly, our results generalize from datasets with human explanations to i) datasets where they are not available and ii) challenging out-of-distribution test sets. 2025.findings-acl.562 @@ -14386,7 +14386,7 @@ Multi-word Measures: Modeling Semantic Change in Compound Nouns ChrisJenkinsUniversity of Stuttgart, Universität Stuttgart FilipMiletićUniversity of Stuttgart - SabineSchulte Im WaldeUniversity of Stuttgart + SabineSchulte Im WaldeUniversity of Stuttgart 10850-10864 Compound words (e.g. shower thought) provide a multifaceted challenge for diachronic models of semantic change. Datasets describing noun compound semantics tend to describe only the predominant sense of a compound, which is limiting, especially in diachronic settings where senses may shift over time. We create a novel dataset of relatedness judgements of noun compounds in English and German, the first to capture diachronic meaning changes for multi-word expressions without prematurely condensing individual senses into an aggregate value. Furthermore, we introduce a novel, sense-targeting approach for noun compounds that evaluates two contrasting vector representations in their ability to cluster example sentence pairs. Our clustering approach targets both noun compounds and their constituent parts, to model the interdependence of these terms over time. We calculate time-delineated distributions of these clusters and compare them against measures of semantic change aggregated from the human relatedness annotations. 2025.findings-acl.566 @@ -14426,7 +14426,7 @@ 2<fixed-case>M</fixed-case>-<fixed-case>BELEBELE</fixed-case>: Highly Multilingual Speech and <fixed-case>A</fixed-case>merican <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage Comprehension Dataset Download <fixed-case>PDF</fixed-case> - Marta R.Costa-jussàMeta + Marta R.Costa-jussàMeta BokaiYuMeta AI PierreAndrews BelenAlastruey @@ -14504,7 +14504,7 @@ WentaoShi ZhuoLiu FuliFengUniversity of Science and Technology of China - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore 11010-11031 Multi-Objective Alignment (MOA) aims to align LLMs’ responses with multiple human preference objectives, with Direct Preference Optimization (DPO) emerging as a prominent approach. However, we find that DPO-based MOA approaches suffer from widespread preference conflicts in the data, where different objectives favor different responses. This results in conflicting optimization directions, hindering the optimization on the Pareto Front. To address this, we propose to construct Pareto-optimal responses to resolve preference conflicts. To efficiently obtain and utilize such responses, we propose a self-improving DPO framework that enables LLMs to self-generate and select Pareto-optimal responses for self-supervised preference alignment. Extensive experiments on two datasets demonstrate the superior Pareto Front achieved by our framework compared to various baselines 2025.findings-acl.574 @@ -14542,7 +14542,7 @@ Beyond Browsing: <fixed-case>API</fixed-case>-Based Web Agents YueqiSong - Frank F.Xu + Frank F.Xu ShuyanZhou GrahamNeubigCarnegie Mellon University 11066-11085 @@ -14582,7 +14582,7 @@ WeiZhang XinyueWang XiaojunJiaNanyang Technological University - YangLiuNanyang Technological University + YangLiuNanyang Technological University SenSuBeijing University of Posts and Telecommunications 11128-11150 Large Language Models (LLMs) have demonstrated remarkable performance across diverse tasks yet still are vulnerable to external threats, particularly LLM Denial-of-Service (LLM-DoS) attacks. Specifically, LLM-DoS attacks aim to exhaust computational resources and block services. However, existing studies predominantly focus on white-box attacks, leaving black-box scenarios underexplored. In this paper, we introduce Auto-Generation for LLM-DoS (\textbf{AutoDoS}) attack, an automated algorithm designed for black-box LLMs. AutoDoS constructs the DoS Attack Tree and expands the node coverage to achieve effectiveness under black-box conditions. By transferability-driven iterative optimization, AutoDoS could work across different models in one prompt.Furthermore, we reveal that embedding the Length Trojan allows AutoDoS to bypass existing defenses more effectively.Experimental results show that AutoDoS significantly amplifies service response latency by over \textbf{250}\times\uparrow, leading to severe resource consumption in terms of GPU utilization and memory usage. Our work provides a new perspective on LLM-DoS attacks and security defenses. @@ -14647,7 +14647,7 @@ JihoJinKorea Advanced Institute of Science and Technology WoosungKangKorea Advanced Institute of Science & Technology JunhoMyungKorea Advanced Institute of Science and Technology - AliceOhGoogle and Korea Advanced Institute of Science and Technology + AliceOhGoogle and Korea Advanced Institute of Science and Technology 11215-11228 Measuring social bias in large language models (LLMs) is crucial, but existing bias evaluation methods struggle to assess bias in long-form generation. We propose a Bias Benchmark for Generation (BBG), an adaptation of the Bias Benchmark for QA (BBQ), designed to evaluate social bias in long-form generation by having LLMs generate continuations of story prompts. Building our benchmark in English and Korean, we measure the probability of neutral and biased generations across ten LLMs. We also compare our long-form story generation evaluation results with multiple-choice BBQ evaluation, showing that the two approaches produce inconsistent results. 2025.findings-acl.585 @@ -15065,8 +15065,8 @@ Fast-and-Frugal Text-Graph Transformers are Effective Link Predictors Andrei CatalinComan ChristosTheodoropoulos - Marie-FrancineMoensKU Leuven, KU Leuven - JamesHendersonIdiap Research Institute + Marie-FrancineMoensKU Leuven, KU Leuven + JamesHendersonIdiap Research Institute 11828-11841 We propose Fast-and-Frugal Text-Graph (FnF-TG) Transformers, a Transformer-based framework that unifies textual and structural information for inductive link prediction in text-attributed knowledge graphs. We demonstrate that, by effectively encoding ego-graphs (1-hop neighbourhoods), we can reduce the reliance on resource-intensive textual encoders. This makes the model both fast at training and inference time, as well as frugal in terms of cost. We perform a comprehensive evaluation on three popular datasets and show that FnF-TG can achieve superior performance compared to previous state-of-the-art methods. We also extend inductive learning to a fully inductive setting, where relations don’t rely on transductive (fixed) representations, as in previous work, but are a function of their textual description. Additionally, we introduce new variants of existing datasets, specifically designed to test the performance of models on unseen relations at inference time, thus offering a new test-bench for fully inductive link prediction. 2025.findings-acl.615 @@ -15170,7 +15170,7 @@ On the Role of Semantic Proto-roles in Semantic Analysis: What do <fixed-case>LLM</fixed-case>s know about agency? ElizabethSpauldingUniversity of Colorado at Boulder Shafiuddin RehanAhmed - JamesMartinUniversity of Colorado at Boulder + JamesMartinUniversity of Colorado at Boulder 12027-12048 Large language models (LLMs) are increasingly used in decision-making contexts, yet their ability to reason over event structure—an important component in the situational awareness needed to make complex decisions—is not well understood. By operationalizing proto-role theory, which characterizes agents via properties such as *instigation* and *volition* and patients via properties such as *change of state*, we examine the ability of LLMs to answer questions that require complex, multi-step event reasoning. Specifically, we investigate the extent to which LLMs capture semantic roles such as “agent” and “patient” through zero-shot prompts, and whether incorporating semantic proto-role labeling (SPRL) context improves semantic role labeling (SRL) performance in a zero-shot setting. We find that, while SPRL context sometimes degrades SRL accuracy in high-performing models (e.g., GPT-4o), it also uncovers an internal consistency between SPRL and SRL predictions that mirrors linguistic theory, and provides evidence that LLMs implicitly encode consistent multi-dimensional event role knowledge. Furthermore, our experiments support prior work showing that LLMs underperform human annotators in complex semantic analysis. 2025.findings-acl.623 @@ -15204,7 +15204,7 @@ <fixed-case>W</fixed-case>eb<fixed-case>NLG</fixed-case>-<fixed-case>IT</fixed-case>: Construction of an aligned <fixed-case>RDF</fixed-case>-<fixed-case>I</fixed-case>talian corpus through Machine Translation techniques MichaelOliverio Pier FeliceBalestrucci - AlessandroMazzeiUniversity of Turin + AlessandroMazzeiUniversity of Turin ValerioBasileUniversity of Turin 12073-12083 The main goal of this work is the creation of the Italian version of the WebNLG corpus through the application of Neural Machine Translation (NMT) and post-editing with hand-written rules. To achieve this goal, in a first step, several existing NMT models were analysed and compared in order to identify the system with the highest performance on the original corpus. In a second step, after using the best NMT system, we semi-automatically designed and applied a number of rules to refine and improve the quality of the produced resource, creating a new corpus named WebNLG-IT. We used this resource for fine-tuning several LLMs for RDF-to-text tasks. In this way, comparing the performance of LLM-based generators on both Italian and English, we have (1) evaluated the quality of WebNLG-IT with respect to the original English version, (2) released the first fine-tuned LLM-based system for generating Italian from semantic web triples and (3) introduced an Italian version of a modular generation pipeline for RDF-to-text. @@ -15304,7 +15304,7 @@ YangZhaoInstitute of automation, Chinese academy of science, Chinese Academy of Sciences YangyifanXuUniversity of the Chinese Academy of Sciences BingLiuUniversity of Illinois at Chicago - ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences + ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences 12217-12236 Large Language Models (LLMs) have achieved impressive results across numerous NLP tasks, and fine-tuning them for Machine Translation (MT) has improved their performance. However, vanilla fine-tuning often leads to catastrophic forgetting, compromising the broad general abilities of LLMs and introducing potential security risks. These abilities, which are developed using proprietary and unavailable training data, make simple data replay methods ineffective. To overcome this issue, we propose a novel approach called **Ra**tionale **Dis**tillation. RaDis harnesses the strong generative capabilities of LLMs to create rationales for training data, which are then “replayed” to prevent forgetting. These rationales connect prior knowledge with new tasks, acting as self-distillation targets to regulate the training process. By jointly training on reference translations and self-generated rationales, the model can learn new translation skills while preserving its general abilities across other tasks. Additionally, RaDis provides a fresh perspective on using rationales in the CL field and has the potential to serve as a general continual learning method for a variety of tasks. 2025.findings-acl.632 @@ -15313,7 +15313,7 @@ Clarifying Underspecified Discourse Relations in Instructional Texts - BerfinAktasUniversity of Technology Nuremberg + BerfinAktasUniversity of Technology Nuremberg MichaelRothUniversity of Technology Nuremberg 12237-12256 Discourse relations contribute to the structure of a text and can optionally be realized through explicit connectives such as “but” and “while”. But when are these connectives necessary to avoid possible misunderstandings? We investigate this question by first building a corpus of 4,274 text revisions in each of which a connective was explicitly inserted. For a subset of 250 cases, we collect plausibility annotations on other connectives to check whether they would represent suitable alternative relations. The results of this annotation show that several relations are often perceived as plausible in our data. Furthermore, we analyze the extent to which large language models can identify instances with multiple plausible relations as a possible source of misunderstandings. We find that the models predict plausibility of individual connectives with up to 66% accuracy, but they are not reliable in estimating when multiple relations are plausible. @@ -15359,7 +15359,7 @@ <fixed-case>SEA</fixed-case>-<fixed-case>HELM</fixed-case>: <fixed-case>S</fixed-case>outheast <fixed-case>A</fixed-case>sian Holistic Evaluation of Language Models YosephineSusantoNational University of Singapore Adithya VenkatadriHulagadriNational University of Singapore - Jann RaileyMontalanAI Singapore and Ateneo de Manila University + Jann RaileyMontalanAI Singapore and Ateneo de Manila University Jian GangNguiNational University of Singapore XianbinYongNational University of Singapore Wei QiLeongAI Singapore @@ -15379,7 +15379,7 @@ SenYang YuBaoByteDance Research ShujianHuangNanjing University - JiajunChenNanjing University + JiajunChenNanjing University ShanboChengByteDance Inc. 12337-12347 The rise of Large Language Models (LLMs) has reshaped machine translation (MT), but multilingual MT still relies heavily on parallel data for supervised fine-tuning (SFT), facing challenges like data scarcity for low-resource languages and catastrophic forgetting. To address these issues, we propose TRANS-ZERO, a self-play framework that leverages only monolingual data and the intrinsic multilingual knowledge of LLM. TRANS-ZERO combines Genetic Monte-Carlo Tree Search (G-MCTS) with preference optimization, achieving strong translation performance that rivals supervised methods. Experiments demonstrate that this approach not only matches the performance of models trained on large-scale parallel data but also excels in non-English translation directions. Further analysis reveals that G-MCTS itself significantly enhances translation quality by exploring semantically consistent candidates through iterative translations, providing a robust foundation for the framework’s success. @@ -15415,9 +15415,9 @@ Socratic Style Chain-of-Thoughts Help <fixed-case>LLM</fixed-case>s to be a Better Reasoner JiangboPeiBeijing University of Post and Telecommunication PeiyuLiuUniversity of International Business and Economics - XinZhaoRenmin University of China + XinZhaoRenmin University of China AidongMenBeijing University of Posts and Telecommunications - YangLiuUniversity of California, Santa Cruz + YangLiuUniversity of California, Santa Cruz 12384-12395 Synthetic data generation has emerged as a promising approach to enhance the reasoning capabilities of large language models. However, existing methods remain hindered by high costs—either through expensive API access or additional intermediate training—and are limited in their ability to generalize across different domains. To address these challenges, we propose a multi-agent debate framework based on the Socratic questioning strategy, abbreviated as SoDa. Distinguished from previous methods that prioritize data quantity, we highlight the wisdom of Socratic questioning in augmenting reasoning quality by deepening the thinking process to encourage exploration and broadening it to motivate self-reflection on each question. Combined with our efficient production pipeline, SoDa enables scaling while maintaining affordable costs. We use SoDa to generate diverse datasets for mathematics and code generation tasks with the Qwen2.5-7B-Instruct model, successfully fine-tuning a range of foundation models, from general-purpose ones to OpenAI o1-like ones. For mathematics, the experimental results show that SoDa outperforms the performance of existing datasets at the same scale, achieving improvements ranging from 1.3% to 13.5%. Remarkably, SoDa with 30K examples even surpasses the ScaleQuest dataset with 1000K samples, demonstrating significant efficiency. Our findings highlight the potential of SoDa as a universal, scalable, and cost-effective method for enhancing reasoning capabilities in large models across domains. 2025.findings-acl.640 @@ -15431,7 +15431,7 @@ LalehJalaliAmazon Boris N.Oreshkin MohsenBayatiStanford University, Stanford University and Stanford University - ShervinMalmasiAmazon + ShervinMalmasiAmazon 12396-12415 Large Language Models (LLMs) have shown promise in structured prediction tasks, including regression, but existing approaches primarily focus on point estimates and lack systematic comparison across different methods.We investigate probabilistic regression using LLMs for unstructured inputs, addressing challenging text-to-distribution prediction tasks such as price estimation where both nuanced text understanding and uncertainty quantification are critical.We propose a novel quantile regression approach that enables LLMs to produce full predictive distributions, improving upon traditional point estimates. Through extensive experiments across three diverse price prediction datasets, we demonstrate that a Mistral-7B model fine-tuned with quantile heads significantly outperforms traditional approaches for both point and distributional estimations, as measured by three established metrics each for prediction accuracy and distributional calibration.Our systematic comparison of LLM approaches, model architectures, training approaches, and data scaling reveals that Mistral-7B consistently outperforms encoder architectures, embedding-based methods, and few-shot learning methods.Our experiments also reveal the effectiveness of LLM-assisted label correction in achieving human-level accuracy without systematic bias. Our curated datasets are made available at https://github.com/vnik18/llm-price-quantile-reg/ to support future research. 2025.findings-acl.641 @@ -15445,7 +15445,7 @@ YichiZhangUniversity of Michigan ZiqiaoMa WenjieLiThe Hong Kong Polytechnic University, The Hong Kong Polytechnic University - JoyceChaiUniversity of Michigan + JoyceChaiUniversity of Michigan 12416-12436 Intelligent tutoring agents powered by large language models (LLMs) have been increasingly explored to deliver personalized knowledge in areas such as language learning and science education. However, their capabilities in guiding users to solve complex real-world tasks remain underexplored. To address this limitation, in this work, we focus on coding tutoring, a challenging problem that requires tutors to proactively guide students towards completing predefined coding tasks. We propose a novel agent workflow, Trace-and-Verify (TRAVER), which combines knowledge tracing to estimate a student’s knowledge state and turn-by-turn verification to ensure effective guidance toward task completion. We introduce DICT, an automatic evaluation protocol that assesses tutor agents using controlled student simulation and code generation tests. Extensive experiments reveal the challenges of coding tutoring and demonstrate that TRAVER achieves a significantly higher success rate. Although we use code tutoring as an example in this paper, our approach can be extended beyond coding, providing valuable insights into advancing tutoring agents for human task learning. 2025.findings-acl.642 @@ -15538,7 +15538,7 @@ A Tale of Evaluating Factual Consistency: Case Study on Long Document Summarization Evaluation YangZhongUniversity of Pittsburgh - DianeLitmanUniversity of Pittsburgh + DianeLitmanUniversity of Pittsburgh 12511-12532 Ensuring factual consistency in summarization remains a challenge, especially for long-document evaluation. While automated, reference-free evaluation models are essential given the impracticality of large-scale human assessment for lengthy texts, challenges persist in evaluating different systems on how to handle different summary granularities and evolving model generations. In this work, we conduct a systematic study on diverse factual-consistency evaluation systems across four long-document datasets, encompassing summaries generated by models from non-LLMs to proprietary LLMs. Our analysis reveals that fine-grained continuous scores can provide more reliable assessments of different evaluation systems’ capabilities than binary classification. We also examine the relationship between sentence-level and summary-level model performance, highlighting its dependency on dataset characteristics. Moreover, our study reveals that advanced systems can achieve higher recall in error detection for older summaries, yet struggle with false positives and fine-grained error detection. Our analysis and case studies provide further insights into designing robust factuality evaluation systems, which are becoming increasingly in demand as generative models advance rapidly. 2025.findings-acl.648 @@ -15620,7 +15620,7 @@ Ahmad DawarHakimiLudwig-Maximilians-Universität München AliModarressiCenter for Information and Language Processing, LMU Munich PhilippWickeLudwig-Maximilians-Universität München - HinrichSchuetze + HinrichSchuetze 12633-12653 Understanding how large language models (LLMs) acquire and store factual knowledge is crucial for enhancing their interpretability, reliability, and efficiency. In this work, we analyze the evolution of factual knowledge representation in the OLMo-7B model by tracking the roles of its Attention Heads and Feed Forward Networks (FFNs) over training. We classify these components into four roles—general, entity, relation-answer, and fact-answer specific—and examine their stability and transitions. Our results show that LLMs initially depend on broad, general-purpose components, which later specialize as training progresses. Once the model reliably predicts answers, some components are repurposed, suggesting an adaptive learning process. Notably, answer-specific attention heads display the highest turnover, whereas FFNs remain stable, continually refining stored knowledge. These insights offer a mechanistic view of knowledge formation in LLMs and have implications for model pruning, optimization, and transparency. 2025.findings-acl.654 @@ -15760,7 +15760,7 @@ Dual Debiasing for Noisy In-Context Learning for Text Generation SiqiLiang SumyeongAhnKENTECH - ParamveerDhillonUniversity of Michigan + ParamveerDhillonUniversity of Michigan JiayuZhouUniversity of Michigan - Ann Arbor and Michigan State University 12855-12868 In-context learning (ICL) relies heavily on high-quality demonstrations drawn from large annotated corpora. Existing approaches detect noisy annotations by ranking local perplexities, presuming that noisy samples yield higher perplexities than their clean counterparts. However, this assumption breaks down when the noise ratio is high and many demonstrations are flawed.We re-examine the perplexity-based paradigm for text generation under noisy annotations, highlighting two sources of bias in perplexity: the annotation itself and the domain-specific knowledge inherent in large language models (LLMs). To overcome these biases, we introduce a dual-debiasing framework that uses synthesized neighbors to explicitly correct perplexity estimates, yielding a robust Sample Cleanliness Score. This metric uncovers absolute sample cleanliness regardless of the overall corpus noise level.Extensive experiments demonstrate our method’s superior noise-detection capabilities and show that its final ICL performance is comparable to that of a fully clean demonstration corpus. Moreover, our approach remains robust even when noise ratios are extremely high. @@ -15983,7 +15983,7 @@ EsamGhaleb BulatKhaertdinovMaastricht University AsliOzyurekmpi for psycholinguistics - RaquelFernándezUniversity of Amsterdam and University of Amsterdam + RaquelFernándezUniversity of Amsterdam and University of Amsterdam 13191-13206 In face-to-face interaction, we use multiple modalities, including speech and gestures, to communicate information and resolve references to objects. However, how representational co-speech gestures refer to objects remains understudied from a computational perspective. In this work, we address this gap by introducing a multimodal reference resolution task centred on representational gestures, while simultaneously tackling the challenge of learning robust gesture embeddings. We propose a self-supervised pre-training approach to gesture representation learning that grounds body movements in spoken language. Our experiments show that the learned embeddings align with expert annotations and have significant predictive power. Moreover, reference resolution accuracy further improves when (1) using multimodal gesture representations, even when speech is unavailable at inference time, and (2) leveraging dialogue history. Overall, our findings highlight the complementary roles of gesture and speech in reference resolution, offering a step towards more naturalistic models of human-machine interaction. 2025.findings-acl.682 @@ -15993,7 +15993,7 @@ World Knowledge Resolves Some Aspectual Ambiguity KatarzynaPruś - MarkSteedmanUniversity of Edinburgh + MarkSteedmanUniversity of Edinburgh AdamLopezUniversity of Edinburgh 13207-13220 Annotating event descriptions with their aspectual features is often seen as a pre-requisite to temporal reasoning. However, a recent study by Pruś et al. (2024) has shown that non-experts’ annotations of the aspectual class of English verb phrases can disagree with both expert linguistic annotations and each another. They hypothesised that people use their world knowledge to tacitly conjure their own contexts, leading to disagreement between them. In this paper, we test that hypothesis by adding context to Pruś et al.’s examples and mirroring their experiment. Our results show that whilst their hypothesis explains some of the disagreement, some examples continue to yield divided responses even with the additional context. Finally, we show that outputs from GPT-4, despite to some degree capturing the aspectual class division, are not an accurate predictor of human answers. @@ -16104,12 +16104,12 @@ <fixed-case>C</fixed-case>ode<fixed-case>S</fixed-case>cientist: End-to-End Semi-Automated Scientific Discovery with Code-based Experimentation - PeterJansenUniversity of Arizona and Allen Institute for Artificial Intelligence + PeterJansenUniversity of Arizona and Allen Institute for Artificial Intelligence OyvindTafjordGoogle DeepMind MarissaRadensky PaoSiangliulueAllen Institute for Artificial Intelligence TomHopeHebrew University, Hebrew University of Jerusalem and Allen Institute for Artificial Intelligence - BhavanaDalvi MishraAllen Institute for Artificial Intelligence + BhavanaDalvi MishraAllen Institute for Artificial Intelligence Bodhisattwa PrasadMajumderAllen Institute for Artificial Intelligence Daniel SWeldDepartment of Computer Science, University of Washington PeterClarkAllen Institute for Artificial Intelligence @@ -16146,7 +16146,7 @@ When Detection Fails: The Power of Fine-Tuned Models to Generate Human-Like Social Media Text HillaryDawkinsNational Research Council Canada - Kathleen C.FraserNational Research Council Canada + Kathleen C.FraserNational Research Council Canada SvetlanaKiritchenkoNational Research Council Canada 13494-13527 Detecting AI-generated text is a difficult problem to begin with; detecting AI-generated text on social media is made even more difficult due to the short text length and informal, idiosyncratic language of the internet. It is nonetheless important to tackle this problem, as social media represents a significant attack vector in online influence campaigns, which may be bolstered through the use of mass-produced AI-generated posts supporting (or opposing) particular policies, decisions, or events. We approach this problem with the mindset and resources of a reasonably sophisticated threat actor, and create a dataset of 505,159 AI-generated social media posts from a combination of open-source, closed-source, and fine-tuned LLMs, covering 11 different controversial topics. We show that while the posts can be detected under typical research assumptions about knowledge of and access to the generating models, under the more realistic assumption that an attacker will not release their fine-tuned model to the public, detectability drops dramatically. This result is confirmed with a human study. Ablation experiments highlight the vulnerability of various detection algorithms to fine-tuned LLMs. This result has implications across all detection domains, since fine-tuning is a generally applicable and realistic LLM use case. @@ -16182,7 +16182,7 @@ MianZhang ShuoYanUniversity of Texas at Dallas PeilinWu - ZhiyuChen + ZhiyuChen 13563-13597 While large language models (LLMs) have been thoroughly evaluated for deductive and inductive reasoning, their proficiency in holistic rule learning in interactive environments remains less explored. We introduce RULEARN, a novel benchmark to assess the rule-learning abilities of LLM agents in interactive settings. In RULEARN, agents strategically interact with simulated environments to gather observations, discern patterns, and solve complex problems. To enhance the rule-learning capabilities for LLM agents, we propose IDEA, a novel reasoning framework that integrates the process of **I**nduction, **De**duction, and **A**bduction. The IDEA agent generates initial hypotheses from limited observations through abduction, devises plans to validate these hypotheses or leverages them to solve problems via deduction, and refines previous hypotheses through induction, dynamically establishing and applying rules that mimic human rule-learning behaviors. Our evaluation of the IDEA framework, which involves five representative LLMs, demonstrates significant improvements over the baseline. Furthermore, our study with human participants reveals notable discrepancies in rule-learning behaviors between humans and LLMs. We believe our benchmark will serve as a valuable and challenging resource, and IDEA will provide crucial insights for the development of LLM agents capable of human-like rule learning in real-world scenarios. Our code and data have been released at: https://github.com/KaiyuHe998/RULEARN_IDEA. 2025.findings-acl.698 @@ -16248,7 +16248,7 @@ <fixed-case>MMI</fixed-case>n<fixed-case>A</fixed-case>: Benchmarking Multihop Multimodal <fixed-case>I</fixed-case>nternet Agents ShulinTianNanyang Technological University ZiniuZhangNortheastern University - LiangyuChenComputer Science Department, Stanford University + LiangyuChenComputer Science Department, Stanford University ZiweiLiuNanyang Technological University 13682-13697 Autonomous embodied agents live on an Internet of multimedia websites. Can they hop around multimodal websites to complete complex user tasks? Existing benchmarks fail to assess them in a realistic, evolving environment for their embodiment across websites. To answer this question, we present MMInA, a multihop and multimodal benchmark to evaluate the embodied agents for compositional Internet tasks, with several appealing properties: ***1) Evolving real-world multimodal websites.*** Our benchmark uniquely operates on evolving real-world websites, ensuring a high degree of realism and applicability to natural user tasks. Our data includes 1,050 human-written tasks covering various domains such as shopping and travel, with each task requiring the agent to extract multimodal information from web pages as observations autonomously. ***2) Multihop web browsing.*** Our dataset features naturally compositional tasks that require information from or actions on multiple websites to solve, to assess long-range reasoning capabilities on web tasks. ***3) Holistic evaluation.*** We propose a novel protocol for evaluating an agent’s progress in completing multihop tasks. We experiment with both standalone (multimodal) language models and heuristic-based web agents. Extensive experiments demonstrate that while long-chain multihop web tasks are easy for humans, they remain challenging for state-of-the-art web agents. We identify that agents are more likely to fail on the early hops when solving tasks of more hops, which results in lower task success rates. To address this issue, we propose a simple memory augmentation approach replaying past action trajectories to reflect. Our method significantly improves the performance of both the single-hop and multihop web browsing abilities. @@ -16274,7 +16274,7 @@ TianyiLiAmazon ZhaoweiWangEdinburgh University, University of Edinburgh and Department of Computer Science and Engineering, Hong Kong University of Science and Technology TianyangLiuEdinburgh University, University of Edinburgh - MarkSteedmanUniversity of Edinburgh + MarkSteedmanUniversity of Edinburgh 13714-13730 LLMs are often claimed to be capable of Natural Language Inference (NLI), which is widely regarded as a cornerstone of more complex forms of reasoning. However, recent works show that LLMs still suffer from hallucinations in NLI due to attestation bias, where LLMs overly rely on propositional memory to build shortcuts. To solve the issue, we design an unsupervised framework to construct counterfactual reasoning data and fine-tune LLMs to reduce attestation bias. To measure bias reduction, we build bias-adversarial variants of NLI datasets with randomly replaced predicates in premises while keeping hypotheses unchanged. Extensive evaluations show that our framework can significantly reduce hallucinations from attestation bias. Then, we further evaluate LLMs fine-tuned with our framework on original NLI datasets and their bias-neutralized versions, where original entities are replaced with randomly sampled ones. Extensive results show that our framework consistently improves inferential performance on both original and bias-neutralized NLI datasets. 2025.findings-acl.705 @@ -16284,7 +16284,7 @@ Dynamic Steering With Episodic Memory For Large Language Models Van DaiDo - Quan HungTranFacebook + Quan HungTranFacebook SvethaVenkateshDeakin University HungLeDeakin University 13731-13749 @@ -16301,7 +16301,7 @@ LauraBiesterMiddlebury College AndrewLeeSchool of Engineering and Applied Sciences, Harvard University JamesPennebaker - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan 13750-13770 Large Language Models (LLMs) have been previously explored for mental healthcare training and therapy client simulation, but they still fall short in authentically capturing diverse client traits and psychological conditions. We introduce Eeyore , an 8B model optimized for realistic depression simulation through a structured alignment framework, incorporating expert input at every stage.First, we systematically curate real-world depression-related conversations, extracting depressive traits to guide data filtering and psychological profile construction, and use this dataset to instruction-tune Eeyore for profile adherence. Next, to further enhance realism, Eeyore undergoes iterative preference optimization—first leveraging model-generated preferences and then calibrating with a small set of expert-annotated preferences.Throughout the entire pipeline, we actively collaborate with domain experts, developing interactive interfaces to validate trait extraction and iteratively refine structured psychological profiles for clinically meaningful role-play customization.Despite its smaller model size, the Eeyore depression simulation outperforms GPT-4o with SOTA prompting strategies, both in linguistic authenticity and profile adherence. 2025.findings-acl.707 @@ -16338,7 +16338,7 @@ TuochaoChen Nicholas ScottBatchelderUniversity of Washington AlisaLiuNVIDIA and University of Washington - Noah A.SmithUniversity of Washington and Allen Institute for Artificial Intelligence + Noah A.SmithUniversity of Washington and Allen Institute for Artificial Intelligence ShyamnathGollakotaDepartment of Computer Science, University of Washington 13801-13824 We introduce LlamaPIE, the first real-time proactive assistant designed to enhance human conversations through discreet, concise guidance delivered via hearable devices. Unlike traditional language models that require explicit user invocation, this assistant operates in the background, anticipating user needs without interrupting conversations. We address several challenges, including determining when to respond, crafting concise responses that enhance conversations, leveraging knowledge of the user for context-aware assistance, and real-time, on-device processing. To achieve this, we construct a semi-synthetic dialogue dataset and propose a two-model pipeline: a small model that decides when to respond and a larger model that generates the response. We evaluate our approach on real-world datasets, demonstrating its effectiveness in providing helpful, unobtrusive assistance. User studies with our assistant, implemented on Apple Silicon M2 hardware, show a strong preference for the proactive assistant over both a baseline with no assistance and a reactive AI assistant, highlighting the potential of LlamaPIE to enhance live conversations. @@ -16400,7 +16400,7 @@ <fixed-case>H</fixed-case>i<fixed-case>COT</fixed-case>: Improving Neural Topic Models via Optimal Transport and Contrastive Learning Hoang TranVuong TueLe - TuVuByteDance Inc. + TuVuByteDance Inc. TungNguyenHanoi University of Science and Technology Linh NgoVanHanoi University of Science and Technology SangDinhHanoi University of Science and Technology @@ -16439,8 +16439,8 @@ RuqingZhang JiafengGuoInstitute of Computing Technolgy, Chinese Academy of Sciences JianmingLvSouth China University of Technology - Maartende Rijke - XueqiChengInstitute of Computing Technology, Chinese Academy + Maartende Rijke + XueqiChengInstitute of Computing Technology, Chinese Academy 13935-13952 We explore adversarial attacks against retrieval-augmented generation (RAG) systems to identify their vulnerabilities. We focus on generating human-imperceptible adversarial examples and introduce a novel imperceptible retrieve-to-generate attack against RAG. This task aims to find imperceptible perturbations that retrieve a target document, originally excluded from the initial top-k candidate set, in order to influence the final answer generation. To address this task, we propose ReGENT, a reinforcement learning-based framework that tracks interactions between the attacker and the target RAG and continuously refines attack strategies based on relevance-generation-naturalness rewards. Experiments on newly constructed factual and non-factual question-answering benchmarks demonstrate that ReGENT significantly outperforms existing attack methods in misleading RAG systems with small imperceptible text perturbations. 2025.findings-acl.717 @@ -16466,7 +16466,7 @@ ZixiongWang GaoyangLiuHuazhong University of Science and Technology ChenWangHuazhong University of Science and Technology - WeiLiuHuazhong University of Science and Technology + WeiLiuHuazhong University of Science and Technology KaiPengHuazhong University of Science and Technology 13978-13999 Machine Unlearning (MU) has emerged as a promising solution for removing the influence of data that an owner wishes to unlearn from Large Language Models (LLMs). However, existing MU methods, which require tuning the entire model parameters on the unlearned data with random labels or perturbed gradients, significantly degrade model utility, especially given the difficulty of accessing the original training data. This presents a key challenge: how can we achieve MU using only the unlearned data while preserving model utility?In this paper, we propose NeuMuter, a simple but effective MU method that eliminates the influence of unlearned data from LLMs by modulating the outputs of merely 1% of the neurons in the feed-forward network (FFN) modules within the Transformer blocks, minimizing disruption to the model’s performance. We design a trainable masking scheme that decouples the memorization of different training data within the neurons of LLMs, allowing us to precisely identify and modify neurons associated with the unlearned data. Through comprehensive evaluations on two benchmarks across four different LLMs, we demonstrate that modifying the outputs of a few fraction of the total neurons can effectively achieve MU while preserving the model’s utility across downstream tasks. @@ -16539,7 +16539,7 @@ ChenyangYan NuoChen JianbingZhangNanjing University - JiajunChenNanjing University + JiajunChenNanjing University 14077-14094 Image captioning has been a longstanding challenge in vision-language research. With the rise of LLMs, modern Vision-Language Models (VLMs) generate detailed and comprehensive image descriptions. However, benchmarking the quality of such captions remains unresolved. This paper addresses two key questions: (1) How well do VLMs actually perform on image captioning, particularly compared to humans? We built CapArena, a platform with over 6000 pairwise caption battles and high-quality human preference votes. Our Arena-style evaluation marks a milestone, showing that leading models like GPT-4o achieve or even surpass human performance, while most open-source models lag behind. (2) Can automated metrics reliably assess caption quality? Using human annotations from CapArena, we evaluate traditional and recent captioning metrics, as well as VLM-as-a-Judge. Our analysis reveals that while some metrics (e.g., METEOR) show high caption-level agreement with humans, their systematic biases lead to inconsistencies in model ranking. In contrast, VLM-as-a-Judge demonstrates robust discernment at both the caption and model levels. Building on these insights, we release CapArena-Auto, an accurate and efficient automated benchmark for detailed captioning, achieving 93.4% correlation with human rankings at just $4 per test. All data and evaluation resources have been open-sourced. 2025.findings-acl.724 @@ -16733,7 +16733,7 @@ Quang HieuPhamQualcomm Inc, QualComm Thuy DuongNguyenVinAI Research TungPhamQualcomm Inc, QualComm - Anh TuanLuuNanyang Technological University + Anh TuanLuuNanyang Technological University Dat QuocNguyenQualcomm AI Research 14322-14329 The capabilities of large language models (LLMs) have been enhanced by training on data that reflects human thought processes, such as the Chain-of-Thought format. However, evidence suggests that the conventional scheme of next-word prediction may not fully capture how humans learn to think. Inspired by how humans generalize mathematical reasoning, we propose a new approach named ClozeMath to fine-tune LLMs for mathematical reasoning. Our ClozeMath involves a text-infilling task that predicts masked equations from a given solution, analogous to cloze exercises used in human learning. Experiments on GSM8K, MATH, and GSM-Symbolic show that ClozeMath surpasses the strong baseline Masked Thought in performance and robustness, with two test-time scaling decoding algorithms, Beam Search and Chain-of-Thought decoding. Additionally, we conduct an ablation study to analyze the effects of various architectural and implementation choices on our approach. @@ -16748,7 +16748,7 @@ FeiSunInstitute of Computing Technology, Chinese Academy of Sciences QiCaoInstitute of Computing Technology, Chinese Academy of Sciences, China HuaweiShenInstitute of Computing Technology, Chinese Academy of Sciences - XueqiChengInstitute of Computing Technology, Chinese Academy + XueqiChengInstitute of Computing Technology, Chinese Academy 14330-14344 Text watermarking, which modify tokens to embed watermark, has proven effective in detecting machine-generated texts. Yet its application to low-entropy texts like code and mathematics presents significant challenges. A fair number of tokens in these texts are hardly modifiable without changing the intended meaning, causing statistical measures to falsely indicate the absence of a watermark. Existing research addresses this issue by rely mainly on a limited number of high-entropy tokens, which are considered flexible for modification, and accurately reflecting watermarks. However, their detection accuracy remains suboptimal, as they neglect strong watermark evidences embedded in low entropy tokens modified through watermarking. To overcome this limitation, we introduce Bayes’ Rule derived Watermark Detector (BRWD), which exploit watermark information from every token, by leveraging the posterior probability of watermark’s presence. We theoretically prove the optimality of our method in terms of detection accuracy, and demonstrate its superiority across various datasets, models, and watermark injection strategies. Notably, our method achieves up to 50% and 70% relative improvements in detection accuracy over the best baselines in code generation and math problem-solving tasks, respectively. Our code is available at https://github.com/cczslp/BRWD. 2025.findings-acl.739 @@ -16862,7 +16862,7 @@ TaoGuiFudan University YunLiShanghai Jiaotong University QiZhangFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 14471-14485 Process supervision, i.e., evaluating each step, is critical for complex large language model (LLM) reasoning and test-time searching with increased inference compute. Existing approaches, represented by process reward models (PRMs), primarily focus on rewarding signals up to the current step, exhibiting a one-directional nature and lacking a mechanism to model the distance to the final target. To address this problem, we draw inspiration from the A* algorithm, which states that an effective supervisory signal should simultaneously consider the incurred cost and the estimated cost for reaching the target. Building on this key insight, we introduce BiRM, a novel process supervision model that not only evaluates the correctness of previous steps but also models the probability of future success. We conduct extensive experiments on mathematical reasoning tasks and demonstrate that BiRM provides more precise evaluations of LLM reasoning steps, achieving an improvement of 3.1% on Gaokao2023 over PRM under the Best-of-N sampling method. Besides, in search-based strategies, BiRM provides more comprehensive guidance and outperforms ORM by 5.0% and PRM by 3.8% respectively on MATH-500. 2025.findings-acl.747 @@ -16880,7 +16880,7 @@ YutaoZengByteDance Inc. XiaolongJinInstitute of Computing Technology, Chinese Academy of Sciences JiafengGuoInstitute of Computing Technolgy, Chinese Academy of Sciences - XueqiChengInstitute of Computing Technology, Chinese Academy + XueqiChengInstitute of Computing Technology, Chinese Academy 14486-14509 Empirical evidence indicates that LLMs exhibit spontaneous cross-lingual alignment. However, although LLMs show promising cross-lingual alignment in Information Extraction (IE), a significant imbalance across languages persists, highlighting an underlying deficiency. To address this, we propose KnowCoder-X, a powerful code LLM with advanced cross-lingual and multilingual capabilities for universal IE. Firstly, it standardizes the representation of multilingual schemas using Python classes, ensuring a consistent ontology across different languages. Then, IE across languages is formulated as a unified code generation task. Secondly, we conduct IE cross-lingual alignment instruction tuning on the translated instance prediction task to enhance the model’s cross-lingual transferability. During this phase, we also construct a high-quality and diverse bilingual IE parallel dataset with 257k samples, called ParallelNER, synthesized by our proposed robust three-stage pipeline, with manual annotation to ensure quality. Although without training in 29 unseen languages, KnowCoder-X surpasses ChatGPT by 30.17% and SoTA by 20.03%, thereby demonstrating superior cross-lingual IE capabilities. Comprehensive evaluations on 64 IE benchmarks in Chinese and English under various settings demonstrate that KnowCoder-X significantly enhances cross-lingual IE transfer through boosting the IE alignment. Our code and dataset are available at: https://github.com/ICT-GoKnow/KnowCoder. 2025.findings-acl.748 @@ -16897,7 +16897,7 @@ JingXiongUniversity of Hong Kong RossellaArcucciImperial College London HuaxiuYaoDepartment of Computer Science, University of North Carolina at Chapel Hill - MiZhangThe Ohio State University + MiZhangThe Ohio State University 14510-14527 Electrocardiogram (ECG) is the primary non-invasive diagnostic tool for monitoring cardiac conditions and is crucial in assisting clinicians. Recent studies have concentrated on classifying cardiac conditions using ECG data but have overlooked ECG report generation, which is time-consuming and requires clinical expertise. To automate ECG report generation and ensure its versatility, we propose the Multimodal ECG Instruction Tuning (MEIT) framework, the first attempt to tackle ECG report generation with LLMs and multimodal instructions. To facilitate future research, we establish a benchmark to evaluate MEIT with various LLMs backbones across two large-scale ECG datasets. Our approach uniquely aligns the representations of the ECG signal and the report, and we conduct extensive experiments to benchmark MEIT with nine open-source LLMs using more than 800,000 ECG reports. MEIT’s results underscore the superior performance of instruction-tuned LLMs, showcasing their proficiency in quality report generation, zero-shot capabilities, resilience to signal perturbation, and alignment with human expert evaluation. These findings emphasize the efficacy of our MEIT framework and its potential for real-world clinical application. 2025.findings-acl.749 @@ -16957,7 +16957,7 @@ ChangZhou WenXiaoMicrosoft TianyuLiu - BaobaoChangPeking University + BaobaoChangPeking University 14588-14604 In recent progress, mathematical verifiers have achieved success in mathematical reasoning tasks by validating the correctness of solutions generated by policy models. However, existing verifiers are trained with binary classification labels, which are not informative enough for the model to accurately assess the solutions. To mitigate the aforementioned insufficiency of binary labels, we introduce step-wise natural language feedback as rationale labels, that is, the correctness of each step and the detailed explanations. In this paper, we propose Math-Minos, a natural language feedback-enhanced verifier by constructing automatically generated training data and a two-stage training paradigm for effective training and efficient inference. Our experiments reveal that a small set of natural language feedback can significantly boost the performance of the verifier in both verification and reinforcement learning and also significantly alleviates the data-demanding problems of the reward model with an over 700% data efficiency improvement. 2025.findings-acl.753 @@ -17194,7 +17194,7 @@ YuangLiHuawei Technologies Ltd. XiaofengZhaoHuawei Technologies Ltd. MingZhu - JunhuiLiSoochow University, China + JunhuiLiSoochow University, China YunfeiLuHuawei Technologies Ltd. MinZhangHuawei Technologies Ltd. DaimengWei @@ -17280,7 +17280,7 @@ ChangzeLv XiaohuaWang XiaoqingZhengFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 15014-15032 Continual pre-training has demonstrated significant potential in enhancing model performance, particularly in domain-specific scenarios. The most common approach for packing data before continual pre-training involves concatenating input texts and splitting them into fixed-length sequences. While straightforward and efficient, this method often leads to excessive truncation and context discontinuity, which can hinder model performance. To address these issues, we explore the potential of data engineering to enhance continual pre-training, particularly its impact on model performance and efficiency. We propose Seamless Packing (SP), a novel data packing strategy aimed at preserving contextual information and enhancing model performance. Our approach employs a sliding window technique in the first stage that synchronizes overlapping tokens across consecutive sequences, ensuring better continuity and contextual coherence. In the second stage, we adopt a First-Fit-Decreasing algorithm to pack shorter texts into bins slightly larger than the target sequence length, thereby minimizing padding and truncation. Empirical evaluations across various model architectures and corpus domains demonstrate the effectiveness of our method, outperforming baselines in 99% of all settings. Code is available at https://github.com/Infernus-WIND/Seamless-Packing. 2025.findings-acl.777 @@ -17392,7 +17392,7 @@ Position Paper: <fixed-case>M</fixed-case>e<fixed-case>M</fixed-case>o: Towards Language Models with Associative Memory Mechanisms - Fabio MassimoZanzottoUniversity of Rome Tor Vergata + Fabio MassimoZanzottoUniversity of Rome Tor Vergata Elena SofiaRuzzettiUniversità degli Studi di Roma Tor Vergata Giancarlo A.XomperoUniversity of Rome Tor Vergata and Almawave SpA LeonardoRanaldi @@ -17438,7 +17438,7 @@ PeinanZhangCyberAgent AI Lab HidetakaKamigaitoNara Institute of Science and Technology HiroyaTakamuraAIST, National Institute of Advanced Industrial Science and Technology - ManabuOkumuraInstitute of Science Tokyo and Tokyo Institute of Technology, Tokyo Institute of Technology + ManabuOkumuraInstitute of Science Tokyo and Tokyo Institute of Technology, Tokyo Institute of Technology 15212-15230 Identifying factors that make ad text attractive is essential for advertising success. This study proposes AdParaphrase v2.0, a dataset for ad text paraphrasing, containing human preference data, to enable the analysis of the linguistic factors and to support the development of methods for generating attractive ad texts. Compared with v1.0, this dataset is 20 times larger, comprising 16,460 ad text paraphrase pairs, each annotated with preference data from ten evaluators, thereby enabling a more comprehensive and reliable analysis. Through the experiments, we identified multiple linguistic features of engaging ad texts that were not observed in v1.0 and explored various methods for generating attractive ad texts. Furthermore, our analysis demonstrated the relationships between human preference and ad performance, and highlighted the potential of reference-free metrics based on large language models for evaluating ad text attractiveness.The dataset is publicly available at: https://github.com/CyberAgentAILab/AdParaphrase-v2.0. 2025.findings-acl.788 @@ -17500,7 +17500,7 @@ “<fixed-case>I</fixed-case> understand your perspective”: <fixed-case>LLM</fixed-case> Persuasion through the Lens of Communicative Action Theory EsraDönmezUniversität Stuttgart - AgnieszkaFalenskaInterchange Forum for Reflecting on Intelligent Systems, University of Stuttgart + AgnieszkaFalenskaInterchange Forum for Reflecting on Intelligent Systems, University of Stuttgart 15312-15327 Large Language Models (LLMs) can generate high-quality arguments, yet their ability to engage in *nuanced and persuasive communicative actions* remains largely unexplored. This work explores the persuasive potential of LLMs through the framework of Jürgen Habermas’ Theory of Communicative Action. It examines whether LLMs express illocutionary intent (i.e., pragmatic functions of language such as conveying knowledge, building trust, or signaling similarity) in ways that are comparable to human communication.We simulate online discussions between opinion holders and LLMs using conversations from the persuasive subreddit *ChangeMyView*. We then compare the likelihood of illocutionary intents in human-written and LLM-generated counter-arguments, specifically those that successfully changed the original poster’s view. We find that all three LLMs effectively convey illocutionary intent — often more so than humans — potentially increasing their anthropomorphism. Further, LLMs craft responses that closely align with the opinion holder’s intent, a strategy strongly associated with opinion change. Finally, crowd-sourced workers find LLM-generated counter-arguments more *agreeable* and consistently prefer them over human-written ones. These findings suggest that LLMs’ persuasive power extends beyond merely generating high-quality arguments. On the contrary, training LLMs with human preferences effectively tunes them to mirror human communication patterns, particularly nuanced communicative actions, potentially increasing individuals’ susceptibility to their influence. 2025.findings-acl.793 @@ -17595,7 +17595,7 @@ <fixed-case>PISCO</fixed-case>: Pretty Simple Compression for Retrieval-Augmented Generation MaximeLouisNaver Labs Europe - HervéDéjeanNaver Labs Europe + HervéDéjeanNaver Labs Europe StéphaneClinchantNaver Labs Europe 15506-15521 Retrieval-Augmented Generation (RAG) pipelines enhance Large Language Models (LLMs) by retrieving relevant documents, but they face scalability issues due to high inference costs and limited context size. Document compression is a practical solution, but current soft compression methods often suffer from accuracy losses and require extensive pretraining. In this paper, we introduce PISCO, a novel method that achieves a 16x compression rate with minimal accuracy loss (0-3%) across diverse RAG-based question-answering (QA) tasks. Unlike existing approaches, PISCO requires no pretraining or annotated data, relying solely on sequence-level knowledge distillation from document-based questions. With the ability to fine-tune a 7-10B LLM in 24 hours on a single A100 GPU, PISCO offers a highly efficient and scalable solution. We present comprehensive experiments showing that PISCO outperforms existing compression models by 8% in accuracy. @@ -17902,7 +17902,7 @@ Token-level Preference Self-Alignment Optimization for Multi-style Outline Controllable Generation - ZihaoLi + ZihaoLi XuekongXu ZiyaoChen LixinZouWuhan University @@ -17929,7 +17929,7 @@ SarthakRoyIndian Institute of Technology, Kharagpur MartinSemmannUniversität Hamburg AlexanderPanchenkoSkoltech - ChrisBiemannU Hamburg + ChrisBiemannU Hamburg AnimeshMukherjeeIndian Institute of Technology Kharagpur 16008-16022 Despite regulations imposed by nations and social media platforms, e.g. (Government of India, 2021; European Parliament and Council of the European Union, 2022), inter alia, hateful content persists as a significant challenge. Existing approaches primarily rely on reactive measures such as blocking or suspending offensive messages, with emerging strategies focusing on proactive measurements like detoxification and counterspeech. In our work, which we call HATEPRISM, we conduct a comprehensive examination of hate speech regulations and strategies from three perspectives: country regulations, social platform policies, and NLP research datasets. Our findings reveal significant inconsistencies in hate speech definitions and moderation practices across jurisdictions and platforms, alongside a lack of alignment with research efforts. Based on these insights, we suggest ideas and research direction for further exploration of a unified framework for automated hate speech moderation incorporating diverse strategies. @@ -17970,7 +17970,7 @@ ConstanzaFierroCopenhagen University NegarForoutanSchool of Computer and Communication Sciences, EPFL - EPF Lausanne DesmondElliottCopenhagen University and University of Copenhagen - AndersSøgaardCopenhagen University + AndersSøgaardCopenhagen University 16052-16106 Large Language Models (LLMs) store and retrieve vast amounts of factual knowledge acquired during pre-training. Prior research has localized and identified mechanisms behind knowledge recall; however, it has only focused on English monolingual models. The question of how these mechanisms generalize to non-English languages and multilingual LLMs remains unexplored. In this paper, we address this gap by conducting a comprehensive analysis of three multilingual LLMs. First, we show that previously identified recall mechanisms in English largely apply to multilingual contexts, with nuances based on language and architecture. Next, through patching intermediate representations, we localize the role of language during recall, finding that subject enrichment is language-independent, while object extraction is language-dependent. Additionally, we discover that the last token representation acts as a Function Vector (FV), encoding both the language of the query and the content to be extracted from the subject. Furthermore, in decoder-only LLMs, FVs compose these two pieces of information in two separate stages. These insights reveal unique mechanisms in multilingual LLMs for recalling information, highlighting the need for new methodologies—such as knowledge evaluation, fact editing, and knowledge acquisition—that are specifically tailored for multilingual LLMs. 2025.findings-acl.827 @@ -18026,7 +18026,7 @@ KaiLu ZeyuXiongNational University of Defense Technology XinwangLiuNational University of Defense Technology - DongshengLiNational University of Defense Technology + DongshengLiNational University of Defense Technology 16159-16179 Using large language models (LLMs) has a potential risk of privacy leakage since the data with sensitive information may be used for fine-tuning the LLMs. Differential privacy (DP) provides theoretical guarantees of privacy protection, but its practical application in LLMs still has the problem of privacy-utility trade-off. Researchers synthesized data with strong generation capabilities closed-source LLMs (i.e., GPT-4) under DP to alleviate this problem, but this method is not so flexible in fitting the given privacy distributions without fine-tuning. Besides, such methods can hardly balance the diversity of synthetic data and its relevance to target privacy data without accessing so much private data. To this end, this paper proposes DPGA-TextSyn, combining general LLMs with genetic algorithm (GA) to produce relevant and diverse synthetic text under DP constraints. First, we integrate the privacy gene (i.e., metadata) to generate better initial samples. Then, to achieve survival of the fittest and avoid homogeneity, we use privacy nearest neighbor voting and similarity suppression to select elite samples. In addition, we expand elite samples via genetic strategies such as mutation, crossover, and generation to expand the search scope of GA. Experiments show that this method significantly improves the performance of the model in downstream tasks while ensuring privacy. 2025.findings-acl.831 @@ -18038,7 +18038,7 @@ SeungyoonLeeKorea University SeongtaeHongKorea University HyeonseokMoonKorea University - HeuiseokLim + HeuiseokLim 16180-16193 Large Language Models (LLMs) are increasingly incorporating multilingual capabilities, fueling the demand to transfer them into target language-specific models. However, most approaches, which blend the source model’s embedding by replacing the source vocabulary with the target language-specific vocabulary, may constrain expressive capacity in the target language since the source model is predominantly trained on English data. In this paper, we propose Semantic Aware Linear Transfer (SALT), a novel cross-lingual transfer technique that recycles embeddings from target language Pre-trained Language Models (PLMs) to transmit the deep representational strengths of PLM-derived embedding to LLMs. SALT derives unique regression lines based on the similarity in the overlap of the source and target vocabularies to handle each non-overlapping token’s embedding space. Our extensive experiments show that SALT significantly outperforms other transfer methods, achieving lower loss and faster convergence during language adaptation. Notably, SALT achieves remarkable performance in cross-lingual understanding setups compared to other methods. Furthermore, we highlight the scalable use of PLMs to enhance the functionality of contemporary LLMs by conducting experiments with varying architectures. 2025.findings-acl.832 @@ -18056,7 +18056,7 @@ RuimingTang YongYuShanghai Jiaotong University JunWangUniversity College London - WeinanZhang + WeinanZhang 16194-16204 To address these limitations, we propose BDC, a novel framework that Boosts reasoning exploration via multi-agent collaboration, Disentangles heterogeneous data into specialized experts, and Customizes solutions through dynamic model composition. BDC integrates a Monte Carlo Tree-of-Agents algorithm, where multiple LLMs mutually verify and refine reasoning paths through reflection-guided pruning, enabling efficient exploration of high-quality solutions. To handle data diversity, we cluster problems by latent semantics, train composable LoRA experts on each cluster, and deploy an input-aware hypernetwork to dynamically merge these experts into tailored solvers. Experiments on APPS and CodeContest benchmarks demonstrate BDC’s superiority: it achieves up to 73.8% accuracy on hard problems, outperforming state-of-the-art methods like LATS and RethinkMCTS by 9–15%. This work lays the groundwork for advancing LLM capabilities in complex reasoning tasks, offering a novel System2-to-System1 solution. 2025.findings-acl.833 @@ -18114,7 +18114,7 @@ AndreaSensi ElisaPassone DaniloCroce - RobertoBasiliUniversity of Roma, Tor Vergata + RobertoBasiliUniversity of Roma, Tor Vergata 16266-16284 Grounded natural language understanding in Human-Robot Interaction (HRI) requires integrating linguistic, visual, and world knowledge to ensure effective task execution. We propose an approach that enhances Multi-Modal Large Language Models (MLLMs) with a novel explicit dialogue planning phase, allowing robotic agents to systematically refine their understanding of ambiguous commands through structured clarification steps. This reduces hallucinations and improves task feasibility.To evaluate this approach, we introduce a novel dataset of over 1,100 annotated dialogues in English and Italian, designed for fine-tuning and assessing Multi-Modal models in HRI scenarios. Experimental results show that dialogue planning improves response accuracy and quality, and contributes to cross-lingual generalisation, enabling models trained in one language to transfer effectively to another. To the best of our knowledge, this is the first application of structured, goal-driven, and explicit dialogue planning in Multi-Modal LLMs for grounded interaction. 2025.findings-acl.837 @@ -18125,7 +18125,7 @@ <fixed-case>MVL</fixed-case>-<fixed-case>SIB</fixed-case>: A Massively Multilingual Vision-Language Benchmark for Cross-Modal Topical Matching Fabian DavidSchmidt FlorianSchneider - ChrisBiemannU Hamburg + ChrisBiemannU Hamburg GoranGlavašJulius-Maximilians-Universität Würzburg 16285-16312 Existing multilingual vision-language (VL) benchmarks often only cover a handful of languages. Consequently, evaluations of large vision-language models (LVLMs) predominantly target high-resource languages, underscoring the need for evaluation data for low-resource languages. To address this limitation, we introduce MVL-SIB, a massively multilingual vision-language benchmark that evaluates both cross-modal and text-only topical matching across 205 languages – over 100 more than the most multilingual existing VL benchmarks encompass. We then benchmark a range of of open-weight LVLMs together with GPT-4o(-mini) on MVL-SIB. Our results reveal that LVLMs struggle in cross-modal topic matching in lower-resource languages, performing no better than chance on languages like N’Koo. Our analysis further reveals that VL support in LVLMs declines disproportionately relative to textual support for lower-resource languages, as evidenced by comparison of cross-modal and text-only topical matching performance. We further observe that open-weight LVLMs do not benefit from representing a topic with more than one image, suggesting that these models are not yet fully effective at handling multi-image tasks. By correlating performance on MVL-SIB with other multilingual VL benchmarks, we highlight that MVL-SIB serves as a comprehensive probe of multilingual VL understanding in LVLMs. @@ -18138,7 +18138,7 @@ YihongTang KehaiChenHarbin Institute of Technology (Shenzhen) XuefengBai - Zheng-YuNiu + Zheng-YuNiu BoWang JieLiuHarbin Institute of Technology MinZhangHarbin Institute of Technology, Shenzhen @@ -18240,7 +18240,7 @@ HuiShen HaozheWangINF KangyuZhengRensselaer Polytechnic Institute - MiZhangThe Ohio State University + MiZhangThe Ohio State University RossellaArcucciImperial College London 16448-16460 Automatic radiology report generation holds significant potential to streamline the labor-intensive process of report writing by radiologists, particularly for 3D radiographs such as CT scans. While CT scans are critical for clinical diagnostics, they remain less explored compared to 2D radiographs. To date, there has been no comprehensive benchmark for 3D radiograph report generation (3DRRG), nor sufficient investigation into the optimal training strategies for Vision Language Models (VLMs) in this context, particularly with respect to vision encoder choices, visual token compression, and model scaling.In this work, we make two three contributions. We curate CT-3DRRG, the largest publicly available 3D CT-report dataset, establishing a robust and diverse benchmark for evaluating VLM performance on 3DRRG. Furthermore, we propose a comprehensive training recipe for building high-performing VLMs for 3DRRG, exploring key factors such as vision encoder pretraining strategies, visual token compression, and the impact of data & model scale. Guided by these findings, we introduce Argus, a state-of-the-art family of VLMs that achieve superior performance across different model sizes and input 3D medical image resolutions, efficiently processing high-resolution 3D images up to 512 × 512 × 256. @@ -18285,7 +18285,7 @@ PasqualeMinerviniUniversity of Edinburgh, University of Edinburgh PontusStenetorpUniversity College London Benjamin I. P.RubinsteinThe University of Melbourne and The University of Melbourne - TrevorCohnGoogle and The University of Melbourne + TrevorCohnGoogle and The University of Melbourne 16504-16544 The implications of backdoor attacks on English-centric large language models (LLMs) have been widely examined — such attacks can be achieved by embedding malicious behaviors during training and activated under specific conditions that trigger malicious outputs. Despite the increasing support for multilingual capabilities in open-source and proprietary LLMs, the impact of backdoor attacks on these systems remains largely under-explored. Our research focuses on cross-lingual backdoor attacks against multilingual LLMs, particularly investigating how poisoning the instructiontuning data for one or two languages can affect the outputs for languages whose instructiontuning data were not poisoned. Despite its simplicity, our empirical analysis reveals that our method exhibits remarkable efficacy in models like BLOOM and GPT-4o, with high attack success rates, surpassing 90% in more than 7 out of 12 languages across various scenarios. Our findings also indicate that more powerful models show increased susceptibility to transferable cross-lingual backdoor attacks, which also applies to LLMs predominantly pre-trained on English/Chinese data, such as Llama2, Llama3, Qwen2.5, and Gemma. Moreover, our experiments demonstrate 1) High Transferability: the backdoor mechanism operates successfully in cross lingual response scenarios across 26 languages, achieving an average attack success rate of 99%, and 2) Robustness: the proposed attack remains effective even after defenses are applied. These findings expose critical security vulnerabilities in multilingual LLMs and highlight the urgent need for more robust, targeted defense strategies to address the unique challenges posed by cross-lingual backdoor transfer. 2025.findings-acl.848 @@ -18439,7 +18439,7 @@ <fixed-case>CHARPEVAL</fixed-case>: Benchmarking Large Language Models’ Contextual Reasoning in Knowledge-Grounded Dialogue AbbasGhaddarHuawei Technologies Ltd. DavidAlfonso-HermeloHuawei Technologies Ltd. - PhilippeLanglaisUniversité de Montréal + PhilippeLanglaisUniversité de Montréal BoxingChenHuawei Technologies Ltd. PrasannaParthasarathiHuawei Technologies Ltd. 16764-16775 @@ -18467,7 +18467,7 @@ <fixed-case>D</fixed-case>ebate4<fixed-case>MATH</fixed-case>: Multi-Agent Debate for Fine-Grained Reasoning in Math ShaoweiZhang - DeyiXiongTianjin University + DeyiXiongTianjin University 16810-16824 Large language models (LLMs) have demonstrated impressive performance in reasoning. However, existing data annotation methods usually suffer from high annotation cost and the lack of effective automatic validation. To address these issues, we propose a Fine-grained Multi-Agent Debate framework (FMAD) and MMATH-Data, a dataset created by FMAD, which consists of 46K reasoning steps. By prompting multiple agents to debate, FMAD assesses the contribution of each reasoning step to the final solution, with labels based on the judge’s confidence score and the winner’s position. To facilitate reasoning in math and examine FMAD and MMATH-Data, we further propose two key components: a Multi-Agent Debate Reward Model (MRM) trained on MMATH-Data, which serves as a reward model to provide robust feedback during the optimization process, and MMATH-LLM, a model designed specifically for mathematical reasoning. MMATH-LLM is fine-tuned using reinforcement learning with supervised feedback from MRM, aiming at improving its mathematical reasoning capabilities. Extensive experiments demonstrate that our model achieves 83.4% accuracy on the GSM8K dataset and 45.1% on the MATH dataset, outperforming the state-of-the-art methods by 1.2% and 3.5%, respectively. All data and code will be available soon at GitHub. 2025.findings-acl.862 @@ -18531,9 +18531,9 @@ KehaiChenHarbin Institute of Technology (Shenzhen) YangXiang XuefengBai - MuyunYang + MuyunYang YangFengInstitute of Computing Technology, Chinese Academy of Sciences - TiejunZhaoHarbin Institute of Technology + TiejunZhaoHarbin Institute of Technology MinZhangHarbin Institute of Technology 16886-16902 The remarkable understanding and generation capabilities of large language models (LLMs) have greatly improved translation performance. However, incorrect understanding of the sentence to be translated can degrade translation quality. To address this issue, we proposed a novel Iterative Bilingual Understanding Translation (IBUT) method based on the cross-lingual capabilities of LLMs and the dual characteristics of translation tasks. The cross-lingual capability of LLMs enables the generation of contextual understanding for both the source and target languages separately. Furthermore, the dual characteristics allow IBUT to generate effective cross-lingual feedback, iteratively refining contextual understanding, thereby reducing errors and improving translation performance. Experimental results showed that the proposed IBUT outperforms several strong comparison methods, especially being generalized to multiple domains (e.g., news, commonsense, and cultural translation benchmarks). @@ -18589,11 +18589,11 @@ <fixed-case>R</fixed-case>eflect<fixed-case>E</fixed-case>vo: Improving Meta Introspection of Small <fixed-case>LLM</fixed-case>s by Learning Self-Reflection JiaqiLiBeijing Institute for General Artificial Intelligence XinyiDong - YangLiuBeijing Institute for General Artificial Intelligence + YangLiuBeijing Institute for General Artificial Intelligence ZhizhuoYang QuansenWangBeijing Institute of General Artificial Intelligence XiaoboWangBeijing Institute for General Artificial Intelligence and University of Science and Technology of China - Song-ChunZhu + Song-ChunZhu ZixiaJia ZilongZhengBeijing Institute for General Artificial Intelligence 16948-16966 @@ -18607,7 +18607,7 @@ ShuoYang CarenHanUniversity of Melbourne, University of Western Australia and University of Sydney SiwenLuoUniversity of Western Australia - EduardHovyUniversity of Melbourne and Carnegie Mellon University + EduardHovyUniversity of Melbourne and Carnegie Mellon University 16967-16986 Visual Question Answering (VQA) necessitates models to reason effectively across visual and textual modalities. However, existing Large Vision-Language Models (LVLMs) often fall short in achieving human-like reasoning due to a lack of integrated commonsense knowledge, limiting their robustness and accuracy in real-world scenarios where both explicit facts and implicit understanding are crucial. To address this challenge, we present MAGIC-VQA: Multimodal And Grounded Inference with Commonsense Knowledge, a novel framework designed to enhance multimodal inference by integrating commonsense reasoning. MAGIC-VQA introduces a three-stage process: (1) Explicit Commonsense Knowledge Retrieval from external knowledge graphs, (2) By-Type Commonsense Knowledge Post-Processing to refine contextual relevance, and (3) Implicit Commonsense Knowledge Augmentation using a heterogeneous graph processed by a Graph Neural Network (GNN). These stages collectively enable nuanced, context-aware reasoning without extensive pre-training or intricate prompt tuning.Our MAGIC-VQA significantly improves comprehensive benchmark datasets, surpassing existing models in tasks requiring advanced commonsense reasoning. MAGIC-VQA establishes a robust pathway for integrating commonsense knowledge into VQA, bridging the gap between vision-language inputs and high-level reasoning for improved reliability and contextual accuracy. 2025.findings-acl.872 @@ -18642,7 +18642,7 @@ Leonardo F. R.RibeiroAmazon RexhinaBlloshmiAmazon ChristopherDavisAmazon - Adriàde GispertAmazon + Adriàde GispertAmazon 17030-17049 We present GaRAGe, a large RAG benchmark with human-curated long-form answers and annotations of each grounding passage, allowing a fine-grained evaluation of whether LLMs can identify relevant grounding when generating RAG answers. Our benchmark contains 2366 questions of diverse complexity, dynamism, and topics, and includes over 35K annotated passages retrieved from both private document sets and the Web, to reflect real-world RAG use cases. This makes it an ideal test bed to evaluate an LLM’s ability to identify only the relevant information necessary to compose a response, or provide a deflective response when there is insufficient information. Evaluations of multiple state-of-the-art LLMs on GaRAGe show that the models tend to over-summarise rather than (a) ground their answers strictly on the annotated relevant passages (reaching at most a Relevance-Aware Factuality Score of 60%), or (b) deflect when no relevant grounding is available (reaching at most 31% true positive rate in deflections). The F_{1} in attribution to relevant sources is at most 58.9%, and we show that performance is particularly reduced when answering time-sensitive questions and when having to draw knowledge from sparser private grounding sources. 2025.findings-acl.875 @@ -18688,8 +18688,8 @@ HongyinLuoMassachusetts Institute of Technology Abdalla Mohamed Salama SayedMoustafaThe Chinese University of Hong Kong XixinWuThe Chinese University of Hong Kong - James R.GlassMassachusetts Institute of Technology - Helen M.MengThe Chinese University of Hong Kong + James R.GlassMassachusetts Institute of Technology + Helen M.MengThe Chinese University of Hong Kong 17091-17105 Improving context faithfulness in large language models is essential for developing trustworthy retrieval augmented generation systems and mitigating hallucinations, especially in long-form question answering (LFQA) tasks or scenarios involving knowledge conflicts. Existing methods either intervene LLMs only at inference without addressing their inherent limitations or overlook the potential for self-improvement. In this paper, we introduce GenDiE(Generate, Discriminate, Evolve), a novel self-evolving framework that enhances context faithfulness through fine-grained sentence-level optimization. GenDiE combines both generative and discriminative training, equipping LLMs with self-generation and self-scoring capabilities to facilitate iterative self-evolution. This supports both data construction for model alignment and score-guided search during inference. Furthermore, by treating each sentence in a response as an independent optimization unit, GenDiE effectively addresses the limitations of previous approaches that optimize at the holistic answer level, which may miss unfaithful details. Experiments on ASQA (in-domain LFQA) and ConFiQA (out-of-domain counterfactual QA) datasets demonstrate that GenDiE surpasses various baselines in both faithfulness and correctness, and exhibits robust performance for domain adaptation. 2025.findings-acl.878 @@ -18739,7 +18739,7 @@ In the <fixed-case>LLM</fixed-case> era, Word Sense Induction remains unsolved AnnaMosolova - MarieCanditoUniversité Paris Cité + MarieCanditoUniversité Paris Cité CarlosRamischLIS - Laboratoire d’Informatique et Systèmes and AMU - Aix Marseille University 17161-17178 In the absence of sense-annotated data, word sense induction (WSI) is a compelling alternative to word sense disambiguation, particularly in low-resource or domain-specific settings. In this paper, we emphasize methodological problems in current WSI evaluation. We propose an evaluation on a SemCor-derived dataset, respecting the original corpus polysemy and frequency distributions. We assess pre-trained embeddings and clustering algorithms across parts of speech, and propose and evaluate an LLM-based WSI method for English. We evaluate data augmentation sources (LLM-generated, corpus and lexicon), and semi-supervised scenarios using Wiktionary for data augmentation, must-link constraints, number of clusters per lemma.We find that no unsupervised method (whether ours or previous) surpasses the strong “one cluster per lemma” heuristic (1cpl). We also show that (i) results and best systems may vary across POS, (ii) LLMs have troubles performing this task, (iii) data augmentation is beneficial and (iv) capitalizing on Wiktionary does help. It surpasses previous SOTA system on our test set by 3.3%. WSI is not solved, and calls for a better articulation of lexicons and LLMs’ lexical semantics capabilities. @@ -18776,7 +18776,7 @@ SiboYi TianshuoCong XinleiHeThe Hong Kong University of Science and Technology - QiLiTsinghua University + QiLiTsinghua University JiaxingSong 17221-17234 Small language models (SLMs) have become increasingly prominent in the deployment on edge devices due to their high efficiency and low computational cost. While researchers continue to advance the capabilities of SLMs through innovative training strategies and model compression techniques, the security risks of SLMs have received considerably less attention compared to large language models (LLMs). To fill this gap, we provide a comprehensive empirical study to evaluate the security performance of 13 state-of-the-art SLMs under various jailbreak attacks. Our experiments demonstrate that most SLMs are quite susceptible to existing jailbreak attacks, while some of them are even vulnerable to direct harmful prompts. To address the safety concerns, we evaluate several representative defense methods and demonstrate their effectiveness in enhancing the security of SLMs. We further analyze the potential security degradation caused by different SLM techniques including architecture compression, quantization, knowledge distillation, and so on. We expect that our research can highlight the security challenges of SLMs and provide valuable insights to future work in developing more robust and secure SLMs. @@ -18799,7 +18799,7 @@ A Law Reasoning Benchmark for <fixed-case>LLM</fixed-case> with Tree-Organized Structures including Factum Probandum, Evidence and Experiences JiaxinShen - JinanXuBeijing Jiaotong University + JinanXuBeijing Jiaotong University HuiqiHu LuyiLin GuoyangMa @@ -18829,7 +18829,7 @@ Stereotype Detection as a Catalyst for Enhanced Bias Detection: A Multi-Task Learning Approach AdityaTomar RudraMurthyIBM India Pvt Ltd - PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology + PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology 17304-17317 Bias and stereotypes in language models can cause harm, especially in sensitive areas like content moderation and decision-making. This paper addresses bias and stereotype detection by exploring how jointly learning these tasks enhances model performance. We introduce StereoBias, a unique dataset labeled for bias and stereotype detection across five categories: religion, gender, socio-economic status, race, profession, and others, enabling a deeper study of their relationship. Our experiments compare encoder-only models and fine-tuned decoder-only models using QLoRA. While encoder-only models perform well, decoder-only models also show competitive results. Crucially, joint training on bias and stereotype detection significantly improves bias detection compared to training them separately. Additional experiments with sentiment analysis confirm that the improvements stem from the connection between bias and stereotypes, not multi-task learning alone. These findings highlight the value of leveraging stereotype information to build fairer and more effective AI systems. 2025.findings-acl.889 @@ -18865,7 +18865,7 @@ Are Dialects Better Prompters? A Case Study on <fixed-case>A</fixed-case>rabic Subjective Text Classification LeilaMoudjariIRIT - FarahBenamaraInstitut de recherche en informatique de toulouse + FarahBenamaraInstitut de recherche en informatique de toulouse 17356-17371 This paper investigates the effect of dialectal prompting, variations in prompting scrip t and model fine-tuning on subjective classification in Arabic dialects. To this end, we evaluate the performances of 12 widely used open LLMs across four tasks and eight benchmark datasets. Our results reveal that specialized fine-tuned models with Arabic and Arabizi scripts dialectal prompts achieve the best results, which constitutes a novel state of the art in the field. 2025.findings-acl.892 @@ -18944,7 +18944,7 @@ Metaphor and Large Language Models: When Surface Features Matter More than Deep Understanding ElisaSanchez-BayonaUniversidad del País Vasco - RodrigoAgerriUniversity of the Basque Country + RodrigoAgerriUniversity of the Basque Country 17462-17477 This paper presents a comprehensive evaluation of the capabilities of Large Language Models (LLMs) in metaphor interpretation across multiple datasets, tasks, and prompt configurations. Although metaphor processing has gained significant attention in Natural Language Processing (NLP), previous research has been limited to single-dataset evaluations and specific task settings, often using artificially constructed data through lexical replacement. We address these limitations by conducting extensive experiments using diverse publicly available datasets with inference and metaphor annotations, focusing on Natural Language Inference (NLI) and Question Answering (QA) tasks. The results indicate that LLMs’ performance is more influenced by features like lexical overlap and sentence length than by metaphorical content, demonstrating that any alleged emergent abilities of LLMs to understand metaphorical language are the result of a combination of surface-level features, in-context learning, and linguistic knowledge. This work provides critical insights into the current capabilities and limitations of LLMs in processing figurative language, highlighting the need for more realistic evaluation frameworks in metaphor interpretation tasks. Data and code publicly available: https://github.com/elisanchez-beep/metaphorLLM 2025.findings-acl.898 @@ -19021,7 +19021,7 @@ QinglinZhangAlibaba Group JiaqingLiuAlibaba Group QianChen - WenWang + WenWang 17577-17593 The video topic segmentation (VTS) task segments videos into intelligible, non-overlapping topics, facilitating efficient comprehension of video content and quick access to specific content. VTS is also critical to various downstream video understanding tasks. Traditional VTS methods using shallow features or unsupervised approaches struggle to accurately discern the nuances of topical transitions. Recently, supervised approaches have achieved superior performance on video action or scene segmentation over unsupervised approaches. In this work, we improve supervised VTS by thoroughly exploring **multimodal fusion** and **multimodal coherence modeling**. Specifically, (1) we enhance multimodal fusion by exploring different architectures using Cross-Attention and Mixture of Experts. (2) To generally strengthen multimodality alignment and fusion, we pre-train and fine-tune the model with multimodal contrastive learning. (3) We propose a new pre-training task tailored for the VTS task, and a novel fine-tuning task for enhancing multimodal coherence modeling for VTS. We evaluate our proposed approaches on educational videos, in the form of lectures, due to the vital role of topic segmentation of educational videos in boosting learning experiences. Additionally, to promote research in VTS, we introduce a large-scale Chinese lecture video dataset to augment the existing English lecture video datasets. Experiments on both English and Chinese lecture datasets demonstrate that our model achieves superior VTS performance compared to competitive unsupervised and supervised baselines. 2025.findings-acl.904 @@ -19267,7 +19267,7 @@ ChristianMoroUniversity of Padova LuisaOrrù Gian PieroTurchi - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence GiovanniDa San MartinoUniversity of Padua 17918-17929 Persuasion (or propaganda) techniques detection is a relatively novel task in Natural Language Processing (NLP). While there have already been a number of annotation campaigns, they have been based on heuristic guidelines, which have never been thoroughly discussed. Here, we present the first systematic analysis of a complex annotation task -detecting 22 persuasion techniques in memes-, for which we provided continuous expert oversight. The presence of an expert allowed us to critically analyze specific aspects of the annotation process. Among our findings, we show that inter-annotator agreement alone inadequately assessed annotation correctness. We thus define and track different error types, revealing that expert feedback shows varying effectiveness across error categories. This pattern suggests that distinct mechanisms underlie different kinds of misannotations. Based on our findings, we advocate for an expert oversight in annotation tasks and periodic quality audits. As an attempt to reduce the costs for this, we introduce a probabilistic model for optimizing intervention scheduling. @@ -19290,7 +19290,7 @@ <fixed-case>BEDAA</fixed-case>: <fixed-case>B</fixed-case>ayesian Enhanced <fixed-case>D</fixed-case>e<fixed-case>BERT</fixed-case>a for Uncertainty-Aware Authorship Attribution IqraZahidImperial College London YouchengSunMohamed bin Zayed University of Artificial Intelligence and The University of Manchester - RizaBatista-NavarroUniversity of Manchester + RizaBatista-NavarroUniversity of Manchester 17952-17966 Authorship Attribution (AA) seeks to identify the author of a given text, yet existing methods often struggle with trustworthiness and interpretability, particularly across different domains, languages, and stylistic variations. These challenges arise from the absence of uncertainty quantification and the inability of current models to adapt to diverse authorship tasks. To address these limitations, we introduce BEDAA, a Bayesian-Enhanced DeBERTa framework that integrates Bayesian reasoning with transformer-based language models to enable uncertainty-aware and interpretable authorship attribution. BEDAA achieves up to 19.69% improvement in F1-score across multiple authorship attribution tasks, including binary, multiclass, and dynamic authorship detection. By incorporating confidence ranking, uncertainty decomposition, and probabilistic reasoning, BEDAA improves robustness while offering transparent decision-making processes. Furthermore, BEDAA extends beyond traditional AA by demonstrating its effectiveness in human vs. machine-generated text classification, code authorship detection, and cross-lingual attribution. These advances establish BEDAA as a generalised, interpretable, and adaptable framework for modern authorship attribution challenges. 2025.findings-acl.924 @@ -19301,7 +19301,7 @@ Benchmarking the Benchmarks: Reproducing Climate-Related <fixed-case>NLP</fixed-case> Tasks TomCalamai OanaBalalauINRIA - Fabian M.SuchanekTelecom Paris + Fabian M.SuchanekTelecom Paris 17967-18009 Significant efforts have been made in the NLP community to facilitate the automatic analysis of climate-related corpora by tasks such as climate-related topic detection, climate risk classification, question answering over climate topics, and many more. In this work, we perform a reproducibility study on 8 tasks and 29 datasets, testing 6 models. We find that many tasks rely heavily on surface-level keyword patterns rather than deeper semantic or contextual understanding. Moreover, we find that 96% of the datasets contain annotation issues, with 16.6% of the sampled wrong predictions of a zero-shot classifier being actually clear annotation mistakes, and 38.8% being ambiguous examples.These results call into question the reliability of current benchmarks to meaningfully compare models and highlight the need for improved annotation practices. We conclude by outlining actionable recommendations to enhance dataset quality and evaluation robustness. 2025.findings-acl.925 @@ -19355,7 +19355,7 @@ <fixed-case>F</fixed-case>act<fixed-case>L</fixed-case>ens: Benchmarking Fine-Grained Fact Verification KushanMitraMegagon Labs - DanZhangMegagon Labs + DanZhangMegagon Labs SajjadurRahman EstevamHruschkaMegagon Labs, Megagon Labs and Carnegie Mellon University 18085-18096 @@ -19600,7 +19600,7 @@ AlexandraChouldechovaMicrosoft and Carnegie Mellon University JeanGarcia-GathrightResearch, Microsoft AlexandraOlteanuResearch, Microsoft - HannaWallachMicrosoft + HannaWallachMicrosoft 18423-18440 The NLP research community has made publicly available numerous instruments for measuring representational harms caused by large language model (LLM)-based systems. These instruments have taken the form of datasets, metrics, tools, and more. In this paper, we examine the extent to which such instruments meet the needs of practitioners tasked with evaluating LLM-based systems. Via semi-structured interviews with 12 such practitioners, we find that practitioners are often unable to use publicly available instruments for measuring representational harms. We identify two types of challenges. In some cases, instruments are not useful because they do not meaningfully measure what practitioners seek to measure or are otherwise misaligned with practitioner needs. In other cases, instruments-even useful instruments-are not used by practitioners due to practical and institutional barriers impeding their uptake. Drawing on measurement theory and pragmatic measurement, we provide recommendations for addressing these challenges to better meet practitioner needs. 2025.findings-acl.947 @@ -19611,7 +19611,7 @@ Mind the (Belief) Gap: Group Identity in the World of <fixed-case>LLM</fixed-case>s AnganaBorah MarwaHoualla - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan 18441-18463 Social biases and belief-driven behaviors can significantly impact Large Language Models’ (LLMs’) decisions on several tasks. As LLMs are increasingly used in multi-agent systems for societal simulations, their ability to model fundamental group psychological characteristics remains critical yet under-explored. In this study, we present a multi-agent framework that simulates belief congruence, a classical group psychology theory that plays a crucial role in shaping societal interactions and preferences. Our findings reveal that LLMs exhibit amplified belief congruence compared to humans, across diverse contexts. We further investigate the implications of this behavior on two downstream tasks: (1) misinformation dissemination and (2) LLM learning, finding that belief congruence in LLMs increases misinformation dissemination and impedes learning. To mitigate these negative impacts, we propose strategies inspired by: (1) contact hypothesis, (2) accuracy nudges, and (3) global citizenship framework. Our results show that the best strategies reduce misinformation dissemination by up to (37%) and enhance learning by (11%). Bridging social psychology and AI, our work provides insights to navigate real-world interactions using LLMs while addressing belief-driven biases. 2025.findings-acl.948 @@ -19654,7 +19654,7 @@ Machine Theory of Mind Needs Machine Validation AdilSoubkiState University of New York at Stony Brook - OwenRambowStony Brook University + OwenRambowStony Brook University 18495-18505 In the last couple years, there has been a flood of interest in studying the extent to which language models (LMs) have a theory of mind (ToM) — the ability to ascribe mental states to themselves and others. The results provide an unclear picture of the current state of the art, with some finding near-human performance and others near-zero. To make sense of this landscape, we perform a survey of 16 recent studies aimed at measuring ToM in LMs and find that, while almost all perform checks for human identifiable issues, less than half do so for patterns only a machine might exploit. Among those that do perform such validation, which we call machine validation, none identify LMs to exceed human performance. We conclude that the datasets that show high LM performance on ToM tasks are easier than their peers, likely due to the presence of spurious patterns in the data, and we caution against building ToM benchmarks relying solely on human validation of the data. 2025.findings-acl.951 @@ -19691,7 +19691,7 @@ OliverEberleTechnische Universität Berlin PhillipRust CarinaKauf - AndersSøgaardCopenhagen University + AndersSøgaardCopenhagen University 18542-18561 Detecting ambiguity is important for language understanding, including uncertainty estimation, humour detection, and processing garden path sentences. We assess language models’ sensitivity to ambiguity by introducing an adversarial ambiguity dataset that includes syntactic, lexical, and phonological ambiguities along with adversarial variations (e.g., word-order changes, synonym replacements, and random-based alterations). Our findings show that direct prompting fails to robustly identify ambiguity, while linear probes trained on model representations can decode ambiguity with high accuracy, sometimes exceeding 90%. Our results offer insights into the prompting paradigm and how language models encode ambiguity at different layers. 2025.findings-acl.954 @@ -19702,7 +19702,7 @@ Biases Propagate in Encoder-based Vision-Language Models: A Systematic Analysis From Intrinsic Measures to Zero-shot Retrieval Outcomes KshitishGhate TessaCharlesworthKellogg Community College - Mona T.DiabCarnegie Mellon University + Mona T.DiabCarnegie Mellon University AylinCaliskanUniversity of Washington 18562-18580 To build fair AI systems we need to understand how social-group biases intrinsic to foundational encoder-based vision-language models (VLMs) manifest in biases in downstream tasks. In this study, we demonstrate that intrinsic biases in VLM representations systematically “carry over” or propagate into zero-shot retrieval tasks, revealing how deeply rooted biases shape a model’s outputs. We introduce a controlled framework to measure this propagation by correlating (a) intrinsic measures of bias in the representational space with (b) extrinsic measures of bias in zero-shot text-to-image (TTI) and image-to-text (ITT) retrieval. Results show substantial correlations between intrinsic and extrinsic bias, with an average \rho = 0.83 \pm 0.10. This pattern is consistent across 114 analyses, both retrieval directions, six social groups, and three distinct VLMs. Notably, we find that larger/better-performing models exhibit greater bias propagation, a finding that raises concerns given the trend towards increasingly complex AI models. Our framework introduces baseline evaluation tasks to measure the propagation of group and valence signals. Investigations reveal that underrepresented groups experience less robust propagation, further skewing their model-related outcomes. @@ -19720,7 +19720,7 @@ ZhenweiDaiAmazon YanHanAmazon ChenLuoAmazon - JingHuangAmazon + JingHuangAmazon ZhenLiAmazon SuhangWangPennsylvania State University YueXingMichigan State University @@ -19768,7 +19768,7 @@ MatthewJagielskiGoogle KatherineLeeGoogle NiloofarMireshghallah - David A.SmithNortheastern University + David A.SmithNortheastern University Christopher A.Choquette-ChooGoogle DeepMind 18703-18726 Due to the sensitive nature of personally identifiable information (PII), its owners may have the authority to control its inclusion or request its removal from large-language model (LLM) training. Beyond this, PII may be added or removed from training datasets due to evolving dataset curation techniques, because they were newly scraped for retraining, or because they were included in a new downstream fine-tuning stage. We find that the amount and ease of PII memorization is a dynamic property of a model that evolves throughout training pipelines and depends on commonly altered design choices. We characterize three such novel phenomena: (1) similar-appearing PII seen later in training can elicit memorization of earlier-seen sequences in what we call assisted memorization, and this is a significant factor (in our settings, up to 1/3); (2) adding PII can increase memorization of other PII; and (3) removing PII can lead to other PII being memorized. @@ -19871,7 +19871,7 @@ TraciHongBoston University, Boston University Ika KarlinaIdrisMonash University Alham FikriAjiMohamed bin Zayed University of Artificial Intelligence - Derry TantiWijayaMonash University and Boston University + Derry TantiWijayaMonash University and Boston University 18863-18890 Online discourse is increasingly trapped in a vicious cycle where polarizing language fuelstoxicity and vice versa. Identity, one of the most divisive issues in modern politics, oftenincreases polarization. Yet, prior NLP research has mostly treated toxicity and polarization asseparate problems. In Indonesia, the world’s third-largest democracy, this dynamic threatens democratic discourse, particularly in online spaces. We argue that polarization and toxicity must be studied in relation to each other. To this end, we present a novel multi-label Indonesian dataset annotated for toxicity, polarization, and annotator demographic information. Benchmarking with BERT-base models and large language models (LLMs) reveals that polarization cues improve toxicity classification and vice versa. Including demographic context further enhances polarization classification performance. 2025.findings-acl.966 @@ -19938,10 +19938,10 @@ NikitaSoni SyedaMahwish PranavChitale - Ryan L.Boyd - LyleUngar + Ryan L.Boyd + LyleUngar Richard N.Rosenthal - H. AndrewSchwartz + H. AndrewSchwartz 18955-18973 Large Language Models (LLMs) are increasingly used in human-centered applications, yet their ability to model diverse psychological constructs is not well understood. In this study, we systematically evaluate a range of Transformer-LMs to predict psychological variables across five major dimensions: affect, substance use, mental health, sociodemographics, and personality. Analyses span three temporal levels—short daily text responses about current affect, text aggregated over two-weeks, and user-level text collected over two years—allowing us to examine how each model’s strengths align with the underlying stability of different constructs. The findings show that mental health signals emerge as the most accurately predicted dimensions (r=0.6) across all temporal scales. At the daily scale, smaller models like DeBERTa and HaRT often performed better, whereas, at longer scales or with greater context, larger model like Llama3-8B performed the best. Also, aggregating text over the entire study period yielded stronger correlations for outcomes, such as age and income. Overall, these results suggest the importance of selecting appropriate model architectures and temporal aggregation techniques based on the stability and nature of the target variable. 2025.findings-acl.971 @@ -20015,7 +20015,7 @@ KelechiOguejiServiceNow Inc JimmyLinUniversity of Waterloo PontusStenetorpUniversity College London - David IfeoluwaAdelaniMcGill University + David IfeoluwaAdelaniMcGill University 19048-19095 Large-scale multilingual evaluations, such as MEGA, often include only a handful of African languages due to the scarcity of high-qualityevaluation data and the limited discoverability of existing African datasets. This lack of representation hinders comprehensive LLM evaluation across a diverse range of languages and tasks. To address these challenges, we introduce AFROBENCH—a multi-task benchmark for evaluating the performance of LLMs across 64 African languages, 15 tasks and 22 datasets. AFROBENCH consists of nine natural language understanding datasets, six text generation datasets, six knowledge and question answering tasks, and one mathematical reasoning task. We present results comparing the performance of prompting LLMs to fine-tuned baselines based on BERT and T5-style models. Our results suggest large gaps in performance between high-resource languages, such as English, and African languages across most tasks; but performance also varies based on the availability of monolingual data resources. Our findings confirm that performance on African languages continues to remain a hurdle for current LLMs, underscoring the need for additional efforts to close this gap. 2025.findings-acl.976 @@ -20048,9 +20048,9 @@ ShadikurRahman MehradShahmohammadi MeghThakkar - Md RizwanParvezQatar Computing Research Institute + Md RizwanParvezQatar Computing Research Institute EnamulHoqueYork University - ShafiqJotyNanyang Technological University and SalesForce.com + ShafiqJotyNanyang Technological University and SalesForce.com 19123-19151 Charts are ubiquitous, as people often use them to analyze data, answer questions, and discover critical insights. However, performing complex analytical tasks with charts requires significant perceptual and cognitive effort. Chart Question Answering (CQA) systems automate this process by enabling models to interpret and reason with visual representations of data. However, existing benchmarks like ChartQA lack real-world diversity and have recently shown performance saturation with modern large vision-language models (LVLMs). To address these limitations, we introduce ChartQAPro, a new benchmark that includes 1,341 charts from 99 diverse sources, spanning various chart types—including infographics and dashboards—and featuring 1,948 questions in various types, such as multiple-choice, conversational, hypothetical, and unanswerable questions, to better reflect real-world challenges. Our evaluations with 21 models show a substantial performance drop for LVLMs on ChartQAPro; e.g., Claude Sonnet 3.5 scores 90.5% on ChartQA but only 55.81% on ChartQAPro, underscoring the complexity of chart reasoning. We complement our findings with detailed error analyses and ablation studies, identifying key challenges and opportunities for advancing LVLMs in chart understanding and reasoning. We release ChartQAPro at https://github.com/vis-nlp/ChartQAPro. 2025.findings-acl.978 @@ -20163,7 +20163,7 @@ LinyangHe ErcongNie HelmutSchmidCenter for Information and Language Processing - HinrichSchuetze + HinrichSchuetze NimaMesgaraniColumbia University JonathanBrennanUniversity of Michigan - Ann Arbor 19284-19302 @@ -20252,9 +20252,9 @@ <fixed-case>F</fixed-case>a<fixed-case>V</fixed-case>e: Factored and Verified Search Rationale for Long-form Answer JihyukKimLG Corporation - SungjinLeeAmazon + SungjinLeeAmazon Seung-wonHwangSeoul National University - YangLiuAmazon + YangLiuAmazon 19402-19416 Targeting long-form question-answering, chain-of-query (CoQ) has been studied, integrating chain-of-thought (CoT) with retrieval-augmented generation. CoQ answers the complex question step-by-step, through simpler subquestions (SQs) from which relevant knowledge is retrieved. By doing so, CoQ aims to improve the answer comprehensiveness and verifiability, at the expense of latency. Our first contribution is showing that the chaining often incurs harmful effects on both objectives, and SQs left unverified often fail to answer the given question. Second, we propose a better alternative to CoQ, union-of-query which adopts a factored approach to break the harmful chain. Finally, we propose to verify SQs before answers, by fine-tuning the SQ generator using verified SQs and introducing a selector verifying SQs in test time. Employing vicuna-13b, our approach, denoted by FaVe (short for Factored and Verified search), even outperforms ChatGPT baselines while maintaining efficiency. 2025.findings-acl.993 @@ -20519,7 +20519,7 @@ LiZeng ZemingLiu ChongFeng - HeyanHuangBeijing Institute of Technology + HeyanHuangBeijing Institute of Technology YuhangGuo 19725-19743 Model editing aims to correct errors and outdated knowledge in the Large language models (LLMs) with minimal cost. Prior research has proposed a variety of datasets to assess the effectiveness of these model editing methods. However, most existing datasets only require models to output short phrases or sentences, overlooks the widespread existence of document level tasks in the real world, raising doubts about their practical usability. Aimed at addressing this limitation and promoting the application of model editing in real-world scenarios, we propose the task of document-level model editing. To tackle such challenges and enhance model capabilities in practical settings, we introduce DocMEdit, a dataset focused on document-level model editing, characterized by document-level inputs and outputs, extrapolative, and multiple facts within a single edit. We propose a series of evaluation metrics and experiments. The results show that the difficulties in document-level model editing pose challenges for existing model editing methods. @@ -20619,7 +20619,7 @@ Core: Robust Factual Precision with Informative Sub-Claim Identification - ZhengpingJiangJohns Hopkins University + ZhengpingJiangJohns Hopkins University JingyuZhangJohns Hopkins University NathanielWeirAmazon SethEbnerKensho @@ -20761,7 +20761,7 @@ HengruiZhang Henry PengZouUniversity of Illinois at Chicago WeizhiZhangAmazon and University of Illinois Chicago - Philip S.YuUniversity of Illinois Chicago + Philip S.YuUniversity of Illinois Chicago 20027-20041 Large Language models (LLMs) have achieved encouraging results in tabular data generation. However, existing approaches require fine-tuning, which is computationally expensive. This paper explores an alternative: prompting a fixed LLM with in-context examples. We observe that using randomly selected in-context examples hampers the LLM’s performance, resulting in sub-optimal generation quality. To address this, we propose a novel in-context learning framework: TabGen-ICL, to enhance the in-context learning ability of LLMs for tabular data generation. TabGen-ICL operates iteratively, retrieving a subset of real samples that represent the residual between currently generated samples and true data distributions. This approach serves two purposes: locally, it provides more effective in-context learning examples for the LLM in each iteration; globally, it progressively narrows the gap between generated and real data. Extensive experiments on five real-world tabular datasets demonstrate that TabGen-ICL significantly outperforms the random selection strategy. Specifically, it reduces the error rate by a margin of up to 42.2% on the fidelity metric. We demonstrate for the first time that prompting a fixed LLM can yield high-quality synthetic tabular data. 2025.findings-acl.1027 @@ -20831,13 +20831,13 @@ JonCai BrendanKingUniversity of California, Santa Cruz PeytonCameron - Susan WindischBrownUniversity of Colorado at Boulder + Susan WindischBrownUniversity of Colorado at Boulder MiriamEckert DananjaySrinivasUniversity of Colorado at Boulder - George ArthurBakerUniversity of Utah and University of Colorado Boulder + George ArthurBakerUniversity of Utah and University of Colorado Boulder V KateEversonUniversity of Colorado at Boulder - MarthaPalmerUniversity of Colorado at Boulder - JamesMartinUniversity of Colorado at Boulder + MarthaPalmerUniversity of Colorado at Boulder + JamesMartinUniversity of Colorado at Boulder JeffreyFlaniganUniversity of California, Santa Cruz 20135-20149 Understanding the structure of multi-party conversation and the intentions and dialogue acts of each speaker remains a significant challenge in NLP. While a number of corpora annotated using theoretical frameworks of dialogue have been proposed, these typically focus on either utterance-level labeling of speaker intent, missing wider context, or the rhetorical structure of a dialogue, losing fine-grained intents captured in dialogue acts. Recently, the Dependency Dialogue Acts (DDA) framework has been proposed to for modeling both the fine-grained intents of each speaker and the structure of multi-party dialogues. However, there is not yet a corpus annotated with this framework available for the community to study. To address this gap, we introduce a new corpus of 33 dialogues and over 9,000 utterance units, densely annotated using the Dependency Dialogue Acts (DDA) framework.Our dataset spans four genres of multi-party conversations from different modalities: (1) physics classroom discussions, (2) engineering classroom discussions, (3) board game interactions, and (4) written online game chat logs. Each session is doubly annotated and adjudicated to ensure high-quality labeling. We present a description of the dataset and annotation process, an analysis of speaker dynamics enabled by our annotation, and a baseline evaluation of LLMs as DDA parsers. We discuss the implications of this dataset understanding dynamics between speakers and for developing more controllable dialogue agents. @@ -20926,7 +20926,7 @@ QiunanDuNational University of Defense Technology XinwangLiuNational University of Defense Technology MinlieHuang - DongshengLiNational University of Defense Technology + DongshengLiNational University of Defense Technology 20243-20255 LLMs face privacy risks when handling sensitive data. To ensure privacy, researchers use differential privacy (DP) to provide protection by adding noise during LLM training. However, users may be hesitant to share complete data with LLMs. Researchers follow local DP to sanitize the text on the user side and feed non-sensitive text to LLMs. The sanitization usually uses a fixed non-sensitive token list or a fixed noise distribution, which induces the risk of being attacked or semantic distortion. We argue that the token’s protection level should be adaptively adjusted according to its semantic-based information to balance the privacy-utility trade-off. In this paper, we propose DYNTEXT, an LDP-based Dynamic Text sanitization for privacy-preserving LLM inference, which dynamically constructs semantic-aware adjacency lists of sensitive tokens to sample non-sensitive tokens for perturbation. Specifically, DYNTEXT first develops a semantic-based density modeling under DP to extract each token’s density information. We propose token-level smoothing sensitivity by combining the idea of global sensitivity (GS) and local sensitivity (LS), which dynamically adjusts the noise scale to avoid excessive noise in GS and privacy leakage in LS. Then, we dynamically construct an adjacency list for each sensitive token based on its semantic density information. Finally, we apply the replacement mechanism to sample non-sensitive, semantically similar tokens from the adjacency list to replace sensitive tokens. Experiments show that DYNTEXT excels strong baselines on three datasets. 2025.findings-acl.1038 @@ -20994,7 +20994,7 @@ DingyuYao BowenShenUniversity of the Chinese Academy of Sciences ZhengLinInstitute of Information Engineering, Chinese Academy of Sciences - WeiLiu + WeiLiu JianLuanXiaomi Corporation BinWangAI Lab, Xiaomi Inc. WeipingWangIIE @@ -21440,7 +21440,7 @@ <fixed-case>PM</fixed-case>3-<fixed-case>KIE</fixed-case>: A Probabilistic Multi-Task Meta-Model for Document Key Information Extraction BirgitKirschFraunhofer Institute IAIS, Fraunhofer IAIS - HéctorAllende-CidFraunhofer Institute IAIS, Fraunhofer IAIS + HéctorAllende-CidFraunhofer Institute IAIS, Fraunhofer IAIS StefanRueping 20890-20912 Key Information Extraction (KIE) from visually rich documents is commonly approached as either fine-grained token classification or coarse-grained entity extraction. While token-level models capture spatial and visual cues, entity-level models better represent logical dependencies and align with real-world use cases.We introduce PM3-KIE, a probabilistic multi-task meta-model that incorporates both fine-grained and coarse-grained models. It serves as a lightweight reasoning layer that jointly predicts entities and all appearances in a document. PM3-KIE incorporates domain-specific schema constraints to enforce logical consistency and integrates large language models for semantic validation, thereby reducing extraction errors.Experiments on two public datasets, DeepForm and FARA, show that PM3-KIE outperforms three state-of-the-art models and a stacked ensemble, achieving a statistically significant 2% improvement in F1 score. @@ -21453,7 +21453,7 @@ AhmedLekssaysHamad Bin Khalifa University UtsavShukla Husrev TahaSencarQCRI - Md RizwanParvezQatar Computing Research Institute + Md RizwanParvezQatar Computing Research Institute 20913-20926 Accurately identifying adversarial techniques in security texts is critical for effective cyber defense. However, existing methods face a fundamental trade-off: they either rely on generic models with limited domain precision or require resource-intensive pipelines that depend on large labeled datasets and task-specific optimizations—such as custom hard-negative mining and denoising—resources rarely available in specialized domains.We propose TechniqueRAG, a domain-specific retrieval-augmented generation (RAG) framework that bridges this gap by integrating off-the-shelf retrievers, instruction-tuned LLMs, and minimal text–technique pairs. Our approach addresses data scarcity by fine-tuning only the generation component on limited in-domain examples, circumventing the need for resource-intensive retrieval training. While conventional RAG mitigates hallucination by coupling retrieval and generation, its reliance on generic retrievers often introduces noisy candidates, limiting domain-specific precision. To address this, we enhance retrieval quality and domain specificity through zero-shot LLM re-ranking, which explicitly aligns retrieved candidates with adversarial techniques.Experiments on multiple security benchmarks demonstrate that TechniqueRAG achieves state-of-the-art performance without extensive task-specific optimizations or labeled data, while comprehensive analysis provides further insights. 2025.findings-acl.1076 @@ -21466,8 +21466,8 @@ ZixuanLiInstitute of Computing Technology, Chinese Academy of Sciences XiaolongJinInstitute of Computing Technology, Chinese Academy of Sciences JiafengGuoInstitute of Computing Technolgy, Chinese Academy of Sciences - XueqiChengInstitute of Computing Technology, Chinese Academy - Tat-SengChuaNational University of Singapore + XueqiChengInstitute of Computing Technology, Chinese Academy + Tat-SengChuaNational University of Singapore 20927-20938 Forecasting over Temporal Knowledge Graphs (TKGs) which predicts future facts based on historical ones has received much attention. Recent studies have introduced Large Language Models (LLMs) for this task to enhance the models’ generalization abilities. However, these models perform forecasting via simultaneously learning two kinds of entangled knowledge in the TKG: (1) general patterns, i.e., invariant temporal structures shared across different scenarios; and (2) scenario information, i.e., factual knowledge engaged in specific scenario, such as entities and relations. As a result, the learning processes of these two kinds of knowledge may interfere with each other, which potentially impact the generalization abilities of the models. To enhance the generalization ability of LLMs on this task, in this paper, we propose a General-to-Specific learning framework (G2S) that disentangles the learning processes of the above two kinds of knowledge. In the general learning stage, we mask the scenario information in different TKGs and convert it into anonymous temporal structures. After training on these structures, the model is able to capture the general patterns across different TKGs. In the specific learning stage, we inject the scenario information into the structures via either in-context learning or fine-tuning modes. Experimental results show that G2S effectively improves the generalization abilities of LLMs. 2025.findings-acl.1077 @@ -21550,7 +21550,7 @@ JunruWu TianhaoShen LinxiSu - DeyiXiongTianjin University + DeyiXiongTianjin University 21031-21050 Large language models (LLMs) have achieved remarkable progress in autonomous reasoning, evolving from basic text processing to sophisticated multimodal reasoning, a critical capability for general-purpose AI assistants. However, existing benchmarks usually fail to adequately capture the intricate multi-step reasoning demands inherent in real-world scenarios. To bridge this gap, we propose **C²RBench**: a **C**hinese **C**omplex **R**easoning **Bench**mark for evaluating multi-step, multimodal advanced reasoning capability of LLMs. C²RBench comprises 1,115 carefully curated Chinese tasks, which are organized into eight domain-specific subsets, each meticulously designed to mirror real-world challenges. This hierarchical benchmark features three difficulty tiers based on the number of reasoning steps required (average 8.44 steps per task), significantly exceeding existing benchmarks in cognitive complexity. Extensive evaluations of 20 LLMs (including DeepSeek-R1) and 24 multimodal large language models (MLLMs) on C²RBench reveal critical performance gaps: GPT-4.1 achieves only 52.11% accuracy, indicating substantial room for improvement. The dataset and evaluation code are publicly available. 2025.findings-acl.1083 @@ -21600,7 +21600,7 @@ AminAbolghasemi LeifAzzopardiUniversity of Strathclyde Seyyed HadiHashemieBay Inc. - Maartende Rijke + Maartende Rijke SuzanVerberneUniversiteit Leiden 21105-21124 Attributing answers to source documents is an approach used to enhance the verifiability of a model’s output in retrieval-augmented generation (RAG). Prior work has mainly focused on improving and evaluating the attribution quality of large language models (LLMs) in RAG, but this may come at the expense of inducing biases in the attribution of answers. We define and examine two aspects in the evaluation of LLMs in RAG pipelines, namely attribution sensitivity and bias with respect to authorship information. We explicitly inform an LLM about the authors of source documents, instruct it to attribute its answers, and analyze (i) how sensitive the LLM’s output is to the author of source documents, and (ii) whether the LLM exhibits a bias towards human-written or AI-generated source documents. We design an experimental setup in which we use counterfactual evaluation to study three LLMs in terms of their attribution sensitivity and bias in RAG pipelines. Our results show that adding authorship information to source documents can significantly change the attribution quality of LLMs by 3 to 18%. We show that LLMs can have an attribution bias towards explicit human authorship, which can serve as a competing hypothesis for findings of prior work that shows that LLM-generated content may be preferred over human-written contents. Our findings indicate that metadata of source documents can influence LLMs’ trust, and how they attribute their answers. Furthermore, our research highlights attribution bias and sensitivity as a novel aspect of the vulnerability of LLMs. @@ -21613,7 +21613,7 @@ WenYangInstitute of automation, Chinese academy of science, Chinese Academy of Sciences JunhongWuUniversity of Chinese Academy of Sciences ChenWangInstitute of automation, Chinese academy of science, Chinese Academy of Sciences - ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences + ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences JiajunZhangInstitute of automation, Chinese academy of science, Chinese Academy of Sciences 21125-21147 Direct Preference Optimization (DPO) has become a prominent method for aligning Large Language Models (LLMs) with human preferences. While DPO has enabled significant progress in aligning English LLMs, multilingual preference alignment is hampered by data scarcity. To address this, we propose a novel approach that \textit{captures} learned preferences from well-aligned English models by implicit rewards and \textit{transfers} them to other languages through iterative training. Specifically, we derive an implicit reward model from the logits of an English DPO-aligned model and its corresponding reference model. This reward model is then leveraged to annotate preference relations in cross-lingual instruction-following pairs, using English instructions to evaluate multilingual responses. The annotated data is subsequently used for multilingual DPO fine-tuning, facilitating preference knowledge transfer from English to other languages. Fine-tuning Llama3 for two iterations resulted in a 12.72% average improvement in Win Rate and a 5.97% increase in Length Control Win Rate across all training languages on the X-AlpacaEval leaderboard. Our findings demonstrate that leveraging existing English-aligned models can enable efficient and effective multilingual preference alignment, significantly reducing the need for extensive multilingual preference data. @@ -21709,7 +21709,7 @@ WenjieWangUniversity of Science and Technology of China HongChengThe Chinese University of Hong Kong FuliFengUniversity of Science and Technology of China - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore 21258-21277 Personalizing Large Language Models (LLMs) has become a critical step in facilitating their widespread application to enhance individual life experiences. In pursuit of personalization, distilling key preference information from an individual’s historical data as instructional preference context to customize LLM generation has emerged as a promising direction. However, these methods face a fundamental limitation by overlooking the inter-user comparative analysis, which is essential for identifying the inter-user differences that truly shape preferences. To address this limitation, we propose Difference-aware Personalization Learning (DPL), a novel approach that emphasizes extracting inter-user differences to enhance LLM personalization. DPL strategically selects representative users for comparison and establishes a structured standard to extract meaningful, task-relevant differences for customizing LLM generation. Extensive experiments on real-world datasets demonstrate that DPL significantly enhances LLM personalization. We release our code at https://github.com/SnowCharmQ/DPL. 2025.findings-acl.1095 @@ -21821,7 +21821,7 @@ YihaoDingUniversity of Melbourne GongboZhangColumbia University ChunhuaWengColumbia University - YifanPengWeill Cornell Medicine, Cornell University + YifanPengWeill Cornell Medicine, Cornell University 21421-21443 Evidence-based medicine (EBM) is at the forefront of modern healthcare, emphasizing the use of the best available scientific evidence to guide clinical decisions. Due to the sheer volume and rapid growth of medical literature and the high cost of curation, there is a critical need to investigate Natural Language Processing (NLP) methods to identify, appraise, synthesize, summarize, and disseminate evidence in EBM. This survey presents an in-depth review of 129 research studies on leveraging NLP for EBM, illustrating its pivotal role in enhancing clinical decision-making processes. The paper systematically explores how NLP supports the five fundamental steps of EBM—Ask, Acquire, Appraise, Apply, and Assess. The review not only identifies current limitations within the field but also proposes directions for future research, emphasizing the potential for NLP to revolutionize EBM by refining evidence extraction, evidence synthesis, appraisal, summarization, enhancing data comprehensibility, and facilitating a more efficient clinical workflow. 2025.findings-acl.1103 @@ -21861,7 +21861,7 @@ ZejiangHe LiuLiuSuqian University ZhigangSunNational University of Defense Technology - DongshengLiNational University of Defense Technology + DongshengLiNational University of Defense Technology 21475-21487 With the emergence of new topics on social media as sources of rumor dissemination, addressing the distribution shifts between source and target domains remains a crucial task in cross-domain rumor detection. Existing feature alignment methods, which aim to reduce the discrepancies between domains, are often susceptible to task interference during training. Additionally, data distribution alignment methods, which rely on existing data to synthesize new training samples, inherently introduce noise. To deal with these challenges, a new cross-domain rumor detection method, MONTROSE, is proposed. It combines LLM-driven Monte Carlo Tree Search (MCTS) data synthesis to generate high-quality synthetic data for the target domain and a domain-sharpness-aware (DSAM) self-refinement approach to train rumor detection models with these synthetic data effectively. Experiments demonstrate the superior performance of MONTROSE in cross-domain rumor detection. 2025.findings-acl.1106 @@ -21885,12 +21885,12 @@ A Comprehensive Graph Framework for Question Answering with Mode-Seeking Preference Alignment QuanweiTang - Sophia Yat MeiLeeHong Kong Polytechnic University + Sophia Yat MeiLeeHong Kong Polytechnic University JunshuangWu DongZhang ShoushanLiSoochow University ErikCambriaNanyang Technological University - GuodongZhouSoochow University, China + GuodongZhouSoochow University, China 21504-21523 Recent advancements in retrieval-augmented generation (RAG) have enhanced large language models in question answering by integrating external knowledge. However, challenges persist in achieving global understanding and aligning responses with human ethical and quality preferences. To address these issues, we propose GraphMPA, a comprehensive graph-based framework with mode-seeking preference alignment. Our approach constructs a hierarchical document graph using a general similarity measurement, mimicking human cognitive processes for information understanding and synthesis. Additionally, we introduce mode-seeking preference optimization to better align model outputs with human preferences through probability-matching constraints. Extensive experiments on six datasets demonstrate the effectiveness of our GraphMPA. 2025.findings-acl.1108 @@ -22054,7 +22054,7 @@ JisuShinKorea Advanced Institute of Science & Technology SukminChoKorea Advanced Institute of Science and Technology ChanggeonKo - Jong C.ParkKorea Advanced Institute of Science and Technology + Jong C.ParkKorea Advanced Institute of Science and Technology 21738-21756 The detection of mental health problems from social media and the interpretation of these results have been extensively explored. Research has shown that incorporating clinical symptom information into a model enhances domain expertise, improving its detection and interpretation performance. While large language models (LLMs) are shown to be effective for generating explanatory rationales in mental health detection, their substantially big parameter size and high computational cost limit their practicality. Reasoning distillation transfers this ability to smaller language models (SLMs), but inconsistencies in the relevance and domain alignment of LLM-generated rationales pose a challenge. This paper investigates how rationale quality impacts SLM performance in mental health detection and explanation generation. We hypothesize that ensuring high-quality and domain-relevant rationales enhances the distillation. To this end, we propose a framework that selects rationales based on their alignment with expert clinical reasoning. Experiments show that our quality-focused approach significantly enhances SLM performance in both mental disorder detection and rationale generation. This work highlights the importance of rationale quality and offers an insightful framework for knowledge transfer in mental health applications. 2025.findings-acl.1119 @@ -22064,7 +22064,7 @@ Rethinking Table Instruction Tuning NaihaoDeng - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan 21757-21780 Recent advances in table understanding have focused on instruction-tuning large language models (LLMs) for table-related tasks. However, existing research has overlooked the impact of hyperparameter choices, and also lacks a comprehensive evaluation of the out-of-domain table understanding ability and the general capabilities of these table LLMs. In this paper, we evaluate these abilities in existing table LLMs, and find significant declines in both out-of-domain table understanding and general capabilities as compared to their base models. Through systematic analysis, we show that hyperparameters, such as learning rate, can significantly influence both table-specific and general capabilities. Contrary to the previous table instruction-tuning work, we demonstrate that smaller learning rates and fewer training instances can enhance table understanding while preserving general capabilities. Based on our findings, we introduce TAMA, a TAble LLM instruction-tuned from LLaMA 3.1 8B Instruct, which achieves performance on par with, or surpassing GPT-3.5 and GPT-4 on table tasks, while maintaining strong out-of-domain generalization and general capabilities. Our findings highlight the potential for reduced data annotation costs and more efficient model development through careful hyperparameter selection. We open-source the project and our models. 2025.findings-acl.1120 @@ -22075,7 +22075,7 @@ <fixed-case>C</fixed-case>lini<fixed-case>D</fixed-case>ial: A Naturally Occurring Multimodal Dialogue Dataset for Team Reflection in Action During Clinical Operation NaihaoDeng KapotakshaDasUniversity of Michigan - Dearborn - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan VitaliyPopovUniversity of Michigan - Ann Arbor MohamedAbouelenienUniversity of Michigan 21781-21798 @@ -22094,7 +22094,7 @@ ZenghaoTangShanghai Jiaotong University HeWang HanchenXia - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan NaihaoDeng 21799-21818 Existing humor datasets and evaluations predominantly focus on English, leaving limited resources for culturally nuanced humor in non-English languages like Chinese. To address this gap, we construct **Chumor**, the first and the largest Chinese humor explanation dataset. **Chumor** is sourced from Ruo Zhi Ba (RZB, 弱智吧), a Chinese Reddit-like platform known for sharing intellectually challenging and culturally specific jokes. We test ten LLMs through direct and chain-of-thought prompting, revealing that **Chumor** poses significant challenges to existing LLMs, with their accuracy slightly above random and far below human. In addition, our analysis highlights that human-annotated humor explanations are significantly better than those generated by GPT-4o and ERNIE4-turbo. We release **Chumor** at https://huggingface.co/datasets/MichiganNLP/Chumor , our project page is at https://github.com/MichiganNLP/Chumor-2.0 , our leaderboard is at https://huggingface.co/spaces/MichiganNLP/Chumor-leaderboard , and our codebase is at https://github.com/MichiganNLP/Chumor-2.0 . @@ -22130,7 +22130,7 @@ ParamitaKoley JanardanMisra NiloyGangulyIndian Institute of Technology Kharagpur, - SaptarshiGhoshIndian Institute of Technology Kharagpur + SaptarshiGhoshIndian Institute of Technology Kharagpur 21848-21864 A significant portion of the energy consumed by Large Language Models (LLMs) arises from their inference processes; hence developing energy-efficient methods for inference is crucial. While several techniques exist for inference optimization, output compression remains relatively unexplored, with only a few preliminary efforts addressing this aspect. In this work, we first benchmark 12 decoder-only LLMs across 5 datasets, revealing that these models often produce responses that are substantially longer than necessary. We then conduct a comprehensive quality assessment of LLM responses, formally defining six information categories present in LLM responses. We show that LLMs often tend to include redundant or additional information besides the minimal answer. To address this issue of long responses by LLMs, we explore several simple and intuitive prompt-engineering strategies.Empirical evaluation shows that appropriate prompts targeting length reduction and controlling information content can achieve significant energy optimization between 25-60% by reducing the response length while preserving the quality of LLM responses. 2025.findings-acl.1125 @@ -22329,7 +22329,7 @@ Is Large Language Model Performance on Reasoning Tasks Impacted by Different Ways Questions Are Asked? Seok HwanSong MohnaChakraborty - QiLiIowa State University + QiLiIowa State University WallapakTavanapongIowa State University 22066-22081 Large Language Models (LLMs) have been evaluated using diverse question types, e.g., multiple-choice, true/false, and short/long answers. This study answers an unexplored question about the impact of different question types on LLM accuracy on reasoning tasks. We investigate the performance of five LLMs on three different types of questions using quantitative and deductive reasoning tasks. The performance metrics include accuracy in the reasoning steps and choosing the final answer. Key Findings: (1) Significant differences exist in LLM performance across different question types. (2) Reasoning accuracy does not necessarily correlate with the final selection accuracy. (3) The number of options and the choice of words, influence LLM performance. @@ -22427,7 +22427,7 @@ Label-semantics Aware Generative Approach for Domain-Agnostic Multilabel Classification SubhenduKhatuya ShashwatNaidu - SaptarshiGhoshIndian Institute of Technology Kharagpur + SaptarshiGhoshIndian Institute of Technology Kharagpur PawanGoyalIIT Kharagpur NiloyGangulyIndian Institute of Technology Kharagpur, 22286-22298 @@ -22468,7 +22468,7 @@ TianyueOu HoudaBouamorCarnegie Mellon University ZhijingJinDepartment of Computer Science, University of Toronto - Mona T.DiabCarnegie Mellon University + Mona T.DiabCarnegie Mellon University 22327-22360 The field of machine translation has achieved significant advancements, yet domain-specific terminology translation, particularly in AI, remains challenging. This work introduces GIST, a large-scale multilingual AI terminology dataset containing 5K terms extracted from top AI conference papers spanning 2000 to 2023. The terms were translated into Arabic, Chinese, French, Japanese, and Russian using a hybrid framework that combines LLMs for extraction with human expertise for translation. The dataset’s quality was benchmarked against existing resources, demonstrating superior translation accuracy through crowdsourced evaluation. GIST was integrated into translation workflows using post-translation refinement methods that required no retraining, where LLM prompting consistently improved BLEU and COMET scores. A web demonstration on the ACL Anthology platform highlights its practical application, showcasing improved accessibility for non-English speakers. We address a critical gap in AI terminology resources and fosters global inclusivity and collaboration in AI research. 2025.findings-acl.1148 @@ -22492,7 +22492,7 @@ EmanueleLa MalfaUniversity of Oxford ManuelTonneauOxford Internet Institute, University of Oxford AshkanKazemiMeedan - Scott A.HaleMeedan, University of Oxford and Alan Turing Institute + Scott A.HaleMeedan, University of Oxford and Alan Turing Institute 22374-22404 Online misinformation remains a critical challenge, and fact-checkers increasingly rely on claim matching systems that use sentence embedding models to retrieve relevant fact-checks. However, as users interact with claims online, they often introduce edits, and it remains unclear whether current embedding models used in retrieval are robust to such edits. To investigate this, we introduce a perturbation framework that generates valid and natural claim variations, enabling us to assess the robustness of a wide-range of sentence embedding models in a multi-stage retrieval pipeline and evaluate the effectiveness of various mitigation approaches. Our evaluation reveals that standard embedding models exhibit notable performance drops on edited claims, while LLM-distilled embedding models offer improved robustness at a higher computational cost. Although a strong reranker helps to reduce the performance drop, it cannot fully compensate for first-stage retrieval gaps. To address these retrieval gaps, we evaluate train- and inference-time mitigation approaches, demonstrating that they can improve in-domain robustness by up to 17 percentage points and boost out-of-domain generalization by 10 percentage points. Overall, our findings provide practical improvements to claim-matching systems, enabling more reliable fact-checking of evolving misinformation. 2025.findings-acl.1150 @@ -22533,7 +22533,7 @@ FrancescoOrtuUniversity of Trieste and Area Science Park RoyaEnsafiUniversity of Michigan Ann Arbor ZhijingJinDepartment of Computer Science, University of Toronto - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan 22434-22452 The ability of Natural Language Processing (NLP) methods to categorize text into multiple classes has motivated their use in online content moderation tasks, such as hate speech and fake news detection. However, there is limited understanding of how or why these methods make such decisions, or why certain content is moderated in the first place. To investigate the hidden mechanisms behind content moderation, we explore multiple directions: 1) training classifiers to reverse-engineer content moderation decisions across countries; 2) explaining content moderation decisions by analyzing Shapley values and LLM-guided explanations. Our primary focus is on content moderation decisions made across countries, using pre-existing corpora sampled from the Twitter Stream Grab. Our experiments reveal interesting patterns in censored posts, both across countries and over time. Through human evaluations of LLM-generated explanations across three LLMs, we assess the effectiveness of using LLMs in content moderation. Finally, we discuss potential future directions, as well as the limitations and ethical considerations of this work. 2025.findings-acl.1153 @@ -22564,8 +22564,8 @@ KianaAvestimehr KatharineButler YanjunWeng - MiZhangThe Ohio State University - ShrikanthNarayananUniversity of Southern California + MiZhangThe Ohio State University + ShrikanthNarayananUniversity of Southern California SalmanAvestimehrUniversity of Southern California 22473-22487 Large vision-language models (VLMs) have demonstrated remarkable abilities in understanding everyday content. However, their performance in the domain of art, particularly culturally rich art forms, remains less explored. As a pearl of human wisdom and creativity, art encapsulates complex cultural narratives and symbolism. In this paper, we offer the Pun Rebus Art Dataset, a multimodal dataset for art understanding deeply rooted in traditional Chinese culture. We focus on three primary tasks: identifying salient visual elements, matching elements with their symbolic meanings, and explanations for the conveyed messages. Our evaluation reveals that state-of-the-art VLMs struggle with these tasks, often providing biased and hallucinated explanations and showing limited improvement through in-context learning. By releasing the Pun Rebus Art Dataset, we aim to facilitate the development of VLMs that can better understand and interpret culturally specific content, promoting greater inclusiveness beyond English-based corpora. The dataset and evaluation code are available at [this link](https://github.com/zhang-tuo-pdf/Pun-Rebus-Art-Benchmark). @@ -22601,7 +22601,7 @@ <fixed-case>GUI</fixed-case> Agents: A Survey DangNguyenUniversity of Maryland, College Park - JianChen + JianChen YuWangUniversity of Oregon and Vanderbilt University GangWuAdobe Research NamyongParkMeta AI @@ -22626,7 +22626,7 @@ BranislavKvetonAdobe Research JihyungKilAdobe Research Thien HuuNguyenUniversity of Oregon - TrungBuiAdobe Research + TrungBuiAdobe Research TianyiZhouUniversity of Maryland, College Park Ryan A.RossiAdobe Research FranckDernoncourt @@ -22642,7 +22642,7 @@ Wen-waiYim YujuanFu ZhaoyiSun - MelihaYetisgenUniversity of Washington + MelihaYetisgenUniversity of Washington FeiXiaUniversity of Washington, Seattle ThomasLinMicrosoft 22539-22550 @@ -22667,8 +22667,8 @@ Dynamic Knowledge Integration for Evidence-Driven Counter-Argument Generation with Large Language Models AnarYeginbergen - MaiteOronoz - RodrigoAgerriUniversity of the Basque Country + MaiteOronoz + RodrigoAgerriUniversity of the Basque Country 22568-22584 This paper investigates the role of dynamic external knowledge integration in improving counter-argument generation using Large Language Models (LLMs). While LLMs have shown promise in argumentative tasks, their tendency to generate lengthy, potentially non-factual responses highlights the need for more controlled and evidence-based approaches. We introduce a reconstructed and manually curated dataset of argument and counter-argument pairs specifically designed to balance argumentative complexity with evaluative feasibility. We also propose a new LLM-as-a-Judge evaluation methodology that shows a stronger correlation with human judgments compared to traditional reference-based metrics. Our experimental results demonstrate that integrating dynamic external knowledge from the web significantly improves the quality of generated counter-arguments, particularly in terms of relatedness, persuasiveness, and factuality. The findings suggest that combining LLMs with real-time external knowledge retrieval offers a promising direction for developing more effective and reliable counter-argumentation systems. Data and code are publicly available: https://github.com/anaryegen/ counter-argument-generation 2025.findings-acl.1161 @@ -22677,7 +22677,7 @@ Tell, Don’t Show: Leveraging Language Models’ Abstractive Retellings to Model Literary Themes - LiLucyUniversity of California Berkeley + LiLucyUniversity of California Berkeley CamillaGriffiths SarahLevineStanford University Jennifer LEberhardt @@ -22748,7 +22748,7 @@ PuxuanYuSnowflake DanielCohenDataminr HemankLambaDataminr Inc. - Joel R.Tetreault + Joel R.Tetreault AlejandroJaimesDataminr 22716-22730 In search settings, calibrating the scores during the ranking process to quantities such as click-through rates or relevance levels enhances a system’s usefulness and trustworthiness for downstream users. While previous research has improved this notion of calibration for low complexity learning-to-rank models, the larger data demands and parameter count specific to modern neural text rankers produce unique obstacles that hamper the efficacy of methods intended for the learning-to-rank setting.This paper proposes exploiting large language models (LLMs) to provide relevance and uncertainty signals for these neural text rankers to produce scale-calibrated scores through Monte Carlo sampling of natural language explanations (NLEs). Our approach transforms the neural ranking task from ranking textual query-document pairs to ranking corresponding synthesized NLEs. Comprehensive experiments on two popular document ranking datasets show that the NLE-based calibration approach consistently outperforms past calibration methods and LLM-based methods for ranking, calibration, and query performance prediction tasks. @@ -22761,7 +22761,7 @@ Miguel RomeroCalvoUniversity of Minnesota - Twin Cities and Amazon ShuoyangDingNVIDIA Corey DBarrettOracle - GeorgianaDinuAmazon + GeorgianaDinuAmazon GeorgeKarypisUniversity of Minnesota, Minneapolis 22731-22746 Dense embeddings are fundamental to modern machine learning systems, powering Retrieval-Augmented Generation (RAG), information retrieval, and representation learning. While instruction-conditioning has become the dominant approach for embedding specialization, its direct application to low-capacity models imposes fundamental representational constraints that limit the performance gains derived from specialization. In this paper, we analyze these limitations and introduce the Mixture of Task Experts (MoTE) transformer block, which leverages task-specialized parameters trained with Task-Aware Contrastive Learning () to enhance the model’s ability to generate specialized embeddings. Empirical results show that MoTE achieves 64% higher performance gains in retrieval datasets (+3.27\rightarrow +5.21) and 43% higher performance gains across all datasets (+1.81\rightarrow 2.60). Critically, these gains are achieved without altering instructions, training data, inference time, or number of active parameters. @@ -22811,7 +22811,7 @@ NavonilMajumderSingapore University of Technology and Design DeepanwayGhosalGoogle DeepMind SomakAdityaIndian Institute of Technology Kharagpur - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan SoujanyaPoria 22811-22849 Recent advancements in Large Language Models (LLMs) have showcased striking results on existing logical reasoning benchmarks, with some models even surpassing human performance. However, the true depth of their competencies and robustness in reasoning tasks remains an open question. To this end, in this paper, we focus on two popular reasoning tasks: arithmetic reasoning and code generation. Particularly, we introduce (i) a general ontology of perturbations for math and coding questions, (ii) a semi-automatic method to apply these perturbations, and (iii) two datasets, GSMore and HumanEval-Core, respectively, of perturbed math and coding problems to probe LLM capabilities in numeric reasoning and coding tasks.Through comprehensive evaluations of both closed-source and open-source LLMs, we show a significant performance drop across all the models against the perturbed questions, suggesting that the current LLMs lack robust problem solving skills and structured reasoning abilities in many areas, as defined by our ontology. @@ -22855,7 +22855,7 @@ VihangPancholi Jainit SushilBafna TejasAnvekarArizona State University - ManishShrivastavaInternational Institute of Information Technology Hyderabad, India + ManishShrivastavaInternational Institute of Information Technology Hyderabad, India VivekGuptaArizona State University 22913-22934 Evaluating tables qualitatively and quantitatively poses a significant challenge, as standard metrics often overlook subtle structural and content-level discrepancies. To address this, we propose a rubric-based evaluation framework that integrates multi-level structural descriptors with fine-grained contextual signals, enabling more precise and consistent table comparison. Building on this, we introduce TabXEval, an eXhaustive and eXplainable two-phase evaluation framework. TabXEval first aligns reference and predicted tables structurally via TabAlign, then performs semantic and syntactic comparison using TabCompare, offering interpretable and granular feedback. We evaluate TabXEval on TabXBench, a diverse, multi-domain benchmark featuring realistic table perturbations and human annotations. A sensitivity-specificity analysis further demonstrates the robustness and explainability of TabXEval across varied table tasks. Code and data are available at https://corallab- asu.github.io/tabxeval/. @@ -23115,7 +23115,7 @@ SiliangQinInstitute of Information Engineering.CAS YuandaWang ZhangBolun - ChaoZhangTsinghua University + ChaoZhangTsinghua University 23250-23267 Decompilers are fundamental tools for critical security tasks, from vulnerability discovery to malware analysis, yet their evaluation remains fragmented. Existing approaches primarily focus on syntactic correctness through synthetic micro-benchmarks or subjective human ratings, failing to address real-world requirements for semantic fidelity and analyst usability. We present **DecompileBench**, the first comprehensive framework that enables effective evaluation of decompilers in reverse engineering workflows through three key components: real-world function extraction (comprising 23,400 functions from 130 real-world programs), runtime-aware validation, and automated human-centric assessment using LLM-as-Judge to quantify the effectiveness of decompilers in reverse engineering workflows. Through a systematic comparison between six industrial-strength decompilers and six recent LLM-powered approaches, we demonstrate that LLM-based methods surpass commercial tools in code understandability despite 52.2% lower functionality correctness. These findings highlight the potential of LLM-based approaches to transform human-centric reverse engineering. We open source **DecompileBench** to provide a framework to advance research on decompilers and assist security experts in making informed tool selections based on their specific requirements. 2025.findings-acl.1194 @@ -23155,8 +23155,8 @@ YuetaiLi LuyaoNiuUniversity of Washington ZhenXiangUniversity of Georgia - BoLiUniversity of Illinois, Urbana Champaign - Bill YuchenLinxAI and University of Washington + BoLiUniversity of Illinois, Urbana Champaign + Bill YuchenLinxAI and University of Washington RadhaPoovendranUniversity of Washington, Seattle 23303-23320 Emerging large reasoning models (LRMs), such as DeepSeek-R1 models, leverage long chain-of-thought (CoT) reasoning to generate structured intermediate steps, enhancing their reasoning capabilities. However, long CoT does not inherently guarantee safe outputs, potentially leading to harmful consequences such as the introduction of security vulnerabilities in code or the spread of misinformation. Current research on large language model (LLM) safety usually focuses on short-answer responses, overlooking the long CoT style outputs of LRMs. To bridge this gap, we conduct a systematic study of LRM safety. First, we investigate safety evaluators calibrated against human annotations. Using our newly developed metrics, we thoroughly assess the safety of 13 state-of-the-art LRMs on StrongReject and WildJailbreak datasets. Our results show that LRMs are not safe compared to their reasoning advance. Further, we perform a fine-grained analysis of the reasoning trace and final answer. We find that three decoding strategies-ZeroThink, LessThink, and MoreThink-can improve model safety without additional training. However, these strategies either use constrained reasoning traces or incur high inference costs. To better strengthen LRM safety, we introduce SafeChain, the first-of-its-kind safety training dataset in CoT style. We fine-tune two LRMs with SafeChain, showing that it not only enhances model safety but also preserves performance across 6 reasoning benchmarks. @@ -23188,8 +23188,8 @@ PengfeiYuAmazon ChiHan Yi R.FungHong Kong University of Science and Technology - KathleenMcKeown - ChengXiangZhaiUniversity of Illinois, Urbana Champaign + KathleenMcKeown + ChengXiangZhaiUniversity of Illinois, Urbana Champaign ManlingLiNorthwestern University HengJiUniversity of Illinois, Urbana-Champaign 23340-23358 @@ -23226,7 +23226,7 @@ Enhance Multimodal Consistency and Coherence for Text-Image Plan Generation XiaoxinLuPennsylvania State University - Ranran HaoranZhang + Ranran HaoranZhang YusenZhang RuiZhangPennsylvania State University 23392-23409 @@ -23376,7 +23376,7 @@ ZhiyuanChen YangZhaoInstitute of automation, Chinese academy of science, Chinese Academy of Sciences LuXiangInstitute of automation, Chinese academy of science, Chinese Academy of Sciences - ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences + ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences YuZhouInstitute of Automation, Chinese Academy of Sciences 23659-23678 Multimodal Large Language Models (MLLMs) have shown strong performance in document image tasks, especially Optical Character Recognition (OCR). However, they struggle with Document Image Machine Translation (DIMT), which requires handling both cross-modal and cross-lingual challenges. Previous efforts to enhance DIMT capability through Supervised Fine-Tuning (SFT) on the DIMT dataset often result in the forgetting of the model’s existing monolingual abilities, such as OCR. To address these challenges, we introduce a novel fine-tuning paradigm, named Synchronously Self-Reviewing (SSR) its OCR proficiency, inspired by the concept “Bilingual Cognitive Advantage”. Specifically, SSR prompts the model to generate OCR text before producing translation text, which allows the model to leverage its strong monolingual OCR ability while learning to translate text across languages. Comprehensive experiments demonstrate the proposed SSR learning helps mitigate catastrophic forgetting, improving the generalization ability of MLLMs on both OCR and DIMT tasks. The code will be released upon acceptance. @@ -23437,7 +23437,7 @@ KishanMaharaj SravaniGunnu AbhijitMishraUniversity of Texas at Austin and Apple - PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology + PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology 23778-23790 Pragmatics, the ability to infer meaning beyond literal interpretation, is crucial for social cognition and communication. While LLMs have been benchmarked for their pragmatic understanding, improving their performance remains underexplored. Existing methods rely on annotated labels but overlook the reasoning process humans naturally use to interpret implicit meaning. To bridge this gap, we introduce a novel pragmatic dataset ImpliedMeaningPreference that includes explicit reasoning (‘thoughts’) for both correct and incorrect interpretations. Through preference-tuning and supervised fine-tuning, we demonstrate that thought-based learning significantly enhances LLMs’ pragmatic understanding, improving accuracy by 11.12% across model families. We further discuss a transfer-learning study where we evaluate the performance of thought-based training for the other tasks of pragmatics (presupposition, deixis) that are not seen during the training time and observe an improvement of 16.10% compared to label trained models. 2025.findings-acl.1218 @@ -23451,7 +23451,7 @@ ZitongZhao ZhongxiangDaiThe Chinese University of Hong Kong, Shenzhen Chuan-ShengFooCentre for Frontier AI Research, A*STAR and Institute for Infocomm Research, A*STAR - See-KiongNgNational University of Singapore + See-KiongNgNational University of Singapore Bryan Kian HsiangLowNational University of Singapore 23791-23824 The impressive performances of Large Language Models (LLMs) and their immense potential for commercialization have given rise to serious concerns over the Intellectual Property (IP) of their training data. In particular, the synthetic texts generated by LLMs may infringe the IP of the data being used to train the LLMs. To this end, it is imperative to be able to perform source attribution by identifying the data provider who contributed to the generation of a synthetic text by an LLM. In this paper, we show that this problem can be tackled by watermarking, i.e., by enabling an LLM to generate synthetic texts with embedded watermarks that contain information about their source(s). We identify the key properties of such watermarking frameworks (e.g., source attribution accuracy, robustness against adversaries), and propose a source attribution framework that satisfies these key properties due to our algorithmic designs. Our framework enables an LLM to learn an accurate mapping from the generated texts to data providers, which sets the foundation for effective source attribution. Extensive empirical evaluations show that our framework achieves effective source attribution. @@ -23602,8 +23602,8 @@ <fixed-case>S</fixed-case>cene<fixed-case>G</fixed-case>ram: Conceptualizing and Describing Tangrams in Scene Context - SimeonJunkerUniversität Bielefeld - SinaZarrießBielefeld University + SimeonJunkerUniversität Bielefeld + SinaZarrießBielefeld University 23976-23992 Research on reference and naming suggests that humans can come up with very different ways of conceptualizing and referring to the same object, e.g. the same abstract tangram shape can be a “crab”, “sink” or “space ship”. Another common assumption in cognitive science is that scene context fundamentally shapes our visual perception of objects and conceptual expectations. This paper contributes SceneGram, a dataset of human references to tangram shapes placed in different scene contexts, allowing for systematic analyses of the effect of scene context on conceptualization. Based on this data, we analyze references to tangram shapes generated by multimodal LLMs, showing that these models do not account for the richness and variability of conceptualizations found in human references. 2025.findings-acl.1229 @@ -23619,7 +23619,7 @@ YuchenHu BoshengDing RuiruiChenInstitute of High Performance Computing, Singapore, A*STAR - ShafiqJotyNanyang Technological University and SalesForce.com + ShafiqJotyNanyang Technological University and SalesForce.com 23993-24010 Analogical reasoning is a unique ability of humans to address unfamiliar challenges by transferring strategies from relevant past experiences. One key finding in psychology is that compared with irrelevant past experiences, recalling relevant ones can help humans better handle new tasks. Coincidentally, the NLP community has also recently found that self-generating relevant examples in the context can help large language models (LLMs) better solve a given problem than hand-crafted prompts. However, it is yet not clear whether relevance is the key factor eliciting such capability, i.e., can LLMs benefit more from self-generated relevant examples than irrelevant ones? In this work, we systematically explore whether LLMs can truly perform analogical reasoning on a diverse set of reasoning tasks. With extensive experiments and analysis, we show that self-generated random examples can surprisingly achieve comparable or even better performance on certain tasks, e.g., 4% performance boost on GSM8K with random biological examples. We find that the accuracy of self-generated examples is the key factor and subsequently design two novel methods with improved performance and significantly reduced inference costs. Overall, we aim to advance a deeper understanding of LLM analogical reasoning and hope this work stimulates further research in the design of self-generated contexts. 2025.findings-acl.1230 @@ -23673,8 +23673,8 @@ KehaiChenHarbin Institute of Technology (Shenzhen) WeiWang XunZhouHarbin Institute of Technology (Shenzhen) - MuyunYang - TiejunZhaoHarbin Institute of Technology + MuyunYang + TiejunZhaoHarbin Institute of Technology MinZhangHarbin Institute of Technology, Shenzhen 24068-24084 Large language models (LLMs) have achieved remarkable performance on knowledge graph question answering (KGQA) tasks by planning and interacting with knowledge graphs. However, existing methods often confuse tool utilization with knowledge reasoning, harming readability of model outputs and giving rise to hallucinatory tool invocations, which hinder the advancement of KGQA. To address this issue, we propose Memory-augmented Query Reconstruction for LLM-based Knowledge Graph Reasoning (MemQ) to decouple LLM from tool invocation tasks using LLM-built query memory. By establishing a memory module with explicit descriptions of query statements, the proposed MemQ facilitates the KGQA process with natural language reasoning and memory-augmented query reconstruction. Meanwhile, we design an effective and readable reasoning to enhance the LLM’s reasoning capability in KGQA. Experimental results that MemQ achieves state-of-the-art performance on widely used benchmarks WebQSP and CWQ. @@ -23698,10 +23698,10 @@ Are Multimodal Large Language Models Pragmatically Competent Listeners in Simple Reference Resolution Tasks? - SimeonJunkerUniversität Bielefeld + SimeonJunkerUniversität Bielefeld ManarAliUniversität Bielefeld LarissaKoch - SinaZarrießBielefeld University + SinaZarrießBielefeld University HendrikBuschmeierUniversität Bielefeld 24101-24109 We investigate the linguistic abilities of multimodal large language models in reference resolution tasks featuring simple yet abstract visual stimuli, such as color patches and color grids. Although the task may not seem challenging for today’s language models, being straightforward for human dyads, we consider it to be a highly relevant probe of the pragmatic capabilities of MLLMs. Our results and analyses indeed suggest that basic pragmatic capabilities, such as context-dependent interpretation of color descriptions, still constitute major challenges for state-of-the-art MLLMs. @@ -23823,7 +23823,7 @@ Neil De LaFuenteTechnical University of Munich and Universidad del País Vasco OscarSainzUniversity of the Basque Country (UPV/EHU) IkerGarcía-Ferrero - EnekoAgirreUniversity of the Basque Country (UPV/EHU) + EnekoAgirreUniversity of the Basque Country (UPV/EHU) 24248-24262 Information Extraction (IE) systems are traditionally domain-specific, requiring costlyadaptation that involves expert schema design,data annotation, and model training. WhileLarge Language Models have shown promisein zero-shot IE, performance degrades significantly in unseen domains where label definitions differ. This paper introduces GUIDEX,a novel method that automatically definesdomain-specific schemas, infers guidelines,and generates synthetically labeled instances,allowing for better out-of-domain generalization. Fine-tuning Llama 3.1 with GUIDEXsets a new state-of-the-art across seven zeroshot Named Entity Recognition benchmarks.Models trained with GUIDEX gain up to 7 F1points over previous methods without humanlabeled data, and nearly 2 F1 points higherwhen combined with it. Models trained onGUIDEX demonstrate enhanced comprehension of complex, domain-specific annotationschemas. Code, models, and synthetic datasetsare available at neilus03.github.io/guidex.com 2025.findings-acl.1245 @@ -23881,12 +23881,12 @@ Full-Step-<fixed-case>DPO</fixed-case>: Self-Supervised Preference Optimization with Step-wise Rewards for Mathematical Reasoning HuiminXuNanyang Technological University - XinMaoByteDance Inc. + XinMaoByteDance Inc. Feng-LinLiShopee XiaobaoWuNanyang Technological University WangChen WeiZhangsea group - Anh TuanLuuNanyang Technological University + Anh TuanLuuNanyang Technological University 24343-24356 Direct Preference Optimization (DPO) often struggles with long-chain mathematical reasoning. Existing approaches, such as Step-DPO, typically improve this by focusing on the first erroneous step in the reasoning chain. However, they overlook all other steps and rely heavily on humans or GPT-4 to identify erroneous steps. To address these issues, we propose Full-Step-DPO, a novel DPO framework tailored for mathematical reasoning. Instead of optimizing only the first erroneous step, it leverages step-wise rewards from the entire reasoning chain. This is achieved by training a self-supervised process reward model, which automatically scores each step, providing rewards while avoiding reliance on external signals. Furthermore, we introduce a novel step-wise DPO loss, which dynamically updates gradients based on these step-wise rewards. This endows stronger reasoning capabilities to language models. Extensive evaluations on both in-domain and out-of-domain mathematical reasoning benchmarks across various base language models, demonstrate that Full-Step-DPO achieves superior performance compared to state-of-the-art baselines. 2025.findings-acl.1249 @@ -23906,12 +23906,12 @@ <fixed-case>SCOPE</fixed-case>: Compress Mathematical Reasoning Steps for Efficient Automated Process Annotation HuiminXuNanyang Technological University - XinMaoByteDance Inc. + XinMaoByteDance Inc. Feng-LinLiShopee XiaobaoWuNanyang Technological University WangChen WeiZhangsea group - Anh TuanLuuNanyang Technological University + Anh TuanLuuNanyang Technological University 24382-24394 Process Reward Models (PRMs) have demonstrated promising results in mathematical reasoning, but existing process annotation approaches, whether through human annotations or Monte Carlo simulations, remain computationally expensive. In this paper, we introduce Step COmpression for Process Estimation (SCOPE), a novel compression-based approach that significantly reduces annotation costs. We first translate natural language reasoning steps into code and normalize them through Abstract Syntax Tree, then merge equivalent steps to construct a prefix tree. Unlike simulation-based methods that waste numerous samples on estimation, SCOPE leverages a compression-based prefix tree where each root-to-leaf path serves as a training sample, reducing the complexity from O(NMK) to O(N) We construct a large-scale dataset containing 509K samples with only 5% of the computational resources required by previous methods. Empirical results demonstrate that PRMs trained on our dataset consistently outperform existing automated annotation approaches on both Best-of-N strategy and ProcessBench. 2025.findings-acl.1251 @@ -23922,7 +23922,7 @@ Compositional Syntactico-<fixed-case>S</fixed-case>em<fixed-case>B</fixed-case>anking for <fixed-case>E</fixed-case>nglish as a Second or Foreign Language WenxiLi XihaoWangPeking University - WeiweiSunUniversity of Cambridge + WeiweiSunUniversity of Cambridge 24395-24406 Despite the widespread use of English as a Second or Foreign Language (ESFL), developing syntactico-semantic representations for it is limited — the irregularities in ESFL complicate systematic composition and subsequently the derivation of its semantics.This paper draws on constructivism and proposes a novel Synchronous Hyperedge Replacement Grammar (SHRG)-based constructivist approach to address the challenges. By using constructions as fundamental units, this approach not only accommodates both the idiosyncrasies and the compositional nature of ESFL, but also bridges the gap between literal cues and intended meaning.The feasibility of this constructivist approach is demonstrated using real ESFL data, resulting in a gold-standard, medium-sized syntactico-semantic bank that covers a wide range of ESFL phenomena. 2025.findings-acl.1252 @@ -23933,7 +23933,7 @@ Semantics-aware prompting for translating <fixed-case>NO</fixed-case>tices To <fixed-case>A</fixed-case>ir<fixed-case>M</fixed-case>en Minal NitinDaniHoneywell and Indian Institute of Technology, Hyderabad, Dhirubhai Ambani Institute Of Information and Communication Technology AishwaryaMaheswaran - Maunendra SankarDesarkarIndian Institute of Technology, Hyderabad, + Maunendra SankarDesarkarIndian Institute of Technology, Hyderabad, 24407-24417 A NOTAM or NOtice To AirMen is a crucial notice for different aviation stakeholders, particularly flight crews. It delivers essential notifications about abnormal conditions of Aviation System components such as changes to facilities, hazards, service, procedure that are not known far enough in advance to be publicized through other means. NOTAM messages are short, contain acronyms, and look cryptic in most of the cases. Writing and understanding these messages put heavy cognitive load on its end users. In this work, we take up the task of translating NOTAMs into English natural language using LLMs. Since NOTAMs do not adhere to English grammar rules and have their own decoding rules, large language models (LLMs) cannot translate them without effective prompting. In this paper, we develop a framework to come up with effective prompts to achieve the translations. Our approach uses context-aware semantic prompting techniques, paired with domain-specific rules, to improve the accuracy and clarity of translations. The framework is evaluated using comprehensive experiments (6 LLMs of varying sizes, and with 5 different prompting setups for each) and eight evaluation metrics measuring different aspects of the translation. The results demonstrate that our methodology can produce clear translations that accurately convey the information contained in NOTAMs. 2025.findings-acl.1253 @@ -23970,7 +23970,7 @@ Can <fixed-case>VLM</fixed-case>s Actually See and Read? A Survey on Modality Collapse in Vision-Language Models Mong YuanSimUniversity of Adelaide Wei EmmaZhangThe University of Adelaide - XiangDaiCSIRO + XiangDaiCSIRO BiaoyanFang 24452-24470 Vision-language models (VLMs) integrate textual and visual information, enabling the model to process visual inputs and leverage visual information to generate predictions. Such models are demanding for tasks such as visual question answering, image captioning, and visual grounding. However, some recent work found that VLMs often rely heavily on textual information, ignoring visual information, but are still able to achieve competitive performance in vision-language (VL) tasks. This survey reviews modality collapse analysis work to provide insights into the reason for this unintended behavior. It also reviews probing studies for fine-grained vision-language understanding, presenting current findings on information encoded in VL representations and highlighting potential directions for future research. @@ -23984,7 +23984,7 @@ Nihar RanjanSahoo RudraMurthyIBM India Pvt Ltd SwapravaNathIndian Institute of Technology Bombay, Indian Institute of Technology, Bombay - PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology + PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology 24471-24496 While a few high-quality bias benchmark datasets exist to address stereotypes in Language Models (LMs), a notable lack of focus remains on body image stereotypes. To bridge this gap, we propose \textbf{BIStereo}, a suite to uncover LMs’ biases towards people of certain physical appearance characteristics, namely, \textit{skin complexion, body shape, height, attire,} and a \textit{miscellaneous category} including \textit{hair texture, eye color, and more}. Our dataset comprises 40k sentence pairs designed to assess LMs’ biased preference for certain body types. We further include 60k premise-hypothesis pairs designed to comprehensively assess LMs’ preference for fair skin tone. Additionally, we curate 553 tuples consisting of a \textit{body image descriptor, gender, and a stereotypical attribute}, validated by a diverse pool of annotators for physical appearance stereotypes.We propose a metric, \textbf{TriSentBias}, that captures the biased preferences of LMs towards a certain body type over others. Using \textbf{BIStereo}, we assess the presence of body image biases in ten different language models, revealing significant biases in models Muril, XLMR, Llama3, and Gemma. We further evaluate the LMs through downstream NLI and Analogy tasks.Our NLI experiments highlight notable patterns in the LMs that align with the well-documented cognitive bias in humans known as \textbf{\textit{the Halo Effect}}. 2025.findings-acl.1257 @@ -24131,7 +24131,7 @@ Reranking-based Generation for Unbiased Perspective Summarization NarutatsuRi NicholasDeasColumbia University - KathleenMcKeown + KathleenMcKeown 24701-24723 Generating unbiased summaries in real-world settings such as political perspective summarization remains a crucial application of Large Language Models (LLMs). Yet, existing evaluation frameworks rely on traditional metrics for measuring key attributes such as coverage and faithfulness without verifying their applicability, and efforts to develop improved summarizers are still nascent. We address these gaps by (1) identifying reliable metrics for measuring perspective summary quality, and (2) investigating the efficacy of LLM-based methods beyond zero-shot inference. Namely, we build a test set for benchmarking metric reliability using human annotations and show that traditional metrics underperform compared to language model–based metrics, which prove to be strong evaluators. Using these metrics, we show that reranking-based methods yield strong results, and preference tuning with synthetically generated and reranking-labeled data further boosts performance. Our findings aim to contribute to the reliable evaluation and development of perspective summarization methods. 2025.findings-acl.1268 @@ -24199,7 +24199,7 @@ Decomposed Opinion Summarization with Verified Aspect-Aware Modules MiaoLi Jey HanLauThe University of Melbourne - EduardHovyUniversity of Melbourne and Carnegie Mellon University + EduardHovyUniversity of Melbourne and Carnegie Mellon University MirellaLapataEdinburgh University, University of Edinburgh 24805-24841 Opinion summarization plays a key role in deriving meaningful insights from large-scale online reviews. To make the process more explainable and grounded, we propose a domain-agnostic modular approach guided by review aspects (e.g., cleanliness for hotel reviews) which separates the tasks of aspect identification, opinion consolidation, and meta-review synthesis to enable greater transparency and ease of inspection. We conduct extensive experiments across datasets representing scientific research, business, and product domains. Results show that our approach generates more grounded summaries compared to strong baseline models, as verified through automated and human evaluations. Additionally, our modular approach, which incorporates reasoning based on review aspects, produces more informative intermediate outputs than other knowledge-agnostic decomposition approaches. Lastly, we provide empirical results to show that these intermediate outputs can support humans in summarizing opinions from large volumes of reviews. @@ -24442,7 +24442,7 @@ ZiliWang ShuigengZhouFudan University XiangyuZhang - Heung-YeungShum + Heung-YeungShum 25114-25126 We propose novel attention architectures, Multi-matrix Factorization Attention (MFA) and MFA-Key-Reuse (MFA-KR). Existing variants for standard Multi-Head Attention (MHA), including SOTA methods like MLA, fail to maintain as strong performance under stringent Key-Value cache (KV cache) constraints. MFA enhances model capacity by efficiently scaling up both the number and dimension of attention heads through low-rank matrix factorization in the Query-Key (QK) circuit. Extending MFA, MFA-KR further reduces memory requirements by repurposing the key cache as value through value projection re-parameterization. MFA’s design enables strong model capacity when working under tight KV cache budget, while MFA-KR is suitable for even harsher KV cache limits with minor performance trade-off. Notably, in our extensive and large-scale experiments, the proposed architecture outperforms MLA and performs comparably to MHA, while reducing KV cache usage by up to 56% and 93.7%, respectively. 2025.findings-acl.1288 @@ -24546,7 +24546,7 @@ DebjyotiMondalSamsung SubhadarshiPandaSamsung RiturajSinghSamsung Research and Development Institute - India, Bengaluru - PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology + PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology 25250-25269 Accurately grounding visual and textual elements within mobile user interfaces (UIs) remains a significant challenge for Vision-Language Models (VLMs). Visual grounding, a critical task in this domain, involves identifying the most relevant UI element or region based on a natural language query—a process that requires both precise perception and context-aware reasoning. In this work, we present - **MoUI**, a light-weight mobile UI understanding model trained on **MoIT**, an instruction-tuning dataset specifically tailored for mobile screen understanding and grounding, designed to bridge the gap between user intent and visual semantics. Complementing this dataset, we also present a human-annotated reasoning benchmark **MoIQ** that rigorously evaluates complex inference capabilities over mobile UIs. To harness these resources effectively, we propose a two-stage training approach that separately addresses perception and reasoning tasks, leading to stronger perception capabilities and improvement in reasoning abilities. Through extensive experiments, we demonstrate that our MoUI models achieve significant gains in accuracy across all perception tasks and _state-of-the-art_ results on public reasoning benchmark **ComplexQA (78%) and our MoIQ (49%)**. We will be open-sourcing our dataset, code, and models to foster further research and innovation in the field. 2025.findings-acl.1295 @@ -24623,7 +24623,7 @@ ZhangchenXu FengqingJiangUniversity of Washington LuyaoNiuUniversity of Washington - Bill YuchenLinxAI and University of Washington + Bill YuchenLinxAI and University of Washington BhaskarRamasubramanianWestern Washington University RadhaPoovendranUniversity of Washington, Seattle 25366-25394 @@ -24709,7 +24709,7 @@ LongyueWangAlibaba Group LongqinJiangUniversität Hamburg XingshanLi - ChrisBiemannU Hamburg + ChrisBiemannU Hamburg 25507-25522 Large Language Models (LLMs) achieve remarkable performance through pretraining on extensive data. This enables efficient adaptation to diverse downstream tasks. However, the lack of interpretability in their underlying mechanisms limits the ability to effectively steer LLMs for specific applications. In this work, we investigate the intrinsic mechanisms of LLMs from a cognitive perspective using eye movement measures. Specifically, we analyze the layer-wise correlation between human cognitive indicators and LLM representations. Building on these insights, we propose a heuristic approach for selecting the optimal steering layer to modulate LLM semantics. To this end, we introduce an efficient selective layer intervention based on prominent parameter-efficient fine-tuning methods, which conventionally adjust either all layers or only the final layer. Additionally, we present an implicit layer contrastive intervention during inference to steer LLMs away from toxic outputs. Extensive experiments on natural language understanding, reasoning, and generation tasks, conducted on GPT-2, LLaMa2-7B, and Mixtral-7B, demonstrate the effectiveness and efficiency of our approach. As a model-agnostic framework, it enhances the interpretability of LLMs while improving efficiency for safe deployment. 2025.findings-acl.1308 @@ -24735,7 +24735,7 @@ ZihaoYu SamPan ZhunWangUniversity of California, Berkeley - YangLiuUniversity of California, Santa Cruz + YangLiuUniversity of California, Santa Cruz DawnSongUniversity of California Berkeley ChenguangWangWashington University, Saint Louis 25534-25553 @@ -24799,7 +24799,7 @@ <fixed-case>IMPARA</fixed-case>-<fixed-case>GED</fixed-case>: Grammatical Error Detection is Boosting Reference-free Grammatical Error Quality Estimator YusukeSakaiNara Institute of Science and Technology, Japan - TakumiGotoNara Institute of Science and Technology, Japan + TakumiGotoNara Institute of Science and Technology, Japan TaroWatanabeNara Institute of Science and Technology, Japan 25647-25654 We propose IMPARA-GED, a novel reference-free automatic grammatical error correction (GEC) evaluation method with grammatical error detection (GED) capabilities. We focus on the quality estimator of IMPARA, an existing automatic GEC evaluation method, and construct that of IMPARA-GED using a pre-trained language model with enhanced GED capabilities. Experimental results on SEEDA, a meta-evaluation dataset for automatic GEC evaluation methods, demonstrate that IMPARA-GED achieves the highest correlation with human sentence-level evaluations. @@ -24813,7 +24813,7 @@ BingbingWenUniversity of Washington BinHanUniversity of Washington RobertWolfe - Lucy LuWangUniversity of Washington and Allen Institute for Artificial Intelligence + Lucy LuWangUniversity of Washington and Allen Institute for Artificial Intelligence BillHoweUniversity of Washington 25655-25672 Psychology research has shown that humans are poor at estimating their performance on tasks, tending towards underconfidence on easy tasks and overconfidence on difficult tasks. We examine three LLMs, Llama-3-70B-instruct, Claude-3-Sonnet, and GPT-4o, on a range of QA tasks of varying difficulty, and show that models exhibit subtle differences from human patterns of overconfidence: less sensitive to task difficulty, and when prompted to answer based on different personas—e.g., expert vs layman, or different race, gender, and ages—the models will respond with stereotypically biased confidence estimations even though their underlying answer accuracy remains the same. Based on these observations, we propose Answer-Free Confidence Estimation (AFCE) to improve confidence calibration and LLM interpretability in these settings. AFCE is a self-assessment method that employs two stages of prompting, first eliciting only confidence scores on questions, then asking separately for the answer. Experiments on the MMLU and GPQA datasets spanning subjects and difficulty show that this separation of tasks significantly reduces overconfidence and delivers more human-like sensitivity to task difficulty. @@ -24915,7 +24915,7 @@ Just Put a Human in the Loop? Investigating <fixed-case>LLM</fixed-case>-Assisted Annotation for Subjective Tasks HopeSchroeder - DebRoyMassachusetts Institute of Technology + DebRoyMassachusetts Institute of Technology JadKabbaraMassachusetts Institute of Technology 25771-25795 LLM use in annotation is becoming widespread, and given LLMs’ overall promising performance and speed, putting humans in the loop to simply “review” LLM annotations can be tempting. In subjective tasks with multiple plausible answers, this can impact both evaluation of LLM performance, and analysis using these labels in a social science task downstream. In a pre-registered experiment with 350 unique annotators and 7,000 annotations across 4 conditions, 2 models, and 2 datasets, we find that presenting crowdworkers with LLM-generated annotation suggestions did not make them faster annotators, but did improve their self-reported confidence in the task. More importantly, annotators strongly took the LLM suggestions, significantly changing the label distribution compared to the baseline. We show that when these labels created with LLM assistance are used to evaluate LLM performance, reported model performance significantly increases. We show how changes in label distributions as a result of LLM assistance can affect conclusions drawn by analyzing even “human-approved” LLM-annotated datasets. We believe our work underlines the importance of understanding the impact of LLM-assisted annotation on subjective, qualitative tasks, on the creation of gold data for training and testing, and on the evaluation of NLP systems on subjective tasks. @@ -25227,7 +25227,7 @@ SophiaHorngColumbia University MaximillianChenGoogle Kung-HsiangHuangSalesForce.com - Shih-FuChangColumbia University and Columbia University + Shih-FuChangColumbia University and Columbia University 26239-26256 Tabular data is used to store information in many real-world systems ranging from finance to healthcare. However, such structured data is often communicated to humans in visually interpretable formats (e.g. charts and textual paragraphs), making it imperative that fact-checking models should be able to reason over multiple pieces of structured evidence presented across different modalities. In this paper, we propose Multi-Document Multi-Modal Table-based Fact Verification (M^{2}-TabFact), a challenging fact verification task that requires jointly reasoning over visual and textual representations of structured data. We design an automatic data generation pipeline that converts existing tabular data into descriptive visual and textual evidence. We then use Large Language Models to generate complex claims that depend on multi-document, multi-modal evidence. In total, we create 8,856 pairs of complex claims and multi-modal evidence through this procedure and systematically evaluate M^{2}-TabFact with a set of strong vision-language models (VLM). We find that existing VLMs have large gaps in fact verification performance compared to humans. Moreover, we find that they are imbalanced when it comes to their ability to handle reason about different modalities, and currently struggle to reason about information extracted from multiple documents. 2025.findings-acl.1345 @@ -25248,9 +25248,9 @@ <fixed-case>PLAY</fixed-case>2<fixed-case>PROMPT</fixed-case>: Zero-shot Tool Instruction Optimization for <fixed-case>LLM</fixed-case> Agents via Tool Play WeiFangMassachusetts Institute of Technology - YangZhangInternational Business Machines + YangZhangInternational Business Machines KaizhiQianInternational Business Machines - James R.GlassMassachusetts Institute of Technology + James R.GlassMassachusetts Institute of Technology YadaZhu 26274-26290 Large language models (LLMs) are increasingly integrated with specialized external tools, yet many tasks demand zero-shot tool usage with minimal or noisy documentation. Existing solutions rely on manual rewriting or labeled data for validation, making them inapplicable in true zero-shot settings. To address these challenges, we propose PLAY2PROMPT, an automated framework that systematically “plays” with each tool to explore its input-output behaviors. Through this iterative trial-and-error process, PLAY2PROMPT refines tool documentation and generates usage examples without any labeled data. These examples not only guide LLM inference but also serve as validation to further enhance tool utilization. Extensive experiments on real-world tasks demonstrate that PLAY2PROMPT significantly improves zero-shot tool performance across both open and closed models, offering a scalable and effective solution for domain-specific tool integration. @@ -25277,7 +25277,7 @@ JuhyunOhKorea Advanced Institute of Science & Technology EunsuKim HoyunSongKorea Advanced Institute of Science & Technology - AliceOhGoogle and Korea Advanced Institute of Science and Technology + AliceOhGoogle and Korea Advanced Institute of Science and Technology 26312-26332 Ensuring persona fidelity in large language models (LLMs) is essential for maintaining coherent and engaging human-AI interactions. However, LLMs often exhibit Out-of-Character (OOC) behavior, where generated responses deviate from an assigned persona, leading to inconsistencies that affect model reliability. Existing evaluation methods typically assign single scores to entire responses, struggling to capture subtle persona misalignment, particularly in long-form text generation. To address this limitation, we propose an atomic-level evaluation framework that quantifies persona fidelity at a finer granularity. Our three key metrics measure the degree of persona alignment and consistency within and across generations. Our approach enables a more precise and realistic assessment of persona fidelity by identifying subtle deviations that real users would encounter. Through our experiments, we demonstrate that our framework effectively detects persona inconsistencies that prior methods overlook. By analyzing persona fidelity across diverse tasks and personality types, we reveal how task structure and persona desirability influence model adaptability, highlighting challenges in maintaining consistent persona expression. 2025.findings-acl.1349 @@ -25385,7 +25385,7 @@ SeungoneKim NiklasMuennighoffStanford University, Contextual AI and Allen Institute for Artificial Intelligence DongkwanKimKorea Advanced Institute of Science and Technology - AliceOhGoogle and Korea Advanced Institute of Science and Technology + AliceOhGoogle and Korea Advanced Institute of Science and Technology 26456-26493 We introduce LLM-as-an-Interviewer, a novel paradigm for evaluating large language models (LLMs). This approach leverages multi-turn interactions where the LLM interviewer actively provides feedback on responses and poses follow-up questions to the evaluated LLM. At the start of the interview, the LLM interviewer dynamically modifies datasets to generate initial questions, mitigating data contamination. We apply the LLM-as-an-Interviewer framework to evaluate six models on the reasoning, factuality and instruction-following tasks. Our results show that the framework effectively provides insights into LLM performance, including the quality of initial responses, adaptability to feedback, and ability to address follow-up queries like clarification or additional knowledge requests. The framework also addresses key limitations of conventional methods like LLM-as-a-Judge, including verbosity bias and inconsistency across runs. Finally, we propose the Interview Report, which aggregates insights from the interview process, providing examples and a comprehensive analysis of the LLM’s strengths and weaknesses. This report offers a detailed snapshot of the model’s real-world applicability. 2025.findings-acl.1357 @@ -25431,7 +25431,7 @@ Rethinking Prompt-based Debiasing in Large Language Model XinyiYang RunzheZhanUniversity of Macau - ShuYang + ShuYang JunchaoWuUniversity of Macau Lidia S.ChaoUniversity of Macau Derek F.WongUniversity of Macau @@ -25461,7 +25461,7 @@ OktieHassanzadehInternational Business Machines DharmashankarSubramanianInternational Business Machines SolaShiraiInternational Business Machines - AlfioGliozzoInternational Business Machines + AlfioGliozzoInternational Business Machines DebarunBhattacharjyaInternational Business Machines 26569-26583 Text-to-SQL aims to translate natural language queries into SQL statements, which is practical as it enables anyone to easily retrieve the desired information from databases. Recently, many existing approaches tackle this problem with Large Language Models (LLMs), leveraging their strong capability in understanding user queries and generating corresponding SQL code. Yet, the parametric knowledge in LLMs might be limited to covering all the diverse and domain-specific queries that require grounding in various database schemas, which makes generated SQLs less accurate oftentimes. To tackle this, we propose constructing the knowledge base for text-to-SQL, a foundational source of knowledge, from which we retrieve and generate the necessary knowledge for given queries. In particular, unlike existing approaches that either manually annotate knowledge or generate only a few pieces of knowledge for each query, our knowledge base is comprehensive, which is constructed based on a combination of all the available questions and their associated database schemas along with their relevant knowledge, and can be reused for unseen databases from different datasets and domains. We validate our approach on multiple text-to-SQL datasets, considering both the overlapping and non-overlapping database scenarios, where it outperforms relevant baselines substantially. @@ -25498,7 +25498,7 @@ From Complexity to Clarity: <fixed-case>AI</fixed-case>/<fixed-case>NLP</fixed-case>’s Role in Regulatory Compliance JiviteshJainSchool of Computer Science, Carnegie Mellon University NivedhithaDhanasekaran - Mona T.DiabCarnegie Mellon University + Mona T.DiabCarnegie Mellon University 26629-26641 Regulatory data compliance is a cornerstone of trust and accountability in critical sectors like finance, healthcare, and technology, yet its complexity poses significant challenges for organizations worldwide. Recent advances in natural language processing, particularly large language models, have demonstrated remarkable capabilities in text analysis and reasoning, offering promising solutions for automating compliance processes. This survey examines the current state of automated data compliance, analyzing key challenges and approaches across problem areas. We identify critical limitations in current datasets and techniques, including issues of adaptability, completeness, and trust. Looking ahead, we propose research directions to address these challenges, emphasizing standardized evaluation frameworks and balanced human-AI collaboration. 2025.findings-acl.1366 @@ -25545,7 +25545,7 @@ Standard Quality Criteria Derived from Current <fixed-case>NLP</fixed-case> Evaluations for Guiding Evaluation Design and Grounding Comparability and <fixed-case>AI</fixed-case> Compliance Assessments - AnyaBelzDublin City University + AnyaBelzDublin City University SimonMille CraigThomsonDublin City University and University of Aberdeen 26685-26715 @@ -25673,7 +25673,7 @@ Amir HosseinKargaranLudwig-Maximilians-Universität München YihongLiuLudwig-Maximilians-Universität München FrançoisYvonISIR, Sorbonne Université & CNRS - HinrichSchuetze + HinrichSchuetze 26905-26917 Several studies have explored the mechanisms of large language models (LLMs) in coding tasks, but most have focused on programming languages (PLs) in a monolingual setting. In this paper, we investigate the relationship between multiple PLs and English in the concept space of LLMs. We perform a few-shot translation task on 21 PL pairs using two Llama-based models. By decoding the embeddings of intermediate layers during this task, we observe that the concept space is closer to English (including PL keywords) and assigns high probabilities to English tokens in the second half of the intermediate layers. We analyze neuron activations for 11 PLs and English, finding that while language-specific neurons are primarily concentrated in the bottom layers, those exclusive to each PL tend to appear in the top layers. For PLs that are highly aligned with multiple other PLs, identifying language-specific neurons is not feasible. These PLs also tend to have a larger keyword set than other PLs and are closer to the model’s concept space regardless of the input/output PL in the translation task. Our findings provide insights into how LLMs internally represent PLs, revealing structural patterns in the model’s concept space. Code is available at https://github.com/cisnlp/code-specific-neurons. 2025.findings-acl.1379 @@ -25749,7 +25749,7 @@ NafisehNikeghbalTechnische Universität München JanaDiesnerTechnische Universität München FrançoisYvonISIR, Sorbonne Université & CNRS - HinrichSchuetze + HinrichSchuetze 27001-27023 English-centric large language models (LLMs) often show strong multilingual capabilities. However, their multilingual performance remains unclear and is under-evaluated for many other languages. Most benchmarks for multilinguality focus on classic NLP tasks or cover a minimal number of languages. We introduce MEXA, a method for assessing the multilingual capabilities of pre-trained English-centric LLMs using parallel sentences, which are available for more languages than existing downstream tasks. MEXA leverages that English-centric LLMs use English as a pivot language in their intermediate layers. MEXA computes the alignment between English and non-English languages using parallel sentences to evaluate the transfer of language understanding from English to other languages. This alignment can be used to estimate model performance in different languages. We conduct controlled experiments using various parallel datasets (FLORES-200 and Bible), models (Llama family, Gemma family, Mistral, and OLMo), and established downstream tasks (Belebele, m-MMLU, and m-ARC). We explore different methods to compute embeddings in decoder-only models. Our results show that MEXA, in its default settings, achieves an average Pearson correlation of 0.90 between its predicted scores and actual task performance across languages. This suggests that MEXA is a reliable method for estimating the multilingual capabilities of English-centric LLMs, providing a clearer understanding of their multilingual potential and the inner workings of LLMs. Leaderboard: https://cis-lmu-mexa.hf.space, Code: https://github.com/cisnlp/MEXA. 2025.findings-acl.1385 diff --git a/data/xml/2025.finnlp.xml b/data/xml/2025.finnlp.xml index 3889e5c08d..f6122c7be1 100644 --- a/data/xml/2025.finnlp.xml +++ b/data/xml/2025.finnlp.xml @@ -4,7 +4,7 @@ Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal) Chung-ChiChen - AntonioMoreno-Sandoval + AntonioMoreno-Sandoval JiminHuang QianqianXie SophiaAnaniadou @@ -114,7 +114,7 @@ Concept-Based <fixed-case>RAG</fixed-case> Models: A High-Accuracy Fact Retrieval Approach Cheng-YuLin - Jyh-ShingJang + Jyh-ShingJang 96–100 This study introduces a concept-based methodology to optimize Retrieval-Augmented Generation (RAG) tasks by assessing dataset certainty using entropy-based metrics and concept extraction techniques. Unlike traditional methods focused on reducing LLM hallucinations or modifying data structures, this approach evaluates inherent knowledge uncertainty from an LLM perspective. By pre-processing documents with LLMs, the concept-based method significantly enhances precision in tasks demanding high accuracy, such as legal, finance, or formal document responses. 2025.finnlp-1.8 @@ -147,7 +147,7 @@ Forecasting Credit Ratings: A Case Study where Traditional Methods Outperform Generative <fixed-case>LLM</fixed-case>s FelixDrinkall - Janet B.Pierrehumbert + Janet B.Pierrehumbert StefanZohren 118–133 Large Language Models (LLMs) have been shown to perform well for many downstream tasks. Transfer learning can enable LLMs to acquire skills that were not targeted during pre-training. In financial contexts, LLMs can sometimes beat well-established benchmarks. This paper investigates how well LLMs perform at forecasting corporate credit ratings. We show that while LLMs are very good at encoding textual information, traditional methods are still very competitive when it comes to encoding numeric and multimodal data. For our task, current LLMs perform worse than a more traditional XGBoost architecture that combines fundamental and macroeconomic data with high-density text-based embedding features. We investigate the degree to which the text encoding methodology affects performance and interpretability. @@ -214,7 +214,7 @@ MohamedEttaleb MounaKamel NathalieAussenac-Gilles - VéroniqueMoriceau + VéroniqueMoriceau 175–183 Relation Extraction (RE) is a fundamental task in natural language processing, aimed at deducing semantic relationships between entities in a text. Traditional supervised extraction methods relation extraction methods involve training models to annotate tokens representing entity mentions, followed by predicting the relationship between these entities. However, recent advancements have transformed this task into a sequence-to-sequence problem. This involves converting relationships between entities into target string, which are then generated from the input text. Thus, language models now appear as a solution to this task and have already been used in numerous studies, with various levels of refinement, across different domains. The objective of the present study is to evaluate the contribution of large language models (LLM) to the task of relation extraction in a specific domain (in this case, the economic domain), compared to smaller language models. To do this, we considered as a baseline a model based on the BERT architecture, trained in this domain, and four LLM, namely FinGPT specific to the financial domain, XLNet, ChatGLM, and Llama3, which are generalists. All these models were evaluated on the same extraction task, with zero-shot for the general-purpose LLM, as well as refinements through few-shot learning and fine-tuning. The experiments showedthat the best performance in terms of F-score was achieved with fine-tuned LLM, with Llama3 achieving the highest performance. 2025.finnlp-1.17 @@ -242,7 +242,7 @@ <fixed-case>AMWAL</fixed-case>: Named Entity Recognition for <fixed-case>A</fixed-case>rabic Financial News Muhammad S.Abdo YashHatekar - DamirCavar + DamirCavar 207–213 Financial Named Entity Recognition (NER) presents a pivotal task in extracting structured information from unstructured financial data, especially when extending its application to languages beyond English. In this paper, we present AMWAL, a named entity recognition system for Arabic financial news. Our approach centered on building a specialized corpus compiled from three major Arabic financial newspapers spanning from 2000 to 2023. Entities were extracted from this corpus using a semi-automatic process that included manual annotation and review to ensure accuracy. The total number of entities identified amounts to 17.1k tokens, distributed across 20 categories, providing a comprehensive coverage of financial entities. To standardize the identified entities, we adopt financial concepts from the Financial Industry Business Ontology (FIBO, 2020), aligning our framework with industry standards. The significance of our work lies not only in the creation of the first customized NER system for Arabic financial data but also in its potential to streamline information extraction processes in the financial domain. Our NER system achieves a Precision score of 96.08, a Recall score of 95.87, and an F1 score of 95.97, which outperforms state-of-the-art general Arabic NER systems as well as other systems for financial NER in other languages. 2025.finnlp-1.20 @@ -251,7 +251,7 @@ The Financial Document Causality Detection Shared Task (<fixed-case>F</fixed-case>in<fixed-case>C</fixed-case>ausal 2025) AntonioMoreno-Sandoval - JordiPorta + JordiPorta BlancaCarbajo-Coronado YancoTorterolo DoaaSamy @@ -285,8 +285,8 @@ <fixed-case>CLRG</fixed-case>@<fixed-case>F</fixed-case>in<fixed-case>C</fixed-case>ausal2025: Cause-Effect Extraction in Finance Domain VibhavkrishnanK S - PattabhiRK Rao - SobhaLalitha Devi + PattabhiRK Rao + SobhaLalitha Devi 236–241 This paper presents our work on Cause-Effect information extraction specifically in the financial domain. Cause and effect information is very much needed for expert decision making. Particularly, in the financial domain, the fund managers, financial analysts, etc. need to have the information on cause-effects for their works. Natural Language Processing (NLP) techniques help in the automatic extraction of cause and effect from a given text. In this work, we build various cause-effect text span detection models using pre-trained transformer-based language models and fine tune these models using the data provided by FinCausal 2025 task organizers. We have only used FinCausal 2025 data sets to train our models. No other external data is used. Our ensemble of sequence tagging models based on the Fine-tuned RoBERTa-Large language model achieves SAS score of 0.9604 and Exact match score of 0.7214 for English. Similarly for Spanish we obtain SAS score of 0.9607 and Exact match score of 0.7166. This is our first time participation in the FinCausal 2025 Task. 2025.finnlp-1.24 diff --git a/data/xml/2025.gebnlp.xml b/data/xml/2025.gebnlp.xml index 0b67c10527..c4fb1ee11e 100644 --- a/data/xml/2025.gebnlp.xml +++ b/data/xml/2025.gebnlp.xml @@ -3,7 +3,7 @@ Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP) - AgnieszkaFaleńska + AgnieszkaFaleńska ChristineBasta MartaCosta-jussà KarolinaStańczak @@ -113,7 +113,7 @@ HadiMohammadi TinaShahediUniversity of Tehran, University of Tehran PabloMosteiroUtrecht University - MassimoPoesioUtrecht University and Queen Mary, University of London + MassimoPoesioUtrecht University and Queen Mary, University of London AyoubBagheriUtrecht University AnastasiaGiachanouUtrecht University 92-104 @@ -164,7 +164,7 @@ ShalakaSatheeshFraunhofer Institute IAIS, Fraunhofer IAIS KatrinKlugFraunhofer Institute IAIS, Fraunhofer IAIS KatharinaBeckhFraunhofer Institute IAIS, Fraunhofer IAIS - HéctorAllende-CidFraunhofer Institute IAIS, Fraunhofer IAIS + HéctorAllende-CidFraunhofer Institute IAIS, Fraunhofer IAIS SebastianHoubenHochschule Bonn-Rhein-Sieg TeenaHassanHochschule Bonn-Rhein-Sieg 137-148 @@ -223,7 +223,7 @@ Bias Attribution in <fixed-case>F</fixed-case>ilipino Language Models: Extending a Bias Interpretability Metric for Application on Agglutinative Languages Lance Calvin LimGamboaUniversity of Birmingham and Ateneo de Manila University YueFengUniversity of Birmingham - Mark G.Lee + Mark G.Lee 195-205 Emerging research on bias attribution and interpretability have revealed how tokens contribute to biased behavior in language models processing English texts. We build on this line of inquiry by adapting the information-theoretic bias attribution score metric for implementation on models handling agglutinative languages—particularly Filipino. We then demonstrate the effectiveness of our adapted method by using it on a purely Filipino model and on three multilingual models—one trained on languages worldwide and two on Southeast Asian data. Our results show that Filipino models are driven towards bias by words pertaining to \textit{people}, \textit{objects}, and \textit{relationships}—entity-based themes that stand in contrast to the action-heavy nature of bias-contributing themes in English (i.e., \textit{criminal}, \textit{sexual}, and \textit{prosocial} behaviors). These findings point to differences in how English and non-English models process inputs linked to sociodemographic groups and bias. 2025.gebnlp-1.19 @@ -404,7 +404,7 @@ CarleighWood BokaiYuMeta AI ChristopheRopersMeta - Marta R.Costa-jussàMeta + Marta R.Costa-jussàMeta 403-426 In the current landscape of automatic language generation, there is a need to understand, evaluate, and mitigate demographic biases, as existing models are becoming increasingly multilingual. To address this, we present the initial eight languages from the Massive Multilingual Holistic Bias (MMHB) dataset and benchmark consisting of approximately 6 million sentences. The sentences are designed to induce biases towards different groups of people which can yield significant results when using them as a benchmark to test different text generation models. To further scale up in terms of both language coverage and size and to leverage limited human translation, we use systematic approach to independently translate sentence parts. This technique carefully designs a structure to dynamically generate multiple sentence variations and significantly reduces the human translation workload. The translation process has been meticulously conducted to avoid an English-centric perspective and include all necessary morphological variations for languages that require them, improving from the original English HOLISTICBIAS. Finally, we utilize MMHB to report results on gender bias and added toxicity in MT tasks. 2025.gebnlp-1.35 @@ -416,7 +416,7 @@ KristinGnadt DavidThulkeRWTH Aachen University and AppTek SimoneKopeinikKnow Center GmbH - RalfSchlüterAppTek GmbH and Rheinisch Westfälische Technische Hochschule Aachen + RalfSchlüterAppTek GmbH and Rheinisch Westfälische Technische Hochschule Aachen 427-450 In recent years, various methods have been proposed to evaluate gender bias in large language models (LLMs). A key challenge lies in the transferability of bias measurement methods initially developed for the English language when applied to other languages. This work aims to contribute to this research strand by presenting five German datasets for gender bias evaluation in LLMs. The datasets are grounded in well-established concepts of gender bias and are accessible through multiple methodologies. Our findings, reported for eight multilingual LLM models, reveal unique challenges associated with gender bias in German, including the ambiguous interpretation of male occupational terms and the influence of seemingly neutral nouns on gender perception. This work contributes to the understanding of gender bias in LLMs across languages and underscores the necessity for tailored evaluation frameworks. 2025.gebnlp-1.37 diff --git a/data/xml/2025.gem.xml b/data/xml/2025.gem.xml index b7accde96b..f4cb76b95e 100644 --- a/data/xml/2025.gem.xml +++ b/data/xml/2025.gem.xml @@ -4,7 +4,7 @@ Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM²) OfirArvivIBM Research - MirunaClinciuHeriot Watt University + MirunaClinciuHeriot Watt University KaustubhDholeEmory University RotemDrorUniversity of Haifa SebastianGehrmannBloomberg @@ -66,7 +66,7 @@ SualehaFaridUniversity of Michigan - Ann Arbor Abdul HameedAzeemi AwaisAtharEuropean Bioinformatics Institute - European Molecular Biology Laboratory (EMBL-EBI) - Agha AliRazaLahore University of Management Sciences + Agha AliRazaLahore University of Management Sciences 30-45 This paper presents a novel methodology for generating synthetic Preference Optimization (PO) datasets using multi-model workflows. We evaluate the effectiveness and potential of these workflows in automating and enhancing the dataset generation process. PO dataset generation requires two modules: (1) \textit{response evaluation}, and (2) \textit{response generation}. In the \textit{response evaluation} module, the responses from Large Language Models (LLMs) are evaluated and ranked - a task typically carried out by human annotators that we automate using LLMs. We assess the response evaluation module in a 2 step process. In step 1, we assess LLMs as evaluators using three distinct prompting strategies. In step 2, we apply the winning prompting strategy to compare the performance of LLM-as-a-Judge, LLMs-as-a-Jury, and LLM Debate. Our evaluation shows that GPT-4o-as-a-Judge is more consistent across all datasets. For the \textit{response generation} module, we use the identified LLM evaluator configuration and compare different configurations of the LLM Feedback Loop. We use the win rate to determine the best multi-model configuration for generation. Experimenting with various configurations, we find that the LLM Feedback Loop, with Llama as the generator and Gemma as the reviewer, achieves a notable 71.8% and 73.8% win rate over single-model Llama and Gemma, respectively. After identifying the best configurations for both modules, we generate our PO datasets using the above pipeline. 2025.gem-1.4 @@ -90,7 +90,7 @@ <fixed-case>HEDS</fixed-case> 3.0: The Human Evaluation Data Sheet Version 3.0 - AnyaBelzDublin City University + AnyaBelzDublin City University CraigThomsonDublin City University and University of Aberdeen 60-81 This paper presents a new version of the Human Evaluation Datasheet (HEDS), numbered 3.0 This update is the result of our experience using HEDS in the context of numerous recent human evaluation experiments, including reproduction studies, and of feedback collected from other researchers. Our main overall goal was to improve clarity, and to enable users to complete the datasheet more consistently and comparably. The HEDS 3.0 package consists of the digital data sheet, documentation, and code for exporting completed data sheets as latex files, all available from the HEDS 3.0 GitHub. @@ -164,7 +164,7 @@ Evaluating <fixed-case>LLM</fixed-case>s with Multiple Problems at once ZhengxiangWangState University of New York at Stony Brook JordanKodnerState University of New York, Stony Brook - OwenRambowStony Brook University + OwenRambowStony Brook University 178-199 This paper shows the benefits and fruitfulness of evaluating LLMs with multiple problems at once, a paradigm we call multi-problem evaluation (MPE). Unlike conventional single-problem evaluation, where a prompt presents a single problem and expects one specific answer, MPE places multiple problems together in a single prompt and assesses how well an LLM answers all these problems in a single output. Leveraging 6 classification and 12 reasoning benchmarks that already exist, we introduce a new benchmark called ZeMPE (Zero-shot Multi-Problem Evaluation), comprising 53,100 zero-shot multi-problem prompts. We experiment with a total of 13 LLMs from 5 model families on ZeMPE to present a comprehensive and systematic MPE. Our results show that LLMs are capable of handling multiple problems from a single data source as well as handling them separately, but there are conditions this multiple problem handling capability falls short. In addition, we perform in-depth further analyses and explore model-level factors that may enable multiple problem handling capabilities in LLMs. We release our corpus and code to facilitate future research. 2025.gem-1.14 @@ -232,7 +232,7 @@ <fixed-case>IRS</fixed-case>um: One Model to Rule Summarization and Retrieval SotaroTakeshitaUniversität Mannheim - Simone PaoloPonzettoUniversität Mannheim + Simone PaoloPonzettoUniversität Mannheim KaiEckertMannheim University of Applied Sciences 262-275 Applications that store a large number of documents often have summarization and retrieval functionalities to help users digest large amounts of information efficiently. Currently, such systems need to run two task-specific models, for summarization and retrieval, redundantly on the same set of documents. An efficient approach to amend this redundancy would be to reuse hidden representations produced during the summary generation for retrieval. However, our experiment shows that existing models, including recent large language models, do not produce retrieval-friendly embeddings during summarization due to a lack of a contrastive objective during their training. To this end, we introduce a simple, cost-effective training strategy which integrates a contrastive objective into standard summarization training without requiring additional annotations. We empirically show that our model can perform on par or even outperform in some cases compared to the combination of two task-specific models while improving throughput and FLOPs by up to 17% and 20%, respectively. @@ -294,7 +294,7 @@ Using <fixed-case>LLM</fixed-case> Judgements for Sanity Checking Results and Reproducibility of Human Evaluations in <fixed-case>NLP</fixed-case> RudaliHuidrom - AnyaBelzDublin City University + AnyaBelzDublin City University 354-365 Human-like evaluation by LLMs of NLP systems is currently attracting a lot of interest, and correlations with human reference evaluations are often remarkably strong. However, this is not always the case, for unclear reasons which means that without also meta-evaluating against human evaluations (incurring the very cost automatic evaluation is intended to avoid), we don’t know if an LLM-as-judge evaluation is reliable or not. In this paper, we explore a type of evaluation scenario where this may not matter, because it comes with a built-in reliability check. We apply different LLM-as-judge methods to sets of three comparable human evaluations: (i) an original human evaluation, and (ii) two reproductions of it which produce contradicting reproducibility results. We find that in each case, the different LLM-as-judge methods (i) strongly agree with each other, and (ii) strongly agree with the results of one reproduction, while strongly disagreeing with the other. In combination, we take this to mean that a set of LLMs can be used to sanity check contradictory reproducibility results if the LLMs agree with each other, and the agreement of the LLMs with one set of results, and the disagreement with the other, are both strong. 2025.gem-1.30 @@ -323,8 +323,8 @@ KristofVargaNA Győző ZijianYangHungarian Research Centre for Linguistics EnikőHéjaHungarian Research Centre for Linguistics - TamásVáradiNyelvtudományi Kutatóközpont - GáborPrószékyHungarian Research Centre for Linguistics, Pazmany Peter Catholic University and MorphoLogic + TamásVáradiNyelvtudományi Kutatóközpont + GáborPrószékyHungarian Research Centre for Linguistics, Pazmany Peter Catholic University and MorphoLogic 385-403 In this study, we introduce the Hungarian Generative Model Evaluation (HuGME) benchmark, a new framework designed to assess the linguistic proficiency of large language models (LLMs) in Hungarian. HuGME evaluates models across a diverse set of linguistic and reasoning skills, including bias, toxicity, faithfulness, relevance, summarization, prompt alignment, readability, spelling, grammaticality, and domain-specific knowledge through tasks like TruthfulQA and MMLU. We applied HuGME to a range of Hungarian LLMs, including those developed in-house as well as several publicly available models that claim Hungarian language proficiency. This paper presents the comparative results of these evaluations, shedding light on the capabilities of current LLMs in processing the Hungarian language. Through our analysis, we aim to both showcase the current state of Hungarian linguistic processing in LLMs and provide a foundational resource for future advancements in the field. 2025.gem-1.32 @@ -412,7 +412,7 @@ Event-based evaluation of abstractive news summarization HuilingYou SamiaTouilebUniversity of Bergen - LiljaØvrelidDept. of Informatics, University of Oslo + LiljaØvrelidDept. of Informatics, University of Oslo ErikVelldalUniversity of Oslo 504-510 An abstractive summary of a news article contains its most important information in a condensed version. The evaluation of automatically generated summaries by generative language models relies heavily on human-authored summaries as gold references, by calculating overlapping units or similarity scores. News articles report events, and ideally so should the summaries. In this work, we propose to evaluate the quality of abstractive summaries by calculating overlapping events between generated summaries, reference summaries, and the original news articles. We experiment on a richly annotated Norwegian dataset comprising both events annotations and summaries authored by expert human annotators. Our approach provides more insight into the event information contained in the summaries. @@ -435,7 +435,7 @@ Yeon SuParkKorea Advanced Institute of Science & Technology SunwooKimKorea Advanced Institute of Science & Technology ShinYoo - AliceOhGoogle and Korea Advanced Institute of Science and Technology + AliceOhGoogle and Korea Advanced Institute of Science and Technology 522-531 Evaluating the performance and biases of large language models (LLMs) through role-playing scenarios is becoming increasingly common, as LLMs often exhibit biased behaviors in these contexts. Building on this line of research, we introduce PapersPlease, a benchmark consisting of 3,700 moral dilemmas designed to investigate LLMs’ decision-making in prioritizing various levels of human needs. In our setup, LLMs act as immigration inspectors deciding whether to approve or deny entry based on the short narratives of people. These narratives are constructed using the Existence, Relatedness, and Growth (ERG) theory, which categorizes human needs into three hierarchical levels. Our analysis of six LLMs reveals statistically significant patterns in decision-making, suggesting that LLMs encode implicit preferences. Additionally, our evaluation of the impact of incorporating social identities into the narratives shows varying responsiveness based on both motivational needs and identity cues, with some models exhibiting higher denial rates for marginalized identities. All data is publicly available at https://github.com/yeonsuuuu28/papers-please. 2025.gem-1.47 @@ -465,7 +465,7 @@ <fixed-case>R</fixed-case>epro<fixed-case>H</fixed-case>um #0729-04: Human Evaluation Reproduction Report for “<fixed-case>M</fixed-case>em<fixed-case>S</fixed-case>um: Extractive Summarization of Long Documents Using Multi-Step Episodic <fixed-case>M</fixed-case>arkov Decision Processes” - SimeonJunkerUniversität Bielefeld + SimeonJunkerUniversität Bielefeld 561-567 Human evaluation is indispensable in natural language processing (NLP), as automatic metrics are known to not always align well with human judgments.However, the reproducibility of human evaluations can be problematic since results are susceptible to many factors, the details of which are often missing from the respective works.As part of the ReproHum project, this work aims to reproduce the human evaluation of a single criterion in the paper “MemSum: Extractive Summarization of Long Documents Using Multi-Step Episodic Markov Decision Processes” (Gu et al, 2022).The results of our reproduction differ noticeably from those of the original study. To explain this discrepancy, we discuss differences in the experimental setup, as well as more general characteristics of the selected domain and the generated summaries. 2025.gem-1.50 @@ -523,7 +523,7 @@ <fixed-case>R</fixed-case>epro<fixed-case>H</fixed-case>um #0067-01: A Reproduction of the Evaluation of Cross-Lingual Summarization SupryadiTianjin University ChuangLiuNational Supercomputing Center in Tianjin - DeyiXiongTianjin University + DeyiXiongTianjin University 609-614 Human evaluation is crucial as it offers a nuanced understanding that automated metrics often miss. By reproducing human evaluation, we can gain a better understanding of the original results. This paper is part of the ReproHum project, where our goal is to reproduce human evaluations from previous studies. We report the reproduction results of the human evaluation of cross-lingual summarization conducted by (CITATION). By comparing the original and reproduction studies, we find that our overall evaluation findings are largely consistent with those of the previous study. However, there are notable differences in evaluation scores between the two studies for certain model outputs. These discrepancies highlight the importance of carefully selecting evaluation methodologies and human annotators. 2025.gem-1.56 @@ -563,7 +563,7 @@ Bridging the <fixed-case>LLM</fixed-case> Accessibility Divide? Performance, Fairness, and Cost of Closed versus Open <fixed-case>LLM</fixed-case>s for Automated Essay Scoring KeziaOketch - John P.LalorUniversity of Notre Dame + John P.LalorUniversity of Notre Dame YiYangHong Kong University of Science and Technology AhmedAbbasiUniversity of Notre Dame 655-669 @@ -623,7 +623,7 @@ <fixed-case>P</fixed-case>ersona<fixed-case>T</fixed-case>win: A Multi-Tier Prompt Conditioning Framework for Generating and Evaluating Personalized Digital Twins SihanChenCMU, Carnegie Mellon University - John P.LalorUniversity of Notre Dame + John P.LalorUniversity of Notre Dame YiYangHong Kong University of Science and Technology AhmedAbbasiUniversity of Notre Dame 774-788 @@ -635,7 +635,7 @@ Coreference as an indicator of context scope in multimodal narrative NikolaiIlinykhGöteborg University ShalomLappin - Asad B.SayeedUniversity of Gothenburg + Asad B.SayeedUniversity of Gothenburg SharidLoáicigaUniversity of Gothenburg, Sweden 789-807 We demonstrate that large multimodal language models differ substantially from humans in the distribution of coreferential expressions in a visual storytelling task. We introduce a number of metrics to quantify the characteristics of coreferential patterns in both human- and machine-written texts. Humans distribute coreferential expressions in a way that maintains consistency across texts and images, interleaving references to different entities in a highly varied way. Machines are less able to track mixed references, despite achieving perceived improvements in generation quality. Materials, metrics, and code for our study are available at https://github.com/GU-CLASP/coreference-context-scope. @@ -656,7 +656,7 @@ <fixed-case>MCQF</fixed-case>ormat<fixed-case>B</fixed-case>ench: Robustness Tests for Multiple-Choice Questions HirooTakizawaGraduate University for Advanced Studies SakuSugawaraNational Institute of Informatics - AkikoAizawaNational Institute of Informatics + AkikoAizawaNational Institute of Informatics 824-846 Multiple-choice questions (MCQs) are often used to evaluate large language models (LLMs). They measure LLMs’ general common sense and reasoning abilities, as well as their knowledge in specific domains such as law and medicine. However, the robustness of LLMs to various question formats in MCQs has not been thoroughly evaluated. While there are studies on the sensitivity of LLMs to input variations, research into their responsiveness to different question formats is still limited. In this study, we propose a method to construct tasks to comprehensively evaluate the robustness against format changes of MCQs by decomposing the answering process into several steps. Using this dataset, we evaluate nine LLMs, such as Llama3-70B and Mixtral-8x7B. We find the lack of robustness to differences in the format of MCQs. It is crucial to consider whether the format of MCQs influences their evaluation scores when assessing LLMs using MCQ datasets. 2025.gem-1.69 @@ -707,7 +707,7 @@ Luciano DelCorroMicrosoft Research ArindamMitraResearch, Microsoft Tejas IndulalDhamechaAdobe Systems - Ahmed HassanAwadallahMicrosoft Research + Ahmed HassanAwadallahMicrosoft Research MonojitChoudhuryMohamed bin Zayed University of Artificial Intelligence VishravChaudharyMicrosoft SunayanaSitaramMicrosoft @@ -740,7 +740,7 @@ An Analysis of Datasets, Metrics and Models in Keyphrase Generation FlorianBoudinUniversity of Nantes - AkikoAizawaNational Institute of Informatics + AkikoAizawaNational Institute of Informatics 973-973 Keyphrase generation refers to the task of producing a set of words or phrases that summarises the content of a document. Continuous efforts have been dedicated to this task over the past few years, spreading across multiple lines of research, such as model architectures, data resources, and use-case scenarios. Yet, the current state of keyphrase generation remains unknown as there has been no attempt to review and analyse previous work. In this paper, we bridge this gap by presenting an analysis of over 50 research papers on keyphrase generation, offering a comprehensive overview of recent progress, limitations, and open challenges. Our findings highlight several critical issues in current evaluation practices, such as the concerning similarity among commonly-used benchmark datasets and inconsistencies in metric calculations leading to overestimated performances. Additionally, we address the limited availability of pre-trained models by releasing a strong PLM-based model for keyphrase generation as an effort to facilitate future research. 2025.gem-1.76 @@ -762,7 +762,7 @@ The 2025 <fixed-case>R</fixed-case>epro<fixed-case>NLP</fixed-case> Shared Task on Reproducibility of Evaluations in <fixed-case>NLP</fixed-case>: Overview and Results - AnyaBelzDublin City University + AnyaBelzDublin City University CraigThomsonDublin City University and University of Aberdeen JavierGonzález CorbelleUniversidad de Santiago de Compostela MaloRuelle diff --git a/data/xml/2025.genaidetect.xml b/data/xml/2025.genaidetect.xml index 072bcea900..55741d2462 100644 --- a/data/xml/2025.genaidetect.xml +++ b/data/xml/2025.genaidetect.xml @@ -4,7 +4,7 @@ Proceedings of the 1stWorkshop on GenAI Content Detection (GenAIDetect) FirojAlam - PreslavNakov + PreslavNakov NizarHabash IrynaGurevych ShammurChowdhury @@ -68,7 +68,7 @@ Cross-table Synthetic Tabular Data Detection G. Charbel N.Kindji - Lina M.Rojas Barahona + Lina M.Rojas Barahona ElisaFromont TanguyUrvoy 78–84 @@ -80,7 +80,7 @@ Your Large Language Models are Leaving Fingerprints Hope ElizabethMcGovern RickardStureborg - YoshiSuhara + YoshiSuhara DimitrisAlikaniotis 85–95 It has been shown that fine-tuned transformers and other supervised detectors are effective for distinguishing between human and machine-generated texts in non-adversarial settings, but we find that even simple classifiers on top of n-gram and part-of-speech features can achieve very robust performance on both in- and out-of-domain data. To understand how this is possible, we analyze machine-generated output text in four datasets, finding that LLMs possess unique fingerprints that manifest as slight differences in the frequency of certain lexical and morphosyntactic features. We show how to visualize such fingerprints, describe how they can be used to detect machine-generated text and find that they are even robust across text domains. We find that fingerprints are often persistent across models in the same model family (e.g. 13B parameter LLaMA’s fingerprint is similar to that of 65B parameter LLaMA) and that while a detector trained on text from one model can easily recognize text generated by a model in the same family, it struggles to detect text generated by an unrelated model. @@ -105,7 +105,7 @@ SiddharthMangalik NikitaSoni Dave M.Markowitz - H. AndrewSchwartz + H. AndrewSchwartz 111–119 In recent years, the proliferation of chatbots like ChatGPT and Claude has led to an increasing volume of AI-generated text. While the text itself is convincingly coherent and human-like, the variety of expressed of human attributes may still be limited. Using theoretical individual differences, the fundamental psychological traits which distinguish people, this study reveals a distinctive characteristic of such content: AI-generations exhibit remarkably limited variation in inferrable psychological traits compared to human-authored texts. We present a review and study across multiple datasets spanning various domains. We find that AI-generated text consistently models the authorship of an “average” human with such little variation that, on aggregate, it is clearly distinguishable from human-written texts using unsupervised methods (i.e., without using ground truth labels). Our results show that (1) fundamental human traits are able to accurately distinguish human- and machine-generated text and (2) current generation capabilities fail to capture a diverse range of human traits 2025.genaidetect-1.8 @@ -125,7 +125,7 @@ Text Graph Neural Networks for Detecting <fixed-case>AI</fixed-case>-Generated Content AndricValdez-Valenzuela HelenaGómez-Adorno - ManuelMontes-y-Gómez + ManuelMontes-y-Gómez 134–139 The widespread availability of Large Language Models (LLMs) such as GPT-4 and Llama-3, among others, has led to a surge in machine-generated content across various platforms, including social media, educational tools, and academic settings. While these models demonstrate remarkable capabilities in generating coherent text, their misuse raises significant concerns. For this reason, detecting machine-generated text has become a pressing need to mitigate these risks. This research proposed a novel classification method combining text-graph representations with Graph Neural Networks (GNNs) and different node feature initialization strategies to distinguish between human-written and machine-generated content. Experimental results demonstrate that the proposed approach outperforms traditional machine learning classifiers, highlighting the effectiveness of integrating structural and semantic relationships in text. 2025.genaidetect-1.10 @@ -145,7 +145,7 @@ <fixed-case>DCBU</fixed-case> at <fixed-case>G</fixed-case>en<fixed-case>AI</fixed-case> Detection Task 1: Enhancing Machine-Generated Text Detection with Semantic and Probabilistic Features ZhaowenZhang SonghaoChen - BingquanLiu + BingquanLiu 150–154 This paper presents our approach to the MGT Detection Task 1, which focuses on detecting AI-generated content. The objective of this task is to classify texts as either machine-generated or human-written. We participated in Subtask A, which concentrates on English-only texts. We utilized the RoBERTa model for semantic feature extraction and the LLaMA3 model for probabilistic feature analysis. By integrating these features, we aimed to enhance the system’s classification accuracy. Our approach achieved strong results, with an F1 score of 0.7713 on Subtask A, ranking ninth among 36 teams. These results demonstrate the effectiveness of our feature integration strategy. 2025.genaidetect-1.12 @@ -167,7 +167,7 @@ Abdul Gafar ManuelMeque NisarHussain GrigoriSidorov - AlexanderGelbukh + AlexanderGelbukh 161–165 The ever-increasing spread of AI-generated text, driven by the considerable progress in large language models, entails a real problem for all digital platforms: how to ensure con tent authenticity. The team TechExperts(IPN) presents a method for detecting AI-generated content in English and multilingual contexts, using the google/gemma-2b model fine-tuned for COLING 2025 shared task 1 for English and multilingual. Training results show peak F1 scores of 97.63% for English and 97.87% for multilingual detection, highlighting the model’s effectiveness in supporting content integrity across platforms. 2025.genaidetect-1.14 @@ -186,7 +186,7 @@ Team <fixed-case>U</fixed-case>nibuc - <fixed-case>NLP</fixed-case> at <fixed-case>G</fixed-case>en<fixed-case>AI</fixed-case> Detection Task 1: Qwen it detect machine-generated text? ClaudiuCreanga Teodor-GeorgeMarchitan - Liviu P.Dinu + Liviu P.Dinu 173–177 We explored both masked language models and causal models. For Subtask A, our best model achieved first-place out of 36 teams when looking at F1 Micro (Auxiliary Score) of 0.8333, and second-place when looking at F1 Macro (Main Score) of 0.8301. For causal models, our best model was a fine-tuned version of Qwen and for masked models, our best model was a fine-tuned version of XLM-Roberta-Base. 2025.genaidetect-1.16 @@ -442,7 +442,7 @@ MucahidKutlu Kaan EfeKeleş FatemaAhmad - TasnimMohiuddin + TasnimMohiuddin GeorgeMikros FirojAlam 323–333 diff --git a/data/xml/2025.genaik.xml b/data/xml/2025.genaik.xml index b0d194fd97..f2f237d24a 100644 --- a/data/xml/2025.genaik.xml +++ b/data/xml/2025.genaik.xml @@ -23,7 +23,7 @@ Effective Modeling of Generative Framework for Document-level Relational Triple Extraction PratikSaini - TapasNayak + TapasNayak 1–12 Document-level relation triple extraction (DocRTE) is a complex task that involves three key sub-tasks: entity mention extraction, entity clustering, and relation triple extraction. Past work has applied discriminative models to address these three sub-tasks, either by training them sequentially in a pipeline fashion or jointly training them. However, while end-to-end discriminative or generative models have proven effective for sentence-level relation triple extraction, they cannot be trivially extended to the document level, as they only handle relation extraction without addressing the remaining two sub-tasks, entity mention extraction or clustering. In this paper, we propose a three-stage generative framework leveraging a pre-trained BART model to address all three tasks required for document-level relation triple extraction. Tested on the widely used DocRED dataset, our approach outperforms previous generative methods and achieves competitive performance against discriminative models. 2025.genaik-1.1 @@ -162,7 +162,7 @@ Entity Quality Enhancement in Knowledge Graphs through <fixed-case>LLM</fixed-case>-based Question Answering MortezaKamaladdini Ezzabady - FarahBenamara + FarahBenamara 136–145 Most models for triple extraction from texts primarily focus on named entities. However, real-world applications often comprise non-named entities that pose serious challenges for entity linking and disambiguation. We focus on these entities and propose the first LLM-based entity revision framework to improve the quality of extracted triples via a multi-choice question-answering mechanism. When evaluated on two benchmark datasets, our results show a significant improvement, thereby generating more reliable triples for knowledge graphs. 2025.genaik-1.14 diff --git a/data/xml/2025.gitt.xml b/data/xml/2025.gitt.xml index 380126439b..3240d07bc6 100644 --- a/data/xml/2025.gitt.xml +++ b/data/xml/2025.gitt.xml @@ -25,7 +25,7 @@ Are We Paying Attention to Her? Investigating Gender Disambiguation and Attention in Machine Translation ChiaraManna AfraAlishahi - FrédéricBlain + FrédéricBlain EvaVanmassenhove 1–16 While gender bias in modern Neural Machine Translation (NMT) systems has received much attention, the traditional evaluation metrics for these systems do not fully capture the extent to which models integrate contextual gender cues. We propose a novel evaluation metric called Minimal Pair Accuracy (MPA) which measures the reliance of models on gender cues for gender disambiguation. We evaluate a number of NMT models using this metric, we show that they ignore available gender cues in most cases in favour of (statistical) stereotypical gender interpretation. We further show that in anti-stereotypical cases, these models tend to more consistently take male gender cues into account while ignoring the female cues. Finally, we analyze the attention head weights in the encoder component of these models and show that while all models to some extent encode gender information, the male gender cues elicit a more diffused response compared to the more concentrated and specialized responses to female gender cues. @@ -46,7 +46,7 @@ An <fixed-case>LLM</fixed-case>-as-a-judge Approach for Scalable Gender-Neutral Translation Evaluation AndreaPiergentili BeatriceSavoldi - MatteoNegri + MatteoNegri LuisaBentivogli 46–63 Gender-neutral translation (GNT) aims to avoid expressing the gender of human referents when the source text lacks explicit cues about the gender of those referents. Evaluating GNT automatically is particularly challenging, with current solutions being limited to monolingual classifiers. Such solutions are not ideal because they do not factor in the source sentence and require dedicated data and fine-tuning to scale to new languages. In this work, we address such limitations by investigating the use of large language models (LLMs) as evaluators of GNT. Specifically, we explore two prompting approaches: one in which LLMs generate sentence-level assessments only, and another—akin to a chain-of-thought approach—where they first produce detailed phrase-level annotations before a sentence-level judgment. Through extensive experiments on multiple languages with five models, both open and proprietary, we show that LLMs can serve as evaluators of GNT. Moreover, we find that prompting for phrase-level annotations before sentence-level assessments consistently improves the accuracy of all models, providing a better and more scalable alternative to current solutions. @@ -55,7 +55,7 @@ Did <fixed-case>I</fixed-case> (she) or <fixed-case>I</fixed-case> (he) buy this? Or rather <fixed-case>I</fixed-case> (she/he)? Towards first-person gender neutral translation by <fixed-case>LLM</fixed-case>s - MajaPopović + MajaPopović EkaterinaLapshinova-Koltunski AnastasiiaGöldner 64–73 diff --git a/data/xml/2025.in2writing.xml b/data/xml/2025.in2writing.xml index 0398c8a848..2f6e3dcc2a 100644 --- a/data/xml/2025.in2writing.xml +++ b/data/xml/2025.in2writing.xml @@ -7,7 +7,7 @@ KatyGero ThiemoWambsganss SarahSterman - Ting-HaoHuang + Ting-HaoHuang DavidZhou JohnChung Association for Computational Linguistics @@ -47,7 +47,7 @@ BasharAlhafniNew York University ChatrineQwaiderMohamed bin Zayed University of Artificial Intelligence and Chalmers University of Technology NizarHabashNew York University Abu Dhabi - TedBriscoeMohamed bin Zayed University of Artificial Intelligence + TedBriscoeMohamed bin Zayed University of Artificial Intelligence 11-18 Although Arabic is spoken by over 400 million people, advanced Arabic writing assistance tools remain limited. To address this gap, we present ARWI, a new writing assistant that helps learners improve essay writing in Modern Standard Arabic. ARWI is the first publicly available Arabic writing assistant to include a prompt database for different proficiency levels, an Arabic text editor, state-of-the-art grammatical error detection and correction, and automated essay scoring aligned with the Common European Framework of Reference standards for language attainment (https://arwi.mbzuai.ac.ae/). Moreover, ARWI can be used to gather a growing auto-annotated corpus, facilitating further research on Arabic grammar correction and essay scoring, as well as profiling patterns of errors made by native speakers and non-native learners. A preliminary user study shows that ARWI provides actionable feedback, helping learners identify grammatical gaps, assess language proficiency, and guide improvement. 2025.in2writing-1.2 diff --git a/data/xml/2025.indonlp.xml b/data/xml/2025.indonlp.xml index 238d7009c8..eb7e7cbc16 100644 --- a/data/xml/2025.indonlp.xml +++ b/data/xml/2025.indonlp.xml @@ -52,7 +52,7 @@ Evaluating Structural and Linguistic Quality in <fixed-case>U</fixed-case>rdu <fixed-case>DRS</fixed-case> Parsing and Generation through Bidirectional Evaluation Muhammad SaadAmin LucaAnselma - AlessandroMazzei + AlessandroMazzei 33–43 Evaluating Discourse Representation Structure (DRS)-based systems for semantic parsing (Text-to-DRS) and generation (DRS-to-Text) poses unique challenges, particularly in low-resource languages like Urdu. Traditional metrics often fall short, focusing either on structural accuracy or linguistic quality, but rarely capturing both. To address this limitation, we introduce two complementary evaluation methodologies—Parse-Generate (PARS-GEN) and Generate-Parse (GEN-PARS)—designed for a more comprehensive assessment of DRS-based systems. PARS-GEN evaluates the parsing process by converting DRS outputs back to the text, revealing linguistic nuances often missed by structure-focused metrics like SMATCH. Conversely, GEN-PARS assesses text generation by converting generated text into DRS, providing a semantic perspective that complements surface-level metrics such as BLEU, METEOR, and BERTScore. Using the Parallel Meaning Bank (PMB) dataset, we demonstrate our methodology across Urdu, uncovering unique insights into Urdu’s structural and linguistic interplay. Findings show that traditional metrics frequently overlook the complexity of linguistic and semantic fidelity, especially in low-resource languages. Our dual approach offers a robust framework for evaluating DRS-based systems, enhancing semantic parsing and text generation quality. 2025.indonlp-1.4 @@ -95,7 +95,7 @@ AneeshBose Guneet SinghKohli Smruti SmitaLenka - OndřejBojar + OndřejBojar 58–66 This paper introduces OVQA, the first multimodal dataset designed for visual question-answering (VQA), visual question elicitation (VQE), and multimodal research for the low-resource Odia language. The dataset was created by manually translating 6,149 English question-answer pairs, each associated with 6,149 unique images from the Visual Genome dataset. This effort resulted in 27,809 English-Odia parallel sentences, ensuring a semantic match with the corresponding visual information. Several baseline experiments were conducted on the dataset, including visual question answering and visual question elicitation. The dataset is the first VQA dataset for the low-resource Odia language and will be released for multimodal research purposes and also help researchers extend for other low-resource languages. 2025.indonlp-1.7 diff --git a/data/xml/2025.insights.xml b/data/xml/2025.insights.xml index e2a71c91d4..4fa5098dab 100644 --- a/data/xml/2025.insights.xml +++ b/data/xml/2025.insights.xml @@ -25,7 +25,7 @@ Challenging Assumptions in Learning Generic Text Style Embeddings - PhilOstheimerRPTU Kaiserslautern-Landau + PhilOstheimerRPTU Kaiserslautern-Landau MariusKloftRPTU Kaiserslautern-Landau SophieFellenzRPTU Kaiserslautern-Landau 1-6 @@ -69,9 +69,9 @@ Do Prevalent Bias Metrics Capture Allocational Harms from <fixed-case>LLM</fixed-case>s? - HannahCybereyUniversity of Virginia + HannahCybereyUniversity of Virginia YangfengJiUniversity of Virginia - DavidEvansUniversity of Virginia + DavidEvansUniversity of Virginia 34-45 Allocational harms occur when resources or opportunities are unfairly withheld from specific groups. Many proposed bias measures ignore the discrepancy between predictions, which are what the proposed methods consider, and decisions that are made as a result of those predictions. Our work examines the reliability of current bias metrics in assessing allocational harms arising from predictions of large language models (LLMs). We evaluate their predictive validity and utility for model selection across ten LLMs and two allocation tasks. Our results reveal that commonly-used bias metrics based on average performance gap and distribution distance fail to reliably capture group disparities in allocation outcomes. Our work highlights the need to account for how model predictions are used in decisions, in particular in contexts where they are influenced by how limited resources are allocated. 2025.insights-1.5 @@ -152,7 +152,7 @@ Exploring Limitations of <fixed-case>LLM</fixed-case> Capabilities with Multi-Problem Evaluation ZhengxiangWangStony Brook University JordanKodnerStony Brook University - OwenRambowStony Brook University + OwenRambowStony Brook University 121-140 We propose using prompts made up of multiple problems to evaluate LLM capabilities, an approach we call multi-problem evaluation. We examine 7 LLMs on 4 related task types constructed from 6 existing classification benchmarks. We find that while LLMs can generally perform multiple homogeneous classifications at once (Batch Classification) as well as when they do so separately, they perform significantly worse on two selection tasks that are conceptually equivalent to Batch Classification and involve selecting indices of text falling into each class label, either independently or altogether. We show that such a significant performance drop is due to LLMs’ inability to adequately combine index selection with text classification. Such a drop is surprisingly observed across all LLMs attested, under zero-shot, few-shot, and CoT settings, and even with a novel synthetic dataset, potentially reflecting an inherent capability limitation with modern LLMs. 2025.insights-1.12 diff --git a/data/xml/2025.iwpt.xml b/data/xml/2025.iwpt.xml index 263bfc42a8..21c52e81e0 100644 --- a/data/xml/2025.iwpt.xml +++ b/data/xml/2025.iwpt.xml @@ -49,7 +49,7 @@ High-Accuracy Transition-Based Constituency Parsing JohnBauerStanford University - Christopher D.Manning + Christopher D.Manning 26-39 Constituency parsers have improved markedly in recent years, with the F1 accuracy on the venerable Penn Treebank reaching 96.47, half of the error rate of the first transformer model in 2017. However, while dependency parsing frequently uses transition-based parsers, it is unclear whether transition-based parsing can still provide state-of-the-art results for constituency parsing. Despite promising work by Liu and Zhang in 2017 using an in-order transition-based parser, recent work uses other methods, mainly CKY charts built over LLM encoders. Starting from previous work, we implement self-training and a dynamic oracle to make a language-agnostic transition-based constituency parser. We test on seven languages; using Electra embeddings as the input layer on Penn Treebank, with a self-training dataset built from Wikipedia, our parser achieves a new SOTA F1 of 96.61. 2025.iwpt-1.4 diff --git a/data/xml/2025.iwsds.xml b/data/xml/2025.iwsds.xml index fa42a965be..105d0061a1 100644 --- a/data/xml/2025.iwsds.xml +++ b/data/xml/2025.iwsds.xml @@ -50,7 +50,7 @@ InassRachidi AnasEzzakri JaimeBellver-Soler - Luis FernandoD’Haro + Luis FernandoD’Haro 20–28 This paper presents the design, synthetic generation, and automated evaluation of ArtGenEval-GPT++, an advanced dataset for training and fine-tuning conversational agents with artificial awareness capabilities targeting to the art domain. Building on the foundation of a previously released dataset (ArtGenEval-GPT), the new version introduces enhancements for greater personalization (e.g., gender, ethnicity, age, and knowledge) while addressing prior limitations, including low-quality dialogues and hallucinations. The dataset comprises approximately 12,500 dyadic, multi-turn dialogues generated using state-of-the-art large language models (LLMs). These dialogues span diverse museum scenarios, incorporating varied visitor profiles, emotional states, interruptions, and chatbot behaviors. Objective evaluations confirm the dataset’s quality and contextual coherence. Ethical considerations, including biases and hallucinations, are analyzed, with proposed directions for improving the dataset utility. This work contributes to the development of personalized, context-aware conversational agents capable of navigating complex, real-world environments, such as museums, to enhance visitor engagement and satisfaction. 2025.iwsds-1.3 @@ -96,8 +96,8 @@ XiaochengYang Emre CanAcikgoz SuvodipDey - GokhanTur - DilekHakkani-Tur + GokhanTur + DilekHakkani-Tur 72–102 Large language model (LLM)-based agents have been increasingly used to interact with external environments (e.g., games, APIs, etc.) and solve tasks. However, current frameworks do not enable these agents to work with users and interact with them to align on the details of their tasks and reach user-defined goals; instead, in ambiguous situations, these agents may make decisions based on assumptions. This work introduces ReSpAct (Reason, Speak, and Act), a novel framework that synergistically combines the essential skills for building task-oriented “conversational” agents. ReSpAct addresses this need for agents, expanding on the ReAct approach. ReSpAct framework enables agents to interpret user instructions, reason about complex tasks, execute appropriate actions and engage in dynamic dialogue to seek guidance, clarify ambiguities, understand user preferences, resolve problems, and use the intermediate feedback and responses of users to update their plans. We evaluated ReSpAct with GPT-4 in environments supporting user interaction, such as task-oriented dialogue (MultiWOZ) and interactive decision-making (Alfworld, WebShop), ReSpAct is flexible enough to incorporate dynamic user feedback and addresses prevalent issues like error propagation and agents getting stuck in reasoning loops. This results in more interpretable, human-like task-solving trajectories than baselines relying solely on reasoning traces. In two interactive decision-making benchmarks, AlfWorld and WebShop, ReSpAct outperforms strong reasoning-only method ReAct by an absolute success rate of 6% and 4%, respectively. In the task-oriented dialogue benchmark MultiWOZ, ReSpAct improved Inform and Success scores by 5.5% and 3%, respectively. 2025.iwsds-1.7 @@ -133,7 +133,7 @@ SamuelRamos-Varela JaimeBellver-Soler MarcosEstecha-Garitagoitia - Luis FernandoD’Haro + Luis FernandoD’Haro 129–136 Recent studies suggest that increasing the context window of language models could outperform retrieval-augmented generation (RAG) methods in certain tasks. However, in domains such as art and museums, where information is inherently multimodal, combining images and detailed textual descriptions, this assumption needs closer examination. To explore this, we compare RAG techniques with direct large-context input approaches for answering questions about artworks. Using a dataset of painting images paired with textual information, we develop a synthetic database of question-answer (QA) pairs for evaluating these methods. The focus is on assessing the efficiency and accuracy of RAG in retrieving and using relevant information compared to passing the entire textual context to a language model. Additionally, we experiment with various strategies for segmenting and retrieving text to optimise the RAG pipeline. The results aim to clarify the trade-offs between these approaches and provide valuable insights for interactive systems designed for art and museum contexts. 2025.iwsds-1.10 @@ -155,7 +155,7 @@ MorganVeyret MiguelCouceiro OndrejDusek - Lina M.Rojas Barahona + Lina M.Rojas Barahona 143–153 Large language models (LLMs) gained immense popularity due to their impressive capabilities in unstructured conversations. Empowering LLMs with advanced prompting strategies such as reasoning and acting (ReAct) (Yao et al., 2022) has shown promise in solving complex tasks traditionally requiring reinforcement learning. In this work, we apply the ReAct strategy to guide LLMs performing task-oriented dialogue (TOD). We evaluate ReAct-based LLMs (ReAct-LLMs) both in simulation and with real users. While ReAct-LLMs severely underperform state-of-the-art approaches on success rate in simulation, this difference becomes less pronounced in human evaluation. Moreover, compared to the baseline, humans report higher subjective satisfaction with ReAct-LLM despite its lower success rate, most likely thanks to its natural and confidently phrased responses. 2025.iwsds-1.12 @@ -165,8 +165,8 @@ Design of a conversational agent to support people on suicide risk MarioManso Vázquez José ManuelRamírez Sánchez - CarmenGarcía-Mateo - LauraDocío-Fernández + CarmenGarcía-Mateo + LauraDocío-Fernández Manuel JoséFernández-Iglesias BeatrizGómez-Gómez BeatrizPinal @@ -180,7 +180,7 @@ Optimizing <fixed-case>RAG</fixed-case>: Classifying Queries for Dynamic Processing KabirOlawore - MichaelMcTear + MichaelMcTear YaxinBi DavidGriol 160–164 @@ -206,7 +206,7 @@ KseniaKharitonova Juan ManuelMontero-Martínez DavidPérez Fernández - FernandoFernández-Martínez + FernandoFernández-Martínez 172–175 Conversational AI (ConvAI) systems are gaining growing importance as an alternative for more natural interaction with digital services. In this context, Large Language Models (LLMs) have opened new possibilities for less restricted interaction and richer natural language understanding. However, despite their advanced capabilities, LLMs can pose accuracy and reliability problems, as they sometimes generate factually incorrect or contextually inappropriate content that does not fulfill the regulations or business rules of a specific application domain. In addition, they still do not possess the capability to adjust to users’ needs and preferences, showing emotional awareness, while concurrently adhering to the regulations and limitations of their designated domain. In this paper we present the TrustBoost project, which addresses the challenge of improving trustworthiness of ConvAI from two dimensions: cognition (adaptability, flexibility, compliance, and performance) and affectivity (familiarity, emotional dimension, and perception). The duration of the project is from September 2024 to December 2027. 2025.iwsds-1.16 @@ -247,7 +247,7 @@ Santiago AndrésMoreno-Acevedo AnderGonzález-Docasal Maria InesTorres - AitorÁlvarez + AitorÁlvarez 190–195 This demo paper presents a prototype of a multilingual, speech-based driver assistant, designed to support both English and Basque languages. The inclusion of Basque—a low-resource language with limited domain-specific training data—marks a significant contribution, as publicly available AI models, including Large Language Models, often underperform for such languages compared to high-resource languages like English. Despite these challenges, our system demonstrates robust performance, successfully understanding user queries and delivering rapid responses in a demanding environment: a car simulator. Notably, the system achieves comparable performance in both English and Basque, showcasing its effectiveness in addressing linguistic disparities in AI-driven applications. A demo of our prototype will be available in the workshop. 2025.iwsds-1.19 @@ -257,7 +257,7 @@ Intimebot – A Dialogue Agent for Timekeeping Support ShoaibKhan AlexSamani - RafaelBanchs + RafaelBanchs 196–199 This demo paper presents intimebot, an AI-powered timekeeping solution designed to assist with timekeeping. Timekeeping is a fundamental but also overwhelming and complex task in many professional services practices. Our intimebot demo demonstrates how Artificial Intelligence can be utilized to implement a more efficient timekeeping process within a firm. Based on brief work descriptions provided by the timekeeper, intimebot is able to (1) predict the relevant combination of client, matter, and phase, (2) estimate the work effort hours, and (3) rewrite and normalize the provided work description into a compliant narrative. This can save a significant amount of time for busy professionals while ensuring terms of business compliance and best practices. 2025.iwsds-1.20 @@ -293,7 +293,7 @@ AitorGarcía-Pablos MontseCuadros Arantzadel Pozo - OierLopez de Lacalle + OierLopez de Lacalle AnderSalaberria JeremyBarnes PabloMartínez @@ -385,7 +385,7 @@ JaimeBellver-Soler MarioRodriguez-Cantelar RicardoCórdoba - Luis FernandoD’Haro + Luis FernandoD’Haro 284–289 Recent developments in Multimodal Large Language Models (MLLMs) have provided novel insights into Speech Emotion Recognition (SER). However, combining high-dimensional speech signals with textual tokens can lead to a rapid growth in input tokens, increasing computational costs and inference times. This “token overload” also risks shadowing essential textual cues, affecting the reasoning capabilities of the language model and diluting emotional information crucial to accurate SER. In this paper, we explore different token drop methods that mitigate excessive token counts while preserving both emotional nuances and the core linguistic capabilities of the model. Specifically, we compare various pooling approaches to produce a compact representation. Our preliminary findings suggest that these techniques can reduce computational costs without decreasing SER accuracy. 2025.iwsds-1.30 @@ -393,8 +393,8 @@ Integrating Conversational Entities and Dialogue Histories with Knowledge Graphs and Generative <fixed-case>AI</fixed-case> - GrahamWilcock - KristiinaJokinen + GrahamWilcock + KristiinaJokinen 290–298 Existing methods for storing dialogue history and for tracking mentioned entities in spoken dialogues usually handle these tasks separately. Recent advances in knowledge graphs and generative AI make it possible to integrate them in a framework with a uniform representation for dialogue management. This may help to build more natural and grounded dialogue models that can reduce misunderstanding and lead to more reliable dialogue-based interactions with AI agents. The paper describes ongoing work on this approach. 2025.iwsds-1.31 @@ -405,7 +405,7 @@ AhmedNjifenjou VirgileSucal BassamJabaian - FabriceLefèvre + FabriceLefèvre 299–308 Among the numerous models developed to represent the multifaceted complexity of human personality, particularly in psychology, the Big Five (commonly referred to as ‘OCEAN’, an acronym of its five traits) stands out as a widely used framework. Although personalized chatbots have incorporated this model, existing approaches, such as focusing on individual traits or binary combinations, may not capture the full diversity of human personality. In this study, we propose a five-dimensional vector representation, where each axis corresponds to the degree of presence of an OCEAN trait on a continuous scale from 0 to 1. This representation is designed to enable greater versatility in modeling personality. Application to customer assistance scenarios in French demonstrates that, based on humans-bots as well as bots-bots conversations, assigned personality vectors are distinguishable by both humans and LLMs acting as judges. Both of their subjective evaluations also confirm the measurable impacts of the assigned personality on user experience, agent efficiency, and conversation quality. 2025.iwsds-1.32 diff --git a/data/xml/2025.iwslt.xml b/data/xml/2025.iwslt.xml index dafa1acfc8..b0cf9b1da7 100644 --- a/data/xml/2025.iwslt.xml +++ b/data/xml/2025.iwslt.xml @@ -71,9 +71,9 @@ AlessioBruttiFBK MauroCettoloFBK RobertoGretterFBK - MarcoMatassoniFBK + MarcoMatassoniFBK MohamedNabihFBK - MatteoNegriFBK + MatteoNegriFBK 47-55 Training large-scale models presents challenges not only in terms of resource requirements but also in terms of their convergence. For this reason, the learning rate (LR) is often decreased when the size of a model is increased. Such a simple solution is not enough in the case of speech-to-text (S2T) trainings, where evolved and more complex variants of the Transformer architecture – e.g., Conformer or Branchformer – are used in light of their better performance. As a workaround, OWSM designed a double linear warmup of the LR, increasing it to a very small value in the first phase before updating it to a higher value in the second phase. While this solution worked well in practice, it was not compared with alternative solutions, nor was the impact on the final performance of different LR warmup schedules studied. This paper fills this gap, revealing that i) large-scale S2T trainings demand a sub-exponential LR warmup, and ii) a higher LR in the warmup phase accelerates initial convergence, but it does not boost final performance. 2025.iwslt-1.4 @@ -116,10 +116,10 @@ Conversational <fixed-case>S</fixed-case>imul<fixed-case>MT</fixed-case>: Efficient Simultaneous Translation with Large Language Models MinghanWangMonash University - Thuy-TrangVuMonash University + Thuy-TrangVuMonash University YuxiaWangMBZUAI EhsanShareghiMonash University - GholamrezaHaffariMonash University + GholamrezaHaffariMonash University 93-105 Simultaneous machine translation (SimulMT) presents a challenging trade-off between translation quality and latency. Recent studies have shown that LLMs can achieve good performance in SimulMT tasks. However, this often comes at the expense of high inference costs and latency. In this paper, we propose a conversational SimulMT framework to enhance the inference efficiency of LLM-based SimulMT through multi-turn-dialogue-based decoding where source and target chunks interleave in translation history, enabling the reuse of Key-Value cache. To adapt LLMs to the proposed conversational decoding, we create supervised fine-tuning training data by segmenting parallel sentences using an alignment tool and a novel augmentation technique to enhance generalization. Our experiments with Llama2-7b-chat on three SimulMT benchmarks demonstrate that the proposed method empowers the superiority of LLM in translation quality, meanwhile achieving comparable computational latency with specialized SimulMT models. 2025.iwslt-1.8 @@ -156,7 +156,7 @@ Prompting <fixed-case>LLM</fixed-case>s: Length Control for Isometric Machine Translation DávidJavorskýCharles Univerzity, Faculty of Mathematics and Physics - OndřejBojarCharles University, MFF UFAL + OndřejBojarCharles University, MFF UFAL FrançoisYvonISIR CNRS & Sorbonne Université 119-137 In this study, we explore the effectiveness of isometric machine translation across multiple language pairs (EnoDe, EnoFr, and EnoEs) under the conditions of the IWSLT Isometric Shared Task 2022. Using eight open-source large language models (LLMs) of varying sizes, we investigate how different prompting strategies, varying numbers of few-shot examples, and demonstration selection influence translation quality and length control. We discover that the phrasing of instructions, when aligned with the properties of the provided demonstrations, plays a crucial role in controlling the output length. Our experiments show that LLMs tend to produce shorter translations only when presented with extreme examples, while isometric demonstrations often lead to the models disregarding length constraints. While few-shot prompting generally enhances translation quality, further improvements are marginal across 5, 10, and 20-shot settings. Finally, considering multiple outputs allows to notably improve overall tradeoff between the length and quality, yielding state-of-the-art performance for some language pairs. @@ -194,7 +194,7 @@ MatthewWiesnerJohns Hopkins University DanPoveyXiaomi, Inc. LeibnyPaola Garcia PereraJohns Hopkins University - SanjeevKhudanpurJohns Hopkins University + SanjeevKhudanpurJohns Hopkins University 153-164 Neural transducers (NT) provide an effective framework for speech streaming, demonstrating strong performance in automatic speech recognition (ASR). However, the application of NT to speech translation (ST) remains challenging, as existing approaches struggle with word reordering and performance degradation when jointly modeling ASR and ST, resulting in a gap with attention-based encoder-decoder (AED) models. Existing NT-based ST approaches also suffer from high computational training costs. To address these issues, we propose HENT-SRT (Hierarchical Efficient Neural Transducer for Speech Recognition and Translation), a novel framework that factorizes ASR and translation tasks to better handle reordering. To ensure robust ST while preserving ASR performance, we use self-distillation with CTC consistency regularization. Moreover, we improve computational efficiency by incorporating best practices from ASR transducers, including a down-sampled hierarchical encoder, a stateless predictor, and a pruned transducer loss to reduce training complexity. Finally, we introduce a blank penalty during decoding, reducing deletions and improving translation quality. Our approach is evaluated on three conversational datasets Arabic, Spanish, and Mandarin achieving new state-of-the-art performance among NT models and substantially narrowing the gap with AED-based systems. 2025.iwslt-1.14 @@ -205,7 +205,7 @@ <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman Speech Translation and the Curse of Multidialectality MartinBärUniversity of Malta, University of the Basque Country AndreaDeMarcoUniversity of Malta - GorkaLabakaUPV/EHU + GorkaLabakaUPV/EHU 165-179 In many languages, non-standardized varieties make the development of NLP models challenging. This paper explores various fine-tuning techniques and data setups for training Swiss German to Standard German speech-to-text translation models. While fine-tuning on all available Swiss German data yields the best results, ASR pre-training lowers performance by 1.48 BLEU points, and jointly training on Swiss and Standard German data reduces it by 2.29 BLEU. Our dialect transfer experiments suggest that an equivalent of the Curse of Multilinguality (Conneau et al., 2020) exists in dialectal speech processing, as training on multiple dialects jointly tends to decrease single-dialect performance. However, introducing small amounts of dialectal variability can improve the performance for low-resource dialects. 2025.iwslt-1.15 @@ -229,7 +229,7 @@ <fixed-case>NAVER</fixed-case> <fixed-case>LABS</fixed-case> <fixed-case>E</fixed-case>urope Submission to the Instruction-following Track BeomseokLeeUniversity of Trento MarcelyZanon BoitoNAVER LABS Europe - LaurentBesacierNAVER LABS Europe + LaurentBesacierNAVER LABS Europe IoanCalapodescuNAVER LABS Europe 186-200 In this paper we describe NAVER LABS Europe submission to the instruction-following speech processing short track at IWSLT 2025. We participate in the constrained settings, developing systems that can simultaneously perform ASR, ST, and SQA tasks from English speech input into the following target languages: Chinese, Italian, and German. Our solution leverages two pretrained modules: (1) a speech-to-LLM embedding projector trained using representations from the SeamlessM4T-v2-large speech encoder; and (2) LoRA adapters trained on text data on top of Llama-3.1-8B-Instruct. These modules are jointly loaded and further instruction-tuned for 1K steps on multilingual and multimodal data to form our final system submitted for evaluation. @@ -242,7 +242,7 @@ DebjitDharJadavpur University SohamLahiriJadavpur University TapabrataMondalJadavpur University - SivajiBandyopadhyayJADAVPUR UNIVERSITY + SivajiBandyopadhyayJADAVPUR UNIVERSITY 201-205 This paper presents the submission of the Jadavpur University Computer Science and Engineering Natural Language Processing (JU-CSENLP) Laboratory to the International Conference on Spoken Language Translation (IWSLT) 2025 Indic track, addressing the speech-to-text translation task in both English-to-Indic (Bengali, Hindi, Tamil) and Indic-to-English directions. To tackle the challenges posed by low resource Indian languages, we adopt a cascaded approach leveraging state-of-the-art pre-trained models. For English-to-Indic translation, we utilize OpenAI’s Whisper model for Automatic Speech Recognition (ASR), followed by the Meta’s No Language Left Behind (NLLB)-200-distilled-600M model finetuned for Machine Translation (MT). For the reverse direction, we employ the AI4Bharat’s IndicConformer model for ASR and IndicTrans2 finetuned for MT. Our models are fine-tuned on the provided benchmark dataset to better handle the linguistic diversity and domain-specific variations inherent in the data. Evaluation results demonstrate that our cascaded systems achieve competitive performance, with notable BLEU and chrF++ scores across all language pairs. Our findings highlight the effectiveness of combining robust ASR and MT components in a cascaded pipeline, particularly for low-resource and morphologically rich Indian languages. 2025.iwslt-1.18 @@ -271,7 +271,7 @@ EnesYavuz UganKIT TuAnh DinhKarlsruhe Institute of Technology CarlosMullovKarlsruhe Institute of Technology - AlexanderWaibelCarnegie Mellon + AlexanderWaibelCarnegie Mellon JanNiehuesKarlsruhe Institut of Technology 212-221 This paper presents KIT’s submissions to the IWSLT 2025 low-resource track. We develop both cascaded systems, consisting of Automatic Speech Recognition (ASR) and Machine Translation (MT) models, and end-to-end (E2E) Speech Translation (ST) systems for three language pairs: Bemba, North Levantine Arabic, and Tunisian Arabic into English. Building upon pre-trained models, we fine-tune our systems with different strategies to utilize resources efficiently. This study further explores system enhancement with synthetic data and model regularization. Specifically, we investigate MT-augmented ST by generating translations from ASR data using MT models. For North Levantine, which lacks parallel ST training data, a system trained solely on synthetic data slightly surpasses the cascaded system trained on real data. We also explore augmentation using text-to-speech models by generating synthetic speech from MT data, demonstrating the benefits of synthetic data in improving both ASR and ST performance for Bemba. Additionally, we apply intra-distillation to enhance model performance. Our experiments show that this approach consistently improves results across ASR, MT, and ST tasks, as well as across different pre-trained models. Finally, we apply Minimum Bayes Risk decoding to combine the cascaded and end-to-end systems, achieving an improvement of approximately 1.5 BLEU points. @@ -299,7 +299,7 @@ ThaiBinh NguyenKarlsruhe Institute of Technology SeymanurAktiKarlsruhe Institute of Technology JanNiehuesKarlsruhe Institut of Technology - AlexanderWaibelCarnegie Mellon + AlexanderWaibelCarnegie Mellon 232-244 In this paper, we present the submissions for the Offline ST and Instruction Following (IF) tracks, where we leverage LLMs to enhance performance across all tasks. For the Offline ST track, we propose a pipeline that employs multiple automatic speech recognition systems, whose outputs are fused using an LLM with document-level context. This is followed by a two-step translation process, incorporating additional contextual refinement step to improve translation quality. For the IF track, we develop an end-to-end model that integrates a speech encoder with an LLM to perform a wide range of instruction-following tasks. We complement it with a final document-level refinement stage to further enhance output quality by using contextual information. 2025.iwslt-1.22 @@ -312,7 +312,7 @@ SohamChaudhuriJadavpur University DipanjanSahaJadavpur University DipankarDasJadavpur University - SivajiBandyopadhyayJADAVPUR UNIVERSITY + SivajiBandyopadhyayJADAVPUR UNIVERSITY 245-251 Multi-language Speech-to-Text Translation (ST) plays a crucial role in breaking linguistic barriers, particularly in multilingual regions like India. This paper focuses on building a robust ST system for low resource Indian languages, with a special emphasis on Bengali and Tamil. These languages represent the Indo-Aryan and Dravidian families, respectively. The dataset used in this work comprises spoken content from TED Talks and conferences, paired with transcriptions in English and their translations in Bengali and Tamil. Our work specifically addresses the translation of Bengali and Tamil speech to English text, a critical area given the scarcity of annotated speech data. To enhance translation quality and model robustness, we leverage cross-lingual resources and word level translation strategies. The ultimate goal is to develop an end-to-end ST model capable of real-world deployment for under represented languages. 2025.iwslt-1.23 @@ -324,7 +324,7 @@ JosefJonCharles University WaadBen KhederVocapia Research AndreBeyerBielefeld University - ClaudeBarrasVocapia Research + ClaudeBarrasVocapia Research Jean-LucGauvainCNRS/LIMSI 252-259 We present our IWSLT 2025 submission for the low-resource track on North Levantine Arabic to English speech translation, building on our IWSLT 2024 efforts. We retain last year’s cascade ASR architecture that combines a TDNN-F model and a Zipformer for the ASR step. We upgrade the Zipformer to the Zipformer-Large variant (253 M parameters vs. 66 M) to capture richer acoustic representations. For the MT part, to further alleviate data sparsity, we created a crowd-sourced parallel corpus covering five major Arabic dialects (Tunisian, Levantine, Moroccan, Algerian, Egyptian) curated via rigorous qualification and filtering. We show that using crowd-sourced data is feasible in low-resource scenarios as we observe improved automatic evaluation metrics across all dialects. We also experimented with the dataset under a high-resource scenario, where we had access to a large, high-quality Levantine Arabic corpus from LDC. In this setting, adding the crowd-sourced data does not improve the scores on the official validation set anymore. Our final submission scores 20.0 BLEU on the official test set. @@ -374,7 +374,7 @@ <fixed-case>CUNI</fixed-case>-<fixed-case>NL</fixed-case>@<fixed-case>IWSLT</fixed-case> 2025: End-to-end Offline Speech Translation and Instruction Following with <fixed-case>LLM</fixed-case>s NamLuuCharles University - OndřejBojarCharles University, MFF UFAL + OndřejBojarCharles University, MFF UFAL 282-288 This paper describes the CUNI-NL team’s submission to the IWSLT 2025 Offline Speech Translation and Instruction Following tasks, focusing on transcribing the English audio, and translating the English audio to German text. Our systems follow the end-to-end approach, where each system consists of a pretrained, frozen speech encoder, along with a medium-sized large language model fine-tuned with LoRA on three tasks: 1) transcribing the English audio; 2) directly translating the English audio to German text; and 3) a combination of the above two tasks, i.e. simultaneously transcribing the English audio and translating the English audio to German text. 2025.iwslt-1.28 @@ -433,7 +433,7 @@ <fixed-case>SYSTRAN</fixed-case> @ <fixed-case>IWSLT</fixed-case> 2025 Low-resource track MarkoAvilaSystran by Chapsvision - JosepCregoSystran by Chapsvision + JosepCregoSystran by Chapsvision 324-332 SYSTRAN submitted systems for one language pair in the 2025 Low-Resource Language Track. Our main contribution lies in the tight coupling and light fine-tuning of an ASR encoder (Whisper) with a neural machine translation decoder (NLLB), forming an efficient speech translation pipeline. We present the modeling strategies and optimizations implemented to build a system that, unlike large-scale end-to-end models, performs effectively under constraints of limited training data and computational resources. This approach enables the development of high-quality speech translation in low-resource settings, while ensuring both efficiency and scalability. We also conduct a comparative analysis of our proposed system against various paradigms, including a cascaded Whisper+NLLB setup and direct end-to-end fine-tuning of Whisper. 2025.iwslt-1.33 @@ -458,7 +458,7 @@ JavierIranzo-SanchezAppTek AdriàGiménez PastorUniversitat de Valencia JorgeCivera SaizUPV/MLLP-VRAIN - AlfonsJuanUniversitat Politècnica de València + AlfonsJuanUniversitat Politècnica de València 340-346 This work describes the participation of the MLLP-VRAIN research group in the shared task of the IWSLT 2025 Simultaneous Speech Translation track. Our submission addresses the unique challenges of real-time translation of long-form speech by developing a modular cascade system that adapts strong pre-trained models to streaming scenarios. We combine Whisper Large-V3-Turbo for ASR with the multilingual NLLB-3.3B model for MT, implementing lightweight adaptation techniques rather than training new end-to-end models from scratch. Our approach employs document-level adaptation with prefix training to enhance the MT model’s ability to handle incomplete inputs, while incorporating adaptive emission policies including a wait-k strategy and RALCP for managing the translation stream. Specialized buffer management techniques and segmentation strategies ensure coherent translations across long audio sequences. Experimental results on the ACL60/60 dataset demonstrate that our system achieves a favorable balance between translation quality and latency, with a BLEU score of 31.96 and non-computational-aware StreamLAAL latency of 2.94 seconds. Our final model achieves a preliminary score on the official test set (IWSLT25Instruct) of 29.8 BLEU. Our work demonstrates that carefully adapted pre-trained components can create effective simultaneous translation systems for long-form content without requiring extensive in-domain parallel data or specialized end-to-end training. 2025.iwslt-1.35 @@ -571,7 +571,7 @@ TanelAlumäeTalTech AntoniosAnastasopoulosGMU LuisaBentivogliFBK - OndřejBojarCharles U. + OndřejBojarCharles U. ClaudiaBorgU. Malta FethiBougaresElyadata RoldanoCattoniFBK @@ -596,7 +596,7 @@ YasminMoslemADAPT Centre KentonMurrayJHU SatoshiNakamuraCUHK Shenzhen - MatteoNegriFBK + MatteoNegriFBK JanNiehuesKIT AtulKr. OjhaU. Galway John E.OrtegaNortheastern U. @@ -609,11 +609,11 @@ NiveditaSethiyaIIT Indore ClaytoneSikasoteU. Cape Town MatthiasSperberApple - SebastianStükerZoom + SebastianStükerZoom KatsuhitoSudohNara Women’s U. BrianThompsonAmazon MarcoTurchiZoom - AlexWaibelCMU + AlexWaibelCMU PatrickWilkenAppTek RodolfoZevallosU. Pompeu Fabra VilémZouharETH diff --git a/data/xml/2025.knowledgenlp.xml b/data/xml/2025.knowledgenlp.xml index 0d44a2adff..1607fa4427 100644 --- a/data/xml/2025.knowledgenlp.xml +++ b/data/xml/2025.knowledgenlp.xml @@ -9,7 +9,7 @@ MengJiang GregDurrett HannanehHajishirzi - LukeZettlemoyer + LukeZettlemoyer Association for Computational Linguistics
Albuquerque, New Mexico, USA
May @@ -27,7 +27,7 @@ Entity Retrieval for Answering Entity-Centric Questions - HassanShavarani + HassanShavarani AnoopSarkarSimon Fraser University 1-17 The similarity between the question and indexed documents is a key factor in document retrieval for retrieval-augmented question answering. Although this is typically the only method for obtaining the relevant documents, it is not the sole approach when dealing with entity-centric questions. We study Entity Retrieval, an alternative retrieval method, which rather than relying on question-document similarity, depends on the salient entities within the question to identify the retrieval documents. We conduct an in-depth analysis of the performance of both dense and sparse retrieval methods in comparison to Entity Retrieval. Our findings reveal the great potential of entity-driven methods for improving augmentation document retrieval in both accuracy and efficiency. @@ -133,7 +133,7 @@ SuhasSuresha IshitaVerma ChengChen - Tracy HollowayKing + Tracy HollowayKing MichaelFriedrich 129-140 This paper addresses fine-tuning Large Language Models (LLMs) for function calling tasks when real user interaction data is unavailable. In digital content creation tools, where users express their needs through natural language queries that must be mapped to API calls, the lack of real-world task-specific data and privacy constraints for training on it necessitate synthetic data generation. Existing approaches to synthetic data generation fall short in diversity and complexity, failing to replicate real-world data distributions and leading to suboptimal performance after LLM fine-tuning. We present a novel router-based architecture that leverages domain resources like content metadata and structured knowledge graphs, along with text-to-text and vision-to-text language models to generate high-quality synthetic training data. Our architecture’s flexible routing mechanism enables synthetic data generation that matches observed real-world distributions, addressing a fundamental limitation of traditional approaches. Evaluation on a comprehensive set of real user queries demonstrates significant improvements in both function classification accuracy and API parameter selection. Models fine-tuned with our synthetic data consistently outperform traditional approaches, establishing new benchmarks for function calling tasks. @@ -234,7 +234,7 @@ Chain of Evidences and Evidence to Generate: Prompting for Context Grounded and Retrieval Augmented Reasoning - Md RizwanParvezQatar Computing Research Institute + Md RizwanParvezQatar Computing Research Institute 230-245 While chain-of-thoughts (CoT) prompting has revolutionized how LLMs perform reasoning tasks, its current methods and variations (e.g, Self-consistency, ReACT, Reflexion, Tree-of-Thoughts (ToT), Cumulative Reasoning (CR) etc.,) suffer from limitations like limited context grounding, hallucination/inconsistent output generation, and iterative sluggishness. To overcome these challenges, we introduce a novel mono/dual-step zero-shot prompting framework built upon two unique strategies Chain of Evidences (CoE) and Evidence to Generate (E2G). Instead of unverified reasoning claims, our innovative approaches leverage the power of “evidence for decision making” by first focusing exclusively on the thought sequences explicitly mentioned in the context which then serve as extracted evidence, guiding the LLM’s output generation process with greater precision and efficiency. This simple yet potent approach unlocks the full potential of chain-of-thoughts prompting, facilitating faster, more reliable, and contextually aware reasoning in LLMs. Our framework consistently achieves remarkable results across various knowledge-intensive reasoning and generation tasks, surpassing baseline approaches with state-of-the-art LLMs. For instance, (i) on the LogiQA benchmark using GPT-4, CoE achieves a new state-of-the-art accuracy of 53.8%, surpassing CoT by 18%, ToT by 11%, and CR by 9%; (ii) CoE with PaLM-2 outperforms the variable-shot performance of Gemini Ultra by 0.9 F1 points, achieving an F1 score of 83.3 on DROP. We release our prompts and outputs on these benchmarks as a new instruction tuning dataset for future research at Hugging Face. 2025.knowledgenlp-1.21 @@ -268,7 +268,7 @@ <fixed-case>PROPEL</fixed-case>: Prompt Optimization with Expert Priors for Small and Medium-sized <fixed-case>LLM</fixed-case>s KawinMayilvaghananObserve AI VarunNathanIndian Institute of Science, Indian institute of science, Bangalore - AyushKumar + AyushKumar 272-302 2025.knowledgenlp-1.25 mayilvaghanan-etal-2025-propel @@ -289,7 +289,7 @@ Can dependency parses facilitate generalization in language models? A case study of cross-lingual relation extraction RitamDuttCarnegie Mellon University ShounakSural - CarolynRoseSchool of Computer Science, Carnegie Mellon University + CarolynRoseSchool of Computer Science, Carnegie Mellon University 317-337 In this work, we propose DEPGEN, a framework for evaluating the generalization capabilities of language models on the task of relation extraction, with dependency parses as scaffolds. We use a GNN-based framework that takes dependency parses as input and learns embeddings of entities which are augmented to a baseline multilingual encoder. We also investigate the role of dependency parses when they are included as part of the prompt to LLMs in a zero-shot learning setup. We observe that including off-the-shelf dependency parses can aid relation extraction, with the best performing model having a mild relative improvement of 0.91% and 1.5% in the in-domain and zero-shot setting respectively across two datasets. For the in-context learning setup, we observe an average improvement of 1.67%, with significant gains for low-performing LLMs. We also carry out extensive statistical analysis to investigate how different factors such as the choice of the dependency parser or the nature of the prompt impact performance. We make our code and results publicly available for the research community at https://github.com/ShoRit/multilingual-re.git. 2025.knowledgenlp-1.27 @@ -300,7 +300,7 @@ Can dependency parses facilitate generalization in language models? A case study of cross-lingual relation extraction RitamDuttCarnegie Mellon University ShounakSural - CarolynRoseSchool of Computer Science, Carnegie Mellon University + CarolynRoseSchool of Computer Science, Carnegie Mellon University 338-358 In this work, we propose DEPGEN, a framework for evaluating the generalization capabilities of language models on the task of relation extraction, with dependency parses as scaffolds. We use a GNN-based framework that takes dependency parses as input and learns embeddings of entities which are augmented to a baseline multilingual encoder. We also investigate the role of dependency parses when they are included as part of the prompt to LLMs in a zero-shot learning setup. We observe that including off-the-shelf dependency parses can aid relation extraction, with the best performing model having a mild relative improvement of 0.91% and 1.5% in the in-domain and zero-shot setting respectively across two datasets. For the in-context learning setup, we observe an average improvement of 1.67%, with significant gains for low-performing LLMs. We also carry out extensive statistical analysis to investigate how different factors such as the choice of the dependency parser or the nature of the prompt impact performance. We make our code and results publicly available for the research community at https://github.com/ShoRit/multilingual-re.git. 2025.knowledgenlp-1.28 diff --git a/data/xml/2025.knowllm.xml b/data/xml/2025.knowllm.xml index 70d35bafc2..70351f8273 100644 --- a/data/xml/2025.knowllm.xml +++ b/data/xml/2025.knowllm.xml @@ -38,7 +38,7 @@ SoyeongJeongKorea Advanced Institute of Science & Technology HoyunSongKorea Advanced Institute of Science & Technology HuijeLeeKorea Advanced Institute of Science & Technology - Jong C.ParkKorea Advanced Institute of Science and Technology + Jong C.ParkKorea Advanced Institute of Science and Technology 1-13 The rapid expansion of digital information and knowledge across structured and unstructured sources has heightened the importance of Information Retrieval (IR). While dense retrieval methods have substantially improved semantic matching for general queries, they consistently underperform on queries with explicit temporal constraints–often those containing numerical expressions and time specifiers such as “in 2015.” Existing approaches to Temporal Information Retrieval (TIR) improve temporal reasoning but often suffer from catastrophic forgetting, leading to reduced performance on non-temporal queries. To address this, we propose Time-Specifier Model Merging (TSM), a novel method that enhances temporal retrieval while preserving accuracy on non-temporal queries. TSM trains specialized retrievers for individual time specifiers and merges them into a unified model, enabling precise handling of temporal constraints without compromising non-temporal retrieval. Extensive experiments on both temporal and non-temporal datasets demonstrate that TSM significantly improves performance on temporally constrained queries while maintaining strong results on non-temporal queries, consistently outperforming other training methods. Our code is available at https://github.com/seungyoonee/TSM. 2025.knowllm-1.1 @@ -133,7 +133,7 @@ ZhaoxinFeng JianfeiMa JiexiXu - BoLi + BoLi 84-93 Large language models (LLMs) often demonstrate strong performance by leveraging implicit knowledge acquired during pretraining. Analogical reasoning, which solves new problems by referencing similar known examples, offers a structured way to utilize this knowledge, but can also lead to subtle factual errors and hallucinations. In this work, we investigate whether LLMs can recognize the reliability of their own analogical outputs using black-box uncertainty estimation (UE). We evaluate six UE metrics across two reasoning-intensive tasks: mathematical problem solving (GSM8K) and code generation (Codeforces). Our results show that Kernel Language Entropy (KLE) and Lexical Similarity (LexSim) are the most robust indicators of correctness. Moreover, while analogical prompting increases model confidence over direct prompting, most uncertainty arises during the analogy transfer step. These findings highlight the limitations of analogical knowledge transfer in LLMs and demonstrate the potential of UE methods for detecting hallucinated reasoning in black-box settings. 2025.knowllm-1.8 @@ -146,7 +146,7 @@ JiangFutian YueDeng ChangyangHe - BoLi + BoLi 94-110 We present Meetalk, a retrieval-augmented and knowledge-adaptive system for generating personalized meeting minutes. Although large language models (LLMs) excel at summarizing, their output often lacks faithfulness and does not reflect user-specific structure and style. Meetalk addresses these issues by integrating ASR-based transcription with LLM generation guided by user-derived knowledge. Specifically, Meetalk maintains and updates three structured databases, Table of Contents, Chapter Allocation, and Writing Style, based on user-uploaded samples and editing feedback. These serve as a dynamic memory that is retrieved during generation to ground the model’s outputs. To further enhance reliability, Meetalk introduces hallucination-aware uncertainty markers that highlight low-confidence segments for user review. In a user study in five real-world meeting scenarios, Meetalk significantly outperforms a strong baseline (iFLYTEK ASR + ChatGPT-4o) in completeness, contextual relevance, and user trust. Our findings underscore the importance of knowledge foundation and feedback-driven adaptation in building trustworthy, personalized LLM systems for high-stakes summarization tasks. 2025.knowllm-1.9 diff --git a/data/xml/2025.l2m2.xml b/data/xml/2025.l2m2.xml index 324695ced4..881c54d96c 100644 --- a/data/xml/2025.l2m2.xml +++ b/data/xml/2025.l2m2.xml @@ -29,7 +29,7 @@ Factual Knowledge in Language Models: Robustness and Anomalies under Simple Temporal Context Variations Hichem AmmarKhodjaUniversité d’Aix-Marseille - FredericBechetAcadémie d’Aix-Marseille + FredericBechetAcadémie d’Aix-Marseille QuentinBrabantOrange-labs AlexisNasrAix Marseille University GwénoléLecorvéOrange @@ -52,7 +52,7 @@ From Data to Knowledge: Evaluating How Efficiently Language Models Learn Facts DanielChristoph MaxPlonerHumboldt Universität Berlin - PatrickHallerHumboldt Universität Berlin + PatrickHallerHumboldt Universität Berlin AlanAkbikHumboldt Universität Berlin 29-46 Sample efficiency is a crucial property of language models with practical implications for training efficiency. In real-world text, information follows a long-tailed distribution. Yet, we expect models to learn and recall frequent and infrequent facts. Sample efficient models are better equipped to handle this challenge of learning and retaining rare information without requiring excessive exposure. This study analyzes multiple models of varying architectures and sizes, all trained on the same pre-training data. By annotating relational facts with their frequencies in the training corpus, we examine how model performance varies with fact frequency. Our findings show that most models perform similarly on high-frequency facts but differ notably on low-frequency facts. This analysis provides new insights into the relationship between model architecture, size, and factual learning efficiency. @@ -131,7 +131,7 @@ PeterCarragherCMU, Carnegie Mellon University AbhinandJha RaghavR - Kathleen M.Carley + Kathleen M.Carley 127-141 Large Language Models (LLMs) demonstrate remarkable capabilities in question answering (QA), but metrics for assessing their reliance on memorization versus retrieval remain underdeveloped. Moreover, while finetuned models are state-of-the-art on closed-domain tasks, general-purpose models like GPT-4o exhibit strong zero-shot performance. This raises questions about the trade-offs between memorization, generalization, and retrieval. In this work, we analyze the extent to which multimodal retrieval-augmented VLMs memorize training data compared to baseline VLMs. Using the WebQA benchmark, we contrast finetuned models with baseline VLMs on multihop retrieval and question answering, examining the impact of finetuning on data memorization. To quantify memorization in end-to-end retrieval and QA systems, we propose several proxy metrics by investigating instances where QA succeeds despite retrieval failing. In line with existing work, we find that finetuned models rely more heavily on memorization than retrieval-augmented VLMs, and achieve higher accuracy as a result (72% vs 52% on WebQA test set). Finally, we present the first empirical comparison of the parametric effect between text and visual modalities. Here, we find that image-based questions have parametric response rates that are consistently 15-25% higher than for text-based questions in the WebQA dataset. As such, our measures pose a challenge for future work, both to account for differences in model memorization across different modalities and more generally to reconcile memorization and generalization in joint Retrieval-QA tasks. 2025.l2m2-1.10 @@ -154,7 +154,7 @@ AlisaStoll LukasLangeRobert Bosch GmbH, Bosch HeikeAdelHochschule der Medien (University of Applied Sciences) - HinrichSchuetze + HinrichSchuetze JannikStrötgenKarlsruhe University of Applied Sciences 150-168 Adapting large language models (LLMs) to new and diverse knowledge is essential for their lasting effectiveness in real-world applications. This survey provides an overview of state-of-the-art methods for expanding the knowledge of LLMs, focusing on integrating various knowledge types, including factual information, domain expertise, language proficiency, and user preferences. We explore techniques, such as continual learning, model editing, and retrieval-based explicit adaptation, while discussing challenges like knowledge consistency and scalability. Designed as a guide for researchers and practitioners, this survey sheds light on opportunities for advancing LLMs as adaptable and robust knowledge systems. @@ -203,7 +203,7 @@ SantoshT.y.s.s YanaiElazarAllen Institute for Artificial Intelligence and Department of Computer Science QuirinVogelAlpen-Adria Universität Klagenfurt - BarbaraPlankLudwig-Maximilians-Universität München + BarbaraPlankLudwig-Maximilians-Universität München MatthiasGrabmairTechnische Universität München 205-226 Recent works have shown that Large Language Models (LLMs) have a tendency to memorize patterns and biases present in their training data, raising important questions about how such memorized content influences model behavior. One such concern is the emergence of political bias in LLM outputs. In this paper, we investigate the extent to which LLMs’ political leanings reflect memorized patterns from their pretraining corpora. We propose a method to quantitatively evaluate political leanings embedded in the large pretraining corpora. Subsequently we investigate to whom are the LLMs’ political leanings more aligned with, their pretrainig corpora or the surveyed human opinions. As a case study, we focus on probing the political leanings of LLMs in 32 U.S. Supreme Court cases, addressing contentious topics such as abortion and voting rights. Our findings reveal that LLMs strongly reflect the political leanings in their training data, and no strong correlation is observed with their alignment to human opinions as expressed in surveys. These results underscore the importance of responsible curation of training data, and the methodology for auditing the memorization in LLMs to ensure human-AI alignment. diff --git a/data/xml/2025.latechclfl.xml b/data/xml/2025.latechclfl.xml index accaf5e7e2..506f8b7505 100644 --- a/data/xml/2025.latechclfl.xml +++ b/data/xml/2025.latechclfl.xml @@ -4,7 +4,7 @@ Proceedings of the 9th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature (LaTeCH-CLfL 2025) AnnaKazantseva - StanSzpakowicz + StanSzpakowicz StefaniaDegaetano-Ortlieb YuriBizzoni JanisPagel @@ -219,7 +219,7 @@ Prompting the Past: Exploring Zero-Shot Learning for Named Entity Recognition in Historical Texts Using Prompt-Answering <fixed-case>LLM</fixed-case>s CrinaTudorStockholm University - BeataMegyesiuu.se + BeataMegyesiuu.se RobertÖstlingDepartment of Linguistics, Stockholm University 216-226 This paper investigates the application of prompt-answering Large Language Models (LLMs) for the task of Named Entity Recognition (NER) in historical texts. Historical NER presents unique challenges due to language change through time, spelling variation, limited availability of digitized data (and, in particular, labeled data), and errors introduced by Optical Character Recognition (OCR) and Handwritten Text Recognition (HTR) processes. Leveraging the zero-shot capabilities of prompt-answering LLMs, we address these challenges by prompting the model to extract entities such as persons, locations, organizations, and dates from historical documents. We then conduct an extensive error analysis of the model output in order to identify and address potential weaknesses in the entity recognition process. The results show that, while such models display ability for extracting named entities, their overall performance is lackluster. Our analysis reveals that model performance is significantly affected by hallucinations in the model output, as well as by challenges imposed by the evaluation of NER output. @@ -275,7 +275,7 @@ Identifying Small Talk in Natural Conversations SteffenFrenzelUniversity of Potsdam - AnnetteHautli-JaniszUniversity of Passau + AnnetteHautli-JaniszUniversity of Passau 272-277 Small talk is part and parcel of human interaction and is rather employed to communicate values and opinions than pure information. Despite small talk being an omnipresent phenomenon in spoken language, it is difficult to identify: Small talk is situated, i.e., for interpreting a string of words or discourse units, outside references such as the context of the interlocutors and their previous experiences have to be interpreted.In this paper, we present a dataset of natural conversation annotated with a theoretically well-motivated distillation of what constitutes small talk. This dataset comprises of verbatim transcribed public service encounters in German authorities and are the basis for empirical work in administrative policy on how the satisfaction of the citizen manifests itself in the communication with the authorities. We show that statistical models achieve comparable results to those of state-of-the-art LLMs. 2025.latechclfl-1.24 @@ -285,7 +285,7 @@ Why Novels (Don’t) Break Through: Dynamics of Canonicity in the <fixed-case>D</fixed-case>anish Modern Breakthrough (1870-1900) AlieLasscheAarhus University - PascaleFeldkampCenter for Humanities Computing, Aarhus University + PascaleFeldkampCenter for Humanities Computing, Aarhus University YuriBizzoniAarhus University KatrineBaunvigAarhus University KristofferNielboCenter for Humanities Computing, Aarhus University diff --git a/data/xml/2025.law.xml b/data/xml/2025.law.xml index 6dd0459955..c3652d237b 100644 --- a/data/xml/2025.law.xml +++ b/data/xml/2025.law.xml @@ -56,7 +56,7 @@ Subjectivity in the Annotation of Bridging Anaphora - LaurenLevineGeorgetown University + LaurenLevineGeorgetown University AmirZeldesGeorgetown University 48-59 Bridging refers to the associative relationship between inferable entities in a discourse and the antecedents which allow us to understand them, such as understanding what “the door” means with respect to an aforementioned “house”. As identifying associative relations between entities is an inherently subjective task, it is difficult to achieve consistent agreement in the annotation of bridging anaphora and their antecedents. In this paper, we explore the subjectivity involved in the annotation of bridging instances at three levels: anaphor recognition, antecedent resolution, and bridging subtype selection. To do this, we conduct an annotation pilot on the test set of the existing GUM corpus, and propose a newly developed classification system for bridging subtypes, which we compare to previously proposed schemes. Our results suggest that some previous resources are likely to be severely under-annotated. We also find that while agreement on the bridging subtype category was moderate, annotator overlap for exhaustively identifying instances of bridging is low, and that many disagreements resulted from subjective understanding of the entities involved. @@ -67,7 +67,7 @@ The revision of linguistic annotation in the <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies framework: a look at the annotators’ behavior - Magali SanchesDuranUniversidade de São Paulo + Magali SanchesDuranUniversidade de São Paulo LuceleneLopesUSP - ICMC Thiago Alexandre SalgueiroPardoUniversity of São Paulo 60-69 @@ -92,7 +92,7 @@ Another Approach to Agreement Measurement and Prediction with Emotion Annotations QuanqiDuLT3, Ghent University - VeroniqueHosteLT3, Ghent University + VeroniqueHosteLT3, Ghent University 87-102 Emotion annotation, as an inherently subjective task, often suffers from significant inter-annotator disagreement when evaluated using traditional metrics like kappa or alpha. These metrics often fall short of capturing the nuanced nature of disagreement, especially in multimodal settings. This study introduces Absolute Annotation Difference (AAD), a novel metric offering a complementary perspective on inter- and intra-annotator agreement across different modalities. Our analysis reveals that AAD not only identifies overall agreement levels but also uncovers fine-grained disagreement patterns across modalities often overlooked by conventional metrics. Furthermore, we propose an AAD-based RMSE variant for predicting annotation disagreement. Through extensive experiments on the large-scale DynaSent corpus, we demonstrate that our approach significantly improves disagreement prediction accuracy, rising from 41.71% to 51.64% and outperforming existing methods. Cross-dataset prediction results suggest good generalization. These findings underscore AAD’s potential to enhance annotation agreement analysis and provide deeper insights into subjective NLP tasks. Future work will investigate its applicability to broader emotion-related tasks and other subjective annotation scenarios. 2025.law-1.7 @@ -127,7 +127,7 @@ Bootstrapping <fixed-case>UMR</fixed-case>s from <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for Scalable Multilingual Annotation FedericaGambaCharles University AlexisPalmerUniversity of Colorado Boulder - DanielZemanCharles University, Faculty of Mathematics and Physics + DanielZemanCharles University, Faculty of Mathematics and Physics 126-136 Uniform Meaning Representation (UMR) is a semantic annotation framework designed to be applicable across typologically diverse languages. However, UMR annotation is a labor-intensive task, requiring significant effort and time especially when no prior annotations are available. In this paper, we present a method for bootstrapping UMR graphs by leveraging Universal Dependencies (UD), one of the most comprehensive multilingual resources, encompassing languages across a wide range of language families. Given UMR’s strong typological and cross-linguistic orientation, UD serves as a particularly suitable starting point for the conversion. We describe and evaluate an approach that automatically derives partial UMR graphs from UD trees, providing annotators with an initial representation to build upon. While UD is not a semantic resource, our method extracts useful structural information that aligns with the UMR formalism, thereby facilitating the annotation process. By leveraging UD’s broad typological coverage, this approach offers a scalable way to support UMR annotation across different languages. 2025.law-1.10 @@ -137,7 +137,7 @@ Classifying <fixed-case>TEI</fixed-case> Encoding for <fixed-case>D</fixed-case>utch<fixed-case>D</fixed-case>ra<fixed-case>C</fixed-case>or with Transformer Models FlorianDebaeneGhent University - VeroniqueHosteLT3, Ghent University + VeroniqueHosteLT3, Ghent University 137-141 Computational Drama Analysis relies on well-structured textual data, yet many dramatic works remain in need of encoding. The Dutch dramatic tradition is one such an example, with currently 180 plays available in the DraCor database, while many more plays await integration still. To facilitate this process, we propose a semi-automated TEI encoding annotation methodology using transformer encoder language models to classify structural elements in Dutch drama. We fine-tune 4 Dutch models on the DutchDraCor dataset to predict the 9 most relevant labels used in the DraCor TEI encoding, experimenting with 2 model input settings. Our results show that incorporating additional context through beginning-of-sequence (BOS) and end-of-sequence (EOS) tokens greatly improves performance, increasing the average macro F1 score across models from 0.717 to 0.923 (+0.206). Using the best-performing model, we generate silver-standard DraCor labels for EmDComF, an unstructured corpus of early modern Dutch comedies and farces, paving the way for its integration into DutchDraCor after validation. 2025.law-1.11 @@ -148,7 +148,7 @@ Label Bias in Symbolic Representation of Meaning MarieMikulováCharles University JanŠtěpánekCharles University in Prague, Faculty of Mathematics and Physics, UFAL - JanHajičCharles University + JanHajičCharles University 142-159 This paper contributes to the trend of building semantic representations and exploring the relations between a language and the world it represents. We analyse alternative approaches to semantic representation, focusing on methodology of determining meaning categories, their arrangement and granularity, and annotation consistency and reliability. Using the task of semantic classification of circumstantial meanings within the Prague Dependency Treebank framework, we present our principles for analyzing meaning categories. Compared with the discussed projects, the unique aspect of our approach is its focus on how a language, in its structure, reflects reality. We employ a two-level classification: a higher, coarse-grained set of general semantic concepts (defined by questions: where, how, why, etc.) and a fine-grained set of circumstantial meanings based on data-driven analysis, reflecting meanings fixed in the language. We highlight that the inherent vagueness of linguistic meaning is crucial for capturing the limitless variety of the world but it can lead to label biases in datasets. Therefore, besides semantically clear categories, we also use fuzzy meaning categories. 2025.law-1.12 @@ -229,9 +229,9 @@ Creating Hierarchical Relations in a Multilingual Event-type Ontology - ZdeňkaUrešováCharles University - EvaFučíkováCharles University - JanHajičCharles University + ZdeňkaUrešováCharles University + EvaFučíkováCharles University + JanHajičCharles University 240-249 This paper describes the work on hierarchization of the SynSemClass event-type ontology. The original resource has been extended by a hierarchical structure to model specialization and generalization relations between classes that are formally and technically unrelated in the original ontology. The goal is to enable one to use the ontology enriched by the hierarchical concepts for annotation of running texts in symbolic meaning representations, such as UMR or PDT. The hierarchy is in principle built bottom-up, based on existing SSC classes (concepts). This approach differs from other approaches to semantic classes, such as in WordNet or VerbNet. Although the hierarchical relations are similar, the underlying nodes in the hierarchy are not. In this paper, we describe the challenges related to the principles chosen: single-tree constraint and finding features for the definitions of specificity/generality. Also, a pilot inter-annotator experiment is described that shows the difficulty of the hierarchization task. 2025.law-1.19 @@ -265,7 +265,7 @@ Variety delights (sometimes) - Annotation differences in morphologically annotated corpora AndreaDömötörELTE Department of Digital Humanities BalázsIndigELTE Faculty of Informatics - Dávid MárkNemeskeyELTE DH + Dávid MárkNemeskeyELTE DH 270-278 The goal of annotation standards is to ensure consistency across different corpora and languages. But do they succeed? In our paper we experiment with morphologically annotated Hungarian corpora of different sizes (ELTE DH gold standard corpus, NYTK-NerKor, and Szeged Treebank) to assess their compatibility as a merged training corpus for morphological analysis and disambiguation. Our results show that combining any two corpora not only failed to improve the results of the trained tagger but even degraded them due the inconsistent annotations. Further analysis of the annotation differences among the corpora revealed inconsistencies of several sources: different theoretical approach, lack of consensus, and tagset conversion issues. 2025.law-1.22 @@ -285,7 +285,7 @@ Illuminating Logical Fallacies with the <fixed-case>CAMPFIRE</fixed-case> Corpus AustinBlodgettUS Army Research Lab - ClaireBonialUS Army Research Lab + ClaireBonialUS Army Research Lab Taylor A.PellegrinARL MelissaTorgbiUniversity of Bath HarishTayyar MadabushiUniversity of Bath @@ -313,7 +313,7 @@ OmarMomenUniversity of Bielefeld FlorianSteigUniversity of Bielefeld J. BerenikeHerrmannUniversity of Bielefeld - SinaZarrießUniversity of Bielefeld + SinaZarrießUniversity of Bielefeld 308-325 Descriptions are a central component of literary texts, yet their systematic identification remains a challenge. This work suggests an approach to identifying sentences describing spatial conditions in literary text. It was developed iteratively on German literary text and extended to non-literary text to evaluate its applicability across textual domains. To assess the robustness of the method, we involved both humans and a selection of state-of-the-art Large Language Models (LLMs) in annotating a collection of sentences regarding their descriptiveness and spatiality. We compare the annotations across human annotators and between humans and LLMs. The main contributions of this paper are: (1) a set of annotation guidelines for identifying spatial descriptions in literary texts, (2) a curated dataset of almost 4,700 annotated sentences of which around 500 are spatial descriptions, produced through in-depth discussion and consensus among annotators, and (3) a pilot study of automating the task of spatial description annotation of German texts. We publish the codes and all human and LLM annotations for the public to be used for research purposes only. 2025.law-1.26 @@ -338,7 +338,7 @@ NunoGuimarãesFCUP and INESC TEC RitaRb-SilvaCI-IPOP and Rise-Health Luís FilipeCunhaUniversity of Porto, University of Minho and INESC TEC - AlípioJorgeFCUP and INESC TEC + AlípioJorgeFCUP and INESC TEC 332-343 The development of a robust annotation scheme and corresponding guidelines is crucial for producing annotated datasets that advance both linguistic and computational research. This paper presents a case study that outlines a methodology for designing an annotation scheme and its guidelines, specifically aimed at representing morphosyntactic and semantic information regarding temporal features, as well as medical information in medical reports written in Portuguese. We detail a multi-step process that includes reviewing existing frameworks, conducting an annotation experiment to determine the optimal approach, and designing a model based on these findings. We validated the approach through a pilot experiment where we assessed the reliability and applicability of the annotation scheme and guidelines. In this experiment, two annotators independently annotated a patient’s medical report consisting of six documents using the proposed model, while a curator established the ground truth. The analysis of inter-annotator agreement and the annotation results enabled the identification of sources of human variation and provided insights for further refinement of the annotation scheme and guidelines. 2025.law-1.28 diff --git a/data/xml/2025.llmsec.xml b/data/xml/2025.llmsec.xml index bdc87afb56..f58c373d1e 100644 --- a/data/xml/2025.llmsec.xml +++ b/data/xml/2025.llmsec.xml @@ -11,7 +11,7 @@ llmsec ws 979-8-89176-279-4 - LeonDerczynski + LeonDerczynski JekaterinaNovikova MuhaoChen @@ -69,7 +69,7 @@ QunLiuHuawei Noah’s Ark Lab YaqianZhouFudan University XipengQiuFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 26-47 Recent advancements in model architectures and length extrapolation techniques have significantly extended the context length of large language models (LLMs), paving the way for their application in increasingly complex tasks. However, despite the growing capabilities of long-context LLMs, the safety issues in long-context scenarios remain underexplored. While safety alignment in short context has been widely studied, the safety concerns of long-context LLMs have not been adequately addressed. In this work, we introduce ${textbf{LongSafety}}$, a comprehensive safety alignment dataset for long-context LLMs, containing 10 tasks and 17k samples, with an average length of 40.9k tokens. Our experiments demonstrate that training with LongSafety can enhance long-context safety performance while enhancing short-context safety and preserving general capabilities. Furthermore, we demonstrate that long-context safety does not equal long-context alignment with short-context safety data and LongSafety has generalizing capabilities in context length and long-context safety scenarios. 2025.llmsec-1.4 @@ -142,7 +142,7 @@ Fine-Tuning Lowers Safety and Disrupts Evaluation Consistency - Kathleen C.FraserNational Research Council Canada + Kathleen C.FraserNational Research Council Canada HillaryDawkinsNational Research Council Canada IsarNejadgholiNational Research Council Canada SvetlanaKiritchenkoNational Research Council Canada diff --git a/data/xml/2025.lm4uc.xml b/data/xml/2025.lm4uc.xml index 6d8118eac2..a5f1109111 100644 --- a/data/xml/2025.lm4uc.xml +++ b/data/xml/2025.lm4uc.xml @@ -17,7 +17,7 @@ DucNguyen AngelinaWang DanielHo - AliceOh + AliceOh SanmiKoyejo @@ -49,7 +49,7 @@ <fixed-case>ABDUL</fixed-case>: A New Approach to Build Language Models for Dialects Using Formal Language Corpora Only YassineToughraiUniversité de Lorraine - KamelSmaïliUniversité de Lorraine + KamelSmaïliUniversité de Lorraine DavidLangloisUniversité de Lorraine 16-21 Arabic dialects present major challenges for natural language processing (NLP) due to their diglossic nature, phonetic variability, and the scarcity of resources. To address this, we introduce a phoneme-like transcription approach that enables the training of robust language models for North African Dialects (NADs) using only formal language data, without the need for dialect-specific corpora.Our key insight is that Arabic dialects are highly phonetic, with NADs particularly influenced by European languages. This motivated us to develop a novel approach in which we convert Arabic script into a Latin-based representation, allowing our language model, ABDUL, to benefit from existing Latin-script corpora.Our method demonstrates strong performance in multi-label emotion classification and named entity recognition (NER) across various Arabic dialects. ABDUL achieves results comparable to or better than specialized and multilingual models such as DarijaBERT, DziriBERT, and mBERT. Notably, in the NER task, ABDUL outperforms mBERT by 5% in F1-score for Modern Standard Arabic (MSA), Moroccan, and Algerian Arabic, despite using a vocabulary four times smaller than mBERT. @@ -74,7 +74,7 @@ Serving the Underserved: Leveraging <fixed-case>BARTB</fixed-case>ahnar Language Model for Bahnaric-<fixed-case>V</fixed-case>ietnamese Translation - LongNguyenHo Chi Minh City University of Technology - VNU-HCM + LongNguyenHo Chi Minh City University of Technology - VNU-HCM TranLeHo Chi Minh City University of Technology - VNU-HCM HuongNguyenHo Chi Minh City University of Technology - VNU-HCM QuynhVoHo Chi Minh City University of Technology - VNU-HCM diff --git a/data/xml/2025.loreslm.xml b/data/xml/2025.loreslm.xml index 95272a3132..09629124ee 100644 --- a/data/xml/2025.loreslm.xml +++ b/data/xml/2025.loreslm.xml @@ -6,7 +6,7 @@ HansiHettiarachchi TharinduRanasinghe PaulRayson - RuslanMitkov + RuslanMitkov MohamedGaber DamithPremasiri Fiona AntingTan @@ -49,9 +49,9 @@ ImaneMomayiz XuguangRen EricMoulines - PreslavNakov + PreslavNakov MichalisVazirgiannis - EricXing + EricXing 9–30 We introduce Atlas-Chat, the first-ever collection of LLMs specifically developed for dialectal Arabic. Focusing on Moroccan Arabic, also known as Darija, we construct our instruction dataset by consolidating existing Darija language resources, creating novel datasets both manually and synthetically, and translating English instructions with stringent quality control. Atlas-Chat-2B, 9B, and 27B models, fine-tuned on the dataset, exhibit superior ability in following Darija instructions and performing standard NLP tasks. Notably, our models outperform both state-of-the-art and Arabic-specialized LLMs like LLaMa, Jais, and AceGPT, e.g., our 9B model gains a 13% performance boost over a larger 13B model on DarijaMMLU, in our newly introduced evaluation suite for Darija covering both discriminative and generative tasks. Furthermore, we perform an experimental analysis of various fine-tuning strategies and base model choices to determine optimal configurations. All our resources are publicly accessible, and we believe our work offers comprehensive design methodologies of instruction-tuning for low-resource languages, which are often neglected in favor of data-rich languages by contemporary LLMs. 2025.loreslm-1.2 @@ -124,7 +124,7 @@ <fixed-case>F</fixed-case>ilipino Benchmarks for Measuring Sexist and Homophobic Bias in Multilingual Language Models from <fixed-case>S</fixed-case>outheast <fixed-case>A</fixed-case>sia Lance Calvin LimGamboa - MarkLee + MarkLee 123–134 Bias studies on multilingual models confirm the presence of gender-related stereotypes in masked models processing languages with high NLP resources. We expand on this line of research by introducing Filipino CrowS-Pairs and Filipino WinoQueer: benchmarks that assess both sexist and anti-queer biases in pretrained language models (PLMs) handling texts in Filipino, a low-resource language from the Philippines. The benchmarks consist of 7,074 new challenge pairs resulting from our cultural adaptation of English bias evaluation datasets—a process that we document in detail to guide similar forthcoming efforts. We apply the Filipino benchmarks on masked and causal multilingual models, including those pretrained on Southeast Asian data, and find that they contain considerable amounts of bias. We also find that for multilingual models, the extent of bias learned for a particular language is influenced by how much pretraining data in that language a model was exposed to. Our benchmarks and insights can serve as a foundation for future work analyzing and mitigating bias in multilingual models. 2025.loreslm-1.9 @@ -264,7 +264,7 @@ Exploiting Task Reversibility of <fixed-case>DRS</fixed-case> Parsing and Generation: Challenges and Insights from a Multi-lingual Perspective Muhammad SaadAmin LucaAnselma - AlessandroMazzei + AlessandroMazzei 268–286 Semantic parsing and text generation exhibit reversible properties when utilizing Discourse Representation Structures (DRS). However, both processes—text-to-DRS parsing and DRS-to-text generation—are susceptible to errors. In this paper, we exploit the reversible nature of DRS to explore both error propagation, which is commonly seen in pipeline methods, and the less frequently studied potential for error correction. We investigate two pipeline approaches: Parse-Generate-Parse (PGP) and Generate-Parse-Generate (GPG), utilizing pre-trained language models where the output of one model becomes the input for the next. Our evaluation uses the Parallel Meaning Bank dataset, focusing on Urdu as a low-resource language, Italian as a mid-resource language, and English serving as a high-resource baseline. Our analysis highlights that while pipelines are theoretically suited for error correction, they more often propagate errors, with Urdu exhibiting the greatest sensitivity, Italian showing a moderate effect, and English demonstrating the highest stability. This variation highlights the unique challenges faced by low-resource languages in semantic processing tasks. Further, our findings suggest that these pipeline methods support the development of more linguistically balanced datasets, enabling a comprehensive assessment across factors like sentence structure, length, type, polarity, and voice. Our cross-linguistic analysis provides valuable insights into the behavior of DRS processing in low-resource contexts, demonstrating both the potential and limitations of reversible pipeline approaches. 2025.loreslm-1.22 @@ -274,7 +274,7 @@ <fixed-case>BBPOS</fixed-case>: <fixed-case>BERT</fixed-case>-based Part-of-Speech Tagging for <fixed-case>U</fixed-case>zbek LatofatBobojonova ArofatAkhundjanova - Phil SidneyOstheimer + Phil SidneyOstheimer SophieFellenz 287–293 This paper advances NLP research for the low-resource Uzbek language by evaluating two previously untested monolingual Uzbek BERT models on the part-of-speech (POS) tagging task and introducing the first publicly available UPOS-tagged benchmark dataset for Uzbek. Our fine-tuned models achieve 91% average accuracy, outperforming the baseline multi-lingual BERT as well as the rule-based tagger. Notably, these models capture intermediate POS changes through affixes and demonstrate context sensitivity, unlike existing rule-based taggers. @@ -355,7 +355,7 @@ Controlled Evaluation of Syntactic Knowledge in Multilingual Language Models DariaKryvosheieva - RogerLevy + RogerLevy 402–413 Language models (LMs) are capable of acquiring elements of human-like syntactic knowledge. Targeted syntactic evaluation tests have been employed to measure how well they form generalizations about syntactic phenomena in high-resource languages such as English. However, we still lack a thorough understanding of LMs’ capacity for syntactic generalizations in low-resource languages, which are responsible for much of the diversity of syntactic patterns worldwide. In this study, we develop targeted syntactic evaluation tests for three low-resource languages (Basque, Hindi, and Swahili) and use them to evaluate five families of open-access multilingual Transformer LMs. We find that some syntactic tasks prove relatively easy for LMs while others (agreement in sentences containing indirect objects in Basque, agreement across a prepositional phrase in Swahili) are challenging. We additionally uncover issues with publicly available Transformers, including a bias toward the habitual aspect in Hindi in multilingual BERT and underperformance compared to similar-sized models in XGLM-4.5B. 2025.loreslm-1.30 @@ -388,7 +388,7 @@ When <fixed-case>LLM</fixed-case>s Struggle: Reference-less Translation Evaluation for Low-resource Languages ArchchanaSindhujan DipteshKanojia - ConstantinOrasan + ConstantinOrasan ShenbinQian 437–459 This paper investigates the reference-less evaluation of machine translation for low-resource language pairs, known as quality estimation (QE). Segment-level QE is a challenging cross-lingual language understanding task that provides a quality score (0 -100) to the translated output. We comprehensively evaluate large language models (LLMs) in zero/few-shot scenarios and perform instruction fine-tuning using a novel prompt based on annotation guidelines. Our results indicate that prompt-based approaches are outperformed by the encoder-based fine-tuned QE models. Our error analysis reveals tokenization issues, along with errors due to transliteration and named entities, and argues for refinement in LLM pre-training for cross-lingual tasks. We release the data, and models trained publicly for further research. diff --git a/data/xml/2025.loresmt.xml b/data/xml/2025.loresmt.xml index 5bd3174153..a3cdebd2f4 100644 --- a/data/xml/2025.loresmt.xml +++ b/data/xml/2025.loresmt.xml @@ -3,11 +3,11 @@ Proceedings of the Eighth Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2025) - Atul Kr.Ojha + Atul Kr.Ojha Chao-hongLiu EkaterinaVylomova FlammiePirinen - JonathanWashington + JonathanWashington NathanielOco XiaobingZhao Association for Computational Linguistics @@ -28,7 +28,7 @@ Comparative Evaluation of Machine Translation Models Using Human-Translated Social Media Posts as References: Human-Translated Datasets Shareefa AhmedAl Amer - Mark G.Lee + Mark G.Lee PhillipSmithUniversity of Birmingham 1-9 Machine translation (MT) of social media text presents unique challenges due to its informal nature, linguistic variations, and rapid evolution of language trends. In this paper, we propose a human-translated English dataset to Arabic, Italian, and Spanish, and a human-translated Arabic dataset to Modern Standard Arabic (MSA) and English. We also perform a comprehensive analysis of three publicly accessible MT models using human translations as a reference. We investigate the impact of social media informality on translation quality by translating the MSA version of the text and comparing BLEU and METEOR scores with the direct translation of the original social media posts. Our findings reveal that MarianMT provides the closest translations to human for Italian and Spanish among the three models, with METEOR scores of 0.583 and 0.640, respectively, while Google Translate provides the closest translations for Arabic, with a METEOR score of 0.354. By comparing the translation of the original social media posts with the MSA version, we confirm that the informality of social media text significantly impacts translation quality, with an increase of 12 percentage points in METEOR scores over the original posts. Additionally, we investigate inter-model alignment and the degree to which the output of these MT models align. @@ -59,7 +59,7 @@ From Text to Multi-Modal: Advancing Low-Resource-Language Translation through Synthetic Data Generation and Cross-Modal Alignments BushiXiao QianShen - Daisy ZheWangUniversity of Florida + Daisy ZheWangUniversity of Florida 24-35 In this study, we propose a novel paradigm for multi-modal low resource language dataset generation that eliminates dependency on existing parallel multi-modal datasets. Leveraging advances in large image-generation models, we introduce a systematic pipeline that transforms text-only parallel corpora into rich multi-modal translation datasets. We then validate the generated content through human evaluation. We design and implement a new MMT model framework suitable for our new generated dataset. The model contains a verification mechanism with a large language model to ensure consistency between visual content and textual translations. Experimental results across four African low-resource languages with less than 10k training corpus demonstrate significant improvements over NLLB baselines, with average gains of up to 9.8% in BLEU score and 4.3% in METEOR score. Our method shows particular effectiveness in correctly translating concrete objects and contextual elements, suggesting its potential for improving low-resource machine translation through visual grounding. 2025.loresmt-1.4 @@ -104,7 +104,7 @@ JunyoungLeeHome Team Science and Technology Agency MarcoCognettaTokyo Institute of Technology, Tokyo Institute of Technology and Google SangwhanMoonGoogle and Tokyo Institute of Technology - NaoakiOkazakiInstitute of Science Tokyo + NaoakiOkazakiInstitute of Science Tokyo 66-80 Subword tokenization, where text is represented in an intermediate form between full words and characters, is ubiquitous in modern NLP due to its ability to represent any input sentence with a small vocabulary. However for Korean, where there are 11,172 base characters (*syllables*) in its alphabet, it is difficult to have a vocabulary large enough to succinctly encode text while fitting within parameter-budget constraints. This motivates us to explore an alternative representation for Korean which relies on the decompositional nature of Korean syllables: a syllable can be uniquely decomposed into a sequence of two or three subcharacters (*jamo*), of which there are only 68.Using jamo as the basis for subword tokenization (e.g., byte-pair encoding) leads to shorter tokenized sequences with fewer vocabulary parameters, exposes the model to sub-syllable-level morphological information, and increases the amount of augmentation gained from subword regularization. We evaluate jamo-level subword tokenization on several Korean translation tasks and find that jamo-level subword models consistently outperform syllable- and byte-level models in low-resource and restricted-vocabulary settings. 2025.loresmt-1.8 @@ -130,8 +130,8 @@ Ryan AndrewChi LucasHuang Ethan AChiHudson River Trading - R. ThomasMcCoyYale University - DragomirRadevYale University + R. ThomasMcCoyYale University + DragomirRadevYale University 105-114 We introduce ModeLing, a novel benchmark of Linguistics Olympiad-style puzzles which tests few-shot reasoning in AI systems. Solving these puzzles necessitates inferring aspects of a language’s grammatical structure from a small number of examples. Such puzzles provide a natural testbed for language models, as they require compositional generalization and few-shot inductive reasoning. Consisting solely of new puzzles written specifically for this work, ModeLing has no risk of appearing in the training data of existing AI systems: this ameliorates the risk of data leakage, a potential confounder for many prior evaluations of reasoning. Evaluating several large open source language models and GPT on our benchmark, we observe non-negligible accuracy, demonstrating few-shot emergent reasoning ability which cannot merely be attributed to shallow memorization. However, imperfect model performance suggests that ModeLing can be used to measure further progress in linguistic reasoning. 2025.loresmt-1.10 @@ -165,7 +165,7 @@ Low-Resource Transliteration for <fixed-case>R</fixed-case>oman-<fixed-case>U</fixed-case>rdu and <fixed-case>U</fixed-case>rdu Using Transformer-Based Models UmerButtGerman Research Center for AI and Universität des Saarlandes StalinVaranasi - GünterNeumannGerman Research Center for AI + GünterNeumannGerman Research Center for AI 144-153 As the Information Retrieval (IR) field increasingly recognizes the importance of inclusivity, addressing the needs of low-resource languages remains a significant challenge. Transliteration between Urdu and its Romanized form, Roman Urdu, remains underexplored despite the widespread use of both scripts in South Asia. Prior work using RNNs on the Roman-Urdu-Parl dataset showed promising results but suffered from poor domain adaptability and limited evaluation. We propose a transformer-based approach using the m2m100 multilingual translation model, enhanced with masked language modeling (MLM) pretraining and fine-tuning on both Roman-Urdu-Parl and the domain diverse Dakshina dataset. To address previous evaluation flaws, we introduce rigorous dataset splits and assess performance using BLEU, character-level BLEU, and CHRF. Our model achieves strong transliteration performance, with Char-BLEU scores of 96.37 for Urdu→Roman-Urdu and 97.44 for Roman-Urdu→Urdu. These results outperform both RNN baselines and GPT-4o Mini and demonstrate the effectiveness of multilingual transfer learning for low-resource transliteration tasks. 2025.loresmt-1.13 @@ -179,7 +179,7 @@ AlexMiłowski ThomVaughan SaraHincapie-Monsalve - PedroOrtiz Suarez + PedroOrtiz Suarez KurtBollacker 154-160 The MLCommons Datasets Working Group presents a comprehensive initiative to advance the development and accessibility of artificial intelligence (AI) training and testing resources. This paper introduces three key projects aimed at addressing critical gaps in the AI data ecosystem: the Unsupervised People’s Speech Dataset, containing over 821,000 hours of speech across 89+ languages; a strategic collaboration with Common Crawl to enhance web crawling capabilities for low-resource languages; and a framework for knowledge graph extraction evaluation. By focusing on languages other than English (LOTE) and creating permissively licensed, high-quality datasets, these initiatives aim to democratize AI development and improve model performance across diverse linguistic contexts. This work represents a significant step toward more inclusive and capable AI systems that can serve global communities. @@ -206,7 +206,7 @@ NadiaHajipourInstitute for Humanities and Cultural Studies SadraSabouri EhsaneddinAsgariQatar Computing Research Institute and University of California, Berkeley - HosseinSametiSharif University of Technology + HosseinSametiSharif University of Technology 171-182 The Pahlavi language, aka Middle Persian, is a critical part of Persian cultural and historical heritage which bridges the Old Persian and Modern Persian (Farsi). However, due to its limited digital presence and the scarcity of comprehensive linguistic resources, Pahlavi is at risk of extinction. As an early attempt to preserve this language, this study introduces a framework to translate English text into Pahlavi. Our approach combines grammar-guided term extraction with zero-shot translation, leveraging large language models (LLMs) to generate syntactically and semantically accurate Pahlavi sentences.This framework aims to preserve the Pahlavi language and serves as a model for reviving other endangered languages with similar characteristics. Finally using our framework, we generate a novel dataset of 360 expert-validated parallel English-Pahlavi texts. 2025.loresmt-1.16 diff --git a/data/xml/2025.magmar.xml b/data/xml/2025.magmar.xml index 8b4a897e0a..8b67945238 100644 --- a/data/xml/2025.magmar.xml +++ b/data/xml/2025.magmar.xml @@ -40,7 +40,7 @@ NiloufarBaba AhmadiUniversity of Hamburg, Germany IrisVogelUniversity of Hamburg, Germany MartinSemmannUniversity of Hamburg, Germany - ChrisBiemannUniversity of Hamburg, Germany + ChrisBiemannUniversity of Hamburg, Germany 18-39 In this paper, we introduce CollEx, an innovative multimodal agentic Retrieval-Augmented Generation (RAG) system designed to enhance interactive exploration of extensive scientific collections. Given the overwhelming volume and inherent complexity of scientific collections, conventional search systems often lack necessary intuitiveness and interactivity, presenting substantial barriers for learners, educators, and researchers. CollEx addresses these limitations by employing state-of-the-art Large Vision-Language Models (LVLMs) as multimodal agents accessible through an intuitive chat interface. By abstracting complex interactions via specialized agents equipped with advanced tools, CollEx facilitates curiosity-driven exploration, significantly simplifying access to diverse scientific collections and records therein. Our system integrates textual and visual modalities, supporting educational scenarios that are helpful for teachers, pupils, students, and researchers by fostering independent exploration as well as scientific excitement and curiosity. Furthermore, CollEx serves the research community by discovering interdisciplinary connections and complementing visual data. We illustrate the effectiveness of our system through a proof-of-concept application containing over 64,000 unique records across 32 collections from a local scientific collection from a public university. 2025.magmar-1.2 @@ -50,7 +50,7 @@ <fixed-case>V</fixed-case>ox<fixed-case>RAG</fixed-case>: A Step Toward Transcription-Free <fixed-case>RAG</fixed-case> Systems in Spoken Question Answering ZackaryRackauckasColumbia University - JuliaHirschbergColumbia University + JuliaHirschbergColumbia University 40-46 We introduce VoxRAG, a modular speech-to-speech retrieval-augmented generation system that bypasses transcription to retrieve semantically relevant audio segments directly from spoken queries. VoxRAG employs silence-aware segmentation, speaker diarization, CLAP audio embeddings, and FAISS retrieval using L2-normalized cosine similarity. We construct a 50-query test set recorded as spoken input by a native English speaker. Retrieval quality was evaluated using LLM-as-a-judge annotations. For very relevant segments, cosine similarity achieved a Recall@10 of 0.34. For somewhat relevant segments, Recall@10 rose to 0.60 and nDCG@10 to 0.27, highlighting strong topical alignment. Answer quality was judged on a 0–2 scale across relevance, accuracy, completeness, and precision, with mean scores of 0.84, 0.58, 0.56, and 0.46 respectively. While precision and retrieval quality remain key limitations, VoxRAG shows that transcription-free speech-to-speech retrieval is feasible in RAG systems. 2025.magmar-1.3 diff --git a/data/xml/2025.mcg.xml b/data/xml/2025.mcg.xml index 4833ba76f4..f6a344cb81 100644 --- a/data/xml/2025.mcg.xml +++ b/data/xml/2025.mcg.xml @@ -7,10 +7,10 @@ María EstrellaVallecillo-Rodríguez IruneZubiaga ArturoMontejo-Ráez - AitorSoroa - María TeresaMartín-Valdivia + AitorSoroa + María TeresaMartín-Valdivia MarcoGuerini - RodrigoAgerri + RodrigoAgerri Association for Computational Linguistics
Abu Dhabi, UAE
January diff --git a/data/xml/2025.mtsummit.xml b/data/xml/2025.mtsummit.xml index de848e3637..f52e4290b1 100644 --- a/data/xml/2025.mtsummit.xml +++ b/data/xml/2025.mtsummit.xml @@ -3,11 +3,11 @@ Proceedings of Machine Translation Summit XX: Volume 1 - PierretteBouillon + PierretteBouillon JohannaGerlach SabrinaGirletti LiseVolkart - RaphaelRubino + RaphaelRubino RicoSennrich Ana C.Farinha MarcoGaido @@ -82,8 +82,8 @@ Optimizing the Training Schedule of Multilingual <fixed-case>NMT</fixed-case> using Reinforcement Learning AlexisAllemann - Àlex R.Atrio - AndreiPopescu-Belis + Àlex R.Atrio + AndreiPopescu-Belis 65–80 Multilingual NMT is a viable solution for translating low-resource languages (LRLs) when data from high-resource languages (HRLs) from the same language family is available. However, the training schedule, i.e. the order of presentation of languages, has an impact on the quality of such systems. Here, in a many-to-one translation setting, we propose to apply two algorithms that use reinforcement learning to optimize the training schedule of NMT: (1) Teacher-Student Curriculum Learning and (2) Deep Q Network. The former uses an exponentially smoothed estimate of the returns of each action based on the loss on monolingual or multilingual development subsets, while the latter estimates rewards using an additional neural network trained from the history of actions selected in different states of the system, together with the rewards received. On a 8-to-1 translation dataset with LRLs and HRLs, our second method improves BLEU and COMET scores with respect to both random selection of monolingual batches and shuffled multilingual batches, by adjusting the number of presentations of LRL vs. HRL batches. 2025.mtsummit-1.6 @@ -175,7 +175,7 @@ Testing <fixed-case>LLM</fixed-case>s’ Capabilities in Annotating Translations Based on an Error Typology Designed for <fixed-case>LSP</fixed-case> Translation: First Experiments with <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> JoachimMinder GuillaumeWisniewski - NatalieKübler + NatalieKübler 190–203 This study investigates the capabilities of large language models (LLMs), specifically ChatGPT, in annotating MT outputs based on an error typology. In contrast to previous work focusing mainly on general language, we explore ChatGPT’s ability to identify and categorise errors in specialised translations. By testing two different prompts and based on a customised error typology, we compare ChatGPT annotations with human expert evaluations of translations produced by DeepL and ChatGPT itself. The results show that, for translations generated by DeepL, recall and precision are quite high. However, the degree of accuracy in error categorisation depends on the prompt’s specific features and its level of detail, ChatGPT performing very well with a detailed prompt. When evaluating its own translations, ChatGPT achieves significantly poorer results, revealing limitations with self-assessment. These results highlight both the potential and the limitations of LLMs for translation evaluation, particularly in specialised domains. Our experiments pave the way for future research on open-source LLMs, which could produce annotations of comparable or even higher quality. In the future, we also aim to test the practical effectiveness of this automated evaluation in the context of translation training, particularly by optimising the process of human evaluation by teachers and by exploring the impact of annotations by LLMs on students’ post-editing and translation learning. 2025.mtsummit-1.15 @@ -221,8 +221,8 @@ Intrinsic vs. Extrinsic Evaluation of <fixed-case>C</fixed-case>zech Sentence Embeddings: Semantic Relevance Doesn’t Help with <fixed-case>MT</fixed-case> Evaluation - PetraBarančíková - OndřejBojar + PetraBarančíková + OndřejBojar 265–275 In this paper, we compare Czech-specific and multilingual sentence embedding models through intrinsic and extrinsic evaluation paradigms. For intrinsic evaluation, we employ Costra, a complex sentence transformation dataset, and several Semantic Textual Similarity (STS) benchmarks to assess the ability of the embeddings to capture linguistic phenomena such as semantic similarity, temporal aspects, and stylistic variations. In the extrinsic evaluation, we fine-tune each embedding model using COMET-based metrics for machine translation evaluation. Our experiments reveal an interesting disconnect: models that excel in intrinsic semantic similarity tests do not consistently yield superior performance on downstream translation evaluation tasks. Conversely, models with seemingly over-smoothed embedding spaces can, through fine-tuning, achieve excellent results. These findings highlight the complex relationship between semantic property probes and downstream task, emphasizing the need for more research into “operationalizable semantics” in sentence embeddings, or more in-depth downstream tasks datasets (here translation evaluation). 2025.mtsummit-1.20 @@ -292,7 +292,7 @@ YashBhaskar KetakiShetye VandanMujadia - Dipti MisraSharma + Dipti MisraSharma ParameswariKrishnamurthy 344–352 This study addresses the critical challenge of data scarcity in machine translation for Indian languages, particularly given their morphological complexity and limited parallel data. We investigate an effective strategy to maximize the utility of existing data by generating negative samples from positive training instances using a progressive perturbation approach. This is used for aligning the model with preferential data using Kahneman-Tversky Optimization (KTO). Comparing it against traditional Supervised Fine-Tuning (SFT), we demonstrate how generating negative samples and leveraging KTO enhances data efficiency. By creating rejected samples through progressively perturbed translations from the available dataset, we fine-tune the Llama 3.1 Instruct 8B model using QLoRA across 16 language directions, including English, Hindi, Bangla, Tamil, Telugu, and Santali. Our results show that KTO-based preference alignment with progressive perturbation consistently outperforms SFT, achieving significant gains in translation quality with an average BLEU increase of 1.84 to 2.47 and CHRF increase of 2.85 to 4.01 compared to SFT for selected languages, while using the same positive training samples and under similar computational constraints. This highlights the potential of our negative sample generation strategy within KTO, especially in low resource scenarios. @@ -302,7 +302,7 @@ Leveraging Visual Scene Graph to Enhance Translation Quality in Multimodal Machine Translation AliHatami - MihaelArcan + MihaelArcan PaulBuitelaar 353–364 Despite significant advancements in Multimodal Machine Translation, understanding and effectively utilising visual scenes within multimodal models remains a complex challenge. Extracting comprehensive and relevant visual features requires extensive and detailed input data to ensure the model accurately captures objects, their attributes, and relationships within a scene. In this paper, we explore using visual scene graphs extracted from images to enhance the performance of translation models. We investigate this approach for integrating Visual Scene Graph information into translation models, focusing on representing this information in a semantic structure rather than relying on raw image data. The performance of our approach was evaluated on the Multi30K dataset for English into German, French, and Czech translations using BLEU, chrF2, TER and COMET metrics. Our results demonstrate that utilising visual scene graph information improves translation performance. Using information on semantic structure can improve the multimodal baseline model, leading to better contextual understanding and translation accuracy. @@ -331,7 +331,7 @@ Quality Estimation and Post-Editing Using <fixed-case>LLM</fixed-case>s For <fixed-case>I</fixed-case>ndic Languages: How Good Is It? AnushkaSingh AaryaPakhale - Mitesh M.Khapra + Mitesh M.Khapra RajDabre 388–398 Recently, there have been increasing efforts on Quality Estimation (QE) and Post-Editing (PE) using Large Language Models (LLMs) for Machine Translation (MT). However, the focus has mainly been on high resource languages and the approaches either rely on prompting or combining existing QE models with LLMs, instead of single end-to-end systems. In this paper, we investigate the efficacy of end-to-end QE and PE systems for low-resource languages taking 5 Indian languages as a use-case. We augment existing QE data containing multidimentional quality metric (MQM) error annotations with explanations of errors and PEs with the help of proprietary LLMs (GPT-4), following which we fine-tune Gemma-2-9B, an open-source multilingual LLM to perform QE and PE jointly. While our models attain QE capabilities competitive with or surpassing existing models in both referenceful and referenceless settings, we observe that they still struggle with PE. Further investigation reveals that this occurs because our models lack the ability to accurately identify fine-grained errors in the translation, despite being excellent indicators of overall quality. This opens up opportunities for research in end-to-end QE and PE for low-resource languages. @@ -476,7 +476,7 @@ Improving <fixed-case>MT</fixed-case>-enabled Triage Performance with Multiple <fixed-case>MT</fixed-case> Outputs - Marianna J.Martindale + Marianna J.Martindale MarineCarpuat 592–607 Recent advances in Machine Translation (MT) quality may motivate adoption in a variety of use cases, but the success of MT deployment depends not only on intrinsic model quality but on how well the model, as deployed, helps users meet the objectives of their use case. This work focuses on a specific triage use case, MT-enabled scanning in intelligence analysis. After describing the use case with its objectives and failure modes, we present a user study to establish a baseline performance level and measure the mitigating effects of a simple intervention, providing additional MT outputs. We find significant improvements in relevance judgment accuracy with outputs from two distinct neural MT models and significant improvements in relevant entity identification with the addition of a rule-based MT. Users also like seeing multiple MT outputs, making it an appealing way to improve MT-enabled scanning performance. @@ -507,15 +507,15 @@ Proceedings of Machine Translation Summit XX: Volume 2 - PierretteBouillon + PierretteBouillon JohannaGerlach SabrinaGirletti LiseVolkart - RaphaelRubino + RaphaelRubino RicoSennrich SamuelLäubli MartinVolk - MiquelEsplà-Gomis + MiquelEsplà-Gomis VincentVandeghinste HelenaMoniz SaraSzoc @@ -553,7 +553,7 @@ Speech-to-Speech Translation Pipelines for Conversations in Low-Resource Languages - AndreiPopescu-Belis + AndreiPopescu-Belis AlexisAllemann TeoFerrari GopalKrishnamani @@ -623,7 +623,7 @@ <fixed-case>Z</fixed-case>u<fixed-case>B</fixed-case>idasoa: Participatory Research for the Development of Linguistic Technologies Adapted to the Needs of Migrants in the <fixed-case>B</fixed-case>asque Country XabierSoto AnderEgurtzegi - MaiteOronoz + MaiteOronoz UrtziEtxeberria 75–76 Recent years have witnessed the development of advanced language technologies, including the use of audio and images as part of multimodal systems. However, these models are not adapted to the specific needs of migrants and Non-Governmental Organizations (NGOs) communicating in multilingual scenarios. In this project, we focus on the situation of migrants arriving in the Basque Country, nearby the western border between Spain and France. For identifying migrants’ needs, we have met with several organisations helping them in different stages, including: sea rescue; primary care in refugee camps and in situ; assistance with asylum demands; other administrative issues; and human rights defence in retention centres. In these interviews, Darija has been identified as the most spoken language among the under-served ones. Considering this, we have started the development of a Machine Translation (MT) system between Basque and Darija (Moroccan Arabic), based on open-source corpora. In this paper, we present the description of the project and the main results of the participatory research developed in the initial stage. @@ -687,7 +687,7 @@ Juan AntonioPérez-Ortiz FelipeSánchez-Martínez MiquelEsplà-Gomis - Víctor M.Sánchez-Cartagena + Víctor M.Sánchez-Cartagena 89–90 A significant percentage of the population of Guatemala and Mexico belongs to various Mayan indigenous communities, for whom language barriers lead to social, economic, and digital exclusion. The Mayan languages spoken by these communities remain severely underrepresented in terms of digital resources, which prevents them from leveraging the latest advances in artificial intelligence. This project addresses that problem by means of: 1) the digitisation and release of multiple printed linguistic resources; 2) the development of a high-quality parallel machine translation (MT) evaluation corpus for six Mayan languages. In doing so, we are paving the way for the development of MT systems that will facilitate the access for Mayan speakers to essential services such as healthcare or legal aid. The resources are produced with the essential participation of indigenous communities, whereby native speakers provide the necessary translation services, QA, and linguistic expertise. The project is funded by the Google Academic Research Awards and carried out in collaboration with the Proyecto Lingüístico Francisco Marroquín Foundation in Guatemala. 2025.mtsummit-2.15 @@ -696,7 +696,7 @@ <fixed-case>P</fixed-case>ro<fixed-case>M</fixed-case>ut: The Evolution of <fixed-case>NMT</fixed-case> Didactic Tools PilarSánchez-Gijón - GemaRamírez-Sánchez + GemaRamírez-Sánchez 91–92 Neural Machine Translation intensifies educational challenges in translation technologies. The MultiTraiNMT project developed MutNMT, an open-source, didactic platform for training and evaluating NMT systems. Building upon it, LT-LiDER introduces ProMut which implements three main novel features: migration of the core NMT framework from JoeyNMT to MarianNMT, close integration with OPUS datasets, engines and connectors and the addition of a researcher profile for larger datasets and extended training processes and evaluation. 2025.mtsummit-2.16 @@ -717,7 +717,7 @@ <fixed-case>D</fixed-case>e<fixed-case>MINT</fixed-case>: Automated Language Debriefing for <fixed-case>E</fixed-case>nglish Learners via <fixed-case>AI</fixed-case> Chatbot Analysis of Meeting Transcripts MiquelEsplà-Gomis FelipeSánchez-Martínez - Víctor M.Sánchez-Cartagena + Víctor M.Sánchez-Cartagena Juan AntonioPérez-Ortiz 95–96 The objective of the DeMINT project is to develop a conversational tutoring system aimed at enhancing non-native English speakers’ language skills through post-meeting analysis of the transcriptions of video conferences in which they have participated. This paper describes the model developed and the results obtained through a human evaluation conducted with learners of English as a second language. @@ -747,17 +747,17 @@ <fixed-case>HPLT</fixed-case>’s Second Data Release - NikolayArefyev + NikolayArefyev MikkoAulamo MartaBañón LaurieBurchell PinzhenChen MariiaFedorova - Onade Gibert + Onade Gibert LianeGuillou BarryHaddow - JanHajič - JindřichHelcl + JanHajič + JindřichHelcl ErikHenriksson AndreyKutuzov VeronikaLaippala @@ -768,11 +768,11 @@ DayyánO’Brien StephanOepen SampoPyysalo - GemaRamírez-Sánchez + GemaRamírez-Sánchez DavidSamuel PavelStepachev - JörgTiedemann - DušanVariš + JörgTiedemann + DušanVariš JaumeZaragoza-Bernabeu 101–102 We describe the progress of the High Performance Language Technologies (HPLT) project, a 3-year EU-funded project that started in September 2022. We focus on the up-to-date results on the release of free text datasets derived from web crawls, one of the central objectives of the project. The second release used a revised processing pipeline, and an enlarged set of input crawls. From 4.5 petabytes of web crawls we extracted 7.6T tokens of monolingual text in 193 languages, plus 380 million parallel sentences in 51 language pairs. We also release MultiHPLT, a cross-combination of the parallel data, which produces 1,275 pairs, as well as releasing the containing documents for all parallel sentences in order to enable research in document-level MT. We report changes in the pipeline, analysis and evaluation results for the second parallel data release based on machine translation systems. All datasets are released under a permissive CC0 licence. @@ -788,13 +788,13 @@ NicolasDahan ManonDelorme MathildeHuguin - NatalieKübler + NatalieKübler PaulLerner AlexandraMestivier JoachimMinder Jean-FrançoisNominé ZiqianPeng - LaurentRomary + LaurentRomary PanagiotisTsolakis LichaoZhu FrançoisYvon @@ -807,7 +807,7 @@ Prompt-based Explainable Quality Estimation for <fixed-case>E</fixed-case>nglish-<fixed-case>M</fixed-case>alayalam ArchchanaSindhujan DipteshKanojia - ConstantinOrăsan + ConstantinOrăsan 105–106 The aim of this project was to curate data for the English-Malayalam language pair for the tasks of Quality Estimation (QE) and Automatic Post-Editing (APE) of Machine Translation. Whilst the primary aim of the project was to create a dataset for a low-resource language pair, we plan to use this dataset to investigate different zero-shot and few-shot prompting strategies including chain-of-thought, towards a unified explainable QE-APE framework. 2025.mtsummit-2.23 diff --git a/data/xml/2025.mwe.xml b/data/xml/2025.mwe.xml index 7b5c60b046..21330fc81e 100644 --- a/data/xml/2025.mwe.xml +++ b/data/xml/2025.mwe.xml @@ -3,10 +3,10 @@ Proceedings of the 21st Workshop on Multiword Expressions (MWE 2025) - Atul Kr.Ojha - VoulaGiouli - Verginica BarbuMititelu - MathieuConstant + Atul Kr.Ojha + VoulaGiouli + Verginica BarbuMititelu + MathieuConstant GražinaKorvel A. SezaDoğruöz AlexandreRademaker @@ -49,7 +49,7 @@ <fixed-case>VMWE</fixed-case> identification with models trained on <fixed-case>GUD</fixed-case> (a <fixed-case>UD</fixed-case>v.2 treebank of Standard <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek) - StellaMarkantonatou + StellaMarkantonatou VivianStamouILSP - “Athena” Research Center StavrosBompolasARCHIMEDES Unit | Athena Research Center KaterinaAnastasopoulouHellenic American University and University of Athens @@ -93,7 +93,7 @@ ChayaLiebeskind IrinaLobzhanidzeIlia Chavchavadze State University RusudanMakhachashviliBorys Grinchenko Kyiv Metropolitan University and Borys Grinchenko Kyiv Metropolitan University - StellaMarkantonatou + StellaMarkantonatou AleksandraMarkovicInstitute for the Serbian Language of SASA IvelinaStoyanovaDeaf Studies Institute and Institute for Bulgarian Language, Bulgarian Academy of Sciences 41-57 @@ -106,7 +106,7 @@ A <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese corpus annotated for verbal idioms DavidAntunesINESC-ID Lisboa JorgeBaptistaINESC ID Lisboa and Universidade do Algarve - Nuno J.MamedeInstituto Superior Técnico and INESC-ID + Nuno J.MamedeInstituto Superior Técnico and INESC-ID 58-66 This paper presents the construction of VIDiom-PT, a corpus in European Portuguese annotated for verbal idioms (e.g. O Rui bateu a bota, lit.: Rui hit the boot ‘Rui died’). This linguistic resource aims to support the development of systems capable of processing such constructions in this language variety. To assist in the annotation effort, two tools were built. The first allows for the detection of possible instances of verbal idioms in texts, while the second provides a graphical interface for annotating them. This effort culminated in the annotation of a total of 5,178 instances of 747 different verbal idioms in more than 200,000 sentences in European Portuguese. A highly reliable inter-annotator agreement was achieved, using Krippendorff’s alpha for nominal data (0.869) with 5% of the data independently annotated by 3 experts. Part of the annotated corpus is also made publicly available. 2025.mwe-1.7 diff --git a/data/xml/2025.naacl.xml b/data/xml/2025.naacl.xml index 50de4df39b..8e437a5def 100644 --- a/data/xml/2025.naacl.xml +++ b/data/xml/2025.naacl.xml @@ -36,7 +36,7 @@ Benchmarking Distributional Alignment of Large Language Models NicoleMeister CarlosGuestrinStanford University - TatsunoriHashimotoStanford University + TatsunoriHashimotoStanford University 24-49 Language models (LMs) are increasingly used as simulacra for people, yet their ability to match the distribution of views of a specific demographic group and be distributionally aligned remains uncertain. This notion of distributional alignment is complex, as there is significant variation in the types of attributes that are simulated. Prior works have underexplored the role of three critical variables—the question domain, steering method, and distribution expression method—which motivates our contribution of a benchmark explicitly addressing these dimensions. We construct a dataset expanding beyond political values, create human baselines for this task, and evaluate the extent to which an LM can align with a particular group’s opinion distribution to inform design choices of such simulation systems. Our analysis reveals open problems regarding if, and how, LMs can be used to simulate humans, and that LLMs can more accurately describe the opinion distribution than simulate such distributions. 2025.naacl-long.2 @@ -108,7 +108,7 @@ XiangyanLiuNational University of Singapore BoLan ZhiyuanHu - YangLiu + YangLiu ZhichengZhangAlibaba Group FeiWangXi’an Jiaotong University Michael QizheShiehNational University of Singapore @@ -214,13 +214,13 @@ <fixed-case>LLM</fixed-case>s Are Biased Towards Output Formats! Systematically Evaluating and Mitigating Output Format Bias of <fixed-case>LLM</fixed-case>s - Do XuanLongNational University of Singapore + Do XuanLongNational University of Singapore Ngoc-HaiNguyen TiviatisSimNational University of Singaore, National University of Singapore HieuDaoNational University of Singapore - ShafiqJotySalesForce.com and Nanyang Technological University + ShafiqJotySalesForce.com and Nanyang Technological University KenjiKawaguchiNational University of Singapore - Nancy F.Chen + Nancy F.Chen Min-YenKanNational University of Singapore 299-330 We present the first systematic evaluation examining format bias in performance of large language models (LLMs). Our approach distinguishes between two categories of an evaluation metric under format constraints to reliably and accurately assess performance: one measures performance when format constraints are adhered to, while the other evaluates performance regardless of constraint adherence. We then define a metric for measuring the format bias of LLMs and establish effective strategies to reduce it. Subsequently, we present our empirical format bias evaluation spanning four commonly used categories—multiple-choice question-answer, wrapping, list, and mapping—covering 15 widely-used formats. Our evaluation on eight generation tasks uncovers significant format bias across state-of-the-art LLMs. We further discover that improving the format-instruction following capabilities of LLMs across formats potentially reduces format bias. Based on our evaluation findings, we study prompting and fine-tuning with synthesized format data techniques to mitigate format bias. Our methods successfully reduce the variance in ChatGPT’s performance among wrapping formats from 235.33 to 0.71 (%^2) @@ -301,7 +301,7 @@ <fixed-case>P</fixed-case>eer<fixed-case>QA</fixed-case>: A Scientific Question Answering Dataset from Peer Reviews TimBaumgärtnerTU Darmstadt - TedBriscoeMohamed bin Zayed University of Artificial Intelligence + TedBriscoeMohamed bin Zayed University of Artificial Intelligence IrynaGurevychInstitute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt 508-544 We present PeerQA, a real-world, scientific, document-level Question Answering (QA) dataset. PeerQA questions have been sourced from peer reviews, which contain questions that reviewers raised while thoroughly examining the scientific article. Answers have been annotated by the original authors of each paper. The dataset contains 579 QA pairs from 208 academic articles, with a majority from ML and NLP, as well as a subset of other scientific communities like Geoscience and Public Health.PeerQA supports three critical tasks for developing practical QA systems: Evidence retrieval, unanswerable question classification, and answer generation. We provide a detailed analysis of the collected dataset and conduct experiments establishing baseline systems for all three tasks. Our experiments and analyses reveal the need for decontextualization in document-level retrieval, where we find that even simple decontextualization approaches consistently improve retrieval performance across architectures. On answer generation, PeerQA serves as a challenging benchmark for long-context modeling, as the papers have an average size of 12k tokens. @@ -316,7 +316,7 @@ XiaomingYu, Chinese Academy of Sciences BaolongBi HuaweiShenInstitute of Computing Technology, Chinese Academy of Sciences - XueqiChengInstitute of Computing Technology, Chinese Academy + XueqiChengInstitute of Computing Technology, Chinese Academy 545-561 Large Language Model (LLM) can enhance its credibility and verifiability by generating text with citations. However, existing research on citation generation is predominantly limited to sentence-level statements, neglecting the significance of positional fine-grained citations that can appear anywhere within sentences. To facilitate further exploration of the positional fine-grained citation generation, we propose ALiiCE, the first automatic evaluation framework for this task. Our method employs a dependency tree based approach to parse the sentence-level claim into atomic claims. Then ALiiCE evaluates citation quality using three metrics, including positional fine-grained citation recall, precision, and coefficient of variation of citation positions. We evaluate the positional fine-grained citation generation performance of several LLMs on long-form QA datasets. Our experiments and analyses demonstrate the effectiveness and reasonableness of ALiiCE. We offer our insights into the current advancements and future directions for the positional fine-grained citation generation task. 2025.naacl-long.23 @@ -488,8 +488,8 @@ HilaGonenUniversity of Washington TerraBlevinsUniversität Vienna AlisaLiuUniversity of Washington - LukeZettlemoyerUniversity of Washington, Facebook and Meta - Noah A.SmithUniversity of Washington and Allen Institute for Artificial Intelligence + LukeZettlemoyerUniversity of Washington, Facebook and Meta + Noah A.SmithUniversity of Washington and Allen Institute for Artificial Intelligence 785-798 Despite their wide adoption, the biases and unintended behaviors of language models remain poorly understood. In this paper, we identify and characterize a phenomenon never discussed before, which we call semantic leakage, where models leak irrelevant information from the prompt into the generation in unexpected ways. We propose an evaluation setting to detect semantic leakage both by humans and automatically, curate a diverse test suite for diagnosing this behavior, and measure significant semantic leakage in 13 flagship models. We also show that models exhibit semantic leakage in languages besides English and across different settings and generation scenarios. This discovery highlights yet another type of bias in language models that affects their generation patterns and behavior. 2025.naacl-long.35 @@ -515,7 +515,7 @@ Familiarity: Better Evaluation of Zero-Shot Named Entity Recognition by Quantifying Label Shifts in Synthetic Training Data JonasGolde - PatrickHaller + PatrickHaller MaxPloner FabioBarth NicolaasJedema @@ -644,7 +644,7 @@ Babysit A Language Model From Scratch: Interactive Language Learning by Trials and Demonstrations ZiqiaoMa ZekunWangGeorgia Institute of Technology - JoyceChaiUniversity of Michigan + JoyceChaiUniversity of Michigan 991-1010 Humans are efficient language learners and inherently social creatures. Our language development is largely shaped by our social interactions, for example, the demonstration and feedback from caregivers. Contrary to human language learning, recent advancements in large language models have primarily adopted a non-interactive training paradigm, and refined pre-trained models through feedback afterward. In this work, we explore how corrective feedback from interactions influences neural language acquisition from scratch through systematically controlled experiments, assessing whether it contributes to word learning efficiency in language models. We introduce a trial-and-demonstration (TnD) learning framework that incorporates three distinct components: student trials, teacher demonstrations, and a reward conditioned on language competence at various developmental stages. Our experiments reveal that the TnD approach accelerates word acquisition for student models of equal and smaller numbers of parameters, and we highlight the significance of both trials and demonstrations. We further show that the teacher’s choices of words influence students’ word-specific learning efficiency, and a practice-makes-perfect effect is evident by a strong correlation between the frequency of words in trials and their respective learning curves. Our findings suggest that interactive language learning, with teacher demonstrations and active trials, can facilitate efficient word learning in language models. 2025.naacl-long.46 @@ -664,7 +664,7 @@ <fixed-case>LLM</fixed-case>-Human Pipeline for Cultural Grounding of Conversations - RajkumarPujariPurdue University + RajkumarPujariPurdue University DanGoldwasserPurdue University and Purdue University 1029-1048 Conversations often adhere to well-understood social norms that vary across cultures. For example, while addressing parents by name is commonplace in the West, it is rare in most Asian cultures. Adherence or violation of such norms often dictates the tenor of conversations. Humans are able to navigate social situations requiring cultural awareness quite adeptly. However, it is a hard task for NLP models.In this paper, we tackle this problem by introducing a Cultural Context Schema for conversations. It comprises (1) conversational information such as emotions, dialogue acts, etc., and (2) cultural information such as social norms, violations, etc. We generate ~110k social norm and violation descriptions for ~23k conversations from Chinese culture using LLMs. We refine them using automated verification strategies which are evaluated against culturally aware human judgements. We organize these descriptions into meaningful structures we call Norm Concepts, using an interactive human-in-loop framework. We ground the norm concepts and the descriptions in conversations using symbolic annotation. Finally, we use the obtained dataset for downstream tasks such as emotion, sentiment, and dialogue act detection. We show that it significantly improves the empirical performance. @@ -682,7 +682,7 @@ SonghaiFan TimDwyerMonash University Lay-KiSoonMonash University - GholamrezaHaffariMonash University, Monash University and Monash University + GholamrezaHaffariMonash University, Monash University and Monash University 1049-1074 2025.naacl-long.49 vo-etal-2025-access @@ -910,7 +910,7 @@ SkylerWang AdinaWilliamsFAIR (Meta Platforms Inc.) LeventSagunMeta - Marta R.Costa-jussàMeta + Marta R.Costa-jussàMeta 1454-1468 Text toxicity detection systems exhibit significant biases, producing disproportionate rates of false positives on samples mentioning demographic groups. But what about toxicity detection in speech? To investigate the extent to which text-based biases are mitigated by speech-based systems, we produce a set of high-quality group annotations for the multilingual MuTOX dataset, and then leverage these annotations to systematically compare speech- and text-based toxicity classifiers. Our findings indicate that access to speech data during inference supports reduced bias against group mentions, particularly for ambiguous and disagreement-inducing samples. Our results also suggest that improving classifiers, rather than transcription pipelines, is more helpful for reducing group bias. We publicly release our annotations and provide recommendations for future toxicity dataset construction. 2025.naacl-long.67 @@ -1144,7 +1144,7 @@ YuanzhuoWangChinese Academy of Sciences JieZhangInstitute of Computing Technology, Chinese Academy of Sciences HuaweiShenInstitute of Computing Technology, Chinese Academy of Sciences - XueqiChengInstitute of Computing Technology, Chinese Academy + XueqiChengInstitute of Computing Technology, Chinese Academy 1733-1747 With the expansion of the application of Large Language Models (LLMs), concerns about their safety have grown among researchers. Numerous studies have demonstrated the potential risks of LLMs generating harmful content and have proposed various safety assessment benchmarks to evaluate these risks. However, the evaluation questions in current benchmarks, especially for Chinese, are too straightforward, making them easily rejected by target LLMs, and difficult to update with practical relevance due to their lack of correlation with real-world events. This hinders the effective application of these benchmarks in continuous evaluation tasks. To address these limitations, we propose SafetyQuizzer, a question-generation framework designed to evaluate the safety of LLMs more sustainably in the Chinese context. SafetyQuizzer leverages a finetuned LLM and jailbreaking attack templates to generate subtly offensive questions, which reduces the decline rate. Additionally, by utilizing retrieval-augmented generation, SafetyQuizzer incorporates the latest real-world events into evaluation questions, improving the adaptability of the benchmarks. Our experiments demonstrate that evaluation questions generated by SafetyQuizzer significantly reduce the decline rate compared to other benchmarks while maintaining a comparable attack success rate. Our code is available at https://github.com/zhichao-stone/SafetyQuizzer. Warning: this paper contains examples that may be offensive or upsetting. 2025.naacl-long.85 @@ -1242,7 +1242,7 @@ Shamsuddeen HassanMuhammadImperial College London and Bayero University, Kano-Nigeria IdrisAbdulmuminAhmadu Bello University Abinew AliAyeleBahir Dar University, Universität Hamburg - David IfeoluwaAdelaniMcGill University + David IfeoluwaAdelaniMcGill University Ibrahim SaidAhmadNortheastern University Saminu MohammadAliyu PaulRöttgerBocconi University @@ -1319,7 +1319,7 @@ <fixed-case>DAWN</fixed-case>-<fixed-case>ICL</fixed-case>: Strategic Planning of Problem-solving Trajectories for Zero-Shot In-Context Learning XinyuTangRenmin University of China XiaoleiWangRenmin University of China - XinZhaoRenmin University of China + XinZhaoRenmin University of China Ji-RongWenRenmin University of China 1918-1934 Zero-shot in-context learning (ZS-ICL) aims to conduct in-context learning (ICL) without using human-annotated demonstrations.Existing ZS-ICL methods either use large language models (LLMs) to generate (input, label) pairs as pseudo-demonstrations or leverage historical pseudo-demonstrations to help solve the current problem.They assume that all problems are from the same task and traverse them in a random order.However, in real-world scenarios, problems usually come from diverse tasks, and only a few belong to the same task.The random traversing order may generate unreliable pseudo-demonstrations and lead to error accumulation.To address this problem, we reformulate ZS-**ICL** as a planning problem and propose a **D**emonstration-**AW**are Mo**N**te Carlo Tree Search (MCTS) approach (DAWN-ICL), which leverages MCTS to strategically plan the problem-solving trajectories for ZS-ICL.In addition, to achieve effective and efficient Q value estimation, we propose a demonstration-aware Q-value function and use it to enhance the selection phase and accelerate the expansion and simulation phases in MCTS.Extensive experiments demonstrate the effectiveness and efficiency of DAWN-ICL on in-domain and cross-domain scenarios, and it even outperforms ICL using human-annotated demonstrations.The code is available at https://github.com/txy77/MCTS4ZSICL. @@ -1359,7 +1359,7 @@ TaylorSorensenUniversity of Washington and Brigham Young University XimingLuUniversity of Washington MariaAntoniak - Bill YuchenLinxAI and University of Washington + Bill YuchenLinxAI and University of Washington NiloofarMireshghallah ChandraBhagavatulaAllen Institute for Artificial Intelligence YejinChoiComputer Science Department, Stanford University and NVIDIA @@ -1413,7 +1413,7 @@ Discourse-Driven Evaluation: Unveiling Factual Inconsistency in Long Document Summarization YangZhongUniversity of Pittsburgh - DianeLitmanUniversity of Pittsburgh, University of Pittsburgh and University of Pittsburgh + DianeLitmanUniversity of Pittsburgh, University of Pittsburgh and University of Pittsburgh 2050-2073 Detecting factual inconsistency for long document summarization remains challenging, given the complex structure of the source article and long summary length. In this work, we study factual inconsistency errors and connect them with a line of discourse analysis. We find that errors are more common in complex sentences and are associated with several discourse features. We propose a framework that decomposes long texts into discourse-inspired chunks and utilizes discourse information to better aggregate sentence-level scores predicted by NLI models. Our approach shows improved performance on top of different model baselines over several evaluation benchmarks, covering rich domains of texts, focusing on long document summarization. This underscores the significance of incorporating discourse features in developing models for scoring summaries for long document factual inconsistency. 2025.naacl-long.103 @@ -1447,7 +1447,7 @@ Uplifting Lower-Income Data: Strategies for Socioeconomic Perspective Shifts in Large Multi-modal Models JoanNwatuUniversity of Michigan - Ann Arbor OanaIgnatSanta Clara University - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan 2127-2144 Recent work has demonstrated that the unequal representation of cultures and socioeconomic groups in training data leads to biased Large Multi-modal (LMM) models. To improve LMM model performance on underrepresented data, we propose and evaluate several prompting strategies using non-English, geographic, and socioeconomic attributes. We show that these geographic and socioeconomic integrated prompts favor retrieving topic appearances commonly found in data from low-income households across different countries leading to improved LMM model performance on lower-income data. Our analyses identify and highlight contexts where these strategies yield the most improvements. 2025.naacl-long.106 @@ -1535,7 +1535,7 @@ IvanLazichny AlexanderPanchenkoSkoltech MaximPanovMohamed bin Zayed University of Artificial Intelligence - TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne + TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne ArtemShelmanovMohamed bin Zayed University of Artificial Intelligence 2246-2262 Uncertainty quantification (UQ) is a prominent approach for eliciting truthful answers from large language models (LLMs). To date, information-based and consistency-based UQ have been the dominant UQ methods for text generation via LLMs. Density-based methods, despite being very effective for UQ in text classification with encoder-based models, have not been very successful with generative LLMs. In this work, we adapt Mahalanobis Distance (MD) – a well-established UQ technique in classification tasks – for text generation and introduce a new supervised UQ method. Our method extracts token embeddings from multiple layers of LLMs, computes MD scores for each token, and uses linear regression trained on these features to provide robust uncertainty scores. Through extensive experiments on eleven datasets, we demonstrate that our approach substantially improves over existing UQ methods, providing accurate and computationally efficient uncertainty scores for both sequence-level selective generation and claim-level fact-checking tasks. Our method also exhibits strong generalization to out-of-domain data, making it suitable for a wide range of LLM-based applications. @@ -1704,7 +1704,7 @@ XinWang CheLiu ZhedaMai - MiZhangThe Ohio State University + MiZhangThe Ohio State University 2485-2497 Long-context Multimodal Large Language Models (MLLMs) that incorporate long text-image and text-video modalities, demand substantial computational resources as their multimodal Key-Value (KV) cache grows with increasing input lengths, challenging memory and time efficiency. For multimodal scenarios, the cross-modal interactions inevitablely increase complexity, and prior methods for KV cache compression, in both text-only and multimodal LLMs, have neglected attention density variations across layers, often adopting uniform or progressive reduction strategis for layer-wise cache allocation. This results in precision loss and suboptimal performance. We propose MEDA, a novel approach specifically designed for the complexities of multimodal settings, dynamically allocating KV cache sizes based on attention entropy to better adapt to multimodal interactions.Through a dynamic multimodal KV cache allocation strategy, MEDA compresses the KV cache, adaptively retains sufficient multimodal information at each layer. Meanwhile, to mitigate the degradation of contextual information due to cache compression, we also integrate KV pairs merging techniques to maintain coherence. MEDA achieves up to 72% KV cache memory reduction and 2.82 faster decoding speeds in some cases, while maintaining or enhancing performance on various multimodal tasks in a long context, including multi-image and long video scenarios. 2025.naacl-long.125 @@ -1750,7 +1750,7 @@ No Simple Answer to Data Complexity: An Examination of Instance-Level Complexity Metrics for Classification Tasks Ryan A.Cook - John P.LalorUniversity of Notre Dame + John P.LalorUniversity of Notre Dame AhmedAbbasiUniversity of Notre Dame 2553-2573 Natural Language Processing research has become increasingly concerned with understanding data quality and complexity at the instance level. Instance-level complexity scores can be used for tasks such as filtering out noisy observations and subsampling informative examples. However, there exists a diverse taxonomy of complexity metrics that can be used for a classification task, making metric selection itself a difficult task. We empirically examine the relationship between these metrics and find that simply storing training loss provides similar complexity rankings as other more computationally intensive techniques. Metric similarity allows us to subsample data with higher aggregate complexity along several metrics using a single a priori available meta-feature. Further, this choice of complexity metric does not impact demographic fairness, even in downstream predictions. Researchers should consider metric availability and similarity, as using the wrong metric or sampling strategy may hurt performance. @@ -1853,7 +1853,7 @@ KyuminLeeWorcester Polytechnic Institute KaizeDingNorthwestern University ZhengyangWangAmazon - ZhihanZhang + ZhihanZhang JingboShangUniversity of California, San Diego XianLiAmazon TrishulChilimbiAmazon @@ -1881,11 +1881,11 @@ <fixed-case>I</fixed-case>roko<fixed-case>B</fixed-case>ench: A New Benchmark for <fixed-case>A</fixed-case>frican Languages in the Age of Large Language Models - David IfeoluwaAdelaniMcGill University + David IfeoluwaAdelaniMcGill University JessicaOjoLelapa AI Israel AbebeAzime Jian YunZhuang - Jesujoba OluwadaraAlabiUniversität des Saarlandes + Jesujoba OluwadaraAlabiUniversität des Saarlandes XuanliHeUniversity College London, University of London MillicentOchiengMicrosoft SaraHookerCohere For AI @@ -1916,7 +1916,7 @@ The Impact of Domain-Specific Terminology on Machine Translation for Finance in <fixed-case>E</fixed-case>uropean Languages - ArturoOncevayJ.P. Morgan Chase + ArturoOncevayJ.P. Morgan Chase ChareseSmileyJ.P. Morgan Chase XiaomoLiuJP Morgan AI Research 2758-2775 @@ -1931,7 +1931,7 @@ DixuanWang TianjianLiJohns Hopkins University DongweiJiang - SanjeevKhudanpurWhiting School of Engineering + SanjeevKhudanpurWhiting School of Engineering MengJiangUniversity of Notre Dame DanielKhashabiJohns Hopkins University 2776-2794 @@ -2008,7 +2008,7 @@ IvanKobyzevHuawei Noah’s Ark Lab MehdiRezagholizadehAdvanced Micro Devices BoxingChenHuawei Technologies Ltd. - PhilippeLanglaisUniversité de Montréal + PhilippeLanglaisUniversité de Montréal 2884-2898 Recent advancements in Large Language Models (LLMs) have set themselves apart with their exceptional performance in complex language modelling tasks. However, these models are also known for their significant computational and storage requirements, primarily due to the quadratic computation complexity of softmax attention. To mitigate this issue, linear attention has been designed to reduce the quadratic space-time complexity that is inherent in standard transformers. In this work, we embarked on a comprehensive exploration of three key components that substantially impact the performance of the Gated Linear Attention module: feature maps, normalization, and the gating mechanism. We developed a feature mapping function to address some crucial issues that previous suggestions overlooked. Then we offered further rationale for the integration of normalization layers to stabilize the training process. Moreover, we explored the saturation phenomenon of the gating mechanism and augmented it with a refining module. We conducted extensive experiments and showed our architecture outperforms previous Gated Linear Attention mechanisms in extensive tasks including training from scratch and post-linearization with continual pre-training. 2025.naacl-long.147 @@ -2020,7 +2020,7 @@ KshitishGhate IsaacSlaughterUniversity of Washington KyraWilsonUniversity of Washington - Mona T.DiabCarnegie Mellon University + Mona T.DiabCarnegie Mellon University AylinCaliskanUniversity of Washington 2899-2915 While recent work has found that vision-language models trained under the Contrastive Language Image Pre-training (CLIP) framework contain intrinsic social biases, the extent to which different upstream pre-training features of the framework relate to these biases, and hence how intrinsic bias and downstream performance are connected has been unclear. In this work, we present the largest comprehensive analysis to-date of how the upstream pre-training factors and downstream performance of CLIP models relate to their intrinsic biases. Studying 131 unique CLIP models, trained on 26 datasets, using 55 architectures, and in a variety of sizes, we evaluate bias in each model using 26 well-established unimodal and cross-modal principled Embedding Association Tests. We find that the choice of pre-training dataset is the most significant upstream predictor of bias, whereas architectural variations have minimal impact. Additionally, datasets curated using sophisticated filtering techniques aimed at enhancing downstream model performance tend to be associated with higher levels of intrinsic bias. Finally, we observe that intrinsic bias is often significantly correlated with downstream performance (0.3 \leq r \leq 0.8), suggesting that models optimized for performance inadvertently learn to amplify representational biases. Comparisons between unimodal and cross-modal association tests reveal that social group bias depends heavily on the modality. Our findings imply that more sophisticated strategies are needed to address intrinsic model bias for vision-language models across the entire model development pipeline. @@ -2076,7 +2076,7 @@ LongjuBai AnganaBorah OanaIgnatSanta Clara University - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan 2970-2993 Large Multimodal Models (LMMs) exhibit impressive performance across various multimodal tasks. However, their effectiveness in cross-cultural contexts remains limited due to the predominantly Western-centric nature of most data and models. Conversely, multi-agent models have shown significant capability in solving complex tasks. Our study evaluates the collective performance of LMMs in a multi-agent interaction setting for the novel task of cultural image captioning. Our contributions are as follows: (1) We introduce MosAIC, a Multi-Agent framework to enhance cross-cultural Image Captioning using LMMs with distinct cultural personas; (2) We provide a dataset of culturally enriched image captions in English for images from China, India, and Romania across three datasets: GeoDE, GD-VCR, CVQA; (3) We propose a culture-adaptable metric for evaluating cultural information within image captions; and (4) We show that the multi-agent interaction outperforms single-agent models across different metrics, and offer valuable insights for future research. 2025.naacl-long.152 @@ -2152,7 +2152,7 @@ A Probabilistic Framework for <fixed-case>LLM</fixed-case> Hallucination Detection via Belief Tree Propagation BairuHou - YangZhangInternational Business Machines + YangZhangInternational Business Machines JacobAndreasMassachusetts Institute of Technology ShiyuChang 3076-3099 @@ -2174,7 +2174,7 @@ Superlatives in Context: Modeling the Implicit Semantics of Superlatives ValentinaPyatkinAllen Institute for Artificial Intelligence and Department of Computer Science - BonnieWebberEdinburgh University, University of Edinburgh + BonnieWebberEdinburgh University, University of Edinburgh IdoDaganBar-Ilan University ReutTsarfatyGoogle and Bar-Ilan University, Technion 3112-3126 @@ -2258,7 +2258,7 @@ <fixed-case>W</fixed-case>orld<fixed-case>C</fixed-case>uisines: A Massive-Scale Benchmark for Multilingual and Multicultural Visual Question Answering on Global Cuisines - Genta IndraWinataCapital One + Genta IndraWinataCapital One FrederikusHudi Patrick AmadeusIrawan DavidAnugraha @@ -2300,14 +2300,14 @@ Stephanie YuliaSalim YiZhouCardiff University YinxuanGuiFudan University - David IfeoluwaAdelaniMcGill University + David IfeoluwaAdelaniMcGill University En-Shiun AnnieLee ShogoOkada AyuPurwariantiInstitut Teknologi Bandung Alham FikriAjiMohamed bin Zayed University of Artificial Intelligence TaroWatanabeNara Institute of Science and Technology, Japan - Derry TantiWijayaMonash University and Boston University - AliceOhKorea Advanced Institute of Science and Technology + Derry TantiWijayaMonash University and Boston University + AliceOhKorea Advanced Institute of Science and Technology Chong-WahNgoSingapore Management University 3242-3264 Vision Language Models (VLMs) often struggle with culture-specific knowledge, particularly in languages other than English and in underrepresented cultural contexts. To evaluate their understanding of such knowledge, we introduce WorldCuisines, a massive-scale benchmark for multilingual and multicultural, visually grounded language understanding. This benchmark includes a visual question answering (VQA) dataset with text-image pairs across 30 languages and dialects, spanning 9 language families and featuring over 1 million data points, making it the largest multicultural VQA benchmark to date. It includes tasks for identifying dish names and their origins. We provide evaluation datasets in two sizes (12k and 60k instances) alongside a training dataset (1 million instances). Our findings show that while VLMs perform better with correct location context, they struggle with adversarial contexts and predicting specific regional cuisines and languages. To support future research, we release a knowledge base with annotated food entries and images along with the VQA data. @@ -2383,7 +2383,7 @@ SiyanLi Vethavikashini ChithrraRaghuramCCC Intelligent Solutions OmarKhattabMassachusetts Institute of Technology - JuliaHirschbergColumbia University + JuliaHirschbergColumbia University ZhouYuColumbia University 3371-3390 Users can divulge sensitive information to proprietary LLM providers, raising significant privacy concerns. While open-source models, hosted locally on the user’s machine, alleviate some concerns, models that users can host locally are often less capable than proprietary frontier models. Toward preserving user privacy while retaining the best quality, we propose Privacy-Conscious Delegation, a novel task for chaining API-based and local models. We utilize recent public collections of user-LLM interactions to construct a natural benchmark called PUPA, which contains personally identifiable information (PII). To study potential approaches, we devise PAPILLON, a multi-stage LLM pipeline that uses prompt optimization to address a simpler version of our task. Our best pipeline maintains high response quality for 85.5% of user queries while restricting privacy leakage to only 7.5%. We still leave a large margin to the generation quality of proprietary LLMs for future work. @@ -2395,7 +2395,7 @@ <fixed-case>W</fixed-case>hen2<fixed-case>C</fixed-case>all: When (not) to Call Tools HayleyRossHarvard University, Harvard University Ameya SunilMahabaleshwarkarNVIDIA - YoshiSuharaNVIDIA + YoshiSuharaNVIDIA 3391-3409 Leveraging external tools is a key feature for modern Language Models (LMs) to expand their capabilities and integrate them into existing systems. However, existing benchmarks primarily focus on the accuracy of tool calling—whether the correct tool is called with the correct parameters—and less on evaluating when LMs should (not) call tools. We develop a new benchmark, When2Call, which evaluates tool-calling decision-making: when to generate a tool call, when to ask follow-up questions and when to admit the question can’t be answered with the tools provided. We find that state-of-the-art tool-calling LMs show significant room for improvement on When2Call, indicating the importance of this benchmark. We also develop a training set for When2Call and leverage the multiple-choice nature of the benchmark to develop a preference optimization training regime, which shows considerably more improvement than traditional fine-tuning. We release the benchmark and training data as well as evaluation scripts. 2025.naacl-long.174 @@ -2405,7 +2405,7 @@ Mitigating Hallucinated Translations in Large Language Models with Hallucination-focused Preference Optimization ZiluTang - RajenChatterjeeApple + RajenChatterjeeApple SarthakGargApple 3410-3433 Machine Translation (MT) is undergoing a paradigm shift, with systems based on fine-tuned large language models (LLM) becoming increasingly competitive with traditional encoder-decoder models trained specifically for translation tasks. However, LLM-based systems are at a higher risk of generating hallucinations, which can severely undermine user’s trust and safety. Most prior research on hallucination mitigation focuses on traditional MT models, with solutions that involve *post-hoc* mitigation - detecting hallucinated translations and re-translating them. While effective, this approach introduces additional complexity in deploying extra tools in production and also increases latency.To address these limitations, we propose a method that intrinsically learns to mitigate hallucinations during the model training phase. Specifically, we introduce a data creation framework to generate hallucination focused preference datasets. Fine-tuning LLMs on these preference datasets reduces the hallucination rate by an average of 96% across five language pairs, while preserving overall translation quality. In a zero-shot setting our approach reduces hallucinations by 89% on an average across three unseen target languages. @@ -2417,7 +2417,7 @@ Large Language Models Can Solve Real-World Planning Rigorously with Formal Verification Tools YilunHao YongchaoChen - YangZhangInternational Business Machines + YangZhangInternational Business Machines ChuchuFanMassachusetts Institute of Technology 3434-3483 Large Language Models (LLMs) struggle to directly generate correct plans for complex multi-constraint planning problems, even with self-verification and self-critique. For example, a U.S. domestic travel planning benchmark TravelPlanner was proposed in Xie et al. (2024), where the best LLM OpenAI o1-preview can only find viable travel plans with a 10% success rate given all needed information. In this work, we tackle this by proposing an LLM-based planning framework that formalizes and solves complex multi-constraint planning problems as constrained satisfiability problems, which are further consumed by sound and complete satisfiability solvers. We start with TravelPlanner as the primary use case and show that our framework achieves a success rate of 93.9% and is effective with diverse paraphrased prompts. More importantly, our framework has strong zero-shot generalizability, successfully handling unseen constraints in our newly created unseen international travel dataset and generalizing well to new fundamentally different domains. Moreover, when user input queries are infeasible, our framework can identify the unsatisfiable core, provide failure reasons, and offers personalized modification suggestions. We show that our framework can modify and solve for an average of 81.6% and 91.7% unsatisfiable queries from two datasets and prove with ablations that all key components of our framework are effective and necessary. @@ -2444,7 +2444,7 @@ BingyangYeBrandeis University XinruiHu NianwenXueBrandeis University - JamesPustejovskyBrandeis University + JamesPustejovskyBrandeis University 3499-3513 Cross-Document Event Coreference (CDEC) annotation is challenging and difficult to scale, resulting in existing datasets being small and lacking diversity. We introduce a new approach leveraging large language models (LLMs) to decontextualize event mentions, by simplifying the document-level annotation task to sentence pairs with enriched context, enabling the creation of Richer EventCorefBank (RECB), a denser and more expressive dataset annotated at faster speed. Decontextualization has been shown to improve annotation speed without compromising quality and to enhance model performance. Our baseline experiment indicates that systems trained on RECB achieve comparable results on the EventCorefBank(ECB+) test set, showing the high quality of our dataset and its generalizability on other CDEC datasets. In addition, our evaluation shows that the strong baseline models are still struggling with RECB comparing to other CDEC datasets, suggesting that the richness and diversity of RECB present significant challenges to current CDEC systems. 2025.naacl-long.178 @@ -2457,7 +2457,7 @@ TijanaZrnicStanford University CinooLeeStanford University EmmanuelCandesStanford University - DanJurafskyStanford University + DanJurafskyStanford University 3514-3533 Large language models (LLMs) have shown high agreement with human raters across a variety of tasks, demonstrating potential to ease the challenges of human data collection. In computational social science (CSS), researchers are increasingly leveraging LLM annotations to complement slow and expensive human annotations. Still, guidelines for collecting and using LLM annotations, without compromising the validity of downstream conclusions, remain limited. We introduce Confidence-driven inference: a method that combines LLM annotations and LLM confidence indicators to strategically select which human annotations should be collected, with the goal of producing accurate statistical estimates and provably valid confidence intervals while reducing the number of human annotations needed. Our approach comes with safeguards against LLM annotations of poor quality, guaranteeing that the conclusions will be both valid and no less accurate than if we only relied on human annotations. We demonstrate the effectiveness of Confidence-driven inference over baselines in statistical estimation tasks across three CSS settings—text politeness, stance, and bias—reducing the needed number of human annotations by over 25% in each. Although we use CSS settings for demonstration, Confidence-driven inference can be used to estimate most standard quantities across a broad range of NLP problems. 2025.naacl-long.179 @@ -2482,7 +2482,7 @@ CheyenneWing María XimenaJuárez Huerta ÁngelesMárquez Hernandez - FrancisTyers + FrancisTyers 3549-3562 The development of digital linguistic resources is essential for enhancing the inclusion of indigenous and marginalized languages in the digital domain. Indigenous languages of Mexico, despite representing vast typological diversity and millions of speakers, have largely been overlooked in NLP until recently. In this paper, we present a corpus of audio and annotated transcriptions of Western Sierra Puebla Nahuatl, an endangered variety of Nahuatl spoken in Puebla, Mexico. The data made available in this corpus are useful for ASR, spelling normalization, and word-level language identification. We detail the corpus-creation process, and describe experiments to report benchmark results for each of these important NLP tasks. The corpus audio and text is made freely available. 2025.naacl-long.181 @@ -2636,7 +2636,7 @@ RuipuLuo JiwenZhangFudan University MinghuiQiu - XuanjingHuangFudan University + XuanjingHuangFudan University ZhongyuWeiFudan University 3769-3798 2025.naacl-long.192 @@ -2674,7 +2674,7 @@ Mamba-Shedder: Post-Transformer Compression for Efficient Selective Structured State Space Models - Juan PabloMunozIntel + Juan PabloMunozIntel JinjieYuanIntel NileshJainIntel Corp 3851-3863 @@ -2693,7 +2693,7 @@ Shaun M.EackUniversity of Pittsburgh FeiFangCarnegie Mellon University William YangWangUC Santa Barbara - ZhiyuChen + ZhiyuChen 3864-3900 There is a significant gap between patient needs and available mental health support today. In this paper, we aim to thoroughly examine the potential of using Large Language Models (LLMs) to assist professional psychotherapy. To this end, we propose a new benchmark, CBT-Bench, for the systematic evaluation of cognitive behavioral therapy (CBT) assistance. We include three levels of tasks in CBT-Bench: **I: Basic CBT knowledge acquisition**, with the task of multiple-choice questions; **II: Cognitive model understanding**, with the tasks of cognitive distortion classification, primary core belief classification, and fine-grained core belief classification; **III: Therapeutic response generation**, with the task of generating responses to patient speech in CBT therapy sessions.These tasks encompass key aspects of CBT that could potentially be enhanced through AI assistance, while also outlining a hierarchy of capability requirements, ranging from basic knowledge recitation to engaging in real therapeutic conversations. We evaluated representative LLMs on our benchmark. Experimental results indicate that while LLMs perform well in reciting CBT knowledge, they fall short in complex real-world scenarios requiring deep analysis of patients’ cognitive structures and generating effective responses, suggesting potential future work. 2025.naacl-long.196 @@ -2705,7 +2705,7 @@ Eui JunHwangKorea Advanced Institute of Science & Technology SukminChoKorea Advanced Institute of Science and Technology JunmyeongLee - Jong C.ParkKorea Advanced Institute of Science and Technology + Jong C.ParkKorea Advanced Institute of Science and Technology 3901-3920 Gloss-free Sign Language Translation (SLT) converts sign videos into spoken language sentences without relying on glosses, which are the written representations of signs. Recently, Large Language Models (LLMs) have shown remarkable translation performance in gloss-free methods by harnessing their powerful natural language generation capabilities. However, these methods often rely on domain-specific fine-tuning of visual encoders to achieve optimal results. By contrast, we emphasize the importance of capturing the spatial configurations and motion dynamics in sign language. With this in mind, we introduce Spatial and Motion-based Sign Language Translation (SpaMo), a novel LLM-based SLT framework. The core idea of SpaMo is simple yet effective: instead of domain-specific tuning, we use off-the-shelf visual encoders to extract spatial and motion features, which are then input into an LLM along with a language prompt. Additionally, we employ a visual-text alignment process as a lightweight warm-up step before applying SLT supervision. Our experiments demonstrate that SpaMo achieves state-of-the-art performance on three popular datasets—PHOENIX14T, CSL-Daily, and How2Sign—without visual fine-tuning. 2025.naacl-long.197 @@ -2885,7 +2885,7 @@ YifanSong GuoyinWangAlibaba Group SujianLiPeking University - Bill YuchenLinxAI and University of Washington + Bill YuchenLinxAI and University of Washington 4195-4206 Current evaluations of large language models (LLMs) often overlook non-determinism, typically focusing on a single output per example. This limits our understanding of LLM performance variability in real-world applications. Our study addresses this issue by exploring key questions about the performance differences between greedy decoding and sampling, identifying benchmarks’ consistency regarding non-determinism, and examining unique model behaviors. Through extensive experiments, we observe that greedy decoding generally outperforms sampling methods for most evaluated tasks. We also observe consistent performance across different LLM sizes and alignment methods, noting that alignment can reduce sampling variance. Moreover, our best-of-N sampling approach demonstrates that smaller LLMs can match or surpass larger models such as GPT-4-Turbo, highlighting the untapped potential of smaller LLMs. This research shows the importance of considering non-determinism in LLM evaluations and provides insights for future LLM development and evaluation. 2025.naacl-long.211 @@ -2926,7 +2926,7 @@ LifengShangHuawei Technologies Ltd. XinJiang QunLiuHuawei Noah’s Ark Lab - Kam-FaiWongThe Chinese University of Hong Kong + Kam-FaiWongThe Chinese University of Hong Kong 4246-4263 Supervised fine-tuning (SFT) is a common method to enhance the tool calling capabilities of Large Language Models (LLMs), with the training data often being synthesized. The current data synthesis process generally involves sampling a set of tools, formulating a requirement based on these tools, and generating the call statements. However, tools sampled randomly lack relevance, making them difficult to combine and thus reducing the diversity of the data. Additionally, current work overlooks the coherence between turns of dialogues, leading to a gap between the synthesized data and real-world scenarios. To address these issues, we propose a Graph-based Sampling strategy to sample more relevant tool combinations, and a Planned-generation strategy to create plans that guide the synthesis of coherent dialogues. We integrate these two strategies and enable multiple agents to synthesize the dialogue data interactively, resulting in our tool-calling data synthesis pipeline ToolFlow. Data quality assessments demonstrate improvements in the naturalness and coherence of our synthesized dialogues. Finally, we apply SFT on LLaMA-3.1-8B using 8,000 synthetic dialogues generated with ToolFlow. Results show that the model achieves tool-calling performance comparable to or even surpassing GPT-4, while maintaining strong general capabilities. 2025.naacl-long.214 @@ -2964,7 +2964,7 @@ SamiulAlam ZhongweiWan HuiShen - MiZhangThe Ohio State University + MiZhangThe Ohio State University 4287-4296 Despite significant advancements, the practical deployment of Large Language Models (LLMs) is often hampered by their immense sizes, highlighting the need for effective compression techniques. Singular Value Decomposition (SVD) emerges as a promising method for compressing LLMs. However, existing SVD-based compression approaches suffer from substantial truncation losses, leading to severe performance degradation in compressed models. In this work, we introduce , a novel SVD-based LLM compression method that optimizes singular value truncation in SVD compression with two key strategies. First, employs dynamic compression ratio allocation to effectively balance the extremely large truncation loss across different layers. Second, it implements loss-optimized weight truncation to ensure that the truncated singular values result in a lower and more stable truncation loss in practice. We evaluate on ten datasets and five models on various scales and demonstrated that outperforms current state-of-the-art methods. The source code is available at https://github.com/AIoT-MLSys-Lab/SVD-LLM. 2025.naacl-long.217 @@ -2980,8 +2980,8 @@ ZhuohanLiu, A*STAR WenyuZhangI2R, A*STAR ZhengyuanLiuI2R - AiTiAwI2R - Nancy F.Chen + AiTiAwI2R + Nancy F.Chen 4297-4316 We introduce AudioBench, a universal benchmark designed to evaluate Audio Large Language Models (AudioLLMs). It encompasses 8 distinct tasks and 26 datasets, among which, 7 are newly proposed datasets. The evaluation targets three main aspects: speech understanding, audio scene understanding, and voice understanding (paralinguistic). Despite recent advancements, there lacks a comprehensive benchmark for AudioLLMs on instruction following capabilities conditioned on audio signals. AudioBench addresses this gap by setting up datasets as well as desired evaluation metrics. Besides, we also evaluated the capabilities of five popular models and found that no single model excels consistently across all tasks. We outline the research outlook for AudioLLMs and anticipate that our open-sourced evaluation toolkit, data, and leaderboard will offer a robust testbed for future model developments. 2025.naacl-long.218 @@ -3064,7 +3064,7 @@ ZhangchenXu FengqingJiangUniversity of Washington LuyaoNiuUniversity of Washington - Bill YuchenLinxAI and University of Washington + Bill YuchenLinxAI and University of Washington RadhaPoovendranUniversity of Washington, Seattle 4392-4405 Instruction tuning has been widely adopted to ensure large language models (LLMs) follow user instructions and engage with users meaningfully. The resulting instruction-following capabilities of LLMs heavily rely on the instruction datasets used for tuning. Recently, synthetic instruction datasets have emerged as an economically viable solution to provide LLMs diverse and high-quality instructions. However, existing approaches typically assume that larger or stronger models are stronger teachers for instruction tuning, and hence simply adopt larger models as response generators to the synthetic instructions. In this paper, we challenge this commonly-adopted assumption. Our extensive experiments across five base models and twenty response generators reveal that larger and stronger models are not necessarily stronger teachers of smaller models. We refer to this phenomenon as the Larger Models’ Paradox. We observe that existing metrics cannot precisely predict the effectiveness of response generators since they ignore the compatibility between teachers and base models being fine-tuned. We thus develop a novel metric, named as Compatibility-Adjusted Reward (CAR) to measure the effectiveness of response generators. Our experiments across five base models demonstrate that CAR outperforms almost all baselines. @@ -3219,7 +3219,7 @@ VipulGuptaPennsylvania State University CandaceRossMeta DavidPantoja - Rebecca J.PassonneauPennsylvania State University + Rebecca J.PassonneauPennsylvania State University MeganUngFacebook AI Research AdinaWilliamsFAIR (Meta Platforms Inc.) 4595-4615 @@ -3337,7 +3337,7 @@ <fixed-case>R</fixed-case>each<fixed-case>A</fixed-case>gent: Enhancing Mobile Agent via Page Reaching and Operation QinzhuoWu - WeiLiu + WeiLiu JianLuanXiaomi Corporation BinWangAI Lab, Xiaomi Inc. 4760-4775 @@ -3393,7 +3393,7 @@ WeiJuSichuan University LuchenLiu TianyuLiu - BaobaoChangPeking University + BaobaoChangPeking University MingZhangPeking University 4805-4822 Large Multimodal Models (LMMs) exhibit impressive cross-modal understanding and reasoning abilities, often assessed through multiple-choice questions (MCQs) that include an image, a question, and several options. However, many benchmarks used for such evaluations suffer from systematic biases. Remarkably, Large Language Models (LLMs) without any visual perception capabilities achieve non-trivial performance, undermining the credibility of these evaluations. To address this issue while maintaining the efficiency of MCQ evaluations, we propose MMEVALPRO, a benchmark designed to avoid Type-I errors through a trilogy evaluation pipeline and more rigorous metrics. For each original question from existing benchmarks, human annotators augment it by creating one perception question and one knowledge anchor question through a meticulous annotation process. MMEVALPRO comprises 2,138 question triplets, totaling 6,414 distinct questions. Two-thirds of these questions are manually labeled by human experts, while the rest are sourced from existing benchmarks (MMMU, ScienceQA, and MathVista). Compared with the existing benchmarks, our experiments with the latest LLMs and LMMs demonstrate that MMEVALPRO is **more challenging** (the best LMM lags behind human performance by 31.73%, compared to an average gap of 8.03% in previous benchmarks) and **more trustworthy** (the best LLM trails the best LMM by 23.09%, whereas the gap for previous benchmarks is just 14.64%). Our in-depth analysis explains the reason for the large performance gap and justifies the trustworthiness of evaluation, underscoring its significant potential for advancing future research. @@ -3417,7 +3417,7 @@ Analyzing (In)Abilities of <fixed-case>SAE</fixed-case>s via Formal Languages AbhinavMenon - ManishShrivastavaInternational Institute of Information Technology Hyderabad, India + ManishShrivastavaInternational Institute of Information Technology Hyderabad, India DavidKrueger Ekdeep SinghLubanaHarvard University, Harvard University 4837-4862 @@ -3536,7 +3536,7 @@ RongYeByteDance LeiChen HaoyuKuangFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University ZhongyuWeiFudan University 4975-5001 Large language models (LLMs) are increasingly leveraged to empower autonomous agents to simulate human beings in various fields of behavioral research. However, evaluating their capacity to navigate complex social interactions remains a challenge. Previous studies face limitations due to insufficient scenario diversity, complexity, and a single-perspective focus. To this end, we introduce AgentSense: Benchmarking Social Intelligence of Language Agents through Interactive Scenarios. Drawing on Dramaturgical Theory, AgentSense employs a bottom-up approach to create 1,225 diverse social scenarios constructed from extensive scripts. We evaluate LLM-driven agents through multi-turn interactions, emphasizing both goal completion and implicit reasoning. We analyze goals using ERG theory and conduct comprehensive experiments. Our findings highlight that LLMs struggle with goals in complex social scenarios, especially high-level growth needs, and even GPT-4o requires improvement in private information reasoning. @@ -3651,7 +3651,7 @@ Aryo PradiptaGemaAnthropic and University of Edinburgh, University of Edinburgh HongruWangThe Chinese University of Hong Kong XuanliHeUniversity College London, University of London - Kam-FaiWongThe Chinese University of Hong Kong + Kam-FaiWongThe Chinese University of Hong Kong PasqualeMinerviniUniversity of Edinburgh, University of Edinburgh 5117-5136 Large language models (LLMs) can store a significant amount of factual knowledge in their parameters. However, their parametric knowledge may conflict with the information provided in the context—this phenomenon, known as context-memory knowledge conflicts, can lead to undesirable model behaviour, such as reliance on outdated or incorrect information. Analysing the internal activations of LLMs, we find that they can internally register the signals of knowledge conflict at mid-layers. Such signals allow us to detect whether a knowledge conflict occurs and use inference-time intervention strategies to resolve it. In this work, we propose SpARE, a training-free representation engineering method that uses pre-trained sparse auto-encoders (SAEs) to control the knowledge selection behaviour of LLMs. SpARE identifies the functional features that control the knowledge selection behaviours and applies them to edit the internal activations of LLMs at inference time. Our experimental results show that SpARE can effectively control the usage of either knowledge source to resolve knowledge conflict in open-domain question-answering tasks, surpassing existing representation engineering methods (+10%) as well as contrastive decoding methods (+15%). @@ -3661,7 +3661,7 @@ <fixed-case>M</fixed-case>o<fixed-case>D</fixed-case>ification: Mixture of Depths Made Easy - ChenZhangBeijing Institute of Technology + ChenZhangBeijing Institute of Technology MeizhiZhong QimengWangXiaohongshu XuantaoLu @@ -3760,7 +3760,7 @@ AnZhangNational University of Singapore YangDengSingapore Management University XiangWangUniversity of Science and Technology of China - Tat-SengChuaNational University of Singapore + Tat-SengChuaNational University of Singapore 5259-5276 Open-domain dialogue systems have seen remarkable advancements with the development of large language models (LLMs). Nonetheless, most existing dialogue systems predominantly focus on brief single-session interactions, neglecting the real-world demands for long-term companionship and personalized interactions with chatbots. Crucial to addressing this real-world need are event summary and persona management, which enable reasoning for appropriate long-term dialogue responses. Recent progress in the human-like cognitive and reasoning capabilities of LLMs suggests that LLM-based agents could significantly enhance automated perception, decision-making, and problem-solving. In response to this potential, we introduce a model-agnostic framework, the Long-term Dialogue Agent (LD-Agent), which incorporates three independently tunable modules dedicated to event perception, persona extraction, and response generation. For the event memory module, long and short-term memory banks are employed to separately focus on historical and ongoing sessions, while a topic-based retrieval mechanism is introduced to enhance the accuracy of memory retrieval. Furthermore, the persona module conducts dynamic persona modeling for both users and agents. The integration of retrieved memories and extracted personas is subsequently fed into the generator to induce appropriate responses. The effectiveness, generality, and cross-domain capabilities of LD-Agent are empirically demonstrated across various illustrative benchmarks, models, and tasks. The code is released at https://github.com/leolee99/LD-Agent. 2025.naacl-long.272 @@ -3830,7 +3830,7 @@ Token-based Decision Criteria Are Suboptimal in In-context Learning - HakazeCho + HakazeCho YoshihiroSakaiJapan Advanced Institute of Science and Technology MarikoKato KenshiroTanaka @@ -3858,7 +3858,7 @@ Multilingual Machine Translation with Open Large Language Models at Practical Scale: An Empirical Study MenglongCuiXiaomi Corporation PengzhiGaoXiaomi Corporation - WeiLiu + WeiLiu JianLuanXiaomi Corporation BinWangAI Lab, Xiaomi Inc. 5420-5443 @@ -3881,7 +3881,7 @@ Evaluating Evidence Attribution in Generated Fact Checking Explanations RuiXingMohamed bin Zayed University of Artificial Intelligence and University of Melbourne - TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne + TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne Jey HanLauThe University of Melbourne 5475-5496 Automated fact-checking systems often struggle with trustworthiness, as their generated explanations can include hallucinations. In this work, we explore evidence attribution for fact-checking explanation generation. We introduce a novel evaluation protocol, citation masking and recovery, to assess attribution quality in generated explanations. We implement our protocol using both human annotators and automatic annotators and found that LLM annotation correlates with human annotation, suggesting that attribution assessment can be automated. Finally, our experiments reveal that: (1) the best-performing LLMs still generate explanations that are not always accurate in their attribution; and (2) human-curated evidence is essential for generating better explanations. @@ -3909,7 +3909,7 @@ GeorgiosChochlakisUniversity of Southern California AlexandrosPotamianosAmazon, University of Southern California and National Technical University of Athens KristinaLermanUniversity of Southern California and USC Information Sciences Institute - ShrikanthNarayananUniversity of Southern California + ShrikanthNarayananUniversity of Southern California 5513-5528 In-context Learning (ICL) has become the primary method for performing natural language tasks with Large Language Models (LLMs). The knowledge acquired during pre-training is crucial for this few-shot capability, providing the model with task priors. However, recent studies have shown that ICL predominantly relies on retrieving task priors rather than “learning” to perform tasks. This limitation is particularly evident in complex subjective domains such as emotion and morality, where priors significantly influence posterior predictions. In this work, we examine whether this is the result of the aggregation used in corresponding datasets, where trying to combine low-agreement, disparate annotations might lead to annotation artifacts that create detrimental noise in the prompt. Moreover, we evaluate the posterior bias towards certain annotators by grounding our study in appropriate, quantitative measures of LLM priors. Our results indicate that aggregation is a confounding factor in the modeling of subjective tasks, and advocate focusing on modeling individuals instead. However, aggregation does not explain the entire gap between ICL and the state of the art, meaning other factors in such tasks also account for the observed phenomena. Finally, by rigorously studying annotator-level labels, we find that it is possible for minority annotators to both better align with LLMs and have their perspectives further amplified. 2025.naacl-long.284 @@ -3921,8 +3921,8 @@ YasserAshrafMohamed bin Zayed University of Artificial Intelligence YuxiaWang BinGuMohamed bin Zayed University of Artificial Intelligence - PreslavNakovMohamed bin Zayed University of Artificial Intelligence - TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne + PreslavNakovMohamed bin Zayed University of Artificial Intelligence + TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne 5529-5546 The growing use of large language models (LLMs) has raised concerns regarding their safety. While many studies have focused on English, the safety of LLMs in Arabic, with its linguistic and cultural complexities, remains under-explored. Here, we aim to bridge this gap. In particular, we present an Arab-region-specific safety evaluation dataset consisting of 5,799 questions, including direct attacks, indirect attacks, and harmless requests with sensitive words, adapted to reflect the socio-cultural context of the Arab world. To uncover the impact of different stances in handling sensitive and controversial topics, we propose a dual-perspective evaluation framework. It assesses the LLM responses from both governmental and opposition viewpoints. Experiments over five leading Arabic-centric and multilingual LLMs reveal substantial disparities in their safety performance. This reinforces the need for culturally specific datasets to ensure the responsible deployment of LLMs. 2025.naacl-long.285 @@ -4098,7 +4098,7 @@ <fixed-case>V</fixed-case>oice<fixed-case>T</fixed-case>ext<fixed-case>B</fixed-case>lender: Augmenting Large Language Models with Speech Capabilities via Single-Stage Joint Speech-Text Supervised Fine-Tuning - YifanPengCarnegie Mellon University + YifanPengCarnegie Mellon University Krishna CPuvvadaNVIDIA ZhehuaiChen PiotrZelaskoNVIDIA @@ -4120,7 +4120,7 @@ HaishanGao Sarah LiChen DanEdelsteinStanford University - DanJurafskyStanford University + DanJurafskyStanford University ChenShani 5803-5817 Word similarity has many applications to social science and cultural analytics tasks like measuring meaning change over time and making sense of contested terms. Yet traditional similarity methods based on cosine similarity between word embeddings cannot capture the context-dependent, asymmetrical, polysemous nature of semantic similarity. We propose a new measure of similarity, Word Confusion, that reframes semantic similarity in terms of feature-based classification confusion. Word Confusion is inspired by Tversky (1977)’s suggestion that similarity features be chosen dynamically. Here we train a classifier to map contextual embeddings to word identities and use the classifier confusion (the probability of choosing a confounding word c instead of the correct target word t) as a measure of the similarity of c and t. The set of potential confounding words acts as the chosen features. Our method is comparable to cosine similarity in matching human similarity judgments across several datasets (MEN, WirdSim353, and SimLex), and can measure similarity using predetermined features of interest. We demonstrate our model’s ability to make use of dynamic features by applying it to test a hypothesis about changes in the 18th C. meaning of the French word “révolution” from popular to state action during the French Revolution. We hope this reimagining of semantic similarity will inspire the development of new tools that better capture the multi-faceted and dynamic nature of language, advancing the fields of computational social science and cultural analytics and beyond. @@ -4143,7 +4143,7 @@ Do <fixed-case>RAG</fixed-case> Systems Cover What Matters? Evaluating and Optimizing Responses with Sub-Question Coverage KaigeXieGeorgia Institute of Technology PhilippeLabanMicrosoft - Prafulla KumarChoubeySalesForce.com + Prafulla KumarChoubeySalesForce.com CaimingXiongSalesforce Research Chien-ShengWuSalesforce AI 5836-5849 @@ -4193,7 +4193,7 @@ JaminShinNAVER JoelJang SeonghyeonYe - Bill YuchenLinxAI and University of Washington + Bill YuchenLinxAI and University of Washington SeanWelleckCarnegie Mellon University GrahamNeubigCarnegie Mellon University MoontaeLeeLG Corporation and University of Illinois, Chicago @@ -4227,7 +4227,7 @@ Uncovering Bias in Large Vision-Language Models at Scale with Counterfactuals PhillipHowardIntel - Kathleen C.FraserNational Research Council Canada + Kathleen C.FraserNational Research Council Canada AnahitaBhiwandiwalla SvetlanaKiritchenkoNational Research Council Canada 5946-5991 @@ -4282,7 +4282,7 @@ NasserZalmoutAmazon PriyankaNigam BingYinAmazon - ChaoZhangGeorgia Institute of Technology + ChaoZhangGeorgia Institute of Technology 6041-6068 Due to the scarcity of agent-oriented pre-training data, LLM-based autonomous agents typically rely on complex prompting or extensive fine-tuning, which often fails to introduce new capabilities while preserving strong generalizability. We introduce Hephaestus-Forge, the first large-scale pre-training corpus designed to enhance the fundamental capabilities of LLM agents in API function calling, intrinsic reasoning and planning, and adapting to environmental feedback. Hephaestus-Forge comprises 103B agent-specific data encompassing 76,537 APIs, including both tool documentation to introduce knowledge of API functions and function calling trajectories to strengthen intrinsic reasoning. To explore effective training protocols, we investigate scaling laws to identify the optimal recipe in data mixing ratios. By continual pre-training on Hephaestus-Forge, Hephaestus outperforms small- to medium-scale open-source LLMs and rivals commercial LLMs on three agent benchmarks, demonstrating the effectiveness of our pre-training corpus in enhancing fundamental agentic capabilities and generalization of LLMs to new tasks or environments. 2025.naacl-long.308 @@ -4497,7 +4497,7 @@ A Cognitive Evaluation Benchmark of Image Reasoning and Description for Large Vision-Language Models XiujieSong MengyueWuShanghai Jiaotong University - Kenny Q.ZhuUniversity of Texas at Arlington + Kenny Q.ZhuUniversity of Texas at Arlington ChunhaoZhang YanyiChen 6392-6409 @@ -4512,7 +4512,7 @@ JaehyungSeo JaewookLeeKorea University ChanjunParkKorea University - HeuiseokLim + HeuiseokLim 6410-6422 Large language models (LLMs) often retain outdated or incorrect information from pre-training, which undermines their reliability. While model editing methods have been developed to address such errors without full re-training, they frequently suffer from knowledge conflicts, where outdated information interferes with new knowledge. In this work, we propose Conflict-free Model Editing (CoME), a novel framework that enhances the accuracy of knowledge updates in LLMs by selectively removing outdated knowledge. CoME leverages unlearning to mitigate knowledge interference, allowing new information to be integrated without compromising relevant linguistic features. Through experiments on GPT-J and LLaMA-3 using Counterfact and ZsRE datasets, we demonstrate that CoME improves both editing accuracy and model reliability when applied to existing editing methods. Our results highlight that the targeted removal of outdated knowledge is crucial for enhancing model editing effectiveness and maintaining the model’s generative performance. 2025.naacl-long.325 @@ -4582,7 +4582,7 @@ CunxiangWang HuiminWangJarvis Research Center, Tencent YouTu Lab GuanhuaChenSouthern University of Science and Technology - Kam-FaiWongThe Chinese University of Hong Kong + Kam-FaiWongThe Chinese University of Hong Kong 6510-6525 Previous research has typically concentrated on leveraging the internal knowledge of Large Language Models (LLMs) to answer known questions (i.e., internal reasoning such as generate-then-read). In contrast, for questions that fall outside their known scope, these models rely on external knowledge retrieval to provide accurate responses (i.e., external acting such as retrieve-then-read). However, few previous works consider the compositional questions, which consist of several known and unknown sub-questions, necessitating the dynamic combination of previous two methods (i.e., internal reasoning and external acting) to achieve a better trade-off between effectiveness and efficiency. To this end, we introduce a Self Divide-and-Conquer (Self-DC) framework, accompanying with the first Compositional unknown Question-Answering dataset (CuQA). This framework enables LLMs to adaptively choose between using internal knowledge and retrieving external knowledge as needed, resulting in a better trade-off between effectiveness and efficiency. Experimental results on two datasets demonstrate that Self-DC can achieve comparable or even better performance with much fewer external calls compared with several strong baselines. 2025.naacl-long.331 @@ -4697,7 +4697,7 @@ ArashYousefi JordehiUniversity of Guilan MahsaHosseini Khasheh HeyranUniversity of Guilan Seyed AbolghasemMirroshandelUniversity of Guilan - OwenRambowStony Brook University + OwenRambowStony Brook University CorneliaCarageaUniversity of Illinois at Chicago 6677-6694 The rise of Large Language Models (LLMs) has boosted the use of Few-Shot Learning (FSL) methods in natural language processing, achieving acceptable performance even when working with limited training data. The goal of FSL is to effectively utilize a small number of annotated samples in the learning process. However, the performance of FSL suffers when unsuitable support samples are chosen. This problem arises due to the heavy reliance on a limited number of support samples, which hampers consistent performance improvement even when more support samples are added. To address this challenge, we propose an active learning-based instance selection mechanism that identifies effective support instances from the unlabeled pool and can work with different LLMs. Our experiments on five tasks show that our method frequently improves the performance of FSL. We make our implementation available on GitHub. @@ -4714,7 +4714,7 @@ Khoi M.Le Nguyen VietAnhNanyang Technological University FengYichao - Anh TuanLuuNanyang Technological University + Anh TuanLuuNanyang Technological University 6695-6708 Previous research on multimodal entity linking (MEL) has primarily employed contrastive learning as the primary objective. However, using the rest of the batch as negative samples without careful consideration, these studies risk leveraging easy features and potentially overlook essential details that make entities unique. In this work, we propose JD-CCL (Jaccard Distance-based Conditional Contrastive Learning), a novel approach designed to enhance the ability to match multimodal entity linking models. JD-CCL leverages meta-information to select negative samples with similar attributes, making the linking task more challenging and robust. Additionally, to address the limitations caused by the variations within the visual modality among mentions and entities, we introduce a novel method, CVaCPT (Contextual Visual-aid Controllable Patch Transform). It enhances visual representations by incorporating multi-view synthetic images and contextual textual representations to scale and shift patch representations. Experimental results on benchmark MEL datasets demonstrate the strong effectiveness of our approach. 2025.naacl-long.341 @@ -4725,7 +4725,7 @@ <fixed-case>R</fixed-case>esearch<fixed-case>A</fixed-case>gent: Iterative Research Idea Generation over Scientific Literature with Large Language Models JinheonBaekKorea Advanced Institute of Science & Technology Sujay KumarJauharMicrosoft Research - SilviuCucerzanMicrosoft + SilviuCucerzanMicrosoft Sung JuHwangKorea Advanced Institute of Science and Technology and AITRICS 6709-6738 The pace of scientific research, vital for improving human life, is complex, slow, and needs specialized expertise. Meanwhile, novel, impactful research often stems from both a deep understanding of prior work, and a cross-pollination of ideas across domains and fields. To enhance the productivity of researchers, we propose ResearchAgent, which leverages the encyclopedic knowledge and linguistic reasoning capabilities of Large Language Models (LLMs) to assist them in their work. This system automatically defines novel problems, proposes methods and designs experiments, while iteratively refining them based on the feedback from collaborative LLM-powered reviewing agents. Specifically, starting with a core scientific paper, ResearchAgent is augmented not only with relevant publications by connecting information over an academic graph but also entities retrieved from a knowledge store derived from shared underlying concepts mined across numerous papers. Then, mimicking a scientific approach to improving ideas with peer discussions, we leverage multiple LLM-based ReviewingAgents that provide reviews and feedback via iterative revision processes. These reviewing agents are instantiated with human preference-aligned LLMs whose criteria for evaluation are elicited from actual human judgments via LLM prompting. We experimentally validate our ResearchAgent on scientific publications across multiple disciplines, showing its effectiveness in generating novel, clear, and valid ideas based on both human and model-based evaluation results. Our initial foray into AI-mediated scientific research has important implications for the development of future systems aimed at supporting researchers in their ideation and operationalization of novel work. @@ -4793,7 +4793,7 @@ <fixed-case>L</fixed-case>ib<fixed-case>E</fixed-case>volution<fixed-case>E</fixed-case>val: A Benchmark and Study for Version-Specific Code Generation SachitKuharAmazon - Wasi UddinAhmadNVIDIA + Wasi UddinAhmadNVIDIA ZijianWangAmazon AWS AI Labs NihalJainAmazon HaifengQianNVIDIA @@ -4828,7 +4828,7 @@ ShaoyangXu YongqiLeng LinhaoYu - DeyiXiongTianjin University + DeyiXiongTianjin University 6859-6877 As large language models (LLMs) become increasingly accessible in many countries, it is essential to align them to serve pluralistic human values across cultures. However, pluralistic culture alignment in LLMs remain an open problem. In this paper, we propose CultureSPA, a Self-Pluralising Culture Alignment framework that allows LLMs to simultaneously align to pluralistic cultures. The framework first generates questions on various culture topics, then yields LLM outputs in response to these generated questions under both culture-aware and culture-unaware settings. By comparing culture-aware/unaware outputs, we are able to detect and collect culture-related instances. These instances are employed to fine-tune LLMs to serve pluralistic cultures in either a culture-joint or culture-specific way. Extensive experiments demonstrate that CultureSPA significantly improves the alignment of LLMs to diverse cultures without compromising general abilities. And further improvements can be achieved if CultureSPA is combined with advanced prompt engineering techniques. Comparisons between culture-joint and culture-specific tuning strategies, along with variations in data quality and quantity, illustrate the robustness of our method. We also explore the mechanisms underlying CultureSPA and the relations between different cultures it reflects. 2025.naacl-long.350 @@ -4847,7 +4847,7 @@ <fixed-case>D</fixed-case>raw<fixed-case>E</fixed-case>du<fixed-case>M</fixed-case>ath: Evaluating Vision Language Models with Expert-Annotated Students’ Hand-Drawn Math Images SamiBaralWorcester Polytechnic Institute - LiLucyUniversity of California Berkeley + LiLucyUniversity of California Berkeley RyanKnightInsource Services, Inc AliceNg LucaSoldainiAllen Institute for Artificial Intelligence @@ -4975,7 +4975,7 @@ JunyiLi RuiyangRen ShijieWang - XinZhaoRenmin University of China + XinZhaoRenmin University of China YangSongBOSS Zhipin TaoZhang 7064-7074 @@ -4993,7 +4993,7 @@ HuiWang XiZeng XingweiLiang - Kam-FaiWongThe Chinese University of Hong Kong + Kam-FaiWongThe Chinese University of Hong Kong RuifengXuHarbin Institute of Technology 7075-7092 Stance detection is critical for understanding the underlying position or attitude expressed toward a topic. Large language models (LLMs) have demonstrated significant advancements across various natural language processing tasks including stance detection, however, their performance in stance detection is limited by biases and spurious correlations inherent due to their data-driven nature. Our statistical experiment reveals that LLMs are prone to generate biased stances due to sentiment-stance spurious correlations and preference towards certain individuals and topics. Furthermore, the results demonstrate a strong negative correlation between stance bias and stance detection performance, underscoring the importance of mitigating bias to enhance the utility of LLMs in stance detection. Therefore, in this paper, we propose a Counterfactual Augmented Calibration Network (FACTUAL), which a novel calibration network is devised to calibrate potential bias in the stance prediction of LLMs. Further, to address the challenge of effectively learning bias representations and the difficulty in the generalizability of debiasing, we construct counterfactual augmented data. This approach enhances the calibration network, facilitating the debiasing and out-of-domain generalization. Experimental results on in-target and zero-shot stance detection tasks show that the proposed FACTUAL can effectively mitigate biases of LLMs, achieving state-of-the-art results. @@ -5022,7 +5022,7 @@ FeifeiZhaiInstitute of automation, Chinese academy of science, Chinese Academy of Sciences NanchangCheng YuZhouInstitute of Automation, Chinese Academy of Sciences - ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences + ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences 7116-7131 Simultaneous Machine Translation (SiMT) generates target translation before receiving the whole source sentence and faces a serious hallucination problem. In contrast, traditional offline machine translation (OMT) models exhibit significantly fewer hallucinations. Motivated by this disparity, we propose Knowledge Distillation for SiMT (KD-SiMT), a simple yet effective method that utilizes the OMT model to mitigate hallucinations in SiMT. Experiments on Zh\rightarrowEn and De\rightarrowEn tasks demonstrate that KD-SiMT effectively reduces hallucinations and enhances the SiMT performance. Furthermore, we systematically investigate the deficiencies in SiMT models related to serious hallucinations and the effect of KD-SiMT. Specifically, we design targeted tasks and metrics to quantitatively evaluate the components in SiMT models from the perspectives of model structure and knowledge acquisition. Our analyses reveal that inaccurate source representations and imbalanced cross-attention are more likely to occur in SiMT models when generating hallucinations, while KD-SiMT alleviates these issues. Besides, we find that KD-SiMT equips SiMT models with sufficient faithfulness knowledge in training, thus reducing hallucinations. 2025.naacl-long.364 @@ -5093,7 +5093,7 @@ TaoGeTencent AI Lab XunWangMicrosoft YanXiaResearch, Microsoft - ManLan + ManLan FuruWeiMicrosoft Research 7212-7234 Strategic reasoning is a complex yet essential capability for intelligent agents. It requires Large Language Model (LLM) agents to adapt their strategies dynamically in multi-agent environments. Unlike static reasoning tasks, success in these contexts depends on anticipating other agents’ beliefs and actions while continuously adjusting strategies to achieve individual goals. LLMs and LLM agents often struggle with strategic reasoning due to the absence of a reasoning framework that enables them to dynamically infer others’ perspectives and adapt to changing environments. Inspired by the Level-K framework from game theory and behavioral economics, which extends reasoning from simple reactions to structured strategic depth, we propose a novel framework: “K-Level Reasoning with Large Language Models (K-R).” This framework employs recursive mechanisms to enable LLMs to achieve varying levels of strategic depth, allowing agents to form higher order beliefs—beliefs about others’ beliefs. We validate this framework through rigorous testing on four testbeds: two classical game theory problems and two social intelligence tasks. The results demonstrate the advantages of K-R in strategic reasoning. Our work presents the first recursive implementation of strategic depth in large language models (LLMs). It establishes a foundation for future research into theory of mind and strategic reasoning in LLMs. @@ -5107,7 +5107,7 @@ DaniloCarvalhoUniversity of Manchester OskarWysocki MarcoValentinoUniversity of Sheffield - AndreFreitasIdiap Research Institute and University of Manchester + AndreFreitasIdiap Research Institute and University of Manchester 7235-7258 Syllogistic reasoning is crucial for Natural Language Inference (NLI). This capability is particularly significant in specialized domains such as biomedicine, where it can support automatic evidence interpretation and scientific discovery. This paper presents SylloBio-NLI, a novel framework that leverages external ontologies to systematically instantiate diverse syllogistic arguments for biomedical NLI. We employ SylloBio-NLI to evaluate Large Language Models (LLMs) on identifying valid conclusions and extracting supporting evidence across 28 syllogistic schemes instantiated with human genome pathways. Extensive experiments reveal that biomedical syllogistic reasoning is particularly challenging for zero-shot LLMs, which achieve an average accuracy between 70% on generalized modus ponens and 23% on disjunctive syllogism. At the same time, we found that few-shot prompting can boost the performance of different LLMs, including Gemma (+14%) and LLama-3 (+43%). However, a deeper analysis shows that both techniques exhibit high sensitivity to superficial lexical variations, highlighting a dependency between reliability, models’ architecture, and pre-training regime. Overall, our results indicate that, while in-context examples have the potential to elicit syllogistic reasoning in LLMs, existing models are still far from achieving the robustness and consistency required for safe biomedical NLI applications. 2025.naacl-long.371 @@ -5129,7 +5129,7 @@ Muhammad ArslanManzoorMohamed bin Zayed University of Artificial Intelligence RuihongZeng DilshodAzizov - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence ShangsongLiangSUN YAT-SEN UNIVERSITY 7279-7295 In the current era of rapidly growing digital data, evaluating the political bias and factuality of news outlets has become more important for seeking reliable information online. In this work, we study the classification problem of profiling news media from the lens of political bias and factuality. Traditional profiling methods, such as Pre-trained Language Models (PLMs) and Graph Neural Networks (GNNs) have shown promising results, but they face notable challenges. PLMs focus solely on textual features, causing them to overlook the complex relationships between entities, while GNNs often struggle with media graphs containing disconnected components and insufficient labels. To address these limitations, we propose MediaGraphMind (MGM), an effective solution within a variational Expectation-Maximization (EM) framework. Instead of relying on limited neighboring nodes, MGM leverages features, structural patterns, and label information from globally similar nodes. Such a framework not only enables GNNs to capture long-range dependencies for learning expressive node representations but also enhances PLMs by integrating structural information and therefore improving the performance of both models. The extensive experiments demonstrate the effectiveness of the proposed framework and achieve new state-of-the-art results. Further, we share our repository which contains the dataset, code, and documentation. @@ -5201,7 +5201,7 @@ RenxiWang XudongHanMohamed bin Zayed University of Artificial Intelligence YixuanZhang - TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne + TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne HaonanLi 7385-7398 Interaction trajectories between agents and environments have proven effective in tuning LLMs into task-specific agents. However, constructing these trajectories, especially successful trajectories, is often computationally and time intensive due to the relatively low success rates of even the most advanced LLMs, such as GPT-4 and Claude. Additionally, common training paradigms like supervised fine-tuning (SFT) and reinforcement learning (RL) not only require large volumes of data but also have specific demands regarding the trajectories used. For instance, existing SFT approaches typically utilize only positive examples, limiting their efficiency in low-resource scenarios. To address this, we introduce Negative-Aware Training (NAT), a straightforward yet effective method that leverages both successful and failed trajectories for fine-tuning, maximizing the utility of limited resources. Experimental results demonstrate that NAT consistently surpasses existing methods, including SFT, DPO, and PPO, across various tasks. @@ -5320,7 +5320,7 @@ Sabine N Van DerVeerUniversity of Manchester LamieceHassan ChenghuaLinUniversity of Manchester - GoranNenadicUniversity of Manchester + GoranNenadicUniversity of Manchester 7548-7561 Topic modelling is a pivotal unsupervised machine learning technique for extracting valuable insights from large document collections. Existing neural topic modelling methods often encode contextual information of documents, while ignoring contextual details of candidate centroid words, leading to the inaccurate selection of topic words due to the *contextualization gap*. In parallel, it is found that functional words are frequently selected over topical words. To address these limitations, we introduce **CAST**: **C**orpus-**A**ware **S**elf-similarity Enhanced **T**opic modelling, a novel topic modelling method that builds upon candidate centroid word embeddings contextualized on the dataset, and a novel self-similarity-based method to filter out less meaningful tokens. Inspired by findings in contrastive learning that self-similarities of functional token embeddings in different contexts are much lower than topical tokens, we find self-similarity to be an effective metric to prevent functional words from acting as candidate topic words. Our approach significantly enhances the coherence and diversity of generated topics, as well as the topic model’s ability to handle noisy data. Experiments on news benchmark datasets and one Twitter dataset demonstrate the method’s superiority in generating coherent, diverse topics, and handling noisy data, outperforming strong baselines. 2025.naacl-long.386 @@ -5330,7 +5330,7 @@ A Zero-Shot Open-Vocabulary Pipeline for Dialogue Understanding AbdulfattahSafa - Gözde GülŞahinKoç University + Gözde GülŞahinKoç University 7562-7579 Dialogue State Tracking (DST) is crucial for understanding user needs and executing appropriate system actions in task-oriented dialogues. Majority of existing DST methods are designed to work within predefined ontologies and assume the availability of gold domain labels, struggling with adapting to new slots values. While Large Language Models (LLMs)-based systems show promising zero-shot DST performance, they either require extensive computational resources or they underperform existing fully-trained systems, limiting their practicality. To address these limitations, we propose a zero-shot, open-vocabulary system that integrates domain classification and DST in a single pipeline. Our approach includes reformulating DST as a question-answering task for less capable models and employing self-refining prompts for more adaptable ones. Our system does not rely on fixed slot values defined in the ontology allowing the system to adapt dynamically. We compare our approach with existing SOTA, and show that it provides up to 20% better Joint Goal Accuracy (JGA) over previous methods on datasets like MultiWOZ 2.1, with up to 90% fewer requests to the LLM API. 2025.naacl-long.387 @@ -5480,7 +5480,7 @@ Functional Lexicon in Subword Tokenization Zachary WilliamHoptonUniversity of Zurich and University of Zurich YvesScherrerUniversity of Oslo - TanjaSamardzicUniversity of Zurich + TanjaSamardzicUniversity of Zurich 7839-7853 The distinction between function and content units of the lexicon has been somewhat neglected in recent NLP work, but it could still be useful when working with low-resource languages, and, in particular, to improve cross-lingual transfer. In this paper, we investigate to what extent BPE subword tokenization can be used to identify units of the functional lexicon in a language without any annotated data. We analyze subword tokens in terms of their productivity and attempt to find thresholds that best distinguish function from content tokens. On a sample of seven diverse languages, we find that the best results are obtained with 50 BPE merges. We also show that this subword tokenization setting can be beneficial for the interlinear glossing task. 2025.naacl-long.398 @@ -5677,7 +5677,7 @@ Through the Lens of History: Methods for Analyzing Temporal Variation in Content and Framing of State-run <fixed-case>C</fixed-case>hinese Newspapers ShijiaLiuNortheastern University - David A.SmithNortheastern University + David A.SmithNortheastern University 8143-8172 State-run Chinese newspapers are believed to strategically select and frame news articles to align with the shifting political tides of the country. This paper describes methods to quantify these changes in content and framing over time. Looking at more than 50 years of articles from the People’s Daily and Reference News, we analyze differences in name mentions and sentiment in news articles for politicians before and after their deaths, as well as during and not during certain political events. We find significant estimates of difference, reflecting the changes in various aspects of the political environment in China during different time periods. We also apply change point detection methods to identify turning points in time series data of name mentions and sentiment. The identified turning points show a high co-occurrence with crucial political events and deaths of politicians. Furthermore, we utilize topic modeling to analyze the framing choices for articles written in different decades. The changes in frequent topic words are more significant in People’s Daily than in Reference News, which is consistent with the focus shifts of the Chinese central government in history. Finally, by using pre-trained language models to predict masked names in news articles, we analyze the distinctiveness of the language used to report individuals. 2025.naacl-long.414 @@ -5725,7 +5725,7 @@ <fixed-case>GL</fixed-case>i<fixed-case>REL</fixed-case> - Generalist Model for Zero-Shot Relation Extraction JackBoylanGeorgia Institute of Technology - ChrisHokampQuantexa + ChrisHokampQuantexa Demian GholipourGhalandariQuantexa 8230-8245 We introduce GLiREL, an efficient architecture and training paradigm for zero-shot relation classification. Identifying relationships between entities is a key task in information extraction pipelines. The zero-shot setting for relation extraction, where a taxonomy of relations is not pre-specified, has proven to be particularly challenging because of the computational complexity of inference, and because of the lack of labeled training data with sufficient coverage. Existing approaches rely upon distant supervision using auxiliary models to generate training data for unseen labels, upon very large general-purpose large language models (LLMs), or upon complex pipelines models with multiple inference stages. Inspired by the recent advancements in zero-shot named entity recognition, this paper introduces an approach to efficiently and accurately predict zero-shot relationship labels between multiple entities in a single forward pass. Experiments using the FewRel and WikiZSL benchmarks demonstrate that our approach achieves state-of-the-art results on the zero-shot relation classification task. In addition, we contribute a protocol for synthetically-generating datasets with diverse relation labels. @@ -5738,7 +5738,7 @@ SachinKumarOhio State University, Columbus Chan YoungPark YuliaTsvetkovDepartment of Computer Science, University of Washington - Noah A.SmithUniversity of Washington and Allen Institute for Artificial Intelligence + Noah A.SmithUniversity of Washington and Allen Institute for Artificial Intelligence HannanehHajishirzi 8246-8279 Conventional algorithms for training language models (LMs) with human feedback rely on preferences that are assumed to account for an “average” user, disregarding subjectivity and finer-grained variations. Recent studies have raised concerns that aggregating such diverse and often contradictory human feedback to finetune models results in generic models that generate outputs not preferred by many user groups, as they tend to average out styles and norms. To address this issue, we draw inspiration from recommendation systems and propose ComPO, a method to personalize preference optimization in LMs by contextualizing the probability distribution of model outputs with the preference provider. Focusing on group-level preferences rather than individuals, we collect and release ComPRed, a question answering dataset with community-level preferences from Reddit. This dataset facilitates studying diversity in preferences without incurring privacy concerns associated with individual feedback. Our experiments reveal that conditioning language models on a community identifier (i.e., subreddit name) during preference tuning substantially enhances model performance. Conversely, replacing this context with random subreddit identifiers significantly diminishes performance, highlighting the effectiveness of our approach in tailoring responses to communities’ preferences. @@ -5812,7 +5812,7 @@ <fixed-case>IHE</fixed-case>val: Evaluating Language Models on Following the Instruction Hierarchy - ZhihanZhang + ZhihanZhang ShiyangLiAmazon ZixuanZhang XinLiuAmazon @@ -5857,7 +5857,7 @@ PhilipSchroeder Nathaniel W.Morgan HongyinLuoMassachusetts Institute of Technology - James R.GlassMassachusetts Institute of Technology + James R.GlassMassachusetts Institute of Technology 8418-8442 Large language models (LLMs) have shown impressive capabilities across diverse settings, but still struggle as the length and complexity of the context increases. To address this challenge, we propose Thinking Recursively and Dynamically (ThReaD). THREAD frames model generation as a thread of execution that, based on the context, can run to completion or dynamically spawn new threads. By spawning, threads can offload work (e.g., thinking, retrieving information) to child threads, which only return tokens needed for the parent thread to do its work. We apply THREAD in the settings of LLM task solving and question answering, where the dynamic threading allows the model to recursively decompose the given task or question into progressively simpler sub-problems that can be solved by separate child threads. We test THREAD, implemented using a few-shot learning approach, on diverse benchmarks for agent tasks and data-grounded question answering. THREAD achieves state-of-the-art performance with GPT-4 and GPT-3.5 on these benchmarks, including ALFWorld, TextCraft, and WebShop, along with two new benchmarks, DataCommons QA and MIMIC-III ICU QA. In addition, THREAD outperforms existing frameworks by 10% to 50% absolute points with smaller models, including Llama-3-8b and CodeLlama-7b. 2025.naacl-long.427 @@ -5868,7 +5868,7 @@ <fixed-case>CORG</fixed-case>: Generating Answers from Complex, Interrelated Contexts HyunjiLeeKorea Advanced Institute of Science & Technology FranckDernoncourt - TrungBuiAdobe Research + TrungBuiAdobe Research SeunghyunYoonAdobe Research 8443-8460 In a real-world corpus, knowledge frequently recurs across documents but often contains inconsistencies due to ambiguous naming, outdated information, or errors, leading to complex interrelationships between contexts. Previous research has shown that language models struggle with these complexities, typically focusing on single factors in isolation. We classify these relationships into four types: distracting, ambiguous, counterfactual, and duplicated. Our analysis reveals that no single approach effectively addresses all these interrelationships simultaneously. Therefore, we introduce Context Organizer (COrg), a framework that organizes multiple contexts into independently processed groups. This design allows the model to efficiently find all relevant answers while ensuring disambiguation. COrg consists of three key components: a graph constructor, a reranker, and an aggregator. Our results demonstrate that COrg balances performance and efficiency effectively, outperforming existing grouping methods and achieving comparable results to more computationally intensive, single-context approaches. @@ -5980,7 +5980,7 @@ JustinQiu ZacharyHorvitz MariannaApidianakiUniversity of Pennsylvania, University of Pennsylvania - KathleenMcKeown + KathleenMcKeown ChrisCallison-BurchUniversity of Pennsylvania and University of Pennsylvania 8662-8685 Style representations aim to embed texts with similar writing styles closely and texts with different styles far apart, regardless of content. However, the contrastive triplets often used for training these representations may vary in both style and content, leading to potential content leakage in the representations. We introduce StyleDistance, a novel approach to training stronger content-independent style embeddings. We use a large language model to create a synthetic dataset of near-exact paraphrases with controlled style variations, and produce positive and negative examples across 40 distinct style features for precise contrastive learning. We assess the quality of our synthetic data and embeddings through human and automatic evaluations. StyleDistance enhances the content-independence of style embeddings, which generalize to real-world benchmarks and outperform leading style representations in downstream applications. @@ -6027,7 +6027,7 @@ JingfengYangAmazon MingyuZhao ZhaoyuZhangAmazon - QinLuAmazon + QinLuAmazon KaiwenMen NingXieAmazon HuashengLi @@ -6070,7 +6070,7 @@ DavidWanDepartment of Computer Science, University of North Carolina at Chapel Hill JesseVigSalesforce Research MohitBansalUniversity of North Carolina at Chapel Hill - ShafiqJotySalesForce.com and Nanyang Technological University + ShafiqJotySalesForce.com and Nanyang Technological University 8791-8810 Large Language Models (LLMs) often exhibit positional bias in long-context settings, under-attending to information in the middle of inputs. We investigate the presence of this bias in long-form summarization, its impact on faithfulness, and various techniques to mitigate this bias. To consistently evaluate faithfulness, we first compile a benchmark of eight human-annotated long-form summarization datasets and perform a meta-evaluation of faithfulness metrics. We show that LLM-based faithfulness metrics, though effective with full-context inputs, remain sensitive to document order, indicating positional bias. Analyzing LLM-generated summaries across six datasets, we find a “U-shaped” trend in faithfulness, where LLMs faithfully summarize the beginning and end of documents but neglect middle content. Perturbing document order similarly reveals models are less faithful when important documents are placed in the middle of the input. We find that this behavior is partly due to shifting focus with context length: as context increases, summaries become less faithful, but beyond a certain length, faithfulness improves as the model focuses on the end. Finally, we experiment with different generation techniques to reduce positional bias and find that prompting techniques effectively direct model attention to specific positions, whereas more sophisticated approaches offer limited improvements. Our data and code will be publicly available. 2025.naacl-long.442 @@ -6119,7 +6119,7 @@ Kill two birds with one stone: generalized and robust <fixed-case>AI</fixed-case>-generated text detection via dynamic perturbations YinghanZhou - JuanWenChina Agricultural University + JuanWenChina Agricultural University WanliPengChina Agricultural University XueYimingChina Agricultural University ZiWeiZhang @@ -6137,7 +6137,7 @@ FangzhiXuXi’an Jiaotong University JianbingZhangNanjing University HaoZhou - YangLiu + YangLiu 8876-8892 Chain-of-thought (CoT) has proven to improve the reasoning capability of large language models (LLMs). However, due to the complexity of multimodal scenarios and the difficulty in collecting high-quality CoT data, CoT reasoning in multimodal LLMs has been largely overlooked. To this end, we propose a simple yet effective self-training framework, R^3V, which iteratively enhances the model’s Vision-language Reasoning by Reflecting on CoT Rationales. Our framework consists of two interleaved parts: (1) iteratively bootstrapping positive and negative solutions for reasoning datasets, and (2) reflection on rationale for learning from mistakes. Specifically, we introduce the self-refine and self-select losses, enabling the model to refine flawed rationale and derive the correct answer by comparing rationale candidates. Experiments on a wide range of vision-language tasks show that R^3V consistently improves multimodal LLM reasoning, achieving a relative improvement of 23% to 60% over GPT-distilled baselines. Additionally, our approach supports self-reflection on generated solutions, further boosting performance through test-time computation. Our code is available at https://github.com/njucckevin/MM-Self-Improve. 2025.naacl-long.447 @@ -6272,7 +6272,7 @@ AtharvaNaik MarcusAlenius DanielFriedMeta AI and Carnegie Mellon University - CarolynRoseSchool of Computer Science, Carnegie Mellon University + CarolynRoseSchool of Computer Science, Carnegie Mellon University 9049-9076 The task of automated code review has recently gained a lot of attention from the machine learning community. However, current review comment evaluation metrics rely on comparisons with a human-written reference for a given code change (also called a diff ). Furthermore, code review is a one-to-many problem, like generation and summarization, with many “valid reviews” for a diff. Thus, we develop CRScore — a reference-free metric to measure dimensions of review quality like conciseness, comprehensiveness, and relevance. We design CRScore to evaluate reviews in a way that is grounded in claims and potential issues detected in the code by LLMs and static analyzers. We demonstrate that CRScore can produce valid, fine-grained scores of review quality that have the greatest alignment with human judgment among open-source metrics (0.54 Spearman correlation) and are more sensitive than reference-based metrics. We also release a corpus of 2.9k human-annotated review quality scores for machine-generated and GitHub review comments to support the development of automated metrics. 2025.naacl-long.457 @@ -6375,7 +6375,7 @@ Viet ThanhPhamMonash University ZhuangLiRoyal Melbourne Institute of Technology LizhenQuMonash University - GholamrezaHaffariMonash University, Monash University and Monash University + GholamrezaHaffariMonash University, Monash University and Monash University 9207-9228 Large language models, despite their remarkable success in recent years, still exhibit severe cultural bias. Therefore, in this paper, we introduce CultureInstruct, a large-scale instruction-tuning dataset designed to reduce cultural bias in LLMs. CultureInstruct is constructed with an automatic pipeline, utilizing public web sources and a specialized LLM to generate instruction. Our data comprises 430K instructions, ranging from classic NLP tasks to complex reasoning. CultureInstruct also covers 11 most relevant topics to cultural knowledge, making it highly diverse. Our experiments show that fine-tuning LLMs with CultureInstruct results in consistent improvements across three types of cultural benchmarks, including (i) general cultural knowledge, (ii) human opinions and values, and (iii) linguistic cultural bias. Our best model, Qwen2-Instruct 72B + CultureInstruct, outperforms GPT-4o Mini and GPT-4o with 18.47% and 13.07% average relative improvements on cultural benchmarks. 2025.naacl-long.465 @@ -6387,7 +6387,7 @@ LovishMadaanMeta and University College London, University of London DavidEsiobuFacebook PontusStenetorpUniversity College London - BarbaraPlankLudwig-Maximilians-Universität München + BarbaraPlankLudwig-Maximilians-Universität München DieuwkeHupkesFacebook 9229-9242 In the recent past, a popular way of evaluating natural language understanding (NLU), was to consider a model’s ability to perform natural language inference (NLI) tasks. In this paper, we investigate if NLI tasks, that are rarely used for LLM evaluation, can still be informative for evaluating LLMs. Focusing on five different NLI benchmarks across six models of different scales, we investigate if they are able to discriminate models of different size and quality and how their accuracies develop during training. Furthermore, we investigate the extent to which the softmax distributions of models align with human distributions in cases where statements are ambiguous or vague. Overall, our results paint a positive picture for the NLI tasks: we find that they are able to discriminate well between models at various stages of training, yet are not (all) saturated. Furthermore, we find that while the similarity of model distributions with human label distributions increases with scale, it is still much higher than the similarity between two populations of humans, making it a potentially interesting statistic to consider. @@ -6446,7 +6446,7 @@ HaoYangMonash University LizhenQuMonash University EhsanShareghiMonash University - GholamrezaHaffariMonash University, Monash University and Monash University + GholamrezaHaffariMonash University, Monash University and Monash University 9292-9306 Large Multimodal Models (LMMs) have demonstrated the ability to interact with humans under real-world conditions by combining Large Language Models (LLMs) and modality encoders to align multimodal information (visual and auditory) with text. However, such models raise new safety challenges of whether models that are safety-aligned on text also exhibit consistent safeguards for multimodal inputs. Despite recent safety-alignment research on vision LMMs, the safety of audio LMMs remains under-explored. In this work, we comprehensively red team the safety of five advanced audio LMMs under three settings: (i) harmful questions in both audio and text formats, (ii) harmful questions in text format accompanied by distracting non-speech audio, and (iii) speech-specific jailbreaks. Our results under these settings demonstrate that open-source audio LMMs suffer an average attack success rate of 69.14% on harmful audio questions, and exhibit safety vulnerabilities when distracted with non-speech audio noise. Our speech-specific jailbreaks on Gemini-1.5-Pro achieve an attack success rate of 70.67% on the harmful query benchmark. We provide insights on what could cause these reported safety-misalignments. Warning: this paper contains offensive examples. 2025.naacl-long.470 @@ -6483,7 +6483,7 @@ SrijaMukhopadhyay AbhishekRajgaria PreranaKhatiwada - ManishShrivastavaInternational Institute of Information Technology Hyderabad, India + ManishShrivastavaInternational Institute of Information Technology Hyderabad, India DanRoth VivekGuptaArizona State University 9348-9378 @@ -6497,7 +6497,7 @@ MinXiao JunnanZhuInstitute of automation, Chinese academy of science, Chinese Academy of Sciences FeifeiZhaiInstitute of automation, Chinese academy of science, Chinese Academy of Sciences - ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences + ChengqingZongInstitute of automation, Chinese academy of science, Chinese Academy of Sciences YuZhouInstitute of Automation, Chinese Academy of Sciences 9379-9392 Existing multimodal summarization approaches struggle with scenarios involving numerous images as input, leading to a heavy load for readers. Summarizing both the input text and numerous images helps readers quickly grasp the key points of multimodal input. This paper introduces a novel task, Numerous Images-Oriented Multimodal Summarization (NIMMS). To benchmark this task, we first construct the dataset based on a public multimodal summarization dataset. Considering that most existing metrics evaluate summaries from a unimodal perspective, we propose a new Multimodal Information evaluation (M-info) method, measuring the differences between the generated summary and the multimodal input. Finally, we compare various summarization methods on NIMMS and analyze associated challenges. Experimental results have shown that M-info correlates more closely with human judgments than five widely used metrics. Meanwhile, existing models struggle with summarizing numerous images. We hope that this research will shed light on the development of multimodal summarization. Furthermore, our code and dataset will be released to the public. @@ -6526,7 +6526,7 @@ BingzhengGanHuawei Technologies Ltd. YufanZhaoHuawei International Pte. Ltd. TianyiZhang - JingHuangHuawei Technologies Ltd. + JingHuangHuawei Technologies Ltd. LiYusu Shu XianTeo ChangwangZhangCCF Theoretical Computer Science Technical Committee and OPPO Research Institute @@ -6640,7 +6640,7 @@ ChaoqunLiu WenxuanZhangSingapore University of Technology and Design YiranZhaoNational University of Singapore - Anh TuanLuuNanyang Technological University + Anh TuanLuuNanyang Technological University LidongBingShanda Group and Alibaba Group 9594-9614 Large language models (LLMs) have demonstrated multilingual capabilities, yet they are mostly English-centric due to the imbalanced training corpora. While prior works have leveraged this bias to enhance multilingual performance through translation, they have been largely limited to natural language processing (NLP) tasks. In this work, we extend the evaluation to real-world user queries and non-English-centric LLMs, offering a broader examination of multilingual performance. Our key contribution lies in demonstrating that while translation into English can boost the performance of English-centric LLMs on NLP tasks, it is not universally optimal. For culture-related tasks that need deep language understanding, prompting in the native language proves more effective as it better captures the nuances of culture and language. Our experiments expose varied behaviors across LLMs and tasks in the multilingual context, underscoring the need for a more comprehensive approach to multilingual evaluation. Therefore, we call for greater efforts in developing and evaluating LLMs that go beyond English-centric paradigms. @@ -6713,7 +6713,7 @@ Grounding Fallacies Misrepresenting Scientific Publications in Evidence MaxGlocknerTechnische Universität Darmstadt YufangHouIT:U Interdisciplinary Transformation University Austria, Technische Universität Darmstadt and IBM Research Ireland - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence IrynaGurevychInstitute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt 9732-9767 Health-related misinformation claims often falsely cite a credible biomedical publication as evidence. These publications only superficially seem to support the false claim, when logical fallacies are applied. In this work, we aim to detect and to highlight such fallacies, which requires assessing the exact content of the misrepresented publications. To achieve this, we introduce MissciPlus, an extension of the fallacy detection dataset Missci. MissciPlus extends Missci by grounding the applied fallacies in real-world passages from misrepresented studies. This creates a realistic test-bed for detecting and verbalizing fallacies under real-world input conditions, and enables new and realistic passage-retrieval tasks. MissciPlus is the first logical fallacy dataset which pairs the real-world misrepresented evidence with incorrect claims, identical to the input to evidence-based fact-checking models. With MissciPlus, we i) benchmark retrieval models in identifying passages that support claims only with fallacious reasoning, ii) evaluate how well LLMs verbalize fallacious reasoning based on misrepresented scientific passages, and iii) assess the effectiveness of fact-checking models in refuting claims that misrepresent biomedical research. Our findings show that current fact-checking models struggle to use misrepresented scientific passages to refute misinformation. Moreover, these passages can mislead LLMs into accepting false claims as true. @@ -6761,7 +6761,7 @@ Grammar Control in Dialogue Response Generation for Language Learning Chatbots DominikGlandorfEPFL - EPF Lausanne PengCuiETHZ - ETH Zurich - DetmarMeurersEberhard-Karls-Universität Tübingen + DetmarMeurersEberhard-Karls-Universität Tübingen MrinmayaSachanSwiss Federal Institute of Technology 9820-9839 Chatbots based on large language models offer cheap conversation practice opportunities for language learners. However, they are hard to control for linguistic forms that correspond to learners’ current needs, such as grammar. We control grammar in chatbot conversation practice by grounding a dialogue response generation model in a pedagogical repository of grammar skills. We also explore how this control helps learners to produce specific grammar. We comprehensively evaluate prompting, fine-tuning, and decoding strategies for grammar-controlled dialogue response generation. Strategically decoding Llama3 outperforms GPT-3.5 when tolerating minor response quality losses. Our simulation predicts grammar-controlled responses to support grammar acquisition adapted to learner proficiency. Existing language learning chatbots and research on second language acquisition benefit from these affordances. Code available on GitHub. @@ -6817,7 +6817,7 @@ LiangZhu RuiWangInternational Digital Economy Academy, International Digital Economy Academy XiWangUniversity of Sheffield - GholamrezaHaffariMonash University, Monash University and Monash University + GholamrezaHaffariMonash University, Monash University and Monash University JiaxingZhangIDEA 9902-9921 Long-term memory is important for chatbots and dialogue systems (DS) to create consistent and human-like conversations, evidenced by numerous developed memory-augmented DS (MADS). To evaluate the effectiveness of such MADS, existing commonly used evaluation metrics, like retrieval accuracy and perplexity (PPL), mainly focus on query-oriented factualness and language quality assessment. However, these metrics often lack practical value. Moreover, the evaluation dimensions are insufficient for human-like assessment in DS. Regarding memory-recalling paradigms, current evaluation schemes only consider passive memory retrieval while ignoring diverse memory recall with rich triggering factors, e.g., emotions and surroundings, which can be essential in emotional support scenarios. To bridge the gap, we construct a novel Memory-Augmented Dialogue Benchmark (MADail-Bench) covering various memory-recalling paradigms based on cognitive science and psychology theories. The benchmark assesses two tasks separately: memory retrieval and memory recognition with the incorporation of both passive and proactive memory recall data. We introduce new scoring criteria to the evaluation, including memory injection, emotion support (ES) proficiency, and intimacy, to comprehensively assess generated responses. Results from cutting-edge embedding models and large language models on this benchmark indicate the potential for further advancement. Extensive testing further reveals correlations between memory injection, ES proficiency, and intimacy. @@ -7009,7 +7009,7 @@ JieHe YijunYangEdinburgh University, University of Edinburgh WanqiuLong - DeyiXiongTianjin University + DeyiXiongTianjin University VictorGutierrez BasultoCardiff University Jeff Z.PanUniversity of Edinburgh, University of Edinburgh 10219-10244 @@ -7111,7 +7111,7 @@ A Grounded Typology of Word Classes ColemanHaleyUniversity of Edinburgh - SharonGoldwaterUniversity of Edinburgh + SharonGoldwaterUniversity of Edinburgh EdoardoPontiUniversity of Edinburgh 10380-10399 In this work, we propose a grounded approach to meaning in language typology. Using images captioned across languages, we can treat the images as an empirical language agnostic representation of meaning, allowing the quantification of language function and semantics. Using principles from information theory, we define “groundedness”, an empirical measure of contextual semantic contentfulness which can be computed using multilingual (vision-and-)language models. As an initial application, we apply this measure to the typology of word classes. We find our measure captures the contentfulness asymmetry between functional (grammatical) and lexical (content) classes across languages, but contradicts the view that functional classes do not convey content. We release a dataset of groundedness scores for 30 languages. Our results suggest that the grounded typology approach can provide quantitative evidence about semantic function in language. @@ -7152,7 +7152,7 @@ JunyoungSungKorea University WonpyoParkGoogle ChanjunParkKorea University - Paul HongsuckSeoKorea University + Paul HongsuckSeoKorea University 10431-10442 While large language models (LLMs) excel in generating coherent and contextually rich outputs, their capacity to efficiently handle long-form contexts is limited by fixed-length position embeddings. Additionally, the computational cost of processing long sequences increases quadratically, making it challenging to extend context length. To address these challenges, we propose Long-form Context Injection with Recurrent Compression (LCIRC), a method that enables the efficient processing long-form sequences beyond the model’s length limit through recurrent compression without retraining the entire model. We further introduce query dependent context modeling, which selectively compresses query-relevant information, ensuring that the model retains the most pertinent content. Our empirical results demonstrate that Query Dependent LCIRC (QD-LCIRC) significantly improves LLM’s ability to manage extended contexts, making it well-suited for tasks that require both comprehensive context understanding and query relevance. 2025.naacl-long.524 @@ -7163,7 +7163,7 @@ A Template Is All You Meme LukeBatesTechnische Universität Darmstadt Peter EbertChristensen - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence IrynaGurevychInstitute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt 10443-10475 Templatic memes, characterized by a semantic structure adaptable to the creator’s intent, represent a significant yet underexplored area within meme processing literature. With the goal of establishing a new direction for computational meme analysis, here we create a knowledge base composed of more than 5,200 meme templates, information about them, and 54,000 examples of template instances (templatic memes). To investigate the semantic signal of meme templates, we show that we can match memes in datasets to base templates contained in our knowledge base with a distance-based lookup. To demonstrate the power of meme templates, we create TSplit, a method to reorganize datasets, where a template or templatic instance can only appear in either the training or test split. Our re-split datasets enhance general meme knowledge and improve sample efficiency, leading to more robust models. Our examination of meme templates results in state-of-the-art performance for every dataset we consider, paving the way for analysis grounded in templateness. @@ -7209,7 +7209,7 @@ Evaluating Defeasible Reasoning in <fixed-case>LLM</fixed-case>s with <fixed-case>DEFREASING</fixed-case> EmilyAllawayUniversity of Edinburgh - KathleenMcKeown + KathleenMcKeown 10540-10558 2025.naacl-long.529 allaway-mckeown-2025-evaluating @@ -7258,7 +7258,7 @@ XunliangCaiMeituan TaoGuiFudan University QiZhangFudan University - XuanjingHuangFudan University + XuanjingHuangFudan University 10627-10646 Self-improvement methods enable large language models (LLMs) to generate solutions themselves and iteratively train on filtered, high-quality rationales. This process proves effective and reduces the reliance on human supervision in LLMs’ reasoning, but the performance soon plateaus. We delve into the process and find that models tend to over-sample on easy queries and under-sample on queries they have yet to master. As iterations proceed, this imbalance in sampling is exacerbated, leading to a long-tail distribution where solutions to difficult queries almost diminish. This phenomenon limits the performance gain of self-improving models. A straightforward solution is brute-force sampling to balance the distribution, which significantly raises computational costs. In this paper, we introduce Guided Self-Improvement (GSI), a strategy aimed at improving the efficiency of sampling challenging heavy-tailed data. It leverages Socratic-style guidance signals to help LLM reasoning with complex queries, reducing the exploration effort and minimizing computational overhead. Experiments on four models across diverse mathematical tasks show that GSI strikes a balance between performance and efficiency, while also being effective on held-out tasks. 2025.naacl-long.533 @@ -7329,7 +7329,7 @@ Main Predicate and Their Arguments as Explanation Signals For Intent Classification SameerPimparkhede - PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology + PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology 10778-10789 Intent classification is crucial for conversational agents (chatbots), and deep learning models perform well in this area. However, little research has been done on the explainability of intent classification due to the absence of suitable benchmark data. Human annotation of explanation signals in text samples is time-consuming and costly. However, from inspection of data on intent classification, we see that, more often than not, the main verb denotes the action, and the direct object indicates the domain of conversation, serving as explanation signals for intent. This observation enables us to hypothesize that the main predicate in the text utterances, along with the arguments of the main predicate, can serve as explanation signals. Leveraging this, we introduce a new technique to automatically augment text samples from intent classification datasets with word-level explanations. We mark main predicates (primarily verbs) and their arguments (dependency relations) as explanation signals in benchmark intent classification datasets ATIS and SNIPS, creating a unique 21k-instance dataset for explainability. Further, we experiment with deep learning and language models. We observe that models that work well for classification do not perform well in explainability metrics like plausibility and faithfulness. We also observe that guiding models to focus on explanation signals from our dataset during training improves the plausibility Token F1 score by 3-4%, improving the model’s reasoning. 2025.naacl-long.539 @@ -7473,7 +7473,7 @@ Fine-Grained Transfer Learning for Harmful Content Detection through Label-Specific Soft Prompt Tuning FaezeGhorbanpour ViktorHangyaFraunhofer IIS - AlexanderFraserTechnical University of Munich + AlexanderFraserTechnical University of Munich 11047-11061 The spread of harmful content online is a dynamic issue evolving over time. Existing detection models, reliant on static data, are becoming less effective and generalizable. Developing new models requires sufficient up-to-date data, which is challenging. A potential solution is to combine existing datasets with minimal new data. However, detection tasks vary—some focus on hate speech, offensive, or abusive content, which differ in the intent to harm, while others focus on identifying targets of harmful speech such as racism, sexism, etc.—raising the challenge of handling nuanced class differences. To address these issues, we introduce a novel transfer learning method that leverages class-specific knowledge to enhance harmful content detection. In our approach, we first present label-specific soft prompt tuning, which captures and represents class-level information. Secondly, we propose two approaches to transfer this fine-grained knowledge from source (existing tasks) to target (unseen and new tasks): initializing the target task prompts from source prompts and using an attention mechanism that learns and adjusts attention scores to utilize the most relevant information from source prompts. Experiments demonstrate significant improvements in harmful content detection across English and German datasets, highlighting the effectiveness of label-specific representations and knowledge transfer. 2025.naacl-long.551 @@ -7517,7 +7517,7 @@ Latent Factor Models Meets Instructions: Goal-conditioned Latent Factor Discovery without Task Supervision ZhouhangXieUniversity of California, San Diego TusharKhotAllen Institute for Artificial Intelligence - BhavanaDalvi MishraAllen Institute for Artificial Intelligence + BhavanaDalvi MishraAllen Institute for Artificial Intelligence HarshitSuranaAllen Institute for Artificial Intelligence JulianMcAuleyUniversity of California, San Diego, University of California, San Diego PeterClarkAllen Institute for Artificial Intelligence @@ -7547,7 +7547,7 @@ Jena D.HwangAllen Institute for Artificial Intelligence XiangRenUniversity of Southern California NouhaDziri - DanJurafskyStanford University + DanJurafskyStanford University MaartenSapCarnegie Mellon University 11148-11167 The ability to communicate uncertainty and knowledge limitations is crucial for the safety of large language models (LLMs). Current evaluations of these abilities typically examine the correspondence between model accuracy and its internal probabilities or linguistic outputs. However, evaluation of the uncertainty of LLM communication should also focus on the behaviors of their human interlocutors: how much do users rely on what the LLM says? We introduce an interaction-centered evaluation approach called Rel-A.I. (pronounced “rely”) that quantifies whether and how humans rely on LLMs’ responses, complementing existing calibration evaluations. Through nine user studies with 450 participants, we investigate three crucial aspects that influence user reliance. We show that emphatic expressions of politeness (e.g., “I’m happy to help!”) that precede LLM answers will cause participants to perceive these models as more competent, and in turn, rely 30% more on their generations. Additionally, the context of the interaction, such as the knowledge domain and nature of previous interactions with the LLM, substantially influences user reliance (e.g., users will rely 10% more on LLMs when responding to questions involving calculations). Our results show that calibration and language quality alone are insufficient in informing which LLMs are safely calibrated, and illustrate the need to consider features of the interactional context. @@ -7559,7 +7559,7 @@ Eliciting Critical Reasoning in Retrieval-Augmented Generation via Contrastive Explanations LeonardoRanaldi MarcoValentinoUniversity of Sheffield - AndreFreitasIdiap Research Institute and University of Manchester + AndreFreitasIdiap Research Institute and University of Manchester 11168-11183 Retrieval-augmented generation (RAG) have emerged as a critical mechanism in contemporary NLP to support Large Language Models (LLMs) in systematically accessing richer factual context. However, the integration of RAG mechanisms bring its inherent challenges, as LLMs need to integrate potentially noisy contexts. Recent studies have shown that LLMs still struggle to critically analyse RAG-based in-context information, a limitation that may lead to incorrect inferences and hallucinations. In this paper, we investigate how to elicit critical arguments in RAG via contrastive explanations. In particular, we propose Contrastive-RAG (CRAG), a framework that (i) retrieves relevant documents given a query,(ii) selects and exemplifies relevant passages, and (iii) generates explanations that explicitly contrast the relevance of the passages to (iv) support the final answer. We show the impact of C-RAG building contrastive reasoning demonstrations from LLMs to instruct smaller models for retrieval-augmented tasks. Extensive experiments demonstrate that CRAG improves state-of-the-art RAG models while (a) requiring significantly fewer prompts and demonstrations and (b) being robust to perturbations in the retrieved documents. 2025.naacl-long.557 @@ -7668,7 +7668,7 @@ <fixed-case>M</fixed-case>ulti<fixed-case>C</fixed-case>hart<fixed-case>QA</fixed-case>: Benchmarking Vision-Language Models on Multi-Chart Problems ZifengZhu MengzhaoJia - ZhihanZhang + ZhihanZhang LangLi MengJiangUniversity of Notre Dame 11341-11359 @@ -7680,7 +7680,7 @@ It Is Not Only the Negative that Deserves Attention! Understanding, Generation & Evaluation of (Positive) Moderation ImanJundi - Eva MariaVecchiUniversity of Stuttgart, Universität Stuttgart + Eva MariaVecchiUniversity of Stuttgart, Universität Stuttgart CarlottaQuensel NeeleFalk GabriellaLapesaGESIS – Leibniz Institute for the Social Sciences and Heinrich-Heine University Düsseldorf @@ -7696,8 +7696,8 @@ KhushangZaveri ShreyaHavaldarUniversity of Pennsylvania SoumnaNema - LyleUngar - Sharath ChandraGuntukuUniversity of Pennsylvania + LyleUngar + Sharath ChandraGuntukuUniversity of Pennsylvania 11396-11415 Shame and pride are social emotions expressed across cultures to motivate and regulate people’s thoughts, feelings, and behaviors. In this paper, we introduce the first cross-cultural dataset of over 10k shame/pride-related expressions with underlying social expectations from ~5.4K Bollywood and Hollywood movies. We examine *how* and *why* shame and pride are expressed across cultures using a blend of psychology-informed language analysis combined with large language models. We find significant cross-cultural differences in shame and pride expression aligning with known cultural tendencies of the USA and India – e.g., in Hollywood, shame-expressions predominantly discuss *self* whereas shame is expressed toward *others* in Bollywood. Women are more sanctioned across cultures and for violating similar social expectations. 2025.naacl-long.568 @@ -7722,7 +7722,7 @@ m<fixed-case>H</fixed-case>uman<fixed-case>E</fixed-case>val - A Multilingual Benchmark to Evaluate Large Language Models for Code Generation - NishatRaihan + NishatRaihan AntoniosAnastasopoulos MarcosZampieri 11432-11461 @@ -7765,7 +7765,7 @@ YundiQianFacebook XueweiWangFacebook SuchinGururanganFacebook and University of Washington, Seattle - ChaoZhangGeorgia Institute of Technology + ChaoZhangGeorgia Institute of Technology MelanieKambadurFacebook DhruvMahajanMeta AI RuiHouMeta Inc. @@ -7877,7 +7877,7 @@ AriadniChristopoulouVerita International School FilipposStamatiouCopenhagen University and University of Stellenbosch AnneLauscherUniversität Hamburg - AndersSøgaardCopenhagen University + AndersSøgaardCopenhagen University 11618-11635 What ethical concerns, if any, do LLM researchers have? We introduce EthiCon, a corpus of 1,580 ethical concern statements extracted from scientific papers published in the ACL Anthology. We extract ethical concern keywords from the statements and show promising results in automating the concern identification process. Through a survey (N=200), we compare the ethical concerns of the corpus to the concerns listed by the general public and professionals in the field. Finally, we compare our retrieved ethical concerns with existing taxonomies and guidelines pointing to gaps and actionable insights. 2025.naacl-long.580 @@ -7954,7 +7954,7 @@ Is In-Context Learning a Type of Error-Driven Learning? Evidence from the Inverse Frequency Effect in Structural Priming ZhenghaoZhouYale University RobertFrankYale University - R. ThomasMcCoyYale University + R. ThomasMcCoyYale University 11712-11725 Large language models (LLMs) have shown the emergent capability of in-context learning (ICL). One line of research has claimed that ICL is functionally equivalent to gradient descent, a type of error-driven learning mechanism. In this paper, we introduce a new way of diagnosing whether ICL is functionally performing error-driven learning. Our approach is based on the inverse frequency effect (IFE)—a phenomenon in which an agent’s behavior is influenced to a greater degree when presented with improbable examples as compared to more likely ones. The IFE has previously been identified in psycholinguistics where humans exhibit the IFE in the context of structural priming (the tendency for people to produce sentence structures they have encountered recently). In that context, the IFE has been used as evidence that human structural priming must involve error-driven learning mechanisms. In our experiments, we simulated structural priming with ICL and found that LLMs indeed display the IFE, with the effect being stronger in larger models. We conclude that at least in the case we studied, ICL is indeed a type of error-driven learning, supporting the hypothesis that an error signal is implicitly computed in the forward pass during ICL. Our results suggest that both humans and LLMs make use of error-driven processing mechanisms in on-line processing. 2025.naacl-long.586 @@ -8143,7 +8143,7 @@ KhyatiMahajanServiceNow Inc VikasYadav JulianSalazarGoogle DeepMind - Philip S.YuUniversity of Illinois, Chicago + Philip S.YuUniversity of Illinois, Chicago MasoudHashemiServiceNow Inc RishabhMaheshwaryServiceNow 11975-11994 @@ -8157,7 +8157,7 @@ MargaretMitchell GiuseppeAttanasioInstituto de Telecomunicações IoanaBaldiniBloomberg - MirunaClinciu + MirunaClinciu JordanCliveChattermill PieterDelobelle MananDeySalesForce.com @@ -8176,7 +8176,7 @@ NikitaNangia AnaeliaOvalleUniversity of California, Los Angeles GiadaPistilliSorbonne University - DragomirRadevYale University + DragomirRadevYale University BeatriceSavoldi VipulRahejaColumbia University, Grammarly and International Institute of Information Technology Hyderabad JeremyQinUniversité de Montréal @@ -8204,10 +8204,10 @@ MarceloViridiano OskarVan Der Wal AdinaYakefu - AurélieNévéolLISN-CNRS / Université Paris Saclay + AurélieNévéolLISN-CNRS / Université Paris Saclay MikeZhang SydneyZinkKBR - ZeerakTalatUniversity of Edinburgh, University of Edinburgh + ZeerakTalatUniversity of Edinburgh, University of Edinburgh 11995-12041 Large Language Models (LLMs) reproduce and exacerbate the social biases present in their training data, and resources to quantify this issue are limited. While research has attempted to identify and mitigate such biases, most efforts have been concentrated around English, lagging the rapid advancement of LLMs in multilingual settings. In this paper, we introduce a new multilingual parallel dataset SHADES to help address this issue, designed for examining culturally-specific stereotypes that may be learned by LLMs. The dataset includes stereotypes from 20 regions around the world and 16 languages, spanning multiple identity categories subject to discrimination worldwide. We demonstrate its utility in a series of exploratory evaluations for both “base” and “instruction-tuned” language models. Our results suggest that stereotypes are consistently reflected across models and languages, with some languages and models indicating much stronger stereotype biases than others. 2025.naacl-long.600 @@ -8289,7 +8289,7 @@ EmilyXiao UriAlonGoogle DeepMind JonathanBerantGoogle and Tel Aviv University - Matthew R.GormleySolventum and School of Computer Science, Carnegie Mellon University + Matthew R.GormleySolventum and School of Computer Science, Carnegie Mellon University GrahamNeubigCarnegie Mellon University 12119-12149 As model context lengths continue to increase, the number of demonstrations that can be provided in-context approaches the size of entire training datasets. We study the behavior of in-context learning (ICL) at this extreme scale on multiple datasets and models. We show that, for many datasets with large label spaces, performance continues to increase with thousands of demonstrations. We contrast this with example retrieval and finetuning: example retrieval shows excellent performance at low context lengths but has diminished gains with more demonstrations; finetuning is more data hungry than ICL but can exceed long-context ICL performance with additional data. We use the ICL setting to study several properties of both in-context learning and long-context models. We show that long-context ICL is less sensitive to random input shuffling than short-context ICL, that grouping of same-label examples negatively impacts performance, and that the performance boosts do not arise from cumulative gain from encoding many examples together. We conclude that long-context ICL can be an effective tool, and may not require long-context attention for encoding the demonstration set at all. @@ -8325,7 +8325,7 @@ Automatically Discovering How Misogyny is Framed on Social Media Rakshitha RaoAilneniUniversity of Texas at Dallas - Sanda M.HarabagiuUniversity of Texas at Dallas + Sanda M.HarabagiuUniversity of Texas at Dallas 12189-12208 Misogyny, which is widespread on social media, can be identified not only by recognizing its many forms but also by discovering how misogyny is framed. This paper considers the automatic discovery of misogyny problems and their frames through the Dis-MP&F method, which enables the generation of a data-driven, rich Taxonomy of Misogyny (ToM), offering new insights in the complexity of expressions of misogyny. Furthermore, the Dis-MP&F method, informed by the ToM, is capable of producing very promising results on a misogyny benchmark dataset. 2025.naacl-long.608 @@ -8356,11 +8356,11 @@ <fixed-case>R</fixed-case>e<fixed-case>IFE</fixed-case>: Re-evaluating Instruction-Following Evaluation YixinLiuYale University KejianShi - AlexanderFabbriSalesForce.com + AlexanderFabbriSalesForce.com YilunZhaoYale University PeiFengWangSalesforce AI Chien-ShengWuSalesforce AI - ShafiqJotySalesForce.com and Nanyang Technological University + ShafiqJotySalesForce.com and Nanyang Technological University ArmanCohanYale University and Allen Institute for Artificial Intelligence 12247-12287 The automatic evaluation of instruction following typically involves using large language models (LLMs) to assess response quality. However, there is a lack of comprehensive evaluation of these LLM-based evaluators across two dimensions: the base LLMs and the evaluation protocols. Therefore, we present a thorough meta-evaluation of instruction following, including 25 base LLMs and 15 recently proposed evaluation protocols, on 4 human-annotated datasets, assessing the evaluation accuracy of the LLM-evaluators. Our evaluation allows us to identify the best-performing base LLMs and evaluation protocols with a high degree of robustness. Moreover, our evaluation reveals key findings: (1) Base LLM performance ranking remains largely consistent across evaluation protocols, with less capable LLMs showing greater improvement from protocol enhancements; (2) Robust evaluation of evaluation protocols requires many base LLMs with varying capability levels, as protocol effectiveness depends on the base LLM used; (3) Evaluation results on different datasets are not always consistent, so a rigorous evaluation requires multiple datasets with distinctive features. We release our meta-evaluation suite ReIFE, which provides the codebase and evaluation result collection for over 500 LLM-evaluators, laying groundwork for future research in instruction-following evaluation. @@ -8420,7 +8420,7 @@ <fixed-case>MICE</fixed-case> for <fixed-case>CAT</fixed-case>s: Model-Internal Confidence Estimation for Calibrating Agents with Tools NishantSubramaniCarnegie Mellon University - JasonEisnerMicrosoft and Johns Hopkins University + JasonEisnerMicrosoft and Johns Hopkins University JustinSvegliatoUniversity of California, Berkeley and Microsoft BenjaminVan DurmeMicrosoft and Johns Hopkins University YuSuOhio State University @@ -8474,7 +8474,7 @@ SreyanGhosh UtkarshTyagi Anton JeranRatnarajah - Chandra Kiran ReddyEvuru + Chandra Kiran ReddyEvuru RamaniDuraiswamiUniversity of Maryland, College Park DineshManochaUniversity of Maryland, College Park 12470-12483 @@ -8499,7 +8499,7 @@ MarkJohnsonMacquarie University KrishnaramKenthapadiOracle Health AI DonDharmasiriOracle - LongDuongOracle + LongDuongOracle Yuan-FangLiMonash University and Oracle 12484-12500 Large language models (LLMs) have shown impressive performance in code understanding and generation, making coding tasks a key focus for researchers due to their practical applications and value as a testbed for LLM evaluation. Data synthesis and filtering techniques have been widely adopted and shown to be highly effective in this context. In this paper, we present a focused survey and taxonomy of these techniques, emphasizing recent advancements. We highlight key challenges, explore future research directions, and offer practical guidance for new researchers entering the field. @@ -8511,7 +8511,7 @@ <fixed-case>P</fixed-case>ara<fixed-case>ICL</fixed-case>: Towards Parallel In-Context Learning XingxuanLi Xuan-PhiNguyenSalesForce.com - ShafiqJotySalesForce.com and Nanyang Technological University + ShafiqJotySalesForce.com and Nanyang Technological University LidongBingShanda Group and Alibaba Group 12501-12511 Large language models (LLMs) have become the norm in natural language processing (NLP), excelling in few-shot in-context learning (ICL) with their remarkable abilities. Nonetheless, the success of ICL largely hinges on the choice of few-shot demonstration examples, making the selection process increasingly crucial. Existing methods have delved into optimizing the quantity and semantic similarity of these examples to improve ICL performances. However, our preliminary experiments indicate that the effectiveness of ICL is limited by the length of the input context. Moreover, varying combinations of few-shot demonstration examples can significantly boost accuracy across different test samples. To address this, we propose a novel method named parallel in-context learning (ParaICL) that effectively utilizes all demonstration examples without exceeding the manageable input context length. ParaICL employs parallel batching to distribute demonstration examples into different batches according to the semantic similarities of the questions in the demonstrations to the test question. It then computes normalized batch semantic scores for each batch. A weighted average semantic objective, constrained by adaptive plausibility, is applied to select the most appropriate tokens. Through extensive experiments, we validate the effectiveness of ParaICL and conduct ablation studies to underscore its design rationale. We further demonstrate that ParaICL can seamlessly integrate with existing methods. @@ -8613,7 +8613,7 @@ Pipeline Analysis for Developing Instruct <fixed-case>LLM</fixed-case>s in Low-Resource Languages: A Case Study on <fixed-case>B</fixed-case>asque AnderCorralOrai NLP Technologies Ixak SarasuaAnteroUniversidad del País Vasco - XabierSaralegi + XabierSaralegi 12636-12655 Large language models (LLMs) are typically optimized for resource-rich languages like English, exacerbating the gap between high-resource and underrepresented languages. This work presents a detailed analysis of strategies for developing a model capable of following instructions in a low-resource language, specifically Basque, by focusing on three key stages: pre-training, instruction tuning, and alignment with human preferences. Our findings demonstrate that continual pre-training with a high-quality Basque corpus of around 600 million words improves natural language understanding (NLU) of the foundational model by over 12 points. Moreover, instruction tuning and human preference alignment using automatically translated datasets proved highly effective, resulting in a 24-point improvement in instruction-following performance. The resulting models, Llama-eus-8B and Llama-eus-8B-instruct, establish a new state-of-the-art for Basque in the sub-10B parameter category. 2025.naacl-long.629 @@ -8651,7 +8651,7 @@ ParamitaKoleyIndian Institute of Technology Kharagpur, Dhirubhai Ambani Institute Of Information and Communication Technology JanardanMisra NiloyGangulyIndian Institute of Technology Kharagpur, - SaptarshiGhoshIndian Institute of Technology Kharagpur + SaptarshiGhoshIndian Institute of Technology Kharagpur 12688-12704 Large language models (LLMs) are increasingly recognized for their exceptional generative capabilities and versatility across various tasks. However, the high inference costs associated with these models have not received adequate attention, particularly when compared to the focus on training costs in existing research. In response to this gap, our study conducts a comprehensive benchmarking of LLM inference energy across a wide range of NLP tasks, where we analyze the impact of different models, tasks, prompts, and system-related factors on inference energy. Specifically, our experiments reveal several interesting insights, including strong correlation of inference energy with output token length and response time. Also, we find that quantization and optimal batch sizes, along with targeted prompt phrases, can significantly reduce energy usage. This study is the first to thoroughly benchmark LLM inference across such a diverse range of aspects, providing insights and offering several recommendations for improving energy efficiency in model deployment. 2025.naacl-long.632 @@ -8853,7 +8853,7 @@ GlebKuzminArtificial Intelligence Research Institute and Institute for Systems Analysis of Russian Academy of Sciences NeemeshYadavSingapore Management University IvanSmirnov - TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne + TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne ArtemShelmanovMohamed bin Zayed University of Artificial Intelligence 95-107 We propose selective debiasing – an inference-time safety mechanism designed to enhance the overall model quality in terms of prediction performance and fairness, especially in scenarios where retraining the model is impractical. The method draws inspiration from selective classification, where at inference time, predictions with low quality, as indicated by their uncertainty scores, are discarded. In our approach, we identify the potentially biased model predictions and, instead of discarding them, we remove bias from these predictions using LEACE – a post-processing debiasing method. To select problematic predictions, we propose a bias quantification approach based on KL divergence, which achieves better results than standard uncertainty quantification methods. Experiments on text classification datasets with encoder-based classification models demonstrate that selective debiasing helps to reduce the performance gap between post-processing methods and debiasing techniques from the at-training and pre-processing categories. @@ -8885,7 +8885,7 @@ YebowenHuUniversity of Central Florida HassanForooshUniversity of Central Florida WeiJinEmory University - FeiLiuEmory University + FeiLiuEmory University 131-141 Countless decisions shape our lives, and it is crucial to understand the how and why behind them. In this paper, we introduce a new LLM decision-making framework called STRUX, which enhances LLM decision-making by providing structured explanations. These include favorable and adverse facts related to the decision, along with their respective strengths. STRUX begins by distilling lengthy information into a concise table of key facts. It then employs a series of self-reflection steps to determine which of these facts are pivotal, categorizing them as either favorable or adverse in relation to a specific decision. Lastly, we fine-tune an LLM to identify and prioritize these key facts to optimize decision-making. STRUX has been evaluated on the challenging task of forecasting stock investment decisions based on earnings call transcripts and demonstrated superior performance against strong baselines. It enhances decision transparency by allowing users to understand the impact of different factors, representing a meaningful step towards practical decision-making with LLMs. 2025.naacl-short.11 @@ -8995,7 +8995,7 @@ TianqingFangTencent AI Lab ZhaoweiWangEdinburgh University, University of Edinburgh and Department of Computer Science and Engineering, Hong Kong University of Science and Technology YangqiuSongHong Kong University of Science and Technology - MarkSteedmanUniversity of Edinburgh + MarkSteedmanUniversity of Edinburgh 229-243 While Large Language Models (LLMs) have showcased remarkable proficiency in reasoning, there is still a concern about hallucinations and unreliable reasoning issues due to semantic associations and superficial logical chains. To evaluate the extent to which LLMs perform robust reasoning instead of relying on superficial logical chains, we propose a new evaluation dataset, the Concept-Reversed Winograd Schema Challenge (CR-WSC), based on the famous Winograd Schema Challenge (WSC) dataset. By simply reversing the concepts to those that are more associated with the wrong answer, we find that the performance of LLMs drops significantly despite the rationale of reasoning remaining the same. Furthermore, we propose Abstraction-of-Thought (AoT), a novel prompt method for recovering adversarial cases to normal cases using conceptual abstraction to improve LLMs’ robustness and consistency in reasoning, as demonstrated by experiments on CR-WSC. 2025.naacl-short.20 @@ -9028,7 +9028,7 @@ <fixed-case>C</fixed-case>o<fixed-case>RAG</fixed-case>: Collaborative Retrieval-Augmented Generation AashiqMuhamed - Mona T.DiabCarnegie Mellon University + Mona T.DiabCarnegie Mellon University VirginiaSmithCarnegie Mellon University 265-276 Retrieval-Augmented Generation (RAG) models excel in knowledge-intensive tasks, especially under few-shot learning constraints. We introduce CoRAG, a framework extending RAG to collaborative settings, where clients jointly train a shared model using a collaborative passage store. To evaluate CoRAG, we introduce CRAB, a benchmark for collaborative homogeneous open-domain question answering. Our experiments demonstrate that CoRAG consistently outperforms both parametric collaborative learning methods and locally trained RAG models in low-resource scenarios. Further analysis reveals the critical importance of relevant passages within the shared store, the surprising benefits of incorporating irrelevant passages, and the potential for hard negatives to negatively impact performance. This introduces a novel consideration in collaborative RAG: the trade-off between leveraging a collectively enriched knowledge base and the potential risk of incorporating detrimental passages from other clients. Our findings underscore the viability of CoRAG, while also highlighting key design challenges and promising avenues for future research. @@ -9104,7 +9104,7 @@ Tsung-cheLi, Academia Sinica Ho Yin SamNgPennsylvania State University Hen-HsenHuangInstitute of Information Science, Academia Sinica - Ting-Hao KennethHuangPennsylvania State University + Ting-Hao KennethHuangPennsylvania State University 342-355 A language can have different varieties. These varieties can affect the performance of natural language processing (NLP) models, including large language models (LLMs), which are often trained on data from widely spoken varieties. This paper introduces a novel and cost-effective approach to benchmark model performance across language varieties. We argue that international online review platforms,such as Booking.com, can serve as effective data sources for constructing datasets that capture comments in different language varieties from similar real-world scenarios, like reviews for the same hotel with the same rating using the same language (e.g., Mandarin Chinese) but different language varieties (e.g., Taiwan Mandarin, Mainland Mandarin). To prove this concept, we constructed a contextually aligned dataset comprising reviews in Taiwan Mandarin and Mainland Mandarin and tested six LLMs in a sentiment analysis task. Our results show that LLMs consistently underperform in Taiwan Mandarin. 2025.naacl-short.29 @@ -9130,7 +9130,7 @@ ShaolingJing JieZhangInstitute of Computing Technology, Chinese Academy of Sciences HuaweiShenInstitute of Computing Technology, Chinese Academy of Sciences - XueqiChengInstitute of Computing Technology, Chinese Academy + XueqiChengInstitute of Computing Technology, Chinese Academy 363-373 2025.naacl-short.31 duan-etal-2025-related @@ -9187,7 +9187,7 @@ HaotianYeCenter for Information and Language Processing RenhaoPei EhsaneddinAsgariQatar Computing Research Institute and University of California, Berkeley - HinrichSchuetze + HinrichSchuetze 414-439 While broad-coverage multilingual natural language processing tools have been developed, a significant portion of the world’s over 7000 languages are still neglected. One reason is the lack of evaluation datasets that cover a diverse range of languages, particularly those that are low-resource or endangered. To address this gap, we present a large-scale text classification dataset encompassing 1504 languages many of which have otherwise limited or no annotated data. This dataset is constructed using parallel translations of the Bible. We develop relevant topics, annotate the English data through crowdsourcing and project these annotations onto other languages via aligned verses. We benchmark a range of existing multilingual models on this dataset. We make our dataset and code available to the public. 2025.naacl-short.36 @@ -9209,7 +9209,7 @@ <fixed-case>F</fixed-case>aith<fixed-case>B</fixed-case>ench: A Diverse Hallucination Benchmark for Summarization by <fixed-case>M</fixed-case>odern <fixed-case>LLM</fixed-case>s - Forrest ShengBaoVectara, Inc. + Forrest ShengBaoVectara, Inc. MiaoranLiIowa State University RenyiQuVectara GeLuoVectara Inc. @@ -9486,7 +9486,7 @@ Scaling Graph-Based Dependency Parsing with Arc Vectorization and Attention-Based Refinement NicolasFloquet - Joseph LeRouxUniversité Paris 13 + Joseph LeRouxUniversité Paris 13 NadiTomehUniversité Sorbonne Paris Nord ThierryCharnoisUniversity of Sorbonne Paris Nord (Paris 13) 722-734 @@ -9525,7 +9525,7 @@ KatharinaHämmerl TomaszLimisiewiczMeta and University of Washington JindřichLibovickýCharles University Prague - AlexanderFraserTechnical University of Munich + AlexanderFraserTechnical University of Munich 756-767 Previous work has considered token overlap, or even similarity of token distributions, as predictors for multilinguality and cross-lingual knowledge transfer in language models. However, these very literal metrics assign large distances to language pairs with different scripts, which can nevertheless show good cross-linguality. This limits the explanatory strength of token overlap for knowledge transfer between language pairs that use distinct scripts or follow different orthographic conventions. In this paper, we propose subword token alignability as a new way to understand the impact and quality of multilingual tokenisation. In particular, this metric predicts multilinguality much better when scripts are disparate and the overlap of literal tokens is low. We analyse this metric in the context of both encoder and decoder models, look at data size as a potential distractor, and discuss how this insight may be applied to multilingual tokenisation in future work. We recommend our subword token alignability metric for identifying optimal language pairs for cross-lingual transfer, as well as to guide the construction of better multilingual tokenisers in the future. We publish our code and reproducibility details. 2025.naacl-short.63 @@ -9561,7 +9561,7 @@ YoungwonLeeSeoul National University Seung-wonHwangSeoul National University Daniel FCamposSnowflake - FilipGralińskiSnowflake and Adam Mickiewicz University + FilipGralińskiSnowflake and Adam Mickiewicz University ZheweiYaoSnowflake YuxiongHeMicrosoft 787-796 @@ -9635,7 +9635,7 @@ Identifying Power Relations in Conversations using Multi-Agent Social Reasoning ZhaoqingWuPurdue University DanGoldwasserPurdue University and Purdue University - Maria LeonorPachecoUniversity of Colorado at Boulder + Maria LeonorPachecoUniversity of Colorado at Boulder LeoraMorgensternSRI International 855-865 Large language models (LLMs) struggle in social science domains, where critical thinking and human-level inference are crucial. In this work, we propose a multi-agent social reasoning framework that leverages the generative and reasoning capabilities of LLMs to generate and evaluate reasons from multiple perspectives grounded in social science theories, and construct a factor graph for inference. Experimental results on understanding power dynamics in conversations show that our method outperforms standard prompting baselines, demonstrating its potential for tackling hard Computational Social Science (CSS) tasks. @@ -9648,7 +9648,7 @@ Aylin EceGunal BowenYi John D.Piette - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan VeronicaPerez-RosasUniversity of Michigan - Ann Arbor 866-872 Cultural and language factors significantly influence counseling, but Natural Language Processing research has not yet examined whether the findings of conversational analysis for counseling conducted in English apply to other languages. This paper presents a first step towards this direction. We introduce MIDAS (Motivational Interviewing Dataset in Spanish), a counseling dataset created from public video sources that contains expert annotations for counseling reflections and questions. Using this dataset, we explore language-based differences in counselor behavior in English and Spanish and develop classifiers in monolingual and multilingual settings, demonstrating its applications in counselor behavioral coding tasks. @@ -9706,7 +9706,7 @@ Giving the Old a Fresh Spin: Quality Estimation-Assisted Constrained Decoding for Automatic Post-Editing SourabhDeoghare DipteshKanojiaUniversity of Surrey - PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology + PushpakBhattacharyyaIndian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology 914-925 Automatic Post-Editing (APE) systems often struggle with over-correction, where unnecessary modifications are made to a translation, diverging from the principle of minimal editing. In this paper, we propose a novel technique to mitigate over-correction by incorporating word-level Quality Estimation (QE) information during the decoding process. This method is architecture-agnostic, making it adaptable to any APE system, regardless of the underlying model or training approach. Our experiments on English-German, English-Hindi, and English-Marathi language pairs show the proposed approach yields significant improvements over their corresponding baseline APE systems, with TER gains of 0.65, 1.86, and 1.44 points, respectively. These results underscore the complementary relationship between QE and APE tasks and highlight the effectiveness of integrating QE information to reduce over-correction in APE systems. 2025.naacl-short.77 @@ -9759,8 +9759,8 @@ XiaoranLiu JuliaBuffolino Christian C.Luhmann - Ryan L.Boyd - H. AndrewSchwartz + Ryan L.Boyd + H. AndrewSchwartz 966-979 While NLP models often seek to capture cognitive states via language, the validity of predicted states is determined by comparing them to annotations created without access the cognitive states of the authors. In behavioral sciences, cognitive states are instead measured via experiments. Here, we introduce an experiment-based framework for evaluating language-based cognitive style models against human behavior. We explore the phenomenon of decision making, and its relationship to the linguistic style of an individual talking about a recent decision they made. The participants then follow a classical decision-making experiment that captures their cognitive style, determined by how preferences change during a decision exercise. We find that language features, intended to capture cognitive style, can predict participants’ decision style with moderate-to-high accuracy (AUC 0.8), demonstrating that cognitive style can be partly captured and revealed by discourse patterns. 2025.naacl-short.81 @@ -9827,7 +9827,7 @@ QiangLiAccenture MingkunTanUniversität Bielefeld XunZhao - DanZhang + DanZhang DaoanZhang ShengzhaoLeiEPFL - EPF Lausanne Anderson S.Chu @@ -9846,7 +9846,7 @@ XichouZhu LeiYu JiafengGuoInstitute of Computing Technolgy, Chinese Academy of Sciences - XueqiChengInstitute of Computing Technology, Chinese Academy + XueqiChengInstitute of Computing Technology, Chinese Academy 54-61 Text2Sql is a task that converts natural language questions into SQL queries. In previous research on LLM fine-tuning, researchers typically input both the entire database schema and the natural language question into the model. This approach has two issues: 1) the model’s context is limited when dealing with a large number of database tables; 2) the question is often related to only a few tables, leading to excessive irrelevant information that distracts the model. To address these issues, we employed pure fine-tuning strategy to reduce redundancy. The model fine-tuned with pure prompts, using prompts that are only 53% of the baseline length, outperforms the baseline (fine-tuned with all tables in the prompt) by 8.2% and 8.6% in Test-suite accuracy (TS) and exact-set-match accuracy (EM), respectively, on the Spider dev set. Under the most refined Spider dev set of prompts, the model achieves TS and EM scores of 73.5% and 75.4%, respectively, approaching state-of-the-art (SOTA) levels. To leverage the capabilities of the model with pure prompts, we applied pure knowledge distillation strategy to transfer its abilities. The distilled student model achieved a 1.9% improvement in TS, while the teacher model’s prompt length was only 23% of that of the student model. 2025.naacl-industry.5 @@ -9858,7 +9858,7 @@ Vinay KumarVermaAmazon Shreyas SunilKulkarniAmazon HappyMittalAmazon - DeepakGuptaAmazon + DeepakGuptaAmazon 62-69 Question Answering (QA) and Visual Question Answering (VQA) are well-studied problems in the language and vision domain. One challenging scenario involves multiple sources of information, each of a different modality, where the answer to the question may exist in one or more sources. This scenario contains richer information but is highly complex to handle. In this work, we formulate a novel question-answer generation (QAG) framework in an environment containing multi-source, multimodal information. The answer may belong to any or all sources; therefore, selecting the most prominent answer source or an optimal combination of all sources for a given question is challenging. To address this issue, we propose a question-guided attention mechanism that learns attention across multiple sources and decodes this information for robust and unbiased answer generation. To learn attention within each source, we introduce an explicit alignment between questions and various information sources, which facilitates identifying the most pertinent parts of the source information relative to the question. Scalability in handling diverse questions poses a challenge. We address this by extending our model to a sparse mixture-of-experts (sparse-MoE) framework, enabling it to handle thousands of question types. Experiments on T5 and Flan-T5 using three datasets demonstrate the model’s efficacy, supported by ablation studies. 2025.naacl-industry.6 @@ -9930,7 +9930,7 @@ GuoqingSunChina Merchants Bank Credit Card Center MengchenZhu HaifengTang - Kenny Q.ZhuUniversity of Texas at Arlington + Kenny Q.ZhuUniversity of Texas at Arlington MengyueWuShanghai Jiaotong University 129-137 Designing effective debt collection systems is crucial for improving operational efficiency and reducing costs in the financial industry. However, the challenges of maintaining script diversity, contextual relevance, and coherence make this task particularly difficult. This paper presents a debt collection system based on real debtor-collector data from a major commercial bank. We construct a script library from real-world debt collection conversations, and propose a two-stage retrieval based response system for contextual relevance. Experimental results show that our system improves script diversity, enhances response relevance, and achieves practical deployment efficiency through knowledge distillation. This work offers a scalable and automated solution, providing valuable insights for advancing debt collection practices in real-world applications. @@ -10096,7 +10096,7 @@ <fixed-case>C</fixed-case>haracter<fixed-case>GPT</fixed-case>: A Persona Reconstruction Framework for Role-Playing Agents JeiyoonPark ChanjunParkKorea University - HeuiseokLim + HeuiseokLim 287-303 The recent introduction of the Assistants API highlights its potential for large language models (LLMs) in role-playing agents (RPA). However, maintaining consistent character personas remains a significant challenge due to variability in information extraction, which frequently omits critical elements such as backstory or interpersonal relationships. To address this limitation, we introduce CharacterGPT, a framework designed to dynamically reconstruct character personas through Character Persona Training (CPT). This approach incrementally updates personas by extracting traits from chapter-wise novel summaries, reflecting the progression of the narrative. Our framework is evaluated through Big Five personality evaluations and creative tasks, in which characters generate original narratives, demonstrating the efficacy of CharacterGPT in preserving persona consistency. The code and results are available at https://github.com/Jeiyoon/charactergpt 2025.naacl-industry.24 @@ -10184,7 +10184,7 @@ LukasFischerUniversity of Zurich YingqiangGaoUniversity of Zurich AlexaLintnerZHAW - Zürcher Hochschule für Angewandte Wissenschaften - AnnetteRiosUniversity of Zurich + AnnetteRiosUniversity of Zurich SarahEblingUniversity of Zurich 370-379 Audio description (AD) is a crucial accessibility service provided to blind persons and persons with visual impairment, designed to convey visual information in acoustic form. Despite recent advancements in multilingual machine translation research, the lack of well-crafted and time-synchronized AD data impedes the development of audio description translation (ADT) systems that address the needs of multilingual countries such as Switzerland. Furthermore, most ADT systems rely on text alone, and it is unclear whether incorporating visual information from video clips improves the quality of ADT outputs.In this work, we introduce SwissADT, an **emerging** ADT system for three main Swiss languages and English, designed for future use by our industry partners. By collecting well-crafted AD data augmented with video clips in German, French, Italian, and English, and leveraging the power of Large Language Models (LLMs), we aim to enhance information accessibility for diverse language populations in Switzerland by automatically translating AD scripts to the desired Swiss language. Our extensive experimental ADT results, composed of both automatic and human evaluations of ADT quality, demonstrate the promising capability of SwissADT for the ADT task. We believe that combining human expertise with the generation power of LLMs can further enhance the performance of ADT systems, ultimately benefiting a larger multilingual target population. @@ -10208,7 +10208,7 @@ <fixed-case>M</fixed-case>ono<fixed-case>TOD</fixed-case>ia: Translating Monologue Requests to Task-Oriented Dialogues SebastianSteindlOstbayerische Technische Hochschule Amberg-Weiden - UlrichSchäferOstbayerische Technische Hochschule Amberg-Weiden + UlrichSchäferOstbayerische Technische Hochschule Amberg-Weiden BerndLudwigUniversität Regensburg 390-403 Data scarcity is one of the main problems when it comes to real-world applications of transformer-based models.This is especially evident for task-oriented dialogue (TOD) systems, which require specialized datasets, that are usually not readily available. This can hinder companies from adding TOD systems to their services.This study therefore investigates a novel approach to sourcing annotated dialogues from existing German monologue material.Focusing on a real-world example, we investigate whether these monologues can be transformed into dialogue formats suitable for training TOD systems.We show the approach with the concrete example of a company specializing in travel bookings via e-mail. We fine-tune state-of-the-art Large Language Models for the task of rewriting e-mails as dialogues and annotating them.To ensure the quality and validity of the generated data, we employ crowd workers to evaluate the dialogues across multiple criteria and to provide gold-standard annotations for the test dataset.We further evaluate the usefulness of the dialogues for training TOD systems.Our evaluation shows that the dialogues and annotations are of high quality and can serve as a valuable starting point for training TOD systems.Finally, we make the annotated dataset publicly available to foster future research. @@ -10221,7 +10221,7 @@ HaoanJin JiachengShi HanhuiXuFudan University - Kenny Q.ZhuUniversity of Texas at Arlington + Kenny Q.ZhuUniversity of Texas at Arlington MengyueWuShanghai Jiaotong University 404-421 Large language models (LLMs) demonstrate significant potential in advancing medical applications, yet their capabilities in addressing medical ethics challenges remain underexplored. This paper introduces MedEthicEval, a novel benchmark designed to systematically evaluate LLMs in the domain of medical ethics. Our framework encompasses two key components: knowledge, assessing the models’ grasp of medical ethics principles, and application, focusing on their ability to apply these principles across diverse scenarios. To support this benchmark, we consulted with medical ethics researchers and developed three datasets addressing distinct ethical challenges: blatant violations of medical ethics, priority dilemmas with clear inclinations, and equilibrium dilemmas without obvious resolutions. MedEthicEval serves as a critical tool for understanding LLMs’ ethical reasoning in healthcare, paving the way for their responsible and effective use in medical contexts. @@ -10342,7 +10342,7 @@ <fixed-case>T</fixed-case>urbo<fixed-case>F</fixed-case>uzz<fixed-case>LLM</fixed-case>: Turbocharging Mutation-based Fuzzing for Effectively Jailbreaking Large Language Models in Practice AmanGoelAmazon XianWuAmazon - ZheWangAmazon + ZheWangAmazon DmitriyBespalovAmazon YanjunQiAmazon and University of Virginia 523-534 @@ -10457,7 +10457,7 @@ Breaking Down Power Barriers in On-Device Streaming <fixed-case>ASR</fixed-case>: Insights and Solutions YangLiIowa State University YuanShangguanCurrent: Google - YuhaoWangFacebook + YuhaoWangFacebook LiangzhenLaiFacebook ErnieChangMeta AI ChangshengZhaoMeta Inc. @@ -10474,7 +10474,7 @@ SwapnilGupta Lucas PereiraCarliniAmazon PrateekSircar - DeepakGuptaAmazon + DeepakGuptaAmazon 627-637 Language localization is the adaptation of written content to different linguistic and cultural contexts. Ability to localize written content is crucial for global businesses to provide consistent and reliable customer experience across diverse markets. Traditional methods have approached localization as an application of machine translation (MT), but localization requires more than linguistic conversion – content needs to align with the target audience’s cultural norms, linguistic nuances, and technical requirements. This difference is prominent for long-form text, where multiple facts are present in a creative choice of language. We propose a novel prompt approach for Large Languages Models (LLMs), called Break-Ideate-Generate (BrIdGe), for language localization. BrIdGe ‘breaks’ the source content into granular facts, ‘ideates’ an action plan for content creation in the target language by organizing the granular facts, and finally executes the plan to ‘generate’ localized content. This approach emulates the cognitive processes humans employ in writing that begin with identifying important points, followed by brainstorming on how to structure and organize the output. We evaluated the BrIdGe methodology from multiple perspectives, including impact of BrIdGe prompt on different LLMs and performance comparisons with traditional MT models and direct translation through LLMs on public benchmark and proprietary e-commerce datasets. Through human and LLM-based automated evaluations across content in multiple languages, we demonstrate effectiveness of BrIdGe in generating fluent localized content while preserving factual consistency between source and target languages. 2025.naacl-industry.51 @@ -10580,7 +10580,7 @@ LongVo-Dang Khai-NguyenNguyen Truong-SonHyUniversity of Alabama at Birmingham - RalfSchlüterAppTek GmbH and Rheinisch Westfälische Technische Hochschule Aachen + RalfSchlüterAppTek GmbH and Rheinisch Westfälische Technische Hochschule Aachen 724-783 Spoken Named Entity Recognition (NER) aims to extract named entities from speech and categorise them into types like person, location, organization, etc. In this work, we present *VietMed-NER* - the first spoken NER dataset in the medical domain. To our knowledge, our Vietnamese real-world dataset is the largest spoken NER dataset in the world regarding the number of entity types, featuring 18 distinct types. Furthermore, we present baseline results using various state-of-the-art pre-trained models: encoder-only and sequence-to-sequence; and conduct quantitative and qualitative error analysis. We found that pre-trained multilingual models generally outperform monolingual models on reference text and ASR output and encoders outperform sequence-to-sequence models in NER tasks. By translating the transcripts, the dataset can also be utilised for text NER in the medical domain in other languages than Vietnamese. All code, data and models are publicly available. 2025.naacl-industry.59 @@ -10619,7 +10619,7 @@ PrasanjitRathMicrosoft HariShrawgiMicrosoft ParagAgrawalMicrosoft - SandipanDandapatMicrosoft + SandipanDandapatMicrosoft 809-821 This paper analyzes the safety of Large Language Models (LLMs) in interactions with children below age of 18 years. Despite the transformative applications of LLMs in various aspects of children’s lives, such as education and therapy, there remains a significant gap in understanding and mitigating potential content harms specific to this demographic. The study acknowledges the diverse nature of children, often overlooked by standard safety evaluations, and proposes a comprehensive approach to evaluating LLM safety specifically for children. We list down potential risks that children may encounter when using LLM-powered applications. Additionally, we develop Child User Models that reflect the varied personalities and interests of children, informed by literature in child care and psychology. These user models aim to bridge the existing gap in child safety literature across various fields. We utilize Child User Models to evaluate the safety of six state-of-the-art LLMs. Our observations reveal significant safety gaps in LLMs, particularly in categories harmful to children but not adults. 2025.naacl-industry.62 @@ -10647,7 +10647,7 @@ SteveSiuOracle DonDharmasiriOracle Yuan-FangLiMonash University and Oracle - LongDuongOracle + LongDuongOracle DamienHilloulinOracle labs RhicheekPatraOracle SungpackHongOracle @@ -10918,7 +10918,7 @@ <fixed-case>INSIGHTBUDDY</fixed-case>-<fixed-case>AI</fixed-case>: Medication Extraction and Entity Linking using Pre-Trained Language Models and Ensemble Learning PabloRomero LifengHan - GoranNenadicUniversity of Manchester + GoranNenadicUniversity of Manchester 18-27 This paper presents our system, InsightBuddy-AI, designed for extracting medication mentions and their associated attributes, and for linking these entities to established clinical terminology resources, including SNOMED-CT, the British National Formulary (BNF), ICD, and the Dictionary of Medicines and Devices (dm+d).To perform medication extraction, we investigated various ensemble learning approaches, including stacked and voting ensembles (using first, average, and max voting methods) built upon eight pre-trained language models (PLMs). These models include general-domain PLMs—BERT, RoBERTa, and RoBERTa-Large—as well as domain-specific models such as BioBERT, BioClinicalBERT, BioMedRoBERTa, ClinicalBERT, and PubMedBERT.The system targets the extraction of drug-related attributes such as adverse drug effects (ADEs), dosage, duration, form, frequency, reason, route, and strength.Experiments conducted on the n2c2-2018 shared task dataset demonstrate that ensemble learning methods outperformed individually fine-tuned models, with notable improvements of 2.43% in Precision and 1.35% in F1-score.We have also developed cross-platform desktop applications for both entity recognition and entity linking, available for Windows and macOS.The InsightBuddy-AI application is freely accessible for research use at https://github.com/HECTA-UoM/InsightBuddy-AI. 2025.naacl-srw.2 @@ -10989,7 +10989,7 @@ SaarKuziAmazon GiuseppeCastellucciAmazon EugeneAgichteinEmory University - ShervinMalmasiAmazon + ShervinMalmasiAmazon 77-91 In recommender systems, users often seek the best products through indirect, vague, or under-specified queries such as “best shoes for trail running.” These queries, referred to as implicit superlative queries, pose a challenge for standard retrieval and ranking systems due to their lack of explicit attribute mentions and the need for identifying and reasoning over complex attributes. We investigate how Large Language Models (LLMs) can generate implicit attributes for ranking and reason over them to improve product recommendations for such queries. As a first step, we propose a novel four-point schema, called SUPERB, for annotating the best product candidates for superlative queries, paired with LLM-based product annotations. We then empirically evaluate several existing retrieval and ranking approaches on our newly created dataset, providing insights and discussing how to integrate these findings into real-world e-commerce production systems. 2025.naacl-srw.8 @@ -11025,8 +11025,8 @@ <fixed-case>M</fixed-case>ed-<fixed-case>C</fixed-case>o<fixed-case>DE</fixed-case>: Medical Critique based Disagreement Evaluation Framework MohitGupta - AkikoAizawaNational Institute of Informatics - Rajiv RatnShahIndraprastha Institute of Information Technology, Delhi + AkikoAizawaNational Institute of Informatics + Rajiv RatnShahIndraprastha Institute of Information Technology, Delhi 112-119 The emergence of large language models (LLMs) has significantly influenced numerous fields, including healthcare, by enhancing the capabilities of automated systems to process and generate human-like text. However, despite their advancements, the reliability and accuracy of LLMs in medical contexts remain critical concerns. Current evaluation methods often lack robustness and fail to provide a comprehensive assessment of LLM performance, leading to potential risks in clinical settings. In this work, we propose Med-CoDE, a specifically designed evaluation framework for medical LLMs to address these challenges. The framework leverages a critique-based approach to quantitatively measure the degree of disagreement between model-generated responses and established medical ground truths. This framework captures both accuracy and reliability in medical settings. The proposed evaluation framework aims to fill the existing gap in LLM assessment by offering a systematic method to evaluate the quality and trustworthiness of medical LLMs. Through extensive experiments and case studies, we illustrate the practicality of our framework in providing a comprehensive and reliable evaluation of medical LLMs. 2025.naacl-srw.11 @@ -11048,7 +11048,7 @@ Ivode Souza Bueno Júnior HaotianYe AxelWisiorek - HinrichSchütze + HinrichSchütze 129-141 This paper presents a federated learning system with differential privacy for hate speech detection, tailored to low-resource languages. By fine-tuning pre-trained language models, ALBERT emerged as the most effective option for balancing performance and privacy. Experiments demonstrated that federated learning with differential privacy performs adequately in low-resource settings, though datasets with fewer than 20 sentences per client struggled due to excessive noise. Balanced datasets and augmenting hateful data with non-hateful examples proved critical for improving model utility. These findings offer a scalable and privacy-conscious framework for integrating hate speech detection into social media platforms and browsers, safeguarding user privacy while addressing online harm. 2025.naacl-srw.13 @@ -11072,7 +11072,7 @@ ShuheiKuritaNational Institute of Informatics and New York University YusukeOdaNational Institute of Informatics and Nara Institute of Science and Technology DaisukeKawaharaWaseda University - NaoakiOkazakiInstitute of Science Tokyo + NaoakiOkazakiInstitute of Science Tokyo 162-170 CLIP is a foundational model that bridges images and text, widely adopted as a key component in numerous vision-language models.However, the lack of large-scale open Japanese image-text pairs poses a significant barrier to the development of Japanese vision-language models.In this study, we constructed a Japanese image-text pair dataset with 1.5 billion examples using machine translation with open-weight LLMs and pre-trained Japanese CLIP models on the dataset.The performance of the pre-trained models was evaluated across seven benchmark datasets, achieving competitive average scores compared to models of similar size without the need for extensive data curation. However, the results also revealed relatively low performance on tasks specific to Japanese culture, highlighting the limitations of translation-based approaches in capturing cultural nuances. Our dataset, models, and code are publicly available. 2025.naacl-srw.15 @@ -11121,7 +11121,7 @@ DhimanGoswamiGeorge Mason University MarcosZampieriGeorge Mason University KaiNorth - ShervinMalmasiAmazon + ShervinMalmasiAmazon AntoniosAnastasopoulosAthena Research Center and George Mason University 193-199 Native Language Identification (NLI) is the task of automatically identifying the native language (L1) of individuals based on their second language (L2) production. The introduction of Large Language Models (LLMs) with billions of parameters has renewed interest in text-based NLI, with new studies exploring LLM-based approaches to NLI on English L2. The capabilities of state-of-the-art LLMs on non-English NLI corpora, however, have not yet been fully evaluated. To fill this important gap, we present the first evaluation of LLMs for multilingual NLI. We evaluated the performance of several LLMs compared to traditional statistical machine learning models and language-specific BERT-based models on NLI corpora in English, Italian, Norwegian, and Portuguese. Our results show that fine-tuned GPT-4 models achieve state-of-the-art NLI performance. @@ -11135,7 +11135,7 @@ LiboRen NicoloMicheletti LifengHan - GoranNenadicUniversity of Manchester + GoranNenadicUniversity of Manchester 200-206 The abundance of medical records holds great promise for enhancing healthcare and advancing biomedical research. However, due to privacy constraints, access to such data is typically limited to internal use.Recent studies have attempted to overcome this challenge by generating synthetic data through Causal Language Modelling. Yet, this approach often fails to ensure patient anonymity and offers limited control over output diversity—unless additional computational cost is introduced.In response, we propose a method for generating synthetic free-text medical records based on Masked Language Modelling. Our approach retains key medical details while introducing variability in the generated texts and reducing the risk of patient re-identification. With a relatively lightweight architecture of approximately 120 million parameters, the system ensures low inference costs.Experimental results show that our method produces high-quality synthetic data, achieving a HIPAA-compliant PHI recall of 96% and a re-identification risk of only 3.5%. Furthermore, downstream evaluations reveal that models trained on the synthetic data perform comparably to those trained on real-world data. Our trained models are publicly available on Github as SynDeidMLM (at https://github.com/SamySam0/SynDeidMLM) (meaning synthetic and de-identified data generation using MLM). 2025.naacl-srw.20 @@ -11155,7 +11155,7 @@ Linear Relational Decoding of Morphology in Language Models EricXia - JugalKalitaUniversity of Colorado at Colorado Springs + JugalKalitaUniversity of Colorado at Colorado Springs 225-235 A two-part affine approximation has been found to be a good approximation for transformer computations over certain subject-object relations. Adapting the Bigger Analogy Test Set, we show that the linear transformation W s , where s is a middle-layer representation of a subject token and W is derived from model derivatives, can accurately reproduce final object states for many relations. This linear technique achieves 90% faithfulness on morphological relations, with similar findings across languages and models. Our results suggest that some conceptual relationships in language models, such as morphology, are readily interpretable from latent space and are sparsely encoded by cross-layer linear transformations. 2025.naacl-srw.22 @@ -11372,8 +11372,8 @@ Evaluating Text Style Transfer Evaluation: Are There Any Reliable Metrics? SourabrataMukherjee - Atul Kr.OjhaUniversity of Galway, Ireland, Insight SFI Research Centre for Data Analytics, DSI, University of Galway, Ireland and Panlingua Languague Processing LLP, India - John PhilipMcCraeNational University of Ireland Galway + Atul Kr.OjhaUniversity of Galway, Ireland, Insight SFI Research Centre for Data Analytics, DSI, University of Galway, Ireland and Panlingua Languague Processing LLP, India + John PhilipMcCraeNational University of Ireland Galway OndrejDusekCharles University, Prague 418-434 Text style transfer (TST) is the task of transforming a text to reflect a particular style while preserving its original content. Evaluating TSToutputs is a multidimensional challenge, requiring the assessment of style transfer accuracy, content preservation, and naturalness. Us-ing human evaluation is ideal but costly, as is common in other natural language processing (NLP) tasks; however, automatic metrics forTST have not received as much attention as metrics for, e.g., machine translation or summarization. In this paper, we examine both set ofexisting and novel metrics from broader NLP tasks for TST evaluation, focusing on two popular subtasks—sentiment transfer and detoxification—in a multilingual context comprising English, Hindi, and Bengali. By conducting meta-evaluation through correlation with hu-man judgments, we demonstrate the effectiveness of these metrics when used individually and in ensembles. Additionally, we investigatethe potential of large language models (LLMs) as tools for TST evaluation. Our findings highlight newly applied advanced NLP metrics andLLM-based evaluations provide better insights than existing TST metrics. Our oracle ensemble approaches show even more potential. @@ -11585,7 +11585,7 @@ Knowledge Distillation for Language Models YuqiaoWenUniversity of Alberta - FredaShiUniversity of Waterloo + FredaShiUniversity of Waterloo LiliMouUniversity of Alberta 25-29 Knowledge distillation (KD) aims to transfer the knowledge of a teacher (usually a large model) to a student (usually a small one). In this tutorial, our goal is to provide participants with a comprehensive understanding of the techniques and applications of KD for language models. After introducing the basic concepts including intermediate-layer matching and prediction matching, we will present advanced techniques such as reinforcement learning-based KD and multi-teacher distillation. For applications, we will focus on KD for large language models (LLMs), covering topics ranging from LLM sequence compression to LLM self-distillation. The target audience is expected to know the basics of machine learning and NLP, but do not have to be familiar with the details of math derivation and neural models @@ -11597,7 +11597,7 @@ Adaptation of Large Language Models ZixuanKeSalesforce AI Research YifeiMingSalesforce AI Research - ShafiqJotySalesforce AI Research + ShafiqJotySalesforce AI Research 30-37 This tutorial on adaptation of Large Language Models (LLMs) is designed to address the growing demand for models that go beyond the static capabilities of generic LLMs by providing an overview of dynamic, domain-specific, and task-adaptive LLM adaptation techniques. While general LLMs have demonstrated strong generalization across a variety of tasks, they often struggle to perform well in specialized domains such as finance, healthcare, and code generation for underrepresented languages. Additionally, their static nature limits their ability to evolve with the changing world, and they are often extremely large in size, making them impractical and costly to deploy at scale. As a result, the adaptation of LLMs has drawn much attention since the birth of LLMs and is of core importance, both for industry, which focuses on serving its targeted users, and academia, which can greatly benefit from small but powerful LLMs 2025.naacl-tutorial.5 @@ -11606,11 +11606,11 @@ Learning Language through Grounding - FredaShiUniversity of Waterloo + FredaShiUniversity of Waterloo ZiqiaoMaUniversity of Michigan JiayuanMaoMassachusetts Institute of Technology ParisaKordjamshidiMichigan State University - JoyceChaiUniversity of Michigan + JoyceChaiUniversity of Michigan 38-43 Grounding has been a long-standing concept in natural language processing (NLP) and computational linguistics (CL). This tutorial provides a historical overview and introduces recent advances in learning language through grounding, with a particular emphasis on the latter. We will begin by tracing the history of grounding and presenting a unified perspective on the term. In Parts II to IV, we will delve into recent progress in learning lexical semantics, syntax, and complex meanings through various forms of grounding. We will conclude by discussing future directions and open challenges, particularly those related to the growing trend of large language models and scaling. 2025.naacl-tutorial.6 @@ -11682,7 +11682,7 @@ Shou-YiHung Bo-TingLin En-Shiun AnnieLee - Richard Tzong-HanTsaiNational Central University + Richard Tzong-HanTsaiNational Central University 11-19 Many endangered languages are at risk of extinction due to barriers in communication and generational gaps that hinder their preservation. A cause for languages becoming endangered is the lack of language educational tools and artificial intelligence (AI) models for these low-resource languages. To address this, we propose the ATAIGI learning app designed with AI-powered models leveraging multimodal generative techniques. Our app offers users a comprehensive learning experience by providing translated phrases and definitions, example sentences, illustrative images, romanized pronunciation, and audio speech to accelerate language learning. ATAIGI is built on five AI models that are rigorously benchmarked individually, with our Transliteration Model achieving state-of-the-art results for Taiwanese Hokkien transliteration. ATAIGI is available for all to learn the endangered language of Taiwanese Hokkien, an endangered language spoken in Taiwan. A human evaluation conducted demonstrates the effectiveness of ATAIGI in improving language proficiency and cultural understanding, supporting its potential for the preservation and education of endangered languages like the Taiwanese Hokkien. 2025.naacl-demo.2 @@ -11727,7 +11727,7 @@ YifanZhu KennethLaiBrandeis University ChangsooJungColorado State University - JamesPustejovskyBrandeis University + JamesPustejovskyBrandeis University NikhilKrishnaswamyColorado State University 40-50 We present TRACE, a novel system for live *common ground* tracking in situated collaborative tasks. With a focus on fast, real-time performance, TRACE tracks the speech, actions, gestures, and visual attention of participants, uses these multimodal inputs to determine the set of task-relevant propositions that have been raised as the dialogue progresses, and tracks the group’s epistemic position and beliefs toward them as the task unfolds. Amid increased interest in AI systems that can mediate collaborations, TRACE represents an important step forward for agents that can engage with multiparty, multimodal discourse. @@ -11841,9 +11841,9 @@ ShikharBharadwajSchool of Computer Science, Carnegie Mellon University YiwenZhao SamueleCornell - YifanPeng + YifanPeng XiangYueCarnegie Mellon University - Chao-Han HuckYangNVIDIA Research + Chao-Han HuckYangNVIDIA Research GrahamNeubigCarnegie Mellon University ShinjiWatanabeCarnegie Mellon University 116-124 @@ -11903,7 +11903,7 @@ Semi-automatic Sequential Sentence Classification in the Discourse Analysis Tool Suite TimFischerUniversity of Hamburg - ChrisBiemannU Hamburg + ChrisBiemannU Hamburg 151-162 This paper explores an AI-assisted approach to sequential sentence annotation designed to enhance qualitative data analysis (QDA) workflows within the open-source Discourse Analysis Tool Suite (DATS) developed at our university.We introduce a three-phase Annotation Assistant that leverages the capabilities of large language models (LLMs) to assist researchers during annotation.Based on the number of annotations, the assistant employs zero-shot prompting, few-shot prompting, or fine-tuned models to provide the best suggestions.To evaluate this approach, we construct a benchmark with five diverse datasets.We assess the performance of three prominent open-source LLMs — Llama 3.1, Gemma 2, and Mistral NeMo — and a sequence tagging model based on SentenceTransformers.Our findings demonstrate the effectiveness of our approach, with performance improving as the number of annotated examples increases. Consequently, we implemented the Annotation Assistant within DATS and report the implementation details.With this, we hope to contribute to a novel AI-assisted workflow and further democratize access to AI for qualitative data analysis. 2025.naacl-demo.16 @@ -11914,10 +11914,10 @@ <fixed-case>C</fixed-case>ow<fixed-case>P</fixed-case>ilot: A Framework for Autonomous and Human-Agent Collaborative Web Navigation FariaHuq Zora ZhiruoWang - Frank F.XuCarnegie Mellon University + Frank F.XuCarnegie Mellon University TianyueOu ShuyanZhou - Jeffrey P.BighamApple and Carnegie Mellon University + Jeffrey P.BighamApple and Carnegie Mellon University GrahamNeubigCarnegie Mellon University 163-172 While much work on web agents emphasizes the promise of autonomously performing tasks on behalf of users, in reality, agents often fallshort on complex tasks in real-world contexts and modeling user preference. This presents an opportunity for humans to collaborate with the agent and leverage the agent’s capabilities effectively. We propose CowPilot, a frame- work supporting autonomous as well as human-agent co llaborative w eb navigation, and evaluation across task success and task efficiency. CowPilot reduces the number of steps humans need to perform by allowing agents to propose next steps, while users are able to pause, reject, or take alternative actions. During execution, users can interleave their actions with the agent’s by overriding suggestions or resuming agent control when needed. We conducted case studies on five common websites and found that the human-agent collaborative mode achieves the highest success rate of 95% while requiring humans to perform only 15.2% of the total steps. Even with human interventions during task execution, the agent successfully drives up to half of task success on its own. CowPilot can serve as a useful tool for data collection and agent evaluation across websites, which we believe will enable research in how users and agents can work together. Video demonstrations are available at https://oaishi.github.io/cowpilot.html @@ -11928,7 +11928,7 @@ e<fixed-case>R</fixed-case>evise+<fixed-case>RF</fixed-case>: A Writing Evaluation System for Assessing Student Essay Revisions and Providing Formative Feedback ZhexiongLiu - DianeLitmanUniversity of Pittsburgh, University of Pittsburgh and University of Pittsburgh + DianeLitmanUniversity of Pittsburgh, University of Pittsburgh and University of Pittsburgh Elaine LWangRAND TianwenLi MasonGobat @@ -11970,7 +11970,7 @@ ZihaoLin ZichaoWangAdobe Research YuantingPanStanford University - VarunManjunathaAdobe Systems + VarunManjunathaAdobe Systems Ryan A.RossiAdobe Research AngelaLauAdobe Systems LifuHuangUniversity of California, Davis @@ -11984,7 +11984,7 @@ <fixed-case>ESP</fixed-case>net-<fixed-case>SDS</fixed-case>: Unified Toolkit and Demo for Spoken Dialogue Systems SiddhantArora - YifanPeng + YifanPeng JiatongShi JinchuanTian WilliamChenCarnegie Mellon University @@ -12048,11 +12048,11 @@ PengfeiLiu ZhengzhongLiuMohamed bin Zayed University of Artificial Intelligence Hector XuguangRen - EduardHovyUniversity of Melbourne and Carnegie Mellon University + EduardHovyUniversity of Melbourne and Carnegie Mellon University IrynaGurevychInstitute for Computer Science, Artificial Intelligence and Technology, Mohamed bin Zayed University of Artificial Intelligence and Technische Universität Darmstadt - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence MonojitChoudhuryMohamed bin Zayed University of Artificial Intelligence - TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne + TimothyBaldwinMohamed bin Zayed University of Artificial Intelligence and The University of Melbourne 268-286 As large language models (LLMs) continue to evolve, leaderboards play a significant role in steering their development. Existing leaderboards often prioritize model capabilities while overlooking safety concerns, leaving a significant gap in responsible AI development. To address this gap, we introduce Libra-Leaderboard, a comprehensive framework designed to rank LLMs through a balanced evaluation of performance and safety. Combining a dynamic leaderboard with an interactive LLM arena, Libra-Leaderboard encourages the joint optimization of capability and safety. Unlike traditional approaches that average performance and safety metrics, Libra-Leaderboard uses a distance-to-optimal-score method to calculate the overall rankings. This approach incentivizes models to achieve a balance rather than excelling in one dimension at the expense of some other ones. In the first release, Libra-Leaderboard evaluates 26 mainstream LLMs from 14 leading organizations, identifying critical safety challenges even in state-of-the-art models. 2025.naacl-demo.23 @@ -12068,7 +12068,7 @@ KyuwonKimSeoul National University JinWee KangMiyoung - KyungTaeLimKorea Advanced Institute of Science & Technology + KyungTaeLimKorea Advanced Institute of Science & Technology JungyeulParkThe University of British Columbia ChulwooParkAnyang University 287-294 @@ -12241,7 +12241,7 @@ VidhishaBalachandranMicrosoft Research XiaochuangHanFacebook ShangbinFengUniversity of Washington - Lucy LuWangUniversity of Washington and Allen Institute for Artificial Intelligence + Lucy LuWangUniversity of Washington and Allen Institute for Artificial Intelligence YuliaTsvetkovDepartment of Computer Science, University of Washington 437-448 With the widespread consumption of AI-generated content, there has been an increased focus on developing automated tools to verify the factual accuracy of such content. However, prior research and tools developed for fact verification treat it as a binary classification or a linear regression problem. Although this is a useful mechanism as part of automatic guardrails in systems, we argue that such tools lack transparency in the prediction reasoning and diversity in source evidence to provide a trustworthy user experience.We develop FACTS&EVIDENCE—an interactive and transparent tool for user-driven verification of complex text. The tool facilitates the intricate decision-making involved in fact-verification, presenting its users a breakdown of complex input texts to visualize the credibility of individual claims along with explanation of model decisions and attribution to multiple, diverse evidence sources. FACTS&EVIDENCE aims to empower consumers of machine-generated text and give them agency to understand, verify, selectively trust and use such text. @@ -12269,7 +12269,7 @@ <fixed-case>L</fixed-case>3<fixed-case>GO</fixed-case>: Language Agents with Chain-of-3<fixed-case>D</fixed-case>-Thoughts for Generating Unconventional Objects YutaroYamadaSakana AI KhyathiChanduMistral AI - Bill YuchenLinxAI and University of Washington + Bill YuchenLinxAI and University of Washington JackHesselSamaya AI IlkerYildirimYale University YejinChoiComputer Science Department, Stanford University and NVIDIA @@ -12285,7 +12285,7 @@ KokiMaedaInstitute of Science Tokyo IssaSugiuraKyoto University ShuheiKuritaNational Institute of Informatics and New York University - NaoakiOkazakiInstitute of Science Tokyo + NaoakiOkazakiInstitute of Science Tokyo DaisukeKawaharaWaseda University 470-484 To develop high-performing Visual Language Models (VLMs), it is essential to prepare multimodal resources, such as image-text pairs, interleaved data, and instruction data. While multimodal resources for English are abundant, there is a significant lack of corresponding resources for non-English languages, such as Japanese. To address this problem, we take Japanese as a non-English language and propose Japanese multimodal datasets for rapidly developing a Japanese multimodal model. We collect Japanese image-text pairs and interleaved data from web archives and generate Japanese instruction data using an existing large language model and a VLM. Our experimental results show that a VLM trained on these native datasets outperforms those relying on machine-translated content. The resulting VLM, dataset and code used for training is publicly available. @@ -12319,7 +12319,7 @@ <fixed-case>METAPHORSHARE</fixed-case>: A Dynamic Collaborative Repository of Open Metaphor Datasets JoanneBoisson ArifMehmoodCardiff University - JoseCamacho-ColladosCardiff University + JoseCamacho-ColladosCardiff University 509-521 The metaphor studies community has developed numerous valuable labelled corpora in various languages over the years. Many of these resources are not only unknown to the NLP community, but are also often not easily shared among the researchers. Both in human sciences and in NLP, researchers could benefit from a centralised database of labelled resources, easily accessible and unified under an identical format. To facilitate this, we present MetaphorShare, a website to integrate metaphor datasets making them open and accessible. With this effort, our aim is to encourage researchers to share and upload more datasets in any language in order to facilitate metaphor studies and the development of future metaphor processing NLP systems. The website has four main functionalities: upload, download, search and label metaphor datasets. It is accessible at www.metaphorshare.com. 2025.naacl-demo.41 diff --git a/data/xml/2025.nakbanlp.xml b/data/xml/2025.nakbanlp.xml index 58f7165db1..abd40d34f6 100644 --- a/data/xml/2025.nakbanlp.xml +++ b/data/xml/2025.nakbanlp.xml @@ -122,7 +122,7 @@ The Missing Cause: An Analysis of Causal Attributions in Reporting on <fixed-case>P</fixed-case>alestine PaulinaGarcia Corral - HannahBechara + HannahBechara KrishnamoorthyManohara SlavaJankin 103–113 @@ -158,7 +158,7 @@ SaraNabhani ClaudiaBorg KurtMicallef - KhalidAl-Khatib + KhalidAl-Khatib 127–149 Propaganda significantly shapes public opinion, especially in conflict-driven contexts like the Israeli-Palestinian conflict. This study explores the integration of argumentation features, such as claims, premises, and major claims, into machine learning models to enhance the detection of propaganda techniques in Arabic media. By leveraging datasets annotated with fine-grained propaganda techniques and employing crosslingual and multilingual NLP methods, along with GPT-4-based annotations, we demonstrate consistent performance improvements. A qualitative analysis of Arabic media narratives on the Israeli war on Gaza further reveals the model’s capability to identify diverse rhetorical strategies, offering insights into the dynamics of propaganda. These findings emphasize the potential of combining NLP with argumentation features to foster transparency and informed discourse in politically charged settings. 2025.nakbanlp-1.14 diff --git a/data/xml/2025.neusymbridge.xml b/data/xml/2025.neusymbridge.xml index 6b59f5d9b5..480b00ea22 100644 --- a/data/xml/2025.neusymbridge.xml +++ b/data/xml/2025.neusymbridge.xml @@ -52,7 +52,7 @@ ShuangXue FangCai NaYe - GuipingZhang + GuipingZhang 18–30 Logical table-to-text generation (LT2T) seeks to produce logically faithful textual descriptions base on tables. Current end-to-end LT2T models, which use descriptions directly as learning objectives, frequently face challenges in maintaining logical faithfulness due to the lack of a reasoning knowledge. Recent research have introduced reasoning knowledge generated by models for LT2T task, but the noise along with it limited its performance. We therefore propose a framework reasoning knowledge filter that leverages the collaboration between large language models and smaller models to filter data points with high-quality reasoning knowledge. This framework aims to provide highly matched table, description and reasoning knowledge triplets for LT2T. The results obtained on LogicNLG database demonstrate that the efficiencies of the method in this paper has achieved optimal performance with a reduced amount of data. Specifically, it enhances SP-Acc by 1.4 points and NLI-Acc by 0.7 points compared to the current state-of-the-art model. 2025.neusymbridge-1.3 @@ -97,7 +97,7 @@ LianjiWang XiangLiu HaifengChi - GuipingZhang + GuipingZhang 61–70 With the continuous growth of multi-modal data on social media platforms, traditional Named Entity Recognition has rendered insufficient for handling contemporary data formats. Consequently, researchers proposed Multi-modal Named Entity Recognition (MNER). Existing studies focus on capturing the visual regions corresponding to entities to assist in entity recognition. However, these approaches still struggle to mitigate interference from visual regions that are irrelevant to the entities. To address this issue, we propose an innovative framework, Visual Cue Refinement in MNER(VCRMNER) using CLIP Prompts, to accurately capture visual cues (object-level visual regions) associated with entities. We leverage prompts to represent the semantic information of entity categories, which helps us assess visual cues and minimize interference from those irrelevant to the entities. Furthermore, we designed an interaction transformer that operates in two stages—first within each modality and then between modalities—to refine visual cues by learning from a frozen image encoder, thereby reducing differences between text and visual modalities. Comprehensive experiments were conducted on two public datasets, Twitter15 and Twitter17. The results and detailed analyses demonstrate that our method exhibits robust and competitive performance. 2025.neusymbridge-1.7 @@ -138,7 +138,7 @@ Generative <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et: Scalable and Adaptive Frames for Interpretable Knowledge Storage and Retrieval for <fixed-case>LLM</fixed-case>s Powered by <fixed-case>LLM</fixed-case>s HarishTayyar Madabushi TaylorHudson - ClaireBonial + ClaireBonial 107–119 Frame semantics provides an explanation for how we make use of conceptual frames, which encapsulate background knowledge and associations, to more completely understand the meanings of words within a context. Unfortunately, FrameNet, the only widely available implementation of frame semantics, is limited in both scale and coverage. Therefore, we introduce a novel mechanism for generating task-specific frames using large language models (LLMs), which we call Generative FrameNet. We demonstrate its effectiveness on a task that is highly relevant in the current landscape of LLMs: the interpretable storage and retrieval of factual information. Specifically, Generative Frames enable the extension of Retrieval-Augmented Generation (RAG), providing an interpretable framework for reducing inaccuracies in LLMs. We conduct experiments to demonstrate the effectiveness of this method both in terms of retrieval effectiveness as well as the relevance of the automatically generated frames and frame relations. Expert analysis shows that Generative Frames capture a more suitable level of semantic specificity than the frames from FrameNet. Thus, Generative Frames capture a notion of frame semantics that is closer to Fillmore’s originally intended definition, and offer potential for providing data-driven insights into Frame Semantics theory. Our results also show that this novel mechanism of Frame Semantic-based interpretable retrieval improves RAG for question answering with LLMs—outperforming a GPT-4 based baseline by up to 8 points. We provide open access to our data, including prompts and Generative FrameNet. 2025.neusymbridge-1.11 diff --git a/data/xml/2025.nlp4call.xml b/data/xml/2025.nlp4call.xml index 3a7fe9bcd5..c1219ffa41 100644 --- a/data/xml/2025.nlp4call.xml +++ b/data/xml/2025.nlp4call.xml @@ -24,7 +24,7 @@ The <fixed-case>M</fixed-case>ulti<fixed-case>GEC</fixed-case>-2025 Shared Task on Multilingual Grammatical Error Correction at <fixed-case>NLP</fixed-case>4<fixed-case>CALL</fixed-case> AriannaMasciolini AndrewCaines - OrphéeDe Clercq + OrphéeDe Clercq JoniKruijsbergen MurathanKurfalı RicardoMuñoz Sánchez @@ -56,7 +56,7 @@ Interpretable Machine Learning for Societal Language Identification: Modeling <fixed-case>E</fixed-case>nglish and <fixed-case>G</fixed-case>erman Influences on <fixed-case>P</fixed-case>ortuguese Heritage Language SorooshAkef - DetmarMeurers + DetmarMeurers AmáliaMendes PatrickRebuschat 50–62 @@ -74,7 +74,7 @@ <fixed-case>PIRLS</fixed-case> Category-specific Question Generation for Reading Comprehension YinPoon QiongWang - John S. Y.Lee + John S. Y.Lee Yu YanLam Samuel Kai WahChu 72–80 diff --git a/data/xml/2025.nlp4dh.xml b/data/xml/2025.nlp4dh.xml index 309627581c..f40a0b65ac 100644 --- a/data/xml/2025.nlp4dh.xml +++ b/data/xml/2025.nlp4dh.xml @@ -50,7 +50,7 @@ Analyzing Large Language Models’ pastiche ability: a case study on a 20th century <fixed-case>R</fixed-case>omanian author AncaDinuUniversity of Bucharest Andra-MariaFlorescuUniversity of Bucharest - LiviuDinuUniversity of Bucharest + LiviuDinuUniversity of Bucharest 20-32 This study evaluated the ability of several Large Language Models (LLMs) to pastiche the literary style of the Romanian 20th century author Mateiu Caragiale, by continuing one of his novels left unfinished upon his death. We assembled a database of novels consisting of six texts by Mateiu Caragiale, including his unfinished one, six texts by Radu Albala, including a continuation of Mateiu’s novel, and six LLM generated novels that try to pastiche it. We compared the LLM generated texts with the continuation by Radu Albala, using various methods. We automatically evaluated the pastiches by standard metrics such as ROUGE, BLEU, and METEOR. We performed stylometric analysis, clustering, and authorship attribution, and a manual analysis. Both computational and manual analysis of the pastiches indicated that LLMs are able to produce fairly qualitative pastiches, without matching the professional writer performance. The study also showed that ML techniques outperformed the more recent DL ones in both clusterization and authorship attribution tasks, probably because the dataset consists of only a few literary archaic texts in Romanian. In addition, linguistically informed features were shown to be competitive compared to automatically extracted features. 2025.nlp4dh-1.3 @@ -102,7 +102,7 @@ The <fixed-case>AI</fixed-case> Co-Ethnographer: How Far Can Automation Take Qualitative Research? FabianRetkowskiGermany AndreasSudmannCarnegie Mellon - AlexanderWaibelMassachusetts Institute of Technology + AlexanderWaibelMassachusetts Institute of Technology 73-90 Qualitative research often involves labor-intensive processes that are difficult to scale while preserving analytical depth. This paper introduces The AI Co-Ethnographer (AICoE), a novel end-to-end pipeline developed for qualitative research and designed to move beyond the limitations of simply automating code assignments, offering a more integrated approach. AICoE organizes the entire process, encompassing open coding, code consolidation, code application, and even pattern discovery, leading to a comprehensive analysis of qualitative data. 2025.nlp4dh-1.8 @@ -165,7 +165,7 @@ Effects of Publicity and Complexity in Reader Polarization YuriBizzoniAarhus University - PascaleFeldkampAarhus University + PascaleFeldkampAarhus University KristofferNielboAarhus University 138-150 We investigate how Goodreads rating distributions reflect variations in audience reception across literary works. By examining a large-scale dataset of novels, we analyze whether metrics such as the entropy or standard deviation of rating distributions correlate with textual features – including perplexity, nominal ratio, and syntactic complexity. These metrics reveal a disagreement continuum: more complex texts – i.e., more cognitively demanding books, with a more canon-like textual profile – generate polarized reader responses, while mainstream works produce more uniform reactions. We compare evaluation patterns across canonical and non-canonical works, bestsellers, and prize-winners, finding that textual complexity drives rating polarization even when controlling for publicity effects. Our findings demonstrate that linguistically unpredictable texts, particularly those with higher nominal density and dependency distance, generate divergent reader evaluations. This challenges conventional literary success metrics and suggests that the shape of rating distributions offers valuable insights beyond average scores. We hope our approach establishes a productive framework for understanding how literary features influence reception and how disagreement metrics can enhance our understanding of public literary judgment. @@ -243,7 +243,7 @@ LillyBraunerMannheim University FlorianErtzMannheim University InesReinigMannheim University - SimonePonzettoMannheim University + SimonePonzettoMannheim University 232-250 Due to their availability and ease of use, dictionary-based measures of moral values are a popular tool for text-based analyses of morality that examine human attitudes and behaviour across populations and cultures. In this paper, we revisit the construct validity of different dictionary-based measures of morality in text that have been proposed in the literature. We discuss conceptual challenges for text-based measures of morality and present an annotation experiment where we create a new dataset with human annotations of moral rhetoric in German political manifestos. We compare the results of our human annotations with different measures of moral values, showing that none of them is able to capture the trends observed by trained human coders. Our findings have far-reaching implications for the application of moral dictionaries in the digital humanities. 2025.nlp4dh-1.20 @@ -318,7 +318,7 @@ OlgaKolesnikovaInstituto Politécnico Nacional LilianaChanona HernandezInstituto Politécnico Nacional GrigoriSidorovInstituto Politécnico Nacional - AlexanderGelbukhInstituto Politécnico Nacional + AlexanderGelbukhInstituto Politécnico Nacional 305-312 This study examines sentiment analysis in Tamil-English code-mixed texts using advanced transformer-based architectures. The unique linguistic challenges, including mixed grammar, orthographic variability, and phonetic inconsistencies, are addressed. Data limitations and annotation gaps are discussed, highlighting the need for larger datasets. The performance of models such as XLM-RoBERTa, mT5, IndicBERT, and RemBERT is evaluated, with insights into their optimization for low-resource, code-mixed environments. 2025.nlp4dh-1.27 @@ -375,7 +375,7 @@ It’s about What and How you say it: A Corpus with Stance and Sentiment Annotation for <fixed-case>COVID</fixed-case>-19 Vaccines Posts on <fixed-case>X</fixed-case>/<fixed-case>T</fixed-case>witter by <fixed-case>B</fixed-case>razilian Political Elites LorenaBarberiaUniversity of São Paulo (USP) PedroSchmalzUniversity of São Paulo (USP) - NortonTrevisan RomanUniversity of São Paulo (USP) + NortonTrevisan RomanUniversity of São Paulo (USP) BelindaLombardUniversity of Birmingham TatianeMoraes de SousaUniversity of the State of Rio de Janeiro 365-376 @@ -401,7 +401,7 @@ Development of <fixed-case>O</fixed-case>ld <fixed-case>I</fixed-case>rish Lexical Resources, and Two <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Treebanks for Diplomatically Edited <fixed-case>O</fixed-case>ld <fixed-case>I</fixed-case>rish Text AdrianDoyleInsight Centre for Data Analytics - JohnMcCraeNational University of Ireland Galway + JohnMcCraeNational University of Ireland Galway 393-402 The quantity and variety of Old Irish text which survives in contemporary manuscripts, those dating from the Old Irish period, is quite small by comparison to what is available for Modern Irish, not to mention better-resourced modern languages. As no native speakers have existed for more than a millennium, no more text will ever be created by native speakers. For these reasons, text surviving in contemporary sources is particularly valuable. Ideally, all such text would be annotated using a single, common standard to ensure compatibility. At present, discrete Old Irish text repositories make use of incompatible annotation styles, few of which are utilised by text resources for other languages. This limits the potential for using text from more than any one resource simultaneously in NLP applications, or as a basis for creating further resources. This paper describes the production of the first Old Irish text resources to be designed specifically to ensure lexical compatibility and interoperability. 2025.nlp4dh-1.34 @@ -562,7 +562,7 @@ Historical Ink: Exploring Large Language Models for Irony Detection in 19th-Century <fixed-case>S</fixed-case>panish - KevinCohenUniversidad de los Andes + KevinCohenUniversidad de los Andes LauraManrique-GómezUniversidad de los Andes RubenManriqueUniversidad de Los Andes 559-569 diff --git a/data/xml/2025.nlp4ecology.xml b/data/xml/2025.nlp4ecology.xml index 17f498f93e..3c7e51f32e 100644 --- a/data/xml/2025.nlp4ecology.xml +++ b/data/xml/2025.nlp4ecology.xml @@ -47,7 +47,7 @@ JenniferD’Souza ZacharyLaubach Tarek AlMustafa - SinaZarrieß + SinaZarrieß RobertFrühstückl PhyllisIllari 16–23 @@ -85,7 +85,7 @@ Entity Linking using <fixed-case>LLM</fixed-case>s for Automated Product Carbon Footprint Estimation SteffenCastle - JulianMoreno Schneider + JulianMoreno Schneider 56–60 Growing concerns about climate change and sustainability are driving manufacturers to take significant steps toward reducing their carbon footprints. For these manufacturers, a first step towards this goal is to identify the environmental impact of the individual components of their products. We propose a system leveraging large language models (LLMs) to automatically map components from manufacturer Bills of Materials (BOMs) to Life Cycle Assessment (LCA) database entries by using LLMs to expand on available component information. Our approach reduces the need for manual data processing, paving the way for more accessible sustainability practices. 2025.nlp4ecology-1.12 @@ -124,7 +124,7 @@ Towards Addressing Anthropocentric Bias in Large Language Models FrancescaGrasso StefanoLocci - LuigiDi Caro + LuigiDi Caro 84–93 The widespread use of Large Language Models (LLMs), particularly among non-expert users, has raised ethical concerns about the propagation of harmful biases. While much research has addressed social biases, few works, if any, have examined anthropocentric bias in Natural Language Processing (NLP) technology. Anthropocentric language prioritizes human value, framing non-human animals, living entities, and natural elements solely by their utility to humans; a perspective that contributes to the ecological crisis. In this paper, we evaluate anthropocentric bias in OpenAI’s GPT-4o across various target entities, including sentient beings, non-sentient entities, and natural elements. Using prompts eliciting neutral, anthropocentric, and ecocentric perspectives, we analyze the model’s outputs and introduce a manually curated glossary of 424 anthropocentric terms as a resource for future ecocritical research. Our findings reveal a strong anthropocentric bias in the model’s responses, underscoring the need to address human-centered language use in AI-generated text to promote ecological well-being. 2025.nlp4ecology-1.18 @@ -133,7 +133,7 @@ Efficient Scientific Full Text Classification: <fixed-case>The</fixed-case> Case of <fixed-case>EICAT</fixed-case> Impact Assessments Marc FelixBrinner - SinaZarrieß + SinaZarrieß 94–103 This study explores strategies for efficiently classifying scientific full texts using both small, BERT-based models and local large language models like Llama-3.1 8B. We focus on developing methods for selecting subsets of input sentences to reduce input size while simultaneously enhancing classification performance. To this end, we compile a novel dataset consisting of full-text scientific papers from the field of invasion biology, specifically addressing the impacts of invasive species. These papers are aligned with publicly available impact assessments created by researchers for the International Union for Conservation of Nature (IUCN). Through extensive experimentation, we demonstrate that various sources like human evidence annotations, LLM-generated annotations or explainability scores can be used to train sentence selection models that improve the performance of both encoder- and decoder-based language models while optimizing efficiency through the reduction in input length, leading to improved results even if compared to models like ModernBERT that are able to handle the complete text as input. Additionally, we find that repeated sampling of shorter inputs proves to be a very effective strategy that, at a slightly increased cost, can further improve classification performance. 2025.nlp4ecology-1.20 diff --git a/data/xml/2025.nlp4pi.xml b/data/xml/2025.nlp4pi.xml index eaf917fba3..36b60bed02 100644 --- a/data/xml/2025.nlp4pi.xml +++ b/data/xml/2025.nlp4pi.xml @@ -167,7 +167,7 @@ YuChengHuangBoston University ArtiRamanathanBoston University MargritBetkeBoston University - DerryWijayaBoston University + DerryWijayaBoston University 128-143 TikTok has emerged as a key platform for discussing polarizing topics, including climate change. Despite its growing influence, there is limited research exploring how content features shape emotional alignment between video creators and audience comments, as well as their impact on user engagement. Using a combination of pretrained and fine-tuned textual and visual models, we analyzed 7,110 TikTok videos related to climate change, focusing on content features such as semantic clustering of video transcriptions, visual elements, tonal shifts, and detected emotions. (1) Our findings reveal that positive emotions and videos featuring factual content or vivid environmental visuals exhibit stronger emotional alignment. Furthermore, emotional intensity and tonal coherence in video speech are significant predictors of higher engagement levels, offering new insights into the dynamics of climate change communication on social media. (2) Our preference learning analysis reveals that comment emotions play a dominant role in predicting video shareability, with both positive and negative emotional responses acting as key drivers of content diffusion. We conclude that user engagement—particularly emotional discourse in comments—significantly shapes climate change content shareability. 2025.nlp4pi-1.11 @@ -188,7 +188,7 @@ Unsupervised Sustainability Report Labeling based on the integration of the <fixed-case>GRI</fixed-case> and <fixed-case>SDG</fixed-case> standards Seyed AlirezaMousavian AnarakiDepartment of Enterprise Engineering University of Rome, Tor Vergata Via del Politecnico 1, 00133, Rome, Italy DaniloCroceDepartment of Enterprise Engineering University of Rome, Tor Vergata Via del Politecnico 1, 00133, Rome, Italy - RobertoBasiliDepartment of Enterprise Engineering University of Rome, Tor Vergata Via del Politecnico 1, 00133, Rome, Italy + RobertoBasiliDepartment of Enterprise Engineering University of Rome, Tor Vergata Via del Politecnico 1, 00133, Rome, Italy 151-162 Sustainability reports are key instruments for communicating corporate impact, but their unstructured format and varied content pose challenges for large-scale analysis. This paper presents an unsupervised method to annotate paragraphs from sustainability reports against both the Global Reporting Initiative (GRI) and Sustainable Development Goals (SDG) standards. The approach combines structured metadata from GRI content indexes, official GRI–SDG mappings, and text semantic similarity models to produce weakly supervised annotations at scale. To evaluate the quality of these annotations, we train a multi-label classifier on the automatically labeled data and evaluate it on the trusted OSDG Community Dataset. The results show that our method yields meaningful labels and improves classification performance when combined with human-annotated data. Although preliminary, this work offers a foundation for scalable sustainability analysis and opens future directions toward assessing the credibility and depth of corporate sustainability claims. 2025.nlp4pi-1.13 @@ -212,7 +212,7 @@ JieyuZhaoUniversity of Southern California Linda X.ZouUniversity of Maryland RachelRudingerUniversity of Maryland - HalDaumé IIIUniversity of Maryland + HalDaumé IIIUniversity of Maryland 175-188 Multilingual large language models have gained prominence for their proficiency in processing and generating text across languages. Like their monolingual counterparts, multilingual models are likely to pick up on stereotypes and other social biases during training. In this paper, we study a phenomenon we term “stereotype leakage”, which refers to how training a model multilingually may lead to stereotypes expressed in one language showing up in the models’ behavior in another. We propose a measurement framework for stereotype leakage and investigate its effect in English, Russian, Chinese, and Hindi and with GPT-3.5, mT5, and mBERT. Our findings show a noticeable leakage of positive, negative, and nonpolar associations across all languages. We find that GPT-3.5 exhibits the most stereotype leakage of these models, and Hindi is the most susceptible to leakage effects. 2025.nlp4pi-1.15 @@ -225,7 +225,7 @@ KeZhangDataMinr, Inc. HemankLambaDataMinr, Inc. Elizabeth M.OlsonDataMinr, Inc. - JoelTetreaultDataMinr, Inc. + JoelTetreaultDataMinr, Inc. AlexJaimesDataMinr, Inc. 189-195 Publications in the AI for Good space have tended to focus on the research and model development that can support high-impact applications. However, very few AI for Good papers discuss the process of deploying and collaborating with the partner organization, and the resulting real-world impact. In this work, we share details about the close collaboration with a humanitarian-to-humanitarian (H2H) organization and how to not only deploy the AI model in a resource-constrained environment, but also how to maintain it for continuous performance updates, and share key takeaways for practitioners. @@ -238,10 +238,10 @@ YiwenDingUniversity of Michigan JiaruiLiuCMU ZhihengLyuUniversity of Hong Kong - KunZhangCMU, MBZUAI + KunZhangCMU, MBZUAI BernhardSchölkopfMax Planck Institute for Intelligent Systems, Tuebingen, Germany ZhijingJinMax Planck Institute for Intelligent Systems, Tuebingen, Germany, University of Toronto, Vector Institute - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan 196-214 While several previous studies have analyzed gender bias in research, we are still missing a comprehensive analysis of gender differences in the AI community, covering diverse topics and different development trends. Using the AI Scholar dataset of 78K researchers in the field of AI, we identify several gender differences: (1) Although female researchers tend to have fewer overall citations than males, this citation difference does not hold for all academic-age groups; (2) There exist large gender homophily in co-authorship on AI papers; (3) Female first-authored papers show distinct linguistic styles, such as longer text, more positive emotion words, and more catchy titles than male first-authored papers. Our analysis provides a window into the current demographic trends in our AI community, and encourages more gender equality and diversity in the future. 2025.nlp4pi-1.17 @@ -266,7 +266,7 @@ Multi-Task Learning approach to identify sentences with impact and affected location in a disaster news report SumantaBanerjeeNational Institute of Technology Silchar, India, Siksha ‘O’ Anusandhan Deemed to be University, Bhubaneswar, India ShyamapadaMukherjeeNational Institute of Technology Rourkela, India - SivajiBandyopadhyayJadavpur University, Kolkata, India + SivajiBandyopadhyayJadavpur University, Kolkata, India 229-238 The first priority of action in the Sendai Framework for Disaster Risk Reduction 2015-2030 advocates the understanding of disaster risk by collecting and processing practical information related to disasters. A smart collection may be the compilation of relevant and summarized news articles focused on some key pieces of information such as disaster event type, geographic location(s), and impacts. In this article, a Multi-Task Learning (MTL) based end-to-end model has been developed to perform three related tasks: sentence classification depending on the presence of (1) relevant locations and (2) impact information to generate a summary,and (3) identification of the causes or event types in disaster news. Each of the three tasks is formulated as a multilabel binary classification problem. The results of the proposed MTL model have been compared with three popular transformer models: BERT, RoBERTa, and ALBERT. It is observed that the proposed model showed better performance scores than the other models in most cases. 2025.nlp4pi-1.19 @@ -292,7 +292,7 @@ Participatory Design for Positive Impact: Behind the Scenes of Three <fixed-case>NLP</fixed-case> Projects MarianneWilsonEdinburgh Napier University - David M.HowcroftUniversity of Aberdeen + David M.HowcroftUniversity of Aberdeen IoannisKonstasHeriot-Watt University DimitraGkatziaEdinburgh Napier University GavinAbercrombieHeriot-Watt University diff --git a/data/xml/2025.nodalida.xml b/data/xml/2025.nodalida.xml index b1c962c987..d81032e22a 100644 --- a/data/xml/2025.nodalida.xml +++ b/data/xml/2025.nodalida.xml @@ -41,7 +41,7 @@ Applying and Optimising a Multi-Scale Probit Model for Cross-Source Text Complexity Classification and Ranking in <fixed-case>Swedish</fixed-case> ElsaAndersson JohanFalkenjack - ArneJönsson + ArneJönsson 17–27 We present results from using Probit models to classify and rank texts of varying complexity from multiple sources. We use multiple linguistic sources including Swedish easy-to-read books and investigate data augmentation and feature regularisation as optimisation methods for text complexity assessment. Multi-Scale and Single Scale Probit models are implemented using different ratios of training data, and then compared. Overall, the findings suggest that the Multi-Scale Probit model is an effective method for classifying text complexity and ranking new texts and could be used to improve the performance on small datasets as well as normalize datasets labelled using different scales. 2025.nodalida-1.3 @@ -72,7 +72,7 @@ Transfer-Learning <fixed-case>German</fixed-case> Metaphors Inspired by Second Language Acquisition - MariaBerger + MariaBerger 48–54 A major part of figurative meaning prediction is based on English-language training corpora. One strategy to apply techniques to languages other than English lies in applying transfer learning techniques to correct this imbalance. However, in previous studies we learned that the bilingual representations of current transformer models are incapable of encoding the deep semantic knowledge necessary for a transfer learning step, especially for metaphor prediction. Hence, inspired by second language acquisition, we attempt to improve German metaphor prediction in transfer learning by modifying the context windows of our input samples to align with lower readability indices achieving up to 13% higher F1 score. 2025.nodalida-1.6 @@ -91,7 +91,7 @@ Investigating the effectiveness of Data Augmentation and Contrastive Learning for Named Entity Recognition NoelChia InesRehbein - Simone PaoloPonzetto + Simone PaoloPonzetto 66–79 Data Augmentation (DA) and Contrastive Learning (CL) are widely used in NLP, but their potential for NER has not yet been investigated in detail. Existing work is mostly limited to zero- and few-shot scenarios where improvements over the baseline are easy to obtain. In this paper, we address this research gap by presenting a systematic evaluation of DA for NER on small, medium-sized and large datasets with coarse and fine-grained labels. We report results for a) DA only, b) DA in combination with supervised contrastive learning, and c) DA with transfer learning. Our results show that DA on its own fails to improve results over the baseline and that supervised CL works better on larger datasets while transfer learning is beneficial if the target dataset is very small. Finally, we investigate how contrastive learning affects the learned representations, based on dimensionality reduction and visualisation techniques, and show that CL mostly helps to separate named entities from non-entities. 2025.nodalida-1.8 @@ -163,7 +163,7 @@ Modeling Multilayered Complexity in Literary Texts - PascaleFeldkamp + PascaleFeldkamp MártonKardos KristofferNielbo YuriBizzoni @@ -218,7 +218,7 @@ Ona deGibert TommiNieminen YvesScherrer - JörgTiedemann + JörgTiedemann 201–208 In this work, we introduce OpusDistillery, a novel framework to streamline the Knowledge Distillation (KD) process of multilingual NMT models. OpusDistillery’s main features are the integration of openly available teacher models from OPUS-MT and Hugging Face, comprehensive multilingual support and robust GPU utilization tracking. We describe the tool in detail and discuss the individual contributions of its pipeline components, demonstrating its flexibility for different use cases. OpusDistillery is open-source and released under a permissive license, aiming to facilitate further research and development in the field of multilingual KD for any sequence-to-sequence task. Our code is available at https://github.com/Helsinki-NLP/OpusDistillery. 2025.nodalida-1.20 @@ -228,8 +228,8 @@ Mind the Gap: <fixed-case>Diverse</fixed-case> <fixed-case>NMT</fixed-case> Models for Resource-Constrained Environments Ona deGibert DayyánO’Brien - DušanVariš - JörgTiedemann + DušanVariš + JörgTiedemann 209–216 We present fast Neural Machine Translation models for 17 diverse languages, developed using Sequence-level Knowledge Distillation. Our selected languages span multiple language families and scripts, including low-resource languages. The distilled models achieve comparable performance while being 10x times faster than transformer-base and 35x times faster than transformer-big architectures. Our experiments reveal that teacher model quality and capacity strongly influence the distillation success, as well as the language script. We also explore the effectiveness of multilingual students. We release publicly our code and models in our Github repository: anonymised. 2025.nodalida-1.21 @@ -239,7 +239,7 @@ Testing relevant linguistic features in automatic <fixed-case>CEFR</fixed-case> skill level classification for <fixed-case>Icelandic</fixed-case> IsidoraGlišić Caitlin LauraRichter - Anton KarlIngason + Anton KarlIngason 217–222 This paper explores the use of various linguistic features to develop models for automatic classification of language proficiency on the CEFR scale for Icelandic, a low-resourced and morphologically complex language. We train two classifiers to assess skill level of learner texts. One is used as a baseline and takes in the original unaltered text written by a learner and uses predominantly surface features to assess the level. The other uses both surface and other morphological and lexical features, as well as context vectors from transformer (IceBERT). It takes in both the original and corrected versions of the text and takes into account errors/deviation of the original texts compared to the corrected versions. Both classifiers show promising results, with baseline models achieving between 62.2-67.1% accuracy and dual-version between 75-80.3%. 2025.nodalida-1.22 @@ -311,7 +311,7 @@ Database of <fixed-case>Latvian</fixed-case> Morphemes and Derivational Models: ideas and expected results AndraKalnača TatjanaPakalne - KristīneLevāne-Petrova + KristīneLevāne-Petrova 279–286 In this paper, we describe “The Database of Latvian Morphemes and Derivational Models” – a large-scale corpus-based and manually validated database of Latvian derivational morphology currently in development at the University of Latvia. The database contains morpheme-level data – morphemes, incl. morpheme variants (allomorphs), morpheme types, morpheme homonymy/ homography resolu- tion, hierarchical relations between root morphemes, links to word families, and lemma-level data – incl. base form, morphemic segmentation, POS, grammatical features, derivational motivation (incl. compounding), word-family membership. The focus of the database is on providing linguistically accurate comprehensive data as a reliable basis for future work in different fields. 2025.nodalida-1.29 @@ -319,9 +319,9 @@ Localizing <fixed-case>AI:</fixed-case> Evaluating Open-Weight Language Models for Languages of <fixed-case>B</fixed-case>altic States - JurgitaKapočiūtė-Dzikienė + JurgitaKapočiūtė-Dzikienė TomsBergmanis - MārcisPinnis + MārcisPinnis 287–295 Although large language models (LLMs) have transformed our expectations of modern language technologies, concerns over data privacy often restrict the use of commercially available LLMs hosted outside of EU jurisdictions. This limits their application in governmental, defense, and other data-sensitive sectors. In this work, we evaluate the extent to which locally deployable open-weight large language models support lesser-spoken languages such as Lithuanian, Latvian, and Estonian. We examine various size and precision variants of the top-performing multilingual open-weight models, Llama 3, Gemma 2, Phi, and NeMo, on machine translation, multiple-choice question answering, and free-form text generation. The results indicate that while certain models like Gemma 2 perform close to the top commercially available models, many LLMs struggle with these languages. Most surprisingly, however, we find that these models, while showing close to state-of-the-art translation performance, are still prone to lexical hallucinations with errors in at least 1 in 20 words for all open-weight multilingual LLMs. 2025.nodalida-1.30 @@ -422,8 +422,8 @@ MikusGrasmanis AguteKlints GuntaNešpore-Bērzkalne - PēterisPaikens - LaumaPretkalniņa + PēterisPaikens + LaumaPretkalniņa LauraRituma MadaraStāde EvelīnaTauriņa @@ -460,7 +460,7 @@ A Comparative Study of <fixed-case>PEFT</fixed-case> Methods for Python Code Generation JohannaMännistö JosephAttieh - JörgTiedemann + JörgTiedemann 390–396 Fine-tuning language models incurs high costs in training, inference and storage. Parameter-efficient fine-tuning (PEFT) methods have emerged as a more cost-effective alternative to full fine-tuning. However, limited work has compared different PEFT approaches for tasks like code generation. In this study, we examine the effect of various PEFT training methods on model performance in the task of Python code generation. We fine-tune four model families, ranging from 124M to 7B parameters, using three PEFT approaches alongside standard full fine-tuning. Our findings reveal that the effectiveness of each PEFT method varies with the model size and the corpus used. 2025.nodalida-1.42 @@ -472,7 +472,7 @@ PetterMæhlum Victoria Ovedie ChruickshankLangø ErikVelldal - LiljaØvrelid + LiljaØvrelid 397–407 This paper introduces a new suite of question answering datasets for Norwegian; NorOpenBookQA, NorCommonSenseQA, NorTruthfulQA, and NRK-Quiz-QA. The data covers a wide range of skills and knowledge domains, including world knowledge, commonsense reasoning, truthfulness, and knowledge about Norway. Covering both of the written standards of Norwegian – Bokmål and Nynorsk – our datasets comprise over 10k question-answer pairs, created by native speakers. We detail our dataset creation approach and present the results of evaluating 11 language models (LMs) in zero- and few-shot regimes. Most LMs perform better in Bokmål than Nynorsk, struggle most with commonsense reasoning, and are often untruthful in generating answers to questions. All our datasets and annotation materials are publicly available. 2025.nodalida-1.43 @@ -481,7 +481,7 @@ Incorporating Target Fuzzy Matches into Neural Fuzzy Repair TommiNieminen - JörgTiedemann + JörgTiedemann SamiVirpioja 408–418 Neural fuzzy repair (NFR) is a simple implementation of retrieval-augmented translation (RAT), based on data augmentation. In NFR, a translation database is searched for translation examples where the source sentence is similar to the sentence being translated, and the target side of the example is concatenated with the source sentences. We experiment with introducing retrieval that is based on target similarity to NFR during training. The results of our experiments confirm that including target similarity matches during training supplements source similarity matches and leads to better translations at translation time. @@ -550,10 +550,10 @@ Evaluating <fixed-case>LLM</fixed-case>-Generated Explanations of Metaphors – A Culture-Sensitive Study of <fixed-case>Danish</fixed-case> - Bolette S.Pedersen + Bolette S.Pedersen NathalieSørensen SanniNimb - Dorte HaltrupHansen + Dorte HaltrupHansen SussiOlsen AliAl-Laith 470–479 @@ -630,7 +630,7 @@ EgilRønningstad Lilja CharlotteStorset PetterMæhlum - LiljaØvrelid + LiljaØvrelid ErikVelldal 537–543 Sentiment analysis of patient feedback from the public health domain can aid decision makers in evaluating the provided services. The current paper focuses on free-text comments in patient surveys about general practitioners and psychiatric healthcare, annotated with four sentence-level polarity classes - positive, negative, mixed and neutral - while also attempting to alleviate data scarcity by leveraging general-domain sources in the form of reviews. For several different architectures, we compare in-domain and out-of-domain effects, as well as the effects of training joint multi-domain models. @@ -656,7 +656,7 @@ StephanOepen ErikVelldal WilfredØstgulen - LiljaØvrelid + LiljaØvrelid Aslak SiraMyhre 544–560 The use of copyrighted materials in training language models raises critical legal and ethical questions. This paper presents a framework for and the results of empirically assessing the impact of publisher-controlled copyrighted corpora on the performance of generative large language models (LLMs) for Norwegian. When evaluated on a diverse set of tasks, we found that adding both books and newspapers to the data mixture of LLMs tend to improve their performance, while the addition of fiction works seems to be detrimental. Our experiments could inform the creation of a compensation scheme for authors whose works contribute to AI development. @@ -678,7 +678,7 @@ DavidSamuel VladislavMikhailov ErikVelldal - LiljaØvrelid + LiljaØvrelid Lucas Georges GabrielCharpentier AndreyKutuzov StephanOepen @@ -803,7 +803,7 @@ SamiaTouileb VladislavMikhailov Marie IngeborgKroka - LiljaØvrelid + LiljaØvrelid ErikVelldal 729–738 We introduce a dataset of high-quality human-authored summaries of news articles in Norwegian. The dataset is intended for benchmarking of the abstractive summarisation capabilities of generative language models. Each document in the dataset is provided with three different candidate gold-standard summaries written by native Norwegian speakers and all summaries are provided in both of the written variants of Norwegian – Bokmål and Nynorsk. The paper describes details on the data creation effort as well as an evaluation of existing open LLMs for Norwegian on the dataset. We also provide insights from a manual human evaluation, comparing human-authored to model generated summaries. Our results indicate that the dataset provides a challenging LLM benchmark for Norwegian summarisation capabilities. @@ -826,7 +826,7 @@ MathiasCreutz IvanVulić AnnaKorhonen - JörgTiedemann + JörgTiedemann 755–766 Recent work has demonstrated that large language models can often generate fluent and linguistically correct text, adhering to given instructions. However, to what extent can they execute complex instructions requiring knowledge of fundamental linguistic concepts and elaborate semantic reasoning? Our study connects an established linguistic theory of paraphrasing with LLM-based practice to analyze which specific types of paraphrases LLMs can accurately produce and where they still struggle. To this end, we investigate a method of analyzing paraphrases generated by LLMs prompted with a comprehensive set of systematic linguistic instructions. We conduct a case study using GPT-4, which has shown strong performance across various language generation tasks, and we believe that other LLMs may face similar challenges in comparable scenarios. We examine GPT-4 from a linguistic perspective to explore its potential contributions to linguistic research regarding paraphrasing, systematically assessing how accurately the model generates paraphrases that adhere to specified transformation rules. Our results suggest that GPT-4 frequently prioritizes simple lexical or syntactic alternations, often disregarding the transformation guidelines if they overly complicate the primary task. 2025.nodalida-1.75 @@ -847,7 +847,7 @@ SocratesVakirtzian VivianStamou YannisKazos - StellaMarkantonatou + StellaMarkantonatou 776–784 We report on the development of the first treebank and parser for Eastern Cretan in the framework of Universal Dependencies (UD). Eastern Cretan is a living but under-resourced dialect of Modern Greek. We have worked on the transcription of oral material and relied on active annotation and knowledge transfer from GUD, a treebank of Standard Modern Greek. Along with its other phonological and morphosyntactic differences from Standard Modern Greek, Eastern Cretan (and other varieties of Modern Greek) makes heavy use of euphonics and voicing that have not been included in the UD annotation guidelines so far. We have provided annotation guidelines for East Cretan euphonics and voicing and included them in the models. Knowledge transfer from the treebank of Standard Modern Greek to the dialectal models helped to initiate annotation via an active annotation procedure 2025.nodalida-1.77 @@ -868,7 +868,7 @@ HuilingYou SamiaTouileb ErikVelldal - LiljaØvrelid + LiljaØvrelid 801–811 In this work, we approach event extraction from Norwegian news text using a generation-based approach which formulates the task as text-to-structure generation. We present experiments assessing the effect of different modeling configurations and provide an analysis of the model predictions and typical system errors. Finally, we apply our system to a large corpus of raw news texts and analyze the resulting distribution of event structures in a fairly representative snap-shot of the Norwegian news landscape. 2025.nodalida-1.79 diff --git a/data/xml/2025.privatenlp.xml b/data/xml/2025.privatenlp.xml index 339c5e8c3f..cd73a7afcc 100644 --- a/data/xml/2025.privatenlp.xml +++ b/data/xml/2025.privatenlp.xml @@ -85,7 +85,7 @@ SamuelBelkadi LifengHan WarrenDel-PintoUniversity of Manchester - GoranNenadicUniversity of Manchester + GoranNenadicUniversity of Manchester 60-74 Due to the sensitive nature of clinical letters, their use in model training, medical research, and education is limited. This work aims to generate diverse, de-identified, and high-quality synthetic clinical letters to enhance privacy protection. This study explores various pre-trained language models (PLMs) for text masking and generation, employing various masking strategies with a focus on Bio_ClinicalBERT. Both qualitative and quantitative methods are used for evaluation, supplemented by a downstream Named Entity Recognition (NER) task. Our results indicate that encoder-only models outperform encoder-decoder models. General-domain and clinical-domain PLMs exhibit comparable performance when clinical information is preserved. Preserving clinical entities and document structure yields better performance than fine-tuning alone. Masking stopwords enhances text quality, whereas masking nouns or verbs has a negative impact. BERTScore proves to be the most reliable quantitative evaluation metric in our task. Contextual information has minimal impact, indicating that synthetic letters can effectively replace original ones in downstream tasks. Unlike previous studies that focus primarily on reconstructing original letters or training a privacy-detection and substitution model, this project provides a framework for generating diverse clinical letters while embedding privacy detection, enabling sensitive dataset expansion and facilitating the use of real-world clinical data. Our codes and trained models will be publicly available at https://github.com/HECTA-UoM/Synthetic4Health. 2025.privatenlp-main.6 diff --git a/data/xml/2025.quasy.xml b/data/xml/2025.quasy.xml index 16cc91eece..77d2789e35 100644 --- a/data/xml/2025.quasy.xml +++ b/data/xml/2025.quasy.xml @@ -141,7 +141,7 @@ Do Multilingual Transformers Encode <fixed-case>P</fixed-case>aninian Grammatical Relations? A Layer-wise Probing Study AkshitKumarInternational Institute of Information Technology, Hyderabad, International Institute of Information Technology Hyderabad - DiptiSharmaIIIT Hyderabad + DiptiSharmaIIIT Hyderabad ParameswariKrishnamurthyInternational Institute of Information Technology Hyderabad, Dhirubhai Ambani Institute Of Information and Communication Technology 124-130 Large multilingual transformers such as XLM-RoBERTa achieve impressive performance on diverse NLP benchmarks, but understanding how they internally encode grammatical information remains challenging. This study investigates the encoding of syntactic and morphological information derived from the Paninian grammatical framework—specifically designed for morphologically rich Indian languages—across model layers. Using diagnostic probing, we analyze the hidden representations of frozen XLM-RoBERTa-base, mBERT, and IndicBERT models across seven Indian languages (Hindi, Kannada, Malayalam, Marathi, Telugu, Urdu, Bengali). Probes are trained to predict Paninian dependency relations (by edge probing) and essential morphosyntactic features (UPOS tags, Vibhakti markers). We find that syntactic structure (dependencies) is primarily encoded in the middle-to-upper-middle layers (layers 6–9), while lexical features peak slightly earlier. Although the general layer-wise trends are shared across models, significant variations in absolute probing performance reflect differences in model capacity, pre-training data, and language-specific characteristics. These findings shed light on how theory-specific grammatical information emerges implicitly within multilingual transformer representations trained largely on unstructured raw text. diff --git a/data/xml/2025.queerinai.xml b/data/xml/2025.queerinai.xml index c80c054641..514d1d00be 100644 --- a/data/xml/2025.queerinai.xml +++ b/data/xml/2025.queerinai.xml @@ -49,7 +49,7 @@ AlexandriaLetoUniversity of Colorado at Boulder JuanVásquez AlexisPalmerUniversity of Colorado at Boulder - Maria LeonorPachecoUniversity of Colorado at Boulder + Maria LeonorPachecoUniversity of Colorado at Boulder 17-25 Given the widespread use of LLM-powered conversational agents such as ChatGPT, analyzing the ways people interact with them could provide valuable insights into human behavior. Prior work has shown that these agents are sometimes used in sexual contexts, such as to obtain advice, to role-play as sexual companions, or to generate erotica. While LGBTQ+ acceptance has increased in recent years, dehumanizing practices against minorities continue to prevail. In this paper, we hone in on this and perform an analysis of dehumanizing tendencies toward LGBTQ+ individuals by human users in their sexual interactions with ChatGPT. Through a series of experiments that model various concept vectors associated with distinct shades of dehumanization, we find evidence of the reproduction of harmful stereotypes. However, many user prompts lack indications of dehumanization, suggesting that the use of these agents is a complex and nuanced issue which warrants further investigation. 2025.queerinai-main.3 diff --git a/data/xml/2025.realm.xml b/data/xml/2025.realm.xml index af2305651b..6a6f8494d5 100644 --- a/data/xml/2025.realm.xml +++ b/data/xml/2025.realm.xml @@ -64,7 +64,7 @@ A Multi-<fixed-case>AI</fixed-case> Agent System for Autonomous Optimization of Agentic <fixed-case>AI</fixed-case> Solutions via Iterative Refinement and <fixed-case>LLM</fixed-case>-Driven Feedback Loops Kamer AliYukselaiXplain, inc. - ThiagoCastro FerreiraUniversidade Federal de Minas Gerais + ThiagoCastro FerreiraUniversidade Federal de Minas Gerais MohamedAl-BadrashinyaiXplain HassanSawafaiXplain 52-62 @@ -99,7 +99,7 @@ Hidden Forms: A Dataset to Fill Masked Interfaces from Language Commands - AnirudhSundar + AnirudhSundar Christopher GordonRichardsonGeorgia Institute of Technology WilliamGay BenjaminReichman @@ -368,7 +368,7 @@ From Knowledge to Noise: <fixed-case>CTIM</fixed-case>-Rover and the Pitfalls of Episodic Memory in Software Engineering Agents TobiasLindenbauerJetbrains GeorgGrohTechnical University Munich - HinrichSchuetze + HinrichSchuetze 411-427 We introduce CTIM-Rover, an AI agent for Software Engineering (SE) built on top of AutoCodeRover (Zhang et al., 2024) that extends agentic reasoning frameworks with an episodic memory, more specifically, a general and repository-level Cross-Task-Instance Memory (CTIM). While existing open-source SE agents mostly rely on ReAct (Yao et al., 2023b), Reflexion (Shinn et al., 2023), or Code-Act (Wang et al., 2024), all of these reasoning and planning frameworks inefficiently discard their long-term memory after a single task instance. As repository-level understanding is pivotal for identifying all locations requiring a patch for fixing a bug, we hypothesize that SE is particularly well positioned to benefit from CTIM. For this, we build on the Experiential Learning (EL) approach ExpeL (Zhao et al., 2024), proposing a Mixture-Of-Experts (MoEs) inspired approach to create both a general-purpose and repository-level CTIM . We find that CTIM-Rover does not outperform AutoCodeRover in any configuration and thus conclude that neither ExpeL nor DoT-Bank (Lingam et al., 2024) scale to real-world SE problems. Our analysis indicates noise introduced by distracting CTIM items or exemplar trajectories as the likely source of the performance degradation. 2025.realm-1.30 diff --git a/data/xml/2025.regnlp.xml b/data/xml/2025.regnlp.xml index 51edce9a65..3206e6629b 100644 --- a/data/xml/2025.regnlp.xml +++ b/data/xml/2025.regnlp.xml @@ -4,9 +4,9 @@ Proceedings of the 1st Regulatory NLP Workshop (RegNLP 2025) TubaGokhan - KexinWang + KexinWang IrynaGurevych - TedBriscoe + TedBriscoe Association for Computational Linguistics
Abu Dhabi, UAE
January @@ -22,7 +22,7 @@ Shared Task <fixed-case>RIRAG</fixed-case>-2025: Regulatory Information Retrieval and Answer Generation TubaGokhan - KexinWang + KexinWang IrynaGurevych TedBriscoe 1–4 @@ -46,7 +46,7 @@ EhsanLotfi NikolayBanar NersesYuzbashyan - WalterDaelemans + WalterDaelemans 10–21 Statutory article retrieval plays a crucial role in making legal information more accessible to both laypeople and legal professionals. Multilingual countries like Belgium present unique challenges for retrieval models due to the need for handling legal issues in multiple languages. Building on the Belgian Statutory Article Retrieval Dataset (BSARD) in French, we introduce the bilingual version of this dataset, bBSARD. The dataset contains parallel Belgian statutory articles in both French and Dutch, along with legal questions from BSARD and their Dutch translation. Using bBSARD, we conduct extensive benchmarking of retrieval models available for Dutch and French. Our benchmarking setup includes lexical models, zero-shot dense models, and fine-tuned small foundation models. Our experiments show that BM25 remains a competitive baseline compared to many zero-shot dense models in both languages. We also observe that while proprietary models outperform open alternatives in the zero-shot setting, they can be matched or surpassed by fine-tuning small language-specific models. Our dataset and evaluation code are publicly available. 2025.regnlp-1.3 @@ -109,7 +109,7 @@ Structured Tender Entities Extraction from Complex Tables with Few-short Learning AsimAbbas - MarkLee + MarkLee NilooferShanavas VenelinKovatchev MubashirAli diff --git a/data/xml/2025.repl4nlp.xml b/data/xml/2025.repl4nlp.xml index 7629de0112..1983738e9c 100644 --- a/data/xml/2025.repl4nlp.xml +++ b/data/xml/2025.repl4nlp.xml @@ -7,7 +7,7 @@ AlexandraChronopoulou Xiang LorraineLi Bodhisattwa PrasadMajumder - FredaShi + FredaShi GiorgosVernikos Association for Computational Linguistics
Albuquerque, NM
@@ -170,7 +170,7 @@ Large Language Models Are Overparameterized Text Encoders Thennal DK TimFischerUniversity of Hamburg - ChrisBiemannU Hamburg + ChrisBiemannU Hamburg 170-184 Large language models (LLMs) demonstrate strong performance as text embedding models when finetuned with supervised contrastive training. However, their large size balloons inference time and memory requirements. In this paper, we show that by pruning the last % layers of an LLM before supervised training for only 1000 steps, we can achieve a proportional reduction in memory and inference time. We evaluate four different state-of-the-art LLMs on text embedding tasks and find that our method can prune up to 30% of layers with negligible impact on performance and up to 80% with only a modest drop. With only three lines of code, our method is easily implemented in any pipeline for transforming LLMs to text encoders. We also propose L3Prune, a novel layer-pruning strategy based on the model’s initial loss that provides two optimal pruning configurations: a large variant with negligible performance loss and a small variant for resource-constrained settings. On average, the large variant prunes 21% of the parameters with a performance drop, and the small variant only suffers from a decrease while pruning 74% of the model. We consider these results strong evidence that LLMs are overparameterized for text embedding tasks, and can be easily pruned. 2025.repl4nlp-1.13 diff --git a/data/xml/2025.resourceful.xml b/data/xml/2025.resourceful.xml index f4d88df932..0852128403 100644 --- a/data/xml/2025.resourceful.xml +++ b/data/xml/2025.resourceful.xml @@ -132,7 +132,7 @@
First Steps in Benchmarking <fixed-case>L</fixed-case>atvian in Large Language Models - IngunaSkadina + IngunaSkadina BrunoBakanovs RobertsDarģis 86–95 diff --git a/data/xml/2025.sdp.xml b/data/xml/2025.sdp.xml index 45881e459b..a8c638c3ed 100644 --- a/data/xml/2025.sdp.xml +++ b/data/xml/2025.sdp.xml @@ -8,7 +8,7 @@ AmanpreetSingh AakankshaNaik GeorgRehm - DayneFreitag + DayneFreitag DanLi SonjaSchimmler AnitaDe Waard @@ -260,7 +260,7 @@ Visual Question Answering on Scientific Charts Using Fine-Tuned Vision-Language Models FlorianSchleid JanStrich - ChrisBiemannU Hamburg + ChrisBiemannU Hamburg 211-220 Scientific charts often encapsulate the core findings of research papers, making the ability to answer questions about these charts highly valuable. This paper explores recent advancements in scientific chart visual question answering (VQA) enabled by large Vision Language Models (VLMs) and newly curated datasets. As part of the SciVQA shared task from the 5th Workshop on Scholarly Document Processing, we develop and evaluate multimodal Systems capable of answering diverse question types - including multiple-choice, yes/no, unanswerable, and infinite answer set questions - based on chart images extracted from scientific literature. We investigate the effects of zero-shot and one-shot prompting, as well as supervised fine-tuning (SFT), on the performance of Qwen2.5-VL models (7B and 32B variants). We also tried to include more training data from domain-specific datasets (SpiQA and ArXivQA). Our fine-tuned Qwen2.5-VL 32B model achieves a substantial improvement over the GPT-4o-mini baseline and reaches the 4th place in the shared task, highlighting the effectiveness of domain-specific fine-tuning. We published the code for the experiments. 2025.sdp-1.19 @@ -353,7 +353,7 @@ <fixed-case>A</fixed-case>lex<fixed-case>UNLP</fixed-case>-<fixed-case>FMT</fixed-case> at <fixed-case>C</fixed-case>limate<fixed-case>C</fixed-case>heck Shared Task: Hybrid Retrieval with Adaptive Similarity Graph-based Reranking for Climate-related Social Media Claims Fact Checking MahmoudFathallah - NagwaEl-Makky + NagwaEl-Makky MarwanTorkiAlexandria University 288-292 In this paper, we describe our work done in the ClimateCheck shared task at the Scholarly document processing (SDP) workshop, ACL 2025. We focused on subtask 1: Abstracts Retrieval. The task involved retrieving relevant paper abstracts from a large corpus to verify claims made on social media about climate change. We explored various retrieval and ranking techniques, including fine-tuning transformer-based dense retrievers, sparse retrieval methods, and reranking using cross-encoder models. Our final and best-performing system utilizes a hybrid retrieval approach combining BM25 sparse retrieval and a fine-tuned Stella model for dense retrieval, followed by an MSMARCO trained minilm cross-encoder model for ranking. We adapt an iterative graph-based re-ranking approach leveraging a document similarity graph built for the document corpus to dynamically update candidate pool for reranking. This system achieved a score of 0.415 on the final test set for subtask 1, securing 3rd place in the final leader board. diff --git a/data/xml/2025.sealp.xml b/data/xml/2025.sealp.xml index d25baaf6e5..1a3354d0da 100644 --- a/data/xml/2025.sealp.xml +++ b/data/xml/2025.sealp.xml @@ -3,10 +3,10 @@ Proceedings of the Second Workshop in South East Asian Language Processing - DerryWijaya + DerryWijaya Alham FikriAji ClaraVania - Genta IndraWinata + Genta IndraWinata AyuPurwarianti Association for Computational Linguistics
Online
diff --git a/data/xml/2025.semeval.xml b/data/xml/2025.semeval.xml index 2721f208be..ba662fa209 100644 --- a/data/xml/2025.semeval.xml +++ b/data/xml/2025.semeval.xml @@ -233,8 +233,8 @@ <fixed-case>G</fixed-case>ate<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 10: Hierarchical Three-Step Prompting for Multilingual Narrative Classification IknoorSinghUniversity of Sheffield, United Kingdom - CarolinaScartonUniversity of Sheffield - KalinaBontchevaUniversity of Sheffield + CarolinaScartonUniversity of Sheffield + KalinaBontchevaUniversity of Sheffield 148-154 The proliferation of online news and the increasing spread of misinformation necessitate robust methods for automated narrative classification. This paper presents our approach to SemEval 2025 Task 10 Subtask 2, which aims to classify news articles into a predefined two-level taxonomy of main narratives and sub-narratives across multiple languages. We propose Hierarchical Three-Step Prompting (H3Prompt) for multilingual narrative classification. Our methodology follows a three-step prompting strategy, where the model first categorises an article into one of two domains (Ukraine-Russia War or Climate Change), then identifies the most relevant main narratives, and finally assigns sub-narratives. Our approach secured the top position on the English test set among 28 competing teams worldwide. This result highlights the effectiveness of our method in improving narrative classification performance over the baselines. 2025.semeval-1.21 @@ -260,7 +260,7 @@ <fixed-case>UNEDT</fixed-case>eam at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 10: Zero-Shot Narrative Classification Jesus M.Fraile - HernandezNLP & IR Group, UNED - AnselmoPeñasNLP & IR Group, UNED + AnselmoPeñasNLP & IR Group, UNED 165-173 In this paper we present our participation in Subtask 2 of SemEval-2025 Task 10, focusing on the identification and classification of narratives in news of multiple languages, on climate change and the Ukraine-Russia war. To address this task, we employed a Zero-Shot approach using a generative Large Language Model without prior training on the dataset. Our classification strategy is based on two steps: first, the system classifies the topic of each news item; subsequently, it identifies the sub-narratives directly at the finer granularity. We present a detailed analysis of the performance of our system compared to the best ranked systems on the leaderboard, highlighting the strengths and limitations of our approach. 2025.semeval-1.24 @@ -419,7 +419,7 @@ ThanetMarkchomDepartment of Computer Science, University of Reading JianfeiXuSchool of Computing, Newcastle University, Newcastle upon Tyne, UK TongWuFormerly at School of Computing, Newcastle University, Newcastle upon Tyne, UK - HuizhiLiangSchool of Computing, Newcastle University, Newcastle upon Tyne, UK + HuizhiLiangSchool of Computing, Newcastle University, Newcastle upon Tyne, UK 271-279 SemEval-2025 Task 3 (Mu-SHROOM) focuses on detecting hallucinations in content generated by various large language models (LLMs) across multiple languages. This task involves not only identifying the presence of hallucinations but also pinpointing their specific occurrences. To tackle this challenge, this study introduces two methods: modified RefChecker and modified SelfCheckGPT. The modified RefChecker integrates prompt-based factual verification into References, structuring them as claim-based tests rather than single external knowledge sources. The modified SelfCheckGPT ~incorporates external knowledge to overcome its reliance on internal knowledge. In addition, both methods’ original prompt designs are enhanced to identify hallucinated words within LLM-generated texts. Experimental results demonstrate the effectiveness of the approach, achieving a high ranking on the test dataset in detecting hallucinations across various languages, with an average IoU of 0.5310 and an average COR of 0.5669. 2025.semeval-1.39 @@ -438,7 +438,7 @@ ThanetMarkchomUniversity of Reading TongWuPreviously at School of Computing, Newcastle University, Newcastle upon Tyne, UK LitingHuangSchool of Computing, Newcastle University, Newcastle upon Tyne, UK - HuizhiLiangSchool of Computing, Newcastle University, Newcastle upon Tyne, UK + HuizhiLiangSchool of Computing, Newcastle University, Newcastle upon Tyne, UK 288-295 SemEval-2025 Task 1 focuses on ranking images based on their alignment with a given nominal compound that may carry idiomatic meaning in both English and Brazilian Portuguese. To address this challenge, this work uses generative large language models (LLMs) and multilingual CLIP models to enhance idiomatic compound representations. LLMs generate idiomatic meanings for potentially idiomatic compounds, enriching their semantic interpretation. These meanings are then encoded using multilingual CLIP models, serving as representations for image ranking. Contrastive learning and data augmentation techniques are applied to fine-tune these embeddings for improved performance.Experimental results show that multimodal representations extracted through this method outperformed those based solely on the original nominal compounds. The fine-tuning approach shows promising outcomes but is less effective than using embeddings without fine-tuning. 2025.semeval-1.41 @@ -447,7 +447,7 @@ <fixed-case>N</fixed-case>lp<fixed-case>U</fixed-case>ned at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 10: Beyond Training: A Taxonomy-Guided Approach to Role Classification Using <fixed-case>LLM</fixed-case>s AlbertoCaballeroNLP and IR group at UNED - AlvaroRodrigoNLP and IR group at UNED + AlvaroRodrigoNLP and IR group at UNED RobertoCentenoUNED 296-301 The paper presents a taxonomy-guided approach to role classification in news articles using Large Language Models (LLMs). Instead of traditional model training, the system employs zero-shot and few-shot prompting strategies, leveraging structured taxonomies and contextual cues for classification. The study evaluates hierarchical and single-step classification approaches, finding that a unified, single-step model with contextual preprocessing achieves the best performance. The research underscores the importance of input structuring and classification strategy in optimizing LLM performance for real-world applications. @@ -506,8 +506,8 @@ JiyuChenCSIRO NecvaBölücüCsiro SarvnazKarimiCSIRO - DiegoMollaMacquarie University - CecileParisCSIRO + DiegoMollaMacquarie University + CecileParisCSIRO 336-342 Detecting emotions across different languages is challenging due to the varied and culturally nuanced ways of emotional expressions. The Semeval 2025 Task 11: Bridging the Gap in Text-Based emotion shared task was organised to investigate emotion recognition across different languages. The goal of the task is to implement an emotion recogniser that can identify the basic emotional states that general third-party observers would attribute to an author based on their written text snippet, along with the intensity of those emotions. We report our investigation of various task-adaptation strategies for LLMs in emotion recognition. We show that the most effective method for this task is to fine-tune a pre-trained multilingual LLM for each language. 2025.semeval-1.48 @@ -562,7 +562,7 @@ XiangyuWangBeijing Institute of Technology GeShiBeijing University of Technology LinmeiHuSchool of Computer Science & Technology, Beijing Institute of Technology - HeyanHuangBeijing Institute of Technology + HeyanHuangBeijing Institute of Technology ChongFengBeijing Institute of Technology 373-380 This paper presents our system for Subtask 10 of Entity Framing, which focuses on assigning one or more hierarchical roles to named entities in news articles. Our approach iteratively refines prompts and utilizes the Entity-Centric Chain of Thought to complete the task. Specifically, to minimize ambiguity in label definitions, we use the model’s predictions as supervisory signals, iteratively refining the category definitions. Furthermore, to minimize the interference of irrelevant information during inference, we incorporate entity-related information into the CoT framework, allowing the model to focus more effectively on entity-centric reasoning. Our system achieved the highest ranking on the leaderboard in the Russian main role classification and the second in English, with an accuracy of 0.8645 and 0.9362, respectively. We discuss the impact of several components of our multilingual classification approach, highlighting their effectiveness. @@ -581,7 +581,7 @@ <fixed-case>BERT</fixed-case>astic at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 10: State-of-the-Art Accuracy in Coarse-Grained Entity Framing for <fixed-case>H</fixed-case>indi News TarekMahmoudMohamed Bin Zayed University of Artificial Intelligence (MBZUAI) ZhuohanXieMBZUAI - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence 386-396 We describe our system for SemEval-2025 Task 10 Subtask 1 on coarse-grained entity framing in Hindi news, exploring two complementary strategies. First, we experiment with LLM prompting using GPT-4o, comparing hierarchical multi-step prompting with native single-step prompting for both main and fine-grained role prediction. Second, we conduct an extensive study on fine-tuning XLM-R, analyzing different context granularities (full article, paragraph, or sentence-level entity mentions), monolingual vs. multilingual settings, and main vs. fine-grained role labels. Our best system, trained on fine-grained role annotations across languages using sentence-level context, achieved 43.99% exact match, 56.56 % precision, 47.38% recall, and 51.57% F1-score. Notably, our system set a new state-of-the-art for main role prediction on Hindi news, achieving 78.48 % accuracy - outperforming the next best model at 76.90%, as per the official leaderboard. Our findings highlight effective strategies for entity framing in multilingual and low-resource settings. 2025.semeval-1.55 @@ -610,7 +610,7 @@ <fixed-case>NCLT</fixed-case>eam at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 10: Enhancing Multilingual, multi-class, and Multi-Label Document Classification via Contrastive Learning Augmented Cascaded <fixed-case>UN</fixed-case>et and Embedding based Approaches ShuLiNewcastle University GeorgeWilliamsonNewcastle University - HuizhiLiangNewcastle University + HuizhiLiangNewcastle University 418-423 The SemEval 2025 Task 10 Subtask2 presents a multi-task multi-label text classification challenge. The task requires systems to classify documents simultaneously across three distinct topics, the Climate Change(CC), the Ukraine Russia War(URW), and others. Several challenge were identified, including the instinct distinct of topics, the imbalance of categories, the insufficient samples, and the different distribution of develop set and test set. To address these challenges, two deep learning model have been implemented. One of the approach is the Contrastive learning augmented Cascaded UNet model(CCU), which employs a cascaded architecture to jointly process all subtasks. This model incorporates an UNet-style architecture to classify embeddings extracted by the base text encoder. A domain adaption method was implemented to facilitate joint learning across different document topics. We address the data insufficiency through contrastive learning and mitigate data imbalance using asymmetric loss function. We also implemented a shallow machine learning model. In this approach, transformer encoder models were applied to extract text embedding from various aspect, then deploy machine learning method to do the classification and compared with the base line. The UNet-style model achieves the highest f1 sample at 0.365 on the test set of 5th place compared with all approaches on leader board. Our source code developed for this paper are available at 2025.semeval-1.58 @@ -677,7 +677,7 @@ Team <fixed-case>U</fixed-case>nibuc - <fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 11: Few-shot text-based emotion detection ClaudiuCreangaUniversity of Bucharest Teodor - GeorgeMarchitanUniversity of Bucharest - LiviuDinuUniversity of Bucharest + LiviuDinuUniversity of Bucharest 468-475 This paper describes the approach of the Unibuc - NLP team in tackling the SemEval 2025 Workshop, Task 11: Bridging the Gap in Text-Based Emotion Detection. We mainly focused on experiments using large language models (Gemini, Qwen, DeepSeek) with either few-shot prompting or fine-tuning. Withour final system, for the multi-label emotion detection track (track A), we got an F1-macro of 0.7546 (26/96 teams) for the English subset, 0.1727 (35/36 teams) for the Portuguese (Mozambican) subset and 0.325 (1/31 teams) for the Emakhuwa subset. 2025.semeval-1.65 @@ -802,7 +802,7 @@ <fixed-case>RACAI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 7: Efficient adaptation of Large Language Models for Multilingual and Crosslingual Fact-Checked Claim Retrieval Radu - GabrielChivereanuPhd Candidate RACAI - DanTufisRACAI + DanTufisRACAI 551-557 The paper details our approach to SemEval 2025 Shared Task 7: Multilingual and Crosslingual Fact-Checked Claim Retrieval.We investigate how large language models (LLMs) designed for general-purpose retrieval via text-embeddings can be adapted for fact-checked claim retrieval across multiple languages, including scenarios where the query and fact-check are in different languages. The experiments involve fine-tuning with a contrastive objective, resulting in notable gains in both accuracy and efficiency over the baseline retrieval model. We evaluate cost-effective techniques such as LoRA and QLoRA and Prompt Tuning.Additionally, we demonstrate the benefits of Matryoshka embeddings in minimizing the memory footprint of stored embeddings, reducing the system requirements for a fact-checking system. 2025.semeval-1.77 @@ -813,8 +813,8 @@ Delia - IustinaGrigoritaAlexandru Ioan Cuza University, Faculty of Computer Science Iasi Tudor - ConstantinPricopAlexandru Ioan Cuza University Sergio - AlessandroSuteu“Alexandru Ioan Cuza” University of Iasi - DanielaGifu“Alexandru Ioan Cuza” University of Iasi, Faculty of Computer Science & Romanian Academy - Iasi branch, Institute of Computer Science - DianaTrandabatUniversity Alexandru Ioan Cuza of Iasi, Romania + DanielaGifu“Alexandru Ioan Cuza” University of Iasi, Faculty of Computer Science & Romanian Academy - Iasi branch, Institute of Computer Science + DianaTrandabatUniversity Alexandru Ioan Cuza of Iasi, Romania 558-565 Entity-Aware Machine Translation (EAMT) aims to enhance the accuracy of machine translation (MT) systems in handling named entities, including proper names, domain-specific terms, and structured references. Conventional MT models often struggle to accurately translate these entities, leading to errors that affect comprehension and reliability. In this paper, we present a promising approach for SemEval 2025 Task 2, focusing on improving EAMT in ten target languages. The methodology is based on two complementary strategies: (1) multilingual Named Entity Recognition (NER) and structured knowledge bases for preprocessing and integrating entity translations, and (2) large language models (LLMs) enhanced with optimized prompts and validation mechanisms to improve entity preservation. By combining structured knowledge with neural approaches, this system aims to mitigate entity-related translation errors and enhance the overall performance of MT models. Among the systems that do not use gold information, retrieval-augmented generation (RAG), or fine-tuning, our approach ranked 1st with the second strategy and 3rd with the first strategy. 2025.semeval-1.78 @@ -843,7 +843,7 @@ AitanaMartínezUniversity of Alicante SnorreRalundUniversity of Copenhagen ElenaLloretUniversity of Alicante - PalomaMoreda PozoUniversity of Alicante + PalomaMoreda PozoUniversity of Alicante ArmandoSuárez CuetoUniversity of Alicante 575-583 This paper describes our approach to address the SemEval-2025 Task 10 subtask 3, which is focused on narrative extraction given news articles with a dominant narrative. We design an external knowledge injection approach to fine-tune a Flan-T5 model so the generated narrative explanations are in line with the dominant narrative determined in each text. We also incorporate pragmatic information in the form of communicative intentions, using them as external knowledge to assist the model. This ensures that the generated texts align more closely with the intended explanations and effectively convey the expected meaning. The results show that our approach ranks 3rd in the task leaderboard (0.7428 in Macro-F1) with concise and effective news explanations. The analyses highlight the importance of adding pragmatic information when training systems to generate adequate narrative extractions. @@ -855,7 +855,7 @@ MiladAfshariMichigan State University RichardFrostMichigan State University SamanthaKisselMichigan State University - KristenJohnsonMichigan State University + KristenJohnsonMichigan State University 584-589 We tackle the challenge of multi-label emotion detection in short texts, focusing on SemEval-2025 Task 11 Track A. Our approach, RoEmo, combines generative and discriminative models in an ensemble strategy to classify texts into five emotions: anger, fear, joy, sadness, and surprise.The generative model, instruction-finetuned on emotion detection datasets, undergoes additional fine-tuning on the SemEval-2025 Task 11 Track A dataset to enhance its performance for this specific task. Meanwhile, the discriminative model, based on binary classification, offers a straightforward yet effective approach to classification.We review recent advancements in multi-label emotion detection and analyze the task dataset. Our results show that RoEmo ranks among the top-performing systems, demonstrating high accuracy and reliability. 2025.semeval-1.81 @@ -967,7 +967,7 @@ FredinVázquezInstituto de Investigaciones en Matemáticas Aplicadas y Sistemas ChristianLunaFacultad de Contaduría y Administración AldairCondeFacultad de Ciencias - GerardoSierraInstituto de Ingeniería + GerardoSierraInstituto de Ingeniería HelenaGómez - AdornoInstituto de Investigaciones en Matemáticas Aplicadas y en Sistemas GemmaBel - EnguixInstituto de Ingeniería 657-665 @@ -979,7 +979,7 @@ <fixed-case>LATE</fixed-case>-<fixed-case>GIL</fixed-case>-<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 11: Multi-Language Emotion Detection and Intensity Classification Using Transformer Models with Optimized Loss Functions for Imbalanced Data JesúsV á z q u e z - O s o r i oPosgrado en Ciencia e Ingeniería de la Computación - UNAM HelenaGómez - AdornoInstituto de Investigaciones en Matemáticas Aplicadas y en Sistemas - UNAM - GerardoSierraInstituto de Ingeniería - UNAM + GerardoSierraInstituto de Ingeniería - UNAM VladimirSierra - CasianoFacultad de Ciencias - UNAM DianaCanchola - HernándezFacultad de Ciencias - UNAM JoséTovar - CortésFacultad de Ciencias - UNAM @@ -1208,7 +1208,7 @@ XiluCai15839566797 YajuanTongCCNU ChengzhaoWuCCNU - XinXuccnu + XinXuccnu GuanyiChenCentral China Normal University TingtingHeCentral China Normal University 841-845 @@ -1314,7 +1314,7 @@ NerisÖzenWageningen Food Safety Research BasVan Der VeldenWageningen Food Safety Research IrisHendrickxCentre for Language Studies, Radboud University Nijmegen - AliHurriyetogluWUR + AliHurriyetogluWUR 914-930 This paper presents our system developed for the SemEval-2025 Task 9: The Food Hazard Detection Challenge. The shared task’s objective is to evaluate explainable classification systems for classifying hazards and products in two levels of granularity from web-collected food recall incident reports. In this work, we propose text augmentation techniques as a way to improve poor performance in minority classes and compare their effect for each category on various transformer and machine learning models. We apply three word-level data augmentation techniques, namely synonym replacement, random word swapping, and contextual word insertion utilizing BERT. The resultsshow that transformer models tend to have a better overall performance. Meanwhile, a statistically significant improvement (P 0.05) was observed in the fine-grained categories when using BERT to compare the baseline model with the three augmented models, which achieved a 6% increase in correct predictions for minority hazard classes. This suggests that targeted augmentation of minority classes can improve the performance of transformer models. 2025.semeval-1.124 @@ -2001,7 +2001,7 @@ Atyaephyra at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 4: Low-Rank Negative Preference Optimization JanBronecFaculty of Mathematics and Physics, Charles University - JindřichHelclCharles University in Prague + JindřichHelclCharles University in Prague 1415-1422 We present a submission to the SemEval 2025 shared task on unlearning sensitive content from LLMs. Our approach employs negative preference optimization using low-rank adaptation. We show that we can utilize this combination to cheaply compute additional regularization terms, which help with unlearning stabilization. The results of our approach significantly exceed the shared task baselines. 2025.semeval-1.187 @@ -2031,7 +2031,7 @@ <fixed-case>COGNAC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 10: Multi-level Narrative Classification with Summarization and Hierarchical Prompting Azwad AnjumIslamFlorida International University - MarkFinlaysonFIU + MarkFinlaysonFIU 1442-1449 We present our approach to solving the Narrative Classification portion of the Multilingual Characterization and Extraction of Narratives SemEval-2025 challenge (Task 10, Subtask 2). This task is a multi-label, multi-class document classification task, where the classes were defined via natural language titles, descriptions, short examples, and annotator instructions, with only a few (and sometime no) labeled examples for training. Our approach leverages a text-summarization, binary relevance with zero-shot prompts, and hierarchical prompting using Large Language Models (LLM) to identify the narratives and subnarratives in the provided news articles. Notably, we did not use the labeled examples to train the system. Our approach well outperforms the official baseline and achieves an F1 score of 0.55 (narratives) and 0.43 (subnarratives), and placed 2nd in the test-set leaderboard at the system submission deadline. We provide an in-depth analysis of the construction and effectiveness of our approach using both open-source (LLaMA 3.1-8B-Instruct) and proprietary (GPT 4o-mini) Large Language Models under different prompting setups. 2025.semeval-1.190 @@ -2095,7 +2095,7 @@ FatimaUroosaIPN-CIC Tewodros AchamalehBizunehIPN-CIC GrigoriSidorovCIC-IPN - AlexanderGelbukhInstituto Politécnico Nacional + AlexanderGelbukhInstituto Politécnico Nacional 1485-1494 Emotions play a fundamental role in the decision-making process, shaping human actions across diverse disciplines. The extensive usage of emotion intensity detection approaches has generated substantial research interest during the last few years. Efficient multi-label emotion intensity detection remains unsatisfactory even for high-resource languages, with a substantial performance gap among well-resourced and under-resourced languages. Team {textbf{Tewodros}} participated in SemEval-2025 Task 11, Track B, focusing on detecting text-based emotion intensity. Our work involved multi-label emotion intensity detection across three languages: Amharic, English, and Spanish, using the (afro-xlmr-large-76L), (DeBERTa-v3-base), and (BERT-base-Spanish-wwm-uncased) models. The models achieved an average F1 score of 0.6503 for Amharic, 0.5943 for English, and an accuracy score of 0.6228 for Spanish. These results demonstrate the effectiveness of our models in capturing emotion intensity across multiple languages. 2025.semeval-1.196 @@ -2104,7 +2104,7 @@ <fixed-case>S</fixed-case>heffield<fixed-case>GATE</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 2: Multi-Stage Reasoning with Knowledge Fusion for Entity Translation XinyeYangThe University of Sheffield - KalinaBontchevaThe University of Sheffield + KalinaBontchevaThe University of Sheffield XingyiSongThe University of Sheffield 1495-1503 This paper describes the machine translation system submitted to the SemEval-2025 Entity-Aware Machine Translation Task by the SheffieldGATE Team. We proposed a multi-agent entity-aware machine translation system that operates through three distinct reasoning stages: entity recognition, knowledge enhancement, and translation decision-making. The innovation in our approach lies in leveraging large language models to generate contextually relevant queries during the knowledge enhancement stage, extracting candidate entities and their translations from external knowledge bases. In the final translation decision-making stage, we employ fine-tuned large language models to denoise the retrieved knowledge, selecting the most relevant entity information to ensure accurate translation of the original text. Experimental results demonstrate our system’s effectiveness. In emEval-2025 Task 2, our system ranks first among all systems in Spanish entity translation metrics and third in Italian. For systems that do not use gold standard entity IDs during test set inference, ours achieves the highest overall scores across four language pairs: German, French, Italian, and Spanish. @@ -2115,7 +2115,7 @@ <fixed-case>ITUNLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 8: Question-Answering over Tabular Data: A Zero-Shot Approach using <fixed-case>LLM</fixed-case>-Driven Code Generation AtakanSiteIstanbul Techical University EmreErdemirIstanbul Technical University - GülşenEryiğitIstanbul Technical University + GülşenEryiğitIstanbul Technical University 1504-1514 This paper presents our system for SemEval-2025 Task 8: DataBench, Question-Answeringover Tabular Data. The primary objective ofthis task is to perform question answering ongiven tabular datasets from diverse domains;under two subtasks: DataBench QA (SubtaskI) and DataBench Lite QA (Subtask II). Totackle both subtasks, we developed a zero-shotsolution with a particular emphasis on lever-aging Large Language Model (LLM)-basedcode generation. Specifically, we proposeda Python code generation framework, utiliz-ing state-of-the-art open-source LLMs to gen-erate executable Pandas code via optimizedprompting strategies. Our experiments revealthat different LLMs exhibit varying levels ofeffectiveness in Python code generation. Addi-tionaly, results show that Python code genera-tion achieves superior performance in tabularquestion answering compared to alternative ap-proaches. Although our ranking among zero-shot systems is unknown at the time of this pa-per’s submission, our system achieved eighthplace in Subtask I and sixth place in Subtask IIamong the 30 systems that outperformed thebaseline in the open-source models category. 2025.semeval-1.198 @@ -2134,7 +2134,7 @@ Ustnlp16 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 9: Improving Model Performance through Imbalance Handling and Focal Loss ZhuoangCaiHKUST ZhenghaoLiHKUST - YangLiuHKUST + YangLiuHKUST LiyuanGuoHKUST YangqiuSongHKUST 1522-1527 @@ -2209,7 +2209,7 @@ Frances AdrianaLaureano De LeonUniversity of Birmingham YixiaoWangBirmingham University YueFengUniversity of Birmingham - MarkLeeUniversity of Birmingham + MarkLeeUniversity of Birmingham 1570-1576 Emotion detection in natural language processing is a challenging task due to the complexity of human emotions and linguistic diversity. While significant progress has been made in high-resource languages, emotion detection in low-resource languages remains underexplored. In this work, we address multilingual and cross-lingual emotion detection by leveraging adapter-based fine-tuning with multilingual pre-trained language models. Adapters introduce a small number of trainable parameters while keeping the pre-trained model weights fixed, offering a parameter-efficient approach to adaptation. We experiment with different adapter tuning strategies, including task-only adapters, target-language-ready task adapters, and language-family-based adapters. Our results show that target-language-ready task adapters achieve the best overall performance, particularly for low-resource African languages with our team ranking 7th for Tigrinya, and 8th for Kinyarwanda. In Track C, our system ranked 5th for Oromo, Tigrinya, Kinyarwanda, Amharic, and Igbo. Our approach outperforms large language models in 11 languages and matches their performance in four others, despite using significantly fewer parameters. Furthermore, we find that adapter-based models retain cross-linguistic transfer capabilities while requiring fewer computational resources compared to full fine-tuning for each language. 2025.semeval-1.207 @@ -2221,7 +2221,7 @@ KarlaSalas-Jimenez AdriánJuárez-Pérez DiegoHernández-Bustamante - GemmaBel-Enguix + GemmaBel-Enguix HelenaGómez-Adorno 1577-1584 We present MeSSI, a multi-module system applied to SemEval 2025’s task 3: Mu-SHROOM. Our system tags questions in order to obtain semantic relevant terms that are used as information retrieval characteristics. Said characteristics serve as extraction terms for Wikipedia pages that are in turn processed to generate gold standard texts used in a hallucination evaluation system. A PoST-based entity comparison was implemented to contrast the test dataset sentences with the corresponding generated gold standards, wich in turn was the main criteria to tag hallucinations, partitioned in soft labels and hard labels. This method was tested in Spanish and English, finishing 18th and 19th respectively on the IoU based ranking. @@ -2515,7 +2515,7 @@ <fixed-case>M</fixed-case>c<fixed-case>G</fixed-case>ill-<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 11: Bridging the Gap in Text-Based Emotion Detection VivekVermaUdeM - David IfeoluwaAdelaniMcGill University / MILA + David IfeoluwaAdelaniMcGill University / MILA 1783-1789 In this paper, we present the results of our SemEval-2025 Emotion Detection Shared Task Track A which focuses on multi-label emotion detection. Our team’s approach leverages prompting GPT-4o, fine-tuning NLLB- LLM2Vec encoder, and an ensemble of these two approaches to solve Track A. Our ensemble method beats the baseline method that fine-tuned RemBERT encoder in 24 of the 28 languages. Furthermore, our results shows that the average performance is much worse for under-resourced languages in the Afro- Asiatic, Niger-Congo and Austronesia with per- formance scores at 50 F1 points and below. 2025.semeval-1.235 @@ -2543,7 +2543,7 @@ <fixed-case>W</fixed-case>ikidata-Driven Entity-Aware Translation: Boosting <fixed-case>LLM</fixed-case>s with External Knowledge - LuXuSapienza NLP Group, Sapienza University of Rome + LuXuSapienza NLP Group, Sapienza University of Rome 1802-1809 This paper presents an entity-aware machine translation system that significantly improves named entity translation by integrating external knowledge from Wikidata with Large Language Models (LLMs). While LLMs demonstrate strong general translation capabilities, they struggle with named entities that require specific cultural or domain knowledge. We address this challenge through two approaches: retrieving multilingual entity representations using gold Wikidata IDs, and employing Relik, an information extraction tool, to automatically detect and link entities without gold annotations. Experiments across multiple language pairs show our system outperforms baselines by up to 63 percentage points in entity translation accuracy (m-ETA) while maintaining high overall translation quality. Our approach ranked 3rd overall and 1st among non-finetuned systems on the SemEval-2025 Task 2 leaderboard. Additionally, we introduced language-specific post-processing further enhances performance, particularly for Traditional Chinese translations. 2025.semeval-1.238 @@ -2858,7 +2858,7 @@ <fixed-case>JU</fixed-case>-<fixed-case>CSE</fixed-case>-<fixed-case>NLP</fixed-case>’25 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 4: Learning to Unlearn <fixed-case>LLM</fixed-case>s ArkajyotiNaskarJadavpur University DipankarDasJadavpur University - SivajiBandyopadhyayJADAVPUR UNIVERSITY + SivajiBandyopadhyayJADAVPUR UNIVERSITY 2059-2064 Large Language Models (LLMs) have achieved enormous success recently due to their ability to understand and solve various non-trivial tasks in natural language. However, they have been shown to memorize their training data which, among other concerns, increases the risk of the model regurgitating creative or private content, potentially leading to legal issues for the model developer and/or vendors. Such issues are often discovered post-model training during testing or red teaming. While unlearning has been studied for some time in classification problems, it is still a relatively underdeveloped area of study in LLM research since the latter operates in a potentially unbounded output label space. Specifically, robust evaluation frameworks are lacking to assess the accuracy of these unlearning strategies. In this challenge, we aim to bridge this gap by developing a comprehensive evaluation challenge for unlearning sensitive datasets in LLMs. 2025.semeval-1.267 @@ -3193,7 +3193,7 @@ PascalGaertnerTechnical University of Munich QuimRibasTUM DarynaDementievaTechnical University of Munich - AlexanderFraserLudwig-Maximilians-Universität München + AlexanderFraserLudwig-Maximilians-Universität München 2283-2296 As human-machine interactions become increasingly natural through text, accurate emotion recognition is essential. Detecting emotions provides valuable insights across various applications. In this paper, we present our approach for SemEval-2025 Task 11, Track A, which focuses on multi-label text-based detection of perceived emotions. Our system was designed for and tested on English language text. To classify emotions present in text snippets, we initially experimented with traditional techniques such as Logistic Regression, Gradient Boosting, and SVM. We then explored state-of-the-art LLMs (OpenAI o1 and DeepSeek V3) before developing our final system, a fine-tuned Transformer-based model. Our best-performing approach employs an ensemble of fine-tuned DeBERTa-large instances with multiple seeds, optimized using Optuna and StratifiedKFold cross-validation. This approach achieves an F1-score of 0.75, demonstrating promising results with room for further improvement. 2025.semeval-1.298 @@ -3218,7 +3218,7 @@ Julia S.Dollis Daniel M.Pedrozo Artur M. A.Novais - Diogo F. C.Silva + Diogo F. C.Silva Arlindo R.Galvão Filho 2305-2310 This paper investigates the impact of data quality and processing strategies on emotion recognition in Brazilian Portuguese (PTBR) texts. We focus on data distribution, linguistic context, and augmentation techniques such as translation and synthetic data generation. To evaluate these aspects, we conduct experiments on the PTBR portion of the BRIGHTER dataset, a manually curated multilingual dataset containing nearly 100,000 samples, of which 4,552 are in PTBR. Our study encompasses both multi-label emotion detection (presence/absence classification) and emotion intensity prediction (0 to 3 scale), following the SemEval 2025 Track 11 setup. Results demonstrate that emotion intensity labels enhance model performance after discretization, and that smaller multilingual models can outperform larger ones in low-resource settings. Our official submission ranked 6th, but further refinements improved our ranking to 3rd, trailing the top submission by only 0.047, reinforcing the significance of a data-centric approach in emotion recognition. @@ -3317,10 +3317,10 @@ Team <fixed-case>ACK</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2025 Task 2: Beyond Word-for-Word Machine Translation for <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Pairs DanielLeeAdobe Inc. - HarshSharmaCU Boulder + HarshSharmaCU Boulder JieunHanKAIST SunnyJeongNew York University - AliceOhKAIST + AliceOhKAIST VeredShwartzUBC 2376-2388 Translating knowledge-intensive and entity-rich text between English and Korean requires transcreation to preserve language-specific and cultural nuances beyond literal, phonetic or word-for-word conversion. We evaluate 13 models (LLMs and MT systems) using automatic metrics and human assessment by bilingual annotators. Our findings show LLMs outperform traditional MT systems but struggle with entity translation requiring cultural adaptation. By constructing an error taxonomy, we identify incorrect responses and entity name errors as key issues, with performance varying by entity type and popularity level. This work exposes gaps in automatic evaluation metrics and hope to enable future work in completing culturally-nuanced machine translation. @@ -3465,7 +3465,7 @@ TimotheeMickusUniversity of Helsinki ElaineZosaSiloGen TeemuVahtolaUniversity of Helsinki - JörgTiedemannUniversity of Helsinki + JörgTiedemannUniversity of Helsinki AmanSinhaUniversity of Lorraine VincentSegonneIRISA - Université Bretagne Sud FernandoSanchez - VegaCenter for Mathematical Research (CIMAT) @@ -3473,7 +3473,7 @@ JindřichLibovickýCharles Univeristy JussiKarlgrenSilo AI ShaoxiongJiUniversity of Helsinki - JindřichHelclCharles University in Prague + JindřichHelclCharles University in Prague LianeGuillouRISE Research Institutes of Sweden OnaDe GibertUniversity of Helsinki JaioneBengoetxeaHiTZ Center - Ixa, University of the Basque Country UPV/EHU @@ -3495,7 +3495,7 @@ JurajPodrouzekKempelen Institute of Intelligent Technologies MatúšMesarčíkKempelen Institute of Intelligent Technologies JaroslavKopčanKempelen Institute of Intelligent Technologies - AndersSøgaardUniversity of Copenhagen + AndersSøgaardUniversity of Copenhagen 2498-2511 The rapid spread of online disinformation presents a global challenge, and machine learning has been widely explored as a potential solution. However, multilingual settings and low-resource languages are often neglected in this field. To address this gap, we conducted a shared task on multilingual claim retrieval at SemEval 2025, aimed at identifying fact-checked claims that match newly encountered claims expressed in social media posts across different languages. The task includes two subtracks: 1) a monolingual track, where social posts and claims are in the same language 2) a crosslingual track, where social posts and claims might be in different languages. A total of 179 participants registered for the task contributing to 52 test submissions. 23 out of 31 teams have submitted their system papers. In this paper, we report the best-performing systems as well as the most common and the most effective approaches across both subtracks. This shared task, along with its dataset and participating systems, provides valuable insights into multilingual claim retrieval and automated fact-checking, supporting future research in this field. 2025.semeval-1.323 @@ -3549,7 +3549,7 @@ Ibrahim SaidAhmadNortheastern University NirmalSurangeInternational Institute of Information Technology Hyderabad DanielaTeodorescuUniversity of Alberta, LMU Munich - David IfeoluwaAdelaniMcGill University / MILA + David IfeoluwaAdelaniMcGill University / MILA Alham FikriAjiMBZUAI Felermino Dario MarioAliLurio University VladimirAraujoKU Leuven @@ -3557,7 +3557,7 @@ OanaIgnatUniversity of Michigan AlexanderPanchenkoSkolkovo Institue of Science and Technology YiZhouCardiff University - SaifMohammadNational Research Council Canada + SaifMohammadNational Research Council Canada 2558-2569 We present our shared task on text-based emotion detection, covering more than 30 languages from seven distinct language families. These languages are predominantly low-resource and spoken across various continents. The data instances are multi-labeled into six emotional classes, with additional datasets in 11 languages annotated for emotion intensity. Participants were asked to predict labels in three tracks: (a) emotion labels in monolingual settings, (b) emotion intensity scores, and (c) emotion labels in cross-lingual settings. 2025.semeval-1.327 @@ -3620,7 +3620,7 @@ ElisaSartoriUniversity of Padova NicolasStefanovitchJoint Research Centre ZhuohanXieMBZUAI - PreslavNakovMohamed bin Zayed University of Artificial Intelligence + PreslavNakovMohamed bin Zayed University of Artificial Intelligence GiovanniDa San MartinoUniversity of Padova 2610-2643 We introduce SemEval-2025 Task 10 on Multilingual Characterization and Extraction of Narratives from Online News, which focuses on the identification and analysis of narratives in online news media. The task is structured into three subtasks: (1) Entity Framing, to identify the roles that relevant entities play within narratives, (2) Narrative Classification, to assign documents fine-grained narratives according to a given, topic-specific taxonomy of narrative labels, and (3) Narrative Extraction, to provide a justification for the dominant narrative of the document. To this end, we analyze news articles across two critical domains, Ukraine-Russia War and Climate Change, in five languages: Bulgarian, English, Hindi, Portuguese, and Russian. This task introduces a novel multilingual and multifaceted framework for studying how online news media construct and disseminate manipulative narratives. By addressing these challenges, our work contributes to the broader effort of detecting, understanding, and mitigating the spread of propaganda and disinformation. The task attracted a lot of interest: 310 teams registered, with 66 submitting official results on the test set. diff --git a/data/xml/2025.sicon.xml b/data/xml/2025.sicon.xml index 611368b24d..8a907b00f8 100644 --- a/data/xml/2025.sicon.xml +++ b/data/xml/2025.sicon.xml @@ -74,7 +74,7 @@ <fixed-case>PROTECT</fixed-case>: Policy-Related Organizational Value Taxonomy for Ethical Compliance and Trust AvniMittalMicrosoft SreeHari NagaraluMicrosoft - SandipanDandapatMicrosoft + SandipanDandapatMicrosoft 73-75 This paper presents PROTECT, a novel policy-driven organizational value taxonomy designed to enhance ethical compliance and trust within organizations. Drawing on established human value systems and leveraging large language models, PROTECT generates values tailored to organizational contexts and clusters them into a refined taxonomy. This taxonomy serves as the basis for creating a comprehensive dataset of compliance scenarios, each linked to specific values and paired with both compliant and non-compliant responses. By systematically varying value emphasis, we illustrate how different LLM personas emerge, reflecting diverse compliance behaviors. The dataset, directly grounded in the taxonomy, enables consistent evaluation and training of LLMs on value-sensitive tasks. While PROTECT offers a robust foundation for aligning AI systems with organizational standards, our experiments also reveal current limitations in model accuracy, highlighting the need for further improvements. Together, the taxonomy and dataset represent complementary, foundational contributions toward value-aligned AI in organizational settings. 2025.sicon-1.5 @@ -119,7 +119,7 @@ Steering Conversational Large Language Models for Long Emotional Support Conversations NavidMadaniState University of New York at Buffalo - RohiniSrihariState University of New York at Buffalo + RohiniSrihariState University of New York at Buffalo 109-123 In this study, we address the challenge of consistently following emotional support strategies in long conversations by large language models (LLMs). We introduce the Strategy-Relevant Attention (SRA) metric, a model-agnostic measure designed to evaluate the effectiveness of LLMs in adhering to strategic prompts in emotional support contexts. By analyzing conversations within the Emotional Support Conversations dataset (ESConv) using LLaMA models, we demonstrate that SRA is significantly correlated with a model’s ability to sustain the outlined strategy throughout the interactions. Our findings reveal that the application of SRA-informed prompts leads to enhanced strategic adherence, resulting in conversations that more reliably exhibit the desired emotional support strategies over longer conversations. Furthermore, we contribute a comprehensive, multi-branch synthetic conversation dataset for ESConv, featuring a variety of strategy continuations informed by our optimized prompting method. The code and data are publicly available on our Github. 2025.sicon-1.9 diff --git a/data/xml/2025.sigmorphon.xml b/data/xml/2025.sigmorphon.xml index 8539cb5bf0..9bc83f53ed 100644 --- a/data/xml/2025.sigmorphon.xml +++ b/data/xml/2025.sigmorphon.xml @@ -5,7 +5,7 @@ Proceedings of the The 22nd SIGMORPHON workshop on Computational Morphology, Phonology, and Phonetics GarrettNicolai EleanorChodroff - FredericMailhot + FredericMailhot ÇağrıÇöltekin Association for Computational Linguistics
Albuquerque, New Mexico, USA
diff --git a/data/xml/2025.sigtyp.xml b/data/xml/2025.sigtyp.xml index cc2da8a508..dc88c71abb 100644 --- a/data/xml/2025.sigtyp.xml +++ b/data/xml/2025.sigtyp.xml @@ -130,7 +130,7 @@ HaotianYeCenter for Information and Language Processing JonathanBrennanUniversity of Michigan - Ann Arbor HelmutSchmidCenter for Information and Language Processing - HinrichSchuetze + HinrichSchuetze NimaMesgaraniColumbia University 75-81 In this work, we introduce XCOMPS, a multilingual conceptual minimal pair dataset that covers 17 languages.Using this dataset, we evaluate LLMs’ multilingual conceptual understanding through metalinguistic prompting, direct probability measurement, and neurolinguistic probing. We find that: 1) LLMs exhibit weaker conceptual understanding for low-resource languages, and accuracy varies across languages despite being tested on the same concept sets. 2) LLMs excel at distinguishing concept-property pairs that are visibly different but exhibit a marked performance drop when negative pairs share subtle semantic similarities. 3) More morphologically complex languages yield lower concept understanding scores and require deeper layers for conceptual reasoning. @@ -141,7 +141,7 @@ Tone in Perspective: A Computational Typological Analysis of Tone Function in <fixed-case>ASR</fixed-case> SiyuLiang - Gina-AnneLevowUniversity of Washington + Gina-AnneLevowUniversity of Washington 82-92 This study investigates the impact of pitch flattening on automatic speech recognition (ASR) performance across tonal and non-tonal languages. Using vocoder-based signal processing techniques, we created pitch-flattened versions of speech recordings and compared ASR performance against original recordings. Results reveal that tonal languages experience substantially larger performance degradation than non-tonal languages. Analysis of tone confusion matrices shows systematic patterns of misidentification where contour tones collapse toward level tones when pitch information is removed. Calculation of tone’s functional load at syllable and word levels demonstrates that syllable-level functional load strongly predicts ASR vulnerability to pitch flattening, while word-level patterns reflect each language’s morphological structure. These findings illuminate the differential importance of pitch information across languages and suggest that ASR systems for languages with high syllable-level functional load require more robust pitch modeling. 2025.sigtyp-1.11 @@ -165,8 +165,8 @@ DanielaGoschala Amir HosseinKargaran YihongLiu - André F. T.Martins - HinrichSchütze + André F. T.Martins + HinrichSchütze 114-121 When translating into a low-resource language, a language model can have a tendency to produce translations that are close to the source (e.g., word-by-word translations) due to a lack of rich low-resource training data in pretraining. Thus, the output often is translationese that differs considerably from what native speakers would produce naturally. To remedy this, we synthetically create a training set in which the frequency of a construction unique to the low-resource language is artificially inflated. For the case of Bavarian, we show that, after training, the language model has learned the unique construction and that native speakers judge its output as more natural. Our pilot study suggests that construction-based mitigation of translationese is a promising approach. Code and artifacts are available at https://github.com/cisnlp/BayernGPT. 2025.sigtyp-1.13 diff --git a/data/xml/2025.sumeval.xml b/data/xml/2025.sumeval.xml index 22327e4cfa..4c83c3066e 100644 --- a/data/xml/2025.sumeval.xml +++ b/data/xml/2025.sumeval.xml @@ -30,7 +30,7 @@ GeyuLin BinWang ZhengyuanLiu - Nancy F.Chen + Nancy F.Chen 12–23 Multilingual proficiency presents a significant challenge for large language models (LLMs). English-centric models are usually suboptimal in other languages, particularly those that are linguistically distant from English. This performance discrepancy mainly stems from the imbalanced distribution of training data across languages during pre-training and instruction tuning stages. To address this problem, we propose a novel approach called CrossIn, which utilizes a mixed composition of cross-lingual instruction tuning data. Our method leverages the compressed representation shared by various languages to efficiently enhance the model’s task-solving capabilities and multilingual proficiency within a single process. In addition, we introduce a multi-task and multi-faceted benchmark to evaluate the effectiveness of CrossIn. Experimental results demonstrate that our method substantially improves performance across tasks and languages, and we provide extensive insights into the impact of cross-lingual data volume and the integration of translation data on enhancing multilingual consistency and accuracy. 2025.sumeval-2.2 diff --git a/data/xml/2025.tacl.xml b/data/xml/2025.tacl.xml index 9933a72cfd..318a3bb5e5 100644 --- a/data/xml/2025.tacl.xml +++ b/data/xml/2025.tacl.xml @@ -43,7 +43,7 @@ MaryWilliamson GabrielSynnaeve JuanPino - BenoîtSagot + BenoîtSagot EmmanuelDupoux 10.1162/tacl_a_00728 We introduce SpiRit-LM, a foundation multimodal language model that freely mixes text and speech. Our model is based on a 7B pretrained text language model that we extend to the speech modality by continuously training it on text and speech units. Speech and text sequences are concatenated as a single stream of tokens, and trained with a word-level interleaving method using a small automatically curated speech-text parallel corpus. SpiRit-LM comes in two versions: a Base version that uses speech phonetic units (HuBERT) and an Expressive version that models expressivity using pitch and style units in addition to the phonetic units. For both versions, the text is encoded with subword BPE tokens. The resulting model displays both the semantic abilities of text models and the expressive abilities of speech models. Additionally, we demonstrate that SpiRit-LM can learn new tasks in a few-shot fashion across modalities (i.e., ASR, TTS, Speech Classification). We make available model weights and inference code.1,2 @@ -54,9 +54,9 @@ <fixed-case>CLAP</fixed-case>nq: Cohesive Long-form Answers from Passages in Natural Questions for <fixed-case>RAG</fixed-case> systems SaraRosenthal - AvirupSil - RaduFlorian - SalimRoukos + AvirupSil + RaduFlorian + SalimRoukos 10.1162/tacl_a_00729 Retrieval Augmented Generation (RAG) has become a popular application for large language models. It is preferable that successful RAG systems provide accurate answers that are supported by being grounded in a passage without any hallucinations. While considerable work is required for building a full RAG pipeline, being able to benchmark performance is also necessary. We present CLAPnq, a benchmark Long-form Question Answering dataset for the full RAG pipeline. CLAPnq includes long answers with grounded gold passages from Natural Questions (NQ) and a corpus to perform either retrieval, generation, or the full RAG pipeline. The CLAPnq answers are concise, 3x smaller than the full passage, and cohesive, meaning that the answer is composed fluently, often by integrating multiple pieces of the passage that are not contiguous. RAG models must adapt to these properties to be successful at CLAPnq. We present baseline experiments and analysis for CLAPnq that highlight areas where there is still significant room for improvement in grounded RAG. CLAPnq is publicly available at https://github.com/primeqa/clapnq. 53–72 @@ -96,7 +96,7 @@ VidhishaBalachandran MadhurPanwar TianxingHe - Noah A.Smith + Noah A.Smith NavinGoyal YuliaTsvetkov 10.1162/tacl_a_00733 @@ -125,9 +125,9 @@ ShutongFeng Hsien-chinLin NurulLubis - BenjaminRuppik + BenjaminRuppik RenatoVukovic - MilicaGašić + MilicaGašić 10.1162/tacl_a_00734 Supervised neural approaches are hindered by their dependence on large, meticulously annotated datasets, a requirement that is particularly cumbersome for sequential tasks. The quality of annotations tends to deteriorate with the transition from expert-based to crowd-sourced labeling. To address these challenges, we present CAMEL (Confidence-based Acquisition Model for Efficient self-supervised active Learning), a pool-based active learning framework tailored to sequential multi-output problems. CAMEL possesses two core features: (1) it requires expert annotators to label only a fraction of a chosen sequence, and (2) it facilitates self-supervision for the remainder of the sequence. By deploying a label correction mechanism, CAMEL can also be utilized for data cleaning. We evaluate CAMEL on two sequential tasks, with a special emphasis on dialogue belief tracking, a task plagued by the constraints of limited and noisy datasets. Our experiments demonstrate that CAMEL significantly outperforms the baselines in terms of efficiency. Furthermore, the data corrections suggested by our method contribute to an overall improvement in the quality of the resulting datasets.1 167–187 @@ -176,8 +176,8 @@ AbdelrahmanSadallah KirillGrishchenkov AlexanderPanchenko - TimothyBaldwin - PreslavNakov + TimothyBaldwin + PreslavNakov MaximPanov ArtemShelmanov 10.1162/tacl_a_00737 @@ -214,7 +214,7 @@ SaraPapi PeterPolák DominikMacháček - OndřejBojar + OndřejBojar 10.1162/tacl_a_00740 Simultaneous speech-to-text translation (SimulST) translates source-language speech into target-language text concurrently with the speaker’s speech, ensuring low latency for better user comprehension. Despite its intended application to unbounded speech, most research has focused on human pre-segmented speech, simplifying the task and overlooking significant challenges. This narrow focus, coupled with widespread terminological inconsistencies, is limiting the applicability of research outcomes to real-world applications, ultimately hindering progress in the field. Our extensive literature review of 110 papers not only reveals these critical issues in current research but also serves as the foundation for our key contributions. We: 1) define the steps and core components of a SimulST system, proposing a standardized terminology and taxonomy; 2) conduct a thorough analysis of community trends; and 3) offer concrete recommendations and future directions to bridge the gaps in existing literature, from evaluation frameworks to system architectures, for advancing the field towards more realistic and effective SimulST solutions. 281–313 @@ -301,7 +301,7 @@ Phonetic Reconstruction of the Consonant System of Middle <fixed-case>C</fixed-case>hinese via Mixed Integer Optimization XiaoxiLuo - WeiweiSun + WeiweiSun 10.1162/tacl_a_00742 This paper is concerned with phonetic reconstruction of the consonant system of Middle Chinese. We propose to cast the problem as a Mixed Integer Programming problem, which is able to automatically explore homophonic information from ancient rhyme dictionaries and phonetic information from modern Chinese dialects, the descendants of Middle Chinese. Numerical evaluation on a wide range of synthetic and real data demonstrates the effectiveness and robustness of the new method. We apply the method to information from Guǎngyùn and 20 modern Chinese dialects to obtain a new phonetic reconstruction result. A linguistically motivated discussion of this result is also provided.1 424–441 @@ -341,7 +341,7 @@ Few-Shot Multilingual Open-Domain <fixed-case>QA</fixed-case> from Five Examples FanJiang TomDrummond - TrevorCohn + TrevorCohn 10.1162/tacl_a_00750 Recent approaches to multilingual open- domain question answering (MLODQA) have achieved promising results given abundant language-specific training data. However, the considerable annotation cost limits the application of these methods for underrepresented languages. We introduce a few-shot learning approach to synthesize large-scale multilingual data from large language models (LLMs). Our method begins with large-scale self-supervised pre-training using WikiData, followed by training on high-quality synthetic multilingual data generated by prompting LLMs with few-shot supervision. The final model, FsModQA, significantly outperforms existing few-shot and supervised baselines in MLODQA and cross-lingual and monolingual retrieval. We further show our method can be extended for effective zero-shot adaptation to new languages through a cross-lingual prompting strategy with only English-supervised data, making it a general and applicable solution for MLODQA tasks without costly large-scale annotation. 481–504 @@ -368,7 +368,7 @@ ChenjunXu YuliaTsvetkov BillHowe - Lucy LuWang + Lucy LuWang 10.1162/tacl_a_00754 Abstention, the refusal of large language models (LLMs) to provide an answer, is increasingly recognized for its potential to mitigate hallucinations and enhance safety in LLM systems. In this survey, we introduce a framework to examine abstention from three perspectives: the query, the model, and human values. We organize the literature on abstention methods, benchmarks, and evaluation metrics using this framework, and discuss merits and limitations of prior work. We further identify and motivate areas for future research, such as whether abstention can be achieved as a meta-capability that transcends specific tasks or domains, and opportunities to optimize abstention abilities in specific contexts. In doing so, we aim to broaden the scope and impact of abstention methodologies in AI systems.1 529–556 @@ -390,7 +390,7 @@ Exploring Practical Gaps in Using Cross Entropy to Implement Maximum Mutual Information Criterion for Rationalization - WeiLiu + WeiLiu ZhiyingDeng ZhongyuNiu JunWang @@ -456,7 +456,7 @@ ErickGalinkin YiyiChen Jens MyrupPedersen - LeonDerczynski + LeonDerczynski JohannesBjerva 10.1162/tacl_a_00762 As NLP models are used by a growing number of end-users, an area of increasing importance is NLP Security (NLPSec): assessing the vulnerability of models to malicious attacks and developing comprehensive countermeasures against them. While work at the intersection of NLP and cybersecurity has the potential to create safer NLP for all, accidental oversights can result in tangible harm (e.g., breaches of privacy or proliferation of malicious models). In this emerging field, however, the research ethics of NLP have not yet faced many of the long-standing conundrums pertinent to cybersecurity, until now. We thus examine contemporary works across NLPSec, and explore their engagement with cybersecurity’s ethical norms. We identify trends across the literature, ultimately finding alarming gaps on topics like harm minimization and responsible disclosure. To alleviate these concerns, we provide concrete recommendations to help NLP researchers navigate this space more ethically, bridging the gap between traditional cybersecurity and NLP ethics, which we frame as “white hat NLP”. The goal of this work is to help cultivate an intentional culture of ethical research for those working in NLP Security. diff --git a/data/xml/2025.tlt.xml b/data/xml/2025.tlt.xml index 2f83d7fd39..29d2b0aacf 100644 --- a/data/xml/2025.tlt.xml +++ b/data/xml/2025.tlt.xml @@ -4,7 +4,7 @@ Proceedings of the 23rd International Workshop on Treebanks and Linguistic Theories (TLT, SyntaxFest 2025) SarahJablotschkin - SandraKübler + SandraKübler HeikeZinsmeister Association for Computational Linguistics
Ljubljana, Slovenia
@@ -43,7 +43,7 @@
Expanding the <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies <fixed-case>A</fixed-case>ncient <fixed-case>H</fixed-case>ebrew Treebank with Constituency Data - Daniel G.SwansonIndiana University + Daniel G.SwansonIndiana University 23-31 This paper presents an effort to expand the annotation pipeline for the Ancient Hebrew Universal Dependencies treebank to make use of additional data, resulting in the addition of over 4000 sentences and roughly 100K words to the released treebank. The resulting treebank contains 5500 sentences and 145K words and the incorporation of converted constituency data has resulted in an annotation process which requires manual intervention in only around 15-20% of sentences, even in previously unseen genres. 2025.tlt-1.3 @@ -52,7 +52,7 @@ Graph Databases for Fast Queries in <fixed-case>UD</fixed-case> Treebanks NiklasDeworetzkiChalmers University of Technology and University of Gothenburg - PeterLjunglöfChalmers University of Technology + PeterLjunglöfChalmers University of Technology 32-43 We investigate if labeled property graphs, and graph databases, can be an useful and efficient way of encoding UD treebanks, to facilitate searching for complex syntactic phenomena. We give two alternative encodings of UD treebanks into the off-the-shelf graph database Neo4j, and show how to translate syntactic queries into the graph query language Cypher. Our evaluation shows that graph databases can improve query times by several orders of magnitude, compared to existing approaches. 2025.tlt-1.4 @@ -134,7 +134,7 @@ EmmettStricklandInstitut National des Langues et Civilisations Orientales BrunoGuillaumeINRIA SylvainKahaneUniversité Paris Nanterre - AnneLacheret-DujourUniversité Paris Nanterre + AnneLacheret-DujourUniversité Paris Nanterre 111-118 This paper presents a new format of the Rhapsodie Treebank, which contains both syntactic and prosodic annotations, offering a comprehensive dataset for the study of spoken French.This integrated format allow us for complex multilevel queries and open the way for the extraction of intonosyntactic studies. 2025.tlt-1.13 @@ -162,7 +162,7 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Treebank for a low-resource Dardic Language: <fixed-case>T</fixed-case>orwali NaeemUddinCharles University Prague - DanielZemanFaculty of Mathematics and Physics, Charles University Prague + DanielZemanFaculty of Mathematics and Physics, Charles University Prague 140-147 This paper presents and discusses the linguistic phenomena encountered in the development of the ongoing first ever universal dependency treebank for Torwali the Language. Torwali belongs to the Kohistani sub-group of Dardic Indo-Aryan languages, and is considered an endangered (Moseley, 2010) and indigenous language, which makes it extremely low-resourced in terms of linguistic and computational resources. With the aim of including Torwali in Universal Dependencies (UD) (de Marneffe et al. 2021), we are annotating a diverse set of example sentences for POS tags, features and dependency relations. 2025.tlt-1.16 diff --git a/data/xml/2025.trl.xml b/data/xml/2025.trl.xml index 4aca79ca31..824c3b8e92 100644 --- a/data/xml/2025.trl.xml +++ b/data/xml/2025.trl.xml @@ -67,7 +67,7 @@ NaihaoDeng CunxiangWang GuojiangZhao - RadaMihalceaUniversity of Michigan + RadaMihalceaUniversity of Michigan YueZhangWestlake University 34-46 Large Language Models (LLMs) have demon- strated exceptional performance across diverse tasks. To harness their capabilities for Text- to-SQL, we introduce R3 (Review-Rebuttal- Revision), a consensus-based multi-agent sys- tem for Text-to-SQL tasks. R3 achieves the new state-of-the-art performance of 89.9 on the Spider test set. In the meantime, R3 achieves 61.80 on the Bird development set. R3 out- performs existing single-LLM and multi-agent Text-to-SQL systems by 1.3% to 8.1% on Spi- der and Bird, respectively. Surprisingly, we find that for Llama-3-8B, R3 outperforms chain-of- thought prompting by over 20%, even outper- forming GPT-3.5 on the Spider development set. We open-source our codebase at https: //github.com/1ring2rta/R3. @@ -84,7 +84,7 @@ ThanhVuOracle DonDharmasiriOracle Yuan-FangLiMonash University and Oracle - LongDuongOracle + LongDuongOracle 47-55 Open-weight large language models (LLMs) have significantly advanced performance in the Natural Language to SQL (NL2SQL) task. However, their effectiveness diminishes when dealing with large database schemas, as the context length increases. To address this limitation, we present SQLong, a novel and efficient data augmentation framework designed to enhance LLM performance in long-context scenarios for the NL2SQL task. SQLong generates augmented datasets by extending existing database schemas with additional synthetic CREATE TABLE commands and corresponding data rows, sampled from diverse schemas in the training data. This approach effectively simulates long-context scenarios during finetuning and evaluation. Through experiments on the Spider and BIRD datasets, we demonstrate that LLMs finetuned with SQLong-augmented data significantly outperform those trained on standard datasets. These imply SQLong’s practical implementation and its impact on improving NL2SQL capabilities in real-world settings with complex database schemas. 2025.trl-1.5 @@ -93,7 +93,7 @@ i<fixed-case>TBLS</fixed-case>: A Dataset of Interactive Conversations Over Tabular Information - AnirudhSundar + AnirudhSundar Christopher GordonRichardsonGeorgia Institute of Technology LarryHeckGeorgia Institute of Technology AdarAvsian @@ -130,7 +130,7 @@ Ask Me Like <fixed-case>I</fixed-case>’m Human: <fixed-case>LLM</fixed-case>-based Evaluation with For-Human Instructions Correlates Better with Human Evaluations than Human Judges RudaliHuidrom - AnyaBelzDublin City University + AnyaBelzDublin City University 98-108 Human evaluation in NLP has high cost and expertise requirements, and instruction-tuned LLMs are increasingly seen as a viable alternative. Reported correlations with human judgements vary across evaluation contexts and prompt types, and it is hard currently to predict if an LLM-as-judge metric will work equally well for new evaluation contexts and prompts, unless human evaluations are also carried out for comparison. Addressing two main factors contributing to this uncertainty, model suitability and prompt engineering, in the work reported in this focused contribution, we test four LLMs and different ways of combining them, in conjunction with a standard approach to prompt formulation, namely using written-for-human instructions verbatim. We meta-evaluate performance against human evaluations on two data-to-text tasks, and eight evaluation measures, also comparing against more conventional LLM prompt formulations. We find that the best LLM (combination)s are excellent predictors of mean human judgements, and are particularly good at content-related evaluation (in contrast to form-related criteria such as Fluency). Moreover, the best LLMs correlate far more strongly with human evaluations than individual human judges across all scenarios. 2025.trl-1.9 @@ -144,7 +144,7 @@ NilsFeldhus RaiaAbu AhmadGerman Research Center for AI MalteOstendorffDeutsche Telekom - PedroOrtiz SuarezCommon Crawl Foundation + PedroOrtiz SuarezCommon Crawl Foundation GeorgRehmHumboldt-Universität zu Berlin and Deutsches Forschungszentrum für Künstliche Intelligenz SebastianMöller 109-142 diff --git a/data/xml/2025.trustnlp.xml b/data/xml/2025.trustnlp.xml index 1407270952..b273f7e7d8 100644 --- a/data/xml/2025.trustnlp.xml +++ b/data/xml/2025.trustnlp.xml @@ -83,14 +83,14 @@ Ambiguity Detection and Uncertainty Calibration for Question Answering with Large Language Models - ZhengyanShi + ZhengyanShi GiuseppeCastellucciAmazon SimoneFiliceTechnology Innovation Institute SaarKuziAmazon EladKravi EugeneAgichteinEmory University OlegRokhlenko - ShervinMalmasiAmazon + ShervinMalmasiAmazon 41-55 Large Language Models (LLMs) have demonstrated excellent capabilities in Question Answering (QA) tasks, yet their ability to identify and address ambiguous questions remains underdeveloped. Ambiguities in user queries often lead to inaccurate or misleading answers, undermining user trust in these systems. Despite prior attempts using prompt-based methods, performance has largely been equivalent to random guessing, leaving a significant gap in effective ambiguity detection. To address this, we propose a novel framework for detecting ambiguous questions within LLM-based QA systems. We first prompt an LLM to generate multiple answers to a question, and then analyze them to infer the ambiguity. We propose to use a lightweight Random Forest model, trained on a bootstrapped and shuffled 6-shot examples dataset. Experimental results on ASQA, PACIFIC, and ABG-COQA datasets demonstrate the effectiveness of our approach, with accuracy up to 70.8%. Furthermore, our framework enhances the confidence calibration of LLM outputs, leading to more trustworthy QA systems able to handle complex questions. 2025.trustnlp-main.4 @@ -103,7 +103,7 @@ ZhiyuXueUniversity of California, Santa Barbara XitongZhangQualcomm Inc, QualComm RongrongWangMichigan State University - KristenJohnsonMichigan State University + KristenJohnsonMichigan State University 56-65 Self-correction is one of the most amazing emerging capabilities of Large Language Models (LLMs), enabling LLMs to self-modify an inappropriate output given a natural language feedback which describes the problems of that output. Moral self-correction is a post-hoc approach correcting unethical generations without requiring a gradient update, making it both computationally lightweight and capable of preserving the language modeling ability. Previous works have shown that LLMs can self-debias, and it has been reported that small models, i.e., those with less than 22B parameters, are not capable of moral self-correction.However, there is no direct proof as to why such smaller models fall short of moral self-correction, though previous research hypothesizes that larger models are skilled in following instructions and understanding abstract social norms.In this paper, we empirically validate this hypothesis in the context of social stereotyping, through meticulous prompting.Our experimental results indicate that (i) surprisingly, 3.8B LLMs with proper safety alignment fine-tuning can achieve very good moral self-correction performance, highlighting the significant effects of safety alignment; and (ii) small LLMs are indeed weaker than larger-scale models in terms of comprehending social norms and self-explanation through CoT, but all scales of LLMs show bad self-correction performance given unethical instructions. 2025.trustnlp-main.5 @@ -260,7 +260,7 @@ YiYangHong Kong University of Science and Technology HanyuDuan AhmedAbbasiUniversity of Notre Dame - John P.LalorUniversity of Notre Dame + John P.LalorUniversity of Notre Dame Kar YanTam 276-290 Transformer-based pretrained large language models (PLM) such as BERT and GPT have achieved remarkable success in NLP tasks. However, PLMs are prone to encoding stereotypical biases. Although a burgeoning literature has emerged on stereotypical bias mitigation in PLMs, such as work on debiasing gender and racial stereotyping, how such biases manifest and behave internally within PLMs remains largely unknown. Understanding the internal stereotyping mechanisms may allow better assessment of model fairness and guide the development of effective mitigation strategies. In this work, we focus on attention heads, a major component of the Transformer architecture, and propose a bias analysis framework to explore and identify a small set of biased heads that are found to contribute to a PLM’s stereotypical bias. We conduct extensive experiments to validate the existence of these biased heads and to better understand how they behave. We investigate gender and racial bias in the English language in two types of Transformer-based PLMs: the encoder-based BERT model and the decoder-based autoregressive GPT model, LLaMA-2 (7B), and LLaMA-2-Chat (7B). Overall, the results shed light on understanding the bias behavior in pretrained language models. diff --git a/data/xml/2025.udw.xml b/data/xml/2025.udw.xml index cbb44408c0..8328a0bf0b 100644 --- a/data/xml/2025.udw.xml +++ b/data/xml/2025.udw.xml @@ -31,7 +31,7 @@ Annotation of Relative Forms in the <fixed-case>E</fixed-case>gyptian-<fixed-case>UJ</fixed-case>aen Treebank Roberto AntonioDíaz Hernández - DanielZeman + DanielZeman 11-21 Relative forms are adjective finite verb forms that can be used in an attributive or a nominal function. They pose a challenge when annotating them according to the Universal Dependencies approach, for they have morphological features of both verbs and adjectives, yet they can also be used syntactically as nouns. The aim of this paper is to discuss the morphosyntactic methodology applied to their annotation in the Egyptian-UJaen treebank. 2025.udw-1.2 @@ -57,7 +57,7 @@ Crossing Dialectal Boundaries: Building a Treebank for the Dialect of Lesbos through Knowledge Transfer from Standard <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek StavrosBompolasARCHIMEDES Unit | Athena Research Center - StellaMarkantonatouATHENA RIC + StellaMarkantonatouATHENA RIC AngelaRalliAthena Research Center AntoniosAnastasopoulosAthena Research Center 39-51 @@ -110,7 +110,7 @@ Building <fixed-case>UD</fixed-case> Cairo for <fixed-case>O</fixed-case>ld <fixed-case>E</fixed-case>nglish in the Classroom - LaurenLevineGeorgetown University + LaurenLevineGeorgetown University JunghyunMinGeorgetown University AmirZeldesGeorgetown University 97-104 @@ -127,7 +127,7 @@ ZubairSanjraniIsra University SarwatQureshiU. Manchester ShafiPirzadaMLtwist - Christopher D.ManningStanford University + Christopher D.ManningStanford University Mutee URahmanIsra University 105-118 Sindhi is an Indo-Aryan language spoken primarily in Pakistan and India by about 40 million people. Despite this extensive use, it is a low resource language for NLP tasks, with few datasets or pretrained embeddings available. In this work, we explore linguistic challenges for annotating Sindhi in the UD paradigm, such as language-specific analysis of adpositions and verb forms. We use this analysis to present a newly annotated dependency treebank for Universal Dependencies, along with pretrained embeddings and an annotation pipeline specifically for Sindhi annotation. @@ -167,9 +167,9 @@ Extending the Enhanced <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies – addressing subjects in pro-drop languages - MagaliSanches DuranUniversidade de São Paulo + MagaliSanches DuranUniversidade de São Paulo Elvis A.de SouzaUniversidade de São Paulo - Mariadas Graças Volpe NunesUniversidade de São Paulo + Mariadas Graças Volpe NunesUniversidade de São Paulo Adriana SilvinaPaganoUniversidade Federal de Minas Gerais, Universidade Federal de Minas Gerais Thiago A. S.PardoUniversidade de São Paulo 143-152 @@ -199,7 +199,7 @@ Quid verbumst? Applying a definition of word to <fixed-case>L</fixed-case>atin in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies - Flavio MassimilianoCecchiniKatholieke Universiteit Leuven + Flavio MassimilianoCecchiniKatholieke Universiteit Leuven 174-185 Words, more specifically “syntactic words”, are at the centre of a dependency-based approach like Universal Dependencies. Nonetheless, its guidelines do not make explicit how such a word should be defined and identified, and so it happens that different treebanks use different standards to this end. To counter this vagueness, the community has been recently discussing a definition put forward in (Haspelmath, 2023) which is not fully uncontroversial. This contribution is a preliminary case study that tries its hand at concretely applying this definition (except for compounds) to Latin in order to gain more insights about its operability and groundedness. This is helped by the spread of Latin over many treebanks, the presence of good linguistic resources to analyse it, and a linguistic type which is probably not fully considered in (Haspelmath, 2023). On the side, this work shows once more the difficulties of turning theoretical definitions into working directives in the realm of linguistic annotation. 2025.udw-1.19 diff --git a/data/xml/2025.vardial.xml b/data/xml/2025.vardial.xml index e2f208fbc4..0bd9e41a5a 100644 --- a/data/xml/2025.vardial.xml +++ b/data/xml/2025.vardial.xml @@ -6,8 +6,8 @@ YvesScherrer TommiJauhiainen NikolaLjubešić - PreslavNakov - JorgTiedemann + PreslavNakov + JorgTiedemann MarcosZampieri Association for Computational Linguistics
Abu Dhabi, UAE
@@ -90,7 +90,7 @@ Retrieval of Parallelizable Texts Across <fixed-case>C</fixed-case>hurch <fixed-case>S</fixed-case>lavic Variants PiroskaLendvai - UweReichel + UweReichel AnnaJouravel AchimRabus ElenaRenje @@ -104,7 +104,7 @@ Anne-MarieLutgen AlistairPlum ChristophPurschke - BarbaraPlank + BarbaraPlank 115–127 Orthographic variation is very common in Luxembourgish texts due to the absence of a fully-fledged standard variety. Additionally, developing NLP tools for Luxembourgish is a difficult task given the lack of annotated and parallel data, which is exacerbated by ongoing standardization. In this paper, we propose the first sequence-to-sequence normalization models using the ByT5 and mT5 architectures with training data obtained from word-level real-life variation data. We perform a fine-grained, linguistically-motivated evaluation to test byte-based, word-based and pipeline-based models for their strengths and weaknesses in text normalization. We show that our sequence model using real-life variation data is an effective approach for tailor-made normalization in Luxembourgish. 2025.vardial-1.9 @@ -114,7 +114,7 @@ Improving Dialectal Slot and Intent Detection with Auxiliary Tasks: A Multi-Dialectal <fixed-case>B</fixed-case>avarian Case Study Xaver MariaKrückl VerenaBlaschke - BarbaraPlank + BarbaraPlank 128–146 Reliable slot and intent detection (SID) is crucial in natural language understanding for applications like digital assistants. Encoder-only transformer models fine-tuned on high-resource languages generally perform well on SID. However, they struggle with dialectal data, where no standardized form exists and training data is scarce and costly to produce. We explore zero-shot transfer learning for SID, focusing on multiple Bavarian dialects, for which we release a new dataset for the Munich dialect. We evaluate models trained on auxiliary tasks in Bavarian, and compare joint multi-task learning with intermediate-task training. We also compare three types of auxiliary tasks: token-level syntactic tasks, named entity recognition (NER), and language modelling. We find that the included auxiliary tasks have a more positive effect on slot filling than intent classification (with NER having the most positive effect), and that intermediate-task training yields more consistent performance gains. Our best-performing approach improves intent classification performance on Bavarian dialects by 5.1 and slot filling F1 by 8.4 percentage points. 2025.vardial-1.10 @@ -135,7 +135,7 @@ SalamKhalifa AbdelrahimQaddoumi JordanKodner - OwenRambow + OwenRambow 157–167 We investigate learning surface forms from underlying morphological forms for low-resource language varieties. We concentrate on learning explicit rules with the aid of learned syllable structure constraints, which outperforms neural methods on this small data task and provides interpretable output. Evaluating across one relatively high-resource and two related low-resource Arabic dialects, we find that a model trained only on the high-resource dialect achieves decent performance on the low-resource dialects, useful when no low-resource training data is available. The best results are obtained when our system is trained only on the low-resource dialect data without augmentation from the related higher-resource dialect. We discuss the impact of syllable structure constraints and the strengths and weaknesses of data augmentation and transfer learning from a related dialect. 2025.vardial-1.12 @@ -145,7 +145,7 @@ Common Ground, Diverse Roots: The Difficulty of Classifying Common Examples in <fixed-case>S</fixed-case>panish Varieties Javier A.Lopetegui ArijRiabi - DjaméSeddah + DjaméSeddah 168–181 Variations in languages across geographic regions or cultures are crucial to address to avoid biases in NLP systems designed for culturally sensitive tasks, such as hate speech detection or dialog with conversational agents. In languages such as Spanish, where varieties can significantly overlap, many examples can be valid across them, which we refer to as common examples. Ignoring these examples may cause misclassifications, reducing model accuracy and fairness. Therefore, accounting for these common examples is essential to improve the robustness and representativeness of NLP systems trained on such data. In this work, we address this problem in the context of Spanish varieties. We use training dynamics to automatically detect common examples or errors in existing Spanish datasets. We demonstrate the efficacy of using predicted label confidence for our Datamaps (CITATION) implementation for the identification of hard-to-classify examples, especially common examples, enhancing model performance in variety identification tasks. Additionally, we introduce a Cuban Spanish Variety Identification dataset with common examples annotations developed to facilitate more accurate detection of Cuban and Caribbean Spanish varieties. To our knowledge, this is the first dataset focused on identifying the Cuban, or any other Caribbean, Spanish variety. 2025.vardial-1.13 @@ -154,8 +154,8 @@ Add Noise, Tasks, or Layers? <fixed-case>M</fixed-case>ai<fixed-case>NLP</fixed-case> at the <fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial 2025 Shared Task on <fixed-case>N</fixed-case>orwegian Dialectal Slot and Intent Detection VerenaBlaschke - FeliciaKörner - BarbaraPlank + FeliciaKörner + BarbaraPlank 182–199 Slot and intent detection (SID) is a classic natural language understanding task. Despite this, research has only more recently begun focusing on SID for dialectal and colloquial varieties. Many approaches for low-resource scenarios have not yet been applied to dialectal SID data, or compared to each other on the same datasets. We participate in the VarDial 2025 shared task on slot and intent detection in Norwegian varieties, and compare multiple set-ups: varying the training data (English, Norwegian, or dialectal Norwegian), injecting character-level noise, training on auxiliary tasks, and applying Layer Swapping, a technique in which layers of models fine-tuned on different datasets are assembled into a model. We find noise injection to be beneficial while the effects of auxiliary tasks are mixed. Though some experimentation was required to successfully assemble a model from layers, it worked surprisingly well; a combination of models trained on English and small amounts of dialectal data produced the most robust slot predictions. Our best models achieve 97.6% intent accuracy and 85.6% slot F1 in the shared task. 2025.vardial-1.14 diff --git a/data/xml/2025.wacl.xml b/data/xml/2025.wacl.xml index c99a1bc8f6..cb230735c8 100644 --- a/data/xml/2025.wacl.xml +++ b/data/xml/2025.wacl.xml @@ -13,7 +13,7 @@ AmalHaddad Haddad MustafaJarrar MoEl-Haj - RuslanMitkov + RuslanMitkov PaulRayson Association for Computational Linguistics
Abu Dhabi, UAE
@@ -87,7 +87,7 @@ <fixed-case>D</fixed-case>ial2<fixed-case>MSA</fixed-case>-Verified: A Multi-Dialect <fixed-case>A</fixed-case>rabic Social Media Dataset for Neural Machine Translation to <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic AbdullahKhered YoucefBenkhedda - RizaBatista-Navarro + RizaBatista-Navarro 50–62 Social media has become an essential focus for Natural Language Processing (NLP) research due to its widespread use and unique linguistic characteristics. Normalising social media content, especially for morphologically rich languages like Arabic, remains a complex task due to limited parallel corpora. Arabic encompasses Modern Standard Arabic (MSA) and various regional dialects, collectively termed Dialectal Arabic (DA), which complicates NLP efforts due to their informal nature and variability. This paper presents Dial2MSA-Verified, an extension of the Dial2MSA dataset that includes verified translations for Gulf, Egyptian, Levantine, and Maghrebi dialects. We evaluate the performance of Seq2Seq models on this dataset, highlighting the effectiveness of state-of-the-art models in translating local Arabic dialects. We also provide insights through error analysis and outline future directions for enhancing Seq2Seq models and dataset development. The Dial2MSA-Verified dataset is publicly available to support further research. 2025.wacl-1.6 @@ -115,7 +115,7 @@ AzzedineAftiss SalimaLamsiyah ChristophSchommer - Said OuatikEl Alaoui + Said OuatikEl Alaoui 77–85 Moroccan Dialect (MD), or “Darija,” is a primary spoken variant of Arabic in Morocco, yet remains underrepresented in Natural Language Processing (NLP) research, particularly in tasks like summarization. Despite a growing volume of MD textual data online, there is a lack of robust resources and NLP models tailored to handle the unique linguistic challenges posed by MD. In response, we introduce .MA_v2, an expanded version of the GOUD.MA dataset, containing over 50k articles with their titles across 11 categories. This dataset provides a more comprehensive resource for developing summarization models. We evaluate the application of large language models (LLMs) for MD summarization, utilizing both fine-tuning and zero-shot prompting with encoder-decoder and causal LLMs, respectively. Our findings demonstrate that an expanded dataset improves summarization performance and highlights the capabilities of recent LLMs in handling MD text. We open-source our dataset, fine-tuned models, and all experimental code, establishing a foundation for future advancements in MD NLP. We release the code at https://github.com/AzzedineAftiss/Moroccan-Dialect-Summarization. 2025.wacl-1.9 diff --git a/data/xml/2025.wnu.xml b/data/xml/2025.wnu.xml index 8129a2601b..14f0393cb9 100644 --- a/data/xml/2025.wnu.xml +++ b/data/xml/2025.wnu.xml @@ -81,7 +81,7 @@ <fixed-case>CHATTER</fixed-case>: A character-attribution dataset for narrative understanding SabyasacheeBaruahUniversity of Southern California - ShrikanthNarayananUniversity of Southern California + ShrikanthNarayananUniversity of Southern California 52-63 Computational narrative understanding studies the identification, description, and interaction of the elements of a narrative: characters, attributes, events, and relations.Narrative research has given considerable attention to defining and classifying character types.However, these character-type taxonomies do not generalize well because they are small, too simple, or specific to a domain.We require robust and reliable benchmarks to test whether narrative models truly understand the nuances of the character’s development in the story.Our work addresses this by curating the CHATTER dataset that labels whether a character portrays some attribute for 88124 character-attribute pairs, encompassing 2998 characters, 12967 attributes and 660 movies.We validate a subset of CHATTER, called CHATTEREVAL, using human annotations to serve as an evaluation benchmark for the character attribution task in movie scripts.CHATTEREVAL also assesses narrative understanding and the long-context modeling capacity of language models. 2025.wnu-1.11 @@ -101,7 +101,7 @@ Narrative Studio: Visual narrative exploration using <fixed-case>LLM</fixed-case>s and <fixed-case>M</fixed-case>onte <fixed-case>C</fixed-case>arlo Tree Search ParsaGhaffariIndependent researcher - ChrisHokampAylien Ltd. + ChrisHokampAylien Ltd. 83-96 Interactive storytelling benefits from planning and exploring multiple “what if” scenarios. Modern LLMs are useful tools for ideation and exploration, but current chat-based user interfaces restrict users to a single linear flow. To address this limitation, we propose Narrative Studio – a novel in-browser narrative exploration environment featuring a tree-like interface that allows branching exploration from user-defined points in a story. Each branch is extended via iterative LLM inference guided by system and user-defined prompts. Additionally, we employ Monte Carlo Tree Search (MCTS) to automatically expand promising narrative paths based on user-specified criteria, enabling more diverse and robust story development. We also allow users to enhance narrative coherence by grounding the generated text in a graph that represents the actors and environment of the story. 2025.wnu-1.16 diff --git a/data/xml/2025.wnut.xml b/data/xml/2025.wnut.xml index 11127ae978..5752e215b5 100644 --- a/data/xml/2025.wnut.xml +++ b/data/xml/2025.wnut.xml @@ -30,7 +30,7 @@ MarkDrakesmith DimosthenisAntypas ClareBrownNA - JoseCamacho-ColladosCardiff University + JoseCamacho-ColladosCardiff University JiaoSong 1-9 Social media offers the potential to provide detection of outbreaks or public health incidents faster than traditional reporting mechanisms. In this paper, we developed and tested a pipeline to produce alerts of influenza-like illness (ILI) using Twitter data. Data was collected from the Twitter API, querying keywords referring to ILI symptoms and geolocated to Wales. Tweets that described first-hand descriptions of symptoms (as opposed to non-personal descriptions) were classified using transformer-based language models specialised on social media (BERTweet and TimeLMs), which were trained on a manually labelled dataset matching the above criteria. After gathering this data, weekly tweet counts were applied to the regression-based Noufaily algorithm to identify exceedances throughout 2022. The algorithm was also applied to counts of ILI-related GP consultations for comparison. Exceedance detection applied to the classified tweet counts produced alerts starting four weeks earlier than by using GP consultation data. These results demonstrate the potential to facilitate advanced preparedness for unexpected increases in healthcare burdens. @@ -43,7 +43,7 @@ QuanqiDu LoicDe Langhe ElsLefeverGhent University - VeroniqueHosteUniversiteit Gent + VeroniqueHosteUniversiteit Gent 10-15 This study explores the differences between textual and multimodal sentiment annotations on videos and their impact on transcript-based sentiment modelling. Using the UniC and CH-SIMS datasets which are annotated at both the unimodal and multimodal level, we conducted a statistical analysis and sentiment modelling experiments. Results reveal significant differences between the two annotation types, with textual annotations yielding better performance in sentiment modelling and demonstrating superior generalization ability. These findings highlight the challenges of cross-modality generalization and provide insights for advancing sentiment analysis. 2025.wnut-1.2 @@ -140,7 +140,7 @@ Automatically Generating <fixed-case>C</fixed-case>hinese Homophone Words to Probe Machine Translation Estimation Systems ShenbinQian - ConstantinOrasanUniversity of Surrey + ConstantinOrasanUniversity of Surrey DipteshKanojiaUniversity of Surrey FélixDo CarmoUniversity of Surrey 97-107 @@ -172,7 +172,7 @@ <fixed-case>W</fixed-case>ikipedia is Not a Dictionary, Delete! Text Classification as a Proxy for Analysing <fixed-case>W</fixed-case>iki Deletion Discussions HsuvasBorkakotyCardiff University - LuisEspinosa-AnkeCardiff University and AMPLYFI + LuisEspinosa-AnkeCardiff University and AMPLYFI 133-142 Automated content moderation for collaborative knowledge hubs like Wikipedia or Wikidata is an important yet challenging task due to multiple factors. In this paper, we construct a database of discussions happening around articles marked for deletion in several Wikis and in three languages, which we then use to evaluate a range of LMs on different tasks (from predicting the outcome of the discussion to identifying the implicit policy an individual comment might be pointing to). Our results reveal, among others, that discussions leading to deletion are easier to predict, and that, surprisingly, self-produced tags (keep, delete or redirect) don’t always help guiding the classifiers, presumably because of users’ hesitation or deliberation within comments 2025.wnut-1.14 @@ -181,8 +181,8 @@ From Conversational Speech to Readable Text: Post-Processing Noisy Transcripts in a Low-Resource Setting - ArtursZnotins - NormundsGruzitis + ArtursZnotins + NormundsGruzitis RobertsDargis 143-148 We present ongoing research on automatic post-processing approaches to enhance the readability of noisy speech transcripts in low-resource languages, with a focus on conversational speech in Latvian. We compare transformer-based sequence-labeling models and large language models (LLMs) for the standard punctuation and capitalization restoration task, while also considering automatic correction of mispronounced words and disfluency, and partial inverse text normalization. Our results show that very small LLMs (approx. 2B parameters), fine-tuned on a modest text corpus, can achieve near state-of-the-art performance, rivaling orders of magnitude larger LLMs. Additionally, we demonstrate that a fine-tuned Whisper model, leveraging acoustic cues, outperforms text-only systems on challenging conversational data, even for a low-resource language. Error analysis reveals recurring pitfalls in sentence boundary determination and disfluency handling, emphasizing the importance of consistent annotation and domain adaptation for robust post-processing. Our findings highlight the feasibility of developing efficient post-processing solutions that significantly refine ASR output in low-resource settings, while opening new possibilities for editing and formatting speech transcripts beyond mere restoration of punctuation and capitalization. diff --git a/data/xml/2025.woah.xml b/data/xml/2025.woah.xml index 07b1cd1f52..a64a5ba084 100644 --- a/data/xml/2025.woah.xml +++ b/data/xml/2025.woah.xml @@ -7,7 +7,7 @@ Christinede Kock DeboraNozza Flor MiriamPlaza-del-Arco - ZeerakTalat + ZeerakTalat FrancielleVargas Association for Computational Linguistics
Vienna, Austria
@@ -38,7 +38,7 @@ DimosthenisAntypasCardiff University IndiraSenUniversity of Mannheim CarlaPerez AlmendrosCardiff University - JoseCamacho-ColladosCardiff University + JoseCamacho-ColladosCardiff University FrancescoBarbieriMeta 17-31 The detection of sensitive content in large datasets is crucial for ensuring that shared and analysed data is free from harmful material. However, current moderation tools, such as external APIs, suffer from limitations in customisation, accuracy across diverse sensitive categories, and privacy concerns. Additionally, existing datasets and open-source models focus predominantly on toxic language, leaving gaps in detecting other sensitive categories such as substance abuse or self-harm. In this paper, we put forward a unified dataset tailored for social media content moderation across six sensitive categories: conflictual language, profanity,sexually explicit material, drug-related content, self-harm, and spam. By collecting and annotating data with consistent retrieval strategies and guidelines, we address the shortcomings of previous focalised research. Our analysis demonstrates that fine-tuning large language models (LLMs) on this novel dataset yields significant improvements in detection performance compared to open off-the-shelf models such as LLaMA, and even proprietary OpenAI models, which underperform by 10-15% overall. This limitation is even more pronounced on popular moderation APIs, which cannot be easily tailored to specific sensitive content categories, among others. @@ -82,7 +82,7 @@ LingSunIndiana University SoyoungKimIndiana University XiaoDongIndiana University - SandraKüblerIndiana University + SandraKüblerIndiana University 67-76 We examine how embedding bias affects hate speech detection by evaluating two debiasing methods—hard-debiasing and soft-debiasing. We analyze stereotype and sentiment associations within the embedding space and assess whether debiased models reduce censorship of marginalized authors while improving detection of hate speech targeting these groups. Our findings highlight how embedding bias propagates into downstream tasks and demonstrates how well different embedding bias metrics can predict bias in hate speech detection. 2025.woah-1.8 @@ -205,7 +205,7 @@ Implicit Hate Target Span Detection in Zero- and Few-Shot Settings with Selective Sub-Billion Parameter Models HossamBoudraaAix-Marseille University - BenoitFavreLIS, AMU Marseille, France + BenoitFavreLIS, AMU Marseille, France RaquelUrenaSESSTIM, AMU Marseille, France 228-240 This work investigates the effectiveness of masked language models (MLMs) and autoregressive language models (LLMs) with fewer than one billion parameters in the detection of implicit hate speech through fine-grained span identification. The evaluation spans zero-shot, few-shot, and full supervision settings across two core benchmarks—SBIC and IHC—and an auxiliary testbed, OffensiveLang.RoBERTa-Large-355M emerges as the strongest zero-shot model, achieving the highest F1 scores of 75.8 (SBIC) and 72.5 (IHC), outperforming larger models like LLaMA 3.2-1B. ModernBERT-125M closely matches this performance with scores of 75.1 and 72.2, demonstrating the advantage of architectural efficiency. Among instruction-tuned models, SmolLM2-135M Instruct and LLaMA 3.2 1B Instruct consistently outperform their non-instructed counterparts, with up to +2.3 F1 gain on SBIC and +1.7 on IHC. Interestingly, the larger SmolLM2-360M Instruct does not outperform the 135M variant, highlighting that model scale does not always correlate with performance in implicit hate detection tasks.Few-shot fine-tuning with SmolLM2-135M Instruct achieves F1 scores of 68.2 (SBIC) and 64.0 (IHC), trailing full-data fine-tuning by only 1.6 and 2.0 points, respectively, with accuracy drops under 0.5 points. This illustrates the promise of compact, instruction-aligned models in data-scarce settings, particularly when optimized with Low-Rank Adaptation (LoRA).Topic-guided error analysis using Latent Dirichlet Allocation (LDA) reveals recurring model failures in ideologically charged or euphemistic discourse. Misclassifications often involve neutral references to identity, politics, or advocacy language, underscoring current limitations in discourse-level inference and sociopragmatic understanding. @@ -227,7 +227,7 @@ Hostility Detection in <fixed-case>UK</fixed-case> Politics: A Dataset on Online Abuse Targeting <fixed-case>MP</fixed-case>s MugdhaPandyaUniversity of Sheffield MaliJinUniversity of Sheffield - KalinaBontchevaUniversity of Sheffield + KalinaBontchevaUniversity of Sheffield DianaMaynardUniversity of Sheffield 254-266 Social media platforms, particularly X, enable direct interaction between politicians and constituents but also expose politicians to hostile responses targetting both their governmental role and personal identity. This online hostility can undermine public trust and potentially incite offline violence. While general hostility detection models exist, they lack the specificity needed for political contexts and country-specific issues. We address this gap by creating a dataset of 3,320 English tweets directed at UK Members of Parliament (MPs) over two years, annotated for hostility and targeted identity characteristics (race, gender, religion). Through linguistic and topical analyses, we examine the unique features of UK political discourse and evaluate pre-trained language models and large language models on binary hostility detection and multi-class targeted identity type classification tasks. Our work provides essential data and insights for studying politics-related hostility in the UK. @@ -250,8 +250,8 @@ Pathways to Radicalisation: On Research for Online Radicalisation in Natural Language Processing and Machine Learning ZeerakTalatUniversity of Edinburgh - Michael SejrSchlichtkrullUniversity of Cambridge - PranavaMadhyasthaCity, University of London + Michael SejrSchlichtkrullUniversity of Cambridge + PranavaMadhyasthaCity, University of London ChristineDe KockUniversity of Melbourne 276-283 Online communities play an integral part in communication for communication across the globe. Online communities that are known for extremist content. As a field of surveillance technologies, NLP and other ML fields hold particular promise for monitoring extremist communities that may turn violent.Such communities make use of a wide variety of modalities of communication, including textual posts on specialised fora, memes, videos, and podcasts. Furthermore, such communities undergo rapid linguistic evolution, thus presenting a challenge to machine learning technologies that quickly diverge from the data that are used. In this position, we argue that radicalisation is a nascent area for which machine learning is particularly apt. However, in addressing radicalisation research it is important that avoids falling into the temptation of focusing on prediction. We argue that such communities present a particular avenue for addressing key concerns with machine learning technologies: (1) temporal misalignment of models and (2) aligning and linking content across modalities. @@ -309,7 +309,7 @@ BerkAtilPennsylvania State University VipulGuptaPennsylvania State University Sarkar Snigdha SarathiDasPennsylvania State University - RebeccaPassonneauThe Pennsylvania State University + RebeccaPassonneauThe Pennsylvania State University 342-354 Large language models (LLMs) have become ubiquitous, thus it is important to understand their risks and limitations, such as their propensity to generate harmful output. This includes smaller LLMs, which are important for settings with constrained compute resources, such as edge devices. Detection of LLM harm typically requires human annotation, which is expensive to collect. This work studies two questions: How do smaller LLMs rank regarding generation of harmful content? How well can larger LLMs annotate harmfulness? We prompt three small LLMs to elicit harmful content of various types, such as discriminatory language, offensive content, privacy invasion, or negative influence, and collect human rankings of their outputs. Then, we compare harm annotation from three state-of-the-art large LLMs with each other and with humans. We find that the smaller models differ with respect to harmfulness. We also find that large LLMs show low to moderate agreement with humans. 2025.woah-1.30 @@ -347,7 +347,7 @@ RobinCooperUniversity of Gothenburg ElinaLindgrenKarlstad University BjörnRönnerstrandUniversity of Gothenburg - AsadSayeedUniversity of Gothenburg + AsadSayeedUniversity of Gothenburg 383-395 A dogwhistle is a communicative act intended to broadcast a message only understood by a select in-group while going unnoticed by others (out-group). We illustrate that political dogwhistle behavior in a more radical community precedes the occurrence of the dogwhistles in a less radical community, but the reverse does not hold. We study two Swedish online communities – Flashback and Familjeliv – which both contain discussions of life and society, with the former having a stronger anti-immigrant subtext. Expressions associated with dogwhistles are substantially more frequent in Flashback than in Familjeliv. We analyze the time series of changes in intensity of three dogwhistle expressions (DWEs), i.e., the strength of association of a DWE and its in-group meaning modeled by Swedish Sentence-BERT, and model the dynamic temporal relationship of intensity in the two communities for the three DWEs using Vector Autoregression (VAR). We show that changes in intensity in Familjeliv are explained by the changes of intensity observed at previous lags in Flashback but not the other way around. This suggests a direction of travel for dogwhistles associated with radical ideologies to less radical contexts. 2025.woah-1.34 @@ -420,7 +420,7 @@ Graph of Attacks with Pruning: Optimizing Stealthy Jailbreak Prompt. Generation for Enhanced <fixed-case>LLM</fixed-case> Content Moderation DanielSchwarz DmitriyBespalov - ZheWang + ZheWang NinadKulkarni YanjunQi 482-489 @@ -457,7 +457,7 @@ SebastianLoftus AdrianMülthaler SanneHoeken - SinaZarrieß + SinaZarrieß OzgeAlacam 538-547 Annotator disagreement poses a significant challenge in subjective tasks like hate speech detection. In this paper, we introduce a novel variant of the HateWiC task that explicitly models annotator agreement by estimating the proportion of annotators who classify the meaning of a term as hateful. To tackle this challenge, we explore the use of Llama 3 models fine-tuned through Direct Preference Optimization (DPO). Our experiments show that while LLMs perform well for majority-based hate classification, they struggle with the more complex agreement-aware task. DPO fine-tuning offers improvements, particularly when applied to instruction-tuned models. Yet, our results emphasize the need for improved modeling of subjectivity in hate classification and this study can serve as foundation for future advancements. diff --git a/data/xml/2025.wraicogs.xml b/data/xml/2025.wraicogs.xml index b7df1b2a68..4b82d0286c 100644 --- a/data/xml/2025.wraicogs.xml +++ b/data/xml/2025.wraicogs.xml @@ -22,7 +22,7 @@ Chain-of-<fixed-case>M</fixed-case>eta<fixed-case>W</fixed-case>riting: Linguistic and Textual Analysis of How Small Language Models Write Young Students Texts IoanaBuhnila GeorgetaCislaru - AmaliaTodirascu + AmaliaTodirascu 1–15 Large Language Models (LLMs) have been used to generate texts in response to different writing tasks: reports, essays, story telling. However, language models do not have a metarepresentation of the text writing process, nor inherent communication learning needs, comparable to those of young human students. This paper introduces a fine-grained linguistic and textual analysis of multilingual Small Language Models’ (SLMs) writing. With our method, Chain-of-MetaWriting, SLMs can imitate some steps of the human writing process, such as planning and evaluation. We mainly focused on short story and essay writing tasks in French for schoolchildren and undergraduate students respectively. Our results show that SLMs encounter difficulties in assisting young students on sensitive topics such as violence in the schoolyard, and they sometimes use words too complex for the target audience. In particular, the output is quite different from the human produced texts in term of text cohesion and coherence regarding temporal connectors, topic progression, reference. 2025.wraicogs-1.1 @@ -55,7 +55,7 @@ FlorianBoudin RichardDufour NicolasHernandez - AkikoAizawa + AkikoAizawa 35–44 Revision is a crucial step in scientific writing, where authors refine their work to improve clarity, structure, and academic quality. Existing approaches to automated writing assistance often focus on sentence-level revisions, which fail to capture the broader context needed for effective modification. In this paper, we explore the impact of shifting from sentence-level to paragraph-level scope for the task of scientific text revision. The paragraph level definition of the task allows for more meaningful changes, and is guided by detailed revision instructions rather than general ones. To support this task, we introduce ParaRev, the first dataset of revised scientific paragraphs with an evaluation subset manually annotated with revision instructions. Our experiments demonstrate that using detailed instructions significantly improves the quality of automated revisions compared to general approaches, no matter the model or the metric considered. 2025.wraicogs-1.4 diff --git a/data/xml/2025.wsc.xml b/data/xml/2025.wsc.xml index c948e87acd..0a0355150d 100644 --- a/data/xml/2025.wsc.xml +++ b/data/xml/2025.wsc.xml @@ -20,8 +20,8 @@ BhaktiJadhav HimanshuDutta ShrutiKanitkar - MalharKulkarni - PushpakBhattacharyya + MalharKulkarni + PushpakBhattacharyya 1–14 2025.wsc-csdh.1 jadhav-etal-2025-introduction @@ -31,7 +31,7 @@ JivneshSandhan AmrutaBarbadikar MalayMaity - PavankumarSatuluri + PavankumarSatuluri TusharSandhan Ravi MGupta PawanGoyal @@ -82,7 +82,7 @@ Compound Type Identification in <fixed-case>S</fixed-case>anskrit SriramKrishnan - PavankumarSatuluri + PavankumarSatuluri AmrutaBarbadikar T SPrasanna Venkatesh AmbaKulkarni diff --git a/data/xml/2025.xllm.xml b/data/xml/2025.xllm.xml index d04de79564..d1ede11c56 100644 --- a/data/xml/2025.xllm.xml +++ b/data/xml/2025.xllm.xml @@ -14,7 +14,7 @@ MeishanZhang WeiLu N.Siddharth - LiljaØvrelid + LiljaØvrelid NianwenXue YueZhang Association for Computational Linguistics @@ -71,7 +71,7 @@ Regular-pattern-sensitive <fixed-case>CRF</fixed-case>s for Distant Label Interactions SeanPapay RomanKlinger - SebastianPadó + SebastianPadó 26-35 While LLMs have grown popular in sequence labeling, linear-chain conditionalrandom fields (CRFs) remain a popular alternativewith the ability to directly model interactions between labels.However, the Markov assumption limits them to interactions between adjacent labels.Weighted finite-state transducers (FSTs), in contrast, can modeldistant label–label interactions, but exact label inference is intractable in general.In this work, we present regular-pattern-sensitiveCRFs (RPCRFs), a method of enriching standardlinear-chain CRFs with the ability to learnlong-distance label interactions through user-specified patterns.This approach allows users to write regular-expressionlabel patterns concisely specifying which types of interactionsthe model should take into account, allowingthe model to learn from data whether and inwhich contexts these patterns occur. The resultcan be interpreted alternatively as a CRF augmented with additional,non-local potentials,or as a finite-state transducer whose structureis defined by a set of easily-interpretable patterns.Critically, exact training and inferenceare tractable for many pattern sets. We detailhow an RPCRF can be automatically constructed from a set of user-specified patterns,and demonstrate the model’s effectiveness ona sequence of three synthetic sequence modeling datasets. 2025.xllm-1.4 @@ -139,7 +139,7 @@ Seamlessly Integrating Tree-Based Positional Embeddings into Transformer Models for Source Code Representation PatrykBartkowiak - FilipGraliński + FilipGraliński 91-98 Transformer-based models have demonstrated significant success in various source code representation tasks. Nonetheless, traditional positional embeddings employed by these models inadequately capture the hierarchical structure intrinsic to source code, typically represented as Abstract Syntax Trees (ASTs). To address this, we propose a novel tree-based positional embedding approach that explicitly encodes hierarchical relationships derived from ASTs, including node depth and sibling indices. These hierarchical embeddings are integrated into the transformer architecture, specifically enhancing the CodeBERTa model. We thoroughly evaluate our proposed model through masked language modeling (MLM) pretraining and clone detection fine-tuning tasks. Experimental results indicate that our Tree-Enhanced CodeBERTa consistently surpasses the baseline model in terms of loss, accuracy, F1 score, precision, and recall, emphasizing the importance of incorporating explicit structural information into transformer-based representations of source code. 2025.xllm-1.10 @@ -201,7 +201,7 @@ Do we still need Human Annotators? Prompting Large Language Models for Aspect Sentiment Quad Prediction Nils ConstantinHellwig JakobFehle - UdoKruschwitz + UdoKruschwitz ChristianWolff 153-172 Aspect sentiment quad prediction (ASQP) facilitates a detailed understanding of opinions expressed in a text by identifying the opinion term, aspect term, aspect category and sentiment polarity for each opinion. However, annotating a full set of training examples to fine-tune models for ASQP is a resource-intensive process. In this study, we explore the capabilities of large language models (LLMs) for zero- and few-shot learning on the ASQP task across five diverse datasets. We report F1 scores almost up to par with those obtained with state-of-the-art fine-tuned models and exceeding previously reported zero- and few-shot performance. In the 20-shot setting on the Rest16 restaurant domain dataset, LLMs achieved an F1 score of 51.54, compared to 60.39 by the best-performing fine-tuned method MVP. Additionally, we report the performance of LLMs in target aspect sentiment detection (TASD), where the F1 scores were close to fine-tuned models, achieving 68.93 on Rest16 in the 30-shot setting, compared to 72.76 with MVP. While human annotators remain essential for achieving optimal performance, LLMs can reduce the need for extensive manual annotation in ASQP tasks. @@ -213,7 +213,7 @@ Can <fixed-case>LLM</fixed-case>s Interpret and Leverage Structured Linguistic Representations? A Case Study with <fixed-case>AMR</fixed-case>s AnkushRaut XiaofengZhu - Maria LeonorPacheco + Maria LeonorPacheco 173-185 This paper evaluates the ability of Large Language Models (LLMs) to leverage contextual information in the form of structured linguistic representations. Specifically, we examine the impact of encoding both short and long contexts using Abstract Meaning Representation (AMR) structures across a diverse set of language tasks. We perform our analysis using 8-bit quantized and instruction-tuned versions of Llama 3.1 (8B), Phi-3, and Mistral 7B. Our results indicate that, for tasks involving short contexts, augmenting the prompt with the AMR of the original language context often degrades the performance of the underlying LLM. However, for tasks that involve long contexts, such as dialogue summarization in the SAMSum dataset, this enhancement improves LLM performance, for example, by increasing the zero-shot cosine similarity score of Llama 3.1 from 66% to 76%. This improvement is more evident in the newer and larger LLMs, but does not extend to the older or smaller ones. In addition, we observe that LLMs can effectively reconstruct the original text from a linearized AMR, achieving a cosine similarity of 81% in the best-case scenario. 2025.xllm-1.16 @@ -322,7 +322,7 @@ DigantaBiswas DipanjanSaha DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 283-287 Event extraction from text is a complex taskthat involves the identification of event triggersand their supporting arguments. Whenapplied to speech, this task becomes evenmore challenging due to the continuous natureof audio signals and the need for robustAutomatic Speech Recognition (ASR). Thispaper proposes an approach that integratesASR with event extraction by utilizing theWhisper model for speech recognition and aText2Event2 Transformer for extracting eventsfrom English audio samples. The Whispermodel is used to generate transcripts from audio,which are then fed into the Text2Event2Transformer to identify event triggers and theirarguments. This approach combines two difficulttasks into one, streamlining the processof extracting structured event information directlyfrom audio. Our approach leverages arobust ASR system (Whisper) followed by aparameter-efficient transformer (Text2Event2fine-tuned via LoRA) to extract structuredevents from raw speech. Unlike prior worktrained on gold textual input, our pipeline istrained end-to-end on noisy ASR outputs. Despitesignificant resource constraints and datanoise, our system ranked first in the ACL 2025XLLM Shared Task II. 2025.xllm-1.24 diff --git a/data/xml/A00.xml b/data/xml/A00.xml index d7f83fe7b2..fd36f509dc 100644 --- a/data/xml/A00.xml +++ b/data/xml/A00.xml @@ -24,7 +24,7 @@ Machine Translation of Very Close Languages - JanHajic + JanHajic 10.3115/974147.974149 7–12 A00-1002 @@ -41,7 +41,7 @@ Automatic construction of parallel <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese corpus for cross-language information retrieval JiangChen - Jian-YunNie + Jian-YunNie 10.3115/974147.974151 21–28 A00-1004 @@ -50,8 +50,8 @@ <fixed-case>P</fixed-case>arts<fixed-case>ID</fixed-case>: A Dialogue-Based System for Identifying Parts for Medical Systems AmitBagga - TomekStrzalkowski - G. BowdenWise + TomekStrzalkowski + G. BowdenWise 10.3115/974147.974152 29–36 A00-1005 @@ -60,8 +60,8 @@ Translation using Information on Dialogue Participants SetsuoYamada - EiichiroSumita - HidekiKashioka + EiichiroSumita + HidekiKashioka 10.3115/974147.974153 37–43 A00-1006 @@ -69,7 +69,7 @@ Distilling dialogues - A method using natural dialogue corpora for dialogue systems development - ArneJonsson + ArneJonsson NilsDahlback 10.3115/974147.974154 44–51 @@ -86,10 +86,10 @@ A Framework for <fixed-case>MT</fixed-case> and Multilingual <fixed-case>NLG</fixed-case> Systems Based on Uniform Lexico-Structural Processing - BenoitLavoie - RichardKittredge + BenoitLavoie + RichardKittredge TanyaKorelsky - OwenRambow + OwenRambow 10.3115/974147.974156 60–67 A00-1009 @@ -97,7 +97,7 @@ <fixed-case>T</fixed-case>alk’n’<fixed-case>T</fixed-case>ravel: A Conversational System for Air Travel Planning - DavidStallard + DavidStallard 10.3115/974147.974157 68–75 A00-1010 @@ -115,7 +115,7 @@ Experiments on Sentence Boundary Detection MarkStevenson - RobertGaizauskas + RobertGaizauskas 10.3115/974147.974159 84–89 A00-1012 @@ -133,7 +133,7 @@ <fixed-case>MIMIC</fixed-case>: An Adaptive Mixed Initiative Spoken Dialogue System for Information Queries - JenniferChu-Carroll + JenniferChu-Carroll 10.3115/974147.974161 97–104 A00-1014 @@ -142,7 +142,7 @@ <fixed-case>J</fixed-case>avox: A Toolkit for Building Speech-Enabled Applications Michael S.Fulkerson - Alan W.Biermann + Alan W.Biermann 10.3115/974147.974162 105–111 A00-1015 @@ -150,8 +150,8 @@ A Compact Architecture for Dialogue Management Based on Scripts and Meta-Outputs - MannyRayner - Beth AnnHockey + MannyRayner + Beth AnnHockey FrankieJames 10.3115/974147.974163 112–118 @@ -183,7 +183,7 @@ Unit Completion for a Computer-aided Translation Typing System - PhilippeLanglais + PhilippeLanglais GeorgeFoster GuyLapalme 10.3115/974147.974166 @@ -193,8 +193,8 @@ Multilingual Coreference Resolution - Sanda M.Harabagiu - Steven J.Maiorano + Sanda M.Harabagiu + Steven J.Maiorano 10.3115/974147.974167 142–149 A00-1020 @@ -202,7 +202,7 @@ Ranking suspected answers to natural language questions using predictive annotation - Dragomir R.Radev + Dragomir R.Radev JohnPrager ValerieSamn 10.3115/974147.974168 @@ -222,7 +222,7 @@ A Question Answering System Supported by Information Extraction - RohiniSrihari + RohiniSrihari WeiLi 10.3115/974147.974170 166–172 @@ -239,10 +239,10 @@ Examining the Role of Statistical and Linguistic Knowledge Sources in a General-Knowledge Question-Answering System - ClaireCardie + ClaireCardie VincentNg - DavidPierce - ChrisBuckley + DavidPierce + ChrisBuckley 10.3115/974147.974172 180–187 A00-1025 @@ -250,9 +250,9 @@ Extracting Molecular Binding Relationships from Biomedical Text - Thomas C.Rindflesch + Thomas C.Rindflesch Jayant V.Rajan - LawrenceHunter + LawrenceHunter 10.3115/974147.974173 188–195 A00-1026 @@ -277,7 +277,7 @@ A Tool for Automated Revision of Grammars for <fixed-case>NLP</fixed-case> Systems - NandaKambhatla + NandaKambhatla WlodekZadrozny 10.3115/974147.974176 210–217 @@ -286,7 +286,7 @@ Aggressive Morphology for Robust Lexical Coverage - William A.Woods + William A.Woods 10.3115/974147.974177 218–223 A00-1030 @@ -303,7 +303,7 @@ Language Independent Morphological Analysis TatsuoYamashita - YujiMatsumoto + YujiMatsumoto 10.3115/974147.974179 232–238 A00-1032 @@ -311,7 +311,7 @@ A Divide-and-Conquer Strategy for Shallow Parsing of <fixed-case>G</fixed-case>erman Free Texts - GunterNeumann + GunterNeumann ChristianBraun JakubPiskorski 10.3115/974147.974180 @@ -321,7 +321,7 @@ A Hybrid Approach for Named Entity and Sub-Type Tagging - RohiniSrihari + RohiniSrihari 10.3115/974147.974181 247–254 A00-1034 @@ -337,12 +337,12 @@ Linguistic Knowledge can Improve Information Retrieval - William A.Woods + William A.Woods Lawrence A.Bookman AnnHouston - Robert J.Kuhns + Robert J.Kuhns PaulMartin - StephenGreen + StephenGreen 10.3115/974147.974183 262–267 A00-1036 @@ -350,8 +350,8 @@ Domain-Specific Knowledge Acquisition from Text - DanMoldovan - RoxanaGirju + DanMoldovan + RoxanaGirju VasileRus 10.3115/974147.974184 268–275 @@ -369,7 +369,7 @@ Unsupervised Discovery of Scenario-Level Patterns for Information Extraction RomanYangarber - RalphGrishman + RalphGrishman PasiTapanainen 10.3115/974147.974186 282–289 @@ -379,7 +379,7 @@ Using Corpus-derived Name Lists for Named Entity Recognition MarkStevenson - RobertGaizauskas + RobertGaizauskas 10.3115/974147.974187 290–295 A00-1040 @@ -387,8 +387,8 @@ Answer Extraction - StevenAbney - MichaelCollins + StevenAbney + MichaelCollins AmitSinghal 10.3115/974147.974188 296–301 @@ -398,8 +398,8 @@ Evaluation of Automatically Identified Index Terms for Browsing Electronic Documents NinaWacholder - Judith L.Klavans - David K.Evans + Judith L.Klavans + David K.Evans 10.3115/974147.974189 302–309 A00-1042 @@ -407,7 +407,7 @@ Sentence Reduction for Automatic Text Summarization - HongyanJing + HongyanJing 10.3115/974147.974190 310–315 A00-1043 @@ -416,10 +416,10 @@ Named Entity Extraction from Noisy Input: Speech and <fixed-case>OCR</fixed-case> DavidMiller - SeanBoisen - RichardSchwartz + SeanBoisen + RichardSchwartz RebeccaStone - RalphWeischedel + RalphWeischedel 10.3115/974147.974191 316–324 A00-1044 @@ -435,8 +435,8 @@ The Efficiency of Multimodal Interaction for a Map-based Task - PhilipCohen - DavidMcGee + PhilipCohen + DavidMcGee JoshClow 10.3115/974147.974193 331–338 @@ -457,8 +457,8 @@ Modelling Grounding and Discourse Obligations Using Update Rules ColinMatheson - MassimoPoesio - DavidTraum + MassimoPoesio + DavidTraum A00-2001 matheson-etal-2000-modelling @@ -485,7 +485,7 @@ Bagging and Boosting a Treebank Parser - John C.Henderson + John C.Henderson EricBrill A00-2005 henderson-brill-2000-bagging @@ -494,22 +494,22 @@ Encoding information on adjectives in a lexical-semantic net for computational applications AntoniettaAlonge FrancescaBertagna - NicolettaCalzolari + NicolettaCalzolari AdrianaRoventini - AntonioZampolli + AntonioZampolli A00-2006 alonge-etal-2000-encoding Noun Phrase Recognition by System Combination - Erik F.Tjong Kim Sang + Erik F.Tjong Kim Sang A00-2007 tjong-kim-sang-2000-noun The <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et tagset for frame-semantic and syntactic coding of predicate-argument structure ChristopherJohnson - Charles J.Fillmore + Charles J.Fillmore A00-2008 johnson-fillmore-2000-framenet @@ -542,7 +542,7 @@ Morphological Tagging: Data vs. Dictionaries - JanHajic + JanHajic A00-2013 hajic-2000-morphological @@ -561,7 +561,7 @@ TakehitoUtsuro ShigeyukiNishiokayama MasakazuFujio - YujiMatsumoto + YujiMatsumoto A00-2015 utsuro-etal-2000-analyzing @@ -586,7 +586,7 @@ An Unsupervised Method for Detecting Grammatical Errors - MartinChodorow + MartinChodorow ClaudiaLeacock A00-2019 chodorow-leacock-2000-unsupervised @@ -607,56 +607,56 @@ Ambiguity Packing in Constraint-based Parsing Practical Results StephanOepen - JohnCarroll + JohnCarroll A00-2022 oepen-carroll-2000-ambiguity Forest-Based Statistical Sentence Generation - IreneLangkilde + IreneLangkilde A00-2023 langkilde-2000-forest Cut and Paste Based Text Summarization - HongyanJing - Kathleen R.McKeown + HongyanJing + Kathleen R.McKeown A00-2024 jing-mckeown-2000-cut Minimizing Word Error Rate in Textual Summaries of Spoken Language KlausZechner - AlexWaibel + AlexWaibel A00-2025 zechner-waibel-2000-minimizing Trainable Methods for Surface Natural Language Generation - AdwaitRatnaparkhi + AdwaitRatnaparkhi A00-2026 ratnaparkhi-2000-trainable Evaluating Automatic Dialogue Strategy Adaptation for a Spoken Dialogue System - JenniferChu-Carroll + JenniferChu-Carroll A00-2027 chu-carroll-2000-evaluating Learning to Predict Problematic Situations in a Spoken Dialogue System: Experiments with <fixed-case>H</fixed-case>ow <fixed-case>M</fixed-case>ay <fixed-case>I</fixed-case> <fixed-case>H</fixed-case>elp <fixed-case>Y</fixed-case>ou? - MarilynWalker - IreneLangkilde + MarilynWalker + IreneLangkilde JerryWright - AllenGorin - DianeLitman + AllenGorin + DianeLitman A00-2028 walker-etal-2000-learning Predicting Automatic Speech Recognition Performance Using Prosodic Cues - Diane J.Litman - Julia B.Hirschberg + Diane J.Litman + Julia B.Hirschberg MarcSwerts A00-2029 litman-etal-2000-predicting @@ -664,9 +664,9 @@ A Novel Use of Statistical Parsing to Extract Information from Text ScottMiller - HeidiFox - LanceRamshaw - RalphWeischedel + HeidiFox + LanceRamshaw + RalphWeischedel A00-2030 miller-etal-2000-novel @@ -679,20 +679,20 @@ Mostly-Unsupervised Statistical Segmentation of <fixed-case>J</fixed-case>apanese: Applications to Kanji - Rie KubotaAndo + Rie KubotaAndo LillianLee A00-2032 ando-lee-2000-mostly Removing Left Recursion from Context-Free Grammars - Robert C.Moore + Robert C.Moore A00-2033 moore-2000-removing Using Semantic Preferences to Identify Verbal Participation in Role Switching Alternations - DianaMcCarthy + DianaMcCarthy A00-2034 mccarthy-2000-using @@ -712,7 +712,7 @@ Acknowledgments in Human-Computer Interaction KarenWard - Peter A.Heeman + Peter A.Heeman A00-2037 ward-heeman-2000-acknowledgments @@ -736,7 +736,7 @@ A Framework for Robust Semantic Interpretation Learning - Carolyn P.Rose + Carolyn P.Rose A00-2041 rose-2000-framework @@ -792,8 +792,8 @@ Corpus-Based Syntactic Error Detection Using Syntactic Patterns - KoldoGojenola - MaiteOronoz + KoldoGojenola + MaiteOronoz A00-3005 gojenola-oronoz-2000-corpus diff --git a/data/xml/A83.xml b/data/xml/A83.xml index 68e44a6c4b..1c12e24d5c 100644 --- a/data/xml/A83.xml +++ b/data/xml/A83.xml @@ -15,7 +15,7 @@
Domain-Independent Natural Language Interfaces: Session Introduction - Aravind K.Joshi + Aravind K.Joshi 10.3115/974194.974196 1–2 A83-1001 @@ -58,7 +58,7 @@ <fixed-case>TEAM</fixed-case>: A Transportable Natural-Language Interface System - Barbara J.Grosz + Barbara J.Grosz 10.3115/974194.974201 39–45 A83-1006 @@ -84,7 +84,7 @@ Distinguishing Fact From Opinion and Events From Meta-Events - Christine A.Montgomery + Christine A.Montgomery 10.3115/974194.974205 55–61 A83-1009 @@ -92,8 +92,8 @@ Parsing With Logical Variables - Timothy W.Finin - Martha StonePalmer + Timothy W.Finin + Martha StonePalmer 10.3115/974194.974206 62–68 A83-1010 @@ -101,7 +101,7 @@ <fixed-case>EXPLORER</fixed-case>: A Natural Language Processing System for Oil Exploration - Wendy G.Lehnert + Wendy G.Lehnert Steven P.Shwartz 10.3115/974194.974207 69–72 @@ -128,7 +128,7 @@ Handling Ill-Formed Input: Session Introduction - Ralph M.Weischedel + Ralph M.Weischedel 10.3115/974194.974211 89–92 A83-1014 @@ -136,8 +136,8 @@ The Fitted Parse: 100% Parsing Capability in a Syntactic Grammar of <fixed-case>E</fixed-case>nglish - KarenJensen - George E.Heidorn + KarenJensen + George E.Heidorn 10.3115/974194.974212 93–98 A83-1015 @@ -172,7 +172,7 @@ Specialized Information Extraction: Automatic Chemical Reaction Coding From <fixed-case>E</fixed-case>nglish Descriptions - Larry H.Reeker + Larry H.Reeker Elena M.Chmora Paul E.Blower 10.3115/974194.974217 @@ -190,9 +190,9 @@ “Expertness” from Structured Text? <fixed-case>RECONSIDER</fixed-case>: A Diagnostic Prompting Program - Mark S.Tuttle - David D.Sherertz - Marsden S.Blois + Mark S.Tuttle + David D.Sherertz + Marsden S.Blois StuartNelson 10.3115/974194.974219 124–131 @@ -218,7 +218,7 @@ Automatic Representation of the Semantic Relationships Corresponding to a <fixed-case>F</fixed-case>rench Surface Expression - Gian PieroZarri + Gian PieroZarri 10.3115/974194.974222 143–147 A83-1024 @@ -234,7 +234,7 @@ Investigating the Possibility of a Microprocessor-Based Machine Translatton System - Harold L.Somers + Harold L.Somers 10.3115/974194.974225 149–155 A83-1026 @@ -242,7 +242,7 @@ An Application of <fixed-case>M</fixed-case>ontague Grammar to <fixed-case>E</fixed-case>nglish-<fixed-case>J</fixed-case>apanese Machine Translation - ToyoakiNishida + ToyoakiNishida ShujiDoshita 10.3115/974194.974226 156–165 @@ -259,7 +259,7 @@ COMPUTER-ASSISTED TRANSLATION SYSTEMS: The Standard Design and A Multi-level Design - Alan K.Melby + Alan K.Melby 10.3115/974194.974228 174–177 A83-1029 diff --git a/data/xml/A88.xml b/data/xml/A88.xml index 04339c6c30..eda83db38d 100644 --- a/data/xml/A88.xml +++ b/data/xml/A88.xml @@ -15,7 +15,7 @@ The Multimedia Articulation of Answers in a Natural Language Database Query System - Susan E.Brennan + Susan E.Brennan 10.3115/974235.974237 1–8 A88-1001 @@ -33,8 +33,8 @@ An Architecture for Anaphora Resolution - ElaineRich - SusannLuperFoy + ElaineRich + SusannLuperFoy 10.3115/974235.974239 18–24 A88-1003 @@ -42,7 +42,7 @@ The <fixed-case>SEMSYN</fixed-case> Generation System: Ingredients, Applications, Prospects - DietmarRosner + DietmarRosner 10.3115/974235.974241 25–32 A88-1004 @@ -58,8 +58,8 @@ From Water to Wine: Generating Natural Language Text From Today’s Applications Programs - David D.McDonald - Marie W.Meteer + David D.McDonald + Marie W.Meteer 10.3115/974235.974243 41–48 A88-1006 @@ -67,8 +67,8 @@ Improved Portability and Parsing Through Interactive Acquisition of Semantic Information - Francois-MichelLang - LynetteHirschman + Francois-MichelLang + LynetteHirschman 10.3115/974235.974245 49–57 A88-1007 @@ -84,7 +84,7 @@ Responding to Semantically Ill-Formed Input - RalphGrishman + RalphGrishman PingPeng 10.3115/974235.974247 66–70 @@ -93,8 +93,8 @@ Evaluation of a Parallel Chart Parser - RalphGrishman - MaheshChitrao + RalphGrishman + MaheshChitrao 10.3115/974235.974248 71–76 A88-1010 @@ -103,7 +103,7 @@ Triphone Analysis: A Combined Method for the Correction of Orthographical and Typographical Errors. Brigittevan Berkel - KoenraadDe Smedt + KoenraadDe Smedt 10.3115/974235.974250 77–83 A88-1011 @@ -111,7 +111,7 @@ Creating and Querying Lexical Data Bases - Mary S.Neff + Mary S.Neff Roy J.Byrd Omneya A.Rizk 10.3115/974235.974251 @@ -131,8 +131,8 @@ Building a Large Thesaurus for Information Retrieval Edward A.Fox J. TerryNutter - ThomasAhlswede - MarthaEvens + ThomasAhlswede + MarthaEvens JudithMarkowitz 10.3115/974235.974253 101–108 @@ -141,9 +141,9 @@ Application-Specific Issues in Natural Language Interfacer Development for a Diagnostic Expert System - Karen L.Ryan + Karen L.Ryan RebeccaRoot - DuaneOlawsky + DuaneOlawsky 10.3115/974235.974255 109–114 A88-1015 @@ -152,7 +152,7 @@ The <fixed-case>MULTIVOC</fixed-case> Text-to-Speech System Olivier M.Emorine - Pierre M.Martin + Pierre M.Martin 10.3115/974235.974256 115–120 A88-1016 @@ -168,8 +168,8 @@ Integrating Top-Down and Bottom-Up Strategies in a Text Processing System - Lisa F.Rau - Paul S.Jacobs + Lisa F.Rau + Paul S.Jacobs 10.3115/974235.974259 129–135 A88-1018 @@ -177,7 +177,7 @@ A Stochastic Parts Program and Noun Phrase Parser for Unrestricted Text - Kenneth WardChurch + Kenneth WardChurch 10.3115/974235.974260 136–143 A88-1019 @@ -185,8 +185,8 @@ A Tool for Investigating the Synonymy Relation in a Sense Disambiguated Thesaurus - Martin S.Chodorow - YaelRavin + Martin S.Chodorow + YaelRavin Howard E.Sachar 10.3115/974235.974261 144–151 @@ -195,7 +195,7 @@ Dictionary Text Entries as a Source of Knowledge for Syntactic and Other Disambiguations - KarenJensen + KarenJensen Jean-LouisBinot 10.3115/974235.974262 152–159 @@ -204,7 +204,7 @@ <fixed-case>E</fixed-case>urotra Practical Experience With a Multilingual Machine Translation System Under Development - Giovanni B.Varile + Giovanni B.Varile PeterLau 10.3115/974235.974264 160–167 @@ -221,7 +221,7 @@ Natural Language Interfaces: Present and Future - Norman K.Sondheimer + Norman K.Sondheimer 10.3115/974235.974267 176–177 A88-1024 @@ -229,7 +229,7 @@ Automatically Generating Natural Language Reports in an Office Environment - JugalKalita + JugalKalita SunilShende 10.3115/974235.974269 178–185 @@ -239,7 +239,7 @@ <fixed-case>LUKE</fixed-case>: An Experiment in the Early Integration of Natural Language Processing David A.Wroblewski - Elaine A.Rich + Elaine A.Rich 10.3115/974235.974270 186–194 A88-1026 @@ -248,7 +248,7 @@ The Experience of Developing a Large-Scale Natural Language Text Processing System: Critique Stephen D.Richardson - Lisa C.Braden-Harder + Lisa C.Braden-Harder 10.3115/974235.974271 195–202 A88-1027 @@ -256,7 +256,7 @@ Computational Techniques for Improved Name Search - Beatrice T.Oshika + Beatrice T.Oshika FilipMachi BruceEvans JanetTom @@ -275,7 +275,7 @@ Finding Clauses in Unrestricted Text by Finitary and Stochastic Methods - Eva I.Ejerhed + Eva I.Ejerhed 10.3115/974235.974275 219–227 A88-1030 @@ -291,8 +291,8 @@ Localizing Expression of Ambiguity - JohnBear - Jerry R.Hobbs + JohnBear + Jerry R.Hobbs 10.3115/974235.974278 235–242 A88-1032 diff --git a/data/xml/A92.xml b/data/xml/A92.xml index 82a30c0f03..ec8bacdb5a 100644 --- a/data/xml/A92.xml +++ b/data/xml/A92.xml @@ -15,8 +15,8 @@ Deriving Database Queries from Logical Forms by Abductive Definition Expansion - MannyRayner - HiyanAlshawi + MannyRayner + HiyanAlshawi 10.3115/974499.974501 1–8 A92-1001 @@ -24,9 +24,9 @@ A Dialog Control Algorithm and Its Performance - Ronnie W.Smith + Ronnie W.Smith D. RichardHipp - Alan W.Biermann + Alan W.Biermann 10.3115/974499.974502 9–16 A92-1002 @@ -34,8 +34,8 @@ An Approach to Multilevel Semantics for Applied Systems - AlbertoLavelli - BernardoMagnini + AlbertoLavelli + BernardoMagnini CarloStrapparava 10.3115/974499.974503 17–24 @@ -44,9 +44,9 @@ A Parser for Real-Time Speech Synthesis of Conversational Texts - JoanBachenko + JoanBachenko JeffreyDaugherty - EileenFitzpatrick + EileenFitzpatrick 10.3115/974499.974505 25–32 A92-1004 @@ -56,7 +56,7 @@ Real-time linguistic analysis for continuous speech understanding PaoloBaggia ElisabettaGerbino - EgidioGiachin + EgidioGiachin ClaudioRullent 10.3115/974499.974506 33–39 @@ -65,7 +65,7 @@ Applied Text Generation - OwenRambow + OwenRambow TanyaKorelsky 10.3115/974499.974508 40–47 @@ -74,7 +74,7 @@ Automatic Generation of Multimodal Weather Reports from Datasets - Stephan M.Kerpedjiev + Stephan M.Kerpedjiev 10.3115/974499.974509 48–55 A92-1007 @@ -91,7 +91,7 @@ Automatic Generation of On-Line Documentation in the <fixed-case>IDAS</fixed-case> Project EhudReiter - ChrisMellish + ChrisMellish JohnLevine 10.3115/974499.974511 64–71 @@ -101,7 +101,7 @@ Integrating Natural Language Components into Graphical Discourse StephanDilley - JohnBateman + JohnBateman UlrichThiel AnneTissen 10.3115/974499.974512 @@ -128,9 +128,9 @@ Computational Lexicons: the Neat Examples and the Odd Exemplars - RobertoBasili - Maria TeresaPazienza - PaolaVelardi + RobertoBasili + Maria TeresaPazienza + PaolaVelardi 10.3115/974499.974516 96–103 A92-1013 @@ -139,9 +139,9 @@ Automatic Learning for Semantic Collocation SatoshiSekine - Jeremy J.Carroll + Jeremy J.Carroll SofiaAnaniadou - Jun’ichiTsujii + Jun’ichiTsujii 10.3115/974499.974517 104–110 A92-1014 @@ -183,7 +183,7 @@ A Practical Part-of-Speech Tagger - DougCutting + DougCutting JulianKupiec JanPedersen PenelopeSibun @@ -203,12 +203,12 @@ A Corpus-Based Statistical Approach to Automatic Book Indexing - Jyun-ShengChang + Jyun-ShengChang Tsung-YihTseng - Sur-JinKer + Sur-JinKer YingCheng - Huey-ChyunChen - Shun-DerCheng + Huey-ChyunChen + Shun-DerCheng John S.Liu 10.3115/974499.974525 147–151 @@ -225,8 +225,8 @@ Evaluating Parsing Strategies Using Standardized Parse Files - RalphGrishman - CatherineMacleod + RalphGrishman + CatherineMacleod JohnSterling 10.3115/974499.974528 156–161 @@ -235,8 +235,8 @@ A Practical Methodology for the Evaluation of Spoken Language Systems - SeanBoisen - MadeleineBates + SeanBoisen + MadeleineBates 10.3115/974499.974529 162–169 A92-1023 @@ -249,7 +249,7 @@ Steven P.Weinstein Alison K.Huettner Linda M.Schmandt - Irene B.Nirenburg + Irene B.Nirenburg 10.3115/974499.974531 170–177 A92-1024 @@ -257,7 +257,7 @@ Joining Statistics with <fixed-case>NLP</fixed-case> for Text Categorization - Paul S.Jacobs + Paul S.Jacobs 10.3115/974499.974532 178–185 A92-1025 @@ -265,9 +265,9 @@ Robust Processing of Real-World Natural-Language Texts - Jerry R.Hobbs - Douglas E.Appelt - JohnBear + Jerry R.Hobbs + Douglas E.Appelt + JohnBear MabryTyson 10.3115/974499.974533 186–192 @@ -276,7 +276,7 @@ An Efficient Chart-based Algorithm for Partial-Parsing of Unrestricted Texts - David D.McDonald + David D.McDonald 10.3115/974499.974534 193–200 A92-1027 @@ -293,9 +293,9 @@ Compound Nouns in a Unification-Based <fixed-case>MT</fixed-case> System - PierretteBouillon - KatharinaBoesefeldt - GrahamRussell + PierretteBouillon + KatharinaBoesefeldt + GrahamRussell 10.3115/974499.974537 209–215 A92-1029 @@ -303,9 +303,9 @@ <fixed-case>XTAG</fixed-case> - A Graphical Workbench for Developing <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - PatrickParoubek + PatrickParoubek YvesSchabes - Aravind K.Joshi + Aravind K.Joshi 10.3115/974499.974538 223–230 A92-1030 @@ -351,7 +351,7 @@ Practical World Modeling for <fixed-case>NLP</fixed-case> Applications LynnCarlson - SergeiNirenburg + SergeiNirenburg 10.3115/974499.974544 235–236 A92-1035 @@ -359,7 +359,7 @@ Portable Natural Language Generation using <fixed-case>SPOKESMAN</fixed-case> - MarieMeteer + MarieMeteer 10.3115/974499.974545 237–238 A92-1036 @@ -368,8 +368,8 @@ A Method of Automatic Hypertext Construction from an Encyclopedic Dictionary of a Specific Field SadaoKurohashi - MakotoNagao - SatoshiSato + MakotoNagao + SatoshiSato MasahikoMurakami 10.3115/974499.974546 239–240 @@ -378,10 +378,10 @@ Datenbank-<fixed-case>DIALOG</fixed-case> and the Relevance of Habitability - HaraldTrost + HaraldTrost WolfgangHeinz JohannesMatiasek - ErnstBuchberger + ErnstBuchberger 10.3115/974499.974547 241–242 A92-1038 @@ -398,11 +398,11 @@ Dialogue Management for Telephone Information Systems ScottMcGlashan - NormanFraser + NormanFraser NigelGilbert EricBilange PaulHeisterkamp - NickYoud + NickYoud 10.3115/974499.974549 245–246 A92-1040 @@ -436,12 +436,12 @@ <fixed-case>SEISD</fixed-case>: An environment for extraction of Semantic Information from on-line dictionaries AliciaAgeno - IreneCastellon + IreneCastellon M. A.Marti - GermanRigau - FrancescRibas - HoracioRodriguez - MarionaTaule + GermanRigau + FrancescRibas + HoracioRodriguez + MarionaTaule FelisaVerdejo 10.3115/974499.974553 253–254 @@ -450,7 +450,7 @@ Multi-Purpose Development and Operation Environments for Natural Language Applications - SergeiNirenburg + SergeiNirenburg PeterShell ArielCohen PeterCousseau @@ -471,7 +471,7 @@ Lexical Processing in the <fixed-case>CLARE</fixed-case> System - David M.Carter + David M.Carter 10.3115/974499.974556 259–260 A92-1047 diff --git a/data/xml/A94.xml b/data/xml/A94.xml index 78e098437b..1add3e8b10 100644 --- a/data/xml/A94.xml +++ b/data/xml/A94.xml @@ -24,7 +24,7 @@ Practical Issues in Automatic Documentation Generation - KathleenMcKeown + KathleenMcKeown KarenKukich JamesShaw 10.3115/974358.974361 @@ -51,7 +51,7 @@ Machine Translation of Sentences with Fixed Expressions - NaotoKatoh + NaotoKatoh TeruakiAizawa 10.3115/974358.974366 28–33 @@ -61,7 +61,7 @@ <fixed-case>T</fixed-case>ermight: Identifying and Translating Technical Terminology IdoDagan - KenChurch + KenChurch 10.3115/974358.974367 34–40 A94-1006 @@ -95,7 +95,7 @@ Improving Language Models by Clustering Training Sentences - DavidCarter + DavidCarter 10.3115/974358.974372 59–64 A94-1010 @@ -112,7 +112,7 @@ Combination of Symbolic and Statistical Approaches for Grammatical Knowledge Acquisition MasakiKiyono - Jun’ichiTsujii + Jun’ichiTsujii 10.3115/974358.974375 72–77 A94-1012 @@ -120,8 +120,8 @@ Adaptive Sentence Boundary Disambiguation - David D.Palmer - Marti A.Hearst + David D.Palmer + Marti A.Hearst 10.3115/974358.974376 78–83 A94-1013 @@ -147,8 +147,8 @@ Three Heads are Better than One - RobertFrederking - SergeiNirenburg + RobertFrederking + SergeiNirenburg 10.3115/974358.974380 95–100 A94-1016 @@ -157,7 +157,7 @@ Real-Time Spoken Language Translation Using Associative Processors KozoOi - EiichiroSumita + EiichiroSumita OsamuFuruse HitoshiIida TetsuyaHiguchi @@ -185,7 +185,7 @@ Resolving Anaphora in a Portable Natural Language Front End to Databases Flavia A.Barros - AnneDeRoeck + AnneDeRoeck 10.3115/974358.974386 119–124 A94-1020 @@ -193,8 +193,8 @@ Upholding the Maxim of Relevance during Patient-Centered Activities - Abigail S.Gertner - Bonnie L.Webber + Abigail S.Gertner + Bonnie L.Webber John R.Clarke 10.3115/974358.974387 125–131 @@ -203,10 +203,10 @@ The Delphi Natural Language Understanding System - MadeleineBates - RobertBobrow - RobertIngria - DavidStallard + MadeleineBates + RobertBobrow + RobertIngria + DavidStallard 10.3115/974358.974388 132–137 A94-1022 @@ -233,7 +233,7 @@ A robust category guesser for <fixed-case>D</fixed-case>utch medical language - PeterSpyns + PeterSpyns 10.3115/974358.974392 150–155 A94-1025 @@ -258,7 +258,7 @@ Robust Text Processing in Automated Information Retrieval - TomekStrzalkowski + TomekStrzalkowski 10.3115/974358.974396 168–173 A94-1028 @@ -266,9 +266,9 @@ Might a semantic lexicon support hypertextual authoring? - RobertoBasili + RobertoBasili FabrizioGrisoli - Maria TeresaPazienza + Maria TeresaPazienza 10.3115/974358.974397 174–179 A94-1029 @@ -295,7 +295,7 @@ Automatic Aquisition of Semantic Attributes for User Defined Words m <fixed-case>J</fixed-case>apanese to <fixed-case>E</fixed-case>nglish Machine Translation SatoruIkehara - SatoshiShirai + SatoshiShirai AkioYokoo FrancisBond YoshieOmi @@ -335,7 +335,7 @@ A Practical Evaluation of an Integrated Translation Tool during a Large Scale Localisation Project - ReinhardSchaler + ReinhardSchaler 10.3115/974358.974405 192–193 A94-1036 @@ -373,7 +373,7 @@ Multifunction Thesaurus for <fixed-case>R</fixed-case>ussian Word Processing - Igor A.Bolshakov + Igor A.Bolshakov 10.3115/974358.974409 200–202 A94-1040 @@ -381,7 +381,7 @@ Representing Knowledge for Planning Multisentential Text - JoseCoch + JoseCoch RaphaelDavid 10.3115/974358.974410 203–204 @@ -400,7 +400,7 @@ An Interactive Rewriting Tool for Machine Acceptable Sentences - HidekiHirakawa + HidekiHirakawa KouichiNomura MarikoNakamura 10.3115/974358.974412 @@ -410,7 +410,7 @@ <fixed-case>TECHDOC</fixed-case>: Multilingual generation of online and offline instructional text - DietmarRosner + DietmarRosner ManfredStede 10.3115/974358.974413 209–210 @@ -419,7 +419,7 @@ An Inheritance-based Lexicon for Message Understanding Systems - Lynne J.Cahill + Lynne J.Cahill 10.3115/974358.974414 211–212 A94-1045 @@ -427,7 +427,7 @@ Industrial Applications of Unification Morphology - GaborProszeky + GaborProszeky 10.3115/974358.974415 213–214 A94-1046 @@ -435,10 +435,10 @@ Sublanguage Engineering in the <fixed-case>F</fixed-case>o<fixed-case>G</fixed-case> System - RichardKittredge - EliGoldberg + RichardKittredge + EliGoldberg MyungheeKim - AlainPolguere + AlainPolguere 10.3115/974358.974416 215–216 A94-1047 diff --git a/data/xml/A97.xml b/data/xml/A97.xml index 38c38a01d9..bc5b5486bf 100644 --- a/data/xml/A97.xml +++ b/data/xml/A97.xml @@ -29,7 +29,7 @@ Natural Language in Four Spatial Interfaces KennethWauchope - StephanieEverett + StephanieEverett DennisPerzanowski ElaineMarsh 10.3115/974557.974559 @@ -39,9 +39,9 @@ High Performance Segmentation of Spontaneous Speech Using Part of Speech and Trigger Word Information - MarsalGavalda + MarsalGavalda KlausZechner - GregoryAist + GregoryAist 10.3115/974557.974560 12–15 A97-1003 @@ -50,7 +50,7 @@ A Maximum Entropy Approach to Identifying Sentence Boundaries Jeffrey C.Reynar - AdwaitRatnaparkhi + AdwaitRatnaparkhi 10.3115/974557.974561 16–19 A97-1004 @@ -58,10 +58,10 @@ <fixed-case>Q</fixed-case>uick<fixed-case>S</fixed-case>et: Multimodal Interaction for Simulation Set-up and Control - Philip R.Cohen - MichaelJohnston - DavidMcGee - SharonOviatt + Philip R.Cohen + MichaelJohnston + DavidMcGee + SharonOviatt JayPittman IraSmith LiangChen @@ -76,7 +76,7 @@ StephanBusemann ThierryDeclerck Abdel KaderDiagne - LucaDini + LucaDini JudithKlein SvenSchmeier 10.3115/974557.974563 @@ -88,7 +88,7 @@ Insights into the Dialogue Processing of <fixed-case>VERBMOBIL</fixed-case> JanAlexandersson NorbertReithinger - ElisabethMaier + ElisabethMaier 10.3115/974557.974564 33–40 A97-1007 @@ -96,7 +96,7 @@ An Evaluation of Strategies for Selective Utterance Verification for Spoken Natural Language Dialog - Ronnie W.Smith + Ronnie W.Smith 10.3115/974557.974565 41–48 A97-1008 @@ -105,7 +105,7 @@ Name pronunciation in <fixed-case>G</fixed-case>erman text-to-speech synthesis StefanieJannedy - BerndMobius + BerndMobius 10.3115/974557.974566 49–56 A97-1009 @@ -123,7 +123,7 @@ A non-projective dependency parser PasiTapanainen - TimoJarvinen + TimoJarvinen 10.3115/974557.974568 64–71 A97-1011 @@ -131,7 +131,7 @@ Incremental Finite-State Parsing - SalahAit-Mokhtar + SalahAit-Mokhtar Jean-PierreChanod 10.3115/974557.974569 72–79 @@ -141,7 +141,7 @@ Developing a hybrid <fixed-case>NP</fixed-case> parser AtroVoutilainen - LluisPadro + LluisPadro 10.3115/974557.974570 80–87 A97-1013 @@ -177,8 +177,8 @@ Probabilistic and Rule-Based Tagger of an Inflective Language- a Comparison - JanHajic - BarboraHladka + JanHajic + BarboraHladka 10.3115/974557.974574 111–118 A97-1017 @@ -188,7 +188,7 @@ <fixed-case>CS</fixed-case>eg&Tagl.0: A Practical Word Segmenter and <fixed-case>POS</fixed-case> Tagger for <fixed-case>C</fixed-case>hinese Texts SunMaosong ShenDayang - HuangChangning + ChangningHuang 10.3115/974557.974575 119–126 A97-1018 @@ -207,9 +207,9 @@ Reading more into Foreign Languages JohnNerbonne LauriKarttunen - ElenaPaskaleva - GaborProszeky - TiitRoosmaa + ElenaPaskaleva + GaborProszeky + TiitRoosmaa 10.3115/974557.974577 135–138 A97-1020 @@ -217,7 +217,7 @@ Large-Scale Acquisition of <fixed-case>LCS</fixed-case>-Based Lexicons for Foreign Language Tutoring - Bonnie J.Dorr + Bonnie J.Dorr 10.3115/974557.974578 139–146 A97-1021 @@ -225,9 +225,9 @@ A Prototype of a Grammar Checker for <fixed-case>C</fixed-case>zech - TomášHolan - VladislavKuboň - MartinPlátek + TomášHolan + VladislavKuboň + MartinPlátek 10.3115/974557.974579 147–154 A97-1022 @@ -235,7 +235,7 @@ Techniques for Accelerating a Grammar-Checker - KarelOliva + KarelOliva 10.3115/974557.974580 155–158 A97-1023 @@ -251,8 +251,8 @@ Contextual Spelling Correction Using Latent Semantic Analysis - Michael P.Jones - James H.Martin + Michael P.Jones + James H.Martin 10.3115/974557.974582 166–173 A97-1025 @@ -263,7 +263,7 @@ JillBurstein SusanneWolff ChiLu - Randy M.Kaplan + Randy M.Kaplan 10.3115/974557.974583 174–181 A97-1026 @@ -283,8 +283,8 @@ A Statistical Profile of the Named Entity Task - David D.Palmer - David S.Day + David D.Palmer + David S.Day 10.3115/974557.974585 190–193 A97-1028 @@ -292,10 +292,10 @@ <fixed-case>N</fixed-case>ymble: a High-Performance Learning Name-finder - Daniel M.Bikel + Daniel M.Bikel ScottMiller - RichardSchwartz - RalphWeischedel + RichardSchwartz + RalphWeischedel 10.3115/974557.974586 194–201 A97-1029 @@ -304,7 +304,7 @@ Disambiguation of Proper Names in Text NinaWacholder - YaelRavin + YaelRavin MisookChoi 10.3115/974557.974587 202–208 @@ -313,7 +313,7 @@ An Information Extraction Core System for Real World <fixed-case>G</fixed-case>erman Text Processing - GunterNeumann + GunterNeumann RolfBackofen JudithBaur MarkusBecker @@ -334,8 +334,8 @@ Building a Generation Knowledge Source using <fixed-case>I</fixed-case>nternet-Accessible Newswire - Dragomir R.Radev - Kathleen R.McKeown + Dragomir R.Radev + Kathleen R.McKeown 10.3115/974557.974590 221–228 A97-1033 @@ -345,7 +345,7 @@ Using <fixed-case>SGML</fixed-case> as a Basis for Data-Intensive <fixed-case>NLP</fixed-case> DavidMcKelvie ChrisBrew - HenryThompson + HenryThompson 10.3115/974557.974591 229–236 A97-1034 @@ -353,10 +353,10 @@ Software Infrastructure for Natural Language Processing - HamishCunningham - KevinHumphreys - RobertGaizauskas - YorickWilks + HamishCunningham + KevinHumphreys + RobertGaizauskas + YorickWilks 10.3115/974557.974592 237–244 A97-1035 @@ -364,7 +364,7 @@ An Open Distributed Architecture for Reuse and Integration of Heterogeneous <fixed-case>NLP</fixed-case> Components - RemiZajac + RemiZajac MarkCasper NigelSharples 10.3115/974557.974593 @@ -374,8 +374,8 @@ Customizable Descriptions of Object-Oriented Models - BenoitLavoie - OwenRambow + BenoitLavoie + OwenRambow EhudReiter 10.3115/974557.974594 253–256 @@ -384,7 +384,7 @@ <fixed-case>C</fixed-case>ogent<fixed-case>H</fixed-case>elp: <fixed-case>NLG</fixed-case> meets <fixed-case>SE</fixed-case> in a tool for authoring dynamically generated on-line help - MichaelWhite + MichaelWhite David E.Caldwell 10.3115/974557.974595 257–264 @@ -393,7 +393,7 @@ A Fast and Portable Realizer for Text Generation Systems - BenoitLavoie + BenoitLavoie OwenRainbow 10.3115/974557.974596 265–268 @@ -402,12 +402,12 @@ Multilingual Generation and Summarization of Job Adverts: the <fixed-case>TREE</fixed-case> Project - HaroldSomers + HaroldSomers BillBlack JoakimNivre - TorbjornLager + TorbjornLager AnnarosaMultari - LucaGilardoni + LucaGilardoni JeremyEllman AlexRogers 10.3115/974557.974597 @@ -417,7 +417,7 @@ Language Generation for Multimedia Healthcare Briefings - Kathleen R.McKeown + Kathleen R.McKeown Desmond A.Jordan ShimeiPan JamesShaw @@ -429,8 +429,8 @@ Identifying Topics by Position - Chin-YewLin - EduardHovy + Chin-YewLin + EduardHovy 10.3115/974557.974599 283–290 A97-1042 @@ -440,7 +440,7 @@ An Automatic Extraction of Key Paragraphs Based on Context Dependency FumiyoFukumoto YoshimiSuzuki - Jun’ichiFukumoto + Jun’ichiFukumoto 10.3115/974557.974600 291–298 A97-1043 @@ -448,7 +448,7 @@ Building Effective Queries In Natural Language Information Retrieval - TomekStrzalkowski + TomekStrzalkowski FangLin JosePerez-Carballo JinWang @@ -459,7 +459,7 @@ Construction and Visualization of Key Term Hierarchies - JoeZhou + JoeZhou TroyTanner 10.3115/974557.974602 307–311 @@ -468,7 +468,7 @@ Fast Statistical Parsing of Noun Phrases for Document Indexing - ChengxiangZhai + ChengxiangZhai 10.3115/974557.974603 312–319 A97-1046 @@ -486,9 +486,9 @@ An Interactive Translation Support Facility for Non-Professional Users KiyoshiYamabana KazunoriMuraki - Shin-ichiroKamei + Shin-ichiroKamei KenjiSatoh - ShinichiDoi + ShinichiDoi ShinkoTamura 10.3115/974557.974605 324–331 @@ -516,12 +516,12 @@ Mixed-Initiative Development of Language Processing Systems - DavidDay + DavidDay JohnAberdeen - LynetteHirschman + LynetteHirschman RobynKozierok - PatriciaRobinson - MarcVilain + PatriciaRobinson + MarcVilain 10.3115/974557.974608 348–355 A97-1051 @@ -529,8 +529,8 @@ Automatic Extraction of Subcategorization from Corpora - TedBriscoe - JohnCarroll + TedBriscoe + JohnCarroll 10.3115/974557.974609 356–363 A97-1052 @@ -539,7 +539,7 @@ Learning Probabilistic Subcategorization Preference by Identifying Case Dependencies and Optimal Noun Class Generalization Level TakehitoUtsuro - YujiMatsumoto + YujiMatsumoto 10.3115/974557.974610 364–371 A97-1053 @@ -557,7 +557,7 @@ Automatic Selection of Class Labels from a Thesaurus for an Effective Semantic Tagging of Corpora. AlessandroCucchiarelli - PaolaVelardi + PaolaVelardi 10.3115/974557.974612 380–387 A97-1055 @@ -566,8 +566,8 @@ Sequential Model Selection for Word Sense Disambiguation TedPedersen - RebeccaBruce - JanyceWiebe + RebeccaBruce + JanyceWiebe 10.3115/974557.974613 388–395 A97-1056 @@ -598,7 +598,7 @@ An Efficient Two Stage Approach to Robust Language Interpretation - Carolyn PensteinRose + Carolyn PensteinRose 10.3115/974281.974284 3–4 A97-2002 @@ -615,7 +615,7 @@ Duke’s Trainable Information and Meaning Extraction System (Duke <fixed-case>TIMES</fixed-case>) AmitBagga - Joyce YueChai + Joyce YueChai 10.3115/974281.974286 7–8 A97-2004 @@ -623,7 +623,7 @@ Dependency parser demo - TimoJarvinen + TimoJarvinen PasiTapanainen 10.3115/974281.974287 9–10 @@ -632,8 +632,8 @@ An Improvement in the Selection Process of Machine Translation Using Inductive Learning with Genetic Algorithms - HiroshiEchizen-ya - KenjiAraki + HiroshiEchizen-ya + KenjiAraki YoshikazuMiyanaga KojiTochinai 10.3115/974281.974288 @@ -643,7 +643,7 @@ <fixed-case>CIRCSIM</fixed-case>-Tutor: An Intelligent Tutoring System Using Natural Language Dialogue - Martha W.Evens + Martha W.Evens Ru-CharnChang Yoon HeeLee Leem SeopShim @@ -696,10 +696,10 @@ <fixed-case>EAGLE</fixed-case>: An Extensible Architecture for General Linguistic Engineering BreckBaldwin - ChristineDoran + ChristineDoran Jeffrey C.Reynar MichaelNiv - B.Srinivas + B.Srinivas 10.3115/974281.974295 23–23 A97-2013 @@ -707,8 +707,8 @@ An <fixed-case>E</fixed-case>nglish Grammar Checker as a Writing Aid for Students of <fixed-case>E</fixed-case>nglish as a Second Language - Jong C.Park - MarthaPalmer + Jong C.Park + MarthaPalmer ClayWashburn 10.3115/974281.974296 24–24 @@ -718,7 +718,7 @@ <fixed-case>CATMORF</fixed-case>: Multi two-level steps for <fixed-case>C</fixed-case>atalan morphology ToniBadia - AngelsEgea + AngelsEgea AntoniTuells 10.3115/974281.974297 25–26 @@ -738,10 +738,10 @@ <fixed-case>GATE</fixed-case> - a General Architecture for Text Engineering - HamishCunningham - KevinHumphreys - RobertGaizauskas - YorickWilks + HamishCunningham + KevinHumphreys + RobertGaizauskas + YorickWilks 10.3115/974281.974299 29–30 A97-2017 @@ -757,8 +757,8 @@ <fixed-case>NL</fixed-case> Assistant: A Toolkit for Developing Natural Language: Applications - Deborah A.Dahl - Lewis M.Norton + Deborah A.Dahl + Lewis M.Norton AhmedBouzid LiLi 10.3115/974281.974301 @@ -768,8 +768,8 @@ Using <fixed-case>BBN</fixed-case> <fixed-case>VALAD</fixed-case>: Speech at the Logistics Anchor Desk - MadelineBates - RustyBobrow + MadelineBates + RustyBobrow 10.3115/974281.974303 35–35 A97-2020 @@ -777,7 +777,7 @@ A Spoken Language Interface to a Virtual Reality System (Video) - Stephanie S.Everett + Stephanie S.Everett KennethWauchope Manuel A.Pfirez 10.3115/974281.974304 diff --git a/data/xml/C00.xml b/data/xml/C00.xml index 07bbb909d1..50131a2852 100644 --- a/data/xml/C00.xml +++ b/data/xml/C00.xml @@ -29,7 +29,7 @@ Learning Word Clusters from Data Types PaoloAllegrini - SimonettaMontemagni + SimonettaMontemagni VitoPirrelli C00-1002 allegrini-etal-2000-learning @@ -44,7 +44,7 @@ Extended Models and Tools for High-performance Part-of-speech MasayukiAsahara - YujiMatsumoto + YujiMatsumoto C00-1004 asahara-matsumoto-2000-extended @@ -57,15 +57,15 @@ The Effects of Word Order and Segmentation on Translation Retrieval Performance - TimothyBaldwin + TimothyBaldwin HozumiTanaka C00-1006 baldwin-tanaka-2000-effects Exploiting a Probabilistic Hierarchical Model for Generation - SrinivasBangalore - OwenRambow + SrinivasBangalore + OwenRambow C00-1007 bangalore-rambow-2000-exploiting @@ -78,8 +78,8 @@ Combination of N-Grams and Stochastic Context-Free Grammars for Language Modeling - Jose-MiguelBenedi - Joan-AndreuSanchez + Jose-MiguelBenedi + Joan-AndreuSanchez C00-1009 benedi-sanchez-2000-combination @@ -97,16 +97,16 @@ The effects of analysing cohesion on document summarisation - Branimir K.Boguraev - Mary S.Neff + Branimir K.Boguraev + Mary S.Neff C00-1012 boguraev-neff-2000-effects Creating a Universal Networking Language Module within an Advanced <fixed-case>NLP</fixed-case> System - IgorBoguslavsky + IgorBoguslavsky NadezhdaFrid - LeonidIomdin + LeonidIomdin LeonidKreidlin IrinaSagalova VictorSizov @@ -128,14 +128,14 @@ Binding Constraints as Instructions of Binding Machines - AntonioBranco + AntonioBranco C00-1016 branco-2000-binding Probabilistic Parsing and Psychological Plausibility ThorstenBrants - MatthewCrocker + MatthewCrocker C00-1017 brants-crocker-2000-probabilistic @@ -147,7 +147,7 @@ Automated Generalization of Translation Examples - Ralf D.Brown + Ralf D.Brown C00-1019 brown-2000-automated @@ -160,8 +160,8 @@ Tagging of very large corpora: Topic-Focus Articulation EvaBuranova - EvaHajicova - PetrSgall + EvaHajicova + PetrSgall C00-1021 buranova-etal-2000-tagging @@ -174,7 +174,7 @@ Word Sense Disambiguation of Adjectives Using Probabilistic Networks GeraldChao - Michael G.Dyer + Michael G.Dyer C00-1023 chao-dyer-2000-word @@ -195,14 +195,14 @@ Automatic Semantic Classification for <fixed-case>C</fixed-case>hinese Unknown Compound Nouns - Keh-JiannChen - Chao-janChen + Keh-JiannChen + Chao-janChen C00-1026 chen-chen-2000-automatic Empirical Estimates of Adaptation: The chance of Two Noriegas is closer to p/2 than p2 - Kenneth W.Church + Kenneth W.Church C00-1027 church-2000-empirical @@ -216,7 +216,7 @@ A Class-based Probabilistic approach to Structural Disambiguation StephenClark - DavidWeir + DavidWeir C00-1029 clark-weir-2000-class @@ -224,14 +224,14 @@ Extracting the Names of Genes and Gene Products with a Hidden <fixed-case>M</fixed-case>arkov Model NigelCollier ChikashiNobata - Jun-ichiTsujii + Jun-ichiTsujii C00-1030 collier-etal-2000-extracting An Empirical Investigation of the Relation Between Discourse Structure and Co-Reference DanCristea - NancyIde + NancyIde DanielMarcu ValentinTablan C00-1031 @@ -239,20 +239,20 @@ Morphological Rule Induction for Terminology Acquisition - BeatriceDaille + BeatriceDaille C00-1032 daille-2000-morphological Authoring Multimedia Documents using <fixed-case>WYSIWYM</fixed-case> Editing - Keesvan Deemter + Keesvan Deemter RichardPower C00-1033 van-deemter-power-2000-authoring Theory Refinement and Natural Language Learning - HerveDejean + HerveDejean C00-1034 dejean-2000-theory @@ -265,7 +265,7 @@ <fixed-case>XML</fixed-case> and Multilingual Document Authoring: Convergent Trends MarcDymetman - VeronikaLux + VeronikaLux AarneRanta C00-1036 dymetman-etal-2000-xml @@ -279,13 +279,13 @@ Directional Constraint Evaluation in <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory - JasonEisner + JasonEisner C00-1038 eisner-2000-directional Boosting Variant Recognition with Light Semantics - CecileFabre + CecileFabre ChristianJacquemin C00-1039 fabre-jacquemin-2000-boosting @@ -299,7 +299,7 @@ Deletions and their reconstruction in tectogrammatical syntactic tagging of very large corpora - EvaHajicová + EvaHajicová MarkétaCeplová C00-1041 hajicova-ceplova-2000-deletions @@ -314,30 +314,30 @@ Experiments with Open-Domain Textual Question Answering - Sanda M.Harabagiu - Marius A.Pasca - Steven J.Maiorano + Sanda M.Harabagiu + Marius A.Pasca + Steven J.Maiorano C00-1043 harabagiu-etal-2000-experiments Effects of Adjective Orientation and Gradability on Sentence Subjectivity VasileiosHatzivassiloglou - Janyce M.Wiebe + Janyce M.Wiebe C00-1044 hatzivassiloglou-wiebe-2000-effects Pronominalization revisited - RenateHenschel + RenateHenschel HuaCheng - MassimoPoesio + MassimoPoesio C00-1045 henschel-etal-2000-pronominalization Automatic Refinement of a <fixed-case>POS</fixed-case> Tagger Using a Reliable Parser and Plain Text Corpora - HidekiHirakawa + HidekiHirakawa KenjiOno YumikoYoshimura C00-1046 @@ -347,15 +347,15 @@ A Method of Measuring Term Representativeness - Baseline Method Using Co-occurrence Distribution TomHisamitsu YoshikiNiwa - Jun-ichiTsujii + Jun-ichiTsujii C00-1047 hisamitsu-etal-2000-method A Rule Induction Approach to Modeling Regional Pronunciation Variation - VeroniqueHoste + VeroniqueHoste StevenGillis - WalterDaelemans + WalterDaelemans C00-1048 hoste-etal-2000-rule @@ -371,7 +371,7 @@ YumiIchimura YoshimiSaito KazuhiroKimura - HidekiHirakawa + HidekiHirakawa C00-1050 ichimura-etal-2000-kana @@ -391,27 +391,27 @@ Deixis and Conjunction in Multimodal Systems - MichaelJohnston + MichaelJohnston C00-1053 johnston-2000-deixis Finite-state Multimodal Parsing and Understanding - MichaelJohnston - SrinivasBangalore + MichaelJohnston + SrinivasBangalore C00-1054 johnston-bangalore-2000-finite Toward a Scoring Function for Quality-Driven Machine Translation - Douglas A.Jones + Douglas A.Jones Gregory M.Rusk C00-1055 jones-rusk-2000-toward An <fixed-case>E</fixed-case>nglish to <fixed-case>K</fixed-case>orean Transliteration Model of Extended <fixed-case>M</fixed-case>arkov Window - Sung YoungJung + Sung YoungJung SungLimHong EunokPaek C00-1056 @@ -429,7 +429,7 @@ Automatic Thesaurus Generation through Multiple Filtering KyoKageura KeitaTsuji - Akiko N.Aizawa + Akiko N.Aizawa C00-1058 kageura-etal-2000-automatic @@ -447,21 +447,21 @@ HiroshiKanayama KentaroTorisawa YutakaMitsuishi - Jun’ichiTsujii + Jun’ichiTsujii C00-1060 kanayama-etal-2000-hybrid <fixed-case>E</fixed-case>nglish-to-<fixed-case>K</fixed-case>orean Transliteration using Multiple Unbounded Overlapping Phoneme Chunks In-HoKang - GilChangKim + GilChangKim C00-1061 kang-kim-2000-english <fixed-case>LFG</fixed-case> Generation Produces Context-free Languages - Ronald M.Kaplan - JurgenWedekind + Ronald M.Kaplan + JurgenWedekind C00-1062 kaplan-wedekind-2000-lfg @@ -490,7 +490,7 @@ Automatic Text Categorization by Unsupervised Learning YoungjoongKo - JungyunSeo + JungyunSeo C00-1066 ko-seo-2000-automatic @@ -510,11 +510,11 @@ Multilinguality in a Text Generation System For Three <fixed-case>S</fixed-case>lavic Languages - Geert-JanKruijff + Geert-JanKruijff ElkeTeich - JohnBateman - IvanaKruijff-Korbayova - HanaSkoumalova + JohnBateman + IvanaKruijff-Korbayova + HanaSkoumalova SergeSharoff LenaSokolova TonyHartley @@ -526,8 +526,8 @@ Lexicalized Hidden <fixed-case>M</fixed-case>arkov Models for Part-of-Speech Tagging Sang-ZooLee - Jun-ichiTsujii - Hae-ChangRim + Jun-ichiTsujii + Hae-ChangRim C00-1070 lee-etal-2000-lexicalized @@ -539,17 +539,17 @@ The Automated Acquisition of Topic Signatures for Text Summarization - Chin-YewLin - EduardHovy + Chin-YewLin + EduardHovy C00-1072 lin-hovy-2000-automated Automatic Optimization of Dialogue Management - Diane J.Litman - Michael S.Kearns + Diane J.Litman + Michael S.Kearns SatinderSingh - Marilyn A.Walker + Marilyn A.Walker C00-1073 litman-etal-2000-automatic @@ -565,7 +565,7 @@ Application of Analogical Modelling to Example Based Machine Translation ChristosMalavazosi - SteliosPiperidis + SteliosPiperidis C00-1075 malavazosi-piperidis-2000-application @@ -584,15 +584,15 @@ Chart-Based Transfer Rule Application in Machine Translation - AdamMeyers + AdamMeyers MichikoKosaka - RalphGrishman + RalphGrishman C00-1078 meyers-etal-2000-chart Representation and Recognition Method for Multi-Word Translation Units in <fixed-case>K</fixed-case>orean-to-<fixed-case>J</fixed-case>apanese <fixed-case>MT</fixed-case> System - KyonghiMoon + KyonghiMoon Jong-HyeokLee C00-1079 moon-lee-2000-representation @@ -624,7 +624,7 @@ Taking Account of the User’s View in 3<fixed-case>D</fixed-case> Multimodal Instruction Dialogue - Yukiko I.Nakano + Yukiko I.Nakano KenjiImamura HisashiOhara C00-1083 @@ -657,7 +657,7 @@ Informed Parsing for Coordination with <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar - Jong C.Park + Jong C.Park Hyung JoonCho C00-2086 park-cho-2000-informed @@ -730,12 +730,12 @@ Compiling Language Models from a Linguistically Motivated Unification Grammar - MannyRayner - Beth AnnHockey + MannyRayner + Beth AnnHockey FrankieJames Elizabeth OwenBratt - SharonGoldwater - Jean MarkGawron + SharonGoldwater + Jean MarkGawron C00-2097 rayner-etal-2000-compiling @@ -754,7 +754,7 @@ Automatic Extraction of Subcategorization Frames for <fixed-case>C</fixed-case>zech AnoopSarkar - DanielZeman + DanielZeman C00-2100 sarkar-zeman-2000-automatic @@ -781,14 +781,14 @@ Experiments in Automated Lexicon Building for Text Searching BarrySchiffman - Kathleen R.McKeown + Kathleen R.McKeown C00-2104 schiffman-mckeown-2000-experiments Robust <fixed-case>G</fixed-case>erman Noun Chunking With a Probabilistic Context-Free Grammar HelmutSchmid - SabineSchulte im Walde + SabineSchulte im Walde C00-2105 schmid-schulte-im-walde-2000-robust @@ -801,13 +801,13 @@ Realizing Expressions of Doubt in Collaborative Dialogue LeahSchroeder - SandraCarberry + SandraCarberry C00-2107 schroeder-carberry-2000-realizing Clustering Verbs Semantically According to their Alternation Behaviour - SabineSchulte im Walde + SabineSchulte im Walde C00-2108 schulte-im-walde-2000-clustering @@ -827,8 +827,8 @@ On <fixed-case>UNL</fixed-case> as the future “html of the linguistic content” & the reuse of existing <fixed-case>NLP</fixed-case> components in <fixed-case>UNL</fixed-case>-related applications with the example of a <fixed-case>UNL</fixed-case>-<fixed-case>F</fixed-case>rench deconverter - GillesSérasset - ChristianBoitet + GillesSérasset + ChristianBoitet C00-2111 serasset-boitet-2000-unl @@ -855,8 +855,8 @@ Extracting semantic clusters from the alignment of definitions - GerardoSierra - JohnMcNaught + GerardoSierra + JohnMcNaught C00-2115 sierra-mcnaught-2000-extracting @@ -894,7 +894,7 @@ Matching a tone-based and tune-based approach to <fixed-case>E</fixed-case>nglish intonation for concept-to-speech generation ElkeTeich - Catherine I.Watson + Catherine I.Watson CecilePereira C00-2120 teich-etal-2000-matching @@ -902,8 +902,8 @@ Automatic Extraction of Semantic Relations from Specialized Corpora AristomenisThanopoulos - NikosFakotakis - GeorgeKokkinakis + NikosFakotakis + GeorgeKokkinakis C00-2121 thanopoulos-etal-2000-automatic @@ -916,16 +916,16 @@ Word Re-ordering and <fixed-case>DP</fixed-case>-based Search in Statistical Machine Translation - ChristophTillmann - HermannNey + ChristophTillmann + HermannNey C00-2123 tillmann-ney-2000-word Applying System Combination to Base Noun Phrase Identification - Erik F.Tjong Kim Sang - WalterDaelemans - HerveDejean + Erik F.Tjong Kim Sang + WalterDaelemans + HerveDejean RobKoeling YuvalKrymolowski VasinPunyakanok @@ -951,7 +951,7 @@ Toward the “At-a-glance” Summary: Phrase-representation Summarization Method - YoshihiroUeda + YoshihiroUeda MamikoOka TakahiroKoyama TadanobuMiyauchi @@ -969,14 +969,14 @@ Multi-Topic Multi-Document Summarization MasaoUtiyama - KoitiHasida + KoitiHasida C00-2129 utiyama-hasida-2000-multi Corpus-based Development and Evaluation of a System for Processing Definite Descriptions - RenataVieira - MassimoPoesio + RenataVieira + MassimoPoesio C00-2130 vieira-poesio-2000-corpus @@ -997,14 +997,14 @@ Prosody and the Resolution of Pronominal Anaphora MariaWolters - Donna K.Byron + Donna K.Byron C00-2133 wolters-byron-2000-prosody Lexicalized Tree Automata-based Grammars for Translating Conversational Texts KiyoshiYamabana - ShinichiAndo + ShinichiAndo KiyomiMimura C00-2134 yamabana-etal-2000-lexicalized @@ -1012,14 +1012,14 @@ Acquisition of Phrase-level Bilingual Correspondence using Dependency Structure KaoruYamamoto - YujiMatsumoto + YujiMatsumoto C00-2135 yamamoto-matsumoto-2000-acquisition Automatic Acquisition of Domain Knowledge for Information Extraction RomanYangarber - RalphGrishman + RalphGrishman PasiTapanainen SiljaHuttunen C00-2136 @@ -1027,7 +1027,7 @@ More accurate tests for the statistical significance of result differences - AlexanderYeh + AlexanderYeh C00-2137 yeh-2000-accurate @@ -1041,14 +1041,14 @@ <fixed-case>ABL</fixed-case>: Alignment-Based Learning - Mennovan Zaanen + Mennovan Zaanen C00-2139 van-zaanen-2000-abl <fixed-case>DIASUMM</fixed-case>: Flexible Summarization of Spontaneous Dialogues in Unrestricted Domains KlausZechner - AlexWaibel + AlexWaibel C00-2140 zechner-waibel-2000-diasumm @@ -1062,13 +1062,13 @@ Rapid Development of Translation Tools: Application to <fixed-case>P</fixed-case>ersian and <fixed-case>T</fixed-case>urkish Jan W.Amtrup KarineMegerdoomian - RemiZajac + RemiZajac C00-2142 amtrup-etal-2000-rapid Dependency Treebank for <fixed-case>R</fixed-case>ussian: Concept, Tools, Types of Information - IgorBoguslavsky + IgorBoguslavsky SvetlanaGrigorieva NikolaiGrigoriev LeonidKreidlin @@ -1078,7 +1078,7 @@ Thistle and Interarbora - JoCalder + JoCalder C00-2144 calder-2000-thistle @@ -1090,25 +1090,25 @@ Structural disambiguation of morpho-syntactic categorial parsing for <fixed-case>K</fixed-case>orean - JeongwonCha - GeunbaeLee + JeongwonCha + GeunbaeLee C00-2146 cha-lee-2000-structural The Week at a Glance - Cross-language Cross-document Information Extraction and Translation - JimCowie + JimCowie YevgenyLudovik HugoMolina-Salgado - SergeiNirenburg + SergeiNirenburg C00-2147 cowie-etal-2000-week Integrating compositional semantics into a verb lexicon - Hoa TrangDang - KarinKipper - MarthaPalmer + Hoa TrangDang + KarinKipper + MarthaPalmer C00-2148 dang-etal-2000-integrating @@ -1121,15 +1121,15 @@ Language Identification in Unknown Signals - JohnElliott - EricAtwell + JohnElliott + EricAtwell BillWhyte C00-2150 elliott-etal-2000-language An Experiment On Incremental Analysis Using Robust Parsing Techniques - KilianFoth + KilianFoth WolfgangMenzel Horia F.Pop IngoSchroder @@ -1138,7 +1138,7 @@ An Integrated Architecture for Example-Based Machine Translation - AlexanderFranz + AlexanderFranz KeikoHoriguchi LeiDuan DorisEcker @@ -1157,14 +1157,14 @@ <fixed-case>W</fixed-case>eb<fixed-case>DIPLOMAT</fixed-case>: A Web-Based Interactive Machine Translation System ChristopherHogan - RobertFrederking + RobertFrederking C00-2154 hogan-frederking-2000-webdiplomat An <fixed-case>HPSG</fixed-case>-to-<fixed-case>CFG</fixed-case> Approximation of <fixed-case>J</fixed-case>apanese - BerndKiefer - Hans-UlrichKrieger + BerndKiefer + Hans-UlrichKrieger MelanieSiegel C00-2155 kiefer-etal-2000-hpsg @@ -1172,7 +1172,7 @@ Decision-Tree based Error Correction for Statistical Phrase Break Prediction in <fixed-case>K</fixed-case>orean ByeongchangKim - GeunbaeLee + GeunbaeLee C00-2156 kim-lee-2000-decision @@ -1192,7 +1192,7 @@ A Bootstrapping Method for Extracting Bilingual Text Pairs - HiroshiMasuichi + HiroshiMasuichi RaymondFlournoy StefanKaufmann StanleyPeters @@ -1201,7 +1201,7 @@ Producing More Readable Extracts by Revising Them - ManabuOkumura + ManabuOkumura C00-2160 okumura-2000-producing @@ -1214,15 +1214,15 @@ Improving <fixed-case>SMT</fixed-case> quality with morpho-syntactic analysis - SonjaNießen - HermannNey + SonjaNießen + HermannNey C00-2162 niessen-ney-2000-improving A Comparison of Alignment Models for Statistical Machine Translation - Franz JosefOch - HermannNey + Franz JosefOch + HermannNey C00-2163 och-ney-2000-comparison @@ -1237,8 +1237,8 @@ <fixed-case>KCAT</fixed-case>: A <fixed-case>K</fixed-case>orean Corpus Annotating Tool Minimizing Human Intervention Won-HeRyu Jin-DongKim - Hae-ChangRim - Heui-SeokLim + Hae-ChangRim + Heui-SeokLim C00-2165 ryu-etal-2000-kcat @@ -1258,7 +1258,7 @@ Acquisition of a Language Computational Model for <fixed-case>NLP</fixed-case> SvetlanaSheremetyeva - SergeiNirenburg + SergeiNirenburg C00-2168 sheremetyeva-nirenburg-2000-acquisition @@ -1286,7 +1286,7 @@ Incorporating Metaphonemes in a Multilingual Lexicon CaroleTiberius - LynneCahill + LynneCahill C00-2171 tiberius-cahill-2000-incorporating @@ -1301,7 +1301,7 @@ <fixed-case>XMLT</fixed-case>rans: a <fixed-case>J</fixed-case>ava-based <fixed-case>XML</fixed-case> Transformation Language for Structured Data DerekWalker DominiquePetitpierre - SusanArmstrong + SusanArmstrong C00-2173 walker-etal-2000-xmltrans @@ -1309,13 +1309,13 @@ <fixed-case>C</fixed-case>hinese Generation in a Spoken Dialogue Translation System HuaWu TaiyiHuang - ChengqingZong + ChengqingZong C00-2174 wu-etal-2000-chinese Comparing two trainable grammatical relations finders - AlexanderYeh + AlexanderYeh C00-2175 yeh-2000-comparing diff --git a/data/xml/C02.xml b/data/xml/C02.xml index 6be28ef8d8..cf632bff82 100644 --- a/data/xml/C02.xml +++ b/data/xml/C02.xml @@ -20,16 +20,16 @@ A Cheap and Fast Way to Build Useful Translation Lexicons - DanTufis + DanTufis C02-1002 tufis-2002-cheap Learning <fixed-case>C</fixed-case>hinese Bracketing Knowledge Based on a Bilingual Language Model - Yajuan + Yajuan ShengLi - TiejunZhao - MuyunYang + TiejunZhao + MuyunYang C02-1003 lu-etal-2002-learning @@ -78,7 +78,7 @@ WeiWang MingZhou Jin-XiaHuang - Chang-NingHuang + Chang-NingHuang C02-1010 wang-etal-2002-structure @@ -95,22 +95,22 @@ JianfengGao LeiZhang MingZhou - ChangningHuang + ChangningHuang C02-1012 sun-etal-2002-chinese High Precision Extraction of Grammatical Relations - JohnCarroll - TedBriscoe + JohnCarroll + TedBriscoe C02-1013 carroll-briscoe-2002-high Semiautomatic Labelling of Semantic Features - ArantzaDíaz de Ilarraza - AingeruMayor - KepaSarasola + ArantzaDíaz de Ilarraza + AingeruMayor + KepaSarasola C02-1014 diaz-de-ilarraza-etal-2002-semiautomatic @@ -128,8 +128,8 @@ Corpus-based Generation of Numeral Classifier using Phrase Alignment - MichaelPaul - EiichiroSumita + MichaelPaul + EiichiroSumita SeiichiYamamoto C02-1017 paul-etal-2002-corpus @@ -144,7 +144,7 @@ The Grammatical Function Analysis between <fixed-case>K</fixed-case>orean Adnoun Clause and Noun Phrase by Using Support Vector Machines SongwookLee Tae-YeoubJang - JungyunSeo + JungyunSeo C02-1019 lee-etal-2002-grammatical @@ -157,8 +157,8 @@ (Semi-)Automatic Detection of Errors in <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case>-Tagged Corpora - PavelKvĕtoň - KarelOliva + PavelKvĕtoň + KarelOliva C02-1021 kveton-oliva-2002-semi @@ -189,20 +189,20 @@ The Effectiveness of Dictionary and Web-Based Answer Reranking - Chin-YewLin + Chin-YewLin C02-1026 lin-2002-effectiveness Shallow Language Processing Architecture for <fixed-case>B</fixed-case>ulgarian - HristoTanev - RuslanMitkov + HristoTanev + RuslanMitkov C02-1027 tanev-mitkov-2002-shallow Parsing Mildly Context-Sensitive Languages with Thread Automata - ÉricVillemonte de la Clergerie + ÉricVillemonte de la Clergerie C02-1028 villemonte-de-la-clergerie-2002-parsing @@ -214,24 +214,24 @@ The Simple Core and the Complex Periphery of Natural Language - a Formal and a Computational View - PetrSgall - AlenaBŏhmová + PetrSgall + AlenaBŏhmová C02-1030 sgall-bohmova-2002-simple The <fixed-case>C</fixed-case>hinese Aspect System and its Semantic Interpretation GuowenYang - John A.Bateman + John A.Bateman C02-1031 yang-bateman-2002-chinese Improving Alignment Quality in Statistical Machine Translation Using Context-dependent Maximum Entropy Models - Ismael GarcíaVarea - Franz J.Och - HermannNey - FranciscoCasacuberta + Ismael GarcíaVarea + Franz J.Och + HermannNey + FranciscoCasacuberta C02-1032 varea-etal-2002-improving @@ -250,17 +250,17 @@ Semantics-based Representation for Multimodal Interpretation in Conversational Systems - JoyceChai + JoyceChai C02-1035 chai-2002-semantics <fixed-case>E</fixed-case>xtraposition: A Case Study in <fixed-case>G</fixed-case>erman Sentence Realization MichaelGamon - EricRingger + EricRingger ZhuZhang - RobertMoore - SimonCorston-Oliver + RobertMoore + SimonCorston-Oliver C02-1036 gamon-etal-2002-extraposition @@ -279,7 +279,7 @@ Instance Based Learning with Automatic Feature Selection Applied to Word Sense Disambiguation - RadaMihalcea + RadaMihalcea C02-1039 mihalcea-2002-instance @@ -308,9 +308,9 @@ Using Knowledge to Facilitate Factoid Answer Pinpointing - EduardHovy + EduardHovy UlfHermjakob - Chin-YewLin + Chin-YewLin DeepakRavichandran C02-1042 hovy-etal-2002-using @@ -324,20 +324,20 @@ Considerations of Linking <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with <fixed-case>MRD</fixed-case> ChanghuaYang - Sue J.Ker + Sue J.Ker C02-1044 yang-ker-2002-considerations A Method of Cluster-Based Indexing of Textual Data - AkikoAizawa + AkikoAizawa C02-1045 aizawa-2002-method Translation Selection through Source Word Sense Disambiguation and Target Word Selection Hyun AhLee - Gil ChangKim + Gil ChangKim C02-1046 lee-kim-2002-translation @@ -358,15 +358,15 @@ Unknown Word Extraction for <fixed-case>C</fixed-case>hinese Documents - Keh-JiannChen - Wei-YunMa + Keh-JiannChen + Wei-YunMa C02-1049 chen-ma-2002-unknown Bidirectional Decoding for Statistical Machine Translation TaroWatanabe - EiichiroSumita + EiichiroSumita C02-1050 watanabe-sumita-2002-bidirectional @@ -391,7 +391,7 @@ TsutomuHirao HidekiIsozaki EisakuMaeda - YujiMatsumoto + YujiMatsumoto C02-1053 hirao-etal-2002-extracting @@ -406,7 +406,7 @@ Covering Ambiguity Resolution in <fixed-case>C</fixed-case>hinese Word Segmentation Based on Contextual Information XiaoLuo MaosongSun - Benjamin K.Tsou + Benjamin K.Tsou C02-1055 luo-etal-2002-covering @@ -419,9 +419,9 @@ An Automatic Evaluation Method for Localization Oriented Lexicalised <fixed-case>EBMT</fixed-case> System - JianminYao + JianminYao MingZhou - TiejunZhao + TiejunZhao HaoYu ShengLi C02-1057 @@ -504,20 +504,20 @@ Learning Grammars for Different Parsing Tasks by Partition Search - AnjaBelz + AnjaBelz C02-1068 belz-2002-learning Effective Structural Inference for Large <fixed-case>XML</fixed-case> Documents JasonSankey - Raymond K.Wong + Raymond K.Wong C02-1069 sankey-wong-2002-effective Inducing Information Extraction Systems for New Languages via Cross-language Projection - EllenRiloff + EllenRiloff CharlesSchafer DavidYarowsky C02-1070 @@ -525,7 +525,7 @@ Integrating Shallow Linguistic Processing into a Unification-based <fixed-case>S</fixed-case>panish Grammar - MontserratMarimon + MontserratMarimon C02-1071 marimon-2002-integrating @@ -540,7 +540,7 @@ Meta-evaluation of Summaries in a Cross-lingual Environment using Content-based Metrics HoracioSaggion - DragomirRadev + DragomirRadev SimoneTeufel WaiLam C02-1073 @@ -549,14 +549,14 @@ Text Categorization using Feature Projections YoungjoongKo - JungyunSeo + JungyunSeo C02-1074 ko-seo-2002-text A Novel Disambiguation Method for Unification-Based Grammars Using Probabilistic Context-Free Approximations - BerndKiefer - Hans-UlrichKrieger + BerndKiefer + Hans-UlrichKrieger DetlefPrescher C02-1075 kiefer-etal-2002-novel @@ -565,7 +565,7 @@ Using Language and Translation Models to Select the Best among Outputs from Multiple <fixed-case>MT</fixed-case> Systems YasuhiroAkiba TaroWatanabe - EiichiroSumita + EiichiroSumita C02-1076 akiba-etal-2002-using @@ -587,14 +587,14 @@ Best Analysis Selection in Inflectional Languages AlešHorák - PavelSmrž + PavelSmrž C02-1079 horak-smrz-2002-best An Agent-based Approach to <fixed-case>C</fixed-case>hinese Named Entity Recognition ShirenYe - Tat-SengChua + Tat-SengChua JiminLiu C02-1080 ye-etal-2002-agent @@ -608,7 +608,7 @@ Topic Detection Based on Dialogue History TakayukiNakata - ShinichiAndo + ShinichiAndo AkitoshiOkumura C02-1082 nakata-etal-2002-topic @@ -617,8 +617,8 @@ A Methodology for Terminology-based Knowledge Acquisition and Integration HidekiMima SophiaAnaniadou - GoranNenadic - Jun-IchiTsujii + GoranNenadic + Jun-IchiTsujii C02-1083 mima-etal-2002-methodology @@ -639,9 +639,9 @@ Implicit Ambiguity Resolution Using Incremental Clustering in <fixed-case>K</fixed-case>orean-to-<fixed-case>E</fixed-case>nglish Cross-Language Information Retrieval - Kyung-SoonLee + Kyung-SoonLee KyoKageura - Key-SunChoi + Key-SunChoi C02-1086 lee-etal-2002-implicit @@ -656,14 +656,14 @@ Unsupervised Named Entity Classification Models and their Ensembles Jae-HoKim In-HoKang - Key-SunChoi + Key-SunChoi C02-1088 kim-etal-2002-unsupervised Applying an <fixed-case>NVEF</fixed-case> Word-Pair Identifier to the <fixed-case>C</fixed-case>hinese Syllable-to-Word Conversion Problem Jia-LinTsai - Wen-LianHsu + Wen-LianHsu C02-1089 tsai-hsu-2002-applying @@ -676,7 +676,7 @@ Using Syntactic Analysis to Increase Efficiency in Visualizing Text Collections - JamesHenderson + JamesHenderson PaolaMerlo IvanPetroff GeroldSchneider @@ -712,16 +712,16 @@ Wordform- and Class-based Prediction of the Components of <fixed-case>G</fixed-case>erman Nominal Compounds in an <fixed-case>AAC</fixed-case> System - MarcoBaroni + MarcoBaroni JohannesMatiasek - HaraldTrost + HaraldTrost C02-1096 baroni-etal-2002-wordform Word Sense Disambiguation using Static and Dynamic Sense Vectors - Jong-HoonOh - Key-SunChoi + Jong-HoonOh + Key-SunChoi C02-1097 oh-choi-2002-word @@ -735,8 +735,8 @@ An <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Transliteration Model Using Pronunciation and Contextual Rules - Jong-HoonOh - Key-SunChoi + Jong-HoonOh + Key-SunChoi C02-1099 oh-choi-2002-english @@ -744,20 +744,20 @@ Lenient Default Unification for Robust Processing within Unification Based Grammar Formalisms TakashiNinomiya YusukeMiyao - Jun-IchiTsujii + Jun-IchiTsujii C02-1100 ninomiya-etal-2002-lenient
Detecting Errors in Corpora Using Support Vector Machines TetsujiNakagawa - YujiMatsumoto + YujiMatsumoto C02-1101 nakagawa-matsumoto-2002-detecting Learning How to Answer Questions Using Trivia Games - Gideon S.Mann + Gideon S.Mann C02-1102 mann-2002-learning @@ -765,7 +765,7 @@ Automatic Text Categorization using the Importance of Sentences YoungjoongKo JinwooPark - JungyunSeo + JungyunSeo C02-1103 ko-etal-2002-automatic
@@ -773,14 +773,14 @@ From Shallow to Deep Parsing Using Constraint Satisfaction Jean-MarieBalfourier PhilippeBlache - Tristanvan Rullen + Tristanvan Rullen C02-1104 balfourier-etal-2002-shallow
From Trees to Predicate-argument Structures MariaLiakata - StephenPulman + StephenPulman C02-1105 liakata-pulman-2002-trees @@ -831,9 +831,9 @@
Syntactic Features for High Precision Word Sense Disambiguation - DavidMartínez - EnekoAgirre - LluísMàrquez + DavidMartínez + EnekoAgirre + LluísMàrquez C02-1112 martinez-etal-2002-syntactic @@ -854,8 +854,8 @@
A Maximum Entropy-based Word Sense Disambiguation System - ArmandoSuárez - ManuelPalomar + ArmandoSuárez + ManuelPalomar C02-1115 suarez-palomar-2002-maximum @@ -868,7 +868,7 @@
Extension of <fixed-case>Z</fixed-case>ipf’s Law to Words and Phrases - Le QuanHa + Le QuanHa E. I.Sicilia-Garcia JiMing F. J.Smith @@ -877,7 +877,7 @@ Can Subcategorization Help a Statistical Dependency Parser? - DanielZeman + DanielZeman C02-1118 zeman-2002-subcategorization @@ -911,7 +911,7 @@ Natural Language Interpretations for Heterogeneous Database Access HodongLee - Jong C.Park + Jong C.Park C02-1123 lee-park-2002-natural @@ -932,14 +932,14 @@ Recovering Latent Information in Treebanks DavidChiang - Daniel M.Bikel + Daniel M.Bikel C02-1126 chiang-bikel-2002-recovering Location Normalization for Information Extraction - HuifengLi - Rohini K.Srihari + HuifengLi + Rohini K.Srihari ChengNiu WeiLi C02-1127 @@ -953,22 +953,22 @@ Decision Trees as Explicit Domain Term Definitions - RobertoBasili - Maria TeresaPazienza - Fabio MassimoZanzotto + RobertoBasili + Maria TeresaPazienza + Fabio MassimoZanzotto C02-1129 basili-etal-2002-decision Fine Grained Classification of Named Entities MichaelFleischman - EduardHovy + EduardHovy C02-1130 fleischman-hovy-2002-fine Annotating Topological Fields and Chunks - and Revising <fixed-case>POS</fixed-case> Tags at the Same Time - Frank HenrikMüller + Frank HenrikMüller TylmanUle C02-1131 muller-ule-2002-annotating @@ -988,7 +988,7 @@ Bootstrapping Bilingual Data using Consensus Translation for a Multilingual Instant Messaging System - SrinivasBangalore + SrinivasBangalore VanessaMurdock GiuseppeRiccardi C02-1134 @@ -996,7 +996,7 @@ Non-Sentential Utterances: Grammar and Dialogue Dynamics in Corpus Annotation - RaquelFernández + RaquelFernández JonathanGinzburg C02-1135 fernandez-ginzburg-2002-non @@ -1013,30 +1013,30 @@ A New Probabilistic Model for Title Generation RongJin - Alexander G.Hauptmann + Alexander G.Hauptmann C02-1137 jin-hauptmann-2002-new Towards Automatic Generation of Natural Language Generation Systems JohnChen - SrinivasBangalore - OwenRambow - Marilyn A.Walker + SrinivasBangalore + OwenRambow + Marilyn A.Walker C02-1138 chen-etal-2002-towards Identifying Anaphoric and Non-Anaphoric Noun Phrases to Improve Coreference Resolution VincentNg - ClaireCardie + ClaireCardie C02-1139 ng-cardie-2002-identifying Bringing the Dictionary to the User: The <fixed-case>FOKS</fixed-case> System SlavenBilac - TimothyBaldwin + TimothyBaldwin HozumiTanaka C02-1140 bilac-etal-2002-bringing @@ -1044,7 +1044,7 @@ A Complete Integrated <fixed-case>NLG</fixed-case> System Using <fixed-case>AI</fixed-case> and <fixed-case>NLU</fixed-case> Tools LaurenceDanlos - AdilEl Ghali + AdilEl Ghali C02-1141 danlos-el-ghali-2002-complete @@ -1052,15 +1052,15 @@ Automatic Glossary Extraction: Beyond Terminology Identification YoungjaPark Roy J.Byrd - Branimir K.Boguraev + Branimir K.Boguraev C02-1142 park-etal-2002-automatic Simple Features for <fixed-case>C</fixed-case>hinese Word Sense Disambiguation - Hoa TrangDang + Hoa TrangDang Ching-yiChia - MarthaPalmer + MarthaPalmer Fu-DongChiou C02-1143 dang-etal-2002-simple @@ -1076,7 +1076,7 @@ Building a Large-Scale Annotated <fixed-case>C</fixed-case>hinese Corpus NianwenXue Fu-DongChiou - MarthaPalmer + MarthaPalmer C02-1145 xue-etal-2002-building @@ -1097,7 +1097,7 @@ Investigating the Relationship between Word Segmentation Performance and Retrieval Performance in <fixed-case>C</fixed-case>hinese <fixed-case>IR</fixed-case> FuchunPeng - XiangjiHuang + XiangjiHuang DaleSchuurmans NickCercone C02-1148 @@ -1105,7 +1105,7 @@ Entering Text with a Four-Button Device - KumikoTanaka-Ishii + KumikoTanaka-Ishii YusukeInutsuka MasatoTakeichi C02-1149 @@ -1121,7 +1121,7 @@ Probabilistic Reasoning for Entity & Relation Recognition DanRoth - Wen-tauYih + Wen-tauYih C02-1151 roth-yih-2002-probabilistic @@ -1130,13 +1130,13 @@ KazunoriKomatani TatsuyaKawahara RyosukeIto - Hiroshi G.Okuno + Hiroshi G.Okuno C02-1152 komatani-etal-2002-efficient Generating the <fixed-case>XTAG</fixed-case> <fixed-case>E</fixed-case>nglish Grammar Using Metarules - Carlos A.Prolo + Carlos A.Prolo C02-1153 prolo-2002-generating @@ -1144,7 +1144,7 @@ Unsupervised Learning of Generalized Names RomanYangarber WinstonLin - RalphGrishman + RalphGrishman C02-1154 yangarber-etal-2002-unsupervised @@ -1158,8 +1158,8 @@ Putting Frames in Perspective NancyChang - SriniNarayanan - Miriam R.L.Petruck + SriniNarayanan + Miriam R.L.Petruck C02-1156 chang-etal-2002-putting @@ -1171,8 +1171,8 @@
Study of Practical Effectiveness for Machine Translation Using Recursive Chain-link-type Learning - HiroshiEchizen-ya - KenjiAraki + HiroshiEchizen-ya + KenjiAraki YoshioMomouchi KojiTochinai C02-1158 @@ -1224,7 +1224,7 @@ Language Model Adaptation with Additional Text Generated by Machine Translation HideharuNakajima - HirofumiYamamoto + HirofumiYamamoto TaroWatanabe C02-1164 nakajima-etal-2002-language @@ -1233,21 +1233,21 @@ Complexity of Event Structure in <fixed-case>IE</fixed-case> Scenarios SiljaHuttunen RomanYangarber - RalphGrishman + RalphGrishman C02-1165 huttunen-etal-2002-complexity An Approach Based on Multilingual Thesauri and Model Combination for Bilingual Lexicon Extraction - HervéDéjean - ÉricGaussier + HervéDéjean + ÉricGaussier FatihaSadat C02-1166 dejean-etal-2002-approach Lexical Chains for Question Answering - DanMoldovan + DanMoldovan AdrianNovischi C02-1167 moldovan-novischi-2002-lexical @@ -1255,21 +1255,21 @@ Maximum Entropy Models for Word Sense Disambiguation GeraldChao - Michael G.Dyer + Michael G.Dyer C02-1168 chao-dyer-2002-maximum Open-Domain Voice-Activated Question Answering - SandaHarabagiu - DanMoldovan + SandaHarabagiu + DanMoldovan JoePicone C02-1169 harabagiu-etal-2002-open A Pattern-based Analyzer for <fixed-case>F</fixed-case>rench in the Context of Spoken Language Translation: First Prototype and Evaluation - HervéBlanchon + HervéBlanchon C02-1170 blanchon-2002-pattern @@ -1301,7 +1301,7 @@ Searching the Web by Voice - AlexanderFranz + AlexanderFranz BrianMilch C02-2003 franz-milch-2002-searching @@ -1323,7 +1323,7 @@ An Annotation System for Enhancing Quality of Natural Language Processing HideoWatanabe KatashiNagao - Michael C.McCord + Michael C.McCord ArendseBernth C02-2006 watanabe-etal-2002-annotation @@ -1352,7 +1352,7 @@ The Performance of a Grammar Checker with Deviant Language Input - Janne BondiJohannessen + Janne BondiJohannessen KristinHagen PiaLane C02-2010 @@ -1362,7 +1362,7 @@ Semantic Case Role Detection for Information Extraction RikDe Busser RoxanaAngheluta - Marie-FrancineMoens + Marie-FrancineMoens C02-2011 de-busser-etal-2002-semantic @@ -1377,22 +1377,22 @@ <fixed-case>SOAT</fixed-case>: A Semi-Automatic Domain Ontology Acquisition Tool from <fixed-case>C</fixed-case>hinese Corpus Shih-HungWu - Wen-LianHsu + Wen-LianHsu C02-2013 wu-hsu-2002-soat Recognition Assistance - Treating Errors in Texts Acquired from Various Recognition Processes - GáborPrószéky + GáborPrószéky MátyásNaszódi - BalázsKis + BalázsKis C02-2014 proszeky-etal-2002-recognition Context-Sensitive Electronic Dictionaries - GáborPrószéky - BalázsKis + GáborPrószéky + BalázsKis C02-2015 proszeky-kis-2002-context @@ -1406,13 +1406,13 @@ Robust Interpretation of User Requests for Text Retrieval in a Multimodal Environment AlexandraKlein EstelaPuig-Waldmüller - HaraldTrost + HaraldTrost C02-2017 klein-etal-2002-robust An <fixed-case>XML</fixed-case>-based Document Suite - DietmarRösner + DietmarRösner ManuelaKunze C02-2018 rosner-kunze-2002-xml @@ -1430,22 +1430,22 @@ Looking for Candidate Translational Equivalents in Specialized, Comparable Corpora Yun-ChuangChiao - PierreZweigenbaum + PierreZweigenbaum C02-2020 chiao-zweigenbaum-2002-looking Reasoning in Metaphor Understanding: The <fixed-case>ATT</fixed-case>-Meta Approach and System - JohnBarnden - SheilaGlasbey - MarkLee - AlanWallington + JohnBarnden + SheilaGlasbey + MarkLee + AlanWallington C02-2021 barnden-etal-2002-reasoning Cross Linguistic Phoneme Correspondences - LynneCahill + LynneCahill CaroleTiberius C02-2022 cahill-tiberius-2002-cross @@ -1456,7 +1456,7 @@ NathanCarr GregKamei MikyungKim - Michael J.Pan + Michael J.Pan ChrisSalvador YasuyoSawaki C02-2023 @@ -1466,7 +1466,7 @@ An Indexing Scheme for Typed Feature Structures TakashiNinomiya TakakiMakino - Jun-IchiTsujii + Jun-IchiTsujii C02-2024 ninomiya-etal-2002-indexing @@ -1474,20 +1474,20 @@ The <fixed-case>L</fixed-case>in<fixed-case>GO</fixed-case> Redwoods Treebank: Motivation and Preliminary Applications StephanOepen KristinaToutanova - StuartShieber - ChristopherManning - DanFlickinger + StuartShieber + ChristopherManning + DanFlickinger ThorstenBrants C02-2025 oepen-etal-2002-lingo Creating a Finite-State Parser with Application Semantics - OwenRambow - SrinivasBangalore + OwenRambow + SrinivasBangalore TahirButt AlexisNasr - RichardSproat + RichardSproat C02-2026 rambow-etal-2002-creating @@ -1501,7 +1501,7 @@ <fixed-case>C</fixed-case>hinese Syntactic Parsing Based on Extended <fixed-case>GLR</fixed-case> Parsing Algorithm with <fixed-case>PCFG</fixed-case>* YanZhang BoXu - ChengqingZong + ChengqingZong C02-2028 zhang-etal-2002-chinese
diff --git a/data/xml/C04.xml b/data/xml/C04.xml index 248ce810d9..e8b8b878ae 100644 --- a/data/xml/C04.xml +++ b/data/xml/C04.xml @@ -35,7 +35,7 @@
Discriminative Hidden <fixed-case>M</fixed-case>arkov Modeling with Long State Dependence using a k<fixed-case>NN</fixed-case> Ensemble - GuoDongZhou + GuoDongZhou 22–28 C04-1004 zhou-2004-discriminative @@ -52,7 +52,7 @@ Improved Word Alignment Using a Symmetric Lexicon Model RichardZens EvgenyMatusov - HermannNey + HermannNey 36–42 C04-1006 zens-etal-2004-improved @@ -113,7 +113,7 @@ Modeling of Long Distance Context Dependency - GuoDongZhou + GuoDongZhou 92–98 C04-1014 zhou-2004-modeling @@ -123,7 +123,7 @@ KenjiImamura HideoOkuma TaroWatanabe - EiichiroSumita + EiichiroSumita 99–105 C04-1015 imamura-etal-2004-example @@ -132,7 +132,7 @@ Extending <fixed-case>MT</fixed-case> evaluation tools with translation complexity metrics BogdanBabych DebbieElliott - AnthonyHartley + AnthonyHartley 106–112 C04-1016 babych-etal-2004-extending @@ -140,25 +140,25 @@ Splitting Input Sentence for Machine Translation Using Language Model with Sentence Similarity TakaoDoi - EiichiroSumita + EiichiroSumita 113–119 C04-1017 doi-sumita-2004-splitting Playing the Telephone Game: Determining the Hierarchical Structure of Perspective and Speech Expressions - EricBreck - ClaireCardie + EricBreck + ClaireCardie 120–126 C04-1018 breck-cardie-2004-playing The Queen’s Agents: Using Collaborating Object-Based Dialogue Agents in the Queen’s Communicator - IanO’Neill - PhilipHanna + IanO’Neill + PhilipHanna XingkunLiu - MichaelMcTear + MichaelMcTear 127–133 C04-1019 oneill-etal-2004-queens @@ -194,7 +194,7 @@ A Support System for Revising Titles to Stimulate the Lay Reader’s Interest in Technical Achievements YasukoSenda YasusiSinohara - ManabuOkumura + ManabuOkumura 155–161 C04-1023 senda-etal-2004-support @@ -209,7 +209,7 @@ A Grammar Formalism and Parser for Linearization-based <fixed-case>HPSG</fixed-case> Michael W.Daniels - W. DetmarMeurers + W. DetmarMeurers 169–175 C04-1025 daniels-meurers-2004-grammar @@ -229,14 +229,14 @@ Learning theories from text MariaLiakata - StephenPulman + StephenPulman 183–190 C04-1027 liakata-pulman-2004-learning Generalizing Dimensionality in <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar - Geert-Jan M.Kruijff + Geert-Jan M.Kruijff JasonBaldridge 191–197 C04-1028 @@ -254,16 +254,16 @@ Reordering Constraints for Phrase-Based Statistical Machine Translation RichardZens - HermannNey + HermannNey TaroWatanabe - EiichiroSumita + EiichiroSumita 205–211 C04-1030 zens-etal-2004-reordering Word to word alignment strategies - JörgTiedemann + JörgTiedemann 212–218 C04-1031 tiedemann-2004-word @@ -272,7 +272,7 @@ Symmetric Word Alignments for Statistical Machine Translation EvgenyMatusov RichardZens - HermannNey + HermannNey 219–225 C04-1032 matusov-etal-2004-symmetric @@ -281,8 +281,8 @@ An <fixed-case>NP</fixed-case>-Cluster Based Approach to Coreference Resolution XiaofengYang JianSu - GuoDongZhou - Chew LimTan + GuoDongZhou + Chew LimTan 226–232 C04-1033 yang-etal-2004-np @@ -296,7 +296,7 @@ Classifying Ellipsis in Dialogue: A Machine Learning Approach - RaquelFernández + RaquelFernández JonathanGinzburg ShalomLappin 240–246 @@ -305,7 +305,7 @@ Feature Vector Quality and Distributional Similarity - MaayanGeffet + MaayanGeffet IdoDagan 247–253 C04-1036 @@ -328,7 +328,7 @@ Annotating Grammatical Functions for <fixed-case>G</fixed-case>erman Using Finite-State Cascades - Frank HenrikMüller + Frank HenrikMüller 268–274 C04-1039 muller-2004-annotating @@ -345,16 +345,16 @@ The Importance of Supertagging for Wide-Coverage <fixed-case>CCG</fixed-case> Parsing StephenClark - James R.Curran + James R.Curran 282–288 C04-1041 clark-curran-2004-importance Condition of Projectivity in the Underlying Dependency Structures - KaterinaVeselá - JiriHavelka - EvaHajicová + KaterinaVeselá + JiriHavelka + EvaHajicová 289–295 C04-1042 vesela-etal-2004-condition @@ -369,7 +369,7 @@ Polarization and abstraction of grammatical formalisms as methods for lexical disambiguation - GuillaumeBonfante + GuillaumeBonfante BrunoGuillaume GuyPerrier 303–309 @@ -378,8 +378,8 @@ Improving Word Alignment Quality using Morpho-syntactic Information - HermannNey - MajaPopovic + HermannNey + MajaPopovic 310–314 C04-1045 ney-popovic-2004-improving @@ -390,7 +390,7 @@ ErinFitzgerald GeorgeFoster SimonaGandrabur - CyrilGoutte + CyrilGoutte AlexKulesza AlbertoSanchis NicolaUeffing @@ -401,10 +401,10 @@ Using a Mixture of N-Best Lists from Multiple <fixed-case>MT</fixed-case> Systems in Rank-Sum-Based Confidence Measure for <fixed-case>MT</fixed-case> Outputs YasuhiroAkiba - EiichiroSumita + EiichiroSumita HiromiNakaiwa SeiichiYamamoto - Hiroshi G.Okuno + Hiroshi G.Okuno 322–328 C04-1047 akiba-etal-2004-using @@ -413,7 +413,7 @@ Generating Discourse Structures for Written Text HuongLe Thanh GeethaAbeysinghe - ChristianHuyck + ChristianHuyck 329–335 C04-1048 le-thanh-etal-2004-generating @@ -421,7 +421,7 @@ Talking robots with <fixed-case>L</fixed-case>ego <fixed-case>M</fixed-case>ind<fixed-case>S</fixed-case>torms AlexanderKoller - Geert-JanKruijff + Geert-JanKruijff 336–342 C04-1049 koller-kruijff-2004-talking @@ -436,7 +436,7 @@ Unsupervised Construction of Large Paraphrase Corpora: Exploiting Massively Parallel News Sources - BillDolan + BillDolan ChrisQuirk ChrisBrockett 350–356 @@ -445,7 +445,7 @@ Toward Unsupervised Whole-Corpus Tagging - DayneFreitag + DayneFreitag 357–363 C04-1052 freitag-2004-toward @@ -454,7 +454,7 @@ Evaluating Cross-Language Annotation Transfer in the <fixed-case>M</fixed-case>ulti<fixed-case>S</fixed-case>em<fixed-case>C</fixed-case>or Corpus LuisaBentivogli PamelaForner - EmanuelePianta + EmanuelePianta 364–370 C04-1053 bentivogli-etal-2004-evaluating @@ -469,8 +469,8 @@ Skeletons in the parser: Using a shallow parser to improve deep parsing - MarySwift - JamesAllen + MarySwift + JamesAllen DanielGildea 383–389 C04-1055 @@ -504,7 +504,7 @@ Language Model Adaptation for Statistical Machine Translation via Structured Query Models BingZhao MatthiasEck - StephanVogel + StephanVogel 411–417 C04-1059 zhao-etal-2004-language @@ -563,7 +563,7 @@ <fixed-case>J</fixed-case>apanese Unknown Word Identification by Character-based Chunking MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 459–465 C04-1066 asahara-matsumoto-2004-japanese @@ -585,8 +585,8 @@ Document Re-ranking Based on Automatically Acquired Key Terms in <fixed-case>C</fixed-case>hinese Information Retrieval - LingpengYang - DonghongJi + LingpengYang + DonghongJi LiTang 480–486 C04-1069 @@ -611,8 +611,8 @@ <fixed-case>ORANGE</fixed-case>: a Method for Evaluating Automatic Evaluation Metrics for Machine Translation - Chin-YewLin - Franz JosefOch + Chin-YewLin + Franz JosefOch 501–507 C04-1072 lin-och-2004-orange @@ -620,7 +620,7 @@ Improving a Statistical <fixed-case>MT</fixed-case> System with Automatically Learned Rewrite Patterns FeiXia - MichaelMcCord + MichaelMcCord 508–514 C04-1073 xia-mccord-2004-improving @@ -634,7 +634,7 @@ A High-Performance Coreference Resolution System using a Constraint-based Multi-Agent Strategy - GuoDongZhou + GuoDongZhou JianSu 522–528 C04-1075 @@ -652,7 +652,7 @@ Corpus and Evaluation Measures for Multiple Document Summarization with Multiple Sources TsutomuHirao TakahiroFukusima - ManabuOkumura + ManabuOkumura ChikashiNobata HidetsuguNanba 535–541 @@ -662,7 +662,7 @@ Cascading Use of Soft and Hard Matching Pattern Rules for Weakly Supervised Information Extraction JingXiao - Tat-SengChua + Tat-SengChua HangCui 542–548 C04-1078 @@ -671,7 +671,7 @@ Generating Overview Summaries of Ongoing Email Thread Discussions StephenWan - KathyMcKeown + KathyMcKeown 549–555 C04-1079 wan-mckeown-2004-generating @@ -679,7 +679,7 @@ Part-of-Speech Tagging in Context MicheleBanko - Robert C.Moore + Robert C.Moore 556–561 C04-1080 banko-moore-2004-part @@ -687,7 +687,7 @@ <fixed-case>C</fixed-case>hinese Segmentation and New Word Detection using Conditional Random Fields FuchunPeng - FangfangFeng + FangfangFeng AndrewMcCallum 562–568 C04-1081 @@ -696,7 +696,7 @@ Tagging with Hidden <fixed-case>M</fixed-case>arkov Models Using Ambiguous Tags AlexisNasr - FrédéricBechét + FrédéricBechét AlexandraVolanschi 569–575 C04-1082 @@ -705,14 +705,14 @@ Browsing Help for a Faster Retrieval EricCrestan - Claudede Loupy + Claudede Loupy 576–582 C04-1083 crestan-de-loupy-2004-browsing Incremental Topic Representations - SandaHarabagiu + SandaHarabagiu 583–589 C04-1084 harabagiu-2004-incremental @@ -734,9 +734,9 @@ Enhancing automatic term recognition through recognition of variation - GoranNenadic + GoranNenadic SophiaAnaniadou - JohnMcNaught + JohnMcNaught 604–610 C04-1087 nenadic-etal-2004-enhancing @@ -765,8 +765,8 @@ An Algorithmic Framework for Solving the Decoding Problem in Statistical Machine Translation - RaghavendraUdupa - Tanveer A.Faruquie + RaghavendraUdupa + Tanveer A.Faruquie Hemanta K.Maji 631–637 C04-1091 @@ -789,10 +789,10 @@ Using syntactic information to extract relevant terms for multi-document summarization - EnriqueAmigó + EnriqueAmigó JulioGonzalo - VíctorPeinado - AnselmoPeñas + VíctorPeinado + AnselmoPeñas FelisaVerdejo 652–658 C04-1094 @@ -818,12 +818,12 @@ Linguistically Informed Statistical Models of Constituent Structure for Ordering in Sentence Realization - EricRingger + EricRingger MichaelGamon - Robert C.Moore - DavidRojas + Robert C.Moore + DavidRojas MartineSmets - SimonCorston-Oliver + SimonCorston-Oliver 673–679 C04-1097 ringger-etal-2004-linguistically @@ -833,7 +833,7 @@ Jun-linZhang LeSun Wei-minQu - Yu-fangSun + Yu-fangSun 680–685 C04-1098 zhang-etal-2004-trigger @@ -847,8 +847,8 @@ Question Answering Based on Semantic Structures - SriniNarayanan - SandaHarabagiu + SriniNarayanan + SandaHarabagiu 693–701 C04-1100 narayanan-harabagiu-2004-question @@ -857,7 +857,7 @@ Combining Linguistic Features with Weighted <fixed-case>B</fixed-case>ayesian Classifier for Temporal Reference Processing GuihongCao WenjieLi - Kam-FaiWong + Kam-FaiWong ChunfaYuan 702–708 C04-1101 @@ -884,8 +884,8 @@ Subcategorization Acquisition and Evaluation for <fixed-case>C</fixed-case>hinese Verbs XiwuHan - TiejunZhao - HaoliangQi + TiejunZhao + HaoliangQi HaoYu 723–728 C04-1104 @@ -907,7 +907,7 @@ Probabilistic Sentence Reduction Using Support Vector Machines - Minh LeNguyen + Minh LeNguyen AkiraShimazu SusumuHoriguchi Bao TuHo @@ -918,7 +918,7 @@ Improving Chronological Sentence Ordering by Precedence Relation - NaoakiOkazaki + NaoakiOkazaki YutakaMatsuo MitsuruIshizuka 750–756 @@ -928,8 +928,8 @@ Discriminative Slot Detection Using Kernel Methods ShubinZhao - AdamMeyers - RalphGrishman + AdamMeyers + RalphGrishman 757–763 C04-1109 zhao-etal-2004-discriminative @@ -946,7 +946,7 @@ Towards Terascale Semantic Acquisition PatrickPantel DeepakRavichandran - EduardHovy + EduardHovy 771–777 C04-1111 pantel-etal-2004-towards @@ -968,8 +968,8 @@ Improving Statistical Machine Translation in the Medical Domain using the Unified Medical Language system MatthiasEck - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel 792–798 C04-1114 eck-etal-2004-improving @@ -993,8 +993,8 @@ Cognate Mapping - A Heuristic Strategy for the Semi-Supervised Acquisition of a <fixed-case>S</fixed-case>panish Lexicon from a <fixed-case>P</fixed-case>ortuguese Seed Lexicon - StefanSchulz - KornelMarkó + StefanSchulz + KornelMarkó EduardoSbrissia PercyNohama UdoHahn @@ -1016,7 +1016,7 @@ Back Transliteration from <fixed-case>J</fixed-case>apanese to <fixed-case>E</fixed-case>nglish using Target <fixed-case>E</fixed-case>nglish Context IsaoGoto - NaotoKato + NaotoKato TerumasaEhara HidekiTanaka 827–833 @@ -1055,7 +1055,7 @@ Detecting Multiword Verbs in the <fixed-case>E</fixed-case>nglish Sublanguage of <fixed-case>MEDLINE</fixed-case> Abstracts ChunXiao - DietmarRösner + DietmarRösner 861–867 C04-1124 xiao-rosner-2004-detecting @@ -1079,7 +1079,7 @@ Cross-lingual Information Extraction System Evaluation KiyoshiSudo SatoshiSekine - RalphGrishman + RalphGrishman 882–888 C04-1127 sudo-etal-2004-cross @@ -1087,7 +1087,7 @@ Detection of Question-Answer Pairs in Email Conversations LokeshShrestha - KathleenMcKeown + KathleenMcKeown 889–895 C04-1128 shrestha-mckeown-2004-detection @@ -1096,7 +1096,7 @@ Syntactic Simplification for Improving Content Selection in Multi-Document Summarization AdvaithSiddharthan AniNenkova - KathleenMcKeown + KathleenMcKeown 896–902 C04-1129 siddharthan-etal-2004-syntactic @@ -1104,7 +1104,7 @@ Trajectory Based Word Sense Disambiguation XiaojieWang - YujiMatsumoto + YujiMatsumoto 903–909 C04-1130 wang-matsumoto-2004-trajectory @@ -1126,7 +1126,7 @@ Automated Induction of Sense in Context - JamesPustejovsky + JamesPustejovsky PatrickHanks AnnaRumshisky 924–930 @@ -1159,7 +1159,7 @@ Identification of Confusable Drug Names: A New Approach and Evaluation Methodology GrzegorzKondrak - BonnieDorr + BonnieDorr 952–958 C04-1137 kondrak-dorr-2004-identification @@ -1177,7 +1177,7 @@ Linguistic profiling of texts for the purpose of language verification - Hansvan Halteren + Hansvan Halteren NellekeOostdijk 966–972 C04-1139 @@ -1212,7 +1212,7 @@ <fixed-case>FASIL</fixed-case> Email Summarisation System AngeloDalli YunqingXia - YorickWilks + YorickWilks 994–1000 C04-1143 dalli-etal-2004-fasil @@ -1229,9 +1229,9 @@ Morpheme-based Derivation of Bipolar Semantic Orientation of <fixed-case>C</fixed-case>hinese Words Raymond W.M.Yuen Terence Y.W.Chan - Tom B.Y.Lai - O.Y.Kwong - Benjamin K.Y.Tsou + Tom B.Y.Lai + O.Y.Kwong + Benjamin K.Y.Tsou 1008–1014 C04-1145 yuen-etal-2004-morpheme @@ -1239,23 +1239,23 @@ Characterising Measures of Lexical Distributional Similarity JulieWeeds - DavidWeir - DianaMcCarthy + DavidWeir + DianaMcCarthy 1015–1021 C04-1146 weeds-etal-2004-characterising Fast Computation of Lexical Affinity Models - EgidioTerra - Charles L. A.Clarke + EgidioTerra + Charles L. A.Clarke 1022–1028 C04-1147 terra-clarke-2004-fast Online Generic Editing of Heterogeneous Dictionary Entries in Papillon Project - MathieuMangeot + MathieuMangeot DavidThevenin 1029–1035 C04-1148 @@ -1266,8 +1266,8 @@ TakehitoUtsuro KoheiHino MitsuhiroKida - SeiichiNakagawa - SatoshiSato + SeiichiNakagawa + SatoshiSato 1036–1042 C04-1149 utsuro-etal-2004-integrating @@ -1275,7 +1275,7 @@ Quantitative and Qualitative Evaluation of the <fixed-case>O</fixed-case>nto<fixed-case>L</fixed-case>earn Ontology Learning System RobertoNavigli - PaolaVelardi + PaolaVelardi AlessandroCucchiarelli FrancescaNeri 1043–1050 @@ -1302,10 +1302,10 @@ Learning <fixed-case>G</fixed-case>reek Verb Complements: Addressing the Class Imbalance - KatiaKermanidis + KatiaKermanidis ManolisMaragoudakis - NikosFakotakis - GeorgeKokkinakis + NikosFakotakis + GeorgeKokkinakis 1065–1071 C04-1153 kermanidis-etal-2004-learning @@ -1322,16 +1322,16 @@ A Flexible Example Annotation Schema: Translation Corresponding Tree Representation FaiWong - Dong ChengHu - Yu HangMao - Ming ChuiDong + Dong ChengHu + Yu HangMao + Ming ChuiDong 1079–1085 C04-1155 wong-etal-2004-flexible Knowledge Intensive Word Alignment with <fixed-case>KNOWA</fixed-case> - EmanuelePianta + EmanuelePianta LuisaBentivogli 1086–1092 C04-1156 @@ -1349,7 +1349,7 @@ KazunoriKomatani TeruhisaMisu TatsuyaKawahara - Hiroshi G.Okuno + Hiroshi G.Okuno 1100–1106 C04-1158 komatani-etal-2004-efficient @@ -1373,7 +1373,7 @@ Acquisition of Semantic Classes for Adjectives from Distributional Evidence - GemmaBoleda + GemmaBoleda ToniBadia EloiBatlle 1119–1125 @@ -1382,7 +1382,7 @@ <fixed-case>P</fixed-case>age<fixed-case>R</fixed-case>ank on Semantic Networks, with Application to Word Sense Disambiguation - RadaMihalcea + RadaMihalcea PaulTarau ElizabethFiga 1126–1132 @@ -1391,9 +1391,9 @@ A Semantic-based Approach to Interoperabiltity of Classification Hierarchies: Evaluation of Linguistic Techniques - BernardoMagnini - ManuelaSperanza - ChristianGirardi + BernardoMagnini + ManuelaSperanza + ChristianGirardi 1133–1139 C04-1163 magnini-etal-2004-semantic @@ -1403,7 +1403,7 @@ Jui-FengYeh Chung-HsienWu Ming-JunChen - Liang-ChihYu + Liang-ChihYu 1140–1146 C04-1164 yeh-etal-2004-automated @@ -1438,10 +1438,10 @@ A Unified Approach in Speech-to-Speech Translation: Integrating Features of Speech recognition and Machine Translation RuiqiangZhang GenichiroKikui - HirofumiYamamoto - FrankSoong + HirofumiYamamoto + FrankSoong TaroWatanabe - Wai KitLo + Wai KitLo 1168–1174 C04-1168 zhang-etal-2004-unified @@ -1455,8 +1455,8 @@ Lexicalisation strategies in cooperative question-answering systems - FarahBenamara - PatrickSaint-Dizier + FarahBenamara + PatrickSaint-Dizier 1179–1185 C04-1170 benamara-saint-dizier-2004-lexicalisation @@ -1512,19 +1512,19 @@ Automatic Identification of Infrequent Word Senses - DianaMcCarthy + DianaMcCarthy RobKoeling JulieWeeds - JohnCarroll + JohnCarroll 1220–1226 C04-1177 mccarthy-etal-2004-automatic Semiautomatic Extension of <fixed-case>C</fixed-case>ore<fixed-case>N</fixed-case>et using a Bootstrapping Mechanism on Corpus-based Co-occurrences - ChrisBiemann - Sa-ImShin - Key-SunChoi + ChrisBiemann + Sa-ImShin + Key-SunChoi 1227–1232 C04-1178 biemann-etal-2004-semiautomatic @@ -1533,7 +1533,7 @@ <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et-based Semantic Parsing using Maximum Entropy Models NamheeKwon MichaelFleischman - EduardHovy + EduardHovy 1233–1239 C04-1179 kwon-etal-2004-framenet @@ -1542,8 +1542,8 @@ Wide-Coverage Semantic Representations from a <fixed-case>CCG</fixed-case> Parser JohanBos StephenClark - MarkSteedman - James R.Curran + MarkSteedman + James R.Curran JuliaHockenmaier 1240–1246 C04-1180 @@ -1563,14 +1563,14 @@ AndreasHagen NicholasRomanyshyn SeanMartin - BryanPellom + BryanPellom 1254–1260 C04-1182 lee-etal-2004-analysis Combining clues for lexical level aligning using the Null hypothesis approach - OlivierKraif + OlivierKraif BoxingChen 1261–1264 C04-1183 @@ -1578,8 +1578,8 @@ Comparing Semantically Related Sentences: The Case of Paraphrase Versus Subsumption - JahnaOtterbacher - DragomirRadev + JahnaOtterbacher + DragomirRadev 1265–1268 C04-1184 otterbacher-radev-2004-comparing @@ -1593,7 +1593,7 @@ Semantic Role Labeling Using Dependency Trees - KadriHacioglu + KadriHacioglu 1273–1276 C04-1186 hacioglu-2004-semantic @@ -1601,7 +1601,7 @@ Web-based List Question Answering HuiYang - Tat-SengChua + Tat-SengChua 1277–1283 C04-1187 yang-chua-2004-web @@ -1610,20 +1610,20 @@ Information Extraction for Question Answering: Improving Recall Through Syntactic Patterns ValentinJijkoun JoriMur - Maartende Rijke + Maartende Rijke 1284–1290 C04-1188 jijkoun-etal-2004-information <fixed-case>HITIQA</fixed-case>: Towards Analytical Question Answering - SharonSmall - TomekStrzalkowski + SharonSmall + TomekStrzalkowski TingLiu SeanRyan RobertSalkin NobuyukiShimizu - PaulKantor + PaulKantor DianeKelly RobertRittman NinaWacholder @@ -1644,7 +1644,7 @@ Inferring parts of speech for lexical mappings via the <fixed-case>C</fixed-case>yc <fixed-case>KB</fixed-case> TomO’Hara StefanoBertolo - MichaelWitbrock + MichaelWitbrock BjørnAldag JonCurtis KathyPanton @@ -1656,9 +1656,9 @@ Fine-Grained Word Sense Disambiguation Based on Parallel Corpora, Word Alignment, Word Clustering and Aligned Wordnets - DanTufis + DanTufis RaduIon - NancyIde + NancyIde 1312–1318 C04-1192 tufis-etal-2004-fine @@ -1684,7 +1684,7 @@ Creative Discovery in Lexical Ontologies TonyVeale NunoSeco - JerHayes + JerHayes 1333–1338 C04-1195 veale-etal-2004-creative @@ -1702,7 +1702,7 @@ Semantic Role Labeling Via Integer Linear Programming Inference VasinPunyakanok DanRoth - Wen-tauYih + Wen-tauYih DavZimak 1346–1352 C04-1197 @@ -1727,7 +1727,7 @@ Determining the Sentiment of Opinions Soo-MinKim - EduardHovy + EduardHovy 1367–1373 C04-1200 kim-hovy-2004-determining @@ -1736,8 +1736,8 @@ A Language Independent Method for Question Classification ThamarSolorio ManuelPérez-Coutiño - ManuelMontes-y-Gómez - LuisVillaseñor-Pineda + ManuelMontes-y-Gómez + LuisVillaseñor-Pineda AurelioLópez-López 1374–1380 C04-1201 @@ -1758,7 +1758,7 @@ A Natural Language Processing Infrastructure for <fixed-case>T</fixed-case>urkish A. C. CemSay - OzlemCetinoglu + OzlemCetinoglu SenizDemir FaithÖgun 1385–1391 @@ -1768,7 +1768,7 @@ Deep Linguistic Analysis for the Accurate Identification of Predicate-Argument Relations YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 1392–1398 C04-1204 miyao-tsujii-2004-deep diff --git a/data/xml/C08.xml b/data/xml/C08.xml index 0d8406e8cd..2d15526e30 100644 --- a/data/xml/C08.xml +++ b/data/xml/C08.xml @@ -3,7 +3,7 @@ Proceedings of the 22nd International Conference on Computational Linguistics (Coling 2008) - DoniaScott + DoniaScott HansUszkoreit Coling 2008 Organizing Committee
Manchester, UK
@@ -19,7 +19,7 @@ Two-Phased Event Relation Acquisition: Coupling the Relation-Oriented and Argument-Oriented Approaches ShuyaAbe KentaroInui - YujiMatsumoto + YujiMatsumoto 1–8 C08-1001 abe-etal-2008-two @@ -35,8 +35,8 @@
On Robustness and Domain Adaptation using <fixed-case>SVD</fixed-case> for Word Sense Disambiguation - EnekoAgirre - OierLopez de Lacalle + EnekoAgirre + OierLopez de Lacalle 17–24 C08-1003 agirre-lopez-de-lacalle-2008-robustness @@ -50,17 +50,17 @@ Improving Alignments for Better Confusion Networks for Combining Machine Translation Systems - Necip FazilAyan + Necip FazilAyan JingZheng - WenWang + WenWang 33–40 C08-1005 ayan-etal-2008-improving Verification and Implementation of Language-Based Deception Indicators in Civil and Criminal Narratives - JoanBachenko - EileenFitzpatrick + JoanBachenko + EileenFitzpatrick MichaelSchonwetter 41–48 C08-1006 @@ -68,8 +68,8 @@ Enhancing Multilingual Latent Semantic Analysis with Term Alignment Information - Brett W.Bader - Peter A.Chew + Brett W.Bader + Peter A.Chew 49–56 C08-1007 bader-chew-2008-enhancing @@ -116,7 +116,7 @@ <fixed-case>P</fixed-case>ara<fixed-case>M</fixed-case>etric: An Automatic Evaluation Metric for Paraphrasing ChrisCallison-Burch - TrevorCohn + TrevorCohn MirellaLapata 97–104 C08-1013 @@ -126,7 +126,7 @@ Regenerating Hypotheses for Statistical Machine Translation BoxingChen MinZhang - AitiAw + AitiAw HaizhouLi 105–112 C08-1014 @@ -146,15 +146,15 @@ BinChen XiaofengYang JianSu - Chew LimTan + Chew LimTan 121–128 C08-1016 chen-etal-2008-anaphora Latent Morpho-Semantic Analysis: Multilingual Information Retrieval with Character N-Grams and Mutual Information - Peter A.Chew - Brett W.Bader + Peter A.Chew + Brett W.Bader AhmedAbdelali 129–136 C08-1017 @@ -162,7 +162,7 @@ Sentence Compression Beyond Word Deletion - TrevorCohn + TrevorCohn MirellaLapata 137–144 C08-1018 @@ -170,8 +170,8 @@ Mind the Gap: Dangers of Divorcing Evaluations of Summary Content from Linguistic Quality - John M.Conroy - Hoa TrangDang + John M.Conroy + Hoa TrangDang 145–152 C08-1019 conroy-dang-2008-mind @@ -179,10 +179,10 @@ Hybrid Processing for Grammar and Style Checking BertholdCrysmann - NuriaBertomeu + NuriaBertomeu PeterAdolphs - DanielFlickinger - TinaKlüwer + DanielFlickinger + TinaKlüwer 153–160 C08-1020 crysmann-etal-2008-hybrid @@ -190,7 +190,7 @@ <fixed-case>K</fixed-case>now<fixed-case>N</fixed-case>et: Building a Large Net of Knowledge from the Web MontseCuadros - GermanRigau + GermanRigau 161–168 C08-1021 cuadros-rigau-2008-knownet @@ -198,7 +198,7 @@ A Classifier-Based Approach to Preposition and Determiner Error Correction in <fixed-case>L</fixed-case>2 <fixed-case>E</fixed-case>nglish RacheleDe Felice - Stephen G.Pulman + Stephen G.Pulman 169–176 C08-1022 de-felice-pulman-2008-classifier @@ -206,8 +206,8 @@ Pedagogically Useful Extractive Summaries for Science Education Sebastiande la Chica - FaisalAhmad - James H.Martin + FaisalAhmad + James H.Martin TamaraSumner 177–184 C08-1023 @@ -217,7 +217,7 @@ Looking for Trouble StijnDe Saeger KentaroTorisawa - Jun’ichiKazama + Jun’ichiKazama 185–192 C08-1024 de-saeger-etal-2008-looking @@ -253,7 +253,7 @@ A Probabilistic Model for Measuring Grammaticality and Similarity of Automatically Generated Paraphrases of Predicate Phrases AtsushiFujita - SatoshiSato + SatoshiSato 225–232 C08-1029 fujita-sato-2008-probabilistic @@ -285,15 +285,15 @@ Statistical Anaphora Resolution in Biomedical Texts CarolineGasperin - TedBriscoe + TedBriscoe 257–264 C08-1033 gasperin-briscoe-2008-statistical Instance-Based Ontology Population Exploiting Named-Entity Substitution - ClaudioGiuliano - AlfioGliozzo + ClaudioGiuliano + AlfioGliozzo 265–272 C08-1034 giuliano-gliozzo-2008-instance @@ -301,7 +301,7 @@ Measuring Topic Homogeneity and its Application to Dictionary-Based Word Sense Disambiguation AnnGledson - JohnKeane + JohnKeane 273–280 C08-1035 gledson-keane-2008-measuring @@ -309,7 +309,7 @@ Using Web-Search Results to Measure Word-Group Similarity AnnGledson - JohnKeane + JohnKeane 281–288 C08-1036 gledson-keane-2008-using @@ -323,8 +323,8 @@ Dependency-Based N-Gram Models for General Purpose Sentence Realisation - YuqingGuo - Josefvan Genabith + YuqingGuo + Josefvan Genabith HaifengWang 297–304 C08-1038 @@ -332,7 +332,7 @@ Homotopy-Based Semi-Supervised Hidden <fixed-case>M</fixed-case>arkov Models for Sequence Labeling - GholamrezaHaffari + GholamrezaHaffari AnoopSarkar 305–312 C08-1039 @@ -340,13 +340,13 @@ Tracking the Dynamic Evolution of Participants Salience in a Discussion - AhmedHassan + AhmedHassan AnthonyFader Michael H.Crespin - Kevin M.Quinn + Kevin M.Quinn Burt L.Monroe MichaelColaresi - Dragomir R.Radev + Dragomir R.Radev 313–320 C08-1040 hassan-etal-2008-tracking @@ -399,7 +399,7 @@ <fixed-case>J</fixed-case>apanese Dependency Parsing Using a Tournament Model MasakazuIwatate MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 361–368 C08-1046 iwatate-etal-2008-japanese @@ -471,8 +471,8 @@ Generation of Referring Expressions: Managing Structural Ambiguities Imtiaz HussainKhan - Keesvan Deemter - GraemeRitchie + Keesvan Deemter + GraemeRitchie 433–440 C08-1055 khan-etal-2008-generation @@ -481,7 +481,7 @@ Normalizing <fixed-case>SMS</fixed-case>: are Two Metaphors Better than One ? CatherineKobus FrançoisYvon - GéraldineDamnati + GéraldineDamnati 441–448 C08-1056 kobus-etal-2008-normalizing @@ -497,8 +497,8 @@ Extending a Thesaurus with Words from Pan-<fixed-case>C</fixed-case>hinese Sources - Oi YeeKwong - Benjamin K.Tsou + Oi YeeKwong + Benjamin K.Tsou 457–464 C08-1058 kwong-tsou-2008-extending @@ -506,7 +506,7 @@ Stopping Criteria for Active Learning of Named Entity Recognition FlorianLaws - HinrichSchütze + HinrichSchütze 465–472 C08-1059 laws-schutze-2008-stopping @@ -535,7 +535,7 @@ <fixed-case>PNR</fixed-case>2: Ranking Sentences with Positive and Negative Reinforcement for Query-Oriented Update Summarization WenjieLi FuruWei - QinLu + QinLu YanxiangHe 489–496 C08-1062 @@ -546,7 +546,7 @@ YuanjieLiu ShashaLi YunboCao - Chin-YewLin + Chin-YewLin DingyiHan YongYu 497–504 @@ -563,7 +563,7 @@ Authorship Attribution and Verification with Many Authors and Limited Data KimLuyckx - WalterDaelemans + WalterDaelemans 513–520 C08-1065 luyckx-daelemans-2008-authorship @@ -571,7 +571,7 @@ Modeling Semantic Containment and Exclusion in Natural Language Inference BillMacCartney - Christopher D.Manning + Christopher D.Manning 521–528 C08-1066 maccartney-manning-2008-modeling @@ -580,16 +580,16 @@ Linguistically-Based Sub-Sentential Alignment for Terminology Extraction from a Bilingual Automotive Corpus LieveMacken ElsLefever - VeroniqueHoste + VeroniqueHoste 529–536 C08-1067 macken-etal-2008-linguistically <fixed-case>H</fixed-case>indi <fixed-case>U</fixed-case>rdu Machine Transliteration using Finite-State Transducers - M G AbbasMalik - ChristianBoitet - PushpakBhattacharyya + M G AbbasMalik + ChristianBoitet + PushpakBhattacharyya 537–544 C08-1068 malik-etal-2008-hindi @@ -597,14 +597,14 @@ Comparative Parser Performance Analysis across Grammar Frameworks through Automatic Tree Conversion using Synchronous Grammars TakuyaMatsuzaki - Jun’ichiTsujii + Jun’ichiTsujii 545–552 C08-1069 matsuzaki-tsujii-2008-comparative What’s the Date? High Accuracy Interpretation of Weekday Names - PawelMazur + PawelMazur RobertDale 553–560 C08-1070 @@ -621,7 +621,7 @@ A Syntactic Time-Series Model for Parsing Fluent and Disfluent Speech - TimMiller + TimMiller WilliamSchuler 569–576 C08-1072 @@ -630,7 +630,7 @@ Applying Discourse Analysis and Data Mining Methods to Spoken <fixed-case>OSCE</fixed-case> Assessments MeladelMistica - TimothyBaldwin + TimothyBaldwin MarisaCordella SimonMusgrave 577–584 @@ -639,7 +639,7 @@ Random Restarts in Minimum Error Rate Training for Statistical Machine Translation - Robert C.Moore + Robert C.Moore ChrisQuirk 585–592 C08-1074 @@ -666,9 +666,9 @@ Detecting Multiple Facets of an Event using Graph-Based Unsupervised Methods - PradeepMuthukrishnan + PradeepMuthukrishnan JoshuaGerrish - Dragomir R.Radev + Dragomir R.Radev 609–616 C08-1077 muthukrishnan-etal-2008-detecting @@ -693,10 +693,10 @@ Computer Aided Correction and Extension of a Syntactic Wide-Coverage Lexicon LionelNicolas - BenoîtSagot + BenoîtSagot Miguel A.Molinero JacquesFarré - Éricde la Clergerie + Éricde la Clergerie 633–640 C08-1080 nicolas-etal-2008-computer @@ -704,8 +704,8 @@ Parsing the <fixed-case>S</fixed-case>yn<fixed-case>T</fixed-case>ag<fixed-case>R</fixed-case>us Treebank of <fixed-case>R</fixed-case>ussian JoakimNivre - Igor M.Boguslavsky - Leonid L.Iomdin + Igor M.Boguslavsky + Leonid L.Iomdin 641–648 C08-1081 nivre-etal-2008-parsing @@ -720,16 +720,16 @@ A Discriminative Alignment Model for Abbreviation Recognition - NaoakiOkazaki + NaoakiOkazaki SophiaAnaniadou - Jun’ichiTsujii + Jun’ichiTsujii 657–664 C08-1083 okazaki-etal-2008-discriminative Semantic Role Assignment for Event Nominalisations by Leveraging Verbal Data - SebastianPadó + SebastianPadó MarcoPennacchiotti CarolineSporleder 665–672 @@ -755,17 +755,17 @@ Scientific Paper Summarization Using Citation Summary Networks VahedQazvinian - Dragomir R.Radev + Dragomir R.Radev 689–696 C08-1087 qazvinian-radev-2008-scientific Exploiting Constituent Dependencies for Tree Kernel-Based Semantic Relation Extraction - LonghuaQian - GuodongZhou + LonghuaQian + GuodongZhou FangKong - QiaomingZhu + QiaomingZhu PeideQian 697–704 C08-1088 @@ -782,9 +782,9 @@ Almost Flat Functional Semantics for Speech Translation - MannyRayner - PierretteBouillon - Beth AnnHockey + MannyRayner + PierretteBouillon + Beth AnnHockey YukieNakao 713–720 C08-1090 @@ -809,7 +809,7 @@ Translating Queries into Snippets for Improved Query Expansion StefanRiezler YiLiu - AlexanderVasserman + AlexanderVasserman 737–744 C08-1093 riezler-etal-2008-translating @@ -825,7 +825,7 @@ Shift-Reduce Dependency <fixed-case>DAG</fixed-case> Parsing KenjiSagae - Jun’ichiTsujii + Jun’ichiTsujii 753–760 C08-1095 sagae-tsujii-2008-shift @@ -835,7 +835,7 @@ YutakaSasaki PaulThompson PhilipCotter - JohnMcNaught + JohnMcNaught SophiaAnaniadou 761–768 C08-1096 @@ -861,8 +861,8 @@ Toward a Psycholinguistically-Motivated Model of Language Processing WilliamSchuler - SamirAbdelRahman - TimMiller + SamirAbdelRahman + TimMiller LaneSchwartz 785–792 C08-1099 @@ -882,7 +882,7 @@ Discourse Level Opinion Interpretation SwapnaSomasundaran - JanyceWiebe + JanyceWiebe JosefRuppenhofer 801–808 C08-1101 @@ -892,7 +892,7 @@ Acquiring Sense Tagged Examples using Relevance Feedback MarkStevenson YikunGuo - RobertGaizauskas + RobertGaizauskas 809–816 C08-1102 stevenson-etal-2008-acquiring @@ -900,7 +900,7 @@ Topic Identification for Fine-Grained Opinion Analysis VeselinStoyanov - ClaireCardie + ClaireCardie 817–824 C08-1103 stoyanov-cardie-2008-topic @@ -915,7 +915,7 @@ Prediction of Maximal Projection for Semantic Role Labeling - WeiweiSun + WeiweiSun ZhifangSui HaifengWang 833–840 @@ -928,7 +928,7 @@ Louis-PhilippeMorency DaisukeOkanohara YoshimasaTsuruoka - Jun’ichiTsujii + Jun’ichiTsujii 841–848 C08-1106 sun-etal-2008-modeling @@ -944,15 +944,15 @@ Experiments with Reasoning for Temporal Relations between Events MartaTatu - MunirathnamSrikanth + MunirathnamSrikanth 857–864 C08-1108 tatu-srikanth-2008-experiments The Ups and Downs of Preposition Error Detection in <fixed-case>ESL</fixed-case> Writing - Joel R.Tetreault - MartinChodorow + Joel R.Tetreault + MartinChodorow 865–872 C08-1109 tetreault-chodorow-2008-ups @@ -960,7 +960,7 @@ A Framework for Identifying Textual Redundancy KapilThadani - KathleenMcKeown + KathleenMcKeown 873–880 C08-1110 thadani-mckeown-2008-framework @@ -969,7 +969,7 @@ Emotion Classification Using Massive Examples Extracted from the Web RyokoTokuhisa KentaroInui - YujiMatsumoto + YujiMatsumoto 881–888 C08-1111 tokuhisa-etal-2008-emotion @@ -977,7 +977,7 @@ Relational-Realizational Parsing ReutTsarfaty - KhalilSima’an + KhalilSima’an 889–896 C08-1112 tsarfaty-simaan-2008-relational @@ -988,14 +988,14 @@ HisashiKashima ShinsukeMori HirokiOda - YujiMatsumoto + YujiMatsumoto 897–904 C08-1113 tsuboi-etal-2008-training A Uniform Approach to Analogies, Synonyms, Antonyms, and Associations - PeterTurney + PeterTurney 905–912 C08-1114 turney-2008-uniform @@ -1005,7 +1005,7 @@ NicolaUeffing JensStephan EvgenyMatusov - LoïcDugast + LoïcDugast GeorgeFoster RolandKuhn JeanSenellart @@ -1018,21 +1018,21 @@ Class-Driven Attribute Extraction BenjaminVan Durme TingQian - LenhartSchubert + LenhartSchubert 921–928 C08-1116 van-durme-etal-2008-class Using Three Way Data for Word Sense Discrimination - TimVan de Cruys + TimVan de Cruys 929–936 C08-1117 van-de-cruys-2008-using Source Language Markers in <fixed-case>EUROPARL</fixed-case> Translations - Hansvan Halteren + Hansvan Halteren 937–944 C08-1118 van-halteren-2008-source @@ -1048,9 +1048,9 @@ Using Syntactic Information for Improving Why-Question Answering SuzanVerberne - LouBoves + LouBoves NellekeOostdijk - Peter-ArnoCoppen + Peter-ArnoCoppen 953–960 C08-1120 verberne-etal-2008-using @@ -1059,7 +1059,7 @@ Coreference Systems Based on Kernels Methods YannickVersley AlessandroMoschitti - MassimoPoesio + MassimoPoesio XiaofengYang 961–968 C08-1121 @@ -1075,7 +1075,7 @@ Investigating the Portability of Corpus-Derived Cue Phrases for Dialogue Act Classification - NickWebb + NickWebb TingLiu 977–984 C08-1123 @@ -1083,7 +1083,7 @@ Extractive Summarization Using Supervised and Semi-Supervised Learning - Kam-FaiWong + Kam-FaiWong MingliWu WenjieLi 985–992 @@ -1094,7 +1094,7 @@ Domain Adaptation for Statistical Machine Translation with Domain Dictionary and Monolingual Corpora HuaWu HaifengWang - ChengqingZong + ChengqingZong 993–1000 C08-1125 wu-etal-2008-domain @@ -1108,9 +1108,9 @@ Linguistically Annotated <fixed-case>BTG</fixed-case> for Statistical Machine Translation - DeyiXiong + DeyiXiong MinZhang - AitiAw + AitiAw HaizhouLi 1009–1016 C08-1127 @@ -1121,7 +1121,7 @@ JiaXu JianfengGao KristinaToutanova - HermannNey + HermannNey 1017–1024 C08-1128 xu-etal-2008-bayesian @@ -1129,8 +1129,8 @@ Switching to Real-Time Tasks in Multi-Tasking Dialogue FanYang - Peter A.Heeman - AndrewKun + Peter A.Heeman + AndrewKun 1025–1032 C08-1129 yang-etal-2008-switching @@ -1138,8 +1138,8 @@ <fixed-case>C</fixed-case>hinese Term Extraction Using Minimal Resources YuhangYang - QinLu - TiejunZhao + QinLu + TiejunZhao 1033–1040 C08-1130 yang-etal-2008-chinese @@ -1147,7 +1147,7 @@ Measuring and Predicting Orthographic Associations: Modelling the Similarity of <fixed-case>J</fixed-case>apanese Kanji LarsYencken - TimothyBaldwin + TimothyBaldwin 1041–1048 C08-1131 yencken-baldwin-2008-measuring @@ -1163,9 +1163,9 @@ <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes: Corpus Cleanup of Mistaken Agreement Using Word Sense Disambiguation - Liang-ChihYu + Liang-ChihYu Chung-HsienWu - EduardHovy + EduardHovy 1057–1064 C08-1133 yu-etal-2008-ontonotes @@ -1181,7 +1181,7 @@ Automatic Seed Word Selection for Unsupervised Sentiment Classification of <fixed-case>C</fixed-case>hinese Text TarasZagibalov - JohnCarroll + JohnCarroll 1073–1080 C08-1135 zagibalov-carroll-2008-automatic @@ -1198,7 +1198,7 @@ Sentence Type Based Reordering Model for Statistical Machine Translation JiajunZhang - ChengqingZong + ChengqingZong ShoushanLi 1089–1096 C08-1137 @@ -1209,7 +1209,7 @@ MinZhang HongfeiJiang HaizhouLi - AitiAw + AitiAw ShengLi 1097–1104 C08-1138 @@ -1238,7 +1238,7 @@ ShujieLiu MuLi DongdongZhang - TiejunZhao + TiejunZhao 1121–1128 C08-1141 zhou-etal-2008-diagnostic @@ -1247,7 +1247,7 @@ Multi-Criteria-Based Strategy to Stop Active Learning for Data Annotation JingboZhu HuizhenWang - EduardHovy + EduardHovy 1129–1136 C08-1142 zhu-etal-2008-multi @@ -1257,7 +1257,7 @@ JingboZhu HuizhenWang TianshunYao - Benjamin KTsou + Benjamin KTsou 1137–1144 C08-1143 zhu-etal-2008-active @@ -1266,7 +1266,7 @@ A Systematic Comparison of Phrase-Based, Hierarchical and Syntax-Augmented Statistical <fixed-case>MT</fixed-case> AndreasZollmann AshishVenugopal - FranzOch + FranzOch JayPonte 1145–1152 C08-1144 @@ -1284,7 +1284,7 @@ Coling 2008: Companion volume: Posters - DoniaScott + DoniaScott HansUszkoreit Coling 2008 Organizing Committee
Manchester, UK
@@ -1298,16 +1298,16 @@ Metaphor in Textual Entailment - RodrigoAgerri + RodrigoAgerri 3–6 C08-2001 agerri-2008-metaphor Distilling Opinion in Discourse: A Preliminary Study - NicholasAsher - FarahBenamara - Yvette YannickMathieu + NicholasAsher + FarahBenamara + Yvette YannickMathieu 7–10 C08-2002 asher-etal-2008-distilling @@ -1324,7 +1324,7 @@ The Power of Negative Thinking: Exploiting Label Disagreement in the <fixed-case>M</fixed-case>in-cut Classification Framework MohitBansal - ClaireCardie + ClaireCardie LillianLee 15–18 C08-2004 @@ -1333,7 +1333,7 @@ Phrasal Segmentation Models for Statistical Machine Translation GraemeBlackwood - Adriàde Gispert + Adriàde Gispert WilliamByrne 19–22 C08-2005 @@ -1342,8 +1342,8 @@ A Scalable <fixed-case>MMR</fixed-case> Approach to Sentence Scoring for Multi-Document Update Summarization FlorianBoudin - MarcEl-Bèze - Juan-ManuelTorres-Moreno + MarcEl-Bèze + Juan-ManuelTorres-Moreno 23–26 C08-2006 boudin-etal-2008-scalable @@ -1353,17 +1353,17 @@ DebasriChakrabarti HemangMandalia RitwikPriya - VaijayanthiSarma - PushpakBhattacharyya + VaijayanthiSarma + PushpakBhattacharyya 27–30 C08-2007 chakrabarti-etal-2008-hindi Detecting Erroneous Uses of Complex Postpositions in an Agglutinative Language - ArantzaDíaz de Ilarraza - KoldoGojenola - MaiteOronoz + ArantzaDíaz de Ilarraza + KoldoGojenola + MaiteOronoz 31–34 C08-2008 diaz-de-ilarraza-etal-2008-detecting @@ -1378,8 +1378,8 @@ The Impact of Reference Quality on Automatic <fixed-case>MT</fixed-case> Evaluation - OlivierHamon - DjamelMostefa + OlivierHamon + DjamelMostefa 39–42 C08-2010 hamon-mostefa-2008-impact @@ -1388,22 +1388,22 @@ Word Sense Disambiguation for All Words using Tree-Structured Conditional Random Fields JunHatori YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 43–46 C08-2011 hatori-etal-2008-word <fixed-case>ILP</fixed-case>-based Conceptual Analysis for <fixed-case>C</fixed-case>hinese <fixed-case>NP</fixed-case>s - Paul D.Ji - Stephen G.Pulman + Paul D.Ji + Stephen G.Pulman 47–50 C08-2012 ji-pulman-2008-ilp Scaling up Analogical Learning - PhilippeLanglais + PhilippeLanglais FrançoisYvon 51–54 C08-2013 @@ -1428,7 +1428,7 @@ Exact Inference for Multi-label Classification using Sparse Graphical Models YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 63–66 C08-2016 miyao-tsujii-2008-exact @@ -1436,9 +1436,9 @@ Modelling Multilinguality in Ontologies ElenaMontiel-Ponsoda - GuadalupeAguado de Cea - AsunciónGómez-Pérez - WimPeters + GuadalupeAguado de Cea + AsunciónGómez-Pérez + WimPeters 67–70 C08-2017 montiel-ponsoda-etal-2008-modelling @@ -1461,10 +1461,10 @@ Generation under Space Constraints - CécileParis + CécileParis NathalieColineau AndrewLampert - JoanGiralt Duran + JoanGiralt Duran 79–82 C08-2020 paris-etal-2008-generation @@ -1486,7 +1486,7 @@ HenaMehta AniNenkova AlanLee - AravindJoshi + AravindJoshi 87–90 C08-2022 pitler-etal-2008-easily @@ -1494,29 +1494,29 @@ Rank Distance as a Stylistic Similarity MariusPopescu - Liviu P.Dinu + Liviu P.Dinu 91–94 C08-2023 popescu-dinu-2008-rank Integrating Motion Predicate Classes with Spatial and Temporal Annotations - JamesPustejovsky - Jessica L.Moszkowicz + JamesPustejovsky + Jessica L.Moszkowicz 95–98 C08-2024 pustejovsky-moszkowicz-2008-integrating On the Weak Generative Capacity of Weighted Context-free Grammars - AndersSøgaard + AndersSøgaard 99–102 C08-2025 sogaard-2008-weak Range Concatenation Grammars for Translation - AndersSøgaard + AndersSøgaard 103–106 C08-2026 sogaard-2008-range @@ -1524,7 +1524,7 @@ Comparative Evaluation of <fixed-case>A</fixed-case>rabic Language Morphological Analysers and Stemmers MajdiSawalha - EricAtwell + EricAtwell 107–110 C08-2027 sawalha-atwell-2008-comparative @@ -1532,7 +1532,7 @@ A Complete and Modestly Funny System for Generating and Performing <fixed-case>J</fixed-case>apanese Stand-Up Comedy JonasSjöbergh - KenjiAraki + KenjiAraki 111–114 C08-2028 sjobergh-araki-2008-complete @@ -1569,8 +1569,8 @@ Building a Bilingual Lexicon Using Phrase-based Statistical Machine Translation via a Pivot Language TakashiTsunakawa - NaoakiOkazaki - Jun’ichiTsujii + NaoakiOkazaki + Jun’ichiTsujii 127–130 C08-2032 tsunakawa-etal-2008-building @@ -1605,7 +1605,7 @@ Coling 2008: Companion volume: Demonstrations AllanRamsay - KalinaBontcheva + KalinaBontcheva Coling 2008 Organizing Committee
Manchester, UK
August @@ -1622,7 +1622,7 @@ JakubPiskorski BrunoPouliquen RalfSteinberger - HristoTanev + HristoTanev VanniZavarella 145–148 C08-3001 @@ -1631,7 +1631,7 @@ A Grammar Checking System for <fixed-case>P</fixed-case>unjabi Mandeep SinghGill - Gurpreet SinghLehal + Gurpreet SinghLehal 149–152 C08-3002 gill-lehal-2008-grammar @@ -1639,7 +1639,7 @@ A Toolchain for Grammarians BrunoGuillaume - JosephLe Roux + JosephLe Roux JonathanMarchand GuyPerrier KarënFort @@ -1651,7 +1651,7 @@ A <fixed-case>P</fixed-case>unjabi To <fixed-case>H</fixed-case>indi Machine Translation System Gurpreet SinghJosan - Gurpreet SinghLehal + Gurpreet SinghLehal 157–160 C08-3004 josan-lehal-2008-punjabi @@ -1660,19 +1660,19 @@ “Build Your Own” Spoken Dialogue Systems: Automatically Generating <fixed-case>ISU</fixed-case> Dialogue Systems from Business User Resources OliverLemon XingkunLiu - HelenHastie + HelenHastie 161–164 C08-3005 lemon-etal-2008-build Multilingual Mobile-Phone Translation Services for World Travelers - MichaelPaul + MichaelPaul HideoOkuma - HirofumiYamamoto - EiichiroSumita + HirofumiYamamoto + EiichiroSumita ShigekiMatsuda - TohruShimizu + TohruShimizu SatoshiNakamura 165–168 C08-3006 @@ -1681,7 +1681,7 @@ Multilingual Assistant for Medical Diagnosing and Drug Prescription Based on Category Ranking FernandoRuiz-Rico - Jose-LuisVicedo + Jose-LuisVicedo María-ConsueloRubio-Sánchez 169–172 C08-3007 @@ -1690,12 +1690,12 @@ Entailment-based Question Answering for Structured Data BogdanSacaleanu - ConstantinOrasan + ConstantinOrasan ChristianSpurk ShiyanOu - OscarFerrandez + OscarFerrandez MilenKouylekov - MatteoNegri + MatteoNegri 173–176 C08-3008 sacaleanu-etal-2008-entailment @@ -1703,7 +1703,7 @@ Shahmukhi to Gurmukhi Transliteration System Tejinder SinghSaini - Gurpreet SinghLehal + Gurpreet SinghLehal Virinder SKalra 177–180 C08-3009 @@ -1726,7 +1726,7 @@ Temporal Processing with the <fixed-case>TARSQI</fixed-case> Toolkit MarcVerhagen - JamesPustejovsky + JamesPustejovsky 189–192 C08-3012 verhagen-pustejovsky-2008-temporal diff --git a/data/xml/C10.xml b/data/xml/C10.xml index e2147b043e..1c0358d93d 100644 --- a/data/xml/C10.xml +++ b/data/xml/C10.xml @@ -5,7 +5,7 @@ Proceedings of the 23rd International Conference on Computational Linguistics (Coling 2010) C10-1 Chu-RenHuang - DanJurafsky + DanJurafsky Coling 2010 Organizing Committee
Beijing, China
August @@ -19,7 +19,7 @@ Testing <fixed-case>SDRT</fixed-case>’s Right Frontier StergosAfantenos - NicholasAsher + NicholasAsher 1–9 C10-1001 afantenos-asher-2010-testing @@ -36,7 +36,7 @@ Robust Measurement and Comparison of Context Similarity for Finding Translation Pairs DanielAndrade TetsuyaNasukawa - JunichiTsujii + JunichiTsujii 19–27 C10-1003 andrade-etal-2010-robust @@ -44,8 +44,8 @@ Multilingual Subjectivity: Are More Languages Better? CarmenBanea - RadaMihalcea - JanyceWiebe + RadaMihalcea + JanyceWiebe 28–36 C10-1004 banea-etal-2010-multilingual @@ -54,15 +54,15 @@ Plagiarism Detection across Distant Language Pairs AlbertoBarrón-Cedeño PaoloRosso - EnekoAgirre - GorkaLabaka + EnekoAgirre + GorkaLabaka 37–45 C10-1005 barron-cedeno-etal-2010-plagiarism Automatic Detection of Non-deverbal Event Nouns for Quick Lexicon Production - NuriaBel + NuriaBel MariaColl GabrielaResnik 46–52 @@ -88,7 +88,7 @@ Fluency Constraints for Minimum <fixed-case>B</fixed-case>ayes-Risk Decoding of Statistical Machine Translation Lattices GraemeBlackwood - Adriàde Gispert + Adriàde Gispert WilliamByrne 71–79 C10-1009 @@ -96,8 +96,8 @@ Self-Annotation for fine-grained geospatial relation extraction - AndreBlessing - HinrichSchütze + AndreBlessing + HinrichSchütze 80–88 C10-1010 blessing-schutze-2010-self @@ -122,7 +122,7 @@ Towards an optimal weighting of context words based on distance BernardBrosseau-Villeneuve - Jian-YunNie + Jian-YunNie NorikoKando 107–115 C10-1013 @@ -139,7 +139,7 @@ A Utility-Driven Approach to Question Ranking in Social <fixed-case>QA</fixed-case> - RazvanBunescu + RazvanBunescu YunfengHuang 125–133 C10-1015 @@ -190,7 +190,7 @@ Emotion Cause Detection with Linguistic Constructions YingChen - Sophia Yat MeiLee + Sophia Yat MeiLee ShoushanLi Chu-RenHuang 179–187 @@ -201,7 +201,7 @@ A Twin-Candidate Based Approach for Event Pronoun Resolution using Composite Kernel BinChen JianSu - Chew LimTan + Chew LimTan 188–196 C10-1022 chen-etal-2010-twin @@ -217,7 +217,7 @@ Simplicity is Better: Revisiting Single Kernel <fixed-case>PPI</fixed-case> Extraction Sung-PilChoi - Sung-HyonMyaeng + Sung-HyonMyaeng 206–214 C10-1024 choi-myaeng-2010-simplicity @@ -226,13 +226,13 @@ An ontology-driven system for detecting global health events NigelCollier ReikoMatsuda Goodwin - JohnMcCrae + JohnMcCrae SonDoan AiKawazoe MikeConway AsaneeKawtrakul KoichiTakeuchi - DinhDien + DinhDien 215–222 C10-1025 collier-etal-2010-ontology @@ -247,7 +247,7 @@ Local lexical adaptation in Machine Translation through triangulation: <fixed-case>SMT</fixed-case> helping <fixed-case>SMT</fixed-case> - Josep MariaCrego + Josep MariaCrego AurélienMax FrançoisYvon 232–240 @@ -291,7 +291,7 @@ PaulMcNamee DelipRao AdamGerber - TimFinin + TimFinin 277–285 C10-1032 dredze-etal-2010-entity @@ -310,7 +310,7 @@ LongJiang TaoQin MingZhou - Heung-YeungShum + Heung-YeungShum 295–303 C10-1034 duan-etal-2010-empirical @@ -352,7 +352,7 @@ <fixed-case>O</fixed-case>pinosis: A Graph Based Approach to Abstractive Summarization of Highly Redundant Opinions KavitaGanesan - ChengXiangZhai + ChengXiangZhai JiaweiHan 340–348 C10-1039 @@ -361,8 +361,8 @@ <fixed-case>EMDC</fixed-case>: A Semi-supervised Approach for Word Alignment QinGao - FranciscoGuzman - StephanVogel + FranciscoGuzman + StephanVogel 349–357 C10-1040 gao-etal-2010-emdc @@ -397,7 +397,7 @@ Comparing Language Similarity across Genetic and Typologically-Based Groupings RyanGeorgi FeiXia - WilliamLewis + WilliamLewis 385–393 C10-1044 georgi-etal-2010-comparing @@ -405,16 +405,16 @@ Better <fixed-case>A</fixed-case>rabic Parsing: Baselines, Evaluations, and Analysis SpenceGreen - Christopher D.Manning + Christopher D.Manning 394–402 C10-1045 green-manning-2010-better Paraphrase Alignment for Synonym Evidence Discovery - GintarėGrigonytė - João PauloCordeiro - GaëlDias + GintarėGrigonytė + João PauloCordeiro + GaëlDias RumenMoraliyski PavelBrazdil 403–411 @@ -431,7 +431,7 @@ Detection of Simple Plagiarism in Computer Science Papers - YaakovHaCohen-Kerner + YaakovHaCohen-Kerner AharonTayeb NatanBen-Dror 421–429 @@ -440,7 +440,7 @@ A Structured Vector Space Model for Hidden Attribute Meaning in Adjective-Noun Phrases - MatthiasHartung + MatthiasHartung AnetteFrank 430–438 C10-1049 @@ -461,7 +461,7 @@ A Novel Reordering Model Based on Multi-layer Phrase for Statistical Machine Translation YanqingHe YuZhou - ChengqingZong + ChengqingZong HuilinWang 447–455 C10-1051 @@ -470,7 +470,7 @@ Standardizing Wordnets in the <fixed-case>ISO</fixed-case> Standard <fixed-case>LMF</fixed-case>: <fixed-case>W</fixed-case>ordnet-<fixed-case>LMF</fixed-case> for <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et VerenaHenrich - ErhardHinrichs + ErhardHinrichs 456–464 C10-1052 henrich-hinrichs-2010-standardizing @@ -488,7 +488,7 @@ GumwonHong Chi-HoLi MingZhou - Hae-ChangRim + Hae-ChangRim 474–482 C10-1054 hong-etal-2010-empirical @@ -497,7 +497,7 @@ Enhancing Cross Document Coreference of Web Documents with Context Similarity and Very Large Scale Text Categorization JianHuang PucktadaTreeratpituk - SarahTaylor + SarahTaylor C. LeeGiles 483–491 C10-1055 @@ -553,24 +553,24 @@ Learning to Predict Readability using Diverse Linguistic Features - RohitKate - XiaoqiangLuo + RohitKate + XiaoqiangLuo SiddharthPatwardhan MartinFranz - RaduFlorian - RaymondMooney - SalimRoukos - ChrisWelty + RaduFlorian + RaymondMooney + SalimRoukos + ChrisWelty 546–554 C10-1062 kate-etal-2010-learning Value for Money: Balancing Annotation Effort, Lexicon Building and Accuracy for Multilingual <fixed-case>WSD</fixed-case> - MiteshKhapra + MiteshKhapra SaurabhSohoney AnupKulkarni - PushpakBhattacharyya + PushpakBhattacharyya 555–563 C10-1063 khapra-etal-2010-value @@ -580,7 +580,7 @@ SeokhwanKim MinwooJeong JonghoonLee - Gary GeunbaeLee + Gary GeunbaeLee 564–571 C10-1064 kim-etal-2010-cross @@ -588,7 +588,7 @@ Evaluating N-gram based Evaluation Metrics for Automatic Keyphrase Extraction Su NamKim - TimothyBaldwin + TimothyBaldwin Min-YenKan 572–580 C10-1065 @@ -598,7 +598,7 @@ Improving the Quality of Text Understanding by Delaying Ambiguity Resolution Doo SoonKim KenBarker - BrucePorter + BrucePorter 581–589 C10-1066 kim-etal-2010-improving @@ -615,9 +615,9 @@ Dependency-driven Anaphoricity Determination for Coreference Resolution FangKong - GuodongZhou - LonghuaQian - QiaomingZhu + GuodongZhou + LonghuaQian + QiaomingZhu 599–607 C10-1068 kong-etal-2010-dependency @@ -635,7 +635,7 @@ Revisiting Context-based Projection Methods for Term-Translation Spotting in Comparable Corpora AudreyLaroche - PhilippeLanglais + PhilippeLanglais 617–625 C10-1070 laroche-langlais-2010-revisiting @@ -652,18 +652,18 @@ Sentiment Classification and Polarity Shifting ShoushanLi - Sophia Y. M.Lee + Sophia Y. M.Lee YingChen Chu-RenHuang - GuodongZhou + GuodongZhou 635–643 C10-1072 li-etal-2010-sentiment Improving Corpus Comparability for Bilingual Lexicon Extraction from Comparable Corpora - BoLi - EricGaussier + BoLi + EricGaussier 644–652 C10-1073 li-gaussier-2010-improving @@ -674,7 +674,7 @@ ChaoHan MinlieHuang XiaoyanZhu - Ying-JuXia + Ying-JuXia ShuZhang HaoYu 653–661 @@ -693,10 +693,10 @@ Learning the Scope of Negation via Shallow Semantic Parsing - JunhuiLi - GuodongZhou + JunhuiLi + GuodongZhou HonglingWang - QiaomingZhu + QiaomingZhu 671–679 C10-1076 li-etal-2010-learning @@ -704,7 +704,7 @@ Filtered Ranking for Bootstrapping in Event Extraction ShashaLiao - RalphGrishman + RalphGrishman 680–688 C10-1077 liao-grishman-2010-filtered @@ -727,7 +727,7 @@ MingZhou LongJiang ZhongyangXiong - ChangningHuang + ChangningHuang 698–706 C10-1079 liu-etal-2010-semantic @@ -750,9 +750,9 @@ <fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> Events Recognition and Classification: Learning <fixed-case>CRF</fixed-case> Models with Semantic Roles - HectorLlorens - EstelaSaquete - BorjaNavarro-Colorado + HectorLlorens + EstelaSaquete + BorjaNavarro-Colorado 725–733 C10-1082 llorens-etal-2010-timeml @@ -762,14 +762,14 @@ YueLu HuizhongDuan HongningWang - ChengXiangZhai + ChengXiangZhai 734–742 C10-1083 lu-etal-2010-exploiting Enhancing Morphological Alignment for Translating Highly Inflected Languages - Minh-ThangLuong + Minh-ThangLuong Min-YenKan 743–751 C10-1084 @@ -778,7 +778,7 @@ Automatic analysis of semantic similarity in comparable text through syntactic tree matching ErwinMarsi - EmielKrahmer + EmielKrahmer 752–760 C10-1085 marsi-krahmer-2010-automatic @@ -808,7 +808,7 @@ MakotoMiwa SampoPyysalo TadayoshiHara - Jun’ichiTsujii + Jun’ichiTsujii 779–787 C10-1088 miwa-etal-2010-evaluating @@ -816,9 +816,9 @@ Entity-Focused Sentence Simplification for Relation Extraction MakotoMiwa - RuneSætre + RuneSætre YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 788–796 C10-1089 miwa-etal-2010-entity @@ -827,7 +827,7 @@ Using Cross-Lingual Projections to Generate Semantic Role Labeled Annotated Corpus for <fixed-case>U</fixed-case>rdu - A Resource Poor Language SmruthiMukund DebanjanGhosh - RohiniSrihari + RohiniSrihari 797–805 C10-1090 mukund-etal-2010-using @@ -843,9 +843,9 @@ Nonparametric Word Segmentation for Machine Translation - ThuyLinhNguyen - StephanVogel - Noah A.Smith + ThuyLinhNguyen + StephanVogel + Noah A.Smith 815–823 C10-1092 nguyen-etal-2010-nonparametric @@ -870,7 +870,7 @@ Co-<fixed-case>STAR</fixed-case>: A Co-training Style Algorithm for Hyponymy Relation Acquisition from Structured and Unstructured Text - Jong-HoonOh + Jong-HoonOh IchiroYamada KentaroTorisawa StijnDe Saeger @@ -880,8 +880,8 @@ Simple and Efficient Algorithm for Approximate Dictionary Matching - NaoakiOkazaki - Jun’ichiTsujii + NaoakiOkazaki + Jun’ichiTsujii 851–859 C10-1096 okazaki-tsujii-2010-simple @@ -898,7 +898,7 @@ Text Summarization of <fixed-case>T</fixed-case>urkish Texts using Latent Semantic Analysis MakbuleOzsoy - IlyasCicekli + IlyasCicekli FerdaAlpaslan 869–876 C10-1098 @@ -917,7 +917,7 @@ EmilyPitler ShaneBergsma DekangLin - KennethChurch + KennethChurch 886–894 C10-1100 pitler-etal-2010-using @@ -925,8 +925,8 @@ Citation Summarization Through Keyphrase Extraction VahedQazvinian - Dragomir R.Radev - ArzucanÖzgür + Dragomir R.Radev + ArzucanÖzgür 895–903 C10-1101 qazvinian-etal-2010-citation @@ -935,8 +935,8 @@ 2<fixed-case>D</fixed-case> Trie for Fast Parsing XianQian QiZhang - XuanjingHuang - LideWu + XuanjingHuang + LideWu 904–912 C10-1102 qian-etal-2010-2d @@ -996,7 +996,7 @@ A Multiple-Domain Ontology Builder SaraSalem - SamirAbdelRahman + SamirAbdelRahman 967–975 C10-1109 salem-abdelrahman-2010-multiple @@ -1064,12 +1064,12 @@ Modeling Socio-Cultural Phenomena in Discourse - TomekStrzalkowski - George AaronBroadwell - JenniferStromer-Galley + TomekStrzalkowski + George AaronBroadwell + JenniferStromer-Galley SamiraShaikh - SarahTaylor - NickWebb + SarahTaylor + NickWebb 1038–1046 C10-1117 strzalkowski-etal-2010-modeling @@ -1078,7 +1078,7 @@ Discriminative Induction of Sub-Tree Alignment using Limited Labeled Data JunSun MinZhang - Chew LimTan + Chew LimTan 1047–1055 C10-1118 sun-etal-2010-discriminative-induction @@ -1095,7 +1095,7 @@ Semi-supervised dependency parsing using generalized tri-training - AndersSøgaard + AndersSøgaard ChristianRishøj 1065–1073 C10-1120 @@ -1113,7 +1113,7 @@ <fixed-case>C</fixed-case>hinese <fixed-case>CCG</fixed-case>bank: extracting <fixed-case>CCG</fixed-case> derivations from the <fixed-case>P</fixed-case>enn <fixed-case>C</fixed-case>hinese Treebank DanielTse - James R.Curran + James R.Curran 1083–1091 C10-1122 tse-curran-2010-chinese @@ -1133,7 +1133,7 @@ Large Scale Parallel Document Mining for Machine Translation JakobUszkoreit JayPonte - AshokPopat + AshokPopat MosheDubiner 1101–1109 C10-1124 @@ -1150,10 +1150,10 @@ Syntax Based Reordering with Automatically Derived Rules for Improved Statistical Machine Translation KarthikVisweswariah - JiriNavratil - JeffreySorensen + JiriNavratil + JeffreySorensen VijilChenthamarakshan - NandakishoreKambhatla + NandakishoreKambhatla 1119–1127 C10-1126 visweswariah-etal-2010-syntax @@ -1177,7 +1177,7 @@ “Got You!”: Automatic Vandalism Detection in <fixed-case>W</fixed-case>ikipedia with Web-based Shallow Syntactic-Semantic Modeling William YangWang - KathleenMcKeown + KathleenMcKeown 1146–1154 C10-1129 wang-mckeown-2010-got @@ -1185,7 +1185,7 @@ Exploiting Salient Patterns for Question Detection and Question Retrieval in Community-based Question Answering KaiWang - Tat-SengChua + Tat-SengChua 1155–1163 C10-1130 wang-chua-2010-exploiting @@ -1193,7 +1193,7 @@ Probabilistic Tree-Edit Models with Structured Latent Variables for Textual Entailment and Question Answering MengqiuWang - ChristopherManning + ChristopherManning 1164–1172 C10-1131 wang-manning-2010-probabilistic @@ -1201,7 +1201,7 @@ A Character-Based Joint Model for <fixed-case>C</fixed-case>hinese Word Segmentation KunWang - ChengqingZong + ChengqingZong Keh-YihSu 1173–1181 C10-1132 @@ -1246,7 +1246,7 @@ A Methodology for Automatic Identification of Nocuous Ambiguity HuiYang - Annede Roeck + Annede Roeck AlistairWillis BasharNuseibeh 1218–1226 @@ -1279,7 +1279,7 @@ Discriminative Training for Near-Synonym Substitution - Liang-ChihYu + Liang-ChihYu Hsiu-MinShih Yu-LingLai Jui-FengYeh @@ -1290,7 +1290,7 @@ Estimating Linear Models for Compositional Distributional Semantics - Fabio MassimoZanzotto + Fabio MassimoZanzotto IoannisKorkontzelos FrancescaFallucchi SureshManandhar @@ -1310,9 +1310,9 @@ Forest-guided Supertagger Training - Yao-zhongZhang + Yao-zhongZhang TakuyaMatsuzaki - Jun’ichiTsujii + Jun’ichiTsujii 1281–1289 C10-1144 zhang-etal-2010-forest @@ -1321,8 +1321,8 @@ Entity Linking Leveraging Automatically Generated Annotation WeiZhang JianSu - Chew LimTan - Wen TingWang + Chew LimTan + Wen TingWang 1290–1298 C10-1145 zhang-etal-2010-entity @@ -1394,7 +1394,7 @@ A Minimum Error Weighting Combination Strategy for <fixed-case>C</fixed-case>hinese Semantic Role Labeling TaoZhuang - ChengqingZong + ChengqingZong 1362–1370 C10-1153 zhuang-zong-2010-minimum @@ -1410,7 +1410,7 @@ Syntactic Scope Resolution in Uncertainty Analysis - LiljaØvrelid + LiljaØvrelid ErikVelldal StephanOepen 1379–1387 @@ -1423,7 +1423,7 @@ Coling 2010: Posters C10-2 Chu-RenHuang - DanJurafsky + DanJurafsky Coling 2010 Organizing Committee
Beijing, China
August @@ -1444,8 +1444,8 @@
Document Expansion Based on <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for Robust <fixed-case>IR</fixed-case> - EnekoAgirre - XabierArregi + EnekoAgirre + XabierArregi ArantxaOtegi 9–17 C10-2002 @@ -1464,10 +1464,10 @@ Going Beyond Traditional <fixed-case>QA</fixed-case> Systems: Challenges and Keys in Opinion Question Answering - AlexandraBalahur + AlexandraBalahur EsterBoldrini - AndrésMontoyo - PatricioMartínez-Barco + AndrésMontoyo + PatricioMartínez-Barco 27–35 C10-2004 balahur-etal-2010-going @@ -1482,7 +1482,7 @@ Benchmarking for syntax-based sentential inference - PaulBedaride + PaulBedaride ClaireGardent 45–53 C10-2006 @@ -1498,7 +1498,7 @@ A Formal Scheme for Multimodal Grammars PhilippeBlache - LaurentPrévot + LaurentPrévot 63–71 C10-2008 blache-prevot-2010-formal @@ -1507,7 +1507,7 @@ Composition of Semantic Relations: Model and Applications EduardoBlanco Hakki C.Cankaya - DanMoldovan + DanMoldovan 72–80 C10-2009 blanco-etal-2010-composition @@ -1515,7 +1515,7 @@ Improved Unsupervised Sentence Alignment for Symmetrical and Asymmetrical Parallel Corpora FabienneBraune - AlexanderFraser + AlexanderFraser 81–89 C10-2010 braune-fraser-2010-improved @@ -1532,26 +1532,26 @@ Toward Qualitative Evaluation of Textual Entailment Systems ElenaCabrio - BernardoMagnini + BernardoMagnini 99–107 C10-2012 cabrio-magnini-2010-toward Benchmarking of Statistical Dependency Parsers for <fixed-case>F</fixed-case>rench - MarieCandito + MarieCandito JoakimNivre PascalDenis - EnriqueHenestroza Anguiano + EnriqueHenestroza Anguiano 108–116 C10-2013 candito-etal-2010-benchmarking Tree Topological Features for Unlexicalized Parsing - Samuel W. K.Chan - Lawrence Y. L.Cheung - Mickey W. C.Chong + Samuel W. K.Chan + Lawrence Y. L.Cheung + Mickey W. C.Chong 117–125 C10-2014 chan-etal-2010-tree @@ -1559,7 +1559,7 @@ Improving Graph-based Dependency Parsing with Decision History WenliangChen - Jun’ichiKazama + Jun’ichiKazama YoshimasaTsuruoka KentaroTorisawa 126–134 @@ -1587,7 +1587,7 @@ Acquisition of Unknown Word Paradigms for Large-Scale Grammars KostadinCholakov - Gertjanvan Noord + Gertjanvan Noord 153–161 C10-2018 cholakov-van-noord-2010-acquisition @@ -1604,14 +1604,14 @@ Exploiting Paraphrases and Deferred Sense Commitment to Interpret Questions more Reliably PeterClark - PhilHarrison + PhilHarrison 171–179 C10-2020 clark-harrison-2010-exploiting Two Methods for Extending Hierarchical Rules from the Bilingual Chart Parsing - MartinČmejrek + MartinČmejrek BowenZhou 180–188 C10-2021 @@ -1620,15 +1620,15 @@ Unsupervised cleansing of noisy text DanishContractor - Tanveer A.Faruquie - L. VenkataSubramaniam + Tanveer A.Faruquie + L. VenkataSubramaniam 189–196 C10-2022 contractor-etal-2010-unsupervised Improving Reordering with Linguistically Informed Bilingual n-grams - Josep MariaCrego + Josep MariaCrego FrançoisYvon 197–205 C10-2023 @@ -1648,7 +1648,7 @@ DongdongZhang MuLi MingZhou - TiejunZhao + TiejunZhao 214–222 C10-2025 cui-etal-2010-hybrid @@ -1657,8 +1657,8 @@ Global Ranking via Data Fusion Hong-JieDai Po-TingLai - Richard Tzong-HanTsai - Wen-LianHsu + Richard Tzong-HanTsai + Wen-LianHsu 223–231 C10-2026 dai-etal-2010-global @@ -1666,7 +1666,7 @@ Topic-Based <fixed-case>B</fixed-case>engali Opinion Summarization AmitavaDas - SivajiBandyopadhyay + SivajiBandyopadhyay 232–240 C10-2027 das-bandyopadhyay-2010-topic @@ -1682,7 +1682,7 @@ Topic Models for Meaning Similarity in Context - GeorgianaDinu + GeorgianaDinu MirellaLapata 250–258 C10-2029 @@ -1699,7 +1699,7 @@ Exploring the Data-Driven Prediction of Prepositions in <fixed-case>E</fixed-case>nglish AnasElghafari - DetmarMeurers + DetmarMeurers HolgerWunsch 267–275 C10-2031 @@ -1710,7 +1710,7 @@ LijunFeng MartinJansche MattHuenerfauth - NoémieElhadad + NoémieElhadad 276–284 C10-2032 feng-etal-2010-comparison @@ -1730,8 +1730,8 @@ YanhuiFeng YuHong ZhenxiangYan - JianminYao - QiaomingZhu + JianminYao + QiaomingZhu 294–302 C10-2034 feng-etal-2010-novel @@ -1739,7 +1739,7 @@ Building Systematic Reviews Using Automatic Text Classification Techniques OanaFrunza - DianaInkpen + DianaInkpen StanMatwin 303–311 C10-2035 @@ -1756,8 +1756,8 @@ Monolingual Distributional Profiles for Word Substitution in Machine Translation RashmiGangadharaiah - Ralf D.Brown - JaimeCarbonell + Ralf D.Brown + JaimeCarbonell 320–328 C10-2037 gangadharaiah-etal-2010-monolingual @@ -1784,8 +1784,8 @@ Verbs are where all the action lies: Experiences of Shallow Parsing of a Morphologically Rich Language HarshadaGune MugdhaBapat - Mitesh M.Khapra - PushpakBhattacharyya + Mitesh M.Khapra + PushpakBhattacharyya 347–355 C10-2040 gune-etal-2010-verbs @@ -1810,7 +1810,7 @@ YifanHe YanjunMa AndyWay - Josefvan Genabith + Josefvan Genabith 374–382 C10-2043 he-etal-2010-integrating @@ -1877,10 +1877,10 @@ Negative Feedback: The Forsaken Nature Available for Re-ranking YuHong - Qing-qingCai + Qing-qingCai SongHua - Jian-minYao - Qiao-mingZhu + Jian-minYao + Qiao-mingZhu 436–444 C10-2050 hong-etal-2010-negative @@ -1889,7 +1889,7 @@ Morphological Analysis Can Improve a <fixed-case>CCG</fixed-case> Parser for <fixed-case>E</fixed-case>nglish MatthewHonnibal Jonathan K.Kummerfeld - James R.Curran + James R.Curran 445–453 C10-2051 honnibal-etal-2010-morphological @@ -1898,7 +1898,7 @@ What’s in a Preposition? Dimensions of Sense Disambiguation for an Interesting Word Class DirkHovy StephenTratz - EduardHovy + EduardHovy 454–462 C10-2052 hovy-etal-2010-whats @@ -1913,7 +1913,7 @@ Mining Large-scale Comparable Corpora from <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish News Collections - DegenHuang + DegenHuang LianZhao LishuangLi HaitaoYu @@ -1954,7 +1954,7 @@ Effective Constituent Projection across Languages WenbinJiang - YajuanLv + YajuanLv YangLiu QunLiu 516–524 @@ -1982,7 +1982,7 @@ Generative Alignment and Semantic Parsing for Learning from Ambiguous Supervision JoohyunKim - RaymondMooney + RaymondMooney 543–551 C10-2062 kim-mooney-2010-generative @@ -2009,7 +2009,7 @@ Using Syntactic and Semantic based Relations for Dialogue Act Recognition - TinaKlüwer + TinaKlüwer HansUszkoreit FeiyuXu 570–578 @@ -2023,15 +2023,15 @@ MikioNakano KotaroFunakoshi TetsuyaOgata - Hiroshi G.Okuno + Hiroshi G.Okuno 579–587 C10-2066 komatani-etal-2010-automatic <fixed-case>DL</fixed-case> Meet <fixed-case>FL</fixed-case>: A Bidirectional Mapping between Ontologies and Linguistic Knowledge - Hans-UlrichKrieger - UlrichSchäfer + Hans-UlrichKrieger + UlrichSchäfer 588–596 C10-2067 krieger-schafer-2010-dl @@ -2049,7 +2049,7 @@ Jey HanLau DavidNewman SarvnazKarimi - TimothyBaldwin + TimothyBaldwin 605–613 C10-2069 lau-etal-2010-best @@ -2061,19 +2061,19 @@ BeateDorow ChristianScheible UlrichHeid - HinrichSchütze + HinrichSchütze 614–622 C10-2070 laws-etal-2010-linguistically A Post-processing Approach to Statistical Word Alignment Reflecting Alignment Tendency between Part-of-speeches - Jae-HeeLee + Jae-HeeLee Seung-WookLee GumwonHong Young-SookHwang Sang-BumKim - Hae-ChangRim + Hae-ChangRim 623–629 C10-2071 lee-etal-2010-post @@ -2091,8 +2091,8 @@ <fixed-case>EM</fixed-case>-based Hybrid Model for Bilingual Terminology Extraction from Comparable Corpora - LianhauLee - AitiAw + LianhauLee + AitiAw MinZhang HaizhouLi 639–646 @@ -2101,8 +2101,8 @@ Text Mining for Automatic Image Tagging - Chee WeeLeong - RadaMihalcea + Chee WeeLeong + RadaMihalcea SamerHassan 647–655 C10-2074 @@ -2112,8 +2112,8 @@ Unsupervised Discriminative Language Model Training for Machine Translation using Simulated Confusion Sets ZhifeiLi ZiyuanWang - SanjeevKhudanpur - JasonEisner + SanjeevKhudanpur + JasonEisner 656–664 C10-2075 li-etal-2010-unsupervised @@ -2121,9 +2121,9 @@ Combining Constituent and Dependency Syntactic Views for <fixed-case>C</fixed-case>hinese Semantic Role Labeling ShiqiLi - QinLu - TiejunZhao - PengyuanLiu + QinLu + TiejunZhao + PengyuanLiu HanjingLi 665–673 C10-2076 @@ -2161,10 +2161,10 @@ Reexamination on Potential for Personalization in Web Search DarenLi - MuyunYang - HaoLiangQi + MuyunYang + HaoLiangQi ShengLi - TiejunZhao + TiejunZhao 701–709 C10-2080 li-etal-2010-reexamination @@ -2180,9 +2180,9 @@ <fixed-case>T</fixed-case>ibetan Number Identification Based on Classification of Number Components in <fixed-case>T</fixed-case>ibetan Word Segmentation - HuidanLiu - WeinaZhao - MinghuaNuo + HuidanLiu + WeinaZhao + MinghuaNuo LiJiang JianWu YepingHe @@ -2215,9 +2215,9 @@ Visually and Phonologically Similar Characters in Incorrect Simplified <fixed-case>C</fixed-case>hinese Words Chao-LinLiu - Min-HuaLai + Min-HuaLai Yi-HsuanChuang - Chia-YingLee + Chia-YingLee 739–747 C10-2085 liu-etal-2010-visually @@ -2226,9 +2226,9 @@ Head-modifier Relation based Non-lexical Reordering Model for Phrase-Based Translation ShuiLiu ShengLi - TiejunZhao + TiejunZhao MinZhang - PengyuanLiu + PengyuanLiu 748–756 C10-2086 liu-etal-2010-head @@ -2236,9 +2236,9 @@ Dependency-Driven Feature-based Learning for Extracting Protein-Protein Interactions from Biomedical Text BingLiu - LonghuaQian + LonghuaQian HonglingWang - GuodongZhou + GuodongZhou 757–765 C10-2087 liu-etal-2010-dependency @@ -2271,9 +2271,9 @@ Finite-state Scriptural Translation - M. G. AbbasMalik - ChristianBoitet - PushpakBhattacharyya + M. G. AbbasMalik + ChristianBoitet + PushpakBhattacharyya 791–800 C10-2091 malik-etal-2010-finite @@ -2298,8 +2298,8 @@ Instance Sense Induction from Attribute Sets RicardoMartin-Brualla EnriqueAlfonseca - MariusPasca - KeithHall + MariusPasca + KeithHall EnriqueRobledo-Arnuncio MassimilianoCiaramita 819–827 @@ -2336,7 +2336,7 @@ Imbalanced Classification Using Dictionary-based Prototypes and Hierarchical Decision Rules for Entity Sense Disambiguation TingtingMu XinglongWang - Jun’ichiTsujii + Jun’ichiTsujii SophiaAnaniadou 851–859 C10-2098 @@ -2345,7 +2345,7 @@ A Vector Space Model for Subjectivity Classification in <fixed-case>U</fixed-case>rdu aided by Co-Training SmruthiMukund - RohiniSrihari + RohiniSrihari 860–868 C10-2099 mukund-srihari-2010-vector @@ -2404,7 +2404,7 @@ A Study on Position Information in Document Summarization YouOuyang WenjieLi - QinLu + QinLu RenxianZhang 919–927 C10-2106 @@ -2421,7 +2421,7 @@ Word Space Modeling for Measuring Semantic Specificity in <fixed-case>C</fixed-case>hinese Ching-FenPan - Shu-KaiHsieh + Shu-KaiHsieh 937–945 C10-2108 pan-hsieh-2010-word @@ -2429,14 +2429,14 @@ <fixed-case>MT</fixed-case> Error Detection for Cross-Lingual Question Answering KristenParton - KathleenMcKeown + KathleenMcKeown 946–954 C10-2109 parton-mckeown-2010-mt The Role of Queries in Ranking Labeled Instances Extracted from Text - MariusPaşca + MariusPaşca 955–962 C10-2110 pasca-2010-role @@ -2460,8 +2460,8 @@ Filling Knowledge Gaps in Text for Machine Reading - AnselmoPeñas - EduardHovy + AnselmoPeñas + EduardHovy 979–987 C10-2113 penas-hovy-2010-filling @@ -2494,8 +2494,8 @@ Automatic Committed Belief Tagging VinodkumarPrabhakaran - OwenRambow - MonaDiab + OwenRambow + MonaDiab 1014–1022 C10-2117 prabhakaran-etal-2010-automatic @@ -2503,16 +2503,16 @@ Realization of Discourse Relations by Other Means: Alternative Lexicalizations RashmiPrasad - AravindJoshi - BonnieWebber + AravindJoshi + BonnieWebber 1023–1031 C10-2118 prasad-etal-2010-realization Designing Agreement Features for Realization Ranking - RajakrishnanRajkumar - MichaelWhite + RajakrishnanRajkumar + MichaelWhite 1032–1040 C10-2119 rajkumar-white-2010-designing @@ -2521,7 +2521,7 @@ Web-based and combined language models: a case study on noun compound identification CarlosRamisch AlineVillavicencio - ChristianBoitet + ChristianBoitet 1041–1049 C10-2120 ramisch-etal-2010-web @@ -2538,10 +2538,10 @@ Multilingual Summarization Evaluation without Human Models HoracioSaggion - Juan-ManuelTorres-Moreno - Iriada Cunha + Juan-ManuelTorres-Moreno + Iriada Cunha EricSanJuan - PatriciaVelázquez-Morales + PatriciaVelázquez-Morales 1059–1067 C10-2122 saggion-etal-2010-multilingual @@ -2549,15 +2549,15 @@ Argument Optionality in the <fixed-case>L</fixed-case>in<fixed-case>GO</fixed-case> Grammar Matrix SafiyyahSaleem - Emily M.Bender + Emily M.Bender 1068–1076 C10-2123 saleem-bender-2010-argument Log-linear weight optimisation via <fixed-case>B</fixed-case>ayesian Adaptation in Statistical Machine Translation - GermánSanchis-Trilles - FranciscoCasacuberta + GermánSanchis-Trilles + FranciscoCasacuberta 1077–1085 C10-2124 sanchis-trilles-casacuberta-2010-log @@ -2565,18 +2565,18 @@ A Global Relaxation Labeling Approach to Coreference Resolution EmiliSapena - LluísPadró - JordiTurmo + LluísPadró + JordiTurmo 1086–1094 C10-2125 sapena-etal-2010-global “Expresses-an-opinion-about”: using corpus statistics in an information extraction approach to opinion mining - Asad B.Sayeed + Asad B.Sayeed Hieu C.Nguyen Timothy J.Meyer - AmyWeinberg + AmyWeinberg 1095–1103 C10-2126 sayeed-etal-2010-expresses @@ -2586,7 +2586,7 @@ ChristianScheible FlorianLaws LukasMichelbacher - HinrichSchütze + HinrichSchütze 1104–1112 C10-2127 scheible-etal-2010-sentiment @@ -2602,7 +2602,7 @@ Informed ways of improving data-driven dependency parsing for <fixed-case>G</fixed-case>erman WolfgangSeeker BerndBohnet - LiljaØvrelid + LiljaØvrelid JonasKuhn 1122–1130 C10-2129 @@ -2632,7 +2632,7 @@ Towards Automatic Building of Document Keywords JoaquimSilva - GabrielLopes + GabrielLopes 1149–1157 C10-2132 silva-lopes-2010-towards @@ -2641,7 +2641,7 @@ Shallow Information Extraction from Medical Forum Data ParikshitSondhi ManishGupta - ChengXiangZhai + ChengXiangZhai JuliaHockenmaier 1158–1166 C10-2133 @@ -2670,7 +2670,7 @@ YangLiu HaitaoMi HongmeiZhao - YajuanLv + YajuanLv QunLiu 1185–1193 C10-2136 @@ -2679,7 +2679,7 @@ Semi-supervised Semantic Pattern Discovery with Guidance from Unsupervised Pattern Clusters AngSun - RalphGrishman + RalphGrishman 1194–1202 C10-2137 sun-grishman-2010-semi @@ -2688,16 +2688,16 @@ Utilizing Variability of Time and Term Content, within and across Users in Session Detection ShuqiSun ShengLi - MuyunYang - HaoliangQi - TiejunZhao + MuyunYang + HaoliangQi + TiejunZhao 1203–1210 C10-2138 sun-etal-2010-utilizing Word-based and Character-based Word Segmentation Models: Comparison and Combination - WeiweiSun + WeiweiSun 1211–1219 C10-2139 sun-2010-word @@ -2705,8 +2705,8 @@ Confidence Measures for Error Discrimination in an Interactive Predictive Parsing Framework RicardoSánchez-Sáez - Joan AndreuSánchez - José MiguelBenedí + Joan AndreuSánchez + José MiguelBenedí 1220–1228 C10-2140 sanchez-saez-etal-2010-confidence @@ -2714,7 +2714,7 @@ Learning Web Query Patterns for Imitating <fixed-case>W</fixed-case>ikipedia Articles ShoheiTanaka - NaoakiOkazaki + NaoakiOkazaki MitsuruIshizuka 1229–1237 C10-2141 @@ -2769,7 +2769,7 @@ <fixed-case>U</fixed-case>rdu and <fixed-case>H</fixed-case>indi: Translation and sharing of linguistic resources KarthikVisweswariah VijilChenthamarakshan - NandakishoreKambhatla + NandakishoreKambhatla 1283–1291 C10-2147 visweswariah-etal-2010-urdu @@ -2777,7 +2777,7 @@ Phrase Structure Parsing with Dependency Structure ZhiguoWang - ChengqingZong + ChengqingZong 1292–1300 C10-2148 wang-zong-2010-phrase @@ -2785,16 +2785,16 @@ Automatic Generation of Semantic Fields for Annotating Web Images GangWang - Tat SengChua + Tat SengChua Chong-WahNgo - Yong ChengWang + Yong ChengWang 1301–1309 C10-2149 wang-etal-2010-automatic-generation Automatic Extraction of Cue Phrases for Cross-Corpus Dialogue Act Classification - NickWebb + NickWebb MichaelFerguson 1310–1317 C10-2150 @@ -2816,7 +2816,7 @@ <fixed-case>MIEA</fixed-case>: a Mutual Iterative Enhancement Approach for Cross-Domain Sentiment Classification QiongWu SongboTan - XueqiCheng + XueqiCheng MiyiDuan 1327–1335 C10-2152 @@ -2825,7 +2825,7 @@ Exploring the Use of Word Relation Features for Sentiment Classification RuiXia - ChengqingZong + ChengqingZong 1336–1344 C10-2153 xia-zong-2010-exploring @@ -2904,14 +2904,14 @@ YusukeMiyao XiangliWang TakuyaMatsuzaki - JunichiTsujii + JunichiTsujii 1417–1425 C10-2162 yu-etal-2010-semi Cross-Lingual Induction for Deep Broad-Coverage Syntax: A Case Study on <fixed-case>G</fixed-case>erman Participles - SinaZarrieß + SinaZarrieß AoifeCahill JonasKuhn ChristianRohrer @@ -2963,10 +2963,10 @@ Chart Pruning for Fast Lexicalised-Grammar Parsing YueZhang - Byung-GyuAhn + Byung-GyuAhn StephenClark CurtVan Wyk - James R.Curran + James R.Curran LauraRimell 1471–1479 C10-2168 @@ -2983,7 +2983,7 @@ Sentence Ordering with Event-Enriched Semantics and Two-Layered Clustering for Multi-Document News Summarization RenxianZhang WenjieLi - QinLu + QinLu 1489–1497 C10-2170 zhang-etal-2010-sentence @@ -2999,12 +2999,12 @@ Predicting Discourse Connectives for Implicit Discourse Relation Recognition - Zhi-MinZhou + Zhi-MinZhou YuXu - Zheng-YuNiu - ManLan + Zheng-YuNiu + ManLan JianSu - Chew LimTan + Chew LimTan 1507–1514 C10-2172 zhou-etal-2010-predicting @@ -3021,7 +3021,7 @@ Dual-Space Re-ranking Model for Document Retrieval DongZhou - SeamusLawless + SeamusLawless JinmingMin VincentWade 1524–1532 @@ -3031,10 +3031,10 @@ All in Strings: a Powerful String-based Automatic <fixed-case>MT</fixed-case> Evaluation Metric with Multiple Granularities JunguoZhu - MuyunYang + MuyunYang BoWang ShengLi - TiejunZhao + TiejunZhao 1533–1540 C10-2175 zhu-etal-2010-strings @@ -3096,7 +3096,7 @@ <fixed-case>P</fixed-case>y<fixed-case>CWN</fixed-case>: a Python Module for <fixed-case>C</fixed-case>hinese <fixed-case>W</fixed-case>ordnet Yueh-ChengWu - Shu-KaiHsieh + Shu-KaiHsieh 5–8 C10-3002 wu-hsieh-2010-pycwn @@ -3105,7 +3105,7 @@ Annotation Tool for Discourse in <fixed-case>PDT</fixed-case> JiříMírovský LucieMladová - ZdeněkŽabokrtský + ZdeněkŽabokrtský 9–12 C10-3003 mirovsky-etal-2010-annotation-tool @@ -3121,7 +3121,7 @@ <fixed-case>H</fixed-case>ave2eat: a Restaurant Finder with Review Summarization for Mobile Phones - GiuseppeFabbrizio + GiuseppeFabbrizio NarendraGupta SvevaBesana PremkumarMani @@ -3168,7 +3168,7 @@ <fixed-case>P</fixed-case>an<fixed-case>L</fixed-case>ex and <fixed-case>LEXTRACT</fixed-case>: Translating all Words of all Languages of the World - TimothyBaldwin + TimothyBaldwin JonathanPool SusanColowick 37–40 @@ -3187,8 +3187,8 @@ Wei-TeChen Su-ChuLin Shu-LingHuang - You-ShanChung - Keh-JiannChen + You-ShanChung + Keh-JiannChen 45–48 C10-3012 chen-etal-2010-e @@ -3214,7 +3214,7 @@ Multiword Expressions in the wild? The mwetoolkit comes in handy CarlosRamisch AlineVillavicencio - ChristianBoitet + ChristianBoitet 57–60 C10-3015 ramisch-etal-2010-multiword diff --git a/data/xml/C12.xml b/data/xml/C12.xml index 8c0ddfc2de..d1b5f725c8 100644 --- a/data/xml/C12.xml +++ b/data/xml/C12.xml @@ -5,7 +5,7 @@ Proceedings of COLING 2012 C12-1 MartinKay - ChristianBoitet + ChristianBoitet The COLING 2012 Organizing Committee
Mumbai, India
December @@ -20,7 +20,7 @@ Multi-Dimensional Feature Merger for Question Answering ApoorvAgarwal J. WilliamMurdock - JenniferChu-Carroll + JenniferChu-Carroll AdamLally AdityaKalyanpur 1–16 @@ -40,8 +40,8 @@
Automatic Detection of Point of View Differences in <fixed-case>W</fixed-case>ikipedia - KhalidAl Khatib - HinrichSchütze + KhalidAl Khatib + HinrichSchütze CathleenKantner 33–50 C12-1003 @@ -50,14 +50,14 @@ <fixed-case>S</fixed-case>peed<fixed-case>R</fixed-case>ead: A Fast Named Entity Recognition Pipeline RamiAl-Rfou’ - StevenSkiena + StevenSkiena 51–66 C12-1004 al-rfou-skiena-2012-speedread Experiments with Term Translation - MihaelArcan + MihaelArcan ChristianFedermann PaulBuitelaar 67–82 @@ -69,7 +69,7 @@ MohammedAttia YounesSamih KhaledShaalan - Josefvan Genabith + Josefvan Genabith 83–96 C12-1006 attia-etal-2012-floating @@ -77,8 +77,8 @@ Contribution of Complex Lexical Information to Solve Syntactic Ambiguity in <fixed-case>B</fixed-case>asque AitziberAtutxa - EnekoAgirre - KepaSarasola + EnekoAgirre + KepaSarasola 97–114 C12-1007 atutxa-etal-2012-contribution @@ -103,10 +103,10 @@ Translation Quality-Based Supplementary Data Selection by Incremental Update of Translation Models PratyushBanerjee - Sudip KumarNaskar + Sudip KumarNaskar JohannRoturier AndyWay - Josefvan Genabith + Josefvan Genabith 149–166 C12-1010 banerjee-etal-2012-translation @@ -133,18 +133,18 @@ LucianoBarbosa Vivek KumarRangarajan Sridhar MahsaYarmohammadi - SrinivasBangalore + SrinivasBangalore 201–214 C12-1013 barbosa-etal-2012-harvesting An Evaluation of Statistical Post-Editing Systems Applied to <fixed-case>RBMT</fixed-case> and <fixed-case>SMT</fixed-case> Systems - HannaBéchara - RaphaëlRubino + HannaBéchara + RaphaëlRubino YifanHe YanjunMa - Josefvan Genabith + Josefvan Genabith 215–230 C12-1014 bechara-etal-2012-evaluation @@ -152,19 +152,19 @@ <fixed-case>P</fixed-case>rague <fixed-case>D</fixed-case>ependency <fixed-case>T</fixed-case>reebank 2.5 – a Revisited Version of <fixed-case>PDT</fixed-case> 2.0 EduardBejček - JarmilaPanevová + JarmilaPanevová JanPopelka PavelStraňák MagdaŠevčíková JanŠtěpánek - ZdeněkŽabokrtský + ZdeněkŽabokrtský 231–246 C12-1015 bejcek-etal-2012-prague Deriving a Lexicon for a Precision Grammar from Language Documentation Resources: A Case Study of <fixed-case>C</fixed-case>hintang - Emily M.Bender + Emily M.Bender RobertSchikowski BalthasarBickel 247–262 @@ -173,7 +173,7 @@ Quantifying Semantics using Complex Network Analysis - ChrisBiemann + ChrisBiemann StefanieRoos KarstenWeihe 263–278 @@ -183,7 +183,7 @@ Improvements to Training an <fixed-case>RNN</fixed-case> parser RichardBillingsley - JamesCurran + JamesCurran 279–294 C12-1018 billingsley-curran-2012-improvements @@ -205,17 +205,17 @@ HienNguyen NirwanSharma Anne-MarieRobinson - ElaineO’Mahony + ElaineO’Mahony BenDarvill - ChrisMellish - Renévan der Wal + ChrisMellish + Renévan der Wal 311–324 C12-1020 blake-etal-2012-natural Studying the Effect of Input Size for <fixed-case>B</fixed-case>ayesian Word Segmentation on the <fixed-case>P</fixed-case>rovidence Corpus - BenjaminBörschinger + BenjaminBörschinger KatherineDemuth MarkJohnson 325–340 @@ -225,8 +225,8 @@ <fixed-case>B</fixed-case>ayesian Language Modelling of <fixed-case>G</fixed-case>erman Compounds Jan A.Botha - ChrisDyer - PhilBlunsom + ChrisDyer + PhilBlunsom 341–356 C12-1022 botha-etal-2012-bayesian @@ -235,7 +235,7 @@ Can <fixed-case>S</fixed-case>panish Be Simpler? <fixed-case>L</fixed-case>ex<fixed-case>S</fixed-case>i<fixed-case>S</fixed-case>: Lexical Simplification for <fixed-case>S</fixed-case>panish StefanBott LuzRello - BiljanaDrndarevic + BiljanaDrndarevic HoracioSaggion 357–374 C12-1023 @@ -262,9 +262,9 @@ Identifying <fixed-case>U</fixed-case>rdu Complex Predication via Bigram Extraction MiriamButt TinaBögel - AnnetteHautli + AnnetteHautli SebastianSulger - TafseerAhmed + TafseerAhmed 409–424 C12-1026 butt-etal-2012-identifying @@ -272,7 +272,7 @@ Native Language Identification using Recurring <tex-math>n</tex-math>-grams – Investigating Abstraction and Domain Dependence SerhiyBykh - DetmarMeurers + DetmarMeurers 425–440 C12-1027 bykh-meurers-2012-native @@ -281,7 +281,7 @@ Analysis and Enhancement of Wikification for Microblogs with Context Expansion TaylorCassidy HengJi - Lev-ArieRatinov + Lev-ArieRatinov ArkaitzZubiaga HongzhaoHuang 441–456 @@ -291,7 +291,7 @@ On the Effectiveness of using Sentence Compression Models for Query-Focused Multi-Document Summarization YlliasChali - Sadid A.Hasan + Sadid A.Hasan 457–474 C12-1029 chali-hasan-2012-effectiveness @@ -299,14 +299,14 @@ Towards Automatic Topical Question Generation YlliasChali - Sadid A.Hasan + Sadid A.Hasan 475–492 C12-1030 chali-hasan-2012-towards Adjective Deletion for Linguistic Steganography and Secret Sharing - Ching-YunChang + Ching-YunChang StephenClark 493–510 C12-1031 @@ -314,7 +314,7 @@ The Secret’s in the Word Order: Text-to-Text Generation for Linguistic Steganography - Ching-YunChang + Ching-YunChang StephenClark 511–528 C12-1032 @@ -345,7 +345,7 @@ LiqiangNie XiaHu XiangyuWang - Tat-SengChua + Tat-SengChua XiaomingZhang 561–576 C12-1035 @@ -362,17 +362,17 @@ Extraction of <fixed-case>R</fixed-case>ussian Sentiment Lexicon for Product Meta-Domain IliaChetviorkin - NataliaLoukachevitch + NataliaLoukachevitch 593–610 C12-1037 chetviorkin-loukachevitch-2012-extraction Problems in Evaluating Grammatical Error Detection Systems - MartinChodorow + MartinChodorow MarkusDickinson RossIsrael - JoelTetreault + JoelTetreault 611–628 C12-1038 chodorow-etal-2012-problems @@ -387,12 +387,12 @@ A Hybrid Approach to Finding Phenotype Candidates in Genetic Texts NigelCollier - Mai-VuTran - Hoang-QuynhLe + Mai-VuTran + Hoang-QuynhLe AnikaOellrich AiKawazoe MartinHall-May - DietrichRebholz-Schuhmann + DietrichRebholz-Schuhmann 647–662 C12-1040 collier-etal-2012-hybrid @@ -411,7 +411,7 @@ BobCoyne AlexKlapheke MasoudRouhizadeh - RichardSproat + RichardSproat DanielBauer 679–694 C12-1042 @@ -435,7 +435,7 @@ A Computational Cognitive Model for Semantic Sub-Network Extraction from Natural Language Queries - SumanDeb Roy + SumanDeb Roy WenjunZeng 727–744 C12-1045 @@ -444,7 +444,7 @@ Extraction of Domain-Specific Bilingual Lexicon from Comparable Corpora: Compositional Translation and Ranking EstelleDelpech - BéatriceDaille + BéatriceDaille EmmanuelMorin ClaireLemaire 745–762 @@ -457,7 +457,7 @@ ZhuminChen FuruWei MingZhou - Heung-YeungShum + Heung-YeungShum 763–780 C12-1047 duan-etal-2012-twitter @@ -497,7 +497,7 @@ Stacking of Dependency and Phrase Structure Parsers - RichárdFarkas + RichárdFarkas BerndBohnet 849–866 C12-1052 @@ -506,8 +506,8 @@ Semantic Cohesion Model for Phrase-Based <fixed-case>SMT</fixed-case> MinweiFeng - WeiweiSun - HermannNey + WeiweiSun + HermannNey 867–878 C12-1053 feng-etal-2012-semantic @@ -515,10 +515,10 @@ Comparing Taxonomies for Organising Collections of Documents SamuelFernando - MarkHall - EnekoAgirre - AitorSoroa - PaulClough + MarkHall + EnekoAgirre + AitorSoroa + PaulClough MarkStevenson 879–894 C12-1054 @@ -527,15 +527,15 @@ Modeling the Complexity of Manual Annotation Tasks: a Grid of Analysis KarënFort - AdelineNazarenko - SophieRosset + AdelineNazarenko + SophieRosset 895–910 C12-1055 fort-etal-2012-modeling Extractive Multi-Document Summarization with Integer Linear Programming and Support Vector Regression - DimitriosGalanis + DimitriosGalanis GerasimosLampouras IonAndroutsopoulos 911–926 @@ -553,8 +553,8 @@ Structured Term Recognition in Medical Text - MichaelGlass - AlfioGliozzo + MichaelGlass + AlfioGliozzo 943–958 C12-1058 glass-gliozzo-2012-structured @@ -597,8 +597,8 @@ Understanding the Performance of Statistical <fixed-case>MT</fixed-case> Systems: A Linear Regression Framework - FranciscoGuzman - StephanVogel + FranciscoGuzman + StephanVogel 1029–1044 C12-1063 guzman-vogel-2012-understanding @@ -607,7 +607,7 @@ Geolocation Prediction in Social Media Data by Finding Location Indicative Words BoHan PaulCook - TimothyBaldwin + TimothyBaldwin 1045–1062 C12-1064 han-etal-2012-geolocation @@ -616,7 +616,7 @@ Readability Classification for <fixed-case>G</fixed-case>erman using Lexical, Syntactic, and Morphological Features JuliaHancke SowmyaVajjala - DetmarMeurers + DetmarMeurers 1063–1080 C12-1065 hancke-etal-2012-readability @@ -626,7 +626,7 @@ KazuoHara IkumiSuzuki MasashiShimbo - YujiMatsumoto + YujiMatsumoto 1081–1096 C12-1066 hara-etal-2012-walk @@ -653,8 +653,8 @@ Bridging the Gap between Intrinsic and Perceived Relevance in Snippet Generation JingHe - PabloDuboue - Jian-YunNie + PabloDuboue + Jian-YunNie 1129–1146 C12-1069 he-etal-2012-bridging @@ -682,7 +682,7 @@ Statistical Method of Building Dialect Language Models for <fixed-case>ASR</fixed-case> Systems NaokiHirayama ShinsukeMori - Hiroshi G.Okuno + Hiroshi G.Okuno 1179–1194 C12-1072 hirayama-etal-2012-statistical @@ -692,7 +692,7 @@ MartinHolub VincentKríž SilvieCinková - EckhardBick + EckhardBick 1195–1210 C12-1073 holub-etal-2012-tailored @@ -700,7 +700,7 @@ Method Mention Extraction from Scientific Research Papers HospiceHoungbo - Robert E.Mercer + Robert E.Mercer 1211–1222 C12-1074 houngbo-mercer-2012-method @@ -708,7 +708,7 @@ Context-Enhanced Personalized Social Summarization PoHu - DonghongJi + DonghongJi ChongTeng YujingGuo 1223–1238 @@ -727,7 +727,7 @@ JiaweiHan AliceLeung JohnHancock - ClareVoss + ClareVoss 1239–1256 C12-1076 huang-etal-2012-tweet @@ -736,7 +736,7 @@ Improved <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar Induction with Boundary Words and <fixed-case>B</fixed-case>ayesian Inference YunHuang MinZhang - Chew-LimTan + Chew-LimTan 1257–1274 C12-1077 huang-etal-2012-improved @@ -754,7 +754,7 @@ NaoyaInoue EkaterinaOvchinnikova KentaroInui - JerryHobbs + JerryHobbs 1291–1308 C12-1079 inoue-etal-2012-coreference @@ -779,14 +779,14 @@ Towards a Generic and Flexible Citation Classifier Based on a Faceted Classification Scheme CharlesJochim - HinrichSchütze + HinrichSchütze 1343–1358 C12-1082 jochim-schutze-2012-towards Semantics-Based Machine Translation with Hyperedge Replacement Grammars - BevanJones + BevanJones JacobAndreas DanielBauer Karl MoritzHermann @@ -806,9 +806,9 @@ Improving Topic Classification for Highly Inflective Languages - JurgitaKapociute-Dzikiene + JurgitaKapociute-Dzikiene FrederikVaassen - WalterDaelemans + WalterDaelemans AlgisKrupavičius 1393–1410 C12-1085 @@ -819,7 +819,7 @@ HiroakiKawasaki RyoheiSasano HiroyaTakamura - ManabuOkumura + ManabuOkumura 1411–1424 C12-1086 kawasaki-etal-2012-generating @@ -827,7 +827,7 @@ A Machine Learning Approach for Phenotype Name Recognition MaryamKhordad - Robert E.Mercer + Robert E.Mercer PeterRogan 1425–1440 C12-1087 @@ -835,10 +835,10 @@ Improving <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar Parse Reranking with Dependency Grammar Features - Sunghwan MacKim + Sunghwan MacKim DominickNg MarkJohnson - JamesCurran + JamesCurran 1441–1458 C12-1088 kim-etal-2012-improving @@ -855,7 +855,7 @@ Exploring Local and Global Semantic Information for Event Pronoun Resolution FangKong - GuodongZhou + GuodongZhou 1475–1488 C12-1090 kong-zhou-2012-exploring @@ -864,8 +864,8 @@ Semantic Processing of Compounds in <fixed-case>I</fixed-case>ndian Languages AmbaKulkarni SomaPaul - MalharKulkarni - AnilKumar + MalharKulkarni + AnilKumar NiteshSurtani 1489–1502 C12-1091 @@ -874,8 +874,8 @@ Unsupervised <fixed-case>J</fixed-case>apanese-<fixed-case>C</fixed-case>hinese Opinion Word Translation using Dependency Distance and Feature-Opinion Association Weight Guo-HauLai - Ying-MeiGuo - Richard Tzong-HanTsai + Ying-MeiGuo + Richard Tzong-HanTsai 1503–1518 C12-1092 lai-etal-2012-unsupervised @@ -884,7 +884,7 @@ On-line Trend Analysis with Topic Models: #twitter Trends Detection Topic Model Online Jey HanLau NigelCollier - TimothyBaldwin + TimothyBaldwin 1519–1534 C12-1093 lau-etal-2012-line @@ -899,7 +899,7 @@ Evaluating Different Methods for Automatically Collecting Large General Corpora for <fixed-case>B</fixed-case>asque from the Web - IgorLeturia + IgorLeturia 1553–1570 C12-1095 leturia-2012-evaluating @@ -908,7 +908,7 @@ Approximate Sentence Retrieval for Scalable and Efficient Example-Based Machine Translation JohannesLeveling DebasisGanguly - SandipanDandapat + SandipanDandapat GarethJones 1571–1586 C12-1096 @@ -928,7 +928,7 @@ SujianLi XunWang YeTian - BaobaoChang + BaobaoChang 1603–1618 C12-1098 li-etal-2012-update @@ -936,7 +936,7 @@ Employing Morphological Structures and Sememes for <fixed-case>C</fixed-case>hinese Event Extraction PeifengLi - GuodongZhou + GuodongZhou 1619–1634 C12-1099 li-zhou-2012-employing @@ -944,9 +944,9 @@ Joint Modeling of Trigger Identification and Event Type Determination in <fixed-case>C</fixed-case>hinese Event Extraction PeifengLi - QiaomingZhu + QiaomingZhu HongjunDiao - GuodongZhou + GuodongZhou 1635–1652 C12-1100 li-etal-2012-joint @@ -955,7 +955,7 @@ Integrating Surface and Abstract Features for Robust Cross-Domain <fixed-case>C</fixed-case>hinese Word Segmentation XiaoqingLi KunWang - ChengqingZong + ChengqingZong Keh-YihSu 1653–1670 C12-1101 @@ -1012,7 +1012,7 @@ Recognizing Personal Characteristics of Readers using Eye-Movements and Text Features PascualMartínez-Gómez TadayoshiHara - AkikoAizawa + AkikoAizawa 1747–1762 C12-1107 martinez-gomez-etal-2012-recognizing @@ -1028,7 +1028,7 @@ Using Distributional Similarity for Lexical Expansion in Knowledge-based Word Sense Disambiguation TristanMiller - ChrisBiemann + ChrisBiemann TorstenZesch IrynaGurevych 1781–1796 @@ -1038,7 +1038,7 @@ Revising the Compositional Method for Terminology Acquisition from Comparable Corpora EmmanuelMorin - BéatriceDaille + BéatriceDaille 1797–1810 C12-1110 morin-daille-2012-revising @@ -1063,7 +1063,7 @@ Sentiment Analysis in <fixed-case>T</fixed-case>witter with Lightweight Discourse Analysis SubhabrataMukherjee - PushpakBhattacharyya + PushpakBhattacharyya 1847–1864 C12-1113 mukherjee-bhattacharyya-2012-sentiment @@ -1071,7 +1071,7 @@ <fixed-case>Y</fixed-case>ou<fixed-case>C</fixed-case>at: Weakly Supervised <fixed-case>Y</fixed-case>outube Video Categorization System from Meta Data & User Comments using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et & <fixed-case>W</fixed-case>ikipedia SubhabrataMukherjee - PushpakBhattacharyya + PushpakBhattacharyya 1865–1882 C12-1114 mukherjee-bhattacharyya-2012-youcat @@ -1081,7 +1081,7 @@ PhilippeMuller StergosAfantenos PascalDenis - NicholasAsher + NicholasAsher 1883–1900 C12-1115 C12-1115e1 @@ -1091,7 +1091,7 @@ Incremental Learning of Affix Segmentation WondwossenMulugeta MichaelGasser - BayeYimam + BayeYimam 1901–1914 C12-1116 mulugeta-etal-2012-incremental @@ -1107,8 +1107,8 @@ Learning Effective and Interpretable Semantic Models using Non-Negative Sparse Embedding BrianMurphy - ParthaTalukdar - TomMitchell + ParthaTalukdar + TomMitchell 1933–1950 C12-1118 murphy-etal-2012-learning @@ -1116,7 +1116,7 @@ Combining <fixed-case>W</fixed-case>ordnet and Morphosyntactic Information in Terminology Clustering AgnieszkaMykowiecka - MalgorzataMarciniak + MalgorzataMarciniak 1951–1962 C12-1119 mykowiecka-marciniak-2012-combining @@ -1131,9 +1131,9 @@ Optimizing for Sentence-Level <fixed-case>BLEU</fixed-case>+1 Yields Short Translations - PreslavNakov - FranciscoGuzman - StephanVogel + PreslavNakov + FranciscoGuzman + StephanVogel 1979–1994 C12-1121 nakov-etal-2012-optimizing @@ -1142,7 +1142,7 @@ Grammarless Parsing for Joint Inference JasonNaradowsky TimVieira - DavidSmith + DavidSmith 1995–2010 C12-1122 naradowsky-etal-2012-grammarless @@ -1165,7 +1165,7 @@ A Comparison of Syntactic Reordering Methods for <fixed-case>E</fixed-case>nglish-<fixed-case>G</fixed-case>erman Machine Translation - JiříNavrátil + JiříNavrátil KarthikVisweswariah AnanthakrishnanRamanathan 2043–2058 @@ -1185,25 +1185,25 @@ DavidNewman NagendraKoilada Jey HanLau - TimothyBaldwin + TimothyBaldwin 2077–2092 C12-1127 newman-etal-2012-bayesian Exploiting Category-Specific Information for Multi-Document Summarization - Jun-PingNg + Jun-PingNg PraveenBysani ZihengLin Min-YenKan - Chew-LimTan + Chew-LimTan 2093–2108 C12-1128 ng-etal-2012-exploiting Improved Temporal Relation Classification using Dependency Parses and Selective Crowdsourced Annotations - Jun-PingNg + Jun-PingNg Min-YenKan 2109–2124 C12-1129 @@ -1212,7 +1212,7 @@ Accurate Unbounded Dependency Recovery using Generalized Categorial Grammars LuanNguyen - MartenVan Schijndel + MartenVan Schijndel WilliamSchuler 2125–2140 C12-1130 @@ -1220,10 +1220,10 @@ <fixed-case>T</fixed-case>ibetan Base Noun Phrase Identification Framework Based on <fixed-case>C</fixed-case>hinese-<fixed-case>T</fixed-case>ibetan Sentence Aligned Corpus - Ming HuaNuo - Hui DanLiu - Wei NaZhao - Long LongMa + Ming HuaNuo + Hui DanLiu + Wei NaZhao + Long LongMa JianWu Zhi MingDing 2141–2158 @@ -1240,7 +1240,7 @@ Attribute Extraction from Conjectural Queries - MariusPaşca + MariusPaşca 2177–2190 C12-1133 pasca-2012-attribute @@ -1257,7 +1257,7 @@ Simple and Effective Parameter Tuning for Domain Adaptation of Statistical Machine Translation PavelPecina AntonioToral - Josefvan Genabith + Josefvan Genabith 2209–2224 C12-1135 pecina-etal-2012-simple @@ -1267,7 +1267,7 @@ YulongPei WenpengYin QifengFan - Lian’enHuang + Lian’enHuang 2225–2242 C12-1136 pei-etal-2012-supervised @@ -1275,7 +1275,7 @@ Collective Search for Concept Disambiguation AnjaPilz - GerhardPaaß + GerhardPaaß 2243–2258 C12-1137 pilz-paass-2012-collective @@ -1283,18 +1283,18 @@ Who’s (Really) the Boss? Perception of Situational Power in Written Interactions VinodkumarPrabhakaran - OwenRambow - MonaDiab + OwenRambow + MonaDiab 2259–2274 C12-1138 prabhakaran-etal-2012-whos Bilingual Lexicon Construction from Comparable Corpora via Dependency Mapping - LonghuaQian + LonghuaQian HonglingWang - GuodongZhou - QiaomingZhu + GuodongZhou + QiaomingZhu 2275–2290 C12-1139 qian-etal-2012-bilingual @@ -1340,7 +1340,7 @@ KeisukeSakaguchi TomoyaMizumoto MamoruKomachi - YujiMatsumoto + YujiMatsumoto 2357–2374 C12-1144 sakaguchi-etal-2012-joint @@ -1351,7 +1351,7 @@ RohitPrasad ShivVitaladevuni MaciejPacula - MichaelCrystal + MichaelCrystal BrianMarx DeniseSloan JenniferVasterling @@ -1363,9 +1363,9 @@ Ant Colony Algorithm for the Unsupervised Word Sense Disambiguation of Texts: Comparison and Evaluation DidierSchwab - JérômeGoulian + JérômeGoulian AndonTchechmedjiev - HervéBlanchon + HervéBlanchon 2389–2404 C12-1146 schwab-etal-2012-ant @@ -1381,18 +1381,18 @@ Improving Supervised Sense Disambiguation with Web-Scale Selectors - H. AndrewSchwartz + H. AndrewSchwartz FernandoGomez - LyleUngar + LyleUngar 2423–2440 C12-1148 schwartz-etal-2012-improving The <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>ocial <fixed-case>M</fixed-case>edia <fixed-case>B</fixed-case>ank: a Treebank of Noisy User Generated Content - DjaméSeddah - BenoitSagot - MarieCandito + DjaméSeddah + BenoitSagot + MarieCandito VirginieMouilleron VanessaCombet 2441–2458 @@ -1402,14 +1402,14 @@ Initial Explorations on using <fixed-case>CRF</fixed-case>s for <fixed-case>T</fixed-case>urkish Named Entity Recognition Gökhan AkınŞeker - GülşenEryiğit + GülşenEryiğit 2459–2474 C12-1150 seker-eryigit-2012-initial Differential Evolution Based Feature Selection and Classifier Ensemble for Named Entity Recognition - Utpal KumarSikdar + Utpal KumarSikdar AsifEkbal SriparnaSaha 2475–2490 @@ -1419,8 +1419,8 @@ Noun Group and Verb Group Identification for <fixed-case>H</fixed-case>indi SmritiSingh - Om P.Damani - Vaijayanthi M.Sarma + Om P.Damani + Vaijayanthi M.Sarma 2491–2506 C12-1152 singh-etal-2012-noun @@ -1429,7 +1429,7 @@ Named Entity Recognition System for <fixed-case>U</fixed-case>rdu UmrinderPalSingh VishalGoyal - Gurpreet SinghLehal + Gurpreet SinghLehal 2507–2518 C12-1153 singh-etal-2012-named @@ -1437,19 +1437,19 @@ Easy-first Coreference Resolution VeselinStoyanov - JasonEisner + JasonEisner 2519–2534 C12-1154 stoyanov-eisner-2012-easy Modeling Leadership and Influence in Multi-party Online Discourse - TomekStrzalkowski + TomekStrzalkowski SamiraShaikh TingLiu - George AaronBroadwell + George AaronBroadwell JennyStromer-Galley - SarahTaylor + SarahTaylor UmitBoz VeenaRavishankar XiaoaiRen @@ -1479,10 +1479,10 @@ Native Tongues, Lost and Found: Resources and Empirical Evaluations in Native Language Identification - JoelTetreault + JoelTetreault DanielBlanchard AoifeCahill - MartinChodorow + MartinChodorow 2585–2602 C12-1158 @@ -1500,7 +1500,7 @@ Efficient Discrimination Between Closely Related Languages - JörgTiedemann + JörgTiedemann NikolaLjubešić 2619–2634 C12-1160 @@ -1524,7 +1524,7 @@ Implicitness of Discourse Relations - FatemehTorabi Asr + FatemehTorabi Asr VeraDemberg 2669–2684 C12-1163 @@ -1532,16 +1532,16 @@ Combining Statistical Translation Techniques for Cross-Language Information Retrieval - FerhanTure + FerhanTure JimmyLin - DouglasOard + DouglasOard 2685–2702 C12-1164 ture-etal-2012-combining Multi-way Tensor Factorization for Unsupervised Lexical Acquisition - TimVan de Cruys + TimVan de Cruys LauraRimell ThierryPoibeau AnnaKorhonen @@ -1552,7 +1552,7 @@ Sub-corpora Sampling with an Application to Bilingual Lexicon Extraction IvanVulić - Marie-FrancineMoens + Marie-FrancineMoens 2721–2738 C12-1166 vulic-moens-2012-sub @@ -1561,7 +1561,7 @@ The Utility of Discourse Structure in Identifying Resolved Threads in Technical User Forums LiWang Su NamKim - TimothyBaldwin + TimothyBaldwin 2739–2756 C12-1167 wang-etal-2012-utility @@ -1579,7 +1579,7 @@ <fixed-case>C</fixed-case>hinese Evaluative Information Analysis YiouWang - Jun’ichiKazama + Jun’ichiKazama TakuyaKawada KentaroTorisawa 2773–2788 @@ -1588,7 +1588,7 @@ Harnessing the <fixed-case>CRF</fixed-case> Complexity with Domain-Specific Constraints. The Case of Morphosyntactic Tagging of a Highly Inflected Language - JakubWaszczuk + JakubWaszczuk 2789–2804 C12-1170 waszczuk-2012-harnessing @@ -1598,7 +1598,7 @@ YotaroWatanabe JuntaMizuno EricNichols - NaoakiOkazaki + NaoakiOkazaki KentaroInui 2805–2820 C12-1171 @@ -1621,7 +1621,7 @@ HitoshiYamamoto ShigekiMatsuda ChioriHori - HidekiKashioka + HidekiKashioka 2835–2850 C12-1173 wu-etal-2012-factored @@ -1646,7 +1646,7 @@ Unsupervised Discriminative Induction of Synchronous Grammar for Machine Translation XinyanXiao - DeyiXiong + DeyiXiong YangLiu QunLiu ShouxunLin @@ -1658,8 +1658,8 @@ Paraphrasing for Style WeiXu AlanRitter - BillDolan - RalphGrishman + BillDolan + RalphGrishman ColinCherry 2899–2914 C12-1177 @@ -1676,9 +1676,9 @@ <fixed-case>ISO</fixed-case>-<fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> Event Extraction in <fixed-case>P</fixed-case>ersian Text YadollahYaghoobzadeh - GholamrezaGhassem-sani + GholamrezaGhassem-sani Seyed AbolghasemMirroshandel - MahbanehEshaghzadeh + MahbanehEshaghzadeh 2931–2944 C12-1179 yaghoobzadeh-etal-2012-iso @@ -1700,7 +1700,7 @@ WenpengYin LifuHuang YulongPei - Lian’enHuang + Lian’enHuang 2961–2976 C12-1181 yin-etal-2012-relationlistwise @@ -1710,7 +1710,7 @@ WenpengYin YulongPei FanZhang - Lian’enHuang + Lian’enHuang 2977–2992 C12-1182 yin-etal-2012-senttopic @@ -1737,7 +1737,7 @@ FeifeiZhai JiajunZhang YuZhou - ChengqingZong + ChengqingZong 3019–3036 C12-1185 zhai-etal-2012-machine @@ -1747,7 +1747,7 @@ FeifeiZhai JiajunZhang YuZhou - ChengqingZong + ChengqingZong 3037–3054 C12-1186 zhai-etal-2012-tree @@ -1777,21 +1777,21 @@ A Lazy Learning Model for Entity Linking using Query-Specific Information WeiZhang JianSu - Chew-LimTan + Chew-LimTan YunboCao - Chin-YewLin + Chin-YewLin 3089–3104 C12-1189 zhang-etal-2012-lazy The Use of Dependency Relation Graph to Enhance the Term Weighting in Question Retrieval - WeinanZhang - ZhaoyanMing + WeinanZhang + ZhaoyanMing YuZhang LiqiangNie TingLiu - Tat-SengChua + Tat-SengChua 3105–3120 C12-1190 zhang-etal-2012-use @@ -1799,7 +1799,7 @@ Long-Tail Distributions and Unsupervised Learning of Morphology QiuyeZhao - MitchMarcus + MitchMarcus 3121–3136 C12-1191 zhao-marcus-2012-long @@ -1837,7 +1837,7 @@ Proceedings of COLING 2012: Posters C12-2 MartinKay - ChristianBoitet + ChristianBoitet The COLING 2012 Organizing Committee
Mumbai, India
December @@ -1850,7 +1850,7 @@ <fixed-case>K</fixed-case>-Best Spanning Tree Dependency Parsing With Verb Valency Lexicon Reranking - ZeljkoAgic + ZeljkoAgic 1–12 C12-2001 agic-2012-k @@ -1867,7 +1867,7 @@ Automatic Bilingual Phrase Extraction from Comparable Corpora AhmetAker YangFeng - RobertGaizauskas + RobertGaizauskas 23–32 C12-2003 aker-etal-2012-automatic @@ -1882,14 +1882,14 @@ Mapping <fixed-case>A</fixed-case>rabic <fixed-case>W</fixed-case>ikipedia into the Named Entities Taxonomy FahdAlotaibi - MarkLee + MarkLee 43–52 C12-2005 alotaibi-lee-2012-mapping Probabilistic Refinement Algorithms for the Generation of Referring Expressions - RominaAltamirano + RominaAltamirano CarlosAreces LucianaBenotti 53–62 @@ -1905,9 +1905,9 @@ Cross-Lingual Sentiment Analysis for <fixed-case>I</fixed-case>ndian Languages using Linked <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>ets - BalamuraliA.R. + BalamuraliA.R. AdityaJoshi - PushpakBhattacharyya + PushpakBhattacharyya 73–82 C12-2008 a-r-etal-2012-cross @@ -1923,7 +1923,7 @@ Collocation Extraction using Parallel Corpus - KavoshAsadi Atui + KavoshAsadi Atui HeshaamFaili KavehAssadi Atuie 93–102 @@ -1936,7 +1936,7 @@ PavelPecina YounesSamih KhaledShaalan - Josefvan Genabith + Josefvan Genabith 103–112 C12-2011 attia-etal-2012-improved @@ -1990,7 +1990,7 @@ Does Similarity Matter? The Case of Answer Extraction from Technical Discussion Forums - RoseCatherine + RoseCatherine AmitSingh RashmiGangadharaiah DineshRaghu @@ -2018,7 +2018,7 @@ Impact of Less Skewed Distributions on Efficiency and Effectiveness of Biomedical Relation Extraction Md. Faisal MahbubChowdhury - AlbertoLavelli + AlbertoLavelli 205–216 C12-2021 chowdhury-lavelli-2012-impact @@ -2026,7 +2026,7 @@ Lattice Rescoring for Speech Recognition using Large Scale Distributed Language Models EuisokChung - Hyung-BaeJeon + Hyung-BaeJeon Jeon-GuePark Yun-KeunLee 217–224 @@ -2037,7 +2037,7 @@ Morphological Analyzer for Affix Stacking Languages: A Case Study of <fixed-case>M</fixed-case>arathi RajDabre ArchanaAmberkar - PushpakBhattacharyya + PushpakBhattacharyya 225–234 C12-2023 dabre-etal-2012-morphological @@ -2054,15 +2054,15 @@ Coreference Clustering using Column Generation JanDe Belder - Marie-FrancineMoens + Marie-FrancineMoens 245–254 C12-2025 de-belder-moens-2012-coreference Metric Learning for Graph-Based Domain Adaptation - ParamveerDhillon - ParthaTalukdar + ParamveerDhillon + ParthaTalukdar KobyCrammer 255–264 C12-2026 @@ -2072,7 +2072,7 @@ Automatic Hashtag Recommendation for Microblogs using Topic-Specific Translation Model ZhuoyeDing QiZhang - XuanjingHuang + XuanjingHuang 265–274 C12-2027 ding-etal-2012-automatic @@ -2087,15 +2087,15 @@ Token Level Identification of Linguistic Code Switching HebaElfardy - MonaDiab + MonaDiab 287–296 C12-2029 elfardy-diab-2012-token Parenthetical Classification for Information Extraction - IsmailEl Maarouf - JeanneVillaneau + IsmailEl Maarouf + JeanneVillaneau 297–308 C12-2030 el-maarouf-villaneau-2012-parenthetical @@ -2113,8 +2113,8 @@ Dealing with Input Noise in Statistical Machine Translation - LluisFormiga - Jose A. R.Fonollosa + LluisFormiga + Jose A. R.Fonollosa 319–328 C12-2032 formiga-fonollosa-2012-dealing @@ -2134,7 +2134,7 @@ UtpalGarain ArjunDas DavidDoermann - DouglasOard + DouglasOard 339–348 C12-2034 garain-etal-2012-leveraging @@ -2163,7 +2163,7 @@ Improving Dependency Parsing with Interlinear Glossed Text and Syntactic Projection RyanGeorgi FeiXia - WilliamLewis + WilliamLewis 371–380 C12-2037 georgi-etal-2012-improving @@ -2194,10 +2194,10 @@ Classifier-Based Tense Model for <fixed-case>SMT</fixed-case> - ZhengXianGong + ZhengXianGong MinZhang - ChewLimTan - GuoDongZhou + ChewLimTan + GuoDongZhou 411–420 C12-2041 gong-etal-2012-classifier @@ -2248,7 +2248,7 @@ Verb Temporality Analysis using <fixed-case>R</fixed-case>eichenbach’s Tense System AndréHorie - KumikoTanaka-Ishii + KumikoTanaka-Ishii MitsuruIshizuka 471–482 C12-2047 @@ -2264,9 +2264,9 @@ Comparing Word Relatedness Measures Based on <fixed-case>G</fixed-case>oogle <tex-math>n</tex-math>-grams - AminulIslam + AminulIslam EvangelosMilios - VladoKeselj + VladoKeselj 495–506 C12-2049 islam-etal-2012-comparing @@ -2274,7 +2274,7 @@ Two-Stage Bootstrapping for Anaphora Resolution BalajiJagan - T VGeetha + T VGeetha RanjaniParthasarathi 507–516 C12-2050 @@ -2282,15 +2282,15 @@ Explorations in the Speakers’ Interaction Experience and Self-Assessments - KristiinaJokinen + KristiinaJokinen 517–526 C12-2051 jokinen-2012-explorations Multimodal Signals and Holistic Interaction Structuring - KristiinaJokinen - GrahamWilcock + KristiinaJokinen + GrahamWilcock 527–538 C12-2052 jokinen-wilcock-2012-multimodal @@ -2299,9 +2299,9 @@ New Insights from Coarse Word Sense Disambiguation in the Crowd AdamKapelner KrishnaKaliannan - H. AndrewSchwartz - LyleUngar - DeanFoster + H. AndrewSchwartz + LyleUngar + DeanFoster 539–548 C12-2053 kapelner-etal-2012-new @@ -2309,8 +2309,8 @@ A Unified Sentence Space for Categorical Distributional-Compositional Semantics: Theory and Experiments DimitriKartsaklis - MehrnooshSadrzadeh - StephenPulman + MehrnooshSadrzadeh + StephenPulman 549–558 C12-2054 kartsaklis-etal-2012-unified @@ -2326,7 +2326,7 @@ Classification of Inconsistent Sentiment Words using Syntactic Constructions WiltrudKessler - HinrichSchütze + HinrichSchütze 569–578 C12-2056 kessler-schutze-2012-classification @@ -2358,14 +2358,14 @@ Decoder-based Discriminative Training of Phrase Segmentation for Statistical Machine Translation Hyoung-GyuLee - Hae-ChangRim + Hae-ChangRim 611–620 C12-2060 lee-rim-2012-decoder Glimpses of <fixed-case>A</fixed-case>ncient <fixed-case>C</fixed-case>hina from Classical <fixed-case>C</fixed-case>hinese Poems - JohnLee + JohnLee Tak-sumWong 621–632 C12-2061 @@ -2373,7 +2373,7 @@ Conversion between Scripts of <fixed-case>P</fixed-case>unjabi: Beyond Simple Transliteration - Gurpreet SinghLehal + Gurpreet SinghLehal Tejinder SinghSaini 633–642 C12-2062 @@ -2381,7 +2381,7 @@ Development of a Complete <fixed-case>U</fixed-case>rdu-<fixed-case>H</fixed-case>indi Transliteration System - Gurpreet SinghLehal + Gurpreet SinghLehal Tejinder SinghSaini 643–652 C12-2063 @@ -2399,8 +2399,8 @@ Phrase-Based Evaluation for Machine Translation LiangyouLi - ZhengxianGong - GuodongZhou + ZhengxianGong + GuodongZhou 663–672 C12-2065 li-etal-2012-phrase @@ -2417,7 +2417,7 @@ Active Learning for <fixed-case>C</fixed-case>hinese Word Segmentation ShoushanLi - GuodongZhou + GuodongZhou Chu-RenHuang 683–692 C12-2067 @@ -2448,7 +2448,7 @@ NadiTomeh GuangXiang IsabelTrancoso - AlanBlack + AlanBlack 713–722 C12-2070 ling-etal-2012-improving @@ -2456,7 +2456,7 @@ Expected Error Minimization with Ultraconservative Update for <fixed-case>SMT</fixed-case> LemaoLiu - TiejunZhao + TiejunZhao TaroWatanabe HailongCao ConghuiZhu @@ -2475,7 +2475,7 @@ Unsupervised Domain Adaptation for Joint Segmentation and <fixed-case>POS</fixed-case>-Tagging - YangLiu + YangLiu YueZhang 745–754 C12-2073 @@ -2528,12 +2528,12 @@ YannMathet AntoineWidlöcher KarënFort - ClaireFrançois + ClaireFrançois OlivierGalibert CyrilGrouin JulietteKahn - SophieRosset - PierreZweigenbaum + SophieRosset + PierreZweigenbaum 809–818 C12-2079 mathet-etal-2012-manual @@ -2586,7 +2586,7 @@ YutaHayashibe MamoruKomachi MasaakiNagata - YujiMatsumoto + YujiMatsumoto 863–872 C12-2084 mizumoto-etal-2012-effect @@ -2603,7 +2603,7 @@ Relational Structures and Models for Coreference Resolution Truc-Vien T.Nguyen - MassimoPoesio + MassimoPoesio 883–892 C12-2086 nguyen-poesio-2012-relational @@ -2620,7 +2620,7 @@ Lexical Categories for Improved Parsing of Web Data - LiljaØvrelid + LiljaØvrelid ArneSkjærholt 903–912 C12-2088 @@ -2628,8 +2628,8 @@ Text-To-Speech for Languages without an Orthography - SukhadaPalkar - AlanBlack + SukhadaPalkar + AlanBlack AlokParlikar 913–922 C12-2089 @@ -2640,7 +2640,7 @@ Braja GopalPatra KhumbarDebbarma DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 923–932 C12-2090 patra-etal-2012-part @@ -2650,7 +2650,7 @@ StephanPeitz ArneMauser JoernWuebker - HermannNey + HermannNey 933–942 C12-2091 peitz-etal-2012-forced @@ -2668,7 +2668,7 @@ XipengQiu FengJi JiayiZhao - XuanjingHuang + XuanjingHuang 951–964 C12-2093 qiu-etal-2012-joint @@ -2728,7 +2728,7 @@ Using Qualia Information to Identify Lexical Semantic Classes in an Unsupervised Clustering Task LaurenRomeo SaraMendes - NúriaBel + NúriaBel 1029–1038 C12-2100 romeo-etal-2012-using @@ -2738,7 +2738,7 @@ EwaRudnicka MarekMaziarz MaciejPiasecki - StanSzpakowicz + StanSzpakowicz 1039–1048 C12-2101 rudnicka-etal-2012-strategy @@ -2749,7 +2749,7 @@ DonghyeonLee InjaeLee SangdoHan - Gary GeunbaeLee + Gary GeunbaeLee MyungjaeKim KyungdukKim 1049–1058 @@ -2758,7 +2758,7 @@ A Fully Coreference-annotated Corpus of Scholarly Papers from the <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology - UlrichSchäfer + UlrichSchäfer ChristianSpurk JörgSteffen 1059–1070 @@ -2776,7 +2776,7 @@ Data-driven Dependency Parsing With Empty Heads WolfgangSeeker - RichárdFarkas + RichárdFarkas BerndBohnet HelmutSchmid JonasKuhn @@ -2786,7 +2786,7 @@ Extension of <fixed-case>TSVM</fixed-case> to Multi-Class and Hierarchical Text Classification Problems With General Losses - Sathiya KeerthiSelvaraj + Sathiya KeerthiSelvaraj SundararajanSellamanickam ShirishShevade 1091–1100 @@ -2795,7 +2795,7 @@ Calculation of Phrase Probabilities for Statistical Machine Translation by using Belief Functions - ChristopheServan + ChristopheServan SimonPetitrenaud 1101–1110 C12-2107 @@ -2804,8 +2804,8 @@ Sense and Reference Disambiguation in <fixed-case>W</fixed-case>ikipedia HuiShen - RazvanBunescu - RadaMihalcea + RazvanBunescu + RadaMihalcea 1111–1120 C12-2108 shen-etal-2012-sense @@ -2813,7 +2813,7 @@ Unsupervised Metaphor Paraphrasing using a Vector Space Model EkaterinaShutova - TimVan de Cruys + TimVan de Cruys AnnaKorhonen 1121–1130 C12-2109 @@ -2842,7 +2842,7 @@ YvonneSkalban Le AnHa LuciaSpecia - RuslanMitkov + RuslanMitkov 1151–1160 C12-2112 skalban-etal-2012-automatic @@ -2851,22 +2851,22 @@ A More Cohesive Summarizer ChristianSmith HenrikDanielsson - ArneJönsson + ArneJönsson 1161–1170 C12-2113 smith-etal-2012-cohesive Robust Learning in Random Subspaces: Equipping <fixed-case>NLP</fixed-case> for <fixed-case>OOV</fixed-case> Effects - AndersSøgaard - AndersJohannsen + AndersSøgaard + AndersJohannsen 1171–1180 C12-2114 sogaard-johannsen-2012-robust An Empirical Etudy of Non-Lexical Extensions to Delexicalized Transfer - AndersSøgaard + AndersSøgaard JulieWulff 1181–1190 C12-2115 @@ -2877,7 +2877,7 @@ YanSong PrescottKlassen FeiXia - ChunyuKit + ChunyuKit 1191–1200 C12-2116 song-etal-2012-entropy @@ -2894,7 +2894,7 @@ Acquiring and Generalizing Causal Inference Rules from Deverbal Noun Constructions ShoheiTanaka - NaoakiOkazaki + NaoakiOkazaki MitsuruIshizuka 1209–1218 C12-2118 @@ -2903,7 +2903,7 @@ Advertising Legality Recognition Yi-jieTang - Cong-kaiLin + Cong-kaiLin Hsin-HsiChen 1219–1228 C12-2119 @@ -2913,7 +2913,7 @@ A Joint Phrasal and Dependency Model for Paraphrase Alignment KapilThadani ScottMartin - MichaelWhite + MichaelWhite 1229–1238 C12-2120 thadani-etal-2012-joint @@ -2931,7 +2931,7 @@ ZhaopengTu YangLiu YifanHe - Josefvan Genabith + Josefvan Genabith QunLiu ShouxunLin 1249–1260 @@ -2948,9 +2948,9 @@ Automatic Extraction of Polar Adjectives for the Creation of Polarity Lexicons - SilviaVázquez - MuntsaPadró - NúriaBel + SilviaVázquez + MuntsaPadró + NúriaBel JulioGonzalo 1271–1280 C12-2124 @@ -3000,8 +3000,8 @@ A Unified Framework for Discourse Argument Identification via Shallow Semantic Parsing FanXu - QiaomingZhu - GuodongZhou + QiaomingZhu + GuodongZhou 1331–1340 C12-2130 xu-etal-2012-unified @@ -3063,7 +3063,7 @@ <fixed-case>C</fixed-case>hinese Word Sense Disambiguation based on Context Expansion YangZhizhuo - HuangHeyan + HeyanHuang 1401–1408 C12-2137 yang-huang-2012-chinese @@ -3074,7 +3074,7 @@ WeiGao BinyangLi ZhongyuWei - Kam-FaiWong + Kam-FaiWong 1409–1418 C12-2138 zhou-etal-2012-cross @@ -3085,7 +3085,7 @@ Proceedings of COLING 2012: Demonstration Papers C12-3 MartinKay - ChristianBoitet + ChristianBoitet The COLING 2012 Organizing Committee
Mumbai, India
December @@ -3115,7 +3115,7 @@ Keyphrase Extraction in Scientific Articles: A Supervised Approach PinakiBhaskar KishorjitNongmeikapam - SivajiBandyopadhyay + SivajiBandyopadhyay 17–24 C12-3003 bhaskar-etal-2012-keyphrase @@ -3146,8 +3146,8 @@
Word Root Finder: a Morphological Segmentor Based on <fixed-case>CRF</fixed-case> - Joseph ZChang - Jason S.Chang + Joseph ZChang + Jason S.Chang 51–58 C12-3007 chang-chang-2012-word @@ -3172,15 +3172,15 @@ <fixed-case>D</fixed-case>om<fixed-case>E</fixed-case>x: Extraction of Sentiment Lexicons for Domains and Meta-Domains IliaChetviorkin - NataliaLoukachevitch + NataliaLoukachevitch 77–86 C12-3010 chetviorkin-loukachevitch-2012-domex On the <fixed-case>R</fixed-case>omanian Rhyme Detection - AlinaCiobanu - Liviu P.Dinu + AlinaCiobanu + Liviu P.Dinu 87–94 C12-3011 ciobanu-dinu-2012-romanian @@ -3188,7 +3188,7 @@ Hierarchical Dialogue Policy Learning using Flexible State Transitions and Linear Function Approximation HeribertoCuayáhuitl - IvanaKruijff-Korbayová + IvanaKruijff-Korbayová NinaDethlefs 95–102 C12-3012 @@ -3197,8 +3197,8 @@ Automated Paradigm Selection for <fixed-case>FSA</fixed-case> based <fixed-case>K</fixed-case>onkani Verb Morphological Analyzer ShilpaDesai - JyotiPawar - PushpakBhattacharyya + JyotiPawar + PushpakBhattacharyya 103–110 C12-3013 desai-etal-2012-automated @@ -3214,16 +3214,16 @@ Dealing with the Grey Sheep of the <fixed-case>R</fixed-case>omanian Gender System, the Neuter - Liviu P.Dinu + Liviu P.Dinu VladNiculae - MariaSulea + MariaSulea 119–124 C12-3015 dinu-etal-2012-dealing Authorial Studies using Ranked Lexical Features - Liviu P.Dinu + Liviu P.Dinu SergiuNisioi 125–130 C12-3016 @@ -3232,8 +3232,8 @@ <fixed-case>S</fixed-case>cien<fixed-case>Q</fixed-case>uest: a Treebank Exploitation Tool for non <fixed-case>NLP</fixed-case>-Specialists AchilleFalaise - OlivierKraif - AgnèsTutin + OlivierKraif + AgnèsTutin DavidRouquet 131–140 C12-3017 @@ -3243,7 +3243,7 @@ An In-Context and Collaborative Software Localisation Model AmelFraisse ChristianBoitet - ValérieBellynck + ValérieBellynck 141–146 C12-3018 fraisse-etal-2012-context @@ -3296,7 +3296,7 @@ Automatic <fixed-case>P</fixed-case>unjabi Text Extractive Summarization System VishalGupta - GurpreetLehal + GurpreetLehal 191–198 C12-3024 gupta-lehal-2012-automatic @@ -3304,7 +3304,7 @@ Complete Pre Processing Phase of <fixed-case>P</fixed-case>unjabi Text Extractive Summarization System VishalGupta - GurpreetLehal + GurpreetLehal 199–206 C12-3025 gupta-lehal-2012-complete @@ -3337,7 +3337,7 @@ Modeling <fixed-case>P</fixed-case>ollyanna Phenomena in <fixed-case>C</fixed-case>hinese Sentiment Analysis - Ting-HaoHuang + Ting-HaoHuang Ho-ChengYu Hsin-HsiChen 231–238 @@ -3349,7 +3349,7 @@ SalilJoshi ArindamChatterjee Arun KarthikeyanKarra - PushpakBhattacharyya + PushpakBhattacharyya 239–246 C12-3030 joshi-etal-2012-eating @@ -3357,8 +3357,8 @@ <fixed-case>I</fixed-case> Can Sense It: a Comprehensive Online System for <fixed-case>WSD</fixed-case> SalilJoshi - Mitesh MKhapra - PushpakBhattacharyya + Mitesh MKhapra + PushpakBhattacharyya 247–254 C12-3031 joshi-etal-2012-sense @@ -3367,7 +3367,7 @@ Collaborative Computer-Assisted Translation Applied to Pedagogical Documents and Literary Works RuslanKalitvianski ChristianBoitet - ValérieBellynck + ValérieBellynck 255–260 C12-3032 kalitvianski-etal-2012-collaborative @@ -3377,7 +3377,7 @@ DipteshKanojia ArindamChatterjee SalilJoshi - PushpakBhattacharyya + PushpakBhattacharyya 261–266 C12-3033 kanojia-etal-2012-discrimination @@ -3386,7 +3386,7 @@ Rule Based <fixed-case>U</fixed-case>rdu Stemmer RohitKansal VishalGoyal - Gurpreet SinghLehal + Gurpreet SinghLehal 267–276 C12-3034 kansal-etal-2012-rule @@ -3416,18 +3416,18 @@ Open Information Extraction for <fixed-case>SOV</fixed-case> Language Based on Entity-Predicate Pair Detection - Woong-KiLee - Yeon-SuLee + Woong-KiLee + Yeon-SuLee Hyoung-GyuLee - Won-HoRyu - Hae-ChangRim + Won-HoRyu + Hae-ChangRim 305–312 C12-3038 lee-etal-2012-open An Omni-Font <fixed-case>G</fixed-case>urmukhi to <fixed-case>S</fixed-case>hahmukhi Transliteration System - Gurpreet SinghLehal + Gurpreet SinghLehal Tejinder SinghSaini Savleen KaurChowdhary 313–320 @@ -3446,11 +3446,11 @@ Recognition of Named-Event Passages in News Articles - LuisMarujo + LuisMarujo WangLing AnatoleGershman - JaimeCarbonell - João P.Neto + JaimeCarbonell + João P.Neto DavidMatos 329–336 C12-3041 @@ -3458,8 +3458,8 @@ Nonparametric Model for <fixed-case>I</fixed-case>nupiaq Word Segmentation - Thuy LinhNguyen - StephanVogel + Thuy LinhNguyen + StephanVogel 337–344 C12-3042 nguyen-vogel-2012-nonparametric @@ -3474,7 +3474,7 @@ <fixed-case>O</fixed-case>pen<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-<fixed-case>PT</fixed-case>: An Open <fixed-case>B</fixed-case>razilian <fixed-case>W</fixed-case>ordnet for Reasoning - Valeriade Paiva + Valeriade Paiva AlexandreRademaker Gerardde Melo 353–360 @@ -3523,8 +3523,8 @@ Open source multi-platform <fixed-case>N</fixed-case>oo<fixed-case>J</fixed-case> for <fixed-case>NLP</fixed-case> - MaxSilberztein - TamásVáradi + MaxSilberztein + TamásVáradi MarkoTadić 401–408 C12-3050 @@ -3533,7 +3533,7 @@ <fixed-case>P</fixed-case>unjabi Text-To-Speech Synthesis System ParminderSingh - Gurpreet SinghLehal + Gurpreet SinghLehal 409–416 C12-3051 singh-lehal-2012-punjabi @@ -3556,7 +3556,7 @@ <fixed-case>C</fixed-case>entral and <fixed-case>S</fixed-case>outh-<fixed-case>E</fixed-case>ast <fixed-case>E</fixed-case>uropean Resources in <fixed-case>META</fixed-case>-<fixed-case>SHARE</fixed-case> MarkoTadić - TamásVáradi + TamásVáradi 431–438 C12-3054 tadic-varadi-2012-central @@ -3572,7 +3572,7 @@ Visualization on Financial Terms via Risk Ranking from Financial Reports - Ming-FengTsai + Ming-FengTsai Chuan-JuWang 447–452 C12-3056 @@ -3582,7 +3582,7 @@ <fixed-case>UNL</fixed-case> Explorer HiroshiUchida MeiyingZhu - Md. Anwarus SalamKhan + Md. Anwarus SalamKhan 453–458 C12-3057 uchida-etal-2012-unl @@ -3598,8 +3598,8 @@ Generating Questions from Web Community Contents BaoxunWang - BingquanLiu - ChengjieSun + BingquanLiu + ChengjieSun XiaolongWang DeyuanZhang 467–474 @@ -3608,10 +3608,10 @@ Demo of i<fixed-case>MAG</fixed-case> Possibilities: <fixed-case>MT</fixed-case>-postediting, Translation Quality Evaluation, Parallel Corpus Production - Ling XiaoWang - YingZhang + Ling XiaoWang + YingZhang ChristianBoitet - ValerieBellynck + ValerieBellynck 475–482 C12-3060 wang-etal-2012-demo @@ -3625,7 +3625,7 @@ MarkusFreitag Jan-ThorstenPeter SaabMansour - HermannNey + HermannNey 483–492 C12-3061 wuebker-etal-2012-jane @@ -3648,7 +3648,7 @@ Developing and Evaluating a Computer-Assisted Near-Synonym Learning System - Liang-ChihYu + Liang-ChihYu Kai-HsiangHsu 509–516 C12-3064 @@ -3664,7 +3664,7 @@ <fixed-case>SMR</fixed-case>-<fixed-case>C</fixed-case>mp: Square-Mean-Root Approach to Comparison of Monolingual Contrastive Corpora - HuaRuiZhang + HuaRuiZhang Chu-RenHuang FrancescaQuattri 527–534 diff --git a/data/xml/C14.xml b/data/xml/C14.xml index b3dc3ceee8..f0efe196cc 100644 --- a/data/xml/C14.xml +++ b/data/xml/C14.xml @@ -4,8 +4,8 @@ Proceedings of COLING 2014, the 25th International Conference on Computational Linguistics: Technical Papers C14-1 - JunichiTsujii - JanHajic + JunichiTsujii + JanHajic Dublin City University and Association for Computational Linguistics
Dublin, Ireland
August @@ -18,7 +18,7 @@ Learning from 26 Languages: Program Management and Science in the <fixed-case>B</fixed-case>abel Program - MaryHarper + MaryHarper 1 C14-1001 harper-2014-learning @@ -34,7 +34,7 @@ Cross-lingual Coreference Resolution of Pronouns MichalNovák - ZdeněkŽabokrtský + ZdeněkŽabokrtský 14–24 C14-1003 novak-zabokrtsky-2014-cross @@ -51,7 +51,7 @@ Hierarchical Topical Segmentation with Affinity Propagation AnnaKazantseva - StanSzpakowicz + StanSzpakowicz 37–47 C14-1005 kazantseva-szpakowicz-2014-hierarchical @@ -75,8 +75,8 @@ Deep Convolutional Neural Networks for Sentiment Analysis of Short Texts - Cícerodos Santos - MaíraGatti + Cícerodos Santos + MaíraGatti 69–78 C14-1008 dos-santos-gatti-2014-deep @@ -84,7 +84,7 @@ Joint Inference and Disambiguation of Implicit Sentiments via Implicature Constraints LingjiaDeng - JanyceWiebe + JanyceWiebe YoonjungChoi 79–88 C14-1009 @@ -104,8 +104,8 @@ Multi-Objective Search Results Clustering SudiptaAcharya SriparnaSaha - Jose G.Moreno - GaëlDias + Jose G.Moreno + GaëlDias 99–108 C14-1011 acharya-etal-2014-multi @@ -162,7 +162,7 @@ Learning Task-specific Bilexical Embeddings - Pranava SwaroopMadhyastha + Pranava SwaroopMadhyastha XavierCarreras AriadnaQuattoni 161–171 @@ -183,7 +183,7 @@ Political Tendency Identification in <fixed-case>T</fixed-case>witter using Sentiment Analysis Techniques FerranPla - Lluís-F.Hurtado + Lluís-F.Hurtado 183–192 C14-1019 pla-hurtado-2014-political @@ -191,12 +191,12 @@ A Study of using Syntactic and Semantic Structures for Concept Segmentation and Labeling ImanSaleh - ScottCyphers + ScottCyphers JimGlass - ShafiqJoty - LluísMàrquez + ShafiqJoty + LluísMàrquez AlessandroMoschitti - PreslavNakov + PreslavNakov 193–202 C14-1020 saleh-etal-2014-study @@ -206,7 +206,7 @@ QiZhang YeyunGong XuyangSun - XuanjingHuang + XuanjingHuang 203–212 C14-1021 zhang-etal-2014-time @@ -278,7 +278,7 @@ Unsupervised Training Set Generation for Automatic Acquisition of Technical Terminology in Patents AlexJudea - HinrichSchütze + HinrichSchütze SoerenBruegmann 290–300 C14-1029 @@ -287,7 +287,7 @@ A Data Driven Approach for Person Name Disambiguation in Web Search Results Agustín D.Delgado - RaquelMartínez + RaquelMartínez VíctorFresno SotoMontalvo 301–310 @@ -297,15 +297,15 @@ Picking the Amateur’s Mind - Predicting Chess Player Strength from Game Annotations ChristianScheible - HinrichSchütze + HinrichSchütze 311–321 C14-1031 scheible-schutze-2014-picking <fixed-case>Z</fixed-case>ipf’s Law and Statistical Data on <fixed-case>M</fixed-case>odern <fixed-case>T</fixed-case>ibetan - HuidanLiu - MinghuaNuo + HuidanLiu + MinghuaNuo JianWu 322–333 C14-1032 @@ -315,7 +315,7 @@ Simple or Complex? Assessing the readability of <fixed-case>B</fixed-case>asque Texts ItziarGonzalez-Dios María JesúsAranzabe - ArantzaDíaz de Ilarraza + ArantzaDíaz de Ilarraza HaritzSalaberri 334–344 C14-1033 @@ -332,7 +332,7 @@ Inducing Word Sense with Automatically Learned Hidden Concepts - BaobaoChang + BaobaoChang WenzhePei MiaohongChen 355–364 @@ -358,7 +358,7 @@ Author Verification Using Common N-Gram Profiles of Text Documents MagdalenaJankowska EvangelosMilios - VladoKešelj + VladoKešelj 387–397 C14-1038 jankowska-etal-2014-author @@ -366,7 +366,7 @@ Dynamically Integrating Cross-Domain Translation Memory into Phrase-Based Machine Translation during Decoding KunWang - ChengqingZong + ChengqingZong Keh-YihSu 398–408 C14-1039 @@ -374,9 +374,9 @@ Machine Translation Quality Estimation Across Domains - José G.C. de Souza + José G.C. de Souza MarcoTurchi - MatteoNegri + MatteoNegri 409–420 C14-1040 c-de-souza-etal-2014-machine @@ -386,7 +386,7 @@ NadirDurrani PhilippKoehn HelmutSchmid - AlexanderFraser + AlexanderFraser 421–432 C14-1041 durrani-etal-2014-investigating @@ -401,7 +401,7 @@ Fourteen Light Tasks for comparing Analogical and Phrase-based Machine Translation RafikRhouma - PhillippeLanglais + PhillippeLanglais 444–454 C14-1043 rhouma-langlais-2014-fourteen @@ -419,9 +419,9 @@ The annotation of the Central Unit in Rhetorical Structure Trees: A Key Step in Annotating Rhetorical Relations - MikelIruskieta - ArantzaDíaz de Ilarraza - MikelLersundi + MikelIruskieta + ArantzaDíaz de Ilarraza + MikelLersundi 466–475 C14-1045 iruskieta-etal-2014-annotation @@ -429,7 +429,7 @@ Measuring Lexical Cohesion: Beyond Word Repetition AnnaKazantseva - StanSzpakowicz + StanSzpakowicz 476–485 C14-1046 kazantseva-szpakowicz-2014-measuring @@ -437,8 +437,8 @@ Fast Tweet Retrieval with Compact Binary Codes WeiweiGuo - WeiLiu - MonaDiab + WeiLiu + MonaDiab 486–496 C14-1047 guo-etal-2014-fast @@ -457,7 +457,7 @@ Using unmarked contexts in nominal lexical semantic classification LaurenRomeo SaraMendes - NúriaBel + NúriaBel 508–519 C14-1049 romeo-etal-2014-using @@ -467,7 +467,7 @@ ZhongqingWang ShoushanLi HanxiaoShi - GuodongZhou + GuodongZhou 520–529 C14-1050 wang-etal-2014-skill @@ -484,7 +484,7 @@ An <fixed-case>LR</fixed-case>-inspired generalized lexicalized phrase structure parser - BenoitCrabbé + BenoitCrabbé 541–552 C14-1052 crabbe-2014-lr @@ -501,8 +501,8 @@ Biber Redux: Reconsidering Dimensions of Variation in <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish - Rebecca J.Passonneau - NancyIde + Rebecca J.Passonneau + NancyIde SongqiaoSu JesseStuart 565–576 @@ -521,7 +521,7 @@ Enforcing Topic Diversity in a Document Recommender for Conversations MaryamHabibi - AndreiPopescu-Belis + AndreiPopescu-Belis 588–599 C14-1056 habibi-popescu-belis-2014-enforcing @@ -529,9 +529,9 @@ Identifying Important Features for Graph Retrieval ZhuoLi - SandraCarberry + SandraCarberry HuiFang - KathleenMcCoy + KathleenMcCoy 600–609 C14-1057 li-etal-2014-identifying @@ -564,7 +564,7 @@ Unsupervised Coreference Resolution by Utilizing the Most Informative Relations - Nafise SadatMoosavi + Nafise SadatMoosavi MichaelStrube 644–655 C14-1061 @@ -573,7 +573,7 @@ Knowledge Sharing via Social Login: Exploiting Microblogging Service for Warming up Social Question Answering Websites YangXiao - Wayne XinZhao + Wayne XinZhao KunWang ZhenXiao 656–666 @@ -604,7 +604,7 @@ YaqianZhou YaGuo QiZhang - XuanjingHuang + XuanjingHuang 688–697 C14-1065 gong-etal-2014-generative @@ -612,7 +612,7 @@ Inducing Latent Semantic Relations for Structured Distributional Semantics Sujay KumarJauhar - EduardHovy + EduardHovy 698–708 C14-1066 jauhar-hovy-2014-inducing @@ -629,7 +629,7 @@ Towards Syntax-aware Compositional Distributional Semantic Models LorenzoFerrone - Fabio MassimoZanzotto + Fabio MassimoZanzotto 721–730 C14-1068 ferrone-zanzotto-2014-towards @@ -664,15 +664,15 @@ docrep: A lightweight and efficient document representation framework TimDawborn - James R.Curran + James R.Curran 762–771 C14-1072 dawborn-curran-2014-docrep Why Implementation Matters: Evaluation of an Open-source Constraint Grammar Parser - Dávid MárkNemeskey - FrancisTyers + Dávid MárkNemeskey + FrancisTyers MansHulden 772–780 C14-1073 @@ -732,7 +732,7 @@ Identifying Emotion Labels from Psychiatric Social Texts Using Independent Component Analysis - Liang-ChihYu + Liang-ChihYu Chun-YuanHo 837–847 C14-1080 @@ -767,14 +767,14 @@ FeiLiu RohanRamanath NormanSadeh - Noah A.Smith + Noah A.Smith 884–894 C14-1084 liu-etal-2014-step An Off-the-shelf Approach to Authorship Attribution - Jamal A.Nasir + Jamal A.Nasir NicoGörnitz UlfBrefeld 895–904 @@ -828,7 +828,7 @@ Lexical Chaining for Measuring Discourse Coherence Quality in Test-taker Essays SwapnaSomasundaran JillBurstein - MartinChodorow + MartinChodorow 950–961 C14-1090 somasundaran-etal-2014-lexical @@ -847,7 +847,7 @@ A Framework for Translating <fixed-case>SMS</fixed-case> Messages Vivek KumarRangarajan Sridhar JohnChen - SrinivasBangalore + SrinivasBangalore RonShacham 974–983 C14-1092 @@ -856,7 +856,7 @@ A Hybrid Approach to Features Representation for Fine-grained <fixed-case>A</fixed-case>rabic Named Entity Recognition FahdAlotaibi - MarkLee + MarkLee 984–995 C14-1093 alotaibi-lee-2014-hybrid @@ -864,7 +864,7 @@ Prior-informed Distant Supervision for Temporal Evidence Classification RidhoReinanda - Maartende Rijke + Maartende Rijke 996–1006 C14-1094 reinanda-de-rijke-2014-prior @@ -892,7 +892,7 @@ Inclusive yet Selective: Supervised Distributional Hypernymy Detection StephenRoller KatrinErk - GemmaBoleda + GemmaBoleda 1025–1036 C14-1097 roller-etal-2014-inclusive @@ -910,7 +910,7 @@ What good are ‘Nominalkomposita’ for ‘noun compounds’: Multilingual Extraction and Structure Analysis of Nominal Compositions using Linguistic Restrictors PatrickZiering - Lonnekevan der Plas + Lonnekevan der Plas 1047–1058 C14-1099 ziering-van-der-plas-2014-good @@ -925,16 +925,16 @@ LalehRoostapour JordanBender AbhimanuKumar - LoriLevin + LoriLevin MandySimons - ChrisDyer + ChrisDyer 1059–1070 C14-1100 bhatia-etal-2014-automatic Argument structure of adverbial derivatives in <fixed-case>R</fixed-case>ussian - IgorBoguslavsky + IgorBoguslavsky 1071–1080 C14-1101 boguslavsky-2014-argument @@ -942,7 +942,7 @@ Active Learning in Noisy Conditions for Spoken Language Understanding HosseinHadian - HosseinSameti + HosseinSameti 1081–1090 C14-1102 hadian-sameti-2014-active @@ -980,7 +980,7 @@ KoichiAkabe GrahamNeubig SakrianiSakti - TomokiToda + TomokiToda SatoshiNakamura 1124–1132 C14-1106 @@ -1002,7 +1002,7 @@ DongdongZhang MuLi MingZhou - TiejunZhao + TiejunZhao 1144–1153 C14-1108 cao-etal-2014-lexicalized @@ -1011,14 +1011,14 @@ Automatic Corpus Expansion for <fixed-case>C</fixed-case>hinese Word Segmentation by Exploiting the Redundancy of Web Information XipengQiu ChaoChaoHuang - XuanjingHuang + XuanjingHuang 1154–1164 C14-1109 qiu-etal-2014-automatic Fast High-Accuracy Part-of-Speech Tagging by Independent Classifiers - RobertMoore + RobertMoore 1165–1176 C14-1110 moore-2014-fast @@ -1037,7 +1037,7 @@ <fixed-case>J</fixed-case>apanese Word Reordering Integrated with Dependency Parsing KazushiYoshida TomohiroOhno - YoshihideKato + YoshihideKato ShigekiMatsubara 1186–1196 C14-1112 @@ -1055,7 +1055,7 @@ Ranking Multidocument Event Descriptions for Building Thematic Timelines Kiem-HieuNguyen XavierTannier - VeroniqueMoriceau + VeroniqueMoriceau 1208–1217 C14-1114 nguyen-etal-2014-ranking @@ -1066,7 +1066,7 @@ SubhashiniVenugopalan SergioGuadarrama KateSaenko - RaymondMooney + RaymondMooney 1218–1227 C14-1115 thomason-etal-2014-integrating @@ -1075,7 +1075,7 @@ Cross-Topic Authorship Attribution: Will Out-Of-Topic Data Help? UpendraSapkota ThamarSolorio - ManuelMontes + ManuelMontes StevenBethard PaoloRosso 1228–1237 @@ -1084,7 +1084,7 @@ Online Gaming for Crowd-sourcing Phrase-equivalents - A.Kumaran + A.Kumaran MelissaDensmore ShaishavKumar 1238–1247 @@ -1119,7 +1119,7 @@ Global Methods for Cross-lingual Semantic Role and Predicate Labelling - Lonnekevan der Plas + Lonnekevan der Plas MariannaApidianaki ChenhuaChen 1279–1290 @@ -1137,14 +1137,14 @@ Unsupervised Word Sense Induction using Distributional Statistics KartikGoyal - EduardHovy + EduardHovy 1302–1310 C14-1123 goyal-hovy-2014-unsupervised Group based Self Training for <fixed-case>E</fixed-case>-Commerce Product Record Linkage - XinZhao + XinZhao YuexinWu HongfeiYan XiaomingLi @@ -1208,18 +1208,18 @@ From neighborhood to parenthood: the advantages of dependency representation over bigrams in Brown clustering - SimonŠuster - Gertjanvan Noord + SimonŠuster + Gertjanvan Noord 1382–1391 C14-1131 suster-van-noord-2014-neighborhood An Empirical Evaluation of Automatic Conversion from Constituency to Dependency in <fixed-case>H</fixed-case>ungarian - Katalin IlonaSimkó + Katalin IlonaSimkó VeronikaVincze ZsoltSzántó - RichárdFarkas + RichárdFarkas 1392–1401 C14-1132 simko-etal-2014-empirical @@ -1237,7 +1237,7 @@ Modeling Newswire Events using Neural Networks for Anomaly Detection PradeepDasigi - EduardHovy + EduardHovy 1414–1422 C14-1134 dasigi-hovy-2014-modeling @@ -1248,8 +1248,8 @@ KentaroTorisawa JulienKloetzer ChikaraHashimoto - IstvánVarga - Jong-HoonOh + IstvánVarga + Jong-HoonOh 1423–1434 C14-1135 sano-etal-2014-million @@ -1258,14 +1258,14 @@ Combining Supervised and Unsupervised Parsing for Distributional Similarity MartinRiedl IrinaAlles - ChrisBiemann + ChrisBiemann 1435–1446 C14-1136 riedl-etal-2014-combining A <fixed-case>M</fixed-case>arkovian approach to distributional semantics with application to semantic compositionality - ÉdouardGrave + ÉdouardGrave GuillaumeObozinski FrancisBach 1447–1456 @@ -1276,7 +1276,7 @@ A Beam-Search Decoder for Disfluency Detection XuancongWang Hwee TouNg - Khe ChaiSim + Khe ChaiSim 1457–1467 C14-1138 wang-etal-2014-beam @@ -1327,7 +1327,7 @@ 3arif: A Corpus of Modern Standard and <fixed-case>E</fixed-case>gyptian <fixed-case>A</fixed-case>rabic Tweets Annotated for Epistemic Modality Using Interactive Crowdsourcing RaniaAl-Sabbagh - RoxanaGirju + RoxanaGirju JanaDiesner 1521–1532 C14-1144 @@ -1337,7 +1337,7 @@ Empirical Analysis of Aggregation Methods for Collective Annotation CiyangQing UlleEndriss - RaquelFernández + RaquelFernández JustinKruger 1533–1542 C14-1145 @@ -1353,7 +1353,7 @@ Collective Named Entity Disambiguation using Graph Ranking and Clique Partitioning Approaches AymanAlhelbawy - RobertGaizauskas + RobertGaizauskas 1544–1555 C14-1147 alhelbawy-gaizauskas-2014-collective @@ -1375,7 +1375,7 @@ ChiWang ShiZhi JiaweiHan - ClareVoss + ClareVoss MalikMagdon-Ismail 1567–1578 C14-1149 @@ -1393,7 +1393,7 @@ An Enhanced <fixed-case>L</fixed-case>esk Word Sense Disambiguation Algorithm through a Distributional Semantic Model PierpaoloBasile AnnalinaCaputo - GiovanniSemeraro + GiovanniSemeraro 1591–1600 C14-1151 basile-etal-2014-enhanced @@ -1401,7 +1401,7 @@ Word Sense Induction Using Lexical Chain based Hypergraph Model TaoQian - DonghongJi + DonghongJi MingyaoZhang ChongTeng ConglingXia @@ -1422,8 +1422,8 @@ Novel Word-sense Identification PaulCook Jey HanLau - DianaMcCarthy - TimothyBaldwin + DianaMcCarthy + TimothyBaldwin 1624–1635 C14-1154 cook-etal-2014-novel @@ -1431,7 +1431,7 @@ Learning to Summarise Related Sentences EmmanouilTzouridis - JamalNasir + JamalNasir UlfBrefeld 1636–1647 C14-1155 @@ -1453,7 +1453,7 @@ Query-Focused Opinion Summarization for User-Generated Content LuWang HemaRaghavan - ClaireCardie + ClaireCardie VittorioCastelli 1660–1669 C14-1157 @@ -1491,7 +1491,7 @@ TakuyaHiraoka GrahamNeubig SakrianiSakti - TomokiToda + TomokiToda SatoshiNakamura 1706–1717 C14-1161 @@ -1500,18 +1500,18 @@ Towards multimodal modeling of physicians’ diagnostic confidence and self-awareness using medical narratives JosephBullard - CeciliaOvesdotter Alm + CeciliaOvesdotter Alm QiYu PengchengShi - AnneHaake + AnneHaake 1718–1727 C14-1162 bullard-etal-2014-towards Towards Semantic Validation of a Derivational Lexicon - BrittaZeller - SebastianPadó + BrittaZeller + SebastianPadó JanŠnajder 1728–1739 C14-1163 @@ -1520,7 +1520,7 @@ Detecting Learner Errors in the Choice of Content Words Using Compositional Distributional Semantics EkaterinaKochmar - TedBriscoe + TedBriscoe 1740–1751 C14-1164 kochmar-briscoe-2014-detecting @@ -1556,10 +1556,10 @@ Adapting taggers to <fixed-case>T</fixed-case>witter with not-so-distant supervision - BarbaraPlank + BarbaraPlank DirkHovy RyanMcDonald - AndersSøgaard + AndersSøgaard 1783–1792 C14-1168 plank-etal-2014-adapting @@ -1567,14 +1567,14 @@ Interpolated <fixed-case>D</fixed-case>irichlet Class Language Model for Speech Recognition Incorporating Long-distance N-grams Md. AkmalHaidar - DouglasO’Shaughnessy + DouglasO’Shaughnessy 1793–1802 C14-1169 haidar-oshaughnessy-2014-interpolated Situated Incremental Natural Language Understanding using a Multimodal, Linguistically-driven Update Model - CaseyKennington + CaseyKennington SpyrosKousidis DavidSchlangen 1803–1812 @@ -1583,9 +1583,9 @@ Quality Estimation for Automatic Speech Recognition - MatteoNegri + MatteoNegri MarcoTurchi - José G.C. de Souza + José G.C. de Souza DanieleFalavigna 1813–1823 C14-1171 @@ -1593,9 +1593,9 @@ A Generic Anaphora Resolution Engine for <fixed-case>I</fixed-case>ndian Languages - SobhaLalitha Devi - VijaySundar Ram - PattabhiRK Rao + SobhaLalitha Devi + VijaySundar Ram + PattabhiRK Rao 1824–1833 C14-1172 lalitha-devi-etal-2014-generic @@ -1617,7 +1617,7 @@ Rediscovering Annotation Projection for Cross-Lingual Parser Induction - JörgTiedemann + JörgTiedemann 1854–1864 C14-1175 tiedemann-2014-rediscovering @@ -1626,16 +1626,16 @@ Synchronous Constituent Context Model for Inducing Bilingual Synchronous Structures XiangyuDuan MinZhang - QiaomingZhu + QiaomingZhu 1865–1874 C14-1176 duan-etal-2014-synchronous Syntactic Parsing and Compound Recognition via Dual Decomposition: Application to <fixed-case>F</fixed-case>rench - JosephLe Roux + JosephLe Roux AntoineRozenknop - MatthieuConstant + MatthieuConstant 1875–1885 C14-1177 le-roux-etal-2014-syntactic @@ -1681,7 +1681,7 @@ Latent Domain Translation Models in Mix-of-Domains Haystack HoangCuong - KhalilSima’an + KhalilSima’an 1928–1939 C14-1182 cuong-simaan-2014-latent @@ -1699,7 +1699,7 @@ DolfTrieschnigg A. SezaDoğruöz RilanaGravel - MariëtTheune + MariëtTheune TheoMeder Franciskade Jong 1950–1961 @@ -1709,7 +1709,7 @@ Exploring Syntactic Features for Native Language Identification: A Variationist Perspective on Feature Encoding and Ensemble Optimization SerhiyBykh - DetmarMeurers + DetmarMeurers 1962–1973 C14-1185 bykh-meurers-2014-exploring @@ -1726,14 +1726,14 @@ Empirical analysis of exploiting review helpfulness for extractive summarization of online reviews WentingXiong - DianeLitman + DianeLitman 1985–1995 C14-1187 xiong-litman-2014-empirical Lexico-syntactic text simplification and compression with typed dependencies - MandyaAngrosh + MandyaAngrosh TadashiNomoto AdvaithSiddharthan 1996–2006 @@ -1794,7 +1794,7 @@ RasoulKaljahi JenniferFoster JohannRoturier - RaphaelRubino + RaphaelRubino 2052–2063 C14-1194 kaljahi-etal-2014-quality @@ -1802,7 +1802,7 @@ Effective Incorporation of Source Syntax into Hierarchical Phrase-based Translation TongXiao - Adriàde Gispert + Adriàde Gispert JingboZhu BillByrne 2064–2074 @@ -1838,7 +1838,7 @@ Exploring Fine-grained Entity Type Constraints for Distantly Supervised Relation Extraction - YangLiu + YangLiu KangLiu LihengXu JunZhao @@ -1856,7 +1856,7 @@ Limited memory incremental coreference resolution KellieWebster - James R.Curran + James R.Curran 2129–2139 C14-1201 webster-curran-2014-limited @@ -1880,8 +1880,8 @@ Employing Event Inference to Improve Semi-Supervised <fixed-case>C</fixed-case>hinese Event Extraction PeifengLi - QiaomingZhu - GuodongZhou + QiaomingZhu + GuodongZhou 2161–2171 C14-1204 li-etal-2014-employing @@ -1898,7 +1898,7 @@ Unsupervised extraction of semantic relations using discourse cues JulietteConrath StergosAfantenos - NicholasAsher + NicholasAsher PhilippeMuller 2184–2194 C14-1206 @@ -1923,7 +1923,7 @@ Augment Dependency-to-String Translation with Fixed and Floating Structures JunXie - JinanXu + JinanXu QunLiu 2217–2226 C14-1209 @@ -1934,14 +1934,14 @@ HailongCao DongdongZhang MingZhou - TiejunZhao + TiejunZhao 2227–2236 C14-1210 cao-etal-2014-soft Using Spreading Activation to Evaluate and Improve Ontologies - RónanMac an tSaoir + RónanMac an tSaoir 2237–2248 C14-1211 mac-an-tsaoir-2014-using @@ -1951,7 +1951,7 @@ JulieWeeds DaoudClarke JeremyReffin - DavidWeir + DavidWeir BillKeller 2249–2259 C14-1212 @@ -1960,10 +1960,10 @@ “One Entity per Discourse” and “One Entity per Collocation” Improve Named-Entity Disambiguation AnderBarrena - EnekoAgirre + EnekoAgirre BernardoCabaleiro - AnselmoPeñas - AitorSoroa + AnselmoPeñas + AitorSoroa 2260–2269 C14-1213 barrena-etal-2014-one @@ -2018,7 +2018,7 @@ Unsupervised Word Segmentation in Context GabrielSynnaeve IsabelleDautriche - BenjaminBörschinger + BenjaminBörschinger MarkJohnson EmmanuelDupoux 2326–2334 @@ -2040,7 +2040,7 @@ A context-based model for Sentiment Analysis in <fixed-case>T</fixed-case>witter AndreaVanzo DaniloCroce - RobertoBasili + RobertoBasili 2345–2354 C14-1221 vanzo-etal-2014-context @@ -2075,14 +2075,14 @@ Claims on demand – an initial demonstration of a system for automatic detection and polarity identification of context dependent claims in massive corpora NoamSlonim EhudAharoni - CarlosAlzate + CarlosAlzate RoyBar-Haim YonatanBilu LenaDankin IrisEiron DanielHershcovich ShayHummel - MiteshKhapra + MiteshKhapra TamarLavee RanLevy PaulMatchen @@ -2099,11 +2099,11 @@ Copa 2014 <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Brasil: a frame-based trilingual electronic dictionary for the Football World Cup - Tiago T.Torrent + Tiago T.Torrent Maria Margarida M.Salomão Fernanda C. A.Campos Regina M. M.Braga - Ely E. S.Matos + Ely E. S.Matos Maucha A.Gamonal Julia A.Gonçalves Bruno C. P.Souza @@ -2134,7 +2134,7 @@ Processing Discourse in Dislog on the <fixed-case>T</fixed-case>ext<fixed-case>C</fixed-case>oop Platform - PatrickSaint-Dizier + PatrickSaint-Dizier 25–28 C14-2006 saint-dizier-2014-processing @@ -2209,10 +2209,10 @@ A Sentence Judgment System for Grammatical Error Detection Lung-HaoLee - Liang-ChihYu + Liang-ChihYu Kuei-ChingLee - Yuen-HsienTseng - Li-PingChang + Yuen-HsienTseng + Li-PingChang Hsin-HsiChen 67–70 C14-2015 @@ -2269,7 +2269,7 @@ What or Who is Multilingual <fixed-case>W</fixed-case>atson? KeithCortis UrveshBhowan - RonanMac an tSaoir + RonanMac an tSaoir D.J.McCloskey MikhailSogrin RossCadogan @@ -2310,7 +2310,7 @@ Method51 for Mining Insight from Social Media Datasets SimonWibberley - DavidWeir + DavidWeir JeremyReffin 115–119 C14-2025 @@ -2318,9 +2318,9 @@ <fixed-case>MT</fixed-case>-<fixed-case>EQ</fixed-case>u<fixed-case>A</fixed-case>l: a Toolkit for Human Assessment of Machine Translation Output - ChristianGirardi + ChristianGirardi LuisaBentivogli - Mohammad AminFarajian + Mohammad AminFarajian MarcelloFederico 120–123 C14-2026 @@ -2330,7 +2330,7 @@ <fixed-case>O</fixed-case>pen<fixed-case>S</fixed-case>o<fixed-case>N</fixed-case>a<fixed-case>R</fixed-case>: user-driven development of the <fixed-case>S</fixed-case>o<fixed-case>N</fixed-case>a<fixed-case>R</fixed-case> corpus interfaces MartinReynaert Matjevan de Camp - Mennovan Zaanen + Mennovan Zaanen 124–128 C14-2027 reynaert-etal-2014-opensonar @@ -2340,7 +2340,7 @@ MarcelloFederico NicolaBertoldi MauroCettolo - MatteoNegri + MatteoNegri MarcoTurchi MarcoTrombetti AlessandroCattelan @@ -2350,7 +2350,7 @@ AlbertoMassidda HolgerSchwenk LoïcBarrault - FredericBlain + FredericBlain PhilippKoehn ChristianBuck UlrichGermann @@ -2377,8 +2377,8 @@ Biomedical/Clinical <fixed-case>NLP</fixed-case> - OzlemUzuner - MelihaYetişgen + OzlemUzuner + MelihaYetişgen AmberStubbs 1–2 C14-3001 @@ -2386,7 +2386,7 @@ Using Neural Networks for Modeling and Representing Natural Languages - TomasMikolov + TomasMikolov 3–4 C14-3002 mikolov-2014-using @@ -2401,7 +2401,7 @@ Automated Grammatical Error Correction for Language Learners - JoelTetreault + JoelTetreault ClaudiaLeacock 8–10 C14-3004 @@ -2409,8 +2409,8 @@ Selection Bias, Label Bias, and Bias in Ground Truth - AndersSøgaard - BarbaraPlank + AndersSøgaard + BarbaraPlank DirkHovy 11–13 C14-3005 diff --git a/data/xml/C16.xml b/data/xml/C16.xml index c185fc9e1b..153bac8a50 100644 --- a/data/xml/C16.xml +++ b/data/xml/C16.xml @@ -4,7 +4,7 @@ Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers C16-1 - YujiMatsumoto + YujiMatsumoto RashmiPrasad The COLING 2016 Organizing Committee
Osaka, Japan
@@ -19,7 +19,7 @@ Boosting for Efficient Model Selection for Syntactic Parsing RachelBawden - BenoîtCrabbé + BenoîtCrabbé 1–11 C16-1001 We present an efficient model selection method using boosting for transition-based constituency parsing. It is designed for exploring a high-dimensional search space, defined by a large set of feature templates, as for example is typically the case when parsing morphologically rich languages. Our method removes the need to manually define heuristic constraints, which are often imposed in current state-of-the-art selection methods. Our experiments for French show that the method is more efficient and is also capable of producing compact, state-of-the-art models. @@ -38,7 +38,7 @@ Grammar induction from (lots of) words alone - John KPate + John KPate MarkJohnson 23–32 C16-1003 @@ -64,7 +64,7 @@ NorikiNishida RaphaelShu YoEhara - NaoakiOkazaki + NaoakiOkazaki YusukeMiyao HidekiNakayama 44–52 @@ -76,7 +76,7 @@ An Improved Phrase-based Approach to Annotating and Summarizing Student Course Responses WencanLuo FeiLiu - DianeLitman + DianeLitman 53–63 C16-1006 Teaching large classes remains a great challenge, primarily because it is difficult to attend to all the student needs in a timely manner. Automatic text summarization systems can be leveraged to summarize the student feedback, submitted immediately after each lecture, but it is left to be discovered what makes a good summary for student responses. In this work we explore a new methodology that effectively extracts summary phrases from the student responses. Each phrase is tagged with the number of students who raise the issue. The phrases are evaluated along two dimensions: with respect to text content, they should be informative and well-formed, measured by the ROUGE metric; additionally, they shall attend to the most pressing student needs, measured by a newly proposed metric. This work is enabled by a phrase-based annotation and highlighting scheme, which is new to the summarization task. The phrase-based framework allows us to summarize the student responses into a set of bullet points and present to the instructor promptly. @@ -114,8 +114,8 @@ Expanding wordnets to new languages with multilingual sense disambiguation - MihaelArcan - John PhilipMcCrae + MihaelArcan + John PhilipMcCrae PaulBuitelaar 97–108 C16-1010 @@ -125,7 +125,7 @@ A Correlational Encoder Decoder Architecture for Pivot Based Sequence Generation AmritaSaha - Mitesh M.Khapra + Mitesh M.Khapra SarathChandar JanarthananRajendran KyunghyunCho @@ -147,7 +147,7 @@ Improving historical spelling normalization with bi-directional <fixed-case>LSTM</fixed-case>s and multi-task learning MarcelBollmann - AndersSøgaard + AndersSøgaard 131–139 C16-1013 Natural-language processing of historical documents is complicated by the abundance of variant spellings and lack of annotated data. A common approach is to normalize the spelling of historical words to modern forms. We explore the suitability of a deep neural network architecture for this task, particularly a deep bi-LSTM network applied on a character level. Our model compares well to previously established normalization algorithms when evaluated on a diverse set of texts from Early New High German. We show that multi-task learning with additional normalization data can improve our model’s performance further. @@ -200,7 +200,7 @@ DanielClothiaux EmilyTagtow PatrickLittell - ChrisDyer + ChrisDyer 181–191 C16-1018 Languages with rich morphology often introduce sparsity in language processing tasks. While morphological analyzers can reduce this sparsity by providing morpheme-level analyses for words, they will often introduce ambiguity by returning multiple analyses for the same surface form. The problem of disambiguating between these morphological parses is further complicated by the fact that a correct parse for a word is not only be dependent on the surface form but also on other words in its context. In this paper, we present a language-agnostic approach to morphological disambiguation. We address the problem of using context in morphological disambiguation by presenting several LSTM-based neural architectures that encode long-range surface-level and analysis-level contextual dependencies. We applied our approach to Turkish, Russian, and Arabic to compare effectiveness across languages, matching state-of-the-art results in two of the three languages. Our results also demonstrate that while context plays a role in learning how to disambiguate, the type and amount of context needed varies between languages. @@ -218,7 +218,7 @@ An Empirical Exploration of Skip Connections for Sequential Tagging HuijiaWu JiajunZhang - ChengqingZong + ChengqingZong 203–212 C16-1020 In this paper, we empirically explore the effects of various kinds of skip connections in stacked bidirectional LSTMs for sequential tagging. We investigate three kinds of skip connections connecting to LSTM cells: (a) skip connections to the gates, (b) skip connections to the internal states and (c) skip connections to the cell outputs. We present comprehensive experiments showing that skip connections to cell outputs outperform the remaining two. Furthermore, we observe that using gated identity functions as skip mappings works pretty well. Based on this novel skip connections, we successfully train deep stacked bidirectional LSTM models and obtain state-of-the-art results on CCG supertagging and comparable results on POS tagging. @@ -266,13 +266,13 @@ Exploiting Sentence and Context Representations in Deep Neural Models for Spoken Language Understanding - Lina M.Rojas-Barahona - MilicaGašić + Lina M.Rojas-Barahona + MilicaGašić NikolaMrkšić Pei-HaoSu StefanUltes Tsung-HsienWen - SteveYoung + SteveYoung 258–267 C16-1025 This paper presents a deep learning architecture for the semantic decoder component of a Statistical Spoken Dialogue System. In a slot-filling dialogue, the semantic decoder predicts the dialogue act and a set of slot-value pairs from a set of n-best hypotheses returned by the Automatic Speech Recognition. Most current models for spoken language understanding assume (i) word-aligned semantic annotations as in sequence taggers and (ii) delexicalisation, or a mapping of input words to domain-specific concepts using heuristics that try to capture morphological variation but that do not scale to other domains nor to language variation (e.g., morphology, synonyms, paraphrasing ). In this work the semantic decoder is trained using unaligned semantic annotations and it uses distributed semantic representation learning to overcome the limitations of explicit delexicalisation. The proposed architecture uses a convolutional neural network for the sentence representation and a long-short term memory network for the context representation. Results are presented for the publicly available DSTC2 corpus and an In-car corpus which is similar to DSTC2 but has a significantly higher word error rate (WER). @@ -331,7 +331,7 @@ A Word Labeling Approach to <fixed-case>T</fixed-case>hai Sentence Boundary Detection and <fixed-case>POS</fixed-case> Tagging NinaZhou - AiTiAw + AiTiAw NattadapornLertcheva XuancongWang 319–327 @@ -372,7 +372,7 @@ Kuan-YuChen Shih-HungLiu BerlinChen - Hsin-MinWang + Hsin-MinWang 358–368 C16-1035 In the context of natural language processing, representation learning has emerged as a newly active research subject because of its excellent performance in many applications. Learning representations of words is a pioneering study in this school of research. However, paragraph (or sentence and document) embedding learning is more suitable/reasonable for some tasks, such as sentiment classification and document summarization. Nevertheless, as far as we are aware, there is only a dearth of research focusing on launching unsupervised paragraph embedding methods. Classic paragraph embedding methods infer the representation of a given paragraph by considering all of the words occurring in the paragraph. Consequently, those stop or function words that occur frequently may mislead the embedding learning process to produce a misty paragraph representation. Motivated by these observations, our major contributions are twofold. First, we propose a novel unsupervised paragraph embedding method, named the essence vector (EV) model, which aims at not only distilling the most representative information from a paragraph but also excluding the general background information to produce a more informative low-dimensional vector representation for the paragraph. We evaluate the proposed EV model on benchmark sentiment classification and multi-document summarization tasks. The experimental results demonstrate the effectiveness and applicability of the proposed embedding method. Second, in view of the increasing importance of spoken content processing, an extension of the EV model, named the denoising essence vector (D-EV) model, is proposed. The D-EV model not only inherits the advantages of the EV model but also can infer a more robust representation for a given spoken paragraph against imperfect speech recognition. The utility of the D-EV model is evaluated on a spoken document summarization task, confirming the effectiveness of the proposed embedding method in relation to several well-practiced and state-of-the-art summarization methods. @@ -381,10 +381,10 @@ Continuous Expressive Speaking Styles Synthesis based on <fixed-case>CVSM</fixed-case> and <fixed-case>MR</fixed-case>-<fixed-case>HMM</fixed-case> JaimeLorenzo-Trueba - RobertoBarra-Chicote - AscensionGallardo-Antolin + RobertoBarra-Chicote + AscensionGallardo-Antolin JunichiYamagishi - Juan M.Montero + Juan M.Montero 369–376 C16-1036 This paper introduces a continuous system capable of automatically producing the most adequate speaking style to synthesize a desired target text. This is done thanks to a joint modeling of the acoustic and lexical parameters of the speaker models by adapting the CVSM projection of the training texts using MR-HMM techniques. As such, we consider that as long as sufficient variety in the training data is available, we should be able to model a continuous lexical space into a continuous acoustic space. The proposed continuous automatic text to speech system was evaluated by means of a perceptual evaluation in order to compare them with traditional approaches to the task. The system proved to be capable of conveying the correct expressiveness (average adequacy of 3.6) with an expressive strength comparable to oracle traditional expressive speech synthesis (average of 3.6) although with a drop in speech quality mainly due to the semi-continuous nature of the data (average quality of 2.9). This means that the proposed system is capable of improving traditional neutral systems without requiring any additional user interaction. @@ -404,7 +404,7 @@ Frustratingly Easy Neural Domain Adaptation Young-BumKim KarlStratos - RuhiSarikaya + RuhiSarikaya 387–396 C16-1038 Popular techniques for domain adaptation such as the feature augmentation method of Daumé III (2009) have mostly been considered for sparse binary-valued features, but not for dense real-valued features such as those used in neural networks. In this paper, we describe simple neural extensions of these techniques. First, we propose a natural generalization of the feature augmentation method that uses K + 1 LSTMs where one model captures global patterns across all K domains and the remaining K models capture domain-specific information. Second, we propose a novel application of the framework for learning shared structures by Ando and Zhang (2005) to domain adaptation, and also provide a neural extension of their approach. In experiments on slot tagging over 17 domains, our methods give clear performance improvement over Daumé III (2009) applied on feature-rich CRFs. @@ -412,10 +412,10 @@ A House United: Bridging the Script and Lexical Barrier between <fixed-case>H</fixed-case>indi and <fixed-case>U</fixed-case>rdu - Riyaz A.Bhat - Irshad A.Bhat + Riyaz A.Bhat + Irshad A.Bhat NamanJain - Dipti MisraSharma + Dipti MisraSharma 397–408 C16-1039 In Computational Linguistics, Hindi and Urdu are not viewed as a monolithic entity and have received separate attention with respect to their text processing. From part-of-speech tagging to machine translation, models are separately trained for both Hindi and Urdu despite the fact that they represent the same language. The reasons mainly are their divergent literary vocabularies and separate orthographies, and probably also their political status and the social perception that they are two separate languages. In this article, we propose a simple but efficient approach to bridge the lexical and orthographic differences between Hindi and Urdu texts. With respect to text processing, addressing the differences between the Hindi and Urdu texts would be beneficial in the following ways: (a) instead of training separate models, their individual resources can be augmented to train single, unified models for better generalization, and (b) their individual text processing applications can be used interchangeably under varied resource conditions. To remove the script barrier, we learn accurate statistical transliteration models which use sentence-level decoding to resolve word ambiguity. Similarly, we learn cross-register word embeddings from the harmonized Hindi and Urdu corpora to nullify their lexical divergences. As a proof of the concept, we evaluate our approach on the Hindi and Urdu dependency parsing under two scenarios: (a) resource sharing, and (b) resource augmentation. We demonstrate that a neural network-based dependency parser trained on augmented, harmonized Hindi and Urdu resources performs significantly better than the parsing models trained separately on the individual resources. We also show that we can achieve near state-of-the-art results when the parsers are used interchangeably. @@ -425,7 +425,7 @@ Deeper syntax for better semantic parsing OlivierMichalon CorentinRibeyre - MarieCandito + MarieCandito AlexisNasr 409–420 C16-1040 @@ -443,7 +443,7 @@ Promoting multiword expressions in <fixed-case>A</fixed-case>* <fixed-case>TAG</fixed-case> parsing - JakubWaszczuk + JakubWaszczuk AgataSavary YannickParmentier 429–439 @@ -454,8 +454,8 @@ Incrementally Learning a Dependency Parser to Support Language Documentation in Field Linguistics MorganUlinski - JuliaHirschberg - OwenRambow + JuliaHirschberg + OwenRambow 440–449 C16-1043 We present experiments in incrementally learning a dependency parser. The parser will be used in the WordsEye Linguistics Tools (WELT) (Ulinski et al., 2014) which supports field linguists documenting a language’s syntax and semantics. Our goal is to make syntactic annotation faster for field linguists. We have created a new parallel corpus of descriptions of spatial relations and motion events, based on pictures and video clips used by field linguists for elicitation of language from native speaker informants. We collected descriptions for each picture and video from native speakers in English, Spanish, German, and Egyptian Arabic. We compare the performance of MSTParser (McDonald et al., 2006) and MaltParser (Nivre et al., 2006) when trained on small amounts of this data. We find that MaltParser achieves the best performance. We also present the results of experiments using the parser to assist with annotation. We find that even when the parser is trained on a single sentence from the corpus, annotation time significantly decreases. @@ -465,7 +465,7 @@ Inducing Multilingual Text Analysis Tools Using Bidirectional Recurrent Neural Networks OthmanZennaki NasredineSemmar - LaurentBesacier + LaurentBesacier 450–460 C16-1044 This work focuses on the development of linguistic analysis tools for resource-poor languages. We use a parallel corpus to produce a multilingual word representation based only on sentence level alignment. This representation is combined with the annotated source side (resource-rich language) of the parallel corpus to train text analysis tools for resource-poor languages. Our approach is based on Recurrent Neural Networks (RNN) and has the following advantages: (a) it does not use word alignment information, (b) it does not assume any knowledge about foreign languages, which makes it applicable to a wide range of resource-poor languages, (c) it provides truly multilingual taggers. In a previous study, we proposed a method based on Simple RNN to automatically induce a Part-Of-Speech (POS) tagger. In this paper, we propose an improvement of our neural model. We investigate the Bidirectional RNN and the inclusion of external information (for instance low level information from Part-Of-Speech tags) in the RNN to train a more complex tagger (for instance, a multilingual super sense tagger). We demonstrate the validity and genericity of our method by using parallel corpora (obtained by manual or automatic translation). Our experiments are conducted to induce cross-lingual POS and super sense taggers. @@ -488,7 +488,7 @@ Determining the Multiword Expression Inventory of a Surprise Language BaharSalehi PaulCook - TimothyBaldwin + TimothyBaldwin 471–481 C16-1046 Much previous research on multiword expressions (MWEs) has focused on the token- and type-level tasks of MWE identification and extraction, respectively. Such studies typically target known prevalent MWE types in a given language. This paper describes the first attempt to learn the MWE inventory of a “surprise” language for which we have no explicit prior knowledge of MWE patterns, certainly no annotated MWE data, and not even a parallel corpus. Our proposed model is trained on a treebank with MWE relations of a source language, and can be applied to the monolingual corpus of the surprise language to identify its MWE construction types. @@ -497,9 +497,9 @@ A Hybrid Deep Learning Architecture for Sentiment Analysis Md ShadAkhtar - AyushKumar + AyushKumar AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 482–493 C16-1047 In this paper, we propose a novel hybrid deep learning archtecture which is highly efficient for sentiment analysis in resource-poor languages. We learn sentiment embedded vectors from the Convolutional Neural Network (CNN). These are augmented to a set of optimized features selected through a multi-objective optimization (MOO) framework. The sentiment augmented optimized vector obtained at the end is used for the training of SVM for sentiment classification. We evaluate our proposed approach for coarse-grained (i.e. sentence level) as well as fine-grained (i.e. aspect level) sentiment analysis on four Hindi datasets covering varying domains. In order to show that our proposed method is generic in nature we also evaluate it on two benchmark English datasets. Evaluation shows that the results of the proposed method are consistent across all the datasets and often outperforms the state-of-art systems. To the best of our knowledge, this is the very first attempt where such a deep learning model is used for less-resourced languages such as Hindi. @@ -509,7 +509,7 @@ Word Segmentation in <fixed-case>S</fixed-case>anskrit Using Path Constrained Random Walks AmrithKrishna BishalSantra - PavankumarSatuluri + PavankumarSatuluri Sasi PrasanthBandaru BhumiFaldu YajuvendraSingh @@ -533,7 +533,7 @@ Appraising <fixed-case>UMLS</fixed-case> Coverage for Summarizing Medical Evidence ElahehShafieiBavani MohammadEbrahimi - RaymondWong + RaymondWong FangChen 513–524 C16-1050 @@ -625,7 +625,7 @@ Keystroke dynamics as signal for shallow syntactic parsing - BarbaraPlank + BarbaraPlank 609–619 C16-1059 Keystroke dynamics have been extensively used in psycholinguistic and writing research to gain insights into cognitive processing. But do keystroke logs contain actual signal that can be used to learn better natural language processing models? We postulate that keystroke dynamics contain information about syntactic structure that can inform shallow syntactic parsing. To test this hypothesis, we explore labels derived from keystroke logs as auxiliary task in a multi-task bidirectional Long Short-Term Memory (bi-LSTM). Our results show promising results on two shallow syntactic parsing tasks, chunking and CCG supertagging. Our model is simple, has the advantage that data can come from distinct sources, and produces models that are significantly better than models trained on the text annotations alone. @@ -681,7 +681,7 @@ Identifying Cross-Cultural Differences in Word Usage AparnaGarimella - RadaMihalcea + RadaMihalcea JamesPennebaker 674–683 C16-1065 @@ -721,7 +721,7 @@ Understanding the Lexical Simplification Needs of Non-Native Speakers of <fixed-case>E</fixed-case>nglish - GustavoPaetzold + GustavoPaetzold LuciaSpecia 717–727 C16-1069 @@ -742,7 +742,7 @@ Advancing Linguistic Features and Insights by Label-informed Feature Grouping: An Exploration in the Context of Native Language Identification SerhiyBykh - DetmarMeurers + DetmarMeurers 739–749 C16-1071 We propose a hierarchical clustering approach designed to group linguistic features for supervised machine learning that is inspired by variationist linguistics. The method makes it possible to abstract away from the individual feature occurrences by grouping features together that behave alike with respect to the target class, thus providing a new, more general perspective on the data. On the one hand, it reduces data sparsity, leading to quantitative performance gains. On the other, it supports the formation and evaluation of hypotheses about individual choices of linguistic structures. We explore the method using features based on verb subcategorization information and evaluate the approach in the context of the Native Language Identification (NLI) task. @@ -750,10 +750,10 @@ Modeling Diachronic Change in Scientific Writing with Information Density - RaphaelRubino + RaphaelRubino StefaniaDegaetano-Ortlieb ElkeTeich - Josefvan Genabith + Josefvan Genabith 750–761 C16-1072 Previous linguistic research on scientific writing has shown that language use in the scientific domain varies considerably in register and style over time. In this paper we investigate the introduction of information theory inspired features to study long term diachronic change on three levels: lexis, part-of-speech and syntax. Our approach is based on distinguishing between sentences from 19th and 20th century scientific abstracts using supervised classification models. To the best of our knowledge, the introduction of information theoretic features to this task is novel. We show that these features outperform more traditional features, such as token or character n-grams, while leading to more compact models. We present a detailed analysis of feature informativeness in order to gain a better understanding of diachronic change on different linguistic levels. @@ -772,7 +772,7 @@ Machine Learning for Metrical Analysis of <fixed-case>E</fixed-case>nglish Poetry ManexAgirrezabal - IñakiAlegria + IñakiAlegria MansHulden 772–781 C16-1074 @@ -827,7 +827,7 @@ Automatic Extraction of Learner Errors in <fixed-case>ESL</fixed-case> Sentences Using Linguistically Enhanced Alignments MarianoFelice ChristopherBryant - TedBriscoe + TedBriscoe 825–835 C16-1079 We propose a new method of automatically extracting learner errors from parallel English as a Second Language (ESL) sentences in an effort to regularise annotation formats and reduce inconsistencies. Specifically, given an original and corrected sentence, our method first uses a linguistically enhanced alignment algorithm to determine the most likely mappings between tokens, and secondly employs a rule-based function to decide which alignments should be merged. Our method beats all previous approaches on the tested datasets, achieving state-of-the-art results for automatic error extraction. @@ -844,7 +844,7 @@ How Regular is <fixed-case>J</fixed-case>apanese Loanword Adaptation? A Computational Study - LingshuangMao + LingshuangMao MansHulden 847–856 C16-1081 @@ -854,11 +854,11 @@ Using Linguistic Data for <fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>panish Verb-Noun Combination Identification UxoaIñurrieta - ArantzaDíaz de Ilarraza - GorkaLabaka - KepaSarasola - ItziarAduriz - JohnCarroll + ArantzaDíaz de Ilarraza + GorkaLabaka + KepaSarasola + ItziarAduriz + JohnCarroll 857–867 C16-1082 We present a linguistic analysis of a set of English and Spanish verb+noun combinations (VNCs), and a method to use this information to improve VNC identification. Firstly, a sample of frequent VNCs are analysed in-depth and tagged along lexico-semantic and morphosyntactic dimensions, obtaining satisfactory inter-annotator agreement scores. Then, a VNC identification experiment is undertaken, where the analysed linguistic data is combined with chunking information and syntactic dependencies. A comparison between the results of the experiment and the results obtained by a basic detection method shows that VNC identification can be greatly improved by using linguistic information, as a large number of additional occurrences are detected with high precision. @@ -867,8 +867,8 @@ Analyzing Gender Bias in Student Evaluations AndamlakTerkik - EmilyPrud’hommeaux - CeciliaOvesdotter Alm + EmilyPrud’hommeaux + CeciliaOvesdotter Alm ChristopherHoman ScottFranklin 868–876 @@ -900,7 +900,7 @@ Extending the Use of <fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars for Unsupervised Morphological Segmentation of Unseen Languages RamyEskander - OwenRambow + OwenRambow TianchunYang 900–910 C16-1086 @@ -930,7 +930,7 @@ YunliWang YongJin XiaodanZhu - CyrilGoutte + CyrilGoutte 932–942 C16-1089 The goal of keyphrase extraction is to automatically identify the most salient phrases from documents. The technique has a wide range of applications such as rendering a quick glimpse of a document, or extracting key content for further use. While previous work often assumes keyphrases are a static property of a given documents, in many applications, the appropriate set of keyphrases that should be extracted depends on the set of documents that are being considered together. In particular, good keyphrases should not only accurately describe the content of a document, but also reveal what discriminates it from the other documents. In this paper, we study this problem of extracting discriminative keyphrases. In particularly, we propose to use the hierarchical semantic structure between candidate keyphrases to promote keyphrases that have the right level of specificity to clearly distinguish the target document from others. We show that such knowledge can be used to construct better discriminative keyphrase extraction systems that do not assume a static, fixed set of keyphrases for a document. We show how this helps identify key expertise of authors from their papers, as well as competencies covered by online courses within different domains. @@ -941,7 +941,7 @@ HaoranHuang QiZhang YeyunGong - XuanjingHuang + XuanjingHuang 943–952 C16-1090 On microblogging services, people usually use hashtags to mark microblogs, which have a specific theme or content, making them easier for users to find. Hence, how to automatically recommend hashtags for microblogs has received much attention in recent years. Previous deep neural network-based hashtag recommendation approaches converted the task into a multi-class classification problem. However, most of these methods only took the microblog itself into consideration. Motivated by the intuition that the history of users should impact the recommendation procedure, in this work, we extend end-to-end memory networks to perform this task. We incorporate the histories of users into the external memory and introduce a hierarchical attention mechanism to select more appropriate histories. To train and evaluate the proposed method, we also construct a dataset based on microblogs collected from Twitter. Experimental results demonstrate that the proposed methods can significantly outperform state-of-the-art methods. By incorporating the hierarchical attention mechanism, the relative improvement in the proposed method over the state-of-the-art method is around 67.9% in the F1-score. @@ -951,7 +951,7 @@ Automatic Labelling of Topics with Neural Embeddings ShraeyBhatia Jey HanLau - TimothyBaldwin + TimothyBaldwin 953–963 C16-1091 Topics generated by topic models are typically represented as list of terms. To reduce the cognitive overhead of interpreting these topics for end-users, we propose labelling a topic with a succinct phrase that summarises its theme or idea. Using Wikipedia document titles as label candidates, we compute neural embeddings for documents and words to select the most relevant labels for topics. Comparing to a state-of-the-art topic labelling system, our methodology is simpler, more efficient and finds better topic labels. @@ -964,7 +964,7 @@ LifengJin VictoriaKrakovna FinaleDoshi-Velez - TimothyMiller + TimothyMiller WilliamSchuler LaneSchwartz 964–975 @@ -974,7 +974,7 @@ ‘Calling on the classical phone’: a distributional model of adjective-noun errors in learners’ <fixed-case>E</fixed-case>nglish - AurélieHerbelot + AurélieHerbelot EkaterinaKochmar 976–986 C16-1093 @@ -983,10 +983,10 @@ Are Cohesive Features Relevant for Text Readability Evaluation? - AmaliaTodirascu + AmaliaTodirascu ThomasFrançois DelphineBernhard - NúriaGala + NúriaGala Anne-LaureLigozat 987–997 C16-1094 @@ -997,10 +997,10 @@ Named Entity Recognition for Linguistic Rapid Response in Low-Resource Languages: <fixed-case>S</fixed-case>orani <fixed-case>K</fixed-case>urdish and <fixed-case>T</fixed-case>ajik PatrickLittell KartikGoyal - David R.Mortensen - AlexaLittle - ChrisDyer - LoriLevin + David R.Mortensen + AlexaLittle + ChrisDyer + LoriLevin 998–1006 C16-1095 This paper describes our construction of named-entity recognition (NER) systems in two Western Iranian languages, Sorani Kurdish and Tajik, as a part of a pilot study of “Linguistic Rapid Response” to potential emergency humanitarian relief situations. In the absence of large annotated corpora, parallel corpora, treebanks, bilingual lexica, etc., we found the following to be effective: exploiting distributional regularities in monolingual data, projecting information across closely related languages, and utilizing human linguist judgments. We show promising results on both a four-month exercise in Sorani and a two-day exercise in Tajik, achieved with minimal annotation costs. @@ -1047,7 +1047,7 @@ <fixed-case>C</fixed-case>hinese Poetry Generation with Planning based Neural Network - ZheWang + ZheWang WeiHe HuaWu HaiyangWu @@ -1062,7 +1062,7 @@ Predicting sentential semantic compatibility for aggregation in text-to-text generation VictorChenal - Jackie Chi KitCheung + Jackie Chi KitCheung 1061–1070 C16-1101 We examine the task of aggregation in the context of text-to-text generation. We introduce a new aggregation task which frames the process as grouping input sentence fragments into clusters that are to be expressed as a single output sentence. We extract datasets for this task from a corpus using an automatic extraction process. Based on the results of a user study, we develop two gold-standard clusterings and corresponding evaluation methods for each dataset. We present a hierarchical clustering framework for predicting aggregation decisions on this task, which outperforms several baselines and can serve as a reference in future work. @@ -1071,7 +1071,7 @@ Sequential Clustering and Contextual Importance Measures for Incremental Update Summarization MarkusZopf - EneldoLoza Mencía + EneldoLoza Mencía JohannesFürnkranz 1071–1082 C16-1102 @@ -1082,7 +1082,7 @@ Natural Language Generation through Character-based <fixed-case>RNN</fixed-case>s with Finite-state Prior Knowledge RaghavGoyal MarcDymetman - EricGaussier + EricGaussier 1083–1092 C16-1103 Recently Wen et al. (2015) have proposed a Recurrent Neural Network (RNN) approach to the generation of utterances from dialog acts, and shown that although their model requires less effort to develop than a rule-based system, it is able to improve certain aspects of the utterances, in particular their naturalness. However their system employs generation at the word-level, which requires one to pre-process the data by substituting named entities with placeholders. This pre-processing prevents the model from handling some contextual effects and from managing multiple occurrences of the same attribute. Our approach uses a character-level model, which unlike the word-level model makes it possible to learn to “copy” information from the dialog act to the target without having to pre-process the input. In order to avoid generating non-words and inventing information not present in the input, we propose a method for incorporating prior knowledge into the RNN in the form of a weighted finite-state automaton over character sequences. Automatic and human evaluations show improved performance over baselines on several evaluation criteria. @@ -1153,11 +1153,11 @@ <fixed-case>W</fixed-case>ord2<fixed-case>V</fixed-case>ec vs <fixed-case>DB</fixed-case>nary: Augmenting <fixed-case>METEOR</fixed-case> using Vector Representations or Lexical Resources? - ChristopheServan + ChristopheServan AlexandreBérard ZiedElloumi - HervéBlanchon - LaurentBesacier + HervéBlanchon + LaurentBesacier 1159–1168 C16-1110 This paper presents an approach combining lexico-semantic resources and distributed representations of words applied to the evaluation in machine translation (MT). This study is made through the enrichment of a well-known MT evaluation metric: METEOR. METEOR enables an approximate match (synonymy or morphological similarity) between an automatic and a reference translation. Our experiments are made in the framework of the Metrics task of WMT 2014. We show that distributed representations are a good alternative to lexico-semanticresources for MT evaluation and they can even bring interesting additional information. The augmented versions of METEOR, using vector representations, are made available on our Github page. @@ -1165,8 +1165,8 @@ Broad <fixed-case>T</fixed-case>witter Corpus: A Diverse Named Entity Recognition Resource - LeonDerczynski - KalinaBontcheva + LeonDerczynski + KalinaBontcheva IanRoberts 1169–1179 C16-1111 @@ -1196,7 +1196,7 @@ Leveraging Multilingual Training for Limited Resource Event Extraction AndrewHsi YimingYang - JaimeCarbonell + JaimeCarbonell RuochenXu 1201–1210 C16-1114 @@ -1206,7 +1206,7 @@ <fixed-case>LILI</fixed-case>: A Simple Language Independent Approach for Language Identification MohamedAl-Badrashiny - MonaDiab + MonaDiab 1211–1219 C16-1115 We introduce a generic Language Independent Framework for Linguistic Code Switch Point Detection. The system uses characters level 5-grams and word level unigram language models to train a conditional random fields (CRF) model for classifying input words into various languages. We test our proposed framework and compare it to the state-of-the-art published systems on standard data sets from several language pairs: English-Spanish, Nepali-English, English-Hindi, Arabizi (Refers to Arabic written using the Latin/Roman script)-English, Arabic-Engari (Refers to English written using Arabic script), Modern Standard Arabic(MSA)-Egyptian, Levantine-MSA, Gulf-MSA, one more English-Spanish, and one more MSA-EGY. The overall weighted average F-score of each language pair are 96.4%, 97.3%, 98.0%, 97.0%, 98.9%, 86.3%, 88.2%, 90.6%, 95.2%, and 85.0% respectively. The results show that our approach despite its simplicity, either outperforms or performs at comparable levels to state-of-the-art published systems. @@ -1215,7 +1215,7 @@ High Accuracy Rule-based Question Classification using Question Syntax and Semantics HarishTayyar Madabushi - MarkLee + MarkLee 1220–1230 C16-1116 We present in this paper a purely rule-based system for Question Classification which we divide into two parts: The first is the extraction of relevant words from a question by use of its structure, and the second is the classification of questions based on rules that associate these words to Concepts. We achieve an accuracy of 97.2%, close to a 6 point improvement over the previous State of the Art of 91.6%. Additionally, we believe that machine learning algorithms can be applied on top of this method to further improve accuracy. @@ -1238,7 +1238,7 @@ Semantically Motivated <fixed-case>H</fixed-case>ebrew Verb-Noun Multi-Word Expressions Identification ChayaLiebeskind - YaakovHaCohen-Kerner + YaakovHaCohen-Kerner 1242–1253 C16-1118 Identification of Multi-Word Expressions (MWEs) lies at the heart of many natural language processing applications. In this research, we deal with a particular type of Hebrew MWEs, Verb-Noun MWEs (VN-MWEs), which combine a verb and a noun with or without other words. Most prior work on MWEs classification focused on linguistic and statistical information. In this paper, we claim that it is essential to utilize semantic information. To this end, we propose a semantically motivated indicator for classifying VN-MWE and define features that are related to various semantic spaces and combine them as features in a supervised classification framework. We empirically demonstrate that our semantic feature set yields better performance than the common linguistic and statistical feature sets and that combining semantic features contributes to the VN-MWEs identification task. @@ -1269,7 +1269,7 @@ Facing the most difficult case of Semantic Role Labeling: A collaboration of word embeddings and co-training Quynh Ngoc ThiDo StevenBethard - Marie-FrancineMoens + Marie-FrancineMoens 1275–1284 C16-1121 We present a successful collaboration of word embeddings and co-training to tackle in the most difficult test case of semantic role labeling: predicting out-of-domain and unseen semantic frames. Despite the fact that co-training is a successful traditional semi-supervised method, its application in SRL is very limited especially when a huge amount of labeled data is available. In this work, co-training is used together with word embeddings to improve the performance of a system trained on a large training dataset. We also introduce a semantic role labeling system with a simple learning architecture and effective inference that is easily adaptable to semi-supervised settings with new training data and/or new features. On the out-of-domain testing set of the standard benchmark CoNLL 2009 data our simple approach achieves high performance and improves state-of-the-art results. @@ -1277,8 +1277,8 @@ Predictability of Distributional Semantics in Derivational Word Formation - SebastianPadó - AurélieHerbelot + SebastianPadó + AurélieHerbelot MaxKisselew JanŠnajder 1285–1296 @@ -1301,7 +1301,7 @@ From phonemes to images: levels of representation in a recurrent neural model of visually-grounded language learning LiekeGelderloos - GrzegorzChrupała + GrzegorzChrupała 1309–1319 C16-1124 We present a model of visually-grounded language learning based on stacked gated recurrent neural networks which learns to predict visual features given an image description in the form of a sequence of phonemes. The learning task resembles that faced by human language learners who need to discover both structure and meaning from noisy and ambiguous data across modalities. We show that our model indeed learns to predict features of the visual context given phonetically transcribed image descriptions, and show that it represents linguistic information in a hierarchy of levels: lower layers in the stack are comparatively more sensitive to form, whereas higher layers are more sensitive to meaning. @@ -1311,7 +1311,7 @@ Linguistic features for <fixed-case>H</fixed-case>indi light verb construction identification AshwiniVaidya SumeetAgarwal - MarthaPalmer + MarthaPalmer 1320–1329 C16-1125 Light verb constructions (LVC) in Hindi are highly productive. If we can distinguish a case such as nirnay lenaa ‘decision take; decide’ from an ordinary verb-argument combination kaagaz lenaa ‘paper take; take (a) paper’,it has been shown to aid NLP applications such as parsing (Begum et al., 2011) and machine translation (Pal et al., 2011). In this paper, we propose an LVC identification system using language specific features for Hindi which shows an improvement over previous work(Begum et al., 2011). To build our system, we carry out a linguistic analysis of Hindi LVCs using Hindi Treebank annotations and propose two new features that are aimed at capturing the diversity of Hindi LVCs in the corpus. We find that our model performs robustly across a diverse range of LVCs and our results underscore the importance of semantic features, which is in keeping with the findings for English. Our error analysis also demonstrates that our classifier can be used to further refine LVC annotations in the Hindi Treebank and make them more consistent across the board. @@ -1321,7 +1321,7 @@ Cross-lingual Transfer of Correlations between Parts of Speech and Gaze Features MariaBarrett FrankKeller - AndersSøgaard + AndersSøgaard 1330–1339 C16-1126 Several recent studies have shown that eye movements during reading provide information about grammatical and syntactic processing, which can assist the induction of NLP models. All these studies have been limited to English, however. This study shows that gaze and part of speech (PoS) correlations largely transfer across English and French. This means that we can replicate previous studies on gaze-based PoS tagging for French, but also that we can use English gaze data to assist the induction of French NLP models. @@ -1349,7 +1349,7 @@ Dynamic Generative model for Diachronic Sense Emergence Detection MartinEmms - Arun KumarJayapal + Arun KumarJayapal 1362–1373 C16-1129 As time passes words can acquire meanings they did not previously have, such as the ‘twitter post’ usage of ‘tweet’. We address how this can be detected from time-stamped raw text. We propose a generative model with senses dependent on times and context words dependent on senses but otherwise eternal, and a Gibbs sampler for estimation. We obtain promising parameter estimates for positive (resp. negative) cases of known sense emergence (resp non-emergence) and adapt the ‘pseudo-word’ technique (Schutze, 1992) to give a novel further evaluation via ‘pseudo-neologisms’. The question of ground-truth is also addressed and a technique proposed to locate an emergence date for evaluation purposes. @@ -1380,7 +1380,7 @@ Machine Translation Evaluation for <fixed-case>A</fixed-case>rabic using Morphologically-enriched Embeddings - FranciscoGuzmán + FranciscoGuzmán HoudaBouamor RamyBaly NizarHabash @@ -1401,7 +1401,7 @@ Phrase-based Machine Translation using Multiple Preordering Candidates YusukeOda - TakuKudo + TakuKudo TetsujiNakagawa TaroWatanabe 1419–1428 @@ -1413,8 +1413,8 @@ Hand in Glove: Deep Feature Fusion Network Architectures for Answer Quality Prediction in Community Question Answering Sai PraneethSuggu KushwanthNaga Goutham - Manoj K.Chinnakotla - ManishShrivastava + Manoj K.Chinnakotla + ManishShrivastava 1429–1440 C16-1135 Community Question Answering (cQA) forums have become a popular medium for soliciting direct answers to specific questions of users from experts or other experienced users on a given topic. However, for a given question, users sometimes have to sift through a large number of low-quality or irrelevant answers to find out the answer which satisfies their information need. To alleviate this, the problem of Answer Quality Prediction (AQP) aims to predict the quality of an answer posted in response to a forum question. Current AQP systems either learn models using - a) various hand-crafted features (HCF) or b) Deep Learning (DL) techniques which automatically learn the required feature representations. In this paper, we propose a novel approach for AQP known as - “Deep Feature Fusion Network (DFFN)” which combines the advantages of both hand-crafted features and deep learning based systems. Given a question-answer pair along with its metadata, the DFFN architecture independently - a) learns features from the Deep Neural Network (DNN) and b) computes hand-crafted features using various external resources and then combines them using a fully connected neural network trained to predict the final answer quality. DFFN is end-end differentiable and trained as a single system. We propose two different DFFN architectures which vary mainly in the way they model the input question/answer pair - DFFN-CNN uses a Convolutional Neural Network (CNN) and DFFN-BLNA uses a Bi-directional LSTM with Neural Attention (BLNA). Both these proposed variants of DFFN (DFFN-CNN and DFFN-BLNA) achieve state-of-the-art performance on the standard SemEval-2015 and SemEval-2016 benchmark datasets and outperforms baseline approaches which individually employ either HCF or DL based techniques alone. @@ -1424,7 +1424,7 @@ Learning Event Expressions via Bilingual Structure Projection FangyuanLi RuihongHuang - DeyiXiong + DeyiXiong MinZhang 1441–1450 C16-1136 @@ -1434,8 +1434,8 @@ Global Inference to <fixed-case>C</fixed-case>hinese Temporal Relation Extraction PeifengLi - QiaomingZhu - GuodongZhou + QiaomingZhu + GuodongZhou HonglingWang 1451–1460 C16-1137 @@ -1498,7 +1498,7 @@ An Unsupervised Multi-Document Summarization Framework Based on Neural Document Model ShuleiMa - Zhi-HongDeng + Zhi-HongDeng YunlunYang 1514–1523 C16-1143 @@ -1510,7 +1510,7 @@ MaximilianSchwenger ÁlvaroTorralba JoergHoffmann - David M.Howcroft + David M.Howcroft VeraDemberg 1524–1534 C16-1144 @@ -1542,7 +1542,7 @@ On the Impact of Seed Words on Sentiment Polarity Lexicon Induction DameJovanoski VenoPachovski - PreslavNakov + PreslavNakov 1557–1567 C16-1147 Sentiment polarity lexicons are key resources for sentiment analysis, and researchers have invested a lot of efforts in their manual creation. However, there has been a recent shift towards automatically extracted lexicons, which are orders of magnitude larger and perform much better. These lexicons are typically mined using bootstrapping, starting from very few seed words whose polarity is given, e.g., 50-60 words, and sometimes even just 5-6. Here we demonstrate that much higher-quality lexicons can be built by starting with hundreds of words and phrases as seeds, especially when they are in-domain. Thus, we combine (i) mid-sized high-quality manually crafted lexicons as seeds and (ii) bootstrapping, in order to build large-scale lexicons. @@ -1553,7 +1553,7 @@ SwapnaSomasundaran BrianRiordan BinodGyawali - Su-YounYoon + Su-YounYoon 1568–1578 C16-1148 This work investigates whether the development of ideas in writing can be captured by graph properties derived from the text. Focusing on student essays, we represent the essay as a graph, and encode a variety of graph properties including PageRank as features for modeling essay scores related to quality of development. We demonstrate that our approach improves on a state-of-the-art system on the task of holistic scoring of persuasive essays and on the task of scoring narrative essays along the development dimension. @@ -1607,7 +1607,7 @@ YueZhang SophiaLee ShoushanLi - GuodongZhou + GuodongZhou 1624–1634 C16-1153 Emotions in code-switching text can be expressed in either monolingual or bilingual forms. However, relatively little research has emphasized on code-switching text. In this paper, we propose a Bilingual Attention Network (BAN) model to aggregate the monolingual and bilingual informative words to form vectors from the document representation, and integrate the attention vectors to predict the emotion. The experiments show that the effectiveness of the proposed model. Visualization of the attention layers illustrates that the model selects qualitatively informative words. @@ -1634,7 +1634,7 @@ Predicting the Evocation Relation between Lexicalized Concepts - YoshihikoHayashi + YoshihikoHayashi 1657–1668 C16-1156 Evocation is a directed yet weighted semantic relationship between lexicalized concepts. Although evocation relations are considered potentially useful in several semantic NLP tasks, the prediction of the evocation relation between an arbitrary pair of concepts remains difficult, since evocation relationships cover a broader range of semantic relations rooted in human perception and experience. This paper presents a supervised learning approach to predict the strength (by regression) and to determine the directionality (by classification) of the evocation relation that might hold between a pair of lexicalized concepts. Empirical results that were obtained by investigating useful features are shown, indicating that a combination of the proposed features largely outperformed individual baselines, and also suggesting that semantic relational vectors computed from existing semantic vectors for lexicalized concepts were indeed effective for both the prediction of strength and the determination of directionality. @@ -1642,7 +1642,7 @@ Collecting and Exploring Everyday Language for Predicting Psycholinguistic Properties of Words - GustavoPaetzold + GustavoPaetzold LuciaSpecia 1669–1679 C16-1157 @@ -1652,7 +1652,7 @@ Using Argument Mining to Assess the Argumentation Quality of Essays HenningWachsmuth - KhalidAl-Khatib + KhalidAl-Khatib BennoStein 1680–1691 C16-1158 @@ -1685,7 +1685,7 @@ TianyuLiu TaoGe LeiSha - BaobaoChang + BaobaoChang SujianLi ZhifangSui 1715–1724 @@ -1713,7 +1713,7 @@ Wei-NingHsu YuZhang MitraMohtarami - JamesGlass + JamesGlass 1734–1745 C16-1163 In real-world data, e.g., from Web forums, text is often contaminated with redundant or irrelevant content, which leads to introducing noise in machine learning algorithms. In this paper, we apply Long Short-Term Memory networks with an attention mechanism, which can select important parts of text for the task of similar question retrieval from community Question Answering (cQA) forums. In particular, we use the attention weights for both selecting entire sentences and their subparts, i.e., word/chunk, from shallow syntactic trees. More interestingly, we apply tree kernels to the filtered text representations, thus exploiting the implicit features of the subtree space for learning question reranking. Our results show that the attention-based pruning allows for achieving the top position in the cQA challenge of SemEval 2016, with a relatively large gap from the other participants while greatly decreasing running time. @@ -1725,7 +1725,7 @@ MoYu BingXiang BowenZhou - HinrichSchütze + HinrichSchütze 1746–1756 C16-1164 This work focuses on answering single-relation factoid questions over Freebase. Each question can acquire the answer from a single fact of form (subject, predicate, object) in Freebase. This task, simple question answering (SimpleQA), can be addressed via a two-step pipeline: entity linking and fact selection. In fact selection, we match the subject entity in a fact candidate with the entity mention in the question by a character-level convolutional neural network (char-CNN), and match the predicate in that fact with the question by a word-level CNN (word-CNN). This work makes two main contributions. (i) A simple and effective entity linker over Freebase is proposed. Our entity linker outperforms the state-of-the-art entity linker over SimpleQA task. (ii) A novel attentive maxpooling is stacked over word-CNN, so that the predicate representation can be matched with the predicate-focused question representation more effectively. Experiments show that our system sets new state-of-the-art in this task. @@ -1746,8 +1746,8 @@ GeorgiosBalikas HesamAmoualian MarianneClausel - EricGaussier - Massih R.Amini + EricGaussier + Massih R.Amini 1767–1776 C16-1166 The exchangeability assumption in topic models like Latent Dirichlet Allocation (LDA) often results in inferring inconsistent topics for the words of text spans like noun-phrases, which are usually expected to be topically coherent. We propose copulaLDA, that extends LDA by integrating part of the text structure to the model and relaxes the conditional independence assumption between the word-specific latent topics given the per-document topic distributions. To this end, we assume that the words of text spans like noun-phrases are topically bound and we model this dependence with copulas. We demonstrate empirically the effectiveness of copulaLDA on both intrinsic and extrinsic evaluation tasks on several publicly available corpora. @@ -1768,7 +1768,7 @@ Semantic Annotation Aggregation with Conditional Crowdsourcing Models and Word Embeddings PaulFelt - EricRingger + EricRingger KevinSeppi 1787–1796 C16-1168 @@ -1778,8 +1778,8 @@ Interactive-Predictive Machine Translation based on Syntactic Constraints of Prefix NaYe - GuipingZhang - DongfengCai + GuipingZhang + DongfengCai 1797–1806 C16-1169 Interactive-predictive machine translation (IPMT) is a translation mode which combines machine translation technology and human behaviours. In the IPMT system, the utilization of the prefix greatly affects the interaction efficiency. However, state-of-the-art methods filter translation hypotheses mainly according to their matching results with the prefix on character level, and the advantage of the prefix is not fully developed. Focusing on this problem, this paper mines the deep constraints of prefix on syntactic level to improve the performance of IPMT systems. Two syntactic subtree matching rules based on phrase structure grammar are proposed to filter the translation hypotheses more strictly. Experimental results on LDC Chinese-English corpora show that the proposed method outperforms state-of-the-art phrase-based IPMT system while keeping comparable decoding speed. @@ -1799,7 +1799,7 @@ A Distribution-based Model to Learn Bilingual Word Embeddings HailongCao - TiejunZhao + TiejunZhao ShuZhang YaoMeng 1818–1827 @@ -1812,7 +1812,7 @@ JanNiehues EunahCho Thanh-LeHa - AlexWaibel + AlexWaibel 1828–1836 C16-1172 Recently, the development of neural machine translation (NMT) has significantly improved the translation quality of automatic machine translation. While most sentences are more accurate and fluent than translations by statistical machine translation (SMT)-based systems, in some cases, the NMT system produces translations that have a completely different meaning. This is especially the case when rare words occur. When using statistical machine translation, it has already been shown that significant gains can be achieved by simplifying the input in a preprocessing step. A commonly used example is the pre-reordering approach. In this work, we used phrase-based machine translation to pre-translate the input into the target language. Then a neural machine translation system generates the final hypothesis using the pre-translation. Thereby, we use either only the output of the phrase-based machine translation (PBMT) system or a combination of the PBMT output and the source sentence. We evaluate the technique on the English to German translation task. Using this approach we are able to outperform the PBMT system as well as the baseline neural MT system by up to 2 BLEU points. We analyzed the influence of the quality of the initial system on the final result. @@ -1878,8 +1878,8 @@ Multi-view and multi-task training of <fixed-case>RST</fixed-case> discourse parsers ChloéBraud - BarbaraPlank - AndersSøgaard + BarbaraPlank + AndersSøgaard 1903–1913 C16-1179 We experiment with different ways of training LSTM networks to predict RST discourse trees. The main challenge for RST discourse parsing is the limited amounts of training data. We combat this by regularizing our models using task supervision from related tasks as well as alternative views on discourse structures. We show that a simple LSTM sequential discourse parser takes advantage of this multi-view and multi-task framework with 12-15% error reductions over our baseline (depending on the metric) and results that rival more complex state-of-the-art parsers. @@ -1906,7 +1906,7 @@ Representation and Learning of Temporal Relations - LeonDerczynski + LeonDerczynski 1937–1948 C16-1182 Determining the relative order of events and times described in text is an important problem in natural language processing. It is also a difficult one: general state-of-the-art performance has been stuck at a relatively low ceiling for years. We investigate the representation of temporal relations, and empirically evaluate the effect that various temporal relation representations have on machine learning performance. While machine learning performance decreases with increased representational expressiveness, not all representation simplifications have equal impact. @@ -1928,7 +1928,7 @@ KentoWatanabe YuichirohMatsubayashi NahoOrita - NaoakiOkazaki + NaoakiOkazaki KentaroInui SatoruFukayama TomoyasuNakano @@ -1952,7 +1952,7 @@ Multimodal Mood Classification - A Case Study of Differences in <fixed-case>H</fixed-case>indi and Western Songs Braja GopalPatra DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 1980–1989 C16-1186 Music information retrieval has emerged as a mainstream research area in the past two decades. Experiments on music mood classification have been performed mainly on Western music based on audio, lyrics and a combination of both. Unfortunately, due to the scarcity of digitalized resources, Indian music fares poorly in music mood retrieval research. In this paper, we identified the mood taxonomy and prepared multimodal mood annotated datasets for Hindi and Western songs. We identified important audio and lyric features using correlation based feature selection technique. Finally, we developed mood classification systems using Support Vector Machines and Feed Forward Neural Networks based on the features collected from audio, lyrics, and a combination of both. The best performing multimodal systems achieved F-measures of 75.1 and 83.5 for classifying the moods of the Hindi and Western songs respectively using Feed Forward Neural Networks. A comparative analysis indicates that the selected features work well for mood classification of the Western songs and produces better results as compared to the mood classification systems for Hindi songs. @@ -1984,7 +1984,7 @@ Dialogue Act Classification in Domain-Independent Conversations Using a Deep Recurrent Neural Network HamedKhanpour NishithaGuntakandla - RodneyNielsen + RodneyNielsen 2012–2021 C16-1189 In this study, we applied a deep LSTM structure to classify dialogue acts (DAs) in open-domain conversations. We found that the word embeddings parameters, dropout regularization, decay rate and number of layers are the parameters that have the largest effect on the final system accuracy. Using the findings of these experiments, we trained a deep LSTM network that outperforms the state-of-the-art on the Switchboard corpus by 3.11%, and MRDA by 2.2%. @@ -1993,7 +1993,7 @@ Non-sentential Question Resolution using Sequence to Sequence Learning VineetKumar - SachindraJoshi + SachindraJoshi 2022–2031 C16-1190 An interactive Question Answering (QA) system frequently encounters non-sentential (incomplete) questions. These non-sentential questions may not make sense to the system when a user asks them without the context of conversation. The system thus needs to take into account the conversation context to process the question. In this work, we present a recurrent neural network (RNN) based encoder decoder network that can generate a complete (intended) question, given an incomplete question and conversation context. RNN encoder decoder networks have been show to work well when trained on a parallel corpus with millions of sentences, however it is extremely hard to obtain conversation data of this magnitude. We therefore propose to decompose the original problem into two separate simplified problems where each problem focuses on an abstraction. Specifically, we train a semantic sequence model to learn semantic patterns, and a syntactic sequence model to learn linguistic patterns. We further combine syntactic and semantic sequence models to generate an ensemble model. Our model achieves a BLEU score of 30.15 as compared to 18.54 using a standard RNN encoder decoder model. @@ -2024,7 +2024,7 @@ Domainless Adaptation by Constrained Decoding on a Schema Lattice Young-BumKim KarlStratos - RuhiSarikaya + RuhiSarikaya 2051–2060 C16-1193 In many applications such as personal digital assistants, there is a constant need for new domains to increase the system’s coverage of user queries. A conventional approach is to learn a separate model every time a new domain is introduced. This approach is slow, inefficient, and a bottleneck for scaling to a large number of domains. In this paper, we introduce a framework that allows us to have a single model that can handle all domains: including unknown domains that may be created in the future as long as they are covered in the master schema. The key idea is to remove the need for distinguishing domains by explicitly predicting the schema of queries. Given permitted schema of a query, we perform constrained decoding on a lattice of slot sequences allowed under the schema. The proposed model achieves competitive and often superior performance over the conventional model trained separately per domain. @@ -2057,7 +2057,7 @@ JonasWacker StefanRadomski MaxMühlhäuser - ChrisBiemann + ChrisBiemann 2082–2091 C16-1196 We present Ambient Search, an open source system for displaying and retrieving relevant documents in real time for speech input. The system works ambiently, that is, it unobstructively listens to speech streams in the background, identifies keywords and keyphrases for query construction and continuously serves relevant documents from its index. Query terms are ranked with Word2Vec and TF-IDF and are continuously updated to allow for ongoing querying of a document collection. The retrieved documents, in our case Wikipedia articles, are visualized in real time in a browser interface. Our evaluation shows that Ambient Search compares favorably to another implicit information retrieval system on speech streams. Furthermore, we extrinsically evaluate multiword keyphrase generation, showing positive impact for manual transcriptions. @@ -2067,8 +2067,8 @@ Semi-supervised Gender Classification with Joint Textual and Social Modeling ShoushanLi BinDai - ZhengxianGong - GuodongZhou + ZhengxianGong + GuodongZhou 2092–2100 C16-1197 In gender classification, labeled data is often limited while unlabeled data is ample. This motivates semi-supervised learning for gender classification to improve the performance by exploring the knowledge in both labeled and unlabeled data. In this paper, we propose a semi-supervised approach to gender classification by leveraging textual features and a specific kind of indirect links among the users which we call “same-interest” links. Specifically, we propose a factor graph, namely Textual and Social Factor Graph (TSFG), to model both the textual and the “same-interest” link information. Empirical studies demonstrate the effectiveness of the proposed approach to semi-supervised gender classification. @@ -2089,7 +2089,7 @@ DongZhang ShoushanLi HonglingWang - GuodongZhou + GuodongZhou 2112–2121 C16-1199 Textual information is of critical importance for automatic user classification in social media. However, most previous studies model textual features in a single perspective while the text in a user homepage typically possesses different styles of text, such as original message and comment from others. In this paper, we propose a novel approach, namely ensemble LSTM, to user classification by incorporating multiple textual perspectives. Specifically, our approach first learns a LSTM representation with a LSTM recurrent neural network and then presents a joint learning method to integrating all naturally-divided textual perspectives. Empirical studies on two basic user classification tasks, i.e., gender classification and age classification, demonstrate the effectiveness of the proposed approach to user classification with multiple textual perspectives. @@ -2129,9 +2129,9 @@ Improving Statistical Machine Translation with Selectional Preferences HaiqingTang - DeyiXiong + DeyiXiong MinZhang - ZhengxianGong + ZhengxianGong 2154–2163 C16-1203 Long-distance semantic dependencies are crucial for lexical choice in statistical machine translation. In this paper, we study semantic dependencies between verbs and their arguments by modeling selectional preferences in the context of machine translation. We incorporate preferences that verbs impose on subjects and objects into translation. In addition, bilingual selectional preferences between source-side verbs and target-side arguments are also investigated. Our experiments on Chinese-to-English translation tasks with large-scale training data demonstrate that statistical machine translation using verbal selectional preferences can achieve statistically significant improvements over a state-of-the-art baseline. @@ -2140,7 +2140,7 @@ Hierarchical Permutation Complexity for Word Order Evaluation MilošStanojević - KhalilSima’an + KhalilSima’an 2164–2173 C16-1204 Existing approaches for evaluating word order in machine translation work with metrics computed directly over a permutation of word positions in system output relative to a reference translation. However, every permutation factorizes into a permutation tree (PET) built of primal permutations, i.e., atomic units that do not factorize any further. In this paper we explore the idea that permutations factorizing into (on average) shorter primal permutations should represent simpler ordering as well. Consequently, we contribute Permutation Complexity, a class of metrics over PETs and their extension to forests, and define tight metrics, a sub-class of metrics implementing this idea. Subsequently we define example tight metrics and empirically test them in word order evaluation. Experiments on the WMT13 data sets for ten language pairs show that a tight metric is more often than not better than the baselines. @@ -2159,7 +2159,7 @@ Get Semantic With Me! The Usefulness of Different Feature Types for Short-Answer Grading - UlrikePadó + UlrikePadó 2186–2195 C16-1206 Automated short-answer grading is key to help close the automation loop for large-scale, computerised testing in education. A wide range of features on different levels of linguistic processing has been proposed so far. We investigate the relative importance of the different types of features across a range of standard corpora (both from a language skill and content assessment context, in English and in German). We find that features on the lexical, text similarity and dependency level often suffice to approximate full-model performance. Features derived from semantic processing particularly benefit the linguistically more varied answers in content assessment corpora. @@ -2170,9 +2170,9 @@ TerraBlevins RobertKwiatkowski JamieMacBeth - KathleenMcKeown + KathleenMcKeown DesmondPatton - OwenRambow + OwenRambow 2196–2206 C16-1207 Violence is a serious problems for cities like Chicago and has been exacerbated by the use of social media by gang-involved youths for taunting rival gangs. We present a corpus of tweets from a young and powerful female gang member and her communicators, which we have annotated with discourse intention, using a deep read to understand how and what triggered conversations to escalate into aggression. We use this corpus to develop a part-of-speech tagger and phrase table for the variant of English that is used and a classifier for identifying tweets that express grieving and aggression. @@ -2192,7 +2192,7 @@ Data-driven learning of symbolic constraints for a log-linear model in a phonological setting GabrielDoyle - RogerLevy + RogerLevy 2217–2226 C16-1209 We propose a non-parametric Bayesian model for learning and weighting symbolically-defined constraints to populate a log-linear model. The model jointly infers a vector of binary constraint values for each candidate output and likely definitions for these constraints, combining observations of the output classes with a (potentially infinite) grammar over potential constraint definitions. We present results on a small morphophonological system, English regular plurals, as a test case. The inferred constraints, based on a grammar of articulatory features, perform as well as theoretically-defined constraints on both observed and novel forms of English regular plurals. The learned constraint values and definitions also closely resemble standard constraints defined within phonological theory. @@ -2235,7 +2235,7 @@ MarekMaziarz MaciejPiasecki EwaRudnicka - StanSzpakowicz + StanSzpakowicz PawełKędzia 2259–2268 C16-1213 @@ -2245,7 +2245,7 @@ Time-Independent and Language-Independent Extraction of Multiword Expressions From <fixed-case>T</fixed-case>witter NikhilLondhe - RohiniSrihari + RohiniSrihari VishrawasGopalakrishnan 2269–2278 C16-1214 @@ -2279,7 +2279,7 @@ AmitGupta FrancescoPiccinno MikhailKozhevnikov - MariusPaşca + MariusPaşca DanielePighin 2300–2309 C16-1217 @@ -2289,10 +2289,10 @@ Joint Learning of Local and Global Features for Entity Linking via Neural Networks Thien HuuNguyen - NicolasFauceglia + NicolasFauceglia MarianoRodriguez Muro OktieHassanzadeh - AlfioMassimiliano Gliozzo + AlfioMassimiliano Gliozzo MohammadSadoghi 2310–2320 C16-1218 @@ -2301,7 +2301,7 @@ Structured Aspect Extraction - OmerGunes + OmerGunes TimFurche GiorgioOrsi 2321–2332 @@ -2440,7 +2440,7 @@ Targeted Sentiment to Understand Student Comments CharlesWelch - RadaMihalcea + RadaMihalcea 2471–2481 C16-1233 We address the task of targeted sentiment as a means of understanding the sentiment that students hold toward courses and instructors, as expressed by students in their comments. We introduce a new dataset consisting of student comments annotated for targeted sentiment and describe a system that can both identify the courses and instructors mentioned in student comments, as well as label the students’ sentiment toward those entities. Through several comparative evaluations, we show that our system outperforms previous work on a similar task. @@ -2450,7 +2450,7 @@ Towards Sub-Word Level Compositions for Sentiment Analysis of <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Code Mixed Text AdityaJoshi AmeyaPrabhu - ManishShrivastava + ManishShrivastava VasudevaVarma 2482–2491 C16-1234 @@ -2461,7 +2461,7 @@ Distance Metric Learning for Aspect Phrase Grouping ShufengXiong YueZhang - DonghongJi + DonghongJi YinxiaLou 2492–2502 C16-1235 @@ -2474,7 +2474,7 @@ NanDuan ZhaoYan MingZhou - TiejunZhao + TiejunZhao 2503–2514 C16-1236 WebQuestions and SimpleQuestions are two benchmark data-sets commonly used in recent knowledge-based question answering (KBQA) work. Most questions in them are ‘simple’ questions which can be answered based on a single relation in the knowledge base. Such data-sets lack the capability of evaluating KBQA systems on complicated questions. Motivated by this issue, we release a new data-set, namely ComplexQuestions, aiming to measure the quality of KBQA systems on ‘multi-constraint’ questions which require multiple knowledge base relations to get the answer. Beside, we propose a novel systematic KBQA approach to solve multi-constraint questions. Compared to state-of-the-art methods, our approach not only obtains comparable results on the two existing benchmark data-sets, but also achieves significant improvements on the ComplexQuestions. @@ -2494,7 +2494,7 @@ Attention-Based Convolutional Neural Network for Semantic Relation Extraction YatianShen - XuanjingHuang + XuanjingHuang 2526–2536 C16-1238 Nowadays, neural networks play an important role in the task of relation classification. In this paper, we propose a novel attention-based convolutional neural network architecture for this task. Our model makes full use of word embedding, part-of-speech tag embedding and position embedding information. Word level attention mechanism is able to better determine which parts of the sentence are most influential with respect to the two entities of interest. This architecture enables learning some important features from task-specific labeled data, forgoing the need for external knowledge such as explicit dependency structures. Experiments on the SemEval-2010 Task 8 benchmark dataset show that our model achieves better performances than several state-of-the-art neural network models and can achieve a competitive performance just with minimal feature engineering. @@ -2503,7 +2503,7 @@ Table Filling Multi-Task Recurrent Neural Network for Joint Entity and Relation Extraction PankajGupta - HinrichSchütze + HinrichSchütze BerntAndrassy 2537–2547 C16-1239 @@ -2513,7 +2513,7 @@ Bilingual Autoencoders with Global Descriptors for Modeling Parallel Sentences BiaoZhang - DeyiXiong + DeyiXiong JinsongSu HongDuan MinZhang @@ -2525,8 +2525,8 @@ Multi-Engine and Multi-Alignment Based Automatic Post-Editing and its Impact on Translation Productivity SantanuPal - Sudip KumarNaskar - Josefvan Genabith + Sudip KumarNaskar + Josefvan Genabith 2559–2570 C16-1241 In this paper we combine two strands of machine translation (MT) research: automatic post-editing (APE) and multi-engine (system combination) MT. APE systems learn a target-language-side second stage MT system from the data produced by human corrected output of a first stage MT system, to improve the output of the first stage MT in what is essentially a sequential MT system combination architecture. At the same time, there is a rich research literature on parallel MT system combination where the same input is fed to multiple engines and the best output is selected or smaller sections of the outputs are combined to obtain improved translation output. In the paper we show that parallel system combination in the APE stage of a sequential MT-APE combination yields substantial translation improvements both measured in terms of automatic evaluation metrics as well as in terms of productivity improvements measured in a post-editing experiment. We also show that system combination on the level of APE alignments yields further improvements. Overall our APE system yields statistically significant improvement of 5.9% relative BLEU over a strong baseline (English–Italian Google MT) and 21.76% productivity increase in a human post-editing experiment with professional translators. @@ -2568,7 +2568,7 @@ Training Data Enrichment for Infrequent Discourse Relations KailangJiang GiuseppeCarenini - RaymondNg + RaymondNg 2603–2614 C16-1245 Discourse parsing is a popular technique widely used in text understanding, sentiment analysis and other NLP tasks. However, for most discourse parsers, the performance varies significantly across different discourse relations. In this paper, we first validate the underfitting hypothesis, i.e., the less frequent a relation is in the training data, the poorer the performance on that relation. We then explore how to increase the number of positive training instances, without resorting to manually creating additional labeled data. We propose a training data enrichment framework that relies on co-training of two different discourse parsers on unlabeled documents. Importantly, we show that co-training alone is not sufficient. The framework requires a filtering step to ensure that only “good quality” unlabeled documents can be used for enrichment and re-training. We propose and evaluate two ways to perform the filtering. The first is to use an agreement score between the two parsers. The second is to use only the confidence score of the faster parser. Our empirical results show that agreement score can help to boost the performance on infrequent relations, and that the confidence score is a viable approximation of the agreement score for infrequent relations. @@ -2577,8 +2577,8 @@ Inferring Discourse Relations from <fixed-case>PDTB</fixed-case>-style Discourse Labels for Argumentative Revision Classification FanZhang - DianeLitman - KatherineForbes Riley + DianeLitman + KatherineForbes Riley 2615–2624 C16-1246 Penn Discourse Treebank (PDTB)-style annotation focuses on labeling local discourse relations between text spans and typically ignores larger discourse contexts. In this paper we propose two approaches to infer discourse relations in a paragraph-level context from annotated PDTB labels. We investigate the utility of inferring such discourse information using the task of revision classification. Experimental results demonstrate that the inferred information can significantly improve classification performance compared to baselines, not only when PDTB annotation comes from humans but also from automatic parsers. @@ -2588,7 +2588,7 @@ Capturing Pragmatic Knowledge in Article Usage Prediction using <fixed-case>LSTM</fixed-case>s JadKabbara YulanFeng - Jackie Chi KitCheung + Jackie Chi KitCheung 2625–2634 C16-1247 We examine the potential of recurrent neural networks for handling pragmatic inferences involving complex contextual cues for the task of article usage prediction. We train and compare several variants of Long Short-Term Memory (LSTM) networks with an attention mechanism. Our model outperforms a previous state-of-the-art system, achieving up to 96.63% accuracy on the WSJ/PTB corpus. In addition, we perform a series of analyses to understand the impact of various model choices. We find that the gain in performance can be attributed to the ability of LSTMs to pick up on contextual cues, both local and further away in distance, and that the model is able to solve cases involving reasoning about coreference and synonymy. We also show how the attention mechanism contributes to the interpretability of the model’s effectiveness. @@ -2607,7 +2607,7 @@ ShoushanLi JianXu DongZhang - GuodongZhou + GuodongZhou 2647–2655 C16-1249 In the literature, various supervised learning approaches have been adopted to address the task of reader emotion classification. However, the classification performance greatly suffers when the size of the labeled data is limited. In this paper, we propose a two-view label propagation approach to semi-supervised reader emotion classification by exploiting two views, namely source text and response text in a label propagation algorithm. Specifically, our approach depends on two word-document bipartite graphs to model the relationship among the samples in the two views respectively. Besides, the two bipartite graphs are integrated by linking each source text sample with its corresponding response text sample via a length-sensitive transition probability. In this way, our two-view label propagation approach to semi-supervised reader emotion classification largely alleviates the reliance on the strong sufficiency and independence assumptions of the two views, as required in co-training. Empirical evaluation demonstrates the effectiveness of our two-view label propagation approach to semi-supervised reader emotion classification. @@ -2628,7 +2628,7 @@ ErikCambria SoujanyaPoria RajivBajpai - BjoernSchuller + BjoernSchuller 2666–2677 C16-1251 An important difference between traditional AI systems and human intelligence is the human ability to harness commonsense knowledge gleaned from a lifetime of learning and experience to make informed decisions. This allows humans to adapt easily to novel situations where AI fails catastrophically due to a lack of situation-specific rules and generalization capabilities. Commonsense knowledge also provides background information that enables humans to successfully operate in social situations where such knowledge is typically assumed. Since commonsense consists of information that humans take for granted, gathering it is an extremely difficult task. Previous versions of SenticNet were focused on collecting this kind of knowledge for sentiment analysis but they were heavily limited by their inability to generalize. SenticNet 4 overcomes such limitations by leveraging on conceptual primitives automatically generated by means of hierarchical clustering and dimensionality reduction. @@ -2661,7 +2661,7 @@ Neural-based Noise Filtering from Word Embeddings Kim AnhNguyen - SabineSchulte im Walde + SabineSchulte im Walde Ngoc ThangVu 2699–2707 C16-1254 @@ -2692,7 +2692,7 @@ <fixed-case>M</fixed-case>onday mornings are my fave :) #not Exploring the Automatic Recognition of Irony in <fixed-case>E</fixed-case>nglish tweets CynthiaVan Hee ElsLefever - VéroniqueHoste + VéroniqueHoste 2730–2739 C16-1257 Recognising and understanding irony is crucial for the improvement natural language processing tasks including sentiment analysis. In this study, we describe the construction of an English Twitter corpus and its annotation for irony based on a newly developed fine-grained annotation scheme. We also explore the feasibility of automatic irony recognition by exploiting a varied set of features including lexical, syntactic, sentiment and semantic (Word2Vec) information. Experiments on a held-out test set show that our irony classifier benefits from this combined information, yielding an F1-score of 67.66%. When explicit hashtag information like #irony is included in the data, the system even obtains an F1-score of 92.77%. A qualitative analysis of the output reveals that recognising irony that results from a polarity clash appears to be (much) more feasible than recognising other forms of ironic utterances (e.g., descriptions of situational irony). @@ -2729,7 +2729,7 @@ Learning Succinct Models: Pipelined Compression with <fixed-case>L</fixed-case>1-Regularization, Hashing, <fixed-case>E</fixed-case>lias-<fixed-case>F</fixed-case>ano Indices, and Quantization HajimeSenuma - AkikoAizawa + AkikoAizawa 2774–2784 C16-1261 The recent proliferation of smart devices necessitates methods to learn small-sized models. This paper demonstrates that if there are m features in total but only n = o(\sqrt{m}) features are required to distinguish examples, with \Omega(\log m) training examples and reasonable settings, it is possible to obtain a good model in a succinct representation using n \log_2 \frac{m}{n} + o(m) bits, by using a pipeline of existing compression methods: L1-regularized logistic regression, feature hashing, Elias–Fano indices, and randomized quantization. An experiment shows that a noun phrase chunking task for which an existing library requires 27 megabytes can be compressed to less than 13 kilobytes without notable loss of accuracy. @@ -2756,7 +2756,7 @@ Is an Image Worth More than a Thousand Words? On the Fine-Grain Semantic Differences between Visual and Linguistic Representations GuillemCollell - Marie-FrancineMoens + Marie-FrancineMoens 2807–2817 C16-1264 Human concept representations are often grounded with visual information, yet some aspects of meaning cannot be visually represented or are better described with language. Thus, vision and language provide complementary information that, properly combined, can potentially yield more complete concept representations. Recently, state-of-the-art distributional semantic models and convolutional neural networks have achieved great success in representing linguistic and visual knowledge respectively. In this paper, we compare both, visual and linguistic representations in their ability to capture different types of fine-grain semantic knowledge—or attributes—of concepts. Humans often describe objects using attributes, that is, properties such as shape, color or functionality, which often transcend the linguistic and visual modalities. In our setting, we evaluate how well attributes can be predicted by using the unimodal representations as inputs. We are interested in first, finding out whether attributes are generally better captured by either the vision or by the language modality; and second, if none of them is clearly superior (as we hypothesize), what type of attributes or semantic knowledge are better encoded from each modality. Ultimately, our study sheds light on the potential of combining visual and textual representations. @@ -2776,7 +2776,7 @@ NaoyaInoue YuichirohMatsubayashi MasayukiOno - NaoakiOkazaki + NaoakiOkazaki KentaroInui 2829–2838 C16-1266 @@ -2795,7 +2795,7 @@ Distributional Inclusion Hypothesis for Tensor-based Composition DimitriKartsaklis - MehrnooshSadrzadeh + MehrnooshSadrzadeh 2849–2860 C16-1268 According to the distributional inclusion hypothesis, entailment between words can be measured via the feature inclusions of their distributional vectors. In recent work, we showed how this hypothesis can be extended from words to phrases and sentences in the setting of compositional distributional semantics. This paper focuses on inclusion properties of tensors; its main contribution is a theoretical and experimental analysis of how feature inclusion works in different concrete models of verb tensors. We present results for relational, Frobenius, projective, and holistic methods and compare them to the simple vector addition, multiplication, min, and max models. The degrees of entailment thus obtained are evaluated via a variety of existing word-based measures, such as Weed’s and Clarke’s, KL-divergence, APinc, balAPinc, and two of our previously proposed metrics at the phrase/sentence level. We perform experiments on three entailment datasets, investigating which version of tensor-based composition achieves the highest performance when combined with the sentence-level measures. @@ -2814,7 +2814,7 @@ Reading and Thinking: Re-read <fixed-case>LSTM</fixed-case> Unit for Textual Entailment Recognition LeiSha - BaobaoChang + BaobaoChang ZhifangSui SujianLi 2870–2879 @@ -2866,7 +2866,7 @@ Neural Paraphrase Generation with Stacked Residual <fixed-case>LSTM</fixed-case> Networks AadityaPrakash - Sadid A.Hasan + Sadid A.Hasan KathyLee VivekDatla AshequlQadir @@ -2892,7 +2892,7 @@ Keyphrase Annotation with Graph Co-Ranking AdrienBougouin FlorianBoudin - BéatriceDaille + BéatriceDaille 2945–2955 C16-1277 Keyphrase annotation is the task of identifying textual units that represent the main content of a document. Keyphrase annotation is either carried out by extracting the most important phrases from a document, keyphrase extraction, or by assigning entries from a controlled domain-specific vocabulary, keyphrase assignment. Assignment methods are generally more reliable. They provide better-formed keyphrases, as well as keyphrases that do not occur in the document. But they are often silent on the contrary of extraction methods that do not depend on manually built resources. This paper proposes a new method to perform both keyphrase extraction and keyphrase assignment in an integrated and mutual reinforcing manner. Experiments have been carried out on datasets covering different domains of humanities and social sciences. They show statistically significant improvements compared to both keyphrase extraction and keyphrase assignment state-of-the art methods. @@ -2900,7 +2900,7 @@ What’s in an Explanation? Characterizing Knowledge and Inference Requirements for Elementary Science Exams - PeterJansen + PeterJansen NiranjanBalasubramanian MihaiSurdeanu PeterClark @@ -2911,7 +2911,7 @@ “All <fixed-case>I</fixed-case> know about politics is what <fixed-case>I</fixed-case> read in <fixed-case>T</fixed-case>witter”: Weakly Supervised Models for Extracting Politicians’ Stances From <fixed-case>T</fixed-case>witter - KristenJohnson + KristenJohnson DanGoldwasser 2966–2977 C16-1279 @@ -2932,7 +2932,7 @@ Political News Sentiment Analysis for Under-resourced Languages Patrik F.Bakken Terje A.Bratlie - CristinaMarco + CristinaMarco Jon AtleGulla 2989–2996 C16-1281 @@ -2944,7 +2944,7 @@ JeffreyLund PaulFelt KevinSeppi - EricRingger + EricRingger 2997–3006 C16-1282 Probabilistic models are a useful means for analyzing large text corpora. Integrating such models with human interaction enables many new use cases. However, adding human interaction to probabilistic models requires inference algorithms which are both fast and accurate. We explore the use of Iterated Conditional Modes as a fast alternative to Gibbs sampling or variational EM. We demonstrate superior performance both in run time and model quality on three different models of text including a DP Mixture of Multinomials for web search result clustering, the Interactive Topic Model, and M OM R ESP , a multinomial crowdsourcing model. @@ -2991,7 +2991,7 @@ Crowdsourcing Complex Language Resources: Playing to Annotate Dependency Syntax BrunoGuillaume KarënFort - NicolasLefebvre + NicolasLefebvre 3041–3052 C16-1286 This article presents the results we obtained on a complex annotation task (that of dependency syntax) using a specifically designed Game with a Purpose, ZombiLingo. We show that with suitable mechanisms (decomposition of the task, training of the players and regular control of the annotation quality during the game), it is possible to obtain annotations whose quality is significantly higher than that obtainable with a parser, provided that enough players participate. The source code of the game and the resulting annotated corpora (for French) are freely available. @@ -3000,7 +3000,7 @@ Borrow a Little from your Rich Cousin: Using Embeddings and Polarities of <fixed-case>E</fixed-case>nglish Words for Multilingual Sentiment Classification PreranaSinghal - PushpakBhattacharyya + PushpakBhattacharyya 3053–3062 C16-1287 In this paper, we provide a solution to multilingual sentiment classification using deep learning. Given input text in a language, we use word translation into English and then the embeddings of these English words to train a classifier. This projection into the English space plus word embeddings gives a simple and uniform framework for multilingual sentiment analysis. A novel idea is augmentation of the training data with polar words, appearing in these sentences, along with their polarities. This approach leads to a performance gain of 7-10% over traditional classifiers on many languages, irrespective of text genre, despite the scarcity of resources in most languages. @@ -3021,7 +3021,7 @@ Convolution-Enhanced Bilingual Recursive Neural Network for Bilingual Semantic Modeling JinsongSu BiaoZhang - DeyiXiong + DeyiXiong RuochenLi JianminYin 3071–3081 @@ -3036,7 +3036,7 @@ NanYang MuLi MingZhou - Kenny Q.Zhu + Kenny Q.Zhu 3082–3092 C16-1290 In neural machine translation, the attention mechanism facilitates the translation process by producing a soft alignment between the source sentence and the target sentence. However, without dedicated distortion and fertility models seen in traditional SMT systems, the learned alignment may not be accurate, which can lead to low translation quality. In this paper, we propose two novel models to improve attention-based neural machine translation. We propose a recurrent attention mechanism as an implicit distortion model, and a fertility conditioned decoder as an implicit fertility model. We conduct experiments on large-scale Chinese–English translation tasks. The results show that our models significantly improve both the alignment and translation quality compared to the original attention mechanism and several other variations. @@ -3047,7 +3047,7 @@ LemaoLiu MasaoUtiyama AndrewFinch - EiichiroSumita + EiichiroSumita 3093–3102 C16-1291 The attention mechanism is appealing for neural machine translation, since it is able to dynamically encode a source sentence by generating a alignment between a target word and source words. Unfortunately, it has been proved to be worse than conventional alignment models in alignment accuracy. In this paper, we analyze and explain this issue from the point view of reordering, and propose a supervised attention which is learned with guidance from conventional alignment models. Experiments on two Chinese-to-English translation tasks show that the supervised attention mechanism yields better alignments leading to substantial gains over the standard attention based NMT. @@ -3058,8 +3058,8 @@ MatthiasSperber GrahamNeubig JanNiehues - SebastianStüker - AlexWaibel + SebastianStüker + AlexWaibel 3103–3113 C16-1292 Evaluating the quality of output from language processing systems such as machine translation or speech recognition is an essential step in ensuring that they are sufficient for practical use. However, depending on the practical requirements, evaluation approaches can differ strongly. Often, reference-based evaluation measures (such as BLEU or WER) are appealing because they are cheap and allow rapid quantitative comparison. On the other hand, practitioners often focus on manual evaluation because they must deal with frequently changing domains and quality standards requested by customers, for which reference-based evaluation is insufficient or not possible due to missing in-domain reference data (Harris et al., 2016). In this paper, we attempt to bridge this gap by proposing a framework for lightly supervised quality estimation. We collect manually annotated scores for a small number of segments in a test corpus or document, and combine them with automatically predicted quality scores for the remaining segments to predict an overall quality estimate. An evaluation shows that our framework estimates quality more reliably than using fully automatic quality estimation approaches, while keeping annotation effort low by not requiring full references to be available for the particular domain. @@ -3068,9 +3068,9 @@ Improving Translation Selection with Supersenses HaiqingTang - DeyiXiong - OierLopez de Lacalle - EnekoAgirre + DeyiXiong + OierLopez de Lacalle + EnekoAgirre 3114–3123 C16-1293 Selecting appropriate translations for source words with multiple meanings still remains a challenge for statistical machine translation (SMT). One reason for this is that most SMT systems are not good at detecting the proper sense for a polysemic word when it appears in different contexts. In this paper, we adopt a supersense tagging method to annotate source words with coarse-grained ontological concepts. In order to enable the system to choose an appropriate translation for a word or phrase according to the annotated supersense of the word or phrase, we propose two translation models with supersense knowledge: a maximum entropy based model and a supersense embedding model. The effectiveness of our proposed models is validated on a large-scale English-to-Spanish translation task. Results indicate that our method can significantly improve translation quality via correctly conveying the meaning of the source language to the target language. @@ -3079,7 +3079,7 @@ Is all that Glitters in Machine Translation Quality Estimation really Gold? YvetteGraham - TimothyBaldwin + TimothyBaldwin MeghanDowling MariaEskevich TeresaLynn @@ -3093,9 +3093,9 @@ Connecting Phrase based Statistical Machine Translation Adaptation RuiWang HaiZhao - Bao-LiangLu + Bao-LiangLu MasaoUtiyama - EiichiroSumita + EiichiroSumita 3135–3145 C16-1295 Although more additional corpora are now available for Statistical Machine Translation (SMT), only the ones which belong to the same or similar domains of the original corpus can indeed enhance SMT performance directly. A series of SMT adaptation methods have been proposed to select these similar-domain data, and most of them focus on sentence selection. In comparison, phrase is a smaller and more fine grained unit for data selection, therefore we propose a straightforward and efficient connecting phrase based adaptation method, which is applied to both bilingual phrase pair and monolingual n-gram adaptation. The proposed method is evaluated on IWSLT/NIST data sets, and the results show that phrase based SMT performances are significantly improved (up to +1.6 in comparison with phrase based SMT baseline system and +0.9 in comparison with existing methods). @@ -3123,7 +3123,7 @@ Universal Reordering via Linguistic Typology JoachimDaiber MilošStanojević - KhalilSima’an + KhalilSima’an 3167–3176 C16-1298 In this paper we explore the novel idea of building a single universal reordering model from English to a large number of target languages. To build this model we exploit typological features of word order for a large number of target languages together with source (English) syntactic features and we train this model on a single combined parallel corpus representing all (22) involved language pairs. We contribute experimental evidence for the usefulness of linguistically defined typological features for building such a model. When the universal reordering model is used for preordering followed by monotone translation (no reordering inside the decoder), our experiments show that this pipeline gives comparable or improved translation performance with a phrase-based baseline for a large number of language pairs (12 out of 22) from diverse language families. @@ -3133,7 +3133,7 @@ A Deep Fusion Model for Domain Adaptation in Phrase-based <fixed-case>MT</fixed-case> NadirDurrani HassanSajjad - ShafiqJoty + ShafiqJoty AhmedAbdelali 3177–3187 C16-1299 @@ -3166,7 +3166,7 @@ Improving Word Alignment of Rare Words with Word Embeddings MasoudJalili Sabet HeshaamFaili - GholamrezaHaffari + GholamrezaHaffari 3209–3215 C16-1302 We address the problem of inducing word alignment for language pairs by developing an unsupervised model with the capability of getting applied to other generative alignment models. We approach the task by: i)proposing a new alignment model based on the IBM alignment model 1 that uses vector representation of words, and ii)examining the use of similar source words to overcome the problem of rare source words and improving the alignments. We apply our method to English-French corpora and run the experiments with different sizes of sentence pairs. Our results show competitive performance against the baseline and in some cases improve the results up to 6.9% in terms of precision. @@ -3174,7 +3174,7 @@ Measuring the Information Content of Financial News - Ching-YunChang + Ching-YunChang YueZhang ZhiyangTeng ZahnBozanic @@ -3187,8 +3187,8 @@ Automatic Generation and Classification of Minimal Meaningful Propositions in Educational Systems AndreeaGodea - FlorinBulgarov - RodneyNielsen + FlorinBulgarov + RodneyNielsen 3226–3236 C16-1304 Truly effective and practical educational systems will only be achievable when they have the ability to fully recognize deep relationships between a learner’s interpretation of a subject and the desired conceptual understanding. In this paper, we take important steps in this direction by introducing a new representation of sentences – Minimal Meaningful Propositions (MMPs), which will allow us to significantly improve the mapping between a learner’s answer and the ideal response. Using this technique, we make significant progress towards highly scalable and domain independent educational systems, that will be able to operate without human intervention. Even though this is a new task, we show very good results both for the extraction of MMPs and for classification with respect to their importance. @@ -3209,7 +3209,7 @@ Textual complexity as a predictor of difficulty of listening items in language proficiency tests AnastassiaLoukina - Su-YounYoon + Su-YounYoon JenniferSakano YouhuaWei KathySheehan @@ -3222,7 +3222,7 @@ The Construction of a <fixed-case>C</fixed-case>hinese Collocational Knowledge Resource and Its Application for Second Language Acquisition RenfenHu JiayongChen - Kuang-huaChen + Kuang-huaChen 3254–3263 C16-1307 The appropriate use of collocations is a challenge for second language acquisition. However, high quality and easily accessible Chinese collocation resources are not available for both teachers and students. This paper presents the design and construction of a large scale resource of Chinese collocational knowledge, and a web-based application (OCCA, Online Chinese Collocation Assistant) which offers free and convenient collocation search service to end users. We define and classify collocations based on practical language acquisition needs and utilize a syntax based method to extract nine types of collocations. Totally 37 extraction rules are compiled with word, POS and dependency relation features, 1,750,000 collocations are extracted from a corpus for L2 learning and complementary Wikipedia data, and OCCA is implemented based on these extracted collocations. By comparing OCCA with two traditional collocation dictionaries, we find OCCA has higher entry coverage and collocation quantity, and our method achieves quite low error rate at less than 5%. We also discuss how to apply collocational knowledge to grammatical error detection and demonstrate comparable performance to the best results in 2015 NLP-TEA CGED shared task. The preliminary experiment shows that the collocation knowledge is helpful in detecting all the four types of grammatical errors. @@ -3243,7 +3243,7 @@ Event Detection with Burst Information Networks TaoGe LeiCui - BaobaoChang + BaobaoChang ZhifangSui MingZhou 3276–3286 @@ -3256,7 +3256,7 @@ SuyangZhu ShoushanLi YingChen - GuodongZhou + GuodongZhou 3287–3297 C16-1310 Machine learning-based methods have obtained great progress on emotion classification. However, in most previous studies, the models are learned based on a single corpus which often suffers from insufficient labeled data. In this paper, we propose a corpus fusion approach to address emotion classification across two corpora which use different emotion taxonomies. The objective of this approach is to utilize the annotated data from one corpus to help the emotion classification on another corpus. An Integer Linear Programming (ILP) optimization is proposed to refine the classification results. Empirical studies show the effectiveness of the proposed approach to corpus fusion for emotion classification. @@ -3306,11 +3306,11 @@ A Novel Fast Framework for Topic Labeling Based on Similarity-preserved Hashing Xian-LingMao - Yi-JingHao + Yi-JingHao QiangZhou Wen-QingYuan LinerYang - HeyanHuang + HeyanHuang 3339–3348 C16-1315 Recently, topic modeling has been widely applied in data mining due to its powerful ability. A common, major challenge in applying such topic models to other tasks is to accurately interpret the meaning of each topic. Topic labeling, as a major interpreting method, has attracted significant attention recently. However, most of previous works only focus on the effectiveness of topic labeling, and less attention has been paid to quickly creating good topic descriptors; meanwhile, it’s hard to assign labels for new emerging topics by using most of existing methods. To solve the problems above, in this paper, we propose a novel fast topic labeling framework that casts the labeling problem as a k-nearest neighbor (KNN) search problem in a probability vector set. Our experimental results show that the proposed sequential interleaving method based on locality sensitive hashing (LSH) technology is efficient in boosting the comparison speed among probability distributions, and the proposed framework can generate meaningful labels to interpret topics, including new emerging topics. @@ -3362,13 +3362,13 @@ <fixed-case>OCR</fixed-case>++: A Robust Framework For Information Extraction from Scholarly Articles - MayankSingh + MayankSingh BarnopriyoBarua PriyankPalod ManviGarg SidharthaSatapathy SamuelBushi - KumarAyush + KumarAyush KrishnaSai Rohith TulasiGamidi PawanGoyal @@ -3390,7 +3390,7 @@ <fixed-case>T</fixed-case>weet<fixed-case>G</fixed-case>eo - A Tool for Collecting, Processing and Analysing Geo-encoded Linguistic Data NikolaLjubešić - TanjaSamardžić + TanjaSamardžić CurdinDerungs 3412–3421 C16-1322 @@ -3399,8 +3399,8 @@ Extending <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with Fine-Grained Collocational Information via Supervised Distributional Learning - LuisEspinosa-Anke - JoseCamacho-Collados + LuisEspinosa-Anke + JoseCamacho-Collados SaraRodríguez-Fernández HoracioSaggion LeoWanner @@ -3411,7 +3411,7 @@ A News Editorial Corpus for Mining Argumentation Strategies - KhalidAl-Khatib + KhalidAl-Khatib HenningWachsmuth JohannesKiesel MatthiasHagen @@ -3424,11 +3424,11 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>T</fixed-case>urkish UmutSulubacak - MemduhGokirmak - FrancisTyers + MemduhGokirmak + FrancisTyers ÇağrıÇöltekin JoakimNivre - GülşenEryiğit + GülşenEryiğit 3444–3454 C16-1325 The Universal Dependencies (UD) project was conceived after the substantial recent interest in unifying annotation schemes across languages. With its own annotation principles and abstract inventory for parts of speech, morphosyntactic features and dependency relations, UD aims to facilitate multilingual parser development, cross-lingual learning, and parsing research from a language typology perspective. This paper presents the Turkish IMST-UD Treebank, the first Turkish treebank to be in a UD release. The IMST-UD Treebank was automatically converted from the IMST Treebank, which was also recently released. We describe this conversion procedure in detail, complete with mapping tables. We also present our evaluation of the parsing performances of both versions of the IMST Treebank. Our findings suggest that the UD framework is at least as viable for Turkish as the original annotation framework of the IMST Treebank. @@ -3438,7 +3438,7 @@ Creating Resources for Dialectal <fixed-case>A</fixed-case>rabic from a Single Annotation: A Case Study on <fixed-case>E</fixed-case>gyptian and <fixed-case>L</fixed-case>evantine RamyEskander NizarHabash - OwenRambow + OwenRambow ArfathPasha 3455–3465 C16-1326 @@ -3457,12 +3457,12 @@ <fixed-case>P</fixed-case>an<fixed-case>P</fixed-case>hon: A Resource for Mapping <fixed-case>IPA</fixed-case> Segments to Articulatory Feature Vectors - David R.Mortensen + David R.Mortensen PatrickLittell AkashBharadwaj KartikGoyal - ChrisDyer - LoriLevin + ChrisDyer + LoriLevin 3475–3484 C16-1328 This paper contributes to a growing body of evidence that—when coupled with appropriate machine-learning techniques–linguistically motivated, information-rich representations can outperform one-hot encodings of linguistic data. In particular, we show that phonological features outperform character-based models. PanPhon is a database relating over 5,000 IPA segments to 21 subsegmental articulatory features. We show that this database boosts performance in various NER-related tasks. Phonologically aware, neural CRF models built on PanPhon features are able to perform better on monolingual Spanish and Turkish NER tasks that character-based models. They have also been shown to work well in transfer models (as between Uzbek and Turkish). PanPhon features also contribute measurably to Orthography-to-IPA conversion tasks. @@ -3484,7 +3484,7 @@ More is not always better: balancing sense distributions for all-words Word Sense Disambiguation MartenPostma - RubenIzquierdo Bevia + RubenIzquierdo Bevia PiekVossen 3496–3506 C16-1330 @@ -3514,7 +3514,7 @@ Semantic Tagging with Deep Residual Networks JohannesBjerva - BarbaraPlank + BarbaraPlank JohanBos 3531–3541 C16-1333 @@ -3535,7 +3535,7 @@ <fixed-case>R</fixed-case>eddit Temporal N-gram Corpus and its Applications on Paraphrase and Semantic Similarity in Social Media using a Topic-based Latent Semantic Analysis AnhDang AbidalrahmanMoh’d - AminulIslam + AminulIslam RosaneMinghim MichaelSmit EvangelosMilios @@ -3581,12 +3581,12 @@ An Interactive System for Exploring Community Question Answering Forums EnamulHoque - ShafiqJoty - LluísMàrquez + ShafiqJoty + LluísMàrquez AlbertoBarrón-Cedeño GiovanniDa San Martino AlessandroMoschitti - PreslavNakov + PreslavNakov SalvatoreRomeo GiuseppeCarenini 1–5 @@ -3605,7 +3605,7 @@ A Reading Environment for Learners of <fixed-case>C</fixed-case>hinese as a Foreign Language - JohnLee + JohnLee Chun YinLam ShuJiang 11–15 @@ -3636,13 +3636,13 @@ ‘<fixed-case>B</fixed-case>on<fixed-case>T</fixed-case>en’ – Corpus Concordance System for ‘<fixed-case>NINJAL</fixed-case> Web <fixed-case>J</fixed-case>apanese Corpus’ MasayukiAsahara KazuyaKawahara - YuyaTakei + YuyaTakei HidetoMasuoka YasukoOhba YukiTorii ToruMorii YukiTanaka - KikuoMaekawa + KikuoMaekawa SachiKato HikariKonishi 25–29 @@ -3655,7 +3655,7 @@ XiaolinWang AndrewFinch MasaoUtiyama - EiichiroSumita + EiichiroSumita 30–34 C16-2007 Simultaneous interpretation allows people to communicate spontaneously across language boundaries, but such services are prohibitively expensive for the general public. This paper presents a fully automatic simultaneous interpretation system to address this problem. Though the development is still at an early stage, the system is capable of keeping up with the fastest of the TED speakers while at the same time delivering high-quality translations. We believe that the system will become an effective tool for facilitating cross-lingual communication in the future. @@ -3664,11 +3664,11 @@ <fixed-case>M</fixed-case>u<fixed-case>TUAL</fixed-case>: A Controlled Authoring Support System Enabling Contextual Machine Translation ReiMiyata - AnthonyHartley + AnthonyHartley KyoKageura - CécileParis + CécileParis MasaoUtiyama - EiichiroSumita + EiichiroSumita 35–39 C16-2008 The paper introduces a web-based authoring support system, MuTUAL, which aims to help writers create multilingual texts. The highlighted feature of the system is that it enables machine translation (MT) to generate outputs appropriate to their functional context within the target document. Our system is operational online, implementing core mechanisms for document structuring and controlled writing. These include a topic template and a controlled language authoring assistant, linked to our statistical MT system. @@ -3676,9 +3676,9 @@ Joint search in a bilingual valency lexicon and an annotated corpus - EvaFučíková - JanHajič - ZdeňkaUrešová + EvaFučíková + JanHajič + ZdeňkaUrešová 40–44 C16-2009 In this paper and the associated system demo, we present an advanced search system that allows to perform a joint search over a (bilingual) valency lexicon and a correspondingly annotated linked parallel corpus. This search tool has been developed on the basis of the Prague Czech-English Dependency Treebank, but its ideas are applicable in principle to any bilingual parallel corpus that is annotated for dependencies and valency (i.e., predicate-argument structure), and where verbs are linked to appropriate entries in an associated valency lexicon. Our online search tool consolidates more search interfaces into one, providing expanded structured search capability and a more efficient advanced way to search, allowing users to search for verb pairs, verbal argument pairs, their surface realization as recorded in the lexicon, or for their surface form actually appearing in the linked parallel corpus. The search system is currently under development, and is replacing our current search tool available at http://lindat.mff.cuni.cz/services/CzEngVallex, which could search the lexicon but the queries cannot take advantage of the underlying corpus nor use the additional surface form information from the lexicon(s). The system is available as open source. @@ -3698,7 +3698,7 @@ Demonstration of <fixed-case>C</fixed-case>ha<fixed-case>K</fixed-case>i.<fixed-case>NET</fixed-case> – beyond the corpus search system MasayukiAsahara - YujiMatsumoto + YujiMatsumoto ToshioMorita 49–53 C16-2011 @@ -3708,7 +3708,7 @@ <fixed-case>V</fixed-case>ox<fixed-case>S</fixed-case>im: A Visual Platform for Modeling Motion Language NikhilKrishnaswamy - JamesPustejovsky + JamesPustejovsky 54–58 C16-2012 Much existing work in text-to-scene generation focuses on generating static scenes. By introducing a focus on motion verbs, we integrate dynamic semantics into a rich formal model of events to generate animations in real time that correlate with human conceptions of the event described. This paper presents a working system that generates these animated scenes over a test set, discussing challenges encountered and describing the solutions implemented. @@ -3753,7 +3753,7 @@ <fixed-case>A</fixed-case>nita: An Intelligent Text Adaptation Tool - GustavoPaetzold + GustavoPaetzold LuciaSpecia 79–83 C16-2017 @@ -3771,7 +3771,7 @@ On-line Multilingual Linguistic Services - EricWehrli + EricWehrli YvesScherrer LukaNerima 89–92 @@ -3781,7 +3781,7 @@ A Customizable Editor for Text Simplification - JohnLee + JohnLee WenlongZhao WenxiuXie 93–97 @@ -3792,10 +3792,10 @@ <fixed-case>CAT</fixed-case>a<fixed-case>L</fixed-case>og Online: A Web-based <fixed-case>CAT</fixed-case> Tool for Distributed Translation with Data Capture for <fixed-case>APE</fixed-case> and Translation Process Research SantanuPal - Sudip KumarNaskar + Sudip KumarNaskar MarcosZampieri - TapasNayak - Josefvan Genabith + TapasNayak + Josefvan Genabith 98–102 C16-2021 We present a free web-based CAT tool called CATaLog Online which provides a novel and user-friendly online CAT environment for post-editors/translators. The goal is to support distributed translation, reduce post-editing time and effort, improve the post-editing experience and capture data for incremental MT/APE (automatic post-editing) and translation process research. The tool supports individual as well as batch mode file translation and provides translations from three engines – translation memory (TM), MT and APE. TM suggestions are color coded to accelerate the post-editing task. The users can integrate their personal TM/MT outputs. The tool remotely monitors and records post-editing activities generating an extensive range of post-editing logs. @@ -3835,8 +3835,8 @@ What topic do you want to hear about? A bilingual talking robot using <fixed-case>E</fixed-case>nglish and <fixed-case>J</fixed-case>apanese <fixed-case>W</fixed-case>ikipedias - GrahamWilcock - KristiinaJokinen + GrahamWilcock + KristiinaJokinen SeiichiYamamoto 116–120 C16-2025 @@ -3847,8 +3847,8 @@ Annotating Discourse Relations with the <fixed-case>PDTB</fixed-case> Annotator AlanLee RashmiPrasad - BonnieWebber - Aravind K.Joshi + BonnieWebber + Aravind K.Joshi 121–125 C16-2026 The PDTB Annotator is a tool for annotating and adjudicating discourse relations based on the annotation framework of the Penn Discourse TreeBank (PDTB). This demo describes the benefits of using the PDTB Annotator, gives an overview of the PDTB Framework and discusses the tool’s features, setup requirements and how it can also be used for adjudication. @@ -3858,7 +3858,7 @@ Opinion Retrieval Systems using Tweet-external Factors Yoon-SungKim Young-InSong - Hae-ChangRim + Hae-ChangRim 126–130 C16-2027 Opinion mining is a natural language processing technique which extracts subjective information from natural language text. To estimate an opinion about a query in large data collection, an opinion retrieval system that retrieves subjective and relevant information about the query can be useful. We present an opinion retrieval system that retrieves subjective and query-relevant tweets from Twitter, which is a useful source of obtaining real-time opinions. Our system outperforms previous opinion retrieval systems, and it further provides subjective information about Twitter authors and hashtags to describe their subjective tendencies. @@ -3866,10 +3866,10 @@ <fixed-case>T</fixed-case>ext<fixed-case>P</fixed-case>ro-<fixed-case>AL</fixed-case>: An Active Learning Platform for Flexible and Efficient Production of Training Data for <fixed-case>NLP</fixed-case> Tasks - BernardoMagnini + BernardoMagnini Anne-LyseMinard Mohammed R. H.Qwaider - ManuelaSperanza + ManuelaSperanza 131–135 C16-2028 This paper presents TextPro-AL (Active Learning for Text Processing), a platform where human annotators can efficiently work to produce high quality training data for new domains and new languages exploiting Active Learning methodologies. TextPro-AL is a web-based application integrating four components: a machine learning based NLP pipeline, an annotation editor for task definition and text annotations, an incremental re-training procedure based on active learning selection from a large pool of unannotated data, and a graphical visualization of the learning status of the system. @@ -3878,7 +3878,7 @@ <fixed-case>S</fixed-case>ide<fixed-case>N</fixed-case>oter: Scholarly Paper Browsing System based on <fixed-case>PDF</fixed-case> Restructuring and Text Annotation TakeshiAbekawa - AkikoAizawa + AkikoAizawa 136–140 C16-2029 In this paper, we discuss our ongoing efforts to construct a scientific paper browsing system that helps users to read and understand advanced technical content distributed in PDF. Since PDF is a format specifically designed for printing, layout and logical structures of documents are indistinguishably embedded in the file. It requires much effort to extract natural language text from PDF files, and reversely, display semantic annotations produced by NLP tools on the original page layout. In our browsing system, we tackle these issues caused by the gap between printable document and plain text. Our system provides ways to extract natural language sentences from PDF files together with their logical structures, and also to map arbitrary textual spans to their corresponding regions on page images. We setup a demonstration system using papers published in ACL anthology and demonstrate the enhanced search and refined recommendation functions which we plan to make widely available to NLP researchers. @@ -3889,7 +3889,7 @@ Shih-MingWang Chun-Hui ScottLee Yu-ChunLo - Ting-HaoHuang + Ting-HaoHuang Lun-WeiKu 141–145 C16-2030 @@ -3929,10 +3929,10 @@ The Open Framework for Developing Knowledge Base And Question Answering System JiseongKim - GyuHyeonChoi + GyuHyeonChoi Jung-UkKim - Eun-KyungKim - Key-SunChoi + Eun-KyungKim + Key-SunChoi 161–165 C16-2034 Developing a question answering (QA) system is a task of implementing and integrating modules of different technologies and evaluating an integrated whole system, which inevitably goes with a collaboration among experts of different domains. For supporting a easy collaboration, this demonstration presents the open framework that aims to support developing a QA system in collaborative and intuitive ways. The demonstration also shows the QA system developed by our novel framework. @@ -3944,7 +3944,7 @@ Hao-ChunPeng Mei-CihYeh Peng-YuChen - JasonChang + JasonChang 166–169 C16-2035 This paper shows the great potential of incorporating different approaches to help writing. Not only did they solve different kinds of writing problems, but also they complement and reinforce each other to be a complete and effective solution. Despite the extensive and multifaceted feedback and suggestion, writing is not all about syntactically or lexically well-written. It involves contents, structure, the certain understanding of the background, and many other factors to compose a rich, organized and sophisticated text. (e.g., conventional structure and idioms in academic writing). There is still a long way to go to accomplish the ultimate goal. We envision the future of writing to be a joyful experience with the help of instantaneous suggestion and constructive feedback. @@ -3955,7 +3955,7 @@ ChristinaNiklaus BernhardBermeitinger SiegfriedHandschuh - AndréFreitas + AndréFreitas 170–174 C16-2036 We present a text simplification approach that is directed at improving the performance of state-of-the-art Open Relation Extraction (RE) systems. As syntactically complex sentences often pose a challenge for current Open RE approaches, we have developed a simplification framework that performs a pre-processing step by taking a single sentence as input and using a set of syntactic-based transformation rules to create a textual input that is easier to process for subsequently applied Open RE systems. @@ -3964,8 +3964,8 @@ <fixed-case>K</fixed-case>orean <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Expansion Based on Projection of <fixed-case>J</fixed-case>apanese <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Jeong-ukKim - YounggyunHahm - Key-SunChoi + YounggyunHahm + Key-SunChoi 175–179 C16-2037 FrameNet project has begun from Berkeley in 1997, and is now supported in several countries reflecting characteristics of each language. The work for generating Korean FrameNet was already done by converting annotated English sentences into Korean with trained translators. However, high cost of frame-preservation and error revision was a huge burden on further expansion of FrameNet. This study makes use of linguistic similarity between Japanese and Korean to increase Korean FrameNet corpus with low cost. We also suggest adapting PubAnnotation and Korean-friendly valence patterns to FrameNet for increased accessibility. @@ -3987,7 +3987,7 @@ Hyoung-GyuLee Jun-SeokKim Joong-HwiShin - JaesongLee + JaesongLee Ying-XiuQuan Young-SeobJeong 185–188 @@ -4015,7 +4015,7 @@ BaolinPeng MingLiao JiaZhu - Kam-faiWong + Kam-faiWong 194–197 C16-2041 We present a system called ACE for Automatic Colloquialism and Errors detection for written Chinese. ACE is based on the combination of N-gram model and rule-base model. Although it focuses on detecting colloquial Cantonese (a dialect of Chinese) at the current stage, it can be extended to detect other dialects. We chose Cantonese becauase it has many interesting properties, such as unique grammar system and huge colloquial terms, that turn the detection task extremely challenging. We conducted experiments using real data and synthetic data. The results indicated that ACE is highly reliable and effective. @@ -4031,8 +4031,8 @@ <fixed-case>MAGES</fixed-case>: A Multilingual Angle-integrated Grouping-based Entity Summarization System - Eun-kyungKim - Key-SunChoi + Eun-kyungKim + Key-SunChoi 203–207 C16-2043 This demo presents MAGES (multilingual angle-integrated grouping-based entity summarization), an entity summarization system for a large knowledge base such as DBpedia based on a entity-group-bound ranking in a single integrated entity space across multiple language-specific editions. MAGES offers a multilingual angle-integrated space model, which has the advantage of overcoming missing semantic tags (i.e., categories) caused by biases in different language communities, and can contribute to the creation of entity groups that are well-formed and more stable than the monolingual condition within it. MAGES can help people quickly identify the essential points of the entities when they search or browse a large volume of entity-centric data. Evaluation results on the same experimental data demonstrate that our system produces a better summary compared with other representative DBpedia entity summarization methods. @@ -4063,7 +4063,7 @@ MónicaDomínguez IvánLatorre MireiaFarrús - JoanCodina-Filbà + JoanCodina-Filbà LeoWanner 218–222 C16-2046 @@ -4097,7 +4097,7 @@ JonasWacker StefanRadomski MaxMühlhäuser - ChrisBiemann + ChrisBiemann 233–237 C16-2049 In this demonstration paper we describe Ambient Search, a system that displays and retrieves documents in real time based on speech input. The system operates continuously in ambient mode, i.e. it generates speech transcriptions and identifies main keywords and keyphrases, while also querying its index to display relevant documents without explicit query. Without user intervention, the results are dynamically updated; users can choose to interact with the system at any time, employing a conversation protocol that is enriched with the ambient information gathered continuously. Our evaluation shows that Ambient Search outperforms another implicit speech-based information retrieval system. Ambient search is available as open source software. @@ -4123,7 +4123,7 @@ Towards Non-projective High-Order Dependency Parser WenjingFang - KennyZhu + KennyZhu YizhongWang JiaTan 248–252 @@ -4156,7 +4156,7 @@ JuntaMizuno MasahiroTanaka KiyonoriOhtake - Jong-HoonOh + Jong-HoonOh JulienKloetzer ChikaraHashimoto KentaroTorisawa @@ -4259,7 +4259,7 @@ <fixed-case>K</fixed-case>yoto-<fixed-case>NMT</fixed-case>: a Neural Machine Translation implementation in Chainer - FabienCromières + FabienCromières 307–311 C16-2064 We present Kyoto-NMT, an open-source implementation of the Neural Machine Translation paradigm. This implementation is done in Python and Chainer, an easy-to-use Deep Learning Framework. @@ -4271,7 +4271,7 @@ Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Tutorial Abstracts C16-3 MarcelloFederico - AkikoAizawa + AkikoAizawa The COLING 2016 Organizing Committee
Osaka, Japan
December @@ -4284,7 +4284,7 @@ Compositional Distributional Models of Meaning - MehrnooshSadrzadeh + MehrnooshSadrzadeh DimitriKartsaklis 1–4 C16-3001 @@ -4312,8 +4312,8 @@ Quality Estimation for Language Output Applications - CarolinaScarton - GustavoPaetzold + CarolinaScarton + GustavoPaetzold LuciaSpecia 14–17 C16-3004 @@ -4331,7 +4331,7 @@ Succinct Data Structures for <fixed-case>NLP</fixed-case>-at-Scale MatthiasPetri - TrevorCohn + TrevorCohn 20–21 C16-3006 Succinct data structures involve the use of novel data structures, compression technologies, and other mechanisms to allow data to be stored in extremely small memory or disk footprints, while still allowing for efficient access to the underlying data. They have successfully been applied in areas such as Information Retrieval and Bioinformatics to create highly compressible in-memory search indexes which provide efficient search functionality over datasets which traditionally could only be processed using external memory data structures. Modern technologies in this space are not well known within the NLP community, but have the potential to revolutionise NLP, particularly the application to ‘big data’ in the form of terabyte and larger corpora. This tutorial will present a practical introduction to the most important succinct data structures, tools, and applications with the intent of providing the researchers with a jump-start into this domain. The focus of this tutorial will be efficient text processing utilising space efficient representations of suffix arrays, suffix trees and searchable integer compression schemes with specific applications of succinct data structures to common NLP tasks such as n-gram language modelling. @@ -4339,7 +4339,7 @@ The Role of <fixed-case>W</fixed-case>ikipedia in Text Analysis and Retrieval - MariusPaşca + MariusPaşca 22 C16-3007 This tutorial examines the characteristics, advantages and limitations of Wikipedia relative to other existing, human-curated resources of knowledge; derivative resources, created by converting semi-structured content in Wikipedia into structured data; the role of Wikipedia and its derivatives in text analysis; and the role of Wikipedia and its derivatives in enhancing information retrieval. diff --git a/data/xml/C18.xml b/data/xml/C18.xml index e00720247f..60f0dd14eb 100644 --- a/data/xml/C18.xml +++ b/data/xml/C18.xml @@ -4,8 +4,8 @@ Proceedings of the 27th International Conference on Computational Linguistics C18-1 - Emily M.Bender - LeonDerczynski + Emily M.Bender + LeonDerczynski PierreIsabelle Association for Computational Linguistics
Santa Fe, New Mexico, USA
@@ -21,7 +21,7 @@ A New Approach to <fixed-case>A</fixed-case>nimacy Detection LabibaJahan GeetickaChauhan - MarkFinlayson + MarkFinlayson 1–12 Animacy is a necessary property for a referent to be an agent, and thus animacy detection is useful for a variety of natural language processing tasks, including word sense disambiguation, co-reference resolution, semantic role labeling, and others. Prior work treated animacy as a word-level property, and has developed statistical classifiers to classify words as either animate or inanimate. We discuss why this approach to the problem is ill-posed, and present a new approach based on classifying the animacy of co-reference chains. We show that simple voting approaches to inferring the animacy of a chain from its constituent words perform relatively poorly, and then present a hybrid system merging supervised machine learning (ML) and a small number of hand-built rules to compute the animacy of referring expressions and co-reference chains. This method achieves state of the art performance. The supervised ML component leverages features such as word embeddings over referring expressions, parts of speech, and grammatical and semantic roles. The rules take into consideration parts of speech and the hypernymy structure encoded in WordNet. The system achieves an F1 of 0.88 for classifying the animacy of referring expressions, which is comparable to state of the art results for classifying the animacy of words, and achieves an F1 of 0.75 for classifying the animacy of coreference chains themselves. We release our training and test dataset, which includes 142 texts (all narratives) comprising 156,154 words, 34,698 referring expressions, and 10,941 co-reference chains. We test the method on a subset of the OntoNotes dataset, showing using manual sampling that animacy classification is 90% +/- 2% accurate for coreference chains, and 92% +/- 1% for referring expressions. The data also contains 46 folktales, which present an interesting challenge because they often involve characters who are members of traditionally inanimate classes (e.g., stoves that walk, trees that talk). We show that our system is able to detect the animacy of these unusual referents with an F1 of 0.95. C18-1001 @@ -31,7 +31,7 @@ Zero Pronoun Resolution with Attention-based Neural Network QingyuYin YuZhang - WeinanZhang + WeinanZhang TingLiu William YangWang 13–23 @@ -42,7 +42,7 @@ They Exist! Introducing Plural Mentions to Coreference Resolution and Entity Linking EthanZhou - Jinho D.Choi + Jinho D.Choi 24–34 This paper analyzes arguably the most challenging yet under-explored aspect of resolution tasks such as coreference resolution and entity linking, that is the resolution of plural mentions. Unlike singular mentions each of which represents one entity, plural mentions stand for multiple entities. To tackle this aspect, we take the character identification corpus from the SemEval 2018 shared task that consists of entity annotation for singular mentions, and expand it by adding annotation for plural mentions. We then introduce a novel coreference resolution algorithm that selectively creates clusters to handle both singular and plural mentions, and also a deep learning-based entity linking model that jointly handles both types of mentions through multi-task learning. Adjusted evaluation metrics are proposed for these tasks as well to handle the uniqueness of plural mentions. Our experiments show that the new coreference resolution and entity linking models significantly outperform traditional models designed only for singular mentions. To the best of our knowledge, this is the first time that plural mentions are thoroughly analyzed for these two resolution tasks. C18-1003 @@ -60,9 +60,9 @@ Unsupervised Morphology Learning with Statistical Paradigms HongzhiXu - MitchellMarcus - CharlesYang - LyleUngar + MitchellMarcus + CharlesYang + LyleUngar 44–54 This paper describes an unsupervised model for morphological segmentation that exploits the notion of paradigms, which are sets of morphological categories (e.g., suffixes) that can be applied to a homogeneous set of words (e.g., nouns or verbs). Our algorithm identifies statistically reliable paradigms from the morphological segmentation result of a probabilistic model, and chooses reliable suffixes from them. The new suffixes can be fed back iteratively to improve the accuracy of the probabilistic model. Finally, the unreliable paradigms are subjected to pruning to eliminate unreliable morphological relations between words. The paradigm-based algorithm significantly improves segmentation accuracy. Our method achieves start-of-the-art results on experiments using the Morpho-Challenge data, including English, Turkish, and Finnish. C18-1005 @@ -72,8 +72,8 @@ Challenges of language technologies for the indigenous languages of the <fixed-case>A</fixed-case>mericas ManuelMager XimenaGutierrez-Vasques - GerardoSierra - IvanMeza-Ruiz + GerardoSierra + IvanMeza-Ruiz 55–69 Indigenous languages of the American continent are highly diverse. However, they have received little attention from the technological perspective. In this paper, we review the research, the digital resources and the available NLP systems that focus on these languages. We present the main challenges and research questions that arise when distant languages and low-resource scenarios are faced. We would like to encourage NLP research in linguistically rich and diverse areas like the Americas. C18-1006 @@ -87,7 +87,7 @@ RuochenXu YimingYang TerukoMitamura - EduardHovy + EduardHovy 70–82 The use of machine learning for NLP generally requires resources for training. Tasks performed in a low-resource language usually rely on labeled data in another, typically resource-rich, language. However, there might not be enough labeled data even in a resource-rich language such as English. In such cases, one approach is to use a hand-crafted approach that utilizes only a small bilingual dictionary with minimal manual verification to create distantly supervised data. Another is to explore typical machine learning techniques, for example adversarial training of bilingual word representations. We find that in event-type detection task—the task to classify [parts of] documents into a fixed set of labels—they give about the same performance. We explore ways in which the two methods can be complementary and also see how to best utilize a limited budget for manual annotation to maximize performance gain. C18-1007 @@ -165,8 +165,8 @@ YangXu HuibinRuan BoweiZou - JianminYao - GuodongZhou + JianminYao + GuodongZhou 177–189 Event relation recognition is a challenging language processing task. It is required to determine the relation class of a pair of query events, such as causality, under the condition that there isn’t any reliable clue for use. We follow the traditional statistical approach in this paper, speculating the relation class of the target events based on the relation-class distributions on the similar events. There is minimal supervision used during the speculation process. In particular, we incorporate image processing into the acquisition of similar event instances, including the utilization of images for visually representing event scenes, and the use of the neural network based image matching for approximate calculation between events. We test our method on the ACE-R2 corpus and compared our model with the fully-supervised neural network models. Experimental results show that we achieve a comparable performance to CNN while slightly better than LSTM. C18-1015 @@ -187,7 +187,7 @@ VivekKulkarni YingtaoTian ParthDandiwala - SteveSkiena + SteveSkiena 202–212 We present domain independent models to date documents based only on neologism usage patterns. Our models capture patterns of neologism usage over time to date texts, provide insights into temporal locality of word usage over a span of 150 years, and generalize to various domains like News, Fiction, and Non-Fiction with competitive performance. Quite intriguingly, we show that by modeling only the distribution of usage counts over neologisms (the model being agnostic of the particular words themselves), we achieve competitive performance using several orders of magnitude fewer features (only 200 input features) compared to state of the art models some of which use 200K features. C18-1017 @@ -197,7 +197,7 @@ Neural Math Word Problem Solver with Reinforcement Learning DanqingHuang JingLiu - Chin-YewLin + Chin-YewLin JianYin 213–223 Sequence-to-sequence model has been applied to solve math word problems. The model takes math problem descriptions as input and generates equations as output. The advantage of sequence-to-sequence model requires no feature engineering and can generate equations that do not exist in training data. However, our experimental analysis reveals that this model suffers from two shortcomings: (1) generate spurious numbers; (2) generate numbers at wrong positions. In this paper, we propose incorporating copy and alignment mechanism to the sequence-to-sequence model (namely CASS) to address these shortcomings. To train our model, we apply reinforcement learning to directly optimize the solution accuracy. It overcomes the “train-test discrepancy” issue of maximum likelihood estimation, which uses the surrogate objective of maximizing equation likelihood during training while the evaluation metric is solution accuracy (non-differentiable) at test time. Furthermore, to explore the effectiveness of our neural model, we use our model output as a feature and incorporate it into the feature-based model. Experimental results show that (1) The copy and alignment mechanism is effective to address the two issues; (2) Reinforcement learning leads to better performance than maximum likelihood on this task; (3) Our neural model is complementary to the feature-based model and their combination significantly outperforms the state-of-the-art results. @@ -206,7 +206,7 @@ Personalizing Lexical Simplification - JohnLee + JohnLee Chak YanYeung 224–232 A lexical simplification (LS) system aims to substitute complex words with simple words in a text, while preserving its meaning and grammaticality. Despite individual users’ differences in vocabulary knowledge, current systems do not consider these variations; rather, they are trained to find one optimal substitution or ranked list of substitutions for all users. We evaluate the performance of a state-of-the-art LS system on individual learners of English at different proficiency levels, and measure the benefits of using complex word identification (CWI) models to personalize the system. Experimental results show that even a simple personalized CWI model, based on graded vocabulary lists, can help the system avoid some unnecessary simplifications and produce more readable output. @@ -225,8 +225,8 @@ <fixed-case>L</fixed-case>exi: A tool for adaptive, personalized text simplification JoachimBingel - GustavoPaetzold - AndersSøgaard + GustavoPaetzold + AndersSøgaard 245–258 Most previous research in text simplification has aimed to develop generic solutions, assuming very homogeneous target audiences with consistent intra-group simplification needs. We argue that this assumption does not hold, and that instead we need to develop simplification systems that adapt to the individual needs of specific users. As a first step towards personalized simplification, we propose a framework for adaptive lexical simplification and introduce Lexi, a free open-source and easily extensible tool for adaptive, personalized text simplification. Lexi is easily installed as a browser extension, enabling easy access to the service for its users. C18-1021 @@ -265,7 +265,7 @@ Joint Learning from Labeled and Unlabeled Data for Information Retrieval - BoLi + BoLi PingCheng LeJia 293–302 @@ -275,8 +275,8 @@ Modeling the Readability of <fixed-case>G</fixed-case>erman Targeting Adults and Children: An empirically broad analysis and its cross-corpus validation - ZarahWeiß - DetmarMeurers + ZarahWeiß + DetmarMeurers 303–317 We analyze two novel data sets of German educational media texts targeting adults and children. The analysis is based on 400 automatically extracted measures of linguistic complexity from a wide range of linguistic domains. We show that both data sets exhibit broad linguistic adaptation to the target audience, which generalizes across both data sets. Our most successful binary classification model for German readability robustly shows high accuracy between 89.4%–98.9% for both data sets. To our knowledge, this comprehensive German readability model is the first for which robust cross-corpus performance has been shown. The research also contributes resources for German readability assessment that are externally validated as successful for different target audiences: we compiled a new corpus of German news broadcast subtitles, the Tagesschau/Logo corpus, and crawled a GEO/GEOlino corpus substantially enlarging the data compiled by Hancke et al. 2012. C18-1026 @@ -284,7 +284,7 @@ Automatic Assessment of Conceptual Text Complexity Using Knowledge Graphs - SanjaŠtajner + SanjaŠtajner IoanaHulpuş 318–330 Complexity of texts is usually assessed only at the lexical and syntactic levels. Although it is known that conceptual complexity plays a significant role in text understanding, no attempts have been made at assessing it automatically. We propose to automatically estimate the conceptual complexity of texts by exploiting a number of graph-based measures on a large knowledge base. By using a high-quality language learners corpus for English, we show that graph-based measures of individual text concepts, as well as the way they relate to each other in the knowledge graph, have a high discriminative power when distinguishing between two versions of the same text. Furthermore, when used as features in a binary classification task aiming to choose the simpler of two versions of the same text, our measures achieve high performance even in a default setup. @@ -294,7 +294,7 @@ <fixed-case>P</fixed-case>ar4<fixed-case>S</fixed-case>im – Adaptive Paraphrasing for Text Simplification Seid MuhieYimam - ChrisBiemann + ChrisBiemann 331–342 Learning from a real-world data stream and continuously updating the model without explicit supervision is a new challenge for NLP applications with machine learning components. In this work, we have developed an adaptive learning system for text simplification, which improves the underlying learning-to-rank model from usage data, i.e. how users have employed the system for the task of simplification. Our experimental result shows that, over a period of time, the performance of the embedded paraphrase ranking model increases steadily improving from a score of 62.88% up to 75.70% based on the NDCG@10 evaluation metrics. To our knowledge, this is the first study where an NLP component is adaptively improved through usage. C18-1028 @@ -346,8 +346,8 @@ Authorship Identification for Literary Book Recommendations HaifaAlharthi - DianaInkpen - StanSzpakowicz + DianaInkpen + StanSzpakowicz 390–400 Book recommender systems can help promote the practice of reading for pleasure, which has been declining in recent years. One factor that influences reading preferences is writing style. We propose a system that recommends books after learning their authors’ style. To our knowledge, this is the first work that applies the information learned by an author-identification model to book recommendations. We evaluated the system according to a top-k recommendation scenario. Our system gives better accuracy when compared with many state-of-the-art methods. We also conducted a qualitative analysis by checking if similar books/authors were annotated similarly by experts. C18-1033 @@ -356,8 +356,8 @@ A Nontrivial Sentence Corpus for the Task of Sentence Readability Assessment in <fixed-case>P</fixed-case>ortuguese Sidney EvaldoLeal - Magali SanchesDuran - Sandra MariaAluísio + Magali SanchesDuran + Sandra MariaAluísio 401–413 Effective textual communication depends on readers being proficient enough to comprehend texts, and texts being clear enough to be understood by the intended audience, in a reading task. When the meaning of textual information and instructions is not well conveyed, many losses and damages may occur. Among the solutions to alleviate this problem is the automatic evaluation of sentence readability, task which has been receiving a lot of attention due to its large applicability. However, a shortage of resources, such as corpora for training and evaluation, hinders the full development of this task. In this paper, we generate a nontrivial sentence corpus in Portuguese. We evaluate three scenarios for building it, taking advantage of a parallel corpus of simplification, in which each sentence triplet is aligned and has simplification operations annotated, being ideal for justifying possible mistakes of future methods. The best scenario of our corpus PorSimplesSent is composed of 4,888 pairs, which is bigger than a similar corpus for English; all the three versions of it are publicly available. We created four baselines for PorSimplesSent and made available a pairwise ranking method, using 17 linguistic and psycholinguistic features, which correctly identifies the ranking of sentence pairs with an accuracy of 74.2%. C18-1034 @@ -366,7 +366,7 @@ Adopting the Word-Pair-Dependency-Triplets with Individual Comparison for Natural Language Inference QianlongDu - ChengqingZong + ChengqingZong Keh-YihSu 414–425 This paper proposes to perform natural language inference with Word-Pair-Dependency-Triplets. Most previous DNN-based approaches either ignore syntactic dependency among words, or directly use tree-LSTM to generate sentence representation with irrelevant information. To overcome the problems mentioned above, we adopt Word-Pair-Dependency-Triplets to improve alignment and inference judgment. To be specific, instead of comparing each triplet from one passage with the merged information of another passage, we first propose to perform comparison directly between the triplets of the given passage-pair to make the judgement more interpretable. Experimental results show that the performance of our approach is better than most of the approaches that use tree structures, and is comparable to other state-of-the-art approaches. @@ -380,7 +380,7 @@ YaliangLi NanDu MinYang - WeiFan + WeiFan YingShen 426–436 Distantly supervised relation extraction greatly reduces human efforts in extracting relational facts from unstructured texts. However, it suffers from noisy labeling problem, which can degrade its performance. Meanwhile, the useful information expressed in knowledge graph is still underutilized in the state-of-the-art methods for distantly supervised relation extraction. In the light of these challenges, we propose CORD, a novelCOopeRativeDenoising framework, which consists two base networks leveraging text corpus and knowledge graph respectively, and a cooperative module involving their mutual learning by the adaptive bi-directional knowledge distillation and dynamic ensemble with noisy-varying instances. Experimental results on a real-world dataset demonstrate that the proposed method reduces the noisy labels and achieves substantial improvement over the state-of-the-art methods. @@ -392,7 +392,7 @@ BoweiZou ZengzhuangXu YuHong - GuodongZhou + GuodongZhou 437–448 Relation Classification aims to classify the semantic relationship between two marked entities in a given sentence. It plays a vital role in a variety of natural language processing applications. Most existing methods focus on exploiting mono-lingual data, e.g., in English, due to the lack of annotated data in other languages. In this paper, we come up with a feature adaptation approach for cross-lingual relation classification, which employs a generative adversarial network (GAN) to transfer feature representations from one language with rich annotated data to another language with scarce annotated data. Such a feature adaptation approach enables feature imitation via the competition between a relation classification network and a rival discriminator. Experimental results on the ACE 2005 multilingual training corpus, treating English as the source language and Chinese the target, demonstrate the effectiveness of our proposed approach, yielding an improvement of 5.7% over the state-of-the-art. C18-1037 @@ -419,7 +419,7 @@ Interpretation of Implicit Conditions in Database Search Dialogues - ShunyaFukunaga + ShunyaFukunaga HitoshiNishikawa TakenobuTokunaga HikaruYokono @@ -443,10 +443,10 @@ Can Taxonomy Help? Improving Semantic Question Matching using Question Taxonomy - DeepakGupta - RajkumarPujari + DeepakGupta + RajkumarPujari AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya AnutoshMaitra TomJain ShubhashisSengupta @@ -471,8 +471,8 @@ Employing Text Matching Network to Recognise Nuclearity in <fixed-case>C</fixed-case>hinese Discourse ShengXu PeifengLi - GuodongZhou - QiaomingZhu + GuodongZhou + QiaomingZhu 525–535 The task of nuclearity recognition in Chinese discourse remains challenging due to the demand for more deep semantic information. In this paper, we propose a novel text matching network (TMN) that encodes the discourse units and the paragraphs by combining Bi-LSTM and CNN to capture both global dependency information and local n-gram information. Moreover, it introduces three components of text matching, the Cosine, Bilinear and Single Layer Network, to incorporate various similarities and interactions among the discourse units. Experimental results on the Chinese Discourse TreeBank show that our proposed TMN model significantly outperforms various strong baselines in both micro-F1 and macro-F1. C18-1044 @@ -483,8 +483,8 @@ XiaominChu FengJiang YiZhou - GuodongZhou - QiaomingZhu + GuodongZhou + QiaomingZhu 536–546 Discourse parsing is a challenging task and plays a critical role in discourse analysis. This paper focus on the macro level discourse structure analysis, which has been less studied in the previous researches. We explore a macro discourse structure presentation schema to present the macro level discourse structure, and propose a corresponding corpus, named Macro Chinese Discourse Treebank. On these bases, we concentrate on two tasks of macro discourse structure analysis, including structure identification and nuclearity recognition. In order to reduce the error transmission between the associated tasks, we adopt a joint model of the two tasks, and an Integer Linear Programming approach is proposed to achieve global optimization with various kinds of constraints. C18-1045 @@ -535,9 +535,9 @@ Modeling Coherence for Neural Machine Translation with Dynamic and Topic Caches ShaohuiKuang - DeyiXiong + DeyiXiong WeihuaLuo - GuodongZhou + GuodongZhou 596–606 Sentences in a well-formed text are connected to each other via various links to form the cohesive structure of the text. Current neural machine translation (NMT) systems translate a text in a conventional sentence-by-sentence fashion, ignoring such cross-sentence links and dependencies. This may lead to generate an incoherent target text for a coherent source text. In order to handle this issue, we propose a cache-based approach to modeling coherence for neural machine translation by capturing contextual information either from recently translated sentences or the entire document. Particularly, we explore two types of caches: a dynamic cache, which stores words from the best translation hypotheses of preceding sentences, and a topic cache, which maintains a set of target-side topical words that are semantically related to the document to be translated. On this basis, we build a new layer to score target words in these two caches with a cache-based neural model. Here the estimated probabilities from the cache-based neural model are combined with NMT probabilities into the final word prediction probabilities via a gating mechanism. Finally, the proposed cache-based neural model is trained jointly with NMT system in an end-to-end manner. Experiments and analysis presented in this paper demonstrate that the proposed cache-based model achieves substantial improvements over several state-of-the-art SMT and NMT baselines. C18-1050 @@ -546,7 +546,7 @@ Fusing Recency into Neural Machine Translation with an Inter-Sentence Gate Model ShaohuiKuang - DeyiXiong + DeyiXiong 607–617 Neural machine translation (NMT) systems are usually trained on a large amount of bilingual sentence pairs and translate one sentence at a time, ignoring inter-sentence information. This may make the translation of a sentence ambiguous or even inconsistent with the translations of neighboring sentences. In order to handle this issue, we propose an inter-sentence gate model that uses the same encoder to encode two adjacent sentences and controls the amount of information flowing from the preceding sentence to the translation of the current sentence with an inter-sentence gate. In this way, our proposed model can capture the connection between sentences and fuse recency from neighboring sentences into neural machine translation. On several NIST Chinese-English translation tasks, our experiments demonstrate that the proposed inter-sentence gate model achieves substantial improvements over the baseline. C18-1051 @@ -675,7 +675,7 @@ Multi-task dialog act and sentiment recognition on Mastodon ChristopheCerisara - SomayehJafaritazehjani + SomayehJafaritazehjani AdedayoOluokun Hoa T.Le 745–754 @@ -720,7 +720,7 @@ Dynamic Feature Selection with Attention in Incremental Parsing RyosukeKohita HiroshiNoji - YujiMatsumoto + YujiMatsumoto 785–794 One main challenge for incremental transition-based parsers, when future inputs are invisible, is to extract good features from a limited local context. In this work, we present a simple technique to maximally utilize the local features with an attention mechanism, which works as context- dependent dynamic feature selection. Our model learns, for example, which tokens should a parser focus on, to decide the next action. Our multilingual experiment shows its effectiveness across many languages. We also present an experiment with augmented test dataset and demon- strate it helps to understand the model’s behavior on locally ambiguous points. C18-1067 @@ -751,7 +751,7 @@ Projecting Embeddings for Domain Adaption: Joint Modeling of Sentiment Analysis in Diverse Domains JeremyBarnes RomanKlinger - SabineSchulte im Walde + SabineSchulte im Walde 818–830 Domain adaptation for sentiment analysis is challenging due to the fact that supervised classifiers are very sensitive to changes in domain. The two most prominent approaches to this problem are structural correspondence learning and autoencoders. However, they either require long training times or suffer greatly on highly divergent domains. Inspired by recent advances in cross-lingual sentiment analysis, we provide a novel perspective and cast the domain adaptation problem as an embedding projection task. Our model takes as input two mono-domain embedding spaces and learns to project them to a bi-domain space, which is jointly optimized to (1) project across domains and to (2) predict sentiment. We perform domain adaptation experiments on 20 source-target domain pairs for sentiment classification and report novel state-of-the-art results on 11 domain pairs, including the Amazon domain adaptation datasets and SemEval 2013 and 2016 datasets. Our analysis shows that our model performs comparably to state-of-the-art approaches on domains that are similar, while performing significantly better on highly divergent domains. Our code is available at https://github.com/jbarnesspain/domain_blse C18-1070 @@ -799,7 +799,7 @@ YichengZou TaoGui QiZhang - XuanjingHuang + XuanjingHuang 868–877 Attention mechanisms have been leveraged for sentiment classification tasks because not all words have the same importance. However, most existing attention models did not take full advantage of sentiment lexicons, which provide rich sentiment information and play a critical role in sentiment analysis. To achieve the above target, in this work, we propose a novel lexicon-based supervised attention model (LBSA), which allows a recurrent neural network to focus on the sentiment content, thus generating sentiment-informative representations. Compared with general attention models, our model has better interpretability and less noise. Experimental results on three large-scale sentiment classification datasets showed that the proposed method outperforms previous methods. C18-1074 @@ -829,7 +829,7 @@ Summarization Evaluation in the Absence of Human Model Summaries Using the Compositionality of Word Embeddings ElahehShafieiBavani MohammadEbrahimi - RaymondWong + RaymondWong FangChen 905–914 We present a new summary evaluation approach that does not require human model summaries. Our approach exploits the compositional capabilities of corpus-based and lexical resource-based word embeddings to develop the features reflecting coverage, diversity, informativeness, and coherence of summaries. The features are then used to train a learning model for predicting the summary content quality in the absence of gold models. We evaluate the proposed metric in replicating the human assigned scores for summarization systems and summaries on data from query-focused and update summarization tasks in TAC 2008 and 2009. The results show that our feature combination provides reliable estimates of summary content quality when model summaries are not available. @@ -838,10 +838,10 @@ A review of <fixed-case>S</fixed-case>panish corpora annotated with negation - Salud MaríaJiménez-Zafra + Salud MaríaJiménez-Zafra RoserMorante - MaiteMartin - L. AlfonsoUreña-López + MaiteMartin + L. AlfonsoUreña-López 915–924 The availability of corpora annotated with negation information is essential to develop negation processing systems in any language. However, there is a lack of these corpora even for languages like English, and when there are corpora available they are small and the annotations are not always compatible across corpora. In this paper we review the existing corpora annotated with negation in Spanish with the purpose of first, gathering the information to make it available for other researchers and, second, analyzing how compatible are the corpora and how has the linguistic phenomenon been addressed. Our final aim is to develop a supervised negation processing system for Spanish, for which we need training and test data. Our analysis shows that it will not be possible to merge the small corpora existing for Spanish due to lack of compatibility in the annotations. C18-1078 @@ -851,7 +851,7 @@ Document-level Multi-aspect Sentiment Classification by Jointly Modeling Users, Aspects, and Overall Ratings JunjieLi HaitongYang - ChengqingZong + ChengqingZong 925–936 Document-level multi-aspect sentiment classification aims to predict user’s sentiment polarities for different aspects of a product in a review. Existing approaches mainly focus on text information. However, the authors (i.e. users) and overall ratings of reviews are ignored, both of which are proved to be significant on interpreting the sentiments of different aspects in this paper. Therefore, we propose a model called Hierarchical User Aspect Rating Network (HUARN) to consider user preference and overall ratings jointly. Specifically, HUARN adopts a hierarchical architecture to encode word, sentence, and document level information. Then, user attention and aspect attention are introduced into building sentence and document level representation. The document representation is combined with user and overall rating information to predict aspect ratings of a review. Diverse aspects are treated differently and a multi-task framework is adopted. Empirical results on two real-world datasets show that HUARN achieves state-of-the-art performances. C18-1079 @@ -880,7 +880,7 @@ Evaluating the text quality, human likeness and tailoring component of <fixed-case>PASS</fixed-case>: A <fixed-case>D</fixed-case>utch data-to-text system for soccer Chrisvan der Lee BartVerduijn - EmielKrahmer + EmielKrahmer SanderWubben 962–972 We present an evaluation of PASS, a data-to-text system that generates Dutch soccer reports from match statistics which are automatically tailored towards fans of one club or the other. The evaluation in this paper consists of two studies. An intrinsic human-based evaluation of the system’s output is described in the first study. In this study it was found that compared to human-written texts, computer-generated texts were rated slightly lower on style-related text components (fluency and clarity) and slightly higher in terms of the correctness of given information. Furthermore, results from the first study showed that tailoring was accurately recognized in most cases, and that participants struggled with correctly identifying whether a text was written by a human or computer. The second study investigated if tailoring affects perceived text quality, for which no results were garnered. This lack of results might be due to negative preconceptions about computer-generated texts which were found in the first study. @@ -891,7 +891,7 @@ Answerable or Not: Devising a Dataset for Extending Machine Reading Comprehension MaoNakanishi TetsunoriKobayashi - YoshihikoHayashi + YoshihikoHayashi 973–983 Machine-reading comprehension (MRC) has recently attracted attention in the fields of natural language processing and machine learning. One of the problematic presumptions with current MRC technologies is that each question is assumed to be answerable by looking at a given text passage. However, to realize human-like language comprehension ability, a machine should also be able to distinguish not-answerable questions (NAQs) from answerable questions. To develop this functionality, a dataset incorporating hard-to-detect NAQs is vital; however, its manual construction would be expensive. This paper proposes a dataset creation method that alters an existing MRC dataset, the Stanford Question Answering Dataset, and describes the resulting dataset. The value of this dataset is likely to increase if each NAQ in the dataset is properly classified with the difficulty of identifying it as an NAQ. This difficulty level would allow researchers to evaluate a machine’s NAQ detection performance more precisely. Therefore, we propose a method for automatically assigning difficulty level labels, which measures the similarity between a question and the target text passage. Our NAQ detection experiments demonstrate that the resulting dataset, having difficulty level annotations, is valid and potentially useful in the development of advanced MRC models. C18-1083 @@ -900,8 +900,8 @@ Style Obfuscation by Invariance ChrisEmmery - EnriqueManjavacas Arevalo - GrzegorzChrupała + EnriqueManjavacas Arevalo + GrzegorzChrupała 984–996 The task of obfuscating writing style using sequence models has previously been investigated under the framework of obfuscation-by-transfer, where the input text is explicitly rewritten in another style. A side effect of this framework are the frequent major alterations to the semantic content of the input. In this work, we propose obfuscation-by-invariance, and investigate to what extent models trained to be explicitly style-invariant preserve semantics. We evaluate our architectures in parallel and non-parallel settings, and compare automatic and human evaluations on the obfuscated sentences. Our experiments show that the performance of a style classifier can be reduced to chance level, while the output is evaluated to be of equal quality to models applying style-transfer. Additionally, human evaluation indicates a trade-off between the level of obfuscation and the observed quality of the output in terms of meaning preservation and grammaticality. C18-1084 @@ -911,7 +911,7 @@ Encoding Sentiment Information into Word Vectors for Sentiment Analysis ZheYe FangLi - TimothyBaldwin + TimothyBaldwin 997–1007 General-purpose pre-trained word embeddings have become a mainstay of natural language processing, and more recently, methods have been proposed to encode external knowledge into word embeddings to benefit specific downstream tasks. The goal of this paper is to encode sentiment knowledge into pre-trained word vectors to improve the performance of sentiment analysis. Our proposed method is based on a convolutional neural network (CNN) and an external sentiment lexicon. Experiments on four popular sentiment analysis datasets show that this method improves the accuracy of sentiment analysis compared to a number of benchmark methods. C18-1085 @@ -929,7 +929,7 @@ Towards a Language for Natural Language Treebank Transductions - Carlos A.Prolo + Carlos A.Prolo 1022–1032 This paper describes a transduction language suitable for natural language treebank transformations and motivates its application to tasks that have been used and described in the literature. The language, which is the basis for a tree transduction tool allows for clean, precise and concise description of what has been very confusingly, ambiguously, and incompletely textually described in the literature also allowing easy non-hard-coded implementation. We also aim at getting feedback from the NLP community to eventually converge to a de facto standard for such transduction language. C18-1087 @@ -957,7 +957,7 @@ Enhancing General Sentiment Lexicons for Domain-Specific Use TimKreutz - WalterDaelemans + WalterDaelemans 1056–1064 Lexicon based methods for sentiment analysis rely on high quality polarity lexicons. In recent years, automatic methods for inducing lexicons have increased the viability of lexicon based methods for polarity classification. SentProp is a framework for inducing domain-specific polarities from word embeddings. We elaborate on SentProp by evaluating its use for enhancing DuOMan, a general-purpose lexicon, for use in the political domain. By adding only top sentiment bearing words from the vocabulary and applying small polarity shifts in the general-purpose lexicon, we increase accuracy in an in-domain classification task. The enhanced lexicon performs worse than the original lexicon in an out-domain task, showing that the words we added and the polarity shifts we applied are domain-specific and do not translate well to an out-domain setting. C18-1090 @@ -1041,7 +1041,7 @@ Multilevel Heuristics for Rationale-Based Entity Relation Classification in Sentences Shiou TianHsu MandarChaudhary - NagizaSamatova + NagizaSamatova 1145–1155 Rationale-based models provide a unique way to provide justifiable results for relation classification models by identifying rationales (key words and phrases that a person can use to justify the relation in the sentence) during the process. However, existing generative networks used to extract rationales come with a trade-off between extracting diversified rationales and achieving good classification results. In this paper, we propose a multilevel heuristic approach to regulate rationale extraction to avoid extracting monotonous rationales without compromising classification performance. In our model, rationale selection is regularized by a semi-supervised process and features from different levels: word, syntax, sentence, and corpus. We evaluate our approach on the SemEval 2010 dataset that includes 19 relation classes and the quality of extracted rationales with our manually-labeled rationales. Experiments show a significant improvement in classification performance and a 20% gain in rationale interpretability compared to state-of-the-art approaches. C18-1098 @@ -1096,7 +1096,7 @@ Adversarial Domain Adaptation for Variational Neural Language Generation in Dialogue Systems Van-KhanhTran - Le-MinhNguyen + Le-MinhNguyen 1205–1217 Domain Adaptation arises when we aim at learning from source domain a model that can perform acceptably well on a different target domain. It is especially crucial for Natural Language Generation (NLG) in Spoken Dialogue Systems when there are sufficient annotated data in the source domain, but there is a limited labeled data in the target domain. How to effectively utilize as much of existing abilities from source domains is a crucial issue in domain adaptation. In this paper, we propose an adversarial training procedure to train a Variational encoder-decoder based language generator via multiple adaptation steps. In this procedure, a model is first trained on a source domain data and then fine-tuned on a small set of target domain utterances under the guidance of two proposed critics. Experimental results show that the proposed method can effectively leverage the existing knowledge in the source domain to adapt to another related domain by using only a small amount of in-domain data. C18-1103 @@ -1108,8 +1108,8 @@ TimBaumgärtner AashishVenkatesh EliaBruni - RaffaellaBernardi - RaquelFernandez + RaffaellaBernardi + RaquelFernandez 1218–1233 Our goal is to explore how the abilities brought in by a dialogue manager can be included in end-to-end visually grounded conversational agents. We make initial steps towards this general goal by augmenting a task-oriented visual dialogue model with a decision-making component that decides whether to ask a follow-up question to identify a target referent in an image, or to stop the conversation to make a guess. Our analyses show that adding a decision making component produces dialogues that are less repetitive and that include fewer unnecessary questions, thus potentially leading to more efficient and less unnatural interactions. C18-1104 @@ -1130,7 +1130,7 @@ Dialogue-act-driven Conversation Model : An Experimental Study HarshitKumar ArvindAgarwal - SachindraJoshi + SachindraJoshi 1246–1256 The utility of additional semantic information for the task of next utterance selection in an automated dialogue system is the focus of study in this paper. In particular, we show that additional information available in the form of dialogue acts –when used along with context given in the form of dialogue history– improves the performance irrespective of the underlying model being generative or discriminative. In order to show the model agnostic behavior of dialogue acts, we experiment with several well-known models such as sequence-to-sequence encoder-decoder model, hierarchical encoder-decoder model, and Siamese-based models with and without hierarchy; and show that in all models, incorporating dialogue acts improves the performance by a significant margin. We, furthermore, propose a novel way of encoding dialogue act information, and use it along with hierarchical encoder to build a model that can use the sequential dialogue act information in a natural way. Our proposed model achieves an MRR of about 84.8% for the task of next utterance selection on a newly introduced Daily Dialogue dataset, and outperform the baseline models. We also provide a detailed analysis of results including key insights that explain the improvement in MRR because of dialog act information. C18-1106 @@ -1166,7 +1166,7 @@ MengZou XihanLi HaokunLiu - ZhihongDeng + ZhihongDeng 1281–1291 Neural encoder-decoder models have been widely applied to conversational response generation, which is a research hot spot in recent years. However, conventional neural encoder-decoder models tend to generate commonplace responses like “I don’t know” regardless of what the input is. In this paper, we analyze this problem from a new perspective: latent vectors. Based on it, we propose an easy-to-extend learning framework named MEMD (Multi-Encoder to Multi-Decoder), in which an auxiliary encoder and an auxiliary decoder are introduced to provide necessary training guidance without resorting to extra data or complicating network’s inner structure. Experimental results demonstrate that our method effectively improve the quality of generated responses according to automatic metrics and human evaluations, yielding more diverse and smooth replies. C18-1109 @@ -1226,7 +1226,7 @@ Local String Transduction as Sequence Labeling JoanaRibeiro ShashiNarayan - Shay B.Cohen + Shay B.Cohen XavierCarreras 1360–1371 We show that the general problem of string transduction can be reduced to the problem of sequence labeling. While character deletion and insertions are allowed in string transduction, they do not exist in sequence labeling. We show how to overcome this difference. Our approach can be used with any sequence labeling algorithm and it works best for problems in which string transduction imposes a strong notion of locality (no long range dependencies). We experiment with spelling correction for social media, OCR correction, and morphological inflection, and we see that it behaves better than seq2seq models and yields state-of-the-art results in several cases. @@ -1244,7 +1244,7 @@ Diachronic word embeddings and semantic shifts: a survey AndreyKutuzov - LiljaØvrelid + LiljaØvrelid TerrenceSzymanski ErikVelldal 1384–1397 @@ -1271,7 +1271,7 @@ ShoushanLi MingqiJiang HanqianWu - GuodongZhou + GuodongZhou 1410–1420 In realistic scenarios, a user profiling model (e.g., gender classification or age regression) learned from one social media might perform rather poorly when tested on another social media due to the different data distributions in the two media. In this paper, we address cross-media user profiling by bridging the knowledge between the source and target media with a uniform user embedding learning approach. In our approach, we first construct a cross-media user-word network to capture the relationship among users through the textual information and a modified cross-media user-user network to capture the relationship among users through the social information. Then, we learn user embedding by jointly learning the heterogeneous network composed of above two networks. Finally, we train a classification (or regression) model with the obtained user embeddings as input to perform user profiling. Empirical studies demonstrate the effectiveness of the proposed approach to two cross-media user profiling tasks, i.e., cross-media gender classification and cross-media age regression. C18-1119 @@ -1280,7 +1280,7 @@ Incorporating Syntactic Uncertainty in Neural Machine Translation with a Forest-to-Sequence Model PooryaZaremoodi - GholamrezaHaffari + GholamrezaHaffari 1421–1429 Incorporating syntactic information in Neural Machine Translation (NMT) can lead to better reorderings, particularly useful when the language pairs are syntactically highly divergent or when the training bitext is not large. Previous work on using syntactic information, provided by top-1 parse trees generated by (inevitably error-prone) parsers, has been promising. In this paper, we propose a forest-to-sequence NMT model to make use of exponentially many parse trees of the source sentence to compensate for the parser errors. Our method represents the collection of parse trees as a packed forest, and learns a neural transducer to translate from the input forest to the target sentence. Experiments on English to German, Chinese and Farsi translation tasks show the superiority of our approach over the sequence-to-sequence and tree-to-sequence neural translation models. C18-1120 @@ -1291,7 +1291,7 @@ HaoranLi JunnanZhu JiajunZhang - ChengqingZong + ChengqingZong 1430–1441 In this paper, we investigate the sentence summarization task that produces a summary from a source sentence. Neural sequence-to-sequence models have gained considerable success for this task, while most existing approaches only focus on improving the informativeness of the summary, which ignore the correctness, i.e., the summary should not contain unrelated information with respect to the source sentence. We argue that correctness is an essential requirement for summarization systems. Considering a correct summary is semantically entailed by the source sentence, we incorporate entailment knowledge into abstractive summarization models. We propose an entailment-aware encoder under multi-task framework (i.e., summarization generation and entailment recognition) and an entailment-aware decoder by entailment Reward Augmented Maximum Likelihood (RAML) training. Experiment results demonstrate that our models significantly outperform baselines from the aspects of informativeness and correctness. C18-1121 @@ -1300,7 +1300,7 @@ Extracting Parallel Sentences with Bidirectional Recurrent Neural Networks to Improve Machine Translation FrancisGrégoire - PhilippeLanglais + PhilippeLanglais 1442–1453 Parallel sentence extraction is a task addressing the data sparsity problem found in multilingual natural language processing applications. We propose a bidirectional recurrent neural network based approach to extract parallel sentences from collections of multilingual texts. Our experiments with noisy parallel corpora show that we can achieve promising results against a competitive baseline by removing the need of specific feature engineering or additional external resources. To justify the utility of our approach, we extract sentence pairs from Wikipedia articles to train machine translation systems and show significant improvements in translation performance. C18-1122 @@ -1310,7 +1310,7 @@ Fast and Accurate Reordering with <fixed-case>ITG</fixed-case> Transition <fixed-case>RNN</fixed-case> HaoZhang AxelNg - RichardSproat + RichardSproat 1454–1463 Attention-based sequence-to-sequence neural network models learn to jointly align and translate. The quadratic-time attention mechanism is powerful as it is capable of handling arbitrary long-distance reordering, but computationally expensive. In this paper, towards making neural translation both accurate and efficient, we follow the traditional pre-reordering approach to decouple reordering from translation. We add a reordering RNN that shares the input encoder with the decoder. The RNNs are trained jointly with a multi-task loss function and applied sequentially at inference time. The task of the reordering model is to predict the permutation of the input words following the target language word order. After reordering, the attention in the decoder becomes more peaked and monotonic. For reordering, we adopt the Inversion Transduction Grammars (ITG) and propose a transition system to parse input to trees for reordering. We harness the ITG transition system with RNN. With the modeling power of RNN, we achieve superior reordering accuracy without any feature engineering. In experiments, we apply the model to the task of text normalization. Compared to a strong baseline of attention-based RNN, our ITG RNN re-ordering model can reach the same reordering accuracy with only 1/10 of the training data and is 2.5x faster in decoding. C18-1123 @@ -1322,7 +1322,7 @@ JunXie ZhixingTan JinsongSu - DeyiXiong + DeyiXiong ChaoBian 1464–1473 Neural machine translation with source-side attention have achieved remarkable performance. however, there has been little work exploring to attend to the target-side which can potentially enhance the memory capbility of NMT. We reformulate a Decoding History Enhanced Attention mechanism (DHEA) to render NMT model better at selecting both source-side and target-side information. DHA enables dynamic control of the ratios at which source and target contexts contribute to the generation of target words, offering a way to weakly induce structure relations among both source and target tokens. It also allows training errors to be directly back-propagated through short-cut connections and effectively alleviates the gradient vanishing problem. The empirical study on Chinese-English translation shows that our model with proper configuration can improve by 0:9 BLEU upon Transformer and the best reported results in the dataset. On WMT14 English-German task and a larger WMT14 English-French task, our model achieves comparable results with the state-of-the-art. @@ -1391,8 +1391,8 @@ User-Level Race and Ethnicity Predictors from <fixed-case>T</fixed-case>witter Text - DanielPreoţiuc-Pietro - LyleUngar + DanielPreoţiuc-Pietro + LyleUngar 1534–1545 User demographic inference from social media text has the potential to improve a range of downstream applications, including real-time passive polling or quantifying demographic bias. This study focuses on developing models for user-level race and ethnicity prediction. We introduce a data set of users who self-report their race/ethnicity through a survey, in contrast to previous approaches that use distantly supervised data or perceived labels. We develop predictive models from text which accurately predict the membership of a user to the four largest racial and ethnic groups with up to .884 AUC and make these available to the research community. C18-1130 @@ -1402,7 +1402,7 @@ Multi-Source Multi-Class Fake News Detection HamidKarimi ProteekRoy - SariSaba-Sadiya + SariSaba-Sadiya JiliangTang 1546–1557 Fake news spreading through media outlets poses a real threat to the trustworthiness of information and detecting fake news has attracted increasing attention in recent years. Fake news is typically written intentionally to mislead readers, which determines that fake news detection merely based on news content is tremendously challenging. Meanwhile, fake news could contain true evidence to mock true news and presents different degrees of fakeness, which further exacerbates the detection difficulty. On the other hand, the spread of fake news produces various types of data from different perspectives. These multiple sources provide rich contextual information about fake news and offer unprecedented opportunities for advanced fake news detection. In this paper, we study fake news detection with different degrees of fakeness by integrating multiple sources. In particular, we introduce approaches to combine information from multiple sources and to discriminate between different degrees of fakeness, and propose a Multi-source Multi-class Fake news Detection framework MMFD, which combines automated feature extraction, multi-source fusion and automated degrees of fakeness detection into a coherent and interpretable model. Experimental results on the real-world data demonstrate the effectiveness of the proposed framework and extensive experiments are further conducted to understand the working of the proposed framework. @@ -1424,7 +1424,7 @@ NurendraChoudhary RajatSingh VijjiniAnvesh Rao - ManishShrivastava + ManishShrivastava 1570–1577 In this paper, we leverage social media platforms such as twitter for developing corpus across multiple languages. The corpus creation methodology is applicable for resource-scarce languages provided the speakers of that particular language are active users on social media platforms. We present an approach to extract social media microblogs such as tweets (Twitter). In this paper, we create corpus for multilingual sentiment analysis and emoji prediction in Hindi, Bengali and Telugu. Further, we perform and analyze multiple NLP tasks utilizing the corpus to get interesting observations. C18-1133 @@ -1442,7 +1442,7 @@ The Road to Success: Assessing the Fate of Linguistic Innovations in Online Communities MarcoDel Tredici - RaquelFernández + RaquelFernández 1591–1603 We investigate the birth and diffusion of lexical innovations in a large dataset of online social communities. We build on sociolinguistic theories and focus on the relation between the spread of a novel term and the social role of the individuals who use it, uncovering characteristics of innovators and adopters. Finally, we perform a prediction task that allows us to anticipate whether an innovation will successfully spread within a community. C18-1135 @@ -1450,8 +1450,8 @@ Ab Initio: Automatic <fixed-case>L</fixed-case>atin Proto-word Reconstruction - Alina MariaCiobanu - Liviu P.Dinu + Alina MariaCiobanu + Liviu P.Dinu 1604–1614 Proto-word reconstruction is central to the study of language evolution. It consists of recreating the words in an ancient language from its modern daughter languages. In this paper we investigate automatic word form reconstruction for Latin proto-words. Having modern word forms in multiple Romance languages (French, Italian, Spanish, Portuguese and Romanian), we infer the form of their common Latin ancestors. Our approach relies on the regularities that occurred when the Latin words entered the modern languages. We leverage information from all modern languages, building an ensemble system for proto-word reconstruction. We use conditional random fields for sequence labeling, but we conduct preliminary experiments with recurrent neural networks as well. We apply our method on multiple datasets, showing that our method improves on previous results, having also the advantage of requiring less input data, which is essential in historical linguistics, where resources are generally scarce. C18-1136 @@ -1459,7 +1459,7 @@ A Computational Model for the Linguistic Notion of Morphological Paradigm - MiikkaSilfverberg + MiikkaSilfverberg LingLiu MansHulden 1615–1626 @@ -1532,7 +1532,7 @@ Learning from Measurements in Crowdsourcing Models: Inferring Ground Truth from Diverse Annotation Types PaulFelt - EricRingger + EricRingger JordanBoyd-Graber KevinSeppi 1694–1704 @@ -1601,7 +1601,7 @@ ZhongyuWei SiyuanWang YangLiu - XuanjingHuang + XuanjingHuang 1763–1774 Visual Question Generation (VQG) aims to ask natural questions about an image automatically. Existing research focus on training model to fit the annotated data set that makes it indifferent from other language generation tasks. We argue that natural questions need to have two specific attributes from the perspectives of content and linguistic respectively, namely, natural and human-written. Inspired by the setting of discriminator in adversarial learning, we propose two discriminators, one for each attribute, to enhance the training. We then use the reinforcement learning framework to incorporate scores from the two discriminators as the reward to guide the training of the question generator. Experimental results on a benchmark VQG dataset show the effectiveness and robustness of our model compared to some state-of-the-art models in terms of both automatic and human evaluation metrics. C18-1150 @@ -1652,8 +1652,8 @@ Treat us like the sequences we are: Prepositional Paraphrasing of Noun Compounds using <fixed-case>LSTM</fixed-case> GirishkumarPonkiya KevinPatel - PushpakBhattacharyya - GirishPalshikar + PushpakBhattacharyya + GirishPalshikar 1827–1836 Interpreting noun compounds is a challenging task. It involves uncovering the underlying predicate which is dropped in the formation of the compound. In most cases, this predicate is of the form VERB+PREP. It has been observed that uncovering the preposition is a significant step towards uncovering the predicate. In this paper, we attempt to paraphrase noun compounds using prepositions. We consider noun compounds and their corresponding prepositional paraphrases as parallelly aligned sequences of words. This enables us to adapt different architectures from cross-lingual embedding literature. We choose the architecture where we create representations of both noun compound (source sequence) and its corresponding prepositional paraphrase (target sequence), such that their sim- ilarity is high. We use LSTMs to learn these representations. We use these representations to decide the correct preposition. Our experiments show that this approach performs considerably well on different datasets of noun compounds that are manually annotated with prepositions. C18-1155 @@ -1666,7 +1666,7 @@ SruthiGorantla ErikCambria RogerZimmermann - RadaMihalcea + RadaMihalcea 1837–1848 The literature in automated sarcasm detection has mainly focused on lexical-, syntactic- and semantic-level analysis of text. However, a sarcastic sentence can be expressed with contextual presumptions, background and commonsense knowledge. In this paper, we propose a ContextuAl SarCasm DEtector (CASCADE), which adopts a hybrid approach of both content- and context-driven modeling for sarcasm detection in online social media discussions. For the latter, CASCADE aims at extracting contextual information from the discourse of a discussion thread. Also, since the sarcastic nature and form of expression can vary from person to person, CASCADE utilizes user embeddings that encode stylometric and personality features of users. When used along with content-based feature extractors such as convolutional neural networks, we see a significant boost in the classification performance on a large Reddit corpus. C18-1156 @@ -1718,7 +1718,7 @@ Robust Lexical Features for Improved Neural Network Named-Entity Recognition AbbasGhaddar - PhillippeLanglais + PhillippeLanglais 1896–1907 Neural network approaches to Named-Entity Recognition reduce the need for carefully hand-crafted features. While some features do remain in state-of-the-art systems, lexical features have been mostly discarded, with the exception of gazetteers. In this work, we show that this is unfair: lexical features are actually quite useful. We propose to embed words and entity types into a low-dimensional vector space we train from annotated data produced by distant supervision thanks to Wikipedia. From this, we compute — offline — a feature vector representing each word. When used with a vanilla recurrent neural network model, this representation yields substantial improvements. We establish a new state-of-the-art F1 score of 87.95 on ONTONOTES 5.0, while matching state-of-the-art performance with a F1 score of 91.73 on the over-studied CONLL-2003 dataset. C18-1161 @@ -1783,7 +1783,7 @@ Genre Identification and the Compositional Effect of Genre in Literature JosephWorsham - JugalKalita + JugalKalita 1963–1973 Recent advances in Natural Language Processing are finding ways to place an emphasis on the hierarchical nature of text instead of representing language as a flat sequence or unordered collection of words or letters. A human reader must capture multiple levels of abstraction and meaning in order to formulate an understanding of a document. In this paper, we address the problem of developing approaches which are capable of working with extremely large and complex literary documents to perform Genre Identification. The task is to assign the literary classification to a full-length book belonging to a corpus of literature, where the works on average are well over 200,000 words long and genre is an abstract thematic concept. We introduce the Gutenberg Dataset for Genre Identification. Additionally, we present a study on how current deep learning models compare to traditional methods for this task. The results are presented as a baseline along with findings on how using an ensemble of chapters can significantly improve results in deep learning methods. The motivation behind the ensemble of chapters method is discussed as the compositionality of subtexts which make up a larger work and contribute to the overall genre. C18-1167 @@ -1793,7 +1793,7 @@ Transfer Learning for Entity Recognition of Novel Classes Juan DiegoRodriguez AdamCaldwell - AlexanderLiu + AlexanderLiu 1974–1985 In this reproduction paper, we replicate and extend several past studies on transfer learning for entity recognition. In particular, we are interested in entity recognition problems where the class labels in the source and target domains are different. Our work is the first direct comparison of these previously published approaches in this problem setting. In addition, we perform experiments on seven new source/target corpus pairs, nearly doubling the total number of corpus pairs that have been studied in all past work combined. Our results empirically demonstrate when each of the published approaches tends to do well. In particular, simpler approaches often work best when there is very little labeled target data, while neural transfer approaches tend to do better when there is more labeled target data. C18-1168 @@ -1834,7 +1834,7 @@ Task-oriented Word Embedding for Text Classification QianLiu - HeyanHuang + HeyanHuang YangGao XiaochiWei YuxinTian @@ -1849,7 +1849,7 @@ JianyuZhao ZhiqiangZhan QichuanYang - YangZhang + YangZhang ChangjianHu ZhenshengLi LiuxinZhang @@ -1897,7 +1897,7 @@ Improving Named Entity Recognition by Jointly Learning to Disambiguate Morphological Tags OnurGüngör - SuzanUskudarli + SuzanUskudarli TungaGüngör 2082–2092 Previous studies have shown that linguistic features of a word such as possession, genitive or other grammatical cases can be employed in word representations of a named entity recognition (NER) tagger to improve the performance for morphologically rich languages. However, these taggers require external morphological disambiguation (MD) tools to function which are hard to obtain or non-existent for many languages. In this work, we propose a model which alleviates the need for such disambiguators by jointly learning NER and MD taggers in languages for which one can provide a list of candidate morphological analyses. We show that this can be done independent of the morphological annotation schemes, which differ among languages. Our experiments employing three different model architectures that join these two tasks show that joint learning improves NER performance. Furthermore, the morphological disambiguator’s performance is shown to be competitive. @@ -1917,7 +1917,7 @@ An Analysis of Annotated Corpora for Emotion Classification in Text - Laura-Ana-MariaBostan + Laura-Ana-MariaBostan RomanKlinger 2104–2119 Several datasets have been annotated and published for classification of emotions. They differ in several ways: (1) the use of different annotation schemata (e. g., discrete label sets, including joy, anger, fear, or sadness or continuous values including valence, or arousal), (2) the domain, and, (3) the file formats. This leads to several research gaps: supervised models often only use a limited set of available resources. Additionally, no previous work has compared emotion corpora in a systematic manner. We aim at contributing to this situation with a survey of the datasets, and aggregate them in a common file format with a common annotation schema. Based on this aggregation, we perform the first cross-corpus classification experiments in the spirit of future research enabled by this paper, in order to gain insight and a better understanding of differences of models inferred from the data. This work also simplifies the choice of the most appropriate resources for developing a model for a novel domain. One result from our analysis is that a subset of corpora is better classified with models trained on a different corpus. For none of the corpora, training on all data altogether is better than using a subselection of the resources. Our unified corpus is available at http://www.ims.uni-stuttgart.de/data/unifyemotion. @@ -1936,8 +1936,8 @@ A Review on Deep Learning Techniques Applied to Answer Selection - Tuan ManhLai - TrungBui + Tuan ManhLai + TrungBui ShengLi 2132–2144 Given a question and a set of candidate answers, answer selection is the task of identifying which of the candidates answers the question correctly. It is an important problem in natural language processing, with applications in many areas. Recently, many deep learning based methods have been proposed for the task. They produce impressive performance without relying on any feature engineering or expensive external resources. In this paper, we aim to provide a comprehensive review on deep learning methods applied to answer selection. @@ -1955,7 +1955,7 @@ Distantly Supervised <fixed-case>NER</fixed-case> with Partial Annotation Learning and Reinforcement Learning - YaoshengYang + YaoshengYang WenliangChen ZhenghuaLi ZhengqiuHe @@ -2011,8 +2011,8 @@ Aspect-based summarization of pros and cons in unstructured product reviews FlorianKunneman SanderWubben - Antalvan den Bosch - EmielKrahmer + Antalvan den Bosch + EmielKrahmer 2219–2229 We developed three systems for generating pros and cons summaries of product reviews. Automating this task eases the writing of product reviews, and offers readers quick access to the most important information. We compared SynPat, a system based on syntactic phrases selected on the basis of valence scores, against a neural-network-based system trained to map bag-of-words representations of reviews directly to pros and cons, and the same neural system trained on clusters of word-embedding encodings of similar pros and cons. We evaluated the systems in two ways: first on held-out reviews with gold-standard pros and cons, and second by asking human annotators to rate the systems’ output on relevance and completeness. In the second evaluation, the gold-standard pros and cons were assessed along with the system output. We find that the human-generated summaries are not deemed as significantly more relevant or complete than the SynPat systems; the latter are scored higher than the human-generated summaries on a precision metric. The neural approaches yield a lower performance in the human assessment, and are outperformed by the baseline. C18-1188 @@ -2085,7 +2085,7 @@ <fixed-case>G</fixed-case>raphene: Semantically-Linked Propositions in Open Information Extraction MatthiasCetto ChristinaNiklaus - AndréFreitas + AndréFreitas SiegfriedHandschuh 2300–2311 We present an Open Information Extraction (IE) approach that uses a two-layered transformation stage consisting of a clausal disembedding layer and a phrasal disembedding layer, together with rhetorical relation identification. In that way, we convert sentences that present a complex linguistic structure into simplified, syntactically sound sentences, from which we can extract propositions that are represented in a two-layered hierarchy in the form of core relational tuples and accompanying contextual information which are semantically linked via rhetorical relations. In a comparative evaluation, we demonstrate that our reference implementation Graphene outperforms state-of-the-art Open IE systems in the construction of correct n-ary predicate-argument structures. Moreover, we show that existing Open IE approaches can benefit from the transformation process of our framework. @@ -2116,7 +2116,7 @@ AakankshaNaik AbhilashaRavichander NormanSadeh - CarolynRose + CarolynRose GrahamNeubig 2340–2353 Natural language inference (NLI) is the task of determining if a natural language hypothesis can be inferred from a given premise in a justifiable manner. NLI was proposed as a benchmark task for natural language understanding. Existing models perform well at standard datasets for NLI, achieving impressive results across different genres of text. However, the extent to which these models understand the semantic content of sentences is unclear. In this work, we propose an evaluation methodology consisting of automatically constructed “stress tests” that allow us to examine whether systems have the ability to make real inferential decisions. Our evaluation of six sentence-encoder models on these stress tests reveals strengths and weaknesses of these models with respect to challenging linguistic phenomena, and suggests important directions for future work in this area. @@ -2128,11 +2128,11 @@ Hoa TrongVu ClaudioGreco AliiaErofeeva - SomayehJafaritazehjan + SomayehJafaritazehjan GuidoLinders MarcTanti AlbertoTestoni - RaffaellaBernardi + RaffaellaBernardi AlbertGatt 2354–2368 Capturing semantic relations between sentences, such as entailment, is a long-standing challenge for computational semantics. Logic-based models analyse entailment in terms of possible worlds (interpretations, or situations) where a premise P entails a hypothesis H iff in all worlds where P is true, H is also true. Statistical models view this relationship probabilistically, addressing it in terms of whether a human would likely infer H from P. In this paper, we wish to bridge these two perspectives, by arguing for a visually-grounded version of the Textual Entailment task. Specifically, we ask whether models can perform better if, in addition to P and H, there is also an image (corresponding to the relevant “world” or “situation”). We use a multimodal version of the SNLI dataset (Bowman et al., 2015) and we compare “blind” and visually-augmented models of textual entailment. We show that visual information is beneficial, but we also conduct an in-depth error analysis that reveals that current multimodal models are not performing “grounding” in an optimal fashion. @@ -2143,7 +2143,7 @@ Recurrent One-Hop Predictions for Reasoning over Knowledge Graphs WenpengYin YadollahYaghoobzadeh - HinrichSchütze + HinrichSchütze 2369–2378 Large scale knowledge graphs (KGs) such as Freebase are generally incomplete. Reasoning over multi-hop (mh) KG paths is thus an important capability that is needed for question answering or other NLP tasks that require knowledge about the world. mh-KG reasoning includes diverse scenarios, e.g., given a head entity and a relation path, predict the tail entity; or given two entities connected by some relation paths, predict the unknown relation between them. We present ROPs, recurrent one-hop predictors, that predict entities at each step of mh-KB paths by using recurrent neural networks and vector representations of entities and relations, with two benefits: (i) modeling mh-paths of arbitrary lengths while updating the entity and relation representations by the training signal at each step; (ii) handling different types of mh-KG reasoning in a unified framework. Our models show state-of-the-art for two important multi-hop KG reasoning tasks: Knowledge Base Completion and Path Query Answering. C18-1200 @@ -2178,8 +2178,8 @@ Stance Detection with Hierarchical Attention Network QingyingSun ZhongqingWang - QiaomingZhu - GuodongZhou + QiaomingZhu + GuodongZhou 2399–2409 Stance detection aims to assign a stance label (for or against) to a post toward a specific target. Recently, there is a growing interest in using neural models to detect stance of documents. Most of these works model the sequence of words to learn document representation. However, much linguistic information, such as polarity and arguments of the document, is correlated with the stance of the document, and can inspire us to explore the stance. Hence, we present a neural model to fully employ various linguistic information to construct the document representation. In addition, since the influences of different linguistic information are different, we propose a hierarchical attention network to weigh the importance of various linguistic information, and learn the mutual attention between the document and the linguistic information. The experimental results on two datasets demonstrate the effectiveness of the proposed hierarchical attention neural model. C18-1203 @@ -2198,7 +2198,7 @@ Retrofitting Distributional Embeddings to Knowledge Graphs with Functional Relations BenLengerich - AndrewMaas + AndrewMaas ChristopherPotts 2423–2436 Knowledge graphs are a versatile framework to encode richly structured data relationships, but it can be challenging to combine these graphs with unstructured data. Methods for retrofitting pre-trained entity representations to the structure of a knowledge graph typically assume that entities are embedded in a connected space and that relations imply similarity. However, useful knowledge graphs often contain diverse entities and relations (with potentially disjoint underlying corpora) which do not accord with these assumptions. To overcome these limitations, we present Functional Retrofitting, a framework that generalizes current retrofitting methods by explicitly modeling pairwise relations. Our framework can directly incorporate a variety of pairwise penalty functions previously developed for knowledge graph completion. Further, it allows users to encode, learn, and extract information about relation semantics. We present both linear and neural instantiations of the framework. Functional Retrofitting significantly outperforms existing retrofitting methods on complex knowledge graphs and loses no accuracy on simpler graphs (in which relations do imply similarity). Finally, we demonstrate the utility of the framework by predicting new drug–disease treatment pairs in a large, complex health knowledge graph. @@ -2207,7 +2207,7 @@ Context-Sensitive Generation of Open-Domain Conversational Responses - WeinanZhang + WeinanZhang YimingCui YifaWang QingfuZhu @@ -2233,10 +2233,10 @@ Synonymy in Bilingual Context: The <fixed-case>C</fixed-case>z<fixed-case>E</fixed-case>ng<fixed-case>C</fixed-case>lass Lexicon - ZdeňkaUrešová - EvaFučíková - EvaHajičová - JanHajič + ZdeňkaUrešová + EvaFučíková + EvaHajičová + JanHajič 2456–2469 This paper describes CzEngClass, a bilingual lexical resource being built to investigate verbal synonymy in bilingual context and to relate semantic roles common to one synonym class to verb arguments (verb valency). In addition, the resource is linked to existing resources with the same of a similar aim: English and Czech WordNet, FrameNet, PropBank, VerbNet (SemLink), and valency lexicons for Czech and English (PDT-Vallex, Vallex, and EngVallex). There are several goals of this work and resource: (a) to provide gold standard data for automatic experiments in the future (such as automatic discovery of synonym classes, word sense disambiguation, assignment of classes to occurrences of verbs in text, coreferential linking of verb and event arguments in text, etc.), (b) to build a core (bilingual) lexicon linked to existing resources, for comparative studies and possibly for training automatic tools, and (c) to enrich the annotation of a parallel treebank, the Prague Czech English Dependency Treebank, which so far contained valency annotation but has not linked synonymous senses of verbs together. The method used for extracting the synonym classes is a semi-automatic process with a substantial amount of manual work during filtering, role assignment to classes and individual Class members’ arguments, and linking to the external lexical resources. We present the first version with 200 classes (about 1800 verbs) and evaluate interannotator agreement using several metrics. C18-1208 @@ -2257,7 +2257,7 @@ AndrewMatteson ChanheeLee YoungbumKim - HeuiseokLim + HeuiseokLim 2482–2492 Due to the fact that Korean is a highly agglutinative, character-rich language, previous work on Korean morphological analysis typically employs the use of sub-character features known as graphemes or otherwise utilizes comprehensive prior linguistic knowledge (i.e., a dictionary of known morphological transformation forms, or actions). These models have been created with the assumption that character-level, dictionary-less morphological analysis was intractable due to the number of actions required. We present, in this study, a multi-stage action-based model that can perform morphological transformation and part-of-speech tagging using arbitrary units of input and apply it to the case of character-level Korean morphological analysis. Among models that do not employ prior linguistic knowledge, we achieve state-of-the-art word and sentence-level tagging accuracy with the Sejong Korean corpus using our proposed data-driven Bi-LSTM model. C18-1210 @@ -2276,7 +2276,7 @@ Real-time Change Point Detection using On-line Topic Models YunliWang - CyrilGoutte + CyrilGoutte 2505–2515 Detecting changes within an unfolding event in real time from news articles or social media enables to react promptly to serious issues in public safety, public health or natural disasters. In this study, we use on-line Latent Dirichlet Allocation (LDA) to model shifts in topics, and apply on-line change point detection (CPD) algorithms to detect when significant changes happen. We describe an on-line Bayesian change point detection algorithm that we use to detect topic changes from on-line LDA output. Extensive experiments on social media data and news articles show the benefits of on-line LDA versus standard LDA, and of on-line change point detection compared to off-line algorithms. This yields F-scores up to 52% on the detection of significant real-life changes from these document streams. C18-1212 @@ -2313,7 +2313,7 @@ LuoSi XiaozhongLiu MinZhang - GuodongZhou + GuodongZhou 2540–2550 Question-Answer (QA) matching is a fundamental task in the Natural Language Processing community. In this paper, we first build a novel QA matching corpus with informal text which is collected from a product reviewing website. Then, we propose a novel QA matching approach, namely One vs. Many Matching, which aims to address the novel scenario where one question sentence often has an answer with multiple sentences. Furthermore, we improve our matching approach by employing both word-level and sentence-level attentions for solving the noisy problem in the informal text. Empirical studies demonstrate the effectiveness of the proposed approach to question-answer matching. C18-1215 @@ -2333,7 +2333,7 @@ <fixed-case>U</fixed-case>rdu Word Segmentation using Conditional Random Fields (<fixed-case>CRF</fixed-case>s) HarisBin Zia - Agha AliRaza + Agha AliRaza AwaisAthar 2562–2569 State-of-the-art Natural Language Processing algorithms rely heavily on efficient word segmentation. Urdu is amongst languages for which word segmentation is a complex task as it exhibits space omission as well as space insertion issues. This is partly due to the Arabic script which although cursive in nature, consists of characters that have inherent joining and non-joining attributes regardless of word boundary. This paper presents a word segmentation system for Urdu which uses a Conditional Random Field sequence modeler with orthographic, linguistic and morphological features. Our proposed model automatically learns to predict white space as word boundary as well as Zero Width Non-Joiner (ZWNJ) as sub-word boundary. Using a manually annotated corpus, our model achieves F1 score of 0.97 for word boundary identification and 0.85 for sub-word boundary identification tasks. We have made our code and corpus publicly available to make our results reproducible. @@ -2342,9 +2342,9 @@ <fixed-case>R</fixed-case>e<fixed-case>S</fixed-case>yf: a <fixed-case>F</fixed-case>rench lexicon with ranked synonyms - Mokhtar B.Billami + Mokhtar B.Billami ThomasFrançois - NúriaGala + NúriaGala 2570–2581 In this article, we present ReSyf, a lexical resource of monolingual synonyms ranked according to their difficulty to be read and understood by native learners of French. The synonyms come from an existing lexical network and they have been semantically disambiguated and refined. A ranking algorithm, based on a wide range of linguistic features and validated through an evaluation campaign with human annotators, automatically sorts the synonyms corresponding to a given word sense by reading difficulty. ReSyf is freely available and will be integrated into a web platform for reading assistance. It can also be applied to perform lexical simplification of French texts. C18-1218 @@ -2364,7 +2364,7 @@ Learning Multilingual Topics from Incomparable Corpora ShudongHao - Michael J.Paul + Michael J.Paul 2595–2609 Multilingual topic models enable crosslingual tasks by extracting consistent topics from multilingual corpora. Most models require parallel or comparable training corpora, which limits their ability to generalize. In this paper, we first demystify the knowledge transfer mechanism behind multilingual topic models by defining an alternative but equivalent formulation. Based on this analysis, we then relax the assumption of training data required by most existing models, creating a model that only requires a dictionary for training. Experiments show that our new method effectively learns coherent multilingual topics from partially and fully incomparable corpora with limited amounts of dictionary resources. C18-1220 @@ -2406,9 +2406,9 @@ Automatically Extracting Qualia Relations for the Rich Event Ontology GhazalehKazeminejad - ClaireBonial - Susan WindischBrown - MarthaPalmer + ClaireBonial + Susan WindischBrown + MarthaPalmer 2644–2652 Commonsense, real-world knowledge about the events that entities or “things in the world” are typically involved in, as well as part-whole relationships, is valuable for allowing computational systems to draw everyday inferences about the world. Here, we focus on automatically extracting information about (1) the events that typically bring about certain entities (origins), (2) the events that are the typical functions of entities, and (3) part-whole relationships in entities. These correspond to the agentive, telic and constitutive qualia central to the Generative Lexicon. We describe our motivations and methods for extracting these qualia relations from the Suggested Upper Merged Ontology (SUMO) and show that human annotators overwhelmingly find the information extracted to be reasonable. Because ontologies provide a way of structuring this information and making it accessible to agents and computational systems generally, efforts are underway to incorporate the extracted information to an ontology hub of Natural Language Processing semantic role labeling resources, the Rich Event Ontology. C18-1224 @@ -2416,7 +2416,7 @@ <fixed-case>S</fixed-case>e<fixed-case>V</fixed-case>e<fixed-case>N</fixed-case>: Augmenting Word Embeddings with Unsupervised Relation Vectors - LuisEspinosa-Anke + LuisEspinosa-Anke StevenSchockaert 2653–2665 We present SeVeN (Semantic Vector Networks), a hybrid resource that encodes relationships between words in the form of a graph. Different from traditional semantic networks, these relations are represented as vectors in a continuous vector space. We propose a simple pipeline for learning such relation vectors, which is based on word vector averaging in combination with an ad hoc autoencoder. We show that by explicitly encoding relational information in a dedicated vector space we can capture aspects of word meaning that are complementary to what is captured by word embeddings. For example, by examining clusters of relation vectors, we observe that relational similarities can be identified at a more abstract level than with traditional word vector differences. Finally, we test the effectiveness of semantic vector networks in two tasks: measuring word similarity and neural text categorization. SeVeN is available at bitbucket.org/luisespinosa/seven. @@ -2426,7 +2426,7 @@ Evaluation of Unsupervised Compositional Representations HananAldarmaki - MonaDiab + MonaDiab 2666–2677 We evaluated various compositional models, from bag-of-words representations to compositional RNN-based models, on several extrinsic supervised and unsupervised evaluation benchmarks. Our results confirm that weighted vector averaging can outperform context-sensitive models in most benchmarks, but structural features encoded in RNN models can also be useful in certain classification tasks. We analyzed some of the evaluation datasets to identify the aspects of meaning they measure and the characteristics of the various models that explain their performance variance. C18-1226 @@ -2435,7 +2435,7 @@ Using Formulaic Expressions in Writing Assistance Systems KenichiIwatsuki - AkikoAizawa + AkikoAizawa 2678–2689 Formulaic expressions (FEs) used in scholarly papers, such as ‘there has been little discussion about’, are helpful for non-native English speakers. However, it is time-consuming for users to manually search for an appropriate expression every time they want to consult FE dictionaries. For this reason, we tackle the task of semantic searches of FE dictionaries. At the start of our research, we identified two salient difficulties in this task. First, the paucity of example sentences in existing FE dictionaries results in a shortage of context information, which is necessary for acquiring semantic representation of FEs. Second, while a semantic category label is assigned to each FE in many FE dictionaries, it is difficult to predict the labels from user input, forcing users to manually designate the semantic category when searching. To address these difficulties, we propose a new framework for semantic searches of FEs and propose a new method to leverage both existing dictionaries and domain sentence corpora. Further, we expand an existing FE dictionary to consider building a more comprehensive and domain-specific FE dictionary and to verify the effectiveness of our method. C18-1227 @@ -2488,7 +2488,7 @@ JingjingGong XipengQiu ShaojingWang - XuanjingHuang + XuanjingHuang 2742–2752 While much progress has been made in how to encode a text sequence into a sequence of vectors, less attention has been paid to how to aggregate these preceding vectors (outputs of RNN/CNN) into fixed-size encoding vector. Usually, a simple max or average pooling is used, which is a bottom-up and passive way of aggregation and lack of guidance by task information. In this paper, we propose an aggregation mechanism to obtain a fixed-size encoding with a dynamic routing policy. The dynamic routing policy is dynamically deciding that what and how much information need be transferred from each word to the final encoding of the text sequence. Following the work of Capsule Network, we design two dynamic routing policies to aggregate the outputs of RNN/CNN encoding layer into a final encoding vector. Compared to the other aggregation methods, dynamic routing can refine the messages according to the state of final encoding vector. Experimental results on five text classification tasks show that our method outperforms other aggregating models by a significant margin. Related source code is released on our github page. Related source code is released on our github page. C18-1232 @@ -2532,7 +2532,7 @@ JosepCarmona HenrikLeopold JanMendling - LluísPadró + LluísPadró 2791–2801 The Business Process Management (BPM) field focuses in the coordination of labor so that organizational processes are smoothly executed in a way that products and services are properly delivered. At the same time, NLP has reached a maturity level that enables its widespread application in many contexts, thanks to publicly available frameworks. In this position paper, we show how NLP has potential in raising the benefits of BPM practices at different levels. Instead of being exhaustive, we show selected key challenges were a successful application of NLP techniques would facilitate the automation of particular tasks that nowadays require a significant effort to accomplish. Finally, we report on applications that consider both the process perspective and its enhancement through NLP. C18-1236 @@ -2543,7 +2543,7 @@ TirthankarGhosal VigneshEdithal AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya GeorgeTsatsaronis Srinivasa Satya Sameer KumarChivukula 2802–2813 @@ -2565,7 +2565,7 @@ JunwenDuan YueZhang XiaoDing - Ching-YunChang + Ching-YunChang TingLiu 2823–2833 Texts from the Internet serve as important data sources for financial market modeling. Early statistical approaches rely on manually defined features to capture lexical, sentiment and event information, which suffers from feature sparsity. Recent work has considered learning dense representations for news titles and abstracts. Compared to news titles, full documents can contain more potentially helpful information, but also noise compared to events and sentences, which has been less investigated in previous work. To fill this gap, we propose a novel target-specific abstract-guided news document representation model. The model uses a target-sensitive representation of the news abstract to weigh sentences in the news content, so as to select and combine the most informative sentences for market modeling. Results show that document representations can give better performance for estimating cumulative abnormal returns of companies when compared to titles and abstracts. Our model is especially effective when it used to combine information from multiple document sources compared to the sentence-level baselines. @@ -2594,7 +2594,7 @@ Towards a unified framework for bilingual terminology extraction of single-word and multi-word terms JingshuLiu EmmanuelMorin - PeñaSaldarriaga + PeñaSaldarriaga 2855–2866 Extracting a bilingual terminology for multi-word terms from comparable corpora has not been widely researched. In this work we propose a unified framework for aligning bilingual terms independently of the term lengths. We also introduce some enhancements to the context-based and the neural network based approaches. Our experiments show the effectiveness of our enhancements of previous works and the system can be adapted in specialized domains. C18-1242 @@ -2622,7 +2622,7 @@ Emotion Representation Mapping for Automatic Lexicon Construction (Mostly) Performs on Human Level - SvenBuechel + SvenBuechel UdoHahn 2892–2904 Emotion Representation Mapping (ERM) has the goal to convert existing emotion ratings from one representation format into another one, e.g., mapping Valence-Arousal-Dominance annotations for words or sentences into Ekman’s Basic Emotions and vice versa. ERM can thus not only be considered as an alternative to Word Emotion Induction (WEI) techniques for automatic emotion lexicon construction but may also help mitigate problems that come from the proliferation of emotion representation formats in recent years. We propose a new neural network approach to ERM that not only outperforms the previous state-of-the-art. Equally important, we present a refined evaluation methodology and gather strong evidence that our model yields results which are (almost) as reliable as human annotations, even in cross-lingual settings. Based on these results we generate new emotion ratings for 13 typologically diverse languages and claim that they have near-gold quality, at least. @@ -2632,7 +2632,7 @@ Emotion Detection and Classification in a Multigenre Corpus with Joint Multi-Task Deep Learning ShabnamTafreshi - MonaDiab + MonaDiab 2905–2913 Detection and classification of emotion categories expressed by a sentence is a challenging task due to subjectivity of emotion. To date, most of the models are trained and evaluated on single genre and when used to predict emotion in different genre their performance drops by a large margin. To address the issue of robustness, we model the problem within a joint multi-task learning framework. We train this model with a multigenre emotion corpus to predict emotions across various genre. Each genre is represented as a separate task, we use soft parameter shared layers across the various tasks. our experimental results show that this model improves the results across the various genres, compared to a single genre training in the same neural net architecture. C18-1246 @@ -2642,7 +2642,7 @@ How emotional are you? Neural Architectures for Emotion Intensity Prediction in Microblogs DevangKulshreshtha PranavGoel - AnilKumar Singh + AnilKumar Singh 2914–2926 Social media based micro-blogging sites like Twitter have become a common source of real-time information (impacting organizations and their strategies, and are used for expressing emotions and opinions. Automated analysis of such content therefore rises in importance. To this end, we explore the viability of using deep neural networks on the specific task of emotion intensity prediction in tweets. We propose a neural architecture combining convolutional and fully connected layers in a non-sequential manner - done for the first time in context of natural language based tasks. Combined with lexicon-based features along with transfer learning, our model achieves state-of-the-art performance, outperforming the previous system by 0.044 or 4.4% Pearson correlation on the WASSA’17 EmoInt shared task dataset. We investigate the performance of deep multi-task learning models trained for all emotions at once in a unified architecture and get encouraging results. Experiments performed on evaluating correlation between emotion pairs offer interesting insights into the relationship between them. C18-1247 @@ -2652,7 +2652,7 @@ Expressively vulgar: The socio-dynamics of vulgarity and its effects on sentiment analysis in social media IsabelCachola EricHolgate - DanielPreoţiuc-Pietro + DanielPreoţiuc-Pietro Junyi JessyLi 2927–2938 Vulgarity is a common linguistic expression and is used to perform several linguistic functions. Understanding their usage can aid both linguistic and psychological phenomena as well as benefit downstream natural language processing applications such as sentiment analysis. This study performs a large-scale, data-driven empirical analysis of vulgar words using social media data. We analyze the socio-cultural and pragmatic aspects of vulgarity using tweets from users with known demographics. Further, we collect sentiment ratings for vulgar tweets to study the relationship between the use of vulgar words and perceived sentiment and show that explicitly modeling vulgar words can boost sentiment analysis performance. @@ -2745,7 +2745,7 @@ Adaptive Weighting for Neural Machine Translation YachaoLi - JunhuiLi + JunhuiLi MinZhang 3038–3048 In the popular sequence to sequence (seq2seq) neural machine translation (NMT), there exist many weighted sum models (WSMs), each of which takes a set of input and generates one output. However, the weights in a WSM are independent of each other and fixed for all inputs, suggesting that by ignoring different needs of inputs, the WSM lacks effective control on the influence of each input. In this paper, we propose adaptive weighting for WSMs to control the contribution of each input. Specifically, we apply adaptive weighting for both GRU and the output state in NMT. Experimentation on Chinese-to-English translation and English-to-German translation demonstrates that the proposed adaptive weighting is able to much improve translation accuracy by achieving significant improvement of 1.49 and 0.92 BLEU points for the two translation tasks. Moreover, we discuss in-depth on what type of information is encoded in the encoder and how information influences the generation of target words in the decoder. @@ -2775,7 +2775,7 @@ An Empirical Investigation of Error Types in <fixed-case>V</fixed-case>ietnamese Parsing - QuyNguyen + QuyNguyen YusukeMiyao HiroshiNoji NhungNguyen @@ -2795,15 +2795,15 @@ Parallel Corpora for bi-lingual <fixed-case>E</fixed-case>nglish-<fixed-case>E</fixed-case>thiopian Languages Statistical Machine Translation - Solomon TeferraAbate + Solomon TeferraAbate MichaelMelese - Martha YifiruTachbelie + Martha YifiruTachbelie MillionMeshesha SolomonAtinafu WondwossenMulugeta YaregalAssabie HafteAbera - BinyamEphrem + BinyamEphrem TewodrosAbebe WondimagegnhueTsegaye AmanuelLemma @@ -2845,7 +2845,7 @@ deep<fixed-case>Q</fixed-case>uest: A Framework for Neural-based Quality Estimation JuliaIve - FrédéricBlain + FrédéricBlain LuciaSpecia 3146–3157 Predicting Machine Translation (MT) quality can help in many practical tasks such as MT post-editing. The performance of Quality Estimation (QE) methods has drastically improved recently with the introduction of neural approaches to the problem. However, thus far neural approaches have only been designed for word and sentence-level prediction. We present a neural framework that is able to accommodate neural QE approaches at these fine-grained levels and generalize them to the level of documents. We test the framework with two sentence-level neural QE approaches: a state of the art approach that requires extensive pre-training, and a new light-weight approach that we propose, which employs basic encoders. Our approach is significantly faster and yields performance improvements for a range of document-level quality estimation tasks. To our knowledge, this is the first neural architecture for document-level QE. In addition, for the first time we apply QE models to the output of both statistical and neural MT systems for a series of European languages and highlight the new challenges resulting from the use of neural MT. @@ -2856,7 +2856,7 @@ Butterfly Effects in Frame Semantic Parsing: impact of data processing on model ranking AlexandreKabbach CorentinRibeyre - AurélieHerbelot + AurélieHerbelot 3158–3169 Knowing the state-of-the-art for a particular task is an essential component of any computational linguistics investigation. But can we be truly confident that the current state-of-the-art is indeed the best performing model? In this paper, we study the case of frame semantic parsing, a well-established task with multiple shared datasets. We show that in spite of all the care taken to provide a standard evaluation resource, small variations in data processing can have dramatic consequences for ranking parser performance. This leads us to propose an open-source standardized processing pipeline, which can be shared and reused for robust model comparison. C18-1267 @@ -2874,7 +2874,7 @@ Sentence Weighting for Neural Machine Translation Domain Adaptation ShiqiZhang - DeyiXiong + DeyiXiong 3181–3190 In this paper, we propose a new sentence weighting method for the domain adaptation of neural machine translation. We introduce a domain similarity metric to evaluate the relevance between a sentence and an available entire domain dataset. The similarity of each sentence to the target domain is calculated with various methods. The computed similarity is then integrated into the training objective to weight sentences. The adaptation results on both IWSLT Chinese-English TED task and a task with only synthetic training parallel data show that our sentence weighting method is able to achieve an significant improvement over strong baselines. C18-1269 @@ -2905,7 +2905,7 @@ Revisiting the Hierarchical Multiscale <fixed-case>LSTM</fixed-case> ÁkosKádár Marc-AlexandreCôté - GrzegorzChrupała + GrzegorzChrupała AfraAlishahi 3215–3227 Hierarchical Multiscale LSTM (Chung et. al., 2016) is a state-of-the-art language model that learns interpretable structure from character-level input. Such models can provide fertile ground for (cognitive) computational linguistics studies. However, the high complexity of the architecture, training and implementations might hinder its applicability. We provide a detailed reproduction and ablation study of the architecture, shedding light on some of the potential caveats of re-purposing complex deep-learning architectures. We further show that simplifying certain aspects of the architecture can in fact improve its performance. We also investigate the linguistic units (segments) learned by various levels of the model, and argue that their quality does not correlate with the overall performance of the model on language modeling. @@ -2917,7 +2917,7 @@ ChanheeLee Young-BumKim DongyubLee - HeuiseokLim + HeuiseokLim 3228–3239 Generating character-level features is an important step for achieving good results in various natural language processing tasks. To alleviate the need for human labor in generating hand-crafted features, methods that utilize neural architectures such as Convolutional Neural Network (CNN) or Recurrent Neural Network (RNN) to automatically extract such features have been proposed and have shown great results. However, CNN generates position-independent features, and RNN is slow since it needs to process the characters sequentially. In this paper, we propose a novel method of using a densely connected network to automatically extract character-level features. The proposed method does not require any language or task specific assumptions, and shows robustness and effectiveness while being faster than CNN- or RNN-based methods. Evaluating this method on three sequence labeling tasks - slot tagging, Part-of-Speech (POS) tagging, and Named-Entity Recognition (NER) - we obtain state-of-the-art performance with a 96.62 F1-score and 97.73% accuracy on slot tagging and POS tagging, respectively, and comparable performance to the state-of-the-art 91.13 F1-score on NER. C18-1273 @@ -2929,7 +2929,7 @@ AkihiroTamura TakashiNinomiya HiroyaTakamura - ManabuOkumura + ManabuOkumura 3240–3250 This study proposes a new neural machine translation (NMT) model based on the encoder-decoder model that incorporates named entity (NE) tags of source-language sentences. Conventional NMT models have two problems enumerated as follows: (i) they tend to have difficulty in translating words with multiple meanings because of the high ambiguity, and (ii) these models’abilitytotranslatecompoundwordsseemschallengingbecausetheencoderreceivesaword, a part of the compound word, at each time step. To alleviate these problems, the encoder of the proposed model encodes the input word on the basis of its NE tag at each time step, which could reduce the ambiguity of the input word. Furthermore,the encoder introduces a chunk-level LSTM layer over a word-level LSTM layer and hierarchically encodes a source-language sentence to capture a compound NE as a chunk on the basis of the NE tags. We evaluate the proposed model on an English-to-Japanese translation task with the ASPEC, and English-to-Bulgarian and English-to-Romanian translation tasks with the Europarl corpus. The evaluation results show that the proposed model achieves up to 3.11 point improvement in BLEU. C18-1274 @@ -2976,8 +2976,8 @@ Integrating Question Classification and Deep Learning for improved Answer Selection HarishTayyar Madabushi - MarkLee - JohnBarnden + MarkLee + JohnBarnden 3283–3294 We present a system for Answer Selection that integrates fine-grained Question Classification with a Deep Learning model designed for Answer Selection. We detail the necessary changes to the Question Classification taxonomy and system, the creation of a new Entity Identification system and methods of highlighting entities to achieve this objective. Our experiments show that Question Classes are a strong signal to Deep Learning models for Answer Selection, and enable us to outperform the current state of the art in all variations of our experiments except one. In the best configuration, our MRR and MAP scores outperform the current state of the art by between 3 and 5 points on both versions of the TREC Answer Selection test set, a standard dataset for this task. C18-1278 @@ -2990,7 +2990,7 @@ MinYang YaliangLi NanDu - WeiFan + WeiFan KaiLei 3295–3305 Answer selection is an important but challenging task. Significant progresses have been made in domains where a large amount of labeled training data is available. However, obtaining rich annotated data is a time-consuming and expensive process, creating a substantial barrier for applying answer selection models to a new domain which has limited labeled data. In this paper, we propose Knowledge-aware Attentive Network (KAN), a transfer learning framework for cross-domain answer selection, which uses the knowledge base as a bridge to enable knowledge transfer from the source domain to the target domains. Specifically, we design a knowledge module to integrate the knowledge-based representational learning into answer selection models. The learned knowledge-based representations are shared by source and target domains, which not only leverages large amounts of cross-domain data, but also benefits from a regularization effect that leads to more general representations to help tasks in new domains. To verify the effectiveness of our model, we use SQuAD-T dataset as the source domain and three other datasets (i.e., Yahoo QA, TREC QA and InsuranceQA) as the target domains. The experimental results demonstrate that KAN has remarkable applicability and generality, and consistently outperforms the strong competitors by a noticeable margin for cross-domain answer selection. @@ -3041,7 +3041,7 @@ SebastianDungs AhmetAker NorbertFuhr - KalinaBontcheva + KalinaBontcheva 3360–3370 Prior manual studies of rumours suggested that crowd stance can give insights into the actual rumour veracity. Even though numerous studies of automatic veracity classification of social media rumours have been carried out, none explored the effectiveness of leveraging crowd stance to determine veracity. We use stance as an additional feature to those commonly used in earlier studies. We also model the veracity of a rumour using variants of Hidden Markov Models (HMM) and the collective stance information. This paper demonstrates that HMMs that use stance and tweets’ times as the only features for modelling true and false rumours achieve F1 scores in the range of 80%, outperforming those approaches where stance is used jointly with content and user based features. C18-1284 @@ -3061,7 +3061,7 @@ Predicting Stances from Social Media Posts using Factorization Machines AkiraSasaki KazuakiHanawa - NaoakiOkazaki + NaoakiOkazaki KentaroInui 3381–3390 Social media provide platforms to express, discuss, and shape opinions about events and issues in the real world. An important step to analyze the discussions on social media and to assist in healthy decision-making is stance detection. This paper presents an approach to detect the stance of a user toward a topic based on their stances toward other topics and the social media posts of the user. We apply factorization machines, a widely used method in item recommendation, to model user preferences toward topics from the social media data. The experimental results demonstrate that users’ posts are useful to model topic preferences and therefore predict stances of silent users. @@ -3073,7 +3073,7 @@ VerónicaPérez-Rosas BennettKleinberg AlexandraLefevre - RadaMihalcea + RadaMihalcea 3391–3401 The proliferation of misleading information in everyday access media outlets such as social media feeds, news blogs, and online newspapers have made it challenging to identify trustworthy news sources, thus increasing the need for computational tools able to provide insights into the reliability of online content. In this paper, we focus on the automatic identification of fake content in online news. Our contribution is twofold. First, we introduce two novel datasets for the task of fake news detection, covering seven different news domains. We describe the collection, annotation, and validation process in detail and present several exploratory analyses on the identification of linguistic differences in fake and legitimate news content. Second, we conduct a set of learning experiments to build accurate fake news detectors, and show that we can achieve accuracies of up to 76%. In addition, we provide comparative analyses of the automatic and manual identification of fake news. C18-1287 @@ -3094,7 +3094,7 @@ PaulGroth MikeLauruhn AntonyScerri - RonDaniel Jr. + RonDaniel Jr. 3414–3423 Open Information Extraction (OIE) is the task of the unsupervised creation of structured information from text. OIE is often used as a starting point for a number of downstream tasks including knowledge base construction, relation extraction, and question answering. While OIE methods are targeted at being domain independent, they have been evaluated primarily on newspaper, encyclopedic or general web text. In this article, we evaluate the performance of OIE on scientific texts originating from 10 different disciplines. To do so, we use two state-of-the-art OIE systems using a crowd-sourcing approach. We find that OIE systems perform significantly worse on scientific text than encyclopedic text. We also provide an error analysis and suggest areas of work to reduce errors. Our corpus of sentences and judgments are made available. C18-1289 @@ -3102,7 +3102,7 @@ Simple Algorithms For Sentiment Analysis On Sentiment Rich, Data Poor Domains. - PrathushaK Sarma + PrathushaK Sarma WilliamSethares 3424–3435 Standard word embedding algorithms learn vector representations from large corpora of text documents in an unsupervised fashion. However, the quality of word embeddings learned from these algorithms is affected by the size of training data sets. Thus, applications of these algorithms in domains with only moderate amounts of available data is limited. In this paper we introduce an algorithm that learns word embeddings jointly with a classifier. Our algorithm is called SWESA (Supervised Word Embeddings for Sentiment Analysis). SWESA leverages document label information to learn vector representations of words from a modest corpus of text documents by solving an optimization problem that minimizes a cost function with respect to both word embeddings and the weight vector used for classification. Experiments on several real world data sets show that SWESA has superior performance on domains with limited data, when compared to previously suggested approaches to word embeddings and sentiment analysis tasks. @@ -3112,7 +3112,7 @@ Word-Level Loss Extensions for Neural Temporal Relation Classification ArtuurLeeuwenberg - Marie-FrancineMoens + Marie-FrancineMoens 3436–3447 Unsupervised pre-trained word embeddings are used effectively for many tasks in natural language processing to leverage unlabeled textual data. Often these embeddings are either used as initializations or as fixed word representations for task-specific classification models. In this work, we extend our classification model’s task loss with an unsupervised auxiliary loss on the word-embedding level of the model. This is to ensure that the learned word representations contain both task-specific features, learned from the supervised loss component, and more general features learned from the unsupervised loss component. We evaluate our approach on the task of temporal relation extraction, in particular, narrative containment relation extraction from clinical records, and show that continued training of the embeddings on the unsupervised objective together with the task objective gives better task-specific embeddings, and results in an improvement over the state of the art on the THYME dataset, using only a general-domain part-of-speech tagger as linguistic resource. C18-1291 @@ -3121,7 +3121,7 @@ Personalized Text Retrieval for Learners of <fixed-case>C</fixed-case>hinese as a Foreign Language Chak YanYeung - JohnLee + JohnLee 3448–3455 This paper describes a personalized text retrieval algorithm that helps language learners select the most suitable reading material in terms of vocabulary complexity. The user first rates their knowledge of a small set of words, chosen by a graph-based active learning model. The system trains a complex word identification model on this set, and then applies the model to find texts that contain the desired proportion of new, challenging, and familiar vocabulary. In an evaluation on learners of Chinese as a foreign language, we show that this algorithm is effective in identifying simpler texts for low-proficiency learners, and more challenging ones for high-proficiency learners. C18-1292 @@ -3130,7 +3130,7 @@ Punctuation as Native Language Interference IliaMarkov - ViviNastase + ViviNastase CarloStrapparava 3456–3466 In this paper, we describe experiments designed to explore and evaluate the impact of punctuation marks on the task of native language identification. Punctuation is specific to each language, and is part of the indicators that overtly represent the manner in which each language organizes and conveys information. Our experiments are organized in various set-ups: the usual multi-class classification for individual languages, also considering classification by language groups, across different proficiency levels, topics and even cross-corpus. The results support our hypothesis that punctuation marks are persistent and robust indicators of the native language of the author, which do not diminish in influence even when a high proficiency level in a non-native language is achieved. @@ -3141,7 +3141,7 @@ Investigating Productive and Receptive Knowledge: A Profile for Second Language Learning LeonardoZilio RodrigoWilkens - CédrickFairon + CédrickFairon 3467–3478 The literature frequently addresses the differences in receptive and productive vocabulary, but grammar is often left unacknowledged in second language acquisition studies. In this paper, we used two corpora to investigate the divergences in the behavior of pedagogically relevant grammatical structures in reception and production texts. We further improved the divergence scores observed in this investigation by setting a polarity to them that indicates whether there is overuse or underuse of a grammatical structure by language learners. This led to the compilation of a language profile that was later combined with vocabulary and readability features for classifying reception and production texts in three classes: beginner, intermediate, and advanced. The results of the automatic classification task in both production (0.872 of F-measure) and reception (0.942 of F-measure) were comparable to the current state of the art. We also attempted to automatically attribute a score to texts produced by learners, and the correlation results were encouraging, but there is still a good amount of room for improvement in this task. The developed language profile will serve as input for a system that helps language learners to activate more of their passive knowledge in writing texts. C18-1294 @@ -3163,8 +3163,8 @@ ShengXu XiaominChu PeifengLi - QiaomingZhu - GuodongZhou + QiaomingZhu + GuodongZhou 3493–3504 In view of the differences between the annotations of micro and macro discourse rela-tionships, this paper describes the relevant experiments on the construction of the Macro Chinese Discourse Treebank (MCDTB), a higher-level Chinese discourse corpus. Fol-lowing RST (Rhetorical Structure Theory), we annotate the macro discourse information, including discourse structure, nuclearity and relationship, and the additional discourse information, including topic sentences, lead and abstract, to make the macro discourse annotation more objective and accurate. Finally, we annotated 720 articles with a Kappa value greater than 0.6. Preliminary experiments on this corpus verify the computability of MCDTB. C18-1296 @@ -3183,7 +3183,7 @@ Bridging resolution: Task definition, corpus resources and rule-based experiments - InaRoesiger + InaRoesiger ArndtRiester JonasKuhn 3516–3528 @@ -3208,7 +3208,7 @@ <fixed-case>ISO</fixed-case>-Standard Domain-Independent Dialogue Act Tagging for Conversational Agents StefanoMezza AlessandraCervone - EvgenyStepanov + EvgenyStepanov GiulianoTortoreto GiuseppeRiccardi 3539–3551 @@ -3265,8 +3265,8 @@ YuZhou JiajunZhang LiangZhao - Mei-YuhHwang - ChengqingZong + Mei-YuhHwang + ChengqingZong 3597–3607 To deploy a spoken language understanding (SLU) model to a new language, language transferring is desired to avoid the trouble of acquiring and labeling a new big SLU corpus. An SLU corpus is a monolingual corpus with domain/intent/slot labels. Translating the original SLU corpus into the target language is an attractive strategy. However, SLU corpora consist of plenty of semantic labels (slots), which general-purpose translators cannot handle well, not to mention additional culture differences. This paper focuses on the language transferring task given a small in-domain parallel SLU corpus. The in-domain parallel corpus can be used as the first adaptation on the general translator. But more importantly, we show how to use reinforcement learning (RL) to further adapt the adapted translator, where translated sentences with more proper slot tags receive higher rewards. Our reward is derived from the source input sentence exclusively, unlike reward via actor-critical methods or computing reward with a ground truth target sentence. Hence we can adapt the translator the second time, using the big monolingual SLU corpus from the source language. We evaluate our approach on Chinese to English language transferring for SLU systems. The experimental results show that the generated English SLU corpus via adaptation and reinforcement learning gives us over 97% in the slot F1 score and over 84% accuracy in domain classification. It demonstrates the effectiveness of the proposed language transferring method. Compared with naive translation, our proposed method improves domain classification accuracy by relatively 22%, and the slot filling F1 score by relatively more than 71%. C18-1305 @@ -3288,7 +3288,7 @@ Adaptive Multi-Task Transfer Learning for <fixed-case>C</fixed-case>hinese Word Segmentation in Medical Text JunjieXing - KennyZhu + KennyZhu ShaodianZhang 3619–3630 Chinese word segmentation (CWS) trained from open source corpus faces dramatic performance drop when dealing with domain text, especially for a domain with lots of special terms and diverse writing styles, such as the biomedical domain. However, building domain-specific CWS requires extremely high annotation cost. In this paper, we propose an approach by exploiting domain-invariant knowledge from high resource to low resource domains. Extensive experiments show that our model achieves consistently higher accuracy than the single-task CWS and other transfer learning baselines, especially when there is a large disparity between source and target domains. @@ -3309,7 +3309,7 @@ Graph Based Decoding for Event Sequencing and Coreference Resolution ZhengzhongLiu TerukoMitamura - EduardHovy + EduardHovy 3645–3657 Events in text documents are interrelated in complex ways. In this paper, we study two types of relation: Event Coreference and Event Sequencing. We show that the popular tree-like decoding structure for automated Event Coreference is not suitable for Event Sequencing. To this end, we propose a graph-based decoding algorithm that is applicable to both tasks. The new decoding algorithm supports flexible feature sets for both tasks. Empirically, our event coreference system has achieved state-of-the-art performance on the TAC-KBP 2015 event coreference task and our event sequencing system beats a strong temporal-based, oracle-informed baseline. We discuss the challenges of studying these event relations. C18-1309 @@ -3320,7 +3320,7 @@ Emielvan Miltenburg ÁkosKádár RuudKoolen - EmielKrahmer + EmielKrahmer 3658–3669 We present a corpus of spoken Dutch image descriptions, paired with two sets of eye-tracking data: Free viewing, where participants look at images without any particular purpose, and Description viewing, where we track eye movements while participants produce spoken descriptions of the images they are viewing. This paper describes the data collection procedure and the corpus itself, and provides an initial analysis of self-corrections in image descriptions. We also present two studies showing the potential of this data. Though these studies mainly serve as an example, we do find two interesting results: (1) the eye-tracking data for the description viewing task is more coherent than for the free-viewing task; (2) variation in image descriptions (also called ‘image specificity’; Jas and Parikh, 2015) is only moderately correlated across different languages. Our corpus can be used to gain a deeper understanding of the image description task, particularly how visual attention is correlated with the image description process. C18-1310 @@ -3328,7 +3328,7 @@ Narrative Schema Stability in News Text - DanSimonson + DanSimonson AnthonyDavis 3670–3680 We investigate the stability of narrative schemas (Chambers and Jurafsky, 2009) automatically induced from a news corpus, representing recurring narratives in a corpus. If such techniques produce meaningful results, we should expect that small changes to the corpus will result in only small changes to the induced schemas. We describe experiments involving successive ablation of a corpus and cross-validation at each stage of ablation, on schemas generated by three different techniques over a general news corpus and topically-specific subcorpora. We also develop a method for evaluating the similarity between sets of narrative schemas, and thus the stability of the schema induction algorithms. This stability analysis affirms the heterogeneous/homogeneous document category hypothesis first presented in Simonson and Davis (2016), whose technique is problematically limited. Additionally, increased ablation leads to increasing stability, so the smaller the remaining corpus, the more stable schema generation appears to be. We surmise that as a corpus grows larger, novel and more varied narratives continue to appear and stability declines, though at some point this decline levels off as new additions to the corpus consist essentially of “more of the same.” @@ -3338,7 +3338,7 @@ <fixed-case>NIPS</fixed-case> Conversational Intelligence Challenge 2017 Winner System: Skill-based Conversational Agent with Supervised Dialog Manager IdrisYusupov - YuriiKuratov + YuriiKuratov 3681–3692 We present bot#1337: a dialog system developed for the 1st NIPS Conversational Intelligence Challenge 2017 (ConvAI). The aim of the competition was to implement a bot capable of conversing with humans based on a given passage of text. To enable conversation, we implemented a set of skills for our bot, including chit-chat, topic detection, text summarization, question answering and question generation. The system has been trained in a supervised setting using a dialogue manager to select an appropriate skill for generating a response. The latter allows a developer to focus on the skill implementation rather than the finite state machine based dialog manager. The proposed system bot#1337 won the competition with an average dialogue quality score of 2.78 out of 5 given by human evaluators. Source code and trained models for the bot#1337 are available on GitHub. C18-1312 @@ -3351,7 +3351,7 @@ KiraGriffitt UlfHermjakob KevinKnight - MarthaPalmer + MarthaPalmer 3693–3702 There are few corpora that endeavor to represent the semantic content of entire documents. We present a corpus that accomplishes one way of capturing document level semantics, by annotating coreference and similar phenomena (bridging and implicit roles) on top of gold Abstract Meaning Representations of sentence-level semantics. We present a new corpus of this annotation, with analysis of its quality, alongside a plausible baseline for comparison. It is hoped that this Multi-Sentence AMR corpus (MS-AMR) may become a feasible method for developing rich representations of document meaning, useful for tasks such as information extraction and question answering. C18-1313 @@ -3364,7 +3364,7 @@ XiangkunHu YangLiu QiZhang - XuanjingHuang + XuanjingHuang 3703–3714 In this paper, we investigate the issue of persuasiveness evaluation for argumentative comments. Most of the existing research explores different text features of reply comments on word level and ignores interactions between participants. In general, viewpoints are usually expressed by multiple arguments and exchanged on argument level. To better model the process of dialogical argumentation, we propose a novel co-attention mechanism based neural network to capture the interactions between participants on argument level. Experimental results on a publicly available dataset show that the proposed model significantly outperforms some state-of-the-art methods for persuasiveness evaluation. Further analysis reveals that attention weights computed in our model are able to extract interactive argument pairs from the original post and the reply. C18-1314 @@ -3372,7 +3372,7 @@ Learning Visually-Grounded Semantics from Contrastive Adversarial Samples - HaoyueShi + HaoyueShi JiayuanMao TeteXiao YuningJiang @@ -3409,7 +3409,7 @@ HenningWachsmuth ManfredStede RoxanneEl Baff - KhalidAl-Khatib + KhalidAl-Khatib MariaSkeppstedt BennoStein 3753–3765 @@ -3422,7 +3422,7 @@ SumanBanerjee NikitaMoghe SiddharthaArora - Mitesh M.Khapra + Mitesh M.Khapra 3766–3780 There is an increasing demand for goal-oriented conversation systems which can assist users in various day-to-day activities such as booking tickets, restaurant reservations, shopping, etc. Most of the existing datasets for building such conversation systems focus on monolingual conversations and there is hardly any work on multilingual and/or code-mixed conversations. Such datasets and systems thus do not cater to the multilingual regions of the world, such as India, where it is very common for people to speak more than one language and seamlessly switch between them resulting in code-mixed conversations. For example, a Hindi speaking user looking to book a restaurant would typically ask, “Kya tum is restaurant mein ek table book karne mein meri help karoge?” (“Can you help me in booking a table at this restaurant?”). To facilitate the development of such code-mixed conversation models, we build a goal-oriented dialog dataset containing code-mixed conversations. Specifically, we take the text from the DSTC2 restaurant reservation dataset and create code-mixed versions of it in Hindi-English, Bengali-English, Gujarati-English and Tamil-English. We also establish initial baselines on this dataset using existing state of the art models. This dataset along with our baseline implementations will be made publicly available for research purposes. C18-1319 @@ -3497,7 +3497,7 @@ A Survey on Open Information Extraction ChristinaNiklaus MatthiasCetto - AndréFreitas + AndréFreitas SiegfriedHandschuh 3866–3878 We provide a detailed overview of the various approaches that were proposed to date to solve the task of Open Information Extraction. We present the major challenges that such systems face, show the evolution of the suggested approaches over time and depict the specific issues they address. In addition, we provide a critique of the commonly applied evaluation procedures for assessing the performance of Open IE systems and highlight some directions for future work. @@ -3563,7 +3563,7 @@ Abbreviation Expander - a Web-based System for Easy Reading of Technical Documents - Manuel R.Ciosici + Manuel R.Ciosici IraAssent 1–4 Abbreviations and acronyms are a part of textual communication in most domains. However, abbreviations are not necessarily defined in documents that employ them. Understanding all abbreviations used in a given document often requires extensive knowledge of the target domain and the ability to disambiguate based on context. This creates considerable entry barriers to newcomers and difficulties in automated document processing. Existing abbreviation expansion systems or tools require substantial technical knowledge for set up or make strong assumptions which limit their use in practice. Here, we present Abbreviation Expander, a system that builds on state of the art methods for identification of abbreviations, acronyms and their definitions and a novel disambiguator for abbreviation expansion in an easily accessible web-based solution. @@ -3585,7 +3585,7 @@ <fixed-case>J</fixed-case>e<fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>: Interleaving Semantics and Emotions in a Web Service for the Exploration of Language Change Phenomena JohannesHellrich - SvenBuechel + SvenBuechel UdoHahn 10–14 We here introduce a substantially extended version of JeSemE, an interactive website for visually exploring computationally derived time-variant information on word meanings and lexical emotions assembled from five large diachronic text corpora. JeSemE is designed for scholars in the (digital) humanities as an alternative to consulting manually compiled, printed dictionaries for such information (if available at all). This tool uniquely combines state-of-the-art distributional semantics with a nuanced model of human emotions, two information streams we deem beneficial for a data-driven interpretation of texts in the humanities. @@ -3607,11 +3607,11 @@ A <fixed-case>K</fixed-case>orean Knowledge Extraction System for Enriching a <fixed-case>KB</fixed-case>ox SanghaNam - Eun-kyungKim + Eun-kyungKim JihoKim YoosungJung KijongHan - Key-SunChoi + Key-SunChoi 20–24 The increased demand for structured knowledge has created considerable interest in knowledge extraction from natural language sentences. This study presents a new Korean knowledge extraction system and web interface for enriching a KBox knowledge base that expands based on the Korean DBpedia. The aim is to create an endpoint where knowledge can be extracted and added to KBox anytime and anywhere. C18-2005 @@ -3657,7 +3657,7 @@ <fixed-case>CRST</fixed-case>: a Claim Retrieval System in <fixed-case>T</fixed-case>witter WenjiaMa - WenHanChao + WenHanChao ZhunchenLuo XinJiang 43–47 @@ -3667,10 +3667,10 @@ Utilizing Graph Measure to Deduce Omitted Entities in Paragraphs - Eun-kyungKim + Eun-kyungKim KijongHan JihoKim - Key-SunChoi + Key-SunChoi 48–52 This demo deals with the problem of capturing omitted arguments in relation extraction given a proper knowledge base for entities of interest. This paper introduces the concept of a salient entity and use this information to deduce omitted entities in the paragraph which allows improving the relation extraction quality. The main idea to compute salient entities is to construct a graph on the given information (by identifying the entities but without parsing it), rank it with standard graph measures and embed it in the context of the sentences. C18-2011 @@ -3678,7 +3678,7 @@ Transparent, Efficient, and Robust Word Embedding Access with <fixed-case>WOMBAT</fixed-case> - Mark-ChristophMüller + Mark-ChristophMüller MichaelStrube 53–57 We present WOMBAT, a Python tool which supports NLP practitioners in accessing word embeddings from code. WOMBAT addresses common research problems, including unified access, scaling, and robust and reproducible preprocessing. Code that uses WOMBAT for accessing word embeddings is not only cleaner, more readable, and easier to reuse, but also much more efficient than code using standard in-memory methods: a Python script using WOMBAT for evaluating seven large word embedding collections (8.7M embedding vectors in total) on a simple SemEval sentence similarity task involving 250 raw sentence pairs completes in under ten seconds end-to-end on a standard notebook computer. @@ -3717,8 +3717,8 @@ Simulating Language Evolution: a Tool for Historical Linguistics - Alina MariaCiobanu - Liviu P.Dinu + Alina MariaCiobanu + Liviu P.Dinu 68–72 Language change across space and time is one of the main concerns in historical linguistics. In this paper, we develop a language evolution simulator: a web-based tool for word form production to assist in historical linguistics, in studying the evolution of the languages. Given a word in a source language, the system automatically predicts how the word evolves in a target language. The method that we propose is language-agnostic and does not use any external knowledge, except for the training word pairs. C18-2015 @@ -3751,7 +3751,7 @@ Yu-ChunLo Jhih-JieChen ChingyuYang - JasonChang + JasonChang 82–85 This paper presents a grammatical error correction (GEC) system that provides corrective feedback for essays. We apply the sequence-to-sequence model, which is frequently used in machine translation and text summarization, to this GEC task. The model is trained by EF-Cambridge Open Language Database (EFCAMDAT), a large learner corpus annotated with grammatical errors and corrections. Evaluation shows that our system achieves competitive performance on a number of publicly available testsets. C18-2018 @@ -3772,12 +3772,12 @@ MarkusMüller JanNiehues Thai-SonNguyen - Ngoc-QuanPham + Ngoc-QuanPham ElizabethSalesky MatthiasSperber - SebastianStüker + SebastianStüker ThomasZenkel - AlexanderWaibel + AlexanderWaibel 89–93 In today’s globalized world we have the ability to communicate with people across the world. However, in many situations the language barrier still presents a major issue. For example, many foreign students coming to KIT to study are initially unable to follow a lecture in German. Therefore, we offer an automatic simultaneous interpretation service for students. To fulfill this task, we have developed a low-latency translation system that is adapted to lectures and covers several language pairs. While the switch from traditional Statistical Machine Translation to Neural Machine Translation (NMT) significantly improved performance, to integrate NMT into the speech translation framework required several adjustments. We have addressed the run-time constraints and different types of input. Furthermore, we utilized one-shot learning to easily add new topic-specific terms to the system. Besides better performance, NMT also enabled us increase our covered languages through multilingual NMT. % Combining these techniques, we are able to provide an adapted speech translation system for several European languages. C18-2020 @@ -3787,7 +3787,7 @@ <fixed-case>G</fixed-case>raphene: a Context-Preserving Open Information Extraction System MatthiasCetto ChristinaNiklaus - AndréFreitas + AndréFreitas SiegfriedHandschuh 94–98 We introduce Graphene, an Open IE system whose goal is to generate accurate, meaningful and complete propositions that may facilitate a variety of downstream semantic applications. For this purpose, we transform syntactically complex input sentences into clean, compact structures in the form of core facts and accompanying contexts, while identifying the rhetorical relations that hold between them in order to maintain their semantic relationship. In that way, we preserve the context of the relational tuples extracted from a source sentence, generating a novel lightweight semantic representation for Open IE that enhances the expressiveness of the extracted propositions. @@ -3799,7 +3799,7 @@ Shang-ChienCheng Jhih-JieChen ChingyuYang - JasonChang + JasonChang 99–102 In this paper, we present a system, LanguageNet, which can help second language learners to search for different meanings and usages of a word. We disambiguate word senses based on the pairs of an English word and its corresponding Chinese translations in a parallel corpus, UM-Corpus. The process involved performing word alignment, learning vector space representations of words and training a classifier to distinguish words into groups of senses. LanguageNet directly shows the definition of a sense, bilingual synonyms and sense relevant examples. C18-2022 @@ -3882,7 +3882,7 @@ <fixed-case>WARP</fixed-case>-Text: a Web-Based Tool for Annotating Relationships between Pairs of Texts VenelinKovatchev - M. AntòniaMartí + M. AntòniaMartí MariaSalamó 132–136 We present WARP-Text, an open-source web-based tool for annotating relationships between pairs of texts. WARP-Text supports multi-layer annotation and custom definitions of inter-textual and intra-textual relationships. Annotation can be performed at different granularity levels (such as sentences, phrases, or tokens). WARP-Text has an intuitive user-friendly interface both for project managers and annotators. WARP-Text fills a gap in the currently available NLP toolbox, as open-source alternatives for annotation of pairs of text are not readily available. WARP-Text has already been used in several annotation tasks and can be of interest to the researchers working in the areas of Paraphrasing, Entailment, Simplification, and Summarization, among others. @@ -3914,7 +3914,7 @@ XinJiang HaiYe ZhunchenLuo - WenHanChao + WenHanChao WenjiaMa 146–151 This paper proposes a neural based system to solve the essential interpretability problem existing in text classification, especially in charge prediction task. First, we use a deep reinforcement learning method to extract rationales which mean short, readable and decisive snippets from input text. Then a rationale augmented classification model is proposed to elevate the prediction accuracy. Naturally, the extracted rationales serve as the introspection explanation for the prediction result of the model, enhancing the transparency of the model. Experimental results demonstrate that our system is able to extract readable rationales in a high consistency with manual annotation and is comparable with the attention model in prediction accuracy. @@ -3926,7 +3926,7 @@ ShehrozeKhan JihyunKim TarikZulfikarpasic - PeterChen + PeterChen NizarHabash 152–156 We present Qutr (Query Translator), a smart cross-lingual communication application for the travel domain. Qutr is a real-time messaging app that automatically translates conversations while supporting keyword-to-sentence matching. Qutr relies on querying a database that holds commonly used pre-translated travel-domain phrases and phrase templates in different languages with the use of keywords. The query matching supports paraphrases, incomplete keywords and some input spelling errors. The application addresses common cross-lingual communication issues such as translation accuracy, speed, privacy, and personalization. @@ -3948,7 +3948,7 @@ Quynh Ngoc ThiDo ArtuurLeeuwenberg GeertHeyman - Marie-FrancineMoens + Marie-FrancineMoens 161–165 This paper presents a flexible and open source framework for deep semantic role labeling. We aim at facilitating easy exploration of model structures for multiple languages with different characteristics. It provides flexibility in its model construction in terms of word representation, sequence representation, output modeling, and inference styles and comes with clear output visualization. The framework is available under the Apache 2.0 license. C18-2035 @@ -3959,8 +3959,8 @@ Proceedings of the 27th International Conference on Computational Linguistics: Tutorial Abstracts C18-3 - DoniaScott - MarilynWalker + DoniaScott + MarilynWalker PascaleFung Association for Computational Linguistics
Santa Fe, New Mexico, USA
@@ -3976,7 +3976,7 @@ <fixed-case>NLP</fixed-case> for Conversations: Sentiment, Summarization, and Group Dynamics GabrielMurray GiuseppeCarenini - ShafiqJoty + ShafiqJoty 1–4 C18-3001 murray-etal-2018-nlp @@ -3984,16 +3984,16 @@ Practical Parsing for Downstream Applications DanielDakota - SandraKübler + SandraKübler 5–7 C18-3002 dakota-kubler-2018-practical Frame Semantics across Languages: Towards a Multilingual <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et - Collin F.Baker - MichaelEllsworth - Miriam R. L.Petruck + Collin F.Baker + MichaelEllsworth + Miriam R. L.Petruck SwabhaSwayamdipta 9–12 C18-3003 @@ -4001,7 +4001,7 @@ Deep <fixed-case>B</fixed-case>ayesian Learning and Understanding - Jen-TzungChien + Jen-TzungChien 13–18 C18-3004 C18-3004.Presentation.pdf @@ -4009,7 +4009,7 @@ Data-Driven Text Simplification - SanjaŠtajner + SanjaŠtajner HoracioSaggion 19–23 C18-3005 @@ -4020,7 +4020,7 @@ Deep Learning for Dialogue Systems Yun-NungChen AsliCelikyilmaz - DilekHakkani-Tür + DilekHakkani-Tür 25–31 C18-3006 chen-etal-2018-deep diff --git a/data/xml/C65.xml b/data/xml/C65.xml index f57493da41..9ad9cf37a3 100644 --- a/data/xml/C65.xml +++ b/data/xml/C65.xml @@ -91,7 +91,7 @@ Endocentric Constructions and the <fixed-case>C</fixed-case>ocke Parsing Logic - Jane J.Robinson + Jane J.Robinson C65-1020 robinson-1965-endocentric @@ -104,19 +104,19 @@ Sentence Generation by Semantic Concordance ToshiyukiSakai - MakotoNagao + MakotoNagao C65-1022 sakai-nagao-1965-sentence Generation, Production, and Translation - PetrSgall + PetrSgall C65-1024 sgall-1965-generation On a Certain Distribution of Semantic Units - WojciechSkalmowski + WojciechSkalmowski C65-1025 skalmowski-1965-certain diff --git a/data/xml/C67.xml b/data/xml/C67.xml index 882a2b0f41..d92bffdf08 100644 --- a/data/xml/C67.xml +++ b/data/xml/C67.xml @@ -21,7 +21,7 @@ An evaluation of the usefulness of machine translations produced at the National Physical Laboratory, Teddington, with a summary of the translation methods J.McDaniel W.L.Price - A.J.M.Szanser + A.J.M.Szanser D.M.Yates C67-1002 mcdaniel-etal-1967-evaluation @@ -54,7 +54,7 @@ Transformational Decomposition: A Simple Description of an Algorithm for Transformational Analysis of <fixed-case>E</fixed-case>nglish Sentences DanutaHiz - Aravind K.Joshi + Aravind K.Joshi C67-1007 hiz-joshi-1967-transformational @@ -104,7 +104,7 @@
Methods for Obtaining Corresponding Phrase Structure and Dependency Grammars - Jane J.Robinson + Jane J.Robinson C67-1015 robinson-1967-methods diff --git a/data/xml/C69.xml b/data/xml/C69.xml index 7e936e650c..29a87b3f65 100644 --- a/data/xml/C69.xml +++ b/data/xml/C69.xml @@ -34,7 +34,7 @@ A Conceptual Dependency Parser for Natural Language - Roger C.Schank + Roger C.Schank LarryTesler C69-0201 schank-tesler-1969-conceptual @@ -93,7 +93,7 @@ An Application of Computer Programming to the Reconstruction of a Proto-Language Stanton P.Durham - David EllisRogers + David EllisRogers C69-0501 durham-rogers-1969-application @@ -131,7 +131,7 @@ Automatic error-correction in natural languages - A.J.Szanser + A.J.Szanser C69-0701 szanser-1969-automatic @@ -150,7 +150,7 @@ Interactive Semantic Analysis of <fixed-case>E</fixed-case>nglish Paragraphs - YorickWilks + YorickWilks C69-0801 wilks-1969-interactive @@ -169,7 +169,7 @@ Automatic Simulation of Historical Change - Raoul N.Smith + Raoul N.Smith C69-0901 smith-1969-automatic @@ -791,7 +791,7 @@ Linguistics and Automated Language Processing - Christine A.Montgomery + Christine A.Montgomery C69-4101 montgomery-1969-linguistics @@ -907,7 +907,7 @@ Properties of Formal Grammars With Mixed Type of Rules and Their Linguistic Relevance - Aravind K.Joshi + Aravind K.Joshi C69-4701 joshi-1969-properties @@ -1066,7 +1066,7 @@ AndreDugas MyrnaGopnik BrianHarris - Jean-PierrePaillet + Jean-PierrePaillet C69-5501 dugas-etal-1969-le @@ -1236,7 +1236,7 @@
On Semantics of Some Verbal Categories in <fixed-case>E</fixed-case>nglish - EvaHajicova + EvaHajicova C69-6207 hajicova-1969-semantics @@ -1262,7 +1262,7 @@
Machine Transcoding - T. R.Hofmann + T. R.Hofmann BrianHarris C69-6211 hofmann-harris-1969-machine @@ -1351,7 +1351,7 @@ H.Eggers A.Rothkegel-Schramm W.Klein - H-J.Weber + H-J.Weber H.Zimmermann C69-6501 eggers-etal-1969-diskontinuierliche @@ -1447,7 +1447,7 @@ Project <fixed-case>DOC</fixed-case> - William S-Y.Wang + William S-Y.Wang C69-6904 wang-1969-project @@ -1485,7 +1485,7 @@ Project <fixed-case>DOC</fixed-case>: Its Methodological Basis - William S-Y.Wang + William S-Y.Wang C69-7101 wang-1969-project-doc diff --git a/data/xml/C73.xml b/data/xml/C73.xml index f32b38bcc8..4f8824883e 100644 --- a/data/xml/C73.xml +++ b/data/xml/C73.xml @@ -25,7 +25,7 @@
Un Modele Mathematique D’analyse Transformationnelle Selon <fixed-case>Z</fixed-case>. <fixed-case>S</fixed-case>. <fixed-case>H</fixed-case>arris - Jean PierreDescles + Jean PierreDescles C73-1003 descles-1973-un @@ -56,7 +56,7 @@
Working With the Interactive Version of the <fixed-case>T.G.T.</fixed-case>-System of <fixed-case>J</fixed-case>oyce <fixed-case>F</fixed-case>riedman - IstvanBatori + IstvanBatori C73-1008 batori-1973-working @@ -84,7 +84,7 @@ Problems in Computerized Historical Linguistics: The <fixed-case>O</fixed-case>ld <fixed-case>C</fixed-case>ornish Lexicon EnricoCampanile - AntonioZampolli + AntonioZampolli C73-1012 campanile-zampolli-1973-problems @@ -170,13 +170,13 @@
A La Recherche D’un Modele De Derivation En <fixed-case>I</fixed-case>talien - IrinaProdanof + IrinaProdanof C73-1026 prodanof-1973-la An <fixed-case>E</fixed-case>nglish Dictionary for Computerized Syntactic and Semantic Processing Systems - Raoul N.Smith + Raoul N.Smith EdwardMaxwell C73-1027 smith-maxwell-1973-english @@ -238,9 +238,9 @@ Working on the <fixed-case>I</fixed-case>talian Machine Dictionary: A Semantic Approach - NicolettaCalzolari + NicolettaCalzolari LauraPecchia - AntonioZampolli + AntonioZampolli C73-2005 calzolari-etal-1973-working @@ -286,7 +286,7 @@
Automatic Pattern Recognition Applied to Semantic Problems - R. G.Piotrowski + R. G.Piotrowski I. V.Palibina C73-2012 piotrowski-palibina-1973-automatic @@ -308,7 +308,7 @@ Towards Computer Systems for Conversing in <fixed-case>P</fixed-case>olish - Janusz StanislawBien + Janusz StanislawBien C73-2015 bien-1973-towards @@ -361,7 +361,7 @@
Segmentation of <fixed-case>F</fixed-case>rench Sentences - BenteMaegaard + BenteMaegaard EbbeSpang-Hanssen C73-2023 maegaard-spang-hanssen-1973-segmentation @@ -375,7 +375,7 @@ An Application Du Systeme <fixed-case>A.T.E.F.</fixed-case> A L’analyse Morphologique De Textes Russes - NicolasNedobejkine + NicolasNedobejkine C73-2025 nedobejkine-1973-application @@ -387,7 +387,7 @@
Computational Linguistics and Linguistic Theory - Jean PierrePaillet + Jean PierrePaillet C73-2027 paillet-1973-computational @@ -399,14 +399,14 @@
Analyse Automatique De Textes Par Un Systeme D’etats Finis - MauriceQuezel-Ambrunaz - PierreGuillaume + MauriceQuezel-Ambrunaz + PierreGuillaume C73-2029 quezel-ambrunaz-guillaume-1973-analyse On Using Semantic Data in Automatic Syntactic Analysis - MorrisSalkoff + MorrisSalkoff C73-2030 salkoff-1973-using @@ -418,7 +418,7 @@
The Automatically Built Up Homograph Dictionary a Component of a Dynamic Lexical System - Heinz J.Weber + Heinz J.Weber C73-2032 weber-1973-automatically diff --git a/data/xml/C80.xml b/data/xml/C80.xml index c4beb8a23d..1d1154d3b3 100644 --- a/data/xml/C80.xml +++ b/data/xml/C80.xml @@ -29,7 +29,7 @@
A Syntax Parser Based on the Case Dependency Grammar and Its Efficiency - ToruHitaka + ToruHitaka ShoYoshida C80-1003 hitaka-yoshida-1980-syntax @@ -54,14 +54,14 @@ A Context-Free Grammar of <fixed-case>F</fixed-case>rench - MorrisSalkoff + MorrisSalkoff C80-1007 salkoff-1980-context A Rule-Based Approach to Ill-Formed Input - Norman K.Sondheimer - Ralph M.Weischedel + Norman K.Sondheimer + Ralph M.Weischedel C80-1008 sondheimer-weischedel-1980-rule @@ -79,8 +79,8 @@
Linguistic Meaning and Knowledge Representation in Automatic Understanding of Natural Language - EvaHajicova - PetrSgall + EvaHajicova + PetrSgall C80-1011 hajicova-sgall-1980-linguistic @@ -92,7 +92,7 @@
Hierarchical Meaning Representation and Analysis of Natural Language Documents - Toyo-akiNishida + Toyo-akiNishida ShujiDoshita C80-1013 nishida-doshita-1980-hierarchical @@ -195,13 +195,13 @@ Embedded Sublanguages and Natural Language Processing - RichardKittredge + RichardKittredge C80-1029 kittredge-1980-embedded Adaptation of <fixed-case>M</fixed-case>ontague Grammar to the Requirements of Question-Answering - S.P.J.Landsbergen + S.P.J.Landsbergen C80-1030 landsbergen-1980-adaptation @@ -228,7 +228,7 @@
On the Derivation of a Conversational Maxim - Th. R.Hofmann + Th. R.Hofmann C80-1034 hofmann-1980-derivation @@ -257,7 +257,7 @@ Linguistic Error Correction of <fixed-case>J</fixed-case>apanese Sentences TsutomuKawada - Shin-yaAmano + Shin-yaAmano KunioSakai C80-1038 kawada-etal-1980-linguistic @@ -322,7 +322,7 @@ Automatic Compilation of <fixed-case>M</fixed-case>odern <fixed-case>C</fixed-case>hinese Concordances - SyunsukeUemura + SyunsukeUemura YasuoSugawara Mantaro J.Hashimoto AkihiroFuruya @@ -345,15 +345,15 @@ Parsing Free Word Order Languages in <fixed-case>P</fixed-case>rolog - Janusz StanislawBien + Janusz StanislawBien KrystynaLaus-Maczyniska - StanislawSzpakowicz + StanislawSzpakowicz C80-1051 bien-etal-1980-parsing Parsing Against Lexical Ambiguity - RobMilne + RobMilne C80-1052 milne-1980-parsing @@ -372,7 +372,7 @@ Active Schemata and Their Role in Semantic Parsing Joachim H.Laubsch - Dietmar F.Roesner + Dietmar F.Roesner C80-1055 laubsch-roesner-1980-active @@ -432,9 +432,9 @@
<fixed-case>ITS</fixed-case>: Interactive Translation System - Alan K.Melby + Alan K.Melby Melvin R.Smith - JillPeterson + JillPeterson C80-1064 melby-etal-1980-interactive @@ -494,7 +494,7 @@ The Impatient Tutor: An Integrated Language Understanding System BrianPhillips - JamesHendler + JamesHendler C80-1072 phillips-hendler-1980-impatient @@ -513,7 +513,7 @@
Conjunctions and Modularity in Language Analysis Procedures - RalphGrishman + RalphGrishman C80-1075 grishman-1980-conjunctions @@ -526,7 +526,7 @@ Une Experience Pratique D’utilisation De L’analyse Linguistique En Recherche D’information: Bilan & Perspectives ErnestGrandjean - GerardVeillon + GerardVeillon C80-1077 grandjean-veillon-1980-une @@ -575,7 +575,7 @@
Natürlichsprachige <fixed-case>P</fixed-case>roblembeschreibung als ein <fixed-case>V</fixed-case>erfahren für den Bürgernahen <fixed-case>Z</fixed-case>ugang zu <fixed-case>D</fixed-case>okumentationssystemen - Harald H.Zimmermann + Harald H.Zimmermann C80-1084 deu zimmermann-1980-naturlichsprachige @@ -612,7 +612,7 @@ A Method to Reduce Large Number of Concordances - MariaPozzi + MariaPozzi JavierBecerra JaimeRangel Luis FernandoLara diff --git a/data/xml/C82.xml b/data/xml/C82.xml index e0388aa739..8d1f1ace4b 100644 --- a/data/xml/C82.xml +++ b/data/xml/C82.xml @@ -99,34 +99,34 @@ Natural Language Interfaces Using Limited Semantic Information - RalphGrishman - LynetteHirschman - CarolFriedman + RalphGrishman + LynetteHirschman + CarolFriedman C82-1014 grishman-etal-1982-natural <fixed-case>DIALOGIC</fixed-case>: A Core Natural-Language Processing System - BarbaraGrosz + BarbaraGrosz NormanHaas - GaryHendrix - JerryHobbs + GaryHendrix + JerryHobbs PaulMartin - RobertMoore - JaneRobinson - StanleyRosenschein + RobertMoore + JaneRobinson + StanleyRosenschein C82-1015 grosz-etal-1982-dialogic Referential Nets With Attributes - Christopher U.Habel + Christopher U.Habel C82-1016 habel-1982-referential The Role of the Hierarchy of Activation in the Process of Natural Language Understanding - EvaHajicova + EvaHajicova JarkaVrbova C82-1017 hajicova-vrbova-1982-role @@ -139,13 +139,13 @@ An Experimental Parser - Anna SagvallHein + Anna SagvallHein C82-1019 hein-1982-experimental Natural Language Access to Structured Text - Jerry R.Hobbs + Jerry R.Hobbs Donald E.Walker Robert A.Amsler C82-1020 @@ -153,7 +153,7 @@ A Multilayered Approach to the Handling of Word Formation - WolfgangHoeppner + WolfgangHoeppner C82-1021 hoeppner-1982-multilayered @@ -165,14 +165,14 @@
Processing of Sentences With Intra-Sentential Code-Switching - Aravind K.Joshi + Aravind K.Joshi C82-1023 joshi-1982-processing Incremental Sentence Generation: Implications for the Structure of a Syntactic Processor GerardKempen - EdwardHoenkamp + EdwardHoenkamp C82-1024 kempen-hoenkamp-1982-incremental @@ -196,13 +196,13 @@
Machine Translation Based on Logically Isomorphic <fixed-case>M</fixed-case>ontague Grammars - JanLandsbergen + JanLandsbergen C82-1028 landsbergen-1982-machine Conversion of a <fixed-case>F</fixed-case>rench Surface Expression Into Its Semantic Representation According to the <fixed-case>RESEDA</fixed-case> Metalanguage - JacquelineLeon + JacquelineLeon C82-1029 leon-1982-conversion @@ -214,14 +214,14 @@
The Anatomy of a Systemic Choice - William C.Mann + William C.Mann C82-1031 mann-1982-anatomy Analysis and Processing of Compact Text ElaineMarsh - NaomiSager + NaomiSager C82-1032 marsh-sager-1982-analysis @@ -234,7 +234,7 @@
Multi-Level Translation Aids in a Distributed System - Alan K.Melby + Alan K.Melby C82-1034 melby-1982-multi @@ -267,8 +267,8 @@
An <fixed-case>E</fixed-case>nglish <fixed-case>J</fixed-case>apanese Machine Translation System of the Titles of Scientific and Engineering Papers - MakotoNagao - Jun-ichiTsujii + MakotoNagao + Jun-ichiTsujii KojiYada ToshihiroKakimoto C82-1039 @@ -276,8 +276,8 @@ Parser Which Learns the Application Order of Rewriting Rules - MakotoNagao - Jun-ichiNakamura + MakotoNagao + Jun-ichiNakamura C82-1040 nagao-nakamura-1982-parser @@ -303,7 +303,7 @@
An <fixed-case>E</fixed-case>nglish-<fixed-case>J</fixed-case>apanese Machine Translation System Based on Formal Semantics of Natural Language - Toyo-akiNishida + Toyo-akiNishida ShujiDoshita C82-1044 nishida-doshita-1982-english @@ -326,7 +326,7 @@ Random Generation of <fixed-case>C</fixed-case>zech Sentences - JarmilaPanevova + JarmilaPanevova C82-1047 panevova-1982-random @@ -339,13 +339,13 @@ A Message-Passing Control Structure for Text Understanding BrianPhillips - James A.Hendler + James A.Hendler C82-1049 phillips-hendler-1982-message Composition of Translation Schemes with <fixed-case>D</fixed-case>-Trees - MartinPlatek + MartinPlatek C82-1050 platek-1982-composition @@ -357,7 +357,7 @@
Formalization of Argumentation Structures in Newspaper Texts - Dietmar F.Roesner + Dietmar F.Roesner Joachim H.Laubsch C82-1052 roesner-laubsch-1982-formalization @@ -392,21 +392,21 @@ Knowledge Representation and Machine Translation SusumuSawai HiromichiFukushima - MasakatsuSugimoto + MasakatsuSugimoto NaoyaUkai C82-1057 sawai-etal-1982-knowledge Natural Language Understanding and the Perspectives of Question Answering - PetrSgall + PetrSgall C82-1058 sgall-1982-natural Parsing <fixed-case>G</fixed-case>erman IngeborgSteinacker - HaraldTrost + HaraldTrost C82-1059 steinacker-trost-1982-parsing @@ -424,7 +424,7 @@
The Transfer Phase In an <fixed-case>E</fixed-case>nglish-<fixed-case>J</fixed-case>apanese Translation System - Jun-ichiTsujii + Jun-ichiTsujii C82-1062 tsujii-1982-transfer @@ -450,8 +450,8 @@
Taking the Initiative in Natural Language Data Base Interactions: Justifying Why - BonnieWebber - AravindJoshi + BonnieWebber + AravindJoshi C82-1066 webber-joshi-1982-taking @@ -459,7 +459,7 @@ Man-Assisted Machine Construction of a Semantic Dictionary for Natural Language Processing ShoYoshida HiroakiTsurumaru - TooruHitaka + TooruHitaka C82-1067 yoshida-etal-1982-man
@@ -517,7 +517,7 @@
Subordinate Clauses and Belief - Domains in Verbal Information Processing - IstvanBatori + IstvanBatori C82-2005 batori-1982-subordinate @@ -535,14 +535,14 @@
Toward a Parsing Method for Free Word Order Languages - Janusz S.Bień - StanisławSzpakowicz + Janusz S.Bień + StanisławSzpakowicz C82-2008 bien-szpakowicz-1982-toward Developing the <fixed-case>COMMENTATOR</fixed-case>, A Computer System Simulating Verbal Production - MilanBily + MilanBily BengtSigurd C82-2009 bily-sigurd-1982-developing @@ -567,7 +567,7 @@ Towards the Organization of Lexical Definitions on a Database Structure - NicolettaCalzolari + NicolettaCalzolari C82-2013 calzolari-1982-towards @@ -620,21 +620,21 @@
Merging - The Art of Representing Different Levels of Sentence Structure in a Single Analysis Tree - FrankVan Eynde + FrankVan Eynde C82-2021 van-eynde-1982-merging Revising an <fixed-case>ATN</fixed-case> Parser GiacomoFerrari - IrinaProdanof + IrinaProdanof C82-2022 ferrari-prodanof-1982-revising Collocational Grammar as a Model for Human-Computer Interaction W. RandolphFord - Raoul N.Smith + Raoul N.Smith C82-2023 ford-smith-1982-collocational @@ -673,13 +673,13 @@
Constraints on Noun Phrase Conjunction: A Domain-Independent Mechanism - LynetteHirschman + LynetteHirschman C82-2029 hirschman-1982-constraints Why There Must Be a Semantic Representation (Over and Above Any Cognitive Network) - Th. R.Hofmann + Th. R.Hofmann C82-2030 hofmann-1982-must @@ -691,8 +691,8 @@
Inferencing and Search for an Answer in <fixed-case>TIBAQ</fixed-case> - PetrJirku - JanHajic + PetrJirku + JanHajic C82-2032 jirku-hajic-1982-inferencing @@ -711,7 +711,7 @@
Syntactic Privilege - Michael B.Kac + Michael B.Kac C82-2035 kac-1982-syntactic @@ -725,7 +725,7 @@ A Procedure of an Automatic Grapheme-to-Phoneme Transfornation of <fixed-case>G</fixed-case>erman SabineKoch WolfgangMenzel - IngridStarke + IngridStarke C82-2037 koch-etal-1982-procedure
@@ -743,7 +743,7 @@
Natural Language Data Base Access With <fixed-case>PEARL</fixed-case> - WendyLehnert + WendyLehnert SteveShwartz C82-2040 lehnert-shwartz-1982-natural @@ -752,7 +752,7 @@ Reference Resolution and Semantic Coherence ElisabethLeinfellner IngeborgSteinacker - HaraldTrost + HaraldTrost C82-2041 leinfellner-etal-1982-reference @@ -770,7 +770,7 @@
Meaning Negotiation in Dialogue - BarbaraLewandowska + BarbaraLewandowska C82-2044 lewandowska-1982-meaning @@ -783,7 +783,7 @@
The Transfer of Finite Verb Forms in a Machine Translation System - BenteMaegaard + BenteMaegaard C82-2046 maegaard-1982-transfer @@ -826,10 +826,10 @@
A Formal Procedure for <fixed-case>B</fixed-case>ulgarian Word Form Generation - ElenaPaskaleva + ElenaPaskaleva C82-2053 paskaleva-bulgaria-1982-formal - + On an Approach for Designing Linguistic Processors @@ -878,7 +878,7 @@ Adverbs and Semantic Inferences - MadisSaluveer + MadisSaluveer C82-2061 saluveer-1982-adverbs diff --git a/data/xml/C86.xml b/data/xml/C86.xml index 8393e0255d..30edb578d2 100644 --- a/data/xml/C86.xml +++ b/data/xml/C86.xml @@ -45,7 +45,7 @@
User Models: The Problem of Disparity - SandraCarberry + SandraCarberry C86-1006 carberry-1986-user @@ -58,13 +58,13 @@ A Two-Level Dialogue Representation GiacomoFerrari - RonanReilly + RonanReilly C86-1008 ferrari-reilly-1986-two <fixed-case>INTERFACILE</fixed-case>: Linguistic Coverage and Query Reformulation - YvetteMathieu + YvetteMathieu PaulSabatier C86-1009 mathieu-sabatier-1986-interfacile @@ -84,19 +84,19 @@ Particle Homonymy and Machine Translation - KarolyFabricz + KarolyFabricz C86-1012 fabricz-1986-particle Plurals, Cardinalities, and Structures of Determination - Christopher U.Habel + Christopher U.Habel C86-1013 habel-1986-plurals Processing Word Order Variation Within a Modified <fixed-case>ID/LP</fixed-case> Framework - PradipDey + PradipDey C86-1014 dey-1986-processing @@ -120,7 +120,7 @@
Conditioned Unification for Natural Language Processing - KoitiHasida + KoitiHasida C86-1018 hasida-1986-conditioned @@ -140,13 +140,13 @@ The Transfer Phase of the <fixed-case>M</fixed-case>u Machine Translation System HakotoNagao - Jun-ichiTsujii + Jun-ichiTsujii C86-1021 nagao-tsujii-1986-transfer Lexical Transfer: A Missing Element in Linguistics Theories - Alan K.Melby + Alan K.Melby C86-1022 melby-1986-lexical @@ -172,7 +172,7 @@
The need for <fixed-case>MT</fixed-case>-oriented versions of Case and Valency in <fixed-case>MT</fixed-case> - Harold L.Somers + Harold L.Somers C86-1026 somers-1986-need @@ -191,29 +191,29 @@
Solutions for Problems of <fixed-case>MT</fixed-case> Parser - Methods Used in <fixed-case>M</fixed-case>u-Machine Translation Project - - Jun-ichiNakamura - Jun-ichiTsujii - MakotoNagao + Jun-ichiNakamura + Jun-ichiTsujii + MakotoNagao C86-1029 nakamura-etal-1986-solutions Strategies and Heuristics in the Analysis of a Natural Language in Machine Translation - ZaharinYusoff + ZaharinYusoff C86-1030 yusoff-1986-strategies Parsing in Parallel XiumingHuang - LouiseGuthrie + LouiseGuthrie C86-1031 huang-guthrie-1986-parsing COMPUTATIONAL COMPARATIVE STUDIES ON <fixed-case>R</fixed-case>OMANCE LAGUAGES: A linguistic comparison of lexicon-grammars AnnibaleElia - YvetteMathieu + YvetteMathieu C86-1032 elia-mathieu-1986-computational @@ -225,40 +225,40 @@
Parsing Without (Much) Phrase Structure - Michael B.Kac - AlexisManaster-Ramer + Michael B.Kac + AlexisManaster-Ramer C86-1034 kac-manaster-ramer-1986-parsing Reconnaissance-Attack Parsing - Michael B.Kac + Michael B.Kac TomRindflesch - Karen L.Ryan + Karen L.Ryan C86-1035 kac-etal-1986-reconnaissance Natural Language Interfaces - Ready for Commercial Success? - WolfgangWahlster + WolfgangWahlster C86-1036 wahlster-1986-natural Requirements for Robust Natural Language Interfaces: The <fixed-case>L</fixed-case>anguage<fixed-case>C</fixed-case>raft and <fixed-case>XCALIBUR</fixed-case> experiences - Jaime G.Carbonell + Jaime G.Carbonell C86-1037 carbonell-1986-requirements <fixed-case>Q&A</fixed-case>: Already a Success? - Gary G.Hendrix + Gary G.Hendrix C86-1038 hendrix-1986-q The Commercial Application of: Natural Language Interfaces - HarryTennant + HarryTennant C86-1039 tennant-1986-commercial @@ -277,23 +277,23 @@
Linking Propositions - D. S.Bree - R. A.Smit + D. S.Bree + R. A.Smit C86-1042 bree-smit-1986-linking Discourse and Cohesion in Expository Text - Allen B.Tucker - SergeiNirenburg + Allen B.Tucker + SergeiNirenburg VictorRaskin C86-1043 tucker-etal-1986-discourse Degrees of Understanding - EvaHajicova - PetrSgall + EvaHajicova + PetrSgall C86-1044 hajicova-sgall-1986-degrees @@ -319,9 +319,9 @@
Tree Adjoining and Head Wrapping - K.Vijay-Shanker - David J.Weir - Aravind K.Joshi + K.Vijay-Shanker + David J.Weir + Aravind K.Joshi C86-1048 vijay-shanker-etal-1986-tree @@ -334,7 +334,7 @@
A Simple Reconstruction of <fixed-case>GPSG</fixed-case> - Stuart M.Shieber + Stuart M.Shieber C86-1050 shieber-1986-simple @@ -353,8 +353,8 @@
Conceptual Lexicon Using an Object-Oriented Language - ShoichiYokoyama - KenjiHanakata + ShoichiYokoyama + KenjiHanakata C86-1053 yokoyama-hanakata-1986-conceptual @@ -396,7 +396,7 @@ Disambiguation and Language Acquisition through the Phrasal Lexicon UriZernik - Michael G.Dyer + Michael G.Dyer C86-1059 zernik-dyer-1986-disambiguation @@ -461,13 +461,13 @@
A Compression Technique for <fixed-case>A</fixed-case>rabic Dictionaries: The Affix Analysis - AbdelmajidBen Hamadou + AbdelmajidBen Hamadou C86-1068 ben-hamadou-1986-compression Machine Learning of Morphological Rules by Generalization and Analogy - KlausWothke + KlausWothke C86-1069 wothke-1986-machine @@ -489,7 +489,7 @@
Generating Semantic Structures in <fixed-case>EUROTRA-D</fixed-case> - ErichSteiner + ErichSteiner C86-1072 steiner-1986-generating @@ -513,7 +513,7 @@
<fixed-case>NARA</fixed-case>: A Two-way Simultaneous Interpretation System between <fixed-case>K</fixed-case>orean and <fixed-case>J</fixed-case>apanese -A methodological study- - Hee SungChung + Hee SungChung Tosiyasu L.Kunii C86-1076 chung-kunii-1986-nara @@ -536,7 +536,7 @@ A Metric for Computational Analysis of Meaning: Toward an Applied Theory of Linguistic Semantics - SergeiNirenburg + SergeiNirenburg VictorRaskin C86-1079 nirenburg-raskin-1986-metric @@ -550,7 +550,7 @@ A Logical Formalism for the Representation of Determiners BarbaraDi Eugenio - LeonardoLesmo + LeonardoLesmo PaoloPogliano PietroTorasso FrancescoUrbano @@ -559,7 +559,7 @@ A Compositional Semantics for Directional Modifiers - Locative Case Reopened - - Erhard W.Hinrichs + Erhard W.Hinrichs C86-1082 hinrichs-1986-compositional @@ -583,19 +583,19 @@ NorbertReithinger DagmarSchmauks KarinHarbusch - WolfgangWahlster + WolfgangWahlster C86-1085 kobsa-etal-1986-combining An Approach to Non-Singular Terms in Discourse - TomekStrzalkowski + TomekStrzalkowski C86-1086 strzalkowski-1986-approach Processing Clinical Narratives in <fixed-case>H</fixed-case>ungarian - GaborProszeky + GaborProszeky C86-1087 proszeky-1986-processing @@ -615,7 +615,7 @@
On the Use of Term Associations in Automatic Information Retrieval - GerardSalton + GerardSalton C86-1090 salton-1986-use @@ -635,7 +635,7 @@
<fixed-case>SCSL</fixed-case>: a linguistic specification language for <fixed-case>MT</fixed-case> - RemiZajac + RemiZajac C86-1093 zajac-1986-scsl @@ -717,7 +717,7 @@ An Attempt to Automatic Thesaurus Construction From an Ordinary <fixed-case>J</fixed-case>apanese Language Dictionary HiroakiTsurumaru - ToruHitaka + ToruHitaka ShoYoshida C86-1105 tsurumaru-etal-1986-attempt @@ -738,7 +738,7 @@ User Specification of Syntactic Case Frames in <fixed-case>TELI</fixed-case>, A Transportable, User-Customized Natural Language Processor - Bruce W.Ballard + Bruce W.Ballard C86-1108 ballard-1986-user @@ -765,7 +765,7 @@
Generalized Memory Manipulating Actions for Parsing Natural Language - IrinaProdanof + IrinaProdanof GiacomoFerrari C86-1112 prodanof-ferrari-1986-generalized @@ -778,20 +778,20 @@ The Treatment of Movement-Rules in a <fixed-case>LFG</fixed-case>-Parser - Hans-UlrichBlock + Hans-UlrichBlock HansHaugeneder C86-1114 block-haugeneder-1986-treatment A Concept of Derivation for <fixed-case>LFG</fixed-case> - JurgenWedekind + JurgenWedekind C86-1115 wedekind-1986-concept Incremental Construction of <fixed-case>C</fixed-case>- and <fixed-case>F</fixed-case>-Structure in a <fixed-case>LFG</fixed-case>-Parser - Hans-UlrichBlock + Hans-UlrichBlock RudolfHunze C86-1116 block-hunze-1986-incremental @@ -836,7 +836,7 @@ Pragmatic Considerations in Man-Machine Discourse - Waltherv. Hahn + Waltherv. Hahn C86-1123 v-hahn-1986-pragmatic @@ -850,41 +850,41 @@
On Formalizations of <fixed-case>M</fixed-case>arcus’ Parser - R.Nozohoor-Farshi + R.Nozohoor-Farshi C86-1125 nozohoor-farshi-1986-formalizations A Grammar Used for Parsing and Generation Jean-MarieLancel - FrancoisRousselot - NathalieSimonin + FrancoisRousselot + NathalieSimonin C86-1126 lancel-etal-1986-grammar <fixed-case>BUILDRS</fixed-case>: An Implementation of <fixed-case>DR</fixed-case> Theory and <fixed-case>LFG</fixed-case> HajimeWada - NicholasAsher + NicholasAsher C86-1127 wada-asher-1986-buildrs A <fixed-case>PROLOG</fixed-case> Implementation of <fixed-case>G</fixed-case>overnment-<fixed-case>B</fixed-case>inding <fixed-case>T</fixed-case>heory - Robert J.Kuhns + Robert J.Kuhns C86-1128 kuhns-1986-prolog A <fixed-case>L</fixed-case>exical <fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar System in <fixed-case>P</fixed-case>rolog - AndreasEisele - JochenDorre + AndreasEisele + JochenDorre C86-1129 eisele-dorre-1986-lexical Knowledge Structures for Natural Language Generation - Paul S.Jacobs + Paul S.Jacobs C86-1130 jacobs-1986-knowledge @@ -920,30 +920,30 @@
Generating Natural Language Text in a Dialog System - MareKoit - MadisSaluveer + MareKoit + MadisSaluveer C86-1135 koit-saluveer-1986-generating Generating <fixed-case>E</fixed-case>nglish Paraphrases From Formal Relational Calculus Expressions - A.N.De Roeck + A.N.De Roeck B.G.T.Lowden C86-1136 de-roeck-lowden-1986-generating The computational complexity of sentence derivation in functional unification grammar - GraemeRitchie + GraemeRitchie C86-1137 ritchie-1986-computational Parsing Spoken Language: a Semantic Caseframe Approach Philip J.Hayes - Alexander G.Hauptmann - Jaime G.Carbonell - MasaruTomita + Alexander G.Hauptmann + Jaime G.Carbonell + MasaruTomita C86-1138 hayes-etal-1986-parsing @@ -963,7 +963,7 @@ Synthesis of Spoken Messages from Semantic Representations (Semantic-Representation-to-Speech System) LaurenceDanlos - EricLaporte + EricLaporte FrancoiseEmerard C86-1141 danlos-etal-1986-synthesis @@ -1013,22 +1013,22 @@ On Knowledge-Based Machine Translation - SergeiNirenburg + SergeiNirenburg VictorRaskin - AllenTucker + AllenTucker C86-1148 nirenburg-etal-1986-knowledge Another Stride Towards Knowledge-Based Machine Translation - MasaruTomita - Jaime G.Carbonell + MasaruTomita + Jaime G.Carbonell C86-1149 tomita-carbonell-1986-another <fixed-case>E</fixed-case>nglish - <fixed-case>M</fixed-case>alay Translation System: A Laboratory Prototype - TongLoong-Cheong + Loong-CheongTong C86-1150 tong-1986-english @@ -1055,13 +1055,13 @@
When <fixed-case>M</fixed-case>ariko talks to <fixed-case>S</fixed-case>iegfried - Experiences from a <fixed-case>J</fixed-case>apanese/<fixed-case>G</fixed-case>erman Machine Translation Project- - DietmarRosner + DietmarRosner C86-1154 rosner-1986-mariko Future Directions of Machine Translation - Jun-ichiTsujii + Jun-ichiTsujii C86-1155 tsujii-1986-future diff --git a/data/xml/C88.xml b/data/xml/C88.xml index 63d0313ed5..84dfed51ac 100644 --- a/data/xml/C88.xml +++ b/data/xml/C88.xml @@ -18,7 +18,7 @@
Parsing <fixed-case>F</fixed-case>rench with <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar: some linguistic accounts - AnneAbeille + AnneAbeille C88-1002 abeille-1988-parsing @@ -36,21 +36,21 @@
Efficiency Considerations for <fixed-case>LFG</fixed-case>-Parsers - Incremental and Table-Lookup Techniques - IstvanBatori + IstvanBatori StefanMarok C88-1005 batori-marok-1988-efficiency Morphology with Two-Level Rules and Negative Rule Features - JohnBear + JohnBear C88-1006 bear-1988-morphology Machine Translation Using Isomorphic <fixed-case>UCG</fixed-case>s John L.Beaven - PeteWhitelock + PeteWhitelock C88-1007 beaven-whitelock-1988-machine @@ -71,8 +71,8 @@ Some Problems of Machine Translation Between Closely Related Languages AlevtinaBemova - KarelOli̊va - JarmilaPanevová + KarelOli̊va + JarmilaPanevová C88-1010 bemova-etal-1988-problems @@ -85,9 +85,9 @@
Software Support for Practical Grammar Development - BranBoguraev - JohnCarroll - TedBriscoe + BranBoguraev + JohnCarroll + TedBriscoe ClaireGrover C88-1012 boguraev-etal-1988-software @@ -101,7 +101,7 @@ Co-Ordinative Ellipsis in <fixed-case>R</fixed-case>ussian Texts: Problems of Description and Restoration - Igor A.Bolshakov + Igor A.Bolshakov C88-1014 bolshakov-1988-co @@ -132,7 +132,7 @@
Unification Categorial Grammar: A Concise, Extendable Grammar for Natural Language Processing - JonathanCalder + JonathanCalder EwanKlein HenkZeevat C88-1018 @@ -140,7 +140,7 @@ Acquisition of Semantic Information From an On-Line Dictionary - NicolettaCalzolari + NicolettaCalzolari EugenioPicchi C88-1019 calzolari-picchi-1988-acquisition @@ -153,8 +153,8 @@ Anaphora Resolution: A Multi-Strategy Approach - Jaime G.Carbonell - Ralf D.Brown + Jaime G.Carbonell + Ralf D.Brown C88-1021 carbonell-brown-1988-anaphora @@ -166,7 +166,7 @@
Unification and Transduction in Computational Phonology - JulieCarson + JulieCarson C88-1023 carson-1988-unification @@ -200,14 +200,14 @@
<fixed-case>GRAFON</fixed-case>: A Grapheme-to-Phoneme Conversion System for <fixed-case>D</fixed-case>utch - WalterDaelemans + WalterDaelemans C88-1028 daelemans-1988-grafon Morphology and cross dependencies in the synthesis of personal pronouns in <fixed-case>R</fixed-case>omance languages LaurenceDanlos - FiamettaNamer + FiamettaNamer C88-1029 danlos-namer-1988-morphology @@ -221,7 +221,7 @@
Stylistic Grammars in Language Translation - ChrysanneDiMarco + ChrysanneDiMarco GraemeHirst C88-1031 dimarco-hirst-1988-stylistic @@ -240,7 +240,7 @@ Knowledge integration in a robust and efficient morpho-syntactic analyzer for <fixed-case>F</fixed-case>rench - LouisetteEmirkanian + LouisetteEmirkanian Lorne H.Bouchard C88-1034 emirkanian-bouchard-1988-knowledge @@ -266,7 +266,7 @@ Sequencing in a Connectionist Model of Language Processing MichaelGasser - Michael G.Dyer + Michael G.Dyer C88-1038 gasser-dyer-1988-sequencing @@ -316,7 +316,7 @@ Formal Morphology - JanHajic + JanHajic C88-1045 hajic-1988-formal @@ -330,7 +330,7 @@
A Cognitive Account of Unbounded Dependency - KoitiHasida + KoitiHasida C88-1047 hasida-1988-cognitive @@ -377,34 +377,34 @@
Achieving Bidirectionality - Paul S.Jacobs + Paul S.Jacobs C88-1054 jacobs-1988-achieving <fixed-case>C</fixed-case>oncretion: Assumption-Based Understanding - Paul S.Jacobs + Paul S.Jacobs C88-1055 jacobs-1988-concretion Locally Governed Trees and Dependecncy Parsing - HarriJäppinen + HarriJäppinen EeroLassila - AarnoLehtola + AarnoLehtola C88-1056 jappinen-etal-1988-locally Issues in Relating Syntax and Semantics - DanielJurafsky + DanielJurafsky C88-1057 jurafsky-1988-issues Coordination in Reconnaissance-Attack Parsing - Michael B.Kac - Thomas C.Rindflesch + Michael B.Kac + Thomas C.Rindflesch C88-1058 kac-rindflesch-1988-coordination @@ -417,15 +417,15 @@
An Algorithm for Functional Uncertainty - Ronald M.Kaplan - John T.Maxwell III + Ronald M.Kaplan + John T.Maxwell III C88-1060 kaplan-maxwell-iii-1988-algorithm Constituent Coordination in <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar - Ronald M.Kaplan - John T.Maxwell III + Ronald M.Kaplan + John T.Maxwell III C88-1061 kaplan-maxwell-iii-1988-constituent @@ -438,7 +438,7 @@
An Experimental Parser for Systemic Grammars - Robert T.Kasper + Robert T.Kasper C88-1063 kasper-1988-experimental @@ -470,14 +470,14 @@
Constructing a Model of Dialog - MareKoit + MareKoit C88-1068 koit-1988-constructing Complexity, Two-Level Morphology and <fixed-case>F</fixed-case>innish KimmoKoskenniemi - Kenneth WardChurch + Kenneth WardChurch C88-1069 koskenniemi-church-1988-complexity @@ -486,7 +486,7 @@ IkuoKudo HideyaKoshino MoonkyungChung - TsuyosiMorimoto + TsuyosiMorimoto C88-1070 kudo-etal-1988-schema
@@ -498,7 +498,7 @@
A News Analysis System - Robert J.Kuhns + Robert J.Kuhns C88-1072 kuhns-1988-news @@ -512,7 +512,7 @@ <fixed-case>SAGE</fixed-case> - a Sentence Parsing and Generation System Jean-MarieLancel MiyoOtani - NathalieSimonin + NathalieSimonin LaurenceDanlos C88-1074 lancel-etal-1988-sage @@ -531,21 +531,21 @@
Interpretation of Noun Phrases in Intensional Contexts - LeonardoLesmo + LeonardoLesmo PaoloTerenziani C88-1077 lesmo-terenziani-1988-interpretation Inheritance in Hierarchical Relational Structures - Derek P.Long - RobertoGarigliano + Derek P.Long + RobertoGarigliano C88-1078 long-garigliano-1988-inheritance Designing and testing linguistic development phases in machine translation project - BenteMaegaard + BenteMaegaard C88-1079 maegaard-1988-designing @@ -557,13 +557,13 @@
Representing Regularities in the Metaphoric Lexicon - James H.Martin + James H.Martin C88-1081 martin-1988-representing Linguistic Processing Using a Dependency Structure Grammar for Speech Recognition and Understanding - Sho-ichiMatsunaga + Sho-ichiMatsunaga MasakiKohda C88-1082 matsunaga-kohda-1988-linguistic @@ -587,7 +587,7 @@ Lexical Transfer: Between a Source Rock and a Hard Target - Alan K.Melby + Alan K.Melby C88-2084 melby-1988-lexical @@ -599,7 +599,7 @@ Solving Some Persistent Presupposition Problems - Robert E.Mercer + Robert E.Mercer C88-2086 mercer-1988-solving @@ -611,7 +611,7 @@
Strategies for Effective Paraphrasing - MarieMeteer + MarieMeteer VardaShaked C88-2088 meteer-shaked-1988-strategies @@ -631,37 +631,37 @@ <fixed-case>PANEL</fixed-case>: Language Engineering: The Real Bottle Neck of Natural Language Processing - MakotoNagao + MakotoNagao C88-2091 nagao-1988-panel Why Computational Grammarians Can Be Skeptical About Existing Linguistic Theories - KarenJensen + KarenJensen C88-2092 jensen-1988-computational Why Implementors of Practical <fixed-case>NLP</fixed-case> Systems Can not Wait for Linguistic Theories Remarks and Theses - DietmarRoesner + DietmarRoesner C88-2093 roesner-1988-implementors Reasons Why We Use Dependency Grammar - EvaHajicova + EvaHajicova C88-2094 hajicova-1988-reasons Reasons why <fixed-case>I</fixed-case> do not care grammar formalism - Jun-ichiTsujii + Jun-ichiTsujii C88-2095 tsujii-1988-reasons “Linguistic” Sentences and “Real” Sentences - MasaruTomita + MasaruTomita C88-2096 tomita-1988-linguistic @@ -674,8 +674,8 @@
Extraction of Semantic Information from an Ordinary <fixed-case>E</fixed-case>nglish Dictionary and its Evaluation - Jun-ichiNakamura - MakotoNagao + Jun-ichiNakamura + MakotoNagao C88-2098 nakamura-nagao-1988-extraction @@ -688,8 +688,8 @@
A Framework for Lexical Selection in Natural Language Generation - SergeiNirenburg - IreneNirenburg + SergeiNirenburg + IreneNirenburg C88-2100 nirenburg-nirenburg-1988-framework @@ -704,7 +704,7 @@
Maintaining Consistency and Plausibility in Integrated Natural Language Understanding - ToyoakiNishida + ToyoakiNishida XueminLiu ShujiDoshita AtsushiYamada @@ -715,20 +715,20 @@ Parsing with look-ahead in real-time on-line translation system HiroyasuNogami YumikoYoshimura - Shin-yaAmano + Shin-yaAmano C88-2103 nogami-etal-1988-parsing Syntactic Functions in <fixed-case>GPSG</fixed-case> - KarelOli̊va + KarelOli̊va C88-2104 oliva-1988-syntactic List Automata With Syntactically Structured Output - KarelOli̊va - MartinPlatek + KarelOli̊va + MartinPlatek C88-2105 oliva-platek-1988-list @@ -748,7 +748,7 @@
New Dependency Based Specification of Underlying Representations of Sentences - VladimirPetkevic + VladimirPetkevic C88-2108 petkevic-1988-new @@ -760,14 +760,14 @@
On The Semantic Interpretation of Nominals - JamesPustejovsky - Peter G.Anick + JamesPustejovsky + Peter G.Anick C88-2110 pustejovsky-anick-1988-semantic Using a Logic Grammar to Learn a Lexicon - MannyRayner + MannyRayner AsaHugosson GoranHagert C88-2111 @@ -778,22 +778,22 @@ WalterRead AlexQuilici JohnReeves - MichaelDyer + MichaelDyer C88-2112 read-etal-1988-evaluating Parallel Intersection and Serial Composition of Finite State Transducers - MikeReape - HenryThompson + MikeReape + HenryThompson C88-2113 reape-thompson-1988-parallel Framework for a Model of Dialogue - RonanReilly + RonanReilly GiacomoFerrari - IrinaProdanof + IrinaProdanof C88-2114 reilly-etal-1988-framework @@ -811,27 +811,27 @@
Default Logic, Natural Language and Generalized Quantifiers - PatrickSaint-Dizier + PatrickSaint-Dizier C88-2117 saint-dizier-1988-default Parsing Noisy Sentences HiroakiSaito - MasaruTomita + MasaruTomita C88-2118 saito-tomita-1988-parsing A New Strategy for Providing Definitions In Task-Oriented Dialogues Margaret H.Sarner - SandraCarberry + SandraCarberry C88-2119 sarner-carberry-1988-new An Augmented Context Free Grammar for Discourse - RemkoScha + RemkoScha LiviaPolanyi C88-2120 scha-polanyi-1988-augmented @@ -839,8 +839,8 @@ Parsing Strategies with ‘Lexicalized’ Grammars: Application to <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars YvesSchabes - AnneAbeille - Aravind K.Joshi + AnneAbeille + Aravind K.Joshi C88-2121 schabes-etal-1988-parsing @@ -861,7 +861,7 @@ <fixed-case>PANEL</fixed-case> Parallel Processing in Computational Linguistics HelmutSchnelle GarryCottrell - ParadipDey + ParadipDey Peter A.Reich LokendraShastri C88-2124 @@ -887,7 +887,7 @@ A Uniform Architecture for Parsing and Generation - Stuart M.Shieber + Stuart M.Shieber C88-2128 shieber-1988-uniform @@ -903,13 +903,13 @@ Directing the Generation of Living Space Descriptions PenelopeSibun Alison K.Huettner - David D.McDonald + David D.McDonald C88-2130 sibun-etal-1988-directing
On the Semantics of Focus Phenomena in <fixed-case>E</fixed-case>urotra - Erich H.Steiner + Erich H.Steiner JuttaWinter-Thielen C88-2131 steiner-winter-thielen-1988-semantics @@ -937,7 +937,7 @@ A Computer Readability Formula of <fixed-case>J</fixed-case>apanese Texts for Machine Scoring - TateisiYuka + YukaTateisi OnoYoshihiko YamadaHisao C88-2135 @@ -955,44 +955,44 @@ Application of the Direct Memory Access paradigm to natural language interlaces to knowledge-based systems HidetoTomabechi - MasaruTomita + MasaruTomita C88-2137 tomabechi-tomita-1988-application Combining Lexicon-Driven Parsing and Phrase-Structure-Based Parsing - MasaruTomita + MasaruTomita C88-2138 tomita-1988-combining Linguistic Contributions to Text-to-Speech Computer Prorgrams for <fixed-case>F</fixed-case>rench PierreTrescases - MatthewCrocker + MatthewCrocker C88-2139 trescases-crocker-1988-linguistic On the Interaction of Syntax and Semantics in a Syntactically Guided Caseframe Parser - HaraldTrost - ErnstBuchberger + HaraldTrost + ErnstBuchberger WolfgangHeinz C88-2140 trost-etal-1988-interaction How to Get Preferred Readings in Natural Language Analysis - Jun-ichiTsujii + Jun-ichiTsujii YukiyoshiMuto YuujiIkeda - MakotoNagao + MakotoNagao C88-2141 tsujii-etal-1988-get Dialogue Translation vs. Text Translation - Jun-ichiTsujii - MakotoNagao + Jun-ichiTsujii + MakotoNagao C88-2142 tsujii-nagao-1988-dialogue @@ -1005,7 +1005,7 @@ The Analysis of Tense and Aspect in <fixed-case>E</fixed-case>urotra - Frankvan Eynde + Frankvan Eynde C88-2144 van-eynde-1988-analysis @@ -1017,13 +1017,13 @@
Morphosyntactic correction in natural language interfaces - JeanVeronis + JeanVeronis C88-2146 veronis-1988-morphosyntactic Feature Structures Based <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - K.Vijay-Shanker + K.Vijay-Shanker A.K.Joshi C88-2147 vijay-shanker-joshi-1988-feature @@ -1036,13 +1036,13 @@ Issues in Word Choice - NigelWard + NigelWard C88-2149 ward-1988-issues Generation as Structure Driven Derivation - JurgenWedekind + JurgenWedekind C88-2150 wedekind-1988-generation @@ -1060,9 +1060,9 @@
Machine Tractable Dictionaries as Tools and Resources for Natural Language Processing - YorickWilks + YorickWilks DanFass - Cheng-mingGuo + Cheng-mingGuo James E.McDonald TonyPlate Brian M.Slator @@ -1077,15 +1077,15 @@ Machine Translation for Monolinguals - Mary McGeeWood - Brian J.Chandler + Mary McGeeWood + Brian J.Chandler C88-2155 wood-chandler-1988-machine Figuring out Most Plausible Interpretation from Spatial Descriptions AtsushiYamada - ToyoakiNishida + ToyoakiNishida ShujiDoshita C88-2156 yamada-etal-1988-figuring @@ -1106,13 +1106,13 @@ Identifying Zero Pronouns in <fixed-case>J</fixed-case>apanese Dialogue - KeiYoshimoto + KeiYoshimoto C88-2159 yoshimoto-1988-identifying Interactive Translation: a new approach - RemiZajac + RemiZajac C88-2160 zajac-1988-interactive @@ -1153,7 +1153,7 @@
<fixed-case>COMPLEX</fixed-case>: A Computational Lexicon for Natural Language Systems - JudithKlavans + JudithKlavans C88-2166 klavans-1988-complex diff --git a/data/xml/C90.xml b/data/xml/C90.xml index 7e8670f2d8..9dc7efe592 100644 --- a/data/xml/C90.xml +++ b/data/xml/C90.xml @@ -18,7 +18,7 @@ Design of a Hybrid Deterministic Parser Kanaan A.Faisal - Stan C.Kwasny + Stan C.Kwasny C90-1002 faisal-kwasny-1990-design @@ -38,7 +38,7 @@ Tagging for Learning: Collecting Thematic Relations from Corpus UriZernik - PaulJacobs + PaulJacobs C90-1005 zernik-jacobs-1990-tagging @@ -94,13 +94,13 @@
The Generalized <fixed-case>LR</fixed-case> Parser/Compiler V8-4: A Software Package for Practical <fixed-case>NL</fixed-case> Projects - MasaruTomita + MasaruTomita C90-1012 tomita-1990-generalized Generation for Dialogue Translation Using Typed Feature Structure Unification - YoshihiroUeda + YoshihiroUeda KiyoshiKogure C90-1013 ueda-kogure-1990-generation @@ -125,7 +125,7 @@ <fixed-case>STS</fixed-case>: An Experimental Sentence Translation System - EricWehrli + EricWehrli C90-1017 wehrli-1990-sts @@ -137,7 +137,7 @@
Deep Sentence Understanding in a Restricted Domain - PierreZweigenbaum + PierreZweigenbaum MarcCavazza C90-1019 zweigenbaum-cavazza-1990-deep @@ -207,14 +207,14 @@ An Application of Lexical Semantics to Knowledge Acquisition from Corpora - PeterAnick - JamesPustejovsky + PeterAnick + JamesPustejovsky C90-2002 anick-pustejovsky-1990-application Finding Translation Equivalents: An Application of Grammatical Metaphor - John A.Bateman + John A.Bateman C90-2003 bateman-1990-finding @@ -234,22 +234,22 @@
Towards Personal <fixed-case>MT</fixed-case>: general design, dialogue structure, potential role of speech - ChristianBoitet + ChristianBoitet C90-2006 boitet-1990-towards Lexical Ambiguity and The Role of Knowledge Representation in Lexicon Design - BranimirBoguraev - JamesPustejovsky + BranimirBoguraev + JamesPustejovsky C90-2007 boguraev-pustejovsky-1990-lexical Enjoy the Paper: Lexicology - TedBriscoe + TedBriscoe AnnCopestake - BranBoguraev + BranBoguraev C90-2008 briscoe-etal-1990-enjoy @@ -261,7 +261,7 @@
Information-based Case Grammar - Keh-jiannChen + Keh-jiannChen Chu-RenHuang C90-2010 chen-huang-1990-information @@ -269,8 +269,8 @@ An Augmented Chart Data Structure with Efficient Word Lattice Parsing Scheme In Speech Recognition Applications Lee-FengChien - K. J.Chen - Lin-ShanLee + K. J.Chen + Lin-ShanLee C90-2011 chien-etal-1990-augmented @@ -283,7 +283,7 @@ Modeling syntactic constraints on anaphoric binding MaryDalrymple - JohnMaxwell + JohnMaxwell AnnieZaenen C90-2013 dalrymple-etal-1990-modeling @@ -303,7 +303,7 @@ Integrating Stress and Intonation into a Concept-to-Speech System GeorgDorffner - ErnstBuchberger + ErnstBuchberger MarkusKommenda C90-2016 dorffner-etal-1990-integrating @@ -316,14 +316,14 @@ Feature Logic with Disjunctive Unification - JochenDorre - AndreasEisele + JochenDorre + AndreasEisele C90-2018 dorre-eisele-1990-feature Generating <fixed-case>F</fixed-case>rench with a Reversible Unification Grammar - DominiqueEstival + DominiqueEstival C90-2019 estival-1990-generating @@ -348,7 +348,7 @@ “Translation Great Problem” - On the Problem of Inserting Articles When Translating From <fixed-case>R</fixed-case>ussian Into <fixed-case>S</fixed-case>wedish - BarbaraGawronska-Werngren + BarbaraGawronska-Werngren C90-2023 gawronska-werngren-1990-translation @@ -363,7 +363,7 @@ Functor-Driven Natural Language Generation with Categorial-Unification Grammars DaleGerdemann - Erhard W.Hinrichs + Erhard W.Hinrichs C90-2025 gerdemann-hinrichs-1990-functor @@ -376,7 +376,7 @@ A Linguistic Theory of Robustness - SebastianGoeser + SebastianGoeser C90-2027 goeser-1990-linguistic @@ -416,27 +416,27 @@
A Bottom-up Generation for Principle-based Grammars Using Constraint Propagation - MasatoIshizaki + MasatoIshizaki C90-2033 ishizaki-1990-bottom To Parse or Not to Parse: Relation-Driven Text Skimming - Paul S.Jacobs + Paul S.Jacobs C90-2034 jacobs-1990-parse Representing and Integrating Linguistic Knowledge - DanielJurafsky + DanielJurafsky C90-2035 jurafsky-1990-representing A Spelling Correction Program Based on a Noisy Channel Model Mark D.Kernighan - Kenneth W.Church - William A.Gale + Kenneth W.Church + William A.Gale C90-2036 kernighan-etal-1990-spelling @@ -486,8 +486,8 @@
Disambiguating Cue Phrases in Text and Speech - DianeLitman - JuliaHirschberg + DianeLitman + JuliaHirschberg C90-2044 litman-hirschberg-1990-disambiguating @@ -499,7 +499,7 @@
Tenets for an Interlingual Representation Definite <fixed-case>NP</fixed-case>s - MontserratMeya + MontserratMeya C90-2046 meya-1990-tenets @@ -512,8 +512,8 @@ The Generation of High-Level Structure for Extended Explanations David J.Mooney - SandraCarberry - Kathleen F.McCoy + SandraCarberry + Kathleen F.McCoy C90-2048 mooney-etal-1990-generation @@ -525,7 +525,7 @@
A Head-Driven Approach to Incremental and Parallel Generation of Syntactic Structures - GunterNeumann + GunterNeumann WolfgangFinkler C90-2050 neumann-finkler-1990-head @@ -538,7 +538,7 @@ Reversible Unification Based Machine Translation - Gertjanvan Noord + Gertjanvan Noord C90-2052 van-noord-1990-reversible @@ -581,13 +581,13 @@
Gapping and Frame Semantics: A fresh look from a cognitive perspective - AndreasStolcke + AndreasStolcke C90-2059 stolcke-1990-gapping How to Invert a Natural Language Parser Into an Efficient Generator: An Algorithm for Logic Grammars - TomekStrzalkowskl + TomekStrzalkowskl C90-2060 strzalkowskl-1990-invert @@ -599,7 +599,7 @@
An Explanation Facility for a Grammar Writing System - Loong CheongTong + Loong CheongTong C90-2062 tong-1990-explanation @@ -611,7 +611,7 @@
The application of two-level morphology to non-concatenative <fixed-case>G</fixed-case>erman morphology - HaraldTrost + HaraldTrost C90-2064 trost-1990-application @@ -623,27 +623,27 @@
Why Human Translators Still Sleep in Peace? (Four Engineering and Linguistic Gaps in Nlp) - PaolaVelardi + PaolaVelardi C90-2066 velardi-1990-human Word Sense Disambiguation with Very Large Neural Networks Extracted from Machine Readable Dictionaries - JeanVeronis - Nancy M.Ide + JeanVeronis + Nancy M.Ide C90-2067 veronis-ide-1990-word Free Adjuncts in Natural Language Instructions - Bonnie LynnWebber + Bonnie LynnWebber BarbaraDi Eugenio C90-2068 webber-di-eugenio-1990-free Identifying Subjective Characters in Narrative - Janyce M.Wiebe + Janyce M.Wiebe C90-2069 wiebe-1990-identifying @@ -668,14 +668,14 @@
Generation of Synthes Is Programs in Robra (Ariane) From String-Tree Correspondence Grammars (Or a Strategy for Synthesis in Machine Translation) - ZaharinYusoff + ZaharinYusoff C90-2073 yusoff-1990-generation Morphological Analysis and Synthesis by Automated Discovery and Acquisition of Linguistic Rules Byoung-TakZhang - Yung-TaekKim + Yung-TaekKim C90-2074 zhang-kim-1990-morphological @@ -692,9 +692,9 @@ Using Lexicalized Tags for Machine Translation - AnneAbeille + AnneAbeille YvesSchabes - Aravind K.Joshi + Aravind K.Joshi C90-3001 abeille-etal-1990-using @@ -707,26 +707,26 @@
Backwards Phonology - JohnBear + JohnBear C90-3003 bear-1990-backwards Phonological Processing of Speech Variants - JulleCarson-Berndsen + JulleCarson-Berndsen C90-3004 carson-berndsen-1990-phonological A Karaka Based Approach to Parsing of <fixed-case>I</fixed-case>ndian Languages - AksharBharati + AksharBharati RajeevSangal C90-3005 bharati-sangal-1990-karaka Towards Personal <fixed-case>MT</fixed-case>: general design, dialogue structure, potential role of speech - ChristianBoitet + ChristianBoitet C90-3006 boitet-1990-towards-personal @@ -738,19 +738,19 @@
Human-Computer Interaction for Semantic Disambiguation - Ralf D.Brown + Ralf D.Brown C90-3008 brown-1990-human Syllable-based Morphology - Lynne J.Cahill + Lynne J.Cahill C90-3009 cahill-1990-syllable Acquisition of Lexical Information from a Large Textual <fixed-case>I</fixed-case>talian Corpus - NicolettaCalzolari + NicolettaCalzolari RemoBindi C90-3010 calzolari-bindi-1990-acquisition @@ -771,13 +771,13 @@ Efficient Disjunctive Unification for Bottom-Up Parsing - DavidCarter + DavidCarter C90-3013 carter-1990-efficient A Phonological Knowledge Base System Using Unification-based Formalism - A Case Study of <fixed-case>K</fixed-case>orean Phonology - Hee-SungChung + Hee-SungChung C90-3014 chung-1990-phonological @@ -789,7 +789,7 @@
Structured Meanings in Computational Linguistics - Keesvan Deemter + Keesvan Deemter C90-3016 van-deemter-1990-structured @@ -804,16 +804,16 @@ Generating Connectives MichaelElhadad - Kathleen R.McKeown + Kathleen R.McKeown C90-3018 elhadad-mckeown-1990-generating Organizing linguistic knowledge for multilingual generation - MartinEmele + MartinEmele UlrichHeld StefanMomma - RemiZajac + RemiZajac C90-3019 emele-etal-1990-organizing @@ -832,14 +832,14 @@ A Computational Approach to Binding Theory AlessandraGiorgi - FabioPianesi + FabioPianesi GiorgioSatta C90-3022 giorgi-etal-1990-computational Causal and Temporal Text Analysis: The Role of the Domain Model - RalphGrishman + RalphGrishman TomaszKsiezyk C90-3023 grishman-ksiezyk-1990-causal @@ -852,38 +852,38 @@ Is there content in empty heads? - LouiseGuthrie + LouiseGuthrie Brian M.Slator - YorickWilks - RebeccaBruce + YorickWilks + RebeccaBruce C90-3025 guthrie-etal-1990-content Hierarchy of Salience and Discourse Analysis and Production - EvaHajicova + EvaHajicova PetrKubon - VladlslavKubon + VladlslavKubon C90-3026 hajicova-etal-1990-hierarchy A Constraint-Based Approach to Linguistic Performance - KoitiHasida + KoitiHasida C90-3027 hasida-1990-constraint Translation by Abduction - Jerry R.Hobbs + Jerry R.Hobbs MegumiKameyama C90-3028 hobbs-kameyama-1990-translation Two Principles of Parse Preference - Jerry R.Hobbs - JohnBear + Jerry R.Hobbs + JohnBear C90-3029 hobbs-bear-1990-two @@ -895,8 +895,8 @@
The <fixed-case>BICORD</fixed-case> System Combining Lexical Information from Bilingual Corpora and Machine Readable Dictionaries - JudithKlavans - EvelyneTzoukermann + JudithKlavans + EvelyneTzoukermann C90-3031 klavans-tzoukermann-1990-bicord @@ -908,7 +908,7 @@
When Something Is Missing: Ellipsis, Coordination and the Chart - AlbertoLavelli + AlbertoLavelli OlivieroStock C90-3033 lavelli-stock-1990-something @@ -921,7 +921,7 @@ Expressive Power of Grammatical Formalisms - AlexisManaster-Ramer + AlexisManaster-Ramer WlodekZadrozny C90-3035 manaster-ramer-zadrozny-1990-expressive @@ -950,7 +950,7 @@ Meaning Representation and Text Planning ChristineDefrise - SergeiNirenburg + SergeiNirenburg C90-3039 defrise-nirenburg-1990-meaning @@ -964,7 +964,7 @@ Predicting Co-Occurrence Restrictions by Using Semantic Classifications in the Lexicon Elena V.Paducheva - Ekaterina V.Rakhilina + Ekaterina V.Rakhilina C90-3041 paducheva-rakhilina-1990-predicting @@ -976,20 +976,20 @@ Automatic translation of support verb constructions - MorrisSalkoff + MorrisSalkoff C90-3043 salkoff-1990-automatic Toward Memory-based Translation - SatoshiSato - MakotoNagao + SatoshiSato + MakotoNagao C90-3044 sato-nagao-1990-toward Synchronous <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - Stuart M.Shieber + Stuart M.Shieber YvesSchabes C90-3045 shieber-schabes-1990-synchronous @@ -1008,16 +1008,16 @@ Machine Translation without a source text - Harold L.Somers - Jun-ichiTsujii + Harold L.Somers + Jun-ichiTsujii DannyJones C90-3048 somers-etal-1990-machine A Finite-State Morphological Processor for <fixed-case>S</fixed-case>panish - EvelyneTzoukermann - Mark Y.Liberman + EvelyneTzoukermann + Mark Y.Liberman C90-3049 tzoukermann-liberman-1990-finite @@ -1029,14 +1029,14 @@
Incremental Parsing and Reason Maintenance - MatsWiren + MatsWiren C90-3051 wiren-1990-incremental Typed Unification Grammars - Martin C.Emele - RemiZajac + Martin C.Emele + RemiZajac C90-3052 emele-zajac-1990-typed @@ -1048,7 +1048,7 @@
The Self-Extending Lexicon: Off-Line and On-Line Defaulting of Lexical Information in the <fixed-case>METAL</fixed-case> Machine Translation System - GeertAdriaens + GeertAdriaens MaartenLemmons C90-3054 adriaens-lemmons-1990-self @@ -1059,7 +1059,7 @@ TerumasaEhara NoriyoshiUratani HidekiTanaka - NaotoKato + NaotoKato SumioNakase NorikazuAruga TakeoMatsuda @@ -1069,20 +1069,20 @@ Syntactic Description of Free Word Order Languages TaniaAvgustinova - KarelOliva + KarelOliva C90-3056 avgustinova-oliva-1990-syntactic <fixed-case>C</fixed-case>zech-to-<fixed-case>R</fixed-case>ussian Transducing Dictionary AllaBemova - VladislavKubon + VladislavKubon C90-3057 bemova-kubon-1990-czech A Large <fixed-case>R</fixed-case>ussian Morphological Vocabulary for Ibm Compatibles and Methods of Its Compression - Igor A.Bolshakov + Igor A.Bolshakov C90-3058 bolshakov-1990-large @@ -1129,7 +1129,7 @@ A message processing system with object-centered semantics - Jean-FrancoisDelannoy + Jean-FrancoisDelannoy C90-3064 delannoy-1990-message @@ -1159,7 +1159,7 @@
An Integrated System for Morphological Analysis of the <fixed-case>S</fixed-case>lovene Language - TomazErjavec + TomazErjavec PeterTancig C90-3069 erjavec-tancig-1990-integrated @@ -1172,14 +1172,14 @@ Information Extraction and Semantic Constraints - RalphGrishman + RalphGrishman JohnSterling C90-3071 grishman-sterling-1990-information Spelling-checking for Highly Inflective Languages - JanHajic + JanHajic JanusDrozd C90-3072 hajic-drozd-1990-spelling @@ -1216,8 +1216,8 @@ The <fixed-case>GE</fixed-case> <fixed-case>NLT</fixed-case>oolset: A Software Foundation for Intelligent Text Processing - Paul S.Jacobs - Lisa F.Rau + Paul S.Jacobs + Lisa F.Rau C90-3077 jacobs-rau-1990-ge @@ -1229,7 +1229,7 @@
Intelligent Handling of Weather Forecasts - StephanKerpedjiev + StephanKerpedjiev VeskaNoncheva C90-3079 kerpedjiev-noncheva-1990-intelligent @@ -1262,19 +1262,19 @@ A <fixed-case>PARLOG</fixed-case> Implementation of Government-Binding Theory - Robert J.Kuhns + Robert J.Kuhns C90-3084 kuhns-1990-parlog Automatic Indexing and Government-Binding Theory - Robert J.Kuhns + Robert J.Kuhns C90-3085 kuhns-1990-automatic “The first million is hardest to get”: Building a Large Tagged Corpus as Automatically as Possible - GunnelKallgren + GunnelKallgren C90-3086 kallgren-1990-first @@ -1295,10 +1295,10 @@
Applying Natural Language Processing Techniques to Augmentative Communication Systems - KathleenMcCoy + KathleenMcCoy PatrickDemasco - MarkJones - ChristopherPennington + MarkJones + ChristopherPennington CharlesRowe C90-3089 mccoy-etal-1990-applying @@ -1344,7 +1344,7 @@ Simple Parser for an Hpsg-Style Grammar Implemented in <fixed-case>P</fixed-case>rolog - KarelOliva + KarelOliva C90-3096 oliva-1990-simple @@ -1376,7 +1376,7 @@
Pilot Implementation of a Bilingual Knowledge Bank - VictorSadler + VictorSadler RonaldVendelmans C90-3101 sadler-vendelmans-1990-pilot @@ -1384,16 +1384,16 @@ A Mechanism for ellipsis resolution in dialogued systems A.Diaz de Ilarraza Sanchez - H.Rodriguez Hontoria + H.Rodriguez Hontoria F.Maillo Verdejo C90-3102 diaz-de-ilarraza-sanchez-etal-1990-mechanism <fixed-case>MORPHO</fixed-case>-<fixed-case>ASSISTANT</fixed-case>: The Proper Treatment of Morphological Knowledge - KirilSimov + KirilSimov GaliaAngelova - ElenaPaskaleva + ElenaPaskaleva C90-3103 simov-etal-1990-morpho diff --git a/data/xml/C92.xml b/data/xml/C92.xml index 8f412da235..6d933358ee 100644 --- a/data/xml/C92.xml +++ b/data/xml/C92.xml @@ -29,13 +29,13 @@ The scientific programme of <fixed-case>COLING</fixed-case>-92 - AntonioZampolli + AntonioZampolli C92-1004 zampolli-1992-scientific About these proceedings - ChristianBoitet + ChristianBoitet C92-1005 boitet-1992-proceedings @@ -57,7 +57,7 @@
Feature Structure Based Semantic Head Driven Generation - Gen-ichiroKikui + Gen-ichiroKikui C92-1009 kikui-1992-feature @@ -87,13 +87,13 @@
Synchronous <fixed-case>TAG</fixed-case>s and <fixed-case>F</fixed-case>rench Pronominal Clitics - AnneAbeille + AnneAbeille C92-1013 abeille-1992-synchronous A High-level Morphological Description Language Exploiting Inflectional Paradigms - PeterAnick + PeterAnick SuzanneArtemieff C92-1014 anick-artemieff-1992-high @@ -112,7 +112,7 @@ Trace & Unification Grammar - Hans UlrichBlock + Hans UlrichBlock StefanieSchachtl C92-1017 block-schachtl-1992-trace @@ -125,7 +125,7 @@ Word Identification for <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Sentences - Keh-JiannChen + Keh-JiannChen Shing-HuanLiu C92-1019 chen-liu-1992-word @@ -144,14 +144,14 @@ Chart Parsing of Robust Grammars - SebastianGoeser + SebastianGoeser C92-1022 goeser-1992-chart Stock of Shared Knowledge - A Tool for Solving Pronominal Anaphora - EvaHajicova - VladislavKubon + EvaHajicova + VladislavKubon PetrKubon C92-1023 hajicova-etal-1992-stock @@ -165,7 +165,7 @@ Two-Level Morphology with Composition LauriKarttunen - Ronald M.Kaplan + Ronald M.Kaplan AnnieZaenen C92-1025 karttunen-etal-1992-two @@ -193,7 +193,7 @@ Dynamic Programming Method for Analyzing Conjunctive Structures in <fixed-case>J</fixed-case>apanese SadaoKurohashi - MakotoNagao + MakotoNagao C92-1029 kurohashi-nagao-1992-dynamic @@ -205,7 +205,7 @@ The Proper Treatment of Word Order in Hpsg - KarelOliva + KarelOliva C92-1031 oliva-1992-proper @@ -217,13 +217,13 @@ <fixed-case>TTP</fixed-case>: A Fast and Robust Parser for Natural Language - TomekStrzalkowski + TomekStrzalkowski C92-1033 strzalkowski-1992-ttp Structure Sharing in <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - K.Vijay-Shanker + K.Vijay-Shanker YvesSchabes C92-1034 vijay-shanker-schabes-1992-structure @@ -262,7 +262,7 @@ Conceptual Structures and <fixed-case>CCG</fixed-case>: Linking Theory and Incorporated Argument Adjuncts - MichaelWhite + MichaelWhite C92-1040 white-1992-conceptual @@ -287,14 +287,14 @@ An Acquisition Model for both Choosing and Resolving Anaphora in Conjoined <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Sentences Benjamin L.Chen - Von-WunSoo + Von-WunSoo C92-1044 chen-soo-1992-acquisition Aspect-Switching and Subordination: the Role of It-Clefts in Discourse JudyDelin - JonOberlander + JonOberlander C92-1045 delin-oberlander-1992-aspect @@ -320,7 +320,7 @@ Using Linguistic, World, and Contextual Knowledge in a Plan Recognition Model of Dialogue LynnLambert - SandraCarberry + SandraCarberry C92-1049 lambert-carberry-1992-using @@ -338,8 +338,8 @@
Temporal Structure of Discourse - Irene PimentaRodrigues - Jose Gabriel P.Lopes + Irene PimentaRodrigues + Jose Gabriel P.Lopes C92-1052 rodrigues-lopes-1992-temporal @@ -351,13 +351,13 @@
Redundancy in Collaborative Dialogue - Marilyn A.Walker + Marilyn A.Walker C92-1054 walker-1992-redundancy Syntactic Ambiguity Resolution Using A Discrimination and Robustness Oriented Adaptive Learning Algorithm - Tung-HuiChiang + Tung-HuiChiang Yi-ChungLin Keh-YihSu C92-1055 @@ -365,9 +365,9 @@ Lexical Disambiguation using Simulated Annealing - JimCowie + JimCowie JoeGuthrie - LouiseGuthrie + LouiseGuthrie C92-1056 cowie-etal-1992-lexical-disambiguation @@ -407,7 +407,7 @@ A Chart-based Method of <fixed-case>ID</fixed-case>/<fixed-case>LP</fixed-case> Parsing with Generalized Discrimination Networks SurapantMeknavin - ManabuOkumura + ManabuOkumura HozumiTanaka C92-1062 meknavin-etal-1992-chart @@ -464,7 +464,7 @@ A Linear Least Squares Fit Mapping Method for Information Retrieval From Natural Language Texts YimingYang - Christopher G.Chute + Christopher G.Chute C92-2069 yang-chute-1992-linear @@ -479,7 +479,7 @@ AlainBerrendonner MouniaFredj FlavioOquendo - JacquesRouault + JacquesRouault C92-2071 berrendonner-etal-1992-un @@ -506,7 +506,7 @@
<fixed-case>T</fixed-case>alisman: Un Systeme Multi-Agents Gouverne Par Des Lois Linguistiques Pour Le Traitement De La Langue Naturelle - Marie-HeleneStefanini + Marie-HeleneStefanini AlainBerrendonner GenevieveLallich FlavioOquendo @@ -515,7 +515,7 @@ Disjunctive Feature Structures as Hypergraphs - JeanVeronis + JeanVeronis C92-2076 veronis-1992-disjunctive @@ -525,7 +525,7 @@ L.Kogan W.Kwitakowski R.Minvaleev - R.Piotrowski + R.Piotrowski V.Shumovsky E.Tioun Yu.Tovmach @@ -549,28 +549,28 @@
Translation Ambiguity Resolution Based on Text Corpora of Source and Target Languages - ShinichiDoi + ShinichiDoi KazunoriMuraki C92-2080 doi-muraki-1992-translation The Automatic Creation of Lexical Entries for a Multilingual <fixed-case>MT</fixed-case> System - DavidFarwell - LouiseGuthrie - YorickWilks + DavidFarwell + LouiseGuthrie + YorickWilks C92-2081 farwell-etal-1992-automatic Automatic Acquisition of Hyponyms from Large Text Corpora - Marti A.Hearst + Marti A.Hearst C92-2082 hearst-1992-automatic Structural Patterns vs. String Patterns for Extracting Semantic Information from Dictionaries - SimonettaMontemagni + SimonettaMontemagni LucyVanderwende C92-2083 montemagni-vanderwende-1992-structural @@ -578,8 +578,8 @@ Derivation of Underlying Valency Frames From a Learner’s Dictionary AlexandrRosen - EvaHajicova - JanHajic + EvaHajicova + JanHajic C92-2084 rosen-etal-1992-derivation @@ -587,8 +587,8 @@ Linguistic Knowledge Generator SatoshiSekine SofiaAnaniadou - Jeremy J.Carroll - Jun’ichiTsujii + Jeremy J.Carroll + Jun’ichiTsujii C92-2085 sekine-etal-1992-linguistic @@ -605,7 +605,7 @@ Logical Form of Hierarchical Relation on Verbs and Extracting it from Definition Sentences in a <fixed-case>J</fixed-case>apanese Dictionary YoichiTomiura TeigoNakamura - ToruHitaka + ToruHitaka ShoYoshida C92-2087 tomiura-etal-1992-logical @@ -613,21 +613,21 @@ Lexical Knowledge Acquisition from Bilingual Corpora TakehitoUtsuro - YujiMatsumoto - MakotoNagao + YujiMatsumoto + MakotoNagao C92-2088 utsuro-etal-1992-lexical A Feature-Based Model for Lexical Databases - JeanVeronis - NancyIde + JeanVeronis + NancyIde C92-2089 veronis-ide-1992-feature From Cogram to Alcogram: Toward a Controlled <fixed-case>E</fixed-case>nglish Grammar Checker - GeertAdriaens + GeertAdriaens DirkSchreurs C92-2090 adriaens-schreurs-1992-cogram @@ -652,14 +652,14 @@ Parameterization of the Interlingua in Machine Translation - BonnieDorr + BonnieDorr C92-2094 dorr-1992-parameterization Isolating Cross-linguistic Parsing Complexity with a Principles-and-Parameters Parser: A Case Study of <fixed-case>J</fixed-case>apanese and <fixed-case>E</fixed-case>nglish SandiwayFong - Robert C.Berwick + Robert C.Berwick C92-2095 fong-berwick-1992-isolating @@ -678,20 +678,20 @@
Aspect - A Problem for <fixed-case>MT</fixed-case> - BarbaraGawronska + BarbaraGawronska C92-2098 gawronska-1992-aspect Acquisition of Selectional Patterns - RalphGrishman + RalphGrishman JohnSterling C92-2099 grishman-sterling-1992-acquisition A Three-level Revision Model for Improving <fixed-case>J</fixed-case>apanese Bad-styled Expressions - YoshihikoHayashi + YoshihikoHayashi C92-2100 hayashi-1992-three @@ -707,7 +707,7 @@ Interaction between Structural Changes in Machine Translation SatoshiKinoshita JohnPhillips - Jun-ichiTsujii + Jun-ichiTsujii C92-2102 kinoshita-etal-1992-interaction-structural
@@ -727,8 +727,8 @@
Self-Monitoring with Reversible Grammars - GunterNeumann - Gertjanvan Noord + GunterNeumann + Gertjanvan Noord C92-2105 neumann-van-noord-1992-self @@ -748,7 +748,7 @@
Preventing False Temporal Implicatures: Interactive Defaults for Text Generation - JonOberlander + JonOberlander AlexLascarides C92-2108 oberlander-lascarides-1992-preventing @@ -771,7 +771,7 @@ Explanatory Text Planning in Logic Based Systems Clarisse Sieckeniusde Souza - Maria das GracasVolpe + Maria das GracasVolpe C92-2111 de-souza-volpe-1992-explanatory @@ -792,8 +792,8 @@ Lexical choice in context: generating procedural texts - AgnesTutin - RichardKittredge + AgnesTutin + RichardKittredge C92-2114 tutin-kittredge-1992-lexical @@ -838,13 +838,13 @@ Semantic Network Array Processor as a Massively Parallel Computing Platform for High Performance and Large-Scale Natural Language Processing HiroakiKitano - DanMoldovan + DanMoldovan C92-2121 kitano-moldovan-1992-semantic A Case Study of Natural Language Customisation: The Practical Effects of World Knowledge - Marilyn A.Walker + Marilyn A.Walker Andrew L.Nelson PhilStenton C92-2122 @@ -852,7 +852,7 @@ Towards Computer-Aided Linguistic Engineering - RemiZajac + RemiZajac C92-2123 zajac-1992-towards @@ -898,7 +898,7 @@
The Ips System - EricWehrli + EricWehrli C92-3129 wehrli-1992-ips @@ -911,21 +911,21 @@
Causal ambiguity in Natural Language: conceptual representation of ‘parce que/because’ and ‘puisque/since’ - AdelineNazarenko-Perrin + AdelineNazarenko-Perrin C92-3131 nazarenko-perrin-1992-causal Surface and Deep Cases - JarmilaPanevová - HanaSkoumalova + JarmilaPanevová + HanaSkoumalova C92-3132 panevova-skoumalova-1992-surface An Integrated Syntactic and Semantic System for Natural Language Understanding - FrederiqueSegond - KarenJensen + FrederiqueSegond + KarenJensen C92-3133 segond-jensen-1992-integrated @@ -950,25 +950,25 @@
Attitude Emergence - An Effective Interpretation Scheme for Persuasive Discourse - Horng-Jyh P.Wu - Steven L.Lytinen + Horng-Jyh P.Wu + Steven L.Lytinen C92-3137 wu-lytinen-1992-attitude The Nondirectional Representation of Systemic Functional Grammars and Semantics as Typed Feature Structures - John A.Bateman - MartinEmele + John A.Bateman + MartinEmele StefanMomma C92-3138 bateman-etal-1992-nondirectional A Statistical Approach to Machine Aided Translation of Terminology <fixed-case>B</fixed-case>anks - Jyun-ShengChang + Jyun-ShengChang AndrewChang Tsuey-FenLin - Sur-JinKer + Sur-JinKer C92-3139 chang-etal-1992-statistical @@ -998,7 +998,7 @@ Coupling an Automatic Dictation System With a Grammar Checker Jean-PierreChanod - MarcEl-Beze + MarcEl-Beze SylvieGuillemin-Lanne C92-3143 chanod-etal-1992-coupling @@ -1037,7 +1037,7 @@ Multilinguisation d’un editeur de documents structures. Application a un dictionnaire trilingue Huy KhanhPhan - ChristianBoitet + ChristianBoitet C92-3148 phan-boitet-1992-multilinguisation @@ -1083,7 +1083,7 @@ <fixed-case>JDII</fixed-case>: Parsing <fixed-case>I</fixed-case>talian with a Robust Constraint Grammar AndreaBolioli - LucaDini + LucaDini GiovanniMalnati C92-3155 bolioli-etal-1992-jdii @@ -1092,7 +1092,7 @@ Parsing and Case Analysis in <fixed-case>TANKA</fixed-case> TerryCopeck SylvainDelisle - StanSzpakowicz + StanSzpakowicz C92-3156 copeck-etal-1992-parsing @@ -1115,7 +1115,7 @@ Generation of Informative Texts with Style - Stephan M.Kerpedjiev + Stephan M.Kerpedjiev C92-3159 kerpedjiev-1992-generation @@ -1138,27 +1138,27 @@
A Knowledge-based Machine-aided System for <fixed-case>C</fixed-case>hinese Text Abstraction - Benjamin K.Tsou - Hing-cheungHo - Tom Bong-yeungLai - Caesar SuenLun - Hing-lungLin + Benjamin K.Tsou + Hing-cheungHo + Tom Bong-yeungLai + Caesar SuenLun + Hing-lungLin C92-3162 tsou-etal-1992-knowledge Interaction Between Lexicon and Image: Linguistic Specifications of Animation MaryvonneAbraham - Jean-PierreDesclés + Jean-PierreDesclés C92-3163 abraham-descles-1992-interaction A Spoken Language Translation System: <fixed-case>SL-TRANS</fixed-case>2 - TsuyoshiMorimoto + TsuyoshiMorimoto MasamiSuzuki ToshiyukiTakezawa - Gen’ichiroKikui + Gen’ichiroKikui MasaakiNagata MutsukoTomokiyo C92-3164 @@ -1186,7 +1186,7 @@ The <fixed-case>KANT</fixed-case> System: Fast, Accurate, High-Quality Translation in Practical Domains - Eric H.Nyberg III + Eric H.Nyberg III TerukoMitamura C92-3168 nyberg-iii-mitamura-1992-kant @@ -1209,7 +1209,7 @@ The Assignment of Grammatical Relations in Natural Language Processing - LeonardoLesmo + LeonardoLesmo VincenzoLombardo C92-4170 lesmo-lombardo-1992-assignment @@ -1228,8 +1228,8 @@ Tokenization as the Initial Phase in <fixed-case>NLP</fixed-case> - Jonathan J.Webster - ChunyuKit + Jonathan J.Webster + ChunyuKit C92-4173 webster-kit-1992-tokenization @@ -1242,7 +1242,7 @@ Embedding <fixed-case>DRT</fixed-case> in a Situation Theoretic Framework - Alan W.Black + Alan W.Black C92-4175 black-1992-embedding @@ -1254,8 +1254,8 @@
Degrees of Stativity: The Lexical Representation of Verb Aspect - Judith L.Klavans - MartinChodorow + Judith L.Klavans + MartinChodorow C92-4177 klavans-chodorow-1992-degrees @@ -1267,20 +1267,20 @@
An Alternative to Deep Case for Representing Relational Information - NigelWard + NigelWard C92-4179 ward-1992-alternative Preferred Argument Structure for Discourse Understanding - Ka-WaiChui + Ka-WaiChui C92-4180 chui-1992-preferred On the Interpretation of Natural Language Instructions BarbaraDi Eugenio - MichaelWhite + MichaelWhite C92-4181 di-eugenio-white-1992-interpretation @@ -1306,14 +1306,14 @@
Unifying Disjunctive Feature Structures - LenaStromback + LenaStromback C92-4185 stromback-1992-unifying Ebl²: An Approach to Automatic Lexical Acquisition LarsAsker - BjornGamback + BjornGamback ChristerSamuelsson C92-4186 asker-etal-1992-ebl2 @@ -1332,22 +1332,22 @@ Converting Large On-Line Valency Dictionaries for <fixed-case>NLP</fixed-case> Applications: From Proton Descriptions to Metal Frames - GeertAdriaens + GeertAdriaens GertDe Braekeleer C92-4188 adriaens-de-braekeleer-1992-converting Genus Disambiguation: A Study in Weighted Preference - RebeccaBruce - LouiseGuthrie + RebeccaBruce + LouiseGuthrie C92-4189 bruce-guthrie-1992-genus Can Computers Handle Adverbs? Sumali Pin-NgernConlon - MarthaEvens + MarthaEvens C92-4190 conlon-evens-1992-computers @@ -1379,7 +1379,7 @@ A <fixed-case>C</fixed-case>hinese Corpus for Linguistic Research Chu-RenHuang - Keh-jiannChen + Keh-jiannChen C92-4194 huang-chen-1992-chinese @@ -1407,7 +1407,7 @@
A Solution for the Problem of Interactive Disambiguation - HerveBlanchon + HerveBlanchon C92-4198 blanchon-1992-solution @@ -1421,7 +1421,7 @@
Knowledge Extraction From Texts by Sintesi - FabioCiravegna + FabioCiravegna PaoloCampia AlbertoColognese C92-4200 @@ -1438,33 +1438,33 @@ Hierarchical Lexical Structure and Interpretive Mapping in Machine Translation TerukoMitamura - Eric H.Nyberg III + Eric H.Nyberg III C92-4202 mitamura-nyberg-iii-1992-hierarchical <fixed-case>CTM</fixed-case>: An Example-Based Translation Aid System - SatoshiSato + SatoshiSato C92-4203 sato-1992-ctm Applying and Improving the Restriction Grammar Approach for <fixed-case>D</fixed-case>utch Patient Discharge Summaries - PeterSpyns - GeertAdriaens + PeterSpyns + GeertAdriaens C92-4204 spyns-adriaens-1992-applying Event Relations at the Phonetics/Phonology Interface - JulieCarson-Berndsen + JulieCarson-Berndsen DafyddGibbon C92-4205 carson-berndsen-gibbon-1992-event Multimodal Database Query - Nicholas J.Haddock + Nicholas J.Haddock C92-4206 haddock-1992-multimodal @@ -1473,7 +1473,7 @@ AtsushiYamada TadashiYamamoto HisashiIkeda - ToyoakiNishida + ToyoakiNishida ShujiDoshita C92-4207 yamada-etal-1992-reconstructing @@ -1496,7 +1496,7 @@ Semantic dictionary viewed as a lexical database Elena V.Paducheva - Ekaterina V.Rakhilina + Ekaterina V.Rakhilina Marina V.Filipenko C92-4210 paducheva-etal-1992-semantic @@ -1504,7 +1504,7 @@ Knowledge Acquisition and <fixed-case>C</fixed-case>hinese Parsing Based on Corpus YuanChunfa - HuangChangning + ChangningHuang PanShimei C92-4211 yuan-etal-1992-knowledge @@ -1524,7 +1524,7 @@ Marking and Tagging a Computerized Corpus ErikssonGunnar - KallgrenGunnel + GunnelKallgren C92-4214 eriksson-kallgren-1992-marking diff --git a/data/xml/C94.xml b/data/xml/C94.xml index aa4712f396..3e9917cb61 100644 --- a/data/xml/C94.xml +++ b/data/xml/C94.xml @@ -16,7 +16,7 @@ Improvement in Customizability Using Translation Templates SatoshiKinoshita AkiraKumano - HidekiHirakawa + HidekiHirakawa C94-1001 kinoshita-etal-1994-improvement @@ -36,7 +36,7 @@ Interpreting Compounds for Machine Translation - BarbaraGawronska + BarbaraGawronska AndersNordner ChristerJohansson CarolineWillners @@ -47,7 +47,7 @@ Towards Machine Translation Using Contextual Information TimCornish KimikazuFujita - RyochiSugimura + RyochiSugimura C94-1005 cornish-etal-1994-towards @@ -80,7 +80,7 @@ Building an <fixed-case>MT</fixed-case> Dictionary From Parallel Texts Based on Linguistic and Statistical Information AkiraKumano - HidekiHirakawa + HidekiHirakawa C94-1009 kumano-hirakawa-1994-building @@ -102,27 +102,27 @@ Coping With Ambiguity in a Large-Scale Machine Translation System - Kathryn L.Baker - Alexander M.Franz - Pamela W.Jordan + Kathryn L.Baker + Alexander M.Franz + Pamela W.Jordan TerukoMitamura - Eric H.Nyberg + Eric H.Nyberg C94-1012 baker-etal-1994-coping Evaluation Metrics for Knowledge-Based Machine Translation - Eric H.Nyberg, 3rd + Eric H.Nyberg, 3rd TerukoMitamura - Jaime G.Carbonell + Jaime G.Carbonell C94-1013 nyberg-3rd-etal-1994-evaluation A Matching Technique in Example-Based Machine Translation - LambrosCranias - HarrisPapageorgiou - SteliosPiperdis + LambrosCranias + HarrisPapageorgiou + SteliosPiperdis C94-1014 cranias-etal-1994-matching @@ -136,40 +136,40 @@ The <fixed-case>J</fixed-case>a<fixed-case>RAP</fixed-case> Experimental System of <fixed-case>J</fixed-case>apanese-<fixed-case>R</fixed-case>ussian Automatic Translation Larisa S.Modina - Zoya M.Shalyapina + Zoya M.Shalyapina C94-1016 modina-shalyapina-1994-jarap Perspectives of <fixed-case>DBMT</fixed-case> for monolingual authors on the basis of <fixed-case>LIDIA</fixed-case>-1, an implemented mock-up - HerveBlanchon + HerveBlanchon C94-1017 blanchon-1994-perspectives Modals as a Problem for <fixed-case>MT</fixed-case> BengtSigurd - BarbaraGawronska + BarbaraGawronska C94-1018 sigurd-gawronska-1994-modals Two Types of Adaptive <fixed-case>MT</fixed-case> Environments - SergeiNirenburg - RobertFrederking - DavidFarwell - YorickWilks + SergeiNirenburg + RobertFrederking + DavidFarwell + YorickWilks C94-1019 nirenburg-etal-1994-two An <fixed-case>E</fixed-case>nglish-to-<fixed-case>K</fixed-case>orean Machine Translator: <fixed-case>MATES/EK</fixed-case> - Key-SunChoi + Key-SunChoi SeungmiLee HiongunKim - Deok-bongKim - CheoljungKweon - GilchangKim + Deok-bongKim + CheoljungKweon + GilchangKim C94-1020 choi-etal-1994-english @@ -181,7 +181,7 @@
Morphology with a Null-Interface - HaraldTrost + HaraldTrost JohannesMatiasek C94-1022 trost-matiasek-1994-morphology @@ -189,27 +189,27 @@ <fixed-case>AUTOMATIC</fixed-case> <fixed-case>MODEL</fixed-case> <fixed-case>REFINEMENT</fixed-case> - with an application to tagging Yi-ChungLin - Tung-HuiChiang + Tung-HuiChiang Keh-YihSu C94-1023 lin-etal-1994-automatic Disambiguation of Super Parts of Speech (or Supertags): Almost Parsing - Aravind K.Joshi - B.Srinivas + Aravind K.Joshi + B.Srinivas C94-1024 joshi-srinivas-1994-disambiguation Probabilistic Tagging With Feature Structures - AndreKempe + AndreKempe C94-1025 kempe-1994-probabilistic A Part-of-Speech-Based Alignment Algorithm - Kuang-huaChen + Kuang-huaChen Hsin-HsiChen C94-1026 chen-chen-1994-part @@ -223,13 +223,13 @@ The Rumors System of <fixed-case>R</fixed-case>ussian Synthesis Max I.Kanovich - Zoyn M.Shalyapina + Zoyn M.Shalyapina C94-1028 kanovich-shalyapina-1994-rumors MULTI-TAPE TWO-LEVEL MORPHOLOGY: A Case Study in <fixed-case>S</fixed-case>emitic Non-linear Morphology - George AntonKiraz + George AntonKiraz C94-1029 kiraz-1994-multi @@ -270,14 +270,14 @@ Syllable-Based Model for the <fixed-case>K</fixed-case>orean Morphology Seung-ShikKang - Yung TaekKim + Yung TaekKim C94-1035 kang-kim-1994-syllable Segmenting a Sentence Into Morphemes Using Statistic Information Between Words ShilmNobesawa - JunyaTsutsumi + JunyaTsutsumi TomoakiNitta KotaroOno Sun DaJiang @@ -295,14 +295,14 @@ AN ARCHITECTURE FOR A UNIVERSAL LEXICON: A Case Study on Shared Syntactic Information in <fixed-case>J</fixed-case>apanese, <fixed-case>H</fixed-case>indi, <fixed-case>B</fixed-case>engali, <fixed-case>G</fixed-case>reek, and <fixed-case>E</fixed-case>nglish NaoyukiNomura - Douglas A.Jones - Robert C.Berwick + Douglas A.Jones + Robert C.Berwick C94-1038 nomura-etal-1994-architecture Adjuncts and the Processing of Lexical Rules - Gertjanvan Noord + Gertjanvan Noord GosseBouma C94-1039 van-noord-bouma-1994-adjuncts @@ -328,9 +328,9 @@ Comlex Syntax: Building a Computational Lexicon - RalphGrishman - CatherineMacleod - AdamMeyers + RalphGrishman + CatherineMacleod + AdamMeyers C94-1042 grishman-etal-1994-comlex @@ -342,7 +342,7 @@ lnterlinguai Lexical Organisation for Multilingual Lexical Databases in <fixed-case>NADIA</fixed-case> - GillesSerasset + GillesSerasset C94-1044 serasset-1994-lnterlinguai @@ -362,13 +362,13 @@ Logic Compression of Dictionaries for Multilingual Spelling Checkers - BoubakerMeddeb Hamrouni + BoubakerMeddeb Hamrouni C94-1047 meddeb-hamrouni-1994-logic Construction of a Bilingual Dictionary Intermediated by a Third Language - KumikoTanaka + KumikoTanaka KyojiUmemura C94-1048 tanaka-umemura-1994-construction @@ -396,9 +396,9 @@ <fixed-case>TGE</fixed-case>: Tlinks Generation Environment AliciaAgeno - FrancescRibas - GermanRigau - HoracioRodriguez + FrancescRibas + GermanRigau + HoracioRodriguez AnnaSamiotou C94-1052 ageno-etal-1994-tge @@ -418,7 +418,7 @@ Generating Multilingual Documents from a Knowledge Base The <fixed-case>TECHDOC</fixed-case> Project - DietmarRosner + DietmarRosner ManfredStede C94-1055 rosner-stede-1994-generating @@ -433,15 +433,15 @@ The Correct Place of Lexical Semantics in Interlingual <fixed-case>MT</fixed-case> - LoriLevin - SergeiNirenburg + LoriLevin + SergeiNirenburg C94-1057 levin-nirenburg-1994-correct Default Handling in Incremental Generation KarinHarbusch - Gen-ichiroKikui + Gen-ichiroKikui AnneKilger C94-1058 harbusch-etal-1994-default @@ -523,7 +523,7 @@ The “Whiteboard” Architecture: A Way to Integrate Heterogeneous Components of <fixed-case>NLP</fixed-case> Systems - ChristianBoitet + ChristianBoitet MarkSeligman C94-1070 boitet-seligman-1994-whiteboard @@ -540,12 +540,12 @@ RolfBackofen StephanBusemann Abdel KaderDiagne - Elizabeth A.Hinkelman - WalterKasper - BerndKiefer - Hans-UlrichKrieger + Elizabeth A.Hinkelman + WalterKasper + BerndKiefer + Hans-UlrichKrieger KlausNetter - GunterNeumann + GunterNeumann StephanOepen Stephen P.Spackman C94-1072 @@ -554,7 +554,7 @@ A Corpus-Based Learning Technique for Building A Self-Extensible Parser Rey-LongLiu - Von-WunSoo + Von-WunSoo C94-1073 liu-soo-1994-corpus @@ -568,20 +568,20 @@ A Modular Architecture for Constraint-Based Parsing - FrancoisBarthelemy + FrancoisBarthelemy FrancoisRouaix C94-1075 barthelemy-rouaix-1994-modular Minimal Change and Bounded Incremental Parsing - MatsWiren + MatsWiren C94-1076 wiren-1994-minimal Emergent Parsing and Generation with Generalized Chart - HasidaKoiti + KoitiHasida C94-1077 hasida-1994-emergent @@ -615,7 +615,7 @@ <fixed-case>LHIP</fixed-case>: Extended <fixed-case>DCG</fixed-case>s for Configurable Robust Parsing AfzalBallim - GrahamRussell + GrahamRussell C94-1082 ballim-russell-1994-lhip @@ -627,41 +627,41 @@ Towards Automatic Extraction of Monolingual and Bilingual Terminology - BeatriceDaille - EricGaussier + BeatriceDaille + EricGaussier Jean-MarcLange C94-1084 daille-etal-1994-towards <fixed-case>F</fixed-case>ax: An Alternative to <fixed-case>SGML</fixed-case> - Kenneth W.Church - William A.Gale + Kenneth W.Church + William A.Gale Jonathan I.Helfman - David D.Lewis + David D.Lewis C94-1085 church-etal-1994-fax Referring to World Objects With Text and Pictures - ElisabethAndre + ElisabethAndre ThomasRist C94-1086 andre-rist-1994-referring A Two-Level Morphological Analysis of <fixed-case>K</fixed-case>orean - Deok-BongKim - Sung-JinLee - Key-SunChoi - Gil-ChangKim + Deok-BongKim + Sung-JinLee + Key-SunChoi + Gil-ChangKim C94-1087 kim-etal-1994-two Character-based Collocation for <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Chu-RenHuang - Keh-jiannChen + Keh-jiannChen Yun-yanYang C94-1088 huang-etal-1994-character @@ -692,7 +692,7 @@ Annotating 200 Million Words: The Bank of <fixed-case>E</fixed-case>nglish Project - TimoJarvinen + TimoJarvinen C94-1092 jarvinen-1994-annotating @@ -700,32 +700,32 @@ Restructuring Tagged Corpora with Morpheme Adjustment Rules ToshihisaTashiro NoriyoshiUratani - TsuyoshiMorimoto + TsuyoshiMorimoto C94-1093 tashiro-etal-1994-restructuring Encoding standards for large text resources: The <fixed-case>T</fixed-case>ext <fixed-case>E</fixed-case>ncoding <fixed-case>I</fixed-case>nitiative - NancyIde + NancyIde C94-1094 ide-1994-encoding <fixed-case>INTEX</fixed-case>: A Corpus Processing System - Max D.Silberztein + Max D.Silberztein C94-1095 silberztein-1994-intex An <fixed-case>IBM</fixed-case>-<fixed-case>PC</fixed-case> Environment for <fixed-case>C</fixed-case>hinese Corpus Analysis - Robert Wing PongLuk + Robert Wing PongLuk C94-1096 luk-1994-ibm <fixed-case>MULTEXT</fixed-case>: Multilingual Text Tools and Corpora - NancyIde - JeanVeronis + NancyIde + JeanVeronis C94-1097 ide-veronis-1994-multext @@ -740,21 +740,21 @@ A Tool for Collecting Domain Dependent Sortal Constraints From Corpora FrancoisAndry - MarkGawron - JohnDowding - RobertMoore + MarkGawron + JohnDowding + RobertMoore C94-1099 andry-etal-1994-tool Building a Lexical Domain Map From Text Corpora - TomekStrzalkowski + TomekStrzalkowski C94-1100 strzalkowski-1994-building A New Method of N-gram Statistics for Large Number of n and Automatic Extraction of Words and Phrases from Large Text Data of <fixed-case>J</fixed-case>apanese - MakotoNagao + MakotoNagao ShinsukeMori C94-1101 nagao-mori-1994-new @@ -776,7 +776,7 @@ Syntactic Analysis of Natural Language Using Linguistic Rules and Corpus-Based Patterns PasiTapanainen - TimoJarvinen + TimoJarvinen C94-1104 tapanainen-jarvinen-1994-syntactic @@ -793,7 +793,7 @@ Word Sense Acquisition for Multilingual Text Interpretation - Paul S.Jacobs + Paul S.Jacobs C94-2105 jacobs-1994-word @@ -821,7 +821,7 @@ The Nature of Near-Synonymic Relations - ChrysanneDiMarco + ChrysanneDiMarco C94-2109 dimarco-1994-nature @@ -835,20 +835,20 @@
Building a <fixed-case>W</fixed-case>indows-Based Bilingual Functional Semantic Processor - Jonathan J.Webster + Jonathan J.Webster C94-2111 webster-1994-building On the Proper Role of Coercion in Semantic Typing - JamesPustejovsky - PierretteBouillon + JamesPustejovsky + PierretteBouillon C94-2112 pustejovsky-bouillon-1994-proper Word Sense Ambiguation: Clustering Related Senses - William B.Dolan + William B.Dolan C94-2113 dolan-1994-word @@ -874,7 +874,7 @@ An Empirical Study on the Generation of Zero Anaphors in <fixed-case>C</fixed-case>hinese Ching-LongYeh - ChrisMellish + ChrisMellish C94-2117 yeh-mellish-1994-empirical @@ -886,7 +886,7 @@
Generalizing Automatically Generated Selectional Patterns - RalphGrishman + RalphGrishman JohnSterling C94-2119 grishman-sterling-1994-generalizing @@ -900,7 +900,7 @@ Word Sense Disambiguation and Text Segmentation Based on Lexical Cohesion - ManabuOkumura + ManabuOkumura TakeoHonda C94-2121 okumura-honda-1994-word @@ -908,19 +908,19 @@ Automatic Recognition of Verbal Polysemy FumiyoFukumoto - Jun’ichiTsujii + Jun’ichiTsujii C94-2122 fukumoto-tsujii-1994-automatic An Experiment on Learning Appropriate Selectional Restrictions From a Parsed Corpus - Francesc RibasFramis + Francesc RibasFramis C94-2123 framis-1994-experiment A Discrete Model of Degree Concept in Natural Language - Shin-ichiroKamei + Shin-ichiroKamei KazunoriMuraki C94-2124 kamei-muraki-1994-discrete @@ -945,8 +945,8 @@ The Merged Upper Model: A Linguistic Ontology for <fixed-case>G</fixed-case>erman and <fixed-case>E</fixed-case>nglish - RenateHenschel - JohnBateman + RenateHenschel + JohnBateman C94-2128 henschel-bateman-1994-merged @@ -965,13 +965,13 @@ <fixed-case>HPSG</fixed-case> Lexicon Without Lexical Rules - KarelOliva + KarelOliva C94-2131 oliva-1994-hpsg A Lexicon of Distributed Noun Representations Constructed by Taxonomic Traversal - Richard F.E.Sutcliffe + Richard F.E.Sutcliffe DonieO’Sullivan FergusMeharg C94-2132 @@ -988,13 +988,13 @@ Hypothesis Selection in Grammar Acquisition MasakiKiyono - Jun’ichiTsujii + Jun’ichiTsujii C94-2134 kiyono-tsujii-1994-hypothesis Achieving Flexibility in Unification Formalisms - LenaStromback + LenaStromback C94-2135 stromback-1994-achieving @@ -1003,7 +1003,7 @@ KazunoriMuraki SusumuAkamine KenjiSatoh - SinichiAndo + SinichiAndo C94-2136 muraki-etal-1994-twp @@ -1016,7 +1016,7 @@ A Reestimation Algorithm for Probabilistic ttecursive Transition Network Young S.Han - Key-SunChoi + Key-SunChoi C94-2138 han-choi-1994-reestimation @@ -1054,36 +1054,36 @@
<fixed-case>TDL</fixed-case>-A Type Description Language for Constraint-Based Grammars - Hans-UlrichKrieger - UlrichSchafer + Hans-UlrichKrieger + UlrichSchafer C94-2144 krieger-schafer-1994-tdl On the Portability of Complex Constraint-Based Grammars - C.J.Rupp + C.J.Rupp RodJohnson C94-2145 rupp-johnson-1994-portability A Grammar Based Approach to a Grammar Checking of Free Word Order Languages - VladislavKubon - MartinPlatek + VladislavKubon + MartinPlatek C94-2146 kubon-platek-1994-grammar Table-driven Neural Syntactic Analysis of Spoken <fixed-case>K</fixed-case>orean WonllLee - GeunbaeLee + GeunbaeLee Jong-HyeokLee C94-2147 lee-etal-1994-table Universal Guides and Finiteness and Symmetry of Grammar Processing Algorithms - MiroslavMartinović + MiroslavMartinović C94-2148 martinovic-1994-universal @@ -1091,8 +1091,8 @@ <fixed-case>XTAG</fixed-case> System - A Wide Coverage Grammar for <fixed-case>E</fixed-case>nglish ChristyDoran DaniaEgedi - Beth AnnHockey - B.Srinivas + Beth AnnHockey + B.Srinivas MartinZaidel C94-2149 doran-etal-1994-xtag @@ -1113,14 +1113,14 @@ Hypothesis Scoring over Theta Grids Information in Parsing <fixed-case>C</fixed-case>hinese Sentences with Serial Verb Constructions Koong H. C.Lin - Von-WunSoo + Von-WunSoo C94-2152 lin-soo-1994-hypothesis An Efficient Syntactic Tagging Tool for Corpora MingZhou - ChangningHuang + ChangningHuang C94-2153 zhou-huang-1994-efficient @@ -1140,20 +1140,20 @@
Machine-Readable Dictionaries in Text-to-Speech Systems - Judith L.Klavans - EvelyneTzoukermann + Judith L.Klavans + EvelyneTzoukermann C94-2156 klavans-tzoukermann-1994-machine Issues in Text-to-Speech for <fixed-case>F</fixed-case>rench - EvelyneTzoukermann + EvelyneTzoukermann C94-2157 tzoukermann-1994-issues <fixed-case>CHATR</fixed-case>: a generic speech synthesis system - Alan W.Black + Alan W.Black PaulTaylor C94-2158 black-taylor-1994-chatr @@ -1188,7 +1188,7 @@ Phonological Derivation in <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory - T. MarkEllison + T. MarkEllison C94-2163 ellison-1994-phonological @@ -1209,7 +1209,7 @@ A <fixed-case>D</fixed-case>utch to <fixed-case>SQL</fixed-case> database interface using Generalized Quantifier Theory DirkSpeelman - GeertAdriaens + GeertAdriaens C94-2166 speelman-adriaens-1994-dutch @@ -1222,7 +1222,7 @@ Knowledge Extraction from Texts: a method for extracting predicate-argument structures from texts FlorencePugeault - PatrickSaint-Dizier + PatrickSaint-Dizier Marie-GaelleMonteil C94-2168 pugeault-etal-1994-knowledge @@ -1232,7 +1232,7 @@ TakehitoUtsuro KiyotakaUchimoto MitsutakaMatsumoto - MakotoNagao + MakotoNagao C94-2169 utsuro-etal-1994-thesaurus @@ -1252,7 +1252,7 @@
Document Classification by Machine:Theory and Practice - LouiseGuthrie + LouiseGuthrie ElbertWalker C94-2172 guthrie-walker-1994-document @@ -1268,7 +1268,7 @@ Recognizing Text Genres With Simple Metrics Using Discriminant Analysis JussiKarlgren - DouglassCutting + DouglassCutting C94-2174 karlgren-cutting-1994-recognizing @@ -1277,8 +1277,8 @@ TakehitoUtsuro HiroshiIkeda MasayaYamane - YujiMatsumoto - MakotoNagao + YujiMatsumoto + MakotoNagao C94-2175 utsuro-etal-1994-bilingual @@ -1299,7 +1299,7 @@ K-vec: A New Approach for Aligning Parallel Texts PascaleFung - Kenneth WardChurch + Kenneth WardChurch C94-2178 fung-church-1994-k @@ -1324,27 +1324,27 @@
Collaboration on Reference to Objects That Are Not Mutually Known - Philip G.Edmonds + Philip G.Edmonds C94-2182 edmonds-1994-collaboration Automatic Detection of Discourse Structure by Checking Surface Information in Sentences SadaoKurohashi - MakotoNagao + MakotoNagao C94-2183 kurohashi-nagao-1994-automatic Extending <fixed-case>DRT</fixed-case> With a Focusing Mechanism for Pronominal Anaphora and Ellipsis Resolution JoseAbracos - Jose GabrielLopes + Jose GabrielLopes C94-2184 abracos-lopes-1994-extending Reference Resolution Using Semantic Patterns in <fixed-case>J</fixed-case>apanese Newspaper Articles - TakahiroWakao + TakahiroWakao C94-2185 wakao-1994-reference @@ -1384,14 +1384,14 @@
An Integrated Model for Anaphora Resolution - RuslanMitkov + RuslanMitkov C94-2191 mitkov-1994-integrated Breaking Down Rhetorical Relations for the purpose of Analysing Discourse Structures - Jun’ichiFukumoto - Jun’ichiTsujii + Jun’ichiFukumoto + Jun’ichiTsujii C94-2192 fukumoto-tsujii-1994-breaking @@ -1403,7 +1403,7 @@
Communicating With Multiple Agents - Elizabeth A.Hinkelman + Elizabeth A.Hinkelman Stephen P.Spackman C94-2194 hinkelman-spackman-1994-communicating @@ -1417,7 +1417,7 @@ Discourse and Deliberation: Testing a Collaborative Strategy - Marilyn A.Walker + Marilyn A.Walker C94-2196 walker-1994-discourse @@ -1442,9 +1442,9 @@
The Evolution of Machine-Tractable Dictionaries - Cheng-mingGuo - Chang-ningHuang - Jun-pingGong + Cheng-mingGuo + Chang-ningHuang + Jun-pingGong JinLi C94-2200 guo-etal-1994-evolution @@ -1477,8 +1477,8 @@ <fixed-case>P</fixed-case>ortuguese Analysis with <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - Karin ChristineKipper - Vera Lucia Strubede Lima + Karin ChristineKipper + Vera Lucia Strubede Lima C94-2205 kipper-de-lima-1994-portuguese @@ -1497,9 +1497,9 @@
Humor-Based Applications - GaborProszeky + GaborProszeky MiklosPal - LaszloTihanyi + LaszloTihanyi C94-2208 proszeky-etal-1994-humor @@ -1530,9 +1530,9 @@ <fixed-case>NL</fixed-case> Understanding with a Grammar of Constructions WlodekZadrozny - MarcinSzummer + MarcinSzummer StanislawJarecki - David E.Johnson + David E.Johnson LeoraMorgenstern C94-2212 zadrozny-etal-1994-nl diff --git a/data/xml/C96.xml b/data/xml/C96.xml index aa0c3f614e..146bfe89aa 100644 --- a/data/xml/C96.xml +++ b/data/xml/C96.xml @@ -12,7 +12,7 @@ Discovering the Sounds of Discourse Structure Extended Abstract - Barbara J.Grosz + Barbara J.Grosz C96-1001 grosz-1996-discovering @@ -38,8 +38,8 @@ Word Sense Disambiguation using Conceptual Density - EnekoAgirre - GermanRigau + EnekoAgirre + GermanRigau C96-1005 agirre-rigau-1996-word @@ -95,34 +95,34 @@
Concept clustering and knowledge integration from a children’s dictionary - CarolineBarrière + CarolineBarrière FredPopowich C96-1013 barriere-popowich-1996-concept Integrating Syntactic and Prosodic Information for the Efficient Detection of Empty Categories - AntonBatliner + AntonBatliner AnkeFeldhaus StefanGeifiler AndreasKieflling TiborKiss RalfKompe - ElmarNoth + ElmarNoth C96-1014 batliner-etal-1996-integrating Monotonic Paradigmatic Schemata in <fixed-case>I</fixed-case>talian Verb Inflection VitoPirrelli - MarcoBattista + MarcoBattista C96-1015 pirrelli-battista-1996-monotonic Measuring Semantic Coverage - SergeiNirenburg - KaviMahesh + SergeiNirenburg + KaviMahesh StephenBeale C96-1016 nirenburg-etal-1996-measuring @@ -135,7 +135,7 @@ Unsupervised Discovery of Phonological Categories through Supervised Learning of Morphological Rules - WalterDaelemans + WalterDaelemans PeterBerck StevenGillis C96-1018 @@ -150,10 +150,10 @@ Beyond Skeleton Parsing: Producing a Comprehensive Large-Scale General-<fixed-case>E</fixed-case>nglish Treebank With Full Grammatical Analysis - EzraBlack + EzraBlack StephenEubank - HidekiKashioka - DavidMagerman + HidekiKashioka + DavidMagerman RogerGarside GeoffreyLeech C96-1020 @@ -162,13 +162,13 @@ Anaphora for Everyone: Pronominal Anaphora Resolution without a Parser ChristopherKennedy - BranimirBoguraev + BranimirBoguraev C96-1021 kennedy-boguraev-1996-anaphora Theory and practice of ambiguity labelling with a view to interactive disambiguation in text and speech <fixed-case>MT</fixed-case> - ChristianBoitet + ChristianBoitet MutsukoTomokiyo C96-1022 boitet-tomokiyo-1996-theory @@ -184,11 +184,11 @@ Compositional Semantics in Verbmobil JohanBos - BjornGamback + BjornGamback ChristianLieske YoshikiMori ManfredPinkal - KarstenWorm + KarstenWorm C96-1024 bos-etal-1996-compositional @@ -196,19 +196,19 @@ Processing Metonymy- a Domain-Model Heuristic Graph Traversal Approach JacquesBouaud BrunoBachimont - PierreZweigenbaum + PierreZweigenbaum C96-1025 bouaud-etal-1996-processing Mental State Adjectives: the Perspective of <fixed-case>G</fixed-case>enerative <fixed-case>L</fixed-case>exicon - PierretteBouillon + PierretteBouillon C96-1026 bouillon-1996-mental Branching Split Obliqueness at the Syntax-Semantics Interface - Antonio H.Branco + Antonio H.Branco C96-1027 branco-1996-branching @@ -223,21 +223,21 @@ Lexical Rules: What are they? AndrewBredenkamp - StellaMarkantonatou + StellaMarkantonatou LouisaSadler C96-1029 bredenkamp-etal-1996-lexical Example-Based Machine Translation in the Pangloss System - Ralf D.Brown + Ralf D.Brown C96-1030 brown-1996-example <fixed-case>G</fixed-case>ram<fixed-case>C</fixed-case>heck: A Grammar and Style Checker Flora RamírezBustamante - Fernando SánchezLeón + Fernando SánchezLeón C96-1031 bustamante-leon-1996-gramcheck @@ -252,13 +252,13 @@ <fixed-case>F</fixed-case>eas<fixed-case>P</fixed-case>ar - A Feature Structure Parser Learning to Parse Spoken Language Finn DagBuo - AlexWaibel + AlexWaibel C96-1033 buo-waibel-1996-feaspar A principle-based hierarchical representation of <fixed-case>LTAG</fixed-case>s - Marie-HeleneCandito + Marie-HeleneCandito C96-1034 candito-1996-principle @@ -278,14 +278,14 @@ Aligning More Words with High Precision for Small Bilingual Corpora - Sur-JinKer - Jason J. S.Chang + Sur-JinKer + Jason J. S.Chang C96-1037 ker-chang-1996-aligning A Rule-Based and <fixed-case>MT</fixed-case>-Oriented Approach to Prepositional Phrase Attachment - Kuang-huaChen + Kuang-huaChen Hsin-HsiChen C96-1038 chen-chen-1996-rule @@ -301,15 +301,15 @@ Bilingual Knowledge Acquisition from <fixed-case>K</fixed-case>orean-<fixed-case>E</fixed-case>nglish Parallel Corpus Using Alignment Jung H.Shin Young S.Han - Key-SunChoi + Key-SunChoi C96-1040 shin-etal-1996-bilingual <fixed-case>M</fixed-case>arkov random field based <fixed-case>E</fixed-case>nglish Part-Of-Speech tagging system - Sung-YoungJung + Sung-YoungJung Young C.Park - Key-SunChoi + Key-SunChoi YoungwhanKim C96-1041 jung-etal-1996-markov @@ -322,7 +322,7 @@ Evaluating and comparing three text-production techniques - JoseCoch + JoseCoch C96-1043 coch-1996-evaluating @@ -335,22 +335,22 @@
Direct and Underspecified Interpretations of <fixed-case>LFG</fixed-case> f-structures - Josefvan Genabith + Josefvan Genabith DickCrouch C96-1045 van-genabith-crouch-1996-direct Pronouncing Text by Analogy - Robert I.Damper + Robert I.Damper John EG.Eastmond C96-1046 damper-eastmond-1996-pronouncing Finite-state phrase parsing by rule sequences - MarcVilain - DavidDay + MarcVilain + DavidDay C96-1047 vilain-day-1996-finite @@ -372,17 +372,17 @@ Language-Specific Mappings from Semantics to Syntax JudyDelin - Donia R.Scott - AnthonyHartley + Donia R.Scott + AnthonyHartley C96-1050 delin-etal-1996-language Segmentation and Labelling of <fixed-case>S</fixed-case>lovenian Diphone Inventories - JernejaGros + JernejaGros IvoIpsic - SimonDobrisek - FranceMihelic + SimonDobrisek + FranceMihelic NikolaPavesic C96-1051 gros-etal-1996-segmentation @@ -396,31 +396,31 @@ Lexical Information for Determining <fixed-case>J</fixed-case>apanese Unbounded Dependency - Shin-ichiroKamei + Shin-ichiroKamei KazunoriMuraki - Shin’ichiDoi + Shin’ichiDoi C96-1053 kamei-etal-1996-lexical Semantic-based Transfer MichaelDorna - Martin C.Emele + Martin C.Emele C96-1054 dorna-emele-1996-semantic Role of Word Sense Disalnbiguation in Lexical Acquisition: Predicting Semantics from Syntactic Cues - Bonnie J.Dorr - DougJones + Bonnie J.Dorr + DougJones C96-1055 dorr-jones-1996-role <fixed-case>G</fixed-case>RICE INCORPORATED: Cooperativity in Spoken Dialogue - LailaDybkjaer - Niels OleBernsen - HansDybkjaer + LailaDybkjaer + Niels OleBernsen + HansDybkjaer C96-1056 dybkjaer-etal-1996-grice @@ -432,7 +432,7 @@
Three New Probabilistic Models for Dependency Parsing: An Exploration - Jason M.Eisner + Jason M.Eisner C96-1058 eisner-1996-three @@ -452,14 +452,14 @@ Using Discourse Predictions for Ambiguity Resolution YanQu - Carolyn P.Rose + Carolyn P.Rose BarbaraDi Eugenio C96-1061 qu-etal-1996-using Interpretation of Nominal Compounds: Combining Domain-Independent and Domain-Specific Information - CecileFabre + CecileFabre C96-1062 fabre-1996-interpretation @@ -471,7 +471,7 @@
Resolving syntactic ambiguities with lexico-semantic patterns: an analogy-based approach - SimonettaMontemagni + SimonettaMontemagni StefanoFederici VitoPirrelli C96-1064 @@ -523,9 +523,9 @@ Evaluation of an Algorithm for the Recognition and Classification of Proper Names - TakahiroWakao - RobertGaizauskas - YorickWilks + TakahiroWakao + RobertGaizauskas + YorickWilks C96-1071 wakao-etal-1996-evaluation @@ -552,12 +552,12 @@
Multi-lingual Translation of Spontaneously Spoken Language in a Limited Domain - AlonLavie - DonnaGates - MarsalGavalda - LauraMayfield - AlexWaibel - LoriLevin + AlonLavie + DonnaGates + MarsalGavalda + LauraMayfield + AlexWaibel + LoriLevin C96-1075 lavie-etal-1996-multi @@ -569,39 +569,39 @@
Compiling a Partition-Based Two-Level Formalism - EdmundGrimley-Evans - George AntonKiraz - Stephen G.Pulman + EdmundGrimley-Evans + George AntonKiraz + Stephen G.Pulman C96-1077 grimley-evans-etal-1996-compiling Alignment of Shared Forests for Bilingual Corpora - AdamMeyers + AdamMeyers RomanYangarber - RalphGrishman + RalphGrishman C96-1078 meyers-etal-1996-alignment <fixed-case>M</fixed-case>essage <fixed-case>U</fixed-case>nderstanding <fixed-case>C</fixed-case>onference- 6: A Brief History - RalphGrishman - BethSundheim + RalphGrishman + BethSundheim C96-1079 grishman-sundheim-1996-message The Influence of Tagging on the Classification of Lexical Complements - CatherineMacleod - AdamMeyers - RalphGrishman + CatherineMacleod + AdamMeyers + RalphGrishman C96-1080 macleod-etal-1996-influence A Sign Expansion Approach to Dynamic, Multi-Purpose Lexicons Jon AtleGulla - Sjur NørstebøMoshagen + Sjur NørstebøMoshagen C96-1081 gulla-moshagen-1996-sign @@ -616,9 +616,9 @@
Symbolic word clustering for medium-size corpora - BenoitHabert + BenoitHabert ElieNaulleau - AdelineNazarenko + AdelineNazarenko C96-1083 habert-etal-1996-symbolic @@ -639,7 +639,7 @@
Inherited Feature-based Similarity Measure Based on Large Semantic Hierarchy and Large Text Corpus - HidekiHirakawa + HidekiHirakawa ZhonghuiXu KennethHaase C96-1086 @@ -669,7 +669,7 @@ Issues in Communication Game - KoitiHasida + KoitiHasida C96-1090 hasida-1996-issues @@ -681,7 +681,7 @@
Applying Lexical Rules Under Subsumption - Erhard W.Hinrichs + Erhard W.Hinrichs TsunekoNakazawa C96-1092 hinrichs-nakazawa-1996-applying @@ -702,7 +702,7 @@ Towards a More Careful Evaluation of Broad Coverage Parsing Systems Wide R.Hogenhout - YujiMatsumoto + YujiMatsumoto C96-1095 hogenhout-matsumoto-1996-towards @@ -715,7 +715,7 @@ A Statistical Method for Extracting Uninterrupted and Interrupted Collocations from Very Large Corpora SatoruIkehara - SatoshiShirai + SatoshiShirai HajimeUchino C96-1097 ikehara-etal-1996-statistical @@ -733,7 +733,7 @@ Extraction of Lexical Translations from Non-Aligned Corpora - KumikoTanaka + KumikoTanaka HideyaIwasaki C96-2098 tanaka-iwasaki-1996-extraction @@ -741,7 +741,7 @@ Segmenting Sentences into Linky Strings Using <fixed-case>D</fixed-case>-bigram Statistics ShihoNobesawa - JunyaTsutsumi + JunyaTsutsumi Sun DaJiang TomohisaSano KengoSato @@ -757,7 +757,7 @@ Goal Formulation based on Communicative Principles - KristiinaJokinen + KristiinaJokinen C96-2101 jokinen-1996-goal @@ -770,7 +770,7 @@ Coordination in <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars: Formalization and Implementation AnoopSarkar - AravindJoshi + AravindJoshi C96-2103 sarkar-joshi-1996-coordination @@ -782,22 +782,22 @@ Parallel Replacement in Finite State Calculus - AndreKempe + AndreKempe LauriKarttunen C96-2105 kempe-karttunen-1996-parallel Modularizing Codescriptive Grammars for Efficient Parsing - WalterKasper - Hans-UlrichKrieger + WalterKasper + Hans-UlrichKrieger C96-2106 kasper-krieger-1996-modularizing Statistical Method of Recognizing Local Cohesion - NaotoKatoh - TsuyoshiMorimoto + NaotoKatoh + TsuyoshiMorimoto C96-2107 katoh-morimoto-1996-statistical @@ -828,7 +828,7 @@ Computing Prosodic Morphology - George AntonKiraz + George AntonKiraz C96-2112 kiraz-1996-computing @@ -840,7 +840,7 @@ Linguistic Indeterminacy as a Source of Errors in Tagging - GunnelKallgren + GunnelKallgren C96-2114 kallgren-1996-linguistic @@ -848,14 +848,14 @@ Arguments desperately seeking Interpretation: Parsing <fixed-case>G</fixed-case>erman Infinitives ChristopherLaenzlinger Martin S.Ulmann - EricWehrli + EricWehrli C96-2115 laenzlinger-etal-1996-arguments
A Generalized Reconstruction Algorithm for Ellipsis Resolution ShalomLappin - Hsue-HuehShih + Hsue-HuehShih C96-2116 lappin-shih-1996-generalized @@ -867,17 +867,17 @@
An ascription-based approach to Speech Acts - MarkLee - YorickWilks + MarkLee + YorickWilks C96-2118 lee-wilks-1996-ascription Automatic <fixed-case>E</fixed-case>nglish-to-<fixed-case>K</fixed-case>orean Text Translation of Telegraphic Messages in a Limited Domain - CliffordWeinstein + CliffordWeinstein DineshTummala Young-SukLee - StephanieSeneff + StephanieSeneff C96-2119 weinstein-etal-1996-automatic @@ -887,11 +887,11 @@ StephanOepen SylvieRegnier-Prost KlausNetter - VeronikaLux + VeronikaLux JudithKlein KirstenFalkedal FrederikFouvry - DominiqueEstival + DominiqueEstival EvaDauphin HerveCompagnion JudithBaur @@ -903,14 +903,14 @@ Saussurian analogy: a theoretical account and its application YvesLepage - AndoShin-ichi + Shin-ichiAndo C96-2121 lepage-ando-1996-saussurian An <fixed-case>E</fixed-case>arley-type recognizer for dependency grammar VincenzoLombardo - LeonardoLesmo + LeonardoLesmo C96-2122 lombardo-lesmo-1996-earley @@ -922,7 +922,7 @@
Building Knowledge Bases for the Generation of Software Documentation - CecileParis + CecileParis KeithVander Linden C96-2124 paris-vander-linden-1996-building @@ -944,14 +944,14 @@ An <fixed-case>HPSG</fixed-case>-Based Generator for <fixed-case>G</fixed-case>erman An Experiment in the Reusability of Linguistic Resources JohannesMatiasek - HaraldTrost + HaraldTrost C96-2127 matiasek-trost-1996-hpsg Reversible delayed lexical choice in a bidirectional framework - GrahamWilcock - YujiMatsumoto + GrahamWilcock + YujiMatsumoto C96-2128 wilcock-matsumoto-1996-reversible @@ -991,13 +991,13 @@ YasuhikoWatanabe MasakiMurata MasahitoTakeuchi - MakotoNagao + MakotoNagao C96-2134 watanabe-etal-1996-document Yet Another Paper about Partial Verb Phrase Fronting in <fixed-case>G</fixed-case>erman - StefanMuller + StefanMuller C96-2135 muller-1996-yet @@ -1010,7 +1010,7 @@ Anaphora Resolution of <fixed-case>J</fixed-case>apanese Zero Pronouns with Deictic Reference HiromiNakaiwa - SatoshiShirai + SatoshiShirai C96-2137 nakaiwa-shirai-1996-anaphora @@ -1035,16 +1035,16 @@
<fixed-case>HMM</fixed-case>-Based Word Alignment in Statistical Translation - StephanVogel - HermannNey - ChristophTillmann + StephanVogel + HermannNey + ChristophTillmann C96-2141 vogel-etal-1996-hmm Adjectival Modification in Text Meaning Representation VictorRaskin - SergeiNirenburg + SergeiNirenburg C96-2142 raskin-nirenburg-1996-adjectival @@ -1075,14 +1075,14 @@
Zero Pronoun Resolution in <fixed-case>J</fixed-case>apanese Discourse Based on Centering Theory - ManabuOkumura + ManabuOkumura KoujiTamura C96-2147 okumura-tamura-1996-zero <fixed-case>POS</fixed-case> Tagging Using Relaxation Labelling - LluisPadro + LluisPadro C96-2148 padro-1996-pos @@ -1137,7 +1137,7 @@
A Self-Learning Universal Concept Spotter - TomekStrzalkowski + TomekStrzalkowski JinWang C96-2157 strzalkowski-wang-1996-self @@ -1157,7 +1157,7 @@ Computing Phrasal-signs in <fixed-case>HPSG</fixed-case> prior to Parsing KentaroTorisawa - Jun’ichiTsujii + Jun’ichiTsujii C96-2160 torisawa-tsujii-1996-computing @@ -1187,7 +1187,7 @@ On Inference-Based Procedures for Lexical Disambiguation - JurgenWedekind + JurgenWedekind C96-2165 wedekind-1996-inference @@ -1205,8 +1205,8 @@
“Is Speech Language?” - JosephMariani - StevenKrauwer + JosephMariani + StevenKrauwer C96-2168 mariani-krauwer-1996-speech @@ -1218,7 +1218,7 @@
Evaluation of <fixed-case>NLP</fixed-case> systems - BenteMaegaard + BenteMaegaard C96-2170 maegaard-1996-evaluation @@ -1242,7 +1242,7 @@
<fixed-case>CALL</fixed-case>: The Potential of Lingware and the Use of Empirical Linguistic Data - DanTufis + DanTufis C96-2174 tufis-1996-call @@ -1261,14 +1261,14 @@ <fixed-case>NL</fixed-case> Domain Explanations in Knowledge Based <fixed-case>MAT</fixed-case> GaliaAngelova - KalinaBontcheva + KalinaBontcheva C96-2177 angelova-bontcheva-1996-nl Machine Translation Method Using Inductive Learning with Genetic Algorithms - HiroshiEchizen-ya - KenjiAraki + HiroshiEchizen-ya + KenjiAraki YoshioMomouchi KojiTochinai C96-2178 @@ -1276,7 +1276,7 @@ The implementation of a computational grammar of <fixed-case>F</fixed-case>rench using the Grammar Development Environment - LouisetteEmirkanian + LouisetteEmirkanian LyneDa Sylva Lorne H.Bouchard C96-2179 @@ -1293,14 +1293,14 @@ <fixed-case>NKRL</fixed-case>, a Knowledge Representation Language for Narrative Natural Language Processing - Gian PieroZarri + Gian PieroZarri C96-2181 zarri-1996-nkrl Formal Description of Multi-Word Lexemes with the Finite-State Formalism <fixed-case>IDAREX</fixed-case> ElisabethBreidt - FrederiqueSegond + FrederiqueSegond GiuseppeValetto C96-2182 breidt-etal-1996-formal @@ -1308,23 +1308,23 @@ Motivations and Methods for Text Simplification R.Chandrasekar - ChristineDoran - B.Srinivas + ChristineDoran + B.Srinivas C96-2183 chandrasekar-etal-1996-motivations Segmentation Standard for <fixed-case>C</fixed-case>hinese Natural Language Processing Chu-RenHuang - Keh-jiannChen - Li-LiChang + Keh-jiannChen + Li-LiChang C96-2184 huang-etal-1996-segmentation <fixed-case>K</fixed-case>orean Language Engineering: Current Status of the Information Platform KimSeongyong - ChoiKey-Sun + Key-SunChoi C96-2185 kim-choi-1996-korean @@ -1338,16 +1338,16 @@ <fixed-case>GATE</fixed-case>-a General Architecture for Text Engineering - HamishCunningham - YorickWilks - Robert J.Gaizauskas + HamishCunningham + YorickWilks + Robert J.Gaizauskas C96-2187 cunningham-etal-1996-gate Corpus-based annotated test set for Machine Translation evaluation by an Industrial User EvaDauphin - VeronikaLux + VeronikaLux C96-2188 dauphin-lux-1996-corpus @@ -1368,7 +1368,7 @@ Spoken-Language Translation Method Using Examples HitoshiIida - EiichiroSumita + EiichiroSumita OsamuFuruse C96-2191 iida-etal-1996-spoken @@ -1376,10 +1376,10 @@ Tagging Spoken Language Using Written Language Statistics JoakimNivre - LeifGronqvist + LeifGronqvist MalinGustafsson TorbjSrnLager - SylvanaSofkova + SylvanaSofkova C96-2192 nivre-etal-1996-tagging @@ -1393,7 +1393,7 @@ A Gradual Refinement Model for A Robust <fixed-case>T</fixed-case>hai Morphological Analyzer AsaneeKawtrakul - ChalatipThumkanon + ChalatipThumkanon ThitimaJamjanya ParineeMuangyunnan KritsadaPoolwan @@ -1440,7 +1440,7 @@ <fixed-case>C</fixed-case>hinese String Searching Using the <fixed-case>KMP</fixed-case> Algorithm - Robert W.P.Luk + Robert W.P.Luk C96-2200 luk-1996-chinese @@ -1448,26 +1448,26 @@ <fixed-case>P</fixed-case>a<fixed-case>T</fixed-case>rans- A Patent Translation System BjarneOrsnes BradleyMusic - BenteMaegaard + BenteMaegaard C96-2201 orsnes-etal-1996-patrans Word Extraction from Corpora and Its Part-of-Speech Estimation Using Distributional Analysis ShinsukeMori - MakotoNagao + MakotoNagao C96-2202 mori-nagao-1996-word Morphological Analyzer as Syntactic Parser - GáborPrószéky + GáborPrószéky C96-2203 proszeky-1996-morphological Constructing Verb Semantic Classes for <fixed-case>F</fixed-case>rench: Methods and Evaluation - PatrickSaint-Dizier + PatrickSaint-Dizier C96-2204 saint-dizier-1996-constructing @@ -1489,7 +1489,7 @@ How the Linguistic Negation Can Have an Effect in Object-Based Knowledge Representation Model LahceneSi Ameur - JacquesRouault + JacquesRouault C96-2207 si-ameur-rouault-1996-linguistic @@ -1502,13 +1502,13 @@
A tagger/lemmatiser for <fixed-case>D</fixed-case>utch medical language - PeterSpyns + PeterSpyns C96-2209 spyns-1996-tagger A Distributed Architecture for Text Analysis in <fixed-case>F</fixed-case>rench: An Application to Complex Linguistic Phenomena Processing - Marie-HeleneStefanini + Marie-HeleneStefanini KarineWarren C96-2210 stefanini-warren-1996-distributed @@ -1538,7 +1538,7 @@ Computational Complexity of Probabilistic Disambiguation by means of Tree-Grammars - KhalilSima’an + KhalilSima’an C96-2215 simaan-1996-computational diff --git a/data/xml/C98.xml b/data/xml/C98.xml index 3d188c5890..72755bfb86 100644 --- a/data/xml/C98.xml +++ b/data/xml/C98.xml @@ -13,7 +13,7 @@ A Quasi-Dependency Model for Structural Analysis it of <fixed-case>C</fixed-case>hinese <fixed-case>B</fixed-case>ase<fixed-case>NP</fixed-case>s ZhaoJun - HuangChangning + ChangningHuang C98-1001 zhao-huang-1998-quasi @@ -25,9 +25,9 @@
Towards a single proposal in spelling correction - EnekoAgirre - KoldoGojenola - KepaSarasola + EnekoAgirre + KoldoGojenola + KepaSarasola AtroVoutilainen C98-1003 agirre-etal-1998-towards @@ -42,15 +42,15 @@ Parole et traduction automatique: le module de reconnaissance <fixed-case>RAPHAEL</fixed-case> - MohammadAkbar + MohammadAkbar JeanCaelen C98-1005 akbar-caelen-1998-parole Automatic Acquisition of Hierarchical Transduction Models for Machine Translation - HiyanAlshawi - SrinivasBangalore + HiyanAlshawi + SrinivasBangalore ShonaDouglas C98-1006 alshawi-etal-1998-automatic @@ -87,9 +87,9 @@ Evaluating a Focus-Based Approach to Anaphora Resolution - SalihaAzzam - KevinHumphreys - RobertGaizauskas + SalihaAzzam + KevinHumphreys + RobertGaizauskas C98-1011 azzam-etal-1998-evaluating @@ -102,9 +102,9 @@
The <fixed-case>B</fixed-case>erkeley <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Project - Collin F.Baker - Charles J.Fillmore - John B.Lowe + Collin F.Baker + Charles J.Fillmore + John B.Lowe C98-1013 baker-etal-1998-berkeley @@ -118,13 +118,13 @@ Semi-Automatic Recognition of Noun Modifier Relationships KenBarker - StanSzpakowicz + StanSzpakowicz C98-1015 barker-szpakowicz-1998-semi <fixed-case>R</fixed-case>edundancy: helping semantic disambiguation - CarolineBarriere + CarolineBarriere C98-1016 barriere-1998-redundancy @@ -151,9 +151,9 @@
Trigger-Pair Predictors in Parsing and Tagging - EzraBlack + EzraBlack AndrewFinch - HidekiKashioka + HidekiKashioka C98-1020 black-etal-1998-trigger @@ -166,7 +166,7 @@ A Probabilistic Corpus-Driven Model for Lexical-Functional Analysis RensBod - RonaldKaplan + RonaldKaplan C98-1022 bod-kaplan-1998-probabilistic @@ -174,22 +174,22 @@ Anchoring Floating Quantifiers in <fixed-case>J</fixed-case>apanese-to-<fixed-case>E</fixed-case>nglish Machine Translation FrancisBond DanielaKurz - SatoshiShirai + SatoshiShirai C98-1023 bond-etal-1998-anchoring
Managing information at linguistic interfaces JohanBos - C.J.Rupp - BiankaBuschbeck-Wolf + C.J.Rupp + BiankaBuschbeck-Wolf MichaelDorna C98-1024 bos-etal-1998-managing Deriving the Predicate-Argument Structure for a Free Word Order Language - CemBozsahin + CemBozsahin C98-1025 bozsahin-1998-deriving @@ -201,16 +201,16 @@
The Logical Structure of Binding - AntonioBranco + AntonioBranco C98-1027 branco-1998-logical Beyond N-Grams: Can Linguistic Sophistication Improve Language Modeling? EricBrill - RaduFlorian - John C.Henderson - LidiaMangu + RaduFlorian + John C.Henderson + LidiaMangu C98-1028 brill-etal-1998-beyond @@ -229,9 +229,9 @@
Named Entity Scoring for Speech Input - John D.Burger - DavidPalmer - LynetteHirschman + John D.Burger + DavidPalmer + LynetteHirschman C98-1031 burger-etal-1998-named @@ -241,29 +241,29 @@ KarenKukich SusanneWolff ChiLu - MartinChodorow - LisaBraden-Harder + MartinChodorow + LisaBraden-Harder Mary DeeHarris C98-1032 burstein-etal-1998-automated
Building Parallel <fixed-case>LTAG</fixed-case> for <fixed-case>F</fixed-case>rench and <fixed-case>I</fixed-case>talian - Marie-HeleneCandito + Marie-HeleneCandito C98-1033 candito-1998-building Error-Driven Pruning of Treebank Grammars for Base Noun Phrase Identification - ClaireCardie - DavidPierce + ClaireCardie + DavidPierce C98-1034 cardie-pierce-1998-error Exploiting Syntactic Structure for Language Modeling CiprianChelba - FrederickJelinek + FrederickJelinek C98-1035 chelba-jelinek-1998-exploiting @@ -278,14 +278,14 @@
A Concept-based Adaptive Approach to Word Sense Disambiguation - Jen NanChen - Jason S.Chang + Jen NanChen + Jason S.Chang C98-1037 chen-chang-1998-concept <fixed-case>PAT</fixed-case>-Trees with the Deletion Function as the Learning Device for Linguistic Patterns - Keh-JiannChen + Keh-JiannChen WenTsuei Lee-FengChien C98-1038 @@ -294,15 +294,15 @@ Hybrid Approaches to Improvement of Translation Quality in Web-based <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Machine Translation Sung-KwonChoi - Han-MinJung + Han-MinJung Jun-SikPark - Key-SunChoi + Key-SunChoi C98-1039 choi-etal-1998-hybrid Dialogue Management in Vector-Based Call Routing - JenniferChu-Carroll + JenniferChu-Carroll BobCarpenter C98-1040 chu-carroll-carpenter-1998-dialogue @@ -310,7 +310,7 @@ Machine Translation vs. Dictionary Term Translation - a Comparison for <fixed-case>E</fixed-case>nglish-<fixed-case>J</fixed-case>apanese News Article Alignment NigelCollier - HidekiHirakawa + HidekiHirakawa AkiraKumano C98-1041 collier-etal-1998-machine @@ -319,7 +319,7 @@ An Experiment in Hybrid Dictionary and Statistical Sentence Alignment NigelCollier KenjiOno - HidekiHirakawa + HidekiHirakawa C98-1042 collier-etal-1998-experiment @@ -332,8 +332,8 @@ Veins Theory: A Model of Global Discourse Cohesion and Coherence DanCristea - NancyIde - LaurentRomary + NancyIde + LaurentRomary C98-1044 cristea-etal-1998-veins @@ -341,15 +341,15 @@ Automatic Semantic Tagging of Unknown Proper Names AlessandroCucchiarelli DaniloLuzi - PaolaVelardi + PaolaVelardi C98-1045 cucchiarelli-etal-1998-automatic Investigating regular sense extensions based on intersective Levin classes - Hoa TrangDang - KarinKipper - MarthaPalmer + Hoa TrangDang + KarinKipper + MarthaPalmer JosephRosenzweig C98-1046 dang-etal-1998-investigating @@ -372,28 +372,28 @@ Multext-East: Parallel and Comparable Corpora and Lexicons for Six Central and <fixed-case>E</fixed-case>astern <fixed-case>E</fixed-case>uropean Languages LudmilaDimitrova - TomazErjavec - NancyIde - Heiki JaanKaalep - VladimirPetkevic - DanTufis + TomazErjavec + NancyIde + Heiki JaanKaalep + VladimirPetkevic + DanTufis C98-1049 dimitrova-etal-1998-multext Error Driven Word Sense Disambiguation - LucaDini - VittorioDi Tomaso - FrederiqueSegond + LucaDini + VittorioDi Tomaso + FrederiqueSegond C98-1050 dini-etal-1998-error An Empirical Investigation of Proposals in Collaborative Dialogues BarbaraDi Eugenio - Pamela W.Jordan - Johanna D.Moore - Richmond H.Thomason + Pamela W.Jordan + Johanna D.Moore + Richmond H.Thomason C98-1051 di-eugenio-etal-1998-empirical @@ -405,8 +405,8 @@ A Text Input Front-end Processor as an Information Access Platform - ShinichiDoi - Shin-ichiroKamei + ShinichiDoi + Shin-ichiroKamei KiyoshiYamabana C98-1053 doi-etal-1998-text @@ -415,8 +415,8 @@ Syntactic and Semantic Transfer with <fixed-case>F</fixed-case>-Structures MichaelDorna AnetteFrank - Josefvan Genabith - Martin C.Emele + Josefvan Genabith + Martin C.Emele C98-1054 dorna-etal-1998-syntactic @@ -438,21 +438,21 @@ Spelling Correction Using Context Mohammad AliElmi - MarthaEvens + MarthaEvens C98-1057 elmi-evens-1998-spelling Ambiguity Preserving Machine Translation using Packed Representations - Martin C.Emele + Martin C.Emele MichaelDorna C98-1058 emele-dorna-1998-ambiguity A structure-sharing parser for lexicalized grammars - RogerEvans - DavidWeir + RogerEvans + DavidWeir C98-1059 evans-weir-1998-structure @@ -490,7 +490,7 @@ Toward General-Purpose Learning for Information Extraction - DayneFreitag + DayneFreitag C98-1064 freitag-1998-toward @@ -524,7 +524,7 @@
Semantic-Head Based Resolution of Scopal Ambiguities - BjornGamback + BjornGamback JohanBos C98-1069 gamback-bos-1998-semantic @@ -533,20 +533,20 @@ Vers l’utilisation des methodes formelles pour le developpement de linguiciels BilelGargouri MohamedJmaiel - AbdelmajidBen Hamadou + AbdelmajidBen Hamadou C98-1070 gargouri-etal-1998-vers Flow Network Models for Word Alignment and Terminology Extraction from Bilingual Corpora - EricGaussier + EricGaussier C98-1071 gaussier-1998-flow Growing Semantic Grammars - MarsalGavaldà - AlexWaibel + MarsalGavaldà + AlexWaibel C98-1072 gavalda-waibel-1998-growing @@ -558,7 +558,7 @@
Efficient Linear Logic Meaning Assembly - VineetGupta + VineetGupta JohnLamping C98-1074 gupta-lamping-1998-efficient @@ -579,23 +579,23 @@ Tagging Inflective Languages: Prediction of Morphological Categories for a Rich, Structured Tagset - JanHajic - BarboraHladka + JanHajic + BarboraHladka C98-1077 hajic-hladka-1998-tagging Improving Data Driven Wordclass Tagging by System Combination - Hansvan Halteren + Hansvan Halteren JakubZavrel - WalterDaelemans + WalterDaelemans C98-1078 van-halteren-etal-1998-improving A step towards the detection of semantic variants of terms in technical documents - ThierryHamon - AdelineNazarenko + ThierryHamon + AdelineNazarenko CecileGros C98-1079 hamon-etal-1998-step @@ -603,7 +603,7 @@ Using Decision Trees to Construct a Practical Parser MasahikoHaruno - SatoshiShirai + SatoshiShirai YoshifumiOoyama C98-1080 haruno-etal-1998-using @@ -611,7 +611,7 @@ Integrating Text Plans for Conciseness and Coherence TerrenceHarvey - SandraCarberry + SandraCarberry C98-1081 harvey-carberry-1998-integrating @@ -632,7 +632,7 @@ A Connectionist Architecture for Learning to Parse - JamesHenderson + JamesHenderson PeterLane C98-1084 henderson-lane-1998-connectionist @@ -651,8 +651,8 @@ Long Distance Pronominalisation and Global Focus - JanetHitzeman - MassimoPoesio + JanetHitzeman + MassimoPoesio C98-1087 hitzeman-poesio-1998-long @@ -664,7 +664,7 @@ Terminological variation, a means of identifying research topics from texts - FideliaIbekwe-Sanjuan + FideliaIbekwe-Sanjuan C98-1089 ibekwe-sanjuan-1998-terminological @@ -685,15 +685,15 @@
Exploring the Characteristics of Multi-Party Dialogues - MasatoIshizaki + MasatoIshizaki TsuneakiKato C98-1092 ishizaki-kato-1998-exploring Robust Interaction through Partial Interpretation and Dialogue Management - ArneJonsson - LenaStromback + ArneJonsson + LenaStromback C98-1093 jonsson-stromback-1998-robust @@ -705,23 +705,23 @@
Combining a <fixed-case>C</fixed-case>hinese Thesaurus with a <fixed-case>C</fixed-case>hinese Dictionary - JiDonghong - GongJunping - HuangChangning + DonghongJi + JunpingGong + ChangningHuang C98-1095 ji-etal-1998-combining Combining Multiple, Large-Scale Resources in a Reusable Lexicon for Natural Language Generation - HongyanJing - KathleenMcKeown + HongyanJing + KathleenMcKeown C98-1096 jing-mckeown-1998-combining Text Segmentation Using Reiteration and Collocation - Amanda C.Jobbins - Lindsay J.Evett + Amanda C.Jobbins + Lindsay J.Evett C98-1097 jobbins-evett-1998-text @@ -733,13 +733,13 @@
Unification-based Multimodal Parsing - MichaelJohnston + MichaelJohnston C98-1099 johnston-1998-unification Context Management with Topics for Spoken Dialogue Systems - KristiinaJokinen + KristiinaJokinen HidekiTanaka AkioYokoo C98-1100 @@ -755,31 +755,31 @@ Pseudo-Projectivity: A Polynomially Parsable Non-Projective Dependency Grammar SylvainKahane AlexisNasr - OwenRambow + OwenRambow C98-1102 kahane-etal-1998-pseudo A Method for Correcting Errors in Speech Recognition Using the Statistical Features of Character Co-occurrence SatoshiKaki - EiichiroSumita + EiichiroSumita HitoshiIida C98-1103 kaki-etal-1998-method Use of Mutual Information Based Character Clusters in Dictionary-less Morphological Analysis of <fixed-case>J</fixed-case>apanese - HidekiKashioka + HidekiKashioka YasuhiroKawata YumikoKinjo AndrewFinch - Ezra W.Black + Ezra W.Black C98-1104 kashioka-etal-1998-use Know When to Hold ’Em: Shuffling Deterministically in a Parser for Nonconcatenative Grammars - Robert T.Kasper + Robert T.Kasper MikeCalcagno Paul C.Davis C98-1105 @@ -795,21 +795,21 @@ Unlimited Vocabulary Grapheme to Phoneme Conversion for <fixed-case>K</fixed-case>orean <fixed-case>TTS</fixed-case> ByeongchangKim WonIlLee - GeunbaeLee + GeunbaeLee Jong-HyeokLee C98-1107 kim-etal-1998-unlimited Role of Verbs in Document Analysis - JudithKlavans + JudithKlavans Min-YenKan C98-1108 klavans-kan-1998-role A Flexible Example-Based Parser Based on the <fixed-case>SSTC</fixed-case> - Mosleh HmoudAl-Adhaileh + Mosleh HmoudAl-Adhaileh TangEnya Kong C98-1109 al-adhaileh-tang-1998-flexible @@ -827,37 +827,37 @@ Compacting the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank Grammar AlexanderKrotov MarkHepple - RobertGaizauskas - YorickWilks + RobertGaizauskas + YorickWilks C98-1111 krotov-etal-1998-compacting Generation that Exploits Corpus-Based Statistical Knowledge - IreneLangkilde + IreneLangkilde KevinKnight C98-1112 langkilde-knight-1998-generation Methods and Practical Issues in Evaluating Alignment Techniques - PhilippeLanglais + PhilippeLanglais MichelSimard - JeanVeronis + JeanVeronis C98-1113 langlais-etal-1998-methods A Framework for Customizable Generation of Hypertext Presentations - BenoitLavoie - OwenRambow + BenoitLavoie + OwenRambow C98-1114 lavoie-rambow-1998-framework Automatic Acquisition of Language Model based on Head-Dependent Relation between Words SeungmiLee - Key-SunChoi + Key-SunChoi C98-1115 lee-choi-1998-automatic @@ -869,7 +869,7 @@
Characterizing and Recognizing Spoken Corrections in Human-Computer Dialogue - Gina-AnneLevow + Gina-AnneLevow C98-1117 levow-1998-characterizing @@ -901,18 +901,18 @@
Identifying Syntactic Role of Antecedent in <fixed-case>K</fixed-case>orean Relative Clause Using Corpus and Thesaurus Information - Hui-FengLi + Hui-FengLi Jong-HyeokLee - GeunbaeLee + GeunbaeLee C98-2120 li-etal-1998-identifying A Test Environment for Natural Language Understanding Systems LiLi - Deborah A.Dahl - Lewis M.Norton - Marcia C.Linebarger + Deborah A.Dahl + Lewis M.Norton + Marcia C.Linebarger DongdongChen C98-2121 li-etal-1998-test @@ -932,26 +932,26 @@ Evaluating Response Strategies in a Web-Based Spoken Dialogue Agent - Diane J.Litman + Diane J.Litman ShimeiPan - Marilyn A.Walker + Marilyn A.Walker C98-2124 litman-etal-1998-evaluating Formal aspects and parsing issues of dependency theory VincenzoLombardo - LeonardoLesmo + LeonardoLesmo C98-2125 lombardo-lesmo-1998-formal An Architecture for Dialogue Management, Context Tracking, and Pragmatic Adaptation in Spoken Dialogue Systems - SusannLuperFoy + SusannLuperFoy DanLoehr DavidDuff - KeithMiller - FlorenceReeder + KeithMiller + FlorenceReeder LisaHarper C98-2126 luperfoy-etal-1998-architecture @@ -968,29 +968,29 @@ MakinoTakaki YoshidaMinoru TorisawaKentaro - TsujiiJun’ichi + Jun’ichiTsujii C98-2128 makino-etal-1998-lilfes Bitext Correspondences through Rich Mark-up - RaquelMartinez + RaquelMartinez JosebaAbaitua - ArantzaCasillas + ArantzaCasillas C98-2129 martinez-etal-1998-bitext Discourse Cues for Broadcast News Segmentation - Mark T.Maybury + Mark T.Maybury C98-2130 maybury-1998-discourse Confmnation in Multimodal Systems - David R.McGee - Philip R.Cohen - SharonOviatt + David R.McGee + Philip R.Cohen + SharonOviatt C98-2131 mcgee-etal-1998-confmnation @@ -1011,11 +1011,11 @@
Deriving Transfer Rules from Dominance-Preserving Alignments - AdamMeyers + AdamMeyers RomanYangarber - RalphGrishman - CatherineMacleod - AntonioMoreno-Sandoval + RalphGrishman + CatherineMacleod + AntonioMoreno-Sandoval C98-2134 meyers-etal-1998-deriving @@ -1042,7 +1042,7 @@
Robust pronoun resolution with limited knowledge - RuslanMitkov + RuslanMitkov C98-2138 mitkov-1998-robust @@ -1050,7 +1050,7 @@ <fixed-case>HPSG</fixed-case>-Style Underspecified <fixed-case>J</fixed-case>apanese Grammar with Wide Coverage MitsuishiYutaka TorisawaKentaro - TsujiiJun’ichi + Jun’ichiTsujii C98-2139 mitsuishi-etal-1998-hpsg
@@ -1058,7 +1058,7 @@ Text Segmentation with Multiple Surface Linguistic Cues MochizukiHajime HondaTakeo - OkumuraManabu + ManabuOkumura C98-2140 mochizuki-etal-1998-text
@@ -1071,14 +1071,14 @@ Dynamic compilation of weighted context-free grammars MehryarMohri - Fernando C. N.Pereira + Fernando C. N.Pereira C98-2142 mohri-pereira-1998-dynamic A Stochastic Language Model using Dependency and Its Improvement by Word Clustering ShinsukeMori - MakotoNagao + MakotoNagao C98-2143 mori-nagao-1998-stochastic @@ -1092,14 +1092,14 @@ An Estimate of Referent of Noun Phrases in <fixed-case>J</fixed-case>apanese Sentences MasakiMurata - MakotoNagao + MakotoNagao C98-2145 murata-nagao-1998-estimate Automatic Text Summarization Based on the Global Document Annotation KatashiNagao - KoitiHasida + KoitiHasida C98-2146 nagao-hasida-1998-automatic @@ -1127,7 +1127,7 @@
Constituent-based Accent Prediction - Christine H.Nakatani + Christine H.Nakatani C98-2150 nakatani-1998-constituent @@ -1158,21 +1158,21 @@ An Efficient Parallel Substrate for Typed Feature Structures on Shared Memory Parallel Machines NinomiyaTakashi TorisawaKentaro - TsujiiJun’ichi + Jun’ichiTsujii C98-2154 ninomiya-etal-1998-efficient
Universal Grammar and Lexis for Quick Ramp-Up of <fixed-case>MT</fixed-case> Systems - SergeiNirenburg + SergeiNirenburg VictorRaskin C98-2155 nirenburg-raskin-1998-universal Integration of Large-Scale Linguistic Resources in a Natural Language Understanding System - Lewis M.Norton - Deborah A.Dahl + Lewis M.Norton + Deborah A.Dahl LiLi Katharine P.Beals C98-2156 @@ -1180,7 +1180,7 @@ Improving Statistical Natural Language Translation with Categories and Rules - Franz JosefOch + Franz JosefOch HansWeber C98-2157 och-weber-1998-improving @@ -1188,28 +1188,28 @@ Recognition of the Coherence Relation between Te-linked Clauses AkiraOishi - YujiMatsumoto + YujiMatsumoto C98-2158 oishi-matsumoto-1998-recognition On the Evaluation and Comparison of Taggers: the Effect of Noise in Testing Corpora. - LluisPadro - LluisMarquez + LluisPadro + LluisMarquez C98-2159 padro-marquez-1998-evaluation Learning Intonation Rules for Concept to Speech Generation ShimeiPan - KathleenMcKeown + KathleenMcKeown C98-2160 pan-mckeown-1998-learning Possessive Pronominal Anaphor Resolution in <fixed-case>P</fixed-case>ortuguese Written Texts - IvandreParaboni - Vera Lucia Strubede Lima + IvandreParaboni + Vera Lucia Strubede Lima C98-2161 paraboni-de-lima-1998-possessive @@ -1218,7 +1218,7 @@ JunsikPark Jung-GooKang WookHur - Key-SunChoi + Key-SunChoi C98-2162 park-etal-1998-machine @@ -1247,13 +1247,13 @@ HannesPirker GeorgNiklfeld JohannesMatiasek - HaraldTrost + HaraldTrost C98-2166 pirker-etal-1998-information
Reference Resolution beyond Coreference: a Conceptual Frame and its Application - AndreiPopescu-Belis + AndreiPopescu-Belis IsabelleRobba GerardSabah C98-2167 @@ -1262,14 +1262,14 @@ Multilingual authoring using feedback texts RichardPower - DoniaScott + DoniaScott C98-2168 power-scott-1998-multilingual Practical Glossing by Prioritised Tiling VictorPoznanski - PeteWhitelock + PeteWhitelock JanIJdens SteffanCorley C98-2169 @@ -1277,19 +1277,19 @@ An Intelligent Multi-Dictionary Environment - GaborProszeky + GaborProszeky C98-2170 proszeky-1998-intelligent Learning Correlations between Linguistic Indicators and Semantic Constraints: Reuse of Context-Dependent Descriptions of Entities - Dragomir R.Radev + Dragomir R.Radev C98-2171 radev-1998-learning Statistical Models for Unsupervised Prepositional Phrase Attachment - AdwaitRatnaparkhi + AdwaitRatnaparkhi C98-2172 ratnaparkhi-1998-statistical @@ -1302,24 +1302,24 @@ Generating the Structure of Argument - ChrisReed - DerekLong + ChrisReed + DerekLong C98-2174 reed-long-1998-generating <fixed-case>M</fixed-case>ind<fixed-case>N</fixed-case>et: acquiring and structuring semantic information from text Stephen D.Richardson - William B.Dolan + William B.Dolan LucyVanderwende C98-2175 richardson-etal-1998-mindnet Building Accurate Semantic Taxonomies Monolingual <fixed-case>MRD</fixed-case>s - GermanRigau - HoracioRodriguez - EnekoAgirre + GermanRigau + HoracioRodriguez + EnekoAgirre C98-2176 rigau-etal-1998-building @@ -1339,14 +1339,14 @@ How Verb Subcategorization Frequencies Are Affected By Corpus Choice DouglasRoland - DanielJurafsky + DanielJurafsky C98-2179 roland-jurafsky-1998-verb An Interactive Domain Independent Approach to Robust Dialogue Interpretation - Carolyn PensteinRose - Lori S.Levin + Carolyn PensteinRose + Lori S.Levin C98-2180 rose-levin-1998-interactive @@ -1359,15 +1359,15 @@
A <fixed-case>G</fixed-case>enerative <fixed-case>L</fixed-case>exicon Perspective for Adjectival Modification - PatrickSaint-Dizier + PatrickSaint-Dizier C98-2182 saint-dizier-1998-generative Dialogue Act Tagging with Transformation-Based Learning - KenSamuel - SandraCarberry - K.Vijay-Shanker + KenSamuel + SandraCarberry + K.Vijay-Shanker C98-2183 samuel-etal-1998-dialogue @@ -1425,15 +1425,15 @@ Recognizing Syntactic Errors in the Writing of Second Language Learners DavidSchneider - Kathleen F.McCoy + Kathleen F.McCoy C98-2191 schneider-mccoy-1998-recognizing Transforming Lattices into Non-deterministic Automata with Optional Null Arcs MarkSeligman - ChristianBoitet - BoubakerMeddeb-Hamrouni + ChristianBoitet + BoubakerMeddeb-Hamrouni C98-2192 seligman-etal-1998-transforming @@ -1451,14 +1451,14 @@
Similarity metrics for aligning children’s articulation data - Harold L.Somers + Harold L.Somers C98-2195 somers-1998-similarity A Connectionist Approach to Prepositional Phrase Attachment for Real World Texts Josep M.Sopena - AgustiLLoberas + AgustiLLoberas Joan L.Moliner C98-2196 sopena-etal-1998-connectionist @@ -1485,9 +1485,9 @@ Summarization-based Query Expansion in Information Retrieval - TomekStrzalkowski + TomekStrzalkowski JinWang - BowdenWise + BowdenWise C98-2200 strzalkowski-etal-1998-summarization @@ -1495,7 +1495,7 @@ <fixed-case>C</fixed-case>hinese Word Segmentation without Using Lexicon and Hand-crafted Training Data SunMaosong ShenDayang - Benjamin K.Tsou + Benjamin K.Tsou C98-2201 sun-etal-1998-chinese
@@ -1509,15 +1509,15 @@
Tagging <fixed-case>E</fixed-case>nglish by Path Voting Constraints - GokhanTur + GokhanTur KemalOflazer C98-2203 tur-oflazer-1998-tagging Reactive Content Selection in the Generation of Real-time Soccer Commentary - KumikoTanaka-Ishii - KoitiHasida + KumikoTanaka-Ishii + KoitiHasida ItsukiNoda C98-2204 tanaka-ishii-etal-1998-reactive @@ -1526,7 +1526,7 @@ Idiomatic object usage and support verbs PasiTapanainen JussiPiitulainen - TimoJarvinen + TimoJarvinen C98-2205 tapanainen-etal-1998-idiomatic @@ -1554,7 +1554,7 @@ General-to-Specific Model Selection for Subcategorization Preference TakehitoUtsuro TakashiMiyata - YujiMatsumoto + YujiMatsumoto C98-2209 utsuro-etal-1998-general
@@ -1568,20 +1568,20 @@ The Computational Lexical Semantics of Syntagmatic Relations EvelyneViegas StephenBeale - SergeiNirenburg + SergeiNirenburg C98-2211 viegas-etal-1998-computational
A tabular interpretation of a class of 2-Stack Automata - EricVillemonte de la Clergerie - MiguelAlonso Pardo + EricVillemonte de la Clergerie + MiguelAlonso Pardo C98-2212 villemonte-de-la-clergerie-alonso-pardo-1998-tabular Project for production of closed-caption <fixed-case>TV</fixed-case> programs for the hearing impaired - TakahiroWakao + TakahiroWakao EijiSawamura TerumasaEhara IchiroMaruyama @@ -1590,23 +1590,23 @@ Learning Optimal Dialogue Strategies: A Case Study of a Spoken Dialogue Agent for Email - Marilyn A.Walker + Marilyn A.Walker Jeanne C.Fromer - ShrikanthNarayanan + ShrikanthNarayanan C98-2214 walker-etal-1998-learning Automatic <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese name transliteration for development of multilingual resources StephenWan - Cornelia MariaVerspoor + Cornelia MariaVerspoor C98-2215 wan-verspoor-1998-automatic Modeling with Structures in Statistical Machine Translation Ye-YiWang - AlexWaibel + AlexWaibel C98-2216 wang-waibel-1998-modeling @@ -1626,7 +1626,7 @@ Diagram Understanding Using Integration of Layout Information and Textual Information YasuhikoWatanabe - MakotoNagao + MakotoNagao C98-2219 watanabe-nagao-1998-diagram @@ -1635,33 +1635,33 @@ YasuhikoWatanabe YoshihiroOkada KengoKaneji - MakotoNagao + MakotoNagao C98-2220 watanabe-etal-1998-aligning
Translating Idioms - EricWehrli + EricWehrli C98-2221 wehrli-1998-translating Head-Driven Generation with <fixed-case>HPSG</fixed-case> - GrahamWilcock - YujiMatsumoto + GrahamWilcock + YujiMatsumoto C98-2222 wilcock-matsumoto-1998-head Word Sense Disambiguation using Optimised Combinations of Knowledge Sources - YorickWilks + YorickWilks MarkStevenson C98-2223 wilks-stevenson-1998-word A Model for Robust Processing of Spontaneous Speech by Integrating Viable Fragments - Karsten L.Worm + Karsten L.Worm C98-2224 worm-1998-model @@ -1693,20 +1693,20 @@ Feasibility Study for Ellipsis Resolution in Dialogues by Machine-Learning Technique YamamotoKazuhide - SumitaEiichiro + EiichiroSumita C98-2228 yamamoto-sumita-1998-feasibility Some Properties of Preposition and Subordinate Conjunction Attachments - Alexander S.Yeh - Marc B.Vilain + Alexander S.Yeh + Marc B.Vilain C98-2229 yeh-vilain-1998-properties Evaluation of Importance of Sentences based on Connectivity to Title - TakehikoYoshimi + TakehikoYoshimi ToshiyukiOkunishi TakahiroYamaji YojiFukumochi @@ -1722,7 +1722,7 @@ Using Chunk Based Partial Parsing of Spontaneous Speech in Unrestricted Domains for Reducing Word Error Rate in Speech Recognition KlausZechner - AlexWaibel + AlexWaibel C98-2232 zechner-waibel-1998-using @@ -1734,14 +1734,14 @@ Word Association and <fixed-case>MI</fixed-case>-Trigger-based Language Modeling - GuoDongZhou - KimTengLua + GuoDongZhou + KimTengLua C98-2234 zhou-lua-1998-word Discovering Phonotactic Finite-State Automata by Genetic Search - AnjaBelz + AnjaBelz C98-2235 belz-1998-discovering @@ -1772,7 +1772,7 @@
Bridging the Gap between Dictionary and Thesaurus - Oi YeeKwong + Oi YeeKwong C98-2240 kwong-1998-bridging @@ -1784,7 +1784,7 @@
Detecting Verbal Participation in Diathesis Alternations - DianaMcCarthy + DianaMcCarthy AnnaKorhonen C98-2242 mccarthy-korhonen-1998-detecting diff --git a/data/xml/D07.xml b/data/xml/D07.xml index f6e784e4fd..a77d37c63f 100644 --- a/data/xml/D07.xml +++ b/data/xml/D07.xml @@ -4,7 +4,7 @@ Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning (EMNLP-CoNLL) D07-1 - JasonEisner + JasonEisner Association for Computational Linguistics
Prague, Czech Republic
June @@ -35,7 +35,7 @@ What is the <fixed-case>J</fixed-case>eopardy Model? A Quasi-Synchronous Grammar for <fixed-case>QA</fixed-case> MengqiuWang - Noah A.Smith + Noah A.Smith TerukoMitamura 22–32 D07-1003 @@ -46,7 +46,7 @@ YouzhengWu RuiqiangZhang XinhuiHu - HidekiKashioka + HidekiKashioka 33–41 D07-1004 wu-etal-2007-learning @@ -54,7 +54,7 @@ Improving Word Alignment with Bridge Languages ShankarKumar - Franz J.Och + Franz J.Och WolfgangMacherey 42–50 D07-1005 @@ -62,7 +62,7 @@ Getting the Structure Right for Word Alignment: <fixed-case>LEAF</fixed-case> - AlexanderFraser + AlexanderFraser DanielMarcu 51–60 D07-1006 @@ -78,7 +78,7 @@ Large Margin Synchronous Generation and its Application to Sentence Compression - TrevorCohn + TrevorCohn MirellaLapata 73–82 D07-1008 @@ -95,8 +95,8 @@ Automatically Identifying the Arguments of Discourse Connectives - BenWellner - JamesPustejovsky + BenWellner + JamesPustejovsky 92–101 D07-1010 wellner-pustejovsky-2007-automatically @@ -104,7 +104,7 @@ Incremental Generation of Plural Descriptions: Similarity and Partitioning AlbertGatt - Keesvan Deemter + Keesvan Deemter 102–111 D07-1011 gatt-van-deemter-2007-incremental @@ -113,7 +113,7 @@ A Comparative Evaluation of Deep and Shallow Approaches to the Automatic Detection of Common Grammatical Errors JoachimWagner JenniferFoster - Josefvan Genabith + Josefvan Genabith 112–121 D07-1012 wagner-etal-2007-comparative @@ -128,8 +128,8 @@ Probabilistic Models of Nonprojective Dependency Trees - David A.Smith - Noah A.Smith + David A.Smith + Noah A.Smith 132–140 D07-1014 smith-smith-2007-probabilistic @@ -139,14 +139,14 @@ TerryKoo AmirGloberson XavierCarreras - MichaelCollins + MichaelCollins 141–150 D07-1015 koo-etal-2007-structured Using Foreign Inclusion Detection to Improve Parsing Performance - BeatriceAlex + BeatriceAlex AmitDubey FrankKeller 151–160 @@ -157,15 +157,15 @@ <fixed-case>LEDIR</fixed-case>: An Unsupervised Algorithm for Learning Directionality of Inference Rules RahulBhagat PatrickPantel - EduardHovy + EduardHovy 161–170 D07-1017 bhagat-etal-2007-ledir Modelling Polysemy in Adjective Classes by Multi-Label Classification - GemmaBoleda - SabineSchulte im Walde + GemmaBoleda + SabineSchulte im Walde ToniBadia 171–180 D07-1018 @@ -183,14 +183,14 @@ Towards Robust Unsupervised Personal Name Disambiguation YingChen - JamesMartin + JamesMartin 190–198 D07-1020 chen-martin-2007-towards Compressing Trigram Language Models With <fixed-case>G</fixed-case>olomb Coding - KennethChurch + KennethChurch TedHart JianfengGao 199–207 @@ -199,8 +199,8 @@ Joint Morphological and Syntactic Disambiguation - Shay B.Cohen - Noah A.Smith + Shay B.Cohen + Noah A.Smith 208–217 D07-1022 cohen-smith-2007-joint @@ -215,16 +215,16 @@ Semi-Supervised Classification for Extracting Protein Interaction Sentences using Dependency Parsing - GüneşErkan - ArzucanÖzgür - Dragomir R.Radev + GüneşErkan + ArzucanÖzgür + Dragomir R.Radev 228–237 D07-1024 erkan-etal-2007-semi A Sequence Alignment Model Based on the Averaged Perceptron - DayneFreitag + DayneFreitag ShahramKhadivi 238–247 D07-1025 @@ -232,17 +232,17 @@ Instance Based Lexical Entailment for Ontology Population - ClaudioGiuliano - AlfioGliozzo + ClaudioGiuliano + AlfioGliozzo 248–256 D07-1026 giuliano-gliozzo-2007-instance Recovering Non-Local Dependencies for <fixed-case>C</fixed-case>hinese - YuqingGuo + YuqingGuo HaifengWang - Josefvan Genabith + Josefvan Genabith 257–266 D07-1027 guo-etal-2007-recovering @@ -252,7 +252,7 @@ DeirdreHogan ConorCafferkey AoifeCahill - Josefvan Genabith + Josefvan Genabith 267–276 D07-1028 hogan-etal-2007-exploiting @@ -291,7 +291,7 @@ A New Perceptron Algorithm for Sequence Labeling with Non-Local Features - Jun’ichiKazama + Jun’ichiKazama KentaroTorisawa 315–324 D07-1033 @@ -299,8 +299,8 @@ Extending a Thesaurus in the Pan-<fixed-case>C</fixed-case>hinese Context - Oi YeeKwong - Benjamin K.Tsou + Oi YeeKwong + Benjamin K.Tsou 325–333 D07-1034 kwong-tsou-2007-extending @@ -309,7 +309,7 @@ Low-Quality Product Review Detection in Opinion Summarization JingjingLiu YunboCao - Chin-YewLin + Chin-YewLin YalouHuang MingZhou 334–342 @@ -318,7 +318,7 @@ Improving Statistical Machine Translation Performance by Training Data Selection and Optimization - Yajuan + Yajuan JinHuang QunLiu 343–350 @@ -327,8 +327,8 @@ Topic Segmentation with Hybrid Document Indexing - IrinaMatveeva - Gina-AnneLevow + IrinaMatveeva + Gina-AnneLevow 351–359 D07-1037 matveeva-levow-2007-topic @@ -343,16 +343,16 @@ Detecting Compositionality of Verb-Object Combinations using Selectional Preferences - DianaMcCarthy + DianaMcCarthy SriramVenkatapathy - AravindJoshi + AravindJoshi 369–379 D07-1039 mccarthy-etal-2007-detecting Explorations in Automatic Book Summarization - RadaMihalcea + RadaMihalcea HakanCeylan 380–389 D07-1040 @@ -368,8 +368,8 @@ Flexible, Corpus-Based Modelling of Human Plausibility Judgements - SebastianPadó - UlrikePadó + SebastianPadó + UlrikePadó KatrinErk 400–409 D07-1042 @@ -378,7 +378,7 @@ <fixed-case>V</fixed-case>-Measure: A Conditional Entropy-Based External Cluster Evaluation Measure AndrewRosenberg - JuliaHirschberg + JuliaHirschberg 410–420 D07-1043 rosenberg-hirschberg-2007-v @@ -394,8 +394,8 @@ Smooth Bilingual <tex-math>N</tex-math>-Gram Translation HolgerSchwenk - MartaR. Costa-jussà - Jose A.R. Fonollosa + MartaR. Costa-jussà + Jose A.R. Fonollosa 430–438 D07-1045 schwenk-etal-2007-smooth @@ -420,7 +420,7 @@ Automatic Identification of Important Segments and Expressions for Mining of Business-Oriented Conversations at Contact Centers HironoriTakeuchi - L VenkataSubramaniam + L VenkataSubramaniam TetsuyaNasukawa ShouryaRoy 458–467 @@ -439,7 +439,7 @@ Word Sense Disambiguation Incorporating Lexical and Structural Semantic Information TakaakiTanaka FrancisBond - TimothyBaldwin + TimothyBaldwin SanaeFujita ChikaraHashimoto 477–485 @@ -472,8 +472,8 @@ Bilingual Cluster Based Models for Statistical Machine Translation - HirofumiYamamoto - EiichiroSumita + HirofumiYamamoto + EiichiroSumita 514–523 D07-1054 yamamoto-sumita-2007-bilingual @@ -481,8 +481,8 @@ A Systematic Comparison of Training Criteria for Statistical Machine Translation RichardZens - SašaHasan - HermannNey + SašaHasan + HermannNey 524–532 D07-1055 zens-etal-2007-systematic @@ -516,14 +516,14 @@ Generating Lexical Analogies Using Dependency Relations AndyChiu PascalPoupart - ChrysanneDiMarco + ChrysanneDiMarco 561–570 D07-1059 chiu-etal-2007-generating Cross-Lingual Distributional Profiles of Concepts for Measuring Semantic Distance - SaifMohammad + SaifMohammad IrynaGurevych GraemeHirst TorstenZesch @@ -551,7 +551,7 @@ <fixed-case>J</fixed-case>apanese Dependency Analysis Using the Ancestor-Descendant Relation AkihiroTamura HiroyaTakamura - ManabuOkumura + ManabuOkumura 600–609 D07-1063 tamura-etal-2007-japanese @@ -567,7 +567,7 @@ Recovery of Empty Nodes in Parse Structures DenisFilimonov - MaryHarper + MaryHarper 620–629 D07-1065 filimonov-harper-2007-recovery @@ -575,7 +575,7 @@ Treebank Annotation Schemes and Parser Evaluation for <fixed-case>G</fixed-case>erman InesRehbein - Josefvan Genabith + Josefvan Genabith 630–639 D07-1066 rehbein-van-genabith-2007-treebank @@ -585,7 +585,7 @@ QinfengShi YaseminAltun AlexSmola - S.V.N.Vishwanathan + S.V.N.Vishwanathan 640–648 D07-1067 shi-etal-2007-semi @@ -594,7 +594,7 @@ A Graph-Based Approach to Named Entity Categorization in <fixed-case>W</fixed-case>ikipedia Using Conditional Random Fields YotaroWatanabe MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 649–657 D07-1068 watanabe-etal-2007-graph @@ -602,10 +602,10 @@ <fixed-case>M</fixed-case>aven<fixed-case>R</fixed-case>ank: Identifying Influential Members of the <fixed-case>US</fixed-case> Senate Using Lexical Centrality AnthonyFader - Dragomir R.Radev + Dragomir R.Radev Michael H.Crespin Burt L.Monroe - Kevin M.Quinn + Kevin M.Quinn MichaelColaresi 658–666 D07-1069 @@ -613,7 +613,7 @@ Bootstrapping Feature-Rich Dependency Parsers with Entropic Priors - David A.Smith + David A.Smith JasonEisner 667–677 D07-1070 @@ -621,8 +621,8 @@ Online Learning of Relaxed <fixed-case>CCG</fixed-case> Grammars for Parsing to Logical Form - LukeZettlemoyer - MichaelCollins + LukeZettlemoyer + MichaelCollins 678–687 D07-1071 zettlemoyer-collins-2007-online @@ -631,7 +631,7 @@ The Infinite <fixed-case>PCFG</fixed-case> Using Hierarchical <fixed-case>D</fixed-case>irichlet Processes PercyLiang SlavPetrov - MichaelJordan + MichaelJordan DanKlein 688–697 D07-1072 @@ -639,7 +639,7 @@ Exploiting <fixed-case>W</fixed-case>ikipedia as External Knowledge for Named Entity Recognition - Jun’ichiKazama + Jun’ichiKazama KentaroTorisawa 698–707 D07-1073 @@ -647,7 +647,7 @@ Large-Scale Named Entity Disambiguation Based on <fixed-case>W</fixed-case>ikipedia Data - SilviuCucerzan + SilviuCucerzan 708–716 D07-1074 cucerzan-2007-large @@ -655,17 +655,17 @@ Effective Information Extraction with Semantic Affinity Patterns and Relevant Regions SiddharthPatwardhan - EllenRiloff + EllenRiloff 717–727 D07-1075 patwardhan-riloff-2007-effective Tree Kernel-Based Relation Extraction with Context-Sensitive Structured Parse Tree Information - GuoDongZhou + GuoDongZhou MinZhang - Dong HongJi - QiaoMingZhu + Dong HongJi + QiaoMingZhu 728–736 D07-1076 zhou-etal-2007-tree @@ -673,7 +673,7 @@ <fixed-case>C</fixed-case>hinese Syntactic Reordering for Statistical Machine Translation ChaoWang - MichaelCollins + MichaelCollins PhilippKoehn 737–745 D07-1077 @@ -719,7 +719,7 @@ Active Learning for Word Sense Disambiguation with Methods for Addressing the Class Imbalance Problem JingboZhu - EduardHovy + EduardHovy 783–790 D07-1082 zhu-hovy-2007-active @@ -769,16 +769,16 @@ Extracting Data Records from Unstructured Biomedical Full Text DonghuiFeng GullyBurns - EduardHovy + EduardHovy 837–846 D07-1088 feng-etal-2007-extracting Multiple Alignment of Citation Sentences with Conditional Random Fields and Posterior Decoding - ArielSchwartz + ArielSchwartz AnnaDivoli - MartiHearst + MartiHearst 847–857 D07-1089 schwartz-etal-2007-multiple @@ -786,9 +786,9 @@ Large Language Models in Machine Translation ThorstenBrants - Ashok C.Popat + Ashok C.Popat PengXu - Franz J.Och + Franz J.Och JeffreyDean 858–867 D07-1090 @@ -804,7 +804,7 @@ Translating Unknown Words by Analogical Learning - PhilippeLanglais + PhilippeLanglais AlexandrePatry 877–886 D07-1092 @@ -812,9 +812,9 @@ A Probabilistic Approach to Diachronic Phonology - AlexandreBouchard + AlexandreBouchard PercyLiang - ThomasGriffiths + ThomasGriffiths DanKlein 887–896 D07-1093 @@ -840,7 +840,7 @@ The <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2007 Shared Task on Dependency Parsing JoakimNivre JohanHall - SandraKübler + SandraKübler RyanMcDonald JensNilsson SebastianRiedel @@ -854,8 +854,8 @@ JohanHall JensNilsson JoakimNivre - GülşenEryiǧit - BeátaMegyesi + GülşenEryiǧit + BeátaMegyesi MattiasNilsson MarkusSaers 933–939 @@ -874,7 +874,7 @@ Fast and Robust Multilingual Dependency Parsing with a Generative Latent Variable Model IvanTitov - JamesHenderson + JamesHenderson 947–951 D07-1099 titov-henderson-2007-fast @@ -895,9 +895,9 @@ Log-Linear Models of Non-Projective Trees, <tex-math>k</tex-math>-best <fixed-case>MST</fixed-case> Parsing and Tree-Ranking - KeithHall - JiříHavelka - David A.Smith + KeithHall + JiříHavelka + David A.Smith 962–966 D07-1102 hall-etal-2007-log @@ -922,15 +922,15 @@ An Empirical Study on Computing Consensus Translations from Multiple Machine Translation Systems WolfgangMacherey - Franz J.Och + Franz J.Och 986–995 D07-1105 macherey-och-2007-empirical Learning to Find <fixed-case>E</fixed-case>nglish to <fixed-case>C</fixed-case>hinese Transliterations on the Web - Jian-ChengWu - Jason S.Chang + Jian-ChengWu + Jason S.Chang 996–1004 D07-1106 wu-chang-2007-learning @@ -939,15 +939,15 @@ Learning to Merge Word Senses RionSnow SushantPrakash - DanielJurafsky - Andrew Y.Ng + DanielJurafsky + Andrew Y.Ng 1005–1014 D07-1107 snow-etal-2007-learning Improving Word Sense Disambiguation Using Topic Features - JunfuCai + JunfuCai Wee SunLee Yee WhyeTeh 1015–1023 @@ -957,8 +957,8 @@ A Topic Model for Word Sense Disambiguation JordanBoyd-Graber - DavidBlei - XiaojinZhu + DavidBlei + XiaojinZhu 1024–1033 D07-1109 boyd-graber-etal-2007-topic @@ -977,7 +977,7 @@ Dependency Parsing and Domain Adaptation with <fixed-case>LR</fixed-case> Models and Parser Ensembles KenjiSagae - Jun’ichiTsujii + Jun’ichiTsujii 1044–1050 D07-1111 sagae-tsujii-2007-dependency @@ -986,9 +986,9 @@ Frustratingly Hard Domain Adaptation for Dependency Parsing MarkDredze JohnBlitzer - Partha PratimTalukdar + Partha PratimTalukdar KuzmanGanchev - JoãoGraça + JoãoGraça FernandoPereira 1051–1055 D07-1112 @@ -997,7 +997,7 @@ <fixed-case>C</fixed-case>rystal: Analyzing Predictive Opinions on the Web Soo-MinKim - EduardHovy + EduardHovy 1056–1064 D07-1113 kim-hovy-2007-crystal @@ -1006,7 +1006,7 @@ Extracting Aspect-Evaluation and Aspect-Of Relations in Opinion Mining NozomiKobayashi KentaroInui - YujiMatsumoto + YujiMatsumoto 1065–1074 D07-1114 kobayashi-etal-2007-extracting @@ -1023,9 +1023,9 @@ Determining Case in <fixed-case>A</fixed-case>rabic: Learning Complex Linguistic Behavior Requires Complex Linguistic Features NizarHabash RyanGabbard - OwenRambow + OwenRambow SethKulick - MitchMarcus + MitchMarcus 1084–1092 D07-1116 habash-etal-2007-determining @@ -1033,17 +1033,17 @@ <fixed-case>M</fixed-case>andarin Part-of-Speech Tagging and Discriminative Reranking ZhongqiangHuang - MaryHarper - WenWang + MaryHarper + WenWang 1093–1102 D07-1117 huang-etal-2007-mandarin Building Domain-Specific Taggers without Annotated (Domain) Data - JohnMiller + JohnMiller ManabuTorii - K.Vijay-Shanker + K.Vijay-Shanker 1103–1111 D07-1118 miller-etal-2007-building @@ -1061,7 +1061,7 @@ Hybrid Ways to Improve Domain Independence in an <fixed-case>ML</fixed-case> Dependency Parser - EckhardBick + EckhardBick 1119–1123 D07-1120 bick-2007-hybrid @@ -1069,7 +1069,7 @@ A Constraint Satisfaction Approach to Dependency Parsing SanderCanisius - ErikTjong Kim Sang + ErikTjong Kim Sang 1124–1128 D07-1121 canisius-tjong-kim-sang-2007-constraint @@ -1093,7 +1093,7 @@ Online Learning for Deterministic Dependency Parsing - Prashanth ReddyMannem + Prashanth ReddyMannem 1139–1143 D07-1124 mannem-2007-online @@ -1107,9 +1107,9 @@ A Multilingual Dependency Analysis System Using Online Passive-Aggressive Learning - Le-MinhNguyen + Le-MinhNguyen AkiraShimazu - Phuong-ThaiNguyen + Phuong-ThaiNguyen Xuan-HieuPhan 1149–1155 D07-1126 @@ -1144,7 +1144,7 @@ Adapting the <fixed-case>RASP</fixed-case> System for the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>07 Domain-Adaptation Task RebeccaWatson - TedBriscoe + TedBriscoe 1170–1174 D07-1130 watson-briscoe-2007-adapting diff --git a/data/xml/D08.xml b/data/xml/D08.xml index 37e8c8967a..05ff9b4305 100644 --- a/data/xml/D08.xml +++ b/data/xml/D08.xml @@ -20,7 +20,7 @@ Revealing the Structure of Medical Dictations with Conditional Random Fields JeremyJancsary JohannesMatiasek - HaraldTrost + HaraldTrost 1–10 D08-1001 jancsary-etal-2008-revealing @@ -28,7 +28,7 @@ It’s a Contradiction – no, it’s not: <fixed-case>A</fixed-case> Case Study using Functional Relations AlanRitter - StephenSoderland + StephenSoderland DougDowney OrenEtzioni 11–20 @@ -48,8 +48,8 @@ Modeling Annotators: <fixed-case>A</fixed-case> Generative Approach to Learning from Annotator Rationales - OmarZaidan - JasonEisner + OmarZaidan + JasonEisner 31–40 D08-1004 zaidan-eisner-2008-modeling @@ -90,7 +90,7 @@ Scaling Textual Inference to the Web StefanSchoenmackers OrenEtzioni - DanielWeld + DanielWeld 79–88 D08-1009 schoenmackers-etal-2008-scaling @@ -111,7 +111,7 @@ MeiYang JianfengGao PatrickNguyen - RobertMoore + RobertMoore 98–107 D08-1011 he-etal-2008-indirect @@ -137,8 +137,8 @@ Multilingual Subjectivity Analysis Using Machine Translation CarmenBanea - RadaMihalcea - JanyceWiebe + RadaMihalcea + JanyceWiebe SamerHassan 127–135 D08-1014 @@ -154,8 +154,8 @@ Dependency Parsing by Belief Propagation - DavidSmith - JasonEisner + DavidSmith + JasonEisner 145–156 D08-1016 smith-eisner-2008-dependency @@ -164,8 +164,8 @@ Stacking Dependency Parsers André FilipeTorres Martins DipanjanDas - Noah A.Smith - Eric P.Xing + Noah A.Smith + Eric P.Xing 157–166 D08-1017 torres-martins-etal-2008-stacking @@ -174,7 +174,7 @@ Better Binarization for the <fixed-case>CKY</fixed-case> Parsing XinyingSong ShilinDing - Chin-YewLin + Chin-YewLin 167–176 D08-1018 song-etal-2008-better @@ -212,7 +212,7 @@ Probabilistic Inference for Machine Translation - PhilBlunsom + PhilBlunsom MilesOsborne 215–223 D08-1023 @@ -229,7 +229,7 @@ A Noisy-Channel Model of Human Sentence Comprehension under Uncertain Input - RogerLevy + RogerLevy 234–243 D08-1025 levy-2008-noisy @@ -237,7 +237,7 @@ Incorporating Temporal and Semantic Information with Eye Gaze for Automatic Word Acquisition in Multimodal Conversational Systems ShaolinQu - JoyceChai + JoyceChai 244–253 D08-1026 qu-chai-2008-incorporating @@ -246,16 +246,16 @@ Cheap and Fast – But is it Good? Evaluating Non-Expert Annotations for Natural Language Tasks RionSnow BrendanO’Connor - DanielJurafsky - AndrewNg + DanielJurafsky + AndrewNg 254–263 D08-1027 snow-etal-2008-cheap <fixed-case>H</fixed-case>ot<fixed-case>S</fixed-case>pots: <fixed-case>V</fixed-case>isualizing Edits to a Text - SrinivasBangalore - DavidSmith + SrinivasBangalore + DavidSmith 264–273 D08-1028 bangalore-smith-2008-hotspots @@ -271,7 +271,7 @@ <fixed-case>A</fixed-case>rabic Named Entity Recognition using Optimized Feature Sets YassineBenajiba - MonaDiab + MonaDiab PaoloRosso 284–293 D08-1030 @@ -288,7 +288,7 @@ Selecting Sentences for Answering Complex Questions YlliasChali - ShafiqJoty + ShafiqJoty 304–313 D08-1032 chali-joty-2008-selecting @@ -296,7 +296,7 @@ Sampling Alignment Structure under a <fixed-case>B</fixed-case>ayesian Translation Model JohnDeNero - AlexandreBouchard-Côté + AlexandreBouchard-Côté DanKlein 314–323 D08-1033 @@ -305,7 +305,7 @@ Improving <fixed-case>C</fixed-case>hinese Semantic Role Classification with Hierarchical Feature Selection Strategy WeiweiDing - BaobaoChang + BaobaoChang 324–333 D08-1034 ding-chang-2008-improving @@ -337,17 +337,17 @@ Studying the History of Ideas Using Topic Models DavidHall - DanielJurafsky - Christopher D.Manning + DanielJurafsky + Christopher D.Manning 363–371 D08-1038 hall-etal-2008-studying Triplet Lexicon Models for Statistical Machine Translation - SašaHasan + SašaHasan JuriGanitkevitch - HermannNey + HermannNey JesúsAndrés-Ferrer 372–381 D08-1039 @@ -357,7 +357,7 @@ A Casual Conversation System Using Modality and Word Associations Retrieved from the Web ShinsukeHiguchi RafalRzepka - KenjiAraki + KenjiAraki 382–390 D08-1040 higuchi-etal-2008-casual @@ -373,7 +373,7 @@ A Dependency-based Word Subsequence Kernel - RohitKate + RohitKate 400–409 D08-1042 kate-2008-dependency @@ -383,7 +383,7 @@ Jung-TaeLee Sang-BumKim Young-InSong - Hae-ChangRim + Hae-ChangRim 410–418 D08-1043 lee-etal-2008-bridging @@ -406,17 +406,17 @@ Legal Docket Classification: <fixed-case>W</fixed-case>here Machine Learning Stumbles RameshNallapati - Christopher D.Manning + Christopher D.Manning 438–446 D08-1046 nallapati-manning-2008-legal A Discriminative Candidate Generator for String Transformations - NaoakiOkazaki + NaoakiOkazaki YoshimasaTsuruoka SophiaAnaniadou - Jun’ichiTsujii + Jun’ichiTsujii 447–456 D08-1047 okazaki-etal-2008-discriminative-candidate @@ -425,7 +425,7 @@ Automatic induction of <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et lexical units MarcoPennacchiotti DiegoDe Cao - RobertoBasili + RobertoBasili DaniloCroce MichaelRoth 457–465 @@ -435,7 +435,7 @@ Multimodal Subjectivity Analysis of Multiparty Conversation StephanRaaijmakers - KhietTruong + KhietTruong TheresaWilson 466–474 D08-1049 @@ -451,11 +451,11 @@ Improving Interactive Machine Translation via Mouse Actions - GermánSanchis-Trilles - DanielOrtiz-Martínez + GermánSanchis-Trilles + DanielOrtiz-Martínez JorgeCivera - FranciscoCasacuberta - EnriqueVidal + FranciscoCasacuberta + EnriqueVidal HieuHoang 485–494 D08-1051 @@ -464,7 +464,7 @@ <fixed-case>LTAG</fixed-case> Dependency Parsing with Bidirectional Incremental Construction LibinShen - AravindJoshi + AravindJoshi 495–504 D08-1052 shen-joshi-2008-ltag @@ -514,7 +514,7 @@ StephenWan RobertDale MarkDras - CécileParis + CécileParis 543–552 D08-1057 wan-etal-2008-seed @@ -537,16 +537,16 @@ Generalizing Local and Non-Local Word-Reordering Patterns for Syntax-Based Machine Translation BingZhao - YaserAl-onaizan + YaserAl-onaizan 572–581 D08-1060 zhao-al-onaizan-2008-generalizing Weakly-Supervised Acquisition of Labeled Class Instances using Graph Random Walks - Partha PratimTalukdar + Partha PratimTalukdar JosephReisinger - MariusPaşca + MariusPaşca DeepakRavichandran RahulBhagat FernandoPereira @@ -565,7 +565,7 @@ Mention Detection Crossing the Language Barrier ImedZitouni - RaduFlorian + RaduFlorian 600–609 D08-1063 zitouni-florian-2008-mention @@ -582,9 +582,9 @@ Lattice <fixed-case>M</fixed-case>inimum <fixed-case>B</fixed-case>ayes-<fixed-case>R</fixed-case>isk Decoding for Statistical Machine Translation - RoyTromble + RoyTromble ShankarKumar - FranzOch + FranzOch WolfgangMacherey 620–629 D08-1065 @@ -593,7 +593,7 @@ Phrase Translation Probabilities with <fixed-case>ITG</fixed-case> Priors and Smoothing as Learning Objective MarkosMylonakis - KhalilSima’an + KhalilSima’an 630–639 D08-1066 mylonakis-simaan-2008-phrase @@ -623,14 +623,14 @@ Learning with Probabilistic Features for Improved Pipeline Models - RazvanBunescu + RazvanBunescu 670–679 D08-1070 bunescu-2008-learning Cross-Task Knowledge-Constrained Self Training - HalDaumé III + HalDaumé III 680–688 D08-1071 daume-iii-2008-cross @@ -645,8 +645,8 @@ Jointly Combining Implicit Constraints Improves Temporal Ordering - NathanaelChambers - DanielJurafsky + NathanaelChambers + DanielJurafsky 698–706 D08-1073 chambers-jurafsky-2008-jointly @@ -662,7 +662,7 @@ Learning the Scope of Negation in Biomedical Texts RoserMorante AnthonyLiekens - WalterDaelemans + WalterDaelemans 715–724 D08-1075 morante-etal-2008-learning @@ -670,7 +670,7 @@ Lattice-based Minimum Error Rate Training for Statistical Machine Translation WolfgangMacherey - FranzOch + FranzOch IgnacioThayer JakobUszkoreit 725–734 @@ -703,7 +703,7 @@ Topic-Driven Multi-Document Summarization with Encyclopedic Knowledge and Spreading Activation - ViviNastase + ViviNastase 763–772 D08-1080 nastase-2008-topic @@ -721,7 +721,7 @@ WeiLu Hwee TouNg Wee SunLee - Luke S.Zettlemoyer + Luke S.Zettlemoyer 783–792 D08-1082 lu-etal-2008-generative @@ -729,7 +729,7 @@ Learning with Compositional Semantics as Structural Inference for Subsentential Sentiment Analysis YejinChoi - ClaireCardie + ClaireCardie 793–801 D08-1083 choi-cardie-2008-learning @@ -738,7 +738,7 @@ A Phrase-Based Alignment Model for Natural Language Inference BillMacCartney MichelGalley - Christopher D.Manning + Christopher D.Manning 802–811 D08-1084 maccartney-etal-2008-phrase @@ -763,8 +763,8 @@ <fixed-case>N</fixed-case>-gram Weighting: <fixed-case>R</fixed-case>educing Training Data Mismatch in Cross-Domain Language Model Estimation - Bo-June PaulHsu - JamesGlass + Bo-June PaulHsu + JamesGlass 829–838 D08-1087 hsu-glass-2008-n @@ -773,7 +773,7 @@ Complexity of Finding the <fixed-case>BLEU</fixed-case>-optimal Hypothesis in a Confusion Network GregorLeusch EvgenyMatusov - HermannNey + HermannNey 839–847 D08-1088 leusch-etal-2008-complexity @@ -781,16 +781,16 @@ A Simple and Effective Hierarchical Phrase Reordering Model MichelGalley - Christopher D.Manning + Christopher D.Manning 848–856 D08-1089 galley-manning-2008-simple Language and Translation Model Adaptation using Comparable Corpora - MatthewSnover - BonnieDorr - RichardSchwartz + MatthewSnover + BonnieDorr + RichardSchwartz 857–866 D08-1090 snover-etal-2008-language @@ -823,7 +823,7 @@ A Structured Vector Space Model for Word Meaning in Context KatrinErk - SebastianPadó + SebastianPadó 897–906 D08-1094 erk-pado-2008-structured @@ -831,14 +831,14 @@ Learning Graph Walk Based Similarity Measures for Parsed Text EinatMinkov - William W.Cohen + William W.Cohen 907–916 D08-1095 minkov-cohen-2008-learning A Graph-theoretic Model of Lexical Syntactic Acquisition - HinrichSchütze + HinrichSchütze MichaelWalsh 917–926 D08-1096 @@ -864,10 +864,10 @@ Automatic Set Expansion for List Question Answering - Richard C.Wang + Richard C.Wang NicoSchlaefer - William W.Cohen - EricNyberg + William W.Cohen + EricNyberg 947–954 D08-1099 wang-etal-2008-automatic @@ -875,7 +875,7 @@ Acquiring Domain-Specific Dialog Information from Task-Oriented Human-Human Interaction through an Unsupervised Learning AnanladaChotimongkol - AlexanderRudnicky + AlexanderRudnicky 955–964 D08-1100 chotimongkol-rudnicky-2008-acquiring @@ -897,8 +897,8 @@ Computing Word-Pair Antonymy - SaifMohammad - BonnieDorr + SaifMohammad + BonnieDorr GraemeHirst 982–991 D08-1103 @@ -924,9 +924,9 @@ Graph-based Analysis of Semantic Drift in <fixed-case>E</fixed-case>spresso-like Bootstrapping Algorithms MamoruKomachi - TakuKudo + TakuKudo MasashiShimbo - YujiMatsumoto + YujiMatsumoto 1011–1020 D08-1106 komachi-etal-2008-graph @@ -987,16 +987,16 @@ Latent-Variable Modeling of String Transductions with Finite-State Methods MarkusDreyer - JasonSmith - JasonEisner + JasonSmith + JasonEisner 1080–1089 D08-1113 dreyer-etal-2008-latent Soft-Supervised Learning for Text Classification - AmarnagSubramanya - JeffBilmes + AmarnagSubramanya + JeffBilmes 1090–1099 D08-1114 subramanya-bilmes-2008-soft diff --git a/data/xml/D09.xml b/data/xml/D09.xml index 955abf17af..d854fc39ee 100644 --- a/data/xml/D09.xml +++ b/data/xml/D09.xml @@ -5,7 +5,7 @@ Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing D09-1 PhilippKoehn - RadaMihalcea + RadaMihalcea Association for Computational Linguistics
Singapore
August @@ -35,7 +35,7 @@ Semi-supervised Semantic Role Labeling Using the <fixed-case>L</fixed-case>atent <fixed-case>W</fixed-case>ords <fixed-case>L</fixed-case>anguage <fixed-case>M</fixed-case>odel KoenDeschacht - Marie-FrancineMoens + Marie-FrancineMoens 21–29 D09-1003 deschacht-moens-2009-semi @@ -44,7 +44,7 @@ Semantic Dependency Parsing of <fixed-case>N</fixed-case>om<fixed-case>B</fixed-case>ank and <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank: An Efficient Integrated Approach via a Large-scale Feature Selection HaiZhao WenliangChen - ChunyuKit + ChunyuKit 30–39 D09-1004 zhao-etal-2009-semantic @@ -52,14 +52,14 @@ First- and Second-Order Expectation Semirings with Applications to Minimum-Risk Training on Translation Forests ZhifeiLi - JasonEisner + JasonEisner 40–51 D09-1005 li-eisner-2009-first Feasibility of Human-in-the-loop Minimum Error Rate Training - Omar F.Zaidan + Omar F.Zaidan ChrisCallison-Burch 52–61 D09-1006 @@ -79,7 +79,7 @@ JinxiXu BingZhang SpyrosMatsoukas - RalphWeischedel + RalphWeischedel 72–80 D09-1008 shen-etal-2009-effective @@ -95,7 +95,7 @@ Efficient kernels for sentence pair classification - Fabio MassimoZanzotto + Fabio MassimoZanzotto LorenzoDell’Arciprete 91–100 D09-1010 @@ -104,7 +104,7 @@ Graphical Models over Multiple Strings MarkusDreyer - JasonEisner + JasonEisner 101–110 D09-1011 dreyer-eisner-2009-graphical @@ -120,9 +120,9 @@ A Rich Feature Vector for Protein-Protein Interaction Extraction from Multiple Corpora MakotoMiwa - RuneSætre + RuneSætre YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 121–130 D09-1013 miwa-etal-2009-rich @@ -137,8 +137,8 @@ Nested Named Entity Recognition - Jenny RoseFinkel - Christopher D.Manning + Jenny RoseFinkel + Christopher D.Manning 141–150 D09-1015 finkel-manning-2009-nested @@ -146,7 +146,7 @@ A Unified Model of Phrasal and Sentential Evidence for Information Extraction SiddharthPatwardhan - EllenRiloff + EllenRiloff 151–160 D09-1016 patwardhan-riloff-2009-unified @@ -154,7 +154,7 @@ Review Sentiment Scoring via a Parse-and-Paraphrase Paradigm JingjingLiu - StephanieSeneff + StephanieSeneff 161–169 D09-1017 liu-seneff-2009-review @@ -163,7 +163,7 @@ Supervised and Unsupervised Methods in Employing Discourse Relations for Improving Opinion Polarity Classification SwapnaSomasundaran GalileoNamata - JanyceWiebe + JanyceWiebe LiseGetoor 170–179 D09-1018 @@ -181,7 +181,7 @@ Subjectivity Word Sense Disambiguation CemAkkaya - JanyceWiebe + JanyceWiebe RadaMihalcea 190–199 D09-1020 @@ -190,7 +190,7 @@ Non-Projective Parsing for Statistical Machine Translation XavierCarreras - MichaelCollins + MichaelCollins 200–209 D09-1021 carreras-collins-2009-non @@ -198,8 +198,8 @@ Extending Statistical Machine Translation with Discriminative and Trigger-Based Lexicon Models ArneMauser - SašaHasan - HermannNey + SašaHasan + HermannNey 210–218 D09-1022 mauser-etal-2009-extending @@ -207,7 +207,7 @@ Feature-Rich Translation by Quasi-Synchronous Lattice Parsing KevinGimpel - Noah A.Smith + Noah A.Smith 219–228 D09-1023 gimpel-smith-2009-feature @@ -232,7 +232,7 @@ DanielRamage DavidHall RameshNallapati - Christopher D.Manning + Christopher D.Manning 248–256 D09-1026 ramage-etal-2009-labeled @@ -258,7 +258,7 @@ <fixed-case>W</fixed-case>ikipedia as Frame Information Repository SaraTonelli - ClaudioGiuliano + ClaudioGiuliano 276–285 D09-1029 tonelli-giuliano-2009-wikipedia @@ -307,8 +307,8 @@ It’s Not You, it’s Me: Detecting Flirting and its Misperception in Speed-Dates RajeshRanganath - DanJurafsky - DanMcFarland + DanJurafsky + DanMcFarland 334–342 D09-1035 ranganath-etal-2009-detecting @@ -324,8 +324,8 @@ A <fixed-case>B</fixed-case>ayesian Model of Syntax-Directed Tree to String Grammar Induction - TrevorCohn - PhilBlunsom + TrevorCohn + PhilBlunsom 352–361 D09-1037 cohn-blunsom-2009-bayesian @@ -378,8 +378,8 @@ Perceptron Reranking for <fixed-case>CCG</fixed-case> Realization - MichaelWhite - RajakrishnanRajkumar + MichaelWhite + RajakrishnanRajkumar 410–419 D09-1043 white-rajkumar-2009-perceptron @@ -402,7 +402,7 @@ Graded Word Sense Assignment KatrinErk - DianaMcCarthy + DianaMcCarthy 440–449 D09-1046 erk-mccarthy-2009-graded @@ -418,10 +418,10 @@ Projecting Parameters for Multilingual Word Sense Disambiguation - Mitesh M.Khapra + Mitesh M.Khapra SapanShah PiyushKedia - PushpakBhattacharyya + PushpakBhattacharyya 459–467 D09-1048 khapra-etal-2009-projecting @@ -438,8 +438,8 @@ Acquiring Translation Equivalences of Multiword Expressions by Normalized Correlation Frequencies Ming-HongBai Jia-MingYou - Keh-JiannChen - Jason S.Chang + Keh-JiannChen + Jason S.Chang 478–486 D09-1050 bai-etal-2009-acquiring @@ -481,14 +481,14 @@ A Structural Support Vector Method for Extracting Contexts and Answers of Questions from Online Forums Wen-YunYang YunboCao - Chin-YewLin + Chin-YewLin 514–523 D09-1054 yang-etal-2009-structural Mining Search Engine Clickthrough Log for Matching N-gram Features - HuihsinTseng + HuihsinTseng LongbinChen FanLi ZimingZhuang @@ -501,7 +501,7 @@ The role of named entities in <fixed-case>W</fixed-case>eb <fixed-case>P</fixed-case>eople <fixed-case>S</fixed-case>earch JavierArtiles - EnriqueAmigó + EnriqueAmigó JulioGonzalo 534–542 D09-1056 @@ -521,7 +521,7 @@ JunSuzuki HidekiIsozaki XavierCarreras - MichaelCollins + MichaelCollins 551–560 D09-1058 suzuki-etal-2009-empirical @@ -536,7 +536,7 @@ Improving Dependency Parsing with Subtrees from Auto-Parsed Data WenliangChen - Jun’ichiKazama + Jun’ichiKazama KiyotakaUchimoto KentaroTorisawa 570–579 @@ -554,16 +554,16 @@ Adapting a Polarity Lexicon using Integer Linear Programming for Domain-Specific Sentiment Classification YejinChoi - ClaireCardie + ClaireCardie 590–598 D09-1062 choi-cardie-2009-adapting Generating High-Coverage Semantic Orientation Lexicons From Overtly Marked Words and a Thesaurus - SaifMohammad + SaifMohammad CodyDunne - BonnieDorr + BonnieDorr 599–608 D09-1063 mohammad-etal-2009-generating @@ -581,8 +581,8 @@ <fixed-case>EEG</fixed-case> responds to conceptual stimuli and corpus semantics BrianMurphy - MarcoBaroni - MassimoPoesio + MarcoBaroni + MassimoPoesio 619–627 D09-1065 murphy-etal-2009-eeg @@ -616,7 +616,7 @@ Can <fixed-case>C</fixed-case>hinese Phonemes Improve Machine Transliteration?: A Comparative Study of <fixed-case>E</fixed-case>nglish-to-<fixed-case>C</fixed-case>hinese Transliteration Models - Jong-HoonOh + Jong-HoonOh KiyotakaUchimoto KentaroTorisawa 658–667 @@ -644,7 +644,7 @@ A Simple Unsupervised Learner for <fixed-case>POS</fixed-case> Disambiguation Rules Given Only a Minimal Lexicon QiuyeZhao - MitchMarcus + MitchMarcus 688–697 D09-1072 zhao-marcus-2009-simple @@ -660,7 +660,7 @@ Discriminative Corpus Weight Estimation for Machine Translation SpyrosMatsoukas - Antti-Veikko I.Rosti + Antti-Veikko I.Rosti BingZhang 708–717 D09-1074 @@ -684,14 +684,14 @@ Word Buffering Models for Improved Speech Repair Parsing - TimMiller + TimMiller 737–745 D09-1077 miller-2009-word Less is More: Significance-Based <fixed-case>N</fixed-case>-gram Selection for Smaller, Better Language Models - Robert C.Moore + Robert C.Moore ChrisQuirk 746–755 D09-1078 @@ -708,8 +708,8 @@ Integrating sentence- and word-level error identification for disfluency correction ErinFitzgerald - FrederickJelinek - KeithHall + FrederickJelinek + KeithHall 765–774 D09-1080 fitzgerald-etal-2009-integrating @@ -717,7 +717,7 @@ Estimating Semantic Distance Using Soft Semantic Constraints in Knowledge-Source – Corpus Hybrid Models YuvalMarton - SaifMohammad + SaifMohammad PhilipResnik 775–783 D09-1081 @@ -733,7 +733,7 @@ Learning Term-weighting Functions for Similarity Measures - Wen-tauYih + Wen-tauYih 793–802 D09-1083 D09-1083.Presentation.pptx @@ -752,15 +752,15 @@ Unbounded Dependency Recovery for Parser Evaluation LauraRimell StephenClark - MarkSteedman + MarkSteedman 813–821 D09-1085 rimell-etal-2009-unbounded Parser Adaptation and Projection with Quasi-Synchronous Grammar Features - David A.Smith - JasonEisner + David A.Smith + JasonEisner 822–831 D09-1086 smith-eisner-2009-parser @@ -768,7 +768,7 @@ Self-Training <fixed-case>PCFG</fixed-case> Grammars with Latent Annotations Across Languages ZhongqiangHuang - MaryHarper + MaryHarper 832–841 D09-1087 huang-harper-2009-self @@ -776,8 +776,8 @@ An Alternative to Head-Driven Approaches for Parsing a (Relatively) Free Word-Order Language ReutTsarfaty - KhalilSima’an - RemkoScha + KhalilSima’an + RemkoScha 842–851 D09-1088 tsarfaty-etal-2009-alternative @@ -792,8 +792,8 @@ Bilingual dictionary generation for low-resourced language pairs - IstvánVarga - ShoichiYokoyama + IstvánVarga + ShoichiYokoyama 862–870 D09-1090 varga-yokoyama-2009-bilingual @@ -801,7 +801,7 @@ Multilingual Spectral Clustering Using Document Similarity Propagation DaniYogatama - KumikoTanaka-Ishii + KumikoTanaka-Ishii 871–879 D09-1091 yogatama-tanaka-ishii-2009-multilingual @@ -809,9 +809,9 @@ Polylingual Topic Models DavidMimno - Hanna M.Wallach + Hanna M.Wallach JasonNaradowsky - David A.Smith + David A.Smith AndrewMcCallum 880–889 D09-1092 @@ -821,7 +821,7 @@ Using the <fixed-case>W</fixed-case>eb for Language Independent Spellchecking and Autocorrection CaseyWhitelaw BenHutchinson - Grace YChung + Grace YChung GedEllis 890–899 D09-1093 @@ -838,7 +838,7 @@ Combining Collocations, Lexical and Encyclopedic Knowledge for Metonymy Resolution - ViviNastase + ViviNastase MichaelStrube 910–918 D09-1095 @@ -848,7 +848,7 @@ Segmenting Email Message Text into Zones AndrewLampert RobertDale - CécileParis + CécileParis 919–928 D09-1096 lampert-etal-2009-segmenting @@ -857,7 +857,7 @@ Hypernym Discovery Based on Distributional Similarity and Hierarchical Structures IchiroYamada KentaroTorisawa - Jun’ichiKazama + Jun’ichiKazama KowKuroda MasakiMurata StijnDe Saeger @@ -880,9 +880,9 @@ Toward Completeness in Concept Extraction and Classification - EduardHovy + EduardHovy ZornitsaKozareva - EllenRiloff + EllenRiloff 948–957 D09-1099 hovy-etal-2009-toward @@ -907,7 +907,7 @@ Global Learning of Noun Phrase Anaphoricity in Coreference Resolution via Label Propagation - GuoDongZhou + GuoDongZhou FangKong 978–986 D09-1102 @@ -916,8 +916,8 @@ Employing the Centering Theory in Pronoun Resolution from the Semantic Perspective FangKong - GuoDongZhou - QiaomingZhu + GuoDongZhou + QiaomingZhu 987–996 D09-1103 kong-etal-2009-employing @@ -931,8 +931,8 @@ Learning Linear Ordering Problems for Better Translation - RoyTromble - JasonEisner + RoyTromble + JasonEisner 1007–1016 D09-1105 tromble-eisner-2009-learning @@ -959,7 +959,7 @@ HuiZhang MinZhang HaizhouLi - Chew LimTan + Chew LimTan 1037–1045 D09-1108 zhang-etal-2009-fast @@ -968,7 +968,7 @@ <fixed-case>G</fixed-case>azpacho and summer rash: lexical relationships from temporal patterns of web search queries EnriqueAlfonseca MassimilianoCiaramita - KeithHall + KeithHall 1046–1055 D09-1109 alfonseca-etal-2009-gazpacho @@ -1027,7 +1027,7 @@ YangLiu HaitaoMi QunLiu - Yajuan + Yajuan 1105–1113 D09-1115 feng-etal-2009-lattice @@ -1035,7 +1035,7 @@ A Joint Language Model With Fine-grain Syntactic Tags DenisFilimonov - MaryHarper + MaryHarper 1114–1123 D09-1116 filimonov-harper-2009-joint @@ -1043,7 +1043,7 @@ Bidirectional Phrase-based Statistical Machine Translation AndrewFinch - EiichiroSumita + EiichiroSumita 1124–1132 D09-1117 finch-sumita-2009-bidirectional @@ -1052,7 +1052,7 @@ Real-time decision detection in multi-party dialogue MatthewFrampton JiaHuang - TrungBui + TrungBui StanleyPeters 1133–1141 D09-1118 @@ -1078,7 +1078,7 @@ Descriptive and Empirical Approaches to Capturing Underlying Dependencies among Parsing Errors TadayoshiHara YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 1162–1171 D09-1121 hara-etal-2009-descriptive @@ -1090,15 +1090,15 @@ KowKuroda StijnDe Saeger MasakiMurata - Jun’ichiKazama + Jun’ichiKazama 1172–1181 D09-1122 hashimoto-etal-2009-large A Syntactified Direct Translation Model with Linear-time Decoding - HanyHassan - KhalilSima’an + HanyHassan + KhalilSima’an AndyWay 1182–1191 D09-1123 @@ -1123,7 +1123,7 @@ Fully Lexicalising <fixed-case>CCG</fixed-case>bank with Hat Categories MatthewHonnibal - James R.Curran + James R.Curran 1212–1221 D09-1126 honnibal-curran-2009-fully @@ -1149,8 +1149,8 @@ Real-Word Spelling Correction using <fixed-case>G</fixed-case>oogle <fixed-case>W</fixed-case>eb 1<fixed-case>T</fixed-case> 3-grams - AminulIslam - DianaInkpen + AminulIslam + DianaInkpen 1241–1249 D09-1129 islam-inkpen-2009-real @@ -1158,8 +1158,8 @@ Semi-supervised Speech Act Recognition in Emails and Forums MinwooJeong - Chin-YewLin - Gary GeunbaeLee + Chin-YewLin + Gary GeunbaeLee 1250–1259 D09-1130 jeong-etal-2009-semi @@ -1167,7 +1167,7 @@ Using Morphological and Syntactic Structures for <fixed-case>C</fixed-case>hinese Opinion Analysis Lun-WeiKu - Ting-HaoHuang + Ting-HaoHuang Hsin-HsiChen 1260–1269 D09-1131 @@ -1183,10 +1183,10 @@ Improving Nominal <fixed-case>SRL</fixed-case> in <fixed-case>C</fixed-case>hinese Language with Verbal <fixed-case>SRL</fixed-case> Information and Automatic Predicate Recognition - JunhuiLi - GuodongZhou + JunhuiLi + GuodongZhou HaiZhao - QiaomingZhu + QiaomingZhu PeideQian 1280–1288 D09-1133 @@ -1201,7 +1201,7 @@ Refining Grammars for Parsing with Hierarchical Semantic Knowledge - XiaojunLin + XiaojunLin YangFan MengZhang XihongWu @@ -1230,14 +1230,14 @@ Supervised Learning of a Probabilistic Lexicon of Verb Semantic Classes YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 1328–1337 D09-1138 miyao-tsujii-2009-supervised A Study on the Semantic Relatedness of Query and Document Terms in Information Retrieval - ChristofMüller + ChristofMüller IrynaGurevych 1338–1347 D09-1139 @@ -1253,7 +1253,7 @@ Improved Statistical Machine Translation for Resource-Poor Languages Using Related Resource-Rich Languages - PreslavNakov + PreslavNakov Hwee TouNg 1358–1367 D09-1141 @@ -1261,7 +1261,7 @@ What’s in a name? <fixed-case>I</fixed-case>n some languages, grammatical gender - ViviNastase + ViviNastase MariusPopescu 1368–1377 D09-1142 @@ -1287,16 +1287,16 @@ Detecting Speculations and their Scopes in Scientific Text - ArzucanÖzgür - Dragomir R.Radev + ArzucanÖzgür + Dragomir R.Radev 1398–1407 D09-1145 ozgur-radev-2009-detecting Cross-Cultural Analysis of Blogs and Forums with Mixed-Collection Topic Models - MichaelPaul - RoxanaGirju + MichaelPaul + RoxanaGirju 1408–1417 D09-1146 paul-girju-2009-cross @@ -1313,17 +1313,17 @@ Using Word-Sense Disambiguation Methods to Classify Web Queries by Intent EmilyPitler - KenChurch + KenChurch 1428–1436 D09-1148 pitler-church-2009-using Semi-Supervised Learning for Semantic Relation Classification using Stratified Sampling Strategy - LonghuaQian - GuodongZhou + LonghuaQian + GuodongZhou FangKong - QiaomingZhu + QiaomingZhu 1437–1445 D09-1149 qian-etal-2009-semi @@ -1354,7 +1354,7 @@ <fixed-case>C</fixed-case>hinese Semantic Role Labeling with Shallow Parsing - WeiweiSun + WeiweiSun ZhifangSui MengWang XinWang @@ -1375,15 +1375,15 @@ Towards Domain-Independent Argumentative Zoning: Evidence from Chemistry and Computational Linguistics SimoneTeufel AdvaithSiddharthan - ColinBatchelor + ColinBatchelor 1493–1502 D09-1155 teufel-etal-2009-towards Character-level Analysis of Semi-Structured Documents for Set Expansion - Richard C.Wang - William W.Cohen + Richard C.Wang + William W.Cohen 1503–1512 D09-1156 wang-cohen-2009-character @@ -1391,7 +1391,7 @@ Classifying Relations for Biomedical Named Entity Disambiguation XinglongWang - Jun’ichiTsujii + Jun’ichiTsujii SophiaAnaniadou 1513–1522 D09-1157 @@ -1411,8 +1411,8 @@ Phrase Dependency Parsing for Opinion Mining YuanbinWu QiZhang - XuanjingHuang - LideWu + XuanjingHuang + LideWu 1533–1541 D09-1159 wu-etal-2009-phrase @@ -1429,7 +1429,7 @@ K-Best Combination of Syntactic Parsers HuiZhang MinZhang - Chew LimTan + Chew LimTan HaizhouLi 1552–1560 D09-1161 diff --git a/data/xml/D10.xml b/data/xml/D10.xml index 0dcb90b209..931312a1b4 100644 --- a/data/xml/D10.xml +++ b/data/xml/D10.xml @@ -5,7 +5,7 @@ Proceedings of the 2010 Conference on Empirical Methods in Natural Language Processing D10-1 HangLi - LluísMàrquez + LluísMàrquez Association for Computational Linguistics
Cambridge, MA
October @@ -18,9 +18,9 @@ On Dual Decomposition and Linear Programming Relaxations for Natural Language Processing - Alexander M.Rush + Alexander M.Rush DavidSontag - MichaelCollins + MichaelCollins TommiJaakkola 1–11 D10-1001 @@ -29,7 +29,7 @@ Self-Training with Products of Latent Variable Grammars ZhongqiangHuang - MaryHarper + MaryHarper SlavPetrov 12–22 D10-1002 @@ -37,7 +37,7 @@ Utilizing Extra-Sentential Context for Parsing - Jackie Chi KitCheung + Jackie Chi KitCheung GeraldPenn 23–33 D10-1003 @@ -45,9 +45,9 @@ Turbo Parsers: Dependency Parsing by Approximate Variational Inference - AndréMartins - NoahSmith - EricXing + AndréMartins + NoahSmith + EricXing PedroAguiar MárioFigueiredo 34–44 @@ -64,7 +64,7 @@ Jointly Modeling Aspects and Opinions with a <fixed-case>M</fixed-case>ax<fixed-case>E</fixed-case>nt-<fixed-case>LDA</fixed-case> Hybrid - XinZhao + XinZhao JingJiang HongfeiYan XiaomingLi @@ -74,9 +74,9 @@ Summarizing Contrastive Viewpoints in Opinionated Text - MichaelPaul - ChengXiangZhai - RoxanaGirju + MichaelPaul + ChengXiangZhai + RoxanaGirju 66–76 D10-1007 paul-etal-2010-summarizing @@ -84,8 +84,8 @@ Automatically Producing Plot Unit Representations for Narrative Text AmitGoyal - EllenRiloff - HalDaumé III + EllenRiloff + HalDaumé III 77–86 D10-1008 goyal-etal-2010-automatically @@ -93,8 +93,8 @@ Handling Noisy Queries in Cross Language <fixed-case>FAQ</fixed-case> Retrieval DanishContractor - GovindKothari - TanveerFaruquie + GovindKothari + TanveerFaruquie L. V.Subramaniam SumitNegi 87–96 @@ -103,7 +103,7 @@ Learning the Relative Usefulness of Questions in Community <fixed-case>QA</fixed-case> - RazvanBunescu + RazvanBunescu YunfengHuang 97–107 D10-1010 @@ -112,7 +112,7 @@ Positional Language Models for Clinical Information Retrieval FlorianBoudin - Jian-YunNie + Jian-YunNie MartinDawes 108–115 D10-1011 @@ -141,7 +141,7 @@ Soft Syntactic Constraints for Hierarchical Phrase-Based Translation Using Latent Syntactic Distributions ZhongqiangHuang - MartinČmejrek + MartinČmejrek BowenZhou 138–147 D10-1014 @@ -149,8 +149,8 @@ A Hybrid Morpheme-Word Representation for Machine Translation of Morphologically Rich Languages - Minh-ThangLuong - PreslavNakov + Minh-ThangLuong + PreslavNakov Min-YenKan 148–157 D10-1015 @@ -160,14 +160,14 @@ “Poetic” Statistical Machine Translation: Rhyme and Meter DmitriyGenzel JakobUszkoreit - FranzOch + FranzOch 158–166 D10-1016 genzel-etal-2010-poetic Efficient Graph-Based Semi-Supervised Learning of Structured Tagging Models - AmarnagSubramanya + AmarnagSubramanya SlavPetrov FernandoPereira 167–176 @@ -187,8 +187,8 @@ XianQian QiZhang YaqianZhou - XuanjingHuang - LideWu + XuanjingHuang + LideWu 187–195 D10-1019 qian-etal-2010-joint @@ -214,7 +214,7 @@ Negative Training Data Can be Harmful to Text Classification Xiao-LiLi BingLiu - See-KiongNg + See-KiongNg 218–228 D10-1022 li-etal-2010-negative @@ -232,16 +232,16 @@ Evaluating Models of Latent Document Semantics in the Presence of <fixed-case>OCR</fixed-case> Errors DanielWalker William B.Lund - Eric K.Ringger + Eric K.Ringger 240–250 D10-1024 walker-etal-2010-evaluating Translingual Document Representations from Discriminative Projections - JohnPlatt + JohnPlatt KristinaToutanova - Wen-tauYih + Wen-tauYih 251–261 D10-1025 platt-etal-2010-translingual @@ -284,7 +284,7 @@ Joint Inference for Bilingual Semantic Role Labeling TaoZhuang - ChengqingZong + ChengqingZong 304–314 D10-1030 zhuang-zong-2010-joint @@ -292,7 +292,7 @@ Automatic Discovery of Manner Relations and its Applications EduardoBlanco - DanMoldovan + DanMoldovan 315–324 D10-1031 blanco-moldovan-2010-automatic @@ -307,9 +307,9 @@ Improving Mention Detection Robustness to Noisy Input - RaduFlorian - JohnPitrelli - SalimRoukos + RaduFlorian + JohnPitrelli + SalimRoukos ImedZitouni 335–345 D10-1033 @@ -317,8 +317,8 @@ Clustering-Based Stratified Seed Sampling for Semi-Supervised Relation Classification - LonghuaQian - GuodongZhou + LonghuaQian + GuodongZhou 346–355 D10-1034 qian-zhou-2010-clustering @@ -351,10 +351,10 @@ Exploiting Conversation Structure in Unsupervised Topic Segmentation for Emails - ShafiqJoty + ShafiqJoty GiuseppeCarenini GabrielMurray - Raymond T.Ng + Raymond T.Ng 388–398 D10-1038 joty-etal-2010-exploiting @@ -410,7 +410,7 @@ Discriminative Instance Weighting for Domain Adaptation in Statistical Machine Translation GeorgeFoster - CyrilGoutte + CyrilGoutte RolandKuhn 451–459 D10-1044 @@ -421,7 +421,7 @@ MarkDredze ArenJansen GlenCoppersmith - KenChurch + KenChurch 460–470 D10-1045 dredze-etal-2010-nlp @@ -429,7 +429,7 @@ Fusing Eye Gaze with Speech Recognition Hypotheses to Resolve Exophoric References in Situated Dialogue ZaharPrasov - Joyce Y.Chai + Joyce Y.Chai 471–481 D10-1046 prasov-chai-2010-fusing @@ -437,8 +437,8 @@ Multi-Document Summarization Using <fixed-case>A</fixed-case>* Search and Discriminative Learning AhmetAker - TrevorCohn - RobertGaizauskas + TrevorCohn + RobertGaizauskas 482–491 D10-1047 aker-etal-2010-multi @@ -448,10 +448,10 @@ KarthikRaghunathan HeeyoungLee SudarshanRangarajan - NathanaelChambers + NathanaelChambers MihaiSurdeanu - DanJurafsky - ChristopherManning + DanJurafsky + ChristopherManning 492–501 D10-1048 raghunathan-etal-2010-multi @@ -486,7 +486,7 @@ Discriminative Word Alignment with a Function Word Reordering Model HendraSetiawan - ChrisDyer + ChrisDyer PhilipResnik 534–544 D10-1052 @@ -494,7 +494,7 @@ Hierarchical Phrase-Based Translation Grammars Extracted from Alignment Posterior Probabilities - Adriàde Gispert + Adriàde Gispert JuanPino WilliamByrne 545–554 @@ -513,8 +513,8 @@ Further Meta-Evaluation of Broad-Coverage Surface Realization DominicEspinosa - RajakrishnanRajkumar - MichaelWhite + RajakrishnanRajkumar + MichaelWhite ShoshanaBerleant 564–574 D10-1055 @@ -523,8 +523,8 @@ Two Decades of Unsupervised <fixed-case>POS</fixed-case> Induction: How Far Have We Come? ChristosChristodoulopoulos - SharonGoldwater - MarkSteedman + SharonGoldwater + MarkSteedman 575–584 D10-1056 christodoulopoulos-etal-2010-two @@ -533,7 +533,7 @@ We’re Not in <fixed-case>K</fixed-case>ansas Anymore: Detecting Domain Changes in Streams MarkDredze TimOates - ChristinePiatko + ChristinePiatko 585–595 D10-1057 dredze-etal-2010-kansas @@ -560,7 +560,7 @@ BingZhang SpyrosMatsoukas JinxiXu - RalphWeischedel + RalphWeischedel 616–625 D10-1060 shen-etal-2010-statistical @@ -569,8 +569,8 @@ Discriminative Sample Selection for Statistical Machine Translation SankaranarayananAnanthakrishnan RohitPrasad - DavidStallard - PremNatarajan + DavidStallard + PremNatarajan 626–635 D10-1061 ananthakrishnan-etal-2010-discriminative @@ -601,7 +601,7 @@ Combining Unsupervised and Supervised Alignments for <fixed-case>MT</fixed-case>: An Empirical Study JinxiXu - Antti-VeikkoRosti + Antti-VeikkoRosti 667–673 D10-1065 xu-rosti-2010-combining @@ -624,7 +624,7 @@ Unsupervised Parse Selection for <fixed-case>HPSG</fixed-case> RebeccaDridan - TimothyBaldwin + TimothyBaldwin 694–704 D10-1068 dridan-baldwin-2010-unsupervised @@ -632,19 +632,19 @@ Uptraining for Accurate Deterministic Question Parsing SlavPetrov - Pi-ChuanChang + Pi-ChuanChang MichaelRinggaard - HiyanAlshawi + HiyanAlshawi 705–713 D10-1069 petrov-etal-2010-uptraining A Unified Framework for Scope Learning via Simplified Shallow Semantic Parsing - QiaomingZhu - JunhuiLi + QiaomingZhu + JunhuiLi HonglingWang - GuodongZhou + GuodongZhou 714–724 D10-1070 zhu-etal-2010-unified @@ -652,19 +652,19 @@ A New Approach to Lexical Disambiguation of <fixed-case>A</fixed-case>rabic Text RushinShah - Paramveer S.Dhillon - MarkLiberman - DeanFoster - MohamedMaamouri - LyleUngar + Paramveer S.Dhillon + MarkLiberman + DeanFoster + MohamedMaamouri + LyleUngar 725–735 D10-1071 shah-etal-2010-new What a Parser Can Learn from a Semantic Role Labeler and Vice Versa - StephenBoxwell - DennisMehay + StephenBoxwell + DennisMehay ChrisBrew 736–744 D10-1072 @@ -672,7 +672,7 @@ Word Sense Induction & Disambiguation Using Hierarchical Random Graphs - IoannisKlapaftis + IoannisKlapaftis SureshManandhar 745–755 D10-1073 @@ -680,8 +680,8 @@ Towards Conversation Entailment: An Empirical Investigation - ChenZhang - JoyceChai + ChenZhang + JoyceChai 756–766 D10-1074 zhang-chai-2010-towards @@ -697,7 +697,7 @@ Training Continuous Space Language Models: Some Practical Issues - Hai SonLe + Hai SonLe AlexandreAllauzen GuillaumeWisniewski FrançoisYvon @@ -707,7 +707,7 @@ Enhancing Domain Portability of <fixed-case>C</fixed-case>hinese Segmentation Model Using Chi-Square Statistics and Bootstrapping - BaobaoChang + BaobaoChang DongxuHan 789–798 D10-1077 @@ -730,7 +730,7 @@ KristianHeal DeryleLonsdale KevinSeppi - EricRingger + EricRingger 810–820 D10-1079 mcclanahan-etal-2010-probabilistic @@ -739,7 +739,7 @@ Lessons Learned in Part-of-Speech Tagging of Conversational Speech VladimirEidelman ZhongqiangHuang - MaryHarper + MaryHarper 821–831 D10-1080 eidelman-etal-2010-lessons @@ -748,7 +748,7 @@ An Efficient Algorithm for Unsupervised Word Segmentation with Branching Entropy and <fixed-case>MDL</fixed-case> ValentinZhikov HiroyaTakamura - ManabuOkumura + ManabuOkumura 832–842 D10-1081 zhikov-etal-2010-efficient @@ -763,7 +763,7 @@ Simple Type-Level Unsupervised <fixed-case>POS</fixed-case> Tagging - Yoong KeokLee + Yoong KeokLee AriaHaghighi ReginaBarzilay 853–861 @@ -774,7 +774,7 @@ Classifying Dialogue Acts in One-on-One Live Chats Su NamKim LawrenceCavedon - TimothyBaldwin + TimothyBaldwin 862–871 D10-1084 kim-etal-2010-classifying @@ -783,7 +783,7 @@ Resolving Event Noun Phrases to Their Verbal Mentions BinChen JianSu - Chew LimTan + Chew LimTan 872–881 D10-1085 chen-etal-2010-resolving @@ -791,7 +791,7 @@ A Tree Kernel-Based Unified Framework for <fixed-case>C</fixed-case>hinese Zero Anaphora Resolution FangKong - GuodongZhou + GuodongZhou 882–891 D10-1086 kong-zhou-2010-tree @@ -808,14 +808,14 @@ Using Unknown Word Techniques to Learn Known Words KostadinCholakov - Gertjanvan Noord + Gertjanvan Noord 902–912 D10-1088 cholakov-van-noord-2010-using <fixed-case>W</fixed-case>iki<fixed-case>W</fixed-case>ars: A New Corpus for Research on Temporal Expressions - PawelMazur + PawelMazur RobertDale 913–922 D10-1089 @@ -854,7 +854,7 @@ An Approach of Generating Personalized Views from Normalized Electronic Dictionaries : A Practical Experiment on <fixed-case>A</fixed-case>rabic Language AidaKhemakhem BilelGargouri - AbdelmajidBen Hamadou + AbdelmajidBen Hamadou 953–960 D10-1093 khemakhem-etal-2010-approach @@ -896,7 +896,7 @@ LauraChiticariu RajasekarKrishnamurthy YunyaoLi - FrederickReiss + FrederickReiss ShivakumarVaithyanathan 1002–1012 D10-1098 @@ -914,7 +914,7 @@ Automatic Detection and Classification of Social Events ApoorvAgarwal - OwenRambow + OwenRambow 1024–1034 D10-1100 agarwal-rambow-2010-automatic @@ -931,7 +931,7 @@ Multi-Level Structured Models for Document-Level Sentiment Classification AinurYessenalina YisongYue - ClaireCardie + ClaireCardie 1046–1056 D10-1102 yessenalina-etal-2010-multi @@ -939,7 +939,7 @@ Cross Language Text Classification by Model Translation and Semi-Supervised Learning LeiShi - RadaMihalcea + RadaMihalcea MingjunTian 1057–1067 D10-1103 @@ -970,7 +970,7 @@ StefanSchoenmackers JesseDavis OrenEtzioni - DanielWeld + DanielWeld 1088–1098 D10-1106 schoenmackers-etal-2010-learning @@ -986,7 +986,7 @@ A Semi-Supervised Method to Learn and Construct Taxonomies Using the Web ZornitsaKozareva - EduardHovy + EduardHovy 1110–1118 D10-1108 kozareva-hovy-2010-semi @@ -1016,7 +1016,7 @@ Staying Informed: Supervised and Semi-Supervised Multi-View Topical Analysis of Ideological Perspective AmrAhmed - EricXing + EricXing 1140–1150 D10-1111 ahmed-xing-2010-staying @@ -1024,14 +1024,14 @@ Word-Based Dialect Identification with Georeferenced Rules YvesScherrer - OwenRambow + OwenRambow 1151–1161 D10-1112 scherrer-rambow-2010-word Measuring Distributional Similarity in Context - GeorgianaDinu + GeorgianaDinu MirellaLapata 1162–1172 D10-1113 @@ -1040,14 +1040,14 @@ A Mixture Model with Sharing for Lexical Semantics JosephReisinger - RaymondMooney + RaymondMooney 1173–1182 D10-1114 reisinger-mooney-2010-mixture Nouns are Vectors, Adjectives are Matrices: Representing Adjective-Noun Constructions in Semantic Space - MarcoBaroni + MarcoBaroni RobertoZamparelli 1183–1193 D10-1115 @@ -1055,7 +1055,7 @@ Practical Linguistic Steganography Using Contextual Synonym Substitution and Vertex Colour Coding - Ching-YunChang + Ching-YunChang StephenClark 1194–1203 D10-1116 @@ -1063,8 +1063,8 @@ Unsupervised Induction of Tree Substitution Grammars for Dependency Parsing - PhilBlunsom - TrevorCohn + PhilBlunsom + TrevorCohn 1204–1213 D10-1117 blunsom-cohn-2010-unsupervised @@ -1079,9 +1079,9 @@ Inducing Probabilistic <fixed-case>CCG</fixed-case> Grammars from Logical Form with Higher-Order Unification TomKwiatkowksi - LukeZettlemoyer - SharonGoldwater - MarkSteedman + LukeZettlemoyer + SharonGoldwater + MarkSteedman 1223–1233 D10-1119 kwiatkowksi-etal-2010-inducing @@ -1098,16 +1098,16 @@ What’s with the Attitude? Identifying Sentences with Attitude in Online Discussions - AhmedHassan + AhmedHassan VahedQazvinian - DragomirRadev + DragomirRadev 1245–1255 D10-1121 hassan-etal-2010-whats Hashing-Based Approaches to Spelling Correction of Personal Names - RaghavendraUdupa + RaghavendraUdupa ShaishavKumar 1256–1265 D10-1122 @@ -1126,8 +1126,8 @@ A Latent Variable Model for Geographic Lexical Variation JacobEisenstein BrendanO’Connor - Noah A.Smith - Eric P.Xing + Noah A.Smith + Eric P.Xing 1277–1287 D10-1124 eisenstein-etal-2010-latent @@ -1135,8 +1135,8 @@ Dual Decomposition for Parsing with Non-Projective Head Automata TerryKoo - Alexander M.Rush - MichaelCollins + Alexander M.Rush + MichaelCollins TommiJaakkola DavidSontag 1288–1298 diff --git a/data/xml/D11.xml b/data/xml/D11.xml index ed927e2356..7340a80683 100644 --- a/data/xml/D11.xml +++ b/data/xml/D11.xml @@ -30,7 +30,7 @@ MarcoLui Su NamKim JoakimNivre - TimothyBaldwin + TimothyBaldwin 13–25 D11-1002 wang-etal-2011-predicting @@ -38,7 +38,7 @@ Exact Decoding of Phrase-Based Translation Models through <fixed-case>L</fixed-case>agrangian Relaxation Yin-WenChang - MichaelCollins + MichaelCollins 26–37 D11-1003 D11-1003.Attachment.pdf @@ -54,9 +54,9 @@ Unsupervised Structure Prediction with Non-Parallel Multilingual Guidance - Shay B.Cohen + Shay B.Cohen DipanjanDas - Noah A.Smith + Noah A.Smith 50–61 D11-1005 cohen-etal-2011-unsupervised @@ -65,7 +65,7 @@ Multi-Source Transfer of Delexicalized Dependency Parsers RyanMcDonald SlavPetrov - KeithHall + KeithHall 62–72 D11-1006 mcdonald-etal-2011-multi @@ -73,7 +73,7 @@ <fixed-case>SMT</fixed-case> Helps Bitext Dependency Parsing WenliangChen - Jun’ichiKazama + Jun’ichiKazama MinZhang YoshimasaTsuruoka YujieZhang @@ -133,7 +133,7 @@ Zheng-JunZha MengWang KaiWang - Tat-SengChua + Tat-SengChua 140–150 D11-1013 D11-1013.Attachment.zip @@ -143,9 +143,9 @@ Semi-Supervised Recursive Autoencoders for Predicting Sentiment Distributions RichardSocher JeffreyPennington - Eric H.Huang - Andrew Y.Ng - Christopher D.Manning + Eric H.Huang + Andrew Y.Ng + Christopher D.Manning 151–161 D11-1014 D11-1014.Attachment.pdf @@ -157,7 +157,7 @@ BinyangLi WeiGao ZhongyuWei - Kam-FaiWong + Kam-FaiWong 162–171 D11-1015 zhou-etal-2011-unsupervised @@ -165,17 +165,17 @@ Compositional Matrix-Space Models for Sentiment Analysis AinurYessenalina - ClaireCardie + ClaireCardie 172–182 D11-1016 yessenalina-cardie-2011-compositional Training a Parser for Machine Translation Reordering - JasonKatz-Brown + JasonKatz-Brown SlavPetrov RyanMcDonald - FranzOch + FranzOch DavidTalbot HiroshiIchikawa MasakazuSeno @@ -196,7 +196,7 @@ Augmenting String-to-Tree Translation Models with Fuzzy Use of Source-side Syntax JiajunZhang FeifeiZhai - ChengqingZong + ChengqingZong 204–215 D11-1019 zhang-etal-2011-augmenting @@ -213,7 +213,7 @@ <fixed-case>B</fixed-case>ayesian Checking for Topic Models DavidMimno - DavidBlei + DavidBlei 227–237 D11-1021 D11-1021.Attachment.zip @@ -221,8 +221,8 @@ Dual Decomposition with Many Overlapping Components - AndréMartins - NoahSmith + AndréMartins + NoahSmith MárioFigueiredo PedroAguiar 238–249 @@ -233,7 +233,7 @@ Approximate Scalable Bounded Space Sketch for Large Data <fixed-case>NLP</fixed-case> AmitGoyal - HalDaumé III + HalDaumé III 250–261 D11-1023 goyal-daume-iii-2011-approximate @@ -241,7 +241,7 @@ Optimizing Semantic Coherence in Topic Models DavidMimno - HannaWallach + HannaWallach EdmundTalley MiriamLeenders AndrewMcCallum @@ -261,7 +261,7 @@ Linear Text Segmentation Using Affinity Propagation AnnaKazantseva - StanSzpakowicz + StanSzpakowicz 284–293 D11-1026 D11-1026.Attachment.gz @@ -297,7 +297,7 @@ Universal Morphological Analysis using Structured Nearest Neighbor Prediction Young-BumKim - JoãoGraça + JoãoGraça BenjaminSnyder 322–332 D11-1030 @@ -358,8 +358,8 @@ Parser Evaluation over Local and Non-Local Deep Dependencies in a Large Corpus - Emily M.Bender - DanFlickinger + Emily M.Bender + DanFlickinger StephanOepen YiZhang 397–408 @@ -378,7 +378,7 @@ Bootstrapping Semantic Parsers from Conversations YoavArtzi - LukeZettlemoyer + LukeZettlemoyer 421–432 D11-1039 artzi-zettlemoyer-2011-bootstrapping @@ -399,7 +399,7 @@ Corpus-Guided Sentence Generation of Natural Images YezhouYang ChingTeo - HalDaumé III + HalDaumé III YiannisAloimonos 444–454 D11-1041 @@ -407,9 +407,9 @@ Corroborating Text Evaluation Results with Heterogeneous Measures - EnriqueAmigó + EnriqueAmigó JulioGonzalo - JesúsGiménez + JesúsGiménez FelisaVerdejo 455–466 D11-1042 @@ -417,10 +417,10 @@ Ranking Human and Machine Summarization Systems - PeterRankel - JohnConroy + PeterRankel + JohnConroy EricSlud - DianneO’Leary + DianneO’Leary 467–473 D11-1043 rankel-etal-2011-ranking @@ -428,7 +428,7 @@ Quasi-Synchronous Phrase Dependency Grammars for Machine Translation KevinGimpel - Noah A.Smith + Noah A.Smith 474–485 D11-1044 gimpel-smith-2011-quasi @@ -436,10 +436,10 @@ A Word Reordering Model for Improved Machine Translation KarthikVisweswariah - RajakrishnanRajkumar + RajakrishnanRajkumar AnkurGandhe AnanthakrishnanRamanathan - JiriNavratil + JiriNavratil 486–496 D11-1045 visweswariah-etal-2011-word @@ -456,7 +456,7 @@ Efficient retrieval of tree translation examples for Syntax-Based Machine Translation - FabienCromieres + FabienCromieres SadaoKurohashi 508–518 D11-1047 @@ -465,7 +465,7 @@ A generative model for unsupervised discovery of relations and argument classes from clinical texts BryanRink - SandaHarabagiu + SandaHarabagiu 519–528 D11-1048 rink-harabagiu-2011-generative @@ -473,15 +473,15 @@ Random Walk Inference and Learning in A Large Scale Knowledge Base NiLao - TomMitchell - William W.Cohen + TomMitchell + William W.Cohen 529–539 D11-1049 lao-etal-2011-random Exploring Supervised <fixed-case>LDA</fixed-case> Models for Assigning Attributes to Adjective-Noun Phrases - MatthiasHartung + MatthiasHartung AnetteFrank 540–551 D11-1050 @@ -491,7 +491,7 @@ Semantic Topic Models: Combining Word Distributional Statistics and Dictionary Definitions WeiweiGuo - MonaDiab + MonaDiab 552–561 D11-1051 guo-diab-2011-semantic @@ -517,7 +517,7 @@ Data-Driven Response Generation in Social Media AlanRitter ColinCherry - William B.Dolan + William B.Dolan 583–593 D11-1054 D11-1054.Attachment.zip @@ -528,9 +528,9 @@ DaniYogatama MichaelHeilman BrendanO’Connor - ChrisDyer - Bryan R.Routledge - Noah A.Smith + ChrisDyer + Bryan R.Routledge + Noah A.Smith 594–604 D11-1055 yogatama-etal-2011-predicting @@ -546,7 +546,7 @@ Discovering Morphological Paradigms from Plain Text Using a <fixed-case>D</fixed-case>irichlet Process Mixture Model MarkusDreyer - JasonEisner + JasonEisner 616–627 D11-1057 D11-1057.Attachment.zip @@ -555,7 +555,7 @@ Multilayer Sequence Labeling AiAzuma - YujiMatsumoto + YujiMatsumoto 628–637 D11-1058 azuma-matsumoto-2011-multilayer @@ -563,8 +563,8 @@ A <fixed-case>B</fixed-case>ayesian Mixture Model for <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> Induction Using Multiple Features ChristosChristodoulopoulos - SharonGoldwater - MarkSteedman + SharonGoldwater + MarkSteedman 638–647 D11-1059 christodoulopoulos-etal-2011-bayesian @@ -572,14 +572,14 @@ Large-Scale Noun Compound Interpretation Using Bootstrapping and the Web as a Corpus Su NamKim - PreslavNakov + PreslavNakov 648–658 D11-1060 kim-nakov-2011-large Linguistic Redundancy in <fixed-case>T</fixed-case>witter - Fabio MassimoZanzotto + Fabio MassimoZanzotto MarcoPennacchiotti KostasTsioutsiouliklis 659–669 @@ -588,7 +588,7 @@ Divide and Conquer: Crowdsourcing the Creation of Cross-Lingual Textual Entailment Corpora - MatteoNegri + MatteoNegri LuisaBentivogli YasharMehdad DaniloGiampiccolo @@ -599,7 +599,7 @@ Literal and Metaphorical Sense Identification through Concrete and Abstract Context - PeterTurney + PeterTurney YairNeuman DanAssaf YohaiCohen @@ -610,7 +610,7 @@ Syntactic Decision Tree <fixed-case>LM</fixed-case>s: Random Selection or Intelligent Design? DenisFilimonov - MaryHarper + MaryHarper 691–699 D11-1064 filimonov-harper-2011-syntactic @@ -626,7 +626,7 @@ Using Syntactic and Semantic Structural Kernels for Classifying Definition Questions in Jeopardy! AlessandroMoschitti - JenniferChu-Carroll + JenniferChu-Carroll SiddharthPatwardhan JamesFan GiuseppeRiccardi @@ -637,16 +637,16 @@ Multiword Expression Identification with Tree Substitution Grammars: A Parsing tour de force with <fixed-case>F</fixed-case>rench SpenceGreen - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe JohnBauer - Christopher D.Manning + Christopher D.Manning 725–735 D11-1067 green-etal-2011-multiword Modelling Discourse Relations for <fixed-case>A</fixed-case>rabic - AmalAl-Saif + AmalAl-Saif KatjaMarkert 736–747 D11-1068 @@ -655,14 +655,14 @@ Classifying Sentences as Speech Acts in Message Board Posts AshequlQadir - EllenRiloff + EllenRiloff 748–758 D11-1069 qadir-riloff-2011-classifying Learning Local Content Shift Detectors from Document-level Information - RichárdFarkas + RichárdFarkas 759–770 D11-1070 D11-1070.Attachment.zip @@ -697,7 +697,7 @@ AlokKothari MartinForst ChristinaLioma - HinrichSchütze + HinrichSchütze 793–803 D11-1073 michelbacher-etal-2011-cascaded @@ -714,7 +714,7 @@ Unsupervised Information Extraction with Distributional Prior Knowledge Cane Wing-kiLeung JingJiang - Kian Ming A.Chai + Kian Ming A.Chai Hai LeongChieu Loo-NinTeow 814–824 @@ -726,11 +726,11 @@ StijnDe Saeger KentaroTorisawa MasaakiTsuchida - Jun’ichiKazama + Jun’ichiKazama ChikaraHashimoto IchiroYamada - Jong HoonOh - IstvanVarga + Jong HoonOh + IstvanVarga YulanYan 825–835 D11-1076 @@ -746,7 +746,7 @@ Analyzing Methods for Improving Precision of Pivot Based Bilingual Dictionaries - XabierSaralegi + XabierSaralegi IkerManterola IñakiSan Vicente 846–856 @@ -781,11 +781,11 @@ A Correction Model for Word Alignments - J. ScottMcCarley + J. ScottMcCarley AbrahamIttycheriah - SalimRoukos + SalimRoukos BingXiang - Jian-mingXu + Jian-mingXu 889–898 D11-1082 mccarley-etal-2011-correction @@ -793,16 +793,16 @@ Heuristic Search for Non-Bottom-Up Tree Structure Prediction AndreaGesmundo - JamesHenderson + JamesHenderson 899–908 D11-1083 gesmundo-henderson-2011-heuristic Cache-based Document-level Statistical Machine Translation - ZhengxianGong + ZhengxianGong MinZhang - GuodongZhou + GuodongZhou 909–919 D11-1084 gong-etal-2011-cache @@ -811,8 +811,8 @@ Minimum Imputed-Risk: Unsupervised Discriminative Training for Machine Translation ZhifeiLi ZiyuanWang - JasonEisner - SanjeevKhudanpur + JasonEisner + SanjeevKhudanpur BrianRoark 920–929 D11-1085 @@ -821,8 +821,8 @@ Improving Bilingual Projections via Sparse Covariance Matrices JagadeeshJagarlamudi - RaghavendraUdupa - HalDaumé III + RaghavendraUdupa + HalDaumé III AbhijitBhole 930–940 D11-1086 @@ -854,7 +854,7 @@ Enhancing <fixed-case>C</fixed-case>hinese Word Segmentation Using Unlabeled Data - WeiweiSun + WeiweiSun JiaXu 970–979 D11-1090 @@ -863,7 +863,7 @@ Unsupervised Learning of Selectional Restrictions and Detection of Argument Coercions KirkRoberts - SandaHarabagiu + SandaHarabagiu 980–990 D11-1091 roberts-harabagiu-2011-unsupervised @@ -872,7 +872,7 @@ Harnessing different knowledge sources to measure semantic relatedness under a uniform model ZiqiZhang Anna LisaGentile - FabioCiravegna + FabioCiravegna 991–1002 D11-1092 zhang-etal-2011-harnessing @@ -887,7 +887,7 @@ Latent Vector Weighting for Word Meaning in Context - TimVan de Cruys + TimVan de Cruys ThierryPoibeau AnnaKorhonen 1012–1022 @@ -907,7 +907,7 @@ Structured Lexical Similarity via Convolution Kernels on Dependency Trees DaniloCroce AlessandroMoschitti - RobertoBasili + RobertoBasili 1034–1046 D11-1096 croce-etal-2011-structured @@ -923,7 +923,7 @@ Lexical Co-occurrence, Statistical Significance, and Word Association Dipak L.Chaudhari - Om P.Damani + Om P.Damani SrivatsanLaxman 1058–1068 D11-1098 @@ -939,9 +939,9 @@ Harnessing <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Senses for Supervised Sentiment Classification - BalamuraliAR + BalamuraliAR AdityaJoshi - PushpakBhattacharyya + PushpakBhattacharyya 1081–1091 D11-1100 ar-etal-2011-harnessing @@ -958,7 +958,7 @@ Hypotheses Selection Criteria in a Reranking Framework for Spoken Language Understanding MarcoDinarelli - SophieRosset + SophieRosset 1104–1115 D11-1102 dinarelli-rosset-2011-hypotheses @@ -966,8 +966,8 @@ A Fast Re-scoring Strategy to Capture Long-Distance Dependencies AnoopDeoras - TomášMikolov - KennethChurch + TomášMikolov + KennethChurch 1116–1127 D11-1103 deoras-etal-2011-fast @@ -976,7 +976,7 @@ Efficient Subsampling for Training Complex Language Models PuyangXu AselaGunawardana - SanjeevKhudanpur + SanjeevKhudanpur 1128–1136 D11-1104 xu-etal-2011-efficient @@ -1034,7 +1034,7 @@ Relaxed Cross-lingual Projection of Constituent Syntax WenbinJiang QunLiu - YajuanLv + YajuanLv 1192–1201 D11-1110 jiang-etal-2011-relaxed @@ -1042,7 +1042,7 @@ Computing Logical Form on Regulatory Texts NikhilDinesh - AravindJoshi + AravindJoshi InsupLee 1202–1212 D11-1111 @@ -1058,15 +1058,15 @@ Parse Correction with Specialized Models for Difficult Attachment Types - EnriqueHenestroza Anguiano - MarieCandito + EnriqueHenestroza Anguiano + MarieCandito 1222–1233 D11-1113 henestroza-anguiano-candito-2011-parse Exact Inference for Generative Probabilistic Non-Projective Dependency Parsing - Shay B.Cohen + Shay B.Cohen CarlosGómez-Rodríguez GiorgioSatta 1234–1245 @@ -1076,7 +1076,7 @@ Semi-supervised <fixed-case>CCG</fixed-case> Lexicon Extension EmilyThomforde - MarkSteedman + MarkSteedman 1246–1256 D11-1115 thomforde-steedman-2011-semi @@ -1084,7 +1084,7 @@ A Fast, Accurate, Non-Projective, Semantically-Enriched Parser StephenTratz - EduardHovy + EduardHovy 1257–1268 D11-1116 D11-1116.Attachment.zip @@ -1092,19 +1092,19 @@ Lateen <fixed-case>EM</fixed-case>: Unsupervised Training with Multiple Objectives, Applied to Dependency Grammar Induction - Valentin I.Spitkovsky - HiyanAlshawi - DanielJurafsky + Valentin I.Spitkovsky + HiyanAlshawi + DanielJurafsky 1269–1280 D11-1117 spitkovsky-etal-2011-lateen Unsupervised Dependency Parsing without Gold Part-of-Speech Tags - Valentin I.Spitkovsky - HiyanAlshawi - Angel X.Chang - DanielJurafsky + Valentin I.Spitkovsky + HiyanAlshawi + Angel X.Chang + DanielJurafsky 1281–1290 D11-1118 spitkovsky-etal-2011-unsupervised @@ -1112,9 +1112,9 @@ Exploiting Syntactic and Distributional Information for Spelling Correction with Web-Scale N-gram Models WeiXu - JoelTetreault - MartinChodorow - RalphGrishman + JoelTetreault + MartinChodorow + RalphGrishman LeZhao 1291–1300 D11-1119 @@ -1122,8 +1122,8 @@ Discriminating Gender on <fixed-case>T</fixed-case>witter - John D.Burger - JohnHenderson + John D.Burger + JohnHenderson GeorgeKim GuidoZarrella 1301–1309 @@ -1154,8 +1154,8 @@ Structural Opinion Mining for Graph-based Sentiment Representation YuanbinWu QiZhang - XuanjingHuang - LideWu + XuanjingHuang + LideWu 1332–1341 D11-1123 wu-etal-2011-structural @@ -1163,7 +1163,7 @@ Summarize What You Are Interested In: An Optimization Framework for Interactive Personalized Summarization RuiYan - Jian-YunNie + Jian-YunNie XiaomingLi 1342–1351 D11-1124 @@ -1182,7 +1182,7 @@ AshishVenugopal JakobUszkoreit DavidTalbot - FranzOch + FranzOch JuriGanitkevitch 1363–1372 D11-1126 @@ -1193,8 +1193,8 @@ GonzaloIglesias CyrilAllauzen WilliamByrne - Adriàde Gispert - MichaelRiley + Adriàde Gispert + MichaelRiley 1373–1383 D11-1127 iglesias-etal-2011-hierarchical @@ -1213,8 +1213,8 @@ Experimental Support for a Categorical Compositional Distributional Model of Meaning - EdwardGrefenstette - MehrnooshSadrzadeh + EdwardGrefenstette + MehrnooshSadrzadeh 1394–1404 D11-1129 D11-1129.Attachment.zip @@ -1223,15 +1223,15 @@ Cross-Cutting Models of Lexical Semantics JosephReisinger - RaymondMooney + RaymondMooney 1405–1415 D11-1130 reisinger-mooney-2011-cross Reducing Grounded Learning Tasks To Grammatical Inference - BenjaminBörschinger - Bevan K.Jones + BenjaminBörschinger + Bevan K.Jones MarkJohnson 1416–1425 D11-1131 @@ -1251,12 +1251,12 @@ Extreme Extraction – Machine Reading in a Week MarjorieFreedman - LanceRamshaw + LanceRamshaw ElizabethBoschee RyanGabbard GaryKratkiewicz NicolasWard - RalphWeischedel + RalphWeischedel 1437–1446 D11-1133 freedman-etal-2011-extreme @@ -1265,7 +1265,7 @@ Discovering Relations between Noun Categories ThahirMohamed EstevamHruschka - TomMitchell + TomMitchell 1447–1455 D11-1134 mohamed-etal-2011-discovering @@ -1292,16 +1292,16 @@ KatsuhikoHayashi TaroWatanabe MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 1479–1488 D11-1137 hayashi-etal-2011-third Training dependency parsers by jointly optimizing multiple objectives - KeithHall + KeithHall RyanMcDonald - JasonKatz-Brown + JasonKatz-Brown MichaelRinggaard 1489–1499 D11-1138 @@ -1309,8 +1309,8 @@ Structured Sparsity in Structured Prediction - AndréMartins - NoahSmith + AndréMartins + NoahSmith MárioFigueiredo PedroAguiar 1500–1511 @@ -1320,9 +1320,9 @@ Lexical Generalization in <fixed-case>CCG</fixed-case> Grammar Induction for Semantic Parsing TomKwiatkowski - LukeZettlemoyer - SharonGoldwater - MarkSteedman + LukeZettlemoyer + SharonGoldwater + MarkSteedman 1512–1523 D11-1140 kwiatkowski-etal-2011-lexical @@ -1340,7 +1340,7 @@ Identifying Relations for Open Information Extraction AnthonyFader - StephenSoderland + StephenSoderland OrenEtzioni 1535–1545 D11-1142 @@ -1350,7 +1350,7 @@ Active Learning with <fixed-case>A</fixed-case>mazon <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk FlorianLaws ChristianScheible - HinrichSchütze + HinrichSchütze 1546–1556 D11-1143 laws-etal-2011-active @@ -1385,7 +1385,7 @@ Rumor has it: Identifying Misinformation in Microblogs VahedQazvinian EmilyRosengren - Dragomir R.Radev + Dragomir R.Radev QiaozhuMei 1589–1599 D11-1147 diff --git a/data/xml/D12.xml b/data/xml/D12.xml index bf7f45b808..e52c44e8b4 100644 --- a/data/xml/D12.xml +++ b/data/xml/D12.xml @@ -4,9 +4,9 @@ Proceedings of the 2012 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning D12-1 - Jun’ichiTsujii - JamesHenderson - MariusPaşca + Jun’ichiTsujii + JamesHenderson + MariusPaşca Association for Computational Linguistics
Jeju Island, Korea
July @@ -29,7 +29,7 @@ Regularized Interlingual Projections: Evaluation on Multilingual Transliteration JagadeeshJagarlamudi - HalDaumé III + HalDaumé III 12–23 D12-1002 jagarlamudi-daume-iii-2012-regularized @@ -38,7 +38,7 @@ Bilingual Lexicon Extraction from Comparable Corpora Using Label Propagation AkihiroTamura TaroWatanabe - EiichiroSumita + EiichiroSumita 24–36 D12-1003 tamura-etal-2012-bilingual @@ -64,9 +64,9 @@ Detecting Subgroups in Online Discussions by Modeling Positive and Negative Relations among Participants - AhmedHassan + AhmedHassan AmjadAbu-Jbara - DragomirRadev + DragomirRadev 59–70 D12-1006 hassan-etal-2012-detecting @@ -75,7 +75,7 @@ Generative Goal-Driven User Simulation for Dialog Management AcielEshky BenAllison - MarkSteedman + MarkSteedman 71–81 D12-1007 eshky-etal-2012-generative @@ -83,7 +83,7 @@ Optimising Incremental Dialogue Decisions Using Information Density for Interactive Systems NinaDethlefs - HelenHastie + HelenHastie VerenaRieser OliverLemon 82–93 @@ -92,7 +92,7 @@ Mixed Membership <fixed-case>M</fixed-case>arkov Models for Unsupervised Conversation Modeling - Michael J.Paul + Michael J.Paul 94–104 D12-1009 paul-2012-mixed @@ -107,7 +107,7 @@ Linking Named Entities to Any Database - AvirupSil + AvirupSil ErnestCronin PenghaiNie YinfeiYang @@ -125,7 +125,7 @@ LauraChiticariu RajasekarKrishnamurthy AnkushDharkar - PushpakBhattacharyya + PushpakBhattacharyya 128–138 D12-1012 nagesh-etal-2012-towards @@ -134,8 +134,8 @@ Active Learning for Imbalanced Sentiment Classification ShoushanLi ShengfengJu - GuodongZhou - XiaojunLi + GuodongZhou + XiaojunLi 139–148 D12-1013 li-etal-2012-active-learning @@ -169,7 +169,7 @@ Local and Global Context for Supervised and Unsupervised Metonymy Resolution - ViviNastase + ViviNastase AlexJudea KatjaMarkert MichaelStrube @@ -190,11 +190,11 @@ Spectral Dependency Parsing with Latent Variables - ParamveerDhillon + ParamveerDhillon JordanRodu - MichaelCollins - DeanFoster - LyleUngar + MichaelCollins + DeanFoster + LyleUngar 205–213 D12-1019 dhillon-etal-2012-spectral @@ -211,8 +211,8 @@ A <fixed-case>B</fixed-case>ayesian Model for Learning <fixed-case>SCFG</fixed-case>s with Discontiguous Rules AbbyLevenberg - ChrisDyer - PhilBlunsom + ChrisDyer + PhilBlunsom 223–232 D12-1021 levenberg-etal-2012-bayesian @@ -227,8 +227,8 @@ Minimal Dependency Length in Realization Ranking - MichaelWhite - RajakrishnanRajkumar + MichaelWhite + RajakrishnanRajkumar 244–255 D12-1023 D12-1023.Attachment.zip @@ -252,10 +252,10 @@ N-gram-based Tense Models for Statistical Machine Translation - ZhengxianGong + ZhengxianGong MinZhang - Chew LimTan - GuodongZhou + Chew LimTan + GuodongZhou 276–285 D12-1026 gong-etal-2012-n @@ -263,7 +263,7 @@ Source Language Adaptation for Resource-Poor Machine Translation PidongWang - PreslavNakov + PreslavNakov Hwee TouNg 286–296 D12-1027 @@ -272,7 +272,7 @@ Exploiting Reducibility in Unsupervised Dependency Parsing DavidMareček - ZdeněkŽabokrtský + ZdeněkŽabokrtský 297–307 D12-1028 marecek-zabokrtsky-2012-exploiting @@ -306,7 +306,7 @@ Name Phylogeny: A Generative Model of String Variation NicholasAndrews - JasonEisner + JasonEisner MarkDredze 344–355 D12-1032 @@ -317,8 +317,8 @@ Syntactic Surprisal Affects Spoken Word Duration in Conversational Contexts VeraDemberg - AsadSayeed - PhilipGorinski + AsadSayeed + PhilipGorinski NikolaosEngonopoulos 356–367 D12-1033 @@ -327,12 +327,12 @@ Why Question Answering using Sentiment Analysis and Word Classes - Jong-HoonOh + Jong-HoonOh KentaroTorisawa ChikaraHashimoto TakuyaKawada StijnDe Saeger - Jun’ichiKazama + Jun’ichiKazama YiouWang 368–378 D12-1034 @@ -355,7 +355,7 @@ Answering Opinion Questions on Products by Exploiting Hierarchical Organization of Consumer Reviews JianxingYu Zheng-JunZha - Tat-SengChua + Tat-SengChua 391–401 D12-1036 D12-1036.Attachment.zip @@ -366,7 +366,7 @@ LemaoLiu HailongCao TaroWatanabe - TiejunZhao + TiejunZhao MoYu ConghuiZhu 402–411 @@ -378,7 +378,7 @@ WenbinJiang FandongMeng QunLiu - Yajuan + Yajuan 412–420 D12-1038 jiang-etal-2012-iterative @@ -387,7 +387,7 @@ Automatically Constructing a Normalisation Dictionary for Microblogs BoHan PaulCook - TimothyBaldwin + TimothyBaldwin 421–432 D12-1039 han-etal-2012-automatically @@ -395,7 +395,7 @@ Unsupervised <fixed-case>PCFG</fixed-case> Induction for Grounded Language Learning with Highly Ambiguous Supervision JoohyunKim - RaymondMooney + RaymondMooney 433–444 D12-1040 kim-mooney-2012-unsupervised @@ -414,7 +414,7 @@ MihaiSurdeanu JulieTibshirani RameshNallapati - Christopher D.Manning + Christopher D.Manning 455–465 D12-1042 surdeanu-etal-2012-multi @@ -422,7 +422,7 @@ An “<fixed-case>AI</fixed-case> readability” Formula for <fixed-case>F</fixed-case>rench as a Foreign Language ThomasFrançois - CédrickFairon + CédrickFairon 466–477 D12-1043 francois-fairon-2012-ai @@ -431,7 +431,7 @@ Dynamic Programming for Higher Order Parsing of Gap-Minding Trees EmilyPitler SampathKannan - MitchellMarcus + MitchellMarcus 478–488 D12-1044 pitler-etal-2012-dynamic @@ -440,9 +440,9 @@ Joint Entity and Event Coreference Resolution across Documents HeeyoungLee MartaRecasens - AngelChang + AngelChang MihaiSurdeanu - DanJurafsky + DanJurafsky 489–500 D12-1045 D12-1045.Attachment.zip @@ -471,7 +471,7 @@ ACL 2022 10-Year Test of Time Mausam MichaelSchmitz - StephenSoderland + StephenSoderland RobertBart OrenEtzioni 523–534 @@ -518,7 +518,7 @@ VincentVan Asch RoserMorante PaoloFrasconi - WalterDaelemans + WalterDaelemans LucDe Raedt 579–589 D12-1053 @@ -527,7 +527,7 @@ Lyrics, Music, and Emotions - RadaMihalcea + RadaMihalcea CarloStrapparava 590–599 D12-1054 @@ -535,7 +535,7 @@ Assessment of <fixed-case>ESL</fixed-case> Learners’ Syntactic Competence Based on Similarity Measures - Su-YounYoon + Su-YounYoon SumaBhat 600–608 D12-1055 @@ -554,8 +554,8 @@ ChikaraHashimoto KentaroTorisawa StijnDe Saeger - Jong-HoonOh - Jun’ichiKazama + Jong-HoonOh + Jun’ichiKazama 619–630 D12-1057 D12-1057.Attachment.pdf @@ -573,7 +573,7 @@ Concurrent Acquisition of Word Meaning and Lexical Categories AfraAlishahi - GrzegorzChrupala + GrzegorzChrupala 643–654 D12-1059 alishahi-chrupala-2012-concurrent @@ -607,9 +607,9 @@ Three Dependency-and-Boundary Models for Grammar Induction - Valentin I.Spitkovsky - HiyanAlshawi - DanielJurafsky + Valentin I.Spitkovsky + HiyanAlshawi + DanielJurafsky 688–698 D12-1063 spitkovsky-etal-2012-three @@ -645,7 +645,7 @@ Parse, Price and <fixed-case>C</fixed-case>ut—<fixed-case>D</fixed-case>elayed Column and Row Generation for Graph Based Parsers SebastianRiedel - DavidSmith + DavidSmith AndrewMcCallum 732–743 D12-1067 @@ -657,7 +657,7 @@ QiMao Qiao LiangXiang Ivor Wai-HungTsang - Kian Ming AdamChai + Kian Ming AdamChai Hai LeongChieu 744–753 D12-1068 @@ -666,7 +666,7 @@ Weakly Supervised Training of Semantic Parsers JayantKrishnamurthy - TomMitchell + TomMitchell 754–765 D12-1069 krishnamurthy-mitchell-2012-weakly @@ -689,9 +689,9 @@ A Sequence Labelling Approach to Quote Attribution - TimothyO’Keefe + TimothyO’Keefe SilviaPareti - James R.Curran + James R.Curran IrenaKoprinska MatthewHonnibal 790–799 @@ -701,8 +701,8 @@ <fixed-case>SSHLDA</fixed-case>: A Semi-Supervised Hierarchical Topic Model Xian-LingMao - Zhao-YanMing - Tat-SengChua + Zhao-YanMing + Tat-SengChua SiLi HongfeiYan XiaomingLi @@ -714,7 +714,7 @@ Improving <fixed-case>NLP</fixed-case> through Marginalization of Hidden Syntactic Structure JasonNaradowsky SebastianRiedel - DavidSmith + DavidSmith 810–820 D12-1074 naradowsky-etal-2012-improving @@ -768,7 +768,7 @@ Learning Constraints for Consistent Timeline Extraction DavidMcClosky - Christopher D.Manning + Christopher D.Manning 873–882 D12-1080 mcclosky-manning-2012-learning @@ -794,9 +794,9 @@ A Novel Discriminative Framework for Sentence-Level Discourse Analysis - ShafiqJoty + ShafiqJoty GiuseppeCarenini - RaymondNg + RaymondNg 904–915 D12-1083 joty-etal-2012-novel @@ -816,7 +816,7 @@ AndersBjörkelund JonasKuhn WolfgangSeeker - SinaZarriess + SinaZarriess 928–939 D12-1085 bohnet-etal-2012-generating @@ -843,9 +843,9 @@ Entropy-based Pruning for Phrase-based Machine Translation WangLing - JoãoGraça + JoãoGraça IsabelTrancoso - AlanBlack + AlanBlack 962–971 D12-1088 ling-etal-2012-entropy @@ -862,7 +862,7 @@ Probabilistic Finite State Machines for Regression-based <fixed-case>MT</fixed-case> Evaluation MengqiuWang - Christopher D.Manning + Christopher D.Manning 984–994 D12-1090 wang-manning-2012-probabilistic @@ -879,8 +879,8 @@ Employing Compositional Semantics and Discourse Consistency in <fixed-case>C</fixed-case>hinese Event Extraction PeifengLi - GuodongZhou - QiaomingZhu + GuodongZhou + QiaomingZhu LibinHou 1006–1016 D12-1092 @@ -889,9 +889,9 @@ Reading The Web with Learned Syntactic-Semantic Inference Rules NiLao - AmarnagSubramanya + AmarnagSubramanya FernandoPereira - William W.Cohen + William W.Cohen 1017–1026 D12-1093 lao-etal-2012-reading @@ -900,15 +900,15 @@ Ensemble Semantics for Large-scale Unsupervised Relation Extraction BonanMin ShumingShi - RalphGrishman - Chin-YewLin + RalphGrishman + Chin-YewLin 1027–1037 D12-1094 min-etal-2012-ensemble Forest Reranking through Subtree Ranking - RichárdFarkas + RichárdFarkas HelmutSchmid 1038–1047 D12-1095 @@ -918,7 +918,7 @@ Parser Showdown at the <fixed-case>W</fixed-case>all <fixed-case>S</fixed-case>treet Corral: An Empirical Investigation of Error Types in Parser Output Jonathan K.Kummerfeld DavidHall - James R.Curran + James R.Curran DanKlein 1048–1059 D12-1096 @@ -927,8 +927,8 @@ Extending Machine Translation Evaluation Metrics with Lexical Cohesion to Document Level - Billy T. M.Wong - ChunyuKit + Billy T. M.Wong + ChunyuKit 1060–1068 D12-1097 wong-kit-2012-extending @@ -936,7 +936,7 @@ Fast Large-Scale Approximate Graph Construction for <fixed-case>NLP</fixed-case> AmitGoyal - HalDaumé III + HalDaumé III RaulGuerra 1069–1080 D12-1098 @@ -955,7 +955,7 @@ Sketch Algorithms for Estimating Point Queries in <fixed-case>NLP</fixed-case> AmitGoyal - HalDaumé III + HalDaumé III GrahamCormode 1093–1103 D12-1100 @@ -990,9 +990,9 @@ <fixed-case>PATTY</fixed-case>: A Taxonomy of Relational Patterns with Semantic Types - NdapandulaNakashole + NdapandulaNakashole GerhardWeikum - FabianSuchanek + FabianSuchanek 1135–1145 D12-1104 nakashole-etal-2012-patty @@ -1018,7 +1018,7 @@ Language Model Rest Costs and Space-Efficient Storage KennethHeafield PhilippKoehn - AlonLavie + AlonLavie 1169–1178 D12-1107 heafield-etal-2012-language @@ -1027,7 +1027,7 @@ Document-Wide Decoding for Phrase-Based Statistical Machine Translation ChristianHardmeier JoakimNivre - JörgTiedemann + JörgTiedemann 1179–1190 D12-1108 hardmeier-etal-2012-document @@ -1037,7 +1037,7 @@ YangFeng YangLiu QunLiu - TrevorCohn + TrevorCohn 1191–1200 D12-1109 feng-etal-2012-left @@ -1046,17 +1046,17 @@ Semantic Compositionality through Recursive Matrix-Vector Spaces RichardSocher BrodyHuval - Christopher D.Manning - Andrew Y.Ng + Christopher D.Manning + Andrew Y.Ng 1201–1211 D12-1110 socher-etal-2012-semantic Polarity Inducing Latent Semantic Analysis - Wen-tauYih - GeoffreyZweig - JohnPlatt + Wen-tauYih + GeoffreyZweig + JohnPlatt 1212–1222 D12-1111 D12-1111.Presentation.pptx @@ -1064,8 +1064,8 @@ First Order vs. Higher Order Modification in Distributional Semantics - GemmaBoleda - Eva MariaVecchi + GemmaBoleda + Eva MariaVecchi MiquelCornudella LouiseMcNally 1223–1233 @@ -1074,7 +1074,7 @@ Learning-based Multi-Sieve Co-reference Resolution with Knowledge - LevRatinov + LevRatinov DanRoth 1234–1244 D12-1113 @@ -1084,7 +1084,7 @@ Joint Learning for Coreference Resolution with <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic YangSong JingJiang - Wayne XinZhao + Wayne XinZhao SujianLi HoufengWang 1245–1254 @@ -1118,7 +1118,7 @@ JordanBoyd-Graber BriannaSatinoff HeHe - HalDaumé III + HalDaumé III 1290–1301 D12-1118 D12-1118.Attachment.pdf @@ -1128,8 +1128,8 @@ Multi-Domain Learning: When Do Domains Matter? MaheshJoshi MarkDredze - William W.Cohen - CarolynRosé + William W.Cohen + CarolynRosé 1302–1312 D12-1119 joshi-etal-2012-multi @@ -1153,7 +1153,7 @@ Extracting Opinion Expressions with semi-<fixed-case>M</fixed-case>arkov Conditional Random Fields BishanYang - ClaireCardie + ClaireCardie 1335–1345 D12-1122 yang-cardie-2012-extracting @@ -1171,9 +1171,9 @@ Word Salad: Relating Food Prices and Descriptions VictorChahuneau KevinGimpel - Bryan R.Routledge + Bryan R.Routledge LilyScherlis - Noah A.Smith + Noah A.Smith 1357–1367 D12-1124 D12-1124.Attachment.pdf @@ -1196,7 +1196,7 @@ XipengQiu ShuZhang FengJi - XuanjingHuang + XuanjingHuang 1379–1388 D12-1126 zhao-etal-2012-part @@ -1204,7 +1204,7 @@ <fixed-case>W</fixed-case>iki-ly Supervised Part-of-Speech Tagging ShenLi - JoãoGraça + JoãoGraça BenTaskar 1389–1398 D12-1127 @@ -1213,7 +1213,7 @@ Joining Forces Pays Off: Multilingual Joint Word Sense Disambiguation RobertoNavigli - Simone PaoloPonzetto + Simone PaoloPonzetto 1399–1410 D12-1128 navigli-ponzetto-2012-joining @@ -1236,9 +1236,9 @@ Improved Parsing and <fixed-case>POS</fixed-case> Tagging Using Inter-Sentence Consistency Constraints - AlexanderRush + AlexanderRush RoiReichart - MichaelCollins + MichaelCollins AmirGloberson 1434–1444 D12-1131 @@ -1247,7 +1247,7 @@ Unified Dependency Parsing of <fixed-case>C</fixed-case>hinese Morphological and Syntactic Structures ZhongguoLi - GuodongZhou + GuodongZhou 1445–1454 D12-1132 li-zhou-2012-unified @@ -1262,7 +1262,7 @@ Identifying Event-related Bursts via Social Media Activities - XinZhao + XinZhao BaihanShu JingJiang YangSong @@ -1292,7 +1292,7 @@ StephenRoller MichaelSperiosu SaratRallapalli - BenjaminWing + BenjaminWing JasonBaldridge 1500–1510 D12-1137 @@ -1302,7 +1302,7 @@ A Discriminative Model for Query Spelling Correction with Latent Structural <fixed-case>SVM</fixed-case> HuizhongDuan YanenLi - ChengXiangZhai + ChengXiangZhai DanRoth 1511–1521 D12-1138 diff --git a/data/xml/D13.xml b/data/xml/D13.xml index ec25152c12..25e4a12011 100644 --- a/data/xml/D13.xml +++ b/data/xml/D13.xml @@ -5,7 +5,7 @@ Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing D13-1 DavidYarowsky - TimothyBaldwin + TimothyBaldwin AnnaKorhonen KarenLivescu StevenBethard @@ -22,7 +22,7 @@ Event-Based Time Label Propagation for Automatic Dating of News Articles TaoGe - BaobaoChang + BaobaoChang SujianLi ZhifangSui 1–11 @@ -31,13 +31,13 @@ Exploiting Discourse Analysis for Article-Wide Temporal Classification - Jun-PingNg + Jun-PingNg Min-YenKan ZihengLin WeiFeng BinChen JianSu - Chew-LimTan + Chew-LimTan 12–23 D13-1002 ng-etal-2013-exploiting @@ -54,7 +54,7 @@ Exploring the Utility of Joint Morphological and Syntactic Learning from Child-directed Speech StellaFrank FrankKeller - SharonGoldwater + SharonGoldwater 30–41 D13-1004 frank-etal-2013-exploring @@ -62,8 +62,8 @@ A Joint Learning Model of Word Segmentation, Lexical Acquisition, and Phonetic Variability MichaElsner - SharonGoldwater - NaomiFeldman + SharonGoldwater + NaomiFeldman FrankWood 42–54 D13-1005 @@ -73,9 +73,9 @@ <fixed-case>A</fixed-case>nimacy Detection with Voting Models JoshuaMoore - Christopher J.C.Burges + Christopher J.C.Burges ErinRenshaw - Wen-tauYih + Wen-tauYih 55–60 D13-1006 moore-etal-2013-animacy @@ -91,8 +91,8 @@ Paraphrasing 4 Microblog Normalization WangLing - ChrisDyer - Alan WBlack + ChrisDyer + Alan WBlack IsabelTrancoso 73–84 D13-1008 @@ -102,8 +102,8 @@ Question Difficulty Estimation in Community Question Answering Services JingLiu QuanWang - Chin-YewLin - Hsiao-WuenHon + Chin-YewLin + Hsiao-WuenHon 85–90 D13-1009 liu-etal-2013-question @@ -112,8 +112,8 @@ Measuring Ideological Proportions in Political Speeches YanchuanSim Brice D. L.Acree - Justin H.Gross - Noah A.Smith + Justin H.Gross + Noah A.Smith 91–101 D13-1010 D13-1010.Attachment.pdf @@ -140,7 +140,7 @@ Joint Parsing and Disfluency Detection in Linear Time Mohammad SadeghRasooli - JoelTetreault + JoelTetreault 124–129 D13-1013 rasooli-tetreault-2013-joint @@ -150,16 +150,16 @@ MasashiTsubaki KevinDuh MasashiShimbo - YujiMatsumoto + YujiMatsumoto 130–140 D13-1014 tsubaki-etal-2013-modeling Studying the Recursive Behaviour of Adjectival Modification with Compositional Distributional Semantics - Eva MariaVecchi + Eva MariaVecchi RobertoZamparelli - MarcoBaroni + MarcoBaroni 141–151 D13-1015 vecchi-etal-2013-studying @@ -175,7 +175,7 @@ Appropriately Incorporating Statistical Significance in <fixed-case>PMI</fixed-case> - Om P.Damani + Om P.Damani ShwetaGhonge 163–169 D13-1017 @@ -191,9 +191,9 @@ Joint Learning of Phonetic Units and Word Pronunciations for <fixed-case>ASR</fixed-case> - Chia-yingLee + Chia-yingLee YuZhang - JamesGlass + JamesGlass 182–192 D13-1019 lee-etal-2013-joint @@ -201,7 +201,7 @@ <fixed-case>MCT</fixed-case>est: A Challenge Dataset for the Open-Domain Machine Comprehension of Text MatthewRichardson - Christopher J.C.Burges + Christopher J.C.Burges ErinRenshaw 193–203 D13-1020 @@ -218,9 +218,9 @@ Optimal Beam Search for Machine Translation - AlexanderRush + AlexanderRush Yin-WenChang - MichaelCollins + MichaelCollins 210–221 D13-1022 rush-etal-2013-optimal @@ -237,7 +237,7 @@ Structured Penalties for Log-Linear Language Models - Anil KumarNelakanti + Anil KumarNelakanti CédricArchambeau JulienMairal FrancisBach @@ -249,10 +249,10 @@ Interactive Machine Translation using Hierarchical Translation Models - JesúsGonzález-Rubio - DanielOrtiz-Martínez - José-MiguelBenedí - FranciscoCasacuberta + JesúsGonzález-Rubio + DanielOrtiz-Martínez + José-MiguelBenedí + FranciscoCasacuberta 244–254 D13-1025 gonzalez-rubio-etal-2013-interactive @@ -260,7 +260,7 @@ Max-Margin Synchronous Grammar Induction for Machine Translation XinyanXiao - DeyiXiong + DeyiXiong 255–264 D13-1026 xiao-xiong-2013-max @@ -286,8 +286,8 @@ Joint Coreference Resolution and Named-Entity Linking with Multi-Pass Sieves HannanehHajishirzi LeilaZilles - Daniel S.Weld - LukeZettlemoyer + Daniel S.Weld + LukeZettlemoyer 289–299 D13-1029 hajishirzi-etal-2013-joint @@ -306,16 +306,16 @@ LongkaiZhang HoufengWang XuSun - MairgupMansur + MairgupMansur 311–321 D13-1031 zhang-etal-2013-exploring Efficient Higher-Order <fixed-case>CRF</fixed-case>s for Morphological Tagging - ThomasMueller + ThomasMueller HelmutSchmid - HinrichSchütze + HinrichSchütze 322–332 D13-1032 D13-1032.Attachment.tgz @@ -332,16 +332,16 @@ <fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars for Learning Non-Concatenative Morphology Jan A.Botha - PhilBlunsom + PhilBlunsom 345–356 D13-1034 botha-blunsom-2013-adaptor Grounding Strategic Conversation: Using Negotiation Dialogues to Predict Trades in a Win-Lose Game - AnaïsCadilhac - NicholasAsher - FarahBenamara + AnaïsCadilhac + NicholasAsher + FarahBenamara AlexLascarides 357–368 D13-1035 @@ -353,7 +353,7 @@ ElaheRahimtoroghi LarissaMunishkina ReidSwanson - Marilyn A.Walker + Marilyn A.Walker 369–379 D13-1036 hu-etal-2013-unsupervised @@ -361,7 +361,7 @@ Latent Anaphora Resolution for Cross-Lingual Pronoun Prediction ChristianHardmeier - JörgTiedemann + JörgTiedemann JoakimNivre 380–391 D13-1037 @@ -372,21 +372,21 @@ RuiFang ChangsongLiu LanboShe - Joyce Y.Chai + Joyce Y.Chai 392–402 D13-1038 fang-etal-2013-towards Open-Domain Fine-Grained Class Extraction from Web Search Queries - MariusPaşca + MariusPaşca 403–414 D13-1039 pasca-2013-open Unsupervised Relation Extraction with General Domain Knowledge - OierLopez de Lacalle + OierLopez de Lacalle MirellaLapata 415–425 D13-1040 @@ -407,7 +407,7 @@ Joint Bootstrapping of Corpus Annotations and Entity Types HrushikeshMohapatra - SiddhanthJain + SiddhanthJain SoumenChakrabarti 436–446 D13-1042 @@ -446,7 +446,7 @@ DhouhaBouamor AdrianPopescu NasredineSemmar - PierreZweigenbaum + PierreZweigenbaum 479–489 D13-1046 bouamor-etal-2013-building @@ -485,7 +485,7 @@ HuaWu HaifengWang ConghuiZhu - TiejunZhao + TiejunZhao 524–534 D13-1050 zhu-etal-2013-improving @@ -504,7 +504,7 @@ Flexible and Efficient Hypergraph Interactions for Joint Hierarchical and Forest-to-String Decoding - MartinČmejrek + MartinČmejrek HaitaoMi BowenZhou 545–555 @@ -598,7 +598,7 @@ Joint <fixed-case>C</fixed-case>hinese Word Segmentation and <fixed-case>POS</fixed-case> Tagging on Heterogeneous Annotated Corpora with Multiple Task Learning XipengQiu JiayiZhao - XuanjingHuang + XuanjingHuang 658–668 D13-1062 qiu-etal-2013-joint @@ -608,7 +608,7 @@ JimmyDubuisson Jean-PierreEckmann ChristianScheible - HinrichSchütze + HinrichSchütze 669–680 D13-1063 D13-1063.Attachment.zip @@ -617,7 +617,7 @@ Unsupervised Induction of Cross-Lingual Semantic Relations MikeLewis - MarkSteedman + MarkSteedman 681–692 D13-1064 lewis-steedman-2013-unsupervised @@ -628,7 +628,7 @@ StijnDe Saeger KentaroTorisawa ChikaraHashimoto - Jong-HoonOh + Jong-HoonOh MotokiSano KiyonoriOhtake 693–703 @@ -637,7 +637,7 @@ Sarcasm as Contrast between a Positive Sentiment and Negative Situation - EllenRiloff + EllenRiloff AshequlQadir PrafullaSurve LalindraDe Silva @@ -652,7 +652,7 @@ ZhongqingWang ShoushanLi FangKong - GuodongZhou + GuodongZhou 715–725 D13-1067 wang-etal-2013-collective @@ -660,7 +660,7 @@ Optimized Event Storyline Generation based on Mixture-Event-Aspect Model LifuHuang - Lian’enHuang + Lian’enHuang 726–735 D13-1068 huang-huang-2013-optimized @@ -678,8 +678,8 @@ MariaLiakata SimonDobnik ShyamasreeSaha - ColinBatchelor - DietrichRebholz-Schuhmann + ColinBatchelor + DietrichRebholz-Schuhmann 747–757 D13-1070 liakata-etal-2013-discourse @@ -697,7 +697,7 @@ Exploiting Language Models for Visual Recognition Dieu-ThuLe JasperUijlings - RaffaellaBernardi + RaffaellaBernardi 769–779 D13-1072 le-etal-2013-exploiting @@ -706,7 +706,7 @@ Mining Scientific Terms and their Definitions: A Study of the <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology YipingJin Min-YenKan - Jun-PingNg + Jun-PingNg XiangnanHe 780–790 D13-1073 @@ -724,7 +724,7 @@ With Blinkers on: Robust Prediction of Eye Movements across Readers FranzMatthies - AndersSøgaard + AndersSøgaard 803–807 D13-1075 matthies-sogaard-2013-blinkers @@ -732,7 +732,7 @@ Using Paraphrases and Lexical Semantics to Improve the Accuracy and the Robustness of Supervised Models in Situated Dialogue Systems ClaireGardent - Lina M.Rojas Barahona + Lina M.Rojas Barahona 808–813 D13-1076 gardent-rojas-barahona-2013-using @@ -757,7 +757,7 @@ Rule-Based Information Extraction is Dead! Long Live Rule-Based Information Extraction Systems! LauraChiticariu YunyaoLi - Frederick R.Reiss + Frederick R.Reiss 827–832 D13-1079 chiticariu-etal-2013-rule @@ -765,9 +765,9 @@ Improving Learning and Inference in a Large Knowledge-Base using Latent Syntactic Cues MattGardner - Partha PratimTalukdar + Partha PratimTalukdar BryanKisiel - TomMitchell + TomMitchell 833–838 D13-1080 gardner-etal-2013-improving @@ -785,9 +785,9 @@ RuiWang MasaoUtiyama IsaoGoto - EiichroSumita + EiichroSumita HaiZhao - Bao-LiangLu + Bao-LiangLu 845–850 D13-1082 wang-etal-2013-converting @@ -840,8 +840,8 @@ <fixed-case>R</fixed-case>ussian Stress Prediction using Maximum Entropy Ranking - KeithHall - RichardSproat + KeithHall + RichardSproat 879–883 D13-1088 hall-sproat-2013-russian @@ -849,7 +849,7 @@ Scaling to Large³ Data: An Efficient and Effective Method to Compute Distributional Thesauri MartinRiedl - ChrisBiemann + ChrisBiemann 884–890 D13-1089 riedl-biemann-2013-scaling @@ -869,7 +869,7 @@ BinyangLi DalingWang GeYu - Kam-FaiWong + Kam-FaiWong 897–902 D13-1091 feng-etal-2013-twitter @@ -897,7 +897,7 @@ Predicting the Presence of Discourse Connectives GaryPatterson - AndrewKehler + AndrewKehler 914–923 D13-1094 patterson-kehler-2013-predicting @@ -927,7 +927,7 @@ JinQian HuanChen JihuaKang - XuanjingHuang + XuanjingHuang 946–957 D13-1097 zhang-etal-2013-discourse @@ -935,7 +935,7 @@ Building Event Threads out of Multiple News Articles XavierTannier - VéroniqueMoriceau + VéroniqueMoriceau 958–967 D13-1098 tannier-moriceau-2013-building @@ -943,16 +943,16 @@ Tree Kernel-based Negation and Speculation Scope Detection with Structured Syntactic Parse Features BoweiZou - GuodongZhou - QiaomingZhu + GuodongZhou + QiaomingZhu 968–976 D13-1099 zou-etal-2013-tree A temporal model of text periodicities using <fixed-case>G</fixed-case>aussian Processes - DanielPreoţiuc-Pietro - TrevorCohn + DanielPreoţiuc-Pietro + TrevorCohn 977–988 D13-1100 D13-1100.Poster.pdf @@ -961,9 +961,9 @@ Automatically Detecting and Attributing Indirect Quotations SilviaPareti - TimO’Keefe + TimO’Keefe IoannisKonstas - James R.Curran + James R.Curran IrenaKoprinska 989–999 D13-1101 @@ -971,7 +971,7 @@ Identifying Web Search Query Reformulation using Concept based Matching - AhmedHassan + AhmedHassan 1000–1010 D13-1102 hassan-2013-identifying @@ -1001,7 +1001,7 @@ Automatic Extraction of Morphological Lexicons from Morphologically Annotated Corpora RamyEskander NizarHabash - OwenRambow + OwenRambow 1032–1043 D13-1105 eskander-etal-2013-automatic @@ -1011,7 +1011,7 @@ MichaelAuli MichelGalley ChrisQuirk - GeoffreyZweig + GeoffreyZweig 1044–1054 D13-1106 auli-etal-2013-joint @@ -1033,7 +1033,7 @@ FandongMeng JunXie LinfengSong - Yajuan + Yajuan QunLiu 1066–1076 D13-1108 @@ -1043,7 +1043,7 @@ Monolingual Marginal Matching for Translation Model Adaptation AnnIrvine ChrisQuirk - HalDaumé III + HalDaumé III 1077–1088 D13-1109 irvine-etal-2013-monolingual @@ -1051,7 +1051,7 @@ Efficient Left-to-Right Hierarchical Phrase-Based Translation with Improved Reordering MaryamSiahbani - BaskaranSankaran + BaskaranSankaran AnoopSarkar 1089–1099 D13-1110 @@ -1061,7 +1061,7 @@ A Systematic Exploration of Diversity in Machine Translation KevinGimpel DhruvBatra - ChrisDyer + ChrisDyer GregoryShakhnarovich 1100–1111 D13-1111 @@ -1098,7 +1098,7 @@ A Multimodal <fixed-case>LDA</fixed-case> Model integrating Textual, Cognitive and Visual Modalities StephenRoller - SabineSchulte im Walde + SabineSchulte im Walde 1146–1157 D13-1115 D13-1115.Attachment.zip @@ -1106,7 +1106,7 @@ Combining <fixed-case>PCFG</fixed-case>-<fixed-case>LA</fixed-case> Models with Dual Decomposition: A Case Study with Function Labels and Binarization - JosephLe Roux + JosephLe Roux AntoineRozenknop JenniferFoster 1158–1169 @@ -1115,11 +1115,11 @@ Feature Noising for Log-Linear Structured Prediction - SidaWang + SidaWang MengqiuWang - StefanWager + StefanWager PercyLiang - Christopher D.Manning + Christopher D.Manning 1170–1179 D13-1117 wang-etal-2013-feature @@ -1153,7 +1153,7 @@ RyoheiSasano DaisukeKawahara SadaoKurohashi - ManabuOkumura + ManabuOkumura 1213–1223 D13-1121 sasano-etal-2013-automatic @@ -1170,7 +1170,7 @@ A Semantically Enhanced Approach to Determine Textual Similarity EduardoBlanco - DanMoldovan + DanMoldovan 1235–1245 D13-1123 @@ -1198,7 +1198,7 @@ Simulating Early-Termination Search for Verbose Spoken Queries JeromeWhite - Douglas W.Oard + Douglas W.Oard NitendraRajput MarionZalk 1270–1280 @@ -1233,8 +1233,8 @@ Leveraging Lexical Cohesion and Disruption for Topic Segmentation - Anca-RoxanaŞimon - GuillaumeGravier + Anca-RoxanaŞimon + GuillaumeGravier PascaleSébillot 1314–1324 D13-1130 @@ -1253,7 +1253,7 @@ Mining New Business Opportunities: Identifying Trend related Products by Leveraging Commercial Intents from Microblogs JinpengWang - Wayne XinZhao + Wayne XinZhao HaitianWei HongfeiYan XiaomingLi @@ -1313,7 +1313,7 @@ JoernWuebker StephanPeitz FelixRietig - HermannNey + HermannNey 1377–1381 D13-1138 wuebker-etal-2013-improving @@ -1333,7 +1333,7 @@ Decoding with Large-Scale Neural Language Models Improves Translation AshishVaswani YinggongZhao - VictoriaFossum + VictoriaFossum DavidChiang 1387–1392 D13-1140 @@ -1344,7 +1344,7 @@ Will Y.Zou RichardSocher DanielCer - Christopher D.Manning + Christopher D.Manning 1393–1398 D13-1141 zou-etal-2013-bilingual @@ -1352,7 +1352,7 @@ Application of Localized Similarity for Web Documents PeterReberšek - MatejaVerlič + MatejaVerlič 1399–1404 D13-1142 rebersek-verlic-2013-application @@ -1369,7 +1369,7 @@ A Walk-Based Semantically Enriched Tree Kernel Over Distributed Word Representations ShashankSrivastava DirkHovy - EduardHovy + EduardHovy 1411–1416 D13-1144 srivastava-etal-2013-walk @@ -1377,7 +1377,7 @@ Automatic Idiom Identification in <fixed-case>W</fixed-case>iktionary GraceMuzny - LukeZettlemoyer + LukeZettlemoyer 1417–1421 D13-1145 muzny-zettlemoyer-2013-automatic @@ -1386,7 +1386,7 @@ <fixed-case>E</fixed-case>lephant: Sequence Labeling for Word and Sentence Segmentation KilianEvang ValerioBasile - GrzegorzChrupała + GrzegorzChrupała JohanBos 1422–1426 D13-1146 @@ -1411,8 +1411,8 @@ The <fixed-case>V</fixed-case>erb<fixed-case>C</fixed-case>orner Project: Toward an Empirically-Based Semantic Decomposition of Verbs Joshua K.Hartshorne - ClaireBonial - MarthaPalmer + ClaireBonial + MarthaPalmer 1438–1442 D13-1149 hartshorne-etal-2013-verbcorner @@ -1438,8 +1438,8 @@ Dynamic Feature Selection for Dependency Parsing HeHe - HalDaumé III - JasonEisner + HalDaumé III + JasonEisner 1455–1464 D13-1152 he-etal-2013-dynamic @@ -1454,10 +1454,10 @@ Using Crowdsourcing to get Representations based on Regular Expressions - AndersSøgaard - HectorMartinez + AndersSøgaard + HectorMartinez JakobElming - AndersJohannsen + AndersJohannsen 1476–1480 D13-1154 sogaard-etal-2013-using @@ -1491,7 +1491,7 @@ TsutomuHirao YasuhisaYoshida MasaakiNishino - NorihitoYasuda + NorihitoYasuda MasaakiNagata 1515–1520 D13-1158 @@ -1501,7 +1501,7 @@ A Hierarchical Entity-Based Approach to Structuralize User Generated Content in Social Media: A Case of <fixed-case>Y</fixed-case>ahoo! <fixed-case>A</fixed-case>nswers BaichuanLi JingLiu - Chin-YewLin + Chin-YewLin IrwinKing Michael R.Lyu 1521–1532 @@ -1524,7 +1524,7 @@ TomKwiatkowski EunsolChoi YoavArtzi - LukeZettlemoyer + LukeZettlemoyer 1545–1556 D13-1161 kwiatkowski-etal-2013-scaling @@ -1532,17 +1532,17 @@ Classifying Message Board Posts with an Extracted Lexicon of Patient Attributes RuihongHuang - EllenRiloff + EllenRiloff 1557–1562 D13-1162 huang-riloff-2013-classifying Lexical Chain Based Cohesion Models for Document-Level Statistical Machine Translation - DeyiXiong + DeyiXiong YangDing MinZhang - Chew LimTan + Chew LimTan 1563–1573 D13-1163 xiong-etal-2013-lexical @@ -1550,7 +1550,7 @@ A Convex Alternative to <fixed-case>IBM</fixed-case> Model 2 AndreiSimion - MichaelCollins + MichaelCollins CliffStein 1574–1583 D13-1164 @@ -1558,7 +1558,7 @@ Pair Language Models for Deriving Alternative Pronunciations and Spellings from Pronunciation Dictionaries - RussellBeckley + RussellBeckley BrianRoark 1584–1589 D13-1165 @@ -1567,7 +1567,7 @@ Prior Disambiguation of Word Tensors for Constructing Sentence Vectors DimitriKartsaklis - MehrnooshSadrzadeh + MehrnooshSadrzadeh 1590–1601 D13-1166 kartsaklis-sadrzadeh-2013-prior @@ -1575,8 +1575,8 @@ Multi-Relational Latent Semantic Analysis Kai-WeiChang - Wen-tauYih - ChristopherMeek + Wen-tauYih + ChristopherMeek 1602–1612 D13-1167 D13-1167.Presentation.pptx @@ -1585,7 +1585,7 @@ A Study on Bootstrapping Bilingual Vector Spaces from Non-Parallel Data (and Nothing Else) IvanVulić - Marie-FrancineMoens + Marie-FrancineMoens 1613–1624 D13-1168 vulic-moens-2013-study @@ -1593,7 +1593,7 @@ Deriving Adjectival Scales from Continuous Space Word Representations Joo-KyungKim - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe 1625–1630 D13-1169 kim-de-marneffe-2013-deriving @@ -1604,8 +1604,8 @@ AlexPerelygin JeanWu JasonChuang - Christopher D.Manning - AndrewNg + Christopher D.Manning + AndrewNg ChristopherPotts 1631–1642 D13-1170 @@ -1647,15 +1647,15 @@ Translating into Morphologically Rich Languages with Synthetic Phrases VictorChahuneau EvaSchlinger - Noah A.Smith - ChrisDyer + Noah A.Smith + ChrisDyer 1677–1687 D13-1174 chahuneau-etal-2013-translating Boosting Cross-Language Retrieval by Learning Bilingual Phrase Associations from Relevance Rankings - ArtemSokokov + ArtemSokokov LauraJehl FelixHieber StefanRiezler @@ -1666,7 +1666,7 @@ Recurrent Continuous Translation Models NalKalchbrenner - PhilBlunsom + PhilBlunsom 1700–1709 D13-1176 kalchbrenner-blunsom-2013-recurrent @@ -1679,7 +1679,7 @@ PeterClark JustinLewis BrittanyHarding - Christopher D.Manning + Christopher D.Manning 1710–1720 D13-1177 D13-1177.Attachment.zip @@ -1688,7 +1688,7 @@ Generating Coherent Event Schemas at Scale NiranjanBalasubramanian - StephenSoderland + StephenSoderland Mausam OrenEtzioni 1721–1731 @@ -1697,8 +1697,8 @@ Orthonormal Explicit Topic Analysis for Cross-Lingual Document Matching - John PhilipMcCrae - PhilippCimiano + John PhilipMcCrae + PhilippCimiano RomanKlinger 1732–1740 D13-1179 @@ -1714,7 +1714,7 @@ Success with Style: Using Writing Style to Predict the Success of Novels - VikasGanjigunte Ashok + VikasGanjigunte Ashok SongFeng YejinChoi 1753–1764 @@ -1723,7 +1723,7 @@ A Generative Joint, Additive, Sequential Model of Topics and Speech Acts in Patient-Doctor Communication - Byron C.Wallace + Byron C.Wallace Thomas A.Trikalinos M. BartonLaws Ira B.Wilson @@ -1735,7 +1735,7 @@ Harvesting Parallel News Streams to Generate Paraphrases of Event Relations CongleZhang - Daniel S.Weld + Daniel S.Weld 1776–1786 D13-1183 zhang-weld-2013-harvesting @@ -1750,7 +1750,7 @@ Event Schema Induction with a Probabilistic Entity-Driven Model - NathanaelChambers + NathanaelChambers 1797–1807 D13-1185 chambers-2013-event @@ -1793,7 +1793,7 @@ Detecting Promotional Content in <fixed-case>W</fixed-case>ikipedia ShrutiBhosale HeathVinicombe - RaymondMooney + RaymondMooney 1851–1857 D13-1190 bhosale-etal-2013-detecting @@ -1804,7 +1804,7 @@ MinghuiQiu YanchuanSim JingJiang - Noah A.Smith + Noah A.Smith 1858–1868 D13-1191 D13-1191.Attachment.pdf @@ -1848,8 +1848,8 @@ Fish Transporters and Miracle Homes: How Compositional Distributional Semantics can Help <fixed-case>NP</fixed-case> Parsing AngelikiLazaridou - Eva MariaVecchi - MarcoBaroni + Eva MariaVecchi + MarcoBaroni 1908–1913 D13-1196 D13-1196.Attachment.zip @@ -1859,7 +1859,7 @@ Learning Distributions over Logical Forms for Referring Expression Generation NicholasFitzGerald YoavArtzi - LukeZettlemoyer + LukeZettlemoyer 1914–1925 D13-1197 fitzgerald-etal-2013-learning @@ -1877,7 +1877,7 @@ Identifying Manipulated Offerings on Review Portals JiweiLi MyleOtt - ClaireCardie + ClaireCardie 1933–1942 D13-1199 li-etal-2013-identifying @@ -1885,7 +1885,7 @@ Well-Argued Recommendation: Adaptive Models Based on Words in Recommender Systems JulienGaillard - MarcEl-Beze + MarcEl-Beze EitanAltman EmmanuelEthis 1943–1947 @@ -1905,11 +1905,11 @@ Of Words, Eyes and Brains: Correlating Image-Based Distributional Semantic Models with Neural Representations of Concepts - Andrew J.Anderson + Andrew J.Anderson EliaBruni UlisseBordignon - MassimoPoesio - MarcoBaroni + MassimoPoesio + MarcoBaroni 1960–1970 D13-1202 anderson-etal-2013-words @@ -1924,9 +1924,9 @@ Breaking Out of Local Optima with Count Transforms and Model Recombination: A Study in Grammar Induction - Valentin I.Spitkovsky - HiyanAlshawi - DanielJurafsky + Valentin I.Spitkovsky + HiyanAlshawi + DanielJurafsky 1983–1995 D13-1204 spitkovsky-etal-2013-breaking diff --git a/data/xml/D14.xml b/data/xml/D14.xml index 5301063142..e6e3972f7a 100644 --- a/data/xml/D14.xml +++ b/data/xml/D14.xml @@ -6,7 +6,7 @@ D14-1 AlessandroMoschitti BoPang - WalterDaelemans + WalterDaelemans 10.3115/v1/D14-1 Association for Computational Linguistics
Doha, Qatar
@@ -20,7 +20,7 @@ Invited Talk: <fixed-case>IBM</fixed-case> Cognitive Computing - An <fixed-case>NLP</fixed-case> Renaissance! - SalimRoukos + SalimRoukos 1 D14-1001 10.3115/v1/D14-1001 @@ -43,7 +43,7 @@ MartinSundermeyer TamerAlkhouli JoernWuebker - HermannNey + HermannNey 14–25 D14-1003 10.3115/v1/D14-1003 @@ -51,7 +51,7 @@ A Neural Network Approach to Selectional Preference Acquisition - TimVan de Cruys + TimVan de Cruys 26–35 D14-1004 10.3115/v1/D14-1004 @@ -92,7 +92,7 @@ A Constituent-Based Approach to Argument Labeling with Joint Inference in Discourse Parsing FangKong Hwee TouNg - GuodongZhou + GuodongZhou 68–77 D14-1008 10.3115/v1/D14-1008 @@ -140,7 +140,7 @@ Combining Punctuation and Disfluency Prediction: An Empirical Study XuancongWang - Khe ChaiSim + Khe ChaiSim Hwee TouNg 121–130 D14-1013 @@ -150,7 +150,7 @@ Submodularity for Data Selection in Machine Translation KatrinKirchhoff - JeffBilmes + JeffBilmes 131–141 D14-1014 10.3115/v1/D14-1014 @@ -187,7 +187,7 @@ HidetakaKamigaito TaroWatanabe HiroyaTakamura - ManabuOkumura + ManabuOkumura 153–158 D14-1017 10.3115/v1/D14-1017 @@ -206,7 +206,7 @@ Syntax-Augmented Machine Translation using Syntax-Label Clustering HideyaMino TaroWatanabe - EiichiroSumita + EiichiroSumita 165–171 D14-1019 10.3115/v1/D14-1019 @@ -215,7 +215,7 @@ Testing for Significance of Increased Correlation with Human Judgment YvetteGraham - TimothyBaldwin + TimothyBaldwin 172–176 D14-1020 10.3115/v1/D14-1020 @@ -237,7 +237,7 @@ Learning Hierarchical Translation Spans JingyiZhang MasaoUtiyama - EiichiroSumita + EiichiroSumita HaiZhao 183–188 D14-1022 @@ -248,9 +248,9 @@ Neural Network Based Bilingual Language Model Growing for Statistical Machine Translation RuiWang HaiZhao - Bao-LiangLu + Bao-LiangLu MasaoUtiyama - EiichiroSumita + EiichiroSumita 189–195 D14-1023 10.3115/v1/D14-1023 @@ -268,7 +268,7 @@ Fitting Sentence Level Translation Evaluation with Many Dense Features MilošStanojević - KhalilSima’an + KhalilSima’an 202–206 D14-1025 10.3115/v1/D14-1025 @@ -287,11 +287,11 @@ Learning to Differentiate Better from Worse Translations - FranciscoGuzmán - ShafiqJoty - LluísMàrquez + FranciscoGuzmán + ShafiqJoty + LluísMàrquez AlessandroMoschitti - PreslavNakov + PreslavNakov MassimoNicosia 214–220 D14-1027 @@ -309,7 +309,7 @@ Reordering Model for Forest-to-String Machine Translation - MartinČmejrek + MartinČmejrek 227–232 D14-1029 10.3115/v1/D14-1029 @@ -320,7 +320,7 @@ LeilaWehbe AshishVaswani KevinKnight - TomMitchell + TomMitchell 233–243 D14-1030 10.3115/v1/D14-1030 @@ -388,8 +388,8 @@ A Graph-based Approach for Contextual Text Normalization - CagilSönmez - ArzucanÖzgür + CagilSönmez + ArzucanÖzgür 313–324 D14-1037 10.3115/v1/D14-1037 @@ -408,7 +408,7 @@ Hierarchical Discriminative Classification for Text-Based Geolocation - BenjaminWing + BenjaminWing JasonBaldridge 336–348 D14-1039 @@ -418,7 +418,7 @@ Probabilistic Models of Cross-Lingual Semantic Similarity in Context Based on Latent Cross-Lingual Concepts Induced from Comparable Data IvanVulić - Marie-FrancineMoens + Marie-FrancineMoens 349–362 D14-1040 10.3115/v1/D14-1040 @@ -428,7 +428,7 @@ Multi-Predicate Semantic Role Labeling HaitongYang - ChengqingZong + ChengqingZong 363–373 D14-1041 10.3115/v1/D14-1041 @@ -457,9 +457,9 @@ Incorporating Vector Space Similarity in Random Walk Inference over Knowledge Bases MattGardner - ParthaTalukdar + ParthaTalukdar JayantKrishnamurthy - TomMitchell + TomMitchell 397–406 D14-1044 10.3115/v1/D14-1044 @@ -485,7 +485,7 @@ Nothing like Good Old Frequency: Studying Context Filters for Distributional Thesauri - MuntsaPadró + MuntsaPadró MarcoIdiart AlineVillavicencio CarlosRamisch @@ -509,7 +509,7 @@ A Shortest-path Method for Arc-factored Semantic Role Labeling XavierLluís XavierCarreras - LluísMàrquez + LluísMàrquez 430–435 D14-1049 10.3115/v1/D14-1049 @@ -519,9 +519,9 @@ Semantic Kernels for Semantic Parsing ImanSaleh AlessandroMoschitti - PreslavNakov - LluísMàrquez - ShafiqJoty + PreslavNakov + LluísMàrquez + ShafiqJoty 436–442 D14-1050 10.3115/v1/D14-1050 @@ -532,9 +532,9 @@ MohamedMorchid MohamedBouallegue RichardDufour - GeorgesLinarès + GeorgesLinarès DrissMatrouf - Renatode Mori + Renatode Mori 443–454 D14-1051 10.3115/v1/D14-1051 @@ -543,7 +543,7 @@ Explaining the Stars: Weighted Multiple-Instance Learning for Aspect-Based Sentiment Analysis NikolaosPappas - AndreiPopescu-Belis + AndreiPopescu-Belis 455–466 D14-1052 10.3115/v1/D14-1052 @@ -552,7 +552,7 @@ Sentiment Analysis on the People’s Daily JiweiLi - EduardHovy + EduardHovy 467–476 D14-1053 10.3115/v1/D14-1053 @@ -575,7 +575,7 @@ Positive Unlabeled Learning for Deceptive Reviews Detection YafengRen - DonghongJi + DonghongJi HongbinZhang 488–498 D14-1055 @@ -614,7 +614,7 @@ <fixed-case>N</fixed-case>atural<fixed-case>LI</fixed-case>: Natural Logic Inference for Common Sense Reasoning GaborAngeli - Christopher D.Manning + Christopher D.Manning 534–545 D14-1059 10.3115/v1/D14-1059 @@ -623,7 +623,7 @@ Modeling Term Translation for Document-informed Machine Translation FandongMeng - DeyiXiong + DeyiXiong WenbinJiang QunLiu 546–556 @@ -644,7 +644,7 @@ Latent Domain Phrase-based Models for Adaptation HoangCuong - KhalilSima’an + KhalilSima’an 566–576 D14-1062 10.3115/v1/D14-1062 @@ -652,7 +652,7 @@ Translation Rules with Right-Hand Side Lattices - FabienCromières + FabienCromières SadaoKurohashi 577–588 D14-1063 @@ -661,7 +661,7 @@ Learning to Translate: A Query-Specific Combination Approach for Cross-Lingual Information Retrieval - FerhanTure + FerhanTure ElizabethBoschee 589–599 D14-1064 @@ -681,8 +681,8 @@ Lexical Substitution for the Medical Domain MartinRiedl - MichaelGlass - AlfioGliozzo + MichaelGlass + AlfioGliozzo 610–614 D14-1066 10.3115/v1/D14-1066 @@ -710,7 +710,7 @@ Non-linear Mapping for Improved Identification of 1300+ Languages - RalfBrown + RalfBrown 627–632 D14-1069 10.3115/v1/D14-1069 @@ -722,7 +722,7 @@ JordanBoyd-Graber LeonardoClaudino RichardSocher - HalDaumé III + HalDaumé III 633–644 D14-1070 10.3115/v1/D14-1070 @@ -733,7 +733,7 @@ Min-ChulYang NanDuan MingZhou - Hae-ChangRim + Hae-ChangRim 645–650 D14-1071 10.3115/v1/D14-1071 @@ -772,7 +772,7 @@ Fear the <fixed-case>REAPER</fixed-case>: A System for Automatic Multi-Document Summarization with Reinforcement Learning CodyRioux - Sadid A.Hasan + Sadid A.Hasan YlliasChali 681–690 D14-1075 @@ -794,7 +794,7 @@ Analyzing Stemming Approaches for <fixed-case>T</fixed-case>urkish Multi-Document Summarization Muhammed YavuzNuzumlalı - ArzucanÖzgür + ArzucanÖzgür 702–706 D14-1077 10.3115/v1/D14-1077 @@ -812,7 +812,7 @@ Evaluating Neural Word Representations in Tensor-Based Compositional Settings DmitrijsMilajevs DimitriKartsaklis - MehrnooshSadrzadeh + MehrnooshSadrzadeh MatthewPurver 708–719 D14-1079 @@ -821,8 +821,8 @@ Opinion Mining with Deep Recurrent Neural Networks - Ozanİrsoy - ClaireCardie + Ozanİrsoy + ClaireCardie 720–728 D14-1080 10.3115/v1/D14-1080 @@ -840,7 +840,7 @@ A Fast and Accurate Dependency Parser using Neural Networks DanqiChen - ChristopherManning + ChristopherManning 740–750 D14-1082 10.3115/v1/D14-1082 @@ -866,7 +866,7 @@ Unsupervised Sentence Enhancement for Automatic Summarization - Jackie Chi KitCheung + Jackie Chi KitCheung GeraldPenn 775–786 D14-1085 @@ -879,7 +879,7 @@ SaharKazemzadeh VicenteOrdonez MarkMatten - TamaraBerg + TamaraBerg 787–798 D14-1086 10.3115/v1/D14-1086 @@ -889,7 +889,7 @@ Unsupervised Template Mining for Semantic Category Understanding LeiShi ShumingShi - Chin-YewLin + Chin-YewLin Yi-DongShen YongRui 799–809 @@ -899,9 +899,9 @@ Taxonomy Construction Using Syntactic Contextual Evidence - Luu Anh Tuan - Jung-jaeKim - See KiongNg + Anh TuanLuu + Jung-jaeKim + See KiongNg 810–819 D14-1088 10.3115/v1/D14-1088 @@ -911,7 +911,7 @@ Analysing recall loss in named entity slot filling GlenPink JoelNothman - James R.Curran + James R.Curran 820–830 D14-1089 10.3115/v1/D14-1089 @@ -930,7 +930,7 @@ Syllable weight encodes mostly the same information for <fixed-case>E</fixed-case>nglish word segmentation as dictionary stress - John KPate + John KPate MarkJohnson 844–853 D14-1091 @@ -940,7 +940,7 @@ A Joint Model for Unsupervised <fixed-case>C</fixed-case>hinese Word Segmentation MiaohongChen - BaobaoChang + BaobaoChang WenzhePei 854–863 D14-1092 @@ -973,7 +973,7 @@ Morphological Segmentation for Keyword Spotting KarthikNarasimhan DamianosKarakos - RichardSchwartz + RichardSchwartz StavrosTsakalidis ReginaBarzilay 880–885 @@ -983,9 +983,9 @@ What Can We Get From 1000 Tokens? A Case Study of Multilingual <fixed-case>POS</fixed-case> Tagging For Resource-Poor Languages - LongDuong - TrevorCohn - KarinVerspoor + LongDuong + TrevorCohn + KarinVerspoor StevenBird PaulCook 886–897 @@ -1026,8 +1026,8 @@ Ambiguity Resolution for Vt-N Structures in <fixed-case>C</fixed-case>hinese Yu-MingHsieh - Jason S.Chang - Keh-JiannChen + Jason S.Chang + Keh-JiannChen 928–937 D14-1100 10.3115/v1/D14-1100 @@ -1053,11 +1053,11 @@ Dependency parsing with latent refinements of part-of-speech tags - ThomasMueller - RichardFarkas + ThomasMueller + RichardFarkas AlexJudea HelmutSchmid - HinrichSchuetze + HinrichSchuetze 963–967 D14-1103 10.3115/v1/D14-1103 @@ -1065,9 +1065,9 @@ Importance weighting and unsupervised domain adaptation of <fixed-case>POS</fixed-case> taggers: a negative result - BarbaraPlank - AndersJohannsen - AndersSøgaard + BarbaraPlank + AndersJohannsen + AndersSøgaard 968–973 D14-1104 10.3115/v1/D14-1104 @@ -1098,7 +1098,7 @@ <fixed-case>A</fixed-case>* <fixed-case>CCG</fixed-case> Parsing with a Supertag-factored Model MikeLewis - MarkSteedman + MarkSteedman 990–1000 D14-1107 10.3115/v1/D14-1107 @@ -1110,8 +1110,8 @@ NathanSchneider SwabhaSwayamdipta ArchnaBhatia - ChrisDyer - Noah A.Smith + ChrisDyer + Noah A.Smith 1001–1012 D14-1108 10.3115/v1/D14-1108 @@ -1142,7 +1142,7 @@ Reducing Dimensions of Tensors in Type-Driven Distributional Semantics TamaraPolajnar - LuanaFǎgǎrǎşan + LuanaFǎgǎrǎşan StephenClark 1036–1046 D14-1111 @@ -1151,8 +1151,8 @@ An Etymological Approach to Cross-Language Orthographic Similarity. Application on <fixed-case>R</fixed-case>omanian - Alina MariaCiobanu - Liviu P.Dinu + Alina MariaCiobanu + Liviu P.Dinu 1047–1058 D14-1112 10.3115/v1/D14-1112 @@ -1180,7 +1180,7 @@ Queries as a Source of Lexicalized Commonsense Knowledge - MariusPaşca + MariusPaşca 1081–1091 D14-1115 10.3115/v1/D14-1115 @@ -1247,11 +1247,11 @@ MaartenSap GregoryPark JohannesEichstaedt - MargaretKern + MargaretKern DavidStillwell MichalKosinski - LyleUngar - Hansen AndrewSchwartz + LyleUngar + Hansen AndrewSchwartz 1146–1151 D14-1121 10.3115/v1/D14-1121 @@ -1263,7 +1263,7 @@ William YangWang LingpengKong KathrynMazaitis - William W.Cohen + William W.Cohen 1152–1158 D14-1122 10.3115/v1/D14-1122 @@ -1278,7 +1278,7 @@ ZhongyuWei BinyangLi DongqingYang - Kam-FaiWong + Kam-FaiWong 1159–1168 D14-1123 10.3115/v1/D14-1123 @@ -1288,7 +1288,7 @@ Detecting Disagreement in Conversations using Pseudo-Monologic Rhetorical Structure KelseyAllen GiuseppeCarenini - RaymondNg + RaymondNg 1169–1180 D14-1124 10.3115/v1/D14-1124 @@ -1297,7 +1297,7 @@ +/-<fixed-case>E</fixed-case>ffect<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: Sense-level Lexicon Acquisition for Opinion Inference YoonjungChoi - JanyceWiebe + JanyceWiebe 1181–1191 D14-1125 10.3115/v1/D14-1125 @@ -1315,7 +1315,7 @@ Learning Emotion Indicators from Tweets: Hashtags, Hashtag Patterns, and Phrases AshequlQadir - EllenRiloff + EllenRiloff 1203–1209 D14-1127 10.3115/v1/D14-1127 @@ -1324,7 +1324,7 @@ Fine-Grained Contextual Predictions for Hard Sentiment Words SebastianEbert - HinrichSchütze + HinrichSchütze 1210–1215 D14-1128 10.3115/v1/D14-1128 @@ -1338,7 +1338,7 @@ JunLu JunLang HengJi - JianminYao + JianminYao 1216–1224 D14-1129 10.3115/v1/D14-1129 @@ -1347,11 +1347,11 @@ Human Effort and Machine Learnability in Computer Aided Translation SpenceGreen - Sida I.Wang + Sida I.Wang JasonChuang JeffreyHeer SebastianSchuster - Christopher D.Manning + Christopher D.Manning 1225–1236 D14-1130 10.3115/v1/D14-1130 @@ -1401,7 +1401,7 @@ Morpho-syntactic Lexical Generalization for <fixed-case>CCG</fixed-case> Semantic Parsing AdrienneWang TomKwiatkowski - LukeZettlemoyer + LukeZettlemoyer 1284–1295 D14-1135 10.3115/v1/D14-1135 @@ -1453,7 +1453,7 @@ HeHe JordanBoyd-Graber JohnMorgan - HalDaumé III + HalDaumé III 1342–1352 D14-1140 10.3115/v1/D14-1140 @@ -1493,7 +1493,7 @@ Language Transfer Hypotheses with Linear <fixed-case>SVM</fixed-case> Weights - ShervinMalmasi + ShervinMalmasi MarkDras 1385–1390 D14-1144 @@ -1503,7 +1503,7 @@ Predicting Dialect Variation in Immigrant Contexts Using Light Verb Constructions A. SezaDoğruöz - PreslavNakov + PreslavNakov 1391–1395 D14-1145 10.3115/v1/D14-1145 @@ -1514,7 +1514,7 @@ A-YeongKim Hyun-JeSong Seong-BaePark - Sang-JoLee + Sang-JoLee 1396–1404 D14-1146 10.3115/v1/D14-1146 @@ -1553,9 +1553,9 @@ Citation-Enhanced Keyphrase Extraction from Research Papers: A Supervised Approach CorneliaCaragea - Florin AdrianBulgarov + Florin AdrianBulgarov AndreeaGodea - SujathaDas Gollapalli + SujathaDas Gollapalli 1435–1446 D14-1150 10.3115/v1/D14-1150 @@ -1564,7 +1564,7 @@ Using Mined Coreference Chains as a Resource for a Semantic Task HeikeAdel - HinrichSchütze + HinrichSchütze 1447–1452 D14-1151 10.3115/v1/D14-1151 @@ -1573,7 +1573,7 @@ Financial Keyword Expansion via Continuous Word Vector Representations - Ming-FengTsai + Ming-FengTsai Chuan-JuWang 1453–1458 D14-1152 @@ -1618,8 +1618,8 @@ Shih-HungLiu BerlinChen Ea-EeJan - Hsin-MinWang - Wen-LianHsu + Hsin-MinWang + Wen-LianHsu Hsin-HsiChen 1474–1480 D14-1156 @@ -1630,7 +1630,7 @@ Staying on Topic: An Indicator of Power in Political Debates VinodkumarPrabhakaran AshimaArora - OwenRambow + OwenRambow 1481–1486 D14-1157 10.3115/v1/D14-1157 @@ -1638,10 +1638,10 @@ Language Modeling with Power Low Rank Ensembles - Ankur P.Parikh + Ankur P.Parikh AvneeshSaluja - ChrisDyer - EricXing + ChrisDyer + EricXing 1487–1498 D14-1158 10.3115/v1/D14-1158 @@ -1657,7 +1657,7 @@ BrittanyHarding BradHuang PeterClark - Christopher D.Manning + Christopher D.Manning 1499–1510 D14-1159 10.3115/v1/D14-1159 @@ -1669,7 +1669,7 @@ <fixed-case>S</fixed-case>ensicon: An Automatically Constructed Sensorial Lexicon Serra SinemTekiroğlu - GözdeÖzbal + GözdeÖzbal CarloStrapparava 1511–1521 D14-1160 @@ -1680,8 +1680,8 @@ Word Semantic Representations using <fixed-case>B</fixed-case>ayesian Probabilistic Tensor Factorization JingweiZhang JeremySalwen - MichaelGlass - AlfioGliozzo + MichaelGlass + AlfioGliozzo 1522–1531 D14-1161 10.3115/v1/D14-1161 @@ -1691,7 +1691,7 @@ <fixed-case>G</fixed-case>lo<fixed-case>V</fixed-case>e: Global Vectors for Word Representation JeffreyPennington RichardSocher - ChristopherManning + ChristopherManning 1532–1543 D14-1162 10.3115/v1/D14-1162 @@ -1713,7 +1713,7 @@ GaborAngeli JulieTibshirani JeanWu - Christopher D.Manning + Christopher D.Manning 1556–1567 D14-1164 10.3115/v1/D14-1164 @@ -1723,9 +1723,9 @@ Typed Tensor Decomposition of Knowledge Bases for Relation Extraction Kai-WeiChang - Wen-tauYih + Wen-tauYih BishanYang - ChristopherMeek + ChristopherMeek 1568–1579 D14-1165 10.3115/v1/D14-1165 @@ -1734,7 +1734,7 @@ A convex relaxation for weakly supervised relation extraction - ÉdouardGrave + ÉdouardGrave 1580–1590 D14-1166 10.3115/v1/D14-1166 @@ -1757,7 +1757,7 @@ ShimaGerani YasharMehdad GiuseppeCarenini - Raymond T.Ng + Raymond T.Ng BitaNejat 1602–1613 D14-1168 @@ -1797,7 +1797,7 @@ Assessing the Impact of Translation Errors on Machine Translation Quality with Mixed-effects Models MarcelloFederico - MatteoNegri + MatteoNegri LuisaBentivogli MarcoTurchi 1643–1653 @@ -1810,7 +1810,7 @@ XiaolinWang MasaoUtiyama AndrewFinch - EiichiroSumita + EiichiroSumita 1654–1664 D14-1173 10.3115/v1/D14-1173 @@ -1823,7 +1823,7 @@ HuaWu ConghuiZhu HaifengWang - TiejunZhao + TiejunZhao 1665–1675 D14-1174 10.3115/v1/D14-1174 @@ -1831,7 +1831,7 @@ Word Translation Prediction for Morphologically Rich Languages with Bilingual Neural Networks - Ke M.Tran + Ke M.Tran AriannaBisazza ChristofMonz 1676–1688 @@ -1852,7 +1852,7 @@ Combining String and Context Similarity for Bilingual Term Alignment from Comparable Corpora GeorgiosKontonatsios IoannisKorkontzelos - Jun’ichiTsujii + Jun’ichiTsujii SophiaAnaniadou 1701–1712 D14-1177 @@ -1861,7 +1861,7 @@ Random Manhattan Integer Indexing: Incremental <fixed-case>L</fixed-case>1 Normed Vector Space Construction - BehrangQ. Zadeh + BehrangQ. Zadeh SiegfriedHandschuh 1713–1723 D14-1178 @@ -1924,7 +1924,7 @@ Improved Decipherment of Homophonic Ciphers MalteNuhn JulianSchamper - HermannNey + HermannNey 1764–1768 D14-1184 10.3115/v1/D14-1184 @@ -1975,7 +1975,7 @@ Detecting Non-compositional <fixed-case>MWE</fixed-case> Components using <fixed-case>W</fixed-case>iktionary BaharSalehi PaulCook - TimothyBaldwin + TimothyBaldwin 1792–1797 D14-1189 10.3115/v1/D14-1189 @@ -1983,8 +1983,8 @@ Joint Emotion Analysis via Multi-task <fixed-case>G</fixed-case>aussian Processes - DanielBeck - TrevorCohn + DanielBeck + TrevorCohn LuciaSpecia 1798–1803 D14-1190 @@ -2034,7 +2034,7 @@ Joint Decoding of Tree Transduction Models for Sentence Compression - Jin-geYao + Jin-geYao XiaojunWan JianguoXiao 1828–1833 @@ -2064,7 +2064,7 @@ Constructing Information Networks Using One Single Model - QiLi + QiLi HengJi YuHong SujianLi @@ -2075,7 +2075,7 @@ Event Role Extraction using Domain-Relevant Word Representations - EmanuelaBoroş + EmanuelaBoroş RomaricBesançon OlivierFerret BrigitteGrau @@ -2116,8 +2116,8 @@ Type-Aware Distantly Supervised Relation Extraction with Linked Arguments MitchellKoch JohnGilmer - StephenSoderland - Daniel S.Weld + StephenSoderland + Daniel S.Weld 1891–1901 D14-1203 10.3115/v1/D14-1203 @@ -2155,9 +2155,9 @@ <fixed-case>CTP</fixed-case>s: Contextual Temporal Profiles for Time Scoping Facts using State Change Detection - Derry TantiWijaya - NdapandulaNakashole - Tom M.Mitchell + Derry TantiWijaya + NdapandulaNakashole + Tom M.Mitchell 1930–1936 D14-1207 10.3115/v1/D14-1207 @@ -2166,7 +2166,7 @@ Noisy Or-based model for Relation Extraction using Distant Supervision AjayNagesh - GholamrezaHaffari + GholamrezaHaffari GaneshRamakrishnan 1937–1941 D14-1208 @@ -2185,8 +2185,8 @@ Latent-Variable Synchronous <fixed-case>CFG</fixed-case>s for Hierarchical Translation AvneeshSaluja - ChrisDyer - Shay B.Cohen + ChrisDyer + Shay B.Cohen 1953–1964 D14-1210 10.3115/v1/D14-1210 @@ -2196,7 +2196,7 @@ Gender and Power: How Gender and Gender Environment Affect Manifestations of Power VinodkumarPrabhakaran Emily E.Reid - OwenRambow + OwenRambow 1965–1976 D14-1211 10.3115/v1/D14-1211 @@ -2215,8 +2215,8 @@ Self-disclosure topic model for classifying and analyzing <fixed-case>T</fixed-case>witter conversations JinYeongBak - Chin-YewLin - AliceOh + Chin-YewLin + AliceOh 1986–1996 D14-1213 10.3115/v1/D14-1213 @@ -2227,8 +2227,8 @@ Major Life Event Extraction from <fixed-case>T</fixed-case>witter based on Congratulations/Condolences Speech Acts JiweiLi AlanRitter - ClaireCardie - EduardHovy + ClaireCardie + EduardHovy 1997–2007 D14-1214 10.3115/v1/D14-1214 @@ -2256,9 +2256,9 @@ Learning Spatial Knowledge for Text to 3<fixed-case>D</fixed-case> Scene Generation - AngelChang + AngelChang ManolisSavva - Christopher D.Manning + Christopher D.Manning 2028–2038 D14-1217 10.3115/v1/D14-1217 @@ -2267,7 +2267,7 @@ A Model of Coherence Based on Distributed Sentence Representation JiweiLi - EduardHovy + EduardHovy 2039–2048 D14-1218 10.3115/v1/D14-1218 @@ -2275,7 +2275,7 @@ Discriminative Reranking of Discourse Parses Using Tree Kernels - ShafiqJoty + ShafiqJoty AlessandroMoschitti 2049–2060 D14-1219 @@ -2286,7 +2286,7 @@ Recursive Deep Models for Discourse Parsing JiweiLi RumengLi - EduardHovy + EduardHovy 2061–2069 D14-1220 10.3115/v1/D14-1220 @@ -2315,8 +2315,8 @@ Resolving Referring Expressions in Conversational Dialogs for Natural User Interfaces AsliCelikyilmaz ZhalehFeizollahi - DilekHakkani-Tur - RuhiSarikaya + DilekHakkani-Tur + RuhiSarikaya 2094–2104 D14-1223 10.3115/v1/D14-1223 @@ -2328,7 +2328,7 @@ WenheFeng JingSun FangKong - GuodongZhou + GuodongZhou 2105–2114 D14-1224 10.3115/v1/D14-1224 @@ -2339,8 +2339,8 @@ Prune-and-Score: Learning for Greedy Coreference Resolution ChaoMa Janardhan RaoDoppa - J. WalkerOrr - PrashanthMannem + J. WalkerOrr + PrashanthMannem XiaoliFern TomDietterich PrasadTadepalli @@ -2374,7 +2374,7 @@ Sentiment Analysis of Social Media Texts - Saif M.Mohammad + Saif M.Mohammad XiaodanZhu Automatically detecting sentiment of product reviews, blogs, tweets, and SMS messages has attracted extensive interest from both the academia and industry. It has a number of applications, including: tracking sentiment towards products, movies, politicians, etc.; improving customer relation models; detecting happiness and well-being; and improving automatic dialogue systems. In this tutorial, we will describe how you can create a state-of-the-art sentiment analysis system, with a focus on social media posts. @@ -2401,7 +2401,7 @@ The content of the tutorial will be divided into four blocks of 45 minutes each, Semantic Parsing with <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammars YoavArtzi NicholasFitzgerald - LukeZettlemoyer + LukeZettlemoyer Semantic parsers map natural language sentences to formal representations of their underlying meaning. Building accurate semantic parsers without prohibitive engineering costs is a long-standing, open research problem. The tutorial will describe general principles for building semantic parsers. The presentation will be divided into two main parts: learning and modeling. In the learning part, we will describe a unified approach for learning Combinatory Categorial Grammar (CCG) semantic parsers, that induces both a CCG lexicon and the parameters of a parsing model. The approach learns from data with labeled meaning representations, as well as from more easily gathered weak supervision. It also enables grounded learning where the semantic parser is used in an interactive environment, for example to read and execute instructions. The modeling section will include best practices for grammar design and choice of semantic representation. We will motivate our use of lambda calculus as a language for building and representing meaning with examples from several domains. @@ -2411,7 +2411,7 @@ The ideas we will discuss are widely applicable. The semantic modeling approach, Linear Programming Decoders in Natural Language Processing: From Integer Programming to Message Passing and Dual Decomposition - André F. T.Martins + André F. T.Martins This tutorial will cover the theory and practice of linear programming decoders. This class of decoders encompasses a variety of techniques that have enjoyed great success in devising structured models for natural language processing (NLP). Along the tutorial, we provide a unified view of different algorithms and modeling techniques, including belief propagation, dual decomposition, integer linear programming, Markov logic, and constrained conditional models. Various applications in NLP will serve as a motivation. There is a long string of work using integer linear programming (ILP) formulations in NLP, for example in semantic role labeling, machine translation, summarization, dependency parsing, coreference resolution, and opinion mining, to name just a few. At the heart of these approaches is the ability to encode logic and budget constraints (common in NLP and information retrieval) as linear inequalities. Thanks to general purpose solvers (such as Gurobi, CPLEX, or GLPK), the practitioner can abstract away from the decoding algorithm and focus on developing a powerful model. A disadvantage, however, is that general solvers do not scale well to large problem instances, since they fail to exploit the structure of the problem. @@ -2451,7 +2451,7 @@ This second part will focus mostly on the construction of embeddings for multi-r Natural Language Processing of <fixed-case>A</fixed-case>rabic and its Dialects - MonaDiab + MonaDiab NizarHabash This tutorial introduces the different challenges and current solutions to the automatic processing of Arabic and its dialects. The tutorial has two parts: First, we present a discussion of generic issues relevant to Arabic NLP and detail dialectal linguistic issues and the challenges they pose for NLP. In the second part, we review the state-of-the-art in Arabic processing covering several enabling technologies and applications, e.g., dialect identification, morphological processing (analysis, disambiguation, tokenization, POS tagging), parsing, and machine translation. diab-habash-2014-natural diff --git a/data/xml/D15.xml b/data/xml/D15.xml index 2f1951e02e..8c35eb4f71 100644 --- a/data/xml/D15.xml +++ b/data/xml/D15.xml @@ -4,7 +4,7 @@ Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing D15-1 - LluísMàrquez + LluísMàrquez ChrisCallison-Burch JianSu 10.18653/v1/D15-1 @@ -33,9 +33,9 @@ Distributional vectors encode referential attributes AbhijeetGupta - GemmaBoleda - MarcoBaroni - SebastianPadó + GemmaBoleda + MarcoBaroni + SebastianPadó 12–21 D15-1002 10.18653/v1/D15-1002 @@ -44,8 +44,8 @@ Building a shared world: mapping distributional to model-theoretic semantic spaces - AurélieHerbelot - Eva MariaVecchi + AurélieHerbelot + Eva MariaVecchi 22–32 D15-1003 10.18653/v1/D15-1003 @@ -66,7 +66,7 @@ Reordering Grammar Induction MilošStanojević - KhalilSima’an + KhalilSima’an 44–54 D15-1005 10.18653/v1/D15-1005 @@ -79,7 +79,7 @@ AlvinGrissom II JohnMorgan JordanBoyd-Graber - HalDaumé III + HalDaumé III 55–64 D15-1006 10.18653/v1/D15-1006 @@ -88,7 +88,7 @@ Identifying Political Sentiment between Nation States with Social Media - NathanaelChambers + NathanaelChambers VictorBowen EthanGenco XisenTian @@ -104,7 +104,7 @@ Open Extraction of Fine-Grained Political Statements DavidBamman - Noah A.Smith + Noah A.Smith 76–85 D15-1008 10.18653/v1/D15-1008 @@ -125,8 +125,8 @@ Semantic Annotation for Microblog Topics Using <fixed-case>W</fixed-case>ikipedia Temporal Information - TuanTran - Nam KhanhTran + TuanTran + Nam KhanhTran AsmelashTeka Hadgu RobertJäschke 97–106 @@ -138,7 +138,7 @@ System Combination for Multi-document Summarization KaiHong - MitchellMarcus + MitchellMarcus AniNenkova 107–117 D15-1011 @@ -147,7 +147,7 @@ Phrase-based Compressive Cross-Language Summarization - Jin-geYao + Jin-geYao XiaojunWan JianguoXiao 118–127 @@ -167,7 +167,7 @@ Indicative Tweet Generation: An Extractive Summarization Problem? PriyaSidhaye - Jackie Chi KitCheung + Jackie Chi KitCheung 138–147 D15-1014 10.18653/v1/D15-1014 @@ -196,7 +196,7 @@ Monotone Submodularity in Opinion Summaries JayanthJayanth JayaprakashSundararaj - PushpakBhattacharyya + PushpakBhattacharyya 169–178 D15-1017 10.18653/v1/D15-1017 @@ -205,7 +205,7 @@ Joint Prediction for Entity/Event-Level Sentiment Analysis using Probabilistic Soft Logic Models LingjiaDeng - JanyceWiebe + JanyceWiebe 179–189 D15-1018 10.18653/v1/D15-1018 @@ -214,8 +214,8 @@ Learning to Recognize Affective Polarity in Similes AshequlQadir - EllenRiloff - MarilynWalker + EllenRiloff + MarilynWalker 190–200 D15-1019 10.18653/v1/D15-1019 @@ -226,7 +226,7 @@ TongtaoZhang HongzhiLi HengJi - Shih-FuChang + Shih-FuChang 201–206 D15-1020 10.18653/v1/D15-1020 @@ -236,7 +236,7 @@ A Survey of Current Datasets for Vision and Language Research FrancisFerraro NasrinMostafazadeh - Ting-HaoHuang + Ting-HaoHuang LucyVanderwende JacobDevlin MichelGalley @@ -253,7 +253,7 @@ YingLu EmmanuelDellandrea FrancescMoreno-Noguer - RobertGaizauskas + RobertGaizauskas 214–220 D15-1022 10.18653/v1/D15-1022 @@ -263,7 +263,7 @@ On A Strictly Convex <fixed-case>IBM</fixed-case> Model 1 AndreiSimion - MichaelCollins + MichaelCollins CliffStein 221–226 D15-1023 @@ -302,7 +302,7 @@ A Model of Zero-Shot Learning of Spoken Language Understanding MajidYazdani - JamesHenderson + JamesHenderson 244–249 D15-1027 10.18653/v1/D15-1027 @@ -312,8 +312,8 @@ Modeling Tweet Arrival Times using Log-<fixed-case>G</fixed-case>aussian Cox Processes MichalLukasik P. K.Srijith - TrevorCohn - KalinaBontcheva + TrevorCohn + KalinaBontcheva 250–255 D15-1028 10.18653/v1/D15-1028 @@ -365,7 +365,7 @@ Learning Better Embeddings for Rare Words Using Distributional Representations IrinaSergienya - HinrichSchütze + HinrichSchütze 280–285 D15-1033 10.18653/v1/D15-1033 @@ -373,7 +373,7 @@ Composing Relationships with Translations - AlbertoGarcía-Durán + AlbertoGarcía-Durán AntoineBordes NicolasUsunier 286–290 @@ -417,7 +417,7 @@ Traversing Knowledge Graphs in Vector Space KelvinGuu - JohnMiller + JohnMiller PercyLiang 318–327 D15-1038 @@ -428,7 +428,7 @@ Density-Driven Cross-Lingual Transfer of Dependency Parsers Mohammad SadeghRasooli - MichaelCollins + MichaelCollins 328–338 D15-1039 10.18653/v1/D15-1039 @@ -437,8 +437,8 @@ A Neural Network Model for Low-Resource <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Parsing - LongDuong - TrevorCohn + LongDuong + TrevorCohn StevenBird PaulCook 339–348 @@ -450,8 +450,8 @@ Improved Transition-based Parsing by Modeling Characters instead of Words with <fixed-case>LSTM</fixed-case>s MiguelBallesteros - ChrisDyer - Noah A.Smith + ChrisDyer + Noah A.Smith 349–359 D15-1041 10.18653/v1/D15-1041 @@ -463,7 +463,7 @@ KatjaFilippova EnriqueAlfonseca Carlos A.Colmenares - LukaszKaiser + LukaszKaiser OriolVinyals 360–368 D15-1042 @@ -483,7 +483,7 @@ A Neural Attention Model for Abstractive Sentence Summarization - Alexander M.Rush + Alexander M.Rush SumitChopra JasonWeston 379–389 @@ -506,7 +506,7 @@ Hashtag Recommendation Using <fixed-case>D</fixed-case>irichlet Process Mixture Models Incorporating Types of Hashtags YeyunGong QiZhang - XuanjingHuang + XuanjingHuang 401–410 D15-1046 10.18653/v1/D15-1046 @@ -538,7 +538,7 @@ Flexible Domain Adaptation for Automated Essay Scoring Using Correlated Linear Regression PeterPhandi - Kian Ming A.Chai + Kian Ming A.Chai Hwee TouNg 431–439 D15-1049 @@ -549,8 +549,8 @@ Show Me Your Evidence - an Automatic Method for Context Dependent Evidence Detection RutyRinott LenaDankin - CarlosAlzate Perez - Mitesh M.Khapra + CarlosAlzate Perez + Mitesh M.Khapra EhudAharoni NoamSlonim 440–450 @@ -560,7 +560,7 @@ Spelling Correction of User Search Queries through Statistical Machine Translation - SašaHasan + SašaHasan CarmenHeger SaabMansour 451–460 @@ -590,7 +590,7 @@ Joint Embedding of Query and Ad by Leveraging Implicit Feedback - SungjinLee + SungjinLee YifanHu 482–491 D15-1054 @@ -601,7 +601,7 @@ Automatic Extraction of Time Expressions Accross Domains in <fixed-case>F</fixed-case>rench Narratives Mike DonaldTapi Nzali XavierTannier - AurélieNévéol + AurélieNévéol 492–498 D15-1055 10.18653/v1/D15-1055 @@ -611,7 +611,7 @@ Semi-Supervised Bootstrapping of Relationship Extractors with Distributional Semantics David S.Batista BrunoMartins - Mário J.Silva + Mário J.Silva 499–504 D15-1056 10.18653/v1/D15-1056 @@ -631,7 +631,7 @@ Named entity recognition with document-specific <fixed-case>KB</fixed-case> tag gazetteers WillRadford XavierCarreras - JamesHenderson + JamesHenderson 512–517 D15-1058 10.18653/v1/D15-1058 @@ -639,9 +639,9 @@ “A Spousal Relation Begins with a Deletion of engage and Ends with an Addition of divorce”: Learning State Changing Verbs from <fixed-case>W</fixed-case>ikipedia Revision History - Derry TantiWijaya - NdapandulaNakashole - TomMitchell + Derry TantiWijaya + NdapandulaNakashole + TomMitchell 518–523 D15-1059 10.18653/v1/D15-1059 @@ -651,8 +651,8 @@ Improving Distant Supervision for Information Extraction Using Label Propagation Through Lists LidongBing SnehaChaudhari - RichardWang - WilliamCohen + RichardWang + WilliamCohen 524–529 D15-1060 10.18653/v1/D15-1060 @@ -663,7 +663,7 @@ An Entity-centric Approach for Overcoming Knowledge Graph Sparsity ManjunathHegde - Partha P.Talukdar + Partha P.Talukdar 530–535 D15-1061 10.18653/v1/D15-1061 @@ -702,7 +702,7 @@ Inferring Binary Relation Schemas for Open Information Extraction KangqiLuo XushengLuo - KennyZhu + KennyZhu 555–560 D15-1065 10.18653/v1/D15-1065 @@ -715,7 +715,7 @@ ZhiweiZhang LejianLiao LuoSi - Chin-YewLin + Chin-YewLin 561–566 D15-1066 10.18653/v1/D15-1066 @@ -723,7 +723,7 @@ Online Sentence Novelty Scoring for Topical Document Streams - SungjinLee + SungjinLee 567–572 D15-1067 10.18653/v1/D15-1067 @@ -731,13 +731,13 @@ Global Thread-level Inference for Comment Classification in Community Question Answering - ShafiqJoty + ShafiqJoty AlbertoBarrón-Cedeño GiovanniDa San Martino SimoneFilice LluísMàrquez AlessandroMoschitti - PreslavNakov + PreslavNakov 573–578 D15-1068 10.18653/v1/D15-1068 @@ -805,10 +805,10 @@ A large annotated corpus for learning natural language inference - Samuel R.Bowman + Samuel R.Bowman GaborAngeli ChristopherPotts - Christopher D.Manning + Christopher D.Manning 632–642 D15-1075 10.18653/v1/D15-1075 @@ -819,7 +819,7 @@ Question-Answer Driven Semantic Role Labeling: Using Natural Language to Annotate Natural Language LuhengHe MikeLewis - LukeZettlemoyer + LukeZettlemoyer 643–653 D15-1076 10.18653/v1/D15-1076 @@ -845,7 +845,7 @@ YuHong XiaobinWang YadongChen - JianWang + JianWang TongtaoZhang HengJi 665–675 @@ -908,7 +908,7 @@ Corpus-level Fine-grained Entity Typing Using Contextual Information YadollahYaghoobzadeh - HinrichSchütze + HinrichSchütze 715–725 D15-1083 10.18653/v1/D15-1083 @@ -917,7 +917,7 @@ Knowledge Base Unification via Sense Embeddings and Disambiguation ClaudioDelli Bovi - LuisEspinosa-Anke + LuisEspinosa-Anke RobertoNavigli 726–736 D15-1084 @@ -928,7 +928,7 @@ Open-Domain Name Error Detection using a Multi-Task <fixed-case>RNN</fixed-case> HaoCheng HaoFang - MariOstendorf + MariOstendorf 737–746 D15-1085 10.18653/v1/D15-1085 @@ -1002,7 +1002,7 @@ XipengQiu ChenxiZhu ShiyuWu - XuanjingHuang + XuanjingHuang 793–798 D15-1092 10.18653/v1/D15-1092 @@ -1075,7 +1075,7 @@ LiLi HoufengWang XuSun - BaobaoChang + BaobaoChang ShiZhao LeiSha 835–839 @@ -1127,7 +1127,7 @@ Joint Entity Recognition and Disambiguation GangLuo XiaojiangHuang - Chin-YewLin + Chin-YewLin ZaiqingNie 879–888 D15-1104 @@ -1150,7 +1150,7 @@ Hierarchical Recurrent Neural Network for Document Modeling RuiLin ShujieLiu - MuyunYang + MuyunYang MuLi MingZhou ShengLi @@ -1174,7 +1174,7 @@ Dual Decomposition Inference for Graphical Models over Strings NanyunPeng RyanCotterell - JasonEisner + JasonEisner 917–927 D15-1108 10.18653/v1/D15-1108 @@ -1185,7 +1185,7 @@ Discourse parsing for multi-party chat dialogues StergosAfantenos EricKow - NicholasAsher + NicholasAsher JérémyPerret 928–937 D15-1109 @@ -1207,7 +1207,7 @@ Feature-Rich Two-Stage Logistic Regression for Monolingual Alignment - Md ArafatSultan + Md ArafatSultan StevenBethard TamaraSumner 949–959 @@ -1241,9 +1241,9 @@ Mise en Place: Unsupervised Interpretation of Instructional Recipes - ChloéKiddon + ChloéKiddon Ganesa ThandavamPonnuraj - LukeZettlemoyer + LukeZettlemoyer YejinChoi 982–992 D15-1114 @@ -1253,7 +1253,7 @@ Semantic Framework for Comparison Structures in Natural Language OmidBakhshandeh - JamesAllen + JamesAllen 993–1002 D15-1115 10.18653/v1/D15-1115 @@ -1271,9 +1271,9 @@ Incorporating Trustiness and Collective Synonym/Contrastive Evidence into Taxonomy Construction - Luu Anh Tuan - Jung-jaeKim - See KiongNg + Anh TuanLuu + Jung-jaeKim + See KiongNg 1013–1022 D15-1117 10.18653/v1/D15-1117 @@ -1292,7 +1292,7 @@ Improving fast_align by Reordering ChenchenDing MasaoUtiyama - EiichiroSumita + EiichiroSumita 1034–1039 D15-1119 10.18653/v1/D15-1119 @@ -1309,7 +1309,7 @@ A Discriminative Training Procedure for Continuous Translation Models - Quoc-KhanhDo + Quoc-KhanhDo AlexandreAllauzen FrançoisYvon 1046–1052 @@ -1319,8 +1319,8 @@ System Combination for Machine Translation through Paraphrasing - Wei-YunMa - KathleenMcKeown + Wei-YunMa + KathleenMcKeown 1053–1058 D15-1122 10.18653/v1/D15-1122 @@ -1339,8 +1339,8 @@ <fixed-case>R</fixed-case>e<fixed-case>V</fixed-case>al: A Simple and Effective Machine Translation Evaluation Metric Based on Recurrent Neural Networks RohitGupta - ConstantinOrăsan - Josefvan Genabith + ConstantinOrăsan + Josefvan Genabith 1066–1072 D15-1124 10.18653/v1/D15-1124 @@ -1373,8 +1373,8 @@ EvangelosPapalexakis ChristosFaloutsos NikosSidiropoulos - TomMitchell - Partha P.Talukdar + TomMitchell + Partha P.Talukdar XiaoFu 1084–1088 D15-1127 @@ -1386,7 +1386,7 @@ AndrewFinch XiaolinWang MasaoUtiyama - EiichiroSumita + EiichiroSumita 1089–1094 D15-1128 10.18653/v1/D15-1128 @@ -1396,7 +1396,7 @@ Rule Selection with Soft Syntactic Features for String-to-Tree Statistical Machine Translation FabienneBraune NinaSeemann - AlexanderFraser + AlexanderFraser 1095–1101 D15-1129 10.18653/v1/D15-1129 @@ -1428,7 +1428,7 @@ The Overall Markedness of Discourse Relations LifengJin - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe 1114–1119 D15-1132 10.18653/v1/D15-1132 @@ -1437,7 +1437,7 @@ Experiments in Open Domain Deception Detection VerónicaPérez-Rosas - RadaMihalcea + RadaMihalcea 1120–1125 D15-1133 10.18653/v1/D15-1133 @@ -1446,7 +1446,7 @@ A model of rapid phonotactic generalization TalLinzen - TimothyO’Donnell + TimothyO’Donnell 1126–1131 D15-1134 10.18653/v1/D15-1134 @@ -1456,7 +1456,7 @@ Automatically Solving Number Word Problems by Semantic Parsing and Reasoning ShumingShi YuehuiWang - Chin-YewLin + Chin-YewLin XiaojiangLiu YongRui 1132–1142 @@ -1520,7 +1520,7 @@ XipengQiu ChenxiZhu PengfeiLiu - XuanjingHuang + XuanjingHuang 1197–1206 D15-1141 10.18653/v1/D15-1141 @@ -1542,8 +1542,8 @@ HidetakaKamigaito TaroWatanabe HiroyaTakamura - ManabuOkumura - EiichiroSumita + ManabuOkumura + EiichiroSumita 1217–1227 D15-1143 10.18653/v1/D15-1143 @@ -1563,7 +1563,7 @@ Graph-Based Collective Lexical Selection for Statistical Machine Translation JinsongSu - DeyiXiong + DeyiXiong ShujianHuang XianpeiHan JunfengYao @@ -1575,7 +1575,7 @@ Bilingual Correspondence Recursive Autoencoder for Statistical Machine Translation JinsongSu - DeyiXiong + DeyiXiong BiaoZhang YangLiu JunfengYao @@ -1587,12 +1587,12 @@ How to Avoid Unwanted Pregnancies: Domain Adaptation using Neural Network Models - ShafiqJoty + ShafiqJoty HassanSajjad NadirDurrani KamlaAl-Mannai AhmedAbdelali - StephanVogel + StephanVogel 1259–1270 D15-1147 10.18653/v1/D15-1147 @@ -1621,7 +1621,7 @@ Part-of-speech Taggers for Low-resource Languages using <fixed-case>CCA</fixed-case> Features Young-BumKim BenjaminSnyder - RuhiSarikaya + RuhiSarikaya 1292–1302 D15-1150 10.18653/v1/D15-1150 @@ -1629,7 +1629,7 @@ An Improved Tag Dictionary for Faster Part-of-Speech Tagging - RobertMoore + RobertMoore 1303–1308 D15-1151 10.18653/v1/D15-1151 @@ -1657,7 +1657,7 @@ Efficient Inner-to-outer Greedy Algorithm for Higher-order Labeled Dependency Parsing XuezheMa - EduardHovy + EduardHovy 1322–1328 D15-1154 10.18653/v1/D15-1154 @@ -1667,7 +1667,7 @@ Online Updating of Word Representations for Part-of-Speech Tagging WenpengYin TobiasSchnabel - HinrichSchütze + HinrichSchütze 1329–1334 D15-1155 10.18653/v1/D15-1155 @@ -1690,7 +1690,7 @@ JohannRoturier CorentinRibeyre TeresaLynn - JosephLe Roux + JosephLe Roux 1341–1347 D15-1157 10.18653/v1/D15-1157 @@ -1732,8 +1732,8 @@ YuliaTsvetkov SilvioAmir RamónFermandez - ChrisDyer - Alan WBlack + ChrisDyer + Alan WBlack IsabelTrancoso Chu-ChengLin 1367–1372 @@ -1764,7 +1764,7 @@ Learning Semantic Representations for Nonterminals in Hierarchical Phrase-Based Translation XingWang - DeyiXiong + DeyiXiong MinZhang 1391–1400 D15-1164 @@ -1778,7 +1778,7 @@ TamerAlkhouli Jan-ThorstenPeter JoernWuebker - HermannNey + HermannNey 1401–1411 D15-1165 10.18653/v1/D15-1165 @@ -1787,9 +1787,9 @@ Effective Approaches to Attention-based Neural Machine Translation - ThangLuong + ThangLuong HieuPham - Christopher D.Manning + Christopher D.Manning 1412–1421 D15-1166 10.18653/v1/D15-1166 @@ -1811,8 +1811,8 @@ Fine-grained Opinion Mining with Recurrent Neural Networks and Word Embeddings PengfeiLiu - ShafiqJoty - HelenMeng + ShafiqJoty + HelenMeng 1433–1443 D15-1168 10.18653/v1/D15-1168 @@ -1823,7 +1823,7 @@ Joint <fixed-case>A</fixed-case>* <fixed-case>CCG</fixed-case> Parsing and Semantic Role Labelling MikeLewis LuhengHe - LukeZettlemoyer + LukeZettlemoyer 1444–1454 D15-1169 10.18653/v1/D15-1169 @@ -1832,10 +1832,10 @@ Improving Semantic Parsing with Enriched Synchronous Context-Free Grammar - JunhuiLi + JunhuiLi MuhuaZhu WeiLu - GuodongZhou + GuodongZhou 1455–1465 D15-1170 10.18653/v1/D15-1170 @@ -1871,7 +1871,7 @@ Efficient and Expressive Knowledge Base Completion Using Subgraph Feature Extraction MattGardner - TomMitchell + TomMitchell 1488–1498 D15-1173 10.18653/v1/D15-1173 @@ -1895,8 +1895,8 @@ A Utility Model of Authors in the Scientific Community YanchuanSim - BryanRoutledge - Noah A.Smith + BryanRoutledge + Noah A.Smith 1510–1519 D15-1175 10.18653/v1/D15-1175 @@ -1905,12 +1905,12 @@ Finding Function in Form: Compositional Character Models for Open Vocabulary Word Representation WangLing - ChrisDyer - Alan WBlack + ChrisDyer + Alan WBlack IsabelTrancoso RamónFermandez SilvioAmir - LuísMarujo + LuísMarujo TiagoLuís 1520–1530 D15-1176 @@ -1930,7 +1930,7 @@ Conversation Trees: A Grammar Model for Topic Structure in Forums AnnieLouis - Shay B.Cohen + Shay B.Cohen 1543–1553 D15-1178 10.18653/v1/D15-1178 @@ -2001,7 +2001,7 @@ Recognizing Textual Entailment Using Probabilistic Inference LeiSha SujianLi - BaobaoChang + BaobaoChang ZhifangSui TingsongJiang 1620–1625 @@ -2013,7 +2013,7 @@ <fixed-case>C</fixed-case>hinese Semantic Role Labeling with Bidirectional Recurrent Neural Networks ZhenWang TingsongJiang - BaobaoChang + BaobaoChang ZhifangSui 1626–1631 D15-1186 @@ -2023,8 +2023,8 @@ Unsupervised Negation Focus Identification with Word-Topic Graph Model BoweiZou - GuodongZhou - QiaomingZhu + GuodongZhou + QiaomingZhu 1632–1636 D15-1187 10.18653/v1/D15-1187 @@ -2043,7 +2043,7 @@ KentonLee YoavArtzi YejinChoi - LukeZettlemoyer + LukeZettlemoyer 1643–1648 D15-1189 10.18653/v1/D15-1189 @@ -2054,7 +2054,7 @@ JulienKloetzer KentaroTorisawa ChikaraHashimoto - Jong-HoonOh + Jong-HoonOh 1649–1655 D15-1190 10.18653/v1/D15-1190 @@ -2074,7 +2074,7 @@ Learning to Identify the Best Contexts for Knowledge-based <fixed-case>WSD</fixed-case> EvgeniaWasserman Pritsker - WilliamCohen + WilliamCohen EinatMinkov 1662–1667 D15-1192 @@ -2128,7 +2128,7 @@ A Strong Lexical Matching Method for the Machine Comprehension Test EllerySmith NicolaGreco - MatkoBošnjak + MatkoBošnjak AndreasVlachos 1693–1698 D15-1197 @@ -2139,7 +2139,7 @@ Broad-coverage <fixed-case>CCG</fixed-case> Semantic Parsing with <fixed-case>AMR</fixed-case> YoavArtzi KentonLee - LukeZettlemoyer + LukeZettlemoyer 1699–1710 D15-1198 10.18653/v1/D15-1198 @@ -2150,11 +2150,11 @@ Semantically Conditioned <fixed-case>LSTM</fixed-case>-based Natural Language Generation for Spoken Dialogue Systems Tsung-HsienWen - MilicaGašić + MilicaGašić NikolaMrkšić Pei-HaoSu DavidVandyke - SteveYoung + SteveYoung 1711–1721 D15-1199 10.18653/v1/D15-1199 @@ -2165,7 +2165,7 @@ Do Multi-Sense Embeddings Improve Natural Language Understanding? JiweiLi - DanJurafsky + DanJurafsky 1722–1732 D15-1200 10.18653/v1/D15-1200 @@ -2176,7 +2176,7 @@ Learning Semantic Composition to Detect Non-compositionality of Multiword Expressions MajidYazdani MeghdadFarahmand - JamesHenderson + JamesHenderson 1733–1742 D15-1201 10.18653/v1/D15-1201 @@ -2218,7 +2218,7 @@ Improved Relation Extraction with Feature-Rich Compositional Embedding Models - Matthew R.Gormley + Matthew R.Gormley MoYu MarkDredze 1774–1784 @@ -2269,7 +2269,7 @@ MasaoUtiyama AndrewFinch TaroWatanabe - EiichiroSumita + EiichiroSumita 1817–1827 D15-1209 10.18653/v1/D15-1209 @@ -2295,7 +2295,7 @@ YueZhang MeishanZhang YafengRen - DonghongJi + DonghongJi 1837–1846 D15-1211 10.18653/v1/D15-1211 @@ -2303,7 +2303,7 @@ Multilingual discriminative lexicalized phrase structure parsing - BenoitCrabbé + BenoitCrabbé 1847–1856 D15-1212 10.18653/v1/D15-1212 @@ -2321,7 +2321,7 @@ Diversity in Spectral Learning for Natural Language Parsing ShashiNarayan - Shay B.Cohen + Shay B.Cohen 1868–1878 D15-1214 10.18653/v1/D15-1214 @@ -2333,7 +2333,7 @@ YaqianZhou ChenxiZhu XipengQiu - XuanjingHuang + XuanjingHuang 1879–1889 D15-1215 10.18653/v1/D15-1215 @@ -2343,7 +2343,7 @@ Turn-taking phenomena in incremental dialogue systems HatimKhouzaimi RomainLaroche - FabriceLefèvre + FabriceLefèvre 1890–1895 D15-1216 10.18653/v1/D15-1216 @@ -2367,8 +2367,8 @@ GauravKumar GraemeBlackwood JanTrmal - DanielPovey - SanjeevKhudanpur + DanielPovey + SanjeevKhudanpur 1902–1907 D15-1218 10.18653/v1/D15-1218 @@ -2386,7 +2386,7 @@ Concept-based Summarization using Integer Linear Programming: From Concept Pruning to Multiple Optimal Solutions FlorianBoudin HugoMougard - BenoitFavre + BenoitFavre 1914–1918 D15-1220 10.18653/v1/D15-1220 @@ -2404,7 +2404,7 @@ Better Summarization Evaluation with Word Embeddings for <fixed-case>ROUGE</fixed-case> - Jun-PingNg + Jun-PingNg ViktoriaAbrecht 1925–1930 D15-1222 @@ -2455,7 +2455,7 @@ Summarizing Student Responses to Reflection Prompts WencanLuo - DianeLitman + DianeLitman 1955–1960 D15-1227 10.18653/v1/D15-1227 @@ -2466,7 +2466,7 @@ Extractive Summarization by Maximizing Semantic Volume DaniYogatama FeiLiu - Noah A.Smith + Noah A.Smith 1961–1966 D15-1228 10.18653/v1/D15-1228 @@ -2485,7 +2485,7 @@ Discourse Planning with an N-gram Model of Relations OrBiran - KathleenMcKeown + KathleenMcKeown 1973–1977 D15-1230 10.18653/v1/D15-1230 @@ -2526,7 +2526,7 @@ AnilRamakrishna NikolaosMalandrakis ElizabethStaruk - ShrikanthNarayanan + ShrikanthNarayanan 1996–2001 D15-1234 10.18653/v1/D15-1234 @@ -2535,7 +2535,7 @@ <fixed-case>EMNLP</fixed-case> versus <fixed-case>ACL</fixed-case>: Analyzing <fixed-case>NLP</fixed-case> research over time - Sujatha DasGollapalli + Sujatha DasGollapalli XiaoliLi 2002–2006 D15-1235 @@ -2557,8 +2557,8 @@ <fixed-case>W</fixed-case>iki<fixed-case>QA</fixed-case>: A Challenge Dataset for Open-Domain Question Answering YiYang - Wen-tauYih - ChristopherMeek + Wen-tauYih + ChristopherMeek 2013–2018 D15-1237 10.18653/v1/D15-1237 @@ -2581,7 +2581,7 @@ AaronJaech VictoriaZayats HaoFang - MariOstendorf + MariOstendorf HannanehHajishirzi 2026–2031 D15-1239 @@ -2592,7 +2592,7 @@ What Your Username Says About You AaronJaech - MariOstendorf + MariOstendorf 2032–2037 D15-1240 10.18653/v1/D15-1240 @@ -2603,7 +2603,7 @@ Knowledge Base Inference using Bridging Entities BhushanKotnis PradeepBansal - Partha P.Talukdar + Partha P.Talukdar 2038–2043 D15-1241 10.18653/v1/D15-1241 @@ -2627,7 +2627,7 @@ ManaalFaruqui WangLing GuillaumeLample - ChrisDyer + ChrisDyer 2049–2054 D15-1243 10.18653/v1/D15-1243 @@ -2648,9 +2648,9 @@ Any-language frame-semantic parsing - AndersJohannsen - HéctorMartínez Alonso - AndersSøgaard + AndersJohannsen + HéctorMartínez Alonso + AndersSøgaard 2062–2066 D15-1245 10.18653/v1/D15-1245 @@ -2701,7 +2701,7 @@ A Binarized Neural Network Joint Model for Machine Translation JingyiZhang MasaoUtiyama - EiichiroSumita + EiichiroSumita GrahamNeubig SatoshiNakamura 2094–2099 @@ -2714,7 +2714,7 @@ <fixed-case>B</fixed-case>ayesian Optimization of Text Representations DaniYogatama LingpengKong - Noah A.Smith + Noah A.Smith 2100–2105 D15-1251 10.18653/v1/D15-1251 @@ -2776,7 +2776,7 @@ Modeling Reportable Events as Turning Points in Narrative JessicaOuyang - KathleenMcKeown + KathleenMcKeown 2149–2158 D15-1257 10.18653/v1/D15-1257 @@ -2797,7 +2797,7 @@ WeiGao ZhongyuWei BaolinPeng - Kam-FaiWong + Kam-FaiWong 2168–2178 D15-1259 10.18653/v1/D15-1259 @@ -2808,7 +2808,7 @@ RyuIida KentaroTorisawa ChikaraHashimoto - Jong-HoonOh + Jong-HoonOh JulienKloetzer 2179–2189 D15-1260 @@ -2819,7 +2819,7 @@ Estimation of Discourse Segmentation Labels from Crowd Data ZihengHuang JialuZhong - Rebecca J.Passonneau + Rebecca J.Passonneau 2190–2200 D15-1261 10.18653/v1/D15-1261 @@ -2857,7 +2857,7 @@ Wikification of Concept Mentions within Spoken Dialogues Using Domain Constraints from <fixed-case>W</fixed-case>ikipedia SeokhwanKim - Rafael E.Banchs + Rafael E.Banchs HaizhouLi 2225–2229 D15-1265 @@ -2868,7 +2868,7 @@ Shallow Convolutional Neural Network for Implicit Discourse Relation Recognition BiaoZhang JinsongSu - DeyiXiong + DeyiXiong YaojieLu HongDuan JunfengYao @@ -2892,7 +2892,7 @@ RyuichiroHigashinaka MasahiroMizukami KotaroFunakoshi - MasahiroAraki + MasahiroAraki HiroshiTsukahara YukaKobayashi 2243–2248 @@ -2929,7 +2929,7 @@ Adapting Coreference Resolution for Narrative Processing Quynh Ngoc ThiDo StevenBethard - Marie-FrancineMoens + Marie-FrancineMoens 2262–2267 D15-1271 10.18653/v1/D15-1271 @@ -2937,10 +2937,10 @@ Joint Lemmatization and Morphological Tagging with Lemming - ThomasMüller + ThomasMüller RyanCotterell - AlexanderFraser - HinrichSchütze + AlexanderFraser + HinrichSchütze 2268–2274 D15-1272 10.18653/v1/D15-1272 @@ -2950,7 +2950,7 @@ Transducer Disambiguation with Sparse Topological Features GonzaloIglesias - Adriàde Gispert + Adriàde Gispert BillByrne 2275–2280 D15-1273 @@ -2960,7 +2960,7 @@ <fixed-case>A</fixed-case>rabic Diacritization with Recurrent Neural Networks YonatanBelinkov - JamesGlass + JamesGlass 2281–2285 D15-1274 10.18653/v1/D15-1274 @@ -2998,9 +2998,9 @@ When Are Tree Structures Necessary for Deep Learning of Representations? JiweiLi - ThangLuong - DanJurafsky - EduardHovy + ThangLuong + DanJurafsky + EduardHovy 2304–2314 D15-1278 10.18653/v1/D15-1278 @@ -3027,7 +3027,7 @@ XipengQiu XinchiChen ShiyuWu - XuanjingHuang + XuanjingHuang 2326–2335 D15-1280 10.18653/v1/D15-1280 @@ -3038,7 +3038,7 @@ Verbal and Nonverbal Clues for Real-life Deception Detection VerónicaPérez-Rosas MohamedAbouelenien - RadaMihalcea + RadaMihalcea YaoXiao CJLinton MihaiBurzo @@ -3061,8 +3061,8 @@ Co-Training for Topic Classification of Scholarly Data CorneliaCaragea - FlorinBulgarov - RadaMihalcea + FlorinBulgarov + RadaMihalcea 2357–2366 D15-1283 10.18653/v1/D15-1283 @@ -3072,9 +3072,9 @@ Humor Recognition and Humor Anchor Extraction DiyiYang - AlonLavie - ChrisDyer - EduardHovy + AlonLavie + ChrisDyer + EduardHovy 2367–2376 D15-1284 10.18653/v1/D15-1284 @@ -3098,8 +3098,8 @@ A Dynamic Programming Algorithm for Computing N-gram Posteriors from Lattices - DoğanCan - ShrikanthNarayanan + DoğanCan + ShrikanthNarayanan 2388–2397 D15-1286 10.18653/v1/D15-1286 @@ -3120,8 +3120,8 @@ Compact, Efficient and Unlimited Capacity: Language Modeling with Compressed Suffix Trees EhsanShareghi MatthiasPetri - GholamrezaHaffari - TrevorCohn + GholamrezaHaffari + TrevorCohn 2409–2418 D15-1288 10.18653/v1/D15-1288 @@ -3133,7 +3133,7 @@ <fixed-case>ERSOM</fixed-case>: A Structural Ontology Matching Approach Using Automatically Learned Entity Representation ChunchengXiang TingsongJiang - BaobaoChang + BaobaoChang ZhifangSui 2419–2429 D15-1289 @@ -3143,7 +3143,7 @@ A Single Word is not Enough: Ranking Multiword Expressions Using Distributional Semantics MartinRiedl - ChrisBiemann + ChrisBiemann 2430–2440 D15-1290 10.18653/v1/D15-1290 @@ -3206,7 +3206,7 @@ <fixed-case>JEAM</fixed-case>: A Novel Model for Cross-Domain Sentiment Classification Based on Emotion Analysis Kun-HuLuo - Zhi-HongDeng + Zhi-HongDeng HongliangYu Liang-ChenWei 2503–2508 @@ -3238,7 +3238,7 @@ RakshaSharma MohitGupta AsthaAgarwal - PushpakBhattacharyya + PushpakBhattacharyya 2520–2526 D15-1300 10.18653/v1/D15-1300 @@ -3256,11 +3256,11 @@ A Multi-lingual Annotated Dataset for Aspect-Oriented Opinion Mining - Salud M.Jiménez Zafra + Salud M.Jiménez Zafra GiacomoBerardi AndreaEsuli DiegoMarcheggiani - María TeresaMartín-Valdivia + María TeresaMartín-Valdivia AlejandroMoreo Fernández 2533–2538 D15-1302 @@ -3271,7 +3271,7 @@ Deep Convolutional Neural Network Textual Features and Multiple Kernel Learning for Utterance-level Multimodal Sentiment Analysis SoujanyaPoria ErikCambria - AlexanderGelbukh + AlexanderGelbukh 2539–2544 D15-1303 10.18653/v1/D15-1303 @@ -3280,7 +3280,7 @@ <fixed-case>SLSA</fixed-case>: A Sentiment Lexicon for <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic RamyEskander - OwenRambow + OwenRambow 2545–2550 D15-1304 10.18653/v1/D15-1304 @@ -3310,7 +3310,7 @@ Detection of Steganographic Techniques on <fixed-case>T</fixed-case>witter AlexWilson - PhilBlunsom + PhilBlunsom AndrewKer 2564–2569 D15-1307 @@ -3332,8 +3332,8 @@ An Analysis of Domestic Abuse Discourse on <fixed-case>R</fixed-case>eddit NicolasSchrading - CeciliaOvesdotter Alm - RayPtucha + CeciliaOvesdotter Alm + RayPtucha ChristopherHoman 2577–2583 D15-1309 @@ -3353,8 +3353,8 @@ Classifying Tweet Level Judgements of Rumours in Social Media MichalLukasik - TrevorCohn - KalinaBontcheva + TrevorCohn + KalinaBontcheva 2590–2595 D15-1311 10.18653/v1/D15-1311 @@ -3404,7 +3404,7 @@ The major aim of the tutorial is to provide NLP researchers with an introduction Transparent Machine Learning for Information Extraction: State-of-the-Art and the Future LauraChiticariu YunyaoLi - FrederickReiss + FrederickReiss The rise of Big Data analytics over unstructured text has led to renewed interest in information extraction (IE). These applications need effective IE as a first step towards solving end-to-end real world problems (e.g. biology, medicine, finance, media and entertainment, etc). Much recent NLP research has focused on addressing specific IE problems using a pipeline of multiple machine learning techniques. This approach requires an analyst with the expertise to answer questions such as: “What ML techniques should I combine to solve this problem?”; “What features will be useful for the composite pipeline?”; and “Why is my model giving the wrong answer on this document?”. The need for this expertise creates problems in real world applications. It is very difficult in practice to find an analyst who both understands the real world problem and has deep knowledge of applied machine learning. As a result, the real impact by current IE research does not match up to the abundant opportunities available. In this tutorial, we introduce the concept of transparent machine learning. A transparent ML technique is one that: @@ -3418,16 +3418,16 @@ The tutorial is aimed at IE researchers in both the academic and industry commun Knowledge Acquisition for Web Search - MariusPasca + MariusPasca The identification of textual items, or documents, that best match a user’s information need, as expressed in search queries, forms the core functionality of information retrieval systems. Well-known challenges are associated with understanding the intent behind user queries; and, more importantly, with matching inherently-ambiguous queries to documents that may employ lexically different phrases to convey the same meaning. The conversion of semi-structured content from Wikipedia and other resources into structured data produces knowledge potentially more suitable to database-style queries and, ideally, to use in information retrieval. In parallel, the availability of textual documents on the Web enables an aggressive push towards the automatic acquisition of various types of knowledge from text. Methods developed under the umbrella of open-domain information extraction acquire open-domain classes of instances and relations from Web text. The methods operate over unstructured or semi-structured text available within collections of Web documents, or over relatively more intriguing streams of anonymized search queries. Some of the methods import the automatically-extracted data into human-generated resources, or otherwise exploit existing human-generated resources. In both cases, the goal is to expand the coverage of the initial resources, thus providing information about more of the topics that people in general, and Web search users in particular, may be interested in. pasca-2015-knowledge Learning Semantic Relations from Text - PreslavNakov - ViviNastase + PreslavNakov + ViviNastase DiarmuidÓ Séaghdha - StanSzpakowicz + StanSzpakowicz Every non-trivial text describes interactions and relations between people, institutions, activities, events and so on. What we know about the world consists in large part of such relations, and that knowledge contributes to the understanding of what texts refer to. Newly found relations can in turn become part of this knowledge that is stored for future use. To grasp a text’s semantic content, an automatic system must be able to recognize relations in texts and reason about them. This may be done by applying and updating previously acquired knowledge. We focus here in particular on semantic relations which describe the interactions among nouns and compact noun phrases, and we present such relations from both a theoretical and a practical perspective. The theoretical exploration sketches the historical path which has brought us to the contemporary view and interpretation of semantic relations. We discuss a wide range of relation inventories proposed by linguists and by language processing people. Such inventories vary by domain, granularity and suitability for downstream applications. @@ -3438,7 +3438,7 @@ On the practical side, we investigate the recognition and acquisition of relatio Applications of Social Media Text Analysis AtefehFarzindar - DianaInkpen + DianaInkpen Analyzing social media texts is a complex problem that becomes difficult to address using traditional Natural Language Processing (NLP) methods. Our tutorial focuses on presenting new methods for NLP tasks and applications that work on noisy and informal texts, such as the ones from social media. Automatic processing of large collections of social media texts is important because they contain a lot of useful information, due to the in-creasing popularity of all types of social media. Use of social media and messaging apps grew 203 percent year-on-year in 2013, with overall app use rising 115 percent over the same period, as reported by Statista, citing data from Flurry Analytics. This growth means that 1.61 billion people are now active in social media around the world and this is expected to advance to 2 billion users in 2016, led by India. The research shows that consumers are now spending daily 5.6 hours on digital media including social media and mo-bile internet usage. @@ -3448,7 +3448,7 @@ At the heart of this interest is the ability for users to create and share conte Robust Semantic Analysis of Multiword Expressions with <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et - Miriam R. L.Petruck + Miriam R. L.Petruck ValiaKordoni This tutorial will give participants a solid understanding of the linguistic features of multiword expressions (MWEs), focusing on the semantics of such expressions and their importance for natural language processing and language technology, with particular attention to the way that FrameNet (framenet.icsi.berkeley.edu) handles this wide spread phenomenon. Our target audience includes researchers and practitioners of language technology, not necessarily experts in MWEs or knowledgeable about FrameNet, who are interested in NLP tasks that involve or could benefit from considering MWEs as a pervasive phenomenon in human language and communication. @@ -3461,8 +3461,8 @@ While native speakers use these forms with ease, the treatment and interpretatio Computational Analysis of Affect and Emotion in Language - SaifMohammad - CeciliaOvesdotter Alm + SaifMohammad + CeciliaOvesdotter Alm Computational linguistics has witnessed a surge of interest in approaches to emotion and affect analysis, tackling problems that extend beyond sentiment analysis in depth and complexity. This area involves basic emotions (such as joy, sadness, and fear) as well as any of the hundreds of other emotions humans are capable of (such as optimism, frustration, and guilt), expanding into affective conditions, experiences, and activities. Leveraging linguistic data for computational affect and emotion inference enables opportunities to address a range of affect-related tasks, problems, and non-invasive applications that capture aspects essential to the human condition and individuals’ cognitive processes. These efforts enable and facilitate human-centered computing experiences, as demonstrated by applications across clinical, socio-political, artistic, educational, and commercial domains. Efforts to computationally detect, characterize, and generate emotions or affect-related phenomena respond equally to technological needs for personalized, micro-level analytics and broad-coverage, macro-level inference, and they have involved both small and massive amounts of data. While this is an exciting area with numerous opportunities for members of the ACL community, a major obstacle is its intersection with other investigatory traditions, necessitating knowledge transfer. This tutorial comprehensively integrates relevant concepts and frameworks from linguistics, cognitive science, affective computing, and computational linguistics in order to equip researchers and practitioners with the adequate background and knowledge to work effectively on problems and tasks either directly involving, or benefiting from having an understanding of, affect and emotion analysis. diff --git a/data/xml/D16.xml b/data/xml/D16.xml index cf24e6a389..75dd3e594d 100644 --- a/data/xml/D16.xml +++ b/data/xml/D16.xml @@ -63,9 +63,9 @@ Distinguishing Past, On-going, and Future Events: The <fixed-case>E</fixed-case>vent<fixed-case>S</fixed-case>tatus Corpus RuihongHuang IgnacioCases - DanJurafsky + DanJurafsky CleoCondoravdi - EllenRiloff + EllenRiloff 44–54 D16-1005 10.18653/v1/D16-1005 @@ -76,7 +76,7 @@ Nested Propositions in Open Information Extraction NikitaBhutani H. V.Jagadish - DragomirRadev + DragomirRadev 55–64 D16-1006 10.18653/v1/D16-1006 @@ -88,7 +88,7 @@ YunlunYang YunhaiTong ShuleiMa - Zhi-HongDeng + Zhi-HongDeng 65–74 D16-1007 10.18653/v1/D16-1007 @@ -118,7 +118,7 @@ Comparing Computational Cognitive Models of Generalization in a Language Acquisition Task LibbyBarak - Adele E.Goldberg + Adele E.Goldberg SuzanneStevenson 96–106 D16-1010 @@ -141,7 +141,7 @@ Deep Multi-Task Learning with Shared Memory for Text Classification PengfeiLiu XipengQiu - XuanjingHuang + XuanjingHuang 118–127 D16-1012 10.18653/v1/D16-1012 @@ -166,7 +166,7 @@ Creating Causal Embeddings for Question Answering with Minimal Supervision RebeccaSharp MihaiSurdeanu - PeterJansen + PeterJansen PeterClark MichaelHammond 138–148 @@ -203,7 +203,7 @@ Event participant modelling with neural networks OttokarTilk VeraDemberg - AsadSayeed + AsadSayeed DietrichKlakow StefanThater 171–182 @@ -306,8 +306,8 @@ Zero-Resource Translation with Multi-Lingual Neural Machine Translation OrhanFirat - BaskaranSankaran - YaserAl-onaizan + BaskaranSankaran + YaserAl-onaizan Fatos T.Yarman Vural KyunghyunCho 268–277 @@ -331,9 +331,9 @@ Semi-Supervised Learning of Sequence Models with Method of Moments ZitaMarinho - André F. T.Martins - Shay B.Cohen - Noah A.Smith + André F. T.Martins + Shay B.Cohen + Noah A.Smith 287–296 D16-1028 10.18653/v1/D16-1028 @@ -345,7 +345,7 @@ ShyamUpadhyay Ming-WeiChang Kai-WeiChang - Wen-tauYih + Wen-tauYih 297–306 D16-1029 10.18653/v1/D16-1029 @@ -366,7 +366,7 @@ Language as a Latent Variable: Discrete Generative Models for Sentence Compression YishuMiao - PhilBlunsom + PhilBlunsom 319–328 D16-1031 10.18653/v1/D16-1031 @@ -375,8 +375,8 @@ Globally Coherent Text Generation with Neural Checklist Models - ChloéKiddon - LukeZettlemoyer + ChloéKiddon + LukeZettlemoyer YejinChoi 329–339 D16-1032 @@ -388,7 +388,7 @@ A Dataset and Evaluation Metrics for Abstractive Compression of Sentences and Short Paragraphs KristinaToutanova ChrisBrockett - Ke M.Tran + Ke M.Tran SaleemaAmershi 340–350 D16-1033 @@ -410,9 +410,9 @@ Discourse Parsing with Attention-based Hierarchical Neural Networks - QiLi + QiLi TianshiLi - BaobaoChang + BaobaoChang 362–371 D16-1035 10.18653/v1/D16-1035 @@ -436,7 +436,7 @@ Variational Neural Discourse Relation Recognizer BiaoZhang - DeyiXiong + DeyiXiong JinsongSu QunLiu RongrongJi @@ -460,10 +460,10 @@ Learning Term Embeddings for Taxonomic Relation Identification Using Dynamic Weighting Neural Network - Luu Anh Tuan - YiTay + Anh TuanLuu + TayYi Siu CheungHui - See KiongNg + See KiongNg 403–413 D16-1039 10.18653/v1/D16-1039 @@ -473,7 +473,7 @@ Relation Schema Induction using Tensor Factorization with Side Information MadhavNimishakavi Uday SinghSaini - ParthaTalukdar + ParthaTalukdar 414–423 D16-1040 10.18653/v1/D16-1040 @@ -481,8 +481,8 @@ Supervised Distributional Hypernym Discovery via Domain Adaptation - LuisEspinosa-Anke - JoseCamacho-Collados + LuisEspinosa-Anke + JoseCamacho-Collados ClaudioDelli Bovi HoracioSaggion 424–435 @@ -501,7 +501,7 @@ Comparing Data Sources and Architectures for Deep Visual Representation Learning in Semantics DouweKiela - Anita LillaVerő + Anita LillaVerő StephenClark 447–456 D16-1043 @@ -559,7 +559,7 @@ Automatic Cross-Lingual Similarization of Dependency Grammars for Tree-based Machine Translation WenbinJiang WenZhang - JinanXu + JinanXu RangjiaCai 501–510 D16-1048 @@ -581,7 +581,7 @@ Variational Neural Machine Translation BiaoZhang - DeyiXiong + DeyiXiong JinsongSu HongDuan MinZhang @@ -593,7 +593,7 @@ Towards a Convex <fixed-case>HMM</fixed-case> Surrogate for Word Alignment AndreiSimion - MichaelCollins + MichaelCollins CliffStein 531–540 D16-1051 @@ -640,7 +640,7 @@ Learning to Translate for Multilingual Question Answering - FerhanTure + FerhanTure ElizabethBoschee 573–584 D16-1055 @@ -662,7 +662,7 @@ William L.Hamilton KevinClark JureLeskovec - DanJurafsky + DanJurafsky 595–605 D16-1057 10.18653/v1/D16-1057 @@ -713,7 +713,7 @@ Building an Evaluation Scale using Item Response Theory - John P.Lalor + John P.Lalor HaoWu HongYu 648–657 @@ -727,7 +727,7 @@ HyokunYun PinarYanardag ShinMatsushima - S. V. N.Vishwanathan + S. V. N.Vishwanathan 658–668 D16-1063 10.18653/v1/D16-1063 @@ -769,7 +769,7 @@ Enhanced Personalized Search using Social Data DongZhou - SéamusLawless + SéamusLawless XuanWu WenyuZhao JianxunLiu @@ -794,7 +794,7 @@ JinQian YaGuo YaqianZhou - XuanjingHuang + XuanjingHuang 721–730 D16-1069 10.18653/v1/D16-1069 @@ -813,8 +813,8 @@ <fixed-case>LAMB</fixed-case>: A Good Shepherd of Morphologically Rich Languages SebastianEbert - ThomasMüller - HinrichSchütze + ThomasMüller + HinrichSchütze 742–752 D16-1071 10.18653/v1/D16-1071 @@ -856,7 +856,7 @@ News Stream Summarization using Burst Information Networks TaoGe LeiCui - BaobaoChang + BaobaoChang SujianLi MingZhou ZhifangSui @@ -868,8 +868,8 @@ Rationale-Augmented Convolutional Neural Networks for Text Classification YeZhang - IainMarshall - Byron C.Wallace + IainMarshall + Byron C.Wallace 795–804 D16-1076 10.18653/v1/D16-1076 @@ -878,7 +878,7 @@ Transferring User Interests Across Websites with Unstructured Text for Cold-Start Recommendation Yu-YangHuang - Shou-DeLin + Shou-DeLin 805–814 D16-1077 10.18653/v1/D16-1077 @@ -888,8 +888,8 @@ Speculation and Negation Scope Detection via Convolutional Neural Networks ZhongQian PeifengLi - QiaomingZhu - GuodongZhou + QiaomingZhu + GuodongZhou ZhunchenLuo WeiLuo 815–825 @@ -902,7 +902,7 @@ Analyzing Linguistic Knowledge in Sequential Model of Sentence PengQian XipengQiu - XuanjingHuang + XuanjingHuang 826–835 D16-1079 10.18653/v1/D16-1079 @@ -913,7 +913,7 @@ QiZhang YangWang YeyunGong - XuanjingHuang + XuanjingHuang 836–845 D16-1080 10.18653/v1/D16-1080 @@ -933,7 +933,7 @@ Structured prediction models for <fixed-case>RNN</fixed-case> based sequence labeling in clinical text - AbhyudayJagannatha + AbhyudayJagannatha HongYu 856–865 D16-1082 @@ -954,9 +954,9 @@ Stance Detection with Bidirectional Conditional Encoding IsabelleAugenstein - TimRocktäschel + TimRocktäschel AndreasVlachos - KalinaBontcheva + KalinaBontcheva 876–885 D16-1084 10.18653/v1/D16-1084 @@ -965,7 +965,7 @@ Modeling Skip-Grams for Event Detection with Convolutional Neural Networks Thien HuuNguyen - RalphGrishman + RalphGrishman 886–891 D16-1085 10.18653/v1/D16-1085 @@ -988,7 +988,7 @@ GabrielaFerraro LiyuanZhou WeiweiHou - TimothyBaldwin + TimothyBaldwin 899–905 D16-1087 10.18653/v1/D16-1087 @@ -1063,8 +1063,8 @@ Richer Interpolative Smoothing Based on Modified <fixed-case>K</fixed-case>neser-<fixed-case>N</fixed-case>ey Language Modeling EhsanShareghi - TrevorCohn - GholamrezaHaffari + TrevorCohn + GholamrezaHaffari 944–949 D16-1094 10.18653/v1/D16-1094 @@ -1084,9 +1084,9 @@ Coverage Embedding Models for Neural Machine Translation HaitaoMi - BaskaranSankaran + BaskaranSankaran ZhiguoWang - AbeIttycheriah + AbeIttycheriah 955–960 D16-1096 10.18653/v1/D16-1096 @@ -1094,9 +1094,9 @@ Neural Morphological Analysis: Encoding-Decoding Canonical Segments - KatharinaKann + KatharinaKann RyanCotterell - HinrichSchütze + HinrichSchütze 961–967 D16-1097 10.18653/v1/D16-1097 @@ -1169,8 +1169,8 @@ AdityaJoshi VaibhavTripathi KevinPatel - PushpakBhattacharyya - MarkCarman + PushpakBhattacharyya + MarkCarman 1006–1011 D16-1104 10.18653/v1/D16-1104 @@ -1210,7 +1210,7 @@ Characterizing the Language of Online Communities and its Relation to Community Reception TrangTran - MariOstendorf + MariOstendorf 1030–1035 D16-1108 10.18653/v1/D16-1108 @@ -1220,7 +1220,7 @@ Joint Transition-based Dependency Parsing and Disfluency Detection for Automatic Speech Recognition Texts MasashiYoshikawa HiroyukiShindo - YujiMatsumoto + YujiMatsumoto 1036–1041 D16-1109 10.18653/v1/D16-1109 @@ -1252,7 +1252,7 @@ Neural Headline Generation on <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation ShoTakase JunSuzuki - NaoakiOkazaki + NaoakiOkazaki TsutomuHirao MasaakiNagata 1054–1059 @@ -1294,10 +1294,10 @@ Semantic Parsing with Semi-Supervised Sequential Autoencoders TomášKočiský GáborMelis - EdwardGrefenstette - ChrisDyer + EdwardGrefenstette + ChrisDyer WangLing - PhilBlunsom + PhilBlunsom Karl MoritzHermann 1078–1087 D16-1116 @@ -1365,9 +1365,9 @@ Detecting and Characterizing Events AllisonChaney - HannaWallach + HannaWallach MatthewConnelly - DavidBlei + DavidBlei 1142–1152 D16-1122 10.18653/v1/D16-1122 @@ -1377,9 +1377,9 @@ Convolutional Neural Network Language Models - Ngoc-QuanPham - GermanKruszewski - GemmaBoleda + Ngoc-QuanPham + GermanKruszewski + GemmaBoleda 1153–1162 D16-1123 10.18653/v1/D16-1123 @@ -1389,7 +1389,7 @@ Generalizing and Hybridizing Count-based and Neural Language Models GrahamNeubig - ChrisDyer + ChrisDyer 1163–1172 D16-1124 10.18653/v1/D16-1124 @@ -1423,7 +1423,7 @@ JiweiLi WillMonroe AlanRitter - DanJurafsky + DanJurafsky MichelGalley JianfengGao 1192–1202 @@ -1465,7 +1465,7 @@ Antecedent Selection for Sluicing: Structure and Content - PranavAnand + PranavAnand DanielHardt 1234–1243 D16-1131 @@ -1477,7 +1477,7 @@ Intra-Sentential Subject Zero Anaphora Resolution using Multi-Column Convolutional Neural Network RyuIida KentaroTorisawa - Jong-HoonOh + Jong-HoonOh CanasaiKruengkrai JulienKloetzer 1244–1254 @@ -1490,7 +1490,7 @@ An Unsupervised Probability Model for Speech-to-Translation Alignment of Low-Resource Languages AntoniosAnastasopoulos DavidChiang - LongDuong + LongDuong 1255–1263 D16-1133 10.18653/v1/D16-1133 @@ -1501,7 +1501,7 @@ <fixed-case>HUME</fixed-case>: Human <fixed-case>UCCA</fixed-case>-Based Evaluation of Machine Translation AlexandraBirch OmriAbend - OndřejBojar + OndřejBojar BarryHaddow 1264–1274 D16-1134 @@ -1512,7 +1512,7 @@ Improving Multilingual Named Entity Recognition with <fixed-case>W</fixed-case>ikipedia Entity Type Mapping JianNi - RaduFlorian + RaduFlorian 1275–1284 D16-1135 10.18653/v1/D16-1135 @@ -1521,11 +1521,11 @@ Learning Crosslingual Word Embeddings without Bilingual Corpora - LongDuong + LongDuong HiroshiKanayama TengfeiMa StevenBird - TrevorCohn + TrevorCohn 1285–1295 D16-1136 10.18653/v1/D16-1136 @@ -1535,7 +1535,7 @@ Sequence-to-Sequence Learning as Beam-Search Optimization SamWiseman - Alexander M.Rush + Alexander M.Rush 1296–1306 D16-1137 10.18653/v1/D16-1137 @@ -1546,7 +1546,7 @@ Online Segment to Segment Neural Transduction LeiYu JanBuys - PhilBlunsom + PhilBlunsom 1307–1316 D16-1138 10.18653/v1/D16-1138 @@ -1556,7 +1556,7 @@ Sequence-Level Knowledge Distillation YoonKim - Alexander M.Rush + Alexander M.Rush 1317–1327 D16-1139 10.18653/v1/D16-1139 @@ -1569,7 +1569,7 @@ GrahamNeubig RyoheiSasano HiroyaTakamura - ManabuOkumura + ManabuOkumura 1328–1338 D16-1140 10.18653/v1/D16-1140 @@ -1637,7 +1637,7 @@ Lifted Rule Injection for Relation Embeddings ThomasDemeester - TimRocktäschel + TimRocktäschel SebastianRiedel 1389–1399 D16-1146 @@ -1662,9 +1662,9 @@ Analyzing Framing through the Casts of Characters in the News DallasCard - JustinGross - AmberBoydstun - Noah A.Smith + JustinGross + AmberBoydstun + Noah A.Smith 1410–1420 D16-1148 10.18653/v1/D16-1148 @@ -1673,7 +1673,7 @@ The Teams Corpus and Entrainment in Multi-Party Spoken Dialogues - DianeLitman + DianeLitman SusannahPaletz ZahraRahimi StefaniAllegretti @@ -1720,9 +1720,9 @@ Phonologically Aware Neural Model for Named Entity Recognition in Low Resource Transfer Settings AkashBharadwaj - DavidMortensen - ChrisDyer - JaimeCarbonell + DavidMortensen + ChrisDyer + JaimeCarbonell 1462–1472 D16-1153 10.18653/v1/D16-1153 @@ -1743,11 +1743,11 @@ Jointly Learning Grounded Task Structures from Language Instruction and Visual Demonstration ChangsongLiu ShaohuaYang - SariSaba-Sadiya + SariSaba-Sadiya NishantShukla YunzhongHe - Song-ChunZhu - JoyceChai + Song-ChunZhu + JoyceChai 1482–1492 D16-1155 10.18653/v1/D16-1155 @@ -1802,7 +1802,7 @@ Exploiting Source-side Monolingual Data in Neural Machine Translation JiajunZhang - ChengqingZong + ChengqingZong 1535–1545 D16-1160 10.18653/v1/D16-1160 @@ -1840,7 +1840,7 @@ <fixed-case>M</fixed-case>ix<fixed-case>KM</fixed-case>eans: Clustering Question-Answer Archives - DeepakP + DeepakP 1576–1585 D16-1164 10.18653/v1/D16-1164 @@ -1848,9 +1848,9 @@ It Takes Three to Tango: Triangulation Approach to Answer Ranking in Community Question Answering - PreslavNakov - LluísMàrquez - FranciscoGuzmán + PreslavNakov + LluísMàrquez + FranciscoGuzmán 1586–1597 D16-1165 10.18653/v1/D16-1165 @@ -1877,9 +1877,9 @@ A Theme-Rewriting Approach for Generating Algebra Word Problems - RikKoncel-Kedziorski + RikKoncel-Kedziorski IoannisKonstas - LukeZettlemoyer + LukeZettlemoyer HannanehHajishirzi 1617–1628 D16-1168 @@ -1901,7 +1901,7 @@ LinGui DongyinWu RuifengXu - QinLu + QinLu YuZhou 1639–1649 D16-1170 @@ -1925,7 +1925,7 @@ JiachengXu DanluChen XipengQiu - XuanjingHuang + XuanjingHuang 1660–1669 D16-1172 10.18653/v1/D16-1172 @@ -1936,7 +1936,7 @@ ZhitingHu ZichaoYang RuslanSalakhutdinov - EricXing + EricXing 1670–1679 D16-1173 10.18653/v1/D16-1173 @@ -1957,7 +1957,7 @@ ThomasKober JulieWeeds JeremyReffin - DavidWeir + DavidWeir 1691–1702 D16-1175 10.18653/v1/D16-1175 @@ -1969,7 +1969,7 @@ XipengQiu YaqianZhou JifanChen - XuanjingHuang + XuanjingHuang 1703–1712 D16-1176 10.18653/v1/D16-1176 @@ -1993,8 +1993,8 @@ <fixed-case>F</fixed-case>riends with Motives: Using Text to Infer Influence on <fixed-case>SCOTUS</fixed-case> YanchuanSim - BryanRoutledge - Noah A.Smith + BryanRoutledge + Noah A.Smith 1724–1733 D16-1178 10.18653/v1/D16-1178 @@ -2004,7 +2004,7 @@ Verb Phrase Ellipsis Resolution Using Discriminative and Margin-Infused Algorithms KianKenyon-Dean - Jackie Chi KitCheung + Jackie Chi KitCheung DoinaPrecup 1734–1743 D16-1179 @@ -2016,8 +2016,8 @@ AdhigunaKuncoro MiguelBallesteros LingpengKong - ChrisDyer - Noah A.Smith + ChrisDyer + Noah A.Smith 1744–1753 D16-1180 10.18653/v1/D16-1180 @@ -2044,7 +2044,7 @@ Neural Shift-Reduce <fixed-case>CCG</fixed-case> Semantic Parsing - Dipendra KumarMisra + Dipendra KumarMisra YoavArtzi 1775–1786 D16-1183 @@ -2108,7 +2108,7 @@ Deep Reinforcement Learning with a Combinatorial Action Space for Predicting Popular <fixed-case>R</fixed-case>eddit Threads JiHe - MariOstendorf + MariOstendorf XiaodongHe JianshuChen JianfengGao @@ -2121,7 +2121,7 @@ Non-Literal Text Reuse in Historical Texts: An Approach to Identify Reuse Transformations and its Application to <fixed-case>B</fixed-case>ible Reuse - MariaMoritz + MariaMoritz AndreasWiederhold BarbaraPavlek YuriBizzoni @@ -2165,7 +2165,7 @@ Non-uniform Language Detection in Technical Writing WeiboWang AbidalrahmanMoh’d - AminulIslam + AminulIslam AxelSoto EvangelosMilios 1892–1900 @@ -2186,7 +2186,7 @@ Orthographic Syllable as basic unit for <fixed-case>SMT</fixed-case> between Related Languages AnoopKunchukuttan - PushpakBhattacharyya + PushpakBhattacharyya 1912–1917 D16-1196 10.18653/v1/D16-1196 @@ -2239,7 +2239,7 @@ Combining Supervised and Unsupervised Enembles for Knowledge Base Population Nazneen FatemaRajani - RaymondMooney + RaymondMooney 1943–1948 D16-1201 10.18653/v1/D16-1201 @@ -2248,9 +2248,9 @@ Character Sequence Models for Colorful Words KazuyaKawakami - ChrisDyer - BryanRoutledge - Noah A.Smith + ChrisDyer + BryanRoutledge + Noah A.Smith 1949–1954 D16-1202 10.18653/v1/D16-1202 @@ -2271,7 +2271,7 @@ Improving <fixed-case>LSTM</fixed-case>-based Video Description with Linguistic Knowledge Mined from Text SubhashiniVenugopalan Lisa AnneHendricks - RaymondMooney + RaymondMooney KateSaenko 1961–1966 D16-1204 @@ -2294,7 +2294,7 @@ Speed-Accuracy Tradeoffs in Tagging with Variable-Order <fixed-case>CRF</fixed-case>s and Structured Sparsity TimVieira RyanCotterell - JasonEisner + JasonEisner 1973–1978 D16-1206 10.18653/v1/D16-1206 @@ -2303,8 +2303,8 @@ Learning Robust Representations of Text YitongLi - TrevorCohn - TimothyBaldwin + TrevorCohn + TimothyBaldwin 1979–1985 D16-1207 10.18653/v1/D16-1207 @@ -2332,8 +2332,8 @@ HidetakaKamigaito AkihiroTamura HiroyaTakamura - ManabuOkumura - EiichiroSumita + ManabuOkumura + EiichiroSumita 1998–2004 D16-1210 10.18653/v1/D16-1210 @@ -2343,8 +2343,8 @@ Training with Exploration Improves a Greedy Stack <fixed-case>LSTM</fixed-case> Parser MiguelBallesteros YoavGoldberg - ChrisDyer - Noah A.Smith + ChrisDyer + Noah A.Smith 2005–2010 D16-1211 10.18653/v1/D16-1211 @@ -2354,7 +2354,7 @@ Capturing Argument Relationship for <fixed-case>C</fixed-case>hinese Semantic Role Labeling LeiSha SujianLi - BaobaoChang + BaobaoChang ZhifangSui TingsongJiang 2011–2016 @@ -2378,7 +2378,7 @@ SivaReddy JohnBlitzer JuliaHockenmaier - MarkSteedman + MarkSteedman 2022–2027 D16-1214 10.18653/v1/D16-1214 @@ -2408,10 +2408,10 @@ SalvatoreGiorgi RishiSolanki JohannesEichstaedt - H. AndrewSchwartz + H. AndrewSchwartz MuhammadAbdul-Mageed AnnekeBuffone - LyleUngar + LyleUngar 2042–2047 D16-1217 10.18653/v1/D16-1217 @@ -2429,9 +2429,9 @@ Using Syntactic and Semantic Context to Explore Psychodemographic Differences in Self-reference MasoudRouhizadeh - LyleUngar + LyleUngar AnnekeBuffone - H AndrewSchwartz + H AndrewSchwartz 2054–2059 D16-1219 10.18653/v1/D16-1219 @@ -2439,7 +2439,7 @@ Learning to Identify Metaphors from a Corpus of Proverbs - GözdeÖzbal + GözdeÖzbal CarloStrapparava Serra SinemTekiroğlu DanielePighin @@ -2452,7 +2452,7 @@ An Embedding Model for Predicting Roll-Call Votes PeterKraft HirshJain - Alexander M.Rush + Alexander M.Rush 2066–2070 D16-1221 10.18653/v1/D16-1221 @@ -2462,7 +2462,7 @@ Natural Language Model Re-usability for Scaling to Different Domains Young-BumKim AlexandreRochette - RuhiSarikaya + RuhiSarikaya 2071–2076 D16-1222 10.18653/v1/D16-1222 @@ -2528,7 +2528,7 @@ There’s No Comparison: Reference-less Evaluation Metrics in Grammatical Error Correction CourtneyNapoles KeisukeSakaguchi - JoelTetreault + JoelTetreault 2109–2115 D16-1228 10.18653/v1/D16-1228 @@ -2538,7 +2538,7 @@ Cultural Shift or Linguistic Drift? Comparing Two Computational Measures of Semantic Change William L.Hamilton JureLeskovec - DanJurafsky + DanJurafsky 2116–2121 D16-1229 10.18653/v1/D16-1229 @@ -2548,7 +2548,7 @@ How <fixed-case>NOT</fixed-case> To Evaluate Your Dialogue System: An Empirical Study of Unsupervised Evaluation Metrics for Dialogue Response Generation Chia-WeiLiu RyanLowe - IulianSerban + IulianSerban MikeNoseworthy LaurentCharlin JoellePineau @@ -2584,13 +2584,13 @@ Conditional Generation and Snapshot Learning in Neural Dialogue Systems Tsung-HsienWen - MilicaGašić + MilicaGašić NikolaMrkšić - Lina M.Rojas-Barahona + Lina M.Rojas-Barahona Pei-HaoSu StefanUltes DavidVandyke - SteveYoung + SteveYoung 2153–2162 D16-1233 10.18653/v1/D16-1233 @@ -2680,7 +2680,7 @@ Who did What: A Large-Scale Person-Centered Cloze Dataset - TakeshiOnishi + TakeshiOnishi HaiWang MohitBansal KevinGimpel @@ -2708,7 +2708,7 @@ Learning to Generate Compositional Color Descriptions WillMonroe - Noah D.Goodman + Noah D.Goodman ChristopherPotts 2243–2248 D16-1243 @@ -2718,7 +2718,7 @@ A Decomposable Attention Model for Natural Language Inference - AnkurParikh + AnkurParikh OscarTäckström DipanjanDas JakobUszkoreit @@ -2731,7 +2731,7 @@ Deep Reinforcement Learning for Mention-Ranking Coreference Models KevinClark - Christopher D.Manning + Christopher D.Manning 2256–2262 D16-1245 10.18653/v1/D16-1245 @@ -2776,7 +2776,7 @@ Supervised Attentions for Neural Machine Translation HaitaoMi ZhiguoWang - AbeIttycheriah + AbeIttycheriah 2283–2288 D16-1249 10.18653/v1/D16-1249 @@ -2786,8 +2786,8 @@ Learning principled bilingual mappings of word embeddings while preserving monolingual invariance MikelArtetxe - GorkaLabaka - EnekoAgirre + GorkaLabaka + EnekoAgirre 2289–2294 D16-1250 10.18653/v1/D16-1250 @@ -2831,7 +2831,7 @@ Transition-Based Dependency Parsing with Heuristic Backtracking JacobBuckman MiguelBallesteros - ChrisDyer + ChrisDyer 2313–2318 D16-1254 10.18653/v1/D16-1254 @@ -2841,8 +2841,8 @@ Word Ordering Without Syntax AllenSchmaltz - Alexander M.Rush - StuartShieber + Alexander M.Rush + StuartShieber 2319–2324 D16-1255 10.18653/v1/D16-1255 @@ -2853,7 +2853,7 @@ Morphological Segmentation Inside-Out RyanCotterell ArunKumar - HinrichSchütze + HinrichSchütze 2325–2330 D16-1256 10.18653/v1/D16-1256 @@ -2877,7 +2877,7 @@ LuhengHe JulianMichael MikeLewis - LukeZettlemoyer + LukeZettlemoyer 2337–2342 D16-1258 10.18653/v1/D16-1258 @@ -2901,7 +2901,7 @@ TaoGe LeiSha SujianLi - BaobaoChang + BaobaoChang ZhifangSui 2350–2354 D16-1260 @@ -2925,7 +2925,7 @@ Global Neural <fixed-case>CCG</fixed-case> Parsing with Optimality Guarantees KentonLee MikeLewis - LukeZettlemoyer + LukeZettlemoyer 2366–2376 D16-1262 10.18653/v1/D16-1262 @@ -2936,7 +2936,7 @@ Learning a Lexicon and Translation Model from Phoneme Lattices OliverAdams GrahamNeubig - TrevorCohn + TrevorCohn StevenBird Quoc TruongDo SatoshiNakamura @@ -2972,7 +2972,7 @@ Practical Neural Networks for <fixed-case>NLP</fixed-case>: From Theory to Code - ChrisDyer + ChrisDyer YoavGoldberg GrahamNeubig This tutorial aims to bring NLP researchers up to speed with the current techniques in deep learning and neural networks, and show them how they can turn their ideas into practical implementations. We will start with simple classification models (logistic regression and multilayer perceptrons) and cover more advanced patterns that come up in NLP such as recurrent networks for sequence tagging and prediction problems, structured networks (e.g., compositional architectures based on syntax trees), structured output spaces (sequences and trees), attention for sequence-to-sequence transduction, and feature induction for complex algorithm states. A particular emphasis will be on learning to represent complex objects as recursive compositions of simpler objects. This representation will reflect characterize standard objects in NLP, such as the composition of characters and morphemes into words, and words into sentences and documents. In addition, new opportunities such as learning to embed "algorithm states" such as those used in transition-based parsing and other sequential structured prediction models (for which effective features may be difficult to engineer by hand) will be covered. @@ -3014,7 +3014,7 @@ The content of the tutorial is divided into 3 sections of 1 hour each. We assume Continuous Vector Spaces for Cross-language <fixed-case>NLP</fixed-case> Applications - Rafael E.Banchs + Rafael E.Banchs The mathematical metaphor offered by the geometric concept of distance in vector spaces with respect to semantics and meaning has been proven to be useful in many monolingual natural language processing applications. There is also some recent and strong evidence that this paradigm can also be useful in the cross-language setting. In this tutorial, we present and discuss some of the most recent advances on exploiting the vector space model paradigm in specific cross-language natural language processing applications, along with a comprehensive review of the theoretical background behind them. First, the tutorial introduces some fundamental concepts of distributional semantics and vector space models. More specifically, the concepts of distributional hypothesis and term-document matrices are revised, followed by a brief discussion on linear and non-linear dimensionality reduction techniques and their implications to the parallel distributed approach to semantic cognition. Next, some classical examples of using vector space models in monolingual natural language processing applications are presented. Specific examples in the areas of information retrieval, related term identification and semantic compositionality are described. diff --git a/data/xml/D17.xml b/data/xml/D17.xml index b7a427eda4..7182f5e617 100644 --- a/data/xml/D17.xml +++ b/data/xml/D17.xml @@ -4,7 +4,7 @@ Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing D17-1 - MarthaPalmer + MarthaPalmer RebeccaHwa SebastianRiedel 10.18653/v1/D17-1 @@ -21,7 +21,7 @@ Monolingual Phrase Alignment on Parse Forests YukiArase - JunichiTsujii + JunichiTsujii 1–11 D17-1001 10.18653/v1/D17-1001 @@ -46,7 +46,7 @@ Quasi-Second-Order Parsing for 1-Endpoint-Crossing, Pagenumber-2 Graphs JunjieCao ShengHuang - WeiweiSun + WeiweiSun XiaojunWan 24–34 D17-1003 @@ -62,7 +62,7 @@ VictorZhong DanqiChen GaborAngeli - Christopher D.Manning + Christopher D.Manning 35–45 D17-1004 10.18653/v1/D17-1004 @@ -91,7 +91,7 @@ Integrating Order Information and Event Relation for Script Event Prediction ZhongqingWang YueZhang - Ching-YunChang + Ching-YunChang 57–67 D17-1006 10.18653/v1/D17-1006 @@ -130,7 +130,7 @@ SivaReddy OscarTäckström SlavPetrov - MarkSteedman + MarkSteedman MirellaLapata 89–101 D17-1009 @@ -156,7 +156,7 @@ Past, Present, Future: A Computational Investigation of the Typology of Tense in 1000 Languages EhsaneddinAsgari - HinrichSchütze + HinrichSchütze 113–124 D17-1011 10.18653/v1/D17-1011 @@ -181,8 +181,8 @@ RongxiangWeng ShujianHuang ZaixiangZheng - XinyuDai - JiajunChen + XinyuDai + JiajunChen 136–145 D17-1013 10.18653/v1/D17-1013 @@ -192,8 +192,8 @@ Towards Decoding as Continuous Optimisation in Neural Machine Translation Cong Duy VuHoang - GholamrezaHaffari - TrevorCohn + GholamrezaHaffari + TrevorCohn 146–156 D17-1014 10.18653/v1/D17-1014 @@ -216,8 +216,8 @@ Continuous Representation of Location for Geolocation and Lexical Dialectology using Mixture Density Networks AfshinRahimi - TimothyBaldwin - TrevorCohn + TimothyBaldwin + TrevorCohn 167–176 D17-1016 10.18653/v1/D17-1016 @@ -241,7 +241,7 @@ KentonLee LuhengHe MikeLewis - LukeZettlemoyer + LukeZettlemoyer 188–197 D17-1018 10.18653/v1/D17-1018 @@ -252,7 +252,7 @@ Neural Net Models of Open-domain Discourse Coherence JiweiLi - DanJurafsky + DanJurafsky 198–209 D17-1019 10.18653/v1/D17-1019 @@ -265,7 +265,7 @@ KexiangWang TianyuLiu ZhifangSui - BaobaoChang + BaobaoChang 210–220 D17-1020 10.18653/v1/D17-1020 @@ -290,7 +290,7 @@ Hierarchical Embeddings for Hypernymy Detection and Directionality Kim AnhNguyen MaximilianKöper - SabineSchulte im Walde + SabineSchulte im Walde Ngoc ThangVu 233–243 D17-1022 @@ -364,7 +364,7 @@ AvijitVajpayee ArjitSrivastava Madan GopalJhanwar - ManishShrivastava + ManishShrivastava 292–297 D17-1028 10.18653/v1/D17-1028 @@ -377,7 +377,7 @@ Exploiting Word Internal Structures for Generic <fixed-case>C</fixed-case>hinese Sentence Representation ShaonanWang JiajunZhang - ChengqingZong + ChengqingZong 298–303 D17-1029 10.18653/v1/D17-1029 @@ -386,8 +386,8 @@ High-risk learning: acquiring new word vectors from tiny data - AurélieHerbelot - MarcoBaroni + AurélieHerbelot + MarcoBaroni 304–309 D17-1030 10.18653/v1/D17-1030 @@ -398,7 +398,7 @@ Word Embeddings based on Fixed-Size Ordinally Forgetting Encoding JosephSanu - MingbinXu + MingbinXu HuiJiang QuanLiu 310–315 @@ -452,7 +452,7 @@ Learning What’s Easy: Fully Differentiable Neural Easy-First Taggers - André F. T.Martins + André F. T.Martins JuliaKreutzer 349–362 D17-1036 @@ -476,7 +476,7 @@ Learning to select data for transfer learning with <fixed-case>B</fixed-case>ayesian Optimization SebastianRuder - BarbaraPlank + BarbaraPlank 372–382 D17-1038 10.18653/v1/D17-1038 @@ -486,8 +486,8 @@ Unsupervised Pretraining for Sequence to Sequence Learning PrajitRamachandran - PeterLiu - QuocLe + PeterLiu + QuocLe 383–391 D17-1039 10.18653/v1/D17-1039 @@ -499,7 +499,7 @@ Efficient Attention using a Fixed-Size Memory Representation DennyBritz MelodyGuan - Minh-ThangLuong + Minh-ThangLuong 392–400 D17-1040 10.18653/v1/D17-1040 @@ -510,7 +510,7 @@ Rotated Word Vector Representations and their Interpretability SungjoonPark JinYeongBak - AliceOh + AliceOh 401–411 D17-1041 10.18653/v1/D17-1041 @@ -530,7 +530,7 @@ Piecewise Latent Variables for Neural Variational Text Processing - Iulian VladSerban + Iulian VladSerban Alexander G.Ororbia JoellePineau AaronCourville @@ -589,7 +589,7 @@ A Cognition Based Attention Model for Sentiment Analysis YunfeiLong - QinLu + QinLu RongXiang MingleiLi Chu-RenHuang @@ -623,7 +623,7 @@ Identifying Humor in Reviews using Background Text Sources AlexMorales - ChengxiangZhai + ChengxiangZhai 492–501 D17-1051 10.18653/v1/D17-1051 @@ -675,7 +675,7 @@ Refining Word Embeddings for Sentiment Analysis - Liang-ChihYu + Liang-ChihYu JinWang K. RobertLai XuejieZhang @@ -691,7 +691,7 @@ AbhishekKumar DeepanwayGhosal AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 540–546 D17-1057 10.18653/v1/D17-1057 @@ -703,7 +703,7 @@ RakshaSharma ArpanSomani LakshyaKumar - PushpakBhattacharyya + PushpakBhattacharyya 547–552 D17-1058 10.18653/v1/D17-1058 @@ -713,7 +713,7 @@ Sentiment Lexicon Expansion Based on Neural <fixed-case>PU</fixed-case> Learning, Double Dictionary Lookup, and Polarity Association YashengWang - YangZhang + YangZhang BingLiu 553–563 D17-1059 @@ -764,7 +764,7 @@ Learning how to Active Learn: A Deep Reinforcement Learning Approach MengFang YuanLi - TrevorCohn + TrevorCohn 595–605 D17-1063 10.18653/v1/D17-1063 @@ -776,7 +776,7 @@ Split and Rephrase ShashiNarayan ClaireGardent - Shay B.Cohen + Shay B.Cohen AnastasiaShimorina 606–616 D17-1064 @@ -788,9 +788,9 @@ Neural Response Generation via <fixed-case>GAN</fixed-case> with an Approximate Embedding Layer ZhenXu - BingquanLiu + BingquanLiu BaoxunWang - ChengjieSun + ChengjieSun XiaolongWang ZhuoranWang ChaoQi @@ -962,8 +962,8 @@ ZhentingYu YueZhang ShujianHuang - XinyuDai - JiajunChen + XinyuDai + JiajunChen 760–766 D17-1079 10.18653/v1/D17-1079 @@ -983,7 +983,7 @@ From Textbooks to Knowledge: A Case Study in Harvesting Axiomatic Knowledge from Textbooks to Solve Geometry Problems MrinmayaSachan KumarDubey - EricXing + EricXing 773–784 D17-1081 10.18653/v1/D17-1081 @@ -996,7 +996,7 @@ QizheXie HanxiaoLiu YimingYang - EduardHovy + EduardHovy 785–794 D17-1082 10.18653/v1/D17-1082 @@ -1024,7 +1024,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesLearning Fine-Grained Expressions to Solve Math Word Problems DanqingHuang ShumingShi - Chin-YewLin + Chin-YewLin JianYin 805–814 D17-1084 @@ -1038,7 +1038,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesJunjieHu WeiWei ZiYang - EricNyberg + EricNyberg 815–824 D17-1085 10.18653/v1/D17-1085 @@ -1050,7 +1050,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesTengLong EmmanuelBengio RyanLowe - Jackie Chi KitCheung + Jackie Chi KitCheung DoinaPrecup 825–834 D17-1086 @@ -1083,7 +1083,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Latent Space Embedding for Retrieval in Question-Answer Archives - DeepakP + DeepakP DineshGarg ShirishShevade 855–865 @@ -1210,7 +1210,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Deriving continous grounded meaning representations from referentially structured multimodal contexts - SinaZarrieß + SinaZarrieß DavidSchlangen 959–965 D17-1100 @@ -1222,7 +1222,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesHierarchically-Attentive RNN for Album Summarization and Storytelling LichengYu MohitBansal - TamaraBerg + TamaraBerg 966–971 D17-1101 10.18653/v1/D17-1101 @@ -1235,7 +1235,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesCheng-YangFu JoonLee MohitBansal - AlexanderBerg + AlexanderBerg 972–978 D17-1102 10.18653/v1/D17-1102 @@ -1258,7 +1258,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesEvaluating Hierarchies of Verb Argument Structure with Hierarchical Clustering JesseMu Joshua K.Hartshorne - TimothyO’Donnell + TimothyO’Donnell 986–991 D17-1104 10.18653/v1/D17-1104 @@ -1278,7 +1278,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Mapping Instructions and Visual Observations to Actions with Reinforcement Learning - DipendraMisra + DipendraMisra JohnLangford YoavArtzi 1004–1015 @@ -1290,7 +1290,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines An analysis of eye-movements during reading for the detection of mild cognitive impairment - Kathleen C.Fraser + Kathleen C.Fraser KristinaLundholm Fors DimitriosKokkinakis ArtoNordlund @@ -1317,7 +1317,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesArunChaganty AshwinParanjape PercyLiang - Christopher D.Manning + Christopher D.Manning 1038–1048 D17-1109 10.18653/v1/D17-1109 @@ -1341,7 +1341,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Globally Normalized Reader JonathanRaiman - JohnMiller + JohnMiller 1059–1069 D17-1111 10.18653/v1/D17-1111 @@ -1379,7 +1379,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesJunnanZhu CongMa JiajunZhang - ChengqingZong + ChengqingZong 1092–1102 D17-1114 10.18653/v1/D17-1114 @@ -1446,7 +1446,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesYoungseoSon VivekKulkarni NiranjanBalasubramanian - H. AndrewSchwartz + H. AndrewSchwartz 1146–1155 D17-1119 10.18653/v1/D17-1119 @@ -1481,7 +1481,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesInter-Weighted Alignment Network for Sentence Pair Modeling GehuiShen YunlunYang - Zhi-HongDeng + Zhi-HongDeng 1179–1189 D17-1122 10.18653/v1/D17-1122 @@ -1504,7 +1504,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesPengfeiLiu KaiyuQian XipengQiu - XuanjingHuang + XuanjingHuang 1204–1213 D17-1124 10.18653/v1/D17-1124 @@ -1548,7 +1548,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines A Joint Sequential and Relational Model for Frame-Semantic Parsing BishanYang - TomMitchell + TomMitchell 1247–1256 D17-1128 10.18653/v1/D17-1128 @@ -1568,7 +1568,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines <fixed-case>AMR</fixed-case> Parsing using Stack-<fixed-case>LSTM</fixed-case>s MiguelBallesteros - YaserAl-Onaizan + YaserAl-Onaizan 1269–1275 D17-1130 10.18653/v1/D17-1130 @@ -1609,10 +1609,10 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Multi-task Attention-based Neural Networks for Implicit Discourse Relationship Representation and Identification - ManLan - JianxiangWang + ManLan + JianxiangWang YuanbinWu - Zheng-YuNiu + Zheng-YuNiu HaifengWang 1299–1308 D17-1134 @@ -1624,7 +1624,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesChinese Zero Pronoun Resolution with Deep Memory Network QingyuYin YuZhang - WeinanZhang + WeinanZhang TingLiu 1309–1318 D17-1135 @@ -1636,7 +1636,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesHow much progress have we made on RST discourse parsing? A replication study of recent results on the RST-DT MathieuMorey PhilippeMuller - NicholasAsher + NicholasAsher 1319–1324 D17-1136 10.18653/v1/D17-1136 @@ -1657,7 +1657,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Revisiting Selectional Preferences for Coreference Resolution BenjaminHeinzerling - Nafise SadatMoosavi + Nafise SadatMoosavi MichaelStrube 1332–1339 D17-1138 @@ -1669,7 +1669,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesLearning to Rank Semantic Coherence for Topic Segmentation LiangWang SujianLi - YajuanLv + YajuanLv HoufengWang 1340–1344 D17-1139 @@ -1692,7 +1692,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Patterns of Argumentation Strategies across Topics - KhalidAl-Khatib + KhalidAl-Khatib HenningWachsmuth MatthiasHagen BennoStein @@ -1744,7 +1744,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesMatthiasSperber GrahamNeubig JanNiehues - AlexWaibel + AlexWaibel 1380–1389 D17-1145 10.18653/v1/D17-1145 @@ -1792,7 +1792,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesTranslating Phrases in Neural Machine Translation XingWang ZhaopengTu - DeyiXiong + DeyiXiong MinZhang 1421–1431 D17-1149 @@ -1817,8 +1817,8 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesMassive Exploration of Neural Machine Translation Architectures DennyBritz AnnaGoldie - Minh-ThangLuong - QuocLe + Minh-ThangLuong + QuocLe 1442–1451 D17-1151 10.18653/v1/D17-1151 @@ -1827,7 +1827,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Learning Translations via Matrix Completion - Derry TantiWijaya + Derry TantiWijaya BrendanCallahan JohnHewitt JieGao @@ -1843,7 +1843,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Reinforcement Learning for Bandit Neural Machine Translation with Simulated Human Feedback KhanhNguyen - HalDaumé III + HalDaumé III JordanBoyd-Graber 1464–1474 D17-1153 @@ -1871,7 +1871,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesMasaoUtiyama LemaoLiu KehaiChen - EiichiroSumita + EiichiroSumita 1482–1488 D17-1155 10.18653/v1/D17-1155 @@ -1880,7 +1880,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Regularization techniques for fine-tuning in neural machine translation - Antonio ValerioMiceli Barone + Antonio ValerioMiceli Barone BarryHaddow UlrichGermann RicoSennrich @@ -1893,7 +1893,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Source-Side Left-to-Right or Target-Side Left-to-Right? An Empirical Comparison of Two Phrase-Based Decoding Algorithms Yin-WenChang - MichaelCollins + MichaelCollins 1495–1499 D17-1157 10.18653/v1/D17-1157 @@ -1938,7 +1938,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesJoint Concept Learning and Semantic Parsing from Natural Language Explanations ShashankSrivastava IgorLabutov - TomMitchell + TomMitchell 1527–1536 D17-1161 10.18653/v1/D17-1161 @@ -2000,7 +2000,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines“i have a feeling trump will win..................”: Forecasting Winners and Losers from User Predictions on Twitter SandeshSwamy AlanRitter - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe 1583–1592 D17-1166 10.18653/v1/D17-1166 @@ -2014,7 +2014,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesJiannanHu YulanHe RuifengXu - QinLu + QinLu JiachenDu 1593–1602 D17-1167 @@ -2039,7 +2039,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesUsing millions of emoji occurrences to learn any-domain representations for detecting sentiment, emotion and sarcasm BjarkeFelbo AlanMislove - AndersSøgaard + AndersSøgaard IyadRahwan SuneLehmann 1615–1625 @@ -2075,7 +2075,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Efficient Discontinuous Phrase-Structure Parsing via the Generalized Maximum Spanning Arborescence CaioCorro - JosephLe Roux + JosephLe Roux MathieuLacroix 1644–1654 D17-1172 @@ -2166,8 +2166,8 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesTAG Parsing with Neural Networks and Vector Representations of Supertags JungoKasai RobertFrank - R. ThomasMcCoy - OwenRambow + R. ThomasMcCoy + OwenRambow AlexisNasr 1712–1722 D17-1180 @@ -2178,7 +2178,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Global Normalization of Convolutional Neural Networks for Joint Entity and Relation Classification HeikeAdel - HinrichSchütze + HinrichSchütze 1723–1729 D17-1181 10.18653/v1/D17-1181 @@ -2199,7 +2199,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines <fixed-case>KGE</fixed-case>val: Accuracy Estimation of Automatically Constructed Knowledge Graphs PrakharOjha - ParthaTalukdar + ParthaTalukdar 1741–1750 D17-1183 10.18653/v1/D17-1183 @@ -2221,7 +2221,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Dual Tensor Model for Detecting Asymmetric Lexico-Semantic Relations GoranGlavaš - Simone PaoloPonzetto + Simone PaoloPonzetto 1757–1767 D17-1185 10.18653/v1/D17-1185 @@ -2266,7 +2266,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesA Soft-label Method for Noise-tolerant Distantly Supervised Relation Extraction TianyuLiu KexiangWang - BaobaoChang + BaobaoChang ZhifangSui 1790–1795 D17-1189 @@ -2276,7 +2276,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines A Sequential Model for Classifying Temporal Relations between Intra-Sentence Events - Prafulla KumarChoubey + Prafulla KumarChoubey RuihongHuang 1796–1802 D17-1190 @@ -2321,7 +2321,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesTemporal dynamics of semantic relations in word embeddings: an application to predicting armed conflict participants AndreyKutuzov ErikVelldal - LiljaØvrelid + LiljaØvrelid 1824–1829 D17-1194 10.18653/v1/D17-1194 @@ -2334,7 +2334,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesChenhaoTan SebastianMartschat YejinChoi - Noah A.Smith + Noah A.Smith 1830–1839 D17-1195 10.18653/v1/D17-1195 @@ -2344,7 +2344,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Towards Quantum Language Models IvanoBasile - FabioTamburini + FabioTamburini 1840–1849 D17-1196 10.18653/v1/D17-1196 @@ -2355,8 +2355,8 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Reference-Aware Language Models ZichaoYang - PhilBlunsom - ChrisDyer + PhilBlunsom + ChrisDyer WangLing 1850–1859 D17-1197 @@ -2381,7 +2381,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesZhenisbekAssylbekov RustemTakhanov BagdatMyrzakhmetov - Jonathan N.Washington + Jonathan N.Washington 1866–1872 D17-1199 10.18653/v1/D17-1199 @@ -2440,7 +2440,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Finding Patterns in Noisy Crowds: Regression-based Annotation Aggregation for Crowdsourced Data NatalieParde - RodneyNielsen + RodneyNielsen 1907–1912 D17-1204 10.18653/v1/D17-1204 @@ -2504,7 +2504,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesIvanTitov WilkerAziz DiegoMarcheggiani - KhalilSima’an + KhalilSima’an 1957–1967 D17-1209 10.18653/v1/D17-1209 @@ -2515,7 +2515,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesTrainable Greedy Decoding for Neural Machine Translation JiataoGu KyunghyunCho - Victor O.K.Li + Victor O.K.Li 1968–1978 D17-1210 10.18653/v1/D17-1210 @@ -2552,7 +2552,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesDiyiYang AaronHalfaker RobertKraut - EduardHovy + EduardHovy 2000–2010 D17-1213 10.18653/v1/D17-1213 @@ -2626,7 +2626,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Identifying Where to Focus in Reading Comprehension for Neural Question Generation XinyaDu - ClaireCardie + ClaireCardie 2067–2073 D17-1219 10.18653/v1/D17-1219 @@ -2717,7 +2717,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Event Coreference Resolution by Iteratively Unfolding Inter-dependencies among Events - Prafulla KumarChoubey + Prafulla KumarChoubey RuihongHuang 2124–2133 D17-1226 @@ -2741,7 +2741,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesDiWang NebojsaJojic ChrisBrockett - EricNyberg + EricNyberg 2140–2150 D17-1228 10.18653/v1/D17-1228 @@ -2751,9 +2751,9 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Preserving Distributional Information in Dialogue Act Classification - Quan HungTran + Quan HungTran IngridZukerman - GholamrezaHaffari + GholamrezaHaffari 2151–2156 D17-1229 10.18653/v1/D17-1229 @@ -2767,7 +2767,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesTianlinShi SébastienJean AlanRitter - DanJurafsky + DanJurafsky 2157–2169 D17-1230 10.18653/v1/D17-1230 @@ -2791,7 +2791,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesYohanJo MichaelYoder HyejuJang - CarolynRosé + CarolynRosé 2179–2189 D17-1232 10.18653/v1/D17-1232 @@ -2858,8 +2858,8 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesLihongLi JianfengGao AsliCelikyilmaz - SungjinLee - Kam-FaiWong + SungjinLee + Kam-FaiWong 2231–2240 D17-1237 10.18653/v1/D17-1237 @@ -2883,8 +2883,8 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Challenges in Data-to-Document Generation SamWiseman - StuartShieber - AlexanderRush + StuartShieber + AlexanderRush 2253–2263 D17-1239 10.18653/v1/D17-1239 @@ -2923,7 +2923,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesDemographic-aware word associations AparnaGarimella CarmenBanea - RadaMihalcea + RadaMihalcea 2285–2295 D17-1242 10.18653/v1/D17-1242 @@ -2934,7 +2934,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesA Factored Neural Network Model for Characterizing Online Discussions in Vector Space HaoCheng HaoFang - MariOstendorf + MariOstendorf 2296–2306 D17-1243 10.18653/v1/D17-1243 @@ -2967,7 +2967,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesTatsuyaAoki RyoheiSasano HiroyaTakamura - ManabuOkumura + ManabuOkumura 2323–2328 D17-1246 10.18653/v1/D17-1246 @@ -2990,9 +2990,9 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Controlling Human Perception of Basic User Traits - DanielPreoţiuc-Pietro - SharathChandra Guntuku - LyleUngar + DanielPreoţiuc-Pietro + SharathChandra Guntuku + LyleUngar 2335–2341 D17-1248 10.18653/v1/D17-1248 @@ -3005,7 +3005,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesClémentGautrais PeggyCellier RenéQuiniou - AlexandreTermier + AlexandreTermier 2342–2347 D17-1249 10.18653/v1/D17-1249 @@ -3014,12 +3014,12 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Assessing Objective Recommendation Quality through Political Forecasting - H. AndrewSchwartz + H. AndrewSchwartz MasoudRouhizadeh MichaelBishop PhilipTetlock BarbaraMellers - LyleUngar + LyleUngar 2348–2357 D17-1250 10.18653/v1/D17-1250 @@ -3041,7 +3041,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesMaximum Margin Reward Networks for Learning from Explicit and Implicit Supervision HaoruoPeng Ming-WeiChang - Wen-tauYih + Wen-tauYih 2368–2378 D17-1252 10.18653/v1/D17-1252 @@ -3080,7 +3080,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselines Repeat before Forgetting: Spaced Repetition for Efficient and Effective Training of Neural Networks HadiAmiri - TimothyMiller + TimothyMiller GuerganaSavova 2401–2410 D17-1255 @@ -3095,7 +3095,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesQiZhang HaoranHuang MinlongPeng - XuanjingHuang + XuanjingHuang 2411–2420 D17-1256 10.18653/v1/D17-1256 @@ -3122,7 +3122,7 @@ and the code is available at https://github.com/qizhex/RACE_AR_baselinesDoes syntax help discourse segmentation? Not so much ChloéBraud OphélieLacroix - AndersSøgaard + AndersSøgaard 2432–2442 D17-1258 10.18653/v1/D17-1258 @@ -3176,7 +3176,7 @@ and efficiency of on-line policy optimization compared to other companion Further Investigation into Reference Bias in Monolingual Evaluation of Machine Translation QingsongMa YvetteGraham - TimothyBaldwin + TimothyBaldwin QunLiu 2476–2485 D17-1262 @@ -3198,7 +3198,7 @@ and efficiency of on-line policy optimization compared to other companion Knowledge Distillation for Bilingual Dictionary Induction - NdapandulaNakashole + NdapandulaNakashole RaphaelFlauger 2497–2506 D17-1264 @@ -3283,7 +3283,7 @@ and efficiency of on-line policy optimization compared to other companion Counterfactual Learning from Bandit Feedback under Deterministic Logging : A Case Study in Statistical Machine Translation CarolinLawrence - ArtemSokolov + ArtemSokolov StefanRiezler 2566–2576 D17-1272 @@ -3306,9 +3306,9 @@ and efficiency of on-line policy optimization compared to other companion Improving Slot Filling Performance with Attentive Neural Networks on Dependency Structures LifuHuang - AvirupSil + AvirupSil HengJi - RaduFlorian + RaduFlorian 2588–2597 D17-1274 10.18653/v1/D17-1274 @@ -3368,7 +3368,7 @@ and efficiency of on-line policy optimization compared to other companion Scientific Information Extraction with Semi-supervised Neural Tagging YiLuan - MariOstendorf + MariOstendorf HannanehHajishirzi 2641–2651 D17-1279 @@ -3393,7 +3393,7 @@ and efficiency of on-line policy optimization compared to other companion Speeding up Reinforcement Learning-based Information Extraction Training using Asynchronous Methods AdityaSharma ZaranaParekh - ParthaTalukdar + ParthaTalukdar 2658–2663 D17-1281 10.18653/v1/D17-1281 @@ -3407,7 +3407,7 @@ and efficiency of on-line policy optimization compared to other companion Ruo-PingDong Yu-SiangWang Ju-ChiehChou - Wei-YunMa + Wei-YunMa 2664–2669 D17-1282 10.18653/v1/D17-1282 @@ -3444,7 +3444,7 @@ and efficiency of on-line policy optimization compared to other companion KrisGanjam NavenduJain JessicaLundin - RyenWhite + RyenWhite JimmyLin 2691–2701 D17-1285 @@ -3454,7 +3454,7 @@ and efficiency of on-line policy optimization compared to other companion Word Etymology as Native Language Interference - ViviNastase + ViviNastase CarloStrapparava 2702–2707 D17-1286 @@ -3465,7 +3465,7 @@ and efficiency of on-line policy optimization compared to other companion A Simpler and More Generalizable Story Detector using Verb and Character Features JoshuaEisenberg - MarkFinlayson + MarkFinlayson 2708–2715 D17-1287 10.18653/v1/D17-1287 @@ -3525,7 +3525,7 @@ and efficiency of on-line policy optimization compared to other companion VarunGangal AngLu ZhengChen - EduardHovy + EduardHovy 2758–2767 D17-1292 10.18653/v1/D17-1292 @@ -3547,7 +3547,7 @@ and efficiency of on-line policy optimization compared to other companion Identifying the Provision of Choices in Privacy Policy Text - KanthashreeMysore Sathyendra + KanthashreeMysore Sathyendra ShomirWilson FlorianSchaub SebastianZimmeck @@ -3588,7 +3588,7 @@ and efficiency of on-line policy optimization compared to other companion Neural Sequence-Labelling Models for Grammatical Error Correction HelenYannakoudakis MarekRei - Øistein E.Andersen + Øistein E.Andersen ZhengYuan 2795–2806 D17-1297 @@ -3600,8 +3600,8 @@ and efficiency of on-line policy optimization compared to other companion Adapting Sequence Models for Sentence Correction AllenSchmaltz YoonKim - AlexanderRush - StuartShieber + AlexanderRush + StuartShieber 2807–2813 D17-1298 10.18653/v1/D17-1298 @@ -3612,7 +3612,7 @@ and efficiency of on-line policy optimization compared to other companion A Study of Style in Machine Translation: Controlling the Formality of Machine Translation Output XingNiu - MariannaMartindale + MariannaMartindale MarineCarpuat 2814–2819 D17-1299 @@ -3649,8 +3649,8 @@ and efficiency of on-line policy optimization compared to other companion Cross-Lingual Transfer Learning for <fixed-case>POS</fixed-case> Tagging without Cross-Lingual Resources Joo-KyungKim Young-BumKim - RuhiSarikaya - EricFosler-Lussier + RuhiSarikaya + EricFosler-Lussier 2832–2838 D17-1302 10.18653/v1/D17-1302 @@ -3678,8 +3678,8 @@ and efficiency of on-line policy optimization compared to other companion MasaoUtiyama LemaoLiu AkihiroTamura - EiichiroSumita - TiejunZhao + EiichiroSumita + TiejunZhao 2846–2852 D17-1304 10.18653/v1/D17-1304 @@ -3702,8 +3702,8 @@ and efficiency of on-line policy optimization compared to other companion Sequence Effects in Crowdsourced Annotations NitikaMathur - TimothyBaldwin - TrevorCohn + TimothyBaldwin + TrevorCohn 2860–2865 D17-1306 10.18653/v1/D17-1306 @@ -3713,7 +3713,7 @@ and efficiency of on-line policy optimization compared to other companion No Need to Pay Attention: Simple Recurrent Neural Networks Work! - FerhanTure + FerhanTure OliverJojic 2866–2872 D17-1307 @@ -3788,7 +3788,7 @@ and efficiency of on-line policy optimization compared to other companion Learning what to read: Focused machine reading EnriqueNoriega-Atala - Marco A.Valenzuela-Escárcega + Marco A.Valenzuela-Escárcega ClaytonMorrison MihaiSurdeanu 2905–2910 @@ -3814,8 +3814,8 @@ and efficiency of on-line policy optimization compared to other companion VarunGangal HarshJhamtani GrahamNeubig - EduardHovy - EricNyberg + EduardHovy + EricNyberg 2917–2922 D17-1315 10.18653/v1/D17-1315 @@ -3826,8 +3826,8 @@ and efficiency of on-line policy optimization compared to other companion Using Automated Metaphor Identification to Aid in Detection and Prediction of First-Episode Schizophrenia - E. DaríoGutiérrez - GuillermoCecchi + E. DaríoGutiérrez + GuillermoCecchi CherylCorcoran PhilipCorlett 2923–2930 @@ -3855,7 +3855,7 @@ and efficiency of on-line policy optimization compared to other companion Topic-Based Agreement and Disagreement in <fixed-case>US</fixed-case> Electoral Manifestos StefanoMenini FedericoNanni - Simone PaoloPonzetto + Simone PaoloPonzetto SaraTonelli 2938–2944 D17-1318 @@ -3929,7 +3929,7 @@ and efficiency of on-line policy optimization compared to other companion D17-2 LuciaSpecia MattPost - MichaelPaul + MichaelPaul 10.18653/v1/D17-2 Association for Computational Linguistics
Copenhagen, Denmark
@@ -3967,9 +3967,9 @@ and efficiency of on-line policy optimization compared to other companion
An Analysis and Visualization Tool for Case Study Learning of Linguistic Concepts - CeciliaOvesdotter Alm - BenjaminMeyers - EmilyPrud’hommeaux + CeciliaOvesdotter Alm + BenjaminMeyers + EmilyPrud’hommeaux 13–18 D17-2003 10.18653/v1/D17-2003 @@ -4062,11 +4062,11 @@ and efficiency of on-line policy optimization compared to other companion <fixed-case>DLATK</fixed-case>: Differential Language Analysis <fixed-case>T</fixed-case>ool<fixed-case>K</fixed-case>it - H. AndrewSchwartz + H. AndrewSchwartz SalvatoreGiorgi MaartenSap PatrickCrutchley - LyleUngar + LyleUngar JohannesEichstaedt 55–60 D17-2010 @@ -4100,7 +4100,7 @@ and efficiency of on-line policy optimization compared to other companion <fixed-case>M</fixed-case>ood<fixed-case>S</fixed-case>wipe: A Soft Keyboard that Suggests <fixed-case>M</fixed-case>essage<fixed-case>B</fixed-case>ased on User-Specified Emotions Chieh-YangHuang TristanLabetoulle - Ting-HaoHuang + Ting-HaoHuang Yi-PeiChen Hung-ChenChen VallariSrivastava @@ -4130,7 +4130,7 @@ and efficiency of on-line policy optimization compared to other companion <fixed-case>H</fixed-case>eidel<fixed-case>P</fixed-case>lace: An Extensible Framework for Geoparsing LudwigRichter - JohannaGeiß + JohannaGeiß AndreasSpitz MichaelGertz 85–90 @@ -4146,8 +4146,8 @@ and efficiency of on-line policy optimization compared to other companion EugenRuppert StefanoFaralli DmitryUstalov - Simone PaoloPonzetto - ChrisBiemann + Simone PaoloPonzetto + ChrisBiemann 91–96 D17-2016 10.18653/v1/D17-2016 @@ -4201,7 +4201,7 @@ and efficiency of on-line policy optimization compared to other companion Interactive Visualization and Manipulation of Attention-based Neural Machine Translation - JaesongLee + JaesongLee Joong-HwiShin Jun-SeokKim 121–126 @@ -4224,7 +4224,7 @@ and efficiency of on-line policy optimization compared to other companion Acquisition, Representation and Usage of Conceptual Hierarchies - MariusPasca + MariusPasca D17-3001 Through subsumption and instantiation, individual instances (“artificial intelligence”, “the spotted pig”) otherwise spanning a wide range of domains can be brought together and organized under conceptual hierarchies. The hierarchies connect more specific concepts (“computer science subfields”, “gastropubs”) to more general concepts (“academic disciplines”, “restaurants”) through IsA relations. Explicit or implicit properties applicable to, and defining, more general concepts are inherited by their more specific concepts, down to the instances connected to the lower parts of the hierarchies. Subsumption represents a crisp, universally-applicable principle towards consistently representing IsA relations in any knowledge resource. Yet knowledge resources often exhibit significant differences in their scope, representation choices and intended usage, to cause significant differences in their expected usage and impact on various tasks. @@ -4233,7 +4233,7 @@ and efficiency of on-line policy optimization compared to other companion Computational Sarcasm - PushpakBhattacharyya + PushpakBhattacharyya AdityaJoshi D17-3002 Sarcasm is a form of verbal irony that is intended to express contempt or ridicule. Motivated by challenges posed by sarcastic text to sentiment analysis, computational approaches to sarcasm have witnessed a growing interest at NLP forums in the past decade. Computational sarcasm refers to automatic approaches pertaining to sarcasm. The tutorial will provide a bird’s-eye view of the research in computational sarcasm for text, while focusing on significant milestones. @@ -4300,7 +4300,7 @@ The material associated with this tutorial will be available at the tutorial web Cross-Lingual Word Representations: Induction and Evaluation ManaalFaruqui - AndersSøgaard + AndersSøgaard IvanVulić D17-3007 In recent past, NLP as a field has seen tremendous utility of distributional word vector representations as features in downstream tasks. The fact that these word vectors can be trained on unlabeled monolingual corpora of a language makes them an inexpensive resource in NLP. With the increasing use of monolingual word vectors, there is a need for word vectors that can be used as efficiently across multiple languages as monolingually. Therefore, learning bilingual and multilingual word embeddings/vectors is currently an important research topic. These vectors offer an elegant and language-pair independent way to represent content across different languages. diff --git a/data/xml/D18.xml b/data/xml/D18.xml index 8f6b6858aa..a0b0a813c9 100644 --- a/data/xml/D18.xml +++ b/data/xml/D18.xml @@ -4,10 +4,10 @@ Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing D18-1 - EllenRiloff + EllenRiloff DavidChiang JuliaHockenmaier - Jun’ichiTsujii + Jun’ichiTsujii Association for Computational Linguistics
Brussels, Belgium
October-November @@ -22,7 +22,7 @@ Privacy-preserving Neural Representations of Text MaximinCoavoux ShashiNarayan - Shay B.Cohen + Shay B.Cohen 1–10 D18-1001 This article deals with adversarial attacks towards deep learning systems for Natural Language Processing (NLP), in the context of privacy protection. We study a specific type of attack: an attacker eavesdrops on the hidden representations of a neural text classifier and tries to recover information about the input text. Such scenario may arise in situations when the computation of a neural network is shared across multiple devices, e.g. some hidden representation is computed by a user’s device and sent to a cloud-based model. We measure the privacy of a hidden representation by the ability of an attacker to predict accurately specific private information from it and characterize the tradeoff between the privacy and the utility of neural representations. Finally, we propose several defense methods based on modified training objectives and show that they improve the privacy of neural representations. @@ -76,7 +76,7 @@ DesmondPatton WilliamFrey ChrisKedzie - KathyMcKeown + KathyMcKeown 46–56 D18-1005 D18-1005.Attachment.zip @@ -88,9 +88,9 @@ Reasoning about Actions and State Changes by Injecting Commonsense Knowledge NiketTandon - BhavanaDalvi + BhavanaDalvi JoelGrus - Wen-tauYih + Wen-tauYih AntoineBosselut PeterClark 57–66 @@ -120,8 +120,8 @@ Textual Analogy Parsing: What’s Shared and What’s Compared among Analogous Facts MatthewLamm ArunChaganty - Christopher D.Manning - DanJurafsky + Christopher D.Manning + DanJurafsky PercyLiang 82–92 D18-1008 @@ -159,7 +159,7 @@ Associative Multichannel Autoencoder for Multimodal Word Representation ShaonanWang JiajunZhang - ChengqingZong + ChengqingZong 115–124 D18-1011 In this paper we address the problem of learning multimodal word representations by integrating textual, visual and auditory inputs. Inspired by the re-constructive and associative nature of human memory, we propose a novel associative multichannel autoencoder (AMA). Our model first learns the associations between textual and perceptual modalities, so as to predict the missing perceptual information of concepts. Then the textual and predicted perceptual representations are fused through reconstructing their original and associated embeddings. Using a gating mechanism our model assigns different weights to each modality according to the different concepts. Results on six benchmark concept similarity tests show that the proposed method significantly outperforms strong unimodal baselines and state-of-the-art multimodal models. @@ -213,7 +213,7 @@ XinpengChen LinMa ZequnJie - Tat-SengChua + Tat-SengChua 162–171 D18-1015 We introduce an effective and efficient method that grounds (i.e., localizes) natural sentences in long, untrimmed video sequences. Specifically, a novel Temporal GroundNet (TGN) is proposed to temporally capture the evolving fine-grained frame-by-word interactions between video and sentence. TGN sequentially scores a set of temporal candidates ended at each frame based on the exploited frame-by-word interactions, and finally grounds the segment corresponding to the sentence. Unlike traditional methods treating the overlapping segments separately in a sliding window fashion, TGN aggregates the historical information and generates the final grounding result in one single pass. We extensively evaluate our proposed TGN on three public datasets with significant improvements over the state-of-the-arts. We further show the consistent effectiveness and efficiency of TGN through an ablation study and a runtime test. @@ -251,7 +251,7 @@ Using Linguistic Features to Improve the Generalization Capability of Neural Coreference Resolvers - Nafise SadatMoosavi + Nafise SadatMoosavi MichaelStrube 193–203 D18-1018 @@ -328,7 +328,7 @@ Unsupervised Multilingual Word Embeddings XilunChen - ClaireCardie + ClaireCardie 261–270 D18-1024 Multilingual Word Embeddings (MWEs) represent words from multiple languages in a single distributional vector space. Unsupervised MWE (UMWE) methods acquire multilingual embeddings without cross-lingual supervision, which is a significant advantage over traditional supervised approaches and opens many new possibilities for low-resource languages. Prior art for learning UMWEs, however, merely relies on a number of independently trained Unsupervised Bilingual Word Embeddings (UBWEs) to obtain multilingual embeddings. These methods fail to leverage the interdependencies that exist among many languages. To address this shortcoming, we propose a fully unsupervised framework for learning MWEs that directly exploits the relations between all language pairs. Our model substantially outperforms previous approaches in the experiments on multilingual word translation and cross-lingual word similarity. In addition, our model even beats supervised approaches trained with cross-lingual resources. @@ -360,9 +360,9 @@ Improving Cross-Lingual Word Embeddings by Meeting in the Middle - YeraiDoval - JoseCamacho-Collados - LuisEspinosa-Anke + YeraiDoval + JoseCamacho-Collados + LuisEspinosa-Anke StevenSchockaert 294–304 D18-1027 @@ -400,7 +400,7 @@ A Fast, Compact, Accurate Model for Language Identification of Codemixed Text YuanZhang JasonRiesa - DanielGillick + DanielGillick AntonBakalov JasonBaldridge DavidWeiss @@ -455,8 +455,8 @@ JiatengXie ZhilinYang GrahamNeubig - Noah A.Smith - JaimeCarbonell + Noah A.Smith + JaimeCarbonell 369–379 D18-1034 For languages with no annotated resources, unsupervised transfer of natural language processing models such as named-entity recognition (NER) from resource-rich languages would be an appealing capability. However, differences in words and word order across languages make it a challenging problem. To improve mapping of lexical items across languages, we propose a method that finds translations based on bilingual word embeddings. To improve robustness to word order differences, we propose to use self-attention, which allows for a degree of flexibility with respect to word order. We demonstrate that these methods achieve state-of-the-art or competitive NER performance on commonly tested languages under a cross-lingual setting, with much lower resource requirements than past approaches. We also evaluate the challenges of applying these methods to Uyghur, a low-resource language. @@ -466,9 +466,9 @@ A Stable and Effective Learning Strategy for Trainable Greedy Decoding YunChen - Victor O.K.Li + Victor O.K.Li KyunghyunCho - SamuelBowman + SamuelBowman 380–390 D18-1035 Beam search is a widely used approximate search strategy for neural network decoders, and it generally outperforms simple greedy decoding on tasks like machine translation. However, this improvement comes at substantial computational cost. In this paper, we propose a flexible new method that allows us to reap nearly the full benefits of beam search with nearly no additional computational cost. The method revolves around a small neural network actor that is trained to observe and manipulate the hidden state of a previously-trained decoder. To train this actor network, we introduce the use of a pseudo-parallel corpus built using the output of beam search on a base model, ranked by a target quality metric like BLEU. Our method is inspired by earlier work on this problem, but requires no reinforcement learning, and can be trained reliably on a range of models. Experiments on three parallel corpora and three architectures show that the method yields substantial improvements in translation quality and speed over each base system. @@ -480,7 +480,7 @@ YangZhao JiajunZhang ZhongjunHe - ChengqingZong + ChengqingZong HuaWu 391–400 D18-1036 @@ -491,7 +491,7 @@ Top-down Tree Structured Decoding with Syntactic Connections for Neural Machine Translation and Parsing Jetic - Hassan S.Shavarani + Hassan S.Shavarani AnoopSarkar 401–413 D18-1037 @@ -520,7 +520,7 @@ Emmanouil AntoniosPlatanios MrinmayaSachan GrahamNeubig - TomMitchell + TomMitchell 425–435 D18-1039 We propose a simple modification to existing neural machine translation (NMT) models that enables using a single universal model to translate between multiple languages while allowing for language specific parameterization, and that can also be used for domain adaptation. Our approach requires no changes to the model architecture of a standard NMT system, but instead introduces a new component, the contextual parameter generator (CPG), that generates the parameters of the system (e.g., weights in a neural network). This parameter generator accepts source and target language embeddings as input, and generates the parameters for the encoder and the decoder, respectively. The rest of the model remains unchanged and is shared across all languages. We show how this simple modification enables the system to use monolingual data for training and also perform zero-shot translation. We further show it is able to surpass state-of-the-art performance for both the IWSLT-15 and IWSLT-17 datasets and that the learned language embeddings are able to uncover interesting relationships between languages. @@ -557,7 +557,7 @@ SebastianRuder RyanCotterell YovaKementchedjhieva - AndersSøgaard + AndersSøgaard 458–468 D18-1042 We introduce a novel discriminative latent-variable model for the task of bilingual lexicon induction. Our model combines the bipartite matching dictionary prior of Haghighi et al. (2008) with a state-of-the-art embedding-based approach. To train the model, we derive an efficient Viterbi EM algorithm. We provide empirical improvements on six language pairs under two metrics and show that the prior theoretically and empirically helps to mitigate the hubness problem. We also demonstrate how previous work may be viewed as a similarly fashioned latent-variable model, albeit with a different prior. @@ -610,7 +610,7 @@ <fixed-case>NORMA</fixed-case>: Neighborhood Sensitive Maps for Multilingual Word Embeddings - NdapaNakashole + NdapaNakashole 512–522 D18-1047 Inducing multilingual word embeddings by learning a linear map between embedding spaces of different languages achieves remarkable accuracy on related languages. However, accuracy drops substantially when translating between distant languages. Given that languages exhibit differences in vocabulary, grammar, written form, or syntax, one would expect that embedding spaces of different languages have different structures especially for distant languages. With the goal of capturing such differences, we propose a method for learning neighborhood sensitive maps, NORMA. Our experiments show that NORMA outperforms current state-of-the-art methods for word translation between distant languages. @@ -657,7 +657,7 @@ <fixed-case>S</fixed-case>imple<fixed-case>Q</fixed-case>uestions Nearly Solved: A New Upperbound and Baseline Approach MichaelPetrochuk - LukeZettlemoyer + LukeZettlemoyer 554–558 D18-1051 The SimpleQuestions dataset is one of the most commonly used benchmarks for studying single-relation factoid questions. In this paper, we present new evidence that this benchmark can be nearly solved by standard methods. First, we show that ambiguity in the data bounds performance at 83.4%; many questions have more than one equally plausible interpretation. Second, we introduce a baseline that sets a new state-of-the-art performance level at 78.1% accuracy, despite using standard methods. Finally, we report an empirical analysis showing that the upperbound is loose; roughly a quarter of the remaining errors are also not resolvable from the linguistic signal. Together, these results suggest that the SimpleQuestions dataset is nearly solved. @@ -669,7 +669,7 @@ Phrase-Indexed Question Answering: A New Challenge for Scalable Document Comprehension MinjoonSeo TomKwiatkowski - AnkurParikh + AnkurParikh AliFarhadi HannanehHajishirzi 559–564 @@ -695,7 +695,7 @@ Cut to the Chase: A Context Zoom-in Network for Reading Comprehension - Sathish ReddyIndurthi + Sathish ReddyIndurthi SeunghakYu SeohyunBack HeribertoCuayáhuitl @@ -722,7 +722,7 @@ Why is unsupervised alignment of <fixed-case>E</fixed-case>nglish embeddings from different algorithms so hard? MareikeHartmann YovaKementchedjhieva - AndersSøgaard + AndersSøgaard 582–586 D18-1056 This paper presents a challenge to the community: Generative adversarial networks (GANs) can perfectly align independent English word embeddings induced using the same algorithm, based on distributional information alone; but fails to do so, for two different embeddings algorithms. Why is that? We believe understanding why, is key to understand both modern word embedding algorithms and the limitations and instability dynamics of GANs. This paper shows that (a) in all these cases, where alignment fails, there exists a linear transform between the two embeddings (so algorithm biases do not lead to non-linear differences), and (b) similar effects can not easily be obtained by varying hyper-parameters. One plausible suggestion based on our initial experiments is that the differences in the inductive biases of the embedding algorithms lead to an optimization landscape that is riddled with local optima, leading to a very small basin of convergence, but we present this more as a challenge paper than a technical contribution. @@ -770,7 +770,7 @@ GeGao EunsolChoi YejinChoi - LukeZettlemoyer + LukeZettlemoyer 607–613 D18-1060 We present end-to-end neural models for detecting metaphorical word use in context. We show that relatively standard BiLSTM models which operate on complete sentences work well in this setting, in comparison to previous work that used more restricted forms of linguistic context. These models establish a new state-of-the-art on existing verb metaphor detection benchmarks, and show strong performance on jointly predicting the metaphoricity of all words in a running text. @@ -780,8 +780,8 @@ Distant Supervision from Disparate Sources for Low-Resource Part-of-Speech Tagging - BarbaraPlank - ŽeljkoAgić + BarbaraPlank + ŽeljkoAgić 614–620 D18-1061 a cross-lingual neural part-of-speech tagger that learns from disparate sources of distant supervision, and realistically scales to hundreds of low-resource languages. The model exploits annotation projection, instance selection, tag dictionaries, morphological lexicons, and distributed representations, all in a uniform framework. The approach is simple, yet surprisingly effective, resulting in a new state of the art without access to any gold annotated data. @@ -854,7 +854,7 @@ Exploring Optimism and Pessimism in <fixed-case>T</fixed-case>witter Using Deep Learning CorneliaCaragea - Liviu P.Dinu + Liviu P.Dinu BogdanDumitru 652–658 D18-1067 @@ -965,7 +965,7 @@ A Dataset for Document Grounded Conversations KangyanZhou ShrimaiPrabhumoye - Alan WBlack + Alan WBlack 708–713 D18-1076 D18-1076.Attachment.zip @@ -978,7 +978,7 @@ SeonghanRyu SangjunKoo HwanjoYu - Gary GeunbaeLee + Gary GeunbaeLee 714–718 D18-1077 The main goal of this paper is to develop out-of-domain (OOD) detection for dialog systems. We propose to use only in-domain (IND) sentences to build a generative adversarial network (GAN) of which the discriminator generates low scores for OOD sentences. To improve basic GANs, we apply feature matching loss in the discriminator, use domain-category analysis as an additional task in the discriminator, and remove the biases in the generator. Thereby, we reduce the huge effort of collecting OOD sentences for training OOD detection. For evaluation, we experimented OOD detection on a multi-domain dialog system. The experimental results showed the proposed method was most accurate compared to the existing methods. @@ -1008,9 +1008,9 @@ YangXu YuHong HuibinRuan - JianminYao + JianminYao MinZhang - GuodongZhou + GuodongZhou 725–731 D18-1079 We tackle discourse-level relation recognition, a problem of determining semantic relations between text spans. Implicit relation recognition is challenging due to the lack of explicit relational clues. The increasingly popular neural network techniques have been proven effective for semantic encoding, whereby widely employed to boost semantic relation discrimination. However, learning to predict semantic relations at a deep level heavily relies on a great deal of training data, but the scale of the publicly available data in this field is limited. In this paper, we follow Rutherford and Xue (2015) to expand the training data set using the corpus of explicitly-related arguments, by arbitrarily dropping the overtly presented discourse connectives. On the basis, we carry out an experiment of sampling, in which a simple active learning approach is used, so as to take the informative instances for data expansion. The goal is to verify whether the selective use of external data not only reduces the time consumption of retraining but also ensures a better system performance. Using the expanded training data, we retrain a convolutional neural network (CNN) based classifer which is a simplified version of Qin et al. (2016)’s stacking gated relation recognizer. Experimental results show that expanding the training set with small-scale carefully-selected external data yields substantial performance gain, with the improvements of about 4% for accuracy and 3.6% for F-score. This allows a weak classifier to achieve a comparable performance against the state-of-the-art systems. @@ -1068,7 +1068,7 @@ Training for Diversity in Image Paragraph Captioning LukeMelas-Kyriazi - AlexanderRush + AlexanderRush GeorgeHan 757–761 D18-1084 @@ -1080,7 +1080,7 @@ A Graph-theoretic Summary Evaluation for <fixed-case>ROUGE</fixed-case> ElahehShafieiBavani MohammadEbrahimi - RaymondWong + RaymondWong FangChen 762–767 D18-1085 @@ -1127,8 +1127,8 @@ On the Abstractiveness of Neural Document Summarization - FangfangZhang - Jin-geYao + FangfangZhang + Jin-geYao RuiYan 785–790 D18-1089 @@ -1141,7 +1141,7 @@ YuchengWang ZhongyuWei YaqianZhou - XuanjingHuang + XuanjingHuang 791–797 D18-1090 Automatic essay scoring (AES) is the task of assigning grades to essays without human interference. Existing systems for AES are typically trained to predict the score of each single essay at a time without considering the rating schema. In order to address this issue, we propose a reinforcement learning framework for essay scoring that incorporates quadratic weighted kappa as guidance to optimize the scoring system. Experiment results on benchmark datasets show the effectiveness of our framework. @@ -1185,7 +1185,7 @@ A Hierarchical Neural Attention-based Text Classifier KoustuvSinha YueDong - Jackie Chi KitCheung + Jackie Chi KitCheung DerekRuths 817–823 D18-1094 @@ -1233,7 +1233,7 @@ Topic Intrusion for Automatic Topic Model Evaluation ShraeyBhatia Jey HanLau - TimothyBaldwin + TimothyBaldwin 844–849 D18-1098 D18-1098.Attachment.pdf @@ -1270,7 +1270,7 @@ Improving Unsupervised Word-by-Word Translation with Language Model and Denoising Autoencoder YunsuKim JiahuiGeng - HermannNey + HermannNey 862–868 D18-1101 Unsupervised learning of cross-lingual word embedding offers elegant matching of words across languages, but has fundamental limitations in translating sentences. In this paper, we propose simple yet effective methods to improve word-by-word translation of cross-lingual embeddings, using only monolingual corpora but without any back-translation. We integrate a language model for context-aware search, and use a novel denoising autoencoder to handle reordering. Our system surpasses state-of-the-art unsupervised translation systems without costly iterative training. We also analyze the effect of vocabulary size and denoising type on the translation performance, which provides better understanding of learning the cross-lingual word embedding and its usage in translation. @@ -1350,8 +1350,8 @@ Towards Dynamic Computation Graphs via Sparse Latent Structure VladNiculae - André F. T.Martins - ClaireCardie + André F. T.Martins + ClaireCardie 905–911 D18-1108 D18-1108.Attachment.zip @@ -1442,7 +1442,7 @@ Conversational Decision-Making Model for Predicting the King’s Decision in the Annals of the <fixed-case>J</fixed-case>oseon Dynasty JinYeongBak - AliceOh + AliceOh 956–961 D18-1115 Styles of leaders when they make decisions in groups vary, and the different styles affect the performance of the group. To understand the key words and speakers associated with decisions, we initially formalize the problem as one of predicting leaders’ decisions from discussion with group members. As a dataset, we introduce conversational meeting records from a historical corpus, and develop a hierarchical RNN structure with attention and pre-trained speaker embedding in the form of a, Conversational Decision Making Model (CDMM). The CDMM outperforms other baselines to predict leaders’ final decisions from the data. We explain why CDMM works better than other methods by showing the key words and speakers discovered from the attentions as evidence. @@ -1491,7 +1491,7 @@ How agents see things: On visual representations in an emergent language game DianeBouchacourt - MarcoBaroni + MarcoBaroni 981–985 D18-1119 There is growing interest in the language developed by agents interacting in emergent-communication settings. Earlier studies have focused on the agents’ symbol usage, rather than on their representation of visual input. In this paper, we consider the referential games of Lazaridou et al. (2017), and investigate the representations the agents develop during their evolving interaction. We find that the agents establish successful communication by inducing visual representations that almost perfectly align with each other, but, surprisingly, do not capture the conceptual properties of the objects depicted in the input images. We conclude that, if we care about developing language-like communication systems, we must pay more attention to the visual semantics agents associate to the symbols they use. @@ -1529,7 +1529,7 @@ Event Detection with Neural Networks: A Rigorous Empirical Evaluation - WalkerOrr + WalkerOrr PrasadTadepalli XiaoliFern 999–1004 @@ -1571,7 +1571,7 @@ BoliangZhang HengJi LejianLiao - HeyanHuang + HeyanHuang 1018–1023 D18-1125 Relation Extraction suffers from dramatical performance decrease when training a model on one genre and directly applying it to a new genre, due to the distinct feature distributions. Previous studies address this problem by discovering a shared space across genres using manually crafted features, which requires great human effort. To effectively automate this process, we design a genre-separation network, which applies two encoders, one genre-independent and one genre-shared, to explicitly extract genre-specific and genre-agnostic features. Then we train a relation classifier using the genre-agnostic features on the source genre and directly apply to the target genre. Experiment results on three distinct genres of the ACE dataset show that our approach achieves up to 6.1% absolute F1-score gain compared to previous methods. By incorporating a set of external linguistic features, our approach outperforms the state-of-the-art by 1.7% absolute F1 gain. We make all programs of our model publicly available for research purpose @@ -1593,7 +1593,7 @@ ShaoboLiu RuiCheng XiaomingYu - XueqiCheng + XueqiCheng 1030–1035 D18-1127 The task of event detection involves identifying and categorizing event triggers. Contextual information has been shown effective on the task. However, existing methods which utilize contextual information only process the context once. We argue that the context can be better exploited by processing the context multiple times, allowing the model to perform complex reasoning and to generate better context representation, thus improving the overall performance. Meanwhile, dynamic memory network (DMN) has demonstrated promising capability in capturing contextual information and has been applied successfully to various tasks. In light of the multi-hop mechanism of the DMN to model the context, we propose the trigger detection dynamic memory network (TD-DMN) to tackle the event detection problem. We performed a five-fold cross-validation on the ACE-2005 dataset and experimental results show that the multi-hop mechanism does improve the performance and the proposed model achieves best F1 score compared to the state-of-the-art methods. @@ -1629,7 +1629,7 @@ Entity Tracking Improves Cloze-style Reading Comprehension LuongHoang SamWiseman - AlexanderRush + AlexanderRush 1049–1055 D18-1130 D18-1130.Attachment.pdf @@ -1643,7 +1643,7 @@ TaoLei AlessandroMoschitti SalvatoreRomeo - PreslavNakov + PreslavNakov 1056–1063 D18-1131 We address the problem of detecting duplicate questions in forums, which is an important step towards automating the process of answering new questions. As finding and annotating such potential duplicates manually is very tedious and costly, automatic methods based on machine learning are a viable alternative. However, many forums do not have annotated data, i.e., questions labeled by experts as duplicates, and thus a promising solution is to use domain adaptation from another forum that has such annotations. Here we focus on adversarial domain adaptation, deriving important findings about when it performs well and what properties of the domains are important in this regard. Our experiments with StackExchange data show an average improvement of 5.6% over the best baseline across multiple pairs of domains. @@ -1688,7 +1688,7 @@ Improving the results of string kernels in sentiment analysis and <fixed-case>A</fixed-case>rabic dialect identification by adapting them to your test set Radu TudorIonescu - Andrei M.Butnaru + Andrei M.Butnaru 1084–1090 D18-1135 Recently, string kernels have obtained state-of-the-art results in various text classification tasks such as Arabic dialect identification or native language identification. In this paper, we apply two simple yet effective transductive learning approaches to further improve the results of string kernels. The first approach is based on interpreting the pairwise string kernel similarities between samples in the training set and samples in the test set as features. Our second approach is a simple self-training method based on two learning iterations. In the first iteration, a classifier is trained on the training set and tested on the test set, as usual. In the second iteration, a number of test samples (to which the classifier associated higher confidence scores) are added to the training set for another round of training. However, the ground-truth labels of the added test samples are not necessary. Instead, we use the labels predicted by the classifier in the first training iteration. By adapting string kernels to the test set, we report significantly better accuracy rates in English polarity classification and Arabic dialect identification. @@ -1698,7 +1698,7 @@ Parameterized Convolutional Neural Networks for Aspect Level Sentiment Classification BinxuanHuang - KathleenCarley + KathleenCarley 1091–1096 D18-1136 We introduce a novel parameterized convolutional neural network for aspect level sentiment classification. Using parameterized filters and parameterized gates, we incorporate aspect information into convolutional neural networks (CNN). Experiments demonstrate that our parameterized filters and parameterized gates effectively capture the aspect-specific features, and our CNN-based models achieve excellent results on SemEval 2014 datasets. @@ -1708,7 +1708,7 @@ Improving Multi-label Emotion Classification via Sentiment Classification with Dual Attention Transfer Network JianfeiYu - LuísMarujo + LuísMarujo JingJiang PradeepKaruturi WilliamBrendel @@ -1795,7 +1795,7 @@ YasuhideMiura MotokiTaniguchi Yan-YingChen - FrancineChen + FrancineChen TomokoOhkuma 1139–1145 D18-1144 @@ -1808,9 +1808,9 @@ MasoudRouhizadeh KokilJaidka LauraSmith - H. AndrewSchwartz + H. AndrewSchwartz AnnekeBuffone - LyleUngar + LyleUngar 1146–1152 D18-1145 D18-1145.Attachment.pdf @@ -1840,11 +1840,11 @@ The Remarkable Benefit of User-Level Aggregation for Lexical-based Population-Level Predictions SalvatoreGiorgi - DanielPreoţiuc-Pietro + DanielPreoţiuc-Pietro AnnekeBuffone DanielRieman - LyleUngar - H. AndrewSchwartz + LyleUngar + H. AndrewSchwartz 1167–1172 D18-1148 D18-1148.Attachment.zip @@ -1895,7 +1895,7 @@ HaoPeng RoySchwartz SamThomson - Noah A.Smith + Noah A.Smith 1203–1214 D18-1152 D18-1152.Attachment.pdf @@ -1924,7 +1924,7 @@ ZhengzhongLiu ChenyanXiong TerukoMitamura - EduardHovy + EduardHovy 1226–1236 D18-1154 Identifying the salience (i.e. importance) of discourse units is an important task in language understanding. While events play important roles in text documents, little research exists on analyzing their saliency status. This paper empirically studies Event Salience and proposes two salience detection models based on discourse relations. The first is a feature based salience model that incorporates cohesion among discourse units. The second is a neural model that captures more complex interactions between discourse units. In our new large-scale event salience corpus, both methods significantly outperform the strong frequency baseline, while our neural model further improves the feature based one by a large margin. Our analyses demonstrate that our neural model captures interesting connections between salience and discourse unit relations (e.g., scripts and frame structures). @@ -1935,7 +1935,7 @@ Temporal Information Extraction by Predicting Relative Time-lines ArtuurLeeuwenberg - Marie-FrancineMoens + Marie-FrancineMoens 1237–1246 D18-1155 The current leading paradigm for temporal information extraction from text consists of three phases: (1) recognition of events and temporal expressions, (2) recognition of temporal relations among them, and (3) time-line construction from the temporal relations. In contrast to the first two phases, the last phase, time-line construction, received little attention and is the focus of this work. In this paper, we propose a new method to construct a linear time-line from a set of (extracted) temporal relations. But more importantly, we propose a novel paradigm in which we directly predict start and end-points for events from the text, constituting a time-line without going through the intermediate step of prediction of temporal relations as in earlier work. Within this paradigm, we propose two models that predict in linear complexity, and a new training loss using TimeML-style annotations, yielding promising results. @@ -1947,7 +1947,7 @@ Jointly Multiple Events Extraction via Attention-based Graph Information Aggregation XiaoLiu ZhunchenLuo - HeyanHuang + HeyanHuang 1247–1256 D18-1156 Event extraction is of practical utility in natural language processing. In the real world, it is a common phenomenon that multiple events existing in the same sentence, where extracting them are more difficult than extracting a single event. Previous works on modeling the associations between events by sequential modeling methods suffer a lot from the low efficiency in capturing very long-range dependencies. In this paper, we propose a novel Jointly Multiple Events Extraction (JMEE) framework to jointly extract multiple event triggers and arguments by introducing syntactic shortcut arcs to enhance information flow and attention-based graph convolution networks to model graph information. The experiment results demonstrate that our proposed framework achieves competitive results compared with state-of-the-art methods. @@ -1961,7 +1961,7 @@ RishabhJoshi Sai SumanPrayaga ChiranjibBhattacharyya - ParthaTalukdar + ParthaTalukdar 1257–1266 D18-1157 D18-1157.Attachment.pdf @@ -2036,7 +2036,7 @@ Synthetic Data Made to Order: The Case of Parsing DingquanWang - JasonEisner + JasonEisner 1325–1337 D18-1163 D18-1163.Attachment.pdf @@ -2063,7 +2063,7 @@ Learning a Policy for Opportunistic Active Learning AishwaryaPadmakumar PeterStone - RaymondMooney + RaymondMooney 1347–1357 D18-1165 D18-1165.Attachment.tgz @@ -2091,7 +2091,7 @@ JieLei LichengYu MohitBansal - TamaraBerg + TamaraBerg 1369–1379 D18-1167 D18-1167.Attachment.pdf @@ -2136,7 +2136,7 @@ ZexueHe QiaolinXia ZhifangSui - BaobaoChang + BaobaoChang 1402–1411 D18-1170 The goal of Word Sense Disambiguation (WSD) is to identify the correct meaning of a word in the particular context. Traditional supervised methods only use labeled data (context), while missing rich lexical knowledge such as the gloss which defines the meaning of a word sense. Recent studies have shown that incorporating glosses into neural networks for WSD has made significant improvement. However, the previous models usually build the context representation and gloss representation separately. In this paper, we find that the learning for the context and gloss representation can benefit from each other. Gloss can help to highlight the important words in the context, thus building a better context representation. Context can also help to locate the key words in the gloss of the correct word sense. Therefore, we introduce a co-attention mechanism to generate co-dependent representations for the context and gloss. Furthermore, in order to capture both word-level and sentence-level information, we extend the attention mechanism in a hierarchical fashion. Experimental results show that our model achieves the state-of-the-art results on several standard English all-words WSD test datasets. @@ -2168,7 +2168,7 @@ Memory, Show the Way: Memory Based Few Shot Word Representation Learning JingyuanSun ShaonanWang - ChengqingZong + ChengqingZong 1435–1444 D18-1173 Distributional semantic models (DSMs) generally require sufficient examples for a word to learn a high quality representation. This is in stark contrast with human who can guess the meaning of a word from one or a few referents only. In this paper, we propose Mem2Vec, a memory based embedding learning method capable of acquiring high quality word representations from fairly limited context. Our method directly adapts the representations produced by a DSM with a longterm memory to guide its guess of a novel word. Based on a pre-trained embedding space, the proposed method delivers impressive performance on two challenging few-shot word similarity tasks. Embeddings learned with our method also lead to considerable improvements over strong baselines on NER and sentiment classification. @@ -2235,10 +2235,10 @@ Dissecting Contextual Word Embeddings: Architecture and Representation - Matthew E.Peters + Matthew E.Peters MarkNeumann - LukeZettlemoyer - Wen-tauYih + LukeZettlemoyer + Wen-tauYih 1499–1509 D18-1179 D18-1179.Attachment.pdf @@ -2308,7 +2308,7 @@ Compare, Compress and Propagate: Enhancing Neural Architectures with Alignment Factorization for Natural Language Inference YiTay - Anh TuanLuu + Anh TuanLuu Siu CheungHui 1565–1575 D18-1185 @@ -2322,7 +2322,7 @@ XipengQiu XinchiChen DongLiang - XuanjingHuang + XuanjingHuang 1576–1585 D18-1186 Attention-based neural models have achieved great success in natural language inference (NLI). In this paper, we propose the Convolutional Interaction Network (CIN), a general model to capture the interaction between two sentences, which can be an alternative to the attention mechanism for NLI. Specifically, CIN encodes one sentence with the filters dynamically generated based on another sentence. Since the filters may be designed to have various numbers and sizes, CIN can capture more complicated interaction patterns. Experiments on three large datasets demonstrate CIN’s efficacy. @@ -2386,7 +2386,7 @@ A Span Selection Model for Semantic Role Labeling HirokiOuchi HiroyukiShindo - YujiMatsumoto + YujiMatsumoto 1630–1642 D18-1191 We present a simple and accurate span-based model for semantic role labeling (SRL). Our model directly takes into account all possible argument spans and scores them for each label. At decoding time, we greedily select higher scoring labeled spans. One advantage of our model is to allow us to design and use span-level features, that are difficult to use in token-based BIO tagging approaches. Experimental results demonstrate that our ensemble model achieves the state-of-the-art results, 87.4 F1 and 87.0 F1 on the CoNLL-2005 and 2012 datasets, respectively. @@ -2398,7 +2398,7 @@ SrinivasanIyer IoannisKonstas AlvinCheung - LukeZettlemoyer + LukeZettlemoyer 1643–1652 D18-1192 Source code is rarely written in isolation. It depends significantly on the programmatic context, such as the class that the code would reside in. To study this phenomenon, we introduce the task of generating class member functions given English documentation and the programmatic context provided by the rest of the class. This task is challenging because the desired code can vary greatly depending on the functionality the class provides (e.g., a sort function may or may not be available when we are asked to “return the smallest element” in a particular member variable list). We introduce CONCODE, a new large dataset with over 100,000 examples consisting of Java classes from online code repositories, and develop a new encoder-decoder architecture that models the interaction between the method documentation and the class environment. We also present a detailed error analysis suggesting that there is significant room for future work on this task. @@ -2413,7 +2413,7 @@ RuiZhang DongxuWang ZifanLi - DragomirRadev + DragomirRadev 1653–1663 D18-1193 Most existing studies in text-to-SQL tasks do not require generating complex SQL queries with multiple clauses or sub-queries, and generalizing to new, unseen databases. In this paper we propose SyntaxSQLNet, a syntax tree network to address the complex and cross-domain text-to-SQL generation task. SyntaxSQLNet employs a SQL specific syntax tree-based decoder with SQL generation path history and table-aware column attention encoders. We evaluate SyntaxSQLNet on a new large-scale text-to-SQL corpus containing databases with multiple tables and complex SQL queries containing multiple SQL clauses and nested queries. We use a database split setting where databases in the test set are unseen during training. Experimental results show that SyntaxSQLNet can handle a significantly greater number of complex SQL examples than prior work, outperforming the previous state-of-the-art model by 9.5% in exact matching accuracy. To our knowledge, we are the first to study this complex text-to-SQL task. Our task and models with the latest updates are available at https://yale-lily.github.io/seq2sql/spider. @@ -2438,7 +2438,7 @@ Learning to Learn Semantic Parsers from Natural Language Supervision IgorLabutov BishanYang - TomMitchell + TomMitchell 1676–1690 D18-1195 As humans, we often rely on language to learn language. For example, when corrected in a conversation, we may learn from that correction, over time improving our language fluency. Inspired by this observation, we propose a learning algorithm for training semantic parsers from supervision (feedback) expressed in natural language. Our algorithm learns a semantic parser from users’ corrections such as “no, what I really meant was before his job, not after”, by also simultaneously learning to parse this natural language feedback in order to leverage it as a form of supervision. Unlike supervision with gold-standard logical forms, our method does not require the user to be familiar with the underlying logical formalism, and unlike supervision from denotation, it does not require the user to know the correct answer to their query. This makes our learning algorithm naturally scalable in settings where existing conversational logs are available and can be leveraged as training data. We construct a novel dataset of natural language feedback in a conversational setting, and show that our method is effective at learning a semantic parser from such natural language supervision. @@ -2448,8 +2448,8 @@ <fixed-case>D</fixed-case>eep<fixed-case>C</fixed-case>x: A transition-based approach for shallow semantic parsing with complex constructional triggers JesseDunietz - JaimeCarbonell - LoriLevin + JaimeCarbonell + LoriLevin 1691–1701 D18-1196 D18-1196.Attachment.zip @@ -2575,7 +2575,7 @@ Don’t Give Me the Details, Just the Summary! Topic-Aware Convolutional Neural Networks for Extreme Summarization ShashiNarayan - Shay B.Cohen + Shay B.Cohen MirellaLapata 1797–1807 D18-1206 @@ -2600,8 +2600,8 @@ Content Selection in Deep Learning Models of Summarization ChrisKedzie - KathleenMcKeown - HalDaumé III + KathleenMcKeown + HalDaumé III 1818–1828 D18-1208 @@ -2653,7 +2653,7 @@ Learning Neural Representation for <fixed-case>CLIR</fixed-case> with Adversarial Framework - BoLi + BoLi PingCheng 1861–1870 D18-1212 @@ -2666,7 +2666,7 @@ <fixed-case>AD</fixed-case>3: Attentive Deep Document Dater Swayambhu NathRay Shib SankarDasgupta - ParthaTalukdar + ParthaTalukdar 1871–1880 D18-1213 Knowledge of the creation date of documents facilitates several tasks such as summarization, event extraction, temporally focused information extraction etc. Unfortunately, for most of the documents on the Web, the time-stamp metadata is either missing or can’t be trusted. Thus, predicting creation time from document content itself is an important task. In this paper, we propose Attentive Deep Document Dater (AD3), an attention-based neural document dating system which utilizes both context and temporal information in documents in a flexible and principled manner. We perform extensive experimentation on multiple real-world datasets to demonstrate the effectiveness of AD3 over neural and non-neural baselines. @@ -2713,9 +2713,9 @@ Semi-Supervised Sequence Modeling with Cross-View Training KevinClark - Minh-ThangLuong - Christopher D.Manning - QuocLe + Minh-ThangLuong + Christopher D.Manning + QuocLe 1914–1925 D18-1217 D18-1217.Attachment.zip @@ -2728,9 +2728,9 @@ A Probabilistic Annotation Model for Crowdsourcing Coreference SilviuPaun JonChamberlain - UdoKruschwitz + UdoKruschwitz JuntaoYu - MassimoPoesio + MassimoPoesio 1926–1937 D18-1218 D18-1218.Attachment.zip @@ -2753,7 +2753,7 @@ NoeliaDe La Cruz AdamTrischler KaheerSuleman - Jackie Chi KitCheung + Jackie Chi KitCheung 1949–1958 D18-1220 D18-1220.Attachment.pdf @@ -2815,7 +2815,7 @@ <fixed-case>H</fixed-case>y<fixed-case>TE</fixed-case>: Hyperplane-based Temporally aware Knowledge Graph Embedding Shib SankarDasgupta Swayambhu NathRay - ParthaTalukdar + ParthaTalukdar 2001–2011 D18-1225 Knowledge Graph (KG) embedding has emerged as an active area of research resulting in the development of several KG embedding methods. Relational facts in KG often show temporal dynamics, e.g., the fact (Cristiano_Ronaldo, playsFor, Manchester_United) is valid only from 2003 to 2009. Most of the existing KG embedding methods ignore this temporal dimension while learning embeddings of the KG elements. In this paper, we propose HyTE, a temporally aware KG embedding method which explicitly incorporates time in the entity-relation space by associating each timestamp with a corresponding hyperplane. HyTE not only performs KG inference using temporal guidance, but also predicts temporal scopes for relational facts with missing time annotations. Through extensive experimentation on temporal datasets extracted from real-world KGs, we demonstrate the effectiveness of our model over both traditional as well as temporal KG embedding methods. @@ -2824,7 +2824,7 @@ Neural Adaptation Layers for Cross-domain Named Entity Recognition - Bill YuchenLin + Bill YuchenLin WeiLu 2012–2022 D18-1226 @@ -2903,7 +2903,7 @@ YuxingPeng FuruWei ZhenHuang - DongshengLi + DongshengLi NanYang MingZhou 2077–2086 @@ -2918,7 +2918,7 @@ MaxBartolo PatrickLewis SameerSingh - TimRocktäschel + TimRocktäschel MikeSheldon GuillaumeBouchard SebastianRiedel @@ -2970,7 +2970,7 @@ <fixed-case>M</fixed-case>emo<fixed-case>R</fixed-case>eader: Large-Scale Reading Comprehension through Neural Memory Controller SeohyunBack SeunghakYu - Sathish ReddyIndurthi + Sathish ReddyIndurthi JihieKim JaegulChoo 2131–2140 @@ -2983,7 +2983,7 @@ Multi-Granular Sequence Encoding via Dilated Compositional Units for Reading Comprehension YiTay - Anh TuanLuu + Anh TuanLuu Siu CheungHui 2141–2151 D18-1238 @@ -3018,16 +3018,16 @@ HeHe MohitIyyer MarkYatskar - Wen-tauYih + Wen-tauYih YejinChoi PercyLiang - LukeZettlemoyer + LukeZettlemoyer 2174–2184 D18-1241 D18-1241.Attachment.zip + D18-1241.Poster.pdf We present QuAC, a dataset for Question Answering in Context that contains 14K information-seeking QA dialogs (100K questions in total). The dialogs involve two crowd workers: (1) a student who poses a sequence of freeform questions to learn as much as possible about a hidden Wikipedia text, and (2) a teacher who answers the questions by providing short excerpts from the text. QuAC introduces challenges not found in existing machine comprehension datasets: its questions are often more open-ended, unanswerable, or only meaningful within the dialog context, as we show in a detailed qualitative evaluation. We also report results for a number of reference models, including a recently state-of-the-art reading comprehension architecture extended to model dialog context. Our best model underperforms humans by 20 F1, suggesting that there is significant room for future work on this data. Dataset, baseline, and leaderboard available at http://quac.ai. 10.18653/v1/D18-1241 - D18-1241.Poster.pdf choi-etal-2018-quac @@ -3035,7 +3035,7 @@ KangqiLuo FengliLin XushengLuo - KennyZhu + KennyZhu 2185–2194 D18-1242 Answering complex questions that involve multiple entities and multiple relations using a standard knowledge base is an open and challenging task. Most existing KBQA approaches focus on simpler questions and do not work very well on complex questions because they were not able to simultaneously represent the question and the corresponding complex query structure. In this work, we encode such complex query structure into a uniform vector representation, and thus successfully capture the interactions between individual semantic components within a complex question. This approach consistently outperforms existing methods on complex questions while staying competitive on simple questions. @@ -3058,7 +3058,7 @@ Graph Convolution over Pruned Dependency Trees Improves Relation Extraction YuhaoZhang PengQi - Christopher D.Manning + Christopher D.Manning 2205–2215 D18-1244 D18-1244.Attachment.pdf @@ -3069,7 +3069,7 @@ Multi-Level Structured Self-Attentions for Distantly Supervised Relation Extraction JinhuaDu - JingguangHan + JingguangHan AndyWay DadongWan 2216–2225 @@ -3123,7 +3123,7 @@ Extracting Entities and Relations with Joint Minimum Risk Training ChangzhiSun YuanbinWu - ManLan + ManLan ShiliangSun WentingWang Kuang-ChihLee @@ -3137,7 +3137,7 @@ Large-scale Exploration of Neural Relation Classification Architectures - Hoang-QuynhLe + Hoang-QuynhLe Duy-CatCan Sinh T.Vu Thanh HaiDang @@ -3206,7 +3206,7 @@ NikitaMoghe SiddharthaArora SumanBanerjee - Mitesh M.Khapra + Mitesh M.Khapra 2322–2332 D18-1255 D18-1255.Attachment.zip @@ -3234,7 +3234,7 @@ QizheXie GuokunLai ZihangDai - EduardHovy + EduardHovy 2344–2356 D18-1257 Cloze tests are widely adopted in language exams to evaluate students’ language proficiency. In this paper, we propose the first large-scale human-created cloze test dataset CLOTH, containing questions used in middle-school and high-school language exams. With missing blanks carefully created by teachers and candidate choices purposely designed to be nuanced, CLOTH requires a deeper language understanding and a wider attention span than previously automatically-generated cloze datasets. We test the performance of dedicatedly designed baseline models including a language model trained on the One Billion Word Corpus and show humans outperform them by a significant margin. We investigate the source of the performance gap, trace model deficiencies to some distinct properties of CLOTH, and identify the limited ability of comprehending the long-term context to be the key bottleneck. @@ -3261,9 +3261,9 @@ PengQi SaizhengZhang YoshuaBengio - WilliamCohen + WilliamCohen RuslanSalakhutdinov - Christopher D.Manning + Christopher D.Manning 2369–2380 D18-1259 D18-1259.Attachment.pdf @@ -3356,17 +3356,17 @@ Policy Shaping and Generalized Update Equations for Semantic Parsing from Denotations - DipendraMisra + DipendraMisra Ming-WeiChang XiaodongHe - Wen-tauYih + Wen-tauYih 2442–2452 D18-1266 D18-1266.Attachment.pdf + D18-1266.Presentation.pdf Semantic parsing from denotations faces two key challenges in model training: (1) given only the denotations (e.g., answers), search for good candidate semantic parses, and (2) choose the best model update algorithm. We propose effective and general solutions to each of them. Using policy shaping, we bias the search procedure towards semantic parses that are more compatible to the text, which provide better supervision signals for training. In addition, we propose an update equation that generalizes three different families of learning algorithms, which enables fast model exploration. When experimented on a recently proposed sequential question answering dataset, our framework leads to a new state-of-the-art model that outperforms previous work by 5.0% absolute on exact match accuracy. @@ -3402,7 +3402,7 @@ RutyRinott GuillaumeLample AdinaWilliams - SamuelBowman + SamuelBowman HolgerSchwenk VeselinStoyanov 2475–2485 @@ -3431,7 +3431,7 @@ QingDou HengJi LeiCui - BaobaoChang + BaobaoChang ZhifangSui FuruWei MingZhou @@ -3451,7 +3451,7 @@ LiangYang KanXu ZhihaoYang - JianWang + JianWang ShaowuZhang BoXu DongyuZhang @@ -3492,7 +3492,7 @@ MinlongPeng DiLiang KeyuDing - XuanjingHuang + XuanjingHuang 2540–2549 D18-1275 Part-of-Speech (POS) tagging for Twitter has received considerable attention in recent years. Because most POS tagging methods are based on supervised models, they usually require a large amount of labeled data for training. However, the existing labeled datasets for Twitter are much smaller than those for newswire text. Hence, to help POS tagging for Twitter, most domain adaptation methods try to leverage newswire datasets by learning the shared features between the two domains. However, from a linguistic perspective, Twitter users not only tend to mimic the formal expressions of traditional media, like news, but they also appear to be developing linguistically informal styles. Therefore, POS tagging for the formal Twitter context can be learned together with the newswire dataset, while POS tagging for the informal Twitter context should be learned separately. To achieve this task, in this work, we propose a hypernetwork-based method to generate different parameters to separately model contexts with different expression styles. Experimental results on three different datasets show that our approach achieves better performance than state-of-the-art methods in most cases. @@ -3505,8 +3505,8 @@ BishalSantra Sasi PrasanthBandaru GauravSahu - Vishnu DuttSharma - PavankumarSatuluri + Vishnu DuttSharma + PavankumarSatuluri PawanGoyal 2550–2561 D18-1276 @@ -3544,7 +3544,7 @@ YingweiXin EthanHart VibhutiMahajan - Jean-DavidRuvini + Jean-DavidRuvini 2584–2593 D18-1279 Character-based neural models have recently proven very useful for many NLP tasks. However, there is a gap of sophistication between methods for learning representations of sentences and words. While, most character models for learning representations of sentences are deep and complex, models for learning representations of words are shallow and simple. Also, in spite of considerable research on learning character embeddings, it is still not clear which kind of architecture is the best for capturing character-to-word representations. To address these questions, we first investigate the gaps between methods for learning word and sentence representations. We conduct detailed experiments and comparisons on different state-of-the-art convolutional models, and also investigate the advantages and disadvantages of their constituents. Furthermore, we propose IntNet, a funnel-shaped wide convolutional neural architecture with no down-sampling for learning representations of the internal structure of words by composing their characters from limited, supervised training corpora. We evaluate our proposed model on six sequence labeling datasets, including named entity recognition, part-of-speech tagging, and syntactic chunking. Our in-depth analysis shows that IntNet significantly outperforms other character embedding models and obtains new state-of-the-art performance without relying on any external knowledge or resources. @@ -3555,7 +3555,7 @@ <fixed-case>ICON</fixed-case>: Interactive Conversational Memory Network for Multimodal Emotion Detection DevamanyuHazarika SoujanyaPoria - RadaMihalcea + RadaMihalcea ErikCambria RogerZimmermann 2594–2604 @@ -3589,8 +3589,8 @@ Commonsense Justification for Action Explanation ShaohuaYang QiaoziGao - SariSadiya - JoyceChai + SariSadiya + JoyceChai 2627–2637 D18-1283 To enable collaboration and communication between humans and agents, this paper investigates learning to acquire commonsense evidence for action justification. In particular, we have developed an approach based on the generative Conditional Variational Autoencoder(CVAE) that models object relations/attributes of the world as latent variables and jointly learns a performer that predicts actions and an explainer that gathers commonsense evidence to justify the action. Our empirical results have shown that, compared to a typical attention-based model, CVAE achieves significantly higher performance in both action prediction and justification. A human subject study further shows that the commonsense evidence gathered by CVAE can be communicated to humans to achieve a significantly higher common ground between humans and agents. @@ -3601,7 +3601,7 @@ Learning Personas from Dialogue with Attentive Memory Networks EricChu PrashanthVijayaraghavan - DebRoy + DebRoy 2638–2646 D18-1284 D18-1284.Attachment.zip @@ -3640,8 +3640,8 @@ Mapping Instructions to Actions in 3<fixed-case>D</fixed-case> Environments with Visual Goal Prediction - DipendraMisra - AndrewBennett + DipendraMisra + AndrewBennett ValtsBlukis EyvindNiklasson MaxShatkhin @@ -3704,7 +3704,7 @@ Depth-bounding is effective: Improvements and evaluation of unsupervised <fixed-case>PCFG</fixed-case> induction LifengJin FinaleDoshi-Velez - TimothyMiller + TimothyMiller WilliamSchuler LaneSchwartz 2721–2731 @@ -3752,7 +3752,7 @@ WayneXiong LingfengWu JunZhang - AndreasStolcke + AndreasStolcke 2764–2768 D18-1296 We propose to generalize language models for conversational speech recognition to allow them to operate across utterance boundaries and speaker changes, thereby capturing conversation-level phenomena such as adjacency pairs, lexical entrainment, and topical coherence. The model consists of a long-short-term memory (LSTM) recurrent network that reads the entire word-level history of a conversation, as well as information about turn taking and speaker overlap, in order to predict each next word. The model is applied in a rescoring framework, where the word history prior to the current utterance is approximated with preliminary recognition results. In experiments in the conversational telephone speech domain (Switchboard) we find that such a model gives substantial perplexity reductions over a standard LSTM-LM with utterance scope, as well as improvements in word error rate. @@ -3777,7 +3777,7 @@ Training Millions of Personalized Dialogue Agents - Pierre-EmmanuelMazaré + Pierre-EmmanuelMazaré SamuelHumeau MartinRaison AntoineBordes @@ -3905,11 +3905,11 @@ Structured Multi-Label Biomedical Text Tagging via Attentive Neural Tree Decoding - GauravSingh + GauravSingh JamesThomas - IainMarshall + IainMarshall JohnShawe-Taylor - Byron C.Wallace + Byron C.Wallace 2837–2842 D18-1308 D18-1308.Attachment.pdf @@ -3933,7 +3933,7 @@ Evaluating the Utility of Hand-crafted Features in Sequence Labelling MinghaoWu FeiLiu - TrevorCohn + TrevorCohn 2850–2856 D18-1310 Conventional wisdom is that hand-crafted features are redundant for deep learning models, as they already learn adequate representations of text automatically from corpora. In this work, we test this claim by proposing a new method for exploiting handcrafted features as part of a novel hybrid learning approach, incorporating a feature auto-encoder loss component. We evaluate on the task of named entity recognition (NER), where we show that including manual features for part-of-speech, word shapes and gazetteers can improve the performance of a neural CRF model. We obtain a F 1 of 91.89 for the CoNLL-2003 English shared task, which significantly outperforms a collection of highly competitive baseline models. We also present an ablation study showing the importance of auto-encoding, over using features as either inputs or outputs alone, and moreover, show including the autoencoder components reduces training requirements to 60%, while retaining the same predictive accuracy. @@ -3944,8 +3944,8 @@ Improved Dependency Parsing using Implicit Word Connections Learned from Unlabeled Data WenhuiWang - BaobaoChang - MairgupMansur + BaobaoChang + MairgupMansur 2857–2863 D18-1311 Pre-trained word embeddings and language model have been shown useful in a lot of tasks. However, both of them cannot directly capture word connections in a sentence, which is important for dependency parsing given its goal is to establish dependency relations between words. In this paper, we propose to implicitly capture word connections from unlabeled data by a word ordering model with self-attention mechanism. Experiments show that these implicit word connections do improve our parsing model. Furthermore, by combining with a pre-trained language model, our model gets state-of-the-art performance on the English PTB dataset, achieving 96.35% UAS and 95.25% LAS. @@ -3989,7 +3989,7 @@ An Encoder-Decoder Approach to the Paradigm Cell Filling Problem - MiikkaSilfverberg + MiikkaSilfverberg MansHulden 2883–2889 D18-1315 @@ -4029,7 +4029,7 @@ Deep <fixed-case>B</fixed-case>ayesian Active Learning for Natural Language Processing: Results of a Large-Scale Empirical Study AdityaSiddhant - Zachary C.Lipton + Zachary C.Lipton 2904–2909 D18-1318 Several recent papers investigate Active Learning (AL) for mitigating the data dependence of deep learning for natural language processing. However, the applicability of AL to real-world problems remains an open question. While in supervised learning, practitioners can try many different methods, evaluating each against a validation set before selecting a model, AL affords no such luxury. Over the course of one AL run, an agent annotates its dataset exhausting its labeling budget. Thus, given a new task, we have no opportunity to compare models and acquisition functions. This paper provides a large-scale empirical study of deep active learning, addressing multiple tasks and, for each, multiple datasets, multiple models, and a full suite of acquisition functions. We find that across all settings, Bayesian active learning by disagreement, using uncertainty estimates provided either by Dropout or Bayes-by-Backprop significantly improves over i.i.d. baselines and usually outperforms classic uncertainty sampling. @@ -4052,7 +4052,7 @@ Multimodal neural pronunciation modeling for spoken languages with logographic origin MinhNguyen Gia H.Ngo - NancyChen + NancyChen 2916–2922 D18-1320 D18-1320.Attachment.zip @@ -4085,7 +4085,7 @@ How to represent a word and predict it, too: Improving tied architectures for language modelling KristinaGulordava LauraAina - GemmaBoleda + GemmaBoleda 2936–2941 D18-1323 D18-1323.Attachment.pdf @@ -4107,10 +4107,10 @@ Document-Level Neural Machine Translation with Hierarchical Attention Networks - LeslyMiculicich + LeslyMiculicich DhananjayRam NikolaosPappas - JamesHenderson + JamesHenderson 2947–2954 D18-1325 D18-1325.Attachment.pdf @@ -4124,7 +4124,7 @@ JiajunZhang FeifeiZhai JingfangXu - ChengqingZong + ChengqingZong 2955–2960 D18-1326 Due to the benefits of model compactness, multilingual translation (including many-to-one, many-to-many and one-to-many) based on a universal encoder-decoder architecture attracts more and more attention. However, previous studies show that one-to-many translation based on this framework cannot perform on par with the individually trained models. In this work, we introduce three strategies to improve one-to-many multilingual translation by balancing the shared and unique features. Within the architecture of one decoder for all target languages, we first exploit the use of unique initial states for different target languages. Then, we employ language-dependent positional embeddings. Finally and especially, we propose to divide the hidden cells of the decoder into shared and language-dependent ones. The extensive experiments demonstrate that our proposed methods can obtain remarkable improvements over the strong baselines. Moreover, our strategies can achieve comparable or even better performance than the individually trained translation models. @@ -4143,8 +4143,8 @@ Fixing Translation Divergences in Parallel Corpora for Neural <fixed-case>MT</fixed-case> - MinhQuangPham - JosepCrego + MinhQuangPham + JosepCrego JeanSenellart FrançoisYvon 2967–2973 @@ -4166,9 +4166,9 @@ Loss in Translation: Learning Bilingual Word Mapping with a Retrieval Criterion ArmandJoulin PiotrBojanowski - TomasMikolov + TomasMikolov HervéJégou - EdouardGrave + EdouardGrave 2979–2984 D18-1330 D18-1330.Attachment.zip @@ -4228,7 +4228,7 @@ Towards Two-Dimensional Sequence to Sequence Model in Neural Machine Translation ParniaBahar ChristopherBrix - HermannNey + HermannNey 3009–3015 D18-1335 This work investigates an alternative model for neural machine translation (NMT) and proposes a novel architecture, where we employ a multi-dimensional long short-term memory (MDLSTM) for translation modelling. In the state-of-the-art methods, source and target sentences are treated as one-dimensional sequences over time, while we view translation as a two-dimensional (2D) mapping using an MDLSTM layer to define the correspondence between source and target words. We extend beyond the current sequence to sequence backbone NMT models to a 2D structure in which the source and target sentences are aligned with each other in a 2D grid. Our proposed topology shows consistent improvements over attention-based sequence to sequence model on two WMT 2017 tasks, German<->English. @@ -4238,7 +4238,7 @@ End-to-End Non-Autoregressive Neural Machine Translation with Connectionist Temporal Classification JindřichLibovický - JindřichHelcl + JindřichHelcl 3016–3021 D18-1336 Autoregressive decoding is the only part of sequence-to-sequence models that prevents them from massive parallelization at inference time. Non-autoregressive models enable the decoder to generate all output symbols independently in parallel. We present a novel non-autoregressive architecture based on connectionist temporal classification and evaluate it on the task of neural machine translation. Unlike other non-autoregressive methods which operate in several steps, our model can be trained end-to-end. We conduct experiments on the WMT English-Romanian and English-German datasets. Our models achieve a significant speedup over the autoregressive models, keeping the translation quality comparable to other non-autoregressive models. @@ -4260,7 +4260,7 @@ Training Deeper Neural Machine Translation Models with Transparent Attention AnkurBapna - MiaChen + MiaChen OrhanFirat YuanCao YonghuiWu @@ -4282,8 +4282,8 @@ Encoding Gated Translation Memory into Neural Machine Translation - QianCao - DeyiXiong + QianCao + DeyiXiong 3042–3047 D18-1340 Translation memories (TM) facilitate human translators to reuse existing repetitive translation fragments. In this paper, we propose a novel method to combine the strengths of both TM and neural machine translation (NMT) for high-quality translation. We treat the target translation of a TM match as an additional reference input and encode it into NMT with an extra encoder. A gating mechanism is further used to balance the impact of the TM match on the NMT decoder. Experiment results on the UN corpus demonstrate that when fuzzy matches are higher than 50%, the quality of NMT translation can be significantly improved by over 10 BLEU points. @@ -4292,8 +4292,8 @@ Automatic Post-Editing of Machine Translation: A Neural Programmer-Interpreter Approach - Thuy-TrangVu - GholamrezaHaffari + Thuy-TrangVu + GholamrezaHaffari 3048–3053 D18-1341 Automated Post-Editing (PE) is the task of automatically correct common and repetitive errors found in machine translation (MT) output. In this paper, we present a neural programmer-interpreter approach to this task, resembling the way that human perform post-editing using discrete edit operations, wich we refer to as programs. Our model outperforms previous neural models for inducing PE programs on the WMT17 APE task for German-English up to +1 BLEU score and -0.7 TER scores. @@ -4314,7 +4314,7 @@ Multi-Multi-View Learning: Multilingual and Multi-Representation Entity Typing YadollahYaghoobzadeh - HinrichSchütze + HinrichSchütze 3060–3066 D18-1343 D18-1343.Attachment.zip @@ -4372,7 +4372,7 @@ ChenweiZhang XiaohuiYan YiChang - PhilipYu + PhilipYu 3090–3099 D18-1348 User intent detection plays a critical role in question-answering and dialog systems. Most previous works treat intent detection as a classification problem where utterances are labeled with predefined intents. However, it is labor-intensive and time-consuming to label users’ utterances as intents are diversely expressed and novel intents will continually be involved. Instead, we study the zero-shot intent detection problem, which aims to detect emerging user intents where no labeled utterances are currently available. We propose two capsule-based architectures: IntentCapsNet that extracts semantic features from utterances and aggregates them to discriminate existing intents, and IntentCapsNet-ZSL which gives IntentCapsNet the zero-shot learning ability to discriminate emerging intents via knowledge transfer from existing intents. Experiments on two real-world datasets show that our model not only can better discriminate diversely expressed existing intents, but is also able to discriminate emerging intents when no labeled utterances are available. @@ -4477,8 +4477,8 @@ Learning Neural Templates for Text Generation SamWiseman - StuartShieber - AlexanderRush + StuartShieber + AlexanderRush 3174–3187 D18-1356 While neural, encoder-decoder models have had significant empirical success in text generation, there remain several unaddressed problems with this style of generation. Encoder-decoder models are largely (a) uninterpretable, and (b) difficult to control in terms of their phrasing or content. This work proposes a neural generation system using a hidden semi-markov model (HSMM) decoder, which learns latent, discrete templates jointly with learning to generate. We show that this model learns useful templates, and that these templates make generation both more interpretable and controllable. Furthermore, we show that this approach scales to real data sets and achieves strong performance nearing that of encoder-decoder text generation models. @@ -4529,7 +4529,7 @@ Multi-Task Identification of Entities, Relations, and Coreference for Scientific Knowledge Graph Construction YiLuan LuhengHe - MariOstendorf + MariOstendorf HannanehHajishirzi 3219–3232 D18-1360 @@ -4557,7 +4557,7 @@ Multi-Hop Knowledge Graph Reasoning with Reward Shaping - Xi VictoriaLin + Xi VictoriaLin RichardSocher CaimingXiong 3243–3253 @@ -4570,8 +4570,8 @@ Neural Transductive Learning and Beyond: Morphological Generation in the Minimal-Resource Setting - KatharinaKann - HinrichSchütze + KatharinaKann + HinrichSchütze 3254–3264 D18-1363 D18-1363.Attachment.zip @@ -4609,10 +4609,10 @@ Adapting Word Embeddings to New Languages with Morphological and Phonological Subword Representations AditiChaudhary ChuntingZhou - LoriLevin + LoriLevin GrahamNeubig - David R.Mortensen - JaimeCarbonell + David R.Mortensen + JaimeCarbonell 3285–3295 D18-1366 D18-1366.Attachment.zip @@ -4625,7 +4625,7 @@ A Computational Exploration of Exaggeration EnricaTroiano CarloStrapparava - GözdeÖzbal + GözdeÖzbal Serra SinemTekiroğlu 3296–3304 D18-1367 @@ -4646,7 +4646,7 @@ Hierarchical <fixed-case>D</fixed-case>irichlet <fixed-case>G</fixed-case>aussian Marked <fixed-case>H</fixed-case>awkes Process for Narrative Reconstruction in Continuous Time Domain YeonSeonwoo - AliceOh + AliceOh SungjoonPark 3316–3325 D18-1369 @@ -4658,7 +4658,7 @@ Investigating the Role of Argumentation in the Rhetorical Analysis of Scientific Publications with Neural Multi-Task Learning Models AnneLauscher GoranGlavaš - Simone PaoloPonzetto + Simone PaoloPonzetto KaiEckert 3326–3338 D18-1370 @@ -4681,7 +4681,7 @@ Causal Explanation Analysis on Social Media YoungseoSon NipunBayas - H. AndrewSchwartz + H. AndrewSchwartz 3350–3359 D18-1372 D18-1372.Attachment.zip @@ -4715,8 +4715,8 @@ A Genre-Aware Attention Model to Improve the Likability Prediction of Books SurajMaharjan - ManuelMontes - Fabio A.González + ManuelMontes + Fabio A.González ThamarSolorio 3381–3391 D18-1375 @@ -4739,7 +4739,7 @@ <fixed-case>IARM</fixed-case>: Inter-Aspect Relation Modeling with Memory Networks in Aspect-Based Sentiment Analysis NavonilMajumder SoujanyaPoria - AlexanderGelbukh + AlexanderGelbukh Md. ShadAkhtar ErikCambria AsifEkbal @@ -4752,7 +4752,7 @@ <fixed-case>L</fixed-case>imbic: Author-Based Sentiment Aspect Modeling Regularized with Word Embeddings and Discourse Relations ZheZhang - MunindarSingh + MunindarSingh 3412–3422 D18-1378 We propose Limbic, an unsupervised probabilistic model that addresses the problem of discovering aspects and sentiments and associating them with authors of opinionated texts. Limbic combines three ideas, incorporating authors, discourse relations, and word embeddings. For discourse relations, Limbic adopts a generative process regularized by a Markov Random Field. To promote words with high semantic similarity into the same topic, Limbic captures semantic regularities from word embeddings via a generalized Pólya Urn process. We demonstrate that Limbic (1) discovers aspects associated with sentiments with high lexical diversity; (2) outperforms state-of-the-art models by a substantial margin in topic cohesion and sentiment classification. @@ -4784,7 +4784,7 @@ Attentive Gated Lexicon Reader with Contrastive Contextual Co-Attention for Sentiment Classification YiTay - Anh TuanLuu + Anh TuanLuu Siu CheungHui JianSu 3443–3453 @@ -4800,7 +4800,7 @@ DushyantChauhan SoujanyaPoria AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 3454–3466 D18-1382 Multi-modal sentiment analysis offers various challenges, one being the effective combination of different input modalities, namely text, visual and acoustic. In this paper, we propose a recurrent neural network based multi-modal attention framework that leverages the contextual information for utterance-level sentiment prediction. The proposed approach applies attention on multi-modal multi-utterance representations and tries to learn the contributing features amongst them. We evaluate our proposed approach on two multi-modal sentiment analysis benchmark datasets, viz. CMU Multi-modal Opinion-level Sentiment Intensity (CMU-MOSI) corpus and the recently released CMU Multi-modal Opinion Sentiment and Emotion Intensity (CMU-MOSEI) corpus. Evaluation results show the effectiveness of our proposed approach with the accuracies of 82.31% and 79.80% for the MOSI and MOSEI datasets, respectively. These are approximately 2 and 1 points performance improvement over the state-of-the-art models for the datasets. @@ -4824,10 +4824,10 @@ <fixed-case>E</fixed-case>xt<fixed-case>RA</fixed-case>: Extracting Prominent Review Aspects from Customer Feedback ZhiyiLuo ShanshanHuang - Frank F.Xu - Bill YuchenLin + Frank F.Xu + Bill YuchenLin HanyuanShi - KennyZhu + KennyZhu 3477–3486 D18-1384 Many existing systems for analyzing and summarizing customer reviews about products or service are based on a number of prominent review aspects. Conventionally, the prominent review aspects of a product type are determined manually. This costly approach cannot scale to large and cross-domain services such as Amazon.com, Taobao.com or Yelp.com where there are a large number of product types and new products emerge almost every day. In this paper, we propose a novel framework, for extracting the most prominent aspects of a given product type from textual reviews. The proposed framework, ExtRA, extracts K most prominent aspect terms or phrases which do not overlap semantically automatically without supervision. Extensive experiments show that ExtRA is effective and achieves the state-of-the-art performance on a dataset consisting of different product types. @@ -4870,7 +4870,7 @@ Multi-view Models for Political Ideology Detection of News Articles VivekKulkarni JuntingYe - SteveSkiena + SteveSkiena William YangWang 3518–3527 D18-1388 @@ -4883,8 +4883,8 @@ RamyBaly GeorgiKaradzhov DimitarAlexandrov - JamesGlass - PreslavNakov + JamesGlass + PreslavNakov 3528–3539 D18-1389 We present a study on predicting the factuality of reporting and bias of news media. While previous work has focused on studying the veracity of claims or documents, here we are interested in characterizing entire news media. This is an under-studied, but arguably important research problem, both in its own right and as a prior for fact-checking systems. We experiment with a large list of news websites and with a rich set of features derived from (i) a sample of articles from the target news media, (ii) its Wikipedia page, (iii) its Twitter account, (iv) the structure of its URL, and (v) information about the Web traffic it attracts. The experimental results show sizable performance gains over the baseline, and reveal the importance of each feature type. @@ -4920,7 +4920,7 @@ Residualized Factor Adaptation for Community Social Media Prediction Tasks MohammadzamanZamani - H. AndrewSchwartz + H. AndrewSchwartz VeronicaLynn SalvatoreGiorgi NiranjanBalasubramanian @@ -4936,7 +4936,7 @@ DoronKliger ShulyWintner JenniferPan - DanJurafsky + DanJurafsky YuliaTsvetkov 3570–3580 D18-1393 @@ -5005,7 +5005,7 @@ JiataoGu YongWang YunChen - Victor O. K.Li + Victor O. K.Li KyunghyunCho 3622–3631 D18-1398 @@ -5017,8 +5017,8 @@ Unsupervised Statistical Machine Translation MikelArtetxe - GorkaLabaka - EnekoAgirre + GorkaLabaka + EnekoAgirre 3632–3642 D18-1399 While modern machine translation has relied on large parallel corpora, a recent line of work has managed to train Neural Machine Translation (NMT) systems from monolingual corpora only (Artetxe et al., 2018c; Lample et al., 2018). Despite the potential of this approach for low-resource settings, existing systems are far behind their supervised counterparts, limiting their practical interest. In this paper, we propose an alternative approach based on phrase-based Statistical Machine Translation (SMT) that significantly closes the gap with supervised systems. Our method profits from the modular architecture of SMT: we first induce a phrase table from monolingual corpora through cross-lingual embedding mappings, combine it with an n-gram language model, and fine-tune hyperparameters through an unsupervised MERT variant. In addition, iterative backtranslation improves results further, yielding, for instance, 14.08 and 26.22 BLEU points in WMT 2014 English-German and English-French, respectively, an improvement of more than 7-10 BLEU points over previous unsupervised systems, and closing the gap with supervised SMT (Moses trained on Europarl) down to 2-5 BLEU points. Our implementation is available at https://github.com/artetxem/monoses. @@ -5050,7 +5050,7 @@ XiaozhongLiu LuoSi MinZhang - GuodongZhou + GuodongZhou 3654–3663 D18-1401 In an e-commerce environment, user-oriented question-answering (QA) text pair could carry rich sentiment information. In this study, we propose a novel task/method to address QA sentiment analysis. In particular, we create a high-quality annotated corpus with specially-designed annotation guidelines for QA-style sentiment classification. On the basis, we propose a three-stage hierarchical matching network to explore deep sentiment information in a QA text pair. First, we segment both the question and answer text into sentences and construct a number of [Q-sentence, A-sentence] units in each QA text pair. Then, by leveraging a QA bidirectional matching layer, the proposed approach can learn the matching vectors of each [Q-sentence, A-sentence] unit. Finally, we characterize the importance of the generated matching vectors via a self-matching attention layer. Experimental results, comparing with a number of state-of-the-art baselines, demonstrate the impressive effectiveness of the proposed approach for QA-style sentiment classification. @@ -5103,7 +5103,7 @@ Noise Contrastive Estimation and Negative Sampling for Conditional Models: Consistency and Statistical Efficiency ZhuangMa - MichaelCollins + MichaelCollins 3698–3707 D18-1405 D18-1405.Attachment.pdf @@ -5160,7 +5160,7 @@ YikangShen EricCrawford Herkevan Hoof - Jackie Chi KitCheung + Jackie Chi KitCheung 3739–3748 D18-1409 D18-1409.Attachment.pdf @@ -5183,10 +5183,10 @@ Learning Latent Semantic Annotations for Grounding Natural Language to Structured Data GuanghuiQin - Jin-GeYao + Jin-GeYao XueningWang JinpengWang - Chin-YewLin + Chin-YewLin 3761–3771 D18-1411 D18-1411.Attachment.pdf @@ -5200,9 +5200,9 @@ SwabhaSwayamdipta SamThomson KentonLee - LukeZettlemoyer - ChrisDyer - Noah A.Smith + LukeZettlemoyer + ChrisDyer + Noah A.Smith 3772–3782 D18-1412 D18-1412.Attachment.zip @@ -5216,7 +5216,7 @@ NoahWeber LeenaShekhar NiranjanBalasubramanian - NathanaelChambers + NathanaelChambers 3783–3792 D18-1413 Scripts define knowledge about how everyday scenarios (such as going to a restaurant) are expected to unfold. One of the challenges to learning scripts is the hierarchical nature of the knowledge. For example, a suspect arrested might plead innocent or guilty, and a very different track of events is then expected to happen. To capture this type of information, we propose an autoencoder model with a latent space defined by a hierarchy of categorical variables. We utilize a recently proposed vector quantization based approach, which allows continuous embeddings to be associated with each latent variable value. This permits the decoder to softly decide what portions of the latent hierarchy to condition on by attending over the value embeddings for a given setting. Our model effectively encodes and generates scripts, outperforming a recent language modeling-based method on several standard tasks, and allowing the autoencoder model to achieve substantially lower perplexity scores compared to the previous language modeling-based method. @@ -5227,9 +5227,9 @@ Semantic Role Labeling for Learner <fixed-case>C</fixed-case>hinese: the Importance of Syntactic Parsing and <fixed-case>L</fixed-case>2-<fixed-case>L</fixed-case>1 Parallel Data ZiLin - YuguangDuan + YuguangDuan YuanyuanZhao - WeiweiSun + WeiweiSun XiaojunWan 3793–3802 D18-1414 @@ -5243,8 +5243,8 @@ WeikangWang JiajunZhang HanZhang - Mei-YuhHwang - ChengqingZong + Mei-YuhHwang + ChengqingZong ZhifeiLi 3803–3812 D18-1415 @@ -5292,7 +5292,7 @@ <fixed-case>A</fixed-case>ir<fixed-case>D</fixed-case>ialogue: An Environment for Goal-Oriented Dialogue Research WeiWei - QuocLe + QuocLe AndrewDai JiaLi 3844–3854 @@ -5331,9 +5331,9 @@ Operation-guided Neural Networks for High Fidelity Data-To-Text Generation FengNie JinpengWang - Jin-GeYao + Jin-GeYao RongPan - Chin-YewLin + Chin-YewLin 3879–3889 D18-1422 Recent neural models for data-to-text generation are mostly based on data-driven end-to-end training over encoder-decoder networks. Even though the generated texts are mostly fluent and informative, they often generate descriptions that are not consistent with the input structured data. This is a critical issue especially in domains that require inference or calculations over raw data. In this paper, we attempt to improve the fidelity of neural data-to-text generation by utilizing pre-executed symbolic operations. We propose a framework called Operation-guided Attention-based sequence-to-sequence network (OpAtt), with a specifically designed gating mechanism as well as a quantization module for operation results to utilize information from pre-executed operations. Experiments on two sports datasets show our proposed method clearly improves the fidelity of the generated texts to the input structured data. @@ -5380,7 +5380,7 @@ QingningYao ShanelleRoman ZilinZhang - DragomirRadev + DragomirRadev 3911–3921 D18-1425 D18-1425.Attachment.zip @@ -5428,7 +5428,7 @@ Towards a Better Metric for Evaluating Question Generation Systems PrekshaNema - Mitesh M.Khapra + Mitesh M.Khapra 3950–3959 D18-1429 There has always been criticism for using n-gram based similarity metrics, such as BLEU, NIST, etc, for evaluating the performance of NLG systems. However, these metrics continue to remain popular and are recently being used for evaluating the performance of systems which automatically generate questions from documents, knowledge graphs, images, etc. Given the rising interest in such automatic question generation (AQG) systems, it is important to objectively examine whether these metrics are suitable for this task. In particular, it is important to verify whether such metrics used for evaluating AQG systems focus on answerability of the generated question by preferring questions which contain all relevant information such as question type (Wh-types), entities, relations, etc. In this work, we show that current automatic evaluation metrics based on n-gram similarity do not always correlate well with human judgments about answerability of a question. To alleviate this problem and as a first step towards better evaluation metrics for AQG, we introduce a scoring function to capture answerability and show that when this scoring function is integrated with existing metrics, they correlate significantly better with human judgments. The scripts and data developed as a part of this work are made publicly available. @@ -5453,7 +5453,7 @@ AshutoshBaheti AlanRitter JiweiLi - BillDolan + BillDolan 3970–3980 D18-1431 Neural conversation models tend to generate safe, generic responses for most inputs. This is due to the limitations of likelihood-based decoding objectives in generation tasks with diverse outputs, such as conversation. To address this challenge, we propose a simple yet effective approach for incorporating side information in the form of distributional constraints over the generated responses. We propose two constraints that help generate more content rich responses that are based on a model of syntax and topics (Griffiths et al., 2005) and semantic similarity (Arora et al., 2016). We evaluate our approach against a variety of competitive baselines, using both automatic metrics and human judgments, showing that our proposed approach generates responses that are much less generic without sacrificing plausibility. A working demo of our code can be found at https://github.com/abaheti95/DC-NeuralConversation. @@ -5478,8 +5478,8 @@ SpencerWhitehead HengJi MohitBansal - Shih-FuChang - ClareVoss + Shih-FuChang + ClareVoss 3992–4001 D18-1433 Most previous efforts toward video captioning focus on generating generic descriptions, such as, “A man is talking.” We collect a news video dataset to generate enriched descriptions that include important background knowledge, such as named entities and related events, which allows the user to fully understand the video content. We develop an approach that uses video meta-data to retrieve topically related news documents for a video and extracts the events and named entities from these documents. Then, given the video as well as the extracted events and entities, we generate a description using a Knowledge-aware Video Description network. The model learns to incorporate entities found in the topically related documents into the description via an entity pointer network and the generation procedure is guided by the event and entity types from the topically related documents through a knowledge gate, which is a gating mechanism added to the model’s decoder that takes a one-hot vector of these types. We evaluate our approach on the new dataset of news videos we have collected, establishing the first benchmark for this dataset as well as proposing a new metric to evaluate these descriptions. @@ -5505,7 +5505,7 @@ SpencerWhitehead LifuHuang HengJi - Shih-FuChang + Shih-FuChang 4013–4023 D18-1435 Current image captioning approaches generate descriptions which lack specific information, such as named entities that are involved in the images. In this paper we propose a new task which aims to generate informative image captions, given images and hashtags as input. We propose a simple but effective approach to tackle this problem. We first train a convolutional neural networks - long short term memory networks (CNN-LSTM) model to generate a template caption based on the input image. Then we use a knowledge graph based collective inference algorithm to fill in the template with specific named entities retrieved via the hashtags. Experiments on a new benchmark dataset collected from Flickr show that our model generates news-style image descriptions with much richer information. Our model outperforms unimodal baselines significantly with various evaluation metrics. @@ -5600,7 +5600,7 @@ Bottom-Up Abstractive Summarization SebastianGehrmann YuntianDeng - AlexanderRush + AlexanderRush 4098–4109 D18-1443 Neural summarization produces outputs that are fluent and readable, but which can be poor at content selection, for instance often copying full sentences from the source document. This work explores the use of data-efficient content selectors to over-determine phrases in a source document that should be part of the summary. We use this selector as a bottom-up attention step to constrain the model to likely phrases. We show that this approach improves the ability to compress text, while still generating fluent summaries. This two-step process is both simpler and higher performing than other end-to-end content selection models, leading to significant improvements on ROUGE for both the CNN-DM and NYT corpus. Furthermore, the content selector can be trained with as little as 1,000 sentences making it easy to transfer a trained summarizer to a new domain. @@ -5611,7 +5611,7 @@ Controlling Length in Abstractive Summarization Using a Convolutional Neural Network YizhuLiu ZhiyiLuo - KennyZhu + KennyZhu 4110–4119 D18-1444 Convolutional neural networks (CNNs) have met great success in abstractive summarization, but they cannot effectively generate summaries of desired lengths. Because generated summaries are used in difference scenarios which may have space or length constraints, the ability to control the summary length in abstractive summarization is an important problem. In this paper, we propose an approach to constrain the summary length by extending a convolutional sequence to sequence model. The results show that this approach generates high-quality summaries with user defined length, and outperforms the baselines consistently in terms of ROUGE score, length variations and semantic similarity. @@ -5658,7 +5658,7 @@ TianshangLiu YuZhou JiajunZhang - ChengqingZong + ChengqingZong 4154–4164 D18-1448 D18-1448.Attachment.zip @@ -5701,9 +5701,9 @@ Joint Multitask Learning for Community Question Answering Using Task-Specific Embeddings - ShafiqJoty - LluísMàrquez - PreslavNakov + ShafiqJoty + LluísMàrquez + PreslavNakov 4196–4207 D18-1452 We address jointly two important tasks for Question Answering in community forums: given a new question, (i) find related existing questions, and (ii) find relevant answers to this new question. We further use an auxiliary task to complement the previous two, i.e., (iii) find good answers with respect to the thread question in a question-comment thread. We use deep neural networks (DNNs) to learn meaningful task-specific embeddings, which we then incorporate into a conditional random field (CRF) model for the multitask setting, performing joint learning over a complex graph structure. While DNNs alone achieve competitive results when trained to produce the embeddings, the CRF, which makes use of the embeddings and the dependencies between the tasks, improves the results significantly and consistently across a variety of evaluation metrics, thus showing the complementarity of DNNs and structured learning. @@ -5716,7 +5716,7 @@ SakuSugawara KentaroInui SatoshiSekine - AkikoAizawa + AkikoAizawa 4208–4219 D18-1453 A challenge in creating a dataset for machine reading comprehension (MRC) is to collect questions that require a sophisticated understanding of language to answer beyond using superficial cues. In this work, we investigate what makes questions easier across recent 12 MRC datasets with three question styles (answer extraction, description, and multiple choice). We propose to employ simple heuristics to split each dataset into easy and hard subsets and examine the performance of two baseline models for each of the subsets. We then manually annotate questions sampled from each subset with both validity and requisite reasoning skills to investigate which skills explain the difference between easy and hard questions. From this study, we observed that (i) the baseline performances for the hard subsets remarkably degrade compared to those of entire datasets, (ii) hard questions require knowledge inference and multiple-sentence reasoning in comparison with easy questions, and (iii) multiple-choice questions tend to require a broader range of reasoning skills than answer extraction and description questions. These results suggest that one might overestimate recent advances in MRC. @@ -5744,7 +5744,7 @@ ManzilZaheer KathrynMazaitis RuslanSalakhutdinov - WilliamCohen + WilliamCohen 4231–4242 D18-1455 Open Domain Question Answering (QA) is evolving from complex pipelined systems to end-to-end deep neural networks. Specialized neural models have been developed for extracting answers from either text alone or Knowledge Bases (KBs) alone. In this paper we look at a more practical setting, namely QA over the combination of a KB and entity-linked text, which is appropriate when an incomplete KB is available with a large text corpus. Building on recent advances in graph representation learning we propose a novel model, GRAFT-Net, for extracting answers from a question-specific subgraph containing text and KB entities and relations. We construct a suite of benchmark tasks for this problem, varying the difficulty of questions, the amount of training data, and KB completeness. We show that GRAFT-Net is competitive with the state-of-the-art when tested using either KBs or text alone, and vastly outperforms existing methods in the combined setting. @@ -5781,7 +5781,7 @@ Why Self-Attention? A Targeted Evaluation of Neural Machine Translation Architectures GongboTang MathiasMüller - AnnetteRios + AnnetteRios RicoSennrich 4263–4272 D18-1458 @@ -5793,7 +5793,7 @@ Simplifying Neural Machine Translation with Addition-Subtraction Twin-Gated Recurrent Networks BiaoZhang - DeyiXiong + DeyiXiong JinsongSu QianLin HuijiZhang @@ -5891,7 +5891,7 @@ Getting to “Hearer-old”: Charting Referring Expressions Across Time IevaStaliūnaitė HannahRohde - BonnieWebber + BonnieWebber AnnieLouis 4350–4359 D18-1466 @@ -5949,7 +5949,7 @@ Why Swear? Analyzing and Inferring the Intentions of Vulgar Expressions EricHolgate IsabelCachola - DanielPreoţiuc-Pietro + DanielPreoţiuc-Pietro Junyi JessyLi 4405–4414 D18-1471 @@ -5985,7 +5985,7 @@ Speed Reading: Learning to Read <fixed-case>F</fixed-case>or<fixed-case>B</fixed-case>ackward via Shuttle Tsu-JuiFu - Wei-YunMa + Wei-YunMa 4439–4448 D18-1474 We present LSTM-Shuttle, which applies human speed reading techniques to natural language processing tasks for accurate and efficient comprehension. In contrast to previous work, LSTM-Shuttle not only reads shuttling forward but also goes back. Shuttling forward enables high efficiency, and going backward gives the model a chance to recover lost information, ensuring better prediction. We evaluate LSTM-Shuttle on sentiment analysis, news classification, and cloze on IMDB, Rotten Tomatoes, AG, and Children’s Book Test datasets. We show that LSTM-Shuttle predicts both better and more quickly. To demonstrate how LSTM-Shuttle actually behaves, we also analyze the shuttling operation and present a case study. @@ -6025,7 +6025,7 @@ Simple Recurrent Units for Highly Parallelizable Recurrence TaoLei YuZhang - Sida I.Wang + Sida I.Wang HuiDai YoavArtzi 4470–4481 @@ -6054,7 +6054,7 @@ Co-Stack Residual Affinity Networks with Multi-level Attention Refinement for Matching Text Sequences YiTay - Anh TuanLuu + Anh TuanLuu Siu CheungHui 4492–4502 D18-1479 @@ -6076,7 +6076,7 @@ Learning Universal Sentence Representations with Mean-Max Attention Autoencoder MinghuaZhang YunfangWu - WeikangLi + WeikangLi WeiLi 4514–4523 D18-1481 @@ -6093,7 +6093,7 @@ AvinashBalakrishnan Pin-YuChen PradeepRavikumar - Michael J.Witbrock + Michael J.Witbrock 4524–4534 D18-1482 D18-1482.Attachment.zip @@ -6104,9 +6104,9 @@ Multilingual Clustering of Streaming News SebastiãoMiranda - ArtūrsZnotiņš - Shay B.Cohen - GuntisBarzdins + ArtūrsZnotiņš + Shay B.Cohen + GuntisBarzdins 4535–4544 D18-1483 Clustering news across languages enables efficient media monitoring by aggregating articles from multilingual sources into coherent stories. Doing so in an online setting allows scalable processing of massive news streams. To this end, we describe a novel method for clustering an incoming stream of multilingual documents into monolingual and crosslingual clusters. Unlike typical clustering approaches that report results on datasets with a small and known number of labels, we tackle the problem of discovering an ever growing number of cluster labels in an online fashion, using real news datasets in multiple languages. In our formulation, the monolingual clusters group together documents while the crosslingual clusters group together monolingual clusters, one per language that appears in the stream. Our method is simple to implement, computationally efficient and produces state-of-the-art results on datasets in German, English and Spanish. @@ -6198,7 +6198,7 @@ Pyramidal Recurrent Unit for Language Modeling SachinMehta - RikKoncel-Kedziorski + RikKoncel-Kedziorski MohammadRastegari HannanehHajishirzi 4620–4630 @@ -6209,7 +6209,7 @@ On Tree-Based Neural Sentence Modeling - HaoyueShi + HaoyueShi HaoZhou JiazeChen LeiLi @@ -6279,8 +6279,8 @@ SarthakJain EdwardBanner Jan-Willemvan de Meent - Iain J.Marshall - Byron C.Wallace + Iain J.Marshall + Byron C.Wallace 4683–4693 D18-1497 D18-1497.Attachment.pdf @@ -6301,7 +6301,7 @@ A Neural Model of Adaptation in Reading - Martenvan Schijndel + Martenvan Schijndel TalLinzen 4704–4710 D18-1499 @@ -6313,7 +6313,7 @@ Understanding Deep Learning Performance through an Examination of Test Set Difficulty: A Psychometric Case Study - John P.Lalor + John P.Lalor HaoWu TsendsurenMunkhdalai HongYu @@ -6342,7 +6342,7 @@ Dual Fixed-Size Ordinally Forgetting Encoding (<fixed-case>FOFE</fixed-case>) for Competitive Neural Language Models SedtawutWatcharawittayakul - MingbinXu + MingbinXu HuiJiang 4725–4730 D18-1502 @@ -6353,7 +6353,7 @@ The Importance of Being Recurrent for Modeling Hierarchical Structure - KeTran + KeTran AriannaBisazza ChristofMonz 4731–4736 @@ -6404,10 +6404,10 @@ Modeling Empathy and Distress in Reaction to News Stories - SvenBuechel + SvenBuechel AnnekeBuffone BarrySlaff - LyleUngar + LyleUngar JoãoSedoc 4758–4765 D18-1507 @@ -6419,8 +6419,8 @@ Interpretable Emoji Prediction via Label-Wise Attention <fixed-case>LSTM</fixed-case>s FrancescoBarbieri - LuisEspinosa-Anke - JoseCamacho-Collados + LuisEspinosa-Anke + JoseCamacho-Collados StevenSchockaert HoracioSaggion 4766–4771 @@ -6461,7 +6461,7 @@ ZhisongZhang RuiWang MasaoUtiyama - EiichiroSumita + EiichiroSumita HaiZhao 4785–4790 D18-1511 @@ -6513,9 +6513,9 @@ A strong baseline for question relevancy ranking - AnaGonzalez + AnaGonzalez IsabelleAugenstein - AndersSøgaard + AndersSøgaard 4810–4815 D18-1515 The best systems at the SemEval-16 and SemEval-17 community question answering shared tasks – a task that amounts to question relevancy ranking – involve complex pipelines and manual feature engineering. Despite this, many of these still fail at beating the IR baseline, i.e., the rankings provided by Google’s search engine. We present a strong baseline for question relevancy ranking by training a simple multi-task feed forward network on a bag of 14 distance measures for the input question pair. This baseline model, which is fast to train and uses only language-independent features, outperforms the best shared task systems on the task of retrieving relevant previously asked questions. @@ -6525,7 +6525,7 @@ Learning Sequence Encoders for Temporal Knowledge Graph Completion - AlbertoGarcía-Durán + AlbertoGarcía-Durán SebastijanDumančić MathiasNiepert 4816–4821 @@ -6537,7 +6537,7 @@ Similar but not the Same: Word Sense Disambiguation Improves Event Detection via Neural Representation Matching - WeiyiLu + WeiyiLu Thien HuuNguyen 4822–4828 D18-1517 @@ -6562,7 +6562,7 @@ Hong-YouChen Cheng-SyuanLee Keng-TeLiao - Shou-DeLin + Shou-DeLin 4834–4839 D18-1519 Lexicon relation extraction given distributional representation of words is an important topic in NLP. We observe that the state-of-the-art projection-based methods cannot be generalized to handle unseen hypernyms. We propose to analyze it in the perspective of pollution and construct the corresponding indicator to measure it. We propose a word relation autoencoder (WRAE) model to address the challenge. Experiments on several hypernym-like lexicon datasets show that our model outperforms the competitors significantly. @@ -6618,7 +6618,7 @@ <fixed-case>I</fixed-case>nfer<fixed-case>L</fixed-case>ite: Simple Universal Sentence Representations from Natural Language Inference Data - JamieKiros + JamieKiros WilliamChan 4868–4874 D18-1524 @@ -6669,8 +6669,8 @@ Classifying Referential and Non-referential It Using Gaze VictoriaYaneva Le AnHa - RichardEvans - RuslanMitkov + RichardEvans + RuslanMitkov 4896–4901 D18-1528 When processing a text, humans and machines must disambiguate between different uses of the pronoun it, including non-referential, nominal anaphoric or clause anaphoric ones. In this paper we use eye-tracking data to learn how humans perform this disambiguation and use this knowledge to improve the automatic classification of it. We show that by using gaze data and a POS-tagger we are able to significantly outperform a common baseline and classify between three categories of it with an accuracy comparable to that of linguistic-based approaches. In addition, the discriminatory power of specific gaze features informs the way humans process the pronoun, which, to the best of our knowledge, has not been explored using data from a natural reading task. @@ -6705,7 +6705,7 @@ Unsupervised Neural Word Segmentation for <fixed-case>C</fixed-case>hinese via Segmental Language Modeling ZhiqingSun - Zhi-HongDeng + Zhi-HongDeng 4915–4920 D18-1531 Previous traditional approaches to unsupervised Chinese word segmentation (CWS) can be roughly classified into discriminative and generative models. The former uses the carefully designed goodness measures for candidate segmentation, while the latter focuses on finding the optimal segmentation of the highest generative probability. However, while there exists a trivial way to extend the discriminative models into neural version by using neural language models, those of generative ones are non-trivial. In this paper, we propose the segmental language models (SLMs) for CWS. Our approach explicitly focuses on the segmental nature of Chinese, as well as preserves several properties of language models. In SLMs, a context encoder encodes the previous context and a segment decoder generates each segment incrementally. As far as we know, we are the first to propose a neural model for unsupervised CWS and achieve competitive performance to the state-of-the-art statistical models on four different datasets from SIGHAN 2005 bakeoff. @@ -6717,7 +6717,7 @@ DanielKondratyuk TomášGavenčiak MilanStraka - JanHajič + JanHajič 4921–4928 D18-1532 D18-1532.Attachment.zip @@ -6728,7 +6728,7 @@ Recovering Missing Characters in Old <fixed-case>H</fixed-case>awaiian Writing BrendanShillingford - OiwiParker Jones + OiwiParker Jones 4929–4934 D18-1533 D18-1533.Attachment.pdf @@ -6787,9 +6787,9 @@ Towards Semi-Supervised Learning for Deep Semantic Role Labeling - Sanket VaibhavMehta + Sanket VaibhavMehta Jay YoonLee - JaimeCarbonell + JaimeCarbonell 4958–4963 D18-1538 D18-1538.Attachment.pdf @@ -6802,7 +6802,7 @@ JamesFerguson JanaraChristensen EdwardLi - EdgarGonzàlez + EdgarGonzàlez 4964–4969 D18-1539 When the semantics of a sentence are not representable in a semantic parser’s output schema, parsing will inevitably fail. Detection of these instances is commonly treated as an out-of-domain classification problem. However, there is also a more subtle scenario in which the test data is drawn from the same domain. In addition to formalizing this problem of domain-adjacency, we present a comparison of various baselines that could be used to solve it. We also propose a new simple sentence representation that emphasizes words which are unexpected. This approach improves the performance of a downstream semantic parser run on in-domain and domain-adjacent instances. @@ -6836,7 +6836,7 @@ Modeling Input Uncertainty in Neural Network Dependency Parsing Robvan der Goot - Gertjanvan Noord + Gertjanvan Noord 4984–4991 D18-1542 D18-1542.Attachment.pdf @@ -6849,12 +6849,12 @@ Miryamde Lhoneux JohannesBjerva IsabelleAugenstein - AndersSøgaard + AndersSøgaard 4992–4997 D18-1543 D18-1543.Attachment.pdf - Previous work has suggested that parameter sharing between transition-based neural dependency parsers for related languages can lead to better performance, but there is no consensus on what parameters to share. We present an evaluation of 27 different parameter sharing strategies across 10 languages, representing five pairs of related languages, each pair from a different language family. We find that sharing transition classifier parameters always helps, whereas the usefulness of sharing word and/or character LSTM parameters varies. Based on this result, we propose an architecture where the transition classifier is shared, and the sharing of word and character parameters is controlled by a parameter that can be tuned on validation data. This model is linguistically motivated and obtains significant improvements over a monolingually trained baseline. We also find that sharing transition classifier parameters helps when training a parser on unrelated language pairs, but we find that, in the case of unrelated languages, sharing too many parameters does not help. D18-1543.Poster.pdf + Previous work has suggested that parameter sharing between transition-based neural dependency parsers for related languages can lead to better performance, but there is no consensus on what parameters to share. We present an evaluation of 27 different parameter sharing strategies across 10 languages, representing five pairs of related languages, each pair from a different language family. We find that sharing transition classifier parameters always helps, whereas the usefulness of sharing word and/or character LSTM parameters varies. Based on this result, we propose an architecture where the transition classifier is shared, and the sharing of word and character parameters is controlled by a parameter that can be tuned on validation data. This model is linguistically motivated and obtains significant improvements over a monolingually trained baseline. We also find that sharing transition classifier parameters helps when training a parser on unrelated language pairs, but we find that, in the case of unrelated languages, sharing too many parameters does not help. 10.18653/v1/D18-1543 de-lhoneux-etal-2018-parameter @@ -6862,7 +6862,7 @@ Grammar Induction with Neural Language Models: An Unusual Replication Phu MonHtut KyunghyunCho - SamuelBowman + SamuelBowman 4998–5003 D18-1544 D18-1544.Attachment.zip @@ -6872,8 +6872,8 @@ Data Augmentation via Dependency Tree Morphing for Low-Resource Languages - Gözde GülŞahin - MarkSteedman + Gözde GülŞahin + MarkSteedman 5004–5009 D18-1545 Neural NLP systems achieve high scores in the presence of sizable training dataset. Lack of such datasets leads to poor system performances in the case low-resource languages. We present two simple text augmentation techniques using dependency trees, inspired from image processing. We “crop” sentences by removing dependency links, and we “rotate” sentences by moving the tree fragments around the root. We apply these techniques to augment the training sets of low-resource languages in Universal Dependencies project. We implement a character-level sequence tagging model and evaluate the augmented datasets on part-of-speech tagging task. We show that crop and rotate provides improvements over the models trained with non-augmented data for majority of the languages, especially for languages with rich case marking systems. @@ -6883,7 +6883,7 @@ How Much Reading Does Reading Comprehension Require? A Critical Investigation of Popular Benchmarks DivyanshKaushik - Zachary C.Lipton + Zachary C.Lipton 5010–5015 D18-1546 Many recent papers address reading comprehension, where examples consist of (question, passage, answer) tuples. Presumably, a model must combine information from both questions and passages to predict corresponding answers. However, despite intense interest in the topic, with hundreds of published papers vying for leaderboard dominance, basic questions about the difficulty of many popular benchmarks remain unanswered. In this paper, we establish sensible baselines for the bAbI, SQuAD, CBT, CNN, and Who-did-What datasets, finding that question- and passage-only models often perform surprisingly well. On 14 out of 20 bAbI tasks, passage-only models achieve greater than 50% accuracy, sometimes matching the full model. Interestingly, while CBT provides 20-sentence passages, only the last is needed for accurate prediction. By comparison, SQuAD and CNN appear better-constructed. @@ -6899,7 +6899,7 @@ IñigoCasanueva StefanUltes OsmanRamadan - MilicaGašić + MilicaGašić 5016–5026 D18-1547 D18-1547.Attachment.pdf @@ -6958,7 +6958,7 @@ <fixed-case>S</fixed-case>ynta<fixed-case>V</fixed-case>iz: Visualizing Voice Queries through a Syntax-Driven Hierarchical Ontology Md IftekharTanveer - FerhanTure + FerhanTure 1–6 D18-2001 This paper describes SyntaViz, a visualization interface specifically designed for analyzing natural-language queries that were created by users of a voice-enabled product. SyntaViz provides a platform for browsing the ontology of user queries from a syntax-driven perspective, providing quick access to high-impact failure points of the existing intent understanding system and evidence for data-driven decisions in the development cycle. A case study on Xfinity X1 (a voice-enabled entertainment platform from Comcast) reveals that SyntaViz helps developers identify multiple action items in a short amount of time without any special training. SyntaViz has been open-sourced for the benefit of the community. @@ -6980,8 +6980,8 @@ LongxuDou GuanghuiQin JinpengWang - Jin-GeYao - Chin-YewLin + Jin-GeYao + Chin-YewLin 13–18 D18-2003 Data2Text Studio is a platform for automated text generation from structured data. It is equipped with a Semi-HMMs model to extract high-quality templates and corresponding trigger conditions from parallel data automatically, which improves the interactivity and interpretability of the generated text. In addition, several easy-to-use tools are provided for developers to edit templates of pre-trained models, and APIs are released for developers to call the pre-trained model to generate texts in third-party applications. We conduct experiments on RotoWire datasets for template extraction and text generation. The results show that our model achieves improvements on both tasks. @@ -7020,7 +7020,7 @@ An Interactive Web-Interface for Visualizing the Inner Workings of the Question Answering <fixed-case>LSTM</fixed-case> EkaterinaLoginova - GünterNeumann + GünterNeumann 30–35 D18-2006 We present a visualisation tool which aims to illuminate the inner workings of an LSTM model for question answering. It plots heatmaps of neurons’ firings and allows a user to check the dependency between neurons and manual features. The system possesses an interactive web-interface and can be adapted to other models and domains. @@ -7044,9 +7044,9 @@ <fixed-case>DERE</fixed-case>: A Task and Domain-Independent Slot Filling Framework for Declarative Relation Extraction HeikeAdel - Laura Ana MariaBostan + Laura Ana MariaBostan SeanPapay - SebastianPadó + SebastianPadó RomanKlinger 42–47 D18-2008 @@ -7057,7 +7057,7 @@ Demonstrating <fixed-case>P</fixed-case>ar4<fixed-case>S</fixed-case>em - A Semantic Writing Aid with Adaptive Paraphrasing Seid MuhieYimam - ChrisBiemann + ChrisBiemann 48–53 D18-2009 In this paper, we present Par4Sem, a semantic writing aid tool based on adaptive paraphrasing. Unlike many annotation tools that are primarily used to collect training examples, Par4Sem is integrated into a real word application, in this case a writing aid tool, in order to collect training examples from usage data. Par4Sem is a tool, which supports an adaptive, iterative, and interactive process where the underlying machine learning models are updated for each iteration using new training examples from usage data. After motivating the use of ever-learning tools in NLP applications, we evaluate Par4Sem by adopting it to a text simplification task through mere usage. @@ -7094,7 +7094,7 @@ <fixed-case>S</fixed-case>entence<fixed-case>P</fixed-case>iece: A simple and language independent subword tokenizer and detokenizer for Neural Text Processing - TakuKudo + TakuKudo JohnRichardson 66–71 D18-2012 @@ -7119,7 +7119,7 @@ A Multilingual Information Extraction Pipeline for Investigative Journalism GregorWiedemann Seid MuhieYimam - ChrisBiemann + ChrisBiemann 78–83 D18-2014 We introduce an advanced information extraction pipeline to automatically process very large collections of unstructured textual data for the purpose of investigative journalism. The pipeline serves as a new input processor for the upcoming major release of our New/s/leak 2.0 software, which we develop in cooperation with a large German news organization. The use case is that journalists receive a large collection of files up to several Gigabytes containing unknown contents. Collections may originate either from official disclosures of documents, e.g. Freedom of Information Act requests, or unofficial data leaks. @@ -7130,7 +7130,7 @@ Sisyphus, a Workflow Manager Designed for Machine Translation and Automatic Speech Recognition Jan-ThorstenPeter EugenBeck - HermannNey + HermannNey 84–89 D18-2015 Training and testing many possible parameters or model architectures of state-of-the-art machine translation or automatic speech recognition system is a cumbersome task. They usually require a long pipeline of commands reaching from pre-processing the training data to post-processing and evaluating the output. @@ -7171,12 +7171,12 @@ RajarshiDas AndrewMcCallum MariaChang - AchilleFokoue + AchilleFokoue PavanKapanipathi NicholasMattei RyanMusa KartikTalamadupula - MichaelWitbrock + MichaelWitbrock 102–107 D18-2018 Recent work introduces the AI2 Reasoning Challenge (ARC) and the associated ARC dataset that partitions open domain, complex science questions into an Easy Set and a Challenge Set. That work includes an analysis of 100 questions with respect to the types of knowledge and reasoning required to answer them. However, it does not include clear definitions of these types, nor does it offer information about the quality of the labels or the annotation process used. In this paper, we introduce a novel interface for human annotation of science question-answer pairs with their respective knowledge and reasoning types, in order that the classification of new questions may be improved. We build on the classification schema proposed by prior work on the ARC dataset, and evaluate the effectiveness of our interface with a preliminary study involving 10 participants. @@ -7219,7 +7219,7 @@ Integrating Knowledge-Supported Search into the <fixed-case>INCE</fixed-case>p<fixed-case>TION</fixed-case> Annotation Platform BetoBoullosa RichardEckart de Castilho - NaveenKumar + NaveenKumar Jan-ChristophKlie IrynaGurevych 127–132 @@ -7232,7 +7232,7 @@ <fixed-case>C</fixed-case>yton<fixed-case>MT</fixed-case>: an Efficient Neural Machine Translation Open-source Toolkit Implemented in <fixed-case>C</fixed-case>++ XiaolinWang MasaoUtiyama - EiichiroSumita + EiichiroSumita 133–138 D18-2023 This paper presents an open-source neural machine translation toolkit named CytonMT. The toolkit is built from scratch only using C++ and NVIDIA’s GPU-accelerated libraries. The toolkit features training efficiency, code simplicity and translation quality. Benchmarks show that cytonMT accelerates the training speed by 64.5% to 110.8% on neural networks of various sizes, and achieves competitive translation quality. @@ -7258,7 +7258,7 @@ <fixed-case>LIA</fixed-case>: A Natural Language Programmable Personal Assistant IgorLabutov ShashankSrivastava - TomMitchell + TomMitchell 145–150 D18-2025 We present LIA, an intelligent personal assistant that can be programmed using natural language. Our system demonstrates multiple competencies towards learning from human-like interactions. These include the ability to be taught reusable conditional procedures, the ability to be taught new knowledge about the world (concepts in an ontology) and the ability to be taught how to ground that knowledge in a set of sensors and effectors. Building such a system highlights design questions regarding the overall architecture that such an agent should have, as well as questions about parsing and grounding language in situational contexts. We outline key properties of this architecture, and demonstrate a prototype that embodies them in the form of a personal assistant on an Android device. @@ -7357,7 +7357,7 @@ Deep Latent Variable Models of Natural Language - AlexanderRush + AlexanderRush YoonKim SamWiseman The proposed tutorial will cover deep latent variable models both in the case where exact inference over the latent variables is tractable and when it is not. The former case includes neural extensions of unsupervised tagging and parsing models. Our discussion of the latter case, where inference cannot be performed tractably, will restrict itself to continuous latent variables. In particular, we will discuss recent developments both in neural variational inference (e.g., relating to Variational Auto-encoders) and in implicit density modeling (e.g., relating to Generative Adversarial Networks). We will highlight the challenges of applying these families of methods to NLP problems, and discuss recent successes and best practices. @@ -7368,7 +7368,7 @@ MrinmayaSachan MinjoonSeo HannanehHajishirzi - EricXing + EricXing Standardized tests have recently been proposed as replacements to the Turing test as a driver for progress in AI (Clark, 2015). These include tests on understanding passages and stories and answering questions about them (Richardson et al., 2013; Rajpurkar et al., 2016a, inter alia), science question answering (Schoenick et al., 2016, inter alia), algebra word problems (Kushman et al., 2014, inter alia), geometry problems (Seo et al., 2015; Sachan et al., 2016), visual question answering (Antol et al., 2015), etc. Many of these tests require sophisticated understanding of the world, aiming to push the boundaries of AI. For this tutorial, we broadly categorize these tests into two categories: open domain tests such as reading comprehensions and elementary school tests where the goal is to find the support for an answer from the student curriculum, and closed domain tests such as intermediate level math and science tests (algebra, geometry, Newtonian physics problems, etc.). Unlike open domain tests, closed domain tests require the system to have significant domain knowledge and reasoning capabilities. For example, geometry questions typically involve a number of geometry primitives (lines, quadrilaterals, circles, etc) and require students to use axioms and theorems of geometry (Pythagoras theorem, alternating angles, etc) to solve them. These closed domains often have a formal logical basis and the question can be mapped to a formal language by semantic parsing. The formal question representation can then provided as an input to an expert system to solve the question. sachan-etal-2018-standardized diff --git a/data/xml/D19.xml b/data/xml/D19.xml index b4aaa11c5d..430f55e0a7 100644 --- a/data/xml/D19.xml +++ b/data/xml/D19.xml @@ -45,8 +45,8 @@ Practical Obstacles to Deploying Active Learning DavidLowell - Zachary C.Lipton - Byron C.Wallace + Zachary C.Lipton + Byron C.Wallace 21–30 Active learning (AL) is a widely-used training strategy for maximizing predictive performance subject to a fixed annotation budget. In AL, one iteratively selects training examples for annotation, often those for which the current model is most uncertain (by some measure). The hope is that active sampling leads to better performance than would be achieved under independent and identically distributed (i.i.d.) random samples. While AL has shown promise in retrospective evaluations, these studies often ignore practical obstacles to its use. In this paper, we show that while AL may provide benefits when used with specific models and for particular domains, the benefits of current approaches do not generalize reliably across models and tasks. This is problematic because in practice, one does not have the opportunity to explore and compare alternative AL strategies. Moreover, AL couples the training dataset with the model used to guide its acquisition. We find that subsequently training a successor model with an actively-acquired dataset does not consistently outperform training on i.i.d. sampled data. Our findings raise the question of whether the downsides inherent to AL are worth the modest and inconsistent performance gains it tends to afford. D19-1003 @@ -66,13 +66,13 @@ Knowledge Enhanced Contextual Word Representations - Matthew E.Peters + Matthew E.Peters MarkNeumann RobertLogan RoySchwartz VidurJoshi SameerSingh - Noah A.Smith + Noah A.Smith 43–54 Contextual word representations, typically trained on unstructured, unlabeled text, do not contain any explicit grounding to real world entities and are often unable to remember facts about those entities. We propose a general method to embed multiple knowledge bases (KBs) into large scale models, and thereby enhance their representations with structured, human-curated knowledge. For each KB, we first use an integrated entity linker to retrieve relevant entity embeddings, then update contextual word representations via a form of word-to-entity attention. In contrast to previous approaches, the entity linkers and self-supervised language modeling objective are jointly trained end-to-end in a multitask setting that combines a small amount of entity linking supervision with a large amount of raw text. After integrating WordNet and a subset of Wikipedia into BERT, the knowledge enhanced BERT (KnowBert) demonstrates improved perplexity, ability to recall facts as measured in a probing task and downstream performance on relationship extraction, entity typing, and word sense disambiguation. KnowBert’s runtime is comparable to BERT’s and it scales to large KBs. D19-1005 @@ -94,7 +94,7 @@ PhilippaShoemark Farhana FerdousiLiza DongNguyen - Scott A.Hale + Scott A.Hale BarbaraMcGillivray 66–76 Word embeddings are increasingly used for the automatic detection of semantic change; yet, a robust evaluation and systematic comparison of the choices involved has been lacking. We propose a new evaluation framework for semantic change detection and find that (i) using the whole time series is preferable over only comparing between the first and last time points; (ii) independently trained and aligned embeddings perform better than continuously trained embeddings for long time periods; and (iii) that the reference point for comparison matters. We also present an analysis of the changes detected on a large Twitter dataset spanning 5.5 years. @@ -198,7 +198,7 @@ NavonilMajumder SoujanyaPoria NiyatiChhaya - AlexanderGelbukh + AlexanderGelbukh 154–164 Emotion recognition in conversation (ERC) has received much attention, lately, from researchers due to its potential widespread applications in diverse areas, such as health-care, education, and human resources. In this paper, we present Dialogue Graph Convolutional Network (DialogueGCN), a graph neural network based approach to ERC. We leverage self and inter-speaker dependency of the interlocutors to model conversational context for emotion recognition. Through the graph network, DialogueGCN addresses context propagation issues present in the current RNN-based methods. We empirically show that this method alleviates such issues, while outperforming the current state of the art on a number of benchmark emotion classification datasets. D19-1015 @@ -291,7 +291,7 @@ PengfeiLi KezhiMao XuefengYang - QiLi + QiLi 229–239 While attention mechanisms have been proven to be effective in many NLP tasks, majority of them are data-driven. We propose a novel knowledge-attention encoder which incorporates prior knowledge from external lexical resources into deep neural networks for relation extraction task. Furthermore, we present three effective ways of integrating knowledge-attention with self-attention to maximize the utilization of both knowledge and data. The proposed relation extraction system is end-to-end and fully attention-based. Experiment results show that the proposed knowledge-attention mechanism has complementary strengths with self-attention, and our integrated models outperform existing CNN, RNN, and self-attention based models. State-of-the-art performance is achieved on TACRED, a complex and large-scale relation extraction dataset. D19-1022 @@ -401,9 +401,9 @@ DiLu HengJi JonathanMay - Shih-FuChang - AvirupSil - ClareVoss + Shih-FuChang + AvirupSil + ClareVoss 313–325 The identification of complex semantic structures such as events and entity relations, already a challenging Information Extraction task, is doubly difficult from sources written in under-resourced and under-annotated languages. We investigate the suitability of cross-lingual structure transfer techniques for these tasks. We exploit relation- and event-relevant language-universal features, leveraging both symbolic (including part-of-speech and dependency path) and distributional (including type representation and contextualized representation) information. By representing all entity mentions, event triggers, and contexts into this complex and structured multilingual common space, using graph convolutional networks, we can train a relation or event extractor from source language annotations and apply it to the target language. Extensive experiments on cross-lingual relation and event transfer among English, Chinese, and Arabic demonstrate that our approach achieves performance comparable to state-of-the-art supervised models trained on up to 3,000 manually annotated mentions: up to 62.6% F-score for Relation Extraction, and 63.1% F-score for Event Argument Role Labeling. The event argument role labeling model transferred from English to Chinese achieves similar performance as the model trained from Chinese. We thus find that language-universal symbolic and distributional representations are complementary for cross-lingual structure transfer. D19-1030 @@ -481,7 +481,7 @@ <fixed-case>C</fixed-case>a<fixed-case>R</fixed-case>e: Open Knowledge Graph Embeddings SwapnilGupta SreyashKenkre - ParthaTalukdar + ParthaTalukdar 378–388 Open Information Extraction (OpenIE) methods are effective at extracting (noun phrase, relation phrase, noun phrase) triples from text, e.g., (Barack Obama, took birth in, Honolulu). Organization of such triples in the form of a graph with noun phrases (NPs) as nodes and relation phrases (RPs) as edges results in the construction of Open Knowledge Graphs (OpenKGs). In order to use such OpenKGs in downstream tasks, it is often desirable to learn embeddings of the NPs and RPs present in the graph. Even though several Knowledge Graph (KG) embedding methods have been recently proposed, all of those methods have targeted Ontological KGs, as opposed to OpenKGs. Straightforward application of existing Ontological KG embedding methods to OpenKGs is challenging, as unlike Ontological KGs, OpenKGs are not canonicalized, i.e., a real-world entity may be represented using multiple nodes in the OpenKG, with each node corresponding to a different NP referring to the entity. For example, nodes with labels Barack Obama, Obama, and President Obama may refer to the same real-world entity Barack Obama. Even though canonicalization of OpenKGs has received some attention lately, output of such methods has not been used to improve OpenKG embed- dings. We fill this gap in the paper and propose Canonicalization-infused Representations (CaRe) for OpenKGs. Through extensive experiments, we observe that CaRe enables existing models to adapt to the challenges in OpenKGs and achieve substantial improvements for the link prediction task. D19-1036 @@ -501,7 +501,7 @@ Neural Cross-Lingual Relation Extraction Based on Bilingual Word Embedding Mapping JianNi - RaduFlorian + RaduFlorian 399–409 Relation extraction (RE) seeks to detect and classify semantic relationships between entities, which provides useful information for many NLP applications. Since the state-of-the-art RE models require large amounts of manually annotated data and language-specific resources to achieve high accuracy, it is very challenging to transfer an RE model of a resource-rich language to a resource-poor language. In this paper, we propose a new approach for cross-lingual RE model transfer based on bilingual word embedding mapping. It projects word embeddings from a target language to a source language, so that a well-trained source-language neural network RE model can be directly applied to the target language. Experiment results show that the proposed approach achieves very good performance for a number of target languages on both in-house and open datasets, using a small bilingual dictionary with only 1K word pairs. D19-1038 @@ -648,7 +648,7 @@ Linking artificial and human neural representations of language JonGauthier - RogerLevy + RogerLevy 529–539 What information from an act of sentence understanding is robustly represented in the human brain? We investigate this question by comparing sentence encoding models on a brain decoding task, where the sentence that an experimental participant has seen must be predicted from the fMRI signal evoked by the sentence. We take a pre-trained BERT architecture as a baseline sentence encoding model and fine-tune it on a variety of natural language understanding (NLU) tasks, asking which lead to improvements in brain-decoding performance. We find that none of the sentence encoding tasks tested yield significant increases in brain decoding performance. Through further task ablations and representational analyses, we find that tasks which produce syntax-light representations yield significant improvements in brain decoding performance. Our results constrain the space of NLU models that could best account for human neural representations of language, but also suggest limits on the possibility of decoding fine-grained syntactic information from fMRI human neuroimaging. D19-1050 @@ -672,10 +672,10 @@ Neural data-to-text generation: A comparison between pipeline and end-to-end architectures - ThiagoCastro Ferreira + ThiagoCastro Ferreira Chrisvan der Lee Emielvan Miltenburg - EmielKrahmer + EmielKrahmer 552–562 Traditionally, most data-to-text applications have been designed using a modular pipeline architecture, in which non-linguistic input data is converted into natural language through several intermediate transformations. By contrast, recent neural models for data-to-text generation have been proposed as end-to-end approaches, where the non-linguistic input is rendered in natural language with much less explicit intermediate representations in between. This study introduces a systematic comparison between neural pipeline and end-to-end data-to-text approaches for the generation of text from RDF triples. Both architectures were implemented making use of the encoder-decoder Gated-Recurrent Units (GRU) and Transformer, two state-of-the art deep learning methods. Automatic and human evaluations together with a qualitative analysis suggest that having explicit intermediate steps in the generation process results in better texts than the ones generated by end-to-end approaches. Moreover, the pipeline models generalize better to unseen inputs. Data and code are publicly available. D19-1052 @@ -800,7 +800,7 @@ SaachiJain SamuelHumeau EmilyDinan - TimRocktäschel + TimRocktäschel DouweKiela ArthurSzlam JasonWeston @@ -814,7 +814,7 @@ Help, Anna! Visual Navigation with Natural Multimodal Assistance via Retrospective Curiosity-Encouraging Imitation Learning KhanhNguyen - HalDaumé III + HalDaumé III 684–695 Mobile agents that can leverage help from humans can potentially accomplish more complex tasks than they could entirely on their own. We develop “Help, Anna!” (HANNA), an interactive photo-realistic simulator in which an agent fulfills object-finding tasks by requesting and interpreting natural language-and-vision assistance. An agent solving tasks in a HANNA environment can leverage simulated human assistants, called ANNA (Automatic Natural Navigation Assistants), which, upon request, provide natural language and visual instructions to direct the agent towards the goals. To address the HANNA problem, we develop a memory-augmented neural agent that hierarchically models multiple levels of decision-making, and an imitation learning algorithm that teaches the agent to avoid repeating past mistakes while simultaneously predicting its own chances of making future progress. Empirically, our approach is able to ask for help more effectively than competitive baselines and, thus, attains higher task success rate on both previously seen and previously unseen environments. D19-1063 @@ -936,7 +936,7 @@ Improving Back-Translation with Uncertainty-based Confidence Estimation ShuoWang - YangLiu + YangLiu ChaoWang HuanboLuan MaosongSun @@ -952,7 +952,7 @@ JunXie ZhixingTan JinsongSu - DeyiXiong + DeyiXiong LeiLi 803–812 In this study, we first investigate a novel capsule network with dynamic routing for linear time Neural Machine Translation (NMT), referred as CapsNMT. CapsNMT uses an aggregation mechanism to map the source sentence into a matrix with pre-determined size, and then applys a deep LSTM network to decode the target sequence from the source representation. Unlike the previous work (CITATION) to store the source sentence with a passive and bottom-up way, the dynamic routing policy encodes the source sentence with an iterative process to decide the credit attribution between nodes from lower and higher layers. CapsNMT has two core properties: it runs in time that is linear in the length of the sequences and provides a more flexible way to aggregate the part-whole information of the source sentence. On WMT14 English-German task and a larger WMT14 English-French task, CapsNMT achieves comparable results with the Transformer system. To the best of our knowledge, this is the first work that capsule networks have been empirically investigated for sequence to sequence problems. @@ -992,7 +992,7 @@ Iterative Dual Domain Adaptation for Neural Machine Translation JialiZeng - YangLiu + YangLiu JinsongSu YubingGe YaojieLu @@ -1023,7 +1023,7 @@ PetrePetrov PavelPetrushkov ShahramKhadivi - HermannNey + HermannNey 866–876 We present effective pre-training strategies for neural machine translation (NMT) using parallel corpora involving a pivot language, i.e., source-pivot and pivot-target, leading to a significant improvement in source-target translation. We propose three methods to increase the relation among source, pivot, and target languages in the pre-training: 1) step-wise training of a single model for different language pairs, 2) additional adapter component to smoothly connect pre-trained encoder and decoder, and 3) cross-lingual encoder training via autoencoding of the pivot language. Our methods greatly outperform multilingual models up to +2.6% BLEU in WMT 2019 French-German and German-Czech tasks. We show that our improvements are valid also in zero-shot/zero-resource scenarios. D19-1080 @@ -1094,8 +1094,8 @@ ZaixiangZheng ShujianHuang ZhaopengTu - Xin-YuDai - JiajunChen + Xin-YuDai + JiajunChen 931–941 Previous studies have shown that neural machine translation (NMT) models can benefit from explicitly modeling translated () and untranslated () source contents as recurrent states (CITATION). However, this less interpretable recurrent process hinders its power to model the dynamic updating of and contents during decoding. In this paper, we propose to model the dynamic principles by explicitly separating source words into groups of translated and untranslated contents through parts-to-wholes assignment. The assignment is learned through a novel variant of routing-by-agreement mechanism (CITATION), namely Guided Dynamic Routing, where the translating status at each decoding step guides the routing process to assign each source word to its associated group (i.e., translated or untranslated content) represented by a capsule, enabling translation to be made from holistic context. Experiments show that our approach achieves substantial improvements over both Rnmt and Transformer by producing more adequate translations. Extensive analysis demonstrates that our method is highly interpretable, which is able to recognize the translated and untranslated contents as expected. D19-1086 @@ -1107,9 +1107,9 @@ Revisit Automatic Error Detection for Wrong and Missing Translation – A Supervised Approach WenqiangLei WeiwenXu - Ai TiAw + Ai TiAw YuanxinXiang - Tat SengChua + Tat SengChua 942–952 While achieving great fluency, current machine translation (MT) techniques are bottle-necked by adequacy issues. To have a closer study of these issues and accelerate model development, we propose automatic detecting adequacy errors in MT hypothesis for MT model evaluation. To do that, we annotate missing and wrong translations, the two most prevalent issues for current neural machine translation model, in 15000 Chinese-English translation pairs. We build a supervised alignment model for translation error detection (AlignDet) based on a simple Alignment Triangle strategy to set the benchmark for automatic error detection task. We also discuss the difficulties of this task and the benefits of this task for existing evaluation metrics. D19-1087 @@ -1150,7 +1150,7 @@ Don’t Forget the Long Tail! A Comprehensive Analysis of Morphological Generalization in Bilingual Lexicon Induction PaulaCzarnowska SebastianRuder - EdouardGrave + EdouardGrave RyanCotterell AnnCopestake 974–983 @@ -1185,7 +1185,7 @@ Hierarchical Pointer Net Parsing LinlinLiu XiangLin - ShafiqJoty + ShafiqJoty SimengHan LidongBing 1007–1017 @@ -1210,7 +1210,7 @@ ZuyiBao RuiHuang ChenLi - KennyZhu + KennyZhu 1028–1039 Previous work on cross-lingual sequence labeling tasks either requires parallel data or bridges the two languages through word-by-word matching. Such requirements and assumptions are infeasible for most languages, especially for languages with large linguistic distances, e.g., English and Chinese. In this work, we propose a Multilingual Language Model with deep semantic Alignment (MLMA) to generate language-independent representations for cross-lingual sequence labeling. Our methods require only monolingual corpora with no bilingual resources at all and take advantage of deep contextualized representations. Experimental results show that our approach achieves new state-of-the-art NER and POS performance across European languages, and is also effective on distant language pairs such as English and Chinese. D19-1095 @@ -1225,7 +1225,7 @@ MinlongPeng JinlanFu ZhongyuWei - XuanjingHuang + XuanjingHuang 1040–1050 Recurrent neural networks (RNN) used for Chinese named entity recognition (NER) that sequentially track character and word information have achieved great success. However, the characteristic of chain structure and the lack of global semantics determine that RNN-based models are vulnerable to word ambiguities. In this work, we try to alleviate this problem by introducing a lexicon-based graph neural network with global semantics, in which lexicon knowledge is used to connect characters to capture the local composition, while a global relay node can capture global sentence semantics and long-range dependency. Based on the multiple graph-based interactions among characters, potential words, and the whole-sentence semantics, word ambiguities can be effectively tackled. Experiments on four NER datasets show that the proposed model achieves significant improvements against other baseline models. D19-1096 @@ -1239,7 +1239,7 @@ JinchaoZhang JieZhou YufengChen - JinanXu + JinanXu 1051–1060 Spoken Language Understanding (SLU) mainly involves two tasks, intent detection and slot filling, which are generally modeled jointly in existing works. However, most existing models fail to fully utilize cooccurrence relations between slots and intents, which restricts their potential performance. To address this issue, in this paper we propose a novel Collaborative Memory Network (CM-Net) based on the well-designed block, named CM-block. The CM-block firstly captures slot-specific and intent-specific features from memories in a collaborative manner, and then uses these enriched features to enhance local context representations, based on which the sequential information flow leads to more specific (slot and intent) global utterance representations. Through stacking multiple CM-blocks, our CM-Net is able to alternately perform information exchange among specific memories, local contexts and the global utterance, and thus incrementally enriches each other. We evaluate the CM-Net on two standard benchmarks (ATIS and SNIPS) and a self-collected corpus (CAIS). Experimental results show that the CM-Net achieves the state-of-the-art results on the ATIS and SNIPS in most of criteria, and significantly outperforms the baseline models on the CAIS. Additionally, we make the CAIS dataset publicly available for the research community. D19-1097 @@ -1262,7 +1262,7 @@ Semantic Role Labeling with Iterative Structure Refinement ChunchuanLyu - Shay B.Cohen + Shay B.Cohen IvanTitov 1071–1082 Modern state-of-the-art Semantic Role Labeling (SRL) methods rely on expressive sentence encoders (e.g., multi-layer LSTMs) but tend to model only local (if any) interactions between individual argument labeling decisions. This contrasts with earlier work and also with the intuition that the labels of individual arguments are strongly interdependent. We model interactions between argument labeling decisions through iterative refinement. Starting with an output produced by a factorized model, we iteratively refine it using a refinement network. Instead of modeling arbitrary interactions among roles and words, we encode prior knowledge about the SRL problem by designing a restricted network architecture capturing non-local interactions. This modeling choice prevents overfitting and results in an effective model, outperforming strong factorized baseline models on all 7 CoNLL-2009 languages, and achieving state-of-the-art results on 5 of them, including English. @@ -1275,7 +1275,7 @@ Entity Projection via Machine Translation for Cross-Lingual <fixed-case>NER</fixed-case> AlankarJain BhargaviParanjape - Zachary C.Lipton + Zachary C.Lipton 1083–1092 Although over 100 languages are supported by strong off-the-shelf machine translation systems, only a subset of them possess large annotated corpora for named entity recognition. Motivated by this fact, we leverage machine translation to improve annotation-projection approaches to cross-lingual named entity recognition. We propose a system that improves over prior entity-projection methods by: (a) leveraging machine translation systems twice: first for translating sentences and subsequently for translating entities; (b) matching entities based on orthographic and phonetic similarity; and (c) identifying matches based on distributional statistics derived from the dataset. Our approach improves upon current state-of-the-art methods for cross-lingual named entity recognition on 5 diverse languages by an average of 4.1 points. Further, our method achieves state-of-the-art F_1 scores for Armenian, outperforming even a monolingual model trained on Armenian source data. D19-1100 @@ -1298,7 +1298,7 @@ A systematic comparison of methods for low-resource dependency parsing on genuinely low-resource languages ClaraVania YovaKementchedjhieva - AndersSøgaard + AndersSøgaard AdamLopez 1105–1116 Parsers are available for only a handful of the world’s languages, since they require lots of training data. How far can we get with just a small amount of training data? We systematically compare a set of simple strategies for improving low-resource parsers: data augmentation, which has not been tested before; cross-lingual training; and transliteration. Experimenting on three typologically diverse low-resource languages—North Sámi, Galician, and Kazah—We find that (1) when only the low-resource treebank is available, data augmentation is very helpful; (2) when a related high-resource treebank is available, cross-lingual training is helpful and complements data augmentation; and (3) when the high-resource treebank uses a different writing system, transliteration into a shared orthographic spaces is also very helpful. @@ -1324,7 +1324,7 @@ ZhichuLu ForoughArabshahi IgorLabutov - TomMitchell + TomMitchell 1129–1139 Computing devices have recently become capable of interacting with their end users via natural language. However, they can only operate within a limited “supported” domain of discourse and fail drastically when faced with an out-of-domain utterance, mainly due to the limitations of their semantic parser. In this paper, we propose a semantic parser that generalizes to out-of-domain examples by learning a general strategy for parsing an unseen utterance through adapting the logical forms of seen utterances, instead of learning to generate a logical form from scratch. Our parser maintains a memory consisting of a representative subset of the seen utterances paired with their logical forms. Given an unseen utterance, our parser works by looking up a similar utterance from the memory and adapting its logical form until it fits the unseen utterance. Moreover, we present a data generation strategy for constructing utterance-logical form pairs from different domains. Our results show an improvement of up to 68.8% on one-shot parsing under two different evaluation settings compared to the baselines. D19-1104 @@ -1347,7 +1347,7 @@ Variable beam search for generative neural parsing and its relevance for the analysis of neuro-imaging signal - BenoitCrabbé + BenoitCrabbé MurielleFabre ChristophePallier 1150–1160 @@ -1370,7 +1370,7 @@ Robust Text Classifier on Test-Time Budgets - Md RizwanParvez + Md RizwanParvez TolgaBolukbasi Kai-WeiChang VenkateshSaligrama @@ -1385,7 +1385,7 @@ Commonsense Knowledge Mining from Pretrained Models JoeDavison JoshuaFeldman - AlexanderRush + AlexanderRush 1173–1178 Inferring commonsense knowledge is a key challenge in machine learning. Due to the sparsity of training data, previous work has shown that supervised methods for commonsense knowledge mining underperform when evaluated on novel data. In this work, we develop a method for generating commonsense knowledge using a large, pre-trained bidirectional language model. By transforming relational triples into masked sentences, we can use this model to rank a triple’s validity by the estimated pointwise mutual information between the two entities. Since we do not update the weights of the bidirectional model, our approach is not biased by the coverage of any one commonsense knowledge base. Though we do worse on a held-out test set than models explicitly trained on a corresponding training set, our approach outperforms these methods when mining commonsense knowledge from new sources, suggesting that our unsupervised technique generalizes better than current supervised approaches. D19-1109 @@ -1398,7 +1398,7 @@ JesseDodge RoySchwartz HaoPeng - Noah A.Smith + Noah A.Smith 1179–1184 Neural models for NLP typically use large numbers of parameters to reach state-of-the-art performance, which can lead to excessive memory usage and increased runtime. We present a structure learning method for learning sparse, parameter-efficient NLP models. Our method applies group lasso to rational RNNs (Peng et al., 2018), a family of models that is closely connected to weighted finite-state automata (WFSAs). We take advantage of rational RNNs’ natural grouping of the weights, so the group lasso penalty directly removes WFSA states, substantially reducing the number of parameters in the model. Our experiments on a number of sentiment analysis datasets, using both GloVe and BERT embeddings, show that our approach learns neural structures which have fewer parameters without sacrificing performance relative to parameter-rich baselines. Our method also highlights the interpretable properties of rational RNNs. We show that sparsifying such models makes them easier to visualize, and we present models that rely exclusively on as few as three WFSAs after pruning more than 90% of the weights. We publicly release our code. D19-1110 @@ -1409,7 +1409,7 @@ Analytical Methods for Interpretable Ultradense Word Embeddings PhilippDufter - HinrichSchütze + HinrichSchütze 1185–1191 Word embeddings are useful for a wide variety of tasks, but they lack interpretability. By rotating word spaces, interpretable dimensions can be identified while preserving the information contained in the embeddings without any loss. In this work, we investigate three methods for making word spaces interpretable by rotation: Densifier (Rothe et al., 2016), linear SVMs and DensRay, a new method we propose. In contrast to Densifier, DensRay can be computed in closed form, is hyperparameter-free and thus more robust than Densifier. We evaluate the three methods on lexicon induction and set-based word analogy. In addition we provide qualitative insights as to how interpretable word spaces can be used for removing gender bias from embeddings. D19-1111 @@ -1458,7 +1458,7 @@ Neural Linguistic Steganography ZacharyZiegler YuntianDeng - AlexanderRush + AlexanderRush 1210–1215 Whereas traditional cryptography encrypts a secret message into an unintelligible form, steganography conceals that communication is taking place by encoding a secret message into a cover signal. Language is a particularly pragmatic cover signal due to its benign occurrence and independence from any one medium. Traditionally, linguistic steganography systems encode secret messages in existing text via synonym substitution or word order rearrangements. Advances in neural language models enable previously impractical generation-based techniques. We propose a steganography technique based on arithmetic coding with large-scale neural language models. We find that our approach can generate realistic looking cover sentences as evaluated by humans, while at the same time preserving security by matching the cover message distribution with the language model distribution. D19-1115 @@ -1480,7 +1480,7 @@ Attention Optimization for Abstractive Document Summarization MinGui - JunfengTian + JunfengTian RuiWang ZhengluYang 1222–1228 @@ -1498,7 +1498,7 @@ ChenQiu AndersSandholm MichaelRingaard - AndersSøgaard + AndersSøgaard 1229–1235 Unresolved coreference is a bottleneck for relation extraction, and high-quality coreference resolvers may produce an output that makes it a lot easier to extract knowledge triples. We show how to improve coreference resolvers by forwarding their input to a relation extraction system and reward the resolvers for producing triples that are found in knowledge bases. Since relation extraction systems can rely on different forms of supervision and be biased in different ways, we obtain the best performance, improving over the state of the art, using multi-task reinforcement learning. D19-1118 @@ -1547,9 +1547,9 @@ Towards Extracting Medical Family History from Natural Language Interactions: A New Dataset and Baselines MahmoudAzab StephaneDadian - ViviNastase + ViviNastase LarryAn - RadaMihalcea + RadaMihalcea 1255–1260 We introduce a new dataset consisting of natural language interactions annotated with medical family histories, obtained during interactions with a genetic counselor and through crowdsourcing, following a questionnaire created by experts in the domain. We describe the data collection process and the annotations performed by medical professionals, including illness and personal attributes (name, age, gender, family relationships) for the patient and their family members. An initial system that performs argument identification and relation extraction shows promising results – average F-score of 0.87 on complex sentences on the targeted relations. D19-1122 @@ -1560,7 +1560,7 @@ Multi-task Learning for Natural Language Generation in Task-Oriented Dialogue ChenguangZhu MichaelZeng - XuedongHuang + XuedongHuang 1261–1266 In task-oriented dialogues, Natural Language Generation (NLG) is the final yet crucial step to produce user-facing system utterances. The result of NLG is directly related to the perceived quality and usability of a dialogue system. While most existing systems provide semantically correct responses given goals to present, they struggle to match the variation and fluency in the human language. In this paper, we propose a novel multi-task learning framework, NLG-LM, for natural language generation. In addition to generating high-quality responses conveying the required information, it also explicitly targets for naturalness in generated responses via an unconditioned language model. This can significantly improve the learning of style and variation in human language. Empirical results show that this multi-task learning framework outperforms previous models across multiple datasets. For example, it improves the previous best BLEU score on the E2E-NLG dataset by 2.2%, and on the Laptop dataset by 6.1%. D19-1123 @@ -1610,7 +1610,7 @@ ArshitGupta PengZhang GarimaLalwani - MonaDiab + MonaDiab 1285–1290 Natural Language Understanding (NLU) is a core component of dialog systems. It typically involves two tasks - Intent Classification (IC) and Slot Labeling (SL), which are then followed by a dialogue management (DM) component. Such NLU systems cater to utterances in isolation, thus pushing the problem of context management to DM. However, contextual information is critical to the correct prediction of intents in a conversation. Prior work on contextual NLU has been limited in terms of the types of contextual signals used and the understanding of their impact on the model. In this work, we propose a context-aware self-attentive NLU (CASA-NLU) model that uses multiple signals over a variable context window, such as previous intents, slots, dialog acts and utterances, in addition to the current user utterance. CASA-NLU outperforms a recurrent contextual NLU baseline on two conversational datasets, yielding a gain of up to 7% on the IC task. Moreover, a non-contextual variant of CASA-NLU achieves state-of-the-art performance on standard public datasets - SNIPS and ATIS. D19-1127 @@ -1637,7 +1637,7 @@ ZihanLiu JaminShin YanXu - Genta IndraWinata + Genta IndraWinata PengXu AndreaMadotto PascaleFung @@ -1663,7 +1663,7 @@ An Evaluation Dataset for Intent Classification and Out-of-Scope Prediction StefanLarson AnishMahendran - Joseph J.Peper + Joseph J.Peper ChristopherClarke AndrewLee ParkerHill @@ -1693,7 +1693,7 @@ uniblock: Scoring and Filtering Corpus with <fixed-case>U</fixed-case>nicode Block Information YingboGao WeiyueWang - HermannNey + HermannNey 1324–1329 The preprocessing pipelines in Natural Language Processing usually involve a step of removing sentences consisted of illegal characters. The definition of illegal characters and the specific removal strategy depend on the task, language, domain, etc, which often lead to tiresome and repetitive scripting of rules. In this paper, we introduce a simple statistical method, uniblock, to overcome this problem. For each sentence, uniblock generates a fixed-size feature vector using Unicode block information of the characters. A Gaussian mixture model is then estimated on some clean corpus using variational inference. The learned model can then be used to score sentences and filter corpus. We present experimental results on Sentiment Analysis, Language Modeling and Machine Translation, and show the simplicity and effectiveness of our method. D19-1133 @@ -1763,7 +1763,7 @@ KehaiChen RuiWang MasaoUtiyama - EiichiroSumita + EiichiroSumita 1361–1367 In the Transformer network architecture, positional embeddings are used to encode order dependencies into the input representation. However, this input representation only involves static order dependencies based on discrete numerical information, that is, are independent of word content. To address this issue, this work proposes a recurrent positional embedding approach based on word vector. In this approach, these recurrent positional embeddings are learned by a recurrent neural network, encoding word content-based order dependencies into the input representation. They are then integrated into the existing multi-head self-attention model as independent heads or part of each head. The experimental results revealed that the proposed approach improved translation performance over that of the state-of-the-art Transformer baseline in WMT’14 English-to-German and NIST Chinese-to-English translation tasks. D19-1139 @@ -1774,7 +1774,7 @@ Machine Translation for Machines: the Sentiment Classification Use Case AmirhosseinTebbifakhr LuisaBentivogli - MatteoNegri + MatteoNegri MarcoTurchi 1368–1374 We propose a neural machine translation (NMT) approach that, instead of pursuing adequacy and fluency (“human-oriented” quality criteria), aims to generate translations that are best suited as input to a natural language processing component designed for a specific downstream task (a “machine-oriented” criterion). Towards this objective, we present a reinforcement learning technique based on a new candidate sampling strategy, which exploits the results obtained on the downstream task as weak feedback. Experiments in sentiment classification of Twitter data in German and Italian show that feeding an English classifier with “machine-oriented” translations significantly improves its performance. Classification results outperform those obtained with translations produced by general-purpose NMT models as well as by an approach based on reinforcement learning. Moreover, our results on both languages approximate the classification accuracy computed on gold standard English tweets. @@ -1906,7 +1906,7 @@ Efficient Convolutional Neural Networks for Diacritic Restoration SawsanAlqahtani AjayMishra - MonaDiab + MonaDiab 1442–1448 Diacritic restoration has gained importance with the growing need for machines to understand written texts. The task is typically modeled as a sequence labeling problem and currently Bidirectional Long Short Term Memory (BiLSTM) models provide state-of-the-art results. Recently, Bai et al. (2018) show the advantages of Temporal Convolutional Neural Networks (TCN) over Recurrent Neural Networks (RNN) for sequence modeling in terms of performance and computational resources. As diacritic restoration benefits from both previous as well as subsequent timesteps, we further apply and evaluate a variant of TCN, Acausal TCN (A-TCN), which incorporates context from both directions (previous and future) rather than strictly incorporating previous context as in the case of TCN. A-TCN yields significant improvement over TCN for diacritization in three different languages: Arabic, Yoruba, and Vietnamese. Furthermore, A-TCN and BiLSTM have comparable performance, making A-TCN an efficient alternative over BiLSTM since convolutions can be trained in parallel. A-TCN is significantly faster than BiLSTM at inference time (270% 334% improvement in the amount of text diacritized per minute). D19-1151 @@ -1941,7 +1941,7 @@ Multi-Head Attention with Diversity for Learning Grounded Multilingual Multimodal Representations Po-YaoHuang XiaojunChang - AlexanderHauptmann + AlexanderHauptmann 1461–1467 With the aim of promoting and understanding the multilingual version of image search, we leverage visual object detection and propose a model with diverse multi-head attention to learn grounded multilingual multimodal representations. Specifically, our model attends to different types of textual semantics in two languages and visual objects for fine-grained alignments between sentences and images. We introduce a new objective function which explicitly encourages attention diversity to learn an improved visual-semantic embedding space. We evaluate our model in the German-Image and English-Image matching tasks on the Multi30K dataset, and in the Semantic Textual Similarity task with the English descriptions of visual content. Results show that our model yields a significant performance gain over other methods in all of the three tasks. D19-1154 @@ -1991,7 +1991,7 @@ Grounding learning of modifier dynamics: An application to color naming XudongHan PhilipSchulz - TrevorCohn + TrevorCohn 1488–1493 Grounding is crucial for natural language understanding. An important subtask is to understand modified color expressions, such as “light blue”. We present a model of color modifiers that, compared with previous additive models in RGB space, learns more complex transformations. In addition, we present a model that operates in the HSV color space. We show that certain adjectives are better modeled in that space. To account for all modifiers, we train a hard ensemble model that selects a color space depending on the modifier-color pair. Experimental results show significant and consistent improvements compared to the state-of-the-art baseline model. D19-1158 @@ -2006,7 +2006,7 @@ YonatanBisk AsliCelikyilmaz JianfengGao - Noah A.Smith + Noah A.Smith YejinChoi 1494–1499 Core to the vision-and-language navigation (VLN) challenge is building robust instruction representations and action decoding schemes, which can generalize well to previously unseen instructions and environments. In this paper, we report two simple but highly effective methods to address these challenges and lead to a new state-of-the-art performance. First, we adapt large-scale pretrained language models to learn text representations that generalize better to previously unseen instructions. Second, we propose a stochastic sampling scheme to reduce the considerable gap between the expert actions in training and sampled actions in test, so that the agent can learn to correct its own mistakes during long sequential action decoding. Combining the two techniques, we achieve a new state of the art on the Room-to-Room benchmark with 6% absolute gain over the previous best result (47% -> 53%) on the Success Rate weighted by Path Length metric. @@ -2059,7 +2059,7 @@ KarishmaMandyam RushinShah MikeLewis - LukeZettlemoyer + LukeZettlemoyer 1520–1526 We propose a semantic parser for parsing compositional utterances into Task Oriented Parse (TOP), a tree representation that has intents and slots as labels of nesting tree nodes. Our parser is span-based: it scores labels of the tree nodes covering each token span independently, but then decodes a valid tree globally. In contrast to previous sequence decoding approaches and other span-based parsers, we (1) improve the training speed by removing the need to run the decoder at training time; and (2) introduce edge scores, which model relations between parent and child labels, to mitigate the independence assumption between node labels and improve accuracy. Our best parser outperforms previous methods on the TOP dataset of mixed-domain task-oriented utterances in both accuracy and training speed. D19-1163 @@ -2119,8 +2119,8 @@ Hierarchical Modeling of Global Context for Document-Level Neural Machine Translation XinTan LongyinZhang - DeyiXiong - GuodongZhou + DeyiXiong + GuodongZhou 1576–1585 Document-level machine translation (MT) remains challenging due to the difficulty in efficiently using document context for translation. In this paper, we propose a hierarchical model to learn the global context for document-level neural machine translation (NMT). This is done through a sentence encoder to capture intra-sentence dependencies and a document encoder to model document-level inter-sentence consistency and coherence. With this hierarchical architecture, we feedback the extracted global document context to each word in a top-down fashion to distinguish different translations of a word according to its specific surrounding context. In addition, since large-scale in-domain document-level parallel corpora are usually unavailable, we use a two-step training strategy to take advantage of a large-scale corpus with out-of-domain parallel sentence pairs and a small-scale corpus with in-domain parallel document pairs to achieve the domain adaptability. Experimental results on several benchmark corpora show that our proposed model can significantly improve document-level translation performance over several strong NMT baselines. D19-1168 @@ -2146,7 +2146,7 @@ MinghaoHu YuxingPeng ZhenHuang - DongshengLi + DongshengLi 1596–1606 Rapid progress has been made in the field of reading comprehension and question answering, where several systems have achieved human parity in some simplified settings. However, the performance of these models degrades significantly when they are applied to more realistic scenarios, such as answers involve various types, multiple text strings are correct answers, or discrete reasoning abilities are required. In this paper, we introduce the Multi-Type Multi-Span Network (MTMSN), a neural reading comprehension model that combines a multi-type answer predictor designed to support various answer types (e.g., span, count, negation, and arithmetic expression) with a multi-span extraction method for dynamically producing one or multiple text spans. In addition, an arithmetic expression reranking mechanism is proposed to rank expression candidates for further confirming the prediction. Experiments show that our model achieves 79.9 F1 on the DROP hidden test set, creating new state-of-the-art results. Source code (https://github.com/huminghao16/MTMSN) is released to facilitate future work. D19-1170 @@ -2157,7 +2157,7 @@ Neural Duplicate Question Detection without Labeled Training Data AndreasRücklé - Nafise SadatMoosavi + Nafise SadatMoosavi IrynaGurevych 1607–1617 Supervised training of neural models to duplicate question detection in community Question Answering (CQA) requires large amounts of labeled question pairs, which can be costly to obtain. To minimize this cost, recent works thus often used alternative methods, e.g., adversarial domain adaptation. In this work, we propose two novel methods—weak supervision using the title and body of a question, and the automatic generation of duplicate questions—and show that both can achieve improved performances even though they do not require any labeled data. We provide a comparison of popular training strategies and show that our proposed approaches are more effective in many cases because they can utilize larger amounts of data from the CQA forums. Finally, we show that weak supervision with question title and body information is also an effective method to train CQA answer selection models without direct answer supervision. @@ -2185,7 +2185,7 @@ Multi-View Domain Adapted Sentence Embeddings for Low-Resource Unsupervised Duplicate Question Detection NinaPoerner - HinrichSchütze + HinrichSchütze 1630–1641 We address the problem of Duplicate Question Detection (DQD) in low-resource domain-specific Community Question Answering forums. Our multi-view framework MV-DASE combines an ensemble of sentence encoders via Generalized Canonical Correlation Analysis, using unlabeled data only. In our experiments, the ensemble includes generic and domain-specific averaged word embeddings, domain-finetuned BERT and the Universal Sentence Encoder. We evaluate MV-DASE on the CQADupStack corpus and on additional low-resource Stack Exchange forums. Combining the strengths of different encoders, we significantly outperform BM25, all single-view systems as well as a recent supervised domain-adversarial DQD method. D19-1173 @@ -2212,7 +2212,7 @@ The Trumpiest Trump? Identifying a Subject’s Most Characteristic Tweets CharutaPethe - SteveSkiena + SteveSkiena 1653–1663 The sequence of documents produced by any given author varies in style and content, but some documents are more typical or representative of the source than others. We quantify the extent to which a given short text is characteristic of a specific person, using a dataset of tweets from fifteen celebrities. Such analysis is useful for generating excerpts of high-volume Twitter profiles, and understanding how representativeness relates to tweet popularity. We first consider the related task of binary author detection (is x the author of text T?), and report a test accuracy of 90.37% for the best of five approaches to this problem. We then use these models to compute characterization scores among all of an author’s texts. A user study shows human evaluators agree with our characterization model for all 15 celebrities in our dataset, each with p-value < 0.05. We use these classifiers to show surprisingly strong correlations between characterization scores and the popularity of the associated texts. Indeed, we demonstrate a statistically significant correlation between this score and tweet popularity (likes/replies/retweets) for 13 of the 15 celebrities in our study. D19-1175 @@ -2258,7 +2258,7 @@ (Male, Bachelor) and (Female, <fixed-case>P</fixed-case>h.<fixed-case>D</fixed-case>) have different connotations: Parallelly Annotated Stylistic Language Dataset with Multiple Personas DongyeopKang VarunGangal - EduardHovy + EduardHovy 1696–1706 Stylistic variation in text needs to be studied with different aspects including the writer’s personal traits, interpersonal relations, rhetoric, and more. Despite recent attempts on computational modeling of the variation, the lack of parallel corpora of style language makes it difficult to systematically control the stylistic change as well as evaluate such models. We release PASTEL, the parallel and annotated stylistic language dataset, that contains ~41K parallel sentences (8.3K parallel stories) annotated across different personas. Each persona has different styles in conjunction: gender, age, country, political view, education, ethnic, and time-of-writing. The dataset is collected from human annotators with solid control of input denotation: not only preserving original meaning between text, but promoting stylistic diversity to annotators. We test the dataset on two interesting applications of style language, where PASTEL helps design appropriate experiment and evaluation. First, in predicting a target style (e.g., male or female in gender) given a text, multiple styles of PASTEL make other external style variables controlled (or fixed), which is a more accurate experimental design. Second, a simple supervised model with our parallel text outperforms the unsupervised models using nonparallel text in style transfer. Our dataset is publicly available. D19-1179 @@ -2295,9 +2295,9 @@ Deep Ordinal Regression for Pledge Specificity Prediction - ShivashankarSubramanian - TrevorCohn - TimothyBaldwin + ShivashankarSubramanian + TrevorCohn + TimothyBaldwin 1729–1740 Many pledges are made in the course of an election campaign, forming important corpora for political analysis of campaign strategy and governmental accountability. At present, there are no publicly available annotated datasets of pledges, and most political analyses rely on manual annotations. In this paper we collate a novel dataset of manifestos from eleven Australian federal election cycles, with over 12,000 sentences annotated with specificity (e.g., rhetorical vs detailed pledge) on a fine-grained scale. We propose deep ordinal regression approaches for specificity prediction, under both supervised and semi-supervised settings, and provide empirical results demonstrating the effectiveness of the proposed techniques over several baseline approaches. We analyze the utility of pledge specificity modeling across a spectrum of policy issues in performing ideology prediction, and further provide qualitative analysis in terms of capturing party-specific issue salience across election cycles. D19-1182 @@ -2307,7 +2307,7 @@ Data-Efficient Goal-Oriented Conversation with Dialogue Knowledge Transfer Networks IgorShalyminov - SungjinLee + SungjinLee ArashEshghi OliverLemon 1741–1751 @@ -2332,7 +2332,7 @@ WeikangWang JiajunZhang QianLi - ChengqingZong + ChengqingZong ZhifeiLi 1762–1771 Identity fraud detection is of great importance in many real-world scenarios such as the financial industry. However, few studies addressed this problem before. In this paper, we focus on identity fraud detection in loan applications and propose to solve this problem with a novel interactive dialogue system which consists of two modules. One is the knowledge graph (KG) constructor organizing the personal information for each loan applicant. The other is structured dialogue management that can dynamically generate a series of questions based on the personal KG to ask the applicants and determine their identity states. We also present a heuristic user simulator based on problem analysis to evaluate our method. Experiments have shown that the trainable dialogue system can effectively detect fraudsters, and achieve higher recognition accuracy compared with rule-based systems. Furthermore, our learned dialogue strategies are interpretable and flexible, which can help promote real-world applications. @@ -2354,7 +2354,7 @@ Knowledge Aware Conversation Generation with Explainable Reasoning over Augmented Graphs ZhibinLiu - Zheng-YuNiu + Zheng-YuNiu HuaWu HaifengWang 1782–1792 @@ -2396,11 +2396,11 @@ Structuring Latent Spaces for Stylized Response Generation XiangGao YizheZhang - SungjinLee + SungjinLee MichelGalley ChrisBrockett JianfengGao - BillDolan + BillDolan 1814–1823 Generating responses in a targeted style is a useful yet challenging task, especially in the absence of parallel data. With limited data, existing methods tend to generate responses that are either less stylized or less context-relevant. We propose StyleFusion, which bridges conversation modeling and non-parallel style transfer by sharing a structured latent space. This structure allows the system to generate stylized relevant responses by sampling in the neighborhood of the conversation model prediction, and continuously control the style level. We demonstrate this method using dialogues from Reddit data and two sets of sentences with distinct styles (arXiv and Sherlock Holmes novels). Automatic and human evaluation show that, without sacrificing appropriateness, the system generates responses of the targeted style and outperforms competitive baselines. D19-1190 @@ -2501,8 +2501,8 @@ JunGao WeiBi XiaojiangLiu - JunhuiLi - GuodongZhou + JunhuiLi + GuodongZhou ShumingShi 1898–1908 Neural conversation models such as encoder-decoder models are easy to generate bland and generic responses. Some researchers propose to use the conditional variational autoencoder (CVAE) which maximizes the lower bound on the conditional log-likelihood on a continuous latent variable. With different sampled latent variables, the model is expected to generate diverse responses. Although the CVAE-based models have shown tremendous potential, their improvement of generating high-quality responses is still unsatisfactory. In this paper, we introduce a discrete latent variable with an explicit semantic meaning to improve the CVAE on short-text conversation. A major advantage of our model is that we can exploit the semantic distance between the latent variables to maintain good diversity between the sampled latent variables. Accordingly, we propose a two-stage sampling approach to enable efficient diverse variable selection from a large latent space assumed in the short-text conversation task. Experimental results indicate that our model outperforms various kinds of generation models under both automatic and human evaluations and generates more diverse and informative responses. @@ -2558,13 +2558,13 @@ Variational Hierarchical User-based Conversation Model JinYeongBak - AliceOh + AliceOh 1941–1950 Generating appropriate conversation responses requires careful modeling of the utterances and speakers together. Some recent approaches to response generation model both the utterances and the speakers, but these approaches tend to generate responses that are overly tailored to the speakers. To overcome this limitation, we propose a new model with a stochastic variable designed to capture the speaker information and deliver it to the conversational context. An important part of this model is the network of speakers in which each speaker is connected to one or more conversational partner, and this network is then used to model the speakers better. To test whether our model generates more appropriate conversation responses, we build a new conversation corpus containing approximately 27,000 speakers and 770,000 conversations. With this corpus, we run experiments of generating conversational responses and compare our model with other state-of-the-art models. By automatic evaluation metrics and human evaluation, we show that our model outperforms other models in generating appropriate responses. An additional advantage of our model is that it generates better responses for various new user scenarios, for example when one of the speakers is a known user in our corpus but the partner is a new user. For replicability, we make available all our code and data. D19-1202 D19-1202.Attachment.pdf - 10.18653/v1/D19-1202 D19-1202.Poster.pdf + 10.18653/v1/D19-1202 bak-oh-2019-variational @@ -2572,7 +2572,7 @@ DongyeopKang AnushaBalakrishnan PararthShah - PaulCrook + PaulCrook Y-LanBoureau JasonWeston 1951–1961 @@ -2590,7 +2590,7 @@ SuyiLi EricXue BoPang - Xi VictoriaLin + Xi VictoriaLin Yi ChernTan TianzeShi ZihanLi @@ -2598,7 +2598,7 @@ MichihiroYasunaga SungrokShim TaoChen - AlexanderFabbri + AlexanderFabbri ZifanLi LuyaoChen YuwenZhang @@ -2606,8 +2606,8 @@ VincentZhang CaimingXiong RichardSocher - WalterLasecki - DragomirRadev + WalterLasecki + DragomirRadev 1962–1979 We present CoSQL, a corpus for building cross-domain, general-purpose database (DB) querying dialogue systems. It consists of 30k+ turns plus 10k+ annotated SQL queries, obtained from a Wizard-of-Oz (WOZ) collection of 3k dialogues querying 200 complex DBs spanning 138 domains. Each dialogue simulates a real-world DB query scenario with a crowd worker as a user exploring the DB and a SQL expert retrieving answers with SQL, clarifying ambiguous questions, or otherwise informing of unanswerable questions. When user questions are answerable by SQL, the expert describes the SQL and execution results to the user, hence maintaining a natural interaction flow. CoSQL introduces new challenges compared to existing task-oriented dialogue datasets: (1) the dialogue states are grounded in SQL, a domain-independent executable representation, instead of domain-specific slot value pairs, and (2) because testing is done on unseen databases, success requires generalizing to new domains. CoSQL includes three tasks: SQL-grounded dialogue state tracking, response generation from query results, and user dialogue act prediction. We evaluate a set of strong baselines for each task and show that CoSQL presents significant challenges for future research. The dataset, baselines, and leaderboard will be released at https://yale-lily.github.io/cosql. D19-1204 @@ -2618,7 +2618,7 @@ A Practical Dialogue-Act-Driven Conversation Model for Multi-Turn Response Selection HarshitKumar ArvindAgarwal - SachindraJoshi + SachindraJoshi 1980–1989 Dialogue Acts play an important role in conversation modeling. Research has shown the utility of dialogue acts for the response selection task, however, the underlying assumption is that the dialogue acts are readily available, which is impractical, as dialogue acts are rarely available for new conversations. This paper proposes an end-to-end multi-task model for conversation modeling, which is optimized for two tasks, dialogue act prediction and response selection, with the latter being the task of interest. It proposes a novel way of combining the predicted dialogue acts of context and response with the context (previous utterances) and response (follow-up utterance) in a crossway fashion, such that, it achieves at par performance for the response selection task compared to the model that uses actual dialogue acts. Through experiments on two well known datasets, we demonstrate that the multi-task model not only improves the accuracy of the dialogue act prediction task but also improves the MRR for the response selection task. Also, the cross-stitching of dialogue acts of context and response with the context and response is better than using either one of them individually. D19-1205 @@ -2706,7 +2706,7 @@ NikosPapasarantopoulos LeaFrermann MirellaLapata - Shay B.Cohen + Shay B.Cohen 2057–2067 Multi-view learning algorithms are powerful representation learning tools, often exploited in the context of multimodal problems. However, for problems requiring inference at the token-level of a sequence (that is, a separate prediction must be made for every time step), it is often the case that single-view systems are used, or that more than one views are fused in a simple manner. We describe an incremental neural architecture paired with a novel training objective for incremental inference. The network operates on multi-view data. We demonstrate the effectiveness of our approach on the problem of predicting perpetrators in crime drama series, for which our model significantly outperforms previous work and strong baselines. Moreover, we introduce two tasks, crime case and speaker type tagging, that contribute to movie understanding and demonstrate the effectiveness of our model on them. D19-1212 @@ -2745,7 +2745,7 @@ SimonVandenhende DusanGrujicic LucVan Gool - Marie-FrancineMoens + Marie-FrancineMoens 2088–2098 A long-term goal of artificial intelligence is to have an agent execute commands communicated through natural language. In many cases the commands are grounded in a visual environment shared by the human who gives the command and the agent. Execution of the command then requires mapping the command into the physical visual space, after which the appropriate action can be taken. In this paper we consider the former. Or more specifically, we consider the problem in an autonomous driving setting, where a passenger requests an action that can be associated with an object found in a street scene. Our work presents the Talk2Car dataset, which is the first object referral dataset that contains commands written in natural language for self-driving cars. We provide a detailed comparison with related datasets such as ReferIt, RefCOCO, RefCOCO+, RefCOCOg, Cityscape-Ref and CLEVR-Ref. Additionally, we include a performance analysis using strong state-of-the-art models. The results show that the proposed object referral task is a challenging one for which the models show promising results but still require additional research in natural language processing, computer vision and the intersection of these fields. The dataset can be found on our website: http://macchina-ai.eu/ D19-1215 @@ -2756,7 +2756,7 @@ Fact-Checking Meets Fauxtography: Verifying Claims About Images DimitrinaZlatkova - PreslavNakov + PreslavNakov IvanKoychev 2099–2108 The recent explosion of false claims in social media and on the Web in general has given rise to a lot of manual fact-checking initiatives. Unfortunately, the number of claims that need to be fact-checked is several orders of magnitude larger than what humans can handle manually. Thus, there has been a lot of research aiming at automating the process. Interestingly, previous work has largely ignored the growing number of claims about images. This is despite the fact that visual imagery is more influential than text and naturally appears alongside fake news. Here we aim at bridging this gap. In particular, we create a new dataset for this problem, and we explore a variety of features modeling the claim, the image, and the relationship between the claim and the image. The evaluation results show sizable improvements over the baseline. We release our dataset, hoping to enable further research on fact-checking claims about images. @@ -2801,7 +2801,7 @@ Fusion of Detected Objects in Text for Visual Question Answering ChrisAlberti JeffreyLing - MichaelCollins + MichaelCollins DavidReitter 2131–2140 To advance models of multimodal context, we introduce a simple yet powerful neural architecture for data that combines vision and natural language. The “Bounding Boxes in Text Transformer” (B2T2) also leverages referential information binding words to portions of the image in a single unified architecture. B2T2 is highly effective on the Visual Commonsense Reasoning benchmark, achieving a new state-of-the-art with a 25% relative reduction in error rate compared to published baselines and obtaining the best performance to date on the public leaderboard (as of May 22, 2019). A detailed ablation analysis shows that the early integration of the visual features into the text analysis is key to the effectiveness of the new architecture. A reference implementation of our models is provided. @@ -2854,7 +2854,7 @@ Adaptively Sparse Transformers Gonçalo M.Correia VladNiculae - André F. T.Martins + André F. T.Martins 2174–2184 Attention mechanisms have become ubiquitous in NLP. Recent architectures, notably the Transformer, learn powerful context-aware word representations through layered, multi-headed attention. The multiple heads learn diverse types of word relationships. However, with standard softmax attention, all attention heads are dense, assigning a non-zero weight to all context words. In this work, we introduce the adaptively sparse Transformer, wherein attention heads have flexible, context-dependent sparsity patterns. This sparsity is accomplished by replacing softmax with alpha-entmax: a differentiable generalization of softmax that allows low-scoring words to receive precisely zero weight. Moreover, we derive a method to automatically learn the alpha parameter – which controls the shape and sparsity of alpha-entmax – allowing attention heads to choose between focused or spread-out behavior. Our adaptively sparse Transformer improves interpretability and head diversity when compared to softmax Transformers on machine translation datasets. Findings of the quantitative and qualitative analysis of our approach include that heads in different layers learn different sparsity preferences and tend to be more diverse in their attention distributions than softmax Transformers. Furthermore, at no cost in accuracy, sparsity in attention heads helps to uncover different head specializations. D19-1223 @@ -2868,7 +2868,7 @@ SuchinGururangan DallasCard RoySchwartz - Noah A.Smith + Noah A.Smith 2185–2194 Research in natural language processing proceeds, in part, by demonstrating that new models achieve superior performance (e.g., accuracy) on held-out test data, compared to previous results. In this paper, we demonstrate that test-set performance scores alone are insufficient for drawing accurate conclusions about which model performs best. We argue for reporting additional details, especially performance on validation data obtained during model development. We present a novel technique for doing so: expected validation performance of the best-found model as a function of computation budget (i.e., the number of hyperparameter search trials or the overall training time). Using our approach, we find multiple recent model comparisons where authors would have reached a different conclusion if they had used more (or less) computation. Our approach also allows us to estimate the amount of computation required to obtain a given accuracy; applying it to several recently published results yields massive variation across papers, from hours to weeks. We conclude with a set of best practices for reporting experimental results which allow for robust future comparisons, and provide code to allow researchers to use our technique. D19-1224 @@ -2928,7 +2928,7 @@ Modeling Color Terminology Across Thousands of Languages - Arya D.McCarthy + Arya D.McCarthy WinstonWu AaronMueller WilliamWatson @@ -2945,9 +2945,9 @@ LongxiangShen BoweiZou YuHong - GuodongZhou - QiaomingZhu - AiTiAw + GuodongZhou + QiaomingZhu + AiTiAw 2251–2261 Negation is a universal but complicated linguistic phenomenon, which has received considerable attention from the NLP community over the last decade, since a negated statement often carries both an explicit negative focus and implicit positive meanings. For the sake of understanding a negated statement, it is critical to precisely detect the negative focus in context. However, how to capture contextual information for negative focus detection is still an open challenge. To well address this, we come up with an attention-based neural network to model contextual information. In particular, we introduce a framework which consists of a Bidirectional Long Short-Term Memory (BiLSTM) neural network and a Conditional Random Fields (CRF) layer to effectively encode the order information and the long-range context dependency in a sentence. Moreover, we design two types of attention mechanisms, word-level contextual attention and topic-level contextual attention, to take advantage of contextual information across sentences from both the word perspective and the topic perspective, respectively. Experimental results on the SEM’12 shared task corpus show that our approach achieves the best performance on negative focus detection, yielding an absolute improvement of 2.11% over the state-of-the-art. This demonstrates the great effectiveness of the two types of contextual attention mechanisms. D19-1230 @@ -2957,8 +2957,8 @@ A Unified Neural Coherence Model Han CheolMoon - TasnimMohiuddin - ShafiqJoty + TasnimMohiuddin + ShafiqJoty ChiXu 2262–2272 Recently, neural approaches to coherence modeling have achieved state-of-the-art results in several evaluation tasks. However, we show that most of these models often fail on harder tasks with more realistic application scenarios. In particular, the existing models underperform on tasks that require the model to be sensitive to local contexts such as candidate ranking in conversational dialogue and in machine translation. In this paper, we propose a unified coherence model that incorporates sentence grammar, inter-sentence coherence relations, and global coherence patterns into a common neural framework. With extensive experiments on local and global discrimination tasks, we demonstrate that our proposed model outperforms existing models by a good margin, and establish a new state-of-the-art. @@ -2996,7 +2996,7 @@ SoniaBadene KateThompson Jean-PierreLorré - NicholasAsher + NicholasAsher 2296–2305 This paper provides a detailed comparison of a data programming approach with (i) off-the-shelf, state-of-the-art deep learning architectures that optimize their representations (BERT) and (ii) handcrafted-feature approaches previously used in the discourse analysis literature. We compare these approaches on the task of learning discourse structure for multi-party dialogue. The data programming paradigm offered by the Snorkel framework allows a user to label training data using expert-composed heuristics, which are then transformed via the “generative step” into probability distributions of the class labels given the data. We show that on our task the generative model outperforms both deep learning architectures as well as more traditional ML approaches when learning discourse structure—it even outperforms the combination of deep learning methods and hand-crafted features. We also implement several strategies for “decoding” our generative model output in order to improve our results. We conclude that weak supervision methods hold great promise as a means for creating and improving data sets for discourse structure. D19-1234 @@ -3017,7 +3017,7 @@ The Myth of Double-Blind Review Revisited: <fixed-case>ACL</fixed-case> vs. <fixed-case>EMNLP</fixed-case> CorneliaCaragea AnaUban - Liviu P.Dinu + Liviu P.Dinu 2317–2327 The review and selection process for scientific paper publication is essential for the quality of scholarly publications in a scientific field. The double-blind review system, which enforces author anonymity during the review period, is widely used by prestigious conferences and journals to ensure the integrity of this process. Although the notion of anonymity in the double-blind review has been questioned before, the availability of full text paper collections brings new opportunities for exploring the question: Is the double-blind review process really double-blind? We study this question on the ACL and EMNLP paper collections and present an analysis on how well deep learning techniques can infer the authors of a paper. Specifically, we explore Convolutional Neural Networks trained on various aspects of a paper, e.g., content, style features, and references, to understand the extent to which we can infer the authors of a paper and what aspects contribute the most. Our results show that the authors of a paper can be inferred with accuracy as high as 87% on ACL and 78% on EMNLP for the top 100 most prolific authors. D19-1236 @@ -3095,7 +3095,7 @@ <fixed-case>P</fixed-case>ull<fixed-case>N</fixed-case>et: Open Domain Question Answering with Iterative Retrieval on Knowledge Bases and Text HaitianSun TaniaBedrax-Weiss - WilliamCohen + WilliamCohen 2380–2390 We consider open-domain question answering (QA) where answers are drawn from either a corpus, a knowledge base (KB), or a combination of both of these. We focus on a setting in which a corpus is supplemented with a large but incomplete KB, and on questions that require non-trivial (e.g., “multi-hop”) reasoning. We describe PullNet, an integrated framework for (1) learning what to retrieve and (2) reasoning with this heterogeneous information to find the best answer. PullNet uses an iterative process to construct a question-specific subgraph that contains information relevant to the question. In each iteration, a graph convolutional network (graph CNN) is used to identify subgraph nodes that should be expanded using retrieval (or “pull”) operations on the corpus and/or KB. After the subgraph is complete, another graph CNN is used to extract the answer from the subgraph. This retrieve-and-reason process allows us to answer multi-hop questions using large KBs and corpora. PullNet is weakly supervised, requiring question-answer pairs but not gold inference paths. Experimentally PullNet improves over the prior state-of-the art, and in the setting where a corpus is used with incomplete KB these improvements are often dramatic. PullNet is also often superior to prior systems in a KB-only setting or a text-only setting. D19-1242 @@ -3187,7 +3187,7 @@ <fixed-case>B</fixed-case>i<fixed-case>P</fixed-case>a<fixed-case>R</fixed-case>: A Bilingual Parallel Dataset for Multilingual and Cross-lingual Reading Comprehension on Novels YiminJing - DeyiXiong + DeyiXiong ZhenYan 2452–2462 This paper presents BiPaR, a bilingual parallel novel-style machine reading comprehension (MRC) dataset, developed to support multilingual and cross-lingual reading comprehension. The biggest difference between BiPaR and existing reading comprehension datasets is that each triple (Passage, Question, Answer) in BiPaR is written parallelly in two languages. We collect 3,667 bilingual parallel paragraphs from Chinese and English novels, from which we construct 14,668 parallel question-answer pairs via crowdsourced workers following a strict quality control procedure. We analyze BiPaR in depth and find that BiPaR offers good diversification in prefixes of questions, answer types and relationships between questions and passages. We also observe that answering questions of novels requires reading comprehension skills of coreference resolution, multi-sentence reasoning, and understanding of implicit causality, etc. With BiPaR, we build monolingual, multilingual, and cross-lingual MRC baseline models. Even for the relatively simple monolingual MRC on this dataset, experiments show that a strong BERT baseline is over 30 points behind human in terms of both EM and F1 score, indicating that BiPaR provides a challenging testbed for monolingual, multilingual and cross-lingual MRC on novels. The dataset is available at https://multinlp.github.io/BiPaR/. @@ -3198,7 +3198,7 @@ Language Models as Knowledge Bases? FabioPetroni - TimRocktäschel + TimRocktäschel SebastianRiedel PatrickLewis AntonBakhtin @@ -3315,7 +3315,7 @@ QiaoJin BhuwanDhingra ZhengpingLiu - WilliamCohen + WilliamCohen XinghuaLu 2567–2577 We introduce PubMedQA, a novel biomedical question answering (QA) dataset collected from PubMed abstracts. The task of PubMedQA is to answer research questions with yes/no/maybe (e.g.: Do preoperative statins reduce atrial fibrillation after coronary artery bypass grafting?) using the corresponding abstracts. PubMedQA has 1k expert-annotated, 61.2k unlabeled and 211.3k artificially generated QA instances. Each PubMedQA instance is composed of (1) a question which is either an existing research article title or derived from one, (2) a context which is the corresponding abstract without its conclusion, (3) a long answer, which is the conclusion of the abstract and, presumably, answers the research question, and (4) a yes/no/maybe answer which summarizes the conclusion. PubMedQA is the first QA dataset where reasoning over biomedical research texts, especially their quantitative contents, is required to answer the questions. Our best performing model, multi-phase fine-tuning of BioBERT with long answer bag-of-word statistics as additional supervision, achieves 68.1% accuracy, compared to single human performance of 78.0% accuracy and majority-baseline of 55.2% accuracy, leaving much room for improvement. PubMedQA is publicly available at https://pubmedqa.github.io. @@ -3340,7 +3340,7 @@ XiaowenLin LeoMehr ZijianWang - Christopher D.Manning + Christopher D.Manning 2590–2602 It is challenging for current one-step retrieve-and-read question answering (QA) systems to answer questions like “Which novel by the author of ‘Armada’ will be adapted as a feature film by Steven Spielberg?” because the question seldom contains retrievable clues about the missing entity (here, the author). Answering such a question requires multi-hop reasoning where one must gather information about the missing entity (or facts) to proceed with further reasoning. We present GoldEn (Gold Entity) Retriever, which iterates between reading context and retrieving more supporting documents to answer open-domain multi-hop questions. Instead of using opaque and computationally expensive neural retrieval models, GoldEn Retriever generates natural language search queries given the question and available context, and leverages off-the-shelf information retrieval systems to query for missing entities. This allows GoldEn Retriever to scale up efficiently for open-domain multi-hop reasoning while maintaining interpretability. We evaluate GoldEn Retriever on the recently proposed open-domain multi-hop QA dataset, HotpotQA, and demonstrate that it outperforms the best previously published model despite not using pretrained language models such as BERT. D19-1261 @@ -3374,7 +3374,7 @@ Incorporating Graph Attention Mechanism into Knowledge Graph Reasoning Based on Deep Reinforcement Learning - HengWang + HengWang ShuangyinLi RongPan MingzhiMao @@ -3410,7 +3410,7 @@ Original Semantics-Oriented Attention and Deep Fusion Network for Sentence Matching MingtongLiu YujieZhang - JinanXu + JinanXu YufengChen 2652–2661 Sentence matching is a key issue in natural language inference and paraphrase identification. Despite the recent progress on multi-layered neural network with cross sentence attention, one sentence learns attention to the intermediate representations of another sentence, which are propagated from preceding layers and therefore are uncertain and unstable for matching, particularly at the risk of error propagation. In this paper, we present an original semantics-oriented attention and deep fusion network (OSOA-DFN) for sentence matching. Unlike existing models, each attention layer of OSOA-DFN is oriented to the original semantic representation of another sentence, which captures the relevant information from a fixed matching target. The multiple attention layers allow one sentence to repeatedly read the important information of another sentence for better matching. We then additionally design deep fusion to propagate the attention information at each matching layer. At last, we introduce a self-attention mechanism to capture global context to enhance attention-aware representation within each sentence. Experiment results on three sentence matching benchmark datasets SNLI, SciTail and Quora show that OSOA-DFN has the ability to model sentence matching more precisely. @@ -3462,7 +3462,7 @@ DiLiang FubaoZhang QiZhang - XuanjingHuang + XuanjingHuang 2692–2700 Natural language inference aims to predict whether a premise sentence can infer another hypothesis sentence. Existing methods typically have framed the reasoning problem as a semantic matching task. The both sentences are encoded and interacted symmetrically and in parallel. However, in the process of reasoning, the role of the two sentences is obviously different, and the sentence pairs for NLI are asymmetrical corpora. In this paper, we propose an asynchronous deep interaction network (ADIN) to complete the task. ADIN is a neural network structure stacked with multiple inference sub-layers, and each sub-layer consists of two local inference modules in an asymmetrical manner. Different from previous methods, this model deconstructs the reasoning process and implements the asynchronous and multi-step reasoning. Experiment results show that ADIN achieves competitive performance and outperforms strong baselines on three popular benchmarks: SNLI, MultiNLI, and SciTail. D19-1271 @@ -3499,7 +3499,7 @@ LeiHou JiaxinShi JuanziLi - Tat-SengChua + Tat-SengChua 2723–2732 Entity alignment aims at integrating complementary knowledge graphs (KGs) from different sources or languages, which may benefit many knowledge-driven applications. It is challenging due to the heterogeneity of KGs and limited seed alignments. In this paper, we propose a semi-supervised entity alignment method by joint Knowledge Embedding model and Cross-Graph model (KECG). It can make better use of seed alignments to propagate over the entire graphs with KG-based constraints. Specifically, as for the knowledge embedding model, we utilize TransE to implicitly complete two KGs towards consistency and learn relational constraints between entities. As for the cross-graph model, we extend Graph Attention Network (GAT) with projection constraint to robustly encode graphs, and two KGs share the same GAT to transfer structural knowledge as well as to ignore unimportant neighbors for alignment via attention mechanism. Results on publicly available datasets as well as further analysis demonstrate the effectiveness of KECG. Our codes can be found in https: //github.com/THU-KEG/KECG. D19-1274 @@ -3520,7 +3520,7 @@ Specializing Word Embeddings (for Parsing) by Information Bottleneck Xiang LisaLi - JasonEisner + JasonEisner 2744–2754 Pre-trained word embeddings like ELMo and BERT contain rich syntactic and semantic information, resulting in state-of-the-art performance on various tasks. We propose a very fast variational information bottleneck (VIB) method to nonlinearly compress these embeddings, keeping only the information that helps a discriminative parser. We compress each word embedding to either a discrete tag or a continuous vector. In the discrete version, our automatically compressed tags form an alternative tag set: we show experimentally that our tags capture most of the information in traditional POS tag annotations, but our tag sequences can be parsed more accurately at the same level of tag granularity. In the continuous version, we show experimentally that moderately compressing the word embeddings by our method yields a more accurate parser in 8 of 9 languages, unlike simple dimensionality reduction. D19-1276 @@ -3574,7 +3574,7 @@ Marc-AlexandreCôté JieFu ZhouhanLin - ChrisPal + ChrisPal YoshuaBengio AdamTrischler 2796–2813 @@ -3598,7 +3598,7 @@ <fixed-case>K</fixed-case>ag<fixed-case>N</fixed-case>et: Knowledge-Aware Graph Networks for Commonsense Reasoning - Bill YuchenLin + Bill YuchenLin XinyueChen JaminChen XiangRen @@ -3627,7 +3627,7 @@ SewonMin DanqiChen HannanehHajishirzi - LukeZettlemoyer + LukeZettlemoyer 2851–2864 Many question answering (QA) tasks only provide weak supervision for how the answer should be computed. For example, TriviaQA answers are entities that can be mentioned multiple times in supporting documents, while DROP answers can be computed by deriving many different equations from numbers in the reference text. In this paper, we show it is possible to convert such tasks into discrete latent variable learning problems with a precomputed, task-specific set of possible solutions (e.g. different mentions or equations) that contains one correct option. We then develop a hard EM learning scheme that computes gradients relative to the most likely solution at each update. Despite its simplicity, we show that this approach significantly outperforms previous methods on six QA tasks, including absolute gains of 2–10%, and achieves the state-of-the-art on five of them. Using hard updates instead of maximizing marginal likelihood is key to these results as it encourages the model to find the one correct answer, which we show through detailed qualitative analysis. D19-1284 @@ -3637,7 +3637,7 @@ Is the Red Square Big? <fixed-case>MAL</fixed-case>e<fixed-case>V</fixed-case>i<fixed-case>C</fixed-case>: Modeling Adjectives Leveraging Visual Contexts SandroPezzelle - RaquelFernández + RaquelFernández 2865–2876 This work aims at modeling how the meaning of gradable adjectives of size (‘big’, ‘small’) can be learned from visually-grounded contexts. Inspired by cognitive and linguistic evidence showing that the use of these expressions relies on setting a threshold that is dependent on a specific context, we investigate the ability of multi-modal models in assessing whether an object is ‘big’ or ‘small’ in a given visual scene. In contrast with the standard computational approach that simplistically treats gradable adjectives as ‘fixed’ attributes, we pose the problem as relational: to be successful, a model has to consider the full visual context. By means of four main tasks, we show that state-of-the-art models (but not a relatively strong baseline) can learn the function subtending the meaning of size adjectives, though their performance is found to decrease while moving from simple to more complex tasks. Crucially, models fail in developing abstract representations of gradable adjectives that can be used compositionally. D19-1285 @@ -3661,7 +3661,7 @@ AnhadMohananey Phu MonHtut PalomaJeretic - Samuel R.Bowman + Samuel R.Bowman 2877–2887 Though state-of-the-art sentence representation models can perform tasks requiring significant knowledge of grammar, it is an open question how best to evaluate their grammatical knowledge. We explore five experimental methods inspired by prior work evaluating pretrained sentence representation models. We use a single linguistic phenomenon, negative polarity item (NPI) licensing, as a case study for our experiments. NPIs like any are grammatical only if they appear in a licensing environment like negation (Sue doesn’t have any cats vs. *Sue has any cats). This phenomenon is challenging because of the variety of NPI licensing environments that exist. We introduce an artificially generated dataset that manipulates key features of NPI licensing for the experiments. We find that BERT has significant knowledge of these features, but its success varies widely across different experimental methods. We conclude that a variety of methods is necessary to reveal all relevant aspects of a model’s grammatical knowledge in a given domain. D19-1286 @@ -3674,7 +3674,7 @@ AixiuAn PengQian EthanWilcox - RogerLevy + RogerLevy 2888–2899 Neural language models have achieved state-of-the-art performances on many NLP tasks, and recently have been shown to learn a number of hierarchically-sensitive syntactic dependencies between individual words. However, equally important for language processing is the ability to combine words into phrasal constituents, and use constituent-level features to drive downstream expectations. Here we investigate neural models’ ability to represent constituent-level features, using coordinated noun phrases as a case study. We assess whether different neural language models trained on English and French represent phrase-level number and gender features, and use those features to drive downstream expectations. Our results suggest that models use a linear combination of NP constituent number to drive CoordNP/verb number agreement. This behavior is highly regular and even sensitive to local syntactic context, however it differs crucially from observed human behavior. Models have less success with gender agreement. Models trained on large corpora perform best, and there is no obvious advantage for models trained using explicit syntactic supervision. D19-1287 @@ -3726,7 +3726,7 @@ TuhinChakrabarty ChristopherHidey SmarandaMuresan - KathyMcKeown + KathyMcKeown AlyssaHwang 2933–2943 Argumentation is a type of discourse where speakers try to persuade their audience about the reasonableness of a claim by presenting supportive arguments. Most work in argument mining has focused on modeling arguments in monologues. We propose a computational model for argument mining in online persuasive discussion forums that brings together the micro-level (argument as product) and macro-level (argument as process) models of argumentation. Fundamentally, this approach relies on identifying relations between components of arguments in a discussion thread. Our approach for relation prediction uses contextual information in terms of fine-tuning a pre-trained language model and leveraging discourse relations based on Rhetorical Structure Theory. We additionally propose a candidate selection method to automatically predict what parts of one’s argument will be targeted by other participants in the discussion. Our models obtain significant improvements compared to recent state-of-the-art approaches using pointer networks and a pre-trained language model. @@ -3752,7 +3752,7 @@ WonsukYang SeungwonYoon AdaCarpenter - JongPark + JongPark 2954–2963 Annotation quality control is a critical aspect for building reliable corpora through linguistic annotation. In this study, we present a simple but powerful quality control method using two-step reason selection. We gathered sentential annotations of local acceptance and three related attributes through a crowdsourcing platform. For each attribute, the reason for the choice of the attribute value is selected in a two-step manner. The options given for reason selection were designed to facilitate the detection of a nonsensical reason selection. We assume that a sentential annotation that contains a nonsensical reason is less reliable than the one without such reason. Our method, based solely on this assumption, is found to retain the annotations with satisfactory quality out of the entire annotations mixed with those of low quality. D19-1293 @@ -3763,9 +3763,9 @@ Evaluating Pronominal Anaphora in Machine Translation: An Evaluation Measure and a Test Suite PrathyushaJwalapuram - ShafiqJoty + ShafiqJoty IrinaTemnikova - PreslavNakov + PreslavNakov 2964–2975 The ongoing neural revolution in machine translation has made it easier to model larger contexts beyond the sentence-level, which can potentially help resolve some discourse-level ambiguities such as pronominal anaphora, thus enabling better translations. Unfortunately, even when the resulting improvements are seen as substantial by humans, they remain virtually unnoticed by traditional automatic evaluation measures like BLEU, as only a few words end up being affected. Thus, specialized evaluation measures are needed. With this aim in mind, we contribute an extensive, targeted dataset that can be used as a test suite for pronoun translation, covering multiple source languages and different pronoun errors drawn from real system translations, for English. We further propose an evaluation measure to differentiate good and bad pronoun translations. We also conduct a user study to report correlations with human judgments. D19-1294 @@ -3797,7 +3797,7 @@ JunjieLi XuepengWang DaweiYin - ChengqingZong + ChengqingZong 3000–3010 Review summarization aims to generate a condensed summary for a review or multiple reviews. Existing review summarization systems mainly generate summary only based on review content and neglect the authors’ attributes (e.g., gender, age, and occupation). In fact, when summarizing a review, users with different attributes usually pay attention to specific aspects and have their own word-using habits or writing styles. Therefore, we propose an Attribute-aware Sequence Network (ASN) to take the aforementioned users’ characteristics into account, which includes three modules: an attribute encoder encodes the attribute preferences over the words; an attribute-aware review encoder adopts an attribute-based selective mechanism to select the important information of a review; and an attribute-aware summary decoder incorporates attribute embedding and attribute-specific word-using habits into word prediction. To validate our model, we collect a new dataset TripAtt, comprising 495,440 attribute-review-summary triplets with three kinds of attribute information: gender, age, and travel status. Extensive experiments show that ASN achieves state-of-the-art performance on review summarization in both auto-metric ROUGE and human evaluation. D19-1297 @@ -3822,7 +3822,7 @@ XiaochengFeng FengJiang BingQin - Chin-YewLin + Chin-YewLin 3022–3032 Recent neural models for data-to-text generation rely on massive parallel pairs of data and text to learn the writing knowledge. They often assume that writing knowledge can be acquired from the training data alone. However, when people are writing, they not only rely on the data but also consider related knowledge. In this paper, we enhance neural data-to-text models with external knowledge in a simple but effective way to improve the fidelity of generated text. Besides relying on parallel data and text as in previous work, our model attends to relevant external knowledge, encoded as a temporary memory, and combines this knowledge with the context representation of data before generating words. This allows the model to infer relevant facts which are not explicitly stated in the data table from an external knowledge source. Experimental results on twenty-one Wikipedia infobox-to-text datasets show our model, KBAtt, consistently improves a state-of-the-art model on most of the datasets. In addition, to quantify when and why external knowledge is effective, we design a metric, KBGain, which shows a strong correlation with the observed performance boost. This result demonstrates the relevance of external knowledge and sparseness of original data are the main factors affecting system performance. D19-1299 @@ -3866,7 +3866,7 @@ YuZhou JiajunZhang ShaonanWang - ChengqingZong + ChengqingZong 3054–3064 Cross-lingual summarization (CLS) is the task to produce a summary in one particular language for a source document in a different language. Existing methods simply divide this task into two steps: summarization and translation, leading to the problem of error propagation. To handle that, we present an end-to-end CLS framework, which we refer to as Neural Cross-Lingual Summarization (NCLS), for the first time. Moreover, we propose to further improve NCLS by incorporating two related tasks, monolingual summarization and machine translation, into the training process of CLS under multi-task learning. Due to the lack of supervised CLS data, we propose a round-trip translation strategy to acquire two high-quality large-scale CLS datasets based on existing monolingual summarization datasets. Experimental results have shown that our NCLS achieves remarkable improvement over traditional pipeline methods on both English-to-Chinese and Chinese-to-English CLS human-corrected test sets. In addition, NCLS with multi-task learning can further significantly improve the quality of generated summaries. We make our dataset and code publicly available here: http://www.nlpr.ia.ac.cn/cip/dataset.htm. D19-1302 @@ -3889,7 +3889,7 @@ Concept Pointer Network for Abstractive Summarization WenboWang YangGao - HeyanHuang + HeyanHuang YuxiangZhou 3076–3085 A quality abstractive summary should not only copy salient source texts as summaries but should also tend to generate new conceptual words to express concrete details. Inspired by the popular pointer generator sequence-to-sequence model, this paper presents a concept pointer network for improving these aspects of abstractive summarization. The network leverages knowledge-based, context-aware conceptualizations to derive an extended set of candidate concepts. The model then points to the most appropriate choice using both the concept set and original source text. This joint approach generates abstractive summaries with higher-level semantic concepts. The training model is also optimized in a way that adapts to different data, which is based on a novel method of distant-supervised learning guided by reference summaries and testing set. Overall, the proposed approach provides statistically significant improvements over several state-of-the-art models on both the DUC-2004 and Gigaword datasets. A human evaluation of the model’s abstractive abilities also supports the quality of the summaries produced within this framework. @@ -3990,7 +3990,7 @@ Referring Expression Generation Using Entity Profiles MengCao - Jackie Chi KitCheung + Jackie Chi KitCheung 3163–3172 Referring Expression Generation (REG) is the task of generating contextually appropriate references to entities. A limitation of existing REG systems is that they rely on entity-specific supervised training, which means that they cannot handle entities not seen during training. In this study, we address this in two ways. First, we propose task setups in which we specifically test a REG system’s ability to generalize to entities not seen during training. Second, we propose a profile-based deep neural network model, ProfileREG, which encodes both the local context and an external profile of the entity to generate reference realizations. Our model generates tokens by learning to choose between generating pronouns, generating from a fixed vocabulary, or copying a word from the profile. We evaluate our model on three different splits of the WebNLG dataset, and show that it outperforms competitive baselines in all settings according to automatic and human evaluations. D19-1312 @@ -4001,7 +4001,7 @@ Exploring Diverse Expressions for Paraphrase Generation LihuaQian LinQiu - WeinanZhang + WeinanZhang XinJiang YongYu 3173–3182 @@ -4062,7 +4062,7 @@ Deep Copycat Networks for Text-to-Text Generation JuliaIve - PranavaMadhyastha + PranavaMadhyastha LuciaSpecia 3227–3236 Most text-to-text generation tasks, for example text summarisation and text simplification, require copying words from the input to the output. We introduce Copycat, a transformer-based pointer network for such tasks which obtains competitive results in abstractive text summarisation and generates more abstractive summaries. We propose a further extension of this architecture for automatic post-editing, where generation is conditioned over two inputs (source language and machine translation), and the model is capable of deciding where to copy information from. This approach achieves competitive performance when compared to state-of-the-art automated post-editing systems. More importantly, we show that it addresses a well-known limitation of automatic post-editing - overcorrecting translations - and that our novel mechanism for copying source language words improves the results. @@ -4148,7 +4148,7 @@ ZheGan YuCheng ChrisBrockett - BillDolan + BillDolan Ming-TingSun 3304–3313 Text style transfer without parallel data has achieved some practical success. However, in the scenario where less data is available, these methods may yield poor performance. In this paper, we examine domain adaptation for text style transfer to leverage massively available data from other domains. These data may demonstrate domain shift, which impedes the benefits of utilizing such data for training. To address this challenge, we propose simple yet effective domain adaptive text style transfer models, enabling domain-adaptive information exchange. The proposed models presumably learn from the source domain to: (i) distinguish stylized information and generic content information; (ii) maximally preserve content information; and (iii) adaptively transfer the styles in a domain-aware manner. We evaluate the proposed models on two style transfer tasks (sentiment and formality) over multiple target domains where only limited non-parallel data is available. Extensive experiments demonstrate the effectiveness of the proposed model compared to the baselines. @@ -4161,9 +4161,9 @@ Let’s Ask Again: Refine Network for Automatic Question Generation PrekshaNema Akash KumarMohankumar - Mitesh M.Khapra + Mitesh M.Khapra Balaji VasanSrinivasan - BalaramanRavindran + BalaramanRavindran 3314–3323 In this work, we focus on the task of Automatic Question Generation (AQG) where given a passage and an answer the task is to generate the corresponding question. It is desired that the generated question should be (i) grammatically correct (ii) answerable from the passage and (iii) specific to the given answer. An analysis of existing AQG models shows that they produce questions which do not adhere to one or more of the above-mentioned qualities. In particular, the generated questions look like an incomplete draft of the desired question with a clear scope for refinement. To alleviate this shortcoming, we propose a method which tries to mimic the human process of generating questions by first creating an initial draft and then refining it. More specifically, we propose Refine Network (RefNet) which contains two decoders. The second decoder uses a dual attention network which pays attention to both (i) the original passage and (ii) the question (initial draft) generated by the first decoder. In effect, it refines the question generated by the first decoder, thereby making it more correct and complete. We evaluate RefNet on three datasets, viz., SQuAD, HOTPOT-QA, and DROP, and show that it outperforms existing state-of-the-art methods by 7-16% on all of these datasets. Lastly, we show that we can improve the quality of the second decoder on specific metrics, such as, fluency and answerability by explicitly rewarding revisions that improve on the corresponding metric during training. The code has been made publicly available . D19-1326 @@ -4176,7 +4176,7 @@ TaeheeJung DongyeopKang LucasMentch - EduardHovy + EduardHovy 3324–3335 Despite the recent developments on neural summarization systems, the underlying logic behind the improvements from the systems and its corpus-dependency remains largely unexplored. Position of sentences in the original text, for example, is a well known bias for news summarization. Following in the spirit of the claim that summarization is a combination of sub-functions, we define three sub-aspects of summarization: position, importance, and diversity and conduct an extensive analysis of the biases of each sub-aspect with respect to the domain of nine different summarization corpora (e.g., news, academic papers, meeting minutes, movie script, books, posts). We find that while position exhibits substantial bias in news articles, this is not the case, for example, with academic papers and meeting minutes. Furthermore, our empirical study shows that different types of summarization systems (e.g., neural-based) are composed of different degrees of the sub-aspects. Our study provides useful lessons regarding consideration of underlying sub-aspects when collecting a new summarization dataset or developing a new system. D19-1327 @@ -4188,7 +4188,7 @@ Lost in Evaluation: Misleading Benchmarks for Bilingual Dictionary Induction YovaKementchedjhieva MareikeHartmann - AndersSøgaard + AndersSøgaard 3336–3341 The task of bilingual dictionary induction (BDI) is commonly used for intrinsic evaluation of cross-lingual word embeddings. The largest dataset for BDI was generated automatically, so its quality is dubious. We study the composition and quality of the test sets for five diverse languages from this dataset, with concerning findings: (1) a quarter of the data consists of proper nouns, which can be hardly indicative of BDI performance, and (2) there are pervasive gaps in the gold-standard targets. These issues appear to affect the ranking between cross-lingual embedding systems on individual languages, and the overall degree to which the systems differ in performance. With proper nouns removed from the data, the margin between the top two systems included in the study grows from 3.4% to 17.2%. Manual verification of the predictions, on the other hand, reveals that gaps in the gold standard targets artificially inflate the margin between the two systems on English to Bulgarian BDI from 0.1% to 6.7%. We thus suggest that future research either avoids drawing conclusions from quantitative results on this BDI dataset, or accompanies such evaluation with rigorous error analysis. D19-1328 @@ -4198,9 +4198,9 @@ Towards Realistic Practices In Low-Resource Natural Language Processing: The Development Set - KatharinaKann + KatharinaKann KyunghyunCho - Samuel R.Bowman + Samuel R.Bowman 3342–3349 Development sets are impractical to obtain for real low-resource languages, since using all available data for training is often more effective. However, development sets are widely used in research papers that purport to deal with low-resource natural language processing (NLP). Here, we aim to answer the following questions: Does using a development set for early stopping in the low-resource setting influence results as compared to a more realistic alternative, where the number of training epochs is tuned on development languages? And does it lead to overestimation or underestimation of performance? We repeat multiple experiments from recent work on neural models for low-resource NLP and compare results for models obtained by training with and without development sets. On average over languages, absolute accuracy differs by up to 1.4%. However, for some languages and tasks, differences are as big as 18.0% accuracy. Our results highlight the importance of realistic experimental setups in the publication of low-resource NLP research results. D19-1329 @@ -4214,7 +4214,7 @@ JiajunZhang LongZhou YuchenLiu - ChengqingZong + ChengqingZong 3350–3355 In this paper, we introduce a novel interactive approach to translate a source language into two different languages simultaneously and interactively. Specifically, the generation of one language relies on not only previously generated outputs by itself, but also the outputs predicted in the other language. Experimental results on IWSLT and WMT datasets demonstrate that our method can obtain significant improvements over both conventional Neural Machine Translation (NMT) model and multilingual NMT model. D19-1330 @@ -4275,7 +4275,7 @@ AliEmami AdamTrischler KaheerSuleman - Jackie Chi KitCheung + Jackie Chi KitCheung 3382–3387 Recent studies have significantly improved the state-of-the-art on common-sense reasoning (CSR) benchmarks like the Winograd Schema Challenge (WSC) and SWAG. The question we ask in this paper is whether improved performance on these benchmarks represents genuine progress towards common-sense-enabled systems. We make case studies of both benchmarks and design protocols that clarify and qualify the results of previous work by analyzing threats to the validity of previous experimental designs. Our protocols account for several properties prevalent in common-sense benchmarks including size limitations, structural regularities, and variable instance difficulty. D19-1335 @@ -4289,7 +4289,7 @@ ShunyaoLi PengchengYang LeiLi - BaobaoChang + BaobaoChang ZhifangSui XuSun 3388–3393 @@ -4325,7 +4325,7 @@ The Woman Worked as a Babysitter: On Biases in Language Generation EmilySheng Kai-WeiChang - PremkumarNatarajan + PremkumarNatarajan NanyunPeng 3407–3412 We present a systematic study of biases in natural language generation (NLG) by analyzing text generated from prompts that contain mentions of different demographic groups. In this work, we introduce the notion of the regard towards a demographic, use the varying levels of regard towards different demographics as a defining metric for bias in NLG, and analyze the extent to which sentiment scores are a relevant proxy metric for regard. To this end, we collect strategically-generated text from language models and manually annotate the text with both sentiment and regard scores. Additionally, we build an automatic regard classifier through transfer learning, so that we can analyze biases in unseen text. Together, these methods reveal the extent of the biased nature of language model generations. Our analysis provides a study of biases in NLG, bias metrics and correlated human judgments, and empirical evidence on the usefulness of our annotated dataset. @@ -4376,7 +4376,7 @@ Investigating Dynamic Routing in Tree-Structured <fixed-case>LSTM</fixed-case> for Sentiment Analysis JinWang - Liang-ChihYu + Liang-ChihYu K. RobertLai XuejieZhang 3432–3437 @@ -4413,7 +4413,7 @@ Semantic Relatedness Based Re-ranker for Text Spotting AhmedSabir FrancescMoreno - LluísPadró + LluísPadró 3451–3457 Applications such as textual entailment, plagiarism detection or document clustering rely on the notion of semantic similarity, and are usually approached with dimension reduction techniques like LDA or with embedding-based neural approaches. We present a scenario where semantic similarity is not enough, and we devise a neural approach to learn semantic relatedness. The scenario is text spotting in the wild, where a text in an image (e.g. street sign, advertisement or bus destination) must be identified and recognized. Our goal is to improve the performance of vision systems by leveraging semantic information. Our rationale is that the text to be spotted is often related to the image context in which it appears (word pairs such as Delta-airplane, or quarters-parking are not similar, but are clearly related). We show how learning a word-to-word or word-to-sentence relatedness score can improve the performance of text spotting systems up to 2.9 points, outperforming other measures in a benchmark dataset. D19-1346 @@ -4444,7 +4444,7 @@ Evaluating Topic Quality with Posterior Variability LinziXing - Michael J.Paul + Michael J.Paul GiuseppeCarenini 3471–3477 Probabilistic topic models such as latent Dirichlet allocation (LDA) are popularly used with Bayesian inference methods such as Gibbs sampling to learn posterior distributions over topic model parameters. We derive a novel measure of LDA topic quality using the variability of the posterior distributions. Compared to several existing baselines for automatic topic evaluation, the proposed metric achieves state-of-the-art correlations with human judgments of topic quality in experiments on three corpora. We additionally demonstrate that topic quality estimation can be further improved using a supervised estimator that combines multiple metrics. @@ -4495,7 +4495,7 @@ DanielCohen Yen-ChiehLien PratikMehta - W. BruceCroft + W. BruceCroft ScottMiller 3497–3502 When performing cross-language information retrieval (CLIR) for lower-resourced languages, a common approach is to retrieve over the output of machine translation (MT). However, there is no established guidance on how to optimize the resulting MT-IR system. In this paper, we examine the relationship between the performance of MT systems and both neural and term frequency-based IR models to identify how CLIR performance can be best predicted from MT quality. We explore performance at varying amounts of MT training data, byte pair encoding (BPE) merge operations, and across two IR collections and retrieval models. We find that the choice of IR collection can substantially affect the predictive power of MT tuning decisions and evaluation, potentially introducing dissociations between MT-only and overall CLIR performance. @@ -4518,7 +4518,7 @@ LuyaoHuang ChiSun XipengQiu - XuanjingHuang + XuanjingHuang 3509–3514 Word Sense Disambiguation (WSD) aims to find the exact sense of an ambiguous word in a particular context. Traditional supervised methods rarely take into consideration the lexical resources like WordNet, which are widely utilized in knowledge-based methods. Recent studies have shown the effectiveness of incorporating gloss (sense definition) into neural networks for WSD. However, compared with traditional word expert supervised methods, they have not achieved much improvement. In this paper, we focus on how to better leverage gloss knowledge in a supervised neural WSD system. We construct context-gloss pairs and propose three BERT based models for WSD. We fine-tune the pre-trained BERT model and achieve new state-of-the-art results on WSD task. D19-1355 @@ -4577,7 +4577,7 @@ Hierarchical Meta-Embeddings for Code-Switching Named Entity Recognition - Genta IndraWinata + Genta IndraWinata ZhaojiangLin JaminShin ZihanLiu @@ -4643,7 +4643,7 @@ YuWu LiliMou ZhoujunLi - WenhanChao + WenhanChao 3573–3578 Formality text style transfer plays an important role in various NLP applications, such as non-native speaker assistants and child education. Early studies normalize informal sentences with rules, before statistical and neural models become a prevailing method in the field. While a rule-based system is still a common preprocessing step for formality style transfer in the neural era, it could introduce noise if we use the rules in a naive way such as data preprocessing. To mitigate this problem, we study how to harness rules into a state-of-the-art neural network that is typically pretrained on massive corpora. We propose three fine-tuning methods in this paper and achieve a new state-of-the-art on benchmark datasets D19-1365 @@ -4656,7 +4656,7 @@ Yi-TeHong Hong-YouChen Chi-JenLu - Shou-DeLin + Shou-DeLin 3579–3584 The objective of non-parallel text style transfer, or controllable text generation, is to alter specific attributes (e.g. sentiment, mood, tense, politeness, etc) of a given text while preserving its remaining attributes and content. Generative adversarial network (GAN) is a popular model to ensure the transferred sentences are realistic and have the desired target styles. However, training GAN often suffers from mode collapse problem, which causes that the transferred text is little related to the original text. In this paper, we propose a new GAN model with a word-level conditional architecture and a two-phase training procedure. By using a style-related condition architecture before generating a word, our model is able to maintain style-unrelated words while changing the others. By separating the training procedure into reconstruction and transfer phases, our model is able to learn a proper text generation process, which further improves the content preservation. We test our model on polarity sentiment transfer and multiple-attribute transfer tasks. The empirical results show that our model achieves comparable evaluation scores in both transfer accuracy and fluency but significantly outperforms other state-of-the-art models in content compatibility on three real-world datasets. D19-1366 @@ -4774,7 +4774,7 @@ <fixed-case>P</fixed-case>a<fixed-case>LM</fixed-case>: A Hybrid Parser and Language Model HaoPeng RoySchwartz - Noah A.Smith + Noah A.Smith 3644–3651 We present PaLM, a hybrid parser and neural language model. Building on an RNN language model, PaLM adds an attention layer over text spans in the left context. An unsupervised constituency parser can be derived from its attention weights, using a greedy decoding algorithm. We evaluate PaLM on language modeling, and empirically show that it outperforms strong baselines. If syntactic annotations are available, the attention component can be trained in a supervised manner, providing syntactically-informed representations of the context, and further improving language modeling performance. D19-1376 @@ -4821,7 +4821,7 @@ Efficient Sentence Embedding using Discrete Cosine Transform NadaAlmarwani HananAldarmaki - MonaDiab + MonaDiab 3672–3678 Vector averaging remains one of the most popular sentence embedding methods in spite of its obvious disregard for syntactic structure. While more complex sequential or convolutional networks potentially yield superior classification performance, the improvements in classification accuracy are typically mediocre compared to the simple vector averaging. As an efficient alternative, we propose the use of discrete cosine transform (DCT) to compress word sequences in an order-preserving manner. The lower order DCT coefficients represent the overall feature patterns in sentences, which results in suitable embeddings for tasks that could benefit from syntactic features. Our results in semantic probing tasks demonstrate that DCT embeddings indeed preserve more syntactic information compared with vector averaging. With practically equivalent complexity, the model yields better overall performance in downstream classification tasks that correlate with syntactic features, which illustrates the capacity of DCT to preserve word order information. D19-1380 @@ -4856,8 +4856,8 @@ ArmanCohan IzBeltagy DanielKing - BhavanaDalvi - DanWeld + BhavanaDalvi + DanWeld 3693–3699 As a step toward better document-level understanding, we explore classification of a sequence of sentences into their corresponding categories, a task that requires understanding sentences in context of the document. Recent successful models for this task have used hierarchical models to contextualize sentence representations, and Conditional Random Fields (CRFs) to incorporate dependencies between subsequent labels. In this work, we show that pretrained language models, BERT (Devlin et al., 2018) in particular, can be used for this task to capture contextual dependencies without the need for hierarchical encoding nor a CRF. Specifically, we construct a joint sentence representation that allows BERT Transformer layers to directly utilize contextual information from all words in all sentences. Our approach achieves state-of-the-art results on four datasets, including a new dataset of structured scientific abstracts. D19-1383 @@ -5226,7 +5226,7 @@ SahilGarg AramGalstyan GregVer Steeg - GuillermoCecchi + GuillermoCecchi 4026–4036 Recently, kernelized locality sensitive hashcodes have been successfully employed as representations of natural language text, especially showing high relevance to biomedical relation extraction tasks. In this paper, we propose to optimize the hashcode representations in a nearly unsupervised manner, in which we only use data points, but not their class labels, for learning. The optimized hashcode representations are then fed to a supervised classifi er following the prior work. This nearly unsupervised approach allows fine-grained optimization of each hash function, which is particularly suitable for building hashcode representations generalizing from a training set to a test set. We empirically evaluate the proposed approach for biomedical relation extraction tasks, obtaining significant accuracy improvements w.r.t. state-of-the-art supervised and semi-supervised approaches. D19-1414 @@ -5237,7 +5237,7 @@ Auditing Deep Learning processes through Kernel-based Explanatory Models DaniloCroce DanieleRossini - RobertoBasili + RobertoBasili 4037–4046 While NLP systems become more pervasive, their accountability gains value as a focal point of effort. Epistemological opaqueness of nonlinear learning methods, such as deep learning models, can be a major drawback for their adoptions. In this paper, we discuss the application of Layerwise Relevance Propagation over a linguistically motivated neural architecture, the Kernel-based Deep Architecture, in order to trace back connections between linguistic properties of input instances and system decisions. Such connections then guide the construction of argumentations on network’s inferences, i.e., explanations based on real examples, semantically related to the input. We propose here a methodology to evaluate the transparency and coherence of analogy-based explanations modeling an audit stage for the system. Quantitative analysis on two semantic tasks, i.e., question classification and semantic role labeling, show that the explanatory capabilities (native in KDAs) are effective and they pave the way to more complex argumentation methods. D19-1415 @@ -5271,7 +5271,7 @@ Don’t Take the Easy Way Out: Ensemble Based Methods for Avoiding Known Dataset Biases ChristopherClark MarkYatskar - LukeZettlemoyer + LukeZettlemoyer 4069–4082 State-of-the-art models often make use of superficial patterns in the data that do not generalize well to out-of-domain or adversarial settings. For example, textual entailment models often learn that particular key words imply entailment, irrespective of context, and visual question answering models learn to predict prototypical answers, without considering evidence in the image. In this paper, we show that if we have prior knowledge of such biases, we can train a model to be more robust to domain shift. Our method has two stages: we (1) train a naive model that makes predictions exclusively based on dataset biases, and (2) train a robust model as part of an ensemble with the naive one in order to encourage it to focus on other patterns in the data that are more likely to generalize. Experiments on five datasets with out-of-domain test sets show significantly improved robustness in all settings, including a 12 point gain on a changing priors visual question answering dataset and a 9 point gain on an adversarial question answering test set. D19-1418 @@ -5283,7 +5283,7 @@ Po-SenHuang RobertStanforth JohannesWelbl - ChrisDyer + ChrisDyer DaniYogatama SvenGowal KrishnamurthyDvijotham @@ -5299,7 +5299,7 @@ Rethinking Cooperative Rationalization: Introspective Extraction and Complement Control MoYu ShiyuChang - YangZhang + YangZhang TommiJaakkola 4094–4103 Selective rationalization has become a common mechanism to ensure that predictive models reveal how they use any available features. The selection may be soft or hard, and identifies a subset of input features relevant for prediction. The setup can be viewed as a co-operate game between the selector (aka rationale generator) and the predictor making use of only the selected features. The co-operative setting may, however, be compromised for two reasons. First, the generator typically has no direct access to the outcome it aims to justify, resulting in poor performance. Second, there’s typically no control exerted on the information left outside the selection. We revise the overall co-operative framework to address these challenges. We introduce an introspective model which explicitly predicts and incorporates the outcome into the selection process. Moreover, we explicitly control the rationale complement via an adversary so as not to leave any useful information out of the selection. We show that the two complementary mechanisms maintain both high predictive accuracy and lead to comprehensive rationales. @@ -5311,7 +5311,7 @@ Experimenting with Power Divergences for Language Modeling MatthieuLabeau - Shay B.Cohen + Shay B.Cohen 4104–4114 Neural language models are usually trained using Maximum-Likelihood Estimation (MLE). The corresponding objective function for MLE is derived from the Kullback-Leibler (KL) divergence between the empirical probability distribution representing the data and the parametric probability distribution output by the model. However, the word frequency discrepancies in natural language make performance extremely uneven: while the perplexity is usually very low for frequent words, it is especially difficult to predict rare words. In this paper, we experiment with several families (alpha, beta and gamma) of power divergences, generalized from the KL divergence, for learning language models with an objective different than standard MLE. Intuitively, these divergences should affect the way the probability mass is spread during learning, notably by prioritizing performances on high or low-frequency words. In addition, we implement and experiment with various sampling-based objectives, where the computation of the output layer is only done on a small subset of the vocabulary. They are derived as power generalizations of a softmax approximated via Importance Sampling, and Noise Contrastive Estimation, for accelerated learning. Our experiments on the Penn Treebank and Wikitext-2 show that these power divergences can indeed be used to prioritize learning on the frequent or rare words, and lead to general performance improvements in the case of sampling-based learning. D19-1421 @@ -5359,7 +5359,7 @@ Topics to Avoid: Demoting Latent Confounds in Text Classification SachinKumar ShulyWintner - Noah A.Smith + Noah A.Smith YuliaTsvetkov 4153–4163 Despite impressive performance on many text classification tasks, deep neural networks tend to learn frequent superficial patterns that are specific to the training data and do not always generalize well. In this work, we observe this limitation with respect to the task of native language identification. We find that standard text classifiers which perform well on the test set end up learning topical features which are confounds of the prediction task (e.g., if the input text mentions Sweden, the classifier predicts that the author’s native language is Swedish). We propose a method that represents the latent topical confounds and a model which “unlearns” confounding features by predicting both the label of the input text and the confound; but we train the two predictors adversarially in an alternating fashion to learn a text representation that predicts the correct label but is less prone to using information about the confound. We show that this model generalizes better and learns features that are indicative of the writing style rather than the content. @@ -5371,7 +5371,7 @@ Learning to Ask for Conversational Machine Learning ShashankSrivastava IgorLabutov - TomMitchell + TomMitchell 4164–4174 Natural language has recently been explored as a new medium of supervision for training machine learning models. Here, we explore learning classification tasks using language in a conversational setting – where the automated learner does not simply receive language input from a teacher, but can proactively engage the teacher by asking questions. We present a reinforcement learning framework, where the learner’s actions correspond to question types and the reward for asking a question is based on how the teacher’s response changes performance of the resulting machine learning model on the learning task. In this framework, learning good question-asking strategies corresponds to asking sequences of questions that maximize the cumulative (discounted) reward, and hence quickly lead to effective classifiers. Empirical analysis across three domains shows that learned question-asking strategies expedite classifier training by asking appropriate questions at different points in the learning process. The approach allows learning classifiers from a blend of strategies, including learning from observations, explanations and clarifications. D19-1426 @@ -5406,8 +5406,8 @@ Fine-grained Knowledge Fusion for Sequence Labeling Domain Adaptation HuiyunYang ShujianHuang - Xin-YuDai - JiajunChen + Xin-YuDai + JiajunChen 4197–4206 In sequence labeling, previous domain adaptation methods focus on the adaptation from the source domain to the entire target domain without considering the diversity of individual target domain samples, which may lead to negative transfer results for certain samples. Besides, an important characteristic of sequence labeling tasks is that different elements within a given sample may also have diverse domain relevance, which requires further consideration. To take the multi-level domain relevance discrepancy into account, in this paper, we propose a fine-grained knowledge fusion model with the domain relevance modeling scheme to control the balance between learning from the target domain data and learning from the source domain model. Experiments on three sequence labeling tasks show that our fine-grained knowledge fusion model outperforms strong baselines and other state-of-the-art sequence labeling domain adaptation methods. D19-1429 @@ -5446,7 +5446,7 @@ Distributionally Robust Language Modeling YonatanOren ShioriSagawa - Tatsunori B.Hashimoto + Tatsunori B.Hashimoto PercyLiang 4227–4237 Language models are generally trained on data spanning a wide range of topics (e.g., news, reviews, fiction), but they might be applied to an a priori unknown target distribution (e.g., restaurant reviews). In this paper, we first show that training on text outside the test distribution can degrade test performance when using standard maximum likelihood (MLE) training. To remedy this without the knowledge of the test distribution, we propose an approach which trains a model that performs well over a wide range of potential test distributions. In particular, we derive a new distributionally robust optimization (DRO) procedure which minimizes the loss of the model over the worst-case mixture of topics with sufficient overlap with the training distribution. Our approach, called topic conditional value at risk (topic CVaR), obtains a 5.5 point perplexity reduction over MLE when the language models are trained on a mixture of Yelp reviews and news and tested only on reviews. @@ -5467,7 +5467,7 @@ Learning Latent Parameters without Human Response Patterns: Item Response Theory with Artificial Crowds - John P.Lalor + John P.Lalor HaoWu HongYu 4249–4259 @@ -5509,7 +5509,7 @@ ChuntingZhou XianLi GrahamNeubig - EduardHovy + EduardHovy 4282–4292 Most sequence-to-sequence (seq2seq) models are autoregressive; they generate each token by conditioning on previously generated tokens. In contrast, non-autoregressive seq2seq models generate all tokens in one pass, which leads to increased efficiency through parallel processing on hardware such as GPUs. However, directly modeling the joint distribution of all tokens simultaneously is challenging, and even with increasingly complex model structures accuracy lags significantly behind autoregressive models. In this paper, we propose a simple, efficient, and effective model for non-autoregressive sequence generation using latent variable models. Specifically, we turn to generative flow, an elegant technique to model complex distributions using neural networks, and design several layers of flow tailored for modeling the conditional density of sequential latent variables. We evaluate this model on three neural machine translation (NMT) benchmark datasets, achieving comparable performance with state-of-the-art non-autoregressive NMT models and almost constant decoding time w.r.t the sequence length. D19-1437 @@ -5536,7 +5536,7 @@ Oana-MariaCamburu Ana-MariaCretu YordanYordanov - PhilBlunsom + PhilBlunsom ThomasLukasiewicz 4303–4312 Pronoun resolution is a major area of natural language understanding. However, large-scale training sets are still scarce, since manually labelling data is costly. In this work, we introduce WikiCREM (Wikipedia CoREferences Masked) a large-scale, yet accurate dataset of pronoun disambiguation instances. We use a language-model-based approach for pronoun resolution in combination with our WikiCREM dataset. We compare a series of models on a collection of diverse and challenging coreference resolution problems, where we match or outperform previous state-of-the-art approaches on 6 out of 7 datasets, such as GAP, DPR, WNLI, PDP, WinoBias, and WinoGender. We release our model to be used off-the-shelf for solving pronoun disambiguation. @@ -5548,7 +5548,7 @@ Identifying and Explaining Discriminative Attributes ArminsStepanjans - AndréFreitas + AndréFreitas 4313–4322 Identifying what is at the center of the meaning of a word and what discriminates it from other words is a fundamental natural language inference task. This paper describes an explicit word vector representation model (WVM) to support the identification of discriminative attributes. A core contribution of the paper is a quantitative and qualitative comparative analysis of different types of data sources and Knowledge Bases in the construction of explainable and explicit WVMs: (i) knowledge graphs built from dictionary definitions, (ii) entity-attribute-relationships graphs derived from images and (iii) commonsense knowledge graphs. Using a detailed quantitative and qualitative analysis, we demonstrate that these data sources have complementary semantic aspects, supporting the creation of explicit semantic vector spaces. The explicit vector spaces are evaluated using the task of discriminative attribute identification, showing comparable performance to the state-of-the-art systems in the task (F1-score = 0.69), while delivering full model transparency and explainability. D19-1440 @@ -5668,7 +5668,7 @@ Weakly-Supervised Concept-based Adversarial Learning for Cross-lingual Word Embeddings HaozhouWang - JamesHenderson + JamesHenderson PaolaMerlo 4419–4430 Distributed representations of words which map each word to a continuous vector have proven useful in capturing important linguistic information not only in a single language but also across different languages. Current unsupervised adversarial approaches show that it is possible to build a mapping matrix that aligns two sets of monolingual word embeddings without high quality parallel data, such as a dictionary or a sentence-aligned corpus. However, without an additional step of refinement, the preliminary mapping learnt by these methods is unsatisfactory, leading to poor performance for typologically distant languages. In this paper, we propose a weakly-supervised adversarial training method to overcome this limitation, based on the intuition that mapping across languages is better done at the concept level than at the word level. We propose a concept-based adversarial training method which improves the performance of previous unsupervised adversarial methods for most languages, and especially for typologically distant language pairs. @@ -5693,8 +5693,8 @@ Contrastive Language Adaptation for Cross-Lingual Stance Detection MitraMohtarami - JamesGlass - PreslavNakov + JamesGlass + PreslavNakov 4442–4452 We study cross-lingual stance detection, which aims to leverage labeled data in one language to identify the relative perspective (or stance) of a given document with respect to a claim in a different target language. In particular, we introduce a novel contrastive language adaptation approach applied to memory networks, which ensures accurate alignment of stances in the source and target languages, and can effectively deal with the challenge of limited labeled data in the target language. The evaluation results on public benchmark datasets and comparison against current state-of-the-art approaches demonstrate the effectiveness of our approach. D19-1452 @@ -5752,10 +5752,10 @@ Everything Happens for a Reason: Discovering the Purpose of Actions in Procedural Text - BhavanaDalvi + BhavanaDalvi NiketTandon AntoineBosselut - Wen-tauYih + Wen-tauYih PeterClark 4496–4505 Our goal is to better comprehend procedural text, e.g., a paragraph about photosynthesis, by not only predicting what happens, but *why* some actions need to happen before others. Our approach builds on a prior process comprehension framework for predicting actions’ effects, to also identify subsequent steps that those effects enable. We present our new model (XPAD) that biases effect predictions towards those that (1) explain more of the actions in the paragraph and (2) are more plausible with respect to background knowledge. We also extend an existing benchmark dataset for procedural text comprehension, ProPara, by adding the new task of explaining actions by predicting their dependencies. We find that XPAD significantly outperforms prior systems on this task, while maintaining the performance on the original task in ProPara. The dataset is available at http://data.allenai.org/propara @@ -5787,7 +5787,7 @@ DanielDuckworth SemihYavuz AmitDubey - Kyu-YoungKim + Kyu-YoungKim AndyCedilnik 4516–4525 A significant barrier to progress in data-driven approaches to building dialog systems is the lack of high quality, goal-oriented conversational data. To help satisfy this elementary requirement, we introduce the initial release of the Taskmaster-1 dataset which includes 13,215 task-based dialogs comprising six domains. Two procedures were used to create this collection, each with unique advantages. The first involves a two-person, spoken “Wizard of Oz” (WOz) approach in which trained agents and crowdsourced workers interact to complete the task while the second is “self-dialog” in which crowdsourced workers write the entire dialog themselves. We do not restrict the workers to detailed scripts or to a small knowledge base and hence we observe that our dataset contains more realistic and diverse conversations in comparison to existing datasets. We offer several baseline models including state of the art neural seq2seq architectures with benchmark performance as well as qualitative human evaluations. Dialogs are labeled with API calls and arguments, a simple and cost effective approach which avoids the requirement of complex annotation schema. The layer of abstraction between the dialog model and the service provider API allows for a given model to interact with multiple services that provide similar functionally. Finally, the dataset will evoke interest in written vs. spoken language, discourse patterns, error handling and other linguistic phenomena related to dialog system research, development and design. @@ -5803,7 +5803,7 @@ BrigiFodor YiZhang AdelYoussef - MonaDiab + MonaDiab 4526–4536 The need for high-quality, large-scale, goal-oriented dialogue datasets continues to grow as virtual assistants become increasingly wide-spread. However, publicly available datasets useful for this area are limited either in their size, linguistic diversity, domain coverage, or annotation granularity. In this paper, we present strategies toward curating and annotating large scale goal oriented dialogue data. We introduce the MultiDoGO dataset to overcome these limitations. With a total of over 81K dialogues harvested across six domains, MultiDoGO is over 8 times the size of MultiWOZ, the other largest comparable dialogue dataset currently available to the public. Over 54K of these harvested conversations are annotated for intent classes and slot labels. We adopt a Wizard-of-Oz approach wherein a crowd-sourced worker (the “customer”) is paired with a trained annotator (the “agent”). The data curation process was controlled via biases to ensure a diversity in dialogue flows following variable dialogue policies. We provide distinct class label tags for agents vs. customer utterances, along with applicable slot labels. We also compare and contrast our strategies on annotation granularity, i.e. turn vs. sentence level. Furthermore, we compare and contrast annotations curated by leveraging professional annotators vs the crowd. We believe our strategies for eliciting and annotating such a dialogue dataset scales across modalities and domains and potentially languages in the future. To demonstrate the efficacy of our devised strategies we establish neural baselines for classification on the agent and customer utterances as well as slot labeling for each domain. D19-1460 @@ -5827,8 +5827,8 @@ <fixed-case>GECOR</fixed-case>: An End-to-End Generative Ellipsis and Co-reference Resolution Model for Task-Oriented Dialogue JunQuan - DeyiXiong - BonnieWebber + DeyiXiong + BonnieWebber ChangjianHu 4547–4557 Ellipsis and co-reference are common and ubiquitous especially in multi-turn dialogues. In this paper, we treat the resolution of ellipsis and co-reference in dialogue as a problem of generating omitted or referred expressions from the dialogue context. We therefore propose a unified end-to-end Generative Ellipsis and CO-reference Resolution model (GECOR) in the context of dialogue. The model can generate a new pragmatically complete user utterance by alternating the generation and copy mode for each user utterance. A multi-task learning framework is further proposed to integrate the GECOR into an end-to-end task-oriented dialogue. In order to train both the GECOR and the multi-task learning framework, we manually construct a new dataset on the basis of the public dataset CamRest676 with both ellipsis and co-reference annotation. On this dataset, intrinsic evaluations on the resolution of ellipsis and co-reference show that the GECOR model significantly outperforms the sequence-to-sequence (seq2seq) baseline model in terms of EM, BLEU and F1 while extrinsic evaluations on the downstream dialogue task demonstrate that our multi-task learning framework with GECOR achieves a higher success rate of task completion than TSCP, a state-of-the-art end-to-end task-oriented dialogue model. @@ -5851,7 +5851,7 @@ Aspect-based Sentiment Classification with Aspect-specific Graph Convolutional Networks - ChenZhang + ChenZhang QiuchiLi DaweiSong 4568–4578 @@ -5867,7 +5867,7 @@ HaisongZhang LingzhiWang XixinWu - Kam-FaiWong + Kam-FaiWong 4579–4589 Aspect words, indicating opinion targets, are essential in expressing and understanding human opinions. To identify aspects, most previous efforts focus on using sequence tagging models trained on human-annotated data. This work studies unsupervised aspect extraction and explores how words appear in global context (on sentence level) and local context (conveyed by neighboring words). We propose a novel neural model, capable of coupling global and local representation to discover aspect words. Experimental results on two benchmarks, laptop and restaurant reviews, show that our model significantly outperforms the state-of-the-art models from previous studies evaluated with varying metrics. Analysis on model output show our ability to learn meaningful and coherent aspect representations. We further investigate how words distribute in global and local context, and find that aspect and non-aspect words do exhibit different context, interpreting our superiority in unsupervised aspect extraction. D19-1465 @@ -5923,7 +5923,7 @@ JonahLubin KaranSikka XiaoLin - DanJurafsky + DanJurafsky AjayDivakaran 4622–4632 Computing author intent from multimodal data like Instagram posts requires modeling a complex relationship between text and image. For example, a caption might evoke an ironic contrast with the image, so neither caption nor image is a mere transcript of the other. Instead they combine—via what has been called meaning multiplication (Bateman et al.)- to create a new meaning that has a more complex relation to the literal meanings of text and image. Here we introduce a multimodal dataset of 1299 Instagram posts labeled for three orthogonal taxonomies: the authorial intent behind the image-caption pair, the contextual relationship between the literal meanings of the image and caption, and the semiotic relationship between the signified meanings of the image and caption. We build a baseline deep multimodal classifier to validate the taxonomy, showing that employing both text and image improves intent detection by 9.6 compared to using only the image modality, demonstrating the commonality of non-intersective meaning multiplication. The gain with multimodality is greatest when the image and caption diverge semiotically. Our dataset offers a new resource for the study of the rich meanings that result from pairing text and image. @@ -5936,7 +5936,7 @@ XingshanZeng JingLi LuWang - Kam-FaiWong + Kam-FaiWong 4633–4643 The prevalent use of social media leads to a vast amount of online conversations being produced on a daily basis. It presents a concrete challenge for individuals to better discover and engage in social media discussions. In this paper, we present a novel framework to automatically recommend conversations to users based on their prior conversation behaviors. Built on neural collaborative filtering, our model explores deep semantic features that measure how a user’s preferences match an ongoing conversation’s context. Furthermore, to identify salient characteristics from interleaving user interactions, our model incorporates graph-structured networks, where both replying relations and temporal features are encoded as conversation context. Experimental results on two large-scale datasets collected from Twitter and Reddit show that our model yields better performance than previous state-of-the-art models, which only utilize lexical features and ignore past user interactions in the conversations. D19-1470 @@ -6024,8 +6024,8 @@ You Shall Know a User by the Company It Keeps: Dynamic Representations for Social Media Users in <fixed-case>NLP</fixed-case> MarcoDel Tredici DiegoMarcheggiani - SabineSchulte im Walde - RaquelFernández + SabineSchulte im Walde + RaquelFernández 4707–4717 Information about individuals can help to better understand what they say, particularly in social media where texts are short. Current approaches to modelling social media users pay attention to their social connections, but exploit this information in a static way, treating all connections uniformly. This ignores the fact, well known in sociolinguistics, that an individual may be part of several communities which are not equally relevant in all communicative situations. We present a model based on Graph Attention Networks that captures this observation. It dynamically explores the social graph of a user, computes a user representation given the most relevant connections for a target task, and combines it with linguistic information to make a prediction. We apply our model to three different tasks, evaluate it against alternative models, and analyse the results extensively, showing that it significantly outperforms other current methods. D19-1477 @@ -6049,7 +6049,7 @@ A Hierarchical Location Prediction Neural Network for <fixed-case>T</fixed-case>witter User Geolocation BinxuanHuang - KathleenCarley + KathleenCarley 4732–4742 Accurate estimation of user location is important for many online services. Previous neural network based methods largely ignore the hierarchical structure among locations. In this paper, we propose a hierarchical location prediction neural network for Twitter user geolocation. Our model first predicts the home country for a user, then uses the country result to guide the city-level prediction. In addition, we employ a character-aware word embedding layer to overcome the noisy information in tweets. With the feature fusion layer, our model can accommodate various feature combinations and achieves state-of-the-art results over three commonly used benchmarks under different feature settings. It not only improves the prediction accuracy but also greatly reduces the mean error distance. D19-1480 @@ -6085,7 +6085,7 @@ YandaChen DesmondPatton CharlotteSelous - KathyMcKeown + KathyMcKeown 4765–4775 Gang-involved youth in cities such as Chicago sometimes post on social media to express their aggression towards rival gangs and previous research has demonstrated that a deep learning approach can predict aggression and loss in posts. To address the possibility of bias in this sensitive application, we developed an approach to systematically interpret the state of the art model. We found, surprisingly, that it frequently bases its predictions on stop words such as “a” or “on”, an approach that could harm social media users who have no aggressive intentions. To tackle this bias, domain experts annotated the rationales, highlighting words that explain why a tweet is labeled as “aggression”. These new annotations enable us to quantitatively measure how justified the model predictions are, and build models that drastically reduce bias. Our study shows that in high stake scenarios, accuracy alone cannot guarantee a good system and we need new evaluation methods. D19-1483 @@ -6134,7 +6134,7 @@ Domain Adaptation for Person-Job Fit with Transferable Deep Global Match Network ShuqingBian - Wayne XinZhao + Wayne XinZhao YangSong TaoZhang Ji-RongWen @@ -6171,7 +6171,7 @@ KristinaGligorić SeanKross MichelleMazurek - HalDaumé III + HalDaumé III 4831–4842 The readability of a digital text can influence people’s ability to learn new things about a range topics from digital resources (e.g., Wikipedia, WebMD). Readability also impacts search rankings, and is used to evaluate the performance of NLP systems. Despite this, we lack a thorough understanding of how to validly measure readability at scale, especially for domain-specific texts. In this work, we present a comparison of the validity of well-known readability measures and introduce a novel approach, Smart Cloze, which is designed to address shortcomings of existing measures. We compare these approaches across four different corpora: crowdworker-generated stories, Wikipedia articles, security and privacy advice, and health information. On these corpora, we evaluate the convergent and content validity of each measure, and detail tradeoffs in score precision, domain-specificity, and participant burden. These results provide a foundation for more accurate readability measurements and better evaluation of new natural-language-processing systems and tools. D19-1489 @@ -6269,7 +6269,7 @@ A Neural Citation Count Prediction Model based on Peer Review Text SiqingLi - Wayne XinZhao + Wayne XinZhao Eddy JingYin Ji-RongWen 4914–4924 @@ -6307,7 +6307,7 @@ Question Answering for Privacy Policies: Combining Computational and Legal Perspectives AbhilashaRavichander - Alan WBlack + Alan WBlack ShomirWilson ThomasNorton NormanSadeh @@ -6562,7 +6562,7 @@ JiatengXie ZaidSheikh GrahamNeubig - JaimeCarbonell + JaimeCarbonell 5164–5174 Most state-of-the-art models for named entity recognition (NER) rely on the availability of large amounts of labeled data, making them challenging to extend to new, lower-resourced languages. However, there are now many proposed solutions to this problem involving either cross-lingual transfer learning, which learns from other highly resourced languages, or active learning, which efficiently selects effective training data based on model predictions. In this paper, we ask the question: given this recent progress, and some amount of human annotation, what is the most effective method for efficiently creating high-quality entity recognizers in under-resourced languages? Based on extensive experimentation using both simulated and real human annotation, we settle on a recipe of starting with a cross-lingual transferred model, then performing targeted annotation of only uncertain entity spans in the target language, minimizing annotator effort. Results demonstrate that cross-lingual transfer is a powerful tool when very little data can be annotated, but an entity-targeted annotation strategy can achieve competitive accuracy quickly, with just one-tenth of training data. D19-1520 @@ -6785,11 +6785,11 @@ HeyangEr SungrokShim EricXue - Xi VictoriaLin + Xi VictoriaLin TianzeShi CaimingXiong RichardSocher - DragomirRadev + DragomirRadev 5338–5349 We focus on the cross-domain context-dependent text-to-SQL generation task. Based on the observation that adjacent natural language questions are often linguistically dependent and their corresponding SQL queries tend to overlap, we utilize the interaction history by editing the previous predicted query to improve the generation quality. Our editing mechanism views SQL as sequences and reuses generation results at the token level in a simple manner. It is flexible to change individual tokens and robust to error propagation. Furthermore, to deal with complex table structures in different domains, we employ an utterance-table encoder and a table-aware decoder to incorporate the context of the user utterance and the table schema. We evaluate our approach on the SParC dataset and demonstrate the benefit of editing compared with the state-of-the-art baselines which generate SQL from scratch. Our code is available at https://github.com/ryanzhumich/sparc_atis_pytorch. D19-1537 @@ -6812,7 +6812,7 @@ AlexeiBaevski SergeyEdunov YinhanLiu - LukeZettlemoyer + LukeZettlemoyer MichaelAuli 5360–5369 We present a new approach for pretraining a bi-directional transformer model that provides significant performance gains across a variety of language understanding problems. Our model solves a cloze-style word reconstruction task, where each word is ablated and must be predicted given the rest of the text. Experiments demonstrate large performance gains on GLUE and new state of the art results on NER as well as constituency parsing benchmarks, consistent with BERT. We also present a detailed analysis of a number of factors that contribute to effective pretraining, including data domain and size, model capacity, and variations on the cloze objective. @@ -6848,7 +6848,7 @@ Transfer Fine-Tuning: A <fixed-case>BERT</fixed-case> Case Study YukiArase - Jun’ichiTsujii + Jun’ichiTsujii 5393–5404 A semantic equivalence assessment is defined as a task that assesses semantic equivalence in a sentence pair by binary judgment (i.e., paraphrase identification) or grading (i.e., semantic textual similarity measurement). It constitutes a set of tasks crucial for research on natural language understanding. Recently, BERT realized a breakthrough in sentence representation learning (Devlin et al., 2019), which is broadly transferable to various NLP tasks. While BERT’s performance improves by increasing its model size, the required computational power is an obstacle preventing practical applications from adopting the technology. Herein, we propose to inject phrasal paraphrase relations into BERT in order to generate suitable representations for semantic equivalence assessment instead of increasing the model size. Experiments on standard natural language understanding tasks confirm that our method effectively improves a smaller BERT model while maintaining the model size. The generated model exhibits superior performance compared to a larger BERT model on semantic equivalence assessment tasks. Furthermore, it achieves larger performance gains on tasks with limited training datasets for fine-tuning, which is a property desirable for transfer learning. D19-1542 @@ -6883,7 +6883,7 @@ Learning Programmatic Idioms for Scalable Semantic Parsing SrinivasanIyer AlvinCheung - LukeZettlemoyer + LukeZettlemoyer 5426–5435 Programmers typically organize executable source code using high-level coding patterns or idiomatic structures such as nested loops, exception handlers and recursive blocks, rather than as individual code tokens. In contrast, state of the art (SOTA) semantic parsers still map natural language instructions to source code by building the code syntax tree one node at a time. In this paper, we introduce an iterative method to extract code idioms from large source code corpora by repeatedly collapsing most-frequent depth-2 subtrees of their syntax trees, and train semantic parsers to apply these idioms during decoding. Applying idiom-based decoding on a recent context-dependent semantic parsing task improves the SOTA by 2.2% BLEU score while reducing training time by more than 50%. This improved speed enables us to scale up the model by training on an extended training set that is 5\times larger, to further move up the SOTA by an additional 2.3% BLEU and 0.9% exact match. Finally, idioms also significantly improve accuracy of semantic parsing to SQL on the ATIS-SQL dataset, when training data is limited. D19-1545 @@ -6895,7 +6895,7 @@ <fixed-case>J</fixed-case>u<fixed-case>IC</fixed-case>e: A Large Scale Distantly Supervised Dataset for Open Domain Context-based Code Generation RajasAgashe SrinivasanIyer - LukeZettlemoyer + LukeZettlemoyer 5436–5446 Interactive programming with interleaved code snippet cells and natural language markdown is recently gaining popularity in the form of Jupyter notebooks, which accelerate prototyping and collaboration. To study code generation conditioned on a long context history, we present JuICe, a corpus of 1.5 million examples with a curated test set of 3.7K instances based on online programming assignments. Compared with existing contextual code generation datasets, JuICe provides refined human-curated data, open-domain code, and an order of magnitude more training data. Using JuICe, we train models for two tasks: (1) generation of the API call sequence in a code cell, and (2) full code cell generation, both conditioned on the NL-Code history up to a particular code cell. Experiments using current baseline code generation models show that both context and distant supervision aid in generation, and that the dataset is challenging for current systems. D19-1546 @@ -6907,7 +6907,7 @@ ZiyuYao YuSu HuanSun - Wen-tauYih + Wen-tauYih 5447–5458 As a promising paradigm, interactive semantic parsing has shown to improve both semantic parsing accuracy and user confidence in the results. In this paper, we propose a new, unified formulation of the interactive semantic parsing problem, where the goal is to design a model-based intelligent agent. The agent maintains its own state as the current predicted semantic parse, decides whether and where human intervention is needed, and generates a clarification question in natural language. A key part of the agent is a world model: it takes a percept (either an initial question or subsequent feedback from the user) and transitions to a new state. We then propose a simple yet remarkably effective instantiation of our framework, demonstrated on two text-to-SQL datasets (WikiSQL and Spider) with different state-of-the-art base semantic parsers. Compared to an existing interactive semantic parsing approach that treats the base parser as a black box, our approach solicits less user feedback but yields higher run-time accuracy. D19-1547 @@ -6918,11 +6918,11 @@ Modeling Graph Structure in Transformer for Better <fixed-case>AMR</fixed-case>-to-Text Generation JieZhu - JunhuiLi + JunhuiLi MuhuaZhu - LonghuaQian + LonghuaQian MinZhang - GuodongZhou + GuodongZhou 5459–5468 Recent studies on AMR-to-text generation often formalize the task as a sequence-to-sequence (seq2seq) learning problem by converting an Abstract Meaning Representation (AMR) graph into a word sequences. Graph structures are further modeled into the seq2seq framework in order to utilize the structural information in the AMR graphs. However, previous approaches only consider the relations between directly connected concepts while ignoring the rich structure in AMR graphs. In this paper we eliminate such a strong limitation and propose a novel structure-aware self-attention approach to better model the relations between indirectly connected concepts in the state-of-the-art seq2seq model, i.e. the Transformer. In particular, a few different methods are explored to learn structural representations between two concepts. Experimental results on English AMR benchmark datasets show that our approach significantly outperforms the state-of-the-art with 29.66 and 31.82 BLEU scores on LDC2015E86 and LDC2017T10, respectively. To the best of our knowledge, these are the best results achieved so far by supervised models on the benchmarks. D19-1548 @@ -6932,7 +6932,7 @@ Syntax-Aware Aspect Level Sentiment Classification with Graph Attention Networks BinxuanHuang - KathleenCarley + KathleenCarley 5469–5477 Aspect level sentiment classification aims to identify the sentiment expressed towards an aspect given a context sentence. Previous neural network based methods largely ignore the syntax structure in one sentence. In this paper, we propose a novel target-dependent graph attention network (TD-GAT) for aspect level sentiment classification, which explicitly utilizes the dependency relationship among words. Using the dependency graph, it propagates sentiment features directly from the syntactic context of an aspect target. In our experiments, we show our method outperforms multiple baselines with GloVe embeddings. We also demonstrate that using BERT representations further substantially boosts the performance. D19-1549 @@ -6970,7 +6970,7 @@ XiabingZhou ZhongqingWang ShoushanLi - GuodongZhou + GuodongZhou MinZhang 5499–5507 There have been a recent line of works to automatically predict the emotions of posts in social media. Existing approaches consider the posts individually and predict their emotions independently. Different from previous researches, we explore the dependence among relevant posts via the authors’ backgrounds, since the authors with similar backgrounds, e.g., gender, location, tend to express similar emotions. However, such personal attributes are not easy to obtain in most social media websites, and it is hard to capture attributes-aware words to connect similar people. Accordingly, we propose a Neural Personal Discrimination (NPD) approach to address above challenges by determining personal attributes from posts, and connecting relevant posts with similar attributes to jointly learn their emotions. In particular, we employ adversarial discriminators to determine the personal attributes, with attention mechanisms to aggregate attributes-aware words. In this way, social correlationship among different posts can be better addressed. Experimental results show the usefulness of personal attributes, and the effectiveness of our proposed NPD approach in capturing such personal attributes with significant gains over the state-of-the-art models. @@ -7010,7 +7010,7 @@ Leveraging Structural and Semantic Correspondence for Attribute-Oriented Aspect Sentiment Discovery ZheZhang - MunindarSingh + MunindarSingh 5528–5538 Opinionated text often involves attributes such as authorship and location that influence the sentiments expressed for different aspects. We posit that structural and semantic correspondence is both prevalent in opinionated text, especially when associated with attributes, and crucial in accurately revealing its latent aspect and sentiment structure. However, it is not recognized by existing approaches. We propose Trait, an unsupervised probabilistic model that discovers aspects and sentiments from text and associates them with different attributes. To this end, Trait infers and leverages structural and semantic correspondence using a Markov Random Field. We show empirically that by incorporating attributes explicitly Trait significantly outperforms state-of-the-art baselines both by generating attribute profiles that accord with our intuitions, as shown via visualization, and yielding topics of greater semantic cohesion. D19-1555 @@ -7033,7 +7033,7 @@ Shallow Domain Adaptive Embeddings for Sentiment Analysis - PrathushaK Sarma + PrathushaK Sarma YingyuLiang WilliamSethares 5549–5558 @@ -7062,7 +7062,7 @@ YunlongLiang FandongMeng JinchaoZhang - JinanXu + JinanXu YufengChen JieZhou 5569–5580 @@ -7080,7 +7080,7 @@ LuoSi MinZhang XiaozhongLiu - GuodongZhou + GuodongZhou 5581–5590 Recently, neural networks have shown promising results on Document-level Aspect Sentiment Classification (DASC). However, these approaches often offer little transparency w.r.t. their inner working mechanisms and lack interpretability. In this paper, to simulating the steps of analyzing aspect sentiment in a document by human beings, we propose a new Hierarchical Reinforcement Learning (HRL) approach to DASC. This approach incorporates clause selection and word selection strategies to tackle the data noise problem in the task of DASC. First, a high-level policy is proposed to select aspect-relevant clauses and discard noisy clauses. Then, a low-level policy is proposed to select sentiment-relevant words and discard noisy words inside the selected clauses. Finally, a sentiment rating predictor is designed to provide reward signals to guide both clause and word selection. Experimental results demonstrate the impressive effectiveness of the proposed approach to DASC over the state-of-the-art baselines. D19-1560 @@ -7109,7 +7109,7 @@ Rethinking Attribute Representation and Injection for Sentiment Classification - Reinald KimAmplayo + Reinald KimAmplayo 5602–5613 Text attributes, such as user and product information in product reviews, have been used to improve the performance of sentiment classification models. The de facto standard method is to incorporate them as additional biases in the attention mechanism, and more performance gains are achieved by extending the model architecture. In this paper, we show that the above method is the least effective way to represent and inject attributes. To demonstrate this hypothesis, unlike previous models with complicated architectures, we limit our base model to a simple BiLSTM with attention classifier, and instead focus on how and where the attributes should be incorporated in the model. We propose to represent attributes as chunk-wise importance weight matrices and consider four locations in the model (i.e., embedding, encoding, attention, classifier) to inject attributes. Experiments show that our proposed method achieves significant improvements over the standard approach and that attention mechanism is the worst location to inject attributes, contradicting prior work. We also outperform the state-of-the-art despite our use of a simple base model. Finally, we show that these representations transfer well to other tasks. Model implementation and datasets are released here: https://github.com/rktamplayo/CHIM. D19-1562 @@ -7156,7 +7156,7 @@ SeunghakYu AlbertoBarrón-Cedeño RostislavPetrov - PreslavNakov + PreslavNakov 5636–5646 Propaganda aims at influencing people’s mindset with the purpose of advancing a specific agenda. Previous work has addressed propaganda detection at document level, typically labelling all articles from a propagandistic news outlet as propaganda. Such noisy gold labels inevitably affect the quality of any learning system trained on them. A further issue with most existing systems is the lack of explainability. To overcome these limitations, we propose a novel task: performing fine-grained analysis of texts by detecting all fragments that contain propaganda techniques as well as their type. In particular, we create a corpus of news articles manually annotated at fragment level with eighteen propaganda techniques and propose a suitable evaluation measure. We further design a novel multi-granularity neural network, and we show that it outperforms several strong BERT-based baselines. D19-1565 @@ -7169,7 +7169,7 @@ Dushyant SinghChauhan Md ShadAkhtar AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 5647–5657 In recent times, multi-modal analysis has been an emerging and highly sought-after field at the intersection of natural language processing, computer vision, and speech processing. The prime objective of such studies is to leverage the diversified information, (e.g., textual, acoustic and visual), for learning a model. The effective interaction among these modalities often leads to a better system in terms of performance. In this paper, we introduce a recurrent neural network based approach for the multi-modal sentiment and emotion analysis. The proposed model learns the inter-modal interaction among the participating modalities through an auto-encoder mechanism. We employ a context-aware attention module to exploit the correspondence among the neighboring utterances. We evaluate our proposed approach for five standard multi-modal affect analysis datasets. Experimental results suggest the efficacy of the proposed model for both sentiment and emotion analysis over various existing state-of-the-art systems. D19-1566 @@ -7190,7 +7190,7 @@ The Role of Pragmatic and Discourse Context in Determining Argument Impact EsinDurmus FaisalLadhak - ClaireCardie + ClaireCardie 5668–5678 Research in the social sciences and psychology has shown that the persuasiveness of an argument depends not only the language employed, but also on attributes of the source/communicator, the audience, and the appropriateness and strength of the argument’s claims given the pragmatic and discourse context of the argument. Among these characteristics of persuasive arguments, prior work in NLP does not explicitly investigate the effect of the pragmatic and discourse context when determining argument quality. This paper presents a new dataset to initiate the study of this aspect of argumentation: it consists of a diverse collection of arguments covering 741 controversial topics and comprising over 47,000 claims. We further propose predictive models that incorporate the pragmatic and discourse context of argumentative claims and show that they outperform models that rely only on claim-specific linguistic features for predicting the perceived impact of individual claims within a particular line of argument. D19-1568 @@ -7216,7 +7216,7 @@ LemaoLiu GuopingHuang ConghuiZhu - TiejunZhao + TiejunZhao 5689–5695 Many Data Augmentation (DA) methods have been proposed for neural machine translation. Existing works measure the superiority of DA methods in terms of their performance on a specific test set, but we find that some DA methods do not exhibit consistent improvements across translation tasks. Based on the observation, this paper makes an initial attempt to answer a fundamental question: what benefits, which are consistent across different methods and tasks, does DA in general obtain? Inspired by recent theoretic advances in deep learning, the paper understands DA from two perspectives towards the generalization ability of a model: input sensitivity and prediction margin, which are defined independent of specific test set thereby may lead to findings with relatively low variance. Extensive experiments show that relatively consistent benefits across five DA methods and four translation tasks are achieved regarding both perspectives. D19-1570 @@ -7310,7 +7310,7 @@ AdinaWilliams DamianBlasi LawrenceWolf-Sonkin - HannaWallach + HannaWallach RyanCotterell 5734–5739 Many of the world’s languages employ grammatical gender on the lexeme. For instance, in Spanish, house “casa” is feminine, whereas the word for paper “papel” is masculine. To a speaker of a genderless language, this categorization seems to exist with neither rhyme nor reason. But, is the association of nouns to gender classes truly arbitrary? In this work, we present the first large-scale investigation of the arbitrariness of gender assignment that uses canonical correlation analysis as a method for correlating the gender of inanimate nouns with their lexical semantic meaning. We find that the gender systems of 18 languages exhibit a significant correlation with an externally grounded definition of lexical semantics. @@ -7332,7 +7332,7 @@ Automatically Inferring Gender Associations from Language SerinaChang - KathyMcKeown + KathyMcKeown 5746–5752 In this paper, we pose the question: do people talk about women and men in different ways? We introduce two datasets and a novel integration of approaches for automatically inferring gender associations from language, discovering coherent word clusters, and labeling the clusters for the semantic concepts they represent. The datasets allow us to compare how people write about women and men in two different settings – one set draws from celebrity news and the other from student reviews of computer science professors. We demonstrate that there are large-scale differences in the ways that people talk about women and men and that these differences vary across domains. Human evaluations show that our methods significantly outperform strong baselines. D19-1579 @@ -7376,7 +7376,7 @@ XiaolongJin XiangbinMeng JiafengGuo - XueqiCheng + XueqiCheng 5766–5770 Syntactic relations are broadly used in many NLP tasks. For event detection, syntactic relation representations based on dependency tree can better capture the interrelations between candidate trigger words and related entities than sentence representations. But, existing studies only use first-order syntactic relations (i.e., the arcs) in dependency trees to identify trigger words. For this reason, this paper proposes a new method for event detection, which uses a dependency tree based graph convolution network with aggregative attention to explicitly model and aggregate multi-order syntactic representations in sentences. Experimental comparison with state-of-the-art baselines shows the superiority of the proposed method. D19-1582 @@ -7442,7 +7442,7 @@ TsutomuHirao KengoNakamura HidetakaKamigaito - ManabuOkumura + ManabuOkumura MasaakiNagata 5797–5802 Rhetorical Structure Theory (RST) parsing is crucial for many downstream NLP tasks that require a discourse structure for a text. Most of the previous RST parsers have been based on supervised learning approaches. That is, they require an annotated corpus of sufficient size and quality, and heavily rely on the language and domain dependent corpus. In this paper, we present two language-independent unsupervised RST parsing methods based on dynamic programming. The first one builds the optimal tree in terms of a dissimilarity score function that is defined for splitting a text span into smaller ones. The second builds the optimal tree in terms of a similarity score function that is defined for merging two adjacent spans into a large one. Experimental results on English and German RST treebanks showed that our parser based on span merging achieved the best score, around 0.8 F_1 score, which is close to the scores of the previous supervised parsers. @@ -7454,8 +7454,8 @@ <fixed-case>BERT</fixed-case> for Coreference Resolution: Baselines and Analysis MandarJoshi OmerLevy - LukeZettlemoyer - DanielWeld + LukeZettlemoyer + DanielWeld 5803–5808 We apply BERT to coreference resolution, achieving a new state of the art on the GAP (+11.5 F1) and OntoNotes (+3.9 F1) benchmarks. A qualitative analysis of model predictions indicates that, compared to ELMo and BERT-base, BERT-large is particularly better at distinguishing between related but distinct entities (e.g., President and CEO), but that there is still room for improvement in modeling document-level context, conversations, and mention paraphrasing. We will release all code and trained models upon publication. D19-1588 @@ -7465,7 +7465,7 @@ Linguistic Versus Latent Relations for Modeling Coherent Flow in Paragraphs DongyeopKang - EduardHovy + EduardHovy 5809–5815 Generating a long, coherent text such as a paragraph requires a high-level control of different levels of relations between sentences (e.g., tense, coreference). We call such a logical connection between sentences as a (paragraph) flow. In order to produce a coherent flow of text, we explore two forms of intersentential relations in a paragraph: one is a human-created linguistical relation that forms a structure (e.g., discourse tree) and the other is a relation from latent representation learned from the sentences themselves. Our two proposed models incorporate each form of relations into document-level language models: the former is a supervised model that jointly learns a language model as well as discourse relation prediction, and the latter is an unsupervised model that is hierarchically conditioned by a recurrent neural network (RNN) over the latent information. Our proposed models with both forms of relations outperform the baselines in partially conditioned paragraph generation task. Our codes and data are publicly available. D19-1589 @@ -7478,7 +7478,7 @@ KazumaKadowaki RyuIida KentaroTorisawa - Jong-HoonOh + Jong-HoonOh JulienKloetzer 5816–5822 We propose new BERT-based methods for recognizing event causality such as “smoke cigarettes” –> “die of lung cancer” written in web texts. In our methods, we grasp each annotator’s policy by training multiple classifiers, each of which predicts the labels given by a single annotator, and combine the resulting classifiers’ outputs to predict the final labels determined by majority vote. Furthermore, we investigate the effect of supplying background knowledge to our classifiers. Since BERT models are pre-trained with a large corpus, some sort of background knowledge for event causality may be learned during pre-training. Our experiments with a Japanese dataset suggest that this is actually the case: Performance improved when we pre-trained the BERT models with web texts containing a large number of event causalities instead of Wikipedia articles or randomly sampled web texts. However, this effect was limited. Therefore, we further improved performance by simply adding texts related to an input causality candidate as background knowledge to the input of the BERT models. We believe these findings indicate a promising future research direction. @@ -7499,7 +7499,7 @@ Quantity doesn’t buy quality syntax with neural language models - Martenvan Schijndel + Martenvan Schijndel AaronMueller TalLinzen 5831–5837 @@ -7515,7 +7515,7 @@ ArturKulmizev FelixHill Daniel M.Low - AndersSøgaard + AndersSøgaard 5838–5845 Representational Similarity Analysis (RSA) is a technique developed by neuroscientists for comparing activity patterns of different measurement modalities (e.g., fMRI, electrophysiology, behavior). As a framework, RSA has several advantages over existing approaches to interpretation of language encoders based on probing or diagnostic classification: namely, it does not require large training samples, is not prone to overfitting, and it enables a more transparent comparison between the representational geometries of different models and modalities. We demonstrate the utility of RSA by establishing a previously unknown correspondence between widely-employed pretrained language encoders and human processing difficulty via eye-tracking data, showcasing its potential in the interpretability toolbox for neural models. D19-1593 @@ -7525,10 +7525,10 @@ Text Genre and Training Data Size in Human-like Parsing - JohnHale + JohnHale AdhigunaKuncoro - KeithHall - ChrisDyer + KeithHall + ChrisDyer JonathanBrennan 5846–5852 Domain-specific training typically makes NLP systems work better. We show that this extends to cognitive modeling as well by relating the states of a neural phrase-structure parser to electrophysiological measures from human participants. These measures were recorded as participants listened to a spoken recitation of the same literary text that was supplied as input to the neural parser. Given more training data, the system derives a better cognitive model — but only when the training examples come from the same textual genre. This finding is consistent with the idea that humans adapt syntactic expectations to particular genres during language comprehension (Kaan and Chun, 2018; Branigan and Pickering, 2017). @@ -7569,7 +7569,7 @@ YuangWei GongCheng LinZhou - XinyuDai + XinyuDai YuzhongQu 5866–5871 Scenario-based question answering (SQA) has attracted increasing research attention. It typically requires retrieving and integrating knowledge from multiple sources, and applying general knowledge to a specific case described by a scenario. SQA widely exists in the medical, geography, and legal domains—both in practice and in the exams. In this paper, we introduce the GeoSQA dataset. It consists of 1,981 scenarios and 4,110 multiple-choice questions in the geography domain at high school level, where diagrams (e.g., maps, charts) have been manually annotated with natural language descriptions to benefit NLP research. Benchmark results on a variety of state-of-the-art methods for question answering, textual entailment, and reading comprehension demonstrate the unique challenges presented by SQA for future research. @@ -7648,7 +7648,7 @@ Answering Conversational Questions on Structured Data without Logical Forms - ThomasMueller + ThomasMueller FrancescoPiccinno PeterShaw MassimoNicosia @@ -7687,9 +7687,9 @@ <fixed-case>Q</fixed-case>uoref: A Reading Comprehension Dataset with Questions Requiring Coreferential Reasoning PradeepDasigi - Nelson F.Liu + Nelson F.Liu AnaMarasović - Noah A.Smith + Noah A.Smith MattGardner 5925–5932 Machine comprehension of texts longer than a single sentence often requires coreference resolution. However, most current reading comprehension benchmarks do not contain complex coreferential phenomena and hence fail to evaluate the ability of models to resolve coreference. We present a new crowdsourced dataset containing more than 24K span-selection questions that require resolving coreference among entities in over 4.7K English paragraphs from Wikipedia. Obtaining questions focused on such phenomena is challenging, because it is hard to avoid lexical cues that shortcut complex reasoning. We deal with this issue by using a strong baseline model as an adversary in the crowdsourcing loop, which helps crowdworkers avoid writing questions with exploitable surface cues. We show that state-of-the-art reading comprehension models perform significantly worse than humans on this benchmark—the best model performance is 70.5 F1, while the estimated human performance is 93.4 F1. @@ -7736,9 +7736,9 @@ A Gated Self-attention Memory Network for Answer Selection - TuanLai - Quan HungTran - TrungBui + TuanLai + Quan HungTran + TrungBui DaisukeKihara 5953–5959 Answer selection is an important research problem, with applications in many areas. Previous deep learning based approaches for the task mainly adopt the Compare-Aggregate architecture that performs word-level comparison followed by aggregation. In this work, we take a departure from the popular Compare-Aggregate architecture, and instead, propose a new gated self-attention memory network for the task. Combined with a simple transfer learning technique from a large-scale online corpus, our model outperforms previous methods by a large margin, achieving new state-of-the-art results on two standard answer selection datasets: TrecQA and WikiQA. @@ -7784,7 +7784,7 @@ Generating Highly Relevant Questions JiazuoQiu - DeyiXiong + DeyiXiong 5983–5987 The neural seq2seq based question generation (QG) is prone to generating generic and undiversified questions that are poorly relevant to the given passage and target answer. In this paper, we propose two methods to address the issue. (1) By a partial copy mechanism, we prioritize words that are morphologically close to words in the input passage when generating questions; (2) By a QA-based reranker, from the n-best list of question candidates, we select questions that are preferred by both the QA and QG model. Experiments and analyses demonstrate that the proposed two methods substantially improve the relevance of generated questions to passages and answers. D19-1614 @@ -7852,7 +7852,7 @@ Countering the Effects of Lead Bias in News Summarization via Multi-Stage Training and Auxiliary Losses MattGrenander YueDong - Jackie Chi KitCheung + Jackie Chi KitCheung AnnieLouis 6019–6024 Sentence position is a strong feature for news summarization, since the lead often (but not always) summarizes the key points of the article. In this paper, we show that recent neural systems excessively exploit this trend, which although powerful for many inputs, is also detrimental when summarizing documents where important content should be extracted from later parts of the article. We propose two techniques to make systems sensitive to the importance of content in different parts of the article. The first technique employs ‘unbiased’ data; i.e., randomly shuffled sentences of the source document, to pretrain the model. The second technique uses an auxiliary ROUGE-based loss that encourages the model to distribute importance scores throughout a document by mimicking sentence-level ROUGE scores on the training data. We show that these techniques significantly improve the performance of a competitive reinforcement learning based extractive system, with the auxiliary loss being more powerful than pretraining. @@ -7864,8 +7864,8 @@ Learning Rhyming Constraints using Structured Adversaries HarshJhamtani - Sanket VaibhavMehta - JaimeCarbonell + Sanket VaibhavMehta + JaimeCarbonell TaylorBerg-Kirkpatrick 6025–6031 Existing recurrent neural language models often fail to capture higher-level structure present in text: for example, rhyming patterns present in poetry. Much prior work on poetry generation uses manually defined constraints which are satisfied during decoding using either specialized decoding procedures or rejection sampling. The rhyming constraints themselves are typically not learned by the generator. We propose an alternate approach that uses a structured discriminator to learn a poetry generator that directly captures rhyming constraints in a generative adversarial setup. By causing the discriminator to compare poems based only on a learned similarity matrix of pairs of line ending words, the proposed approach is able to successfully learn rhyming patterns in two different English poetry datasets (Sonnet and Limerick) without explicitly being provided with any phonetic information @@ -7959,7 +7959,7 @@ <fixed-case>WIQA</fixed-case>: A dataset for “What if...” reasoning over procedural text NiketTandon - BhavanaDalvi + BhavanaDalvi KeisukeSakaguchi PeterClark AntoineBosselut @@ -7973,7 +7973,7 @@ Evaluating <fixed-case>BERT</fixed-case> for natural language inference: A case study on the <fixed-case>C</fixed-case>ommitment<fixed-case>B</fixed-case>ank NanjiangJiang - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe 6086–6091 Natural language inference (NLI) datasets (e.g., MultiNLI) were collected by soliciting hypotheses for a given premise from annotators. Such data collection led to annotation artifacts: systems can identify the premise-hypothesis relationship without observing the premise (e.g., negation in hypothesis being indicative of contradiction). We address this problem by recasting the CommitmentBank for NLI, which contains items involving reasoning over the extent to which a speaker is committed to complements of clause-embedding verbs under entailment-canceling environments (conditional, negation, modal and question). Instead of being constructed to stand in certain relationships with the premise, hypotheses in the recast CommitmentBank are the complements of the clause-embedding verb in each premise, leading to no annotation artifacts in the hypothesis. A state-of-the-art BERT-based model performs well on the CommitmentBank with 85% F1. However analysis of model behavior shows that the BERT models still do not capture the full complexity of pragmatic reasoning, nor encode some of the linguistic generalizations, highlighting room for improvement. D19-1630 @@ -7997,7 +7997,7 @@ The <fixed-case>FLORES</fixed-case> Evaluation Datasets for Low-Resource Machine Translation: <fixed-case>N</fixed-case>epali–<fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>inhala–<fixed-case>E</fixed-case>nglish - FranciscoGuzmán + FranciscoGuzmán Peng-JenChen MyleOtt JuanPino @@ -8018,7 +8018,7 @@ MarjanGhazvininejad OmerLevy YinhanLiu - LukeZettlemoyer + LukeZettlemoyer 6112–6121 Most machine translation systems generate text autoregressively from left to right. We, instead, use a masked language modeling objective to train a model to predict any subset of the target words, conditioned on both the input text and a partially masked target translation. This approach allows for efficient iterative decoding, where we first predict all of the target words non-autoregressively, and then repeatedly mask out and regenerate the subset of words that the model is least confident about. By applying this strategy for a constant number of iterations, our model improves state-of-the-art performance levels for non-autoregressive and parallel decoding translation models by over 4 BLEU on average. It is also able to reach within about 1 BLEU point of a typical left-to-right transformer model, while decoding significantly faster. D19-1633 @@ -8028,7 +8028,7 @@ Learning to Copy for Automatic Post-Editing XuanchengHuang - YangLiu + YangLiu HuanboLuan JingfangXu MaosongSun @@ -8042,7 +8042,7 @@ Exploring Human Gender Stereotypes with Word Association Test YupeiDu YuanbinWu - ManLan + ManLan 6133–6143 Word embeddings have been widely used to study gender stereotypes in texts. One key problem regarding existing bias scores is to evaluate their validities: do they really reflect true bias levels? For a small set of words (e.g. occupations), we can rely on human annotations or external data. However, for most words, evaluating the correctness of them is still an open problem. In this work, we utilize word association test, which contains rich types of word connections annotated by human participants, to explore how gender stereotypes spread within our minds. Specifically, we use random walk on word association graph to derive bias scores for a large amount of words. Experiments show that these bias scores correlate well with bias in the real world. More importantly, comparing with word-embedding-based bias scores, it provides a different perspective on gender stereotypes in words. D19-1635 @@ -8078,8 +8078,8 @@ Set to Ordered Text: Generating Discharge Instructions from Medical Billing Codes - LittonJ Kurisinkel - NancyChen + LittonJ Kurisinkel + NancyChen 6165–6175 We present set to ordered text, a natural language generation task applied to automatically generating discharge instructions from admission ICD (International Classification of Diseases) codes. This task differs from other natural language generation tasks in the following ways: (1) The input is a set of identifiable entities (ICD codes) where the relations between individual entity are not explicitly specified. (2) The output text is not a narrative description (e.g. news articles) composed from the input. Rather, inferences are made from the input (symptoms specified in ICD codes) to generate the output (instructions). (3) There is an optimal order in which each sentence (instruction) should appear in the output. Unlike most other tasks, neither the input (ICD codes) nor their corresponding symptoms appear in the output, so the ordering of the output instructions needs to be learned in an unsupervised fashion. Based on clinical intuition, we hypothesize that each instruction in the output is mapped to a subset of ICD codes specified in the input. We propose a neural architecture that jointly models (a) subset selection: choosing relevant subsets from a set of input entities; (b) content ordering: learning the order of instructions; and (c) text generation: representing the instructions corresponding to the selected subsets in natural language. In addition, we penalize redundancy during beam search to improve tractability for long text generation. Our model outperforms baseline models in BLEU scores and human evaluation. We plan to extend this work to other tasks such as recipe generation from ingredients. D19-1638 @@ -8322,7 +8322,7 @@ A Robust Self-Learning Framework for Cross-Lingual Text Classification - XinDong + XinDong Gerardde Melo 6306–6310 Based on massive amounts of data, recent pretrained contextual representation models have made significant strides in advancing a number of different English NLP tasks. However, for other languages, relevant training data may be lacking, while state-of-the-art deep learning methods are known to be data-hungry. In this paper, we present an elegantly simple robust self-learning framework to include unlabeled non-English samples in the fine-tuning process of pretrained multilingual representation models. We leverage a multilingual model’s own predictions on unlabeled non-English data in order to obtain additional information that can be used during further fine-tuning. Compared with original multilingual models and other cross-lingual classification models, we observe significant gains in effectiveness on document and sentiment classification for a range of diverse languages. @@ -8343,7 +8343,7 @@ Label Embedding using Hierarchical Structure of Labels for <fixed-case>T</fixed-case>witter Classification TaroMiyazaki KiminobuMakino - YukaTakei + YukaTakei HirokiOkamoto JunGoto 6317–6322 @@ -8370,7 +8370,7 @@ YovaKementchedjhieva YanaiElazar DesmondElliott - AndersSøgaard + AndersSøgaard 6330–6335 Elazar and Goldberg (2018) showed that protected attributes can be extracted from the representations of a debiased neural network for mention detection at above-chance levels, by evaluating a diagnostic classifier on a held-out subsample of the data it was trained on. We revisit their experiments and conduct a series of follow-up experiments showing that, in fact, the diagnostic classifier generalizes poorly to both new in-domain samples and new domains, indicating that it relies on correlations specific to their particular data sample. We further show that a diagnostic classifier trained on the biased baseline neural network also does not generalize to new samples. In other words, the biases detected in Elazar and Goldberg (2018) seem restricted to their particular data sample, and would therefore not bias the decisions of the model on new samples, whether in-domain or out-of-domain. In light of this, we discuss better methodologies for detecting bias in our models. D19-1662 @@ -8395,7 +8395,7 @@ MarshallWhite EvaSharma RuisiSu - Prafulla KumarChoubey + Prafulla KumarChoubey RuihongHuang LuWang 6343–6349 @@ -8570,7 +8570,7 @@ Using Clinical Notes with Time Series Data for <fixed-case>ICU</fixed-case> Management SwarajKhadanga KaranAggarwal - ShafiqJoty + ShafiqJoty JaideepSrivastava 6432–6437 Monitoring patients in ICU is a challenging and high-cost task. Hence, predicting the condition of patients during their ICU stay can help provide better acute care and plan the hospital’s resources. There has been continuous progress in machine learning research for ICU management, and most of this work has focused on using time series signals recorded by ICU instruments. In our work, we show that adding clinical notes as another modality improves the performance of the model for three benchmark tasks: in-hospital mortality prediction, modeling decompensation, and length of stay forecasting that play an important role in ICU management. While the time-series data is measured at regular intervals, doctor notes are charted at irregular times, making it challenging to model them together. We propose a method to model them jointly, achieving considerable improvement across benchmark tasks over baseline time-series model. @@ -8582,7 +8582,7 @@ Spelling-Aware Construction of Macaronic Texts for Teaching Foreign-Language Vocabulary AdithyaRenduchintala PhilippKoehn - JasonEisner + JasonEisner 6438–6443 We present a machine foreign-language teacher that modifies text in a student’s native language (L1) by replacing some word tokens with glosses in a foreign language (L2), in such a way that the student can acquire L2 vocabulary simply by reading the resulting macaronic text. The machine teacher uses no supervised data from human students. Instead, to guide the machine teacher’s choice of which words to replace, we equip a cloze language model with a training procedure that can incrementally learn representations for novel words, and use this model as a proxy for the word guessing and learning ability of real human students. We use Mechanical Turk to evaluate two variants of the student model: (i) one that generates a representation for a novel word using only surrounding context and (ii) an extension that also uses the spelling of the novel word. D19-1679 @@ -8663,7 +8663,7 @@ In this hands-on tutorial, we take a closer look at the challenges from these co Processing and Understanding Mixed Language Data MonojitChoudhury AnirudhSrinivasan - SandipanDandapat + SandipanDandapat Multilingual communities exhibit code-mixing, that is, mixing of two or more socially stable languages in a single conversation, sometimes even in a single utterance. This phenomenon has been widely studied by linguists and interaction scientists in the spoken language of such communities. However, with the prevalence of social media and other informal interactive platforms, code-switching is now also ubiquitously observed in user-generated text. As multilingual communities are more the norm from a global perspective, it becomes essential that code-switched text and speech are adequately handled by language technologies and NUIs. Code-mixing is extremely prevalent in all multilingual societies. Current studies have shown that as much as 20% of user generated content from some geographies, like South Asia, parts of Europe, and Singapore, are code-mixed. Thus, it is very important to handle code-mixed content as a part of NLP systems and applications for these geographies. @@ -8717,7 +8717,7 @@ As a gentle start, we will briefly introduce the background of deep learning bas Graph-based Deep Learning in Natural Language Processing ShikharVashishth NaganandYadati - ParthaTalukdar + ParthaTalukdar This tutorial aims to introduce recent advances in graph-based deep learning techniques such as Graph Convolutional Networks (GCNs) for Natural Language Processing (NLP). It provides a brief introduction to deep learning methods on non-Euclidean domains such as graphs and justifies their relevance in NLP. It then covers recent advances in applying graph-based deep learning methods for various NLP tasks, such as semantic role labeling, machine translation, relationship extraction, and many more. vashishth-etal-2019-graph @@ -8736,7 +8736,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations D19-3 - SebastianPadó + SebastianPadó RuihongHuang Association for Computational Linguistics
Hong Kong, China
@@ -8804,7 +8804,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r Automatic Taxonomy Induction and Expansion Nicolas RodolfoFauceglia - AlfioGliozzo + AlfioGliozzo SarthakDash Md. Faisal MahbubChowdhury NandanaMihindukulasooriya @@ -8821,12 +8821,12 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r AndrzejSakrajda AnthonyFerritto LinPan - MichaelGlass + MichaelGlass VittorioCastelli J. WilliamMurdock - RaduFlorian - SalimRoukos - AviSil + RaduFlorian + SalimRoukos + AviSil 31–36 This paper introduces a novel orchestration framework, called CFO (Computation Flow Orchestrator), for building, experimenting with, and deploying interactive NLP (Natural Language Processing) and IR (Information Retrieval) systems to production environments. We then demonstrate a question answering system built using this framework which incorporates state-of-the-art BERT based MRC (Machine Reading Com- prehension) with IR components to enable end-to-end answer retrieval. Results from the demo system are shown to be high quality in both academic and industry domain specific settings. Finally, we discuss best practices when (pre-)training BERT based MRC models for production systems. Screencast links: - Short video (< 3 min): http: //ibm.biz/gaama_demo - Supplementary long video (< 13 min): http://ibm.biz/gaama_cfo_demo D19-3006 @@ -8852,7 +8852,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r Liang-HsinShen Pei-LunTai Chao-ChungWu - Shou-DeLin + Shou-DeLin 43–48 An acrostic is a form of writing that the first token of each line (or other recurring features in the text) forms a meaningful sequence. In this paper we present a generalized acrostic generation system that can hide certain message in a flexible pattern specified by the users. Different from previous works that focus on rule-based solutions, here we adopt a neural- based sequence-to-sequence model to achieve this goal. Besides acrostic, users are also allowed to specify the rhyme and length of the output sequences. Based on our knowledge, this is the first neural-based natural language generation system that demonstrates the capability of performing micro-level control over output sentences. D19-3008 @@ -8863,7 +8863,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r <fixed-case>EASSE</fixed-case>: Easier Automatic Sentence Simplification Evaluation FernandoAlva-Manchego LouisMartin - CarolinaScarton + CarolinaScarton LuciaSpecia 49–54 We introduce EASSE, a Python package aiming to facilitate and standardise automatic evaluation and comparison of Sentence Simplification (SS) systems. EASSE provides a single access point to a broad range of evaluation resources: standard automatic metrics for assessing SS outputs (e.g. SARI), word-level accuracy scores for certain simplification transformations, reference-independent quality estimation features (e.g. compression ratio), and standard test data for SS evaluation (e.g. TurkCorpus). Finally, EASSE generates easy-to-visualise reports on the various metrics and features above and on how a particular SS output fares against reference simplifications. Through experiments, we show that these functionalities allow for better comparison and understanding of the performance of SS systems. @@ -8876,7 +8876,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r EugeneKharitonov RahmaChaabouni DianeBouchacourt - MarcoBaroni + MarcoBaroni 55–60 There is renewed interest in simulating language emergence among deep neural agents that communicate to jointly solve a task, spurred by the practical aim to develop language-enabled interactive AIs, as well as by theoretical questions about the evolution of human language. However, optimizing deep architectures connected by a discrete communication channel (such as that in which language emerges) is technically challenging. We introduce EGG, a toolkit that greatly simplifies the implementation of emergent-language communication games. EGG’s modular design provides a set of building blocks that the user can combine to create new games, easily navigating the optimization and architecture space. We hope that the tool will lower the technical barrier, and encourage researchers from various backgrounds to do original work in this exciting area. D19-3010 @@ -8952,7 +8952,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r <fixed-case>HARE</fixed-case>: a Flexible Highlighting Annotator for Ranking and Exploration DenisNewman-Griffis - EricFosler-Lussier + EricFosler-Lussier 85–90 Exploration and analysis of potential data sources is a significant challenge in the application of NLP techniques to novel information domains. We describe HARE, a system for highlighting relevant information in document collections to support ranking and triage, which provides tools for post-processing and qualitative analysis for model development and tuning. We apply HARE to the use case of narrative descriptions of mobility information in clinical data, and demonstrate its utility in comparing candidate embedding features. We provide a web-based interface for annotation visualization and document ranking, with a modular backend to support interoperability with existing annotation tools. Our system is available online at https://github.com/OSU-slatelab/HARE. D19-3015 @@ -8988,7 +8988,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r <fixed-case>INMT</fixed-case>: Interactive Neural Machine Translation Prediction SebastinSanty - SandipanDandapat + SandipanDandapat MonojitChoudhury KalikaBali 103–108 @@ -9012,7 +9012,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r Journalist-in-the-Loop: Continuous Learning as a Service for Rumour Analysis TwinKarmakharm NikolaosAletras - KalinaBontcheva + KalinaBontcheva 115–120 Automatically identifying rumours in social media and assessing their veracity is an important task with downstream applications in journalism. A significant challenge is how to keep rumour analysis tools up-to-date as new information becomes available for particular rumours that spread in a social network. This paper presents a novel open-source web-based rumour analysis tool that can continuous learn from journalists. The system features a rumour annotation service that allows journalists to easily provide feedback for a given social media post through a web-based interface. The feedback allows the system to improve an underlying state-of-the-art neural network-based rumour classification model. The system can be easily integrated as a service into existing tools and platforms used by journalists using a REST API. D19-3020 @@ -9021,7 +9021,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r <fixed-case>LIDA</fixed-case>: Lightweight Interactive Dialogue Annotator - EdwardCollins + EdwardCollins NikolaiRozanov BingbingZhang 121–126 @@ -9033,7 +9033,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r <fixed-case>LINSPECTOR</fixed-case> <fixed-case>WEB</fixed-case>: A Multilingual Probing Suite for Word Representations MaxEichler - Gözde GülŞahin + Gözde GülŞahin IrynaGurevych 127–132 We present LINSPECTOR WEB , an open source multilingual inspector to analyze word representations. Our system provides researchers working in low-resource settings with an easily accessible web based probing tool to gain quick insights into their word embeddings especially outside of the English language. To do this we employ 16 simple linguistic probing tasks such as gender, case marking, and tense for a diverse set of 28 languages. We support probing of static word embeddings along with pretrained AllenNLP models that are commonly used for NLP downstream tasks such as named entity recognition, natural language inference and dependency parsing. The results are visualized in a polar chart and also provided as a table. LINSPECTOR WEB is available as an offline tool or at https://linspector.ukp.informatik.tu-darmstadt.de. @@ -9084,7 +9084,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r Multilingual, Multi-scale and Multi-layer Visualization of Intermediate Representations CarlosEscolano - Marta R.Costa-jussà + Marta R.Costa-jussà EloraLacroux Pere-PauVázquez 151–156 @@ -9097,7 +9097,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r <fixed-case>MY</fixed-case>-<fixed-case>AKKHARA</fixed-case>: A <fixed-case>R</fixed-case>omanization-based <fixed-case>B</fixed-case>urmese (<fixed-case>M</fixed-case>yanmar) Input Method ChenchenDing MasaoUtiyama - EiichiroSumita + EiichiroSumita 157–162 MY-AKKHARA is a method used to input Burmese texts encoded in the Unicode standard, based on commonly accepted Latin transcription. By using this method, arbitrary Burmese strings can be accurately inputted with 26 lowercase Latin letters. Meanwhile, the 26 uppercase Latin letters are designed as shortcuts of lowercase letter sequences. The frequency of Burmese characters is considered in MY-AKKHARA to realize an efficient keystroke distribution on a QWERTY keyboard. Given that the Unicode standard has not been extensively used in digitization of Burmese, we hope that MY-AKKHARA can contribute to the widespread use of Unicode in Myanmar and can provide a platform for smart input methods for Burmese in the future. An implementation of MY-AKKHARA running in Windows is released at http://www2.nict.go.jp/astrec-att/member/ding/my-akkhara.html D19-3027 @@ -9181,7 +9181,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r <fixed-case>R</fixed-case>edcoat: A Collaborative Annotation Tool for Hierarchical Entity Typing MichaelStewart - WeiLiu + WeiLiu RachelCardell-Oliver 193–198 We introduce Redcoat, a web-based annotation tool that supports collaborative hierarchical entity typing. As an annotation tool, Redcoat also facilitates knowledge elicitation by allowing the creation and continuous refinement of concept hierarchies during annotation. It aims to minimise not only annotation time but the time it takes for project creators to set up and distribute projects to annotators. Projects created using the web-based interface can be rapidly distributed to a list of email addresses. Redcoat handles the propagation of documents amongst annotators and automatically scales the annotation workload depending on the number of active annotators. In this paper we discuss these key features and outline Redcoat’s system architecture. We also highlight Redcoat’s unique benefits over existing annotation tools via a qualitative comparison. @@ -9193,7 +9193,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r <fixed-case>SEAGLE</fixed-case>: A Platform for Comparative Evaluation of Semantic Encoders for Information Retrieval Fabian DavidSchmidt MarkusDietsche - Simone PaoloPonzetto + Simone PaoloPonzetto GoranGlavaš 199–204 We introduce Seagle, a platform for comparative evaluation of semantic text encoding models on information retrieval (IR) tasks. Seagle implements (1) word embedding aggregators, which represent texts as algebraic aggregations of pretrained word embeddings and (2) pretrained semantic encoders, and allows for their comparative evaluation on arbitrary (monolingual and cross-lingual) IR collections. We benchmark Seagle’s models on monolingual document retrieval and cross-lingual sentence retrieval. Seagle functionality can be exploited via an easy-to-use web interface and its modular backend (micro-service architecture) can easily be extended with additional semantic search models. @@ -9267,8 +9267,8 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r GeorgiKaradzhov RamyBaly KareemDarwish - JamesGlass - PreslavNakov + JamesGlass + PreslavNakov 223–228 We introduce Tanbih, a news aggregator with intelligent analysis tools to help readers understanding what’s behind a news story. Our system displays news grouped into events and generates media profiles that show the general factuality of reporting, the degree of propagandistic content, hyper-partisanship, leading political ideology, general frame of reporting, and stance with respect to various claims and topics of a news outlet. In addition, we automatically analyse each article to detect whether it is propagandistic and to determine its stance with respect to a number of controversial topics. D19-3038 @@ -9291,7 +9291,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r <fixed-case>T</fixed-case>ell<fixed-case>M</fixed-case>e<fixed-case>W</fixed-case>hy: Learning to Explain Corrective Feedback for Second Language Learners Yi-HueiLai - JasonChang + JasonChang 235–240 We present a writing prototype feedback system, TellMeWhy, to provide explanations of errors in submitted essays. In our approach, the sentence with corrections is analyzed to identify error types and problem words, aimed at customizing explanations based on the context of the error. The method involves learning the relation of errors and problem words, generating common feedback patterns, and extracting grammar patterns, collocations and example sentences. At run-time, a sentence with corrections is classified, and the problem word and template are identified to provide detailed explanations. Preliminary evaluation shows that the method has potential to improve existing commercial writing services. D19-3040 @@ -9303,7 +9303,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r ZheZhao HuiChen JinbinZhang - XinZhao + XinZhao TaoLiu WeiLu XiChen @@ -9363,7 +9363,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r AlbertoBarrón-Cedeño ChrisBrew ChrisLeberknight - PreslavNakov + PreslavNakov Association for Computational Linguistics
Hong Kong, China
November @@ -9393,7 +9393,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r Detecting context abusiveness using hierarchical deep learning Ju-HyoungLee Jun-UPark - Jeong-WonCha + Jeong-WonCha Yo-SubHan 10–19 Abusive text is a serious problem in social media and causes many issues among users as the number of users and the content volume increase. There are several attempts for detecting or preventing abusive text effectively. One simple yet effective approach is to use an abusive lexicon and determine the existence of an abusive word in text. This approach works well even when an abusive word is obfuscated. On the other hand, it is still a challenging problem to determine abusiveness in a text having no explicit abusive words. Especially, it is hard to identify sarcasm or offensiveness in context without any abusive words. We tackle this problem using an ensemble deep learning model. Our model consists of two parts of extracting local features and global features, which are crucial for identifying implicit abusiveness in context level. We evaluate our model using three benchmark data. Our model outperforms all the previous models for detecting abusiveness in a text data without abusive words. Furthermore, we combine our model and an abusive lexicon method. The experimental results show that our model has at least 4% better performance compared with the previous approaches for identifying text abusiveness in case of with/without abusive words. @@ -9417,7 +9417,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r Identifying Nuances in Fake News vs. Satire: Using Semantic and Linguistic Cues OrLevi PedramHosseini - MonaDiab + MonaDiab DavidBroniatowski 31–35 The blurry line between nefarious fake news and protected-speech satire has been a notorious struggle for social media platforms. Further to the efforts of reducing exposure to misinformation on social media, purveyors of fake news have begun to masquerade as satire sites to avoid being demoted. In this work, we address the challenge of automatically classifying fake news versus satire. Previous work have studied whether fake news and satire can be distinguished based on language differences. Contrary to fake news, satire stories are usually humorous and carry some political or social message. We hypothesize that these nuances could be identified using semantic and linguistic cues. Consequently, we train a machine learning method using semantic representation, with a state-of-the-art contextual language model, and with linguistic features based on textual coherence metrics. Empirical evaluation attests to the merits of our approach compared to the language-based baseline and sheds light on the nuances between fake news and satire. As avenues for future work, we consider studying additional linguistic features related to the humor aspect, and enriching the data with current news events, to help identify a political or social message. @@ -9456,7 +9456,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r Generating Sentential Arguments from Diverse Perspectives on Controversial Topic ChaeHunPark WonsukYang - JongPark + JongPark 56–65 Considering diverse aspects of an argumentative issue is an essential step for mitigating a biased opinion and making reasonable decisions. A related generation model can produce flexible results that cover a wide range of topics, compared to the retrieval-based method that may show unstable performance for unseen data. In this paper, we study the problem of generating sentential arguments from multiple perspectives, and propose a neural method to address this problem. Our model, ArgDiver (Argument generation model from diverse perspectives), in a way a conversational system, successfully generates high-quality sentential arguments. At the same time, the automatically generated arguments by our model show a higher diversity than those generated by any other baseline models. We believe that our work provides evidence for the potential of a good generation model in providing diverse perspectives on a controversial topic. D19-5007 @@ -9478,7 +9478,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r Unraveling the Search Space of Abusive Language in <fixed-case>W</fixed-case>ikipedia with Dynamic Lexicon Acquisition Wei-FanChen - KhalidAl Khatib + KhalidAl Khatib MatthiasHagen HenningWachsmuth BennoStein @@ -9514,7 +9514,7 @@ A plethora of methods have been proposed to emphasize specific lexico-semantic r KhushbuSaxena UsamaYaseen ThomasRunkler - HinrichSchütze + HinrichSchütze 92–97 This paper describes our system (MIC-CIS) details and results of participation in the fine grained propaganda detection shared task 2019. To address the tasks of sentence (SLC) and fragment level (FLC) propaganda detection, we explore different neural architectures (e.g., CNN, LSTM-CRF and BERT) and extract linguistic (e.g., part-of-speech, named entity, readability, sentiment, emotion, etc.), layout and topical features. Specifically, we have designed multi-granularity and multi-tasking neural architectures to jointly perform both the sentence and fragment level propaganda detection. Additionally, we investigate different ensemble schemes such as majority-voting, relax-voting, etc. to boost overall system performance. Compared to the other participating systems, our submissions are ranked 3rd and 4th in FLC and SLC tasks, respectively. D19-5012 @@ -9661,7 +9661,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.
Proceedings of the Second Workshop on Economics and Natural Language Processing D19-51 UdoHahn - VéroniqueHoste + VéroniqueHoste ZhuZhang Association for Computational Linguistics
Hong Kong
@@ -9678,7 +9678,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. BerkeOral ErdemEmekligil SeçilArslan - GülşenEryiğit + GülşenEryiğit 1–9 In order to automate banking processes (e.g. payments, money transfers, foreign trade), we need to extract banking transactions from different types of mediums such as faxes, e-mails, and scanners. Banking orders may be considered as complex documents since they contain quite complex relations compared to traditional datasets used in relation extraction research. In this paper, we present our method to extract intersentential, nested and complex relations from banking orders, and introduce a relation extraction method based on maximal clique factorization technique. We demonstrate 11% error reduction over previous methods. D19-5101 @@ -9687,7 +9687,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.
Financial Event Extraction Using <fixed-case>W</fixed-case>ikipedia-Based Weak Supervision - LiatEin-Dor + LiatEin-Dor ArielGera OrithToledo-Ronen AlonHalfon @@ -9704,7 +9704,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. A Time Series Analysis of Emotional Loading in Central Bank Statements - SvenBuechel + SvenBuechel SimonJunker ThoreSchlaak ClausMichelsen @@ -9795,7 +9795,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. AnoopKunchukuttan NobushigeDoi YusukeOda - OndřejBojar + OndřejBojar ShantipriyaParida IsaoGoto HidayaMino @@ -9846,7 +9846,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Controlling <fixed-case>J</fixed-case>apanese Honorifics in <fixed-case>E</fixed-case>nglish-to-<fixed-case>J</fixed-case>apanese Neural Machine Translation WestonFeely EvaHasler - Adriàde Gispert + Adriàde Gispert 45–53 In the Japanese language different levels of honorific speech are used to convey respect, deference, humility, formality and social distance. In this paper, we present a method for controlling the level of formality of Japanese output in English-to-Japanese neural machine translation (NMT). By using heuristics to identify honorific verb forms, we classify Japanese sentences as being one of three levels of informal, polite, or formal speech in parallel text. The English source side is marked with a feature that identifies the level of honorific speech present in the Japanese target side. We use this parallel text to train an English-Japanese NMT model capable of producing Japanese translations in different honorific speech styles for the same English input sentence. D19-5203 @@ -9870,7 +9870,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Sahinur RahmanLaskar Rohit PratapSingh ParthaPakray - SivajiBandyopadhyay + SivajiBandyopadhyay 62–67 With the widespread use of Machine Trans-lation (MT) techniques, attempt to minimizecommunication gap among people from di-verse linguistic backgrounds. We have par-ticipated in Workshop on Asian Transla-tion 2019 (WAT2019) multi-modal translationtask. There are three types of submissiontrack namely, multi-modal translation, Hindi-only image captioning and text-only transla-tion for English to Hindi translation. The mainchallenge is to provide a precise MT output. The multi-modal concept incorporates textualand visual features in the translation task. Inthis work, multi-modal translation track re-lies on pre-trained convolutional neural net-works (CNN) with Visual Geometry Grouphaving 19 layered (VGG19) to extract imagefeatures and attention-based Neural MachineTranslation (NMT) system for translation. The merge-model of recurrent neural network(RNN) and CNN is used for the Hindi-onlyimage captioning. The text-only translationtrack is based on the transformer model of theNMT system. The official results evaluated atWAT2019 translation task, which shows thatour multi-modal NMT system achieved Bilin-gual Evaluation Understudy (BLEU) score20.37, Rank-based Intuitive Bilingual Eval-uation Score (RIBES) 0.642838, Adequacy-Fluency Metrics (AMFM) score 0.668260 forchallenge test data and BLEU score 40.55,RIBES 0.760080, AMFM score 0.770860 forevaluation test data in English to Hindi multi-modal translation respectively. D19-5205 @@ -9885,7 +9885,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. ChenchenDing AtsushiFujita MasaoUtiyama - EiichiroSumita + EiichiroSumita 68–75 This paper presents the NICT’s supervised and unsupervised machine translation systems for the WAT2019 Myanmar-English and Khmer-English translation tasks. For all the translation directions, we built state-of-the-art supervised neural (NMT) and statistical (SMT) machine translation systems, using monolingual data cleaned and normalized. Our combination of NMT and SMT performed among the best systems for the four translation directions. We also investigated the feasibility of unsupervised machine translation for low-resource and distant language pairs and confirmed observations of previous work showing that unsupervised MT is still largely unable to deal with them. D19-5206 @@ -9895,7 +9895,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>NICT</fixed-case>’s participation to <fixed-case>WAT</fixed-case> 2019: Multilingualism and Multi-step Fine-Tuning for Low Resource <fixed-case>NMT</fixed-case> RajDabre - EiichiroSumita + EiichiroSumita 76–80 In this paper we describe our submissions to WAT 2019 for the following tasks: English–Tamil translation and Russian–Japanese translation. Our team,“NICT-5”, focused on multilingual domain adaptation and back-translation for Russian–Japanese translation and on simple fine-tuning for English–Tamil translation . We noted that multi-stage fine tuning is essential in leveraging the power of multilingualism for an extremely low-resource language like Russian–Japanese. Furthermore, we can improve the performance of such a low-resource language pair by exploiting a small but in-domain monolingual corpus via back-translation. We managed to obtain second rank in both tasks for all translation directions. D19-5207 @@ -9925,7 +9925,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. KehaiChen ChenchenDing MasaoUtiyama - EiichiroSumita + EiichiroSumita 90–93 This paper presents the NICT’s participation (team ID: NICT) in the 6th Workshop on Asian Translation (WAT-2019) shared translation task, specifically Myanmar (Burmese) - English task in both translation directions. We built neural machine translation (NMT) systems for these tasks. Our NMT systems were trained with language model pretraining. Back-translation technology is adopted to NMT. Our NMT systems rank the third in English-to-Myanmar and the second in Myanmar-to-English according to BLEU score. D19-5209 @@ -10012,7 +10012,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>LTRC</fixed-case>-<fixed-case>MT</fixed-case> Simple & Effective <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Neural Machine Translation Systems at <fixed-case>WAT</fixed-case> 2019 VikrantGoyal - Dipti MisraSharma + Dipti MisraSharma 137–140 This paper describes the Neural Machine Translation systems of IIIT-Hyderabad (LTRC-MT) for WAT 2019 Hindi-English shared task. We experimented with both Recurrent Neural Networks & Transformer architectures. We also show the results of our experiments of training NMT models using additional data via backtranslation. D19-5216 @@ -10022,7 +10022,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Long Warm-up and Self-Training: Training Strategies of <fixed-case>NICT</fixed-case>-2 <fixed-case>NMT</fixed-case> System at <fixed-case>WAT</fixed-case>-2019 KenjiImamura - EiichiroSumita + EiichiroSumita 141–146 This paper describes the NICT-2 neural machine translation system at the 6th Workshop on Asian Translation. This system employs the standard Transformer model but features the following two characteristics. One is the long warm-up strategy, which performs a longer warm-up of the learning rate at the start of the training than conventional approaches. Another is that the system introduces self-training approaches based on multiple back-translations generated by sampling. We participated in three tasks—ASPEC.en-ja, ASPEC.ja-en, and TDDC.ja-en—using this system. D19-5217 @@ -10077,7 +10077,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>NLPRL</fixed-case> at <fixed-case>WAT</fixed-case>2019: Transformer-based <fixed-case>T</fixed-case>amil – <fixed-case>E</fixed-case>nglish Indic Task Neural Machine Translation System AmitKumar - Anil KumarSingh + Anil KumarSingh 171–174 This paper describes the Machine Translation system for Tamil-English Indic Task organized at WAT 2019. We use Transformer- based architecture for Neural Machine Translation. D19-5222 @@ -10098,8 +10098,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>WAT</fixed-case>2019: <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Translation on <fixed-case>H</fixed-case>indi Visual Genome Dataset LoitongbamSanayai Meetei - Thoudam DorenSingh - SivajiBandyopadhyay + Thoudam DorenSingh + SivajiBandyopadhyay 181–188 A multimodal translation is a task of translating a source language to a target language with the help of a parallel text corpus paired with images that represent the contextual details of the text. In this paper, we carried out an extensive comparison to evaluate the benefits of using a multimodal approach on translating text in English to a low resource language, Hindi as a part of WAT2019 shared task. We carried out the translation of English to Hindi in three separate tasks with both the evaluation and challenge dataset. First, by using only the parallel text corpora, then through an image caption generation approach and, finally with the multimodal approach. Our experiment shows a significant improvement in the result with the multimodal approach than the other approach. D19-5224 @@ -10110,8 +10110,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>SYSTRAN</fixed-case> @ <fixed-case>WAT</fixed-case> 2019: <fixed-case>R</fixed-case>ussian-<fixed-case>J</fixed-case>apanese News Commentary task JitaoXu TuAnhNguyen - MinhQuangPham - JosepCrego + MinhQuangPham + JosepCrego JeanSenellart 189–194 This paper describes Systran’s submissions to WAT 2019 Russian-Japanese News Commentary task. A challenging translation task due to the extremely low resources available and the distance of the language pair. We have used the neural Transformer architecture learned over the provided resources and we carried out synthetic data generation experiments which aim at alleviating the data scarcity problem. Results indicate the suitability of the data augmentation experiments, enabling our systems to rank first according to automatic evaluations. @@ -10134,7 +10134,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Sentiment Aware Neural Machine Translation ChengleiSi KuiWu - Ai TiAw + Ai TiAw Min-YenKan 200–206 Sentiment ambiguous lexicons refer to words where their polarity depends strongly on con- text. As such, when the context is absent, their translations or their embedded sentence ends up (incorrectly) being dependent on the training data. While neural machine translation (NMT) has achieved great progress in recent years, most systems aim to produce one single correct translation for a given source sentence. We investigate the translation variation in two sentiment scenarios. We perform experiments to study the preservation of sentiment during translation with three different methods that we propose. We conducted tests with both sentiment and non-sentiment bearing contexts to examine the effectiveness of our methods. We show that NMT can generate both positive- and negative-valent translations of a source sentence, based on a given input sentiment label. Empirical evaluations show that our valence-sensitive embedding (VSE) method significantly outperforms a sequence-to-sequence (seq2seq) baseline, both in terms of BLEU score and ambiguous word translation accuracy in test, given non-sentiment bearing contexts. @@ -10146,8 +10146,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Overcoming the Rare Word Problem for low-resource language pairs in Neural Machine Translation Thi-VinhNgo Thanh-LeHa - Phuong-ThaiNguyen - Le-MinhNguyen + Phuong-ThaiNguyen + Le-MinhNguyen 207–214 Among the six challenges of neural machine translation (NMT) coined by (Koehn and Knowles, 2017), rare-word problem is considered the most severe one, especially in translation of low-resource languages. In this paper, we propose three solutions to address the rare words in neural machine translation systems. First, we enhance source context to predict the target words by connecting directly the source embeddings to the output of the attention component in NMT. Second, we propose an algorithm to learn morphology of unknown words for English in supervised way in order to minimize the adverse effect of rare-word problem. Finally, we exploit synonymous relation from the WordNet to overcome out-of-vocabulary (OOV) problem of NMT. We evaluate our approaches on two low-resource language pairs: English-Vietnamese and Japanese-Vietnamese. In our experiments, we have achieved significant improvements of up to roughly +1.0 BLEU points in both language pairs. D19-5228 @@ -10174,7 +10174,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. D19-53 DmitryUstalov SwapnaSomasundaran - PeterJansen + PeterJansen GoranGlavaš MartinRiedl MihaiSurdeanu @@ -10226,9 +10226,9 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Neural Speech Translation using Lattice Transformations and Graph Networks - DanielBeck - TrevorCohn - GholamrezaHaffari + DanielBeck + TrevorCohn + GholamrezaHaffari 26–31 Speech translation systems usually follow a pipeline approach, using word lattices as an intermediate representation. However, previous work assume access to the original transcriptions used to train the ASR system, which can limit applicability in real scenarios. In this work we propose an approach for speech translation through lattice transformations and neural models based on graph networks. Experimental results show that our approach reaches competitive performance without relying on transcriptions, while also being orders of magnitude faster than previous work. D19-5304 @@ -10251,7 +10251,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. MokanaranganThayaparan MarcoValentino ViktorSchlegel - AndréFreitas + AndréFreitas 42–51 Recent advances in reading comprehension have resulted in models that surpass human performance when the answer is contained in a single, continuous passage of text. However, complex Question Answering (QA) typically requires multi-hop reasoning - i.e. the integration of supporting facts from different sources, to infer the correct answer. This paper proposes Document Graph Network (DGN), a message passing architecture for the identification of supporting facts over a graph-structured representation of text. The evaluation on HotpotQA shows that DGN obtains competitive results when compared to a reading comprehension baseline operating on raw text, confirming the relevance of structured representations for supporting multi-hop reasoning. D19-5306 @@ -10351,7 +10351,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Evaluating Research Novelty Detection: Counterfactual Approaches - Reinald KimAmplayo + Reinald KimAmplayo Seung-wonHwang MinSong 124–133 @@ -10364,7 +10364,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Do Sentence Interactions Matter? Leveraging Sentence Level Representations for Fake News Classification VaibhavVaibhav RaghuramMandyam - EduardHovy + EduardHovy 134–139 The rising growth of fake news and misleading information through online media outlets demands an automatic method for detecting such news articles. Of the few limited works which differentiate between trusted vs other types of news article (satire, propaganda, hoax), none of them model sentence interactions within a document. We observe an interesting pattern in the way sentences interact with each other across different kind of news articles. To capture this kind of information for long news articles, we propose a graph neural network-based model which does away with the need of feature engineering for fine grained fake news classification. Through experiments, we show that our proposed method beats strong neural baselines and achieves state-of-the-art accuracy on existing datasets. Moreover, we establish the generalizability of our model by evaluating its performance in out-of-domain scenarios. Code is available at https://github.com/MysteryVaibhav/fake_news_semantics. D19-5316 @@ -10406,7 +10406,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. JieChen MariaChang LingfeiWu - MichaelWitbrock + MichaelWitbrock 159–163 Semantic parsing is a fundamental problem in natural language understanding, as it involves the mapping of natural language to structured forms such as executable queries or logic-like knowledge representations. Existing deep learning approaches for semantic parsing have shown promise on a variety of benchmark data sets, particularly on text-to-SQL parsing. However, most text-to-SQL parsers do not generalize to unseen data sets in different domains. In this paper, we propose a new cross-domain learning scheme to perform text-to-SQL translation and demonstrate its use on Spider, a large-scale cross-domain text-to-SQL data set. We improve upon a state-of-the-art Spider model, SyntaxSQLNet, by constructing a graph of column names for all databases and using graph neural networks to compute their embeddings. The resulting embeddings offer better cross-domain representations and SQL queries, as evidenced by substantial improvement on the Spider data set compared to SyntaxSQLNet. D19-5319 @@ -10427,7 +10427,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Node Embeddings for Graph Merging: Case of Knowledge Graph Construction IdaSzubert - MarkSteedman + MarkSteedman 172–176 Combining two graphs requires merging the nodes which are counterparts of each other. In this process errors occur, resulting in incorrect merging or incorrect failure to merge. We find a high prevalence of such errors when using AskNET, an algorithm for building Knowledge Graphs from text corpora. AskNET node matching method uses string similarity, which we propose to replace with vector embedding similarity. We explore graph-based and word-based embedding models and show an overall error reduction of from 56% to 23.6%, with a reduction of over a half in both types of incorrect node matching. D19-5321 @@ -10437,7 +10437,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>DB</fixed-case>ee: A Database for Creating and Managing Knowledge Graphs and Embeddings ViktorSchlegel - AndréFreitas + AndréFreitas 177–185 This paper describes DBee, a database to support the construction of data-intensive AI applications. DBee provides a unique data model which operates jointly over large-scale knowledge graphs (KGs) and embedding vector spaces (VSs). This model supports queries which exploit the semantic properties of both types of representations (KGs and VSs). Additionally, DBee aims to facilitate the construction of KGs and VSs, by providing a library of generators, which can be used to create, integrate and transform data into KGs and VSs. D19-5322 @@ -10460,7 +10460,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Proceedings of the 2nd Workshop on New Frontiers in Summarization D19-54 LuWang - Jackie Chi KitCheung + Jackie Chi KitCheung GiuseppeCarenini FeiLiu Association for Computational Linguistics @@ -10476,8 +10476,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Answering Naturally: Factoid to Full length Answer Generation VaishaliPal - ManishShrivastava - IrshadBhat + ManishShrivastava + IrshadBhat 1–9 In recent years, the task of Question Answering over passages, also pitched as a reading comprehension, has evolved into a very active research area. A reading comprehension system extracts a span of text, comprising of named entities, dates, small phrases, etc., which serve as the answer to a given question. However, these spans of text would result in an unnatural reading experience in a conversational system. Usually, dialogue systems solve this issue by using template-based language generation. These systems, though adequate for a domain specific task, are too restrictive and predefined for a domain independent system. In order to present the user with a more conversational experience, we propose a pointer generator based full-length answer generator which can be used with most QA systems. Our system generates a full length answer given a question and the extracted factoid/span answer without relying on the passage from where the answer was extracted. We also present a dataset of 315000 question, factoid answer and full length answer triples. We have evaluated our system using ROUGE-1,2,L and BLEU and achieved 74.05 BLEU score and 86.25 Rogue-L score. D19-5401 @@ -10581,7 +10581,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. DanqingWang PengfeiLiu XipengQiu - XuanjingHuang + XuanjingHuang 80–89 In this paper, we take stock of the current state of summarization datasets and explore how different factors of datasets influence the generalization behaviour of neural extractive summarization models. Specifically, we first propose several properties of datasets, which matter for the generalization of summarization models. Then we build the connection between priors residing in datasets and model designs, analyzing how different properties of datasets influence the choices of model structure design and training methods. Finally, by taking a typical dataset as an example, we rethink the process of the model design based on the experience of the above analysis. We demonstrate that when we have a deep understanding of the characteristics of datasets, a simple approach can bring significant improvements to the existing state-of-the-art model. D19-5410 @@ -10591,7 +10591,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>G</fixed-case>lobal <fixed-case>V</fixed-case>oices: Crossing Borders in Automatic News Summarization KhanhNguyen - HalDaumé III + HalDaumé III 90–97 We construct Global Voices, a multilingual dataset for evaluating cross-lingual summarization methods. We extract social-network descriptions of Global Voices news articles to cheaply collect evaluation data for into-English and from-English summarization in 15 languages. Especially, for the into-English summarization task, we crowd-source a high-quality evaluation dataset based on guidelines that emphasize accuracy, coverage, and understandability. To ensure the quality of this dataset, we collect human ratings to filter out bad summaries, and conduct a survey on humans, which shows that the remaining summaries are preferred over the social-network summaries. We study the effect of translation quality in cross-lingual summarization, comparing a translate-then-summarize approach with several baselines. Our results highlight the limitations of the ROUGE metric that are overlooked in monolingual summarization. D19-5411 @@ -10631,7 +10631,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. AbramHandler PremkumarGaneshkumar BrendanO’Connor - MohamedAlTantawy + MohamedAlTantawy 111–115 Concept maps are visual summaries, structured as directed graphs: important concepts from a dataset are displayed as vertexes, and edges between vertexes show natural language descriptions of the relationships between the concepts on the map. Thus far, preliminary attempts at automatically creating concept maps have focused on building static summaries. However, in interactive settings, users will need to dynamically investigate particular relationships between pairs of concepts. For instance, a historian using a concept map browser might decide to investigate the relationship between two politicians in a news archive. We present a model which responds to such queries by returning one or more short, importance-ranked, natural language descriptions of the relationship between two requested concepts, for display in a visual interface. Our model is trained on a new public dataset, collected for this task. D19-5414 @@ -10641,7 +10641,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Exploiting Discourse-Level Segmentation for Extractive Summarization ZhengyuanLiu - NancyChen + NancyChen 116–121 Extractive summarization selects and concatenates the most essential text spans in a document. Most, if not all, neural approaches use sentences as the elementary unit to select content for summarization. However, semantic segments containing supplementary information or descriptive details are often nonessential in the generated summaries. In this work, we propose to exploit discourse-level segmentation as a finer-grained means to more precisely pinpoint the core content in a document. We investigate how the sub-sentential segmentation improves extractive summarization performance when content selection is modeled through two basic neural network architectures and a deep bi-directional transformer. Experiment results on the CNN/Daily Mail dataset show that discourse-level segmentation is effective in both cases. In particular, we achieve state-of-the-art performance when discourse-level segmentation is combined with our adapted contextual representation model. D19-5415 @@ -10655,7 +10655,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. D19-55 WeiXu AlanRitter - TimBaldwin + TimBaldwin AfshinRahimi Association for Computational Linguistics
Hong Kong, China
@@ -10682,7 +10682,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Formality Style Transfer for Noisy, User-generated Conversations: Extracting Labeled, Parallel Data from Unlabeled Corpora IsakCzeresnia Etinger - Alan WBlack + Alan WBlack 11–16 Typical datasets used for style transfer in NLP contain aligned pairs of two opposite extremes of a style. As each existing dataset is sourced from a specific domain and context, most use cases will have a sizable mismatch from the vocabulary and sentence structures of any dataset available. This reduces the performance of the style transfer, and is particularly significant for noisy, user-generated text. To solve this problem, we show a technique to derive a dataset of aligned pairs (style-agnostic vs stylistic sentences) from an unlabeled corpus by using an auxiliary dataset, allowing for in-domain training. We test the technique with the Yahoo Formality Dataset and 6 novel datasets we produced, which consist of scripts from 5 popular TV-shows (Friends, Futurama, Seinfeld, Southpark, Stargate SG-1) and the Slate Star Codex online forum. We gather 1080 human evaluations, which show that our method produces a sizable change in formality while maintaining fluency and context; and that it considerably outperforms OpenNMT’s Seq2Seq model directly trained on the Yahoo Formality Dataset. Additionally, we publish the full pipeline code and our novel datasets. D19-5502 @@ -10703,8 +10703,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Personalizing Grammatical Error Correction: Adaptation to Proficiency Level and <fixed-case>L</fixed-case>1 - MariaNadejde - JoelTetreault + MariaNadejde + JoelTetreault 27–33 Grammar error correction (GEC) systems have become ubiquitous in a variety of software applications, and have started to approach human-level performance for some datasets. However, very little is known about how to efficiently personalize these systems to the user’s characteristics, such as their proficiency level and first language, or to emerging domains of text. We present the first results on adapting a general purpose neural GEC system to both the proficiency level and the first language of a writer, using only a few thousand annotated sentences. Our study is the broadest of its kind, covering five proficiency levels and twelve different languages, and comparing three different adaptation scenarios: adapting to the proficiency level only, to the first language only, or to both aspects simultaneously. We show that tailoring to both scenarios achieves the largest performance improvement (3.6 F0.5) relative to a strong baseline. D19-5504 @@ -10738,7 +10738,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Character-Based Models for Adversarial Phone Extraction: Preventing Human Sex Trafficking - NathanaelChambers + NathanaelChambers TimothyForman CatherineGriswold KevinLu @@ -10786,7 +10786,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. VinayakAthavale AayushNaik RajasVanjape - ManishShrivastava + ManishShrivastava 84–93 We introduce the task of algorithm class prediction for programming word problems. A programming word problem is a problem written in natural language, which can be solved using an algorithm or a program. We define classes of various programming word problems which correspond to the class of algorithms required to solve the problem. We present four new datasets for this task, two multiclass datasets with 550 and 1159 problems each and two multilabel datasets having 3737 and 3960 problems each. We pose the problem as a text classification problem and train neural network and non-neural network based models on this task. Our best performing classifier gets an accuracy of 62.7 percent for the multiclass case on the five class classification dataset, Codeforces Multiclass-5 (CFMC5). We also do some human-level analysis and compare human performance with that of our text classification models. Our best classifier has an accuracy only 9 percent lower than that of a human on this task. To the best of our knowledge, these are the first reported results on such a task. We make our code and datasets publicly available. D19-5511 @@ -10797,7 +10797,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Automatic identification of writers’ intentions: Comparing different methods for predicting relationship goals in online dating profile texts Chrisvan der Lee Tessvan der Zanden - EmielKrahmer + EmielKrahmer MariaMos AlexanderSchouten 94–100 @@ -10810,7 +10810,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Contextualized Word Representations from Distant Supervision with and for <fixed-case>NER</fixed-case> AbbasGhaddar - PhillippeLanglais + PhillippeLanglais 101–108 We describe a special type of deep contextualized word representation that is learned from distant supervision annotations and dedicated to named entity recognition. Our extensive experiments on 7 datasets show systematic gains across all domains over strong baselines, and demonstrate that our representation is complementary to previously proposed embeddings. We report new state-of-the-art results on CONLL and ONTONOTES datasets. D19-5513 @@ -10943,7 +10943,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Modelling Uncertainty in Collaborative Document Quality Assessment AiliShen - DanielBeck + DanielBeck BaharSalehi JianzhongQi TimothyBaldwin @@ -10955,7 +10955,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Conceptualisation and Annotation of Drug Nonadherence Information for Knowledge Extraction from Patient-Generated Texts - AnjaBelz + AnjaBelz RichardHoile ElizabethFord AzamMullick @@ -10970,7 +10970,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Shirley AnugrahHayati AditiChaudhary NaokiOtani - Alan WBlack + Alan WBlack 212–216 Irony detection is an important task with applications in identification of online abuse and harassment. With the ubiquitous use of non-verbal cues such as emojis in social media, in this work we aim to study the role of these structures in irony detection. Since the existing irony detection datasets have <10% ironic tweets with emoji, classifiers trained on them are insensitive to emojis. We propose an automated pipeline for creating a more balanced dataset. D19-5527 @@ -11011,7 +11011,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Robustness to Capitalization Errors in Named Entity Recognition SravanBodapati HyokunYun - YaserAl-Onaizan + YaserAl-Onaizan 237–242 Robustness to capitalization errors is a highly desirable characteristic of named entity recognizers, yet we find standard models for the task are surprisingly brittle to such noise. Existing methods to improve robustness to the noise completely discard given orthographic information, which significantly degrades their performance on well-formed text. We propose a simple alternative approach based on data augmentation, which allows the model to learn to utilize or ignore orthographic information depending on its usefulness in the context. It achieves competitive robustness to capitalization errors while making negligible compromise to its performance on well-formed text and significantly improving generalization power on noisy user-generated text. Our experiments clearly and consistently validate our claim across different types of machine learning models, languages, and dataset sizes. D19-5531 @@ -11054,7 +11054,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Incremental processing of noisy user utterances in the spoken language understanding task StefanConstantin JanNiehues - AlexWaibel + AlexWaibel 265–274 The state-of-the-art neural network architectures make it possible to create spoken language understanding systems with high quality and fast processing time. One major challenge for real-world applications is the high latency of these systems caused by triggered actions with high executions times. If an action can be separated into subactions, the reaction time of the systems can be improved through incremental processing of the user utterance and starting subactions while the utterance is still being uttered. In this work, we present a model-agnostic method to achieve high quality in processing incrementally produced partial utterances. Based on clean and noisy versions of the ATIS dataset, we show how to create datasets with our method to create low-latency natural language understanding components. We get improvements of up to 47.91 absolute percentage points in the metric F1-score. D19-5535 @@ -11065,8 +11065,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Benefits of Data Augmentation for <fixed-case>NMT</fixed-case>-based Text Normalization of User-Generated Content ClaudiaMatos Veliz - OrpheeDe Clercq - VeroniqueHoste + OrpheeDe Clercq + VeroniqueHoste 275–285 One of the most persistent characteristics of written user-generated content (UGC) is the use of non-standard words. This characteristic contributes to an increased difficulty to automatically process and analyze UGC. Text normalization is the task of transforming lexical variants to their canonical forms and is often used as a pre-processing step for conventional NLP tasks in order to overcome the performance drop that NLP systems experience when applied to UGC. In this work, we follow a Neural Machine Translation approach to text normalization. To train such an encoder-decoder model, large parallel training corpora of sentence pairs are required. However, obtaining large data sets with UGC and their normalized version is not trivial, especially for languages other than English. In this paper, we explore how to overcome this data bottleneck for Dutch, a low-resource language. We start off with a small publicly available parallel Dutch data set comprising three UGC genres and compare two different approaches. The first is to manually normalize and add training data, a money and time-consuming task. The second approach is a set of data augmentation techniques which increase data size by converting existing resources into synthesized non-standard forms. Our results reveal that, while the different approaches yield similar results regarding the normalization issues in the test set, they also introduce a large amount of over-normalizations. D19-5536 @@ -11086,7 +11086,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Towards Automated Semantic Role Labelling of <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Code-Mixed Tweets RiyaPal - DiptiSharma + DiptiSharma 291–296 We present a system for automating Semantic Role Labelling of Hindi-English code-mixed tweets. We explore the issues posed by noisy, user generated code-mixed social media data. We also compare the individual effect of various linguistic features used in our system. Our proposed model is a 2-step system for automated labelling which gives an overall accuracy of 84% for Argument Classification, marking a 10% increase over the existing rule-based baseline model. This is the first attempt at building a statistical Semantic Role Labeller for Hindi-English code-mixed data, to the best of our knowledge. D19-5538 @@ -11096,8 +11096,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Enhancing <fixed-case>BERT</fixed-case> for Lexical Normalization BenjaminMuller - BenoitSagot - DjaméSeddah + BenoitSagot + DjaméSeddah 297–306 Language model-based pre-trained representations have become ubiquitous in natural language processing. They have been shown to significantly improve the performance of neural models on a great variety of tasks. However, it remains unclear how useful those general models can be in handling non-canonical text. In this article, focusing on User Generated Content (UGC), we study the ability of BERT to perform lexical normalisation. Our contribution is simple: by framing lexical normalisation as a token prediction task, by enhancing its architecture and by carefully fine-tuning it, we show that BERT can be a competitive lexical normalisation model without the need of any UGC resources aside from 3,000 training sentences. To the best of our knowledge, it is the first work done in adapting and analysing the ability of this model to handle noisy UGC data. D19-5539 @@ -11120,7 +11120,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Improving Multi-label Emotion Classification by Integrating both General and Domain-specific Knowledge WenhaoYing RongXiang - QinLu + QinLu 316–321 Deep learning based general language models have achieved state-of-the-art results in many popular tasks such as sentiment analysis and QA tasks. Text in domains like social media has its own salient characteristics. Domain knowledge should be helpful in domain relevant tasks. In this work, we devise a simple method to obtain domain knowledge and further propose a method to integrate domain knowledge with general knowledge based on deep language models to improve performance of emotion classification. Experiments on Twitter data show that even though a deep language model fine-tuned by a target domain data has attained comparable results to that of previous state-of-the-art models, this fine-tuned model can still benefit from our extracted domain knowledge to obtain more improvement. This highlights the importance of making use of domain knowledge in domain-specific applications. D19-5541 @@ -11228,7 +11228,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. YeonSeonwoo SungjoonPark DongkwanKim - AliceOh + AliceOh 387–396 Additive compositionality of word embedding models has been studied from empirical and theoretical perspectives. Existing research on justifying additive compositionality of existing word embedding models requires a rather strong assumption of uniform word distribution. In this paper, we relax that assumption and propose more realistic conditions for proving additive compositionality, and we develop a novel word and sub-word embedding model that satisfies additive compositionality under those conditions. We then empirically show our model’s improved semantic representation performance on word similarity and noisy sentence similarity. D19-5551 @@ -11250,7 +11250,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Phonetic Normalization for Machine Translation of User Generated Content José CarlosRosales Núñez - DjaméSeddah + DjaméSeddah GuillaumeWisniewski 407–416 We present an approach to correct noisy User Generated Content (UGC) in French aiming to produce a pretreatement pipeline to improve Machine Translation for this kind of non-canonical corpora. In order to do so, we have implemented a character-based neural model phonetizer to produce IPA pronunciations of words. In this way, we intend to correct grammar, vocabulary and accentuation errors often present in noisy UGC corpora. Our method leverages on the fact that some errors are due to confusion induced by words with similar pronunciation which can be corrected using a phonetic look-up table to produce normalization candidates. These potential corrections are then encoded in a lattice and ranked using a language model to output the most probable corrected phrase. Compare to using other phonetizers, our method boosts a transformer-based machine translation system on UGC. @@ -11321,7 +11321,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. AndrewFinch HiroakiHayashi IoannisKonstas - ThangLuong + ThangLuong GrahamNeubig YusukeOda KatsuhitoSudoh @@ -11364,7 +11364,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Recycling a Pre-trained <fixed-case>BERT</fixed-case> Encoder for Neural Machine Translation KenjiImamura - EiichiroSumita + EiichiroSumita 23–31 In this paper, a pre-trained Bidirectional Encoder Representations from Transformers (BERT) model is applied to Transformer-based neural machine translation (NMT). In contrast to monolingual tasks, the number of unlearned model parameters in an NMT decoder is as huge as the number of learned parameters in the BERT model. To train all the models appropriately, we employ two-stage optimization, which first trains only the unlearned parameters by freezing the BERT model, and then fine-tunes all the sub-models. In our experiments, stable two-stage optimization was achieved, in contrast the BLEU scores of direct fine-tuning were extremely low. Consequently, the BLEU scores of the proposed method were better than those of the Transformer base model and the same model without pre-training. Additionally, we confirmed that NMT with the BERT encoder is more effective in low-resource settings. D19-5603 @@ -11377,7 +11377,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. YizheZhang SudhaRao ChrisBrockett - SungjinLee + SungjinLee 32–43 Ambiguous user queries in search engines result in the retrieval of documents that often span multiple topics. One potential solution is for the search engine to generate multiple refined queries, each of which relates to a subset of the documents spanning the same topic. A preliminary step towards this goal is to generate a question that captures common concepts of multiple documents. We propose a new task of generating common question from multiple documents and present simple variant of an existing multi-source encoder-decoder framework, called the Multi-Source Question Generator (MSQG). We first train an RNN-based single encoder-decoder generator from (single document, question) pairs. At test time, given multiple documents, the Distribute step of our MSQG model predicts target word distributions for each document using the trained model. The Aggregate step aggregates these distributions to generate a common question. This simple yet effective strategy significantly outperforms several existing baseline models applied to the new task when evaluated using automated metrics and human judgments on the MS-MARCO-QA dataset. D19-5604 @@ -11503,7 +11503,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Enhanced Transformer Model for Data-to-Text Generation LiGong - JosepCrego + JosepCrego JeanSenellart 148–156 Neural models have recently shown significant progress on data-to-text generation tasks in which descriptive texts are generated conditioned on database records. In this work, we present a new Transformer-based data-to-text generation model which learns content selection and summary generation in an end-to-end fashion. We introduce two extensions to the baseline transformer model: First, we modify the latent representation of the input, which helps to significantly improve the content correctness of the output summary; Second, we include an additional learning objective that accounts for content selection modelling. In addition, we propose two data augmentation methods that succeed to further improve performance of the resulting generation models. Evaluation experiments show that our final model outperforms current state-of-the-art systems as measured by different metrics: BLEU, content selection precision and content ordering. We made publicly available the transformer extension presented in this paper. @@ -11539,7 +11539,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Adaptively Scheduled Multitask Learning: The Case of Low-Resource Neural Machine Translation PooryaZaremoodi - GholamrezaHaffari + GholamrezaHaffari 177–186 Neural Machine Translation (NMT), a data-hungry technology, suffers from the lack of bilingual data in low-resource scenarios. Multitask learning (MTL) can alleviate this issue by injecting inductive biases into NMT, using auxiliary syntactic and semantic tasks. However, an effective training schedule is required to balance the importance of tasks to get the best use of the training signal. The role of training schedule becomes even more crucial in biased-MTL where the goal is to improve one (or a subset) of tasks the most, e.g. translation quality. Current approaches for biased-MTL are based on brittle hand-engineered heuristics that require trial and error, and should be (re-)designed for each learning scenario. To the best of our knowledge, ours is the first work on adaptively and dynamically changing the training schedule in biased-MTL. We propose a rigorous approach for automatically reweighing the training data of the main and auxiliary tasks throughout the training process based on their contributions to the generalisability of the main NMT task. Our experiments on translating from English to Vietnamese/Turkish/Spanish show improvements of up to +1.2 BLEU points, compared to strong baselines. Additionally, our analyses shed light on the dynamic of needs throughout the training of NMT: from syntax to semantic. D19-5618 @@ -11550,7 +11550,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. On the Importance of Word Boundaries in Character-level Neural Machine Translation DuyguAtaman OrhanFirat - Mattia A.Di Gangi + Mattia A.Di Gangi MarcelloFederico AlexandraBirch 187–193 @@ -11620,7 +11620,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Auto-Sizing the Transformer Network: Improving Speed, Efficiency, and Performance for Low-Resource Machine Translation KentonMurray JefferyKinnison - Toan Q.Nguyen + Toan Q.Nguyen WalterScheirer DavidChiang 231–240 @@ -11652,7 +11652,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Monash University’s Submissions to the <fixed-case>WNGT</fixed-case> 2019 Document Translation Task SameenMaruf - GholamrezaHaffari + GholamrezaHaffari 256–261 We describe the work of Monash University for the shared task of Rotowire document translation organised by the 3rd Workshop on Neural Generation and Translation (WNGT 2019). We submitted systems for both directions of the English-German language pair. Our main focus is on employing an established document-level neural machine translation model for this task. We achieve a BLEU score of 39.83 (41.46 BLEU per WNGT evaluation) for En-De and 45.06 (47.39 BLEU per WNGT evaluation) for De-En translation directions on the Rotowire test set. All experiments conducted in the process are also described. D19-5628 @@ -11662,7 +11662,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>SYSTRAN</fixed-case> @ <fixed-case>WNGT</fixed-case> 2019: <fixed-case>DGT</fixed-case> Task LiGong - JosepCrego + JosepCrego JeanSenellart 262–267 This paper describes SYSTRAN participation to the Document-level Generation and Trans- lation (DGT) Shared Task of the 3rd Workshop on Neural Generation and Translation (WNGT 2019). We participate for the first time using a Transformer network enhanced with modified input embeddings and optimising an additional objective function that considers content selection. The network takes in structured data of basketball games and outputs a summary of the game in natural language. @@ -11686,7 +11686,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. FahimehSaleh AlexandreBerard IoanCalapodescu - LaurentBesacier + LaurentBesacier 273–279 Recently, neural models led to significant improvements in both machine translation (MT) and natural language generation tasks (NLG). However, generation of long descriptive summaries conditioned on structured data remains an open challenge. Likewise, MT that goes beyond sentence-level context is still an open issue (e.g., document-level MT or MT with metadata). To address these challenges, we propose to leverage data from both tasks and do transfer learning between MT, NLG, and MT with source-side metadata (MT+NLG). First, we train document-based MT systems with large amounts of parallel data. Then, we adapt these models to pure NLG and MT+NLG tasks by fine-tuning with smaller amounts of domain-specific data. This end-to-end NLG approach, without data selection and planning, outperforms the previous state of the art on the Rotowire NLG task. We participated to the “Document Generation and Translation” task at WNGT 2019, and ranked first in all tracks. D19-5631 @@ -11697,7 +11697,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. From Research to Production and Back: Ludicrously Fast Neural Machine Translation Young JinKim MarcinJunczys-Dowmunt - HanyHassan + HanyHassan AlhamFikri Aji KennethHeafield RomanGrundkiewicz @@ -11710,9 +11710,9 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Selecting, Planning, and Rewriting: A Modular Approach for Data-to-Document Generation and Translation - LeslyMiculicich + LeslyMiculicich MarcMarone - HanyHassan + HanyHassan 289–296 In this paper, we report our system submissions to all 6 tracks of the WNGT 2019 shared task on Document-Level Generation and Translation. The objective is to generate a textual document from either structured data: generation task, or a document in a different language: translation task. For the translation task, we focused on adapting a large scale system trained on WMT data by fine tuning it on the RotoWire data. For the generation task, we participated with two systems based on a selection and planning model followed by (a) a simple language model generation, and (b) a GPT-2 pre-trained language model approach. The selection and planning module chooses a subset of table records in order, and the language models produce text given such a subset. D19-5633 @@ -11751,12 +11751,12 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>P</fixed-case>harma<fixed-case>C</fixed-case>o<fixed-case>NER</fixed-case>: Pharmacological Substances, Compounds and proteins Named Entity Recognition track - AitorGonzalez-Agirre - MontserratMarimon + AitorGonzalez-Agirre + MontserratMarimon AnderIntxaurrondo ObduliaRabal MartaVillegas - MartinKrallinger + MartinKrallinger 1–10 One of the biomedical entity types of relevance for medicine or biosciences are chemical compounds and drugs. The correct detection these entities is critical for other text mining applications building on them, such as adverse drug-reaction detection, medication-related fake news or drug-target extraction. Although a significant effort was made to detect mentions of drugs/chemicals in English texts, so far only very limited attempts were made to recognize them in medical documents in other languages. Taking into account the growing amount of medical publications and clinical records written in Spanish, we have organized the first shared task on detecting drug and chemical entities in Spanish medical documents. Additionally, we included a clinical concept-indexing sub-track asking teams to return SNOMED-CT identifiers related to drugs/chemicals for a collection of documents. For this task, named PharmaCoNER, we generated annotation guidelines together with a corpus of 1,000 manually annotated clinical case studies. A total of 22 teams participated in the sub-track 1, (77 system runs), and 7 teams in the sub-track 2 (19 system runs). Top scoring teams used sophisticated deep learning approaches yielding very competitive results with F-measures above 0.91. These results indicate that there is a real interest in promoting biomedical text mining efforts beyond English. We foresee that the PharmaCoNER annotation guidelines, corpus and participant systems will foster the development of new resources for clinical and biomedical text mining systems of Spanish medical data. D19-5701 @@ -11787,9 +11787,9 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>I</fixed-case>xa<fixed-case>M</fixed-case>ed at <fixed-case>P</fixed-case>harmaco<fixed-case>NER</fixed-case> Challenge 2019 XabierLahuerta IakesGoenaga - KoldoGojenola + KoldoGojenola AitziberAtutxa Salazar - MaiteOronoz + MaiteOronoz 21–25 The aim of this paper is to present our approach (IxaMed) in the PharmacoNER 2019 task. The task consists of identifying chemical, drug, and gene/protein mentions from clinical case studies written in Spanish. The evaluation of the task is divided in two scenarios: one corresponding to the detection of named entities and one corresponding to the indexation of named entities that have been previously identified. In order to identify named entities we have made use of a Bi-LSTM with a CRF on top in combination with different types of word embeddings. We have achieved our best result (86.81 F-Score) combining pretrained word embeddings of Wikipedia and Electronic Health Records (50M words) with contextual string embeddings of Wikipedia and Electronic Health Records. On the other hand, for the indexation of the named entities we have used the Levenshtein distance obtaining a 85.34 F-Score as our best result. D19-5704 @@ -11954,8 +11954,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Using Snomed to recognize and index chemical and drug mentions. PilarLópez Úbeda Manuel CarlosDíaz Galiano - L. AlfonsoUrena Lopez - MaiteMartin + L. AlfonsoUrena Lopez + MaiteMartin 115–120 In this paper we describe a new named entity extraction system. Our work proposes a system for the identification and annotation of drug names in Spanish biomedical texts based on machine learning and deep learning models. Subsequently, a standardized code using Snomed is assigned to these drugs, for this purpose, Natural Language Processing tools and techniques have been used, and a dictionary of different sources of information has been built. The results are promising, we obtain 78% in F1 score on the first sub-track and in the second task we map with Snomed correctly 72% of the found entities. D19-5718 @@ -11968,7 +11968,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. LouiseDeléger EstelleChaix MouhamadouBa - ClaireNédellec + ClaireNédellec 121–131 This paper presents the fourth edition of the Bacteria Biotope task at BioNLP Open Shared Tasks 2019. The task focuses on the extraction of the locations and phenotypes of microorganisms from PubMed abstracts and full-text excerpts, and the characterization of these entities with respect to reference knowledge sources (NCBI taxonomy, OntoBiotope ontology). The task is motivated by the importance of the knowledge on biodiversity for fundamental research and applications in microbiology. The paper describes the different proposed subtasks, the corpus characteristics, and the challenge organization. We also provide an analysis of the results obtained by participants, and inspect the evolution of the results since the last edition in 2016. D19-5719 @@ -11979,7 +11979,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Linguistically Informed Relation Extraction and Neural Architectures for Nested Named Entity Recognition in <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case>-<fixed-case>OST</fixed-case> 2019 PankajGupta UsamaYaseen - HinrichSchütze + HinrichSchütze 132–142 Named Entity Recognition (NER) and Relation Extraction (RE) are essential tools in distilling knowledge from biomedical literature. This paper presents our findings from participating in BioNLP Shared Tasks 2019. We addressed Named Entity Recognition including nested entities extraction, Entity Normalization and Relation Extraction. Our proposed approach of Named Entities can be generalized to different languages and we have shown it’s effectiveness for English and Spanish text. We investigated linguistic features, hybrid loss including ranking and Conditional Random Fields (CRF), multi-task objective and token level ensembling strategy to improve NER. We employed dictionary based fuzzy and semantic search to perform Entity Normalization. Finally, our RE system employed Support Vector Machine (SVM) with linguistic features. Our NER submission (team:MIC-CIS) ranked first in BB-2019 norm+NER task with standard error rate (SER) of 0.7159 and showed competitive performance on PharmaCo NER task with F1-score of 0.8662. Our RE system ranked first in the SeeDev-binary Relation Extraction Task with F1-score of 0.3738. D19-5720 @@ -12003,7 +12003,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>BOUN</fixed-case>-<fixed-case>ISIK</fixed-case> Participation: An Unsupervised Approach for the Named Entity Normalization and Relation Extraction of Bacteria Biotopes İlknurKaradeniz Ömer FarukTuna - ArzucanÖzgür + ArzucanÖzgür 150–157 This paper presents our participation to the Bacteria Biotope Task of the BioNLP Shared Task 2019. Our participation includes two systems for the two subtasks of the Bacteria Biotope Task: the normalization of entities (BB-norm) and the identification of the relations between the entities given a biomedical text (BB-rel). For the normalization of entities, we utilized word embeddings and syntactic re-ranking. For the relation extraction task, pre-defined rules are used. Although both approaches are unsupervised, in the sense that they do not need any labeled data, they achieved promising results. Especially, for the BB-norm task, the results have shown that the proposed method performs as good as deep learning based methods, which require labeled data. D19-5722 @@ -12016,7 +12016,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. FeiLi MingCheng HongYu - DonghongJi + DonghongJi 158–167 abstract In this article, we describe our approach for the Bacteria Biotopes relation extraction (BB-rel) subtask in the BioNLP Shared Task 2019. This task aims to promote the development of text mining systems that extract relationships between Microorganism, Habitat and Phenotype entities. In this paper, we propose a novel approach for dependency graph construction based on lexical chains, so one dependency graph can represent one or multiple sentences. After that, we propose a neural network model which consists of the bidirectional long short-term memories and an attention graph convolution neural network to learn relation extraction features from the graph. Our approach is able to extract both intra- and inter-sentence relations, and meanwhile utilize syntax information. The results show that our approach achieved the best F1 (66.3%) in the official evaluation participated by 7 teams. D19-5723 @@ -12035,14 +12035,14 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>CRAFT</fixed-case> Shared Tasks 2019 Overview — Integrated Structure, Semantics, and Coreference - WilliamBaumgartner + WilliamBaumgartner MichaelBada SampoPyysalo - Manuel R.Ciosici - NegacyHailu + Manuel R.Ciosici + NegacyHailu HarrisonPielke-Lombardo MichaelRegan - LawrenceHunter + LawrenceHunter 174–184 As part of the BioNLP Open Shared Tasks 2019, the CRAFT Shared Tasks 2019 provides a platform to gauge the state of the art for three fundamental language processing tasks — dependency parse construction, coreference resolution, and ontology concept identification — over full-text biomedical articles. The structural annotation task requires the automatic generation of dependency parses for each sentence of an article given only the article text. The coreference resolution task focuses on linking coreferring base noun phrase mentions into chains using the symmetrical and transitive identity relation. The ontology concept annotation task involves the identification of concept mentions within text using the classes of ten distinct ontologies in the biomedical domain, both unmodified and augmented with extension classes. This paper provides an overview of each task, including descriptions of the data provided to participants and the evaluation metrics used, and discusses participant results relative to baseline performances for each of the three tasks. D19-5725 @@ -12102,7 +12102,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case>-<fixed-case>OST</fixed-case> 2019 <fixed-case>RD</fixed-case>o<fixed-case>C</fixed-case> Tasks: Multi-grain Neural Relevance Ranking Using Topics and Attention Based Query-Document-Sentence Interactions PankajGupta YatinChaudhary - HinrichSchütze + HinrichSchütze 227–236 This paper presents our system details and results of participation in the RDoC Tasks of BioNLP-OST 2019. Research Domain Criteria (RDoC) construct is a multi-dimensional and broad framework to describe mental health disorders by combining knowledge from genomics to behaviour. Non-availability of RDoC labelled dataset and tedious labelling process hinders the use of RDoC framework to reach its full potential in Biomedical research community and Healthcare industry. Therefore, Task-1 aims at retrieval and ranking of PubMed abstracts relevant to a given RDoC construct and Task-2 aims at extraction of the most relevant sentence from a given PubMed abstract. We investigate (1) attention based supervised neural topic model and SVM for retrieval and ranking of PubMed abstracts and, further utilize BM25 and other relevance measures for re-ranking, (2) supervised and unsupervised sentence ranking models utilizing multi-view representations comprising of query-aware attention-based sentence representation (QAR), bag-of-words (BoW) and TF-IDF. Our best systems achieved 1st rank and scored 0.86 mAP and 0.58 macro average accuracy in Task-1 and Task-2 respectively. D19-5730 @@ -12162,10 +12162,10 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>CALOR</fixed-case>-<fixed-case>QUEST</fixed-case> : generating a training corpus for Machine Reading Comprehension models from shallow semantic annotations - FredericBechet + FredericBechet CindyAloui DelphineCharlet - GeraldineDamnati + GeraldineDamnati JohannesHeinecke AlexisNasr FredericHerledan @@ -12182,7 +12182,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. DianYu JianshuChen HengJi - ClaireCardie + ClaireCardie DongYu 27–37 We focus on multiple-choice question answering (QA) tasks in subject areas such as science, where we require both broad background knowledge and the facts from the given subject-area reference corpus. In this work, we explore simple yet effective methods for exploiting two sources of external knowledge for subject-area QA. The first enriches the original subject-area reference corpus with relevant text snippets extracted from an open-domain resource (i.e., Wikipedia) that cover potentially ambiguous concepts in the question and answer options. As in other QA research, the second method simply increases the amount of training data by appending additional in-domain subject-area instances. Experiments on three challenging multiple-choice science QA tasks (i.e., ARC-Easy, ARC-Challenge, and OpenBookQA) demonstrate the effectiveness of our methods: in comparison to the previous state-of-the-art, we obtain absolute gains in accuracy of up to 8.1%, 13.0%, and 12.8%, respectively. While we observe consistent gains when we introduce knowledge from Wikipedia, we find that employing additional QA training instances is not uniformly helpful: performance degrades when the added instances exhibit a higher level of difficulty than the original training data. As one of the first studies on exploiting unstructured external knowledge for subject-area QA, we hope our methods, observations, and discussion of the exposed limitations may shed light on further developments in the area. @@ -12252,7 +12252,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Towards Answer-unaware Conversational Question Generation MaoNakanishi TetsunoriKobayashi - YoshihikoHayashi + YoshihikoHayashi 63–71 Conversational question generation is a novel area of NLP research which has a range of potential applications. This paper is first to presents a framework for conversational question generation that is unaware of the corresponding answers. To properly generate a question coherent to the grounding text and the current conversation history, the proposed framework first locates the focus of a question in the text passage, and then identifies the question pattern that leads the sequential generation of the words in a question. The experiments using the CoQA dataset demonstrate that the quality of generated questions greatly improves if the question foci and the question patterns are correctly identified. In addition, it was shown that the question foci, even estimated with a reasonable accuracy, could contribute to the quality improvement. These results established that our research direction may be promising, but at the same time revealed that the identification of question patterns is a challenging issue, and it has to be largely refined to achieve a better quality in the end-to-end automatic question generation. D19-5809 @@ -12263,7 +12263,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Cross-Task Knowledge Transfer for Query-Based Text Summarization ElozinoEgonmwan VittorioCastelli - Md ArafatSultan + Md ArafatSultan 72–77 We demonstrate the viability of knowledge transfer between two related tasks: machine reading comprehension (MRC) and query-based text summarization. Using an MRC model trained on the SQuAD1.1 dataset as a core system component, we first build an extractive query-based summarizer. For better precision, this summarizer also compresses the output of the MRC model using a novel sentence compression technique. We further leverage pre-trained machine translation systems to abstract our extracted summaries. Our models achieve state-of-the-art results on the publicly available CNN/Daily Mail and Debatepedia datasets, and can serve as simple yet powerful baselines for future systems. We also hope that these results will encourage research on transfer learning from large MRC corpora to query-based summarization. D19-5810 @@ -12276,7 +12276,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. LeaFrermann DiegoMarcheggiani RoiBlanco - LluísMàrquez + LluísMàrquez 78–85 We present a system for answering questions based on the full text of books (BookQA), which first selects book passages given a question at hand, and then uses a memory network to reason and predict an answer. To improve generalization, we pretrain our memory network using artificial questions generated from book sentences. We experiment with the recently published NarrativeQA corpus, on the subset of Who questions, which expect book characters as answers. We experimentally show that BERT-based retrieval and pretraining improve over baseline results significantly. At the same time, we confirm that NarrativeQA is a highly challenging data set, and that there is need for novel research in order to achieve high-precision BookQA results. We analyze some of the bottlenecks of the current approach, and we argue that more research is needed on text representation, retrieval of relevant passages, and reasoning, including commonsense knowledge. D19-5811 @@ -12370,7 +12370,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. JamesRoute KaixinMa YixuanGeng - EricNyberg + EricNyberg 125–136 The field of question answering (QA) has seen rapid growth in new tasks and modeling approaches in recent years. Large scale datasets and focus on challenging linguistic phenomena have driven development in neural models, some of which have achieved parity with human performance in limited cases. However, an examination of state-of-the-art model output reveals that a gap remains in reasoning ability compared to a human, and performance tends to degrade when models are exposed to less-constrained tasks. We are interested in more clearly defining the strengths and limitations of leading models across diverse QA challenges, intending to help future researchers with identifying pathways to generalizable performance. We conduct extensive qualitative and quantitative analyses on the results of four models across four datasets and relate common errors to model capabilities. We also illustrate limitations in the datasets we examine and discuss a way forward for achieving generalizable models and datasets that broadly test QA capabilities. D19-5818 @@ -12417,7 +12417,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Let Me Know What to Ask: Interrogative-Word-Aware Question Generation JunmoKang HaritzPuerto San Roman - Sung-HyonMyaeng + Sung-HyonMyaeng 163–171 Question Generation (QG) is a Natural Language Processing (NLP) task that aids advances in Question Answering (QA) and conversational assistants. Existing models focus on generating a question based on a text and possibly the answer to the generated question. They need to determine the type of interrogative word to be generated while having to pay attention to the grammar and vocabulary of the question. In this work, we propose Interrogative-Word-Aware Question Generation (IWAQG), a pipelined system composed of two modules: an interrogative word classifier and a QG model. The first module predicts the interrogative word that is provided to the second module to create the question. Owing to an increased recall of deciding the interrogative words to be used for the generated questions, the proposed model achieves new state-of-the-art results on the task of QG in SQuAD, improving from 46.58 to 47.69 in BLEU-1, 17.55 to 18.53 in BLEU-4, 21.24 to 22.33 in METEOR, and from 44.53 to 46.94 in ROUGE-L. D19-5822 @@ -12448,7 +12448,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Question Answering Using Hierarchical Attention on Top of <fixed-case>BERT</fixed-case> Features RehamOsama - NagwaEl-Makky + NagwaEl-Makky MarwanTorki 191–195 The model submitted works as follows. When supplied a question and a passage it makes use of the BERT embedding along with the hierarchical attention model which consists of 2 parts, the co-attention and the self-attention, to locate a continuous span of the passage that is the answer to the question. @@ -12471,7 +12471,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Generalizing Question Answering System with Pre-trained Language Model Fine-tuning DanSu YanXu - Genta IndraWinata + Genta IndraWinata PengXu HyeondeyKim ZihanLiu @@ -12549,8 +12549,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Leveraging syntactic parsing to improve event annotation matching CamielColruyt - OrphéeDe Clercq - VéroniqueHoste + OrphéeDe Clercq + VéroniqueHoste 15–23 Detecting event mentions is the first step in event extraction from text and annotating them is a notoriously difficult task. Evaluating annotator consistency is crucial when building datasets for mention detection. When event mentions are allowed to cover many tokens, annotators may disagree on their span, which means that overlapping annotations may then refer to the same event or to different events. This paper explores different fuzzy-matching functions which aim to resolve this ambiguity. The functions extract the sets of syntactic heads present in the annotations, use the Dice coefficient to measure the similarity between sets and return a judgment based on a given threshold. The functions are tested against the judgment of a human evaluator and a comparison is made between sets of tokens and sets of syntactic heads. The best-performing function is a head-based function that is found to agree with the human evaluator in 89% of cases. D19-5903 @@ -12587,7 +12587,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Computer Assisted Annotation of Tension Development in <fixed-case>TED</fixed-case> Talks through Crowdsourcing SeungwonYoon WonsukYang - JongPark + JongPark 39–47 We propose a method of machine-assisted annotation for the identification of tension development, annotating whether the tension is increasing, decreasing, or staying unchanged. We use a neural network based prediction model, whose predicted results are given to the annotators as initial values for the options that they are asked to choose. By presenting such initial values to the annotators, the annotation task becomes an evaluation task where the annotators inspect whether or not the predicted results are correct. To demonstrate the effectiveness of our method, we performed the annotation task in both in-house and crowdsourced environments. For the crowdsourced environment, we compared the annotation results with and without our method of machine-assisted annotation. We find that the results with our method showed a higher agreement to the gold standard than those without, though our method had little effect at reducing the time for annotation. Our codes for the experiment are made publicly available. D19-5906 @@ -12653,7 +12653,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. KaixinMa JonathanFrancis QuanyangLu - EricNyberg + EricNyberg AlessandroOltramari 22–32 Non-extractive commonsense QA remains a challenging AI task, as it requires systems to reason about, synthesize, and gather disparate pieces of information, in order to generate responses to queries. Recent approaches on such tasks show increased performance, only when models are either pre-trained with additional information or when domain-specific heuristics are used, without any special consideration regarding the knowledge resource type. In this paper, we perform a survey of recent commonsense QA methods and we provide a systematic analysis of popular knowledge resources and knowledge-integration methods, across benchmarks from multiple commonsense datasets. Our results and analysis show that attention-based injection seems to be a preferable choice for knowledge integration and that the degree of domain overlap, between knowledge bases and datasets, plays a crucial role in determining model success. @@ -12677,7 +12677,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Commonsense about Human Senses: Labeled Data Collection Processes - NdapaNakashole + NdapaNakashole 43–52 We consider the problem of extracting from text commonsense knowledge pertaining to human senses such as sound and smell. First, we consider the problem of recognizing mentions of human senses in text. Our contribution is a method for acquiring labeled data. Experiments show the effectiveness of our proposed data labeling approach when used with standard machine learning models on the task of sense recognition in text. Second, we propose to extract novel, common sense relationships pertaining to sense perception concepts. Our contribution is a process for generating labeled data by leveraging large corpora and crowdsourcing questionnaires. D19-6005 @@ -12687,7 +12687,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Extracting Common Inference Patterns from Semi-Structured Explanations SebastianThiem - PeterJansen + PeterJansen 53–65 Complex questions often require combining multiple facts to correctly answer, particularly when generating detailed explanations for why those answers are correct. Combining multiple facts to answer questions is often modeled as a “multi-hop” graph traversal problem, where a given solver must find a series of interconnected facts in a knowledge graph that, taken together, answer the question and explain the reasoning behind that answer. Multi-hop inference currently suffers from semantic drift, or the tendency for chains of reasoning to “drift”’ to unrelated topics, and this semantic drift greatly limits the number of facts that can be combined in both free text or knowledge base inference. In this work we present our effort to mitigate semantic drift by extracting large high-confidence multi-hop inference patterns, generated by abstracting large-scale explanatory structure from a corpus of detailed explanations. We represent these inference patterns as sets of generalized constraints over sentences represented as rows in a knowledge base of semi-structured tables. We present a prototype tool for identifying common inference patterns from corpora of semi-structured explanations, and use it to successfully extract 67 inference patterns from a “matter” subset of standardized elementary science exam questions that span scientific and world knowledge. D19-6006 @@ -12790,7 +12790,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Can a Gorilla Ride a Camel? Learning Semantic Plausibility from Text IanPorada KaheerSuleman - Jackie Chi KitCheung + Jackie Chi KitCheung 123–129 Modeling semantic plausibility requires commonsense knowledge about the world and has been used as a testbed for exploring various knowledge representations. Previous work has focused specifically on modeling physical plausibility and shown that distributional methods fail when tested in a supervised setting. At the same time, distributional models, namely large pretrained language models, have led to improved results for many natural language understanding tasks. In this work, we show that these pretrained language models are in fact effective at modeling physical plausibility in the supervised setting. We therefore present the more difficult problem of learning to model physical plausibility directly from text. We create a training set by extracting attested events from a large corpus, and we provide a baseline for training on these attested events in a self-supervised manner and testing on a physical plausibility task. We believe results could be further improved by injecting explicit commonsense knowledge into a distributional model. D19-6015 @@ -12817,7 +12817,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. ColinCherry GregDurrett GeorgeFoster - RezaHaffari + RezaHaffari ShahramKhadivi NanyunPeng XiangRen @@ -12941,7 +12941,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Empirical Evaluation of Active Learning Techniques for Neural <fixed-case>MT</fixed-case> XiangkaiZeng SarthakGarg - RajenChatterjee + RajenChatterjee UdhyakumarNallasamy MatthiasPaulik 84–93 @@ -12965,7 +12965,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Few-Shot and Zero-Shot Learning for Historical Text Normalization MarcelBollmann NataliaKorchagina - AndersSøgaard + AndersSøgaard 104–114 Historical text normalization often relies on small training datasets. Recent work has shown that multi-task learning can lead to significant improvements by exploiting synergies with related datasets, but there has been no systematic study of different multi-task learning architectures. This paper evaluates 63 multi-task learning configurations for sequence-to-sequence-based historical text normalization across ten datasets from eight languages, using autoencoding, grapheme-to-phoneme mapping, and lemmatization as auxiliary tasks. We observe consistent, significant improvements across languages when training data for the target task is limited, but minimal or no improvements when training data is abundant. We also show that zero-shot learning outperforms the simple, but relatively strong, identity baseline. D19-6112 @@ -13075,7 +13075,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. StevenHillis IsakCzeresnia Etinger HanZhang - Alan WBlack + Alan WBlack 192–201 Grapheme-to-phoneme conversion (g2p) is the task of predicting the pronunciation of words from their orthographic representation. His- torically, g2p systems were transition- or rule- based, making generalization beyond a mono- lingual (high resource) domain impractical. Recently, neural architectures have enabled multilingual systems to generalize widely; however, all systems to date have been trained only on spelling-pronunciation pairs. We hy- pothesize that the sequences of IPA characters used to represent pronunciation do not capture its full nuance, especially when cleaned to fa- cilitate machine learning. We leverage audio data as an auxiliary modality in a multi-task training process to learn a more optimal inter- mediate representation of source graphemes; this is the first multimodal model proposed for multilingual g2p. Our approach is highly ef- fective: on our in-domain test set, our mul- timodal model reduces phoneme error rate to 2.46%, a more than 65% decrease compared to our implementation of a unimodal spelling- pronunciation model—which itself achieves state-of-the-art results on the Wiktionary test set. The advantages of the multimodal model generalize to wholly unseen languages, reduc- ing phoneme error rate on our out-of-domain test set to 6.39% from the unimodal 8.21%, a more than 20% relative decrease. Further- more, our training and test sets are composed primarily of low-resource languages, demon- strating that our multimodal approach remains useful when training data are constrained. D19-6121 @@ -13095,9 +13095,9 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Neural Unsupervised Parsing Beyond <fixed-case>E</fixed-case>nglish - KatharinaKann + KatharinaKann AnhadMohananey - Samuel R.Bowman + Samuel R.Bowman KyunghyunCho 209–218 Recently, neural network models which automatically infer syntactic structure from raw text have started to achieve promising results. However, earlier work on unsupervised parsing shows large performance differences between non-neural models trained on corpora in different languages, even for comparable amounts of data. With that in mind, we train instances of the PRPN architecture (Shen et al., 2018)—one of these unsupervised neural network parsers—for Arabic, Chinese, English, and German. We find that (i) the model strongly outperforms trivial baselines and, thus, acquires at least some parsing ability for all languages; (ii) good hyperparameter values seem to be universal; (iii) how the model benefits from larger training set sizes depends on the corpus, with the model achieving the largest performance gains when increasing the number of sentences from 2,500 to 12,500 for English. In addition, we show that, by sharing parameters between the related languages German and English, we can improve the model’s unsupervised parsing F1 score by up to 4% in the low-resource setting. @@ -13108,9 +13108,9 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Reevaluating Argument Component Extraction in Low Resource Settings AnirudhJoshi - TimothyBaldwin + TimothyBaldwin RichardSinnott - CecileParis + CecileParis 219–224 Argument component extraction is a challenging and complex high-level semantic extraction task. As such, it is both expensive to annotate (meaning training data is limited and low-resource by nature), and hard for current-generation deep learning methods to model. In this paper, we reevaluate the performance of state-of-the-art approaches in both single- and multi-task learning settings using combinations of character-level, GloVe, ELMo, and BERT encodings using standard BiLSTM-CRF encoders. We use evaluation metrics that are more consistent with evaluation practice in named entity recognition to understand how well current baselines address this challenge and compare their performance to lower-level semantic tasks such as CoNLL named entity recognition. We find that performance utilizing various pre-trained representations and training methodologies often leaves a lot to be desired as it currently stands, and suggest future pathways for improvement. D19-6124 @@ -13121,7 +13121,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Reinforcement-based denoising of distantly supervised <fixed-case>NER</fixed-case> with partial annotation FarhadNooralahzadeh Jan ToreLønning - LiljaØvrelid + LiljaØvrelid 225–233 Existing named entity recognition (NER) systems rely on large amounts of human-labeled data for supervision. However, obtaining large-scale annotated data is challenging particularly in specific domains like health-care, e-commerce and so on. Given the availability of domain specific knowledge resources, (e.g., ontologies, dictionaries), distant supervision is a solution to generate automatically labeled training data to reduce human effort. The outcome of distant supervision for NER, however, is often noisy. False positive and false negative instances are the main issues that reduce performance on this kind of auto-generated data. In this paper, we explore distant supervision in a supervised setup. We adopt a technique of partial annotation to address false negative cases and implement a reinforcement learning strategy with a neural network policy to identify false positive instances. Our results establish a new state-of-the-art on four benchmark datasets taken from different domains and different languages. We then go on to show that our model reduces the amount of manually annotated data required to perform NER in a new domain. D19-6125 @@ -13153,7 +13153,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Transductive Auxiliary Task Self-Training for Neural Multi-Task Models JohannesBjerva - KatharinaKann + KatharinaKann IsabelleAugenstein 253–258 Multi-task learning and self-training are two common ways to improve a machine learning model’s performance in settings with limited training data. Drawing heavily on ideas from those two approaches, we suggest transductive auxiliary task self-training: training a multi-task model on (i) a combination of main and auxiliary task training data, and (ii) test instances with auxiliary task labels which a single-task version of the model has previously generated. We perform extensive experiments on 86 combinations of languages and tasks. Our results are that, on average, transductive auxiliary task self-training improves absolute accuracy by up to 9.56% over the pure multi-task model for dependency relation tagging and by up to 13.03% for semantic tagging. @@ -13180,7 +13180,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. CezarSas RahulAralikatte IsabelleAugenstein - AndersSøgaard + AndersSøgaard 265–274 Although the vast majority of knowledge bases (KBs) are heavily biased towards English, Wikipedias do cover very different topics in different languages. Exploiting this, we introduce a new multilingual dataset (X-WikiRE), framing relation extraction as a multilingual machine reading problem. We show that by leveraging this resource it is possible to robustly transfer models cross-lingually and that multilingual support significantly improves (zero-shot) relation extraction, enabling the population of low-resourced KBs from their well-populated counterparts. D19-6130 @@ -13199,7 +13199,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Zero-shot Dependency Parsing with Pre-trained Multilingual Sentence Representations - KeTran + KeTran AriannaBisazza 281–288 We investigate whether off-the-shelf deep bidirectional sentence representations (Devlin et al., 2019) trained on a massively multilingual corpus (multilingual BERT) enable the development of an unsupervised universal dependency parser. This approach only leverages a mix of monolingual corpora in many languages and does not require any translation data making it applicable to low-resource languages. In our experiments we outperform the best CoNLL 2018 language-specific systems in all of the shared task’s six truly low-resource languages while using a single system. However, we also find that (i) parsing accuracy still varies dramatically when changing the training languages and (ii) in some target languages zero-shot transfer fails under all tested conditions, raising concerns on the ‘universality’ of the whole approach. @@ -13213,10 +13213,10 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Proceedings of the Tenth International Workshop on Health Text Mining and Information Analysis (LOUHI 2019) D19-62 EbenHolderness - AntonioJimeno Yepes - AlbertoLavelli + AntonioJimeno Yepes + AlbertoLavelli Anne-LyseMinard - JamesPustejovsky + JamesPustejovsky FabioRinaldi Association for Computational Linguistics
Hong Kong
@@ -13231,10 +13231,10 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Cross-document coreference: An approach to capturing coreference without context KristinWright-Bettner - MarthaPalmer + MarthaPalmer GuerganaSavova Pietde Groen - TimothyMiller + TimothyMiller 1–10 This paper discusses a cross-document coreference annotation schema that was developed to further automatic extraction of timelines in the clinical domain. Lexical senses and coreference choices are determined largely by context, but cross-document work requires reasoning across contexts that are not necessarily coherent. We found that an annotation approach that relies less on context-guided annotator intuitions and more on schematic rules was most effective in creating meaningful and consistent cross-document relations. D19-6201 @@ -13297,7 +13297,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Experiments with ad hoc ambiguous abbreviation expansion AgnieszkaMykowiecka - MalgorzataMarciniak + MalgorzataMarciniak 44–53 The paper addresses experiments to expand ad hoc ambiguous abbreviations in medical notes on the basis of morphologically annotated texts, without using additional domain resources. We work on Polish data but the described approaches can be used for other languages too. We test two methods to select candidates for word abbreviation expansions. The first one automatically selects all words in text which might be an expansion of an abbreviation according to the language rules. The second method uses clustering of abbreviation occurrences to select representative elements which are manually annotated to determine lists of potential expansions. We then train a classifier to assign expansions to abbreviations based on three training sets: automatically obtained, consisting of manual annotation, and concatenation of the two previous ones. The results obtained for the manually annotated training data significantly outperform automatically obtained training data. Adding the automatically obtained training data to the manually annotated data improves the results, in particular for less frequent abbreviations. In this context the proposed a priori data driven selection of possible extensions turned out to be crucial. D19-6207 @@ -13307,7 +13307,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Multi-Task, Multi-Channel, Multi-Input Learning for Mental Illness Detection using Social Media Text PrasadithKirinde Gamaarachchige - DianaInkpen + DianaInkpen 54–64 We investigate the impact of using emotional patterns identified by the clinical practitioners and computational linguists to enhance the prediction capabilities of a mental illness detection (in our case depression and post-traumatic stress disorder) model built using a deep neural network architecture. Over the years, deep learning methods have been successfully used in natural language processing tasks, including a few in the domain of mental illness and suicide ideation detection. We illustrate the effectiveness of using multi-task learning with a multi-channel convolutional neural network as the shared representation and use additional inputs identified by researchers as indicatives in detecting mental disorders to enhance the model predictability. Given the limited amount of unstructured data available for training, we managed to obtain a task-specific AUC higher than 0.90. In comparison to methods such as multi-class classification, we identified multi-task learning with multi-channel convolution neural network and multiple-inputs to be effective in detecting mental disorders. D19-6208 @@ -13371,7 +13371,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>D</fixed-case>readdit: A <fixed-case>R</fixed-case>eddit Dataset for Stress Analysis in Social Media ElsbethTurcan - KathyMcKeown + KathyMcKeown 97–107 Stress is a nigh-universal human experience, particularly in the online world. While stress can be a motivator, too much stress is associated with many negative health outcomes, making its identification useful across a range of domains. However, existing computational research typically only studies stress in domains such as speech, or in short genres such as Twitter. We present Dreaddit, a new text corpus of lengthy multi-domain social media data for the identification of stress. Our dataset consists of 190K posts from five different categories of Reddit communities; we additionally label 3.5K total segments taken from 3K posts using Amazon Mechanical Turk. We present preliminary supervised learning methods for identifying stress, both neural and traditional, and analyze the complexity and diversity of the data and characteristics of each category. D19-6213 @@ -13431,7 +13431,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Writing habits and telltale neighbors: analyzing clinical concept usage patterns with sublanguage embeddings DenisNewman-Griffis - EricFosler-Lussier + EricFosler-Lussier 146–156 Natural language processing techniques are being applied to increasingly diverse types of electronic health records, and can benefit from in-depth understanding of the distinguishing characteristics of medical document types. We present a method for characterizing the usage patterns of clinical concepts among different document types, in order to capture semantic differences beyond the lexical level. By training concept embeddings on clinical documents of different types and measuring the differences in their nearest neighborhood structures, we are able to measure divergences in concept usage while correcting for noise in embedding learning. Experiments on the MIMIC-III corpus demonstrate that our approach captures clinically-relevant differences in concept usage and provides an intuitive way to explore semantic characteristics of clinical document collections. D19-6218 @@ -13441,7 +13441,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Recognizing <fixed-case>UMLS</fixed-case> Semantic Types with Deep Learning IsarNejadgholi - Kathleen C.Fraser + Kathleen C.Fraser BerryDe Bruijn MuqunLi AsthaLaPlante @@ -13486,7 +13486,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Proceedings of the 2nd Workshop on Multilingual Surface Realisation (MSR 2019) D19-63 SimonMille - AnjaBelz + AnjaBelz BerndBohnet YvetteGraham LeoWanner @@ -13518,7 +13518,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Learning to Order Graph Elements with Application to Multilingual Surface Realization WenchaoDu - Alan WBlack + Alan WBlack 18–24 Recent advances in deep learning have shown promises in solving complex combinatorial optimization problems, such as sorting variable-sized sequences. In this work, we take a step further and tackle the problem of ordering the elements of sequences that come with graph structures. Our solution adopts an encoder-decoder framework, in which the encoder is a graph neural network that learns the representation for each element, and the decoder predicts the ordering of each local neighborhood of the graph in turn. We apply our framework to multilingual surface realization, which is the task of ordering and completing sentences with their dependency parses given but without the ordering of words. Experiments show that our approach is much better for this task than prior works that do not consider graph structures. We participated in 2019 Surface Realization Shared Task (SR’19), and we ranked second out of 14 teams while outperforming those teams below by a large margin. D19-6302 @@ -13559,7 +13559,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>IMS</fixed-case>ur<fixed-case>R</fixed-case>eal: <fixed-case>IMS</fixed-case> at the Surface Realization Shared Task 2019 XiangYu - AgnieszkaFalenska + AgnieszkaFalenska MarinaHaid Ngoc ThangVu JonasKuhn @@ -13571,8 +13571,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Surface Realization Shared Task 2019 (<fixed-case>MSR</fixed-case>19): The Team 6 Approach - ThiagoCastro Ferreira - EmielKrahmer + ThiagoCastro Ferreira + EmielKrahmer 59–62 This study describes the approach developed by the Tilburg University team to the shallow track of the Multilingual Surface Realization Shared Task 2019 (SR’19) (Mille et al., 2019). Based on Ferreira et al. (2017) and on our 2018 submission Ferreira et al. (2018), the approach generates texts by first preprocessing an input dependency tree into an ordered linearized string, which is then realized using a rule-based and a statistical machine translation (SMT) model. This year our submission is able to realize texts in the 11 languages proposed for the task, different from our last year submission, which covered only 6 Indo-European languages. The model is publicly available. D19-6307 @@ -13594,10 +13594,10 @@ Typo in Table 4 fixed to reflect correct recall of presented system. The <fixed-case>OSU</fixed-case>/<fixed-case>F</fixed-case>acebook Realizer for <fixed-case>SRST</fixed-case> 2019: <fixed-case>S</fixed-case>eq2<fixed-case>S</fixed-case>eq Inflection and Serialized <fixed-case>T</fixed-case>ree2<fixed-case>T</fixed-case>ree Linearization KartikeyaUpasani - DavidKing + DavidKing JinfengRao AnushaBalakrishnan - MichaelWhite + MichaelWhite 68–74 We describe our exploratory system for the shallow surface realization task, which combines morphological inflection using character sequence-to-sequence models with a baseline linearizer that implements a tree-to-tree model using sequence-to-sequence models on serialized trees. Results for morphological inflection were competitive across languages. Due to time constraints, we could only submit complete results (including linearization) for English. Preliminary linearization results were decent, with a small benefit from reranking to prefer valid output trees, but inadequate control over the words in the output led to poor quality on longer sentences. D19-6309 @@ -13617,7 +13617,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. The <fixed-case>D</fixed-case>ip<fixed-case>I</fixed-case>nfo<fixed-case>U</fixed-case>ni<fixed-case>T</fixed-case>o Realizer at <fixed-case>SRST</fixed-case>’19: Learning to Rank and Deep Morphology Prediction for Multilingual Surface Realization - AlessandroMazzei + AlessandroMazzei ValerioBasile 81–87 We describe the system presented at the SR’19 shared task by the DipInfoUnito team. Our approach is based on supervised machine learning. In particular, we divide the SR task into two independent subtasks, namely word order prediction and morphology inflection prediction. Two neural networks with different architectures run on the same input structure, each producing a partial output which is recombined in the final step in order to produce the predicted surface form. This work is a direct successor of the architecture presented at SR’19. @@ -13637,7 +13637,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Back-Translation as Strategy to Tackle the Lack of Corpus in Natural Language Generation from Semantic Representations - Marco AntonioSobrevilla Cabezudo + Marco AntonioSobrevilla Cabezudo SimonMille ThiagoPardo 94–103 @@ -13654,7 +13654,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. AdityaMogadala DietrichKlakow SandroPezzelle - Marie-FrancineMoens + Marie-FrancineMoens Association for Computational Linguistics
Hong Kong, China
November @@ -13670,7 +13670,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. VardaanPahuja JieFu SarathChandar - ChristopherPal + ChristopherPal 1–10 Neural Module Networks, originally proposed for the task of visual question answering, are a class of neural network architectures that involve human-specified neural modules, each designed for a specific form of reasoning. In current formulations of such networks only the parameters of the neural modules and/or the order of their execution is learned. In this work, we further expand this approach and also learn the underlying internal structure of modules in terms of the ordering and combination of simple and elementary arithmetic operators. We utilize a minimum amount of prior knowledge from the human-specified neural modules in the form of different input types and arithmetic operators used in these modules. Our results show that one is indeed able to simultaneously learn both internal module structure and module sequencing without extra supervisory signals for module execution sequencing. With this approach, we report performance comparable to models using hand-designed modules. In addition, we do a analysis of sensitivity of the learned modules w.r.t. the arithmetic operations and infer the analytical expressions of the learned modules. D19-6401 @@ -13693,7 +13693,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Big Generalizations with Small Data: Exploring the Role of Training Samples in Learning Adjectives of Size SandroPezzelle - RaquelFernández + RaquelFernández 18–23 In this paper, we experiment with a recently proposed visual reasoning task dealing with quantities – modeling the multimodal, contextually-dependent meaning of size adjectives (‘big’, ‘small’) – and explore the impact of varying the training data on the learning behavior of a state-of-art system. In previous work, models have been shown to fail in generalizing to unseen adjective-noun combinations. Here, we investigate whether, and to what extent, seeing some of these cases during training helps a model understand the rule subtending the task, i.e., that being big implies being not small, and vice versa. We show that relatively few examples are enough to understand this relationship, and that developing a specific, mutually exclusive representation of size adjectives is beneficial to the task. D19-6403 @@ -13703,7 +13703,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>E</fixed-case>igencharacter: An Embedding of <fixed-case>C</fixed-case>hinese Character Orthography Yu-HsiangTseng - Shu-KaiHsieh + Shu-KaiHsieh 24–28 Chinese characters are unique in its logographic nature, which inherently encodes world knowledge through thousands of years evolution. This paper proposes an embedding approach, namely eigencharacter (EC) space, which helps NLP application easily access the knowledge encoded in Chinese orthography. These EC representations are automatically extracted, encode both structural and radical information, and easily integrate with other computational models. We built EC representations of 5,000 Chinese characters, investigated orthography knowledge encoded in ECs, and demonstrated how these ECs identified visually similar characters with both structural and radical information. D19-6404 @@ -13713,8 +13713,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system. On the Role of Scene Graphs in Image Captioning DalinWang - DanielBeck - TrevorCohn + DanielBeck + TrevorCohn 29–34 Scene graphs represent semantic information in images, which can help image captioning system to produce more descriptive outputs versus using only the image as context. Recent captioning approaches rely on ad-hoc approaches to obtain graphs for images. However, those graphs introduce noise and it is unclear the effect of parser errors on captioning accuracy. In this work, we investigate to what extent scene graphs can help image captioning. Our results show that a state-of-the-art scene graph parser can boost performance almost as much as the ground truth graphs, showing that the bottleneck currently resides more on the captioning models than on the performance of the scene graph parser. D19-6405 @@ -13723,7 +13723,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Understanding the Effect of Textual Adversaries in Multimodal Machine Translation - KoelDutta Chowdhury + KoelDutta Chowdhury DesmondElliott 35–40 It is assumed that multimodal machine translation systems are better than text-only systems at translating phrases that have a direct correspondence in the image. This assumption has been challenged in experiments demonstrating that state-of-the-art multimodal systems perform equally well in the presence of randomly selected images, but, more recently, it has been shown that masking entities from the source language sentence during training can help to overcome this problem. In this paper, we conduct experiments with both visual and textual adversaries in order to understand the role of incorrect textual inputs to such systems. Our results show that when the source language sentence contains mistakes, multimodal translation systems do not leverage the additional visual signal to produce the correct translation. We also find that the degradation of translation performance caused by textual adversaries is significantly higher than by visual adversaries. @@ -13752,7 +13752,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. At a Glance: The Impact of Gaze Aggregation Views on Syntactic Tagging SigridKlerke - BarbaraPlank + BarbaraPlank 51–61 Readers’ eye movements used as part of the training signal have been shown to improve performance in a wide range of Natural Language Processing (NLP) tasks. Previous work uses gaze data either at the type level or at the token level and mostly from a single eye-tracking corpus. In this paper, we analyze type vs token-level integration options with eye tracking data from two corpora to inform two syntactic sequence labeling problems: binary phrase chunking and part-of-speech tagging. We show that using globally-aggregated measures that capture the central tendency or variability of gaze data is more beneficial than proposed local views which retain individual participant information. While gaze data is informative for supervised POS tagging, which complements previous findings on unsupervised POS induction, almost no improvement is obtained for binary phrase chunking, except for a single specific setup. Hence, caution is warranted when using gaze data as signal for NLP, as no single view is robust over tasks, modeling choice and gaze corpus. D19-6408 @@ -13779,10 +13779,10 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Proceedings of the Fourth Workshop on Discourse in Machine Translation (DiscoMT 2019) D19-65 - AndreiPopescu-Belis + AndreiPopescu-Belis SharidLoáiciga ChristianHardmeier - DeyiXiong + DeyiXiong Association for Computational Linguistics
Hong Kong, China
November @@ -13797,7 +13797,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Analysing Coreference in Transformer Outputs EkaterinaLapshinova-Koltunski CristinaEspaña-Bonet - Josefvan Genabith + Josefvan Genabith 1–12 We analyse coreference phenomena in three neural machine translation systems trained with different data settings with or without access to explicit intra- and cross-sentential anaphoric information. We compare system performance on two different genres: news and TED talks. To do this, we manually annotate (the possibly incorrect) coreference chains in the MT outputs and evaluate the coreference chain translations. We define an error typology that aims to go further than pronoun translation adequacy and includes types such as incorrect word selection or missing words. The features of coreference chains in automatic translations are also compared to those of the source texts and human translations. The analysis shows stronger potential translationese effects in machine translated outputs than in human translations. D19-6501 @@ -13806,7 +13806,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system.
Context-Aware Neural Machine Translation Decoding - EvaMartínez Garcia + EvaMartínez Garcia CarlesCreus CristinaEspaña-Bonet 13–23 @@ -13819,7 +13819,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. When and Why is Document-level Context Useful in Neural Machine Translation? YunsuKim Duc ThanhTran - HermannNey + HermannNey 24–34 Document-level context has received lots of attention for compensating neural machine translation (NMT) of isolated sentences. However, recent advances in document-level NMT focus on sophisticated integration of the context, explaining its improvement with only a few selected examples or targeted test sets. We extensively quantify the causes of improvements by a document-level model in general test sets, clarifying the limit of the usefulness of document-level context in NMT. We show that most of the improvements are not interpretable as utilizing the context. We also show that a minimal encoding is sufficient for the context modeling and very long context is not helpful for NMT. D19-6503 @@ -13842,7 +13842,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. TakumiOhtani HidetakaKamigaito MasaakiNagata - ManabuOkumura + ManabuOkumura 45–50 We present neural machine translation models for translating a sentence in a text by using a graph-based encoder which can consider coreference relations provided within the text explicitly. The graph-based encoder can dynamically encode the source text without attending to all tokens in the text. In experiments, our proposed models provide statistically significant improvement to the previous approach of at most 0.9 points in the BLEU score on the OpenSubtitle2018 English-to-Japanese data set. Experimental results also show that the graph-based encoder can handle a longer text well, compared with the previous approach. D19-6505 @@ -13852,7 +13852,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Analysing concatenation approaches to document-level <fixed-case>NMT</fixed-case> in two different domains YvesScherrer - JörgTiedemann + JörgTiedemann SharidLoáiciga 51–61 In this paper, we investigate how different aspects of discourse context affect the performance of recent neural MT systems. We describe two popular datasets covering news and movie subtitles and we provide a thorough analysis of the distribution of various document-level features in their domains. Furthermore, we train a set of context-aware MT models on both datasets and propose a comparative evaluation scheme that contrasts coherent context with artificially scrambled documents and absent context, arguing that the impact of discourse-aware MT models will become visible in this way. Our results show that the models are indeed affected by the manipulation of the test data, providing a different view on document-level translation quality than absolute sentence-level scores. @@ -13909,7 +13909,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. WeiFang MoinNadeem MitraMohtarami - JamesGlass + JamesGlass 13–19 We present a multi-task learning model that leverages large amount of textual information from existing datasets to improve stance prediction. In particular, we utilize multiple NLP tasks under both unsupervised and supervised settings for the target stance prediction task. Our model obtains state-of-the-art performance on a public benchmark dataset, Fake News Challenge, outperforming current approaches by a wide margin. D19-6603 @@ -14037,7 +14037,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. <fixed-case>FEVER</fixed-case> Breaker’s Run of Team <fixed-case>N</fixed-case>b<fixed-case>A</fixed-case>uz<fixed-case>D</fixed-case>r<fixed-case>L</fixed-case>qg YoungwooKim - JamesAllan + JamesAllan 99–104 We describe our submission for the Breaker phase of the second Fact Extraction and VERification (FEVER) Shared Task. Our adversarial data can be explained by two perspectives. First, we aimed at testing model’s ability to retrieve evidence, when appropriate query terms could not be easily generated from the claim. Second, we test model’s ability to precisely understand the implications of the texts, which we expect to be rare in FEVER 1.0 dataset. Overall, we suggested six types of adversarial attacks. The evaluation on the submitted systems showed that the systems were only able get both the evidence and label correct in 20% of the data. We also demonstrate our adversarial run analysis in the data development process. D19-6615 @@ -14047,7 +14047,7 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Team <fixed-case>DOMLIN</fixed-case>: Exploiting Evidence Enhancement for the <fixed-case>FEVER</fixed-case> Shared Task DominikStammbach - GuenterNeumann + GuenterNeumann 105–109 This paper contains our system description for the second Fact Extraction and VERification (FEVER) challenge. We propose a two-staged sentence selection strategy to account for examples in the dataset where evidence is not only conditioned on the claim, but also on previously retrieved evidence. We use a publicly available document retrieval module and have fine-tuned BERT checkpoints for sentence se- lection and as the entailment classifier. We report a FEVER score of 68.46% on the blind testset. D19-6616 @@ -14058,8 +14058,8 @@ Typo in Table 4 fixed to reflect correct recall of presented system. Team <fixed-case>GPLSI</fixed-case>. Approach for automated fact checking AiméeAlonso-Reina RobiertSepúlveda-Torres - EstelaSaquete - ManuelPalomar + EstelaSaquete + ManuelPalomar 110–114 Fever Shared 2.0 Task is a challenge meant for developing automated fact checking systems. Our approach for the Fever 2.0 is based on a previous proposal developed by Team Athene UKP TU Darmstadt. Our proposal modifies the sentence retrieval phase, using statement extraction and representation in the form of triplets (subject, object, action). Triplets are extracted from the claim and compare to triplets extracted from Wikipedia articles using semantic similarity. Our results are satisfactory but there is room for improvement. D19-6617 diff --git a/data/xml/E03.xml b/data/xml/E03.xml index 9ab4b76168..6399a151f6 100644 --- a/data/xml/E03.xml +++ b/data/xml/E03.xml @@ -4,7 +4,7 @@ 10th Conference of the European Chapter of the Association for Computational Linguistics AnnCopestake - JanHajič + JanHajič Association for Computational Linguistics
Budapest, Hungary
2003 @@ -16,13 +16,13 @@ Multilingual Access to Large Spoken Archives (Invited talk) - DougOard + DougOard E03-1001 oard-2003-multilingual Neural Network Probability Estimation for Broad Coverage Parsing - JamesHenderson + JamesHenderson E03-1002 henderson-2003-neural @@ -35,9 +35,9 @@
<fixed-case>C</fixed-case>zech-<fixed-case>E</fixed-case>nglish Dependency Tree-based Machine Translation - MartinČmejrek - JanCuřín - JiříHavelka + MartinČmejrek + JanCuřín + JiříHavelka E03-1004 cmejrek-etal-2003-czech @@ -51,21 +51,21 @@ <fixed-case>F</fixed-case>rench Amalgam: a quick adaptation of a sentence realization system to <fixed-case>F</fixed-case>rench MartineSmets MichaelGamon - SimonCorston-Oliver - EricRingger + SimonCorston-Oliver + EricRingger E03-1006 smets-etal-2003-french
Using <fixed-case>POS</fixed-case> Information for <fixed-case>SMT</fixed-case> into Morphologically Rich Languages NicolaUeffing - HermannNey + HermannNey E03-1007 ueffing-ney-2003-using Bootstrapping statistical parsers from small datasets - MarkSteedman + MarkSteedman MilesOsborne AnoopSarkar StephenClark @@ -96,21 +96,21 @@ Mining Web Sites Using Unsupervised Adaptive Information Extraction AlexieiDingli - FabioCiravegna + FabioCiravegna DavidGuthrie - YorickWilks + YorickWilks E03-1011 dingli-etal-2003-mining Annotated <fixed-case>H</fixed-case>ungarian National Corpus - ZoltánAlexin + ZoltánAlexin JánosCsirik - TiborGyimóthy + TiborGyimóthy KárolyBibok CsabaHatvani - GáborPrószéky - LászlóTihanyi + GáborPrószéky + LászlóTihanyi E03-1012 alexin-etal-2003-annotated @@ -124,8 +124,8 @@ <fixed-case>A</fixed-case>rabic Syntactic Trees: from Constituency to Dependency - ZdenekZabokrtsky - OtakarSmrz + ZdenekZabokrtsky + OtakarSmrz E03-1014 zabokrtsky-smrz-2003-arabic @@ -150,7 +150,7 @@ poibeau-etal-2003-multilingual
- Serge A.Yablonsky + Serge A.Yablonsky The Corpora Management System Based on <fixed-case>J</fixed-case>ava and Oracle Technologies E03-1016 yablonsky-2003-corpora @@ -164,7 +164,7 @@ Beyond Lexical Units: Enriching <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>ets with Phrasets LuisaBentivogli - EmanuelePianta + EmanuelePianta E03-1018 bentivogli-pianta-2003-beyond @@ -183,7 +183,7 @@ Summarizing Neonatal Time Series Data - Somayajulu G.Sripada + Somayajulu G.Sripada EhudReiter JimHunter JinYu @@ -194,7 +194,7 @@ Creating a multilingual collocations dictionary from large text corpora LukaNerima VioletaSeretan - EricWehrli + EricWehrli E03-1022 nerima-etal-2003-creating @@ -224,7 +224,7 @@
Combining Clues for Word Alignment - JörgTiedemann + JörgTiedemann E03-1026 tiedemann-2003-combining @@ -244,8 +244,8 @@ Automatic Construction of Machine Translation Knowledge Using Translation Literalness KenjiImamura - EiichiroSumita - YujiMatsumoto + EiichiroSumita + YujiMatsumoto E03-1029 imamura-etal-2003-automatic @@ -264,15 +264,15 @@
Efficient Search for Interactive Statistical Machine Translation - Franz JosefOch + Franz JosefOch RichardZens - HermannNey + HermannNey E03-1032 och-etal-2003-efficient Rigid grammars in the Associative-Commutative <fixed-case>L</fixed-case>ambek Calculus are not learnable - Christophe CostaFlorencio + Christophe CostaFlorencio E03-1033 florencio-2003-rigid @@ -285,28 +285,28 @@
Learning Translations of Named-Entity Phrases from Parallel Corpora - Robert C.Moore + Robert C.Moore E03-1035 moore-2003-learning Multi-<fixed-case>M</fixed-case>odal <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar JasonBaldridge - Geert-Jan M.Kruijff + Geert-Jan M.Kruijff E03-1036 baldridge-kruijff-2003-multi Experiments on the Choice of Features for Learning Verb Classes - SabineSchulte im Walde + SabineSchulte im Walde E03-1037 schulte-im-walde-2003-experiments Named Entity Recognition For <fixed-case>C</fixed-case>atalan Using Only <fixed-case>S</fixed-case>panish Resources and Unlabelled Data XavierCarreras - LluísMàrquez - LluísPadró + LluísMàrquez + LluísPadró E03-1038 carreras-etal-2003-named @@ -340,8 +340,8 @@
Contents and evaluation of the first <fixed-case>S</fixed-case>lovenian-<fixed-case>G</fixed-case>erman online dictionary - BirteLönneker - PrimozJakopin + BirteLönneker + PrimozJakopin E03-1043 lonneker-jakopin-2003-contents @@ -367,19 +367,19 @@ Lexicalized Grammar Acquisition YusukeMiyao TakashiNinomiya - Jun’ichiTsujii + Jun’ichiTsujii E03-1047 miyao-etal-2003-lexicalized
A corpus-centered approach to spoken language translation - EiichiroSumita + EiichiroSumita YasuhiroAkiba TakaoDoi AndrewFinch KenjiImamura - MichaelPaul - MitsuoShimohata + MichaelPaul + MitsuoShimohata TaroWatanabe E03-1048 sumita-etal-2003-corpus @@ -392,7 +392,7 @@ Using Noisy Biligual Data for Statistical Machine Translation - StephanVogel + StephanVogel E03-1050 vogel-2003-using @@ -400,7 +400,7 @@ Learning <fixed-case>PP</fixed-case> attachment for filtering prosodic phrasing Olgavan Herwijnen JacquesTerken - Antalvan den Bosch + Antalvan den Bosch ErwinMarsi E03-1051 van-herwijnen-etal-2003-learning @@ -408,7 +408,7 @@ Constraint Based Integration of Deep and Shallow Parsing Techniques MichaelDaum - Kilian A.Foth + Kilian A.Foth WolfgangMenzel E03-1052 daum-etal-2003-constraint @@ -417,14 +417,14 @@ Language Independent Authorship Attribution with Character Level N-Grams FuchunPeng DaleSchuurmans - VladoKeselj + VladoKeselj ShaojunWang E03-1053 peng-etal-2003-language Information Structure in Topological Dependency Grammar - Geert-Jan M.Kruijff + Geert-Jan M.Kruijff DenysDuchier E03-1054 kruijff-duchier-2003-information @@ -433,8 +433,8 @@ Comparison of Alignment Templates and Maximum Entropy Models for <fixed-case>NLP</fixed-case> OliverBender KlausMacherey - Franz JosefOch - HermannNey + Franz JosefOch + HermannNey E03-1055 bender-etal-2003-comparison @@ -451,9 +451,9 @@
Producing Contextually Appropriate Intonation is an Information-State Based Dialogue System - IvanaKruijff-Korbayova + IvanaKruijff-Korbayova StinaEricsson - Kepa J.Rodríguez + Kepa J.Rodríguez ElenaKaragjosova E03-1057 kruijff-korbayova-etal-2003-producing @@ -474,8 +474,8 @@ <fixed-case>WEBCOOP</fixed-case>: A Cooperative Question Answering System on the Web - FarahBenamara - PatrickSaint Dizier + FarahBenamara + PatrickSaint Dizier E03-1060 benamara-saint-dizier-2003-webcoop @@ -483,7 +483,7 @@ Automatic Acquisition of Script Knowledge from a Text Collection ToshiakiFujiki HidetsuguNanba - ManabuOkumura + ManabuOkumura E03-1061 fujiki-etal-2003-automatic
@@ -495,15 +495,15 @@
The Corpora Management System Based on <fixed-case>J</fixed-case>ava and Oracle Technologies - Serge A.Yablonsky + Serge A.Yablonsky E03-1063 yablonsky-2003-corpora-management How to build a <fixed-case>QA</fixed-case> system in your back-garden: application for <fixed-case>R</fixed-case>omanian - ConstantinOrăsan - DoinaTatar - GabrielaŞerban + ConstantinOrăsan + DoinaTatar + GabrielaŞerban DanaLupsa AdrianOneţ E03-1064 @@ -513,31 +513,31 @@ <fixed-case>NLP</fixed-case> for Indexing and Retrieval of Captioned Photographs HoracioSaggion KaterinaPastra - YorickWilks + YorickWilks E03-1065 saggion-etal-2003-nlp <fixed-case>CAST</fixed-case>: A computer-aided summarisation tool - ConstantinOrasan - RuslanMitkov + ConstantinOrasan + RuslanMitkov LauraHasler E03-1066 orasan-etal-2003-cast Domain-transcending mappings in a system for metaphorical reasoning - John A.Barnden - Sheila R.Glasbey - Mark G.Lee - Alan M.Wallington + John A.Barnden + Sheila R.Glasbey + Mark G.Lee + Alan M.Wallington E03-1067 barnden-etal-2003-domain Detecting Errors in Part-of-Speech Annotation MarkusDickinson - W. DetmarMeurers + W. DetmarMeurers E03-1068 dickinson-meurers-2003-detecting @@ -552,21 +552,21 @@ <fixed-case>QUALIFIER</fixed-case>: Question Answering by Lexical Fabric and External Resources HuiYang - Tat-SengChua + Tat-SengChua E03-1070 yang-chua-2003-qualifier Investigating <fixed-case>GIS</fixed-case> and Smoothing for Maximum Entropy Taggers - James R.Curran + James R.Curran StephenClark E03-1071 curran-clark-2003-investigating The Role of Initiative in Tutorial Dialogue - Mark G.Core - Johanna D.Moore + Mark G.Core + Johanna D.Moore ClausZinn E03-1072 core-etal-2003-role @@ -586,14 +586,14 @@ Targeted Help for Spoken Dialogue Systems - Beth AnnHockey + Beth AnnHockey OliverLemon - EllenCampana + EllenCampana LauraHiatt - GregoryAist - JamesHieronymus + GregoryAist + JamesHieronymus AlexanderGruenstein - JohnDowding + JohnDowding E03-1075 hockey-etal-2003-targeted @@ -606,8 +606,8 @@
An Integrated Term-Based Corpus Query System - IrenaSpasic - GoranNenadic + IrenaSpasic + GoranNenadic KostasManios SophiaAnaniadou E03-1077 @@ -615,8 +615,8 @@ Transparent combination of rule-based and data-driven approaches in speech understanding - MannyRayner - Beth AnnHockey + MannyRayner + Beth AnnHockey E03-1078 rayner-hockey-2003-transparent @@ -635,8 +635,8 @@
Categorial Fluidity in <fixed-case>C</fixed-case>hinese and its Implications for Part-of-speech Tagging - Oi YeeKwong - Benjamin K.Tsou + Oi YeeKwong + Benjamin K.Tsou E03-1081 kwong-tsou-2003-categorial @@ -664,7 +664,7 @@ Creating a multilingual collocations dictionary from large text corpora LukaNerima VioletaSeretan - EricWehrli + EricWehrli E03-1083 nerima-etal-2003-creating-multilingual
@@ -679,9 +679,9 @@ <fixed-case>PEAS</fixed-case>, the first instantiation of a comparative framework for evaluating parsers of <fixed-case>F</fixed-case>rench VéroniqueGendner GabrielIllouz - MichèleJardino + MichèleJardino LauraMonceaux - PatrickParoubek + PatrickParoubek IsabelleRobba AnneVilnat E03-1085 @@ -719,7 +719,7 @@ Talking through Procedures: An Intelligent Space Station Procedure Assistant - GregAist + GregAist J.Dowding B. A.Hockey M.Rayner @@ -754,16 +754,16 @@ A dialogue system with contextually appropriate spoken output intonation - IvanaKruijff-Korbayova + IvanaKruijff-Korbayova ElenaKaragjosova - Kepa JosebaRodriguez + Kepa JosebaRodriguez StinaEricsson E03-2004 kruijff-korbayova-etal-2003-dialogue <fixed-case>WYSIWYM</fixed-case> - building user interfaces with natural language feedback - RogerEvans + RogerEvans RichardPower E03-2005 evans-power-2003-wysiwym @@ -778,7 +778,7 @@ <fixed-case>WASPBENCH</fixed-case>: a lexicographer’s workbench incorporating state-of-the-art word sense disambiguation AdamKilgarriff - RogerEvans + RogerEvans RobKoeling MichaelRundell DavidTugwell @@ -794,25 +794,25 @@ Multilingual adaptations of a reusable information extraction tool DianaMaynard - HamishCunningham - KalinaBontcheva + HamishCunningham + KalinaBontcheva E03-2009 maynard-etal-2003-multilingual An Open-Source Environment for Compiling Typed Unification Grammars into Speech Recognisers - MannyRayner - Beth AnnHockey - JohnDowding + MannyRayner + Beth AnnHockey + JohnDowding E03-2010 rayner-etal-2003-open <fixed-case>AGORA</fixed-case>. Multilingual Multiplatform Architecture for the development of Natural Language Voice Services - JoseRelaño-Gil - Mari CarmenRodriguez-Gancedo + JoseRelaño-Gil + Mari CarmenRodriguez-Gancedo LuisVillarrubia - Luis HernándezGomez + Luis HernándezGomez E03-2011 relano-gil-etal-2003-agora @@ -827,8 +827,8 @@ Robust Generic and Query-based Summarization HoracioSaggion - KalinaBontcheva - HamishCunningham + KalinaBontcheva + HamishCunningham E03-2013 saggion-etal-2003-robust @@ -836,22 +836,22 @@ Event-Coreference across Multiple, Multi-lingual Sources in the Mumis Project HoracioSaggion JanKuper - HamishCunningham + HamishCunningham ThierryDeclerck - PeterWittenburg + PeterWittenburg MarcoPuts - EduardHoenkamp + EduardHoenkamp Franciskade Jong - YorickWilks + YorickWilks E03-2014 saggion-etal-2003-event Development of Corpora within the <fixed-case>CL</fixed-case>a<fixed-case>RK</fixed-case> System: The <fixed-case>B</fixed-case>ul<fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank Project Experience - KirilSimov + KirilSimov AlexanderSimov MilenKouylekov - KrasimiraIvanova + KrasimiraIvanova IlkoGrigorov HristoGanev E03-2015 @@ -859,7 +859,7 @@ Integrating Natural Language Generation with <fixed-case>XML</fixed-case> Web Technology - GrahamWilcock + GrahamWilcock E03-2016 wilcock-2003-integrating @@ -887,15 +887,15 @@ Cohesion and coherence for Automatic Summarization - LauraAlonso i Alemany - MariaFuentes Fort + LauraAlonso i Alemany + MariaFuentes Fort E03-3002 alonso-i-alemany-fuentes-fort-2003-cohesion Clustering Adjectives for Class Discovery - GemmaBoleda Torrent - LauraAlonso i Alemany + GemmaBoleda Torrent + LauraAlonso i Alemany E03-3003 boleda-torrent-alonso-i-alemany-2003-clustering @@ -908,7 +908,7 @@
A Dynamic Logic Formalisation of the Dialogue Gameboard - RaquelFernández + RaquelFernández E03-3005 fernandez-2003-dynamic diff --git a/data/xml/E06.xml b/data/xml/E06.xml index 3842f51ea0..845090ae01 100644 --- a/data/xml/E06.xml +++ b/data/xml/E06.xml @@ -3,7 +3,7 @@ 11th Conference of the European Chapter of the Association for Computational Linguistics - DianaMcCarthy + DianaMcCarthy ShulyWintner Association for Computational Linguistics
Trento, Italy
@@ -24,23 +24,23 @@
Using Encyclopedic Knowledge for Named entity Disambiguation - RazvanBunescu - MariusPaşca + RazvanBunescu + MariusPaşca 9–16 E06-1002 bunescu-pasca-2006-using Weakly Supervised Approaches for Ontology Population - HristoTanev - BernardoMagnini + HristoTanev + BernardoMagnini 17–24 E06-1003 tanev-magnini-2006-weakly Computational Complexity of Statistical Machine Translation - Raghavendra UdupaU. + Raghavendra UdupaU. Hemanta K.Maji 25–32 E06-1004 @@ -50,7 +50,7 @@ Computing Consensus Translation for Multiple Machine Translation Systems Using Enhanced Hypothesis Alignment EvgenyMatusov NicolaUeffing - HermannNey + HermannNey 33–40 E06-1005 matusov-etal-2006-computing @@ -65,7 +65,7 @@ Automatic Detection of Nonreferential It in Spoken Multi-Party Dialog - ChristophMüller + ChristophMüller 49–56 E06-1007 muller-2006-automatic @@ -80,7 +80,7 @@ Information Presentation in Spoken Dialogue Systems VeraDemberg - Johanna D.Moore + Johanna D.Moore 65–72 E06-1009 demberg-moore-2006-information @@ -102,7 +102,7 @@ Statistical Dependency Parsing for <fixed-case>T</fixed-case>urkish - GülşenEryiǧit + GülşenEryiǧit KemalOflazer 89–96 E06-1012 @@ -110,7 +110,7 @@ Generalized Hebbian Algorithm for Incremental Singular Value Decomposition in Natural Language Processing - GenevieveGorrell + GenevieveGorrell 97–104 E06-1013 gorrell-2006-generalized @@ -118,7 +118,7 @@ Improving Probabilistic Latent Semantic Analysis with Principal Component Analysis AymanFarahat - FrancineChen + FrancineChen 105–112 E06-1014 farahat-chen-2006-improving @@ -132,7 +132,7 @@ Determining Word Sense Dominance Using a Thesaurus - SaifMohammad + SaifMohammad GraemeHirst 121–128 E06-1016 @@ -162,10 +162,10 @@ Improved Lexical Alignment by Combining Multiple Reified Alignments - DanTufiş + DanTufiş RaduIon - AlexandruCeauşu - DanŞtefănescu + AlexandruCeauşu + DanŞtefănescu 153–160 E06-1020 tufis-etal-2006-improved @@ -173,7 +173,7 @@ Towards Robust Context-Sensitive Sentence Alignment for Monolingual Corpora RaniNelken - Stuart M.Shieber + Stuart M.Shieber 1611–168 E06-1021 nelken-shieber-2006-towards @@ -198,7 +198,7 @@ Keeping the Initiative: An Empirically-Motivated Approach to Predicting User-Initiated Dialogue Contribution in <fixed-case>HCI</fixed-case> KerstinFischer - John A.Bateman + John A.Bateman 185–192 E06-1024 fischer-bateman-2006-keeping @@ -215,7 +215,7 @@ Latent Variable Models for Semantic Orientations of Phrases HiroyaTakamura TakashiInui - ManabuOkumura + ManabuOkumura 201–208 E06-1026 takamura-etal-2006-latent @@ -231,7 +231,7 @@ A Figure of Merit for the Evaluation of Web-Corpus Randomness MassimilianoCiaramita - MarcoBaroni + MarcoBaroni 217–224 E06-1028 ciaramita-baroni-2006-figure @@ -241,7 +241,7 @@ XavierRobitaille YasuhiroSasaki MasatsuguTonoike - SatoshiSato + SatoshiSato TakehitoUtsuro 225–232 E06-1029 @@ -250,7 +250,7 @@ Web Text Corpus for Natural Language Processing VinciLiu - James R.Curran + James R.Curran 233–240 E06-1030 liu-curran-2006-web @@ -259,7 +259,7 @@ <fixed-case>CDER</fixed-case>: Efficient <fixed-case>MT</fixed-case> Evaluation Using Block Movements GregorLeusch NicolaUeffing - HermannNey + HermannNey 241–248 E06-1031 leusch-etal-2006-cder @@ -277,7 +277,7 @@ Adaptive Transformation-Based Learning for Improving Dictionary Tagging BurcuKaragol-Ayan DavidDoermann - AmyWeinberg + AmyWeinberg 257–264 E06-1033 karagol-ayan-etal-2006-adaptive @@ -292,7 +292,7 @@ Automatic Segmentation of Multiparty Dialogue Pei-YunHsueh - Johanna D.Moore + Johanna D.Moore SteveRenals 273–280 E06-1035 @@ -308,8 +308,8 @@ Using Reinforcement Learning to Build a Better Model of Dialogue State - Joel R.Tetreault - Diane J.Litman + Joel R.Tetreault + Diane J.Litman 289–296 E06-1037 tetreault-litman-2006-using @@ -324,7 +324,7 @@ Multi-Document Summarization of Evaluative Text GiuseppeCarenini - RaymondNg + RaymondNg AdamPauls 305–312 E06-1039 @@ -332,7 +332,7 @@ Comparing Automatic and Human Evaluation of <fixed-case>NLG</fixed-case> Systems - AnjaBelz + AnjaBelz EhudReiter 313–320 E06-1040 @@ -363,8 +363,8 @@ Modelling Semantic Role Pausibility in Human Sentence Processing - UlrikePadó - MatthewCrocker + UlrikePadó + MatthewCrocker FrankKeller 345–352 E06-1044 @@ -372,16 +372,16 @@ Data-Driven Generation of Emphatic Facial Displays - Mary EllenFoster - JonOberlander + Mary EllenFoster + JonOberlander 353–360 E06-1045 foster-oberlander-2006-data Edit Machines for Robust Multimodal Language Processing - SrinivasBangalore - MichaelJohnston + SrinivasBangalore + MichaelJohnston 361–368 E06-1046 bangalore-johnston-2006-edit @@ -389,9 +389,9 @@ Parsing <fixed-case>A</fixed-case>rabic Dialects DavidChiang - MonaDiab + MonaDiab NizarHabash - OwenRambow + OwenRambow SafiullahShareef 369–376 E06-1047 @@ -399,7 +399,7 @@ Unifying Synchronous <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars and Tree Transducers via Bimorphisms - Stuart M.Shieber + Stuart M.Shieber 377–384 E06-1048 shieber-2006-unifying @@ -423,20 +423,20 @@ Exploiting Shallow Linguistic Information for Relation Extraction from Biomedical Literature - ClaudioGiuliano - AlbertoLavelli - LorenzaRomano + ClaudioGiuliano + AlbertoLavelli + LorenzaRomano 401–408 E06-1051 giuliano-etal-2006-exploiting Investigating a Generic Paraphrase-Based Approach for Relation Extraction - LorenzaRomano + LorenzaRomano MilenKouylekov IdanSzpektor IdoDagan - AlbertoLavelli + AlbertoLavelli 409–416 E06-1052 romano-etal-2006-investigating @@ -454,7 +454,7 @@ Large Linguistically-Processed Web Corpora for Multiple Languages - MarcoBaroni + MarcoBaroni AdamKilgarriff 87–90 E06-2001 @@ -490,8 +490,8 @@ <fixed-case>XMG</fixed-case> - An Expressive Formalism for Describing Tree-Based Grammars YannickParmentier - Joseph LeRoux - BenoîtCrabbé + Joseph LeRoux + BenoîtCrabbé 103–106 E06-2005 parmentier-etal-2006-xmg @@ -523,7 +523,7 @@ An <fixed-case>ISU</fixed-case> Dialogue System Exhibiting Reinforcement Learning of Dialogue Policies: Generic Slot-Filling in the <fixed-case>TALK</fixed-case> In-car System OliverLemon KallirroiGeorgila - JamesHenderson + JamesHenderson MatthewStuttle 119–122 E06-2009 @@ -533,11 +533,11 @@ Generating and Visualizing a Soccer Knowledge Base PaulBuitelaar ThomasEigner - GregGul-rajani + GregGul-rajani AlexanderSchutz MelanieSiegel NicolasWeber - PhilippCimiano + PhilippCimiano GünterLadwig MatthiasMantel HonggangZhu @@ -547,7 +547,7 @@ <fixed-case>E</fixed-case>sfinge — a Question Answering System in the Web using the Web - Luís FernandoCosta + Luís FernandoCosta 127–130 E06-2011 costa-2006-esfinge @@ -557,12 +557,12 @@ ConradChang LisaFerro JohnGibson - JanetHitzeman + JanetHitzeman SuziLubar JustinPalmer SeanMunson - MarcVilain - BenjaminWellner + MarcVilain + BenjaminWellner 131–134 E06-2012 chang-etal-2006-maytag @@ -581,14 +581,14 @@ BogdanBabych PaulRayson OlgaMudraya - ScottPiao + ScottPiao 139–142 E06-2014 sharoff-etal-2006-assist Semantic Role Labeling for Coreference Resolution - Simone PaoloPonzetto + Simone PaoloPonzetto MichaelStrube 143–146 E06-2015 @@ -596,15 +596,15 @@ The <fixed-case>GOD</fixed-case> model - Alfio MassimilianoGliozzo + Alfio MassimilianoGliozzo 147–150 E06-2016 gliozzo-2006-god Computing Term Translation Probabilities with Generalized Latent Semantic Analysis - IrinaMatveeva - Gina-AnneLevow + IrinaMatveeva + Gina-AnneLevow 151–154 E06-2017 matveeva-levow-2006-computing @@ -618,7 +618,7 @@ Classifying Biological Full-Text Articles for Multi-Database Curation - Wen-JuanHou + Wen-JuanHou ChihLee Hsin-HsiChen 159–162 @@ -628,9 +628,9 @@ Generating Spatio-Temporal Descriptions in Pollen Forecasts RossTurner - SomayajuluSripada + SomayajuluSripada EhudReiter - Ian PDavy + Ian PDavy 163–166 E06-2020 turner-etal-2006-generating @@ -659,8 +659,8 @@ A Suite of Shallow Processing Tools for <fixed-case>P</fixed-case>ortuguese: <fixed-case>LX</fixed-case>-Suite - AntónioBranco - João RicardoSilva + AntónioBranco + João RicardoSilva 179–182 E06-2024 branco-silva-2006-suite @@ -688,7 +688,7 @@ <fixed-case>B</fixed-case>ayesian Network, a Model for <fixed-case>NLP</fixed-case>? - DavyWeissenbacher + DavyWeissenbacher 195–198 E06-2028 weissenbacher-2006-bayesian @@ -704,9 +704,9 @@ Developments in Affect Detection in <fixed-case>E</fixed-case>-drama LiZhang - John A.Barnden - Robert J.Hendley - Alan M.Wallington + John A.Barnden + Robert J.Hendley + Alan M.Wallington 203–206 E06-2030 zhang-etal-2006-developments @@ -715,7 +715,7 @@ Why Are They Excited? Identifying and Explaining Spikes in Blog Mood Levels KrisztianBalog GiladMishne - Maartende Rijke + Maartende Rijke 207–210 E06-2031 balog-etal-2006-excited @@ -782,7 +782,7 @@ Towards Robust <fixed-case>A</fixed-case>nimacy Classification Using Morphosyntactic Distributional Features - LiljaØvrelid + LiljaØvrelid 47–54 E06-3008 ovrelid-2006-towards diff --git a/data/xml/E09.xml b/data/xml/E09.xml index 2bf60f8324..dee778e71e 100644 --- a/data/xml/E09.xml +++ b/data/xml/E09.xml @@ -43,23 +43,23 @@ Contextual Phrase-Level Polarity Analysis Using Lexical Affect Scoring and Syntactic <fixed-case>N</fixed-case>-Grams ApoorvAgarwal FadiBiadsy - Kathleen R.McKeown + Kathleen R.McKeown 24–32 E09-1004 agarwal-etal-2009-contextual Personalizing <fixed-case>P</fixed-case>age<fixed-case>R</fixed-case>ank for Word Sense Disambiguation - EnekoAgirre - AitorSoroa + EnekoAgirre + AitorSoroa 33–41 E09-1005 agirre-soroa-2009-personalizing Supervised Domain Adaption for <fixed-case>WSD</fixed-case> - EnekoAgirre - OierLopez de Lacalle + EnekoAgirre + OierLopez de Lacalle 42–50 E09-1006 agirre-lopez-de-lacalle-2009-supervised @@ -99,15 +99,15 @@ Syntactic Phrase Reordering for <fixed-case>E</fixed-case>nglish-to-<fixed-case>A</fixed-case>rabic Statistical Machine Translation IbrahimBadr RabihZbib - JamesGlass + JamesGlass 86–93 E09-1011 badr-etal-2009-syntactic Incremental Parsing Models for Dialog Task Structure - SrinivasBangalore - AmandaStent + SrinivasBangalore + AmandaStent 94–102 E09-1012 bangalore-stent-2009-incremental @@ -132,7 +132,7 @@ Large-Coverage Root Lexicon Extraction for <fixed-case>H</fixed-case>indi Cohan SujayCarlos MonojitChoudhury - SandipanDandapat + SandipanDandapat 121–129 E09-1015 carlos-etal-2009-large @@ -171,7 +171,7 @@ An Alignment Algorithm Using Belief Propagation and a Structure-Based Distortion Model - FabienCromières + FabienCromières SadaoKurohashi 166–174 E09-1020 @@ -211,7 +211,7 @@ Inference Rules and their Application to Recognizing Textual Entailment - GeorgianaDinu + GeorgianaDinu RuiWang 211–219 E09-1025 @@ -228,7 +228,7 @@ Cognitively Motivated Features for Readability Assessment LijunFeng - NoémieElhadad + NoémieElhadad MattHuenerfauth 229–237 E09-1027 @@ -237,7 +237,7 @@ Effects of Word Confusion Networks on Voice Search JunlanFeng - SrinivasBangalore + SrinivasBangalore 238–245 E09-1028 feng-bangalore-2009-effects @@ -255,8 +255,8 @@ Reconstructing False Start Errors in Spontaneous Speech Text ErinFitzgerald - KeithHall - FrederickJelinek + KeithHall + FrederickJelinek 255–263 E09-1030 fitzgerald-etal-2009-reconstructing @@ -272,9 +272,9 @@ Who is “You”? Combining Linguistic and Gaze Features to Resolve Second-Person References in Dialogue MatthewFrampton - RaquelFernández + RaquelFernández PatrickEhlen - MarioChristoudias + MarioChristoudias TrevorDarrell StanleyPeters 273–281 @@ -283,9 +283,9 @@ Rich Bitext Projection Features for Parse Reranking - AlexanderFraser + AlexanderFraser RenjingWang - HinrichSchütze + HinrichSchütze 282–290 E09-1033 fraser-etal-2009-rich @@ -293,8 +293,8 @@ Parsing Mildly Non-Projective Dependency Structures CarlosGómez-Rodríguez - DavidWeir - JohnCarroll + DavidWeir + JohnCarroll 291–299 E09-1034 gomez-rodriguez-etal-2009-parsing @@ -317,7 +317,7 @@ Cube Summing, Approximate Inference with Non-Local Features, and Dynamic Programming without Semirings KevinGimpel - Noah A.Smith + Noah A.Smith 318–326 E09-1037 gimpel-smith-2009-cube @@ -334,7 +334,7 @@ Person Identification from Text and Speech Genre Samples - JadeGoldstein-Stewart + JadeGoldstein-Stewart RansomWinder RobertaSabin 336–344 @@ -343,13 +343,13 @@ End-to-End Evaluation in Simultaneous Translation - OlivierHamon + OlivierHamon ChristianFügen - DjamelMostefa + DjamelMostefa VictoriaArranz MuntsinKolss - AlexWaibel - KhalidChoukri + AlexWaibel + KhalidChoukri 345–353 E09-1040 hamon-etal-2009-end @@ -382,8 +382,8 @@ Rule Filtering by Pattern for Efficient Hierarchical Translation GonzaloIglesias - Adriàde Gispert - Eduardo R.Banga + Adriàde Gispert + Eduardo R.Banga WilliamByrne 380–388 E09-1044 @@ -391,9 +391,9 @@ An Empirical Study on Class-Based Word Sense Disambiguation - RubénIzquierdo - ArmandoSuárez - GermanRigau + RubénIzquierdo + ArmandoSuárez + GermanRigau 389–397 E09-1045 izquierdo-etal-2009-empirical @@ -408,8 +408,8 @@ Parsing Coordinations - SandraKübler - ErhardHinrichs + SandraKübler + ErhardHinrichs WolfgangMaier EvaKlett 406–414 @@ -427,7 +427,7 @@ <fixed-case>N</fixed-case>-Gram-Based Statistical Machine Translation versus Syntax Augmented Machine Translation: Comparison and System Combination MaximKhalilov - José A. R.Fonollosa + José A. R.Fonollosa 424–432 E09-1049 khalilov-fonollosa-2009-n @@ -466,7 +466,7 @@ Lattice Parsing to Integrate Speech Recognition and Rule-Based Machine Translation - SelçukKöprü + SelçukKöprü AdnanYazıcı 469–477 E09-1054 @@ -482,9 +482,9 @@ Improvements in Analogical Learning: Application to Translating Multi-Terms of the Medical Domain - PhilippeLanglais + PhilippeLanglais FrançoisYvon - PierreZweigenbaum + PierreZweigenbaum 487–495 E09-1056 langlais-etal-2009-improvements @@ -493,7 +493,7 @@ Language-Independent Bilingual Terminology Extraction from a Multilingual Parallel Corpus ElsLefever LieveMacken - VeroniqueHoste + VeroniqueHoste 496–504 E09-1057 lefever-etal-2009-language @@ -559,7 +559,7 @@ Text-to-Text Semantic Similarity for Automatic Short Answer Grading MichaelMohler - RadaMihalcea + RadaMihalcea 567–575 E09-1065 mohler-mihalcea-2009-text @@ -601,7 +601,7 @@ Analysing <fixed-case>W</fixed-case>ikipedia and Gold-Standard Corpora for <fixed-case>NER</fixed-case> Training JoelNothman TaraMurphy - James R.Curran + James R.Curran 612–620 E09-1070 nothman-etal-2009-analysing @@ -616,14 +616,14 @@ Empirical Evaluations of <fixed-case>A</fixed-case>nimacy Annotation - LiljaØvrelid + LiljaØvrelid 630–638 E09-1072 ovrelid-2009-empirical Outclassing <fixed-case>W</fixed-case>ikipedia in Open-Domain Information Extraction: Weakly-Supervised Acquisition of Attributes over Conceptual Hierarchies - MariusPaşca + MariusPaşca 639–647 E09-1073 pasca-2009-outclassing @@ -696,7 +696,7 @@ Word Lattices for Multi-Source Translation JoshSchroeder - TrevorCohn + TrevorCohn PhilippKoehn 719–727 E09-1082 @@ -706,10 +706,10 @@ Frequency Matters: Pitch Accents and Information Status KatrinSchweitzer MichaelWalsh - BerndMöbius + BerndMöbius ArndtRiester AntjeSchweitzer - HinrichSchütze + HinrichSchütze 728–736 E09-1083 schweitzer-etal-2009-frequency @@ -743,8 +743,8 @@ Semi-Supervised Training for the Averaged Perceptron <fixed-case>POS</fixed-case> Tagger - Drahomíra “johanka”Spoustová - JanHajič + Drahomíra “johanka”Spoustová + JanHajič JanRaab MiroslavSpousta 763–771 @@ -754,7 +754,7 @@ Sequential Labeling with Latent Variables: An Exact Inference Algorithm and its Efficient Approximation XuSun - Jun’ichiTsujii + Jun’ichiTsujii 772–780 E09-1088 sun-tsujii-2009-sequential @@ -762,7 +762,7 @@ Text Summarization Model Based on Maximum Coverage Problem and its Variant HiroyaTakamura - ManabuOkumura + ManabuOkumura 781–789 E09-1089 takamura-okumura-2009-text @@ -770,7 +770,7 @@ Fast Full Parsing by Linear-Chain Conditional Random Fields YoshimasaTsuruoka - Jun’ichiTsujii + Jun’ichiTsujii SophiaAnaniadou 790–798 E09-1090 @@ -778,9 +778,9 @@ <fixed-case>MINT</fixed-case>: A Method for Effective and Scalable Mining of Named Entity Transliterations from Large Comparable Corpora - RaghavendraUdupa - KSaravanan - AKumaran + RaghavendraUdupa + KSaravanan + AKumaran JagadeeshJagarlamudi 799–807 E09-1091 @@ -790,14 +790,14 @@ Deriving Generalized Knowledge from Corpora Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Abstraction BenjaminVan Durme PhillipMichalak - LenhartSchubert + LenhartSchubert 808–816 E09-1092 van-durme-etal-2009-deriving Learning Efficient Parsing - Gertjanvan Noord + Gertjanvan Noord 817–825 E09-1093 van-noord-2009-learning @@ -806,7 +806,7 @@ A Robust and Extensible Exemplar-Based Model of Thematic Fit BramVandekerckhove DominiekSandra - WalterDaelemans + WalterDaelemans 826–834 E09-1094 vandekerckhove-etal-2009-robust @@ -822,8 +822,8 @@ Feature-Based Method for Document Alignment in Comparable News Corpora - ThuyVu - Ai TiAw + ThuyVu + Ai TiAw MinZhang 843–851 E09-1096 @@ -834,7 +834,7 @@ StephenWan MarkDras RobertDale - CécileParis + CécileParis 852–860 E09-1097 wan-etal-2009-improving @@ -849,7 +849,7 @@ Language <fixed-case>ID</fixed-case> in the Context of Harvesting Language Data off the Web FeiXia - WilliamLewis + WilliamLewis HoifungPoon 870–878 E09-1099 @@ -916,7 +916,7 @@ An Open-Source Natural Language Generator for <fixed-case>OWL</fixed-case> Ontologies and its Use in Protege and Second Life - DimitriosGalanis + DimitriosGalanis GeorgeKarakatsiotis GerasimosLampouras IonAndroutsopoulos @@ -939,8 +939,8 @@ A Comparison of Clausal Coordinate Ellipsis in <fixed-case>E</fixed-case>stonian and <fixed-case>G</fixed-case>erman: Remarkably Similar Elision Rules Allow a Language-Independent Ellipsis-Generation Module KarinHarbusch - MareKoit - HaldurÕim + MareKoit + HaldurÕim 25–28 E09-2007 harbusch-etal-2009-comparison @@ -955,11 +955,11 @@ The Software Architecture for the First Challenge on Generating Instructions in Virtual Environments AlexanderKoller - DonnaByron + DonnaByron JustineCassell RobertDale - JohannaMoore - JonOberlander + JohannaMoore + JonOberlander KristinaStriegnitz 33–36 E09-2009 @@ -969,7 +969,7 @@ Adaptive Natural Language Interaction StasinosKonstantopoulos AthanasiosTegos - DimitriosBilidas + DimitriosBilidas IonAndroutsopoulos GerasimosLampouras ColinMatheson @@ -981,7 +981,7 @@ Parsing, Projecting & Prototypes: Repurposing Linguistic Data on the Web - WilliamLewis + WilliamLewis FeiXia 41–44 E09-2011 @@ -1023,7 +1023,7 @@ Three <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> Tools Powered by a Biological Lexicon YutakaSasaki PaulThompson - JohnMcNaught + JohnMcNaught SophiaAnaniadou 61–64 E09-2016 @@ -1032,7 +1032,7 @@ A Mobile Health and Fitness Companion Demonstrator OlovStåhl - BjörnGambäck + BjörnGambäck MarkkuTurunen JaakkoHakulinen 65–68 @@ -1080,14 +1080,14 @@ Finding Word Substitutions Using a Distributional Similarity Baseline and Immediate Context Overlap - AurelieHerbelot + AurelieHerbelot 28–36 E09-3004 herbelot-2009-finding Structural Correspondence Learning for Parse Disambiguation - BarbaraPlank + BarbaraPlank 37–45 E09-3005 plank-2009-structural @@ -1123,7 +1123,7 @@ Aligning Medical Domain Ontologies for Clinical Query Extraction - PinarWennerberg + PinarWennerberg 79–87 E09-3010 wennerberg-2009-aligning diff --git a/data/xml/E12.xml b/data/xml/E12.xml index eed5d478d3..a3fab2e8db 100644 --- a/data/xml/E12.xml +++ b/data/xml/E12.xml @@ -4,7 +4,7 @@ Proceedings of the 13th Conference of the European Chapter of the Association for Computational Linguistics E12-1 - WalterDaelemans + WalterDaelemans Association for Computational Linguistics
Avignon, France
April @@ -40,8 +40,8 @@
Entailment above the word level in distributional semantics - MarcoBaroni - RaffaellaBernardi + MarcoBaroni + RaffaellaBernardi Ngoc-QuynhDo Chung-chiehShan 23–32 @@ -50,7 +50,7 @@ Evaluating Distributional Models of Semantics for Syntactically Invariant Inference - Jackie Chi KitCheung + Jackie Chi KitCheung GeraldPenn 33–43 E12-1005 @@ -67,7 +67,7 @@ Dependency Parsing of <fixed-case>H</fixed-case>ungarian: Baseline Results and Challenges - RichárdFarkas + RichárdFarkas VeronikaVincze HelmutSchmid 55–65 @@ -93,7 +93,7 @@ Answer Sentence Retrieval by Matching Dependency Paths acquired from Question/Answer Sentence Pairs - MichaelKaisser + MichaelKaisser 88–98 E12-1010 kaisser-2012-answer @@ -117,7 +117,7 @@ Computing Lattice <fixed-case>BLEU</fixed-case> Oracle Scores for Machine Translation - ArtemSokolov + ArtemSokolov GuillaumeWisniewski FrançoisYvon 120–129 @@ -137,7 +137,7 @@ Character-Based Pivot Translation for Under-Resourced Languages and Domains - JörgTiedemann + JörgTiedemann 141–151 E12-1015 tiedemann-2012-character @@ -145,10 +145,10 @@ Does more data always yield better translations? GuillemGascó - Martha-AliciaRocha - GermánSanchis-Trilles + Martha-AliciaRocha + GermánSanchis-Trilles JesúsAndrés-Ferrer - FranciscoCasacuberta + FranciscoCasacuberta 152–161 E12-1016 gasco-etal-2012-data @@ -159,7 +159,7 @@ NathanSchneider RishavBhowmick KemalOflazer - Noah A.Smith + Noah A.Smith 162–173 E12-1017 mohit-etal-2012-recall @@ -167,7 +167,7 @@ Tree Representations in Probabilistic Models for Extended Named Entities Detection MarcoDinarelli - SophieRosset + SophieRosset 174–184 E12-1018 dinarelli-rosset-2012-tree @@ -176,9 +176,9 @@ When Did that Happen? — Linking Events and Relations to Timestamps DirkHovy JamesFan - AlfioGliozzo + AlfioGliozzo SiddharthPatwardhan - ChristopherWelty + ChristopherWelty 185–193 E12-1019 hovy-etal-2012-happen @@ -186,7 +186,7 @@ Compensating for Annotation Errors in Training a Relation Extractor BonanMin - RalphGrishman + RalphGrishman 194–203 E12-1020 min-grishman-2012-compensating @@ -194,8 +194,8 @@ Incorporating Lexical Priors into Topic Models JagadeeshJagarlamudi - HalDaumé III - RaghavendraUdupa + HalDaumé III + RaghavendraUdupa 204–213 E12-1021 jagarlamudi-etal-2012-incorporating @@ -220,18 +220,18 @@ A Probabilistic Model of Syntactic and Semantic Acquisition from Child-Directed Utterances and their Meanings TomKwiatkowski - SharonGoldwater - LukeZettlemoyer - MarkSteedman + SharonGoldwater + LukeZettlemoyer + MarkSteedman 234–244 E12-1024 kwiatkowski-etal-2012-probabilistic Active learning for interactive machine translation - JesúsGonzález-Rubio - DanielOrtiz-Martínez - FranciscoCasacuberta + JesúsGonzález-Rubio + DanielOrtiz-Martínez + FranciscoCasacuberta 245–254 E12-1025 gonzalez-rubio-etal-2012-active @@ -248,7 +248,7 @@ Aspectual Type and Temporal Relation Classification FranciscoCosta - AntónioBranco + AntónioBranco 266–275 E12-1027 costa-branco-2012-aspectual @@ -256,7 +256,7 @@ Automatic generation of short informative sentiment summaries AndreaGlaser - HinrichSchütze + HinrichSchütze 276–285 E12-1028 glaser-schutze-2012-automatic @@ -264,7 +264,7 @@ Bootstrapped Training of Event Extraction Classifiers RuihongHuang - EllenRiloff + EllenRiloff 286–295 E12-1029 huang-riloff-2012-bootstrapped @@ -272,7 +272,7 @@ Bootstrapping Events and Relations from Text TingLiu - TomekStrzalkowski + TomekStrzalkowski 296–305 E12-1030 liu-strzalkowski-2012-bootstrapping @@ -280,7 +280,7 @@ <fixed-case>CL</fixed-case>ex: A Lexicon for Exploring Color, Concept and Emotion Associations in Language SvitlanaVolkova - William B.Dolan + William B.Dolan TheresaWilson 306–314 E12-1031 @@ -307,7 +307,7 @@ BramJans StevenBethard IvanVulić - Marie FrancineMoens + Marie FrancineMoens 336–344 E12-1034 jans-etal-2012-skip @@ -346,7 +346,7 @@ <fixed-case>W</fixed-case>eb<fixed-case>CAG</fixed-case>e – A Web-Harvested Corpus Annotated with <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et Senses VerenaHenrich - ErhardHinrichs + ErhardHinrichs TatianaVodolazova 387–396 E12-1039 @@ -362,7 +362,7 @@ Lexical surprisal as a general predictor of reading time IreneFernandez Monsalve - Stefan L.Frank + Stefan L.Frank GabriellaVigliocco 398–408 E12-1041 @@ -381,7 +381,7 @@ Combining Tree Structures, Flat Features and Patterns for Biomedical Relation Extraction Md. Faisal MahbubChowdhury - AlbertoLavelli + AlbertoLavelli 420–429 E12-1043 chowdhury-lavelli-2012-combining @@ -390,7 +390,7 @@ Coordination Structure Analysis using Dual Decomposition AtsushiHanamoto TakuyaMatsuzaki - Jun’ichiTsujii + Jun’ichiTsujii 430–438 E12-1044 hanamoto-etal-2012-coordination @@ -406,7 +406,7 @@ Detecting Highly Confident Word Translations from Comparable Corpora without Any Prior Knowledge IvanVulić - Marie-FrancineMoens + Marie-FrancineMoens 449–459 E12-1046 vulic-moens-2012-detecting @@ -420,10 +420,10 @@ Evaluating language understanding accuracy with respect to objective outcomes in a dialogue system - Myroslava O.Dzikovska + Myroslava O.Dzikovska PeterBell AmyIsard - Johanna D.Moore + Johanna D.Moore 471–481 E12-1048 dzikovska-etal-2012-evaluating @@ -440,9 +440,9 @@ Feature-Rich Part-of-speech Tagging for Morphologically Complex Languages: Application to <fixed-case>B</fixed-case>ulgarian GeorgiGeorgiev ValentinZhikov - KirilSimov + KirilSimov PetyaOsenova - PreslavNakov + PreslavNakov 492–502 E12-1050 georgiev-etal-2012-feature @@ -450,7 +450,7 @@ Instance-Driven Attachment of Semantic Annotations over Conceptual Hierarchies JanaraChristensen - MariusPaşca + MariusPaşca 503–513 E12-1051 christensen-pasca-2012-instance @@ -467,9 +467,9 @@ Learning How to Conjugate the <fixed-case>R</fixed-case>omanian Verb. Rules for Regular and Partially Irregular Verbs - Liviu P.Dinu + Liviu P.Dinu VladNiculae - Octavia-MariaSulea + Octavia-MariaSulea 524–528 E12-1053 dinu-etal-2012-learning @@ -499,9 +499,9 @@ The effect of domain and text type on text prediction quality SuzanVerberne - Antalvan den Bosch - HelmerStrik - LouBoves + Antalvan den Bosch + HelmerStrik + LouBoves 561–569 E12-1057 verberne-etal-2012-effect @@ -531,16 +531,16 @@ Word Sense Induction for Novel Sense Detection Jey HanLau PaulCook - DianaMcCarthy + DianaMcCarthy DavidNewman - TimothyBaldwin + TimothyBaldwin 591–601 E12-1060 lau-etal-2012-word Learning Language from Perceptual Context - RaymondMooney + RaymondMooney 602 E12-1061 mooney-2012-learning @@ -548,14 +548,14 @@ Learning for Microblogs with Distant Supervision: Political Forecasting with <fixed-case>T</fixed-case>witter MicolMarchetti-Bowick - NathanaelChambers + NathanaelChambers 603–612 E12-1062 marchetti-bowick-chambers-2012-learning Learning from evolving data streams: online triage of bug reports - GrzegorzChrupala + GrzegorzChrupala 613–622 E12-1063 chrupala-2012-learning @@ -563,7 +563,7 @@ Towards a model of formal and informal address in <fixed-case>E</fixed-case>nglish ManaalFaruqui - SebastianPadó + SebastianPadó 623–633 E12-1064 faruqui-pado-2012-towards @@ -593,7 +593,7 @@ Modeling Inflection and Word-Formation in <fixed-case>SMT</fixed-case> - AlexanderFraser + AlexanderFraser MarionWeller AoifeCahill FabienneCap @@ -613,14 +613,14 @@ Framework of Semantic Role Assignment based on Extended Lexical Conceptual Structure: Comparison with <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et and <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et YuichirohMatsubayashi YusukeMiyao - AkikoAizawa + AkikoAizawa 686–695 E12-1070 matsubayashi-etal-2012-framework Unsupervised Detection of Downward-Entailing Operators By Maximizing Classification Certainty - Jackie Chi KitCheung + Jackie Chi KitCheung GeraldPenn 696–705 E12-1071 @@ -630,7 +630,7 @@ <fixed-case>E</fixed-case>lliphant: Improved Automatic Detection of Zero Subjects and Impersonal Constructions in <fixed-case>S</fixed-case>panish LuzRello RicardoBaeza-Yates - RuslanMitkov + RuslanMitkov 706–715 E12-1072 rello-etal-2012-elliphant @@ -647,7 +647,7 @@ Determining the placement of <fixed-case>G</fixed-case>erman verbs in <fixed-case>E</fixed-case>nglish–to–<fixed-case>G</fixed-case>erman <fixed-case>SMT</fixed-case> AnitaGojun - AlexanderFraser + AlexanderFraser 726–735 E12-1074 gojun-fraser-2012-determining @@ -671,9 +671,9 @@ KarlStratos XufengHan AlyssaMensch - AlexBerg - TamaraBerg - HalDaumé III + AlexBerg + TamaraBerg + HalDaumé III 747–756 E12-1076 mitchell-etal-2012-midge @@ -688,7 +688,7 @@ To what extent does sentence-internal realisation reflect discourse context? A study on word order - SinaZarrieß + SinaZarrieß AoifeCahill JonasKuhn 767–776 @@ -710,7 +710,7 @@ JohnMcDonough GahgeneGweon BhikshaRaj - CarolynPenstein Rosé + CarolynPenstein Rosé 787–797 E12-1080 jain-etal-2012-unsupervised @@ -735,7 +735,7 @@ Structural and Topical Dimensions in Multi-Task Patent Translation - KatharinaWaeschle + KatharinaWaeschle StefanRiezler 818–828 E12-1083 @@ -762,7 +762,7 @@ Proceedings of the Demonstrations at the 13th Conference of the European Chapter of the Association for Computational Linguistics E12-2 - FrédériqueSegond + FrédériqueSegond Association for Computational Linguistics
Avignon, France
April @@ -777,7 +777,7 @@ Language Resources Factory: case study on the acquisition of Translation Memories MarcPoch AntonioToral - NúriaBel + NúriaBel 1–5 E12-2001 poch-etal-2012-language @@ -785,7 +785,7 @@ Harnessing <fixed-case>NLP</fixed-case> Techniques in the Processes of Multilingual Content Management AneliaBelogay - DimanKaragyozov + DimanKaragyozov SvetlaKoeva CristinaVertan AdamPrzepiórkowski @@ -799,23 +799,23 @@ Collaborative Machine Translation Service for Scientific texts PatrikLambert JeanSenellart - LaurentRomary + LaurentRomary HolgerSchwenk FlorianZipser PatriceLopez - FrédéricBlain + FrédéricBlain 11–15 E12-2003 lambert-etal-2012-collaborative <fixed-case>T</fixed-case>rans<fixed-case>A</fixed-case>head: A Writing Assistant for <fixed-case>CAT</fixed-case> and <fixed-case>CALL</fixed-case> - Chung-chiHuang - Ping-cheYang - Mei-huaChen - Hung-tingHsieh - Ting-huiKao - Jason S.Chang + Chung-chiHuang + Ping-cheYang + Mei-huaChen + Hung-tingHsieh + Ting-huiKao + Jason S.Chang 16–19 E12-2004 huang-etal-2012-transahead @@ -826,7 +826,7 @@ HenriLeisma MonikaMachunik TuomoKakkonen - Jean-LucLeBrun + Jean-LucLeBrun 20–24 E12-2005 kinnunen-etal-2012-swan @@ -839,7 +839,7 @@ BrettCrawley StefanoBucci RalfSteinberger - ErikVan der Goot + ErikVan der Goot 25–30 E12-2006 turchi-etal-2012-onts @@ -855,7 +855,7 @@ <fixed-case>F</fixed-case>olheador: browsing through <fixed-case>P</fixed-case>ortuguese semantic relations - HugoGonçalo Oliveira + HugoGonçalo Oliveira HernaniCosta DianaSantos 35–40 @@ -866,14 +866,14 @@ A Computer Assisted Speech Transcription System AlejandroRevuelta-Martínez LuisRodríguez - IsmaelGarcía-Varea + IsmaelGarcía-Varea 41–45 E12-2009 revuelta-martinez-etal-2012-computer A Statistical Spoken Dialogue System using Complex User Goals and Value Directed Compression - Paul A.Crook + Paul A.Crook ZhuoranWang XingkunLiu OliverLemon @@ -909,7 +909,7 @@ A Support Platform for Event Detection using Social Intelligence - TimothyBaldwin + TimothyBaldwin PaulCook BoHan AaronHarwood @@ -922,7 +922,7 @@ <fixed-case>NERD</fixed-case>: A Framework for Unifying Named Entity Recognition and Disambiguation Extraction Tools GiuseppeRizzo - RaphaëlTroncy + RaphaëlTroncy 73–76 E12-2015 rizzo-troncy-2012-nerd @@ -951,7 +951,7 @@ MarcoTrevisan EduardBarbu IgorBarsanti - LucaDini + LucaDini NikolaosLagos FrédériqueSegond MathieuRhulmann @@ -965,7 +965,7 @@ ValerioBasile JohanBos KilianEvang - NoortjeVenhuizen + NoortjeVenhuizen 92–96 E12-2019 basile-etal-2012-platform @@ -985,7 +985,7 @@ GoranTopić TomokoOhta SophiaAnaniadou - Jun’ichiTsujii + Jun’ichiTsujii 102–107 E12-2021 stenetorp-etal-2012-brat @@ -1024,7 +1024,7 @@ A Comparative Study of Reinforcement Learning Techniques on Dialogue Management - AlexandrosPapangelis + AlexandrosPapangelis 22–31 E12-3003 papangelis-2012-comparative @@ -1039,7 +1039,7 @@ What’s in a Name? Entity Type Variation across Two Biomedical Subdomains ClaudiuMihăilă - Riza TheresaBatista-Navarro + Riza TheresaBatista-Navarro 38–45 E12-3005 mihaila-batista-navarro-2012-whats diff --git a/data/xml/E14.xml b/data/xml/E14.xml index f91513ac6a..3ee4a37017 100644 --- a/data/xml/E14.xml +++ b/data/xml/E14.xml @@ -5,7 +5,7 @@ Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics E14-1 ShulyWintner - SharonGoldwater + SharonGoldwater StefanRiezler 10.3115/v1/E14-1 Association for Computational Linguistics @@ -30,7 +30,7 @@ Undirected Machine Translation with Discriminative Reinforcement Learning AndreaGesmundo - JamesHenderson + JamesHenderson 10–19 E14-1002 10.3115/v1/E14-1002 @@ -51,7 +51,7 @@ Maximizing Component Quality in Bilingual Word-Aligned Segmentations SpyrosMartzoukos ChristofMonz - ChristopheCosta Florêncio + ChristopheCosta Florêncio 30–38 E14-1004 10.3115/v1/E14-1004 @@ -60,8 +60,8 @@ A Joint Model for Quotation Attribution and Coreference Resolution Mariana S. C.Almeida - Miguel B.Almeida - André F. T.Martins + Miguel B.Almeida + André F. T.Martins 39–48 E14-1005 10.3115/v1/E14-1005 @@ -80,9 +80,9 @@ Inducing Example-based Semantic Frames from a Massive Amount of Verb Uses DaisukeKawahara - DanielPeterson + DanielPeterson OctavianPopescu - MarthaPalmer + MarthaPalmer 58–67 E14-1007 10.3115/v1/E14-1007 @@ -110,8 +110,8 @@ Simple, Robust and (almost) Unsupervised Generation of Polarity Lexicons for Multiple Languages IñakiSan Vicente - RodrigoAgerri - GermanRigau + RodrigoAgerri + GermanRigau 88–97 E14-1010 10.3115/v1/E14-1010 @@ -131,7 +131,7 @@ A. SezaDoğruöz PhaniGadde DavidAdamson - CarolynRosé + CarolynRosé 107–115 E14-1012 10.3115/v1/E14-1012 @@ -140,7 +140,7 @@ Modelling the Lexicon in Unsupervised Part of Speech Induction GregoryDubbin - PhilBlunsom + PhilBlunsom 116–125 E14-1013 10.3115/v1/E14-1013 @@ -151,7 +151,7 @@ TejaswiniDeoskar ChristosChristodoulopoulos AlexandraBirch - MarkSteedman + MarkSteedman 126–134 E14-1014 10.3115/v1/E14-1014 @@ -160,7 +160,7 @@ Special Techniques for Constituent Parsing of Morphologically Rich Languages ZsoltSzántó - RichárdFarkas + RichárdFarkas 135–144 E14-1015 10.3115/v1/E14-1015 @@ -169,7 +169,7 @@ Leveraging Verb-Argument Structures to Infer Semantic Relations EduardoBlanco - DanMoldovan + DanMoldovan 145–154 E14-1016 10.3115/v1/E14-1016 @@ -178,7 +178,7 @@ Structured and Unstructured Cache Models for <fixed-case>SMT</fixed-case> Domain Adaptation AnnieLouis - BonnieWebber + BonnieWebber 155–163 E14-1017 10.3115/v1/E14-1017 @@ -239,7 +239,7 @@ SriramkumarBalasubramanian AnupKotalwar JiehanZheng - OwenRambow + OwenRambow 211–219 E14-1023 10.3115/v1/E14-1023 @@ -248,7 +248,7 @@ Statistical Script Learning with Multi-Argument Events KarlPichotta - RaymondMooney + RaymondMooney 220–229 E14-1024 10.3115/v1/E14-1024 @@ -266,7 +266,7 @@ Source-side Preordering for Translation using Logistic Regression and Depth-first Branch-and-Bound Search LauraJehl - Adriàde Gispert + Adriàde Gispert MarkHopkins BillByrne 239–248 @@ -285,7 +285,7 @@ Word Ordering with Phrase-Based Grammars - Adriàde Gispert + Adriàde Gispert MarcusTomalin BillByrne 259–268 @@ -296,8 +296,8 @@ Iterative Constrained Clustering for Subjectivity Word Sense Disambiguation CemAkkaya - JanyceWiebe - RadaMihalcea + JanyceWiebe + RadaMihalcea 269–278 E14-1029 10.3115/v1/E14-1029 @@ -306,7 +306,7 @@ Identifying fake <fixed-case>A</fixed-case>mazon reviews as learning from crowds TommasoFornaciari - MassimoPoesio + MassimoPoesio 279–287 E14-1030 10.3115/v1/E14-1030 @@ -315,7 +315,7 @@ Assessing the relative reading level of sentence pairs for text simplification SowmyaVajjala - DetmarMeurers + DetmarMeurers 288–297 E14-1031 10.3115/v1/E14-1031 @@ -343,7 +343,7 @@ Using idiolects and sociolects to improve word prediction WesselStoop - Antalvan den Bosch + Antalvan den Bosch 318–327 E14-1034 10.3115/v1/E14-1034 @@ -352,7 +352,7 @@ Dynamic Topic Adaptation for Phrase-based <fixed-case>MT</fixed-case> EvaHasler - PhilBlunsom + PhilBlunsom PhilippKoehn BarryHaddow 328–337 @@ -391,7 +391,7 @@ Fast Statistical Parsing with Parallel Multiple Context-Free Grammars KrasimirAngelov - PeterLjunglöf + PeterLjunglöf 368–376 E14-1039 10.3115/v1/E14-1039 @@ -400,7 +400,7 @@ Sentiment Propagation via Implicature Constraints LingjiaDeng - JanyceWiebe + JanyceWiebe 377–385 E14-1040 10.3115/v1/E14-1040 @@ -408,7 +408,7 @@ Acquisition of Noncontiguous Class Attributes from Web Search Queries - MariusPaşca + MariusPaşca 386–394 E14-1041 10.3115/v1/E14-1041 @@ -417,8 +417,8 @@ Learning from Post-Editing: Online Model Adaptation for Statistical Machine Translation MichaelDenkowski - ChrisDyer - AlonLavie + ChrisDyer + AlonLavie 395–404 E14-1042 10.3115/v1/E14-1042 @@ -428,8 +428,8 @@ Predicting and Characterising User Impact on <fixed-case>T</fixed-case>witter VasileiosLampos NikolaosAletras - DanielPreoţiuc-Pietro - TrevorCohn + DanielPreoţiuc-Pietro + TrevorCohn 405–413 E14-1043 10.3115/v1/E14-1043 @@ -458,8 +458,8 @@ Improving the Lexical Function Composition Model with Pathwise Optimized Elastic-Net Regression JimingLi - MarcoBaroni - GeorgianaDinu + MarcoBaroni + GeorgianaDinu 434–442 E14-1046 10.3115/v1/E14-1046 @@ -468,7 +468,7 @@ Is Machine Translation Getting Better over Time? YvetteGraham - TimothyBaldwin + TimothyBaldwin AlistairMoffat JustinZobel 443–451 @@ -479,7 +479,7 @@ Learning Dictionaries for Named Entity Recognition using Minimal Supervision ArvindNeelakantan - MichaelCollins + MichaelCollins 452–461 E14-1048 10.3115/v1/E14-1048 @@ -488,7 +488,7 @@ Improving Vector Space Word Representations Using Multilingual Correlation ManaalFaruqui - ChrisDyer + ChrisDyer 462–471 E14-1049 10.3115/v1/E14-1049 @@ -498,7 +498,7 @@ Using Distributional Similarity of Multi-way Translations to Predict Multiword Expression Compositionality BaharSalehi PaulCook - TimothyBaldwin + TimothyBaldwin 472–481 E14-1050 10.3115/v1/E14-1050 @@ -553,7 +553,7 @@ Machine Reading Tea Leaves: Automatically Evaluating Topic Coherence and Topic Model Quality Jey HanLau DavidNewman - TimothyBaldwin + TimothyBaldwin 530–539 E14-1056 10.3115/v1/E14-1056 @@ -563,7 +563,7 @@ What Substitutes Tell Us - Analysis of an “All-Words” Lexical Substitution Corpus GerhardKremer KatrinErk - SebastianPadó + SebastianPadó StefanThater 540–549 E14-1057 @@ -573,8 +573,8 @@ Weighted <fixed-case>K</fixed-case>rippendorff’s alpha is a more reliable metrics for multi-coders ordinal annotations: experimental studies on emotion, opinion and coreference annotation Jean-YvesAntoine - JeanneVillaneau - AnaïsLefeuvre + JeanneVillaneau + AnaïsLefeuvre 550–559 E14-1058 10.3115/v1/E14-1058 @@ -603,7 +603,7 @@ How to Produce Unseen Teddy Bears: Improved Morphological Processing of Compounds in <fixed-case>SMT</fixed-case> FabienneCap - AlexanderFraser + AlexanderFraser MarionWeller AoifeCahill 579–587 @@ -625,7 +625,7 @@ Applying the semantics of negation to <fixed-case>SMT</fixed-case> through n-best list re-ranking FedericoFancellu - BonnieWebber + BonnieWebber 598–606 E14-1063 10.3115/v1/E14-1063 @@ -644,7 +644,7 @@ Augmenting Translation Models with Simulated Acoustic Confusions for Improved Spoken Language Translation YuliaTsvetkov FlorianMetze - ChrisDyer + ChrisDyer 616–625 E14-1065 10.3115/v1/E14-1065 @@ -655,7 +655,7 @@ AcielEshky BenAllison SubramanianRamamoorthy - MarkSteedman + MarkSteedman 626–635 E14-1066 10.3115/v1/E14-1066 @@ -682,7 +682,7 @@ “<fixed-case>I</fixed-case> Object!” Modeling Latent Pragmatic Effects in Courtroom Dialogues DanGoldwasser - HalDaumé III + HalDaumé III 655–663 E14-1069 10.3115/v1/E14-1069 @@ -731,7 +731,7 @@ Cluster-based Prediction of User Ratings for Stylistic Surface Realisation NinaDethlefs HeribertoCuayáhuitl - HelenHastie + HelenHastie VerenaRieser OliverLemon 702–711 @@ -751,7 +751,7 @@ Hybrid text simplification using synchronous dependency grammars with hand-written and automatically harvested rules AdvaithSiddharthan - AngroshMandya + AngroshMandya 722–731 E14-1076 10.3115/v1/E14-1076 @@ -768,9 +768,9 @@ Learning part-of-speech taggers with inter-annotator agreement loss - BarbaraPlank + BarbaraPlank DirkHovy - AndersSøgaard + AndersSøgaard 742–751 E14-1078 10.3115/v1/E14-1078 @@ -797,7 +797,7 @@ <fixed-case>ITU</fixed-case> <fixed-case>T</fixed-case>urkish <fixed-case>NLP</fixed-case> Web Service - GülşenEryiğit + GülşenEryiğit 1–4 E14-2001 10.3115/v1/E14-2001 @@ -805,9 +805,9 @@ Multilingual, Efficient and Easy <fixed-case>NLP</fixed-case> Processing with <fixed-case>IXA</fixed-case> Pipeline - RodrigoAgerri + RodrigoAgerri JosuBermudez - GermanRigau + GermanRigau 5–8 E14-2002 10.3115/v1/E14-2002 @@ -816,12 +816,12 @@ <fixed-case>XL</fixed-case>ike Project Language Analysis Services XavierCarreras - LluísPadró + LluísPadró LeiZhang AchimRettinger ZhixingLi EstebanGarcía-Cuesta - ŽeljkoAgić + ŽeljkoAgić BožoBekavac BlazFortuna TadejŠtajner @@ -863,20 +863,20 @@ <fixed-case>CASMACAT</fixed-case>: A Computer-assisted Translation Workbench - VicentAlabau + VicentAlabau ChristianBuck MichaelCarl - FranciscoCasacuberta - MercedesGarcía-Martínez + FranciscoCasacuberta + MercedesGarcía-Martínez UlrichGermann - JesúsGonzález-Rubio - RobinHill + JesúsGonzález-Rubio + RobinHill PhilippKoehn - LuisLeiva + LuisLeiva BartoloméMesa-Lao - DanielOrtiz-Martínez - HerveSaint-Amand - GermánSanchis Trilles + DanielOrtiz-Martínez + HerveSaint-Amand + GermánSanchis Trilles CharaTsoukala 25–28 E14-2007 @@ -887,7 +887,7 @@ <fixed-case>J</fixed-case>ane: Open Source Machine Translation System Combination MarkusFreitag MatthiasHuck - HermannNey + HermannNey 29–32 E14-2008 10.3115/v1/E14-2008 @@ -896,8 +896,8 @@ <fixed-case>CHISPA</fixed-case> on the <fixed-case>GO</fixed-case>: A mobile <fixed-case>C</fixed-case>hinese-<fixed-case>S</fixed-case>panish translation service for travellers in trouble JordiCentelles - Marta R.Costa-jussà - Rafael E.Banchs + Marta R.Costa-jussà + Rafael E.Banchs 33–36 E14-2009 10.3115/v1/E14-2009 @@ -925,8 +925,8 @@ The New Thot Toolkit for Fully-Automatic and Interactive Statistical Machine Translation - DanielOrtiz-Martínez - FranciscoCasacuberta + DanielOrtiz-Martínez + FranciscoCasacuberta 45–48 E14-2012 10.3115/v1/E14-2012 @@ -946,9 +946,9 @@ Finding Terms in Corpora for Many Languages with the <fixed-case>S</fixed-case>ketch <fixed-case>E</fixed-case>ngine MilošJakubíček AdamKilgarriff - VojtěchKovář - PavelRychlý - VítSuchomel + VojtěchKovář + PavelRychlý + VítSuchomel 53–56 E14-2014 10.3115/v1/E14-2014 @@ -968,9 +968,9 @@ <fixed-case>DKIE</fixed-case>: Open Source Information Extraction for <fixed-case>D</fixed-case>anish - LeonDerczynski + LeonDerczynski Camilla VilhelmsenField - Kenneth S.Bøgh + Kenneth S.Bøgh 61–64 E14-2016 10.3115/v1/E14-2016 @@ -980,8 +980,8 @@ Event Extraction for <fixed-case>B</fixed-case>alkan Languages VanniZavarella DilekKüçük - HristoTanev - AliHürriyetoğlu + HristoTanev + AliHürriyetoğlu 65–68 E14-2017 10.3115/v1/E14-2017 @@ -998,7 +998,7 @@ <fixed-case>SPARSAR</fixed-case>: An Expressive Poetry Reader - RodolfoDelmonte + RodolfoDelmonte Anton MariaPrati 73–76 E14-2019 @@ -1023,8 +1023,8 @@ Answering List Questions using Web as a corpus - PatríciaGonçalves - AntónioBranco + PatríciaGonçalves + AntónioBranco 81–84 E14-2021 10.3115/v1/E14-2021 @@ -1045,7 +1045,7 @@ <fixed-case>R</fixed-case>elation<fixed-case>F</fixed-case>actory: A Fast, Modular and Effective System for Knowledge Base Population BenjaminRoth TassiloBarth - GrzegorzChrupała + GrzegorzChrupała MartinGropp DietrichKlakow 89–92 @@ -1063,9 +1063,9 @@ The <fixed-case>GATE</fixed-case> Crowdsourcing Plugin: Crowdsourcing Annotated Corpora Made Easy - KalinaBontcheva + KalinaBontcheva IanRoberts - LeonDerczynski + LeonDerczynski SamanthaAlexander-Eames 97–100 E14-2025 @@ -1145,7 +1145,7 @@ Resolving Coreferent and Associative Noun Phrases in Scientific Text - InaRoesiger + InaRoesiger SimoneTeufel 45–55 E14-3006 @@ -1197,8 +1197,8 @@ Automatic Creation of <fixed-case>A</fixed-case>rabic Named Entity Annotated Corpus Using <fixed-case>W</fixed-case>ikipedia MahaAlthobaiti - UdoKruschwitz - MassimoPoesio + UdoKruschwitz + MassimoPoesio 106–115 E14-3012 10.3115/v1/E14-3012 @@ -1220,7 +1220,7 @@ E14-4 ShulyWintner StefanRiezler - SharonGoldwater + SharonGoldwater 10.3115/v1/E14-4 Association for Computational Linguistics
Gothenburg, Sweden
@@ -1234,8 +1234,8 @@ Easy Web Search Results Clustering: When Baselines Can Reach State-of-the-Art Algorithms - Jose G.Moreno - GaëlDias + Jose G.Moreno + GaëlDias 1–5 E14-4001 10.3115/v1/E14-4001 @@ -1244,8 +1244,8 @@ Propagation Strategies for Building Temporal Ontologies MohammedHasanuzzaman - GaëlDias - StéphaneFerrari + GaëlDias + StéphaneFerrari YannMathet 6–11 E14-4002 @@ -1254,9 +1254,9 @@ <fixed-case>C</fixed-case>hinese Open Relation Extraction for Knowledge Acquisition - Yuen-HsienTseng + Yuen-HsienTseng Lung-HaoLee - Shu-YenLin + Shu-YenLin Bo-ShunLiao Mei-JunLiu Hsin-HsiChen @@ -1271,8 +1271,8 @@ Temporal Text Ranking and Automatic Dating of Texts VladNiculae MarcosZampieri - LiviuDinu - Alina MariaCiobanu + LiviuDinu + Alina MariaCiobanu 17–21 E14-4004 10.3115/v1/E14-4004 @@ -1290,7 +1290,7 @@ Projecting the Knowledge Graph to Syntactic Parsing AndreaGesmundo - KeithHall + KeithHall 28–32 E14-4006 10.3115/v1/E14-4006 @@ -1309,8 +1309,8 @@ Chasing Hypernyms in Vector Spaces with Entropy EnricoSantus AlessandroLenci - QinLu - SabineSchulte im Walde + QinLu + SabineSchulte im Walde 38–42 E14-4008 10.3115/v1/E14-4008 @@ -1320,7 +1320,7 @@ Tight Integration of Speech Disfluency Removal into <fixed-case>SMT</fixed-case> EunahCho JanNiehues - AlexWaibel + AlexWaibel 43–47 E14-4009 10.3115/v1/E14-4009 @@ -1329,7 +1329,7 @@ Non-Monotonic Parsing of Fluent Umm <fixed-case>I</fixed-case> mean Disfluent Sentences Mohammad SadeghRasooli - JoelTetreault + JoelTetreault 48–53 E14-4010 10.3115/v1/E14-4010 @@ -1337,7 +1337,7 @@ Lightly-Supervised Word Sense Translation Error Detection for an Interactive Conversational Spoken Language Translation System - DennisMehay + DennisMehay SankaranarayananAnanthakrishnan SanjikaHewavitharana 54–58 @@ -1357,9 +1357,9 @@ Predicting <fixed-case>R</fixed-case>omanian Stress Assignment - Alina MariaCiobanu + Alina MariaCiobanu AncaDinu - LiviuDinu + LiviuDinu 64–68 E14-4013 10.3115/v1/E14-4013 @@ -1367,8 +1367,8 @@ Passive-Aggressive Sequence Labeling with Discriminative Post-Editing for Recognising Person Entities in Tweets - LeonDerczynski - KalinaBontcheva + LeonDerczynski + KalinaBontcheva 69–73 E14-4014 10.3115/v1/E14-4014 @@ -1378,9 +1378,9 @@ Accelerated Estimation of Conditional Random Fields using a Pseudo-Likelihood-inspired Perceptron Variant TeemuRuokolainen - MiikkaSilfverberg + MiikkaSilfverberg MikkoKurimo - KristerLinden + KristerLinden 74–78 E14-4015 10.3115/v1/E14-4015 @@ -1407,8 +1407,8 @@ Inference of Phrase-Based Translation Models via Minimum Description Length - JesúsGonzález-Rubio - FranciscoCasacuberta + JesúsGonzález-Rubio + FranciscoCasacuberta 90–94 E14-4018 10.3115/v1/E14-4018 @@ -1416,7 +1416,7 @@ <fixed-case>C</fixed-case>hinese Native Language Identification - ShervinMalmasi + ShervinMalmasi MarkDras 95–99 E14-4019 @@ -1445,7 +1445,7 @@ Using a Random Forest Classifier to Compile Bilingual Dictionaries of Technical Terms from Comparable Corpora GeorgiosKontonatsios IoannisKorkontzelos - Jun’ichiTsujii + Jun’ichiTsujii SophiaAnaniadou 111–116 E14-4022 @@ -1475,7 +1475,7 @@ Hoa TrongVu GrahamNeubig SakrianiSakti - TomokiToda + TomokiToda SatoshiNakamura 128–132 E14-4025 @@ -1527,7 +1527,7 @@ Improving Dependency Parsers with Supertags HirokiOuchi KevinDuh - YujiMatsumoto + YujiMatsumoto 154–158 E14-4030 10.3115/v1/E14-4030 @@ -1537,7 +1537,7 @@ Improving Dependency Parsers using <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar Bharat RamAmbati TejaswiniDeoskar - MarkSteedman + MarkSteedman 159–163 E14-4031 10.3115/v1/E14-4031 @@ -1555,7 +1555,7 @@ Data Driven Language Transfer Hypotheses - BenSwanson + BenSwanson EugeneCharniak 169–173 E14-4033 @@ -1566,7 +1566,7 @@ Simple and Effective Approach for Consistent Training of Hierarchical Phrase-based Translation Models StephanPeitz DavidVilar - HermannNey + HermannNey 174–179 E14-4034 10.3115/v1/E14-4034 @@ -1575,7 +1575,7 @@ Some Experiments with a Convex <fixed-case>IBM</fixed-case> Model 2 AndreiSimion - MichaelCollins + MichaelCollins CliffStein 180–184 E14-4035 @@ -1584,8 +1584,8 @@ Active Learning for Post-Editing Based Incrementally Retrained <fixed-case>MT</fixed-case> - Aswarth AbhilashDara - Josefvan Genabith + Aswarth AbhilashDara + Josefvan Genabith QunLiu JohnJudge AntonioToral @@ -1598,7 +1598,7 @@ Analysis and Prediction of Unalignable Words in Parallel Text FrancesYung KevinDuh - YujiMatsumoto + YujiMatsumoto 190–194 E14-4037 10.3115/v1/E14-4037 @@ -1616,7 +1616,7 @@ Multi-Domain Sentiment Relevance Classification with Automatic Representation Learning ChristianScheible - HinrichSchütze + HinrichSchütze 200–204 E14-4039 10.3115/v1/E14-4039 @@ -1625,7 +1625,7 @@ A New Entity Salience Task with Millions of Training Examples JesseDunietz - DanielGillick + DanielGillick 205–209 E14-4040 10.3115/v1/E14-4040 @@ -1634,7 +1634,7 @@ Finding middle ground? Multi-objective Natural Language Generation from time-series data DimitraGkatzia - HelenHastie + HelenHastie OliverLemon 210–214 E14-4041 @@ -1645,7 +1645,7 @@ One Sense per Tweeter ... and Other Lexical Semantic Tales of <fixed-case>T</fixed-case>witter SpandanaGella PaulCook - TimothyBaldwin + TimothyBaldwin 215–220 E14-4042 10.3115/v1/E14-4042 @@ -1662,7 +1662,7 @@ Crowdsourcing Annotation of Non-Local Semantic Roles Parvin SadatFeizabadi - SebastianPadó + SebastianPadó 226–230 E14-4044 10.3115/v1/E14-4044 diff --git a/data/xml/E17.xml b/data/xml/E17.xml index f3ff94ffe2..1d16f35471 100644 --- a/data/xml/E17.xml +++ b/data/xml/E17.xml @@ -5,7 +5,7 @@ Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers E17-1 MirellaLapata - PhilBlunsom + PhilBlunsom AlexanderKoller Association for Computational Linguistics
Valencia, Spain
@@ -38,7 +38,7 @@ Exploring Different Dimensions of Attention for Uncertainty Detection HeikeAdel - HinrichSchütze + HinrichSchütze 22–34 E17-1003 Neural networks with attention have proven effective for many natural language processing tasks. In this paper, we develop attention mechanisms for uncertainty detection. In particular, we generalize standardly used attention mechanisms by introducing external attention and sequence-preserving attention. These novel architectures differ from standard approaches in that they use external resources to compute attention weights and preserve sequence information. We compare them to other configurations along different dimensions of attention. Our novel architectures set the new state of the art on a Wikipedia benchmark dataset and perform similar to the state-of-the-art model on a biomedical benchmark which uses a large set of linguistic features. @@ -57,8 +57,8 @@ When is multitask learning effective? Semantic sequence prediction under varying data conditions - HéctorMartínez Alonso - BarbaraPlank + HéctorMartínez Alonso + BarbaraPlank 44–53 E17-1005 Multitask learning has been applied successfully to a range of tasks, mostly morphosyntactic. However, little is known on when MTL works and whether there are data characteristics that help to determine the success of MTL. In this paper we evaluate a range of semantic sequence labeling tasks in a MTL setup. We examine different auxiliary task configurations, amongst which a novel setup, and correlate their impact to data-dependent conditions. Our results show that MTL is not always effective, because significant improvements are obtained only for 1 out of 5 tasks. When successful, auxiliary tasks with compact and more uniform label distributions are preferable. @@ -66,10 +66,10 @@ Learning Compositionality Functions on Word Embeddings for Modelling Attribute Meaning in Adjective-Noun Phrases - MatthiasHartung + MatthiasHartung FabianKaupmann SoufianJebbara - PhilippCimiano + PhilippCimiano 54–64 E17-1006 Word embeddings have been shown to be highly effective in a variety of lexical semantic tasks. They tend to capture meaningful relational similarities between individual words, at the expense of lacking the capabilty of making the underlying semantic relation explicit. In this paper, we investigate the attribute relation that often holds between the constituents of adjective-noun phrases. We use CBOW word embeddings to represent word meaning and learn a compositionality function that combines the individual constituents into a phrase representation, thus capturing the compositional attribute meaning. The resulting embedding model, while being fully interpretable, outperforms count-based distributional vector space models that are tailored to attribute meaning in the two tasks of attribute selection and phrase similarity prediction. Moreover, as the model captures a generalized layer of attribute meaning, it bears the potential to be used for predictions over various attribute inventories without re-training. @@ -88,7 +88,7 @@ Distinguishing Antonyms and Synonyms in a Pattern-based Neural Network Kim AnhNguyen - SabineSchulte im Walde + SabineSchulte im Walde Ngoc ThangVu 76–85 E17-1008 @@ -100,8 +100,8 @@ AlexanderPanchenko EugenRuppert StefanoFaralli - Simone PaoloPonzetto - ChrisBiemann + Simone PaoloPonzetto + ChrisBiemann 86–98 E17-1009 The current trend in NLP is the use of highly opaque models, e.g. neural networks and word embeddings. While these models yield state-of-the-art results on a range of tasks, their drawback is poor interpretability. On the example of word sense induction and disambiguation (WSID), we show that it is possible to develop an interpretable model that matches the state-of-the-art models in accuracy. Namely, we present an unsupervised, knowledge-free WSID approach, which is interpretable at three levels: word sense inventory, sense feature representations, and disambiguation procedure. Experiments show that our model performs on par with state-of-the-art word sense embeddings and other unsupervised systems while offering the possibility to justify its decisions in human-readable form. @@ -110,7 +110,7 @@ Word Sense Disambiguation: A Unified Evaluation Framework and Empirical Comparison AlessandroRaganato - JoseCamacho-Collados + JoseCamacho-Collados RobertoNavigli 99–110 E17-1010 @@ -231,8 +231,8 @@ Cross-Lingual Dependency Parsing with Late Decoding for Truly Low-Resource Languages - MichaelSchlichtkrull - AndersSøgaard + MichaelSchlichtkrull + AndersSøgaard 220–229 E17-1021 In cross-lingual dependency annotation projection, information is often lost during transfer because of early decoding. We present an end-to-end graph-based neural network dependency parser that can be trained to reproduce matrices of edge scores, which can be directly projected across word alignments. We show that our approach to cross-lingual dependency parsing is not only simpler, but also achieves an absolute improvement of 2.25% averaged across 10 languages compared to the previous state of the art. @@ -240,10 +240,10 @@ Parsing <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies without training - HéctorMartínez Alonso - ŽeljkoAgić - BarbaraPlank - AndersSøgaard + HéctorMartínez Alonso + ŽeljkoAgić + BarbaraPlank + AndersSøgaard 230–240 E17-1022 We present UDP, the first training-free parser for Universal Dependencies (UD). Our algorithm is based on PageRank and a small set of specific dependency head rules. UDP features two-step decoding to guarantee that function words are attached as leaf nodes. The parser requires no training, and it is competitive with a delexicalized transfer system. UDP offers a linguistically sound unsupervised alternative to cross-lingual parsing for UD. The parser has very few parameters and distinctly robust to domain change across languages. @@ -273,8 +273,8 @@ Exploring the Impact of Pragmatic Phenomena on Irony Detection in Tweets: A Multilingual Corpus Study JihenKaroui - FarahBenamara - VéroniqueMoriceau + FarahBenamara + VéroniqueMoriceau VivianaPatti CristinaBosco NathalieAussenac-Gilles @@ -307,7 +307,7 @@ Cross-lingual <fixed-case>RST</fixed-case> Discourse Parsing ChloéBraud MaximinCoavoux - AndersSøgaard + AndersSøgaard 292–304 E17-1028 Discourse parsing is an integral part of understanding information flow and argumentative structure in documents. Most previous research has focused on inducing and evaluating models from the English RST Discourse Treebank. However, discourse treebanks for other languages exist, including Spanish, German, Basque, Dutch and Brazilian Portuguese. The treebanks share the same underlying linguistic theory, but differ slightly in the way documents are annotated. In this paper, we present (a) a new discourse parser which is simpler, yet competitive (significantly better on 2/3 metrics) to state of the art for English, (b) a harmonization of discourse treebanks across languages, enabling us to present (c) what to the best of our knowledge are the first experiments on cross-lingual discourse parsing. @@ -326,7 +326,7 @@ Sentence Segmentation in Narrative Transcripts from Neuropsychological Tests using Recurrent Convolutional Neural Networks MarcosTreviso ChristopherShulby - SandraAluísio + SandraAluísio 315–325 E17-1030 Automated discourse analysis tools based on Natural Language Processing (NLP) aiming at the diagnosis of language-impairing dementias generally extract several textual metrics of narrative transcripts. However, the absence of sentence boundary segmentation in the transcripts prevents the direct application of NLP methods which rely on these marks in order to function properly, such as taggers and parsers. We present the first steps taken towards automatic neuropsychological evaluation based on narrative discourse analysis, presenting a new automatic sentence segmentation method for impaired speech. Our model uses recurrent convolutional neural networks with prosodic, Part of Speech (PoS) features, and word embeddings. It was evaluated intrinsically on impaired, spontaneous speech as well as normal, prepared speech and presents better results for healthy elderly (CTL) (F1 = 0.74) and Mild Cognitive Impairment (MCI) patients (F1 = 0.70) than the Conditional Random Fields method (F1 = 0.55 and 0.53, respectively) used in the same context of our study. The results suggest that our model is robust for impaired speech and can be used in automated discourse analysis tools to differentiate narratives produced by MCI and CTL. @@ -344,7 +344,7 @@ From Segmentation to Analyses: a Probabilistic Model for Unsupervised Morphology Induction TomsBergmanis - SharonGoldwater + SharonGoldwater 337–346 E17-1032 A major motivation for unsupervised morphological analysis is to reduce the sparse data problem in under-resourced languages. Most previous work focus on segmenting surface forms into their constituent morphs (taking: tak +ing), but surface form segmentation does not solve the sparse data problem as the analyses of take and taking are not connected to each other. We present a system that adapts the MorphoChains system (Narasimhan et al., 2015) to provide morphological analyses that aim to abstract over spelling differences in functionally similar morphs. This results in analyses that are not compelled to use all the orthographic material of a word (stopping: stop +ing) or limited to only that material (acidified: acid +ify +ed). On average across six typologically varied languages our system has a similar or better F-score on EMMA (a measure of underlying morpheme accuracy) than three strong baselines; moreover, the total number of distinct morphemes identified by our system is on average 12.8% lower than for Morfessor (Virpioja et al., 2013), a state-of-the-art surface segmentation system. @@ -353,7 +353,7 @@ Creating <fixed-case>POS</fixed-case> Tagging and Dependency Parsing Experts via Topic Modeling AtreyeeMukherjee - SandraKübler + SandraKübler MatthiasScheutz 347–355 E17-1033 @@ -363,9 +363,9 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies and Morphology for <fixed-case>H</fixed-case>ungarian - and on the Price of Universality VeronikaVincze - KatalinSimkó + KatalinSimkó ZsoltSzántó - RichárdFarkas + RichárdFarkas 356–365 E17-1034 In this paper, we present how the principles of universal dependencies and morphology have been adapted to Hungarian. We report the most challenging grammatical phenomena and our solutions to those. On the basis of the adapted guidelines, we have converted and manually corrected 1,800 sentences from the Szeged Treebank to universal dependency format. We also introduce experiments on this manually annotated corpus for evaluating automatic conversion and the added value of language-specific, i.e. non-universal, annotations. Our results reveal that converting to universal dependencies is not necessarily trivial, moreover, using language-specific morphological features may have an impact on overall performance. @@ -384,10 +384,10 @@ Generating Natural Language Question-Answer Pairs from a Knowledge Graph Using a <fixed-case>RNN</fixed-case> Based Question Generation Model - SathishReddy + SathishReddy DineshRaghu - Mitesh M.Khapra - SachindraJoshi + Mitesh M.Khapra + SachindraJoshi 376–385 E17-1036 In recent years, knowledge graphs such as Freebase that capture facts about entities and relationships between them have been used actively for answering factoid questions. In this paper, we explore the problem of automatically generating question answer pairs from a given knowledge graph. The generated question answer (QA) pairs can be used in several downstream applications. For example, they could be used for training better QA systems. To generate such QA pairs, we first extract a set of keywords from entities and relationships expressed in a triple stored in the knowledge graph. From each such set, we use a subset of keywords to generate a natural language question that has a unique answer. We treat this subset of keywords as a sequence and propose a sequence to sequence model using RNN to generate a natural language question from it. Our RNN based model generates QA pairs with an accuracy of 33.61 percent and performs 110.47 percent (relative) better than a state-of-the-art template based method for generating natural language question from keywords. We also do an extrinsic evaluation by using the generated QA pairs to train a QA system and observe that the F1-score of the QA system improves by 5.5 percent (relative) when using automatically generated QA pairs in addition to manually generated QA pairs available for training. @@ -415,9 +415,9 @@ Efficient Benchmarking of <fixed-case>NLP</fixed-case> <fixed-case>API</fixed-case>s using Multi-armed Bandits - GholamrezaHaffari - Tuan DungTran - MarkCarman + GholamrezaHaffari + Tuan DungTran + MarkCarman 408–416 E17-1039 Comparing NLP systems to select the best one for a task of interest, such as named entity recognition, is critical for practitioners and researchers. A rigorous approach involves setting up a hypothesis testing scenario using the performance of the systems on query documents. However, often the hypothesis testing approach needs to send a lot of document queries to the systems, which can be problematic. In this paper, we present an effective alternative based on the multi-armed bandit (MAB). We propose a hierarchical generative model to represent the uncertainty in the performance measures of the competing systems, to be used by Thompson Sampling to solve the resulting MAB. Experimental results on both synthetic and real data show that our approach requires significantly fewer queries compared to the standard benchmarking technique to identify the best system according to F-measure. @@ -427,7 +427,7 @@ Character-Word <fixed-case>LSTM</fixed-case> Language Models LyanVerwimp JorisPelemans - HugoVan hamme + HugoVan hamme PatrickWambacq 417–427 E17-1040 @@ -436,9 +436,9 @@ A Hierarchical Neural Model for Learning Sequences of Dialogue Acts - Quan HungTran + Quan HungTran IngridZukerman - GholamrezaHaffari + GholamrezaHaffari 428–437 E17-1041 We propose a novel hierarchical Recurrent Neural Network (RNN) for learning sequences of Dialogue Acts (DAs). The input in this task is a sequence of utterances (i.e., conversational contributions) comprising a sequence of tokens, and the output is a sequence of DA labels (one label per utterance). Our model leverages the hierarchical nature of dialogue data by using two nested RNNs that capture long-range dependencies at the dialogue level and the utterance level. This model is combined with an attention mechanism that focuses on salient tokens in utterances. Our experimental results show that our model outperforms strong baselines on two popular datasets, Switchboard and MapTask; and our detailed empirical analysis highlights the impact of each aspect of our model. @@ -449,11 +449,11 @@ Tsung-HsienWen DavidVandyke NikolaMrkšić - MilicaGašić - Lina M.Rojas-Barahona + MilicaGašić + Lina M.Rojas-Barahona Pei-HaoSu StefanUltes - SteveYoung + SteveYoung 438–449 E17-1042 Teaching machines to accomplish tasks by conversing naturally with humans is challenging. Currently, developing task-oriented dialogue systems requires creating multiple components and typically this involves either a large amount of handcrafting, or acquiring costly labelled datasets to solve a statistical learning problem for each component. In this work we introduce a neural network-based text-in, text-out end-to-end trainable goal-oriented dialogue system along with a new way of collecting dialogue data based on a novel pipe-lined Wizard-of-Oz framework. This approach allows us to develop dialogue systems easily and without making too many assumptions about the task at hand. The results show that the model can converse with human subjects naturally whilst helping them to accomplish tasks in a restaurant search domain. @@ -464,8 +464,8 @@ BaolinPeng MichaelSeltzer Y.C.Ju - GeoffreyZweig - Kam-FaiWong + GeoffreyZweig + Kam-FaiWong 450–459 E17-1043 In this paper we tackle a unique and important problem of extracting a structured order from the conversation a customer has with an order taker at a restaurant. This is motivated by an actual system under development to assist in the order taking process. We develop a sequence-to-sequence model that is able to map from unstructured conversational input to the structured form that is conveyed to the kitchen and appears on the customer receipt. This problem is critically different from other tasks like machine translation where sequence-to-sequence models have been used: the input includes two sides of a conversation; the output is highly structured; and logical manipulations must be performed, for example when the customer changes his mind while ordering. We present a novel sequence-to-sequence model that incorporates a special attention-memory gating mechanism and conversational role markers. The proposed model improves performance over both a phrase-based machine translation approach and a standard sequence-to-sequence model. @@ -475,8 +475,8 @@ A Two-stage Sieve Approach for Quote Attribution GraceMuzny MichaelFang - AngelChang - DanJurafsky + AngelChang + DanJurafsky 460–470 E17-1044 We present a deterministic sieve-based system for attributing quotations in literary text and a new dataset: QuoteLi3. Quote attribution, determining who said what in a given text, is important for tasks like creating dialogue systems, and in newer areas like computational literary studies, where it creates opportunities to analyze novels at scale rather than only a few at a time. We release QuoteLi3, which contains more than 6,000 annotations linking quotes to speaker mentions and quotes to speaker entities, and introduce a new algorithm for quote attribution. Our two-stage algorithm first links quotes to mentions, then mentions to entities. Using two stages encapsulates difficult sub-problems and improves system performance. The modular design allows us to tune for overall performance or higher precision, which is useful for many real-world use cases. Our system achieves an average F-score of 87.5 across three novels, outperforming previous systems, and can be tuned for precision of 90.4 at a recall of 65.1. @@ -486,7 +486,7 @@ Out-of-domain <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Semantic Role Labeling SilvanaHartmann IliaKuznetsov - TeresaMartin + TeresaMartin IrynaGurevych 471–482 E17-1045 @@ -516,8 +516,8 @@ An Extensive Empirical Evaluation of Character-Based Morphological Tagging for 14 Languages GeorgHeigold - GuenterNeumann - Josefvan Genabith + GuenterNeumann + Josefvan Genabith 505–513 E17-1048 This paper investigates neural character-based morphological tagging for languages with complex morphology and large tag sets. Character-based approaches are attractive as they can handle rarely- and unseen words gracefully. We evaluate on 14 languages and observe consistent gains over a state-of-the-art morphological tagger across all languages except for English and French, where we match the state-of-the-art. We compare two architectures for computing character-based word vectors using recurrent (RNN) and convolutional (CNN) nets. We show that the CNN based approach performs slightly worse and less consistently than the RNN based approach. Small but systematic gains are observed when combining the two architectures by ensembling. @@ -525,9 +525,9 @@ Neural Multi-Source Morphological Reinflection - KatharinaKann + KatharinaKann RyanCotterell - HinrichSchütze + HinrichSchütze 514–524 E17-1049 We explore the task of multi-source morphological reinflection, which generalizes the standard, single-source version. The input consists of (i) a target tag and (ii) multiple pairs of source form and source tag for a lemma. The motivation is that it is beneficial to have access to more than one source form since different source forms can provide complementary information, e.g., different stems. We further present a novel extension to the encoder-decoder recurrent neural architecture, consisting of multiple encoders, to better solve the task. We show that our new architecture outperforms single-source reinflection models and publish our dataset for multi-source morphological reinflection to facilitate future research. @@ -535,9 +535,9 @@ Online Automatic Post-editing for <fixed-case>MT</fixed-case> in a Multi-Domain Translation Environment - RajenChatterjee + RajenChatterjee GebremedhenGebremelak - MatteoNegri + MatteoNegri MarcoTurchi 525–535 E17-1050 @@ -547,7 +547,7 @@ An Incremental Parser for <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation MarcoDamonte - Shay B.Cohen + Shay B.Cohen GiorgioSatta 536–546 E17-1051 @@ -558,7 +558,7 @@ Integrated Learning of Dialog Strategies and Semantic Parsing AishwaryaPadmakumar JesseThomason - Raymond J.Mooney + Raymond J.Mooney 547–557 E17-1052 Natural language understanding and dialog management are two integral components of interactive dialog systems. Previous research has used machine learning techniques to individually optimize these components, with different forms of direct and indirect supervision. We present an approach to integrate the learning of both a dialog strategy using reinforcement learning, and a semantic parser for robust natural language understanding, using only natural dialog interaction for supervision. Experimental results on a simulated task of robot instruction demonstrate that joint learning of both components improves dialog performance over learning either of these components alone. @@ -567,7 +567,7 @@ Unsupervised <fixed-case>AMR</fixed-case>-Dependency Parse Alignment Wei-TeChen - MarthaPalmer + MarthaPalmer 558–567 E17-1053 In this paper, we introduce an Abstract Meaning Representation (AMR) to Dependency Parse aligner. Alignment is a preliminary step for AMR parsing, and our aligner improves current AMR parser performance. Our aligner involves several different features, including named entity tags and semantic role labels, and uses Expectation-Maximization training. Results show that our aligner reaches an 87.1% F-Score score with the experimental data, and enhances AMR parsing. @@ -586,7 +586,7 @@ Multi-level Representations for Fine-Grained Typing of Knowledge Base Entities YadollahYaghoobzadeh - HinrichSchütze + HinrichSchütze 578–589 E17-1055 Entities are essential elements of natural language. In this paper, we present methods for learning multi-level representations of entities on three complementary levels: character (character patterns in entity names extracted, e.g., by neural networks), word (embeddings of words in entity names) and entity (entity embeddings). We investigate state-of-the-art learning methods on each level and find large differences, e.g., for deep learning models, traditional ngram features and the subword model of fasttext (Bojanowski et al., 2016) on the character level; for word2vec (Mikolov et al., 2013) on the word level; and for the order-aware model wang2vec (Ling et al., 2015a) on the entity level. We confirm experimentally that each level of representation contributes complementary information and a joint representation of all three levels improves the existing embedding based baseline for fine-grained entity typing by a large margin. Additionally, we show that adding information from entity descriptions further improves multi-level representations of entities. @@ -596,8 +596,8 @@ The <fixed-case>C</fixed-case>ontrast<fixed-case>M</fixed-case>edium Algorithm: Taxonomy Induction From Noisy Knowledge Graphs With Just A Few Links StefanoFaralli AlexanderPanchenko - ChrisBiemann - Simone PaoloPonzetto + ChrisBiemann + Simone PaoloPonzetto 590–600 E17-1056 In this paper, we present ContrastMedium, an algorithm that transforms noisy semantic networks into full-fledged, clean taxonomies. ContrastMedium is able to identify the embedded taxonomy structure from a noisy knowledge graph without explicit human supervision such as, for instance, a set of manually selected input root and leaf concepts. This is achieved by leveraging structural information from a companion reference taxonomy, to which the input knowledge graph is linked (either automatically or manually). When used in conjunction with methods for hypernym acquisition and knowledge base linking, our methodology provides a complete solution for end-to-end taxonomy induction. We conduct experiments using automatically acquired knowledge graphs, as well as a SemEval benchmark, and show that our method is able to achieve high performance on the task of taxonomy induction. @@ -650,7 +650,7 @@ Transition-Based Deep Input Linearization RatishPuduppully YueZhang - ManishShrivastava + ManishShrivastava 643–654 E17-1061 Traditional methods for deep NLG adopt pipeline approaches comprising stages such as constructing syntactic input, predicting function words, linearizing the syntactic input and generating the surface forms. Though easier to visualize, pipeline approaches suffer from error propagation. In addition, information available across modules cannot be leveraged by all modules. We construct a transition-based model to jointly perform linearization, function word prediction and morphological generation, which considerably improves upon the accuracy compared to a pipelined baseline system. On a standard deep input linearization shared task, our system achieves the best results reported so far. @@ -658,8 +658,8 @@ Generating flexible proper name references in text: Data, models and evaluation - ThiagoCastro Ferreira - EmielKrahmer + ThiagoCastro Ferreira + EmielKrahmer SanderWubben 655–664 E17-1062 @@ -688,7 +688,7 @@ Noisy-context surprisal as a human sentence processing cost model RichardFutrell - RogerLevy + RogerLevy 688–698 E17-1065 We use the noisy-channel theory of human sentence comprehension to develop an incremental processing cost model that unifies and extends key features of expectation-based and memory-based models. In this model, which we call noisy-context surprisal, the processing cost of a word is the surprisal of the word given a noisy representation of the preceding context. We show that this model accounts for an outstanding puzzle in sentence comprehension, language-dependent structural forgetting effects (Gibson and Thomas, 1999; Vasishth et al., 2010; Frank et al., 2016), which are previously not well modeled by either expectation-based or memory-based approaches. Additionally, we show that this model derives and generalizes locality effects (Gibson, 1998; Demberg and Keller, 2008), a signature prediction of memory-based models. We give corpus-based evidence for a key assumption in this derivation. @@ -697,7 +697,7 @@ Task-Specific Attentive Pooling of Phrase Alignments Contributes to Sentence Matching WenpengYin - HinrichSchütze + HinrichSchütze 699–709 E17-1066 This work studies comparatively two typical sentence matching tasks: textual entailment (TE) and answer selection (AS), observing that weaker phrase alignments are more critical in TE, while stronger phrase alignments deserve more attention in AS. The key to reach this observation lies in phrase detection, phrase representation, phrase alignment, and more importantly how to connect those aligned phrases of different matching degrees with the final classifier. Prior work (i) has limitations in phrase generation and representation, or (ii) conducts alignment at word and phrase levels by handcrafted features or (iii) utilizes a single framework of alignment without considering the characteristics of specific tasks, which limits the framework’s effectiveness across tasks. We propose an architecture based on Gated Recurrent Unit that supports (i) representation learning of phrases of arbitrary granularity and (ii) task-specific attentive pooling of phrase alignments between two sentences. Experimental results on TE and AS match our observation and show the effectiveness of our approach. @@ -730,7 +730,7 @@ SamikshaGupta AnupamJamatia UpendraKumar - BjörnGambäck + BjörnGambäck AmitavaDas 731–741 E17-1069 @@ -739,10 +739,10 @@ Argument Strength is in the Eye of the Beholder: Audience Effects in Persuasion - StephanieLukin - PranavAnand - MarilynWalker - SteveWhittaker + StephanieLukin + PranavAnand + MarilynWalker + SteveWhittaker 742–753 E17-1070 Americans spend about a third of their time online, with many participating in online conversations on social and political issues. We hypothesize that social media arguments on such issues may be more engaging and persuasive than traditional media summaries, and that particular types of people may be more or less convinced by particular styles of argument, e.g. emotional arguments may resonate with some personalities while factual arguments resonate with others. We report a set of experiments testing at large scale how audience variables interact with argument style to affect the persuasiveness of an argument, an under-researched topic within natural language processing. We show that belief change is affected by personality factors, with conscientious, open and agreeable people being more convinced by emotional arguments. @@ -761,7 +761,7 @@ A Strong Baseline for Learning Cross-Lingual Word Embeddings from Sentence Alignments OmerLevy - AndersSøgaard + AndersSøgaard YoavGoldberg 765–774 E17-1072 @@ -779,7 +779,7 @@ Nonsymbolic Text Representation - HinrichSchütze + HinrichSchütze 785–796 E17-1074 We introduce the first generic text representation model that is completely nonsymbolic, i.e., it does not require the availability of a segmentation or tokenization method that attempts to identify words or other symbolic units in text. This applies to training the parameters of the model on a training corpus as well as to applying it when computing the representation of a new text. We show that our model performs better than prior work on an information extraction and a text denoising task. @@ -808,8 +808,8 @@ End-to-end Relation Extraction using Neural Networks and <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic <fixed-case>N</fixed-case>etworks SachinPawar - PushpakBhattacharyya - GirishPalshikar + PushpakBhattacharyya + GirishPalshikar 818–827 E17-1077 End-to-end relation extraction refers to identifying boundaries of entity mentions, entity types of these mentions and appropriate semantic relation for each pair of mentions. Traditionally, separate predictive models were trained for each of these tasks and were used in a “pipeline” fashion where output of one model is fed as input to another. But it was observed that addressing some of these tasks jointly results in better performance. We propose a single, joint neural network based model to carry out all the three tasks of boundary identification, entity type classification and relation type classification. This model is referred to as “All Word Pairs” model (AWP-NN) as it assigns an appropriate label to each word pair in a given sentence for performing end-to-end relation extraction. We also propose to refine output of the AWP-NN model by using inference in Markov Logic Networks (MLN) so that additional domain knowledge can be effectively incorporated. We demonstrate effectiveness of our approach by achieving better end-to-end relation extraction performance than all 4 previous joint modelling approaches, on the standard dataset of ACE 2004. @@ -819,7 +819,7 @@ Trust, but Verify! Better Entity Linking through Automatic Verification BenjaminHeinzerling MichaelStrube - Chin-YewLin + Chin-YewLin 828–838 E17-1078 We introduce automatic verification as a post-processing step for entity linking (EL). The proposed method trusts EL system results collectively, by assuming entity mentions are mostly linked correctly, in order to create a semantic profile of the given text using geospatial and temporal information, as well as fine-grained entity types. This profile is then used to automatically verify each linked mention individually, i.e., to predict whether it has been linked correctly or not. Verification allows leveraging a rich set of global and pairwise features that would be prohibitively expensive for EL systems employing global inference. Evaluation shows consistent improvements across datasets and systems. In particular, when applied to state-of-the-art systems, our method yields an absolute improvement in linking performance of up to 1.7 F1 on AIDA/CoNLL’03 and up to 2.4 F1 on the English TAC KBP 2015 TEDL dataset. @@ -877,11 +877,11 @@ Multilingual Training of Crosslingual Word Embeddings - LongDuong + LongDuong HiroshiKanayama TengfeiMa StevenBird - TrevorCohn + TrevorCohn 894–904 E17-1084 Crosslingual word embeddings represent lexical items from different languages using the same vector space, enabling crosslingual transfer. Most prior work constructs embeddings for a pair of languages, with English on one side. We investigate methods for building high quality crosslingual word embeddings for many languages in a unified vector space. In this way, we can exploit and combine strength of many languages. We obtained high performance on bilingual lexicon induction, monolingual similarity and crosslingual document classification tasks. @@ -890,7 +890,7 @@ Building Lexical Vector Representations from Concept Definitions DaniloSilva de Carvalho - Minh LeNguyen + Minh LeNguyen 905–915 E17-1085 The use of distributional language representations have opened new paths in solving a variety of NLP problems. However, alternative approaches can take advantage of information unavailable through pure statistical means. This paper presents a method for building vector representations from meaning unit blocks called concept definitions, which are obtained by extracting information from a curated linguistic resource (Wiktionary). The representations obtained in this way can be compared through conventional cosine similarity and are also interpretable by humans. Evaluation was conducted in semantic similarity and relatedness test sets, with results indicating a performance comparable to other methods based on single linguistic resource extraction. The results also indicate noticeable performance gains when combining distributional similarity scores with the ones obtained using this approach. Additionally, a discussion on the proposed method’s shortcomings is provided in the analysis of error cases. @@ -898,7 +898,7 @@ <fixed-case>S</fixed-case>hotgun<fixed-case>WSD</fixed-case>: An unsupervised algorithm for global word sense disambiguation inspired by <fixed-case>DNA</fixed-case> sequencing - AndreiButnaru + AndreiButnaru Radu TudorIonescu FlorentinaHristea 916–926 @@ -909,7 +909,7 @@ <fixed-case>L</fixed-case>anide<fixed-case>NN</fixed-case>: Multilingual Language Identification on Character Window TomKocmi - OndřejBojar + OndřejBojar 927–936 E17-1087 In language identification, a common first step in natural language processing, we want to automatically determine the language of some input text. Monolingual language identification assumes that the given document is written in one language. In multilingual language identification, the document is usually in two or three languages and we just want their names. We aim one step further and propose a method for textual language identification where languages can change arbitrarily and the goal is to identify the spans of each of the languages. Our method is based on Bidirectional Recurrent Neural Networks and it performs well in monolingual and multilingual language identification tasks on six datasets covering 131 languages. The method keeps the accuracy also for short documents and across domains, so it is ideal for off-the-shelf use without preparation of training data. @@ -921,7 +921,7 @@ AdamMakarucha GrahamNeubig StevenBird - TrevorCohn + TrevorCohn 937–947 E17-1088 Most languages have no established writing system and minimal written records. However, textual data is essential for natural language processing, and particularly important for training language models to support speech recognition. Even in cases where text data is missing, there are some languages for which bilingual lexicons are available, since creating lexicons is a fundamental task of documentary linguistics. We investigate the use of such lexicons to improve language models when textual training data is limited to as few as a thousand sentences. The method involves learning cross-lingual word embeddings as a preliminary step in training monolingual language models. Results across a number of languages show that language models are improved by this pre-training. Application to Yongning Na, a threatened language, highlights challenges in deploying the approach in real low-resource environments. @@ -931,7 +931,7 @@ Consistent Translation of Repeated Nouns using Syntactic and Semantic Cues XiaoPu LauraMascarell - AndreiPopescu-Belis + AndreiPopescu-Belis 948–957 E17-1089 We propose a method to decide whether two occurrences of the same noun in a source text should be translated consistently, i.e. using the same noun in the target text as well. We train and test classifiers that predict consistent translations based on lexical, syntactic, and semantic features. We first evaluate the accuracy of our classifiers intrinsically, in terms of the accuracy of consistency predictions, over a subset of the UN Corpus. Then, we also evaluate them in combination with phrase-based statistical MT systems for Chinese-to-English and German-to-English. We compare the automatic post-editing of noun translations with the re-ranking of the translation hypotheses based on the classifiers’ output, and also use these methods in combination. This improves over the baseline and closes up to 50% of the gap in BLEU scores between the baseline and an oracle classifier. @@ -939,7 +939,7 @@ Psycholinguistic Models of Sentence Processing Improve Sentence Readability Ranking - David M.Howcroft + David M.Howcroft VeraDemberg 958–968 E17-1090 @@ -951,7 +951,7 @@ PradiptoDas YandiXia AaronLevine - GiuseppeDi Fabbrizio + GiuseppeDi Fabbrizio AnkurDatta 969–979 E17-1091 @@ -974,7 +974,7 @@ GeorgiosKontonatsios TingtingMu John Y.Goulermas - Jun’ichiTsujii + Jun’ichiTsujii SophiaAnaniadou 991–1001 E17-1093 @@ -984,7 +984,7 @@ <fixed-case>SMART</fixed-case>ies: Sentiment Models for <fixed-case>A</fixed-case>rabic Target entities NouraFarra - KathyMcKeown + KathyMcKeown 1002–1013 E17-1094 We consider entity-level sentiment analysis in Arabic, a morphologically rich language with increasing resources. We present a system that is applied to complex posts written in response to Arabic newspaper articles. Our goal is to identify important entity “targets” within the post along with the polarity expressed about each target. We achieve significant improvements over multiple baselines, demonstrating that the use of specific morphological representations improves the performance of identifying both important targets and their sentiment, and that the use of distributional semantic clusters further boosts performances for these representations, especially when richer linguistic resources are not available. @@ -992,7 +992,7 @@ Exploring Convolutional Neural Networks for Sentiment Analysis of <fixed-case>S</fixed-case>panish tweets - IsabelSegura-Bedmar + IsabelSegura-Bedmar AntonioQuirós PalomaMartínez 1014–1022 @@ -1003,7 +1003,7 @@ Contextual Bidirectional Long Short-Term Memory Recurrent Neural Network Language Models: A Generative Approach to Sentiment Analysis AmrMousa - BjörnSchuller + BjörnSchuller 1023–1032 E17-1096 Traditional learning-based approaches to sentiment analysis of written text use the concept of bag-of-words or bag-of-n-grams, where a document is viewed as a set of terms or short combinations of terms disregarding grammar rules or word order. Novel approaches de-emphasize this concept and view the problem as a sequence classification problem. In this context, recurrent neural networks (RNNs) have achieved significant success. The idea is to use RNNs as discriminative binary classifiers to predict a positive or negative sentiment label at every word position then perform a type of pooling to get a sentence-level polarity. Here, we investigate a novel generative approach in which a separate probability distribution is estimated for every sentiment using language models (LMs) based on long short-term memory (LSTM) RNNs. We introduce a novel type of LM using a modified version of bidirectional LSTM (BLSTM) called contextual BLSTM (cBLSTM), where the probability of a word is estimated based on its full left and right contexts. Our approach is compared with a BLSTM binary classifier. Significant improvements are observed in classifying the IMDB movie review dataset. Further improvements are achieved via model combination. @@ -1013,7 +1013,7 @@ Large-scale Opinion Relation Extraction with Distantly Supervised Neural Network ChangzhiSun YuanbinWu - ManLan + ManLan ShiliangSun QiZhang 1033–1043 @@ -1035,7 +1035,7 @@ JiataoGu GrahamNeubig KyunghyunCho - Victor O.K.Li + Victor O.K.Li 1053–1062 E17-1099 Translating in real-time, a.k.a.simultaneous translation, outputs translation words before the input sentence ends, which is a challenging problem for conventional machine translation methods. We propose a neural machine translation (NMT) framework for simultaneous translation in which an agent learns to make decisions on when to translate from the interaction with a pre-trained NMT environment. To trade off quality and delay, we extensively explore various targets for delay and design a method for beam-search applicable in the simultaneous MT setting. Experiments against state-of-the-art baselines on two language pairs demonstrate the efficacy of the proposed framework both quantitatively and qualitatively. @@ -1044,7 +1044,7 @@ A Multifaceted Evaluation of Neural versus Phrase-Based Machine Translation for 9 Language Directions AntonioToral - Víctor M.Sánchez-Cartagena + Víctor M.Sánchez-Cartagena 1063–1073 E17-1100 We aim to shed light on the strengths and weaknesses of the newly introduced neural machine translation paradigm. To that end, we conduct a multifaceted evaluation in which we compare outputs produced by state-of-the-art neural machine translation and phrase-based machine translation systems for 9 language directions across a number of dimensions. Specifically, we measure the similarity of the outputs, their fluency and amount of reordering, the effect of sentence length and performance across different error categories. We find out that translations produced by neural machine translation systems are considerably different, more fluent and more accurate in terms of word order compared to those produced by phrase-based systems. Neural machine translation systems are also more accurate at producing inflected forms, but they perform poorly when translating very long sentences. @@ -1067,7 +1067,7 @@ Bilingual Lexicon Induction by Learning to Combine Word-Level and Character-Level Representations GeertHeyman IvanVulić - Marie-FrancineMoens + Marie-FrancineMoens 1085–1095 E17-1102 We study the problem of bilingual lexicon induction (BLI) in a setting where some translation resources are available, but unknown translations are sought for certain, possibly domain-specific terminology. We frame BLI as a classification problem for which we design a neural network based classification architecture composed of recurrent long short-term memory and deep feed forward networks. The results show that word- and character-level representations each improve state-of-the-art results for BLI, and the best results are obtained by exploiting the synergy between these word- and character-level representations in the classification model. @@ -1109,7 +1109,7 @@ Predicting Counselor Behaviors in Motivational Interviewing Encounters VerónicaPérez-Rosas - RadaMihalcea + RadaMihalcea KennethResnicow SatinderSingh LawrenceAn @@ -1122,7 +1122,7 @@ Authorship Attribution Using Text Distortion - EfstathiosStamatatos + EfstathiosStamatatos 1138–1149 E17-1107 Authorship attribution is associated with important applications in forensics and humanities research. A crucial point in this field is to quantify the personal style of writing, ideally in a way that is not affected by changes in topic or genre. In this paper, we present a novel method that enhances authorship attribution effectiveness by introducing a text distortion step before extracting stylometric measures. The proposed method attempts to mask topic-specific information that is not related to the personal style of authors. Based on experiments on two main tasks in authorship attribution, closed-set attribution and authorship verification, we demonstrate that the proposed approach can enhance existing methods especially under cross-topic conditions, where the training and test corpora do not match in topic. @@ -1131,7 +1131,7 @@ Structured Learning for Temporal Relation Extraction from Clinical Records ArtuurLeeuwenberg - Marie-FrancineMoens + Marie-FrancineMoens 1150–1158 E17-1108 We propose a scalable structured learning model that jointly predicts temporal relations between events and temporal expressions (TLINKS), and the relation between these events and the document creation time (DCTR). We employ a structured perceptron, together with integer linear programming constraints for document-level inference during training and prediction to exploit relational properties of temporality, together with global learning of the relations at the document level. Moreover, this study gives insights in the results of integrating constraints for temporal relation extraction when using structured learning and prediction. Our best system outperforms the state-of-the art on both the CONTAINS TLINK task, and the DCTR task. @@ -1142,7 +1142,7 @@ ShwetaYadav AsifEkbal SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya 1159–1170 E17-1109 Text mining has drawn significant attention in recent past due to the rapid growth in biomedical and clinical records. Entity extraction is one of the fundamental components for biomedical text mining. In this paper, we propose a novel approach of feature selection for entity extraction that exploits the concept of deep learning and Particle Swarm Optimization (PSO). The system utilizes word embedding features along with several other features extracted by studying the properties of the datasets. We obtain an interesting observation that compact word embedding features as determined by PSO are more effective compared to the entire word embedding feature set for entity extraction. The proposed system is evaluated on three benchmark biomedical datasets such as GENIA, GENETAG, and AiMed. The effectiveness of the proposed approach is evident with significant performance gains over the baseline models as well as the other existing systems. We observe improvements of 7.86%, 5.27% and 7.25% F-measure points over the baseline models for GENIA, GENETAG, and AiMed dataset respectively. @@ -1161,7 +1161,7 @@ Noise Mitigation for Neural Entity Typing and Relation Extraction YadollahYaghoobzadeh HeikeAdel - HinrichSchütze + HinrichSchütze 1183–1194 E17-1111 In this paper, we address two different types of noise in information extraction models: noise from distant supervision and noise from pipeline input features. Our target tasks are entity typing and relation extraction. For the first noise type, we introduce multi-instance multi-label learning algorithms using neural network models, and apply them to fine-grained entity typing for the first time. Our model outperforms the state-of-the-art supervised approach which uses global embeddings of entities. For the second noise type, we propose ways to improve the integration of noisy entity type predictions into relation extraction. Our experiments show that probabilistic predictions are more robust than discrete predictions and that joint training of the two tasks performs best. @@ -1191,8 +1191,8 @@ A Multi-task Approach to Predict Likability of Books SurajMaharjan JohnArevalo - ManuelMontes - Fabio A.González + ManuelMontes + Fabio A.González ThamarSolorio 1217–1227 E17-1114 @@ -1214,7 +1214,7 @@ DebnilSur LukeShrimpton IainMurray - SharonGoldwater + SharonGoldwater 1239–1248 E17-1116 Political surveys have indicated a relationship between a sense of Scottish identity and voting decisions in the 2014 Scottish Independence Referendum. Identity is often reflected in language use, suggesting the intuitive hypothesis that individuals who support Scottish independence are more likely to use distinctively Scottish words than those who oppose it. In the first large-scale study of sociolinguistic variation on social media in the UK, we identify distinctively Scottish terms in a data-driven way, and find that these terms are indeed used at a higher rate by users of pro-independence hashtags than by users of anti-independence hashtags. However, we also find that in general people are less likely to use distinctively Scottish words in tweets with referendum-related hashtags than in their general Twitter activity. We attribute this difference to style shifting relative to audience, aligning with previous work showing that Twitter users tend to use fewer local variants when addressing a broader audience. @@ -1225,9 +1225,9 @@ AdhigunaKuncoro MiguelBallesteros LingpengKong - ChrisDyer + ChrisDyer GrahamNeubig - Noah A.Smith + Noah A.Smith 1249–1258 E17-1117 Recurrent neural network grammars (RNNG) are a recently proposed probablistic generative modeling family for natural language. They show state-of-the-art language modeling and parsing performance. We investigate what information they learn, from a linguistic perspective, through various ablations to the model and the data, and by augmenting the model with an attention mechanism (GA-RNNG) to enable closer inspection. We find that explicit modeling of composition is crucial for achieving the best performance. Through the attention mechanism, we find that headedness plays a central role in phrasal representation (with the model’s latent attention largely agreeing with predictions made by hand-crafted head rules, albeit with some important differences). By training grammars without nonterminal labels, we find that phrasal representations depend minimally on nonterminals, providing support for the endocentricity hypothesis. @@ -1236,7 +1236,7 @@ Incremental Discontinuous Phrase Structure Parsing with the <fixed-case>GAP</fixed-case> Transition MaximinCoavoux - BenoîtCrabbé + BenoîtCrabbé 1259–1270 E17-1118 This article introduces a novel transition system for discontinuous lexicalized constituent parsing called SR-GAP. It is an extension of the shift-reduce algorithm with an additional gap transition. Evaluation on two German treebanks shows that SR-GAP outperforms the previous best transition-based discontinuous parser (Maier, 2015) by a large margin (it is notably twice as accurate on the prediction of discontinuous constituents), and is competitive with the state of the art (Fernández-González and Martins, 2015). As a side contribution, we adapt span features (Hall et al., 2014) to discontinuous parsing. @@ -1259,7 +1259,7 @@ Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers E17-2 MirellaLapata - PhilBlunsom + PhilBlunsom AlexanderKoller Association for Computational Linguistics
Valencia, Spain
@@ -1275,7 +1275,7 @@ Multilingual Back-and-Forth Conversion between Content and Function Head for Easy Dependency Parsing RyosukeKohita HiroshiNoji - YujiMatsumoto + YujiMatsumoto 1–7 E17-2001 Universal Dependencies (UD) is becoming a standard annotation scheme cross-linguistically, but it is argued that this scheme centering on content words is harder to parse than the conventional one centering on function words. To improve the parsability of UD, we propose a back-and-forth conversion algorithm, in which we preprocess the training treebank to increase parsability, and reconvert the parser outputs to follow the UD scheme as a postprocess. We show that this technique consistently improves LAS across languages even with a state-of-the-art parser, in particular on core dependency arcs such as nominal modifier. We also provide an in-depth analysis to understand why our method increases parsability. @@ -1284,11 +1284,11 @@ <fixed-case>URIEL</fixed-case> and lang2vec: Representing languages as typological, geographical, and phylogenetic vectors PatrickLittell - David R.Mortensen + David R.Mortensen KeLin KatherineKairis CarlisleTurner - LoriLevin + LoriLevin 8–14 E17-2002 We introduce the URIEL knowledge base for massively multilingual NLP and the lang2vec utility, which provides information-rich vector identifications of languages drawn from typological, geographical, and phylogenetic databases and normalized to have straightforward and consistent formats, naming, and semantics. The goal of URIEL and lang2vec is to enable multilingual NLP, especially on less-resourced languages and make possible types of experiments (especially but not exclusively related to NLP tasks) that are otherwise difficult or impossible due to the sparsity and incommensurability of the data sources. lang2vec vectors have been shown to reduce perplexity in multilingual language modeling, when compared to one-hot language identification vectors. @@ -1306,8 +1306,8 @@ Robust Training under Linguistic Adversity YitongLi - TrevorCohn - TimothyBaldwin + TrevorCohn + TimothyBaldwin 21–27 E17-2004 Deep neural networks have achieved remarkable results across many language processing tasks, however they have been shown to be susceptible to overfitting and highly sensitive to noise, including adversarial attacks. In this work, we propose a linguistically-motivated approach for training robust models based on exposing the model to corrupted text examples at training time. We consider several flavours of linguistically plausible corruption, include lexical semantic and syntactic methods. Empirically, we evaluate our method with a convolutional neural model across a range of sentiment analysis datasets. Compared with a baseline and the dropout method, our method achieves better overall performance. @@ -1316,7 +1316,7 @@ Using <fixed-case>T</fixed-case>witter Language to Predict the Real Estate Market MohammadzamanZamani - H. AndrewSchwartz + H. AndrewSchwartz 28–33 E17-2005 We explore whether social media can provide a window into community real estate -foreclosure rates and price changes- beyond that of traditional economic and demographic variables. We find language use in Twitter not only predicts real estate outcomes as well as traditional variables across counties, but that including Twitter language in traditional models leads to a significant improvement (e.g. from Pearson r = :50 to r = :59 for price changes). We overcome the challenge of the relative sparsity and noise in Twitter language variables by showing that training on the residual error of the traditional models leads to more accurate overall assessments. Finally, we discover that it is Twitter language related to business (e.g. ‘company’, ‘marketing’) and technology (e.g. ‘technology’, ‘internet’), among others, that yield predictive power over economics. @@ -1324,7 +1324,7 @@ Lexical Simplification with Neural Ranking - GustavoPaetzold + GustavoPaetzold LuciaSpecia 34–40 E17-2006 @@ -1343,7 +1343,7 @@ Crowd-Sourced Iterative Annotation for Narrative Summarization Corpora JessicaOuyang SerinaChang - KathyMcKeown + KathyMcKeown 46–51 E17-2008 We present an iterative annotation process for producing aligned, parallel corpora of abstractive and extractive summaries for narrative. Our approach uses a combination of trained annotators and crowd-sourcing, allowing us to elicit human-generated summaries and alignments quickly and at low cost. We use crowd-sourcing to annotate aligned phrases with the text-to-text generation techniques needed to transform each phrase into the other. We apply this process to a corpus of 476 personal narratives, which we make available on the Web. @@ -1364,7 +1364,7 @@ Detecting negation scope is easy, except when it isn’t FedericoFancellu AdamLopez - BonnieWebber + BonnieWebber HangfengHe 58–63 E17-2010 @@ -1394,9 +1394,9 @@ Instances and concepts in distributional space - GemmaBoleda + GemmaBoleda AbhijeetGupta - SebastianPadó + SebastianPadó 79–85 E17-2013 Instances (“Mozart”) are ontologically distinct from concepts or classes (“composer”). Natural language encompasses both, but instances have received comparatively little attention in distributional semantics. Our results show that instances and concepts differ in their distributional properties. We also establish that instantiation detection (“Mozart – composer”) is generally easier than hypernymy detection (“chemist – scientist”), and that results on the influence of input representation do not transfer from hyponymy to instantiation. @@ -1404,7 +1404,7 @@ Is this a Child, a Girl or a Car? Exploring the Contribution of Distributional Similarity to Learning Referential Word Meanings - SinaZarrieß + SinaZarrieß DavidSchlangen 86–91 E17-2014 @@ -1456,8 +1456,8 @@ Context-Aware Prediction of Derivational Word-forms EkaterinaVylomova RyanCotterell - TimothyBaldwin - TrevorCohn + TimothyBaldwin + TrevorCohn 118–124 E17-2019 Derivational morphology is a fundamental and complex characteristic of language. In this paper we propose a new task of predicting the derivational form of a given base-form lemma that is appropriate for a given context. We present an encoder-decoder style neural network to produce a derived form character-by-character, based on its corresponding character-level representation of the base form and the context. We demonstrate that our model is able to generate valid context-sensitive derivations from known base forms, but is less accurate under lexicon agnostic setting. @@ -1486,7 +1486,7 @@ A Computational Analysis of the Language of Drug Addiction CarloStrapparava - RadaMihalcea + RadaMihalcea 136–142 E17-2022 We present a computational analysis of the language of drug users when talking about their drug experiences. We introduce a new dataset of over 4,000 descriptions of experiences reported by users of four main drug types, and show that we can predict with an F1-score of up to 88% the drug behind a certain experience. We also perform an analysis of the dominant psycholinguistic processes and dominant emotions associated with each drug type, which sheds light on the characteristics of drug users. @@ -1522,7 +1522,7 @@ Identifying beneficial task relations for multi-task learning in deep neural networks JoachimBingel - AndersSøgaard + AndersSøgaard 164–169 E17-2026 Multi-task learning (MTL) in deep neural networks for NLP has recently received increasing interest due to some compelling benefits, including its potential to efficiently regularize models and to reduce the need for labeled data. While it has brought significant improvements in a number of NLP tasks, mixed results have been reported, and little is known about the conditions under which MTL leads to gains in NLP. This paper sheds light on the specific task relations that can lead to gains from MTL models over single-task setups. @@ -1541,7 +1541,7 @@ RyanCotterell AdamPoliak BenjaminVan Durme - JasonEisner + JasonEisner 175–181 E17-2028 The popular skip-gram model induces word embeddings by exploiting the signal from word-context coocurrence. We offer a new interpretation of skip-gram based on exponential family PCA-a form of matrix factorization to generalize the skip-gram model to tensor factorization. In turn, this lets us train embeddings through richer higher-order coocurrences, e.g., triples that include positional information (to incorporate syntax) or morphological information (to share parameters across related words). We experiment on 40 languages and show our model improves upon skip-gram. @@ -1610,7 +1610,7 @@ Morphological Analysis of the <fixed-case>D</fixed-case>ravidian Language Family ArunKumar RyanCotterell - LluísPadró + LluísPadró AntoniOliver 217–222 E17-2035 @@ -1619,7 +1619,7 @@ <fixed-case>B</fixed-case>abel<fixed-case>D</fixed-case>omains: Large-Scale Domain Labeling of Lexical Resources - JoseCamacho-Collados + JoseCamacho-Collados RobertoNavigli 223–228 E17-2036 @@ -1630,7 +1630,7 @@ <fixed-case>JFLEG</fixed-case>: A Fluency Corpus and Benchmark for Grammatical Error Correction CourtneyNapoles KeisukeSakaguchi - JoelTetreault + JoelTetreault 229–234 E17-2037 We present a new parallel corpus, JHU FLuency-Extended GUG corpus (JFLEG) for developing and evaluating grammatical error correction (GEC). Unlike other corpora, it represents a broad range of language proficiency levels and uses holistic fluency edits to not only correct grammatical errors but also make the original text more native sounding. We describe the types of corrections made and benchmark four leading GEC systems on this corpus, identifying specific areas in which they do well and how they can improve. JFLEG fulfills the need for a new gold standard to properly assess the current state of GEC. @@ -1665,9 +1665,9 @@ Cross-lingual tagger evaluation without test data - ŽeljkoAgić - BarbaraPlank - AndersSøgaard + ŽeljkoAgić + BarbaraPlank + AndersSøgaard 248–253 E17-2040 We address the challenge of cross-lingual POS tagger evaluation in absence of manually annotated test data. We put forth and evaluate two dictionary-based metrics. On the tasks of accuracy prediction and system ranking, we reveal that these metrics are reliable enough to approximate test set-based evaluation, and at the same time lean enough to support assessment for truly low-resource languages. @@ -1677,7 +1677,7 @@ Legal <fixed-case>NERC</fixed-case> with ontologies, <fixed-case>W</fixed-case>ikipedia and curriculum learning CristianCardellino MilagroTeruel - LauraAlonso Alemany + LauraAlonso Alemany SerenaVillata 254–259 E17-2041 @@ -1686,7 +1686,7 @@ The Content Types Dataset: a New Resource to Explore Semantic and Functional Characteristics of Texts - RacheleSprugnoli + RacheleSprugnoli TommasoCaselli SaraTonelli GiovanniMoretti @@ -1718,9 +1718,9 @@ Neural vs. Phrase-Based Machine Translation in a Multi-Domain Scenario - M. AminFarajian + M. AminFarajian MarcoTurchi - MatteoNegri + MatteoNegri NicolaBertoldi MarcelloFederico 280–284 @@ -1749,7 +1749,7 @@ To Sing like a Mockingbird LorenzoGatti - GözdeÖzbal + GözdeÖzbal OlivieroStock CarloStrapparava 298–304 @@ -1771,7 +1771,7 @@ Daniëlde Kok JianqiangMa CorinaDima - ErhardHinrichs + ErhardHinrichs 311–317 E17-2050 Prepostitional phrase (PP) attachment is a well known challenge to parsing. In this paper, we combine the insights of different works, namely: (1) treating PP attachment as a classification task with an arbitrary number of attachment candidates; (2) using auxiliary distributions to augment the data beyond the hand-annotated training set; (3) using topological fields to get information about the distribution of PP attachment throughout clauses and (4) using state-of-the-art techniques such as word embeddings and neural networks. We show that jointly using these techniques leads to substantial improvements. We also conduct a qualitative analysis to gauge where the ceiling of the task is in a realistic setup. @@ -1789,10 +1789,10 @@ Joining Hands: Exploiting Monolingual Treebanks for Parsing of Code-mixing Data - IrshadBhat - Riyaz A.Bhat - ManishShrivastava - DiptiSharma + IrshadBhat + Riyaz A.Bhat + ManishShrivastava + DiptiSharma 324–330 E17-2052 In this paper, we propose efficient and less resource-intensive strategies for parsing of code-mixed data. These strategies are not constrained by in-domain annotations, rather they leverage pre-existing monolingual annotated resources for training. We show that these methods can produce significantly better results as compared to an informed baseline. Due to lack of an evaluation set for code-mixed structures, we also present a data set of 450 Hindi and English code-mixed tweets of Hindi multilingual speakers for evaluation. @@ -1801,7 +1801,7 @@ Multilingual Lexicalized Constituency Parsing with Word-Level Auxiliary Tasks MaximinCoavoux - BenoîtCrabbé + BenoîtCrabbé 331–336 E17-2053 We introduce a constituency parser based on a bi-LSTM encoder adapted from recent work (Cross and Huang, 2016b; Kiperwasser and Goldberg, 2016), which can incorporate a lower level character biLSTM (Ballesteros et al., 2015; Plank et al., 2016). We model two important interfaces of constituency parsing with auxiliary tasks supervised at the word level: (i) part-of-speech (POS) and morphological tagging, (ii) functional label prediction. On the SPMRL dataset, our parser obtains above state-of-the-art results on constituency parsing without requiring either predicted POS or morphological tags, and outputs labelled dependency trees. @@ -1811,7 +1811,7 @@ Be Precise or Fuzzy: Learning the Meaning of Cardinals and Quantifiers from Vision SandroPezzelle MarcoMarelli - RaffaellaBernardi + RaffaellaBernardi 337–342 E17-2054 People can refer to quantities in a visual scene by using either exact cardinals (e.g. one, two, three) or natural language quantifiers (e.g. few, most, all). In humans, these two processes underlie fairly different cognitive and neural mechanisms. Inspired by this evidence, the present study proposes two models for learning the objective meaning of cardinals and quantifiers from visual scenes containing multiple objects. We show that a model capitalizing on a ‘fuzzy’ measure of similarity is effective for learning quantifiers, whereas the learning of exact cardinals is better accomplished when information about number is provided. @@ -1829,10 +1829,10 @@ Neural Automatic Post-Editing Using Prior Alignment and Reranking SantanuPal - Sudip KumarNaskar + Sudip KumarNaskar MihaelaVela QunLiu - Josefvan Genabith + Josefvan Genabith 349–355 E17-2056 We present a second-stage machine translation (MT) system based on a neural machine translation (NMT) approach to automatic post-editing (APE) that improves the translation quality provided by a first-stage MT system. Our APE system (APE_Sym) is an extended version of an attention based NMT model with bilingual symmetry employing bidirectional models, mt–pe and pe–mt. APE translations produced by our system show statistically significant improvements over the first-stage MT, phrase-based APE and the best reported score on the WMT 2016 APE dataset by a previous neural APE system. Re-ranking (APE_Rerank) of the n-best translations from the phrase-based APE and APE_Sym systems provides further substantial improvements over the symmetric neural APE model. Human evaluation confirms that the APE_Rerank generated PE translations improve on the previous best neural APE system at WMT 2016. @@ -1842,10 +1842,10 @@ Improving Evaluation of Document-level Machine Translation Quality Estimation YvetteGraham QingsongMa - TimothyBaldwin + TimothyBaldwin QunLiu - CarlaParra - CarolinaScarton + CarlaParra + CarolinaScarton 356–361 E17-2057 Meaningful conclusions about the relative performance of NLP systems are only possible if the gold standard employed in a given evaluation is both valid and reliable. In this paper, we explore the validity of human annotations currently employed in the evaluation of document-level quality estimation for machine translation (MT). We demonstrate the degree to which MT system rankings are dependent on weights employed in the construction of the gold standard, before proposing direct human assessment as a valid alternative. Experiments show direct assessment (DA) scores for documents to be highly reliable, achieving a correlation of above 0.9 in a self-replication experiment, in addition to a substantial estimated cost reduction through quality controlled crowd-sourcing. The original gold standard based on post-edits incurs a 10–20 times greater cost than DA. @@ -1854,7 +1854,7 @@ Neural Machine Translation by Minimising the <fixed-case>B</fixed-case>ayes-risk with Respect to Syntactic Translation Lattices FelixStahlberg - Adriàde Gispert + Adriàde Gispert EvaHasler BillByrne 362–368 @@ -1866,8 +1866,8 @@ Producing Unseen Morphological Variants in Statistical Machine Translation MatthiasHuck AlešTamchyna - OndřejBojar - AlexanderFraser + OndřejBojar + AlexanderFraser 369–375 E17-2059 Translating into morphologically rich languages is difficult. Although the coverage of lemmas may be reasonable, many morphological variants cannot be learned from the training data. We present a statistical translation system that is able to produce these inflected word forms. Different from most previous work, we do not separate morphological prediction from lexical choice into two consecutive steps. Our approach is novel in that it is integrated in decoding and takes advantage of context information from both the source language and the target language sides. @@ -1886,7 +1886,7 @@ ZichaoYang ZhitingHu YuntianDeng - ChrisDyer + ChrisDyer AlexSmola 383–387 E17-2061 @@ -1931,7 +1931,7 @@ Using Word Embedding for Cross-Language Plagiarism Detection JérémyFerrero - LaurentBesacier + LaurentBesacier DidierSchwab FrédéricAgnès 415–421 @@ -1952,9 +1952,9 @@ Bag of Tricks for Efficient Text Classification ArmandJoulin - EdouardGrave + EdouardGrave PiotrBojanowski - TomasMikolov + TomasMikolov 427–431 E17-2068 This paper explores a simple and efficient baseline for text classification. Our experiments show that our fast text classifier fastText is often on par with deep learning classifiers in terms of accuracy, and many orders of magnitude faster for training and evaluation. We can train fastText on more than one billion words in less than ten minutes using a standard multicore CPU, and classify half a million sentences among 312K classes in less than a minute. @@ -1975,7 +1975,7 @@ NitinRamrakhiyani SachinPawar SwapnilHingmire - GirishPalshikar + GirishPalshikar 437–442 E17-2070 Measuring topic quality is essential for scoring the learned topics and their subsequent use in Information Retrieval and Text classification. To measure quality of Latent Dirichlet Allocation (LDA) based topics learned from text, we propose a novel approach based on grouping of topic words into buckets (TBuckets). A single large bucket signifies a single coherent theme, in turn indicating high topic coherence. TBuckets uses word embeddings of topic words and employs singular value decomposition (SVD) and Integer Linear Programming based optimization to create coherent word buckets. TBuckets outperforms the state-of-the-art techniques when evaluated using 3 publicly available datasets and on another one proposed in this paper. @@ -1986,7 +1986,7 @@ Shiou TianHsu ChangsungMoon PaulJones - NagizaSamatova + NagizaSamatova 443–449 E17-2071 The success of sentence classification often depends on understanding both the syntactic and semantic properties of word-phrases. Recent progress on this task has been based on exploiting the grammatical structure of sentences but often this structure is difficult to parse and noisy. In this paper, we propose a structure-independent ‘Gated Representation Alignment’ (GRA) model that blends a phrase-focused Convolutional Neural Network (CNN) approach with sequence-oriented Recurrent Neural Network (RNN). Our novel alignment mechanism allows the RNN to selectively include phrase information in a word-by-word sentence representation, and to do this without awareness of the syntactic structure. An empirical evaluation of GRA shows higher prediction accuracy (up to 4.6%) of fine-grained sentiment ratings, when compared to other structure-independent baselines. We also show comparable results to several structure-dependent methods. Finally, we analyzed the effect of our alignment mechanism and found that this is critical to the effectiveness of the CNN-RNN hybrid. @@ -2027,7 +2027,7 @@ A Copy-Augmented Sequence-to-Sequence Architecture Gives Good Performance on Task-Oriented Dialogue MihailEric - ChristopherManning + ChristopherManning 468–473 E17-2075 Task-oriented dialogue focuses on conversational agents that participate in dialogues with user goals on domain-specific topics. In contrast to chatbots, which simply seek to sustain open-ended meaningful discourse, existing task-oriented agents usually explicitly model user intent and belief states. This paper examines bypassing such an explicit representation by depending on a latent neural embedding of state and learning selective attention to dialogue history together with copying to incorporate relevant prior context. We complement recent work by showing the effectiveness of simple sequence-to-sequence neural architectures with a copy mechanism. Our model outperforms more complex memory-augmented models by 7% in per-response generation and is on par with the current state-of-the-art on DSTC2, a real-world task-oriented dialogue dataset. @@ -2038,7 +2038,7 @@ SameerBansal HermanKamper AdamLopez - SharonGoldwater + SharonGoldwater 474–479 E17-2076 We explore the problem of translating speech to text in low-resource scenarios where neither automatic speech recognition (ASR) nor machine translation (MT) are available, but we have training data in the form of audio paired with text translations. We present the first system for this problem applied to a realistic multi-speaker dataset, the CALLHOME Spanish-English speech translation corpus. Our approach uses unsupervised term discovery (UTD) to cluster repeated patterns in the audio, creating a pseudotext, which we pair with translations to create a parallel text and train a simple bag-of-words MT model. We identify the challenges faced by the system, finding that the difficulty of cross-speaker UTD results in low recall, but that our system is still able to correctly translate some content words in test data. @@ -2091,7 +2091,7 @@ Efficient, Compositional, Order-sensitive n-gram Embeddings AdamPoliak PushpendreRastogi - M. PatrickMartin + M. PatrickMartin BenjaminVan Durme 503–508 E17-2081 @@ -2101,7 +2101,7 @@ Integrating Semantic Knowledge into Lexical Embeddings Based on Information Content Measurement Hsin-YangWang - Wei-YunMa + Wei-YunMa 509–515 E17-2082 Distributional word representations are widely used in NLP tasks. These representations are based on an assumption that words with a similar context tend to have a similar meaning. To improve the quality of the context-based embeddings, many researches have explored how to make full use of existing lexical resources. In this paper, we argue that while we incorporate the prior knowledge with context-based embeddings, words with different occurrences should be treated differently. Therefore, we propose to rely on the measurement of information content to control the degree of applying prior knowledge into context-based embeddings - different words would have different learning rates when adjusting their embeddings. In the result, we demonstrate that our embeddings get significant improvements on two different tasks: Word Similarity and Analogical Reasoning. @@ -2110,7 +2110,7 @@ Improving Neural Knowledge Base Completion with Cross-Lingual Projections PatrickKlein - Simone PaoloPonzetto + Simone PaoloPonzetto GoranGlavaš 516–522 E17-2083 @@ -2132,7 +2132,7 @@ JulieWeeds ThomasKober JeremyReffin - DavidWeir + DavidWeir 529–534 E17-2085 Non-compositional phrases such as red herring and weakly compositional phrases such as spelling bee are an integral part of natural language (Sag, 2002). They are also the phrases that are difficult, or even impossible, for good compositional distributional models of semantics. Compositionality detection therefore provides a good testbed for compositional methods. We compare an integrated compositional distributional approach, using sparse high dimensional representations, with the ad-hoc compositional approach of applying simple composition operations to state-of-the-art neural embeddings. @@ -2141,7 +2141,7 @@ Applying Multi-Sense Embeddings for <fixed-case>G</fixed-case>erman Verbs to Determine Semantic Relatedness and to Detect Non-Literal Language MaximilianKöper - SabineSchulte im Walde + SabineSchulte im Walde 535–542 E17-2086 Up to date, the majority of computational models still determines the semantic relatedness between words (or larger linguistic units) on the type level. In this paper, we compare and extend multi-sense embeddings, in order to model and utilise word senses on the token level. We focus on the challenging class of complex verbs, and evaluate the model variants on various semantic tasks: semantic classification; predicting compositionality; and detecting non-literal language usage. While there is no overall best model, all models significantly outperform a word2vec single-sense skip baseline, thus demonstrating the need to distinguish between word senses in a distributional semantic model. @@ -2150,8 +2150,8 @@ Negative Sampling Improves Hypernymy Extraction Based on Projection Learning DmitryUstalov - NikolayArefyev - ChrisBiemann + NikolayArefyev + ChrisBiemann AlexanderPanchenko 543–550 E17-2087 @@ -2161,7 +2161,7 @@ A Dataset for Multi-Target Stance Detection ParinazSobhani - DianaInkpen + DianaInkpen XiaodanZhu 551–557 E17-2088 @@ -2181,8 +2181,8 @@ Predicting Emotional Word Ratings using Distributional Representations and Signed Clustering JoãoSedoc - DanielPreoţiuc-Pietro - LyleUngar + DanielPreoţiuc-Pietro + LyleUngar 564–571 E17-2090 Inferring the emotional content of words is important for text-based sentiment analysis, dialogue systems and psycholinguistics, but word ratings are expensive to collect at scale and across languages or domains. We develop a method that automatically extends word-level ratings to unrated words using signed clustering of vector space word representations along with affect ratings. We use our method to determine a word’s valence and arousal, which determine its position on the circumplex model of affect, the most popular dimensional model of emotion. Our method achieves superior out-of-sample word rating prediction on both affective dimensions across three different languages when compared to state-of-the-art word similarity based methods. Our method can assist building word ratings for new languages and improve downstream tasks such as sentiment analysis and emotion detection. @@ -2199,7 +2199,7 @@ <fixed-case>E</fixed-case>mo<fixed-case>B</fixed-case>ank: Studying the Impact of Annotation Perspective and Representation Format on Dimensional Emotion Analysis - SvenBuechel + SvenBuechel UdoHahn 578–585 E17-2092 @@ -2218,7 +2218,7 @@ Ranking Convolutional Recurrent Neural Networks for Purchase Stage Identification on Imbalanced <fixed-case>T</fixed-case>witter Data HeikeAdel - FrancineChen + FrancineChen Yan-YingChen 592–598 E17-2094 @@ -2238,7 +2238,7 @@ Reranking Translation Candidates Produced by Several Bilingual Word Similarity Sources LaurentJakubina - PhillippeLanglais + PhillippeLanglais 605–611 E17-2096 We investigate the reranking of the output of several distributional approaches on the Bilingual Lexicon Induction task. We show that reranking an n-best list produced by any of those approaches leads to very substantial improvements. We further demonstrate that combining several n-best lists by reranking is an effective way of further boosting performance. @@ -2265,9 +2265,9 @@ Addressing Problems across Linguistic Levels in <fixed-case>SMT</fixed-case>: Combining Approaches to Model Morphology, Syntax and Lexical Choice - MarionWeller-Di Marco - AlexanderFraser - SabineSchulte im Walde + MarionWeller-Di Marco + AlexanderFraser + SabineSchulte im Walde 625–630 E17-2099 Many errors in phrase-based SMT can be attributed to problems on three linguistic levels: morphological complexity in the target language, structural differences and lexical choice. We explore combinations of linguistically motivated approaches to address these problems in English-to-German SMT and show that they are complementary to one another, but also that the popular verbal pre-ordering can cause problems on the morphological and lexical level. A discriminative classifier can overcome these problems, in particular when enriching standard lexical features with features geared towards verbal inflection. @@ -2275,9 +2275,9 @@ Machine Translation of <fixed-case>S</fixed-case>panish Personal and Possessive Pronouns Using Anaphora Probabilities - Ngoc QuangLuong - AndreiPopescu-Belis - AnnetteRios Gonzales + Ngoc QuangLuong + AndreiPopescu-Belis + AnnetteRios Gonzales DonTuggener 631–636 E17-2100 @@ -2300,7 +2300,7 @@ Continuous multilinguality with language vectors RobertÖstling - JörgTiedemann + JörgTiedemann 644–649 E17-2102 Most existing models for multilingual natural language processing (NLP) treat language as a discrete category, and make predictions for either one language or the other. In contrast, we propose using continuous vector representations of language. We show that these can be learned efficiently with a character-based neural language model, and used to improve inference about language varieties not seen during training. In experiments with 1303 Bible translations into 990 different languages, we empirically explore the capacity of multilingual language models, and also show that the language vectors capture genetic relationships between languages. @@ -2310,7 +2310,7 @@ Unsupervised Training for Large Vocabulary Translation Using Sparse Lexicon and Word Classes YunsuKim JulianSchamper - HermannNey + HermannNey 650–656 E17-2103 We address for the first time unsupervised training for a translation task with hundreds of thousands of vocabulary words. We scale up the expectation-maximization (EM) algorithm to learn a large translation table without any parallel text or seed lexicon. First, we solve the memory bottleneck and enforce the sparsity with a simple thresholding scheme for the lexicon. Second, we initialize the lexicon training with word classes, which efficiently boosts the performance. Our methods produced promising results on two large-scale unsupervised translation tasks. @@ -2318,7 +2318,7 @@ Co-reference Resolution of Elided Subjects and Possessive Pronouns in <fixed-case>S</fixed-case>panish-<fixed-case>E</fixed-case>nglish Statistical Machine Translation - AnnetteRios Gonzales + AnnetteRios Gonzales DonTuggener 657–662 E17-2104 @@ -2330,7 +2330,7 @@ YandiXia AaronLevine PradiptoDas - GiuseppeDi Fabbrizio + GiuseppeDi Fabbrizio KeijiShinzato AnkurDatta 663–668 @@ -2342,8 +2342,8 @@ Convolutional Neural Networks for Authorship Attribution of Short Texts PrashaShrestha SebastianSierra - FabioGonzález - ManuelMontes + FabioGonzález + ManuelMontes PaoloRosso ThamarSolorio 669–674 @@ -2356,7 +2356,7 @@ YinfeiYang CenChen MinghuiQiu - ForrestBao + ForrestBao 675–680 E17-2107 Aspect extraction abstracts the common properties of objects from corpora discussing them, such as reviews of products. Recent work on aspect extraction is leveraging the hierarchical relationship between products and their categories. However, such effort focuses on the aspects of child categories but ignores those from parent categories. Hence, we propose an LDA-based generative topic model inducing the two-layer categorical information (CAT-LDA), to balance the aspects of both a parent category and its child categories. Our hypothesis is that child categories inherit aspects from parent categories, controlled by the hierarchy between them. Experimental results on 5 categories of Amazon.com products show that both common aspects of parent category and the individual aspects of sub-categories can be extracted to align well with the common sense. We further evaluate the manually extracted aspects of 16 products, resulting in an average hit rate of 79.10%. @@ -2364,7 +2364,7 @@ On the Relevance of Syntactic and Discourse Features for Author Profiling and Identification - JuanSoler-Company + JuanSoler-Company LeoWanner 681–687 E17-2108 @@ -2375,7 +2375,7 @@ Unsupervised Cross-Lingual Scaling of Political Texts GoranGlavaš FedericoNanni - Simone PaoloPonzetto + Simone PaoloPonzetto 688–693 E17-2109 Political text scaling aims to linearly order parties and politicians across political dimensions (e.g., left-to-right ideology) based on textual content (e.g., politician speeches or party manifestos). Existing models scale texts based on relative word usage and cannot be used for cross-lingual analyses. Additionally, there is little quantitative evidence that the output of these models correlates with common political dimensions like left-to-right orientation. Experimental results show that the semantically-informed scaling models better predict the party positions than the existing word-based models in two different political dimensions. Furthermore, the proposed models exhibit no drop in performance in the cross-lingual compared to monolingual setting. @@ -2393,10 +2393,10 @@ Multimodal Topic Labelling - IonutSorodoc + IonutSorodoc Jey HanLau NikolaosAletras - TimothyBaldwin + TimothyBaldwin 701–706 E17-2111 Topics generated by topic models are typically presented as a list of topic terms. Automatic topic labelling is the task of generating a succinct label that summarises the theme or subject of a topic, with the intention of reducing the cognitive load of end-users when interpreting these topics. Traditionally, topic label systems focus on a single label modality, e.g. textual labels. In this work we propose a multimodal approach to topic labelling using a simple feedforward neural network. Given a topic and a candidate image or textual label, our method automatically generates a rating for the label, relative to the topic. Experiments show that this multimodal approach outperforms single-modality topic labelling systems. @@ -2405,7 +2405,7 @@ Detecting (Un)Important Content for Single-Document News Summarization YinfeiYang - ForrestBao + ForrestBao AniNenkova 707–712 E17-2112 @@ -2454,7 +2454,7 @@ JulienTourille OlivierFerret XavierTannier - AurélieNévéol + AurélieNévéol 739–745 E17-2117 In this paper, we present a method for temporal relation extraction from clinical narratives in French and in English. We experiment on two comparable corpora, the MERLOT corpus and the THYME corpus, and show that a common approach can be used for both languages. @@ -2463,7 +2463,7 @@ Neural Temporal Relation Extraction DmitriyDligach - TimothyMiller + TimothyMiller ChenLin StevenBethard GuerganaSavova @@ -2476,7 +2476,7 @@ End-to-End Trainable Attentive Decoder for Hierarchical Entity Classification Sanjeev KumarKarn UlliWaltinger - HinrichSchütze + HinrichSchütze 752–758 E17-2119 We address fine-grained entity classification and propose a novel attention-based recurrent neural network (RNN) encoder-decoder that generates paths in the type hierarchy and can be trained end-to-end. We show that our model performs better on fine-grained entity classification than prior work that relies on flat or local classifiers that do not directly model hierarchical structure. @@ -2497,8 +2497,8 @@ Proceedings of the Software Demonstrations of the 15th Conference of the European Chapter of the Association for Computational Linguistics E17-3 - AndréMartins - AnselmoPeñas + AndréMartins + AnselmoPeñas Association for Computational Linguistics
Valencia, Spain
April @@ -2511,7 +2511,7 @@ <fixed-case>COVER</fixed-case>: Covering the Semantically Tractable Questions - MichaelMinock + MichaelMinock 1–4 E17-3001 In semantic parsing, natural language questions map to expressions in a meaning representation language (MRL) over some fixed vocabulary of predicates. To do this reliably, one must guarantee that for a wide class of natural language questions (the so called semantically tractable questions), correct interpretations are always in the mapped set of possibilities. In this demonstration, we introduce the system COVER which significantly clarifies, revises and extends the basic notion of semantic tractability. COVER achieves coverage of 89% while the earlier PRECISE system achieved coverage of 77% on the well known GeoQuery corpus. Like PRECISE, COVER requires only a simple domain lexicon and integrates off-the-shelf syntactic parsers. Beyond PRECISE, COVER also integrates off-the-shelf theorem provers to provide more accurate results. COVER is written in Python and uses the NLTK. @@ -2526,10 +2526,10 @@ RenlongAi StephanBusemann JonDehdari - Josefvan Genabith + Josefvan Genabith GeorgHeigold NilsRethmeier - RaphaelRubino + RaphaelRubino SvenSchmeier PhilippeThomas HeWang @@ -2551,7 +2551,7 @@ <fixed-case>WAT</fixed-case>-<fixed-case>SL</fixed-case>: A Customizable Web Annotation Tool for Segment Labeling JohannesKiesel HenningWachsmuth - KhalidAl-Khatib + KhalidAl-Khatib BennoStein 13–16 E17-3004 @@ -2605,8 +2605,8 @@ <fixed-case>CASSANDRA</fixed-case>: A multipurpose configurable voice-enabled human-computer-interface - TiberiuBoros - Stefan DanielDumitrescu + TiberiuBoros + Stefan DanielDumitrescu SoniaPipa 33–36 E17-3009 @@ -2662,7 +2662,7 @@ The ar<fixed-case>T</fixed-case>ext prototype: An automatic system for writing specialized texts - Iriada Cunha + Iriada Cunha M. AmorMontané LuisHysa 57–60 @@ -2680,7 +2680,7 @@ AhmedAbdelali HamdyMubarak AhmedAli - StephanVogel + StephanVogel 61–64 E17-3016 This paper presents QCRI’s Arabic-to-English live speech translation system. It features modern web technologies to capture live audio, and broadcasts Arabic transcriptions and English translations simultaneously. Our Kaldi-based ASR system uses the Time Delay Neural Network (TDNN) architecture, while our Machine Translation (MT) system uses both phrase-based and neural frameworks. Although our neural MT system is slower than the phrase-based system, it produces significantly better translations and is memory efficient. The demo is available at https://st.qcri.org/demos/livetranslation. @@ -2696,9 +2696,9 @@ JulianHitschler MarcinJunczys-Dowmunt SamuelLäubli - Antonio ValerioMiceli Barone + Antonio ValerioMiceli Barone JozefMokry - MariaNădejde + MariaNădejde 65–68 E17-3017 We present Nematus, a toolkit for Neural Machine Translation. The toolkit prioritizes high translation accuracy, usability, and extensibility. Nematus has been used to build top-performing submissions to shared translation tasks at WMT and IWSLT, and has been used to train systems for production environments. @@ -2718,7 +2718,7 @@ <fixed-case>L</fixed-case>ingmotif: Sentiment Analysis for the Digital Humanities - AntonioMoreno-Ortiz + AntonioMoreno-Ortiz 73–76 E17-3019 Lingmotif is a lexicon-based, linguistically-motivated, user-friendly, GUI-enabled, multi-platform, Sentiment Analysis desktop application. Lingmotif can perform SA on any type of input texts, regardless of their length and topic. The analysis is based on the identification of sentiment-laden words and phrases contained in the application’s rich core lexicons, and employs context rules to account for sentiment shifters. It offers easy-to-interpret visual representations of quantitative data (text polarity, sentiment intensity, sentiment profile), as well as a detailed, qualitative analysis of the text in terms of its sentiment. Lingmotif can also take user-provided plugin lexicons in order to account for domain-specific sentiment expression. Lingmotif currently analyzes English and Spanish texts. @@ -2727,7 +2727,7 @@ <fixed-case>RAMBLE</fixed-case> <fixed-case>ON</fixed-case>: Tracing Movements of Popular Historical Figures StefanoMenini - RacheleSprugnoli + RacheleSprugnoli GiovanniMoretti EnricoBignotti SaraTonelli @@ -2817,7 +2817,7 @@ The <fixed-case>SUMMA</fixed-case> Platform Prototype RenarsLiepins UlrichGermann - GuntisBarzdins + GuntisBarzdins AlexandraBirch SteveRenals SusanneWeber @@ -2827,11 +2827,11 @@ OndřejKlejch PeterBell AlexandrosLazaridis - AlfonsoMendes + AlfonsoMendes SebastianRiedel Mariana S. C.Almeida - PedroBalage - Shay B.Cohen + PedroBalage + Shay B.Cohen TomaszDwojak Philip N.Garner AndreasGiefer @@ -2840,8 +2840,8 @@ DavidNogueira AhmedAli SebastiãoMiranda - AndreiPopescu-Belis - LeslyMiculicich Werlen + AndreiPopescu-Belis + LeslyMiculicich Werlen NikosPapasarantopoulos AbiolaObamuyide CliveJones @@ -2857,7 +2857,7 @@ SameerKhurana AhmedAbdelali HassanSajjad - StephanVogel + StephanVogel DavidSheppey ChrisHernon JeffMitchell @@ -2874,7 +2874,7 @@ FlorianKunneman UxoaIñurrieta John J.Camilleri - Mariona CollArdanuy + Mariona CollArdanuy Association for Computational Linguistics
Valencia, Spain
April @@ -2913,7 +2913,7 @@ Discourse Relations and Conjoined <fixed-case>VP</fixed-case>s: Automated Sense Recognition ValentinaPyatkin - BonnieWebber + BonnieWebber 33–42 E17-4004 Sense classification of discourse relations is a sub-task of shallow discourse parsing. Discourse relations can occur both across sentences (inter-sentential) and within sentences (intra-sentential), and more than one discourse relation can hold between the same units. Using a newly available corpus of discourse-annotated intra-sentential conjoined verb phrases, we demonstrate a sequential classification pipeline for their multi-label sense classification. We assess the importance of each feature used in the classification, the feature scope, and what is lost in moving from gold standard manual parses to the output of an off-the-shelf parser. @@ -2942,7 +2942,7 @@ Automatic Extraction of News Values from Headline Text AlicjaPiotrkowicz - VaniaDimitrova + VaniaDimitrova KatjaMarkert 64–74 E17-4007 @@ -2989,7 +2989,7 @@ Evaluating the Reliability and Interaction of Recursively Used Feature Classes for Terminology Extraction AnnaHätty MichaelDorna - SabineSchulte im Walde + SabineSchulte im Walde 113–121 E17-4012 Feature design and selection is a crucial aspect when treating terminology extraction as a machine learning classification problem. We designed feature classes which characterize different properties of terms based on distributions, and propose a new feature class for components of term candidates. By using random forests, we infer optimal features which are later used to build decision tree classifiers. We evaluate our method using the ACL RD-TEC dataset. We demonstrate the importance of the novel feature class for downgrading termhood which exploits properties of term components. Furthermore, our classification suggests that the identification of reliable term candidates should be performed successively, rather than just once. @@ -3010,9 +3010,9 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies JoakimNivre - DanielZeman + DanielZeman FilipGinter - FrancisTyers + FrancisTyers E17-5001 Universal Dependencies (UD) is a project that seeks to develop cross-linguistically consistent treebank annotation for many languages. This tutorial gives an introduction to the UD framework and resources, from basic design principles to annotation guidelines and existing treebanks. We also discuss tools for developing and exploiting UD treebanks and survey applications of UD in NLP and linguistics. nivre-etal-2017-universal @@ -3065,7 +3065,7 @@ The goal of this tutorial is to introduce the computational framework to broader Building Multimodal Simulations for Natural Language - JamesPustejovsky + JamesPustejovsky NikhilKrishnaswamy E17-5006 In this tutorial, we introduce a computational framework and modeling language (VoxML) for composing multimodal simulations of natural language expressions within a 3D simulation environment (VoxSim). We demonstrate how to construct voxemes, which are visual object representations of linguistic entities. We also show how to compose events and actions over these objects, within a restricted domain of dynamics. This gives us the building blocks to simulate narratives of multiple events or participate in a multimodal dialogue with synthetic agents in the simulation environment. To our knowledge, this is the first time such material has been presented as a tutorial within the CL community. diff --git a/data/xml/E83.xml b/data/xml/E83.xml index d49e44fb0b..b580459990 100644 --- a/data/xml/E83.xml +++ b/data/xml/E83.xml @@ -34,14 +34,14 @@ Structure of Sentence and Inferencing in Question Answering - EvaHajicova - PetrSgall + EvaHajicova + PetrSgall E83-1004 hajicova-sgall-1983-structure A Phonological Processor for <fixed-case>I</fixed-case>talian - RodolfoDelmonte + RodolfoDelmonte E83-1005 delmonte-1983-phonological @@ -61,10 +61,10 @@ Knowledge Engineering Approach to Morphological Analysis - HarriJäppinen - AarnoLehtola - EsaNelimarkka - MattiYlilammi + HarriJäppinen + AarnoLehtola + EsaNelimarkka + MattiYlilammi E83-1008 jappinen-etal-1983-knowledge @@ -77,7 +77,7 @@ Extended Access to the Left Context in an <fixed-case>ATN</fixed-case> Parser - IrinaProdanof + IrinaProdanof GiacomoFerrari E83-1010 prodanof-ferrari-1983-extended @@ -108,20 +108,20 @@ The Generation of Term Definitions From an On-Line Terminological Thesaurus - JohnMcNaught + JohnMcNaught E83-1015 mcnaught-1983-generation Relating Syntax and Semantics: The Syntactico-Semantic Lexicon of the System <fixed-case>VIE-LANG</fixed-case> IngeborgSteinacker - ErnstBuchberger + ErnstBuchberger E83-1016 steinacker-buchberger-1983-relating An Island Parsing Interpreter for the Full Augmented Transition Network Formalism - John A.Carroll + John A.Carroll E83-1017 carroll-1983-island @@ -142,15 +142,15 @@
A Flexible Natural Language Parser Based on a Two-Level Representation of Syntax - LeonardoLesmo + LeonardoLesmo PietroTorasso E83-1020 lesmo-torasso-1983-flexible An Approach to Natural Language in the <fixed-case>SI-N</fixed-case>ets Paradigm - AmedeoCappelli - LorenzoMoretti + AmedeoCappelli + LorenzoMoretti E83-1021 cappelli-moretti-1983-approach @@ -175,7 +175,7 @@
Rules for Pronominalization - FranzGuenthner + FranzGuenthner HubertLehmann E83-1025 guenthner-lehmann-1983-rules @@ -192,20 +192,20 @@ Systemic Grammar in Computation: The <fixed-case>N</fixed-case>igel Case - Christian M.I.M.Matthiessen + Christian M.I.M.Matthiessen E83-1027 matthiessen-1983-systemic Inquiry Semantics: A Functional Semantics of Natural Language Grammar - William C.Mann + William C.Mann E83-1028 mann-1983-inquiry Natural Language Input for Scene Generation - GiovanniAdorni - MauroDi Manzo + GiovanniAdorni + MauroDi Manzo E83-1029 adorni-di-manzo-1983-natural @@ -218,7 +218,7 @@ Case Role Filling as a Side Effect of Visual Search HeinzMarburger - WolfgangWahlster + WolfgangWahlster E83-1031 marburger-wahlster-1983-case diff --git a/data/xml/E85.xml b/data/xml/E85.xml index 73f16d12c4..d7d4abd412 100644 --- a/data/xml/E85.xml +++ b/data/xml/E85.xml @@ -40,8 +40,8 @@
The Specification of Time Meaning for Machine Translation - Frankvan Eynde - Louisdes Tombe + Frankvan Eynde + Louisdes Tombe FonsMaes E85-1005 van-eynde-etal-1985-specification @@ -79,9 +79,9 @@ Various Representations of Text Proposed for <fixed-case>E</fixed-case>urotra - ChristianBoitet + ChristianBoitet NelsonVerastegui - DanielBachut + DanielBachut E85-1011 boitet-etal-1985-various @@ -93,7 +93,7 @@
Right Attachment and Preference Semantics . - YorickWilks + YorickWilks E85-1013 wilks-1985-right @@ -120,7 +120,7 @@
On the Representation of Query Term Relations by Soft <fixed-case>B</fixed-case>oolean Operators - GerardSalton + GerardSalton E85-1017 salton-1985-representation @@ -138,13 +138,13 @@
Parsing Difficulties & Phonological Processing in <fixed-case>I</fixed-case>talian - RodolfoDelmonte + RodolfoDelmonte E85-1020 delmonte-1985-parsing Design and Implementation of a Lexical Data Base - EricWehrli + EricWehrli E85-1021 wehrli-1985-design @@ -157,7 +157,7 @@
A Probabilistic Approach to Grammatical Analysis of Written <fixed-case>E</fixed-case>nglish by Computer. - Andrew DavidBeale + Andrew DavidBeale E85-1023 beale-1985-probabilistic @@ -170,9 +170,9 @@
Towards a Dictionary Support Environment for Realtime Parsing - HiyanAlshawi - BranBoguraev - TedBriscoe + HiyanAlshawi + BranBoguraev + TedBriscoe E85-1025 alshawi-etal-1985-towards @@ -184,14 +184,14 @@
A Computational Theory of Prose Style for Natural Language Generation - David D.McDonald - James D.Pustejovsky + David D.McDonald + James D.Pustejovsky E85-1027 mcdonald-pustejovsky-1985-computational An <fixed-case>E</fixed-case>nglish Generator for a Case-Labelled Dependency Representation - John IrvingTait + John IrvingTait E85-1028 tait-1985-english @@ -221,8 +221,8 @@
Non Standard Uses of If - D.S.Bree - R.A.Smit + D.S.Bree + R.A.Smit E85-1032 bree-smit-1985-non @@ -258,7 +258,7 @@
A Problem Solving Approach to Generating Text From Systemic Grammars - TerryPatten + TerryPatten E85-1037 patten-1985-problem @@ -271,8 +271,8 @@
Towards an Automatic Identification of Topic and Focus - EvaHajicova - PetrSgall + EvaHajicova + PetrSgall E85-1039 hajicova-sgall-1985-towards diff --git a/data/xml/E87.xml b/data/xml/E87.xml index d4af99ca22..b49a881988 100644 --- a/data/xml/E87.xml +++ b/data/xml/E87.xml @@ -3,7 +3,7 @@ Third Conference of the European Chapter of the Association for Computational Linguistics - BenteMaegaard + BenteMaegaard Association for Computational Linguistics
Copenhagen, Denmark
April @@ -28,10 +28,10 @@
Formalisms for Morphographemic Description - AlanBlack - GraemeRitchie + AlanBlack + GraemeRitchie StevePulman - GrahamRussell + GrahamRussell E87-1003 black-etal-1987-formalisms @@ -57,7 +57,7 @@
How to Detect Grammatical Errors in a Text Without Parsing It - Eric StevenAtwell + Eric StevenAtwell E87-1007 atwell-1987-detect @@ -78,16 +78,16 @@
Pattern Recognition Applied to the Acquisition of a Grammatical Classification System From Unrestricted <fixed-case>E</fixed-case>nglish Text - Eric StevenAtwell + Eric StevenAtwell Nicos FrixouDrakos E87-1010 atwell-drakos-1987-pattern A Multi-Purpose Interface to an On-line Dictionary - BranimirBoguraev - DavidCarter - TedBriscoe + BranimirBoguraev + DavidCarter + TedBriscoe E87-1011 boguraev-etal-1987-multi @@ -128,38 +128,38 @@
Dictionary Organization for Machine Translation: The Experience and Implications of the <fixed-case>UMIST</fixed-case> <fixed-case>J</fixed-case>apanese Project - Mary McGeeWood + Mary McGeeWood ElainePollard - HeatherHorsfall - NatsukoHolden - BrianChandler - JeremyCarroll + HeatherHorsfall + NatsukoHolden + BrianChandler + JeremyCarroll E87-1017 wood-etal-1987-dictionary Machine Translation, Linguistics, and Interlingua - PetrSgall - JarmilaPanevová + PetrSgall + JarmilaPanevová E87-1018 sgall-panevova-1987-machine Fail-Soft (“Emergency”) Measures in a Production-Oriented <fixed-case>MT</fixed-case> System - EvaHajicova + EvaHajicova ZdenekKirschner E87-1019 hajicova-kirschner-1987-fail <fixed-case>REFTEX</fixed-case> - A Context-Based Translation Aid - Poul SorenKjaersgaard + Poul SorenKjaersgaard E87-1020 kjaersgaard-1987-reftex <fixed-case>RUSLAN</fixed-case> - An <fixed-case>MT</fixed-case> System Between Closely Related Languages - JanHajic + JanHajic E87-1021 hajic-1987-ruslan @@ -167,17 +167,17 @@ Subgrammars, Rule Classes and Control in the <fixed-case>R</fixed-case>osetta Translation System LisetteAppelo CarelFellinger - JanLandsbergen + JanLandsbergen E87-1022 appelo-etal-1987-subgrammars
A Model for Preference DominiquePetitpierre - StevenKrauwer - Louisdes Tombe + StevenKrauwer + Louisdes Tombe DougArnold - Giovanni B.Varile + Giovanni B.Varile E87-1023 petitpierre-etal-1987-model @@ -201,7 +201,7 @@
String-Tree Correspondence Grammar: A Declarative Grammar Formalism for Defining the Correspondence Between Strings of Terms and Tree Structures - ZaharinYusoff + ZaharinYusoff E87-1027 yusoff-1987-string @@ -230,14 +230,14 @@ Planning for Problem Formulation in Advice-Giving Dialogue PaulDecitre ThomasGrossi - CléoJullien + CléoJullien Jean-PhilippeSolvay E87-1031 decitre-etal-1987-planning
Modeling Extemporaneous Elaboration - Marie A.Bienkowski + Marie A.Bienkowski E87-1032 bienkowski-1987-modeling @@ -246,13 +246,13 @@ MassimoMarino AntonellaSpiezio GiacomoFerrari - IrinaProdanof + IrinaProdanof E87-1033 marino-etal-1987-efficient
Discontinuous Constituents in Trees, Rules, and Parsing - HarryBunt + HarryBunt JanThesingh Kovan der Sloot E87-1034 @@ -260,7 +260,7 @@ Deterministic Parsing and Unbounded Dependencies - TedBriscoe + TedBriscoe E87-1035 briscoe-1987-deterministic @@ -275,7 +275,7 @@
A Comparison of Rule-Invocation Strategies in Context-Free Chart Parsing - MatsWiren + MatsWiren E87-1037 wiren-1987-comparison @@ -287,21 +287,21 @@
Acquisition of Conceptual Data Models from Natural Language Descriptions - William J.Black + William J.Black E87-1039 black-1987-acquisition A Structured Representation of Word-Senses for Semantic Analysis. - Maria TeresaPazienza - PaolaVelardi + Maria TeresaPazienza + PaolaVelardi E87-1040 pazienza-velardi-1987-structured Situations and Prepositional Phrases ErikColban - Jens ErikFenstad + Jens ErikFenstad E87-1041 colban-fenstad-1987-situations @@ -313,7 +313,7 @@
Iteration, Habituality and Verb Form Semantics - Frankvan Eynde + Frankvan Eynde E87-1043 van-eynde-1987-iteration diff --git a/data/xml/E89.xml b/data/xml/E89.xml index 79f0ac95c0..a7d7c37e49 100644 --- a/data/xml/E89.xml +++ b/data/xml/E89.xml @@ -3,8 +3,8 @@ Fourth Conference of the European Chapter of the Association for Computational Linguistics - HaroldSomers - MaryMcGee Wood + HaroldSomers + MaryMcGee Wood Association for Computational Linguistics
Manchester, England
April @@ -17,7 +17,7 @@ Parsing Idioms in Lexicalized <fixed-case>TAG</fixed-case>s - AnneAbeille + AnneAbeille YvesSchabes E89-1001 abeille-schabes-1989-parsing @@ -44,32 +44,32 @@ A Metaplan Model for Problem-Solving Discourse - Lance A.Ramshaw + Lance A.Ramshaw E89-1005 ramshaw-1989-metaplan Tenses as Anaphora KurtEberle - WalterKasper + WalterKasper E89-1006 eberle-kasper-1989-tenses On the Generative Power of Two-Level Morphological Rules - GraemeRitchie + GraemeRitchie E89-1007 ritchie-1989-generative Paradigmatic Morphology - JonathanCalder + JonathanCalder E89-1008 calder-1989-paradigmatic Inference in <fixed-case>DATR</fixed-case> - RogerEvans + RogerEvans GeraldGazdar E89-1009 evans-gazdar-1989-inference @@ -78,33 +78,33 @@ Ambiguity Resolution in the <fixed-case>DMTRANS</fixed-case> <fixed-case>PLUS</fixed-case> HiroakiKitano HidetoTomabechi - LoriLevin + LoriLevin E89-1010 kitano-etal-1989-ambiguity The Organization of the <fixed-case>R</fixed-case>osetta Grammars - JanOdijk + JanOdijk E89-1011 odijk-1989-organization Programming in Logic with Constraints for Natural Language Processing - PatrickSaint-Dizier + PatrickSaint-Dizier E89-1012 saint-dizier-1989-programming <fixed-case>JPSG</fixed-case> Parser on Constraint Logic Programming HirosiTuda - KoitiHasida + KoitiHasida HidetosiSirai E89-1013 tuda-etal-1989-jpsg A logical treatment of semi-free word order and bounded discontinuous constituency - MikeReape + MikeReape E89-1014 reape-1989-logical @@ -116,7 +116,7 @@
User studies and the design of Natural Language Systems - SteveWhittaker + SteveWhittaker PhilStenton E89-1016 whittaker-stenton-1989-user @@ -138,19 +138,19 @@ Lexical Acquisition in the Core Language Engine - David M.Carter + David M.Carter E89-1019 carter-1989-lexical It Would Be Much Easier If <fixed-case>WENT</fixed-case> Were <fixed-case>GOED</fixed-case> - DanTufis + DanTufis E89-1020 tufis-1989-much Plan Revision in Person-Machine Dialogue - CleoJullien + CleoJullien Jean-CharlesMarty E89-1021 jullien-marty-1989-plan @@ -158,7 +158,7 @@ Remarks on Plural Anaphora CarolaEschenbach - ChristopherHabel + ChristopherHabel MichaelHerweg KlausRehkamper E89-1022 @@ -166,16 +166,16 @@ Enhancing Explanation Coherence With Rhetorical Strategies - Mark T.Maybury + Mark T.Maybury E89-1023 maybury-1989-enhancing Expressing generalizations in unification-based grammar formalisms MarcMoens - JoCalder + JoCalder EwanKlein - MikeReape + MikeReape HenkZeevat E89-1024 moens-etal-1989-expressing @@ -219,27 +219,27 @@ Subject Erasing and Pronominalization in <fixed-case>I</fixed-case>talian Text Generation - FiammettaNamer + FiammettaNamer E89-1031 namer-1989-subject An Algorithm for Generation in Unification Categorial Grammar - JonathanCalder - MikeReape + JonathanCalder + MikeReape HenkZeevat E89-1032 calder-etal-1989-algorithm Interactive Incremental Chart Parsing - MatsWiren + MatsWiren E89-1033 wiren-1989-interactive <fixed-case>F</fixed-case>rench Order Without Order - Gabriel G.Bes + Gabriel G.Bes ClaireGardent E89-1034 bes-gardent-1989-french @@ -248,63 +248,63 @@ The Syntactic Regularity of <fixed-case>E</fixed-case>nglish Noun Phrases LitaTaylor ClaireGrover - TedBriscoe + TedBriscoe E89-1035 taylor-etal-1989-syntactic A Descriptive Framework for Translating Speaker’s Meaning - MasakoKume + MasakoKume Gayle K.Sato - KeiYoshimoto + KeiYoshimoto E89-1036 kume-etal-1989-descriptive Translation by Structural Correspondences - Ronald M.Kaplan + Ronald M.Kaplan KlausNetter - JurgenWedekind + JurgenWedekind AnnieZaenen E89-1037 kaplan-etal-1989-translation A New View on the Process of Translation - John A.Bateman - Robert T.Kasper + John A.Bateman + Robert T.Kasper Jorg F. L.Schutz - Erich H.Steiner + Erich H.Steiner E89-1038 bateman-etal-1989-new Empirical Studies of Discourse Representations for Natural Language Interfaces NilsDählback - ArneJonsson + ArneJonsson E89-1039 dahlback-jonsson-1989-empirical An Approach to Sentence-Level Anaphora in Machine Translation - Gertjanvan Noord + Gertjanvan Noord JokeDorrepaal DougArnold - StevenKrauwer + StevenKrauwer LouisaSadler - Louisdes Tombe + Louisdes Tombe E89-1040 van-noord-etal-1989-approach Situation Semantics and Machine Translation - C.J.Rupp + C.J.Rupp E89-1041 rupp-1989-situation On Formalisms and Analysis, Generation and Synthesis in Machine Translation - ZaharinYusoff + ZaharinYusoff E89-1042 yusoff-1989-formalisms diff --git a/data/xml/E91.xml b/data/xml/E91.xml index 3c0e7d020c..9b6e58a727 100644 --- a/data/xml/E91.xml +++ b/data/xml/E91.xml @@ -29,8 +29,8 @@ Designing Illustrated Texts: How Language Production Is Influenced by Graphics Generation - WolfgangWahlster - ElisabethAndre + WolfgangWahlster + ElisabethAndre WinfriedGraf ThomasRist E91-1003 @@ -39,21 +39,21 @@ <fixed-case>P</fixed-case>earl: A Probabilistic Chart Parser David M.Magerrnan - Mitchell P.Marcus + Mitchell P.Marcus E91-1004 magerrnan-marcus-1991-pearl Long-Distance Scrambling and <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars TilmanBecker - Aravind K.Joshi - OwenRambow + Aravind K.Joshi + OwenRambow E91-1005 becker-etal-1991-long Bidirectional Parsing of <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - AlbertoLavelli + AlbertoLavelli GiorgioSatta E91-1006 lavelli-satta-1991-bidirectional @@ -66,7 +66,7 @@ Indexing and a Referential Dependencies Within Binding Theory: Computational Framework - FabioPianesi + FabioPianesi E91-1008 pianesi-1991-indexing @@ -84,7 +84,7 @@ Processing Language with Logical Types and Active Constraints - PatrickSaint-Dizier + PatrickSaint-Dizier E91-1011 saint-dizier-1991-processing @@ -102,7 +102,7 @@
What Sort of Trees Do We Speak? A Computational Model of the Syntax-Prosody Interface in <fixed-case>T</fixed-case>okyo <fixed-case>J</fixed-case>apanese - PeteWhitelock + PeteWhitelock E91-1014 whitelock-1991-sort @@ -121,14 +121,14 @@
A Unified Management and Processing of Word-Forms, Idioms and Analytical Compounds - DanTufis + DanTufis OctavPopescu E91-1017 tufis-popescu-1991-unified Analysis of Unknown Words through Morphological Decomposition - Alan W.Black + Alan W.Black Jokevan de Plassche BrionyWilliams E91-1018 @@ -175,7 +175,7 @@ Parsing without lexicon: the <fixed-case>M</fixed-case>or<fixed-case>P</fixed-case> system - GunnelKallgren + GunnelKallgren E91-1025 kallgren-1991-parsing @@ -196,7 +196,7 @@ Generating Referring Expressions Involving Relations RobertDale - NicholasHaddock + NicholasHaddock E91-1028 dale-haddock-1991-generating @@ -209,7 +209,7 @@ The Formal and Processing Models of <fixed-case>CLG</fixed-case> LuisDamas - Giovanni B.Varile + Giovanni B.Varile E91-1030 damas-varile-1991-formal @@ -221,7 +221,7 @@
Multiple Interpreters in a Principle-Based Model of Sentence Processing - Matthew W.Crocker + Matthew W.Crocker E91-1032 crocker-1991-multiple @@ -248,7 +248,7 @@
Classical Logics for Attribute-Value Languages - JurgenWedekind + JurgenWedekind E91-1036 wedekind-1991-classical @@ -272,14 +272,14 @@
An Assessment of Semantic Information Automatically Extracted From Machine Readable Dictionaries - JeanVeronis - NancyIde + JeanVeronis + NancyIde E91-1040 veronis-ide-1991-assessment A Dialogue Manager Using Initiative-Response Units and Distributed Control - ArneJonsson + ArneJonsson E91-1041 jonsson-1991-dialogue @@ -292,7 +292,7 @@
A Bidirectional Model for Natural Language Processing - GunterNeumann + GunterNeumann E91-1043 neumann-1991-bidirectional @@ -304,7 +304,7 @@
Helpful Answers to Modal and Hypothetical Questions - AnneDe Roeck + AnneDe Roeck RichardBall KeithBrown ChrisFox @@ -323,18 +323,18 @@ Limits of a Sentence Based Procedural Approach for Aspect Choice in <fixed-case>G</fixed-case>erman-<fixed-case>R</fixed-case>ussian <fixed-case>MT</fixed-case> - BiankaBuschbeck - RenateHenschel - IrisHoser - GerdaKlimonow - AndreasKustner - IngridStarke + BiankaBuschbeck + RenateHenschel + IrisHoser + GerdaKlimonow + AndreasKustner + IngridStarke E91-1047 buschbeck-etal-1991-limits Lexical Transfer based on bilingual signs: Towards interaction during transfer - Jun-ichTsujii + Jun-ichTsujii KimikazuFujita E91-1048 tsujii-fujita-1991-lexical @@ -349,17 +349,17 @@ A Language for the Statement of Binary Relations over Feature Structures - GrahamRussell + GrahamRussell AfzalBallim - DominiqueEstival - SusanWarwick-Armstrong + DominiqueEstival + SusanWarwick-Armstrong E91-1050 russell-etal-1991-language Structural Non-Correspondence in Translation LouisaSadler - Henry S.Thompson + Henry S.Thompson E91-1051 sadler-thompson-1991-structural diff --git a/data/xml/E93.xml b/data/xml/E93.xml index 2406cc2f40..412779744d 100644 --- a/data/xml/E93.xml +++ b/data/xml/E93.xml @@ -3,9 +3,9 @@ Sixth Conference of the European Chapter of the Association for Computational Linguistics - StevenKrauwer - MichaelMoortgat - Louisdes Tombe + StevenKrauwer + MichaelMoortgat + Louisdes Tombe Association for Computational Linguistics
Utrecht, The Netherlands
April @@ -33,8 +33,8 @@ Experiments in Reusability of Grammatical Resources DougArnold ToniBadia - Josefvan Genabith - StellaMarkantonatou + Josefvan Genabith + StellaMarkantonatou StefanMomma LouisaSadler PaulSchmidt @@ -64,8 +64,8 @@
Data-Oriented Methods for Grapheme-to-Phoneme Conversion - Antalvan den Bosch - WalterDaelemans + Antalvan den Bosch + WalterDaelemans E93-1007 van-den-bosch-daelemans-1993-data @@ -86,7 +86,7 @@ Head-driven Parsing for Lexicalist Grammars: Experimental Results GosseBouma - Gertjanvan Noord + Gertjanvan Noord E93-1010 bouma-van-noord-1993-head @@ -98,7 +98,7 @@
Morphonology in the Lexicon - Lynne J.Cahill + Lynne J.Cahill E93-1012 cahill-1993-morphonology @@ -142,28 +142,28 @@
Rule-based Acquisition and Maintenance of Lexical and Semantic Knowledge - Donna M.Gates + Donna M.Gates PeterShell E93-1019 gates-shell-1993-rule A Computational Treatment of Sentence-Final ‘then’ - SheilaGlasbey + SheilaGlasbey E93-1020 glasbey-1993-computational Towards a proper treatment of coercion phenomena - DanieleGodard + DanieleGodard JacquesJayez E93-1021 godard-jayez-1993-towards Identifying Topic and Focus by an Automatic Procedure - EvaHajicova - PetrSgall + EvaHajicova + PetrSgall HanaSkonmalovla E93-1022 hajicova-etal-1993-identifying @@ -176,13 +176,13 @@ Restriction and Correspondence-based Translation - Ronald M.Kaplan + Ronald M.Kaplan E93-1024 kaplan-1993-restriction A Discourse Copying Algorithm for Ellipsis and Anaphora Resolution - AndrewKehler + AndrewKehler E93-1025 kehler-1993-discourse @@ -195,7 +195,7 @@ Linguistic Knowledge Acquisition from Parsing Failures MasakiKiyono - Jun-ichiTsujii + Jun-ichiTsujii E93-1027 kiyono-tsujii-1993-linguistic @@ -214,14 +214,14 @@ A Semantics and Pragmatics for the Pluperfect AlexLascarides - NicholasAsher + NicholasAsher E93-1030 lascarides-asher-1993-semantics Temporal Connectives in a Discourse Context AlexLascarides - JonOberlander + JonOberlander E93-1031 lascarides-oberlander-1993-temporal @@ -233,7 +233,7 @@
Abductive Explanation of Dialogue Misunderstandings - SusanMcRoy + SusanMcRoy GraemeHirst E93-1033 mcroy-hirst-1993-abductive @@ -274,7 +274,7 @@ Generating Contextually Appropriate Intonation ScottPrevost - MarkSteedman + MarkSteedman E93-1039 prevost-steedman-1993-generating @@ -300,7 +300,7 @@ Coping With Derivation in a Morphological Component - HaraldTrost + HaraldTrost E93-1043 trost-1993-coping @@ -312,7 +312,7 @@
The Use of Shared Forests in <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar Parsing - K.Vijay-Shanker + K.Vijay-Shanker E93-1045 vijay-shanker-1993-use @@ -325,14 +325,14 @@
Type-Driven Semantic Interpretation of f-Structures - JurgenWedekind - Ronald M.Kaplan + JurgenWedekind + Ronald M.Kaplan E93-1047 wedekind-kaplan-1993-type Delimitedness and Trajectory-of-Motion Events - MichaelWhite + MichaelWhite E93-1048 white-1993-delimitedness @@ -349,7 +349,7 @@
Lexical Disambiguation Using Constraint Handling In <fixed-case>P</fixed-case>rolog (<fixed-case>CHIP</fixed-case>) - George C.Demetriou + George C.Demetriou E93-1051 demetriou-1993-lexical @@ -401,8 +401,8 @@ Undestanding Stories in Different Languages with <fixed-case>GETA</fixed-case>-<fixed-case>RUN</fixed-case> DarioBianchi - RodolfoDelmonte - EmanuelePianta + RodolfoDelmonte + EmanuelePianta E93-1058 bianchi-etal-1993-undestanding @@ -415,28 +415,28 @@
Long Sentence Analysis by Domain-Specific Pattern Grammar - ShinichiDoi + ShinichiDoi KazunoriMuraki - ShinichiroKamei + ShinichiroKamei KiyoshiYamabana E93-1060 doi-etal-1993-long Knowledge acquisition for a constrained speech system using <fixed-case>W</fixed-case>o<fixed-case>Z</fixed-case> - LailaDybkjær - Niels OleBernsen - HansDybkjær + LailaDybkjær + Niels OleBernsen + HansDybkjær E93-1061 dybkjaer-etal-1993-knowledge The <fixed-case>PANGLOSS MARK I</fixed-case> <fixed-case>MAT</fixed-case> system - RobertFrederking + RobertFrederking ArielCohen DeanGrannes PeterCousseau - SergeiNirenburg + SergeiNirenburg E93-1062 frederking-etal-1993-pangloss @@ -450,7 +450,7 @@
The Linguistic Annotation System of the <fixed-case>S</fixed-case>tockholm - <fixed-case>U</fixed-case>meå <fixed-case>C</fixed-case>orpus Project - GunnelKällgren + GunnelKällgren GunnarEriksson E93-1064 kallgren-eriksson-1993-linguistic @@ -459,7 +459,7 @@ <fixed-case>INSYST</fixed-case>: An Automatic Inserter System for Hierarchical Lexica MarcLight SabineReinhard - MarieBoyle-Hinrichs + MarieBoyle-Hinrichs E93-1065 light-etal-1993-insyst @@ -471,14 +471,14 @@
<fixed-case>H</fixed-case>elyette: Inflectional Thesaurus for Agglutinative Languages - GaborProszeky - LaszloTihanyi + GaborProszeky + LaszloTihanyi E93-1067 proszeky-tihanyi-1993-helyette Natural Language Front-Ends to Databases: Design and the Customisation Bottleneck - AnneDe Roeck + AnneDe Roeck E93-1068 de-roeck-1993-natural @@ -491,7 +491,7 @@
<fixed-case>ITS</fixed-case>-2 : an interactive personal translation system - EricWehrli + EricWehrli MiraRamluckun E93-1070 wehrli-ramluckun-1993-2 diff --git a/data/xml/E95.xml b/data/xml/E95.xml index 70fa23067b..ede5da8f83 100644 --- a/data/xml/E95.xml +++ b/data/xml/E95.xml @@ -3,8 +3,8 @@ Seventh Conference of the European Chapter of the Association for Computational Linguistics - Steven P.Abney - Erhard W.Hinrichs + Steven P.Abney + Erhard W.Hinrichs Association for Computational Linguistics
Dublin, Ireland
March @@ -44,7 +44,7 @@
The Semantics of Resource Sharing in <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar - AndrewKehler + AndrewKehler MaryDalrymple JohnLamping VijaySaraswat @@ -60,7 +60,7 @@ Some Remarks on the Decidability of the Generation Problem in <fixed-case>LFG</fixed-case>- and <fixed-case>PATR</fixed-case>-Style Unification Grammars - JurgenWedekind + JurgenWedekind E95-1007 wedekind-1995-remarks @@ -68,7 +68,7 @@ Collocation Map for Overcoming Data Sparseness MoonjooKim Young S.Han - Key-SunChoi + Key-SunChoi E95-1008 kim-etal-1995-collocation
@@ -80,16 +80,16 @@
Text Alignment in the Real World: Improving Alignments of Noisy Translations Using Common Lexical Features, String Matching Strategies and N-Gram Comparisons - Mark W.Davis - Ted E.Dunning - William C.Ogden + Mark W.Davis + Ted E.Dunning + William C.Ogden E95-1010 davis-etal-1995-text A Tractable Extension of Linear Indexed Grammars BillKeller - DavidWeir + DavidWeir E95-1011 keller-weir-1995-tractable @@ -120,7 +120,7 @@
On Learning more Appropriate Selectional Restrictions - FrancescRibas + FrancescRibas E95-1016 ribas-1995-learning @@ -144,7 +144,7 @@
Distributional Part-of-Speech Tagging - HinrichSchütze + HinrichSchütze E95-1020 schutze-1995-distributional @@ -171,7 +171,7 @@ Off-line Optimization for <fixed-case>E</fixed-case>arley-style <fixed-case>HPSG</fixed-case> Processing GuidoMinnen DaleGerdemann - ThiloGotz + ThiloGotz E95-1024 minnen-etal-1995-line
@@ -184,7 +184,7 @@ A Robust and Efficient Three-Layered Dialogue Component for a Speech-to-Speech Translation System JanAlexandersson - ElisabethMaier + ElisabethMaier NorbertReithinger E95-1026 alexandersson-etal-1995-robust @@ -198,14 +198,14 @@ Rapid Development of Morphological Descriptions for Full Language Processing Systems - DavidCarter + DavidCarter E95-1028 carter-1995-rapid Specifying a shallow grammatical representation for parsing purposes AtroVoutilainen - TimoJarvinen + TimoJarvinen E95-1029 voutilainen-jarvinen-1995-specifying @@ -219,9 +219,9 @@ A Robust Parser Based on Syntactic Information Kong JooLee - Cheol JungKweon - JungyunSeo - Gil ChangKim + Cheol JungKweon + JungyunSeo + Gil ChangKim E95-1031 lee-etal-1995-robust @@ -246,7 +246,7 @@
Algorithms for Analysing the Temporal Structure of Discourse - JanetHitzeman + JanetHitzeman MarcMoens ClaireGrover E95-1035 @@ -261,7 +261,7 @@ Topic Identification in Discourse - Kuang-huaChen + Kuang-huaChen E95-1037 chen-1995-topic @@ -285,7 +285,7 @@
An Algorithm to Co-Ordinate Anaphora Resolution and <fixed-case>PPS</fixed-case> Disambiguation Process - SalihaAzzam + SalihaAzzam E95-1041 azzam-1995-algorithm diff --git a/data/xml/E99.xml b/data/xml/E99.xml index 93e24b9e06..cb3650583c 100644 --- a/data/xml/E99.xml +++ b/data/xml/E99.xml @@ -3,7 +3,7 @@ Ninth Conference of the European Chapter of the Association for Computational Linguistics - Henry S.Thompson + Henry S.Thompson AlexLascarides Association for Computational Linguistics
Bergen, Norway
@@ -41,8 +41,8 @@
An Object-Oriented Approach to the Design of Dialogue Management Functionality - Ian M.O’Neill - Michael F.McTear + Ian M.O’Neill + Michael F.McTear 23–29 E99-1004 oneill-mctear-1999-object @@ -88,7 +88,7 @@ An Efficient Method for Determining Bilingual Word Classes - Franz JosefOch + Franz JosefOch 71–76 E99-1010 och-1999-efficient @@ -98,9 +98,9 @@ InderjeetMani DavidHouse GaryKlein - LynetteHirschman - ThereseFirmin - BethSundheim + LynetteHirschman + ThereseFirmin + BethSundheim 77–85 E99-1011 mani-etal-1999-tipster @@ -114,7 +114,7 @@ Complementing <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with <fixed-case>R</fixed-case>oget’s and Corpus-based Thesauri for Information Retrieval - RilaMandala + RilaMandala TakenobuTokunaga HozumiTanaka 94–101 @@ -124,8 +124,8 @@ Full Text Parsing using Cascades of Rules: an Information Extraction Perspective 102–109 - FabioCiravegna - AlbertoLavelli + FabioCiravegna + AlbertoLavelli E99-1014 ciravegna-lavelli-1999-full @@ -148,7 +148,7 @@ Transducers from Rewrite Rules with Backreferences DaleGerdemann - Gertjanvan Noord + Gertjanvan Noord 126–133 E99-1017 gerdemann-van-noord-1999-transducers @@ -156,7 +156,7 @@ <fixed-case>POS</fixed-case> Disambiguation and Unknown Word Guessing with Decision Trees Giorgos S.Orphanos - Dimitris N.Christodoulakis + Dimitris N.Christodoulakis 134–141 E99-1018 orphanos-christodoulakis-1999-pos @@ -171,9 +171,9 @@ Tabular Algorithms for <fixed-case>TAG</fixed-case> Parsing - Miguel A.Alonso - DavidCabrero - Ericde la Clergerie + Miguel A.Alonso + DavidCabrero + Ericde la Clergerie ManuelVilares 150–157 E99-1020 @@ -197,7 +197,7 @@ Representing Text Chunks - Erik F.Tjong Kim Sang + Erik F.Tjong Kim Sang JornVeenstra 173–179 E99-1023 @@ -213,7 +213,7 @@ New Models for Improving Supertag Disambiguation JohnChen - SrinivasBangalore + SrinivasBangalore 188–195 E99-1025 chen-bangalore-1999-new @@ -244,26 +244,26 @@ Parsing with an Extended Domain of Locality - JohnCarroll - NicolasNicolov + JohnCarroll + NicolasNicolov OlgaShaumyan MartineSmets - DavidWeir + DavidWeir 217–224 E99-1029 carroll-etal-1999-parsing The Development of Lexical Resources for Information Extraction from Text Combining <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>D</fixed-case>ewey Decimal Classification - GabrielaCavaglia + GabrielaCavaglia 225–228 E99-1030 cavaglia-1999-development A Flexible Architecture for Reference Resolution - Donna K.Byron - Joel R.Tetreault + Donna K.Byron + Joel R.Tetreault 229–232 E99-1031 byron-tetreault-1999-flexible @@ -277,7 +277,7 @@ Investigating <fixed-case>NLG</fixed-case> Architectures: taking style into consideration - Daniel S.Paiva + Daniel S.Paiva 237–240 E99-1033 paiva-1999-investigating @@ -292,7 +292,7 @@ A Cascaded Finite-State Parser for Syntactic Analysis of <fixed-case>S</fixed-case>wedish DimitriosKokkinakis - Sofie JohanssonKokkinakis + Sofie JohanssonKokkinakis 245–248 E99-1035 kokkinakis-kokkinakis-1999-cascaded @@ -346,12 +346,12 @@ Simplifying Text for Language-Impaired Readers - JohnCarroll + JohnCarroll GuidoMinnen DarrenPearce YvonneCanning SiobhanDevlin - JohnTait + JohnTait 269–270 E99-1042 carroll-etal-1999-simplifying @@ -359,15 +359,15 @@ The <fixed-case>GENIA</fixed-case> project: corpus-based knowledge acquisition and information extraction from genome research papers NigelCollier - Hyun SeokPark + Hyun SeokPark NorihiroOgata - YukaTateishi + YukaTateishi ChikashiNobata TomokoOhta TateshiSekimizu HisaoImai KatsutoshiIbushi - Jun-ichiTsujii + Jun-ichiTsujii 271–272 E99-1043 collier-etal-1999-genia @@ -395,7 +395,7 @@ <fixed-case>u-TBL</fixed-case> Lite: A Small, Extendible Transformation-Based Learner - TorbjornLager + TorbjornLager 279–280 E99-1047 lager-1999-u @@ -425,11 +425,11 @@ Robust and Flexible Mixed-Initiative Dialogue for Telephone Services - JoseRelano Gil - DanielTapias + JoseRelano Gil + DanielTapias Maria C.Gancedo - MarcelaCharfuelan - Luis A.Hernandez + MarcelaCharfuelan + Luis A.Hernandez 287–290 E99-1051 relano-gil-etal-1999-robust diff --git a/data/xml/F12.xml b/data/xml/F12.xml index a898ce6eb7..1c25796fb8 100644 --- a/data/xml/F12.xml +++ b/data/xml/F12.xml @@ -4,9 +4,9 @@ Proceedings of the Joint Conference JEP-TALN-RECITAL 2012, volume 1: JEP F12-1 - LaurentBesacier + LaurentBesacier BenjaminLecouteux - GillesSérasset + GillesSérasset ATALA/AFCP
Grenoble, France
June @@ -27,7 +27,7 @@ La structuration prosodique et les relations syntaxe/ prosodie dans le discours politique (Prosodic Structuring and the Syntax-Prosody Relationship in Political Speech) [in <fixed-case>F</fixed-case>rench] IngoFeldhausen - ElisabethDelais-Roussarie + ElisabethDelais-Roussarie 9–16 F12-1002 feldhausen-delais-roussarie-2012-la @@ -62,7 +62,7 @@ LucianaMendonça-Alves RobertEspesser AlainGhio - Célinede Looze + Célinede Looze CésarReis 41–48 F12-1006 @@ -73,7 +73,7 @@ JulienFayolle FabienneMoreau ChristianRaymond - GuillaumeGravier + GuillaumeGravier 49–56 F12-1007 fayolle-etal-2012-automates @@ -178,7 +178,7 @@ Détection d’émotions dans la voix de patients en interaction avec un agent conversationnel animé (Emotions detection in the voice of patients interacting with an animated conversational agent) [in <fixed-case>F</fixed-case>rench] ClémentChastagnol - LaurenceDevillers + LaurenceDevillers 137–144 F12-1018 chastagnol-devillers-2012-detection @@ -220,14 +220,14 @@ Prosodie multimodale. Les enchères chantées aux Etats-Unis (Multimodal Prosody. The auction chant in the <fixed-case>U</fixed-case>nited <fixed-case>S</fixed-case>tates) [in <fixed-case>F</fixed-case>rench] - GaëlleFerré + GaëlleFerré 177–184 F12-1023 ferre-2012-prosodie Un cadre expérimental pour les Sciences de la Parole (An experimental framework for speech sciences) [in <fixed-case>F</fixed-case>rench] - GillesAdda + GillesAdda 185–192 F12-1024 adda-2012-un @@ -271,7 +271,7 @@ Coordinations spatio-temporelles dans les suites ab(b)i en arabe marocain (Spatio-temporal coordinations in <fixed-case>M</fixed-case>oroccan <fixed-case>A</fixed-case>rabic ab(bi) sequences) [in <fixed-case>F</fixed-case>rench] ChakirZeroual - PhilipHoole + PhilipHoole DiamantisGafos JohnEsling 225–232 @@ -302,7 +302,7 @@ Les ajustements laryngaux en français (Laryngeal adjustments in <fixed-case>F</fixed-case>rench) [in <fixed-case>F</fixed-case>rench] RachidRidouane NicolasAudibert - Van MinhNguyen + Van MinhNguyen 249–256 F12-1032 ridouane-etal-2012-les @@ -321,7 +321,7 @@ La Prosodie des énoncés interrogatifs en français langue seconde (The prosody of questions in <fixed-case>F</fixed-case>rench as <fixed-case>L</fixed-case>2) [in <fixed-case>F</fixed-case>rench] FabianSantiago Vargas - ÉlisabethDelais-Roussarie + ÉlisabethDelais-Roussarie 265–272 F12-1034 santiago-vargas-delais-roussarie-2012-la @@ -329,7 +329,7 @@ Extraction de mots clefs dans des vidéos Web par Analyse Latente de <fixed-case>D</fixed-case>irichlet (<fixed-case>LDA</fixed-case>-based tagging of Web videos) [in <fixed-case>F</fixed-case>rench] MohamedMorchid - GeorgesLinarès + GeorgesLinarès 273–280 F12-1035 morchid-linares-2012-extraction @@ -337,7 +337,7 @@ Impact du Comportement Social d’un Robot sur les Emotions de l’Utilisateur : une Expérience Perceptive (Impact of the Social Behaviour of a Robot on the User’s Emotions: a Perceptive Experiment) [in <fixed-case>F</fixed-case>rench] AgnesDelaborde - LaurenceDevillers + LaurenceDevillers 281–288 F12-1036 delaborde-devillers-2012-impact @@ -356,7 +356,7 @@ AnthonyLarcher Pierre-MichelBousquet DrissMatrouf - Jean-FrancoisBonastre + Jean-FrancoisBonastre 297–304 F12-1038 larcher-etal-2012-analyse @@ -382,7 +382,7 @@ Vers un mesure automatique de l’adaptation prosodique en interaction conversationnelle (Automatic measurement of prosodic accommodation in conversational interaction) [in <fixed-case>F</fixed-case>rench] - CélineDe Looze + CélineDe Looze StefanScherer BrianVaughan NickCampbell @@ -393,8 +393,8 @@ Une comparaison de la déclinaison de F0 entre le français et l’allemand journalistiques (F0-declination : a comparison between <fixed-case>F</fixed-case>rench and <fixed-case>G</fixed-case>erman journalistic speech) [in <fixed-case>F</fixed-case>rench] CarolinSchmid - CédricGendrot - MartineAdda-Decker + CédricGendrot + MartineAdda-Decker 329–336 F12-1042 schmid-etal-2012-une @@ -495,7 +495,7 @@ L’identification du locuteur : 20 ans de témoignage dans les cours de Justice. Le cas du <fixed-case>LIPSADON</fixed-case> << laboratoire indépendant de police scientifique >> (Forensic speaker identification: 20 years of scientific testimonies in courts of Justice. The case of <fixed-case>LIPSADON</fixed-case> “forensics independent laboratory”) [in <fixed-case>F</fixed-case>rench] Louis-JeanBoë - Jean-FrançoisBonastre + Jean-FrançoisBonastre 417–424 F12-1053 boe-bonastre-2012-lidentification @@ -505,7 +505,7 @@ JulietteKahn NicolasScheffer SolangeRossato - Jean-FrançoisBonastre + Jean-FrançoisBonastre 425–432 F12-1054 kahn-etal-2012-verification @@ -597,7 +597,7 @@ Codage échelonnable à granularité fine de la parole : Application au codeur G.729 (Fine granularity scalable speech coding: Application to the G.729 coder) [in <fixed-case>F</fixed-case>rench] MouloudDjamah - DouglasO’Shaughnessy + DouglasO’Shaughnessy 505–512 F12-1064 djamah-oshaughnessy-2012-codage @@ -643,26 +643,26 @@ La liaison dans la parole spontanée familière : explorations semi-automatiques de grands corpus (<fixed-case>F</fixed-case>rench Liaison in casual speech : automatic and manual investigations) [in <fixed-case>F</fixed-case>rench] - MartineAdda-Decker - ElisabethDelais-Roussarie + MartineAdda-Decker + ElisabethDelais-Roussarie CécileFougeron - CédricGendrot - LoriLamel + CédricGendrot + LoriLamel 545–552 F12-1069 adda-decker-etal-2012-la Percol0 - un système multimodal de détection de personnes dans des documents vidéo (Percol0 - A multimodal person detection system in video documents) [in <fixed-case>F</fixed-case>rench] - FredericBechet + FredericBechet RemiAuguste StephaneAyache DelphineCharlet - GeraldineDamnati - BenoitFavre + GeraldineDamnati + BenoitFavre CorinneFredouille ChristopheLevy - GeorgesLinares + GeorgesLinares JeanMartinet 553–560 F12-1070 @@ -679,16 +679,16 @@ Évaluation segmentale du système de synthèse <fixed-case>HTS</fixed-case> pour le français (Segmental evaluation of <fixed-case>HTS</fixed-case>) [in <fixed-case>F</fixed-case>rench] - SébastienLe Maguer + SébastienLe Maguer NellyBarbot - OlivierBoeffard + OlivierBoeffard 569–576 F12-1072 le-maguer-etal-2012-evaluation Lire les tons sur les lèvres : perception(s) visuelle(s) des tons lexicaux en chinois mandarin (Read the tones on the lips : visual perception(s) of lexical tones in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese) [in <fixed-case>F</fixed-case>rench] - GrégoryRoulet-Guiot + GrégoryRoulet-Guiot CorineAstésano 577–584 F12-1073 @@ -719,7 +719,7 @@ <fixed-case>PROSOTRAN</fixed-case> : un système d’annotation symbolique des faits prosodiques pour les données non-standards (<fixed-case>PROSOTRAN</fixed-case> : an tool that provides a symbolic representation of the prosodic events in non-standard data) [in <fixed-case>F</fixed-case>rench] KatarinaBartkova - ElisabethDelais-Roussarie + ElisabethDelais-Roussarie FabianSantiago Vargas 601–608 F12-1076 @@ -727,7 +727,7 @@ Questions corses : peut-on mettre en évidence un transfert prosodique du corse vers le français ? (<fixed-case>C</fixed-case>orsican questions: is there a prosodic transfer from <fixed-case>C</fixed-case>orsican to <fixed-case>F</fixed-case>rench?) [in <fixed-case>F</fixed-case>rench] - PhilippeBoula de Mareüil + PhilippeBoula de Mareüil AlbertRilliard PaoloMairano Jean-PierreLai @@ -747,7 +747,7 @@ Allongements vocaliques en français de <fixed-case>B</fixed-case>elgique : approche expérimentale et perceptive (Vowel lengthening in <fixed-case>B</fixed-case>elgium <fixed-case>F</fixed-case>rench: an experimental and perceptual approach) [in <fixed-case>F</fixed-case>rench] AliceBardiaux - PhilippeBoula de Mareüil + PhilippeBoula de Mareüil 625–632 F12-1079 bardiaux-boula-de-mareuil-2012-allongements @@ -772,8 +772,8 @@ Comparaison de parole journalistique et de parole spontanée : analyses de séquences entre pauses (Comparison of journalistic and spontaneous speech: analysis of sequences between pauses) [in <fixed-case>F</fixed-case>rench] - CedricGendrot - MartineAdda-decker + CedricGendrot + MartineAdda-decker CarolinSchmid 649–656 F12-1082 @@ -790,7 +790,7 @@ Mise au point d’un paradigme de perturbation motrice pour l’étude de la perception de la parole (Defining a motor perturbation paradigm for speech perception studies) [in <fixed-case>F</fixed-case>rench] - Ali HadianCefidekhanie + Ali HadianCefidekhanie ChristopheSavariaux MarcSato Jean-LucSchwartz @@ -829,7 +829,7 @@ Prédiction de l’indexabilité d’une transcription (Prediction of transcription indexability) [in <fixed-case>F</fixed-case>rench] GrégorySenay BenjaminLecouteux - GeorgesLinarès + GeorgesLinarès 697–705 F12-1088 senay-etal-2012-prediction @@ -868,9 +868,9 @@ Vers une annotation automatique de corpus audio pour la synthèse de parole (Towards Fully Automatic Annotation of Audio Books for Text-To-Speech (<fixed-case>TTS</fixed-case>) Synthesis) [in <fixed-case>F</fixed-case>rench] - OlivierBoëffard + OlivierBoëffard LaureCharonnat - SébastienLe Maguer + SébastienLe Maguer DamienLolive GaëlleVidal 731–738 @@ -920,15 +920,15 @@ Robustesse et portabilités multilingue et multi-domaines des systèmes de compréhension de la parole : les corpus du projet <fixed-case>P</fixed-case>ort<fixed-case>M</fixed-case>edia (Robustness and portability of spoken language understanding systems among languages and domains : the <fixed-case>PORTMEDIA</fixed-case> project) [in <fixed-case>F</fixed-case>rench] - FabriceLefèvre - DjamelMostefa + FabriceLefèvre + DjamelMostefa LaurentBesacier YannickEstève - MatthieuQuignard + MatthieuQuignard NathalieCamelin - BenoitFavre + BenoitFavre BassamJabaian - LinaRojas-Barahona + LinaRojas-Barahona 779–786 F12-1098 lefevre-etal-2012-robustesse @@ -947,9 +947,9 @@ Avancées dans le domaine de la transcription automatique par décodage guidé (Improvements on driven decoding system combination) [in <fixed-case>F</fixed-case>rench] FethiBougares YannickEstève - PaulDeléglise + PaulDeléglise MickaelRouvier - GeorgesLinarès + GeorgesLinarès 795–802 F12-1100 bougares-etal-2012-avancees @@ -967,7 +967,7 @@ Détection et caractérisation des régions d’erreurs dans des transcriptions de contenus multimédia : application à la recherche des noms de personnes (Error region detection and characterization in transcriptions of multimedia documents : application to person name search) [in <fixed-case>F</fixed-case>rench] RichardDufour - GéraldineDamnati + GéraldineDamnati DelphineCharlet 811–818 F12-1102 @@ -1038,9 +1038,9 @@ Proceedings of the Joint Conference JEP-TALN-RECITAL 2012, volume 2: TALN F12-2 - GeorgesAntoniadis - HervéBlanchon - GillesSérasset + GeorgesAntoniadis + HervéBlanchon + GillesSérasset ATALA/AFCP
Grenoble, France
June @@ -1063,7 +1063,7 @@ Extraction d’information automatique en domaine médical par projection inter-langue : vers un passage à l’échelle (Automatic Information Extraction in the Medical Domain by Cross-Lingual Projection) [in <fixed-case>F</fixed-case>rench] AsmaBen Abacha - PierreZweigenbaum + PierreZweigenbaum AurélienMax 15–28 F12-2002 @@ -1080,7 +1080,7 @@ Traitement automatique sur corpus de récits de voyages pyrénéens : Une analyse syntaxique, sémantique et temporelle (Processing of a Pyrenees Travel Novels Corpus : a Syntactical, Semantical and Temporal Analysis.) [in <fixed-case>F</fixed-case>rench] - AnaïsLefeuvre + AnaïsLefeuvre RichardMoot ChristianRetoré Noémie-FleurSandillon-Rezer @@ -1090,7 +1090,7 @@ La reconnaissance des mots composés à l’épreuve de l’analyse syntaxique et vice-versa : évaluation de deux stratégies discriminantes (Recognition of Compound Words Tested against Parsing and Vice-versa : Evaluation of Two Discriminative Approaches) [in <fixed-case>F</fixed-case>rench] - MatthieuConstant + MatthieuConstant AnthonySigogne PatrickWatrin 57–70 @@ -1117,7 +1117,7 @@ <fixed-case>TCOF</fixed-case>-<fixed-case>POS</fixed-case> : un corpus libre de français parlé annoté en morphosyntaxe (<fixed-case>TCOF</fixed-case>-<fixed-case>POS</fixed-case> : A Freely Available <fixed-case>POS</fixed-case>-Tagged Corpus of Spoken <fixed-case>F</fixed-case>rench) [in <fixed-case>F</fixed-case>rench] ChristopheBenzitoun KarënFort - BenoîtSagot + BenoîtSagot 99–112 F12-2008 benzitoun-etal-2012-tcof @@ -1133,7 +1133,7 @@ Utilisation de la translittération arabe pour l’amélioration de l’alignement de mots à partir de corpus parallèles français-arabe (Using <fixed-case>A</fixed-case>rabic Transliteration to Improve Word Alignment from <fixed-case>F</fixed-case>rench-<fixed-case>A</fixed-case>rabic Parallel Corpora) [in <fixed-case>F</fixed-case>rench] - HoudaSaadane + HoudaSaadane NasredineSemmar 127–140 F12-2010 @@ -1142,14 +1142,14 @@ Compositionnalité et contextes issus de corpus comparables pour la traduction terminologique (Compositionality and Context for Bilingual Lexicon Extraction from Comparable Corpora) [in <fixed-case>F</fixed-case>rench] EmmanuelMorin - BéatriceDaille + BéatriceDaille 141–154 F12-2011 morin-daille-2012-compositionnalite Raffinement du Lexique des Verbes Français (Resource Refining : << Les Verbes Français >>) [in <fixed-case>F</fixed-case>rench] - PaulBédaride + PaulBédaride 155–168 F12-2012 bedaride-2012-raffinement @@ -1157,16 +1157,16 @@ Étude des manifestations de la relation de méronymie dans une ressource distributionnelle (Study of Meronymy in a Distribution-Based Lexical Resource) [in <fixed-case>F</fixed-case>rench] FrançoisMorlane-Hondère - CécileFabre + CécileFabre 169–182 F12-2013 morlane-hondere-fabre-2012-etude Un critère de cohésion thématique fondé sur un graphe de cooccurrences (Topical Cohesion using Graph Random Walks) [in <fixed-case>F</fixed-case>rench] - Clémentde Groc + Clémentde Groc XavierTannier - Claudede Loupy + Claudede Loupy 183–195 F12-2014 de-groc-etal-2012-un @@ -1194,9 +1194,9 @@ Étude comparative entre trois approches de résumé automatique de documents arabes (Comparative Study of Three Approaches to Automatic Summarization of <fixed-case>A</fixed-case>rabic Documents) [in <fixed-case>F</fixed-case>rench] IskandarKeskes - Mohamed MahdiBoudabous + Mohamed MahdiBoudabous Mohamed HédiMaaloul - LamiaHadrich Belguith + LamiaHadrich Belguith 225–238 F12-2017 keskes-etal-2012-etude @@ -1255,8 +1255,8 @@ Le corpus Sequoia : annotation syntaxique et exploitation pour l’adaptation d’analyseur par pont lexical (The Sequoia Corpus : Syntactic Annotation and Use for a Parser Lexical Domain Adaptation Method) [in <fixed-case>F</fixed-case>rench] - MarieCandito - DjaméSeddah + MarieCandito + DjaméSeddah 321–334 F12-2024 candito-seddah-2012-le @@ -1264,17 +1264,17 @@ <fixed-case>ACOLAD</fixed-case> Plateforme pour l’édition collaborative dépendancielle (<fixed-case>ACOLAD</fixed-case>: platform for collaborative dependency annotation) [in <fixed-case>F</fixed-case>rench] FrancisBrunet-Manquat - JérômeGoulian + JérômeGoulian 335–342 F12-2025 brunet-manquat-goulian-2012-acolad Extraction de préférences à partir de dialogues de négociation (Towards Preference Extraction From Negotiation Dialogues) [in <fixed-case>F</fixed-case>rench] - AnaïsCadilhac - FarahBenamara + AnaïsCadilhac + FarahBenamara VladimirPopescu - NicholasAsher + NicholasAsher MohamadouSeck 343–350 F12-2026 @@ -1282,8 +1282,8 @@ Détection de conflits dans les communautés épistémiques en ligne (Conflicts detection in online epistemic communities) [in <fixed-case>F</fixed-case>rench] - AlexandreDenis - MatthieuQuignard + AlexandreDenis + MatthieuQuignard DominiqueFréard FrançoiseDétienne MichaelBaker @@ -1296,9 +1296,9 @@ Quel est l’apport de la détection d’entités nommées pour l’extraction d’information en domaine restreint ? (What is the contribution of named entities detection for information extraction in restricted domain ?) [in <fixed-case>F</fixed-case>rench] CamilleDutrey ChloéClavel - SophieRosset - IoanaVasilescu - MartineAdda-Decker + SophieRosset + IoanaVasilescu + MartineAdda-Decker 359–366 F12-2028 dutrey-etal-2012-quel @@ -1314,7 +1314,7 @@ Méthodologie d’exploration de corpus et de formalisation de règles grammaticales pour les langues des signes (Methodology for corpus exploration and grammatical rule building in Sign Language) [in <fixed-case>F</fixed-case>rench] MichaelFilhol - AnneliesBraffort + AnneliesBraffort 375–382 F12-2030 filhol-braffort-2012-methodologie @@ -1337,7 +1337,7 @@ Le Lexicoscope : un outil pour l’étude de profils combinatoires et l’extraction de constructions lexico-syntaxiques (The Lexicoscope : an integrated tool for combinatoric profles observation and lexico-syntactic constructs extraction) [in <fixed-case>F</fixed-case>rench] - OlivierKraif + OlivierKraif SaschaDiwersy 399–406 F12-2033 @@ -1372,7 +1372,7 @@ Apprentissage automatique d’un chunker pour le français (Machine Learning of a chunker for <fixed-case>F</fixed-case>rench) [in <fixed-case>F</fixed-case>rench] IsabelleTellier DenysDuchier - IrisEshkol + IrisEshkol ArnaudCourmet MathieuMartinet 431–438 @@ -1400,7 +1400,7 @@ La reconnaissance automatique de la fonction des pronoms démonstratifs en langue arabe (Automatic recognition of demonstrative pronouns function in <fixed-case>A</fixed-case>rabic) [in <fixed-case>F</fixed-case>rench] YacineBen Yahia SouhaMezghani Hammami - LamiaHadrich Belguith + LamiaHadrich Belguith 455–462 F12-2040 ben-yahia-etal-2012-la @@ -1426,7 +1426,7 @@ Combinaison de ressources générales pour une contextualisation implicite de requêtes (Query Contextualization and Reformulation by Combining External Corpora) [in <fixed-case>F</fixed-case>rench] RomainDeveaud - PatriceBellot + PatriceBellot 479–486 F12-2043 deveaud-bellot-2012-combinaison @@ -1434,7 +1434,7 @@ Repérage des entités nommées pour l’arabe : adaptation non-supervisée et combinaison de systèmes (Named Entity Recognition for <fixed-case>A</fixed-case>rabic : Unsupervised adaptation and Systems combination) [in <fixed-case>F</fixed-case>rench] SouhirGahbiche-Braham - HélèneBonneau-Maynard + HélèneBonneau-Maynard ThomasLavergne FrançoisYvon 487–494 @@ -1443,7 +1443,7 @@ Propagation de polarités dans des familles de mots : impact de la morphologie dans la construction d’un lexique pour l’analyse de sentiments (Spreading Polarities among Word Families: Impact of Morphology on Building a Lexicon for Sentiment Analysis) [in <fixed-case>F</fixed-case>rench] - NúriaGala + NúriaGala CarolineBrun 495–502 F12-2045 @@ -1453,7 +1453,7 @@ Transitions thématiques : Annotation d’un corpus journalistique et premières analyses (Manual thematic annotation of a journalistic corpus : first observations and evaluation) [in <fixed-case>F</fixed-case>rench] AlexandreLabadié PatriceEnjalbert - StéphaneFerrari + StéphaneFerrari 503–510 F12-2046 labadie-etal-2012-transitions @@ -1478,17 +1478,17 @@ Post-édition statistique pour l’adaptation aux domaines de spécialité en traduction automatique (Statistical Post-Editing of Machine Translation for Domain Adaptation) [in <fixed-case>F</fixed-case>rench] - RaphaëlRubino + RaphaëlRubino StéphaneHuet - FabriceLefèvre - GeorgesLinarès + FabriceLefèvre + GeorgesLinarès 527–534 F12-2049 rubino-etal-2012-post Annotation référentielle du Corpus Arboré de <fixed-case>P</fixed-case>aris 7 en entités nommées (Referential named entity annotation of the <fixed-case>P</fixed-case>aris 7 <fixed-case>F</fixed-case>rench <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank) [in <fixed-case>F</fixed-case>rench] - BenoîtSagot + BenoîtSagot MarionRichard RosaStern 535–542 @@ -1497,7 +1497,7 @@ Utilisation des fonctions de croyance pour l’estimation de paramètres en traduction automatique (Feature calculation for Statistical Machine Translation by using belief functions) [in <fixed-case>F</fixed-case>rench] - ChristopheServan + ChristopheServan SimonPetitrenaud 543–550 F12-2051 @@ -1514,7 +1514,7 @@ Enjeux méthodologiques, linguistiques et informatiques pour le traitement du français écrit des sourds (Methodological, linguistic and computational challenges for processing written <fixed-case>F</fixed-case>rench of deaf people) [in <fixed-case>F</fixed-case>rench] - TristanVanrullen + TristanVanrullen LeïlaBoutora JeanDagron 559–566 @@ -1528,7 +1528,7 @@ F12-3 Jorge MauricioMolina Mejia DidierSchwab - GillesSérasset + GillesSérasset ATALA/AFCP
Grenoble, France
June @@ -1562,7 +1562,7 @@
Integrating lexical, syntactic and system-based features to improve Word Confidence Estimation in <fixed-case>SMT</fixed-case> - Ngoc QuangLuong + Ngoc QuangLuong 43–56 F12-3004 luong-2012-integrating @@ -1598,7 +1598,7 @@ Création d’un multi-arbre à partir d’un texte balisé : l’exemple de l’annotation d’un corpus d’oral spontané (Creating a Multi-Tree from a Tagged Text : Annotating Spoken <fixed-case>F</fixed-case>rench) [in <fixed-case>F</fixed-case>rench] - JulieBelião + JulieBelião 109–122 F12-3009 beliao-2012-creation @@ -1727,11 +1727,11 @@ Proceedings of the Joint Conference JEP-TALN-RECITAL 2012, volume 4: Invited Conferences F12-4 - LaurentBesacier - HervéBlanchon + LaurentBesacier + HervéBlanchon Marie-PauleJacques NathalieVallée - GillesSérasset + GillesSérasset ATALA/AFCP
Grenoble, France
June @@ -1751,7 +1751,7 @@
Tensions entre théorie et pratique dans les systèmes de <fixed-case>TAL</fixed-case>. Étude historique et épistémologique (Tensions Between Theory and Practice in <fixed-case>NLP</fixed-case> Systems. Historic and Epistemological Study) [in <fixed-case>F</fixed-case>rench] - JacquelineLéon + JacquelineLéon 3 F12-4002 leon-2012-tensions @@ -1775,9 +1775,9 @@ Proceedings of the Joint Conference JEP-TALN-RECITAL 2012, volume 5: Software Demonstrations F12-5 - LaurentBesacier - HervéBlanchon - GillesSérasset + LaurentBesacier + HervéBlanchon + GillesSérasset ATALA/AFCP
Grenoble, France
June @@ -1791,7 +1791,7 @@ <fixed-case>G</fixed-case>rew : un outil de réécriture de graphes pour le <fixed-case>TAL</fixed-case> (<fixed-case>G</fixed-case>rew: a Graph Rewriting Tool for <fixed-case>NLP</fixed-case>) [in <fixed-case>F</fixed-case>rench] BrunoGuillaume - GuillameBonfante + GuillameBonfante PaulMasson MathieuMorey GuyPerrier @@ -1801,7 +1801,7 @@ Interfaces de navigation dans des contenus audio et vidéo (Navigation interfaces through audio and video contents) [in <fixed-case>F</fixed-case>rench] - GéraldineDamnati + GéraldineDamnati 3–4 F12-5002 damnati-2012-interfaces @@ -1857,7 +1857,7 @@ AtefBen-Youssef PierreBadin GérardBailly - FrédéricEliséi + FrédéricEliséi 17–18 F12-5009 hueber-etal-2012-vizart3d @@ -1865,7 +1865,7 @@ <fixed-case>ROC</fixed-case>me! : logiciel pour l’enregistrement et la gestion de corpus oraux (<fixed-case>ROC</fixed-case>me!: software for the recording and management of oral corpora) [in <fixed-case>F</fixed-case>rench] EmmanuelFerragne - SébastienFlavier + SébastienFlavier ChristianFressard 19–20 F12-5010 diff --git a/data/xml/F13.xml b/data/xml/F13.xml index a3cabc74e1..c9b6d8ba3f 100644 --- a/data/xml/F13.xml +++ b/data/xml/F13.xml @@ -36,7 +36,7 @@ Using <fixed-case>POMDP</fixed-case>s for Topic-Focused <fixed-case>M</fixed-case>ulti-<fixed-case>D</fixed-case>ocument Summarization (L’utilisation des <fixed-case>POMDP</fixed-case> pour les résumés multi-documents orientés par une thématique) [in <fixed-case>F</fixed-case>rench] YlliasChali - Sadid A.Hasan + Sadid A.Hasan MustaphaMojahid 33-47 F13-1003 @@ -52,7 +52,7 @@ Grouping of terms based on linguistic and semantic regularities in a cross-lingual context (Groupement de termes basé sur des régularités linguistiques et sémantiques dans un contexte cross-langue) [in <fixed-case>F</fixed-case>rench] MarieDupuch - ThierryHamon + ThierryHamon NataliaGrabar 62-75 F13-1005 @@ -61,7 +61,7 @@ <fixed-case>W</fixed-case>o<fixed-case>N</fixed-case>e<fixed-case>F</fixed-case>, an improved, extended and evaluated automatic <fixed-case>F</fixed-case>rench translation of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et (<fixed-case>W</fixed-case>o<fixed-case>N</fixed-case>e<fixed-case>F</fixed-case> : amélioration, extension et évaluation d’une traduction française automatique de <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et) [in <fixed-case>F</fixed-case>rench] QuentinPradet - JeanneBaguenier-Desormeaux + JeanneBaguenier-Desormeaux Gaëlde Chalendar LaurenceDanlos 76-89 @@ -71,8 +71,8 @@ Discriminative statistical approaches for multilingual speech understanding (Approches statistiques discriminantes pour l’interprétation sémantique multilingue de la parole) [in <fixed-case>F</fixed-case>rench] BassamJabaian - FabriceLefèvre - LaurentBesacier + FabriceLefèvre + LaurentBesacier 90-103 F13-1007 jabaian-etal-2013-discriminative @@ -122,8 +122,8 @@ Towards a treebank of spoken <fixed-case>F</fixed-case>rench (Vers un treebank du français parlé) [in <fixed-case>F</fixed-case>rench] - AnneAbeillé - BenoitCrabbé + AnneAbeillé + BenoitCrabbé 174-187 F13-1013 abeille-crabbe-2013-towards @@ -138,8 +138,8 @@ A probabilistic segment model combining lexical cohesion and disruption for topic segmentation (Un modèle segmental probabiliste combinant cohésion lexicale et rupture lexicale pour la segmentation thématique) [in <fixed-case>F</fixed-case>rench] - AncaSimon - GuillaumeGravier + AncaSimon + GuillaumeGravier PascaleSébillot 202-214 F13-1015 @@ -194,7 +194,7 @@ The constitution of an <fixed-case>A</fixed-case>rabic semantic resource from a multilingual aligned corpus (Constitution d’une ressource sémantique arabe à partir de corpus multilingue aligné) [in <fixed-case>F</fixed-case>rench] Authoul AbdulHay - OlivierKraif + OlivierKraif 299-312 F13-1022 hay-kraif-2013-constitution @@ -202,7 +202,7 @@ Identification, Alignment, and Tranlsation of Relational Adjectives from Comparable Corpora (Identification, alignement, et traductions des adjectifs relationnels en corpus comparables) [in <fixed-case>F</fixed-case>rench] RimaHarastani - BeatriceDaille + BeatriceDaille EmmanuelMorin 313-326 F13-1023 @@ -212,7 +212,7 @@ (Utilisation de la similarité sémantique pour l’extraction de lexiques bilingues à partir de corpus comparables) [in <fixed-case>F</fixed-case>rench] DhouhaBouamor NasredineSemmar - PierreZweigenbaum + PierreZweigenbaum 327-338 F13-1024 bouamor-etal-2013-utilisation @@ -268,7 +268,7 @@ Dynamic extension of a <fixed-case>F</fixed-case>rench morphological lexicon based a text stream (Extension dynamique de lexiques morphologiques pour le français à partir d’un flux textuel) [in <fixed-case>F</fixed-case>rench] - BenoîtSagot + BenoîtSagot DamienNouvel VirginieMouilleron MarionBaranes @@ -289,8 +289,8 @@ Segmenting <fixed-case>A</fixed-case>rabic Texts into Elementary Discourse Units (Segmentation de textes arabes en unités discursives minimales) [in <fixed-case>F</fixed-case>rench] IskandarKeskes - FarahBeanamara - Lamia HadrichBelguith + FarahBeanamara + Lamia HadrichBelguith 435-449 F13-1032 keskes-etal-2013-segmenting @@ -307,8 +307,8 @@ Semantic Annotation in Specific Domains with rich Ontologies (Annotation sémantique pour des domaines spécialisés et des ontologies riches) [in <fixed-case>F</fixed-case>rench] YueMa - FrançoisLévy - AdelineNazarenko + FrançoisLévy + AdelineNazarenko 464-478 F13-1034 ma-etal-2013-semantic @@ -316,8 +316,8 @@ Web pages segmentation for document selection in Question Answering (Pré-segmentation de pages web et sélection de documents pertinents en Questions-Réponses) [in <fixed-case>F</fixed-case>rench] NicolasFoucault - SophieRosset - GillesAdda + SophieRosset + GillesAdda 479-492 F13-1035 foucault-etal-2013-web @@ -357,8 +357,8 @@ Similarities induced by a comparability mapping : meaning and utility in the context of the clustering of comparable texts (Similarités induites par mesure de comparabilité : signification et utilité pour le clustering et l’alignement de textes comparables) [in <fixed-case>F</fixed-case>rench] - Pierre-FrancoisMarteau - GildasMénier + Pierre-FrancoisMarteau + GildasMénier 515-522 F13-2002 marteau-menier-2013-similarities @@ -374,7 +374,7 @@ Driven Decoding for machine translation (Vers un décodage guidé pour la traduction automatique) [in <fixed-case>F</fixed-case>rench] BenjaminLecouteux - LaurentBesacier + LaurentBesacier 531-538 F13-2004 lecouteux-besacier-2013-driven @@ -383,7 +383,7 @@ Can lightweight pre-editing rules improve statistical <fixed-case>MT</fixed-case> of forum content? (La La préédition avec des règles peu coûteuses, utile pour la <fixed-case>TA</fixed-case> statistique des forums ?) [in <fixed-case>F</fixed-case>rench] JohannaGerlach VictoriaPorro - PierretteBouillon + PierretteBouillon SabineLehmann 539-546 F13-2005 @@ -401,28 +401,28 @@ <fixed-case>ANCOR</fixed-case>, the first large <fixed-case>F</fixed-case>rench speaking corpus of conversational speech annotated in coreference to be freely available (<fixed-case>ANCOR</fixed-case>, premier corpus de français parlé d’envergure annoté en coréférence et distribué librement) [in <fixed-case>F</fixed-case>rench] JudithMuzerelle - AnaïsLefeuvre + AnaïsLefeuvre Jean-YvesAntoine EmmanuelSchang DenisMaurel - JeanneVillaneau - IrisEshkol + JeanneVillaneau + IrisEshkol 555-563 F13-2007 muzerelle-etal-2013-ancor Multilingual Compound Splitting (Segmentation Multilingue des Mots Composés) [in <fixed-case>F</fixed-case>rench] - ElizavetaLoginova-Clouet - BéatriceDaille + ElizavetaLoginova-Clouet + BéatriceDaille 564-571 F13-2008 loginova-clouet-daille-2013-multilingual Complex terminologies management - the case of acronyms (Gestion des terminologies riches : l’exemple des acronymes) [in <fixed-case>F</fixed-case>rench] - YingZhang - MathieuMangeot + YingZhang + MathieuMangeot 572-579 F13-2009 zhang-mangeot-2013-complex @@ -430,7 +430,7 @@ N-gram Language Models and <fixed-case>POS</fixed-case> Distribution for the Identification of <fixed-case>S</fixed-case>panish Varieties (Ngrammes et Traits Morphosyntaxiques pour la Identification de Variétés de l’Espagnol) [in <fixed-case>F</fixed-case>rench] MarcosZampieri - Binyam GebrekidanGebre + Binyam GebrekidanGebre SaschaDiwersy 580-587 F13-2010 @@ -439,7 +439,7 @@ Improving Minor Opinion Polarity Classification with Named Entity Analysis (L’apport des Entités Nommées pour la classification des opinions minoritaires) [in <fixed-case>F</fixed-case>rench] AmelFraisse - PatrickParoubek + PatrickParoubek GilFrancopoulo 588-595 F13-2011 @@ -447,7 +447,7 @@ Lexical access via a simple co-occurrence network (Trouver les mots dans un simple réseau de co-occurrences) [in <fixed-case>F</fixed-case>rench] - GemmaBel-Enguix + GemmaBel-Enguix MichaelZock 596-603 F13-2012 @@ -462,7 +462,7 @@ Semantic annotation influence on coreference detection using perceptron approach (Influence des annotations sémantiques sur un système de détection de coréférence à base de perceptron multi-couches) [in <fixed-case>F</fixed-case>rench] - EricCharton + EricCharton MichelGagnon LudovicJean-Louis 612-619 @@ -488,14 +488,14 @@ Localizing toponyms in topographic map titles (Repérer des toponymes dans des titres de cartes topographiques) [in <fixed-case>F</fixed-case>rench] CatherineDominguès - IrisEshkol-Taravella + IrisEshkol-Taravella 636-642 F13-2017 domingues-eshkol-taravella-2013-localizing Extraction of temporal relations between clinical events in clinical documents (Extraction des relations temporelles entre événements médicaux dans des comptes rendus hospitaliers) [in <fixed-case>F</fixed-case>rench] - PierreZweigenbaum + PierreZweigenbaum XavierTannier 643-650 F13-2018 @@ -528,7 +528,7 @@ Converting dependencies for syntactic analysis of <fixed-case>F</fixed-case>rench into <fixed-case>PASSAGE</fixed-case> functional relations (Convertir des analyses syntaxiques en dépendances vers les relations fonctionnelles <fixed-case>PASSAGE</fixed-case>) [in <fixed-case>F</fixed-case>rench] - PatrickParoubek + PatrickParoubek MunshiAsadullah AnneVilnat 675-682 @@ -546,8 +546,8 @@ Lexicons from Comparable Corpora for Multilingual Information Retrieval (Lexiques de corpus comparables et recherche d’information multilingue) [in <fixed-case>F</fixed-case>rench] FrederikCailliau ArianeCavet - ClémentDe Groc - ClaudeDe Loupy + ClémentDe Groc + ClaudeDe Loupy 691-698 F13-2024 cailliau-etal-2013-lexicons @@ -562,9 +562,9 @@ <fixed-case>S</fixed-case>eg<fixed-case>CV</fixed-case> : Eficient parsing of résumés with analysis and correction of errors (<fixed-case>S</fixed-case>eg<fixed-case>CV</fixed-case> : traitement efficace de <fixed-case>CV</fixed-case> avec analyse et correction d’erreurs) [in <fixed-case>F</fixed-case>rench] - Luis AdriánCabrera-Diego - Juan-ManuelTorres-Moreno - MarcEl-Bèze + Luis AdriánCabrera-Diego + Juan-ManuelTorres-Moreno + MarcEl-Bèze 707-714 F13-2026 cabrera-diego-etal-2013-segcv @@ -572,8 +572,8 @@ Search and usage of named conceptual entities in a categorisazion task (Recherche et utilisation d’entités nommées conceptuelles dans une tâche de catégorisation) [in <fixed-case>F</fixed-case>rench] Jean-ValèreCossu - Juan-ManuelTorres-Moreno - MarcEl-Bèze + Juan-ManuelTorres-Moreno + MarcEl-Bèze 715-722 F13-2027 cossu-etal-2013-search @@ -581,7 +581,7 @@ A corpus of post-edited translations (Un corpus d’erreurs de traduction) [in <fixed-case>F</fixed-case>rench] GuillaumeWisniewski - Anil KumarSingh + Anil KumarSingh NataliaSegal FrançoisYvon 723-730 @@ -590,17 +590,17 @@ An evaluation summary method based on combination of automatic and textual complexity metrics (Une méthode d’évaluation des résumés basée sur la combinaison de métriques automatiques et de complexité textuelle) [in <fixed-case>F</fixed-case>rench] - Samira WalhaEllouze + Samira WalhaEllouze MaherJaoua - Lamia HadrichBelguith + Lamia HadrichBelguith 731-738 F13-2029 ellouze-etal-2013-evaluation An iterative topic segmentation algorithm with intra-content term weighting (Segmentation thématique : processus itératif de pondération intra-contenu) [in <fixed-case>F</fixed-case>rench] - AbdessalamBouchekif - GéraldineDamnati + AbdessalamBouchekif + GéraldineDamnati DelphineCharlet 739-746 F13-2030 @@ -612,7 +612,7 @@ HubertNaets LaetitiaBrouwers PavelRomanov - CédrickFairon + CédrickFairon 747-754 F13-2031 panchenko-etal-2013-search @@ -620,7 +620,7 @@ An extended morphological analyzer of <fixed-case>G</fixed-case>erman handling verbal forms with separated separable particles (Un analyseur morphologique étendu de l’allemand traitant les formes verbales à particule séparée) [in <fixed-case>F</fixed-case>rench] Jean-PhilippeGuilbaud - ChristianBoitet + ChristianBoitet VincentBerment 755-763 F13-2032 @@ -637,7 +637,7 @@ Anaphora Resolution Applied to Collocation Identification: A Preliminary Evaluation (Résolution d’anaphores appliquée aux collocations: une évaluation préliminaire) [in <fixed-case>F</fixed-case>rench] LukaNerima - ÉricWehrli + ÉricWehrli 772-778 F13-2034 nerima-wehrli-2013-anaphora @@ -645,7 +645,7 @@ Help enrich a terminological repository : proposals and experiments (Aide à l’enrichissement d’un référentiel terminologique : propositions et expérimentations) [in <fixed-case>F</fixed-case>rench] ThibaultMondary - AdelineNazarenko + AdelineNazarenko HaïfaZargayouna SabineBarreaux 779-786 @@ -714,7 +714,7 @@ An Interface for Validating and Evaluating Thematic Timelines (Une interface pour la validation et l’évaluation de chronologies thématiques) [in <fixed-case>F</fixed-case>rench] XavierTannier - VéroniqueMoriceau + VéroniqueMoriceau Erwan LeFlem 797-798 F13-3006 @@ -730,8 +730,8 @@ i<fixed-case>MAG</fixed-case> : <fixed-case>MT</fixed-case>-postediting, translation quality evaluation and parallel corpus production (i<fixed-case>MAG</fixed-case> : post-édition, évaluation de qualité de <fixed-case>TA</fixed-case> et production d’un corpus parallèle) [in <fixed-case>F</fixed-case>rench] - LingxiaoWang - YingZhang + LingxiaoWang + YingZhang 801-802 F13-3008 wang-zhang-2013-imag @@ -752,9 +752,9 @@ Apopsis Demonstrator for Tweet Analysis (Démonstrateur Apopsis pour l’analyse des tweets) [in <fixed-case>F</fixed-case>rench] - Sebastián PeñaSaldarriaga + Sebastián PeñaSaldarriaga DamienVintache - BéatriceDaille + BéatriceDaille 807-808 F13-3011 saldarriaga-etal-2013-apopsis @@ -769,7 +769,7 @@ <fixed-case>TTC</fixed-case> <fixed-case>T</fixed-case>erm<fixed-case>S</fixed-case>uite - Terminological Alignment from Comparable Corpora (<fixed-case>TTC</fixed-case> <fixed-case>T</fixed-case>erm<fixed-case>S</fixed-case>uite alignement terminologique à partir de corpus comparables) [in <fixed-case>F</fixed-case>rench] - BéatriceDaille + BéatriceDaille RimaHarastani 812-813 F13-3013 @@ -793,7 +793,7 @@ Improving Translation to Morphologically Rich Languages (Améliorer la traduction des langages morphologiquement riches) [in <fixed-case>F</fixed-case>rench] - AlexanderFraser + AlexanderFraser 1-1 F13-4001 fraser-2013-improving @@ -889,7 +889,7 @@ A linguistic approach for knowledge extraction from an <fixed-case>A</fixed-case>rabic text (Une approche linguistique pour l’extraction des connaissances dans un texte arabe) [in <fixed-case>F</fixed-case>rench] - HoudaSaadane + HoudaSaadane 124-137 F13-5010 saadane-2013-linguistic diff --git a/data/xml/F14.xml b/data/xml/F14.xml index c2acf9712b..273308ed7a 100644 --- a/data/xml/F14.xml +++ b/data/xml/F14.xml @@ -5,7 +5,7 @@ Proceedings of TALN 2014 (Volume 1: Long Papers) F14-1 PhilippeBlache - FrédéricBéchet + FrédéricBéchet BrigitteBigi Association pour le Traitement Automatique des Langues
Marseille, France
@@ -31,14 +31,14 @@ The impact of domains for Keyphrase extraction (Influence des domaines de spécialité dans l’extraction de termes-clés) [in <fixed-case>F</fixed-case>rench] AdrienBougouin FlorianBoudin - BéatriceDaille + BéatriceDaille 13-24 F14-1002 bougouin-etal-2014-impact
Event Role Labelling using a Neural Network Model (Étiquetage en rôles événementiels fondé sur l’utilisation d’un modèle neuronal) [in <fixed-case>F</fixed-case>rench] - EmanuelaBoroş + EmanuelaBoroş RomaricBesançon OlivierFerret BrigitteGrau @@ -49,14 +49,14 @@ Using distributed word representations for robust semantic role labeling (Utilisation de représentations de mots pour l’étiquetage de rôles sémantiques suivant <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et) [in <fixed-case>F</fixed-case>rench] WilliamLéchelle - PhilippeLanglais + PhilippeLanglais 36-45 F14-1004 lechelle-langlais-2014-using Cross-lingual Word Sense Disambiguation for Predicate Labelling of <fixed-case>F</fixed-case>rench - Lonnekevan der Plas + Lonnekevan der Plas MariannaApidianaki 46-55 F14-1005 @@ -71,24 +71,24 @@ Playing with parsers (Jouer avec des analyseurs syntaxiques) [in <fixed-case>F</fixed-case>rench] - ÉricVillemonte de la Clergerie + ÉricVillemonte de la Clergerie 67-78 F14-1007 villemonte-de-la-clergerie-2014-playing Principles of Lexical Network Systemic Modeling (Principes de modélisation systémique des réseaux lexicaux) [in <fixed-case>F</fixed-case>rench] - AlainPolguère + AlainPolguère 79-90 F14-1008 polguere-2014-principles A model to predict lexical complexity and to grade words (Un modèle pour prédire la complexité lexicale et graduer les mots) [in <fixed-case>F</fixed-case>rench] - NúriaGala + NúriaGala ThomasFrançois DelphineBernhard - CédrickFairon + CédrickFairon 91-102 F14-1009 gala-etal-2014-model @@ -115,7 +115,7 @@ Can we chunk well with bad <fixed-case>POS</fixed-case> labels? (Peut-on bien chunker avec de mauvaises étiquettes <fixed-case>POS</fixed-case> ?) [in <fixed-case>F</fixed-case>rench] IsabelleTellier - IrisEshkol-Taravella + IrisEshkol-Taravella YoannDupont IlaineWang 125-136 @@ -125,7 +125,7 @@ Analogy-based Text Normalization : the case of unknowns words (Normalisation de textes par analogie: le cas des mots inconnus) [in <fixed-case>F</fixed-case>rench] MarionBaranes - BenoîtSagot + BenoîtSagot 137-148 F14-1013 baranes-sagot-2014-analogy @@ -133,8 +133,8 @@ An evaluation of various methods for adjective-nouns composition (Une évaluation approfondie de différentes méthodes de compositionalité sémantique) [in <fixed-case>F</fixed-case>rench] AntoineBride - TimVan de Cruys - NicolasAsher + TimVan de Cruys + NicolasAsher 149-160 F14-1014 bride-etal-2014-evaluation @@ -170,7 +170,7 @@ A simple approach to make dialogue systems incremental (Vers une approche simplifiée pour introduire le caractère incrémental dans les systèmes de dialogue) [in <fixed-case>F</fixed-case>rench] HatimKhouzaimi RomainLaroche - FabriceLefèvre + FabriceLefèvre 196-207 F14-1018 khouzaimi-etal-2014-simple @@ -178,7 +178,7 @@ The Démonette Lexical Database: between Constructional Semantics and Word Formation (La base lexicale Démonette : entre sémantique constructionnelle et morphologie dérivationnelle) [in <fixed-case>F</fixed-case>rench] NabilHathout - FiammettaNamer + FiammettaNamer 208-219 F14-1019 hathout-namer-2014-demonette-lexical @@ -195,7 +195,7 @@ Reducing data sparsity by generalising distributional contexts: application to specialised texts (Réduction de la dispersion des données par généralisation des contextes distributionnels : application aux textes de spécialité) [in <fixed-case>F</fixed-case>rench] AmandinePérinet - ThierryHamon + ThierryHamon 232-243 F14-1021 perinet-hamon-2014-reducing @@ -204,7 +204,7 @@ Unsupervised extraction of semantic relations (Extraction non supervisée de relations sémantiques lexicales) [in <fixed-case>F</fixed-case>rench] JulietteConrath StergosAfantenos - NicholasAsher + NicholasAsher PhilippeMuller 244-255 F14-1022 @@ -212,7 +212,7 @@ Comparison of scheduling methods for the learning rate of neural network language models (Modèles de langue neuronaux: une comparaison de plusieurs stratégies d’apprentissage) [in <fixed-case>F</fixed-case>rench] - Quoc-KhanhDo + Quoc-KhanhDo AlexandreAllauzen FrançoisYvon 256-267 @@ -222,7 +222,7 @@ Study of the impact of proper name transliteration on the performance of word alignment in <fixed-case>F</fixed-case>rench-<fixed-case>A</fixed-case>rabic parallel corpora (Etude de l’impact de la translittération de noms propres sur la qualité de l’alignement de mots à partir de corpus parallèles français-arabe) [in <fixed-case>F</fixed-case>rench] NasredineSemmar - HoudaSaadane + HoudaSaadane 268-279 F14-1024 semmar-saadane-2014-study @@ -230,7 +230,7 @@ Topic Adaptation for the Automatic Translation of News Articles (Adaptation thématique pour la traduction automatique de dépêches de presse) [in <fixed-case>F</fixed-case>rench] SouhirGahbiche-Braham - HélèneBonneau-Maynard + HélèneBonneau-Maynard FrançoisYvon 280-291 F14-1025 @@ -246,7 +246,7 @@ Detection and Analysis of Paraphrastic Reformulations in Spoken Corpora (Repérage et analyse de la reformulation paraphrastique dans les corpus oraux) [in <fixed-case>F</fixed-case>rench] - IrisEshkol-Taravella + IrisEshkol-Taravella NataliaGrabar 304-315 F14-1027 @@ -257,7 +257,7 @@ RajaAyed IbrahimBounhas BilelElayeb - NarjèsBellamine Ben Saoud + NarjèsBellamine Ben Saoud FabriceEvrard 316-327 F14-1028 @@ -265,7 +265,7 @@ A discriminative parser of the <fixed-case>LR</fixed-case> family for phrase structure parsing (Un analyseur discriminant de la famille <fixed-case>LR</fixed-case> pour l’analyse en constituants) [in <fixed-case>F</fixed-case>rench] - BenoîtCrabbé + BenoîtCrabbé 328-339 F14-1029 crabbe-2014-discriminative @@ -290,8 +290,8 @@ Semantic Annotation and Terminology Validation in full scientific articles in Social Sciences and Humanities (Annotation sémantique et validation terminologique en texte intégral en <fixed-case>SHS</fixed-case>) [in <fixed-case>F</fixed-case>rench] - Mokhtar-BoumedyenBillami - JoséCamacho-Collados + Mokhtar-BoumedyenBillami + JoséCamacho-Collados EvelyneJacquey LaurenceKister 363-376 @@ -303,7 +303,7 @@ CharlotteRoze ThierryCharnois DominiqueLegallois - StéphaneFerrari + StéphaneFerrari MathildeSalles 377-388 F14-1033 @@ -315,7 +315,7 @@ Proceedings of TALN 2014 (Volume 2: Short Papers) F14-2 PhilippeBlache - FrédéricBéchet + FrédéricBéchet BrigitteBigi Association pour le Traitement Automatique des Langues
Marseille, France
@@ -329,7 +329,7 @@ Machine translation for litterature: a pilot study (Traduction automatisée d’une oeuvre littéraire: une étude pilote) [in <fixed-case>F</fixed-case>rench] - LaurentBesacier + LaurentBesacier 389-394 F14-2001 besacier-2014-machine @@ -346,8 +346,8 @@ On-going Cooperative Research towards Developing Economy-Oriented <fixed-case>C</fixed-case>hinese-<fixed-case>F</fixed-case>rench <fixed-case>SMT</fixed-case> Systems with a New <fixed-case>SMT</fixed-case> Framework YidongChen - LingxiaoWang - ChristianBoitet + LingxiaoWang + ChristianBoitet XiaodongShi 401-406 F14-2003 @@ -356,7 +356,7 @@ Automatic Term Extraction Combining Different Information (Extraction automatique de termes combinant différentes informations) [in <fixed-case>F</fixed-case>rench] Juan AntonioLossio-Ventura - ClementJonquet + ClementJonquet MathieuRoche MaguelonneTeisseire 407-412 @@ -366,21 +366,21 @@ Automated Analysis for Stem Spaces: the case of <fixed-case>F</fixed-case>rench verbs (Analyse automatique d’espaces thématiques) [in <fixed-case>F</fixed-case>rench] GillesBoyé - AnnaKupść + AnnaKupść 413-418 F14-2005 boye-kupsc-2014-automated Extraction and representation of support verb constructions in <fixed-case>S</fixed-case>panish (Extraction et représentation des constructions à verbe support en espagnol) [in <fixed-case>F</fixed-case>rench] - SandraMilena Castellanos Páez + SandraMilena Castellanos Páez 419-424 F14-2006 milena-castellanos-paez-2014-extraction Sub-categorization in ‘pour’ and lexical syntax (Sous-catégorisation en pour et syntaxe lexicale) [in <fixed-case>F</fixed-case>rench] - BenoîtSagot + BenoîtSagot LaurenceDanlos MargotColinet 425-430 @@ -399,7 +399,7 @@ Named Entity Recognition and Correction in <fixed-case>OCR</fixed-case>ized Corpora (Détection et correction automatique d’entités nommées dans des corpus <fixed-case>OCR</fixed-case>isés) [in <fixed-case>F</fixed-case>rench] - BenoîtSagot + BenoîtSagot KataGábor 437-442 F14-2009 @@ -416,7 +416,7 @@ User evaluation of a multiple answer extraction system on the Web (Évaluation d’un système d’extraction de réponses multiples sur le Web par comparaison à des humains) [in <fixed-case>F</fixed-case>rench] Mathieu-HenriFalco - VéroniqueMoriceau + VéroniqueMoriceau AnneVilnat 449-454 F14-2011 @@ -444,7 +444,7 @@ ThomasFrançois LaetitiaBrouwers HubertNaets - CédrickFairon + CédrickFairon 467-472 F14-2014 francois-etal-2014-amesure @@ -467,7 +467,7 @@ <fixed-case>KNG</fixed-case>: a Tool for Writing Easily Transducer Cascades (<fixed-case>KNG</fixed-case>: un outil pour l’écriture facile de cascades de transducteurs) [in <fixed-case>F</fixed-case>rench] - FrançoisBarthélemy + FrançoisBarthélemy 485-490 F14-2017 barthelemy-2014-kng @@ -500,8 +500,8 @@ Impact of the nature and size of the training set on performance in the automatic detection of named entities (Impact de la nature et de la taille des corpus d’apprentissage sur les performances dans la détection automatique des entités nommées) [in <fixed-case>F</fixed-case>rench] AnaïsOllagnier - SébastienFournier - PatriceBellot + SébastienFournier + PatriceBellot FrédéricBéchet 511-516 F14-2021 @@ -535,8 +535,8 @@ Supporting Sign Languages Exploratory Linguistics with an Automatization of the Annotation Process (Vers un traitement automatique en soutien d’une linguistique exploratoire des Langues des Signes) [in <fixed-case>F</fixed-case>rench] RémiDubot - ArturoCuriel - ChristopheCollet + ArturoCuriel + ChristopheCollet 537-542 F14-2025 dubot-etal-2014-supporting @@ -555,7 +555,7 @@ RémiBois JohannesLeveling LorraineGoeuriot - Gareth J. F.Jones + Gareth J. F.Jones LiadhKelly 550-555 F14-2027 @@ -571,13 +571,13 @@ Tense and Time Annotations : a Contribution to <fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> Improvement (Annotation de la temporalité en corpus : contribution à l’amélioration de la norme <fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case>) [in <fixed-case>F</fixed-case>rench] - AnaïsLefeuvre + AnaïsLefeuvre Jean-YvesAntoine AgataSavary EmmanuelSchang LotfiAbouda DenisMaurel - IrisEshkol + IrisEshkol 562-567 F14-2029 lefeuvre-etal-2014-tense @@ -585,7 +585,7 @@ Automatic identification of document sections for designing a <fixed-case>F</fixed-case>rench clinical corpus (Identification automatique de zones dans des documents pour la constitution d’un corpus médical en français) [in <fixed-case>F</fixed-case>rench] LouiseDeléger - AurélieNévéol + AurélieNévéol 568-573 F14-2030 deleger-neveol-2014-automatic @@ -593,11 +593,11 @@ Annotation scheme for deep dependency syntax of <fixed-case>F</fixed-case>rench (Un schéma d’annotation en dépendances syntaxiques profondes pour le français) [in <fixed-case>F</fixed-case>rench] GuyPerrier - MarieCandito + MarieCandito BrunoGuillaume CorentinRibeyre KarënFort - DjaméSeddah + DjaméSeddah 574-579 F14-2031 perrier-etal-2014-annotation @@ -616,7 +616,7 @@ Integrating lexicographic examples in a lexical network (Intégration relationnelle des exemples lexicographiques dans un réseau lexical) [in <fixed-case>F</fixed-case>rench] - VeronikaLux-Pogodalla + VeronikaLux-Pogodalla 586-591 F14-2033 lux-pogodalla-2014-integrating @@ -624,7 +624,7 @@ Colors of People (Les couleurs des gens) [in <fixed-case>F</fixed-case>rench] MathieuLafourcade - NathalieLe Brun + NathalieLe Brun VirginieZampa 592-597 F14-2034 @@ -635,8 +635,8 @@ MohammadNasiruddin DidierSchwab AndonTchechmedjiev - GillesSérasset - HervéBlanchon + GillesSérasset + HervéBlanchon 598-603 F14-2035 nasiruddin-etal-2014-word @@ -653,7 +653,7 @@ Proceedings of TALN 2014 (Volume 3: System Demonstrations) F14-3 - Grégoirede Montcheuil + Grégoirede Montcheuil BrigitteBigi Association pour le Traitement Automatique des Langues
Marseille, France
@@ -739,7 +739,7 @@ <fixed-case>D</fixed-case>icta<fixed-case>N</fixed-case>um: a dialogue system for numbers dictation (<fixed-case>D</fixed-case>icta<fixed-case>N</fixed-case>um : système de dialogue incrémental pour la dictée de numéros.) [in <fixed-case>F</fixed-case>rench] HatimKhouzaimi RomainLaroche - FabriceLefèvre + FabriceLefèvre 23-25 F14-3010 khouzaimi-etal-2014-dictanum @@ -759,7 +759,7 @@ HajarFalih ChristineChardenon RomainLaroche - FabriceLefevre + FabriceLefevre 28-29 F14-3012 ekeinhor-komi-etal-2014-enia @@ -788,7 +788,7 @@ Proceedings of TALN 2014 (Volume 4: RECITAL - Student Research Workshop) F14-4 - NúriaGala + NúriaGala KlimPeshkov BrigitteBigi Association pour le Traitement Automatique des Langues @@ -845,7 +845,7 @@
Description of structures of time (in <fixed-case>F</fixed-case>rench sign language) based on a formal grammar (Une description des structures de la durée en Langue des Signes Française à partir d’une grammaire formelle) [in <fixed-case>F</fixed-case>rench] - MohamedHadjadj + MohamedHadjadj 71-80 F14-4007 hadjadj-2014-description @@ -866,7 +866,7 @@ Extraction methods for automatic summarization of spoken conversations from call centers (Méthodes par extraction pour le résumé automatique de conversations parlées provenant de centres d’appels) [in <fixed-case>F</fixed-case>rench] - JérémyTrione + JérémyTrione 104-111 F14-4010 trione-2014-extraction diff --git a/data/xml/H01.xml b/data/xml/H01.xml index 6c9cb68d6e..1abfff7cfb 100644 --- a/data/xml/H01.xml +++ b/data/xml/H01.xml @@ -12,21 +12,21 @@ Activity detection for information access to oral communication KlausRies - AlexWaibel + AlexWaibel H01-1001 ries-waibel-2001-activity Adapting an Example-Based Translation System to <fixed-case>C</fixed-case>hinese - YingZhang - Ralf D.Brown - Robert E.Frederking + YingZhang + Ralf D.Brown + Robert E.Frederking H01-1002 zhang-etal-2001-adapting Advances in meeting recognition - AlexWaibel + AlexWaibel HuaYu TanjaSchultz YuePan @@ -41,10 +41,10 @@ Amount of Information Presented in a Complex List: Effects on User Performance DawnDutton - MarilynWalker + MarilynWalker SelinaChu JamesHubbell - ShrikanthNarayanan + ShrikanthNarayanan H01-1004 dutton-etal-2001-amount @@ -60,17 +60,17 @@ Answering What-Is Questions by Virtual Annotation JohnPrager - DragomirRadev + DragomirRadev KrzysztofCzuba H01-1006 prager-etal-2001-answering Architecture and Design Considerations in <fixed-case>NESPOLE</fixed-case>!: a Speech Translation System for <fixed-case>E</fixed-case>-commerce Applications - AlonLavie + AlonLavie ChadLangley - AlexWaibel - FabioPianesi + AlexWaibel + FabioPianesi GianniLazzari PaoloColetti LoredanaTaddei @@ -80,7 +80,7 @@ Assigning Belief Scores to Names in Queries - ChristopherDozier + ChristopherDozier H01-1008 dozier-2001-assigning @@ -88,13 +88,13 @@ Automatic Pattern Acquisition for <fixed-case>J</fixed-case>apanese Information Extraction KiyoshiSudo SatoshiSekine - RalphGrishman + RalphGrishman H01-1009 sudo-etal-2001-automatic Automatic Predicate Argument Analysis of the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank - MarthaPalmer + MarthaPalmer JosephRosenzweig ScottCotton H01-1010 @@ -103,28 +103,28 @@ Automatic Title Generation for Spoken Broadcast News RongJin - Alexander G.Hauptmann + Alexander G.Hauptmann H01-1011 jin-hauptmann-2001-automatic A Conversational Interface for Online Shopping - JoyceChai + JoyceChai VeronikaHorvath - NandaKambhatla - NicolasNicolov - MargoStys-Budzikowska + NandaKambhatla + NicolasNicolov + MargoStys-Budzikowska H01-1012 chai-etal-2001-conversational Conversational Sales Assistant for Online Shopping - MargoBudzikowska - JoyceChai + MargoBudzikowska + JoyceChai SunilGovindappa VeronikaHorvath - NandaKambhatla - NicolasNicolov + NandaKambhatla + NicolasNicolov WlodekZadrozny H01-1013 budzikowska-etal-2001-conversational @@ -132,20 +132,20 @@ Converting Dependency Structures to Phrase Structures FeiXia - MarthaPalmer + MarthaPalmer H01-1014 xia-palmer-2001-converting <fixed-case>DATE</fixed-case>: A Dialogue Act Tagging Scheme for Evaluation of Spoken Dialogue Systems - MarilynWalker - RebeccaPassonneau + MarilynWalker + RebeccaPassonneau H01-1015 walker-passonneau-2001-date Development of the <fixed-case>HRL</fixed-case> Route Navigation Dialogue System - RobertBelvin + RobertBelvin RonBurns CherylHein H01-1016 @@ -153,21 +153,21 @@ Dialogue Interaction with the <fixed-case>DARPA</fixed-case> Communicator Infrastructure: The Development of Useful Software - SamuelBayer - ChristineDoran + SamuelBayer + ChristineDoran BryanGeorge H01-1017 bayer-etal-2001-dialogue Domain Portability in Speech-to-Speech Translation - AlonLavie - LoriLevin + AlonLavie + LoriLevin TanjaSchultz ChadLangley BenjaminHan AliciaTribble - DonnaGates + DonnaGates DorcasWallace KayPeterson H01-1018 @@ -193,40 +193,40 @@ Evaluating Question-Answering Techniques in <fixed-case>C</fixed-case>hinese XiaoyanLi - W. BruceCroft + W. BruceCroft H01-1021 li-croft-2001-evaluating An Evaluation Corpus For Temporal Summarization - VikashKhandelwal + VikashKhandelwal RahulGupta - JamesAllan + JamesAllan H01-1022 khandelwal-etal-2001-evaluation Evaluation Results for the Talk’n’Travel System - DavidStallard + DavidStallard H01-1023 stallard-2001-evaluation Experiments in Multi-Modal Automatic Content Extraction - LanceRamshaw + LanceRamshaw ElizabethBoschee SergeyBratus ScottMiller RebeccaStone - RalphWeischedel + RalphWeischedel AlexZamanian H01-1024 ramshaw-etal-2001-experiments Exploring Speech-Enabled Dialogue with the Galaxy Communicator Infrastructure - SamuelBayer - ChristineDoran + SamuelBayer + ChristineDoran BryanGeorge H01-1025 bayer-etal-2001-exploring @@ -235,7 +235,7 @@ Facilitating Treebank Annotation Using a Statistical Parser Fu-DongChiou DavidChiang - MarthaPalmer + MarthaPalmer H01-1026 chiou-etal-2001-facilitating @@ -243,8 +243,8 @@ <fixed-case>F</fixed-case>act<fixed-case>B</fixed-case>rowser Demonstration ScottMiller SergeyBratus - LanceRamshaw - RalphWeischedel + LanceRamshaw + RalphWeischedel AlexZamanian H01-1027 miller-etal-2001-factbrowser @@ -252,16 +252,16 @@ Finding Errors Automatically in Semantically Tagged Dialogues JohnAberdeen - ChristineDoran - LaurieDamianos - SamuelBayer - LynetteHirschman + ChristineDoran + LaurieDamianos + SamuelBayer + LynetteHirschman H01-1028 aberdeen-etal-2001-finding Fine-Grained Hidden <fixed-case>M</fixed-case>arkov Modeling for Broadcast-News Story Segmentation - WarrenGreiff + WarrenGreiff AlexMorgan RandallFish MarcRichards @@ -281,29 +281,29 @@ InderjeetMani GeorgeWilson LisaFerro - BethSundheim + BethSundheim H01-1031 mani-etal-2001-guidelines Hypothesis Selection and Resolution in the Mercury Flight Reservation System - StephanieSeneff - JosephPolifroni + StephanieSeneff + JosephPolifroni H01-1032 seneff-polifroni-2001-hypothesis Improved Cross-Language Retrieval using Backoff Translation PhilipResnik - DouglasOard - GinaLevow + DouglasOard + GinaLevow H01-1033 resnik-etal-2001-improved Improving Information Extraction by Modeling Errors in Speech Recognizer Output - David D.Palmer - MariOstendorf + David D.Palmer + MariOstendorf H01-1034 palmer-ostendorf-2001-improving @@ -332,44 +332,44 @@ Integrated Feasibility Experiment for Bio-Security: <fixed-case>IFE</fixed-case>-Bio, A <fixed-case>TIDES</fixed-case> Demonstration - LynetteHirschman - KrisConcepcion - LaurieDamianos - DavidDay + LynetteHirschman + KrisConcepcion + LaurieDamianos + DavidDay JohnDelmore LisaFerro JohnGriffith - JohnHenderson + JohnHenderson JeffKurtz InderjeetMani ScottMardis TomMcEntee - KeithMiller + KeithMiller BeverlyNunam JayPonte - FlorenceReeder - BenWellner + FlorenceReeder + BenWellner GeorgeWilson - AlexYeh + AlexYeh H01-1038 hirschman-etal-2001-integrated Integrated Information Management: An Interactive, Extensible Architecture for Information Retrieval - EricNyberg - HalDaume + EricNyberg + HalDaume H01-1039 nyberg-daume-2001-integrated Intelligent Access to Text: Integrating Information Extraction Technology into Text Browsers - RobertGaizauskas + RobertGaizauskas PatrickHerring MichaelOakes MichellineBeaulieu PeterWillett HeleneFowkes - AnnaJonsson + AnnaJonsson H01-1040 gaizauskas-etal-2001-intelligent @@ -377,14 +377,14 @@ Interlingua-Based Broad-Coverage <fixed-case>K</fixed-case>orean-to-<fixed-case>E</fixed-case>nglish Translation in <fixed-case>CCLINC</fixed-case> Young-SukLee Wu SokYi - StephanieSeneff - Clifford J.Weinstein + StephanieSeneff + Clifford J.Weinstein H01-1041 lee-etal-2001-interlingua Is That Your Final Answer? - FlorenceReeder + FlorenceReeder H01-1042 reeder-2001-final @@ -397,7 +397,7 @@ <fixed-case>J</fixed-case>apanese Text Input System With Digits - KumikoTanaka-Ishii + KumikoTanaka-Ishii YusukeInutsuka MasatoTakeichi H01-1044 @@ -429,7 +429,7 @@ MartinWestphal MikeSchneider TanjaSchultz - AlexWaibel + AlexWaibel H01-1048 fugen-etal-2001-lingwear @@ -442,15 +442,15 @@ <fixed-case>M</fixed-case>andarin-<fixed-case>E</fixed-case>nglish Information: Investigating Translingual Speech Retrieval - HelenMeng + HelenMeng BerlinChen - SanjeevKhudanpur - Gina-AnneLevow - Wai-KitLo - DouglasOard + SanjeevKhudanpur + Gina-AnneLevow + Wai-KitLo + DouglasOard PatrickShone KarenTang - Hsin-MinWang + Hsin-MinWang JianqiangWang H01-1050 meng-etal-2001-mandarin @@ -464,8 +464,8 @@ DavidGelbart AdamJanin ThiloPfau - ElizabethShriberg - AndreasStolcke + ElizabethShriberg + AndreasStolcke H01-1051 morgan-etal-2001-meeting @@ -480,35 +480,35 @@ Monitoring the News: a <fixed-case>TDT</fixed-case> demonstration system DavidFrey RahulGupta - VikasKhandelwal + VikasKhandelwal VictorLavrenko AntonLeuski - JamesAllan + JamesAllan H01-1053 frey-etal-2001-monitoring
Multidocument Summarization via Information Extraction - MichaelWhite + MichaelWhite TanyaKorelsky - ClaireCardie + ClaireCardie VincentNg - DavidPierce + DavidPierce KiriWagstaff H01-1054 white-etal-2001-multidocument Natural Language Generation in Dialog Systems - OwenRambow - SrinivasBangalore - MarilynWalker + OwenRambow + SrinivasBangalore + MarilynWalker H01-1055 rambow-etal-2001-natural <fixed-case>N</fixed-case>ews<fixed-case>I</fixed-case>n<fixed-case>E</fixed-case>ssence: A System For Domain-Independent, Real-Time News Clustering and Multi-Document Summarization - Dragomir R.Radev + Dragomir R.Radev SashaBlair-Goldensohn ZhuZhang Revathi SundaraRaghavan @@ -524,24 +524,24 @@ On Combining Language Models: Oracle Approach - KadriHacioglu - WayneWard + KadriHacioglu + WayneWard H01-1058 hacioglu-ward-2001-combining Portability Issues for Speech Recognition Technologies - LoriLamel - FabriceLefevre + LoriLamel + FabriceLefevre Jean-LucGauvain - GillesAdda + GillesAdda H01-1059 lamel-etal-2001-portability Rapidly Retargetable Interactive Translingual Retrieval - Gina-AnneLevow - Douglas W.Oard + Gina-AnneLevow + Douglas W.Oard PhilipResnik H01-1060 levow-etal-2001-rapidly @@ -570,20 +570,20 @@ Scalability and Portability of a Belief Network-based Dialog Model for Different Application Domains CarmenWai - Helen M.Meng - RobertoPieraccini + Helen M.Meng + RobertoPieraccini H01-1063 wai-etal-2001-scalability <fixed-case>SCANM</fixed-case>ail: Audio Navigation in the Voicemail Domain MichielBacchiani - JuliaHirschberg + JuliaHirschberg AaronRosenberg - SteveWhittaker - DonaldHindle + SteveWhittaker + DonaldHindle PhilIsenhour - MarkJones + MarkJones LitzaStark GaryZamchick H01-1064 @@ -592,14 +592,14 @@ Sentence Ordering in Multidocument Summarization ReginaBarzilay - NoemieElhadad - Kathleen R.McKeown + NoemieElhadad + Kathleen R.McKeown H01-1065 barzilay-etal-2001-sentence A Server for Real-Time Event Tracking in News - Ralf D.Brown + Ralf D.Brown H01-1066 brown-2001-server @@ -619,10 +619,10 @@ Toward Semantics-Based Answer Pinpointing - EduardHovy + EduardHovy LaurieGerber UlfHermjakob - Chin-YewLin + Chin-YewLin DeepakRavichandran H01-1069 hovy-etal-2001-toward @@ -639,15 +639,15 @@ Towards Automatic Sign Translation JieYang JiangGao - YingZhang - AlexWaibel + YingZhang + AlexWaibel H01-1071 yang-etal-2001-towards <fixed-case>T</fixed-case>ü<fixed-case>SBL</fixed-case>: A Similarity-Based Chunk Parser for Robust Syntactic Processing - SandraKübler - Erhard W.Hinrichs + SandraKübler + Erhard W.Hinrichs H01-1072 kubler-hinrichs-2001-tusbl @@ -667,13 +667,13 @@ The Use of Dynamic Segment Scoring for Language-Independent Question Answering DanielPack - CliffordWeinstein + CliffordWeinstein H01-1074 pack-weinstein-2001-use Using Speech and Language Technology to Coach Reading - PattiPrice + PattiPrice LucJulia H01-1075 price-julia-2001-using diff --git a/data/xml/H05.xml b/data/xml/H05.xml index 0409e337cd..bcc4f10e8b 100644 --- a/data/xml/H05.xml +++ b/data/xml/H05.xml @@ -4,7 +4,7 @@ Proceedings of Human Language Technology Conference and Conference on Empirical Methods in Natural Language Processing H05-1 - RaymondMooney + RaymondMooney ChrisBrew Lee-FengChien KatrinKirchhoff @@ -22,18 +22,18 @@ Improving <fixed-case>LSA</fixed-case>-based Summarization with Anaphora Resolution JosefSteinberger - MijailKabadjov - MassimoPoesio - OliviaSanchez-Graillet + MijailKabadjov + MassimoPoesio + OliviaSanchez-Graillet 1–8 H05-1001 steinberger-etal-2005-improving Data-driven Approaches for Information Structure Identification - OanaPostolache - IvanaKruijff-Korbayová - Geert-JanKruijff + OanaPostolache + IvanaKruijff-Korbayová + Geert-JanKruijff 9–16 H05-1002 postolache-etal-2005-data @@ -42,14 +42,14 @@ Using Semantic Relations to Refine Coreference Decisions HengJi DavidWestbrook - RalphGrishman + RalphGrishman 17–24 H05-1003 ji-etal-2005-using On Coreference Resolution Performance Metrics - XiaoqiangLuo + XiaoqiangLuo 25–32 H05-1004 luo-2005-coreference @@ -57,7 +57,7 @@ Improving Multilingual Summarization: Using Redundancy in the Input to Correct <fixed-case>MT</fixed-case> errors AdvaithSiddharthan - KathleenMcKeown + KathleenMcKeown 33–40 H05-1005 siddharthan-mckeown-2005-improving @@ -72,7 +72,7 @@ Semantic Similarity for Detecting Recognition Errors in Automatic Speech Transcripts - DianaInkpen + DianaInkpen AlainDésilets 49–56 H05-1007 @@ -88,8 +88,8 @@ <fixed-case>N</fixed-case>eur<fixed-case>A</fixed-case>lign: Combining Word Alignments Using Neural Networks - Necip FazilAyan - Bonnie J.Dorr + Necip FazilAyan + Bonnie J.Dorr ChristofMonz 65–72 H05-1009 @@ -106,7 +106,7 @@ A Discriminative Framework for Bilingual Word Alignment - Robert C.Moore + Robert C.Moore 81–88 H05-1011 moore-2005-discriminative @@ -114,14 +114,14 @@ A Maximum Entropy Word Aligner for <fixed-case>A</fixed-case>rabic-<fixed-case>E</fixed-case>nglish Machine Translation AbrahamIttycheriah - SalimRoukos + SalimRoukos 89–96 H05-1012 ittycheriah-roukos-2005-maximum A Large-Scale Exploration of Effective Global Features for a Joint Entity Detection and Tracking Model - HalDaumé III + HalDaumé III DanielMarcu 97–104 H05-1013 @@ -146,14 +146,14 @@ Using Names and Topics for New Event Detection GiridharKumaran - JamesAllan + JamesAllan 121–128 H05-1016 kumaran-allan-2005-using Investigating Unsupervised Learning for Text Categorization Bootstrapping - AlfioGliozzo + AlfioGliozzo CarloStrapparava IdoDagan 129–136 @@ -162,7 +162,7 @@ Speeding up Training with Tree Kernels for Node Relation Labeling - Jun’ichiKazama + Jun’ichiKazama KentaroTorisawa 137–144 H05-1018 @@ -171,7 +171,7 @@ Kernel-based Approach for Automatic Evaluation of Natural Language Generation Technologies: Application to Automatic Summarization TsutomuHirao - ManabuOkumura + ManabuOkumura HidekiIsozaki 145–152 H05-1019 @@ -212,8 +212,8 @@ Alignment Link Projection Using Transformation-Based Learning - Necip FazilAyan - Bonnie J.Dorr + Necip FazilAyan + Bonnie J.Dorr ChristofMonz 185–192 H05-1024 @@ -248,7 +248,7 @@ A Salience Driven Approach to Robust Input Interpretation in Multimodal Conversational Systems - Joyce Y.Chai + Joyce Y.Chai ShaolinQu 217–224 H05-1028 @@ -256,19 +256,19 @@ Error Handling in the <fixed-case>R</fixed-case>aven<fixed-case>C</fixed-case>law Dialog Management Architecture - DanBohus - AlexanderRudnicky + DanBohus + AlexanderRudnicky 225–232 H05-1029 bohus-rudnicky-2005-error Effective Use of Prosody in Parsing Conversational Speech - Jeremy G.Kahn + Jeremy G.Kahn MatthewLease EugeneCharniak MarkJohnson - MariOstendorf + MariOstendorf 233–240 H05-1030 kahn-etal-2005-effective @@ -277,7 +277,7 @@ Automatically Learning Cognitive Status for Multi-Document Summarization of Newswire AniNenkova AdvaithSiddharthan - KathleenMcKeown + KathleenMcKeown 241–248 H05-1031 nenkova-etal-2005-automatically @@ -308,16 +308,16 @@ <fixed-case>PP</fixed-case>-attachment Disambiguation using Large Context MarianOlteanu - DanMoldovan + DanMoldovan 273–280 H05-1035 olteanu-moldovan-2005-pp Compiling Comp Ling: Weighted Dynamic Programming and the <fixed-case>D</fixed-case>yna Language - JasonEisner + JasonEisner EricGoldlust - Noah A.Smith + Noah A.Smith 281–290 H05-1036 eisner-etal-2005-compiling @@ -332,7 +332,7 @@ Using Question Series to Evaluate Question Answering System Effectiveness - EllenVoorhees + EllenVoorhees 299–306 H05-1038 voorhees-2005-using @@ -340,7 +340,7 @@ Combining Deep Linguistics Analysis and Surface Pattern Learning: A Hybrid Approach to <fixed-case>C</fixed-case>hinese Definitional Question Answering FuchunPeng - RalphWeischedel + RalphWeischedel AnaLicuanan JinxiXu 307–314 @@ -350,7 +350,7 @@ Enhanced Answer Type Inference from Questions using Sequential Models VijayKrishnan - SujathaDas + SujathaDas SoumenChakrabarti 315–322 H05-1040 @@ -359,7 +359,7 @@ A Practically Unsupervised Learning Method to Identify Single-Snippet Answers to Definition Questions on the Web IonAndroutsopoulos - DimitriosGalanis + DimitriosGalanis 323–330 H05-1041 androutsopoulos-galanis-2005-practically @@ -383,7 +383,7 @@ Recognizing Contextual Polarity in Phrase-Level Sentiment Analysis TheresaWilson - JanyceWiebe + JanyceWiebe PaulHoffmann 347–354 H05-1044 @@ -392,8 +392,8 @@ Identifying Sources of Opinions with Conditional Random Fields and Extraction Patterns YejinChoi - ClaireCardie - EllenRiloff + ClaireCardie + EllenRiloff SiddharthPatwardhan 355–362 H05-1045 @@ -410,14 +410,14 @@ A Semantic Approach to Recognizing Textual Entailment MartaTatu - DanMoldovan + DanMoldovan 371–378 H05-1047 tatu-moldovan-2005-semantic Detection of Entity Mentions Occuring in <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese Text - KadriHacioglu + KadriHacioglu BenjaminDouglas YingChen 379–386 @@ -427,15 +427,15 @@ Robust Textual Inference via Graph Matching AriaHaghighi - AndrewNg - ChristopherManning + AndrewNg + ChristopherManning 387–394 H05-1049 haghighi-etal-2005-robust Bootstrapping Without the Boot - JasonEisner + JasonEisner DamianosKarakos 395–402 H05-1050 @@ -450,7 +450,7 @@ Unsupervised Large-Vocabulary Word Sense Disambiguation with Graph-based Algorithms for Sequence Data Labeling - RadaMihalcea + RadaMihalcea 411–418 H05-1052 mihalcea-2005-unsupervised @@ -458,8 +458,8 @@ Domain-Specific Sense Distributions and Predominant Sense Acquisition RobKoeling - DianaMcCarthy - JohnCarroll + DianaMcCarthy + JohnCarroll 419–426 H05-1053 koeling-etal-2005-domain @@ -484,8 +484,8 @@ Extracting Personal Names from Email: Applying Named Entity Recognition to Informal Text EinatMinkov - Richard C.Wang - William W.Cohen + Richard C.Wang + William W.Cohen 443–450 H05-1056 minkov-etal-2005-extracting @@ -493,7 +493,7 @@ Matching Inconsistently Spelled Names in Automatic Speech Recognizer Output for Information Retrieval HemaRaghavan - JamesAllan + JamesAllan 451–458 H05-1057 raghavan-allan-2005-matching @@ -501,7 +501,7 @@ Part-of-Speech Tagging using Virtual Evidence and Negative Training Sheila M.Reynolds - Jeff A.Bilmes + Jeff A.Bilmes 459–466 H05-1058 reynolds-bilmes-2005-part @@ -509,16 +509,16 @@ Bidirectional Inference with the Easiest-First Strategy for Tagging Sequence Data YoshimasaTsuruoka - Jun’ichiTsujii + Jun’ichiTsujii 467–474 H05-1059 tsuruoka-tsujii-2005-bidirectional Context-Based Morphological Disambiguation with Random Fields - Noah A.Smith - David A.Smith - Roy W.Tromble + Noah A.Smith + David A.Smith + Roy W.Tromble 475–482 H05-1060 smith-etal-2005-context @@ -526,17 +526,17 @@ Mining Key Phrase Translations from Web Corpora FeiHuang - YingZhang - StephanVogel + YingZhang + StephanVogel 483–490 H05-1061 huang-etal-2005-mining Robust Named Entity Extraction from Large Spoken Archives - BenoîtFavre - FrédéricBéchet - PascalNocéra + BenoîtFavre + FrédéricBéchet + PascalNocéra 491–498 H05-1062 favre-etal-2005-robust @@ -553,7 +553,7 @@ Hidden-Variable Models for Discriminative Reranking TerryKoo - MichaelCollins + MichaelCollins 507–514 H05-1064 koo-collins-2005-hidden @@ -570,14 +570,14 @@ RyanMcDonald FernandoPereira KirilRibarov - JanHajič + JanHajič 523–530 H05-1066 mcdonald-etal-2005-non Making Computers Laugh: Investigations in Automatic Humor Recognition - RadaMihalcea + RadaMihalcea CarloStrapparava 531–538 H05-1067 @@ -586,7 +586,7 @@ Optimizing to Arbitrary <fixed-case>NLP</fixed-case> Metrics using Ensemble Selection ArtMunson - ClaireCardie + ClaireCardie RichCaruana 539–546 H05-1068 @@ -595,7 +595,7 @@ Word Sense Disambiguation Using Sense Examples Automatically Acquired from a Second Language XinglongWang - JohnCarroll + JohnCarroll 547–554 H05-1069 wang-carroll-2005-word @@ -609,9 +609,9 @@ <fixed-case>K</fixed-case>now<fixed-case>I</fixed-case>t<fixed-case>N</fixed-case>ow: Fast, Scalable Information Extraction from the Web - Michael J.Cafarella + Michael J.Cafarella DougDowney - StephenSoderland + StephenSoderland OrenEtzioni 563–570 H05-1071 @@ -619,7 +619,7 @@ A Cost-Benefit Analysis of Hybrid Phone-Manner Representations for <fixed-case>ASR</fixed-case> - EricFosler-Lussier + EricFosler-Lussier C. AntonRytting 571–578 H05-1072 @@ -627,9 +627,9 @@ Emotions from Text: Machine Learning for Text-based Emotion Prediction - Cecilia OvesdotterAlm + Cecilia OvesdotterAlm DanRoth - RichardSproat + RichardSproat 579–586 H05-1073 alm-etal-2005-emotions @@ -645,7 +645,7 @@ Handling Biographical Questions with Implicature DonghuiFeng - EduardHovy + EduardHovy 596–603 H05-1075 feng-hovy-2005-handling @@ -653,16 +653,16 @@ The Use of Metadata, Web-derived Answer Patterns and Passage Context to Improve Reading Comprehension Performance YongpingDu - HelenMeng - XuanjingHuang - LideWu + HelenMeng + XuanjingHuang + LideWu 604–611 H05-1076 du-etal-2005-use Identifying Semantic Relations and Functional Properties of Human Verb Associations - SabineSchulte im Walde + SabineSchulte im Walde AlissaMelinger 612–619 H05-1077 @@ -671,7 +671,7 @@ Accurate Function Parsing PaolaMerlo - GabrieleMusillo + GabrieleMusillo 620–627 H05-1078 merlo-musillo-2005-accurate @@ -694,29 +694,29 @@ A Robust Combination Strategy for Semantic Role Labeling - LluísMàrquez + LluísMàrquez MihaiSurdeanu - PereComas - JordiTurmo + PereComas + JordiTurmo 644–651 H05-1081 marquez-etal-2005-robust A Methodology for Extrinsically Evaluating Information Extraction Performance - MichaelCrystal + MichaelCrystal AlexBaron KatherineGodfrey LinneaMicciulla YvetteTenney - RalphWeischedel + RalphWeischedel 652–659 H05-1082 crystal-etal-2005-methodology Multi-Lingual Coreference Resolution With Syntactic Features - XiaoqiangLuo + XiaoqiangLuo ImedZitouni 660–667 H05-1083 @@ -725,14 +725,14 @@ Analyzing Models for Semantic Role Assignment using Confusability KatrinErk - SebastianPadó + SebastianPadó 668–675 H05-1084 erk-pado-2005-analyzing Improving Statistical <fixed-case>MT</fixed-case> through Morphological Analysis - SharonGoldwater + SharonGoldwater DavidMcClosky 676–683 H05-1085 @@ -741,7 +741,7 @@ A Translation Model for Sentence Retrieval VanessaMurdock - W. BruceCroft + W. BruceCroft 684–691 H05-1086 murdock-croft-2005-translation @@ -755,10 +755,10 @@ <fixed-case>E</fixed-case>vita: A Robust Event Recognizer For <fixed-case>QA</fixed-case> Systems - RoserSaurí + RoserSaurí RobertKnippen MarcVerhagen - JamesPustejovsky + JamesPustejovsky 700–707 H05-1088 sauri-etal-2005-evita @@ -766,7 +766,7 @@ Using Sketches to Estimate Associations PingLi - Kenneth W.Church + Kenneth W.Church 708–715 H05-1089 li-church-2005-using @@ -774,14 +774,14 @@ Context and Learning in Novelty Detection BarrySchiffman - KathleenMcKeown + KathleenMcKeown 716–723 H05-1090 schiffman-mckeown-2005-context A Shortest Path Dependency Kernel for Relation Extraction - RazvanBunescu + RazvanBunescu RaymondMooney 724–731 H05-1091 @@ -790,16 +790,16 @@ Multi-way Relation Classification: Application to Protein-Protein Interactions BarbaraRosario - MartiHearst + MartiHearst 732–739 H05-1092 rosario-hearst-2005-multi <fixed-case>BLANC</fixed-case>: Learning Evaluation Metrics for <fixed-case>MT</fixed-case> - LucianLita + LucianLita MonicaRogati - AlonLavie + AlonLavie 740–747 H05-1093 lita-etal-2005-blanc @@ -818,10 +818,10 @@ NicolaCancedda BrunoCavestro MarcDymetman - EricGaussier - CyrilGoutte + EricGaussier + CyrilGoutte KenjiYamada - PhilippeLanglais + PhilippeLanglais ArneMauser 755–762 H05-1095 @@ -830,7 +830,7 @@ Word-Level Confidence Estimation for Machine Translation using Phrase-Based Translation Models NicolaUeffing - HermannNey + HermannNey 763–770 H05-1096 ueffing-ney-2005-word @@ -869,7 +869,7 @@ Morphology and Reranking for the Statistical Parsing of <fixed-case>S</fixed-case>panish BrookeCowan - MichaelCollins + MichaelCollins 795–802 H05-1100 cowan-collins-2005-morphology @@ -885,7 +885,7 @@ Incremental <fixed-case>LTAG</fixed-case> Parsing LibinShen - AravindJoshi + AravindJoshi 811–818 H05-1102 shen-joshi-2005-incremental @@ -910,8 +910,8 @@ Using the Web as an Implicit Training Set: Application to Structural Ambiguity Resolution - PreslavNakov - MartiHearst + PreslavNakov + MartiHearst 835–842 H05-1105 nakov-hearst-2005-using @@ -934,7 +934,7 @@ Cross-linguistic Projection of Role-Semantic Information - SebastianPadó + SebastianPadó MirellaLapata 859–866 H05-1108 @@ -957,7 +957,7 @@ Exploiting a Verb Lexicon in Automatic Semantic Role Labelling - RobertSwier + RobertSwier SuzanneStevenson 883–890 H05-1111 @@ -965,8 +965,8 @@ A Semantic Scattering Model for the Automatic Interpretation of Genitives - DanMoldovan - AdrianaBadulescu + DanMoldovan + AdrianaBadulescu 891–898 H05-1112 moldovan-badulescu-2005-semantic @@ -974,25 +974,25 @@ Measuring the Relative Compositionality of Verb-Noun (<fixed-case>V</fixed-case>-N) Collocations by Integrating Features SriramVenkatapathy - AravindJoshi + AravindJoshi 899–906 H05-1113 venkatapathy-joshi-2005-measuring A Semi-Supervised Feature Clustering Algorithm with Application to Word Sense Disambiguation - Zheng-YuNiu - Dong-HongJi - Chew LimTan + Zheng-YuNiu + Dong-HongJi + Chew LimTan 907–914 H05-1114 niu-etal-2005-semi Using Random Walks for Question-focused Sentence Retrieval - JahnaOtterbacher - GüneşErkan - DragomirRadev + JahnaOtterbacher + GüneşErkan + DragomirRadev 915–922 H05-1115 otterbacher-etal-2005-using @@ -1000,8 +1000,8 @@ Multi-Perspective Question Answering Using the <fixed-case>O</fixed-case>p<fixed-case>QA</fixed-case> Corpus VeselinStoyanov - ClaireCardie - JanyceWiebe + ClaireCardie + JanyceWiebe 923–930 H05-1116 stoyanov-etal-2005-multi @@ -1016,7 +1016,7 @@ Integrating Linguistic Knowledge in Passage Retrieval for Question Answering - JörgTiedemann + JörgTiedemann 939–946 H05-1118 tiedemann-2005-integrating @@ -1050,7 +1050,7 @@ An Orthonormal Basis for Topic Segmentation in Tutorial Dialogue - AndrewOlney + AndrewOlney ZhiqiangCai 971–978 H05-1122 @@ -1076,13 +1076,13 @@ The Vocal Joystick: A Voice-Based Human-Computer Interface for Individuals with Motor Impairments - Jeff A.Bilmes + Jeff A.Bilmes XiaoLi JonathanMalkin KelleyKilanski RichardWright KatrinKirchhoff - AmarSubramanya + AmarSubramanya SusumuHarada JamesLanday PatriciaDowden @@ -1102,7 +1102,7 @@ Learning Mixed Initiative Dialog Strategies By Using Reinforcement Learning On Both Conversants MichaelEnglish - PeterHeeman + PeterHeeman 1011–1018 H05-1127 english-heeman-2005-learning @@ -1112,7 +1112,7 @@ Proceedings of HLT/EMNLP 2005 Interactive Demonstrations H05-2 - DonnaByron + DonnaByron AnandVenkataraman DellZhang Association for Computational Linguistics @@ -1128,7 +1128,7 @@ Automatic Detection of Translation Errors: The State of the Art - GrahamRussell + GrahamRussell GeorgeFoster Ngoc TranNguyen 1 @@ -1149,8 +1149,8 @@ <fixed-case>C</fixed-case>lassummary: Introducing Discussion Summarization to Online Classrooms LiangZhou ErinShaw - Chin-YewLin - EduardHovy + Chin-YewLin + EduardHovy 4–5 H05-2003 zhou-etal-2005-classummary @@ -1160,7 +1160,7 @@ VasinPunyakanok DanRoth MarkSammons - Wen-tauYih + Wen-tauYih 6–7 H05-2004 punyakanok-etal-2005-demonstrating @@ -1177,7 +1177,7 @@ <fixed-case>N</fixed-case>oo<fixed-case>J</fixed-case>: a Linguistic Annotation System for Corpus Processing - MaxSilberztein + MaxSilberztein 10–11 H05-2006 silberztein-2005-nooj @@ -1192,7 +1192,7 @@ <fixed-case>P</fixed-case>rague Dependency Treebank as an Exercise Book of <fixed-case>C</fixed-case>zech - BarboraHladká + BarboraHladká OndřejKučera 14–15 H05-2008 @@ -1217,7 +1217,7 @@ <fixed-case>D</fixed-case>ialogue<fixed-case>V</fixed-case>iew: an Annotation Tool for Dialogue FanYang - Peter A.Heeman + Peter A.Heeman 20–21 H05-2011 yang-heeman-2005-dialogueview @@ -1240,7 +1240,7 @@ DaniloMirkovic BenBei HeatherPon-Barry - HarryBratt + HarryBratt HuaCheng HaukeSchmidt RohitMishra @@ -1258,13 +1258,13 @@ <fixed-case>J</fixed-case>apanese Speech Understanding using Grammar Specialization - MannyRayner + MannyRayner NikosChatzichrisafis - PierretteBouillon + PierretteBouillon YukieNakao HitoshiIsahara KyokoKanzaki - Beth AnnHockey + Beth AnnHockey MarianneSantaholma MarianneStarlander 26–27 @@ -1273,9 +1273,9 @@ The <fixed-case>M</fixed-case><fixed-case>I</fixed-case><fixed-case>T</fixed-case> Spoken Lecture Processing Project - James R.Glass - Timothy J.Hazen - D. ScottCyphers + James R.Glass + Timothy J.Hazen + D. ScottCyphers KenSchutte AlexPark 28–29 @@ -1306,10 +1306,10 @@ PaulHoffmann SwapnaSomasundaran JasonKessler - JanyceWiebe + JanyceWiebe YejinChoi - ClaireCardie - EllenRiloff + ClaireCardie + EllenRiloff SiddharthPatwardhan 34–35 H05-2018 @@ -1319,7 +1319,7 @@ <fixed-case>P</fixed-case><fixed-case>O</fixed-case><fixed-case>S</fixed-case><fixed-case>B</fixed-case><fixed-case>I</fixed-case><fixed-case>O</fixed-case><fixed-case>T</fixed-case><fixed-case>M</fixed-case>/<fixed-case>W</fixed-case>: A Development Workbench for Machine Learning Oriented Biomedical Text Mining System KyungdukKim YuSong - Gary GeunbaeLee + Gary GeunbaeLee 36–37 H05-2019 kim-etal-2005-posbiotm diff --git a/data/xml/H86.xml b/data/xml/H86.xml index 7ca746ff88..3993166a64 100644 --- a/data/xml/H86.xml +++ b/data/xml/H86.xml @@ -12,109 +12,109 @@ Research and Development in Natural Language Processing at <fixed-case>BBN</fixed-case> <fixed-case>L</fixed-case>aboratories in the <fixed-case>S</fixed-case>trategic <fixed-case>C</fixed-case>omputing <fixed-case>P</fixed-case>rogram - RalphWeischedel - RemkoScha + RalphWeischedel + RemkoScha EdwardWalker - DamarisAyuso - AndrewHaas - ErhardHinrichs - RobertIngria - LanceRamshaw + DamarisAyuso + AndrewHaas + ErhardHinrichs + RobertIngria + LanceRamshaw VardaShaked - DavidStallard + DavidStallard H86-1001 weischedel-etal-1986-research <fixed-case>PROTEUS</fixed-case> and <fixed-case>PUNDIT</fixed-case>: <fixed-case>RESEARCH</fixed-case> <fixed-case>IN</fixed-case> <fixed-case>TEXT</fixed-case> <fixed-case>UNDERSTANDING</fixed-case> at the <fixed-case>D</fixed-case>epartment of <fixed-case>C</fixed-case>omputer <fixed-case>S</fixed-case>cience, <fixed-case>N</fixed-case>ew <fixed-case>Y</fixed-case>ork <fixed-case>U</fixed-case>niversity and <fixed-case>S</fixed-case>ystem <fixed-case>D</fixed-case>evelopment <fixed-case>C</fixed-case>orporation -- A <fixed-case>B</fixed-case>urroughs Company - RalphGrishman - LynetteHirschman + RalphGrishman + LynetteHirschman H86-1002 grishman-hirschman-1986-proteus Overview of the <fixed-case>TACITUS</fixed-case> Project - Jerry R.Hobbs + Jerry R.Hobbs H86-1003 hobbs-1986-overview The <fixed-case>C</fixed-case>ounselor Project at the <fixed-case>U</fixed-case>niversity of <fixed-case>M</fixed-case>assachusetts - David D.McDonald + David D.McDonald James D.Pustejowky H86-1004 mcdonald-pustejowky-1986-counselor Research in Natural Language Processing - AravindJoshi - TimFinin - DaleMiller + AravindJoshi + TimFinin + DaleMiller LokendraShastri - BonnieWebber + BonnieWebber H86-1005 joshi-etal-1986-research Text Generation for Strategic Computing - WilliamMann - NormanSondheimer + WilliamMann + NormanSondheimer H86-1006 mann-sondheimer-1986-text Out of the Laboratory: A Case Study with the <fixed-case>IRUS</fixed-case> Natural Language Interface - Ralph M.Weischedel + Ralph M.Weischedel EdwardWalker - DamarisAyuso + DamarisAyuso Josde Bruin KimberleKoile - LanceRamshaw + LanceRamshaw VardaShaked H86-1007 weischedel-etal-1986-laboratory A Terminological Simplification Transformation for Natural Language Question-Answering Systems - David G.Stallard + David G.Stallard H86-1008 stallard-1986-terminological Model-based Analysis of Messages about Equipment - RalphGrishman + RalphGrishman TomaszKsiezyk - Ngo ThanhNhan + Ngo ThanhNhan H86-1009 grishman-etal-1986-model An Equipment Model and Its Role in the Interpretation of Nominal Compounds TomaszKsiezyk - RalphGrishman + RalphGrishman H86-1010 ksiezyk-grishman-1986-equipment Recovering Implicit Information - Martha S.Palmer - Deborah A.Dahl + Martha S.Palmer + Deborah A.Dahl Rebecca J.Schiffman - LynetteHirschman - MarciaLinebarger - JohnDowding + LynetteHirschman + MarciaLinebarger + JohnDowding H86-1011 palmer-etal-1986-recovering Focusing and Reference Resolution in <fixed-case>PUNDIT</fixed-case> - Deborah A.Dahl + Deborah A.Dahl H86-1012 dahl-1986-focusing Commonsense Metaphysics and Lexical Semantics - Jerry R.Hobbs + Jerry R.Hobbs WilliamCroft ToddDavies DouglasEdwards @@ -124,35 +124,35 @@ Multi-Level Description Directed Generation - David D.McDonald + David D.McDonald H86-1014 mcdonald-1986-multi <fixed-case>TAG</fixed-case>’s as a Grammatical Formalism for Generation - David D.McDonald - James D.Pustejovsky + David D.McDonald + James D.Pustejovsky H86-1015 mcdonald-pustejovsky-1986-tags Hypotheticals as Heuristic Device Edwina L.Rissland - Kevin D.Ashley + Kevin D.Ashley H86-1016 rissland-ashley-1986-hypotheticals Living Up to Expectations: Computing Expert Responses - AravindJoshi - BonnieWebber - Ralph M.Weischedel + AravindJoshi + BonnieWebber + Ralph M.Weischedel H86-1017 joshi-etal-1986-living The Role of Perspective in Responding to Property Misconceptions - Kathleen F.McCoy + Kathleen F.McCoy H86-1018 mccoy-1986-role @@ -164,21 +164,21 @@ Some Computational Properties of <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - K.Vijay-Shankar - Aravind K.Joshi + K.Vijay-Shankar + Aravind K.Joshi H86-1020 vijay-shankar-joshi-1986-computational <fixed-case>GUMS</fixed-case>₁ : A General User Modeling System - TimFinin + TimFinin DavidDrager H86-1021 finin-drager-1986-gums1 A Logical-Form and Knowledge-Base Design for Natural Language Generation - Norman K.Sondheimer + Norman K.Sondheimer BernhardNebel H86-1022 sondheimer-nebel-1986-logical @@ -191,7 +191,7 @@ Assertions from Discourse Structure - William C.Mann + William C.Mann Sandra A.Thompson H86-1024 mann-thompson-1986-assertions diff --git a/data/xml/H89.xml b/data/xml/H89.xml index f1ec812317..861bfeb302 100644 --- a/data/xml/H89.xml +++ b/data/xml/H89.xml @@ -12,7 +12,7 @@ Overview of the <fixed-case>DARPA</fixed-case> Speech and Natural Language Workshop - LynetteHirshman + LynetteHirshman H89-1001 hirshman-1989-overview @@ -42,20 +42,20 @@ Research in Continuous Speech Recognition - JohnMakhoul - RichardSchwartz + JohnMakhoul + RichardSchwartz H89-1006 makhoul-schwartz-1989-research Integrating Speech and Natural Language - SalimRoukos + SalimRoukos H89-1007 roukos-1989-integrating Rapid Porting of the <fixed-case>P</fixed-case>arlance™ Natural Language Interface - MadeleineBates + MadeleineBates H89-1008 bates-1989-rapid @@ -67,44 +67,44 @@ The <fixed-case>BBN</fixed-case> <fixed-case>BYBLOS</fixed-case> Continuous Speech Recognition System - RichardSchwartz - ChrisBarry - Yen-LuChow + RichardSchwartz + ChrisBarry + Yen-LuChow AlanDeft Ming-WheiFeng - OwenKimball - FrancisKubala - JohnMakhoul + OwenKimball + FrancisKubala + JohnMakhoul JeffreyVandegrift H89-1010 schwartz-etal-1989-bbn Speaker Adaptation from Limited Training in the <fixed-case>BBN</fixed-case> <fixed-case>BYBLOS</fixed-case> Speech Recognition System - FrancisKubala + FrancisKubala Ming-WheiFeng - JohnMakhoul - RichardSchwartz + JohnMakhoul + RichardSchwartz H89-1011 kubala-etal-1989-speaker The <fixed-case>BBN</fixed-case> Spoken Language System - SeanBoisen - Yen-LuChow - AndrewHaas - RobertIngria - SalimRoukos - DavidStallard + SeanBoisen + Yen-LuChow + AndrewHaas + RobertIngria + SalimRoukos + DavidStallard H89-1012 boisen-etal-1989-bbn Portability in the <fixed-case>J</fixed-case>anus Natural Language Interface - Ralph M.Weischedel - Robert J.Bobrow - DamarisAyuso - LanceRamshaw + Ralph M.Weischedel + Robert J.Bobrow + DamarisAyuso + LanceRamshaw H89-1013 weischedel-etal-1989-portability @@ -116,34 +116,34 @@ The design of voice-driven interfaces - Alexander I.Rudnicky + Alexander I.Rudnicky H89-1015 rudnicky-1989-design Recent Progress in the <fixed-case>S</fixed-case>phinx Speech Recognition System - Kai-FuLee - Hsiao-WuenHon - Mei-YuhHwang + Kai-FuLee + Hsiao-WuenHon + Mei-YuhHwang H89-1016 lee-etal-1989-recent The <fixed-case>MINDS</fixed-case> System: Using Context and Dialog to Enhance Speech Recognition - Sheryl R.Young + Sheryl R.Young H89-1017 young-1989-minds Understanding Spontaneous Speech - WayneWard + WayneWard H89-1018 ward-1989-understanding <fixed-case>D</fixed-case>ragon - Janet M.Baker - James K.Baker + Janet M.Baker + James K.Baker H89-1019 baker-baker-1989-dragon @@ -155,20 +155,20 @@ The <fixed-case>P</fixed-case>enman Language Generation Project - William C.Mann - Eduard H.Hovy + William C.Mann + Eduard H.Hovy H89-1021 mann-hovy-1989-penman A Flexible Interface for Linking Applications to <fixed-case>P</fixed-case>enman’s Sentence Generator - Robert T.Kasper + Robert T.Kasper H89-1022 kasper-1989-flexible Robust Speech Recognition - Clifford J.Weinstein + Clifford J.Weinstein H89-1023 weinstein-1989-robust @@ -180,35 +180,35 @@ Acoustic-Phonetics Based Speech Recognition - Victor W.Zue + Victor W.Zue H89-1025 zue-1989-acoustic <fixed-case>TINA</fixed-case>: A Probabilistic Syntactic Parser for Speech Understanding Systems - StephanieSeneff + StephanieSeneff H89-1026 seneff-1989-tina The <fixed-case>MIT</fixed-case> <fixed-case>SUMMIT</fixed-case> Speech Recognition System: A Progress Report - VictorZue - JamesGlass - MichaelPhillips - StephanieSeneff + VictorZue + JamesGlass + MichaelPhillips + StephanieSeneff H89-1027 zue-etal-1989-mit <fixed-case>N</fixed-case>ATIONAL <fixed-case>I</fixed-case>NSTITUTE OF <fixed-case>S</fixed-case>TANDARDS AND <fixed-case>T</fixed-case>ECHNOLOGY (<fixed-case>NIST</fixed-case>) (Formerly <fixed-case>N</fixed-case>ational <fixed-case>B</fixed-case>ureau of <fixed-case>S</fixed-case>tandards) - David S.Pallett + David S.Pallett H89-1028 pallett-1989-national <fixed-case>N</fixed-case>ew <fixed-case>M</fixed-case>exico <fixed-case>S</fixed-case>tate <fixed-case>U</fixed-case>niversity <fixed-case>C</fixed-case>omputing <fixed-case>R</fixed-case>esearch <fixed-case>L</fixed-case>aboratory - YorickWilks - DavidFarwell + YorickWilks + DavidFarwell AfzalBallim RogerHartley H89-1029 @@ -216,7 +216,7 @@ <fixed-case>N</fixed-case>aval <fixed-case>O</fixed-case>cean <fixed-case>S</fixed-case>ystems <fixed-case>C</fixed-case>enter - BethSundheim + BethSundheim H89-1030 sundheim-1989-naval @@ -229,38 +229,38 @@ Plans for a Task-Oriented Evaluation of Natural Language Understanding Systems - Beth M.Sundheim + Beth M.Sundheim H89-1032 sundheim-1989-plans Natural Language Understanding - RalphGrishman + RalphGrishman H89-1033 grishman-1989-natural Analyzing Telegraphic Messages - RalphGrishman + RalphGrishman JohnSterling H89-1034 grishman-sterling-1989-analyzing Natural Language Research - AravindJoshi - MitchMarcus - MarkSteedman - BonnieWebber + AravindJoshi + MitchMarcus + MarkSteedman + BonnieWebber H89-1035 joshi-etal-1989-natural Lexicalized <fixed-case>TAG</fixed-case>s, Parsing and Lexicons - AnneAbeille + AnneAbeille KathleenBishop SharonCote - Aravind K.Joshi + Aravind K.Joshi YvesSchabes H89-1036 abeille-etal-1989-lexicalized @@ -268,13 +268,13 @@ Elements of a Computational Model of Cooperative Response Generation Brant A.Cheikes - Bonnie L.Webber + Bonnie L.Webber H89-1037 cheikes-webber-1989-elements Intonation and Syntax in Spoken Language Systems - MarkSteedman + MarkSteedman H89-1038 steedman-1989-intonation @@ -293,26 +293,26 @@ <fixed-case>SRI</fixed-case> <fixed-case>I</fixed-case>nternational, Speech Recognition Program, <fixed-case>M</fixed-case>enlo <fixed-case>P</fixed-case>ark, <fixed-case>CA</fixed-case> JaredBemstein - HyMurveit + HyMurveit H89-1041 bemstein-murveit-1989-sri <fixed-case>SRI</fixed-case>’s <fixed-case>DECIPHER</fixed-case> System - HyMurveit + HyMurveit MichaelCohen - PattiPrice + PattiPrice GayBaldwin - MitchWeintraub + MitchWeintraub JaredBernstein H89-1042 murveit-etal-1989-sris Integrating Speech and Natural-Language Processing - RobertMoore + RobertMoore FernandoPereira - HyMurveit + HyMurveit H89-1043 moore-etal-1989-integrating @@ -325,14 +325,14 @@ Chart Parsing of Stochastic Spoken Language Models - CharlesHemphill + CharlesHemphill JosephPicone H89-1045 hemphill-picone-1989-chart Initial Draft Guidelines for the Development of the Next-Generation Spoken Language Systems Speech Research Database - George R.Doddington + George R.Doddington H89-1046 doddington-1989-initial @@ -344,28 +344,28 @@ Natural Language Understanding: Integrating Syntax, Semantics, and Discourse. - LynetteHirschman - MarthaPalmer + LynetteHirschman + MarthaPalmer H89-1048 hirschman-palmer-1989-natural Analyzing Explicitly-Structured Discourse in a Limited Domain: Trouble and Failure Reports - Catherine N.Ball + Catherine N.Ball H89-1049 ball-1989-analyzing Reducing Search by Partitioning the Word Network - JohnDowding + JohnDowding H89-1050 dowding-1989-reducing Porting <fixed-case>PUNDIT</fixed-case> to the Resource Management Domain - LynetteHirschman - Francois-MichelLang - JohnDowding + LynetteHirschman + Francois-MichelLang + JohnDowding CarlWeir H89-1051 hirschman-etal-1989-porting @@ -373,13 +373,13 @@ Analysis and Symbolic Processing of Unrestricted Speech M. MargaretWithgott - Ronald M.Kaplan + Ronald M.Kaplan H89-1052 withgott-kaplan-1989-analysis Automatic Discovery of Contextual Factors Describing Phonological Variation - Francine R.Chen + Francine R.Chen JeffShrager H89-1053 chen-shrager-1989-automatic @@ -403,7 +403,7 @@ Report on Session <fixed-case>I</fixed-case>: Prosodic Aids to Speech Recognition - LynetteHirschman + LynetteHirschman H89-2001 hirschman-1989-report @@ -423,7 +423,7 @@ Distinguishing Questions by Contour Speech Recognition Tasks - JuliaHirschberg + JuliaHirschberg H89-2004 hirschberg-1989-distinguishing @@ -435,65 +435,65 @@ Unification-Based Semantic Interpretation in the <fixed-case>BBN</fixed-case> Spoken Language System - DavidStallard + DavidStallard H89-2006 stallard-1989-unification Modelling Non-verbal Sounds for Speech Recognition - WayneWard + WayneWard H89-2007 ward-1989-modelling The <fixed-case>VOYAGER</fixed-case> Speech Understanding System: A Progress Report - VictorZue - JamesGlass - DavidGoodine - HongLeung - MichaelPhillips - JosephPolifroni - StephanieSeneff + VictorZue + JamesGlass + DavidGoodine + HongLeung + MichaelPhillips + JosephPolifroni + StephanieSeneff H89-2008 zue-etal-1989-voyager Answers and Questions: Processing Messages and Queries - Catherine N.Ball - DeborahDahl - Lewis M.Norton - LynetteHirschman + Catherine N.Ball + DeborahDahl + Lewis M.Norton + LynetteHirschman CarlWeir - MarciaLinebarger + MarciaLinebarger H89-2009 ball-etal-1989-answers Natural Language <fixed-case>I</fixed-case> - Bonnie LynnWebber + Bonnie LynnWebber H89-2010 webber-1989-natural Preference Semantics for Message Understanding - RalphGrishman + RalphGrishman JohnSterling H89-2011 grishman-sterling-1989-preference Parsing, Word Associations and Typical Predicate-Argument Relations - KennethChurch - WilliamGale + KennethChurch + WilliamGale PatrickHanks - DonaldHindle + DonaldHindle H89-2012 church-etal-1989-parsing Enhanced <fixed-case>G</fixed-case>ood-<fixed-case>T</fixed-case>uring and <fixed-case>C</fixed-case>at-<fixed-case>C</fixed-case>al: Two New Methods for Estimating Probabilities of <fixed-case>E</fixed-case>nglish Bigrams (abbreviated version) - Kenneth W.Church - William A.Gale + Kenneth W.Church + William A.Gale H89-2013 church-gale-1989-enhanced @@ -505,101 +505,101 @@ New Possibilities in Machine Translation - Eduard H.Hovy + Eduard H.Hovy H89-2015 hovy-1989-new Data Collection And Evaluation - David S.Pallett + David S.Pallett H89-2016 pallett-1989-data Data Collection and Analysis in the Air Travel Planning Domain Jacqueline C.Kowtko - Patti J.Price + Patti J.Price H89-2017 kowtko-price-1989-data The Collection and Preliminary Analysis of a Spontaneous Speech Database - VictorZue + VictorZue NancyDaly - JamesGlass - DavidGoodine - HongLeung - MichaelPhillips - JosephPolifroni - StephanieSeneff + JamesGlass + DavidGoodine + HongLeung + MichaelPhillips + JosephPolifroni + StephanieSeneff MichalSoclof H89-2018 zue-etal-1989-collection A Proposal for <fixed-case>SLS</fixed-case> Evaluation - SeanBoisen - LanceRamshaw - DamarisAyuso - MadeleineBates + SeanBoisen + LanceRamshaw + DamarisAyuso + MadeleineBates H89-2019 boisen-etal-1989-proposal A Simple Statistical Class Grammar for Measuring Speech Recognition Performance AlanDerr - RichardSchwartz + RichardSchwartz H89-2020 derr-schwartz-1989-simple Evaluating spoken language interaction - Alexander I.Rudnicky + Alexander I.Rudnicky MichelleSakamoto - Joseph H.Polifroni + Joseph H.Polifroni H89-2021 rudnicky-etal-1989-evaluating Preliminary Evaluation of the <fixed-case>VOYAGER</fixed-case> Spoken Language System - VictorZue - JamesGlass - DavidGoodine - HongLeung - MichaelPhillips - JosephPolifroni - StephanieSeneff + VictorZue + JamesGlass + DavidGoodine + HongLeung + MichaelPhillips + JosephPolifroni + StephanieSeneff H89-2022 zue-etal-1989-preliminary Data Collection and Evaluation <fixed-case>II</fixed-case> - RalphGrishman + RalphGrishman H89-2023 grishman-1989-data Text on Tap: the <fixed-case>ACL</fixed-case>/<fixed-case>DCI</fixed-case> - MarkLiberman + MarkLiberman H89-2024 liberman-1989-text Spoken Language Systems <fixed-case>II</fixed-case> - RichardStern + RichardStern H89-2025 stern-1989-spoken A Stack Decoder for Continous Speech Recognition - Dean G.Sturtevant + Dean G.Sturtevant H89-2026 sturtevant-1989-stack The <fixed-case>N</fixed-case>-Best Algorithm: Efficient Procedure for Finding Top <fixed-case>N</fixed-case> Sentence Hypotheses - Yen-LuChow - RichardSchwartz + Yen-LuChow + RichardSchwartz H89-2027 chow-schwartz-1989-n @@ -611,13 +611,13 @@ Summary of Session 7 – Natural Language (Part 2) - MadeleineBates + MadeleineBates H89-2029 bates-1989-summary Belief Ascription and Model Generative Reasoning: joining two paradigms to a robust parser of messages. - YorickWilks + YorickWilks RogerHartley H89-2030 wilks-hartley-1989-belief @@ -625,60 +625,60 @@ Porting to New Domains Using the Learner Robert J. P.Ingna - LanceRamshaw + LanceRamshaw H89-2031 ingna-ramshaw-1989-porting Overview: Continuous Speech Recognition <fixed-case>I</fixed-case> - Janet M.Baker + Janet M.Baker H89-2032 baker-1989-overview Improved <fixed-case>HMM</fixed-case> Models for High Performance Speech Recognition - SteveAustin - ChrisBarry - Yen-LuChow + SteveAustin + ChrisBarry + Yen-LuChow ManDerr - OwenKimball - FrancisKubala - JohnMakhoul - PaulPlaceway + OwenKimball + FrancisKubala + JohnMakhoul + PaulPlaceway WilliamRussell - RichardSchwartz + RichardSchwartz GeorgeYu H89-2033 austin-etal-1989-improved Speaker Adaptation Using Multiple Reference Speakers - FrancisKubala - RichardSchwartz - ChrisBarry + FrancisKubala + RichardSchwartz + ChrisBarry H89-2034 kubala-etal-1989-speaker-adaptation Automatic Detection Of New Words In A Large Vocabulary Continuous Speech Recognition System AymanAsadi - RichardSchwartz - JohnMakhoul + RichardSchwartz + JohnMakhoul H89-2035 asadi-etal-1989-automatic Automatic New Word Acquisition: Spelling from Acoustics - FilAlleva - Kai-FuLee + FilAlleva + Kai-FuLee H89-2036 alleva-lee-1989-automatic Towards Speech Recognition Without Vocabulary-Specific Training - Hsiao-WuenHon - Kai-FuLee - RobertWeide + Hsiao-WuenHon + Kai-FuLee + RobertWeide H89-2037 hon-etal-1989-towards @@ -693,9 +693,9 @@ Acoustic Modeling of Subword Units for Large Vocabulary Speaker Independent Speech Recognition Chin-HuiLee - Lawrence R.Rabiner - RobertoPieraccini - Jay G.Wilpon + Lawrence R.Rabiner + RobertoPieraccini + Jay G.Wilpon H89-2039 lee-etal-1989-acoustic @@ -714,27 +714,27 @@ Summary of Session 9 - Future Plans - CliffordWeinstein + CliffordWeinstein H89-2042 weinstein-1989-summary SUMMARY OF SESSION 10 - Continous Speech Recognition <fixed-case>II</fixed-case> - GeorgeDoddington + GeorgeDoddington H89-2043 doddington-1989-summary Acoustical Pre-Processing for Robust Speech Recognition - Richard M.Stern + Richard M.Stern AlejandroAcero H89-2044 stern-acero-1989-acoustical Spectral Estimation for Noise Robust Speech Recognition - AdoramErell - MitchWeintraub + AdoramErell + MitchWeintraub H89-2045 erell-weintraub-1989-spectral @@ -756,7 +756,7 @@ Some Applications of Tree-based Modelling to Speech and Language - Michael D.Riley + Michael D.Riley H89-2048 riley-1989-applications @@ -764,33 +764,33 @@ Speech Recognition in Parallel Salvatore J.Stolfo ZviGalil - KathleenMcKeown + KathleenMcKeown RussellMills H89-2049 stolfo-etal-1989-speech Contextually-Based Data-Derived Pronunciation Networks for Automatic Speech Recognition - Francine R.Chen + Francine R.Chen H89-2050 chen-1989-contextually Session 11 Natural Language <fixed-case>III</fixed-case> - Kenneth WardChurch + Kenneth WardChurch H89-2051 church-1989-session Using Structural Constraints for Speech Act Interpretation - James F.Allen - ElizabethHinkelman + James F.Allen + ElizabethHinkelman H89-2052 allen-hinkelman-1989-using An Evaluation of Lexicalization in Parsing - Aravind K.Joshi + Aravind K.Joshi YvesSchabes H89-2053 joshi-schabes-1989-evaluation @@ -807,33 +807,33 @@ Coordinating Text and Graphics in Explanation Generation - Steven K.Feiner - Kathleen R.McKeown + Steven K.Feiner + Kathleen R.McKeown H89-2055 feiner-mckeown-1989-coordinating Summary of Session on Hardware for Spoken Language Demonstrations - RichardSchwartz + RichardSchwartz H89-2056 schwartz-1989-summary Research and Development in Natural Language Understanding - RalphWeischedel + RalphWeischedel H89-2057 weischedel-1989-research Research in Continuous Speech Recognition - JohnMakhoul - RichardSchwartz + JohnMakhoul + RichardSchwartz H89-2058 makhoul-schwartz-1989-research-continuous Spoken Language Systems - JohnMakhoul + JohnMakhoul H89-2059 makhoul-1989-spoken @@ -845,15 +845,15 @@ Evaluating the Use of Prosodic Information in Speech Recognition and Understanding - MariOstendorf - PattiPrice + MariOstendorf + PattiPrice H89-2061 ostendorf-price-1989-evaluating Segment-Based Acoustic Models with Multi-level Search Algorithms for Continuous Speech Recognition - MariOstendorf - J. RobinRohlicek + MariOstendorf + J. RobinRohlicek H89-2062 ostendorf-rohlicek-1989-segment @@ -871,32 +871,32 @@ The Current Status of the Penman Language Generation System - Eduard H.Hovy + Eduard H.Hovy H89-2065 hovy-1989-current Research and Development for Spoken Language Systems - Victor W.Zue + Victor W.Zue H89-2066 zue-1989-research Robust Speech Recognition Technology Program Summary - Clifford J.Weinstein + Clifford J.Weinstein Douglas B.Paul H89-2067 weinstein-paul-1989-robust Establishing Performance Baselines for Text Understanding Systems - BethSundheim + BethSundheim H89-2068 sundheim-1989-establishing Robust Natural Language Analysis - RalphGrishman + RalphGrishman H89-2069 grishman-1989-robust @@ -914,8 +914,8 @@ Natural Language, Knowledge Representation and Discourse - James F.Allen - Lenhart K.Schubert + James F.Allen + Lenhart K.Schubert H89-2071 allen-schubert-1989-natural @@ -927,21 +927,21 @@ Integration of Speech and Natural Language Understanding for Spoken Language Systems (<fixed-case>SLS</fixed-case>) - PattiPrice + PattiPrice H89-2073 price-1989-integration Real-Time Speech Recognition Systems - HyMurveit + HyMurveit H89-2074 murveit-1989-real <fixed-case>TACITUS</fixed-case>: A Message Understanding System - Jerry R.Hobbs - DouglasAppelt - JohnBear + Jerry R.Hobbs + DouglasAppelt + JohnBear MarkStickel MabryTyson H89-2075 @@ -950,28 +950,28 @@ Analysis and Symbolic Processing of Unrestricted Speech M. MargaretWithgott - Ronald M.Kaplan + Ronald M.Kaplan H89-2076 withgott-kaplan-1989-analysis-symbolic White Paper on Spoken Language Systems - JohnMakhoul - FredJelinek + JohnMakhoul + FredJelinek LarryRabiner - CliffordWeinstein - VictorZue + CliffordWeinstein + VictorZue H89-2077 makhoul-etal-1989-white White Paper on Natural Language Processing - RalphWeischedel - JaimeCarbonell - BarbaraGrosz - WendyLehnert - MitchellMarcus - RaymondPerrault + RalphWeischedel + JaimeCarbonell + BarbaraGrosz + WendyLehnert + MitchellMarcus + RaymondPerrault RobertWilensky H89-2078 weischedel-etal-1989-white diff --git a/data/xml/H90.xml b/data/xml/H90.xml index cdd6869221..2544ff6cbe 100644 --- a/data/xml/H90.xml +++ b/data/xml/H90.xml @@ -12,26 +12,26 @@ Overview of the Third <fixed-case>DARPA</fixed-case> Speech and Natural Language Workshop - Richard M.Stern + Richard M.Stern H90-1001 stern-1990-overview Session 1: Spoken Language Systems <fixed-case>I</fixed-case> - WayneWard + WayneWard H90-1002 ward-1990-session Efficient, High-Performance Algorithms for N-Best Search - RichardSchwartz - SteveAustin + RichardSchwartz + SteveAustin H90-1003 schwartz-austin-1990-efficient A <fixed-case>T</fixed-case>ree.<fixed-case>T</fixed-case>rellis Based Fast Search for Finding the N Best Sentence Hypotheses in Continuous Speech Recognition - Frank K.Soong + Frank K.Soong Eng-FongHuang H90-1004 soong-huang-1990-tree @@ -53,48 +53,48 @@ Session 2: Natural Language <fixed-case>I</fixed-case> - Damaris M.Ayuso + Damaris M.Ayuso H90-1007 ayuso-1990-session Picking Reference Events from Tense A Formal, Implement able Theory of <fixed-case>E</fixed-case>nglish Tense-Aspect Semantics Trees: - Lenhart K.Schubert + Lenhart K.Schubert Chung HeeHwang H90-1008 schubert-hwang-1990-picking Interactive Multimedia Explanation for Equipment Maintenance and Repair - KathleenMcKeown - StevenFeiner + KathleenMcKeown + StevenFeiner H90-1009 mckeown-feiner-1990-interactive Two Recent Developments in <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars: Semantics and Efficient Processing YvesSchabes - Aravind K.Joshi + Aravind K.Joshi H90-1010 schabes-joshi-1990-two Performing Integrated Syntactic and Semantic Parsing Using Classification - Robert T.Kasper - Eduard H.Hovy + Robert T.Kasper + Eduard H.Hovy H90-1011 kasper-hovy-1990-performing Making Abduction More Efficient - DouglasAppelt - Jerry R.Hobbs + DouglasAppelt + Jerry R.Hobbs H90-1012 appelt-hobbs-1990-making Session 3: Natural Language Evaluation - LynetteHirschman + LynetteHirschman H90-1013 hirschman-1990-session @@ -112,10 +112,10 @@ Toward a Real-Time Spoken Language System Using Commercial Hardware - SteveAustin + SteveAustin PatPeterson - PaulPlaceway - RichardSchwartz + PaulPlaceway + RichardSchwartz JeffVandergrift H90-1016 austin-etal-1990-toward @@ -123,10 +123,10 @@ The Dragon Continuous Speech Recognition System: A Real-Time Implementation PaulBamberg - Yen-luChow - LaurenceGillick + Yen-luChow + LaurenceGillick RobertRoth - DeanSturtevant + DeanSturtevant H90-1017 bamberg-etal-1990-dragon @@ -147,7 +147,7 @@ Session 5: Overview of the <fixed-case>ATIS</fixed-case> System - David S.Pallett + David S.Pallett H90-1019 pallett-1990-session @@ -159,27 +159,27 @@ The <fixed-case>ATIS</fixed-case> Spoken Language Systems Pilot Corpus - Charles T.Hemphill + Charles T.Hemphill John J.Godfrey - George R.Doddington + George R.Doddington H90-1021 hemphill-etal-1990-atis Developing an Evaluation Methodology for Spoken Language Systems - MadeleineBates - SeanBoisen - JohnMakhoul + MadeleineBates + SeanBoisen + JohnMakhoul H90-1022 bates-etal-1990-developing Beyond Class A: A Proposal for Automatic Evaluation of Discourse - LynetteHirschman - Deborah A.Dahl + LynetteHirschman + Deborah A.Dahl Donald P.McKay - Lewis M.Norton - Marcia C.Linebarger + Lewis M.Norton + Marcia C.Linebarger H90-1023 hirschman-etal-1990-beyond @@ -194,7 +194,7 @@ Session 6: <fixed-case>ATIS</fixed-case> Site Reports and General Discussion - David S.Pallett + David S.Pallett H90-1025 pallett-1990-session-6 @@ -210,19 +210,19 @@ The <fixed-case>CMU</fixed-case> Air Travel Information Service: Understanding Spontaneous Speech - WayneWard + WayneWard H90-1027 ward-1990-cmu Preliminary <fixed-case>ATIS</fixed-case> Development at <fixed-case>MIT</fixed-case> - VictorZue - JamesGlass - DavidGoodine - HongLeung - MichaelPhillips - JosephPolifroni - StephanieSeneff + VictorZue + JamesGlass + DavidGoodine + HongLeung + MichaelPhillips + JosephPolifroni + StephanieSeneff H90-1028 zue-etal-1990-preliminary @@ -239,29 +239,29 @@ Management and Evaluation of Interactive Dialog in the Air Travel Domain - Lewis M.Norton - Deborah A.Dahl + Lewis M.Norton + Deborah A.Dahl Donald P.McKay - LynetteHirschman - Marcia C.Linebarger - DavidMagerman - Catherine N.Ball + LynetteHirschman + Marcia C.Linebarger + DavidMagerman + Catherine N.Ball H90-1030 norton-etal-1990-management <fixed-case>SRI</fixed-case>’s Experience with the <fixed-case>ATIS</fixed-case> Evaluation - RobertMoore - DouglasAppelt - JohnBear + RobertMoore + DouglasAppelt + JohnBear MaryDalrymple - DouglasMoran + DouglasMoran H90-1031 moore-etal-1990-sris Session 7: Speech Recognition <fixed-case>I</fixed-case> - MitchWeintraub + MitchWeintraub H90-1032 weintraub-1990-session @@ -274,20 +274,20 @@ Towards Environment-Independent Spoken Language Systems AlejandroAcero - Richard M.Stern + Richard M.Stern H90-1034 acero-stern-1990-towards Phoneme-in-Context Modeling for Dragon’s Continuous Speech Recognizer PaulBamberg - LaurenceGillick + LaurenceGillick H90-1035 bamberg-gillick-1990-phoneme A Rapid Match Algorithm for Continuous Speech Recognition - Laurence S.Gillick + Laurence S.Gillick RobertRoth H90-1036 gillick-roth-1990-rapid @@ -309,7 +309,7 @@ R. L.Mercer B.Merialdo D.Nahamoo - M. A.Picheny + M. A.Picheny J.Powell H90-1038 bahl-etal-1990-automatic @@ -317,8 +317,8 @@ On the Interaction Between True Source, Training, and Testing Language Models Douglas B.Paul - James K.Baker - Janet M.Baker + James K.Baker + Janet M.Baker H90-1039 paul-etal-1990-interaction @@ -338,61 +338,61 @@ Session 8: Spoken Language Systems <fixed-case>II</fixed-case> - Charles T.Hemphill + Charles T.Hemphill H90-1042 hemphill-1990-session Recent Progress on the <fixed-case>VOYAGER</fixed-case> System - VictorZue - JamesGlass - DavidGoodine - HongLeung + VictorZue + JamesGlass + DavidGoodine + HongLeung MichaelMcCandless - MichaelPhillips - JosephPolifroni - StephanieSeneff + MichaelPhillips + JosephPolifroni + StephanieSeneff H90-1043 zue-etal-1990-recent Training and Evaluation of a Spoken Language Understanding System - Deborah A.Dahl - LynetteHirschman - Lewis M.Norton - Marcia C.Linebarger - DavidMagerman + Deborah A.Dahl + LynetteHirschman + Lewis M.Norton + Marcia C.Linebarger + DavidMagerman NghiNguyen - Catherine N.Ball + Catherine N.Ball H90-1044 dahl-etal-1990-training A Comparison of Speech and Typed Input - Alexander G.Hauptmann - Alexander I.Rudnicky + Alexander G.Hauptmann + Alexander I.Rudnicky H90-1045 hauptmann-rudnicky-1990-comparison The design of a spoken language interface Jean-MichelLunati - Alexander I.Rudnicky + Alexander I.Rudnicky H90-1046 lunati-rudnicky-1990-design Syntactic and Semantic Knowledge in the <fixed-case>DELPHI</fixed-case> Unification Grammar R.Bobrow - RobertIngria - DavidStallard + RobertIngria + DavidStallard H90-1047 bobrow-etal-1990-syntactic On Deftly Introducing Procedural Elements into Unification Parsing R.Bobrow - LanceRamshaw + LanceRamshaw H90-1048 bobrow-ramshaw-1990-deftly @@ -405,28 +405,28 @@ Session 9: Automatic Acquisition of Linguistic Structure - MitchellMarcus + MitchellMarcus H90-1050 marcus-1990-session Using Explanation-Based Learning to Increase Performance in a Large-Scale <fixed-case>NL</fixed-case> Query System - MannyRayner + MannyRayner ChristerSamuelsson H90-1051 rayner-samuelsson-1990-using Structural Ambiguity and Lexical Relations - DonaldHindle + DonaldHindle MatsRooth H90-1052 hindle-rooth-1990-structural Statistical Parsing of Messages - Mahesh V.Chitrao - RalphGrishman + Mahesh V.Chitrao + RalphGrishman H90-1053 chitrao-grishman-1990-statistical @@ -441,28 +441,28 @@ Deducing Linguistic Structure from the Statistics of Large Corpora EricBrill - DavidMagerman - MitchellMarcus - BeatriceSantorini + DavidMagerman + MitchellMarcus + BeatriceSantorini H90-1055 brill-etal-1990-deducing Poor Estimates of Context are Worse than None - William A.Gale - Kenneth W.Church + William A.Gale + Kenneth W.Church H90-1056 gale-church-1990-poor Representation Quality in Text Classification: An Introduction and Experiment - David D.Lewis + David D.Lewis H90-1057 lewis-1990-representation Session 10: Evaluation of Systems on the Resource Management Database - GeorgeDoddington + GeorgeDoddington H90-1058 doddington-1990-session @@ -476,15 +476,15 @@ A New Paradigm for Speaker-Independent Training and Speaker Adaptation - FrancisKubala - RichardSchwartz + FrancisKubala + RichardSchwartz H90-1060 kubala-schwartz-1990-new Implementation Aspects of Large Vocabulary Recognition Based on Intraword and Interword Phonetic Units R.Pieraccini - C. H.Lee + C. H.Lee E.Giachin L. R.Rabiner H90-1061 @@ -492,7 +492,7 @@ Improved Acoustic Modeling for Continuous Speech Recognition - C.-H.Lee + C.-H.Lee E.Giachin L. R.Rabiner R.Pieraccini @@ -502,12 +502,12 @@ Improved Hidden <fixed-case>M</fixed-case>arkov Modeling for Speaker-Independent Continuous Speech Recognition - XuedongHuang - FilAlleva + XuedongHuang + FilAlleva SatoruHayamizu - Hsiao-WuenHon - Mei-YuhHwang - Kai-FuLee + Hsiao-WuenHon + Mei-YuhHwang + Kai-FuLee H90-1063 huang-etal-1990-improved @@ -519,8 +519,8 @@ Training Set Issues in <fixed-case>SRI</fixed-case>’s <fixed-case>DECIPHER</fixed-case> Speech Recognition System - HyMurveit - MitchWeintraub + HyMurveit + MitchWeintraub MikeCohen H90-1065 murveit-etal-1990-training @@ -537,35 +537,35 @@ Mark T.Anikst William S.Meisel Matthew C.Soares - Kai-FuLee + Kai-FuLee H90-1067 anikst-etal-1990-experiments Session 11: Natural Language <fixed-case>II</fixed-case> - Deborah A.Dahl + Deborah A.Dahl H90-1068 dahl-1990-session Towards Understanding Text with a Very Large Vocabulary - DamarisAyuso + DamarisAyuso R.Bobrow DawnMacLaughlin - MarieMeteer - LanceRamshaw - RichSchwartz - RalphWeischedel + MarieMeteer + LanceRamshaw + RichSchwartz + RalphWeischedel H90-1069 ayuso-etal-1990-towards Generic Text Processing: A Progress Report - Paul S.Jacobs - George R.Krupka - Susan W.McRoy - Lisa F.Rau - Norman K.Sondheimer + Paul S.Jacobs + George R.Krupka + Susan W.McRoy + Lisa F.Rau + Norman K.Sondheimer UriZernik H90-1070 jacobs-etal-1990-generic @@ -578,35 +578,35 @@ Machine Translation Again? - YorickWilks - JaimeCarbonell - DavidFarwell - EduardHovy - SergeiNirenburg + YorickWilks + JaimeCarbonell + DavidFarwell + EduardHovy + SergeiNirenburg H90-1072 wilks-etal-1990-machine Session 12: Speech Recognition <fixed-case>II</fixed-case> - JordanCohen + JordanCohen H90-1073 cohen-1990-session Recent Progress on the <fixed-case>SUMMIT</fixed-case> System - VictorZue - JamesGlass - DavidGoodine - HongLeung - MichaelPhillips - JosephPolifroni - StephanieSeneff + VictorZue + JamesGlass + DavidGoodine + HongLeung + MichaelPhillips + JosephPolifroni + StephanieSeneff H90-1074 zue-etal-1990-recent-progress Spoken Letter Recognition - RonaldCole + RonaldCole MarkFanty H90-1075 cole-fanty-1990-spoken @@ -623,41 +623,41 @@ Recent Results from the <fixed-case>ARM</fixed-case> Continuous Speech Recognition Project - MartinRussell + MartinRussell KeithPonting H90-1077 russell-ponting-1990-recent Adaptive Natural Language Processing - RalphWeischedel + RalphWeischedel H90-1078 weischedel-1990-adaptive Research in Continuous Speech Recognition - JohnMakhoul - RichardSchwartz + JohnMakhoul + RichardSchwartz H90-1079 makhoul-schwartz-1990-research Spoken Language Systems - JohnMakhoul + JohnMakhoul H90-1080 makhoul-1990-spoken Evaluating the Use of Prosodic Information in Speech Recognition and Understanding - MariOstendorf - PattiPrice + MariOstendorf + PattiPrice H90-1081 ostendorf-price-1990-evaluating Segment-Based Acoustic Models with Multi-level Search Algorithms for Continuous Speech Recognition - MariOstendorf - J. RobinRohlicek + MariOstendorf + J. RobinRohlicek H90-1082 ostendorf-rohlicek-1990-segment @@ -681,14 +681,14 @@ Interactive Multimedia Explanation for Equipment Maintenance and Repair - KathleenMcKeown - StevenFeiner + KathleenMcKeown + StevenFeiner H90-1086 mckeown-feiner-1990-interactive-multimedia Large Vocabulary Speech Recognition Prototype - Janet M.Baker + Janet M.Baker H90-1087 baker-1990-large @@ -700,38 +700,38 @@ Robust Speech Recognition Technology Program Summary - Clifford J.Weinstein + Clifford J.Weinstein Douglas B.Paul H90-1089 weinstein-paul-1990-robust Research and Development for Spoken Language Systems - Victor W.Zue + Victor W.Zue H90-1090 zue-1990-research <fixed-case>NIST</fixed-case>-<fixed-case>DARPA</fixed-case> Interagency Agreement: <fixed-case>SLS</fixed-case> Program - David S.Pallett + David S.Pallett H90-1091 pallett-1990-nist Extending the Scope of Text Understanding Systems Evaluation - BethSundheim + BethSundheim H90-1092 sundheim-1990-extending PROGRESS REPORT: Active Knowledge Structures in Natural Language Understanding - YorickWilks + YorickWilks H90-1093 wilks-1990-progress Research in Text Processing: Creating Robust and Portable Systems - RalphGrishman + RalphGrishman H90-1094 grishman-1990-research @@ -743,44 +743,44 @@ A Real-Time Spoken-Language System Interactive Problem-Solving - PattiPrice - Robert C.Moore + PattiPrice + Robert C.Moore H90-1096 price-moore-1990-real Real-Time Speech Recognition Systems - HyMurveit - MitchWeintraub + HyMurveit + MitchWeintraub H90-1097 murveit-weintraub-1990-real Project Summary: Linguistic Knowledge Sources for Spoken Language Understanding - LynetteHirschman - DeborahDahl + LynetteHirschman + DeborahDahl H90-1098 hirschman-dahl-1990-project Natural Language Research - AravindJoshi - MitchMarcus - MarkSteedman - BonnieWebber + AravindJoshi + MitchMarcus + MarkSteedman + BonnieWebber H90-1099 joshi-etal-1990-natural Very Large Annotated Database of <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish - MitchMarcus + MitchMarcus H90-1100 marcus-1990-large Natural Language, Knowledge Representation, and Discourse - James F.Allen - Lenhart K.Schubert + James F.Allen + Lenhart K.Schubert H90-1101 allen-schubert-1990-natural @@ -791,7 +791,7 @@ Opportunities for Advanced Speech Processing in Military Computer-Based Systems* - Clifford J.Weinstein + Clifford J.Weinstein H90-1103 weinstein-1990-opportunities diff --git a/data/xml/H91.xml b/data/xml/H91.xml index 26c1e31a4d..56ed944aca 100644 --- a/data/xml/H91.xml +++ b/data/xml/H91.xml @@ -12,20 +12,20 @@ Overview of the Fourth <fixed-case>DARPA</fixed-case> Speech and Natural Language Workshop - PattiPrice + PattiPrice H91-1001 price-1991-overview Session 1: Speech and Natural Language Efforts in the <fixed-case>U. S.</fixed-case> and Abroad - Mark Y.Liberman - PattiPrice + Mark Y.Liberman + PattiPrice H91-1002 liberman-price-1991-session The <fixed-case>ESPRIT</fixed-case> Project <fixed-case>POLYGLOT</fixed-case> - LouisBoves + LouisBoves H91-1003 boves-1991-esprit @@ -54,20 +54,20 @@ Session 2: <fixed-case>DARPA</fixed-case> Resource Management and <fixed-case>ATIS</fixed-case> Benchmark Test Poster Session - David S.Pallett + David S.Pallett H91-1008 pallett-1991-session <fixed-case>D</fixed-case>ragon <fixed-case>S</fixed-case>ystems Resource Management Benchmark Results <fixed-case>F</fixed-case>ebruary 1991 - JamesBaker - JanetBaker + JamesBaker + JanetBaker PardBamberg LarryGillick - LoriLamel + LoriLamel RobertRoth FrancescoScattone - DeanSturtevant + DeanSturtevant OusmaneBa RichardBenedict H91-1009 @@ -81,9 +81,9 @@ Modelling Context Dependency in Acoustic-Phonetic and Lexical Representations - MichaelPhillips - JamesGlass - VictorZue + MichaelPhillips + JamesGlass + VictorZue H91-1011 phillips-etal-1991-modelling @@ -111,35 +111,35 @@ Development and Preliminary Evaluation of the <fixed-case>MIT</fixed-case> <fixed-case>ATIS</fixed-case> System - StephanieSeneff - JamesGlass + StephanieSeneff + JamesGlass DavidGoddeau - DavidGoodine - LynetteHirschman - HongLeung - MichaelPhillips - JosephPolifroni - VictorZue + DavidGoodine + LynetteHirschman + HongLeung + MichaelPhillips + JosephPolifroni + VictorZue H91-1014 seneff-etal-1991-development Speech Recognition in <fixed-case>SRI</fixed-case>’s Resource Management and <fixed-case>ATIS</fixed-case> Systems - HyMurveit + HyMurveit JohnButzberger - MitchWeintraub + MitchWeintraub H91-1015 murveit-etal-1991-speech Evaluation of the <fixed-case>CMU</fixed-case> <fixed-case>ATIS</fixed-case> System - WayneWard + WayneWard H91-1016 ward-1991-evaluation Using Semantics to Correct Parser Output for <fixed-case>ATIS</fixed-case> Utterances - SherylYoung + SherylYoung H91-1017 young-1991-using @@ -159,13 +159,13 @@ A Textual processor to handle <fixed-case>ATIS</fixed-case> queries - DouglasO’Shaughnessy + DouglasO’Shaughnessy H91-1019 oshaughnessy-1991-textual Stochastic Representation of Conceptual Structure in the <fixed-case>ATIS</fixed-case> Task - RobertoPieraccini + RobertoPieraccini EstherLevin Chin-HuiLee H91-1020 @@ -173,45 +173,45 @@ Augmented Role Filling Capabilities for Semantic Interpretation of Spoken Language - LewisNorton - MarciaLinebarger - DeborahDahl + LewisNorton + MarciaLinebarger + DeborahDahl NghiNguyen H91-1021 norton-etal-1991-augmented The Use of a Commercial Natural Language Interface in the <fixed-case>ATIS</fixed-case> Task - EvelyneTzoukermann + EvelyneTzoukermann H91-1022 tzoukermann-1991-use Session 3: Machine Translation - JaimeCarbonell + JaimeCarbonell H91-1023 carbonell-1991-session Machine Translation Using Abductive Inference - Jerry R.Hobbs + Jerry R.Hobbs MegumiKameyama H91-1024 hobbs-kameyama-1991-machine A Statistical Approach to Sense Disambiguation in Machine Translation - Peter F.Brown - Stephen A.Della Pietra - Vincent J.Della Pietra - Robert L.Mercer + Peter F.Brown + Stephen A.Della Pietra + Vincent J.Della Pietra + Robert L.Mercer H91-1025 brown-etal-1991-statistical Identifying Word Correspondences in Parallel Texts - William A.Gale - Kenneth W.Church + William A.Gale + Kenneth W.Church H91-1026 gale-church-1991-identifying @@ -244,64 +244,64 @@ Signal Representation Attribute Extraction and the Use Distinctive Features for Phonetic Classification - Helen M.Meng - Victor W.Zue - Hong C.Leung + Helen M.Meng + Victor W.Zue + Hong C.Leung H91-1031 meng-etal-1991-signal Session 5: Natural Language <fixed-case>I</fixed-case> - James F.Allen + James F.Allen H91-1032 allen-1991-session The Mapping Unit Approach to Subcategorization - RobertBobrow - RobertIngria - DavidStallard + RobertBobrow + RobertIngria + DavidStallard H91-1033 bobrow-etal-1991-mapping A Template Matcher for Robust <fixed-case>NL</fixed-case> Interpretation - EricJackson - DouglasAppelt - JohnBear - RobertMoore + EricJackson + DouglasAppelt + JohnBear + RobertMoore AnnPodlozny H91-1034 jackson-etal-1991-template Fixed and Flexible Phrase Structure: Coordination in <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - Aravind K.Joshi + Aravind K.Joshi YvesSchabes H91-1035 joshi-schabes-1991-fixed Efficient Bottom-Up Parsing - RobertMoore - JohnDowding + RobertMoore + JohnDowding H91-1036 moore-dowding-1991-efficient Partial Parsing: A Report on Work in Progress - RalphWeischedel - DamarisAyuso + RalphWeischedel + DamarisAyuso R.Bobrow - SeanBoisen - RobertIngria + SeanBoisen + RobertIngria JeffPalmucci H91-1037 weischedel-etal-1991-partial Session 6: Demonstrations and Videotapes of Speech and Natural Language Technologies - MariOstendorf + MariOstendorf H91-1038 ostendorf-1991-session @@ -313,7 +313,7 @@ Using Spoken Language to Facilitate Military Transportation Planning - MadeleineBates + MadeleineBates DanEllard PatPeterson VardaShaked @@ -322,27 +322,27 @@ Session 7: Natural Language <fixed-case>II</fixed-case> - SalimRoukos + SalimRoukos H91-1041 roukos-1991-session Statistical Agenda Parsing - Robert J.Bobrow + Robert J.Bobrow H91-1042 bobrow-1991-statistical Some Results on Stochastic Language Modelling - RenatoDe Mori + RenatoDe Mori RolandKuhn H91-1043 de-mori-kuhn-1991-results Parsing the <fixed-case>V</fixed-case>oyager Domain Using <fixed-case>P</fixed-case>earl - David M.Magerman - Mitchell P.Marcus + David M.Magerman + Mitchell P.Marcus H91-1044 magerman-marcus-1991-parsing @@ -361,7 +361,7 @@ Session 8: Speech <fixed-case>II</fixed-case> - Kai-FuLee + Kai-FuLee H91-1047 lee-1991-session @@ -384,8 +384,8 @@ Recent Progress in Robust Vocabulary-Independent Speech Recognition - Hsiao-WuenHon - Kai-FuLee + Hsiao-WuenHon + Kai-FuLee H91-1050 hon-lee-1991-recent @@ -395,13 +395,13 @@ P.V.de Souza P.S.Gopalakrishnan D.Nahamoo - M.A.Picheny + M.A.Picheny H91-1051 bahl-etal-1991-context Session 9: Speech <fixed-case>III</fixed-case> - FrancisKubala + FrancisKubala H91-1052 kubala-1991-session @@ -426,8 +426,8 @@ Lexical Access With a Statistically-Derived Phonetic Network - Michael D.Riley - AndrejLjolje + Michael D.Riley + AndrejLjolje H91-1056 riley-ljolje-1991-lexical @@ -442,13 +442,13 @@ Session 10: Corpora and Evaluation - Clifford J.Weinstein + Clifford J.Weinstein H91-1058 weinstein-1991-session <fixed-case>T</fixed-case>hird <fixed-case>M</fixed-case>essage <fixed-case>U</fixed-case>nderstanding <fixed-case>E</fixed-case>valuation and <fixed-case>C</fixed-case>onference (<fixed-case>MUC</fixed-case>-3): Phase 1 Status Report - Beth M.Sundheim + Beth M.Sundheim H91-1059 sundheim-1991-third @@ -474,55 +474,55 @@ Evaluating Text Categorization <fixed-case>I</fixed-case> - David D.Lewis + David D.Lewis H91-1061 lewis-1991-evaluating A Proposal for Incremental Dialogue Evaluation - MadeleineBates - DamarisAyuso + MadeleineBates + DamarisAyuso H91-1062 bates-ayuso-1991-proposal Session 11 - Natural Language <fixed-case>III</fixed-case> - MitchMarcus + MitchMarcus H91-1063 marcus-1991-session Discourse Structure in the <fixed-case>TRAINS</fixed-case> Project - James F.Allen + James F.Allen H91-1064 allen-1991-discourse Studies in Part of Speech Labelling - MarieMeteer - RichardSchwartz - RalphWeischedel + MarieMeteer + RichardSchwartz + RalphWeischedel H91-1065 meteer-etal-1991-studies Lexico-Semantic Pattern Matching as a Companion to Parsing in Text Understanding - Paul S.Jacobs - George R.Krupka - Lisa F.Rau + Paul S.Jacobs + George R.Krupka + Lisa F.Rau H91-1066 jacobs-etal-1991-lexico Automatic Acquisition of Subcategorization Frames from Tagged Text Michael R.Brent - Robert C.Berwick + Robert C.Berwick H91-1067 brent-berwick-1991-automatic Fast Text Processing for Information Retrieval - TomekStrzalkowski + TomekStrzalkowski BarbaraVauthey H91-1068 strzalkowski-vauthey-1991-fast @@ -535,42 +535,42 @@ Interactive Problem Solving and Dialogue in the <fixed-case>ATIS</fixed-case> Domain - StephanieSeneff - LynetteHirschman - Victor W.Zue + StephanieSeneff + LynetteHirschman + Victor W.Zue H91-1070 seneff-etal-1991-interactive Collection of Spontaneous Speech for the <fixed-case>ATIS</fixed-case> Domain and Comparative Analyses of Data Collected at <fixed-case>MIT</fixed-case> and <fixed-case>TI</fixed-case> - JosephPolifroni - StephanieSeneff - Victor W.Zue + JosephPolifroni + StephanieSeneff + Victor W.Zue H91-1071 polifroni-etal-1991-collection Integrating Syntax and Semantics into Spoken Language Understanding - LynetteHirschman - StephanieSeneff - DavidGoodine - MichaelPhillips + LynetteHirschman + StephanieSeneff + DavidGoodine + MichaelPhillips H91-1072 hirschman-etal-1991-integrating The Use of Prosody in Syntactic Disambiguation - PattiPrice - MariOstendorf - StefanieShattuck-Hufnagel + PattiPrice + MariOstendorf + StefanieShattuck-Hufnagel CynthiaFong H91-1073 price-etal-1991-use Predicting Intonational Boundaries Automatically from Text: The <fixed-case>ATIS</fixed-case> Domain - Michelle Q.Wang - JuliaHirschberg + Michelle Q.Wang + JuliaHirschberg H91-1074 wang-hirschberg-1991-predicting @@ -602,34 +602,34 @@ Adaptive Natural Language Processing - RalphWeischedel + RalphWeischedel H91-1079 weischedel-1991-adaptive Research in Continuous Speech Recognition - JohnMakhoul - RichardSchwartz + JohnMakhoul + RichardSchwartz H91-1080 makhoul-schwartz-1991-research Spoken Language Systems - JohnMakhoul + JohnMakhoul H91-1081 makhoul-1991-spoken Evaluating the Use of Prosodic Information in Speech Recognition and Understanding - MariOstendorf - PattiPrice + MariOstendorf + PattiPrice H91-1082 ostendorf-price-1991-evaluating Segment-Based Acoustic Models with Multi-level Search Algorithms for Continuous Speech Recognition - MariOstendorf - J. RobinRohlicek + MariOstendorf + J. RobinRohlicek H91-1083 ostendorf-rohlicek-1991-segment @@ -653,14 +653,14 @@ Interactive Multimedia Explanation for Equipment Maintenance and Repair - KathleenMcKeown - StevenFeiner + KathleenMcKeown + StevenFeiner H91-1087 mckeown-feiner-1991-interactive Progress Report for <fixed-case>DARPA</fixed-case> <fixed-case>SLS</fixed-case> Program at <fixed-case>D</fixed-case>ragon <fixed-case>S</fixed-case>ystems, <fixed-case>I</fixed-case>nc. - JanetBaker + JanetBaker LarryGillick H91-1088 baker-gillick-1991-progress @@ -673,39 +673,39 @@ Spoken Language Recognition and Understanding - Victor W.Zue - LynetteHirschman + Victor W.Zue + LynetteHirschman H91-1090 zue-hirschman-1991-spoken Robust Speech Recognition Technology Program Summary - Clifford J.Weinstein + Clifford J.Weinstein Douglas B.Paul H91-1091 weinstein-paul-1991-robust <fixed-case>NIST</fixed-case>-<fixed-case>DARPA</fixed-case> Interagency Agreement: <fixed-case>SLS</fixed-case> Program - David S.Pallett + David S.Pallett H91-1092 pallett-1991-nist Evaluating Text Understanding Systems - Beth M.Sundheim + Beth M.Sundheim H91-1093 sundheim-1991-evaluating Active Knowledge Structures in Natural Language Understanding - YorickWilks + YorickWilks H91-1094 wilks-1991-active Robust and Portable Text Processing - RalphGrishman + RalphGrishman H91-1095 grishman-1991-robust @@ -723,27 +723,27 @@ Real-Time Speech Recognition System - HyMurveit - MitchelWeintraub + HyMurveit + MitchelWeintraub H91-1098 murveit-weintraub-1991-real <fixed-case>SRI</fixed-case>’s Real-Time Spoken Language System - PattiPrice - Robert C.Moore + PattiPrice + Robert C.Moore H91-1099 price-moore-1991-sris <fixed-case>TACITUS</fixed-case>: The Abductive Commonsense Inference-based Text Understanding System - Jerry R.Hobbs + Jerry R.Hobbs H91-1100 hobbs-1991-tacitus Linguistic Knowledge Sources for Spoken Language Understanding - Deborah A.Dahl + Deborah A.Dahl H91-1101 dahl-1991-linguistic @@ -755,29 +755,29 @@ Natural Language Research - Aravind K.Joshi - MitchMarcus - MarkSteedman - BonnieWebber + Aravind K.Joshi + MitchMarcus + MarkSteedman + BonnieWebber H91-1103 joshi-etal-1991-natural Very Large Annotated Database of <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish - MitchMarcus + MitchMarcus H91-1104 marcus-1991-large Natural Language, Knowledge Representation and Discourse - James F.Allen - Lenhart K.Schubert + James F.Allen + Lenhart K.Schubert H91-1105 allen-schubert-1991-natural The <fixed-case>P</fixed-case>enman Natural Language Project Systemics-Based Machine Translation - EduardHovy + EduardHovy H91-1106 hovy-1991-penman diff --git a/data/xml/H92.xml b/data/xml/H92.xml index 12959390f3..d3fdaf510d 100644 --- a/data/xml/H92.xml +++ b/data/xml/H92.xml @@ -12,69 +12,69 @@ Overview of the Fifth <fixed-case>DARPA</fixed-case> Speech and Natural Language Workshop - Mitchell P.Marcus + Mitchell P.Marcus H92-1001 marcus-1992-overview Session <fixed-case>I</fixed-case>: Evaluating Spoken Language - James F.Allen + James F.Allen H92-1002 allen-1992-session Multi-Site Data Collection for a Spoken Language Corpus - LynetteHirschman + LynetteHirschman H92-1003 hirschman-1992-multi <fixed-case>DARPA</fixed-case> <fixed-case>F</fixed-case>ebruary 1992 <fixed-case>ATIS</fixed-case> Benchmark Test Results - David S.Pallett + David S.Pallett Nancy L.Dahlgren - Jonathan G.Fiscus - William M.Fisher - John S.Garofolo + Jonathan G.Fiscus + William M.Fisher + John S.Garofolo Brett C.Tjaden H92-1004 pallett-etal-1992-darpa Experiments in Evaluating Interactive Spoken Language Systems - JosephPolifroni - LynetteHirschman - StephanieSeneff - VictorZue + JosephPolifroni + LynetteHirschman + StephanieSeneff + VictorZue H92-1005 polifroni-etal-1992-experiments Subject-Based Evaluation Measures for Interactive Spoken Language Systems - PattiPrice - LynetteHirschman - ElizabethShriberg + PattiPrice + LynetteHirschman + ElizabethShriberg ElizabethWade H92-1006 price-etal-1992-subject Session 2: Spoken Language Systems <fixed-case>II</fixed-case> - WayneWard + WayneWard H92-1007 ward-1992-session Spontaneous Speech Collection for the <fixed-case>ATIS</fixed-case> Domain with an Aural User Feedback Paradigm ChristinePao - JayWilpon + JayWilpon H92-1008 pao-wilpon-1992-spontaneous Human-Machine Problem Solving Using Spoken Language Systems (<fixed-case>SLS</fixed-case>): Factors Affecting Performance and User Satisfaction - ElizabethShriberg + ElizabethShriberg ElizabethWade - PattiPrice + PattiPrice H92-1009 shriberg-etal-1992-human @@ -86,21 +86,21 @@
Experiences Collecting Genuine Spoken Enquiries using <fixed-case>WOZ</fixed-case> Techniques - RogerMoore + RogerMoore AngelaMorris H92-1011 moore-morris-1992-experiences Session 3: Spoken Language Systems <fixed-case>III</fixed-case> - JohnMakhoul + JohnMakhoul H92-1012 makhoul-1992-session Progress Report on the <fixed-case>C</fixed-case>hronus System: <fixed-case>ATIS</fixed-case> Benchmark Results - RobertoPieraccini - EvelyneTzoukermann + RobertoPieraccini + EvelyneTzoukermann ZakharGorelov EstherLevin Chin-HuiLee @@ -110,64 +110,64 @@ <fixed-case>BBN</fixed-case> <fixed-case>BYBLOS</fixed-case> and <fixed-case>HARC</fixed-case> <fixed-case>F</fixed-case>ebruary 1992 <fixed-case>ATIS</fixed-case> Benchmark Results - FrancisKubala - ChrisBarry - MadeleineBates - RobertBobrow + FrancisKubala + ChrisBarry + MadeleineBates + RobertBobrow PascaleFung - RobertIngria - JohnMakhoul - LongNguyen - RichardSchwartz - DavidStallard + RobertIngria + JohnMakhoul + LongNguyen + RichardSchwartz + DavidStallard H92-1014 kubala-etal-1992-bbn Speech Understanding in Open Tasks - WayneWard + WayneWard SunilIssar - XuedongHuang - Hsiao-WuenHon - Mei-YuhHwang - SherylYoung + XuedongHuang + Hsiao-WuenHon + Mei-YuhHwang + SherylYoung MikeMatessa Fu-HuaLiu - RichardStern + RichardStern H92-1015 ward-etal-1992-speech The <fixed-case>MIT</fixed-case> <fixed-case>ATIS</fixed-case> System: <fixed-case>F</fixed-case>ebruary 1992 Progress Report - VictorZue - JamesGlass + VictorZue + JamesGlass DavidGoddeau - DavidGoodine - LynetteHirschman - MichaelPhillips - JosephPolifroni - StephanieSeneff + DavidGoodine + LynetteHirschman + MichaelPhillips + JosephPolifroni + StephanieSeneff H92-1016 zue-etal-1992-mit Recent Improvements and Benchmark Results for <fixed-case>P</fixed-case>aramax <fixed-case>ATIS</fixed-case> System - Lewis M.Norton - Deborah A.Dahl - Marcia C.Linebarger + Lewis M.Norton + Deborah A.Dahl + Marcia C.Linebarger H92-1017 norton-etal-1992-recent <fixed-case>SRI</fixed-case> <fixed-case>I</fixed-case>nternational Results <fixed-case>F</fixed-case>ebruary 1992 <fixed-case>ATIS</fixed-case> Benchmark Test - Douglas E.Appelt - EricJackson + Douglas E.Appelt + EricJackson H92-1018 appelt-jackson-1992-sri Session 4: Statistical Language Modeling - Aravind K.Joshi + Aravind K.Joshi H92-1019 joshi-1992-session @@ -182,8 +182,8 @@
Improvements in Stochastic Language Modeling - RonaldRosenfeld - XuedongHuang + RonaldRosenfeld + XuedongHuang H92-1021 rosenfeld-huang-1992-improvements @@ -195,11 +195,11 @@
Decision Tree Models Applied to the Labeling of Text with Parts-of-Speech - EzraBlack - FredJelinek - JohnLafferty + EzraBlack + FredJelinek + JohnLafferty RobertMercer - SalimRoukos + SalimRoukos H92-1023 black-etal-1992-decision @@ -212,19 +212,19 @@
Probabilistic Prediction and Picky Chart Parsing - David M.Magerman + David M.Magerman CarlWeir H92-1025 magerman-weir-1992-probabilistic Towards History-based Grammars: Using Richer Models for Probabilistic Parsing - EzraBlack - FredJelinek - JohnLafferty - David M.Magerman + EzraBlack + FredJelinek + JohnLafferty + David M.Magerman RobertMercer - SalimRoukos + SalimRoukos H92-1026 black-etal-1992-towards @@ -239,34 +239,34 @@ KevinMark MichaelMiller UlfGrenander - SteveAbney + SteveAbney H92-1028 mark-etal-1992-parameter
An Analogical Parser for Restricted Domains - DonaldHindle + DonaldHindle H92-1029 hindle-1992-analogical Automatically Acquiring Phrase Structure Using Distributional Analysis EricBrill - MitchellMarcus + MitchellMarcus H92-1030 brill-marcus-1992-automatically Session 5<fixed-case>A</fixed-case>: Acoustic Modeling - HyMurveit + HyMurveit H92-1031 murveit-1992-session Recent Topics in Speech Recognition Research at <fixed-case>NTT</fixed-case> <fixed-case>L</fixed-case>aboratories - SadaokiFurui + SadaokiFurui KiyohiroShikano - ShoichiMatsunaga + ShoichiMatsunaga TatsuoMatsuoka SatoshiTakahashi TomokazuYamada @@ -275,15 +275,15 @@ Vocabulary and Environment Adaptation in Vocabulary-Independent Speech Recognition - Hsiao-WuenHon - Kai-FuLee + Hsiao-WuenHon + Kai-FuLee H92-1033 hon-lee-1992-vocabulary Subphonetic Modeling for Speech Recognition - Mei-YuhHwang - XuedongHuang + Mei-YuhHwang + XuedongHuang H92-1034 hwang-huang-1992-subphonetic @@ -305,15 +305,15 @@
Minimizing Speaker Variation Effects for Speaker-Independent Speech Recognition - XuedongHuang + XuedongHuang H92-1037 huang-1992-minimizing Recognition Using Classification and Segmentation Scoring - OwenKimball - MariOstendorf - RobinRohlicek + OwenKimball + MariOstendorf + RobinRohlicek H92-1038 kimball-etal-1992-recognition @@ -325,13 +325,13 @@
Information Retrieval Using Robust Natural Language Processing - TomekStrzalkowski + TomekStrzalkowski H92-1040 strzalkowski-1992-information Feature Selection and Feature Extraction for Text Categorization - David D.Lewis + David D.Lewis H92-1041 lewis-1992-feature @@ -343,62 +343,62 @@
Classifying Texts Using Relevancy Signatures - EllenRiloff - WendyLehnert + EllenRiloff + WendyLehnert H92-1043 riloff-lehnert-1992-classifying Session 6: Lexicon and Lexical Semantics - Paul S.Jacobs + Paul S.Jacobs H92-1044 jacobs-1992-session One Sense Per Discourse - William A.Gale - Kenneth W.Church + William A.Gale + Kenneth W.Church DavidYarowsky H92-1045 gale-etal-1992-one Lexical Disambiguation using Simulated Annealing - JimCowie + JimCowie JoeGuthrie - LouiseGuthrie + LouiseGuthrie H92-1046 cowie-etal-1992-lexical The Acquisition of Lexical Semantic Knowledge from Large Corpora - JamesPustejovsky + JamesPustejovsky H92-1047 pustejovsky-1992-acquisition Session 7: Demonstrations and Videos - Victor W.Zue + Victor W.Zue H92-1048 zue-1992-session <fixed-case>BBN</fixed-case> Real-Time Speech Recognition Demonstrations - SteveAustin - RustyBobrow + SteveAustin + RustyBobrow DanEllard - RobertIngria - JohnMakhoul - LongNguyen + RobertIngria + JohnMakhoul + LongNguyen PatPeterson - PaulPlaceway - RichardSchwartz + PaulPlaceway + RichardSchwartz H92-1049 austin-etal-1992-bbn Session 8<fixed-case>A</fixed-case>: Machine Translation - Jerry R.Hobbs + Jerry R.Hobbs H92-1050 hobbs-1992-session @@ -406,36 +406,36 @@ Interaction between Structural Changes in Machine Translation SatoshiKinoshita JohnPhillips - Jun-ichiTsujii + Jun-ichiTsujii H92-1051 kinoshita-etal-1992-interaction
Approximating an Interlingua in a Principled Way - EduardHovy - SergeiNirenburg + EduardHovy + SergeiNirenburg H92-1052 hovy-nirenburg-1992-approximating Dividing and Conquering Long Sentences in a Translation System - Peter F.Brown - Stephen A.Della Pietra - Vincent J.Della Pietra - Robert L.Mercer + Peter F.Brown + Stephen A.Della Pietra + Vincent J.Della Pietra + Robert L.Mercer SuryaMohanty H92-1053 brown-etal-1992-dividing Session 8<fixed-case>B</fixed-case>: Robust Speech Processing - Jordan R.Cohen + Jordan R.Cohen H92-1054 cohen-1992-session Multiple Approaches to Robust Speech Recognition - Richard M.Stern + Richard M.Stern Fu-HuaLiu YoshiakiOhshima Thomas M.Sullivan @@ -445,9 +445,9 @@ Reduced Channel Dependence for Speech Recognition - HyMurveit + HyMurveit JohnButzberger - MitchWeintraub + MitchWeintraub H92-1056 murveit-etal-1992-reduced @@ -468,38 +468,38 @@
Session 9: Natural Language Processings - KathleenMcKeown + KathleenMcKeown H92-1059 mckeown-1992-session A Relaxation Method for Understanding Speech Utterances - StephanieSeneff + StephanieSeneff H92-1060 seneff-1992-relaxation Fragment Processing in the <fixed-case>DELPHI</fixed-case> System - DavidStallard - RobertBobrow + DavidStallard + RobertBobrow H92-1061 stallard-bobrow-1992-fragment Syntactic/Semantic Coupling in the <fixed-case>BBN</fixed-case> <fixed-case>DELPHI</fixed-case> System - RobertBobrow - RobertIngria - DavidStallard + RobertBobrow + RobertIngria + DavidStallard H92-1062 bobrow-etal-1992-syntactic A New Approach to Text Understanding - RalphWeischedel - DamarisAyuso - SeanBoisen - HeidiFox - RobertIngria + RalphWeischedel + DamarisAyuso + SeanBoisen + HeidiFox + RobertIngria H92-1063 weischedel-etal-1992-new @@ -511,7 +511,7 @@
Session 10: Large Vocabulary <fixed-case>CSR</fixed-case> - George R.Doddington + George R.Doddington H92-1065 doddington-1992-session @@ -537,56 +537,56 @@ Spontaneous Speech Effects In Large Vocabulary Speech Recognition Applications JohnButzberger - HyMurveit - ElizabethShriberg - PattiPrice + HyMurveit + ElizabethShriberg + PattiPrice H92-1068 butzberger-etal-1992-spontaneous Speaker-Independent Phone Recognition Using <fixed-case>BREF</fixed-case> Jean-LucGauvain - Lori F.Lamel + Lori F.Lamel H92-1069 gauvain-lamel-1992-speaker Session 1<fixed-case>O</fixed-case>b: Core <fixed-case>NL</fixed-case> Lexicon and Grammar - MarkLiberman + MarkLiberman H92-1070 liberman-1992-session A National Resource Grammar - Jerry R.Hobbs + Jerry R.Hobbs H92-1071 hobbs-1992-national Session 11: Continuous Speech Recognition and Evaluation <fixed-case>I</fixed-case> - Clifford J.Weinstein + Clifford J.Weinstein H92-1072 weinstein-1992-session The Design for the <fixed-case>W</fixed-case>all <fixed-case>S</fixed-case>treet <fixed-case>J</fixed-case>ournal-based <fixed-case>CSR</fixed-case> Corpus Douglas B.Paul - Janet M.Baker + Janet M.Baker H92-1073 paul-baker-1992-design <fixed-case>CSR</fixed-case> Corpus Development - George R.Doddington + George R.Doddington H92-1074 doddington-1992-csr Collection and Analyses of <fixed-case>WSJ</fixed-case>-<fixed-case>CSR</fixed-case> Data at <fixed-case>MIT</fixed-case> - MichaelPhillips - JamesGlass - JosephPolifroni - VictorZue + MichaelPhillips + JamesGlass + JosephPolifroni + VictorZue H92-1075 phillips-etal-1992-collection @@ -599,20 +599,20 @@
Session 12: Continuous Speech Recognition and Evaluation <fixed-case>II</fixed-case> - Clifford J.Weinstein + Clifford J.Weinstein H92-1077 weinstein-1992-session-12 <fixed-case>DARPA</fixed-case> <fixed-case>F</fixed-case>ebruary 1992 Pilot Corpus <fixed-case>CSR</fixed-case> “Dry Run” Benchmark Test Results - David S.Pallett + David S.Pallett H92-1078 pallett-1992-darpa Large Vocabulary Recognition of <fixed-case>W</fixed-case>all <fixed-case>S</fixed-case>treet <fixed-case>J</fixed-case>ournal Sentences at <fixed-case>D</fixed-case>ragon <fixed-case>S</fixed-case>ystems - JamesBaker - JanetBaker + JamesBaker + JanetBaker PaulBamberg KathleenBishop LarryGillick @@ -651,30 +651,30 @@ Performance of <fixed-case>SRI</fixed-case>’s <fixed-case>DECIPHER</fixed-case>™ Speech Recognition System on <fixed-case>DARPA</fixed-case>’s <fixed-case>CSR</fixed-case> Task - HyMurveit + HyMurveit JohnButzberger - MitchWeintraub + MitchWeintraub H92-1083 murveit-etal-1992-performance Session 13: Prosody - PattiPrice - JuliaHirschberg + PattiPrice + JuliaHirschberg H92-1084 price-hirschberg-1992-session Automatic Detection and Correction of Repairs in Human-Computer Dialog - ElizabethShriberg - JohnBear - JohnDowding + ElizabethShriberg + JohnBear + JohnDowding H92-1085 shriberg-etal-1992-automatic Prosodic Structure, Performance Structure and Phrase Structure - StevenAbney + StevenAbney H92-1086 abney-1992-prosodic @@ -687,17 +687,17 @@
Towards Using Prosody in Speech Recognition/Understanding Systems: Differences Between Read and Spontaneous Speech - Kim E.A.Silverman + Kim E.A.Silverman EleonoraBlaauw JudithSpitz - John F.Pitrelli + John F.Pitrelli H92-1088 silverman-etal-1992-towards Intonational Features of Local and Global Discourse Structure - JuliaHirschberg - BarbaraGrosz + JuliaHirschberg + BarbaraGrosz H92-1089 hirschberg-grosz-1992-intonational @@ -722,61 +722,61 @@
Weight Estimation for <fixed-case>N</fixed-case>-Best Rescoring - AshvinKannan - MariOstendorf - J. RobinRohlicek + AshvinKannan + MariOstendorf + J. RobinRohlicek H92-1093 kannan-etal-1992-weight Augmenting With Slot Filler Relevancy Signatures Data - EllenRiloff - WendyLehnert + EllenRiloff + WendyLehnert H92-1094 riloff-lehnert-1992-augmenting Language Understanding Research at <fixed-case>P</fixed-case>aramax - Deborah A.Dahl + Deborah A.Dahl CarlWeir - Suzanne LiebowitzTaylor - Lewis M.Norton - Marcia C.Linebarger + Suzanne LiebowitzTaylor + Lewis M.Norton + Marcia C.Linebarger MarkLipshutz H92-1095 dahl-etal-1992-language Development of a Spoken Language System - JohnMakhoul - MadeleineBates + JohnMakhoul + MadeleineBates H92-1096 makhoul-bates-1992-development Robust Continuous Speech Recognition - JohnMakhoul - RichardSchwartz + JohnMakhoul + RichardSchwartz H92-1097 makhoul-schwartz-1992-robust Robustness, Portability, and Scalability Language Systems - RalphWeischedel + RalphWeischedel H92-1098 weischedel-1992-robustness Evaluating the Use of Prosodic Information in Speech Recognition and Understanding - MariOstendorf - PattiPrice + MariOstendorf + PattiPrice H92-1099 ostendorf-price-1992-evaluating Segment-Based Acoustic Models with Multi-level Search Algorithms for Continuous Speech Recognition - MariOstendorf - J. RobinRohlicek + MariOstendorf + J. RobinRohlicek H92-1100 ostendorf-rohlicek-1992-segment @@ -794,15 +794,15 @@
Extracting Constraints on Word Usage from Large Text Corpora - KathleenMcKeown - DianeLitman - RebeccaPassonneau + KathleenMcKeown + DianeLitman + RebeccaPassonneau H92-1103 mckeown-etal-1992-extracting Research in Continuous Speech Recognition at <fixed-case>D</fixed-case>ragon <fixed-case>S</fixed-case>ystems Under the <fixed-case>DARPA</fixed-case> <fixed-case>SLS</fixed-case> Program - JanetBaker + JanetBaker LarryGillick RobertRoth H92-1104 @@ -822,8 +822,8 @@ Spoken Language Recognition and Understanding - VictorZue - LynetteHirschman + VictorZue + LynetteHirschman H92-1107 zue-hirschman-1992-spoken @@ -835,20 +835,20 @@
Robust Continuous Speech Recognition Technology Program Summary - Clifford J.Weinstein + Clifford J.Weinstein Douglas B.Paul H92-1109 weinstein-paul-1992-robust <fixed-case>NIST</fixed-case>-<fixed-case>DARPA</fixed-case> Interagency Agreement: Spoken Language Program - David S.Pallett + David S.Pallett H92-1110 pallett-1992-nist Evaluating Text Understanding Systems - Beth M.Sundheim + Beth M.Sundheim H92-1111 sundheim-1992-evaluating @@ -874,7 +874,7 @@
Research in Natural Language Processing - RalphGrishman + RalphGrishman H92-1115 grishman-1992-research @@ -886,7 +886,7 @@
Annotation of <fixed-case>ATIS</fixed-case> Data - KateHunicke-Smith + KateHunicke-Smith JaredBernstein H92-1117 hunicke-smith-bernstein-1992-annotation @@ -900,41 +900,41 @@ Real-Time Speech Recognition System - MitchelWeintraub + MitchelWeintraub H92-1119 weintraub-1992-real A Real-Time Spoken-Language System for Interactive Problem Solving - PattiPrice - Robert C.Moore + PattiPrice + Robert C.Moore H92-1120 price-moore-1992-real <fixed-case>TACITUS</fixed-case>: Research in Text Understanding - Jerry R.Hobbs + Jerry R.Hobbs H92-1121 hobbs-1992-tacitus <fixed-case>NLP</fixed-case> and Text Analysis at the <fixed-case>U</fixed-case>niversity of <fixed-case>M</fixed-case>assachusetts - Wendy G.Lehnert + Wendy G.Lehnert H92-1122 lehnert-1992-nlp Natural Language Research - AravindJoshi - MitchMarcus - MarkSteedman - BonnieWebber + AravindJoshi + MitchMarcus + MarkSteedman + BonnieWebber H92-1123 joshi-etal-1992-natural In-Depth Knowledge-Based Machine Translation - EduardHovy + EduardHovy H92-1124 hovy-1992-depth diff --git a/data/xml/H93.xml b/data/xml/H93.xml index c59da7b6e6..1428aed30c 100644 --- a/data/xml/H93.xml +++ b/data/xml/H93.xml @@ -12,22 +12,22 @@ Overview of the <fixed-case>ARPA</fixed-case> Human Language Technology Workshop - MadeleineBates + MadeleineBates H93-1001 bates-1993-overview Session 1: Spoken Language Systems - Alexander I.Rudnicky + Alexander I.Rudnicky H93-1002 rudnicky-1993-session Benchmark Tests for the <fixed-case>DARPA</fixed-case> Spoken Language Program - David S.Pallett + David S.Pallett Johathan G.Fiscus - William M.Fisher - John S.Garofolo + William M.Fisher + John S.Garofolo H93-1003 pallett-etal-1993-benchmark @@ -48,8 +48,8 @@
The <fixed-case>HCRC</fixed-case> Map Task Corpus: Natural Dialogue for Speech Recognition - Henry S.Thompson - AnneAnderson + Henry S.Thompson + AnneAnderson Ellen GurmanBard GwynethDoherty-Sneddon AlisonNewlands @@ -59,28 +59,28 @@ A Portable Approach to Last Resort Parsing and Interpretation - Marcia C.Linebarger - Lewis M.Norton - Deborah A.Dahl + Marcia C.Linebarger + Lewis M.Norton + Deborah A.Dahl H93-1006 linebarger-etal-1993-portable The Semantic Linker- A New Fragment Combining Method - DavidStallard - RobertBobrow + DavidStallard + RobertBobrow H93-1007 stallard-bobrow-1993-semantic <fixed-case>G</fixed-case>emini: A Natural Language System for Spoken-Language Understanding - JohnDowding - Jean MarkGawron - DougAppelt - JohnBear + JohnDowding + Jean MarkGawron + DougAppelt + JohnBear LynnCherny - RobertMoore - DougMoran + RobertMoore + DougMoran H93-1008 dowding-etal-1993-gemini @@ -96,14 +96,14 @@
Session 2: Invited Overviews - MadeleineBates + MadeleineBates H93-1010 bates-1993-session Survey of the <fixed-case>M</fixed-case>essage <fixed-case>U</fixed-case>nderstanding <fixed-case>C</fixed-case>onferences - Beth M.Sundheim - Nancy A.Chinchor + Beth M.Sundheim + Nancy A.Chinchor H93-1011 sundheim-chinchor-1993-survey @@ -122,75 +122,75 @@ Efficient Cepstral Normalization for Robust Speech Recognition Fu-HuaLiu - Richard M.Stern - XuedongHuang + Richard M.Stern + XuedongHuang AlejandroAcero H93-1014 liu-etal-1993-efficient Comparative Experiments on Large Vocabulary Speech Recognition - RichardSchwartz + RichardSchwartz TasosAnastasakos - FrancisKubala - JohnMakhoul - LongNguyen - GeorgeZavaliagkos + FrancisKubala + JohnMakhoul + LongNguyen + GeorgeZavaliagkos H93-1015 schwartz-etal-1993-comparative An Overview of the <fixed-case>SPHINX</fixed-case>-<fixed-case>II</fixed-case> Speech Recognition System - XuedongHuang - FilenoAlleva - Mei-YuhHwang - RonaldRosenfeld + XuedongHuang + FilenoAlleva + Mei-YuhHwang + RonaldRosenfeld H93-1016 huang-etal-1993-overview Progressive-Search Algorithms for Large-Vocabulary Speech Recognition - HyMurveit + HyMurveit JohnButzberger - VassiliosDigalakis - MitchWeintraub + VassiliosDigalakis + MitchWeintraub H93-1017 murveit-etal-1993-progressive Search Algorithms for Software-Only Real-Time Recognition with Very Large Vocabularies - LongNguyen - RichardSchwartz - FrancisKubala - PaulPlaceway + LongNguyen + RichardSchwartz + FrancisKubala + PaulPlaceway H93-1018 nguyen-etal-1993-search Identification of Non-Linguistic Speech Features Jean-LucGauvain - Lori F.Lamel + Lori F.Lamel H93-1019 gauvain-lamel-1993-identification On the Use of Tied-Mixture Distributions - OwenKimball - MariOstendorf + OwenKimball + MariOstendorf H93-1020 kimball-ostendorf-1993-use Adaptive Language Modeling Using the Maximum Entropy Principle RaymondLau - RonaldRosenfeld - SalimRoukos + RonaldRosenfeld + SalimRoukos H93-1021 lau-etal-1993-adaptive Improved Keyword-Spotting Using <fixed-case>SRI</fixed-case>’s <fixed-case>DECIPHER</fixed-case>™ Large-Vocabuarly Speech-Recognition System - MitchelWeintraub + MitchelWeintraub H93-1022 weintraub-1993-improved @@ -202,8 +202,8 @@ StephenLowe RobertRoth FrancescoScattone - JamesBaker - JanetBaker + JamesBaker + JanetBaker JohnBridle MelvynHunt JeremyOrloff @@ -212,22 +212,22 @@
Session 4: Natural Language - Robert C.Moore + Robert C.Moore H93-1024 moore-1993-session Heuristics for Broad-Coverage Natural Language Parsing - Michael C.McCord + Michael C.McCord H93-1025 mccord-1993-heuristics <fixed-case>FASTUS</fixed-case>: A System for Extracting Information from Text - Jerry R.Hobbs - DouglasAppelt - JohnBear - DavidIsrael + Jerry R.Hobbs + DouglasAppelt + JohnBear + DavidIsrael MegumiKameyalna MabryTyson H93-1026 @@ -236,67 +236,67 @@ Interpreting Temporal Adverbials Chung HeeHwang - Lenhart K.Schubert + Lenhart K.Schubert H93-1027 hwang-schubert-1993-interpreting The <fixed-case>M</fixed-case>urasaki Project: Multilingual Natural Language Understanding ChinatsuAone - HatteBlejer + HatteBlejer SharonFlank - DouglasMcKee + DouglasMcKee SandyShinn H93-1028 aone-etal-1993-murasaki Validation of Terminological Inference in an Information Extraction Task - MarcVilain + MarcVilain H93-1029 vilain-1993-validation Session 5: Discourse - Jerry R.Hobbs + Jerry R.Hobbs H93-1030 hobbs-1993-session Development, Implementation and Testing of a Discourse Model for Newspaper Texts - Elizabeth D.Liddy + Elizabeth D.Liddy Kenneth A.McVearry WoojinPaik - EdmundYu + EdmundYu MaryMcKenna H93-1031 liddy-etal-1993-development Indexing and Exploiting a Discourse History to Generate Context-Sensitive Explanations - Johanna D.Moore + Johanna D.Moore H93-1032 moore-1993-indexing Generic Plan Recognition for Dialogue Systems GeorgeFerguson - James F.Allen + James F.Allen H93-1033 ferguson-allen-1993-generic Efficient Collaborative Discourse: A Theory and Its Implementation - Alan W.Biermann - Curry I.Guinn + Alan W.Biermann + Curry I.Guinn D. RichardHipp - Ronnie W.Smith + Ronnie W.Smith H93-1034 biermann-etal-1993-efficient Machine Translation - AlexWaibel + AlexWaibel H93-1035 waibel-1993-machine @@ -309,7 +309,7 @@ <fixed-case>LINGSTAT</fixed-case>: An Interactive, Machine-Aided Translation System JonathanYamron - JamesBaker + JamesBaker PaulBamberg HaakonChevalier TaikoDietzel @@ -324,28 +324,28 @@ An <fixed-case>MAT</fixed-case> Tool and Its Effectiveness - RobertFrederking + RobertFrederking DeanGrannes PeterCousseau - SergeiNirenburg + SergeiNirenburg H93-1038 frederking-etal-1993-mat But Dictionaries Are Data Too - Peter F.Brown - Stephen A.Della Pietra - Vincent J.Della Pietra + Peter F.Brown + Stephen A.Della Pietra + Vincent J.Della Pietra Meredith J.Goldsmith - JanHajic - Robert L.Mercer + JanHajic + Robert L.Mercer SuryaMohanty H93-1039 brown-etal-1993-dictionaries Evaluation of Machine Translation - John S.White + John S.White Theresa A.O’Connell H93-1040 white-oconnell-1993-evaluation @@ -371,30 +371,30 @@ A Speech to Speech Translation System Built From Standard Components - MannyRayner - HiyanAlshawi + MannyRayner + HiyanAlshawi IvanBretan - DavidCarter - VassiliosDigalakis - BjornGamback + DavidCarter + VassiliosDigalakis + BjornGamback JaanKaja JussiKarlgren BertilLyberg StevePulman - PattiPrice + PattiPrice ChristerSamuelsson H93-1042 rayner-etal-1993-speech Session 7: Demonstrations - HyMurveit + HyMurveit H93-1043 murveit-1993-session Session 8: Statistical Language Modeling - MitchellMarcus + MitchellMarcus H93-1044 marcus-1993-session @@ -402,13 +402,13 @@ Example-Based Correction of Word Segmentation and Part of Speech Labelling TomoyoshiMatsukawa ScottMiller - RalphWeischedel + RalphWeischedel H93-1045 matsukawa-etal-1993-example Measures and Models for Phrase Recognition - StevenAbney + StevenAbney H93-1046 abney-1993-measures @@ -420,7 +420,7 @@
Prediction of Lexicalized Tree Fragments in Text - DonaldHindle + DonaldHindle H93-1048 hindle-1993-prediction @@ -432,7 +432,7 @@
Smoothing of Automatically Generated Selectional Constraints - RalphGrishman + RalphGrishman JohnSterling H93-1050 grishman-sterling-1993-smoothing @@ -441,7 +441,7 @@ Corpus-Based Statistical Sense Resolution ClaudiaLeacock GeoffreyTowell - EllenVoorhees + EllenVoorhees H93-1051 leacock-etal-1993-corpus @@ -453,7 +453,7 @@
Augmenting Lexicons Automatically: Clustering Semantically Related Adjectives - KathleenMcKeown + KathleenMcKeown VasileiosHatzivassiloglou H93-1053 mckeown-hatzivassiloglou-1993-augmenting @@ -466,13 +466,13 @@ Session 9: Government Panel - Carol J.Van Ess-Dykema + Carol J.Van Ess-Dykema H93-1055 van-ess-dykema-1993-session Projected Government Needs in Human Language Technology and the Role of Researchers in Meeting Them - Helen M.Gigley + Helen M.Gigley H93-1056 gigley-1993-projected @@ -490,14 +490,14 @@
Session 10: THE LEXICON - RalphGrishman + RalphGrishman H93-1059 grishman-1993-session The <fixed-case>COMLEX</fixed-case> Syntax Project - RalphGrishman - CatherineMacleod + RalphGrishman + CatherineMacleod SusanneWolff H93-1060 grishman-etal-1993-comlex @@ -514,8 +514,8 @@ Interpretation of Proper Nouns for Information Retrieval WoojinPaik - Elizabeth D.Liddy - EdmundYu + Elizabeth D.Liddy + EdmundYu MaryMcKenna H93-1062 paik-etal-1993-interpretation @@ -528,7 +528,7 @@ On Customizing Prosody in Speech Synthesis: Names and Addresses as a Case in Point - Kim E. A.Silverman + Kim E. A.Silverman H93-1064 silverman-1993-customizing @@ -540,8 +540,8 @@ A Speech-First Model for Repair Detection and Correction - ChristineNakatani - JuliaHirschberg + ChristineNakatani + JuliaHirschberg H93-1066 nakatani-hirschberg-1993-speech @@ -562,49 +562,49 @@
Document retrieval and text retrieval - KarenSparck Jones + KarenSparck Jones H93-1069 sparck-jones-1993-document The Importance of Proper Weighting Methods - ChrisBuckley + ChrisBuckley H93-1070 buckley-1993-importance Query Processing for Retrieval From Large Text Bases JohnBroglio - W. BruceCroft + W. BruceCroft H93-1071 broglio-croft-1993-query An Overview of <fixed-case>DR-LINK</fixed-case> and Its Approach to Document Filtering - Elizabeth D.Liddy + Elizabeth D.Liddy WoojinPaik - Edmund S.Yu + Edmund S.Yu Kenneth A.McVearry H93-1072 liddy-etal-1993-overview Session 13: New Directions - RalphWeischedel + RalphWeischedel H93-1073 weischedel-1993-session Mode preference in a simple data-retrieval task - Alexander I.Rudnicky + Alexander I.Rudnicky H93-1074 rudnicky-1993-mode A Simulation-Based Research Strategy for Designing Complex <fixed-case>NL</fixed-case> Systems - SharonOviatt - PhilipCohen - MichelleWang + SharonOviatt + PhilipCohen + MichelleWang JeremyGaston H93-1075 oviatt-etal-1993-simulation @@ -630,35 +630,35 @@ Robust Continuous Speech Recognition - JohnMakhoul - RichardSchwartz + JohnMakhoul + RichardSchwartz H93-1079 makhoul-schwartz-1993-robust Robustness, Portability, and Scalability of Natural Language Systems - RalphWeischedel + RalphWeischedel H93-1080 weischedel-1993-robustness Usable, Real-Time, Interactive Spoken Language Systems - JohnMakhoul - MadeleineBates + JohnMakhoul + MadeleineBates H93-1081 makhoul-bates-1993-usable Evaluating the Use of Prosodic Information in Speech Recognition and Understanding - MariOstendorf - PattiPrice + MariOstendorf + PattiPrice H93-1082 ostendorf-price-1993-evaluating Segment-Based Acoustic Models for Continuous Speech Recognition - MariOstendorf - J. RobinRohlicek + MariOstendorf + J. RobinRohlicek H93-1083 ostendorf-rohlicek-1993-segment @@ -670,8 +670,8 @@
Extracting Constraints on Word Usage from Large Text Corpora - KathleenMcKeown - RebeccaPassonneau + KathleenMcKeown + RebeccaPassonneau H93-1085 mckeown-passonneau-1993-extracting @@ -686,13 +686,13 @@ <fixed-case>LINGSTAT</fixed-case>: An Interactive, Machine-Aided Translation System JonathanYamron - JamesBaker + JamesBaker H93-1087 yamron-baker-1993-lingstat Research in Large Vocabulary Continuous Speech Recognition - JanetBaker + JanetBaker LarryGillick RobertRoth H93-1088 @@ -706,20 +706,20 @@ <fixed-case>M</fixed-case>atch<fixed-case>P</fixed-case>lus: A Context Vector System for Document Retrieval - Stephen L.Gallant + Stephen L.Gallant William R.Caid H93-1090 gallant-caid-1993-matchplus Applying Statistical Methods to Machine Translation - Peter F.Brown + Peter F.Brown H93-1091 brown-1993-applying Automatic Extraction of Grammars From Annotated Text - SalimRoukos + SalimRoukos H93-1092 roukos-1993-automatic @@ -731,27 +731,27 @@
Robust Continuous Speech Recognition Technology Program Summary - Clifford J.Weinstein + Clifford J.Weinstein Douglas B.Paul H93-1094 weinstein-paul-1993-robust Spoken Language Recognition and Understanding - VictorZue - LynetteHirschman + VictorZue + LynetteHirschman H93-1095 zue-hirschman-1993-spoken <fixed-case>NIST</fixed-case>-<fixed-case>DARPA</fixed-case> Interagency Agreement: Spoken Language Program - David S.Pallett + David S.Pallett H93-1096 pallett-1993-nist Information Extraction System Evaluation - Beth M.Sundheim + Beth M.Sundheim H93-1097 sundheim-1993-information @@ -777,13 +777,13 @@
Research in Natural Language Processing - RalphGrishman + RalphGrishman H93-1101 grishman-1993-research Robust Text Processing and Information Retrieval - TomekStrzalkowski + TomekStrzalkowski H93-1102 strzalkowski-1993-robust @@ -801,13 +801,13 @@
Exploiting Concept Spaces for Text Retrieval - Ellen M.Voorhees + Ellen M.Voorhees H93-1105 voorhees-1993-exploiting Annotation of <fixed-case>ATIS</fixed-case> Data - KateHunicke-Smith + KateHunicke-Smith JaredBernstein H93-1106 hunicke-smith-bernstein-1993-annotation @@ -821,58 +821,58 @@ A Real-Time Spoken-Language System for Interactive Problem Solving - PattiPrice - Robert C.Moore + PattiPrice + Robert C.Moore H93-1108 price-moore-1993-real High Performance Speech Recognition Using Consistency Modeling - VassiliosDigalakis - HyMurveit - MitchWeintraub + VassiliosDigalakis + HyMurveit + MitchWeintraub H93-1109 digalakis-etal-1993-high <fixed-case>DR</fixed-case>-<fixed-case>LINK</fixed-case>: Document Retrieval Using Linguistic Knowledge - Elizabeth D.Liddy - Sung H.Myaeng + Elizabeth D.Liddy + Sung H.Myaeng H93-1110 liddy-myaeng-1993-dr <fixed-case>UM</fixed-case>ass/Hughes <fixed-case>TIPSTER</fixed-case> Project on Extraction from Text - WendyLehnert - CharlesDolan + WendyLehnert + CharlesDolan H93-1111 lehnert-dolan-1993-umass Text Retrieval and Routing Techniques Based on an Inference Net - W. BruceCroft + W. BruceCroft H93-1112 croft-1993-text Natural Language Research - AravindJoshi - MitchMarcus - MarkSteedman - BonnieWebber + AravindJoshi + MitchMarcus + MarkSteedman + BonnieWebber H93-1113 joshi-etal-1993-natural Natural Language Planning Dialogue for Intelligent Applications - James F.Allen - LenSchubert + James F.Allen + LenSchubert H93-1114 allen-schubert-1993-natural The <fixed-case>P</fixed-case>enman Project on Knowledge-Based Machine Translation - EduardHovy + EduardHovy H93-1115 hovy-1993-penman diff --git a/data/xml/H94.xml b/data/xml/H94.xml index f4ad92516d..c21a24c947 100644 --- a/data/xml/H94.xml +++ b/data/xml/H94.xml @@ -12,7 +12,7 @@ Overview of the 1994 <fixed-case>ARPA</fixed-case> <fixed-case>H</fixed-case>uman <fixed-case>L</fixed-case>anguage <fixed-case>T</fixed-case>echnology Workshop - Clifford J.Weinstein + Clifford J.Weinstein H94-1001 weinstein-1994-overview @@ -24,15 +24,15 @@
The <fixed-case>C</fixed-case>omlex Syntax Project: The First Year - CatherineMacleod - RalphGrishman - AdamMeyers + CatherineMacleod + RalphGrishman + AdamMeyers H94-1003 macleod-etal-1994-comlex Lexicons for Human Language Technology - MarkLiberman + MarkLiberman H94-1004 liberman-1994-lexicons @@ -58,82 +58,82 @@
Corpus Development Activities at the <fixed-case>C</fixed-case>enter for <fixed-case>S</fixed-case>poken <fixed-case>L</fixed-case>anguage <fixed-case>U</fixed-case>nderstanding - RonCole + RonCole MikeNoel Daniel C.Burnett MarkFanty TerriLander - BeatriceOshika + BeatriceOshika StephenSutton H94-1008 cole-etal-1994-corpus The Hub and Spoke Paradigm for <fixed-case>CSR</fixed-case> Evaluation - FrancisKubala + FrancisKubala JeromeBellegarda - JordanCohen - DavidPallett + JordanCohen + DavidPallett DougPaul MikePhillips RajaRajasekaran FredRichardson - MichaelRiley + MichaelRiley RoniRosenfeld BobRoth - MitchWeintraub + MitchWeintraub H94-1009 kubala-etal-1994-hub Expanding the Scope of the <fixed-case>ATIS</fixed-case> Task: The <fixed-case>ATIS</fixed-case>-3 Corpus - Deborah A.Dahl - MadeleineBates - MichaelBrown - WilliamFisher - KateHunicke-Smith - DavidPallett + Deborah A.Dahl + MadeleineBates + MichaelBrown + WilliamFisher + KateHunicke-Smith + DavidPallett ChristinePao - AlexanderRudnicky - ElizabethShriberg + AlexanderRudnicky + ElizabethShriberg H94-1010 dahl-etal-1994-expanding 1993 Benchmark Tests for the <fixed-case>ARPA</fixed-case> Spoken Language Program - David S.Pallett - Jonathan G.Fiscus - William M.Fisher - John S.Garofolo + David S.Pallett + Jonathan G.Fiscus + William M.Fisher + John S.Garofolo Bruce A.Lund - Mark A.Przybocki + Mark A.Przybocki H94-1011 pallett-etal-1994-1993 Session 2: Language Modeling - XuedongHuang + XuedongHuang H94-1012 huang-1994-session A Hybrid Approach to Adaptive Statistical Language Modeling - RonaldRosenfeld + RonaldRosenfeld H94-1013 rosenfeld-1994-hybrid Language Modeling with Sentence-Level Mixtures RukminiIyer - MariOstendorf - J. RobinRohlicek + MariOstendorf + J. RobinRohlicek H94-1014 iyer-etal-1994-language Speech Recognition Using a Stochastic Language Model Integrating Local and Global Constraints RyosukeIsotani - ShoichiMatsunaga + ShoichiMatsunaga H94-1015 isotani-matsunaga-1994-speech @@ -150,26 +150,26 @@
Session 3: Human Language Evaluation - LynetteHirschman + LynetteHirschman H94-1017 hirschman-1994-session Towards Better <fixed-case>NLP</fixed-case> System Evaluation - KarenSparck Jones + KarenSparck Jones H94-1018 sparck-jones-1994-towards Automatic Evaluation of Computer Generated Text: A Progress Report on the <fixed-case>T</fixed-case>ext<fixed-case>E</fixed-case>val Project ChrisBrew - Henry S.Thompson + Henry S.Thompson H94-1019 brew-thompson-1994-automatic The <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank: Annotating Predicate Argument Structure - MitchellMarcus + MitchellMarcus GraceKim Mary AnnMarcinkiewicz RobertMacIntyre @@ -182,25 +182,25 @@ Whither Written Language Evaluation? - RalphGrishman + RalphGrishman H94-1021 grishman-1994-whither Semantic Evaluation for Spoken-Language Systems - Robert C.Moore + Robert C.Moore H94-1022 moore-1994-semantic Session 4: Machine Translation - EduardHovy + EduardHovy H94-1023 hovy-1994-session Evaluation in the <fixed-case>ARPA</fixed-case> Machine Translation Program: 1993 Methodology - John S.White + John S.White Theresa A.O’Connell H94-1024 white-oconnell-1994-evaluation @@ -208,33 +208,33 @@ Building <fixed-case>J</fixed-case>apanese-<fixed-case>E</fixed-case>nglish Dictionary based on Ontology for Machine Translation AkitoshiOkumura - EduardHovy + EduardHovy H94-1025 okumura-hovy-1994-building Toward Multi-Engine Machine Translation - SergeiNirenburg - RobertFrederking + SergeiNirenburg + RobertFrederking H94-1026 nirenburg-frederking-1994-toward Translating Collocations for Use in Bilingual Lexicons - FrankSmadja - KathleenMcKeown + FrankSmadja + KathleenMcKeown H94-1027 smadja-mckeown-1994-translating The <fixed-case>C</fixed-case>andide System for Machine Translation - Adam L.Berger - Peter F.Brown - Stephen A.Della Pietra - Vincent J.Della Pietra + Adam L.Berger + Peter F.Brown + Stephen A.Della Pietra + Vincent J.Della Pietra John R.Gillett - John D.Lafferty - Robert L.Mercer + John D.Lafferty + Robert L.Mercer HarryPrintz LubosUres H94-1028 @@ -252,58 +252,58 @@ Session 5: Natural Language, Discourse - Paul S.Jacobs + Paul S.Jacobs H94-1030 jacobs-1994-session Issues and Methodology for Template Design for Information Extraction - BoyanOnyshkevych + BoyanOnyshkevych H94-1031 onyshkevych-1994-issues Principles of Template Design - JerryHobbs - DavidIsrael + JerryHobbs + DavidIsrael H94-1032 hobbs-israel-1994-principles Pattern Matching in a Linguistically-Motivated Text Understanding System - Damaris M.Ayuso + Damaris M.Ayuso H94-1033 ayuso-1994-pattern Tagging Speech Repairs - Peter A.Heeman - JamesAllen + Peter A.Heeman + JamesAllen H94-1034 heeman-allen-1994-tagging Information Based Intonation Synthesis ScottPrevost - MarkSteedman + MarkSteedman H94-1035 prevost-steedman-1994-information Session 6: Spoken Language Systems - MadeleineBates + MadeleineBates H94-1036 bates-1994-session <fixed-case>PEGASUS</fixed-case>: A Spoken Language Interface for On-Line Air Travel Planning - VictorZue - StephanieSeneff - JosephPolifroni - MichaelPhillips + VictorZue + StephanieSeneff + JosephPolifroni + MichaelPhillips ChristinePao DavidGoddeau - JamesGlass + JamesGlass EricBrill H94-1037 zue-etal-1994-pegasus @@ -316,44 +316,44 @@ Recent Improvements in the <fixed-case>CMU</fixed-case> Spoken Language Understanding System - WayneWard + WayneWard SunilIssar H94-1039 ward-issar-1994-recent Combining Knowledge Sources to Reorder N-Best Speech Hypothesis Lists - MannyRayner - DavidCarter - VassiliosDigalakis - PattiPrice + MannyRayner + DavidCarter + VassiliosDigalakis + PattiPrice H94-1040 rayner-etal-1994-combining Predicting and Managing Spoken Disfluencies During Human-Computer Interaction - SharonOviatt + SharonOviatt H94-1041 oviatt-1994-predicting Integrated Techniques for Phrase Extraction From Speech - MarieMeteer - J. RobinRohlicek + MarieMeteer + J. RobinRohlicek H94-1042 meteer-rohlicek-1994-integrated Session 7: Demonstrations - VictorAbrash + VictorAbrash H94-1043 abrash-1994-session A Prototype Reading Coach that Listens: Summary of Project <fixed-case>LISTEN</fixed-case> - AlexHauptmann + AlexHauptmann JackMostow - Steven F.Roth + Steven F.Roth MatthewKane AdamSwift H94-1044 @@ -361,15 +361,15 @@ Session 8 &: 9: Statistical and Learning Methods - FrederickJelinek + FrederickJelinek H94-1045 jelinek-1994-session Using a Semantic Concordance for Sense Identification George A.Miller - MartinChodorow - ShariLandes + MartinChodorow + ShariLandes ClaudiaLeacock Robert G.Thomas H94-1046 @@ -377,16 +377,16 @@ A New Approach to Word Sense Disambiguation - RebeccaBruce - JanyceWiebe + RebeccaBruce + JanyceWiebe H94-1047 bruce-wiebe-1994-new A Maximum Entropy Model for Prepositional Phrase Attachment - AdwaitRatnaparkhi + AdwaitRatnaparkhi JeffReynar - SalimRoukos + SalimRoukos H94-1048 ratnaparkhi-etal-1994-maximum @@ -399,15 +399,15 @@ Weighted Rational Transductions and their Application to Human Language Processing FernandoPereira - MichaelRiley - RichardSproat + MichaelRiley + RichardSproat H94-1050 pereira-etal-1994-weighted Automatic Grammar Acquisition ScottMiller - Heidi J.Fox + Heidi J.Fox H94-1051 miller-fox-1994-automatic @@ -425,23 +425,23 @@ Statistical Language Processing Using Hidden Understanding Models ScottMiller - RichardSchwartz - RobertBobrow - RobertIngria + RichardSchwartz + RobertBobrow + RobertIngria H94-1053 miller-etal-1994-statistical <fixed-case>J</fixed-case>apanese Word Segmentation by Hidden <fixed-case>M</fixed-case>arkov Model - Constantine P.Papageorgiou + Constantine P.Papageorgiou H94-1054 papageorgiou-1994-japanese Phonological Parsing for Bi-directional Letter-to-Sound/Sound-to-Letter Generation - Helen M.Meng - StephanieSeneff - Victor W.Zue + Helen M.Meng + StephanieSeneff + Victor W.Zue H94-1055 meng-etal-1994-phonological @@ -459,7 +459,7 @@ Speech and Human Language Technology at the <fixed-case>N</fixed-case>aval <fixed-case>R</fixed-case>esearch <fixed-case>L</fixed-case>aboratory - Helen M.Gigley + Helen M.Gigley H94-1058 gigley-1994-speech @@ -472,13 +472,13 @@ Language Processing <fixed-case>R</fixed-case>&<fixed-case>D</fixed-case> Programmes Directorate <fixed-case>XIII E</fixed-case> of the <fixed-case>E</fixed-case>uropean <fixed-case>C</fixed-case>ommission RobertoCencioni - Giovanni BattistaVarile + Giovanni BattistaVarile H94-1060 cencioni-varile-1994-language Session 11: Acoustic Modeling and Robust <fixed-case>CSR</fixed-case> - SteveYoung + SteveYoung H94-1061 young-1994-session @@ -492,8 +492,8 @@
High-Accuracy Large-Vocabulary Speech Recognition Using Mixture Tying and Consistency Modeling - VassiliosDigalakis - HyMurveit + VassiliosDigalakis + HyMurveit H94-1063 digalakis-murveit-1994-high @@ -509,9 +509,9 @@ Adaptation to New Microphones Using Tied-Mixture Normalization AnastasiosAnastasakos - FrancisKubala - JohnMakhoul - RichardSchwartz + FrancisKubala + JohnMakhoul + RichardSchwartz H94-1065 anastasakos-etal-1994-adaptation @@ -527,7 +527,7 @@ Microphone-Independent Robust Signal Processing Using Probabilistic Optimum Filtering LeonardoNeumeyer - MitchelWeintraub + MitchelWeintraub H94-1067 neumeyer-weintraub-1994-microphone @@ -562,13 +562,13 @@
Document Representation in Natural Language Text Retrieval - TomekStrzalkowski + TomekStrzalkowski H94-1072 strzalkowski-1994-document Assessing the Retrieval Effectiveness of a Speech Retrieval System by Simulating Recognition Errors - PeterSchauble + PeterSchauble UlrikeGlavitsch H94-1073 schauble-glavitsch-1994-assessing @@ -589,7 +589,7 @@ Session 13: <fixed-case>CSR</fixed-case> Search - RichardSchwartz + RichardSchwartz H94-1076 schwartz-1994-session @@ -600,15 +600,15 @@ OsamuYoshioka SatoshiTakahashi TomokazuYamada - SadaokiFurui + SadaokiFurui H94-1077 minami-etal-1994-large
Techniques to Achieve an Accurate Real-Time Large-Vocabulary Speech Recognition System - HyMurveit + HyMurveit PeterMonaco - VassiliosDigalakis + VassiliosDigalakis JohnButzberger H94-1078 murveit-etal-1994-techniques @@ -630,16 +630,16 @@ Is N-Best Dead? - LongNguyen - RichardSchwartz + LongNguyen + RichardSchwartz YingZhao - GeorgeZavaliagkos + GeorgeZavaliagkos H94-1081 nguyen-etal-1994-n Session 14: New Directions/Applications - RichardStern + RichardStern H94-1082 stern-1994-session @@ -651,28 +651,28 @@
Integrated Text and Image Understanding for Document Understanding - SuzanneLiebowitz - Deborah A.Dahl + SuzanneLiebowitz + Deborah A.Dahl MarkLipshutz CarlWeir - Lewis M.Norton + Lewis M.Norton RoslynNilson - MarciaLinebarger + MarciaLinebarger H94-1084 liebowitz-etal-1994-integrated Use of Lexical and Syntactic Techniques in Recognizing Handwritten Text - Rohini K.Srihari + Rohini K.Srihari H94-1085 srihari-1994-use On-Line Cursive Handwriting Recognition Using Hidden <fixed-case>M</fixed-case>arkov Models and Statistical Grammars - JohnMakhoul + JohnMakhoul ThadStarner - RichardSchwartz - GeorgeChou + RichardSchwartz + GeorgeChou H94-1086 makhoul-etal-1994-line @@ -688,21 +688,21 @@
Robust Continuous Speech Recognition - JohnMakhoul - RichardSchwartz + JohnMakhoul + RichardSchwartz H94-1088 makhoul-schwartz-1994-robust Robustness, Portability and Scalability Language Systems - RalphWeischedel + RalphWeischedel H94-1089 weischedel-1994-robustness Usable, Real-Time, Interactive Spoken Language Systems - JohnMakhoul - MadeleineBates + JohnMakhoul + MadeleineBates H94-1090 makhoul-bates-1994-usable @@ -710,20 +710,20 @@ Evaluating the Use of Prosodic Information in Speech Recognition and Understanding M.Ostendorf P.Price - S. ShattuckHufnagel + S. ShattuckHufnagel H94-1091 ostendorf-etal-1994-evaluating
Segment-Based Acoustic Models for Continuous Speech Recognition - MariOstendorf - J. RobinRohlicek + MariOstendorf + J. RobinRohlicek H94-1092 ostendorf-rohlicek-1994-segment <fixed-case>P</fixed-case>angloss: A Machine Translation Project - SergeiNirenburg + SergeiNirenburg H94-1093 nirenburg-1994-pangloss @@ -735,8 +735,8 @@
Extracting Constraints on Word Usage from Large Text Corpora - KathleenMcKeown - RebeccaPassonneau + KathleenMcKeown + RebeccaPassonneau H94-1095 mckeown-passonneau-1994-extracting @@ -752,7 +752,7 @@
Research in Large Vocabulary Continuous Speech Recognition - JanetBaker + JanetBaker LarryGillick RobertRoth H94-1097 @@ -760,20 +760,20 @@ The Tipster/Shogun Project - Paul S.Jacobs + Paul S.Jacobs H94-1098 jacobs-1994-tipster Automatic Extraction of Grammars From Annotated Text - SalimRoukos + SalimRoukos H94-1099 roukos-1994-automatic <fixed-case>C</fixed-case>andide: A Statistical Machine Translation System - StephenDellaPietra - VincentDellaPietra + StephenDellaPietra + VincentDellaPietra H94-1100 dellapietra-dellapietra-1994-candide @@ -786,32 +786,32 @@
Robust Continuous Speech Recognition Technology Program Summary - Clifford J.Weinstein + Clifford J.Weinstein Douglas B.Paul H94-1102 weinstein-paul-1994-robust Spoken Language Recognition and Understanding - VictorZue + VictorZue H94-1103 zue-1994-spoken <fixed-case>NIST</fixed-case>-<fixed-case>ARPA</fixed-case> Interagency Agreement: Human Language Technology Program - David S.Pallett + David S.Pallett H94-1104 pallett-1994-nist Written Language System Evaluation - Beth M.Sundheim + Beth M.Sundheim H94-1105 sundheim-1994-written The <fixed-case>C</fixed-case>onsortium for <fixed-case>L</fixed-case>exical <fixed-case>R</fixed-case>esearch - LouiseGuthrie + LouiseGuthrie H94-1106 guthrie-1994-consortium @@ -831,13 +831,13 @@
Research in Natural Language Processing - RalphGrishman + RalphGrishman H94-1109 grishman-1994-research Robust Text Processing and Information Retrieval - TomekStrzalkowski + TomekStrzalkowski H94-1110 strzalkowski-1994-robust-text @@ -864,7 +864,7 @@
Annotation of <fixed-case>ATIS</fixed-case> Data - KateHunicke-Smith + KateHunicke-Smith JaredBernstein H94-1114 hunicke-smith-bernstein-1994-annotation @@ -872,16 +872,16 @@ Combining Linguistic and Statistical Technology for Improved Spoken Language Understanding MichaelCohen - RobertMoore + RobertMoore H94-1115 cohen-moore-1994-combining Consistency Modeling - HyMurveit - VassiliosDigalakis + HyMurveit + VassiliosDigalakis PeterMonaco - MitchWeintraub + MitchWeintraub H94-1116 murveit-etal-1994-consistency @@ -909,14 +909,14 @@ Natural Language Planning Dialogue for Interactive - James F.Allen - LenSchubert + James F.Allen + LenSchubert H94-1120 allen-schubert-1994-natural <fixed-case>PANGLOSS</fixed-case>: Knowledge-Based Machine Translation - EduardHovy + EduardHovy H94-1121 hovy-1994-pangloss diff --git a/data/xml/I05.xml b/data/xml/I05.xml index 5f5f10174d..bdbc235455 100644 --- a/data/xml/I05.xml +++ b/data/xml/I05.xml @@ -13,7 +13,7 @@ A New Method for Sentiment Classification in Text Retrieval YiHu - JianyongDuan + JianyongDuan XiaomingChen BingzhenPei RuzhanLu @@ -33,7 +33,7 @@ The Use of Monolingual Context Vectors for Missing Translations in Cross-Language Information Retrieval YanQu GregoryGrefenstette - David A.Evans + David A.Evans 10.1007/11562214_3 I05-1003 qu-etal-2005-use @@ -49,7 +49,7 @@ Corpus-Based Analysis of <fixed-case>J</fixed-case>apanese Relative Clause Constructions TakeshiAbekawa - ManabuOkumura + ManabuOkumura 10.1007/11562214_5 I05-1005 abekawa-okumura-2005-corpus @@ -64,8 +64,8 @@ Parsing the <fixed-case>P</fixed-case>enn <fixed-case>C</fixed-case>hinese Treebank with Semantic Knowledge - DeyiXiong - ShuanglongLi + DeyiXiong + ShuanglongLi QunLiu ShouxunLin YueliangQian @@ -82,7 +82,7 @@ Entropy as an Indicator of Context Boundaries: An Experiment Using a Web Search Engine - KumikoTanaka-Ishii + KumikoTanaka-Ishii 10.1007/11562214_9 I05-1009 tanaka-ishii-2005-entropy @@ -90,7 +90,7 @@ Automatic Discovery of Attribute Words from Web Documents KosukeTokunaga - Jun’ichiKazama + Jun’ichiKazama KentaroTorisawa 10.1007/11562214_10 I05-1010 @@ -98,7 +98,7 @@ Aligning Needles in a Haystack: Paraphrase Acquisition Across the Web - MariusPaşca + MariusPaşca PéterDienes 10.1007/11562214_11 I05-1011 @@ -117,14 +117,14 @@ Automatic Partial Parsing Rule Acquisition Using Decision Tree Induction Myung-SeokChoi Chul SuLim - Key-SunChoi + Key-SunChoi 10.1007/11562214_13 I05-1013 choi-etal-2005-automatic Chunking Using Conditional Random Fields in <fixed-case>K</fixed-case>orean Texts - Yong-HunLee + Yong-HunLee Mi-YoungKim Jong-HyeokLee 10.1007/11562214_14 @@ -133,7 +133,7 @@ High Efficiency Realization for a Wide-Coverage Unification Grammar - JohnCarroll + JohnCarroll StephanOepen 10.1007/11562214_15 I05-1015 @@ -143,7 +143,7 @@ Linguistically-Motivated Grammar Extraction, Generalization and Adaptation Yu-MingHsieh Duen-ChiYang - Keh-JiannChen + Keh-JiannChen 10.1007/11562214_16 I05-1016 hsieh-etal-2005-linguistically @@ -160,15 +160,15 @@ Adapting a Probabilistic Disambiguation Model of an <fixed-case>HPSG</fixed-case> Parser to a New Domain TadayoshiHara YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 10.1007/11562214_18 I05-1018 hara-etal-2005-adapting A Hybrid Approach to Single and Multiple <fixed-case>PP</fixed-case> Attachment Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et - AksharBharathi - U.Rohini + AksharBharathi + U.Rohini P.Vishnu S.M.Bendre RajeevSangal @@ -178,7 +178,7 @@ Period Disambiguation with Maxent Model - ChunyuKit + ChunyuKit XiaoyueLiu 10.1007/11562214_20 I05-1020 @@ -186,8 +186,8 @@ Acquiring Synonyms from Monolingual Comparable Texts - MitsuoShimohata - EiichiroSumita + MitsuoShimohata + EiichiroSumita 10.1007/11562214_21 I05-1021 shimohata-sumita-2005-acquiring @@ -230,7 +230,7 @@ YeNa ChangXinzhi ChenWenliang - Benjamin KTsou + Benjamin KTsou 10.1007/11562214_26 I05-1026 zhu-etal-2005-using @@ -239,7 +239,7 @@ Classifying <fixed-case>C</fixed-case>hinese Texts in Two Steps XinghuaFan MaosongSun - Key-sunChoi + Key-sunChoi QinZhang 10.1007/11562214_27 I05-1027 @@ -248,7 +248,7 @@ Assigning Polarity Scores to Reviews Using Machine Learning Techniques DaisukeOkanohara - Jun’ichiTsujii + Jun’ichiTsujii 10.1007/11562214_28 I05-1028 okanohara-tsujii-2005-assigning @@ -283,7 +283,7 @@ Finding Taxonomical Relation from an <fixed-case>MRD</fixed-case> for Thesaurus Extension SeonHwaChoi - HyukRoPark + HyukRoPark 10.1007/11562214_32 I05-1032 choi-park-2005-finding @@ -300,8 +300,8 @@ MinZhang JianSu DanmeiWang - GuodongZhou - Chew LimTan + GuodongZhou + Chew LimTan 10.1007/11562214_34 I05-1034 zhang-etal-2005-discovering @@ -309,9 +309,9 @@ Automatic Relation Extraction with Model Order Selection and Discriminative Label Identification ChenJinxiu - JiDonghong - TanChew Lim - NiuZhengyu + DonghongJi + Chew LimTan + ZhengyuNiu 10.1007/11562214_35 I05-1035 chen-etal-2005-automatic @@ -327,8 +327,8 @@ A Preliminary Work on Classifying Time Granularities of Temporal Questions WeiLi WenjieLi - QinLu - Kam-FaiWong + QinLu + Kam-FaiWong 10.1007/11562214_37 I05-1037 li-etal-2005-preliminary @@ -337,7 +337,7 @@ Classification of Multiple-Sentence Questions AkihiroTamura HiroyaTakamura - ManabuOkumura + ManabuOkumura 10.1007/11562214_38 I05-1038 tamura-etal-2005-classification @@ -353,8 +353,8 @@ An Ensemble of Grapheme and Phoneme for Machine Transliteration - Jong-HoonOh - Key-SunChoi + Jong-HoonOh + Key-SunChoi 10.1007/11562214_40 I05-1040 oh-choi-2005-ensemble @@ -388,8 +388,8 @@ Answering Definition Questions Using Web Knowledge Bases ZhushuoZhang YaqianZhou - XuanjingHuang - LideWu + XuanjingHuang + LideWu 10.1007/11562214_44 I05-1044 zhang-etal-2005-answering @@ -397,7 +397,7 @@ Exploring Syntactic Relation Patterns for Question Answering DanShen - Geert-Jan M.Kruijff + Geert-Jan M.Kruijff DietrichKlakow 10.1007/11562214_45 I05-1045 @@ -406,16 +406,16 @@ Web-Based Unsupervised Learning for Query Formulation in Question Answering Yi-ChiaWang - Jian-ChengWu + Jian-ChengWu TyneLiang - Jason S.Chang + Jason S.Chang 10.1007/11562214_46 I05-1046 wang-etal-2005-web A Chunking Strategy Towards Unknown Word Detection in <fixed-case>C</fixed-case>hinese Word Segmentation - ZhouGuoDong + GuoDongZhou 10.1007/11562214_47 I05-1047 zhou-2005-chunking @@ -432,7 +432,7 @@ Relative Compositionality of Multi-word Expressions: A Study of Verb-Noun (<fixed-case>V</fixed-case>-N) Collocations SriramVenkatapathy - Aravind K.Joshi + Aravind K.Joshi 10.1007/11562214_49 I05-1049 venkatapathy-joshi-2005-relative @@ -441,7 +441,7 @@ Automatic Extraction of Fixed Multiword Expressions CampbellHore MasayukiAsahara - YūjiMatsumoto + YūjiMatsumoto 10.1007/11562214_50 I05-1050 hore-etal-2005-automatic @@ -459,8 +459,8 @@ Why Is Zero Marking Important in <fixed-case>K</fixed-case>orean? Sun-HeeLee - Donna K.Byron - Seok BaeJang + Donna K.Byron + Seok BaeJang 10.1007/11562214_52 I05-1052 lee-etal-2005-zero @@ -478,9 +478,9 @@ Machine Translation Based on Constraint-Based Synchronous Grammar FaiWong - Dong-ChengHu - Yu-HangMao - Ming-ChuiDong + Dong-ChengHu + Yu-HangMao + Ming-ChuiDong Yi-PingLi 10.1007/11562214_54 I05-1054 @@ -489,7 +489,7 @@ A Machine Learning Approach to Sentence Ordering for Multidocument Summarization and Its Evaluation DanushkaBollegala - NaoakiOkazaki + NaoakiOkazaki MitsuruIshizuka 10.1007/11562214_55 I05-1055 @@ -498,8 +498,8 @@ Significant Sentence Extraction by <fixed-case>E</fixed-case>uclidean Distance Based on Singular Value Decomposition ChangbeomLee - HyukroPark - CheolyoungOck + HyukroPark + CheolyoungOck 10.1007/11562214_56 I05-1056 lee-etal-2005-significant @@ -509,7 +509,7 @@ SeonhoKim JuntaeYoon Kyung-MiPark - Hae-ChangRim + Hae-ChangRim 10.1007/11562214_57 I05-1057 kim-etal-2005-two @@ -517,16 +517,16 @@ Heuristic Methods for Reducing Errors of Geographic Named Entities Learned by Bootstrapping SeungwooLee - Gary GeunbaeLee + Gary GeunbaeLee 10.1007/11562214_58 I05-1058 lee-lee-2005-heuristic Building a <fixed-case>J</fixed-case>apanese-<fixed-case>C</fixed-case>hinese Dictionary Using Kanji/Hanzi Conversion - Chooi-LingGoh + Chooi-LingGoh MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 10.1007/11562214_59 I05-1059 goh-etal-2005-building @@ -544,7 +544,7 @@ <fixed-case>CTEMP</fixed-case>: A <fixed-case>C</fixed-case>hinese Temporal Parser for Extracting and Normalizing Temporal Information WuMingli LiWenjie - LuQin + QinLu LiBaoli 10.1007/11562214_61 I05-1061 @@ -552,7 +552,7 @@ <fixed-case>F</fixed-case>rench-<fixed-case>E</fixed-case>nglish Terminology Extraction from Comparable Corpora - BéatriceDaille + BéatriceDaille EmmanuelMorin 10.1007/11562214_62 I05-1062 @@ -562,7 +562,7 @@ A Twin-Candidate Model of Coreference Resolution with Non-Anaphor Identification Capability XiaofengYang JianSu - Chew LimTan + Chew LimTan 10.1007/11562214_63 I05-1063 yang-etal-2005-twin @@ -571,7 +571,7 @@ Improving <fixed-case>K</fixed-case>orean Speech Acts Analysis by Using Shrinkage and Discourse Stack KyungsunKim YoungjoongKo - JungyunSeo + JungyunSeo 10.1007/11562214_64 I05-1064 kim-etal-2005-improving @@ -602,14 +602,14 @@ Semantic Role Labelling of Prepositional Phrases PatrickYe - TimothyBaldwin + TimothyBaldwin 10.1007/11562214_68 I05-1068 ye-baldwin-2005-semantic Global Path-Based Refinement of Noisy Graphs Applied to Verb Semantics - TimothyChklovski + TimothyChklovski PatrickPantel 10.1007/11562214_69 I05-1069 @@ -617,8 +617,8 @@ Semantic Role Tagging for <fixed-case>C</fixed-case>hinese at the Lexical Level - Oi YeeKwong - Benjamin K.Tsou + Oi YeeKwong + Benjamin K.Tsou 10.1007/11562214_70 I05-1070 kwong-tsou-2005-semantic @@ -627,7 +627,7 @@ Detecting Article Errors Based on the Mass Count Distinction RyoNagata TakahiroWakana - FumitoMasui + FumitoMasui AtsuoKawai NaokiIsu 10.1007/11562214_71 @@ -636,9 +636,9 @@ Principles of Non-stationary Hidden <fixed-case>M</fixed-case>arkov Model and Its Applications to Sequence Labeling Task - XiaoJingHui - LiuBingQuan - WangXiaoLong + JingHuiXiao + BingQuanLiu + XiaoLongWang 10.1007/11562214_72 I05-1072 xiao-etal-2005-principles @@ -654,7 +654,7 @@ A Connectionist Model of Anticipation in Visual Worlds Marshall R.Mayberry - Matthew W.Crocker + Matthew W.Crocker PiaKnoeferle 10.1007/11562214_74 I05-1074 @@ -662,8 +662,8 @@ Automatically Inducing a Part-of-Speech Tagger by Projecting from Multiple Source Languages Across Aligned Corpora - VictoriaFossum - StevenAbney + VictoriaFossum + StevenAbney 10.1007/11562214_75 I05-1075 fossum-abney-2005-automatically @@ -687,7 +687,7 @@ Regularisation Techniques for Conditional Random Fields: Parameterised Versus Parameter-Free - AndrewSmith + AndrewSmith MilesOsborne 10.1007/11562214_78 I05-1078 @@ -697,7 +697,7 @@ Exploiting Lexical Conceptual Structure for Paraphrase Generation AtsushiFujita KentaroInui - YujiMatsumoto + YujiMatsumoto 10.1007/11562214_79 I05-1079 fujita-etal-2005-exploiting @@ -705,8 +705,8 @@ Word Sense Disambiguation by Relative Selection Hee-CheolSeo - Hae-ChangRim - Myung-GilJang + Hae-ChangRim + Myung-GilJang 10.1007/11562214_80 I05-1080 seo-etal-2005-word @@ -714,7 +714,7 @@ Towards Robust High Performance Word Sense Disambiguation of <fixed-case>E</fixed-case>nglish Verbs Using Rich Linguistic Features JinyingChen - MarthaPalmer + MarthaPalmer 10.1007/11562214_81 I05-1081 chen-palmer-2005-towards @@ -722,7 +722,7 @@ Automatic Interpretation of Noun Compounds Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Similarity Su NamKim - TimothyBaldwin + TimothyBaldwin 10.1007/11562214_82 I05-1082 kim-baldwin-2005-automatic @@ -738,7 +738,7 @@ A Comparative Study of Language Models for Book and Author Recognition - ÖzlemUzuner + ÖzlemUzuner BorisKatz 10.1007/11562214_84 I05-1084 @@ -772,8 +772,8 @@ Extracting Terminologically Relevant Collocations in the Translation of <fixed-case>C</fixed-case>hinese Monograph Byeong-KwuKang - Bao-BaoChang - Yi-RongChen + Bao-BaoChang + Yi-RongChen Shi-WenYu 10.1007/11562214_88 I05-1088 @@ -798,7 +798,7 @@ A Classification-based Algorithm for Consistency Check of Part-of-Speech Tagging for <fixed-case>C</fixed-case>hinese Corpora HuZhang - Jia-hengZheng + Jia-hengZheng YingZhao I05-2001 zhang-etal-2005-classification @@ -806,7 +806,7 @@ A Hierarchical Parsing Approach with Punctuation Processing for Long <fixed-case>C</fixed-case>hinese Sentences XingLi - ChengqingZong + ChengqingZong RileHu I05-2002 li-etal-2005-hierarchical @@ -814,7 +814,7 @@ A Hybrid <fixed-case>C</fixed-case>hinese Language Model based on a Combination of Ontology with Statistical Method DequanZheng - TiejunZhao + TiejunZhao ShengLi HaoYu I05-2003 @@ -822,7 +822,7 @@ A Language Independent Algorithm for Single and Multiple Document Summarization - RadaMihalcea + RadaMihalcea PaulTarau I05-2004 mihalcea-tarau-2005-language @@ -845,7 +845,7 @@ A Resource-based <fixed-case>K</fixed-case>orean Morphological Annotation System Hyun-gueHuh - ÉricLaporte + ÉricLaporte I05-2007 huh-laporte-2005-resource @@ -876,14 +876,14 @@ Automatic Detection of Opinion Bearing Words and Sentences Soo-MinKim - EduardHovy + EduardHovy I05-2011 kim-hovy-2005-automatic Automatic Extraction of <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Translations for Constituents of Technical Terms - Jong-HoonOh - Key-SunChoi + Jong-HoonOh + Key-SunChoi I05-2012 oh-choi-2005-automatic @@ -927,7 +927,7 @@ Detecting the Countability of <fixed-case>E</fixed-case>nglish Compound Nouns Using Web-based Models JingPeng - KenjiAraki + KenjiAraki I05-2018 peng-araki-2005-detecting @@ -948,7 +948,7 @@ ToshihiroTakagi YasuhiroSasaki TakehitoUtsuro - SatoshiSato + SatoshiSato I05-2020 tonoike-etal-2005-effect @@ -988,15 +988,15 @@ Investigating the Features that Affect Cue Usage of Non-native Speakers of <fixed-case>E</fixed-case>nglish - XinyuDeng - Jun-ichiNakamura + XinyuDeng + Jun-ichiNakamura I05-2025 deng-nakamura-2005-investigating Lexical Chains and Sliding Locality Windows in Content-based Text Similarity Detection ThadeNahnsen - ÖzlemUzuner + ÖzlemUzuner BorisKatz I05-2026 nahnsen-etal-2005-lexical @@ -1026,15 +1026,15 @@ NozomiKobayashi RyuIida KentaroInui - YujiMatsumoto + YujiMatsumoto I05-2030 kobayashi-etal-2005-opinion Problems of Reusing an Existing <fixed-case>MT</fixed-case> System - OndřejBojar + OndřejBojar PetrHomola - VladislavKuboň + VladislavKuboň I05-2031 bojar-etal-2005-problems @@ -1050,22 +1050,22 @@ <fixed-case>POS</fixed-case> Tagger Combinations on <fixed-case>H</fixed-case>ungarian Text AndrásKuba - LászlóFelföldi - AndrásKocsor + LászlóFelföldi + AndrásKocsor I05-2033 kuba-etal-2005-pos Probabilistic Models for <fixed-case>K</fixed-case>orean Morphological Analysis Do-GilLee - Hae-ChangRim + Hae-ChangRim I05-2034 lee-rim-2005-probabilistic Rapid Prototyping of Scalable Grammars: Towards Modularity in Extensions to a Language-Independent Core - Emily M.Bender - DanFlickinger + Emily M.Bender + DanFlickinger I05-2035 bender-flickinger-2005-rapid @@ -1080,16 +1080,16 @@ Restoring an Elided Entry Word in a Sentence for Encyclopedia <fixed-case>QA</fixed-case> System SoojongLim ChangkiLee - Myoung-GilJang + Myoung-GilJang I05-2037 lim-etal-2005-restoring Syntax Annotation for the <fixed-case>GENIA</fixed-case> Corpus - YukaTateisi + YukaTateisi AkaneYakushiji TomokoOhta - Jun’ichiTsujii + Jun’ichiTsujii I05-2038 tateisi-etal-2005-syntax @@ -1102,9 +1102,9 @@ Transformation Based <fixed-case>C</fixed-case>hinese Entity Detection and Tracking YaqianZhou - ChangningHuang + ChangningHuang JianfengGao - LideWu + LideWu I05-2040 zhou-etal-2005-transformation @@ -1114,14 +1114,14 @@ YongjooCho SunghoonSon Ui-SungSong - Hae-ChangRim + Hae-ChangRim I05-2041 park-etal-2005-tree Toward a Unified Evaluation Method for Multiple Reading Support Systems: A Reading Speed-based Procedure KatsunoriKotani - TakehikoYoshimi + TakehikoYoshimi TakeshiKutsumi IchikoSata HitoshiIsahara @@ -1150,29 +1150,29 @@ Unsupervised Feature Selection for Relation Extraction JinxiuChen - DonghongJi - Chew LimTan - ZhengyuNiu + DonghongJi + Chew LimTan + ZhengyuNiu I05-2045 chen-etal-2005-unsupervised Using Maximum Entropy to Extract Biomedical Named Entities without Dictionaries - Tzong-HanTsai + Tzong-HanTsai Chia-WeiWu - Wen-LianHsu + Wen-LianHsu I05-2046 tsai-etal-2005-using Automated Text Summarization - Chin-YewLin + Chin-YewLin I05-2047 lin-2005-automated Statistical Machine Translation Part <fixed-case>I</fixed-case>: Hands-On Introduction - StephanVogel + StephanVogel I05-2048 vogel-2005-statistical @@ -1200,8 +1200,8 @@ Detecting Segmentation Errors in <fixed-case>C</fixed-case>hinese Annotated Corpus - ChengjieSun - Chang-NingHuang + ChengjieSun + Chang-NingHuang XiaolongWang MuLi I05-3001 @@ -1217,7 +1217,7 @@ <fixed-case>C</fixed-case>hinese Deterministic Dependency Analyzer: Examining Effects of Global Features and Root Node Finder YuchangCheng MasayukiAsahara - YujiMatsumoto + YujiMatsumoto I05-3003 cheng-etal-2005-chinese @@ -1230,9 +1230,9 @@ Morphological features help <fixed-case>POS</fixed-case> tagging of unknown words across language varieties - HuihsinTseng - DanielJurafsky - ChristopherManning + HuihsinTseng + DanielJurafsky + ChristopherManning I05-3005 tseng-etal-2005-morphological @@ -1251,17 +1251,17 @@ Chu-RenHuang AdamKilgarriff YichingWu - Chih-MingChiu + Chih-MingChiu SimonSmith - PavelRychly + PavelRychly Ming-HongBai - Keh-JiannChen + Keh-JiannChen I05-3007 huang-etal-2005-chinese Word Meaning Inducing via Character Ontology: A Survey on the Semantic Prediction of <fixed-case>C</fixed-case>hinese Two-Character Words - Shu-KaiHsieh + Shu-KaiHsieh I05-3008 hsieh-2005-word @@ -1273,21 +1273,21 @@ Turn-taking in <fixed-case>M</fixed-case>andarin Dialogue: Interactions of Tone and Intonation - Gina-AnneLevow + Gina-AnneLevow I05-3010 levow-2005-turn Learning a Log-Linear Model with Bilingual Phrase-Pair Features for Statistical Machine Translation BingZhao - AlexWaibel + AlexWaibel I05-3011 zhao-waibel-2005-learning Integrating Collocation Features in <fixed-case>C</fixed-case>hinese Word Sense Disambiguation WanyinLi - QinLu + QinLu WenjieLi I05-3012 li-etal-2005-integrating @@ -1295,7 +1295,7 @@ <fixed-case>NIL</fixed-case> Is Not Nothing: Recognition of <fixed-case>C</fixed-case>hinese Network Informal Language Expressions YunqingXia - Kam-FaiWong + Kam-FaiWong WeiGao I05-3013 xia-etal-2005-nil @@ -1304,7 +1304,7 @@ The Robustness of Domain Lexico-Taxonomy: Expanding Domain Lexicon with <fixed-case>C</fixed-case>i<fixed-case>L</fixed-case>in Chu-RenHuang Xiang-BingLi - Jia-FeiHong + Jia-FeiHong I05-3014 huang-etal-2005-robustness @@ -1317,7 +1317,7 @@ Resolving Pronominal References in <fixed-case>C</fixed-case>hinese with the <fixed-case>H</fixed-case>obbs Algorithm - Susan P.Converse + Susan P.Converse I05-3016 converse-2005-resolving @@ -1332,9 +1332,9 @@ MasayukiAsahara KentaFukuoka AiAzuma - Chooi-LingGoh + Chooi-LingGoh YotaroWatanabe - YujiMatsumoto + YujiMatsumoto TakashiTsuzuki I05-3018 asahara-etal-2005-combination @@ -1356,7 +1356,7 @@ An Example-Based <fixed-case>C</fixed-case>hinese Word Segmentation System for <fixed-case>CWSB</fixed-case>-2 - ChunyuKit + ChunyuKit XiaoyueLiu I05-3021 kit-liu-2005-example @@ -1365,7 +1365,7 @@ <fixed-case>C</fixed-case>hinese Word Segmentation in <fixed-case>FTRD</fixed-case> <fixed-case>B</fixed-case>eijing HengLi YuanDong - XinnianMao + XinnianMao HailaWang WuLiu I05-3022 @@ -1375,8 +1375,8 @@ Perceptron Learning for <fixed-case>C</fixed-case>hinese Word Segmentation YaoyongLi ChuanjiangMiao - KalinaBontcheva - HamishCunningham + KalinaBontcheva + HamishCunningham I05-3023 li-etal-2005-perceptron @@ -1399,17 +1399,17 @@ Description of the <fixed-case>HKU</fixed-case> <fixed-case>C</fixed-case>hinese Word Segmentation System for Sighan Bakeoff 2005 GuohongFu Kang-KwongLuke - Percy Ping-WaiWong + Percy Ping-WaiWong I05-3026 fu-etal-2005-description A Conditional Random Field Word Segmenter for Sighan Bakeoff 2005 - HuihsinTseng - PichuanChang + HuihsinTseng + PichuanChang GalenAndrew - DanielJurafsky - ChristopherManning + DanielJurafsky + ChristopherManning I05-3027 tseng-etal-2005-conditional @@ -1435,7 +1435,7 @@ WeiJiang JianZhao YiGuan - ZhimingXu + ZhimingXu I05-3030 jiang-etal-2005-chinese @@ -1448,7 +1448,7 @@ <fixed-case>C</fixed-case>hinese Word Segmentation in <fixed-case>ICT</fixed-case>-<fixed-case>NLP</fixed-case> - ShuangLongLi + ShuangLongLi I05-3032 li-2005-chinese @@ -1467,9 +1467,9 @@ A Hybrid Approach to <fixed-case>C</fixed-case>hinese Word Segmentation around <fixed-case>CRF</fixed-case>s ZhouJun-sheng - DaiXin-yu + Xin-yuDai NiRui-yu - ChenJia-jun + Jia-junChen I05-3035 zhou-etal-2005-hybrid @@ -1511,7 +1511,7 @@ Corpus-oriented Acquisition of <fixed-case>C</fixed-case>hinese Grammar YanZhang - HidekiKashioka + HidekiKashioka I05-4003 zhang-kashioka-2005-corpus @@ -1542,7 +1542,7 @@ Cross-lingual Conversion of Lexical Semantic Relations: Building Parallel Wordnets Chu-RenHuang I-LiSu - Jia-FeiHong + Jia-FeiHong Xiang-BingLi I05-4007 huang-etal-2005-cross @@ -1556,17 +1556,17 @@ Question Classification using Multiple Classifiers LiXin - HuangXuan-Jing - WuLi-de + Xuan-JingHuang + Li-deWu I05-4009 li-etal-2005-question Harvesting the Bitexts of the Laws of <fixed-case>H</fixed-case>ong <fixed-case>K</fixed-case>ong From the Web - ChunyuKit + ChunyuKit XiaoyueLiu - KingKuiSin - Jonathan J.Webster + KingKuiSin + Jonathan J.Webster I05-4010 kit-etal-2005-harvesting @@ -1589,13 +1589,13 @@ Support Vector Machines for Paraphrase Identification and Corpus Construction ChrisBrockett - William B.Dolan + William B.Dolan I05-5001 brockett-dolan-2005-support Automatically Constructing a Corpus of Sentential Paraphrases - William B.Dolan + William B.Dolan ChrisBrockett I05-5002 dolan-brockett-2005-automatically @@ -1604,7 +1604,7 @@ Using Machine Translation Evaluation Techniques to Determine Sentence-level Semantic Equivalence AndrewFinch Young-SookHwang - EiichiroSumita + EiichiroSumita I05-5003 finch-etal-2005-using @@ -1618,7 +1618,7 @@ Structural variation in generated health reports CatalinaHallett - DoniaScott + DoniaScott I05-5005 hallett-scott-2005-structural @@ -1631,7 +1631,7 @@ Automated Generalization of Phrasal Paraphrases from the Web - WeigangLi + WeigangLi TingLiu YuZhang ShengLi @@ -1655,7 +1655,7 @@ Automatic generation of large-scale paraphrases RichardPower - DoniaScott + DoniaScott I05-5010 power-scott-2005-automatic @@ -1670,7 +1670,7 @@ StephenWan MarkDras RobertDale - CécileParis + CécileParis I05-5012 wan-etal-2005-towards @@ -1723,7 +1723,7 @@ Building Dialogue Corpora for Nursing Activity Analysis - Hiromi itohOzaku + Hiromi itohOzaku AkinoriAbe NoriakiKuwahara FutoshiNaya @@ -1734,20 +1734,20 @@ The Syntactically Annotated <fixed-case>ICE</fixed-case> Corpus and the Automatic Induction of a Formal Grammar - Alex ChengyuFang + Alex ChengyuFang I05-6006 fang-2005-syntactically Syntactic Identification of Attribution in the <fixed-case>RST</fixed-case> Treebank - Peter RossenSkadhauge + Peter RossenSkadhauge DanielHardt I05-6007 skadhauge-hardt-2005-syntactic Linguistically enriched corpora for establishing variation in support verb constructions - Begoña VilladaMoirón + Begoña VilladaMoirón I05-6008 moiron-2005-linguistically @@ -1791,7 +1791,7 @@ Extended-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et: A Representational Framework for Concepts - Keh-JiannChen + Keh-JiannChen Shu-LingHuang Yueh-YinShih Yi-JunChen @@ -1809,7 +1809,7 @@ From General Ontology to Specialized Ontology: A study based on a single author historical corpus Ru-YngChang Chu-RenHuang - Feng-JuLo + Feng-JuLo SuemingChang I05-7003 chang-etal-2005-general @@ -1837,7 +1837,7 @@ Increasing Understanding: Interpreting Events of Change - SergeiNirenburg + SergeiNirenburg MarjorieMcShane StephenBeale I05-7007 @@ -1857,7 +1857,7 @@ The Omega Ontology AndrewPhilpot - EduardHovy + EduardHovy PatrickPantel I05-7009 philpot-etal-2005-omega @@ -1865,14 +1865,14 @@ Experiments of Ontology Construction with Formal Concept Analysis SujianLi - QinLu + QinLu WenjieLi I05-7010 li-etal-2005-experiments Automatic Acquisition of Lexico-semantic Knowledge for <fixed-case>QA</fixed-case> - Lonnekevan der Plas + Lonnekevan der Plas GosseBouma I05-7011 van-der-plas-bouma-2005-automatic @@ -1880,13 +1880,13 @@ A Case Study in Automatic Building of Wordnets EduardBarbu - Verginica BarbuMititelu + Verginica BarbuMititelu I05-7012 barbu-mititelu-2005-case Interfacing Ontologies and Lexical Resources - LaurentPrevot + LaurentPrevot StefanoBorgo AlessandroOltramari I05-7013 diff --git a/data/xml/I08.xml b/data/xml/I08.xml index 51a493ecc2..34e2fda5f4 100644 --- a/data/xml/I08.xml +++ b/data/xml/I08.xml @@ -21,7 +21,7 @@ An Empirical Comparison of Goodness Measures for Unsupervised <fixed-case>C</fixed-case>hinese Word Segmentation with a Unified Framework HaiZhao - ChunyuKit + ChunyuKit I08-1002 zhao-kit-2008-empirical @@ -34,18 +34,18 @@ Context-Sensitive Convolution Tree Kernel for Pronoun Resolution - GuoDongZhou + GuoDongZhou FangKong - QiaoMingZhu + QiaoMingZhu I08-1004 zhou-etal-2008-context Semi-Supervised Learning for Relation Extraction - GuoDongZhou - JunHuiLi - LongHuaQian - QiaoMingZhu + GuoDongZhou + JunHuiLi + LongHuaQian + QiaoMingZhu I08-1005 zhou-etal-2008-semi @@ -69,10 +69,10 @@ Name Origin Recognition Using Maximum Entropy Model and Diverse Features MinZhang - ChengjieSun + ChengjieSun HaizhouLi - AiTiAw - Chew LimTan + AiTiAw + Chew LimTan XiaolongWang I08-1008 zhang-etal-2008-name @@ -80,7 +80,7 @@ A More Discerning and Adaptable Multilingual Transliteration Mechanism for <fixed-case>I</fixed-case>ndian Languages HarshitSurana - Anil KumarSingh + Anil KumarSingh I08-1009 surana-singh-2008-discerning @@ -110,7 +110,7 @@ An Effective Compositional Model for Lexical Alignment - BéatriceDaille + BéatriceDaille EmmanuelMorin I08-1013 daille-morin-2008-effective @@ -118,7 +118,7 @@ Determining the Unithood of Word Sequences Using a Probabilistic Approach WilsonWong - WeiLiu + WeiLiu MohammedBennamoun I08-1014 wong-etal-2008-determining @@ -127,7 +127,7 @@ Lexical Chains as Document Features DinakarJayarajan DiptiDeodhare - BalaramanRavindran + BalaramanRavindran I08-1015 jayarajan-etal-2008-lexical @@ -140,7 +140,7 @@ A New Approach to Automatic Document Summarization XiaofengWu - ChengqingZong + ChengqingZong I08-1017 wu-zong-2008-new @@ -149,7 +149,7 @@ HarendraBhandari MasashiShimbo TakahikoIto - YujiMatsumoto + YujiMatsumoto I08-1018 bhandari-etal-2008-generic @@ -157,7 +157,7 @@ Identifying Cross-Document Relations between Sentences YasunariMiyabe HiroyaTakamura - ManabuOkumura + ManabuOkumura I08-1019 miyabe-etal-2008-identifying @@ -171,7 +171,7 @@ Modeling Context in Scenario Template Creation LongQiu Min-YenKan - Tat-SengChua + Tat-SengChua I08-1021 qiu-etal-2008-modeling @@ -179,7 +179,7 @@ Cross Language Text Categorization Using a Bilingual Lexicon KeWu XiaolinWang - Bao-LiangLu + Bao-LiangLu I08-1022 wu-etal-2008-cross @@ -196,7 +196,7 @@ A Comparative Study for Query Translation using Linear Combination and Confidence Measure YoussefKadri - Jian-YunNie + Jian-YunNie I08-1024 kadri-nie-2008-comparative @@ -221,7 +221,7 @@ Automatic Estimation of Word Significance oriented for Speech-based Information Retrieval TakashiShichiri HiroakiNanjo - TakehikoYoshimi + TakehikoYoshimi I08-1027 shichiri-etal-2008-automatic @@ -233,26 +233,26 @@ KotaroFunakoshi HiroshiTsujino TetsuyaOgata - Hiroshi G.Okuno + Hiroshi G.Okuno I08-1028 fukubayashi-etal-2008-rapid Automatic Prosodic Labeling with Conditional Random Fields and Rich Acoustic Features - Gina-AnneLevow + Gina-AnneLevow I08-1029 levow-2008-automatic <fixed-case>C</fixed-case>hinese Unknown Word Translation by Subword Re-segmentation RuiqiangZhang - EiichiroSumita + EiichiroSumita I08-1030 zhang-sumita-2008-chinese Hypothesis Selection in Machine Transliteration: A Web Mining Approach - Jong-HoonOh + Jong-HoonOh HitoshiIsahara I08-1031 oh-isahara-2008-hypothesis @@ -267,16 +267,16 @@ Improving Word Alignment by Adjusting <fixed-case>C</fixed-case>hinese Word Segmentation Ming-HongBai - Keh-JiannChen - Jason S.Chang + Keh-JiannChen + Jason S.Chang I08-1033 bai-etal-2008-improving The Telling Tail: Signals of Success in Electronic Negotiation Texts MarinaSokolova - ViviNastase - StanSzpakowicz + ViviNastase + StanSzpakowicz I08-1034 sokolova-etal-2008-telling @@ -284,7 +284,7 @@ Automatic Extraction of Briefing Templates DipanjanDas MohitKumar - Alexander I.Rudnicky + Alexander I.Rudnicky I08-1035 das-etal-2008-automatic @@ -298,8 +298,8 @@ Learning Patterns from the Web to Translate Named Entities for Cross Language Information Retrieval Yu-ChunWang - Richard Tzong-HanTsai - Wen-LianHsu + Richard Tzong-HanTsai + Wen-LianHsu I08-1037 wang-etal-2008-learning @@ -314,37 +314,37 @@ Learning to Shift the Polarity of Words for Sentiment Classification DaisukeIkeda HiroyaTakamura - Lev-ArieRatinov - ManabuOkumura + Lev-ArieRatinov + ManabuOkumura I08-1039 ikeda-etal-2008-learning Unsupervised Classification of Sentiment and Objectivity in <fixed-case>C</fixed-case>hinese Text TarasZagibalov - JohnCarroll + JohnCarroll I08-1040 zagibalov-carroll-2008-unsupervised Using <fixed-case>R</fixed-case>oget’s Thesaurus for Fine-grained Emotion Recognition SaimaAman - StanSzpakowicz + StanSzpakowicz I08-1041 aman-szpakowicz-2008-using Heterogeneous Automatic <fixed-case>MT</fixed-case> Evaluation Through Non-Parametric Metric Combinations - JesúsGiménez - LluísMàrquez + JesúsGiménez + LluísMàrquez I08-1042 gimenez-marquez-2008-heterogeneous Paraphrasing Depending on Bilingual Context Toward Generalization of Translation Knowledge Young-SookHwang - YoungKilKim - SangkyuPark + YoungKilKim + SangkyuPark I08-1043 hwang-etal-2008-paraphrasing @@ -382,7 +382,7 @@ Learning a Stopping Criterion for Active Learning for Word Sense Disambiguation and Text Classification JingboZhu HuizhenWang - EduardHovy + EduardHovy I08-1048 zhu-etal-2008-learning @@ -396,7 +396,7 @@ Identifying Sections in Scientific Abstracts using Conditional Random Fields KenjiHirohata - NaoakiOkazaki + NaoakiOkazaki SophiaAnaniadou MitsuruIshizuka I08-1050 @@ -405,7 +405,7 @@ Formalising Multi-layer Corpora in <fixed-case>OWL</fixed-case> <fixed-case>DL</fixed-case> - Lexicon Modelling, Querying and Consistency Control AljoschaBurchardt - SebastianPadó + SebastianPadó DennisSpohr AnetteFrank UlrichHeid @@ -417,7 +417,7 @@ KiyoakiShirai TakenobuTokunaga Chu-RenHuang - Shu-KaiHsieh + Shu-KaiHsieh Tzu-YiKuo VirachSornlertlamvanich ThatsaneeCharoenporn @@ -433,7 +433,7 @@ Answering Definition Questions via Temporally-Anchored Text Snippets - MariusPaşca + MariusPaşca I08-1054 pasca-2008-answering @@ -446,8 +446,8 @@ Cluster-Based Query Expansion for Statistical Question Answering - Lucian VladLita - JaimeCarbonell + Lucian VladLita + JaimeCarbonell I08-1056 lita-carbonell-2008-cluster @@ -460,7 +460,7 @@ Multilingual Text Entry using Automatic Language Detection YoEhara - KumikoTanaka-Ishii + KumikoTanaka-Ishii I08-1058 ehara-tanaka-ishii-2008-multilingual @@ -470,7 +470,7 @@ JianfengGao ChrisBrockett AlexandreKlementiev - William B.Dolan + William B.Dolan DmitriyBelenko LucyVanderwende I08-1059 @@ -479,7 +479,7 @@ Bilingual Synonym Identification with Spelling Variations TakashiTsunakawa - Jun’ichiTsujii + Jun’ichiTsujii I08-1060 tsunakawa-tsujii-2008-bilingual @@ -494,7 +494,7 @@ <fixed-case>J</fixed-case>apanese-<fixed-case>S</fixed-case>panish Thesaurus Construction Using <fixed-case>E</fixed-case>nglish as a Pivot JessicaRamírez MasayukiAsahara - YujiMatsumoto + YujiMatsumoto I08-1062 ramirez-etal-2008-japanese @@ -517,15 +517,15 @@ Acquiring Event Relation Knowledge by Learning Cooccurrence Patterns and Fertilizing Cooccurrence Samples with Verbal Nouns ShuyaAbe KentaroInui - YujiMatsumoto + YujiMatsumoto I08-1065 abe-etal-2008-acquiring Refinements in <fixed-case>BTG</fixed-case>-based Statistical Machine Translation - DeyiXiong + DeyiXiong MinZhang - AiTiAw + AiTiAw HaitaoMi QunLiu ShouxunLin @@ -536,15 +536,15 @@ Simple Syntactic and Morphological Processing Can Help <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Statistical Machine Translation AnanthakrishnanRamanathan JayprasadHegde - Ritesh M.Shah - PushpakBhattacharyya - SasikumarM. + Ritesh M.Shah + PushpakBhattacharyya + SasikumarM. I08-1067 ramanathan-etal-2008-simple Statistical Machine Translation Models for Personalized Search - RohiniU + RohiniU VamshiAmbati VasudevaVarma I08-1068 @@ -553,21 +553,21 @@ Repurposing Theoretical Linguistic Data for Tool Development and Search FeiXia - William D.Lewis + William D.Lewis I08-1069 xia-lewis-2008-repurposing Computing Paraphrasability of Syntactic Variants Using Web Snippets AtsushiFujita - SatoshiSato + SatoshiSato I08-1070 fujita-sato-2008-computing Augmenting <fixed-case>W</fixed-case>ikipedia with Named Entity Tags WisamDakka - SilviuCucerzan + SilviuCucerzan I08-1071 dakka-cucerzan-2008-augmenting @@ -582,7 +582,7 @@ Gloss-Based Semantic Similarity Metrics for Predominant Sense Acquisition RyuIida - DianaMcCarthy + DianaMcCarthy RobKoeling I08-1073 iida-etal-2008-gloss @@ -590,7 +590,7 @@ Benchmarking Noun Compound Interpretation Su NamKim - TimothyBaldwin + TimothyBaldwin I08-1074 kim-baldwin-2008-benchmarking @@ -623,7 +623,7 @@ Named Entity Recognition in <fixed-case>B</fixed-case>engali: A Conditional Random Field Approach AsifEkbal RejwanulHaque - SivajiBandyopadhyay + SivajiBandyopadhyay I08-2077 ekbal-etal-2008-named @@ -650,7 +650,7 @@ Dimensionality Reduction with Multilingual Resource - YingJuXia + YingJuXia HaoYu GangZou I08-2081 @@ -660,7 +660,7 @@ A Web-based <fixed-case>E</fixed-case>nglish Proofing System for <fixed-case>E</fixed-case>nglish as a Second Language Users XingYi JianfengGao - William B.Dolan + William B.Dolan I08-2082 yi-etal-2008-web @@ -674,8 +674,8 @@ Term Extraction Through Unithood and Termhood Unification - ThuyVu - Ai TiAw + ThuyVu + Ai TiAw MinZhang I08-2084 vu-etal-2008-term @@ -707,8 +707,8 @@ Method of Selecting Training Data to Build a Compact and Efficient Translation Model KeijiYasuda RuiqiangZhang - HirofumiYamamoto - EiichiroSumita + HirofumiYamamoto + EiichiroSumita I08-2088 yasuda-etal-2008-method @@ -732,7 +732,7 @@ ThatsaneeCharoenporn ChumpolMokarat HitoshiIsahara - HammamRiza + HammamRiza PurevJaimai I08-2091 sornlertlamvanich-etal-2008-synset @@ -746,7 +746,7 @@ Automatically Identifying Computationally Relevant Typological Features - William D.Lewis + William D.Lewis FeiXia I08-2093 lewis-xia-2008-automatically @@ -754,14 +754,14 @@ Automatic Paraphrasing of <fixed-case>J</fixed-case>apanese Functional Expressions Using a Hierarchically Organized Dictionary SuguruMatsuyoshi - SatoshiSato + SatoshiSato I08-2094 matsuyoshi-sato-2008-automatic Generation of Referring Expression Using Prefix Tree Structure SibabrataPaladhi - SivajiBandyopadhyay + SivajiBandyopadhyay I08-2095 paladhi-bandyopadhyay-2008-generation @@ -783,7 +783,7 @@ Resolving Ambiguities of <fixed-case>C</fixed-case>hinese Conjunctive Structures by Divide-and-conquer Approaches Duen-ChiYang Yu-MingHsieh - Keh-JiannChen + Keh-JiannChen I08-2098 yang-etal-2008-resolving @@ -792,7 +792,7 @@ RafiyaBegum SamarHusain ArunDhwaj - Dipti MisraSharma + Dipti MisraSharma LakshmiBai RajeevSangal I08-2099 @@ -811,7 +811,7 @@ A Multi-Document Multi-Lingual Automatic Summarization System Mohamad AliHonarpisheh - GholamrezaGhassem-Sani + GholamrezaGhassem-Sani GhassemMirroshandel I08-2101 honarpisheh-etal-2008-multi @@ -825,7 +825,7 @@ Sentence Ordering based on Cluster Adjacency in Multi-Document Summarization - DonghongJi + DonghongJi YuNie I08-2103 ji-nie-2008-sentence @@ -840,13 +840,13 @@ Unsupervised All-words Word Sense Disambiguation with Grammatical Dependencies - ViviNastase + ViviNastase I08-2105 nastase-2008-unsupervised Syntactic and Semantic Frames in <fixed-case>P</fixed-case>rep<fixed-case>N</fixed-case>et - PatrickSaint-Dizier + PatrickSaint-Dizier I08-2106 saint-dizier-2008-syntactic @@ -860,11 +860,11 @@ <fixed-case>MRD</fixed-case>-based Word Sense Disambiguation: Further Extending <fixed-case>L</fixed-case>esk - TimothyBaldwin + TimothyBaldwin Su NamKim FrancisBond SanaeFujita - DavidMartinez + DavidMartinez TakaakiTanaka I08-2108 baldwin-etal-2008-mrd @@ -873,8 +873,8 @@ Fast Computing Grammar-driven Convolution Tree Kernel for Semantic Role Labeling WanxiangChe MinZhang - Ai TiAw - Chew LimTan + Ai TiAw + Chew LimTan TingLiu ShengLi I08-2109 @@ -885,7 +885,7 @@ TomohideShibata MichitakaOdani JunHarashima - TakashiOonishi + TakashiOonishi SadaoKurohashi I08-2110 shibata-etal-2008-syngraph @@ -946,7 +946,7 @@ Learning Decision Lists with Known Rules for Text Mining VenkatesanChakravarthy - SachindraJoshi + SachindraJoshi GaneshRamakrishnan ShantanuGodbole SreeramBalakrishnan @@ -961,7 +961,7 @@ Mining <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Parallel Corpora from the Web - BoLi + BoLi JuanLiu li-liu-2008-mining @@ -976,13 +976,13 @@ Towards Data and Goal Oriented Analysis: Tool Inter-operability and Combinatorial Comparison YoshinobuKano NganNguyen - RuneSætre + RuneSætre KazuhiroYoshida KeiichiroFukamachi YusukeMiyao YoshimasaTsuruoka SophiaAnaniadou - Jun’ichiTsujii + Jun’ichiTsujii I08-2122 kano-etal-2008-towards @@ -999,13 +999,13 @@ DonghuiFeng GullyBurns JingboZhu - EduardHovy + EduardHovy I08-2124 feng-etal-2008-towards Large Scale Diagnostic Code Classification for Medical Patient Records - Lucian VladLita + Lucian VladLita ShipengYu StefanNiculescu JinboBi @@ -1021,9 +1021,9 @@ A Discriminative Approach to <fixed-case>J</fixed-case>apanese Abbreviation Extraction - NaoakiOkazaki + NaoakiOkazaki MitsuruIshizuka - Jun’ichiTsujii + Jun’ichiTsujii I08-2127 okazaki-etal-2008-discriminative-approach @@ -1037,7 +1037,7 @@ How to Take Advantage of the Limitations with <fixed-case>M</fixed-case>arkov Clustering?–The Foundations of Branching <fixed-case>M</fixed-case>arkov Clustering (<fixed-case>BMCL</fixed-case>) - HiroyukiAkama + HiroyukiAkama MakiMiyake JaeyoungJung I08-2129 @@ -1052,9 +1052,9 @@ Language Independent Text Correction using Finite State Automata - AhmedHassan + AhmedHassan SaraNoeman - HanyHassan + HanyHassan I08-2131 hassan-etal-2008-language @@ -1071,13 +1071,13 @@ A Comparative Study of Mixture Models for Automatic Topic Segmentation of Multiparty Dialogues MariaGeorgescul AlexanderClark - SusanArmstrong + SusanArmstrong I08-2133 georgescul-etal-2008-comparative
Exploiting Unlabeled Text to Extract New Words of Different Semantic Transparency for <fixed-case>C</fixed-case>hinese Word Segmentation - Richard Tzong-HanTsai + Richard Tzong-HanTsai Hsi-ChuanHung I08-2134 tsai-hung-2008-exploiting @@ -1092,8 +1092,8 @@ How to Add a New Language on the <fixed-case>NLP</fixed-case> Map: Building Resources and Tools for Languages with Scarce Resources - RadaMihalcea - ViviNastase + RadaMihalcea + ViviNastase I08-2136 mihalcea-nastase-2008-add @@ -1106,7 +1106,7 @@ A <fixed-case>P</fixed-case>unjabi Grammar Checker Mandeep SinghGill - Gurpreet SinghLehal + Gurpreet SinghLehal Shiv SharmaJoshi I08-2138 gill-etal-2008-punjabi @@ -1128,7 +1128,7 @@ A Mechanism to Provide Language-Encoding Support and an <fixed-case>NLP</fixed-case> Friendly Editor - Anil KumarSingh + Anil KumarSingh I08-2141 singh-2008-mechanism @@ -1144,8 +1144,8 @@ <fixed-case>POLL</fixed-case>y: A Conversational System that uses a Shared Representation to Generate Action and Social Language SwatiGupta - Marilyn A.Walker - Daniela M.Romano + Marilyn A.Walker + Daniela M.Romano I08-2143 gupta-etal-2008-polly @@ -1192,7 +1192,7 @@
Natural Language Processing for Less Privileged Languages: Where do we come from? Where are we going? - Anil KumarSingh + Anil KumarSingh I08-3004 singh-2008-natural @@ -1208,7 +1208,7 @@ Prototype Machine Translation System From Text-To-<fixed-case>I</fixed-case>ndian <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage TirthankarDasgupta - SandipanDandpat + SandipanDandpat AnupamBasu I08-3006 dasgupta-etal-2008-prototype @@ -1222,7 +1222,7 @@ Cross-Language Parser Adaptation between Related Languages - DanielZeman + DanielZeman PhilipResnik I08-3008 zeman-resnik-2008-cross @@ -1230,7 +1230,7 @@ <fixed-case>S</fixed-case>ri<fixed-case>S</fixed-case>hell Primo: A Predictive <fixed-case>S</fixed-case>inhala Text Input System SandevaGoonetilleke - YoshihikoHayashi + YoshihikoHayashi YuichiItoh FumioKishino I08-3009 @@ -1259,7 +1259,7 @@ Design of a Rule-based Stemmer for Natural Language Text in <fixed-case>B</fixed-case>engali SandipanSarkar - SivajiBandyopadhyay + SivajiBandyopadhyay I08-3012 sarkar-bandyopadhyay-2008-design @@ -1280,8 +1280,8 @@ Morphology Driven <fixed-case>M</fixed-case>anipuri <fixed-case>POS</fixed-case> Tagger - Thoudam DorenSingh - SivajiBandyopadhyay + Thoudam DorenSingh + SivajiBandyopadhyay I08-3015 singh-bandyopadhyay-2008-morphology @@ -1301,7 +1301,7 @@ Indigenous Languages of <fixed-case>I</fixed-case>ndonesia: Creating Language Resources for Language Preservation - HammamRiza + HammamRiza I08-3018 riza-2008-indigenous @@ -1314,7 +1314,7 @@
Speech to speech machine translation: Biblical chatter from <fixed-case>F</fixed-case>innish to <fixed-case>E</fixed-case>nglish - DavidEllis + DavidEllis MathiasCreutz TimoHonkela MikkoKurimo @@ -1336,7 +1336,7 @@ An Example-based Decoder for Spoken Language Machine Translation Zhou-JunLi - Wen-HanChao + Wen-HanChao Yue-XinChen I08-4001 li-etal-2008-example @@ -1345,7 +1345,7 @@ Automatic Extraction of <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Transliteration Pairs using Dynamic Window and Tokenizer ChengguoJin Seung-HoonNa - Dong-IlKim + Dong-IlKim Jong-HyeokLee I08-4002 jin-etal-2008-automatic @@ -1354,7 +1354,7 @@ Mining Transliterations from Web Query Results: An Incremental Approach Jin-SheaKuo HaizhouLi - Chih-LungLin + Chih-LungLin I08-4003 kuo-etal-2008-mining @@ -1369,7 +1369,7 @@ Use of Event Types for Temporal Relation Identification in <fixed-case>C</fixed-case>hinese Text YuchangCheng MasayukiAsahara - YujiMatsumoto + YujiMatsumoto I08-4005 cheng-etal-2008-use @@ -1383,22 +1383,22 @@
Stochastic Dependency Parsing Based on <fixed-case>A</fixed-case>* Admissible Search - Bor-shenLin + Bor-shenLin I08-4007 lin-2008-stochastic Analyzing <fixed-case>C</fixed-case>hinese Synthetic Words with Tree-based Information and a Survey on <fixed-case>C</fixed-case>hinese Morphologically Derived Words - JiaLu + JiaLu MasayukiAsahara - YujiMatsumoto + YujiMatsumoto I08-4008 lu-etal-2008-analyzing Which Performs Better on In-Vocabulary Word Segmentation: Based on Word or Character? ZhenxingWang - ChangningHuang + ChangningHuang JingboZhu I08-4009 wang-etal-2008-performs @@ -1425,13 +1425,13 @@ GuohuaZhang YuezhongTang ZhanjiangSong - XiaWang + XiaWang I08-4012 li-etal-2008-nokia <fixed-case>C</fixed-case>hinese Word Segmentation and Named Entity Recognition Based on Conditional Random Fields - XinnianMao + XinnianMao YuanDong SaikeHe SenchengBao @@ -1451,7 +1451,7 @@ The Character-based <fixed-case>CRF</fixed-case> Segmenter of <fixed-case>MSRA</fixed-case>&<fixed-case>NEU</fixed-case> for the 4th Bakeoff ZhenxingWang - ChangningHuang + ChangningHuang JingboZhu I08-4015 wang-etal-2008-character @@ -1469,14 +1469,14 @@ Unsupervised Segmentation Helps Supervised Learning of Character Tagging for Word Segmentation and Named Entity Recognition HaiZhao - ChunyuKit + ChunyuKit I08-4017 zhao-kit-2008-unsupervised An Agent-Based Approach to <fixed-case>C</fixed-case>hinese Word Segmentation - Samuel W.K.Chan - Mickey W.C.Chong + Samuel W.K.Chan + Mickey W.C.Chong I08-4018 chan-chong-2008-agent @@ -1501,7 +1501,7 @@ A Morpheme-based Part-of-Speech Tagger for <fixed-case>C</fixed-case>hinese GuohongFu - Jonathan J.Webster + Jonathan J.Webster I08-4021 fu-webster-2008-morpheme @@ -1514,7 +1514,7 @@ <fixed-case>HMM</fixed-case> and <fixed-case>CRF</fixed-case> Based Hybrid Model for <fixed-case>C</fixed-case>hinese Lexical Analysis - DegenHuang + DegenHuang XiaoSun ShidouJiao LishuangLi @@ -1528,7 +1528,7 @@ Ka SengLeong FaiWong YipingLi - Ming ChuiDong + Ming ChuiDong I08-4024 leong-etal-2008-chinese @@ -1542,9 +1542,9 @@ A Study of <fixed-case>C</fixed-case>hinese Lexical Analysis Based on Discriminative Models Guang-LuSun - Cheng-JieSun + Cheng-JieSun KeSun - Xiao-LongWang + Xiao-LongWang I08-4026 sun-etal-2008-study @@ -1557,7 +1557,7 @@ An Improved <fixed-case>CRF</fixed-case> based <fixed-case>C</fixed-case>hinese Language Processing System for <fixed-case>SIGHAN</fixed-case> Bakeoff 2007 XihongWu - XiaojunLin + XiaojunLin XinhaoWang ChunyaoWu YaozhongZhang @@ -1593,7 +1593,7 @@ A <fixed-case>C</fixed-case>hinese Word Segmentation System Based on Cascade Model JianfengZhang - JiahengZheng + JiahengZheng HuZhang HongyeTan I08-4032 @@ -1602,7 +1602,7 @@ <fixed-case>A</fixed-case>chilles: <fixed-case>N</fixed-case>i<fixed-case>CT</fixed-case>/<fixed-case>ATR</fixed-case> <fixed-case>C</fixed-case>hinese Morphological Analyzer for the Fourth Sighan Bakeoff RuiqiangZhang - EiichiroSumita + EiichiroSumita I08-4033 zhang-sumita-2008-achilles @@ -1625,19 +1625,19 @@ Invited Talk: Named Entity Recognition: Different Approaches - SobhaL + SobhaL I08-5001 l-2008-invited Invited Talk: Multilingual Named Entity Recognition - SivajiBandyopadhyay + SivajiBandyopadhyay I08-5002 bandyopadhyay-2008-invited Named Entity Recognition for South and South <fixed-case>E</fixed-case>ast <fixed-case>A</fixed-case>sian Languages: Taking Stock - Anil KumarSingh + Anil KumarSingh I08-5003 singh-2008-named @@ -1645,7 +1645,7 @@ A Hybrid Named Entity Recognition System for South and South <fixed-case>E</fixed-case>ast <fixed-case>A</fixed-case>sian Languages Sujan KumarSaha SanjayChatterji - SandipanDandapat + SandipanDandapat SudeshnaSarkar PabitraMitra I08-5004 @@ -1656,8 +1656,8 @@ KarthikGali HarshitSurana AshwiniVaidya - PraneethShishtla - Dipti MisraSharma + PraneethShishtla + Dipti MisraSharma I08-5005 gali-etal-2008-aggregating @@ -1667,7 +1667,7 @@ RejwanulHaque AmitavaDas VenkateswarluPoka - SivajiBandyopadhyay + SivajiBandyopadhyay I08-5006 ekbal-etal-2008-language @@ -1681,20 +1681,20 @@ <fixed-case>B</fixed-case>engali Named Entity Recognition Using Support Vector Machine AsifEkbal - SivajiBandyopadhyay + SivajiBandyopadhyay I08-5008 ekbal-bandyopadhyay-2008-bengali Domain Focused Named Entity Recognizer for <fixed-case>T</fixed-case>amil Using Conditional Random Fields VijayakrishnaR - SobhaL + SobhaL I08-5009 r-l-2008-domain A Character n-gram Based Approach for Improved Recall in <fixed-case>I</fixed-case>ndian Language <fixed-case>NER</fixed-case> - Praneeth MShishtla + Praneeth MShishtla PrasadPingali VasudevaVarma I08-5010 @@ -1702,14 +1702,14 @@ An Experiment on Automatic Detection of Named Entities in <fixed-case>B</fixed-case>angla - Bidyut BaranChaudhuri + Bidyut BaranChaudhuri SuvankarBhattacharya I08-5011 chaudhuri-bhattacharya-2008-experiment Hybrid Named Entity Recognition System for South and South <fixed-case>E</fixed-case>ast <fixed-case>A</fixed-case>sian Languages - PraveenP + PraveenP Ravi KiranV I08-5012 p-v-2008-hybrid @@ -1732,7 +1732,7 @@ Experiments in <fixed-case>T</fixed-case>elugu <fixed-case>NER</fixed-case>: A Conditional Random Field Approach - Praneeth MShishtla + Praneeth MShishtla KarthikGali PrasadPingali VasudevaVarma @@ -1753,7 +1753,7 @@ The Effects of Language Relatedness on Multilingual Information Retrieval: A Case Study With <fixed-case>I</fixed-case>ndo-<fixed-case>E</fixed-case>uropean and <fixed-case>S</fixed-case>emitic Languages - PeterChew + PeterChew AhmedAbdelali I08-6001 chew-abdelali-2008-effects @@ -1761,7 +1761,7 @@ Identifying Similar and Co-referring Documents Across Languages PattabhiR K Rao T - SobhaL + SobhaL I08-6002 r-k-rao-t-l-2008-identifying @@ -1774,8 +1774,8 @@ Some Experiments in Mining Named Entity Transliteration Pairs from Comparable Corpora - KSaravanan - AKumaran + KSaravanan + AKumaran I08-6004 saravanan-kumaran-2008-experiments @@ -1784,14 +1784,14 @@ GarethJones FabioFantino EamonnNewman - YingZhang + YingZhang I08-6005 jones-etal-2008-domain Statistical Transliteration for Cross Language Information Retrieval using <fixed-case>HMM</fixed-case> alignment model and <fixed-case>CRF</fixed-case> PrasadPingali - SuryaGanesh + SuryaGanesh SreeharshaYella VasudevaVarma I08-6006 @@ -1808,23 +1808,23 @@ A Document Graph Based Query Focused Multi-Document Summarizer SibabrataPaladhi - SivajiBandyopadhyay + SivajiBandyopadhyay I08-6008 paladhi-bandyopadhyay-2008-document <fixed-case>H</fixed-case>indi and <fixed-case>M</fixed-case>arathi to <fixed-case>E</fixed-case>nglish Cross Language Information Retrieval - Manoj KumarChinnakotla + Manoj KumarChinnakotla SagarRanadive - Om P.Damani - PushpakBhattacharyya + Om P.Damani + PushpakBhattacharyya I08-6009 chinnakotla-etal-2008-hindi <fixed-case>B</fixed-case>engali and <fixed-case>H</fixed-case>indi to <fixed-case>E</fixed-case>nglish <fixed-case>CLIR</fixed-case> Evaluation DebasisMandal - SandipanDandapat + SandipanDandapat MayankGupta PratyushBanerjee SudeshnaSarkar @@ -1833,9 +1833,9 @@ <fixed-case>B</fixed-case>engali, <fixed-case>H</fixed-case>indi and <fixed-case>T</fixed-case>elugu to <fixed-case>E</fixed-case>nglish Ad-hoc Bilingual Task - SivajiBandyopadhyay + SivajiBandyopadhyay TapabrataMondal - Sudip KumarNaskar + Sudip KumarNaskar AsifEkbal RejwanulHaque Srinivasa RaoGodavarthy @@ -1845,7 +1845,7 @@ Cross-Lingual Information Retrieval System for <fixed-case>I</fixed-case>ndian Languages JagadeeshJagarlamudi - AKumaran + AKumaran I08-6012 jagarlamudi-kumaran-2008-cross @@ -1883,7 +1883,7 @@ Development of <fixed-case>B</fixed-case>engali Named Entity Tagged Corpus and its Use in <fixed-case>NER</fixed-case> Systems AsifEkbal - SivajiBandyopadhyay + SivajiBandyopadhyay I08-7001 ekbal-bandyopadhyay-2008-development @@ -1898,7 +1898,7 @@ Preliminary <fixed-case>C</fixed-case>hinese Term Classification for Ontology Construction GaoyingCui - QinLu + QinLu WenjieLi I08-7003 cui-etal-2008-preliminary @@ -1947,7 +1947,7 @@ A Discourse Resource for <fixed-case>T</fixed-case>urkish: Annotating Discourse Connectives in the <fixed-case>METU</fixed-case> Corpus DenizZeyrek - BonnieWebber + BonnieWebber I08-7009 zeyrek-webber-2008-discourse @@ -1955,8 +1955,8 @@ Towards an Annotated Corpus of Discourse Relations in <fixed-case>H</fixed-case>indi RashmiPrasad SamarHusain - DiptiSharma - AravindJoshi + DiptiSharma + AravindJoshi I08-7010 prasad-etal-2008-towards @@ -1978,21 +1978,21 @@ Designing a Common <fixed-case>POS</fixed-case>-Tagset Framework for <fixed-case>I</fixed-case>ndian Languages - SankaranBaskaran + SankaranBaskaran KalikaBali TanmoyBhattacharya - PushpakBhattacharyya - Girish NathJha - RajendranS - SaravananK - SobhaL - Subbarao KV. + PushpakBhattacharyya + Girish NathJha + RajendranS + SaravananK + SobhaL + Subbarao KV. I08-7013 baskaran-etal-2008-designing Resources Report on Languages of <fixed-case>I</fixed-case>ndonesia - HammamRiza + HammamRiza I08-7014 riza-2008-resources @@ -2007,7 +2007,7 @@ Corpus building for <fixed-case>M</fixed-case>ongolian language PurevJaimai - OdbayarChimeddorj + OdbayarChimeddorj I08-7016 jaimai-chimeddorj-2008-corpus @@ -2019,7 +2019,7 @@ <fixed-case>B</fixed-case>alanced <fixed-case>C</fixed-case>orpus of <fixed-case>C</fixed-case>ontemporary <fixed-case>W</fixed-case>ritten <fixed-case>J</fixed-case>apanese - KikuoMaekawa + KikuoMaekawa I08-7018 maekawa-2008-balanced @@ -2043,8 +2043,8 @@ <fixed-case>J</fixed-case>apanese Effort Toward Sharing Text and Speech Corpora - ShuichiItahashi - KoitiHasida + ShuichiItahashi + KoitiHasida I08-7021 itahashi-hasida-2008-japanese @@ -2069,7 +2069,7 @@ Transformation-based Sentence Splitting method for Statistical Machine Translation JonghoonLee DonghyeonLee - Gary GeunbaeLee + Gary GeunbaeLee I08-8001 lee-etal-2008-transformation
@@ -2084,7 +2084,7 @@ Phrase-based Machine Transliteration AndrewFinch - EiichiroSumita + EiichiroSumita I08-8003 finch-sumita-2008-phrase @@ -2092,7 +2092,7 @@ Development of <fixed-case>I</fixed-case>ndonesian Large Vocabulary Continuous Speech Recognition System within A-<fixed-case>STAR</fixed-case> Project SakrianiSakti EkaKelana - HammamRiza + HammamRiza ShinsukeSakai KonstantinMarkov SatoshiNakamura @@ -2101,7 +2101,7 @@
Using Confidence Vector in Multi-Stage Speech Recognition - HyungbaeJeon + HyungbaeJeon KyuwoongHwang HoonChung SeunghiKim @@ -2112,7 +2112,7 @@ Toward <fixed-case>A</fixed-case>sian Speech Translation System: Developing Speech Recognition and Machine Translation for <fixed-case>I</fixed-case>ndonesian Language - HammamRiza + HammamRiza OskarRiandi I08-8006 riza-riandi-2008-toward diff --git a/data/xml/I11.xml b/data/xml/I11.xml index f1eab7e60e..dd0f79ad4b 100644 --- a/data/xml/I11.xml +++ b/data/xml/I11.xml @@ -19,23 +19,23 @@ Analyzing the Dynamics of Research by Extracting Key Aspects of Scientific Papers SonalGupta - ChristopherManning + ChristopherManning 1–9 I11-1001 gupta-manning-2011-analyzing Dependency-directed Tree Kernel-based Protein-Protein Interaction Extraction from Biomedical Literature - LonghuaQian - GuodongZhou + LonghuaQian + GuodongZhou 10–19 I11-1002 qian-zhou-2011-dependency Learning Logical Structures of Paragraphs in Legal Articles - Ngo XuanBach - Nguyen LeMinh + Ngo XuanBach + Nguyen LeMinh Tran ThiOanh AkiraShimazu 20–28 @@ -56,7 +56,7 @@ Context-Sensitive Syntactic Source-Reordering by Statistical Transduction MaximKhalilov - KhalilSima’an + KhalilSima’an 38–46 I11-1005 khalilov-simaan-2011-context @@ -64,10 +64,10 @@ Discriminative Phrase-based Lexicalized Reordering Models using Weighted Reordering Graphs WangLing - JoãoGraça - DavidMartins de Matos + JoãoGraça + DavidMartins de Matos IsabelTrancoso - Alan WBlack + Alan WBlack 47–55 I11-1006 ling-etal-2011-discriminative @@ -75,7 +75,7 @@ Active Learning Strategies for Support Vector Machines, Application to Temporal Relation Classification Seyed AbolghasemMirroshandel - GholamrezaGhassem-Sani + GholamrezaGhassem-Sani AlexisNasr 56–64 I11-1007 @@ -85,7 +85,7 @@ A Fast Accurate Two-stage Training Algorithm for <fixed-case>L</fixed-case>1-regularized <fixed-case>CRF</fixed-case>s with Heuristic Line Search Strategy JinlongZhou XipengQiu - XuanjingHuang + XuanjingHuang 65–74 I11-1008 zhou-etal-2011-fast @@ -112,7 +112,7 @@ SriparnaSaha AsifEkbal OlgaUryupina - MassimoPoesio + MassimoPoesio 93–101 I11-1011 saha-etal-2011-single @@ -122,7 +122,7 @@ BinChen JianSu Sinno JialinPan - Chew LimTan + Chew LimTan 102–110 I11-1012 chen-etal-2011-unified @@ -150,7 +150,7 @@ HassanSajjad NadirDurrani HelmutSchmid - AlexanderFraser + AlexanderFraser 129–137 I11-1015 sajjad-etal-2011-comparing @@ -158,7 +158,7 @@ A Semantic-Specific Model for <fixed-case>C</fixed-case>hinese Named Entity Translation YufengChen - ChengqingZong + ChengqingZong 138–146 I11-1016 chen-zong-2011-semantic @@ -168,7 +168,7 @@ TomoyaMizumoto MamoruKomachi MasaakiNagata - YujiMatsumoto + YujiMatsumoto 147–155 I11-1017 mizumoto-etal-2011-mining @@ -178,7 +178,7 @@ ThamarSolorio SangitaPillay SindhuRaghavan - ManuelMontes y Gómez + ManuelMontes y Gómez 156–164 I11-1018 solorio-etal-2011-modality @@ -187,7 +187,7 @@ Keyphrase Extraction from Online News Using Binary Integer Programming ZhuoyeDing QiZhang - XuanjingHuang + XuanjingHuang 165–173 I11-1019 ding-etal-2011-keyphrase @@ -196,8 +196,8 @@ Improving Related Entity Finding via Incorporating Homepages and Recognizing Fine-grained Entities YouzhengWu ChioriHori - HisashiKawai - HidekiKashioka + HisashiKawai + HidekiKashioka 174–182 I11-1020 wu-etal-2011-improving @@ -213,10 +213,10 @@ Semantic Role Labeling Without Treebanks? - StephenBoxwell + StephenBoxwell ChrisBrew JasonBaldridge - DennisMehay + DennisMehay SujithRavi 192–200 I11-1022 @@ -226,7 +226,7 @@ <fixed-case>J</fixed-case>apanese Predicate Argument Structure Analysis Exploiting Argument Position and Type YutaHayashibe MamoruKomachi - YujiMatsumoto + YujiMatsumoto 201–209 I11-1023 hayashibe-etal-2011-japanese @@ -234,7 +234,7 @@ An Empirical Study on Compositionality in Compound Nouns SivaReddy - DianaMcCarthy + DianaMcCarthy SureshManandhar 210–218 I11-1024 @@ -243,7 +243,7 @@ Feature-Rich Log-Linear Lexical Model for Latent Variable <fixed-case>PCFG</fixed-case> Grammars ZhongqiangHuang - MaryHarper + MaryHarper 219–227 I11-1025 huang-harper-2011-feature @@ -267,11 +267,11 @@ <fixed-case>T</fixed-case>reeblazing: Using External Treebanks to Filter Parse Forests for Parse Selection and Treebanking - AndrewMacKinlay + AndrewMacKinlay RebeccaDridan - DanFlickinger + DanFlickinger StephanOepen - TimothyBaldwin + TimothyBaldwin 246–254 I11-1028 mackinlay-etal-2011-treeblazing @@ -281,7 +281,7 @@ PaulMcNamee JamesMayfield DawnLawrie - DouglasOard + DouglasOard DavidDoermann 255–263 I11-1029 @@ -310,7 +310,7 @@ Identifying Event Descriptions using Co-training with Online News Summaries William YangWang KapilThadani - KathleenMcKeown + KathleenMcKeown 282–291 I11-1032 wang-etal-2011-identifying @@ -320,7 +320,7 @@ TeruakiOka MamoruKomachi ToshinobuOgiso - YujiMatsumoto + YujiMatsumoto 292–300 I11-1033 oka-etal-2011-automatic @@ -336,7 +336,7 @@ Improving <fixed-case>C</fixed-case>hinese Word Segmentation and <fixed-case>POS</fixed-case> Tagging with Semi-supervised Methods Using Large Auto-Analyzed Data YiouWang - Jun’ichiKazama + Jun’ichiKazama YoshimasaTsuruoka WenliangChen YujieZhang @@ -348,7 +348,7 @@ <fixed-case>CODACT</fixed-case>: Towards Identifying Orthographic Variants in Dialectal <fixed-case>A</fixed-case>rabic PradeepDasigi - MonaDiab + MonaDiab 318–326 I11-1036 dasigi-diab-2011-codact @@ -363,7 +363,7 @@ Fine-Grained Sentiment Analysis with Structural Features - CäciliaZirn + CäciliaZirn MathiasNiepert HeinerStuckenschmidt MichaelStrube @@ -374,7 +374,7 @@ Predicting Opinion Dependency Relations for Opinion Analysis Lun-WeiKu - Ting-HaoHuang + Ting-HaoHuang Hsin-HsiChen 345–353 I11-1039 @@ -383,14 +383,14 @@ Detecting and Blocking False Sentiment Propagation Hye-JinMin - Jong C.Park + Jong C.Park 354–362 I11-1040 min-park-2011-detecting Efficient induction of probabilistic word classes with <fixed-case>LDA</fixed-case> - GrzegorzChrupala + GrzegorzChrupala 363–372 I11-1041 chrupala-2011-efficient @@ -406,7 +406,7 @@ Labeling Unlabeled Data using Cross-Language Guided Clustering - SachindraJoshi + SachindraJoshi DanishContractor SumitNegi 383–391 @@ -418,14 +418,14 @@ YaliangLi JingJiang Hai LeongChieu - Kian Ming A.Chai + Kian Ming A.Chai 392–400 I11-1044 li-etal-2011-extracting Attribute Extraction from Synthetic Web Search Queries - MariusPaşca + MariusPaşca 401–409 I11-1045 pasca-2011-attribute @@ -456,7 +456,7 @@ Crawling Back and Forth: Using Back and Out Links to Locate Bilingual Sites LucianoBarbosa - SrinivasBangalore + SrinivasBangalore Vivek KumarRangarajan Sridhar 429–437 I11-1048 @@ -465,15 +465,15 @@ Grammar Induction from Text Using Small Syntactic Prototypes PrachyaBoonkwan - MarkSteedman + MarkSteedman 438–446 I11-1049 boonkwan-steedman-2011-grammar Transferring Syntactic Relations from <fixed-case>E</fixed-case>nglish to <fixed-case>H</fixed-case>indi Using Alignments on Local Word Groups - AswarthDara - PrashanthMannem + AswarthDara + PrashanthMannem Hemanth SagarBayyarapu AvineshPVS 447–455 @@ -500,8 +500,8 @@ <fixed-case>T</fixed-case>ri<fixed-case>S</fixed-case>: A Statistical Sentence Simplifier with Log-linear Models and Margin-based Discriminative Training NguyenBach QinGao - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel 474–482 I11-1053 bach-etal-2011-tris @@ -511,7 +511,7 @@ PoHu ChengSun LongfeiWu - DonghongJi + DonghongJi ChongTeng 483–490 I11-1054 @@ -549,9 +549,9 @@ Structured and Extended Named Entity Evaluation in Automatic Speech Transcriptions OlivierGalibert - SophieRosset + SophieRosset CyrilGrouin - PierreZweigenbaum + PierreZweigenbaum LudovicQuintard 518–526 I11-1058 @@ -567,13 +567,13 @@ Similarity Based Language Model Construction for Voice Activated Open-Domain Question Answering - IstvánVarga + IstvánVarga KiyonoriOhtake KentaroTorisawa StijnDe Saeger TeruhisaMisu ShigekiMatsuda - Jun’ichiKazama + Jun’ichiKazama 536–544 I11-1060 varga-etal-2011-similarity @@ -589,7 +589,7 @@ Cross-domain Feature Selection for Language Identification MarcoLui - TimothyBaldwin + TimothyBaldwin 553–561 I11-1062 lui-baldwin-2011-cross @@ -598,7 +598,7 @@ A <fixed-case>W</fixed-case>ikipedia-<fixed-case>LDA</fixed-case> Model for Entity Linking with Batch Size Changing Instance Selection WeiZhang JianSu - Chew LimTan + Chew LimTan 562–570 I11-1063 zhang-etal-2011-wikipedia @@ -617,7 +617,7 @@ YunqingXia MinZhang HaizhouLi - FangZheng + FangZheng 580–588 I11-1065 tang-etal-2011-clgvsm @@ -627,7 +627,7 @@ JianfengZhang YunqingXia BinMa - JianminYao + JianminYao YuHong 589–597 I11-1066 @@ -653,7 +653,7 @@ A <fixed-case>POS</fixed-case>-based Ensemble Model for Cross-domain Sentiment Classification RuiXia - ChengqingZong + ChengqingZong 614–622 I11-1069 xia-zong-2011-pos @@ -661,7 +661,7 @@ Ensemble-style Self-training on Citation Classification CailingDong - UlrichSchäfer + UlrichSchäfer 623–631 I11-1070 I11-1070.Datasets.zip @@ -681,8 +681,8 @@ YinggongZhao ShujieLiu YangshengJi - JiajunChen - GuodongZhou + JiajunChen + GuodongZhou 641–648 I11-1072 zhao-etal-2011-transductive @@ -699,7 +699,7 @@ Going Beyond Word Cooccurrences in Global Lexical Selection for Statistical Machine Translation using a Multilayer Perceptron AlexandrePatry - PhilippeLanglais + PhilippeLanglais 658–666 I11-1074 patry-langlais-2011-going @@ -707,7 +707,7 @@ System Combination Using Discriminative Cross-Adaptation JacobDevlin - Antti-VeikkoRosti + Antti-VeikkoRosti SankaranarayananAnanthakrishnan SpyrosMatsoukas 667–675 @@ -732,9 +732,9 @@ It Takes Two to Tango: A Bilingual Unsupervised Approach for Estimating Sense Distributions using Expectation Maximization - Mitesh M.Khapra + Mitesh M.Khapra SalilJoshi - PushpakBhattacharyya + PushpakBhattacharyya 695–704 I11-1078 khapra-etal-2011-takes @@ -742,8 +742,8 @@ Dynamic and Static Prototype Vectors for Semantic Composition SivaReddy - IoannisKlapaftis - DianaMcCarthy + IoannisKlapaftis + DianaMcCarthy SureshManandhar 705–713 I11-1079 @@ -752,7 +752,7 @@ Using Prediction from Sentential Scope to Build a Pseudo Co-Testing Learner for Event Extraction ShashaLiao - RalphGrishman + RalphGrishman 714–722 I11-1080 liao-grishman-2011-using @@ -779,7 +779,7 @@ SeokhwanKim MinwooJeong JonghoonLee - Gary GeunbaeLee + Gary GeunbaeLee 741–748 I11-1083 kim-etal-2011-cross @@ -789,7 +789,7 @@ TadayoshiHara TakuyaMatsuzaki YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 749–757 I11-1084 hara-etal-2011-exploring @@ -805,7 +805,7 @@ An Empirical Comparison of Unknown Word Prediction Methods KostadinCholakov - Gertjanvan Noord + Gertjanvan Noord ValiaKordoni YiZhang 767–775 @@ -851,8 +851,8 @@ Translation Quality Indicators for Pivot-based Statistical <fixed-case>MT</fixed-case> - MichaelPaul - EiichiroSumita + MichaelPaul + EiichiroSumita 811–818 I11-1091 paul-sumita-2011-translation @@ -862,7 +862,7 @@ SankaranarayananAnanthakrishnan ShivVitaladevuni RohitPrasad - PremNatarajan + PremNatarajan 819–827 I11-1092 ananthakrishnan-etal-2011-source @@ -871,7 +871,7 @@ A Named Entity Recognition Method based on Decomposition and Concatenation of Word Chunks TomoyaIwakura HiroyaTakamura - ManabuOkumura + ManabuOkumura 828–836 I11-1093 iwakura-etal-2011-named @@ -889,8 +889,8 @@ Entity Disambiguation Using a <fixed-case>M</fixed-case>arkov-<fixed-case>L</fixed-case>ogic Network Hong-JieDai - Richard Tzong-HanTsai - Wen-LianHsu + Richard Tzong-HanTsai + Wen-LianHsu 846–855 I11-1095 dai-etal-2011-entity @@ -919,10 +919,10 @@ Extending <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with Hypernyms and Siblings Acquired from <fixed-case>W</fixed-case>ikipedia IchiroYamada - Jong-HoonOh + Jong-HoonOh ChikaraHashimoto KentaroTorisawa - Jun’ichiKazama + Jun’ichiKazama StijnDe Saeger TakuyaKawada 874–882 @@ -940,12 +940,12 @@ From News to Comment: Resources and Benchmarks for Parsing the Language of Web 2.0 JenniferFoster - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu JoachimWagner - JosephLe Roux + JosephLe Roux JoakimNivre DeirdreHogan - Josefvan Genabith + Josefvan Genabith 893–901 I11-1100 foster-etal-2011-news @@ -955,8 +955,8 @@ MasaakiTsuchida KentaroTorisawa StijnDe Saeger - Jong-HoonOh - Jun’ichiKazama + Jong-HoonOh + Jun’ichiKazama ChikaraHashimoto HayatoOhwada 902–910 @@ -966,7 +966,7 @@ Fleshing it out: A Supervised Approach to <fixed-case>MWE</fixed-case>-token and <fixed-case>MWE</fixed-case>-type Classification RichardFothergill - TimothyBaldwin + TimothyBaldwin 911–919 I11-1102 fothergill-baldwin-2011-fleshing @@ -976,7 +976,7 @@ HikaruYokono TakaakiHasegawa GenichiroKikui - ManabuOkumura + ManabuOkumura 920–928 I11-1103 yokono-etal-2011-identification @@ -1005,7 +1005,7 @@ <fixed-case>K</fixed-case>2<fixed-case>Q</fixed-case>: Generating Natural Language Questions from Keywords with User Refinements ZhichengZheng XianceSi - EdwardChang + EdwardChang XiaoyanZhu 947–955 I11-1106 @@ -1015,8 +1015,8 @@ Answering Complex Questions via Exploiting Social <fixed-case>Q</fixed-case>&<fixed-case>A</fixed-case> Collection YouzhengWu ChioriHori - HisashiKawai - HidekiKashioka + HisashiKawai + HidekiKashioka 956–964 I11-1107 wu-etal-2011-answering @@ -1042,7 +1042,7 @@ Using Text Reviews for Product Entity Completion MrinmayaSachan - TanveerFaruquie + TanveerFaruquie L. V.Subramaniam MukeshMohania 983–991 @@ -1060,7 +1060,7 @@ Efficient Near-Duplicate Detection for <fixed-case>Q</fixed-case>&<fixed-case>A</fixed-case> Forum YanWu QiZhang - XuanjingHuang + XuanjingHuang 1001–1009 I11-1112 wu-etal-2011-efficient @@ -1079,7 +1079,7 @@ Harvesting Related Entities with a Search Engine ShuqiSun ShiqiZhao - MuyunYang + MuyunYang HaifengWang ShengLi 1019–1027 @@ -1105,7 +1105,7 @@ Passage Retrieval for Information Extraction using Distant Supervision WeiXu - RalphGrishman + RalphGrishman LeZhao 1046–1054 I11-1117 @@ -1115,7 +1115,7 @@ Using Context Inference to Improve Sentence Ordering for Multi-document Summarization PeifengLi GuangxiDeng - QiaomingZhu + QiaomingZhu 1055–1061 I11-1118 li-etal-2011-using @@ -1123,7 +1123,7 @@ Enhancing extraction based summarization with outside word space ChristianSmith - ArneJönsson + ArneJönsson 1062–1070 I11-1119 smith-jonsson-2011-enhancing @@ -1141,7 +1141,7 @@ Relational Lasso —<fixed-case>A</fixed-case>n Improved Method Using the Relations Among Features— KotaroKitagawa - KumikoTanaka-Ishii + KumikoTanaka-Ishii 1080–1088 I11-1121 kitagawa-tanaka-ishii-2011-relational @@ -1150,7 +1150,7 @@ Enhance Top-down method with Meta-Classification for Very Large-scale Hierarchical Classification Xiao-LinWang HaiZhao - Bao-LiangLu + Bao-LiangLu 1089–1097 I11-1122 wang-etal-2011-enhance @@ -1158,7 +1158,7 @@ Using Syntactic and Shallow Semantic Kernels to Improve Multi-Modality Manifold-Ranking for Topic-Focused Multi-Document Summarization YlliasChali - Sadid A.Hasan + Sadid A.Hasan KaisarImam 1098–1106 I11-1123 @@ -1167,7 +1167,7 @@ Automatic Determination of a Domain Adaptation Method for Word Sense Disambiguation Using Decision Tree Learning KanakoKomiya - ManabuOkumura + ManabuOkumura 1107–1115 I11-1124 komiya-okumura-2011-automatic @@ -1185,7 +1185,7 @@ Jointly Extracting <fixed-case>J</fixed-case>apanese Predicate-Argument Relation with <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic KatsumasaYoshikawa MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 1125–1133 I11-1126 yoshikawa-etal-2011-jointly @@ -1201,8 +1201,8 @@ Automatic Analysis of Semantic Coherence in Academic Abstracts Written in <fixed-case>P</fixed-case>ortuguese - Vinícius Mourão Alvesde Souza - Valéria DelisandraFeltrim + Vinícius Mourão Alvesde Souza + Valéria DelisandraFeltrim 1144–1152 I11-1128 de-souza-feltrim-2011-automatic @@ -1233,9 +1233,9 @@ Towards Context-Based Subjectivity Analysis - FarahBenamara + FarahBenamara BaptisteChardon - YannickMathieu + YannickMathieu VladimirPopescu 1180–1188 I11-1132 @@ -1243,9 +1243,9 @@ Compression Methods by Code Mapping and Code Dividing for <fixed-case>C</fixed-case>hinese Dictionary Stored in a Double-Array Trie - HuidanLiu - MinghuaNuo - LonglongMa + HuidanLiu + MinghuaNuo + LonglongMa JianWu YepingHe 1189–1197 @@ -1255,7 +1255,7 @@ Functional Elements and <fixed-case>POS</fixed-case> Categories QiuyeZhao - MitchMarcus + MitchMarcus 1198–1206 I11-1134 zhao-marcus-2011-functional @@ -1276,7 +1276,7 @@ JunHatori TakuyaMatsuzaki YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 1216–1224 I11-1136 hatori-etal-2011-incremental @@ -1291,8 +1291,8 @@ Linguistic Phenomena, Analyses, and Representations: Understanding Conversion between Treebanks - RajeshBhatt - OwenRambow + RajeshBhatt + OwenRambow FeiXia 1234–1242 I11-1138 @@ -1311,7 +1311,7 @@ Parse Reranking Based on Higher-Order Lexical Dependencies ZhiguoWang - ChengqingZong + ChengqingZong 1251–1259 I11-1140 wang-zong-2011-parse @@ -1319,7 +1319,7 @@ Improving Part-of-speech Tagging for Context-free Parsing XiaoChen - ChunyuKit + ChunyuKit 1260–1268 I11-1141 chen-kit-2011-improving @@ -1327,7 +1327,7 @@ Models Cascade for Tree-Structured Named Entity Detection MarcoDinarelli - SophieRosset + SophieRosset 1269–1278 I11-1142 dinarelli-rosset-2011-models @@ -1362,8 +1362,8 @@ Integration of Reduplicated Multiword Expressions and Named Entities in a Phrase Based Statistical Machine Translation System - ThoudamDoren Singh - SivajiBandyopadhyay + ThoudamDoren Singh + SivajiBandyopadhyay 1304–1312 I11-1146 doren-singh-bandyopadhyay-2011-integration @@ -1413,7 +1413,7 @@ Clause-Based Reordering Constraints to Improve Statistical Machine Translation AnanthakrishnanRamanathan - PushpakBhattacharyya + PushpakBhattacharyya KarthikVisweswariah KushalLadha AnkurGandhe @@ -1434,16 +1434,16 @@ Enhancing scarce-resource language translation through pivot combinations - MartaR. Costa-jussà - CarlosHenríquez - Rafael E.Banchs + MartaR. Costa-jussà + CarlosHenríquez + Rafael E.Banchs 1361–1365 I11-1154 r-costa-jussa-etal-2011-enhancing A Baseline System for <fixed-case>C</fixed-case>hinese Near-Synonym Choice - Liang-ChihYu + Liang-ChihYu Wei-NanChien Shih-TingChen 1366–1370 @@ -1462,7 +1462,7 @@ Text Patterns and Compression Models for Semantic Class Learning Chung-YaoChuang Yi-HsunLee - Wen-LianHsu + Wen-LianHsu 1376–1381 I11-1157 chuang-etal-2011-text @@ -1470,7 +1470,7 @@ Potts Model on the Case Fillers for Word Sense Disambiguation HiroyaTakamura - ManabuOkumura + ManabuOkumura 1382–1386 I11-1158 takamura-okumura-2011-potts @@ -1502,8 +1502,8 @@ Going Beyond Text: A Hybrid Image-Text Approach for Measuring Word Relatedness - Chee WeeLeong - RadaMihalcea + Chee WeeLeong + RadaMihalcea 1403–1407 I11-1162 leong-mihalcea-2011-going @@ -1529,7 +1529,7 @@ Compiling Learner Corpus Data of Linguistic Output and Language Processing in Speaking, Listening, Writing, and Reading KatsunoriKotani - TakehikoYoshimi + TakehikoYoshimi HiroakiNanjo HitoshiIsahara 1418–1422 @@ -1560,7 +1560,7 @@ XuanWang RuifengXu JunXu - ShiXiFan + ShiXiFan 1432–1436 I11-1168 zhang-etal-2011-diversifying @@ -1568,7 +1568,7 @@ Beyond Normalization: Pragmatics of Word Form in Text Messages TylerBaldwin - JoyceChai + JoyceChai 1437–1441 I11-1169 baldwin-chai-2011-beyond @@ -1600,16 +1600,16 @@ Reduction of Search Space to Annotate Monolingual Corpora - PrajolShrestha + PrajolShrestha ChristineJacquin - BeatriceDaille + BeatriceDaille 1457–1461 I11-1173 shrestha-etal-2011-reduction Toward a Parallel Corpus of Spoken <fixed-case>C</fixed-case>antonese and Written <fixed-case>C</fixed-case>hinese - JohnLee + JohnLee 1462–1466 I11-1174 lee-2011-toward @@ -1617,8 +1617,8 @@ Query Expansion for <fixed-case>IR</fixed-case> using Knowledge-Based Relatedness ArantxaOtegi - XabierArregi - EnekoAgirre + XabierArregi + EnekoAgirre 1467–1471 I11-1175 otegi-etal-2011-query @@ -1636,7 +1636,7 @@ Proceedings of the IJCNLP 2011 System Demonstrations I11-2 - KennethChurch + KennethChurch YunqingXia Asian Federation of Natural Language Processing
Chiang Mai, Thailand
@@ -1651,7 +1651,7 @@ <fixed-case>W</fixed-case>iki<fixed-case>N</fixed-case>et<fixed-case>TK</fixed-case> – A Tool Kit for <fixed-case>E</fixed-case>mbedding<fixed-case>W</fixed-case>orld Knowledge in <fixed-case>NLP</fixed-case> Applications AlexJudea - ViviNastase + ViviNastase MichaelStrube 1–4 I11-2001 @@ -1667,7 +1667,7 @@ <fixed-case>TTC</fixed-case> <fixed-case>T</fixed-case>erm<fixed-case>S</fixed-case>uite - A <fixed-case>UIMA</fixed-case> Application for Multilingual Terminology Extraction from Comparable Corpora JérômeRocheteau - BéatriceDaille + BéatriceDaille 9–12 I11-2003 rocheteau-daille-2011-ttc diff --git a/data/xml/I13.xml b/data/xml/I13.xml index d8138c48fa..83da9e67ab 100644 --- a/data/xml/I13.xml +++ b/data/xml/I13.xml @@ -4,8 +4,8 @@ Proceedings of the Sixth International Joint Conference on Natural Language Processing I13-1 - RuslanMitkov - Jong C.Park + RuslanMitkov + Jong C.Park Asian Federation of Natural Language Processing
Nagoya, Japan
October @@ -18,7 +18,7 @@ Semi-Supervised Answer Extraction from Discussion Forums - RoseCatherine + RoseCatherine RashmiGangadharaiah KarthikVisweswariah DineshRaghu @@ -29,7 +29,7 @@ <fixed-case>W</fixed-case>ord<fixed-case>T</fixed-case>opic-<fixed-case>M</fixed-case>ulti<fixed-case>R</fixed-case>ank: A New Method for Automatic Keyphrase Extraction FanZhang - Lian’enHuang + Lian’enHuang BoPeng 10–18 I13-1002 @@ -47,9 +47,9 @@ Learning a Replacement Model for Query Segmentation with Consistency in Search Logs WeiZhang YunboCao - Chin-YewLin + Chin-YewLin JianSu - Chew-LimTan + Chew-LimTan 28–36 I13-1004 zhang-etal-2013-learning @@ -76,15 +76,15 @@ JiříMírovský KateřinaRysová MagdalénaRysová - EvaHajičová + EvaHajičová 55–63 I13-1007 mirovsky-etal-2013-pre <fixed-case>A</fixed-case>nimacy Acquisition Using Morphological Case - Riyaz AhmadBhat - Dipti MisraSharma + Riyaz AhmadBhat + Dipti MisraSharma 64–72 I13-1008 bhat-sharma-2013-animacy @@ -94,7 +94,7 @@ TakuyaMatsuzaki HidenaoIwane HirokazuAnai - NorikoArai + NorikoArai 73–81 I13-1009 matsuzaki-etal-2013-complexity @@ -114,7 +114,7 @@ AnnaNedoluzhko PavlínaJínová ŠárkaZikánová - EvaHajičová + EvaHajičová 91–99 I13-1011 polakova-etal-2013-introducing @@ -129,7 +129,7 @@ A Weakly Supervised <fixed-case>B</fixed-case>ayesian Model for Violence Detection in Social Media - Amparo ElizabethCano Basave + Amparo ElizabethCano Basave YulanHe KangLiu JunZhao @@ -143,7 +143,7 @@ YeyunGong YaqianZhou QiZhang - XuanjingHuang + XuanjingHuang 118–126 I13-1014 ding-etal-2013-detecting @@ -153,7 +153,7 @@ AoboWang Min-YenKan DanielAndrade - TakashiOnishi + TakashiOnishi KaiIshikawa 127–135 I13-1015 @@ -164,7 +164,7 @@ Feature Selection Using a Semantic Hierarchy for Event Recognition and Type Classification YoonjaeJeong - Sung-HyonMyaeng + Sung-HyonMyaeng 136–144 I13-1016 jeong-myaeng-2013-feature @@ -189,7 +189,7 @@ A Simple Approach to Unknown Word Processing in <fixed-case>J</fixed-case>apanese Morphological Analysis RyoheiSasano SadaoKurohashi - ManabuOkumura + ManabuOkumura 162–170 I13-1019 sasano-etal-2013-simple @@ -205,7 +205,7 @@ Capturing Long-distance Dependencies in Sequence Models: A Case Study of <fixed-case>C</fixed-case>hinese Part-of-speech Tagging - WeiweiSun + WeiweiSun XiaochangPeng XiaojunWan 180–188 @@ -217,8 +217,8 @@ SambhavJain NamanJain AniruddhaTammewar - Riyaz AhmadBhat - DiptiSharma + Riyaz AhmadBhat + DiptiSharma 189–197 I13-1022 jain-etal-2013-exploring @@ -226,7 +226,7 @@ Towards Robust Cross-Domain Domain Adaptation for Part-of-Speech Tagging TobiasSchnabel - HinrichSchütze + HinrichSchütze 198–206 I13-1023 schnabel-schutze-2013-towards @@ -235,7 +235,7 @@ Dependency Parsing for Identifying <fixed-case>H</fixed-case>ungarian Light Verb Constructions VeronikaVincze JánosZsibrita - IstvánNagy T. + IstvánNagy T. 207–215 I13-1024 vincze-etal-2013-dependency @@ -243,7 +243,7 @@ Written Dialog and Social Power: Manifestations of Different Types of Power in Dialog Behavior VinodkumarPrabhakaran - OwenRambow + OwenRambow 216–224 I13-1025 prabhakaran-rambow-2013-written @@ -304,9 +304,9 @@ Tuning <fixed-case>SMT</fixed-case> with a Large Number of Features via Online Feature Grouping LemaoLiu - TiejunZhao + TiejunZhao TaroWatanabe - EiichiroSumita + EiichiroSumita 279–285 I13-1032 @@ -324,7 +324,7 @@ Bootstrapping Large-scale Named Entities using <fixed-case>URL</fixed-case>-Text Hybrid Patterns - ChaoZhang + ChaoZhang ShiqiZhao HaifengWang 293–301 @@ -355,16 +355,16 @@ HuanChen QiZhang JinQian - XuanjingHuang + XuanjingHuang 320–328 I13-1037 chen-etal-2013-chinese Full-coverage Identification of <fixed-case>E</fixed-case>nglish Light Verb Constructions - IstvánNagy T. + IstvánNagy T. VeronikaVincze - RichárdFarkas + RichárdFarkas 329–337 I13-1038 nagy-t-etal-2013-full @@ -387,10 +387,10 @@ How Noisy Social Media Text, How Diffrnt Social Media Sources? - TimothyBaldwin + TimothyBaldwin PaulCook MarcoLui - AndrewMacKinlay + AndrewMacKinlay LiWang 356–364 I13-1041 @@ -407,7 +407,7 @@ Readability Indices for Automatic Evaluation of Text Simplification Systems: A Feasibility Study for <fixed-case>S</fixed-case>panish - SanjaŠtajner + SanjaŠtajner HoracioSaggion 374–382 I13-1043 @@ -423,7 +423,7 @@ Automatically Developing a Fine-grained <fixed-case>A</fixed-case>rabic Named Entity Corpus and Gazetteer by utilizing <fixed-case>W</fixed-case>ikipedia FahdAlotaibi - MarkLee + MarkLee 392–400 I13-1045 alotaibi-lee-2013-automatically @@ -431,7 +431,7 @@ Ranking Translation Candidates Acquired from Comparable Corpora RimaHarastani - BéatriceDaille + BéatriceDaille EmmanuelMorin 401–409 I13-1046 @@ -441,7 +441,7 @@ Using the Semantic-Syntactic Interface for Reliable <fixed-case>A</fixed-case>rabic Modality Annotation RaniaAl-Sabbagh JanaDiesner - RoxanaGirju + RoxanaGirju 410–418 I13-1047 al-sabbagh-etal-2013-using @@ -449,8 +449,8 @@ Mapping Rules for Building a <fixed-case>T</fixed-case>unisian Dialect Lexicon and Generating Corpora RahmaBoujelbane - MariemEllouze Khemekhem - Lamia HadrichBelguith + MariemEllouze Khemekhem + Lamia HadrichBelguith 419–428 I13-1048 boujelbane-etal-2013-mapping @@ -465,8 +465,8 @@ Scalable Variational Inference for Extracting Hierarchical Phrase-based Translation Rules - BaskaranSankaran - GholamrezaHaffari + BaskaranSankaran + GholamrezaHaffari AnoopSarkar 438–446 I13-1050 @@ -476,7 +476,7 @@ A Topic-Triggered Language Model for Statistical Machine Translation HengYu JinsongSu - YajuanLv + YajuanLv QunLiu 447–454 I13-1051 @@ -523,7 +523,7 @@ Uncovering Distributional Differences between Synonyms and Antonyms in a Word Space Model SilkeScheible - SabineSchulte im Walde + SabineSchulte im Walde SylviaSpringorum 489–497 I13-1056 @@ -532,8 +532,8 @@ Multilingual Word Sense Disambiguation Using <fixed-case>W</fixed-case>ikipedia BharathDandala - RadaMihalcea - RazvanBunescu + RadaMihalcea + RazvanBunescu 498–506 I13-1057 dandala-etal-2013-multilingual @@ -543,7 +543,7 @@ RuiYan HanJiang MirellaLapata - Shou-DeLin + Shou-DeLin XueqiangLv XiaomingLi 507–515 @@ -561,7 +561,7 @@ Learning a Product of Experts with Elitist Lasso MengqiuWang - Christopher D.Manning + Christopher D.Manning 525–533 I13-1060 wang-manning-2013-learning @@ -579,7 +579,7 @@ <fixed-case>T</fixed-case>opic<fixed-case>R</fixed-case>ank: Graph-Based Topic Ranking for Keyphrase Extraction AdrienBougouin FlorianBoudin - BéatriceDaille + BéatriceDaille 543–551 I13-1062 bougouin-etal-2013-topicrank @@ -588,14 +588,14 @@ Understanding the Semantic Intent of Natural Language Query JuanXu QiZhang - XuanjingHuang + XuanjingHuang 552–560 I13-1063 xu-etal-2013-understanding Sentiment Classification for Movie Reviews in <fixed-case>C</fixed-case>hinese Using Parsing-based Methods - Wen-JuanHou + Wen-JuanHou Chuang-PingChang 561–569 I13-1064 @@ -604,7 +604,7 @@ Sentiment Aggregation using <fixed-case>C</fixed-case>oncept<fixed-case>N</fixed-case>et Ontology SubhabrataMukherjee - SachindraJoshi + SachindraJoshi 570–578 I13-1065 mukherjee-joshi-2013-sentiment @@ -612,11 +612,11 @@ Detecting Cyberbullying Entries on Informal School Websites Based on Category Relevance Maximization TaiseiNitta - FumitoMasui + FumitoMasui MichalPtaszynski YasutomoKimura RafalRzepka - KenjiAraki + KenjiAraki 579–586 I13-1066 nitta-etal-2013-detecting @@ -636,7 +636,7 @@ KeSun ShiqiZhao HaifengWang - MuyunYang + MuyunYang ShengLi 596–604 I13-1068 @@ -646,14 +646,14 @@ Labeled Alignment for Recognizing Textual Entailment XiaolinWang HaiZhao - Bao-LiangLu + Bao-LiangLu 605–613 I13-1069 wang-etal-2013-labeled Context-Based <fixed-case>C</fixed-case>hinese Word Segmentation using <fixed-case>SVM</fixed-case> Machine-Learning Algorithm without Dictionary Support - Chia-mingLee + Chia-mingLee Chien-KangHuang 614–622 I13-1070 @@ -670,7 +670,7 @@ Detecting Polysemy in Hard and Soft Cluster Analyses of <fixed-case>G</fixed-case>erman Preposition Vector Spaces SylviaSpringorum - SabineSchulte im Walde + SabineSchulte im Walde JasonUtt 632–640 I13-1072 @@ -706,7 +706,7 @@ Detecting Domain Dedicated Polar Words RakshaSharma - PushpakBhattacharyya + PushpakBhattacharyya 661–666 I13-1076 sharma-bhattacharyya-2013-detecting @@ -716,7 +716,7 @@ TanveerAli DavidSchramm MarinaSokolova - DianaInkpen + DianaInkpen 667–673 I13-1077 ali-etal-2013-hear @@ -726,8 +726,8 @@ Braja GopalPatra HiroyaTakamura DipankarDas - ManabuOkumura - SivajiBandyopadhyay + ManabuOkumura + SivajiBandyopadhyay 674–679 I13-1078 patra-etal-2013-construction @@ -744,7 +744,7 @@ Unsupervised Word Class Induction for Under-resourced Languages: A Case Study on <fixed-case>I</fixed-case>ndonesian MeladelMistica Jey HanLau - TimothyBaldwin + TimothyBaldwin 685–691 I13-1080 mistica-etal-2013-unsupervised @@ -752,7 +752,7 @@ An Efficient Active Learning Framework for New Relation Types LishengFu - RalphGrishman + RalphGrishman 692–698 I13-1081 fu-grishman-2013-efficient @@ -767,10 +767,10 @@ Augmentable Paraphrase Extraction Framework - Mei-HuaChen - Yi-ChunChen - Shih-TingHuang - Jason S.Chang + Mei-HuaChen + Yi-ChunChen + Shih-TingHuang + Jason S.Chang 706–711 I13-1083 chen-etal-2013-augmentable @@ -778,8 +778,8 @@ Automatic Prediction of Evidence-based Recommendations via Sentence-level Polarity Classification AbeedSarker - DiegoMollá-Aliod - CécileParis + DiegoMollá-Aliod + CécileParis 712–718 I13-1084 sarker-etal-2013-automatic @@ -815,15 +815,15 @@ Dang HaiTran Cuong XuanChu Son BaoPham - Minh LeNguyen + Minh LeNguyen 740–746 I13-1088 tran-etal-2013-learning Detecting Bot-Answerable Questions in <fixed-case>U</fixed-case>buntu Chat - DavidUthus - DavidAha + DavidUthus + DavidAha 747–752 I13-1089 uthus-aha-2013-detecting @@ -846,7 +846,7 @@ On the Effectiveness of Using Syntactic and Shallow Semantic Tree Kernels for Automatic Assessment of Essays YlliasChali - Sadid A.Hasan + Sadid A.Hasan 767–773 I13-1092 chali-hasan-2013-effectiveness @@ -854,7 +854,7 @@ Little by Little: Semi Supervised Stemming through Stem Set Minimization VasudevanN - PushpakBhattacharyya + PushpakBhattacharyya 774–780 I13-1093 n-bhattacharyya-2013-little @@ -863,7 +863,7 @@ What Information is Helpful for Dependency Based Semantic Role Labeling YanyanLuo KevinDuh - YujiMatsumoto + YujiMatsumoto 781–787 I13-1094 luo-etal-2013-information @@ -871,7 +871,7 @@ Classifying Taxonomic Relations between Pairs of <fixed-case>W</fixed-case>ikipedia Articles OrBiran - KathleenMcKeown + KathleenMcKeown 788–794 I13-1095 biran-mckeown-2013-classifying @@ -887,7 +887,7 @@ Financial Sentiment Analysis for Risk Prediction Chuan-JuWang - Ming-FengTsai + Ming-FengTsai TseLiu Chin-TingChang 802–808 @@ -899,18 +899,18 @@ Minh-QuocNghiem Giovanni YokoKristianto GoranTopić - AkikoAizawa + AkikoAizawa 809–814 I13-1098 nghiem-etal-2013-sense Adapting a State-of-the-art Anaphora Resolution System for Resource-poor Language - UtpalSikdar + UtpalSikdar AsifEkbal SriparnaSaha OlgaUryupina - MassimoPoesio + MassimoPoesio 815–821 I13-1099 sikdar-etal-2013-adapting @@ -942,8 +942,8 @@ Translating <fixed-case>C</fixed-case>hinese Unknown Words by Automatically Acquired Templates Ming-HongBai Yu-MingHsieh - Keh-JiannChen - Jason S.Chang + Keh-JiannChen + Jason S.Chang 839–843 I13-1103 bai-etal-2013-translating @@ -951,8 +951,8 @@ Multilingual Lexicon Bootstrapping - Improving a Lexicon Induction System Using a Parallel Corpus PatrickZiering - Lonnekevan der Plas - HinrichSchütze + Lonnekevan der Plas + HinrichSchütze 844–848 I13-1104 ziering-etal-2013-multilingual @@ -967,7 +967,7 @@ A Factoid Question Answering System Using Answer Pattern Matching NagehanPala Er - IlyasCicekli + IlyasCicekli 854–858 I13-1106 pala-er-cicekli-2013-factoid @@ -988,7 +988,7 @@ YoungsamKim MunhyongKim AndrewCattle - JuliaOtmakhova + JuliaOtmakhova SuziPark HyopilShin 864–868 @@ -1037,7 +1037,7 @@ An Empirical Study of Combing Multiple Models in <fixed-case>B</fixed-case>engali Question Classification SomnathBanerjee - SivajiBandyopadhyay + SivajiBandyopadhyay 892–896 I13-1113 banerjee-bandyopadhyay-2013-empirical @@ -1054,7 +1054,7 @@ Exploiting User Search Sessions for the Semantic Categorization of Question-like Informational Search Queries AlejandroFigueroa - GuenterNeumann + GuenterNeumann 902–906 I13-1115 figueroa-neumann-2013-exploiting @@ -1072,7 +1072,7 @@ Interest Analysis using <fixed-case>P</fixed-case>age<fixed-case>R</fixed-case>ank and Social Interaction Content - Chung-chiHuang + Chung-chiHuang Lun-WeiKu 912–916 I13-1117 @@ -1102,7 +1102,7 @@ An Approach of Hybrid Hierarchical Structure for Word Similarity Computing by <fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et JiangmingLiu - JinanXu + JinanXu YujieZhang 927–931 I13-1120 @@ -1119,16 +1119,16 @@ Automated Grammar Correction Using Hierarchical Phrase-Based Statistical Machine Translation BibekBehera - PushpakBhattacharyya + PushpakBhattacharyya 937–941 I13-1122 behera-bhattacharyya-2013-automated Finding Dependency Parsing Limits over a Large <fixed-case>S</fixed-case>panish Corpus - MuntsaPadró + MuntsaPadró MiguelBallesteros - HéctorMartínez + HéctorMartínez BerndBohnet 942–946 I13-1123 @@ -1147,7 +1147,7 @@ Building Specialized Bilingual Lexicons Using Word Sense Disambiguation DhouhaBouamor NasredineSemmar - PierreZweigenbaum + PierreZweigenbaum 952–956 I13-1125 bouamor-etal-2013-building-specialized @@ -1166,7 +1166,7 @@ KoichiroYoshino ShinjiWatanabe JonathanLe Roux - John R.Hershey + John R.Hershey 962–966 I13-1127 yoshino-etal-2013-statistical @@ -1174,9 +1174,9 @@ Repairing Incorrect Translation with Examples JunguoZhu - MuyunYang + MuyunYang ShengLi - TiejunZhao + TiejunZhao 967–971 I13-1128 zhu-etal-2013-repairing @@ -1196,16 +1196,16 @@ A Hybrid Approach for Anaphora Resolution in <fixed-case>H</fixed-case>indi PraveenDakwale VandanMujadia - Dipti MSharma + Dipti MSharma 977–981 I13-1130 dakwale-etal-2013-hybrid Structure Cognizant Pseudo Relevance Feedback - ArjunAtreya V + ArjunAtreya V YogeshKakde - PushpakBhattacharyya + PushpakBhattacharyya GaneshRamakrishnan 982–986 I13-1131 @@ -1213,8 +1213,8 @@ Cross-Domain Answer Ranking using Importance Sampling - AndersJohannsen - AndersSøgaard + AndersJohannsen + AndersSøgaard 987–991 I13-1132 johannsen-sogaard-2013-cross @@ -1222,16 +1222,16 @@ Morphological Analysis of <fixed-case>T</fixed-case>unisian Dialect InèsZribi - MariemEllouze Khemakhem - LamiaHadrich Belguith + MariemEllouze Khemakhem + LamiaHadrich Belguith 992–996 I13-1133 zribi-etal-2013-morphological Disambiguating Explicit Discourse Connectives without Oracles - AndersJohannsen - AndersSøgaard + AndersJohannsen + AndersSøgaard 997–1001 I13-1134 johannsen-sogaard-2013-disambiguating @@ -1246,8 +1246,8 @@ Statistical Morphological Analyzer for <fixed-case>H</fixed-case>indi - Deepak KumarMalladi - PrashanthMannem + Deepak KumarMalladi + PrashanthMannem 1007–1011 I13-1136 malladi-mannem-2013-statistical @@ -1255,15 +1255,15 @@ Induction of Root and Pattern Lexicon for Unsupervised Morphological Analysis of <fixed-case>A</fixed-case>rabic BilalKhaliq - JohnCarroll + JohnCarroll 1012–1016 I13-1137 khaliq-carroll-2013-induction Using Shallow Semantic Parsing and Relation Extraction for Finding Contradiction in Text - Minh Quang NhatPham - Minh LeNguyen + Minh Quang NhatPham + Minh LeNguyen AkiraShimazu 1017–1021 I13-1138 @@ -1272,7 +1272,7 @@ Using Transliteration of Proper Names from <fixed-case>A</fixed-case>rabic to <fixed-case>L</fixed-case>atin Script to Improve <fixed-case>E</fixed-case>nglish-<fixed-case>A</fixed-case>rabic Word Alignment NasredineSemmar - HoudaSaadane + HoudaSaadane 1022–1026 I13-1139 semmar-saadane-2013-using @@ -1290,8 +1290,8 @@ Incremental Segmentation and Decoding Strategies for Simultaneous Translation MahsaYarmohammadi Vivek KumarRangarajan Sridhar - SrinivasBangalore - BaskaranSankaran + SrinivasBangalore + BaskaranSankaran 1032–1036 I13-1141 yarmohammadi-etal-2013-incremental @@ -1299,7 +1299,7 @@ Two Case Studies on Translating Pronouns in a Deep Syntax Framework MichalNovák - ZdeněkŽabokrtský + ZdeněkŽabokrtský AnnaNedoluzhko 1037–1041 I13-1142 @@ -1308,7 +1308,7 @@ Bootstrapping Phrase-based Statistical Machine Translation via <fixed-case>WSD</fixed-case> Integration HienVu Huy - Phuong-ThaiNguyen + Phuong-ThaiNguyen Tung-LamNguyen M.LNguyen 1042–1046 @@ -1326,7 +1326,7 @@ Interoperability between Service Composition and Processing Pipeline: Case Study on the Language Grid and <fixed-case>UIMA</fixed-case> - TrangMai Xuan + TrangMai Xuan YoheiMurakami DonghuiLin ToruIshida @@ -1380,7 +1380,7 @@ Synonym Acquisition Using Bilingual Comparable Corpora DanielAndrade MasaakiTsuchida - TakashiOnishi + TakashiOnishi KaiIshikawa 1077–1081 I13-1150 @@ -1390,7 +1390,7 @@ Exploring Verb Frames for Sentence Simplification in <fixed-case>H</fixed-case>indi AnkushSoni SambhavJain - DiptiMisra Sharma + DiptiMisra Sharma 1082–1086 I13-1151 soni-etal-2013-exploring @@ -1405,9 +1405,9 @@ Parser Accuracy in Quality Estimation of Machine Translation: A Tree Kernel Approach - RasoulSamad Zadeh Kaljahi + RasoulSamad Zadeh Kaljahi JenniferFoster - RaphaelRubino + RaphaelRubino JohannRoturier FredHollowood 1092–1096 @@ -1416,7 +1416,7 @@ Attribute Relation Extraction from Template-inconsistent Semi-structured Text by Leveraging Site-level Knowledge - YangLiu + YangLiu FangLiu SiweiLai KangLiu @@ -1456,7 +1456,7 @@ HowJing YuTsao Kuan-YuChen - Hsin-MinWang + Hsin-MinWang 1117–1123 I13-1158 jing-etal-2013-semantic @@ -1464,7 +1464,7 @@ Cluster-based Web Summarization YvesPetinot - KathleenMcKeown + KathleenMcKeown KapilThadani 1124–1128 I13-1159 @@ -1474,7 +1474,7 @@ Automated Activity Recognition in Clinical Documents CamiloThorne MarcoMontali - DiegoCalvanese + DiegoCalvanese ElenaCardillo ClaudioEccher 1129–1133 @@ -1495,7 +1495,7 @@ BingyangLiu DayongWu YueLiu - XueqiCheng + XueqiCheng 1139–1143 I13-1162 liu-etal-2013-self @@ -1529,9 +1529,9 @@ Estimating the Quality of Translated User-Generated Content - RaphaelRubino + RaphaelRubino JenniferFoster - RasoulSamad Zadeh Kaljahi + RasoulSamad Zadeh Kaljahi JohannRoturier FredHollowood 1167–1173 @@ -1552,7 +1552,7 @@ Multiword Expressions in the Context of Statistical Machine Translation MahmoudGhoneim - MonaDiab + MonaDiab 1181–1187 I13-1168 ghoneim-diab-2013-multiword @@ -1577,7 +1577,7 @@ Automatic Extraction of Social Networks from Literary Text: A Case Study on Alice in Wonderland ApoorvAgarwal AnupKotalwar - OwenRambow + OwenRambow 1202–1208 I13-1171 agarwal-etal-2013-automatic @@ -1603,7 +1603,7 @@ Iterative Development and Evaluation of a Social Conversational Agent AnnikaSilvervarg - ArneJönsson + ArneJönsson 1223–1229 I13-1174 silvervarg-jonsson-2013-iterative @@ -1611,7 +1611,7 @@ A Hybrid Morphological Disambiguation System for <fixed-case>T</fixed-case>urkish MucahidKutlu - IlyasCicekli + IlyasCicekli 1230–1236 I13-1175 kutlu-cicekli-2013-hybrid @@ -1626,7 +1626,7 @@ Increasing the Quality and Quantity of Source Language Data for Unsupervised Cross-Lingual <fixed-case>POS</fixed-case> Tagging - LongDuong + LongDuong PaulCook StevenBird PavelPecina @@ -1655,16 +1655,16 @@ Named Entity Extraction using Information Distance SangameshwarPatil SachinPawar - GirishPalshikar + GirishPalshikar 1264–1270 I13-1180 patil-etal-2013-named Feature-based Neural Language Model and <fixed-case>C</fixed-case>hinese Word Segmentation - MairgupMansur + MairgupMansur WenzhePei - BaobaoChang + BaobaoChang 1271–1277 I13-1181 mansur-etal-2013-feature @@ -1682,7 +1682,7 @@ Effect of Non-linear Deep Architecture in Sequence Labeling MengqiuWang - Christopher D.Manning + Christopher D.Manning 1285–1291 I13-1183 wang-manning-2013-effect @@ -1696,7 +1696,7 @@ Source and Translation Classification using Most Frequent Words - ZahurulIslam + ZahurulIslam ArminHoenen 1299–1305 I13-1185 @@ -1723,8 +1723,8 @@ Bootstrapping Semantic Lexicons for Technical Domains PatrickZiering - Lonnekevan der Plas - HinrichSchütze + Lonnekevan der Plas + HinrichSchütze 1321–1329 I13-1188 ziering-etal-2013-bootstrapping @@ -1733,7 +1733,7 @@ Long-Distance Time-Event Relation Extraction AlessandroMoschitti SiddharthPatwardhan - ChrisWelty + ChrisWelty 1330–1338 I13-1189 moschitti-etal-2013-long @@ -1784,7 +1784,7 @@ Diagnosing Causes of Reading Difficulty using <fixed-case>B</fixed-case>ayesian Networks PascualMartínez-Gómez - AkikoAizawa + AkikoAizawa 1383–1391 I13-1195 martinez-gomez-aizawa-2013-diagnosing @@ -1808,7 +1808,7 @@ Supervised Sentence Fusion with Single-Stage Inference KapilThadani - KathleenMcKeown + KathleenMcKeown 1410–1418 I13-1198 thadani-mckeown-2013-supervised @@ -1887,12 +1887,12 @@ <fixed-case>DIRA</fixed-case>: Dialectal <fixed-case>A</fixed-case>rabic Information Retrieval Assistant ArfathPasha MohammadAl-Badrashiny - MohamedAltantawy + MohamedAltantawy NizarHabash ManojPooleery - OwenRambow - RyanM. Roth - MonaDiab + OwenRambow + RyanM. Roth + MonaDiab 13–16 I13-2004 pasha-etal-2013-dira @@ -1900,7 +1900,7 @@ Keyphrase-Driven Document Visualization Tool GáborBerend - RichárdFarkas + RichárdFarkas 17–20 I13-2005 berend-farkas-2013-keyphrase @@ -1910,7 +1910,7 @@ AdityaJoshi KashyapPopat ShubhamGautam - PushpakBhattacharyya + PushpakBhattacharyya 21–24 I13-2006 joshi-etal-2013-making @@ -1940,7 +1940,7 @@ ApoorvAgarwal AnupKotalwar JiehanZheng - OwenRambow + OwenRambow 33–36 I13-2009 agarwal-etal-2013-sinnet @@ -1957,7 +1957,7 @@ <fixed-case>T</fixed-case>muse: Lexical Network Exploration YannickChudy YannDesalle - BenoîtGaillard + BenoîtGaillard BrunoGaume PierreMagistry EmmanuelNavarro diff --git a/data/xml/I17.xml b/data/xml/I17.xml index efa4b29da9..4bd56158d2 100644 --- a/data/xml/I17.xml +++ b/data/xml/I17.xml @@ -19,11 +19,11 @@ Evaluating Layers of Representation in Neural Machine Translation on Part-of-Speech and Semantic Tagging Tasks YonatanBelinkov - LluísMàrquez + LluísMàrquez HassanSajjad NadirDurrani FahimDalvi - JamesGlass + JamesGlass 1–10 I17-1001 While neural machine translation (NMT) models provide improved translation quality in an elegant framework, it is less clear what they learn about language. Recent work has started evaluating the quality of vector representations learned by NMT models on morphological and syntactic tasks. In this paper, we investigate the representations learned at different layers of NMT encoders. We train NMT systems on parallel data and use the models to extract features for training a classifier on two tasks: part-of-speech and semantic tagging. We then measure the performance of the classifier as a proxy to the quality of the original NMT model for the given task. Our quantitative analysis yields interesting insights regarding representation learning in NMT models. For instance, we find that higher layers are better at learning semantics while lower layers tend to be better for part-of-speech tagging. We also observe little effect of the target language on source-side representations, especially in higher quality models. @@ -34,8 +34,8 @@ KehaiChen RuiWang MasaoUtiyama - EiichiroSumita - TiejunZhao + EiichiroSumita + TiejunZhao 11–20 I17-1002 In Neural Machine Translation (NMT), each word is represented as a low-dimension, real-value vector for encoding its syntax and semantic information. This means that even if the word is in a different sentence context, it is represented as the fixed vector to learn source representation. Moreover, a large number of Out-Of-Vocabulary (OOV) words, which have different syntax and semantic information, are represented as the same vector representation of “unk”. To alleviate this problem, we propose a novel context-aware smoothing method to dynamically learn a sentence-specific vector for each word (including OOV words) depending on its local context words in a sentence. The learned context-aware representation is integrated into the NMT to improve the translation performance. Empirical results on NIST Chinese-to-English translation task show that the proposed approach achieves 1.78 BLEU improvements on average over a strong attentional NMT, and outperforms some existing systems. @@ -46,7 +46,7 @@ AnNguyen Le AnderMartinez AkifumiYoshimoto - YujiMatsumoto + YujiMatsumoto 21–29 I17-1003 I17-1003.Datasets.zip @@ -87,7 +87,7 @@ Neural Probabilistic Model for Non-projective <fixed-case>MST</fixed-case> Parsing XuezheMa - EduardHovy + EduardHovy 59–69 I17-1007 In this paper, we propose a probabilistic parsing model that defines a proper conditional probability distribution over non-projective dependency trees for a given sentence, using neural representations as inputs. The neural network architecture is based on bi-directional LSTMCNNs, which automatically benefits from both word- and character-level representations, by using a combination of bidirectional LSTMs and CNNs. On top of the neural network, we introduce a probabilistic structured layer, defining a conditional log-linear model over non-projective trees. By exploiting Kirchhoff’s Matrix-Tree Theorem (Tutte, 1984), the partition functions and marginals can be computed efficiently, leading to a straightforward end-to-end model training procedure via back-propagation. We evaluate our model on 17 different datasets, across 14 different languages. Our parser achieves state-of-the-art parsing performance on nine datasets. @@ -116,7 +116,7 @@ Improving Implicit Semantic Role Labeling by Predicting Semantic Frame Arguments Quynh Ngoc ThiDo StevenBethard - Marie-FrancineMoens + Marie-FrancineMoens 90–99 I17-1010 Implicit semantic role labeling (iSRL) is the task of predicting the semantic roles of a predicate that do not appear as explicit arguments, but rather regard common sense knowledge or are mentioned earlier in the discourse. We introduce an approach to iSRL based on a predictive recurrent neural semantic frame model (PRNSFM) that uses a large unannotated corpus to learn the probability of a sequence of semantic arguments given a predicate. We leverage the sequence probabilities predicted by the PRNSFM to estimate selectional preferences for predicates and their arguments. On the NomBank iSRL test set, our approach improves state-of-the-art performance on implicit semantic role labeling with less reliance than prior work on manually constructed language resources. @@ -165,7 +165,7 @@ NadirDurrani HassanSajjad YonatanBelinkov - StephanVogel + StephanVogel 142–151 I17-1015 End-to-end training makes the neural machine translation (NMT) architecture simpler, yet elegant compared to traditional statistical machine translation (SMT). However, little is known about linguistic patterns of morphology, syntax and semantics learned during the training of NMT systems, and more importantly, which parts of the architecture are responsible for learning each of these phenomenon. In this paper we i) analyze how much morphology an NMT decoder learns, and ii) investigate whether injecting target morphology in the decoder helps it to produce better translations. To this end we present three methods: i) simultaneous translation, ii) joint-data learning, and iii) multi-task learning. Our results show that explicit morphological information helps the decoder learn target language morphology and improves the translation quality by 0.2–0.6 BLEU points. @@ -175,7 +175,7 @@ Improving Neural Machine Translation through Phrase-based Forced Decoding JingyiZhang MasaoUtiyama - EiichroSumita + EiichroSumita GrahamNeubig SatoshiNakamura 152–162 @@ -196,7 +196,7 @@ Character-based Joint Segmentation and <fixed-case>POS</fixed-case> Tagging for <fixed-case>C</fixed-case>hinese using Bidirectional <fixed-case>RNN</fixed-case>-<fixed-case>CRF</fixed-case> YanShao ChristianHardmeier - JörgTiedemann + JörgTiedemann JoakimNivre 173–183 I17-1018 @@ -239,7 +239,7 @@ A Computational Study on Word Meanings and Their Distributed Representations via Polymodal Embedding JooheePark - Sung-hyonMyaeng + Sung-hyonMyaeng 214–223 I17-1022 A distributed representation has become a popular approach to capturing a word meaning. Besides its success and practical value, however, questions arise about the relationships between a true word meaning and its distributed representation. In this paper, we examine such a relationship via polymodal embedding approach inspired by the theory that humans tend to use diverse sources in developing a word meaning. The result suggests that the existing embeddings lack in capturing certain aspects of word meanings which can be significantly improved by the polymodal approach. Also, we show distinct characteristics of different types of words (e.g. concreteness) via computational studies. Finally, we show our proposed embedding method outperforms the baselines in the word similarity measure tasks and the hypernym prediction tasks. @@ -280,7 +280,7 @@ A Sensitivity Analysis of (and Practitioners’ Guide to) Convolutional Neural Networks for Sentence Classification YeZhang - ByronWallace + ByronWallace 253–263 I17-1026 Convolutional Neural Networks (CNNs) have recently achieved remarkably strong performance on the practically important task of sentence classification (Kim, 2014; Kalchbrenner et al., 2014; Johnson and Zhang, 2014; Zhang et al., 2016). However, these models require practitioners to specify an exact model architecture and set accompanying hyperparameters, including the filter region size, regularization parameters, and so on. It is currently unknown how sensitive model performance is to changes in these configurations for the task of sentence classification. We thus conduct a sensitivity analysis of one-layer CNNs to explore the effect of architecture components on model performance; our aim is to distinguish between important and comparatively inconsequential design decisions for sentence classification. We focus on one-layer CNNs (to the exclusion of more complex models) due to their comparative simplicity and strong empirical performance, which makes it a modern standard baseline method akin to Support Vector Machine (SVMs) and logistic regression. We derive practical advice from our extensive empirical results for those interested in getting the most out of CNNs for sentence classification in real world settings. @@ -290,7 +290,7 @@ Coordination Boundary Identification with Similarity and Replaceability HirokiTeranishi HiroyukiShindo - YujiMatsumoto + YujiMatsumoto 264–272 I17-1027 We propose a neural network model for coordination boundary detection. Our method relies on the two common properties - similarity and replaceability in conjuncts - in order to detect both similar pairs of conjuncts and dissimilar pairs of conjuncts. The model improves identification of clause-level coordination using bidirectional RNNs incorporating two properties as features. We show that our model outperforms the existing state-of-the-art methods on the coordination annotated Penn Treebank and Genia corpus without any syntactic information from parsers. @@ -317,8 +317,8 @@ Learning How to Simplify From Explicit Labeling of Complex-Simplified Text Pairs FernandoAlva-Manchego JoachimBingel - GustavoPaetzold - CarolinaScarton + GustavoPaetzold + CarolinaScarton LuciaSpecia 295–305 I17-1030 @@ -329,7 +329,7 @@ Domain-Adaptable Hybrid Generation of <fixed-case>RDF</fixed-case> Entity Descriptions OrBiran - KathleenMcKeown + KathleenMcKeown 306–315 I17-1031 RDF ontologies provide structured data on entities in many domains and continue to grow in size and diversity. While they can be useful as a starting point for generating descriptions of entities, they often miss important information about an entity that cannot be captured as simple relations. In addition, generic approaches to generation from RDF cannot capture the unique style and content of specific domains. We describe a framework for hybrid generation of entity descriptions, which combines generation from RDF data with text extracted from a corpus, and extracts unique aspects of the domain from the corpus to create domain-specific generation systems. We show that each component of our approach significantly increases the satisfaction of readers with the text across multiple applications and domains. @@ -339,7 +339,7 @@ <fixed-case>ES</fixed-case>-<fixed-case>LDA</fixed-case>: Entity Summarization using Knowledge-based Topic Modeling SeyedaminPouriyeh MehdiAllahyari - KrzysztofKochut + KrzysztofKochut GongCheng Hamid RezaArabnia 316–325 @@ -403,8 +403,8 @@ <fixed-case>NMT</fixed-case> or <fixed-case>SMT</fixed-case>: Case Study of a Narrow-domain <fixed-case>E</fixed-case>nglish-<fixed-case>L</fixed-case>atvian Post-editing Project - IngunaSkadiņa - MārcisPinnis + IngunaSkadiņa + MārcisPinnis 373–383 I17-1038 The recent technological shift in machine translation from statistical machine translation (SMT) to neural machine translation (NMT) raises the question of the strengths and weaknesses of NMT. In this paper, we present an analysis of NMT and SMT systems’ outputs from narrow domain English-Latvian MT systems that were trained on a rather small amount of data. We analyze post-edits produced by professional translators and manually annotated errors in these outputs. Analysis of post-edits allowed us to conclude that both approaches are comparably successful, allowing for an increase in translators’ productivity, with the NMT system showing slightly worse results. Through the analysis of annotated errors, we found that NMT translations are more fluent than SMT translations. However, errors related to accuracy, especially, mistranslation and omission errors, occur more often in NMT outputs. The word form errors, that characterize the morphological richness of Latvian, are frequent for both systems, but slightly fewer in NMT outputs. @@ -415,7 +415,7 @@ YiningWang YangZhao JiajunZhang - ChengqingZong + ChengqingZong ZhengshanXue 384–393 I17-1039 @@ -426,7 +426,7 @@ Identifying Usage Expression Sentences in Consumer Product Reviews ShibamouliLahiri V.G.VinodVydiswaran - RadaMihalcea + RadaMihalcea 394–403 I17-1040 In this paper we introduce the problem of identifying usage expression sentences in a consumer product review. We create a human-annotated gold standard dataset of 565 reviews spanning five distinct product categories. Our dataset consists of more than 3,000 annotated sentences. We further introduce a classification system to label sentences according to whether or not they describe some “usage”. The system combines lexical, syntactic, and semantic features in a product-agnostic fashion to yield good classification performance. We show the effectiveness of our approach using importance ranking of features, error analysis, and cross-product classification experiments. @@ -444,7 +444,7 @@ <fixed-case>W</fixed-case>i<fixed-case>NER</fixed-case>: A <fixed-case>W</fixed-case>ikipedia Annotated Corpus for Named Entity Recognition AbbasGhaddar - PhillippeLanglais + PhillippeLanglais 413–422 I17-1042 We revisit the idea of mining Wikipedia in order to generate named-entity annotations. We propose a new methodology that we applied to English Wikipedia to build WiNER, a large, high quality, annotated corpus. We evaluate its usefulness on 6 NER tasks, comparing 4 popular state-of-the art approaches. We show that LSTM-CRF is the approach that benefits the most from our corpus. We report impressive gains with this model when using a small portion of WiNER on top of the CONLL training material. Last, we propose a simple but efficient method for exploiting the full range of WiNER, leading to further improvements. @@ -473,9 +473,9 @@ Attentive Language Models - GiancarloSalton + GiancarloSalton RobertRoss - JohnKelleher + JohnKelleher 441–450 I17-1045 In this paper, we extend Recurrent Neural Network Language Models (RNN-LMs) with an attention mechanism. We show that an “attentive” RNN-LM (with 11M parameters) achieves a better perplexity than larger RNN-LMs (with 66M parameters) and achieves performance comparable to an ensemble of 10 similar sized RNN-LMs. We also show that an “attentive” RNN-LM needs less contextual information to achieve similar results to the state-of-the-art on the wikitext2 dataset. @@ -494,7 +494,7 @@ Image-Grounded Conversations: Multimodal Context for Natural Question and Response Generation NasrinMostafazadeh ChrisBrockett - BillDolan + BillDolan MichelGalley JianfengGao GeorgiosSpithourakis @@ -508,7 +508,7 @@ A Neural Language Model for Dynamically Representing the Meanings of Unknown Words and Entities in a Discourse SosukeKobayashi - NaoakiOkazaki + NaoakiOkazaki KentaroInui 473–483 I17-1048 @@ -519,7 +519,7 @@ Using Explicit Discourse Connectives in Translation for Implicit Discourse Relation Classification WeiShi FrancesYung - RaphaelRubino + RaphaelRubino VeraDemberg 484–495 I17-1049 @@ -571,7 +571,7 @@ Sentence Modeling with Deep Neural Architecture using Lexicon and Character Attention Mechanism for Sentiment Classification Huy ThanhNguyen - Minh LeNguyen + Minh LeNguyen 536–544 I17-1054 Tweet-level sentiment classification in Twitter social networking has many challenges: exploiting syntax, semantic, sentiment, and context in tweets. To address these problems, we propose a novel approach to sentiment analysis that uses lexicon features for building lexicon embeddings (LexW2Vs) and generates character attention vectors (CharAVs) by using a Deep Convolutional Neural Network (DeepCNN). Our approach integrates LexW2Vs and CharAVs with continuous word embeddings (ContinuousW2Vs) and dependency-based word embeddings (DependencyW2Vs) simultaneously in order to increase information for each word into a Bidirectional Contextual Gated Recurrent Neural Network (Bi-CGRNN). We evaluate our model on two Twitter sentiment classification datasets. Experimental results show that our model can improve the classification accuracy of sentence-level sentiment analysis in Twitter social networking. @@ -590,8 +590,8 @@ Capturing Long-range Contextual Dependencies with Memory-enhanced Conditional Random Fields FeiLiu - TimothyBaldwin - TrevorCohn + TimothyBaldwin + TrevorCohn 555–565 I17-1056 Despite successful applications across a broad range of NLP tasks, conditional random fields (“CRFs”), in particular the linear-chain variant, are only able to model local features. While this has important benefits in terms of inference tractability, it limits the ability of the model to capture long-range dependencies between items. Attempts to extend CRFs to capture long-range dependencies have largely come at the cost of computational complexity and approximate inference. In this work, we propose an extension to CRFs by integrating external memory, taking inspiration from memory networks, thereby allowing CRFs to incorporate information far beyond neighbouring steps. Experiments across two tasks show substantial improvements over strong CRF and LSTM baselines. @@ -599,9 +599,9 @@ Named Entity Recognition with Stack Residual <fixed-case>LSTM</fixed-case> and Trainable Bias Decoding - QuanTran - AndrewMacKinlay - AntonioJimeno Yepes + QuanTran + AndrewMacKinlay + AntonioJimeno Yepes 566–575 I17-1057 Recurrent Neural Network models are the state-of-the-art for Named Entity Recognition (NER). We present two innovations to improve the performance of these models. The first innovation is the introduction of residual connections between the Stacked Recurrent Neural Network model to address the degradation problem of deep neural networks. The second innovation is a bias decoding mechanism that allows the trained system to adapt to non-differentiable and externally computed objectives, such as the entity-based F-measure. Our work improves the state-of-the-art results for both Spanish and English languages on the standard train/development/test split of the CoNLL 2003 Shared Task NER dataset. @@ -621,7 +621,7 @@ Leveraging Discourse Information Effectively for Authorship Attribution ElisaFerracane SuWang - RaymondMooney + RaymondMooney 584–593 I17-1059 We explore techniques to maximize the effectiveness of discourse information in the task of authorship attribution. We present a novel method to embed discourse features in a Convolutional Neural Network text classifier, which achieves a state-of-the-art result by a significant margin. We empirically investigate several featurization methods to understand the conditions under which discourse features contribute non-trivial performance gains, and analyze discourse embeddings. @@ -641,7 +641,7 @@ Multi-Task Learning for Speaker-Role Adaptation in Neural Conversation Models YiLuan ChrisBrockett - BillDolan + BillDolan JianfengGao MichelGalley 605–614 @@ -685,8 +685,8 @@ An Ensemble Method with Sentiment Features and Clustering Support - NguyenHuy Tien - NguyenMinh Le + Huy TienNguyen + Minh LeNguyen 644–653 I17-1065 Deep learning models have recently been applied successfully in natural language processing, especially sentiment analysis. Each deep learning model has a particular advantage, but it is difficult to combine these advantages into one model, especially in the area of sentiment analysis. In our approach, Convolutional Neural Network (CNN) and Long Short Term Memory (LSTM) were utilized to learn sentiment-specific features in a freezing scheme. This scenario provides a novel and efficient way for integrating advantages of deep learning models. In addition, we also grouped documents into clusters by their similarity and applied the prediction score of Naive Bayes SVM (NBSVM) method to boost the classification accuracy of each group. The experiments show that our method achieves the state-of-the-art performance on two well-known datasets: IMDB large movie reviews for document level and Pang & Lee movie reviews for sentence level. @@ -704,7 +704,7 @@ Measuring Semantic Relations between Human Activities StevenWilson - RadaMihalcea + RadaMihalcea 664–673 I17-1067 The things people do in their daily lives can provide valuable insights into their personality, values, and interests. Unstructured text data on social media platforms are rich in behavioral content, and automated systems can be deployed to learn about human activity on a broad scale if these systems are able to reason about the content of interest. In order to aid in the evaluation of such systems, we introduce a new phrase-level semantic textual similarity dataset comprised of human activity phrases, providing a testbed for automated systems that analyze relationships between phrasal descriptions of people’s actions. Our set of 1,000 pairs of activities is annotated by human judges across four relational dimensions including similarity, relatedness, motivational alignment, and perceived actor congruence. We evaluate a set of strong baselines for the task of generating scores that correlate highly with human ratings, and we introduce several new approaches to the phrase-level similarity task in the domain of human activities. @@ -715,7 +715,7 @@ BonanMin ZhuolinJiang MarjorieFreedman - RalphWeischedel + RalphWeischedel 674–684 I17-1068 Typically, relation extraction models are trained to extract instances of a relation ontology using only training data from a single language. However, the concepts represented by the relation ontology (e.g. ResidesIn, EmployeeOf) are language independent. The numbers of annotated examples available for a given ontology vary between languages. For example, there are far fewer annotated examples in Spanish and Japanese than English and Chinese. Furthermore, using only language-specific training data results in the need to manually annotate equivalently large amounts of training for each new language a system encounters. We propose a deep neural network to learn transferable, discriminative bilingual representation. Experiments on the ACE 2005 multilingual training corpus demonstrate that the joint training process results in significant improvement in relation classification performance over the monolingual counterparts. The learnt representation is discriminative and transferable between languages. When using 10% (25K English words, or 30K Chinese characters) of the training data, our approach results in doubling F1 compared to a monolingual baseline. We achieve comparable performance to the monolingual system trained with 250K English words (or 300K Chinese characters) With 50% of training data. @@ -764,7 +764,7 @@ Finding Dominant User Utterances And System Responses in Conversations DhirajMadan - SachindraJoshi + SachindraJoshi 723–732 I17-1073 There are several dialog frameworks which allow manual specification of intents and rule based dialog flow. The rule based framework provides good control to dialog designers at the expense of being more time consuming and laborious. The job of a dialog designer can be reduced if we could identify pairs of user intents and corresponding responses automatically from prior conversations between users and agents. In this paper we propose an approach to find these frequent user utterances (which serve as examples for intents) and corresponding agent responses. We propose a novel SimCluster algorithm that extends standard K-means algorithm to simultaneously cluster user utterances and agent utterances by taking their adjacency information into account. The method also aligns these clusters to provide pairs of intents and response groups. We compare our results with those produced by using simple Kmeans clustering on a real dataset and observe upto 10% absolute improvement in F1-scores. Through our experiments on synthetic dataset, we show that our algorithm gains more advantage over K-means algorithm when the data has large variance. @@ -787,7 +787,7 @@ Jey HanLau LianhuaChi Khoi-NguyenTran - TrevorCohn + TrevorCohn 744–753 I17-1075 We propose an end-to-end neural network to predict the geolocation of a tweet. The network takes as input a number of raw Twitter metadata such as the tweet message and associated user account information. Our model is language independent, and despite minimal feature engineering, it is interpretable and capable of learning location indicative words and timing patterns. Compared to state-of-the-art systems, our model outperforms them by 2%-6%. Additionally, we propose extensions to the model to compress representation learnt by the network into binary codes. Experiments show that it produces compact codes compared to benchmark hashing algorithms. An implementation of the model is released publicly. @@ -809,8 +809,8 @@ Domain Adaptation from User-level <fixed-case>F</fixed-case>acebook Models to County-level <fixed-case>T</fixed-case>witter Predictions DanielRieman KokilJaidka - H. AndrewSchwartz - LyleUngar + H. AndrewSchwartz + LyleUngar 764–773 I17-1077 Several studies have demonstrated how language models of user attributes, such as personality, can be built by using the Facebook language of social media users in conjunction with their responses to psychology questionnaires. It is challenging to apply these models to make general predictions about attributes of communities, such as personality distributions across US counties, because it requires 1. the potentially inavailability of the original training data because of privacy and ethical regulations, 2. adapting Facebook language models to Twitter language without retraining the model, and 3. adapting from users to county-level collections of tweets. We propose a two-step algorithm, Target Side Domain Adaptation (TSDA) for such domain adaptation when no labeled Twitter/county data is available. TSDA corrects for the different word distributions between Facebook and Twitter and for the varying word distributions across counties by adjusting target side word frequencies; no changes to the trained model are made. In the case of predicting the Big Five county-level personality traits, TSDA outperforms a state-of-the-art domain adaptation method, gives county-level predictions that have fewer extreme outliers, higher year-to-year stability, and higher correlation with county-level outcomes. @@ -829,7 +829,7 @@ Estimating Reactions and Recommending Products with Generative Models of Reviews JianmoNi - Zachary C.Lipton + Zachary C.Lipton SharadVikram JulianMcAuley 783–791 @@ -841,7 +841,7 @@ Summarizing Lengthy Questions TatsuyaIshigaki HiroyaTakamura - ManabuOkumura + ManabuOkumura 792–800 I17-1080 In this research, we propose the task of question summarization. We first analyzed question-summary pairs extracted from a Community Question Answering (CQA) site, and found that a proportion of questions cannot be summarized by extractive approaches but requires abstractive approaches. We created a dataset by regarding the question-title pairs posted on the CQA site as question-summary pairs. By using the data, we trained extractive and abstractive summarization models, and compared them based on ROUGE scores and manual evaluations. Our experimental results show an abstractive method using an encoder-decoder model with a copying mechanism achieves better scores for both ROUGE-2 F-measure and the evaluations by human judges. @@ -859,7 +859,7 @@ Abstractive Multi-document Summarization by Partial Tree Extraction, Recombination and Linearization - LittonJ Kurisinkel + LittonJ Kurisinkel YueZhang VasudevaVarma 812–821 @@ -889,7 +889,7 @@ Event Ordering with a Generalized Model for Sieve Prediction Ranking BillMcDowell - NathanaelChambers + NathanaelChambers AlexanderOrorbia II DavidReitter 843–853 @@ -912,7 +912,7 @@ JinseonYou Jin-WooChung WonsukYang - Jong C.Park + Jong C.Park 865–874 I17-1087 Genetic information in the literature has been extensively looked into for the purpose of discovering the etiology of a disease. As the gene-disease relation is sensitive to external factors, their identification is important to study a disease. Environmental influences, which are usually called Gene-Environment interaction (GxE), have been considered as important factors and have extensively been researched in biology. Nevertheless, there is still a lack of systems for automatic GxE extraction from the biomedical literature due to new challenges: (1) there are no preprocessing tools and corpora for GxE, (2) expressions of GxE are often quite implicit, and (3) document-level comprehension is usually required. We propose to overcome these challenges with neural network models and show that a modified sequence-to-sequence model with a static RNN decoder produces a good performance in GxE recognition. @@ -936,7 +936,7 @@ QuincyDavenport Anna MengdanDai MohamedAbouelenien - RadaMihalcea + RadaMihalcea 885–894 I17-1089 This paper addresses the task of detecting identity deception in language. Using a novel identity deception dataset, consisting of real and portrayed identities from 600 individuals, we show that we can build accurate identity detectors targeting both age and gender, with accuracies of up to 88. We also perform an analysis of the linguistic patterns used in identity deception, which lead to interesting insights into identity portrayers. @@ -945,7 +945,7 @@ Learning to Diagnose: Assimilating Clinical Narratives using Deep Reinforcement Learning YuanLing - Sadid A.Hasan + Sadid A.Hasan VivekDatla AshequlQadir KathyLee @@ -984,7 +984,7 @@ Demographic Word Embeddings for Racism Detection on <fixed-case>T</fixed-case>witter MohammedHasanuzzaman - GaëlDias + GaëlDias AndyWay 926–936 I17-1093 @@ -1086,7 +1086,7 @@ Multilingual Hierarchical Attention Networks for Document Classification NikolaosPappas - AndreiPopescu-Belis + AndreiPopescu-Belis 1015–1025 I17-1102 Hierarchical attention networks have recently achieved remarkable performance for document classification in a given language. However, when multilingual document collections are considered, training such models separately for each language entails linear parameter growth and lack of cross-language transfer. Learning a single multilingual model with fewer parameters is therefore a challenging but potentially beneficial objective. To this end, we propose multilingual hierarchical attention networks for learning document structures, with shared encoders and/or shared attention mechanisms across languages, using multi-task learning and an aligned semantic space as input. We evaluate the proposed models on multilingual document classification with disjoint label sets, on a large dataset which we provide, with 600k news documents in 8 languages, and 5k labels. The multilingual models outperform monolingual ones in low-resource as well as full-resource settings, and use fewer parameters, thus confirming their computational efficiency and the utility of cross-language transfer. @@ -1097,7 +1097,7 @@ KeithMaki MichaelYoder YohanJo - CarolynRosé + CarolynRosé 1026–1035 I17-1103 In this work we investigate how role-based behavior profiles of a Wikipedia editor, considered against the backdrop of roles taken up by other editors in discussions, predict the success of the editor at achieving an impact on the associated article. We first contribute a new public dataset including a task predicting the success of Wikipedia editors involved in discussion, measured by an operationalization of the lasting impact of their edits in the article. We then propose a probabilistic graphical model that advances earlier work inducing latent discussion roles using the light supervision of success in the negotiation task. We evaluate the performance of the model and interpret findings of roles and group configurations that lead to certain outcomes on Wikipedia. @@ -1136,7 +1136,7 @@ KatsuhikoHayashi TsutomuHirao HiroyaTakamura - ManabuOkumura + ManabuOkumura MasaakiNagata 7–12 I17-2002 @@ -1147,7 +1147,7 @@ Transferring Semantic Roles Using Translation and Syntactic Information MaryamAminian Mohammad SadeghRasooli - MonaDiab + MonaDiab 13–19 I17-2003 Our paper addresses the problem of annotation projection for semantic role labeling for resource-poor languages using supervised annotations from a resource-rich language through parallel data. We propose a transfer method that employs information from source and target syntactic dependencies as well as word alignment density to improve the quality of an iterative bootstrapping method. Our experiments yield a 3.5 absolute labeled F-score improvement over a standard annotation projection method. @@ -1179,7 +1179,7 @@ Towards Lower Bounds on Number of Dimensions for Word Embeddings KevinPatel - PushpakBhattacharyya + PushpakBhattacharyya 31–36 I17-2006 I17-2006.Notes.pdf @@ -1241,8 +1241,8 @@ Learning Kernels over Strings using <fixed-case>G</fixed-case>aussian Processes - DanielBeck - TrevorCohn + DanielBeck + TrevorCohn 67–73 I17-2012 I17-2012.Notes.pdf @@ -1265,7 +1265,7 @@ Yi-JieHuang Shu-HaoYeh Chun-HungChen - Wen-LianHsu + Wen-LianHsu 80–85 I17-2014 Part-of-speech (POS) tagging and named entity recognition (NER) are crucial steps in natural language processing. In addition, the difficulty of word segmentation places additional burden on those who intend to deal with languages such as Chinese, and pipelined systems often suffer from error propagation. This work proposes an end-to-end model using character-based recurrent neural network (RNN) to jointly accomplish segmentation, POS tagging and NER of a Chinese sentence. Experiments on previous word segmentation and NER datasets show that a single model with the proposed architecture is comparable to those trained specifically for each task, and outperforms freely-available softwares. Moreover, we provide a web-based interface for the public to easily access this resource. @@ -1295,7 +1295,7 @@ MotokiSato HiroyukiShindo IkuyaYamada - YujiMatsumoto + YujiMatsumoto 97–102 I17-2017 We present Segment-level Neural CRF, which combines neural networks with a linear chain CRF for segment-level sequence modeling tasks such as named entity recognition (NER) and syntactic chunking. Our segment-level CRF can consider higher-order label dependencies compared with conventional word-level CRF. Since it is difficult to consider all possible variable length segments, our method uses segment lattice constructed from the word-level tagging model to reduce the search space. Performing experiments on NER and chunking, we demonstrate that our method outperforms conventional word-level CRF with neural networks. @@ -1304,7 +1304,7 @@ Integrating Vision and Language Datasets to Measure Word Concreteness GititKehat - JamesPustejovsky + JamesPustejovsky 103–108 I17-2018 We present and take advantage of the inherent visualizability properties of words in visual corpora (the textual components of vision-language datasets) to compute concreteness scores for words. Our simple method does not require hand-annotated concreteness score lists for training, and yields state-of-the-art results when evaluated against concreteness scores lists and previously derived scores, as well as when used for metaphor detection. @@ -1323,7 +1323,7 @@ Injecting Word Embeddings with Another Language’s Resource : An Application of Bilingual Embeddings PrakharPandey VikramPudi - ManishShrivastava + ManishShrivastava 116–121 I17-2020 Word embeddings learned from text corpus can be improved by injecting knowledge from external resources, while at the same time also specializing them for similarity or relatedness. These knowledge resources (like WordNet, Paraphrase Database) may not exist for all languages. In this work we introduce a method to inject word embeddings of a language with knowledge resource of another language by leveraging bilingual embeddings. First we improve word embeddings of German, Italian, French and Spanish using resources of English and test them on variety of word similarity tasks. Then we demonstrate the utility of our method by creating improved embeddings for Urdu and Telugu languages using Hindi WordNet, beating the previously established baseline for Urdu. @@ -1333,7 +1333,7 @@ Improving Black-box Speech Recognition using Semantic Parsing RodolfoCorona JesseThomason - RaymondMooney + RaymondMooney 122–127 I17-2021 Speech is a natural channel for human-computer interaction in robotics and consumer applications. Natural language understanding pipelines that start with speech can have trouble recovering from speech recognition errors. Black-box automatic speech recognition (ASR) systems, built for general purpose use, are unable to take advantage of in-domain language models that could otherwise ameliorate these errors. In this work, we present a method for re-ranking black-box ASR hypotheses using an in-domain language model and semantic parser trained for a particular task. Our re-ranking method significantly improves both transcription accuracy and semantic understanding over a state-of-the-art ASR’s vanilla output. @@ -1360,7 +1360,7 @@ Modelling Representation Noise in Emotion Analysis using <fixed-case>G</fixed-case>aussian Processes - DanielBeck + DanielBeck 140–145 I17-2024 Emotion Analysis is the task of modelling latent emotions present in natural language. Labelled datasets for this task are scarce so learning good input text representations is not trivial. Using averaged word embeddings is a simple way to leverage unlabelled corpora to build text representations but this approach can be prone to noise either coming from the embedding themselves or the averaging procedure. In this paper we propose a model for Emotion Analysis using Gaussian Processes and kernels that are better suitable for functions that exhibit noisy behaviour. Empirical evaluations in a emotion prediction task show that our model outperforms commonly used baselines for regression. @@ -1369,7 +1369,7 @@ Are Manually Prepared Affective Lexicons Really Useful for Sentiment Analysis MingleiLi - QinLu + QinLu YunfeiLong 146–150 I17-2025 @@ -1391,7 +1391,7 @@ Can Discourse Relations be Identified Incrementally? FrancesYung HiroshiNoji - YujiMatsumoto + YujiMatsumoto 157–162 I17-2027 I17-2027.Notes.pdf @@ -1402,7 +1402,7 @@ Speaker Role Contextual Modeling for Language Understanding and Dialogue Policy Learning Ta-ChungChi - Po-ChunChen + Po-ChunChen Shang-YuSu Yun-NungChen 163–168 @@ -1425,7 +1425,7 @@ Dialog for Language to Code ShobhitChaurasia - Raymond J.Mooney + Raymond J.Mooney 175–180 I17-2030 Generating computer code from natural language descriptions has been a long-standing problem. Prior work in this domain has restricted itself to generating code in one shot from a single description. To overcome this limitation, we propose a system that can engage users in a dialog to clarify their intent until it has all the information to produce correct code. To evaluate the efficacy of dialog in code generation, we focus on synthesizing conditional statements in the form of IFTTT recipes. @@ -1446,7 +1446,7 @@ YutaiHou JingLiu YunboCao - Chin-YewLin + Chin-YewLin 187–192 I17-2032 We present in this paper a statistical framework that generates accurate and fluent product description from product attributes. Specifically, after extracting templates and learning writing knowledge from attribute-description parallel data, we use the learned knowledge to decide what to say and how to say for product description generation. To evaluate accuracy and fluency for the generated descriptions, in addition to BLEU and Recall, we propose to measure what to say (in terms of attribute coverage) and to measure how to say (by attribute-specified generation) separately. Experimental results show that our framework is effective. @@ -1464,7 +1464,7 @@ <fixed-case>SSAS</fixed-case>: Semantic Similarity for Abstractive Summarization RaghuramVadapalli - LittonJ Kurisinkel + LittonJ Kurisinkel ManishGupta VasudevaVarma 198–203 @@ -1522,7 +1522,7 @@ High Recall Open <fixed-case>IE</fixed-case> for Relation Discovery HadyElsahar ChristopheGravier - FrederiqueLaforest + FrederiqueLaforest 228–233 I17-2039 Relation Discovery discovers predicates (relation types) from a text corpus relying on the co-occurrence of two named entities in the same sentence. This is a very narrowing constraint: it represents only a small fraction of all relation mentions in practice. In this paper we propose a high recall approach for Open IE, which enables covering up to 16 times more sentences in a large corpus. Comparison against OpenIE systems shows that our proposed approach achieves 28% improvement over the highest recall OpenIE system and 6% improvement in precision than the same system. @@ -1543,7 +1543,7 @@ Yu-LunHsieh Yung-ChunChang Nai-WenChang - Wen-LianHsu + Wen-LianHsu 240–245 I17-2041 In this paper, we propose a recurrent neural network model for identifying protein-protein interactions in biomedical literature. Experiments on two largest public benchmark datasets, AIMed and BioInfer, demonstrate that our approach significantly surpasses state-of-the-art methods with relative improvements of 10% and 18%, respectively. Cross-corpus evaluation also demonstrate that the proposed model remains robust despite using different training data. These results suggest that RNN can effectively capture semantic relationships among proteins as well as generalizes over different corpora, without any feature engineering. @@ -1562,7 +1562,7 @@ Fake News Detection Through Multi-Perspective Speaker Profiles YunfeiLong - QinLu + QinLu RongXiang MingleiLi Chu-RenHuang @@ -1579,7 +1579,7 @@ KugatsuSadamitsu SatoshiKobashikawa RyoMasumura - YujiMatsumoto + YujiMatsumoto JunjiTomita 257–262 I17-2044 @@ -1602,7 +1602,7 @@ Boosting Neural Machine Translation DakunZhang JungiKim - JosepCrego + JosepCrego JeanSenellart 271–276 I17-2046 @@ -1625,7 +1625,7 @@ AnoopKunchukuttan MaulikShah PradyotPrakash - PushpakBhattacharyya + PushpakBhattacharyya 283–289 I17-2048 We investigate pivot-based translation between related languages in a low resource, phrase-based SMT setting. We show that a subword-level pivot-based SMT model using a related pivot language is substantially better than word and morpheme-level pivot models. It is also highly competitive with the best direct translation model, which is encouraging as no direct source-target training corpus is used. We also show that combining multiple related language pivot models can rival a direct translation model. Thus, the use of subwords as translation units coupled with multiple related pivot languages can compensate for the lack of a direct parallel corpus. @@ -1635,7 +1635,7 @@ Key-value Attention Mechanism for Neural Machine Translation HideyaMino MasaoUtiyama - EiichiroSumita + EiichiroSumita TakenobuTokunaga 290–295 I17-2049 @@ -1644,7 +1644,7 @@ Transfer Learning across Low-Resource, Related Languages for Neural Machine Translation - Toan Q.Nguyen + Toan Q.Nguyen DavidChiang 296–301 I17-2050 @@ -1657,7 +1657,7 @@ KangilKim Jong-HunShin Seung-HoonNa - SangKeunJung + SangKeunJung 302–307 I17-2051 Neural machine translation decoders are usually conditional language models to sequentially generate words for target sentences. This approach is limited to find the best word composition and requires help of explicit methods as beam search. To help learning correct compositional mechanisms in NMTs, we propose concept equalization using direct mapping distributed representations of source and target sentences. In a translation experiment from English to French, the concept equalization significantly improved translation quality by 3.00 BLEU points compared to a state-of-the-art NMT model. @@ -1674,7 +1674,7 @@ A Parallel Corpus of Python Functions and Documentation Strings for Automated Code Documentation and Code Generation - Antonio ValerioMiceli Barone + Antonio ValerioMiceli Barone RicoSennrich 314–319 I17-2053 @@ -1693,7 +1693,7 @@ Identifying Speakers and Listeners of Quoted Speech in Literary Works Chak YanYeung - JohnLee + JohnLee 325–329 I17-2055 We present the first study that evaluates both speaker and listener identification for direct speech in literary texts. Our approach consists of two steps: identification of speakers and listeners near the quotes, and dialogue chain segmentation. Evaluation results show that this approach outperforms a rule-based approach that is state-of-the-art on a corpus of literary texts. @@ -1741,7 +1741,7 @@ Leveraging Diverse Lexical Chains to Construct Essays for <fixed-case>C</fixed-case>hinese College Entrance Examination LiunianLi XiaojunWan - Jin-geYao + Jin-geYao SimingYan 355–360 I17-2060 @@ -1773,7 +1773,7 @@ Coreference Resolution on Math Problem Text in <fixed-case>J</fixed-case>apanese TakumiIto TakuyaMatsuzaki - SatoshiSato + SatoshiSato 373–377 I17-2063 This paper describes a coreference resolution system for math problem text. Case frame dictionaries and a math taxonomy are utilized for supplying domain knowledge. The system deals with various anaphoric phenomena beyond well-studied entity coreferences. @@ -1823,9 +1823,9 @@ <fixed-case>CWIG</fixed-case>3<fixed-case>G</fixed-case>2 - Complex Word Identification Task across Three Text Genres and Two User Groups Seid MuhieYimam - SanjaŠtajner + SanjaŠtajner MartinRiedl - ChrisBiemann + ChrisBiemann 401–407 I17-2068 Complex word identification (CWI) is an important task in text accessibility. However, due to the scarcity of CWI datasets, previous studies have only addressed this problem on Wikipedia sentences and have solely taken into account the needs of non-native English speakers. We collect a new CWI dataset (CWIG3G2) covering three text genres News, WikiNews, and Wikipedia) annotated by both native and non-native English speakers. Unlike previous datasets, we cover single words, as well as complex phrases, and present them for judgment in a paragraph context. We present the first study on cross-genre and cross-group CWI, showing measurable influences in native language and genre types. @@ -1868,7 +1868,7 @@ LishengFu Thien HuuNguyen BonanMin - RalphGrishman + RalphGrishman 425–429 I17-2072 Relations are expressed in many domains such as newswire, weblogs and phone conversations. Trained on a source domain, a relation extractor’s performance degrades when applied to target domains other than the source. A common yet labor-intensive method for domain adaptation is to construct a target-domain-specific labeled dataset for adapting the extractor. In response, we present an unsupervised domain adaptation method which only requires labels from the source domain. Our method is a joint model consisting of a CNN-based relation classifier and a domain-adversarial classifier. The two components are optimized jointly to learn a domain-independent representation for prediction on the target domain. Our model outperforms the state-of-the-art on all three test domains of ACE 2005. @@ -1878,7 +1878,7 @@ Lexical Simplification with the Deep Structured Similarity Model LisPereira XiaodongLiu - JohnLee + JohnLee 430–435 I17-2073 We explore the application of a Deep Structured Similarity Model (DSSM) to ranking in lexical simplification. Our results show that the DSSM can effectively capture fine-grained features to perform semantic matching when ranking substitution candidates, outperforming the state-of-the-art on two standard datasets used for the task. @@ -1888,7 +1888,7 @@ Proofread Sentence Generation as Multi-Task Learning with Editing Operation Prediction YutaHitomi HideakiTamori - NaoakiOkazaki + NaoakiOkazaki KentaroInui 436–441 I17-2074 @@ -1897,8 +1897,8 @@ An Exploration of Data Augmentation and <fixed-case>RNN</fixed-case> Architectures for Question Ranking in Community Question Answering - CharlesChen - RazvanBunescu + CharlesChen + RazvanBunescu 442–447 I17-2075 The automation of tasks in community question answering (cQA) is dominated by machine learning approaches, whose performance is often limited by the number of training examples. Starting from a neural sequence learning approach with attention, we explore the impact of two data augmentation techniques on question ranking performance: a method that swaps reference questions with their paraphrases, and training on examples automatically selected from external datasets. Both methods are shown to lead to substantial gains in accuracy over a strong baseline. Further improvements are obtained by changing the model architecture to mirror the structure seen in the data. @@ -1933,7 +1933,7 @@ <fixed-case>MASSA</fixed-case>lign: Alignment and Annotation of Comparable Documents - GustavoPaetzold + GustavoPaetzold FernandoAlva-Manchego LuciaSpecia 1–4 @@ -1949,7 +1949,7 @@ DeanaBurchfield AdamPoliak CashCostello - TimFinin + TimFinin ScottMiller JamesMayfield PhilippKoehn @@ -1964,7 +1964,7 @@ MarkDredze BenjaminGlass ShudongHao - PatrickMartin + PatrickMartin PushpendreRastogi RashmiSankepally TravisWolfe @@ -1980,7 +1980,7 @@ YunseokNoh Su JeongChoi Seong-BaePark - Se-YoungPark + Se-YoungPark 9–12 I17-3003 We demonstrate a report generation system called WiseReporter. The WiseReporter generates a text report of a specific topic which is usually given as a keyword by verbalizing knowledge base facts involving the topic. This demonstration does not demonstate only the report itself, but also the processes how the sentences for the report are generated. We are planning to enhance WiseReporter in the future by adding data analysis based on deep learning architecture and text summarization. @@ -1990,9 +1990,9 @@ <fixed-case>E</fixed-case>ncyclolink: A Cross-Encyclopedia,Cross-language Article-Linking System and Web-based Search Interface Yu-ChunWang Ka MingWong - Chun-KaiWu + Chun-KaiWu Chao-LinPan - Richard Tzong-HanTsai + Richard Tzong-HanTsai 13–16 I17-3004 Cross-language article linking (CLAL) is the task of finding corresponding article pairs across encyclopedias of different languages. In this paper, we present Encyclolink, a web-based CLAL search interface designed to help users find equivalent encyclopedia articles in Baidu Baike for a given English Wikipedia article title query. Encyclolink is powered by our cross-encyclopedia entity embedding CLAL system (0.8 MRR). The browser-based Interface provides users with a clear and easily readable preview of the contents of retrieved articles for comparison. @@ -2006,7 +2006,7 @@ Chao-ChuangShih Chun-HsunChen Po-ChingLee - Richard Tzong-HanTsai + Richard Tzong-HanTsai 17–20 I17-3005 In the paper, we propose an information retrieval based (IR-based) Question Answering (QA) system to assist online customer service staffs respond users in the telecom domain. When user asks a question, the system retrieves a set of relevant answers and ranks them. Moreover, our system uses a novel reranker to enhance the ranking result of information retrieval. It employs the word2vec model to represent the sentences as vectors. It also uses a sub-category feature, predicted by the k-nearest neighbor algorithm. Finally, the system returns the top five candidate answers, making online staffs find answers much more efficiently. @@ -2030,7 +2030,7 @@ <fixed-case>MUSST</fixed-case>: A Multilingual Syntactic Simplification Tool - CarolinaScarton + CarolinaScarton AlessioPalmero Aprosio SaraTonelli TamaraMartín Wanton @@ -2070,7 +2070,7 @@ Thai-HoangPham Xuan-KhoaiPham Tuan-AnhNguyen - PhuongLe-Hong + PhuongLe-Hong 37–40 I17-3010 This paper demonstrates neural network-based toolkit namely NNVLP for essential Vietnamese language processing tasks including part-of-speech (POS) tagging, chunking, Named Entity Recognition (NER). Our toolkit is a combination of bidirectional Long Short-Term Memory (Bi-LSTM), Convolutional Neural Network (CNN), Conditional Random Field (CRF), using pre-trained word embeddings as input, which outperforms previously published toolkits on these three tasks. We provide both of API and web demo for this toolkit. @@ -2080,7 +2080,7 @@ <fixed-case>C</fixed-case>lassifier<fixed-case>G</fixed-case>uesser: A Context-based Classifier Prediction System for <fixed-case>C</fixed-case>hinese Language Learners NicolePeinelt MariaLiakata - Shu-KaiHsieh + Shu-KaiHsieh 41–44 I17-3011 Classifiers are function words that are used to express quantities in Chinese and are especially difficult for language learners. In contrast to previous studies, we argue that the choice of classifiers is highly contextual and train context-aware machine learning models based on a novel publicly available dataset, outperforming previous baselines. We further present use cases for our database and models in an interactive demo system. @@ -2088,7 +2088,7 @@ Automatic Difficulty Assessment for <fixed-case>C</fixed-case>hinese Texts - JohnLee + JohnLee MeichunLiu Chun YinLam Tak OnLau @@ -2103,7 +2103,7 @@ Verb Replacer: An <fixed-case>E</fixed-case>nglish Verb Error Correction System Yu-HsuanWu Jhih-JieChen - JasonChang + JasonChang 49–52 I17-3013 According to the analysis of Cambridge Learner Corpus, using a wrong verb is the most common type of grammatical errors. This paper describes Verb Replacer, a system for detecting and correcting potential verb errors in a given sentence. In our approach, alternative verbs are considered to replace the verb based on an error-annotated corpus and verb-object collocations. The method involves applying regression on channel models, parsing the sentence, identifying the verbs, retrieving a small set of alternative verbs, and evaluating each alternative. Our method combines and improves channel and language models, resulting in high recall of detecting and correcting verb misuse. @@ -2113,8 +2113,8 @@ Learning Synchronous Grammar Patterns for Assisted Writing for Second Language Learners Chi-EnWu Jhih-JieChen - JimChang - JasonChang + JimChang + JasonChang 53–56 I17-3014 In this paper, we present a method for extracting Synchronous Grammar Patterns (SGPs) from a given parallel corpus in order to assisted second language learners in writing. A grammar pattern consists of a head word (verb, noun, or adjective) and its syntactic environment. A synchronous grammar pattern describes a grammar pattern in the target language (e.g., English) and its counterpart in an other language (e.g., Mandarin), serving the purpose of native language support. Our method involves identifying the grammar patterns in the target language, aligning these patterns with the target language patterns, and finally filtering valid SGPs. The extracted SGPs with examples are then used to develop a prototype writing assistant system, called WriteAhead/bilingual. Evaluation on a set of randomly selected SGPs shows that our system provides satisfactory writing suggestions for English as a Second Language (ESL) learners. @@ -2125,7 +2125,7 @@ Yu-ShengLi Chien-HuiTseng Chian-YunHuang - Wei-YunMa + Wei-YunMa 57–60 I17-3015 In this paper, we propose an idea of ondemand knowledge validation and fulfill the idea through an interactive Question-Answering (QA) game system, which is named Guess What. An object (e.g. dog) is first randomly chosen by the system, and then a user can repeatedly ask the system questions in natural language to guess what the object is. The system would respond with yes/no along with a confidence score. Some useful hints can also be given if needed. The proposed framework provides a pioneering example of on-demand knowledge validation in dialog environment to address such needs in AI agents/chatbots. Moreover, the released log data that the system gathered can be used to identify the most critical concepts/attributes of an existing knowledge base, which reflects human’s cognition about the world. @@ -2136,7 +2136,7 @@ JiaruiXu XuezheMa Chen-TseTsai - EduardHovy + EduardHovy 61–64 I17-3016 This paper aims to provide an effective tool for conversion between Simplified Chinese and Traditional Chinese. We present STCP, a customizable system comprising statistical conversion model, and proofreading web interface. Experiments show that our system achieves comparable character-level conversion performance with the state-of-art systems. In addition, our proofreading interface can effectively support diagnostics and data annotation. STCP is available at http://lagos.lti.cs.cmu.edu:8002/ @@ -2147,8 +2147,8 @@ PurvanshiMehta PruthwikMishra VinayakAthavale - ManishShrivastava - DiptiSharma + ManishShrivastava + DiptiSharma 65–68 I17-3017 This paper presents DILTON a system which solves simple arithmetic word problems. DILTON uses a Deep Neural based model to solve math word problems. DILTON divides the question into two parts - worldstate and query. The worldstate and the query are processed separately in two different networks and finally, the networks are merged to predict the final operation. We report the first deep learning approach for the prediction of operation between two numbers. DILTON learns to predict operations with 88.81% accuracy in a corpus of primary school questions. @@ -2160,7 +2160,7 @@ Proceedings of the IJCNLP 2017, Shared Tasks I17-4 Chao-HongLiu - PreslavNakov + PreslavNakov NianwenXue Asian Federation of Natural Language Processing
Taipei, Taiwan
@@ -2185,10 +2185,10 @@
<fixed-case>IJCNLP</fixed-case>-2017 Task 2: Dimensional Sentiment Analysis for <fixed-case>C</fixed-case>hinese Phrases - Liang-ChihYu + Liang-ChihYu Lung-HaoLee JinWang - Kam-FaiWong + Kam-FaiWong 9–16 I17-4002 This paper presents the IJCNLP 2017 shared task on Dimensional Sentiment Analysis for Chinese Phrases (DSAP) which seeks to identify a real-value sentiment score of Chinese single words and multi-word phrases in the both valence and arousal dimensions. Valence represents the degree of pleasant and unpleasant (or positive and negative) feelings, and arousal represents the degree of excitement and calm. Of the 19 teams registered for this shared task for two-dimensional sentiment analysis, 13 submitted results. We expected that this evaluation campaign could produce more advanced dimensional sentiment analysis techniques, especially for Chinese affective computing. All data sets with gold standards and scoring script are made publicly available to researchers. @@ -2196,7 +2196,7 @@ <fixed-case>IJCNLP</fixed-case>-2017 Task 3: Review Opinion Diversification (<fixed-case>R</fixed-case>ev<fixed-case>O</fixed-case>pi<fixed-case>D</fixed-case>-2017) - AnilKumar Singh + AnilKumar Singh AvijitThawani MayankPanchal AnubhavGupta @@ -2324,7 +2324,7 @@ <fixed-case>CKIP</fixed-case> at <fixed-case>IJCNLP</fixed-case>-2017 Task 2: Neural Valence-Arousal Prediction for Phrases Peng-HsuanLi - Wei-YunMa + Wei-YunMa Hsin-YangWang 89–94 I17-4014 @@ -2337,7 +2337,7 @@ Yung-ChunChang Chen-AnnWang Yu-LunHsieh - Wen-LianHsu + Wen-LianHsu 95–99 I17-4015 Sentiment lexicon is very helpful in dimensional sentiment applications. Because of countless Chinese words, developing a method to predict unseen Chinese words is required. The proposed method can handle both words and phrases by using an ADVWeight List for word prediction, which in turn improves our performance at phrase level. The evaluation results demonstrate that our system is effective in dimensional sentiment analysis for Chinese phrases. The Mean Absolute Error (MAE) and Pearson’s Correlation Coefficient (PCC) for Valence are 0.723 and 0.835, respectively, and those for Arousal are 0.914 and 0.756, respectively. @@ -2346,7 +2346,7 @@ <fixed-case>A</fixed-case>libaba at <fixed-case>IJCNLP</fixed-case>-2017 Task 2: A Boosted Deep System for Dimensional Sentiment Analysis of <fixed-case>C</fixed-case>hinese Phrases XinZhou - JianWang + JianWang XuXie ChanglongSun LuoSi @@ -2392,7 +2392,7 @@ <fixed-case>NCTU</fixed-case>-<fixed-case>NTUT</fixed-case> at <fixed-case>IJCNLP</fixed-case>-2017 Task 2: Deep Phrase Embedding using bi-<fixed-case>LSTM</fixed-case>s for Valence-Arousal Ratings Prediction of <fixed-case>C</fixed-case>hinese Phrases Yen-HsuanLee Han-YunYeh - Yih-RuWang + Yih-RuWang Yuan-FuLiao 124–129 I17-4020 @@ -2430,7 +2430,7 @@ All-In-1 at <fixed-case>IJCNLP</fixed-case>-2017 Task 4: Short Text Classification with One Model for All Languages - BarbaraPlank + BarbaraPlank 143–148 I17-4024 We present All-In-1, a simple model for multilingual text classification that does not require any parallel data. It is based on a traditional Support Vector Machine classifier exploiting multilingual word embeddings and character n-grams. Our model is simple, easily extendable yet very effective, overall ranking 1st (out of 12 teams) in the IJCNLP 2017 shared task on customer feedback analysis in four languages: English, French, Japanese and Spanish. @@ -2440,7 +2440,7 @@ <fixed-case>S</fixed-case>enti<fixed-case>NLP</fixed-case> at <fixed-case>IJCNLP</fixed-case>-2017 Task 4: Customer Feedback Analysis Using a <fixed-case>B</fixed-case>i-<fixed-case>LSTM</fixed-case>-<fixed-case>CNN</fixed-case> Model ShuyingLin HuoshengXie - Liang-ChihYu + Liang-ChihYu K. RobertLai 149–154 I17-4025 @@ -2461,7 +2461,7 @@ <fixed-case>ADAPT</fixed-case> at <fixed-case>IJCNLP</fixed-case>-2017 Task 4: A Multinomial Naive <fixed-case>B</fixed-case>ayes Classification Approach for Customer Feedback Analysis task PintuLohar - KoelDutta Chowdhury + KoelDutta Chowdhury HaithemAfli MohammedHasanuzzaman AndyWay @@ -2494,7 +2494,7 @@ ParthaPakray RiyankaManna DipankarDas - AlexanderGelbukh + AlexanderGelbukh 180–183 I17-4030 In this paper, we describe a deep learning framework for analyzing the customer feedback as part of our participation in the shared task on Customer Feedback Analysis at the 8th International Joint Conference on Natural Language Processing (IJCNLP 2017). A Convolutional Neural Network (CNN) based deep neural network model was employed for the customer feedback task. The proposed system was evaluated on two languages, namely, English and French. @@ -2502,11 +2502,11 @@ <fixed-case>IITP</fixed-case> at <fixed-case>IJCNLP</fixed-case>-2017 Task 4: Auto Analysis of Customer Feedback using <fixed-case>CNN</fixed-case> and <fixed-case>GRU</fixed-case> Network - DeepakGupta + DeepakGupta PabitraLenka HarsimranBedi AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 184–193 I17-4031 Analyzing customer feedback is the best way to channelize the data into new marketing strategies that benefit entrepreneurs as well as customers. Therefore an automated system which can analyze the customer behavior is in great demand. Users may write feedbacks in any language, and hence mining appropriate information often becomes intractable. Especially in a traditional feature-based supervised model, it is difficult to build a generic system as one has to understand the concerned language for finding the relevant features. In order to overcome this, we propose deep Convolutional Neural Network (CNN) and Recurrent Neural Network (RNN) based approaches that do not require handcrafting of features. We evaluate these techniques for analyzing customer feedback sentences on four languages, namely English, French, Japanese and Spanish. Our empirical analysis shows that our models perform well in all the four languages on the setups of IJCNLP Shared Task on Customer Feedback Analysis. Our model achieved the second rank in French, with an accuracy of 71.75% and third ranks for all the other languages. @@ -2607,7 +2607,7 @@ Neural Machine Translation: Basics, Practical Aspects and Recent Trends - FabienCromieres + FabienCromieres ToshiakiNakazawa RajDabre 11–13 @@ -2617,7 +2617,7 @@ The Ultimate Presentation Makeup Tutorial: How to <fixed-case>P</fixed-case>olish your Posters, Slides and Presentations Skills - GustavoPaetzold + GustavoPaetzold LuciaSpecia 14–15 I17-5005 diff --git a/data/xml/J00.xml b/data/xml/J00.xml index c75dac1929..f35f7d53b7 100644 --- a/data/xml/J00.xml +++ b/data/xml/J00.xml @@ -3,7 +3,7 @@ Computational Linguistics, Volume 26, Number 1, March 2000 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
2000 @@ -42,8 +42,8 @@
Learning dependency translation models as collections of finite state head transducers - HiyanAlsawi - SrinivasBangalore + HiyanAlsawi + SrinivasBangalore ShonaDouglas 45-60 J00-1004 @@ -51,14 +51,14 @@ Treatment of epsilon moves in subset construction - Gertjanvan Noord + Gertjanvan Noord 61-76 J00-1005 van-noord-2000-treatment Multitiered nonlinear morphology using multitape finite automata: a case study on <fixed-case>S</fixed-case>yriac and <fixed-case>A</fixed-case>rabic - George AntonKiraz + George AntonKiraz 77-105 J00-1006 kiraz-2000-multitiered @@ -92,16 +92,16 @@ A model for multimodal reference resolution - LuisPineda - GabrielaGarza + LuisPineda + GabrielaGarza 139-194 J00-2002 pineda-garza-2000-model A multistrategy approach to improving pronunciation by analogy - YannickMarchand - Robert I.Damper + YannickMarchand + Robert I.Damper 195-220 J00-2003 marchand-damper-2000-multistrategy @@ -170,13 +170,13 @@ Book Reviews: <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory - JasonEisner + JasonEisner J00-2014 eisner-2000-book Book Reviews: Systemic Functional Grammar in Natural Language Generation: Linguistic Description and Computational Representation - GrahamWilcock + GrahamWilcock J00-2015 wilcock-2000-book @@ -214,7 +214,7 @@ Extracting the lowest-frequency words: pitfalls and possibilities MarcWeeber ReinVos - R. HaraldBaayen + R. HaraldBaayen 301-318 J00-3001 weeber-etal-2000-extracting @@ -228,16 +228,16 @@
Dialogue act modeling for automatic tagging and recognition of conversational speech - AndreasStolcke + AndreasStolcke KlausRies - NoahCoccaro - ElizabethShriberg + NoahCoccaro + ElizabethShriberg RebeccaBates - DanielJurafsky + DanielJurafsky PaulTaylor RachelMartin - CarolVan Ess-Dykema - MarieMeteer + CarolVan Ess-Dykema + MarieMeteer 339-374 J00-3003 stolcke-etal-2000-dialogue @@ -261,19 +261,19 @@ Book Reviews: Foundations of Computational Linguistics: Man-Machine Communication in Natural Language - Alexander F.Gelbukh + Alexander F.Gelbukh J00-3006 gelbukh-2000-book Book Reviews: Syntactic Wordclass Tagging - AdwaitRatnaparkhi + AdwaitRatnaparkhi J00-3007 ratnaparkhi-2000-book Book Reviews: Natural Language Information Retrieval - SimonCorston-Oliver + SimonCorston-Oliver J00-3008 corston-oliver-2000-book @@ -315,16 +315,16 @@ Automatic Text Categorization In Terms Of Genre and Author - EfstathiosStamatatos - NikosFakotakis - GeorgeKokkinakis + EfstathiosStamatatos + NikosFakotakis + GeorgeKokkinakis 471-495 J00-4001 stamatatos-etal-2000-automatic Bidirectional Contextual Resolution - Stephen G.Pulman + Stephen G.Pulman 497-537 J00-4002 10.1162/089120100750105939 @@ -332,8 +332,8 @@ An Empirically-based System for Processing Definite Descriptions - RenataVieira - MassimoPoesio + RenataVieira + MassimoPoesio 539-593 J00-4003 10.1162/089120100750105948 @@ -342,15 +342,15 @@ Learning Methods to Combine Linguistic Indicators:Improving Aspectual Classification and Revealing Linguistic Insights Eric V.Siegel - Kathleen R.McKeown + Kathleen R.McKeown 595-627 J00-4004 siegel-mckeown-2000-learning On Coreferring: Coreference in <fixed-case>MUC</fixed-case> and Related Annotation Schemes - Keesvan Deemter - RodgerKibble + Keesvan Deemter + RodgerKibble 629-637 J00-4005 van-deemter-kibble-2000-coreferring @@ -369,13 +369,13 @@ Book Reviews: Architectures and Mechanisms for Language Processing - AmyWeinberg + AmyWeinberg J00-4008 weinberg-2000-book Book Reviews: Breadth and Depth of Semantic Lexicons - John S.White + John S.White J00-4009 white-2000-book diff --git a/data/xml/J01.xml b/data/xml/J01.xml index b7d16a6f4f..8551623493 100644 --- a/data/xml/J01.xml +++ b/data/xml/J01.xml @@ -3,7 +3,7 @@ Computational Linguistics, Volume 27, Number 1, March 2001 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
2001 @@ -18,7 +18,7 @@ Using Suffix Arrays to Compute Term Frequency and Document Frequency for All Substrings in a Corpus MikioYamamoto - Kenneth W.Church + Kenneth W.Church 1-30 J01-1001 10.1162/089120101300346787 @@ -38,7 +38,7 @@ Bootstrapping Morphological Analyzers by Combining Human Elicitation and Machine Learning KemalOflazer - SergeiNirenberg + SergeiNirenberg MarjorieMcShane 59-85 J01-1003 @@ -47,9 +47,9 @@ <fixed-case>D</fixed-case>-Tree Substitution Grammars - OwenRambow - K.Vijay-Shanker - DavidWeir + OwenRambow + K.Vijay-Shanker + DavidWeir 87-121 J01-1004 10.1162/089120101300346813 @@ -58,7 +58,7 @@ Unsupervised Named Entity Recognition Using Syntactic and Semantic Contextual Evidence AlessandroCucchiarelli - PaolaVelardi + PaolaVelardi 123-131 J01-1005 cucchiarelli-velardi-2001-unsupervised @@ -100,7 +100,7 @@ Computational Linguistics, Volume 27, Number 2, June 2001 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
2001 @@ -122,18 +122,18 @@
Improving Accuracy in word class tagging through the Combination of Machine Learning Systems - HansVan Halteren + HansVan Halteren JakubZavrel - WalterDaelemans + WalterDaelemans 199-229 J01-2002 van-halteren-etal-2001-improving The Need for Accurate Alignment in Natural Language System Evaluation - AndrewKehler - JohnBear - DouglasAppelt + AndrewKehler + JohnBear + DouglasAppelt 231-248 J01-2003 10.1162/089120101750300517 @@ -150,7 +150,7 @@ Nonminimal Derivations in Unification-Based Parsing NorikoTomuro - Steven L.Lytinen + Steven L.Lytinen 277-285 J01-2005 10.1162/089120101750300535 @@ -158,13 +158,13 @@ Book Reviews: Knowledge Representation: Logical, Philosophical, and Computational Foundations - Stuart C.Shapiro + Stuart C.Shapiro J01-2006 shapiro-2001-book Book Reviews: Natural Language Processing and Knowledge Representation: Language for Knowledge and Knowledge for Language - Robert E.Mercer + Robert E.Mercer J01-2007 mercer-2001-book @@ -176,7 +176,7 @@ Book Reviews: Advances in Information Retrieval: Recent Research from the Center for Intelligent Information Retrieval - SandaHarabagiu + SandaHarabagiu J01-2009 harabagiu-2001-book @@ -194,7 +194,7 @@
Book Reviews: Learnability in <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory - WalterDaelemans + WalterDaelemans J01-2012 daelemans-2001-book @@ -213,7 +213,7 @@ Computational Linguistics, Volume 27, Number 3, September 2001 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
2001 @@ -228,7 +228,7 @@ The Interaction of Knowledge Sources in Word Sense Disambiguation MarkStevenson - YorickWilks + YorickWilks 321-349 J01-3001 10.1162/089120101317066104 @@ -253,9 +253,9 @@ Towards Constructive Text, Diagram, and Layout Generation for Information Presentation - JohnBateman + JohnBateman ThomasKamps - JörgKleinz + JörgKleinz KlausReichenberger 409-449 J01-3004 @@ -265,7 +265,7 @@ Book Reviews: Prosody: Theory and Experiment. Studies presented to Gosta Bruce ChilinShih - RichardSproat + RichardSproat J01-3005 shih-sproat-2001-book @@ -283,7 +283,7 @@ Book Reviews: Presumptive Meanings: The theory of generalized conversational implicature - NancyGreen + NancyGreen J01-3008 green-2001-book @@ -307,7 +307,7 @@ Computational Linguistics, Volume 27, Number 4, December 2001 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
2001 @@ -321,8 +321,8 @@ Introduction to the Special Issue on Computational Anaphora Resolution - RuslanMitkov - BranimirBoguraev + RuslanMitkov + BranimirBoguraev ShalomLappin 473-477 J01-4001 @@ -339,7 +339,7 @@ A Corpus-Based Evaluation of Centering and Pronoun Resolution - Joel R.Tetreault + Joel R.Tetreault 507-520 J01-4003 tetreault-2001-corpus @@ -348,7 +348,7 @@ A Machine Learning Approach to Coreference Resolution of Noun Phrases Wee MengSoon Hwee TouNg - Daniel Chung YongLim + Daniel Chung YongLim 521-544 J01-4004 10.1162/089120101753342653 @@ -356,13 +356,13 @@ An Algorithm for Anaphora Resolution in <fixed-case>S</fixed-case>panish Texts - ManuelPalomar - AntonioFerrández - LidiaMoreno - PatricioMartínez-Barco - JesúsPeral - MaximilianoSaiz-Noeda - RafaelMuñoz + ManuelPalomar + AntonioFerrández + LidiaMoreno + PatricioMartínez-Barco + JesúsPeral + MaximilianoSaiz-Noeda + RafaelMuñoz 545-567 J01-4005 10.1162/089120101753342662 @@ -370,14 +370,14 @@ The Uncommon Denominator: A Proposal for Consistent Reporting of Pronoun Resolution Results - Donna K.Byron + Donna K.Byron 569-578 J01-4006 byron-2001-uncommon A Reformulation of Rule 2 of <fixed-case>C</fixed-case>entering <fixed-case>T</fixed-case>heory - RodgerKibble + RodgerKibble 579-587 J01-4007 kibble-2001-reformulation @@ -396,13 +396,13 @@ Book Reviews: Robustness in Language and Speech Technology - JohnCarroll + JohnCarroll J01-4010 carroll-2001-book Book Reviews: Formal Aspects of Context - Richmond H.Thomason + Richmond H.Thomason J01-4011 thomason-2001-book diff --git a/data/xml/J02.xml b/data/xml/J02.xml index 3ab7251c7a..8efcc06a20 100644 --- a/data/xml/J02.xml +++ b/data/xml/J02.xml @@ -15,7 +15,7 @@ Binding Machines - AntónioBranco + AntónioBranco 1-18 10.1162/089120102317341747 J02-1001 @@ -24,7 +24,7 @@ A Critique and Improvement of an Evaluation Metric for Text Segmentation LevPevzner - Marti A.Hearst + Marti A.Hearst 19-36 10.1162/089120102317341756 J02-1002 @@ -32,7 +32,7 @@ Generating Referring Expressions: <fixed-case>B</fixed-case>oolean Extensions of the Incremental Algorithm - Keesvan Deemter + Keesvan Deemter 37-52 10.1162/089120102317341765 J02-1003 @@ -40,8 +40,8 @@ Syllable-Pattern-Based Unknown-Morpheme Segmentation and Estimation for Hybrid Part-of-Speech Tagging of <fixed-case>K</fixed-case>orean - Gary GeunbaeLee - JeongwonCha + Gary GeunbaeLee + JeongwonCha Jong-HyeokLee 53-70 10.1162/089120102317341774 @@ -82,7 +82,7 @@ Near-Synonymy and Lexical Choice - PhilipEdmonds + PhilipEdmonds GraemeHirst 105-144 10.1162/089120102760173625 @@ -91,7 +91,7 @@ The Combinatory Morphemic Lexicon - CemBozsahin + CemBozsahin 145-186 10.1162/089120102760173634 J02-2002 @@ -100,7 +100,7 @@ Class-Based Probability Estimation Using a Semantic Hierarchy StephenClark - DavidWeir + DavidWeir 187-206 10.1162/089120102760173643 J02-2003 @@ -109,7 +109,7 @@ Incremental Construction and Maintenance of Minimal Finite-State Automata Rafael C.Carrasco - Mikel L.Forcada + Mikel L.Forcada 207-216 10.1162/089120102760173652 J02-2004 @@ -142,7 +142,7 @@ Automatic Labeling of Semantic Roles DanielGildea - DanielJurafsky + DanielJurafsky 245-288 10.1162/089120102760275983 J02-3001 @@ -197,9 +197,9 @@ Introduction to the Special Issue on Summarization - Dragomir R.Radev - EduardHovy - KathleenMcKeown + Dragomir R.Radev + EduardHovy + KathleenMcKeown 399-408 10.1162/089120102762671927 J02-4001 @@ -225,7 +225,7 @@ Efficiently Computed Lexical Chains as an Intermediate Representation for Automatic Text Summarization H. GregorySilber - Kathleen F.McCoy + Kathleen F.McCoy 487-496 10.1162/089120102762671954 J02-4004 @@ -242,7 +242,7 @@ Using Hidden <fixed-case>M</fixed-case>arkov Modeling to Decompose Human-Written Summaries - HongyanJing + HongyanJing 527-543 10.1162/089120102762671972 J02-4006 @@ -251,7 +251,7 @@ Squibs and Discussions: Human Variation and Lexical Choice EhudReiter - SomayajuluSripada + SomayajuluSripada 545-553 10.1162/089120102762671981 J02-4007 diff --git a/data/xml/J03.xml b/data/xml/J03.xml index a1890b33c6..d13a9ead1f 100644 --- a/data/xml/J03.xml +++ b/data/xml/J03.xml @@ -15,7 +15,7 @@ Optimization Models of Sound Systems Using Genetic Algorithms JinyunKe MiekoOgura - William S.-Y.Wang + William S.-Y.Wang 1-18 10.1162/089120103321337412 J03-1001 @@ -23,8 +23,8 @@ A Systematic Comparison of Various Statistical Alignment Models - Franz JosefOch - HermannNey + Franz JosefOch + HermannNey 19-51 10.1162/089120103321337421 J03-1002 @@ -32,7 +32,7 @@ Graph-Based Generation of Referring Expressions - EmielKrahmer + EmielKrahmer Sebastiaanvan Erk AndréVerleg 53-72 @@ -51,8 +51,8 @@ Word Reordering and a Dynamic Programming Beam Search Algorithm for Statistical Machine Translation - ChristophTillmann - HermannNey + ChristophTillmann + HermannNey 97-133 10.1162/089120103321337458 J03-1005 @@ -114,7 +114,7 @@ Document Structure RichardPower - DoniaScott + DoniaScott NadjetBouayad-Agha 211-260 10.1162/089120103322145315 @@ -133,7 +133,7 @@ Book Reviews: Lexicography and Natural Language Processing: A Festschrift in Honour of <fixed-case>B</fixed-case>. <fixed-case>T</fixed-case>. <fixed-case>S</fixed-case>. Atkins edited by Marie-Hélène Corréard WoodyHaynes - MarthaEvens + MarthaEvens J03-2005 haynes-evens-2003-book @@ -163,7 +163,7 @@ The Web as a Parallel Corpus PhilipResnik - Noah A.Smith + Noah A.Smith 349-380 10.1162/089120103322711578 J03-3002 @@ -172,7 +172,7 @@ Embedding Web-Based Statistical Translation Models in Cross-Language Information Retrieval WesselKraaij - Jian-YunNie + Jian-YunNie MichelSimard 381-419 10.1162/089120103322711587 @@ -246,9 +246,9 @@ Anaphora and Discourse Structure - BonnieWebber + BonnieWebber MatthewStone - AravindJoshi + AravindJoshi AlistairKnott 545-587 10.1162/089120103322753347 @@ -257,7 +257,7 @@ Head-Driven Statistical Models for Natural Language Parsing - MichaelCollins + MichaelCollins 589-637 10.1162/089120103322753356 J03-4003 @@ -265,8 +265,8 @@ Disambiguating Nouns, Verbs, and Adjectives Using Automatically Acquired Selectional Preferences - DianaMcCarthy - JohnCarroll + DianaMcCarthy + JohnCarroll 639-654 10.1162/089120103322753365 J03-4004 @@ -274,7 +274,7 @@ Book Reviews: Learning to Classify Text Using Support Vector Machines: Methods, Theory and Algorithms by Thorsten Joachims; Anaphora Resolution by Ruslan Mitkov - RobertoBasili + RobertoBasili MichaelStrube J03-4005 basili-strube-2003-book diff --git a/data/xml/J04.xml b/data/xml/J04.xml index db3106d477..52fac9a51c 100644 --- a/data/xml/J04.xml +++ b/data/xml/J04.xml @@ -51,7 +51,7 @@ Squibs and Discussions: The Kappa Statistic: A Second Look BarbaraDi Eugenio - MichaelGlass + MichaelGlass 95-101 10.1162/089120104773633402 J04-1005 @@ -95,7 +95,7 @@ Learning Domain Ontologies from Document Warehouses and Dedicated Web Sites RobertoNavigli - PaolaVelardi + PaolaVelardi 151-179 10.1162/089120104323093276 J04-2002 @@ -103,8 +103,8 @@ Statistical Machine Translation with Scarce Resources Using Morpho-syntactic Information - SonjaNießen - HermannNey + SonjaNießen + HermannNey 181-204 10.1162/089120104323093285 J04-2003 @@ -112,8 +112,8 @@ Machine Translation with Inferred Stochastic Finite-State Transducers - FranciscoCasacuberta - EnriqueVidal + FranciscoCasacuberta + EnriqueVidal 205-225 10.1162/089120104323093294 J04-2004 @@ -156,11 +156,11 @@ Learning Subjective Language - JanyceWiebe + JanyceWiebe TheresaWilson - RebeccaBruce + RebeccaBruce MatthewBell - MelanieMartin + MelanieMartin 277-308 10.1162/0891201041850885 J04-3002 @@ -168,10 +168,10 @@ <fixed-case>C</fixed-case>entering: A Parametric Theory and Its Instantiations - MassimoPoesio - RosemaryStevenson + MassimoPoesio + RosemaryStevenson BarbaraDi Eugenio - JanetHitzeman + JanetHitzeman 309-363 10.1162/0891201041850911 J04-3003 @@ -179,7 +179,7 @@ Understanding the Yarowsky Algorithm - StevenAbney + StevenAbney 365-395 10.1162/0891201041850876 J04-3004 @@ -204,7 +204,7 @@ Optimizing Referential Coherence in Text Generation - RodgerKibble + RodgerKibble RichardPower 401-416 10.1162/0891201042544893 @@ -213,8 +213,8 @@ The Alignment Template Approach to Statistical Machine Translation - Franz JosefOch - HermannNey + Franz JosefOch + HermannNey 417-449 10.1162/0891201042544884 J04-4002 @@ -231,7 +231,7 @@ Intricacies of Collins’ Parsing Model - Daniel M.Bikel + Daniel M.Bikel 479-511 10.1162/0891201042544929 J04-4004 diff --git a/data/xml/J05.xml b/data/xml/J05.xml index ae2fb01abb..55ea75c531 100644 --- a/data/xml/J05.xml +++ b/data/xml/J05.xml @@ -13,7 +13,7 @@ <fixed-case>ACL</fixed-case> Lifetime Achievement Award: Some Points in a Time - KarenSpärck Jones + KarenSpärck Jones 1-14 10.1162/0891201053630237 J05-1001 @@ -21,9 +21,9 @@ Squibs and Discussions: Real versus Template-Based Natural Language Generation: A False Opposition? - Keesvan Deemter - EmielKrahmer - MariëtTheune + Keesvan Deemter + EmielKrahmer + MariëtTheune 15-24 10.1162/0891201053630291 J05-1002 @@ -31,7 +31,7 @@ Discriminative Reranking for Natural Language Parsing - MichaelCollins + MichaelCollins TerryKoo 25-70 10.1162/0891201053630273 @@ -40,7 +40,7 @@ The <fixed-case>P</fixed-case>roposition <fixed-case>B</fixed-case>ank: An Annotated Corpus of Semantic Roles - MarthaPalmer + MarthaPalmer DanielGildea PaulKingsbury 71-106 @@ -52,7 +52,7 @@ Clustering Syntactic Positions with Similar Semantic Requirements PabloGamallo AlexandreAgustini - Gabriel P.Lopes + Gabriel P.Lopes 107-146 10.1162/0891201053630318 J05-1005 @@ -94,7 +94,7 @@ <fixed-case>O</fixed-case>bituary: Remembering Bill <fixed-case>M</fixed-case>ann - Christian M. I. M.Matthiessen + Christian M. I. M.Matthiessen 161-171 10.1162/0891201054224002 J05-2001 @@ -148,7 +148,7 @@ Squibs and Discussions: Evaluating Discourse and Dialogue Coding Schemes RichardCraggs - Mary McGeeWood + Mary McGeeWood 289-296 10.1162/089120105774321109 J05-3001 @@ -157,7 +157,7 @@ Sentence Fusion for Multidocument News Summarization ReginaBarzilay - Kathleen R.McKeown + Kathleen R.McKeown 297-328 10.1162/089120105774321091 J05-3002 @@ -168,7 +168,7 @@ RuthO’Donovan MichaelBurke AoifeCahill - Josefvan Genabith + Josefvan Genabith AndyWay 329-366 10.1162/089120105774321073 @@ -198,7 +198,7 @@ Book Review: New Directions in Question Answering, edited by Mark <fixed-case>T</fixed-case>. Maybury - MariusPaşca + MariusPaşca J05-3007 pasca-2005-book @@ -235,7 +235,7 @@ Co-occurrence Retrieval: A Flexible Framework for Lexical Distributional Similarity JulieWeeds - DavidWeir + DavidWeir 439-475 10.1162/089120105775299122 J05-4002 @@ -243,7 +243,7 @@ Improving Machine Translation Performance by Exploiting Non-Parallel Corpora - Dragos StefanMunteanu + Dragos StefanMunteanu DanielMarcu 477-504 10.1162/089120105775299168 @@ -252,7 +252,7 @@ Induction of Word and Phrase Alignments for Automatic Document Summarization - HalDaumé III + HalDaumé III DanielMarcu 505-530 10.1162/089120105775299140 @@ -264,7 +264,7 @@ JianfengGao MuLi AndiWu - Chang-NingHuang + Chang-NingHuang 531-574 10.1162/089120105775299177 J05-4005 @@ -272,7 +272,7 @@ Last Words: Reviewing the Reviewers - KennethChurch + KennethChurch 575-578 10.1162/089120105775299131 J05-4006 diff --git a/data/xml/J06.xml b/data/xml/J06.xml index 8b17a87d3c..1d4f061f2d 100644 --- a/data/xml/J06.xml +++ b/data/xml/J06.xml @@ -39,7 +39,7 @@ Finite-State Registered Automata for Non-Concatenative Morphology - YaelCohen-Sygal + YaelCohen-Sygal ShulyWintner 49-82 10.1162/coli.2006.32.1.49 @@ -48,9 +48,9 @@ Automatic Discovery of Part-Whole Relations - RoxanaGirju - AdrianaBadulescu - DanMoldovan + RoxanaGirju + AdrianaBadulescu + DanMoldovan 83-135 10.1162/coli.2006.32.1.83 J06-1005 @@ -58,7 +58,7 @@ Introducing Speech and Language Processing, by John Coleman - MaryHarper + MaryHarper J06-1006 harper-2006-introducing @@ -70,13 +70,13 @@ Book Reviews: Computational and Quantitative Studies by <fixed-case>M</fixed-case>. <fixed-case>A</fixed-case>. <fixed-case>K</fixed-case>. Halliday, edited by Jonathan <fixed-case>J</fixed-case>. Webster - ChrisMellish + ChrisMellish J06-1008 mellish-2006-book Book Reviews: Corpus Linguistics: Readings in a Widening Discipline, edited by Geoffrey Sampson and Diana <fixed-case>M</fixed-case>c<fixed-case>C</fixed-case>arthy - RobertMalouf + RobertMalouf J06-1009 malouf-2006-book @@ -100,7 +100,7 @@ Experiments on the Automatic Induction of <fixed-case>G</fixed-case>erman Semantic Verb Classes - SabineSchulte im Walde + SabineSchulte im Walde 159-194 10.1162/coli.2006.32.2.159 J06-2001 @@ -108,7 +108,7 @@ Generating Referring Expressions that Involve Gradable Properties - Keesvan Deemter + Keesvan Deemter 195-222 10.1162/coli.2006.32.2.195 J06-2002 @@ -116,7 +116,7 @@ Building and Using a Lexical Knowledge Base of Near-Synonym Differences - DianaInkpen + DianaInkpen GraemeHirst 223-262 10.1162/coli.2006.32.2.223 @@ -126,7 +126,7 @@ <fixed-case>S</fixed-case>quibs: The <fixed-case>PARADISE</fixed-case> Evaluation Framework: Issues and Findings MelitaHajdinjak - FranceMihelic + FranceMihelic 263-272 10.1162/coli.2006.32.2.263 J06-2004 @@ -195,7 +195,7 @@ Similarity of Semantic Relations - Peter D.Turney + Peter D.Turney 379-416 10.1162/coli.2006.32.3.379 J06-3003 @@ -203,9 +203,9 @@ Characterizing and Predicting Corrections in Spoken Dialogue Systems - DianeLitman + DianeLitman MarcSwerts - JuliaHirschberg + JuliaHirschberg 417-438 10.1162/coli.2006.32.3.417 J06-3004 @@ -231,7 +231,7 @@ Book Reviews: Argument Realization, by Beth Levin and Malka Rappaport Hovav - KarinKipper + KarinKipper J06-3008 kipper-2006-book @@ -264,7 +264,7 @@ <fixed-case>ACL</fixed-case> Lifetime Achievement Award: Old Linguists Never Die, They Only Get Obligatorily Deleted - EvaHajicova + EvaHajicova 457-469 10.1162/coli.2006.32.4.457 J06-4001 @@ -289,13 +289,13 @@ N-gram-based Machine Translation - JoséMariño - Rafael E.Banchs - Josep M.Crego - Adriàde Gispert + JoséMariño + Rafael E.Banchs + Josep M.Crego + Adriàde Gispert PatrikLambert - José A. R.Fonollosa - Marta R.Costa-jussà + José A. R.Fonollosa + Marta R.Costa-jussà 527-549 10.1162/coli.2006.32.4.527 J06-4004 @@ -309,13 +309,13 @@ Book Review: One-on-One Tutoring by Humans and Computers, by Martha Evens and Joel <fixed-case>M</fixed-case>ichael - PamelaJordan + PamelaJordan J06-4006 jordan-2006-book Book Review: Memory-Based Language Processing, by Walter Daelemans and Antal van den Bosch - SandraKübler + SandraKübler J06-4007 kubler-2006-book diff --git a/data/xml/J07.xml b/data/xml/J07.xml index fdba373df7..f975ddc641 100644 --- a/data/xml/J07.xml +++ b/data/xml/J07.xml @@ -13,8 +13,8 @@ Letter to the Editor - WalterDaelemans - Antalvan den Bosch + WalterDaelemans + Antalvan den Bosch 1 10.1162/coli.2007.33.1.1 J07-1001 @@ -32,7 +32,7 @@ Word-Level Confidence Estimation for Machine Translation NicolaUeffing - HermannNey + HermannNey 9-40 10.1162/coli.2007.33.1.9 J07-1003 @@ -40,8 +40,8 @@ Question Answering in Restricted Domains: An Overview - DiegoMollá - José LuisVicedo + DiegoMollá + José LuisVicedo 41-61 10.1162/coli.2007.33.1.41 J07-1004 @@ -59,7 +59,7 @@ Composing Questions through Conceptual Authoring CatalinaHallett - DoniaScott + DoniaScott RichardPower 105-133 10.1162/coli.2007.33.1.105 @@ -75,7 +75,7 @@ Book Reviews: Flexible Semantics for Reinterpretation Phenomena, by Markus Egg - StephenPulman + StephenPulman 141-143 J07-1008 pulman-2007-book @@ -109,7 +109,7 @@ <fixed-case>S</fixed-case>quibs: Maximal Consistent Subsets - RobertMalouf + RobertMalouf 153-160 10.1162/coli.2007.33.2.153 J07-2001 @@ -117,7 +117,7 @@ Dependency-Based Construction of Semantic Space Models - SebastianPadó + SebastianPadó MirellaLapata 161-199 10.1162/coli.2007.33.2.161 @@ -134,8 +134,8 @@ Generating Referring Expressions: Making Referents Easy to Identify - IvandréParaboni - Keesvan Deemter + IvandréParaboni + Keesvan Deemter JudithMasthoff 229-254 10.1162/coli.2007.33.2.229 @@ -144,14 +144,14 @@ Book Reviews: Word Sense Disambiguation: Algorithms and Applications, edited by Eneko Agirre and Philip Edmonds - DianaMcCarthy + DianaMcCarthy 255-258 J07-2005 mccarthy-2007-book Book Reviews: From Molecule to Metaphor: A Neural Theory of Language, by Jerome <fixed-case>A</fixed-case>. Feldman - StefanFrank + StefanFrank 259-261 J07-2006 frank-2007-book @@ -218,7 +218,7 @@ <fixed-case>O</fixed-case>bituary: <fixed-case>K</fixed-case>aren <fixed-case>S</fixed-case>pärck <fixed-case>J</fixed-case>ones - JohnTait + JohnTait 289-291 10.1162/coli.2007.33.3.289 J07-3001 @@ -226,7 +226,7 @@ Squibs and Discussions: Measuring Word Alignment Quality for Statistical Machine Translation - AlexanderFraser + AlexanderFraser DanielMarcu 293-303 10.1162/coli.2007.33.3.293 @@ -236,7 +236,7 @@ A Sketch Algorithm for Estimating Two-Way and Multi-Way Associations PingLi - Kenneth W.Church + Kenneth W.Church 305-354 10.1162/coli.2007.33.3.305 J07-3003 @@ -245,7 +245,7 @@ <fixed-case>CCG</fixed-case>bank: A Corpus of <fixed-case>CCG</fixed-case> Derivations and Dependency Structures Extracted from the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank JuliaHockenmaier - MarkSteedman + MarkSteedman 355-396 10.1162/coli.2007.33.3.355 J07-3004 @@ -253,7 +253,7 @@ Classifying Non-Sentential Utterances in Dialogue: A Machine Learning Approach - RaquelFernández + RaquelFernández JonathanGinzburg ShalomLappin 397-427 @@ -275,7 +275,7 @@ Last Words: Computational Linguistics: What About the Linguistics? - KarenSpärck Jones + KarenSpärck Jones 437-441 10.1162/coli.2007.33.3.437 J07-3008 @@ -304,7 +304,7 @@ <fixed-case>S</fixed-case>quibs: Prepositional Phrase Attachment without Oracles MichaelaAtterer - HinrichSchütze + HinrichSchütze 469-476 10.1162/coli.2007.33.4.469 J07-4002 @@ -312,7 +312,7 @@ Weighted and Probabilistic Context-Free Grammars Are Equally Expressive - Noah A.Smith + Noah A.Smith MarkJohnson 477-491 10.1162/coli.2007.33.4.477 @@ -322,7 +322,7 @@ Wide-Coverage Efficient Statistical Parsing with <fixed-case>CCG</fixed-case> and Log-Linear Models StephenClark - James R.Curran + James R.Curran 493-552 10.1162/coli.2007.33.4.493 J07-4004 @@ -330,10 +330,10 @@ Unsupervised Acquisition of Predominant Word Senses - DianaMcCarthy + DianaMcCarthy RobKoeling JulieWeeds - JohnCarroll + JohnCarroll 553-590 10.1162/coli.2007.33.4.553 J07-4005 @@ -360,7 +360,7 @@ Last Words: Breaking News: Changing Attitudes and Practices - BonnieWebber + BonnieWebber 607-611 10.1162/coli.2007.33.4.607 J07-4009 diff --git a/data/xml/J08.xml b/data/xml/J08.xml index 0d87e79c2d..6f24d168f1 100644 --- a/data/xml/J08.xml +++ b/data/xml/J08.xml @@ -23,7 +23,7 @@ Feature Forest Models for Probabilistic <fixed-case>HPSG</fixed-case> Parsing YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 35-80 10.1162/coli.2008.34.1.35 J08-1002 @@ -35,7 +35,7 @@ MichaelBurke RuthO’Donovan StefanRiezler - Josefvan Genabith + Josefvan Genabith AndyWay 81-124 10.1162/coli.2008.34.1.81 @@ -44,7 +44,7 @@ Book Reviews: The Text Mining Handbook: Advanced Approaches to Analyzing Unstructured Data by Ronen Feldman and <fixed-case>J</fixed-case>ames Sanger - RadaMihalcea + RadaMihalcea 125-127 10.1162/coli.2008.34.1.125 J08-1004 @@ -74,7 +74,7 @@ Last Words: On Becoming a Discipline - MarkSteedman + MarkSteedman 137-144 10.1162/coli.2008.34.1.137 J08-1008 @@ -94,7 +94,7 @@ Special Issue Introduction: Semantic Role Labeling: An Introduction to the Special Issue - LluísMàrquez + LluísMàrquez XavierCarreras Kenneth C.Litkowski SuzanneStevenson @@ -107,7 +107,7 @@ A Global Joint Model for Semantic Role Labeling KristinaToutanova AriaHaghighi - Christopher D.Manning + Christopher D.Manning 161-191 10.1162/coli.2008.34.2.161 J08-2002 @@ -117,7 +117,7 @@ Tree Kernels for Semantic Role Labeling AlessandroMoschitti DanielePighin - RobertoBasili + RobertoBasili 193-224 10.1162/coli.2008.34.2.193 J08-2003 @@ -135,7 +135,7 @@ The Importance of Syntactic Parsing and Inference in Semantic Role Labeling VasinPunyakanok DanRoth - Wen-tauYih + Wen-tauYih 257-287 10.1162/coli.2008.34.2.257 J08-2005 @@ -143,9 +143,9 @@ Towards Robust Semantic Role Labeling - Sameer S.Pradhan - WayneWard - James H.Martin + Sameer S.Pradhan + WayneWard + James H.Martin 289-310 10.1162/coli.2008.34.2.289 J08-2006 @@ -192,7 +192,7 @@ A Twin-Candidate Model for Learning-Based Anaphora Resolution XiaofengYang JianSu - Chew LimTan + Chew LimTan 327-356 10.1162/coli.2008.07-004-R2-06-57 J08-3002 @@ -200,7 +200,7 @@ Dependency Parsing of <fixed-case>T</fixed-case>urkish - GülşenEryiğit + GülşenEryiğit JoakimNivre KemalOflazer 357-389 @@ -238,7 +238,7 @@ Book Reviews: Computational Approaches to Morphology and Syntax by Brian Roark and Richard Sproat - Noah A.Smith + Noah A.Smith 453-457 10.1162/coli.2008.34.3.453 J08-3007 @@ -246,7 +246,7 @@ Book Reviews: <fixed-case>A</fixed-case>rabic Computational Morphology: Knowledge-Based and Empirical Methods by Abdelhadi Soudi, Antal van den Bosch, and Günter Neumann (editors) - GeorgeKiraz + GeorgeKiraz 459-462 10.1162/coli.2008.34.3.459 J08-3008 @@ -280,7 +280,7 @@ <fixed-case>ACL</fixed-case> Lifetime Achievement Award: On Whose Shoulders? - YorickWilks + YorickWilks 471-486 10.1162/coli.2008.34.4.471 J08-4001 @@ -288,7 +288,7 @@ Hybrid Reinforcement/Supervised Learning of Dialogue Policies from Fixed Data Sets - JamesHenderson + JamesHenderson OliverLemon KallirroiGeorgila 487-511 @@ -307,7 +307,7 @@ Survey Article: Inter-Coder Agreement for Computational Linguistics RonArtstein - MassimoPoesio + MassimoPoesio 555-596 10.1162/coli.07-034-R2 J08-4004 @@ -315,7 +315,7 @@ Constructing Corpora for the Development and Evaluation of Paraphrase Systems - TrevorCohn + TrevorCohn ChrisCallison-Burch MirellaLapata 597-614 @@ -325,8 +325,8 @@ Book Review: Mathematical Linguistics by András Kornai - RichardSproat - RoxanaGîrju + RichardSproat + RoxanaGîrju 615-617 10.1162/coli.2008.34.4.615 J08-4006 @@ -354,7 +354,7 @@ <fixed-case>E</fixed-case>rratum: Dependency Parsing of <fixed-case>T</fixed-case>urkish - GülşenEryiğit + GülşenEryiğit JoakimNivre KemalOflazer 627 diff --git a/data/xml/J09.xml b/data/xml/J09.xml index c0a584ca8d..95d48ff68c 100644 --- a/data/xml/J09.xml +++ b/data/xml/J09.xml @@ -16,7 +16,7 @@ Letter to the Editor - SeanFulop + SeanFulop 10.1162/coli.2009.35.1.001 1 J09-1001 @@ -24,17 +24,17 @@ Statistical Approaches to Computer-Assisted Translation - SergioBarrachina + SergioBarrachina OliverBender - FranciscoCasacuberta + FranciscoCasacuberta JorgeCivera ElsaCubel ShahramKhadivi - AntonioLagarda - HermannNey + AntonioLagarda + HermannNey JesúsTomás - EnriqueVidal - Juan-MiguelVilar + EnriqueVidal + Juan-MiguelVilar 10.1162/coli.2008.07-055-R2-06-29 3-28 J09-1002 @@ -43,9 +43,9 @@ Evaluating Centering for Information Ordering Using Corpora NikiforosKaramanis - ChrisMellish - MassimoPoesio - JonOberlander + ChrisMellish + MassimoPoesio + JonOberlander 10.1162/coli.07-036-R2-06-22 29-46 J09-1003 @@ -84,7 +84,7 @@ Last Words: That’s Nice ... What Can You Do With It? - AnjaBelz + AnjaBelz 10.1162/coli.2009.35.1.111 J09-1008 belz-2009-last @@ -106,7 +106,7 @@ Prepositions in Applications: A Survey and Introduction to the Special Issue - TimothyBaldwin + TimothyBaldwin ValiaKordoni AlineVillavicencio 10.1162/coli.2009.35.2.119 @@ -117,7 +117,7 @@ Exploiting Semantic Role Resources for Preposition Disambiguation TomO’Hara - JanyceWiebe + JanyceWiebe 10.1162/coli.06-79-prep15 151-184 J09-2002 @@ -125,7 +125,7 @@ The Syntax and Semantics of Prepositions in the Task of Automatic Interpretation of Nominal Phrases and Compounds: A Cross-Linguistic Study - RoxanaGirju + RoxanaGirju 10.1162/coli.06-77-prep13 185-228 J09-2003 @@ -142,8 +142,8 @@ Applying Computational Models of Spatial Prepositions to Visually Situated Dialog - John D.Kelleher - Fintan J.Costello + John D.Kelleher + Fintan J.Costello 10.1162/coli.06-78-prep14 271-306 J09-2005 @@ -189,8 +189,8 @@ <fixed-case>A</fixed-case>rticles: Robust Understanding in Multimodal Interfaces - SrinivasBangalore - MichaelJohnston + SrinivasBangalore + MichaelJohnston 10.1162/coli.08-022-R2-06-26 345-397 J09-3002 @@ -199,7 +199,7 @@ <fixed-case>A</fixed-case>rticles: Recognizing Contextual Polarity: An Exploration of Features for Phrase-Level Sentiment Analysis TheresaWilson - JanyceWiebe + JanyceWiebe PaulHoffmann 10.1162/coli.08-012-R1-06-90 399-433 @@ -208,7 +208,7 @@ <fixed-case>A</fixed-case>rticles: Bootstrapping Distributional Feature Vector Quality - MaayanZhitomirsky-Geffet + MaayanZhitomirsky-Geffet IdoDagan 10.1162/coli.08-032-R1-06-96 435-461 @@ -217,7 +217,7 @@ Book Review: Speech and Language Processing (second edition) by Daniel <fixed-case>J</fixed-case>urafsky and <fixed-case>J</fixed-case>ames <fixed-case>H</fixed-case>. <fixed-case>M</fixed-case>artin - VladoKeselj + VladoKeselj 10.1162/coli.B09-001 J09-3005 keselj-2009-book @@ -261,8 +261,8 @@ <fixed-case>O</fixed-case>bituaries: Janet Hitzeman - MassimoPoesio - DavidDay + MassimoPoesio + DavidDay InderjeetMani 10.1162/coli.2009.35.4.35411 475-481 @@ -271,9 +271,9 @@ <fixed-case>O</fixed-case>bituaries: Hozumi <fixed-case>T</fixed-case>anaka - TimothyBaldwin + TimothyBaldwin TakenobuTokunaga - Jun’ichiTsujii + Jun’ichiTsujii 10.1162/coli.2009.35.4.35412 475-481 J09-4003 @@ -281,7 +281,7 @@ <fixed-case>ACL</fixed-case> Lifetime Achievement Award: The Dawn of Statistical <fixed-case>ASR</fixed-case> and <fixed-case>MT</fixed-case> - FrederickJelinek + FrederickJelinek 10.1162/coli.2009.35.4.35401 483-494 J09-4004 @@ -307,8 +307,8 @@ Kernel Methods for Minimally Supervised <fixed-case>WSD</fixed-case> - ClaudioGiuliano - Alfio MassimilianoGliozzo + ClaudioGiuliano + Alfio MassimilianoGliozzo CarloStrapparava 10.1162/coli.2009.35.4.35407 513-528 @@ -318,7 +318,7 @@ An Investigation into the Validity of Some Metrics for Automatically Evaluating Natural Language Generation Systems EhudReiter - AnjaBelz + AnjaBelz 10.1162/coli.2009.35.4.35405 529-558 J09-4008 @@ -346,7 +346,7 @@ Book Review: Learning Machine Translation by Cyril Goutte, Nicola Cancedda, Marc Dymetman, and <fixed-case>G</fixed-case>eorge Foster (editors) - PhilBlunsom + PhilBlunsom 10.1162/coli.2009.35.4.35408 J09-4011 blunsom-2009-book diff --git a/data/xml/J10.xml b/data/xml/J10.xml index f3cf03709e..9241e8e2e5 100644 --- a/data/xml/J10.xml +++ b/data/xml/J10.xml @@ -17,8 +17,8 @@ Broad-Coverage Parsing Using Human-Like Memory Constraints WilliamSchuler - SamirAbdelRahman - TimMiller + SamirAbdelRahman + TimMiller LaneSchwartz 10.1162/coli.2010.36.1.36100 1-30 @@ -37,7 +37,7 @@ Summarizing Short Stories AnnaKazantseva - StanSzpakowicz + StanSzpakowicz 10.1162/coli.2010.36.1.36102 71-109 J10-1003 @@ -70,14 +70,14 @@ Book Review: Dependency Parsing by Sandra Kübler, Ryan <fixed-case>M</fixed-case>c<fixed-case>D</fixed-case>onald, and Joakim <fixed-case>N</fixed-case>ivre - JohnCarroll + JohnCarroll 10.1162/coli.2010.36.1.36107 J10-1007 carroll-2010-book Last Words: Failure is an Orphan (Let’s Adopt) - StanSzpakowicz + StanSzpakowicz 10.1162/coli.2010.36.1.36105 J10-1008 szpakowicz-2010-last @@ -99,9 +99,9 @@ Generating Tailored, Comparative Descriptions with Contextually Appropriate Intonation - MichaelWhite + MichaelWhite Robert A. J.Clark - Johanna D.Moore + Johanna D.Moore 10.1162/coli.2010.09-023-R1-08-002 159-201 J10-2001 @@ -109,7 +109,7 @@ Sorting Texts by Readability - KumikoTanaka-Ishii + KumikoTanaka-Ishii SatoshiTezuka HiroshiTerada 10.1162/coli.2010.09-036-R2-08-050 @@ -120,9 +120,9 @@ What Is Not in the Bag of Words for Why-<fixed-case>QA</fixed-case>? SuzanVerberne - LouBoves + LouBoves NellekeOostdijk - Peter-ArnoCoppen + Peter-ArnoCoppen 10.1162/coli.2010.09-032-R1-08-034 229-245 J10-2003 @@ -141,7 +141,7 @@ Book Review: Statistical Language Models for Information Retrieval by <fixed-case>C</fixed-case>heng<fixed-case>X</fixed-case>iang Zhai - EricGaussier + EricGaussier 10.1162/coli.2010.36.2.36200 J10-2005 gaussier-2010-book @@ -154,7 +154,7 @@ Last Words: What Computational Linguists Can Learn from Psychologists (and Vice Versa) - EmielKrahmer + EmielKrahmer 10.1162/coli.2010.36.2.36201 285-294 J10-2007 @@ -197,7 +197,7 @@ Generating Phrasal and Sentential Paraphrases: A Survey of Data-Driven Methods NitinMadnani - Bonnie J.Dorr + Bonnie J.Dorr 10.1162/coli_a_00002 341-387 J10-3003 @@ -225,7 +225,7 @@ Complexity, Parsing, and Factorization of Tree-Local Multi-Component <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar RebeccaNesson GiorgioSatta - Stuart M.Shieber + Stuart M.Shieber 10.1162/coli_a_00005 443-480 J10-3006 @@ -233,7 +233,7 @@ Learning Tractable Word Alignment Models with Complex Constraints - João V.Graça + João V.Graça KuzmanGanchev BenTaskar 10.1162/coli_a_00007 @@ -243,10 +243,10 @@ Hierarchical Phrase-Based Translation with Weighted Finite-State Transducers and Shallow-n Grammars - Adriàde Gispert + Adriàde Gispert GonzaloIglesias GraemeBlackwood - Eduardo R.Banga + Eduardo R.Banga WilliamByrne 10.1162/coli_a_00006 505-533 @@ -255,9 +255,9 @@ Linguistically Annotated Reordering: Evaluation and Analysis - DeyiXiong + DeyiXiong MinZhang - AitiAw + AitiAw HaizhouLi 10.1162/coli_a_00009 535-568 @@ -281,7 +281,7 @@ Last Words: Ancient Symbols, Computational Linguistics, and the Reviewing Practices of the General Science Journals - RichardSproat + RichardSproat 10.1162/coli_a_00011 585-594 J10-3012 @@ -304,7 +304,7 @@ <fixed-case>O</fixed-case>bituary: Fred Jelinek - MarkLiberman + MarkLiberman 10.1162/coli_a_00032 595-599 J10-4001 @@ -312,7 +312,7 @@ <fixed-case>ACL</fixed-case> Lifetime Achievement Award: The Right Tools: Reflections on Computation and Language - William A.Woods + William A.Woods 10.1162/coli_a_00018 601-630 J10-4002 @@ -339,7 +339,7 @@ String-to-Dependency Statistical Machine Translation LibinShen JinxiXu - RalphWeischedel + RalphWeischedel 10.1162/coli_a_00015 649-671 J10-4005 @@ -347,7 +347,7 @@ Distributional Memory: A General Framework for Corpus-Based Semantics - MarcoBaroni + MarcoBaroni AlessandroLenci 10.1162/coli_a_00016 673-721 @@ -358,8 +358,8 @@ A Flexible, Corpus-Driven Model of Regular and Inverse Selectional Preferences KatrinErk - SebastianPadó - UlrikePadó + SebastianPadó + UlrikePadó 10.1162/coli_a_00017 723-763 J10-4007 @@ -395,7 +395,7 @@ Book Review: Spoken Dialogue Systems by Kristiina Jokinen and <fixed-case>M</fixed-case>ichael <fixed-case>M</fixed-case>c<fixed-case>T</fixed-case>ear - Mary EllenFoster + Mary EllenFoster 10.1162/coli_r_00025 J10-4012 foster-2010-book @@ -429,7 +429,7 @@ Commentary and Discussion: Entropy, the <fixed-case>I</fixed-case>ndus Script, and Language: A Reply to <fixed-case>R</fixed-case>. <fixed-case>S</fixed-case>proat Rajesh P. N.Rao NishaYadav - Mayank N.Vahia + Mayank N.Vahia HrishikeshJoglekar RonojoyAdhikari IravathamMahadevan @@ -440,7 +440,7 @@ Commentary and Discussion: Reply to <fixed-case>R</fixed-case>ao et al. and <fixed-case>L</fixed-case>ee et al. - RichardSproat + RichardSproat 10.1162/coli_c_00031 807-816 J10-4017 diff --git a/data/xml/J11.xml b/data/xml/J11.xml index 48999bf78e..9bf6fb91c5 100644 --- a/data/xml/J11.xml +++ b/data/xml/J11.xml @@ -16,9 +16,9 @@ <fixed-case>S</fixed-case>quibs: Nouveau-<fixed-case>ROUGE</fixed-case>: A Novelty Metric for Update Summarization - John M.Conroy - Judith D.Schlesinger - Dianne P.O’Leary + John M.Conroy + Judith D.Schlesinger + Dianne P.O’Leary 10.1162/coli_a_00033 1-8 J11-1001 @@ -37,7 +37,7 @@ Towards Modular Development of Typed Unification Grammars - YaelSygal + YaelSygal ShulyWintner 10.1162/coli_a_00035 29-74 @@ -47,8 +47,8 @@ An Investigation of Interruptions and Resumptions in Multi-Tasking Dialogues FanYang - Peter A.Heeman - Andrew L.Kun + Peter A.Heeman + Andrew L.Kun 10.1162/coli_a_00036 75-104 J11-1004 @@ -147,7 +147,7 @@ Lexicon-Based Methods for Sentiment Analysis - MaiteTaboada + MaiteTaboada JulianBrooke MilanTofiloski KimberlyVoll @@ -180,7 +180,7 @@ <fixed-case>S</fixed-case>quibs: Stable Classification of Text Genres PhilippPetrenz - BonnieWebber + BonnieWebber 10.1162/COLI_a_00052 385-393 J11-2004 @@ -188,7 +188,7 @@ Book Review: Handbook of Natural Language Processing (second edition) edited by Nitin Indurkhya and Fred <fixed-case>J</fixed-case>. Damerau - Jochen L.Leidner + Jochen L.Leidner 10.1162/COLI_r_00048 J11-2005 leidner-2011-book @@ -224,8 +224,8 @@ Last Words: <fixed-case>A</fixed-case>mazon <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk: Gold Mine or Coal Mine? KarënFort - GillesAdda - K. BretonnelCohen + GillesAdda + K. BretonnelCohen 10.1162/COLI_a_00057 413-420 J11-2010 @@ -259,8 +259,8 @@ Controlling User Perceptions of Linguistic Style: Trainable Generation of Personality Traits - FrançoisMairesse - Marilyn A.Walker + FrançoisMairesse + Marilyn A.Walker 10.1162/COLI_a_00063 455-488 J11-3002 @@ -270,7 +270,7 @@ A Strategy for Information Presentation in Spoken Dialog Systems VeraDemberg AndiWinterboer - Johanna D.Moore + Johanna D.Moore 10.1162/COLI_a_00064 489-539 J11-3003 @@ -279,8 +279,8 @@ Dependency Parsing Schemata and Mildly Non-Projective Dependency Parsing CarlosGómez-Rodríguez - JohnCarroll - DavidWeir + JohnCarroll + DavidWeir 10.1162/COLI_a_00060 541-586 J11-3004 @@ -296,7 +296,7 @@ Book Reviews: Automated Grammatical Error Detection for Language Learners by Claudia Leacock, <fixed-case>M</fixed-case>artin Chodorow, <fixed-case>M</fixed-case>ichael Gamon, and Joel Tetreault - StephenPulman + StephenPulman 10.1162/COLI_r_00062 J11-3006 pulman-2011-book @@ -317,7 +317,7 @@ Book Reviews: Computational Modeling of Human Language Acquisition by Afra Alishahi - SharonGoldwater + SharonGoldwater 10.1162/COLI_r_00067 J11-3009 goldwater-2011-book @@ -372,8 +372,8 @@ Towards Automatic Error Analysis of Machine Translation Output - MajaPopović - HermannNey + MajaPopović + HermannNey 10.1162/COLI_a_00072 657-688 J11-4002 @@ -399,8 +399,8 @@ Annotating and Learning Event Durations in Text FengPan - RutuMulkar-Mehta - Jerry R.Hobbs + RutuMulkar-Mehta + Jerry R.Hobbs 10.1162/COLI_a_00075 727-752 J11-4005 @@ -409,7 +409,7 @@ Parsing Noun Phrases in the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank DavidVadas - James R.Curran + James R.Curran 10.1162/COLI_a_00076 753-809 J11-4006 @@ -419,7 +419,7 @@ Information Status Distinctions and Referring Expressions: An Empirical Study of References to People in News Summaries AdvaithSiddharthan AniNenkova - KathleenMcKeown + KathleenMcKeown 10.1162/COLI_a_00077 811-842 J11-4007 @@ -427,7 +427,7 @@ Half-Context Language Models - HinrichSchütze + HinrichSchütze MichaelWalsh 10.1162/COLI_a_00078 843-865 diff --git a/data/xml/J12.xml b/data/xml/J12.xml index ebaced76ef..6378dcddb0 100644 --- a/data/xml/J12.xml +++ b/data/xml/J12.xml @@ -16,9 +16,9 @@ Affirmative Cue Words in Task-Oriented Dialogue - AgustínGravano - JuliaHirschberg - ŠtefanBeňuš + AgustínGravano + JuliaHirschberg + ŠtefanBeňuš 10.1162/COLI_a_00083 J12-1001 1-39 @@ -62,8 +62,8 @@ Computational Generation of Referring Expressions: A Survey - EmielKrahmer - Keesvan Deemter + EmielKrahmer + Keesvan Deemter 10.1162/COLI_a_00088 J12-1006 173-218 @@ -71,7 +71,7 @@ Book Review: Graph-Based Natural Language Processing and Information Retrieval by Rada Mihalcea and Dragomir Radev - ChrisBiemann + ChrisBiemann 10.1162/COLI_r_00089 J12-1007 biemann-2012-book @@ -108,8 +108,8 @@ Are You Sure That This Happened? Assessing the Factuality Degree of Events in Text - RoserSaurí - JamesPustejovsky + RoserSaurí + JamesPustejovsky 10.1162/COLI_a_00096 J12-2002 261-299 @@ -117,8 +117,8 @@ Did It Happen? The Pragmatic Complexity of Veridicality Assessment - Marie-Catherinede Marneffe - Christopher D.Manning + Marie-Catherinede Marneffe + Christopher D.Manning ChristopherPotts 10.1162/COLI_a_00097 J12-2003 @@ -129,7 +129,7 @@ Cross-Genre and Cross-Domain Detection of Semantic Uncertainty GyörgySzarvas VeronikaVincze - RichárdFarkas + RichárdFarkas GyörgyMóra IrynaGurevych 10.1162/COLI_a_00098 @@ -140,7 +140,7 @@ Speculation and Negation: Rules, Rankers, and the Role of Syntax ErikVelldal - LiljaØvrelid + LiljaØvrelid JonathonRead StephanOepen 10.1162/COLI_a_00126 @@ -150,13 +150,13 @@ Modality and Negation in <fixed-case>SIMT</fixed-case> Use of Modality and Negation in Semantically-Informed Syntactic <fixed-case>MT</fixed-case> - KathrynBaker + KathrynBaker MichaelBloodgood - Bonnie J.Dorr + Bonnie J.Dorr ChrisCallison-Burch Nathaniel W.Filardo - ChristinePiatko - LoriLevin + ChristinePiatko + LoriLevin ScottMiller 10.1162/COLI_a_00099 J12-2006 @@ -172,14 +172,14 @@ Book Review: Unification Grammars by Nissim Francez and Shuly <fixed-case>W</fixed-case>intner - Tracy HollowayKing + Tracy HollowayKing 10.1162/COLI_r_00101 J12-2008 king-2012-book Book Review: The Structure of Scientific Articles: Applications to Citation Indexing and Summarization by Simone Teufel - Robert E.Mercer + Robert E.Mercer 10.1162/COLI_r_00102 J12-2009 mercer-2012-book @@ -193,7 +193,7 @@ Book Review: Interactive Multi-Modal Question-Answering by Antal van den Bosch and Gosse Bouma - ConstantinOrăsan + ConstantinOrăsan 10.1162/COLI_r_00104 J12-2011 orasan-2012-book @@ -228,7 +228,7 @@ <fixed-case>O</fixed-case>bituary: Victor <fixed-case>H</fixed-case>. Yngve - W. JohnHutchins + W. JohnHutchins 10.1162/COLI_a_00115 J12-3001 461-467 @@ -236,10 +236,10 @@ <fixed-case>S</fixed-case>quibs: Fruit Carts: A Domain and Corpus for Research in Dialogue Systems and Psycholinguistics - GregoryAist - EllenCampana - JamesAllen - MarySwift + GregoryAist + EllenCampana + JamesAllen + MarySwift Michael K.Tanenhaus 10.1162/COLI_a_00114 J12-3002 @@ -248,8 +248,8 @@ Empirical Risk Minimization for Probabilistic Grammars: Sample Complexity and Hardness of Learning - Shay B.Cohen - Noah A.Smith + Shay B.Cohen + Noah A.Smith 10.1162/COLI_a_00092 J12-3003 479-526 @@ -258,8 +258,8 @@ Summarizing Information Graphics Textually SenizDemir - SandraCarberry - Kathleen F.McCoy + SandraCarberry + Kathleen F.McCoy 10.1162/COLI_a_00091 J12-3004 527-574 @@ -267,8 +267,8 @@ Modeling Regular Polysemy: A Study on the Semantic Classification of <fixed-case>C</fixed-case>atalan Adjectives - GemmaBoleda - SabineSchulte im Walde + GemmaBoleda + SabineSchulte im Walde ToniBadia 10.1162/COLI_a_00093 J12-3005 @@ -334,7 +334,7 @@ <fixed-case>ACL</fixed-case> Lifetime Achievement Award: Encounters with Language - Charles J.Fillmore + Charles J.Fillmore 10.1162/COLI_a_00129 J12-4001 701-718 @@ -352,8 +352,8 @@ Semantic Role Labeling of Implicit Arguments for Nominal Predicates - MatthewGerber - Joyce Y.Chai + MatthewGerber + Joyce Y.Chai 10.1162/COLI_a_00110 J12-4003 755-798 @@ -372,8 +372,8 @@ Empirical Methods for the Study of Denotation in Nominalizations in <fixed-case>S</fixed-case>panish AinaPeris - MarionaTaulé - HoracioRodríguez + MarionaTaulé + HoracioRodríguez 10.1162/COLI_a_00112 J12-4005 827-865 @@ -381,8 +381,8 @@ <fixed-case>LFG</fixed-case> Generation by Grammar Specialization - JürgenWedekind - Ronald M.Kaplan + JürgenWedekind + Ronald M.Kaplan 10.1162/COLI_a_00113 J12-4006 867-915 @@ -390,7 +390,7 @@ Book Review: Discourse Processing by Manfred Stede - BonnieWebber + BonnieWebber 10.1162/COLI_r_00118 J12-4007 webber-2012-book diff --git a/data/xml/J13.xml b/data/xml/J13.xml index f70eb59026..9cfee72d39 100644 --- a/data/xml/J13.xml +++ b/data/xml/J13.xml @@ -34,8 +34,8 @@ Parsing Morphologically Rich Languages: Introduction to the Special Issue ReutTsarfaty - DjaméSeddah - SandraKübler + DjaméSeddah + SandraKübler JoakimNivre 10.1162/COLI_a_00133 J13-1003 @@ -53,11 +53,11 @@ Knowledge Sources for Constituent Parsing of <fixed-case>G</fixed-case>erman, a Morphologically Rich and Less-Configurational Language - AlexanderFraser + AlexanderFraser HelmutSchmid - RichárdFarkas + RichárdFarkas RenjingWang - HinrichSchütze + HinrichSchütze 10.1162/COLI_a_00135 J13-1005 57-85 @@ -85,7 +85,7 @@ Dependency Parsing of <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic with Lexical and Inflectional Features YuvalMarton NizarHabash - OwenRambow + OwenRambow 10.1162/COLI_a_00138 J13-1008 161-194 @@ -94,8 +94,8 @@ Parsing Models for Identifying Multiword Expressions SpenceGreen - Marie-Catherinede Marneffe - Christopher D.Manning + Marie-Catherinede Marneffe + Christopher D.Manning 10.1162/COLI_a_00139 J13-1009 195-227 @@ -119,7 +119,7 @@ A Joint Model to Identify and Align Bilingual Named Entities YufengChen - ChengqingZong + ChengqingZong Keh-YihSu 10.1162/COLI_a_00122 J13-2001 @@ -156,7 +156,7 @@ Learning Dependency-Based Compositional Semantics PercyLiang - Michael I.Jordan + Michael I.Jordan DanKlein 10.1162/COLI_a_00127 J13-2005 @@ -208,7 +208,7 @@ <fixed-case>S</fixed-case>quibs: What Is a Paraphrase? RahulBhagat - EduardHovy + EduardHovy 10.1162/COLI_a_00166 J13-3001 463-472 @@ -226,7 +226,7 @@ Measuring Word Meaning in Context KatrinErk - DianaMcCarthy + DianaMcCarthy NicholasGaylord 10.1162/COLI_a_00142 J13-3003 @@ -235,10 +235,10 @@ Computing Lexical Contrast - Saif M.Mohammad - Bonnie J.Dorr + Saif M.Mohammad + Bonnie J.Dorr GraemeHirst - Peter D.Turney + Peter D.Turney 10.1162/COLI_a_00143 J13-3004 555-590 @@ -246,10 +246,10 @@ <fixed-case>XMG</fixed-case>: e<fixed-case>X</fixed-case>tensible <fixed-case>M</fixed-case>eta<fixed-case>G</fixed-case>rammar - BenoîtCrabbé + BenoîtCrabbé DenysDuchier ClaireGardent - Joseph LeRoux + Joseph LeRoux YannickParmentier 10.1162/COLI_a_00144 J13-3005 @@ -259,8 +259,8 @@ Selectional Preferences for Semantic Role Classification BeñatZapirain - EnekoAgirre - LluísMàrquez + EnekoAgirre + LluísMàrquez MihaiSurdeanu 10.1162/COLI_a_00145 J13-3006 @@ -269,7 +269,7 @@ <fixed-case>O</fixed-case>nto<fixed-case>L</fixed-case>earn Reloaded: A Graph-Based Algorithm for Taxonomy Induction - PaolaVelardi + PaolaVelardi StefanoFaralli RobertoNavigli 10.1162/COLI_a_00146 @@ -291,7 +291,7 @@ EvaD’hondt SuzanVerberne CornelisKoster - LouBoves + LouBoves 10.1162/COLI_a_00149 J13-3009 755-775 @@ -299,7 +299,7 @@ Book Review: - MatsWirén + MatsWirén 10.1162/COLI_r_00165 J13-3010 wiren-2013-book @@ -321,7 +321,7 @@ <fixed-case>ACL</fixed-case> Lifetime Achievement Award: Influences and Inferences - Jerry R.Hobbs + Jerry R.Hobbs 10.1162/COLI_a_00171 J13-4001 781-798 @@ -339,8 +339,8 @@ A Constraint-Based Hypergraph Partitioning Approach to Coreference Resolution EmiliSapena - LluísPadró - JordiTurmo + LluísPadró + JordiTurmo 10.1162/COLI_a_00151 J13-4003 847-884 @@ -349,11 +349,11 @@ Deterministic Coreference Resolution Based on Entity-Centric, Precision-Ranked Rules HeeyoungLee - AngelChang + AngelChang YvesPeirsman - NathanaelChambers + NathanaelChambers MihaiSurdeanu - DanJurafsky + DanJurafsky 10.1162/COLI_a_00152 J13-4004 885-916 @@ -363,7 +363,7 @@ Plagiarism Meets Paraphrasing: Insights for the Next Generation in Automatic Plagiarism Detection AlbertoBarrón-Cedeño MartaVila - M. AntòniaMartí + M. AntòniaMartí PaoloRosso 10.1162/COLI_a_00153 J13-4005 @@ -372,10 +372,10 @@ Multilingual Joint Parsing of Syntactic and Semantic Dependencies with a Latent Variable Model - JamesHenderson + JamesHenderson PaolaMerlo IvanTitov - GabrieleMusillo + GabrieleMusillo 10.1162/COLI_a_00158 J13-4006 949-998 diff --git a/data/xml/J14.xml b/data/xml/J14.xml index 3e690fba49..cfa74952bf 100644 --- a/data/xml/J14.xml +++ b/data/xml/J14.xml @@ -16,7 +16,7 @@ <fixed-case>O</fixed-case>bituary: Ivan A. Sag - Emily M.Bender + Emily M.Bender 1-7 10.1162/COLI_a_00179 J14-1001 @@ -26,9 +26,9 @@ Frame-Semantic Parsing DipanjanDas DesaiChen - André F. T.Martins + André F. T.Martins NathanSchneider - Noah A.Smith + Noah A.Smith 9-56 10.1162/COLI_a_00163 J14-1002 @@ -36,9 +36,9 @@ Random Walks for Knowledge-Based Word Sense Disambiguation - EnekoAgirre - Oier Lópezde Lacalle - AitorSoroa + EnekoAgirre + Oier Lópezde Lacalle + AitorSoroa 57-84 10.1162/COLI_a_00164 J14-1003 @@ -67,7 +67,7 @@ <fixed-case>A</fixed-case>rabic Dialect Identification - Omar F.Zaidan + Omar F.Zaidan ChrisCallison-Burch 171-202 10.1162/COLI_a_00169 @@ -87,7 +87,7 @@ Book Review: Natural Language Processing for Historical Texts by <fixed-case>M</fixed-case>ichael Piotrowski - LaurentRomary + LaurentRomary 231-233 10.1162/COLI_r_00180 J14-1008 @@ -155,8 +155,8 @@ Unsupervised Event Coreference Resolution - CosminAdrian Bejan - SandaHarabagiu + CosminAdrian Bejan + SandaHarabagiu 311-347 10.1162/COLI_a_00174 J14-2004 @@ -165,7 +165,7 @@ Phrase Dependency Machine Translation with Quasi-Synchronous Tree-to-Tree Features KevinGimpel - Noah A.Smith + Noah A.Smith 349-401 10.1162/COLI_a_00175 J14-2005 @@ -173,7 +173,7 @@ Practical Linguistic Steganography using Contextual Synonym Substitution and a Novel Vertex Coding Method - Ching-YunChang + Ching-YunChang StephenClark 403-448 10.1162/COLI_a_00176 @@ -199,7 +199,7 @@ Book Reviews: Sentiment Analysis and Opinion Mining by Bing <fixed-case>L</fixed-case>iu - ClaireCardie + ClaireCardie 511-513 10.1162/COLI_r_00186 J14-2009 @@ -238,9 +238,9 @@ <fixed-case>S</fixed-case>quibs: Automatic Selection of <fixed-case>HPSG</fixed-case>-Parsed Sentences for Treebank Construction - MontserratMarimon - NúriaBel - LluísPadró + MontserratMarimon + NúriaBel + LluísPadró 523–531 10.1162/COLI_a_00190 J14-3001 @@ -248,7 +248,7 @@ <fixed-case>S</fixed-case>quibs: On the Universal Generation Problem for Unification Grammars - JürgenWedekind + JürgenWedekind 533-538 10.1162/COLI_a_00191 J14-3002 @@ -256,10 +256,10 @@ A Random Walk–Based Model for Identifying Semantic Orientation - AhmedHassan + AhmedHassan AmjadAbu-Jbara WanchenLu - DragomirRadev + DragomirRadev 539-562 10.1162/COLI_a_00192 J14-3003 @@ -270,7 +270,7 @@ XuSun WenjieLi HoufengWang - QinLu + QinLu 563-586 10.1162/COLI_a_00193 J14-3004 @@ -308,9 +308,9 @@ Pushdown Automata in Statistical Machine Translation CyrilAllauzen BillByrne - Adriàde Gispert + Adriàde Gispert GonzaloIglesias - MichaelRiley + MichaelRiley 687-723 10.1162/COLI_a_00197 J14-3008 @@ -318,7 +318,7 @@ <fixed-case>O</fixed-case>bituary: <fixed-case>C</fixed-case>harles <fixed-case>J</fixed-case>. <fixed-case>F</fixed-case>illmore - DanJurafsky + DanJurafsky 725-731 10.1162/COLI_a_00201 J14-3009 @@ -348,7 +348,7 @@ Applications of Lexicographic Semirings to Problems in Speech and Language Processing - RichardSproat + RichardSproat MahsaYarmohammadi IzhakShafran BrianRoark @@ -359,8 +359,8 @@ Stochastic Language Generation in Dialogue using Factored Language Models - FrançoisMairesse - SteveYoung + FrançoisMairesse + SteveYoung 763-799 10.1162/COLI_a_00199 J14-4003 @@ -368,9 +368,9 @@ Latent Trees for Coreference Resolution - Eraldo RezendeFernandes - Cícero Nogueirados Santos - Ruy LuizMilidiú + Eraldo RezendeFernandes + Cícero Nogueirados Santos + Ruy LuizMilidiú 801-835 10.1162/COLI_a_00200 J14-4004 @@ -387,7 +387,7 @@ Adaptive Generation in Dialogue Systems Using Dynamic User Modeling - SrinivasanJanarthanam + SrinivasanJanarthanam OliverLemon 883-920 10.1162/COLI_a_00203 @@ -397,8 +397,8 @@ Reflections on the <fixed-case>P</fixed-case>enn <fixed-case>D</fixed-case>iscourse <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank, Comparable Corpora, and Complementary Annotation RashmiPrasad - BonnieWebber - AravindJoshi + BonnieWebber + AravindJoshi 921-950 10.1162/COLI_a_00204 J14-4007 diff --git a/data/xml/J15.xml b/data/xml/J15.xml index 89d0168784..89e4b67997 100644 --- a/data/xml/J15.xml +++ b/data/xml/J15.xml @@ -17,7 +17,7 @@ Towards Topic-to-Question Generation YlliasChali - Sadid A.Hasan + Sadid A.Hasan 1-20 10.1162/COLI_a_00206 J15-1001 @@ -46,8 +46,8 @@ Concrete Models and Empirical Evaluations for the Categorical Compositional Distributional Model of Meaning - EdwardGrefenstette - MehrnooshSadrzadeh + EdwardGrefenstette + MehrnooshSadrzadeh 71-118 10.1162/COLI_a_00209 J15-1004 @@ -56,7 +56,7 @@ Automatic Adaptation of Annotations WenbinJiang - Yajuan + Yajuan LiangHuang QunLiu 119-147 @@ -74,7 +74,7 @@ Book Reviews: Linguistic Fundamentals for Natural Language Processing: 100 Essentials from Morphology and Syntax by Emily <fixed-case>M</fixed-case>. Bender - ChrisDyer + ChrisDyer 153-155 10.1162/COLI_a_00212 J15-1007 @@ -82,7 +82,7 @@ Book Reviews: Recognizing Textual Entailment: Models and Applications by <fixed-case>I</fixed-case>do <fixed-case>D</fixed-case>agan, <fixed-case>D</fixed-case>an <fixed-case>R</fixed-case>oth, Mark Sammons and Fabio Massimo Zanzotto - BernardoMagnini + BernardoMagnini 157-159 10.1162/COLI_a_00213 J15-1008 @@ -98,9 +98,9 @@ <fixed-case>S</fixed-case>quibs: When the Whole Is Not Greater Than the Combination of Its Parts: A “Decompositional” Look at Compositional Distributional Semantics - Fabio MassimoZanzotto + Fabio MassimoZanzotto LorenzoFerrone - MarcoBaroni + MarcoBaroni 165-173 10.1162/COLI_a_00215 J15-1010 @@ -109,8 +109,8 @@ <fixed-case>S</fixed-case>quibs: Spelling Error Patterns in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese Priscila A.Gimenes - Norton T.Roman - Ariadne M. B. R.Carvalho + Norton T.Roman + Ariadne M. B. R.Carvalho 175-183 10.1162/COLI_a_00216 J15-1011 @@ -135,9 +135,9 @@ The Operation Sequence <fixed-case>M</fixed-case>odel—<fixed-case>C</fixed-case>ombining N-Gram-Based and Phrase-Based Statistical Machine Translation NadirDurrani HelmutSchmid - AlexanderFraser + AlexanderFraser PhilippKoehn - HinrichSchütze + HinrichSchütze 157–186 10.1162/COLI_a_00218 J15-2001 @@ -186,7 +186,7 @@ Book Reviews: Ontology-Based Interpretation of Natural Language by Philipp Cimiano, Christina Unger and John <fixed-case>M</fixed-case>c<fixed-case>C</fixed-case>rae - ChrisBiemann + ChrisBiemann 319-322 10.1162/COLI_r_00223 J15-2006 @@ -194,7 +194,7 @@ Book Reviews: Robots that Talk and Listen edited by Judith <fixed-case>A</fixed-case>. Markowitz - MarthaEvens + MarthaEvens 323-326 10.1162/COLI_r_00224 J15-2007 @@ -218,7 +218,7 @@ Large Linguistic Corpus Reduction with <fixed-case>SCP</fixed-case> Algorithms NellyBarbot - OlivierBoëffard + OlivierBoëffard JonathanChevelu ArnaudDelhay 355-383 @@ -228,9 +228,9 @@ <fixed-case>CODRA</fixed-case>: A Novel Discriminative Framework for Rhetorical Analysis - ShafiqJoty + ShafiqJoty GiuseppeCarenini - Raymond T.Ng + Raymond T.Ng 385-435 10.1162/COLI_a_00226 J15-3002 @@ -248,7 +248,7 @@ Computational Constancy Measures of <fixed-case>T</fixed-case>exts—<fixed-case>Y</fixed-case>ule’s K and Rényi’s Entropy - KumikoTanaka-Ishii + KumikoTanaka-Ishii ShunsukeAihara 481-502 10.1162/COLI_a_00228 @@ -291,7 +291,7 @@ Graph-Based Word Alignment for Clinical Language Evaluation - EmilyPrud’hommeaux + EmilyPrud’hommeaux BrianRoark 549-578 10.1162/COLI_a_00232 @@ -335,7 +335,7 @@ Last Words: Computational Linguistics and Deep Learning - Christopher D.Manning + Christopher D.Manning 701-707 doi:10.1162/COLI_a_00239 J15-4006 @@ -351,7 +351,7 @@ <fixed-case>O</fixed-case>bituaries: <fixed-case>A</fixed-case>dam Kilgarriff - RogerEvans + RogerEvans 719-721 10.1162/COLI_a_00234 J15-4008 @@ -359,9 +359,9 @@ <fixed-case>O</fixed-case>bituaries: Jane <fixed-case>J</fixed-case>. Robinson - Barbara J.Grosz - EvaHajicova - AravindJoshi + Barbara J.Grosz + EvaHajicova + AravindJoshi 723-726 10.1162/COLI_a_00235 J15-4009 diff --git a/data/xml/J16.xml b/data/xml/J16.xml index 8e57bca125..2416fd58bb 100644 --- a/data/xml/J16.xml +++ b/data/xml/J16.xml @@ -47,7 +47,7 @@ Online Learning for Statistical Machine Translation - DanielOrtiz-Martínez + DanielOrtiz-Martínez 121-161 10.1162/COLI_a_00244 J16-1004 @@ -88,7 +88,7 @@ Word Sense Clustering and Clusterability - DianaMcCarthy + DianaMcCarthy MariannaApidianaki KatrinErk 245-275 @@ -99,7 +99,7 @@ Source Language Adaptation Approaches for Resource-Poor Machine Translation PidongWang - PreslavNakov + PreslavNakov Hwee TouNg 277-306 10.1162/COLI_a_00248 @@ -109,9 +109,9 @@ Mining Parallel Corpora from <fixed-case>S</fixed-case>ina <fixed-case>W</fixed-case>eibo and <fixed-case>T</fixed-case>witter WangLing - LuísMarujo - ChrisDyer - Alan W.Black + LuísMarujo + ChrisDyer + Alan W.Black IsabelTrancoso 307-343 10.1162/COLI_a_00249 @@ -121,7 +121,7 @@ <fixed-case>S</fixed-case>quibs: When the Whole Is Less Than the Sum of Its Parts: How Composition Affects <fixed-case>PMI</fixed-case> Values in Distributional Semantic Vectors DenisPaperno - MarcoBaroni + MarcoBaroni 345-350 10.1162/COLI_a_00250 J16-2006 @@ -129,10 +129,10 @@ <fixed-case>O</fixed-case>bituary: In Memoriam: Susan Armstrong - PierretteBouillon + PierretteBouillon PaolaMerlo - Gertjanvan Noord - MikeRosner + Gertjanvan Noord + MikeRosner 351-352 10.1162/COLI_a_00251 J16-2007 @@ -157,7 +157,7 @@ Transition-Based Parsing for Deep Dependency Structures XunZhang YantaoDu - WeiweiSun + WeiweiSun XiaojunWan 353–389 10.1162/COLI_a_00252 @@ -166,7 +166,7 @@ Towards Accurate and Efficient <fixed-case>C</fixed-case>hinese Part-of-Speech Tagging - WeiweiSun + WeiweiSun XiaojunWan 391–419 10.1162/COLI_a_00253 @@ -175,7 +175,7 @@ Parsing Linear Context-Free Rewriting Systems with Fast Matrix Multiplication - Shay B.Cohen + Shay B.Cohen DanielGildea 421–455 10.1162/COLI_a_00254 @@ -184,8 +184,8 @@ All Mixed Up? Finding the Optimal Feature Set for General Readability Prediction and Its Application to <fixed-case>E</fixed-case>nglish and <fixed-case>D</fixed-case>utch - OrphéeDe Clercq - VéroniqueHoste + OrphéeDe Clercq + VéroniqueHoste 457–490 10.1162/COLI_a_00255 J16-3004 @@ -213,7 +213,7 @@ Computational Sociolinguistics: A <fixed-case>S</fixed-case>urvey DongNguyen A. SezaDoğruöz - Carolyn P.Rosé + Carolyn P.Rosé Franciskade Jong 537–593 10.1162/COLI_a_00258 @@ -255,8 +255,8 @@ Formal Distributional Semantics: Introduction to the Special Issue - GemmaBoleda - AurélieHerbelot + GemmaBoleda + AurélieHerbelot 619–635 10.1162/COLI_a_00261 J16-4002 @@ -264,10 +264,10 @@ There Is No Logical Negation Here, But There Are Alternatives: Modeling Conversational Negation with Distributional Semantics - GermánKruszewski + GermánKruszewski DenisPaperno - RaffaellaBernardi - MarcoBaroni + RaffaellaBernardi + MarcoBaroni 637–660 10.1162/COLI_a_00262 J16-4003 @@ -286,8 +286,8 @@ Integrating Type Theory and Distributional Semantics: A Case Study on Adjective–Noun Compositions - NicholasAsher - TimVan de Cruys + NicholasAsher + TimVan de Cruys AntoineBride MártaAbrusán 703–725 @@ -297,7 +297,7 @@ Aligning Packed Dependency Trees: A Theory of Composition for Distributional Semantics - DavidWeir + DavidWeir JulieWeeds JeremyReffin ThomasKober @@ -312,7 +312,7 @@ StephenRoller PengxiangCheng KatrinErk - Raymond J.Mooney + Raymond J.Mooney 763–808 10.1162/COLI_a_00266 J16-4007 @@ -337,7 +337,7 @@ Book Reviews: Semantic Similarity from Natural Language and Ontology Analysis by Sébastien Harispe, Sylvie Ranwez, Stefan Janaqi, and Jacky Montmain - DeyiXiong + DeyiXiong 829–831 10.1162/COLI_r_00269 J16-4010 diff --git a/data/xml/J17.xml b/data/xml/J17.xml index b9e595c4bf..82cb9e36d4 100644 --- a/data/xml/J17.xml +++ b/data/xml/J17.xml @@ -38,9 +38,9 @@ Multilingual Metaphor Processing: Experiments with Semi-Supervised and Unsupervised Learning EkaterinaShutova LinSun - ElkinDarío Gutiérrez + ElkinDarío Gutiérrez PatriciaLichtenstein - SriniNarayanan + SriniNarayanan Highly frequent in language and communication, metaphor represents a significant challenge for Natural Language Processing (NLP) applications. Computational work on metaphor has traditionally evolved around the use of hand-coded knowledge, making the systems hard to scale. Recent years have witnessed a rise in statistical approaches to metaphor processing. However, these approaches often require extensive human annotation effort and are predominantly evaluated within a limited domain. In contrast, we experiment with weakly supervised and unsupervised techniques—with little or no annotation—to generalize higher-level mechanisms of metaphor from distributional properties of concepts. We investigate different levels and types of supervision (learning from linguistic examples vs. learning from a given set of metaphorical mappings vs. learning without annotation) in flat and hierarchical, unconstrained and constrained clustering settings. Our aim is to identify the optimal type of supervision for a learning algorithm that discovers patterns of metaphorical association from text. In order to investigate the scalability and adaptability of our models, we applied them to data in three languages from different language groups—English, Spanish, and Russian—achieving state-of-the-art results with little supervision. Finally, we demonstrate that statistical methods can facilitate and scale up cross-linguistic research on metaphor. 71-123 10.1162/COLI_a_00275 @@ -60,7 +60,7 @@ Hashtag Sense Clustering Based on Temporal Similarity GiovanniStilo - PaolaVelardi + PaolaVelardi Hashtags are creative labels used in micro-blogs to characterize the topic of a message/discussion. Regardless of the use for which they were originally intended, hashtags cannot be used as a means to cluster messages with similar content. First, because hashtags are created in a spontaneous and highly dynamic way by users in multiple languages, the same topic can be associated with different hashtags, and conversely, the same hashtag may refer to different topics in different time periods. Second, contrary to common words, hashtag disambiguation is complicated by the fact that no sense catalogs (e.g., Wikipedia or WordNet) are available; and, furthermore, hashtag labels are difficult to analyze, as they often consist of acronyms, concatenated words, and so forth. A common way to determine the meaning of hashtags has been to analyze their context, but, as we have just pointed out, hashtags can have multiple and variable meanings. In this article, we propose a temporal sense clustering algorithm based on the idea that semantically related hashtags have similar and synchronous usage patterns. 181-200 10.1162/COLI_a_00277 @@ -69,9 +69,9 @@ Evaluative Language Beyond Bags of Words: Linguistic Insights and Computational Applications - FarahBenamara - MaiteTaboada - YannickMathieu + FarahBenamara + MaiteTaboada + YannickMathieu The study of evaluation, affect, and subjectivity is a multidisciplinary enterprise, including sociology, psychology, economics, linguistics, and computer science. A number of excellent computational linguistics and linguistic surveys of the field exist. Most surveys, however, do not bring the two disciplines together to show how methods from linguistics can benefit computational sentiment analysis systems. In this survey, we show how incorporating linguistic insights, discourse information, and other contextual phenomena, in combination with the statistical exploitation of data, can result in an improvement over approaches that take advantage of only one of these perspectives. We first provide a comprehensive introduction to evaluative language from both a linguistic and computational perspective. We then argue that the standard computational definition of the concept of evaluative language neglects the dynamic nature of evaluation, in which the interpretation of a given evaluation depends on linguistic and extra-linguistic contextual factors. We thus propose a dynamic definition that incorporates update functions. The update functions allow for different contextual aspects to be incorporated into the calculation of sentiment for evaluative words or expressions, and can be applied at all levels of discourse. We explore each level and highlight which linguistic aspects contribute to accurate extraction of sentiment. We end the review by outlining what we believe the future directions of sentiment analysis are, and the role that discourse and contextual information need to play. 201-264 10.1162/COLI_a_00278 @@ -88,7 +88,7 @@ Book Review: Automatic Detection of Verbal Deception by Eileen Fitzpatrick, Joan Bachenko and Tommaso Fornaciari - YoongKeok Lee + YoongKeok Lee 269-271 10.1162/COLI_r_00282 J17-1008 @@ -122,9 +122,9 @@ Greedy Transition-Based Dependency Parsing with Stack <fixed-case>LSTM</fixed-case>s MiguelBallesteros - ChrisDyer + ChrisDyer YoavGoldberg - Noah A.Smith + Noah A.Smith We introduce a greedy transition-based parser that learns to represent parser states using recurrent neural networks. Our primary innovation that enables us to do this efficiently is a new control structure for sequential neural networks—the stack long short-term memory unit (LSTM). Like the conventional stack data structures used in transition-based parsers, elements can be pushed to or popped from the top of the stack in constant time, but, in addition, an LSTM maintains a continuous space embedding of the stack contents. Our model captures three facets of the parser’s state: (i) unbounded look-ahead into the buffer of incoming words, (ii) the complete history of transition actions taken by the parser, and (iii) the complete contents of the stack of partially built tree fragments, including their internal structures. In addition, we compare two different word representations: (i) standard word vectors based on look-up tables and (ii) character-based models of words. Although standard word embedding models work well in all languages, the character-based models improve the handling of out-of-vocabulary words, particularly in morphologically rich languages. Finally, we discuss the use of dynamic oracles in training the parser. During training, dynamic oracles alternate between sampling parser states from the training data and from the model as it is being learned, making the model more robust to the kinds of errors that will be made at test time. Training our model with dynamic oracles yields a linear-time greedy parser with very competitive performance. 311–347 10.1162/COLI_a_00285 @@ -135,8 +135,8 @@ Statistical Models for Unsupervised, Semi-Supervised Supervised Transliteration Mining HassanSajjad HelmutSchmid - AlexanderFraser - HinrichSchütze + AlexanderFraser + HinrichSchütze We present a generative model that efficiently mines transliteration pairs in a consistent fashion in three different settings: unsupervised, semi-supervised, and supervised transliteration mining. The model interpolates two sub-models, one for the generation of transliteration pairs and one for the generation of non-transliteration pairs (i.e., noise). The model is trained on noisy unlabeled data using the EM algorithm. During training the transliteration sub-model learns to generate transliteration pairs and the fixed non-transliteration model generates the noise pairs. After training, the unlabeled data is disambiguated based on the posterior probabilities of the two sub-models. We evaluate our transliteration mining system on data from a transliteration mining shared task and on parallel corpora. For three out of four language pairs, our system outperforms all semi-supervised and supervised systems that participated in the NEWS 2010 shared task. On word pairs extracted from parallel corpora with fewer than 2% transliteration pairs, our system achieves up to 86.7% F-measure with 77.9% precision and 97.8% recall. 349–375 10.1162/COLI_a_00286 @@ -159,7 +159,7 @@ Framing <fixed-case>QA</fixed-case> as Building and Ranking Intersentence Answer Justifications - PeterJansen + PeterJansen RebeccaSharp MihaiSurdeanu PeterClark @@ -171,7 +171,7 @@ <fixed-case>S</fixed-case>quib: Effects of Cognitive Effort on the Resolution of Overspecified Descriptions - IvandréParaboni + IvandréParaboni Alex Gwo JenLan Matheus Mendesde Sant’Ana Flávio LuizCoutinho @@ -238,7 +238,7 @@ <fixed-case>A</fixed-case>uto<fixed-case>E</fixed-case>xtend: Combining Word Embeddings with Semantic Resources SaschaRothe - HinrichSchütze + HinrichSchütze We present AutoExtend, a system that combines word embeddings with semantic resources by learning embeddings for non-word objects like synsets and entities and learning word embeddings that incorporate the semantic information from the resource. The method is based on encoding and decoding the word embeddings and is flexible in that it can take any word embeddings as input and does not need an additional training corpus. The obtained embeddings live in the same vector space as the input word embeddings. A sparse tensor formalization guarantees efficiency and parallelizability. We use WordNet, GermaNet, and Freebase as semantic resources. AutoExtend achieves state-of-the-art performance on Word-in-Context Similarity and Word Sense Disambiguation tasks. 593–617 10.1162/COLI_a_00294 @@ -281,10 +281,10 @@ Discourse Structure in Machine Translation Evaluation - ShafiqJoty - FranciscoGuzmán - LluísMàrquez - PreslavNakov + ShafiqJoty + FranciscoGuzmán + LluísMàrquez + PreslavNakov In this article, we explore the potential of using sentence-level discourse structure for machine translation evaluation. We first design discourse-aware similarity measures, which use all-subtree kernels to compare discourse parse trees in accordance with the Rhetorical Structure Theory (RST). Then, we show that a simple linear combination with these measures can help improve various existing machine translation evaluation metrics regarding correlation with human judgments both at the segment level and at the system level. This suggests that discourse information is complementary to the information used by many of the existing evaluation metrics, and thus it could be taken into account when developing richer evaluation metrics, such as the WMT-14 winning combined metric DiscoTKparty. We also provide a detailed analysis of the relevance of various discourse elements and relations from the RST parse trees for machine translation evaluation. In particular, we show that (i) all aspects of the RST tree are relevant, (ii) nuclearity is more useful than relation type, and (iii) the similarity of the translation RST tree to the reference RST tree is positively correlated with translation quality. 683–722 10.1162/COLI_a_00298 @@ -305,7 +305,7 @@ Representation of Linguistic Form and Function in Recurrent Neural Networks ÁkosKádár - GrzegorzChrupała + GrzegorzChrupała AfraAlishahi We present novel methods for analyzing the activation patterns of recurrent neural networks from a linguistic point of view and explore the types of linguistic structure they learn. As a case study, we use a standard standalone language model, and a multi-task gated recurrent network architecture consisting of two parallel pathways with shared word embeddings: The Visual pathway is trained on predicting the representations of the visual scene corresponding to an input sentence, and the Textual pathway is trained to predict the next word in the same sentence. We propose a method for estimating the amount of contribution of individual tokens in the input to the final prediction of the networks. Using this method, we show that the Visual pathway pays selective attention to lexical categories and grammatical functions that carry semantic information, and learns to treat word types differently depending on their grammatical function and their position in the sequential structure of the sentence. In contrast, the language models are comparatively more sensitive to words with a syntactic function. Further analysis of the most informative n-gram contexts for each model shows that in comparison with the Visual pathway, the language models react more strongly to abstract contexts that represent syntactic constructions. 761–780 @@ -328,13 +328,13 @@ <fixed-case>S</fixed-case>urvey: Multiword Expression Processing: A <fixed-case>S</fixed-case>urvey - MathieuConstant - GülşenEryiǧit + MathieuConstant + GülşenEryiǧit JohannaMonti - Lonnekevan der Plas + Lonnekevan der Plas CarlosRamisch - MichaelRosner - AmaliaTodirascu + MichaelRosner + AmaliaTodirascu Multiword expressions (MWEs) are a class of linguistic forms spanning conventional word boundaries that are both idiosyncratic and pervasive across different languages. The structure of linguistic processing that depends on the clear distinction between words and phrases has to be re-thought to accommodate MWEs. The issue of MWE handling is crucial for NLP applications, where it raises a number of challenges. The emergence of solutions in the absence of guiding principles motivates this survey, whose aim is not only to provide a focused review of MWE processing, but also to clarify the nature of interactions between MWE processing and downstream applications. We propose a conceptual framework within which challenges and research contributions can be positioned. It offers a shared understanding of what is meant by “MWE processing,” distinguishing the subtasks of MWE discovery and identification. It also elucidates the interactions between MWE processing and two use cases: Parsing and machine translation. Many of the approaches in the literature can be differentiated according to how MWE processing is timed with respect to underlying use cases. We discuss how such orchestration choices affect the scope of MWE-aware systems. For each of the two MWE processing subtasks and for each of the two use cases, we conclude on open issues and research perspectives. 837–892 10.1162/COLI_a_00302 @@ -356,7 +356,7 @@ KilianEvang Robvan der Goot HesselHaagsma - BarbaraPlank + BarbaraPlank MartijnWieling 897–904 10.1162/COLI_a_00304 diff --git a/data/xml/J18.xml b/data/xml/J18.xml index 4c7fc1df5c..f458bc5d5d 100644 --- a/data/xml/J18.xml +++ b/data/xml/J18.xml @@ -16,7 +16,7 @@ Smart Enough to Talk With Us? Foundations and Challenges for Dialogue Capable <fixed-case>AI</fixed-case> Systems - Barbara J.Grosz + Barbara J.Grosz 1–15 10.1162/COLI_a_00313 J18-1001 @@ -24,8 +24,8 @@ On the Derivational Entropy of Left-to-Right Probabilistic Finite-State Automata and Hidden <fixed-case>M</fixed-case>arkov Models - Joan AndreuSánchez - Martha AliciaRocha + Joan AndreuSánchez + Martha AliciaRocha VerónicaRomero MauricioVillegas Probabilistic finite-state automata are a formalism that is widely used in many problems of automatic speech recognition and natural language processing. Probabilistic finite-state automata are closely related to other finite-state models as weighted finite-state automata, word lattices, and hidden Markov models. Therefore, they share many similar properties and problems. Entropy measures of finite-state models have been investigated in the past in order to study the information capacity of these models. The derivational entropy quantifies the uncertainty that the model has about the probability distribution it represents. The derivational entropy in a finite-state automaton is computed from the probability that is accumulated in all of its individual state sequences. The computation of the entropy from a weighted finite-state automaton requires a normalized model. This article studies an efficient computation of the derivational entropy of left-to-right probabilistic finite-state automata, and it introduces an efficient algorithm for normalizing weighted finite-state automata. The efficient computation of the derivational entropy is also extended to continuous hidden Markov models. @@ -38,7 +38,7 @@ A Notion of Semantic Coherence for Underspecified Semantic Representation MehdiManshadi DanielGildea - James F.Allen + James F.Allen The general problem of finding satisfying solutions to constraint-based underspecified representations of quantifier scope is NP-complete. Existing frameworks, including Dominance Graphs, Minimal Recursion Semantics, and Hole Semantics, have struggled to balance expressivity and tractability in order to cover real natural language sentences with efficient algorithms. We address this trade-off with a general principle of coherence, which requires that every variable introduced in the domain of discourse must contribute to the overall semantics of the sentence. We show that every underspecified representation meeting this criterion can be efficiently processed, and that our set of representations subsumes all previously identified tractable sets. 39–83 10.1162/COLI_a_00307 @@ -113,7 +113,7 @@ A Dependency Perspective on <fixed-case>RST</fixed-case> Discourse Parsing and Evaluation MathieuMorey PhilippeMuller - NicholasAsher + NicholasAsher Computational text-level discourse analysis mostly happens within Rhetorical Structure Theory (RST), whose structures have classically been presented as constituency trees, and relies on data from the RST Discourse Treebank (RST-DT); as a result, the RST discourse parsing community has largely borrowed from the syntactic constituency parsing community. The standard evaluation procedure for RST discourse parsers is thus a simplified variant of PARSEVAL, and most RST discourse parsers use techniques that originated in syntactic constituency parsing. In this article, we isolate a number of conceptual and computational problems with the constituency hypothesis. We then examine the consequences, for the implementation and evaluation of RST discourse parsers, of adopting a dependency perspective on RST structures, a view advocated so far only by a few approaches to discourse parsing. While doing that, we show the importance of the notion of headedness of RST structures. We analyze RST discourse parsing as dependency parsing by adapting to RST a recent proposal in syntactic parsing that relies on head-ordered dependency trees, a representation isomorphic to headed constituency trees. We show how to convert the original trees from the RST corpus, RST-DT, and their binarized versions used by all existing RST parsers to head-ordered dependency trees. We also propose a way to convert existing simple dependency parser output to constituent trees. This allows us to evaluate and to compare approaches from both constituent-based and dependency-based perspectives in a unified framework, using constituency and dependency metrics. We thus propose an evaluation framework to compare extant approaches easily and uniformly, something the RST parsing community has lacked up to now. We can also compare parsers’ predictions to each other across frameworks. This allows us to characterize families of parsing strategies across the different frameworks, in particular with respect to the notion of headedness. Our experiments provide evidence for the conceptual similarities between dependency parsers and shift-reduce constituency parsers, and confirm that dependency parsing constitutes a viable approach to RST discourse parsing. 197–235 10.1162/COLI_a_00314 @@ -144,7 +144,7 @@ The Influence of Context on the Learning of Metrical Stress Systems Using Finite-State Machines CeskoVoeten - Mennovan Zaanen + Mennovan Zaanen Languages vary in the way stress is assigned to syllables within words. This article investigates the learnability of stress systems in a wide range of languages. The stress systems can be described using finite-state automata with symbols indicating levels of stress (primary, secondary, or no stress). Finite-state automata have been the focus of research in the area of grammatical inference for some time now. It has been shown that finite-state machines are learnable from examples using state-merging. One such approach, which aims to learn k-testable languages, has been applied to stress systems with some success. The family of k-testable languages has been shown to be efficiently learnable (in polynomial time). Here, we extend this approach to k, l-local languages by taking not only left context, but also right context, into account. We consider empirical results testing the performance of our learner using various amounts of context (corresponding to varying definitions of phonological locality). Our results show that our approach of learning stress patterns using state-merging is more reliant on left context than on right context. Additionally, some stress systems fail to be learned by our learner using either the left-context k-testable or the left-and-right-context k, l-local learning system. A more complex merging strategy, and hence grammar representation, is required for these stress systems. 329–348 10.1162/COLI_a_00317 @@ -194,7 +194,7 @@ <fixed-case>O</fixed-case>bituary: Aravind <fixed-case>K</fixed-case>. Joshi - BonnieWebber + BonnieWebber 387–392 10.1162/coli_a_00321 J18-3001 @@ -211,7 +211,7 @@ Native Language Identification With Classifier Stacking and Ensembles - ShervinMalmasi + ShervinMalmasi MarkDras Ensemble methods using multiple classifiers have proven to be among the most successful approaches for the task of Native Language Identification (NLI), achieving the current state of the art. However, a systematic examination of ensemble methods for NLI has yet to be conducted. Additionally, deeper ensemble architectures such as classifier stacking have not been closely evaluated. We present a set of experiments using three ensemble-based models, testing each with multiple configurations and algorithms. This includes a rigorous application of meta-classification models for NLI, achieving state-of-the-art results on several large data sets, evaluated in both intra-corpus and cross-corpus modes. 403–446 @@ -233,7 +233,7 @@ Using Semantics for Granularities of Tokenization MartinRiedl - ChrisBiemann + ChrisBiemann Depending on downstream applications, it is advisable to extend the notion of tokenization from low-level character-based token boundary detection to identification of meaningful and useful language units. This entails both identifying units composed of several single words that form a several single words that form a, as well as splitting single-word compounds into their meaningful parts. In this article, we introduce unsupervised and knowledge-free methods for these two tasks. The main novelty of our research is based on the fact that methods are primarily based on distributional similarity, of which we use two flavors: a sparse count-based and a dense neural-based distributional semantic model. First, we introduce DRUID, which is a method for detecting MWEs. The evaluation on MWE-annotated data sets in two languages and newly extracted evaluation data sets for 32 languages shows that DRUID compares favorably over previous methods not utilizing distributional information. Second, we present SECOS, an algorithm for decompounding close compounds. In an evaluation of four dedicated decompounding data sets across four languages and on data sets extracted from Wiktionary for 14 languages, we demonstrate the superiority of our approach over unsupervised baselines, sometimes even matching the performance of previous language-specific and supervised methods. In a final experiment, we show how both decompounding and MWE information can be used in information retrieval. Here, we obtain the best results when combining word information with MWEs and the compound parts in a bag-of-words retrieval set-up. Overall, our methodology paves the way to automatic detection of lexical units beyond standard tokenization techniques without language-specific preprocessing steps such as POS tagging. 483–524 10.1162/coli_a_00325 @@ -280,8 +280,8 @@ The Lost Combinator - MarkSteedman - + MarkSteedman + 613-629 10.1162/coli_a_00328 J18-4001 @@ -300,7 +300,7 @@ <fixed-case>S</fixed-case>quib: Reproducibility in Computational Linguistics: Are We Willing to Share? MartijnWieling JosineRawee - Gertjanvan Noord + Gertjanvan Noord This study focuses on an essential precondition for reproducibility in computational linguistics: the willingness of authors to share relevant source code and data. Ten years after Ted Pedersen’s influential “Last Words” contribution in Computational Linguistics, we investigate to what extent researchers in computational linguistics are willing and able to share their data and code. We surveyed all 395 full papers presented at the 2011 and 2016 ACL Annual Meetings, and identified whether links to data and code were provided. If working links were not provided, authors were requested to provide this information. Although data were often available, code was shared less often. When working links to code or data were not provided in the paper, authors provided the code in about one third of cases. For a selection of ten papers, we attempted to reproduce the results using the provided data and code. We were able to reproduce the results approximately for six papers. For only a single paper did we obtain the exact same results. Our findings show that even though the situation appears to have improved comparing 2016 to 2011, empiricism in computational linguistics still largely remains a matter of faith. Nevertheless, we are somewhat optimistic about the future. Ensuring reproducibility is not only important for the field as a whole, but also seems worthwhile for individual researchers: The median citation count for studies with working links to the source code is higher. 641–649 10.1162/coli_a_00330 @@ -309,7 +309,7 @@ Last Words: What Can Be Accomplished with the State of the Art in Information Extraction? A Personal View - RalphWeischedel + RalphWeischedel ElizabethBoschee Though information extraction (IE) research has more than a 25-year history, F1 scores remain low. Thus, one could question continued investment in IE research. In this article, we present three applications where information extraction of entities, relations, and/or events has been used, and note the common features that seem to have led to success. We also identify key research challenges whose solution seems essential for broader successes. Because a few practical deployments already exist and because breakthroughs on particular challenges would greatly broaden the technology’s deployment, further R and D investments are justified. 651–658 @@ -327,9 +327,9 @@ Introduction to the Special Issue on Language in Social Media: Exploiting Discourse and Other Contextual Information - FarahBenamara - DianaInkpen - MaiteTaboada + FarahBenamara + DianaInkpen + MaiteTaboada Social media content is changing the way people interact with each other and share information, personal messages, and opinions about situations, objects, and past experiences. Most social media texts are short online conversational posts or comments that do not contain enough information for natural language processing (NLP) tools, as they are often accompanied by non-linguistic contextual information, including meta-data (e.g., the user’s profile, the social network of the user, and their interactions with other users). Exploiting such different types of context and their interactions makes the automatic processing of social media texts a challenging research task. Indeed, simply applying traditional text mining tools is clearly sub-optimal, as, typically, these tools take into account neither the interactive dimension nor the particular nature of this data, which shares properties with both spoken and written language. This special issue contributes to a deeper understanding of the role of these interactions to process social media data from a new perspective in discourse interpretation. This introduction first provides the necessary background to understand what context is from both the linguistic and computational linguistic perspectives, then presents the most recent context-based approaches to NLP for social media. We conclude with an overview of the papers accepted in this special issue, highlighting what we believe are the future directions in processing social media texts. 663–681 10.1162/coli_a_00333 @@ -338,7 +338,7 @@ Interactional Stancetaking in Online Forums - Scott F.Kiesling + Scott F.Kiesling UmashanthiPavalanathan JimFitzpatrick XiaochuangHan @@ -354,7 +354,7 @@ JingLi YanSong ZhongyuWei - Kam-FaiWong + Kam-FaiWong Conventional topic models are ineffective for topic extraction from microblog messages, because the data sparseness exhibited in short messages lacking structure and contexts results in poor message-level word co-occurrence patterns. To address this issue, we organize microblog messages as conversation trees based on their reposting and replying relations, and propose an unsupervised model that jointly learns word distributions to represent: (1) different roles of conversational discourse, and (2) various latent topics in reflecting content information. By explicitly distinguishing the probabilities of messages with varying discourse roles in containing topical words, our model is able to discover clusters of discourse words that are indicative of topical content. In an automatic evaluation on large-scale microblog corpora, our joint model yields topics with better coherence scores than competitive topic models from previous studies. Qualitative analysis on model outputs indicates that our model induces meaningful representations for both discourse and topics. We further present an empirical study on microblog summarization based on the outputs of our joint model. The results show that the jointly modeled discourse and topic representations can effectively indicate summary-worthy content in microblog conversations. 719–754 10.1162/coli_a_00335 @@ -364,7 +364,7 @@ Sarcasm Analysis Using Conversation Context DebanjanGhosh - Alexander R.Fabbri + Alexander R.Fabbri SmarandaMuresan Computational models for sarcasm detection have often relied on the content of utterances in isolation. However, the speaker’s sarcastic intent is not always apparent without additional context. Focusing on social media discussions, we investigate three issues: (1) does modeling conversation context help in sarcasm detection? (2) can we identify what part of conversation context triggered the sarcastic reply? and (3) given a sarcastic post that contains multiple sentences, can we identify the specific sentence that is sarcastic? To address the first issue, we investigate several types of Long Short-Term Memory (LSTM) networks that can model both the conversation context and the current turn. We show that LSTM networks with sentence-level attention on context and current turn, as well as the conditional LSTM network, outperform the LSTM model that reads only the current turn. As conversation context, we consider the prior turn, the succeeding turn, or both. Our computational models are tested on two types of social media platforms: Twitter and discussion forums. We discuss several differences between these data sets, ranging from their size to the nature of the gold-label annotations. To address the latter two issues, we present a qualitative analysis of the attention weights produced by the LSTM models (with attention) and discuss the results compared with human performance on the two tasks. 755–792 @@ -376,7 +376,7 @@ We Usually Don’t Like Going to the Dentist: Using Common Sense to Detect Irony on <fixed-case>T</fixed-case>witter CynthiaVan Hee ElsLefever - VéroniqueHoste + VéroniqueHoste Although common sense and connotative knowledge come naturally to most people, computers still struggle to perform well on tasks for which such extratextual information is required. Automatic approaches to sentiment analysis and irony detection have revealed that the lack of such world knowledge undermines classification performance. In this article, we therefore address the challenge of modeling implicit or prototypical sentiment in the framework of automatic irony detection. Starting from manually annotated connoted situation phrases (e.g., “flight delays,” “sitting the whole day at the doctor’s office”), we defined the implicit sentiment held towards such situations automatically by using both a lexico-semantic knowledge base and a data-driven method. We further investigate how such implicit sentiment information affects irony detection by assessing a state-of-the-art irony classifier before and after it is informed with implicit sentiment information. 793–832 10.1162/coli_a_00337 @@ -395,8 +395,8 @@ Modeling Speech Acts in Asynchronous Conversations: A Neural-<fixed-case>CRF</fixed-case> Approach - ShafiqJoty - TasnimMohiuddin + ShafiqJoty + TasnimMohiuddin Participants in an asynchronous conversation (e.g., forum, e-mail) interact with each other at different times, performing certain communicative acts, called speech acts (e.g., question, request). In this article, we propose a hybrid approach to speech act recognition in asynchronous conversations. Our approach works in two main steps: a long short-term memory recurrent neural network (LSTM-RNN) first encodes each sentence separately into a task-specific distributed representation, and this is then used in a conditional random field (CRF) model to capture the conversational dependencies between sentences. The LSTM-RNN model uses pretrained word embeddings learned from a large conversational corpus and is trained to classify sentences into speech act types. The CRF model can consider arbitrary graph structures to model conversational dependencies in an asynchronous conversation. In addition, to mitigate the problem of limited annotated data in the asynchronous domains, we adapt the LSTM-RNN model to learn from synchronous conversations (e.g., meetings), using domain adversarial training of neural networks. Empirical evaluation shows the effectiveness of our approach over existing ones: (i) LSTM-RNNs provide better task-specific representations, (ii) conversational word embeddings benefit the LSTM-RNNs more than the off-the-shelf ones, (iii) adversarial training gives better domain-invariant representations, and (iv) the global CRF model improves over local models. 859–894 10.1162/coli_a_00339 diff --git a/data/xml/J19.xml b/data/xml/J19.xml index 6587711506..0bf6e1c4ed 100644 --- a/data/xml/J19.xml +++ b/data/xml/J19.xml @@ -16,7 +16,7 @@ Unsupervised Compositionality Prediction of Nominal Compounds - SilvioCordeiro + SilvioCordeiro AlineVillavicencio MarcoIdiart CarlosRamisch @@ -40,7 +40,7 @@ Parsing <fixed-case>C</fixed-case>hinese Sentences with Grammatical Relations - WeiweiSun + WeiweiSun YufeiChen XiaojunWan MeichunLiu @@ -96,7 +96,7 @@ Novel Event Detection and Classification for Historical Texts - RacheleSprugnoli + RacheleSprugnoli SaraTonelli 10.1162/coli_a_00347 Event processing is an active area of research in the Natural Language Processing community, but resources and automatic systems developed so far have mainly addressed contemporary texts. However, the recognition and elaboration of events is a crucial step when dealing with historical texts Particularly in the current era of massive digitization of historical sources: Research in this domain can lead to the development of methodologies and tools that can assist historians in enhancing their work, while having an impact also on the field of Natural Language Processing. Our work aims at shedding light on the complex concept of events when dealing with historical texts. More specifically, we introduce new annotation guidelines for event mentions and types, categorized into 22 classes. Then, we annotate a historical corpus accordingly, and compare two approaches for automatic event detection and classification following this novel scheme. We believe that this work can foster research in a field of inquiry as yet underestimated in the area of Temporal Information Processing. To this end, we release new annotation guidelines, a corpus, and new models for automatic annotation. @@ -118,7 +118,7 @@ Neural Models of Text Normalization for Speech Applications HaoZhang - RichardSproat + RichardSproat Axel H.Ng FelixStahlberg XiaochangPeng @@ -146,7 +146,7 @@ JohannesBjerva RobertÖstling Maria HanVeiga - JörgTiedemann + JörgTiedemann IsabelleAugenstein 10.1162/coli_a_00351 A neural language model trained on a text corpus can be used to induce distributed representations of words, such that similar words end up with similar representations. If the corpus is multilingual, the same model can be used to learn distributed representations of languages, such that similar languages end up with similar representations. We show that this holds even when the multilingual corpus has been translated into English, by picking up the faint signal left by the source languages. However, just as it is a thorny problem to separate semantic from syntactic similarity in word representations, it is not obvious what type of similarity is captured by language representations. We investigate correlations and causal relationships between language representations learned from translations on one hand, and genetic, geographical, and several levels of structural similarity between languages on the other. Of these, structural similarity is found to correlate most strongly with language representation similarity, whereas genetic relationships—a convenient benchmark used for evaluation in previous work—appears to be a confounding factor. Apart from implications about translation effects, we see this more generally as a case where NLP and linguistic typology can interact and benefit one another. @@ -169,8 +169,8 @@ Contextualized Translations of Phrasal Verbs with Distributional Compositional Semantics and Monolingual Corpora PabloGamallo - SusanaSotelo - José RamomPichel + SusanaSotelo + José RamomPichel MikelArtetxe 10.1162/coli_a_00353 This article describes a compositional distributional method to generate contextualized senses of words and identify their appropriate translations in the target language using monolingual corpora. Word translation is modeled in the same way as contextualization of word meaning, but in a bilingual vector space. The contextualization of meaning is carried out by means of distributional composition within a structured vector space with syntactic dependencies, and the bilingual space is created by means of transfer rules and a bilingual dictionary. A phrase in the source language, consisting of a head and a dependent, is translated into the target language by selecting both the nearest neighbor of the head given the dependent, and the nearest neighbor of the dependent given the head. This process is expanded to larger phrases by means of incremental composition. Experiments were performed on English and Spanish monolingual corpora in order to translate phrasal verbs in context. A new bilingual data set to evaluate strategies aimed at translating phrasal verbs in restricted syntactic domains has been created and released. @@ -182,8 +182,8 @@ Watset: Local-Global Graph Clustering with Applications in Sense and Frame Induction DmitryUstalov AlexanderPanchenko - ChrisBiemann - Simone PaoloPonzetto + ChrisBiemann + Simone PaoloPonzetto 10.1162/coli_a_00354 We present a detailed theoretical and computational analysis of the Watset meta-algorithm for fuzzy graph clustering, which has been found to be widely applicable in a variety of domains. This algorithm creates an intermediate representation of the input graph, which reflects the “ambiguity” of its nodes. Then, it uses hard clustering to discover clusters in this “disambiguated” intermediate graph. After outlining the approach and analyzing its computational complexity, we demonstrate that Watset shows competitive results in three applications: unsupervised synset induction from a synonymy graph, unsupervised semantic frame induction from dependency triples, and unsupervised semantic class induction from a distributional thesaurus. Our algorithm is generic and can also be applied to other networks of linguistic data. 423–479 @@ -193,7 +193,7 @@ Evaluating Computational Language Models with Scaling Properties of Natural Language ShuntaroTakahashi - KumikoTanaka-Ishii + KumikoTanaka-Ishii 10.1162/coli_a_00355 In this article, we evaluate computational models of natural language with respect to the universal statistical behaviors of natural language. Statistical mechanical analyses have revealed that natural language text is characterized by scaling properties, which quantify the global structure in the vocabulary population and the long memory of a text. We study whether five scaling properties (given by Zipf’s law, Heaps’ law, Ebeling’s method, Taylor’s law, and long-range correlation analysis) can serve for evaluation of computational models. Specifically, we test n-gram language models, a probabilistic context-free grammar, language models based on Simon/Pitman-Yor processes, neural language models, and generative adversarial networks for text generation. Our analysis reveals that language models based on recurrent neural networks with a gating mechanism (i.e., long short-term memory; a gated recurrent unit; and quasi-recurrent neural networks) are the only computational models that can reproduce the long memory behavior of natural language. Furthermore, through comparison with recently proposed model-based evaluation methods, we find that the exponent of Taylor’s law is a good indicator of model quality. 481–513 @@ -240,7 +240,7 @@ Computational Psycholinguistics - Ronald M.Kaplan + Ronald M.Kaplan 10.1162/coli_a_00359 607–626 J19-4001 @@ -250,10 +250,10 @@ Discourse in Multimedia: A Case Study in Extracting Geometry Knowledge from Textbooks MrinmayaSachan AvinavaDubey - Eduard H.Hovy - Tom M.Mitchell + Eduard H.Hovy + Tom M.Mitchell DanRoth - Eric P.Xing + Eric P.Xing 10.1162/coli_a_00360 To ensure readability, text is often written and presented with due formatting. These text formatting devices help the writer to effectively convey the narrative. At the same time, these help the readers pick up the structure of the discourse and comprehend the conveyed information. There have been a number of linguistic theories on discourse structure of text. However, these theories only consider unformatted text. Multimedia text contains rich formatting features that can be leveraged for various NLP tasks. In this article, we study some of these discourse features in multimedia text and what communicative function they fulfill in the context. As a case study, we use these features to harvest structured subject knowledge of geometry from textbooks. We conclude that the discourse and text layout features provide information that is complementary to lexical semantic information. Finally, we show that the harvested structured knowledge can be used to improve an existing solver for geometry problems, making it more accurate as well as more explainable. 627–665 @@ -262,8 +262,8 @@ Automatic Identification and Production of Related Words for Historical Linguistics - Alina MariaCiobanu - Liviu P.Dinu + Alina MariaCiobanu + Liviu P.Dinu 10.1162/coli_a_00361 Language change across space and time is one of the main concerns in historical linguistics. In this article, we develop tools to assist researchers and domain experts in the study of language evolution. First, we introduce a method to automatically determine whether two words are cognates. We propose an algorithm for extracting cognates from electronic dictionaries that contain etymological information. Having built a data set of related words, we further develop machine learning methods based on orthographic alignment for identifying cognates. We use aligned subsequences as features for classification algorithms in order to infer rules for linguistic changes undergone by words when entering new languages and to discriminate between cognates and non-cognates. Second, we extend the method to a finer-grained level, to identify the type of relationship between words. Discriminating between cognates and borrowings provides a deeper insight into the history of a language and allows a better characterization of language relatedness. We show that orthographic features have discriminative power and we analyze the underlying linguistic factors that prove relevant in the classification task. To our knowledge, this is the first attempt of this kind. Third, we develop a machine learning method for automatically producing related words. We focus on reconstructing proto-words, but we also address two related sub-problems, producing modern word forms and producing cognates. The task of reconstructing proto-words consists of recreating the words in an ancient language from its modern daughter languages. Having modern word forms in multiple Romance languages, we infer the form of their common Latin ancestors. Our approach relies on the regularities that occurred when words entered the modern languages. We leverage information from several modern languages, building an ensemble system for reconstructing proto-words. We apply our method to multiple data sets, showing that our approach improves on previous results, also having the advantage of requiring less input data, which is essential in historical linguistics, where resources are generally scarce. 667–704 @@ -295,7 +295,7 @@ Argument Mining: A Survey JohnLawrence - ChrisReed + ChrisReed 10.1162/coli_a_00364 Argument mining is the automatic identification and extraction of the structure of inference and reasoning expressed as arguments presented in natural language. Understanding argumentative structure makes it possible to determine not only what positions people are adopting, but also why they hold the opinions they do, providing valuable insights in domains as diverse as financial market prediction and public relations. This survey explores the techniques that establish the foundations for argument mining, provides a review of recent advances in argument mining techniques, and discusses the challenges faced in automatically extracting a deeper understanding of reasoning expressed in language in general. 765–818 diff --git a/data/xml/J74.xml b/data/xml/J74.xml index ff6fae6513..7764ddd604 100644 --- a/data/xml/J74.xml +++ b/data/xml/J74.xml @@ -28,15 +28,15 @@ The Lexical Subclasses of the Linguistic String Parser - EileenFitzpatrick - NaomiSager + EileenFitzpatrick + NaomiSager Microfiche 2 J74-1002 fitzpatrick-sager-1974-lexical Natural Semantics in Artificial Intelligence - Jaime R.Carbonell + Jaime R.Carbonell Allan M.Collins Microfiche 3 J74-1003 diff --git a/data/xml/J75.xml b/data/xml/J75.xml index a918c342cd..58592e11b7 100644 --- a/data/xml/J75.xml +++ b/data/xml/J75.xml @@ -22,7 +22,7 @@ Simple Digital Speech Synthesis - William M.Fisher + William M.Fisher A. MaynardEngebretson Microfiche 16 J75-1002 @@ -89,7 +89,7 @@ “Formulae” in Coherent Text: Linguistic Relevance of Symbolic Insertions 70–85 - FelixDreizin + FelixDreizin Microfiche 17 J75-2005 dreizin-1975-formulae @@ -112,7 +112,7 @@ A Case History in Computer Exploration of Fast Speech Rules - Douglas B.Moran + Douglas B.Moran Microfiche 19 J75-2008 moran-1975-case @@ -189,7 +189,7 @@ Review: <i> <fixed-case>I</fixed-case>nformal <fixed-case>S</fixed-case>peech: <fixed-case>A</fixed-case>lphabetic and <fixed-case>P</fixed-case>honemic <fixed-case>T</fixed-case>exts with <fixed-case>S</fixed-case>tatistical <fixed-case>A</fixed-case>nalyses and <fixed-case>T</fixed-case>ables</i>, by <fixed-case>E</fixed-case>dward <fixed-case>C</fixed-case>. <fixed-case>C</fixed-case>arterette and <fixed-case>M</fixed-case>argaret <fixed-case>H</fixed-case>ubbard <fixed-case>J</fixed-case>ones 78–91 - John B.Carroll + John B.Carroll Microfiche 22 J75-3003 carroll-1975-review @@ -226,10 +226,10 @@ <fixed-case>J</fixed-case>unction <fixed-case>G</fixed-case>rammar as a Base for Natural Language Processing - Eldon G.Lytel + Eldon G.Lytel DennisPackard DarylGibb - Alan K.Melby + Alan K.Melby Floyd H.Billings, Jr. Microfiche 26 J75-3008 @@ -298,7 +298,7 @@ Interpretation and Integration of Sentences into a <fixed-case>C</fixed-case>-Net 46–66 - Th. R.Hofmann + Th. R.Hofmann Microfiche 29 J75-4003 hofmann-1975-interpretation @@ -363,7 +363,7 @@ A General System for Semantic Analysis of <fixed-case>E</fixed-case>nglish and its Use in Drawing Maps from Directions 21–41 - Jerry R.Hobbs + Jerry R.Hobbs Microfiche 32 J75-4010 hobbs-1975-general @@ -379,7 +379,7 @@ Conceptual Grammar [abstract] 57 - William A.Martin + William A.Martin Microfiche 32 J75-4012 martin-1975-conceptual @@ -387,7 +387,7 @@ Semantic-Based Parsing and a Natural-Language Interface for Interactive Data Management 58–71 - John F.Burger + John F.Burger AntonioLeal ArieShoshani Microfiche 32 @@ -400,7 +400,7 @@ P.Medema W. J.Bronnenberg H. C.Bunt - S. P. J.Landsbergen + S. P. J.Landsbergen R. J. H.Scha W. J.Schoenmakers E. P. C.van Utteren @@ -443,7 +443,7 @@ Generation as Parsing from a Network into a Linear String 45–62 - Stuart C.Shapiro + Stuart C.Shapiro Microfiche 33 J75-4019 shapiro-1975-generation @@ -475,7 +475,7 @@ Syntactic Processing in the <fixed-case>BBN</fixed-case> Speech Understanding System [abstract] 4 - MadelineBates + MadelineBates Microfiche 34 J75-4023 bates-1975-syntactic @@ -492,7 +492,7 @@ A Tuneable Performance Grammar 19–33 - Jane J.Robinson + Jane J.Robinson Microfiche 34 J75-4025 robinson-1975-tuneable @@ -500,7 +500,7 @@ Semantic Processing for Speech Understanding 34–48 - Gary G.Hendrix + Gary G.Hendrix Microfiche 34 J75-4026 hendrix-1975-semantic @@ -508,7 +508,7 @@ <fixed-case>SPS</fixed-case>: A Formalism for Semantic Interpretation and its Use in Processing Prepositions that Reference Space 49–63 - Norman K.Sondheimer + Norman K.Sondheimer DoytPerry Microfiche 34 J75-4027 @@ -573,8 +573,8 @@ A Frame Analysis of <fixed-case>A</fixed-case>merican <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage 84–96 - Judy AnneKegl - NancyChinchor + Judy AnneKegl + NancyChinchor Microfiche 35 J75-4035 kegl-chinchor-1975-frame @@ -599,7 +599,7 @@ How Does a System Know When to Stop Inferencing? 26–39 - StanRosenschein + StanRosenschein Microfiche 36 J75-4038 rosenschein-1975-system @@ -622,7 +622,7 @@ A Natural Language Processing Package 52–66 DavidBrill - Beatrice T.Oshika + Beatrice T.Oshika Microfiche 36 J75-4040 brill-oshika-1975-natural @@ -639,7 +639,7 @@ Grammatical Compression in Notes and Records: Analysis and Computation 68–81 Barbara B.Anderson - NaomiSager + NaomiSager Microfiche 36 J75-4042 anderson-sager-1975-grammatical diff --git a/data/xml/J76.xml b/data/xml/J76.xml index 345299dc16..7c4be67cef 100644 --- a/data/xml/J76.xml +++ b/data/xml/J76.xml @@ -46,8 +46,8 @@ <fixed-case>PLATON</fixed-case>--A New Programming Language for Natural Language Analysis 28–53 - MakotoNagao - Jun-IchiTsujii + MakotoNagao + Jun-IchiTsujii Microfiche 37 J76-1003 nagao-tsujii-1976-platon @@ -79,15 +79,15 @@ Natural Language Understanding Systems within the <fixed-case>A</fixed-case>. <fixed-case>I</fixed-case>. Paradigm: A Survey and Some Comparisons - YorickWilks + YorickWilks Microfiche 40 J76-1007 wilks-1976-natural Analysis of <fixed-case>J</fixed-case>apanese Sentences - MakotoNagao - Jun-IchiTsujii + MakotoNagao + Jun-IchiTsujii Microfiche 41 J76-1008 nagao-tsujii-1976-analysis @@ -150,7 +150,7 @@ Syntax in Automatic Speech Understanding - MadeleineBates + MadeleineBates Microfiche 45 J76-2004 bates-1976-syntax @@ -165,7 +165,7 @@ A Survey of Syntactic Analysis Procedures for Natural Language - RalphGrishman + RalphGrishman Microfiche 47 J76-2006 grishman-1976-survey @@ -292,7 +292,7 @@ Multiple Environments Approach to Natural Language - Janusz StanisławBien + Janusz StanisławBien Microfiche 54 J76-3006 bien-1976-multiple @@ -365,7 +365,7 @@ Processing Case - YorickWilks + YorickWilks Microfiche 56 J76-4007 wilks-1976-processing diff --git a/data/xml/J77.xml b/data/xml/J77.xml index 43e6b65444..68b4d6d23b 100644 --- a/data/xml/J77.xml +++ b/data/xml/J77.xml @@ -59,9 +59,9 @@ Pitch Contour Generation in Speech Synthesis: A <fixed-case>J</fixed-case>unction <fixed-case>G</fixed-case>rammar Approach - Alan K.Melby + Alan K.Melby William J.Strong - Eldon G.Lytle + Eldon G.Lytle RonaldMillett Microfiche 60 J77-1004 @@ -97,8 +97,8 @@ Computation of a Subclass of Inferences: Presupposition and Entailment - Aravind K.Joshi - RalphWeischedel + Aravind K.Joshi + RalphWeischedel 1–54 Microfiche 63 J77-1008 @@ -215,7 +215,7 @@ A Goal Oriented Model of Human Dialogue James A.Moore James A.Levin - William C.Mann + William C.Mann Microfiche 67 J77-3002 moore-etal-1977-goal @@ -283,7 +283,7 @@ Spatial Reference and Semantic Nets - Norman K.Sondheimer + Norman K.Sondheimer 1–67 Microfiche 71 J77-4003 diff --git a/data/xml/J78.xml b/data/xml/J78.xml index e0f49eff23..8547a67df4 100644 --- a/data/xml/J78.xml +++ b/data/xml/J78.xml @@ -39,7 +39,7 @@ Two Papers on Semantic Interpretation in <fixed-case>M</fixed-case>ontague Grammar JoyceFriedman - Douglas B.Moran + Douglas B.Moran David S.Warren Microfiche 74 J78-1002 @@ -68,7 +68,7 @@ The Derivation of Answers from Logical Forms in a Question Answering System 3–42 - Fred J.Damerau + Fred J.Damerau Microfiche 75 J78-2002 damerau-1978-derivation @@ -85,7 +85,7 @@ Computation in Departments of Linguistics 62–68 - RichardFritzson + RichardFritzson Microfiche 75 J78-2004 fritzson-1978-computation @@ -203,7 +203,7 @@ Language Representation: Papers presented in two sessions of <fixed-case>TINLAP</fixed-case>-2 - David L.Waltz + David L.Waltz 1–2 Microfiche 78 J78-3006 @@ -220,7 +220,7 @@ What Makes Something “Ad Hoc” 10–15 - Roger C.Schank + Roger C.Schank Microfiche 78 J78-3008 schank-1978-makes @@ -244,7 +244,7 @@ Taxonomic Lattice Structures for Situation Recognition 35–43 - William A.Woods + William A.Woods Microfiche 78 J78-3011 woods-1978-taxonomic @@ -252,7 +252,7 @@ Description Formation and Discourse Model Synthesis 44–52 - Bonnie LynnWebber + Bonnie LynnWebber Microfiche 78 J78-3012 webber-1978-description @@ -277,7 +277,7 @@ Subsequent Reference: Syntactic and Rhetorical Considerations 66–74 - David D.McDonald + David D.McDonald Microfiche 78 J78-3015 mcdonald-1978-subsequent @@ -293,7 +293,7 @@ Bound Variables and Other Anaphors 81–87 - Barbara H.Partee + Barbara H.Partee Microfiche 78 J78-3017 partee-1978-bound @@ -301,14 +301,14 @@ The Use of Focus as a Tool for Disambiguation of Definite Noun Phrases 88–97 - Candace L.Sidner + Candace L.Sidner Microfiche 78 J78-3018 sidner-1978-use Language Representation: Papers presented in two sessions of <fixed-case>TINLAP</fixed-case>-2 - David L.Waltz + David L.Waltz 1–2 Microfiche 79 J78-3019 @@ -317,7 +317,7 @@ Focusing in Dialog 3–10 - Barbara J.Grosz + Barbara J.Grosz Microfiche 79 J78-3020 grosz-1978-focusing @@ -349,9 +349,9 @@ Speech Acts as a Basis for Understanding Dialogue Coherence 32–39 - C. RaymondPerrault - James F.Allen - Philip R.Cohen + C. RaymondPerrault + James F.Allen + Philip R.Cohen Microfiche 79 J78-3024 perrault-etal-1978-speech @@ -367,7 +367,7 @@ Intentlonallty and Human Conversations 48–55 - Jaime G.Carbonell Jr + Jaime G.Carbonell Jr Microfiche 79 J78-3026 carbonell-jr-1978-intentlonallty @@ -375,7 +375,7 @@ On the Interdependence of Language and Perception 56–63 - David L.Waltz + David L.Waltz Microfiche 79 J78-3027 waltz-1978-interdependence @@ -416,14 +416,14 @@ Semantic Primitives in Language and Vision 87–90 - YorickWilks + YorickWilks Microfiche 79 J78-3032 wilks-1978-semantic Inference and Theory: Papers presented in two sessions of <fixed-case>TINLAP</fixed-case>-2 - David L.Waltz + David L.Waltz 1–2 Microfiche 80 J78-3033 @@ -432,7 +432,7 @@ A Note on Partial Match of Descriptions. Can One Simultaneously Question (Retrieve) and Inform (Update)? 3–5 - Aravind K.Joshi + Aravind K.Joshi Microfiche 80 J78-3034 joshi-1978-note @@ -472,7 +472,7 @@ Path-Based and Node-Based Inference in Semantic Networks 38–44 - Stuart C.Shapiro + Stuart C.Shapiro Microfiche 80 J78-3039 shapiro-1978-path @@ -498,7 +498,7 @@ A Computational Account of Some Constraints on Language 55–65 - MitchellMarcus + MitchellMarcus Microfiche 80 J78-3042 marcus-1978-computational @@ -506,7 +506,7 @@ Remarks on Processing, Constraints, and the Lexicon 66–70 - ThomasWasow + ThomasWasow Microfiche 80 J78-3043 wasow-1978-remarks @@ -544,8 +544,8 @@ Properties of Lexical Relations [Appendix <fixed-case>II</fixed-case> of “A Lexicon for a Computer Question-Answering System,” <fixed-case>AJCL</fixed-case> Microfiche 83] 16–24 - Martha W.Evens - Raoul N.Smith + Martha W.Evens + Raoul N.Smith Microfiche 81 J78-4002 evens-smith-1978-properties @@ -580,8 +580,8 @@ A Lexicon for a Computer Question-Answering System - Martha WEvens - Raoul NSmith + Martha WEvens + Raoul NSmith Microfiche 83 J78-4006 evens-smith-1978-lexicon diff --git a/data/xml/J80.xml b/data/xml/J80.xml index 25e55dd5c9..0372d35075 100644 --- a/data/xml/J80.xml +++ b/data/xml/J80.xml @@ -15,14 +15,14 @@ Cascaded <fixed-case>ATN</fixed-case> Grammars - William A.Woods + William A.Woods 1-12 J80-1001 woods-1980-cascaded An Integrated Understander - Roger C.Schank + Roger C.Schank MichaelLebowitz LawrenceBirnbaum 13-30 @@ -31,7 +31,7 @@ Slot Grammars - Michael C.McCord + Michael C.McCord 31-42 J80-1003 mccord-1980-slot @@ -94,8 +94,8 @@ Toward Natural Language Computation <fixed-case>I</fixed-case> - Alan W.Biermann - Bruce W.Ballard + Alan W.Biermann + Bruce W.Ballard 71-86 J80-2001 biermann-ballard-1980-toward @@ -109,7 +109,7 @@ Responding Intelligently to Unparsable Inputs - Ralph M.Weischedel + Ralph M.Weischedel John E.Black 97-109 J80-2003 @@ -128,7 +128,7 @@ Meaning and Discourse - A Computer Model of Psychoanalytic Speech and Cognition - John HenryClippinger, Jr. + John HenryClippinger, Jr. J80-2006 clippinger-jr-1980-meaning @@ -192,7 +192,7 @@ A Plan-Based Analysis of Indirect Speech Act - C. RaymondPerrault + C. RaymondPerrault 167-182 J80-3003 perrault-1980-plan diff --git a/data/xml/J81.xml b/data/xml/J81.xml index 2b86bd89c7..ee25b477fe 100644 --- a/data/xml/J81.xml +++ b/data/xml/J81.xml @@ -22,7 +22,7 @@ Computer Generation of Multiparagraph <fixed-case>E</fixed-case>nglish Text - William C.Mann + William C.Mann James A.Moore 17-29 J81-1002 @@ -30,7 +30,7 @@ Operating Statistics for the Transformational Question Answering System - Fred J.Damerau + Fred J.Damerau 30-42 J81-1003 damerau-1981-operating @@ -88,8 +88,8 @@ Relaxation Techniques for Parsing Grammatically Ill-Formed Input in Natural Language Understanding Systems - Stan C.Kwasny - Norman K.Sondheimer + Stan C.Kwasny + Norman K.Sondheimer 99-108 J81-2002 kwasny-sondheimer-1981-relaxation @@ -137,7 +137,7 @@ Formal Roles, Co-Descriptors, and the Representation of Quantified <fixed-case>E</fixed-case>nglish Expressions - William A.Martin + William A.Martin 137-148 J81-3001 martin-1981-formal @@ -152,7 +152,7 @@ Prospects for Computer-Assisted Dialect Adaptation David J.Weber - William C.Mann + William C.Mann 165-177 J81-3003 weber-mann-1981-prospects @@ -165,7 +165,7 @@ New <fixed-case>G</fixed-case>uinea and Neighboring Areas: A Sociolinguistic Laboratory - KarenJensen + KarenJensen J81-3005 jensen-1981-new @@ -215,7 +215,7 @@ Focusing for Interpretation of Pronouns - Candace L.Sidner + Candace L.Sidner 217-231 J81-4001 sidner-1981-focusing diff --git a/data/xml/J82.xml b/data/xml/J82.xml index aec3798aa4..696fb6e738 100644 --- a/data/xml/J82.xml +++ b/data/xml/J82.xml @@ -15,7 +15,7 @@ Phrase Structure Trees Bear More Fruit than You Would Have Thought - Aravind K.Joshi + Aravind K.Joshi Leon S.Levy 1-11 J82-1001 @@ -23,14 +23,14 @@ Generalized <fixed-case>A</fixed-case>ugmented <fixed-case>T</fixed-case>ransition <fixed-case>N</fixed-case>etwork <fixed-case>G</fixed-case>rammars for Generation from Semantic Networks - Stuart C.Shapiro + Stuart C.Shapiro 12-26 J82-1002 shapiro-1982-generalized From <fixed-case>E</fixed-case>nglish to Logic: Context-Free Computation of ‘Conventional’ Logical Translation - Lenhart K.Schubert + Lenhart K.Schubert Francis JeffryPelletier 27-44 J82-1003 @@ -79,28 +79,28 @@ Applied Computational Linguistics in Perspective: Proceedings of the Workshop CarrollJohnson - JoanBachenko + JoanBachenko 55-84 J82-2001 johnson-bachenko-1982-applied Natural-Language Interface - Gary G.Hendrix + Gary G.Hendrix 56-61 J82-2002 hendrix-1982-natural Text Generation - WilliamMann + WilliamMann 62-69 J82-2003 mann-1982-text Concept Extraction - ChristineMontgomery + ChristineMontgomery 70-73 J82-2004 montgomery-1982-concept @@ -114,7 +114,7 @@ Sublanguages - RichardKittredge + RichardKittredge 79-84 J82-2006 kittredge-1982-sublanguages @@ -170,15 +170,15 @@ Computational Complexity and <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar - Robert C.Berwick + Robert C.Berwick 97-109 J82-3001 berwick-1982-computational An Efficient Easily Adaptable System for Interpreting Natural Language Queries - David H.D.Warren - Fernando C.N.Pereira + David H.D.Warren + Fernando C.N.Pereira 110-122 J82-3002 warren-pereira-1982-efficient @@ -193,7 +193,7 @@ Coping with Syntactic Ambiguity or How to Put the Block in the Box on the Table - KennethChurch + KennethChurch RameshPatil 139-149 J82-3004 diff --git a/data/xml/J83.xml b/data/xml/J83.xml index 8ab53605b5..d9c3c99b58 100644 --- a/data/xml/J83.xml +++ b/data/xml/J83.xml @@ -15,7 +15,7 @@ Paraphrasing Questions Using Given and new information - Kathleen R.McKeown + Kathleen R.McKeown 1-10 J83-1001 mckeown-1983-paraphrasing @@ -30,7 +30,7 @@ Questioning the Need for Parsing Ill-formed Inputs - LindaFineman + LindaFineman 22-23 J83-1003 fineman-1983-questioning @@ -99,14 +99,14 @@ Treating Coordination in Logic Grammars VeronicaDahl - Michael C.McCord + Michael C.McCord 69-91 J83-2002 dahl-mccord-1983-treating Letters to the Editor: Re <fixed-case>B</fixed-case>allard on the Need for Careful Description - KarenSparck Jones + KarenSparck Jones J83-2003 sparck-jones-1983-letters @@ -117,7 +117,7 @@ Directory of Graduate Programs in Computational Linguistics - MarthaEvens + MarthaEvens LauriKarttunen J83-2005 evens-karttunen-1983-directory @@ -158,7 +158,7 @@ Recovery Strategies for Parsing Extragrammatical Language - Jaime G.Carbonell + Jaime G.Carbonell Philip J.Hayes 123-146 J83-3001 @@ -176,8 +176,8 @@ Meta-rules as a Basis for Processing Ill-Formed input - Ralph M.Weischedel - Norman K.Sondheimer + Ralph M.Weischedel + Norman K.Sondheimer 161-177 J83-3003 weischedel-sondheimer-1983-meta @@ -185,7 +185,7 @@ Preference Semantics, Ill-Formedness, and Metaphor DanFass - YorickWilks + YorickWilks 178-187 J83-3004 fass-wilks-1983-preference @@ -199,7 +199,7 @@ Re <fixed-case>S</fixed-case>parck <fixed-case>J</fixed-case>ones Re <fixed-case>B</fixed-case>allard on the Need for Careful Description - BruceBallard + BruceBallard 197-198 J83-3006 ballard-1983-sparck diff --git a/data/xml/J84.xml b/data/xml/J84.xml index 87cef42a0f..f2a00daa54 100644 --- a/data/xml/J84.xml +++ b/data/xml/J84.xml @@ -29,7 +29,7 @@ Book Review: Principles of Computer Speech - JohnThomas + JohnThomas J84-1003 thomas-1984-book @@ -93,7 +93,7 @@ A Phrase-Structured Grammatical Framework for Transportable Natural Language Processing - Bruce W.Ballard + Bruce W.Ballard Nancy L.Tinkham 81-96 J84-2001 @@ -101,14 +101,14 @@ The Pragmatics of Referring and the Modality of Communication - Philip R.Cohen + Philip R.Cohen 97-146 J84-2002 cohen-1984-pragmatics On the <fixed-case>F</fixed-case>ass and <fixed-case>W</fixed-case>ilks Proposal to Use “Polysemy Rules” - David M.Carter + David M.Carter 147-148 J84-2003 carter-1984-fass @@ -169,7 +169,7 @@ On the Mathematical Properties of Linguistic Theories - C. RaymondPerrault + C. RaymondPerrault 165-176 J84-3001 perrault-1984-mathematical @@ -177,21 +177,21 @@ <fixed-case>E</fixed-case>nglish and the Class of Context-Free Languages Paul M.Postal - D. TerenceLangendoen + D. TerenceLangendoen 177-181 J84-3002 postal-langendoen-1984-english On Two Recent Attempts to Show that <fixed-case>E</fixed-case>nglish Is Not a <fixed-case>CFL</fixed-case> - Geoffrey K.Pullum + Geoffrey K.Pullum 182-186 J84-3003 pullum-1984-two Comments on <fixed-case>P</fixed-case>ullum’s Criticisms - D. TerenceLangendoen + D. TerenceLangendoen Paul M.Postal 186-188 J84-3004 @@ -199,14 +199,14 @@ Strong Generative Capacity, Weak Generative Capacity, and Modern Linguistic Theories - Robert C.Berwick + Robert C.Berwick 189-202 J84-3005 berwick-1984-strong Book Review: A Grammar of <fixed-case>E</fixed-case>nglish on Mathematical Principles - Bruce E.Nevin + Bruce E.Nevin J84-3006 nevin-1984-book diff --git a/data/xml/J85.xml b/data/xml/J85.xml index be47323488..9d0fd024b3 100644 --- a/data/xml/J85.xml +++ b/data/xml/J85.xml @@ -22,22 +22,22 @@ Taum-Aviation: Its Technical Features and Some Experimental Results PierreIsabelle - LaurentBourbeau + LaurentBourbeau 18-27 J85-1002 isabelle-bourbeau-1985-taum Automated Translation at Grenoble University - BernardVauquois - ChristianBoitet + BernardVauquois + ChristianBoitet 28-36 J85-1003 vauquois-boitet-1985-automated Book Review: THE LOGIC OF MIND - DavidIsrael + DavidIsrael J85-1004 israel-1985-book @@ -96,9 +96,9 @@ The <fixed-case>J</fixed-case>apanese Government Project for Machine Translation - MakotoNagao - Jun-ichiTsujii - Jun-ichiNakamura + MakotoNagao + Jun-ichiTsujii + Jun-ichiNakamura 91-110 J85-2001 nagao-etal-1985-japanese @@ -133,7 +133,7 @@ <fixed-case>EUROTRA</fixed-case>: A Multilingual System under Development RodJohnson MaghiKing - Louisdes Tombe + Louisdes Tombe 155-169 J85-2005 johnson-etal-1985-eurotra @@ -206,14 +206,14 @@ On the Complexity of <fixed-case>ID</fixed-case>/<fixed-case>LP</fixed-case> Parsing - G. EdwardBarton, Jr. + G. EdwardBarton, Jr. 205-218 J85-4001 barton-jr-1985-complexity <fixed-case>PHRED</fixed-case>: A Generator for Natural Language Interfaces - Paul S.Jacobs + Paul S.Jacobs 219-242 J85-4002 jacobs-1985-phred @@ -233,7 +233,7 @@ Information Retrieval Experiment - MarthaEvens + MarthaEvens J85-4005 evens-1985-information diff --git a/data/xml/J86.xml b/data/xml/J86.xml index e4ead36f6a..c45d42657d 100644 --- a/data/xml/J86.xml +++ b/data/xml/J86.xml @@ -14,15 +14,15 @@ Resolving Lexical Ambiguity in a Deterministic Parser - RobertMilne + RobertMilne 1-12 J86-1001 milne-1986-resolving The Correction of Ill-Formed Input Using History-Based Expectation with Applications to Speech Understanding - Pamela E.Fink - Alan W.Biermann + Pamela E.Fink + Alan W.Biermann 13-36 J86-1002 fink-biermann-1986-correction @@ -36,13 +36,13 @@ Book Reviews: Surface Compositional Grammar - AlainPolguère + AlainPolguère J86-1004 polguere-1986-book Book Reviews: Talking Minds - HelenGigley + HelenGigley J86-1005 gigley-1986-book @@ -54,7 +54,7 @@ The <fixed-case>F</fixed-case>inite <fixed-case>S</fixed-case>tring Newsletter - BernardVauquois + BernardVauquois J86-1007 vauquois-1986-finite @@ -95,7 +95,7 @@ Three Titles from the <fixed-case>C</fixed-case>ambridge Series: <fixed-case>S</fixed-case>TUDIES IN <fixed-case>N</fixed-case>ATURAL <fixed-case>L</fixed-case>ANGUAGE <fixed-case>P</fixed-case>ROCESSING - AravindJoshi + AravindJoshi J86-1015 joshi-1986-three @@ -121,9 +121,9 @@ Summarizing Natural Language Database Responses - Jugal K.Kalita + Jugal K.Kalita Marlene L.Jones - Gordon I.McCalla + Gordon I.McCalla 107-124 J86-2002 kalita-etal-1986-summarizing @@ -143,7 +143,7 @@ Book Reviews: Communicating with Databases in Natural Language - StanKwasny + StanKwasny J86-2005 kwasny-1986-book @@ -208,17 +208,17 @@ Attention, Intentions, and the Structure of Discourse - Barbara J.Grosz - Candace L.Sidner + Barbara J.Grosz + Candace L.Sidner 175-204 J86-3001 grosz-sidner-1986-attention Discovery Procedures for Sublanguage Selectional Patterns: Initial Experiments - RalphGrishman - LynetteHirschman - Ngo ThanhNhan + RalphGrishman + LynetteHirschman + Ngo ThanhNhan 205-215 J86-3002 grishman-etal-1986-discovery @@ -243,7 +243,7 @@ The <fixed-case>F</fixed-case>inite <fixed-case>S</fixed-case>tring Newsletter: Site Report: Another From the <fixed-case>DARPA</fixed-case> Series, Overview of the <fixed-case>TACITUS</fixed-case> Project - Jerry R.Hobbs + Jerry R.Hobbs J86-3006 hobbs-1986-finite @@ -287,8 +287,8 @@ Associative Model of Morphological Analysis: An Empirical Inquiry - HarriJäppinen - MattiYlilammi + HarriJäppinen + MattiYlilammi 257-272 J86-4001 jappinen-ylilammi-1986-associative @@ -309,13 +309,13 @@ Book Reviews: Natural Language Computing: The Commercial Applications - MarkJones + MarkJones J86-4004 jones-1986-book <fixed-case>B</fixed-case>oolean Semantics for Natural Language - LawrenceMoss + LawrenceMoss J86-4005 moss-1986-boolean diff --git a/data/xml/J87.xml b/data/xml/J87.xml index 79ffded0cf..ec027cc1b9 100644 --- a/data/xml/J87.xml +++ b/data/xml/J87.xml @@ -13,7 +13,7 @@ Restricting Logic Grammars with Government-Binding Theory - Edward P.Stabler, Jr. + Edward P.Stabler, Jr. 1-10 J87-1001 stabler-jr-1987-restricting @@ -27,8 +27,8 @@ Simultaneous-Distributive Coordination and Context-Freeness - Michael B.Kac - AlexisManaster-Ramer + Michael B.Kac + AlexisManaster-Ramer William C.Rounds 25-30 J87-1003 @@ -36,30 +36,30 @@ An Efficient Augmented-Context-Free Parsing Algorithm - MasaruTomita + MasaruTomita 31-46 J87-1004 tomita-1987-efficient An Algorithm for Generating Quantifier Scopings - Jerry R.Hobbs - Stuart M.Shieber + Jerry R.Hobbs + Stuart M.Shieber 47-63 J87-1005 hobbs-shieber-1987-algorithm Subject-Verb Agreement in Respective Coordinations and Context Freeness - AlexisManaster-Ramer + AlexisManaster-Ramer 64-65 J87-1006 manaster-ramer-1987-subject A Note on a Study of Cases - KarenSparck Jones - BranimirBoguraev + KarenSparck Jones + BranimirBoguraev 65-68 J87-1007 sparck-jones-boguraev-1987-note @@ -84,13 +84,13 @@ Book Reviews: Electronic Synthesis of Speech - William M.Fisher + William M.Fisher J87-1011 fisher-1987-book Book Reviews: Readings in Knowledge Representation - HelenGigley + HelenGigley J87-1012 gigley-1987-book @@ -102,7 +102,7 @@ Book Reviews: Planning and Understanding: A Computational Approach to Human Reasoning - HaroldSomers + HaroldSomers J87-1014 somers-1987-book @@ -171,15 +171,15 @@ Processing Dictionary Definitions with Phrasal Pattern Hierarchies - HiyanAlshawi + HiyanAlshawi 195-202 J87-3001 alshawi-1987-processing Large Lexicons for Natural Language Processing: Utilising the Grammar Coding System of <fixed-case>LDOCE</fixed-case> - BranBoguraev - TedBriscoe + BranBoguraev + TedBriscoe 203-218 J87-3002 boguraev-briscoe-1987-large @@ -187,10 +187,10 @@ Tools and Methods for Computational Linguistics Roy J.Byrd - NicolettaCalzolari - Martin S.Chodorow - Judith L.Klavans - Mary S.Neff + NicolettaCalzolari + Martin S.Chodorow + Judith L.Klavans + Mary S.Neff Omneya A.Rizk 219-240 J87-3003 @@ -198,7 +198,7 @@ Commonsense Metaphysics and Lexical Semantics - Jerry R.Hobbs + Jerry R.Hobbs WilliamCroft ToddDavies DouglasEdwards @@ -209,7 +209,7 @@ Disambiguating Prepositional Phrase Attachments by Using On-Line Dictionary Definitions - KarenJensen + KarenJensen Jean-LouisBinot 251-260 J87-3005 @@ -217,15 +217,15 @@ A Formal Lexicon in Meaning-Text Theory (Or How to Do Lexica with Words) - IgorMel’čuk - AlainPolguere + IgorMel’čuk + AlainPolguere 261-275 J87-3006 melcuk-polguere-1987-formal The Subworld Concept Lexicon and the Lexicon Management System - SergeiNirenburg + SergeiNirenburg VictorRaskin 276-289 J87-3007 @@ -233,10 +233,10 @@ A Computational Framework for Lexical Description - Graeme D.Ritchie - Stephen G.Pulman - Alan W.Black - Graham J.Russell + Graeme D.Ritchie + Stephen G.Pulman + Alan W.Black + Graham J.Russell 290-307 J87-3008 ritchie-etal-1987-computational @@ -244,7 +244,7 @@ The Self-Extending Phrasal Lexicon UriZernik - Michael G.Dyer + Michael G.Dyer 308-327 J87-3009 zernik-dyer-1987-self diff --git a/data/xml/J88.xml b/data/xml/J88.xml index 53bad4fbd1..736a1d5952 100644 --- a/data/xml/J88.xml +++ b/data/xml/J88.xml @@ -15,7 +15,7 @@ Category Structures GeraldGazdar - Geoffrey K.Pullum + Geoffrey K.Pullum RobertCarpenter EwanKlein Thomas E.Hukari @@ -59,7 +59,7 @@ Book Reviews: Computer Speech Processing - John C.Thomas + John C.Thomas J88-1007 thomas-1988-book @@ -71,7 +71,7 @@ Book Reviews: Machine Translation: Theoretical and Methodological Issues - HaroldSomers + HaroldSomers J88-1009 somers-1988-book @@ -115,14 +115,14 @@ Foreword to Special Issue on Tense and Aspect - Bonnie LynnWebber + Bonnie LynnWebber 1-2 J88-2001 webber-1988-foreword Tense, Quantifiers, and Contexts - Erhard W.Hinrichs + Erhard W.Hinrichs 3-14 J88-2002 hinrichs-1988-tense @@ -130,7 +130,7 @@ Temporal Ontology and Temporal Reference MarcMoens - MarkSteedman + MarkSteedman 15-28 J88-2003 moens-steedman-1988-temporal @@ -144,14 +144,14 @@ A Computational Model of the Semantics of Tense and Aspect - Rebecca J.Passonneau + Rebecca J.Passonneau 44-60 J88-2005 passonneau-1988-computational Tense as Discourse Anaphor - Bonnie LynnWebber + Bonnie LynnWebber 61-73 J88-2006 webber-1988-tense @@ -176,13 +176,13 @@ Book Reviews: <fixed-case>J</fixed-case>apanese Phrase Structure Grammar: A Unification-Based Approach - PeteWhitelock + PeteWhitelock J88-2010 whitelock-1988-book Book Reviews: <fixed-case>P</fixed-case>rolog and Natural-Language Analysis - PatrickSaint-Dizier + PatrickSaint-Dizier J88-2011 saint-dizier-1988-book @@ -234,23 +234,23 @@ Modeling the User in Natural Language Systems RobertKass - TimFinin + TimFinin 5-22 J88-3002 kass-finin-1988-modeling Modeling the User’s Plans and Goals - SandraCarberry + SandraCarberry + MargotFlowers 23-37 J88-3003 - MargotFlowers carberry-flowers-1988-modeling Recognizing and Responding to Plan-Oriented Misconceptions AlexQuilici - Michael G.Dyer + Michael G.Dyer MargotFlowers 38-51 J88-3004 @@ -258,14 +258,14 @@ Reasoning on a Highlighted User Model to Respond to Misconceptions - Kathleen F.McCoy + Kathleen F.McCoy 52-63 J88-3005 mccoy-1988-reasoning Tailoring Object Descriptions to a User’s Level of Expertise - Cecile L.Paris + Cecile L.Paris 64-78 J88-3006 paris-1988-tailoring @@ -307,13 +307,13 @@ User Models, Discourse Models, and Some Others - KarenSparck Jones + KarenSparck Jones J88-3013 sparck-jones-1988-user Distinguishing User Models From Discourse Models - WolfgangWahlster + WolfgangWahlster J88-3014 wahlster-1988-distinguishing @@ -331,25 +331,25 @@ Book Reviews: Natural Language Generation, New Results in Artificial Intelligence, Psychology, and Linguistics - MarieBienkowski + MarieBienkowski J88-3017 bienkowski-1988-book Book Reviews: The Linguistic Basis of Text Generation - KathleenMcCoy + KathleenMcCoy J88-3018 mccoy-1988-book Book Reviews: Cognitive Science: An Introduction - Helen M.Gigley + Helen M.Gigley J88-3019 gigley-1988-book Book Reviews: Machine Translation: Past, Present, Future - RichardKittredge + RichardKittredge J88-3020 kittredge-1988-book @@ -388,7 +388,7 @@ RobertWilensky David N.Chin MarcLuria - JamesMartin + JamesMartin JamesMayfield DekaiWu J88-4003 @@ -408,7 +408,7 @@ Book Reviews: Language and Information - BruceNevin + BruceNevin J88-4006 nevin-1988-book @@ -420,31 +420,31 @@ Book Reviews: Semantic Interpretation and the Resolution of Ambiguity - KarenSparck Jones + KarenSparck Jones J88-4008 sparck-jones-1988-book Book Reviews: The Fifth Generation Fallacy: Why <fixed-case>J</fixed-case>apan is Betting Its Future on Artificial Intelligence - HaroldSomers + HaroldSomers J88-4009 somers-1988-book-reviews Book Reviews: Natural Language Understanding - MichaelKac + MichaelKac J88-4010 kac-1988-book Book Reviews: A Natural Language Interface for Computer-Aided Design - BruceBallard + BruceBallard J88-4011 ballard-1988-book Book Reviews: The Formal Complexity of Natural Language - AlexisManaster-Ramer + AlexisManaster-Ramer J88-4012 manaster-ramer-1988-book diff --git a/data/xml/J89.xml b/data/xml/J89.xml index a7170d53cd..45af0de917 100644 --- a/data/xml/J89.xml +++ b/data/xml/J89.xml @@ -21,7 +21,7 @@ Syntactic Graphs: A Representation for the Union of All Ambiguous Parse Trees - JungyunSeo + JungyunSeo Robert F.Simmons 19-32 J89-1002 @@ -29,14 +29,14 @@ Design of <fixed-case>LMT</fixed-case>: A <fixed-case>P</fixed-case>rolog-Based Machine Translation System - Michael C.McCord + Michael C.McCord 33-52 J89-1003 mccord-1989-design Book Reviews: An Artificial Intelligence Approach to Legal Reasoning - MarthaEvens + MarthaEvens J89-1004 evens-1989-book @@ -60,7 +60,7 @@ Book Reviews: Computer Interpretation of Natural Language Descriptions - Deborah A.Dahl + Deborah A.Dahl J89-1008 dahl-1989-book @@ -109,14 +109,14 @@ A Pragmatic-Based Approach to Ellipsis Resolution - SandraCarberry + SandraCarberry 75-96 J89-2001 carberry-1989-pragmatic Parsing with a Small Dictionary for Applications such as Text to Speech - Douglas D.O’Shaughnessy + Douglas D.O’Shaughnessy 97-108 J89-2002 oshaughnessy-1989-parsing @@ -137,7 +137,7 @@ Book Reviews: The Case for Lexicase: An Outline of Lexicase Grammatical Theory - NormanFraser + NormanFraser J89-2005 fraser-1989-book @@ -155,31 +155,31 @@ Book Reviews: Machine Translation Today: The State of the Ar - JohnHutchins + JohnHutchins J89-2008 hutchins-1989-book Book Reviews: Advances in Natural Language Generation: An Interdisciplinary Perspective - MarieMeteer + MarieMeteer J89-2009 meteer-1989-book Book Reviews: Natural Language Parsing Systems - PetrSgall + PetrSgall J89-2010 sgall-1989-book Book Reviews: Philosophy, Language, and Artificial Intelligence: Resources for Processing Natural Language - PeterLudlow + PeterLudlow J89-2011 ludlow-1989-book Book Reviews: Systemic Text Generation as Problem Solving - EduardHovy + EduardHovy J89-2012 hovy-1989-book @@ -225,15 +225,15 @@ Knowledge Representation for Commonsense Reasoning with Text - KathleenDahlgren - JoyceMcDowell + KathleenDahlgren + JoyceMcDowell 149-170 J89-3002 dahlgren-mcdowell-1989-knowledge Non-singular Concepts in Natural Language Discourse - TomekStrzalkowski + TomekStrzalkowski NickCercone 171-186 J89-3003 @@ -241,13 +241,13 @@ Book Reviews: Natural Language Understanding and Logic Programming, <fixed-case>II</fixed-case>: Proceedings of the Second International Workshop - Janusz S.Bien + Janusz S.Bien J89-3004 bien-1989-book Language and Spatial Cognition - JamesPustejovsky + JamesPustejovsky J89-3005 pustejovsky-1989-language @@ -271,32 +271,32 @@ Book Reviews: Medical Language Processing: Computer Management of Narrative Data - NicolettaCalzolari + NicolettaCalzolari J89-3009 calzolari-1989-book Book Reviews: Information-based Syntax and Semantics. Vol 1: Fundamentals - Edward P.Stabler, Jr. + Edward P.Stabler, Jr. J89-3010 stabler-jr-1989-book Book Reviews: Machine Translation Systems - John S.White + John S.White J89-3011 white-1989-book Book Reviews: Natural Language Processing - ElenaPascaleva + ElenaPascaleva DanFass J89-3012 pascaleva-fass-1989-book Book Reviews: Text Coherence in Translation - ChrysanneDiMarco + ChrysanneDiMarco J89-3013 dimarco-1989-book @@ -308,7 +308,7 @@ Book Reviews: Knowledge Systems and <fixed-case>P</fixed-case>rolog: A Logical Approach to Expert Systems and Natural Language Processing - StanKwasny + StanKwasny J89-3015 kwasny-1989-book @@ -347,15 +347,15 @@ A Parsing Algorithm for Unification Grammar - AndrewHaas + AndrewHaas 219-232 J89-4001 haas-1989-parsing Natural Language Generation from Plans - ChrisMellish - RogerEvans + ChrisMellish + RogerEvans 233-249 J89-4002 mellish-evans-1989-natural @@ -369,19 +369,19 @@ Book Reviews: An Introduction to Formal Language Theory - Geoffrey K.Pullum + Geoffrey K.Pullum J89-4004 pullum-1989-book Book Reviews: Attribute-Value Logic and the Theory of Grammar - RobertKuhns + RobertKuhns J89-4005 kuhns-1989-book Book Reviews: New Directions in Machine Translation (Proceedings of the Conference, <fixed-case>B</fixed-case>udapest, <fixed-case>A</fixed-case>ugust 1988) - EsmeraldaManandise + EsmeraldaManandise J89-4006 manandise-1989-book diff --git a/data/xml/J90.xml b/data/xml/J90.xml index 775d971ace..dc6330897b 100644 --- a/data/xml/J90.xml +++ b/data/xml/J90.xml @@ -14,7 +14,7 @@ Categorial Semantics and Scoping - Fernando C. N.Pereira + Fernando C. N.Pereira 1-10 J90-1001 pereira-1990-categorial @@ -22,14 +22,14 @@ An Interpretation of Negation in Feature Structure Descriptions AnujDawar - K.Vijay-Shanker + K.Vijay-Shanker 11-21 J90-1002 dawar-vijay-shanker-1990-interpretation Word Association Norms, Mutual Information, and Lexicography - Kenneth WardChurch + Kenneth WardChurch PatrickHanks 22-29 J90-1003 @@ -37,23 +37,23 @@ Semantic-Head-Driven Generation - Stuart M.Shieber - Gertjanvan Noord - Fernando C. N.Pereira - Robert C.Moore + Stuart M.Shieber + Gertjanvan Noord + Fernando C. N.Pereira + Robert C.Moore 30-42 J90-1004 shieber-etal-1990-semantic Letter to the Editor - Michael B.Kac + Michael B.Kac J90-1005 kac-1990-letter Book Reviews: Interpreting Anaphors in Natural Language Texts - DeborahDahl + DeborahDahl J90-1006 dahl-1990-book @@ -65,19 +65,19 @@ Book Reviews: Prosody and Speech Recognition - JoanBachenko + JoanBachenko J90-1008 bachenko-1990-book Book Reviews: From Syntax to Semantics: Insights from Machine Translation - HaroldSomers + HaroldSomers J90-1009 somers-1990-book Book Reviews: Studies in Computer-Aided Lexicology - MarthaEvens + MarthaEvens J90-1010 evens-1990-book @@ -136,13 +136,13 @@ A Statistical Approach to Machine Translation - Peter F.Brown - JohnCocke - Stephen A.Della Pietra - Vincent J.Della Pietra - FredrickJelinek - John D.Lafferty - Robert L.Mercer + Peter F.Brown + JohnCocke + Stephen A.Della Pietra + Vincent J.Della Pietra + FredrickJelinek + John D.Lafferty + Robert L.Mercer Paul S.Roossin 79-85 J90-2002 @@ -150,7 +150,7 @@ An Implementable Semantics for Comparative Constructions - MannyRayner + MannyRayner AmelieBanks 86-112 J90-2003 @@ -215,7 +215,7 @@ Resolving Quasi Logical Forms - HiyanAlshawi + HiyanAlshawi 133-144 J90-3001 alshawi-1990-resolving @@ -245,27 +245,27 @@ Workshop on the Evaluation of Natural Language Processing Systems - MarthaPalmer - TimFinin + MarthaPalmer + TimFinin 175-181 J90-3005 palmer-finin-1990-workshop Book Reviews: Machine Translation: How Far Can It Go? - DominiqueEstival + DominiqueEstival J90-3006 estival-1990-book Book Reviews: Looking Up: An Account of the <fixed-case>COBUILD</fixed-case> <fixed-case>PROJECT</fixed-case> <fixed-case>IN</fixed-case> <fixed-case>LEXICAL</fixed-case> <fixed-case>COMPUTING</fixed-case> - BranimirBoguraev + BranimirBoguraev J90-3007 boguraev-1990-book Book Reviews: Generating Natural Language under Pragmatic Constraints - WolfgangHoeppner + WolfgangHoeppner J90-3008 hoeppner-1990-book @@ -305,14 +305,14 @@ Anaphora Resolution in Slot Grammar ShalomLappin - MichaelMcCord + MichaelMcCord 197-212 J90-4001 lappin-mccord-1990-anaphora Sentential Semantics for Propositional Attitudes - Andrew R.Haas + Andrew R.Haas 213-233 J90-4002 haas-1990-sentential diff --git a/data/xml/J91.xml b/data/xml/J91.xml index 72713cf102..43bb5d487d 100644 --- a/data/xml/J91.xml +++ b/data/xml/J91.xml @@ -44,13 +44,13 @@ Book Reviews: Theory and Practice in Corpus Linguistics - Kenneth WardChurch + Kenneth WardChurch J91-1005 church-1991-book Book Reviews: Functional Grammar and the Computer - NormanFraser + NormanFraser J91-1006 fraser-1991-book @@ -62,7 +62,7 @@ Practical <fixed-case>SGML</fixed-case> - CarolVan Ess-Dykema + CarolVan Ess-Dykema J91-1008 van-ess-dykema-1991-practical @@ -108,8 +108,8 @@ How to Encode Semantic Knowledge: A Method for Meaning Representation and Computer-Aided Acquisition - PaolaVelardi - Maria TeresaPazienze + PaolaVelardi + Maria TeresaPazienze MichelaFasolo 153-170 J91-2002 @@ -118,7 +118,7 @@ Semantics of Paragraphs WlodekZadrozny - KarenJensen + KarenJensen 171-210 J91-2003 zadrozny-jensen-1991-semantics @@ -132,7 +132,7 @@ Book Reviews: <fixed-case>PC</fixed-case>-<fixed-case>KIMMO</fixed-case>: A Two-Level Processor for Morphological Analysis - RichardSproat + RichardSproat J91-2005 sproat-1991-book @@ -150,7 +150,7 @@ <fixed-case>A</fixed-case>ntilinguistics: A Critical Assessment of Modern Linguistic Theory and Practice - Geoffrey K.Pullum + Geoffrey K.Pullum J91-2008 pullum-1991-antilinguistics @@ -217,21 +217,21 @@ Computation of the Probability of Initial Substring Generation by Stochastic Context-Free Grammars - FrederickJelinek - John D.Lafferty + FrederickJelinek + John D.Lafferty 315-353 J91-3004 jelinek-lafferty-1991-computation Erratum to: A Statistical Approach to Machine Translation - Peter F.Brown - Stephen A.Della Pietra - FredrickJelinek - Robert L.Mercer - JohnCocke - Vincent J.Della Pietra - John D.Lafferty + Peter F.Brown + Stephen A.Della Pietra + FredrickJelinek + Robert L.Mercer + JohnCocke + Vincent J.Della Pietra + John D.Lafferty Paul S.Roossin 79-85 J91-3005 @@ -251,7 +251,7 @@ A Computational Model of First Language Acquisition - Robert C.Berwick + Robert C.Berwick J91-3008 berwick-1991-computational @@ -292,11 +292,11 @@ An Efficient Natural Language Processing System Specially Designed for the <fixed-case>C</fixed-case>hinese Language - Lin-ShanLee + Lin-ShanLee Lee-FengChien Long-JiLin JamesHuang - K. J.Chen + K. J.Chen 347-374 J91-4001 lee-etal-1991-efficient @@ -310,7 +310,7 @@ The <fixed-case>G</fixed-case>enerative <fixed-case>L</fixed-case>exicon - JamesPustejovsky + JamesPustejovsky 409-441 J91-4003 pustejovsky-1991-generative @@ -329,7 +329,7 @@ Book Reviews: Current Issues in Parsing Technology - Robert J.Kuhns + Robert J.Kuhns J91-4006 kuhns-1991-book diff --git a/data/xml/J92.xml b/data/xml/J92.xml index ea534e1afa..cd373bd855 100644 --- a/data/xml/J92.xml +++ b/data/xml/J92.xml @@ -14,32 +14,32 @@ Using Multiple Knowledge Sources for Word Sense Discrimination - Susan W.McRoy + Susan W.McRoy 1-30 J92-1001 mcroy-1992-using An Estimate of an Upper Bound for the Entropy of <fixed-case>E</fixed-case>nglish - Peter F.Brown - Stephen A.Della Pietra - Vincent J.Della Pietra - Jennifer C.Lai - Robert L.Mercer + Peter F.Brown + Stephen A.Della Pietra + Vincent J.Della Pietra + Jennifer C.Lai + Robert L.Mercer 31-40 J92-1002 brown-etal-1992-estimate Language Generated by Two-Level Morphological Rules - Graeme D.Ritchie + Graeme D.Ritchie 41-60 J92-1003 ritchie-1992-language <fixed-case>TINA</fixed-case>: A Natural Language System for Spoken Language Applications - StephanieSeneff + StephanieSeneff 61-86 J92-1004 seneff-1992-tina @@ -52,25 +52,25 @@ Book Reviews: <fixed-case>E</fixed-case>nglish Word Grammar - Lynne J.Cahill + Lynne J.Cahill J92-1006 cahill-1992-book Book Reviews: Semantic Structures - YorickWilks + YorickWilks J92-1007 wilks-1992-book Reference and Computation - JohnBarnden + JohnBarnden J92-1008 barnden-1992-reference Mathematical Methods in Linguistics - AlexisManaster Ramer + AlexisManaster Ramer J92-1009 manaster-ramer-1992-mathematical @@ -82,7 +82,7 @@ Knowledge Representation and Metaphor - JamesMartin + JamesMartin J92-1011 martin-1992-knowledge @@ -126,15 +126,15 @@ Inheritance in <fixed-case>W</fixed-case>ord <fixed-case>G</fixed-case>rammar - Norman M.Fraser - Richard A.Hudson + Norman M.Fraser + Richard A.Hudson 133-158 J92-2001 fraser-hudson-1992-inheritance Inheritance and Constraint-Based Grammar Formalisms - RémiZajac + RémiZajac 159-182 J92-2002 zajac-1992-inheritance @@ -148,8 +148,8 @@ Inheritance in Natural Language Processing - WalterDaelemans - KoenraadDe Smedt + WalterDaelemans + KoenraadDe Smedt GeraldGazdar 205-218 J92-2004 @@ -189,17 +189,17 @@ Making <fixed-case>DATR</fixed-case> Work for Speech: Lexicon Compilation in <fixed-case>SUNDIAL</fixed-case> FrancoisAndry - Norman M.Fraser + Norman M.Fraser ScottMcGlashan SimonThornton - Nick J.Youd + Nick J.Youd 245-267 J92-3001 andry-etal-1992-making Inheritance and Complementation: a Case Study of Easy Adjectives and Related Nouns - DanFlickinger + DanFlickinger JohnNerbonne 269-309 J92-3002 @@ -207,10 +207,10 @@ A Practical Approach to Multiple Default Inheritance for Unification-Based Lexicons - GrahamRussell + GrahamRussell AfzalBallim - JohnCarroll - SusanWarwick-Armstrong + JohnCarroll + SusanWarwick-Armstrong 311-337 J92-3003 russell-etal-1992-practical @@ -267,7 +267,7 @@ Book Reviews: Literature and Cognition - JanyceWiebe + JanyceWiebe J92-3012 wiebe-1992-book @@ -304,25 +304,25 @@ Ambiguous Noun Phrases in Logical Form - Mary P.Harper + Mary P.Harper 419-466 J92-4002 harper-1992-ambiguous Class-Based <i>n</i>-gram Models of Natural Language - Peter F.Brown - Vincent J.Della Pietra - Peter V.deSouza - Jenifer C.Lai - Robert L.Mercer + Peter F.Brown + Vincent J.Della Pietra + Peter V.deSouza + Jenifer C.Lai + Robert L.Mercer 467-480 J92-4003 brown-etal-1992-class Using Descriptions of Trees in a <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar - KVijay-Shanker + KVijay-Shanker 481-518 J92-4004 vijay-shanker-1992-using @@ -344,7 +344,7 @@ A Problem for <fixed-case>RST</fixed-case>: The Need for Multi-Level Discourse Analysis - Johanna D.Moore + Johanna D.Moore Martha E.Pollack 537-544 J92-4007 @@ -376,7 +376,7 @@ Book Reviews: <fixed-case>P</fixed-case>rolog for Natural Language Processing - Norman M.Fraser + Norman M.Fraser J92-4012 fraser-1992-book diff --git a/data/xml/J93.xml b/data/xml/J93.xml index 59cc36a00d..f42370a94e 100644 --- a/data/xml/J93.xml +++ b/data/xml/J93.xml @@ -3,7 +3,7 @@ Computational Linguistics, Volume 19, Number 1, March 1993, Special Issue on Using Large Corpora: I - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1993 @@ -17,38 +17,38 @@ Introduction to the Special Issue on Computational Linguistics Using Large Corpora - Kenneth W.Church - Robert L.Mercer + Kenneth W.Church + Robert L.Mercer 1-24 J93-1001 church-mercer-1993-introduction Generalized Probabilistic <fixed-case>LR</fixed-case> Parsing of Natural Language (Corpora) with Unification-Based Grammars - TedBriscoe - JohnCarroll + TedBriscoe + JohnCarroll 25-59 J93-1002 briscoe-carroll-1993-generalized Accurate Methods for the Statistics of Surprise and Coincidence - TedDunning + TedDunning 61-74 J93-1003 dunning-1993-accurate A Program for Aligning Sentences in Bilingual Corpora - William A.Gale - Kenneth W.Church + William A.Gale + Kenneth W.Church 75-102 J93-1004 gale-church-1993-program Structural Ambiguity and Lexical Relations - DonaldHindle + DonaldHindle MatsRooth 103-120 J93-1005 @@ -64,34 +64,34 @@ Retrieving Collocations from Text: <fixed-case>X</fixed-case>tract - FrankSmadja + FrankSmadja 143-178 J93-1007 smadja-1993-retrieving The problem of logical form equivalence - Stuart M.Shieber + Stuart M.Shieber 179-190 J93-1008 shieber-1993-problem Issues in the choice of a source for Natural Language Generation - David D.McDonald + David D.McDonald 191-197 J93-1009 mcdonald-1993-issues Book Reviews: The Core Language Engine - Deborah A.Dahl + Deborah A.Dahl J93-1010 dahl-1993-book Book Reviews: Text Generation and Systemic-Functional Linguistics: Experiences from <fixed-case>E</fixed-case>nglish and <fixed-case>J</fixed-case>apanese - TerryPatten + TerryPatten J93-1011 patten-1993-book @@ -103,19 +103,19 @@
Book Reviews: Machine Translation: A Knowledge-Based Approach - StevenLytinen + StevenLytinen J93-1013 lytinen-1993-book Book Reviews: Corpus Linguistics and the Automatic Analysis of <fixed-case>E</fixed-case>nglish - TedBriscoe + TedBriscoe J93-1014 briscoe-1993-book Book Reviews: Lexical Acquisition: Exploiting On-Line Resources to Build a Lexicon - VictorSadler + VictorSadler J93-1015 sadler-1993-book @@ -133,7 +133,7 @@ Computational Linguistics, Volume 19, Number 2, June 1993, Special Issue on Using Large Corpora: II - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1993 @@ -161,18 +161,18 @@
The Mathematics of Statistical Machine Translation: Parameter Estimation - Peter F.Brown - Stephen A.Della Pietra - Vincent J.Della Pietra - Robert L.Mercer + Peter F.Brown + Stephen A.Della Pietra + Vincent J.Della Pietra + Robert L.Mercer 263-311 J93-2003 brown-etal-1993-mathematics Building a Large Annotated Corpus of <fixed-case>E</fixed-case>nglish: The <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank - Mitchell P.Marcus - BeatriceSantorini + Mitchell P.Marcus + BeatriceSantorini Mary AnnMarcinkiewicz 313-330 J93-2004 @@ -180,19 +180,19 @@ Lexical Semantic Techniques for Corpus Analysis - JamesPustejovsky + JamesPustejovsky SabineBergler - PeterAnick + PeterAnick 331-358 J93-2005 pustejovsky-etal-1993-lexical Coping with Ambiguity and Unknown Words through Probabilistic Models - RalphWeischedel - MarieMeteer - RichardSchwartz - LanceRamshaw + RalphWeischedel + MarieMeteer + RichardSchwartz + LanceRamshaw JeffPalmucci 359-382 J93-2006 @@ -200,7 +200,7 @@ Book Reviews: An Introduction to Machine Translation - GuðrunMagnúsdóttir + GuðrunMagnúsdóttir J93-2007 magnusdottir-1993-book @@ -218,13 +218,13 @@
Book Reviews: Principle-Based Parsing: Computation and Psycholinguistics - Geoffrey K.Pullum + Geoffrey K.Pullum J93-2010 pullum-1993-book Book Reviews: Questions and Information Systems - John S.White + John S.White J93-2011 white-1993-book @@ -248,7 +248,7 @@ Computational Linguistics, Volume 19, Number 3, September 1993 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1993 @@ -262,16 +262,16 @@ Evaluating Message Understanding Systems: An Analysis of the Third <fixed-case>M</fixed-case>essage <fixed-case>U</fixed-case>nderstanding <fixed-case>C</fixed-case>onference (<fixed-case>MUC</fixed-case>-3) - NancyChinchor - LynetteHirschman - David D.Lewis + NancyChinchor + LynetteHirschman + David D.Lewis 409-450 J93-3001 chinchor-etal-1993-evaluating A Computational Theory of Goal-Directed Style in Syntax - ChrysanneDiMarco + ChrysanneDiMarco GraemeHirst 451-500 J93-3002 @@ -280,7 +280,7 @@ Empirical Studies on the Disambiguation of Cue Phrases JuliaHirschberg - DianeLitman + DianeLitman 501-530 J93-3003 hirschberg-litman-1993-empirical @@ -294,7 +294,7 @@ Book Reviews: Ontologie und Axiomatik der Wissensbasis von <fixed-case>LILOG</fixed-case> - JohnBateman + JohnBateman J93-3005 bateman-1993-book @@ -312,7 +312,7 @@ Connectionist Approaches to Natural Language Processing - JamesHenderson + JamesHenderson J93-3008 henderson-1993-connectionist @@ -341,7 +341,7 @@ Computational Linguistics, Volume 19, Number 4, December 1993 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1993 @@ -355,44 +355,44 @@ The Interface between Phrasal and Functional Constraints - John T.Maxwell - Ronald M.Kaplan + John T.Maxwell + Ronald M.Kaplan 571-590 J93-4001 maxwell-kaplan-1993-interface Parsing Some Constrained Grammar Formalisms - KVijay-Shanker - David J.Weir + KVijay-Shanker + David J.Weir 591-636 J93-4002 vijay-shanker-weir-1993-parsing Indexical Expressions in the Scope of Attitude Verbs - Andrew R.Haas + Andrew R.Haas 637-649 J93-4003 haas-1993-indexical Planning Text for Advisory Dialogues: Capturing Intentional and Rhetorical Information - Johanna D.Moore - Cecile L.Paris + Johanna D.Moore + Cecile L.Paris 651-694 J93-4004 moore-paris-1993-planning Book Reviews: Functional Grammar in <fixed-case>P</fixed-case>rolog: An Integrated Implementation for <fixed-case>E</fixed-case>nglish, <fixed-case>F</fixed-case>rench, and <fixed-case>D</fixed-case>utch - PatrickSaint-Dizier + PatrickSaint-Dizier J93-4005 saint-dizier-1993-book Book Reviews: Natural Language Processing: The <fixed-case>PLNLP</fixed-case> Approach - Paul S.Jacobs + Paul S.Jacobs J93-4006 jacobs-1993-book diff --git a/data/xml/J94.xml b/data/xml/J94.xml index 257eb70fa8..ce0ee5e872 100644 --- a/data/xml/J94.xml +++ b/data/xml/J94.xml @@ -3,7 +3,7 @@ Computational Linguistics, Volume 20, Number 1, March 1994 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1994 @@ -33,7 +33,7 @@ One-Level Phonology: Autosegmental Representations and Rules as Finite Automata StevenBird - T. MarkEllison + T. MarkEllison 55-90 J94-1003 bird-ellison-1994-one @@ -41,7 +41,7 @@ An Alternative Conception of Tree-Adjoining Derivation YvesSchabes - Stuart M.Shieber + Stuart M.Shieber 91-124 J94-1004 schabes-shieber-1994-alternative @@ -54,7 +54,7 @@ Book Reviews: Generating Referring Expressions - DoniaScott + DoniaScott J94-1006 scott-1994-book @@ -66,13 +66,13 @@ Book Reviews: Expressibility and the Problem of Efficient Text Planning - RuslanMitkov + RuslanMitkov J94-1008 mitkov-1994-book Book Reviews: Explanation and Interaction: The Computer Generation of Explanatory Dialogues - SandraCarberry + SandraCarberry J94-1009 carberry-1994-book @@ -96,7 +96,7 @@
Book Reviews: The Logical Approach to Syntax: Foundations, Specifications, and Implementations of Theories of Government and Binding - Robert J.Kuhns + Robert J.Kuhns J94-1013 kuhns-1994-book @@ -114,7 +114,7 @@ Computational Linguistics, Volume 20, Number 2, June 1994 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1994 @@ -128,7 +128,7 @@ Tagging <fixed-case>E</fixed-case>nglish Text with a Probabilistic Model - BernardMerialdo + BernardMerialdo 155-171 J94-2001 merialdo-1994-tagging @@ -142,7 +142,7 @@ <fixed-case>J</fixed-case>apanese Discourse and the Process of Centering - MarilynWalker + MarilynWalker MasayoIida SharonCote 193-231 @@ -151,7 +151,7 @@ Tracking Point of View in Narrative - Janyce M.Wiebe + Janyce M.Wiebe 233-287 J94-2004 wiebe-1994-tracking @@ -167,7 +167,7 @@ <fixed-case>RAFT</fixed-case>/<fixed-case>RAPR</fixed-case> and Centering: a comparison and discussion of problems related to processing complex sentences Linda Z.Suri - KathleenMcCoy + KathleenMcCoy 301-317 J94-2006 suri-mccoy-1994-raft @@ -202,7 +202,7 @@ Computational Linguistics, Volume 20, Number 3, September 1994 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1994 @@ -216,7 +216,7 @@ Regular Models of Phonological Rule Systems - Ronald M.Kaplan + Ronald M.Kaplan MartinKay 331-378 J94-3001 @@ -224,21 +224,21 @@ Commentary on <fixed-case>K</fixed-case>aplan and <fixed-case>K</fixed-case>ay - MarkLiberman + MarkLiberman 379 J94-3002 liberman-1994-commentary Commentary on <fixed-case>K</fixed-case>aplan and <fixed-case>K</fixed-case>ay - GraemeRitchie + GraemeRitchie 380 J94-3003 ritchie-1994-commentary The Reconstruction Engine: A Computer Implementation of the Comparative Method - John B.Lowe + John B.Lowe MartineMazaudon 381-417 J94-3004 @@ -260,7 +260,7 @@ The Acquisition of Stress: A Data-Oriented Approach - WalterDaelemans + WalterDaelemans StevenGillis GertDurieux 421-453 @@ -298,14 +298,14 @@ Commentary on <fixed-case>B</fixed-case>ird and <fixed-case>K</fixed-case>lein - RichardSproat + RichardSproat 493 J94-3012 sproat-1994-commentary Book Reviews: <fixed-case>E</fixed-case>nglish Verb Classes and Alternations: A Preliminary Investigation - HaroldSomers + HaroldSomers J94-3013 somers-1994-book @@ -330,7 +330,7 @@ Computational Linguistics, Volume 20, Number 4, December 1994 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1994 @@ -345,7 +345,7 @@ A Syntactic Analysis Method of Long <fixed-case>J</fixed-case>apanese Sentences Based on the Detection of Conjunctive Structures SadaoKurohashi - MakotoNagao + MakotoNagao 507-534 J94-4001 kurohashi-nagao-1994-syntactic @@ -368,35 +368,35 @@ Machine Translation Divergences: A Formal Description and Proposed Solution - Bonnie J.Dorr + Bonnie J.Dorr 597-633 J94-4004 dorr-1994-machine Training and Scaling Preference Functions for Disambiguation - HiyanAlshawi - DavidCarter + HiyanAlshawi + DavidCarter 635-648 J94-4005 alshawi-carter-1994-training Squibs and Discussions: Storing Logical Form in a Shared-Packed Forest - Mary P.Harper + Mary P.Harper 649-660 J94-4006 harper-1994-squibs Book Reviews: Inheritance, Defaults, and the Lexicon - WalterDaelemans + WalterDaelemans J94-4007 daelemans-1994-book Book Reviews: Grammaires d’unification a traits et conto1e des infinitives en francais - DominiqueEstival + DominiqueEstival J94-4008 estival-1994-book diff --git a/data/xml/J95.xml b/data/xml/J95.xml index 4b1124adda..25505b8df0 100644 --- a/data/xml/J95.xml +++ b/data/xml/J95.xml @@ -3,7 +3,7 @@ Computational Linguistics, Volume 21, Number 1, March 1995 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1995 @@ -26,7 +26,7 @@ Expressing Rhetorical Relations in Instructional Text: a case study of the purpose relation KeithVander Linden - JamesMartin + JamesMartin 29-57 J95-1002 vander-linden-martin-1995-expressing @@ -42,9 +42,9 @@ Identifying Topic and Focus by an Automatic Procedure - EvaHajicova - HanaSkoumalova - PetrSgall + EvaHajicova + HanaSkoumalova + PetrSgall 81-94 J95-1004 hajicova-etal-1995-identifying @@ -59,7 +59,7 @@ Book Reviews: Statistical Language Learning - David M.Magerman + David M.Magerman J95-1006 magerman-1995-book @@ -77,13 +77,13 @@
Book Reviews: Challenges in Natural Language Processing - EduardHovy + EduardHovy J95-1009 hovy-1995-book Book Reviews: The Language Complexity Game - AlexisManaster Ramer + AlexisManaster Ramer J95-1010 manaster-ramer-1995-book @@ -106,7 +106,7 @@ Computational Linguistics, Volume 21, Number 2, June 1995 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1995 @@ -121,22 +121,22 @@ Automatic Stochastic Tagging of Natural Language Texts EvangelosDermatas - GeorgeKokkinakis + GeorgeKokkinakis 137-163 J95-2001 dermatas-kokkinakis-1995-automatic An Efficient Probabilistic Context-Free Parsing Algorithm that Computes Prefix Probabilities - AndreasStolcke + AndreasStolcke 165-201 J95-2002 stolcke-1995-efficient <fixed-case>C</fixed-case>entering: A Framework for Modeling the Local Coherence of Discourse - Barbara J.Grosz - Aravind K.Joshi + Barbara J.Grosz + Aravind K.Joshi ScottWeinstein 203-225 ACL 2020 Test-of-Time Award (25 years) @@ -153,7 +153,7 @@ Squibs and Discussions: Efficient Parsing for <fixed-case>K</fixed-case>orean and <fixed-case>E</fixed-case>nglish: A Parameterized Message-Passing Approach - Bonnie J.Dorr + Bonnie J.Dorr Jye-hoonLee DekangLin SungkiSuh @@ -197,7 +197,7 @@ Computational Linguistics, Volume 21, Number 3, September 1995 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1995 @@ -211,16 +211,16 @@ An Architecture for Voice Dialog Systems Based on <fixed-case>P</fixed-case>rolog-Style Theorem Proving - Ronnie W.Smith + Ronnie W.Smith D. RichardHipp - Alan W.Biermann + Alan W.Biermann 281-320 J95-3001 smith-etal-1995-architecture Robust Learning, Smoothing, and Parameter Tying on Syntactic Ambiguity Resolution - Tung-HuiChiang + Tung-HuiChiang Yi-ChungLin Keh-YihSu 321-349 @@ -229,7 +229,7 @@ Collaborating on Referring Expressions - Peter A.Heeman + Peter A.Heeman GraemeHirst 351-382 J95-3003 @@ -288,7 +288,7 @@ Computational Linguistics, Volume 21, Number 4, December 1995 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1995 @@ -302,7 +302,7 @@ The Repair of Speech Act Misunderstandings by Abductive Inference - Susan W.McRoy + Susan W.McRoy GraemeHirst 435-478 J95-4001 @@ -351,13 +351,13 @@ Book Reviews: Compositional translation - Bonnie J.Dorr + Bonnie J.Dorr J95-4008 dorr-1995-book Book Reviews: Speech-to-speech translation: A massively parallel memory-based approach - NigelWard + NigelWard J95-4009 ward-1995-book diff --git a/data/xml/J96.xml b/data/xml/J96.xml index e7fc02deae..45fe851393 100644 --- a/data/xml/J96.xml +++ b/data/xml/J96.xml @@ -3,7 +3,7 @@ Computational Linguistics, Volume 22, Number 1, March 1996 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1996 @@ -17,8 +17,8 @@ Translating Collocations for Bilingual Lexicons: A Statistical Approach - FrankSmadja - Kathleen R.McKeown + FrankSmadja + Kathleen R.McKeown VasileiosHatzivassiloglou 1-38 J96-1001 @@ -26,9 +26,9 @@ A Maximum Entropy Approach to Natural Language Processing - Adam L.Berger - Stephen A.Della Pietra - Vincent J.Della Pietra + Adam L.Berger + Stephen A.Della Pietra + Vincent J.Della Pietra 39-71 ACL 2021 Test-of-Time Award (25 year) J96-1002 @@ -58,13 +58,13 @@ Book Reviews: Natural Language Processing for <fixed-case>P</fixed-case>rolog Programmers KenBarker - StanSzpakowicz + StanSzpakowicz J96-1006 barker-szpakowicz-1996-book Book Reviews: Logic and Lexicon - MassimoPoesio + MassimoPoesio J96-1007 poesio-1996-book @@ -94,7 +94,7 @@ Computational Linguistics, Volume 22, Number 2, June 1996 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1996 @@ -108,15 +108,15 @@ Estimating Lexical Priors for Low-Frequency Morphologically Ambiguous Forms - HaraldBaayen - RichardSproat + HaraldBaayen + RichardSproat 155-166 J96-2001 baayen-sproat-1996-estimating <fixed-case>DATR</fixed-case>: A Language for Lexical Knowledge Representation - RogerEvans + RogerEvans GeraldGazdar 167-216 J96-2002 @@ -125,7 +125,7 @@ Improving Statistical Language Model Performance with Automatically Generated Word Hierarchies John G.McMahon - Francis J.Smith + Francis J.Smith 217-247 J96-2003 mcmahon-smith-1996-improving @@ -140,14 +140,14 @@ Limited Attention and Discourse Structure - Marilyn A.Walker + Marilyn A.Walker 255-264 J96-2005 walker-1996-limited Book Reviews: Time-constrained Memory: A Reader-based Approach to Text Comprehension - Arthur C.Graesser + Arthur C.Graesser J96-2006 graesser-1996-book @@ -159,7 +159,7 @@ Book Reviews: Representing Time in Natural Language: The Dynamic Interpretation of Tense and Aspect - Rebecca J.Passonneau + Rebecca J.Passonneau J96-2008 passonneau-1996-book @@ -175,7 +175,7 @@
Letters to the Editor - EricRistad + EricRistad J96-2011 ristad-1996-letters @@ -188,7 +188,7 @@ Computational Linguistics, Volume 22, Number 3, September 1996 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1996 @@ -202,7 +202,7 @@ Unification Encodings of Grammatical Notations - Stephen G.Pulman + Stephen G.Pulman 295-327 J96-3001 pulman-1996-unification @@ -219,16 +219,16 @@ Efficient Multilingual Phoneme-to-Grapheme Conversion Based on <fixed-case>HMM</fixed-case> Panagiotis A.Rentzepopoulos - George K.Kokkinakis + George K.Kokkinakis 351-376 J96-3003 rentzepopoulos-kokkinakis-1996-efficient A Stochastic Finite-State Word-Segmentation Algorithm for <fixed-case>C</fixed-case>hinese - Richard W.Sproat + Richard W.Sproat ChilinShih - WilliamGale + WilliamGale NancyChang 377-404 J96-3004 @@ -245,7 +245,7 @@ Toward a Synthesis of Two Accounts of Discourse Structure MeganMoser - Johanna D.Moore + Johanna D.Moore 409-419 J96-3006 moser-moore-1996-toward @@ -253,20 +253,20 @@ A Chart Re-estimation Algorithm for a Probabilistic Recursive Transition Network Young S.Han - Key-SunChoi + Key-SunChoi 421-429 J96-3007 han-choi-1996-chart Book Reviews: Spoken Natural Language Dialogue Systems: A Practical Approach - David R.Traum + David R.Traum J96-3008 traum-1996-book Book Reviews: Electric Words: Dictionaries, Computers, and Meanings - ArchibaldMichiels + ArchibaldMichiels J96-3009 michiels-1996-book @@ -278,7 +278,7 @@ Book Reviews: Speakers, Listeners, and Communication: Explorations in Discourse Analysis - SusanMcRoy + SusanMcRoy J96-3011 mcroy-1996-book @@ -307,7 +307,7 @@ Computational Linguistics, Volume 22, Number 4, December 1996 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1996 @@ -321,7 +321,7 @@ The Effects of Lexical Specialization on the Growth Curve of the Vocabulary - R. HaraldBaayen + R. HaraldBaayen 455-480 J96-4001 baayen-1996-effects @@ -336,33 +336,33 @@ Learning Bias and Phonological-Rule Induction DanielGildea - DanielJurafsky + DanielJurafsky 497-530 J96-4003 gildea-jurafsky-1996-learning A Statistically Emergent Approach for Language Processing: Application to Modeling Context Effects in Ambiguous <fixed-case>C</fixed-case>hinese Word Boundary Perception - Kok-WeeGan - MarthaPalmer - Kim-TengLua + Kok-WeeGan + MarthaPalmer + Kim-TengLua 531-553 J96-4004 gan-etal-1996-statistically Ambiguity-preserving Generation with <fixed-case>LFG</fixed-case>- and <fixed-case>PATR</fixed-case>-style Grammars - JurgenWedekind - Ronald M.Kaplan + JurgenWedekind + Ronald M.Kaplan 555-558 J96-4005 wedekind-kaplan-1996-ambiguity Integrating General-purpose and Corpus-based Verb Classification - RobertoBasili - Maria TeresaPazienza - PaolaVelardi + RobertoBasili + Maria TeresaPazienza + PaolaVelardi 559-568 J96-4006 basili-etal-1996-integrating diff --git a/data/xml/J97.xml b/data/xml/J97.xml index a1830c4702..70f2095300 100644 --- a/data/xml/J97.xml +++ b/data/xml/J97.xml @@ -3,7 +3,7 @@ Computational Linguistics, Volume 23, Number 1, March 1997 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1997 @@ -17,8 +17,8 @@ Empirical Studies in Discourse - Marilyn A.Walker - Johanna D.Moore + Marilyn A.Walker + Johanna D.Moore 1-12 J97-1001 walker-moore-1997-empirical @@ -30,37 +30,37 @@ StephenIsard Jacqueline C.Kowtko GwynethDoherty-Sneddon - Anne H.Anderson + Anne H.Anderson 13-31 J97-1002 carletta-etal-1997-reliability Text Tiling: Segmenting Text into Multi-paragraph Subtopic Passages - Marti A.Hearst + Marti A.Hearst 33-64 J97-1003 hearst-1997-text Developing and Empirically Evaluating Robust Explanation Generators: The <fixed-case>KNIGHT</fixed-case> Experiments - James C.Lester - Bruce W.Porter + James C.Lester + Bruce W.Porter 65-101 J97-1004 lester-porter-1997-developing Discourse Segmentation by Human and Automated Means - Rebecca J.Passonneau - Diane J.Litman + Rebecca J.Passonneau + Diane J.Litman 103-139 J97-1005 passonneau-litman-1997-discourse Effects of Variable Initiative on Linguistic Behavior in Human-Computer Spoken Natural Language Dialogue - Ronnie W.Smith + Ronnie W.Smith Steven A.Gordon 141-168 J97-1006 @@ -69,7 +69,7 @@ An Empirical Study on the Generation of Anaphora in <fixed-case>C</fixed-case>hinese Ching-LongYeh - ChrisMellish + ChrisMellish 169-190 J97-1007 yeh-mellish-1997-empirical @@ -89,7 +89,7 @@ Computational Linguistics, Volume 23, Number 2, June 1997 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1997 @@ -104,7 +104,7 @@ Floating Constraints in Lexical Choice MichaelElhadad - KathleenMcKeown + KathleenMcKeown JacquesRobin 195-239 J97-2001 @@ -112,8 +112,8 @@ Adaptive Multilingual Sentence Boundary Disambiguation - David D.Palmer - Marti A.Hearst + David D.Palmer + Marti A.Hearst 241-267 J97-2002 palmer-hearst-1997-adaptive @@ -127,8 +127,8 @@ A Class-based Approach to Word Alignment - Sue J.Ker - Jason S.Chang + Sue J.Ker + Jason S.Chang 313-343 J97-2004 ker-chang-1997-class @@ -166,7 +166,7 @@ Computational Linguistics, Volume 23, Number 3, September 1997 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1997 @@ -201,29 +201,29 @@
An Efficient Implementation of the Head-Corner Parser - Gertjanvan Noord + Gertjanvan Noord 425-456 J97-3004 van-noord-1997-efficient Anaphoric Dependencies in Ellipsis - AndrewKehler - StuartShieber + AndrewKehler + StuartShieber 457-466 J97-3005 kehler-shieber-1997-anaphoric Current theories of centering for pronoun interpretation: a critical evaluation - AndrewKehler + AndrewKehler 467-475 J97-3006 kehler-1997-current Book Reviews: Semantic Ambiguity and Underspecification - Peter J.Ludlow + Peter J.Ludlow J97-3007 ludlow-1997-book @@ -263,7 +263,7 @@ Computational Linguistics, Volume 23, Number 4, December 1997 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1997 @@ -292,7 +292,7 @@
A Computational Treatment of Lexical Rules in <fixed-case>HPSG</fixed-case> as Covariation in Lexical Entries - W. DetmarMeurers + W. DetmarMeurers GuidoMinnen 543-568 J97-4003 @@ -307,7 +307,7 @@ Stochastic Attribute-Value Grammars - Steven P.Abney + Steven P.Abney 597-618 J97-4005 abney-1997-stochastic @@ -320,13 +320,13 @@ Book Reviews: Industrial Parsing of Software Manuals - JohnCarroll + JohnCarroll J97-4007 carroll-1997-book Book Reviews: Using Language - Marilyn A.Walker + Marilyn A.Walker J97-4008 walker-1997-book diff --git a/data/xml/J98.xml b/data/xml/J98.xml index 255cfd1347..3fd55e7a3a 100644 --- a/data/xml/J98.xml +++ b/data/xml/J98.xml @@ -3,7 +3,7 @@ Computational Linguistics, Volume 24, Number 1, March 1998 - Special Issue on Word Sense Disambiguation - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1998 @@ -17,8 +17,8 @@ Introduction to the Special Issue on Word Sense Disambiguation: The State of the Art - NancyIde - JeanVéronis + NancyIde + JeanVéronis 1-40 J98-1001 ide-veronis-1998-introduction @@ -33,15 +33,15 @@ Topical Clustering of <fixed-case>MRD</fixed-case> Senses Based on Information Retrieval Techniques - Jen NanChen - Jason S.Chang + Jen NanChen + Jason S.Chang 61-95 J98-1003 chen-chang-1998-topical Automatic Word Sense Discrimination - HinrichSchütze + HinrichSchütze 97-123 J98-1004 schutze-1998-automatic @@ -50,7 +50,7 @@ Disambiguating Highly Ambiguous Words GeoffreyTowell - Ellen M.Voorhees + Ellen M.Voorhees 125-145 J98-1005 towell-voorhees-1998-disambiguating @@ -58,7 +58,7 @@ Using Corpus Statistics and <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Relations for Sense Identification ClaudiaLeacock - MartinChodorow + MartinChodorow George A.Miller 147-165 J98-1006 @@ -96,7 +96,7 @@ Computational Linguistics, Volume 24, Number 2, June 1998 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1998 @@ -110,8 +110,8 @@ A Corpus-based Investigation of Definite Description Use - MassimoPoesio - RenataVieira + MassimoPoesio + RenataVieira 183-216 J98-2001 poesio-vieira-1998-corpus @@ -127,7 +127,7 @@ Contextual Grammars as Generative Models of Natural Language SolomonMarcus - CarlosMartín-Vide + CarlosMartín-Vide GheorghePăun 245-274 J98-2003 @@ -159,19 +159,19 @@ Book Reviews: Corpus-Based Methods in Language and Speech Processing - RebeccaBruce + RebeccaBruce J98-2007 bruce-1998-book Book Reviews: Text Databases: One Database Model and Several Retrieval Languages - NancyIde + NancyIde J98-2008 ide-1998-book Book Reviews: An Introduction to Text-to-Speech Synthesis - EileenFitzpatrick + EileenFitzpatrick J98-2009 fitzpatrick-1998-book @@ -218,7 +218,7 @@ Computational-Linguistics, Volume 24, Number 3, September 1998 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1998 @@ -234,15 +234,15 @@ Introduction to the Special Issue on Natural Language Generation RobertDale BarbaraDi Eugenio - DoniaScott + DoniaScott 345-353 J98-3001 dale-etal-1998-introduction
Collaborative Response Generation in Planning Dialogues - JenniferChu-Carroll - SandraCarberry + JenniferChu-Carroll + SandraCarberry 355-400 J98-3002 chu-carroll-carberry-1998-collaborative @@ -256,25 +256,25 @@ Describing Complex Charts in Natural Language: A Caption Generation System - Vibhu O.Mittal - Johanna D.Moore + Vibhu O.Mittal + Johanna D.Moore GiuseppeCarenini - StevenRoth + StevenRoth 431-467 J98-3004 mittal-etal-1998-describing Generating Natural Language Summaries from Multiple On-Line Sources - Dragomir R.Radev - Kathleen R.McKeown + Dragomir R.Radev + Kathleen R.McKeown 469-500 J98-3005 radev-mckeown-1998-generating Do the Right Thing … but Expect the Unexpected - JonOberlander + JonOberlander 501-507 J98-3006 oberlander-1998-right @@ -293,7 +293,7 @@ Book Review: Machine Translation and Translation Theory - FrankVan Eynde + FrankVan Eynde J98-3009 van-eynde-1998-book @@ -312,7 +312,7 @@ Computational Linguistics, Volume 24, Number 4, December 1998 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1998 @@ -326,7 +326,7 @@ A Collaborative Planning Model of Intentional Structure - Karen E.Lochbaum + Karen E.Lochbaum 525-572 J98-4001 lochbaum-1998-collaborative @@ -389,7 +389,7 @@ Multilingual Text-to-Speech Synthesis: The Bell Labs Approach - DouglasO’Shaughnessy + DouglasO’Shaughnessy J98-4010 oshaughnessy-1998-multilingual diff --git a/data/xml/J99.xml b/data/xml/J99.xml index c35c757aed..caefcbd2a8 100644 --- a/data/xml/J99.xml +++ b/data/xml/J99.xml @@ -3,7 +3,7 @@ Computational Linguistics, Volume 25, Number 1, March 1999 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1999 @@ -17,7 +17,7 @@ A Process Model for Recognizing Communicative Acts and Modeling Negotiation Subdialogues - SandraCarberry + SandraCarberry LynnLambert 1-53 J99-1001 @@ -59,7 +59,7 @@ Book Reviews: Linguistic Databases - JörgTiedemann + JörgTiedemann J99-1007 tiedemann-1999-book @@ -82,7 +82,7 @@ Computational Linguistics, Volume 25, Number 2, June 1999 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1999 @@ -97,16 +97,16 @@ A Methodology for Extending Focusing Frameworks Linda Z.Suri - Kathleen F.McCoy - Jonathan D.DeCristofaro + Kathleen F.McCoy + Jonathan D.DeCristofaro 173-194 J99-2001 suri-etal-1999-methodology Decomposable Modeling in Natural Language Processing - RebeccaBruce - JanyceWiebe + RebeccaBruce + JanyceWiebe 195-207 J99-2002 bruce-wiebe-1999-decomposable @@ -114,7 +114,7 @@ <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars in a Fragment of the <fixed-case>L</fixed-case>ambek Calculus V. MicheleAbrusci - ChristopheFouqueré + ChristopheFouqueré JacquelineVauzeilles 209-236 J99-2003 @@ -122,22 +122,22 @@ <fixed-case>S</fixed-case>upertagging: An Approach to Almost Parsing - SrinivasBangalore - Aravind K.Joshi + SrinivasBangalore + Aravind K.Joshi 237-265 J99-2004 bangalore-joshi-1999-supertagging Aligning Phonetic Segments for Children’s Articulation Assessment - HaroldSomers + HaroldSomers 267-275 J99-2005 somers-1999-aligning Semantic-driven Generation with <fixed-case>LFG</fixed-case>- and <fixed-case>PATR</fixed-case>-style Grammars - JürgenWedekind + JürgenWedekind 277-281 J99-2006 wedekind-1999-semantic @@ -193,7 +193,7 @@ Computational Linguistics, Volume 25, Number 3, September 1999 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1999 @@ -222,7 +222,7 @@
Vector-based Natural Language Call Routing - JenniferChu-Carroll + JenniferChu-Carroll BobCarpenter 361-388 J99-3003 @@ -230,27 +230,27 @@ Interpreting and Generating Indirect Answers - NancyGreen - SandraCarberry + NancyGreen + SandraCarberry 389-435 J99-3004 green-carberry-1999-interpreting Book Reviews: Ambiguity Resolution in Language Learning: Computational and Cognitive Models - HinrichSchütze + HinrichSchütze J99-3005 schutze-1999-book Book Reviews: Beyond Grammar: An Experience-based Theory of Language - MichaelCollins + MichaelCollins J99-3006 collins-1999-book Book Reviews: Type-Logical Semantics - StephenPulman + StephenPulman J99-3007 pulman-1999-book @@ -262,7 +262,7 @@
Book Reviews: Processing Metonymy and Metaphor - StéphaneFerrari + StéphaneFerrari J99-3009 ferrari-1999-book @@ -286,7 +286,7 @@ Computational Linguistics, Volume 25, Number 4, December 1999 - JuliaHirschberg + JuliaHirschberg MIT Press
Cambridge, MA
1999 @@ -300,14 +300,14 @@ Completeness conditions for mixed strategy bidirectional parsing - GraemeRitchie + GraemeRitchie 457-486 J99-4001 ritchie-1999-completeness Lexical rules in constraint based grammars - TedBriscoe + TedBriscoe AnnCopestake 487–526 J99-4002 @@ -315,15 +315,15 @@ Speech repains, intonational phrases, and discourse markers: modeling speakers’ utterances in spoken dialogue - Peter A.Heeman - James F.Allen + Peter A.Heeman + James F.Allen 527-572 J99-4003 heeman-allen-1999-speech Semiring Parsing - JoshuaGoodman + JoshuaGoodman 573-606 J99-4004 goodman-1999-semiring @@ -337,7 +337,7 @@ Conceptions of limited attention and discourse focus - Barbara J.Grosz + Barbara J.Grosz Peter C.Gordon 617-624 J99-4006 @@ -345,7 +345,7 @@ Book Reviews: Centering Theory in Discourse - RuslanMitkov + RuslanMitkov J99-4007 mitkov-1999-book @@ -373,7 +373,7 @@
Letter to the Editor: Language Technology for Beginners - Ronald A.Cole + Ronald A.Cole J99-4012 cole-1999-letter diff --git a/data/xml/K15.xml b/data/xml/K15.xml index c3a48419d0..d6c6828444 100644 --- a/data/xml/K15.xml +++ b/data/xml/K15.xml @@ -17,9 +17,9 @@ A Coactive Learning View of Online Structured Prediction in Statistical Machine Translation - ArtemSokolov + ArtemSokolov StefanRiezler - Shay B.Cohen + Shay B.Cohen 1–11 K15-1001 10.18653/v1/K15-1001 @@ -38,9 +38,9 @@ A Supertag-Context Model for Weakly-Supervised <fixed-case>CCG</fixed-case> Parser Learning DanGarrette - ChrisDyer + ChrisDyer JasonBaldridge - Noah A.Smith + Noah A.Smith 22–31 K15-1003 10.18653/v1/K15-1003 @@ -60,7 +60,7 @@ <fixed-case>AIDA</fixed-case>2: A Hybrid Approach for Token and Sentence Level Dialect Identification in <fixed-case>A</fixed-case>rabic MohamedAl-Badrashiny HebaElfardy - MonaDiab + MonaDiab 42–51 K15-1005 10.18653/v1/K15-1005 @@ -78,9 +78,9 @@ Analyzing Optimization for Statistical Machine Translation: <fixed-case>MERT</fixed-case> Learns Verbosity, <fixed-case>PRO</fixed-case> Learns Length - FranciscoGuzmán - PreslavNakov - StephanVogel + FranciscoGuzmán + PreslavNakov + StephanVogel 62–72 K15-1007 10.18653/v1/K15-1007 @@ -102,7 +102,7 @@ LiyuanZhou WeiweiHou NathanSchneider - TimothyBaldwin + TimothyBaldwin 83–93 K15-1009 10.18653/v1/K15-1009 @@ -120,9 +120,9 @@ Cross-lingual syntactic variation over age and gender - AndersJohannsen + AndersJohannsen DirkHovy - AndersSøgaard + AndersSøgaard 103–112 K15-1011 10.18653/v1/K15-1011 @@ -130,8 +130,8 @@ Cross-lingual Transfer for Unsupervised Dependency Parsing Without Parallel Data - LongDuong - TrevorCohn + LongDuong + TrevorCohn StevenBird PaulCook 113–122 @@ -142,7 +142,7 @@ Detecting Semantically Equivalent Questions in Online User Forums DashaBogdanova - Cícerodos Santos + Cícerodos Santos LucianoBarbosa BiancaZadrozny 123–131 @@ -153,7 +153,7 @@ Entity Linking <fixed-case>K</fixed-case>orean Text: An Unsupervised Learning Approach using Semantic Relations YoungsikKim - Key-SunChoi + Key-SunChoi 132–141 K15-1014 10.18653/v1/K15-1014 @@ -162,7 +162,7 @@ Incremental Recurrent Neural Network Dependency Parser with Search-based Discriminative Training MajidYazdani - JamesHenderson + JamesHenderson 142–152 K15-1015 10.18653/v1/K15-1015 @@ -171,7 +171,7 @@ Instance Selection Improves Cross-Lingual Model Training for Fine-Grained Sentiment Analysis RomanKlinger - PhilippCimiano + PhilippCimiano 153–163 K15-1016 10.18653/v1/K15-1016 @@ -180,9 +180,9 @@ Labeled Morphological Segmentation with Semi-<fixed-case>M</fixed-case>arkov Models RyanCotterell - ThomasMüller - AlexanderFraser - HinrichSchütze + ThomasMüller + AlexanderFraser + HinrichSchütze 164–174 K15-1017 10.18653/v1/K15-1017 @@ -212,7 +212,7 @@ Making the Most of Crowdsourced Document Annotations: Confused Supervised <fixed-case>LDA</fixed-case> PaulFelt - EricRingger + EricRingger JordanBoyd-Graber KevinSeppi 194–203 @@ -223,7 +223,7 @@ Multichannel Variable-Size Convolution for Sentence Classification WenpengYin - HinrichSchütze + HinrichSchütze 204–214 K15-1021 10.18653/v1/K15-1021 @@ -241,7 +241,7 @@ Quantity, Contrast, and Convention in Cross-Situated Language Comprehension IanPerera - JamesAllen + JamesAllen 226–236 K15-1023 10.18653/v1/K15-1023 @@ -323,9 +323,9 @@ Deep Neural Language Models for Machine Translation - ThangLuong + ThangLuong MichaelKayser - Christopher D.Manning + Christopher D.Manning 305–309 K15-1031 10.18653/v1/K15-1031 @@ -335,7 +335,7 @@ Finding Opinion Manipulation Trolls in News Community Forums TodorMihaylov GeorgiGeorgiev - PreslavNakov + PreslavNakov 310–314 K15-1032 10.18653/v1/K15-1032 @@ -343,11 +343,11 @@ Do dependency parsing metrics correlate with human judgments? - BarbaraPlank - HéctorMartínez Alonso - ŽeljkoAgić + BarbaraPlank + HéctorMartínez Alonso + ŽeljkoAgić DanijelaMerkler - AndersSøgaard + AndersSøgaard 315–320 K15-1033 10.18653/v1/K15-1033 @@ -395,7 +395,7 @@ Reading behavior predicts syntactic categories MariaBarrett - AndersSøgaard + AndersSøgaard 345–349 K15-1038 10.18653/v1/K15-1038 @@ -421,7 +421,7 @@ The <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2015 Shared Task on Shallow Discourse Parsing NianwenXue Hwee TouNg - SameerPradhan + SameerPradhan RashmiPrasad ChristopherBryant AttapolRutherford @@ -432,8 +432,8 @@ A Refined End-to-End Discourse Parser - JianxiangWang - ManLan + JianxiangWang + ManLan 17–24 K15-2002 10.18653/v1/K15-2002 @@ -441,7 +441,7 @@ The <fixed-case>U</fixed-case>ni<fixed-case>TN</fixed-case> Discourse Parser in <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2015 Shared Task: Token-level Sequence Labeling with Argument-specific Models - EvgenyStepanov + EvgenyStepanov GiuseppeRiccardi Ali OrkanBayer 25–31 @@ -453,7 +453,7 @@ The <fixed-case>S</fixed-case>o<fixed-case>NLP</fixed-case>-<fixed-case>DP</fixed-case> System in the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2015 shared Task FangKong ShengLi - GuodongZhou + GuodongZhou 32–36 K15-2004 10.18653/v1/K15-2004 @@ -480,12 +480,12 @@ A Hybrid Discourse Relation Parser in <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2015 - SobhaLalitha Devi + SobhaLalitha Devi SindhujaGopalan - LakshmiS. - PattabhiRK Rao - VijaySundar Ram - MalarkodiC.S. + LakshmiS. + PattabhiRK Rao + VijaySundar Ram + MalarkodiC.S. 50–55 K15-2007 10.18653/v1/K15-2007 @@ -506,7 +506,7 @@ ShubhamMukherjee AbhishekTiwari MohitGupta - AnilKumar Singh + AnilKumar Singh 61–65 K15-2009 10.18653/v1/K15-2009 @@ -515,8 +515,8 @@ <fixed-case>JAIST</fixed-case>: A two-phase machine learning approach for identifying discourse relations in newswire texts Truong SonNguyen - Bao QuocHo - Le MinhNguyen + Bao QuocHo + Le MinhNguyen 66–70 K15-2010 10.18653/v1/K15-2010 @@ -558,7 +558,7 @@ The <fixed-case>DCU</fixed-case> Discourse Parser for Connective, Argument Identification and Explicit Sense Classification LongyueWang - ChrisHokamp + ChrisHokamp TsuyoshiOkita XiaojunZhang QunLiu diff --git a/data/xml/K16.xml b/data/xml/K16.xml index 05196c4cbc..ab9aa2f0dc 100644 --- a/data/xml/K16.xml +++ b/data/xml/K16.xml @@ -27,7 +27,7 @@ Generating Sentences from a Continuous Space - Samuel R.Bowman + Samuel R.Bowman LukeVilnis OriolVinyals AndrewDai @@ -41,8 +41,8 @@ Identifying Temporal Orientation of Word Senses MohammedHasanuzzaman - GaëlDias - StéphaneFerrari + GaëlDias + StéphaneFerrari YannMathet AndyWay 22–30 @@ -85,7 +85,7 @@ Learning to Jointly Predict Ellipsis and Comparison Structures OmidBakhshandeh AlexisCornelia Wellwood - JamesAllen + JamesAllen 62–74 K16-1007 10.18653/v1/K16-1007 @@ -102,7 +102,7 @@ Beyond Centrality and Structural Features: Learning Information Importance for Text Summarization MarkusZopf - EneldoLoza Mencía + EneldoLoza Mencía JohannesFürnkranz 84–94 K16-1009 @@ -122,7 +122,7 @@ A Data-driven Investigation of Corrective Feedback on Subject Omission Errors in First Language Acquisition SarahHiller - RaquelFernández + RaquelFernández 105–114 K16-1011 10.18653/v1/K16-1011 @@ -132,7 +132,7 @@ Redefining part-of-speech classes with distributional semantic models AndreyKutuzov ErikVelldal - LiljaØvrelid + LiljaØvrelid 115–125 K16-1012 10.18653/v1/K16-1012 @@ -143,7 +143,7 @@ RebeccaKnowles AdithyaRenduchintala PhilippKoehn - JasonEisner + JasonEisner 126–135 K16-1013 10.18653/v1/K16-1013 @@ -162,8 +162,8 @@ Harnessing Sequence Labeling for Sarcasm Detection in Dialogue from <fixed-case>TV</fixed-case> Series ‘<fixed-case>F</fixed-case>riends’ AdityaJoshi VaibhavTripathi - PushpakBhattacharyya - Mark J.Carman + PushpakBhattacharyya + Mark J.Carman 146–155 K16-1015 10.18653/v1/K16-1015 @@ -175,7 +175,7 @@ DipteshKanojia SeemaNagar KuntalDey - PushpakBhattacharyya + PushpakBhattacharyya 156–166 K16-1016 10.18653/v1/K16-1016 @@ -184,10 +184,10 @@ Modelling Context with User Embeddings for Sarcasm Detection in Social Media SilvioAmir - Byron C.Wallace + Byron C.Wallace HaoLyu PaulaCarvalho - Mário J.Silva + Mário J.Silva 167–177 K16-1017 10.18653/v1/K16-1017 @@ -196,7 +196,7 @@ Learning when to trust distant supervision: An application to low-resource <fixed-case>POS</fixed-case> tagging using cross-lingual projection MengFang - TrevorCohn + TrevorCohn 178–186 K16-1018 10.18653/v1/K16-1018 @@ -206,8 +206,8 @@ Greedy, Joint Syntactic-Semantic Parsing with Stack <fixed-case>LSTM</fixed-case>s SwabhaSwayamdipta MiguelBallesteros - ChrisDyer - Noah A.Smith + ChrisDyer + Noah A.Smith 187–197 K16-1019 10.18653/v1/K16-1019 @@ -215,10 +215,10 @@ Beyond Prefix-Based Interactive Translation Prediction - JesúsGonzález-Rubio - DanielOrtiz-Martínez - FranciscoCasacuberta - José MiguelBenedi Ruiz + JesúsGonzález-Rubio + DanielOrtiz-Martínez + FranciscoCasacuberta + José MiguelBenedi Ruiz 198–207 K16-1020 10.18653/v1/K16-1020 @@ -226,9 +226,9 @@ Exploring Prediction Uncertainty in Machine Translation Quality Estimation - DanielBeck + DanielBeck LuciaSpecia - TrevorCohn + TrevorCohn 208–218 K16-1021 10.18653/v1/K16-1021 @@ -247,7 +247,7 @@ Coreference in <fixed-case>W</fixed-case>ikipedia: Main Concept Resolution AbbasGhaddar - PhillippeLanglais + PhillippeLanglais 229–238 K16-1023 10.18653/v1/K16-1023 @@ -290,8 +290,8 @@ Substring-based unsupervised transliteration with phonetic and contextual knowledge AnoopKunchukuttan - PushpakBhattacharyya - Mitesh M.Khapra + PushpakBhattacharyya + Mitesh M.Khapra 270–279 K16-1027 10.18653/v1/K16-1027 @@ -301,7 +301,7 @@ Abstractive Text Summarization using Sequence-to-sequence <fixed-case>RNN</fixed-case>s and Beyond RameshNallapati BowenZhou - Cicerodos Santos + Cicerodos Santos ÇağlarGu̇lçehre BingXiang 280–290 @@ -312,8 +312,8 @@ Compression of Neural Machine Translation Models via Pruning AbigailSee - Minh-ThangLuong - Christopher D.Manning + Minh-ThangLuong + Christopher D.Manning 291–301 K16-1029 10.18653/v1/K16-1029 @@ -324,7 +324,7 @@ FrancesYung KevinDuh TakuKomura - YujiMatsumoto + YujiMatsumoto 302–313 K16-1030 10.18653/v1/K16-1030 @@ -360,9 +360,9 @@ <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2016 Shared Task on Multilingual Shallow Discourse Parsing NianwenXue Hwee TouNg - SameerPradhan + SameerPradhan AttapolRutherford - BonnieWebber + BonnieWebber ChuanWang HongminWang 1–19 @@ -378,7 +378,7 @@ UladzimirSidarenka ManfredStede ErikVelldal - LiljaØvrelid + LiljaØvrelid 20–26 K16-2002 K16-2002.Presentation.pdf @@ -391,7 +391,7 @@ HaoranLi LongZhou JiajunZhang - ChengqingZong + ChengqingZong 27–32 K16-2003 10.18653/v1/K16-2003 @@ -399,8 +399,8 @@ Two End-to-end Shallow Discourse Parsers for <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese in <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2016 Shared Task - JianxiangWang - ManLan + JianxiangWang + ManLan 33–40 K16-2004 10.18653/v1/K16-2004 @@ -412,7 +412,7 @@ ChristianChiarcos KathrinDonandt SamuelRönnqvist - EvgenyStepanov + EvgenyStepanov GiuseppeRiccardi 41–49 K16-2005 @@ -453,9 +453,9 @@ <fixed-case>S</fixed-case>o<fixed-case>NLP</fixed-case>-<fixed-case>DP</fixed-case> System for <fixed-case>C</fixed-case>on<fixed-case>LL</fixed-case>-2016 <fixed-case>E</fixed-case>nglish Shallow Discourse Parsing FangKong ShengLi - JunhuiLi + JunhuiLi MuhuaZhu - GuodongZhou + GuodongZhou 65–69 K16-2009 10.18653/v1/K16-2009 @@ -473,11 +473,11 @@ <fixed-case>S</fixed-case>o<fixed-case>NLP</fixed-case>-<fixed-case>DP</fixed-case> System for <fixed-case>C</fixed-case>on<fixed-case>LL</fixed-case>-2016 <fixed-case>C</fixed-case>hinese Shallow Discourse Parsing - JunhuiLi + JunhuiLi FangKong ShengLi MuhuaZhu - GuodongZhou + GuodongZhou 78–84 K16-2011 10.18653/v1/K16-2011 @@ -485,7 +485,7 @@ <fixed-case>U</fixed-case>ni<fixed-case>TN</fixed-case> End-to-End Discourse Parser for <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2016 Shared Task - EvgenyStepanov + EvgenyStepanov GiuseppeRiccardi 85–91 K16-2012 @@ -515,7 +515,7 @@ <fixed-case>IIT</fixed-case> (<fixed-case>BHU</fixed-case>) Submission on the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2016 Shared Task: Shallow Discourse Parsing using Semantic Lexicons ManpreetKaur NishuKumari - Anil KumarSingh + Anil KumarSingh RajeevSangal 108–114 K16-2015 @@ -547,7 +547,7 @@ Discourse Relation Sense Classification with Two-Step Classifiers YusukeKido - AkikoAizawa + AkikoAizawa 129–135 K16-2018 10.18653/v1/K16-2018 @@ -555,7 +555,7 @@ Adapting Event Embedding for Implicit Discourse Relation Recognition - Maria LeonorPacheco + Maria LeonorPacheco I-TaLee XiaoZhang Abdullah KhanZehady diff --git a/data/xml/K17.xml b/data/xml/K17.xml index 9a2129b259..c4510f2630 100644 --- a/data/xml/K17.xml +++ b/data/xml/K17.xml @@ -4,7 +4,7 @@ Proceedings of the 21st Conference on Computational Natural Language Learning (CoNLL 2017) K17-1 - RogerLevy + RogerLevy LuciaSpecia 10.18653/v1/K17-1 Association for Computational Linguistics @@ -19,7 +19,7 @@ Should Neural Network Architecture Reflect Linguistic Structure? - ChrisDyer + ChrisDyer 1 K17-1001 10.18653/v1/K17-1001 @@ -28,7 +28,7 @@ Rational Distortions of Learners’ Linguistic Input - NaomiFeldman + NaomiFeldman 2 K17-1002 10.18653/v1/K17-1002 @@ -53,7 +53,7 @@ IoannisKonstas LeilaZilles YejinChoi - Noah A.Smith + Noah A.Smith 15–25 K17-1004 10.18653/v1/K17-1004 @@ -62,7 +62,7 @@ Parsing for Grammatical Relations via Graph Merging - WeiweiSun + WeiweiSun YantaoDu XiaojunWan 26–35 @@ -75,7 +75,7 @@ Leveraging Eventive Information for Better Metaphor Detection and Classification I-HsuanChen YunfeiLong - QinLu + QinLu Chu-RenHuang 36–46 K17-1006 @@ -111,8 +111,8 @@ Tell Me Why: Using Question Answering as Distant Supervision for Answer Justification RebeccaSharp MihaiSurdeanu - PeterJansen - Marco A.Valenzuela-Escárcega + PeterJansen + Marco A.Valenzuela-Escárcega PeterClark MichaelHammond 69–79 @@ -138,8 +138,8 @@ HuadongChen ShujianHuang DavidChiang - XinyuDai - JiajunChen + XinyuDai + JiajunChen 90–99 K17-1011 10.18653/v1/K17-1011 @@ -149,7 +149,7 @@ Embedding Words and Senses Together via Joint Knowledge-Enhanced Training MassimilianoMancini - JoseCamacho-Collados + JoseCamacho-Collados IgnacioIacobacci RobertoNavigli 100–111 @@ -184,8 +184,8 @@ An Artificial Language Evaluation of Distributional Semantic Models - FatemehTorabi Asr - MichaelJones + FatemehTorabi Asr + MichaelJones 134–142 K17-1015 10.18653/v1/K17-1015 @@ -216,7 +216,7 @@ Feature Selection as Causal Inference: Experiments with Text Classification - Michael J.Paul + Michael J.Paul 163–172 K17-1018 10.18653/v1/K17-1018 @@ -236,8 +236,8 @@ Neural Sequence-to-sequence Learning of Internal Word Structure - TatyanaRuzsics - TanjaSamardžić + TatyanaRuzsics + TanjaSamardžić 184–194 K17-1020 10.18653/v1/K17-1020 @@ -246,7 +246,7 @@ A Supervised Approach to Extractive Summarisation of Scientific Papers - EdCollins + EdCollins IsabelleAugenstein SebastianRiedel 195–205 @@ -259,7 +259,7 @@ An Automatic Approach for Document-level Topic Model Evaluation ShraeyBhatia Jey HanLau - TimothyBaldwin + TimothyBaldwin 206–215 K17-1022 10.18653/v1/K17-1022 @@ -270,7 +270,7 @@ Robust Coreference Resolution and Entity Linking on Dialogues: Character Identification on <fixed-case>TV</fixed-case> Show Transcripts Henry Y.Chen EthanZhou - Jinho D.Choi + Jinho D.Choi 216–225 K17-1023 10.18653/v1/K17-1023 @@ -279,9 +279,9 @@ Cross-language Learning with Adversarial Neural Networks - ShafiqJoty - PreslavNakov - LluísMàrquez + ShafiqJoty + PreslavNakov + LluísMàrquez IsraaJaradat 226–237 K17-1024 @@ -293,7 +293,7 @@ Knowledge Tracing in Sequential Learning of Inflected Vocabulary AdithyaRenduchintala PhilippKoehn - JasonEisner + JasonEisner 238–247 K17-1025 10.18653/v1/K17-1025 @@ -304,7 +304,7 @@ A Probabilistic Generative Grammar for Semantic Parsing AbulhairSaparov VijaySaraswat - TomMitchell + TomMitchell 248–259 K17-1026 10.18653/v1/K17-1026 @@ -390,7 +390,7 @@ OmerLevy MinjoonSeo EunsolChoi - LukeZettlemoyer + LukeZettlemoyer 333–342 K17-1034 10.18653/v1/K17-1034 @@ -400,7 +400,7 @@ The Covert Helps Parse the Overt XunZhang - WeiweiSun + WeiweiSun XiaojunWan 343–353 K17-1035 @@ -413,7 +413,7 @@ DominikSchlechtweg StefanieEckmann EnricoSantus - SabineSchulte im Walde + SabineSchulte im Walde DanielHole 354–367 K17-1036 @@ -426,7 +426,7 @@ Encoding of phonology in a recurrent neural model of grounded speech AfraAlishahi MarieBarking - GrzegorzChrupała + GrzegorzChrupała 368–378 K17-1037 10.18653/v1/K17-1037 @@ -436,11 +436,11 @@ Multilingual Semantic Parsing And Code-Switching - LongDuong + LongDuong HadiAfshar - DominiqueEstival + DominiqueEstival GlenPink - PhilipCohen + PhilipCohen MarkJohnson 379–389 K17-1038 @@ -483,7 +483,7 @@ Joint Prediction of Morphosyntactic Categories for Fine-Grained <fixed-case>A</fixed-case>rabic Part-of-Speech Tagging Exploiting Tag Dictionary Information GoInoue HiroyukiShindo - YujiMatsumoto + YujiMatsumoto 421–431 K17-1042 10.18653/v1/K17-1042 @@ -508,7 +508,7 @@ Natural Language Generation for Spoken Dialogue System using <fixed-case>RNN</fixed-case> Encoder-Decoder Networks Van-KhanhTran - Le-MinhNguyen + Le-MinhNguyen 442–451 K17-1044 10.18653/v1/K17-1044 @@ -522,7 +522,7 @@ KshitijhMeelu AyushPareek KrishnanSrinivasan - DragomirRadev + DragomirRadev 452–462 K17-1045 10.18653/v1/K17-1045 @@ -555,9 +555,9 @@ EkaterinaVylomova PatrickXia ManaalFaruqui - SandraKübler + SandraKübler DavidYarowsky - JasonEisner + JasonEisner MansHulden 1–30 K17-2001 @@ -567,9 +567,9 @@ Training Data Augmentation for Low-Resource Morphological Inflection TomsBergmanis - KatharinaKann - HinrichSchütze - SharonGoldwater + KatharinaKann + HinrichSchütze + SharonGoldwater 31–39 K17-2002 10.18653/v1/K17-2002 @@ -577,8 +577,8 @@ The <fixed-case>LMU</fixed-case> System for the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-<fixed-case>SIGMORPHON</fixed-case> 2017 Shared Task on Universal Morphological Reinflection - KatharinaKann - HinrichSchütze + KatharinaKann + HinrichSchütze 40–48 K17-2003 10.18653/v1/K17-2003 @@ -587,7 +587,7 @@ Align and Copy: <fixed-case>UZH</fixed-case> at <fixed-case>SIGMORPHON</fixed-case> 2017 Shared Task for Morphological Reinflection PeterMakarov - TatianaRuzsics + TatianaRuzsics SimonClematide 49–57 K17-2004 @@ -615,7 +615,7 @@ Experiments on Morphological Reinflection: <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2017 Shared Task AkhileshSudhakar - Anil KumarSingh + Anil KumarSingh 71–78 K17-2007 10.18653/v1/K17-2007 @@ -645,10 +645,10 @@ Data Augmentation for Morphological Reinflection - MiikkaSilfverberg + MiikkaSilfverberg AdamWiemerslage LingLiu - Lingshuang JackMao + Lingshuang JackMao 90–99 K17-2010 10.18653/v1/K17-2010 @@ -657,7 +657,7 @@ Seq2seq for Morphological Reinflection: When Deep Learning Fails HajimeSenuma - AkikoAizawa + AkikoAizawa 100–109 K17-2011 10.18653/v1/K17-2011 @@ -677,8 +677,8 @@ Proceedings of the CoNLL 2017 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies K17-3 - JanHajič - DanZeman + JanHajič + DanZeman 10.18653/v1/K17-3 Association for Computational Linguistics
Vancouver, Canada
@@ -702,31 +702,31 @@ SampoPyysalo SlavPetrov MartinPotthast - FrancisTyers + FrancisTyers ElenaBadmaeva - MemduhGokirmak + MemduhGokirmak AnnaNedoluzhko SilvieCinková - JanHajič jr. + JanHajič jr. JaroslavaHlaváčová VáclavaKettnerová - ZdeňkaUrešová + ZdeňkaUrešová JennaKanerva StinaOjala AnnaMissilä - Christopher D.Manning + Christopher D.Manning SebastianSchuster SivaReddy DimaTaji NizarHabash HermanLeung - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe ManuelaSanguinetti MariaSimi HiroshiKanayama - Valeriade Paiva + Valeriade Paiva KiraDroganova - HéctorMartínez Alonso + HéctorMartínez Alonso ÇağrıÇöltekin UmutSulubacak HansUszkoreit @@ -741,16 +741,16 @@ ZhuoranYu EmilyPitler SaranLertpradit - MichaelMandl + MichaelMandl JesseKirchner Hector FernandezAlcalde JanaStrnadová EshaBanerjee - RuliManurung + RuliManurung AntonioStella AtsukoShimada SookyoungKwak - GustavoMendonça + GustavoMendonça TatianaLando RattimaNitisaroj JosieLi @@ -766,7 +766,7 @@ <fixed-case>S</fixed-case>tanford’s Graph-based Neural Dependency Parser at the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2017 Shared Task TimothyDozat PengQi - Christopher D.Manning + Christopher D.Manning 20–30 K17-3002 10.18653/v1/K17-3002 @@ -788,7 +788,7 @@ <fixed-case>IMS</fixed-case> at the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2017 <fixed-case>UD</fixed-case> Shared Task: <fixed-case>CRF</fixed-case>s and Perceptrons Meet Neural Networks AndersBjörkelund - AgnieszkaFalenska + AgnieszkaFalenska XiangYu JonasKuhn 40–51 @@ -804,7 +804,7 @@ YuxuanWang BoZheng HuaipengZhao - YangLiu + YangLiu DechuanTeng TingLiu 52–62 @@ -826,7 +826,7 @@ A System for Multilingual Dependency Parsing based on Bidirectional <fixed-case>LSTM</fixed-case> Feature Representations - KyungTaeLim + KyungTaeLim ThierryPoibeau 63–70 K17-3006 @@ -839,7 +839,7 @@ MotokiSato HitoshiManabe HiroshiNoji - YujiMatsumoto + YujiMatsumoto 71–79 K17-3007 10.18653/v1/K17-3007 @@ -849,7 +849,7 @@ Parsing with Context Embeddings ÖmerKırnap - Berkay FurkanÖnder + Berkay FurkanÖnder DenizYuret 80–87 K17-3008 @@ -906,7 +906,7 @@ KuanYu PavelSofroniev ErikSchill - ErhardHinrichs + ErhardHinrichs 126–133 K17-3013 10.18653/v1/K17-3013 @@ -957,9 +957,9 @@ <fixed-case>RACAI</fixed-case>’s Natural Language Processing pipeline for <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies - Stefan DanielDumitrescu - TiberiuBoros - DanTufis + Stefan DanielDumitrescu + TiberiuBoros + DanTufis 174–181 K17-3018 10.18653/v1/K17-3018 @@ -1021,7 +1021,7 @@ Initial Explorations of <fixed-case>CCG</fixed-case> Supertagging for <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Parsing Burak KerimAkkus HevalAzizoglu - RuketCakici + RuketCakici 218–227 K17-3023 10.18653/v1/K17-3023 @@ -1032,7 +1032,7 @@ <fixed-case>CLCL</fixed-case> (Geneva) <fixed-case>DINN</fixed-case> Parser: a Neural Network Dependency Parser Ten Years Later ChristopheMoor PaolaMerlo - JamesHenderson + JamesHenderson HaozhouWang 228–236 K17-3024 @@ -1044,7 +1044,7 @@ A Fast and Lightweight System for Multilingual Dependency Parsing TaoJi YuanbinWu - ManLan + ManLan 237–242 K17-3025 10.18653/v1/K17-3025 @@ -1053,9 +1053,9 @@ The <fixed-case>P</fixed-case>aris<fixed-case>NLP</fixed-case> entry at the <fixed-case>C</fixed-case>on<fixed-case>LL</fixed-case> <fixed-case>UD</fixed-case> Shared Task 2017: A Tale of a #<fixed-case>P</fixed-case>arsing<fixed-case>T</fixed-case>ragedy - Éricde La Clergerie - BenoîtSagot - DjaméSeddah + Éricde La Clergerie + BenoîtSagot + DjaméSeddah 243–252 K17-3026 10.18653/v1/K17-3026 diff --git a/data/xml/K18.xml b/data/xml/K18.xml index 0ebc70288e..1ffd35af61 100644 --- a/data/xml/K18.xml +++ b/data/xml/K18.xml @@ -43,7 +43,7 @@ Dual Latent Variable Model for Low-Resource Natural Language Generation in Dialogue Systems Van-KhanhTran - Le-MinhNguyen + Le-MinhNguyen 21–30 K18-1003 Recent deep learning models have shown improving results to natural language generation (NLG) irrespective of providing sufficient annotated data. However, a modest training data may harm such models’ performance. Thus, how to build a generator that can utilize as much of knowledge from a low-resource setting data is a crucial issue in NLG. This paper presents a variational neural-based generation model to tackle the NLG problem of having limited labeled dataset, in which we integrate a variational inference into an encoder-decoder generator and introduce a novel auxiliary auto-encoding with an effective training procedure. Experiments showed that the proposed methods not only outperform the previous models when having sufficient training dataset but also demonstrate strong ability to work acceptably well when the training data is scarce. @@ -53,7 +53,7 @@ A Trio Neural Model for Dynamic Entity Relatedness Ranking TuNguyen - TuanTran + TuanTran WolfgangNejdl 31–41 K18-1004 @@ -65,7 +65,7 @@ A Unified Neural Network Model for Geolocating <fixed-case>T</fixed-case>witter Users MohammadEbrahimi ElahehShafieiBavani - RaymondWong + RaymondWong FangChen 42–53 K18-1005 @@ -97,7 +97,7 @@ From Strings to Other Things: Linking the Neighborhood and Transposition Effects in Word Reading StéphanTulkens DominiekSandra - WalterDaelemans + WalterDaelemans 75–85 K18-1008 We investigate the relation between the transposition and deletion effects in word reading, i.e., the finding that readers can successfully read “SLAT” as “SALT”, or “WRK” as “WORK”, and the neighborhood effect. In particular, we investigate whether lexical orthographic neighborhoods take into account transposition and deletion in determining neighbors. If this is the case, it is more likely that the neighborhood effect takes place early during processing, and does not solely rely on similarity of internal representations. We introduce a new neighborhood measure, rd20, which can be used to quantify neighborhood effects over arbitrary feature spaces. We calculate the rd20 over large sets of words in three languages using various feature sets and show that feature sets that do not allow for transposition or deletion explain more variance in Reaction Time (RT) measurements. We also show that the rd20 can be calculated using the hidden state representations of an Multi-Layer Perceptron, and show that these explain less variance than the raw features. We conclude that the neighborhood effect is unlikely to have a perceptual basis, but is more likely to be the result of items co-activating after recognition. All code is available at: www.github.com/clips/conll2018 @@ -119,7 +119,7 @@ Pervasive Attention: 2<fixed-case>D</fixed-case> Convolutional Neural Networks for Sequence-to-Sequence Prediction MahaElbayad - LaurentBesacier + LaurentBesacier JakobVerbeek 97–107 K18-1010 @@ -142,10 +142,10 @@ Uncovering Code-Mixed Challenges: A Framework for Linguistically Driven Question Generation and Neural Based Question Answering - DeepakGupta + DeepakGupta PabitraLenka AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 119–130 K18-1012 Existing research on question answering (QA) and comprehension reading (RC) are mainly focused on the resource-rich language like English. In recent times, the rapid growth of multi-lingual web content has posed several challenges to the existing QA systems. Code-mixing is one such challenge that makes the task more complex. In this paper, we propose a linguistically motivated technique for code-mixed question generation (CMQG) and a neural network based architecture for code-mixed question answering (CMQA). For evaluation, we manually create the code-mixed questions for Hindi-English language pair. In order to show the effectiveness of our neural network based CMQA technique, we utilize two benchmark datasets, SQuAD and MMQA. Experiments show that our proposed model achieves encouraging performance on CMQG and CMQA. @@ -154,7 +154,7 @@ Learning to Embed Semantic Correspondence for Natural Language Understanding - SangkeunJung + SangkeunJung JinsikLee JiwonKim 131–140 @@ -177,8 +177,8 @@ Active Learning for Interactive Neural Machine Translation of Data Streams - ÁlvaroPeris - FranciscoCasacuberta + ÁlvaroPeris + FranciscoCasacuberta 151–160 K18-1015 We study the application of active learning techniques to the translation of unbounded data streams via interactive neural machine translation. The main idea is to select, from an unbounded stream of source sentences, those worth to be supervised by a human agent. The user will interactively translate those samples. Once validated, these data is useful for adapting the neural machine translation model. We propose two novel methods for selecting the samples to be validated. We exploit the information from the attention mechanism of a neural machine translation system. Our experiments show that the inclusion of active learning techniques into this pipeline allows to reduce the effort required during the process, while increasing the quality of the translation system. Moreover, it enables to balance the human effort required for achieving a certain translation quality. Moreover, our neural system outperforms classical approaches by a large margin. @@ -193,7 +193,7 @@ RobertWest AndreeaHossmann MichaelBaeriswyl - ClaudiuMusat + ClaudiuMusat 161–170 K18-1016 We propose a new method to detect when users express the intent to leave a service, also known as churn. While previous work focuses solely on social media, we show that this intent can be detected in chatbot conversations. As companies increasingly rely on chatbots they need an overview of potentially churny users. To this end, we crowdsource and publish a dataset of churn intent expressions in chatbot interactions in German and English. We show that classifiers trained on social media data can detect the same intent in the context of chatbots. We introduce a classification architecture that outperforms existing work on churn intent detection in social media. Moreover, we show that, using bilingual word embeddings, a system trained on combined English and German data outperforms monolingual approaches. As the only existing dataset is in English, we crowdsource and publish a novel dataset of German tweets. We thus underline the universal aspect of the problem, as examples of churn intent in English help us identify churn in German tweets and chatbot conversations. @@ -203,8 +203,8 @@ Learning Text Representations for 500<fixed-case>K</fixed-case> Classification Tasks on Named Entity Disambiguation AnderBarrena - AitorSoroa - EnekoAgirre + AitorSoroa + EnekoAgirre 171–180 K18-1017 Named Entity Disambiguation algorithms typically learn a single model for all target entities. In this paper we present a word expert model and train separate deep learning models for each target entity string, yielding 500K classification tasks. This gives us the opportunity to benchmark popular text representation alternatives on this massive dataset. In order to face scarce training data we propose a simple data-augmentation technique and transfer-learning. We show that bag-of-word-embeddings are better than LSTMs for tasks with scarce training data, while the situation is reversed when having larger amounts. Transferring a LSTM which is learned on all datasets is the most effective context representation option for the word experts in all frequency bands. The experiments show that our system trained on out-of-domain Wikipedia data surpass comparable NED systems which have been trained on in-domain training data. @@ -214,7 +214,7 @@ Hierarchical Attention Based Position-Aware Network for Aspect-Level Sentiment Analysis LishuangLi - YangLiu + YangLiu AnQiaoZhou 181–189 K18-1018 @@ -250,7 +250,7 @@ YovaKementchedjhieva SebastianRuder RyanCotterell - AndersSøgaard + AndersSøgaard 211–220 K18-1021 Most recent approaches to bilingual dictionary induction find a linear alignment between the word vector spaces of two languages. We show that projecting the two languages onto a third, latent space, rather than directly onto each other, while equivalent in terms of expressivity, makes it easier to learn approximate alignments. Our modified approach also allows for supporting languages to be included in the alignment process, to obtain an even better performance in low resource settings. @@ -260,7 +260,7 @@ Simple Unsupervised Keyphrase Extraction using Sentence Embeddings KamilBennani-Smires - ClaudiuMusat + ClaudiuMusat AndreeaHossmann MichaelBaeriswyl MartinJaggi @@ -330,9 +330,9 @@ Uncovering Divergent Linguistic Information in Word Embeddings with Lessons for Intrinsic and Extrinsic Evaluation MikelArtetxe - GorkaLabaka + GorkaLabaka IñigoLopez-Gazpio - EnekoAgirre + EnekoAgirre 282–291 K18-1028 Following the recent success of word embeddings, it has been argued that there is no such thing as an ideal representation for words, as different models tend to capture divergent and often mutually incompatible aspects like semantics/syntax and similarity/relatedness. In this paper, we show that each embedding model captures more information than directly apparent. A linear transformation that adjusts the similarity order of the model without any external resource can tailor it to achieve better results in those aspects, providing a new perspective on how embeddings encode divergent linguistic information. In addition, we explore the relation between intrinsic and extrinsic evaluation, as the effect of our transformations in downstream tasks is higher for unsupervised systems than for supervised ones. @@ -344,7 +344,7 @@ Judy HanwenShen MatthiasHofer BjarkeFelbo - RogerLevy + RogerLevy 292–301 K18-1029 Simple reference games are of central theoretical and empirical importance in the study of situated language use. Although language provides rich, compositional truth-conditional semantics to facilitate reference, speakers and listeners may sometimes lack the overall lexical and cognitive resources to guarantee successful reference through these means alone. However, language also has rich associational structures that can serve as a further resource for achieving successful reference. Here we investigate this use of associational information in a setting where only associational information is available: a simplified version of the popular game Codenames. Using optimal experiment design techniques, we compare a range of models varying in the type of associative information deployed and in level of pragmatic sophistication against human behavior. In this setting we find that listeners’ behavior reflects direct bigram collocational associations more strongly than word-embedding or semantic knowledge graph-based associations and that there is little evidence for pragmatically sophisticated behavior on the part of either speakers or listeners. More generally, we demonstrate the effective use of simple tasks to derive insights into the nature of complex linguistic phenomena. @@ -357,7 +357,7 @@ JoachimBingel NoraHollenstein MarekRei - AndersSøgaard + AndersSøgaard 302–312 K18-1030 Learning attention functions requires large volumes of data, but many NLP tasks simulate human behavior, and in this paper, we show that human attention really does provide a good inductive bias on many attention functions in NLP. Specifically, we use estimated human attention derived from eye-tracking corpora to regularize attention functions in recurrent neural networks. We show substantial improvements across a range of tasks, including sentiment analysis, grammatical error detection, and detection of abusive language. @@ -366,7 +366,7 @@ Sentence-Level Fluency Evaluation: References Help, But Can Be Spared! - KatharinaKann + KatharinaKann SaschaRothe KatjaFilippova 313–323 @@ -391,7 +391,7 @@ Learning to Actively Learn Neural Machine Translation MingLiu WrayBuntine - GholamrezaHaffari + GholamrezaHaffari 334–344 K18-1033 Traditional active learning (AL) methods for machine translation (MT) rely on heuristics. However, these heuristics are limited when the characteristics of the MT problem change due to e.g. the language pair or the amount of the initial bitext. In this paper, we present a framework to learn sentence selection strategies for neural MT. We train the AL query strategy using a high-resource language-pair based on AL simulations, and then transfer it to the low-resource language-pair of interest. The learned query strategy capitalizes on the shared characteristics between the language pairs to make an effective use of the AL budget. Our experiments on three language-pairs confirms that our method is more effective than strong heuristic-based methods in various conditions, including cold-start and warm-start as well as small and extremely small data conditions. @@ -402,7 +402,7 @@ Upcycle Your <fixed-case>OCR</fixed-case>: Reusing <fixed-case>OCR</fixed-case>s for Post-<fixed-case>OCR</fixed-case> Text Correction in <fixed-case>R</fixed-case>omanised <fixed-case>S</fixed-case>anskrit AmrithKrishna Bodhisattwa P.Majumder - RajeshBhat + RajeshBhat PawanGoyal 345–355 K18-1034 @@ -432,7 +432,7 @@ Evolutionary Data Measures: Understanding the Difficulty of Text Classification Tasks - EdwardCollins + EdwardCollins NikolaiRozanov BingbingZhang 380–391 @@ -456,7 +456,7 @@ ÁkosKádár DesmondElliott Marc-AlexandreCôté - GrzegorzChrupała + GrzegorzChrupała AfraAlishahi 402–412 K18-1039 @@ -478,7 +478,7 @@ Resources to Examine the Quality of Word Embedding Models Trained on n-Gram Data ÁbelElekes AdrianEnglhardt - MartinSchäler + MartinSchäler KlemensBöhm 423–432 K18-1041 @@ -511,7 +511,7 @@ Challenge or Empower: Revisiting Argumentation Quality in a News Editorial Corpus RoxanneEl Baff HenningWachsmuth - KhalidAl-Khatib + KhalidAl-Khatib BennoStein 454–464 K18-1044 @@ -521,7 +521,7 @@ Bringing Order to Neural Word Embeddings with Embeddings Augmented by Random Permutations (<fixed-case>EARP</fixed-case>) - TrevorCohen + TrevorCohen DominicWiddows 465–475 K18-1045 @@ -535,7 +535,7 @@ ShuyanZhou JingLiu JinpengWang - Chin-YewLin + Chin-YewLin RongPan 476–485 K18-1046 @@ -569,7 +569,7 @@ The Lifted Matrix-Space Model for Semantic Composition WooJinChung Sheng-FuWang - SamuelBowman + SamuelBowman 508–518 K18-1049 Tree-structured neural network architectures for sentence encoding draw inspiration from the approach to semantic composition generally seen in formal linguistics, and have shown empirical improvements over comparable sequence models by doing so. Moreover, adding multiplicative interaction terms to the composition functions in these models can yield significant further improvements. However, existing compositional approaches that adopt such a powerful composition function scale poorly, with parameter counts exploding as model dimension or vocabulary size grows. We introduce the Lifted Matrix-Space model, which uses a global transformation to map vector word embeddings to matrices, which can then be composed via an operation based on matrix-matrix multiplication. Its composition function effectively transmits a larger number of activations across layers with relatively few model parameters. We evaluate our model on the Stanford NLI corpus, the Multi-Genre NLI corpus, and the Stanford Sentiment Treebank and find that it consistently outperforms TreeLSTM (Tai et al., 2015), the previous best known composition function for tree-structured models. @@ -624,7 +624,7 @@ ShengHuang FangWang JunjieCao - WeiweiSun + WeiweiSun XiaojunWan 562–572 K18-1054 @@ -648,7 +648,7 @@ Sequence to Sequence Mixture Model for Diverse Machine Translation XuanliHe - GholamrezaHaffari + GholamrezaHaffari MohammadNorouzi 583–592 K18-1056 @@ -661,8 +661,8 @@ Proceedings of the CoNLL 2018 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies K18-2 - DanielZeman - JanHajič + DanielZeman + JanHajič Association for Computational Linguistics
Brussels, Belgium
October @@ -695,7 +695,7 @@ The 2018 Shared Task on Extrinsic Parser Evaluation: On the Downstream Utility of <fixed-case>E</fixed-case>nglish <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Parsers MurhafFares StephanOepen - LiljaØvrelid + LiljaØvrelid JariBjörne RichardJohansson 22–33 @@ -763,7 +763,7 @@ An Improved Neural Network Model for Joint <fixed-case>POS</fixed-case> Tagging and Dependency Parsing Dat QuocNguyen - KarinVerspoor + KarinVerspoor 81–91 K18-2008 We propose a novel neural network model for joint part-of-speech (POS) tagging and dependency parsing. Our model extends the well-known BIST graph-based dependency parser (Kiperwasser and Goldberg, 2016) by incorporating a BiLSTM-based tagging component to produce automatically predicted POS tags for the parser. On the benchmark English Penn treebank, our model obtains strong UAS and LAS scores at 94.51% and 92.87%, respectively, producing 1.5+% absolute improvements to the BIST graph-based parser, and also obtaining a state-of-the-art POS tagging accuracy at 97.97%. Furthermore, experimental results on parsing 61 “big” Universal Dependencies treebanks from raw texts show that our model outperforms the baseline UDPipe (Straka and Strakova, 2017) with 0.8% higher average POS tagging score and 3.6% higher average LAS score. In addition, with our model, we also obtain state-of-the-art downstream task scores for biomedical event extraction and opinion analysis applications. Our code is available together with all pre-trained models at: https://github.com/datquocnguyen/jPTDP @@ -837,7 +837,7 @@ <fixed-case>SE</fixed-case>x <fixed-case>B</fixed-case>i<fixed-case>ST</fixed-case>: A Multi-Source Trainable Parser with Deep Contextualized Lexical Representations - KyungTaeLim + KyungTaeLim CheoneumPark ChangkiLee ThierryPoibeau @@ -849,9 +849,9 @@ The <fixed-case>SLT</fixed-case>-Interactions Parsing System at the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2018 Shared Task - Riyaz A.Bhat - IrshadBhat - SrinivasBangalore + Riyaz A.Bhat + IrshadBhat + SrinivasBangalore 153–159 K18-2015 This paper describes our system (SLT-Interactions) for the CoNLL 2018 shared task: Multilingual Parsing from Raw Text to Universal Dependencies. Our system performs three main tasks: word segmentation (only for few treebanks), POS tagging and parsing. While segmentation is learned separately, we use neural stacking for joint learning of POS tagging and parsing tasks. For all the tasks, we employ simple neural network architectures that rely on long short-term memory (LSTM) networks for learning task-dependent features. At the basis of our parser, we use an arc-standard algorithm with Swap action for general non-projective parsing. Additionally, we use neural stacking as a knowledge transfer mechanism for cross-domain parsing of low resource domains. Our system shows substantial gains against the UDPipe baseline, with an average improvement of 4.18% in LAS across all languages. Overall, we are placed at the 12th position on the official test sets. @@ -863,7 +863,7 @@ PengQi TimothyDozat YuhaoZhang - Christopher D.Manning + Christopher D.Manning 160–170 K18-2016 This paper describes Stanford’s system at the CoNLL 2018 UD Shared Task. We introduce a complete neural pipeline system that takes raw text as input, and performs all tasks required by the shared task, ranging from tokenization and sentence segmentation, to POS tagging and dependency parsing. Our single system submission achieved very competitive performance on big treebanks. Moreover, after fixing an unfortunate bug, our corrected system would have placed the 2nd, 1st, and 3rd on the official evaluation metrics LAS, MLAS, and BLEX, and would have outperformed all submission systems on low-resource treebank categories on all metrics by a large margin. We further show the effectiveness of different model components through extensive ablation studies. @@ -872,8 +872,8 @@ <fixed-case>NLP</fixed-case>-Cube: End-to-End Raw Text Processing With Neural Networks - TiberiuBoros - Stefan DanielDumitrescu + TiberiuBoros + Stefan DanielDumitrescu RuxandraBurtica 171–179 K18-2017 @@ -924,7 +924,7 @@ <fixed-case>SP</fixed-case>arse: <fixed-case>K</fixed-case>oç <fixed-case>U</fixed-case>niversity Graph-Based Parsing System for the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2018 Shared Task - BerkayÖnder + BerkayÖnder CanGümeli DenizYuret 216–222 @@ -939,9 +939,9 @@ BenjaminMuller AmalFethi LouisMartin - ÉricVillemonte de la Clergerie - BenoîtSagot - DjaméSeddah + ÉricVillemonte de la Clergerie + BenoîtSagot + DjaméSeddah 223–237 K18-2023 In this paper, we present the details of the neural dependency parser and the neural tagger submitted by our team ‘ParisNLP’ to the CoNLL 2018 Shared Task on parsing from raw text to Universal Dependencies. We augment the deep Biaffine (BiAF) parser (Dozat and Manning, 2016) with novel features to perform competitively: we utilize an indomain version of ELMo features (Peters et al., 2018) which provide context-dependent word representations; we utilize disambiguated, embedded, morphosyntactic features from lexicons (Sagot, 2018), which complements the existing feature set. Henceforth, we call our system ‘ELMoLex’. In addition to incorporating character embeddings, ELMoLex benefits from pre-trained word vectors, ELMo and morphosyntactic features (whenever available) to correctly handle rare or unknown words which are prevalent in languages with complex morphology. ELMoLex ranked 11th by Labeled Attachment Score metric (70.64%), Morphology-aware LAS metric (55.74%) and ranked 9th by Bilexical dependency metric (60.70%). @@ -951,7 +951,7 @@ A Morphology-Based Representation Model for <fixed-case>LSTM</fixed-case>-Based Dependency Parsing of Agglutinative Languages Şaziye BetülÖzateş - ArzucanÖzgür + ArzucanÖzgür TungaGüngör BalkızÖztürk 238–247 @@ -966,7 +966,7 @@ YufangLiu YijunWang YuanbinWu - ManLan + ManLan 248–255 K18-2025 We describe the graph-based dependency parser in our system (AntNLP) submitted to the CoNLL 2018 UD Shared Task. We use bidirectional lstm to get the word representation, then a bi-affine pointer networks to compute scores of candidate dependency edges and the MST algorithm to get the final dependency tree. From the official testing results, our system gets 70.90 LAS F1 score (rank 9/26), 55.92 MLAS (10/26) and 60.91 BLEX (8/26). @@ -1009,13 +1009,13 @@ JohnSylak-Glassman GéraldineWalther EkaterinaVylomova - Arya D.McCarthy - KatharinaKann - Sabrina J.Mielke + Arya D.McCarthy + KatharinaKann + Sabrina J.Mielke GarrettNicolai - MiikkaSilfverberg + MiikkaSilfverberg DavidYarowsky - JasonEisner + JasonEisner MansHulden 1–27 K18-3001 @@ -1055,7 +1055,7 @@ Experiments on Morphological Reinflection: <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2018 Shared Task RishabhJain - Anil KumarSingh + Anil KumarSingh 48–57 K18-3005 10.18653/v1/K18-3005 @@ -1063,7 +1063,7 @@ The <fixed-case>NYU</fixed-case> System for the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>–<fixed-case>SIGMORPHON</fixed-case> 2018 Shared Task on Universal Morphological Reinflection - KatharinaKann + KatharinaKann StanislasLauly KyunghyunCho 58–63 @@ -1073,8 +1073,8 @@ Attention-free encoder decoder for morphological processing - Stefan DanielDumitrescu - TiberiuBoros + Stefan DanielDumitrescu + TiberiuBoros 64–68 K18-3007 10.18653/v1/K18-3007 @@ -1106,7 +1106,7 @@ IlamvazhuthySubbiah AdamWiemerslage JonathanLilley - SarahMoeller + SarahMoeller 86–92 K18-3010 10.18653/v1/K18-3010 @@ -1134,7 +1134,7 @@ <fixed-case>IIT</fixed-case>(<fixed-case>BHU</fixed-case>)–<fixed-case>IIITH</fixed-case> at <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>–<fixed-case>SIGMORPHON</fixed-case> 2018 Shared Task on Universal Morphological Reinflection AbhishekSharma GaneshKatrapati - Dipti MisraSharma + Dipti MisraSharma 105–111 K18-3013 10.18653/v1/K18-3013 diff --git a/data/xml/K19.xml b/data/xml/K19.xml index 6417992892..9ff90a7015 100644 --- a/data/xml/K19.xml +++ b/data/xml/K19.xml @@ -51,7 +51,7 @@ Investigating Cross-Lingual Alignment Methods for Contextualized Embeddings with Token-Level Evaluation QianchuLiu - DianaMcCarthy + DianaMcCarthy IvanVulić AnnaKorhonen 33–43 @@ -87,7 +87,7 @@ Using Priming to Uncover the Organization of Syntactic Representations in Neural Language Models GrushaPrasad - Martenvan Schijndel + Martenvan Schijndel TalLinzen 66–76 Neural language models (LMs) perform well on tasks that require sensitivity to syntactic structure. Drawing on the syntactic priming paradigm from psycholinguistics, we propose a novel technique to analyze the representations that enable such success. By establishing a gradient similarity metric between structures, this technique allows us to reconstruct the organization of the LMs’ syntactic representational space. We use this technique to demonstrate that LSTM LMs’ representations of different types of sentences with relative clauses are organized hierarchically in a linguistically interpretable manner, suggesting that the LMs track abstract properties of the sentence. @@ -127,7 +127,7 @@ MahmoudAzab NoriyukiKojima JiaDeng - RadaMihalcea + RadaMihalcea 99–109 We introduce a new embedding model to represent movie characters and their interactions in a dialogue by encoding in the same representation the language used by these characters as well as information about the other participants in the dialogue. We evaluate the performance of these new character embeddings on two tasks: (1) character relatedness, using a dataset we introduce consisting of a dense character interaction matrix for 4,378 unique character pairs over 22 hours of dialogue from eighteen movies; and (2) character relation classification, for fine- and coarse-grained relations, as well as sentiment relations. Our experiments show that our model significantly outperforms the traditional Word2Vec continuous bag-of-words and skip-gram models, demonstrating the effectiveness of the character embeddings we introduce. We further show how these embeddings can be used in conjunction with a visual question answering system to improve over previous results. K19-1010 @@ -153,7 +153,7 @@ AdelineWong CyrilAllauzen FrançoiseBeaufays - MichaelRiley + MichaelRiley 121–130 We propose algorithms to train production-quality n-gram language models using federated learning. Federated learning is a distributed computation platform that can be used to train global models for portable devices such as smart phones. Federated learning is especially relevant for applications handling privacy-sensitive data, such as virtual keyboards, because training is performed without the users’ data ever leaving their devices. While the principles of federated learning are fairly generic, its methodology assumes that the underlying models are neural networks. However, virtual keyboards are typically powered by n-gram language models for latency reasons. We propose to train a recurrent neural network language model using the decentralized FederatedAveraging algorithm and to approximate this federated model server-side with an n-gram model that can be deployed to devices for fast inference. Our technical contributions include ways of handling large vocabularies, algorithms to correct capitalization errors in user data, and efficient finite state transducer algorithms to convert word language models to word-piece language models and vice versa. The n-gram language models trained with federated learning are compared to n-grams trained with traditional server-based algorithms using A/B tests on tens of millions of users of a virtual keyboard. Results are presented for two languages, American English and Brazilian Portuguese. This work demonstrates that high-quality n-gram language models can be trained directly on client mobile devices without sensitive training data ever leaving the devices. K19-1012 @@ -174,10 +174,10 @@ Weird Inflects but <fixed-case>OK</fixed-case>: Making Sense of Morphological Generation Errors KyleGorman - Arya D.McCarthy + Arya D.McCarthy RyanCotterell EkaterinaVylomova - MiikkaSilfverberg + MiikkaSilfverberg MagdalenaMarkowska 140–151 We conduct a manual error analysis of the CoNLL-SIGMORPHON Shared Task on Morphological Reinflection. This task involves natural language generation: systems are given a word in citation form (e.g., hug) and asked to produce the corresponding inflected form (e.g., the simple past hugged). We propose an error taxonomy and use it to annotate errors made by the top two systems across twelve languages. Many of the observed errors are related to inflectional patterns sensitive to inherent linguistic properties such as animacy or affect; many others are failures to predict truly unpredictable inflectional behaviors. We also find nearly one quarter of the residual “errors” reflect errors in the gold data. @@ -191,7 +191,7 @@ YingtaoTian HaochenChen Kai-WeiChang - StevenSkiena + StevenSkiena CarloZaniolo 152–162 Bilingual word embeddings have been widely used to capture the correspondence of lexical semantics in different human languages. However, the cross-lingual correspondence between sentences and words is less studied, despite that this correspondence can significantly benefit many applications such as crosslingual semantic search and textual inference. To bridge this gap, we propose a neural embedding model that leverages bilingual dictionaries. The proposed model is trained to map the lexical definitions to the cross-lingual target words, for which we explore with different sentence encoding techniques. To enhance the learning process on limited resources, our model adopts several critical learning strategies, including multi-task learning on different bridges of languages, and joint learning of the dictionary model with a bilingual word embedding model. We conduct experiments on two new tasks. In the cross-lingual reverse dictionary retrieval task, we demonstrate that our model is capable of comprehending bilingual concepts based on descriptions, and the proposed learning strategies are effective. In the bilingual paraphrase identification task, we show that our model effectively associates sentences in different languages via a shared embedding space, and outperforms existing approaches in identifying bilingual paraphrases. @@ -276,7 +276,7 @@ Comparing Top-Down and Bottom-Up Neural Generative Dependency Models AustinMatthews GrahamNeubig - ChrisDyer + ChrisDyer 227–237 Recurrent neural network grammars generate sentences using phrase-structure syntax and perform very well on both parsing and language modeling. To explore whether generative dependency models are similarly effective, we propose two new generative models of dependency syntax. Both models use recurrent neural nets to avoid making explicit independence assumptions, but they differ in the order used to construct the trees: one builds the tree bottom-up and the other top-down, which profoundly changes the estimation problem faced by the learner. We evaluate the two models on three typologically different languages: English, Arabic, and Japanese. While both generative models improve parsing performance over a discriminative baseline, they are significantly less effective than non-syntactic LSTM language models. Surprisingly, little difference between the construction orders is observed for either parsing or language modeling. K19-1022 @@ -285,7 +285,7 @@ Representation Learning and Dynamic Programming for Arc-Hybrid Parsing - JosephLe Roux + JosephLe Roux AntoineRozenknop MathieuLacroix 238–248 @@ -298,8 +298,8 @@ Policy Preference Detection in Parliamentary Debate Motions GavinAbercrombie FedericoNanni - RizaBatista-Navarro - Simone PaoloPonzetto + RizaBatista-Navarro + Simone PaoloPonzetto 249–259 Debate motions (proposals) tabled in the UK Parliament contain information about the stated policy preferences of the Members of Parliament who propose them, and are key to the analysis of all subsequent speeches given in response to them. We attempt to automatically label debate motions with codes from a pre-existing coding scheme developed by political scientists for the annotation and analysis of political parties’ manifestos. We develop annotation guidelines for the task of applying these codes to debate motions at two levels of granularity and produce a dataset of manually labelled examples. We evaluate the annotation process and the reliability and utility of the labelling scheme, finding that inter-annotator agreement is comparable with that of other studies conducted on manifesto data. Moreover, we test a variety of ways of automatically labelling motions with the codes, ranging from similarity matching to neural classification methods, and evaluate them against the gold standard labels. From these experiments, we note that established supervised baselines are not always able to improve over simple lexical heuristics. At the same time, we detect a clear and evident benefit when employing BERT, a state-of-the-art deep language representation model, even in classification scenarios with over 30 different labels and limited amounts of training data. K19-1024 @@ -309,7 +309,7 @@ Improving Neural Machine Translation by Achieving Knowledge Transfer with Sentence Alignment Learning XuewenShi - HeyanHuang + HeyanHuang WenguanWang PingJian Yi-KunTang @@ -321,7 +321,7 @@ Code-Switched Language Models Using Neural Based Synthetic Data from Parallel Sentences - Genta IndraWinata + Genta IndraWinata AndreaMadotto Chien-ShengWu PascaleFung @@ -359,7 +359,7 @@ Low-Resource Parsing with Crosslingual Contextualized Representations PhoebeMulcaire JungoKasai - Noah A.Smith + Noah A.Smith 304–315 Despite advances in dependency parsing, languages with small treebanks still present challenges. We assess recent approaches to multilingual contextual word representations (CWRs), and compare them for crosslingual transfer from a language with a large treebank to a language with a small or nonexistent treebank, by sharing parameters between languages in the parser itself. We experiment with a diverse selection of languages in both simulated and truly low-resource scenarios, and show that multilingual CWRs greatly facilitate low-resource dependency parsing even without crosslingual supervision such as dictionaries or parallel text. Furthermore, we examine the non-contextual part of the learned language models (which we call a “decontextual probe”) to demonstrate that polyglot language models better encode crosslingual lexical correspondence compared to aligned monolingual language models. This analysis provides further evidence that polyglot training is an effective approach to crosslingual transfer. K19-1029 @@ -394,7 +394,7 @@ Word Recognition, Competition, and Activation in a Model of Visually Grounded Speech William N.Havard Jean-PierreChevrot - LaurentBesacier + LaurentBesacier 339–348 In this paper, we study how word-like units are represented and activated in a recurrent neural model of visually grounded speech. The model used in our experiments is trained to project an image and its spoken description in a common representation space. We show that a recurrent model trained on spoken sentences implicitly segments its input into word-like units and reliably maps them to their correct visual referents. We introduce a methodology originating from linguistics to analyse the representation learned by neural networks – the gating paradigm – and show that the correct representation of a word is only activated if the network has access to first phoneme of the target word, suggesting that the network does not rely on a global acoustic pattern. Furthermore, we find out that not all speech frames (MFCC vectors in our case) play an equal role in the final encoded representation of a given word, but that some frames have a crucial effect on it. Finally we suggest that word representation could be activated through a process of lexical competition. K19-1032 @@ -406,8 +406,8 @@ <fixed-case>EQUATE</fixed-case>: A Benchmark Evaluation Framework for Quantitative Reasoning in Natural Language Inference AbhilashaRavichander AakankshaNaik - CarolynRose - EduardHovy + CarolynRose + EduardHovy 349–361 Quantitative reasoning is a higher-order reasoning skill that any intelligent natural language understanding system can reasonably be expected to handle. We present EQUATE (Evaluating Quantitative Understanding Aptitude in Textual Entailment), a new framework for quantitative reasoning in textual entailment. We benchmark the performance of 9 published NLI models on EQUATE, and find that on average, state-of-the-art methods do not achieve an absolute improvement over a majority-class baseline, suggesting that they do not implicitly learn to reason with quantities. We establish a new baseline Q-REAS that manipulates quantities symbolically. In comparison to the best performing NLI model, it achieves success on numerical reasoning tests (+24.2 %), but has limited verbal reasoning capabilities (-8.1 %). We hope our evaluation framework will support the development of models of quantitative reasoning in language understanding. K19-1033 @@ -417,9 +417,9 @@ Linguistic Analysis Improves Neural Metaphor Detection KevinStowe - SarahMoeller + SarahMoeller LauraMichaelis - MarthaPalmer + MarthaPalmer 362–371 In the field of metaphor detection, deep learning systems are the ubiquitous and achieve strong performance on many tasks. However, due to the complicated procedures for manually identifying metaphors, the datasets available are relatively small and fraught with complications. We show that using syntactic features and lexical resources can automatically provide additional high-quality training data for metaphoric language, and this data can cover gaps and inconsistencies in metaphor annotation, improving state-of-the-art word-level metaphor identification. This novel application of automatically improving training data improves classification across numerous tasks, and reconfirms the necessity of high-quality data for deep learning frameworks. K19-1034 @@ -428,7 +428,7 @@ Cross-Lingual Dependency Parsing with Unlabeled Auxiliary Languages - Wasi UddinAhmad + Wasi UddinAhmad ZhisongZhang XuezheMa Kai-WeiChang @@ -458,7 +458,7 @@ TianWang Arun TejasviChaganty GaborAngeli - Angel X.Chang + Angel X.Chang 393–403 Reflective listening–demonstrating that you have heard your conversational partner–is key to effective communication. Expert human communicators often mimic and rephrase their conversational partner, e.g., when responding to sentimental stories or to questions they don’t know the answer to. We introduce a new task and an associated dataset wherein dialogue agents similarly mimic and rephrase a user’s request to communicate sympathy (I’m sorry to hear that) or lack of knowledge (I do not know that). We study what makes a rephrasal response good against a set of qualitative metrics. We then evaluate three models for generating responses: a syntax-aware rule-based system, a seq2seq LSTM neural models with attention (S2SA), and the same neural model augmented with a copy mechanism (S2SA+C). In a human evaluation, we find that S2SA+C and the rule-based system are comparable and approach human-generated response quality. In addition, experiences with a live deployment of S2SA+C in a customer support setting suggest that this generation task is a practical contribution to real world conversational agents. K19-1037 @@ -470,7 +470,7 @@ Automated Pyramid Summarization Evaluation YanjunGao ChenSun - Rebecca J.Passonneau + Rebecca J.Passonneau 404–418 Pyramid evaluation was developed to assess the content of paragraph length summaries of source texts. A pyramid lists the distinct units of content found in several reference summaries, weights content units by how many reference summaries they occur in, and produces three scores based on the weighted content of new summaries. We present an automated method that is more efficient, more transparent, and more complete than previous automated pyramid methods. It is tested on a new dataset of student summaries, and historical NIST data from extractive summarizers. K19-1038 @@ -583,7 +583,7 @@ LeiGuo KateMays MargritBetke - Derry TantiWijaya + Derry TantiWijaya 504–514 Different news articles about the same topic often offer a variety of perspectives: an article written about gun violence might emphasize gun control, while another might promote 2nd Amendment rights, and yet a third might focus on mental health issues. In communication research, these different perspectives are known as “frames”, which, when used in news media will influence the opinion of their readers in multiple ways. In this paper, we present a method for effectively detecting frames in news headlines. Our training and performance evaluation is based on a new dataset of news headlines related to the issue of gun violence in the United States. This Gun Violence Frame Corpus (GVFC) was curated and annotated by journalism and communication experts. Our proposed approach sets a new state-of-the-art performance for multiclass news frame detection, significantly outperforming a recent baseline by 35.9% absolute difference in accuracy. We apply our frame detection approach in a large scale study of 88k news headlines about the coverage of gun violence in the U.S. between 2016 and 2018. K19-1047 @@ -606,7 +606,7 @@ Learning Dense Representations for Entity Retrieval - DanielGillick + DanielGillick SayaliKulkarni LarryLansing AlessandroPresta @@ -663,7 +663,7 @@ ShesheraMysore AndrewMcCallum AdrianBenton - AmandaStent + AmandaStent 574–581 The official voting records of United States congresspeople are preserved as roll call votes. Prediction of voting behavior of politicians for whom no voting record exists, such as individuals running for office, is important for forecasting key political decisions. Prior work has relied on past votes cast to predict future votes, and thus fails to predict voting patterns for politicians without voting records. We address this by augmenting a prior state of the art model with multiple sources of external knowledge so as to enable prediction on unseen politicians. The sources of knowledge we use are news text and Freebase, a manually curated knowledge base. We propose augmentations based on unigram features for news text, and a knowledge base embedding method followed by a neural network composition for relations from Freebase. Empirical evaluation of these approaches indicate that the proposed models outperform the prior system for politicians with complete historical voting records by 1.0% point of accuracy (8.7% error reduction) and for politicians without voting records by 33.4% points of accuracy (66.7% error reduction). We also show that the knowledge base augmented approach outperforms the news text augmented approach by 4.2% points of accuracy. K19-1053 @@ -674,7 +674,7 @@ <fixed-case>B</fixed-case>eam<fixed-case>S</fixed-case>eg: A Joint Model for Multi-Document Segmentation and Topic Identification PedroMota MaxineEskenazi - LuísaCoheur + LuísaCoheur 582–592 We propose BeamSeg, a joint model for segmentation and topic identification of documents from the same domain. The model assumes that lexical cohesion can be observed across documents, meaning that segments describing the same topic use a similar lexical distribution over the vocabulary. The model implements lexical cohesion in an unsupervised Bayesian setting by drawing from the same language model segments with the same topic. Contrary to previous approaches, we assume that language models are not independent, since the vocabulary changes in consecutive segments are expected to be smooth and not abrupt. We achieve this by using a dynamic Dirichlet prior that takes into account data contributions from other topics. BeamSeg also models segment length properties of documents based on modality (textbooks, slides, etc.). The evaluation is carried out in three datasets. In two of them, improvements of up to 4.8% and 7.3% are obtained in the segmentation and topic identifications tasks, indicating that both tasks should be jointly modeled. K19-1054 @@ -695,7 +695,7 @@ Effective Attention Modeling for Neural Relation Extraction - TapasNayak + TapasNayak Hwee TouNg 603–612 Relation extraction is the task of determining the relation between two entities in a sentence. Distantly-supervised models are popular for this task. However, sentences can be long and two entities can be located far from each other in a sentence. The pieces of evidence supporting the presence of a relation between two entities may not be very direct, since the entities may be connected via some indirect links such as a third entity or via co-reference. Relation extraction in such scenarios becomes more challenging as we need to capture the long-distance interactions among the entities and other words in the sentence. Also, the words in a sentence do not contribute equally in identifying the relation between the two entities. To address this issue, we propose a novel and effective attention model which incorporates syntactic information of the sentence and a multi-factor attention mechanism. Experiments on the New York Times corpus show that our proposed model outperforms prior state-of-the-art models. @@ -772,7 +772,7 @@ I-HungHsu MuYang AramGalstyan - RalphWeischedel + RalphWeischedel NanyunPeng 666–106 We propose a novel deep structured learning framework for event temporal relation extraction. The model consists of 1) a recurrent neural network (RNN) to learn scoring functions for pair-wise relations, and 2) a structured support vector machine (SSVM) to make joint predictions. The neural network automatically learns representations that account for long-term contexts to provide robust features for the structured model, while the SSVM incorporates domain knowledge such as transitive closure of temporal relations as constraints to make better globally consistent decisions. By jointly training the two components, our model combines the benefits of both data-driven learning and knowledge exploitation. Experimental results on three high-quality event temporal relation datasets (TCR, MATRES, and TB-Dense) demonstrate that incorporated with pre-trained contextualized embeddings, the proposed model achieves significantly better performances than the state-of-the-art methods on all three datasets. We also provide thorough ablation studies to investigate our model. @@ -859,7 +859,7 @@ YimingCui NanShao SuHe - Wei-NanZhang + Wei-NanZhang TingLiu ShijinWang GuopingHu @@ -873,7 +873,7 @@ Relation Module for Non-Answerable Predictions on Reading Comprehension KevinHuang YunTang - JingHuang + JingHuang XiaodongHe BowenZhou 747–756 @@ -910,7 +910,7 @@ <fixed-case>TILM</fixed-case>: Neural Language Models with Evolving Topical Influence Shubhra KantiKarmaker Santu KalyanVeeramachaneni - ChengxiangZhai + ChengxiangZhai 778–788 Content of text data are often influenced by contextual factors which often evolve over time (e.g., content of social media are often influenced by topics covered in the major news streams). Existing language models do not consider the influence of such related evolving topics, and thus are not optimal. In this paper, we propose to incorporate such topical-influence into a language model to both improve its accuracy and enable cross-stream analysis of topical influences. Specifically, we propose a novel language model called Topical Influence Language Model (TILM), which is a novel extension of a neural language model to capture the influences on the contents in one text stream by the evolving topics in another related (or possibly same) text stream. Experimental results on six different text stream data comprised of conference paper titles show that the incorporation of evolving topical influence into a language model is beneficial and TILM outperforms multiple baselines in a challenging task of text forecasting. In addition to serving as a language model, TILM further enables interesting analysis of topical influence among multiple text streams. K19-1073 @@ -987,7 +987,7 @@ AneeshPappu RohunSaxena AkhilaYerukola - Christopher D.Manning + Christopher D.Manning 843–861 Large neural language models trained on massive amounts of text have emerged as a formidable strategy for Natural Language Understanding tasks. However, the strength of these models as Natural Language Generators is less clear. Though anecdotal evidence suggests that these models generate better quality text, there has been no detailed study characterizing their generation abilities. In this work, we compare the performance of an extensively pretrained model, OpenAI GPT2-117 (Radford et al., 2019), to a state-of-the-art neural story generation model (Fan et al., 2018). By evaluating the generated text across a wide variety of automatic metrics, we characterize the ways in which pretrained models do, and do not, make better storytellers. We find that although GPT2-117 conditions more strongly on context, is more sensitive to ordering of events, and uses more unusual words, it is just as likely to produce repetitive and under-diverse text when using likelihood-maximizing decoding algorithms. K19-1079 @@ -1038,7 +1038,7 @@ LorenzoTarantino AlexandrosLazaridis AndreasFischer - ClaudiuMusat + ClaudiuMusat 890–899 In sequence modeling tasks the token order matters, but this information can be partially lost due to the discretization of the sequence into data points. In this paper, we study the imbalance between the way certain token pairs are included in data points and others are not. We denote this a token order imbalance (TOI) and we link the partial sequence information loss to a diminished performance of the system as a whole, both in text and speech processing tasks. We then provide a mechanism to leverage the full token order information—Alleviated TOI—by iteratively overlapping the token composition of data points. For recurrent networks, we use prime numbers for the batch size to avoid redundancies when building batches from overlapped data points. The proposed method achieved state of the art performance in both text and speech related tasks. K19-1083 @@ -1074,7 +1074,7 @@ YukunFeng HidetakaKamigaito HiroyaTakamura - ManabuOkumura + ManabuOkumura 920–928 We propose a simple and effective method to inject word-level information into character-aware neural language models. Unlike previous approaches which usually inject word-level information at the input of a long short-term memory (LSTM) network, we inject it into the softmax function. The resultant model can be seen as a combination of character-aware language model and simple word-level language model. Our injection method can also be used together with previous methods. Through the experiments on 14 typologically diverse languages, we empirically show that our injection method, when used together with the previous methods, works better than the previous methods, including a gating mechanism, averaging, and concatenation of word vectors. We also provide a comprehensive comparison of these injection methods. K19-1086 @@ -1083,7 +1083,7 @@ On Model Stability as a Function of Random Seed - PranavaMadhyastha + PranavaMadhyastha RishabhJain 929–939 In this paper, we focus on quantifying model stability as a function of random seed by investigating the effects of the induced randomness on model performance and the robustness of the model in general. We specifically perform a controlled study on the effect of random seeds on the behaviour of attention, gradient-based and surrogate model based (LIME) interpretations. Our analysis suggests that random seeds can adversely affect the consistency of models resulting in counterfactual interpretations. We propose a technique called Aggressive Stochastic Weight Averaging (ASWA) and an extension called Norm-filtered Aggressive Stochastic Weight Averaging (NASWA) which improves the stability of models over random seeds. With our ASWA and NASWA based optimization, we are able to improve the robustness of the original model, on average reducing the standard deviation of the model’s performance by 72%. @@ -1097,7 +1097,7 @@ Studying Generalisability across Abusive Language Detection Datasets Steve DurairajSwamy AnupamJamatia - BjörnGambäck + BjörnGambäck 940–950 Work on Abusive Language Detection has tackled a wide range of subtasks and domains. As a result of this, there exists a great deal of redundancy and non-generalisability between datasets. Through experiments on cross-dataset training and testing, the paper reveals that the preconceived notion of including more non-abusive samples in a dataset (to emulate reality) may have a detrimental effect on the generalisability of a model trained on that data. Hence a hierarchical annotation model is utilised here to reveal redundancies in existing datasets and to help reduce redundancy in future efforts. K19-1088 @@ -1196,7 +1196,7 @@ Predicting the Role of Political Trolls in Social Media AtanasAtanasov GianmarcoDe Francisci Morales - PreslavNakov + PreslavNakov 1023–1034 We investigate the political roles of “Internet trolls” in social media. Political trolls, such as the ones linked to the Russian Internet Research Agency (IRA), have recently gained enormous attention for their ability to sway public opinion and even influence elections. Analysis of the online traces of trolls has shown different behavioral patterns, which target different slices of the population. However, this analysis is manual and labor-intensive, thus making it impractical as a first-response tool for newly-discovered troll farms. In this paper, we show how to automate this analysis by using machine learning in a realistic setting. In particular, we show how to classify trolls according to their political role —left, news feed, right— by using features extracted from social media, i.e., Twitter, in two scenarios: (i) in a traditional supervised learning scenario, where labels for trolls are available, and (ii) in a distant supervision scenario, where labels for trolls are not available, and we rely on more-commonly-available labels for news outlets mentioned by the trolls. Technically, we leverage the community structure and the text of the messages in the online social network of trolls represented as a graph, from which we extract several types of learned representations, i.e., embeddings, for the trolls. Experiments on the “IRA Russian Troll” dataset show that our methodology improves over the state-of-the-art in the first scenario, while providing a compelling case for the second scenario, which has not been explored in the literature thus far. K19-1096 @@ -1221,7 +1221,7 @@ K19-2 StephanOepen OmriAbend - JanHajic + JanHajic DanielHershcovich MarcoKuhlmann TimO’Gorman @@ -1247,7 +1247,7 @@ NianwenXue JayeolChun MilanStraka - ZdenkaUresova + ZdenkaUresova 1–27 The 2019 Shared Task at the Conference for Computational Language Learning (CoNLL) was devoted to Meaning Representation Parsing (MRP) across frameworks. Five distinct approaches to the representation of sentence meaning in the form of directed graph were represented in the training and evaluation data for the task, packaged in a uniform abstract graph representation and serialization. The task received submissions from eighteen teams, of which five do not participate in the official ranking because they arrived after the closing deadline, made use of additional training data, or involved one of the task co-organizers. All technical information regarding the task, including system submissions, official results, and links to supporting resources and software are available from the task web site at: http://mrp.nlpl.eu K19-2001 @@ -1269,7 +1269,7 @@ The <fixed-case>ERG</fixed-case> at <fixed-case>MRP</fixed-case> 2019: Radically Compositional Semantic Dependencies StephanOepen - DanFlickinger + DanFlickinger 40–44 The English Resource Grammar (ERG) is a broad-coverage computational grammar of English that outputs underspecified logical-form representations of meaning in a framework dubbed English Resource Semantics (ERS). Two of the target representations in the the 2019 Shared Task on Cross-Framework Meaning Representation Parsing (MRP 2019) derive graph-based simplifications of ERS, viz. Elementary Dependency Structures (EDS) and DELPH-IN MRS Bi-Lexical Dependencies (DM). As a point of reference outside the official MRP competition, we parsed the evaluation strings using the ERG and converted the resulting meaning representations to EDS and DM. These graphs yield higher evaluation scores than the purely data-driven parsers in the actual shared task, suggesting that the general-purpose linguistic knowledge about English grammar encoded in the ERG can add value when parsing into these meaning representations. K19-2003 @@ -1283,7 +1283,7 @@ ZhuoshengZhang RuiWang MasaoUtiyama - EiichiroSumita + EiichiroSumita 45–54 This paper describes our SJTU-NICT’s system for participating in the shared task on Cross-Framework Meaning Representation Parsing (MRP) at the 2019 Conference for Computational Language Learning (CoNLL). Our system uses a graph-based approach to model a variety of semantic graph parsing tasks. Our main contributions in the submitted system are summarized as follows: 1. Our model is fully end-to-end and is capable of being trained only on the given training set which does not rely on any other extra training source including the companion data provided by the organizer; 2. We extend our graph pruning algorithm to a variety of semantic graphs, solving the problem of excessive semantic graph search space; 3. We introduce multi-task learning for multiple objectives within the same framework. The evaluation results show that our system achieved second place in the overall F_1 score and achieved the best F_1 score on the DM framework. K19-2004 @@ -1350,7 +1350,7 @@ JinwoonMin KwanghyeonPark Jong-HunShin - Young-KilKim + Young-KilKim 95–103 This paper describes Jeonbuk National University (JBNU)’s system for the 2019 shared task on Cross-Framework Meaning Representation Parsing (MRP 2019) at the Conference on Computational Natural Language Learning. Of the five frameworks, we address only the DELPH-IN MRS Bi-Lexical Dependencies (DP), Prague Semantic Dependencies (PSD), and Universal Conceptual Cognitive Annotation (UCCA) frameworks. We propose a unified parsing model using biaffine attention (Dozat and Manning, 2017), consisting of 1) a BERT-BiLSTM encoder and 2) a biaffine attention decoder. First, the BERT-BiLSTM for sentence encoder uses BERT to compose a sentence’s wordpieces into word-level embeddings and subsequently applies BiLSTM to word-level representations. Second, the biaffine attention decoder determines the scores for an edge’s existence and its labels based on biaffine attention functions between roledependent representations. We also present multi-level biaffine attention models by combining all the role-dependent representations that appear at multiple intermediate layers. K19-2009 @@ -1425,7 +1425,7 @@ KiraDroganova AndreyKutuzov NikitaMediankin - DanielZeman + DanielZeman 158–165 This paper describes the ÚFAL--Oslo system submission to the shared task on Cross-Framework Meaning Representation Parsing (MRP, Oepen et al. 2019). The submission is based on several third-party parsers. Within the official shared task results, the submission ranked 11th out of 13 participating systems. K19-2015 @@ -1437,7 +1437,7 @@ Peking at <fixed-case>MRP</fixed-case> 2019: Factorization- and Composition-Based Parsing for Elementary Dependency Structures YufeiChen YajieYe - WeiweiSun + WeiweiSun 166–176 We design, implement and evaluate two semantic parsers, which represent factorization- and composition-based approaches respectively, for Elementary Dependency Structures (EDS) at the CoNLL 2019 Shared Task on Cross-Framework Meaning Representation Parsing. The detailed evaluation of the two parsers gives us a new perception about parsing into linguistically enriched meaning representations: current neural EDS parsers are able to reach an accuracy at the inter-annotator agreement level in the same-epoch-and-domain setup. K19-2016 diff --git a/data/xml/L00.xml b/data/xml/L00.xml index 192512d123..df4562c0a6 100644 --- a/data/xml/L00.xml +++ b/data/xml/L00.xml @@ -19,7 +19,7 @@ GérardBailly - Eduardo R.Banga + Eduardo R.Banga AlexMonaghan ErhardRank The Cost258 Signal Generation Test Array @@ -56,11 +56,11 @@ kilgarriff-rosenzweig-2000-english - AsunciónMoreno + AsunciónMoreno RobrechtComeyne KeithHaslam - Henkvan den Heuvel - HaraldHöge + Henkvan den Heuvel + HaraldHöge SabineHorbach GiorgioMicca <fixed-case>SALA</fixed-case>: <fixed-case>S</fixed-case>peech<fixed-case>D</fixed-case>at across <fixed-case>L</fixed-case>atin <fixed-case>A</fixed-case>merica. Results of the First Phase @@ -68,7 +68,7 @@ moreno-etal-2000-sala - DanTufiş + DanTufiş Using a Large Set of <fixed-case>EAGLES</fixed-case>-compliant Morpho-syntactic Descriptors as a Tagset for Probabilistic Tagging http://www.lrec-conf.org/proceedings/lrec2000/pdf/11.pdf tufis-2000-using @@ -76,13 +76,13 @@ ElliottMacklovitch MichelSimard - PhilippeLanglais + PhilippeLanglais <fixed-case>T</fixed-case>rans<fixed-case>S</fixed-case>earch: A Free Translation Memory on the World Wide Web http://www.lrec-conf.org/proceedings/lrec2000/pdf/12.pdf macklovitch-etal-2000-transsearch - Bolette SandfordPedersen + Bolette SandfordPedersen SanniNimb Semantic Encoding of <fixed-case>D</fixed-case>anish Verbs in <fixed-case>SIMPLE</fixed-case> - Adapting a Verb Framed Model to a Satellite-framed Language http://www.lrec-conf.org/proceedings/lrec2000/pdf/13.pdf @@ -90,14 +90,14 @@ MochizukiHajime - OkumuraManabu + ManabuOkumura A Comparison of Summarization Methods Based on Task-based Evaluation http://www.lrec-conf.org/proceedings/lrec2000/pdf/14.pdf mochizuki-okumura-2000-comparison ZhengJie - MaoYuhang + YuhangMao A Word Sense Disambiguation Method Using Bilingual Corpus http://www.lrec-conf.org/proceedings/lrec2000/pdf/15.pdf zheng-mao-2000-word @@ -114,23 +114,23 @@ SandroPedrazzini - ElisabethMaier + ElisabethMaier DierkKönig Terms Specification and Extraction within a Linguistic-based Intranet Service http://www.lrec-conf.org/proceedings/lrec2000/pdf/17.pdf pedrazzini-etal-2000-terms - EvaHajičová - PetrSgall + EvaHajičová + PetrSgall Semantico-syntactic Tagging of Very Large Corpora: the Case of Restoration of Nodes on the Underlying Level http://www.lrec-conf.org/proceedings/lrec2000/pdf/18.pdf hajicova-sgall-2000-semantico - EvaHajičová - JarmilaPanevová - PetrSgall + EvaHajičová + JarmilaPanevová + PetrSgall Coreference in Annotating a Large Corpus http://www.lrec-conf.org/proceedings/lrec2000/pdf/19.pdf hajicova-etal-2000-coreference @@ -150,7 +150,7 @@ maynard-ananiadou-2000-creating - Ellen M.Voorhees + Ellen M.Voorhees Dawn M.Tice The <fixed-case>TREC</fixed-case>-8 Question Answering Track http://www.lrec-conf.org/proceedings/lrec2000/pdf/26.pdf @@ -165,7 +165,7 @@ SvetlanaSheremetyeva - SergeiNirenburg + SergeiNirenburg Towards A Universal Tool For <fixed-case>NLP</fixed-case> Resource Acquisition http://www.lrec-conf.org/proceedings/lrec2000/pdf/28.pdf sheremetyeva-nirenburg-2000-towards @@ -193,17 +193,17 @@ chenfour-etal-2000-etude - MarcelaCharfuelán - JoséRelaño Gil - M. Carmen RodríguezGancedo - Daniel TapiasMerino - Luis HernándezGómez + MarcelaCharfuelán + JoséRelaño Gil + M. Carmen RodríguezGancedo + Daniel TapiasMerino + Luis HernándezGómez Dialogue Annotation for Language Systems Evaluation http://www.lrec-conf.org/proceedings/lrec2000/pdf/33.pdf charfuelan-etal-2000-dialogue - PhilippeLanglais + PhilippeLanglais SébastienSauvé GeorgeFoster ElliottMacklovitch @@ -213,8 +213,8 @@ langlais-etal-2000-evaluation - GerardoSierra - JohnMcNaught + GerardoSierra + JohnMcNaught Extraction of Semantic Clusters for Terminological Information Retrieval from <fixed-case>MRD</fixed-case>s http://www.lrec-conf.org/proceedings/lrec2000/pdf/35.pdf sierra-mcnaught-2000-extraction @@ -223,32 +223,32 @@ Jean-YvesAntoine JacquesSiroux JeanCaelen - JeanneVillaneau - JérômeGoulian + JeanneVillaneau + JérômeGoulian MohamedAhafhaf Obtaining Predictive Results with an Objective Evaluation of Spoken Dialogue Systems: Experiments with the <fixed-case>DCR</fixed-case> Assessment Paradigm http://www.lrec-conf.org/proceedings/lrec2000/pdf/36.pdf antoine-etal-2000-obtaining - GuyPérennou - Martinede Calmès + GuyPérennou + Martinede Calmès <fixed-case>MHATL</fixed-case>ex: Lexical Resources for Modelling the <fixed-case>F</fixed-case>rench Pronunciation http://www.lrec-conf.org/proceedings/lrec2000/pdf/37.pdf perennou-de-calmes-2000-mhatlex Carine-AlexiaLavelle - Martinede Calmès - GuyPérennou + Martinede Calmès + GuyPérennou Dialogue and Prompting Strategies Evaluation in the <fixed-case>DEMON</fixed-case> System http://www.lrec-conf.org/proceedings/lrec2000/pdf/38.pdf lavelle-etal-2000-dialogue - Henkvan den Heuvel - LouBoves - KhalidChoukri + Henkvan den Heuvel + LouBoves + KhalidChoukri SimoGoddijn EricSanders <fixed-case>SLR</fixed-case> Validation: Present State of Affairs and Prospects @@ -269,7 +269,7 @@ MarcSwerts - EmielKrahmer + EmielKrahmer On the Use of Prosody for On-line Evaluation of Spoken Dialogue Systems http://www.lrec-conf.org/proceedings/lrec2000/pdf/43.pdf swerts-krahmer-2000-use @@ -290,10 +290,10 @@ aduriz-etal-2000-word-level - AlbertRussel - HennieBrugman - DaanBroeder - PeterWittenburg + AlbertRussel + HennieBrugman + DaanBroeder + PeterWittenburg The <fixed-case>EUDICO</fixed-case> Project, Multi Media Annotation over the <fixed-case>I</fixed-case>nternet http://www.lrec-conf.org/proceedings/lrec2000/pdf/45.pdf russel-etal-2000-eudico @@ -306,7 +306,7 @@ braasch-olsen-2000-towards - Stavroula-EvitaFotinea + Stavroula-EvitaFotinea AthanassiosProtopapas DimitrisDimitriadis GeorgeCarayannis @@ -328,7 +328,7 @@ DamjanVlaj JanezKaiser RalphWilhelm - UteZiegenhain + UteZiegenhain <fixed-case>PLEDIT</fixed-case> - A New Efficient Tool for Management of Multilingual Pronunciation Lexica and Batchlists http://www.lrec-conf.org/proceedings/lrec2000/pdf/53.pdf vlaj-etal-2000-pledit @@ -336,7 +336,7 @@ RosaEstopà JordiVivaldi - M. TeresaCabré + M. TeresaCabré Use of <fixed-case>G</fixed-case>reek and <fixed-case>L</fixed-case>atin Forms for Term Detection http://www.lrec-conf.org/proceedings/lrec2000/pdf/55.pdf estopa-etal-2000-use @@ -350,8 +350,8 @@ canelli-etal-2000-methods - Noah A.Smith - Michael E.Jahr + Noah A.Smith + Michael E.Jahr <fixed-case>C</fixed-case>airo: An Alignment Visualization Tool http://www.lrec-conf.org/proceedings/lrec2000/pdf/58.pdf smith-jahr-2000-cairo @@ -364,26 +364,26 @@ mengel-lezius-2000-xml - OrnellaCorazzari - NicolettaCalzolari - AntonioZampolli + OrnellaCorazzari + NicolettaCalzolari + AntonioZampolli An Experiment of Lexical-Semantic Tagging of an <fixed-case>I</fixed-case>talian Corpus http://www.lrec-conf.org/proceedings/lrec2000/pdf/60.pdf corazzari-etal-2000-experiment - NuriaBel + NuriaBel FedericaBusa - NicolettaCalzolari + NicolettaCalzolari ElisabettaGola AlessandroLenci MonicaMonachini AntoineOgonowski IvonnePeters - WimPeters + WimPeters NildaRuimy MartaVillegas - AntonioZampolli + AntonioZampolli <fixed-case>SIMPLE</fixed-case>: A General Framework for the Development of Multilingual Lexicons http://www.lrec-conf.org/proceedings/lrec2000/pdf/61.pdf bel-etal-2000-simple @@ -396,7 +396,7 @@ RainerSiemund - HaraldHöge + HaraldHöge SiegfriedKunzmann KrzysztofMarasek <fixed-case>SPEECON</fixed-case> - Speech Data for Consumer Devices @@ -405,9 +405,9 @@ AntonioMoreno - RalphGrishman + RalphGrishman SusanaLópez - FernandoSánchez + FernandoSánchez SatoshiSekine A Treebank of <fixed-case>S</fixed-case>panish and its Application to Parsing http://www.lrec-conf.org/proceedings/lrec2000/pdf/66.pdf @@ -443,7 +443,7 @@ MichaelHess MichaelKluck CarolPeters - PeterSchäuble + PeterSchäuble The Evaluation of Systems for Cross-language Information Retrieval http://www.lrec-conf.org/proceedings/lrec2000/pdf/70.pdf braschler-etal-2000-evaluation @@ -456,8 +456,8 @@ goncalves-veloso-2000-spoken - Maria Fernanda Bacelardo Nascimento - LuisaPereira + Maria Fernanda Bacelardo Nascimento + LuisaPereira JoãoSaramago <fixed-case>P</fixed-case>ortuguese Corpora at <fixed-case>CLUL</fixed-case> http://www.lrec-conf.org/proceedings/lrec2000/pdf/72.pdf @@ -465,14 +465,14 @@ AntonioMoreno - ChantalPérez + ChantalPérez Reusing the Mikrokosmos Ontology for Concept-based Multilingual Terminology Databases http://www.lrec-conf.org/proceedings/lrec2000/pdf/74.pdf moreno-perez-2000-reusing KimuraKazuhiro - HirakawaHideki + HidekiHirakawa Abstraction of the <fixed-case>EDR</fixed-case> Concept Classification and its Effectiveness in Word Sense Disambiguation http://www.lrec-conf.org/proceedings/lrec2000/pdf/75.pdf kimura-hirakawa-2000-abstraction @@ -480,13 +480,13 @@ AlessandroCucchiarelli EnricoFaggioli - PaolaVelardi + PaolaVelardi Will Very Large Corpora Play For Semantic Disambiguation The Role That Massive Computing Power Is Playing For Other <fixed-case>AI</fixed-case>-Hard Problems? http://www.lrec-conf.org/proceedings/lrec2000/pdf/76.pdf cucchiarelli-etal-2000-will - ShuichiItahashi + ShuichiItahashi Guidelines for <fixed-case>J</fixed-case>apanese Speech Synthesizer Evaluation http://www.lrec-conf.org/proceedings/lrec2000/pdf/77.pdf itahashi-2000-guidelines @@ -518,14 +518,14 @@ gavieiro-villatte-spaggiari-2000-open - RodolfoDelmonte + RodolfoDelmonte Shallow Parsing and Functional Structure in <fixed-case>I</fixed-case>talian Corpora http://www.lrec-conf.org/proceedings/lrec2000/pdf/82.pdf delmonte-2000-shallow DimitriosKokkinakis - Maria ToporowskaGronostaj + Maria ToporowskaGronostaj KarinWarmenius Annotating, Disambiguating & Automatically Extending the Coverage of the <fixed-case>S</fixed-case>wedish <fixed-case>SIMPLE</fixed-case> Lexicon http://www.lrec-conf.org/proceedings/lrec2000/pdf/84.pdf @@ -533,7 +533,7 @@ DianaSantos - EckhardBick + EckhardBick Providing <fixed-case>I</fixed-case>nternet Access to <fixed-case>P</fixed-case>ortuguese Corpora: the <fixed-case>AC</fixed-case>/<fixed-case>DC</fixed-case> Project http://www.lrec-conf.org/proceedings/lrec2000/pdf/85.pdf santos-bick-2000-providing @@ -558,7 +558,7 @@ GiuliaBernardis HervéBourlard - MartinRajman + MartinRajman Jean-CédricChappelier Development of Acoustic and Linguistic Resources for Research and Evaluation in Interactive Vocal Information Servers http://www.lrec-conf.org/proceedings/lrec2000/pdf/90.pdf @@ -570,14 +570,14 @@ PilarAlvariño AdelaidaGil María PaulaSantalla - SusanaSotelo + SusanaSotelo An Architecture for Document Routing in <fixed-case>S</fixed-case>panish: Two Language Components, Pre-processor and Parser http://www.lrec-conf.org/proceedings/lrec2000/pdf/91.pdf rojo-etal-2000-architecture - John A.Bateman - Anthony F.Hartley + John A.Bateman + Anthony F.Hartley Target Suites for Evaluating the Coverage of Text Generators http://www.lrec-conf.org/proceedings/lrec2000/pdf/92.pdf bateman-hartley-2000-target @@ -608,20 +608,20 @@ MartaVillegas - NuriaBel + NuriaBel AlessandroLenci - NicolettaCalzolari + NicolettaCalzolari NildaRuimy - AntonioZampolli + AntonioZampolli TeresaSadurní - JoanSoler + JoanSoler Multilingual Linguistic Resources: From Monolingual Lexicons to Bilingual Interrelated Lexicons http://www.lrec-conf.org/proceedings/lrec2000/pdf/96.pdf villegas-etal-2000-multilingual AlessandroLenci - SimonettaMontemagni + SimonettaMontemagni VitoPirrelli ClaudiaSoria Where Opposites Meet. A Syntactic Meta-scheme for Corpus Annotation and Parsing Evaluation @@ -630,15 +630,15 @@ PaoloAllegrini - SimonettaMontemagni + SimonettaMontemagni VitoPirrelli Controlled Bootstrapping of Lexico-semantic Classes as a Bridge between Paradigmatic and Syntagmatic Knowledge: Methodology and Evaluation http://www.lrec-conf.org/proceedings/lrec2000/pdf/99.pdf allegrini-etal-2000-controlled - RodgerKibble - Keesvan Deemter + RodgerKibble + Keesvan Deemter Coreference Annotation: Whither? http://www.lrec-conf.org/proceedings/lrec2000/pdf/100.pdf kibble-van-deemter-2000-coreference @@ -660,7 +660,7 @@ munteanu-boldea-2000-mdwoz - DanBohuş + DanBohuş MarianBoldea A Web-based Text Corpora Development System http://www.lrec-conf.org/proceedings/lrec2000/pdf/105.pdf @@ -674,7 +674,7 @@ georgantopoulos-piperidis-2000-term - KristīneLevāne + KristīneLevāne AndrejsSpektors Morphemic Analysis and Morphological Tagging of <fixed-case>L</fixed-case>atvian Corpus http://www.lrec-conf.org/proceedings/lrec2000/pdf/107.pdf @@ -682,7 +682,7 @@ PatrickKremer - LaurentSchmitt + LaurentSchmitt Textual Information Retrieval Systems Test: The Point of View of an Organizer and Corpuses Provider http://www.lrec-conf.org/proceedings/lrec2000/pdf/109.pdf kremer-schmitt-2000-textual @@ -695,13 +695,13 @@ ToniBadia - ÀngelsEgea + ÀngelsEgea A Strategy for the Syntactic Parsing of Corpora: from Constraint Grammar Output to Unification-based Processing http://www.lrec-conf.org/proceedings/lrec2000/pdf/111.pdf badia-egea-2000-strategy - JoanSoler i Bou + JoanSoler i Bou Producing <fixed-case>LR</fixed-case>s in Parallel with Lexicographic Description: the <fixed-case>DCC</fixed-case> project http://www.lrec-conf.org/proceedings/lrec2000/pdf/112.pdf soler-i-bou-2000-producing @@ -714,14 +714,14 @@ fujii-ishikawa-2000-novelty - RuslanMitkov + RuslanMitkov Towards More Comprehensive Evaluation in Anaphora Resolution http://www.lrec-conf.org/proceedings/lrec2000/pdf/115.pdf mitkov-2000-towards - JosephPolifroni - StephanieSeneff + JosephPolifroni + StephanieSeneff Galaxy-<fixed-case>II</fixed-case> as an Architecture for Spoken Dialogue Evaluation http://www.lrec-conf.org/proceedings/lrec2000/pdf/116.pdf polifroni-seneff-2000-galaxy @@ -733,7 +733,7 @@ tadic-2000-building - TamásVáradi + TamásVáradi Lexical and Translation Equivalence in Parallel Corpora http://www.lrec-conf.org/proceedings/lrec2000/pdf/122.pdf varadi-2000-lexical @@ -760,23 +760,23 @@ AdrianaRoventini AntoniettaAlonge - NicolettaCalzolari - BernardoMagnini + NicolettaCalzolari + BernardoMagnini FrancescaBertagna <fixed-case>I</fixed-case>tal<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: a Large Semantic Database for <fixed-case>I</fixed-case>talian http://www.lrec-conf.org/proceedings/lrec2000/pdf/129.pdf roventini-etal-2000-italwordnet - CătălinaBarbu + CătălinaBarbu <fixed-case>FAST</fixed-case> - Towards a Semi-automatic Annotation of Corpora http://www.lrec-conf.org/proceedings/lrec2000/pdf/130.pdf barbu-2000-fast FrançoisTrouilleux - EricGaussier - Gabriel G.Bès + EricGaussier + Gabriel G.Bès AnnieZaenen Coreference Resolution Evaluation Based on Descriptive Specificity http://www.lrec-conf.org/proceedings/lrec2000/pdf/131.pdf @@ -789,20 +789,20 @@ dutoit-2000-text - Philippe Boulade Mareüil + Philippe Boulade Mareüil Christophed’Alessandro FrançoisYvon VéroniqueAubergé - JacquelineVaissière + JacquelineVaissière AngéliqueAmelot A <fixed-case>F</fixed-case>rench Phonetic Lexicon with Variants for Speech and Language Processing http://www.lrec-conf.org/proceedings/lrec2000/pdf/133.pdf de-mareuil-etal-2000-french - LailaDybkjær + LailaDybkjær Morten BaunMøller - Niels OleBernsen + Niels OleBernsen MichaelGrosse MartinOlsen AmandaSchiffrin @@ -811,8 +811,8 @@ dybkjaer-etal-2000-annotating - Niels OleBernsen - LailaDybkjær + Niels OleBernsen + LailaDybkjær A Methodology for Evaluating Spoken Language Dialogue Systems and Their Components http://www.lrec-conf.org/proceedings/lrec2000/pdf/135.pdf bernsen-dybkjaer-2000-methodology @@ -828,22 +828,22 @@ LarsAhrenberg MagnusMerkel - Anna SågvallHein - JörgTiedemann + Anna SågvallHein + JörgTiedemann Evaluation of Word Alignment Systems http://www.lrec-conf.org/proceedings/lrec2000/pdf/137.pdf ahrenberg-etal-2000-evaluation - HervéDéjean + HervéDéjean How To Evaluate and Compare Tagsets? A Proposal http://www.lrec-conf.org/proceedings/lrec2000/pdf/138.pdf dejean-2000-evaluate - JohnWhite - JenniferDoyon - SusanTalbott + JohnWhite + JenniferDoyon + SusanTalbott Determining the Tolerance of Text-handling Tasks for <fixed-case>MT</fixed-case> Output http://www.lrec-conf.org/proceedings/lrec2000/pdf/139.pdf white-etal-2000-determining @@ -856,20 +856,20 @@ SabineBuchholz - Antalvan den Bosch + Antalvan den Bosch Integrating Seed Names and ngrams for a Named Entity List and Classifier http://www.lrec-conf.org/proceedings/lrec2000/pdf/141.pdf buchholz-van-den-bosch-2000-integrating - HidekiKashioka - SatosiShirai + HidekiKashioka + SatosiShirai Automatically Expansion of Thesaurus Entries with a Different Thesaurus http://www.lrec-conf.org/proceedings/lrec2000/pdf/142.pdf kashioka-shirai-2000-automatically - DanielZeman + DanielZeman AnoopSarkar Learning Verb Subcategorization from Corpora: Counting Frame Subsets http://www.lrec-conf.org/proceedings/lrec2000/pdf/145.pdf @@ -877,7 +877,7 @@ SašoDžeroski - TomažErjavec + TomažErjavec JakubZavrel Morphosyntactic Tagging of <fixed-case>S</fixed-case>lovene: Evaluating Taggers and Tagsets http://www.lrec-conf.org/proceedings/lrec2000/pdf/146.pdf @@ -892,14 +892,14 @@ micca-etal-2000-cross - WimPeters + WimPeters IvonnePeters Lexicalised Systematic Polysemy in <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et http://www.lrec-conf.org/proceedings/lrec2000/pdf/148.pdf peters-peters-2000-lexicalised - BjörnGambäck + BjörnGambäck FredrikOlsson Experiences of Language Engineering Algorithm Reuse http://www.lrec-conf.org/proceedings/lrec2000/pdf/151.pdf @@ -914,13 +914,13 @@ JakubZavrel - WalterDaelemans + WalterDaelemans Bootstrapping a Tagged Corpus through Combination of Existing Heterogeneous Taggers http://www.lrec-conf.org/proceedings/lrec2000/pdf/155.pdf zavrel-daelemans-2000-bootstrapping - BarboraHladká + BarboraHladká The Context (not only) for Humans http://www.lrec-conf.org/proceedings/lrec2000/pdf/156.pdf hladka-2000-context @@ -941,7 +941,7 @@ PhilippeAlcouffe NicolasGacon ClaudeRoux - FrédériqueSegond + FrédériqueSegond A Step toward Semantic Indexing of an Encyclopedic Corpus http://www.lrec-conf.org/proceedings/lrec2000/pdf/161.pdf alcouffe-etal-2000-step @@ -958,8 +958,8 @@ Gees C.Stein - TomekStrzalkowski - G. BowdenWise + TomekStrzalkowski + G. BowdenWise AmitBagga Evaluating Summaries for Multiple Documents in an Interactive Environment http://www.lrec-conf.org/proceedings/lrec2000/pdf/163.pdf @@ -1010,25 +1010,25 @@ wayne-2000-multilingual - Montserrat MarimonFelipe - Jordi PortaZamorano + Montserrat MarimonFelipe + Jordi PortaZamorano <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> Disambiguation and Partial Parsing Bidirectional Interaction http://www.lrec-conf.org/proceedings/lrec2000/pdf/169.pdf felipe-zamorano-2000-pos - HamishCunningham - KalinaBontcheva + HamishCunningham + KalinaBontcheva ValentinTablan - YorickWilks + YorickWilks Software Infrastructure for Language Resources: a Taxonomy of Previous Work and a Requirements Analysis http://www.lrec-conf.org/proceedings/lrec2000/pdf/170.pdf cunningham-etal-2000-software - NancyIde + NancyIde PatriceBonhomme - LaurentRomary + LaurentRomary <fixed-case>XCES</fixed-case>: An <fixed-case>XML</fixed-case>-based Encoding Standard for Linguistic Corpora http://www.lrec-conf.org/proceedings/lrec2000/pdf/172.pdf ide-etal-2000-xces @@ -1036,9 +1036,9 @@ IasonDemiros SotirisBoutsis - VoulaGiouli + VoulaGiouli MariaLiakata - HarrisPapageorgiou + HarrisPapageorgiou SteliosPiperidis Named Entity Recognition in <fixed-case>G</fixed-case>reek Texts http://www.lrec-conf.org/proceedings/lrec2000/pdf/173.pdf @@ -1047,7 +1047,7 @@ SotirisBoutsis ProkopisProkopidis - VoulaGiouli + VoulaGiouli SteliosPiperidis A Robust Parser for Unrestricted <fixed-case>G</fixed-case>reek Text http://www.lrec-conf.org/proceedings/lrec2000/pdf/174.pdf @@ -1055,13 +1055,13 @@ MatejRojc - ZdravkoKačič + ZdravkoKačič A Computational Platform for Development of Morphologic and Phonetic Lexica http://www.lrec-conf.org/proceedings/lrec2000/pdf/175.pdf rojc-kacic-2000-computational - ConstantinOrăsan + ConstantinOrăsan RameshKrishnamurthy An Open Architecture for the Construction and Administration of Corpora http://www.lrec-conf.org/proceedings/lrec2000/pdf/176.pdf @@ -1069,13 +1069,13 @@ MatejRojc - ZdravkoKačič + ZdravkoKačič Design of Optimal <fixed-case>S</fixed-case>lovenian Speech Corpus for Use in the Concatenative Speech Synthesis System http://www.lrec-conf.org/proceedings/lrec2000/pdf/177.pdf rojc-kacic-2000-design - ConstantinOrăsan + ConstantinOrăsan <fixed-case>CL</fixed-case>ink<fixed-case>A</fixed-case> A Coreferential Links Annotator http://www.lrec-conf.org/proceedings/lrec2000/pdf/179.pdf orasan-2000-clinka @@ -1088,9 +1088,9 @@ kilgarriff-yallop-2000-whats - HarrisPapageorgiou + HarrisPapageorgiou ProkopisProkopidis - VoulaGiouli + VoulaGiouli SteliosPiperidis A Unified <fixed-case>POS</fixed-case> Tagging Architecture and its Application to <fixed-case>G</fixed-case>reek http://www.lrec-conf.org/proceedings/lrec2000/pdf/181.pdf @@ -1105,7 +1105,7 @@ AndreasWitt - HaraldLüngen + HaraldLüngen DafyddGibbon Enhancing Speech Corpus Resources with Multiple Lexical Tag Layers http://www.lrec-conf.org/proceedings/lrec2000/pdf/183.pdf @@ -1113,11 +1113,11 @@ StevenBird - DavidDay - JohnGarofolo - JohnHenderson - ChristopheLaprun - MarkLiberman + DavidDay + JohnGarofolo + JohnHenderson + ChristopheLaprun + MarkLiberman <fixed-case>ATLAS</fixed-case>: A Flexible and Extensible Architecture for Linguistic Annotation http://www.lrec-conf.org/proceedings/lrec2000/pdf/184.pdf bird-etal-2000-atlas @@ -1133,7 +1133,7 @@ Lluísde Yzaguirre MartaRibas JordiVivaldi - M. TeresaCabré + M. TeresaCabré Some Technical Aspects about Aligning Near Languages http://www.lrec-conf.org/proceedings/lrec2000/pdf/186.pdf de-yzaguirre-etal-2000-technical @@ -1153,16 +1153,16 @@ krenn-2000-cdb - MarilynWalker - LynetteHirschman + MarilynWalker + LynetteHirschman JohnAberdeen Evaluation for Darpa Communicator Spoken Dialogue Systems http://www.lrec-conf.org/proceedings/lrec2000/pdf/191.pdf walker-etal-2000-evaluation - EdouardGeoffrois - ClaudeBarras + EdouardGeoffrois + ClaudeBarras StevenBird ZhibiaoWu Transcribing with Annotation Graphs @@ -1170,7 +1170,7 @@ geoffrois-etal-2000-transcribing - MassimoPoesio + MassimoPoesio Annotating a Corpus to Develop and Evaluate Discourse Entity Realization Algorithms: Issues and Preliminary Results http://www.lrec-conf.org/proceedings/lrec2000/pdf/193.pdf poesio-2000-annotating @@ -1184,16 +1184,16 @@ bird-etal-2000-towards - CatherineMacleod - NancyIde - RalphGrishman + CatherineMacleod + NancyIde + RalphGrishman The <fixed-case>A</fixed-case>merican National Corpus: A Standardized Resource for <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish http://www.lrec-conf.org/proceedings/lrec2000/pdf/196.pdf macleod-etal-2000-american - MarthaPalmer - Hoa TrangDang + MarthaPalmer + Hoa TrangDang JosephRosenzweig Semantic Tagging for the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank http://www.lrec-conf.org/proceedings/lrec2000/pdf/197.pdf @@ -1212,9 +1212,9 @@ ribarov-2000-un - DavidDay + DavidDay AlanGoldschen - JohnHenderson + JohnHenderson A Framework for Cross-Document Annotation http://www.lrec-conf.org/proceedings/lrec2000/pdf/201.pdf day-etal-2000-framework @@ -1227,10 +1227,10 @@ cadel-ledouble-2000-extraction - Eric J.Breck - John D.Burger + Eric J.Breck + John D.Burger LisaFerro - LynetteHirschman + LynetteHirschman DavidHouse MarcLight InderjeetMani @@ -1256,40 +1256,40 @@ SunLe JinYoubing DuLin - SunYufang + YufangSun Automatic Extraction of <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Term Lexicons from Noisy Bilingual Corpora http://www.lrec-conf.org/proceedings/lrec2000/pdf/208.pdf sun-etal-2000-automatic - ChristopherCieri - MarkLiberman + ChristopherCieri + MarkLiberman Issues in Corpus Creation and Distribution: The Evolution of the <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium http://www.lrec-conf.org/proceedings/lrec2000/pdf/209.pdf cieri-liberman-2000-issues - ChristopherCieri + ChristopherCieri DavidGraff - MarkLiberman + MarkLiberman NiiMartey - StephanieStrassel + StephanieStrassel Large, Multilingual, Broadcast News Corpora for Cooperative Research in Topic Detection and Tracking: The <fixed-case>TDT</fixed-case>-2 and <fixed-case>TDT</fixed-case>-3 Corpus Efforts http://www.lrec-conf.org/proceedings/lrec2000/pdf/210.pdf cieri-etal-2000-large - YujiMatsumoto + YujiMatsumoto TatsuoYamashita Using Machine Learning Methods to Improve Quality of Tagged Corpora and Learning Models http://www.lrec-conf.org/proceedings/lrec2000/pdf/211.pdf matsumoto-yamashita-2000-using - StephanieStrassel + StephanieStrassel DavidGraff NiiMartey - ChristopherCieri + ChristopherCieri Quality Control in Large Annotation Projects Involving Multiple Judges: The Case of the <fixed-case>TDT</fixed-case> Corpora http://www.lrec-conf.org/proceedings/lrec2000/pdf/212.pdf strassel-etal-2000-quality @@ -1301,29 +1301,29 @@ utsuro-2000-learning - Lin-ShanLee + Lin-ShanLee Lee-FengChien Live Lexicons and Dynamic Corpora Adapted to the Network Resources for <fixed-case>C</fixed-case>hinese Spoken Language Processing Applications in an <fixed-case>I</fixed-case>nternet Era http://www.lrec-conf.org/proceedings/lrec2000/pdf/214.pdf lee-chien-2000-live - LoriLevin + LoriLevin BorisBartlog - AriadnaFont Llitjos - DonnaGates - AlonLavie + AriadnaFont Llitjos + DonnaGates + AlonLavie DorcasWallace TaroWatanabe - MonikaWoszczyna + MonikaWoszczyna Lessons Learned from a Task-based Evaluation of Speech-to-Speech Machine Translation http://www.lrec-conf.org/proceedings/lrec2000/pdf/215.pdf levin-etal-2000-lessons - FrankVan Eynde + FrankVan Eynde JakubZavrel - WalterDaelemans + WalterDaelemans Part of Speech Tagging and Lemmatisation for the Spoken <fixed-case>D</fixed-case>utch Corpus http://www.lrec-conf.org/proceedings/lrec2000/pdf/216.pdf van-eynde-etal-2000-part @@ -1337,15 +1337,15 @@ weilhammer-etal-2000-influence - LeonardoLesmo + LeonardoLesmo VincenzoLombardo Automatic Assignment of Grammatical Relations http://www.lrec-conf.org/proceedings/lrec2000/pdf/218.pdf lesmo-lombardo-2000-automatic - BernardoMagnini - GabrielaCavaglià + BernardoMagnini + GabrielaCavaglià Integrating Subject Field Codes into <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et http://www.lrec-conf.org/proceedings/lrec2000/pdf/219.pdf magnini-cavaglia-2000-integrating @@ -1354,7 +1354,7 @@ CristinaBosco VincenzoLombardo DanielaVassallo - LeonardoLesmo + LeonardoLesmo Building a Treebank for <fixed-case>I</fixed-case>talian: a Data-driven Annotation Schema http://www.lrec-conf.org/proceedings/lrec2000/pdf/220.pdf bosco-etal-2000-building @@ -1376,9 +1376,9 @@ ByeongchangKim - Jin-seokLee - JeongwonCha - GeunbaeLee + Jin-seokLee + JeongwonCha + GeunbaeLee <fixed-case>POSCAT</fixed-case>: A Morpheme-based Speech Corpus Annotation Tool http://www.lrec-conf.org/proceedings/lrec2000/pdf/224.pdf kim-etal-2000-poscat @@ -1392,23 +1392,23 @@ Byung-JuKang - Key-SunChoi + Key-SunChoi Automatic Transliteration and Back-transliteration by Decision Tree Learning http://www.lrec-conf.org/proceedings/lrec2000/pdf/227.pdf kang-choi-2000-automatic KlausRies - LoriLevin + LoriLevin LizaValle - AlonLavie - AlexWaibel + AlonLavie + AlexWaibel Shallow Discourse Genre Annotation in <fixed-case>C</fixed-case>all<fixed-case>H</fixed-case>ome <fixed-case>S</fixed-case>panish http://www.lrec-conf.org/proceedings/lrec2000/pdf/228.pdf ries-etal-2000-shallow - AnneAbeillé + AnneAbeillé LionelClément AlexandraKinyon Building a Treebank for <fixed-case>F</fixed-case>rench @@ -1440,33 +1440,33 @@ LuisaBentivogli - EmanuelePianta - FabioPianesi + EmanuelePianta + FabioPianesi Coping with Lexical Gaps when Building Aligned Multilingual Wordnets http://www.lrec-conf.org/proceedings/lrec2000/pdf/236.pdf bentivogli-etal-2000-coping Young-SoogChae - Key-SunChoi + Key-SunChoi Design and Construction of Knowledge base for Verb using <fixed-case>MRD</fixed-case> and Tagged Corpus http://www.lrec-conf.org/proceedings/lrec2000/pdf/237.pdf chae-choi-2000-design Young-SoogChae - Key-SunChoi + Key-SunChoi Introduction of <fixed-case>KIBS</fixed-case> (<fixed-case>K</fixed-case>orean Information Base System) Project http://www.lrec-conf.org/proceedings/lrec2000/pdf/239.pdf chae-choi-2000-introduction - JohnBateman + JohnBateman ElkeTeich - Geert-JanKruijff - IvanaKruijff-Korbayová + Geert-JanKruijff + IvanaKruijff-Korbayová SergeSharoff - HanaSkoumalová + HanaSkoumalová Resources for Multilingual Text Generation in Three <fixed-case>S</fixed-case>lavic Languages http://www.lrec-conf.org/proceedings/lrec2000/pdf/241.pdf bateman-etal-2000-resources @@ -1479,23 +1479,23 @@ gibbon-trippel-2000-multi - LynneCahill + LynneCahill ChristyDoran - RogerEvans - RodgerKibble - ChrisMellish + RogerEvans + RodgerKibble + ChrisMellish D.Paiva - MikeReape - DoniaScott - NeilTipper + MikeReape + DoniaScott + NeilTipper Enabling Resource Sharing in Language Generation: an Abstract Reference Architecture http://www.lrec-conf.org/proceedings/lrec2000/pdf/244.pdf cahill-etal-2000-enabling - ZdravkoKačič + ZdravkoKačič BogomirHorvat - AleksandraZögling + AleksandraZögling Issues in Design and Collection of Large Telephone Speech Corpus for <fixed-case>S</fixed-case>lovenian Language http://www.lrec-conf.org/proceedings/lrec2000/pdf/246.pdf kacic-etal-2000-issues @@ -1508,17 +1508,17 @@ jouis-arc-a3-2000-arc - Richard F. E.Sutcliffe + Richard F. E.Sutcliffe SadaoKurohashi A Parallel <fixed-case>E</fixed-case>nglish-<fixed-case>J</fixed-case>apanese Query Collection for the Evaluation of On-Line Help Systems http://www.lrec-conf.org/proceedings/lrec2000/pdf/248.pdf sutcliffe-kurohashi-2000-parallel - DanTufiş + DanTufiş PéterDienes CsabaOravecz - TamásVáradi + TamásVáradi Principled Hidden Tagset Design for Tiered Tagging of <fixed-case>H</fixed-case>ungarian http://www.lrec-conf.org/proceedings/lrec2000/pdf/249.pdf tufis-etal-2000-principled @@ -1526,9 +1526,9 @@ FelisaVerdejo JulioGonzalo - AnselmoPeñas + AnselmoPeñas FernandoLópez - DavidFernández + DavidFernández Evaluating Wordnets in Cross-language Information Retrieval: the <fixed-case>ITEM</fixed-case> Search Engine http://www.lrec-conf.org/proceedings/lrec2000/pdf/250.pdf verdejo-etal-2000-evaluating @@ -1555,9 +1555,9 @@ milde-reinsch-2000-universal - HelkaFolch + HelkaFolch SergeHeiden - BenoîtHabert + BenoîtHabert SergeFleury GabrielIllouz PierreLafon @@ -1579,7 +1579,7 @@ LuziaWittmann - Ricardo DanielRibeiro + Ricardo DanielRibeiro TâniaPêgo FernandoBatista Some Language Resources and Tools for Computational Processing of <fixed-case>P</fixed-case>ortuguese at <fixed-case>INESC</fixed-case> @@ -1594,7 +1594,7 @@ utsuro-sassano-2000-minimally - Joyce YueChai + Joyce YueChai Evaluation of a Generic Lexical Semantic Resource in Information Extraction http://www.lrec-conf.org/proceedings/lrec2000/pdf/259.pdf chai-2000-evaluation @@ -1609,12 +1609,12 @@ KatsunobuItou KiyohiroShikano TatsuyaKawahara - KasuyaTakeda + KasuyaTakeda AtsushiYamada AkinoriItou TakehitoUtsuro TetsunoriKobayashi - NobuakiMinematsu + NobuakiMinematsu MikioYamamoto ShigekiSagayama AkinobuLee @@ -1623,20 +1623,20 @@ itou-etal-2000-ipa - KikuoMaekawa - HanaeKoiso - SadaokiFurui + KikuoMaekawa + HanaeKoiso + SadaokiFurui HitoshiIsahara Spontaneous Speech Corpus of <fixed-case>J</fixed-case>apanese http://www.lrec-conf.org/proceedings/lrec2000/pdf/262.pdf maekawa-etal-2000-spontaneous - SeanBoisen - Michael R.Crystal - RichardSchwartz + SeanBoisen + Michael R.Crystal + RichardSchwartz RebeccaStone - RalphWeischedel + RalphWeischedel Annotating Resources for Information Extraction http://www.lrec-conf.org/proceedings/lrec2000/pdf/263.pdf boisen-etal-2000-annotating @@ -1657,16 +1657,16 @@ ConstandinaEconomou - SpyrosRaptis + SpyrosRaptis GregoryStainhaouer <fixed-case>LEXIPLOIGISSI</fixed-case>: An Educational Platform for the Teaching of Terminology in <fixed-case>G</fixed-case>reece http://www.lrec-conf.org/proceedings/lrec2000/pdf/271.pdf economou-etal-2000-lexiploigissi - MalgorzataMarciniak + MalgorzataMarciniak AgnieszkaMykowiecka - AnnaKupść + AnnaKupść AdamPrzepiórkowski An <fixed-case>HPSG</fixed-case>-Annotated Test Suite for <fixed-case>P</fixed-case>olish http://www.lrec-conf.org/proceedings/lrec2000/pdf/272.pdf @@ -1677,7 +1677,7 @@ NaradaWarakagoda BørgeLindberg GunnarLehtinen - ZdravkoKačič + ZdravkoKačič AndrejŽgank KjellElenius GiampieroSalvi @@ -1693,7 +1693,7 @@ katsoyannou-efthimiou-2000-terminology - Key-SunChoi + Key-SunChoi Young-SoogChae Terminology in <fixed-case>K</fixed-case>orea: <fixed-case>KORTERM</fixed-case> http://www.lrec-conf.org/proceedings/lrec2000/pdf/276.pdf @@ -1706,16 +1706,16 @@ birocheau-2000-morphological - SonjaNießen - Franz JosefOch + SonjaNießen + Franz JosefOch GregorLeusch - HermannNey + HermannNey An Evaluation Tool for Machine Translation: Fast Evaluation for <fixed-case>MT</fixed-case> Research http://www.lrec-conf.org/proceedings/lrec2000/pdf/278.pdf niessen-etal-2000-evaluation - FiammettaNamer + FiammettaNamer GeorgetteDal <fixed-case>G</fixed-case>é<fixed-case>D</fixed-case>éri<fixed-case>F</fixed-case>: Automatic Generation and Analysis of Morphologically Constructed Lexical Resources http://www.lrec-conf.org/proceedings/lrec2000/pdf/279.pdf @@ -1737,7 +1737,7 @@ GerhardBudin - Alan K.Melby + Alan K.Melby Accessibility of Multilingual Terminological Resources - Current Problems and Prospects for the Future http://www.lrec-conf.org/proceedings/lrec2000/pdf/283.pdf budin-melby-2000-accessibility @@ -1745,37 +1745,37 @@ BilelGargouri MohamedJmaiel - AbdelmajidBen Hamadou + AbdelmajidBen Hamadou Using a Formal Approach to Evaluate Grammars http://www.lrec-conf.org/proceedings/lrec2000/pdf/285.pdf gargouri-etal-2000-using - AlvinMartin - MarkPrzybocki + AlvinMartin + MarkPrzybocki Design Issues in Text-Independent Speaker Recognition Evaluation http://www.lrec-conf.org/proceedings/lrec2000/pdf/286.pdf martin-przybocki-2000-design FeiXia - MarthaPalmer + MarthaPalmer NianwenXue Mary EllenOkurowski - JohnKovarik + JohnKovarik Fu-DongChiou ShizheHuang TonyKroch - MitchMarcus + MitchMarcus Developing Guidelines and Ensuring Consistency for <fixed-case>C</fixed-case>hinese Text Annotation http://www.lrec-conf.org/proceedings/lrec2000/pdf/287.pdf xia-etal-2000-developing - JernejaGros - FranceMihelič - SimonDobrišek - TomažErjavec + JernejaGros + FranceMihelič + SimonDobrišek + TomažErjavec MarioŽganec Corpora of <fixed-case>S</fixed-case>lovene Spoken Language for Multi-lingual Applications http://www.lrec-conf.org/proceedings/lrec2000/pdf/288.pdf @@ -1792,18 +1792,18 @@ kavallieratou-etal-2000-gruhd - FranceMihelič - JernejaGros - ElmarNöth + FranceMihelič + JernejaGros + ElmarNöth VolkerWarnke Labeling of Prosodic Events in <fixed-case>S</fixed-case>lovenian Speech Database <fixed-case>GOPOLIS</fixed-case> http://www.lrec-conf.org/proceedings/lrec2000/pdf/292.pdf mihelic-etal-2000-labeling - CatiaCucchiarini + CatiaCucchiarini JohanVan Hoorde - ElizabethD’Halleweyn + ElizabethD’Halleweyn <fixed-case>NL</fixed-case>-Translex: Machine Translation for <fixed-case>D</fixed-case>utch http://www.lrec-conf.org/proceedings/lrec2000/pdf/294.pdf cucchiarini-etal-2000-nl @@ -1817,16 +1817,16 @@ Ángel MartínMunicio GuillermoRojo - Fernando SánchezLeón + Fernando SánchezLeón OctavioPinillos Language Resources Development at the <fixed-case>S</fixed-case>panish Royal Academy http://www.lrec-conf.org/proceedings/lrec2000/pdf/297.pdf municio-etal-2000-language - IrinaProdanof - AmedeoCappelli - LorenzoMoretti + IrinaProdanof + AmedeoCappelli + LorenzoMoretti Reusability as Easy Adaptability: A Substantial Advance in <fixed-case>NL</fixed-case> Technology http://www.lrec-conf.org/proceedings/lrec2000/pdf/298.pdf prodanof-etal-2000-reusability @@ -1858,8 +1858,8 @@ AristomenisThanopoulos - NikosFakotakis - GeorgeKokkinakis + NikosFakotakis + GeorgeKokkinakis Automatic Extraction of Semantic Similarity of Words from Raw Technical Texts http://www.lrec-conf.org/proceedings/lrec2000/pdf/302.pdf thanopoulos-etal-2000-automatic-extraction @@ -1873,9 +1873,9 @@ bonneau-maynard-etal-2000-predictive - PennyLabropoulou + PennyLabropoulou ElenaMantzari - HarrisPapageorgiou + HarrisPapageorgiou MariaGavrilidou Automatic Generation of Dictionary Definitions from a Computational Lexicon http://www.lrec-conf.org/proceedings/lrec2000/pdf/306.pdf @@ -1905,7 +1905,7 @@ WolfgangMenzel - EricAtwell + EricAtwell PatriziaBonaventura DanielHerron PeterHowarth @@ -1917,8 +1917,8 @@ KallirroiGeorgila - NikosFakotakis - GeorgeKokkinakis + NikosFakotakis + GeorgeKokkinakis A Graphical Parametric Language-Independent Tool for the Annotation of Speech Corpora http://www.lrec-conf.org/proceedings/lrec2000/pdf/314.pdf georgila-etal-2000-graphical @@ -1931,43 +1931,43 @@ StéphaneChaudiron - KhalidChoukri + KhalidChoukri AudreyMance - ValérieMapelli + ValérieMapelli For a Repository of <fixed-case>NLP</fixed-case> Tools http://www.lrec-conf.org/proceedings/lrec2000/pdf/316.pdf chaudiron-etal-2000-repository JeffreyAllen - KhalidChoukri + KhalidChoukri Survey of Language Engineering Needs: a Language Resources Perspective http://www.lrec-conf.org/proceedings/lrec2000/pdf/317.pdf allen-choukri-2000-survey - JoCalder + JoCalder Interarbora and Thistle - Delivering Linguistic Structure by the <fixed-case>I</fixed-case>nternet http://www.lrec-conf.org/proceedings/lrec2000/pdf/319.pdf calder-2000-interarbora - GeorgeDemetriou - RobertGaizauskas + GeorgeDemetriou + RobertGaizauskas Automatically Augmenting Terminological Lexicons from Untagged Text http://www.lrec-conf.org/proceedings/lrec2000/pdf/320.pdf demetriou-gaizauskas-2000-automatically - AndreaSetzer - RobertGaizauskas + AndreaSetzer + RobertGaizauskas Annotating Events and Temporal Information in Newswire Texts http://www.lrec-conf.org/proceedings/lrec2000/pdf/321.pdf setzer-gaizauskas-2000-annotating - Bonnie J.Dorr - Gina-AnneLevow + Bonnie J.Dorr + Gina-AnneLevow DekangLin ScottThomas <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Semantic Resource Construction @@ -1976,7 +1976,7 @@ VeraFluhr-Semenova - ChristianFluhr + ChristianFluhr StéphanieBrisson Production of <fixed-case>NLP</fixed-case>-oriented Bilingual Language Resources from Human-oriented dictionaries http://www.lrec-conf.org/proceedings/lrec2000/pdf/328.pdf @@ -1991,17 +1991,17 @@ roux-etal-2000-developing - RobertoBasili - Maria TeresaPazienza + RobertoBasili + Maria TeresaPazienza MicheleVindigni - Fabio MassimoZanzotto + Fabio MassimoZanzotto Tuning Lexicons to New Operational Scenarios http://www.lrec-conf.org/proceedings/lrec2000/pdf/330.pdf basili-etal-2000-tuning - José A.R.Fonollosa - AsunciónMoreno + José A.R.Fonollosa + AsunciónMoreno <fixed-case>S</fixed-case>peech<fixed-case>D</fixed-case>at-Car Fixed Platform http://www.lrec-conf.org/proceedings/lrec2000/pdf/331.pdf fonollosa-moreno-2000-speechdat @@ -2020,9 +2020,9 @@ brants-plaehn-2000-interactive - TomažErjavec - RogerEvans - NancyIde + TomažErjavec + RogerEvans + NancyIde AdamKilgarriff The Concede Model for Lexical Databases http://www.lrec-conf.org/proceedings/lrec2000/pdf/335.pdf @@ -2036,9 +2036,9 @@ AnastasiaPapakostopoulou AthanassiaSpiliotopoulou AnnaVacalopoulou - PennyLabropoulou + PennyLabropoulou ElenaMantzari - HarrisPapageorgiou + HarrisPapageorgiou IasonDemiros Design and Implementation of the Online <fixed-case>ILSP</fixed-case> <fixed-case>G</fixed-case>reek Corpus http://www.lrec-conf.org/proceedings/lrec2000/pdf/336.pdf @@ -2079,20 +2079,20 @@ DavidPortabella AlbertFebrer - AsunciónMoreno + AsunciónMoreno <fixed-case>N</fixed-case>ani<fixed-case>T</fixed-case>rans: a Speech Labelling Tool http://www.lrec-conf.org/proceedings/lrec2000/pdf/345.pdf portabella-etal-2000-nanitrans - Sanda M.Harabagiu - Steven J.Maiorano + Sanda M.Harabagiu + Steven J.Maiorano Acquisition of Linguistic Patterns for Knowledge-based Information Extraction http://www.lrec-conf.org/proceedings/lrec2000/pdf/347.pdf harabagiu-maiorano-2000-acquisition - ElisabethD’Halleweyn + ElisabethD’Halleweyn ErwinDewallef JeannineBeeken A Platform for <fixed-case>D</fixed-case>utch in Human Language Technologies @@ -2100,16 +2100,16 @@ dhalleweyn-etal-2000-platform - MarilynWalker - CandaceKamm - JulieBoland + MarilynWalker + CandaceKamm + JulieBoland Developing and Testing General Models of Spoken Dialogue System Peformance http://www.lrec-conf.org/proceedings/lrec2000/pdf/349.pdf walker-etal-2000-developing - Claudede Loupy - MarcEl-Bèze + Claudede Loupy + MarcEl-Bèze Using Few Clues Can Compensate the Small Amount of Resources Available for Word Sense Disambiguation http://www.lrec-conf.org/proceedings/lrec2000/pdf/350.pdf de-loupy-el-beze-2000-using @@ -2122,15 +2122,15 @@ mikros-carayannis-2000-modern - PatrickParoubek + PatrickParoubek Language Resources as by-Product of Evaluation: The <fixed-case>MULTITAG</fixed-case> Example http://www.lrec-conf.org/proceedings/lrec2000/pdf/353.pdf paroubek-2000-language - Judith L.Klavans + Judith L.Klavans NinaWacholder - David K.Evans + David K.Evans Evaluation of Computational Linguistic Techniques for Identifying Significant Topics for Browsing Applications http://www.lrec-conf.org/proceedings/lrec2000/pdf/355.pdf klavans-etal-2000-evaluation @@ -2146,8 +2146,8 @@ nakamura-etal-2000-acoustical - GeorgeDemetriou - EricAtwell + GeorgeDemetriou + EricAtwell CliveSouter Using Lexical Semantic Knowledge from Machine Readable Dictionaries for Domain Independent Language Modelling http://www.lrec-conf.org/proceedings/lrec2000/pdf/357.pdf @@ -2178,7 +2178,7 @@ hofland-2000-self - Janne BondiJohannessen + Janne BondiJohannessen AndersNøklestad KristinHagen A Web-based Advanced and User Friendly System: The <fixed-case>O</fixed-case>slo Corpus of Tagged <fixed-case>N</fixed-case>orwegian Texts @@ -2193,7 +2193,7 @@ IvonnePeters - WimPeters + WimPeters The Treatment of Adjectives in <fixed-case>SIMPLE</fixed-case>: Theoretical Observations http://www.lrec-conf.org/proceedings/lrec2000/pdf/366.pdf peters-peters-2000-treatment @@ -2205,12 +2205,12 @@ michel-2000-cardinal - Laurie E.Damianos + Laurie E.Damianos JillDrury TariFanderclai - LynetteHirschman + LynetteHirschman JeffKurtz - BeatriceOshika + BeatriceOshika Evaluating Multi-party Multi-modal Systems http://www.lrec-conf.org/proceedings/lrec2000/pdf/368.pdf damianos-etal-2000-evaluating @@ -2222,14 +2222,14 @@ kunze-2000-extension - Serge A.Yablonsky + Serge A.Yablonsky <fixed-case>R</fixed-case>ussian Monitor Corpora: Composition, Linguistic Encoding and <fixed-case>I</fixed-case>nternet Publication http://www.lrec-conf.org/proceedings/lrec2000/pdf/370.pdf yablonsky-2000-russian AnnCopestake - DanFlickinger + DanFlickinger An Open Source Grammar Development Environment and Broad-coverage <fixed-case>E</fixed-case>nglish Grammar Using <fixed-case>HPSG</fixed-case> http://www.lrec-conf.org/proceedings/lrec2000/pdf/371.pdf copestake-flickinger-2000-open @@ -2237,7 +2237,7 @@ SunMaosong SunHonglin - HuangChangning + ChangningHuang ZhangPu XingHongbing ZhouQiang @@ -2246,11 +2246,11 @@ sun-etal-2000-hua - AsunciónMoreno + AsunciónMoreno BørgeLindberg ChristophDraxler GaëlRichard - KhalidChoukri + KhalidChoukri StephanEuler JeffreyAllen <fixed-case>SPEECHDAT</fixed-case>-<fixed-case>CAR</fixed-case>. A Large Speech Database for Automotive Environments @@ -2266,9 +2266,9 @@ turrini-etal-2000-addizionario - KhalidChoukri + KhalidChoukri AudreyMance - ValérieMapelli + ValérieMapelli Recent Developments within the <fixed-case>E</fixed-case>uropean Language Resources Association (<fixed-case>ELRA</fixed-case>) http://www.lrec-conf.org/proceedings/lrec2000/pdf/377.pdf choukri-etal-2000-recent diff --git a/data/xml/L02.xml b/data/xml/L02.xml index 239b451b3b..f183b0df93 100644 --- a/data/xml/L02.xml +++ b/data/xml/L02.xml @@ -16,7 +16,7 @@ SusanaAfonso - EckhardBick + EckhardBick RenatoHaber DianaSantos Floresta Sintá(c)tica: A treebank for <fixed-case>P</fixed-case>ortuguese @@ -47,9 +47,9 @@ vandeghinste-2002-lexicon - EduardHovy + EduardHovy MargaretKing - AndreiPopescu-Belis + AndreiPopescu-Belis Computer-Aided Specification of Quality Models for Machine Translation Evaluation http://www.lrec-conf.org/proceedings/lrec2002/pdf/5.pdf hovy-etal-2002-computer @@ -62,15 +62,15 @@ Min-YenKan - Judith L.Klavans - Kathleen R.McKeown + Judith L.Klavans + Kathleen R.McKeown Using the Annotated Bibliography as a Resource for Indicative Summarization http://www.lrec-conf.org/proceedings/lrec2002/pdf/7.pdf kan-etal-2002-using Choy-KimChuah - ZaharinYusoff + ZaharinYusoff Computational Linguistics at Universiti Sains <fixed-case>M</fixed-case>alaysia http://www.lrec-conf.org/proceedings/lrec2002/pdf/8.pdf chuah-yusoff-2002-computational @@ -78,7 +78,7 @@ JuditFeliu JorgeVivaldi - M. TeresaCabré + M. TeresaCabré Towards an Ontology for a Human Genome Knowledge Base http://www.lrec-conf.org/proceedings/lrec2002/pdf/9.pdf feliu-etal-2002-towards @@ -100,7 +100,7 @@ markert-nissim-2002-towards - PhilippeLanglais + PhilippeLanglais MarieLoranger GuyLapalme Translators at work with <fixed-case>TRANSTYPE</fixed-case>: Resource and Evaluation. @@ -109,7 +109,7 @@ QiangZhou - Elliott FrancoDrabek + Elliott FrancoDrabek FujiRen Annotating the functional chunks in <fixed-case>C</fixed-case>hinese sentences http://www.lrec-conf.org/proceedings/lrec2002/pdf/13.pdf @@ -117,11 +117,11 @@ HisaoKuwabara - ShuichItahashi + ShuichItahashi MikioYamamoto ToshiyukiTakezawa SatoshiNakamura - KazuyaTakeda + KazuyaTakeda The Present Status of Speech Database in <fixed-case>J</fixed-case>apan: Development, Management, and Application to Speech Research http://www.lrec-conf.org/proceedings/lrec2002/pdf/14.pdf kuwabara-etal-2002-present @@ -134,8 +134,8 @@ santos-gasperin-2002-evaluation - LauraDocío-Fernández - CarmenGarcía-Mateo + LauraDocío-Fernández + CarmenGarcía-Mateo Acoustic Modeling and Training of a Bilingual <fixed-case>ASR</fixed-case> System when a Minority Language is Involved http://www.lrec-conf.org/proceedings/lrec2002/pdf/16.pdf docio-fernandez-garcia-mateo-2002-acoustic @@ -150,7 +150,7 @@ JakubPiskorski - WitoldDrożdżyński + WitoldDrożdżyński OliverScherf FeiyuXu A Flexible <fixed-case>XML</fixed-case>-based Regular Compiler for Creation and Conversion of Linguistic Resources @@ -172,7 +172,7 @@ draxler-schiel-2002-three - RenéSchneider + RenéSchneider n-grams of Seeds: A Hybrid System for Corpus-Based Text Summarization http://www.lrec-conf.org/proceedings/lrec2002/pdf/21.pdf schneider-2002-n @@ -184,7 +184,7 @@ schiffman-2002-building - SabineSchulte im Walde + SabineSchulte im Walde A Subcategorisation Lexicon for <fixed-case>G</fixed-case>erman Verbs induced from a Lexicalised <fixed-case>PCFG</fixed-case> http://www.lrec-conf.org/proceedings/lrec2002/pdf/23.pdf schulte-im-walde-2002-subcategorisation @@ -219,13 +219,13 @@ murata-isahara-2002-automatic - FabioTamburini + FabioTamburini Automatic detection of prosodic prominence in continuous speech http://www.lrec-conf.org/proceedings/lrec2002/pdf/28.pdf tamburini-2002-automatic - FabioTamburini + FabioTamburini A dynamic model for reference corpora structure definition http://www.lrec-conf.org/proceedings/lrec2002/pdf/29.pdf tamburini-2002-dynamic @@ -239,9 +239,9 @@ JavierCaminero - JoaquínGonzález-Rodríguez - JavierOrtega-García - DanielTapias + JoaquínGonzález-Rodríguez + JavierOrtega-García + DanielTapias Pedro M.Ruz MercedesSolá A Multilingual Speaker Verification System: Architecture and Performance Evaluation @@ -249,7 +249,7 @@ caminero-etal-2002-multilingual - DanTufiş + DanTufiş Ana-MariaBarbu Lexical token alignment: experiments, results and applications http://www.lrec-conf.org/proceedings/lrec2002/pdf/32.pdf @@ -266,8 +266,8 @@ NadjetBouayad-Agha RichardPower - DoniaScott - AnjaBelz + DoniaScott + AnjaBelz <fixed-case>PILLS</fixed-case>: Multilingual generation of medical information documents with overlapping content http://www.lrec-conf.org/proceedings/lrec2002/pdf/34.pdf bouayad-agha-etal-2002-pills @@ -276,7 +276,7 @@ FelixSasaki ClaudiaWegener AndreasWitt - DieterMetzing + DieterMetzing JensPönninghaus Co-reference annotation and resources: A multilingual corpus of typologically diverse languages http://www.lrec-conf.org/proceedings/lrec2002/pdf/35.pdf @@ -284,7 +284,7 @@ UdoHahn - StefanSchulz + StefanSchulz Towards Very Large Ontologies for Medical Language Processing http://www.lrec-conf.org/proceedings/lrec2002/pdf/36.pdf hahn-schulz-2002-towards @@ -304,13 +304,13 @@ alfonseca-manandhar-2002-proposal - MatthieuConstant + MatthieuConstant Methods for Constructing Lexicon-Grammar Resources: The Example of Measure Expressions http://www.lrec-conf.org/proceedings/lrec2002/pdf/39.pdf constant-2002-methods - KristinaNilsson + KristinaNilsson LarsBorin Living off the land: The Web as a source of practice texts for learners of less prevalent languages http://www.lrec-conf.org/proceedings/lrec2002/pdf/40.pdf @@ -318,15 +318,15 @@ SebastianMöller - ErginaKavallieratou + ErginaKavallieratou Diagnostic Assessment of Telephone Transmission Impact on <fixed-case>ASR</fixed-case> Performance and Human-to-Human Speech Quality http://www.lrec-conf.org/proceedings/lrec2002/pdf/41.pdf moller-kavallieratou-2002-diagnostic - Carlos D.Martínez-Hinarejos - EmilioSanchís - FernandoGarcía-Granada + Carlos D.Martínez-Hinarejos + EmilioSanchís + FernandoGarcía-Granada PabloAibar A Labelling Proposal to Annotate Dialogues http://www.lrec-conf.org/proceedings/lrec2002/pdf/42.pdf @@ -334,22 +334,22 @@ SimoneTeufel - NoemieElhadad + NoemieElhadad Collection and linguistic processing of a large-scale corpus of medical articles http://www.lrec-conf.org/proceedings/lrec2002/pdf/43.pdf teufel-elhadad-2002-collection TokunagaTakenobu - OkumuraManabu - SaitôSuguru + ManabuOkumura + SuguruSaitô TanakaHozumi Constructing a lexicon of action http://www.lrec-conf.org/proceedings/lrec2002/pdf/44.pdf tokunaga-etal-2002-constructing - BirteLönneker + BirteLönneker Building Concept Frames based on Text Corpora http://www.lrec-conf.org/proceedings/lrec2002/pdf/45.pdf lonneker-2002-building @@ -367,14 +367,14 @@ RobertoNavigli - PaolaVelardi + PaolaVelardi Automatic Adaptation of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et to Domains http://www.lrec-conf.org/proceedings/lrec2002/pdf/47.pdf navigli-velardi-2002-automatic MartaVillegas - NuriaBel + NuriaBel From <fixed-case>DTD</fixed-case> to relational d<fixed-case>B</fixed-case>. An automatic generation of a lexicographical station out off <fixed-case>ISLE</fixed-case> guidelines http://www.lrec-conf.org/proceedings/lrec2002/pdf/48.pdf villegas-bel-2002-dtd @@ -400,7 +400,7 @@ AntonioMolina FerranPla EncarnaSegarra - LidiaMoreno + LidiaMoreno Word Sense Disambiguation using Statistical Models and <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et http://www.lrec-conf.org/proceedings/lrec2002/pdf/51.pdf molina-etal-2002-word @@ -426,23 +426,23 @@ MarianneStarlander - AndreiPopescu-Belis + AndreiPopescu-Belis Corpus-based Evaluation of a <fixed-case>F</fixed-case>rench Spelling and Grammar Checker http://www.lrec-conf.org/proceedings/lrec2002/pdf/55.pdf starlander-popescu-belis-2002-corpus - AdamMeyers - RalphGrishman + AdamMeyers + RalphGrishman MichikoKosaka Formal Mechanisms for Capturing Regularizations http://www.lrec-conf.org/proceedings/lrec2002/pdf/56.pdf meyers-etal-2002-formal - Erhard W.Hinrichs - SandraKübler - Frank H.Müller + Erhard W.Hinrichs + SandraKübler + Frank H.Müller TylmanUle A Hybrid Architecture for Robust Parsing of <fixed-case>G</fixed-case>erman http://www.lrec-conf.org/proceedings/lrec2002/pdf/57.pdf @@ -451,14 +451,14 @@ RainerSiemund BarbaraHeuft - KhalidChoukri + KhalidChoukri OssamaEmam EmmanuelMaragoudakis HerbertTropf OrenGedge SherrieShammass - AsuncionMoreno - Albino NogueirasRodriguez + AsuncionMoreno + Albino NogueirasRodriguez ImedZitouni DorotaIskra <fixed-case>O</fixed-case>rien<fixed-case>T</fixed-case>el - Multilingual access to interactive communication services for the Mediterranean and the <fixed-case>M</fixed-case>iddle <fixed-case>E</fixed-case>ast @@ -468,7 +468,7 @@ KazutakaTakao KenjiImamura - HidekiKashioka + HidekiKashioka Comparing and Extracting Paraphrasing Words with 2-Way Bilingual Dictionaries http://www.lrec-conf.org/proceedings/lrec2002/pdf/59.pdf takao-etal-2002-comparing @@ -481,30 +481,30 @@ SabineBrants - SilviaHansen + SilviaHansen Developments in the <fixed-case>TIGER</fixed-case> Annotation Scheme and their Realization in the Corpus http://www.lrec-conf.org/proceedings/lrec2002/pdf/61.pdf brants-hansen-2002-developments - AntónioBranco + AntónioBranco JoséLeitão - JoãoSilva + JoãoSilva LuísGomes Nexing Corpus: a corpus of verbal protocols on syllogistic reasoning http://www.lrec-conf.org/proceedings/lrec2002/pdf/62.pdf branco-etal-2002-nexing - EvaHajičová - IvonaKučerová + EvaHajičová + IvonaKučerová Argument/Valency Structure in <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank, <fixed-case>LCS</fixed-case> Database and <fixed-case>P</fixed-case>rague Dependency Treebank: A Comparative Pilot Study http://www.lrec-conf.org/proceedings/lrec2002/pdf/63.pdf hajicova-kucerova-2002-argument KarlWeilhammer - UweReichel + UweReichel FlorianSchiel Multi-Tier Annotations in the Verbmobil Corpus http://www.lrec-conf.org/proceedings/lrec2002/pdf/64.pdf @@ -543,24 +543,24 @@ cappelli-etal-2002-knowledge - AlbertoLavelli - FabioPianesi + AlbertoLavelli + FabioPianesi ErmannoMaci - IrinaProdanof - LucaDini - GiampaoloMazzini + IrinaProdanof + LucaDini + GiampaoloMazzini <fixed-case>S</fixed-case>i<fixed-case>SSA</fixed-case>: An Infrastructure for Developing <fixed-case>NLP</fixed-case> Applications http://www.lrec-conf.org/proceedings/lrec2002/pdf/69.pdf lavelli-etal-2002-sissa - KirilSimov + KirilSimov PetyaOsenova MilenaSlavcheva - SiaKolkovska + SiaKolkovska ElisavetaBalabanova DimitarDoikoff - KrassimiraIvanova + KrassimiraIvanova AlexanderSimov MilenKouylekov Building a Linguistically Interpreted Corpus of <fixed-case>B</fixed-case>ulgarian: the <fixed-case>B</fixed-case>ul<fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank @@ -570,7 +570,7 @@ Tonvan der Wouden HeleenHoekstra - MichaelMoortgat + MichaelMoortgat BramRenmans InekeSchuurman Syntactic Analysis in the Spoken <fixed-case>D</fixed-case>utch Corpus (<fixed-case>CGN</fixed-case>) @@ -578,8 +578,8 @@ van-der-wouden-etal-2002-syntactic - AndreiPopescu-Belis - SusanArmstrong + AndreiPopescu-Belis + SusanArmstrong GilbertRobert Electronic Dictionaries - from Publisher Data to a Distribution Server: the <fixed-case>D</fixed-case>ico<fixed-case>P</fixed-case>ro, <fixed-case>D</fixed-case>ico<fixed-case>E</fixed-case>ast and <fixed-case>RERO</fixed-case> Projects http://www.lrec-conf.org/proceedings/lrec2002/pdf/72.pdf @@ -601,44 +601,44 @@ geutner-etal-2002-design - NadiaMana - OrnellaCorazzari + NadiaMana + OrnellaCorazzari The Lexico-semantic Annotation of an <fixed-case>I</fixed-case>talian Treebank http://www.lrec-conf.org/proceedings/lrec2002/pdf/75.pdf mana-corazzari-2002-lexico - BernardoMagnini - MatteoNegri + BernardoMagnini + MatteoNegri RobertoPrevete - HristoTanev + HristoTanev Towards Automatic Evaluation of Question/Answering Systems http://www.lrec-conf.org/proceedings/lrec2002/pdf/76.pdf magnini-etal-2002-towards - MartinRajman - AnthonyHartley + MartinRajman + AnthonyHartley Automatic Ranking of <fixed-case>MT</fixed-case> Systems http://www.lrec-conf.org/proceedings/lrec2002/pdf/77.pdf rajman-hartley-2002-automatic LuisaBentivogli - EmanuelePianta + EmanuelePianta Opportunistic Semantic Tagging http://www.lrec-conf.org/proceedings/lrec2002/pdf/78.pdf bentivogli-pianta-2002-opportunistic - PetrPollák + PetrPollák VáclavHanžl Tool for <fixed-case>C</fixed-case>zech Pronunciation Generation Combining Fixed Rules with Pronunciation Lexicon and Lexicon Management Tool http://www.lrec-conf.org/proceedings/lrec2002/pdf/79.pdf pollak-hanzl-2002-tool - TonyRose + TonyRose MarkStevenson MilesWhitehead The <fixed-case>R</fixed-case>euters Corpus Volume 1 -from Yesterday’s News to Tomorrow’s Language Resources @@ -653,8 +653,8 @@ dutilh-kruyt-2002-implementation - ZdeněkŽabokrtský - PetrSgall + ZdeněkŽabokrtský + PetrSgall SašoDžeroski A Machine Learning Approach to Automatic Functor Assignment in the <fixed-case>P</fixed-case>rague Dependency Treebank http://www.lrec-conf.org/proceedings/lrec2002/pdf/82.pdf @@ -669,7 +669,7 @@ CaroleTiberius DunstanBrown - GrevilleCorbett + GrevilleCorbett A typological database of agreement http://www.lrec-conf.org/proceedings/lrec2002/pdf/84.pdf tiberius-etal-2002-typological @@ -681,8 +681,8 @@ lin-2002-web - MitsuoShimohata - EiichiroSumita + MitsuoShimohata + EiichiroSumita Automatic paraphrasing based on parallel corpus for normalization http://www.lrec-conf.org/proceedings/lrec2002/pdf/86.pdf shimohata-sumita-2002-automatic @@ -702,7 +702,7 @@ AndrejŽgank - ZdravkoKačič + ZdravkoKačič BogomirHorvat Preliminary Evaluation of <fixed-case>S</fixed-case>lovenian Mobile Database <fixed-case>P</fixed-case>oli<fixed-case>D</fixed-case>at http://www.lrec-conf.org/proceedings/lrec2002/pdf/89.pdf @@ -732,7 +732,7 @@ OlivierFerret - ChristianFluhr + ChristianFluhr FrançoiseRousseau-Hans Jean-LucSimoni Building domain specific lexical hierarchies from corpora @@ -740,14 +740,14 @@ ferret-etal-2002-building - WalterDaelemans - VéroniqueHoste + WalterDaelemans + VéroniqueHoste Evaluation of Machine Learning Methods for Natural Language Processing Tasks http://www.lrec-conf.org/proceedings/lrec2002/pdf/94.pdf daelemans-hoste-2002-evaluation - TristanVan Rullen + TristanVan Rullen PhilippeBlache An evaluation of different symbolic shallow parsing techniques http://www.lrec-conf.org/proceedings/lrec2002/pdf/95.pdf @@ -766,7 +766,7 @@ Jean-PierreMartens - DianaBinnenpoorte + DianaBinnenpoorte KrisDemuynck RubenVan Parys TomLaureys @@ -779,11 +779,11 @@ NellekeOostdijk WimGoedertier - Frankvan Eynde - LouisBoves + Frankvan Eynde + LouisBoves Jean-PierreMartens - MichaelMoortgat - HaraldBaayen + MichaelMoortgat + HaraldBaayen Experiences from the Spoken <fixed-case>D</fixed-case>utch Corpus Project http://www.lrec-conf.org/proceedings/lrec2002/pdf/98.pdf oostdijk-etal-2002-experiences @@ -795,9 +795,9 @@ mikros-2002-quantitative - PierretteBouillon + PierretteBouillon VincentClaveau - CécileFabre + CécileFabre PascaleSébillot Acquisition of Qualia Elements from Corpora - Evaluation of a Symbolic Learning Method http://www.lrec-conf.org/proceedings/lrec2002/pdf/100.pdf @@ -827,8 +827,8 @@ widdows-etal-2002-using - AriadnaFont Llitjós - Alan W.Black + AriadnaFont Llitjós + Alan W.Black Evaluation and collection of proper name pronunciations online http://www.lrec-conf.org/proceedings/lrec2002/pdf/104.pdf font-llitjos-black-2002-evaluation @@ -845,7 +845,7 @@ AlexAlsina ToniBadia - GemmaBoleda + GemmaBoleda StefanBott ÀngelGil MartíQuixal @@ -868,7 +868,7 @@ AoifeCahill - Josefvan Genabith + Josefvan Genabith <fixed-case>TTS</fixed-case> - A Treebank Tool Suite http://www.lrec-conf.org/proceedings/lrec2002/pdf/109.pdf cahill-van-genabith-2002-tts @@ -898,14 +898,14 @@ cassidy-2002-xquery - ConstantinOrasan + ConstantinOrasan RameshKrishnamurthy A corpus-based investigation of junk emails http://www.lrec-conf.org/proceedings/lrec2002/pdf/113.pdf orasan-krishnamurthy-2002-corpus - ConstantinOrasan + ConstantinOrasan Building annotated resources for automatic text summarisation http://www.lrec-conf.org/proceedings/lrec2002/pdf/114.pdf orasan-2002-building @@ -925,10 +925,10 @@ steiner-kallmeyer-2002-viqtorya - MassimoPoesio + MassimoPoesio TomonoriIshikawa - SabineSchulte im Walde - RenataVieira + SabineSchulte im Walde + RenataVieira Acquiring Lexical Knowledge for Anaphora Resolution http://www.lrec-conf.org/proceedings/lrec2002/pdf/117.pdf poesio-etal-2002-acquiring @@ -943,7 +943,7 @@ ChikashiNobata SatoshiSekine HitoshiIsahara - RalphGrishman + RalphGrishman Summarization System Integrated with Named Entity Tagging and <fixed-case>IE</fixed-case> pattern Discovery http://www.lrec-conf.org/proceedings/lrec2002/pdf/119.pdf nobata-etal-2002-summarization @@ -970,7 +970,7 @@ matsumoto-tanaka-2002-automatic - SatoshiShirai + SatoshiShirai KazuhideYamamoto FrancisBond HozumiTanaka @@ -980,7 +980,7 @@ Yong-JuLee - Bong-WanKim + Bong-WanKim YongnamUm Speech Information Technology & Industry Promotion Center in <fixed-case>K</fixed-case>orea: Activities and Directions http://www.lrec-conf.org/proceedings/lrec2002/pdf/124.pdf @@ -998,9 +998,9 @@ ManolisMaragoudakis - KatiaKermanidis - NikosFakotakis - GeorgeKokkinakis + KatiaKermanidis + NikosFakotakis + GeorgeKokkinakis Combining <fixed-case>B</fixed-case>ayesian and Support Vector Machines Learning to automatically complete Syntactical Information for <fixed-case>HPSG</fixed-case>-like Formalisms http://www.lrec-conf.org/proceedings/lrec2002/pdf/126.pdf maragoudakis-etal-2002-combining @@ -1017,34 +1017,34 @@ AristomenisThanopoulos - NikosFakotakis - GeorgeKokkinakis + NikosFakotakis + GeorgeKokkinakis Comparative Evaluation of Collocation Extraction Metrics http://www.lrec-conf.org/proceedings/lrec2002/pdf/128.pdf thanopoulos-etal-2002-comparative - ChristopheLaprun - Jonathan G.Fiscus - JohnGarofolo + ChristopheLaprun + Jonathan G.Fiscus + JohnGarofolo SylvainPajot A Pratical Introduction to <fixed-case>ATLAS</fixed-case> http://www.lrec-conf.org/proceedings/lrec2002/pdf/129.pdf laprun-etal-2002-pratical - JohnGarofolo - Jonathan G.Fiscus - AlvinMartin - DavidPallett - MarkPrzybocki + JohnGarofolo + Jonathan G.Fiscus + AlvinMartin + DavidPallett + MarkPrzybocki <fixed-case>NIST</fixed-case> Rich Transcription 2002 Evaluation: A Preview http://www.lrec-conf.org/proceedings/lrec2002/pdf/130.pdf garofolo-etal-2002-nist PalomaMartínez - AnaGarcía-Serrano + AnaGarcía-Serrano AlbertoRuiz-Cristina Integrating <fixed-case>S</fixed-case>panish Linguistic Resources in a Web Site Assistant http://www.lrec-conf.org/proceedings/lrec2002/pdf/131.pdf @@ -1059,15 +1059,15 @@ GregoryGrefenstette YanQu - David A.Evans + David A.Evans Expanding lexicons by inducing paradigms and validating attested forms http://www.lrec-conf.org/proceedings/lrec2002/pdf/133.pdf grefenstette-etal-2002-expanding TaroWatanabe - MitsuoShimohata - EiichiroSumita + MitsuoShimohata + EiichiroSumita Statistical Machine Translation on Paraphrased Corpora http://www.lrec-conf.org/proceedings/lrec2002/pdf/134.pdf watanabe-etal-2002-statistical @@ -1080,7 +1080,7 @@ bia-quero-2002-building - HidekiKashioka + HidekiKashioka Translation Unit Concerning Timing of Simultaneous Translation http://www.lrec-conf.org/proceedings/lrec2002/pdf/136.pdf kashioka-2002-translation @@ -1094,24 +1094,24 @@ narita-etal-2002-web - RicardoRibeiro - LuísOliveira + RicardoRibeiro + LuísOliveira IsabelTrancoso Morphosyntactic Disambiguation for <fixed-case>TTS</fixed-case> Systems http://www.lrec-conf.org/proceedings/lrec2002/pdf/138.pdf ribeiro-etal-2002-morphosyntactic - Charles J.Fillmore - Collin F.Baker + Charles J.Fillmore + Collin F.Baker HiroakiSato Seeing Arguments through Transparent Structures http://www.lrec-conf.org/proceedings/lrec2002/pdf/139.pdf fillmore-etal-2002-seeing - Charles J.Fillmore - Collin F.Baker + Charles J.Fillmore + Collin F.Baker HiroakiSato The <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Database and Software Tools http://www.lrec-conf.org/proceedings/lrec2002/pdf/140.pdf @@ -1127,31 +1127,31 @@ ma-etal-2002-models - Doroteo TorreToledano - Luis A. HernándezGómez + Doroteo TorreToledano + Luis A. HernándezGómez <fixed-case>HMM</fixed-case>s for Automatic Phonetic Segmentation http://www.lrec-conf.org/proceedings/lrec2002/pdf/142.pdf toledano-gomez-2002-hmms - Helen WrightHastie + Helen WrightHastie RashmiPrasad - MarilynWalker + MarilynWalker Automatic Evaluation: Using a <fixed-case>DATE</fixed-case> Dialogue Act Tagger for User Satisfaction and Task Completion Prediction http://www.lrec-conf.org/proceedings/lrec2002/pdf/143.pdf hastie-etal-2002-automatic - NuriaBel + NuriaBel JavierCaminero - LuisHernández - MontserratMarimón + LuisHernández + MontserratMarimón José F.Morlesín Josep M.Otero - JoséRelaño - M. CarmenRodríguez + JoséRelaño + M. CarmenRodríguez Pedro M.Ruz - DanielTapias + DanielTapias Design and Evaluation of a <fixed-case>SLDS</fixed-case> for <fixed-case>E</fixed-case>-Mail Access through the Telephone http://www.lrec-conf.org/proceedings/lrec2002/pdf/144.pdf bel-etal-2002-design @@ -1161,30 +1161,30 @@ FabreLambeau AlineVillavicencio FrancisBond - TimothyBaldwin + TimothyBaldwin Ivan A.Sag - DanFlickinger + DanFlickinger Multiword expressions: linguistic precision and reusability http://www.lrec-conf.org/proceedings/lrec2002/pdf/145.pdf copestake-etal-2002-multiword KeitaTsuji - BeatriceDaille + BeatriceDaille KyoKageura Extracting <fixed-case>F</fixed-case>rench-<fixed-case>J</fixed-case>apanese Word Pairs from Bilingual Corpora based on Transliteration Rules http://www.lrec-conf.org/proceedings/lrec2002/pdf/146.pdf tsuji-etal-2002-extracting - ElaineUí Dhonnchadha + ElaineUí Dhonnchadha A Two-level Morphological Analyser and Generator for <fixed-case>I</fixed-case>rish using Finite-State Transducers http://www.lrec-conf.org/proceedings/lrec2002/pdf/147.pdf ui-dhonnchadha-2002-two SmarandaMuresan - JudithKlavans + JudithKlavans A Method for Automatically Building and Evaluating Dictionary Resources http://www.lrec-conf.org/proceedings/lrec2002/pdf/148.pdf muresan-klavans-2002-method @@ -1203,17 +1203,17 @@ shirai-2002-construction - Chung-hyeHan + Chung-hyeHan Na-RaeHan Eon-SukKo - MarthaPalmer + MarthaPalmer Development and Evaluation of a <fixed-case>K</fixed-case>orean Treebank and its Application to <fixed-case>NLP</fixed-case> http://www.lrec-conf.org/proceedings/lrec2002/pdf/151.pdf han-etal-2002-development AlexandraKinyon - Carlos A.Prolo + Carlos A.Prolo Identifying Verb Arguments and their Syntactic Function in the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank http://www.lrec-conf.org/proceedings/lrec2002/pdf/152.pdf kinyon-prolo-2002-identifying @@ -1226,10 +1226,10 @@ mokhtari-campbell-2002-automatic - Jong-HoonOh - SaimShin + Jong-HoonOh + SaimShin Yong-SeokChoi - Key-SunChoi + Key-SunChoi Word Sense Disambiguation with Information Retrieval Technique http://www.lrec-conf.org/proceedings/lrec2002/pdf/154.pdf oh-etal-2002-word @@ -1249,56 +1249,56 @@ EricaCostantini SusanneBurger - FabioPianesi + FabioPianesi <fixed-case>NESPOLE</fixed-case>!’s Multilingual and Multimodal Corpus http://www.lrec-conf.org/proceedings/lrec2002/pdf/156.pdf costantini-etal-2002-nespole HoracioSaggion - HamishCunningham + HamishCunningham DianaMaynard - KalinaBontcheva + KalinaBontcheva OanaHamza - ChristianUrsu - YorickWilks + ChristianUrsu + YorickWilks Extracting Information for Automatic Indexing of Multimedia Material http://www.lrec-conf.org/proceedings/lrec2002/pdf/157.pdf saggion-etal-2002-extracting HoracioSaggion - DragomirRadev + DragomirRadev SimoneTeufel WaiLam - Stephanie M.Strassel + Stephanie M.Strassel Developing Infrastructure for the Evaluation of Single and Multi-document Summarization Systems in a Cross-lingual Environment http://www.lrec-conf.org/proceedings/lrec2002/pdf/158.pdf saggion-etal-2002-developing - HarrisPapageorgiou + HarrisPapageorgiou ProkopisProkopidis - VoulaGiouli + VoulaGiouli IasonDemiros - AlexisKonstantinidis - SteliosPiperidis + AlexisKonstantinidis + SteliosPiperidis Multi-level <fixed-case>XML</fixed-case>-based Corpus Annotation http://www.lrec-conf.org/proceedings/lrec2002/pdf/159.pdf papageorgiou-etal-2002-multi - HaraldHöge + HaraldHöge Project Proposal <fixed-case>TC</fixed-case>-<fixed-case>STAR</fixed-case> - Make Speech to Speech Translation Real http://www.lrec-conf.org/proceedings/lrec2002/pdf/160.pdf hoge-2002-project - IgorBoguslavsky + IgorBoguslavsky IvanChardin SvetlanaGrigorieva NikolaiGrigoriev - LeonidIomdin + LeonidIomdin LeonidKreidlin NadezhdaFrid Development of a Dependency Treebank for <fixed-case>R</fixed-case>ussian and its Possible Applications in <fixed-case>NLP</fixed-case> @@ -1337,7 +1337,7 @@ munoz-etal-2002-bilingual - Heiki-JaanKaalep + Heiki-JaanKaalep KadriMuischnek Using the Text Corpus to Create a Comprehensive List of Phrasal Verbs http://www.lrec-conf.org/proceedings/lrec2002/pdf/165.pdf @@ -1346,14 +1346,14 @@ DianaRaileanu PaulBuitelaar - SpelaVintar + SpelaVintar JörgBay Evaluation Corpora for Sense Disambiguation in the Medical Domain http://www.lrec-conf.org/proceedings/lrec2002/pdf/166.pdf raileanu-etal-2002-evaluation - ŠpelaVintar + ŠpelaVintar PaulBuitelaar BärbelRipplinger BogdanSacaleanu @@ -1364,8 +1364,8 @@ vintar-etal-2002-efficient - MarkétaStraňáková-Lopatková - ZdenĕkŽabokrtský + MarkétaStraňáková-Lopatková + ZdenĕkŽabokrtský Valency Dictionary of <fixed-case>C</fixed-case>zech Verbs: Complex Tectogrammatical Annotation http://www.lrec-conf.org/proceedings/lrec2002/pdf/168.pdf stranakova-lopatkova-zabokrtsky-2002-valency @@ -1390,41 +1390,41 @@ trippel-gibbon-2002-annotation - Katia LidaKermanidis - NikosFakotakis - GeorgeKokkinakis + Katia LidaKermanidis + NikosFakotakis + GeorgeKokkinakis <fixed-case>DELOS</fixed-case>: An Automatically Tagged Economic Corpus for <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek http://www.lrec-conf.org/proceedings/lrec2002/pdf/172.pdf kermanidis-etal-2002-delos - Henkvan den Heuvel - KhalidChoukri - HaraldHöge + Henkvan den Heuvel + KhalidChoukri + HaraldHöge Give me a bug. a framework for a bug report service http://www.lrec-conf.org/proceedings/lrec2002/pdf/173.pdf van-den-heuvel-etal-2002-give VladimirHozjan - ZdravkoKacic - AsunciónMoreno - AntonioBonafonte - AlbinoNogueiras + ZdravkoKacic + AsunciónMoreno + AntonioBonafonte + AlbinoNogueiras Interface Databases: Design and Collection of a Multilingual Emotional Speech Database http://www.lrec-conf.org/proceedings/lrec2002/pdf/174.pdf hozjan-etal-2002-interface VladimirHozjan - ZdravkoKacic + ZdravkoKacic Objective analysis of emotional speech for <fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>lovenian Interface emotional speech databases http://www.lrec-conf.org/proceedings/lrec2002/pdf/175.pdf hozjan-kacic-2002-objective KonstantinBiatov - JoachimKöhler + JoachimKöhler Methods and Tools for Speech Data Acquisition exploiting a Database of <fixed-case>G</fixed-case>erman Parliamentary Speeches and Transcripts from the <fixed-case>I</fixed-case>nternet http://www.lrec-conf.org/proceedings/lrec2002/pdf/176.pdf biatov-kohler-2002-methods @@ -1433,7 +1433,7 @@ DorotaIskra BeateGrosskopf KrzysztofMarasek - Henkvan den Heuvel + Henkvan den Heuvel FrankDiehl AndreasKiessling <fixed-case>SPEECON</fixed-case> – Speech Databases for Consumer Devices: Database Specification and Validation @@ -1445,7 +1445,7 @@ MichaelHess NeemeKahusk KaarelKaljurand - MareKoit + MareKoit FabioRinaldi KadriVider Technical Terminology as a Critical Resource @@ -1462,23 +1462,23 @@ RickardDomeij OlaKnutsson - Kerstin SeverinsonEklundh + Kerstin SeverinsonEklundh Different Ways of Evaluating a <fixed-case>S</fixed-case>wedish Grammar Checker http://www.lrec-conf.org/proceedings/lrec2002/pdf/180.pdf domeij-etal-2002-different - AntonioMoreno Ortiz + AntonioMoreno Ortiz VictorRaskin - SergeiNirenburg + SergeiNirenburg New Developments in Ontological Semantics http://www.lrec-conf.org/proceedings/lrec2002/pdf/181.pdf moreno-ortiz-etal-2002-new - AmaliaTodirascu + AmaliaTodirascu EricKow - LaurentRomary + LaurentRomary Towards Reusable <fixed-case>NLP</fixed-case> Components http://www.lrec-conf.org/proceedings/lrec2002/pdf/182.pdf todirascu-etal-2002-towards @@ -1486,7 +1486,7 @@ JuditaPreiss AnnaKorhonen - TedBriscoe + TedBriscoe Subcategorization Acquisition as an Evaluation Method for <fixed-case>WSD</fixed-case> http://www.lrec-conf.org/proceedings/lrec2002/pdf/183.pdf preiss-etal-2002-subcategorization @@ -1508,7 +1508,7 @@ MatejRojc - ZdravkoKačič + ZdravkoKačič DarinkaVerdonik Design and Implementation of the <fixed-case>S</fixed-case>lovenian Phonetic and Morphology Lexicons for the Use in Spoken Language Applications http://www.lrec-conf.org/proceedings/lrec2002/pdf/186.pdf @@ -1522,8 +1522,8 @@ hathout-tanguy-2002-webaffix - Natalia V.Loukachevitch - Boris V.Dobrov + Natalia V.Loukachevitch + Boris V.Dobrov Evaluation of Thesaurus on Sociopolitical Life as Information-Retrieval Tool http://www.lrec-conf.org/proceedings/lrec2002/pdf/188.pdf loukachevitch-dobrov-2002-evaluation @@ -1547,7 +1547,7 @@ RyuichiYoneda AkikoYamashita YasuharuDen - YujiMatsumoto + YujiMatsumoto Use of <fixed-case>XML</fixed-case> and Relational Databases for Consistent Development and Maintenance of Lexicons and Annotated Corpora http://www.lrec-conf.org/proceedings/lrec2002/pdf/191.pdf asahara-etal-2002-use @@ -1581,7 +1581,7 @@ rio-2002-compiling - ThierryHamon + ThierryHamon Olivier How to evaluate necessary cooperative systems of terminology building? http://www.lrec-conf.org/proceedings/lrec2002/pdf/196.pdf @@ -1594,15 +1594,15 @@ ElisabettaGuazzini StefanoMolino MarisaUlivieri - NicolettaCalzolari - AntonioZampolli + NicolettaCalzolari + AntonioZampolli <fixed-case>CLIPS</fixed-case>, a Multi-level <fixed-case>I</fixed-case>talian Computational Lexicon: a Glimpse to Data http://www.lrec-conf.org/proceedings/lrec2002/pdf/197.pdf ruimy-etal-2002-clips SusanneSalmon-Alt - RenataVieira + RenataVieira Nominal Expressions in Multilingual Corpora: Definites and Demonstratives http://www.lrec-conf.org/proceedings/lrec2002/pdf/198.pdf salmon-alt-vieira-2002-nominal @@ -1610,7 +1610,7 @@ JerkerJärborg DimitriosKokkinakis - Maria ToporowskaGronostaj + Maria ToporowskaGronostaj Lexical and Textual Resources for Sense Recognition and Description http://www.lrec-conf.org/proceedings/lrec2002/pdf/199.pdf jarborg-etal-2002-lexical @@ -1649,56 +1649,56 @@ eguchi-etal-2002-sensitivity - BrianMitchell - RobertGaizauskas + BrianMitchell + RobertGaizauskas A Comparison of Machine Learning Algorithms for Prepositional Phrase Attachment http://www.lrec-conf.org/proceedings/lrec2002/pdf/204.pdf mitchell-gaizauskas-2002-comparison DanCristea - Oana-DianaPostolache + Oana-DianaPostolache Gabriela-EugeniaDima - CătălinaBarbu + CătălinaBarbu <fixed-case>AR</fixed-case>-Engine - a framework for unrestricted co-reference resolution http://www.lrec-conf.org/proceedings/lrec2002/pdf/205.pdf cristea-etal-2002-ar - CătălinaBarbu - RichardEvans - RuslanMitkov + CătălinaBarbu + RichardEvans + RuslanMitkov A corpus based investigation of morphological disagreement in anaphoric relations http://www.lrec-conf.org/proceedings/lrec2002/pdf/206.pdf barbu-etal-2002-corpus - CătălinaBarbu + CătălinaBarbu Error analysis in anaphora resolution http://www.lrec-conf.org/proceedings/lrec2002/pdf/207.pdf barbu-2002-error Jean-YvesAntoine - CarolineBousquet-Vernhettes - JérômeGoulian - Mohamed ZakariaKurdi - SophieRosset + CarolineBousquet-Vernhettes + JérômeGoulian + Mohamed ZakariaKurdi + SophieRosset NadineVigouroux - JeanneVillaneau + JeanneVillaneau Predictive and objective evaluation of speech understanding: the “challenge” evaluation campaign of the I3 speech workgroup of the <fixed-case>F</fixed-case>rench <fixed-case>CNRS</fixed-case> http://www.lrec-conf.org/proceedings/lrec2002/pdf/208.pdf antoine-etal-2002-predictive - MichaelMoortgat + MichaelMoortgat RichardMoot Using the Spoken <fixed-case>D</fixed-case>utch Corpus for type-logical grammar induction http://www.lrec-conf.org/proceedings/lrec2002/pdf/209.pdf moortgat-moot-2002-using - Bolette S.Pedersen + Bolette S.Pedersen PatriziaPaggio Semantic Lexical Resources Applied to Content-based Querying - the <fixed-case>O</fixed-case>nto<fixed-case>Q</fixed-case>uery Project http://www.lrec-conf.org/proceedings/lrec2002/pdf/210.pdf @@ -1709,13 +1709,13 @@ VangelisKarkaletsis GeorgiosPaliouras IonAndroutsopoulos - Constantine D.Spyropoulos + Constantine D.Spyropoulos <fixed-case>E</fixed-case>llogon: A New Text Engineering Platform http://www.lrec-conf.org/proceedings/lrec2002/pdf/211.pdf petasis-etal-2002-ellogon - Antonio S.Valderrábanos + Antonio S.Valderrábanos AlexanderBelskis Luis IraolaMoreno Multilingual Terminology Extraction and Validation @@ -1723,15 +1723,15 @@ valderrabanos-etal-2002-multilingual - LailaDybkjær - Niels OleBernsen + LailaDybkjær + Niels OleBernsen Natural Interactivity Resources – Data, Annotation Schemes and Tools http://www.lrec-conf.org/proceedings/lrec2002/pdf/213.pdf dybkjaer-bernsen-2002-natural - Niels OleBernsen - LailaDybkjær + Niels OleBernsen + LailaDybkjær MykolaKolodnytsky <fixed-case>THE</fixed-case> <fixed-case>NITE</fixed-case> <fixed-case>WORKBENCH</fixed-case>. A Tool for Annotation of Natural Interactivity and Multimodal Data http://www.lrec-conf.org/proceedings/lrec2002/pdf/214.pdf @@ -1739,9 +1739,9 @@ ValentinTablan - CristianUrsu - KalinaBontcheva - HamishCunningham + CristianUrsu + KalinaBontcheva + HamishCunningham DianaMaynard OanaHamza TonyMcEnery @@ -1756,30 +1756,30 @@ VangelisKarkaletsis IoannisKoutsias GeorgePetasis - Constantine D.Spyropoulos + Constantine D.Spyropoulos <fixed-case>P</fixed-case>at<fixed-case>E</fixed-case>dit: An Information Extraction Pattern Editor for Fast System Customization http://www.lrec-conf.org/proceedings/lrec2002/pdf/216.pdf farmakiotou-etal-2002-patedit - TamásVáradi + TamásVáradi The <fixed-case>H</fixed-case>ungarian National Corpus http://www.lrec-conf.org/proceedings/lrec2002/pdf/217.pdf varadi-2002-hungarian - PaulClough - RobertGaizauskas + PaulClough + RobertGaizauskas S. L.Piao Building and annotating a corpus for the study of journalistic text reuse http://www.lrec-conf.org/proceedings/lrec2002/pdf/218.pdf clough-etal-2002-building - HennieBrugman + HennieBrugman HarrietSpenke MarkusKramer - AlexanderKlassmann + AlexanderKlassmann Multimedia Annotation with Multilingual Input Methods and Search Support http://www.lrec-conf.org/proceedings/lrec2002/pdf/219.pdf brugman-etal-2002-multimedia @@ -1818,7 +1818,7 @@ wittenburg-etal-2002-multimodal - DaanBroeder + DaanBroeder FreddyOffenga DonWillems Metadata Tools Supporting Controlled Vocabulary Services @@ -1826,10 +1826,10 @@ broeder-etal-2002-metadata - DaanBroeder - PeterWittenburg + DaanBroeder + PeterWittenburg ThierryDeclerck - LaurentRomary + LaurentRomary <fixed-case>LREP</fixed-case>: A Language Repository Exchange Protocol http://www.lrec-conf.org/proceedings/lrec2002/pdf/225.pdf broeder-etal-2002-lrep @@ -1848,9 +1848,9 @@ schmitz-2002-subject - SteveWhittaker - MarilynWalker - JohannaMoore + SteveWhittaker + MarilynWalker + JohannaMoore Fish or Fowl:A <fixed-case>W</fixed-case>izard of <fixed-case>O</fixed-case>z Evaluation of Dialogue Strategies in the Restaurant Domain http://www.lrec-conf.org/proceedings/lrec2002/pdf/228.pdf whittaker-etal-2002-fish @@ -1858,7 +1858,7 @@ AdrianaRoventini MarisaUlivieri - NicolettaCalzolari + NicolettaCalzolari Integrating Two Semantic Lexicons, <fixed-case>SIMPLE</fixed-case> and <fixed-case>I</fixed-case>tal<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: What Can We Gain? http://www.lrec-conf.org/proceedings/lrec2002/pdf/229.pdf roventini-etal-2002-integrating @@ -1871,14 +1871,14 @@ marinelli-roventini-2002-proper - LeonardoLesmo + LeonardoLesmo VincenzoLombardo Transformed Subcategorization Frames in Chunk Parsing http://www.lrec-conf.org/proceedings/lrec2002/pdf/231.pdf lesmo-lombardo-2002-transformed - GabrielaCavaglià + GabrielaCavaglià Measuring corpus homogeneity using a range of measures for inter-document distance http://www.lrec-conf.org/proceedings/lrec2002/pdf/232.pdf cavaglia-2002-measuring @@ -1891,7 +1891,7 @@ DimitraFarmakiotou GeorgiosSamaritakis GeorgiosPetasis - Maria TeresaPazienza + Maria TeresaPazienza MicheleVindigni FrantzVichot FrancisWolinski @@ -1900,7 +1900,7 @@ grover-etal-2002-multilingual - JanienkeSturm + JanienkeSturm IlseBakx BertCranen JacquesTerken @@ -1913,18 +1913,18 @@ KaterinaPastra DianaMaynard OanaHamza - HamishCunningham - YorickWilks + HamishCunningham + YorickWilks How feasible is the reuse of grammars for Named Entity Recognition? http://www.lrec-conf.org/proceedings/lrec2002/pdf/235.pdf pastra-etal-2002-feasible ClaudiaSoria - Niels OleBernsen + Niels OleBernsen NielsCadée JeanCarletta - LailaDybkjær + LailaDybkjær StefanEvert UlrichHeid AmyIsard @@ -1950,23 +1950,23 @@ JasonBaldridge - JohnDowding - SusanaEarly + JohnDowding + SusanaEarly <fixed-case>L</fixed-case>eo: an Architecture for Sharing Resources for Unification-Based Grammars http://www.lrec-conf.org/proceedings/lrec2002/pdf/238.pdf baldridge-etal-2002-leo - IrenaSpasić - GoranNenadić + IrenaSpasić + GoranNenadić SophiaAnaniadou Tuning Context Features with Genetic Algorithms http://www.lrec-conf.org/proceedings/lrec2002/pdf/239.pdf spasic-etal-2002-tuning - GoranNenadić - IrenaSpasić + GoranNenadić + IrenaSpasić SophiaAnaniadou Automatic Acronym Acquisition and Term Variation Management within Domain-Specific Texts http://www.lrec-conf.org/proceedings/lrec2002/pdf/240.pdf @@ -1979,9 +1979,9 @@ nimb-2002-adverbs - Anna SågvallHein + Anna SågvallHein EvaForsbom - JörgTiedemann + JörgTiedemann PerWeijnitz IngridAlmqvist Leif-JöranOlsson @@ -1992,7 +1992,7 @@ XavierCarreras - LluísPadró + LluísPadró A Flexible Distributed Architecture for Natural Language Analyzers http://www.lrec-conf.org/proceedings/lrec2002/pdf/243.pdf carreras-padro-2002-flexible @@ -2008,8 +2008,8 @@ picchi-etal-2002-italian - ChristopherCieri - MarkLiberman + ChristopherCieri + MarkLiberman Language Resource Creation and Distribution at the <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium: A Progress Report http://www.lrec-conf.org/proceedings/lrec2002/pdf/245.pdf cieri-liberman-2002-language @@ -2022,7 +2022,7 @@ sassen-gibbon-2002-enhanced - JörgTiedemann + JörgTiedemann <fixed-case>M</fixed-case>ats<fixed-case>L</fixed-case>ex - a Multilingual Lexical Database for Machine Translation http://www.lrec-conf.org/proceedings/lrec2002/pdf/247.pdf tiedemann-2002-matslex @@ -2040,16 +2040,16 @@ pfitzinger-2002-reducing - TedBriscoe - JohnCarroll + TedBriscoe + JohnCarroll Robust Accurate Statistical Annotation of General Text http://www.lrec-conf.org/proceedings/lrec2002/pdf/250.pdf briscoe-carroll-2002-robust - CatiaCucchiarini - ElisabethD’Halleweyn - LisanneTeunissen + CatiaCucchiarini + ElisabethD’Halleweyn + LisanneTeunissen A Human Language Technologies Platform for the <fixed-case>D</fixed-case>utch language: awareness, management maintenance and distribution http://www.lrec-conf.org/proceedings/lrec2002/pdf/251.pdf cucchiarini-etal-2002-human @@ -2066,8 +2066,8 @@ binnenpoorte-etal-2002-field - Ana M.García-Serrano - LuisRodrigo-Aguado + Ana M.García-Serrano + LuisRodrigo-Aguado JavierCalle Natural Language Dialogue in a Virtual Assistant Interface http://www.lrec-conf.org/proceedings/lrec2002/pdf/253.pdf @@ -2083,7 +2083,7 @@ DieterMaas - RitaNuebel + RitaNuebel CatherinePease PaulSchmidt Bilingual Indexing for Information Retrieval with <fixed-case>AUTINDEX</fixed-case> @@ -2091,45 +2091,45 @@ maas-etal-2002-bilingual - MichaelRosner + MichaelRosner The Future of Maltilex http://www.lrec-conf.org/proceedings/lrec2002/pdf/256.pdf rosner-2002-future - NicolettaCalzolari - RalphGrishman - MarthaPalmer + NicolettaCalzolari + RalphGrishman + MarthaPalmer Standards & best practice for multilingual computational lexicons: <fixed-case>ISLE</fixed-case> <fixed-case>MILE</fixed-case> and more” http://www.lrec-conf.org/proceedings/lrec2002/pdf/257.pdf calzolari-etal-2002-standards SueAtkins - NuriaBel + NuriaBel FrancescaBertagna - PierretteBouillon - NicolettaCalzolari + PierretteBouillon + NicolettaCalzolari ChristianeFellbaum - RalphGrishman + RalphGrishman AlessandroLenci - CatherineMacLeod - MarthaPalmer + CatherineMacLeod + MarthaPalmer GregorThurmair MartaVillegas - AntonioZampolli + AntonioZampolli From Resources to Applications. Designing the Multilingual <fixed-case>ISLE</fixed-case> Lexical Entry http://www.lrec-conf.org/proceedings/lrec2002/pdf/258.pdf atkins-etal-2002-resources - NicolettaCalzolari - Charles J.Fillmore - RalphGrishman - NancyIde + NicolettaCalzolari + Charles J.Fillmore + RalphGrishman + NancyIde AlessandroLenci - CatherineMacLeod - AntonioZampolli + CatherineMacLeod + AntonioZampolli Towards Best Practice for Multiword Expressions in Computational Lexicons http://www.lrec-conf.org/proceedings/lrec2002/pdf/259.pdf calzolari-etal-2002-towards @@ -2137,12 +2137,12 @@ AlessandroLenci RobertoBartolini - NicolettaCalzolari + NicolettaCalzolari AnaAgua StephanBusemann EmmanuelCartier KarineChevreau - JoséCoch + JoséCoch Multilingual Summarization by Integrating Linguistic Resources in the <fixed-case>MLIS</fixed-case>-<fixed-case>MUSI</fixed-case> Project http://www.lrec-conf.org/proceedings/lrec2002/pdf/260.pdf lenci-etal-2002-multilingual @@ -2154,9 +2154,9 @@ braasch-2002-current - Robert E.Frederking - Alan W.Black - Ralf D.Brown + Robert E.Frederking + Alan W.Black + Ralf D.Brown JohnMoody EricSteinbrecher Field Testing the Tongues Speech-to-Speech Machine Translation System @@ -2165,7 +2165,7 @@ JuliaHockenmaier - MarkSteedman + MarkSteedman Acquiring Compact Lexicalized Grammars from a Cleaner Treebank http://www.lrec-conf.org/proceedings/lrec2002/pdf/263.pdf hockenmaier-steedman-2002-acquiring @@ -2180,7 +2180,7 @@ HélèneFrançois - OlivierBoëffard + OlivierBoëffard The Greedy Algorithm and its Application to the Construction of a Continuous Speech Database http://www.lrec-conf.org/proceedings/lrec2002/pdf/265.pdf francois-boeffard-2002-greedy @@ -2217,14 +2217,14 @@ cassan-etal-2002-step - AsunciónMoreno + AsunciónMoreno OrenGedge - Henkvan den Heuvel - HaraldHöge + Henkvan den Heuvel + HaraldHöge SabineHorbach PatriciaMartin ElisabethPinto - AntonioRincón + AntonioRincón FrancoSenia RafidSukkar <fixed-case>S</fixed-case>peech<fixed-case>D</fixed-case>at across all <fixed-case>A</fixed-case>merica: <fixed-case>SALA</fixed-case> <fixed-case>II</fixed-case> @@ -2248,7 +2248,7 @@ NordineFourour EmmanuelMorin - BéatriceDaille + BéatriceDaille Incremental Recognition and Referential Categorization of <fixed-case>F</fixed-case>rench Proper Names http://www.lrec-conf.org/proceedings/lrec2002/pdf/272.pdf fourour-etal-2002-incremental @@ -2263,8 +2263,8 @@ matsubara-etal-2002-bilingual - MarcelaCharfuelán - Luis HernándezGómez + MarcelaCharfuelán + Luis HernándezGómez Cristina EstebanLópez HolmerHemsen A <fixed-case>XML</fixed-case>-based tool for evaluation of <fixed-case>SLDS</fixed-case> @@ -2280,7 +2280,7 @@ lopez-de-ipina-etal-2002-automatic - Richard F. E.Sutcliffe + Richard F. E.Sutcliffe KieranWhite Searching via Keywords or Concept Hierarchies - Which is Better? http://www.lrec-conf.org/proceedings/lrec2002/pdf/276.pdf @@ -2288,16 +2288,16 @@ Juliana GalvaniGreghi - Ronaldo TeixeiraMartins - Mariadas Graças Volpe Nunes + Ronaldo TeixeiraMartins + Mariadas Graças Volpe Nunes <fixed-case>DIADORIM</fixed-case> - A Lexical Database for <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese http://www.lrec-conf.org/proceedings/lrec2002/pdf/277.pdf greghi-etal-2002-diadorim MónicaCaballero - José B.Mariño - AsunciónMoreno + José B.Mariño + AsunciónMoreno Multidialectal <fixed-case>S</fixed-case>panish Modeling for <fixed-case>ASR</fixed-case> http://www.lrec-conf.org/proceedings/lrec2002/pdf/278.pdf caballero-etal-2002-multidialectal @@ -2327,16 +2327,16 @@ uibo-2002-experimental - MarianneDabbadie - Widad Mustafa ElHadi - IsmaïlTimimi + MarianneDabbadie + Widad Mustafa ElHadi + IsmaïlTimimi Terminological Enrichment for non-Interactive <fixed-case>MT</fixed-case> Evaluation http://www.lrec-conf.org/proceedings/lrec2002/pdf/282.pdf dabbadie-etal-2002-terminological PaulKingsbury - MarthaPalmer + MarthaPalmer From <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank to <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank http://www.lrec-conf.org/proceedings/lrec2002/pdf/283.pdf kingsbury-palmer-2002-treebank @@ -2345,9 +2345,9 @@ AlmudenaBallester Ángel MartínMunicio FernandoPardos - Jordi PortaZamorano + Jordi PortaZamorano Rafael J. RuizUreña - Fernando SánchezLeón + Fernando SánchezLeón Combining statistics on n-grams for automatic term recognition http://www.lrec-conf.org/proceedings/lrec2002/pdf/284.pdf ballester-etal-2002-combining @@ -2375,23 +2375,23 @@ NobuoKawaguchi ShigekiMatsubara - KazuyaTakeda + KazuyaTakeda FumitadaItakura Multi-Dimensional Data Acquisition for Integrated Acoustic Information Research http://www.lrec-conf.org/proceedings/lrec2002/pdf/287.pdf kawaguchi-etal-2002-multi - LaurenceDevillers - SophieRosset - HélèneBonneau-Maynard - LoriLamel + LaurenceDevillers + SophieRosset + HélèneBonneau-Maynard + LoriLamel Annotations for Dynamic Diagnosis of the Dialog State http://www.lrec-conf.org/proceedings/lrec2002/pdf/288.pdf devillers-etal-2002-annotations - Jean-ClaudeMartin + Jean-ClaudeMartin MichaelKipp Annotating and Measuring Multimodal Behaviour – Tycoon Metrics in the Anvil Tool http://www.lrec-conf.org/proceedings/lrec2002/pdf/289.pdf @@ -2400,12 +2400,12 @@ EmanuelaCresti MassimoMoneglia - Fernanda Bacelardo Nascimento - Antonio MorenoSandoval - JeanVeronis + Fernanda Bacelardo Nascimento + Antonio MorenoSandoval + JeanVeronis PhilippeMartin - KalidChoukri - ValerieMapelli + KalidChoukri + ValerieMapelli DanieleFalavigna AntonioCid ClaudeBlum @@ -2414,8 +2414,8 @@ cresti-etal-2002-c - ChristopherCieri - MarkLiberman + ChristopherCieri + MarkLiberman <fixed-case>TIDES</fixed-case> Language Resources: A Resource Map for Translingual Information Access http://www.lrec-conf.org/proceedings/lrec2002/pdf/291.pdf cieri-liberman-2002-tides @@ -2424,7 +2424,7 @@ StefanEickeler MarthaLarson WolffRüter - JoachimKöhler + JoachimKöhler Creation of an Annotated <fixed-case>G</fixed-case>erman Broadcast Speech Database for Spoken Document Retrieval http://www.lrec-conf.org/proceedings/lrec2002/pdf/292.pdf eickeler-etal-2002-creation @@ -2452,7 +2452,7 @@ duclaye-etal-2002-using - ChristophMüller + ChristophMüller MichaelStrube An <fixed-case>API</fixed-case> for Discourse-level Access to <fixed-case>XML</fixed-case>-encoded Corpora http://www.lrec-conf.org/proceedings/lrec2002/pdf/296.pdf @@ -2460,13 +2460,13 @@ HidetsuguNanba - ManabuOkumura + ManabuOkumura Some Examinations of Intrinsic Methods for Summary Evaluation Based on the Text Summarization Challenge (<fixed-case>TSC</fixed-case>) http://www.lrec-conf.org/proceedings/lrec2002/pdf/297.pdf nanba-okumura-2002-examinations - Mohamed-ZakariaKurdi + Mohamed-ZakariaKurdi MohamedAhafhaf Toward an objective and generic Method for Spoken Language Understanding Systems Evaluation: an extension of the <fixed-case>DCR</fixed-case> method http://www.lrec-conf.org/proceedings/lrec2002/pdf/298.pdf @@ -2481,14 +2481,14 @@ heyer-etal-2002-information - ChristopherCieri - StephanieStrassel + ChristopherCieri + StephanieStrassel The <fixed-case>DASL</fixed-case> Project: a Case Study in Data Re-Annotation and Re-Use http://www.lrec-conf.org/proceedings/lrec2002/pdf/300.pdf cieri-strassel-2002-dasl - Dragomir R.Radev + Dragomir R.Radev HongQi HarrisWu WeiguoFan @@ -2499,13 +2499,13 @@ DaisukeKawahara SadaoKurohashi - KôitiHasida + KôitiHasida Construction of a <fixed-case>J</fixed-case>apanese Relevance-tagged Corpus http://www.lrec-conf.org/proceedings/lrec2002/pdf/302.pdf kawahara-etal-2002-construction - NancyIde + NancyIde RandiReppen KeithSuderman The <fixed-case>A</fixed-case>merican National Corpus: More Than the Web Can Provide @@ -2520,9 +2520,9 @@ ToshiyukiTakezawa - EiichiroSumita + EiichiroSumita FumiakiSugaya - HirofumiYamamoto + HirofumiYamamoto SeiichiYamamoto Toward a Broad-coverage Bilingual Corpus for Speech Translation of Travel Conversations in the Real World http://www.lrec-conf.org/proceedings/lrec2002/pdf/305.pdf @@ -2530,7 +2530,7 @@ MichelleVanni - KeithMiller + KeithMiller Scaling the <fixed-case>ISLE</fixed-case> Framework: Use of Existing Corpus Resources for Validation of <fixed-case>MT</fixed-case> Evaluation Metrics across Languages http://www.lrec-conf.org/proceedings/lrec2002/pdf/306.pdf vanni-miller-2002-scaling @@ -2543,7 +2543,7 @@ BarbaraDi Eugenio - MichaelGlass + MichaelGlass Michael J.Scott The binomial cumulative distribution function, or, is my system better than yours? http://www.lrec-conf.org/proceedings/lrec2002/pdf/308.pdf @@ -2559,7 +2559,7 @@ suyaga-etal-2002-proposal - Rada F.Mihalcea + Rada F.Mihalcea Bootstrapping Large Sense Tagged Corpora http://www.lrec-conf.org/proceedings/lrec2002/pdf/310.pdf mihalcea-2002-bootstrapping @@ -2573,7 +2573,7 @@ ogino-etal-2002-valence - Jean-ClaudeMartin + Jean-ClaudeMartin Jean-HuguesRéty NellyBensimon Multimodal and Adaptative Pedagogical Resources @@ -2581,7 +2581,7 @@ martin-etal-2002-multimodal - TimothyBaldwin + TimothyBaldwin SlavenBilac RyoOkumura TakenobuTokunaga @@ -2592,13 +2592,13 @@ RomaricBesançon - MartinRajman + MartinRajman Evaluation of a Vector Space Similarity Measure in a Multilingual Framework http://www.lrec-conf.org/proceedings/lrec2002/pdf/314.pdf besancon-rajman-2002-evaluation - Serge A.Yablonsky + Serge A.Yablonsky Corpora as Object-Oriented System. From <fixed-case>UML</fixed-case>-notation to Implementation http://www.lrec-conf.org/proceedings/lrec2002/pdf/315.pdf yablonsky-2002-corpora @@ -2606,7 +2606,7 @@ RobertoBartolini AlessandroLenci - SimonettaMontemagni + SimonettaMontemagni VitoPirrelli The Lexicon-Grammar Balance in Robust Parsing of <fixed-case>I</fixed-case>talian http://www.lrec-conf.org/proceedings/lrec2002/pdf/316.pdf @@ -2619,7 +2619,7 @@ jung-2002-humans - AtsukoKoizumi + AtsukoKoizumi HirohikoSagawa MasaruTakeuchi An Annotated <fixed-case>J</fixed-case>apanese <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage Corpus @@ -2630,8 +2630,8 @@ PaulBaker AndrewHardie TonyMcEnery - HamishCunningham - RobGaizauskas + HamishCunningham + RobGaizauskas <fixed-case>EMILLE</fixed-case>, A 67-Million Word Corpus of Indic Languages: Data Collection, Mark-up and Harmonisation http://www.lrec-conf.org/proceedings/lrec2002/pdf/319.pdf baker-etal-2002-emille @@ -2643,8 +2643,8 @@ salmen-2002-multi - ConstantinOrăsan - RichardEvans + ConstantinOrăsan + RichardEvans Assessing the difficulty of finding people in texts http://www.lrec-conf.org/proceedings/lrec2002/pdf/321.pdf orasan-evans-2002-assessing @@ -2656,7 +2656,7 @@ olsen-2002-lemma - GáborPrószéky + GáborPrószéky MártonMiháltz Automatism and User Interaction: Building a <fixed-case>H</fixed-case>ungarian <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et http://www.lrec-conf.org/proceedings/lrec2002/pdf/323.pdf @@ -2669,11 +2669,11 @@ gendner-2002-comparative - OwenRambow + OwenRambow CassandreCreswell RachelSzekely HarrietTaber - MarilynWalker + MarilynWalker A Dependency Treebank for <fixed-case>E</fixed-case>nglish http://www.lrec-conf.org/proceedings/lrec2002/pdf/325.pdf rambow-etal-2002-dependency @@ -2686,8 +2686,8 @@ ramesh-bagga-2002-text - NancyIde - LaurentRomary + NancyIde + LaurentRomary Standards for Language Resources http://www.lrec-conf.org/proceedings/lrec2002/pdf/327.pdf ide-romary-2002-standards @@ -2703,7 +2703,7 @@ NigelCollier KoichiTakeuchi ChikashiNobata - JunichiFukumoto + JunichiFukumoto NorihiroOgata Progress on Multi-lingual Named Entity Annotation Guidelines using <fixed-case>RDF</fixed-case> (<fixed-case>S</fixed-case>) http://www.lrec-conf.org/proceedings/lrec2002/pdf/329.pdf @@ -2718,11 +2718,11 @@ JoanneCapstick HansUszkoreit - WolfgangWahlster + WolfgangWahlster ThierryDeclerck GregorErbach - AnthonyJameson - BrigitteJorg + AnthonyJameson + BrigitteJorg ReinhardKarger TillmannWegst <fixed-case>COLLATE</fixed-case>: Competence Center in Speech and Language Technology @@ -2737,13 +2737,13 @@ suzuki-kakihana-2002-japanese - PrimožJakopin + PrimožJakopin The feasibility of a complete text corpus http://www.lrec-conf.org/proceedings/lrec2002/pdf/333.pdf jakopin-2002-feasibility - CatherineMacleod + CatherineMacleod Lexical Annotation for Multi-word Entries Containing Nominalizations http://www.lrec-conf.org/proceedings/lrec2002/pdf/334.pdf macleod-2002-lexical @@ -2751,13 +2751,13 @@ SiljaHuttunen RomanYangarber - RalphGrishman + RalphGrishman Diversity of Scenarios in Information extraction http://www.lrec-conf.org/proceedings/lrec2002/pdf/335.pdf huttunen-etal-2002-diversity - Mark T.Maybury + Mark T.Maybury Multimodal Systems, Resources and Evaluation http://www.lrec-conf.org/proceedings/lrec2002/pdf/336.pdf maybury-2002-multimodal @@ -2765,7 +2765,7 @@ HiromichiKawanami TsuyoshiMasuda - TomokiToda + TomokiToda KiyohiroShikano Designing speech database with prosodic variety for expressive <fixed-case>TTS</fixed-case> system http://www.lrec-conf.org/proceedings/lrec2002/pdf/337.pdf @@ -2782,7 +2782,7 @@ AkinobuLee TatsuyaKawahara - KazuyaTakeda + KazuyaTakeda MasatoMimura AtsushiYamada AkinoriIto @@ -2830,9 +2830,9 @@ VéroniqueGendner GabrielIllouz - MichèleJardino + MichèleJardino LauraMonceaux - PatrickParoubek + PatrickParoubek IsabelleRobba AnneVilnat A Protocol for Evaluating Analyzers of Syntax (<fixed-case>PEAS</fixed-case>) @@ -2840,8 +2840,8 @@ gendner-etal-2002-protocol - Mark T.Maybury - AntonioZampolli + Mark T.Maybury + AntonioZampolli Language Resources and Evaluation: International Strategy Panel http://www.lrec-conf.org/proceedings/lrec2002/pdf/346.pdf maybury-zampolli-2002-language @@ -2861,16 +2861,16 @@ AndrewFinch - EzraBlack + EzraBlack RingoWathelet Beyond Tag Trigrams: New Local Features for Tagging http://www.lrec-conf.org/proceedings/lrec2002/pdf/349.pdf finch-etal-2002-beyond - SandaHarabagiu - FinleyLacatusu - PaulMorarescu + SandaHarabagiu + FinleyLacatusu + PaulMorarescu Multidocument Summarization with <fixed-case>GIST</fixed-case>exter http://www.lrec-conf.org/proceedings/lrec2002/pdf/350.pdf harabagiu-etal-2002-multidocument @@ -2893,14 +2893,14 @@ steininger-etal-2002-user - JamesPustejovsky + JamesPustejovsky Creating Domain-specific Information Servers http://www.lrec-conf.org/proceedings/lrec2002/pdf/353.pdf pustejovsky-2002-creating MathieuLafourcade - ChristianBoitet + ChristianBoitet <fixed-case>UNL</fixed-case> Lexical Selection with Conceptual Vectors http://www.lrec-conf.org/proceedings/lrec2002/pdf/354.pdf lafourcade-boitet-2002-unl diff --git a/data/xml/L04.xml b/data/xml/L04.xml index ef631a9136..5c447227ff 100644 --- a/data/xml/L04.xml +++ b/data/xml/L04.xml @@ -3,7 +3,7 @@ Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC’04) - Maria TeresaLino + Maria TeresaLino Maria FranciscaXavier FátimaFerreira RuteCosta @@ -18,7 +18,7 @@ lrec-2004-international - MarilynWalker + MarilynWalker Can We Talk? Prospects for Automatically Training Spoken Dialogue Systems http://www.lrec-conf.org/proceedings/lrec2004/pdf/kII.pdf walker-2004-talk @@ -48,13 +48,13 @@ campbell-2004-getting - BenteMaegaard + BenteMaegaard Industrial Needs for Language Resources http://www.lrec-conf.org/proceedings/lrec2004/pdf/I.pdf maegaard-2004-industrial - JunichiTsujii + JunichiTsujii Thesaurus or Logical Ontology, Which do we Need for Mining Text? http://www.lrec-conf.org/proceedings/lrec2004/pdf/kI.pdf tsujii-2004-thesaurus @@ -82,7 +82,7 @@ LourdesDíaz MartíQuixal AnaRuggia - Antonio S.Valderrabanos + Antonio S.Valderrabanos Alberto J.Cruz EnriqueTorrejon CeliaRico @@ -92,12 +92,12 @@ schmidt-etal-2004-alles - GeorgeDoddington + GeorgeDoddington AlexisMitchell - MarkPrzybocki - LanceRamshaw - StephanieStrassel - RalphWeischedel + MarkPrzybocki + LanceRamshaw + StephanieStrassel + RalphWeischedel The Automatic Content Extraction (<fixed-case>ACE</fixed-case>) Program – Tasks, Data, and Evaluation http://www.lrec-conf.org/proceedings/lrec2004/pdf/5.pdf doddington-etal-2004-automatic @@ -121,7 +121,7 @@ JanKrebber AlexanderRaake PaulaSmeele - MartinRajman + MartinRajman MirekMelichar VincenzoPallotta GiannaTsakou @@ -129,7 +129,7 @@ AnestisVovos JettieHoonhout DietmarSchuchardt - NikosFakotakis + NikosFakotakis TodorGanchev IlyasPotamitis <fixed-case>INSPIRE</fixed-case>: Evaluation of a Smart-Home System for Infotainment Management and Device Control @@ -138,7 +138,7 @@ Ielkavan der Sluis - EmielKrahmer + EmielKrahmer Evaluating Multimodal <fixed-case>NLG</fixed-case> Using Production Experiments http://www.lrec-conf.org/proceedings/lrec2004/pdf/14.pdf van-der-sluis-krahmer-2004-evaluating @@ -146,7 +146,7 @@ NunoSeco TonyVeale - JerHayes + JerHayes Concept Creation in Lexical Ontologies http://www.lrec-conf.org/proceedings/lrec2004/pdf/15.pdf seco-etal-2004-concept @@ -159,7 +159,7 @@ SusanneSalmon-Alt - LaurentRomary + LaurentRomary Towards a Reference Annotation Framework http://www.lrec-conf.org/proceedings/lrec2004/pdf/17.pdf salmon-alt-romary-2004-towards @@ -195,7 +195,7 @@ Hsin-HsiChen Yi-ChengYu - Chih-LongLin + Chih-LongLin Collocation Extraction Using Web Statistics http://www.lrec-conf.org/proceedings/lrec2004/pdf/24.pdf chen-etal-2004-collocation @@ -209,23 +209,23 @@ ChristinaAlexandris - Stavroula-EvitaFotinea + Stavroula-EvitaFotinea Reusing Language Resources for Speech Applications involving Emotion http://www.lrec-conf.org/proceedings/lrec2004/pdf/27.pdf alexandris-fotinea-2004-reusing - EvaNavas + EvaNavas AmaiaCastelruiz IkerLuengo - JonSánchez - InmaculadaHernáez + JonSánchez + InmaculadaHernáez Designing and Recording an Audiovisual Database of Emotional Speech in <fixed-case>B</fixed-case>asque http://www.lrec-conf.org/proceedings/lrec2004/pdf/28.pdf navas-etal-2004-designing - GaëlDias + GaëlDias SérgioNunes Evaluation of Different Similarity Measures for the Extraction of Multiword Units in a Reinforcement Learning Environment http://www.lrec-conf.org/proceedings/lrec2004/pdf/29.pdf @@ -260,14 +260,14 @@ bordoni-2004-investigation - WimPeters + WimPeters Incremental Knowledge Acquisition from <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>E</fixed-case>uro<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et http://www.lrec-conf.org/proceedings/lrec2004/pdf/36.pdf peters-2004-incremental - ViviNăstase - RadaMihalcea + ViviNăstase + RadaMihalcea Finding Semantic Associations on Express Lane http://www.lrec-conf.org/proceedings/lrec2004/pdf/38.pdf nastase-mihalcea-2004-finding @@ -281,24 +281,24 @@ DianaMaynard - KalinaBontcheva - HamishCunningham + KalinaBontcheva + HamishCunningham Automatic Language-Independent Induction of Gazetteer Lists http://www.lrec-conf.org/proceedings/lrec2004/pdf/40.pdf maynard-etal-2004-automatic - NikosFakotakis + NikosFakotakis Corpus Design, Recording and Phonetic Analysis of <fixed-case>G</fixed-case>reek Emotional Database http://www.lrec-conf.org/proceedings/lrec2004/pdf/41.pdf fakotakis-2004-corpus - YorickWilks - NickWebb - AndreaSetzer + YorickWilks + NickWebb + AndreaSetzer MarkHepple - RobertaCatizone + RobertaCatizone Human Dialogue Modelling Using Annotated Corpora http://www.lrec-conf.org/proceedings/lrec2004/pdf/42.pdf wilks-etal-2004-human @@ -339,7 +339,7 @@ Chu-RenHuang Ru-YngChang - Hsiang-PinLee + Hsiang-PinLee Sinica <fixed-case>BOW</fixed-case> (Bilingual Ontological <fixed-case>W</fixed-case>ordnet): Integration of Bilingual <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>SUMO</fixed-case> http://www.lrec-conf.org/proceedings/lrec2004/pdf/53.pdf huang-etal-2004-sinica @@ -347,7 +347,7 @@ StephanBopp SandroPedrazzini - ElisabethMaier + ElisabethMaier How to Disassemble Alphabetical Processions - Morphological Treatment of Unknown Words http://www.lrec-conf.org/proceedings/lrec2004/pdf/54.pdf bopp-etal-2004-disassemble @@ -355,7 +355,7 @@ DarinkaVerdonik MatejRojc - ZdravkoKačič + ZdravkoKačič Creating <fixed-case>S</fixed-case>lovenian Language Resources for Development of Speech-to-speech Translation Components http://www.lrec-conf.org/proceedings/lrec2004/pdf/57.pdf verdonik-etal-2004-creating @@ -368,7 +368,7 @@ BojanKotnik - ZdravkoKačič + ZdravkoKačič BogomirHorvat The Development and Integration of the <fixed-case>LDA</fixed-case>-Toolkit Into <fixed-case>COST</fixed-case>249 <fixed-case>S</fixed-case>peech<fixed-case>D</fixed-case>at(<fixed-case>II</fixed-case>) <fixed-case>SIG</fixed-case> Reference Recognizer http://www.lrec-conf.org/proceedings/lrec2004/pdf/59.pdf @@ -384,7 +384,7 @@ ozturk-etal-2004-duration - PhilippCimiano + PhilippCimiano AndreasHotho SteffenStaab Clustering Concept Hierarchies from Text @@ -392,13 +392,13 @@ cimiano-etal-2004-clustering - Alvin F.Martin - John S.Garofolo - Jonathan C.Fiscus - Audrey N.Le - David S.Pallett - Mark A.Przybocki - Gregory A.Sanders + Alvin F.Martin + John S.Garofolo + Jonathan C.Fiscus + Audrey N.Le + David S.Pallett + Mark A.Przybocki + Gregory A.Sanders <fixed-case>NIST</fixed-case> Language Technology Evaluation Cookbook http://www.lrec-conf.org/proceedings/lrec2004/pdf/64.pdf martin-etal-2004-nist @@ -419,7 +419,7 @@ Yong-JuLee - Bong-WanKim + Bong-WanKim Young-IlKim Dae-LimChoi Kwang-HyunLee @@ -432,7 +432,7 @@ AlessandroCucchiarelli RobertoNavigli FrancescaNeri - PaolaVelardi + PaolaVelardi Automatic Generation of Glosses in the <fixed-case>O</fixed-case>nto<fixed-case>L</fixed-case>earn System http://www.lrec-conf.org/proceedings/lrec2004/pdf/69.pdf cucchiarelli-etal-2004-automatic @@ -440,17 +440,17 @@ AnVandecatseye Jean-PierreMartens - JoaoNeto + JoaoNeto HugoMeinedo - CarmenGarcia-Mateo - JavierDieguez - FranceMihelic - JanezZibert + CarmenGarcia-Mateo + JavierDieguez + FranceMihelic + JanezZibert JanNouza PetrDavid - MatusPleva + MatusPleva AntonCizmar - HarrisPapageorgiou + HarrisPapageorgiou ChristinaAlexandris The <fixed-case>COST</fixed-case>278 Pan-<fixed-case>E</fixed-case>uropean Broadcast News Database http://www.lrec-conf.org/proceedings/lrec2004/pdf/70.pdf @@ -481,7 +481,7 @@ MarkStevenson - PaulClough + PaulClough <fixed-case>E</fixed-case>uro<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et as a Resource for Cross-language Information Retrieval http://www.lrec-conf.org/proceedings/lrec2004/pdf/76.pdf stevenson-clough-2004-eurowordnet @@ -502,22 +502,22 @@ ando-etal-2004-automatic - KarinKipper + KarinKipper BenjaminSnyder - MarthaPalmer + MarthaPalmer Extending a Verb-lexicon Using a Semantically Annotated Corpus http://www.lrec-conf.org/proceedings/lrec2004/pdf/84.pdf kipper-etal-2004-extending J.C.T.Beeken - P.H.J.van der Kamp + P.H.J.van der Kamp The Centre for <fixed-case>D</fixed-case>utch Language and Speech Technology (<fixed-case>TST</fixed-case> Centre) http://www.lrec-conf.org/proceedings/lrec2004/pdf/85.pdf beeken-van-der-kamp-2004-centre - Sue EllenWright + Sue EllenWright A Global Data Category Registry for Interoperable Language Resources http://www.lrec-conf.org/proceedings/lrec2004/pdf/87.pdf wright-2004-global @@ -529,14 +529,14 @@ kruyt-2004-integrated - HansDybkjær - LailaDybkjær + HansDybkjær + LailaDybkjær From Acts and Topics to Transactions and Dialogue Smoothness http://www.lrec-conf.org/proceedings/lrec2004/pdf/92.pdf dybkjaer-dybkjaer-2004-acts - HidekiKashioka + HidekiKashioka Grouping Synonymous Sentences from a Parallel Corpus http://www.lrec-conf.org/proceedings/lrec2004/pdf/93.pdf kashioka-2004-grouping @@ -549,7 +549,7 @@ ahmad-musacchio-2004-discovery - HaraldHöge + HaraldHöge Josef G.Bauer ChristianGeißler PanjiSetiawan @@ -559,20 +559,20 @@ hoge-etal-2004-evaluation - JanezŽibert - FranceMihelič + JanezŽibert + FranceMihelič Development of <fixed-case>S</fixed-case>lovenian Broadcast News Speech Database http://www.lrec-conf.org/proceedings/lrec2004/pdf/98.pdf zibert-mihelic-2004-development - EckhardBick + EckhardBick A Named Entity Recognizer for <fixed-case>D</fixed-case>anish http://www.lrec-conf.org/proceedings/lrec2004/pdf/99.pdf bick-2004-named - M. TeresaCabré + M. TeresaCabré CarmeBach RosaEstopà JuditFeliu @@ -612,7 +612,7 @@ sorensen-2004-bilingual - TomažErjavec + TomažErjavec Kristina HmeljakSangawa IrenaSrdanović Antonml. Vahčič @@ -621,7 +621,7 @@ erjavec-etal-2004-making - TomažErjavec + TomažErjavec <fixed-case>MULTEXT</fixed-case>-East Version 3: Multilingual Morphosyntactic Specifications, Lexicons and Corpora http://www.lrec-conf.org/proceedings/lrec2004/pdf/109.pdf erjavec-2004-multext @@ -630,15 +630,15 @@ Lorena SeijoPereiro Ana MartínezÍnsua Francisco MéndezPazó - Francisco CampilloDíaz - Eduardo RodríguezBanga + Francisco CampilloDíaz + Eduardo RodríguezBanga A <fixed-case>G</fixed-case>alician Textual Corpus for Morphosyntactic Tagging with Application to Text-to-Speech Synthesis http://www.lrec-conf.org/proceedings/lrec2004/pdf/111.pdf pereiro-etal-2004-galician - SalvadorEspaña - María JoséCastro + SalvadorEspaña + María JoséCastro José LuisHidalgo The <fixed-case>SPARTACUS</fixed-case>-Database: a <fixed-case>S</fixed-case>panish Sentence Database for Offline Handwriting Recognition http://www.lrec-conf.org/proceedings/lrec2004/pdf/112.pdf @@ -646,32 +646,32 @@ SofiaStamou - GoranNenadic - DimitrisChristodoulakis + GoranNenadic + DimitrisChristodoulakis Exploring <fixed-case>B</fixed-case>alkanet Shared Ontology for Multilingual Conceptual Indexing http://www.lrec-conf.org/proceedings/lrec2004/pdf/113.pdf stamou-etal-2004-exploring - MitsuoShimohata - EiichiroSumita - YujiMatsumoto + MitsuoShimohata + EiichiroSumita + YujiMatsumoto Building a Paraphrase Corpus for Speech Translation http://www.lrec-conf.org/proceedings/lrec2004/pdf/114.pdf shimohata-etal-2004-building YasuhiroAkiba - EiichiroSumita + EiichiroSumita HiromiNakaiwa SeiichiYamamoto - Hiroshi G.Okuno + Hiroshi G.Okuno Incremental Methods to Select Test Sentences for Evaluating Translation Ability http://www.lrec-conf.org/proceedings/lrec2004/pdf/115.pdf akiba-etal-2004-incremental - JanOdijk + JanOdijk Reusable Lexical Representations for Idioms http://www.lrec-conf.org/proceedings/lrec2004/pdf/116.pdf odijk-2004-reusable @@ -687,7 +687,7 @@ JanezStergar CaglayanErdem BogomirHorvat - ZdravkoKačič + ZdravkoKačič A Data-driven Adaptation of Prosody in a Multilingual <fixed-case>TTS</fixed-case> http://www.lrec-conf.org/proceedings/lrec2004/pdf/130.pdf stergar-etal-2004-data @@ -713,12 +713,12 @@ AndrejŽgank TomažRotovnik - Mirjam SepesyMaučec + Mirjam SepesyMaučec DarinkaVerdonik JanezKitak DamjanVlaj VladimirHozjan - ZdravkoKačič + ZdravkoKačič BogomirHorvat Acquisition and Annotation of <fixed-case>S</fixed-case>lovenian Broadcast News Database http://www.lrec-conf.org/proceedings/lrec2004/pdf/123.pdf @@ -726,11 +726,11 @@ AndrejŽgank - ZdravkoKačič + ZdravkoKačič FrankDiehl KlaraVicsi GyorgySzaszak - JozefJuhar + JozefJuhar SlavomirLihan The <fixed-case>COST</fixed-case> 278 <fixed-case>MASPER</fixed-case> Initiative - Crosslingual Speech Recognition with Large Telephone Databases http://www.lrec-conf.org/proceedings/lrec2004/pdf/124.pdf @@ -750,14 +750,14 @@ VincentVandeghinste - ErikTjong Kim Sang + ErikTjong Kim Sang Using a Parallel Transcript/Subtitle Corpus for Sentence Compression http://www.lrec-conf.org/proceedings/lrec2004/pdf/128.pdf vandeghinste-tjong-kim-sang-2004-using SofiaStamou - DimitrisChristodoulakis + DimitrisChristodoulakis Handling Subtle Sense Distinctions Through <fixed-case>W</fixed-case>ordnet Semantic Types http://www.lrec-conf.org/proceedings/lrec2004/pdf/133.pdf stamou-christodoulakis-2004-handling @@ -771,15 +771,15 @@ HeikeTelljohann - ErhardHinrichs - SandraKübler + ErhardHinrichs + SandraKübler The Tüba-<fixed-case>D</fixed-case>/<fixed-case>Z</fixed-case> Treebank: Annotating <fixed-case>G</fixed-case>erman with a Context-Free Backbone http://www.lrec-conf.org/proceedings/lrec2004/pdf/135.pdf telljohann-etal-2004-tuba - John S.Garofolo - Christophe D.Laprun + John S.Garofolo + Christophe D.Laprun MartialMichel Vincent M.Stanford ElhamTabassi @@ -804,7 +804,7 @@ takezawa-kikui-2004-comparative - NúriaBel + NúriaBel Cornelis H.A.Koster MartaVillegas Cost-effective Cross-lingual Document Classification @@ -813,7 +813,7 @@ KatrinErk - SebastianPadó + SebastianPadó A Powerful and Versatile <fixed-case>XML</fixed-case> Format for Representing Role-semantic Annotation http://www.lrec-conf.org/proceedings/lrec2004/pdf/202.pdf erk-pado-2004-powerful @@ -821,11 +821,11 @@ StefanBaumann CarenBrinckmann - SilviaHansen-Schirra - Geert-JanKruijff - IvanaKruijff-Korbayová + SilviaHansen-Schirra + Geert-JanKruijff + IvanaKruijff-Korbayová StellaNeumann - ErichSteiner + ErichSteiner ElkeTeich HansUszkoreit The <fixed-case>MULI</fixed-case> Project: Annotation and Analysis of Information Structure in <fixed-case>G</fixed-case>erman and <fixed-case>E</fixed-case>nglish @@ -833,14 +833,14 @@ baumann-etal-2004-muli - P. H. J.van der Kamp + P. H. J.van der Kamp J. G.Kruyt Putting the <fixed-case>D</fixed-case>utch <fixed-case>PAROLE</fixed-case> Corpus to Work http://www.lrec-conf.org/proceedings/lrec2004/pdf/207.pdf van-der-kamp-kruyt-2004-putting - JulieCarson-Berndsen + JulieCarson-Berndsen RobertKelly Acquiring Reusable Multilingual Phonotactic Resources http://www.lrec-conf.org/proceedings/lrec2004/pdf/208.pdf @@ -854,9 +854,9 @@ neugebauer-wilson-2004-phonological - Pedro ConcejeroCerezo - Juan José RodríguezSoler - Daniel TapiasMerino + Pedro ConcejeroCerezo + Juan José RodríguezSoler + Daniel TapiasMerino Alberto J. SánchezGarcía Methodology for Rapid Prototyping and Testing of <fixed-case>ASR</fixed-case> Based User Interfaces http://www.lrec-conf.org/proceedings/lrec2004/pdf/210.pdf @@ -864,21 +864,21 @@ LarsDegerstedt - ArneJönsson + ArneJönsson Open Resources for Language Technology http://www.lrec-conf.org/proceedings/lrec2004/pdf/211.pdf degerstedt-jonsson-2004-open Marie-LaureReinberger - WalterDaelemans + WalterDaelemans Unsupervised Text Mining for Ontology Extraction: An Evaluation of Statistical Measures http://www.lrec-conf.org/proceedings/lrec2004/pdf/212.pdf reinberger-daelemans-2004-unsupervised DanielAioanei - JulieCarson-Berndsen + JulieCarson-Berndsen AnjaGeumann RobertKelly MoritzNeugebauer @@ -890,28 +890,28 @@ OscarCorcho RaúlGarcía-Castro - AsunciónGómez-Pérez + AsunciónGómez-Pérez Benchmarking Ontology Tools. A Case Study for the <fixed-case>W</fixed-case>eb<fixed-case>ODE</fixed-case> Platform. http://www.lrec-conf.org/proceedings/lrec2004/pdf/217.pdf corcho-etal-2004-benchmarking - Bayan AbuShawar - EricAtwell + Bayan AbuShawar + EricAtwell A Chatbot as a Novel Corpus Visualization Tool http://www.lrec-conf.org/proceedings/lrec2004/pdf/218.pdf shawar-atwell-2004-chatbot FlorentinaVasilescu - PhilippeLanglais + PhilippeLanglais GuyLapalme Evaluating Variants of the <fixed-case>L</fixed-case>esk Approach for Disambiguating Words http://www.lrec-conf.org/proceedings/lrec2004/pdf/219.pdf vasilescu-etal-2004-evaluating - SergeiNirenburg + SergeiNirenburg MarjorieMcShane StephenBeale The Rationale for Building an Ontology Expressly for <fixed-case>NLP</fixed-case> @@ -921,14 +921,14 @@ MarjorieMcShane StephenBeale - SergeiNirenburg + SergeiNirenburg Some Meaning Procedures of Ontological Semantics http://www.lrec-conf.org/proceedings/lrec2004/pdf/224.pdf mcshane-etal-2004-meaning - Eric K.Ringger - Robert C.Moore + Eric K.Ringger + Robert C.Moore EugeneCharniak LucyVanderwende HisamiSuzuki @@ -938,7 +938,7 @@ HidetsuguNanba - ManabuOkumura + ManabuOkumura Comparison of Some Automatic and Manual Methods for Summary Evaluation Based on the Text Summarization Challenge 2 http://www.lrec-conf.org/proceedings/lrec2004/pdf/229.pdf nanba-okumura-2004-comparison @@ -962,15 +962,15 @@ folch-etal-2004-highlighting - DanTufis + DanTufis RaduIon - NancyIde + NancyIde Word Sense Disambiguation as a <fixed-case>W</fixed-case>ordnets’ Validation Method in <fixed-case>B</fixed-case>alkanet http://www.lrec-conf.org/proceedings/lrec2004/pdf/234.pdf tufis-etal-2004-word - DanTufis + DanTufis Term Translations in Parallel Corpora: Discovery and Consistency Check http://www.lrec-conf.org/proceedings/lrec2004/pdf/235.pdf tufis-2004-term @@ -984,27 +984,27 @@ sarmento-etal-2004-corpografo - DanielFerrés + DanielFerrés MarcMassot - MuntsaPadró - HoracioRodríguez - JordiTurmo + MuntsaPadró + HoracioRodríguez + JordiTurmo Automatic Classification of Geographic Named Entities http://www.lrec-conf.org/proceedings/lrec2004/pdf/237.pdf ferres-etal-2004-automatic - OliviaSanchez-Graillet - MassimoPoesio + OliviaSanchez-Graillet + MassimoPoesio Acquiring <fixed-case>B</fixed-case>ayesian Networks from Text http://www.lrec-conf.org/proceedings/lrec2004/pdf/240.pdf sanchez-graillet-poesio-2004-acquiring Thanh BonNguyen - Thi Minh HuyenNguyen - LaurentRomary - Xuan LuongVu + Thi Minh HuyenNguyen + LaurentRomary + Xuan LuongVu Developping Tools and Building Linguistic Resources for <fixed-case>V</fixed-case>ietnamese Morpho-syntactic Processing http://www.lrec-conf.org/proceedings/lrec2004/pdf/241.pdf nguyen-etal-2004-developping @@ -1033,7 +1033,7 @@ panunzi-etal-2004-using - MarcoBaroni + MarcoBaroni SilviaBernardini FedericaComastri LorenzoPiccioni @@ -1045,14 +1045,14 @@ baroni-etal-2004-introducing - NancyIde + NancyIde DavidWoolner Exploiting Semantic Web Technologies for Intelligent Access to Historical Documents http://www.lrec-conf.org/proceedings/lrec2004/pdf/248.pdf ide-woolner-2004-exploiting - MarcoBaroni + MarcoBaroni SabrinaBisi Using Cooccurrence Statistics and the Web to Discover Synonyms in a Technical Language http://www.lrec-conf.org/proceedings/lrec2004/pdf/249.pdf @@ -1073,7 +1073,7 @@ SalmaJamoussi - KamelSmaïli + KamelSmaïli DominiqueFohr Jean-PaulHaton A Complete Understanding Speech System Based on Semantic Concepts @@ -1081,10 +1081,10 @@ jamoussi-etal-2004-complete - KirilSimov + KirilSimov AlexanderSimov HristoGanev - KrasimiraIvanova + KrasimiraIvanova IlkoGrigorov The <fixed-case>CL</fixed-case>a<fixed-case>RK</fixed-case> System: <fixed-case>XML</fixed-case>-based Corpora Development System for Rapid Prototyping http://www.lrec-conf.org/proceedings/lrec2004/pdf/258.pdf @@ -1100,15 +1100,15 @@ badia-etal-2004-nlp - KalinaBontcheva + KalinaBontcheva Open-source Tools for Creation, Maintenance, and Storage of Lexical Resources for Language Generation from Ontologies http://www.lrec-conf.org/proceedings/lrec2004/pdf/264.pdf bontcheva-2004-open AgnesLisowska - AndreiPopescu-Belis - SusanArmstrong + AndreiPopescu-Belis + SusanArmstrong User Query Analysis for the Specification and Evaluation of a Dialogue Processing and Retrieval System http://www.lrec-conf.org/proceedings/lrec2004/pdf/266.pdf lisowska-etal-2004-user @@ -1123,16 +1123,16 @@ popov-etal-2004-creation - AndreiPopescu-Belis + AndreiPopescu-Belis Abstracting a Dialog Act Tagset for Meeting Processing http://www.lrec-conf.org/proceedings/lrec2004/pdf/268.pdf popescu-belis-2004-abstracting - AndreiPopescu-Belis + AndreiPopescu-Belis LoïsRigouste SusanneSalmon-Alt - LaurentRomary + LaurentRomary Online Evaluation of Coreference Resolution http://www.lrec-conf.org/proceedings/lrec2004/pdf/270.pdf popescu-belis-etal-2004-online @@ -1140,8 +1140,8 @@ XavierCarreras IsaacChao - LluísPadró - MuntsaPadró + LluísPadró + MuntsaPadró <fixed-case>F</fixed-case>ree<fixed-case>L</fixed-case>ing: An Open-Source Suite of Language Analyzers http://www.lrec-conf.org/proceedings/lrec2004/pdf/271.pdf carreras-etal-2004-freeling @@ -1184,7 +1184,7 @@ przepiorkowski-etal-2004-search - Peter A.Heeman + Peter A.Heeman The <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish <fixed-case>SALA</fixed-case>-<fixed-case>II</fixed-case> Data Collection http://www.lrec-conf.org/proceedings/lrec2004/pdf/276.pdf heeman-2004-american @@ -1192,14 +1192,14 @@ AndrewFinch YasuhiroAkiba - EiichiroSumita + EiichiroSumita How Does Automatic Machine Translation Evaluation Correlate with Human Scoring as the Number of Reference Translations Increases? http://www.lrec-conf.org/proceedings/lrec2004/pdf/277.pdf finch-etal-2004-automatic SlavenBilac - TimothyBaldwin + TimothyBaldwin HozumiTanaka Evaluating the <fixed-case>FOKS</fixed-case> Error Model http://www.lrec-conf.org/proceedings/lrec2004/pdf/278.pdf @@ -1208,7 +1208,7 @@ GuillaumeGibert GérardBailly - FrédéricEliséi + FrédéricEliséi DenisBeautemps RémiBrun Evaluation of a Speech Cuer: From Motion Capture to a Concatenative Text-to-cued Speech System @@ -1218,7 +1218,7 @@ NikolaosNanas VictoriaUren - Annede Roeck + Annede Roeck JohnDomingue Beyond <fixed-case>TREC</fixed-case>’s Filtering Track http://www.lrec-conf.org/proceedings/lrec2004/pdf/283.pdf @@ -1234,7 +1234,7 @@ CarolPeters MartinBraschler - KhalidChoukri + KhalidChoukri JulioGonzalo MichaelKluck The Future of Evaluation for Cross-Language Information Retrieval Systems @@ -1243,11 +1243,11 @@ peters-etal-2004-future - Henkvan den Heuvel + Henkvan den Heuvel PhilHall - HaraldHöge - AsunciónMoreno - AntonioRincon + HaraldHöge + AsunciónMoreno + AntonioRincon FrancescoSenia <fixed-case>SALA</fixed-case> <fixed-case>II</fixed-case> Across the Finish Line: A Large Collection of Mobile Telephone Speech Databases from <fixed-case>N</fixed-case>orth and <fixed-case>L</fixed-case>atin <fixed-case>A</fixed-case>merica completed http://www.lrec-conf.org/proceedings/lrec2004/pdf/288.pdf @@ -1255,7 +1255,7 @@ van-den-heuvel-etal-2004-sala - XavierGómez-Guinovart + XavierGómez-Guinovart Elena SacauFontenla Parallel Corpora for the <fixed-case>G</fixed-case>alician Language: Building and Processing of the <fixed-case>CLUVI</fixed-case> (Linguistic Corpus of the <fixed-case>U</fixed-case>niversity of <fixed-case>V</fixed-case>igo) http://www.lrec-conf.org/proceedings/lrec2004/pdf/290.pdf @@ -1280,15 +1280,15 @@ wagner-zeisler-2004-syntactically - MontserratMarimon - NúriaBel + MontserratMarimon + NúriaBel Lexical Entry Templates for Robust Deep Parsing http://www.lrec-conf.org/proceedings/lrec2004/pdf/295.pdf We report on the development and employment of lexical entry templates in a large--coverage unification--based grammar of Spanish. The aim of the work reported in this paper is to provide robust deep linguistic processing in order to make the grammar more adequate for industrial NLP applications. marimon-bel-2004-lexical - DanTufis + DanTufis LiviuDragomirescu Tiered Tagging Revisited http://www.lrec-conf.org/proceedings/lrec2004/pdf/296.pdf @@ -1296,7 +1296,7 @@ tufis-dragomirescu-2004-tiered - DanTufis + DanTufis EduardBarbu A Methodology and Associated Tools for Building Interlingual Wordnets http://www.lrec-conf.org/proceedings/lrec2004/pdf/298.pdf @@ -1305,8 +1305,8 @@ DoaaSamy - AntonioMoreno-Sandoval - José M.Guirao + AntonioMoreno-Sandoval + José M.Guirao Construction of a Bilingual <fixed-case>A</fixed-case>rabic-<fixed-case>S</fixed-case>panish Lexicon of Verbs Based on a Parallel Corpus http://www.lrec-conf.org/proceedings/lrec2004/pdf/299.pdf Parallel corpora are considered an important resource for the development of linguistic tools. In this paper our main goal is the development of a bilingual lexicon of verbs. The construction of this lexicon is possible using two main resources: I) a parallel corpus (through the alignment); II) the linguistic tools developed for Spanish (which serve as a starting point for developing tools for Arabic language). At the end, aligned equivalent verbs are detected automatically from a parallel corpus Spanish-Arabic. To achieve this goal, we had to pass through different preparatory stages concerning the assesment of the parallel corpus, the monolingual tokenization of each corpus, a preliminary sentence alignment and finally applying the model of automatic extraction of equivalent verbs. Our method is hybrid, since it combines both statistical and linguistic approaches. @@ -1326,8 +1326,8 @@ ManolisMaragoudakis - NikosFakotakis - GeorgeKokkinakis + NikosFakotakis + GeorgeKokkinakis A <fixed-case>B</fixed-case>ayesian Model for Shallow Syntactic Parsing of Natural Language Texts http://www.lrec-conf.org/proceedings/lrec2004/pdf/302.pdf For the present work, we introduce and evaluate a novel Bayesian syntactic shallow parser that is able to perform robust detection of pairs of subject-object and subject-direct object-indirect object for a given verb, in a natural language sentence. The shallow parser infers on the correct subject-object pairs based on knowledge provided by Bayesian network learning from annotated text corpora. The DELOS corpus, a collection of economic domain texts that has been automatically annotated using various morphological and syntactic tools was used as training material. Our shallow parser makes use of limited linguistic input. More specifically, we consider only part of speech tagging, the voice and the mood of the verb as well as the head word of a noun phrase. For the task of detecting the head word of a phrase we used a sentence boundary detector. Identifying the head word of a noun phrase, i.e. the word that holds the morphological information (case, number) of the whole phrase, also proves to be very helpful for our task as its morphological tag is all the information that is needed regarding the phrase. The evaluation of the proposed method was performed against three other machine learning techniques, namely naive Bayes, k-Nearest Neighbor and Support Vector Machines, methods that have been previously applied to natural language processing tasks with satisfactory results. The experimental outcomes portray a satisfactory performance of our proposed shallow parser, which reaches almost 92 per cent in terms of precision. @@ -1344,7 +1344,7 @@ JacquesDuchateau TimCeyssens - HugoVan hamme + HugoVan hamme Use and Evaluation of Prosodic Annotations in <fixed-case>D</fixed-case>utch http://www.lrec-conf.org/proceedings/lrec2004/pdf/304.pdf In the development of annotations for a spoken database, an important issue is whether the annotations can be generated automatically with sufficient precision, or whether expensive manual annotations are needed. In this paper, the case of prosodic annotations is discussed, which was investigated on the CGN database (Spoken Dutch Corpus). The main conclusions of this work are as follows. First, it was found that the available amount of manual prosodic annotations is sufficient for the development of our (baseline, decision tree based) prosodic models. In other words, more manual annotations do not improve the models. Second, the developed prosodic models for prominence are insufficiently accurate to produce automatic prominence annotations that are as good as the manual ones. But on the other hand the consistency between manual and automatic break annotations is as high as the inter-transcriber consistency for breaks. So given the current amount of manual break annotations, annotations for the remainder of the CGN database can be generated automatically with the same quality as the manual annotations. @@ -1352,7 +1352,7 @@ StephanBusemann - Hans-UlrichKrieger + Hans-UlrichKrieger Resources and Techniques for Multilingual Information Extraction http://www.lrec-conf.org/proceedings/lrec2004/pdf/306.pdf Official travel warnings published regularly in the internet by the ministries for foreign affairs of France, Germany, and the UK provide a useful resource for assessing the risks associated with travelling to some countries. The shallow IE system SProUT has been extended to meet the specific needs of delivering a language-neutral output for English, French, or German input texts. A shared type hierarchy, a feature-enhanced gazetteer resource, and generic techniques of merging chunk analyses into larger results are major reusable results of this work. @@ -1361,9 +1361,9 @@ LeiChen YangLiu - MaryHarper + MaryHarper EduardoMaia - SusanMcRoy + SusanMcRoy Evaluating Factors Impacting the Accuracy of Forced Alignments in a Multimodal Corpus http://www.lrec-conf.org/proceedings/lrec2004/pdf/307.pdf People, when processing human-to-human communication, utilize everything they can in order to understand that communication, including speech and information such as the time and location of an interlocutor's gesture and gaze. Speech and gesture are known to exhibit a synchronous relationship in human communication; however, the precise nature of that relationship requires further investigation. The construction of computer models of multimodal human communication would be enabled by the availability of multimodal communication corpora annotated with synchronized gesture and speech features. To investigate the temporal relationships of these knowledge sources, we have collected and are annotating several multimodal corpora with time-aligned features. Forced alignment between a speech file and its transcription is a crucial part of multimodal corpus production. This paper investigates a number of factors that may contribute to highly accurate forced alignments to support the rapid production of these multimodal corpora including the acoustic model, the match between the speech used for training the system and that to be force aligned, the amount of data used to train the ASR system, the availability of speaker adaptation, and the duration of alignment segments. @@ -1406,9 +1406,9 @@ afify-emam-2004-collection - KirilSimov + KirilSimov PetyaOsenova - SiaKolkovska + SiaKolkovska ElisavetaBalabanova DimitarDoikoff A Language Resources Infrastructure for <fixed-case>B</fixed-case>ulgarian @@ -1441,7 +1441,7 @@ dowdall-etal-2004-role - JörgTiedemann + JörgTiedemann LarsNygaard The <fixed-case>OPUS</fixed-case> Corpus - Parallel and Free: <url>http://logos.uio.no/opus</url> http://www.lrec-conf.org/proceedings/lrec2004/pdf/320.pdf @@ -1449,18 +1449,18 @@ tiedemann-nygaard-2004-opus - JavierFarreres - HoracioRodríguez + JavierFarreres + HoracioRodríguez Selecting the Correct <fixed-case>E</fixed-case>nglish Synset for a <fixed-case>S</fixed-case>panish Sense http://www.lrec-conf.org/proceedings/lrec2004/pdf/324.pdf This work tries to enrich the Spanish Wordnet using a Spanish taxonomy as a knowledge source. The Spanish taxonomy is composed by Spanish senses, while Spanish Wordnet is composed by synsets, mostly linked to English WordNet. A set of weighted associations between Spanish words and Wordnet synsets is used for inferring associations between both taxonomies. farreres-rodriguez-2004-selecting - AsunciónMoreno - KhalidChoukri + AsunciónMoreno + KhalidChoukri PhilHall - Henkvan den Heuvel + Henkvan den Heuvel EricSanders FrancescoSenia HerbertTropf @@ -1487,10 +1487,10 @@ dalby-etal-2004-standards - Henkvan den Heuvel + Henkvan den Heuvel DorotaIskra EricSanders - Folkertde Vriend + Folkertde Vriend <fixed-case>SLR</fixed-case> Validation: Current Trends and Developments http://www.lrec-conf.org/proceedings/lrec2004/pdf/328.pdf This paper deals with the quality evaluation (validation) of Spoken Language Resources (SLR). The current situation in terms of relevant validation criteria and procedures is briefly presented. Next, a number of validation issues related to new data formats (XML-based annotations, UTF-16 encoding) are discussed. Further, new validation cycles that were introduced in a series of new projects like SpeeCon and OrienTel are addressed: prompt sheet validation, lexicon validation and pre-release validation. Finally, SPEX's current and future @@ -1503,11 +1503,11 @@ saggion-2004-identifying - LauraAlonso - IreneCastellón + LauraAlonso + IreneCastellón JordiEscribano XavierMesseguer - LluísPadró + LluísPadró Multiple Sequence Alignment for Characterizing the Lineal Structure of Revision http://www.lrec-conf.org/proceedings/lrec2004/pdf/332.pdf We present a first approach to the application of a data mining technique, Multiple Sequence Alignment, to the systematization of a polemic aspect of discourse, namely, the expression of contrast, concession, counterargument and semantically similar discursive relations. The representation of the phenomena under study is carried out by very simple techniques, mostly pattern-matching, but the results allow to drive insightful conclusions on the organization of this aspect of discourse: equivalence classes of discourse markers are established, and systematic patterns are discovered, which will be applied in enhancing a discursive parser. @@ -1546,8 +1546,8 @@ CostanzaNavarretta - Bolette SandfordPedersen - Dorte HaltrupHansen + Bolette SandfordPedersen + Dorte HaltrupHansen “Human Language Technology Elements in a Knowledge Organisation System - The <fixed-case>VID</fixed-case> Project” http://www.lrec-conf.org/proceedings/lrec2004/pdf/339.pdf This paper describes how Human Language Technologies and linguistic resources are used to support the construction of components of a knowledge organisation system. In particular we focus on methodologies and resources for building a corpus-based domain ontology and extracting relevant metadata information for text chunks from domain-specific corpora. @@ -1555,36 +1555,36 @@ KedarBellare - Anish DasSarma + Anish DasSarma Atish DasSarma NavneetLoiwal - VaibhavMehta + VaibhavMehta GaneshRamakrishnan - PushpakBhattacharyya + PushpakBhattacharyya Generic Text Summarization Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et http://www.lrec-conf.org/proceedings/lrec2004/pdf/342.pdf bellare-etal-2004-generic - Natalia V.Loukachevitch - Boris V.Dobrov + Natalia V.Loukachevitch + Boris V.Dobrov Development of Bilingual Domain-Specific Ontology for Automatic Conceptual Indexing http://www.lrec-conf.org/proceedings/lrec2004/pdf/343.pdf In the paper we describe development, means of evaluation and applications of Russian-English Sociopolitical Thesaurus specially developed as a linguistic resource for automatic text processing applications. The Sociopolitical domain is not a domain of social research but a broad domain of social relations including economic, political, military, cultural, sports and other subdomains. The knowledge of this domain is necessary for automatic text processing of such important documents as official documents, legislative acts, newspaper articles. loukachevitch-dobrov-2004-development - Natalia V.Loukachevitch - Boris V.Dobrov + Natalia V.Loukachevitch + Boris V.Dobrov Development of Ontologies with Minimal Set of Conceptual Relations http://www.lrec-conf.org/proceedings/lrec2004/pdf/345.pdf In the paper we describe our approach to development of ontologies with small number of relation types. Non-taxonomic relations in our ontologies are based on ontological dependence conception described in the formal ontology. This minimal relations set does not depend on a domain or a task and makes possible to begin the ontology construction at once, as soon as a task is set and a domain is determined, to receive the first version of an ontology in short time. Such an initial ontology can be used for information-retrieval applications and can serve as a structural basis for further development of the ontology loukachevitch-dobrov-2004-development-ontologies - Maria Fernanda Bacelardo Nascimento + Maria Fernanda Bacelardo Nascimento AmáliaMendes - LuísaPereira + LuísaPereira Providing On-line Access to <fixed-case>P</fixed-case>ortuguese Language Resources: Corpora and Lexicons http://www.lrec-conf.org/proceedings/lrec2004/pdf/346.pdf Several Language Resources (LRs) for Portuguese, developed at the Center of Linguistics of the Lisbon University (CLUL), are available on-line at CLUL's webpage: www.clul.ul.pt/english/sectores/projecto_rld.html. These LRs have been extracted from or developed based on the Reference Corpus of Contemporary Portuguese (CRPC), a monitor corpus containing, at the present, more than 300 million words, taken by sampling from several types of written text (literary, newspaper, technical, didactic, juridical, parlamentary, etc.) and spoken text (informal and formal), pertaining to national and regional varieties of Portuguese (including European, Brazilian, African and Asian Portuguese). The LRs available for on-line queries include: a) several subcorpora (written and spoken, tagged and untagged) compiled and extracted from CRPC for specific CLUL's projects and now available for on-line queries; b) a published sample of "Português Fundamental", a spoken CRPC subcorpus, available for texts download; c) a frequency lexicon extracted from a CRPC subcorpus available for both on-line queries and download. Other RLs available for Portuguese are also referred: C-ORAL-ROM - Integrated Reference Corpora for Spoken Romance Languages, a CD-ROM edition of a spoken corpus with text-to-sound alignment; the LE-PAROLE corpus; the LE-PAROLE Lexicon and the SIMPLE Lexicon. @@ -1592,7 +1592,7 @@ BrunoCartoni - PierretteBouillon + PierretteBouillon YalinaAlphonse SabineLehmann Automatisation of the Activity of Term Collection in Different Languages @@ -1602,7 +1602,7 @@ JorgeVivaldi - HoracioRodríguez + HoracioRodríguez Automatically Selecting Domain Markers for Terminology Extraction http://www.lrec-conf.org/proceedings/lrec2004/pdf/348.pdf Some approaches to automatic terminology extraction from corpora imply the use of existing semantic resources for guiding the detection of terms. Most of these systems exploit specialised resources, like UMLS in the medical domain, while a few try to take profit from general-purpose semantic resources, like EuroWordNet (EWN). As the term extraction task is clearly domain depending, in the case a general-purpose resource without specific domain information is used, we need a way of attaching domain information to the units of the resource. For big resources it is desirable that this semantic enrichment could be carried out automatically. Given a specific domain, our proposal aims to detect in EWN those units that can be considered as domain markers (DM). We can define a DM as an EWN entry whose attached strings belong to the domain, as well as the variants of all its descendents through the hyponymy relation. The procedure we propose in this paper is fully automatic and, a priori, domain-independent. The only external knowledge it uses is a set of terms, which is an external vocabulary, which is considered to have at least one sense belonging to the domain. @@ -1610,28 +1610,28 @@ AnneVilnat - PatrickParoubek + PatrickParoubek LauraMonceaux IsabelleRobba VéroniqueGendner GabrielIllouz - MichèleJardino + MichèleJardino The Ongoing Evaluation Campaign of Syntactic Parsing of <fixed-case>F</fixed-case>rench: <fixed-case>EASY</fixed-case> http://www.lrec-conf.org/proceedings/lrec2004/pdf/349.pdf This paper presents EASY (Evaluation of Analyzers of SYntax), an ongoing evaluation campaign of syntactic parsing of French, a subproject of EVALDA in the French TECHNOLANGUE program. After presenting the elaboration of the annotation formalism, we describe the corpus building steps, the annotation tools, the evaluation measures and finally, plans to produce a validated large linguistic resource, syntactically annotated vilnat-etal-2004-ongoing - KateřinaVeselá - JiříHavelka - EvaHajičová + KateřinaVeselá + JiříHavelka + EvaHajičová Annotators’ Agreement: The Case of Topic-Focus Articulation http://www.lrec-conf.org/proceedings/lrec2004/pdf/350.pdf The annotation of the Prague Dependency Treebank (PDT) is conceived of as a multilayered scenario that comprises also dependency representations (tectogrammatical tree structures, TGTS's) of the underlying structure of the sentences. TGTS's capture three basic aspects of the underlying structure of sentences: (a) the dependency tree structure, (b) the kinds of dependency syntactic relations, and (c) the basic characteristics of the topic-focus articulation (TFA). Since the PDT is a large collection and the annotations on the deepest layer are to a large extent performed by several human annotators (based on an automatic preprocessing module), it is more than necessary to observe the consistence of annotators and the agreement among them. In the present paper, we summarize the results of the evaluation of parallel annotations of several samples taken from PDT and the measures accepted to improve the consistency of annotations. vesela-etal-2004-annotators - Scott S. L.Piao + Scott S. L.Piao PaulRayson DawnArcher TonyMcEnery @@ -1642,9 +1642,9 @@ FrédéricLandragin - AlexandreDenis + AlexandreDenis AnnalisaRicci - LaurentRomary + LaurentRomary Multimodal Meaning Representation for Generic Dialogue Systems Architectures http://www.lrec-conf.org/proceedings/lrec2004/pdf/352.pdf An unified language for the communicative acts between agents is essential for the design of multi-agents architectures. Whatever the type of interaction (linguistic, multimodal, including particular aspects such as force feedback), whatever the type of application (command dialogue, request dialogue, database querying), the concepts are common and we need a generic meta-model. In order to tend towards task-independent systems, we need to clarify the modules parameterization procedures. In this paper, we focus on the characteristics of a meta-model designed to represent meaning in linguistic and multimodal applications. This meta-model is called MMIL for MultiModal Interface Language, and has first been specified in the framework of the IST MIAMM European project. What we want to test here is how relevant is MMIL for a completely different context (a different task, a different interaction type, a different linguistic domain). We detail the exploitation of MMIL in the framework of the IST OZONE European project, and we draw the conclusions on the role of MMIL in the parameterization of task-independent dialogue managers. @@ -1659,26 +1659,26 @@ braasch-olsen-2004-sto - KalliopiZervanou - JohnMcNaught + KalliopiZervanou + JohnMcNaught A Domain-Independent Approach to <fixed-case>IE</fixed-case> Rule Development http://www.lrec-conf.org/proceedings/lrec2004/pdf/355.pdf A key element for the extraction of information in a natural language document is a set of shallow text analysis rules, which are typically based on pre-defined linguistic patterns. Current Information Extraction research aims at the automatic or semi-automatic acquisition of these rules. Within this research framework, we consider in this paper the potential for acquiring generic extraction patterns. Our research is based on the hypothesis that, terms (the linguistic representation of concepts in a specialised domain) and Named Entities (the names of persons, organisations and dates of importance in the text) can together be considered as the basic semantic entities of textual information and can therefore be used as a basis for the conceptual representation of domain specific texts and the definition of what constitutes an information extraction template in linguistic terms. The extraction patterns discovered by this approach involve significant associations of these semantic entities with verbs and they can subsequently be translated into the grammar formalism of choice. zervanou-mcnaught-2004-domain - LaurenceDevillers - HélèneMaynard - SophieRosset - PatrickParoubek - KevinMcTait + LaurenceDevillers + HélèneMaynard + SophieRosset + PatrickParoubek + KevinMcTait D.Mostefa - KhalidChoukri + KhalidChoukri LaurentCharnay - CarolineBousquet + CarolineBousquet NadineVigouroux - FrédéricBéchet - LaurentRomary + FrédéricBéchet + LaurentRomary Jean-YvesAntoine J.Villaneau MyriamVergnes @@ -1690,19 +1690,19 @@ EmanuelaCresti - Fernanda Bacelardo Nascimento - Antonio MorenoSandoval - JeanVeronis + Fernanda Bacelardo Nascimento + Antonio MorenoSandoval + JeanVeronis PhilippeMartin - KhalidChoukri + KhalidChoukri The <fixed-case>C</fixed-case>-<fixed-case>ORAL</fixed-case>-<fixed-case>ROM</fixed-case> <fixed-case>CORPUS</fixed-case>. A Multilingual Resource of Spontaneous Speech for <fixed-case>R</fixed-case>omance Languages http://www.lrec-conf.org/proceedings/lrec2004/pdf/357.pdf The C-ORAL-ROM project has delivered a multilingual corpus of spontaneous speech for the main romance languages (Italian, French, Portuguese and Spanish). The collection aims to represent the variety of speech acts performed in everyday language and to enable the description of prosodic and syntactic structures in the four romance languages. Sampling criteria are defined in a corpus design scheme. C-ORAL-ROM adopts two different sampling strategies, one for the formal and one for the informal part: While a set of typical domains of application is selected to document the formal use of language, the informal part documents speech variation using parameters referring to the event’s structure (dialogue vs. monologue) and the sociological domain of use (family-private vs public). The four romance corpora are tagged with respect to terminal and non terminal prosodic breaks. Terminal breaks are assumed to be the more relevant cues for the identification of relevant linguistic domains in spontaneous speech (utterances). Relations with other concurrent criteria are discussed. The multimedia storage of the C-ORAL-ROM corpus is based on this principle; each textual string ending with a terminal break is aligned, through the Win Pitch speech software, to its acoustic counterpart, generating the data base of all utterances. cresti-etal-2004-c - Bodil NistrupMadsen - Hanne ErdmanThomsen + Bodil NistrupMadsen + Hanne ErdmanThomsen CarlVikner Principles of a System for Terminological Concept Modelling http://www.lrec-conf.org/proceedings/lrec2004/pdf/358.pdf @@ -1711,8 +1711,8 @@ ChristopheVan Bael - HelmerStrik - Henkvan den Heuvel + HelmerStrik + Henkvan den Heuvel On the Usefulness of Large Spoken Language Corpora for Linguistic Research http://www.lrec-conf.org/proceedings/lrec2004/pdf/359.pdf In the past, fundamental linguistic research was typically conducted on small data sets that were handcrafted for the specific research at hand. However, from the eighties onwards, many large spoken language corpora have become available. This study investigates the usefulness of large multi-purpose spoken language corpora for fundamental linguistic research. A research task was designed in which we tried to capture the major pronunciation differences between three speech styles in context-sensitive re-write rules at the phone level. These re-write rules were extracted from the alignments of both a manual phonetic transcription and an automatic phonetic transcription with a canonical reference transcription of the same material. @@ -1736,7 +1736,7 @@ bartsch-2004-annotating - ConstantinOrăsan + ConstantinOrăsan ViktorPekar LauraHasler A Comparison of Summarisation Methods Based on Term Specificity Estimation @@ -1772,7 +1772,7 @@ MorenaDanieli - Juan MaríaGarrido + Juan MaríaGarrido MassimoMoneglia AndreaPanizza SilviaQuazza @@ -1782,16 +1782,16 @@ danieli-etal-2004-evaluation - MajaPopović - HermannNey + MajaPopović + HermannNey Towards the Use of Word Stems and Suffixes for Statistical Machine Translation http://www.lrec-conf.org/proceedings/lrec2004/pdf/372.pdf popovic-ney-2004-towards MatthiasEck - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel Language Model Adaptation for Statistical Machine Translation Based on Information Retrieval http://www.lrec-conf.org/proceedings/lrec2004/pdf/374.pdf eck-etal-2004-language @@ -1805,8 +1805,8 @@ CarlosAmaral DominiqueLaurent - AndréMartins - AfonsoMendes + AndréMartins + AfonsoMendes CláudiaPinto Design and Implementation of a Semantic Search Engine for <fixed-case>P</fixed-case>ortuguese http://www.lrec-conf.org/proceedings/lrec2004/pdf/378.pdf @@ -1814,31 +1814,31 @@ RichardCampbell - EricRingger + EricRingger Converting Treebank Annotations to Language Neutral Syntax http://www.lrec-conf.org/proceedings/lrec2004/pdf/380.pdf campbell-ringger-2004-converting YalinaAlphonse - PierretteBouillon + PierretteBouillon Methodology For Building Thematic Indexes In Medicine For <fixed-case>F</fixed-case>rench http://www.lrec-conf.org/proceedings/lrec2004/pdf/381.pdf alphonse-bouillon-2004-methodology - CarmenGarcia-Mateo - JavierDieguez-Tirado - LauraDocio-Fernandez - AntonioCardenal-Lopez + CarmenGarcia-Mateo + JavierDieguez-Tirado + LauraDocio-Fernandez + AntonioCardenal-Lopez <fixed-case>T</fixed-case>ranscrigal: A Bilingual System for Automatic Indexing of Broadcast News http://www.lrec-conf.org/proceedings/lrec2004/pdf/382.pdf garcia-mateo-etal-2004-transcrigal - ArantzaDíaz de Ilarraza + ArantzaDíaz de Ilarraza AitzpeaGarmendia - MaiteOronoz + MaiteOronoz <fixed-case>A</fixed-case>bar-<fixed-case>H</fixed-case>itz: An Annotation Tool for the <fixed-case>B</fixed-case>asque Dependency Treebank http://www.lrec-conf.org/proceedings/lrec2004/pdf/383.pdf diaz-de-ilarraza-etal-2004-abar @@ -1852,15 +1852,15 @@ LeoWanner - Margarita AlonsoRamos - AntoniaMartí + Margarita AlonsoRamos + AntoniaMartí Enriching the <fixed-case>S</fixed-case>panish <fixed-case>E</fixed-case>uro<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et by Collocations http://www.lrec-conf.org/proceedings/lrec2004/pdf/386.pdf wanner-etal-2004-enriching - Charles J.Fillmore - Collin F.Baker + Charles J.Fillmore + Collin F.Baker HiroakiSato <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et as a “Net” http://www.lrec-conf.org/proceedings/lrec2004/pdf/388.pdf @@ -1869,7 +1869,7 @@ AlfonsoOrtega FedericoSukno - EduardoLLeida + EduardoLLeida AlejandroFrangi AntonioMiguel LuisBuera @@ -1879,10 +1879,10 @@ ortega-etal-2004-av - Robert S.Melvin + Robert S.Melvin WinMay - ShrikanthNarayanan - PanayiotisGeorgiou + ShrikanthNarayanan + PanayiotisGeorgiou ShadiGanjavi Creation of a Doctor-Patient Dialogue Corpus Using Standardized Patients http://www.lrec-conf.org/proceedings/lrec2004/pdf/391.pdf @@ -1891,14 +1891,14 @@ BrianMacWhinney StevenBird - ChristopherCieri + ChristopherCieri CraigMartell <fixed-case>T</fixed-case>alkbank: Building an Open Unified Multimodal Database of Communicative Interaction http://www.lrec-conf.org/proceedings/lrec2004/pdf/392.pdf macwhinney-etal-2004-talkbank - Robert S.Belvin + Robert S.Belvin SusanneRiehemann KristinPrecoda A Fine-Grained Evaluation Method for Speech-to-Speech Machine Translation Using Concept Annotations @@ -1906,17 +1906,17 @@ belvin-etal-2004-fine - David M.de Matos - RicardoRibeiro - Nuno J.Mamede + David M.de Matos + RicardoRibeiro + Nuno J.Mamede Rethinking Reusable Resources http://www.lrec-conf.org/proceedings/lrec2004/pdf/395.pdf de-matos-etal-2004-rethinking - AdamMeyers + AdamMeyers RuthReeves - CatherineMacleod + CatherineMacleod RachelSzekely VeronikaZielinska BrianYoung @@ -1925,13 +1925,13 @@ meyers-etal-2004-cross - AdamMeyers + AdamMeyers RuthReeves - CatherineMacleod + CatherineMacleod RachelSzekely VeronikaZielinska BrianYoung - RalphGrishman + RalphGrishman Annotating Noun Argument Structure for <fixed-case>N</fixed-case>om<fixed-case>B</fixed-case>ank http://www.lrec-conf.org/proceedings/lrec2004/pdf/398.pdf meyers-etal-2004-annotating @@ -1970,7 +1970,7 @@ palmer-etal-2004-utilization - YoshidaKyôsuke + KyôsukeYoshida HashimotoTaiichi TokunagaTakenobu TanakaHozumi @@ -1982,7 +1982,7 @@ TokunagaTakenobu KoyamaTomofumi - SaitoSuguru + SuguruSaito NakajimaMasayuki Classification of <fixed-case>J</fixed-case>apanese Spatial Nouns http://www.lrec-conf.org/proceedings/lrec2004/pdf/405.pdf @@ -2001,25 +2001,25 @@ sanfilippo-etal-2004-meaningful - V. FinleyLacatusu - Steven J.Maiorano - Sanda M.Harabagiu + V. FinleyLacatusu + Steven J.Maiorano + Sanda M.Harabagiu Multi-Document Summarization Using Multiple-Sequence Alignment http://www.lrec-conf.org/proceedings/lrec2004/pdf/408.pdf This paper describes a novel clustering-based text summarization system that uses Multiple Sequence Alignment to improve the alignment of sentences within topic clusters. While most current clustering-based summarization systems base their summaries only on the common information contained in a collection of highly-related sentences, our system constructs more informative summaries that incorporate both the redundant and unique contributions of the sentences in the cluster. When evaluated using ROUGE, the summaries produced by our system represent a substantial improvement over the baseline, which is at 63% of the human performance. lacatusu-etal-2004-multi - JahnaOtterbacher - DragomirRadev + JahnaOtterbacher + DragomirRadev <fixed-case>R</fixed-case>evision<fixed-case>B</fixed-case>ank: A Resource for Revision-based Multi-document Summarization and Evaluation http://www.lrec-conf.org/proceedings/lrec2004/pdf/409.pdf Multi-document summaries produced via sentence extraction often suffer from a number of cohesion problems, including dangling anaphora, sudden shifts in topic and incorrect or awkward chronological ordering. Therefore, the development of an automated revision process to correct such problems is a research area of current interest. We present the RevisionBank, a corpus of 240 extractive, multi-document summaries that have been manually revised to promote cohesion. The summaries were revised by six linguistic students using a constrained set of revision operations that we previously developed. In the current paper, we describe the process of developing a taxonomy of cohesion problems and corrective revision operators that address such problems, as well as an annotation schema for our corpus. Finally, we discuss how our taxonomy and corpus can be used for the study of revision-based multi-document summarization as well as for summary evaluation. otterbacher-radev-2004-revisionbank - SandraAluisio - Gisele MontilhaPinheiro + SandraAluisio + Gisele MontilhaPinheiro Aline M. P.Manfrin Leandro H. M.de Oliveira Luiz C.Genoves, Jr. @@ -2030,8 +2030,8 @@ aluisio-etal-2004-lacio - DragomirRadev - JahnaOtterbacher + DragomirRadev + JahnaOtterbacher ZhuZhang <fixed-case>CST</fixed-case> Bank: A Corpus for the Study of Cross-document Structural Relationships http://www.lrec-conf.org/proceedings/lrec2004/pdf/411.pdf @@ -2065,14 +2065,14 @@ Sun-MeeBae - Key-SunChoi + Key-SunChoi Lexical Analysis of Agglutinative Languages Using a Dictionary of Lemmas and Lexical Transducers http://www.lrec-conf.org/proceedings/lrec2004/pdf/417.pdf This paper presents a simple method for performing a lexical analysis of agglutinative languages like Korean, which have a heavy morphology. Especially, for nouns and adverbs with regular morphological modifications and/or high productivity, we do not need to artificially construct huge dictionaries of all inflected forms of lemmas. To construct a dictionary of lemmas and lexical transducers, first, we construct automatically a dictionary of all inflected forms from KAIST POS-Tagged Corpus. Secondly, we separate the party of lemmas and one of sequences of inflectional suffixes. Thirdly, we describe their lexical transducers (i.e., morphological rules) to recognize all inflected forms of lemmas for nouns and adverbs according to the combinatorial restrictions between lemmas and their inflectional suffixes. Finally, we evaluate the advantages of this method. bae-choi-2004-lexical - RitaNüebel + RitaNüebel Evaluation and Adaptation of a Specialised Language Checking Tool for Non-specialised Machine Translation and Non-expert <fixed-case>MT</fixed-case> Users for Multi-lingual Telecooperation http://www.lrec-conf.org/proceedings/lrec2004/pdf/418.pdf Style guides or writing recommendations play an important role in the field of technical documentation production, e.g. in industrial contexts. Also, writing recommendations are used in technical contexts together with machine translation (MT) in order to circumvent the MT system's weaknesses. This paper describes the evaluation and adaptation of a language checker deployed in the project int.unity In this project, both MT and a specialised language checker were adapted to the requirements of non-expert users and a non-technical domain. The language technology was integrated with the groupware platform BSCW to support the multi-lingual communication of geographically distributed teams concerned with trade union work. The users' languages were either German or English, i.e. the users were monolingual. We chose linguatec's server version of Personal Translator 2004 MT system for the German<->English translations. The language checker CLAT for German and English has been developed at IAI. It is used by technical authors to support the production of high-quality technical documentation. The CLAT core system was adapted and extended in order to match the new requirements imposed by both the user profile and the subsequent MT application. In this paper, the focus will be on the assessment and adaptation of style rules for German. @@ -2092,7 +2092,7 @@ lavelli-etal-2004-critical - JerHayes + JerHayes TonyVeale NunoSeco Enriching <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Via Generative Metonymy and Creative Polysemy @@ -2103,8 +2103,8 @@ TomLaureys GuyDe Pauw - HugoVan hamme - WalterDaelemans + HugoVan hamme + WalterDaelemans DirkVan Compernolle Evaluation and Adaptation of the Celex <fixed-case>D</fixed-case>utch Morphological Database http://www.lrec-conf.org/proceedings/lrec2004/pdf/421.pdf @@ -2113,8 +2113,8 @@ LiTang - DonghongJi - LingpengYang + DonghongJi + LingpengYang YuNie A Model of Semantic Representations Analysis for <fixed-case>C</fixed-case>hinese Sentences http://www.lrec-conf.org/proceedings/lrec2004/pdf/422.pdf @@ -2137,7 +2137,7 @@ quirk-2004-training - Sonja E.Bosch + Sonja E.Bosch LaurettePretorius Software Tools for Morphological Tagging of <fixed-case>Z</fixed-case>ulu Corpora and Lexicon Development http://www.lrec-conf.org/proceedings/lrec2004/pdf/427.pdf @@ -2187,7 +2187,7 @@ charoenporn-etal-2004-open - LambrosKranias + LambrosKranias AnnaSamiotou Automatic Translation Memory Fuzzy Match Post-Editing: A Step Beyond Traditional <fixed-case>TM</fixed-case>/<fixed-case>MT</fixed-case> Integration http://www.lrec-conf.org/proceedings/lrec2004/pdf/435.pdf @@ -2216,11 +2216,11 @@ novak-etal-2004-combining - BalázsKis - BegoñaVillada + BalázsKis + BegoñaVillada GosseBouma GáborUgray - TamásBíró + TamásBíró GáborPohl JohnNerbonne A New Approach to the Corpus-based Statistical Investigation of <fixed-case>H</fixed-case>ungarian Multi-word Lexemes @@ -2228,7 +2228,7 @@ kis-etal-2004-new - M. Begoña VilladaMoirón + M. Begoña VilladaMoirón Discarding Noise in an Automatically Acquired Lexicon of Support verb Constructions http://www.lrec-conf.org/proceedings/lrec2004/pdf/442.pdf We applied data-driven methods to carry out automatic acquisition of Dutch prepositional support verb constructions (SVCs) in corpora (e.g., iets in de gaten houden (``keep an eye on something'')). This paper addresses the question whether linguistic diagnostics help to discard noise from the nbest lists and how to (semi-)automatically apply such linguistic diagnostics to parsed corpora. We show that some of the linguistic diagnostics proposed in Hollebrandse (1993) effectively identify SVCs and contribute a modest error rate decrease. @@ -2236,7 +2236,7 @@ FranciscoNevado - FranciscoCasacuberta + FranciscoCasacuberta JosuLanda Translation Memories Enrichment by Statistical Bilingual Segmentation http://www.lrec-conf.org/proceedings/lrec2004/pdf/443.pdf @@ -2264,7 +2264,7 @@ NellekeOostdijk - LouBoves + LouBoves Using Large Multi-purpose Corpora for Specific Research Questions: Discourse Phenomena Related to Wh-questions in the Spoken <fixed-case>D</fixed-case>utch Corpus http://www.lrec-conf.org/proceedings/lrec2004/pdf/449.pdf In this paper, we investigate whether a dataset derived from a multi-purpose corpus such as the Spoken Dutch Corpus may be considered appropriate for developing a taxonomy of wh-questions, and a model of the way in which these questions are integrated in spoken discourse. We compare the results obtained from the Spoken Dutch Corpus with a similar analysis of a large random collection of FAQs from the internet. We find substantial differences between the questions in spoken discourse and FAQs. Therefore, it may not be trivial to use a general purpose corpus as a starting point for developing models for human-computer interaction. @@ -2279,24 +2279,24 @@ mariani-badii-2004-methods - PeterWittenburg + PeterWittenburg HeidiJohnson MarkusBuchhorn - HennieBrugman - DaanBroeder + HennieBrugman + DaanBroeder Architecture for Distributed Language Resource Management and Archiving http://www.lrec-conf.org/proceedings/lrec2004/pdf/451.pdf An architecture is presented that provides an integrated framework for managing, archiving and accessing language resources. This architecture was discussed in the DELAMAN network – a world-wide network of archives holding material about endangered languages. Such a framework will be built upon a metadata infrastructure, a mechanism to resolve unique resource identifiers, user and access rights management components. These components are closely related and have to be based on redundant and distributed services. For all these components existing middleware seems to be available, however, it has to be checked how they can interact with each other. wittenburg-etal-2004-architecture - HanneFersøe + HanneFersøe ElviiraHartikainen - Henkvan den Heuvel + Henkvan den Heuvel GiulioMaltese - AsuncíonMoreno + AsuncíonMoreno ShaunieShammass - UteZiegenhain + UteZiegenhain Creation and Validation of Large Lexica for Speech-to-Speech Translation Purposes http://www.lrec-conf.org/proceedings/lrec2004/pdf/452.pdf This paper presents specifications and requirements for creation and validation of large lexica that are needed in automatic Speech Recognition (ASR), Text-to-Speech (TTS) and statistical Speech-to-Speech Translation (SST) systems. The prepared language resources are created and validated within the scope of the EU-project LC-STAR (Lexica and Corpora for Speech-to-Speech Translation Components) during years 2002-2005. Large lexica consisting of phonetic, suprasegmental and morpho-syntactic content will be provided with well-documented specifications for 13 languages. A short summary of the LC-STAR project itself is presented. Overview about the specification for the corpora collection and word extraction as well as the specification and format of the lexica are presented. Particular attention is paid to the validation of the produced lexica and the lessons learnt during pre-validation. The created and validated language resources will be available via ELRA/ELDA. @@ -2313,8 +2313,8 @@ PanagiotisZervas ManolisMaragoudakis - NikosFakotakis - GeorgeKokkinakis + NikosFakotakis + GeorgeKokkinakis Learning to Predict Pitch Accents Using <fixed-case>B</fixed-case>ayesian Belief Networks for <fixed-case>G</fixed-case>reek Language http://www.lrec-conf.org/proceedings/lrec2004/pdf/458.pdf zervas-etal-2004-learning @@ -2328,10 +2328,10 @@ more-etal-2004-grammar - PeterWittenburg - GregGulrajani - DaanBroeder - MarcusUneson + PeterWittenburg + GregGulrajani + DaanBroeder + MarcusUneson Cross-Disciplinary Integration of Metadata Descriptions http://www.lrec-conf.org/proceedings/lrec2004/pdf/462.pdf wittenburg-etal-2004-cross @@ -2374,10 +2374,10 @@ gevaudan-wiebel-2004-dynamic - LauraAlonso - MariaFuentes + LauraAlonso + MariaFuentes MarcMassot - HoracioRodríguez + HoracioRodríguez Re-using High-quality Resources for Continued Evaluation of Automated Summarization Systems http://www.lrec-conf.org/proceedings/lrec2004/pdf/472.pdf alonso-etal-2004-using @@ -2389,18 +2389,18 @@ rossler-2004-corpus - HennieBrugman + HennieBrugman OnnoCrasborn - AlbertRussel + AlbertRussel Collaborative Annotation of Sign Language Data with Peer-to-Peer Technology http://www.lrec-conf.org/proceedings/lrec2004/pdf/473.pdf brugman-etal-2004-collaborative - GlòriaVázquez - Ana FernándezMontraveta - IreneCastellón - LauraAlonso + GlòriaVázquez + Ana FernándezMontraveta + IreneCastellón + LauraAlonso Semantic Categorization of <fixed-case>S</fixed-case>panish Se-constructions http://www.lrec-conf.org/proceedings/lrec2004/pdf/474.pdf vazquez-etal-2004-semantic @@ -2408,22 +2408,22 @@ AngeloDalli ValentinTablan - KalinaBontcheva - YorickWilks - DaanBroeder - HennieBrugman - PeterWittenburg + KalinaBontcheva + YorickWilks + DaanBroeder + HennieBrugman + PeterWittenburg Web Services Architecture for Language Resources http://www.lrec-conf.org/proceedings/lrec2004/pdf/475.pdf dalli-etal-2004-web - DaanBroeder + DaanBroeder ThierryDeclerck - LaurentRomary - MarkusUneson + LaurentRomary + MarkusUneson SvenStrömqvist - PeterWittenburg + PeterWittenburg A Large Metadata Domain of Language Resources http://www.lrec-conf.org/proceedings/lrec2004/pdf/478.pdf broeder-etal-2004-large @@ -2431,44 +2431,44 @@ TamásGröbler GáborHodász - BalázsKis + BalázsKis <fixed-case>M</fixed-case>eta<fixed-case>M</fixed-case>orpho <fixed-case>TM</fixed-case>: A Rule-Based Translation Corpus http://www.lrec-conf.org/proceedings/lrec2004/pdf/479.pdf grobler-etal-2004-metamorpho - HennieBrugman - AlbertRussel + HennieBrugman + AlbertRussel Annotating Multi-media/Multi-modal Resources with <fixed-case>ELAN</fixed-case> http://www.lrec-conf.org/proceedings/lrec2004/pdf/480.pdf brugman-russel-2004-annotating - AgnèsTutin + AgnèsTutin MeriamHaddara - RuslanMitkov - ConstantinOrasan + RuslanMitkov + ConstantinOrasan Annotation of Anaphoric Expressions in an Aligned Bilingual Corpus http://www.lrec-conf.org/proceedings/lrec2004/pdf/482.pdf tutin-etal-2004-annotation TylmanUle - KirilSimov + KirilSimov Unexpected Productions May Well be Errors http://www.lrec-conf.org/proceedings/lrec2004/pdf/483.pdf ule-simov-2004-unexpected AvikSarkar - AnneDe Roeck + AnneDe Roeck A Framework for Evaluating the Suitability of Non-<fixed-case>E</fixed-case>nglish Corpora for Language Engineering http://www.lrec-conf.org/proceedings/lrec2004/pdf/485.pdf sarkar-de-roeck-2004-framework AnnaSamiotou - LambrosKranias + LambrosKranias DimitriosKokkinakis Intelligent Building of Language Resources for <fixed-case>HLT</fixed-case> Applications http://www.lrec-conf.org/proceedings/lrec2004/pdf/486.pdf @@ -2483,18 +2483,18 @@ akiba-etal-2004-collecting - HristoTanev + HristoTanev MilenKouylekov - MatteoNegri + MatteoNegri BonaventuraCoppola - BernardoMagnini + BernardoMagnini Multilingual Pattern Libraries for Question Answering: a Case Study for Definition Questions http://www.lrec-conf.org/proceedings/lrec2004/pdf/488.pdf tanev-etal-2004-multilingual MichaelDaum - Kilian A.Foth + Kilian A.Foth WolfgangMenzel Automatic Transformation of Phrase Treebanks to Dependency Trees http://www.lrec-conf.org/proceedings/lrec2004/pdf/489.pdf @@ -2528,7 +2528,7 @@ slavcheva-2004-verb - WalterKasper + WalterKasper JörgSteffen JakubPiskorski PaulBuitelaar @@ -2540,14 +2540,14 @@ S.R.Deepa KalikaBali A.G.Ramakrishnan - Partha PratimTalukdar + Partha PratimTalukdar Automatic Generation of Compound Word Lexicon for <fixed-case>H</fixed-case>indi Speech Synthesis http://www.lrec-conf.org/proceedings/lrec2004/pdf/501.pdf deepa-etal-2004-automatic SaifAhmad - Paulo C. F.de Oliveira + Paulo C. F.de Oliveira KhurshidAhmad Summarization of Multimodal Information http://www.lrec-conf.org/proceedings/lrec2004/pdf/502.pdf @@ -2564,7 +2564,7 @@ SydBauman AlejandroBia LouBurnard - TomažErjavec + TomažErjavec ChristineRuotolo SusanSchreibman Migrating Language Resources from <fixed-case>SGML</fixed-case> to <fixed-case>XML</fixed-case>: The Text Encoding Initiative Recommendations @@ -2572,16 +2572,16 @@ bauman-etal-2004-migrating - Niels OleBernsen - LailaDybkjær + Niels OleBernsen + LailaDybkjær SvendKiilerich Evaluating Conversation with Hans Christian Andersen http://www.lrec-conf.org/proceedings/lrec2004/pdf/505.pdf bernsen-etal-2004-evaluating - CatiaCucchiarini - ElisabethD’Halleweyn + CatiaCucchiarini + ElisabethD’Halleweyn The New <fixed-case>D</fixed-case>utch-<fixed-case>F</fixed-case>lemish <fixed-case>HLT</fixed-case> Programme: a Concerted Effort to Stimulate the <fixed-case>HLT</fixed-case> Sector http://www.lrec-conf.org/proceedings/lrec2004/pdf/506.pdf cucchiarini-dhalleweyn-2004-new @@ -2596,14 +2596,14 @@ RachelAires AlineManfrin - SandraAluísio + SandraAluísio DianaSantos What is my Style? Using Stylistic Features of <fixed-case>P</fixed-case>ortuguese Web Texts to Classify Web Pages According to Users’ Needs http://www.lrec-conf.org/proceedings/lrec2004/pdf/508.pdf aires-etal-2004-style - MarcoBaroni + MarcoBaroni SilviaBernardini <fixed-case>B</fixed-case>oot<fixed-case>C</fixed-case>a<fixed-case>T</fixed-case>: Bootstrapping Corpora and Terms from the Web http://www.lrec-conf.org/proceedings/lrec2004/pdf/509.pdf @@ -2622,8 +2622,8 @@ barbu-2004-word - DaanBroeder - PeterWittenburg + DaanBroeder + PeterWittenburg OnnoCrasborn Using Profiles for <fixed-case>IMDI</fixed-case> Metadata Creation http://www.lrec-conf.org/proceedings/lrec2004/pdf/513.pdf @@ -2636,25 +2636,25 @@ morth-2004-rethinking - DanielFerrés + DanielFerrés MarcMassot - MuntsaPadró - HoracioRodríguez - JordiTurmo + MuntsaPadró + HoracioRodríguez + JordiTurmo Automatic Building Gazetteers of Co-referring Named Entities http://www.lrec-conf.org/proceedings/lrec2004/pdf/516.pdf ferres-etal-2004-automatic-building NildaRuimy - PierretteBouillon + PierretteBouillon BrunoCartoni Semi-Automatic Derivation of a <fixed-case>F</fixed-case>rench Lexicon from <fixed-case>CLIPS</fixed-case> http://www.lrec-conf.org/proceedings/lrec2004/pdf/517.pdf ruimy-etal-2004-semi - NancyIde + NancyIde KeithSuderman The <fixed-case>A</fixed-case>merican National Corpus First Release http://www.lrec-conf.org/proceedings/lrec2004/pdf/518.pdf @@ -2669,22 +2669,22 @@ evert-etal-2004-identifying - LailaDybkjær - Niels OleBernse + LailaDybkjær + Niels OleBernse Towards General-Purpose Annotation Tools – How Far Are We Today? http://www.lrec-conf.org/proceedings/lrec2004/pdf/520.pdf dybkjaer-bernse-2004-towards - Uwe D.Reichel + Uwe D.Reichel KarlWeilhammer Automated Morphological Segmentation and Evaluation http://www.lrec-conf.org/proceedings/lrec2004/pdf/521.pdf reichel-weilhammer-2004-automated - NancyIde - LaurentRomary + NancyIde + LaurentRomary A Registry of Standard Data Categories for Linguistic Annotation http://www.lrec-conf.org/proceedings/lrec2004/pdf/522.pdf ide-romary-2004-registry @@ -2705,7 +2705,7 @@ marinelli-etal-2004-building - PéterHalácsy + PéterHalácsy AndrásKornai LászlóNémeth AndrásRung @@ -2724,8 +2724,8 @@ fujii-etal-2004-test - YukaTateisi - Jun-ichiTsujii + YukaTateisi + Jun-ichiTsujii Part-of-Speech Annotation of Biology Research Abstracts http://www.lrec-conf.org/proceedings/lrec2004/pdf/528.pdf tateisi-tsujii-2004-part @@ -2733,7 +2733,7 @@ BožoBekavac PetyaOsenova - KirilSimov + KirilSimov MarkoTadić Making Monolingual Corpora Comparable: a Case Study of <fixed-case>B</fixed-case>ulgarian and <fixed-case>C</fixed-case>roatian http://www.lrec-conf.org/proceedings/lrec2004/pdf/529.pdf @@ -2742,21 +2742,21 @@ LinaHenriksen BartJongejan - BenteMaegaard + BenteMaegaard Corporate Voice, Tone of Voice and Controlled Language Techniques http://www.lrec-conf.org/proceedings/lrec2004/pdf/530.pdf henriksen-etal-2004-corporate - NikosFakotakis + NikosFakotakis <fixed-case>C</fixed-case>ypriot Speech Database: Data Collection and <fixed-case>G</fixed-case>reek to <fixed-case>C</fixed-case>ypriot Dialect Adaptation http://www.lrec-conf.org/proceedings/lrec2004/pdf/531.pdf fakotakis-2004-cypriot - BorjaNavarro - ManuelPalomar - PatricioMartínez-Barco + BorjaNavarro + ManuelPalomar + PatricioMartínez-Barco Automatic Extraction of Syntactic Semantic Patterns for Multilingual Resources http://www.lrec-conf.org/proceedings/lrec2004/pdf/532.pdf navarro-etal-2004-automatic @@ -2771,8 +2771,8 @@ ViktorPekar - RichardEvans - RuslanMitkov + RichardEvans + RuslanMitkov Categorizing Web Pages as a Preprocessing Step for Information Extraction http://www.lrec-conf.org/proceedings/lrec2004/pdf/534.pdf pekar-etal-2004-categorizing @@ -2785,7 +2785,7 @@ ManuelaKunze - DietmarRösner + DietmarRösner Corpus Based Enrichment of <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et Verb Frames http://www.lrec-conf.org/proceedings/lrec2004/pdf/536.pdf kunze-rosner-2004-corpus @@ -2800,31 +2800,31 @@ ThierryDeclerck PaulBuitelaar - NicolettaCalzolari + NicolettaCalzolari AlessandroLenci Towards a Language Infrastructure for the Semantic Web http://www.lrec-conf.org/proceedings/lrec2004/pdf/541.pdf declerck-etal-2004-towards - AlvinMartin + AlvinMartin DavidMiller - MarkPrzybocki - JosephCampbell + MarkPrzybocki + JosephCampbell HirotakaNakasone Conversational Telephone Speech Corpus Collection for the <fixed-case>NIST</fixed-case> Speaker Recognition Evaluation 2004 http://www.lrec-conf.org/proceedings/lrec2004/pdf/542.pdf martin-etal-2004-conversational - StephanVogel + StephanVogel ChristianMonson Augmenting Manual Dictionaries for Statistical Machine Translation Systems http://www.lrec-conf.org/proceedings/lrec2004/pdf/543.pdf vogel-monson-2004-augmenting - ChristianBiemann + ChristianBiemann UweQuasthoff ChristianWolff Linguistic Corpus Search @@ -2832,16 +2832,16 @@ biemann-etal-2004-linguistic - NicolettaCalzolari - KhalidChoukri - MariaGavrilidou - BenteMaegaard + NicolettaCalzolari + KhalidChoukri + MariaGavrilidou + BenteMaegaard PaolaBaroni - HanneFersøe + HanneFersøe AlessandroLenci - ValérieMapelli + ValérieMapelli MonicaMonachini - SteliosPiperidis + SteliosPiperidis <fixed-case>ENABLER</fixed-case> Thematic Network of National Projects: Technical, Strategic and Political Issues of <fixed-case>LR</fixed-case>s http://www.lrec-conf.org/proceedings/lrec2004/pdf/545.pdf calzolari-etal-2004-enabler @@ -2861,7 +2861,7 @@ MartinVolk DominicWiddows BogdanSacaleanu - ŠpelaVintar + ŠpelaVintar StanleyPeters HansUszkoreit Evaluation Resources for Concept-based Cross-Lingual Information Retrieval in the Medical Domain @@ -2869,7 +2869,7 @@ buitelaar-etal-2004-evaluation - ChrisBiemann + ChrisBiemann StefanBordag UweQuasthoff Automatic Acquisition of Paradigmatic Relations Using Iterated Co-occurrences @@ -2891,21 +2891,21 @@ DorotaIskra RainerSiemund JamalBorno - AsuncionMoreno + AsuncionMoreno OssamaEmam - KhalidChoukri + KhalidChoukri OrenGedge HerbertTropf - AlbinoNogueiras + AlbinoNogueiras ImedZitouni AnastasiosTsopanoglou - NikosFakotakis + NikosFakotakis <fixed-case>O</fixed-case>rien<fixed-case>T</fixed-case>el - Telephony Databases Across <fixed-case>N</fixed-case>orthern <fixed-case>A</fixed-case>frica and the <fixed-case>M</fixed-case>iddle <fixed-case>E</fixed-case>ast http://www.lrec-conf.org/proceedings/lrec2004/pdf/552.pdf iskra-etal-2004-orientel - HanneFersøe + HanneFersøe MonicaMonachini <fixed-case>ELRA</fixed-case> Validation Methodology and Standard Promotion for Linguistic Resources http://www.lrec-conf.org/proceedings/lrec2004/pdf/553.pdf @@ -2919,38 +2919,38 @@ biber-breiteneder-2004-aac - DianaBinnenpoorte - CatiaCucchiarini - HelmerStrik - LouBoves + DianaBinnenpoorte + CatiaCucchiarini + HelmerStrik + LouBoves Improving Automatic Phonetic Transcription of Spontaneous Speech Through Variant-Based Pronunciation Variation Modelling http://www.lrec-conf.org/proceedings/lrec2004/pdf/558.pdf binnenpoorte-etal-2004-improving - MassimoPoesio - Mijail A.Kabadjov + MassimoPoesio + Mijail A.Kabadjov A General-Purpose, Off-the-shelf Anaphora Resolution Module: Implementation and Preliminary Evaluation http://www.lrec-conf.org/proceedings/lrec2004/pdf/559.pdf poesio-kabadjov-2004-general - DonghongJi + DonghongJi LiTang - LingpengYang + LingpengYang Building a Conceptual Graph Bank for <fixed-case>C</fixed-case>hinese Language http://www.lrec-conf.org/proceedings/lrec2004/pdf/561.pdf ji-etal-2004-building - AnneAbeillé + AnneAbeillé NicolasBarrier Enriching a <fixed-case>F</fixed-case>rench Treebank http://www.lrec-conf.org/proceedings/lrec2004/pdf/562.pdf abeille-barrier-2004-enriching - BéatriceDaille + BéatriceDaille SamuelDufour-Kowalski EmmanuelMorin <fixed-case>F</fixed-case>rench-<fixed-case>E</fixed-case>nglish Multi-word Term Alignment Based on Lexical Context Analysis @@ -2975,13 +2975,13 @@ AbdelhadiSoudi - AndreasEisele + AndreasEisele Generating an <fixed-case>A</fixed-case>rabic Full-form Lexicon for Bidirectional Morphology Lookup http://www.lrec-conf.org/proceedings/lrec2004/pdf/567.pdf soudi-eisele-2004-generating - PetrPollák + PetrPollák JanČernocký Orthographic and Phonetic Annotation of Very Large <fixed-case>C</fixed-case>zech Corpora with Quality Assessment http://www.lrec-conf.org/proceedings/lrec2004/pdf/568.pdf @@ -2998,8 +2998,8 @@ ribeiro-etal-2004-inquer - AntónioBranco - JoãoSilva + AntónioBranco + JoãoSilva Evaluating Solutions for the Rapid Development of State-of-the-Art <fixed-case>POS</fixed-case> Taggers for <fixed-case>P</fixed-case>ortuguese http://www.lrec-conf.org/proceedings/lrec2004/pdf/572.pdf branco-silva-2004-evaluating @@ -3012,7 +3012,7 @@ ManolisMaragoudakis - NikosFakotakis + NikosFakotakis <fixed-case>B</fixed-case>ayesian Semantics Incorporation to Web Content for Natural Language Information Retrieval http://www.lrec-conf.org/proceedings/lrec2004/pdf/576.pdf maragoudakis-fakotakis-2004-bayesian @@ -3025,9 +3025,9 @@ IuliaNica - Mª AntòniaMartí - AndrésMontoyo - SoniaVázquez + Mª AntòniaMartí + AndrésMontoyo + SoniaVázquez Enriching <fixed-case>EWN</fixed-case> with Syntagmatic Information by Means of <fixed-case>WSD</fixed-case> http://www.lrec-conf.org/proceedings/lrec2004/pdf/579.pdf nica-etal-2004-enriching @@ -3056,9 +3056,9 @@ JuanFernández MauroCastillo - GermanRigau - JordiAtserias - JordiTurmo + GermanRigau + JordiAtserias + JordiTurmo Automatic Acquisition of Sense Examples Using <fixed-case>E</fixed-case>x<fixed-case>R</fixed-case>etriever http://www.lrec-conf.org/proceedings/lrec2004/pdf/583.pdf fernandez-etal-2004-automatic @@ -3066,8 +3066,8 @@ CvetanaKrstev DuškoVitas - RankaStankoviæ - IvanObradoviæ + RankaStankoviæ + IvanObradoviæ GordanaPavloviæ-Lažetiæ Combining Heterogeneous Lexical Resources http://www.lrec-conf.org/proceedings/lrec2004/pdf/584.pdf @@ -3075,19 +3075,19 @@ Viet-BacLe - Do-DatTran - EricCastelli - LaurentBesacier - Jean-FrançoisSerignat + Do-DatTran + EricCastelli + LaurentBesacier + Jean-FrançoisSerignat Spoken and Written Language Resources for <fixed-case>V</fixed-case>ietnamese http://www.lrec-conf.org/proceedings/lrec2004/pdf/586.pdf le-etal-2004-spoken - AndreiPopescu-Belis + AndreiPopescu-Belis MariaGeorgescul AlexanderClark - SusanArmstrong + SusanArmstrong Building and Using a Corpus of Shallow Dialogue Annotated Meetings http://www.lrec-conf.org/proceedings/lrec2004/pdf/587.pdf popescu-belis-etal-2004-building @@ -3111,7 +3111,7 @@ Jan-TorstenMilde UlrikeGut KatrinErk - SebastianPadó + SebastianPadó Querying Both Time-aligned and Hierarchical Corpora with <fixed-case>NXT</fixed-case> Search http://www.lrec-conf.org/proceedings/lrec2004/pdf/590.pdf heid-etal-2004-querying @@ -3148,8 +3148,8 @@ ha-2004-practical - JesúsGiménez - LluísMàrquez + JesúsGiménez + LluísMàrquez <fixed-case>SVMT</fixed-case>ool: A general <fixed-case>POS</fixed-case> Tagger Generator Based on Support Vector Machines http://www.lrec-conf.org/proceedings/lrec2004/pdf/597.pdf gimenez-marquez-2004-svmtool @@ -3164,17 +3164,17 @@ UlrichCallmeier - AndreasEisele - UlrichSchäfer + AndreasEisele + UlrichSchäfer MelanieSiegel The <fixed-case>D</fixed-case>eep<fixed-case>T</fixed-case>hought Core Architecture Framework http://www.lrec-conf.org/proceedings/lrec2004/pdf/603.pdf callmeier-etal-2004-deepthought - JordiAtserias + JordiAtserias SalvadorCliment - GermanRigau + GermanRigau Towards the Meaning Top Ontology: Sources of Ontological Meaning http://www.lrec-conf.org/proceedings/lrec2004/pdf/604.pdf atserias-etal-2004-towards @@ -3209,15 +3209,15 @@ teixeira-etal-2004-acoustic - LaurentRomary - AmaliaTodirascu + LaurentRomary + AmaliaTodirascu DavidLanglois Experiments on Building Language Resources for Multi-Modal Dialogue Systems http://www.lrec-conf.org/proceedings/lrec2004/pdf/611.pdf romary-etal-2004-experiments - DavidDay + DavidDay ChadMcHenry RobynKozierok LaurelRiek @@ -3228,9 +3228,9 @@ RayClifford NeilGranoien - DouglasJones + DouglasJones WadeShen - CliffordWeinstein + CliffordWeinstein The Effect of Text Difficulty on Machine Translation Performance – A Pilot Study with <fixed-case>ILR</fixed-case>-Rated Texts in <fixed-case>S</fixed-case>panish, <fixed-case>F</fixed-case>arsi, <fixed-case>A</fixed-case>rabic, <fixed-case>R</fixed-case>ussian and <fixed-case>K</fixed-case>orean http://www.lrec-conf.org/proceedings/lrec2004/pdf/613.pdf clifford-etal-2004-effect @@ -3263,8 +3263,8 @@ EleniMiltsakaki RashmiPrasad - AravindJoshi - BonnieWebber + AravindJoshi + BonnieWebber The <fixed-case>P</fixed-case>enn <fixed-case>D</fixed-case>iscourse <fixed-case>T</fixed-case>reebank http://www.lrec-conf.org/proceedings/lrec2004/pdf/618.pdf miltsakaki-etal-2004-penn @@ -3272,7 +3272,7 @@ VioletaSeretan LukaNerima - EricWehrli + EricWehrli Using the Web as a Corpus for the Syntactic-Based Collocation Identification http://www.lrec-conf.org/proceedings/lrec2004/pdf/619.pdf seretan-etal-2004-using @@ -3286,7 +3286,7 @@ HenkHarkema - RobertGaizauskas + RobertGaizauskas MarkHepple NeilDavis YikunGuo @@ -3303,7 +3303,7 @@ hemsen-2004-evaluation - ChristianBiemann + ChristianBiemann StefanBordag UweQuasthoff ChristianWolff @@ -3315,12 +3315,12 @@ ElisabethPinto DelphineCharlet HélèneFrançois - DjamelMostefa - OlivierBoëffard + DjamelMostefa + OlivierBoëffard DominiqueFohr OdileMella FrédéricBimbot - KhalidChoukri + KhalidChoukri YannPhilip FrancisCharpentier Development of New Telephone Speech Databases for <fixed-case>F</fixed-case>rench: the <fixed-case>NEOLOGOS</fixed-case> Project @@ -3329,7 +3329,7 @@ KarelPala - PavelSmrz + PavelSmrz Top Ontology as a Tool for Semantic Role Tagging http://www.lrec-conf.org/proceedings/lrec2004/pdf/626.pdf pala-smrz-2004-top @@ -3337,23 +3337,23 @@ ArgyriosVasilakopoulos MicheleBersani - William J.Black + William J.Black A Suite of Tools for Marking Up Textual Data for Temporal Text Mining Scenarios http://www.lrec-conf.org/proceedings/lrec2004/pdf/627.pdf vasilakopoulos-etal-2004-suite - AnneDe Roeck + AnneDe Roeck AvikSarkar - PaulGarthwaite + PaulGarthwaite Frequent Term Distribution Measures for Dataset Profiling http://www.lrec-conf.org/proceedings/lrec2004/pdf/629.pdf de-roeck-etal-2004-frequent - JosefPsutka + JosefPsutka PavelIrcing - JanHajič + JanHajič VlastaRadová Josef V.Psutka William J.Byrne @@ -3363,15 +3363,15 @@ psutka-etal-2004-issues - AsunciónGómez-Pérez - M. CarmenSuárez-Figueroa + AsunciónGómez-Pérez + M. CarmenSuárez-Figueroa Ontology Evaluation Functionalities of <fixed-case>RDF</fixed-case>(<fixed-case>S</fixed-case>),<fixed-case>DAML</fixed-case>+<fixed-case>OIL</fixed-case>, and <fixed-case>OWL</fixed-case> Parsers and Ontology Platforms http://www.lrec-conf.org/proceedings/lrec2004/pdf/631.pdf gomez-perez-suarez-figueroa-2004-ontology AnnaSinopalnikova - PavelSmrz + PavelSmrz Word Association Norms as a Unique Supplement of Traditional Language Resources http://www.lrec-conf.org/proceedings/lrec2004/pdf/632.pdf sinopalnikova-smrz-2004-word @@ -3401,7 +3401,7 @@ MalvinaNissim ShipraDingare JeanCarletta - MarkSteedman + MarkSteedman An Annotation Scheme for Information Status in Dialogue http://www.lrec-conf.org/proceedings/lrec2004/pdf/638.pdf nissim-etal-2004-annotation @@ -3409,7 +3409,7 @@ AlexTrutnev AntoineRozenknop - MartinRajman + MartinRajman Speech Recognition Simulation and its Application for <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z Experiments http://www.lrec-conf.org/proceedings/lrec2004/pdf/639.pdf trutnev-etal-2004-speech @@ -3417,7 +3417,7 @@ MuratDeviren KhalidDaoudi - KamelSmaïli + KamelSmaïli Language Modeling Using Dynamic <fixed-case>B</fixed-case>ayesian Networks http://www.lrec-conf.org/proceedings/lrec2004/pdf/640.pdf deviren-etal-2004-language @@ -3430,20 +3430,20 @@ hahn-wermter-2004-pumping - KirilSimov + KirilSimov PetyaOsenova A Hybrid Strategy For Regular Grammar Parsing http://www.lrec-conf.org/proceedings/lrec2004/pdf/642.pdf simov-osenova-2004-hybrid - JordiAtserias - BernardoMagnini + JordiAtserias + BernardoMagnini OctavianPopescu - EnekoAgirre + EnekoAgirre AitziberAtutxa - GermanRigau - JohnCarroll + GermanRigau + JohnCarroll RobKoeling Cross-Language Acquisition of Semantic Models for Verbal Predicates http://www.lrec-conf.org/proceedings/lrec2004/pdf/643.pdf @@ -3457,8 +3457,8 @@ KallirroiGeorgila - NikosFakotakis - GeorgeKokkinakis + NikosFakotakis + GeorgeKokkinakis A graphical Tool for Handling Rule Grammars in <fixed-case>J</fixed-case>ava Speech Grammar Format http://www.lrec-conf.org/proceedings/lrec2004/pdf/645.pdf georgila-etal-2004-graphical @@ -3470,18 +3470,18 @@ sheremetyeva-2004-flexible - DavidMartínez - EnekoAgirre + DavidMartínez + EnekoAgirre The Effect of Bias on an Automatically-built Word Sense Corpus http://www.lrec-conf.org/proceedings/lrec2004/pdf/648.pdf martinez-agirre-2004-effect VictoriaArranz - NúriaCastell - Josep MariaCrego - JesúsGiménez - Adriàde Gispert + NúriaCastell + Josep MariaCrego + JesúsGiménez + Adriàde Gispert PatrikLambert Bilingual Connections for Trilingual Corpora: An <fixed-case>XML</fixed-case> Approach http://www.lrec-conf.org/proceedings/lrec2004/pdf/649.pdf @@ -3507,7 +3507,7 @@ AlexTrutnev - MartinRajman + MartinRajman Comparative Evaluations in the Domain of Automatic Speech Recognition http://www.lrec-conf.org/proceedings/lrec2004/pdf/654.pdf trutnev-rajman-2004-comparative @@ -3550,7 +3550,7 @@ braffort-etal-2004-toward - FabioTamburini + FabioTamburini Building Distributed Language Resources By Grid Computing http://www.lrec-conf.org/proceedings/lrec2004/pdf/661.pdf tamburini-2004-building @@ -3575,11 +3575,11 @@ busemann-2004-egram - LouiseGuthrie - RobertoBasili - FabioZanzotto - KalinaBontcheva - HamishCunningham + LouiseGuthrie + RobertoBasili + FabioZanzotto + KalinaBontcheva + HamishCunningham DavidGuthrie JiaCui MarcoCammisa @@ -3588,24 +3588,24 @@ KristiyanHaralambiev MartinHolub KlausMacherey - FredrickJelinek + FredrickJelinek Large Scale Experiments for Semantic Labeling of Noun Phrases in Raw Text http://www.lrec-conf.org/proceedings/lrec2004/pdf/666.pdf guthrie-etal-2004-large - EnekoAgirre + EnekoAgirre AitziberAtutxa - KoldoGojenola - KepaSarasola + KoldoGojenola + KepaSarasola Exploring Portability of Syntactic Information from <fixed-case>E</fixed-case>nglish to <fixed-case>B</fixed-case>asque http://www.lrec-conf.org/proceedings/lrec2004/pdf/667.pdf agirre-etal-2004-exploring - JordiAtserias - LuísVillarejo - GermanRigau + JordiAtserias + LuísVillarejo + GermanRigau <fixed-case>S</fixed-case>panish <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et 1.6: Porting the <fixed-case>S</fixed-case>panish <fixed-case>W</fixed-case>ordnet Across <fixed-case>P</fixed-case>rinceton Versions http://www.lrec-conf.org/proceedings/lrec2004/pdf/668.pdf atserias-etal-2004-spanish @@ -3614,7 +3614,7 @@ MagdalenaWolska Bao QuocVo DimitraTsovaltzi - IvanaKruijff-Korbayová + IvanaKruijff-Korbayová ElenaKaragjosova HelmutHoracek ArminFiedler @@ -3624,19 +3624,19 @@ wolska-etal-2004-annotated - Lonnekevan der Plas + Lonnekevan der Plas VincenzoPallotta - MartinRajman + MartinRajman HatemGhorbel Automatic Keyword Extraction from Spoken Text. A Comparison of Two Lexical Resources: <fixed-case>EDR</fixed-case> and <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et http://www.lrec-conf.org/proceedings/lrec2004/pdf/670.pdf van-der-plas-etal-2004-automatic - AnnaKupść + AnnaKupść TerukoMitamura BenjaminVan Durme - EricNyberg + EricNyberg Pronominal Anaphora Resolution for Unrestricted Text http://www.lrec-conf.org/proceedings/lrec2004/pdf/671.pdf kupsc-etal-2004-pronominal @@ -3662,7 +3662,7 @@ NinaWacholder - SharonSmall + SharonSmall BingBai DianeKelly RobertRittman @@ -3670,9 +3670,9 @@ RobertSalkin PengSong YingSun - LiuTing - PaulKantor - TomekStrzalkowski + TingLiu + PaulKantor + TomekStrzalkowski Designing a Realistic Evaluation of an End-to-end Interactive Question Answering System http://www.lrec-conf.org/proceedings/lrec2004/pdf/675.pdf wacholder-etal-2004-designing @@ -3686,18 +3686,18 @@ BogdanBabych DebbieElliott - AnthonyHartley + AnthonyHartley Calibrating Resource-light Automatic <fixed-case>MT</fixed-case> Evaluation: a Cheap Approach to Ranking <fixed-case>MT</fixed-case> Systems by the Usability of Their Output http://www.lrec-conf.org/proceedings/lrec2004/pdf/678.pdf babych-etal-2004-calibrating - SteliosPiperidis + SteliosPiperidis IasonDemiros ProkopisProkopidis PeterVanroose - AnjaHoethker - WalterDaelemans + AnjaHoethker + WalterDaelemans ElsaSklavounou ManosKonstantinou YannisKaravidas @@ -3707,7 +3707,7 @@ KazukiAdachi - TomokiToda + TomokiToda HiromichiKawanami HiroshiSaruwatari KiyohiroShikano @@ -3716,24 +3716,24 @@ adachi-etal-2004-perceptual - Serge A.Yablonsky + Serge A.Yablonsky Integration of <fixed-case>R</fixed-case>ussian Language Resources http://www.lrec-conf.org/proceedings/lrec2004/pdf/682.pdf yablonsky-2004-integration - RobertoBasili + RobertoBasili NicolaLorusso - Maria TeresaPazienza - Fabio MassimoZanzotto + Maria TeresaPazienza + Fabio MassimoZanzotto <fixed-case>A</fixed-case>2<fixed-case>Q</fixed-case>: An Agent-based Architecure for Multilingual <fixed-case>Q</fixed-case>&<fixed-case>A</fixed-case> http://www.lrec-conf.org/proceedings/lrec2004/pdf/683.pdf basili-etal-2004-a2q - Guadalupe Aguadode Cea + Guadalupe Aguadode Cea InmaculadaÁlvarez-de-Mon - AntonioPareja-Lora + AntonioPareja-Lora <fixed-case>O</fixed-case>nto<fixed-case>T</fixed-case>ag’s Linguistic Ontologies: Enhancing Higher Level and Semantic Web Annotations http://www.lrec-conf.org/proceedings/lrec2004/pdf/684.pdf de-cea-etal-2004-ontotags @@ -3750,13 +3750,13 @@ KiyongLee LouBurnard - LaurentRomary - Ericde la Clergerie + LaurentRomary + Ericde la Clergerie ThierryDeclerck SydBauman - HarryBunt + HarryBunt LionelClément - TomažErjavec + TomažErjavec AzimRoussanaly ClaudeRoux Towards an International Standard on Feature Structure Representation @@ -3764,43 +3764,43 @@ lee-etal-2004-towards - AriadnaFont Llitjós - JaimeCarbonell + AriadnaFont Llitjós + JaimeCarbonell The Translation Correction Tool: <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>panish User Studies http://www.lrec-conf.org/proceedings/lrec2004/pdf/688.pdf font-llitjos-carbonell-2004-translation - BrianMitchell - RobertGaizauskas + BrianMitchell + RobertGaizauskas A Labelled Corpus for Prepositional Phrase Attachment http://www.lrec-conf.org/proceedings/lrec2004/pdf/690.pdf mitchell-gaizauskas-2004-labelled GabrielInfante-Lopez - Maartende Rijke + Maartende Rijke Comparing the Ambiguity Reduction Abilities of Probabilistic Context-Free Grammars http://www.lrec-conf.org/proceedings/lrec2004/pdf/692.pdf infante-lopez-de-rijke-2004-comparing - PaulMorarescu - SandaHarabagiu + PaulMorarescu + SandaHarabagiu <fixed-case>N</fixed-case>ame<fixed-case>N</fixed-case>et: a Self-Improving Resource for Name Classification http://www.lrec-conf.org/proceedings/lrec2004/pdf/693.pdf morarescu-harabagiu-2004-namenet KaterinaPastra - YorickWilks + YorickWilks Image-Language Multimodal Corpora: Needs, Lacunae and an <fixed-case>AI</fixed-case> Synergy for Annotation http://www.lrec-conf.org/proceedings/lrec2004/pdf/694.pdf pastra-wilks-2004-image Na-RaeHan - MartinChodorow + MartinChodorow ClaudiaLeacock Detecting Errors in <fixed-case>E</fixed-case>nglish Article Usage with a Maximum Entropy Classifier Trained on a Large, Diverse Corpus http://www.lrec-conf.org/proceedings/lrec2004/pdf/695.pdf @@ -3813,9 +3813,9 @@ sedlacek-2004-core - WalterDaelemans - AnjaHöthker - ErikTjong Kim Sang + WalterDaelemans + AnjaHöthker + ErikTjong Kim Sang Automatic Sentence Simplification for Subtitling in <fixed-case>D</fixed-case>utch and <fixed-case>E</fixed-case>nglish http://www.lrec-conf.org/proceedings/lrec2004/pdf/697.pdf daelemans-etal-2004-automatic @@ -3830,7 +3830,7 @@ kruengkrai-etal-2004-enriching - Jonathan G.Fiscus + Jonathan G.Fiscus Results of the 2003 Topic Detection and Tracking Evaluation http://www.lrec-conf.org/proceedings/lrec2004/pdf/702.pdf fiscus-2004-results @@ -3853,7 +3853,7 @@ FabreLambeau BenjaminWaldron FrancisBond - DanFlickinger + DanFlickinger StephanOepen A Lexicon Module for a Grammar Development Environment http://www.lrec-conf.org/proceedings/lrec2004/pdf/706.pdf @@ -3861,7 +3861,7 @@ BogdanBabych - AnthonyHartley + AnthonyHartley Modelling Legitimate Translation Variation for Automatic Evaluation of <fixed-case>MT</fixed-case> Quality http://www.lrec-conf.org/proceedings/lrec2004/pdf/707.pdf babych-hartley-2004-modelling @@ -3869,7 +3869,7 @@ RobertoBartolini AlessandroLenci - SimonettaMontemagni + SimonettaMontemagni VitoPirrelli ClaudiaSoria Semantic Mark-up of <fixed-case>I</fixed-case>talian Legal Texts Through <fixed-case>NLP</fixed-case>-based Techniques @@ -3878,7 +3878,7 @@ LionelClément - BenoîtSagot + BenoîtSagot BernardLang Morphology Based Automatic Acquisition of Large-coverage Lexica http://www.lrec-conf.org/proceedings/lrec2004/pdf/711.pdf @@ -3892,26 +3892,26 @@ ViolettaCavalli-Sforza - Jaime G.Carbonell - Peter J.Jansen + Jaime G.Carbonell + Peter J.Jansen Developing Language Resources for a Transnational Digital Government System http://www.lrec-conf.org/proceedings/lrec2004/pdf/713.pdf cavalli-sforza-etal-2004-developing - Mary D.Swift - Myroslava O.Dzikovska - Joel R.Tetreault - James F.Allen + Mary D.Swift + Myroslava O.Dzikovska + Joel R.Tetreault + James F.Allen Semi-automatic Syntactic and Semantic Corpus Annotation with a Deep Parser http://www.lrec-conf.org/proceedings/lrec2004/pdf/714.pdf swift-etal-2004-semi GeorgesFafiotte - ChristianBoitet + ChristianBoitet MarkSeligman - ZongChengqing + ChengqingZong Collecting and Sharing Bilingual Spontaneous Speech Corpora: the <fixed-case>C</fixed-case>hin<fixed-case>F</fixed-case>a<fixed-case>D</fixed-case>ial Experiment http://www.lrec-conf.org/proceedings/lrec2004/pdf/715.pdf fafiotte-etal-2004-collecting @@ -3919,7 +3919,7 @@ JuditaPreiss CarolineGasperin - TedBriscoe + TedBriscoe Can Anaphoric Definite Descriptions be Replaced by Pronouns? http://www.lrec-conf.org/proceedings/lrec2004/pdf/718.pdf preiss-etal-2004-anaphoric @@ -3927,7 +3927,7 @@ RobertoBartolini AlessandroLenci - SimonettaMontemagni + SimonettaMontemagni VitoPirrelli Hybrid Constraints for Robust Parsing: First Experiments and Evaluation http://www.lrec-conf.org/proceedings/lrec2004/pdf/719.pdf @@ -3943,7 +3943,7 @@ SimoneTeufel - Hansvan Halteren + Hansvan Halteren Agreement in Human Factoid Annotation for Summarization Evaluation http://www.lrec-conf.org/proceedings/lrec2004/pdf/723.pdf teufel-van-halteren-2004-agreement @@ -3957,11 +3957,11 @@ rilliard-etal-2004-evaluating - NadiaMana + NadiaMana RoldanoCattoni - EmanuelePianta + EmanuelePianta FrancaRossi - FabioPianesi + FabioPianesi SusanneBurger The <fixed-case>I</fixed-case>talian <fixed-case>NESPOLE</fixed-case>! Corpus: a Multilingual Database with Interlingua Annotation in Tourism and Medical Domains http://www.lrec-conf.org/proceedings/lrec2004/pdf/725.pdf @@ -3979,29 +3979,29 @@ AntoniettaAlonge - BirteLönneker + BirteLönneker Metaphors in Wordnets: From Theory to Practice http://www.lrec-conf.org/proceedings/lrec2004/pdf/728.pdf alonge-lonneker-2004-metaphors - HarryBunt - LaurentRomary + HarryBunt + LaurentRomary Standardization in Multimodal Content Representation: Some Methodological Issues http://www.lrec-conf.org/proceedings/lrec2004/pdf/729.pdf bunt-romary-2004-standardization - RobertoBasili + RobertoBasili MarcoCammisa - Fabio MassimoZanzotto + Fabio MassimoZanzotto A Similarity Measure for Unsupervised Semantic Disambiguation http://www.lrec-conf.org/proceedings/lrec2004/pdf/732.pdf basili-etal-2004-similarity - LailaDybkjær - Niels OleBernsen + LailaDybkjær + Niels OleBernsen WolfgangMinker Usability Evaluation of Multimodal and Domain-Oriented Spoken Language Dialogue Systems http://www.lrec-conf.org/proceedings/lrec2004/pdf/733.pdf @@ -4011,7 +4011,7 @@ JaapKamps MaartenMarx Robert J.Mokken - Maartende Rijke + Maartende Rijke Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et to Measure Semantic Orientations of Adjectives http://www.lrec-conf.org/proceedings/lrec2004/pdf/734.pdf kamps-etal-2004-using @@ -4021,7 +4021,7 @@ EvaForsbom EbbaGustavii EvaPettersson - JörgTiedemann + JörgTiedemann <fixed-case>MT</fixed-case> Goes Farming: Comparing Two Machine Translation Approaches on a New Domain http://www.lrec-conf.org/proceedings/lrec2004/pdf/735.pdf weijnitz-etal-2004-mt @@ -4037,7 +4037,7 @@ ChristopherBrewster HarithAlani SrinandanDasmahapatra - YorickWilks + YorickWilks Data Driven Ontology Evaluation http://www.lrec-conf.org/proceedings/lrec2004/pdf/737.pdf brewster-etal-2004-data @@ -4058,7 +4058,7 @@ Vasco CalaisPedro JeongwooKo - EricNyberg + EricNyberg TerukoMitamura An Information Repository Model for Advanced Question Answering Systems http://www.lrec-conf.org/proceedings/lrec2004/pdf/742.pdf @@ -4068,29 +4068,29 @@ FrancescaBertagna AlessandroLenci MonicaMonachini - NicolettaCalzolari + NicolettaCalzolari Content Interoperability of Lexical Resources: Open Issues and “<fixed-case>MILE</fixed-case>” Perspectives http://www.lrec-conf.org/proceedings/lrec2004/pdf/743.pdf bertagna-etal-2004-content - MartinČmejrek - JanCuřín - JiříHavelka - JanHajič - VladislavKuboň + MartinČmejrek + JanCuřín + JiříHavelka + JanHajič + VladislavKuboň <fixed-case>P</fixed-case>rague <fixed-case>C</fixed-case>zech-<fixed-case>E</fixed-case>nglish <fixed-case>D</fixed-case>ependency <fixed-case>T</fixed-case>reebank. Syntactically Annotated Resources for Machine Translation http://www.lrec-conf.org/proceedings/lrec2004/pdf/745.pdf cmejrek-etal-2004-prague ChristianMonson - LoriLevin + LoriLevin RodolfoVega - RalfBrown - AriadnaFont Llitjos - AlonLavie - JaimeCarbonell + RalfBrown + AriadnaFont Llitjos + AlonLavie + JaimeCarbonell EliseoCañulef RosendoHuisca Data Collection and Analysis of <fixed-case>M</fixed-case>apudungun Morphology for Spelling Correction @@ -4098,8 +4098,8 @@ monson-etal-2004-data - Arlindo O.Veiga - Fernando S.Perdigão + Arlindo O.Veiga + Fernando S.Perdigão An Efficient Word Confidence Measure Using Likelihood Ratio Scores http://www.lrec-conf.org/proceedings/lrec2004/pdf/748.pdf veiga-perdigao-2004-efficient @@ -4107,13 +4107,13 @@ KenjiSagae BrianMacWhinney - AlonLavie + AlonLavie Adding Syntactic Annotations to Transcripts of Parent-Child Dialogs http://www.lrec-conf.org/proceedings/lrec2004/pdf/749.pdf sagae-etal-2004-adding - HuaruiZhang + HuaruiZhang ChurenHuang ShiwenYu Distributional Consistency: As a General Method for Defining a Core Lexicon @@ -4121,22 +4121,22 @@ zhang-etal-2004-distributional - Rebecca J.Passonneau + Rebecca J.Passonneau Computing Reliability for Coreference Annotation http://www.lrec-conf.org/proceedings/lrec2004/pdf/752.pdf passonneau-2004-computing - EnekoAgirre - Oier Lopezde Lacalle + EnekoAgirre + Oier Lopezde Lacalle Publicly Available Topic Signatures for all <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Nominal Senses http://www.lrec-conf.org/proceedings/lrec2004/pdf/753.pdf agirre-de-lacalle-2004-publicly - TimothyBaldwin - Emily M.Bender - DanFlickinger + TimothyBaldwin + Emily M.Bender + DanFlickinger AraKim StephanOepen Road-testing the <fixed-case>E</fixed-case>nglish <fixed-case>R</fixed-case>esource <fixed-case>G</fixed-case>rammar Over the <fixed-case>B</fixed-case>ritish <fixed-case>N</fixed-case>ational <fixed-case>C</fixed-case>orpus @@ -4144,31 +4144,31 @@ baldwin-etal-2004-road - YingZhang - StephanVogel - AlexWaibel + YingZhang + StephanVogel + AlexWaibel Interpreting <fixed-case>BLEU</fixed-case>/<fixed-case>NIST</fixed-case> Scores: How Much Improvement do We Need to Have a Better System? http://www.lrec-conf.org/proceedings/lrec2004/pdf/755.pdf zhang-etal-2004-interpreting - PeterAnick + PeterAnick Exploiting Anchor Text as a Lexical Resource http://www.lrec-conf.org/proceedings/lrec2004/pdf/756.pdf anick-2004-exploiting - DragomirRadev + DragomirRadev TimothyAllison SashaBlair-Goldensohn JohnBlitzer ArdaÇelebi StankoDimitrov - ElliottDrabek + ElliottDrabek AliHakim WaiLam DanyuLiu - JahnaOtterbacher + JahnaOtterbacher HongQi HoracioSaggion SimoneTeufel @@ -4184,8 +4184,8 @@ BilyanaMartinovski SusanRobinson JensStephan - JoelTetreault - David R.Traum + JoelTetreault + David R.Traum Evaluation of Transcription and Annotation Tools for a Multi-modal, Multi-party Dialogue Corpus http://www.lrec-conf.org/proceedings/lrec2004/pdf/758.pdf garg-etal-2004-evaluation @@ -4198,7 +4198,7 @@ AlineVillavicencio - TimothyBaldwin + TimothyBaldwin BenjaminWaldron A Multilingual Database of Idioms http://www.lrec-conf.org/proceedings/lrec2004/pdf/760.pdf @@ -4206,19 +4206,19 @@ KazuakiMaeda - StephanieStrassel + StephanieStrassel Annotation Tools for Large-Scale Corpus Development: Using <fixed-case>AGTK</fixed-case> at the <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium http://www.lrec-conf.org/proceedings/lrec2004/pdf/761.pdf maeda-strassel-2004-annotation - StephanieStrassel + StephanieStrassel Linguistic Resources for Effective, Affordable, Reusable Speech-to-Text http://www.lrec-conf.org/proceedings/lrec2004/pdf/762.pdf strassel-2004-linguistic - MarcVilain + MarcVilain Building part-of-speech Corpora Through Histogram Hopping http://www.lrec-conf.org/proceedings/lrec2004/pdf/763.pdf vilain-2004-building @@ -4235,13 +4235,13 @@ BilyanaMartinovski SaurabhGarg JensStephan - DavidTraum + DavidTraum Issues in Corpus Development for Multi-party Multi-modal Task-oriented Dialogue http://www.lrec-conf.org/proceedings/lrec2004/pdf/766.pdf robinson-etal-2004-issues - ChristopherCieri + ChristopherCieri DavidMiller KevinWalker The Fisher Corpus: a Resource for the Next Generations of Speech-to-Text @@ -4249,7 +4249,7 @@ cieri-etal-2004-fisher - David R.Traum + David R.Traum SusanRobinson JensStephan Evaluation of Multi-party Virtual Reality Dialogue Interaction @@ -4257,8 +4257,8 @@ traum-etal-2004-evaluation - ChristopherCieri - Joseph P.Campbell + ChristopherCieri + Joseph P.Campbell HirotakaNakasone DavidMiller KevinWalker @@ -4267,7 +4267,7 @@ cieri-etal-2004-mixer - AlessandroMazzei + AlessandroMazzei VincenzoLombardo Building a Large Grammar for <fixed-case>I</fixed-case>talian http://www.lrec-conf.org/proceedings/lrec2004/pdf/772.pdf @@ -4292,7 +4292,7 @@ LongQiu Min-YenKan - Tat-SengChua + Tat-SengChua A Public Reference Implementation of the <fixed-case>RAP</fixed-case> Anaphora Resolution Algorithm http://www.lrec-conf.org/proceedings/lrec2004/pdf/778.pdf qiu-etal-2004-public @@ -4302,8 +4302,8 @@ NeilIreson PaoloAllegrini SimoneMarchi - SimonettaMontemagni - Jose Maria GomezHidalgo + SimonettaMontemagni + Jose Maria GomezHidalgo <fixed-case>NLP</fixed-case>-enhanced Content Filtering Within the <fixed-case>POESIA</fixed-case> Project http://www.lrec-conf.org/proceedings/lrec2004/pdf/779.pdf hepple-etal-2004-nlp @@ -4330,20 +4330,20 @@ RobertIrie - BethSundheim + BethSundheim Resources for Place Name Analysis http://www.lrec-conf.org/proceedings/lrec2004/pdf/785.pdf irie-sundheim-2004-resources - BenteMaegaard + BenteMaegaard <fixed-case>NEMLAR</fixed-case> - An <fixed-case>A</fixed-case>rabic Language Resources Project http://www.lrec-conf.org/proceedings/lrec2004/pdf/786.pdf maegaard-2004-nemlar-arabic - Key-SunChoi - Hee-SookBae + Key-SunChoi + Hee-SookBae WonseokKang JuhoLee EunheKim @@ -4363,22 +4363,22 @@ jouis-ferru-2004-intranet - ChristopherCieri - MarkLiberman + ChristopherCieri + MarkLiberman A Progress Report from the <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium: Recent Activities in Resource Creation and Distribution and the Development of Tools and Standards http://www.lrec-conf.org/proceedings/lrec2004/pdf/789.pdf cieri-liberman-2004-progress - KhalidChoukri + KhalidChoukri Recent Activities within the <fixed-case>E</fixed-case>uropean Language Resources Association: Issues on Sharing Language Resources and Evaluation http://www.lrec-conf.org/proceedings/lrec2004/pdf/791.pdf choukri-2004-recent - Widad Mustafa ElHadi - IsmailTimimi - MarianneDabbadie + Widad Mustafa ElHadi + IsmailTimimi + MarianneDabbadie <fixed-case>EVALDA</fixed-case>-<fixed-case>CESART</fixed-case> Project: Terminological Resources Acquisition Tools Evaluation Campaign http://www.lrec-conf.org/proceedings/lrec2004/pdf/792.pdf hadi-etal-2004-evalda @@ -4401,25 +4401,25 @@ Joaquim F. Ferreirada Silva ZornitsaKozareva - José Gabriel PereiraLopes + José Gabriel PereiraLopes Cluster Analysis and Classification of Named Entities http://www.lrec-conf.org/proceedings/lrec2004/pdf/796.pdf da-silva-etal-2004-cluster - KhalidChoukri + KhalidChoukri MahtabNikkhou - NiklasPaulsson + NiklasPaulsson Network of Data Centres (<fixed-case>N</fixed-case>et<fixed-case>DC</fixed-case>): <fixed-case>BNSC</fixed-case> - An <fixed-case>A</fixed-case>rabic Broadcast News Speech Corpus http://www.lrec-conf.org/proceedings/lrec2004/pdf/797.pdf choukri-etal-2004-network - ValérieMapelli + ValérieMapelli MariaNava SylvainSurcin - DjamelMostefa - KhalidChoukri + DjamelMostefa + KhalidChoukri <fixed-case>T</fixed-case>echnolangue: A Permanent Evaluation and Information Infrastructure http://www.lrec-conf.org/proceedings/lrec2004/pdf/798.pdf mapelli-etal-2004-technolangue @@ -4431,9 +4431,9 @@ marrafa-2004-extending - BorisDobrov + BorisDobrov IgorKuralenok - NataliaLoukachevitch + NataliaLoukachevitch IgorNekrestyanov IlyaSegalovich <fixed-case>R</fixed-case>ussian Information Retrieval Evaluation Seminar diff --git a/data/xml/L06.xml b/data/xml/L06.xml index a3eb502292..8540b50fc5 100644 --- a/data/xml/L06.xml +++ b/data/xml/L06.xml @@ -3,13 +3,13 @@ Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06) - NicolettaCalzolari - KhalidChoukri + NicolettaCalzolari + KhalidChoukri AldoGangemi - BenteMaegaard - JosephMariani - JanOdijk - DanielTapias + BenteMaegaard + JosephMariani + JanOdijk + DanielTapias European Language Resources Association (ELRA)
Genoa, Italy
May @@ -43,9 +43,9 @@ chaudiron-mariani-2006-techno
- MannyRayner - PierretteBouillon - Beth AnnHockey + MannyRayner + PierretteBouillon + Beth AnnHockey NikosChatzichrisafis <fixed-case>REGULUS</fixed-case>: A Generic Multilingual Open Source Platform for Grammar-Based Speech Applications http://www.lrec-conf.org/proceedings/lrec2006/pdf/9_pdf.pdf @@ -86,8 +86,8 @@ IbonSaratxaga - EvaNavas - InmaculadaHernáez + EvaNavas + InmaculadaHernáez IkerAholab Designing and Recording an Emotional Speech Database for Corpus Based Synthesis in <fixed-case>B</fixed-case>asque http://www.lrec-conf.org/proceedings/lrec2006/pdf/19_pdf.pdf @@ -113,7 +113,7 @@ Tien-PingTan - LaurentBesacier + LaurentBesacier A <fixed-case>F</fixed-case>rench Non-Native Corpus for Automatic Speech Recognition http://www.lrec-conf.org/proceedings/lrec2006/pdf/33_pdf.pdf Automatic speech recognition (ASR) technology has achieved a level of maturity, where it is already practical to be used by novice users. However, most non-native speakers are still not comfortable with services including ASR systems, because of the accuracy on non-native speakers. This paper describes our approach in constructing a non-native corpus particularly in French for testing and adapting non-native speaker for automatic speech recognition. Finally, we also propose in this paper a method for detecting pronunciation variants and possible pronunciation mistakes by non-native speakers. @@ -147,7 +147,7 @@ tongchim-etal-2006-blind - YujiMatsumoto + YujiMatsumoto MasayukiAsahara KiyotaHashimoto YukioTono @@ -168,23 +168,23 @@ IvanBerlocher Hyun-gueHuh - EricLaporte - Jee-sunNam + EricLaporte + Jee-sunNam Morphological annotation of <fixed-case>K</fixed-case>orean with Directly Maintainable Resources http://www.lrec-conf.org/proceedings/lrec2006/pdf/44_pdf.pdf This article describes an exclusively resource-based method of morphological annotation of written Korean text. Korean is an agglutinative language. Our annotator is designed to process text before the operation of a syntactic parser. In its present state, it annotates one-stem words only. The output is a graph of morphemes annotated with accurate linguistic information. The granularity of the tagset is 3 to 5 times higher than usual tagsets. A comparison with a reference annotated corpus showed that it achieves 89% recall without any corpus training. The language resources used by the system are lexicons of stems, transducers of suffixes and transducers of generation of allomorphs. All can be easily updated, which allows users to control the evolution of the performances of the system. It has been claimed that morphological annotation of Korean text could only be performed by a morphological analysis module accessing a lexicon of morphemes. We show that it can also be performed directly with a lexicon of words and without applying morphological rules at annotation time, which speeds up annotation to 1,210 words. The lexicon of words is obtained from the maintainable language resources through a fully automated compilation process. berlocher-etal-2006-morphological - PatrickSaint-Dizier + PatrickSaint-Dizier <fixed-case>P</fixed-case>rep<fixed-case>N</fixed-case>et: a Multilingual Lexical Description of Prepositions http://www.lrec-conf.org/proceedings/lrec2006/pdf/45_pdf.pdf In this paper, we present the results of a preliminary investigation that aims at constructing a repository of preposition syntactic and semantic behaviors. A preliminary frame-based format for representing their prototypical behavior is then proposed together with related inferential patterns that describe functional or paradigmatic relations between preposition senses. saint-dizier-2006-prepnet - Marie-ClaudeL’Homme - Hee SookBae + Marie-ClaudeL’Homme + Hee SookBae A Methodology for Developing Multilingual Resources for Terminology http://www.lrec-conf.org/proceedings/lrec2006/pdf/48_pdf.pdf This paper presents a project that aims at building lexical resources for terminology. By lexical resources, we mean dictionaries that provide detailed lexico-semantic information on terms, i.e. lexical units the sense of which can be related to a special subject field. In terminology, there is a lack of such resources. The specific dictionaries we are currently developing describe basic French and Korean terms that belong to the fields of computer science and the Internet (e.g. computer, configure, user-friendly, Web, browse, spam). This paper presents the structure of the French and Korean articles: each component is examined and illustrated with examples. We then describe the corpus-based methodology and the different computer applications used for developing the articles. Our methodology comprises five steps: design of the corpora, selection of terms; sense distinction; definition of actantial structures and listing of semantic relations. Details on the current state of each database are also given. @@ -193,7 +193,7 @@ StephanRaidt GérardBailly - FredericElisei + FredericElisei Does a Virtual Talking Face Generate Proper Multimodal Cues to Draw User’s Attention to Points of Interest? http://www.lrec-conf.org/proceedings/lrec2006/pdf/49_pdf.pdf We present a series of experiments investigating face-to-face interaction between an Embodied Conversational Agent (ECA) and a human interlocutor. The ECA is embodied by a video realistic talking head with independent head and eye movements. For a beneficial application in face-to-face interaction, the ECA should be able to derive meaning from communicational gestures of a human interlocutor, and likewise to reproduce such gestures. Conveying its capability to interpret human behaviour, the system encourages the interlocutor to show appropriate natural activity. Therefore it is important that the ECA knows how to display what would correspond to mental states in humans. This allows to interpret the machine processes of the system in terms of human expressiveness and to assign them a corresponding meaning. Thus the system may maintain an interaction based on human patterns. During a first experiment we investigated the ability of our talking head to direct user attention with facial deictic cues (Raidt, Bailly et al. 2005). Users interact with the ECA during a simple card game offering different levels of help and guidance through facial deictic cues. We analyzed the users’ performance and their perception of the quality of assistance given by the ECA. The experiment showed that users profit from its presence and its facial deictic cues. In the continuative series of experiments presented here, we investigated the effect of an enhancement of the multimodality of the deictic gestures by adding a spoken instruction. @@ -207,7 +207,7 @@ wong-2006-skeleton - ConstantinOrăsan + ConstantinOrăsan LauraHasler Computer-aided summarisation – what the user really wants http://www.lrec-conf.org/proceedings/lrec2006/pdf/52_pdf.pdf @@ -230,7 +230,7 @@ kageura-kikui-2006-self - Hiromi itohOzaku + Hiromi itohOzaku AkinoriAbe KaoruSagara NoriakiKuwahara @@ -251,8 +251,8 @@ santos-etal-2006-harem - RafaelBanchs - AntonioBonafonte + RafaelBanchs + AntonioBonafonte JavierPérez Acceptance Testing of a Spoken Language Translation System http://www.lrec-conf.org/proceedings/lrec2006/pdf/60_pdf.pdf @@ -261,9 +261,9 @@ ValentinTablan - WimPeters + WimPeters DianaMaynard - HamishCunningham + HamishCunningham Creating Tools for Morphological Analysis of <fixed-case>S</fixed-case>umerian http://www.lrec-conf.org/proceedings/lrec2006/pdf/64_pdf.pdf Sumerian is a long-extinct language documented throughout the ancient MiddleEast, arguably the first language for which we have written evidence, and is a language isolate (i.e. no related languages have so far been identified). The Electronic Text Corpus of Sumerian Literature (ETCSL), based at theUniversity of Oxford, aims to make accessible on the web over 350 literary workscomposed during the late third and early second millennia BCE. The transliterations and translations can be searched, browsed and read online using the tools of the website. In this paper we describe the creation of linguistic analysis and corpus search tools for Sumerian, as part of the development of the ETCSL. This is designed to enable Sumerian scholars, students and interested laymen to analyse the texts online and electronically, and to further knowledge about the language. @@ -295,18 +295,18 @@ ReinhardRapp - Carlos MartinVide + Carlos MartinVide Example-Based Machine Translation Using a Dictionary of Word Pairs http://www.lrec-conf.org/proceedings/lrec2006/pdf/74_pdf.pdf Machine translation systems, whether rule-based, example-based, or statistical, all rely on dictionaries that are in essence mappings between individual words of the source and the target language. Criteria for the disambiguation of ambiguous words and for differences in word order between the two languages are not accounted for in the lexicon. Instead, these important issues are dealt with in the translation engines. Because the engines tend to be compact and (even with data-oriented approaches) do not fully reflect the complexity of the problem, this approach generally does not account for the more fine grained facets of word behavior. This leads to wrong generalizations and, as a consequence, translation quality tends to be poor. In this paper we suggest to approach this problem by using a new type of lexicon that is not based on individual words but on pairs of words. For each pair of consecutive words in the source language the lexicon lists the possible translations in the target language together with information on order and distance of the target words. The process of machine translation is then seen as a combinatorial problem: For all word pairs in a source sentence all possible translations are retrieved from the lexicon and then those translations are discarded that lead to contradictions when constructing the target sentence. This process implicitly leads to word sense disambiguation and to language specific reordering of words. rapp-vide-2006-example - Widad MustafaEl Hadi - IsmailTimimi - MarianneDabbadie + Widad MustafaEl Hadi + IsmailTimimi + MarianneDabbadie KhalidChoukri - OlivierHamon + OlivierHamon Yun-ChuangChiao Terminological Resources Acquisition Tools: Toward a User-oriented Evaluation Model http://www.lrec-conf.org/proceedings/lrec2006/pdf/75_pdf.pdf @@ -314,9 +314,9 @@ el-hadi-etal-2006-terminological - NúriaBel + NúriaBel SergioEspeja - MontserratMarimon + MontserratMarimon New tools for the encoding of lexical data extracted from corpus http://www.lrec-conf.org/proceedings/lrec2006/pdf/76_pdf.pdf This paper describes the methodology and tools that are the basis of our platform AAILE.4 AAILE has been built for supplying those working in the construction of lexicons for syntactic parsing with more efficient ways of visualizing and analyzing data extracted from corpus. The platform offers support using techniques such as similarity measures, clustering and pattern classification. @@ -325,7 +325,7 @@ DanielaBraga LuísCoelho - João P.Teixeira + João P.Teixeira DiamantinoFreitas <fixed-case>P</fixed-case>rogmatica: A Prosodic Database for <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese http://www.lrec-conf.org/proceedings/lrec2006/pdf/77_pdf.pdf @@ -333,8 +333,8 @@ braga-etal-2006-progmatica - JesúsGiménez - EnriqueAmigó + JesúsGiménez + EnriqueAmigó <fixed-case>I</fixed-case>qmt: A Framework for Automatic Machine Translation Evaluation http://www.lrec-conf.org/proceedings/lrec2006/pdf/79_pdf.pdf We present the IQMT Framework for Machine Translation Evaluation Inside QARLA. IQMT offers a common workbench in which existing evaluation metrics can be utilized and combined. It provides i) a measure to evaluate the quality of any set of similarity metrics (KING), ii) a measure to evaluate the quality of a translation using a set of similarity metrics (QUEEN), and iii) a measure to evaluate the reliability of a test set (JACK). The first release of the IQMT package is freely available for public use. Current version includes a set of 26 metrics from 7 different well-known metric families, and allows the user to supply its own metrics. For future releases, we are working on the design of new metrics that are able to capture linguistic aspects of translation beyond lexical ones. @@ -342,18 +342,18 @@ TommasoCaselli - IrinaProdanof + IrinaProdanof Annotating Bridging Anaphors in <fixed-case>I</fixed-case>talian: in Search of Reliability http://www.lrec-conf.org/proceedings/lrec2006/pdf/80_pdf.pdf The aim of this work is the presentation and preliminary evaluation of an XML annotation scheme for marking bridging anaphors of the form “definite article + N” in Italian. The scheme is based on a corpus-study. The data we collected from the evaluation experiment seem to support the reliability of the scheme, although some problems still remain open. caselli-prodanof-2006-annotating - Henkvan den Heuvel + Henkvan den Heuvel KhalidChoukri ChristianGollan - AsuncionMoreno - DjamelMostefa + AsuncionMoreno + DjamelMostefa <fixed-case>TC</fixed-case>-<fixed-case>STAR</fixed-case>: New language resources for <fixed-case>ASR</fixed-case> and <fixed-case>SLT</fixed-case> purposes http://www.lrec-conf.org/proceedings/lrec2006/pdf/81_pdf.pdf In TC-STAR a variety of Language Resources (LR) is being produced. In this contribution we address the resources that have been created for Automatic Speech Recrognition and Spoken Language Translation. As yet, these are 14 LR in total: two training SLR for ASR (English and Spanish), three development LR and three evaluation LR for ASR (English, Spanish, Mandarin), and three development LR and three evaluation LR for SLT (English-Spanish, Spanish-English, Mandarin-English). In this paper we describe the properties, validation, and availability of these resources. @@ -361,7 +361,7 @@ YunqingXia - Kam-FaiWong + Kam-FaiWong WenjieLi Constructing A <fixed-case>C</fixed-case>hinese Chat Language Corpus with A Two-Stage Incremental Annotation Approach http://www.lrec-conf.org/proceedings/lrec2006/pdf/86_pdf.pdf @@ -378,7 +378,7 @@ YasunoriOhishi KatunobuItou - KazuyaTakeda + KazuyaTakeda AtsushiFujii Statistical Analysis for Thesaurus Construction using an Encyclopedic Corpus http://www.lrec-conf.org/proceedings/lrec2006/pdf/88_pdf.pdf @@ -387,7 +387,7 @@ CatherineHavasi - JamesPustejovsky + JamesPustejovsky MarcVerhagen <fixed-case>BULB</fixed-case>: A Unified Lexical Browser http://www.lrec-conf.org/proceedings/lrec2006/pdf/89_pdf.pdf @@ -395,8 +395,8 @@ havasi-etal-2006-bulb - UlrichSchäfer - DanielBeck + UlrichSchäfer + DanielBeck Automatic Testing and Evaluation of Multilingual Language Technology Resources and Components http://www.lrec-conf.org/proceedings/lrec2006/pdf/91_pdf.pdf We describe SProUTomat, a tool for daily building, testing and evaluating a complex general-purpose multilingual natural language text processor including its linguistic resources (lingware). Software and lingware are developed, maintained and extended in a distributed manner by multiple authors and projects, i.e., the source code stored in a version control system is modified frequently. The modular design of different, dedicated lingware modules like tokenizers, morphology, gazetteers, type hierarchy, rule formalism on the one hand increases flexibility and re-usability, but on the other hand may lead to fragility with respect to changes. Therefore, frequent testing as known from software engineering is necessary also for lingware to warrant a high level of quality and overall stability of the system. We describe the build, testing and evaluation methods for LT software and lingware we have developed on the basis of the open source, platform-independent Apache Ant tool and the configurable evaluation tool JTaCo. @@ -411,7 +411,7 @@ PaulBuitelaar - PhilippCimiano + PhilippCimiano StefaniaRacioppa MelanieSiegel Ontology-based Information Extraction with <fixed-case>SOBA</fixed-case> @@ -486,7 +486,7 @@ ahmad-etal-2006-visual - MathieuMangeot + MathieuMangeot AntoineChalvin Dictionary Building with the Jibiki Platform: the <fixed-case>GDEF</fixed-case> case http://www.lrec-conf.org/proceedings/lrec2006/pdf/105_pdf.pdf @@ -496,8 +496,8 @@ TomohiroOhno ShigekiMatsubara - HidekiKashioka - NaotoKato + HidekiKashioka + NaotoKato YasuyoshiInagaki A Syntactically Annotated Corpus of <fixed-case>J</fixed-case>apanese Spoken Monologue http://www.lrec-conf.org/proceedings/lrec2006/pdf/106_pdf.pdf @@ -505,9 +505,9 @@ ohno-etal-2006-syntactically - Jerneja ŽganecGros + Jerneja ŽganecGros VarjaCvetko-Orešnik - PrimožJakopin + PrimožJakopin AlešMihelič <fixed-case>SI</fixed-case>-<fixed-case>PRON</fixed-case>: A Pronunciation Lexicon for <fixed-case>S</fixed-case>lovenian http://www.lrec-conf.org/proceedings/lrec2006/pdf/111_pdf.pdf @@ -531,18 +531,18 @@ BrianRoark - MaryHarper + MaryHarper EugeneCharniak - BonnieDorr + BonnieDorr MarkJohnson - JeremyKahn + JeremyKahn YangLiu - MariOstendorf - JohnHale + MariOstendorf + JohnHale AnnaKrasnyanskaya MatthewLease IzhakShafran - MatthewSnover + MatthewSnover RobinStewart LisaYung <fixed-case>SP</fixed-case>arseval: Evaluation Metrics for Parsing Speech @@ -584,8 +584,8 @@ tohyama-matsubara-2006-collection - ShuichiItahashi - Chiu-yuTseng + ShuichiItahashi + Chiu-yuTseng SatoshiNakamura Oriental <fixed-case>COCOSDA</fixed-case>: Past, Present and Future http://www.lrec-conf.org/proceedings/lrec2006/pdf/127_pdf.pdf @@ -625,10 +625,10 @@ SašoDžeroski - TomažErjavec + TomažErjavec NinaLedinek PetrPajas - ZdenekŽabokrtsky + ZdenekŽabokrtsky AndrejaŽele Towards a <fixed-case>S</fixed-case>lovene Dependency Treebank http://www.lrec-conf.org/proceedings/lrec2006/pdf/133_pdf.pdf @@ -645,7 +645,7 @@ kruengkrai-etal-2006-conditional - YoshihideKato + YoshihideKato ShigekiMatsubara YasuyoshiInagaki A Corpus Search System Utilizing Lexical Dependency Structure @@ -655,35 +655,35 @@ PeterBerck - AlbertRussel + AlbertRussel <fixed-case>ANNEX</fixed-case> - a web-based Framework for Exploiting Annotated Media Resources http://www.lrec-conf.org/proceedings/lrec2006/pdf/139_pdf.pdf Manual annotation of various media streams, time series data and also text sequences is still a very time consuming work that has to be carried out in many areas of linguistics and beyond. Based on many theoretical discussions and practical experiences professional tools have been deployed such as ELAN that support the researcher in his/her work. Most of these annotation tools operate on local computers. However, since more and more language resources are stored in web-accessible archives, researchers want to take profit from the new possibilities. ANNEX was developed to fill this gap, since it allows web-based analysis of complex annotated media streams, i.e., the users don’t have to download resources and don’t have to download and install programs. By simply using a normal web-browser they can start their linguistic work. Yet, due to the architecture of the Internet, ANNEX does not offer the options to create annotations, but this feature will come. However, users have to be aware of the fact that media streaming does not offer that high accuracy as on local computers. berck-russel-2006-annex - TomažErjavec + TomažErjavec The <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>lovene <fixed-case>ACQUIS</fixed-case> corpus http://www.lrec-conf.org/proceedings/lrec2006/pdf/140_pdf.pdf The paper presents the SVEZ-IJS corpus, a large parallel annotated English-Slovene corpus containing translated legal texts of the European Union, the ACQUIS Communautaire. The corpus contains approx. 2 x 5 million words and was compiled from the translation memory obtained from the Translation Unit of the Slovene Government Office for European Affairs. The corpus is encoded in XML, accordingto the Text Encoding Initiative Guidelines TEI P4, where each translation memory unit contains useful metadata and the two aligned segments (sentences). Both the Slovene and English text islinguistically annotated at the word-level, by context disambiguatedlemmas and morphosyntactic descriptions, which follow the MULTEXTguidelines. The complete corpus is freely available for research, either via an on-line concordancer, or for downloading from the corpushome page at http://nl.ijs.si/svez/. erjavec-2006-english - DaanBroeder + DaanBroeder AndreasClaus FreddyOffenga - RomualdSkiba + RomualdSkiba PaulTrilsbeek - PeterWittenburg + PeterWittenburg <fixed-case>LAMUS</fixed-case>: the Language Archive Management and Upload System http://www.lrec-conf.org/proceedings/lrec2006/pdf/141_pdf.pdf Language Archiving, Resource Management LAMUS is a web-based service that allows researchers to deposit their language resources into a language resources archive. It was developed at the MPI for Psycholinguistics for stricter control of the archive coherence and consistency and allowing wider use of the archiving facilities without increasing the workload for archive and corpus managers. LAMUS is based on the use of IMDI metadata standard for language resources and offers metadata search and browsing over the archive. broeder-etal-2006-lamus - DaanBroeder + DaanBroeder FreddyOffenga - PeterWittenburg + PeterWittenburg Petervan der Kamp DavidNathan SvenStrömqvist @@ -695,19 +695,19 @@ MarcKemps-Snijders JulienDucret - LaurentRomary - PeterWittenburg + LaurentRomary + PeterWittenburg An <fixed-case>API</fixed-case> for accessing the Data Category Registry http://www.lrec-conf.org/proceedings/lrec2006/pdf/146_pdf.pdf Central Ontologies are increasingly important to manage interoperability between different types of language resources. This was the reason for ISO to set up a new committee ISO TC37/SC4 taking care of language resource management issues. Central to the work of this committee is the definition of a framework for a central registry of data categories that are important in the domain of language resources. This paper describes an application programming interface that was designed to request services from this data category registry. The DCR is operational and the described API has already been tested from a lexicon application. kemps-snijders-etal-2006-api - PeterWittenburg - DaanBroeder - WolfgangKlein + PeterWittenburg + DaanBroeder + WolfgangKlein StephenLevinson - LaurentRomary + LaurentRomary Foundations of Modern Language Resource Archives http://www.lrec-conf.org/proceedings/lrec2006/pdf/147_pdf.pdf A number of serious reasons will convince an increasing amount of researchers to store their relevant material in centers which we will call "language resource archives". They combine the duty of taking care of long-term preservation as well as the task to give access to their material to different user groups. Access here is meant in the sense that an active interaction with the data will be made possible to support the integration of new data, new versions or commentaries of all sorts. Modern Language Resource Archives will have to adhere to a number of basic principles to fulfill all requirements and they will have to be involved in federations to create joint language resource domains making it even simpler for the researchers to access the data. This paper makes an attempt to formulate the essential pillars language resource archives have to adhere to. @@ -715,10 +715,10 @@ FreddyOffenga - DaanBroeder - PeterWittenburg + DaanBroeder + PeterWittenburg JulienDucret - LaurentRomary + LaurentRomary Metadata Profile in the <fixed-case>ISO</fixed-case> Data Category Registry http://www.lrec-conf.org/proceedings/lrec2006/pdf/148_pdf.pdf Metadata descriptions of language resources become an increasing necessity since the shear amount of language resources is increasing rapidly and especially since we are now creating infrastuctures to access these resources via the web through integrated domains of language resource archives. Yet, the metadata frameworks offered for the domain of language resources (IMDI and OLAC), although mature, are not as widely accepted as necessary. The lack of confidence in the stability and persistence of the concepts and formats introduced by these metadata sets seems to be one argument for people to not invest the time needed for metadata creation. The introduction of these concepts into an ISO standardization process may convince contributors to make use of the terminology. The availability of the ISO Data Category Registry that includes a metadata profile will also offer the opportunity for researchers to construct their own metadata set tailored to the needs of the project at hand, but nevertheless supporting interoperability. @@ -727,14 +727,14 @@ MarcKemps-Snijders Mark-JanNederhof - PeterWittenburg + PeterWittenburg <fixed-case>LEXUS</fixed-case>, a web-based tool for manipulating lexical resources lexicon http://www.lrec-conf.org/proceedings/lrec2006/pdf/149_pdf.pdf LEXUS provides a flexible framework for the maintaining lexical structure and content. It is the first implementation of the Lexical Markup Framework model currently being developed at ISO TC37/SC4. Amongst its capabilities are the possibility to create lexicon structures, manipulate content and use of typed relations. Integration of well established Data Category Registries is supported to further promote interoperability by allowing access to well established linguistic concepts. Advanced linguistic functionality is offered to assist users in cross lexica operations such as search and comparison and merging of lexica. To enable use within various user groups the look and feel of each lexicon may be customized. In the near future more functionality will be added including integration with other tools accessing lexical content. kemps-snijders-etal-2006-lexus - TomažErjavec + TomažErjavec DarjaFišer Building <fixed-case>S</fixed-case>lovene <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et http://www.lrec-conf.org/proceedings/lrec2006/pdf/150_pdf.pdf @@ -758,10 +758,10 @@ johansson-nugues-2006-construction - PeterWittenburg - HennieBrugman - AlbertRussel - AlexKlassmann + PeterWittenburg + HennieBrugman + AlbertRussel + AlexKlassmann HanSloetjes <fixed-case>ELAN</fixed-case>: a Professional Framework for Multimodality Research http://www.lrec-conf.org/proceedings/lrec2006/pdf/153_pdf.pdf @@ -772,8 +772,8 @@ PeterBerck Hans-JörgBibiko MarcKemps-Snijders - AlbertRussel - PeterWittenburg + AlbertRussel + PeterWittenburg Ontology-based Language Archive Utilization http://www.lrec-conf.org/proceedings/lrec2006/pdf/154_pdf.pdf At the MPI for Psycholinguistics a large archive with language resources has been created with contributions from many different individual researchers and research projects. All of these resources, in particular annotated media streams and multimedia lexica, are accessible via the web and can be utilized with the help of web-based utilization frameworks. Therefore, the archive lends itself to motivate users to operate across the boundaries of single corpora and to support cross-language work. This, however, can only be done when the problems of interoperability, in particular at the level of linguistic encoding, can be solved in an efficient way. Two Max-Planck-Institutes are cooperating to build a framework that allows users to easily create their own practical ontologies and if wanted to relate their concepts to central ontologies. @@ -791,8 +791,8 @@ AndrejŽgank DarinkaVerdonik - Aleksandra ZöglingMarkuš - ZdravkoKačič + Aleksandra ZöglingMarkuš + ZdravkoKačič <fixed-case>SINOD</fixed-case> - <fixed-case>S</fixed-case>lovenian non-native speech database http://www.lrec-conf.org/proceedings/lrec2006/pdf/163_pdf.pdf This paper presents the SINOD database, which is the first Slovenian non-native speech database. It will be used to improve the performance of large vocabulary continuous speech recogniser for non-native speakers. The main quality impact is expected for acoustic models and recogniser’s vocabulary. The SINOD database is designed as supplement to the Slovenian BNSI Broadcast News database. The same BN recommendations were used for both databases. Two interviews with non-native Slovenian speakers were incorporated in the set. Both non-native speakers were female, whereas the journalist was Slovenian native male speaker. The transcription approach applied in the production phase is presented. Different statistics and analyses of database are given in the paper. @@ -808,7 +808,7 @@ van-assem-etal-2006-conversion - Antalvan den Bosch + Antalvan den Bosch InekeSchuurman VincentVandeghinste Transferring <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case>-tagging and lemmatization tools from spoken to written <fixed-case>D</fixed-case>utch corpus development @@ -817,17 +817,17 @@ van-den-bosch-etal-2006-transferring - MarkPrzybocki - GregorySanders - AudreyLe + MarkPrzybocki + GregorySanders + AudreyLe Edit Distance: A Metric for Machine Translation Evaluation http://www.lrec-conf.org/proceedings/lrec2006/pdf/168_pdf.pdf NIST has coordinated machine translation (MT) evaluations for several years using an automatic and repeatable evaluation measure. Under the Global Autonomous Language Exploitation (GALE) program, NIST is tasked with implementing an edit-distance-based evaluation of MT. Here “edit distance” is defined to be the number of modifications a human editor is required to make to a system translation such that the resulting edited translation contains the complete meaning in easily understandable English, as a single high-quality human reference translation. In preparation for this change in evaluation paradigm, NIST conducted two proof-of-concept exercises specifically designed to probe the data space, to answer questions related to editor agreement, and to establish protocols for the formal GALE evaluations. We report here our experimental design, the data used, and our findings for these exercises. przybocki-etal-2006-edit - Niels OleBernsen - LailaDybkjær + Niels OleBernsen + LailaDybkjær SvendKiilerich <fixed-case>H</fixed-case>. <fixed-case>C</fixed-case>. Andersen Conversation Corpus http://www.lrec-conf.org/proceedings/lrec2006/pdf/169_pdf.pdf @@ -842,10 +842,10 @@ sahlgren-2006-towards - AndreiPopescu-Belis + AndreiPopescu-Belis PaulaEstrella MargaretKing - NancyUnderwood + NancyUnderwood A Model for Context-Based Evaluation of Language Processing Systems and its Application to Machine Translation Evaluation http://www.lrec-conf.org/proceedings/lrec2006/pdf/171_pdf.pdf In this paper, we propose a formal framework that takes into account the influence of the intended context of use of an NLP system on the procedure and the metrics used to evaluate the system. We introduce in particular the notion of a context-dependent quality model and explain how it can be adapted to a given context of use. More specifically, we define vector-space representations of contexts of use and of quality models, which are connected by a generic contextual quality model (GCQM). For each domain, experts in evaluation are needed to build a GCQM based on analytic knowledge and on previous evaluations, using the mechanism proposed here. The main inspiration source for this work is the FEMTI framework for the evaluation of machine translation, which implements partly the present model, and which is described briefly along with insights from other domains. @@ -853,7 +853,7 @@ MartinForst - Ronald M.Kaplan + Ronald M.Kaplan The importance of precise tokenizing for deep grammars http://www.lrec-conf.org/proceedings/lrec2006/pdf/172_pdf.pdf We present a non-deterministic finite-state transducer that acts as a tokenizer and normalizer for free text that is input to a broad-coverage LFG of German. We compare the basic tokenizer used in an earlier version of the grammar and the more sophisticated tokenizer that we now use. The revised tokenizer increases the coverage of the grammar in terms of full parses from 68.3% to 73.4% on sentences 8,001 through 10,000 of the TiGer Corpus. @@ -877,7 +877,7 @@ BerntAndrassy - HaraldHoege + HaraldHoege Human and machine recognition as a function of <fixed-case>SNR</fixed-case> http://www.lrec-conf.org/proceedings/lrec2006/pdf/179_pdf.pdf In-car automatic speech recognition (ASR) is usually evaluated behaviour for different levels of noise. Yet this is interesting for car manufacturers in order to predict system performances for different speeds and different car models and thus allow to design speech based applications in a better way. It therefore makes sense to split the single WER into SNR dependent WERs, where SNR stands for the signal to noise ratio, which is an appropriate measure for the noise level. In this paper a SNR measure based on the concept of the Articulation Index is developed, which allows the direct comparison with human recognition performance. @@ -919,7 +919,7 @@ AgnesLisowska - Nancy L.Underwood + Nancy L.Underwood <fixed-case>ROTE</fixed-case>: A Tool to Support Users in Defining the Relative Importance of Quality Characteristics http://www.lrec-conf.org/proceedings/lrec2006/pdf/187_pdf.pdf This paper describes the Relative Ordering Tool for Evaluation (ROTE) which is designed to support the process of building a parameterised quality model for evaluation. It is a very simple tool which enables users to specify the relative importance of quality characteristics (and associated metrics) to reflect the users' particular requirements. The tool allows users to order any number of quality characteristics by comparing them in a pair-wise fashion. The tool was developed in the context of a collaborative project developing a text mining system. A full scale evaluation of the text mining system was designed and executed for three different users and the ROTE tool was successfully applied by those users during that process. The tool will be made available for general use by the evaluation community. @@ -928,16 +928,16 @@ SergeSharoff BogdanBabych - AnthonyHartley + AnthonyHartley Using collocations from comparable corpora to find translation equivalents http://www.lrec-conf.org/proceedings/lrec2006/pdf/190_pdf.pdf In this paper we present a tool for finding appropriate translation equivalents for words from the general lexicon using comparable corpora. For a phrase in the source language the tool suggests arange of possible expressions used in similar contexts in target language corpora. In the paper we discuss the method and present results of human evaluation of the performance of the tool. sharoff-etal-2006-using - Folkertde Vriend - LouBoves - Henkvan den Heuvel + Folkertde Vriend + LouBoves + Henkvan den Heuvel Roelandvan Hout JoepKruijsen JosSwanenberg @@ -955,7 +955,7 @@ ui-dhonnchadha-van-genabith-2006-part - VéroniqueMoriceau + VéroniqueMoriceau Language Challenges for Data Fusion in Question-Answering http://www.lrec-conf.org/proceedings/lrec2006/pdf/194_pdf.pdf Search engines on the web and most existing question-answering systems provide the user with a set of hyperlinks and/or web page extracts containing answer(s) to a question. These answers are often incoherent to a certain degree (equivalent, contradictory, etc.). It is then quite difficult for the user to know which answer is the correct one. In this paper, we present an approach which aims at providing synthetic numerical answers in a question-answering system. These answers are generated in natural language and, in a cooperative perspective, the aim is to explain to the user the variation of numerical values when several values, apparently incoherent, are extracted from the web as possible answers to a question. We present in particular how lexical resources are essential to answer extraction from the web, to the characterization of the variation mode associated with the type of information and to answer generation in natural language. @@ -969,17 +969,17 @@ sarmento-2006-baco - UlrichSchäfer + UlrichSchäfer <fixed-case>O</fixed-case>nto<fixed-case>NER</fixed-case>d<fixed-case>IE</fixed-case> – Mapping and Linking Ontologies to Named Entity Recognition and Information Extraction Resources http://www.lrec-conf.org/proceedings/lrec2006/pdf/196_pdf.pdf Semantic Web and NLP We describe an implemented offline procedure that maps OWL/RDF-encoded ontologies with large, dynamically maintained instance data to named entity recognition (NER) and information extraction (IE) engine resources, preserving hierarchical concept information and links back to the ontology concepts and instances. The main motivations are (i) improving NER/IE precision and recall in closed domains, (ii) exploiting linguistic knowledge (context, inflection, anaphora) for identifying ontology instances in texts more robustly, (iii) giving full access to ontology instances and concepts in natural language processing results, e.g. for subsequent ontology queries, navigation or inference, (iv) avoiding duplication of work in development and maintenance of similar resources in independent places, namely lingware and ontologies. We show an application in hybrid deep-shallow natural language processing that is e.g. used for question analysis in closed domains. Further applications could be automatic hyperlinking or other innovative semantic-web related applications. schafer-2006-ontonerdie - Jonathan G.Fiscus + Jonathan G.Fiscus JeromeAjot NicolasRadde - ChristopheLaprun + ChristopheLaprun Multiple Dimension <fixed-case>L</fixed-case>evenshtein Edit Distance Calculations for Evaluating Automatic Speech Recognition Systems During Simultaneous Speech http://www.lrec-conf.org/proceedings/lrec2006/pdf/197_pdf.pdf Since 1987, the National Institute of Standards and Technology has been providing evaluation infrastructure for the Automatic Speech Recognition (ASR), and more recently referred to as the Speech-To-Text (STT), research community. From the first efforts in the Resource Management domain to the present research, the NIST SCoring ToolKit (SCTK) has formed the tool set for system developers to make continued progress in many domains; Wall Street Journal, Conversational Telephone Speech (CTS), Broadcast News (BN), and Meetings (MTG) to name a few. For these domains, the community agreed to declared sections of simultaneous speech as “not scoreable”. While this had minor impact on most of these domains, the highly interactive nature of Meeting speech rendered a very large fraction of the test material not scoreable. This paper documents a multi-dimensional extension of the Dynamic Programming solution to Levenshtein Edit Distance calculations capable of evaluating STT systems during periods of overlapping, simultaneous speech. @@ -998,7 +998,7 @@ atserias-etal-2006-freeling - JiříSemecký + JiříSemecký On Automatic Assignment of Verb Valency Frames in <fixed-case>C</fixed-case>zech http://www.lrec-conf.org/proceedings/lrec2006/pdf/199_pdf.pdf Many recent NLP applications, including machine translation and information retrieval, could benefit from semantic analysis of language data on the sentence level. This paper presents a method for automatic disambiguation of verb valency frames on Czech data. For each verb occurrence, we extracted features describing its local context. We experimented with diverse types of features, including morphological, syntax-based, idiomatic, animacy and WordNet-based features. The main contribution of the paper lies in determining which ones are most useful for the disambiguation task. The considered features were classified using decision trees, rule-based learning and a Naïve Bayes classifier. We evaluated the methods using 10-fold cross-validation on VALEVAL, a manually annotated corpus of frame annotations containing 7,778 sentences. Syntax-based features have shown to be the most effective. When we used the full set of features, we achieved an accuracy of 80.55% against the baseline 67.87% obtained by assigning the most frequent frame. @@ -1012,7 +1012,7 @@ medlock-2006-introduction - HennieBrugman + HennieBrugman VéroniqueMalaisé LuitGazendam A Web Based General Thesaurus Browser to Support Indexing of Television and Radio Programs @@ -1023,7 +1023,7 @@ JuditFeliu JorgeVivaldi - M. TeresaCabré + M. TeresaCabré <fixed-case>SKELETON</fixed-case>: Specialised knowledge retrieval on the basis of terms and conceptual relations http://www.lrec-conf.org/proceedings/lrec2006/pdf/203_pdf.pdf The main goal of this paper is to present a first approach to an automatic detection of conceptual relations between two terms in specialised written text. Previous experiments on the basis of the manual analysis lead the authors to implement an automatic query strategy combining the term candidates proposed by an extractor together with a list of verbal syntactic patterns used for the relations refinement. Next step on the research will be the integration of the results into the term extractor in order to attain more restrictive pieces of information directly reused for the ontology building task. @@ -1031,7 +1031,7 @@ IreneCramer - Jochen L.Leidner + Jochen L.Leidner DietrichKlakow Building an Evaluation Corpus for <fixed-case>G</fixed-case>erman Question Answering by Harvesting <fixed-case>W</fixed-case>ikipedia http://www.lrec-conf.org/proceedings/lrec2006/pdf/206_pdf.pdf @@ -1048,8 +1048,8 @@ BenjaminWaldron AnnCopestake - UlrichSchäfer - BerndKiefer + UlrichSchäfer + BerndKiefer Preprocessing and Tokenisation Standards in <fixed-case>DELPH</fixed-case>-<fixed-case>IN</fixed-case> Tools http://www.lrec-conf.org/proceedings/lrec2006/pdf/214_pdf.pdf We discuss preprocessing and tokenisation standards within DELPH-IN, a large scale open-source collaboration providing multiple independent multilingual shallow and deep processors. We discuss (i) a component-specific XML interface format which has been used for some time to interface preprocessor results to the PET parser, and (ii) our implementation of a more generic XML interface format influenced heavily by the (ISO working draft) Morphosyntactic Annotation Framework (MAF). Our generic format encapsulates the information which may be passed from the preprocessing stage to a parser: it uses standoff-annotation, a lattice for the representation of structural ambiguity, intra-annotation dependencies and allows for highly structured annotation content. This work builds on the existing Heart of Gold middleware system, and previous work on Robust Minimal Recursion Semantics (RMRS) as part of an inter-component interface. We give examples of usage with a number of the DELPH-IN processing components and deep grammars. @@ -1057,9 +1057,9 @@ JuriApresjan - IgorBoguslavsky + IgorBoguslavsky BorisIomdin - LeonidIomdin + LeonidIomdin AndreiSannikov VictorSizov A Syntactically and Semantically Tagged Corpus of <fixed-case>R</fixed-case>ussian: State of the Art and Prospects @@ -1110,9 +1110,9 @@ nivre-etal-2006-talbanken05 - OanaPostolache + OanaPostolache DanCristea - ConstantinOrasan + ConstantinOrasan Transferring Coreference Chains through Word Alignment http://www.lrec-conf.org/proceedings/lrec2006/pdf/224_pdf.pdf This paper investigates the problem of automatically annotating resources with NP coreference information using a parallel corpus, English-Romanian, in order to transfer, through word alignment, coreference chains from the English part to the Romanian part of the corpus. The results show that we can detect Romanian referential expressions and coreference chains with over 80% F-measure, thus using our method as a preprocessing step followed by manual correction as part of an annotation effort for creating a large Romanian corpus with coreference information is worthwhile. @@ -1142,10 +1142,10 @@ OlenaMedelyan - StefanSchulz + StefanSchulz JanPaetzold MichaelPoprat - KornélMarkó + KornélMarkó Language Specific and Topic Focused Web Crawling http://www.lrec-conf.org/proceedings/lrec2006/pdf/228_pdf.pdf We describe an experiment on collecting large language and topic specific corpora automatically by using a focused Web crawler. Our crawler combines efficient crawling techniques with a common text classification tool. Given a sample corpus of medical documents, we automatically extract query phrases and then acquire seed URLs with a standard search engine. Starting from these seed URLs, the crawler builds a new large collection consisting only of documents that satisfy both the language and the topic model. The manual analysis of acquired English and German medicine corpora reveals the high accuracy of the crawler. However, there are significant differences between both languages. @@ -1159,7 +1159,7 @@ reynaert-2006-corpus - AndreiPopescu-Belis + AndreiPopescu-Belis MariaGeorgescul <fixed-case>TQB</fixed-case>: Accessing Multimodal Data Using a Transcript-based Query and Browsing Interface http://www.lrec-conf.org/proceedings/lrec2006/pdf/233_pdf.pdf @@ -1168,8 +1168,8 @@ FengPan - RutuMulkar - Jerry R.Hobbs + RutuMulkar + Jerry R.Hobbs An Annotated Corpus of Typical Durations of Events http://www.lrec-conf.org/proceedings/lrec2006/pdf/234_pdf.pdf In this paper, we present our work on generating an annotated corpus for extracting information about the typical durations of events from texts. We include the annotation guidelines, the event classes we categorized, the way we use normal distributions to model vague and implicit temporal information, and how we evaluate inter-annotator agreement. The experimental results show that our guidelines are effective in improving the inter-annotator agreement. @@ -1177,7 +1177,7 @@ AgamPatel - Dragomir R.Radev + Dragomir R.Radev Lexical similarity can distinguish between automatic and manual translations http://www.lrec-conf.org/proceedings/lrec2006/pdf/235_pdf.pdf We consider the problem of identifying automatic translations from manual translations of the same sentence. Using two different similarity metrics (BLEU and Levenshtein edit distance), we found out that automatic translations are closer to each other than they are to manual translations. We also use phylogenetic trees to provide a visual representation of the distances between pairs of individual sentences in a set of translations. The differences in lexical distance are statistically significant, both for Chinese to English and for Arabic to English translations. @@ -1206,8 +1206,8 @@ DoaaSamy - Antonio MorenoSandoval - José M.Guirao + Antonio MorenoSandoval + José M.Guirao EnriqueAlfonseca Building a Parallel Multilingual Corpus (<fixed-case>A</fixed-case>rabic-<fixed-case>S</fixed-case>panish-<fixed-case>E</fixed-case>nglish) http://www.lrec-conf.org/proceedings/lrec2006/pdf/238_pdf.pdf @@ -1215,16 +1215,16 @@ samy-etal-2006-building - Donna K.Byron - EricFosler-Lussier + Donna K.Byron + EricFosler-Lussier The <fixed-case>OSU</fixed-case> Quake 2004 corpus of two-party situated problem-solving dialogs http://www.lrec-conf.org/proceedings/lrec2006/pdf/241_pdf.pdf This report describes the Ohio State University Quake 2004 corpus of English spontaneous task-oriented two-person situated dialog. The corpus was collected using a first-person display of an interior space (rooms, corridors, stairs) in which the partners collaborate on a treasure hunt task. The corpus contains exciting new features such as deictic and exophoric reference, language that is calibrated against the spatial arrangement of objects in the world, and partial-observability of the task world imposed by the perceptual limitations inherent in the physical arrangement of the world. The corpus differs from prior dialog collections which intentionally restricted the interacting subjects from sharing any perceptual context, and which allowed one subject (the direction-giver or system) to have total knowledge of the state of the task world. The corpus consists of audio/video recordings of each person's experience in the virtual world and orthographic transcriptions. The virtual world can also be used by other researchers who want to conduct additional studies using this stimulus. byron-fosler-lussier-2006-osu - Md. AminulIslam - DianaInkpen + Md. AminulIslam + DianaInkpen Second Order Co-occurrence <fixed-case>PMI</fixed-case> for Determining the Semantic Similarity of Words http://www.lrec-conf.org/proceedings/lrec2006/pdf/242_pdf.pdf This paper presents a new corpus-based method for calculating the semantic similarity of two target words. Our method, called Second Order Co-occurrencePMI (SOC-PMI), uses Pointwise Mutual Information to sort lists of important neighbor words of the two target words. Then we consider the words which are common in both lists and aggregate their PMI values (from the opposite list) to calculate the relative semantic similarity. Our method was empirically evaluated using Miller and Charler’s (1991) 30 noun pair subset, Ruben-stein and Goodenough’s (1965) 65 noun pairs, 80 synonym test questions from the Test of English as a Foreign Language (TOEFL), and 50 synonym test questions from a collection of English as a Second Language (ESL) tests. Evaluation results show that our method outperforms several competing corpus-based methods. @@ -1248,7 +1248,7 @@ VáclavNovák - JanHajič + JanHajič Perspectives of Turning <fixed-case>P</fixed-case>rague Dependency Treebank into a Knowledge Base http://www.lrec-conf.org/proceedings/lrec2006/pdf/245_pdf.pdf Recently, the Prague Dependency Treebank 2.0 (PDT 2.0) has emerged as the largest text corpora annotated on the level of tectogrammatical representation (“linguistic meaning”) described in Sgall et al. (2004) and containing about 0.8 milion words (see Hajic (2004)). We hope that this level of annotation is so close to the meaning of the utterances contained in the corpora that it should enable us to automatically transform texts contained in the corpora to the form of knowledge base, usable for information extraction, question answering, summarization, etc. We can use Multilayered Extended Semantic Networks (MultiNet) described in Helbig (2006) as the target formalism. In this paper we discuss the suitability of such approach and some of the main issues that will arise in the process. In section 1, we introduce formalisms underlying PDT 2.0 and MultiNet, in section 2. We describe the role MultiNet can play in the system of Functional Generative Description (FGD), section 3 discusses issues of automatic conversion to MultiNet and section 4 gives some conclusions. @@ -1270,7 +1270,7 @@ QianYang Jean-PierreMartens NannekeKonings - Henkvan den Heuvel + Henkvan den Heuvel Development of a phoneme-to-phoneme (p2p) converter to improve the grapheme-to-phoneme (g2p) conversion of names http://www.lrec-conf.org/proceedings/lrec2006/pdf/248_pdf.pdf It is acknowledged that a good phonemic transcription of proper names is imperative for the success of many modern speech-based services such as directory assistance, car navigation, etc. It is also known that state-of-the-art general-purpose grapheme-to-phoneme (g2p) converters perform rather poorly on many name categories. This paper proposes to use a g2p-p2p tandem comprising a state-of-the-art general-purpose g2p converter that produces an initial transcription and a name category specific phoneme-to-phoneme (p2p) converter that aims at correcting the mistakes made by the g2p converter. The main body of the paper describes a novel methodology for the automatic construction of the p2p converter. The methodology is implemented in a software toolbox that will be made publicly available in a form that will permit the user to design a p2p converter for an arbitrary name category. To give a proof of concept, the toolbox was used for the development of three p2p converters for first names, surnames and geographical names respectively. The obtained systems are small (few rules) and effective: significant improvements (up to 50% relative) of the grapheme-to-phoneme conversion are obtained. These encouraging results call for a further development and improvement of the approach. @@ -1279,15 +1279,15 @@ JaeyoungJung MakiMiyake - HiroyukiAkam + HiroyukiAkam Recurrent <fixed-case>M</fixed-case>arkov Cluster (<fixed-case>RMCL</fixed-case>) Algorithm for the Refinement of the Semantic Network http://www.lrec-conf.org/proceedings/lrec2006/pdf/249_pdf.pdf The purpose of this work is to propose a new methodology to ameliorate the Markov Cluster (MCL) Algorithm that is well known as an efficient way of graph clustering (Van Dongen, 2000). The MCL when applied to a graph of word associations has the effect of producing concept areas in which words are grouped into the similar topics or similar meanings as paradigms. However, since a word is determined to belong to only one cluster that represents a concept, Markov clusters cannot show the polysemy or semantic indetermination among the properties of natural language. Our Recurrent MCL (RMCL) allows us to create a virtual adjacency relationship among the Markov hard clusters and produce a downsized and intrinsically informative semantic network of word association data. We applied one of the RMCL algorithms (Stepping-stone type) to a Japanese associative concept dictionary and obtained a satisfactory level of performance in refining the semantic network generated from MCL. jung-etal-2006-recurrent - CatiaCucchiarini - HugoVan hamme + CatiaCucchiarini + HugoVan hamme Olgavan Herwijnen FelixSmits <fixed-case>JASMIN</fixed-case>-<fixed-case>CGN</fixed-case>: Extension of the Spoken <fixed-case>D</fixed-case>utch Corpus with Speech of Elderly People, Children and Non-natives in the Human-Machine Interaction Modality @@ -1296,7 +1296,7 @@ cucchiarini-etal-2006-jasmin - CédrickFairon + CédrickFairon SébastienPaumier A framework for real-time dictionary updating http://www.lrec-conf.org/proceedings/lrec2006/pdf/255_pdf.pdf @@ -1304,8 +1304,8 @@ fairon-paumier-2006-framework - VicenteAlabau - Carlos D.Martínez + VicenteAlabau + Carlos D.Martínez Bilingual speech corpus in two phonetically similar languages http://www.lrec-conf.org/proceedings/lrec2006/pdf/256_pdf.pdf As Speech Recognition Systems improve, they become suitable for facingnew problems. Multilingual speech recognition is one such problems. In the present work, the case of the Comunitat Valenciana multilingual environment is studied. The official languages in the Comunitat Valenciana (Spanish and Valencian) share most of their acoustic units, and their vocabularies and syntax are quite similar. They have influenced each other for many years.A small corpus on an Information System task was developed for experimentationpurposes.This choice will make it possible to develop a working prototype in the future,and it is simple enough to build semi-automatic language models. The design of the acoustic corpus is discussed, showing that all combinations of accents have been studied (native, non-native speakers, male, female, etc.). @@ -1315,7 +1315,7 @@ VincentVandeghinste InekeSchuurman MichaelCarl - StellaMarkantonatou + StellaMarkantonatou ToniBadia <fixed-case>METIS</fixed-case>-<fixed-case>II</fixed-case>: Machine Translation for Low Resource Languages http://www.lrec-conf.org/proceedings/lrec2006/pdf/258_pdf.pdf @@ -1323,10 +1323,10 @@ vandeghinste-etal-2006-metis - ElisabethD’Halleweyn + ElisabethD’Halleweyn JanOdijk - LisanneTeunissen - CatiaCucchiarini + LisanneTeunissen + CatiaCucchiarini The <fixed-case>D</fixed-case>utch-<fixed-case>F</fixed-case>lemish <fixed-case>HLT</fixed-case> Programme <fixed-case>STEVIN</fixed-case>: Essential Speech and Language Technology Resources http://www.lrec-conf.org/proceedings/lrec2006/pdf/259_pdf.pdf In 2004 a consortium of ministries and organizations in the Netherlands and Flanders launched the comprehensive Dutch-Flemish HLT programme STEVIN (a Dutch acronym for “Essential Speech and Language Technology Resources”). To guarantee its Dutch-Flemish character, this large-scale programme is carried out under the auspices of the intergovernmental Dutch Language Union (NTU). The aim of STEVIN is to contribute to the further progress of HLT for the Dutch language, by raising awareness of HLT results, stimulating the demand of HLT products, promoting strategic research in HLT, and developing HLT resources that are essential and are known to be missing. Furthermore, a structure was set up for the management, maintenance and distribution of HLT resources. The STEVIN programme, which will run from 2004 to 2009, resulted from HLT activities in the Dutch language area, which were reported on at previous LREC conferences (2000, 2002, 2004). In this paper we will explain how different activities are combined in one comprehensive programme. We will show how cooperation can successfully be realized between different parties (language and speech technology, Flanders and the Netherlands, academia, industry and policy institutions) so as to achieve one common goal: progress in HLT. @@ -1350,7 +1350,7 @@ goecke-witt-2006-exploiting - CédrickFairon + CédrickFairon SébastienPaumier A translated corpus of 30,000 <fixed-case>F</fixed-case>rench <fixed-case>SMS</fixed-case> http://www.lrec-conf.org/proceedings/lrec2006/pdf/270_pdf.pdf @@ -1369,7 +1369,7 @@ AnnaSinopalnikova - PavelSmrž + PavelSmrž Intelligent Dictionary Interfaces: Usability Evaluation of Access-Supporting Enhancements http://www.lrec-conf.org/proceedings/lrec2006/pdf/275_pdf.pdf The present paper describes psycholinguistic experiments aimed at exploring the way people behave while accessing electronic dictionaries. In our work we focused on the access by meaning that, in comparison with the access by form, is currently less studied and very seldom implemented in modern dictionary interfaces. Thus, the goal of our experiments was to explore dictionary users’ requirements and to study what services an intelligent dictionary interface should be able to supply to help solving access by meaning problems. We tested several access-supporting enhancements of electronic dictionaries based on various language resources (corpora, wordnets, word association norms and explanatory dictionaries). Experiments were carried out with native speakers of three European languages – English, Czech and Russian. Results for monolingual and bilingual cases are presented. @@ -1385,8 +1385,8 @@ mogele-etal-2006-smartweb - MarkétaLopatková - ZdeněkŽabokrtský + MarkétaLopatková + ZdeněkŽabokrtský KarolinaSkwarska Valency Lexicon of <fixed-case>C</fixed-case>zech Verbs: Alternation-Based Model http://www.lrec-conf.org/proceedings/lrec2006/pdf/278_pdf.pdf @@ -1424,7 +1424,7 @@ kawtrakul-etal-2006-ontology - CorinaForăscu + CorinaForăscu Ionuț CristianPistol DanCristea Temporality in relation with discourse structure @@ -1433,15 +1433,15 @@ forascu-etal-2006-temporality - EvaHajičová - PetrSgall + EvaHajičová + PetrSgall Corpus Annotation as a Test of a Linguistic Theory http://www.lrec-conf.org/proceedings/lrec2006/pdf/283_pdf.pdf In the present contribution we claim that corpus annotation serves, among other things, as an invaluable test for linguistic theories standing behind the annotation schemes, and as such represents an irreplaceable resource of linguistic information for the build-up of grammars. To support this claim we present four linguistic phenomena for the study and relevant description of which in grammar a deep layer of corpus annotation as introduced in the Prague Dependency Treebank has brought important observations, namely the information structure of the sentence, condition of projectivity and word order, types of dependency relations and textual coreference. hajicova-sgall-2006-corpus - OndřejBojar + OndřejBojar MagdelenaProkopová <fixed-case>C</fixed-case>zech-<fixed-case>E</fixed-case>nglish Word Alignment http://www.lrec-conf.org/proceedings/lrec2006/pdf/285_pdf.pdf @@ -1449,7 +1449,7 @@ bojar-prokopova-2006-czech - EmilianoGuevara + EmilianoGuevara SergioScalise AntoniettaBisetto ChiaraMelloni @@ -1467,7 +1467,7 @@ sonntag-romanelli-2006-multimodal - NicoleGrégoire + NicoleGrégoire Elaborating the parameterized Equivalence Class Method for <fixed-case>D</fixed-case>utch http://www.lrec-conf.org/proceedings/lrec2006/pdf/292_pdf.pdf This paper discusses the parameterized Equivalence Class Method for Dutch, an approach developed to incorporate standard lexical representations for Dutch idioms into representations required by any specific NLP system with as minimal manual work as possible. The purpose of the paper is to give an overview of parameters applicable to Dutch, which are determined by examining a large set of data and two Dutch NLP systems. The effects of the introduced parameters are evaluated and the results presented. @@ -1477,14 +1477,14 @@ AndersGreen HelgeHüttenrauch Elin AnnaTopp - KerstinSeverinson + KerstinSeverinson Developing a <fixed-case>C</fixed-case>ontextualized<fixed-case>M</fixed-case>ultimodal Corpus for Human-Robot Interaction http://www.lrec-conf.org/proceedings/lrec2006/pdf/293_pdf.pdf This paper describes the development process of a contextualized corpus for research on Human-Robot Communication. The data have been collected in two Wizard-of-Oz user studies performedwith 22 and 5 users respectively in a scenario that is called the HomeTour. In this scenario the users show the environment (a single room, or a whole floor) to the robot using a combination of speech and gestures. The corpus has been transcribed and annotated with respect to gestures and conversational acts, thus forming a core annotation. We have also annotated or linked other types of data, e.g., laser range finder readings, positioning analysis, questionnaire data and task descriptions that form the annotated context of the scenario. By providing a rich set of different annotated data, thecorpus is thus an important resource both for research on natural language speech interfaces for robots and for research on human-robot communication in general. green-etal-2006-developing - Wei-YunMa + Wei-YunMa Chu-RenHuang Uniform and Effective Tagging of a Heterogeneous Giga-word Corpus http://www.lrec-conf.org/proceedings/lrec2006/pdf/294_pdf.pdf @@ -1494,14 +1494,14 @@ Ana-MariaBarbu EmilIonescu - Verginica BarbuMititelu + Verginica BarbuMititelu <fixed-case>R</fixed-case>omanian Valence Dictionary in <fixed-case>XML</fixed-case> Format http://www.lrec-conf.org/proceedings/lrec2006/pdf/295_pdf.pdf Valence dictionaries are dictionaries in which logical predicates (most of the times verbs) are inventoried alongside with the semantic and syntactic information regarding the role of the arguments with which they combine, as well as the syntactic restrictions these arguments have to obey. In this article we present the incipient stage of the project “Syntactic and semantic database in XML format: an HPSG representation of verb valences in Romanian”. Its aim is the development of a valence dictionary in XML format for a set of 3000 Romanian verbs. Valences are specified for each sense of each verb, alongside with an illustrative example, possible argument alternations and a set of multiword expressions in which the respective verb occurs with the respective sense. The grammatical formalism we make use of is Head-driven Phrase Structure Grammar, which offers one of the most comprehensive frames of encoding various types of linguistic information for lexical items. XML is the most appropriate mark-up language for describing information structured in HPSG framework. The project can be further on extended so that to cover all Romanian verbs (around 7000) and also other predicates (nouns, adjectives, prepositions). barbu-etal-2006-romanian - Niels OleBernsen + Niels OleBernsen Thomas K.Hansen SvendKiilerich Torben KruchovMadsen @@ -1513,7 +1513,7 @@ WeiLi WenjieLi - QinLu + QinLu Mining Implicit Entities in Queries http://www.lrec-conf.org/proceedings/lrec2006/pdf/297_pdf.pdf Entities are pivotal in describing events and objects, and also very important in Document Summarization. In general only explicit entities which can be extracted by a Named Entity Recognizer are used in real applications. However, implicit entities hidden behind the phrases or words, e.g. entity referred by the phrase “cross border”, are proved to be helpful in Document Summarization. In our experiment, we extract the implicit entities from the web resources. @@ -1550,8 +1550,8 @@ areta-etal-2006-structure - JosephPolifroni - MarilynWalker + JosephPolifroni + MarilynWalker Learning Database Content for Spoken Dialogue System Design http://www.lrec-conf.org/proceedings/lrec2006/pdf/301_pdf.pdf Spoken dialogue systems are common interfaces to backend data in information retrieval domains. As more data is made available on the Web and IE technology matures, dialogue systems, whether they be speech- or text-based, will be more in demand to provide user-friendly access to this data. However, dialogue systems must become both easier to configure, as well as more informative than the traditional form-based systems that are currently available. We present techniques in this paper to address the issue of automating both content selection for use in summary responses and in system initiative queries. @@ -1559,7 +1559,7 @@ JorgeCivera - AlfonsJuan + AlfonsJuan Bilingual Machine-Aided Indexing http://www.lrec-conf.org/proceedings/lrec2006/pdf/304_pdf.pdf The proliferation of multilingual documentation in our Information Society has become a common phenomenon. This documentation is usually categorised by hand, entailing a time-consuming and arduous burden. This is particularly true in the case of keyword assignment, in which a list of keywords (descriptors) from a controlled vocabulary (thesaurus) is assigned to a document. A possible solution to alleviate this problem comes from the hand of the so-called Machine-Aided Indexing (MAI) systems. These systems work in cooperation with professional indexer by providing a initial list of descriptors from which those most appropiated will be selected. This way of proceeding increases the productivity and eases the task of indexers. In this paper, we propose a statistical text classification framework for bilingual documentation, from which we derive two novel bilingual classifiers based on the naive combination of monolingual classifiers. We report preliminary results on the multilingual corpus Acquis Communautaire (AC) that demonstrates the suitability of the proposed classifiers as the backend of a fully-working MAI system. @@ -1575,7 +1575,7 @@ panunzi-etal-2006-integrating - Luís FernandoCosta + Luís FernandoCosta LuísSarmento Component Evaluation in a Question Answering System http://www.lrec-conf.org/proceedings/lrec2006/pdf/306_pdf.pdf @@ -1594,8 +1594,8 @@ NasredineSemmar - MeriamaLaib - ChristianFluhr + MeriamaLaib + ChristianFluhr A Deep Linguistic Analysis for Cross-language Information Retrieval http://www.lrec-conf.org/proceedings/lrec2006/pdf/308_pdf.pdf Cross-language information retrieval consists in providing a query in one language and searching documents in one or different languages. These documents are ordered by the probability of being relevant to the user's request. The highest ranked document is considered to be the most likely relevant document. The LIC2M cross-language information retrieval system is a weighted Boolean search engine based on a deep linguistic analysis of the query and the documents. This system is composed of a linguistic analyzer, a statistic analyzer, a reformulator, a comparator and a search engine. The linguistic analysis processes both documents to be indexed and queries to extract concepts representing their content. This analysis includes a morphological analysis, a part-of-speech tagging and a syntactic analysis. In this paper, we present the deep linguistic analysis used in the LIC2M cross-lingual search engine and we will particularly focus on the impact of the syntactic analysis on the retrieval effectiveness. @@ -1612,7 +1612,7 @@ LinaHenriksen ClausPovlsen - AndrejsVasiljevs + AndrejsVasiljevs <fixed-case>E</fixed-case>uro<fixed-case>T</fixed-case>erm<fixed-case>B</fixed-case>ank - a Terminology Resource based on Best Practice http://www.lrec-conf.org/proceedings/lrec2006/pdf/310_pdf.pdf The new EU member countries face the problems of terminology resource fragmentation and lack of coordination in terminology development in general. The EuroTermBank project aims at contributing to improve the terminology infrastructure of the new EU countries and the project will result in a centralized online terminology bank - interlinked to other terminology banks and resources - for languages of the new EU member countries. The main focus of this paper is on a description of how to identify best practice within terminology work seen from a broad perspective. Surveys of real life terminology work have been conducted and these surveys have resulted in identification of scenario specific best practice descriptions of terminology work. Furthermore, this paper will present an outline of the specific criteria that have been used for selection of existing term resources to be included in the EuroTermBank database. @@ -1620,12 +1620,12 @@ FlorbelaBarreto - AntónioBranco + AntónioBranco EduardoFerreira AmáliaMendes - Maria Fernanda Bacelar doNascimento + Maria Fernanda Bacelar doNascimento FilipeNunes - João RicardoSilva + João RicardoSilva Open Resources and Tools for the Shallow Processing of <fixed-case>P</fixed-case>ortuguese: The <fixed-case>T</fixed-case>ag<fixed-case>S</fixed-case>hare Project http://www.lrec-conf.org/proceedings/lrec2006/pdf/311_pdf.pdf This paper presents the TagShare project and the linguistic resources and tools for the shallow processing of Portuguese developed in its scope. These resources include a 1 million token corpus that has been accurately hand annotated with a variety of linguistic information, as well as several state of the art shallow processing tools capable of automatically producing that type of annotation. At present, the linguistic annotations in the corpus are sentence and paragraph boundaries, token boundaries, morphosyntactic POS categories, values of inflection features, lemmas and namedentities. Hence, the set of tools comprise a sentence chunker, a tokenizer, a POS tagger, nominal and verbal analyzers and lemmatizers, a verbal conjugator, a nominal “inflector”, and a namedentity recognizer, some of which underline several online services. @@ -1636,14 +1636,14 @@ BelindaMaia DianaSantos AnaPinto - LuísCabral + LuísCabral Corpógrafo V3 - From Terminological Aid to Semi-automatic Knowledge Engineering http://www.lrec-conf.org/proceedings/lrec2006/pdf/312_pdf.pdf In this paper we will present Corpógrafo, a mature web-based environment for working with corpora, for terminology extraction, and for ontology development. We will explain Corpógrafo’s workflow and describe the most important information extraction methods used, namely its term extraction, and definition / semantic relations identification procedures. We will describe current Corpógrafo users and present a brief overview of the XML format currently used to export terminology databases. Finally, we present future improvements for this tool. sarmento-etal-2006-corpografo - LiviuDinu + LiviuDinu AncaDinu On the data base of <fixed-case>R</fixed-case>omanian syllables and some of its quantitative and cryptographic aspects http://www.lrec-conf.org/proceedings/lrec2006/pdf/313_pdf.pdf @@ -1652,7 +1652,7 @@ Alessandro BahgatShehata - Fabio MassimoZanzotto + Fabio MassimoZanzotto A Dependency-based Algorithm for Grammar Conversion http://www.lrec-conf.org/proceedings/lrec2006/pdf/314_pdf.pdf In this paper we present a model to transfer a grammatical formalism in another. The model is applicable only on restrictive conditions. However, it is fairly useful for many purposes: parsing evaluation, researching methods for truly combining different parsing outputs to reach better parsing performances, and building larger syntactically annotated corpora for data-driven approaches. The model has been tested over a case study: the translation of the Turin Tree Bank Grammar to the Shallow Grammar of the CHAOS Italian parser. @@ -1676,8 +1676,8 @@ ChloéClavel - IoanaVasilescu - LaurenceDevillers + IoanaVasilescu + LaurenceDevillers ThibautEhrette GaëlRichard Fear-type emotions of the <fixed-case>SAFE</fixed-case> Corpus: annotation issues @@ -1688,7 +1688,7 @@ ArneMauser EvgenyMatusov - HermannNey + HermannNey Training a Statistical Machine Translation System without <fixed-case>GIZA</fixed-case>++ http://www.lrec-conf.org/proceedings/lrec2006/pdf/320_pdf.pdf The IBM Models (Brown et al., 1993) enjoy great popularity in the machine translation community because they offer high quality word alignments and a free implementation is available with the GIZA++ Toolkit (Och and Ney, 2003). Several methods have been developed to overcome the asymmetry of the alignment generated by the IBM Models. A remaining disadvantage, however, is the high model complexity. This paper describes a word alignment training procedure for statistical machine translation that uses a simple and clear statistical model, different from the IBM models. The main idea of the algorithm is to generate a symmetric and monotonic alignment between the target sentence and a permutation graph representing different reorderings of the words in the source sentence. The quality of the generated alignment is shown to be comparable to the standard GIZA++ training in an SMT setup. @@ -1712,7 +1712,7 @@ fujii-etal-2006-test - ŽeljkoAgić + ŽeljkoAgić MarkoTadić Evaluating Morphosyntactic Tagging of <fixed-case>C</fixed-case>roatian Texts http://www.lrec-conf.org/proceedings/lrec2006/pdf/326_pdf.pdf @@ -1720,18 +1720,18 @@ agic-tadic-2006-evaluating - MartinRajman + MartinRajman MaritaAilomaa AgnesLisowska MiroslavMelichar - SusanArmstrong + SusanArmstrong Extending the <fixed-case>W</fixed-case>izard of <fixed-case>O</fixed-case>z Methodologie for Multimodal Language-enabled Systems http://www.lrec-conf.org/proceedings/lrec2006/pdf/328_pdf.pdf In this paper we present a proposal for extending the standard Wizard of Oz experimental methodology to language-enabled multimodal systems. We first discuss how Wizard of Oz experiments involving multimodal systems differ from those involving voice-only systems. We then go on to discuss the Extended Wizard of Oz methodology and the Wizard of Oz testing environment and protocol that we have developed. We then describe an example of applying this methodology to Archivus, a multimodal system for multimedia meeting retrieval and browsing. We focus in particular on the tools that the wizards would need to successfully and efficiently perform their tasks in a multimodal context. We conclude with some general comments about which questions need to be addressed when developing and using the Wizard of Oz methodology for testing multimodal systems. rajman-etal-2006-extending - Gertjanvan Noord + Gertjanvan Noord InekeSchuurman VincentVandeghinste Syntactic Annotation of Large Corpora in <fixed-case>STEVIN</fixed-case> @@ -1748,14 +1748,14 @@ buscaldi-rosso-2006-mining - SabineSchulte im Walde + SabineSchulte im Walde Human Verb Associations as the Basis for Gold Standard Verb Classes: Validation against <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et and <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et http://www.lrec-conf.org/proceedings/lrec2006/pdf/333_pdf.pdf We describe a gold standard for semantic verb classes which is based on human associations to verbs. The associations were collected in a web experiment and then applied as verb features in a hierarchical cluster analysis. We claim that the resulting classes represent a theory-independent gold standard classification which covers a variety of semantic verb relations, and whose features can be used to guide the feature selection in automatic processes. To evaluate our claims, the association-based classification is validated against two standard approaches to semantic verb classes, GermaNet and FrameNet. schulte-im-walde-2006-human - Peter W.Wagacha + Peter W.Wagacha GuyDe Pauw Pauline W.Githinji A Grapheme-Based Approach for Accent Restoration in <fixed-case>G</fixed-case>ikuyu @@ -1764,8 +1764,8 @@ wagacha-etal-2006-grapheme - Juan José RodríguezSoler - Pedro ConcejeroCerezo + Juan José RodríguezSoler + Pedro ConcejeroCerezo Daniel TapiasMerino JoséSánchez <fixed-case>MEDUSA</fixed-case>: User-Centred Design and usability evaluation of Automatic Speech Recognition telephone services in Telefónica Móviles España @@ -1778,7 +1778,7 @@ KatrinErk AnetteFrank AndreaKowalski - SebastianPadó + SebastianPadó ManfredPinkal The <fixed-case>SALSA</fixed-case> Corpus: a <fixed-case>G</fixed-case>erman Corpus Resource for Lexical Semantics http://www.lrec-conf.org/proceedings/lrec2006/pdf/339_pdf.pdf @@ -1790,9 +1790,9 @@ BrunoPouliquen AnnaWidiger CameliaIgnat - TomažErjavec - DanTufiş - DánielVarga + TomažErjavec + DanTufiş + DánielVarga The <fixed-case>JRC</fixed-case>-<fixed-case>A</fixed-case>cquis: A Multilingual Aligned Parallel Corpus with 20+ Languages http://www.lrec-conf.org/proceedings/lrec2006/pdf/340_pdf.pdf We present a new, unique and freely available parallel corpus containing European Union (EU) documents of mostly legal nature. It is available in all 20 official EU languages, with additional documents being available in the languages of the EU candidate countries. The corpus consists of almost 8,000 documents per language, with an average size of nearly 9 million words per language. Pair-wise paragraph alignment information produced by two different aligners (Vanilla and HunAlign) is available for all 190+ language pair combinations. Most texts have been manually classified according to the EUROVOC subject domains so that the collection can also be used to train and test multi-label classification algorithms and keyword-assignment software. The corpus is encoded in XML, according to the Text Encoding Initiative Guidelines. Due to the large number of parallel texts in many languages, the JRC-Acquis is particularly suitable to carry out all types of cross-language research, as well as to test and benchmark text analysis software across different languages (for instance for alignment, sentence splitting and term extraction). @@ -1803,14 +1803,14 @@ KatrinErk AnetteFrank AndreaKowalski - SebastianPado + SebastianPado <fixed-case>SALTO</fixed-case> - A Versatile Multi-Level Annotation Tool http://www.lrec-conf.org/proceedings/lrec2006/pdf/341_pdf.pdf In this paper, we describe the SALTO tool. It was originally developed for the annotation of semantic roles in the frame semantics paradigm, but can be used for graphical annotation of treebanks with general relational information in a simple drag-and-drop fashion. The tool additionally supports corpus management and quality control. burchardt-etal-2006-salto - VéroniqueHoste + VéroniqueHoste GuyDe Pauw <fixed-case>KNACK</fixed-case>-2002: a Richly Annotated Corpus of <fixed-case>D</fixed-case>utch Written Text http://www.lrec-conf.org/proceedings/lrec2006/pdf/342_pdf.pdf @@ -1828,7 +1828,7 @@ TomoyukiKato - TomikiToda + TomikiToda HiroshiSaruwatari KiyohiroShikano Transcription Cost Reduction for Constructing Acoustic Models Using Acoustic Likelihood Selection Criteria @@ -1845,8 +1845,8 @@ mieskes-strube-2006-part - BranimirBoguraev - Rie KubotaAndo + BranimirBoguraev + Rie KubotaAndo Analysis of <fixed-case>T</fixed-case>ime<fixed-case>B</fixed-case>ank as a Resource for <fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> Parsing http://www.lrec-conf.org/proceedings/lrec2006/pdf/346_pdf.pdf In our work, we present an analysis of the TimeBank corpus---the only available reference sample of TimeML-compliant annotation---from the point of view of its utility as a training resource for developing automated TimeML annotators. We are encouraged by experimental results indicative of the potential of TimeBank; at the same time, closer inspection of causes for some systematic errors shows off certain deficiencies in the corpus, primarily to do with small size and inconsistent annotation. Our analysis suggests that even a reference resource, developed outside of a rigorous process of training corpus design and creation, can be extremely valuable for training and development purposes. The analysis also highlights areas of correction and improvement for evolving the current reference corpus into a community infrastructure resource. @@ -1861,10 +1861,10 @@ kawahara-kurohashi-2006-case - PatrickParoubek + PatrickParoubek IsabelleRobba AnneVilnat - ChristelleAyache + ChristelleAyache Data, Annotations and Measures in <fixed-case>EASY</fixed-case> the Evaluation Campaign for Parsers of <fixed-case>F</fixed-case>rench. http://www.lrec-conf.org/proceedings/lrec2006/pdf/348_pdf.pdf This paper presents the protocol of EASY the evaluation campaign for syntactic parsers of French in the EVALDA project of the TECHNOLANGUE program. We describe the participants, the corpus and its genre partitioning, the annotation scheme, which allows for the annotation of both constituents and relations, the evaluation methodology and, as an illustration, the results obtained by one participant on half of the corpus. @@ -1882,7 +1882,7 @@ GregoryGrefenstette FathiDebili - ChristianFluhr + ChristianFluhr SvitlanaZinger Exploiting text for extracting image processing resources http://www.lrec-conf.org/proceedings/lrec2006/pdf/350_pdf.pdf @@ -1890,7 +1890,7 @@ grefenstette-etal-2006-exploiting - NaoakiOkazaki + NaoakiOkazaki SophiaAnaniadou Clustering acronyms in biomedical text for disambiguation http://www.lrec-conf.org/proceedings/lrec2006/pdf/351_pdf.pdf @@ -1898,8 +1898,8 @@ okazaki-ananiadou-2006-clustering - GoranNenadic - NaokiOkazaki + GoranNenadic + NaokiOkazaki SophiaAnaniadou Towards a terminological resource for biomedical text mining http://www.lrec-conf.org/proceedings/lrec2006/pdf/352_pdf.pdf @@ -1916,9 +1916,9 @@ DavidGuthrie BenAllison - WeiLiu - LouiseGuthrie - YorickWilks + WeiLiu + LouiseGuthrie + YorickWilks A Closer Look at Skip-gram Modelling http://www.lrec-conf.org/proceedings/lrec2006/pdf/357_pdf.pdf Data sparsity is a large problem in natural language processing that refers to the fact that language is a system of rare events, so varied and complex, that even using an extremely large corpus, we can never accurately model all possible strings of words. This paper examines the use of skip-grams (a technique where by n-grams are still stored to model language, but they allow for tokens to be skipped) to overcome the data sparsity problem. We analyze this by computing all possible skip-grams in a training corpus and measure how many adjacent (standard) n-grams these cover in test documents. We examine skip-gram modelling using one to four skips with various amount of training data and test against similar documents as well as documents generated from a machine translation system. In this paper we also determine the amount of extra training data required to achieve skip-gram coverage using standard adjacent tri-grams. @@ -1933,8 +1933,8 @@ dobrov-loukachevitch-2006-development - Matthew W.Bilotti - EricNyberg + Matthew W.Bilotti + EricNyberg Evaluation for Scenario Question Answering Systems http://www.lrec-conf.org/proceedings/lrec2006/pdf/360_pdf.pdf Scenario Question Answering is a relatively new direction in Question Answering (QA) research that presents a number of challenges for evaluation. In this paper, we propose a comprehensive evaluation strategy for Scenario QA, including amethodology for building reusable test collections for Scenario QA and metrics for evaluating system performance over such test collections. Using this methodology, we have built a test collection, which we have made available for public download as a service to the research community. It is our hope that widespread availability of quality evaluation materials fuels research in new approaches to the Scenario QA task. @@ -1958,9 +1958,9 @@ GyörgySzarvas - RichárdFarkas - LászlóFelföldi - AndrásKocsor + RichárdFarkas + LászlóFelföldi + AndrásKocsor JánosCsirik A highly accurate Named Entity corpus for <fixed-case>H</fixed-case>ungarian http://www.lrec-conf.org/proceedings/lrec2006/pdf/365_pdf.pdf @@ -2007,7 +2007,7 @@ ManfredSailer - BeataTrawiński + BeataTrawiński The Collection of Distributionally Idiosyncratic Items: A Multilingual Resource for Linguistic Research http://www.lrec-conf.org/proceedings/lrec2006/pdf/375_pdf.pdf We present two collections of lexical items with idiosyncratic distribution. The collections document the behavior of German and English bound words (BW, such as English “headway”), i.e., words which can only occur in one expression (“make headway”). BWs are a problem for both general and idiomatic dictionaries since it is unclear whether they have an independent lexical status and to what extent the expressions in which they occur are typical idiomatic expressions. We propose a system which allows us to document the information about BWs from dictionaries and linguistic literature, together with corpus data and example queries for major text corpora. We present our data structure and point to other phraseologically oriented collections. We will also show differences between the German and the English collection. @@ -2016,7 +2016,7 @@ UlrichHeid ElsabéTaljard - Danie J.Prinsloo + Danie J.Prinsloo Grammar-based tools for the creation of tagging resources for an unresourced language: the case of <fixed-case>N</fixed-case>orthern <fixed-case>S</fixed-case>otho http://www.lrec-conf.org/proceedings/lrec2006/pdf/376_pdf.pdf We describe an architecture for the parallel construction of a tagger lexicon and an annotated reference corpus for the part-of-speech tagging of Nothern Sotho, a Bantu language of South Africa, for which no tagged resources have been available so far. Our tools make use of grammatical properties (morphological and syntactic) of the language. We use symbolic pretagging, followed by stochastic tagging, an architecture which proves useful not only for the bootstrapping of tagging resources, but also for the tagging of any new text. We discuss the tagset design, the tool architecture and the current state of our ongoing effort. @@ -2031,9 +2031,9 @@ de-sousa-trippel-2006-building - Maria TeresaPazienza + Maria TeresaPazienza MarcoPennacchiotti - Fabio MassimoZanzotto + Fabio MassimoZanzotto Mixing <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et, <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et and <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank for studying verb relations http://www.lrec-conf.org/proceedings/lrec2006/pdf/379_pdf.pdf In this paper we present a novel resource for studying the semantics of verb relations. The resource is created by mixing sense relational knowledge enclosed in WordNet, frame knowledge enclosed in VerbNet and corpus knowledge enclosed in PropBank. As a result, a set of about 1000 frame pairs is made available. A frame pair represents a pair of verbs in a peculiar semantic relation accompanied with specific information, such as: the syntactic-semantic frames of the two verbs, the mapping among their thematic roles and a set of textual examples extracted from the PennTreeBank. We specifically focus on four relations: Troponymy, Causation, Entailment and Antonymy. The different steps required for the mapping are described in detail and statistics on resource mutual coverage are reported. We also propose a practical use of the resource for the task of Textual Entailment acquisition and for Question Answering. A first attempt for automate the mapping among verb arguments is also presented: early experiments show that simple techniques can achieve good results, up to 85% F-Measure. @@ -2041,7 +2041,7 @@ LeoWanner - Margarita AlonsoRamos + Margarita AlonsoRamos Local Document Relevance Clustering in <fixed-case>IR</fixed-case> Using Collocation Information http://www.lrec-conf.org/proceedings/lrec2006/pdf/381_pdf.pdf A series of different automatic query expansion techniques has been suggested in Information Retrieval. To estimate how suitable a document term is as an expansion term, the most popular of them use a measure of the frequency of the co-occurrence of this term with one or several query terms. The benefit of the use of the linguistic relations that hold between query terms is often questioned. If a linguistic phenomenon is taken into account, it is the phrase structure or lexical compound. We propose a technique that is based on the restricted lexical cooccurrence (collocation) of query terms. We use the knowledge on collocations formed by query terms for two tasks: (i) document relevance clustering done in the first stage of local query expansion and (ii) choice of suitable expansion terms from the relevant document cluster. In this paper, we describe the first task, providing evidence from first preliminary experiments on Spanish material that local relevance clustering benefits largely from knowledge on collocations. @@ -2056,8 +2056,8 @@ esuli-sebastiani-2006-sentiwordnet - AlexandreDenis - MatthieuQuignard + AlexandreDenis + MatthieuQuignard GuillaumePitel A Deep-Parsing Approach to Natural Language Understanding in Dialogue System: Results of a Corpus-Based Evaluation http://www.lrec-conf.org/proceedings/lrec2006/pdf/386_pdf.pdf @@ -2073,17 +2073,17 @@ MargaretKing - NancyUnderwood + NancyUnderwood Evaluating Symbiotic Systems: the challenge http://www.lrec-conf.org/proceedings/lrec2006/pdf/389_pdf.pdf This paper looks at a class of systems which pose severe problems in evaluation design for current conventional approaches to evaluation. After describing the two conventional evaluation paradigms: the “functionality paradigm” as typified by evaluation campaigns and the ISO inspired “user-centred” paradigm typified by the work of the EAGLES and ISLE projects, it goes on to outline the problems posed by the evaluation of systems which are designed to work in critical interaction with a human expert user and to work over vast amounts of data. These systems pose problems for both paradigms although for different reasons. The primary aim of this paper is to provoke discussion and the search for solutions. We have no proven solutions at present. However, we describe a programme of exploratory research on which we have already embarked, which involves ground clearing work which we expect to result in a deep understanding of the systems and users, a pre-requisite for developing a general framework for evaluation in this field. king-underwood-2006-evaluating - AimiliosChalamandaris + AimiliosChalamandaris AthanassiosProtopapas - PirrosTsiakoulis - SpyrosRaptis + PirrosTsiakoulis + SpyrosRaptis All <fixed-case>G</fixed-case>reek to me! An automatic <fixed-case>G</fixed-case>reeklish to <fixed-case>G</fixed-case>reek transliteration system http://www.lrec-conf.org/proceedings/lrec2006/pdf/390_pdf.pdf This paper presents research on “Greeklish,” that is, a transliteration of Greek using the Latin alphabet, which is used frequently in Greek e-mail communication. Greeklish is not standardized and there are a number of competing conventions co-existing in communication, based on personal preferences regarding similarities between Greek and Latin letters in shape, sound, or keyboard position. Our research has led to the development of “All Greek to me!” the first automatic transliteration system that can cope with any type of Greeklish. In this paper we first present previous research on Greeklish, describing other approaches that have attempted to deal with the same problems. We then provide a brief description of our approach, illustrating the functional flowchart of our system and the main ideas that underlie it. We present measures of system performance, based on about a year’s worth of usage as a public web service, and preliminary research, based on the same corpus, on the use of Greeklish and the trends in preferred Latin-Greek letter mapping. We evaluate the consistency of different transliteration patterns among users as well as the within-user consistency based on coherent principles. Finally we outline planned future research to further understand the use of Greeklish and improve “All Greek to me!” to function reliably embedded in integrated communication platforms bridging e-mail to mobile telephony and ubiquitous connectivity. @@ -2091,7 +2091,7 @@ ThuridVogt - ElisabethAndré + ElisabethAndré Improving Automatic Emotion Recognition from Speech via Gender Differentiaion http://www.lrec-conf.org/proceedings/lrec2006/pdf/392_pdf.pdf Feature extraction is still a disputed issue for the recognition of emotions from speech. Differences in features for male and female speakers are a well-known problem and it is established that gender-dependent emotion recognizers perform better than gender-independent ones. We propose a way to improve the discriminative quality of gender-dependent features: The emotion recognition system is preceded by an automatic gender detection that decides upon which of two gender-dependent emotion classifiers is used to classify an utterance. This framework was tested on two different databases, one with emotional speech produced by actors and one with spontaneous emotional speech from a Wizard-of-Oz setting. Gender detection achieved an accuracy of about 90 % and the combined gender and emotion recognition system improved the overall recognition rate of a gender-independent emotion recognition system by 2-4 %. @@ -2117,7 +2117,7 @@ ThierryDeclerck - Asunción GómezPérez + Asunción GómezPérez OvidiuVela ZenoGantner DavidManzano-Macho @@ -2127,7 +2127,7 @@ declerck-etal-2006-multilingual - BeatriceAlex + BeatriceAlex MalvinaNissim ClaireGrover The Impact of Annotation on the Performance of Protein Tagging in Biomedical Text @@ -2136,17 +2136,17 @@ alex-etal-2006-impact - BenWellner - MarcVilain + BenWellner + MarcVilain Leveraging Machine Readable Dictionaries in Discriminative Sequence Models http://www.lrec-conf.org/proceedings/lrec2006/pdf/404_pdf.pdf Many natural language processing tasks make use of a lexicon – typically the words collected from some annotated training data along with their associated properties. We demonstrate here the utility of corpora-independent lexicons derived from machine readable dictionaries. Lexical information is encoded in the form of features in a Conditional Random Field tagger providing improved performance in cases where: i) limited training data is made available ii) the data is case-less and iii) the test data genre or domain is different than that of the training data. We show substantial error reductions, especially on unknown words, for the tasks of part-of-speech tagging and shallow parsing, achieving up to 20% error reduction on Penn TreeBank part-of-speech tagging and up to a 15.7% error reduction for shallow parsing using the CoNLL 2000 data. Our results here point towards a simple, but effective methodology for increasing the adaptability of text processing systems by training models with annotated data in one genre augmented with general lexical information or lexical information pertinent to the target genre (or domain). wellner-vilain-2006-leveraging - SašaHasan - Anas ElIsbihani - HermannNey + SašaHasan + Anas ElIsbihani + HermannNey Creating a Large-Scale <fixed-case>A</fixed-case>rabic to <fixed-case>F</fixed-case>rench Statistical <fixed-case>M</fixed-case>achine<fixed-case>T</fixed-case>ranslation System http://www.lrec-conf.org/proceedings/lrec2006/pdf/405_pdf.pdf In this work, the creation of a large-scale Arabic to French statistical machine translation system is presented. We introduce all necessary steps from corpus aquisition, preprocessing the data to training and optimizing the system and eventual evaluation. Since no corpora existed previously, we collected large amounts of data from the web. Arabic word segmentation was crucial to reduce the overall number of unknown words. We describe the phrase-based SMT system used for training and generation of the translation hypotheses. Results on the second CESTA evaluation campaign are reported. The setting was inthe medical domain. The prototype reaches a favorable BLEU score of40.8%. @@ -2154,7 +2154,7 @@ ChenYirong - LuQin + QinLu LiWenjie SuiZhifang JiLuning @@ -2182,7 +2182,7 @@ LaurianneSitbon - PatriceBellot + PatriceBellot Tools and methods for objective or contextual evaluation of topic segmentation http://www.lrec-conf.org/proceedings/lrec2006/pdf/410_pdf.pdf In this paper we discuss the way of evaluating topic segmentation, from mathematical measures on variously constructed reference corpus to contextual evaluation depending on different topic segmentation usages. We present an overview of the different ways of building reference corpora and of mathematically evaluating segmentation methods, and then we focus on three tasks which may involve a topic segmentation: text extraction, information retrieval and document presentation. We have developed two graphical interfaces, one for an intrinsic comparison, and the other one dedicated to an evaluation in an information retrieval context. These tools will be very soon distributed under GPL licences on the Technolangue project web page. @@ -2201,8 +2201,8 @@ devillers-etal-2006-real - MajaPopović - HermannNey + MajaPopović + HermannNey <fixed-case>POS</fixed-case>-based Word Reorderings for Statistical Machine Translation http://www.lrec-conf.org/proceedings/lrec2006/pdf/412_pdf.pdf Translation In this work we investigate new possibilities for improving the quality of statistical machine translation (SMT) by applying word reorderings of the source language sentences based on Part-of-Speech tags. Results are presented on the European Parliament corpus containing about 700k sentences and 15M running words. In order to investigate sparse training data scenarios, we also report results obtained on about 1\% of the original corpus. The source languages are Spanish and English and target languages are Spanish, English and German. We propose two types of reorderings depending on the language pair and the translation direction: local reorderings of nouns and adjectives for translation from and into Spanish and long-range reorderings of verbs for translation into German. For our best translation system, we achieve up to 2\% relative reduction of WER and up to 7\% relative increase of BLEU score. Improvements can be seen both on the reordered sentences as well as on the rest of the test corpus. Local reorderings are especially important for the translation systems trained on the small corpus whereas long-range reorderings are more effective for the larger corpus. @@ -2211,18 +2211,18 @@ DavidVilar JiaXu - Luis FernandoD’Haro - HermannNey + Luis FernandoD’Haro + HermannNey Error Analysis of Statistical Machine Translation Output http://www.lrec-conf.org/proceedings/lrec2006/pdf/413_pdf.pdf Evaluation of automatic translation output is a difficult task. Several performance measures like Word Error Rate, Position Independent Word Error Rate and the BLEU and NIST scores are widely use and provide a useful tool for comparing different systems and to evaluate improvements within a system. However the interpretation of all of these measures is not at all clear, and the identification of the most prominent source of errors in a given system using these measures alone is not possible. Therefore some analysis of the generated translations is needed in order to identify the main problems and to focus the research efforts. This area is however mostly unexplored and few works have dealt with it until now. In this paper we will present a framework for classification of the errors of a machine translation system and we will carry out an error analysis of the system used by the RWTH in the first TC-STAR evaluation. vilar-etal-2006-error - IreneCastellón - AnaFernández-Montraveta - GloriaVázquez - LauraAlonso Alemany + IreneCastellón + AnaFernández-Montraveta + GloriaVázquez + LauraAlonso Alemany Joan AntoniCapilla The Sensem Corpus: a Corpus Annotated at the Syntactic and Semantic Level http://www.lrec-conf.org/proceedings/lrec2006/pdf/414_pdf.pdf @@ -2231,7 +2231,7 @@ JavierPérez - AntonioBonafonte + AntonioBonafonte <fixed-case>GAIA</fixed-case>: Common Framework for the Development of Speech Translation Technologies http://www.lrec-conf.org/proceedings/lrec2006/pdf/415_pdf.pdf We present here an open-source software platform for the integration of speech translation components. This tool is useful to integrate into a common framework different automatic speech recognition, spoken language translation and text-to-speech synthesis solutions, as demonstrated in the evaluation of the European LC-STAR project, and during the development of the national ALIADO project. Gaia operates with great flexibility, and it has been used to obtain the text and speech corpora needed when performing speech translation. The platform follows a modular distributed approach, with a specifically designed extensible network protocol handling the communication with the different modules. A well defined and publicly available API facilitates the integration of existing solutions into the architecture. Completely functional audio and text interfaces together with remote monitoring tools are provided. @@ -2246,8 +2246,8 @@ JavierPérez - AntonioBonafonte - Horst-UdoHain + AntonioBonafonte + Horst-UdoHain EricKeller StefanBreuer JileiTian @@ -2264,7 +2264,7 @@ nemec-2006-tree - MaiteTaboada + MaiteTaboada CarolineAnthony KimberlyVoll Methods for Creating Semantic Orientation Dictionaries @@ -2284,14 +2284,14 @@ HynekBořil TomášBořil - PetrPollák + PetrPollák Methodology of <fixed-case>L</fixed-case>ombard Speech Database Acquisition: Experiences with <fixed-case>CLSD</fixed-case> http://www.lrec-conf.org/proceedings/lrec2006/pdf/427_pdf.pdf In this paper, process of the Czech Lombard Speech Database (CLSD'05) acquisition is presented. Feature analyses have proven a strong appearance of Lombard effect in the database. In the small vocabulary recognition task, significant performance degradation was observed for the Lombard speech recorded in the database. Aim of this paper is to describe the hardware platform, scenarios and recording tool used for the acquisition of CLSD'05. During the database recording and processing, several difficulties were encountered. The most important question was how to adjust the level of speech feedback for the speaker. A method for minimization of the speech attenuation introduced to the speaker by headphones is proposed in this paper. Finally, contents and corpus of the database are presented to outline it's suitability for analysis and modeling of Lombard effect. The whole CLSD'05 database with a detailed documentation is now released for public use. boril-etal-2006-methodology - HarryBunt + HarryBunt Dimensions in Dialogue Act Annotation http://www.lrec-conf.org/proceedings/lrec2006/pdf/428_pdf.pdf This paper is concerned with the fundamentals of multidimensional dialogue act annotation, i.e. with what it means to annotate dialogues with information about the communicative acts that are performed with the utterances, taking various 'dimensions' into account. Two ideas seem to be prevalent in the literature concerning the notion of dimension: (1) dimensions correspond to different types of information; and (2) a dimension is formed by a set of mutually exclusive tags. In DAMSL, for instance, the terms “dimension” and “layer” are used sometimes in the sense of (1) and sometimes in that of (2). We argue that being mutually exclusive is not a good criterion for a set of dialogue act types to constitute a dimension, even though the description of an object in a multidimensional space should never assign more than one value per dimension. We define a dimension of dialogue act annotation as an aspect of participating in a dialogue that can be addressed independently by means of dialogue acts. We show that DAMSL dimensions such as Info-request, Statement, and Answer do not qualify as proper dimensions, and that the communicative functions in these categories do not fall in any specific dimension, but should be considered as “general-purpose” in the sense that they can be used in any dimension. We argue that using the notion of dimension that we propose, a multidimensional taxonomy of dialogue acts emerges that optimally supports multidimensional dialogue act annotation. @@ -2321,7 +2321,7 @@ bernardi-etal-2006-multilingual - IreneLangkilde-Geary + IreneLangkilde-Geary JustinBetteridge A Factored Functional Dependency Transformation of the <fixed-case>E</fixed-case>nglish <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank for Probabilistic Surface Generation http://www.lrec-conf.org/proceedings/lrec2006/pdf/435_pdf.pdf @@ -2336,10 +2336,10 @@ talley-2006-bootstrapping - EduardHovy - Chin-YewLin + EduardHovy + Chin-YewLin LiangZhou - JunichiFukumoto + JunichiFukumoto Automated Summarization Evaluation with Basic Elements. http://www.lrec-conf.org/proceedings/lrec2006/pdf/438_pdf.pdf As part of evaluating a summary automati-cally, it is usual to determine how much of the contents of one or more human-produced “ideal” summaries it contains. Past automated methods such as ROUGE compare using fixed word ngrams, which are not ideal for a variety of reasons. In this paper we describe a framework in which summary evaluation measures can be instantiated and compared, and we implement a specific evaluation method using very small units of content, called Basic Elements that address some of the shortcomings of ngrams. This method is tested on DUC 2003, 2004, and 2005 systems and produces very good correlations with human judgments. @@ -2354,9 +2354,9 @@ kaji-watanabe-2006-automatic - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe BillMacCartney - Christopher D.Manning + Christopher D.Manning Generating Typed Dependency Parses from Phrase Structure Parses http://www.lrec-conf.org/proceedings/lrec2006/pdf/440_pdf.pdf This paper describes a system for extracting typed dependency parses of English sentences from phrase structure parses. In order to capture inherent relations occurring in corpus texts that can be critical in real-world applications, many NP relations are included in the set of grammatical relations used. We provide a comparison of our system with Minipar and the Link parser. The typed dependency extraction facility described here is integrated in the Stanford Parser, available for download. @@ -2380,8 +2380,8 @@ LiangZhou - Chin-YewLin - EduardHovy + Chin-YewLin + EduardHovy Summarizing Answers for Complicated Questions http://www.lrec-conf.org/proceedings/lrec2006/pdf/443_pdf.pdf Recent work in several computational linguistics (CL) applications (especially question answering) has shown the value of semantics (in fact, many people argue that the current performance ceiling experienced by so many CL applications derives from their inability to perform any kind of semantic processing). But the absence of a large semantic information repository that provides representations for sentences prevents the training of statistical CL engines and thus hampers the development of such semantics-enabled applications. This talk refers to recent work in several projects that seek to annotate large volumes of text with shallower or deeper representations of some semantic phenomena. It describes one of the essential problems—creating, managing, and annotating (at large scale) the meanings of words, and outlines the Omega ontology, being built at ISI, that acts as term repository. The talk illustrates how one can proceed from words via senses to concepts, and how the annotation process can help verify good concept decisions and expose bad ones. Much of this work is performed in the context of the OntoNotes project, joint with BBN, the Universities of Colorado and Pennsylvania, and ISI, that is working to build a corpus of about 1M words (English, Chinese, and Arabic), annotated for shallow semantics, over the next few years. @@ -2404,7 +2404,7 @@ RonnyMelz Pum-MoRyu - Key-SunChoi + Key-SunChoi Compiling large language resources using lexical similarity metrics for domain taxonomy learning http://www.lrec-conf.org/proceedings/lrec2006/pdf/446_pdf.pdf In this contribution we present a new methodology to compile large language resources for domain-specific taxonomy learning. We describe the necessary stages to deal with the rich morphology of an agglutinative language, i.e. Korean, and point out a second order machine learning algorithm to unveil term similarity from a given raw text corpus. The language resource compilation described is part of a fully automatic top-down approach to construct taxonomies, without involving the human efforts which are usually required. @@ -2412,14 +2412,14 @@ FelixPîrvan - DanTufiş + DanTufiş Tagset Mapping and Statistical Training Data Cleaning-up http://www.lrec-conf.org/proceedings/lrec2006/pdf/448_pdf.pdf The paper describes a general method (as well as its implementation and evaluation) for deriving mapping systems for different tagsets available in existing training corpora (gold standards) for a specific language. For each pair of corpora (tagged with different tagsets), one such mapping system is derived. This mapping system is then used to improve the tagging of each of the two corpora with the tagset of the other (this process will be called cross-tagging). By reapplying the algorithm to the newly obtained corpora, the accuracy of the underlying training corpora can also be improved. Furthermore, comparing the results with the gold standards makes it possible to assess the distributional adequacy of various tagsets used in processing the language in case. Unlike other methods, such as those reported in (Brants, 1995) or (Tufis & Dragomirescu, 2004), which assume a subsumption relation between the considered tagsets, and as such they aim at minimizing the tagsets by eliminating the feature-value redundancy, this method is applicable for completely unrelated tagsets. Although the experiments were focused on morpho-syntactic (POS) tagging, the method is applicable to other types of tagging as well. pirvan-tufis-2006-tagset - DanTufiş + DanTufiş ElenaIrimia <fixed-case>R</fixed-case>o<fixed-case>C</fixed-case>o-News: A Hand Validated Journalistic Corpus of <fixed-case>R</fixed-case>omanian http://www.lrec-conf.org/proceedings/lrec2006/pdf/451_pdf.pdf @@ -2427,15 +2427,15 @@ tufis-irimia-2006-roco - EckhardBick + EckhardBick Turning a Dependency Treebank into a <fixed-case>PSG</fixed-case>-style Constituent Treebank http://www.lrec-conf.org/proceedings/lrec2006/pdf/452_pdf.pdf In this paper, we present and evaluate a new method to convert Constraint Grammar (CG) parses of running text into Constituent Treebanks. The conversion is two-step - first a grammar-based method is used to bridge the gap between raw CG annotation and full dependency structure, then phrase structure bracketing and non-terminal nodes are introduced by clustering sister dependents, effectively building one syntactic treebank on top of another. The method is compared with another approach (Bick 2003-2), where constituent structures are arrived at by employing a function-tag based Phrase Structure Grammar (PSG). Results are evaluated on a small reference corpus for both raw and revised CG input, with bracketing F-Scores of 87.5% for raw text and 97.1% for revised CG input, and a raw text edge label accuracy of 95.9% for forms and 86% for functions, or 99.7% and 99.4%, respectively, for revised CG. By applying the tools to the CG-only part of the Danish Arboretum treebank we were able to increase the size of the treebank by 86%, from 197.400 to 367.500 words. bick-2006-turning - DanŞtefănescu - DanTufiş + DanŞtefănescu + DanTufiş Aligning Multilingual Thesauri http://www.lrec-conf.org/proceedings/lrec2006/pdf/453_pdf.pdf The aligning and merging of ontologies with overlapping information are actual one of the most active domain of investigation in the Semantic Web community. Multilingual lexical ontologies thesauri are fundamental knowledge sources for most NLP projects addressing multilinguality. The alignment of multilingual lexical knowledge sources has various applications ranging from knowledge acquisition to semantic validation of interlingual equivalence of presumably the same meaning express in different languages. In this paper, we present a general method for aligning ontologies, which was used to align a conceptual thesaurus, lexicalized in 20 languages with a partial version of it lexicalized in Romanian. The objective of our work was to align the existing terms in the Romanian Eurovoc to the terms in the English Eurovoc and to automatically update the Romanian Eurovoc. The general formulation of the ontology alignment problem was set up along the lines established by Heterogeneity group of the KnowledgeWeb consortium, but the actual case study was motivated by the needs of a specific NLP project. @@ -2443,17 +2443,17 @@ RaduIon - AlexandruCeauşu - DanTufiş + AlexandruCeauşu + DanTufiş Dependency-Based Phrase Alignment http://www.lrec-conf.org/proceedings/lrec2006/pdf/454_pdf.pdf Phrase alignment is the task that requires the constituent phrases of two halves of a bitext to be aligned. In order to align phrases, one must discover them first and this article presents a method of aligning phrases that are discovered automatically. Here, the notion of a 'phrase' will be understood as being given by a subtree of a dependency-like structure of a sentence called linkage. To discover phrases, we will make use of two distinct, language independent methods: the IBM-1 model (Brown et al., 1993) adapted to detect linkages and Constrained Lexical Attraction Models (Ion & Barbu Mititelu, 2006). The methods will be combined and the resulted model will be used to annotate the bitext. The accuracy of phrase alignment will be evaluated by obtaining word alignments from link alignments and then by checking the F-measure of the latter word aligner. ion-etal-2006-dependency - AlexandruCeauşu - DanŞtefănescu - DanTufiş + AlexandruCeauşu + DanŞtefănescu + DanTufiş <fixed-case>A</fixed-case>cquis <fixed-case>C</fixed-case>ommunautaire Sentence Alignment using Support Vector Machines http://www.lrec-conf.org/proceedings/lrec2006/pdf/456_pdf.pdf Sentence alignment is a task that requires not only accuracy, as possible errors can affect further processing, but also requires small computation resources and to be language pair independent. Although many implementations do not use translation equivalents because they are dependent on the language pair, this feature is a requirement for the accuracy increase. The paper presents a hybrid sentence aligner that has two alignment iterations. The first iteration is based mostly on sentences length, and the second is based on a translation equivalents table estimated from the results of the first iteration. The aligner uses a Support Vector Machine classifier to discriminate between positive and negative examples of sentence pairs. @@ -2469,10 +2469,10 @@ BadenHughes - TimothyBaldwin + TimothyBaldwin StevenBird JeremyNicholson - AndrewMacKinlay + AndrewMacKinlay Reconsidering Language Identification for Written Language Resources http://www.lrec-conf.org/proceedings/lrec2006/pdf/459_pdf.pdf The task of identifying the language in which a given document (ranging from a sentence to thousands of pages) is written has been relatively well studied over several decades. Automated approachesto written language identification are used widely throughout research and industrial contexts, over both oral and written source materials. Despite this widespread acceptance, a review of previous research in written language identification reveals a number of questions which remain openand ripe for further investigation. @@ -2481,7 +2481,7 @@ YasukoSenda YasusiSinohara - ManabuOkumura + ManabuOkumura Automatic Terminology Intelligibility Estimation for Readership-oriented Technical Writing http://www.lrec-conf.org/proceedings/lrec2006/pdf/461_pdf.pdf This paper describes automatic terminology intelligibility estimation for readership-oriented technical writing. We assume that the term frequency weighted by the types of documents can be an indicator of the term intelligibility for a certain readership. From this standpoint, we analyzed the relationship between the following: average intelligibility levels of 46 technical terms that were rated by about 120 laymen; numbers of documents that an Internet search @@ -2489,7 +2489,7 @@ MatsLundälv - KatarinaMühlenbock + KatarinaMühlenbock BengtFarre AnnikaBrännström <fixed-case>SYMBERED</fixed-case> - a Symbol-Concept Editing Tool @@ -2518,26 +2518,26 @@ CvetanaKrstev - RankaStanković + RankaStanković DuškoVitas - IvanObradović + IvanObradović <fixed-case>WS</fixed-case>4<fixed-case>LR</fixed-case>: A Workstation for Lexical Resources http://www.lrec-conf.org/proceedings/lrec2006/pdf/467_pdf.pdf In this paper we describe WS4LR, the workstation for lexical resources, a software tool developed within the Human Language Technology Group at the Faculty of Mathematics, University of Belgrade. The tool is aimed at manipulating heterogeneous lexical resources, and the need for such a tool came from the large volume of resources the Group has developed in the course of many years and within different projects. The tool handles morphological dictionaries, wordnets, aligned texts and transducers equally and has already proved very useful for various tasks. Although it has so far been used mainly for Serbian, WS4LR is not language dependent and can be successfully used for resources in other languages provided that they follow the described formats and methodologies. The tool operates on the .NET platform and runs on a personal computer under Windows 2000/XP/2003 operating system with at least 256MB of internal memory. krstev-etal-2006-ws4lr - KarinKipper + KarinKipper AnnaKorhonen NevilleRyant - MarthaPalmer + MarthaPalmer Extending <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et with Novel Verb Classes http://www.lrec-conf.org/proceedings/lrec2006/pdf/468_pdf.pdf Lexical classifications have proved useful in supporting various natural language processing (NLP) tasks. The largest verb classification for English is Levin's (1993) work which defined groupings of verbs based on syntactic properties. VerbNet - the largest computational verb lexicon currently available for English - provides detailed syntactic-semantic descriptions of Levin classes. While the classes included are extensive enough for some NLP use, they are not comprehensive. Korhonen and Briscoe (2004) have proposed a significant extension of Levin's classification which incorporates 57 novel classes for verbs not covered (comprehensively) by Levin. This paper describes the integration of these classes into VerbNet. The result is the most extensive Levin-style classification for English verbs which can be highly useful for practical applications. kipper-etal-2006-extending - JamesPustejovsky + JamesPustejovsky CatherineHavasi JessicaLittman AnnaRumshisky @@ -2548,15 +2548,15 @@ pustejovsky-etal-2006-towards - HansDybkjær - LailaDybkjær + HansDybkjær + LailaDybkjær Act-Topic Patterns for Automatically Checking Dialogue Models http://www.lrec-conf.org/proceedings/lrec2006/pdf/471_pdf.pdf When dialogue models are evaluated today, this is normally done by using some evaluation method to collect data, often involving users interacting with the system model, and then subsequently analysing the collected data. We present a tool called DialogDesigner that enables automatic evaluation performed directly on the dialogue model and that does not require any data collection first. DialogDesigner is a tool in support of rapid design and evaluation of dialogue models. The first version was developed in 2005 and enabled developers to create an electronic dialogue model, get various graphical views of the model, run a Wizard-of-Oz (WOZ) simulation session, and extract different presentations in HTML. The second version includes extensions in terms of support for automatic dialogue model evaluation. Various aspects of dialogue model well-formedness can be automatically checked. Some of the automatic analyses simply perform checks based on the state and transition structure of the dialogue model while the core part are based on act-topic annotation of prompts and transitions in the dialogue model and specification of act-topic patterns. This paper focuses on the version 2 extensions. dybkjaer-dybkjaer-2006-act - David M.Rojas + David M.Rojas TakakoAikawa Predicting <fixed-case>MT</fixed-case> Quality as a Function of the Source Language http://www.lrec-conf.org/proceedings/lrec2006/pdf/472_pdf.pdf @@ -2564,7 +2564,7 @@ rojas-aikawa-2006-predicting - PawełMazur + PawełMazur RobertDale Named Entity Extraction with Conjunction Disambiguation http://www.lrec-conf.org/proceedings/lrec2006/pdf/473_pdf.pdf @@ -2595,7 +2595,7 @@ ClaudiaSoria - MaurizioTesconi + MaurizioTesconi FrancescaBertagna NicolettaCalzolari AndreaMarchetti @@ -2609,7 +2609,7 @@ CarolineSporleder Mariekevan Erp TijnPorcelijn - Antalvan den Bosch + Antalvan den Bosch PimArntzen Identifying Named Entities in Text Databases from the Natural History Domain http://www.lrec-conf.org/proceedings/lrec2006/pdf/482_pdf.pdf @@ -2617,7 +2617,7 @@ sporleder-etal-2006-identifying - HaroldSomers + HaroldSomers GarethEvans ZeinabMohamed Developing Speech Synthesis for Under-Resourced Languages by “Faking it”: An Experiment with <fixed-case>S</fixed-case>omali @@ -2648,10 +2648,10 @@ BenteMaegaard - Jens-ErikFenstad + Jens-ErikFenstad LarsAhrenberg KnutKvale - KatarinaMühlenbock + KatarinaMühlenbock Bernt-ErikHeid <fixed-case>KUNSTI</fixed-case> - Knowledge Generation for <fixed-case>N</fixed-case>orwegian Language Technology http://www.lrec-conf.org/proceedings/lrec2006/pdf/487_pdf.pdf @@ -2659,11 +2659,11 @@ maegaard-etal-2006-kunsti - PéterHalácsy + PéterHalácsy AndrásKornai CsabaOravecz ViktorTrón - DánielVarga + DánielVarga Using a morphological analyzer in high precision <fixed-case>POS</fixed-case> tagging of <fixed-case>H</fixed-case>ungarian http://www.lrec-conf.org/proceedings/lrec2006/pdf/488_pdf.pdf The paper presents an evaluation of maxent POS disambiguation systems that incorporate an open source morphological analyzer to constrain the probabilistic models. The experiments show that the best proposed architecture, which is the first application of the maximum entropy framework in a Hungarian NLP task, outperforms comparable state of the art tagging methods and is able to handle out of vocabulary items robustly, allowing for efficient analysis of large (web-based) corpora. @@ -2749,19 +2749,19 @@ ManolisMaragoudakis - KatiaKermanidis + KatiaKermanidis AristogiannisGarbis - NikosFakotakis + NikosFakotakis Dealing with Imbalanced Data using <fixed-case>B</fixed-case>ayesian Techniques http://www.lrec-conf.org/proceedings/lrec2006/pdf/503_pdf.pdf For the present work, we deal with the significant problem of high imbalance in data in binary or multi-class classification problems. We study two different linguistic applications. The former determines whether a syntactic construction (environment) co-occurs with a verb in a natural text corpus consists a subcategorization frame of the verb or not. The latter is called Name Entity Recognition (NER) and it concerns determining whether a noun belongs to a specific Name Entity class. Regarding the subcategorization domain, each environment is encoded as a vector of heterogeneous attributes, where a very high imbalance between positive and negative examples is observed (an imbalance ratio of approximately 1:80). In the NER application, the imbalance between a name entity class and the negative class is even greater (1:120). In order to confront the plethora of negative instances, we suggest a search tactic during training phase that employs Tomek links for reducing unnecessary negative examples from the training set. Regarding the classification mechanism, we argue that Bayesian networks are well suited and we propose a novel network structure which efficiently handles heterogeneous attributes without discretization and is more classification-oriented. Comparing the experimental results with those of other known machine learning algorithms, our methodology performs significantly better in detecting examples of the rare class. maragoudakis-etal-2006-dealing - José-MiguelBenedí - EduardoLleida + José-MiguelBenedí + EduardoLleida AmparoVarona - María-JoséCastro + María-JoséCastro IsabelGaliano RaquelJusto IñigoLópez de Letona @@ -2782,12 +2782,12 @@ Yun-ChuangChiao - OlivierKraif + OlivierKraif DominiqueLaurent - Thi Minh HuyenNguyen + Thi Minh HuyenNguyen NasredineSemmar FrançoisStuck - JeanVéronis + JeanVéronis WajdiZaghouani Evaluation of multilingual text alignment systems: the <fixed-case>ARCADE</fixed-case> <fixed-case>II</fixed-case> project http://www.lrec-conf.org/proceedings/lrec2006/pdf/506_pdf.pdf @@ -2807,7 +2807,7 @@ KareemDarwish OssamaEmam WalidMagdy - MagdiNagi + MagdiNagi Building a Heterogeneous Information Retrieval Collection of Printed <fixed-case>A</fixed-case>rabic Documents http://www.lrec-conf.org/proceedings/lrec2006/pdf/509_pdf.pdf This paper describes the development of an Arabic document image collection containing 34,651 documents from 1,378 different books and 25 topics with their relevance judgments. The books from which the collection is obtained are a part of a larger collection 75,000 books being scanned for archival and retrieval at the bibliotheca Alexandrina (BA). The documents in the collection vary widely in topics, fonts, and degradation levels. Initial baseline experiments were performed to examine the effectiveness of different index terms, with and without blind relevance feedback, on Arabic OCR degraded text. @@ -2824,14 +2824,14 @@ DimouAthanassia Lida - ChalamandarisAimilios + ChalamandarisAimilios Language identification from suprasegmental cues: Speech synthesis of <fixed-case>G</fixed-case>reek utterances from different dialectal variations. http://www.lrec-conf.org/proceedings/lrec2006/pdf/511_pdf.pdf In this paper we present the continuation of our research on the ability of native Greek adults to identify their mother tongue from synthesized stimuli which contain only prosodic - melodic and rhythmic - information. In the first section we present the ideas that underlie our theory, together with a brief review of our preliminary results. In the second section the detailed description of our experimental approach is given, as well as the results and their statistical analysis. In the final two sections we provide the conclusions derived from our experiments and the future work we are planning to carry out. athanassia-lida-aimilios-2006-language - RogerLevy + RogerLevy GalenAndrew Tregex and Tsurgeon: tools for querying and manipulating tree data structures http://www.lrec-conf.org/proceedings/lrec2006/pdf/513_pdf.pdf @@ -2863,7 +2863,7 @@ BenteMaegaard - StevenKrauwer + StevenKrauwer KhalidChoukri Lise DamsgaardJørgensen The <fixed-case>BLARK</fixed-case> concept and <fixed-case>BLARK</fixed-case> for <fixed-case>A</fixed-case>rabic @@ -2883,16 +2883,16 @@ SuzanVerberne - LouBoves + LouBoves NellekeOostdijk - Peter-ArnoCoppen + Peter-ArnoCoppen Data for question answering: The case of why http://www.lrec-conf.org/proceedings/lrec2006/pdf/525_pdf.pdf For research and development of an approach for automatically answering why-questions (why-QA) a data collection was created. The data set was obtained by way of elicitation and comprises a total of 395 why-questions. For each question, the data set includes the source document and one or two user-formulated answers. In addition, for a subset of the questions, user-formulated paraphrases are available. All question-answer pairs have been annotated with information on topic and semantic answer type. The resulting data set is of importance not only for our research, but we expect it to contribute to and stimulate other research in the field of why-QA. verberne-etal-2006-data - KirilSimov + KirilSimov PetyaOsenova Shallow Semantic Annotation of <fixed-case>B</fixed-case>ulgarian http://www.lrec-conf.org/proceedings/lrec2006/pdf/527_pdf.pdf @@ -2900,16 +2900,16 @@ simov-osenova-2006-shallow - ChristopherCieri + ChristopherCieri WaltAndrews - Joseph P.Campbell - GeorgeDoddington + Joseph P.Campbell + GeorgeDoddington JackGodfrey ShudongHuang - MarkLiberman - AlvinMartin + MarkLiberman + AlvinMartin HirotakaNakasone - MarkPrzybocki + MarkPrzybocki KevinWalker The Mixer and Transcript Reading Corpora: Resources for Multilingual, Crosschannel Speaker Recognition Research http://www.lrec-conf.org/proceedings/lrec2006/pdf/530_pdf.pdf @@ -2934,10 +2934,10 @@ chou-huang-2006-hantology - MariaGavrilidou - PennyLabropoulou - SteliosPiperidis - VoulaGiouli + MariaGavrilidou + PennyLabropoulou + SteliosPiperidis + VoulaGiouli NicolettaCalzolari MonicaMonachini ClaudiaSoria @@ -2959,7 +2959,7 @@ KatrinErk - SebastianPadó + SebastianPadó Shalmaneser - A Toolchain For Shallow Semantic Parsing http://www.lrec-conf.org/proceedings/lrec2006/pdf/537_pdf.pdf This paper presents Shalmaneser, a software package for shallow semantic parsing, the automatic assignment of semantic classes and roles to free text. Shalmaneser is a toolchain of independent modules communicating through a common XML format. System output can be inspected graphically. Shalmaneser can be used either as a “black box” to obtain semantic parses for new datasets (classifiers for English and German frame-semantic analysis are included), or as a research platform that can be extended to new parsers, languages, or classification paradigms. @@ -2968,8 +2968,8 @@ ValentinTablan TamaraPolajnar - HamishCunningham - KalinaBontcheva + HamishCunningham + KalinaBontcheva User-friendly ontology authoring using a controlled language http://www.lrec-conf.org/proceedings/lrec2006/pdf/538_pdf.pdf In recent years, following the rapid development in the Semantic Web and Knowledge Management research, ontologies have become more in demand in Natural Language Processing. An increasing number of systems use ontologies either internally, for modelling the domain of the application, or as data structures that hold the output resulting from the work of the system, in the form of knowledge bases. While there are many ontology editing tools aimed at expert users, there are very few which are accessible to users wishing to create simple structures without delving into the intricacies of knowledge representation languages. The approach described in this paper allows users to create and edit ontologies simply by using a restricted version of the English language. The controlled language described within is based on an open vocabulary and a restricted set of grammatical constructs. Sentences written in this language unambiguously map into a number of knowledge representation formats including OWL and RDF-S to allow round-trip ontology management. @@ -2977,7 +2977,7 @@ LauraHasler - ConstantinOrasan + ConstantinOrasan KarinNaumann <fixed-case>NP</fixed-case>s for Events: Experiments in Coreference Annotation http://www.lrec-conf.org/proceedings/lrec2006/pdf/539_pdf.pdf @@ -2987,9 +2987,9 @@ AmáliaMendes SandraAntunes - Maria Fernanda Bacelar doNascimento - João MiguelCasteleiro - LuísaPereira + Maria Fernanda Bacelar doNascimento + João MiguelCasteleiro + LuísaPereira Tiago <fixed-case>COMBINA</fixed-case>-<fixed-case>PT</fixed-case>: A Large Corpus-extracted and Hand-checked Lexical Database of <fixed-case>P</fixed-case>ortuguese Multiword Expressions http://www.lrec-conf.org/proceedings/lrec2006/pdf/540_pdf.pdf @@ -2999,7 +2999,7 @@ DavidGraff TimBuckwalter - MohamedMaamouri + MohamedMaamouri HubertJin Lexicon Development for Varieties of Spoken Colloquial <fixed-case>A</fixed-case>rabic http://www.lrec-conf.org/proceedings/lrec2006/pdf/541_pdf.pdf @@ -3009,19 +3009,19 @@ AlexandrePatry FabrizioGotti - PhilippeLanglais + PhilippeLanglais <fixed-case>MOOD</fixed-case>: A Modular Object-Oriented Decoder for Statistical Machine Translation http://www.lrec-conf.org/proceedings/lrec2006/pdf/542_pdf.pdf We present an Open Source framework called MOOD developed in order tofacilitate the development of a Statistical Machine Translation Decoder.MOOD has been modularized using an object-oriented approach which makes itespecially suitable for the fast development of state-of-the-art decoders. Asa proof of concept, a clone of the pharaoh decoder has been implemented andevaluated. This clone named ramses is part of the current distribution of MOOD. patry-etal-2006-mood - MohamedMaamouri + MohamedMaamouri AnnBies TimBuckwalter - MonaDiab + MonaDiab NizarHabash - OwenRambow + OwenRambow DalilaTabessi Developing and Using a Pilot Dialectal <fixed-case>A</fixed-case>rabic Treebank http://www.lrec-conf.org/proceedings/lrec2006/pdf/543_pdf.pdf @@ -3029,8 +3029,8 @@ maamouri-etal-2006-developing - Beáta BandmannMegyesi - Anna SågvallHein + Beáta BandmannMegyesi + Anna SågvallHein Éva CsatóJohanson Building a <fixed-case>S</fixed-case>wedish-<fixed-case>T</fixed-case>urkish Parallel Corpus http://www.lrec-conf.org/proceedings/lrec2006/pdf/544_pdf.pdf @@ -3039,7 +3039,7 @@ HoracioSaggion - RobertGaizauskas + RobertGaizauskas Language Resources for Background Gathering http://www.lrec-conf.org/proceedings/lrec2006/pdf/545_pdf.pdf We describe the Cubreporter information access system which allows access to news archives through the use of natural language technology. The system includes advanced text search, question answering, summarization, and entity profiling capabilities. It has been designed taking into account the characteristics of the background gathering task. @@ -3048,8 +3048,8 @@ JulieMedero KazuakiMaeda - StephanieStrassel - ChristopherWalker + StephanieStrassel + ChristopherWalker An Efficient Approach to Gold-Standard Annotation: Decision Points for Complex Tasks http://www.lrec-conf.org/proceedings/lrec2006/pdf/550_pdf.pdf Inter-annotator consistency is a concern for any corpus building effort relying on human annotation. Adjudication is as effective way to locate and correct discrepancies of various kinds. It can also be both difficult and time-consuming. This paper introduces Linguistic Data Consortium (LDC)’s model for decision point-based annotation and adjudication, and describes the annotation tools developed to enable this approach for the Automatic Content Extraction (ACE) Program. Using a customized user interface incorporating decision points, we improved adjudication efficiency over 2004 annotation rates, despite increased annotation task complexity. We examine the factors that lead to more efficient, less demanding adjudication. We further discuss how a decision point model might be applied to annotation tools designed for a wide range of annotation tasks. Finally, we consider issues of annotation tool customization versus development time in the context of a decision point model. @@ -3063,7 +3063,7 @@ klatt-2006-corpus - SophieRosset + SophieRosset SandraPetel The Ritel Corpus - An annotated Human-Machine open-domain question answering spoken dialog corpus http://www.lrec-conf.org/proceedings/lrec2006/pdf/553_pdf.pdf @@ -3092,14 +3092,14 @@ AnnaKorhonen YuvalKrymolowski - TedBriscoe + TedBriscoe A Large Subcategorization Lexicon for Natural Language Processing Applications http://www.lrec-conf.org/proceedings/lrec2006/pdf/558_pdf.pdf We introduce a large computational subcategorizationlexicon which includes subcategorization frame (SCF) and frequencyinformation for 6,397 English verbs. This extensive lexicon was acquiredautomatically from five corpora and the Web using the current version of the comprehensive subcategorization acquisition system of Briscoe and Carroll (1997). The lexicon is provided freely for research use, along with a script which can be used to filter and build sub-lexicons suited for different natural languageprocessing (NLP) purposes. Documentation is also provided whichexplains each sub-lexicon option and evaluates its accuracy. korhonen-etal-2006-large - NancyIde + NancyIde KeithSuderman Integrating Linguistic Resources: The <fixed-case>A</fixed-case>merican National Corpus Model http://www.lrec-conf.org/proceedings/lrec2006/pdf/560_pdf.pdf @@ -3107,8 +3107,8 @@ ide-suderman-2006-integrating - NancyIde - LaurentRomary + NancyIde + LaurentRomary Representing Linguistic Corpora and Their Annotations http://www.lrec-conf.org/proceedings/lrec2006/pdf/562_pdf.pdf A Linguistic Annotation Framework (LAF) is being developed within the International Standards Organization Technical Committee 37 Sub-committee on Language Resource Management (ISO TC37 SC4). LAF is intended to provide a standardized means to represent linguistic data and its annotations that is defined broadly enough to accommodate all types of linguistic annotations, and at the same time provide means to represent precise and potentially complex linguistic information. The general principles informing the design of LAF have been previously reported (Ide and Romary, 2003; Ide and Romary, 2004a). This paper describes some of the more technical aspects of the LAF design that have been addressed in the process of finalizing the specifications for the standard. @@ -3117,7 +3117,7 @@ ZhongqiangHuang LeiChen - MaryHarper + MaryHarper An Open Source Prosodic Feature Extraction Tool http://www.lrec-conf.org/proceedings/lrec2006/pdf/565_pdf.pdf There has been an increasing interest in utilizing a wide variety of knowledge sources in order to perform automatic tagging of speech events, such as sentence boundaries and dialogue acts. In addition to the word spoken, the prosodic content of the speech has been proved quite valuable in a variety of spoken language processing tasks such as sentence segmentation and tagging, disfluency detection, dialog act segmentation and tagging, and speaker recognition. In this paper, we report on an open source prosodic feature extraction tool based on Praat, with a description of the prosodic features and the implementation details, as well as a discussion of its extension capability. We also evaluate our tool on a sentence boundary detection task and report the system performance on the NIST RT04 CTS data. @@ -3142,9 +3142,9 @@ EnriqueAlfonseca - AntonioMoreno-Sandoval - José MaríaGuirao - MaríaRuiz-Casado + AntonioMoreno-Sandoval + José MaríaGuirao + MaríaRuiz-Casado The wraetlic <fixed-case>NLP</fixed-case> suite http://www.lrec-conf.org/proceedings/lrec2006/pdf/569_pdf.pdf In this paper, we describe the second release of a suite of language analysers, developed over the last five years, called wraetlic, which includes tools for several partial parsing tasks, both for English and Spanish. It has been successfully used in fields such as Information Extraction, thesaurus acquisition, Text Summarisation and Computer Assisted Assessment. @@ -3152,10 +3152,10 @@ TomokoOhta - YukaTateisi + YukaTateisi Jin-DongKim AkaneYakushiji - Jun-ichiTsujii + Jun-ichiTsujii Linguistic and Biological Annotations of Biological Interaction Events http://www.lrec-conf.org/proceedings/lrec2006/pdf/570_pdf.pdf This paper discusses an augmentation of a corpus ofresearch abstracts in biomedical domain (the GENIA corpus) with two kinds of annotations: tree annotation and event annotation. The tree annotation identifies the linguistic structure that encodes the relations among entities. The event annotation reveals the semantic structure of the biological interaction events encoded in the text. With these annotations we aim to provide a link between the clue and the target of biological event information extraction. @@ -3171,17 +3171,17 @@ tenfjord-etal-2006-ask - IvanaKruijff-Korbayová + IvanaKruijff-Korbayová KláraChvátalová - OanaPostolache + OanaPostolache Annotation Guidelines for <fixed-case>C</fixed-case>zech-<fixed-case>E</fixed-case>nglish Word Alignment http://www.lrec-conf.org/proceedings/lrec2006/pdf/575_pdf.pdf We report on our experience with manual alignment of Czech and English parallel corpus text. We applied existing guidelines for English and French and augmented them to cover systematically occurring cases in our corpus. We describe the main extensions covered in our guidelines and provide examples. We evaluated both intra- and inter-annotator agreement and obtained very good results of Kappa well above 0.9 and agreement of 95% and 93%, respectively. kruijff-korbayova-etal-2006-annotation - JérémieSegouat - AnneliesBraffort + JérémieSegouat + AnneliesBraffort EmilieMartin Sign Language corpus analysis: Synchronisation of linguistic annotation and numerical data http://www.lrec-conf.org/proceedings/lrec2006/pdf/576_pdf.pdf @@ -3193,7 +3193,7 @@ MonteGeorge NicolettaCalzolari MonicaMonachini - NuriaBel + NuriaBel MandyPet ClaudiaSoria Lexical Markup Framework (<fixed-case>LMF</fixed-case>) @@ -3219,7 +3219,7 @@ pouliquen-etal-2006-geocoding - Bolette SandfordPedersen + Bolette SandfordPedersen Query Expansion on Compounds http://www.lrec-conf.org/proceedings/lrec2006/pdf/580_pdf.pdf Compounds constitute a specific issue in search, in particular in languages where they are written in one word, as is the case for Danish and the other Scandinavian languages. For such languages, expansion of the query compound into separate lemmas is a way of finding the often frequent alternative synonymous phrases in which the content of a compound can also be expressed. However, it is crucial to note that the number of irrelevant hits is generally very high when using this expansion strategy. The aim of this paper is to examine how we can obtain better search results on split compounds, partly by looking at the internal structure of the original compound, partly by analyzing the context in which the split compound occurs. We perform an NP analysis and introduce a new, linguistically based threshold for retrieved hits. The results obtained by using this strategy demonstrate that compound splitting combined with a shallow linguistic analysis focusing on the recognition of NPs can improve search by bringing down the number of irrelevant hits. @@ -3237,7 +3237,7 @@ charoenporn-etal-2006-word - WimPeters + WimPeters Maria TeresaSagri DanielaTiscornia SaraCastagnoli @@ -3250,7 +3250,7 @@ JorisVaneyghen GuyDe Pauw DirkVan Compernolle - WalterDaelemans + WalterDaelemans A mixed word / morphological approach for extending <fixed-case>CELEX</fixed-case> for high coverage on contemporary large corpora http://www.lrec-conf.org/proceedings/lrec2006/pdf/583_pdf.pdf This paper describes an alternative approach to morphological language modeling, which incorporates constraints on the morphological production of new words. This is done by applying the constraints as a preprocessing step in which only one morphological production rule can be applied to an extended lexicon of knownmorphemes, lemmas and word forms. This approach is used to extend the CELEX Dutch morphological database, so that a higher coverage can be reached on a largecorpus of Dutch newspaper articles. We present experimental results on the coverage of this extended database and use the extension to further evaluate our morphologicalsystem, as well as the impact of the constraints on the coverage of out-of-vocabulary words. @@ -3287,9 +3287,9 @@ MartíUmbert - AsunciónMoreno + AsunciónMoreno PabloAgüero - AntonioBonafonte + AntonioBonafonte <fixed-case>S</fixed-case>panish Synthesis Corpora http://www.lrec-conf.org/proceedings/lrec2006/pdf/590_pdf.pdf This paper deals with the design of a synthesis database for a high quality corpus-based Speech Synthesis system in Spanish. The database has been designed for speech synthesis, speech conversion and expressive speech. The design follows the specifications of TC-STAR project and has been applied to collect equivalent English and Mandarin synthesis databases. The sentences of the corpus have been selected mainly from transcribed speech and novels. The selection criterion is a phonetic and prosodic coverage. The corpus was completed with sentences specifically designed to cover frequent phrases and words. Two baseline speakers and four bilingual speakers were recorded. Recordings consist of 10 hours of speech for each baseline speaker and one hour of speech for each voice conversion bilingual speaker. The database is labelled and segmented. Pitch marks and phonetic segmentation was done automatically and up to 50% manually supervised. The database will be available at ELRA. @@ -3298,7 +3298,7 @@ PavelIrcing JanHoidekr - JosefPsutka + JosefPsutka Exploiting Linguistic Knowledge in Language Modeling of <fixed-case>C</fixed-case>zech Spontaneous Speech http://www.lrec-conf.org/proceedings/lrec2006/pdf/591_pdf.pdf In our paper, we present a method for incorporating available linguistic information into a statistical language model that is used in ASR system for transcribing spontaneous speech. We employ the class-based language model paradigm and use the morphological tags as the basis for world-to-class mapping. Since the number of different tags is at least by one order of magnitude lower than the number of words even in the tasks with moderately-sized vocabularies, the tag-based model can be rather robustly estimated using even the relatively small text corpora. Unfortunately, this robustness goes hand in hand with restricted predictive ability of the class-based model. Hence we apply the two-pass recognition strategy, where the first pass is performed with the standard word-based n-gram and the resulting lattices are rescored in the second pass using the aforementioned class-based model. Using this decoding scenario, we have managed to moderately improve the word error rate in the performed ASR experiments. @@ -3312,7 +3312,7 @@ slavcheva-2006-semantic - GünterNeumann + GünterNeumann BertholdCrysmann Exploring <fixed-case>HPSG</fixed-case>-based Treebanks for Probabilistic Parsing <fixed-case>HPSG</fixed-case> grammar extraction http://www.lrec-conf.org/proceedings/lrec2006/pdf/595_pdf.pdf @@ -3328,7 +3328,7 @@ marinelli-bindi-2006-proper - Sonja E.Bosch + Sonja E.Bosch LaurettePretorius JackieJones Towards machine-readable lexicons for <fixed-case>S</fixed-case>outh <fixed-case>A</fixed-case>frican <fixed-case>B</fixed-case>antu languages @@ -3347,7 +3347,7 @@ lechenadec-etal-2006-creation - YoshihikoHayashi + YoshihikoHayashi ToruIshida A Dictionary Model for Unifying Machine Readable Dictionaries and Computational Concept Lexicons http://www.lrec-conf.org/proceedings/lrec2006/pdf/600_pdf.pdf @@ -3385,7 +3385,7 @@ VítNováček - PavelSmrž + PavelSmrž JanPomikálek Text Mining for Semantic Relations as a Support Base of a Scientific Portal Generator http://www.lrec-conf.org/proceedings/lrec2006/pdf/606_pdf.pdf @@ -3393,10 +3393,10 @@ novacek-etal-2006-text - RaffaellaBernardi - AndreaBolognesi - CorradoSeidenari - FabioTamburini + RaffaellaBernardi + AndreaBolognesi + CorradoSeidenari + FabioTamburini <fixed-case>POS</fixed-case> tagset design for <fixed-case>I</fixed-case>talian http://www.lrec-conf.org/proceedings/lrec2006/pdf/608_pdf.pdf We aim to automatically induce a PoS tagset for Italian by analysing the distributional behaviour of Italian words. To this end, we propose an algorithm that (a) extracts information from loosely labelled dependency structures that encode only basic and broadly accepted syntactic relations, namely Head/Dependent and the distinction of dependents into Argument vs. Adjunct, and (b) derives a possible set of word classes. The paper reports on some preliminary experiments carried out using the induced tagset in conjunction with state-of-the-art PoS taggers. The method proposed to design a proper tagset exploits little, if any, language-specific knowledge: hence it is in principle applicable to any language. @@ -3407,7 +3407,7 @@ ToruHirano RyuIida AtsushiFujita - YujiMatsumoto + YujiMatsumoto Augmenting a Semantic Verb Lexicon with a Large Scale Collection of Example Sentences http://www.lrec-conf.org/proceedings/lrec2006/pdf/610_pdf.pdf One of the crucial issues in semantic parsing is how to reduce costs of collecting a sufficiently large amount of labeled data. This paper presents a new approach to cost-saving annotation of example sentences with predicate-argument structure information, taking Japanese as a target language. In this scheme, a large collection of unlabeled examples are first clustered and selectively sampled, and for each sampled cluster, only one representative example is given a label by a human annotator. The advantages of this approach are empirically supported by the results of our preliminary experiments, where we use an existing similarity function and naive sampling strategy. @@ -3424,8 +3424,8 @@ onelli-etal-2006-diacoris - EnekoAgirre - IzaskunAldezabal + EnekoAgirre + IzaskunAldezabal JoneEtxeberria EliPociello A Preliminary Study for Building the <fixed-case>B</fixed-case>asque <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank @@ -3443,8 +3443,8 @@ yamamoto-etal-2006-detection - EnekoAgirre - IzaskunAldezabal + EnekoAgirre + IzaskunAldezabal JoneEtxeberria EliIzagirre KarmeleMendizabal @@ -3490,7 +3490,7 @@ de-luca-nurnberger-2006-rebuilding - PavelSmrž + PavelSmrž Automatic Acquisition of Semantics-Extraction Patterns http://www.lrec-conf.org/proceedings/lrec2006/pdf/621_pdf.pdf This paper examines the use of parallel and comparable corpora for automatic acquisition of semantics-extraction patterns. It presents a new method of the pattern extraction which takes advantage of parallel texts to "port" text mining solutions from a source language to a target language. It is shown thatthe technique can help in situations when the extraction procedure is to beapplied in a language (languages) with a limited set of available resources,e.g. domain-specific thesauri. The primary motivation of our work lies in a particular multilingual e-learning system. For testing purposes, other applications of the given approach were implemented. They include pattern extraction from general texts (tested on wordnet relations), acquisition of domain-specific patterns from large parallel corpus of legal EU documents, and mining of subjectivity expressions for multilingual opinion extraction system. @@ -3512,10 +3512,10 @@ draxler-jansch-2006-speech - Benjamin K.Tsou - Tom B.Y.Lai + Benjamin K.Tsou + Tom B.Y.Lai K.K.Sin - Lawrence Y.L.Cheung + Lawrence Y.L.Cheung Court Stenography-To-Text (“<fixed-case>STT</fixed-case>”) in <fixed-case>H</fixed-case>ong <fixed-case>K</fixed-case>ong: A Jurilinguistic Engineering Effort http://www.lrec-conf.org/proceedings/lrec2006/pdf/624_pdf.pdf Implementation of legal bilingualism in Hong Kong after 1997 has necessitated the production of voluminous and extensive court proceedings and judgments in both Chinese and English. For the former, Cantonese, a dialect of Chinese, is the home language of more than 90% of the population in Hong Kong and so used in the courts. To record speech in Cantonese verbatim, a Chinese Computer-Aided Transcription system has been developed. The transcription system converts stenographic codes into Chinese text, i.e. from phonetic to orthographic representation of the language. The main challenge lies in the resolution of the sever ambiguity resulting from homocode problems in the conversion process. Cantonese Chinese is typified by problematic homonymy, which presents serious challenges. The N-gram statistical model is employed to estimate the most probable character string of the input transcription codes. Domain-specific corpora have been compiled to support the statistical computation. To improve accuracy, scalable techniques such as domain-specific transcription and special encoding are used. Put together, these techniques deliver 96% transcription accuracy. @@ -3567,7 +3567,7 @@ JulieMauclair YannickEstève SimonPetit-Renaud - PaulDeléglise + PaulDeléglise Automatic Detection of Well Recognized Words in Automatic Speech Transcriptions http://www.lrec-conf.org/proceedings/lrec2006/pdf/630_pdf.pdf This work adresses the use of confidence measures for extracting well recognized words with very low error rate from automatically transcribed segments in a unsupervised way. We present and compare several confidence measures and propose a method to merge them into a new one. We study its capabilities on extracting correct recognized word-segments compared to the amount of rejected words. We apply this fusion measure to select audio segments composed of words with a high confidence score. These segments come from an automatic transcription of french broadcast news given by our speech recognition system based on the CMU Sphinx3.3 decoder. Injecting new data resulting from unsupervised treatments of raw audio recordings in the training corpus of acoustic models gives statistically significant improvement (95% confident interval) in terms of word error rate. Experiments have been carried out on the corpus used during ESTER, the french evaluation campaign. @@ -3597,26 +3597,26 @@ nimb-2006-lexadv - VoulaGiouli - AlexisKonstandinidis - ElinaDesypri - HarrisPapageorgiou + VoulaGiouli + AlexisKonstandinidis + ElinaDesypri + HarrisPapageorgiou Multi-domain Multi-lingual Named Entity Recognition: Revisiting & Grounding the resources issue http://www.lrec-conf.org/proceedings/lrec2006/pdf/633_pdf.pdf The paper reports on the development methodology of a system aimed at multi-domain multi-lingual recognition and classification of names in texts, the focus being on the linguistic resources used for training and testing purposes. The corpus presented here has been collected and annotated in the framework of different projects the critical issue being the development of a final resource that is homogenous, re-usable and adaptable to different domains and languages with a view to robust multi-domain and multi-lingual NERC. giouli-etal-2006-multi - RebeccaPassonneau + RebeccaPassonneau NizarHabash - OwenRambow + OwenRambow Inter-annotator Agreement on a Multilingual Semantic Annotation Task http://www.lrec-conf.org/proceedings/lrec2006/pdf/634_pdf.pdf Six sites participated in the Interlingual Annotation of Multilingual Text Corpora (IAMTC) project (Dorr et al., 2004; Farwell et al., 2004; Mitamura et al., 2004). Parsed versions of English translations of news articles in Arabic, French, Hindi, Japanese, Korean and Spanish were annotated by up to ten annotators. Their task was to match open-class lexical items (nouns, verbs, adjectives, adverbs) to one or more concepts taken from the Omega ontology (Philpot et al., 2003), and to identify theta roles for verb arguments. The annotated corpus is intended to be a resource for meaning-based approaches to machine translation. Here we discuss inter-annotator agreement for the corpus. The annotation task is characterized by annotators’ freedom to select multiple concepts or roles per lexical item. As a result, the annotation categories are sets, the number of which is bounded only by the number of distinct annotator-lexical item pairs. We use a reliability metric designed to handle partial agreement between sets. The best results pertain to the part of the ontology derived from WordNet. We examine change over the course of the project, differences among annotators, and differences across parts of speech. Our results suggest a strong learning effect early in the project. passonneau-etal-2006-inter - RebeccaPassonneau + RebeccaPassonneau Measuring Agreement on Set-valued Items (<fixed-case>MASI</fixed-case>) for Semantic and Pragmatic Annotation http://www.lrec-conf.org/proceedings/lrec2006/pdf/636_pdf.pdf Annotation projects dealing with complex semantic or pragmatic phenomena face the dilemma of creating annotation schemes that oversimplify the phenomena, or that capture distinctions conventional reliability metrics cannot measure adequately. The solution to the dilemma is to develop metrics that quantify the decisions that annotators are asked to make. This paper discusses MASI, distance metric for comparing sets, and illustrates its use in quantifying the reliability of a specific dataset. Annotations of Summary Content Units (SCUs) generate models referred to as pyramids which can be used to evaluate unseen human summaries or machine summaries. The paper presents reliability results for five pairs of pyramids created for document sets from the 2003 Document Understanding Conference (DUC). The annotators worked independently of each other. Differences between application of MASI to pyramid annotation and its previous application to co-reference annotation are discussed. In addition, it is argued that a paradigmatic reliability study should relate measures of inter-annotator agreement to independent assessments, such as significance tests of the annotated variables with respect to other phenomena. In effect, what counts as sufficiently reliable intera-annotator agreement depends on the use the annotated data will be put to. @@ -3633,7 +3633,7 @@ MarcVerhagen RobertKnippen InderjeetMani - JamesPustejovsky + JamesPustejovsky Annotation of Temporal Relations with Tango http://www.lrec-conf.org/proceedings/lrec2006/pdf/638_pdf.pdf Temporal annotation is a complex task characterized by low markup speed and low inter-annotator agreements scores. Tango is a graphical annotation tool for temporal relations. It is developed for the TimeML annotation language and allows annotators to build a graph that resembles a timeline. Temporal relations are added by selecting events and drawing labeled arrows between them. Tango is integrated with a temporal closure component and includes features like SmartLink, user prompting and automatic linking of time expressions. Tango has been used to create two corpora with temporal annotation, TimeBank and the AQUAINT Opinion corpus. @@ -3649,14 +3649,14 @@ UweQuasthoff MatthiasRichter - ChristianBiemann + ChristianBiemann Corpus Portal for Search in Monolingual Corpora http://www.lrec-conf.org/proceedings/lrec2006/pdf/641_pdf.pdf A simple and flexible schema for storing and presenting monolingual language resources is proposed. In this format, data for 18 different languages is already available in various sizes. The data is provided free of charge for online use and download. The main target is to ease the application of algorithms for monolingual and interlingual studies. quasthoff-etal-2006-corpus - TristanVanrullen + TristanVanrullen PhilippeBlache Jean-MarieBalfourier Constraint-Based Parsing as an Efficient Solution: Results from the Parsing Evaluation Campaign <fixed-case>EAS</fixed-case>y @@ -3665,24 +3665,24 @@ vanrullen-etal-2006-constraint - AndreasEisele + AndreasEisele Parallel Corpora and Phrase-Based Statistical Machine Translation for New Language Pairs via Multiple Intermediaries http://www.lrec-conf.org/proceedings/lrec2006/pdf/643_pdf.pdf We present a large parallel corpus of texts published by the United Nations Organization, which we exploit for the creation ofphrase-based statistical machine translation (SMT) systems for new language pairs. We present a setup where phrase tables for these language pairs are used for translation between languages for which parallel corpora of sufficient size are so far not available. We give some preliminary results for this novel application of SMT and discuss further refinements. eisele-2006-parallel - OwenRambow - BonnieDorr - DavidFarwell + OwenRambow + BonnieDorr + DavidFarwell RebeccaGreen NizarHabash StephenHelmreich - EduardHovy - LoriLevin - Keith J.Miller + EduardHovy + LoriLevin + Keith J.Miller TerukoMitamura - FlorenceReeder + FlorenceReeder AdvaithSiddharthan Parallel Syntactic Annotation of Multiple Languages http://www.lrec-conf.org/proceedings/lrec2006/pdf/645_pdf.pdf @@ -3702,8 +3702,8 @@ galliano-etal-2006-corpus - Juan José RodríguezSoler - Pedro ConcejeroCerezo + Juan José RodríguezSoler + Pedro ConcejeroCerezo Carlos LázaroÁvila Daniel TapiasMerino Usability evaluation of 3<fixed-case>G</fixed-case> multimodal services in Telefónica Móviles España @@ -3721,7 +3721,7 @@ PorfírioFilipe - NunoMamede + NunoMamede A Framework to Integrate Ubiquitous Knowledge Modeling http://www.lrec-conf.org/proceedings/lrec2006/pdf/650_pdf.pdf This paper describes our contribution to let end users configure mixed-initiative spoken dialogue systems to suit their personalized goals. The main problem that we want to address is the reconfiguration of spoken language dialogue systems to deal with generic plug and play artifacts. Such reconfiguration can be seen as a portability problem and is a critical research issue. In order to solve this problem we describe a hybrid approach to design ubiquitous domain models that allows the dialogue system to perform recognition of available tasks on the fly. Our approach considers two kinds of domain knowledge: the global knowledge and the local knowledge. The global knowledge, that is modeled using a top-down approach, is associated at design time with the dialogue system itself. The local knowledge, that is modeled using a bottom-up approach, is defined with each one of the artifacts. When an artifact is activated or deactivated, a bilateral process, supported by a broker, updates the domain knowledge considering the artifact local knowledge. We assume that everyday artifacts are augmented with computational capabilities and semantic descriptions supported by their own knowledge model. A case study focusing a microwave oven is depicted. @@ -3730,7 +3730,7 @@ FeliceDell’Orletta AlessandroLenci - SimonettaMontemagni + SimonettaMontemagni VitoPirrelli Searching treebanks for functional constraints: cross-lingual experiments in grammatical relation assignment http://www.lrec-conf.org/proceedings/lrec2006/pdf/651_pdf.pdf @@ -3745,7 +3745,7 @@ declerck-2006-synaf - ChristelleAyache + ChristelleAyache BrigitteGrau AnneVilnat <fixed-case>EQ</fixed-case>ue<fixed-case>R</fixed-case>: the <fixed-case>F</fixed-case>rench Evaluation campaign of Question-Answering Systems @@ -3754,9 +3754,9 @@ ayache-etal-2006-equer - Maria Fernanda Bacelar doNascimento + Maria Fernanda Bacelar doNascimento José BettencourtGonçalves - LuísaPereira + LuísaPereira AntóniaEstrela AfonsoPereira RuiSantos @@ -3767,8 +3767,8 @@ nascimento-etal-2006-african - Benjamin K.Tsou - Oi YeeKwong + Benjamin K.Tsou + Oi YeeKwong Toward a Pan-<fixed-case>C</fixed-case>hinese Thesaurus http://www.lrec-conf.org/proceedings/lrec2006/pdf/656_pdf.pdf In this paper, we propose a corpus-based approach to the construction of a Pan-Chinese lexical resource, starting out with the aim to enrich existing Chinese thesauri in the Pan-Chinese context. The resulting thesaurus is thus expected to contain not only the core senses and usages of Chinese lexical items but also usages specific to individual Chinese speech communities. We introduce the ideas behind the construction of the resource, outline the steps to be taken, and discuss some preliminary analyses. The work is backed up by a unique and large Chinese synchronous corpus containing textual data from various Chinese speech communities including Hong Kong, Beijing, Taipei and Singapore. @@ -3776,7 +3776,7 @@ NellekeOostdijk - LouBoves + LouBoves User requirements analysis for the design of a reference corpus of written <fixed-case>D</fixed-case>utch http://www.lrec-conf.org/proceedings/lrec2006/pdf/657_pdf.pdf The Dutch Language Corpus Initiative (D-Coi) project aims to specify the design of a 500-million-word reference corpus of written Dutch, and to put the tools and procedures in place that are needed to actually construct such a corpus. One of the tasks in the project is to conduct a user requirements study that should provide the basis for the eventual design of the 500-million-word reference corpus. The present paper outlines the user requirements analysis and reports the results so far. @@ -3802,7 +3802,7 @@ AlbertoSimões - José JoãoAlmeida + José JoãoAlmeida <fixed-case>T</fixed-case>2<fixed-case>O</fixed-case> - Recycling Thesauri into a Multilingual Ontology http://www.lrec-conf.org/proceedings/lrec2006/pdf/664_pdf.pdf In this article we present T2O - a workbench to assist the process of translating heterogeneous resources into ontologies, to enrich and add multilingual information, to help programming with them, and to support ontology publishing. T2O is an ontology algebra. @@ -3816,7 +3816,7 @@ amsalu-2006-data - JörgTiedemann + JörgTiedemann <fixed-case>ISA</fixed-case> & <fixed-case>ICA</fixed-case> - Two Web Interfaces for Interactive Alignment of Bitexts alignment of parallel texts http://www.lrec-conf.org/proceedings/lrec2006/pdf/667_pdf.pdf ISA and ICA are two web interfaces for interactive alignment of parallel texts. ISA provides an interface for automatic and manual sentence alignment. It includes cognate filters and uses structural markup to improve automatic alignment and provides intuitive tools for editing them. Alignment results can be saved to disk or sent via e-mail. ICA provides an interface to the clue aligner from the Uplug toolbox. It allows one to set various parameters and visualizes alignment results in a two-dimensional matrix. Word alignments can be edited and saved to disk. @@ -3824,7 +3824,7 @@ Petra-MariaStrauß - HolgerHoffman + HolgerHoffman WolfgangMinker HeikoNeumann GüntherPalm @@ -3839,7 +3839,7 @@ strauss-etal-2006-wizard - Nancy L.Underwood + Nancy L.Underwood AgnesLisowska The Evolution of an Evaluation Framework for a Text Mining System http://www.lrec-conf.org/proceedings/lrec2006/pdf/670_pdf.pdf @@ -3859,7 +3859,7 @@ DanielStein PhilippeDreuw MortezaZahedi - HermannNey + HermannNey A <fixed-case>G</fixed-case>erman <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage Corpus of the Domain Weather Report http://www.lrec-conf.org/proceedings/lrec2006/pdf/673_pdf.pdf All systems for automatic sign language translation and recognition, in particular statistical systems, rely on adequately sized corpora. For this purpose, we created the Phoenix corpus that is based on German television weather reports translated into German Sign Language. It comes with a rich annotation of the video data, a bilingual text-based sentence corpus and a monolingual German corpus. All systems for automatic sign language translation and recognition, in particular statistical systems, rely on adequately sized corpora. For this purpose, we created the Phoenix corpus that is based on German television weather reports translated into German Sign Language. It comes with a rich annotation of the video data, a bilingual text-based sentence corpus and a monolingual German corpus. @@ -3868,7 +3868,7 @@ RobertoBartolini CaterinaCaracciolo - EmilianoGiovanetti + EmilianoGiovanetti AlessandroLenci SimoneMarchi VitoPirrelli @@ -3888,7 +3888,7 @@ mulloni-pekar-2006-automatic - TimothyBaldwin + TimothyBaldwin Su’adAwab Open Source Corpus Analysis Tools for <fixed-case>M</fixed-case>alay http://www.lrec-conf.org/proceedings/lrec2006/pdf/677_pdf.pdf @@ -3896,7 +3896,7 @@ baldwin-awab-2006-open - FideliaIbekwe-Sanjuan + FideliaIbekwe-Sanjuan A task-oriented framework for evaluating theme detection systems: A discussion paper http://www.lrec-conf.org/proceedings/lrec2006/pdf/678_pdf.pdf This paper discusses the inherent difficulties in evaluating systems for theme detection. Such systems are based essentially on unsupervised clustering aiming to discover the underlying structure in a corpus of texts. As the structures are precisely unknown beforehand, it is difficult to devise a satisfactory evaluation protocol. Several problems are posed by cluster evaluation: determining the optimal number of clusters, cluster content evaluation, topology of the discovered structure. Each of these problems has been studied separately but some of the proposed metrics portray significant flaws. Moreover, no benchmark has been commonly agreed upon. Finally, it is necessary to distinguish between task-oriented and activity-oriented evaluation as the two frameworks imply different evaluation protocols. Possible solutions to the activity-oriented evaluation can be sought from the data and text mining communities. @@ -3905,7 +3905,7 @@ A.Moreno AlbertFebrer - LluisMárquez + LluisMárquez Generation of Language Resources for the Development of Speech Technologies in <fixed-case>C</fixed-case>atalan http://www.lrec-conf.org/proceedings/lrec2006/pdf/679_pdf.pdf This paper describes a joint initiative of the Catalan and Spanish Government to produce Language Resources for the Catalan language. A similar methodology to the Basic Language Resource Kit (BLARK) concept was applied to determine the priorities on the production of the Language Resources. The paper shows the LR and tools currently available for the Catalan Language both for Language and Speech technologies. The production of large databases for Automatic Speech Recognition purposes already started. All the resources generated in the project follow EU standards, will be validated by an external centre and will be free and public available through ELRA. @@ -3913,7 +3913,7 @@ GeorgianaPuşcaşu - RuslanMitkov + RuslanMitkov If “it” were “then”, then when was “it”? Establishing the anaphoric role of “then” http://www.lrec-conf.org/proceedings/lrec2006/pdf/680_pdf.pdf The adverb "then" is among the most frequent Englishtemporal adverbs, being also capable of filling a variety of semantic roles. The identification of anaphoric usages of "then"is important for temporal expression resolution, while thetemporal relationship usage is important for event ordering. Given that previous work has not tackled the identification and temporal resolution of anaphoric "then", this paper presents a machine learning approach for setting apart anaphoric usages and a rule-based normaliser that resolves it with respect to an antecedent. The performance of the two modules is evaluated. The present paper also describes the construction of an annotated corpus and the subsequent derivation of training data required by the machine learning module. @@ -3921,7 +3921,7 @@ ViktorTrón - PéterHalácsy + PéterHalácsy PéterRebrus AndrásRung PéterVajda @@ -3934,7 +3934,7 @@ LeH. Phuong NguyenT. M. Huyen - RomaryLaurent + LaurentRomary RoussanalyAzim A <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar for <fixed-case>V</fixed-case>ietnamese http://www.lrec-conf.org/proceedings/lrec2006/pdf/685_pdf.pdf @@ -3990,10 +3990,10 @@ ueyama-2006-evaluation - JoséIria + JoséIria ChristopherBrewster - FabioCiravegna - YorickWilks + FabioCiravegna + YorickWilks An Incremental Tri-Partite Approach To Ontology Learning http://www.lrec-conf.org/proceedings/lrec2006/pdf/700_pdf.pdf In this paper we present a new approach to ontology learning. Its basis lies in a dynamic and iterative view of knowledge acquisition for ontologies. The Abraxas approach is founded on three resources, a set of texts, a set of learning patterns and a set of ontological triples, each of which must remain in equilibrium. As events occur which disturb this equilibrium various actions are triggered to re- establish a balance between the resources. Such events include acquisition of a further text from external resources such as the Web or the addition of ontological triples to the ontology. We develop the concept of a knowledge gap between the coverage of an ontology and the corpus of texts as a measure triggering actions. We present an overview of the algorithm and its functionalities. @@ -4001,27 +4001,27 @@ ThomasPellegrini - LoriLamel + LoriLamel Experimental detection of vowel pronunciation variants in <fixed-case>A</fixed-case>mharic http://www.lrec-conf.org/proceedings/lrec2006/pdf/701_pdf.pdf The pronunciation lexicon is a fundamental element in an automatic speech transcription system. It associates each lexical entry (usually a grapheme), with one or more phonemic or phone-like forms, the pronunciation variants. Thorough knowledge of the target language is a priori necessary to establish the pronunciation baseforms and variants. The reliance on human expertise can pose difficulties in developing a system for a language where such knowledge may not be readily available. In this article a speech recognizer is used to help select pronunciation variants in Amharic, the official language of Ethiopia, focusing on alternate choices for vowels. This study is carried out using an audio corpus composed of 37 hours of speech from radio broadcasts which were orthographically transcribed by native speakers. Since the corpus is relatively small for estimating pronunciation variants, a first set of studies were carried out at a syllabic level. Word lexica were then constructed based on the observed syllable occurences. Automatic alignments were compared for lexica containing different vowel variants, with both context-independent and context-dependent acoustic models sets. The variant2+ measure proposed in (Adda-Decker and Lamel, 1999) is used to assess the potential need for pronunciation variants. pellegrini-lamel-2006-experimental - RobertaCatizone + RobertaCatizone AngeloDalli - YorickWilks + YorickWilks Evaluating Automatically Generated Timelines from the Web http://www.lrec-conf.org/proceedings/lrec2006/pdf/702_pdf.pdf As web searches increase, there is a need to represent the search results in the most comprehensible way possible. In particular, we focus on search results from queries about people and places. The standard method for presentation of search results is an ordered list determined by the Web search engine. Although this is satisfactory in some cases, when searching for people and places, presenting the information indexed by time may be more desirable. We are developing a system called Cronopath, which generates a timeline of web search engine results by determining the time frame of each document in the collection and linking elements in the timeline to the relevant articles. In this paper, we propose evaluation guidelines for judging the quality of automatically generated timelines based on a set of common features. catizone-etal-2006-evaluating - IvanaKruijff-Korbayová + IvanaKruijff-Korbayová TilmanBecker - NateBlaylock + NateBlaylock CiprianGerstenberger - MichaelKaißer + MichaelKaißer PeterPoller VerenaRieser JanSchehl @@ -4031,11 +4031,11 @@ kruijff-korbayova-etal-2006-sammie - RebeccaPassonneau + RebeccaPassonneau RobertaBlitz - DavidElson + DavidElson AngelaGiral - JudithKlavans + JudithKlavans <fixed-case>CL</fixed-case>i<fixed-case>MB</fixed-case> <fixed-case>T</fixed-case>ool<fixed-case>K</fixed-case>it: A Case Study of Iterative Evaluation in a Multidisciplinary Project http://www.lrec-conf.org/proceedings/lrec2006/pdf/705_pdf.pdf Digital image collections in libraries and other curatorial institutions grow too rapidly to create new descriptive metadata for subject matter search or browsing. CLiMB (Computational Linguistics for Metadata Building) was a project designed to address this dilemma that involved computer scientists, linguists, librarians, and art librarians. The CLiMB project followed an iterative evaluation model: each next phase of the project emerged from the results of an evaluation. After assembling a suite of text processing tools to be used in extracting metada, we conducted a formative evaluation with thirteen participants, using a survey in which we varied the order and type of four conditions under which respondents would propose or select image search terms. Results of the formative evaluation led us to conclude that a CLiMB ToolKit would work best if its main function was to propose terms for users to review. After implementing a prototype ToolKit using a browser interface, we conducted an evaluation with ten experts. Users found the ToolKit very habitable, remained consistently satisfied throughout a lengthy evaluation, and selected a large number of terms per image. @@ -4043,7 +4043,7 @@ AnnaRumshisky - JamesPustejovsky + JamesPustejovsky Inducing Sense-Discriminating Context Patterns from Sense-Tagged Corpora http://www.lrec-conf.org/proceedings/lrec2006/pdf/706_pdf.pdf Traditionally, context features used in word sense disambiguation are based on collocation statistics and use only minimal syntactic and semantic information. Corpus Pattern Analysis is a technique for producing knowledge-rich context features that capture sense distinctions. It involves (1) identifying sense-carrying context patterns and using the derived context features to discriminate between the unseen instances. Both stages require manual seeding. In this paper, we show how to automate inducing sense-discriminating context features from a sense-tagged corpus. @@ -4051,7 +4051,7 @@ MilenKouylekov - BernardoMagnini + BernardoMagnini Building a Large-Scale Repository of Textual Entailment Rules http://www.lrec-conf.org/proceedings/lrec2006/pdf/707_pdf.pdf Entailment rules are rules where the left hand side (LHS) specifies some knowledge which entails the knowledge expressed n the RHS of the rule, with some degree of confidence. Simple entailment rules can be combined in complex entailment chains, which n turn are at the basis of entailment-based reasoning, which has been recently proposed as a pervasive and application independent approach to Natural Language Understanding. We present the first elease of a large-scale repository of entailment rules at the lexical level, which have been derived from a number of available resources, including WordNet and a word similarity database. Experiments on the PASCAL-RTE dataset show that this resource plays a crucial role in recognizing textual entailment. @@ -4059,18 +4059,18 @@ AlessandroMoschitti - RobertoBasili + RobertoBasili A Tree Kernel approach to Question and Answer Classification in Question Answering Systems http://www.lrec-conf.org/proceedings/lrec2006/pdf/708_pdf.pdf A critical step in Question Answering design is the definition of the models for question focus identification and answer extraction. In case of factoid questions, we can use a question classifier (trained according to a target taxonomy) and a named entity recognizer. Unfortunately, this latter cannot be applied to generate answers related to non-factoid questions. In this paper, we tackle such problem by designing classifiers of non-factoid answers. As the feature design for this learning task is very complex, we take advantage of tree kernels to generate large feature set from the syntactic parse trees of passages relevant to the target question. Such kernels encode syntactic and lexical information in Support Vector Machines which can decide if a sentence focuses on a target taxonomy subject. The experiments with SVMs on the TREC 10 dataset show that our approach is an interesting future research. moschitti-basili-2006-tree - Philippe Boulade Mareüil + Philippe Boulade Mareüil Christophed’Alessandro AlexanderRaake GérardBailly - Marie-NeigeGarcia + Marie-NeigeGarcia MichelMorel A joint intelligibility evaluation of <fixed-case>F</fixed-case>rench text-to-speech synthesis systems: the <fixed-case>E</fixed-case>va<fixed-case>S</fixed-case>y <fixed-case>SUS</fixed-case>/<fixed-case>ACR</fixed-case> campaign http://www.lrec-conf.org/proceedings/lrec2006/pdf/709_pdf.pdf @@ -4078,7 +4078,7 @@ de-mareuil-etal-2006-joint - Winston NAnderson + Winston NAnderson Petronella MKotzé Finite state tokenisation of an orthographical disjunctive agglutinative language: The verbal segment of <fixed-case>N</fixed-case>orthern <fixed-case>S</fixed-case>otho http://www.lrec-conf.org/proceedings/lrec2006/pdf/710_pdf.pdf @@ -4111,17 +4111,17 @@ nitta-etal-2006-building - RoserSaurí + RoserSaurí MarcVerhagen - JamesPustejovsky + JamesPustejovsky <fixed-case>S</fixed-case>link<fixed-case>ET</fixed-case>: A Partial Modal Parser for Events http://www.lrec-conf.org/proceedings/lrec2006/pdf/716_pdf.pdf We present SlinkET, a parser for identifying contexts of event modality in text developed within the TARSQI (Temporal Awareness and Reasoning Systems for Question Interpretation) research framework. SlinkET is grounded on TimeML, a specification language for capturing temporal and event related information in discourse, which provides an adequate foundation to handle event modality. SlinkET builds on top of a robust event recognizer, and provides each relevant event with a value that specifies the degree of certainty about its factuality; e.g., whether it has happened or holds (factive or counter-factive), whether it is being reported or witnessed by somebody else (evidential), or if it is introduced as a possibility (modal). It is based on well-established technology in the field (namely, finite-state techniques), and informed with corpus-induced knowledge that relies on basic information, such as morphological features, POS, and chunking. SlinkET is under continuing development and it currently achieves a performance ratio of 70% F1-measure. sauri-etal-2006-slinket - ChristopherCieri - MarkLiberman + ChristopherCieri + MarkLiberman More Data and Tools for More Languages and Research Areas: A Progress Report on <fixed-case>LDC</fixed-case> Activities http://www.lrec-conf.org/proceedings/lrec2006/pdf/717_pdf.pdf This presentation reports on recent progress the Linguistic Data Consortium has made in addressing the needs of multiple research communities by collecting, annotating and distributing, simplifying access and developing standards and tools. Specifically, it describes new trends in publication, a sample of recent projects and significant improvements to LDC Online that improve access to LDC data especially for those with limited computing support. @@ -4151,7 +4151,7 @@ voghera-cutugno-2006-observatory - KamelSmaïli + KamelSmaïli CarolineLavecchia Jean-PaulHaton Linguistic features modeling based on Partial New Cache @@ -4160,8 +4160,8 @@ smaili-etal-2006-linguistic - StefanSchulz - KornélMarkó + StefanSchulz + KornélMarkó PhilippDaumke UdoHahn SusanneHanser @@ -4189,7 +4189,7 @@ uryupina-2006-coreference - Keith J.Miller + Keith J.Miller MichelleVanni Formal v. Informal: Register-Differentiated <fixed-case>A</fixed-case>rabic <fixed-case>MT</fixed-case> Evaluation in the <fixed-case>PLATO</fixed-case> Paradigm http://www.lrec-conf.org/proceedings/lrec2006/pdf/727_pdf.pdf @@ -4229,7 +4229,7 @@ schiehlen-spranger-2006-mass - Andrew W.Cole + Andrew W.Cole Corpus Development and Publication http://www.lrec-conf.org/proceedings/lrec2006/pdf/738_pdf.pdf This paper will discuss issues relevant to corpus development and publication at the LDC and will illustrate those issues by examining the history of three LDC corpora. This paper will also briefly examine alternative corpus creation and distribution methods and their challenges. The intent of this paper is to increase the available linguistic resources by describing the regulatory and technical environment and thus improving the understanding and interaction between corpus providers and distributors. @@ -4244,7 +4244,7 @@ gibbon-tseng-2006-discourse - LeonardoLesmo + LeonardoLesmo LivioRobaldo From Natural Language to Databases via Ontologies http://www.lrec-conf.org/proceedings/lrec2006/pdf/740_pdf.pdf @@ -4272,13 +4272,13 @@ raake-katz-2006-us - StephanieStrassel - ChristopherCieri - AndrewCole - DeniseDipersio - MarkLiberman + StephanieStrassel + ChristopherCieri + AndrewCole + DeniseDipersio + MarkLiberman XiaoyiMa - MohamedMaamouri + MohamedMaamouri KazuakiMaeda Integrated Linguistic Resources for Language Exploitation Technologies http://www.lrec-conf.org/proceedings/lrec2006/pdf/745_pdf.pdf @@ -4301,8 +4301,8 @@ behrens-milde-2006-eclipse - HarrisPapageorgiou - ElinaDesipri + HarrisPapageorgiou + ElinaDesipri MariaKoutsombogera KanellaPouli ProkopisProkopidis @@ -4321,7 +4321,7 @@ NellaCucurullo - SimonettaMontemagni + SimonettaMontemagni MatildePaoli EugenioPicchi EvaSassolini @@ -4332,7 +4332,7 @@ XiaoyiMa - ChristopherCieri + ChristopherCieri Corpus Support for Machine Translation at <fixed-case>LDC</fixed-case> http://www.lrec-conf.org/proceedings/lrec2006/pdf/754_pdf.pdf This paper describes LDC's efforts in collecting, creating and processing different types of linguistic data, including lexicons, parallel text, multiple translation corpora, and human assessment of translation quality, to support the research and development in Machine Translation. Through a combination of different procedures and core technologies, the LDC was able to create very large, high quality, and cost-efficient corpora, which have contributed significantly to recent advances in Machine Translation. Multiple translation corpora and human assessment together facilitate, validate and improve automatic evaluation metrics, which are vital to the development of MT systems. The Bilingual Internet Text Search (BITS) and Champollion sentence aligner enable the finding and processing of large quantities of parallel text. All specifications and tools used by LDC and described in the paper are or will be available to the general public. @@ -4340,12 +4340,12 @@ AnnBies - StephanieStrassel + StephanieStrassel HaejoongLee KazuakiMaeda SethKulick YangLiu - MaryHarper + MaryHarper MatthewLease Linguistic Resources for Speech Parsing http://www.lrec-conf.org/proceedings/lrec2006/pdf/755_pdf.pdf @@ -4353,7 +4353,7 @@ bies-etal-2006-linguistic - TomaszObrębski + TomaszObrębski MichałStolarski <fixed-case>UAM</fixed-case> Text Tools - a flexible <fixed-case>NLP</fixed-case> architecture http://www.lrec-conf.org/proceedings/lrec2006/pdf/756_pdf.pdf @@ -4390,9 +4390,9 @@ habash-etal-2006-design - GrażynaVetulani + GrażynaVetulani ZygmuntVetulani - TomaszObrębski + TomaszObrębski Syntactic Lexicon of <fixed-case>P</fixed-case>olish Predicative Nouns http://www.lrec-conf.org/proceedings/lrec2006/pdf/760_pdf.pdf In the paper we report realization of SyntLex project aiming at construction of a full lexicon grammar for Polish. The lexicon-grammar based paradigm in computer linguistics is derived from the predicate logic and attributes a central role to the predicative constructions. An important class of syntactic constructions in many languages (French, English, Polish and other Slavonic languages in particular) are those based on verbo-nominal collocations, with the verb playing a support role with respect to the noun considered as carrying the predicative information. In this paper we refer to the former research by one of the authors aiming at full description of verbo-nominal predicative constructions for Polish in the form of an electronic resource for LI applications. We describe procedures to complete and corpus-validate the resource obtained so far. @@ -4406,8 +4406,8 @@ alonge-2006-italian - JoséIria - FabioCiravegna + JoséIria + FabioCiravegna A Methodology and Tool for Representing Language Resources for Information Extraction http://www.lrec-conf.org/proceedings/lrec2006/pdf/765_pdf.pdf In recent years there has been a growing interest in clarifying the process of Information Extraction (IE) from documents, particularly when coupled with Machine Learning. We believe that a fundamental step forward in clarifying the IE process would be to be able to perform comparative evaluations on the use of different representations. However, this is difficult because most of the time the way information is represented is too tightly coupled with the algorithm at an implementation level, making it impossible to vary representation while keeping the algorithm constant. A further motivation behind our work is to reduce the complexity of designing, developing and testing IE systems. The major contribution of this work is in defining a methodology and providing a software infrastructure for representing language resources independently of the algorithm, mainly for Information Extraction but with application in other fields - we are currently evaluating its use for ontology learning and document classification. @@ -4421,7 +4421,7 @@ halpin-2006-automatic - HarryBunt + HarryBunt AmandaSchiffrin Methodological Aspects of Semantic Annotation http://www.lrec-conf.org/proceedings/lrec2006/pdf/769_pdf.pdf @@ -4430,7 +4430,7 @@ WhitneyGegg-Harrison - Donna K.Byron + Donna K.Byron <fixed-case>PYCOT</fixed-case>: An <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory-based Pronoun Resolution Toolkit http://www.lrec-conf.org/proceedings/lrec2006/pdf/770_pdf.pdf In this paper, we present PYCOT, a pronoun resolution toolkit. This toolkit is written in the Python programming language and is intended to be an addition to the open-source NLTK collection of natural language processing tools. We discuss the design of the module as well as studies of its performance on pronoun resolution in English and in Korean. @@ -4439,9 +4439,9 @@ EmmaBarker RyuichiroHigashinaka - FrançoisMairesse - RobertGaizauskas - MarilynWalker + FrançoisMairesse + RobertGaizauskas + MarilynWalker JonathanFoster Simulating Cub Reporter Dialogues: The collection of naturalistic human-human dialogues for information access to text archives http://www.lrec-conf.org/proceedings/lrec2006/pdf/772_pdf.pdf @@ -4451,7 +4451,7 @@ JeongwooKo LaurieHiyakumoto - EricNyberg + EricNyberg Exploiting Multiple Semantic Resources for Answer Selection http://www.lrec-conf.org/proceedings/lrec2006/pdf/774_pdf.pdf This paper describes the utility of semantic resources such as the Web, WordNet and gazetteers in the answer selection process for a question-answering system. In contrast with previous work using individual semantic resources to support answer selection, our work combines multiple resources to boost the confidence scores assigned to correct answers and evaluates different combination strategies based on unweighted sums, weighted linear combinations, and logistic regression. We apply our approach to select answers from candidates produced by three different extraction techniques of varying quality, focusing on TREC questions whose answers represent locations or proper-names. Our experimental results demonstrate that the combination of semantic resources is more effective than individual resources for all three extraction techniques, improving answer selection accuracy by as much as 32.35% for location questions and 72% for proper-name questions. Of the combination strategies tested, logistic regression models produced the best results for both location and proper-name questions. @@ -4459,7 +4459,7 @@ KazuakiMaeda - ChristopherCieri + ChristopherCieri KevinWalker Low-cost Customized Speech Corpus Creation for Speech Technology Applications http://www.lrec-conf.org/proceedings/lrec2006/pdf/776_pdf.pdf @@ -4478,7 +4478,7 @@ ChristophBenzmüller HelmutHoracek HenriLesourd - IvanaKruijff-Korbayova + IvanaKruijff-Korbayova MarvinSchiller MagdalenaWolska A corpus of tutorial dialogs on theorem proving; the influence of the presentation of the study-material @@ -4489,7 +4489,7 @@ JamalLaoudi Calandra R.Tate - Clare R.Voss + Clare R.Voss Task-based <fixed-case>MT</fixed-case> Evaluation: From Who/When/Where Extraction to Event Understanding http://www.lrec-conf.org/proceedings/lrec2006/pdf/779_pdf.pdf Task-based machine translation (MT) evaluation asks, how well do people perform text-handling tasks given MT output? This method of evaluation yields an extrinsic assessment of an MT engine, in terms of users’ task performance on MT output. While this method is time-consuming, its key advantage is that MT users and stakeholders understand how to interpret the assessment results. Prior experiments showed that subjects can extract individual who-, when-, and where-type elements of information from MT output passages that were not especially fluent. This paper presents the results of a pilot study to assess a slightly more complex task: when given such wh-items already identified in an MT output passage, how well can subjects properly select from and place these items into wh-typed slots to complete a sentence-template about the passage’s event? The results of the pilot with nearly sixty subjects, while only preliminary, indicate that this task was extremely challenging: given six test templates to complete, half of the subjects had no completely correct templates and 42% had exactly one completely correct template. The provisional interpretation of this pilot study is that event-based template completion defines a task ceiling, against which to evaluate future improvements on MT engines. @@ -4499,7 +4499,7 @@ KazuakiMaeda HaejoongLee JulieMedero - StephanieStrassel + StephanieStrassel A New Phase in Annotation Tool Development at the <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium: The Evolution of the Annotation Graph Toolkit http://www.lrec-conf.org/proceedings/lrec2006/pdf/780_pdf.pdf The Linguistic Data Consortium (LDC) has created various annotated linguistic data for a variety of common task evaluation programs and projects to create shared linguistic resources. The majority of these annotated linguistic data were created with highly customized annotation tools developed at LDC. The Annotation Graph Toolkit (AGTK) has been used as a primary infrastructure for annotation tool development at LDC in recent years. Thanks to the direct feedback from annotation task designers and annotators in-house, annotation tool development at LDC has entered a new, more mature and productive phase. This paper describes recent additions to LDC's annotation tools that are newly developed or significantly improved since our last report at the Fourth International Conference on Language Resource and Evaluation Conference in 2004. These tools are either directly based on AGTK or share a common philosophy with other AGTK tools. @@ -4519,7 +4519,7 @@ JeongwooKo FumihikoMurase TerukoMitamura - EricNyberg + EricNyberg MasahikoTateishi IchiroAkahori Analyzing the Effects of Spoken Dialog Systems on Driving Behavior @@ -4540,7 +4540,7 @@ VasileRus - ArtGraesser + ArtGraesser The Look and Feel of a Confident Entailer http://www.lrec-conf.org/proceedings/lrec2006/pdf/788_pdf.pdf The paper presents a software system that embodies a lexico-syntactic approach to the task of Textual Entailment. Although the approach is based on a minimal set of resources it is highly confident. The architecture of the system is open and can be easily expanded with more and deeper processing modules. Results on a standard data set are presented. @@ -4555,24 +4555,24 @@ marton-katz-2006-using - FinleyLacatusu + FinleyLacatusu AndrewHickl - SandaHarabagiu + SandaHarabagiu Impact of Question Decomposition on the Quality of Answer Summaries http://www.lrec-conf.org/proceedings/lrec2006/pdf/792_pdf.pdf Generating answers to complex questions in the form of multi-document summaries requires access to question decomposition methods. In this paper we present three methods for decomposing complex questions and we evaluate their impact on the responsiveness of the answers they enable. lacatusu-etal-2006-impact - SandaHarabagiu - Cosmin AdrianBejan + SandaHarabagiu + Cosmin AdrianBejan An Answer Bank for Temporal Inference http://www.lrec-conf.org/proceedings/lrec2006/pdf/794_pdf.pdf Answering questions that ask about temporal information involves several forms of inference. In order to develop question answering capabilities that benefit from temporal inference, we believe that a large corpus of questions and answers that are discovered based on temporal information should be available. This paper describes our methodology for creating AnswerTime-Bank, a large corpus of questions and answers on which Question Answering systems can operate using complex temporal inference. harabagiu-bejan-2006-answer - Paul C.Morărescu + Paul C.Morărescu Principles for annotating and reasoning with spatial information http://www.lrec-conf.org/proceedings/lrec2006/pdf/795_pdf.pdf In this paper we present the first phase of the ongoing SpaceBank project that attempts to create a linguistic resource for annotating and reasoning with spatial information from text. SpaceBank is the spatial counterpart of TimeBank, an electronic resource for temporal semantics and reasoning. The paper focuses on building an ontology of lexicalized spatial concepts. The textual occurrences of the concepts in this ontology will be annotated using the SpaceML language, briefly described here. SpaceBank is designed to be integrated with TimeBank, for a spatio-temporal model of the textual information. @@ -4580,7 +4580,7 @@ SujianLi - QinLu + QinLu WenjieLi RuifengXu Interaction between Lexical Base and Ontology with Formal Concept Analysis @@ -4598,7 +4598,7 @@ RuifengXu - QinLu + QinLu SujianLi The Design and Construction of A <fixed-case>C</fixed-case>hinese Collocation Bank http://www.lrec-conf.org/proceedings/lrec2006/pdf/799_pdf.pdf @@ -4614,8 +4614,8 @@ NimaanAbdillahi - NoceraPascal - BonastreJean-François + PascalNocera + Jean-FrançoisBonastre Towards automatic transcription of <fixed-case>S</fixed-case>omali language http://www.lrec-conf.org/proceedings/lrec2006/pdf/801_pdf.pdf Most African countries follow an oral tradition system to transmit their cultural, scientific and historic heritage through generations. This ancestral knowledge accumulated during centuries is today threatened of disappearing. This paper presents the first steps in the building of an automatic speech to text transcription for African oral patrimony, particularly the Djibouti cultural heritage. This work is dedicated to process Somali language, which represents half of the targeted Djiboutian audio archives. The main problem is the lack of annotated audio and textual resources for this language. We describe the principal characteristics of audio (10 hours) and textual (3M words) training corpora collected. Using the large vocabulary speech recognizer engine, Speeral, developed at the Laboratoire Informatique d’Avignon (LIA) (computer science laboratory of Avignon), we obtain about 20.9% word error rate (WER). This is an encouraging result, considering the small size of our corpora. This first recognizer of Somali language will serve as a reference and will be used to transcribe some Djibouti cultural archives. We will also discuss future ways of research like sub-words indexing of audio archives, related to the specificities of the Somali language. @@ -4640,8 +4640,8 @@ SabriElkateb - WilliamBlack - HoracioRodríguez + WilliamBlack + HoracioRodríguez MusaAlkhalifa PiekVossen AdamPease @@ -4652,7 +4652,7 @@ elkateb-etal-2006-building - BenoîtSagot + BenoîtSagot PierreBoullier Deep non-probabilistic parsing of large corpora http://www.lrec-conf.org/proceedings/lrec2006/pdf/806_pdf.pdf @@ -4670,11 +4670,11 @@ brekke-etal-2006-automatic - AlexKlassmann + AlexKlassmann FreddyOffenga - DaanBroeder - RomualdSkiba - PeterWittenburg + DaanBroeder + RomualdSkiba + PeterWittenburg Comparison of Resource Discovery Methods http://www.lrec-conf.org/proceedings/lrec2006/pdf/808_pdf.pdf It is an ongoing debate whether categorical systems created by some experts are an appropriate way to help users finding useful resources in the internet. However for the much more restricted domain of language documentation such a category system might still prove reasonable if not indispensable. This article gives an overview over the particular IMDI category set and presents a rough evaluation of its practical use at the Max-Planck-Institute Nijmegen. @@ -4691,9 +4691,9 @@ lucas-etal-2006-information - BenoîtSagot + BenoîtSagot LionelClément - ÉricVillemonte de La Clergerie + ÉricVillemonte de La Clergerie PierreBoullier The Lefff 2 syntactic lexicon for <fixed-case>F</fixed-case>rench: architecture, acquisition, use http://www.lrec-conf.org/proceedings/lrec2006/pdf/810_pdf.pdf @@ -4716,8 +4716,8 @@ geyken-schrader-2006-lexikonet - DjamelMostefa - OlivierHamon + DjamelMostefa + OlivierHamon KhalidChoukri Evaluation of Automatic Speech Recognition and Speech Language Translation within <fixed-case>TC</fixed-case>-<fixed-case>STAR</fixed-case>:Results from the first evaluation campaign http://www.lrec-conf.org/proceedings/lrec2006/pdf/813_pdf.pdf @@ -4725,8 +4725,8 @@ mostefa-etal-2006-evaluation - DjamelMostefa - Marie-NeigeGarcia + DjamelMostefa + Marie-NeigeGarcia KhalidChoukri Evaluation of multimodal components within <fixed-case>CHIL</fixed-case>: The evaluation packages and results http://www.lrec-conf.org/proceedings/lrec2006/pdf/814_pdf.pdf @@ -4741,13 +4741,13 @@ peters-2006-impact - BernardoMagnini + BernardoMagnini DaniloGiampiccolo LiliAunimo - ChristelleAyache + ChristelleAyache PetyaOsenova - AnselmoPeñas - Maartende Rijke + AnselmoPeñas + Maartende Rijke BogdanSacaleanu DianaSantos RichardSutcliffe @@ -4757,10 +4757,10 @@ magnini-etal-2006-multilingual - Marie-NeigeGarcia + Marie-NeigeGarcia Christophed’Alessandro GérardBailly - PhilippeBoula de Mareüil + PhilippeBoula de Mareüil MichelMorel A joint prosody evaluation of <fixed-case>F</fixed-case>rench text-to-speech synthesis systems http://www.lrec-conf.org/proceedings/lrec2006/pdf/817_pdf.pdf diff --git a/data/xml/L08.xml b/data/xml/L08.xml index b14f218aea..53b8764701 100644 --- a/data/xml/L08.xml +++ b/data/xml/L08.xml @@ -3,13 +3,13 @@ Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08) - NicolettaCalzolari - KhalidChoukri - BenteMaegaard - JosephMariani - JanOdijk - SteliosPiperidis - DanielTapias + NicolettaCalzolari + KhalidChoukri + BenteMaegaard + JosephMariani + JanOdijk + SteliosPiperidis + DanielTapias European Language Resources Association (ELRA)
Marrakech, Morocco
May @@ -22,7 +22,7 @@ KathrinEichler HolmerHemsen - GünterNeumann + GünterNeumann Unsupervised Relation Extraction From Web Documents http://www.lrec-conf.org/proceedings/lrec2008/pdf/425_paper.pdf The IDEX system is a prototype of an interactive dynamic Information Extraction (IE) system. A user of the system expresses an information request in the form of a topic description, which is used for an initial search in order to retrieve a relevant set of documents. On basis of this set of documents, unsupervised relation extraction and clustering is done by the system. The results of these operations can then be interactively inspected by the user. In this paper we describe the relation extraction and clustering components of the IDEX system. Preliminary evaluation results of these components are presented and an overview is given of possible enhancements to improve the relation extraction and clustering components. @@ -30,7 +30,7 @@ MuathAlzghool - DianaInkpen + DianaInkpen Combining Multiple Models for Speech Information Retrieval http://www.lrec-conf.org/proceedings/lrec2008/pdf/45_paper.pdf In this article we present a method for combining different information retrieval models in order to increase the retrieval performance in a Speech Information Retrieval task. The formulas for combining the models are tuned on training data. Then the system is evaluated on test data. The task is particularly difficult because the text collection is automatically transcribed spontaneous speech, with many recognition errors. Also, the topics are real information needs, difficult to satisfy. Information Retrieval systems are not able to obtain good results on this data set, except for the case when manual summaries are included. @@ -46,9 +46,9 @@ CvetanaKrstev - RankaStanković + RankaStanković DuškoVitas - IvanObradović + IvanObradović The Usage of Various Lexical Resources and Tools to Improve the Performance of Web Search Engines http://www.lrec-conf.org/proceedings/lrec2008/pdf/67_paper.pdf In this paper we present how resources and tools developed within the Human Language Technology Group at the University of Belgrade can be used for tuning queries before submitting them to a web search engine. We argue that the selection of words chosen for a query, which are of paramount importance for the quality of results obtained by the query, can be substantially improved by using various lexical resources, such as morphological dictionaries and wordnets. These dictionaries enable semantic and morphological expansion of the query, the latter being very important in highly inflective languages, such as Serbian. Wordnets can also be used for adding another language to a query, if appropriate, thus making the query bilingual. Problems encountered in retrieving documents of interest are discussed and illustrated by examples. A brief description of resources is given, followed by an outline of the web tool which enables their integration. Finally, a set of examples is chosen in order to illustrate the use of the lexical resources and tool in question. Results obtained for these examples show that the number of documents obtained through a query by using our approach can double and even quadruple in some cases. @@ -57,13 +57,13 @@ StevenBird RobertDale - BonnieDorr + BonnieDorr BryanGibson MarkJoseph Min-YenKan DongwonLee BrettPowley - DragomirRadev + DragomirRadev Yee FanTan The <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology Reference Corpus: A Reference Dataset for Bibliographic Research in Computational Linguistics http://www.lrec-conf.org/proceedings/lrec2008/pdf/445_paper.pdf @@ -72,15 +72,15 @@ MarianReed - DeniseDiPersio - ChristopherCieri + DeniseDiPersio + ChristopherCieri The <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium Member Survey: Purpose, Execution and Results http://www.lrec-conf.org/proceedings/lrec2008/pdf/755_paper.pdf The Linguistic Data Consortium (LDC) seeks to provide its members with quality linguistic resources and services. In order to pursue these ideals and to remain current, LDC monitors the needs and sentiments of its communities. One mechanism LDC uses to generate feedback on consortium and resource issues is the LDC Member Survey. The survey allows LDC Members and nonmembers to provide LDC with valuable insight into their own unique circumstances, their current and future data needs and their views on LDC’s role in meeting them. When the 2006 Survey was found to be a useful tool for communicating with the Consortium membership, a 2007 Survey was organized and administered. As a result of the surveys, LDC has confirmed that it has made a positive impact on the community and has identified ways to improve the quality of service and the diversity of monthly offerings. Many respondents recommended ways to improve LDC’s functions, ordering mechanism and webpage. Some of these comments have inspired changes to LDC’s operation and strategy. reed-etal-2008-linguistic - DieterVan Uytvanck + DieterVan Uytvanck AlexDukers JacquelijnRingersma PaulTrilsbeek @@ -90,9 +90,9 @@ van-uytvanck-etal-2008-language - TamásVáradi - StevenKrauwer - PeterWittenburg + TamásVáradi + StevenKrauwer + PeterWittenburg MartinWynne KimmoKoskenniemi <fixed-case>CLARIN</fixed-case>: Common Language Resources and Technology Infrastructure @@ -103,16 +103,16 @@ JeroenGeertzen VolhaPetukhova - HarryBunt + HarryBunt Evaluating Dialogue Act Tagging with Naive and Expert Annotators http://www.lrec-conf.org/proceedings/lrec2008/pdf/279_paper.pdf In this paper the dialogue act annotation of naive and expert annotators, both annotating the same data, are compared in order to characterise the insights annotations made by different kind of annotators may provide for evaluating dialogue act tagsets. It is argued that the agreement among naive annotators provides insight in the clarity of the tagset, whereas agreement among expert annotators provides an indication of how reliably the tagset can be applied when errors are ruled out that are due to deficiencies in understanding the concepts of the tagset, to a lack of experience in using the annotation tool, or to little experience in annotation more generally. An indication of the differences between the two groups in terms of inter-annotator agreement and tagging accuracy on task-oriented dialogue in different domains, annotated with the DIT++ dialogue act tagset is presented, and the annotations of both groups are assessed against a gold standard. Additionally, the effect of the reduction of the tagset’s granularity on the performances of both groups is looked into. In general, it is concluded that the annotations of both groups provide complementary insights in reliability, clarity, and more fundamental conceptual issues. geertzen-etal-2008-evaluating - Drahomíra „johanka“Spoustová + Drahomíra „johanka“Spoustová PavelPecina - JanHajič + JanHajič MiroslavSpousta Validating the Quality of Full Morphological Annotation http://www.lrec-conf.org/proceedings/lrec2008/pdf/290_paper.pdf @@ -122,7 +122,7 @@ KremenaIvanova UlrichHeid - SabineSchulte im Walde + SabineSchulte im Walde AdamKilgarriff JanPomikálek Evaluating a <fixed-case>G</fixed-case>erman Sketch Grammar: A Case Study on Noun Phrase Case @@ -132,7 +132,7 @@ MarkMcConville - Myroslava O.Dzikovska + Myroslava O.Dzikovska Evaluating Complement-Modifier Distinctions in a Semantically Annotated Corpus http://www.lrec-conf.org/proceedings/lrec2008/pdf/691_paper.pdf We evaluate the extent to which the distinction between semantically core and non-core dependents as used in the FrameNet corpus corresponds to the traditional distinction between syntactic complements and modifiers of a verb, for the purposes of harvesting a wide-coverage verb lexicon from FrameNet for use in deep linguistic processing applications. We use the VerbNet verb database as our gold standard for making judgements about complement-hood, in conjunction with our own intuitions in cases where VerbNet is incomplete. We conclude that there is enough agreement between the two notions (0.85) to make practical the simple expedient of equating core PP dependents in FrameNet with PP complements in our lexicon. Doing so means that we lose around 13% of PP complements, whilst around 9% of the PP dependents left in the lexicon are not complements. @@ -140,7 +140,7 @@ Petra-MariaStrauß - HolgerHoffmann + HolgerHoffmann WolfgangMinker HeikoNeumann GüntherPalm @@ -153,12 +153,12 @@ strauss-etal-2008-pit - MartineAdda-Decker - ClaudeBarras - GillesAdda - PatrickParoubek - Philippe Boulade Mareüil - BenoitHabert + MartineAdda-Decker + ClaudeBarras + GillesAdda + PatrickParoubek + Philippe Boulade Mareüil + BenoitHabert Annotation and analysis of overlapping speech in political interviews http://www.lrec-conf.org/proceedings/lrec2008/pdf/788_paper.pdf Looking for a better understanding of spontaneous speech-related phenomena and to improve automatic speech recognition (ASR), we present here a study on the relationship between the occurrence of overlapping speech segments and disfluencies (filled pauses, repetitions, revisions) in political interviews. First we present our data, and our overlap annotation scheme. We detail our choice of overlapping tags and our definition of disfluencies; the observed ratios of the different overlapping tags are examined, as well as their correlation with of the speaker role and propose two measures to characterise speakers’ interacting attitude: the attack/resist ratio and the attack density. We then study the relationship between the overlapping speech segments and the disfluencies in our corpus, before concluding on the perspectives that our experiments offer. @@ -166,7 +166,7 @@ NicolasMoreau - DjamelMostefa + DjamelMostefa RainerStiefelhagen SusanneBurger KhalidChoukri @@ -186,11 +186,11 @@ InderjeetMani - JanetHitzeman + JanetHitzeman JustinRicher DaveHarris RobQuimby - BenWellner + BenWellner <fixed-case>S</fixed-case>patial<fixed-case>ML</fixed-case>: Annotation Scheme, Corpora, and Tools http://www.lrec-conf.org/proceedings/lrec2008/pdf/106_paper.pdf SpatialML is an annotation scheme for marking up references to places in natural language. It covers both named and nominal references to places, grounding them where possible with geo-coordinates, including both relative and absolute locations, and characterizes relationships among places in terms of a region calculus. A freely available annotation editor has been developed for SpatialML, along with a corpus of annotated documents released by the Linguistic Data Consortium. Inter-annotator agreement on SpatialML is 77.0 F-measure for extents on that corpus. An automatic tagger for SpatialML extents scores 78.5 F-measure. A disambiguator scores 93.0 F-measure and 93.4 Predictive Accuracy. In adapting the extent tagger to new domains, merging the training data from the above corpus with annotated data in the new domain provides the best performance. @@ -198,9 +198,9 @@ StevenBethard - WilliamCorvey + WilliamCorvey SaraKlingenstein - James H.Martin + James H.Martin Building a Corpus of Temporal-Causal Structure http://www.lrec-conf.org/proceedings/lrec2008/pdf/229_paper.pdf While recent corpus annotation efforts cover a wide variety of semantic structures, work on temporal and causal relations is still in its early stages. Annotation efforts have typically considered either temporal relations or causal relations, but not both, and no corpora currently exist that allow the relation between temporals and causals to be examined empirically. We have annotated a corpus of 1000 event pairs for both temporal and causal relations, focusing on a relatively frequent construction in which the events are conjoined by the word “and”. Temporal relations were annotated using an extension of the BEFORE and AFTER scheme used in the TempEval competition, and causal relations were annotated using a scheme based on connective phrases like “and as a result”. The annotators achieved 81.2% agreement on temporal relations and 77.8% agreement on causal relations. Analysis of the resulting corpus revealed some interesting findings, for example, that over 30% of CAUSAL relations do not have an underlying BEFORE relation. The corpus was also explored using machine learning methods, and while model performance exceeded all baselines, the results suggested that simple grammatical cues may be insufficient for identifying the more difficult temporal and causal relations. @@ -215,7 +215,7 @@ zarcone-lenci-2008-computational - CorinaForăscu + CorinaForăscu <fixed-case>GMT</fixed-case> to +2 or how can <fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> be used in <fixed-case>R</fixed-case>omanian http://www.lrec-conf.org/proceedings/lrec2008/pdf/817_paper.pdf The paper describes the construction and usage of the Romanian version of the TimeBank corpus. The success rate of 96.53% for the automatic import of the temporal annotation from English to Romanian shows that the automatic transfer is a worth doing enterprise if temporality is to be studied in another language than the one for which TimeML, the annotation standard used, was developed. A preliminary study identifies the main situations that occurred during the automatic transfer, as well as temporal elements not (yet) marked in the English corpus. @@ -224,15 +224,15 @@ NianwenXue HuaZhong - Kai-YunChen + Kai-YunChen Annotating “tense” in a Tense-less Language http://www.lrec-conf.org/proceedings/lrec2008/pdf/877_paper.pdf In the context of Natural Language Processing, annotation is about recovering implicit information that is useful for natural language applications. In this paper we describe a “tense” annotation task for Chinese - a language that does not have grammatical tense - that is designed to infer the temporal location of a situation in relation to the temporal deixis, the moment of speech. If successful, this would be a highly rewarding endeavor as it has application in many natural language systems. Our preliminary experiments show that while this is a very challenging annotation task for which high annotation consistency is very difficult but not impossible to achieve. We show that guidelines that provide a conceptually intuitive framework will be crucial to the success of this annotation effort. xue-etal-2008-annotating - BarbaraPlank - KhalilSima’an + BarbaraPlank + KhalilSima’an Subdomain Sensitive Statistical Parsing using Raw Corpora http://www.lrec-conf.org/proceedings/lrec2008/pdf/120_paper.pdf Modern statistical parsers are trained on large annotated corpora (treebanks). These treebanks usually consist of sentences addressing different subdomains (e.g. sports, politics, music), which implies that the statistics gathered by current statistical parsers are mixtures of subdomains of language use. In this paper we present a method that exploits raw subdomain corpora gathered from the web to introduce subdomain sensitivity into a given parser. We employ statistical techniques for creating an ensemble of domain sensitive parsers, and explore methods for amalgamating their predictions. Our experiments show that introducing domain sensitivity by exploiting raw corpora can improve over a tough, state-of-the-art baseline. @@ -254,8 +254,8 @@ StephanOepen UlrichCallmeier BertholdCrysmann - DanFlickinger - BerndKiefer + DanFlickinger + BerndKiefer Some Fine Points of Hybrid Natural Language Parsing http://www.lrec-conf.org/proceedings/lrec2008/pdf/349_paper.pdf Large-scale grammar-based parsing systems nowadays increasingly rely on independently developed, more specialized components for pre-processing their input. However, different tools make conflicting assumptions about very basic properties such as tokenization. To make linguistic annotation gathered in pre-processing available to “deep” parsing, a hybrid NLP system needs to establish a coherent mapping between the two universes. Our basic assumption is that tokens are best described by attribute value matrices (AVMs) that may be arbitrarily complex. We propose a powerful resource-sensitive rewrite formalism, “chart mapping”, that allows us to mediate between the token descriptions delivered by shallow pre-processing components and the input expected by the grammar. We furthermore propose a novel way of unknown word treatment where all generic lexical entries are instantiated that are licensed by a particular token AVM. Again, chart mapping is used to give the grammar writer full control as to which items (e.g. native vs. generic lexical items) enter syntactic parsing. We discuss several further uses of the original idea and report on early experiences with the new machinery. @@ -265,7 +265,7 @@ JeremyNicholson ValiaKordoni YiZhang - TimothyBaldwin + TimothyBaldwin RebeccaDridan Evaluating and Extending the Coverage of <fixed-case>HPSG</fixed-case> Grammars: A Case Study for <fixed-case>G</fixed-case>erman http://www.lrec-conf.org/proceedings/lrec2008/pdf/794_paper.pdf @@ -281,8 +281,8 @@ zhang-kordoni-2008-robust - JahnaOtterbacher - DragomirRadev + JahnaOtterbacher + DragomirRadev Modeling Document Dynamics: an Evolutionary Approach http://www.lrec-conf.org/proceedings/lrec2008/pdf/115_paper.pdf News articles about the same event published over time have properties that challenge NLP and IR applications. A cluster of such texts typically exhibits instances of paraphrase and contradiction, as sources update the facts surrounding the story, often due to an ongoing investigation. The current hypothesis is that the stories “evolve” over time, beginning with the first text published on a given topic. This is tested using a phylogenetic approach as well as one based on language modeling. The fit of the evolutionary models is evaluated with respect to how well they facilitate the recovery of chronological relationships between the documents. Over all data clusters, the language modeling approach consistently outperforms the phylogenetics model. However, on manually collected clusters in which the documents are published within short time spans of one another, both have a similar performance, and produce statistically significant results on the document chronology recovery evaluation. @@ -290,7 +290,7 @@ DominicWiddows - KathleenFerraro + KathleenFerraro Semantic Vectors: a Scalable Open Source Package and Online Technology Management Application http://www.lrec-conf.org/proceedings/lrec2008/pdf/300_paper.pdf This paper describes the open source SemanticVectors package that efficiently creates semantic vectors for words and documents from a corpus of free text articles. We believe that this package can play an important role in furthering research in distributional semantics, and (perhaps more importantly) can help to significantly reduce the current gap that exists between good research results and valuable applications in production software. Two clear principles that have guided the creation of the package so far include ease-of-use and scalability. The basic package installs and runs easily on any Java-enabled platform, and depends only on Apache Lucene. Dimension reduction is performed using Random Projection, which enables the system to scale much more effectively than other algorithms used for the same purpose. This paper also describes a trial application in the Technology Management domain, which highlights some user-centred design challenges which we believe are also key to successful deployment of this technology. @@ -306,7 +306,7 @@ KimLuyckx - WalterDaelemans + WalterDaelemans <fixed-case>P</fixed-case>ersonae: a Corpus for Author and Personality Prediction from Text http://www.lrec-conf.org/proceedings/lrec2008/pdf/759_paper.pdf We present a new corpus for computational stylometry, more specifically authorship attribution and the prediction of author personality from text. Because of the large number of authors (145), the corpus will allow previously impossible studies of variation in features considered predictive for writing style. The innovative meta-information (personality profiles of the authors) associated with these texts allows the study of personality prediction, a not yet very well researched aspect of style. In this paper, we describe the contents of the corpus and show its use in both authorship attribution and personality prediction. We focus on features that have been proven useful in the field of author recognition. Syntactic features like part-of-speech n-grams are generally accepted as not being under the author’s conscious control and therefore providing good clues for predicting gender or authorship. We want to test whether these features are helpful for personality prediction and authorship attribution on a large set of authors. Both tasks are approached as text categorization tasks. First a document representation is constructed based on feature selection from the linguistically analyzed corpus (using the Memory-Based Shallow Parser (MBSP)). These are associated with each of the 145 authors or each of the four components of the Myers-Briggs Type Indicator (Introverted-Extraverted, Sensing-iNtuitive, Thinking-Feeling, Judging-Perceiving). Authorship attribution on 145 authors achieves results around 50%-accuracy. Preliminary results indicate that the first two personality dimensions can be predicted fairly accurately. @@ -314,7 +314,7 @@ LeanneSpracklin - DianaInkpen + DianaInkpen AmiyaNayak Using the Complexity of the Distribution of Lexical Elements as a Feature in Authorship Attribution http://www.lrec-conf.org/proceedings/lrec2008/pdf/892_paper.pdf @@ -338,9 +338,9 @@ LauraStoia - Darla MagdaleneShockley - Donna K.Byron - EricFosler-Lussier + Darla MagdaleneShockley + Donna K.Byron + EricFosler-Lussier <fixed-case>SCARE</fixed-case>: a Situated Corpus with Annotated Referring Expressions http://www.lrec-conf.org/proceedings/lrec2008/pdf/164_paper.pdf Even though a wealth of speech data is available for the dialog systems research community, the particular field of situated language has yet to find an appropriate free resource. The corpus required to answer research questions related to situated language should connect world information to the human language. In this paper we report on the release of a corpus of English spontaneous instruction giving situated dialogs. The corpus was collected using the Quake environment, a first-person virtual reality game, and consists of pairs of participants completing a direction giver- direction follower scenario. The corpus contains the collected audio and video, as well as word-aligned transcriptions and the positional/gaze information of the player. Referring expressions in the corpus are annotated with the IDs of their virtual world referents. @@ -348,14 +348,14 @@ HanSloetjes - PeterWittenburg + PeterWittenburg Annotation by Category: <fixed-case>ELAN</fixed-case> and <fixed-case>ISO</fixed-case> <fixed-case>DCR</fixed-case> http://www.lrec-conf.org/proceedings/lrec2008/pdf/208_paper.pdf The Data Category Registry is one of the ISO initiatives towards the establishment of standards for Language Resource management, creation and coding. Successful application of the DCR depends on the availability of tools that can interact with it. This paper describes the first steps that have been taken to provide users of the multimedia annotation tool ELAN, with the means to create references from tiers and annotations to data categories defined in the ISO Data Category Registry. It first gives a brief description of the capabilities of ELAN and the structure of the documents it creates. After a concise overview of the goals and current state of the ISO DCR infrastructure, a description is given of how the preliminary connectivity with the DCR is implemented in ELAN. sloetjes-wittenburg-2008-annotation - HennieBrugman + HennieBrugman VéroniqueMalaisé LauraHollink A Common Multimedia Annotation Framework for Cross Linking Cultural Heritage Digital Collections @@ -366,7 +366,7 @@ PhilippeBlache RoxaneBertrand - GaëlleFerré + GaëlleFerré Creating and Exploiting Multimodal Annotated Corpora http://www.lrec-conf.org/proceedings/lrec2008/pdf/449_paper.pdf The paper presents a project of the Laboratoire Parole & Langage which aims at collecting, annotating and exploiting a corpus of spoken French in a multimodal perspective. The project directly meets the present needs in linguistics where a growing number of researchers become aware of the fact that a theory of communication which aims at describing real interactions should take into account the complexity of these interactions. However, in order to take into account such a complexity, linguists should have access to spoken corpora annotated in different fields. The paper presents the annotation schemes used in phonetics, morphology and syntax, prosody, gestuality at the LPL together with the type of linguistic description made from the annotations seen in two examples. @@ -374,7 +374,7 @@ AnnieZaenen - DanielBobrow + DanielBobrow CleoCondoravdi The Encoding of lexical implications in <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et Predicates of change of locations http://www.lrec-conf.org/proceedings/lrec2008/pdf/101_paper.pdf @@ -390,8 +390,8 @@ burchardt-pennacchiotti-2008-fate - StephenBoxwell - MichaelWhite + StephenBoxwell + MichaelWhite Projecting <fixed-case>P</fixed-case>ropbank Roles onto the <fixed-case>CCG</fixed-case>bank http://www.lrec-conf.org/proceedings/lrec2008/pdf/789_paper.pdf This paper describes a method of accurately projecting Propbank roles onto constituents in the CCGbank and automatically annotating verbal categories with the semantic roles of their arguments. This method will be used to improve the structure of the derivations in the CCGbank and to facilitate research on semantic role tagging and broad coverage generation with CCG. @@ -400,8 +400,8 @@ PiekVossen IsaMaks - RoxaneSegers - HennieVanderVliet + RoxaneSegers + HennieVanderVliet Integrating Lexical Units, Synsets and Ontology in the Cornetto Database http://www.lrec-conf.org/proceedings/lrec2008/pdf/255_paper.pdf Cornetto is a two-year Stevin project (project number STE05039) in which a lexical semantic database is built that combines Wordnet with Framenet-like information for Dutch. The combination of the two lexical resources (the Dutch Wordnet and the Referentie Bestand Nederlands) will result in a much richer relational database that may improve natural language processing (NLP) technologies, such as word sense-disambiguation, and language-generation systems. In addition to merging the Dutch lexicons, the database is also mapped to a formal ontology to provide a more solid semantic backbone. Since the database represents different traditions and perspectives of semantic organization, a key issue in the project is the alignment of concepts across the resources. This paper discusses our methodology to first automatically align the word meanings and secondly to manually revise the most critical cases. @@ -409,12 +409,12 @@ JavierÁlvez - JordiAtserias + JordiAtserias JordiCarrera SalvadorCliment EgoitzLaparra AntoniOliver - GermanRigau + GermanRigau Complete and Consistent Annotation of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et using the Top Concept Ontology http://www.lrec-conf.org/proceedings/lrec2008/pdf/390_paper.pdf This paper presents the complete and consistent ontological annotation of the nominal part of WordNet. The annotation has been carried out using the semantic features defined in the EuroWordNet Top Concept Ontology and made available to the NLP community. Up to now only an initial core set of 1,024 synsets, the so-called Base Concepts, was ontologized in such a way. The work has been achieved by following a methodology based on an iterative and incremental expansion of the initial labeling through the hierarchy while setting inheritance blockage points. Since this labeling has been set on the EuroWordNet’s Interlingual Index (ILI), it can be also used to populate any other wordnet linked to it through a simple porting process. This feature-annotated WordNet is intended to be useful for a large number of semantic NLP tasks and for testing for the first time componential analysis on real environments. Moreover, the quantitative analysis of the work shows that more than 40% of the nominal part of WordNet is involved in structure errors or inadequacies. @@ -430,7 +430,7 @@ GwénoléLecorvé - GuillaumeGravier + GuillaumeGravier PascaleSébillot On the Use of Web Resources and Natural Language Processing Techniques to Improve Automatic Speech Recognition Systems http://www.lrec-conf.org/proceedings/lrec2008/pdf/155_paper.pdf @@ -439,8 +439,8 @@ StanislasOger - GeorgesLinarès - FrédéricBéchet + GeorgesLinarès + FrédéricBéchet Local Methods for On-Demand Out-of-Vocabulary Word Retrieval http://www.lrec-conf.org/proceedings/lrec2008/pdf/193_paper.pdf Most of the Web-based methods for lexicon augmenting consist in capturing global semantic features of the targeted domain in order to collect relevant documents from the Web. We suggest that the local context of the out-of-vocabulary (OOV) words contains relevant information on the OOV words. With this information, we propose to use the Web to build locally-augmented lexicons which are used in a final local decoding pass. First, an automatic web based OOV word detection method is proposed. Then, we demonstrate the relevance of the Web for the OOV word retrieval. Different methods are proposed to retrieve the hypothesis words. We finally retrieve about 26% of the OOV words with a lexicon increase of less than 1000 words using the reference context. @@ -448,11 +448,11 @@ MarcKemps-Snijders - AlexKlassmann + AlexKlassmann ClausZinn PeterBerck - AlbertRussel - PeterWittenburg + AlbertRussel + PeterWittenburg Exploring and Enriching a Language Resource Archive via the Web http://www.lrec-conf.org/proceedings/lrec2008/pdf/205_paper.pdf The “download first, then process paradigm” is still the predominant working method amongst the research community. The web-based paradigm, however, offers many advantages from a tool development and data management perspective as they allow a quick adaptation to changing research environments. Moreover, new ways of combining tools and data are increasingly becoming available and will eventually enable a true web-based workflow approach, thus challenging the “download first, then process” paradigm. The necessary infrastructure for managing, exploring and enriching language resources via the Web will need to be delivered by projects like CLARIN and DARIAH. @@ -467,7 +467,7 @@ schiel-mogele-2008-talking - ErhardHinrichs + ErhardHinrichs MonicaLău In Contrast - A Complex Discourse Connective http://www.lrec-conf.org/proceedings/lrec2008/pdf/75_paper.pdf @@ -499,18 +499,18 @@ LucieMladová ŠárkaZikánová - EvaHajičová + EvaHajičová From Sentence to Discourse: Building an Annotation Scheme for Discourse Based on <fixed-case>P</fixed-case>rague Dependency Treebank http://www.lrec-conf.org/proceedings/lrec2008/pdf/638_paper.pdf The present paper reports on a preparatory research for building a language corpus annotation scenario capturing the discourse relations in Czech. We primarily focus on the description of the syntactically motivated relations in discourse, basing our findings on the theoretical background of the Prague Dependency Treebank 2.0 and the Penn Discourse Treebank 2. Our aim is to revisit the present-day syntactico-semantic (tectogrammatical) annotation in the Prague Dependency Treebank, extend it for the purposes of a sentence-boundary-crossing representation and eventually to design a new, discourse level of annotation. In this paper, we propose a feasible process of such a transfer, comparing the possibilities the Praguian dependency-based approach offers with the Penn discourse annotation based primarily on the analysis and classification of discourse connectives. mladova-etal-2008-sentence - DavidDay - JanetHitzeman + DavidDay + JanetHitzeman MichaelWick KeithCrouch - MassimoPoesio + MassimoPoesio A Corpus for Cross-Document Co-reference http://www.lrec-conf.org/proceedings/lrec2008/pdf/762_paper.pdf This paper describes a newly created text corpus of news articles that has been annotated for cross-document co-reference. Being able to robustly resolve references to entities across document boundaries will provide a useful capability for a variety of tasks, ranging from practical information retrieval applications to challenging research in information extraction and natural language understanding. This annotated corpus is intended to encourage the development of systems that can more accurately address this problem. A manual annotation tool was developed that allowed the complete corpus to be searched for likely co-referring entity mentions. This corpus of 257K words links mentions of co-referent people, locations and organizations (subject to some additional constraints). Each of the documents had already been annotated for within-document co-reference by the LDC as part of the ACE series of evaluations. The annotation process was bootstrapped with a string-matching-based linking procedure, and we report on some of initial experimentation with the data. The cross-document linking information will be made publicly available. @@ -518,7 +518,7 @@ AntonioToral - RafaelMuñoz + RafaelMuñoz MonicaMonachini Named Entity <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et http://www.lrec-conf.org/proceedings/lrec2008/pdf/188_paper.pdf @@ -527,7 +527,7 @@ CristinaMota - RalphGrishman + RalphGrishman Is this <fixed-case>NE</fixed-case> tagger getting old? http://www.lrec-conf.org/proceedings/lrec2008/pdf/303_paper.pdf This paper focuses on the influence of changing the text time frame on the performance of a named entity tagger. We followed a twofold approach to investigate this subject: on the one hand, we analyzed a corpus that spans 8 years, and, on the other hand, we assessed the performance of a name tagger trained and tested on that corpus. We created 8 samples from the corpus, each drawn from the articles for a particular year. In terms of corpus analysis, we calculated the corpus similarity and names shared between samples. To see the effect on tagger performance, we implemented a semi-supervised name tagger based on co-training; then, we trained and tested our tagger on those samples. We observed that corpus similarity, names shared between samples, and tagger performance all decay as the time gap between the samples increases. Furthermore, we observed that the corpus similarity and names shared correlate with the tagger F-measure. These results show that named entity recognition systems may become obsolete in a short period of time. @@ -535,9 +535,9 @@ BenjaminFarber - DayneFreitag + DayneFreitag NizarHabash - OwenRambow + OwenRambow Improving <fixed-case>NER</fixed-case> in <fixed-case>A</fixed-case>rabic Using a Morphological Tagger http://www.lrec-conf.org/proceedings/lrec2008/pdf/625_paper.pdf We discuss a named entity recognition system for Arabic, and show how we incorporated the information provided by MADA, a full morphological tagger which uses a morphological analyzer. Surprisingly, the relevant features used are the capitalization of the English gloss chosen by the tagger, and the fact that an analysis is returned (that a word is not OOV to the morphological analyzer). The use of the tagger also improves over a third system which just uses a morphological analyzer, yielding a 14\% reduction in error over the baseline. We conduct a thorough error analysis to identify sources of success and failure among the variations, and show that by combining the systems in simple ways we can significantly influence the precision-recall trade-off. @@ -552,7 +552,7 @@ busemann-zhang-2008-identifying - MariusPaşca + MariusPaşca Low-Complexity Heuristics for Deriving Fine-Grained Classes of Named Entities from Web Textual Data http://www.lrec-conf.org/proceedings/lrec2008/pdf/886_paper.pdf We introduce a low-complexity method for acquiring fine-grained classes of named entities from the Web. The method exploits the large amounts of textual data available on the Web, while avoiding the use of any expensive text processing techniques or tools. The quality of the extracted classes is encouraging with respect to both the precision of the sets of named entities acquired within various classes, and the labels assigned to the sets of named entities. @@ -560,7 +560,7 @@ Jin-JiLi - Dong-IlKim + Dong-IlKim Jong-HyeokLee Annotation Guidelines for <fixed-case>C</fixed-case>hinese-<fixed-case>K</fixed-case>orean Word Alignment http://www.lrec-conf.org/proceedings/lrec2008/pdf/137_paper.pdf @@ -568,9 +568,9 @@ li-etal-2008-annotation - OndřejBojar + OndřejBojar MiroslavJaníček - ZdeněkŽabokrtský + ZdeněkŽabokrtský PavelČeška PeterBeňa <fixed-case>C</fixed-case>z<fixed-case>E</fixed-case>ng 0.7: Parallel Corpus with Community-Supplied Translations @@ -579,9 +579,9 @@ bojar-etal-2008-czeng - JonathanClark - RobertFrederking - LoriLevin + JonathanClark + RobertFrederking + LoriLevin Toward Active Learning in Data Selection: Automatic Discovery of Language Features During Elicitation http://www.lrec-conf.org/proceedings/lrec2008/pdf/308_paper.pdf Data Selection has emerged as a common issue in language technologies. We define Data Selection as the choosing of a subset of training data that is most effective for a given task. This paper describes deductive feature detection, one component of a data selection system for machine translation. Feature detection determines whether features such as tense, number, and person are expressed in a language. The database of the World Atlas of Language Structures provides a gold standard against which to evaluate feature detection. The discovered features can be used as input to a Navigator, which uses active learning to determine which piece of language data is the most important to acquire next. @@ -589,7 +589,7 @@ MichaelMohler - RadaMihalcea + RadaMihalcea Babylon Parallel Text Builder: Gathering Parallel Texts for Low-Density Languages http://www.lrec-conf.org/proceedings/lrec2008/pdf/313_paper.pdf This paper describes Babylon, a system that attempts to overcome the shortage of parallel texts in low-density languages by supplementing existing parallel texts with texts gathered automatically from the Web. In addition to the identification of entire Web pages, we also propose a new feature specifically designed to find parallel text chunks within a single document. Experiments carried out on the Quechua-Spanish language pair show that the system is successful in automatically identifying a significant amount of parallel texts on the Web. Evaluations of a machine translation system trained on this corpus indicate that the Web-gathered parallel texts can supplement manually compiled parallel texts and perform significantly better than the manually compiled texts when tested on other Web-gathered data. @@ -597,8 +597,8 @@ Cong-PhapHuynh - ChristianBoitet - HervéBlanchon + ChristianBoitet + HervéBlanchon <fixed-case>SECT</fixed-case>ra_w.1: an Online Collaborative System for Evaluating, Post-editing and Presenting <fixed-case>MT</fixed-case> Translation Corpora http://www.lrec-conf.org/proceedings/lrec2008/pdf/639_paper.pdf SECTra_w is a web-oriented system mainly dedicated to the evaluation of MT systems. After importing a source corpus, and possibly reference translations, one can call various MT systems, store their results, and have a collection of human judges perform subjective evaluation online (fluidity, adequacy). It is also possible to perform objective, task-oriented evaluation by letting humans post-edit the MT results, using a web translation editor, and measuring an edit distance and/or the post-editing time. The post-edited results can be added to the set of reference translations, or constitute it if there were no references. SECTra_w makes it possible to show not only tables of figures as results of an evaluation campaign, but also the real data (source, MT outputs, references, post-edited outputs), and to make the post-edition effort sensible by transforming the trace of the edit distance computation in an intuitive presentation, much like a “revision” presentation in Word. The system is written in java under Xwiki and uses the Ajax technique. It can handle large, multilingual and multimedia corpora: EuroParl, BTEC, ERIM (bilingual interpreted dialogues with audio and text), Unesco-B@bel, and a test corpus by France Telecom have been loaded together and used in tests. @@ -607,14 +607,14 @@ MarkArehart ChrisWolf - Keith J.Miller + Keith J.Miller Adjudicator Agreement and System Rankings for Person Name Search http://www.lrec-conf.org/proceedings/lrec2008/pdf/647_paper.pdf We have analyzed system rankings for person name search algorithms using a data set for which several versions of ground truth were developed by employing different means of resolving adjudicator conflicts. Thirteen algorithms were ranked by F-score, using bootstrap resampling for significance testing, on a dataset containing 70,000 romanized names from various cultures. We found some disagreement among the four adjudicators, with kappa ranging from 0.57 to 0.78. Truth sets based on a single adjudicator, and on the intersection or union of positive adjudications produced sizeable variability in scoring sensitivity - and to a lesser degree rank order - compared to the consensus truth set. However, results on truth sets constructed by randomly choosing an adjudicator for each item were highly consistent with the consensus. The implication is that an evaluation where one adjudicator has judged each item is nearly as good as a more expensive and labor-intensive one where multiple adjudicators have judged each item and conflicts are resolved through voting. arehart-etal-2008-adjudicator - Paulo C Fde Oliveira + Paulo C Fde Oliveira Edson WilsonTorrens AlexandreCidral SidneySchossland @@ -633,24 +633,24 @@ poibeau-messiant-2008-still - PeterSpyns - ElisabethD’Halleweyn - CatiaCucchiarini + PeterSpyns + ElisabethD’Halleweyn + CatiaCucchiarini The <fixed-case>D</fixed-case>utch-<fixed-case>F</fixed-case>lemish Comprehensive Approach to <fixed-case>HLT</fixed-case> Stimulation and Innovation: <fixed-case>STEVIN</fixed-case>, <fixed-case>HLT</fixed-case> Agency and beyond http://www.lrec-conf.org/proceedings/lrec2008/pdf/385_paper.pdf This paper shows how a research and industry stimulation programme on human language technologies (HLT) for Dutch can be “enhanced” with more specific innovation policy aspects to support the take-up by the HLT industry in the Netherlands and Flanders. Important to note is the distinction between the HLT programme itself (called STEVIN) with its specific related committees and actions and the overall policy instruments (HLT Agency, HLT steering board?) that try to span the entire domain of HLT for Dutch and have a more permanent character. The establishment of a pricing committee and a PR & communication working group is explained as a consequence of adopting the notion of “innovation system” as a theoretical framework. It means that a stronger emphasis is put on improving knowledge transfer and exchange amongst actors in the field. Therefore, the focus at the programme management level is shifting from the projects’ research activities producing results to gathering the results, making them available at a certain cost and advertising them through the appropriate channels to the appropriate potential customers. Our conclusion is that this policy stimulates the transfer from academia to industry though it is too soon for an in-depth assessment of the STEVIN programme and other HLT innovation policy instruments. spyns-etal-2008-dutch - ChristopherCieri - MarkLiberman + ChristopherCieri + MarkLiberman 15 Years of Language Resource Creation and Sharing: a Progress Report on <fixed-case>LDC</fixed-case> Activities http://www.lrec-conf.org/proceedings/lrec2008/pdf/861_paper.pdf This paper, the fifth in a series of biennial progress reports, reviews the activities of the Linguistic Data Consortium with particular emphasis on general trends in the language resource landscape and on changes that distinguish the two years since LDC’s last report at LREC from the preceding 8 years. After providing a perspective on the current landscape of language resources, the paper goes on to describe our vision of the role of LDC within the research communities it serves before sketching briefly specific publications and resources creations projects that have been the focus our attention since the last report. cieri-liberman-2008-15 - Anil KumarSingh + Anil KumarSingh KiranPala HarshitSurana Estimating the Resource Adaption Cost from a Resource Rich Language to a Similar Resource Poor Language @@ -659,7 +659,7 @@ singh-etal-2008-estimating - ValérieMapelli + ValérieMapelli VictoriaArranz HélèneMazo KhalidChoukri @@ -671,7 +671,7 @@ CarolPeters MartinBraschler - GiorgioDi Nunzio + GiorgioDi Nunzio NicolaFerro JulioGonzalo MarkSanderson @@ -681,8 +681,8 @@ peters-etal-2008-research - ScottPiao - JohnMcNaught + ScottPiao + JohnMcNaught SophiaAnaniadou Clustering Related Terms with Definitions http://www.lrec-conf.org/proceedings/lrec2008/pdf/515_paper.pdf @@ -692,7 +692,7 @@ NganNguyen Jin-DongKim - Jun’ichiTsujii + Jun’ichiTsujii Challenges in Pronoun Resolution System for Biomedical Text http://www.lrec-conf.org/proceedings/lrec2008/pdf/607_paper.pdf This paper presents our findings on the feasibility of doing pronoun resolution for biomedical texts, in comparison with conducting pronoun resolution for the newswire domain. In our experiments, we built a simple machine learning-based pronoun resolution system, and evaluated the system on three different corpora: MUC, ACE, and GENIA. Comparative statistics not only reveal the noticeable issues in constructing an effective pronoun resolution system for a new domain, but also provides a comprehensive view of those corpora often used for this task. @@ -700,17 +700,17 @@ BarryHaddow - BeatriceAlex + BeatriceAlex Exploiting Multiply Annotated Corpora in Biomedical Information Extraction Tasks http://www.lrec-conf.org/proceedings/lrec2008/pdf/516_paper.pdf This paper discusses the problem of utilising multiply annotated data in training biomedical information extraction systems. Two corpora, annotated with entities and relations, and containing a number of multiply annotated documents, are used to train named entity recognition and relation extraction systems. Several methods of automatically combining the multiple annotations to produce a single annotation are compared, but none produces better results than simply picking one of the annotated versions at random. It is also shown that adding extra singly annotated documents produces faster performance gains than adding extra multiply annotated documents. haddow-alex-2008-exploiting - YukaTateisi + YukaTateisi YusukeMiyao KenjiSagae - Jun’ichiTsujii + Jun’ichiTsujii <fixed-case>GENIA</fixed-case>-<fixed-case>GR</fixed-case>: a Grammatical Relation Corpus for Parser Evaluation in the Biomedical Domain http://www.lrec-conf.org/proceedings/lrec2008/pdf/496_paper.pdf We report the construction of a corpus for parser evaluation in the biomedical domain. A 50-abstract subset (492 sentences) of the GENIA corpus (Kim et al., 2003) is annotated with labeled head-dependent relations using the grammatical relations (GR) evaluation scheme (Carroll et al., 1998) ,which has been used for parser evaluation in the newswire domain. @@ -754,7 +754,7 @@ BartoszBroda MagdalenaDerwojedowa MaciejPiasecki - StanislawSzpakowicz + StanislawSzpakowicz Corpus-based Semantic Relatedness for the Construction of <fixed-case>P</fixed-case>olish <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et http://www.lrec-conf.org/proceedings/lrec2008/pdf/459_paper.pdf The construction of a wordnet, a labour-intensive enterprise, can be significantly assisted by automatic grouping of lexical material and discovery of lexical semantic relations. The objective is to ensure high quality of automatically acquired results before they are presented for lexicographers’ approval. We discuss a software tool that suggests synset members using a measure of semantic relatedness with a given verb or adjective; this extends previous work on nominal synsets in Polish WordNet. Syntactically-motivated constraints are deployed on a large morphologically annotated corpus of Polish. Evaluation has been performed via the WordNet-Based Similarity Test and additionally supported by human raters. A lexicographer also manually assessed a suitable sample of suggestions. The results compare favourably with other known methods of acquiring semantic relations. @@ -764,15 +764,15 @@ RafiyaBegum SamarHusain LakshmiBai - Dipti MisraSharma + Dipti MisraSharma Developing Verb Frames for <fixed-case>H</fixed-case>indi http://www.lrec-conf.org/proceedings/lrec2008/pdf/491_paper.pdf This paper introduces an ongoing work on developing verb frames for Hindi. Verb frames capture syntactic commonalities of semantically related verbs. The main objective of this work is to create a linguistic resource which will prove to be indispensable for various NLP applications. We also hope this resource to help us better understand Hindi verbs. We motivate the basic verb argument structure using relations as introduced by Panini. We show the methodology used in preparing these frames and the criteria followed for classifying Hindi verbs. begum-etal-2008-developing - KateForbes-Riley - DianeLitman + KateForbes-Riley + DianeLitman ScottSilliman AmrutaPurandare Uncertainty Corpus: Resource to Study User Affect in Complex Spoken Dialogue Systems @@ -782,7 +782,7 @@ MilanGnjatović - DietmarRoesner + DietmarRoesner On the Role of the <fixed-case>NIMITEK</fixed-case> Corpus in Developing an Emotion Adaptive Spoken Dialogue System http://www.lrec-conf.org/proceedings/lrec2008/pdf/149_paper.pdf This paper reports on the creation of the multimodal NIMITEK corpus of affected behavior in human-machine interaction and its role in the development of the NIMITEK prototype system. The NIMITEK prototype system is a spoken dialogue system for supporting users while they solve problems in a graphics system. The central feature of the system is adaptive dialogue management. The system dynamically defines a dialogue strategy according to the current state of the interaction (including also the emotional state of the user). Particular emphasis is devoted to the level of naturalness of interaction. We discuss that a higher level of naturalness can be achieved by combining a habitable natural language interface and an appropriate dialogue strategy. The role of the NIMITEK multimodal corpus in achieving these requirements is twofold: (1) in developing the model of attentional state on the level of user’s commands that facilitates processing of flexibly formulated commands, and (2) in defining the dialogue strategy that takes the emotional state of the user into account. Finally, we sketch the implemented prototype system and describe the incorporated dialogue management module. Whereas the prototype system itself is task-specific, the described underlying concepts are intended to be task-independent. @@ -804,7 +804,7 @@ LaureCharonnat GaëlleVidal - OlivierBoeffard + OlivierBoeffard Automatic Phone Segmentation of Expressive Speech http://www.lrec-conf.org/proceedings/lrec2008/pdf/596_paper.pdf In order to improve the flexibility and the precision of an automatic phone segmentation system for a type of expressive speech, the dubbing into French of fiction movies, we developed both the phonetic labeling process and the alignment process. The automatic labelling system relies on an automatic grapheme-to-phoneme conversion including all the variants of the phonetic chain and on HMM modeling. In this article, we will distinguish three sets of phone models: a set of context independent models, a set of left and right context dependant models and finally a mixing of the two that combines phone and triphone models according to the precision of alignment obtained for each phonetic broad-class. The three models are evaluated on a test corpus. On the one hand we notice a little decrease in the score of phonetic labelling mainly due to pauses insertions, but on the other hand the mixed set of models gives the best results for the score of precision of the alignment. @@ -824,7 +824,7 @@ Wei-HaoLin - AlexanderHauptmann + AlexanderHauptmann Vox Populi Annotation: Measuring Intensity of Ideological Perspectives by Aggregating Group Judgments http://www.lrec-conf.org/proceedings/lrec2008/pdf/600_paper.pdf Polarizing discussions about political and social issues are common in mass media. Annotations on the degree to which a sentence expresses an ideological perspective can be valuable for evaluating computer programs that can automatically identify strongly biased sentences, but such annotations remain scarce. We annotated the intensity of ideological perspectives expressed in 250 sentences by aggregating judgments from 18 annotators. We proposed methods of determining the number of annotators and assessing reliability, and showed the annotations were highly consistent across different annotator groups. @@ -832,8 +832,8 @@ CarmenBanea - RadaMihalcea - JanyceWiebe + RadaMihalcea + JanyceWiebe A Bootstrapping Method for Building Subjectivity Lexicons for Languages with Scarce Resources http://www.lrec-conf.org/proceedings/lrec2008/pdf/700_paper.pdf This paper introduces a method for creating a subjectivity lexicon for languages with scarce resources. The method is able to build a subjectivity lexicon by using a small seed set of subjective words, an online dictionary, and a small raw corpus, coupled with a bootstrapping process that ranks new candidate words based on a similarity measure. Experiments performed with a rule-based sentence level subjectivity classifier show an 18% absolute improvement in F-measure as compared to previously proposed semi-supervised methods. @@ -842,7 +842,7 @@ JosefRuppenhofer SwapnaSomasundaran - JanyceWiebe + JanyceWiebe Finding the Sources and Targets of Subjective Expressions http://www.lrec-conf.org/proceedings/lrec2008/pdf/709_paper.pdf As many popular text genres such as blogs or news contain opinions by multiple sources and about multiple targets, finding the sources and targets of subjective expressions becomes an important sub-task for automatic opinion analysis systems. We argue that while automatic semantic role labeling systems (ASRL) have an important contribution to make, they cannot solve the problem for all cases. Based on the experience of manually annotating opinions, sources, and targets in various genres, we present linguistic phenomena that require knowledge beyond that of ASRL systems. In particular, we address issues relating to the attribution of opinions to sources; sources and targets that are realized as zero-forms; and inferred opinions. We also discuss in some depth that for arguing attitudes we need to be able to recover propositions and not only argued-about entities. A recurrent theme of the discussion is that close attention to specific discourse contexts is needed to identify sources and targets correctly. @@ -850,7 +850,7 @@ VeselinStoyanov - ClaireCardie + ClaireCardie Annotating Topics of Opinions http://www.lrec-conf.org/proceedings/lrec2008/pdf/813_paper.pdf Fine-grained subjectivity analysis has been the subject of much recent research attention. As a result, the field has gained a number of working definitions, technical approaches and manually annotated corpora that cover many facets of subjectivity. Little work has been done, however, on one aspect of fine-grained opinions - the specification and identification of opinion topics. In particular, due to the difficulty of manual opinion topic annotation, no general-purpose opinion corpus with information about topics of fine-grained opinions currently exists. In this paper, we propose a methodology for the manual annotation of opinion topics and use it to annotate a portion of an existing general-purpose opinion corpus with opinion topic information. Inter-annotator agreement results according to a number of metrics suggest that the annotations are reliable. @@ -868,8 +868,8 @@ JetteViethen RobertDale - EmielKrahmer - MariëtTheune + EmielKrahmer + MariëtTheune PascalTouset Controlling Redundancy in Referring Expressions http://www.lrec-conf.org/proceedings/lrec2008/pdf/239_paper.pdf @@ -877,7 +877,7 @@ viethen-etal-2008-controlling - MassimoPoesio + MassimoPoesio RonArtstein Anaphoric Annotation in the <fixed-case>ARRAU</fixed-case> Corpus http://www.lrec-conf.org/proceedings/lrec2008/pdf/297_paper.pdf @@ -885,7 +885,7 @@ poesio-artstein-2008-anaphoric - Mark-ChristophMueller + Mark-ChristophMueller MargotMieskes MichaelStrube Knowledge Sources for Bridging Resolution in Multi-Party Dialog @@ -899,8 +899,8 @@ AlanLee EleniMiltsakaki LivioRobaldo - AravindJoshi - BonnieWebber + AravindJoshi + BonnieWebber The <fixed-case>P</fixed-case>enn <fixed-case>D</fixed-case>iscourse <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank 2.0. http://www.lrec-conf.org/proceedings/lrec2008/pdf/754_paper.pdf We present the second version of the Penn Discourse Treebank, PDTB-2.0, describing its lexically-grounded annotations of discourse relations and their two abstract object arguments over the 1 million word Wall Street Journal corpus. We describe all aspects of the annotation, including (a) the argument structure of discourse relations, (b) the sense annotation of the relations, and (c) the attribution of discourse relations and each of their arguments. We list the differences between PDTB-1.0 and PDTB-2.0. We present representative statistics for several aspects of the annotation in the corpus. @@ -910,8 +910,8 @@ IrisHendrickx GosseBouma FrederikCoppens - WalterDaelemans - VeroniqueHoste + WalterDaelemans + VeroniqueHoste GeertKloosterman Anne-MarieMineur JoeriVan Der Vloet @@ -930,7 +930,7 @@ baker-brew-2008-statistical - DianaTrandabăţ + DianaTrandabăţ MariaHusarciuc <fixed-case>R</fixed-case>omanian Semantic Role Resource http://www.lrec-conf.org/proceedings/lrec2008/pdf/715_paper.pdf @@ -940,7 +940,7 @@ AlessandroLenci BarbaraMcGillivray - SimonettaMontemagni + SimonettaMontemagni VitoPirrelli Unsupervised Acquisition of Verb Subcategorization Frames from Shallow-Parsed Corpora http://www.lrec-conf.org/proceedings/lrec2008/pdf/763_paper.pdf @@ -956,22 +956,22 @@ kawahara-uchimoto-2008-method - NúriaBel + NúriaBel SergioEspeja - MontserratMarimon + MontserratMarimon Automatic Acquisition for low frequency lexical items http://www.lrec-conf.org/proceedings/lrec2008/pdf/334_paper.pdf This paper addresses a specific case of the task of lexical acquisition understood as the induction of information about the linguistic characteristics of lexical items on the basis of information gathered from their occurrences in texts. Most of the recent works in the area of lexical acquisition have used methods that take as much textual data as possible as source of evidence, but their performance decreases notably when only few occurrences of a word are available. The importance of covering such low frequency items lies in the fact that a large quantity of the words in any particular collection of texts will be occurring few times, if not just once. Our work proposes to compensate the lack of information resorting to linguistic knowledge on the characteristics of lexical classes. This knowledge, obtained from a lexical typology, is formulated probabilistically to be used in a Bayesian method to maximize the information gathered from single occurrences as to predict the full set of characteristics of the word. Our results show that our method achieves better results than others for the treatment of low frequency items. bel-etal-2008-automatic - DoroteoToledano - DanielHernandez-Lopez + DoroteoToledano + DanielHernandez-Lopez CristinaEsteve-Elizalde JulianFierrez - JavierOrtega-Garcia + JavierOrtega-Garcia DanielRamos - JoaquinGonzalez-Rodriguez + JoaquinGonzalez-Rodriguez <fixed-case>B</fixed-case>io<fixed-case>S</fixed-case>ec Multimodal Biometric Database in Text-Dependent Speaker Recognition http://www.lrec-conf.org/proceedings/lrec2008/pdf/226_paper.pdf In this paper we briefly describe the BioSec multimodal biometric database and analyze its use in automatic text-dependent speaker recognition research. The paper is structured into four parts: a short introduction to the problem of text-dependent speaker recognition; a brief review of other existing databases, including monomodal text-dependent speaker recognition databases and multimodal biometric recognition databases; a description of the BioSec database; and, finally, an experimental section in which speaker recognition results on some of these databases are presented and compared, using the same underlying speaker recognition technique in all cases. @@ -979,12 +979,12 @@ IkerLuengo - EvaNavas + EvaNavas IñakiSainz IbonSaratxaga - JonSanchez + JonSanchez IgorOdriozola - InmaHernaez + InmaHernaez Text Independent Speaker Identification in Multilingual Environments http://www.lrec-conf.org/proceedings/lrec2008/pdf/461_paper.pdf Speaker identification and verification systems have a poor performance when model training is done in one language while the testing is done in another. This situation is not unusual in multilingual environments, where people should be able to access the system in any language he or she prefers in each moment, without noticing a performance drop. In this work we study the possibility of using features derived from prosodic parameters in order to reinforce the language robustness of these systems. First the features’ properties in terms of language and session variability are studied, predicting an increase in the language robustness when frame-wise intonation and energy values are combined with traditional MFCC features. The experimental results confirm that these features provide an improvement in the speaker recognition rates under language-mismatch conditions. The whole study is carried out in the Basque Country, a bilingual region in which Basque and Spanish languages co-exist. @@ -992,21 +992,21 @@ UdhyakumarNallasamy - AlanBlack + AlanBlack TanjaSchultz - RobertFrederking + RobertFrederking <fixed-case>N</fixed-case>ine<fixed-case>O</fixed-case>ne<fixed-case>O</fixed-case>ne: Recognizing and Classifying Speech for Handling Minority Language Emergency Calls http://www.lrec-conf.org/proceedings/lrec2008/pdf/735_paper.pdf In this paper, we describe NineOneOne (9-1-1), a system designed to recognize and translate Spanish emergency calls for better dispatching. We analyze the research challenges in adapting speech translation technology to 9-1-1 domain. We report our initial research towards building the system and the results of our initial experiments. nallasamy-etal-2008-nineoneone - ChristopherCieri - StephanieStrassel - MeghanGlenn + ChristopherCieri + StephanieStrassel + MeghanGlenn RevaSchwartz WadeShen - JosephCampbell + JosephCampbell Bridging the Gap between Linguists and Technology Developers: Large-Scale, Sociolinguistic Annotation for Dialect and Speaker Recognition http://www.lrec-conf.org/proceedings/lrec2008/pdf/793_paper.pdf Recent years have seen increased interest within the speaker recognition community in high-level features including, for example, lexical choice, idiomatic expressions or syntactic structures. The promise of speaker recognition in forensic applications drives development toward systems robust to channel differences by selecting features inherently robust to channel difference. Within the language recognition community, there is growing interest in differentiating not only languages but also mutually intelligible dialects of a single language. Decades of research in dialectology suggest that high-level features can enable systems to cluster speakers according to the dialects they speak. The Phanotics (Phonetic Annotation of Typicality in Conversational Speech) project seeks to identify high-level features characteristic of American dialects, annotate a corpus for these features, use the data to dialect recognition systems and also use the categorization to create better models for speaker recognition. The data, once published, should be useful to other developers of speaker and dialect recognition systems and to dialectologists and sociolinguists. We expect the methods will generalize well beyond the speakers, dialects, and languages discussed here and should, if successful, provide a model for how linguists and technology developers can collaborate in the future for the benefit of both groups and toward a deeper understanding of how languages vary and change. @@ -1014,7 +1014,7 @@ LindaBrandschain - ChristopherCieri + ChristopherCieri DavidGraff AbbyNeely KevinWalker @@ -1024,11 +1024,11 @@ brandschain-etal-2008-speaker - NancyIde - CollinBaker + NancyIde + CollinBaker ChristianeFellbaum - CharlesFillmore - RebeccaPassonneau + CharlesFillmore + RebeccaPassonneau <fixed-case>MASC</fixed-case>: the Manually Annotated Sub-Corpus of <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish http://www.lrec-conf.org/proceedings/lrec2008/pdf/617_paper.pdf To answer the critical need for sharable, reusable annotated resources with rich linguistic annotations, we are developing a Manually Annotated Sub-Corpus (MASC) including texts from diverse genres and manual annotations or manually-validated annotations for multiple levels, including WordNet senses and FrameNet frames and frame elements, both of which have become significant resources in the international computational linguistics community. To derive maximal benefit from the semantic information provided by these resources, the MASC will also include manually-validated shallow parses and named entities, which will enable linking WordNet senses and FrameNet frames within the same sentences into more complex semantic structures and, because named entities will often be the role fillers of FrameNet frames, enrich the semantic and pragmatic information derivable from the sub-corpus. All MASC annotations will be published with detailed inter-annotator agreement measures. The MASC and its annotations will be freely downloadable from the ANC website, thus providing maximum accessibility for researchers from around the globe. @@ -1038,7 +1038,7 @@ Chu-RenHuang Lung-HaoLee Wei-guangQu - Jia-FeiHong + Jia-FeiHong ShiwenYu Quality Assurance of Automatic Annotation of Very Large Corpora: a Study based on heterogeneous Tagging System http://www.lrec-conf.org/proceedings/lrec2008/pdf/686_paper.pdf @@ -1046,7 +1046,7 @@ huang-etal-2008-quality - ClaireCardie + ClaireCardie CynthiaFarina MattRawding AdilAijaz @@ -1056,8 +1056,8 @@ cardie-etal-2008-erulemaking - BranimirBoguraev - MaryNeff + BranimirBoguraev + MaryNeff Navigating through Dense Annotation Spaces http://www.lrec-conf.org/proceedings/lrec2008/pdf/769_paper.pdf Pattern matching, or querying, over annotations is a general purpose paradigm for inspecting, navigating, mining, and transforming annotation repositories - the common representation basis for modern pipelined text-processing frameworks. Configurability of such frameworks and expressiveness of feature structure-based annotation schemes account for the “high density” of some such annotation repositories. This particular characteristic makes challenging the design of a pattern matching engine, capable of interpreting (or imposing) flat patterns over an arbitrarily dense annotation lattice. We present an approach where a finite state device carries out the application of (compiled) grammars over what is, in effect, a linearized “projection” of a unique route through the lattice; a route derived by a mix of static pattern (grammar) analysis and interpretation of navigational directives within the extended grammar formalism. Our approach achieves a mix of finite state scanning and lattice traversal for expressive and efficient pattern matching in dense annotations stores. @@ -1065,8 +1065,8 @@ DavidGuthrie - LouiseGuthrie - YorickWilks + LouiseGuthrie + YorickWilks An Unsupervised Probabilistic Approach for the Detection of Outliers in Corpora http://www.lrec-conf.org/proceedings/lrec2008/pdf/866_paper.pdf Many applications of computational linguistics are greatly influenced by the quality of corpora available and as automatically generated corpora continue to play an increasingly common role, it is essential that we not overlook the importance of well-constructed and homogeneous corpora. This paper describes an automatic approach to improving the homogeneity of corpora using an unsupervised method of statistical outlier detection to find documents and segments that do not belong in a corpus. We consider collections of corpora that are homogeneous with respect to topic (i.e. about the same subject), or genre (written for the same audience or from the same source) and use a combination of stylistic and lexical features of the texts to automatically identify pieces of text in these collections that break the homogeneity. These pieces of text that are significantly different from the rest of the corpus are likely to be errors that are out of place and should be removed from the corpus before it is used for other tasks. We evaluate our techniques by running extensive experiments over large artificially constructed corpora that each contain single pieces of text from a different topic, author, or genre than the rest of the collection and measure the accuracy of identifying these pieces of text without the use of training data. We show that when these pieces of text are reasonably large (1,000 words) we can reliably identify them in a corpus. @@ -1082,7 +1082,7 @@ BogdanBabych SergeSharoff - AnthonyHartley + AnthonyHartley Generalising Lexical Translation Strategies for <fixed-case>MT</fixed-case> Using Comparable Corpora http://www.lrec-conf.org/proceedings/lrec2008/pdf/340_paper.pdf We report on an on-going research project aimed at increasing the range of translation equivalents which can be automatically discovered by MT systems. The methodology is based on semi-supervised learning of indirect translation strategies from large comparable corpora and applying them in run-time to generate novel, previously unseen translation equivalents. This approach is different from methods based on parallel resources, which currently can reuse only individual translation equivalents. Instead it models translation strategies which generalise individual equivalents and can successfully generate an open class of new translation solutions. The task of the project is integration of the developed technology into open-source MT systems. @@ -1097,8 +1097,8 @@ itagaki-aikawa-2008-post - GermánSanchis - Joan AndreuSánchez + GermánSanchis + Joan AndreuSánchez Using Parsed Corpora for Estimating Stochastic Inversion Transduction Grammars http://www.lrec-conf.org/proceedings/lrec2008/pdf/465_paper.pdf An important problem when using Stochastic Inversion Transduction Grammars is their computational cost. More specifically, when dealing with corpora such as Europarl. only one iteration of the estimation algorithm becomes prohibitive. In this work, we apply a reduction of the cost by taking profit of the bracketing information in parsed corpora and show machine translation results obtained with a bracketed Europarl corpus, yielding interresting improvements when increasing the number of non-terminal symbols. @@ -1106,7 +1106,7 @@ MarkFishel - Heiki-JaanKaalep + Heiki-JaanKaalep Experiments on Processing Overlapping Parallel Corpora http://www.lrec-conf.org/proceedings/lrec2008/pdf/776_paper.pdf The number and sizes of parallel corpora keep growing, which makes it necessary to have automatic methods of processing them: combining, checking and improving corpora quality, etc. We here introduce a method which enables performing many of these by exploiting overlapping parallel corpora. The method finds the correspondence between sentence pairs in two corpora: first the corresponding language parts of the corpora are aligned and then the two resulting alignments are compared. The method takes into consideration slight differences in the source documents, different levels of segmentation of the input corpora, encoding differences and other aspects of the task. The paper describes two experiments conducted to test the method. In the first experiment, the Estonian-English part of the JRC-Acquis corpus was combined with another corpus of legislation texts. In the second experiment alternatively aligned versions of the JRC-Acquis are compared to each other with the example of all language pairs between English, Estonian and Latvian. Several additional conclusions about the corpora can be drawn from the results. The method proves to be effective for several parallel corpora processing tasks. @@ -1114,17 +1114,17 @@ JenniferFoster - Josefvan Genabith + Josefvan Genabith Parser Evaluation and the <fixed-case>BNC</fixed-case>: Evaluating 4 constituency parsers with 3 metrics http://www.lrec-conf.org/proceedings/lrec2008/pdf/774_paper.pdf We evaluate discriminative parse reranking and parser self-training on a new English test set using four versions of the Charniak parser and a variety of parser evaluation metrics. The new test set consists of 1,000 hand-corrected British National Corpus parse trees. We directly evaluate parser output using both the Parseval and the Leaf Ancestor metrics. We also convert the hand-corrected and parser output phrase structure trees to dependency trees using a state-of-the-art functional tag labeller and constituent-to-dependency conversion tool, and then calculate label accuracy, unlabelled attachment and labelled attachment scores over the dependency structures. We find that reranking leads to a performance improvement on the new test set (albeit a modest one). We find that self-training using BNC data leads to significantly better results. However, it is not clear how effective self-training is when the training material comes from the North American News Corpus. foster-van-genabith-2008-parser - PatrickParoubek + PatrickParoubek IsabelleRobba AnneVilnat - ChristelleAyache + ChristelleAyache <fixed-case>EASY</fixed-case>, Evaluation of Parsers of <fixed-case>F</fixed-case>rench: what are the Results? http://www.lrec-conf.org/proceedings/lrec2008/pdf/621_paper.pdf This paper presents EASY, which has been the first campaign evaluating syntactic parsers on all the common syntactic phenomena and a large set of dependency relations. The language analyzed was French. During this campaign, an annotation scheme has been elaborated with the different actors: participants and corpus providers; then a corpus made of several syntactic materials has been built and annotated: it reflects a great variety of linguistic styles (from literature to oral transcriptions, and from newspapers to medical texts). Both corpus and annotation scheme are here briefly presented. Moreover, evaluation measures are explained and detailed results are given. The results of the 15 parsers coming from 12 teams are analyzed. To conclude, a first experiment aiming to combine the outputs of the different systems is shown. @@ -1148,11 +1148,11 @@ grothe-etal-2008-comparative - ÉricVillemonte de la Clergerie - OlivierHamon - DjamelMostefa - ChristelleAyache - PatrickParoubek + ÉricVillemonte de la Clergerie + OlivierHamon + DjamelMostefa + ChristelleAyache + PatrickParoubek AnneVilnat <fixed-case>PASSAGE</fixed-case>: from <fixed-case>F</fixed-case>rench Parser Evaluation to Large Sized Treebank http://www.lrec-conf.org/proceedings/lrec2008/pdf/908_paper.pdf @@ -1171,7 +1171,7 @@ MarkpongJongtaveesataporn ChaiWutiwiwatchai KojiIwano - SadaokiFurui + SadaokiFurui <fixed-case>T</fixed-case>hai Broadcast News Corpus Construction and Evaluation http://www.lrec-conf.org/proceedings/lrec2008/pdf/319_paper.pdf Large speech and text corpora are crucial to the development of a state-of-the-art speech recognition system. This paper reports on the construction and evaluation of the first Thai broadcast news speech and text corpora. Specifications and conventions used in the transcription process are described in the paper. The speech corpus contains about 17 hours of speech data while the text corpus was transcribed from around 35 hours of television broadcast news. The characteristics of the corpus were analyzed and shown in the paper. The speech corpus was split according to the evaluation focus condition used in the DARPA Hub-4 evaluation. An 18K-word Thai speech recognition system was setup to test with this speech corpus as a preliminary experiment. Acoustic model adaptations were performed to improve the system performance. The best system yielded a word error rate of about 20% for clean and planned speech, and below 30% for the overall condition. @@ -1189,21 +1189,21 @@ SopheapSeng - SethsereySam - LaurentBesacier + SethsereySam + LaurentBesacier BrigitteBigi - EricCastelli + EricCastelli First Broadcast News Transcription System for <fixed-case>K</fixed-case>hmer Language http://www.lrec-conf.org/proceedings/lrec2008/pdf/661_paper.pdf In this paper we present an overview on the development of a large vocabulary continuous speech recognition (LVCSR) system for Khmer, the official language of Cambodia, spoken by more than 15 million people. As an under-resourced language, develop a LVCSR system for Khmer is a challenging task. We describe our methodologies for quick language data collection and processing for language modeling and acoustic modeling. For language modeling, we investigate the use of word and sub-word as basic modeling unit in order to see the potential of sub-word units in the case of unsegmented language like Khmer. Grapheme-based acoustic modeling is used to quickly build our Khmer language acoustic model. Furthermore, the approaches and tools used for the development of our system are documented and made publicly available on the web. We hope this will contribute to accelerate the development of LVCSR system for a new language, especially for under-resource languages of developing countries where resources and expertise are limited. seng-etal-2008-first - ChomichaBendahman - MeghanGlenn - DjamelMostefa - NiklasPaulsson - StephanieStrassel + ChomichaBendahman + MeghanGlenn + DjamelMostefa + NiklasPaulsson + StephanieStrassel Quick Rich Transcriptions of <fixed-case>A</fixed-case>rabic Broadcast News Speech Data http://www.lrec-conf.org/proceedings/lrec2008/pdf/915_paper.pdf This paper describes the collect and transcription of a large set of Arabic broadcast news speech data. A total of more than 2000 hours of data was transcribed. The transcription factor for transcribing the broadcast news data has been reduced using a method such as Quick Rich Transcription (QRTR) as well as reducing the number of quality controls performed on the data. The data was collected from several Arabic TV and radio sources and from both Modern Standard Arabic and dialectal Arabic. The orthographic transcriptions included segmentation, speaker turns, topics, sentence unit types and a minimal noise mark-up. The transcripts were produced as a part of the GALE project. @@ -1224,8 +1224,8 @@ sekine-2008-extended - Mari CarmenSuárez-Figueroa - AsunciónGómez-Pérez + Mari CarmenSuárez-Figueroa + AsunciónGómez-Pérez Towards a Glossary of Activities in the Ontology Engineering Field http://www.lrec-conf.org/proceedings/lrec2008/pdf/219_paper.pdf The Semantic Web of the future will be characterized by using a very large number of ontologies embedded in ontology networks. It is important to provide strong methodological support for collaborative and context-sensitive development of networks of ontologies. This methodological support includes the identification and definition of which activities should be carried out when ontology networks are collaboratively built. In this paper we present the consensus reaching process followed within the NeOn consortium for the identification and definition of the activities involved in the ontology network development process. The consensus reaching process here presented produces as a result the NeOn Glossary of Activities. This work was conceived due to the lack of standardization in the Ontology Engineering terminology, which clearly contrasts with the Software Engineering field. Our future aim is to standardize the NeOn Glossary of Activities. @@ -1233,7 +1233,7 @@ YirongChen - QinLu + QinLu WenjieLi GaoyingCui <fixed-case>C</fixed-case>hinese Core Ontology Construction from a Bilingual Term Bank @@ -1251,8 +1251,8 @@ TakashiTsunakawa - NaoakiOkazaki - Jun’ichiTsujii + NaoakiOkazaki + Jun’ichiTsujii Building Bilingual Lexicons using Lexical Translation Probabilities via Pivot Languages http://www.lrec-conf.org/proceedings/lrec2008/pdf/423_paper.pdf This paper proposes a method of increasing the size of a bilingual lexicon obtained from two other bilingual lexicons via a pivot language. When we apply this approach, there are two main challenges, “ambiguity” and “mismatch” of terms; we target the latter problem by improving the utilization ratio of the bilingual lexicons. Given two bilingual lexicons between language pairs Lf-Lp and Lp-Le, we compute lexical translation probabilities of word pairs by using a statistical word-alignment model, and term decomposition/composition techniques. We compare three approaches to generate the bilingual lexicon: “exact merging”, “word-based merging”, and our proposed “alignment-based merging”. In our method, we combine lexical translation probabilities and a simple language model for estimating the probabilities of translation pairs. The experimental results show that our method could drastically improve the number of translation terms compared to the two methods mentioned above. Additionally, we evaluated and discussed the quality of the translation outputs. @@ -1260,7 +1260,7 @@ YuChen - AndreasEisele + AndreasEisele MartinKay Improving Statistical Machine Translation Efficiency by Triangulation http://www.lrec-conf.org/proceedings/lrec2008/pdf/733_paper.pdf @@ -1270,7 +1270,7 @@ CarolineLavecchia DavidLanglois - KamelSmaïli + KamelSmaïli Phrase-Based Machine Translation based on Simulated Annealing http://www.lrec-conf.org/proceedings/lrec2008/pdf/791_paper.pdf In this paper, we propose a new phrase-based translation model based on inter-lingual triggers. The originality of our method is double. First we identify common source phrases. Then we use inter-lingual triggers in order to retrieve their translations. Furthermore, we consider the way of extracting phrase translations as an optimization issue. For that we use simulated annealing algorithm to find out the best phrase translations among all those determined by inter-lingual triggers. The best phrases are those which improve the translation quality in terms of Bleu score. Tests are achieved on movie subtitle corpora. They show that our phrase-based machine translation (PBMT) system outperforms a state-of-the-art PBMT system by almost 7 points. @@ -1285,8 +1285,8 @@ carpuat-wu-2008-evaluation - SašaHasan - HermannNey + SašaHasan + HermannNey A Multi-Genre <fixed-case>SMT</fixed-case> System for <fixed-case>A</fixed-case>rabic to <fixed-case>F</fixed-case>rench http://www.lrec-conf.org/proceedings/lrec2008/pdf/549_paper.pdf This work presents improvements of a large-scale Arabic to French statistical machine translation system over a period of three years. The development includes better preprocessing, more training data, additional genre-specific tuning for different domains, namely newswire text and broadcast news transcripts, and improved domain-dependent language models. Starting with an early prototype in 2005 that participated in the second CESTA evaluation, the system was further upgraded to achieve favorable BLEU scores of 44.8% for the text and 41.1% for the audio setting. These results are compared to a system based on the freely available Moses toolkit. We show significant gains both in terms of translation quality (up to +1.2% BLEU absolute) and translation speed (up to 16 times faster) for comparable configuration settings. @@ -1294,16 +1294,16 @@ EstelleDelpech - PatrickSaint-Dizier + PatrickSaint-Dizier Investigating the Structure of Procedural Texts for Answering How-to Questions http://www.lrec-conf.org/proceedings/lrec2008/pdf/20_paper.pdf This paper presents ongoing work dedicated to parsing the textual structure of procedural texts. We propose here a model for the intructional structure and criteria to identify its main components: titles, instructions, warnings and prerequisites. The main aim of this project, besides a contribution to text processing, is to be able to answer procedural questions (How-to? questions), where the answer is a well-formed portion of a text, not a small set of words as for factoid questions. delpech-saint-dizier-2008-investigating - IgorLeturia - AnttonGurrutxaga - NereaAreta + IgorLeturia + AnttonGurrutxaga + NereaAreta EliPociello Analysis and Performance of Morphological Query Expansion and Language-Filtering Words on <fixed-case>B</fixed-case>asque Web Searching http://www.lrec-conf.org/proceedings/lrec2008/pdf/185_paper.pdf @@ -1328,7 +1328,7 @@ TorstenZesch - ChristofMüller + ChristofMüller IrynaGurevych Extracting Lexical Semantic Knowledge from <fixed-case>W</fixed-case>ikipedia and <fixed-case>W</fixed-case>iktionary http://www.lrec-conf.org/proceedings/lrec2008/pdf/420_paper.pdf @@ -1336,9 +1336,9 @@ zesch-etal-2008-extracting - GregorySanders + GregorySanders SébastienBronsart - SherriCondon + SherriCondon CraigSchlenoff Odds of Successful Transfer of Low-Level Concepts: a Key Metric for Bidirectional Speech-to-Speech Machine Translation in <fixed-case>DARPA</fixed-case>’s <fixed-case>TRANSTAC</fixed-case> Program http://www.lrec-conf.org/proceedings/lrec2008/pdf/399_paper.pdf @@ -1346,12 +1346,12 @@ sanders-etal-2008-odds - LoriLamel - SophieRosset - ChristelleAyache - DjamelMostefa - JordiTurmo - PereComas + LoriLamel + SophieRosset + ChristelleAyache + DjamelMostefa + JordiTurmo + PereComas Question Answering on Speech Transcriptions: the <fixed-case>QAST</fixed-case> evaluation in <fixed-case>CLEF</fixed-case> http://www.lrec-conf.org/proceedings/lrec2008/pdf/511_paper.pdf This paper reports on the QAST track of CLEF aiming to evaluate Question Answering on Speech Transcriptions. Accessing information in spoken documents provides additional challenges to those of text-based QA, needing to address the characteristics of spoken language, as well as errors in the case of automatic transcriptions of spontaneous speech. The framework and results of the pilot QAst evaluation held as part of CLEF 2007 is described, illustrating some of the additional challenges posed by QA in spoken documents relative to written ones. The current plans for future multiple-language and multiple-task QAst evaluations are described. @@ -1369,13 +1369,13 @@ heeren-etal-2008-evaluation - SherriCondon - JonPhillips + SherriCondon + JonPhillips ChristyDoran JohnAberdeen DanParvaz - BeatriceOshika - GregSanders + BeatriceOshika + GregSanders CraigSchlenoff Applying Automated Metrics to Speech Translation Dialogs http://www.lrec-conf.org/proceedings/lrec2008/pdf/535_paper.pdf @@ -1391,8 +1391,8 @@ mieskes-strube-2008-three - JesúsGiménez - LluísMàrquez + JesúsGiménez + LluísMàrquez Towards Heterogeneous Automatic <fixed-case>MT</fixed-case> Error Analysis http://www.lrec-conf.org/proceedings/lrec2008/pdf/483_paper.pdf This work studies the viability of performing heterogeneous automatic MT error analyses. Error analysis is, undoubtly, one of the most crucial stages in the development cycle of an MT system. However, often not enough attention is paid to this process. The reason is that performing an accurate error analysis requires intensive human labor. In order to speed up the error analysis process, we suggest partially automatizing it by having automatic evaluation metrics play a more active role. For that purpose, we have compiled a large and heterogeneous set of features at different linguistic levels and at different levels of granularity. Through a practical case study, we show how these features provide an effective means of ellaborating interpretable and detailed automatic reports of translation quality. @@ -1400,14 +1400,14 @@ BogdanBabych - AnthonyHartley + AnthonyHartley Sensitivity of Automated <fixed-case>MT</fixed-case> Evaluation Metrics on Higher Quality <fixed-case>MT</fixed-case> Output: <fixed-case>BLEU</fixed-case> vs Task-Based Evaluation Methods http://www.lrec-conf.org/proceedings/lrec2008/pdf/542_paper.pdf We report the results of our experiment on assessing the ability of automated MT evaluation metrics to remain sensitive to variations in MT quality as the average quality of the compared systems goes up. We compare two groups of metrics: those, which measure the proximity of MT output to some reference translation, and those which evaluate the performance of some automated process on degraded MT output. The experiment shows that proximity-based metrics (such as BLEU) loose sensitivity as the scores go up, but performance-based metrics (e.g., Named Entity recognition from MT output) remain sensitive across the scale. We suggest a model for explaining this result, which attributes stable sensitivity of performance-based metrics to measuring cumulative functional effect of different language levels, while proximity-based metrics measure structural matches on a lexical level and therefore miss higher-level errors that are more typical for better MT systems. Development of new automated metrics should take into account possible decline in sensitivity on higher-quality MT, which should be tested as part of meta-evaluation of the metrics. babych-hartley-2008-sensitivity - MarkPrzybocki + MarkPrzybocki KayPeterson SébastienBronsart Translation Adequacy and Preference Evaluation Tool (<fixed-case>TAP</fixed-case>-<fixed-case>ET</fixed-case>) @@ -1416,7 +1416,7 @@ przybocki-etal-2008-translation - ConstantinOrăsan + ConstantinOrăsan Oana AndreeaChiorean Evaluation of a Cross-lingual <fixed-case>R</fixed-case>omanian-<fixed-case>E</fixed-case>nglish Multi-document Summariser http://www.lrec-conf.org/proceedings/lrec2008/pdf/539_paper.pdf @@ -1424,10 +1424,10 @@ orasan-chiorean-2008-evaluation - Øistein E.Andersen + Øistein E.Andersen JulienNioche - TedBriscoe - JohnCarroll + TedBriscoe + JohnCarroll The <fixed-case>BNC</fixed-case> Parsed with <fixed-case>RASP</fixed-case>4<fixed-case>UIMA</fixed-case> http://www.lrec-conf.org/proceedings/lrec2008/pdf/218_paper.pdf We have integrated the RASP system with the UIMA framework (RASP4UIMA) and used this to parse the XML-encoded version of the British National Corpus (BNC). All original annotation is preserved, and parsing information, mainly in the form of grammatical relations, is added in an XML format. A few specific adaptations of the system to give better results with the BNC are discussed briefly. The RASP4UIMA system is publicly available and can be used to parse other corpora or document collections, and the final parsed version of the BNC will be deposited with the Oxford Text Archive. @@ -1459,7 +1459,7 @@ pustylnikov-etal-2008-unified - AichaBouhjar + AichaBouhjar <fixed-case>A</fixed-case>mazigh Language Terminology in <fixed-case>M</fixed-case>orocco or Management of a “Multidimensional” Variation http://www.lrec-conf.org/proceedings/lrec2008/pdf/912_paper.pdf The present communication brings to the fore the work undertaken at the Royal Institute of the Amazigh Culture (IRCAM, henceforth) within the Language Planning Center known as “Centre de l’Aménagement Linguistique” (CAL) within the framework of the language planning of Amazigh, particularly on the side of terminology. The focus will be on the concept of “variation” that affects different levels in the course of standardizing a language: orthography, spelling, grammar and lexis. Thus, after a brief survey of the main features of the Amazigh (Berber) language in general, the missions and the projects far achieved by CAL will be presented, particularly the objectives that relate to the work on the multiply varied corpus-based terminology. It appears that eliciting the pertinent information, for the most part, requires a whole amount of work on the re-writing of corpora so that the latter become exploitable in the standardization process. It should be pointed out that this stage of data homogenization, seemingly unwieldy for optimal exploitation, cannot be undertaken Amazighist linguists being involved in theoretical and methodological presuppositions that are at the root of this variation. @@ -1467,8 +1467,8 @@ YuhangYang - QinLu - TiejunZhao + QinLu + TiejunZhao <fixed-case>C</fixed-case>hinese Term Extraction Based on Delimiters http://www.lrec-conf.org/proceedings/lrec2008/pdf/72_paper.pdf Existing techniques extract term candidates by looking for internal and contextual information associated with domain specific terms. The algorithms always face the dilemma that fewer features are not enough to distinguish terms from non-terms whereas more features lead to more conflicts among selected features. This paper presents a novel approach for term extraction based on delimiters which are much more stable and domain independent. The proposed approach is not as sensitive to term frequency as that of previous works. This approach has no strict limit or hard rules and thus they can deal with all kinds of terms. It also requires no prior domain knowledge and no additional training to adapt to new domains. Consequently, the proposed approach can be applied to different domains easily and it is especially useful for resource-limited domains. Evaluations conducted on two different domains for Chinese term extraction show significant improvements over existing techniques which verifies its efficiency and domain independent nature. Experiments on new term extraction indicate that the proposed approach can also serve as an effective tool for domain lexicon expansion. @@ -1476,7 +1476,7 @@ SihamBoulaknadel - BeatriceDaille + BeatriceDaille DrissAboutajdine A Multi-Word Term Extraction Program for <fixed-case>A</fixed-case>rabic Language http://www.lrec-conf.org/proceedings/lrec2008/pdf/378_paper.pdf @@ -1485,7 +1485,7 @@ JonathanButters - FabioCiravegna + FabioCiravegna Using Similarity Metrics For Terminology Recognition http://www.lrec-conf.org/proceedings/lrec2008/pdf/717_paper.pdf In this paper we present an approach to terminology recognition whereby a sublanguage term (e.g. an aircraft engine component term extracted from a maintenance log) is matched to its corresponding term from a pre-defined list (such as a taxonomy representing the official break-down of the engine). Terminology recognition is addressed as a classification task whereby the extracted term is associated to one or more potential terms in the official description list via the application of string similarity metrics. The solution described in the paper uses dynamically computed similarity cut-off thresholds calculated on the basis of modeling a noise curve. Dissimilar string matches form a Gaussian distributed noise curve that can be identified and extracted leaving only mostly similar string matches. Dynamically calculated thresholds are preferable over fixed similarity thresholds as fixed thresholds are inherently imprecise, that is, there is no similarity boundary beyond which any two strings always describe the same concept. @@ -1509,8 +1509,8 @@ pitel-grefenstette-2008-semi - LaurenceDevillers - Jean-ClaudeMartin + LaurenceDevillers + Jean-ClaudeMartin Coding Emotional Events in Audiovisual Corpora http://www.lrec-conf.org/proceedings/lrec2008/pdf/322_paper.pdf The modelling of realistic emotional behaviour is needed for various applications in multimodal human-machine interaction such as the design of emotional conversational agents (Martin et al., 2005) or of emotional detection systems (Devillers and Vidrascu, 2007). Yet, building such models requires appropriate definition of various levels for representing the emotions themselves but also some contextual information such as the events that elicit these emotions. This paper presents a coding scheme that has been defined following annotations of a corpus of TV interviews (EmoTV). Deciding which events triggered or may trigger which emotion is a challenge for building efficient emotion eliciting protocols. In this paper, we present the protocol that we defined for collecting another corpus of spontaneous human-human interactions recorded in laboratory conditions (EmoTaboo). We discuss the events that we designed for eliciting emotions. Part of this scheme for coding emotional event is being included in the specifications that are currently defined by a working group of the W3C (the W3C Emotion Incubator Working group). This group is investigating the feasibility of working towards a standard representation of emotions and related states in technological contexts. @@ -1528,8 +1528,8 @@ IsaMaks PiekVossen - RoxaneSegers - Hennievan der Vliet + RoxaneSegers + Hennievan der Vliet Adjectives in the <fixed-case>D</fixed-case>utch Semantic Lexical Database <fixed-case>CORNETTO</fixed-case> http://www.lrec-conf.org/proceedings/lrec2008/pdf/184_paper.pdf The goal of this paper is to describe how adjectives are encoded in Cornetto, a semantic lexical database for Dutch. Cornetto combines two existing lexical resources with different semantic organisation, i.e. Dutch Wordnet (DWN) with a synset organisation and Referentie Bestand Nederlands (RBN) with an organisation in Lexical Units. Both resources will be aligned and mapped on the formal ontology SUMO. In this paper, we will first present details of the description of adjectives in each of the the two resources. We will then address the problems that are encountered during alignment to the SUMO ontology which are greatly due to the fact that SUMO has never been tested for its adequacy with respect to adjectives. We contrasted SUMO with an existing semantic classification which resulted in a further refined and extended SUMO geared for the description of adjectives. @@ -1537,7 +1537,7 @@ MarkusDickinson - Chong MinLee + Chong MinLee Detecting Errors in Semantic Annotation http://www.lrec-conf.org/proceedings/lrec2008/pdf/157_paper.pdf We develop a method for detecting errors in semantic predicate-argument annotation, based on the variation n-gram error detection method. After establishing an appropriate data representation, we detect inconsistencies by searching for identical text with varying annotation. By remaining data-driven, we are able to detect inconsistencies arising from errors at lower layers of annotation. @@ -1545,16 +1545,16 @@ MichaelRoth - SabineSchulte im Walde + SabineSchulte im Walde Corpus Co-Occurrence, Dictionary and <fixed-case>W</fixed-case>ikipedia Entries as Resources for Semantic Relatedness Information http://www.lrec-conf.org/proceedings/lrec2008/pdf/473_paper.pdf Distributional, corpus-based descriptions have frequently been applied to model aspects of word meaning. However, distributional models that use corpus data as their basis have one well-known disadvantage: even though the distributional features based on corpus co-occurrence were often successful in capturing meaning aspects of the words to be described, they generally fail to capture those meaning aspects that refer to world knowledge, because coherent texts tend not to provide redundant information that is presumably available knowledge. The question we ask in this paper is whether dictionary and encyclopaedic resources might complement the distributional information in corpus data, and provide world knowledge that is missing in corpora. As test case for meaning aspects, we rely on a collection of semantic associates to German verbs and nouns. Our results indicate that a combination of the knowledge resources should be helpful in work on distributional descriptions. roth-schulte-im-walde-2008-corpus - EmilianoGiovannetti + EmilianoGiovannetti SimoneMarchi - SimonettaMontemagni + SimonettaMontemagni RobertoBartolini Ontology Learning and Semantic Annotation: a Necessary Symbiosis http://www.lrec-conf.org/proceedings/lrec2008/pdf/534_paper.pdf @@ -1562,7 +1562,7 @@ giovannetti-etal-2008-ontology - JordiAtserias + JordiAtserias HugoZaragoza MassimilianoCiaramita GiuseppeAttardi @@ -1572,20 +1572,20 @@ atserias-etal-2008-semantically - Rodney D.Nielsen - WayneWard - JamesMartin - MarthaPalmer + Rodney D.Nielsen + WayneWard + JamesMartin + MarthaPalmer Annotating Students’ Understanding of Science Concepts http://www.lrec-conf.org/proceedings/lrec2008/pdf/873_paper.pdf This paper summarizes the annotation of fine-grained entailment relationships in the context of student answers to science assessment questions. We annotated a corpus of 15,357 answer pairs with 145,911 fine-grained entailment relationships. We provide the rationale for such fine-grained analysis and discuss its perceived benefits to an Intelligent Tutoring System. The corpus also has potential applications in other areas, such as question answering and multi-document summarization. Annotators achieved 86.2% inter-annotator agreement (Kappa=0.728, corresponding to substantial agreement) annotating the fine-grained facets of reference answers with regard to understanding expressed in student answers and labeling from one of five possible detailed relationship categories. The corpus described in this paper, which is the only one providing such detailed entailment annotations, is available as a public resource for the research community. The corpus is expected to enable application development, not only for intelligent tutoring systems, but also for general textual entailment applications, that is currently not practical. nielsen-etal-2008-annotating - RebeccaPassonneau + RebeccaPassonneau TomLippincott TaeYano - JudithKlavans + JudithKlavans Relation between Agreement Measures on Human Labeling and Machine Learning Performance: Results from an Art History Domain http://www.lrec-conf.org/proceedings/lrec2008/pdf/722_paper.pdf We discuss factors that affect human agreement on a semantic labeling task in the art history domain, based on the results of four experiments where we varied the number of labels annotators could assign, the number of annotators, the type and amount of training they received, and the size of the text span being labeled. Using the labelings from one experiment involving seven annotators, we investigate the relation between interannotator agreement and machine learning performance. We construct binary classifiers and vary the training and test data by swapping the labelings from the seven annotators. First, we find performance is often quite good despite lower than recommended interannotator agreement. Second, we find that on average, learning performance for a given functional semantic category correlates with the overall agreement among the seven annotators for that category. Third, we find that learning performance on the data from a given annotator does not correlate with the quality of that annotator’s labeling. We offer recommendations for the use of labeled data in machine learning, and argue that learners should attempt to accommodate human variation. We also note implications for large scale corpus annotation projects that deal with similarly subjective phenomena. @@ -1627,7 +1627,7 @@ ViktorBielický - OtakarSmrž + OtakarSmrž Building the Valency Lexicon of <fixed-case>A</fixed-case>rabic Verbs http://www.lrec-conf.org/proceedings/lrec2008/pdf/578_paper.pdf This paper describes the building of a valency lexicon of Arabic verbs using a morphologically and syntactically annotated corpus, the Prague Arabic Dependency Treebank (PADT), as its primary source. We present the theoretical account on valency developed within the Functional Generative Description (FGD) theory. We apply the framework to Modern Standard Arabic and discuss various valency-related phenomena with respect to examples from the corpus. We then outline the methodology and the linguistic and technical resources used in the building of the lexicon. The key concept in our scenario is that of PDT-VALLEX of Czech. Our lexicon will be developed by linking the conceivable entries with their instances in the treebank. Conversely, the treebank’s annotations will be linked to the lexicon. While a comparable scheme has been developed for Czech, our own contribution is to design and implement this model thoroughly for Arabic and the PADT data. The Arabic valency lexicon is intended for applications in computational parsing or language generation, and for use by human researchers. The proposed valency lexicon will be exploited in particular during further tectogrammatical annotations of PADT and might serve for enriching the expected second edition of the corpus-based Arabic-Czech Dictionary. @@ -1646,7 +1646,7 @@ RogelioNazar JorgeVivaldi - TeresaCabré + TeresaCabré A Suite to Compile and Analyze an <fixed-case>LSP</fixed-case> Corpus http://www.lrec-conf.org/proceedings/lrec2008/pdf/296_paper.pdf This paper presents a series of tools for the extraction of specialized corpora from the web and its subsequent analysis mainly with statistical techniques. It is an integrated system of original as well as standard tools and has a modular conception that facilitates its re-integration on different systems. The first part of the paper describes the original techniques, which are devoted to the categorization of documents as relevant or irrelevant to the corpus under construction, considering relevant a specialized document of the selected technical domain. Evaluation figures are provided for the original part, but not for the second part involving the analysis of the corpus, which is composed of algorithms that are well known in the field of Natural Language Processing, such as Kwic search, measures of vocabulary richness, the sorting of n-grams by frequency of occurrence or by measures of statistical association, distribution or similarity. @@ -1654,17 +1654,17 @@ EduardoBlanco - NuriaCastell - DanMoldovan + NuriaCastell + DanMoldovan Causal Relation Extraction http://www.lrec-conf.org/proceedings/lrec2008/pdf/87_paper.pdf This paper presents a supervised method for the detection and extraction of Causal Relations from open domain text. First we give a brief outline of the definition of causation and how it relates to other Semantic Relations, as well as a characterization of their encoding. In this work, we only consider marked and explicit causations. Our approach first identifies the syntactic patterns that may encode a causation, then we use Machine Learning techniques to decide whether or not a pattern instance encodes a causation. We focus on the most productive pattern, a verb phrase followed by a relator and a clause, and its reverse version, a relator followed by a clause and a verb phrase. As relators we consider the words as, after, because and since. We present a set of lexical, syntactic and semantic features for the classification task, their rationale and some examples. The results obtained are discussed and the errors analyzed. blanco-etal-2008-causal - GrzegorzChrupala - GeorgianaDinu - Josefvan Genabith + GrzegorzChrupala + GeorgianaDinu + Josefvan Genabith Learning Morphology with <fixed-case>M</fixed-case>orfette http://www.lrec-conf.org/proceedings/lrec2008/pdf/594_paper.pdf Morfette is a modular, data-driven, probabilistic system which learns to perform joint morphological tagging and lemmatization from morphologically annotated corpora. The system is composed of two learning modules which are trained to predict morphological tags and lemmas using the Maximum Entropy classifier. The third module dynamically combines the predictions of the Maximum-Entropy models and outputs a probability distribution over tag-lemma pair sequences. The lemmatization module exploits the idea of recasting lemmatization as a classification task by using class labels which encode mappings from word forms to lemmas. Experimental evaluation results and error analysis on three morphologically rich languages show that the system achieves high accuracy with no language-specific feature engineering or additional resources. @@ -1672,7 +1672,7 @@ GaoyingCui - QinLu + QinLu WenjieLi YirongChen Corpus Exploitation from <fixed-case>W</fixed-case>ikipedia for Ontology Construction @@ -1683,9 +1683,9 @@ ShiyanOu ViktorPekar - ConstantinOrasan + ConstantinOrasan ChristianSpurk - MatteoNegri + MatteoNegri Development and Alignment of a Domain-Specific Ontology for Question Answering http://www.lrec-conf.org/proceedings/lrec2008/pdf/561_paper.pdf With the appearance of Semantic Web technologies, it becomes possible to develop novel, sophisticated question answering systems, where ontologies are usually used as the core knowledge component. In the EU-funded project, QALL-ME, a domain-specific ontology was developed and applied for question answering in the domain of tourism, along with the assistance of two upper ontologies for concept expansion and reasoning. This paper focuses on the development of the QALL-ME ontology in the tourism domain and its alignment with the upper ontologies - WordNet and SUMO. The design of the ontology is presented in the paper, and a semi-automatic alignment procedure is described with some alignment results given as well. Furthermore, the aligned ontology was used to semantically annotate original data obtained from the tourism web sites and natural language questions. The storage schema of the annotated data and the data access method for retrieving answers from the annotated data are also reported in the paper. @@ -1693,7 +1693,7 @@ DavidManzano-Macho - AsunciónGómez-Pérez + AsunciónGómez-Pérez DanielBorrajo Unsupervised and Domain Independent Ontology Learning: Combining Heterogeneous Sources of Evidence http://www.lrec-conf.org/proceedings/lrec2008/pdf/418_paper.pdf @@ -1702,7 +1702,7 @@ AlessandraPotrich - EmanuelePianta + EmanuelePianta <fixed-case>L</fixed-case>-<fixed-case>ISA</fixed-case>: Learning Domain Specific Isa-Relations from the Web http://www.lrec-conf.org/proceedings/lrec2008/pdf/595_paper.pdf Automated extraction of ontological knowledge from text corpora is a relevant task in Natural Language Processing. In this paper, we focus on the problem of finding hypernyms for relevant concepts in a specific domain (e.g. Optical Recording) in the context of a concrete and challenging application scenario (patent processing). To this end information available on the Web is exploited. The extraction method includes four mains steps. Firstly, the Google search engine is exploited to retrieve possible instances of isa-patterns reported in the literature. Then, the returned snippets are filtered on the basis of lexico-syntactic criteria (e.g. the candidate hypernym must be expressed as a noun phrase without complex modifiers). In a further filtering step, only candidate hypernyms compatible with the target domain are kept. Finally a candidate ranking mechanism is applied to select one hypernym as output of the algorithm. The extraction method was evaluated on 100 concepts of the Optical Recording domain. Moreover, the reliability of isa-patterns reported in the literature as predictors of isa-relations was assessed by manually evaluating the template instances remaining after lexico-syntactic filtering, for 3 concepts of the same domain. While more extensive testing is needed the method appears promising especially for its portability across different domains. @@ -1711,8 +1711,8 @@ ArnoHartholt ThomasRuss - DavidTraum - EduardHovy + DavidTraum + EduardHovy SusanRobinson A Common Ground for Virtual Humans: Using an Ontology in a Natural Language Oriented Virtual Human Architecture http://www.lrec-conf.org/proceedings/lrec2008/pdf/811_paper.pdf @@ -1720,8 +1720,8 @@ hartholt-etal-2008-common - EnekoAgirre - AitorSoroa + EnekoAgirre + AitorSoroa Using the Multilingual Central Repository for Graph-Based Word Sense Disambiguation http://www.lrec-conf.org/proceedings/lrec2008/pdf/351_paper.pdf This paper presents the results of a graph-based method for performing knowledge-based Word Sense Disambiguation (WSD). The technique exploits the structural properties of the graph underlying the chosen knowledge base. The method is general, in the sense that it is not tied to any particular knowledge base, but in this work we have applied it to the Multilingual Central Repository (MCR). The evaluation has been performed on the Senseval-3 all-words task. The main contributions of the paper are twofold: (1) We have evaluated the separate and combined performance of each type of relation in the MCR, and thus indirectly validated the contents of the MCR and their potential for WSD. (2) We obtain state-of-the-art results, and in fact yield the best results that can be obtained using publicly available data. @@ -1729,7 +1729,7 @@ FredricGey - David KirkEvans + David KirkEvans NorikoKando A <fixed-case>J</fixed-case>apanese-<fixed-case>E</fixed-case>nglish Technical Lexicon for Translation and Language Research http://www.lrec-conf.org/proceedings/lrec2008/pdf/363_paper.pdf @@ -1739,17 +1739,17 @@ Le AnHa GabrielaFernandez - RuslanMitkov - GloriaCorpas + RuslanMitkov + GloriaCorpas Mutual Bilingual Terminology Extraction http://www.lrec-conf.org/proceedings/lrec2008/pdf/463_paper.pdf This paper describes a novel methodology to perform bilingual terminology extraction, in which automatic alignment is used to improve the performance of terminology extraction for each language. The strengths of monolingual terminology extraction for each language are exploited to improve the performance of terminology extraction in the other language, thanks to the availability of a sentence-level aligned bilingual corpus, and an automatic noun phrase alignment mechanism. The experiment indicates that weaknesses in monolingual terminology extraction due to the limitation of resources in certain languages can be overcome by using another language which has no such limitation. ha-etal-2008-mutual - JoãoGraça + JoãoGraça Joana PauloPardal - LuísaCoheur + LuísaCoheur DiamantinoCaseiro Building a Golden Collection of Parallel Multi-Language Word Alignment http://www.lrec-conf.org/proceedings/lrec2008/pdf/250_paper.pdf @@ -1759,13 +1759,13 @@ ElenaCabrio MilenKouylekov - BernardoMagnini - MatteoNegri + BernardoMagnini + MatteoNegri LauraHasler - ConstantinOrasan - DavidTomás - Jose LuisVicedo - GuenterNeumann + ConstantinOrasan + DavidTomás + Jose LuisVicedo + GuenterNeumann CorinnaWeber The <fixed-case>QALL</fixed-case>-<fixed-case>ME</fixed-case> Benchmark: a Multilingual Resource of Annotated Spoken Requests for Question Answering http://www.lrec-conf.org/proceedings/lrec2008/pdf/628_paper.pdf @@ -1780,7 +1780,7 @@ campbell-2008-tools - Maria TeresaPazienza + Maria TeresaPazienza MarcoPennacchiotti ArmandoStellato A Web Browser Extension for Growing-up Ontological Knowledge from Traditional Web Content @@ -1790,7 +1790,7 @@ YoussefDrissi - BranimirBoguraev + BranimirBoguraev DavidFerrucci PaulKeyser AnthonyLevas @@ -1818,7 +1818,7 @@ LynetteMelnar - ChenLiu + ChenLiu Borrowing Language Resources for Development of Automatic Speech Recognition for Low- and Middle-Density Languages http://www.lrec-conf.org/proceedings/lrec2008/pdf/68_paper.pdf In this paper we describe an approach that both creates crosslingual acoustic monophone model sets for speech recognition tasks and objectively predicts their performance without target-language speech data or acoustic measurement techniques. This strategy is based on a series of linguistic metrics characterizing the articulatory phonetic and phonological distances of target-language phonemes from source-language phonemes. We term these algorithms the Combined Phonetic and Phonological Crosslingual Distance (CPP-CD) metric and the Combined Phonetic and Phonological Crosslingual Prediction (CPP-CP) metric. The particular motivations for this project are the current unavailability and often prohibitively high production cost of speech databases for many strategically important low- and middle-density languages. First, we describe the CPP-CD approach and compare the performance of CPP-CD-specified models to both native language models and crosslingual models selected by the Bhattacharyya acoustic-model distance metric in automatic speech recognition (ASR) experiments. Results confirm that the CPP-CD approach nearly matches those achieved by the acoustic distance metric. We then test the CPP-CP algorithm on the CPP-CD models by comparing the CPP-CP scores to the recognition phoneme error rates. Based on this comparison, we conclude that the CPP-CP algorithm is a reliable indicator of crosslingual model performance in speech recognition tasks. @@ -1840,7 +1840,7 @@ MelissaKronenthal RobertLogie NeilMayo - JohannaMoore + JohannaMoore MattWatson A Fully Annotated Corpus for Studying the Effect of Cognitive Ageing on Users’ Interactions with Spoken Dialogue Systems http://www.lrec-conf.org/proceedings/lrec2008/pdf/237_paper.pdf @@ -1848,9 +1848,9 @@ georgila-etal-2008-fully - CatiaCucchiarini + CatiaCucchiarini JorisDriesen - HugoVan hamme + HugoVan hamme EricSanders Recording Speech of Children, Non-Natives and Elderly People for <fixed-case>HLT</fixed-case> Applications: the <fixed-case>JASMIN</fixed-case>-<fixed-case>CGN</fixed-case> Corpus. http://www.lrec-conf.org/proceedings/lrec2008/pdf/366_paper.pdf @@ -1867,7 +1867,7 @@ draxler-etal-2008-f0 - YorickWilks + YorickWilks DavidBenyon ChristopherBrewster PavelIrcing @@ -1878,7 +1878,7 @@ wilks-etal-2008-dialogue - JadeGoldstein-Stewart + JadeGoldstein-Stewart KerriGoodwin RobertaSabin RansomWinder @@ -1888,10 +1888,10 @@ goldstein-stewart-etal-2008-creating - RobertaCatizone + RobertaCatizone AlexieiDingli HugoPinto - YorickWilks + YorickWilks Information Extraction Tools and Methods for Understanding Dialogue in a Companion http://www.lrec-conf.org/proceedings/lrec2008/pdf/819_paper.pdf This paper discusses how Information Extraction is used to understand and manage Dialogue in the EU-funded Companions project. This will be discussed with respect to the Senior Companion, one of two applications under development in the EU-funded Companions project. Over the last few years, research in human-computer dialogue systems has increased and much attention has focused on applying learning methods to improving a key part of any dialogue system, namely the dialogue manager. Since the dialogue manager in all dialogue systems relies heavily on the quality of the semantic interpretation of the user’s utterance, our research in the Companions project, focuses on how to improve the semantic interpretation and combine it with knowledge from the Knowledge Base to increase the performance of the Dialogue Manager. Traditionally the semantic interpretation of a user utterance is handled by a natural language understanding module which embodies a variety of natural language processing techniques, from sentence splitting, to full parsing. In this paper we discuss the use of a variety of NLU processes and in particular Information Extraction as a key part of the NLU module in order to improve performance of the dialogue manager and hence the overall dialogue system. @@ -1899,16 +1899,16 @@ Carlos GómezGallo - T. FlorianJaeger - JamesAllen - MarySwift + T. FlorianJaeger + JamesAllen + MarySwift Production in a Multimodal Corpus: how Speakers Communicate Complex Actions http://www.lrec-conf.org/proceedings/lrec2008/pdf/740_paper.pdf We describe a new multimodal corpus currently under development. The corpus consists of videos of task-oriented dialogues that are annotated for speaker’s verbal requests and domain action executions. This resource provides data for new research on language production and comprehension. The corpus can be used to study speakers’ decisions as to how to structure their utterances given the complexity of the message they are trying to convey. gallo-etal-2008-production - HarryBunt + HarryBunt ChwhynnyOverbeeke Towards Formal Interpretation of Semantic Annotation http://www.lrec-conf.org/proceedings/lrec2008/pdf/93_paper.pdf @@ -1919,14 +1919,14 @@ MarcoPennacchiotti DiegoDe Cao PaoloMarocco - RobertoBasili + RobertoBasili Towards a Vector Space Model for <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et-like Resources http://www.lrec-conf.org/proceedings/lrec2008/pdf/202_paper.pdf In this paper, we present an original framework to model frame semantic resources (namely, FrameNet) using minimal supervision. This framework can be leveraged both to expand an existing FrameNet with new knowledge, and to induce a FrameNet in a new language. Our hypothesis is that a frame semantic resource can be modeled and represented by a suitable semantic space model. The intuition is that semantic spaces are an effective model of the notion of “being characteristic of a frame” for both lexical elements and full sentences. The paper gives two main contributions. First, it shows that our hypothesis is valid and can be successfully implemented. Second, it explores different types of semantic VSMs, outlining which one is more suitable for representing a frame semantic resource. In the paper, VSMs are used for modeling the linguistic core of a frame, the lexical units. Indeed, if the hypothesis is verified for these units, the proposed framework has a much wider application. pennacchiotti-etal-2008-towards - PavelSmrž + PavelSmrž <fixed-case>K</fixed-case>no<fixed-case>F</fixed-case>usius: a New Knowledge Fusion System for Interpretation of Gene Expression Data http://www.lrec-conf.org/proceedings/lrec2008/pdf/904_paper.pdf This paper introduces a new architecture that aims at combining molecular biology data with information automatically extracted from relevant scientific literature (using text mining techniques on PubMed abstracts and fulltext papers) to help biomedical experts to interpret experimental results in hand. The infrastructural level bears on semantic-web technologies and standards that facilitate the actual fusion of the multi-source knowledge. @@ -1946,7 +1946,7 @@ LeenCleuren JacquesDuchateau PolGhesquière - HugoVan hamme + HugoVan hamme Children’s Oral Reading Corpus (<fixed-case>CHOREC</fixed-case>): Description and Assessment of Annotator Agreement http://www.lrec-conf.org/proceedings/lrec2008/pdf/254_paper.pdf Within the scope of the SPACE project, the CHildren’s Oral REading Corpus (CHOREC) is developed. This database contains recorded, transcribed and annotated read speech (42 GB or 130 hours) of 400 Dutch speaking elementary school children with or without reading difficulties. Analyses of inter- and intra-annotator agreement are carried out in order to investigate the consistency with which reading errors are detected, orthographic and phonetic transcriptions are made, and reading errors and reading strategies are labeled. Percentage agreement scores and kappa values both show that agreement between annotations, and therefore the quality of the annotations, is high. Taken all double or triple annotations (for 10% resp. 30% of the corpus) together, % agreement varies between 86.4% and 98.6%, whereas kappa varies between 0.72 and 0.97 depending on the annotation tier that is being assessed. School type and reading type seem to account for systematic differences in % agreement, but these differences disappear when kappa values are calculated that correct for chance agreement. To conclude, an analysis of the annotation differences with respect to the ’*s’ label (i.e. a label that is used to annotate undistinguishable spelling behaviour), phoneme labels, reading strategy and error labels is given. @@ -1954,7 +1954,7 @@ TommasoCaselli - NancyIde + NancyIde RobertoBartolini A Bilingual Corpus of Inter-linked Events http://www.lrec-conf.org/proceedings/lrec2008/pdf/610_paper.pdf @@ -1962,7 +1962,7 @@ caselli-etal-2008-bilingual - StephanieStrassel + StephanieStrassel LaurenFriedman SafaIsmael LindaBrandschain @@ -1982,11 +1982,11 @@ ThorstenTrippel MichaelMaxwell - GrevilleCorbett + GrevilleCorbett CambellPrince - ChristopherManning + ChristopherManning StephenGrimes - SteveMoran + SteveMoran Lexicon Schemas and Related Data Models: when Standards Meet Users http://www.lrec-conf.org/proceedings/lrec2008/pdf/812_paper.pdf Lexicon schemas and their use are discussed in this paper from the perspective of lexicographers and field linguists. A variety of lexicon schemas have been developed, with goals ranging from computational lexicography (DATR) through archiving (LIFT, TEI) to standardization (LMF, FSR). A number of requirements for lexicon schemas are given. The lexicon schemas are introduced and compared to each other in terms of conversion and usability for this particular user group, using a common lexicon entry and providing examples for each schema under consideration. The formats are assessed and the final recommendation is given for the potential users, namely to request standard compliance from the developers of the tools used. This paper should foster a discussion between authors of standards, lexicographers and field linguists. @@ -2002,12 +2002,12 @@ messiant-etal-2008-lexschem - HoracioRodríguez - DavidFarwell + HoracioRodríguez + DavidFarwell JaviFerreres ManuelBertran MusaAlkhalifa - M. AntoniaMartí + M. AntoniaMartí <fixed-case>A</fixed-case>rabic <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: Semi-automatic Extensions using <fixed-case>B</fixed-case>ayesian Inference http://www.lrec-conf.org/proceedings/lrec2008/pdf/434_paper.pdf This presentation focuses on the semi-automatic extension of Arabic WordNet (AWN) using lexical and morphological rules and applying Bayesian inference. We briefly report on the current status of AWN and propose a way of extending its coverage by taking advantage of a limited set of highly productive Arabic morphological rules for deriving a range of semantically related word forms from verb entries. The application of this set of rules, combined with the use of bilingual Arabic-English resources and Princeton’s WordNet, allows the generation of a graph representing the semantic neighbourhood of the original word. In previous work, a set of associations between the hypothesized Arabic words and English synsets was proposed on the basis of this graph. Here, a novel approach to extending AWN is presented whereby a Bayesian Network is automatically built from the graph and then the net is used as an inferencing mechanism for scoring the set of candidate associations. Both on its own and in combination with the previous technique, this new approach has led to improved results. @@ -2016,9 +2016,9 @@ IñakiSainz IbonSaratxaga - EvaNavas - InmaculadaHernáez - JonSanchez + EvaNavas + InmaculadaHernáez + JonSanchez IkerLuengo IgorOdriozola Subjective Evaluation of an Emotional Speech Database for <fixed-case>B</fixed-case>asque @@ -2027,7 +2027,7 @@ sainz-etal-2008-subjective - SandraKübler + SandraKübler WolfgangMaier InesRehbein YannickVersley @@ -2039,8 +2039,8 @@ RomaricBesançon StéphaneChaudiron - DjamelMostefa - IsmaïlTimimi + DjamelMostefa + IsmaïlTimimi KhalidChoukri The <fixed-case>INFILE</fixed-case> Project: a Crosslingual Filtering Systems Evaluation Campaign http://www.lrec-conf.org/proceedings/lrec2008/pdf/907_paper.pdf @@ -2048,8 +2048,8 @@ besancon-etal-2008-infile - DanTufiş - AlexandruCeauşu + DanTufiş + AlexandruCeauşu <fixed-case>DIAC</fixed-case>+: a Professional Diacritics Recovering System http://www.lrec-conf.org/proceedings/lrec2008/pdf/54_paper.pdf In languages that use diacritical characters, if these special signs are stripped-off from a word, the resulted string of characters may not exist in the language, and therefore its normative form is, in general, easy to recover. However, this is not always the case, as presence or absence of a diacritical sign attached to a base letter of a word which exists in both variants, may change its grammatical properties or even the meaning, making the recovery of the missing diacritics a difficult task, not only for a program but sometimes even for a human reader. We describe and evaluate an accurate knowledge-based system for automatic recovery of the missing diacritics in MS-Office documents written in Romanian. For the rare cases when the system is not able to make a reliable decision, it either provides the user a list of words with their recovery suggestions, or probabilistically chooses one of the possible changes, but leaves a trace (a highlighted comment) on each word the modification of which was uncertain. @@ -2059,7 +2059,7 @@ GhaziAbuhakema ReemFaraj AnnaFeldman - EileenFitzpatrick + EileenFitzpatrick Annotating an <fixed-case>A</fixed-case>rabic Learner Corpus for Error http://www.lrec-conf.org/proceedings/lrec2008/pdf/343_paper.pdf This paper describes an ongoing project in which we are collecting a learner corpus of Arabic, developing a tagset for error annotation and performing Computer-aided Error Analysis (CEA) on the data. We adapted the French Interlanguage Database FRIDA tagset (Granger, 2003a) to the data. We chose FRIDA in order to follow a known standard and to see whether the changes needed to move from a French to an Arabic tagset would give us a measure of the distance between the two languages with respect to learner difficulty. The current collection of texts, which is constantly growing, contains intermediate and advanced-level student writings. We describe the need for such corpora, the learner data we have collected and the tagset we have developed. We also describe the error frequency distribution of both proficiency levels and the ongoing work. @@ -2084,7 +2084,7 @@ Robvan Son WienekeWesseling EricSanders - Henkvan den Heuvel + Henkvan den Heuvel The <fixed-case>IFADV</fixed-case> Corpus: a Free Dialog Video Corpus http://www.lrec-conf.org/proceedings/lrec2008/pdf/132_paper.pdf Research into spoken language has become more visual over the years. Both fundamental and applied research have progressively included gestures, gaze, and facial expression. Corpora of multi-modal conversational speech are rare and frequently difficult to use due to privacy and copyright restrictions. A freely available annotated corpus is presented, gratis and libre, of high quality video recordings of face-to-face conversational speech. Annotations include orthography, POS tags, and automatically generated phonemes transcriptions and word boundaries. In addition, labeling of both simple conversational function and gaze direction has been a performed. Within the bounds of the law, everything has been done to remove copyright and use restrictions. Annotations have been processed to RDBMS tables that allow SQL queries and direct connections to statistical software. From our experiences we would like to advocate the formulation of “best practises” for both legal handling and database storage of recordings and annotations. @@ -2092,10 +2092,10 @@ AlessioBrutti - LucaCristoforetti + LucaCristoforetti WalterKellermann LutzMarquardt - MaurizioOmologo + <fixed-case>WOZ</fixed-case> Acoustic Data Collection for Interactive <fixed-case>TV</fixed-case> http://www.lrec-conf.org/proceedings/lrec2008/pdf/584_paper.pdf This paper describes a multichannel acoustic data collection recorded under the European DICIT project, during the Wizard of Oz (WOZ) experiments carried out at FAU and FBK-irst laboratories. The scenario is a distant-talking interface for interactive control of a TV. The experiments involve the acquisition of multichannel data for signal processing front-end and were carried out due to the need to collect a database for testing acoustic pre-processing algorithms. In this way, realistic scenarios can be simulated at a preliminary stage, instead of real-time implementations, allowing for repeatable experiments. To match the project requirements, the WOZ experiments were recorded in three languages: English, German and Italian. Besides the user inputs, the database also contains non-speech related acoustic events, room impulse response measurements and video data, the latter used to compute 3D labels. Sessions were manually transcribed and segmented at word level, introducing also specific labels for acoustic events. @@ -2109,8 +2109,8 @@ lounela-2008-process - MarionaTaulé - M. AntòniaMartí + MarionaTaulé + M. AntòniaMartí MartaRecasens <fixed-case>A</fixed-case>n<fixed-case>C</fixed-case>ora: Multilevel Annotated Corpora for <fixed-case>C</fixed-case>atalan and <fixed-case>S</fixed-case>panish http://www.lrec-conf.org/proceedings/lrec2008/pdf/35_paper.pdf @@ -2119,8 +2119,8 @@ StephenPurpura - JohnWilkerson - DustinHillard + JohnWilkerson + DustinHillard The <fixed-case>U</fixed-case>.<fixed-case>S</fixed-case>. Policy Agenda Legislation Corpus Volume 1 - a Language Resource from 1947 - 1998 http://www.lrec-conf.org/proceedings/lrec2008/pdf/105_paper.pdf We introduce the corpus of United States Congressional bills from 1947 to 1998 for use by language research communities. The U.S. Policy Agenda Legislation Corpus Volume 1 (USPALCV1) includes more than 375,000 legislative bills annotated with a hierarchical policy area category. The human annotations in USPALCV1 have been reliably applied over time to enable social science analysis of legislative trends. The corpus is a member of an emerging family of corpora that are annotated by policy area to enable comparative parallel trend recognition across countries and domains (legislation, political speeches, newswire articles, budgetary expenditures, web sites, etc.). This paper describes the origins of the corpus, its creation, ways to access it, design criteria, and an analysis with common supervised machine learning methods. The use of machine learning methods establishes a baseline proposed modeling for the topic classification of legal documents. @@ -2146,7 +2146,7 @@ NellekeOostdijk MartinReynaert PaolaMonachesi - GertjanVan Noord + GertjanVan Noord RoelandOrdelman InekeSchuurman VincentVandeghinste @@ -2156,7 +2156,7 @@ oostdijk-etal-2008-coi - Hiromi ItohOzaku + Hiromi ItohOzaku AkinoriAbe KaoruSagara KiyoshiKogure @@ -2166,8 +2166,8 @@ ozaku-etal-2008-relationships - Meghan LammieGlenn - StephanieStrassel + Meghan LammieGlenn + StephanieStrassel LaurenFriedman HaejoongLee ShawnMedero @@ -2187,7 +2187,7 @@ hammarstrom-etal-2008-bootstrapping - SatoshiSato + SatoshiSato SuguruMatsuyoshi YohsukeKondoh Automatic Assessment of <fixed-case>J</fixed-case>apanese Text Readability Based on a Textbook Corpus @@ -2198,9 +2198,9 @@ PaulThompson PhilipCotter - JohnMcNaught + JohnMcNaught SophiaAnaniadou - SimonettaMontemagni + SimonettaMontemagni AndreaTrabucco GiuliaVenturi Building a Bio-Event Annotated Corpus for the Acquisition of Semantic Frames from Biomedical Corpora @@ -2209,9 +2209,9 @@ thompson-etal-2008-building - C.J.Rupp + C.J.Rupp AnnCopestake - PeterCorbett + PeterCorbett PeterMurray-Rust AdvaithSiddharthan SimoneTeufel @@ -2236,7 +2236,7 @@ ValeriaQuochi MonicaMonachini - RiccardoDel Gratta + RiccardoDel Gratta NicolettaCalzolari A lexicon for biology and bioinformatics: the <fixed-case>BOOTS</fixed-case>trep experience. http://www.lrec-conf.org/proceedings/lrec2008/pdf/576_paper.pdf @@ -2294,7 +2294,7 @@ MeniAdler - YaelNetzer + YaelNetzer YoavGoldberg DavidGabay MichaelElhadad @@ -2307,7 +2307,7 @@ JoydeepNath MonojitChoudhury AnimeshMukherjee - ChristianBiemann + ChristianBiemann NiloyGanguly Unsupervised Parts-of-Speech Induction for <fixed-case>B</fixed-case>engali http://www.lrec-conf.org/proceedings/lrec2008/pdf/309_paper.pdf @@ -2315,7 +2315,7 @@ nath-etal-2008-unsupervised - Guadalupe Aguadode Cea + Guadalupe Aguadode Cea JavierPuche José ÁngelRamos Tagging <fixed-case>S</fixed-case>panish Texts: the Problem of Problem of “<fixed-case>SE</fixed-case>” @@ -2339,7 +2339,7 @@ NatalieSchluter - Josefvan Genabith + Josefvan Genabith Treebank-Based Acquisition of <fixed-case>LFG</fixed-case> Parsing Resources for <fixed-case>F</fixed-case>rench http://www.lrec-conf.org/proceedings/lrec2008/pdf/739_paper.pdf Motivated by the expense in time and other resources to produce hand-crafted grammars, there has been increased interest in automatically obtained wide-coverage grammars from treebanks for natural language processing. In particular, recent years have seen the growth in interest in automatically obtained deep resources that can represent information absent from simple CFG-type structured treebanks and which are considered to produce more language-neutral linguistic representations, such as dependency syntactic trees. As is often the case in early pioneering work on natural language processing, English has provided the focus of first efforts towards acquiring deep-grammar resources, followed by successful treatments of, for example, German, Japanese, Chinese and Spanish. However, no comparable large-scale automatically acquired deep-grammar resources have been obtained for French to date. The goal of this paper is to present the application of treebank-based language acquisition to the case of French. We show that with modest changes to the established parsing architectures, encouraging results can be obtained for French, with an overall best dependency structure f-score of 86.73%. @@ -2359,7 +2359,7 @@ GeorgiosPetasis ArisTheodorakos VangelisKarkaletsis - ConstantineSpyropoulos + ConstantineSpyropoulos <fixed-case>BOEMIE</fixed-case> Ontology-Based Text Annotation Tool http://www.lrec-conf.org/proceedings/lrec2008/pdf/324_paper.pdf The huge amount of the available information in the Web creates the need of effective information extraction systems that are able to produce metadata that satisfy user’s information needs. The development of such systems, in the majority of cases, depends on the availability of an appropriately annotated corpus in order to learn extraction models. The production of such corpora can be significantly facilitated by annotation tools that are able to annotate, according to a defined ontology, not only named entities but most importantly relations between them. This paper describes the BOEMIE ontology-based annotation tool which is able to locate blocks of text that correspond to specific types of named entities, fill tables corresponding to ontology concepts with those named entities and link the filled tables based on relations defined in the domain ontology. Additionally, it can perform annotation of blocks of text that refer to the same topic. The tool has a user-friendly interface, supports automatic pre-annotation, annotation comparison as well as customization to other annotation schemata. The annotation tool has been used in a large scale annotation task involving 3,000 web pages regarding athletics. It has also been used in another annotation task involving 503 web pages with medical information, in different languages. @@ -2376,19 +2376,19 @@ PiekVossen - EnekoAgirre + EnekoAgirre NicolettaCalzolari ChristianeFellbaum - Shu-kaiHsieh + Shu-kaiHsieh Chu-RenHuang HitoshiIsahara KyokoKanzaki AndreaMarchetti MonicaMonachini FedericoNeri - RemoRaffaelli - GermanRigau - MaurizioTescon + RemoRaffaelli + GermanRigau + MaurizioTescon JoopVanGent <fixed-case>KYOTO</fixed-case>: a System for Mining, Structuring and Distributing Knowledge across Languages and Cultures http://www.lrec-conf.org/proceedings/lrec2008/pdf/373_paper.pdf @@ -2396,7 +2396,7 @@ vossen-etal-2008-kyoto - UlrichSchäfer + UlrichSchäfer HansUszkoreit ChristianFedermann TorstenMarek @@ -2408,7 +2408,7 @@ AdrianIftene - AlexandraBalahur-Dobrescu + AlexandraBalahur-Dobrescu Named Entity Relation Mining using <fixed-case>W</fixed-case>ikipedia http://www.lrec-conf.org/proceedings/lrec2008/pdf/192_paper.pdf Discovering relations among Named Entities (NEs) from large corpora is both a challenging, as well as useful task in the domain of Natural Language Processing, with applications in Information Retrieval (IR), Summarization (SUM), Question Answering (QA) and Textual Entailment (TE). The work we present resulted from the attempt to solve practical issues we were confronted with while building systems for the tasks of Textual Entailment Recognition and Question Answering, respectively. The approach consists in applying grammar induced extraction patterns on a large corpus - Wikipedia - for the extraction of relations between a given Named Entity and other Named Entities. The results obtained are high in precision, determining a reliable and useful application of the built resource. @@ -2426,7 +2426,7 @@ ZhiyiSong - StephanieStrassel + StephanieStrassel Entity Translation and Alignment in the <fixed-case>ACE</fixed-case>-07 <fixed-case>ET</fixed-case> Task http://www.lrec-conf.org/proceedings/lrec2008/pdf/551_paper.pdf Entities - people, organizations, locations and the like - have long been a central focus of natural language processing technology development, since entities convey essential content in human languages. For multilingual systems, accurate translation of named entities and their descriptors is critical. LDC produced Entity Translation pilot data to support the ACE ET 2007 Evaluation and the current paper delves more deeply into the entity alignment issue across languages, combining the automatic alignment techniques developed for ACE-07 with manual alignment. Altogether 84% of the Chinese-English entity mentions and 74% of the Arabic-English entity mentions are perfect aligned. The results of this investigation offer several important insights. Automatic alignment algorithms predicted that perfect alignment for the ET corpus was likely to be no greater than 55%; perfect alignment on the 15 pilot documents was predicted at 62.5%. Our results suggest the actual perfect alignment rate is substantially higher (82% average, 92% for NAM entities). The careful analysis of alignment errors also suggests strategies for human translation to support the ET task; for instance, translators might be given additional guidance about preferred treatments of name versus nominal translation. These results can also contribute to refined methods of evaluating ET systems. @@ -2434,8 +2434,8 @@ YojiKiyota - NoriyukiTamura - SatoshiSakai + NoriyukiTamura + SatoshiSakai HiroshiNakagawa HidetakaMasuda Automated Subject Induction from Query Keywords through <fixed-case>W</fixed-case>ikipedia Categories and Subject Headings @@ -2445,14 +2445,14 @@ LinusSellberg - ArneJönsson + ArneJönsson Using Random Indexing to improve Singular Value Decomposition for Latent Semantic Analysis http://www.lrec-conf.org/proceedings/lrec2008/pdf/586_paper.pdf In this paper we present results from using Random indexing for Latent Semantic Analysis to handle Singular Value Decomposition tractability issues. In the paper we compare Latent Semantic Analysis, Random Indexing and Latent Semantic Analysis on Random Indexing reduced matrices. Our results show that Latent Semantic Analysis on Random Indexing reduced matrices provide better results on Precision and Recall than Random Indexing only. Furthermore, computation time for Singular Value Decomposition on a Random indexing reduced matrix is almost halved compared to Latent Semantic Analysis. sellberg-jonsson-2008-using - ŠpelaVintar + ŠpelaVintar DarjaFišer Harvesting Multi-Word Expressions from Parallel Corpora http://www.lrec-conf.org/proceedings/lrec2008/pdf/281_paper.pdf @@ -2472,16 +2472,16 @@ DaigaDeksne RaivisSkadiņš - IngunaSkadiņa + IngunaSkadiņa Dictionary of Multiword Expressions for Translation into highly Inflected Languages http://www.lrec-conf.org/proceedings/lrec2008/pdf/353_paper.pdf Treatment of Multiword Expressions (MWEs) is one of the most complicated issues in natural language processing, especially in Machine Translation (MT). The paper presents dictionary of MWEs for a English-Latvian MT system, demonstrating a way how MWEs could be handled for inflected languages with rich morphology and rather free word order. The proposed dictionary of MWEs consists of two constituents: a lexicon of phrases and a set of MWE rules. The lexicon of phrases is rather similar to translation lexicon of the MT system, while MWE rules describe syntactic structure of the source and target sentence allowing correct transformation of different MWE types into the target language and ensuring correct syntactic structure. The paper demonstrates this approach on different MWE types, starting from simple syntactic structures, followed by more complicated cases and including fully idiomatic expressions. Automatic evaluation shows that the described approach increases the quality of translation by 0.6 BLEU points. deksne-etal-2008-dictionary - GrazynaVetulani + GrazynaVetulani ZygmuntVetulani - TomaszObrębski + TomaszObrębski Verb-Noun Collocation <fixed-case>S</fixed-case>ynt<fixed-case>L</fixed-case>ex Dictionary: Corpus-Based Approach http://www.lrec-conf.org/proceedings/lrec2008/pdf/398_paper.pdf The project presented here is a part of a long term research program aiming at a full lexicon grammar for Polish (SyntLex). The main concern of this project is computer-assisted acquisition and morpho-syntactic description of verb-noun collocations in Polish. We present methodology and resources obtained in three main project phases which are: dictionary-based acquisition of collocation lexicon, feasibility study for corpus-based lexicon enlargement phase, corpus-based lexicon enlargement and collocation description. In this paper we focus on the results of the third phase. The presented here corpus-based approach permitted us to triple the size the verb-noun collocation dictionary for Polish. In the paper we describe the SyntLex Dictionary of Collocations and announce some future research intended to be a separate project continuation. @@ -2497,8 +2497,8 @@ qu-etal-2008-targeting - Margarita AlonsoRamos - OwenRambow + Margarita AlonsoRamos + OwenRambow LeoWanner Using Semantically Annotated Corpora to Build Collocation Resources http://www.lrec-conf.org/proceedings/lrec2008/pdf/294_paper.pdf @@ -2506,10 +2506,10 @@ ramos-etal-2008-using - Katia LidaKermanidis + Katia LidaKermanidis AristomenisThanopoulos ManolisMaragoudakis - NikosFakotakis + NikosFakotakis <fixed-case>E</fixed-case>ksairesis: A Domain-Adaptable System for Ontology Building from Unstructured Text http://www.lrec-conf.org/proceedings/lrec2008/pdf/147_paper.pdf This paper describes Eksairesis, a system for learning economic domain knowledge automatically from Modern Greek text. The knowledge is in the form of economic terms and the semantic relations that govern them. The entire process in based on the use of minimal language-dependent tools, no external linguistic resources, and merely free, unstructured text. The methodology is thereby easily portable to other domains and other languages. The text is pre-processed with basic morphological annotation, and semantic (named and other) entities are identified using supervised learning techniques. Statistical filtering, i.e. corpora comparison is used to extract domain terms and supervised learning is again employed to detect the semantic relations between pairs of terms. Advanced classification schemata, ensemble learning, and one-sided sampling, are experimented with in order to deal with the noise in the data, which is unavoidable due to the low pre-processing level and the lack of sophisticated resources. An average 68.5% f-score over all the classes is achieved when learning semantic relations. Bearing in mind the use of minimal resources and the highly automated nature of the process, classification performance is very promising, compared to results reported in previous work. @@ -2533,9 +2533,9 @@ buitelaar-eigner-2008-ontology - CássiaTrojahn + CássiaTrojahn PauloQuaresma - RenataVieira + RenataVieira A Framework for Multilingual Ontology Mapping http://www.lrec-conf.org/proceedings/lrec2008/pdf/270_paper.pdf In the field of ontology mapping, multilingual ontology mapping is an issue that is not well explored. This paper proposes a framework for mapping of multilingual Description Logics (DL) ontologies. First, the DL source ontology is translated to the target ontology language, using a lexical database or a dictionary, generating a DL translated ontology. The target and the translated ontologies are then used as input for the mapping process. The mappings are computed by specialized agents using different mapping approaches. Next, these agents use argumentation to exchange their local results, in order to agree on the obtained mappings. Based on their preferences and confidence of the arguments, the agents compute their preferred mapping sets. The arguments in such preferred sets are viewed as the set of globally acceptable arguments. A DL mapping ontology is generated as result of the mapping process. In this paper we focus on the process of generating the DL translated ontology. @@ -2543,7 +2543,7 @@ LauraKassner - ViviNastase + ViviNastase MichaelStrube Acquiring a Taxonomy from the <fixed-case>G</fixed-case>erman <fixed-case>W</fixed-case>ikipedia http://www.lrec-conf.org/proceedings/lrec2008/pdf/544_paper.pdf @@ -2552,7 +2552,7 @@ DavidePicca - Alfio MassimilianoGliozzo + Alfio MassimilianoGliozzo AldoGangemi <fixed-case>LMM</fixed-case>: an <fixed-case>OWL</fixed-case>-<fixed-case>DL</fixed-case> <fixed-case>M</fixed-case>eta<fixed-case>M</fixed-case>odel to Represent Heterogeneous Lexical Knowledge http://www.lrec-conf.org/proceedings/lrec2008/pdf/608_paper.pdf @@ -2617,11 +2617,11 @@ jouis-bourdaillet-2008-representation - Siaw-FongChung - LaurentPrévot + Siaw-FongChung + LaurentPrévot MingweiXu KathleenAhrens - Shu-KaiHsieh + Shu-KaiHsieh Chu-RenHuang Extracting Concrete Senses of Lexicon through Measurement of Conceptual Similarity in Ontologies http://www.lrec-conf.org/proceedings/lrec2008/pdf/501_paper.pdf @@ -2655,7 +2655,7 @@ vivaldi-etal-2008-turning - PeterAnick + PeterAnick VijayMurthi ShajiSebastian Similar Term Discovery using Web Search @@ -2674,16 +2674,16 @@ ZiqiZhang - JoseIria + JoseIria ChristopherBrewster - FabioCiravegna + FabioCiravegna A Comparative Evaluation of Term Recognition Algorithms http://www.lrec-conf.org/proceedings/lrec2008/pdf/538_paper.pdf Automatic Term recognition (ATR) is a fundamental processing step preceding more complex tasks such as semantic search and ontology learning. From a large number of methodologies available in the literature only a few are able to handle both single and multi-word terms. In this paper we present a comparison of five such algorithms and propose a combined approach us¬ing a voting mechanism. We evaluated the six approaches using two different corpora and show how the voting algo¬rithm performs best on one corpus (a collection of texts from Wikipedia) and less well using the Genia corpus (a standard life science corpus). This indicates that choice and design of corpus has a major impact on the evaluation of term recog¬nition algorithms. Our experiments also showed that single-word terms can be equally important and occupy a fairly large proportion in certain domains. As a result, algorithms that ignore single-word terms may cause problems to tasks built on top of ATR. Effective ATR systems also need to take into account both the unstructured text and the structured aspects and this means information extraction techniques need to be integrated into the term recognition process. zhang-etal-2008-comparative - VeroniqueHoste + VeroniqueHoste ElsLefever KlaarVanopstal IsabelleDelaere @@ -2694,10 +2694,10 @@ EliPociello - AnttonGurrutxaga - EnekoAgirre - IzaskunAldezabal - GermanRigau + AnttonGurrutxaga + EnekoAgirre + IzaskunAldezabal + GermanRigau <fixed-case>WNTERM</fixed-case>: Enriching the <fixed-case>MCR</fixed-case> with a Terminological Dictionary http://www.lrec-conf.org/proceedings/lrec2008/pdf/451_paper.pdf In this paper we describe the methodology and the first steps for the creation of WNTERM (from WordNet and Terminology), a specialized lexicon produced from the merger of the EuroWordNet-based Multilingual Central Repository (MCR) and the Basic Encyclopaedic Dictionary of Science and Technology (BDST). As an example, the ecology domain has been used. The final result is a multilingual (Basque and English) light-weight domain ontology, including taxonomic and other semantic relations among its concepts, which is tightly connected to other wordnets. @@ -2715,7 +2715,7 @@ ThomasMandl FredricGey - GiorgioDi Nunzio + GiorgioDi Nunzio NicolaFerro MarkSanderson DianaSantos @@ -2727,7 +2727,7 @@ JorgeCivera - AlfonsJuan-Císcar + AlfonsJuan-Císcar Bilingual Text Classification using the <fixed-case>IBM</fixed-case> 1 Translation Model http://www.lrec-conf.org/proceedings/lrec2008/pdf/22_paper.pdf Manual categorisation of documents is a time-consuming task that has been significantly alleviated with the deployment of automatic and machine-aided text categorisation systems. However, the proliferation of multilingual documentation has become a common phenomenon in many international organisations, while most of the current systems have focused on the categorisation of monolingual text. It has been recently shown that the inherent redundancy in bilingual documents can be effectively exploited by relatively simple, bilingual naive Bayes (multinomial) models. In this work, we present a refined version of these models in which this redundancy is explicitly captured by a combination of a unigram (multinomial) model and the well-known IBM 1 translation model. The proposed model is evaluated on two bilingual classification tasks and compared to previous work. @@ -2750,9 +2750,9 @@ shinnou-sasaki-2008-spectral - DanicaDamljanovic + DanicaDamljanovic ValentinTablan - KalinaBontcheva + KalinaBontcheva A Text-based Query Interface to <fixed-case>OWL</fixed-case> Ontologies http://www.lrec-conf.org/proceedings/lrec2008/pdf/64_paper.pdf Accessing structured data in the form of ontologies requires training and learning formal query languages (e.g., SeRQL or SPARQL) which poses significant difficulties for non-expert users. One of the ways to lower the learning overhead and make ontology queries more straightforward is through a Natural Language Interface (NLI). While there are existing NLIs to structured data with reasonable performance, they tend to require expensive customisation to each new domain or ontology. Additionally, they often require specific adherence to a pre-defined syntax which, in turn, means that users still have to undergo training. In this paper we present Question-based Interface to Ontologies (QuestIO) - a tool for querying ontologies using unconstrained language-based queries. QuestIO has a very simple interface, requires no user training and can be easily embedded in any system or used with any ontology or knowledge base without prior customisation. @@ -2760,7 +2760,7 @@ HanRen - DonghongJi + DonghongJi LeiHan A Research on Automatic <fixed-case>C</fixed-case>hinese Catchword Extraction http://www.lrec-conf.org/proceedings/lrec2008/pdf/96_paper.pdf @@ -2788,7 +2788,7 @@ MichaelWiegand - Jochen L.Leidner + Jochen L.Leidner DietrichKlakow Cost-Sensitive Learning in Answer Extraction http://www.lrec-conf.org/proceedings/lrec2008/pdf/177_paper.pdf @@ -2806,7 +2806,7 @@ FrancescaFallucchi - Fabio MassimoZanzotto + Fabio MassimoZanzotto Yet another Platform for Extracting Knowledge from Corpora http://www.lrec-conf.org/proceedings/lrec2008/pdf/284_paper.pdf The research field of “extracting knowledge bases from text collections” seems to be mature: its target and its working hypotheses are clear. In this paper we propose a platform, YAPEK, i.e., Yet Another Platform for Extracting Knowledge from corpora, that wants to be the base to collect the majority of algorithms for extracting knowledge bases from corpora. The idea is that, when many knowledge extraction algorithms are collected under the same platform, relative comparisons are clearer and many algorithms can be leveraged to extract more valuable knowledge for final tasks such as Textual Entailment Recognition. As we want to collect many knowledge extraction algorithms, YAPEK is based on the three working hypotheses of the area: the basic hypothesis, the distributional hypothesis, and the point-wise assertion patterns. In YAPEK, these three hypotheses define two spaces: the space of the target textual forms and the space of the contexts. This platform guarantees the possibility of rapidly implementing many models for extracting knowledge from corpora as the platform gives clear entry points to model what is really different in the different algorithms: the feature spaces, the distances in these spaces, and the actual algorithm. @@ -2815,7 +2815,7 @@ MilenaYankova HoracioSaggion - HamishCunningham + HamishCunningham A Framework for Identity Resolution and Merging for Multi-source Information Extraction http://www.lrec-conf.org/proceedings/lrec2008/pdf/347_paper.pdf In the context of ontology-based information extraction, identity resolution is the process of deciding whether an instance extracted from text refers to a known entity in the target domain (e.g. the ontology). We present an ontology-based framework for identity resolution which can be customized to different application domains and extraction tasks. Rules for identify resolution, which compute similarities between target and source entities based on class information and instance properties and values, can be defined for each class in the ontology. We present a case study of the application of the framework to the problem of multi-source job vacancy extraction @@ -2831,7 +2831,7 @@ karlgren-etal-2008-experiments - FideliaIbekwe-SanJuan + FideliaIbekwe-SanJuan ChaomeiChen RobertoPinho Identifying Strategic Information from Scientific Articles through Sentence Classification @@ -2841,7 +2841,7 @@ SusanaAzeredo - SilviaMoraes + SilviaMoraes VeraLima Keywords, k-<fixed-case>NN</fixed-case> and Neural Networks: a Support for Hierarchical Categorization of Texts in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese http://www.lrec-conf.org/proceedings/lrec2008/pdf/402_paper.pdf @@ -2868,16 +2868,16 @@ yamamoto-etal-2008-extraction - RuneSætre + RuneSætre BrianKemper KanaeOda - NaoakiOkazaki + NaoakiOkazaki YukikoMatsuoka NorihiroKikuchi HiroakiKitano YoshimasaTsuruoka SophiaAnaniadou - Jun’ichiTsujii + Jun’ichiTsujii Connecting Text Mining and Pathways using the <fixed-case>P</fixed-case>ath<fixed-case>T</fixed-case>ext Resource http://www.lrec-conf.org/proceedings/lrec2008/pdf/442_paper.pdf Many systems have been developed in the past few years to assist researchers in the discovery of knowledge published as English text, for example in the PubMed database. At the same time, higher level collective knowledge is often published using a graphical notation representing all the entities in a pathway and their interactions. We believe that these pathway visualizations could serve as an effective user interface for knowledge discovery if they can be linked to the text in publications. Since the graphical elements in a Pathway are of a very different nature than their corresponding descriptions in English text, we developed a prototype system called PathText. The goal of PathText is to serve as a bridge between these two different representations. In this paper, we first describe the overall architecture and the interfaces of the PathText system, and then provide some details about the core Text Mining components. @@ -2885,7 +2885,7 @@ JanPomikálek - PavelRychlý + PavelRychlý Detecting Co-Derivative Documents in Large Text Collections http://www.lrec-conf.org/proceedings/lrec2008/pdf/481_paper.pdf We have analyzed the SPEX algorithm by Bernstein and Zobel (2004) for detecting co-derivative documents using duplicate n-grams. Although we totally agree with the claim that not using unique n-grams can greatly increase the efficiency and scalability of the process of detecting co-derivative documents, we have found serious bottlenecks in the way SPEX finds the duplicate n-grams. While the memory requirements for computing co-derivative documents can be reduced to up to 1% by only using duplicate n-grams, SPEX needs about 40 times more memory for computing the list of duplicate n-grams itself. Therefore the memory requirements of the whole process are not reduced enough to make the algorithm practical for very large collections. We propose a solution for this problem using an external sort with the suffix array in-memory sorting and temporary file compression. The proposed algorithm for computing duplicate n-grams uses a fixed amount of memory for any input size. @@ -2903,7 +2903,7 @@ PengZhang WenjieLi FuruWei - QinLu + QinLu YuexianHou Exploiting the Role of Position Feature in <fixed-case>C</fixed-case>hinese Relation Extraction http://www.lrec-conf.org/proceedings/lrec2008/pdf/540_paper.pdf @@ -2912,15 +2912,15 @@ BenAllison - LouiseGuthrie + LouiseGuthrie Authorship Attribution of <fixed-case>E</fixed-case>-Mail: Comparing Classifiers over a New Corpus for Evaluation http://www.lrec-conf.org/proceedings/lrec2008/pdf/552_paper.pdf The release of the Enron corpus provided a unique resource for studying aspects of email use, because it is largely unfiltered, and therefore presents a relatively complete collection of emails for a reasonably large number of correspondents. This paper describes a newly created subcorpus of the Enron emails which we suggest can be used to test techniqes for authorship attribution, and further shows the application of three different classification methods to this task to present baseline results. Two of the classifiers used are are standard, and have been shown to perform well in the literature, and one of the classifiers is novel and based on concurrent work that proposes a Bayesian hierarchical distribution for word counts in documents. For each of the classifiers, we present results using six text representations, including use of linguistic structures derived from a parser as well as lexical information. allison-guthrie-2008-authorship - MichaelKaisser - JohnLowe + MichaelKaisser + JohnLowe Creating a Research Collection of Question Answer Sentence Pairs with <fixed-case>A</fixed-case>mazon’s <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk http://www.lrec-conf.org/proceedings/lrec2008/pdf/565_paper.pdf Each year NIST releases a set of question, document id, answer-triples for the factoid questions used in the TREC Question Answering track. While this resource is widely used and proved itself useful for many purposes, it also is too coarse a grain-size for a lot of other purposes. In this paper we describe how we have used Amazon’s Mechanical Turk to have multiple subjects read the documents and identify the sentences themselves which contain the answer. For most of the 1911 questions in the test sets from 2002 to 2006 and each of the documents said to contain an answer, the Question-Answer Sentence Pairs (QASP) corpus introduced in this paper contains the identified answer sentences. We believe that this corpus, which we will make available to the public, can further stimulate research in QA, especially linguistically motivated research, where matching the question to the answer sentence by either syntactic or semantic means is a central concern. @@ -2966,7 +2966,7 @@ LeiXia - JoséIria + JoséIria An Approach to Modeling Heterogeneous Resources for Information Extraction http://www.lrec-conf.org/proceedings/lrec2008/pdf/702_paper.pdf In this paper, we describe an approach that aims to model heterogeneous resources for information extraction. Document is modeled in graph representation that enables better understanding of multi-media document and its structure which ultimately could result better cross-media information extraction. We also describe our proposed algorithm that segment document-based on the document modeling approach we described in this paper. @@ -2982,7 +2982,7 @@ LorraineGoeuriot NataliaGrabar - BéatriceDaille + BéatriceDaille Characterization of Scientific and Popular Science Discourse in <fixed-case>F</fixed-case>rench, <fixed-case>J</fixed-case>apanese and <fixed-case>R</fixed-case>ussian http://www.lrec-conf.org/proceedings/lrec2008/pdf/743_paper.pdf We aim to characterize the comparability of corpora, we address this issue in the trilingual context through the distinction of expert and non expert documents. We work separately with corpora composed of documents from the medical domain in three languages (French, Japanese and Russian) which present an important linguistic distance between them. In our approach, documents are characterized in each language by their topic and by a discursive typology positioned at three levels of document analysis: structural, modal and lexical. The document typology is implemented with two learning algorithms (SVMlight and C4.5). Evaluation of results shows that the proposed discursive typology can be transposed from one language to another, as it indeed allows to distinguish the two aimed discourses (science and popular science). However, we observe that performances vary a lot according to languages, algorithms and types of discursive characteristics. @@ -2999,7 +2999,7 @@ NasserAbouzakhar BenAllison - LouiseGuthrie + LouiseGuthrie Unsupervised Learning-based Anomalous <fixed-case>A</fixed-case>rabic Text Detection http://www.lrec-conf.org/proceedings/lrec2008/pdf/83_paper.pdf The growing dependence of modern society on the Web as a vital source of information and communication has become inevitable. However, the Web has become an ideal channel for various terrorist organisations to publish their misleading information and send unintelligible messages to communicate with their clients as well. The increase in the number of published anomalous misleading information on the Web has led to an increase in security threats. The existing Web security mechanisms and protocols are not appropriately designed to deal with such recently developed problems. Developing technology to detect anomalous textual information has become one of the major challenges within the NLP community. This paper introduces the problem of anomalous text detection by automatically extracting linguistic features from documents and evaluating those features for patterns of suspicious and/or inconsistent information in Arabic documents. In order to achieve that, we defined specific linguistic features that characterise various Arabic writing styles. Also, the paper introduces the main challenges in Arabic processing and describes the proposed unsupervised learning model for detecting anomalous Arabic textual information. @@ -3052,7 +3052,7 @@ MatthieuHermet AlainDésilets - StanSzpakowicz + StanSzpakowicz Using the Web as a Linguistic Resource to Automatically Correct Lexico-Syntactic Errors http://www.lrec-conf.org/proceedings/lrec2008/pdf/220_paper.pdf This paper presents an algorithm for correcting language errors typical of second-language learners. We focus on preposition errors, which are very common among second-language learners but are not addressed well by current commercial grammar correctors and editing aids. The algorithm takes as input a sentence containing a preposition error (and possibly other errors as well), and outputs the correct preposition for that particular sentence context. We use a two-phase hybrid rule-based and statistical approach. In the first phase, rule-based processing is used to generate a short expression that captures the context of use of the preposition in the input sentence. In the second phase, Web searches are used to evaluate the frequency of this expression, when alternative prepositions are used instead of the original one. We tested this algorithm on a corpus of 133 French sentences written by intermediate second-language learners, and found that it could address 69.9% of those cases. In contrast, we found that the best French grammar and spell checker currently on the market, Antidote, addressed only 3% of those cases. We also showed that performance degrades gracefully when using a corpus of frequent n-grams to evaluate frequencies. @@ -3081,20 +3081,20 @@ quixal-etal-2008-user - WeiLiu + WeiLiu BenAllison - LouiseGuthrie + LouiseGuthrie Professor or Screaming Beast? Detecting Anomalous Words in <fixed-case>C</fixed-case>hinese http://www.lrec-conf.org/proceedings/lrec2008/pdf/37_paper.pdf The Internet has become the most popular platform for communication. However because most of the modern computer keyboard is Latin-based, Asian languages such as Chinese cannot input its characters (Hanzi) directly with these keyboards. As a result, methods for representing Chinese characters using Latin alphabets were introduced. The most popular method among these is the Pinyin input system. Pinyin is also called “Romanised” Chinese in that it phonetically resembles a Chinese character. Due to the highly ambiguous mapping from Pinyin to Chinese characters, word misuses can occur using standard computer keyboard, and more commonly so in internet chat-rooms or instant messengers where the language used is less formal. In this paper we aim to develop a system that can automatically identify such anomalies, whether they are simple typos or whether they are intentional. After identifying them, the system should suggest the correct word to be used. liu-etal-2008-professor - IñakiAlegria + IñakiAlegria KlaraCeberio - NereaEzeiza - AitorSoroa - GregorioHernandez + NereaEzeiza + AitorSoroa + GregorioHernandez Spelling Correction: from Two-Level Morphology to Open Source http://www.lrec-conf.org/proceedings/lrec2008/pdf/274_paper.pdf Basque is a highly inflected and agglutinative language (Alegria et al., 1996). Two-level morphology has been applied successfully to this kind of languages and there are two-level based descriptions for very different languages. After doing the morphological description for a language, it is easy to develop a spelling checker/corrector for this language. However, what happens if we want to use the speller in the “free world” (OpenOffice, Mozilla, emacs, LaTeX, etc.)? Ispell and similar tools (aspell, hunspell, myspell) are the usual mechanisms for these purposes, but they do not fit the two-level model. In the absence of two-level morphology based mechanisms, an automatic conversion from two-level description to hunspell is described in this paper. @@ -3110,11 +3110,11 @@ YannickVersley - SimonePonzetto - MassimoPoesio + SimonePonzetto + MassimoPoesio VladimirEidelman AlanJern - JasonSmith + JasonSmith XiaofengYang AlessandroMoschitti <fixed-case>BART</fixed-case>: A modular toolkit for coreference resolution @@ -3123,8 +3123,8 @@ versley-etal-2008-bart-modular - MassimoPoesio - UdoKruschwitz + MassimoPoesio + UdoKruschwitz JonChamberlain <fixed-case>ANAWIKI</fixed-case>: Creating Anaphorically Annotated Resources through Web Cooperation http://www.lrec-conf.org/proceedings/lrec2008/pdf/590_paper.pdf @@ -3181,7 +3181,7 @@ MichaelaAtterer - HinrichSchütze + HinrichSchütze An Inverted Index for Storing and Retrieving Grammatical Dependencies http://www.lrec-conf.org/proceedings/lrec2008/pdf/23_paper.pdf Web count statistics gathered from search engines have been widely used as a resource in a variety of NLP tasks. For some tasks, however, the information they exploit is not fine-grained enough. We propose an inverted index over grammatical relations as a fast and reliable resource to access more general and also more detailed frequency information. To build the index, we use a dependency parser to parse a large corpus. We extract binary dependency relations, such as he-subj-say (“he” is the subject of “say”) as index terms and construct the index using publicly available open-source indexing software. The unit we index over is the sentence. The index can be used to extract grammatical relations and frequency counts for these relations. The framework also provides the possibility to search for partial dependencies (say, the frequency of “he” occurring in subject position), words, strings and a combination of these. One possible application is the disambiguation of syntactic structures. @@ -3222,7 +3222,7 @@ saito-etal-2008-japanese - Maria TeresaPazienza + Maria TeresaPazienza ArmandoStellato AlexandraTudorache <fixed-case>JMWNL</fixed-case>: an Extensible Multilingual Library for Accessing Wordnets in Different Languages @@ -3238,7 +3238,7 @@ maynard-2008-benchmarking - LiviuDinu + LiviuDinu MariusPopescu AncaDinu Authorship Identification of <fixed-case>R</fixed-case>omanian Texts with Controversial Paternity @@ -3273,7 +3273,7 @@ santaholma-chatzichrisafis-2008-knowledge - MichaelRosner + MichaelRosner <fixed-case>ODL</fixed-case>: an Object Description Language for Lexical Information http://www.lrec-conf.org/proceedings/lrec2008/pdf/871_paper.pdf This paper describes ODL, a description language for lexical information that is being developed within the context of a national project called MLRS (Maltese Language Resource Server) whose goal is to create a national corpus and computational lexicon for the Maltese language. The main aim of ODL is to make the task of the lexicographer easier by allowing lexical specifications to be set out formally so that actual entries will conform to them. The paper describes some of the background motivation, the ODL language itself, and concludes with a short example of how lexical values expressed in ODL can be mapped to an existing tagset together with some speculations about future work. @@ -3281,7 +3281,7 @@ DanCristea - CorinaForăscu + CorinaForăscu MariusRăschip MichaelZock How to Evaluate and Raise the Quality in a Collaborative Lexicographic Approach @@ -3290,7 +3290,7 @@ cristea-etal-2008-evaluate - Bolette SandfordPedersen + Bolette SandfordPedersen AnnaBraasch LinaHenriksen SussiOlsen @@ -3309,8 +3309,8 @@ MíriamLuján - Carlos D.Martínez - VicentAlabau + Carlos D.Martínez + VicentAlabau Evaluation of several Maximum Likelihood Linear Regression Variants for Language Adaptation http://www.lrec-conf.org/proceedings/lrec2008/pdf/217_paper.pdf Multilingual Automatic Speech Recognition (ASR) systems are of great interest in multilingual environments. We studied the case of the Comunitat Valenciana where the two official languages are Spanish and Valencian. These two languages share most of their phonemes, and their syntax and vocabulary are also quite similar since they have influenced each other for many years. We constructed a system, and trained its acoustic models with a small corpus of Spanish and Valencian, which has produced poor results due to the lack of data. Adaptation techniques can be used to adapt acoustic models that are trained with a large corpus of a language inr order to obtain acoustic models for a phonetically similar language. This process is known as language adaptation. The Maximum Likelihood Linear Regression (MLLR) technique has commonly been used in speaker adaptation; however we have used MLLR in language adaptation. We compared several MLLR variants (mean square, diagonal matrix and full matrix) for language adaptation in order to choose the best alternative for our system. @@ -3318,7 +3318,7 @@ LaurianneSitbon - PatriceBellot + PatriceBellot PhilippeBlache Evaluation of Lexical Resources and Semantic Networks on a Corpus of Mental Associations http://www.lrec-conf.org/proceedings/lrec2008/pdf/246_paper.pdf @@ -3344,11 +3344,11 @@ Quang ThắngĐinh - Hồng Phương - Thị Minh HuyềnNguyễn - Cẩm TúNguyễn + Hồng Phương + Thị Minh HuyềnNguyễn + Cẩm TúNguyễn MathiasRossignol - Xuân Lương + Xuân Lương Word Segmentation of <fixed-case>V</fixed-case>ietnamese Texts: a Comparison of Approaches http://www.lrec-conf.org/proceedings/lrec2008/pdf/493_paper.pdf We present in this paper a comparison between three segmentation systems for the Vietnamese language. Indeed, the majority of Vietnamese words is built by semantic composition from about 7,000 syllables, which also have a meaning as isolated words. So the identification of word boundaries in a text is not a simple task, and ambiguities often appear. Beyond the presentation of the tested systems, we also propose a standard definition for word segmentation in Vietnamese, and introduce a reference corpus developed for the purpose of evaluating such a task. The results observed confirm that it can be relatively well treated by automatic means, although a solution needs to be found to take into account out-of-vocabulary words. @@ -3356,12 +3356,12 @@ CristinaBosco - AlessandroMazzei + AlessandroMazzei VincenzoLombardo GiuseppeAttardi AnnaCorazza - AlbertoLavelli - LeonardoLesmo + AlbertoLavelli + LeonardoLesmo GiorgioSatta MariaSimi Comparing <fixed-case>I</fixed-case>talian parsers on a common Treebank: the <fixed-case>EVALITA</fixed-case> experience @@ -3370,25 +3370,25 @@ bosco-etal-2008-comparing - BernardoMagnini - AmedeoCappelli - FabioTamburini + BernardoMagnini + AmedeoCappelli + FabioTamburini CristinaBosco - AlessandroMazzei + AlessandroMazzei VincenzoLombardo FrancescaBertagna NicolettaCalzolari AntonioToral - ValentinaBartalesi Lenzi - RacheleSprugnoli - ManuelaSperanza + ValentinaBartalesi Lenzi + RacheleSprugnoli + ManuelaSperanza Evaluation of Natural Language Tools for <fixed-case>I</fixed-case>talian: <fixed-case>EVALITA</fixed-case> 2007 http://www.lrec-conf.org/proceedings/lrec2008/pdf/630_paper.pdf EVALITA 2007, the first edition of the initiative devoted to the evaluation of Natural Language Processing tools for Italian, provided a shared framework where participants’ systems had the possibility to be evaluated on five different tasks, namely Part of Speech Tagging (organised by the University of Bologna), Parsing (organised by the University of Torino), Word Sense Disambiguation (organised by CNR-ILC, Pisa), Temporal Expression Recognition and Normalization (organised by CELCT, Trento), and Named Entity Recognition (organised by FBK, Trento). We believe that the diffusion of shared tasks and shared evaluation practices is a crucial step towards the development of resources and tools for Natural Language Processing. Experiences of this kind, in fact, are a valuable contribution to the validation of existing models and data, allowing for consistent comparisons among approaches and among representation schemes. The good response obtained by EVALITA, both in the number of participants and in the quality of results, showed that pursuing such goals is feasible not only for English, but also for other languages. magnini-etal-2008-evaluation - Maria TeresaPazienza + Maria TeresaPazienza ArmandoStellato AlexandraTudorache A Bottom-up Comparative Study of <fixed-case>E</fixed-case>uro<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et 3.0 Lexical and Semantic Relations @@ -3408,14 +3408,14 @@ VáclavNovák - KeithHall + KeithHall Inter-sentential Coreferences in Semantic Networks: An Evaluation of Manual Annotation http://www.lrec-conf.org/proceedings/lrec2008/pdf/695_paper.pdf We present an evaluation of inter-sentential coreference annotation in the context of manually created semantic networks. The semantic networks are constructed independently be each annotator and require an entity mapping priori to evaluating the coreference. We introduce a model used for mapping the semantic entities as well as an algorithm used for our evaluation task. Finally, we report the raw statistics for inter-annotator agreement and describe the inherent difficulty in evaluating coreference in semantic networks. novak-hall-2008-inter - MohamedMaamouri + MohamedMaamouri SethKulick AnnBies Diacritic Annotation in the <fixed-case>A</fixed-case>rabic Treebank and its Impact on Parser Evaluation @@ -3424,7 +3424,7 @@ maamouri-etal-2008-diacritic - ChantalEnguehard + ChantalEnguehard HarounaNaroua Evaluation of Virtual Keyboards for <fixed-case>W</fixed-case>est-<fixed-case>A</fixed-case>frican Languages http://www.lrec-conf.org/proceedings/lrec2008/pdf/710_paper.pdf @@ -3432,10 +3432,10 @@ enguehard-naroua-2008-evaluation - ConstantinOrăsan + ConstantinOrăsan DanCristea - RuslanMitkov - AntónioBranco + RuslanMitkov + AntónioBranco Anaphora Resolution Exercise: an Overview http://www.lrec-conf.org/proceedings/lrec2008/pdf/713_paper.pdf Evaluation campaigns have become an established way to evaluate automatic systems which tackle the same task. This paper presents the first edition of the Anaphora Resolution Exercise (ARE) and the lessons learnt from it. This first edition focused only on English pronominal anaphora and NP coreference, and was organised as an exploratory exercise where various issues were investigated. ARE proposed four different tasks: pronominal anaphora resolution and NP coreference resolution on a predefined set of entities, pronominal anaphora resolution and NP coreference resolution on raw texts. For each of these tasks different inputs and evaluation metrics were prepared. This paper presents the four tasks, their input data and evaluation metrics used. Even though a large number of researchers in the field expressed their interest to participate, only three institutions took part in the formal evaluation. The paper briefly presents their results, but does not try to interpret them because in this edition of ARE our aim was not about finding why certain methods are better, but to prepare the ground for a fully-fledged edition. @@ -3450,10 +3450,10 @@ santos-simoes-2008-portuguese - KarinSchuler + KarinSchuler VinodKaggal JamesMasanz - PhilipOgren + PhilipOgren GuerganaSavova System Evaluation on a Named Entity Corpus from Clinical Notes http://www.lrec-conf.org/proceedings/lrec2008/pdf/764_paper.pdf @@ -3461,16 +3461,16 @@ schuler-etal-2008-system - PhilipOgren + PhilipOgren GuerganaSavova - ChristopherChute + ChristopherChute Constructing Evaluation Corpora for Automated Clinical Named Entity Recognition http://www.lrec-conf.org/proceedings/lrec2008/pdf/796_paper.pdf We report on the construction of a gold-standard dataset consisting of annotated clinical notes suitable for evaluating our biomedical named entity recognition system. The dataset is the result of consensus between four human annotators and contains 1,556 annotations on 160 clinical notes using 658 unique concept codes from SNOMED-CT corresponding to human disorders. Inter-annotator agreement was calculated on annotations from 100 of the documents for span (90.9%), concept code (81.7%), context (84.8%), and status (86.0%) agreement. Complete agreement for span, concept code, context, and status was 74.6%. We found that creating a consensus set based on annotations from two independently-created annotation sets can reduce inter-annotator disagreement by 32.3%. We found little benefit to pre-annotating the corpus with a third-party named entity recognizer. ogren-etal-2008-constructing - EricRingger + EricRingger MarcCarmen RobbieHaertel KevinSeppi @@ -3485,14 +3485,14 @@ AlexandreAllauzen - HélèneBonneau-Maynard + HélèneBonneau-Maynard Training and Evaluation of <fixed-case>POS</fixed-case> Taggers on the <fixed-case>F</fixed-case>rench <fixed-case>MULTITAG</fixed-case> Corpus http://www.lrec-conf.org/proceedings/lrec2008/pdf/856_paper.pdf The explicit introduction of morphosyntactic information into statistical machine translation approaches is receiving an important focus of attention. The current freely available Part of Speech (POS) taggers for the French language are based on a limited tagset which does not account for some flectional particularities. Moreover, there is a lack of a unified framework of training and evaluation for these kinds of linguistic resources. Therefore in this paper, three standard POS taggers (Treetagger, Brill’s tagger and the standard HMM POS tagger) are trained and evaluated in the same conditions on the French MULTITAG corpus. This POS-tagged corpus provides a tagset richer than the usual ones, including gender and number distinctions, for example. Experimental results show significant differences of performance between the taggers. According to the tagging accuracy estimated with a tagset of 300 items, taggers may be ranked as follows: Treetagger (95.7%), Brill’s tagger (94.6%), HMM tagger (93.4%). Examples of translation outputs illustrate how considering gender and number distinctions in the POS tagset can be relevant. allauzen-bonneau-maynard-2008-training - MarcoBaroni + MarcoBaroni FrancisChantree AdamKilgarriff SergeSharoff @@ -3503,7 +3503,7 @@ MarkArehart - Keith J.Miller + Keith J.Miller A Ground Truth Dataset for Matching Culturally Diverse <fixed-case>R</fixed-case>omanized Person Names http://www.lrec-conf.org/proceedings/lrec2008/pdf/291_paper.pdf This paper describes the development of a ground truth dataset of culturally diverse Romanized names in which approximately 70,000 names are matched against a subset of 700. We ran the subset as queries against the complete list using several matchers, created adjudication pools, adjudicated the results, and compiled two versions of ground truth based on different sets of adjudication guidelines and methods for resolving adjudicator conflicts. The name list, drawn from publicly available sources, was manually seeded with over 1500 name variants. These names include transliteration variation, database fielding errors, segmentation differences, incomplete names, titles, initials, abbreviations, nicknames, typos, OCR errors, and truncated data. These diverse types of matches, along with the coincidental name similarities already in the list, make possible a comprehensive evaluation of name matching systems. We have used the dataset to evaluate several open source and commercial algorithms and provide some of those results. @@ -3540,7 +3540,7 @@ TetsuyaTakiguchi SatoshiTamura ShingoKuroiwa - KazuyaTakeda + KazuyaTakeda SatoshiNakamura Evaluation Framework for Distant-talking Speech Recognition under Reverberant Environments: newest Part of the <fixed-case>CENSREC</fixed-case> Series - http://www.lrec-conf.org/proceedings/lrec2008/pdf/468_paper.pdf @@ -3548,15 +3548,15 @@ nishiura-etal-2008-evaluation - OlivierHamon - DjamelMostefa + OlivierHamon + DjamelMostefa An Experimental Methodology for an End-to-End Evaluation in Speech-to-Speech Translation http://www.lrec-conf.org/proceedings/lrec2008/pdf/900_paper.pdf This paper describes the evaluation methodology used to evaluate the TC-STAR speech-to-speech translation (SST) system and the results from the third year of the project. It follows the results presented in Hamon (2007), dealing with the first end-to-end evaluation of the project. In this paper, we try to experiment with the methodology and the protocol during a second end-to-end evaluation, by comparing outputs from the TC-STAR system with interpreters from the European parliament. For this purpose, we test different criteria of evaluation and type of questions within a comprehension test. The results show that interpreters do not translate all the information (as opposed to the automatic system), but the quality of SST is still far from that of human translation. The experimental comprehension test used provides new information to study the quality of automatic systems, but without settling the issue of which protocol is the best. This depends on what the evaluator wants to know about the SST: either to have a subjective end-user evaluation or a more objective one. hamon-mostefa-2008-experimental - Carlos D.Martínez-Hinarejos + Carlos D.Martínez-Hinarejos VicentTamarit Evaluation of Different Segmentation Techniques for Dialogue Turns http://www.lrec-conf.org/proceedings/lrec2008/pdf/119_paper.pdf @@ -3565,9 +3565,9 @@ DavidGriol - Lluís F.Hurtado + Lluís F.Hurtado EncarnaSegarra - EmilioSanchis + EmilioSanchis Acquisition and Evaluation of a Dialog Corpus through <fixed-case>WO</fixed-case>z and Dialog Simulation Techniques http://www.lrec-conf.org/proceedings/lrec2008/pdf/197_paper.pdf In this paper, we present a comparison between two corpora acquired by means of two different techniques. The first corpus was acquired by means of the Wizard of Oz technique. A dialog simulation technique has been developed for the acquisition of the second corpus. A random selection of the user and system turns has been used, defining stop conditions for automatically deciding if the simulated dialog is successful or not. We use several evaluation measures proposed in previous research to compare between our two acquired corpora, and then discuss the similarities and differences between the two corpora with regard to these measures. @@ -3575,7 +3575,7 @@ SusanRobinson - DavidTraum + DavidTraum MidhunIttycheriah JoeHenderer What would you Ask a conversational Agent? Observations of Human-Agent Dialogues in a Museum Setting @@ -3585,10 +3585,10 @@ DaveToney - SophieRosset + SophieRosset AurélienMax OlivierGalibert - EricBilinski + EricBilinski An Evaluation of Spoken and Textual Interaction in the <fixed-case>RITEL</fixed-case> Interactive Question Answering System http://www.lrec-conf.org/proceedings/lrec2008/pdf/825_paper.pdf The RITEL project aims to integrate a spoken language dialogue system and an open-domain information retrieval system in order to enable human users to ask a general question and to refine their search for information interactively. This type of system is often referred to as an Interactive Question Answering (IQA) system. In this paper, we present an evaluation of how the performance of the RITEL system differs when users interact with it using spoken versus textual input and output. Our results indicate that while users do not perceive the two versions to perform significantly differently, many more questions are asked in a typical text-based dialogue. @@ -3613,7 +3613,7 @@ DianaMaynard - WimPeters + WimPeters YaoyongLi Evaluating Evaluation Metrics for Ontology-Based Applications: Infinite Reflection http://www.lrec-conf.org/proceedings/lrec2008/pdf/273_paper.pdf @@ -3621,7 +3621,7 @@ maynard-etal-2008-evaluating - DianaMcCarthy + DianaMcCarthy Lexical Substitution as a Framework for Multiword Evaluation http://www.lrec-conf.org/proceedings/lrec2008/pdf/275_paper.pdf In this paper we analyse data from the SemEval lexical substitution task in those cases where the annotators indicated that the target word was part of a phrase before substituting the target with a synonym. We classify the types of phrases that were provided in this way by the annotators in order to evaluate the utility of the method as a means of producing a gold-standard for multiword evaluation. Multiword evaluation is a difficult area because lexical resources are not complete and people’s judgments on multiwords vary. Whilst we do not believe lexical substitution is necessarily a panacea for multiword evaluation, we do believe it is a useful methodology because the annotator is focused on the task of substitution. Following the analysis, we make some recommendations which would make the data easier to classify. @@ -3635,9 +3635,9 @@ emms-2008-tree - A. CüneydTantuǧ + A. CüneydTantuǧ KemalOflazer - Ilknur DurgarEl-Kahlout + Ilknur DurgarEl-Kahlout <fixed-case>BLEU</fixed-case>+: a Tool for Fine-Grained <fixed-case>BLEU</fixed-case> Computation http://www.lrec-conf.org/proceedings/lrec2008/pdf/382_paper.pdf We present a tool, BLEU+, which implements various extension to BLEU computation to allow for a better understanding of the translation performance, especially for morphologically complex languages. BLEU+ takes into account both “closeness” in morphological structure, “closeness” of the root words in the WordNet hierarchy while comparing tokens in the candidate and reference sentence. In addition to gauging performance at a finer level of granularity, BLEU+ also allows the computation of various upper bound oracle scores: comparing all tokens considering only the roots allows us to get an upper bound when all errors due to morphological structure are fixed, while comparing tokens in an error-tolerant way considering minor morpheme edit operations, allows us to get a (more realistic) upper bound when tokens that differ in morpheme insertions/deletions and substitutions are fixed. We use BLEU+ in the fine-grained evaluation of the output of our English-to-Turkish statistical MT system. @@ -3646,7 +3646,7 @@ C. RayGraham DeryleLonsdale - CaseyKennington + CaseyKennington AaronJohnson JeremiahMcGhee Elicited Imitation as an Oral Proficiency Measure with <fixed-case>ASR</fixed-case> Scoring @@ -3655,9 +3655,9 @@ graham-etal-2008-elicited - PedroConcejero + PedroConcejero DanielTapias - Juan JoséRodríguez + Juan JoséRodríguez Juan CarlosLuengo SebastiánSánchez Methodology for Evaluating the Usability of User Interfaces in Mobile Services @@ -3666,14 +3666,14 @@ concejero-etal-2008-methodology - EdouardGeoffrois + EdouardGeoffrois An Economic View on Human Language Technology Evaluation http://www.lrec-conf.org/proceedings/lrec2008/pdf/616_paper.pdf This paper analyses some general issues about human language technology evaluation, focusing on economic aspects. It first provides a scientific rationale for the need to organize evaluation in the form of campaigns, by relating this need to some basic characteristics of human language technologies, namely that they involve learning to process information in a way which reproduces human capabilities. It then reviews the benefits and constraints of these evaluation campaigns. Borrowing concepts from the field of economics, it also provides an analysis of the economic incentives to organize evaluation campaigns. It entails from this analysis that fitting evaluation campaigns to the needs of scientific research requires a strong implication in term of research policy and public funding. geoffrois-2008-economic - BeatriceAlex + BeatriceAlex Comparing Corpus-based to Web-based Lookup Techniques for Automatic <fixed-case>E</fixed-case>nglish Inclusion Detection http://www.lrec-conf.org/proceedings/lrec2008/pdf/674_paper.pdf The influence of English as a global language continues to grow to an extent that its words and expressions permeate the original forms of other languages. This paper evaluates a modular Web-based sub-component of an existing English inclusion classifier and compares it to a corpus-based lookup technique. Both approaches are evaluated on a German gold standard data set. It is demonstrated to what extent the Web-based approach benefits from the amount of data available online and the fact that this data is constantly updated. @@ -3687,8 +3687,8 @@ hasler-2008-centering - StephanieStrassel - MarkPrzybocki + StephanieStrassel + MarkPrzybocki KayPeterson ZhiyiSong KazuakiMaeda @@ -3706,21 +3706,21 @@ DavidHardcastle - DoniaScott + DoniaScott Can we Evaluate the Quality of Generated Text? http://www.lrec-conf.org/proceedings/lrec2008/pdf/797_paper.pdf Evaluating the output of NLG systems is notoriously difficult, and performing assessments of text quality even more so. A range of automated and subject-based approaches to the evaluation of text quality have been taken, including comparison with a putative gold standard text, analysis of specific linguistic features of the output, expert review and task-based evaluation. In this paper we present the results of a variety of such approaches in the context of a case study application. We discuss the problems encountered in the implementation of each approach in the context of the literature, and propose that a test based on the Turing test for machine intelligence offers a way forward in the evaluation of the subjective notion of text quality. hardcastle-scott-2008-evaluate - Keith J.Miller + Keith J.Miller MarkArehart - CatherineBall + CatherineBall JohnPolk AlanRubenstein - KennethSamuel - ElizabethSchroeder - EvaVecchi + KennethSamuel + ElizabethSchroeder + EvaVecchi ChrisWolf An Infrastructure, Tools and Methodology for Evaluation of Multicultural Name Matching Systems http://www.lrec-conf.org/proceedings/lrec2008/pdf/805_paper.pdf @@ -3729,7 +3729,7 @@ LaurianneSitbon - PatriceBellot + PatriceBellot PhilippeBlache Evaluating Robustness Of A <fixed-case>QA</fixed-case> System Through A Corpus Of Real-Life Questions http://www.lrec-conf.org/proceedings/lrec2008/pdf/244_paper.pdf @@ -3753,7 +3753,7 @@ LaurentBlin - OlivierBoeffard + OlivierBoeffard VincentBarreaud <fixed-case>WEB</fixed-case>-Based Listening Test System for Speech Synthesis and Speech Conversion Evaluation http://www.lrec-conf.org/proceedings/lrec2008/pdf/573_paper.pdf @@ -3770,8 +3770,8 @@ dividino-etal-2008-semiotic - GeorgeDemetriou - RobertGaizauskas + GeorgeDemetriou + RobertGaizauskas HaotianSun AngusRoberts <fixed-case>ANNALIST</fixed-case> - <fixed-case>ANN</fixed-case>otation <fixed-case>ALI</fixed-case>gnment and Scoring Tool @@ -3780,7 +3780,7 @@ demetriou-etal-2008-annalist - AndreiPopescu-Belis + AndreiPopescu-Belis MikeFlynn PierreWellner PhilippeBaudrion @@ -3791,7 +3791,7 @@ PaulaEstrella - AndreiPopescu-Belis + AndreiPopescu-Belis MaghiKing Improving Contextual Quality Models for <fixed-case>MT</fixed-case> Evaluation Based on Evaluators’ Feedback http://www.lrec-conf.org/proceedings/lrec2008/pdf/236_paper.pdf @@ -3801,10 +3801,10 @@ BrianWeiss CraigSchlenoff - GregSanders + GregSanders MichelleSteves - SherriCondon - JonPhillips + SherriCondon + JonPhillips DanParvaz Performance Evaluation of Speech Translation Systems http://www.lrec-conf.org/proceedings/lrec2008/pdf/99_paper.pdf @@ -3813,18 +3813,18 @@ ArneMauser - SašaHasan - HermannNey + SašaHasan + HermannNey Automatic Evaluation Measures for Statistical Machine Translation System Optimization http://www.lrec-conf.org/proceedings/lrec2008/pdf/785_paper.pdf Evaluation of machine translation (MT) output is a challenging task. In most cases, there is no single correct translation. In the extreme case, two translations of the same input can have completely different words and sentence structure while still both being perfectly valid. Large projects and competitions for MT research raised the need for reliable and efficient evaluation of MT systems. For the funding side, the obvious motivation is to measure performance and progress of research. This often results in a specific measure or metric taken as primarily evaluation criterion. Do improvements in one measure really lead to improved MT performance? How does a gain in one evaluation metric affect other measures? This paper is going to answer these questions by a number of experiments. mauser-etal-2008-automatic - DanTufiş + DanTufiş RaduIon - AlexandruCeauşu - DanŞtefănescu + AlexandruCeauşu + DanŞtefănescu <fixed-case>RACAI</fixed-case>’s Linguistic Web Services http://www.lrec-conf.org/proceedings/lrec2008/pdf/90_paper.pdf Nowadays, there are hundreds of Natural Language Processing applications and resources for different languages that are developed and/or used, almost exclusively with a few but notable exceptions, by their creators. Assuming that the right to use a particular application or resource is licensed by the rightful owner, the user is faced with the often not so easy task of interfacing it with his/her own systems. Even if standards are defined that provide a unified way of encoding resources, few are the cases when the resources are actually coded in conformance to the standard (and, at present time, there is no such thing as general NLP application interoperability). Semantic Web came with the promise that the web will be a universal medium for information exchange whatever its content. In this context, the present article outlines a collection of linguistic web services for Romanian and English, developed at the Research Institute for AI for the Romanian Academy (RACAI) which are ready to provide a standardized way of calling particular NLP operations and extract the results without caring about what exactly is going on in the background. @@ -3840,7 +3840,7 @@ biber-etal-2008-words - ChrisBiemann + ChrisBiemann UweQuasthoff GerhardHeyer FlorianHolz @@ -3850,11 +3850,11 @@ biemann-etal-2008-asv - AntónioBranco + AntónioBranco FranciscoCosta PedroMartins FilipeNunes - JoãoSilva + JoãoSilva SaraSilveira <fixed-case>LX</fixed-case>-Service: Web Services of Language Technology for <fixed-case>P</fixed-case>ortuguese http://www.lrec-conf.org/proceedings/lrec2008/pdf/640_paper.pdf @@ -3862,8 +3862,8 @@ branco-etal-2008-lx - EmanuelePianta - ChristianGirardi + EmanuelePianta + ChristianGirardi RobertoZanoli The <fixed-case>T</fixed-case>ext<fixed-case>P</fixed-case>ro Tool Suite http://www.lrec-conf.org/proceedings/lrec2008/pdf/645_paper.pdf @@ -3871,8 +3871,8 @@ pianta-etal-2008-textpro - Bayan AbuShawar - EricAtwell + Bayan AbuShawar + EricAtwell An <fixed-case>AI</fixed-case>-inspired intelligent agent/student architecture to combine Language Resources research and teaching http://www.lrec-conf.org/proceedings/lrec2008/pdf/777_paper.pdf This paper describes experimental use of the multi-agent architecture to integrate Natural Language and Information Systems research and teaching, by casting a group of students as intelligent agents to collect and analyse English language resources from around the world. Section 2 and section 3 describe the hybrid intelligent information systems experiments at the University of Leeds and the results generated, including several research papers accepted at international conferences, and a finalist entry in the British Computer Society Machine Intelligence contest. Our proposals for applying the multi-agent idea in other universities such as the Arab Open University are presented in section 4. The conclusion is presented in section 5: the success of hybrid intelligent information systems experiments in generating research papers within a limited time. @@ -3881,7 +3881,7 @@ KjellElenius EvaForsbom - BeátaMegyesi + BeátaMegyesi Language Resources and Tools for <fixed-case>S</fixed-case>wedish: A Survey http://www.lrec-conf.org/proceedings/lrec2008/pdf/156_paper.pdf Language resources and tools to create and process these resources are necessary components in human language technology and natural language applications. In this paper, we describe a survey of existing language resources for Swedish, and the need for Swedish language resources to be used in research and real-world applications in language technology as well as in linguistic research. The survey is based on a questionnaire sent to industry and academia, institutions and organizations, and to experts involved in the development of Swedish language resources in Sweden, the Nordic countries and world-wide. @@ -3889,9 +3889,9 @@ LarsNygaard - JoelPriestley + JoelPriestley AndersNøklestad - Janne BondiJohannessen + Janne BondiJohannessen <fixed-case>G</fixed-case>lossa: a Multilingual, Multimodal, Configurable User Interface http://www.lrec-conf.org/proceedings/lrec2008/pdf/159_paper.pdf We describe a web-based corpus query system, Glossa, which combines the expressiveness of regular query languages with the user-friendliness of a graphical interface. Since corpus users are usually linguists with little interest in technical matters, we have developed a system where the user need not have any prior knowledge of the search system. Furthermore, no previous knowledge of abbreviations for metavariables such as part of speech and source text is needed. All searches are done using checkboxes, pull-down menus, or writing simple letters to make words or other strings. Querying for more than one word is simply done by adding an additional query box, and for parts of words by choosing a feature such as “start of word”. The Glossa system also allows a wide range of viewing and post-processing options. Collocations can be viewed and counted in a number of ways, and be viewed as different kinds of graphical charts. Further annotation and deletion of single results for further processing is also easy. The Glossa system is already in use for a number of corpora. Corpus administrators can easily adapt the system to a wide range of corpora, including multilingual corpora and corpora with audio and video content. @@ -3900,27 +3900,27 @@ EkaterinaBuyko ChristianChiarcos - AntonioPareja Lora + AntonioPareja Lora Ontology-Based Interface Specifications for a <fixed-case>NLP</fixed-case> Pipeline Architecture http://www.lrec-conf.org/proceedings/lrec2008/pdf/215_paper.pdf The high level of heterogeneity between linguistic annotations usually complicates the interoperability of processing modules within an NLP pipeline. In this paper, a framework for the interoperation of NLP components, based on a data-driven architecture, is presented. Here, ontologies of linguistic annotation are employed to provide a conceptual basis for the tagset-neutral processing of linguistic annotations. The framework proposed here is based on a set of structured OWL ontologies: a reference ontology, a set of annotation models which formalize different annotation schemes, and a declarative linking between these, specified separately. This modular architecture is particularly scalable and flexible as it allows for the integration of different reference ontologies of linguistic annotations in order to overcome the absence of a consensus for an ontology of linguistic terminology. Our proposal originates from three lines of research from different fields: research on annotation type systems in UIMA; the ontological architecture OLiA, originally developed for sustainable documentation and annotation-independent corpus browsing, and the ontologies of the OntoTag model, targeted towards the processing of linguistic annotations in Semantic Web applications. We describe how UIMA annotations can be backed up by ontological specifications of annotation schemes as in the OLiA model, and how these are linked to the OntoTag ontologies, which allow for further ontological processing. buyko-etal-2008-ontology - DaanBroeder + DaanBroeder ThierryDeclerck - ErhardHinrichs + ErhardHinrichs SteliosPiperidis - LaurentRomary + LaurentRomary NicolettaCalzolari - PeterWittenburg + PeterWittenburg Foundation of a Component-based Flexible Registry for Language Resources and Technology http://www.lrec-conf.org/proceedings/lrec2008/pdf/364_paper.pdf Within the CLARIN e-science infrastructure project it is foreseen to develop a component-based registry for metadata for Language Resources and Language Technology. With this registry it is hoped to overcome the problems of the current available systems with respect to inflexible fixed schema, unsuitable terminology and interoperability problems. The registry will address interoperability needs by refering to a shared vocabulary registered in data category registries as they are suggested by ISO. broeder-etal-2008-foundation - DaanBroeder + DaanBroeder DavidNathan SvenStrömqvist Remcovan Veenendaal @@ -3931,26 +3931,26 @@ PaulTrilsbeek - DaanBroeder + DaanBroeder TobiasValkenhoef - PeterWittenburg + PeterWittenburg A Grid of Regional Language Archives http://www.lrec-conf.org/proceedings/lrec2008/pdf/376_paper.pdf About two years ago, the Max Planck Institute for Psycholinguistics in Nijmegen, The Netherlands, started an initiative to install regional language archives in various places around the world, particularly in places where a large number of endangered languages exist and are being documented. These digital archives make use of the LAT archiving framework that the MPI has developed over the past nine years. This framework consists of a number of web-based tools for depositing, organizing and utilizing linguistic resources in a digital archive. The regional archives are in principle autonomous archives, but they can decide to share metadata descriptions and language resources with the MPI archive in Nijmegen and become part of a grid of linked LAT archives. By doing so, they will also take advantage of the long-term preservation strategy of the MPI archive. This paper describes the reasoning behind this initiative and how in practice such an archive is set up. trilsbeek-etal-2008-grid - TokunagaTakenobu + TakenobuTokunaga DainKaplan Chu-RenHuang - Shu-KaiHsieh - NicolettaCalzolari + Shu-KaiHsieh + CalzolariNicoletta MonicaMonachini ClaudiaSoria KiyoakiShirai VirachSornlertlamvanich ThatsaneeCharoenporn - XiaYingJu + YingJuXia Adapting International Standard for <fixed-case>A</fixed-case>sian Language Technologies http://www.lrec-conf.org/proceedings/lrec2008/pdf/422_paper.pdf Corpus-based approaches and statistical approaches have been the main stream of natural language processing research for the past two decades. Language resources play a key role in such approaches, but there is an insufficient amount of language resources in many Asian languages. In this situation, standardisation of language resources would be of great help in developing resources in new languages. This paper presents the latest development efforts of our project which aims at creating a common standard for Asian language resources that is compatible with an international standard. In particular, the paper focuses on i) lexical specification and data categories relevant for building multilingual lexical resources for Asian languages; ii) a core upper-layer ontology needed for ensuring multilingual interoperability and iii) the evaluation platform used to test the entire architectural framework. @@ -3967,7 +3967,7 @@ shinzato-etal-2008-large - RiccardoDel Gratta + RiccardoDel Gratta RobertoBartolini TommasoCaselli MonicaMonachini @@ -4015,16 +4015,16 @@ tohyama-etal-2008-construction-metadata - Bodil NistrupMadsen - Hanne ErdmanThomsen + Bodil NistrupMadsen + Hanne ErdmanThomsen A Taxonomy of Lexical Metadata Categories http://www.lrec-conf.org/proceedings/lrec2008/pdf/864_paper.pdf Metadata registries comprising sets of categories to be used in data collections exist in many fields. The purpose of a metadata registry is to facilitate data exchange and interoperability within a domain, and registries often contain definitions and examples. In this paper we will argue that in order to ensure completeness, consistency, user-friendliness and extensibility, metadata registries should be structured as taxonomies. Furthermore we will illustrate the usefulness of using terminological ontologies as the basis for developing metadata taxonomies. In this connection we will discuss the principles of developing ontologies and the differences between taxonomies and ontologies. The paper includes examples of initiatives for developing metadata standards within the field of language resources, more specifically lexical data categories, elaborated at international and national level. However, the principles that we introduce for the development of data category registries are relevant not only for metadata registries for lexical resources, but for all kinds of metadata registries. madsen-thomsen-2008-taxonomy - ShuichiItahashi - Chiu-yuTseng + ShuichiItahashi + Chiu-yuTseng The 2008 Oriental <fixed-case>COCOSDA</fixed-case> Book Project: in Commemoration of the First Decade of Sustained Activities in <fixed-case>A</fixed-case>sia http://www.lrec-conf.org/proceedings/lrec2008/pdf/28_paper.pdf The purpose of Oriental COCOSDA is to provide the Asian community a platform to exchange ideas, to share information and to discuss regional matters on creation, utilization, dissemination of spoken language corpora of oriental languages and also on the assessment methods of speech recognition/synthesis systems as well as to promote speech research on oriental languages. Since its preparatory meeting in Hong Kong in 1997, annual workshops have been organized and held in Japan, Taiwan, China, Korea, Thailand, Singapore, India, Indonesia, Malaysia, and Vietnam from 1998 onwards. The organization is managed by a convener, three advisory members, and 26 committee members from 13 regions in Oriental area. In order to commemorate 10 years of continued activities, the members have decided to publish a book which covers a wide range of speech research. Special focus will be on speech resources or speech corpora in Oriental countries and standardization of speech input/output systems performance evaluation methods on which key technologies for speech systems development are based. The book will also include linguistic outlines of oriental languages, annotation, labeling, and software tools for speech processing. @@ -4033,7 +4033,7 @@ AdamPrzepiórkowski Rafał L.Górski - BarbaraLewandowska-Tomaszyk + BarbaraLewandowska-Tomaszyk MarekŁaziński Towards the <fixed-case>N</fixed-case>ational <fixed-case>C</fixed-case>orpus of <fixed-case>P</fixed-case>olish http://www.lrec-conf.org/proceedings/lrec2008/pdf/211_paper.pdf @@ -4052,9 +4052,9 @@ BenteMaegaard MohammedAtiyya KhalidChoukri - StevenKrauwer + StevenKrauwer ChaficMokbel - MustafaYaseen + MustafaYaseen <fixed-case>MEDAR</fixed-case>: Collaboration between <fixed-case>E</fixed-case>uropean and Mediterranean <fixed-case>A</fixed-case>rabic Partners to Support the Development of Language Technology for <fixed-case>A</fixed-case>rabic http://www.lrec-conf.org/proceedings/lrec2008/pdf/917_paper.pdf After the successful completion of the NEMLAR project 2003-2005, a new opportunity for a project was opened by the European Commission, and a group of largely the same partners is now executing the MEDAR project. MEDAR will be updating the surveys and BLARK for Arabic already made, and will then focus on machine translation (and other tools for translation) and information retrieval with a focus on language resources, tools and evaluation for these applications. A very important part of the MEDAR project is to reinforce and extend the NEMLAR network and to create a cooperation roadmap for Human Language Technologies for Arabic. It is expected that the cooperation roadmap will attract wide attention from other parties and that it can help create a larger platform for collaborative projects. Finally, the project will focus on dissemination of knowledge about existing resources and tools, as well as actors and activities; this will happen through newsletter, website and an international conference which will follow up on the Cairo conference of 2004. Dissemination to user communities will also be important, e.g. through participation in translators? conferences. The goal of these activities is to create a stronger and lasting collaboration between EU countries and Arabic speaking countries. @@ -4071,14 +4071,14 @@ VolhaPetukhova - HarryBunt + HarryBunt <fixed-case>LIRICS</fixed-case> Semantic Role Annotation: Design and Evaluation of a Set of Data Categories http://www.lrec-conf.org/proceedings/lrec2008/pdf/17_paper.pdf Semantic roles have often proved to be useful labels for stating linguistic generalisations of various sorts. There is, however, a lack of agreement on their defining criteria, which causes serious problems for semantic roles to be a useful classificatory device for predicate-argument relations. These criteria should (a) support the design of a semantic role set which is complete but does not contain redundant relations; (b) be based on semantic rather than morphological, lexical or syntactic properties; and (c) enable formal interpretation. In this paper we report on the analyses of alternative approaches to annotation and representation of semantic role information (such as FrameNet, PropBank and VerbNet) with respect to their models of description, granularity of semantic role sets, definitions of semantic roles concepts, consistency and reliability of annotations. We present methodological principles for characterising well-defined concepts which were developed within the LIRICS (Linguistic InfRastructure for Interoperable ResourCes and Systems; see http://lirics.loria.fr) project, as well as the designed set of semantic roles and their definitions in ISO 12620 format. We discuss evaluation results of the defined concepts for semantic role annotation concerning the redundancy and completeness of the tagset and the reliability of annotations in terms of inter-annotator agreement. petukhova-bunt-2008-lirics - DanielZeman + DanielZeman Reusable Tagset Conversion Using Tagset Drivers http://www.lrec-conf.org/proceedings/lrec2008/pdf/66_paper.pdf Part-of-speech or morphological tags are important means of annotation in a vast number of corpora. However, different sets of tags are used in different corpora, even for the same language. Tagset conversion is difficult, and solutions tend to be tailored to a particular pair of tagsets. We propose a universal approach that makes the conversion tools reusable. We also provide an indirect evaluation in the context of a parsing task. @@ -4095,8 +4095,8 @@ MarcKemps-Snijders MenzoWindhouwer - PeterWittenburg - Sue EllenWright + PeterWittenburg + Sue EllenWright <fixed-case>ISO</fixed-case>cat: Corralling Data Categories in the Wild http://www.lrec-conf.org/proceedings/lrec2008/pdf/222_paper.pdf To achieve true interoperability for valuable linguistic resources different levels of variation need to be addressed. ISO Technical Committee 37, Terminology and other language and content resources, is developing a Data Category Registry. This registry will provide a reusable set of data categories. A new implementation, dubbed ISOcat, of the registry is currently under construction. This paper shortly describes the new data model for data categories that will be introduced in this implementation. It goes on with a sketch of the standardization process. Completed data categories can be reused by the community. This is done by either making a selection of data categories using the ISOcat web interface, or by other tools which interact with the ISOcat system using one of its various Application Programming Interfaces. Linguistic resources that use data categories from the registry should include persistent references, e.g. in the metadata or schemata of the resource, which point back to their origin. These data category references can then be used to determine if two or more resources share common semantics, thus providing a level of interoperability close to the source data and a promising layer for semantic alignment on higher levels. @@ -4121,7 +4121,7 @@ VictoriaArranz FranckGandcher - ValérieMapelli + ValérieMapelli KhalidChoukri A Guide for the Production of Reusable Language Resources http://www.lrec-conf.org/proceedings/lrec2008/pdf/898_paper.pdf @@ -4136,7 +4136,7 @@ maurel-2008-prolexbase - YoshihikoHayashi + YoshihikoHayashi ChiharuNarawa MonicaMonachini ClaudiaSoria @@ -4161,7 +4161,7 @@ fujii-2008-producing - Folkertde Vriend + Folkertde Vriend Jan PieterKunst Louisten Bosch CharlotteGiesbers @@ -4181,7 +4181,7 @@ ClaireBrierley - EricAtwell + EricAtwell <fixed-case>P</fixed-case>ro<fixed-case>POSEL</fixed-case>: A Prosody and <fixed-case>POS</fixed-case> <fixed-case>E</fixed-case>nglish Lexicon for Language Engineering http://www.lrec-conf.org/proceedings/lrec2008/pdf/724_paper.pdf ProPOSEL is a prototype prosody and PoS (part-of-speech) English lexicon for Language Engineering, derived from the following language resources: the computer-usable dictionary CUVPlus, the CELEX-2 database, the Carnegie-Mellon Pronouncing Dictionary, and the BNC, LOB and Penn Treebank PoS-tagged corpora. The lexicon is designed for the target application of prosodic phrase break prediction but is also relevant to other machine learning and language engineering tasks. It supplements the existing record structure for wordform entries in CUVPlus with syntactic annotations from rival PoS-tagging schemes, mapped to fields for default closed and open-class word categories and for lexical stress patterns representing the rhythmic structure of wordforms and interpreted as potential new text-based features for automatic phrase break classifiers. The current version of the lexicon comes as a textfile of 104052 separate entries and is intended for distribution with the Natural Language ToolKit; it is therefore accompanied by supporting Python software for manipulating the data so that it can be used for Natural Language Processing (NLP) and corpus-based research in speech synthesis and speech recognition. @@ -4196,7 +4196,7 @@ westerhout-monachesi-2008-creating - LynneCahill + LynneCahill Using Similarity Measures to Extend the <fixed-case>L</fixed-case>in<fixed-case>GO</fixed-case> Lexicon http://www.lrec-conf.org/proceedings/lrec2008/pdf/823_paper.pdf Deep processing of natural language requires large scale lexical resources that have sufficient coverage at a sufficient level of detail and accuracy (i.e. both recall and precision). Hand-crafted lexicons are extremely labour-intensive to create and maintain, and require continuous updating and extension to retain their level of usability. In this paper we present a technique for extending lexicons using similarity measures that can be extracted from corpora. The technique involves creating lexical entries for unknown words based on entries for words that are known and that are deemed to be distributionally similar. We demonstrate the applicability of the approach by providing an extended lexicon for the LinGO system using similarity measures extracted from the BNC. We also discuss the advantages and disadvantages of using such lexical extensions in different ways: principally either as part of the main lexicon or as a separate resource used only for “last resort” use. @@ -4210,9 +4210,9 @@ adolphs-2008-acquiring - NúriaBel + NúriaBel SergioEspeja - MontserratMarimon + MontserratMarimon MartaVillegas <fixed-case>COLDIC</fixed-case>, a Lexicographic Platform for <fixed-case>LMF</fixed-case> compliant lexica http://www.lrec-conf.org/proceedings/lrec2008/pdf/42_paper.pdf @@ -4223,30 +4223,30 @@ DavidBamman MarcoPassarotti RobertoBusa - GregoryCrane + GregoryCrane The Annotation Guidelines of the <fixed-case>L</fixed-case>atin Dependency Treebank and Index <fixed-case>T</fixed-case>homisticus Treebank: the Treatment of some specific Syntactic Constructions in <fixed-case>L</fixed-case>atin http://www.lrec-conf.org/proceedings/lrec2008/pdf/25_paper.pdf The paper describes the treatment of some specific syntactic constructions in two treebanks of Latin according to a common set of annotation guidelines. Both projects work within the theoretical framework of Dependency Grammar, which has been demonstrated to be an especially appropriate framework for the representation of languages with a moderately free word order, where the linear order of constituents is broken up with elements of other constituents. The two projects are the first of their kind for Latin, so no prior established guidelines for syntactic annotation are available to rely on. The general model for the adopted style of representation is that used by the Prague Dependency Treebank, with departures arising from the Latin grammar of Pinkster, specifically in the traditional grammatical categories of the ablative absolute, the accusative + infinitive, and gerunds/gerundives. Sharing common annotation guidelines allows us to compare the datasets of the two treebanks for tasks such as mutually checking annotation consistency, diachronically studying specific syntactic constructions, and training statistical dependency parsers. bamman-etal-2008-annotation - DanTufiş + DanTufiş ElenaIrimia RaduIon - AlexandruCeauşu + AlexandruCeauşu Unsupervised Lexical Acquisition for Part of Speech Tagging http://www.lrec-conf.org/proceedings/lrec2008/pdf/56_paper.pdf It is known that POS tagging is not very accurate for unknown words (words which the POS tagger has not seen in the training corpora). Thus, a first step to improve the tagging accuracy would be to extend the coverage of the tagger’s learned lexicon. It turns out that, through the use of a simple procedure, one can extend this lexicon without using additional, hard to obtain, hand-validated training corpora. The basic idea consists of merely adding new words along with their (correct) POS tags to the lexicon and trying to estimate the lexical distribution of these words according to similar ambiguity classes already present in the lexicon. We present a method of automatically acquire high quality POS tagging lexicons based on morphologic analysis and generation. Currently, this procedure works on Romanian for which we have a required paradigmatic generation procedure but the architecture remains general in the sense that given the appropriate substitutes for the morphological generator and POS tagger, one should obtain similar results. tufis-etal-2008-unsupervised - AmaliaTodiraşcu - DanTufiş + AmaliaTodiraşcu + DanTufiş UlrichHeid ChristopherGledhill - DanŞtefanescu + DanŞtefanescu MarionWeller - FrançoisRousselot + FrançoisRousselot A Hybrid Approach to Extracting and Classifying <fixed-case>V</fixed-case>erb+<fixed-case>N</fixed-case>oun Constructions http://www.lrec-conf.org/proceedings/lrec2008/pdf/500_paper.pdf We present the main findings and preliminary results of an ongoing project aimed at developing a system for collocation extraction based on contextual morpho-syntactic properties. We explored two hybrid extraction methods: the first method applies language-indepedent statistical techniques followed by a linguistic filtering, while the second approach, available only for German, is based on a set of lexico-syntactic patterns to extract collocation candidates. To define extraction and filtering patterns, we studied a specific collocation category, the Verb-Noun constructions, using a model inspired by the systemic functional grammar, proposing three level analysis: lexical, functional and semantic criteria. From tagged and lemmatized corpus, we identify some contextual morpho-syntactic properties helping to filter the output of the statistical methods and to extract some potential interesting VN constructions (complex predicates vs complex predicators). The extracted candidates are validated and classified manually. @@ -4270,7 +4270,7 @@ kountz-etal-2008-laf - TomažErjavec + TomažErjavec SimonKrek The <fixed-case>JOS</fixed-case> Morphosyntactically Tagged Corpus of <fixed-case>S</fixed-case>lovene http://www.lrec-conf.org/proceedings/lrec2008/pdf/89_paper.pdf @@ -4294,14 +4294,14 @@ StelianaIvanova - SandraKuebler + SandraKuebler <fixed-case>POS</fixed-case> Tagging for <fixed-case>G</fixed-case>erman: how important is the Right Context? http://www.lrec-conf.org/proceedings/lrec2008/pdf/253_paper.pdf Part-of-Speech tagging is generally performed by Markov models, based on bigram or trigram models. While Markov models have a strong concentration on the left context of a word, many languages require the inclusion of right context for correct disambiguation. We show for German that the best results are reached by a combination of left and right context. If only left context is available, then changing the direction of analysis and going from right to left improves the results. In a version of MBT with default parameter settings, the inclusion of the right context improved POS tagging accuracy from 94.00% to 96.08%, thus corroborating our hypothesis. The version with optimized parameters reaches 96.73%. ivanova-kuebler-2008-pos - ChristianHänig + ChristianHänig StefanBordag UweQuasthoff <fixed-case>U</fixed-case>nsu<fixed-case>P</fixed-case>arse: unsupervised Parsing with unsupervised Part of Speech Tagging @@ -4311,7 +4311,7 @@ SaraTonelli - RodolfoDelmonte + RodolfoDelmonte AntonellaBristot Enriching the Venice <fixed-case>I</fixed-case>talian Treebank with Dependency and Grammatical Relations http://www.lrec-conf.org/proceedings/lrec2008/pdf/490_paper.pdf @@ -4319,7 +4319,7 @@ tonelli-etal-2008-enriching - KristinaVučković + KristinaVučković MarkoTadić ZdravkoDovedan Rule-Based Chunker for <fixed-case>C</fixed-case>roatian @@ -4344,7 +4344,7 @@ banik-lee-2008-study - MohamedMaamouri + MohamedMaamouri AnnBies SethKulick Enhancing the <fixed-case>A</fixed-case>rabic Treebank: a Collaborative Effort toward New Annotation Guidelines @@ -4353,11 +4353,11 @@ maamouri-etal-2008-enhancing - MarthaPalmer + MarthaPalmer OlgaBabko-Malaya AnnBies - MonaDiab - MohamedMaamouri + MonaDiab + MohamedMaamouri AousMansouri WajdiZaghouani A Pilot <fixed-case>A</fixed-case>rabic <fixed-case>P</fixed-case>ropbank @@ -4366,9 +4366,9 @@ palmer-etal-2008-pilot - MarkGreenwood - JoséIria - FabioCiravegna + MarkGreenwood + JoséIria + FabioCiravegna <fixed-case>S</fixed-case>axon: an Extensible Multimedia Annotator http://www.lrec-conf.org/proceedings/lrec2008/pdf/158_paper.pdf This paper introduces Saxon, a rule-based document annotator that is capable of processing and annotating several document formats and media, both within and across documents. Furthermore, Saxon is readily extensible to support other input formats due to both it’s flexible rule formalism and the modular plugin architecture of the Runes framework upon which it is built. In this paper we introduce the Saxon rule formalism through examples aimed at highlighting its power and flexibility. @@ -4396,7 +4396,7 @@ ShawnMedero JulieMedero RobertParker - StephanieStrassel + StephanieStrassel Annotation Tool Development for Large-Scale Corpus Creation Projects at the <fixed-case>L</fixed-case>inguistic <fixed-case>D</fixed-case>ata <fixed-case>C</fixed-case>onsortium http://www.lrec-conf.org/proceedings/lrec2008/pdf/775_paper.pdf The Linguistic Data Consortium (LDC) creates a variety of linguistic resources - data, annotations, tools, standards and best practices - for many sponsored projects. The programming staff at LDC has created the tools and technical infrastructures to support the data creation efforts for these projects, creating tools and technical infrastructures for all aspects of data creation projects: data scouting, data collection, data selection, annotation, search, data tracking and worklow management. This paper introduces a number of samples of LDC programming staff’s work, with particular focus on the recent additions and updates to the suite of software tools developed by LDC. Tools introduced include the GScout Web Data Scouting Tool, LDC Data Selection Toolkit, ACK - Annotation Collection Kit, XTrans Transcription and Speech Annotation Tool, GALE Distillation Toolkit, and the GALE MT Post Editing Workflow Management System. @@ -4436,7 +4436,7 @@ EmilieChételat-Pelé - AnneliesBraffort + AnneliesBraffort Sign Language Corpus Annotation: toward a new Methodology http://www.lrec-conf.org/proceedings/lrec2008/pdf/168_paper.pdf This paper deals with non manual gestures annotation involved in Sign Language within the context of automatic generation of Sign Language. We will tackle linguistic researches in sign language, present descriptions of non manual gestures and problems lead to movement description. Then, we will propose a new annotation methodology, which allows non manual gestures description. This methodology can describe all Non Manual Gestures with precision, economy and simplicity. It is based on four points: Movement description (instead of position description); Movement decomposition (the diagonal movement is described with horizontal movement and vertical movement separately); Element decomposition (we separate higher eyelid and lower eyelid); Use of a set of symbols rather than words. One symbol can describe many phenomena (with use of colours, height...). First analysis results allow us to define precisely the structure of eye blinking and give the very first ideas for the rules to be designed. All the results must be refined and confirmed by extending the study on the whole corpus. In a second step, our annotation will be used to produce analyses in order to define rules and structure definition of Non Manual Gestures that will be evaluate in LIMSI’s automatic French Sign Language generation system. @@ -4447,7 +4447,7 @@ CarolNeidle VassilisAthitsos StanSclaroff - HermannNey + HermannNey Benchmark Databases for Video-Based Automatic Sign Language Recognition http://www.lrec-conf.org/proceedings/lrec2008/pdf/287_paper.pdf A new, linguistically annotated, video database for automatic sign language recognition is presented. The new RWTH-BOSTON-400 corpus, which consists of 843 sentences, several speakers and separate subsets for training, development, and testing is described in detail. For evaluation and benchmarking of automatic sign language recognition, large corpora are needed. Recent research has focused mainly on isolated sign language recognition methods using video sequences that have been recorded under lab conditions using special hardware like data gloves. Such databases have often consisted generally of only one speaker and thus have been speaker-dependent, and have had only small vocabularies. A new database access interface, which was designed and created to provide fast access to the database statistics and content, makes it possible to easily browse and retrieve particular subsets of the video database. Preliminary baseline results on the new corpora are presented. In contradistinction to other research in this area, all databases presented in this paper will be publicly available. @@ -4457,7 +4457,7 @@ JanBungeroth DanielStein PhilippeDreuw - HermannNey + HermannNey SaraMorrissey AndyWay Lynettevan Zijl @@ -4501,7 +4501,7 @@ TheodorosKostoulas TodorGanchev IosifMporas - NikosFakotakis + NikosFakotakis A Real-World Emotional Speech Corpus for <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek http://www.lrec-conf.org/proceedings/lrec2008/pdf/664_paper.pdf The present paper deals with the design and the annotation of a Greek real-world emotional speech corpus. The speech data consist of recordings collected during the interaction of naïve users with a smart-home dialogue system. Annotation of the speech data with respect to the uttered command and emotional state was performed. Initial experimentations towards recognizing negative emotional states were performed and the experimental results indicate the range of difficulties when dealing with real-world data. @@ -4515,7 +4515,7 @@ wilson-2008-annotating - Henkvan den Heuvel + Henkvan den Heuvel Jean-PierreMartens BartD’hoore KristofD’hanens @@ -4546,19 +4546,19 @@ TévaMerlin SylvainMeignier YannickEstève - PaulDeléglise + PaulDeléglise Combined Systems for Automatic Phonetic Transcription of Proper Nouns http://www.lrec-conf.org/proceedings/lrec2008/pdf/455_paper.pdf Large vocabulary automatic speech recognition (ASR) technologies perform well in known, controlled contexts. However recognition of proper nouns is commonly considered as a difficult task. Accurate phonetic transcription of a proper noun is difficult to obtain, although it can be one of the most important resources for a recognition system. In this article, we propose methods of automatic phonetic transcription applied to proper nouns. The methods are based on combinations of the rule-based phonetic transcription generator LIA_PHON and an acoustic-phonetic decoding system. On the ESTER corpus, we observed that the combined systems obtain better results than our reference system (LIA_PHON). The WER (Word Error Rate) decreased on segments of speech containing proper nouns, without affecting negatively the results on the rest of the corpus. On the same corpus, the Proper Noun Error Rate (PNER, which is a WER computed on proper nouns only), decreased with our new system. laurent-etal-2008-combined - HaraldHöge - ZdravkoKacic + HaraldHöge + ZdravkoKacic BojanKotnik MatejRojc NicolasMoreau - Horst-UdoHain + Horst-UdoHain Evaluation of Modules and Tools for Speech Synthesis: the <fixed-case>ECESS</fixed-case> Framework http://www.lrec-conf.org/proceedings/lrec2008/pdf/32_paper.pdf The consortium ECESS (European Center of Excellence for Speech Synthesis) has set up a framework for evaluation of software modules and tools relevant for speech synthesis. Till now two lines of evaluation campaigns have been established: (1) Evaluation of the ECESS TTS modules (text processing, prosody, acoustic synthesis). (2) Evaluation of ECESS tools (pitch extraction, voice activity detection, phonetic segmentation). The functionality and interfaces of the ECESS TTS have been developed by a joint effort between ECESS and the EC-funded project TC-STAR . First evaluation campaigns were conducted within TC-STAR using the ECESS framework. As TC-STAR finished in March 2007, ECESS continued and extended the evaluation of ECESS TTS modules and tools by its own. Within the paper we describe a novel framework which allows performing remote evaluation for modules via the web. First experimental results are reported. Further the result of several evaluation campaigns for tools handling pitch extraction and voice activity detection are presented. @@ -4590,7 +4590,7 @@ matousek-etal-2008-building - LuísOliveira + LuísOliveira SérgioPaulo LuísFigueira CarlosMendes @@ -4603,17 +4603,17 @@ AlexandrePatry - PhilippeLanglais + PhilippeLanglais <fixed-case>MISTRAL</fixed-case>: a Statistical Machine Translation Decoder for Speech Recognition Lattices http://www.lrec-conf.org/proceedings/lrec2008/pdf/293_paper.pdf This paper presents MISTRAL, an open source statistical machine translation decoder dedicated to spoken language translation. While typical machine translation systems take a written text as input, MISTRAL translates word lattices produced by automatic speech recognition systems. The lattices are translated in two passes using a phrase-based model. Our experiments reveal an improvement in BLEU when translating lattices instead of sentences returned by a speech recognition system. patry-langlais-2008-mistral - UteZiegenhain - HanneFersoe - Henkvan den Heuvel - AsuncionMoreno + UteZiegenhain + HanneFersoe + Henkvan den Heuvel + AsuncionMoreno <fixed-case>LC</fixed-case>-<fixed-case>STAR</fixed-case> <fixed-case>II</fixed-case>: Starring more Lexica http://www.lrec-conf.org/proceedings/lrec2008/pdf/358_paper.pdf LC-STAR II is a follow-up project of the EU funded project LC-STAR (Lexica and Corpora for Speech-to-Speech Translation Components, IST-2001-32216). LC-STAR II develops large lexica containing information for speech processing in ten languages targeting especially automatic speech recognition and text to speech synthesis but also other applications like speech-to-speech translation and tagging. The project follows by large the specifications developed within the scope of LC-STAR covering thirteen languages: Catalan, Finnish, German, Greek, Hebrew, Italian, Mandarin Chinese, Russian, Turkish, Slovenian, Spanish, Standard Arabic and US-English. The ten new LC-STAR II languages are: Brazilian-Portuguese, Cantonese, Czech, English-UK, French, Hindi, Polish, Portuguese, Slovak, and Urdu. The project started in 2006 with a lifetime of two years. The project is funded by a consortium, which includes Microsoft (USA), Nokia (Finland), NSC (Israel), Siemens (Germany) and Harmann/Becker (Germany). The project is coordinated by UPC (Spain) and validation is performed by SPEX (The Netherlands), and CST (Denmark). The developed language resources will be shared among partners. This paper presents a summary of the creation of word lists and lexica and an overview of adaptations of the specifications and conceptual representation model from LC-STAR to the new languages. The validation procedure will be presented too. @@ -4621,23 +4621,23 @@ MatthiasEck - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel Communicating Unknown Words in Machine Translation http://www.lrec-conf.org/proceedings/lrec2008/pdf/392_paper.pdf A new approach to handle unknown words in machine translation is presented. The basic idea is to find definitions for the unknown words on the source language side and translate those definitions instead. Only monolingual resources are required, which generally offer a broader coverage than bilingual resources and are available for a large number of languages. In order to use this in a machine translation system definitions are extracted automatically from online dictionaries and encyclopedias. The translated definition is then inserted and clearly marked in the original hypothesis. This is shown to lead to significant improvements in (subjective) translation quality. eck-etal-2008-communicating - PierretteBouillon + PierretteBouillon SoniaHalimi YukieNakao KyokoKanzaki HitoshiIsahara NikosTsourakis MarianneStarlander - Beth AnnHockey - MannyRayner + Beth AnnHockey + MannyRayner Developing Non-<fixed-case>E</fixed-case>uropean Translation Pairs in a Medium-Vocabulary Medical Speech Translation System http://www.lrec-conf.org/proceedings/lrec2008/pdf/443_paper.pdf We describe recent work on MedSLT, a medium-vocabulary interlingua-based medical speech translation system, focussing on issues that arise when handling languages of which the grammar engineer has little or no knowledge. We show how we can systematically create and maintain multiple forms of grammars, lexica and interlingual representations, with some versions being used by language informants, and some by grammar engineers. In particular, we describe the advantages of structuring the interlingua definition as a simple semantic grammar, which includes a human-readable surface form. We show how this allows us to rationalise the process of evaluating translations between languages lacking common speakers, and also makes it possible to create a simple generic tool for debugging to-interlingua translation rules. Examples presented focus on the concrete case of translation between Japanese and Arabic in both directions. @@ -4653,7 +4653,7 @@ perera-etal-2008-clios - TakahiroOno + TakahiroOno HitomiTohyama ShigekiMatsubara Construction and Analysis of Word-level Time-aligned Simultaneous Interpretation Corpus @@ -4664,19 +4664,19 @@ Marie-JeanMeurs FrédéricDuvert - FrédéricBéchet - FabriceLefèvre - Renatode Mori + FrédéricBéchet + FabriceLefèvre + Renatode Mori Semantic Frame Annotation on the <fixed-case>F</fixed-case>rench <fixed-case>MEDIA</fixed-case> corpus http://www.lrec-conf.org/proceedings/lrec2008/pdf/256_paper.pdf This paper introduces a knowledge representation formalism used for annotation of the French MEDIA dialogue corpus in terms of high level semantic structures. The semantic annotation, worked out according to the Berkeley FrameNet paradigm, is incremental and partially automated. We describe an automatic interpretation process for composing semantic structures from basic semantic constituents using patterns involving words and constituents. This process contains procedures which provide semantic compositions and generating frame hypotheses by inference. The MEDIA corpus is a French dialogue corpus recorded using a Wizard of Oz system simulating a telephone server for tourist information and hotel booking. It had been manually transcribed and annotated at the word and semantic constituent levels. These levels support the automatic interpretation process which provides a high level semantic frame annotation. The Frame based Knowledge Source we composed contains Frame definitions and composition rules. We finally provide some results obtained on the automatically-derived annotation. meurs-etal-2008-semantic - NickWebb + NickWebb TingLiu MarkHepple - YorickWilks + YorickWilks Cross-Domain Dialogue Act Tagging http://www.lrec-conf.org/proceedings/lrec2008/pdf/502_paper.pdf We present recent work in the area of Cross-Domain Dialogue Act (DA) tagging. We have previously reported on the use of a simple dialogue act classifier based on purely intra-utterance features - principally involving word n-gram cue phrases automatically generated from a training corpus. Such a classifier performs surprisingly well, rivalling scores obtained using far more sophisticated language modelling techniques. In this paper, we apply these automatically extracted cues to a new annotated corpus, to determine the portability and generality of the cues we learn. @@ -4685,8 +4685,8 @@ NikosTsourakis MariaGeorgescul - PierretteBouillon - MannyRayner + PierretteBouillon + MannyRayner Building Mobile Spoken Dialogue Applications Using Regulus http://www.lrec-conf.org/proceedings/lrec2008/pdf/620_paper.pdf Regulus is an Open Source platform that supports construction of rule-based medium-vocabulary spoken dialogue applications. It has already been used to build several substantial speech-enabled applications, including NASA’s Clarissa procedure navigator and Geneva University’s MedSLT medical speech translator. System like these would be far more useful if they were available on a hand-held device, rather than, as with the present version, on a laptop. In this paper we describe the Open Source framework we have developed, which makes it possible to run Regulus applications on generally available mobile devices, using a distributed client-server architecture that offers transparent and reliable integration with different types of ASR systems. We describe the architecture, an implemented calendar application prototype hosted on a mobile device, and an evaluation. The evaluation shows that performance on the mobile device is as good as performance on a normal desktop PC. @@ -4694,7 +4694,7 @@ ChristianRaymond - Kepa JosebaRodriguez + Kepa JosebaRodriguez GiuseppeRiccardi Active Annotation in the <fixed-case>LUNA</fixed-case> <fixed-case>I</fixed-case>talian Corpus of Spontaneous Dialogues http://www.lrec-conf.org/proceedings/lrec2008/pdf/499_paper.pdf @@ -4705,7 +4705,7 @@ StefanHahn PatrickLehnen ChristianRaymond - HermannNey + HermannNey A Comparison of Various Methods for Concept Tagging for Spoken Language Understanding http://www.lrec-conf.org/proceedings/lrec2008/pdf/749_paper.pdf The extraction of flat concepts out of a given word sequence is usually one of the first steps in building a spoken language understanding (SLU) or dialogue system. This paper explores five different modelling approaches for this task and presents results on a French state-of-the-art corpus, MEDIA. Additionally, two log-linear modelling approaches could be further improved by adding morphologic knowledge. This paper goes beyond what has been reported in the literature. We applied the models on the same training and testing data and used the NIST scoring toolkit to evaluate the experimental results to ensure identical conditions for each of the experiments and the comparability of the results. Using a model based on conditional random fields, we achieve a concept error rate of 11.8% on the MEDIA evaluation corpus. @@ -4713,7 +4713,7 @@ StéphaneHuet - GuillaumeGravier + GuillaumeGravier PascaleSébillot Morphosyntactic Resources for Automatic Speech Recognition http://www.lrec-conf.org/proceedings/lrec2008/pdf/174_paper.pdf @@ -4721,11 +4721,11 @@ huet-etal-2008-morphosyntactic - NicolásMorales + NicolásMorales JavierTejedor JavierGarrido JoséColás - Doroteo T.Toledano + Doroteo T.Toledano <fixed-case>STC</fixed-case>-<fixed-case>TIMIT</fixed-case>: Generation of a Single-channel Telephone Corpus http://www.lrec-conf.org/proceedings/lrec2008/pdf/102_paper.pdf This paper describes a new speech corpus, STC-TIMIT, and discusses the process of design, development and its distribution through LDC. The STC-TIMIT corpus is derived from the widely used TIMIT corpus by sending it through a real and single telephone channel. TIMIT is phonetically balanced, covers the dialectal diversity in continental USA and has been extensively used as a benchmark for speech recognition algorithms, especially in early stages of development. The experimental usability of TIMIT has been increased eventually with the creation of derived corpora, passing the original data through different channels. One such example is the well-known NTIMIT corpus, where the original files in TIMIT are re-recorded after being sent through different telephone calls, resulting in a corpus that characterizes telephone channels in a wide sense. In STC-TIMIT, we followed a similar procedure, but the whole corpus was transmitted in a single telephone call with the goal of obtaining data from a real and yet highly stable telephone channel across the whole corpus. Files in STC-TIMIT are aligned to those of TIMIT with a theoretical precision of 0.125 ms, making TIMIT labels valid for the new corpus. The experimental section presents several results on speech recognition accuracy. @@ -4733,12 +4733,12 @@ EricSanders - AsuncionMoreno + AsuncionMoreno HerbertTropf LynetteMelnar NuritDekel BreannaGillies - NiklasPaulsson + NiklasPaulsson <fixed-case>LILA</fixed-case>: Cellular Telephone Speech Databases from <fixed-case>A</fixed-case>sia http://www.lrec-conf.org/proceedings/lrec2008/pdf/278_paper.pdf The goal of the LILA project was the collection of speech databases over cellular telephone networks of five languages in three Asian countries. Three languages were recorded in India: Hindi by first language speakers, Hindi by second language speakers and Indian English. Furthermore, Mandarin was recorded in China and Korean in South-Korea. The databases are part of the SpeechDat-family and follow the SpeechDat rules in many respects. All databases have been finished and have passed the validation tests. Both Hindi databases and the Korean database will be available to the public for sale. @@ -4792,11 +4792,11 @@ RubénFernández - Luis A.Hernández + Luis A.Hernández EduardoLópez JoséAlcázar GuillermoPortillo - Doroteo T.Toledano + Doroteo T.Toledano Design of a Multimodal Database for Research on Automatic Detection of Severe Apnoea Cases http://www.lrec-conf.org/proceedings/lrec2008/pdf/454_paper.pdf The aim of this paper is to present the design of a multimodal database suitable for research on new possibilities for automatic diagnosis of patients with severe obstructive sleep apnoea (OSA). Early detection of severe apnoea cases can be very useful to give priority to their early treatment optimizing the expensive and time-consuming tests of current diagnosis methods based on full overnight sleep in a hospital. This work is part of an on-going collaborative project between medical and signal processing groups towards the design of a multimodal database as an innovative resource to promote new research efforts on automatic OSA diagnosis through speech and image processing technologies. In this contribution we present the multimodal design criteria derived from the analysis of specific voice properties related to OSA physiological effects as well as from the morphological facial characteristics in apnoea patients. Details on the database structure and data collection methodology are also given as it is intended to be an open resource to promote further research in this field. Finally, preliminary experimental results on automatic OSA voice assessment are presented for the collected speech data in our OSA multimodal database. Standard GMM speaker recognition techniques obtain an overall correct classification rate of 82%. This represents an initial promising result underlining the interest of this research framework and opening further perspectives for improvement using more specific speech and image recognition technologies. @@ -4809,8 +4809,8 @@ TatsuyaKawahara HiroakiNanjo HiromitsuNishizaki - NorihitoYasuda - YoichiYamashita + NorihitoYasuda + YoichiYamashita KatunobuItou Test Collections for Spoken Document Retrieval from Lecture Audio Data http://www.lrec-conf.org/proceedings/lrec2008/pdf/400_paper.pdf @@ -4825,7 +4825,7 @@ TakanoriNishino NorihideKitaoka KatunobuItou - KazuyaTakeda + KazuyaTakeda In-car Speech Data Collection along with Various Multimodal Signals http://www.lrec-conf.org/proceedings/lrec2008/pdf/472_paper.pdf In this paper, a large-scale real-world speech database is introduced along with other multimedia driving data. We designed a data collection vehicle equipped with various sensors to synchronously record twelve-channel speech, three-channel video, driving behavior including gas and brake pedal pressures, steering angles, and vehicle velocities, physiological signals including driver heart rate, skin conductance, and emotion-based sweating on the palms and soles, etc. These multimodal data are collected while driving on city streets and expressways under four different driving task conditions including two kinds of monologues, human-human dialog, and human-machine dialog. We investigated the response timing of drivers against navigator utterances and found that most overlapped with the preceding utterance due to the task characteristics and the features of Japanese. When comparing utterance length, speaking rate, and the filler rate of driver utterances in human-human and human-machine dialogs, we found that drivers tended to use longer and faster utterances with more fillers to talk with humans than machines. @@ -4836,7 +4836,7 @@ SatoruKogure HiromitsuNishizaki KengoOhta - SeiichiNakagawa + SeiichiNakagawa Developing Corpus of <fixed-case>J</fixed-case>apanese Classroom Lecture Speech Contents http://www.lrec-conf.org/proceedings/lrec2008/pdf/524_paper.pdf This paper explains our developing Corpus of Japanese classroom Lecture speech Contents (henceforth, denoted as CJLC). Increasing e-Learning contents demand a sophisticated interactive browsing system for themselves, however, existing tools do not satisfy such a requirement. Many researches including large vocabulary continuous speech recognition and extraction of important sentences against lecture contents are necessary in order to realize the above system. CJLC is designed as their fundamental basis, and consists of speech, transcriptions, and slides that were collected in real university classroom lectures. This paper also explains the difference about disfluency acts between classroom lectures and academic presentations. @@ -4857,8 +4857,8 @@ RichardAdderley ChristianBonkowski TodorGanchev - JoachimKöhler - NikosFakotakis + JoachimKöhler + NikosFakotakis The <fixed-case>M</fixed-case>ove<fixed-case>O</fixed-case>n Motorcycle Speech Corpus http://www.lrec-conf.org/proceedings/lrec2008/pdf/557_paper.pdf A speech and noise corpus dealing with the extreme conditions of the motorcycle environment is developed within the MoveOn project. Speech utterances in British English are recorded and processed approaching the issue of command and control and template driven dialog systems on the motorcycle. The major part of the corpus comprises noisy speech and environmental noise recorded on a motorcycle, but several clean speech recordings in a silent environment are also available. The corpus development focuses on distortion free recordings and accurate descriptions of both recorded speech and noise. Not only speech segments are annotated but also annotation of environmental noise is performed. The corpus is a small-sized speech corpus with about 12 hours of clean and noisy speech utterances and about 30 hours of segments with environmental noise without speech. This paper addresses the motivation and development of the speech corpus and finally presents some statistics and results of the database creation. @@ -4868,7 +4868,7 @@ StavrosNtalampiras IlyasPotamitis TodorGanchev - NikosFakotakis + NikosFakotakis Audio Database in Support of Potentiel Threat and Crisis Situation Management http://www.lrec-conf.org/proceedings/lrec2008/pdf/327_paper.pdf This paper describes a corpus consisting of audio data for automatic space monitoring based solely on the perceived acoustic information. The particular database is created as part of a project aiming at the detection of abnormal events, which lead to life-threatening situations or property damage. The audio corpus is composed of vocal reactions and environmental sounds that are usually encountered in atypical situations. The audio data is composed of three parts: Phase I - professional sound effects collections, Phase II recordings obtained from action and drama movies and Phase III - vocal reactions related to real-world emergency events as retrieved from television, radio broadcast news, documentaries etc. The annotation methodology is given in details along with preliminary classification results and statistical analysis of the dataset regarding Phase I. The main objective of such a dataset is to provide training data for automatic recognition machines that detect hazardous situations and to provide security enhancement in public environments, which otherwise require human supervision. @@ -4876,11 +4876,11 @@ MartineGarnier-Rizet - GillesAdda + GillesAdda FrederikCailliau SylvieGuillemin-Lanne ClaireWaast-Richard - LoriLamel + LoriLamel StephanVanni ClaireWaast-Richard <fixed-case>C</fixed-case>all<fixed-case>S</fixed-case>urf: Automatic Transcription, Indexing and Structuration of Call Center Conversational Speech for Knowledge Extraction and Query by Content @@ -4889,7 +4889,7 @@ garnier-rizet-etal-2008-callsurf - DjamelMostefa + DjamelMostefa ArnaudVallee New Telephone Speech Databases for <fixed-case>F</fixed-case>rench: a Children Database and an optimized Adult Corpus http://www.lrec-conf.org/proceedings/lrec2008/pdf/901_paper.pdf @@ -4916,7 +4916,7 @@ TiitHennoste OlgaGerassimenko RiinaKasterpalu - MareKoit + MareKoit AndrielaRääbis KristaStrandson From Human Communication to Intelligent User Interfaces: Corpora of Spoken <fixed-case>E</fixed-case>stonian @@ -4942,11 +4942,11 @@ brinckmann-etal-2008-german - AntonioBonafonte + AntonioBonafonte JordiAdell IgnasiEsquerra SilviaGallego - AsunciónMoreno + AsunciónMoreno JavierPérez Corpus and Voices for <fixed-case>C</fixed-case>atalan Speech Synthesis http://www.lrec-conf.org/proceedings/lrec2008/pdf/835_paper.pdf @@ -4954,10 +4954,10 @@ bonafonte-etal-2008-corpus - MartineAdda-Decker + MartineAdda-Decker ThomasPellegrini - EricBilinski - GillesAdda + EricBilinski + GillesAdda Developments of “Lëtzebuergesch” Resources for Automatic Speech Processing and Linguistic Studies http://www.lrec-conf.org/proceedings/lrec2008/pdf/855_paper.pdf In the present contribution we start with an overview of the linguistic situation of Luxembourg. We then describe specificities of spoken and written Lëtzebuergesch, with respect to automatic speech processing. Multilingual code-switching and code-mixing, poor writing standardization as compared to languages such as English or French, a large diversity of spoken varieties, together with a limited written production of Lëtzebuergesch language contribute to pose many interesting challenges to automatic speech processing both for speech technologies and linguistic studies. Multilingual filtering has been investigated to sort out Luxembourgish from German and French. Word list coverage and language model perplexity results, using sibling resources collected from the Web, are presented. A phonemic inventory has been adopted for pronunciation dictionary development, a grapheme-phoneme tool has been developed and pronunciation research issues related to the multilingual context are highlighted. Results achieved in resource development allow to envision the realisation of an ASR system. @@ -4965,8 +4965,8 @@ RenaNemoto - IoanaVasilescu - MartineAdda-Decker + IoanaVasilescu + MartineAdda-Decker Speech Errors on Frequently Observed Homophones in <fixed-case>F</fixed-case>rench: Perceptual Evaluation vs Automatic Classification http://www.lrec-conf.org/proceedings/lrec2008/pdf/554_paper.pdf The present contribution aims at increasing our understanding of automatic speech recognition (ASR) errors involving frequent homophone or almost homophone words by confronting them to perceptual results. The long-term aim is to improve acoustic modelling of these items to reduce automatic transcription errors. A first question of interest addressed in this paper is whether homophone words such as “et” (and); and “est” (to be), for which ASR systems rely on language model weights, can be discriminated in a perceptual transcription test with similar n-gram constraints. A second question concerns the acoustic separability of the two homophone words using appropriate acoustic and prosodic attributes. The perceptual test reveals that even though automatic and perceptual errors correlate positively, human listeners deal with local ambiguity more efficiently than the ASR system in conditions which attempt to approximate the information available for decision for a 4-gram language model. The corresponding acoustic analysis shows that the two homophone words may be distinguished thanks to some relevant acoustic and prosodic attributes. A first experiment in automatic classification of the two words using data mining techniques highlights the role of the prosodic (duration and voicing) and contextual information (pauses co-occurrence) in distinguishing the two words. Current results, even though preliminary, suggests that new levels of information, so far unexplored in pronunciations’ modelling for ASR, may be considered in order to efficiently factorize the word variants observed in speech and to improve the automatic speech transcription. @@ -5001,18 +5001,18 @@ bazillon-etal-2008-manual - Antonio MorenoSandoval - Doroteo TorreToledano + Antonio MorenoSandoval + Doroteo TorreToledano Raúlde la Torre - MartaGarrote - José M.Guirao + MartaGarrote + José M.Guirao Developing a Phonemic and Syllabic Frequency Inventory for Spontaneous Spoken Castilian <fixed-case>S</fixed-case>panish and their Comparison to Text-Based Inventories http://www.lrec-conf.org/proceedings/lrec2008/pdf/283_paper.pdf In this paper we present our recent work to develop phonemic and syllabic inventories for Castilian Spanish based on the C-ORAL-ROM corpus, a spontaneous spoken resource with varying degrees of naturalness and in different communicative contexts. These inventories have been developed by means of a phonemic and syllabic automatic transcriptor whose output has been assessed by manually reviewing most of the transcriptions. The inventories include absolute frequencies of occurrence of the different phones and syllables. These frequencies have been contrasted against an inventory extracted from a comparable textual corpus, finding evidence that the available inventories, based mainly on text, do not provide an accurate description of spontaneously spoken Castilian Spanish. sandoval-etal-2008-developing - PetrPollák + PetrPollák JanVolín RadekSkarnitzl Phone Segmentation Tool with Integrated Pronunciation Lexicon and <fixed-case>C</fixed-case>zech Phonetically Labelled Reference Database. @@ -5040,7 +5040,7 @@ JonathanChevelu NellyBarbot - OlivierBoeffard + OlivierBoeffard ArnaudDelhay Comparing Set-Covering Strategies for Optimal Corpus Design http://www.lrec-conf.org/proceedings/lrec2008/pdf/750_paper.pdf @@ -5068,7 +5068,7 @@ ErinFitzgerald - FrederickJelinek + FrederickJelinek Linguistic Resources for Reconstructing Spontaneous Speech Text http://www.lrec-conf.org/proceedings/lrec2008/pdf/874_paper.pdf The output of a speech recognition system is not always ideal for subsequent downstream processing, in part because speakers themselves often make mistakes. A system would accomplish speech reconstruction of its spontaneous speech input if its output were to represent, in flawless, fluent, and content-preserving English, the message that the speaker intended to convey. These cleaner speech transcripts would allow for more accurate language processing as needed for NLP tasks such as machine translation and conversation summarization, which often rely on grammatical input. Recognizing that supervised statistical methods to identify and transform ill-formed areas of the transcript will require richly labeled resources, we have built the Spontaneous Speech Reconstruction corpus. This small corpus of reconstructed and aligned conversational telephone speech transcriptions for the Fisher conversational telephone speech corpus (Strassel and Walker, 2004) was annotated on several levels including string transformations and predicate-argument structure, and will be shared with the linguistic research community. @@ -5091,7 +5091,7 @@ FlorianKoehler - HinrichSchuetze + HinrichSchuetze MichaelaAtterer A Question Answering System for <fixed-case>G</fixed-case>erman. Experiments with Morphological Linguistic Resources http://www.lrec-conf.org/proceedings/lrec2008/pdf/24_paper.pdf @@ -5124,7 +5124,7 @@ tsarfaty-goldberg-2008-word - SonjaBosch + SonjaBosch LaurettePretorius KholisaPodile AxelFleisch @@ -5145,7 +5145,7 @@ SergeSharoff MikhailKopotev - TomažErjavec + TomažErjavec AnnaFeldman DagmarDivjak Designing and Evaluating a <fixed-case>R</fixed-case>ussian Tagset @@ -5164,7 +5164,7 @@ NizarHabash - RyanRoth + RyanRoth Identification of Naturally Occurring Numerical Expressions in <fixed-case>A</fixed-case>rabic http://www.lrec-conf.org/proceedings/lrec2008/pdf/843_paper.pdf In this paper, we define the task of Number Identification in natural context. We present and validate a language-independent semi-automatic approach to quickly building a gold standard for evaluating number identification systems by exploiting hand-aligned parallel data. We also present and extensively evaluate a robust rule-based system for number identification in natural context for Arabic for a variety of number formats and types. The system is shown to have strong performance, achieving, on a blind test, a 94.8% F-score for the task of correctly identifying number expression spans in natural text, and a 92.1% F-score for the task of correctly determining the core numerical value. @@ -5182,23 +5182,23 @@ MehrnoushShamsfard - HakimehFadaee + HakimehFadaee A Hybrid Morphology-Based <fixed-case>POS</fixed-case> Tagger for <fixed-case>P</fixed-case>ersian http://www.lrec-conf.org/proceedings/lrec2008/pdf/875_paper.pdf In many applications of natural language processing (NLP) grammatically tagged corpora are needed. Thus Part of Speech (POS) Tagging is of high importance in the domain of NLP. Many taggers are designed with different approaches to reach high performance and accuracy. These taggers usually deal with inter-word relations and they make use of lexicons. In this paper we present a new tagging algorithm with a hybrid approach. This algorithm combines the features of probabilistic and rule-based taggers to tag Persian unknown words. In contrast with many other tagging algorithms this algorithm deals with the internal structure of the words and it does not need any built in knowledge. The introduced tagging algorithm is domain independent because it uses morphological rules. In this algorithm POS tags are assigned to unknown word with a probability which shows the accuracy of the assigned POS tag. Although this tagger is proposed for Persian, it can be adapted to other languages by applying their morphological rules. shamsfard-fadaee-2008-hybrid - BaskaranSankaran + BaskaranSankaran KalikaBali MonojitChoudhury TanmoyBhattacharya - PushpakBhattacharyya - Girish NathJha - S.Rajendran - K.Saravanan - L.Sobha - K.V.Subbarao + PushpakBhattacharyya + Girish NathJha + S.Rajendran + K.Saravanan + L.Sobha + K.V.Subbarao A Common Parts-of-Speech Tagset Framework for <fixed-case>I</fixed-case>ndian Languages http://www.lrec-conf.org/proceedings/lrec2008/pdf/337_paper.pdf We present a universal Parts-of-Speech (POS) tagset framework covering most of the Indian languages (ILs) following the hierarchical and decomposable tagset schema. In spite of significant number of speakers, there is no workable POS tagset and tagger for most ILs, which serve as fundamental building blocks for NLP research. Existing IL POS tagsets are often designed for a specific language; the few that have been designed for multiple languages cover only shallow linguistic features ignoring linguistic richness and the idiosyncrasies. The new framework that is proposed here addresses these deficiencies in an efficient and principled manner. We follow a hierarchical schema similar to that of EAGLES and this enables the framework to be flexible enough to capture rich features of a language/ language family, even while capturing the shared linguistic structures in a methodical way. The proposed common framework further facilitates the sharing and reusability of scarce resources in these languages and ensures cross-linguistic compatibility. @@ -5206,7 +5206,7 @@ RajatMohanty - PushpakBhattacharyya + PushpakBhattacharyya Lexical Resources for Semantics Extraction http://www.lrec-conf.org/proceedings/lrec2008/pdf/619_paper.pdf In this paper, we report our work on the creation of a number of lexical resources that are crucial for an interlingua based MT from English to other languages. These lexical resources are in the form of sub-categorization frames, verb knowledge bases and rule templates for establishing semantic relations and speech act like attributes. We have created these resources over a long period of time from Oxford Advanced Learners’ Dictionary (OALD) [1], VerbNet [2], Princeton WordNet 2.1 [3], LCS database [4], Penn Tree Bank [5], and XTAG lexicon [6]. On the challenging problem of generating interlingua from domain and structure unrestricted English sentences, we are able to demonstrate that the use of these lexical resources makes a difference in terms of accuracy figures. @@ -5223,7 +5223,7 @@ Ya-MinChou Chu-RenHuang - Jia-FeiHong + Jia-FeiHong The Extended Architecture of Hantology for <fixed-case>J</fixed-case>apan Kanji http://www.lrec-conf.org/proceedings/lrec2008/pdf/429_paper.pdf Chinese writing system is not only used by Chinese but also used by Japanese. The motivation of this paper is to extend the architecture of Hantology which describes the features of Chinese writing system to integrate Japan Kanji and Chinese characters into the same ontology. The problem is Chinese characters adopted by Japan have been changed, thus, the modification of the original architecture of Hantology is needed. A extended architecture consists orthographic, pronunciation, sense and derived lexicon dimensions. is proposed in this paper. The contribution of this study is that the extension architecture of Hantology provides a platform to analyze the variation of Chinese characters used in Japan. The analytic results of variation for a specific Kanji can be integrated into Hantology, so it is easier to study the variation of Chinese characters systematically @@ -5231,7 +5231,7 @@ PetyaOsenova - KirilSimov + KirilSimov EelcoMossel Language Resources for Semantic Document Annotation and Crosslingual Retrieval http://www.lrec-conf.org/proceedings/lrec2008/pdf/478_paper.pdf @@ -5241,7 +5241,7 @@ SanazJabbari BenAllison - LouiseGuthrie + LouiseGuthrie Using a Probabilistic Model of Context to Detect Word Obfuscation http://www.lrec-conf.org/proceedings/lrec2008/pdf/560_paper.pdf This paper proposes a distributional model of word use and word meaning which is derived purely from a body of text, and then applies this model to determine whether certain words are used in or out of context. We suggest that we can view the contexts of words as multinomially distributed random variables. We illustrate how using this basic idea, we can formulate the problem of detecting whether or not a word is used in context as a likelihood ratio test. We also define a measure of semantic relatedness between a word and its context using the same model. We assume that words that typically appear together are related, and thus have similar probability distributions and that words used in an unusual way will have probability distributions which are dissimilar from those of their surrounding context. The relatedness of a word to its context is based on Kullback-Leibler divergence between probability distributions assigned to the constituent words in the given sentence. We employed our methods on a defense-oriented application where certain words are substituted with other words in an intercepted communication. @@ -5249,7 +5249,7 @@ SaraTonelli - EmanuelePianta + EmanuelePianta Frame Information Transfer from <fixed-case>E</fixed-case>nglish to <fixed-case>I</fixed-case>talian http://www.lrec-conf.org/proceedings/lrec2008/pdf/567_paper.pdf We describe an automatic projection algorithm for transferring frame-semantic information from English to Italian texts as a first sep towards the creation of Italian FrameNet. Given an English text with frame information and its Italian translation, we project the annotation in four steps: first the Italian text is parsed, then English-Italian alignment is automatically carried out at word level, then we extract the semantic head for every annotated constituent on the English corpus side and finally we project annotation from English to Italian using aligned semantic heads as bridge. With our work, we point out typical features of the Italian language as regards frame-semantic annotation, in particular we describe peculiarities of Italian that at the moment make the projection task more difficult than in the above-mentioned examples. Besides, we created a gold standard with 987 manually annotated sentences to evaluate the algorithm. @@ -5257,7 +5257,7 @@ JordiCarrera - IreneCastellón + IreneCastellón SalvadorCliment MartaColl-Florit Towards <fixed-case>S</fixed-case>panish Verbs’ Selectional Preferences Automatic Acquisition: Semantic Annotation of the <fixed-case>S</fixed-case>en<fixed-case>S</fixed-case>em Corpus @@ -5267,26 +5267,26 @@ Paula CristinaVaz - David Martinsde Matos - Nuno J.Mamede + David Martinsde Matos + Nuno J.Mamede Using Lexical Acquisition to Enrich a Predicate Argument Reusable Database http://www.lrec-conf.org/proceedings/lrec2008/pdf/627_paper.pdf The work described in this paper aims to enrich the noun classifications of an existing database of lexical resources (de Matos and Ribeiro, 2004) adding missing information such as semantic relations. Relations are extracted from an annotated and manually corrected corpus. Semantic relations added to the database are retrieved from noun-appositive relations found in the corpus. The method uses clustering to generate labeled sets of words with hypernym relations between set label and set elements. vaz-etal-2008-using - ChrisReed + ChrisReed Raquel MochalesPalau GlennRowe - Marie-FrancineMoens + Marie-FrancineMoens Language Resources for Studying Argument http://www.lrec-conf.org/proceedings/lrec2008/pdf/648_paper.pdf This paper describes the development of a written corpus of argumentative reasoning. Arguments in the corpus have been analysed using state of the art techniques from argumentation theory and have been marked up using an open, reusable markup language. A number of the key challenges enountered during the process are explored, and preliminary observations about features such as inter-coder reliability and corpus statistics are discussed. In addition, several examples are offered of how this kind of language resource can be used in linguistic, computational and philosophical research, and in particular, how the corpus has been used to initiate a programme investigating the automatic detection of argumentative structure. reed-etal-2008-language - CosminBejan - SandaHarabagiu + CosminBejan + SandaHarabagiu A Linguistic Resource for Discovering Event Structures and Resolving Event Coreference http://www.lrec-conf.org/proceedings/lrec2008/pdf/734_paper.pdf In this paper, we present a linguistic resource that annotates event structures in texts. We consider an event structure as a collection of events that interact with each other in a given situation. We interpret the interactions between events as event relations. In this regard, we propose and annotate a set of six relations that best capture the concept of event structure. These relations are: subevent, reason, purpose, enablement, precedence and related. A document from this resource can encode multiple event structures and an event structure can be described across multiple documents. In order to unify event structures, we also annotate inter- and intra-document event coreference. Moreover, we provide methodologies for automatic discovery of event structures from texts. First, we group the events that constitute an event structure into event clusters and then, we use supervised learning frameworks to classify the relations that exist between events from the same cluster @@ -5308,7 +5308,7 @@ ruimy-toral-2008-semantic - RiccardoDel Gratta + RiccardoDel Gratta NildaRuimy AntonioToral Simple-Clips ongoing research: more information with less data by implementing inheritance @@ -5364,7 +5364,7 @@ GeorgianaPuşcaşu - Verginica BarbuMititelu + Verginica BarbuMititelu Annotation of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Verbs with <fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> Event Classes http://www.lrec-conf.org/proceedings/lrec2008/pdf/712_paper.pdf This paper reports on the annotation of all English verbs included in WordNet 2.0 with TimeML event classes. Two annotators assign each verb present in WordNet the most relevant event class capturing most of that verb’s meanings. At the end of the annotation process, inter-annotator agreement is measured using kappa statistics, yielding a kappa value of 0.87. The cases of disagreement between the two independent annotations are clarified by obtaining a third, and in some cases, a fourth opinion, and finally each of the 11,306 WordNet verbs is mapped to a unique event class. The resulted annotation is then employed to automatically assign the corresponding class to each occurrence of a finite or non-finite verb in a given text. The evaluation performed on TimeBank reveals an F-measure of 86.43% achieved for the identification of verbal events, and an accuracy of 85.25% in the task of classifying them into TimeML event classes. @@ -5390,13 +5390,13 @@ VincentVandeghinste PeterDirix InekeSchuurman - StellaMarkantonatou + StellaMarkantonatou SokratisSofianopoulos MarinaVassiliou OlgaYannoutsou ToniBadia MaiteMelero - GemmaBoleda + GemmaBoleda MichaelCarl PaulSchmidt Evaluation of a Machine Translation System for Low Resource Languages: <fixed-case>METIS</fixed-case>-<fixed-case>II</fixed-case> @@ -5405,8 +5405,8 @@ vandeghinste-etal-2008-evaluation - Marta R.Costa-jussà - José A. R.Fonollosa + Marta R.Costa-jussà + José A. R.Fonollosa EnricMonte Using Reordering in Statistical Machine Translation based on Alignment Block Classification http://www.lrec-conf.org/proceedings/lrec2008/pdf/444_paper.pdf @@ -5414,7 +5414,7 @@ costa-jussa-etal-2008-using - Janne BondiJohannessen + Janne BondiJohannessen TorbjørnNordgård LarsNygaard Evaluation of Linguistics-Based Translation @@ -5444,7 +5444,7 @@ ma-etal-2008-selection - BeátaMegyesi + BeátaMegyesi BengtDahlqvist EvaPettersson JoakimNivre @@ -5483,14 +5483,14 @@ SvitlanaKurella SergeSharoff - AnthonyHartley + AnthonyHartley Corpus-Based Tools for Computer-Assisted Acquisition of Reading Abilities in Cognate Languages http://www.lrec-conf.org/proceedings/lrec2008/pdf/479_paper.pdf This paper presents an approach to computer-assisted teaching of reading abilities using corpus data. The approach is supported by a set of tools for automatically selecting and classifying texts retrieved from the Internet. The approach is based on a linguistic model of textual cohesion which describes relations between larger textual units that go beyond the sentence level. We show that textual connectors that link such textual units reliably predict different types of texts, such as “information” and “opinion”: using only textual connectors as features, an SVM classifier achieves an F-score of between 0.85 and 0.93 for predicting these classes. The tools are used in our project on teaching reading skills in a cognate foreign language (L3) which is cognate to a known foreign language (L2). kurella-etal-2008-corpus - JörgTiedemann + JörgTiedemann Synchronizing Translated Movie Subtitles http://www.lrec-conf.org/proceedings/lrec2008/pdf/484_paper.pdf This paper addresses the problem of synchronizing movie subtitles, which is necessary to improve alignment quality when building a parallel corpus out of translated subtitles. In particular, synchronization is done on the basis of aligned anchor points. Previous studies have shown that cognate filters are useful for the identification of such points. However, this restricts the approach to related languages with similar alphabets. Here, we propose a dictionary-based approach using automatic word alignment. We can show an improvement in alignment quality even for related languages compared to the cognate-based approach. @@ -5506,7 +5506,7 @@ ViolainePrince - JacquesChauché + JacquesChauché Building a Bilingual Representation of the <fixed-case>R</fixed-case>oget Thesaurus for <fixed-case>F</fixed-case>rench to <fixed-case>E</fixed-case>nglish Machine Translation http://www.lrec-conf.org/proceedings/lrec2008/pdf/626_paper.pdf This paper describes a solution to lexical transfer as a trade-off between a dictionary and an ontology. It shows its association to a translation tool based on morpho-syntactical parsing of the source language. It is based on the English Roget Thesaurus and its equivalent, the French Larousse Thesaurus, in a computational framework. Both thesaurii are transformed into vector spaces, and all monolingual entries are represented as vectors, with 1,000 components for English and 873 for French. The indexing concepts of the respective thesaurii are the generation families of the vector spaces. A bilingual data structure transforms French entries into vectors in the English space, by using their equivalencies representations. Word sense disambiguation consists in choosing the appropriate vector among these “bilingual” vectors, by computing the contextualized vector of a given word in its source sentence, wading it in the English vector space, and computing the closest distance to the different entries in the bilingual data structure beginning with the same source string (i.e. French word). The process has been experimented on a 20,000 words extract of a French novel, Le Petit Prince, and lexical transfer results were found quite encouraging with a recall of 71% and a precision of 86%. @@ -5514,7 +5514,7 @@ LukaNerima - EricWehrli + EricWehrli Generating Bilingual Dictionaries by Transitivity http://www.lrec-conf.org/proceedings/lrec2008/pdf/641_paper.pdf Recently the LATL has undertaken the development of a multilingual translation system based on a symbolic parsing technology and on a transfer-based translation model. A crucial component of the system is the lexical database, notably the bilingual dictionaries containing the information for the lexical transfer from one language to another. As the number of necessary bilingual dictionaries is a quadratic function of the number of languages considered, we will face the problem of getting a large number of dictionaries. In this paper we discuss a solution to derive a bilingual dictionary by transitivity using existing ones and to check the generated translations in a parallel corpus. Our first experiments concerns the generation of two bilingual dictionaries and the quality of the entries are very promising. The number of generated entries could however be improved and we conclude the paper with the possible ways we plan to explore. @@ -5531,14 +5531,14 @@ ChristianMonson - AriadnaFont Llitjós + AriadnaFont Llitjós VamshiAmbati - LoriLevin - AlonLavie + LoriLevin + AlonLavie AlisonAlvarez RobertoAranovich - JaimeCarbonell - RobertFrederking + JaimeCarbonell + RobertFrederking ErikPeterson KatharinaProbst Linguistic Structure and Bilingual Informants Help Induce Machine Translation of Lesser-Resourced Languages @@ -5549,7 +5549,7 @@ KazuakiMaeda XiaoyiMa - StephanieStrassel + StephanieStrassel Creating Sentence-Aligned Parallel Text Corpora from a Large Archive of Potential Parallel Text using <fixed-case>BITS</fixed-case> and Champollion http://www.lrec-conf.org/proceedings/lrec2008/pdf/779_paper.pdf Parallel text is one of the most valuable resources for development of statistical machine translation systems and other NLP applications. The Linguistic Data Consortium (LDC) has supported research on statistical machine translations and other NLP applications by creating and distributing a large amount of parallel text resources for the research communities. However, manual translations are very costly, and the number of known providers that offer complete parallel text is limited. This paper presents a cost effective approach to identify parallel document pairs from sources that provide potential parallel text - namely, sources that may contain whole or partial translations of documents in the source language - using the BITS and Champollion parallel text alignment systems developed by LDC. @@ -5570,7 +5570,7 @@ WolodjaWentland JohannesKnopp CarinaSilberer - MatthiasHartung + MatthiasHartung Building a Multilingual Lexical Resource for Named Entity Disambiguation, Translation and Transliteration http://www.lrec-conf.org/proceedings/lrec2008/pdf/816_paper.pdf In this paper, we present HeiNER, the multilingual Heidelberg Named Entity Resource. HeiNER contains 1,547,586 disambiguated English Named Entities together with translations and transliterations to 15 languages. Our work builds on the approach described in (Bunescu and Pasca, 2006), yet extends it to a multilingual dimension. Translating Named Entities into the various target languages is carried out by exploiting crosslingual information contained in the online encyclopedia Wikipedia. In addition, HeiNER provides linguistic contexts for every NE in all target languages which makes it a valuable resource for multilingual Named Entity Recognition, Disambiguation and Classification. The results of our evaluation against the assessments of human annotators yield a high precision of 0.95 for the NEs we extract from the English Wikipedia. These source language NEs are thus very reliable seeds for our multilingual NE translation method. @@ -5585,17 +5585,17 @@ TodorArnaudov - RuslanMitkov + RuslanMitkov Smarty - Extendable Framework for Bilingual and Multilingual Comprehension Assistants http://www.lrec-conf.org/proceedings/lrec2008/pdf/826_paper.pdf This paper discusses a framework for development of bilingual and multilingual comprehension assistants and presents a prototype implementation of an English-Bulgarian comprehension assistant. The framework is based on the application of advanced graphical user interface techniques, WordNet and compatible lexical databases as well as a series of NLP preprocessing tasks, including POS-tagging, lemmatisation, multiword expressions recognition and word sense disambiguation. The aim of this framework is to speed up the process of dictionary look-up, to offer enhanced look-up functionalities and to perform a context-sensitive narrowing-down of the set of translation alternatives proposed to the user. arnaudov-mitkov-2008-smarty - PéterHalácsy + PéterHalácsy AndrásKornai PéterNémeth - DánielVarga + DánielVarga Parallel Creation of <fixed-case>G</fixed-case>igaword Corpora for Medium Density Languages - an Interim Report http://www.lrec-conf.org/proceedings/lrec2008/pdf/858_paper.pdf For increased speed in developing gigaword language resources for medium resource density languages we integrated several FOSS tools in the HUN* toolkit. While the speed and efficiency of the resulting pipeline has surpassed our expectations, our experience in developing LDC-style resource packages for Uzbek and Kurdish makes clear that neither the data collection nor the subsequent processing stages can be fully automated. @@ -5604,14 +5604,14 @@ ReginaldHobbs JamalLaoudi - ClareVoss + ClareVoss <fixed-case>MT</fixed-case>riage: Web-enabled Software for the Creation, Machine Translation, and Annotation of Smart Documents http://www.lrec-conf.org/proceedings/lrec2008/pdf/844_paper.pdf Progress in the Machine Translation (MT) research community, particularly for statistical approaches, is intensely data-driven. Acquiring source language documents for testing, creating training datasets for customized MT lexicons, and building parallel corpora for MT evaluation require translators and non-native speaking analysts to handle large document collections. These collections are further complicated by differences in format, encoding, source media, and access to metadata describing the documents. Automated tools that allow language professionals to quickly annotate, translate, and evaluate foreign language documents are essential to improving MT quality and efficacy. The purpose of this paper is present our research approach to improving MT through pre-processing source language documents. In particular, we will discuss the development and use of MTriage, an application environment that enables the translator to markup documents with metadata for MT parameterization and routing. The use of MTriage as a web-enabled front end to multiple MT engines has leveraged the capabilities of our human translators for creating lexicons from NFW (Not-Found-Word) lists, writing reference translations, and creating parallel corpora for MT development and evaluation. hobbs-etal-2008-mtriage - ClareVoss + ClareVoss JamalLaoudi JeffreyMicher Exploitation of an <fixed-case>A</fixed-case>rabic Language Resource for Machine Translation Evaluation: using <fixed-case>B</fixed-case>uckwalter-based Lookup Tool to Augment <fixed-case>CMU</fixed-case> Alignment Algorithm @@ -5653,7 +5653,7 @@ santos-etal-2008-whats - BeataTrawiński + BeataTrawiński Jan-PhilippSoehn A Multilingual Database of Polarity Items http://www.lrec-conf.org/proceedings/lrec2008/pdf/77_paper.pdf @@ -5662,7 +5662,7 @@ Ernesto WilliamDe Luca - BirteLönneker-Rodman + BirteLönneker-Rodman Integrating Metaphor Information into <fixed-case>RDF</fixed-case>/<fixed-case>OWL</fixed-case> <fixed-case>E</fixed-case>uro<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et http://www.lrec-conf.org/proceedings/lrec2008/pdf/108_paper.pdf In this paper, we discuss the integration of metaphor information into the RDF/OWL representation of EuroWordNet. First, the lexical database WordNet and its variants are presented. After a brief description of the Hamburg Metaphor Database, examples of its conversion into the RDF/OWL representation of EuroWordNet are discussed. The metaphor information is added to the general EuroWordNet data and the new resulting RDF/OWL structure is shown in LexiRes, a visualization tool developed and adapted for handling structures of ontological and lexical databases. We show how LexiRes can be used to further edit the newly added metaphor information, and explain some problems with this new type of information on the basis of examples. @@ -5678,8 +5678,8 @@ JuanAparicio - MarionaTaulé - M. AntòniaMartí + MarionaTaulé + M. AntòniaMartí <fixed-case>A</fixed-case>n<fixed-case>C</fixed-case>ora-Verb: A Lexical Resource for the Semantic Annotation of Corpora http://www.lrec-conf.org/proceedings/lrec2008/pdf/203_paper.pdf In this paper we present two large-scale verbal lexicons, AnCora-Verb-Ca for Catalan and AnCora-Verb-Es for Spanish, which are the basis for the semantic annotation with arguments and thematic roles of AnCora corpora. In AnCora-Verb lexicons, the mapping between syntactic functions, arguments and thematic roles of each verbal predicate it is established taking into account the verbal semantic class and the diatheses alternations in which the predicate can participate. Each verbal predicate is related to one or more semantic classes basically differentiated according to the four event classes -accomplishments, achievements, states and activities-, and on the diatheses alternations in which a verb can occur. AnCora-Verb-Es contains a total of 1,965 different verbs corresponding to 3,671 senses and AnCora-Verb-Ca contains 2,151 verbs and 4,513 senses. These figures correspond to the total of 500,000 words contained in each corpus, AnCora-Ca and AnCora-Es. The lexicons and the annotated corpora constitute the richest linguistic resources of this kind freely available for Spanish and Catalan. The big amount of linguistic information contained in both resources should be of great interest for computational applications and linguistic studies. Currently, a consulting interface for these lexicons is available at (http://clic.ub.edu/ancora/). @@ -5709,9 +5709,9 @@ furstenau-2008-enriching - Bento CarlosDias-da-Silva - ArianiDi Felippo - Mariadas Graças Volpe Nunes + Bento CarlosDias-da-Silva + ArianiDi Felippo + Mariadas Graças Volpe Nunes The Automatic Mapping of <fixed-case>P</fixed-case>rinceton <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Lexical-Conceptual Relations onto the <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Database http://www.lrec-conf.org/proceedings/lrec2008/pdf/391_paper.pdf Princeton WordNet (WN.Pr) lexical database has motivated efficient compilations of bulky relational lexicons since its inception in the 1980´s. The EuroWordNet project, the first multilingual initiative built upon WN.Pr, opened up ways of building individual wordnets, and inter-relating them by means of the so-called Inter-Lingual-Index, an unstructured list of the WN.Pr synsets. Other important initiative, relying on a slightly different method of building multilingual wordnets, is the MultiWordNet project, where the key strategy is building language specific wordnets keeping as much as possible of the semantic relations available in the WN.Pr. This paper, in particular, stresses that the additional advantage of using WN.Pr lexical database as a resource for building wordnets for other languages is to explore possibilities of implementing an automatic procedure to map the WN.Pr conceptual relations as hyponymy, co-hyponymy, troponymy, meronymy, cause, and entailment onto the lexical database of the wordnet under construction, a viable possibility, for those are language-independent relations that hold between lexicalized concepts, not between lexical units. Accordingly, combining methods from both initiatives, this paper presents the ongoing implementation of the WN.Br lexical database and the aforementioned automation procedure illustrated with a sample of the automatic encoding of the hyponymy and co-hyponymy relations. @@ -5727,7 +5727,7 @@ EviMarzelou MariaZourari - VoulaGiouli + VoulaGiouli SteliosPiperidis Building a <fixed-case>G</fixed-case>reek corpus for Textual Entailment http://www.lrec-conf.org/proceedings/lrec2008/pdf/427_paper.pdf @@ -5754,7 +5754,7 @@ DavidePicca - Alfio MassimilianoGliozzo + Alfio MassimilianoGliozzo MassimilianoCiaramita Supersense Tagger for <fixed-case>I</fixed-case>talian http://www.lrec-conf.org/proceedings/lrec2008/pdf/599_paper.pdf @@ -5762,7 +5762,7 @@ picca-etal-2008-supersense - Maria TeresaPazienza + Maria TeresaPazienza ArmandoStellato Clustering of Terms from Translation Dictionaries and Synonyms Lists to Automatically Build more Structured Linguistic Resources http://www.lrec-conf.org/proceedings/lrec2008/pdf/629_paper.pdf @@ -5782,7 +5782,7 @@ AttilaAlmási DóraSzauter RóbertOrmándi - RichárdFarkas + RichárdFarkas CsabaHatvani JánosCsirik <fixed-case>H</fixed-case>ungarian Word-Sense Disambiguated Corpus @@ -5791,7 +5791,7 @@ vincze-etal-2008-hungarian - Olga N.Lashevskaja + Olga N.Lashevskaja Olga Yu.Shemanaeva Semantic Annotation Layer in <fixed-case>R</fixed-case>ussian National Corpus: Lexical Classes of Nouns and Adjectives http://www.lrec-conf.org/proceedings/lrec2008/pdf/849_paper.pdf @@ -5799,9 +5799,9 @@ lashevskaja-shemanaeva-2008-semantic - MohamedAttia - MohsenRashwan - AhmedRagheb + MohamedAttia + MohsenRashwan + AhmedRagheb MohamedAl-Badrashiny HuseinAl-Basoumy A Compact <fixed-case>A</fixed-case>rabic Lexical Semantics Language Resource Based on the Theory of Semantic Fields @@ -5811,7 +5811,7 @@ DoaaSamy - AnaGonzález-Ledesma + AnaGonzález-Ledesma Pragmatic Annotation of Discourse Markers in a Multilingual Parallel Corpus (<fixed-case>A</fixed-case>rabic- <fixed-case>S</fixed-case>panish-<fixed-case>E</fixed-case>nglish) http://www.lrec-conf.org/proceedings/lrec2008/pdf/828_paper.pdf Discourse structure and coherence relations are one of the main inferential challenges addressed by computational pragmatics. The present study focuses on discourse markers as key elements in guiding the inferences of the statements in natural language. Through a rule-based approach for the automatic identification, classification and annotation of the discourse markers in a multilingual parallel corpus (Arabic-Spanish-English), this research provides a valuable resource for the community. Two main aspects define the novelty of the present study. First, it offers a multilingual computational processing of discourse markers, grounded on a theoritical framework and implemented in a XML tagging scheme. The XML scheme represents a set of pragmatic and grammatical attributes, considered as basic features for the different kinds of discourse markers. Besides, the scheme provides a typology of discourse markers based on their discursive functions including hypothesis, co-argumentation, cause, consequence, concession, generalization, topicalization, reformulation, enumeration, synthesis, etc. Second, Arabic language is addressed from a computational pragmatic perspective where the identification, classification and annotation processes are carried out using the information provided from the tagging of Spanish discourse markers and the alignments. @@ -5830,7 +5830,7 @@ JonasSjöbergh - KenjiAraki + KenjiAraki A Multi-Lingual Dictionary of Dirty Words http://www.lrec-conf.org/proceedings/lrec2008/pdf/133_paper.pdf We present a multi-lingual dictionary of dirty words. We have collected about 3,200 dirty words in several languages and built a database of these. The language with the most words in the database is English, though there are several hundred dirty words in for instance Japanese too. Words are classified into their general meaning, such as what part of the human anatomy they refer to. Words can also be assigned a nuance label to indicate if it is a cute word used when speaking to children, a very rude word, a clinical word etc. The database is available online and will hopefully be enlarged over time. It has already been used in research on for instance automatic joke generation and emotion detection. @@ -5838,7 +5838,7 @@ JonasSjöbergh - KenjiAraki + KenjiAraki What is poorly Said is a Little Funny http://www.lrec-conf.org/proceedings/lrec2008/pdf/79_paper.pdf We implement several different methods for generating jokes in English. The common theme is to intentionally produce poor utterances by breaking Grice’s maxims of conversation. The generated jokes are evaluated and compared to human made jokes. They are in general quite weak jokes, though there are a few high scoring jokes and many jokes that score higher than the most boring human joke. @@ -5854,7 +5854,7 @@ RuifengXu YunqingXia - Kam-FaiWong + Kam-FaiWong WenjieLi Opinion Annotation in On-line <fixed-case>C</fixed-case>hinese Product Reviews http://www.lrec-conf.org/proceedings/lrec2008/pdf/415_paper.pdf diff --git a/data/xml/L10.xml b/data/xml/L10.xml index 625018ca29..98918c3e10 100644 --- a/data/xml/L10.xml +++ b/data/xml/L10.xml @@ -3,14 +3,14 @@ Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10) - NicolettaCalzolari - KhalidChoukri - BenteMaegaard - JosephMariani - JanOdijk - SteliosPiperidis - MikeRosner - DanielTapias + NicolettaCalzolari + KhalidChoukri + BenteMaegaard + JosephMariani + JanOdijk + SteliosPiperidis + MikeRosner + DanielTapias European Language Resources Association (ELRA)
Valletta, Malta
May @@ -30,11 +30,11 @@ dalianis-etal-2010-creating
- LluísPadró + LluísPadró MiquelCollado SamuelReese MarinaLloberes - IreneCastellón + IreneCastellón <fixed-case>F</fixed-case>ree<fixed-case>L</fixed-case>ing 2.1: Five Years of Open-source Language Processing Tools http://www.lrec-conf.org/proceedings/lrec2010/pdf/14_Paper.pdf FreeLing is an open-source multilingual language processing library providing a wide range of language analyzers for several languages. It offers text processing and language annotation facilities to natural language processing application developers, simplifying the task of building those applications. FreeLing is customizable and extensible. Developers can use the default linguistic resources (dictionaries, lexicons, grammars, etc.) directly, or extend them, adapt them to specific domains, or even develop new ones for specific languages. This paper overviews the recent history of this tool, summarizes the improvements and extensions incorporated in the latest version, and depicts the architecture of the library. Special focus is brought to the fact and consequences of the library being open-source: After five years and over 35,000 downloads, a growing user community has extended the initial threelanguages (English, Spanish and Catalan) to eight (adding Galician, Italian, Welsh, Portuguese, and Asturian), proving that the collaborative open model is a productive approach for the development of NLP tools and resources. @@ -66,8 +66,8 @@ RobertoNavigli - PaolaVelardi - Juana MariaRuiz-Martínez + PaolaVelardi + Juana MariaRuiz-Martínez An Annotated Dataset for Extracting Definitions and Hypernyms from the Web http://www.lrec-conf.org/proceedings/lrec2010/pdf/20_Paper.pdf This paper presents and analyzes an annotated corpus of definitions, created to train an algorithm for the automatic extraction of definitions and hypernyms from web documents. As an additional resource, we also include a corpus of non-definitions with syntactic patterns similar to those of definition sentences, e.g.: ""An android is a robot"" vs. ""Snowcap is unmistakable"". Domain and style independence is obtained thanks to the annotation of a large and domain-balanced corpus and to a novel pattern generalization algorithm based on word-class lattices (WCL). A lattice is a directed acyclic graph (DAG), a subclass of nondeterministic finite state automata (NFA). The lattice structure has the purpose of preserving the salient differences among distinct sequences, while eliminating redundant information. The WCL algorithm will be integrated into an improved version of the GlossExtractor Web application (Velardi et al., 2008). This paper is mostly concerned with a description of the corpus, the annotation strategy, and a linguistic analysis of the data. A summary of the WCL algorithm is also provided for the sake of completeness. @@ -82,8 +82,8 @@ khokhlova-zakharov-2010-studying - Marta R.Costa-jussà - José A. R.Fonollosa + Marta R.Costa-jussà + José A. R.Fonollosa Using Linear Interpolation and Weighted Reordering Hypotheses in the <fixed-case>M</fixed-case>oses System http://www.lrec-conf.org/proceedings/lrec2010/pdf/23_Paper.pdf This paper proposes to introduce a novel reordering model in the open-source Moses toolkit. The main idea is to provide weighted reordering hypotheses to the SMT decoder. These hypotheses are built using a first-step Ngram-based SMT translation from a source language into a third representation that is called reordered source language. Each hypothesis has its own weight provided by the Ngram-based decoder. This proposed reordering technique offers a better and more efficient translation when compared to both the distance-based and the lexicalized reordering. In addition to this reordering approach, this paper describes a domain adaptation technique which is based on a linear combination of an specific in-domain and an extra out-domain translation models. Results for both approaches are reported in the Arabic-to-English 2008 IWSLT task. When implementing the weighted reordering hypotheses and the domain adaptation technique in the final translation system, translation results reach improvements up to 2.5 BLEU compared to a standard state-of-the-art Moses baseline system. @@ -98,7 +98,7 @@ AntoinetteHawayek - RiccardoDel Gratta + RiccardoDel Gratta GiuseppeCappelli A Bilingual Dictionary <fixed-case>M</fixed-case>exican <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage-<fixed-case>S</fixed-case>panish/<fixed-case>S</fixed-case>panish-<fixed-case>M</fixed-case>exican <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage http://www.lrec-conf.org/proceedings/lrec2010/pdf/27_Paper.pdf @@ -115,7 +115,7 @@ sharoff-etal-2010-web - Hans-UlrichKrieger + Hans-UlrichKrieger A General Methodology for Equipping Ontologies with Time http://www.lrec-conf.org/proceedings/lrec2010/pdf/29_Paper.pdf In the first part of this paper, we present a framework for enriching arbitrary upper or domain-specific ontologies with a concept of time. To do so, we need the notion of a time slice. Contrary to other approaches, we directly interpret the original entities as time slices in order to (i) avoid a duplication of the original ontology and (ii) to prevent a knowledge engineer from ontology rewriting. The diachronic representation of time is complemented by a sophisticated time ontology that supports underspecification and an arbitrarily fine granularity of time. As a showcase, we describe how the time ontology has been interfaced with the PROTON upper ontology. The second part investigates a temporal extension of RDF that replaces the usual triple notation by a more general tuple representation. In this setting, Hayes/ter Horst-like entailment rules are replaced by their temporal counterparts. Our motivation to move towards this direction is twofold: firstly, extending binary relation instances with time leads to a massive proliferation of useless objects (independently of the encoding); secondly, reasoning and querying with such extended relations is extremely complex, expensive, and error-prone. @@ -124,20 +124,20 @@ TingQian KristyHollingshead - Su-younYoon - Kyoung-youngKim - RichardSproat + Su-younYoon + Kyoung-youngKim + RichardSproat A Python Toolkit for Universal Transliteration http://www.lrec-conf.org/proceedings/lrec2010/pdf/30_Paper.pdf We describe ScriptTranscriber, an open source toolkit for extracting transliterations in comparable corpora from languages written in different scripts. The system includes various methods for extracting potential terms of interest from raw text, for providing guesses on the pronunciations of terms, and for comparing two strings as possible transliterations using both phonetic and temporal measures. The system works with any script in the Unicode Basic Multilingual Plane and is easily extended to include new modules. Given comparable corpora, such as newswire text, in a pair of languages that use different scripts, ScriptTranscriber provides an easy way to mine transliterations from the comparable texts. This is particularly useful for underresourced languages, where training data for transliteration may be lacking, and where it is thus hard to train good transliterators. ScriptTranscriber provides an open source package that allows for ready incorporation of more sophisticated modules ― e.g. a trained transliteration model for a particular language pair. ScriptTranscriber is available as part of the nltk contrib source tree at http://code.google.com/p/nltk/. qian-etal-2010-python - K. BretonnelCohen - ChristopheRoeder - William A.Baumgartner Jr. - Lawrence E.Hunter - KarinVerspoor + K. BretonnelCohen + ChristopheRoeder + William A.Baumgartner Jr. + Lawrence E.Hunter + KarinVerspoor Test Suite Design for Biomedical Ontology Concept Recognition Systems http://www.lrec-conf.org/proceedings/lrec2010/pdf/31_Paper.pdf Systems that locate mentions of concepts from ontologies in free text are known as ontology concept recognition systems. This paper describes an approach to the evaluation of the workings of ontology concept recognition systems through use of a structured test suite and presents a publicly available test suite for this purpose. It is built using the principles of descriptive linguistic fieldwork and of software testing. More broadly, we also seek to investigate what general principles might inform the construction of such test suites. The test suite was found to be effective in identifying performance errors in an ontology concept recognition system. The system could not recognize 2.1% of all canonical forms and no non-canonical forms at all. Regarding the question of general principles of test suite construction, we compared this test suite to a named entity recognition test suite constructor. We found that they had twenty features in total and that seven were shared between the two models, suggesting that there is a core of feature types that may be applicable to test suite construction for any similar type of application. @@ -145,7 +145,7 @@ ElsLefever - VéroniqueHoste + VéroniqueHoste Construction of a Benchmark Data Set for Cross-lingual Word Sense Disambiguation http://www.lrec-conf.org/proceedings/lrec2010/pdf/34_Paper.pdf Given the recent trend to evaluate the performance of word sense disambiguation systems in a more application-oriented set-up, we report on the construction of a multilingual benchmark data set for cross-lingual word sense disambiguation. The data set was created for a lexical sample of 25 English nouns, for which translations were retrieved in 5 languages, namely Dutch, German, French, Italian and Spanish. The corpus underlying the sense inventory was the parallel data set Europarl. The gold standard sense inventory was based on the automatic word alignments of the parallel corpus, which were manually verified. The resulting word alignments were used to perform a manual clustering of the translations over all languages in the parallel corpus. The inventory then served as input for the annotators of the sentences, who were asked to provide a maximum of three contextually relevant translations per language for a given focus word. The data set was released in the framework of the SemEval-2010 competition. @@ -163,7 +163,7 @@ ClausZinn - PeterWittenburg + PeterWittenburg JacquelijnRingersma An Evolving e<fixed-case>S</fixed-case>cience Environment for Research Data in Linguistics http://www.lrec-conf.org/proceedings/lrec2010/pdf/36_Paper.pdf @@ -200,7 +200,7 @@ rentoumi-etal-2010-united - NúriaBel + NúriaBel Handling of Missing Values in Lexical Acquisition http://www.lrec-conf.org/proceedings/lrec2010/pdf/45_Paper.pdf In this work we propose a strategy to reduce the impact of the sparse data problem in the tasks of lexical information acquisition based on the observation of linguistic cues. We propose a way to handle the uncertainty created by missing values, that is, when a zero value could mean either that the cue has not been observed because the word in question does not belong to the class, i.e. negative evidence, or that the word in question has just not been observed in the context sought by chance, i.e. lack of evidence. This uncertainty creates problems to the learner, because zero values for incompatible labelled examples make the cue lose its predictive capacity and even though some samples display the sought context, it is not taken into account. In this paper we present the results of our experiments to try to reduce this uncertainty by, as other authors do (Joanis et al. 2007, for instance), substituting zero values for pre-processed estimates. Here we present a first round of experiments that have been the basis for the estimates of linguistic information motivated by lexical classes. We obtained experimental results that show a clear benefit of the proposed approach. @@ -215,17 +215,17 @@ carlsson-dalianis-2010-influence - Marta R.Costa-jussà + Marta R.Costa-jussà MireiaFarrús - José B.Mariño - José A. R.Fonollosa + José B.Mariño + José A. R.Fonollosa Automatic and Human Evaluation Study of a Rule-based and a Statistical <fixed-case>C</fixed-case>atalan-<fixed-case>S</fixed-case>panish Machine Translation Systems http://www.lrec-conf.org/proceedings/lrec2010/pdf/47_Paper.pdf Machine translation systems can be classified into rule-based and corpus-based approaches, in terms of their core technology. Since both paradigms have largely been used during the last years, one of the aims in the research community is to know how these systems differ in terms of translation quality. To this end, this paper reports a study and comparison of a rule-based and a corpus-based (particularly, statistical) Catalan-Spanish machine translation systems, both of them freely available in the web. The translation quality analysis is performed under two different domains: journalistic and medical. The systems are evaluated by using standard automatic measures, as well as by native human evaluators. Automatic results show that the statistical system performs better than the rule-based system. Human judgements show that in the Spanish-to-Catalan direction the statistical system also performs better than the rule-based system, while in the Catalan-to-Spanish direction is the other way round. Although the statistical system obtains the best automatic scores, its errors tend to be more penalized by human judgements than the errors of the rule-based system. This can be explained because statistical errors are usually unexpected and they do not follow any pattern. costa-jussa-etal-2010-automatic - Anil KumarSingh + Anil KumarSingh Bharat RamAmbati An Integrated Digital Tool for Accessing Language Resources http://www.lrec-conf.org/proceedings/lrec2010/pdf/48_Paper.pdf @@ -249,18 +249,18 @@ benajiba-zitouni-2010-arabic - JamesPustejovsky + JamesPustejovsky KiyongLee - HarryBunt - LaurentRomary + HarryBunt + LaurentRomary <fixed-case>ISO</fixed-case>-<fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case>: An International Standard for Semantic Annotation http://www.lrec-conf.org/proceedings/lrec2010/pdf/55_Paper.pdf In this paper, we present ISO-TimeML, a revised and interoperable version of the temporal markup language, TimeML. We describe the changes and enrichments made, while framing the effort in a more general methodology of semantic annotation. In particular, we assume a principled distinction between the annotation of an expression and the representation which that annotation denotes. This involves not only the specification of an annotation language for a particular phenomenon, but also the development of a meta-model that allows one to interpret the syntactic expressions of the specification semantically. pustejovsky-etal-2010-iso - RankaStanković - IvanObradović + RankaStanković + IvanObradović OliveraKitanović <fixed-case>GIS</fixed-case> Application Improvement with Multilingual Lexical and Terminological Resources http://www.lrec-conf.org/proceedings/lrec2010/pdf/57_Paper.pdf @@ -268,8 +268,8 @@ stankovic-etal-2010-gis - NathanaelChambers - DanJurafsky + NathanaelChambers + DanJurafsky A Database of Narrative Schemas http://www.lrec-conf.org/proceedings/lrec2010/pdf/58_Paper.pdf This paper describes a new language resource of events and semantic roles that characterize real-world situations. Narrative schemas contain sets of related events (edit and publish), a temporal ordering of the events (edit before publish), and the semantic roles of the participants (authors publish books). This type of world knowledge was central to early research in natural language understanding, scripts being one of the main formalisms, they represented common sequences of events that occur in the world. Unfortunately, most of this knowledge was hand-coded and time consuming to create. Current machine learning techniques, as well as a new approach to learning through coreference chains, has allowed us to automatically extract rich event structure from open domain text in the form of narrative schemas. The narrative schema resource described in this paper contains approximately 5000 unique events combined into schemas of varying sizes. We describe the resource, how it is learned, and a new evaluation of the coverage of these schemas over unseen documents. @@ -292,15 +292,15 @@ proisl-kabashi-2010-using - XabierSaralegi - MaddalenLopez de Lacalle + XabierSaralegi + MaddalenLopez de Lacalle Dictionary and Monolingual Corpus-based Query Translation for <fixed-case>B</fixed-case>asque-<fixed-case>E</fixed-case>nglish <fixed-case>CLIR</fixed-case> http://www.lrec-conf.org/proceedings/lrec2010/pdf/63_Paper.pdf This paper deals with the main problems that arise in the query translation process in dictionary-based Cross-lingual Information Retrieval (CLIR): translation selection, presence of Out-Of-Vocabulary (OOV) terms and translation of Multi-Word Expressions (MWE). We analyse to what extent each problem affects the retrieval performance for the Basque-English pair of languages, and the improvement obtained when using parallel corpora free methods to address them. To tackle the translation selection problem we provide novel extensions of an already existing monolingual target co-occurrence-based method, the Out-Of Vocabulary terms are dealt with by means of a cognate detection-based method and finally, for the Multi-Word Expression translation problem, a naïve matching technique is applied. The error analysis shows significant differences in the deterioration of the performance depending on the problem, in terms of Mean Average Precision (MAP), the translation selection problem being the cause of most of the errors. Otherwise, the proposed combined strategy shows a good performance to tackle the three above-mentioned main problems. saralegi-lopez-de-lacalle-2010-dictionary - VéronikaLux-Pogodalla + VéronikaLux-Pogodalla DominiqueBesagni KarënFort <fixed-case>F</fixed-case>ast<fixed-case>K</fixed-case>wic, an “Intelligent“ Concordancer Using <fixed-case>FASTR</fixed-case> @@ -310,7 +310,7 @@ CvetanaKrstev - RankaStanković + RankaStanković DuškoVitas A Description of Morphological Features of <fixed-case>S</fixed-case>erbian: a Revision using Feature System Declaration http://www.lrec-conf.org/proceedings/lrec2010/pdf/66_Paper.pdf @@ -318,7 +318,7 @@ krstev-etal-2010-description - Plaban Kr.Bhowmick + Plaban Kr.Bhowmick AnupamBasu PabitraMitra Determining Reliability of Subjective and Multi-label Emotion Annotation through Novel Fuzzy Agreement Measure @@ -328,7 +328,7 @@ XavierTannier - VéroniqueMoriceau + VéroniqueMoriceau <fixed-case>FIDJI</fixed-case>: Web Question-Answering at Quaero 2009 http://www.lrec-conf.org/proceedings/lrec2010/pdf/68_Paper.pdf This paper presents the participation of FIDJI system to the Web Question-Answering evaluation campaign organized by Quaero in 2009. FIDJI is an open-domain question-answering system which combines syntactic information with traditional QA techniques such as named entity recognition and term weighting in order to validate answers through multiple documents. It was originally designed to process ``clean'' document collections. Overall results are significantly lower than in traditional campaigns but results (for French evaluation) are quite good compared to other state-of-the-art systems. They show that a syntax-based strategy, applied on uncleaned Web data, can still obtain good results. Moreover, we obtain much higher scores on ``complex'' questions, i.e. `how' and `why' questions, which are more representative of real user needs. These results show that questioning the Web with advanced linguistic techniques can be done without heavy pre-processing and with results that come near to best systems that use strong resources and large structured indexes. @@ -358,9 +358,9 @@ sornlertlamvanich-etal-2010-language - Jinho D.Choi - ClaireBonial - MarthaPalmer + Jinho D.Choi + ClaireBonial + MarthaPalmer <fixed-case>P</fixed-case>ropbank Frameset Annotation Guidelines Using a Dedicated Editor, Cornerstone http://www.lrec-conf.org/proceedings/lrec2010/pdf/73_Paper.pdf This paper gives guidelines of how to create and update Propbank frameset files using a dedicated editor, Cornerstone. Propbank is a corpus in which the arguments of each verb predicate are annotated with their semantic roles in relation to the predicate. Propbank annotation also requires the choice of a sense ID for each predicate. Thus, for each predicate in Propbank, there exists a corresponding frameset file showing the expected predicate argument structure of each sense related to the predicate. Since most Propbank annotations are based on the predicate argument structure defined in the frameset files, it is important to keep the files consistent, simple to read as well as easy to update. The frameset files are written in XML, which can be difficult to edit when using a simple text editor. Therefore, it is helpful to develop a user-friendly editor such as Cornerstone, specifically customized to create and edit frameset files. Cornerstone runs platform independently, is light enough to run as an X11 application and supports multiple languages such as Arabic, Chinese, English, Hindi and Korean. @@ -410,7 +410,7 @@ OlivierGalibert - SophieRosset + SophieRosset XavierTannier FannyGrandry Hybrid Citation Extraction from Patents @@ -419,8 +419,8 @@ galibert-etal-2010-hybrid - LucaDini - GiampaoloMazzini + LucaDini + GiampaoloMazzini The Impact of Grammar Enhancement on Semantic Resources Induction http://www.lrec-conf.org/proceedings/lrec2010/pdf/82_Paper.pdf In this paper describes the effects of the evolution of an Italian dependency grammar on a task of multilingual FrameNet acquisition. The task is based on the creation of virtual English/Italian parallel annotation corpora, which are then aligned at dependency level by using two manually encoded grammar based dependency parsers. We show how the evolution of the LAS (Labeled Attachment Score) metric for the considered grammar has a direct impact on the quality of the induced FrameNet, thus proving that the evolution of the quality of syntactic resources is mirrored by an analogous evolution in semantic ones. In particular we show that an improvement of 30% in LAS causes an improvement of precision for the induced resource ranging from 5% to 10%, depending on the type of evaluation. @@ -429,7 +429,7 @@ YiouWang KiyotakaUchimoto - Jun’ichiKazama + Jun’ichiKazama CanasaiKruengkrai KentaroTorisawa Adapting <fixed-case>C</fixed-case>hinese Word Segmentation for Machine Translation Based on Short Units @@ -450,11 +450,11 @@ SamiraShaikh - TomekStrzalkowski - AaronBroadwell - JenniferStromer-Galley - SarahTaylor - NickWebb + TomekStrzalkowski + AaronBroadwell + JenniferStromer-Galley + SarahTaylor + NickWebb <fixed-case>MPC</fixed-case>: A Multi-Party Chat Corpus for Modeling Social Phenomena in Discourse http://www.lrec-conf.org/proceedings/lrec2010/pdf/85_Paper.pdf In this paper, we describe our experience with collecting and creating an annotated corpus of multi-party online conversations in a chat-room environment. This effort is part of a larger project to develop computational models of social phenomena such as agenda control, influence, and leadership in on-line interactions. Such models will help capturing the dialogue dynamics that are essential for developing, among others, realistic human-machine dialogue systems, including autonomous virtual chat agents. In this paper we describe data collection method used and the characteristics of the initial dataset of English chat. We have devised a multi-tiered collection process in which the subjects start from simple, free-flowing conversations and progress towards more complex and structured interactions. In this paper, we report on the first two stages of this process, which were recently completed. The third, large-scale collection effort is currently being conducted. All English dialogue has been annotated at four levels: communication links, dialogue acts, local topics and meso-topics. Some details of these annotations will be discussed later in this paper, although a full description is impossible within the scope of this article. @@ -469,7 +469,7 @@ AlbertoSimões - José JoãoAlmeida + José JoãoAlmeida RitaFarinha Processing and Extracting Data from Dicionário Aberto http://www.lrec-conf.org/proceedings/lrec2010/pdf/90_Paper.pdf @@ -484,8 +484,8 @@ waltinger-2010-germanpolarityclues - AntonioPareja-Lora - Guadalupe Aguadode Cea + AntonioPareja-Lora + Guadalupe Aguadode Cea Ontology-based Interoperation of Linguistic Tools for an Improved Lemma Annotation in <fixed-case>S</fixed-case>panish http://www.lrec-conf.org/proceedings/lrec2010/pdf/92_Paper.pdf In this paper, we present an ontology-based methodology and architecture for the comparison, assessment, combination (and, to some extent, also contrastive evaluation) of the results of different linguistic tools. More specifically, we describe an experiment aiming at the improvement of the correctness of lemma tagging for Spanish. This improvement was achieved by means of the standardisation and combination of the results of three different linguistic annotation tools (Bitext’s DataLexica, Connexor’s FDG Parser and LACELL’s POS tagger), using (1) ontologies, (2) a set of lemma tagging correction rules, determined empirically during the experiment, and (3) W3C standard languages, such as XML, RDF(S) and OWL. As we show in the results of the experiment, the interoperation of these tools by means of ontologies and the correction rules applied in the experiment improved significantly the quality of the resulting lemma tagging (when compared to the separate lemma tagging performed by each of the tools that we made interoperate). @@ -508,8 +508,8 @@ campbell-tabata-2010-software - Ana CristinaMendes - LuísaCoheur + Ana CristinaMendes + LuísaCoheur Paula VazLobo Named Entity Recognition in Questions: Towards a Golden Collection http://www.lrec-conf.org/proceedings/lrec2010/pdf/97_Paper.pdf @@ -519,8 +519,8 @@ PatriziaPaggio JensAllwood - ElisabethAhlsén - KristiinaJokinen + ElisabethAhlsén + KristiinaJokinen CostanzaNavarretta The <fixed-case>NOMCO</fixed-case> Multimodal <fixed-case>N</fixed-case>ordic Resource - Goals and Characteristics http://www.lrec-conf.org/proceedings/lrec2010/pdf/98_Paper.pdf @@ -528,14 +528,14 @@ paggio-etal-2010-nomco - KikuoMaekawa + KikuoMaekawa MakotoYamazaki TakehikoMaruyama MasayaYamaguchi HidekiOgura WakakoKashino ToshinobuOgiso - HanaeKoiso + HanaeKoiso YasuharuDen Design, Compilation, and Preliminary Analyses of <fixed-case>B</fixed-case>alanced <fixed-case>C</fixed-case>orpus of <fixed-case>C</fixed-case>ontemporary <fixed-case>W</fixed-case>ritten <fixed-case>J</fixed-case>apanese http://www.lrec-conf.org/proceedings/lrec2010/pdf/99_Paper.pdf @@ -561,7 +561,7 @@ AhmetAker - RobertGaizauskas + RobertGaizauskas Model Summaries for Location-related Images http://www.lrec-conf.org/proceedings/lrec2010/pdf/102_Paper.pdf At present there is no publicly available data set to evaluate the performance of different summarization systems on the task of generating location-related extended image captions. In this paper we describe a corpus of human generated model captions in English and German. We have collected 932 model summaries in English from existing image descriptions and machine translated these summaries into German. We also performed post-editing on the translated German summaries to ensure high quality. Both English and German summaries are evaluated using a readability assessment as in DUC and TAC to assess their quality. Our model summaries performed similar to the ones reported in Dang (2005) and thus are suitable for evaluating automatic summarization systems on the task of generating image descriptions for location related images. In addition, we also investigated whether post-editing of machine-translated model summaries is necessary for automated ROUGE evaluations. We found a high correlation in ROUGE scores between post-edited and non-post-edited model summaries which indicates that the expensive process of post-editing is not necessary. @@ -587,7 +587,7 @@ jacquemin-2010-derivational - SherriCondon + SherriCondon DanParvaz JohnAberdeen ChristyDoran @@ -600,7 +600,7 @@ MahdiMohseni - BehrouzMinaei-bidgoli + BehrouzMinaei-bidgoli A <fixed-case>P</fixed-case>ersian Part-Of-Speech Tagger Based on Morphological Analysis http://www.lrec-conf.org/proceedings/lrec2010/pdf/107_Paper.pdf This paper describes a method based on morphological analysis of words for a Persian Part-Of-Speech (POS) tagging system. This is a main part of a process for expanding a large Persian corpus called Peyekare (or Textual Corpus of Persian Language). Peykare is arranged into two parts: annotated and unannotated parts. We use the annotated part in order to create an automatic morphological analyzer, a main segment of the system. Morphosyntactic features of Persian words cause two problems: the number of tags is increased in the corpus (586 tags) and the form of the words is changed. This high number of tags debilitates any taggers to work efficiently. From other side the change of word forms reduces the frequency of words with the same lemma; and the number of words belonging to a specific tag reduces as well. This problem also has a bad effect on statistical taggers. The morphological analyzer by removing the problems helps the tagger to cover a large number of tags in the corpus. Using a Markov tagger the method is evaluated on the corpus. The experiments show the efficiency of the method in Persian POS tagging. @@ -608,7 +608,7 @@ OlgaBabko-Malaya - DanHunter + DanHunter ConnieFournelle JimWhite Evaluation of Document Citations in Phase 2 Gale Distillation @@ -637,14 +637,14 @@ SilviaPareti - IrinaProdanof + IrinaProdanof Annotating Attribution Relations: Towards an <fixed-case>I</fixed-case>talian Discourse Treebank http://www.lrec-conf.org/proceedings/lrec2010/pdf/111_Paper.pdf In this paper we describe the development of a schema for the annotation of attribution relations and present the first findings and some relevant issues concerning this phenomenon. Following the D-LTAG approach to discourse, we have developed a lexically anchored description of attribution, considering this relation, contrary to the approach in the PDTB, independently from other discourse relations. This approach has allowed us to deal with the phenomenon in a broader perspective than previous studies, reaching therefore a more accurate description of it and making it possible to raise some still unaddressed issues. Following this analysis, we propose an annotation schema and discuss the first results concerning its applicability. The schema has been applied to a pilot portion of the ISST corpus of Italian and represents the initial phase of a project aiming at the creation of an Italian Discourse Treebank. We believe this work will raise some awareness concerning the fundamental importance of attribution relations. The identification of the source has in fact strong implications for the attributed material. Moreover, it will make overt the complexity of a phenomenon for long underestimated. pareti-prodanof-2010-annotating - NickWebb + NickWebb DavidBenyon PrebenHansen OilMival @@ -654,7 +654,7 @@ webb-etal-2010-evaluating - BeátaMegyesi + BeátaMegyesi BengtDahlqvist Éva Á.Csató JoakimNivre @@ -683,7 +683,7 @@ OscarSaz - EduardoLleida + EduardoLleida CarlosVaquero W.-RicardoRodríguez The Alborada-<fixed-case>I</fixed-case>3<fixed-case>A</fixed-case> Corpus of Disordered Speech @@ -693,7 +693,7 @@ SophiaAnaniadou - JohnMcNaught + JohnMcNaught JamesThomas MarkRickinson SandyOliver @@ -769,9 +769,9 @@ nakano-etal-2010-construction - JoãoSilva - AntónioBranco - PatriciaGonçalves + JoãoSilva + AntónioBranco + PatriciaGonçalves Top-Performing Robust Constituency Parsing of <fixed-case>P</fixed-case>ortuguese: Freely Available in as Many Ways as you Can Get it http://www.lrec-conf.org/proceedings/lrec2010/pdf/136_Paper.pdf In this paper we present LX-Parser, a probabilistic, robust constituency parser for Portuguese. This parser achieves ca. 88% f-score in the labeled bracketing task, thus reaching a state-of-the-art performance score that is in line with those that are currently obtained by top-ranking parsers for English, the most studied natural language. To the best of our knowledge, LX-Parser is the first state-of-the-art, robust constituency parser for Portuguese that is made freely available. This parser is being distributed in a variety of ways, each suited for a different type of usage. More specifically, LX-Parser is being made available (i) as a downloadable, stand-alone parsing tool that can be run locally by its users; (ii) as a Web service that exposes an interface that can be invoked remotely and transparently by client applications; and finally (iii) as an on-line parsing service, aimed at human users, that can be accessed through any common Web browser. @@ -781,21 +781,21 @@ SylvianeCardey KrzysztofBogacki XavierBlanco - RuslanMitkov + RuslanMitkov Resources for Controlled Languages for Alert Messages and Protocols in the <fixed-case>E</fixed-case>uropean Perspective http://www.lrec-conf.org/proceedings/lrec2010/pdf/137_Paper.pdf This paper is concerned with resources for controlled languages for alert messages and protocols in the European perspective. These resources have been produced as the outcome of a project (Alert Messages and Protocols: MESSAGE) which has been funded with the support of the European Commission - Directorate-General Justice, Freedom and Security, and with the specific objective of 'promoting and supporting the development of security standards, and an exchange of know-how and experience on protection of people'. The MESSAGE project involved the development and transfer of a methodology for writing safe and safely translatable alert messages and protocols created by Centre Tesnière in collaboration with the aircraft industry, the health profession, and emergency services by means of a consortium of four partners to their four European member states in their languages (ES, FR (Coordinator), GB, PL). The paper describes alert messages and protocols, controlled languages for safety and security, the target groups involved, controlled language evaluation, dissemination, the resources that are available, both “Freely available” and “From Owner”, together with illustrations of the resources, and the potential transferability to other sectors and users. cardey-etal-2010-resources - TomažErjavec + TomažErjavec <fixed-case>MULTEXT</fixed-case>-East Version 4: Multilingual Morphosyntactic Specifications, Lexicons and Corpora http://www.lrec-conf.org/proceedings/lrec2010/pdf/138_Paper.pdf The paper presents the fourth, ``Mondilex'' edition of the MULTEXT-East language resources, a multilingual dataset for language engineering research and development, focused on the morphosyntactic level of linguistic description. This standardised and linked set of resources covers a large number of mainly Central and Eastern European languages and includes the EAGLES-based morphosyntactic specifications; morphosyntactic lexica; and annotated parallel, comparable, and speech corpora. The fourth release of these resources introduces XML-encoded morphosyntactic specifications and adds six new languages, bringing the total to 16: to Bulgarian, Croatian, Czech, Estonian, English, Hungarian, Romanian, Serbian, Slovene, and the Resian dialect of Slovene it adds Macedonian, Persian, Polish, Russian, Slovak, and Ukrainian. This dataset, unique in terms of languages covered and the wealth of encoding, is extensively documented, and freely available for research purposes at http://nl.ijs.si/ME/V4/. erjavec-2010-multext - TomažErjavec + TomažErjavec DarjaFišer SimonKrek NinaLedinek @@ -816,15 +816,15 @@ DarjaFišer SenjaPollak - ŠpelaVintar + ŠpelaVintar Learning to Mine Definitions from <fixed-case>S</fixed-case>lovene Structured and Unstructured Knowledge-Rich Resources http://www.lrec-conf.org/proceedings/lrec2010/pdf/141_Paper.pdf The paper presents an innovative approach to extract Slovene definition candidates from domain-specific corpora using morphosyntactic patterns, automatic terminology recognition and semantic tagging with wordnet senses. First, a classification model was trained on examples from Slovene Wikipedia which was then used to find well-formed definitions among the extracted candidates. The results of the experiment are encouraging, with accuracy ranging from 67% to 71%. The paper also addresses some drawbacks of the approach and suggests ways to overcome them in future work. fiser-etal-2010-learning - DanTufiş - DanŞtefănescu + DanTufiş + DanŞtefănescu A Differential Semantics Approach to the Annotation of Synsets in <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et http://www.lrec-conf.org/proceedings/lrec2010/pdf/142_Paper.pdf We describe a new method for sentiment load annotation of the synsets of a wordnet, along the principles of Osgood’s “Semantic Differential” theory and extending the Kamp and Marx calculus, by taking into account not only the WordNet structure but also the SUMO/MILO (Niles & Pease, 2001) and DOMAINS (Bentivogli et al., 2004) knowledge sources. We discuss the method to annotate all the synsets in PWN2.0, irrespective of their part of speech. As the number of possible factors (semantic oppositions, along which the synsets are ranked) is very large, we developed also an application allowing the text analyst to select the most discriminating factors for the type of text to be analyzed. Once the factors have been selected, the underlying wordnet is marked-up on the fly and it can be used for the intended textual analysis. We anticipate that these annotations can be imported in other language wordnets, provided they are aligned to PWN2.0. The method for the synsets annotation generalizes the usual subjectivity mark-up (positive, negative and objective) according to a user-based multi-criteria differential semantics model. @@ -838,7 +838,7 @@ grishina-2010-multimodal - JörgTiedemann + JörgTiedemann Lingua-Align: An Experimental Toolbox for Automatic Tree-to-Tree Alignment http://www.lrec-conf.org/proceedings/lrec2010/pdf/144_Paper.pdf In this paper we present an experimental toolbox for automatic tree-to-tree alignment based on a binary classification model. The aligner implements a recurrent architecture for structural prediction using history features and a sequential classification procedure. The discriminative base classifier uses a log-linear model in the current setup which enables simple integration of various features extracted from the data. The Lingua-Align toolbox provides a flexible framework for feature extraction including contextual properties and implements several alignment inference procedures. Various settings and constraints can be controlled via a simple frontend or called from external scripts. Lingua-Align supports different treebank formats and includes additional tools for conversion and evaluation. In our experiments we can show that our tree aligner produces results with high quality and outperforms unsupervised techniques proposed otherwise. It also integrates well with another existing tool for manual tree alignment which makes it possible to quickly integrate additional training material and to run semi-automatic alignment strategies. @@ -869,12 +869,12 @@ wawer-2010-sentiment - IñakiAlegria + IñakiAlegria GarbiñeAranbarri KlaraCeberio - GorkaLabaka + GorkaLabaka BittorLaskurain - RubenUrizar + RubenUrizar A Morphological Processor Based on <fixed-case>F</fixed-case>oma for <fixed-case>B</fixed-case>iscayan (a <fixed-case>B</fixed-case>asque dialect) http://www.lrec-conf.org/proceedings/lrec2010/pdf/150_Paper.pdf We present a new morphological processor for Biscayan, a dialect of Basque, developed on the description of the morphology of standard Basque. The database for the standard morphology has been extended for dialects and an open-source tool for morphological description named foma is used for building the processor. Biscayan is a dialect of the Basque language spoken mainly in Biscay, a province on the western of the Basque Country. The description of the lexicon and the morphotactics (or word grammar) for the standard Basque was carried out using a relational database and the database has been extended in order to include dialectal variants linked to the standard entries. XuxenB, a spelling checker/corrector for this dialect, is the first application of this work. Additionally to the basic analyzer used for spelling, a new transducer is included. It is an enhanced analyzer for linking standard form with the corresponding standard ones. It is used in correction for generation of proposals when in the input text appear standard forms which we want to replace with dialectal forms. @@ -891,14 +891,14 @@ przepiorkowski-etal-2010-recent - AntónioBranco + AntónioBranco FranciscoCosta - JoãoSilva + JoãoSilva SaraSilveira SérgioCastro MarianaAvelãs ClaraPinto - JoãoGraça + JoãoGraça Developing a Deep Linguistic Databank Supporting a Collection of Treebanks: the <fixed-case>CINTIL</fixed-case> <fixed-case>D</fixed-case>eep<fixed-case>G</fixed-case>ram<fixed-case>B</fixed-case>ank http://www.lrec-conf.org/proceedings/lrec2010/pdf/154_Paper.pdf Corpora of sentences annotated with grammatical information have been deployed by extending the basic lexical and morphological data with increasingly complex information, such as phrase constituency, syntactic functions, semantic roles, etc. As these corpora grow in size and the linguistic information to be encoded reaches higher levels of sophistication, the utilization of annotation tools and, above all, supporting computational grammars appear no longer as a matter of convenience but of necessity. In this paper, we report on the design features, the development conditions and the methodological options of a deep linguistic databank, the CINTIL DeepGramBank. In this corpus, sentences are annotated with fully fledged linguistically informed grammatical representations that are produced by a deep linguistic processing grammar, thus consistently integrating morphological, syntactic and semantic information. We also report on how such corpus permits to straightforwardly obtain a whole range of past generation annotated corpora (POS, NER and morphology), current generation treebanks (constituency treebanks, dependency banks, propbanks) and next generation databanks (logical form banks) simply by means of a very residual selection/extraction effort to get the appropriate ""views"" exposing the relevant layers of information. @@ -914,8 +914,8 @@ borin-etal-2010-diabase - AnneAbeillé - DanièleGodard + AnneAbeillé + DanièleGodard The Grande Grammaire du Français Project http://www.lrec-conf.org/proceedings/lrec2010/pdf/157_Paper.pdf We present a new reference Grammar of French (La Grande Grammaire du français), which is a collective project (gathering around fifty contributors), producing a book (about 2200 pages, to be published en 2011) and associated databases. Like the recent reference grammars of the other Romance Languages, it takes into account the important results of the linguistic research of the past thrity years, while aiming at a non specialist audience and avoiding formalization. We differ from existing French grammar by being focused on contemporary French from a purely descriptive point of view, and by taking spoken data into account. We include a description of all the syntactic phenomena, as well as lexical, semantic, pragmatic and prosodic insights, specially as they interact with syntax. The analysis concerns the data from contemporary written French, but also includes data from spoken corpora and regional or non standard French (when accessible). Throughout the grammar, a simple phrase structure grammar is used, in order to maintain a common representation. The analyses are modular with a strict division of labor between morphology, syntax and semantics. From the syntactic point of view, POS are also distinguished from grammatical relations (or functions). The databases include a terminological glossary, different lexical databases for certain POS, certain valence frames and certain semantic classes, and a bibliographical database. @@ -941,8 +941,8 @@ MartaRecasens - EduardHovy - M. AntòniaMartí + EduardHovy + M. AntòniaMartí A Typology of Near-Identity Relations for Coreference (<fixed-case>NIDENT</fixed-case>) http://www.lrec-conf.org/proceedings/lrec2010/pdf/160_Paper.pdf The task of coreference resolution requires people or systems to decide when two referring expressions refer to the 'same' entity or event. In real text, this is often a difficult decision because identity is never adequately defined, leading to contradictory treatment of cases in previous work. This paper introduces the concept of 'near-identity', a middle ground category between identity and non-identity, to handle such cases systematically. We present a typology of Near-Identity Relations (NIDENT) that includes fifteen types―grouped under four main families―that capture a wide range of ways in which (near-)coreference relations hold between discourse entities. We validate the theoretical model by annotating a small sample of real data and showing that inter-annotator agreement is high enough for stability (K=0.58, and up to K=0.65 and K=0.84 when leaving out one and two outliers, respectively). This work enables subsequent creation of the first internally consistent language resource of this type through larger annotation efforts. @@ -950,7 +950,7 @@ InekeSchuurman - VéroniqueHoste + VéroniqueHoste PaolaMonachesi Interacting Semantic Layers of Annotation in <fixed-case>S</fixed-case>o<fixed-case>N</fixed-case>a<fixed-case>R</fixed-case>, a Reference Corpus of Contemporary Written <fixed-case>D</fixed-case>utch http://www.lrec-conf.org/proceedings/lrec2010/pdf/162_Paper.pdf @@ -958,12 +958,12 @@ schuurman-etal-2010-interacting - DaanBroeder + DaanBroeder MarcKemps-Snijders - DieterVan Uytvanck + DieterVan Uytvanck MenzoWindhouwer PeterWithers - PeterWittenburg + PeterWittenburg ClausZinn A Data Category Registry- and Component-based Metadata Framework http://www.lrec-conf.org/proceedings/lrec2010/pdf/163_Paper.pdf @@ -995,7 +995,7 @@ NaushadUzZaman - JamesAllen + JamesAllen <fixed-case>TRIOS</fixed-case>-<fixed-case>T</fixed-case>ime<fixed-case>B</fixed-case>ank Corpus: Extended <fixed-case>T</fixed-case>ime<fixed-case>B</fixed-case>ank Corpus with Help of Deep Understanding of Text http://www.lrec-conf.org/proceedings/lrec2010/pdf/169_Paper.pdf TimeBank (Pustejovsky et al, 2003a), a reference for TimeML (Pustejovsky et al, 2003b) compliant annotation, is widely used temporally annotated corpus in the community. It captures time expressions, events, and relations between events and event and temporal expression; but there is room for improvements in this hand-annotated widely used TimeBank corpus. This work is one such effort to extend the TimeBank corpus. Our first goal is to suggest missing TimeBank events and temporal expressions, i.e. events and temporal expressions that were missed by TimeBank annotators. Along with that this paper also suggests some additions to TimeML language by adding new event features (ontology type), some more SLINKs and also relations between events with their arguments, which we call RLINK (relation link). With our new suggestions we present the TRIOS-TimeBank corpus, an extended TimeBank corpus. We conclude by suggesting our future work to clean the TimeBank corpus even more and automatically generating larger temporally annotated corpus for the community. @@ -1003,7 +1003,7 @@ AdamFunk - KalinaBontcheva + KalinaBontcheva Ontology-Based Categorization of Web Services with Machine Learning http://www.lrec-conf.org/proceedings/lrec2010/pdf/170_Paper.pdf We present the problem of categorizing web services according to a shallow ontology for presentation on a specialist portal, using their WSDL and associated textual documents found by a crawler. We treat this as a text classification problem and apply first information extraction (IE) techniques (voting using keywords weight according to their context), then machine learning (ML), and finally a combined approach in which ML has priority over weighted keywords, but the latter can still make up categorizations for services for which ML does not produce enough. We evaluate the techniques (using data manually annotated through the portal, which we also use as the training data for ML) according to standard IE measures for flat categorization as well as the Balanced Distance Metric (more suitable for ontological classification) and compare them with related work in web service categorization. The ML and combined categorization results are good and the system is designed to take users' contributions through the portal's Web 2.0 features as additional training data. @@ -1027,7 +1027,7 @@ nir-etal-2010-morphologically - KristiinaJokinen + KristiinaJokinen Non-verbal Signals for Turn-taking and Feedback http://www.lrec-conf.org/proceedings/lrec2010/pdf/173_Paper.pdf This paper concerns non-verbal communication, and describes especially the use of eye-gaze to signal turn-taking and feedback in conversational settings. Eye-gaze supports smooth interaction by providing signals that the interlocutors interpret with respect to such conversational functions as taking turns and giving feedback. New possibilities to study the effect of eye-gaze on the interlocutors’ communicative behaviour have appeared with the eye-tracking technology which in the past years has matured to the level where its use to study naturally occurring dialogues have become easier and more reliable to conduct. It enables the tracking of eye-fixations and gaze-paths, and thus allows analysis of the person’s turn-taking and feedback behaviour through the analysis of their focus of attention. In this paper, experiments on the interlocutors’ non-verbal communication in conversational settings using the eye-tracker are reported, and results of classifying turn-taking using eye-gaze and gesture information are presented. Also the hybrid method that combines signal level analysis with human interpretation is discussed. @@ -1035,10 +1035,10 @@ AlejandroAbejón - Doroteo T.Toledano + Doroteo T.Toledano DaniloSpada GonzálezVictor - Daniel HernándezLópez + Daniel HernándezLópez A Study of the Influence of Speech Type on Automatic Language Recognition Performance http://www.lrec-conf.org/proceedings/lrec2010/pdf/174_Paper.pdf Automatic language recognition on spontaneous speech has experienced a rapid development in the last few years. This development has been in part due to the competitive technological Language Recognition Evaluations (LRE) organized by the National Institute of Standards and Technology (NIST). Until now, the need to have clearly defined and consistent evaluations has kept some real-life application issues out of these evaluations. In particular, all past NIST LREs have used exclusively conversational telephone speech (CTS) for development and test. Fortunately this has changed in the current NIST LRE since it includes also broadcast speech. However, for testing only the telephone speech found in broadcast data will be used. In real-life applications, there could be several more types of speech and systems could be forced to use a mix of different types of data for training and development and recognition. In this article, we have defined a test-bed including several types of speech data and have analyzed how a typical language recognition system works using different types of speech, and also a combination of different types of speech, for training and testing. @@ -1053,7 +1053,7 @@ FrançoisLefebvre-Albaret - PatriceDalle + PatriceDalle Video Retrieval in Sign Language Videos : How to Model and Compare Signs? http://www.lrec-conf.org/proceedings/lrec2010/pdf/176_Paper.pdf This paper deals with the problem of finding sign occurrences in a sign language (SL) video. It begins with an analysis of sign models and the way they can take into account the sign variability. Then, we review the most popular technics dedicated to automatic sign language processing and we focus on their adaptation to model sign variability. We present a new method to provide a parametric description of the sign as a set of continuous and discrete parameters. Signs are classified according to there categories (ballistic movements, circles ...), the symmetry between the hand movements, hand absolute and relative locations. Membership grades to sign categories and continuous parameter comparisons can be combined to estimate the similarity between two signs. We set out our system and we evaluate how much time can be saved when looking for a sign in a french sign language video. By now, our formalism only uses hand 2D locations, we finally discuss about the way of integrating other parameters as hand shape or facial expression in our framework. @@ -1116,7 +1116,7 @@ SaraTonelli GiuseppeRiccardi RashmiPrasad - AravindJoshi + AravindJoshi Annotation of Discourse Relations for Conversational Spoken Dialogs http://www.lrec-conf.org/proceedings/lrec2010/pdf/184_Paper.pdf In this paper, we make a qualitative and quantitative analysis of discourse relations within the LUNA conversational spoken dialog corpus. In particular, we first describe the Penn Discourse Treebank (PDTB) and then we detail the adaptation of its annotation scheme to the LUNA corpus of Italian task-oriented dialogs in the domain of software/hardware assistance. We discuss similarities and differences between our approach and the PDTB paradigm and point out the peculiarities of spontaneous dialogs w.r.t. written text, which motivated some changes in the annotation strategy. In particular, we introduced the annotation of relations between non-contiguous arguments and we modified the sense hierarchy in order to take into account the important role of pragmatics in dialogs. In the final part of the paper, we present a comparison between the sense and connective frequency in a representative subset of the LUNA corpus and in the PDTB. Such analysis confirmed the differences between the two corpora and corroborates our choice to introduce dialog-specific adaptations. @@ -1133,11 +1133,11 @@ LudovicQuintard OlivierGalibert - GillesAdda + GillesAdda BrigitteGrau DominiqueLaurent - VéroniqueMoriceau - SophieRosset + VéroniqueMoriceau + SophieRosset XavierTannier AnneVilnat Question Answering on Web Data: The <fixed-case>QA</fixed-case> Evaluation in Quæro @@ -1156,11 +1156,11 @@ OlivierGalibert LudovicQuintard - SophieRosset - PierreZweigenbaum - ClaireNédellec + SophieRosset + PierreZweigenbaum + ClaireNédellec SophieAubin - LaurentGillard + LaurentGillard Jean-PierreRaysz DelphinePois XavierTannier @@ -1192,7 +1192,7 @@ Muhammad KamranMalik - TafseerAhmed + TafseerAhmed SebastianSulger TinaBögel AtifGulzar @@ -1206,7 +1206,7 @@ VolhaPetukhova - HarryBunt + HarryBunt Towards an Integrated Scheme for Semantic Annotation of Multimodal Dialogue Data http://www.lrec-conf.org/proceedings/lrec2010/pdf/195_Paper.pdf Recent years witness a growing interest in the use of multimodal data for modelling of communicative behaviour in dialogue. Dybkjaer and Bernsen (2002), point out that coding schemes for multimodal data are used solely by their creators. Standardisation has been achieved to some extent for coding behavioural features for certain nonverbal expressions, e.g. for facial expression, however, for the semantic annotation of such expressions combined with other modalities such as speech there is still a long way to go. The majority of existing dialogue act annotation schemes that are designed to code semantic and pragmatic dialogue information are limited to analysis of spoken modality. This paper investigates the applicability of existing dialogue act annotation schemes to the semantic annotation of multimodal data, and the way a dialogue act annotation scheme can be extended to cover dialogue phenomena from multiple modalities. The general conclusion of our explorative study is that a multidimensional dialogue act taxonomy is usable for this purpose when some adjustments are made. We proposed a solution for adding these aspects to a dialogue act annotation scheme without changing its set of communicative functions, in the form of qualifiers that can be attached to communicative function tags. @@ -1214,15 +1214,15 @@ CristinaBosco - SimonettaMontemagni - AlessandroMazzei + SimonettaMontemagni + AlessandroMazzei VincenzoLombardo FeliceDell’Orletta AlessandroLenci - LeonardoLesmo + LeonardoLesmo GiuseppeAttardi MariaSimi - AlbertoLavelli + AlbertoLavelli JohanHall JensNilsson JoakimNivre @@ -1241,14 +1241,14 @@ HaiZhao YanSong - ChunyuKit + ChunyuKit How Large a Corpus Do We Need: Statistical Method Versus Rule-based Method http://www.lrec-conf.org/proceedings/lrec2010/pdf/199_Paper.pdf We investigate the impact of input data scale in corpus-based learning using a study style of Zipf’s law. In our research, Chinese word segmentation is chosen as the study case and a series of experiments are specially conducted for it, in which two types of segmentation techniques, statistical learning and rule-based methods, are examined. The empirical results show that a linear performance improvement in statistical learning requires an exponential increasing of training corpus size at least. As for the rule-based method, an approximate negative inverse relationship between the performance and the size of the input lexicon can be observed. zhao-etal-2010-large - Bolette S.Pedersen + Bolette S.Pedersen SanniNimb AnnaBraasch Merging Specialist Taxonomies and Folk Taxonomies in Wordnets - A case Study of Plants, Animals and Foods in the <fixed-case>D</fixed-case>anish <fixed-case>W</fixed-case>ordnet @@ -1258,14 +1258,14 @@ MartaTatu - DanMoldovan + DanMoldovan Inducing Ontologies from Folksonomies using Natural Language Understanding http://www.lrec-conf.org/proceedings/lrec2010/pdf/203_Paper.pdf Folksonomies are unsystematic, unsophisticated collections of keywords associated by social bookmarking users to web content and, despite their inconsistency problems (typographical errors, spelling variations, use of space or punctuation as delimiters, same tag applied in different context, synonymy of concepts, etc.), their popularity is increasing among Web 2.0 application developers. In this paper, in addition to eliminating folksonomic irregularities existing at the lexical, syntactic or semantic understanding levels, we propose an algorithm that automatically builds a semantic representation of the folksonomy by exploiting the tags, their social bookmarking associations (co-occuring tags) and, more importantly, the content of labeled documents. We derive the semantics of each tag, discover semantic links between the folksonomic tags and expose the underlying semantic structure of the folksonomy, thus, enabling a number of information discovery and ontology-based reasoning applications. tatu-moldovan-2010-inducing - OrphéeDe Clercq + OrphéeDe Clercq Maribel MonteroPerez Data Collection and <fixed-case>IPR</fixed-case> in Multilingual Parallel Corpora. <fixed-case>D</fixed-case>utch Parallel Corpus http://www.lrec-conf.org/proceedings/lrec2010/pdf/204_Paper.pdf @@ -1273,7 +1273,7 @@ de-clercq-perez-2010-data - AgataCybulska + AgataCybulska PiekVossen Event Models for Historical Perspectives: Determining Relations between High and Low Level Events in Text, Based on the Classification of Time, Location and Participants. http://www.lrec-conf.org/proceedings/lrec2010/pdf/205_Paper.pdf @@ -1297,7 +1297,7 @@ MatthieuVernier LauraMonceaux - BéatriceDaille + BéatriceDaille Learning Subjectivity Phrases missing from Resources through a Large Set of Semantic Tests http://www.lrec-conf.org/proceedings/lrec2010/pdf/208_Paper.pdf In recent years, blogs and social networks have particularly boosted interests for opinion mining research. In order to satisfy real-scale applicative needs, a main task is to create or to enhance lexical and semantic resources on evaluative language. Classical resources of the area are mostly built for english, they contain simple opinion word markers and are far to cover the lexical richness of this linguistic phenomenon. In particular, infrequent subjective words, idiomatic expressions, and cultural stereotypes are missing from resources. We propose a new method, applied on french, to enhance automatically an opinion word lexicon. This learning method relies on linguistic uses of internet users and on semantic tests to infer the degree of subjectivity of many new adjectives, nouns, verbs, noun phrases, verbal phrases which are usually forgotten by other resources. The final appraisal lexicon contains 3,456 entries. We evaluate the lexicon enhancement with and without textual context. @@ -1314,7 +1314,7 @@ BartDesmet - VéroniqueHoste + VéroniqueHoste Towards a Balanced Named Entity Corpus for <fixed-case>D</fixed-case>utch http://www.lrec-conf.org/proceedings/lrec2010/pdf/210_Paper.pdf This paper introduces a new named entity corpus for Dutch. State-of-the-art named entity recognition systems require a substantial annotated corpus to be trained on. Such corpora exist for English, but not for Dutch. The STEVIN-funded SoNaR project aims to produce a diverse 500-million-word reference corpus of written Dutch, with four semantic annotation layers: named entities, coreference relations, semantic roles and spatiotemporal expressions. A 1-million-word subset will be manually corrected. Named entity annotation guidelines for Dutch were developed, adapted from the MUC and ACE guidelines. Adaptations include the annotation of products and events, the classification into subtypes, and the markup of metonymic usage. Inter-annotator agreement experiments were conducted to corroborate the reliability of the guidelines, which yielded satisfactory results (Kappa scores above 0.90). We are building a NER system, trained on the 1-million-word subcorpus, to automatically classify the remainder of the SoNaR corpus. To this end, experiments with various classification algorithms (MBL, SVM, CRF) and features have been carried out and evaluated. @@ -1322,7 +1322,7 @@ GrégorySenay - GeorgesLinarès + GeorgesLinarès BenjaminLecouteux StanislasOger ThierryMichel @@ -1342,8 +1342,8 @@ ZiqiZhang - JoséIria - FabioCiravegna + JoséIria + FabioCiravegna Improving Domain-specific Entity Recognition with Automatic Term Recognition and Feature Extraction http://www.lrec-conf.org/proceedings/lrec2010/pdf/214_Paper.pdf Domain specific entity recognition often relies on domain-specific knowledge to improve system performance. However, such knowledge often suffers from limited domain portability and is expensive to build and maintain. Therefore, obtaining it in a generic and unsupervised manner would be a desirable feature for domain-specific entity recognition systems. In this paper, we introduce an approach that exploits domain-specificity of words as a form of domain-knowledge for entity-recognition tasks. Compared to prior work in the field, our approach is generic and completely unsupervised. We empirically show an improvement in entity extraction accuracy when features derived by our unsupervised method are used, with respect to baseline methods that do not employ domain knowledge. We also compared the results against those of existing systems that use manually crafted domain knowledge, and found them to be competitive. @@ -1359,10 +1359,10 @@ GiuseppeAttardi - Stefano DeiRossi + Stefano DeiRossi GiuliaDi Pietro AlessandroLenci - SimonettaMontemagni + SimonettaMontemagni MariaSimi A Resource and Tool for Super-sense Tagging of <fixed-case>I</fixed-case>talian Texts http://www.lrec-conf.org/proceedings/lrec2010/pdf/216_Paper.pdf @@ -1370,9 +1370,9 @@ attardi-etal-2010-resource - IzaskunAldezabal + IzaskunAldezabal María JesúsAranzabe - ArantzaDíaz de Ilarraza + ArantzaDíaz de Ilarraza AinaraEstarrona Building the <fixed-case>B</fixed-case>asque <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank http://www.lrec-conf.org/proceedings/lrec2010/pdf/217_Paper.pdf @@ -1392,8 +1392,8 @@ NikosTsourakis AgnesLisowska - MannyRayner - PierretteBouillon + MannyRayner + PierretteBouillon Examining the Effects of Rephrasing User Input on Two Mobile Spoken Language Systems http://www.lrec-conf.org/proceedings/lrec2010/pdf/220_Paper.pdf During the construction of a spoken dialogue system much effort is spent on improving the quality of speech recognition as possible. However, even if an application perfectly recognizes the input, its understanding may be far from what the user originally meant. The user should be informed about what the system actually understood so that an error will not have a negative impact in the later stages of the dialogue. One important aspect that this work tries to address is the effect of presenting the system’s understanding during interaction with users. We argue that for specific kinds of applications it’s important to confirm the understanding of the system before obtaining the output. In this way the user can avoid misconceptions and problems occurring in the dialogue flow and he can enhance his confidence in the system. Nevertheless this has an impact on the interaction, as the mental workload increases, and the user’s behavior may adapt to the system’s coverage. We focus on two applications that implement the notion of rephrasing user’s input in a different way. Our study took place among 14 subjects that used both systems on a Nokia N810 Internet Tablet. @@ -1401,10 +1401,10 @@ SamuelReese - GemmaBoleda + GemmaBoleda MontseCuadros - LluísPadró - GermanRigau + LluísPadró + GermanRigau <fixed-case>W</fixed-case>ikicorpus: A Word-Sense Disambiguated Multilingual <fixed-case>W</fixed-case>ikipedia Corpus http://www.lrec-conf.org/proceedings/lrec2010/pdf/222_Paper.pdf This article presents a new freely available trilingual corpus (Catalan, Spanish, English) that contains large portions of the Wikipedia and has been automatically enriched with linguistic information. To our knowledge, this is the largest such corpus that is freely available to the community: In its present version, it contains over 750 million words. The corpora have been annotated with lemma and part of speech information using the open source library FreeLing. Also, they have been sense annotated with the state of the art Word Sense Disambiguation algorithm UKB. As UKB assigns WordNet senses, and WordNet has been aligned across languages via the InterLingual Index, this sort of annotation opens the way to massive explorations in lexical semantics that were not possible before. We present a first attempt at creating a trilingual lexical resource from the sense-tagged Wikipedia corpora, namely, WikiNet. Moreover, we present two by-products of the project that are of use for the NLP community: An open source Java-based parser for Wikipedia pages developed for the construction of the corpus, and the integration of the WSD algorithm UKB in FreeLing. @@ -1420,10 +1420,10 @@ MaximKhalilov - José A. R.Fonollosa - IngunaSkadin̨a + José A. R.Fonollosa + IngunaSkadin̨a EdgarsBrālītis - LaumaPretkalnin̨a + LaumaPretkalnin̨a Towards Improving <fixed-case>E</fixed-case>nglish-<fixed-case>L</fixed-case>atvian Translation: A System Comparison and a New Rescoring Feature http://www.lrec-conf.org/proceedings/lrec2010/pdf/228_Paper.pdf Translation into the languages with relatively free word order has received a lot less attention than translation into fixed word order languages (English), or into analytical languages (Chinese). At the same time this translation task is found among the most difficult challenges for machine translation (MT), and intuitively it seems that there is some space in improvement intending to reflect the free word order structure of the target language. This paper presents a comparative study of two alternative approaches to statistical machine translation (SMT) and their application to a task of English-to-Latvian translation. Furthermore, a novel feature intending to reflect the relatively free word order scheme of the Latvian language is proposed and successfully applied on the n-best list rescoring step. Moving beyond classical automatic scores of translation quality that are classically presented in MT research papers, we contribute presenting a manual error analysis of MT systems output that helps to shed light on advantages and disadvantages of the SMT systems under consideration. @@ -1439,9 +1439,9 @@ sidorov-etal-2010-english - FernandoFernández-Martínez - Juan ManuelLucas-Cuesta - Roberto BarraChicote + FernandoFernández-Martínez + Juan ManuelLucas-Cuesta + Roberto BarraChicote JavierFerreiros JavierMacías-Guarasa <fixed-case>HIFI</fixed-case>-<fixed-case>AV</fixed-case>: An Audio-visual Corpus for Spoken Language Human-Machine Dialogue Research in <fixed-case>S</fixed-case>panish @@ -1458,7 +1458,7 @@ DekangLin - KennethChurch + KennethChurch HengJi SatoshiSekine DavidYarowsky @@ -1476,9 +1476,9 @@ EricAuer - AlbertRussel + AlbertRussel HanSloetjes - PeterWittenburg + PeterWittenburg OliverSchreer S.Masnieri DanielSchneider @@ -1490,9 +1490,9 @@ DamjanVlaj - Aleksandra ZöglingMarkuš + Aleksandra ZöglingMarkuš MarkoKos - ZdravkoKačič + ZdravkoKačič Acquisition and Annotation of <fixed-case>S</fixed-case>lovenian <fixed-case>L</fixed-case>ombard Speech Database http://www.lrec-conf.org/proceedings/lrec2010/pdf/235_Paper.pdf This paper presents the acquisition and annotation of Slovenian Lombard Speech Database, the recording of which started in the year 2008. The database was recorded at the University of Maribor, Slovenia. The goal of this paper is to describe the hardware platform used for the acquisition of speech material, recording scenarios and tools used for the annotation of Slovenian Lombard Speech Database. The database consists of recordings of 10 Slovenian native speakers. Five males and five females were recorded. Each speaker pronounced a set of eight corpuses in two recording sessions with at least one week pause between recordings. The structure of the corpus is similar to SpeechDat II database. Approximately 30 minutes of speech material per speaker and per session was recorded. The manual annotation of speech material is performed with the LombardSpeechLabel tool developed at the University of Maribor. The speech and annotation material was saved on 10 DVDs (one speaker on one DVD). @@ -1516,7 +1516,7 @@ Lun-WeiKu - Ting-HaoHuang + Ting-HaoHuang Hsin-HsiChen Construction of a <fixed-case>C</fixed-case>hinese Opinion Treebank http://www.lrec-conf.org/proceedings/lrec2010/pdf/242_Paper.pdf @@ -1526,7 +1526,7 @@ TakeshiAbekawa MasaoUtiyama - EiichiroSumita + EiichiroSumita KyoKageura Community-based Construction of Draft and Final Translation Corpus Through a Translation Hosting Site Minna no Hon’yaku (<fixed-case>MNH</fixed-case>) http://www.lrec-conf.org/proceedings/lrec2010/pdf/243_Paper.pdf @@ -1535,8 +1535,8 @@ VamshiAmbati - StephanVogel - JaimeCarbonell + StephanVogel + JaimeCarbonell Active Learning and Crowd-Sourcing for Machine Translation http://www.lrec-conf.org/proceedings/lrec2010/pdf/244_Paper.pdf Large scale parallel data generation for new language pairs requires intensive human effort and availability of experts. It becomes immensely difficult and costly to provide Statistical Machine Translation (SMT) systems for most languages due to the paucity of expert translators to provide parallel data. Even if experts are present, it appears infeasible due to the impending costs. In this paper we propose Active Crowd Translation (ACT), a new paradigm where active learning and crowd-sourcing come together to enable automatic translation for low-resource language pairs. Active learning aims at reducing cost of label acquisition by prioritizing the most informative data for annotation, while crowd-sourcing reduces cost by using the power of the crowds to make do for the lack of expensive language experts. We experiment and compare our active learning strategies with strong baselines and see significant improvements in translation quality. Similarly, our experiments with crowd-sourcing on Mechanical Turk have shown that it is possible to create parallel corpora using non-experts and with sufficient quality assurance, a translation system that is trained using this corpus approaches expert quality. @@ -1551,7 +1551,7 @@ dalianis-velupillai-2010-certain - WinstonAnderson + WinstonAnderson LaurettePretorius AlbertKotzé Base Concepts in the <fixed-case>A</fixed-case>frican Languages Compared to Upper Ontologies and the <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Top Ontology @@ -1563,7 +1563,7 @@ KeyanZhou AijunLi ZhigangYin - ChengqingZong + ChengqingZong <fixed-case>CASIA</fixed-case>-<fixed-case>CASSIL</fixed-case>: a <fixed-case>C</fixed-case>hinese Telephone Conversation Corpus in Real Scenarios with Multi-leveled Annotation http://www.lrec-conf.org/proceedings/lrec2010/pdf/248_Paper.pdf CASIA-CASSIL is a large-scale corpus base of Chinese human-human naturally-occurring telephone conversations in restricted domains. The first edition consists of 792 90-second conversations belonging to tourism domain, which are selected from 7,639 spontaneous telephone recordings in real scenarios. The corpus is now being annotated with wide range of linguistic and paralinguistic information in multi-levels. The annotations include Turns, Speaker Gender, Orthographic Transcription, Chinese Syllable, Chinese Phonetic Transcription, Prosodic Boundary, Stress of Sentence, Non-Speech Sounds, Voice Quality, Topic, Dialog-act and Adjacency Pairs, Ill-formedness, and Expressive Emotion as well, 13 levels in total. The abundant annotation will be effective especially for studying Chinese spoken language phenomena. This paper describes the whole process to build the conversation corpus, including collecting and selecting the original data, and the follow-up process such as transcribing, annotating, and so on. CASIA-CASSIL is being extended to a large scale corpus base of annotated Chinese dialogs for spoken Chinese study. @@ -1580,7 +1580,7 @@ RuudKoolen - EmielKrahmer + EmielKrahmer The <fixed-case>D</fixed-case>-<fixed-case>TUNA</fixed-case> Corpus: A <fixed-case>D</fixed-case>utch Dataset for the Evaluation of Referring Expression Generation Algorithms http://www.lrec-conf.org/proceedings/lrec2010/pdf/251_Paper.pdf We present the D-TUNA corpus, which is the first semantically annotated corpus of referring expressions in Dutch. Its primary function is to evaluate and improve the performance of REG algorithms. Such algorithms are computational models that automatically generate referring expressions by computing how a specific target can be identified to an addressee by distinguishing it from a set of distractor objects. We performed a large-scale production experiment, in which participants were asked to describe furniture items and people, and provided all descriptions with semantic information regarding the target and the distractor objects. Besides being useful for evaluating REG algorithms, the corpus addresses several other research goals. Firstly, the corpus contains both written and spoken referring expressions uttered in the direction of an addressee, which enables systematic analyses of how modality (text or speech) influences the human production of referring expressions. Secondly, due to its comparability with the English TUNA corpus, our Dutch corpus can be used to explore the differences between Dutch and English speakers regarding the production of referring expressions. @@ -1588,9 +1588,9 @@ AinaPeris - MarionaTaulé - GemmaBoleda - HoracioRodríguez + MarionaTaulé + GemmaBoleda + HoracioRodríguez <fixed-case>ADN</fixed-case>-Classifier:Automatically Assigning Denotation Types to Nominalizations http://www.lrec-conf.org/proceedings/lrec2010/pdf/252_Paper.pdf This paper presents the ADN-Classifier, an Automatic classification system of Spanish Deverbal Nominalizations aimed at identifying its semantic denotation (i.e. event, result, underspecified, or lexicalized). The classifier can be used for NLP tasks such as coreference resolution or paraphrase detection. To our knowledge, the ADN-Classifier is the first effort in acquisition of denotations for nominalizations using Machine Learning. We compare the results of the classifier when using a decreasing number of Knowledge Sources, namely (1) the complete nominal lexicon (AnCora-Nom) that includes sense distictions, (2) the nominal lexicon (AnCora-Nom) removing the sense-specific information, (3) nominalizations’ context information obtained from a treebank corpus (AnCora-Es) and (4) the combination of the previous linguistic resources. In a realistic scenario, that is, without sense distinction, the best results achieved are those taking into account the information declared in the lexicon (89.40% accuracy). This shows that the lexicon contains crucial information (such as argument structure) that corpus-derived features cannot substitute for. @@ -1615,8 +1615,8 @@ Natalie D.Snoeren - MartineAdda-Decker - GillesAdda + MartineAdda-Decker + GillesAdda The Study of Writing Variants in an Under-resourced Language: Some Evidence from Mobile N-Deletion in <fixed-case>L</fixed-case>uxembourgish http://www.lrec-conf.org/proceedings/lrec2010/pdf/258_Paper.pdf The national language of the Grand-Duchy of Luxembourg, Luxembourgish, has often been characterized as one of Europe's under-described and under-resourced languages. Because of a limited written production of Luxembourgish, poorly observed writing standardization (as compared to other languages such as English and French) and a large diversity of spoken varieties, the study of Luxembourgish poses many interesting challenges to automatic speech processing studies as well as to linguistic enquiries. In the present paper, we make use of large corpora to focus on typical writing and derived pronunciation variants in Luxembourgish, elicited by mobile -n deletion (hereafter shortened to MND). Using transcriptions from the House of Parliament debates and 10k words from news reports, we examine the reality of MND variants in written transcripts of speech. The goal of this study is manyfold: quantify the potential of variation due to MND in written Luxembourgish, check the mandatory status of the MND rule and discuss the arising problems for automatic spoken Luxembourgish processing. @@ -1634,7 +1634,7 @@ YukiKamiya TomohiroOhno ShigekiMatsubara - HidekiKashioka + HidekiKashioka Construction of Back-Channel Utterance Corpus for Responsive Spoken Dialogue System Development http://www.lrec-conf.org/proceedings/lrec2010/pdf/260_Paper.pdf In spoken dialogues, if a spoken dialogue system does not respond at all during user’s utterances, the user might feel uneasy because the user does not know whether or not the system has recognized the utterances. In particular, back-channel utterances, which the system outputs as voices such as “yeah” and “uh huh” in English have important roles for a driver in in-car speech dialogues because the driver does not look owards a listener while driving. This paper describes construction of a back-channel utterance corpus and its analysis to develop the system which can output back-channel utterances at the proper timing in the responsive in-car speech dialogue. First, we constructed the back-channel utterance corpus by integrating the back-channel utterances that four subjects provided for the driver’s utterances in 60 dialogues in the CIAIR in-car speech dialogue corpus. Next, we analyzed the corpus and revealed the relation between back-channel utterance timings and information on bunsetsu, clause, pause and rate of speech. Based on the analysis, we examined the possibility of detecting back-channel utterance timings by machine learning technique. As the result of the experiment, we confirmed that our technique achieved as same detection capability as a human. @@ -1643,9 +1643,9 @@ Marina B.Ruiter Toni C. M.Rietveld - CatiaCucchiarini - Emiel J.Krahmer - HelmerStrik + CatiaCucchiarini + Emiel J.Krahmer + HelmerStrik Human Language Technology and Communicative Disabilities: Requirements and Possibilities for the Future http://www.lrec-conf.org/proceedings/lrec2010/pdf/261_Paper.pdf For some years now, the Nederlandse Taalunie (Dutch Language Union) has been active in promoting the development of human language technology (HLT) applications for users of Dutch with communication disabilities. The reason is that HLT products and services may enable these users to improve their verbal autonomy and communication skills. We sought to identify a minimum common set of HLT resources that is required to develop tools for a wide range of communication disabilities. In order to reach this goal, we investigated the specific HLT needs of communicatively disabled people and related these needs to the underlying HLT software components. By analysing the availability and quality of these essential HLT resources, we were able to identify which of the crucial elements need further research and development to become usable for developing applications for communicatively disabled users of Dutch. The results obtained in the current survey can be used to inform policy institutions on how they can stimulate the development of HLT resources for this target group. In the current study results were obtained for Dutch, but a similar approach can also be used for other languages. @@ -1671,14 +1671,14 @@ VerenaHenrich - ErhardHinrichs + ErhardHinrichs <fixed-case>G</fixed-case>ern<fixed-case>E</fixed-case>di<fixed-case>T</fixed-case> - The <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et Editing Tool http://www.lrec-conf.org/proceedings/lrec2010/pdf/264_Paper.pdf This paper introduces GernEdiT (short for: GermaNet Editing Tool), a new graphical user interface for the lexicographers and developers of GermaNet, the German version of the Princeton WordNet. GermaNet is a lexical-semantic net that relates German nouns, verbs, and adjectives. Traditionally, lexicographic work for extending the coverage of GermaNet utilized the Princeton WordNet development environment of lexicographer files. Due to a complex data format and no opportunity of automatic consistency checks, this process was very error prone and time consuming. The GermaNet Editing Tool GernEdiT was developed to overcome these shortcomings. The main purposes of the GernEdiT tool are, besides supporting lexicographers to access, modify, and extend GermaNet data in an easy and adaptive way, as follows: Replace the standard editing tools by a more user-friendly tool, use a relational database as data storage, support export formats in the form of XML, and facilitate internal consistency and correctness of the linguistic resource. All these core functionalities of GernEdiT along with the main aspects of the underlying lexical resource GermaNet and its current database format are presented in this paper. henrich-hinrichs-2010-gernedit - ErhardHinrichs + ErhardHinrichs VerenaHenrich ThomasZastrow Sustainability of Linguistic Data and Analysis in the Context of a Collaborative e<fixed-case>S</fixed-case>cience Environment @@ -1704,9 +1704,9 @@ gorog-vossen-2010-computer - MarieHinrichs + MarieHinrichs ThomasZastrow - ErhardHinrichs + ErhardHinrichs <fixed-case>W</fixed-case>eb<fixed-case>L</fixed-case>icht: Web-based <fixed-case>LRT</fixed-case> Services in a Distributed e<fixed-case>S</fixed-case>cience Infrastructure http://www.lrec-conf.org/proceedings/lrec2010/pdf/270_Paper.pdf eScience - enhanced science - is a new paradigm of scientific work and research. In the humanities, eScience environments can be helpful in establishing new workflows and lifecycles of scientific data. WebLicht is such an eScience environment for linguistic analysis, making linguistic tools and resources available network-wide. Today, most digital language resources and tools (LRT) are available by download only. This is inconvenient for someone who wants to use and combine several tools because these tools are normally not compatible with each other. To overcome this restriction, WebLicht makes the functionality of linguistic tools and the resources themselves available via the internet as web services. In WebLicht, several kinds of linguistic tools are available which cover the basic functionality of automatic and incremental creation of annotated text corpora. To make use of the more than 70 tools and resources currently available, the end user needs nothing more than just a common web browser. @@ -1722,28 +1722,28 @@ DianaSantos - Luís MiguelCabral - CorinaForascu + Luís MiguelCabral + CorinaForascu PamelaForner FredricGey KatrinLamm ThomasMandl PetyaOsenova - AnselmoPeñas - ÁlvaroRodrigo - JuliaSchulz + AnselmoPeñas + ÁlvaroRodrigo + JuliaSchulz YvonneSkalban - ErikTjong Kim Sang + ErikTjong Kim Sang <fixed-case>G</fixed-case>iki<fixed-case>CLEF</fixed-case>: Crosscultural Issues in Multilingual Information Access http://www.lrec-conf.org/proceedings/lrec2010/pdf/272_Paper.pdf In this paper we describe GikiCLEF, the first evaluation contest that, to our knowledge, was specifically designed to expose and investigate cultural and linguistic issues involved in structured multimedia collections and searching, and which was organized under the scope of CLEF 2009. GikiCLEF evaluated systems that answered hard questions for both human and machine, in ten different Wikipedia collections, namely Bulgarian, Dutch, English, German, Italian, Norwegian (Bokmäl and Nynorsk), Portuguese, Romanian, and Spanish. After a short historical introduction, we present the task, together with its motivation, and discuss how the topics were chosen. Then we provide another description from the point of view of the participants. Before disclosing their results, we introduce the SIGA management system explaining the several tasks which were carried out behind the scenes. We quantify in turn the GIRA resource, offered to the community for training and further evaluating systems with the help of the 50 topics gathered and the solutions identified. We end the paper with a critical discussion of what was learned, advancing possible ways to reuse the data. santos-etal-2010-gikiclef - DieterVan Uytvanck + DieterVan Uytvanck ClausZinn - DaanBroeder - PeterWittenburg + DaanBroeder + PeterWittenburg MarianoGardellini Virtual Language Observatory: The Portal to the Language Resources and Technology Universe http://www.lrec-conf.org/proceedings/lrec2010/pdf/273_Paper.pdf @@ -1765,8 +1765,8 @@ WernerSpiegl KorbinianRiedhammer - StefanSteidl - ElmarNöth + StefanSteidl + ElmarNöth <fixed-case>FAU</fixed-case> <fixed-case>IISAH</fixed-case> Corpus – A <fixed-case>G</fixed-case>erman Speech Database Consisting of Human-Machine and Human-Human Interaction Acquired by Close-Talking and Far-Distance Microphones http://www.lrec-conf.org/proceedings/lrec2010/pdf/275_Paper.pdf In this paper the FAU IISAH corpus and its recording conditions are described: a new speech database consisting of human-machine and human-human interaction recordings. Beside close-talking microphones for the best possible audio quality of the recorded speech, far-distance microphones were used to acquire the interaction and communication. The recordings took place during a Wizard-of-Oz experiment in the intelligent, senior-adapted house (ISA-House). That is a living room with a speech controlled home assistance system for elderly people, based on a dialogue system, which is able to process spontaneous speech. During the studies in the ISA-House more than eight hours of interaction data were recorded including 3 hours and 27 minutes of spontaneous speech. The data were annotated in terms of human-human (off-talk) and human-machine (on-talk) interaction. The test persons used 2891 turns of off-talk and 2752 turns of on-talk including 1751 different words. Still in progress is the analysis under statistical and linguistical aspects. @@ -1789,8 +1789,8 @@ KaisDukes - EricAtwell - Abdul-Baquee M.Sharaf + EricAtwell + Abdul-Baquee M.Sharaf Syntactic Annotation Guidelines for the <fixed-case>Q</fixed-case>uranic <fixed-case>A</fixed-case>rabic Dependency Treebank http://www.lrec-conf.org/proceedings/lrec2010/pdf/278_Paper.pdf The Quranic Arabic Dependency Treebank (QADT) is part of the Quranic Arabic Corpus (http://corpus.quran.com), an online linguistic resource organized by the University of Leeds, and developed through online collaborative annotation. The website has become a popular study resource for Arabic and the Quran, and is now used by over 1,500 researchers and students daily. This paper presents the treebank, explains the choice of syntactic representation, and highlights key parts of the annotation guidelines. The text being analyzed is the Quran, the central religious book of Islam, written in classical Quranic Arabic (c. 600 CE). To date, all 77,430 words of the Quran have a manually verified morphological analysis, and syntactic analysis is in progress. 11,000 words of Quranic Arabic have been syntactically annotated as part of a gold standard treebank. Annotation guidelines are especially important to promote consistency for a corpus which is being developed through online collaboration, since often many people will participate from different backgrounds and with different levels of linguistic expertise. The treebank is available online for collaborative correction to improve accuracy, with suggestions reviewed by expert Arabic linguists, and compared against existing published books of Quranic Syntax. @@ -1798,7 +1798,7 @@ TommiVatanen - Jaakko J.Väyrynen + Jaakko J.Väyrynen SamiVirpioja Language Identification of Short Text Segments with N-gram Models http://www.lrec-conf.org/proceedings/lrec2010/pdf/279_Paper.pdf @@ -1806,8 +1806,8 @@ vatanen-etal-2010-language - JosephPolifroni - ImreKiss + JosephPolifroni + ImreKiss MarkAdler Bootstrapping Named Entity Extraction for the Creation of Mobile Services http://www.lrec-conf.org/proceedings/lrec2010/pdf/280_Paper.pdf @@ -1817,7 +1817,7 @@ BertRéveil Jean-PierreMartens - Henkvan den Heuvel + Henkvan den Heuvel Improving Proper Name Recognition by Adding Automatically Learned Pronunciation Variants to the Lexicon http://www.lrec-conf.org/proceedings/lrec2010/pdf/281_Paper.pdf This paper deals with the task of large vocabulary proper name recognition. In order to accomodate a wide diversity of possible name pronunciations (due to non-native name origins or speaker tongues) a multilingual acoustic model is combined with a lexicon comprising 3 grapheme-to-phoneme (G2P) transcriptions from G2P transcribers for 3 different languages) and up to 4 so-called phoneme-to-phoneme (P2P) transcriptions. The latter are generated with (speaker tongue, name source) specific P2P converters that try to transform a set of baseline name transcriptions into a pool of transcription variants that lie closer to the `true’ name pronunciations. The experimental results show that the generated P2P variants can be employed to improve name recognition, and that the obtained accuracy is comparable to what is achieved with typical (TY) transcriptions (made by a human expert). Furthermore, it is demonstrated that the P2P conversion can best be instantiated from a baseline transcription in the name source language, and that knowledge of the speaker tongue is an important input as well for the P2P transcription process. @@ -1825,7 +1825,7 @@ MajdiSawalha - EricAtwell + EricAtwell Fine-Grain Morphological Analyzer and Part-of-Speech Tagger for <fixed-case>A</fixed-case>rabic Text http://www.lrec-conf.org/proceedings/lrec2010/pdf/282_Paper.pdf Morphological analyzers and part-of-speech taggers are key technologies for most text analysis applications. Our aim is to develop a part-of-speech tagger for annotating a wide range of Arabic text formats, domains and genres including both vowelized and non-vowelized text. Enriching the text with linguistic analysis will maximize the potential for corpus re-use in a wide range of applications. We foresee the advantage of enriching the text with part-of-speech tags of very fine-grained grammatical distinctions, which reflect expert interest in syntax and morphology, but not specific needs of end-users, because end-user applications are not known in advance. In this paper we review existing Arabic Part-of-Speech Taggers and tag-sets, and illustrate four different Arabic PoS tag-sets for a sample of Arabic text from the Quran. We describe the detailed fine-grained morphological feature tag set of Arabic, and the fine-grained Arabic morphological analyzer algorithm. We faced practical challenges in applying the morphological analyzer to the 100-million-word Web Arabic Corpus: we had to port the software to the National Grid Service, adapt the analyser to cope with spelling variations and errors, and utilise a Broad-Coverage Lexical Resource combining 23 traditional Arabic lexicons. Finally we outline the construction of a Gold Standard for comparative evaluation. @@ -1851,7 +1851,7 @@ Philipvan Oosten DriesTanghe - VéroniqueHoste + VéroniqueHoste Towards an Improved Methodology for Automated Readability Prediction http://www.lrec-conf.org/proceedings/lrec2010/pdf/286_Paper.pdf Since the first half of the 20th century, readability formulas have been widely employed to automatically predict the readability of an unseen text. In this article, the formulas and the text characteristics they are composed of are evaluated in the context of large Dutch and English corpora. We describe the behaviour of the formulas and the text characteristics by means of correlation matrices and a principal component analysis, and test the methodological validity of the formulas by means of collinearity tests. Both the correlation matrices and the principal component analysis show that the formulas described in this paper strongly correspond, regardless of the language for which they were designed. Furthermore, the collinearity test reveals shortcomings in the methodology that was used to create some of the existing readability formulas. All of this leads us to conclude that a new readability prediction method is needed. We finally make suggestions to come to a cleaner methodology and present web applications that will help us collect data to compile a new gold standard for readability prediction. @@ -1859,7 +1859,7 @@ MajdiSawalha - EricAtwell + EricAtwell Constructing and Using Broad-coverage Lexical Resource for Enhancing Morphological Analysis of <fixed-case>A</fixed-case>rabic http://www.lrec-conf.org/proceedings/lrec2010/pdf/287_Paper.pdf Broad-coverage language resources which provide prior linguistic knowledge must improve the accuracy and the performance of NLP applications. We are constructing a broad-coverage lexical resource to improve the accuracy of morphological analyzers and part-of-speech taggers of Arabic text. Over the past 1200 years, many different kinds of Arabic language lexicons were constructed; these lexicons are different in ordering, size and aim or goal of construction. We collected 23 machine-readable lexicons, which are freely available on the web. We combined lexical resources into one large broad-coverage lexical resource by extracting information from disparate formats and merging traditional Arabic lexicons. To evaluate the broad-coverage lexical resource we computed coverage over the Qur’an, the Corpus of Contemporary Arabic, and a sample from the Arabic Web Corpus, using two methods. Counting exact word matches between test corpora and lexicon scored about 65-68%; Arabic has a rich morphology with many combinations of roots, affixes and clitics, so about a third of words in the corpora did not have an exact match in the lexicon. The second approach is to compute coverage in terms of use in a lemmatizer program, which strips clitics to look for a match for the underlying lexeme; this scored about 82-85%. @@ -1882,7 +1882,7 @@ GerlofBouma - LiljaØvrelid + LiljaØvrelid JonasKuhn Towards a Large Parallel Corpus of Cleft Constructions http://www.lrec-conf.org/proceedings/lrec2010/pdf/291_Paper.pdf @@ -1893,7 +1893,7 @@ ZiqiZhang Anna LisaGentile LeiXia - JoséIria + JoséIria SamChapman A Random Graph Walk based Approach to Computing Semantic Relatedness Using Knowledge from <fixed-case>W</fixed-case>ikipedia http://www.lrec-conf.org/proceedings/lrec2010/pdf/292_Paper.pdf @@ -1935,7 +1935,7 @@ AnneGarcia-Fernandez - SophieRosset + SophieRosset AnneVilnat <fixed-case>MACAQ</fixed-case> : A Multi Annotated Corpus to Study how we Adapt Answers to Various Questions http://www.lrec-conf.org/proceedings/lrec2010/pdf/301_Paper.pdf @@ -1943,9 +1943,9 @@ garcia-fernandez-etal-2010-macaq - Carlos-D.Martínez-Hinarejos + Carlos-D.Martínez-Hinarejos VicentTamarit - José-M.Benedí + José-M.Benedí Evaluation of <fixed-case>HMM</fixed-case>-based Models for the Annotation of Unsegmented Dialogue Turns http://www.lrec-conf.org/proceedings/lrec2010/pdf/303_Paper.pdf Corpus-based dialogue systems rely on statistical models, whose parameters are inferred from annotated dialogues. The dialogues are usually annotated in terms of Dialogue Acts (DA), and the manual annotation is difficult (as annotation rule are hard to define), error-prone and time-consuming. Therefore, several semi-automatic annotation processes have been proposed to speed-up the process and consequently obtain a dialogue system in less total time. These processes are usually based on statistical models. The standard statistical annotation model is based on Hidden Markov Models (HMM). In this work, we explore the impact of different types of HMM, with different number of states, on annotation accuracy. We performed experiments using these models on two dialogue corpora (Dihana and SwitchBoard) of dissimilar features. The results show that some types of models improve standard HMM in a human-computer task-oriented dialogue corpus (Dihana corpus), but their impact is lower in a human-human non-task-oriented dialogue corpus (SwitchBoard corpus). @@ -1954,7 +1954,7 @@ RaheelNawaz PaulThompson - JohnMcNaught + JohnMcNaught SophiaAnaniadou Meta-Knowledge Annotation of Bio-Events http://www.lrec-conf.org/proceedings/lrec2010/pdf/306_Paper.pdf @@ -1966,24 +1966,24 @@ RubenDorado LukeMcCrohon SophiaAnaniadou - Jun’ichiTsujii + Jun’ichiTsujii <fixed-case>U</fixed-case>-Compare: An Integrated Language Resource Evaluation Platform Including a Comprehensive <fixed-case>UIMA</fixed-case> Resource Library http://www.lrec-conf.org/proceedings/lrec2010/pdf/307_Paper.pdf Language resources, including corpus and tools, are normally required to be combined in order to achieve a user’s specific task. However, resources tend to be developed independently in different, incompatible formats. In this paper we describe about U-Compare, which consists of the U-Compare component repository and the U-Compare platform. We have been building a highly interoperable resource library, providing the world largest ready-to-use UIMA component repository including wide variety of corpus readers and state-of-the-art language tools. These resources can be deployed as local services or web services, even possible to be hosted in clustered machines to increase the performance, while users do not need to be aware of such differences. In addition to the resource library, an integrated language processing platform is provided, allowing workflow creation, comparison, evaluation and visualization, using the resources in the library or any UIMA component, without any programming via graphical user interfaces, while a command line launcher is also available without GUIs. The evaluation itself is processed in a UIMA component, users can create and plug their own evaluation metrics in addition to the predefined metrics. U-Compare has been successfully used in many projects including BioCreative, Conll and the BioNLP shared task. kano-etal-2010-u - Janne BondiJohannessen + Janne BondiJohannessen KristinHagen AndersNøklestad - JoelPriestley + JoelPriestley Enhancing Language Resources with Maps http://www.lrec-conf.org/proceedings/lrec2010/pdf/308_Paper.pdf We will look at how maps can be integrated in research resources, such as language databases and language corpora. By using maps, search results can be illustrated in a way that immediately gives the user information that words or numbers on their own would not give. We will illustrate with two different resources, into which we have now added a Google Maps application: The Nordic Dialect Corpus (Johannessen et al. 2009) and The Nordic Syntactic Judgments Database (Lindstad et al. 2009). We have integrated Google Maps into these applications. The database contains some hundred syntactic test sentences that have been evaluated by four speakers in more than hundred locations in Norway and Sweden. Searching for the evaluations of a particular sentence gives a list of several hundred judgments, which are difficult for a human researcher to assess. With the map option, isoglosses are immediately visible. We show in the paper that both with the maps depicting corpus hits and with the maps depicting database results, the map visualizations actually show clear geographical differences that would be very difficult to spot just by reading concordance lines or database tables. johannessen-etal-2010-enhancing - Jana Z.Sukkarieh + Jana Z.Sukkarieh EleanorBolge Building a Textual Entailment Suite for the Evaluation of Automatic Content Scoring Technologies http://www.lrec-conf.org/proceedings/lrec2010/pdf/310_Paper.pdf @@ -1992,7 +1992,7 @@ HaïfaZargayouna - AdelineNazarenko + AdelineNazarenko Evaluation of Textual Knowledge Acquisition Tools: a Challenging Task http://www.lrec-conf.org/proceedings/lrec2010/pdf/311_Paper.pdf A large effort has been devoted to the development of textual knowledge acquisition (KA) tools, but it is still difficult to assess the progress that has been made. The results produced by these tools are difficult to compare, due to the heterogeneity of the proposed methods and of their goals. Various experiments have been made to evaluate terminological and ontological tools. They show that in terminology as well as in ontology acquisition, it remains difficult to compare existing tools and to analyse their advantages and drawbacks. From our own experiments in evaluating terminology and ontology acquisition tools, it appeared that the difficulties and solutions are similar for both tasks. We propose a unified approach for the evaluation of textual KA tools that can be instantiated in different ways for various tasks. The main originality of this approach lies in the way it takes into account the subjectivity of evaluation and the relativity of gold standards. In this paper, we highlight the major difficulties of KA evaluation, we then present a unified proposal for the evaluation of terminologies and ontologies acquisition tools and the associated experiments. The proposed protocols take into consideration the specificity of this type of evaluation. @@ -2030,7 +2030,7 @@ SvetlaKoeva DianaBlagoeva - SiyaKolkovska + SiyaKolkovska <fixed-case>B</fixed-case>ulgarian National Corpus Project http://www.lrec-conf.org/proceedings/lrec2010/pdf/316_Paper.pdf The paper presents Bulgarian National Corpus project (BulNC) - a large-scale, representative, online available corpus of Bulgarian. The BulNC is also a monolingual general corpus, fully morpho-syntactically (and partially semantically) annotated, and manually provided with detailed meta-data descriptions. Presently the Bulgarian National corpus consists of about 320 000 000 graphical words and includes more than 10 000 samples. Briefly the corpus structure and the accepted criteria for representativeness and well-balancing are presented. The query language for advance search of collocations and concordances is demonstrated with some examples - it allows to retrieve word combinations, ordered queries, inflexionally and semantically related words, part-of-speech tags, utilising Boolean operations and grouping as well. The BulNC already plays a significant role in natural language processing of Bulgarian contributing to scientific advances in spelling and grammar checking, word sense disambiguation, speech recognition, text categorisation, topic extraction and machine translation. The BulNC can also be used in different investigations going beyond the linguistics: library studies, social sciences research, teaching methods studies, etc. @@ -2056,7 +2056,7 @@ haselbach-heid-2010-development - Sophia Yat MeiLee + Sophia Yat MeiLee YingChen ShoushanLi Chu-RenHuang @@ -2097,7 +2097,7 @@ williams-etal-2010-cambridge - Henkvan den Heuvel + Henkvan den Heuvel Renévan Horik StefScagliola EricSanders @@ -2108,7 +2108,7 @@ van-den-heuvel-etal-2010-veterantapes - RaffaellaBernardi + RaffaellaBernardi ManuelKirschner ZoranaRatkovic Context Fusion: The Role of Discourse Structure and Centering Theory @@ -2125,9 +2125,9 @@ okamoto-ishizaki-2010-homographic - Cheikh M. BambaDione + Cheikh M. BambaDione JonasKuhn - SinaZarrieß + SinaZarrieß Design and Development of Part-of-Speech-Tagging Resources for <fixed-case>W</fixed-case>olof (<fixed-case>N</fixed-case>iger-<fixed-case>C</fixed-case>ongo, spoken in <fixed-case>S</fixed-case>enegal) http://www.lrec-conf.org/proceedings/lrec2010/pdf/333_Paper.pdf In this paper, we report on the design of a part-of-speech-tagset for Wolof and on the creation of a semi-automatically annotated gold standard. In order to achieve high-quality annotation relatively fast, we first generated an accurate lexicon that draws on existing word and name lists and takes into account inflectional and derivational morphology. The main motivation for the tagged corpus is to obtain data for training automatic taggers with machine learning approaches. Hence, we took machine learning considerations into account during tagset design and we present training experiments as part of this paper. The best automatic tagger achieves an accuracy of 95.2% in cross-validation experiments. We also wanted to create a basis for experimenting with annotation projection techniques, which exploit parallel corpora. For this reason, it was useful to use a part of the Bible as the gold standard corpus, for which sentence-aligned parallel versions in many languages are easy to obtain. We also report on preliminary experiments exploiting a statistical word alignment of the parallel text. @@ -2160,7 +2160,7 @@ LubomirOtrusina - PavelSmrz + PavelSmrz A New Approach to Pseudoword Generation http://www.lrec-conf.org/proceedings/lrec2010/pdf/339_Paper.pdf Sense-tagged corpora are used to evaluate word sense disambiguation (WSD) systems. Manual creation of such resources is often prohibitively expensive. That is why the concept of pseudowords - conflations of two or more unambiguous words - has been integrated into WSD evaluation experiments. This paper presents a new method of pseudoword generation which takes into account semantic-relatedness of the candidate words forming parts of the pseudowords to the particular senses of the word to be disambiguated. We compare the new approach to its alternatives and show that the results on pseudowords, that are more similar to real ambiguous words, better correspond to the actual results. Two techniques assessing the similarity are studied - the first one takes advantage of manually created dictionaries (wordnets), the second one builds on the automatically computed statistical data obtained from large corpora. Pros and cons of the two techniques are discussed and the results on a standard task are demonstrated. @@ -2186,7 +2186,7 @@ gibbon-etal-2010-medefaidrin - SatoshiSato + SatoshiSato SayokoKaide A Person-Name Filter for Automatic Compilation of Bilingual Person-Name Lexicons http://www.lrec-conf.org/proceedings/lrec2010/pdf/343_Paper.pdf @@ -2195,7 +2195,7 @@ RaniaAl-Sabbagh - RoxanaGirju + RoxanaGirju Mining the Web for the Induction of a Dialectical <fixed-case>A</fixed-case>rabic Lexicon http://www.lrec-conf.org/proceedings/lrec2010/pdf/344_Paper.pdf This paper describes the first phase of building a lexicon of Egyptian Cairene Arabic (ECA) ― one of the most widely understood dialects in the Arab World ― and Modern Standard Arabic (MSA). Each ECA entry is mapped to its MSA synonym, Part-of-Speech (POS) tag and top-ranked contexts based on Web queries; and thus each entry is provided with basic syntactic and semantic information for a generic lexicon compatible with multiple NLP applications. Moreover, through their MSA synonyms, ECA entries acquire access to MSA available NLP tools and resources which are considerably available. Using an associationist approach based on the correlations between word co-occurrence patterns in both dialects, we change the direction of the acquisition process from parallel to circular to overcome a bottleneck of current research on Arabic dialects, namely the lack of parallel corpora, and to alleviate accuracy rates for using unrelated Web documents which are more frequently available. Manually evaluated for 1,000 word entries by two native speakers of the ECA-MSA varieties, the proposed approach achieves a promising F-measured performance rate of 70.9%. In discussion to the proposed algorithm, different semantic issues are highlighted for upcoming phases of the induction of a more comprehensive ECA-MSA lexicon. @@ -2213,7 +2213,7 @@ PhilippeDreuw - HermannNey + HermannNey GregorioMartinez OnnoCrasborn JustusPiater @@ -2235,7 +2235,7 @@ VioletaSeretan - EricWehrli + EricWehrli LukaNerima GabrielaSoare <fixed-case>F</fixed-case>ips<fixed-case>R</fixed-case>omanian: Towards a <fixed-case>R</fixed-case>omanian Version of the Fips Syntactic Parser @@ -2248,7 +2248,7 @@ JonasBeskow KjellElenius KahlHellmer - SofiaStrönbergsson + SofiaStrönbergsson DavidHouse <fixed-case>S</fixed-case>pontal: A <fixed-case>S</fixed-case>wedish Spontaneous Dialogue Corpus of Audio, Video and Motion Capture http://www.lrec-conf.org/proceedings/lrec2010/pdf/352_Paper.pdf @@ -2259,7 +2259,7 @@ WalidMagdy JinmingMin JohannesLeveling - Gareth J. F.Jones + Gareth J. F.Jones Building a Domain-specific Document Collection for Evaluating Metadata Effects on Information Retrieval http://www.lrec-conf.org/proceedings/lrec2010/pdf/353_Paper.pdf This paper describes the development of a structured document collection containing user-generated text and numerical metadata for exploring the exploitation of metadata in information retrieval (IR). The collection consists of more than 61,000 documents extracted from YouTube video pages on basketball in general and NBA (National Basketball Association) in particular, together with a set of 40 topics and their relevance judgements. In addition, a collection of nearly 250,000 user profiles related to the NBA collection is available. Several baseline IR experiments report the effect of using video-associated metadata on retrieval effectiveness. The results surprisingly show that searching the videos titles only performs significantly better than searching additional metadata text fields of the videos such as the tags or the description. @@ -2280,7 +2280,7 @@ JochenSchwenninger BarbaraSamlowski ThomasWinkler - JoachimKöhler + JoachimKöhler <fixed-case>D</fixed-case>i<fixed-case>SC</fixed-case>o - A <fixed-case>G</fixed-case>erman Evaluation Corpus for Challenging Problems in the Broadcast Domain http://www.lrec-conf.org/proceedings/lrec2010/pdf/355_Paper.pdf Typical broadcast material contains not only studio-recorded texts read by trained speakers, but also spontaneous and dialect speech, debates with cross-talk, voice-overs, and on-site reports with difficult acoustic environments. Standard approaches to speech and speaker recognition usually deteriorate under such conditions. This paper reports on the design, construction, and experimental analysis of DiSCo, a German corpus for the evaluation of speech and speaker recognition on challenging material from the broadcast domain. One of the key requirements for the design of this corpus was a good coverage of different types of serious programmes beyond clean speech and planned speech broadcast news. Corpus annotation encompasses manual segmentation, an orthographic transcription, and labelling with speech mode, dialect, and noise type. We indicate typical use cases for the corpus by reporting results from ASR, speech search, and speaker recognition on the new corpus, thereby obtaining insights into the difficulty of audio recognition on the various classes. @@ -2324,7 +2324,7 @@ PaulFelt OwenMerkling MarcCarmen - EricRingger + EricRingger WarrenLemmon KevinSeppi RobbieHaertel @@ -2358,8 +2358,8 @@ UlrichHeid FabienneFritzinger - ErhardHinrichs - MarieHinrichs + ErhardHinrichs + MarieHinrichs ThomasZastrow Term and Collocation Extraction by Means of Complex Linguistic Web Services http://www.lrec-conf.org/proceedings/lrec2010/pdf/363_Paper.pdf @@ -2377,7 +2377,7 @@ NicolettaCalzolari ClaudiaSoria - RiccardoDel Gratta + RiccardoDel Gratta SaraGoggi ValeriaQuochi IreneRusso @@ -2391,13 +2391,13 @@ NicolasMoreau - OlivierHamon - DjamelMostefa - SophieRosset + OlivierHamon + DjamelMostefa + SophieRosset OlivierGalibert - LoriLamel - JordiTurmo - Pere R.Comas + LoriLamel + JordiTurmo + Pere R.Comas PaoloRosso DavideBuscaldi KhalidChoukri @@ -2408,7 +2408,7 @@ RoserSanromà - GemmaBoleda + GemmaBoleda The Database of <fixed-case>C</fixed-case>atalan Adjectives http://www.lrec-conf.org/proceedings/lrec2010/pdf/373_Paper.pdf We present the Database of Catalan Adjectives (DCA), a database with 2,296 adjective lemmata enriched with morphological, syntactic and semantic information. This set of adjectives has been collected from a fragment of the Corpus Textual Informatitzat de la Llengua Catalana of the Institut d’Estudis Catalans and constitutes a representative sample of the adjective class in Catalan as a whole. The database includes both manually coded and automatically extracted information regarding the most prominent properties used in the literature regarding the semantics of adjectives, such as morphological origin, suffix (if any), predicativity, gradability, adjective position with respect to the head noun, adjective modifiers, or semantic class. The DCA can be useful for NLP applications using adjectives (from POS-taggers to Opinion Mining applications) and for linguistic analysis regarding the morphological, syntactic, and semantic properties of adjectives. We now make it available to the research community under a Creative Commons Attribution Share Alike 3.0 Spain license. @@ -2417,7 +2417,7 @@ AmalZouaq MichelGagnon - BenoitOzell + BenoitOzell Can Syntactic and Logical Graphs help Word Sense Disambiguation? http://www.lrec-conf.org/proceedings/lrec2010/pdf/374_Paper.pdf This paper presents a word sense disambiguation (WSD) approach based on syntactic and logical representations. The objective here is to run a number of experiments to compare standard contexts (word windows, sentence windows) with contexts provided by a dependency parser (syntactic context) and a logical analyzer (logico-semantic context). The approach presented here relies on a dependency grammar for the syntactic representations. We also use a pattern knowledge base over the syntactic dependencies to extract flat predicative logical representations. These representations (syntactic and logical) are then used to build context vectors that are exploited in the WSD process. Various state-of-the-art algorithms including Simplified Lesk, Banerjee and Pedersen and frequency of co-occurrences are tested with these syntactic and logical contexts. Preliminary results show that defining context vectors based on these features may improve WSD by comparison with classical word and sentence context windows. However, future experiments are needed to provide more evidence over these issues. @@ -2427,7 +2427,7 @@ MengWang Chu-RenHuang ShiwenYu - WeiweiSun + WeiweiSun Automatic Acquisition of <fixed-case>C</fixed-case>hinese Novel Noun Compounds http://www.lrec-conf.org/proceedings/lrec2010/pdf/377_Paper.pdf Automatic acquisition of novel compounds is notoriously difficult because most novel compounds have relatively low frequency in a corpus. The current study proposes a new method to deal with the novel compound acquisition challenge. We model this task as a two-class classification problem in which a candidate compound is either classified as a compound or a non-compound. A machine learning method using SVM, incorporating two types of linguistically motivated features: semantic features and character features, is applied to identify rare but valid noun compounds. We explore two kinds of training data: one is virtual training data which is obtained by three statistical scores, i.e. co-occurrence frequency, mutual information and dependent ratio, from the frequent compounds; the other is real training data which is randomly selected from the infrequent compounds. We conduct comparative experiments, and the experimental results show that even with limited direct evidence in the corpus for the novel compounds, we can make full use of the typical frequent compounds to help in the discovery of the novel compounds. @@ -2443,7 +2443,7 @@ oostdijk-etal-2010-constructing - PaulBedaride + PaulBedaride ClaireGardent Syntactic Testsuites and Textual Entailment Recognition http://www.lrec-conf.org/proceedings/lrec2010/pdf/379_Paper.pdf @@ -2459,7 +2459,7 @@ stepanek-pajas-2010-querying - RodolfoDelmonte + RodolfoDelmonte AntonellaBristot VincenzoPallotta Deep Linguistic Processing with <fixed-case>GETARUNS</fixed-case> for Spoken Dialogue Understanding @@ -2469,7 +2469,7 @@ EmadMohamed - SandraKübler + SandraKübler <fixed-case>A</fixed-case>rabic Part of Speech Tagging http://www.lrec-conf.org/proceedings/lrec2010/pdf/384_Paper.pdf Arabic is a morphologically rich language, which presents a challenge for part of speech tagging. In this paper, we compare two novel methods for POS tagging of Arabic without the use of gold standard word segmentation but with the full POS tagset of the Penn Arabic Treebank. The first approach uses complex tags that describe full words and does not require any word segmentation. The second approach is segmentation-based, using a machine learning segmenter. In this approach, the words are first segmented, then the segments are annotated with POS tags. Because of the word-based approach, we evaluate full word accuracy rather than segment accuracy. Word-based POS tagging yields better results than segment-based tagging (93.93% vs. 93.41%). Word based tagging also gives the best results on known words, the segmentation-based approach gives better results on unknown words. Combining both methods results in a word accuracy of 94.37%, which is very close to the result obtained by using gold standard segmentation (94.91%). @@ -2477,7 +2477,7 @@ AlexanderPak - PatrickParoubek + PatrickParoubek <fixed-case>T</fixed-case>witter as a Corpus for Sentiment Analysis and Opinion Mining http://www.lrec-conf.org/proceedings/lrec2010/pdf/385_Paper.pdf Microblogging today has become a very popular communication tool among Internet users. Millions of users share opinions on different aspects of life everyday. Therefore microblogging web-sites are rich sources of data for opinion mining and sentiment analysis. Because microblogging has appeared relatively recently, there are a few research works that were devoted to this topic. In our paper, we focus on using Twitter, the most popular microblogging platform, for the task of sentiment analysis. We show how to automatically collect a corpus for sentiment analysis and opinion mining purposes. We perform linguistic analysis of the collected corpus and explain discovered phenomena. Using the corpus, we build a sentiment classifier, that is able to determine positive, negative and neutral sentiments for a document. Experimental evaluations show that our proposed techniques are efficient and performs better than previously proposed methods. In our research, we worked with English, however, the proposed technique can be used with any other language. @@ -2485,7 +2485,7 @@ RenaNemoto - MartineAdda-Decker + MartineAdda-Decker JacquesDurand Word Boundaries in <fixed-case>F</fixed-case>rench: Evidence from Large Speech Corpora http://www.lrec-conf.org/proceedings/lrec2010/pdf/386_Paper.pdf @@ -2493,7 +2493,7 @@ nemoto-etal-2010-word - BenoîtSagot + BenoîtSagot LaurenceDanlos RosaStern A Lexicon of <fixed-case>F</fixed-case>rench Quotation Verbs for Automatic Quotation Extraction @@ -2521,9 +2521,9 @@ YasuharuDen - HanaeKoiso + HanaeKoiso TakehikoMaruyama - KikuoMaekawa + KikuoMaekawa KatsuyaTakanashi MikaEnomoto NaoYoshida @@ -2533,8 +2533,8 @@ den-etal-2010-two - MarieCandito - BenoîtCrabbé + MarieCandito + BenoîtCrabbé PascalDenis Statistical <fixed-case>F</fixed-case>rench Dependency Parsing: Treebank Conversion and First Results http://www.lrec-conf.org/proceedings/lrec2010/pdf/392_Paper.pdf @@ -2543,7 +2543,7 @@ YueMa - AdelineNazarenko + AdelineNazarenko LaurentAudibert Formal Description of Resources for Ontology-based Semantic Annotation http://www.lrec-conf.org/proceedings/lrec2010/pdf/393_Paper.pdf @@ -2551,18 +2551,18 @@ ma-etal-2010-formal - Luis JavierRodríguez-Fuentes - MikelPenagarikano - GermánBordel + Luis JavierRodríguez-Fuentes + MikelPenagarikano + GermánBordel AmparoVarona - MireiaDíez + MireiaDíez <fixed-case>KALAKA</fixed-case>: A <fixed-case>TV</fixed-case> Broadcast Speech Database for the Evaluation of Language Recognition Systems http://www.lrec-conf.org/proceedings/lrec2010/pdf/394_Paper.pdf A speech database, named KALAKA, was created to support the Albayzin 2008 Evaluation of Language Recognition Systems, organized by the Spanish Network on Speech Technologies from May to November 2008. This evaluation, designed according to the criteria and methodology applied in the NIST Language Recognition Evaluations, involved four target languages: Basque, Catalan, Galician and Spanish (official languages in Spain), and included speech signals in other (unknown) languages to allow open-set verification trials. In this paper, the process of designing, collecting data and building the train, development and evaluation datasets of KALAKA is described. Results attained in the Albayzin 2008 LRE are presented as a means of evaluating the database. The performance of a state-of-the-art language recognition system on a closed-set evaluation task is also presented for reference. Future work includes extending KALAKA by adding Portuguese and English as target languages and renewing the set of unknown languages needed to carry out open-set evaluations. rodriguez-fuentes-etal-2010-kalaka - JarmilaPanevová + JarmilaPanevová MagdaŠevčíková Annotation of Morphological Meanings of Verbs Revisited http://www.lrec-conf.org/proceedings/lrec2010/pdf/395_Paper.pdf @@ -2571,13 +2571,13 @@ AndrewHickl - SandaHarabagiu + SandaHarabagiu Unsupervised Discovery of Collective Action Frames for Socio-Cultural Analysis http://www.lrec-conf.org/proceedings/lrec2010/pdf/396_Paper.pdf hickl-harabagiu-2010-unsupervised - Ting-HaoHuang + Ting-HaoHuang Lun-WeiKu Hsin-HsiChen Predicting Morphological Types of <fixed-case>C</fixed-case>hinese Bi-Character Words by Machine Learning Approaches @@ -2595,16 +2595,16 @@ JorgeVivaldi - Iriada Cunha - Juan-ManuelTorres-Moreno - PatriciaVelázquez-Morales + Iriada Cunha + Juan-ManuelTorres-Moreno + PatriciaVelázquez-Morales Automatic Summarization Using Terminological and Semantic Resources http://www.lrec-conf.org/proceedings/lrec2010/pdf/400_Paper.pdf This paper presents a new algorithm for automatic summarization of specialized texts combining terminological and semantic resources: a term extractor and an ontology. The term extractor provides the list of the terms that are present in the text together their corresponding termhood. The ontology is used to calculate the semantic similarity among the terms found in the main body and those present in the document title. The general idea is to obtain a relevance score for each sentence taking into account both the ”termhood” of the terms found in such sentence and the similarity among such terms and those terms present in the title of the document. The phrases with the highest score are chosen to take part of the final summary. We evaluate the algorithm with Rouge, comparing the resulting summaries with the summaries of other summarizers. The sentence selection algorithm was also tested as part of a standalone summarizer. In both cases it obtains quite good results although the perception is that there is a space for improvement. vivaldi-etal-2010-automatic - OlivierHamon + OlivierHamon Is my Judge a good One? http://www.lrec-conf.org/proceedings/lrec2010/pdf/402_Paper.pdf This paper aims at measuring the reliability of judges in MT evaluation. The scope is two evaluation campaigns from the CESTA project, during which human evaluations were carried out on fluency and adequacy criteria for English-to-French documents. Our objectives were threefold: observe both inter- and intra-judge agreements, and then study the influence of the evaluation design especially implemented for the need of the campaigns. Indeed, a web interface was especially developed to help with the human judgments and store the results, but some design changes were made between the first and the second campaign. Considering the low agreements observed, the judges' behaviour has been analysed in that specific context. We also asked several judges to repeat their own evaluations a few times after the first judgments done during the official evaluation campaigns. Even if judges did not seem to agree fully at first sight, a less strict comparison led to a strong agreement. Furthermore, the evolution of the design during the project seemed to have been a source for the difficulties that judges encountered to keep the same interpretation of quality. @@ -2613,14 +2613,14 @@ MátyásBrendel RiccardoZaccarelli - LaurenceDevillers + LaurenceDevillers Building a System for Emotions Detection from Speech to Control an Affective Avatar http://www.lrec-conf.org/proceedings/lrec2010/pdf/403_Paper.pdf In this paper we describe a corpus set together from two sub-corpora. The CINEMO corpus contains acted emotional expression obtained by playing dubbing exercises. This new protocol is a way to collect mood-induced data in large amount which show several complex and shaded emotions. JEMO is a corpus collected with an emotion-detection game and contains more prototypical emotions than CINEMO. We show how the two sub-corpora balance and enrich each other and result in a better performance. We built male and female emotion models and use Sequential Fast Forward Feature Selection to improve detection performances. After feature-selection we obtain good results even with our strict speaker independent testing method. The global corpus contains 88 speakers (38 females, 50 males). This study has been done within the scope of the ANR (National Research Agency) Affective Avatar project which deals with building a system of emotions detection for monitoring an Artificial Agent by voice. brendel-etal-2010-building - RoxaneSegers + RoxaneSegers PiekVossen Facilitating Non-expert Users of the <fixed-case>KYOTO</fixed-case> Platform: the <fixed-case>TMEKO</fixed-case> Editing Protocol for Synset to Ontology Mappings http://www.lrec-conf.org/proceedings/lrec2010/pdf/406_Paper.pdf @@ -2655,7 +2655,7 @@ RichardSchwarz - HinrichSchütze + HinrichSchütze FabienneMartin AchimStein Identification of Rare & Novel Senses Using Translations in a Parallel Corpus @@ -2664,10 +2664,10 @@ schwarz-etal-2010-identification - CláudiaFreitas + CláudiaFreitas CristinaMota DianaSantos - Hugo GonçaloOliveira + Hugo GonçaloOliveira PaulaCarvalho Second <fixed-case>HAREM</fixed-case>: Advancing the State of the Art of Named Entity Recognition in <fixed-case>P</fixed-case>ortuguese http://www.lrec-conf.org/proceedings/lrec2010/pdf/412_Paper.pdf @@ -2694,9 +2694,9 @@ MartaVillegas - NúriaBel + NúriaBel SantiagoBel - VíctorRodríguez + VíctorRodríguez A Case Study on Interoperability for Language Resources and Applications http://www.lrec-conf.org/proceedings/lrec2010/pdf/418_Paper.pdf This paper reports our experience when integrating differ resources and services into a grid environment. The use case we address implies the deployment of several NLP applications as web services. The ultimate objective of this task was to create a scenario where researchers have access to a variety of services they can operate. These services should be easy to invoke and able to interoperate between one another. We essentially describe the interoperability problems we faced, which involve metadata interoperability, data interoperability and service interoperability. We devote special attention to service interoperability and explore the possibility to define common interfaces and semantic description of services. While the web services paradigm suits the integration of different services very well, this requires mutual understanding and the accommodation to common interfaces that not only provide technical solution but also ease the user‟s work. Defining common interfaces benefits interoperability but requires the agreement about operations and the set of inputs/outputs. Semantic annotation allows defining some sort of taxonomy that organizes and collects the set of admissible operations and types input/output parameters. @@ -2704,7 +2704,7 @@ BrunoCartoni - PierreZweigenbaum + PierreZweigenbaum Semi-Automated Extension of a Specialized Medical Lexicon for <fixed-case>F</fixed-case>rench http://www.lrec-conf.org/proceedings/lrec2010/pdf/420_Paper.pdf This paper describes the development of a specialized lexical resource for a specialized domain, namely medicine. First, in order to assess the linguistic phenomena that need to be adressed, we based our observation on a large collection of more than 300'000 terms, organised around conceptual identifiers. Based on these observations, we highlight the specificities that such a lexicon should take into account, namely in terms of inflectional and derivational knowledge. In a first experiment, we show that general resources lack a large part of the words needed to process specialized language. Secondly, we describe an experiment to feed semi-automatically a medical lexicon and populate it with inflectional information. This experiment is based on a semi-automatic methods that tries to acquire inflectional knowledge from frequent endings of words recorded in existing lexicon. Thanks to this, we increased the coverage of the target vocabulary from 14.1% to 25.7%. @@ -2719,9 +2719,9 @@ duarte-gibet-2010-heterogeneous - PatrickParoubek + PatrickParoubek AlexanderPak - DjamelMostefa + DjamelMostefa Annotations for Opinion Mining Evaluation in the Industrial Context of the <fixed-case>DOXA</fixed-case> project http://www.lrec-conf.org/proceedings/lrec2010/pdf/422_Paper.pdf After presenting opinion and sentiment analysis state of the art and the DOXA project, we review the few evaluation campaigns that have dealt in the past with opinion mining. Then we present the two level opinion and sentiment model that we will use for evaluation in the DOXA project and the annotation interface we use for hand annotating a reference corpus. We then present the corpus which will be used on DOXA and report on the hand-annotation task on a corpus of comments on video games and the solution adopted to obtain a sufficient level of inter-annotator agreement. @@ -2730,7 +2730,7 @@ MilenKouylekov YasharMehdad - MatteoNegri + MatteoNegri Mining <fixed-case>W</fixed-case>ikipedia for Large-scale Repositories of Context-Sensitive Entailment Rules http://www.lrec-conf.org/proceedings/lrec2010/pdf/425_Paper.pdf This paper focuses on the central role played by lexical information in the task of Recognizing Textual Entailment. In particular, the usefulness of lexical knowledge extracted from several widely used static resources, represented in the form of entailment rules, is compared with a method to extract lexical information from Wikipedia as a dynamic knowledge resource. The proposed acquisition method aims at maximizing two key features of the resulting entailment rules: coverage (i.e. the proportion of rules successfully applied over a dataset of TE pairs), and context sensitivity (i.e. the proportion of rules applied in appropriate contexts). Evaluation results show that Wikipedia can be effectively used as a source of lexical entailment rules, featuring both higher coverage and context sensitivity with respect to other resources. @@ -2753,9 +2753,9 @@ weller-heid-2010-extraction - PatrickParoubek - OlivierHamon - Ericde La Clergerie + PatrickParoubek + OlivierHamon + Ericde La Clergerie CyrilGrouin AnneVilnat The Second Evaluation Campaign of <fixed-case>PASSAGE</fixed-case> on Parsing of <fixed-case>F</fixed-case>rench @@ -2763,18 +2763,18 @@ paroubek-etal-2010-second - Kepa JosebaRodríguez + Kepa JosebaRodríguez FrancescaDelogu YannickVersley - Egon W.Stemle - MassimoPoesio + Egon W.Stemle + MassimoPoesio Anaphoric Annotation of <fixed-case>W</fixed-case>ikipedia and Blogs in the Live Memories Corpus http://www.lrec-conf.org/proceedings/lrec2010/pdf/431_Paper.pdf The Live Memories corpus is an Italian corpus annotated for anaphoric relations. This annotation effort aims to contribute to two significant issues for the CL research: the lack of annotated anaphoric resources for Italian and the increasing interest for the social Web. The Live Memories Corpus contains texts from the Italian Wikipedia about the region Trentino/Süd Tirol and from blog sites with users' comments. It is planned to add a set of articles of local news papers. The corpus includes manual annotated information about morphosyntactic agreement, anaphoricity, and semantic class of the NPs. The anaphoric annotation includes discourse deixis, bridging relations and markes cases of ambiguity with the annotation of alternative interpretations. For the annotation of the anaphoric links the corpus takes into account specific phenomena of the Italian language like incorporated clitics and phonetically non realized pronouns. Reliability studies for the annotation of the mentioned phenomena and for annotation of anaphoric links in general offer satisfactory results. The Wikipedia and blogs dataset will be distributed under Creative Commons Attributions licence. rodriguez-etal-2010-anaphoric - DanFlickinger + DanFlickinger StephanOepen GisleYtrestøl <fixed-case>W</fixed-case>iki<fixed-case>W</fixed-case>oods: Syntacto-Semantic Annotation for <fixed-case>E</fixed-case>nglish <fixed-case>W</fixed-case>ikipedia @@ -2792,7 +2792,7 @@ ruppenhofer-etal-2010-speaker - NickWebb + NickWebb DavidBenyon JayBradley PrebenHansen @@ -2804,7 +2804,7 @@ Carlos GómezGallo - T. FlorianJaeger + T. FlorianJaeger KatrinaFurth A Database for the Exploration of <fixed-case>S</fixed-case>panish Planning http://www.lrec-conf.org/proceedings/lrec2010/pdf/436_Paper.pdf @@ -2863,7 +2863,7 @@ rytting-etal-2010-error - Christopher RWalker + Christopher RWalker HannahCopperman Evaluating Complex Semantic Artifacts http://www.lrec-conf.org/proceedings/lrec2010/pdf/441_Paper.pdf @@ -2871,9 +2871,9 @@ walker-copperman-2010-evaluating - MohamedAltantawy + MohamedAltantawy NizarHabash - OwenRambow + OwenRambow IbrahimSaleh Morphological Analysis and Generation of <fixed-case>A</fixed-case>rabic Nouns: A Morphemic Functional Approach http://www.lrec-conf.org/proceedings/lrec2010/pdf/442_Paper.pdf @@ -2891,7 +2891,7 @@ HannahCopperman - Christopher R.Walker + Christopher R.Walker Fred’s Reusable Evaluation Device: Providing Support for Quick and Reliable Linguistic Annotation http://www.lrec-conf.org/proceedings/lrec2010/pdf/444_Paper.pdf This paper describes an interface that was developed for processing large amounts of human judgments of linguistically annotated data. Fred’s Reusable Evaluation Device (“Fred”) provides administrators with a tool to submit linguistic evaluation tasks to judges. Each evaluation task is then presented to exactly two judges, who can submit their judgments at their own leisure. Fred then provides several metrics to administrators. The most important metric is precision, which is provided for each evaluation task and each annotator. Administrators can look at precision for a given data set over time, as well as by evaluation type, data set, or annotator. Inter-annotator agreement is also reported, and that can be tracked over time as well. The interface was developed to provide a tool for evaluating semantically marked up text. The types of evaluations Fred has been used for so far include things like correctness of subject-relation identification, and correctness of temporal relations. However, Fred’s full versatility has not yet been fully exploited. @@ -2899,19 +2899,19 @@ AlexisBaird - Christopher R.Walker + Christopher R.Walker The Creation of a Large-Scale <fixed-case>LFG</fixed-case>-Based Gold Parsebank http://www.lrec-conf.org/proceedings/lrec2010/pdf/445_Paper.pdf Systems for syntactically parsing sentences have long been recognized as a priority in Natural Language Processing. Statistics-based systems require large amounts of high quality syntactically parsed data. Using the XLE toolkit developed at PARC and the LFG Parsebanker interface developed at Bergen, the Parsebank Project at Powerset has generated a rapidly increasing volume of syntactically parsed data. By using these tools, we are able to leverage the LFG framework to provide richer analyses via both constituent (c-) and functional (f-) structures. Additionally, the Parsebanking Project uses source data from Wikipedia rather than source data limited to a specific genre, such as the Wall Street Journal. This paper outlines the process we used in creating a large-scale LFG-Based Parsebank to address many of the shortcomings of previously-created parse banks such as the Penn Treebank. While the Parsebank corpus is still in progress, preliminary results using the data in a variety of contexts already show promise. baird-walker-2010-creation - KathrynBaker + KathrynBaker MichaelBloodgood - BonnieDorr + BonnieDorr Nathaniel W.Filardo - LoriLevin - ChristinePiatko + LoriLevin + ChristinePiatko A Modality Lexicon and its use in Automatic Tagging http://www.lrec-conf.org/proceedings/lrec2010/pdf/446_Paper.pdf This paper describes our resource-building results for an eight-week JHU Human Language Technology Center of Excellence Summer Camp for Applied Language Exploration (SCALE-2009) on Semantically-Informed Machine Translation. Specifically, we describe the construction of a modality annotation scheme, a modality lexicon, and two automated modality taggers that were built using the lexicon and annotation scheme. Our annotation scheme is based on identifying three components of modality: a trigger, a target and a holder. We describe how our modality lexicon was produced semi-automatically, expanding from an initial hand-selected list of modality trigger words and phrases. The resulting expanded modality lexicon is being made publicly available. We demonstrate that one tagger―a structure-based tagger―results in precision around 86% (depending on genre) for tagging of a standard LDC data set. In a machine translation application, using the structure-based tagger to annotate English modalities on an English-Urdu training corpus improved the translation quality score for Urdu by 0.3 Bleu points in the face of sparse training data. @@ -2927,7 +2927,7 @@ tanenblatt-etal-2010-conceptmapper - YoshihikoHayashi + YoshihikoHayashi ThierryDeclerck ChiharuNarawa <fixed-case>LAF</fixed-case>/<fixed-case>G</fixed-case>r<fixed-case>AF</fixed-case>-grounded Representation of Dependency Structures @@ -2942,7 +2942,7 @@ DeryleLonsdale PeterMcClanahan OwenMerkling - EricRingger + EricRingger KevinSeppi Tag Dictionaries Accelerate Manual Annotation http://www.lrec-conf.org/proceedings/lrec2010/pdf/451_Paper.pdf @@ -2957,8 +2957,8 @@ konstantopoulos-2010-learning - Chris IrwinDavis - DanMoldovan + Chris IrwinDavis + DanMoldovan Feasibility of Automatically Bootstrapping a <fixed-case>P</fixed-case>ersian <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et http://www.lrec-conf.org/proceedings/lrec2010/pdf/453_Paper.pdf In this paper we describe a proof-of-concept for the bootstrapping of a Persian WordNet. This effort was motivated by previous work done at Stanford University on bootstrapping an Arabic WordNet using a parallel corpus and an English WordNet. The principle of that work is based on the premise that paradigmatic relations are by nature deeply semantic, and as such, are likely to remain intact between languages. We performed our task on a Persian-English bilingual corpus of George Orwell’s Nineteen Eighty-Four. The corpus was neither aligned nor sense tagged, so it was necessary that these were undertaken first. A combination of manual and semiautomated methods were used to tag and sentence align the corpus. Actual mapping of English word senses onto Persian was done using automated techniques. Although Persian is written in Arabic script, it is an Indo-European language, while Arabic is a Central Semitic language. Despite their linguistic differences, we endeavor to test the applicability of the Stanford strategy to our task. @@ -2966,7 +2966,7 @@ Aditi SharmaGrover - Gerhard B.van Huyssteen + Gerhard B.van Huyssteen Marthinus W.Pretorius The <fixed-case>S</fixed-case>outh <fixed-case>A</fixed-case>frican Human Language Technologies Audit http://www.lrec-conf.org/proceedings/lrec2010/pdf/454_Paper.pdf @@ -2974,13 +2974,13 @@ grover-etal-2010-south - MassimoPoesio - MarcoBaroni + MassimoPoesio + MarcoBaroni OswaldLanz AlessandroLenci AlexandrosPotamianos - HinrichSchütze - SabineSchulte im Walde + HinrichSchütze + SabineSchulte im Walde LucaSurian <fixed-case>B</fixed-case>aby<fixed-case>E</fixed-case>xp: Constructing a Huge Multimodal Resource to Acquire Commonsense Knowledge Like Children Do http://www.lrec-conf.org/proceedings/lrec2010/pdf/455_Paper.pdf @@ -2989,10 +2989,10 @@ IñakiSainz - EvaNavas - InmaHernáez - AntonioBonafonte - FranciscoCampillo + EvaNavas + InmaHernáez + AntonioBonafonte + FranciscoCampillo <fixed-case>TTS</fixed-case> Evaluation Campaign with a Common <fixed-case>S</fixed-case>panish Database http://www.lrec-conf.org/proceedings/lrec2010/pdf/456_Paper.pdf This paper describes the first TTS evaluation campaign designed for Spanish. Seven research institutions took part in the evaluation campaign and developed a voice from a common speech database provided by the organisation. Each participating team had a period of seven weeks to generate a voice. Next, a set of sentences were released and each team had to synthesise them within a week period. Finally, some of the synthesised test audio files were subjectively evaluated via an online test according to the following criteria: similarity to the original voice, naturalness and intelligibility. Box-plots, Wilcoxon tests and WER have been generated in order to analyse the results. Two main conclusions can be drawn: On the one hand, there is considerable margin for improvement to reach the quality level of the natural voice. On the other hand, two systems get significantly better results than the rest: one is based on statistical parametric synthesis and the other one is a concatenative system that makes use of a sinusoidal model to modify both prosody and smooth spectral joints. Therefore, it seems that some kind of spectral control is needed when building voices with a medium size database for unrestricted domains. @@ -3017,9 +3017,9 @@ PamelaForner DaniloGiampiccolo - BernardoMagnini - AnselmoPeñas - ÁlvaroRodrigo + BernardoMagnini + AnselmoPeñas + ÁlvaroRodrigo RichardSutcliffe Evaluating Multilingual Question Answering Systems at <fixed-case>CLEF</fixed-case> http://www.lrec-conf.org/proceedings/lrec2010/pdf/464_Paper.pdf @@ -3031,7 +3031,7 @@ DóraSzauter AttilaAlmási GyörgyMóra - ZoltánAlexin + ZoltánAlexin JánosCsirik <fixed-case>H</fixed-case>ungarian Dependency Treebank http://www.lrec-conf.org/proceedings/lrec2010/pdf/465_Paper.pdf @@ -3040,8 +3040,8 @@ FrancescaFallucchi - Maria TeresaPazienza - Fabio MassimoZanzotto + Maria TeresaPazienza + Fabio MassimoZanzotto Generic Ontology Learners on Application Domains http://www.lrec-conf.org/proceedings/lrec2010/pdf/466_Paper.pdf In ontology learning from texts, we have ontology-rich domains where we have large structured domain knowledge repositories or we have large general corpora with large general structured knowledge repositories such as WordNet (Miller, 1995). Ontology learning methods are more useful in ontology-poor domains. Yet, in these conditions, these methods have not a particularly high performance as training material is not sufficient. In this paper we present an LSP ontology learning method that can exploit models learned from a generic domain to extract new information in a specific domain. In our model, we firstly learn a model from training data and then we use the learned model to discover knowledge in a specific domain. We tested our model adaptation strategy using a background domain that is applied to learn the isa networks in the Earth Observation Domain as a specific domain. We will demonstrate that our method captures domain knowledge better than other generic models: our model better captures what is expected by domain experts than a baseline method based only on WordNet. This latter is better correlated with non-domain annotators asked to produce the ontology for the specific domain. @@ -3068,7 +3068,7 @@ YanZhao - Gertjanvan Noord + Gertjanvan Noord <fixed-case>POS</fixed-case> Multi-tagging Based on Combined Models http://www.lrec-conf.org/proceedings/lrec2010/pdf/470_Paper.pdf In the POS tagging task, there are two kinds of statistical models: one is generative model, such as the HMM, the others are discriminative models, such as the Maximum Entropy Model (MEM). POS multi-tagging decoding method includes the N-best paths method and forward-backward method. In this paper, we use the forward-backward decoding method based on a combined model of HMM and MEM. If P(t) is the forward-backward probability of each possible tag t, we first calculate P(t) according HMM and MEM separately. For all tags options in a certain position in a sentence, we normalize P(t) in HMM and MEM separately. Probability of the combined model is the sum of normalized forward-backward probabilities P norm(t) in HMM and MEM. For each word w, we select the best tag in which the probability of combined model is the highest. In the experiments, we use combined model and get higher accuracy than any single model on POS tagging tasks of three languages, which are Chinese, English and Dutch. The result indicates that our combined model is effective. @@ -3076,11 +3076,11 @@ IbonSaratxaga - InmaculadaHernáez - EvaNavas + InmaculadaHernáez + EvaNavas IñakiSainz IkerLuengo - JonSánchez + JonSánchez IgorOdriozola DanielErro <fixed-case>A</fixed-case>ho<fixed-case>T</fixed-case>ransf: A Tool for Multiband Excitation Based Speech Analysis and Modification @@ -3090,7 +3090,7 @@ LouiseDeléger - PierreZweigenbaum + PierreZweigenbaum Identifying Paraphrases between Technical and Lay Corpora http://www.lrec-conf.org/proceedings/lrec2010/pdf/472_Paper.pdf In previous work, we presented a preliminary study to identify paraphrases between technical and lay discourse types from medical corpora dedicated to the French language. In this paper, we test the hypothesis that the same kinds of paraphrases as for French can be detected between English technical and lay discourse types and report the adaptation of our method from French to English. Starting from the constitution of monolingual comparable corpora, we extract two kinds of paraphrases: paraphrases between nominalizations and verbal constructions and paraphrases between neo-classical compounds and modern-language phrases. We do this relying on morphological resources and a set of extraction rules we adapt from the original approach for French. Results show that paraphrases could be identified with a rather good precision, and that these types of paraphrase are relevant in the context of the opposition between technical and lay discourse types. These observations are consistent with the results obtained for French, which demonstrates the portability of the approach as well as the similarity of the two languages as regards the use of those kinds of expressions in technical and lay discourse types. @@ -3100,7 +3100,7 @@ StavrosNtalampiras TodorGanchev IlyasPotamitis - NikosFakotakis + NikosFakotakis Heterogeneous Sensor Database in Support of Human Behaviour Analysis in Unrestricted Environments: The Audio Part http://www.lrec-conf.org/proceedings/lrec2010/pdf/474_Paper.pdf In the present paper we report on a recent effort that resulted in the establishment of a unique multimodal database, referred to as the PROMETHEUS database. This database was created in support of research and development activities, performed within the European Commission FP7 PROMETHEUS project, aiming at the creation of a framework for monitoring and interpretation of human behaviours in unrestricted indoors and outdoors environments. In the present paper we discuss the design and the implementation of the audio part of the database and offer statistical information about the audio content. Specifically, it contains single-person and multi-person scenarios, but also covers scenarios with interactions between groups of people. The database design was conceived with extended support of research and development activities devoted to detection of typical and atypical events, emergency and crisis situations, which assist for achieving situational awareness and more reliable interpretation of the context in which humans behave. The PROMETHEUS database allows for embracing a wide range of real-world applications, including smart-home and human-robot interaction interfaces, indoors/outdoors public areas surveillance, airport terminals or city park supervision, etc. A major portion of the PROMETHEUS database will be made publically available by the end of year 2010. @@ -3108,16 +3108,16 @@ KhalilDahab - AnjaBelz + AnjaBelz A Game-based Approach to Transcribing Images of Text http://www.lrec-conf.org/proceedings/lrec2010/pdf/476_Paper.pdf Creating language resources is expensive and time-consuming, and this forms a bottleneck in the development of language technology, for less-studied non-European languages in particular. The recent internet phenomenon of crowd-sourcing offers a cost-effective and potentially fast way of overcoming such language resource acquisition bottlenecks. We present a methodology that takes as its input scanned documents of typed or hand-written text, and produces transcriptions of the text as its output. Instead of using Optical Character Recognition (OCR) technology, the methodology is game-based and produces such transcriptions as a by-product. The approach is intended particularly for languages for which language technology and resources are scarce and reliable OCR technology may not exist. It can be used in place of OCR for transcribing individual documents, or to create corpora of paired images and transcriptions required to train OCR tools. We present Minefield, a prototype implementation of the approach which is currently collecting Arabic transcriptions. dahab-belz-2010-game - NicolasSerrano + NicolasSerrano FranciscoCastro - AlfonsJuan + AlfonsJuan The <fixed-case>RODRIGO</fixed-case> Database http://www.lrec-conf.org/proceedings/lrec2010/pdf/477_Paper.pdf Annotation of digitized pages from historical document collections is very important to research on automatic extraction of text blocks, lines, and handwriting recognition. We have recently introduced a new handwritten text database, GERMANA, which is based on a Spanish manuscript from 1891. To our knowledge, GERMANA is the first publicly available database mostly written in Spanish and comparable in size to standard databases. In this paper, we present another handwritten text database, RODRIGO, completely written in Spanish and comparable in size to GERMANA. However, RODRIGO comes from a much older manuscript, from 1545, where the typical difficult characteristics of historical documents are more evident. In particular, the writing style, which has clear Gothic influences, is significantly more complex than that of GERMANA. We also provide baseline results of handwriting recognition for reference in future studies, using standard techniques and tools for preprocessing, feature extraction, HMM-based image modelling, and language modelling. @@ -3129,14 +3129,14 @@ IdoDagan DaniloGiampiccolo Medea LoLeggio - BernardoMagnini + BernardoMagnini Building Textual Entailment Specialized Data Sets: a Methodology for Isolating Linguistic Phenomena Relevant to Inference http://www.lrec-conf.org/proceedings/lrec2010/pdf/478_Paper.pdf This paper proposes a methodology for the creation of specialized data sets for Textual Entailment, made of monothematic Text-Hypothesis pairs (i.e. pairs in which only one linguistic phenomenon relevant to the entailment relation is highlighted and isolated). The expected benefits derive from the intuition that investigating the linguistic phenomena separately, i.e. decomposing the complexity of the TE problem, would yield an improvement in the development of specific strategies to cope with them. The annotation procedure assumes that humans have knowledge about the linguistic phenomena relevant to inference, and a classification of such phenomena both into fine grained and macro categories is suggested. We experimented with the proposed methodology over a sample of pairs taken from the RTE-5 data set, and investigated critical issues arising when entailment, contradiction or unknown pairs are considered. The result is a new resource, which can be profitably used both to advance the comprehension of the linguistic phenomena relevant to entailment judgments and to make a first step towards the creation of large-scale specialized data sets. bentivogli-etal-2010-building - AmalAl-Saif + AmalAl-Saif KatjaMarkert The <fixed-case>L</fixed-case>eeds <fixed-case>A</fixed-case>rabic Discourse Treebank: Annotating Discourse Connectives for <fixed-case>A</fixed-case>rabic http://www.lrec-conf.org/proceedings/lrec2010/pdf/479_Paper.pdf @@ -3144,19 +3144,19 @@ al-saif-markert-2010-leeds - IoanaVasilescu - SophieRosset - MartineAdda-Decker + IoanaVasilescu + SophieRosset + MartineAdda-Decker On the Role of Discourse Markers in Interactive Spoken Question Answering Systems http://www.lrec-conf.org/proceedings/lrec2010/pdf/481_Paper.pdf This paper presents a preliminary analysis of the role of some discourse markers and the vocalic hesitation ""euh"" in a corpus of spoken human utterances collected with the Ritel system, an open domain and spoken dialog system. The frequency and contextual combinatory of classical discourse markers and of the vocalic hesitation have been studied. This analysis pointed out some specificity in terms of combinatory of the analyzed items. The classical discourse markers seem to help initiating larger discursive blocks both at initial and medial positions of the on-going turns. The vocalic hesitation stand also for marking the user's embarrassments and wish to close the dialog. vasilescu-etal-2010-role - BjörnSchuller + BjörnSchuller RiccardoZaccarelli NicolasRollet - LaurenceDevillers + LaurenceDevillers <fixed-case>CINEMO</fixed-case> — A <fixed-case>F</fixed-case>rench Spoken Language Resource for Complex Emotions: Facts and Baselines http://www.lrec-conf.org/proceedings/lrec2010/pdf/483_Paper.pdf The CINEMO corpus of French emotional speech provides a richly annotated resource to help overcome the apparent lack of learning and testing speech material for complex, i.e. blended or mixed emotions. The protocol for its collection was dubbing selected emotional scenes from French movies. 51 speakers are contained and the total speech time amounts to 2 hours and 13 minutes and 4k speech chunks after segmentation. Extensive labelling was carried out in 16 categories for major and minor emotions and in 6 continuous dimensions. In this contribution we give insight into the corpus statistics focusing in particular on the topic of complex emotions, and provide benchmark recognition results obtained in exemplary large feature space evaluations. In the result the labelling oft he collected speech clearly demonstrates that a complex handling of emotion seems needed. Further, the automatic recognition experiments provide evidence that the automatic recognition of blended emotions appears to be feasible. @@ -3194,7 +3194,7 @@ IdoDagan DaniloGiampiccolo ShacharMirkin - EmanuelePianta + EmanuelePianta AsherStern A Resource for Investigating the Impact of Anaphora and Coreference on Inference. http://www.lrec-conf.org/proceedings/lrec2010/pdf/488_Paper.pdf @@ -3229,7 +3229,7 @@ MaxJakob - MarkétaLopatková + MarkétaLopatková ValiaKordoni Mapping between Dependency Structures and Compositional Semantic Representations http://www.lrec-conf.org/proceedings/lrec2010/pdf/493_Paper.pdf @@ -3246,7 +3246,7 @@ ben-gera-etal-2010-semantic - NuriaGala + NuriaGala VéroniqueRey MichaelZock A Tool for Linking Stems and Conceptual Fragments to Enhance word Access @@ -3267,7 +3267,7 @@ FlorianLaws BeateDorow UlrichHeid - HinrichSchütze + HinrichSchütze Building a Cross-lingual Relatedness Thesaurus using a Graph Similarity Measure http://www.lrec-conf.org/proceedings/lrec2010/pdf/499_Paper.pdf The Internet is an ever growing source of information stored in documents of different languages. Hence, cross-lingual resources are needed for more and more NLP applications. This paper presents (i) a graph-based method for creating one such resource and (ii) a resource created using the method, a cross-lingual relatedness thesaurus. Given a word in one language, the thesaurus suggests words in a second language that are semantically related. The method requires two monolingual corpora and a basic dictionary. Our general approach is to build two monolingual word graphs, with nodes representing words and edges representing linguistic relations between words. A bilingual dictionary containing basic vocabulary provides seed translations relating nodes from both graphs. We then use an inter-graph node-similarity algorithm to discover related words. Evaluation with three human judges revealed that 49% of the English and 57% of the German words discovered by our method are semantically related to the target words. We publish two resources in conjunction with this paper. First, noun coordinations extracted from the German and English Wikipedias. Second, the cross-lingual relatedness thesaurus which can be used in experiments involving interactive cross-lingual query expansion. @@ -3275,9 +3275,9 @@ SamuelBroscheit - Simone PaoloPonzetto + Simone PaoloPonzetto YannickVersley - MassimoPoesio + MassimoPoesio Extending <fixed-case>BART</fixed-case> to Provide a Coreference Resolution System for <fixed-case>G</fixed-case>erman http://www.lrec-conf.org/proceedings/lrec2010/pdf/500_Paper.pdf We present a flexible toolkit-based approach to automatic coreference resolution on German text. We start with our previous work aimed at reimplementing the system from Soon et al. (2001) for English, and extend it to duplicate a version of the state-of-the-art proposal from Klenner and Ailloud (2009). Evaluation performed on a benchmarking dataset, namely the TueBa-D/Z corpus (Hinrichs et al., 2005b), shows that machine learning based coreference resolution can be robustly performed in a language other than English. @@ -3287,7 +3287,7 @@ UlrichHeid HelmutSchmid KerstinEckart - ErhardHinrichs + ErhardHinrichs A Corpus Representation Format for Linguistic Web Services: The <fixed-case>D</fixed-case>-<fixed-case>SPIN</fixed-case> Text Corpus Format and its Relationship with <fixed-case>ISO</fixed-case> Standards http://www.lrec-conf.org/proceedings/lrec2010/pdf/503_Paper.pdf In the framework of the preparation of linguistic web services for corpus processing, the need for a representation format was felt, which supports interoperability between different web services in a corpus processing pipeline, but also provides a well-defined interface to both, legacy tools and their data formats and upcoming international standards. We present the D-SPIN text corpus format, TCF, which was designed for this purpose. It is a stand-off XML format, inspired by the philosophy of the emerging standards LAF (Linguistic Annotation Framework) and its ``instances'' MAF for morpho-syntactic annotation and SynAF for syntactic annotation. Tools for the exchange with existing (best practice) formats are available, and a converter from MAF to TCF is being tested in spring 2010. We describe the usage scenario where TCF is embedded and the properties and architecture of TCF. We also give examples of TCF encoded data and describe the aspects of syntactic and semantic interoperability already addressed. @@ -3304,7 +3304,7 @@ JakobHalskov - Dorte HaltrupHansen + Dorte HaltrupHansen AnnaBraasch SussiOlsen Quality Indicators of <fixed-case>LSP</fixed-case> Texts — Selection and Measurements Measuring the Terminological Usefulness of Documents for an <fixed-case>LSP</fixed-case> Corpus @@ -3353,15 +3353,15 @@ StefanoBortoli NoemiScarpato AndreaTurbati - PaoloBouquet - Maria TeresaPazienza + PaoloBouquet + Maria TeresaPazienza <fixed-case>M</fixed-case>askkot — An Entity-centric Annotation Platform http://www.lrec-conf.org/proceedings/lrec2010/pdf/515_Paper.pdf The Semantic Web is facing the important challenge to maintain its promise of a real world-wide graph of interconnected resources. Unfortunately, while URIs almost guarantee a direct reference to entities, the relation between the two is not bijective. Many different URI references to same concepts and entities can arise when -- in such a heterogeneous setting as the WWW -- people independently build new ontologies, or populate shared ones with new arbitrarily identified individuals. The proliferation of URIs is an unwanted, though natural effect strictly bound to the same principles which characterize the Semantic Web; reducing this phenomenon will improve the recall of Semantic Search engines, which could rely on explicit links between heterogeneous information sources. To address this problem, in this paper we present an integrated environment combining the semantic annotation and ontology building features available in the Semantic Turkey web browser extension, with globally unique identifiers for entities provided by the okkam Entity Name System, thus realizing a valuable resource for preventing diffusion of multiple URIs on the (Semantic) Web. stellato-etal-2010-maskkot - PetrPollák + PetrPollák JosefRajnoha Multi-Channel Database of Spontaneous <fixed-case>C</fixed-case>zech with Synchronization of Channels Recorded by Independent Devices http://www.lrec-conf.org/proceedings/lrec2010/pdf/516_Paper.pdf @@ -3370,8 +3370,8 @@ GuillaumeBernard - SophieRosset - MartineAdda-Decker + SophieRosset + MartineAdda-Decker OlivierGalibert A Question-answer Distance Measure to Investigate <fixed-case>QA</fixed-case> System Progress http://www.lrec-conf.org/proceedings/lrec2010/pdf/518_Paper.pdf @@ -3379,8 +3379,8 @@ bernard-etal-2010-question - AndreBlessing - HinrichSchütze + AndreBlessing + HinrichSchütze Fine-Grained Geographical Relation Extraction from <fixed-case>W</fixed-case>ikipedia http://www.lrec-conf.org/proceedings/lrec2010/pdf/519_Paper.pdf In this paper, we present work on enhancing the basic data resource of a context-aware system. Electronic text offers a wealth of information about geospatial data and can be used to improve the completeness and accuracy of geospatial resources (e.g., gazetteers). First, we introduce a supervised approach to extracting geographical relations on a fine-grained level. Second, we present a novel way of using Wikipedia as a corpus based on self-annotation. A self-annotation is an automatically created high-quality annotation that can be used for training and evaluation. Wikipedia contains two types of different context: (i) unstructured text and (ii) structured data: templates (e.g., infoboxes about cities), lists and tables. We use the structured data to annotate the unstructured text. Finally, the extracted fine-grained relations are used to complete gazetteer data. The precision and recall scores of more than 97 percent confirm that a statistical IE pipeline can be used to improve the data quality of community-based resources. @@ -3405,9 +3405,9 @@ tatsumi-etal-2010-evaluating - DanicaDamljanovic + DanicaDamljanovic MilanAgatonovic - HamishCunningham + HamishCunningham Identification of the Question Focus: Combining Syntactic Analysis and Ontology-based Lookup through the User Interaction http://www.lrec-conf.org/proceedings/lrec2010/pdf/524_Paper.pdf Most question-answering systems contain a classifier module which determines a question category, based on which each question is assigned an answer type. However, setting up syntactic patterns for this classification is a big challenge. In addition, in the case of ontology-based systems, the answer type should be aligned to the queried knowledge structure. In this paper, we present an approach for determining the answer type semi-automatically. We first identify the question focus using syntactic parsing, and then try to identify the answer type by combining the head of the focus with the ontology-based lookup. When this combination is not enough to make conclusions automatically, the user is engaged into a dialog in order to resolve the answer type. User selections are saved and used for training the system in order to improve its performance over time. Further on, the answer type is used to show the feedback and the concise answer to the user. Our approach is evaluated using 250 questions from the Mooney Geoquery dataset. @@ -3418,7 +3418,7 @@ BrigitteGrau OlivierFerret CyrilGrouin - VéroniqueMoriceau + VéroniqueMoriceau IsabelleRobba XavierTannier AnneVilnat @@ -3430,7 +3430,7 @@ Silvana Marianela BernaolaBiggio - ManuelaSperanza + ManuelaSperanza RobertoZanoli Entity Mention Detection using a Combination of Redundancy-Driven Classifiers http://www.lrec-conf.org/proceedings/lrec2010/pdf/530_Paper.pdf @@ -3469,19 +3469,19 @@ vorwerk-etal-2010-wapusk20 - EnekoAgirre + EnekoAgirre MontseCuadros - GermanRigau - AitorSoroa + GermanRigau + AitorSoroa Exploring Knowledge Bases for Similarity http://www.lrec-conf.org/proceedings/lrec2010/pdf/534_Paper.pdf Graph-based similarity over WordNet has been previously shown to perform very well on word similarity. This paper presents a study of the performance of such a graph-based algorithm when using different relations and versions of Wordnet. The graph algorithm is based on Personalized PageRank, a random-walk based algorithm which computes the probability of a random-walk initiated in the target word to reach any synset following the relations in WordNet (Haveliwala, 2002). Similarity is computed as the cosine of the probability distributions for each word over WordNet. The best combination of relations includes all relations in WordNet 3.0, included disambiguated glosses, and automatically disambiguated topic signatures called KnowNets. All relations are part of the official release of WordNet, except KnowNets, which have been derived automatically. The results over the WordSim 353 dataset show that using the adequate relations the performance improves over previously published WordNet-based results on the WordSim353 dataset (Finkelstein et al., 2002). The similarity software and some graphs used in this paper are publicly available at http://ixa2.si.ehu.es/ukb. agirre-etal-2010-exploring - CristinaSánchez-Marco - GemmaBoleda - Josep MariaFontana + CristinaSánchez-Marco + GemmaBoleda + Josep MariaFontana JudithDomingo Annotation and Representation of a Diachronic Corpus of <fixed-case>S</fixed-case>panish http://www.lrec-conf.org/proceedings/lrec2010/pdf/535_Paper.pdf @@ -3499,9 +3499,9 @@ RomaricBesançon Gaëlde Chalendar OlivierFerret - FaizaGara + FaizaGara OlivierMesnard - MeriamaLaïb + MeriamaLaïb NasredineSemmar <fixed-case>LIMA</fixed-case> : A Multilingual Framework for Linguistic Analysis and Linguistic Resources Development and Evaluation http://www.lrec-conf.org/proceedings/lrec2010/pdf/537_Paper.pdf @@ -3509,7 +3509,7 @@ besancon-etal-2010-lima - GrzegorzChrupała + GrzegorzChrupała DietrichKlakow A Named Entity Labeler for <fixed-case>G</fixed-case>erman: Exploiting <fixed-case>W</fixed-case>ikipedia and Distributional Clusters http://www.lrec-conf.org/proceedings/lrec2010/pdf/538_Paper.pdf @@ -3524,10 +3524,10 @@ rosell-2010-text - JesúsGonzález-Rubio + JesúsGonzález-Rubio JorgeCivera - AlfonsJuan - FranciscoCasacuberta + AlfonsJuan + FranciscoCasacuberta <fixed-case>S</fixed-case>aturnalia: A <fixed-case>L</fixed-case>atin-<fixed-case>C</fixed-case>atalan Parallel Corpus for Statistical <fixed-case>MT</fixed-case> http://www.lrec-conf.org/proceedings/lrec2010/pdf/541_Paper.pdf Currently, a great effort is being carried out in the digitalisation of large historical document collections for preservation purposes. The documents in these collections are usually written in ancient languages, such as Latin or Greek, which limits the access of the general public to their content due to the language barrier. Therefore, digital libraries aim not only at storing raw images of digitalised documents, but also to annotate them with their corresponding text transcriptions and translations into modern languages. Unfortunately, ancient languages have at their disposal scarce electronic resources to be exploited by natural language processing techniques. This paper describes the compilation process of a novel Latin-Catalan parallel corpus as a new task for statistical machine translation (SMT). Preliminary experimental results are also reported using a state-of-the-art phrase-based SMT system. The results presented in this work reveal the complexity of the task and its challenging, but interesting nature for future development. @@ -3538,15 +3538,15 @@ SeanNeilan GaryAn NorikoTomuro - StevenLytinen + StevenLytinen <fixed-case>D</fixed-case>jangology: A Light-weight Web-based Tool for Distributed Collaborative Text Annotation http://www.lrec-conf.org/proceedings/lrec2010/pdf/543_Paper.pdf Manual text annotation is a resource-consuming endeavor necessary for NLP systems when they target new tasks or domains for which there are no existing annotated corpora. Distributing the annotation work across multiple contributors is a natural solution to reduce and manage the effort required. Although there are a few publicly available tools which support distributed collaborative text annotation, most of them have complex user interfaces and require a significant amount of involvement from the annotators/contributors as well as the project developers and administrators. We present a light-weight web application for highly distributed annotation projects - Djangology. The application takes advantage of the recent advances in web framework architecture that allow rapid development and deployment of web applications thus minimizing development time for customization. The application's web-based interface gives project administrators the ability to easily upload data, define project schemas, assign annotators, monitor progress, and review inter-annotator agreement statistics. The intuitive web-based user interface encourages annotator participation as contributors are not burdened by tool manuals, local installation, or configuration. The system has achieved a user response rate of 70% in two annotation projects involving more than 250 medical experts from various geographic locations. apostolova-etal-2010-djangology - LeonDerczynski - RobertGaizauskas + LeonDerczynski + RobertGaizauskas Analysing Temporally Annotated Corpora with <fixed-case>CAV</fixed-case>a<fixed-case>T</fixed-case> http://www.lrec-conf.org/proceedings/lrec2010/pdf/546_Paper.pdf We present CAVaT, a tool that performs Corpus Analysis and Validation for TimeML. CAVaT is an open source, modular checking utility for statistical analysis of features specific to temporally-annotated natural language corpora. It provides reporting, highlights salient links between a variety of general and time-specific linguistic features, and also validates a temporal annotation to ensure that it is logically consistent and sufficiently annotated. Uniquely, CAVaT provides analysis specific to TimeML-annotated temporal information. TimeML is a standard for annotating temporal information in natural language text. In this paper, we present the reporting part of CAVaT, and then its error-checking ability, including the workings of several novel TimeML document verification methods. This is followed by the execution of some example tasks using the tool to show relations between times, events, signals and links. We also demonstrate inconsistencies in a TimeML corpus (TimeBank) that have been detected with CAVaT. @@ -3555,8 +3555,8 @@ MartinReynaert NellekeOostdijk - OrphéeDe Clercq - Henkvan den Heuvel + OrphéeDe Clercq + Henkvan den Heuvel Franciskade Jong Balancing <fixed-case>S</fixed-case>o<fixed-case>N</fixed-case>a<fixed-case>R</fixed-case>: <fixed-case>IPR</fixed-case> versus Processing Issues in a 500-Million-Word Written <fixed-case>D</fixed-case>utch Reference Corpus http://www.lrec-conf.org/proceedings/lrec2010/pdf/549_Paper.pdf @@ -3566,7 +3566,7 @@ SamuelCruz-Lara GilFrancopoulo - LaurentRomary + LaurentRomary NasredineSemmar <fixed-case>MLIF</fixed-case> : A Metamodel to Represent and Exchange Multilingual Textual Information http://www.lrec-conf.org/proceedings/lrec2010/pdf/550_Paper.pdf @@ -3585,7 +3585,7 @@ FrancescaBonin FeliceDell’Orletta - SimonettaMontemagni + SimonettaMontemagni GiuliaVenturi A Contrastive Approach to Multi-word Extraction from Domain-specific Corpora http://www.lrec-conf.org/proceedings/lrec2010/pdf/553_Paper.pdf @@ -3594,7 +3594,7 @@ OlivierBlanc - MatthieuConstant + MatthieuConstant AnneDister PatrickWatrin Partial Parsing of Spontaneous Spoken <fixed-case>F</fixed-case>rench @@ -3603,13 +3603,13 @@ blanc-etal-2010-partial - AnneliesBraffort + AnneliesBraffort LaurenceBolot EmilieChételat-Pelé - AnnickChoisier + AnnickChoisier MaximeDelorme MichaelFilhol - JérémieSegouat + JérémieSegouat CyrilVerrecchia FloraBadin NadègeDevos @@ -3620,8 +3620,8 @@ SaraTonelli - EmanuelePianta - RodolfoDelmonte + EmanuelePianta + RodolfoDelmonte MicheleBrunelli <fixed-case>V</fixed-case>en<fixed-case>P</fixed-case>ro: A Morphological Analyzer for Venetan http://www.lrec-conf.org/proceedings/lrec2010/pdf/556_Paper.pdf @@ -3629,7 +3629,7 @@ tonelli-etal-2010-venpro - MohamedMaamouri + MohamedMaamouri AnnBies SethKulick WajdiZaghouani @@ -3648,18 +3648,18 @@ heja-2010-role - HarryBunt + HarryBunt JanAlexandersson JeanCarletta Jae-WoongChoe - Alex ChengyuFang - KoitiHasida + Alex ChengyuFang + KoitiHasida KiyongLee VolhaPetukhova - AndreiPopescu-Belis - LaurentRomary + AndreiPopescu-Belis + LaurentRomary ClaudiaSoria - DavidTraum + DavidTraum Towards an <fixed-case>ISO</fixed-case> Standard for Dialogue Act Annotation http://www.lrec-conf.org/proceedings/lrec2010/pdf/560_Paper.pdf This paper describes an ISO project which aims at developing a standard for annotating spoken and multimodal dialogue with semantic information concerning the communicative functions of utterances, the kind of semantic content they address, and their relations with what was said and done earlier in the dialogue. The project, ISO 24617-2 ""Semantic annotation framework, Part 2: Dialogue acts"", is currently at DIS stage. The proposed annotation schema distinguishes 9 orthogonal dimensions, allowing each functional segment in dialogue to have a function in each of these dimensions, thus accounting for the multifunctionality that utterances in dialogue often have. A number of core communicative functions is defined in the form of ISO data categories, available at http://semantic-annotation.uvt.nl/dialogue-acts/iso-datcats.pdf; they are divided into ""dimension-specific"" functions, which can be used only in a particular dimension, such as Turn Accept in the Turn Management dimension, and ""general-purpose"" functions, which can be used in any dimension, such as Inform and Request. An XML-based annotation language, ""DiAML"" is defined, with an abstract syntax, a semantics, and a concrete syntax. @@ -3667,11 +3667,11 @@ ArchnaBhatia - RajeshBhatt + RajeshBhatt BhuvanaNarasimhan - MarthaPalmer - OwenRambow - Dipti MisraSharma + MarthaPalmer + OwenRambow + Dipti MisraSharma MichaelTepper AshwiniVaidya FeiXia @@ -3682,24 +3682,24 @@ MarinaLloberes - IreneCastellón - LluísPadró + IreneCastellón + LluísPadró <fixed-case>S</fixed-case>panish <fixed-case>F</fixed-case>ree<fixed-case>L</fixed-case>ing Dependency Grammar http://www.lrec-conf.org/proceedings/lrec2010/pdf/562_Paper.pdf This paper presents the development of an open-source Spanish Dependency Grammar implemented in FreeLing environment. This grammar was designed as a resource for NLP applications that require a step further in natural language automatic analysis, as is the case of Spanish-to-Basque translation. The development of wide-coverage rule-based grammars using linguistic knowledge contributes to extend the existing Spanish deep parsers collection, which sometimes is limited. Spanish FreeLing Dependency Grammar, named EsTxala, provides deep and robust parse trees, solving attachments for any structure and assigning syntactic functions to dependencies. These steps are dealt with hand-written rules based on linguistic knowledge. As a result, FreeLing Dependency Parser gives a unique analysis as a dependency tree for each sentence analyzed. Since it is a resource open to the scientific community, exhaustive grammar evaluation is being done to determine its accuracy as well as strategies for its manteinance and improvement. In this paper, we show the results of an experimental evaluation carried out over EsTxala in order to test our evaluation methodology. lloberes-etal-2010-spanish - Magali SanchesDuran - Marcelo AdrianoAmâncio - Sandra MariaAluísio + Magali SanchesDuran + Marcelo AdrianoAmâncio + Sandra MariaAluísio Assigning Wh-Questions to Verbal Arguments: Annotation Tools Evaluation and Corpus Building http://www.lrec-conf.org/proceedings/lrec2010/pdf/564_Paper.pdf This work reports the evaluation and selection of annotation tools to assign wh-question labels to verbal arguments in a sentence. Wh-question assignment discussed herein is a kind of semantic annotation which involves two tasks: making delimitation of verbs and arguments, and linking verbs to its arguments by question labels. As it is a new type of semantic annotation, there is no report about requirements an annotation tool should have to face it. For this reason, we decided to select the most appropriated tool in two phases. In the first phase, we executed the task with an annotation tool we have used before in another task. Such phase helped us to test the task and enabled us to know which features were or not desirable in an annotation tool for our purpose. In the second phase, guided by such requirements, we evaluated several tools and selected a tool for the real task. After corpus annotation conclusion, we report some of the annotation results and some comments on the improvements there should be made in an annotation tool to better support such kind of annotation task. duran-etal-2010-assigning - RalphGrishman + RalphGrishman The Impact of Task and Corpus on Event Extraction Systems http://www.lrec-conf.org/proceedings/lrec2010/pdf/565_Paper.pdf The term “event extraction” covers a wide range of information extraction tasks, and methods developed and evaluated for one task may prove quite unsuitable for another. Understanding these task differences is essential to making broad progress in event extraction. We look back at the MUC and ACE tasks in terms of one characteristic, the breadth of the scenario ― how wide a range of information is subsumed in a single extraction task. We examine how this affects strategies for collecting information and methods for semi-supervised training of new extractors. We also consider the heterogeneity of corpora ― how varied the topics of documents in a corpus are. Extraction systems may be intended in principle for general news but are typically evaluated on topic-focused corpora, and this evaluation context may affect system design. As one case study, we examine the task of identifying physical attack events in news corpora, observing the effect on system performance of shifting from an attack-event-rich corpus to a more varied corpus and considering how the impact of this shift may be mitigated. @@ -3708,7 +3708,7 @@ SethKulick AnnBies - MohamedMaamouri + MohamedMaamouri Consistent and Flexible Integration of Morphological Annotation in the <fixed-case>A</fixed-case>rabic Treebank http://www.lrec-conf.org/proceedings/lrec2010/pdf/566_Paper.pdf Complications arise for standoff annotation when the annotation is not on the source text itself, but on a more abstract representation. This is particularly the case in a language such as Arabic with morphological and orthographic challenges, and we discuss various aspects of these issues in the context of the Arabic Treebank. The Standard Arabic Morphological Analyzer (SAMA) is closely integrated into the annotation workflow, as the basis for the abstraction between the explicit source text and the more abstract token representation. However, this integration with SAMA gives rise to various problems for the annotation workflow and for maintaining the link between the Treebank and SAMA. In this paper we discuss how we have overcome these problems with consistent and more precise categorization of all of the tokens for their relationship with SAMA. We also discuss how we have improved the creation of several distinct alternative forms of the tokens used in the syntactic trees. As a result, the Treebank provides a resource relating the different forms of the same underlying token with varying degrees of vocalization, in terms of how they relate (1) to each other, (2) to the syntactic structure, and (3) to the morphological analyzer. @@ -3723,8 +3723,8 @@ zaninello-nissim-2010-creation - JanaŠindlerová - OndřejBojar + JanaŠindlerová + OndřejBojar Building a Bilingual <fixed-case>V</fixed-case>al<fixed-case>L</fixed-case>ex Using Treebank Token Alignment: First Observations http://www.lrec-conf.org/proceedings/lrec2010/pdf/568_Paper.pdf We explore the potential and limitations of a concept of building a bilingual valency lexicon based on the alignment of nodes in a parallel treebank. Our aim is to build an electronic Czech->English Valency Lexicon by collecting equivalences from bilingual treebank data and storing them in two already existing electronic valency lexicons, PDT-VALLEX and Engvallex. For this task a special annotation interface has been built upon the TrEd editor, allowing quick and easy collecting of frame equivalences in either of the source lexicons. The issues encountered so far include limitations of technical character, theory-dependent limitations and limitations concerning the achievable degree of quality of human annotation. The issues of special interest for both linguists and MT specialists involved in the project include linguistically motivated non-balance between the frame equivalents, either in number or in type of valency participants. The first phases of annotation so far attest the assumption that there is a unique correspondence between the functors of the translation-equivalent frames. Also, hardly any linguistically significant non-balance between the frames has been found, which is partly promising considering the linguistic theory used and partly caused by little stylistic variety of the annotated corpus texts. @@ -3732,7 +3732,7 @@ AlbertoDíaz - PabloGervás + PabloGervás AntonioGarcía LauraPlaza Development and Use of an Evaluation Collection for Personalisation of Digital Newspapers @@ -3741,19 +3741,19 @@ diaz-etal-2010-development - Jonathan H.Clark - AlonLavie + Jonathan H.Clark + AlonLavie <fixed-case>L</fixed-case>oony<fixed-case>B</fixed-case>in: Keeping Language Technologists Sane through Automated Management of Experimental (Hyper)Workflows http://www.lrec-conf.org/proceedings/lrec2010/pdf/570_Paper.pdf Many contemporary language technology systems are characterized by long pipelines of tools with complex dependencies. Too often, these workflows are implemented by ad hoc scripts; or, worse, tools are run manually, making experiments difficult to reproduce. These practices are difficult to maintain in the face of rapidly evolving workflows while they also fail to expose and record important details about intermediate data. Further complicating these systems are hyperparameters, which often cannot be directly optimized by conventional methods, requiring users to determine which combination of values is best via trial and error. We describe LoonyBin, an open-source tool that addresses these issues by providing: 1) a visual interface for the user to create and modify workflows; 2) a well-defined mechanism for tracking metadata and provenance; 3) a script generator that compiles visual workflows into shell scripts; and 4) a new workflow representation we call a HyperWorkflow, which intuitively and succinctly encodes small experimental variations within a larger workflow. clark-lavie-2010-loonybin - Keith J.Miller + Keith J.Miller SarahMcLeod - ElizabethSchroeder + ElizabethSchroeder MarkArehart - KennethSamuel + KennethSamuel JamesFinley VanesaJurica JohnPolk @@ -3783,7 +3783,7 @@ SunaoHara NorihideKitaoka - KazuyaTakeda + KazuyaTakeda Estimation Method of User Satisfaction Using N-gram-based Dialog History Model for Spoken Dialog System http://www.lrec-conf.org/proceedings/lrec2010/pdf/579_Paper.pdf In this paper, we propose an estimation method of user satisfaction for a spoken dialog system using an N-gram-based dialog history model. We have collected a large amount of spoken dialog data accompanied by usability evaluation scores by users in real environments. The database is made by a field-test in which naive users used a client-server music retrieval system with a spoken dialog interface on their own PCs. An N-gram model is trained from the sequences that consist of users' dialog acts and/or the system's dialog acts for each one of six user satisfaction levels: from 1 to 5 and φ (task not completed). Then, the satisfaction level is estimated based on the N-gram likelihood. Experiments were conducted on the large real data and the results show that our proposed method achieved good classification performance; the classification accuracy was 94.7% in the experiment on a classification into dialogs with task completion and those without task completion. Even if the classifier detected all of the task incomplete dialog correctly, our proposed method achieved the false detection rate of only 6%. @@ -3792,7 +3792,7 @@ Peng-WenChen Snehal KumarChennuru - YingZhang + YingZhang A Language Approach to Modeling Human Behaviors http://www.lrec-conf.org/proceedings/lrec2010/pdf/580_Paper.pdf The modeling of human behavior becomes more and more important due to the increasing popularity of context-aware computing and people-centric mobile applications. Inspired by the principle of action-as-language, we propose that human ambulatory behavior shares similar properties as natural languages. In addition, by exploiting this similarity, we will be able to index, recognize, cluster, retrieve, and infer high-level semantic meanings of human behaviors via the use of natural language processing techniques. In this paper, we developed a Life Logger system to help build the behavior language corpus which supports our ""Behavior as Language"" research. The constructed behavior corpus shows Zipf's distribution over the frequency of vocabularies which is aligned with our ""Behavior as Language"" assumption. Our preliminary results of using smoothed n-gram language model for activity recognition achieved an average accuracy rate of 94% in distinguishing among human ambulatory behaviors including walking, running, and cycling. This behavior-as-language corpus will enable researchers to study higher level human behavior based on the syntactic and semantic analysis of the corpus data. @@ -3821,7 +3821,7 @@ ShuZhang WenjieJia - YingjuXia + YingjuXia YaoMeng HaoYu Extracting Product Features and Sentiments from <fixed-case>C</fixed-case>hinese Customer Reviews @@ -3859,9 +3859,9 @@ Roberto P. A.Araujo Rafael L.de Oliveira Eder M.de Novais - Thiago D.Tadeu - Daniel B.Pereira - IvandréParaboni + Thiago D.Tadeu + Daniel B.Pereira + IvandréParaboni <fixed-case>SIN</fixed-case>otas: the Evaluation of a <fixed-case>NLG</fixed-case> Application http://www.lrec-conf.org/proceedings/lrec2010/pdf/593_Paper.pdf SINotas is a data-to-text NLG application intended to produce short textual reports on students’ academic performance from a database conveying their grades, weekly attendance rates and related academic information. Although developed primarily as a testbed for Portuguese Natural Language Generation, SINotas generates reports of interest to both students keen to learn how their professors would describe their efforts, and to the professors themselves, who may benefit from an at-a-glance view of the student’s performance. In a traditional machine learning approach, SINotas uses a data-text aligned corpus as training data for decision-tree induction. The current system comprises a series of classifiers that implement major Document Planning subtasks (namely, data interpretation, content selection, within- and between-sentence structuring), and a small surface realisation grammar of Brazilian Portuguese. In this paper we focus on the evaluation work of the system, applying a number of intrinsic and user-based evaluation metrics to a collection of text reports generated from real application data. @@ -3882,7 +3882,7 @@ GuillaumeAimetti ChristosKoniaris KrisDemuynck - Henkvan den Heuvel + Henkvan den Heuvel A Speech Corpus for Modeling Language Acquisition: <fixed-case>CAREGIVER</fixed-case> http://www.lrec-conf.org/proceedings/lrec2010/pdf/597_Paper.pdf A multi-lingual speech corpus used for modeling language acquisition called CAREGIVER has been designed and recorded within the framework of the EU funded Acquisition of Communication and Recognition Skills (ACORNS) project. The paper describes the motivation behind the corpus and its design by relying on current knowledge regarding infant language acquisition. Instead of recording infants and children, the voices of their primary and secondary caregivers were captured in both infant-directed and adult-directed speech modes over four languages in a read speech manner. The challenges and methods applied to obtain similar prompts in terms of complexity and semantics across different languages, as well as the normalized recording procedures employed at different locations, is covered. The corpus contains nearly 66000 utterance based audio files spoken over a two-year period by 17 male and 17 female native speakers of Dutch, English, Finnish, and Swedish. An orthographical transcription is available for every utterance. Also, time-aligned word and phone annotations for many of the sub-corpora also exist. The CAREGIVER corpus will be published via ELRA. @@ -3891,9 +3891,9 @@ SanjaSeljan MarkoTadić - ŽeljkoAgić + ŽeljkoAgić JanŠnajder - Bojana DalbeloBašić + Bojana DalbeloBašić VjekoslavOsmann Corpus Aligner (<fixed-case>C</fixed-case>or<fixed-case>A</fixed-case>l) Evaluation on <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>roatian Parallel Corpora http://www.lrec-conf.org/proceedings/lrec2010/pdf/599_Paper.pdf @@ -3911,7 +3911,7 @@ orasmaa-etal-2010-information - MontserratMarimon + MontserratMarimon The <fixed-case>S</fixed-case>panish Resource Grammar http://www.lrec-conf.org/proceedings/lrec2010/pdf/602_Paper.pdf This paper describes the Spanish Resource Grammar, an open-source multi-purpose broad-coverage precise grammar for Spanish. The grammar is implemented on the Linguistic Knowledge Builder (LKB) system, it is grounded in the theoretical framework of Head-driven Phrase Structure Grammar (HPSG), and it uses Minimal Recursion Semantics (MRS) for the semantic representation. We have developed a hybrid architecture which integrates shallow processing functionalities -- morphological analysis, and Named Entity recognition and classification -- into the parsing process. The SRG has a full coverage lexicon of closed word classes and it contains 50,852 lexical entries for open word classes. The grammar also has 64 lexical rules to perform valence changing operations on lexical items, and 191 phrase structure rules that combine words and phrases into larger constituents and compositionally build up their semantic representation. The annotation of each parsed sentence in an LKB grammar simultaneously represents a traditional phrase structure tree, and a MRS semantic representation. We provide evaluation results on sentences from newspaper texts and discuss future work. @@ -3919,8 +3919,8 @@ AnneVilnat - PatrickParoubek - EricVillemonte de la Clergerie + PatrickParoubek + EricVillemonte de la Clergerie GilFrancopoulo Marie-LaureGuénot <fixed-case>PASSAGE</fixed-case> Syntactic Representation: a Minimal Common Ground for Evaluation @@ -3957,15 +3957,15 @@ ClaudiaBorg MikeRosner - Gordon J.Pace + Gordon J.Pace Automatic Grammar Rule Extraction and Ranking for Definitions http://www.lrec-conf.org/proceedings/lrec2010/pdf/609_Paper.pdf Plain text corpora contain much information which can only be accessed through human annotation and semantic analysis, which is typically very time consuming to perform. Analysis of such texts at a syntactic or grammatical structure level can however extract some of this information in an automated manner, even if identifying effective rules can be extremely difficult. One such type of implicit information present in texts is that of definitional phrases and sentences. In this paper, we investigate the use of evolutionary algorithms to learn classifiers to discriminate between definitional and non-definitional sentences in non-technical texts, and show how effective grammar-based definition discriminators can be automatically learnt with minor human intervention. borg-etal-2010-automatic - MannyRayner - PierretteBouillon + MannyRayner + PierretteBouillon NikosTsourakis JohannaGerlach MariaGeorgescul @@ -3979,7 +3979,7 @@ PeterAdolphs XiwenCheng - TinaKlüwer + TinaKlüwer HansUszkoreit FeiyuXu Question Answering Biographic Information and Social Network Powered by the Semantic Web @@ -4009,17 +4009,17 @@ TheodorosKostoulas TodorGanchev IosifMporas - NikosFakotakis + NikosFakotakis <fixed-case>V</fixed-case>ergina: A <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek Speech Database for Speech Synthesis http://www.lrec-conf.org/proceedings/lrec2010/pdf/614_Paper.pdf The present paper outlines the Vergina speech database, which was developed in support of research and development of corpus-based unit selection and statistical parametric speech synthesis systems for Modern Greek language. In the following, we describe the design, development and implementation of the recording campaign, as well as the annotation of the database. Specifically, a text corpus of approximately 5 million words, collected from newspaper articles, periodicals, and paragraphs of literature, was processed in order to select the utterances-sentences needed for producing the speech database and to achieve a reasonable phonetic coverage. The broad coverage and contents of the selected utterances-sentences of the database ― text corpus collected from different domains and writing styles ― makes this database appropriate for various application domains. The database, recorded in audio studio, consists of approximately 3,000 phonetically balanced Modern Greek utterances corresponding to approximately four hours of speech. Annotation of the Vergina speech database was performed using task-specific tools, which are based on a hidden Markov model (HMM) segmentation method, and then manual inspection and corrections were performed. lazaridis-etal-2010-vergina - ViviNastase + ViviNastase MichaelStrube - BenjaminBoerschinger - CaeciliaZirn + BenjaminBoerschinger + CaeciliaZirn AnasElghafari <fixed-case>W</fixed-case>iki<fixed-case>N</fixed-case>et: A Very Large Scale Multi-Lingual Concept Network http://www.lrec-conf.org/proceedings/lrec2010/pdf/615_Paper.pdf @@ -4028,7 +4028,7 @@ NirajAswani - RobertGaizauskas + RobertGaizauskas Developing Morphological Analysers for <fixed-case>S</fixed-case>outh <fixed-case>A</fixed-case>sian Languages: Experimenting with the <fixed-case>H</fixed-case>indi and <fixed-case>G</fixed-case>ujarati Languages http://www.lrec-conf.org/proceedings/lrec2010/pdf/616_Paper.pdf A considerable amount of work has been put into development of stemmers and morphological analysers. The majority of these approaches use hand-crafted suffix-replacement rules but a few try to discover such rules from corpora. While most of the approaches remove or replace suffixes, there are examples of derivational stemmers which are based on prefixes as well. In this paper we present a rule-based morphological analyser. We propose an approach that takes both prefixes as well as suffixes into account. Given a corpus and a dictionary, our method can be used to obtain a set of suffix-replacement rules for deriving an inflected word’s root form. We developed an approach for the Hindi language but show that the approach is portable, at least to related languages, by adapting it to the Gujarati language. Given that the entire process of developing such a ruleset is simple and fast, our approach can be used for rapid development of morphological analysers and yet it can obtain competitive results with analysers built relying on human authored rules. @@ -4037,7 +4037,7 @@ HirokiHanaoka HidekiMima - Jun’ichiTsujii + Jun’ichiTsujii A <fixed-case>J</fixed-case>apanese Particle Corpus Built by Example-Based Annotation http://www.lrec-conf.org/proceedings/lrec2010/pdf/617_Paper.pdf This paper is a report on an on-going project of creating a new corpus focusing on Japanese particles. The corpus will provide deeper syntactic/semantic information than the existing resources. The initial target particle is ``to'' which occurs 22,006 times in 38,400 sentences of the existing corpus: the Kyoto Text Corpus. In this annotation task, an ``example-based'' methodology is adopted for the corpus annotation, which is different from the traditional annotation style. This approach provides the annotators with an example sentence rather than a linguistic category label. By avoiding linguistic technical terms, it is expected that any native speakers, with no special knowledge on linguistic analysis, can be an annotator without long training, and hence it can reduce the annotation cost. So far, 10,475 occurrences have been already annotated, with an inter-annotator agreement of 0.66 calculated by Cohen's kappa. The initial disagreement analyses and future directions are discussed in the paper. @@ -4046,7 +4046,7 @@ CarolineSporleder LinlinLi - PhilipGorinski + PhilipGorinski XaverKoch Idioms in Context: The <fixed-case>IDIX</fixed-case> Corpus http://www.lrec-conf.org/proceedings/lrec2010/pdf/618_Paper.pdf @@ -4062,7 +4062,7 @@ SusanaJiménez-Murcia Maher BenMoussa NadiaMagnenat-Thalmann - NikosFakotakis + NikosFakotakis The <fixed-case>P</fixed-case>lay<fixed-case>M</fixed-case>ancer Database: A Multimodal Affect Database in Support of Research and Development Activities in Serious Game Environment http://www.lrec-conf.org/proceedings/lrec2010/pdf/619_Paper.pdf The present paper reports on a recent effort that resulted in the establishment of a unique multimodal affect database, referred to as the PlayMancer database. This database was created in support of the research and development activities, taking place within the PlayMancer project, which aim at the development of a serious game environment in support of treatment of patients with behavioural and addictive disorders, such as eating disorders and gambling addictions. Specifically, for the purpose of data collection, we designed and implemented a pilot trial with healthy test subjects. Speech, video and bio-signals (pulse-rate, SpO2) were captured synchronously, during the interaction of healthy people with a number of video games. The collected data were annotated by the test subjects (self-annotation), targeting proper interpretation of the underlying affective states. The broad-shouldered design of the PlayMancer database allows its use for the needs of research on multimodal affect-emotion recognition and multimodal human-computer interaction in serious games environment. @@ -4086,7 +4086,7 @@ nicolae-etal-2010-c - Stephen A.Boxwell + Stephen A.Boxwell ChrisBrew A Pilot <fixed-case>A</fixed-case>rabic <fixed-case>CCG</fixed-case>bank http://www.lrec-conf.org/proceedings/lrec2010/pdf/623_Paper.pdf @@ -4100,18 +4100,18 @@ AlainGhio ChristineMeunier ClaudeChevrie-Muller - Jean-FrancoisBonastre + Jean-FrancoisBonastre AntoniaColazo Simon - CélineDelooze + CélineDelooze DanielleDuez - CédricGendrot + CédricGendrot ThierryLegou NathalieLevèque ClairePillot-Loiseau SergePinto GillesPouchoulin DanièleRobert - JacquelineVaissiere + JacquelineVaissiere FrançoisViallet CoralieVincent The <fixed-case>D</fixed-case>es<fixed-case>P</fixed-case>ho-<fixed-case>AP</fixed-case>a<fixed-case>D</fixed-case>y Project: Developing an Acoustic-phonetic Characterization of Dysarthric Speech in <fixed-case>F</fixed-case>rench @@ -4121,7 +4121,7 @@ MithunBalakrishna - DanMoldovan + DanMoldovan MartaTatu MarianOlteanu Semi-Automatic Domain Ontology Creation from Text Resources @@ -4131,20 +4131,20 @@ MaiteMelero - GemmaBoleda + GemmaBoleda MontseCuadros CristinaEspaña-Bonet - LluísPadró + LluísPadró MartíQuixal - CarlosRodríguez - RoserSaurí + CarlosRodríguez + RoserSaurí Language Technology Challenges of a ‘Small’ Language (<fixed-case>C</fixed-case>atalan) http://www.lrec-conf.org/proceedings/lrec2010/pdf/628_Paper.pdf In this paper, we present a brief snapshot of the state of affairs in computational processing of Catalan and the initiatives that are starting to take place in an effort to bring the field a step forward, by making a better and more efficient use of the already existing resources and tools, by bridging the gap between research and market, and by establishing periodical meeting points for the community. In particular, we present the results of the First Workshop on the Computational Processing of Catalan, which succeeded in putting together a fair representation of the research in the area, and received attention from both the industry and the administration. Aside from facilitating communication among researchers and between developers and users, the Workshop provided the organizers with valuable information about existing resources, tools, developers and providers. This information has allowed us to go a step further by setting up a “harvesting” procedure which will hopefully build the seed of a portal-catalogue-observatory of language resources and technologies in Catalan. melero-etal-2010-language - JohnLee + JohnLee DagHaug Porting an <fixed-case>A</fixed-case>ncient <fixed-case>G</fixed-case>reek and <fixed-case>L</fixed-case>atin Treebank http://www.lrec-conf.org/proceedings/lrec2010/pdf/631_Paper.pdf @@ -4152,7 +4152,7 @@ lee-haug-2010-porting - SabineSchulte im Walde + SabineSchulte im Walde Comparing Computational Models of Selectional Preferences - Second-order Co-Occurrence vs. Latent Semantic Clusters http://www.lrec-conf.org/proceedings/lrec2010/pdf/632_Paper.pdf This paper presents a comparison of three computational approaches to selectional preferences: (i) an intuitive distributional approach that uses second-order co-occurrence of predicates and complement properties; (ii) an EM-based clustering approach that models the strengths of predicate--noun relationships by latent semantic clusters (Rooth et al., 1999); and (iii) an extension of the latent semantic clusters by incorporating the MDL principle into the EM training, thus explicitly modelling the predicate--noun selectional preferences by WordNet classes (Schulte im Walde et al., 2008). Concerning the distributional approach, we were interested not only in how well the model describes selectional preferences, but moreover which second-order properties are most salient. For example, a typical direct object of the verb 'drink' is usually fluid, might be hot or cold, can be bought, might be bottled, etc. The general question we ask is: what characterises the predicate's restrictions to the semantic realisation of its complements? Our second interest lies in the actual comparison of the models: How does a very simple distributional model compare to much more complex approaches, and which representation of selectional preferences is more appropriate, using (i) second-order properties, (ii) an implicit generalisation of nouns (by clusters), or (iii) an explicit generalisation of nouns by WordNet classes within clusters? We describe various experiments on German data and two evaluations, and demonstrate that the simple distributional model outperforms the more complex cluster-based models in most cases, but does itself not always beat the powerful frequency baseline. @@ -4160,20 +4160,20 @@ PaulMcNamee - Hoa TrangDang + Hoa TrangDang HeatherSimpson PatrickSchone - Stephanie M.Strassel + Stephanie M.Strassel An Evaluation of Technologies for Knowledge Base Population http://www.lrec-conf.org/proceedings/lrec2010/pdf/634_Paper.pdf Previous content extraction evaluations have neglected to address problems which complicate the incorporation of extracted information into an existing knowledge base. Previous question answering evaluations have likewise avoided tasks such as explicit disambiguation of target entities and handling a fixed set of questions about entities without previous determination of possible answers. In 2009 NIST conducted a Knowledge Base Population track at its Text Analysis Conference to unite the content extraction and question answering communities and jointly explore some of these issues. This exciting new evaluation attracted 13 teams from 6 countries that submitted results in two tasks, Entity Linking and Slot Filling. This paper explains the motivation and design of the tasks, describes the language resources that were developed for this evaluation, offers comparisons to previous community evaluations, and briefly summarizes the performance obtained by systems. We also identify relevant issues pertaining to target selection, challenging queries, and performance measures. mcnamee-etal-2010-evaluation - ÓscarFerrández - MichaelEllsworth - RafaelMuñoz - Collin F.Baker + ÓscarFerrández + MichaelEllsworth + RafaelMuñoz + Collin F.Baker Aligning <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et and <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et based on Semantic Neighborhoods http://www.lrec-conf.org/proceedings/lrec2010/pdf/636_Paper.pdf This paper presents an algorithm for aligning FrameNet lexical units to WordNet synsets. Both, FrameNet and WordNet, are well-known as well as widely-used resources by the entire research community. They help systems in the comprehension of the semantics of texts, and therefore, finding strategies to link FrameNet and WordNet involves challenges related to a better understanding of the human language. Such deep analysis is exploited by researchers to improve the performance of their applications. The alignment is achieved by exploiting the particular characteristics of each lexical-semantic resource, with special emphasis on the explicit, formal semantic relations in each. Semantic neighborhoods are computed for each alignment of lemmas, and the algorithm calculates correlation scores by comparing such neighborhoods. The results suggest that the proposed algorithm is appropriate for aligning the FrameNet and WordNet hierarchies. Furthermore, the algorithm can aid research on increasing the coverage of FrameNet, building FrameNets in other languages, and creating a system for querying a joint FrameNet-WordNet hierarchy. @@ -4199,9 +4199,9 @@ kemps-snijders-etal-2010-lat - OndřejBojar - AdamLiška - ZdeněkŽabokrtský + OndřejBojar + AdamLiška + ZdeněkŽabokrtský Evaluating Utility of Data Sources in a Large Parallel <fixed-case>C</fixed-case>zech-<fixed-case>E</fixed-case>nglish Corpus <fixed-case>C</fixed-case>z<fixed-case>E</fixed-case>ng 0.9 http://www.lrec-conf.org/proceedings/lrec2010/pdf/642_Paper.pdf CzEng 0.9 is the third release of a large parallel corpus of Czech and English. For the current release, CzEng was extended by significant amount of texts from various types of sources, including parallel web pages, electronically available books and subtitles. This paper describes and evaluates filtering techniques employed in the process in order to avoid misaligned or otherwise damaged parallel sentences in the collection. We estimate the precision and recall of two sets of filters. The first set was used to process the data before their inclusion into CzEng. The filters from the second set were newly created to improve the filtering process for future releases of CzEng. Given the overall amount and variance of sources of the data, our experiments illustrate the utility of parallel data sources with respect to extractable parallel segments. As a similar behaviour can be expected for other language pairs, our results can be interpreted as guidelines indicating which sources should other researchers exploit first. @@ -4211,7 +4211,7 @@ MariaLiakata SimoneTeufel AdvaithSiddharthan - ColinBatchelor + ColinBatchelor Corpora for the Conceptualisation and Zoning of Scientific Papers http://www.lrec-conf.org/proceedings/lrec2010/pdf/644_Paper.pdf We present two complementary annotation schemes for sentence based annotation of full scientific papers, CoreSC and AZ-II, applied to primary research articles in chemistry. AZ-II is the extension of AZ for chemistry papers. AZ has been shown to have been reliably annotated by independent human coders and useful for various information access tasks. Like AZ, AZ-II follows the rhetorical structure of a scientific paper and the knowledge claims made by the authors. The CoreSC scheme takes a different view of scientific papers, treating them as the humanly readable representations of scientific investigations. It seeks to retrieve the structure of the investigation from the paper as generic high-level Core Scientific Concepts (CoreSC). CoreSCs have been annotated by 16 chemistry experts over a total of 265 full papers in physical chemistry and biochemistry. We describe the differences and similarities between the two schemes in detail and present the two corpora produced using each scheme. There are 36 shared papers in the corpora, which allows us to quantitatively compare aspects of the annotation schemes. We show the correlation between the two schemes, their strengths and weeknesses and discuss the benefits of combining a rhetorical based analysis of the papers with a content-based one. @@ -4229,7 +4229,7 @@ YannickEstève ThierryBazillon Jean-YvesAntoine - FrédéricBéchet + FrédéricBéchet JérômeFarinas The <fixed-case>EPAC</fixed-case> Corpus: Manual and Automatic Annotations of Conversational Speech in <fixed-case>F</fixed-case>rench Broadcast News http://www.lrec-conf.org/proceedings/lrec2010/pdf/650_Paper.pdf @@ -4257,7 +4257,7 @@ PiroskaLendvai ThierryDeclerck SándorDarányi - PabloGervás + PabloGervás RaquelHervás ScottMalec FedericoPeinado @@ -4268,7 +4268,7 @@ BoraSavas - YoshihikoHayashi + YoshihikoHayashi MonicaMonachini ClaudiaSoria NicolettaCalzolari @@ -4278,7 +4278,7 @@ savas-etal-2010-lmf - JordiAtserias + JordiAtserias GiuseppeAttardi MariaSimi HugoZaragoza @@ -4297,7 +4297,7 @@ AntonLeuski - DavidTraum + DavidTraum <fixed-case>NPCE</fixed-case>ditor: A Tool for Building Question-Answering Characters http://www.lrec-conf.org/proceedings/lrec2010/pdf/660_Paper.pdf NPCEditor is a system for building and deploying virtual characters capable of engaging a user in spoken dialog on a limited domain. The dialogue may take any form as long as the character responses can be specified a priori. For example, NPCEditor has been used for constructing question answering characters where a user asks questions and the character responds, but other scenarios are possible. At the core of the system is a state of the art statistical language classification technology for mapping from user's text input to system responses. NPCEditor combines the classifier with a database that stores the character information and relevant language data, a server that allows the character designer to deploy the completed characters, and a user-friendly editor that helps the designer to accomplish both character design and deployment tasks. In the paper we define the overall system architecture, describe individual NPCEditor components, and guide the reader through the steps of building a virtual character. @@ -4324,10 +4324,10 @@ YiLiu PascaleFung YongshengYang - DeniseDiPersio - MeghanGlenn - StephanieStrassel - ChristopherCieri + DeniseDiPersio + MeghanGlenn + StephanieStrassel + ChristopherCieri A Very Large Scale <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Broadcast Corpus for <fixed-case>GALE</fixed-case> Project http://www.lrec-conf.org/proceedings/lrec2010/pdf/664_Paper.pdf In this paper, we present the design, collection, transcription and analysis of a Mandarin Chinese Broadcast Collection of over 3000 hours. The data was collected by Hong Kong University of Science and Technology (HKUST) in China on a cable TV and satellite transmission platform established in support of the DARPA Global Autonomous Language Exploitation (GALE) program. The collection includes broadcast news (BN) and broadcast conversation (BC) including talk shows, roundtable discussions, call-in shows, editorials and other conversational programs that focus on news and current events. HKUST also collects detailed information about all recorded programs. A subset of BC and BN recordings are manually transcribed with standard Chinese characters in UTF-8 encoding, using specific mark-ups for a small set of spontaneous and conversational speech phenomena. The collection is among the largest and first of its kind for Mandarin Chinese Broadcast speech, providing abundant and diverse samples for Mandarin speech recognition and other application-dependent tasks, such as spontaneous speech processing and recognition, topic detection, information retrieval, and speaker recognition. HKUST’s acoustic analysis of 500 hours of the speech and transcripts demonstrates the positive impact this data could have on system performance. @@ -4351,8 +4351,8 @@ KevinWalker - ChristopherCaruso - DeniseDiPersio + ChristopherCaruso + DeniseDiPersio Large Scale Multilingual Broadcast Data Collection to Support Machine Translation and Distillation Technology Development http://www.lrec-conf.org/proceedings/lrec2010/pdf/667_Paper.pdf The development of technologies to address machine translation and distillation of multilingual broadcast data depends heavily on the collection of large volumes of material from modern data providers. To address the needs of GALE researchers, the Linguistic Data Consortium (LDC) developed a system for collecting broadcast news and conversation from a variety of Arabic, Chinese and English broadcasters. The system is highly automated, easily extensible and robust and is capable of collecting, processing and evaluating hundreds of hours of content from several dozen sources per day. In addition to this extensive system, LDC manages three remote collection sites to maximize the variety of available broadcast data and has designed a portable broadcast collection platform to facilitate remote collection. This paper will present a detailed a description of the design and implementation of LDC’s collection system, the technical challenges and solutions to large scale broadcast data collection efforts and an overview of the system’s operation. This paper will also discuss the challenges of managing remote collections, in particular, the strategies used to normalize data formats, naming conventions and delivery methods to achieve optimal integration of remotely-collected data into LDC’s collection database and downstream tasking workflow. @@ -4392,7 +4392,7 @@ XuansongLi NiyuGe StephenGrimes - Stephanie M.Strassel + Stephanie M.Strassel KazuakiMaeda Enriching Word Alignment with Linguistic Tags http://www.lrec-conf.org/proceedings/lrec2010/pdf/670_Paper.pdf @@ -4402,7 +4402,7 @@ KathleenEberhard HanneleNicholson - SandraKübler + SandraKübler SusanGundersen MatthiasScheutz The <fixed-case>I</fixed-case>ndiana “Cooperative Remote Search Task” (<fixed-case>CR</fixed-case>e<fixed-case>ST</fixed-case>) Corpus @@ -4423,7 +4423,7 @@ Bharat RamAmbati MridulGupta SamarHusain - Dipti MisraSharma + Dipti MisraSharma A High Recall Error Identification Tool for <fixed-case>H</fixed-case>indi Treebank Validation http://www.lrec-conf.org/proceedings/lrec2010/pdf/673_Paper.pdf This paper describes the development of a hybrid tool for a semi-automated process for validation of treebank annotation at various levels. The tool is developed for error detection at the part-of-speech, chunk and dependency levels of a Hindi treebank, currently under development. The tool aims to identify as many errors as possible at these levels to achieve consistency in the task of annotation. Consistency in treebank annotation is a must for making data as error-free as possible and for providing quality assurance. The tool is aimed at ensuring consistency and to make manual validation cost effective. We discuss a rule based and a hybrid approach (statistical methods combined with rule-based methods) by which a high-recall system can be developed and used to identify errors in the treebank. We report some results of using the tool on a sample of data extracted from the Hindi treebank. We also argue how the tool can prove useful in improving the annotation guidelines which would in turn, better the quality of annotation in subsequent iterations. @@ -4432,7 +4432,7 @@ SusanRobinson AntonioRoque - DavidTraum + DavidTraum Dialogues in Context: An Objective User-Oriented Evaluation Approach for Virtual Human Dialogue http://www.lrec-conf.org/proceedings/lrec2010/pdf/674_Paper.pdf As conversational agents are now being developed to encounter more complex dialogue situations it is increasingly difficult to find satisfactory methods for evaluating these agents. Task-based measures are insufficient where there is no clearly defined task. While user-based evaluation methods may give a general sense of the quality of an agent's performance, they shed little light on the relative quality or success of specific features of dialogue that are necessary for system improvement. This paper examines current dialogue agent evaluation practices and motivates the need for a more detailed approach for defining and measuring the quality of dialogues between agent and user. We present a framework for evaluating the dialogue competence of artificial agents involved in complex and underspecified tasks when conversing with people. A multi-part coding scheme is proposed that provides a qualitative analysis of human utterances, and rates the appropriateness of the agent's responses to these utterances. The scheme is outlined, and then used to evaluate Staff Duty Officer Moleno, a virtual guide in Second Life. @@ -4444,7 +4444,7 @@ KallirroiGeorgila KenjiSagae RonArtstein - DavidTraum + DavidTraum Practical Evaluation of Speech Recognizers for Virtual Human Dialogue Systems http://www.lrec-conf.org/proceedings/lrec2010/pdf/675_Paper.pdf We perform a large-scale evaluation of multiple off-the-shelf speech recognizers across diverse domains for virtual human dialogue systems. Our evaluation is aimed at speech recognition consumers and potential consumers with limited experience with readily available recognizers. We focus on practical factors to determine what levels of performance can be expected from different available recognizers in various projects featuring different types of conversational utterances. Our results show that there is no single recognizer that outperforms all other recognizers in all domains. The performance of each recognizer may vary significantly depending on the domain, the size and perplexity of the corpus, the out-of-vocabulary rate, and whether acoustic and language model adaptation has been used or not. We expect that our evaluation will prove useful to other speech recognition consumers, especially in the dialogue community, and will shed some light on the key problem in spoken dialogue systems of selecting the most suitable available speech recognition system for a particular application, and what impact training will have. @@ -4454,7 +4454,7 @@ KiyonoriOhtake TeruhisaMisu ChioriHori - HidekiKashioka + HidekiKashioka SatoshiNakamura Dialogue Acts Annotation for <fixed-case>NICT</fixed-case> <fixed-case>K</fixed-case>yoto Tour Dialogue Corpus to Construct Statistical Dialogue Systems http://www.lrec-conf.org/proceedings/lrec2010/pdf/676_Paper.pdf @@ -4463,14 +4463,14 @@ Bal KrishnaBal - PatrickSaint Dizier + PatrickSaint Dizier Towards Building Annotated Resources for Analyzing Opinions and Argumentation in News Editorials http://www.lrec-conf.org/proceedings/lrec2010/pdf/677_Paper.pdf This paper describes an annotation scheme for argumentation in opinionated texts such as newspaper editorials, developed from a corpus of approximately 500 English texts from Nepali and international newspaper sources. We present the results of analysis and evaluation of the corpus annotation ― currently, the inter-annotator agreement kappa value being 0.80 which indicates substantial agreement between the annotators. We also discuss some of linguistic resources (key factors for distinguishing facts from opinions, opinion lexicon, intensifier lexicon, pre-modifier lexicon, modal verb lexicon, reporting verb lexicon, general opinion patterns from the corpus etc.) developed as a result of our corpus analysis, which can be used to identify an opinion or a controversial issue, arguments supporting an opinion, orientation of the supporting arguments and their strength (intrinsic, relative and in terms of persuasion). These resources form the backbone of our work especially for performing the opinion analysis in the lower levels, i.e., in the lexical and sentence levels. Finally, we shed light on the perspectives of the given work clearly outlining the challenges. bal-saint-dizier-2010-towards - IrisEshkol + IrisEshkol DenisMaurel NathalieFriburger <fixed-case>E</fixed-case>slo: From Transcription to Speakers’ Personal Information Annotation @@ -4479,22 +4479,22 @@ eshkol-etal-2010-eslo - PeterWittenburg - NuriaBel + PeterWittenburg + NuriaBel LarsBorin GerhardBudin NicolettaCalzolari - EvaHajicova + EvaHajicova KimmoKoskenniemi LotharLemnitzer BenteMaegaard MaciejPiasecki Jean-MariePierrel SteliosPiperidis - IngunaSkadina - DanTufis + IngunaSkadina + DanTufis Remcovan Veenendaal - TamasVáradi + TamasVáradi MartinWynne Resource and Service Centres as the Backbone for a Sustainable Service Infrastructure http://www.lrec-conf.org/proceedings/lrec2010/pdf/679_Paper.pdf @@ -4514,7 +4514,7 @@ ChitoseSao KojiMurakami KentaroInui - YujiMatsumoto + YujiMatsumoto Annotating Event Mentions in Text with Modality, Focus, and Source Information http://www.lrec-conf.org/proceedings/lrec2010/pdf/682_Paper.pdf Many natural language processing tasks, including information extraction, question answering and recognizing textual entailment, require analysis of the polarity, focus of polarity, tense, aspect, mood and source of the event mentions in a text in addition to its predicate-argument structure analysis. We refer to modality, polarity and other associated information as extended modality. In this paper, we propose a new annotation scheme for representing the extended modality of event mentions in a sentence. Our extended modality consists of the following seven components: Source, Time, Conditional, Primary modality type, Actuality, Evaluation and Focus. We reviewed the literature about extended modality in Linguistics and Natural Language Processing (NLP) and defined appropriate labels of each component. In the proposed annotation scheme, information of extended modality of an event mention is summarized at the core predicate of the event mention for immediate use in NLP applications. We also report on the current progress of our manual annotation of a Japanese corpus of about 50,000 event mentions, showing a reasonably high ratio of inter-annotator agreement. @@ -4522,7 +4522,7 @@ SisayAdugna - AndreasEisele + AndreasEisele <fixed-case>E</fixed-case>nglish — <fixed-case>O</fixed-case>romo Machine Translation: An Experiment Using a Statistical Approach http://www.lrec-conf.org/proceedings/lrec2010/pdf/683_Paper.pdf This paper deals with translation of English documents to Oromo using statistical methods. Whereas English is the lingua franca of online information, Oromo, despite its relative wide distribution within Ethiopia and neighbouring countries like Kenya and Somalia, is one of the most resource scarce languages. The paper has two main goals: one is to test how far we can go with the available limited parallel corpus for the English ― Oromo language pair and the applicability of existing Statistical Machine Translation (SMT) systems on this language pair. The second goal is to analyze the output of the system with the objective of identifying the challenges that need to be tackled. Since the language is resource scarce as mentioned above, we cannot get as many parallel documents as we want for the experiment. However, using a limited corpus of 20,000 bilingual sentences and 163,000 monolingual sentences, translation accuracy in terms of BLEU Score of 17.74% was achieved. @@ -4536,7 +4536,7 @@ fujii-2010-modeling - MatthiasHartung + MatthiasHartung AnetteFrank A Semi-supervised Type-based Classification of Adjectives: Distinguishing Properties and Relations http://www.lrec-conf.org/proceedings/lrec2010/pdf/685_Paper.pdf @@ -4544,7 +4544,7 @@ hartung-frank-2010-semi - AndreasEisele + AndreasEisele YuChen <fixed-case>M</fixed-case>ulti<fixed-case>UN</fixed-case>: A Multilingual Corpus from United Nation Documents http://www.lrec-conf.org/proceedings/lrec2010/pdf/686_Paper.pdf @@ -4553,21 +4553,21 @@ MyriamRakho - MatthieuConstant + MatthieuConstant Evaluating the Impact of Some Linguistic Information on the Performances of a Similarity-based and Translation-oriented Word-Sense Disambiguation Method http://www.lrec-conf.org/proceedings/lrec2010/pdf/687_Paper.pdf In this article, we present an experiment of linguistic parameter tuning in the representation of the semantic space of polysemous words. We evaluate quantitatively the influence of some basic linguistic knowledge (lemmas, multi-word expressions, grammatical tags and syntactic relations) on the performances of a similarity-based Word-Sense disambiguation method. The question we try to answer, by this experiment, is which kinds of linguistic knowledge are most useful for the semantic disambiguation of polysemous words, in a multilingual framework. The experiment is about 20 French polysemous words (16 nouns and 4 verbs) and we make use of the French-English part of the sentence-aligned EuroParl Corpus for training and testing. Our results show a strong correlation between the system accuracy and the degree of precision of the linguistic features used, particularly the syntactic dependency relations. Furthermore, the lemma-based approach absolutely outperforms the word form-based approach. The best accuracy achieved by our system amounts to 90%. rakho-constant-2010-evaluating - EckhardBick + EckhardBick <fixed-case>F</fixed-case>r<fixed-case>AG</fixed-case>, a Hybrid Constraint Grammar Parser for <fixed-case>F</fixed-case>rench http://www.lrec-conf.org/proceedings/lrec2010/pdf/688_Paper.pdf This paper describes a hybrid system (FrAG) for tagging / parsing French text, and presents results from ongoing development work, corpus annotation and evaluation. The core of the system is a sentence scope Constraint Grammar (CG), with linguist-written rules. However, unlike traditional CG, the system uses hybrid techniques on both its morphological input side and its syntactic output side. Thus, FrAG draws on a pre-existing probabilistic Decision Tree Tagger (DTT) before and in parallel with its own lexical stage, and feeds its output into a Phrase Structure Grammar (PSG) that uses CG syntactic function tags rather than ordinary terminals in its rewriting rules. As an alternative architecture, dependency tree structures are also supported. In the newest version, dependencies are assigned within the CG-framework itself, and can interact with other rules. To provide semantic context, a semantic prototype ontology for nouns is used, covering a large part of the lexicon. In a recent test run on Parliamentary debate transcripts, FrAG achieved F-scores of 98.7 % for part of speech (PoS) and between 93.1 % and 96.2 % for syntactic function tags. Dependency links were correct in 95.9 %. bick-2010-frag - Julia MariaSchulz + Julia MariaSchulz ChristaWomser-Hacker ThomasMandl Multilingual Corpus Development for Opinion Mining @@ -4585,9 +4585,9 @@ broda-etal-2010-building - CássiaTrojahn + CássiaTrojahn PauloQuaresma - RenataVieira + RenataVieira An <fixed-case>API</fixed-case> for Multi-lingual Ontology Matching http://www.lrec-conf.org/proceedings/lrec2010/pdf/691_Paper.pdf Ontology matching consists of generating a set of correspondences between the entities of two ontologies. This process is seen as a solution to data heterogeneity in ontology-based applications, enabling the interoperability between them. However, existing matching systems are designed by assuming that the entities of both source and target ontologies are written in the same languages ( English, for instance). Multi-lingual ontology matching is an open research issue. This paper describes an API for multi-lingual matching that implements two strategies, direct translation-based and indirect. The first strategy considers direct matching between two ontologies (i.e., without intermediary ontologies), with the help of external resources, i.e., translations. The indirect alignment strategy, proposed by (Jung et al., 2009), is based on composition of alignments. We evaluate these strategies using simple string similarity based matchers and three ontologies written in English, French, and Portuguese, an extension of the OAEI benchmark test 206. @@ -4604,15 +4604,15 @@ NirajAswani - RobertGaizauskas + RobertGaizauskas <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Transliteration using Multiple Similarity Metrics http://www.lrec-conf.org/proceedings/lrec2010/pdf/694_Paper.pdf In this paper, we present an approach to measure the transliteration similarity of English-Hindi word pairs. Our approach has two components. First we propose a bi-directional mapping between one or more characters in the Devanagari script and one or more characters in the Roman script (pronounced as in English). This allows a given Hindi word written in Devanagari to be transliterated into the Roman script and vice-versa. Second, we present an algorithm for computing a similarity measure that is a variant of Dice’s coefficient measure and the LCSR measure and which also takes into account the constraints needed to match English-Hindi transliterated words. Finally, by evaluating various similarity metrics individually and together under a multiple measure agreement scenario, we show that it is possible to achieve a 0.92 f-measure in identifying English-Hindi word pairs that are transliterations. In order to assess the portability of our approach to other similar languages we adapt our system to the Gujarati language. aswani-gaizauskas-2010-english - RodrigoAgerri - AnaGarcía-Serrano + RodrigoAgerri + AnaGarcía-Serrano <fixed-case>Q</fixed-case>-<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: Extracting Polarity from <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Senses http://www.lrec-conf.org/proceedings/lrec2010/pdf/695_Paper.pdf This paper presents Q-WordNet, a lexical resource consisting of WordNet senses automatically annotated by positive and negative polarity. Polarity classification amounts to decide whether a text (sense, sentence, etc.) may be associated to positive or negative connotations. Polarity classification is becoming important within the fields of Opinion Mining and Sentiment Analysis for determining opinions about commercial products, on companies reputation management, brand monitoring, or to track attitudes by mining online forums, blogs, etc. Inspired by work on classification of word senses by polarity (e.g., SentiWordNet), and taking WordNet as a starting point, we build Q-WordNet. Instead of applying external tools such as supervised classifiers to annotated WordNet synsets by polarity, we try to effectively maximize the linguistic information contained in WordNet, thereby taking advantage of the human effort put by lexicographers and annotators. The resulting resource is a subset of WordNet senses classified as positive or negative. In this approach, neutral polarity is seen as the absence of positive or negative polarity. The evaluation of Q-WordNet shows an improvement with respect to previous approaches. We believe that Q-WordNet can be used as a starting point for data-driven approaches in sentiment analysis. @@ -4657,7 +4657,7 @@ nishikawa-etal-2010-context - BenoîtSagot + BenoîtSagot GéraldineWalther A Morphological Lexicon for the <fixed-case>P</fixed-case>ersian Language http://www.lrec-conf.org/proceedings/lrec2010/pdf/700_Paper.pdf @@ -4665,7 +4665,7 @@ sagot-walther-2010-morphological - BenoîtSagot + BenoîtSagot The Lefff, a Freely Available and Large-coverage Morphological and Syntactic Lexicon for <fixed-case>F</fixed-case>rench http://www.lrec-conf.org/proceedings/lrec2010/pdf/701_Paper.pdf In this paper, we introduce the Lefff, a freely available, accurate and large-coverage morphological and syntactic lexicon for French, used in many NLP tools such as large-coverage parsers. We first describe Alexina, the lexical framework in which the Lefff is developed as well as the linguistic notions and formalisms it is based on. Next, we describe the various sources of lexical data we used for building the Lefff, in particular semi-automatic lexical development techniques and conversion and merging of existing resources. Finally, we illustrate the coverage and precision of the resource by comparing it with other resources and by assessing its impact in various NLP tools. @@ -4674,7 +4674,7 @@ MontseCuadros EgoitzLaparra - GermanRigau + GermanRigau PiekVossen WauterBosma Integrating a Large Domain Ontology of Species into <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et @@ -4685,7 +4685,7 @@ Jean-LucRouas MayumiBeppu - MartineAdda-Decker + MartineAdda-Decker Comparison of Spectral Properties of Read, Prepared and Casual Speech in <fixed-case>F</fixed-case>rench http://www.lrec-conf.org/proceedings/lrec2010/pdf/704_Paper.pdf In this paper, we investigate the acoustic properties of phonemes in three speaking styles: read speech, prepared speech and spontaneous speech. Our aim is to better understand why speech recognition systems still fails to achieve good performances on spontaneous speech. This work follows the work of Nakamura et al. on Japanese speaking styles, with the difference that we here focus on French. Using Nakamura's method, we use classical speech recognition features, MFCC, and try to represent the effects of the speaking styles on the spectral space. Two measurements are defined in order to represent the spectral space reduction and the spectral variance extension. Experiments are then carried on to investigate if indeed we find some differences between the three speaking styles using these measurements. We finally compare our results to those obtained by Nakamura on Japanese to see if the same phenomenon appears. We happen to find some cues, and it also seems that phone duration also plays an important role regarding spectral reduction, especially for spontaneous speech. @@ -4699,8 +4699,8 @@ koeva-2010-lexicon - PeterSpyns - ElisabethD’Halleweyn + PeterSpyns + ElisabethD’Halleweyn <fixed-case>F</fixed-case>lemish-<fixed-case>D</fixed-case>utch <fixed-case>HLT</fixed-case> Policy: Evolving to New Forms of Collaboration http://www.lrec-conf.org/proceedings/lrec2010/pdf/707_Paper.pdf In the last decade, the Dutch Language Union has taken a serious interest in digital language resources and human language technologies (HLT), because they are crucial for a language to be able to survive in the information society. In this paper we report on the current state of the joint Flemish-Dutch efforts in the field of HLT for Dutch (HLTD) and how follow-up activities are being prepared. We explain the overall mechanism of evaluating an R&D programme and the role of evaluation in the policy cycle to establish new R&D funding activities. This is applied to the joint Flemish-Dutch STEVIN programme. Outcomes of the STEVIN scientific midterm review are shortly discussed as the overall final evaluation is currently still on-going. As part of preparing for future policy plans, an HLTD forecast is presented. Also new opportunities are outlined, in particular in the context of the European CLARIN infrastructure project that can lead to new avenues for joint Flemish-Dutch cooperation on HLTD. @@ -4715,7 +4715,7 @@ jezek-quochi-2010-capturing - BrigitteJörg + BrigitteJörg HansUszkoreit AlastairBurt <fixed-case>LT</fixed-case> World: Ontology and Reference Information Portal @@ -4724,9 +4724,9 @@ jorg-etal-2010-lt - Thiago D.Tadeu + Thiago D.Tadeu Eder M.de Novais - IvandréParaboni + IvandréParaboni Extracting Surface Realisation Templates from Corpora http://www.lrec-conf.org/proceedings/lrec2010/pdf/715_Paper.pdf In Natural Language Generation (NLG), template-based surface realisation is an effective solution to the problem of producing surface strings from a given semantic representation, but many applications may not be able to provide the input knowledge in the required level of detail, which in turn may limit the use of the available NLG resources. However, if we know in advance what the most likely output sentences are (e.g., because a corpus on the relevant application domain happens to be available), then corpus knowledge may be used to quickly deploy a surface realisation engine for small-scale applications, for which it may be sufficient to select a sentence (in natural language) that resembles the desired output, and then modify some or all of its constituents accordingly. In other words, the application may simply 'point to' an existing sentence in the corpus and specify only the changes that need to take place to obtain the desired surface string. In this paper we describe one such approach to surface realisation, in which we extract syntactically-structured templates from a target corpus, and use these templates to produce existing and modified versions of the target sentences by a combination of canned text and basic dependency-tree operations. @@ -4734,7 +4734,7 @@ ArifBramantoro - UlrichSchäfer + UlrichSchäfer ToruIshida Towards an Integrated Architecture for Composite Language Services and Multiple Linguistic Processing Components http://www.lrec-conf.org/proceedings/lrec2010/pdf/717_Paper.pdf @@ -4751,8 +4751,8 @@ MohamedBelgacem - GeorgesAntoniadis - LaurentBesacier + GeorgesAntoniadis + LaurentBesacier Automatic Identification of <fixed-case>A</fixed-case>rabic Dialects http://www.lrec-conf.org/proceedings/lrec2010/pdf/719_Paper.pdf In this work, automatic recognition of Arabic dialects is proposed. An acoustic survey of the proportion of vocalic intervals and the standard deviation of consonantal intervals in nine dialects (Tunisia, Morocco, Algeria, Egypt, Syria, Lebanon, Yemen, Golf’s Countries and Iraq) is performed using the platform Alize and Gaussian Mixture Models (GMM). The results show the complexity of the automatic identification of Arabic dialects since. No clear border can be found between the dialects, but a gradual transition between them. They can even vary slightly from one city to another. The existence of this gradual change is easy to understand: it corresponds to a human and social reality, to the contact, friendships forged and affinity in the environment more or less immediate of the individual. This document also raises questions about the classes or macro classes of Arabic dialects noticed from the confusion matrix and the design of the hierarchical tree obtained. @@ -4760,7 +4760,7 @@ SathishPammi - MarcelaCharfuelan + MarcelaCharfuelan MarcSchröder Multilingual Voice Creation Toolkit for the <fixed-case>MARY</fixed-case> <fixed-case>TTS</fixed-case> Platform http://www.lrec-conf.org/proceedings/lrec2010/pdf/720_Paper.pdf @@ -4770,7 +4770,7 @@ PetyaOsenova LaskaLaskova - KirilSimov + KirilSimov Exploring Co-Reference Chains for Concept Annotation of Domain Texts http://www.lrec-conf.org/proceedings/lrec2010/pdf/721_Paper.pdf The paper explores the co-reference chains as a way for improving the density of concept annotation over domain texts. The idea extends authors’ previous work on relating the ontology to the text terms in two domains ― IT and textile. Here IT domain is used. The challenge is to enhance relations among concepts instead of text entities, the latter pursued in most works. Our ultimate goal is to exploit these additional chains for concept disambiguation as well as sparseness resolution at concept level. First, a gold standard was prepared with manually connected links among concepts, anaphoric pronouns and contextual equivalents. This step was necessary not only for test purposes, but also for better orientation in the co-referent types and distribution. Then, two automatic systems were tested on the gold standard. Note that these systems were not designed specially for concept chaining. The conclusion is that the state-of-the-art co-reference resolution systems might address the concept sparseness problem, but not so much the concept disambiguation task. For the latter, word-sense disambiguation systems have to be integrated. @@ -4778,7 +4778,7 @@ KathrinSpreyer - LiljaØvrelid + LiljaØvrelid JonasKuhn Training Parsers on Partial Trees: A Cross-language Comparison http://www.lrec-conf.org/proceedings/lrec2010/pdf/722_Paper.pdf @@ -4823,9 +4823,9 @@ DanielCer - Marie-Catherinede Marneffe - DanJurafsky - ChrisManning + Marie-Catherinede Marneffe + DanJurafsky + ChrisManning Parsing to <fixed-case>S</fixed-case>tanford Dependencies: Trade-offs between Speed and Accuracy http://www.lrec-conf.org/proceedings/lrec2010/pdf/730_Paper.pdf We investigate a number of approaches to generating Stanford Dependencies, a widely used semantically-oriented dependency representation. We examine algorithms specifically designed for dependency parsing (Nivre, Nivre Eager, Covington, Eisner, and RelEx) as well as dependencies extracted from constituent parse trees created by phrase structure parsers (Charniak, Charniak-Johnson, Bikel, Berkeley and Stanford). We found that constituent parsers systematically outperform algorithms designed specifically for dependency parsing. The most accurate method for generating dependencies is the Charniak-Johnson reranking parser, with 89% (labeled) attachment F1 score. The fastest methods are Nivre, Nivre Eager, and Covington, used with a linear classifier to make local parsing decisions, which can parse the entire Penn Treebank development set (section 22) in less than 10 seconds on an Intel Xeon E5520. However, this speed comes with a substantial drop in F1 score (about 76% for labeled attachment) compared to competing methods. By tuning how much of the search space is explored by the Charniak-Johnson parser, we are able to arrive at a balanced configuration that is both fast and nearly as good as the most accurate approaches. @@ -4850,14 +4850,14 @@ kawahara-kurohashi-2010-acquiring - EmilianoGiovannetti + EmilianoGiovannetti An Unsupervised Approach for Semantic Relation Interpretation http://www.lrec-conf.org/proceedings/lrec2010/pdf/734_Paper.pdf In this work we propose a hybrid unsupervised approach for semantic relation extraction from Italian and English texts. The system takes as input pairs of ""distributionally similar"" terms, possibly involved in a semantic relation. To validate and label the anonymous relations holding between the terms in input, the candidate pairs of terms are looked for on the Web in the context of reliable lexico-syntactic patterns. This paper focuses on the definition of the patterns, on the measures used to assess the reliability of the suggested specific semantic relation and on the evaluation of the implemented system. So far, the system is able to extract the following types of semantic relations: hyponymy, meronymy, and co-hyponymy. The approach can however be easily extended to manage other relations by defining the appropriate battery of reliable lexico-syntactic patterns. Accuracy of the system was measured with scores of 83.3% for hyponymy, 75% for meronymy and 72.2% for co-hyponymy extraction. giovannetti-2010-unsupervised - Oi YeeKwong + Oi YeeKwong Constructing an Annotated Story Corpus: Some Observations and Issues http://www.lrec-conf.org/proceedings/lrec2010/pdf/736_Paper.pdf This paper discusses our ongoing work on constructing an annotated corpus of children’s stories for further studies on the linguistic, computational, and cognitive aspects of story structure and understanding. Given its semantic nature and the need for extensive common sense and world knowledge, story understanding has been a notoriously difficult topic in natural language processing. In particular, the notion of story structure for maintaining coherence has received much attention, while its strong version in the form of story grammar has triggered much debate. The relation between discourse coherence and the interestingness, or the point, of a story has not been satisfactorily settled. Introspective analysis on story comprehension has led to some important observations, based on which we propose a preliminary annotation scheme covering the structural, functional, and emotional aspects connecting discourse segments in stories. The annotation process will shed light on how story structure interacts with story point via various linguistic devices, and the annotated corpus is expected to be a useful resource for computational discourse processing, especially for studying various issues regarding the interface between coherence and interestingness of stories. @@ -4866,7 +4866,7 @@ KlaarVanopstal BartDesmet - VéroniqueHoste + VéroniqueHoste Towards a Learning Approach for Abbreviation Detection and Resolution. http://www.lrec-conf.org/proceedings/lrec2010/pdf/737_Paper.pdf The explosion of biomedical literature and with it the -uncontrolled- creation of abbreviations presents some special challenges for both human readers and computer applications. We developed an annotated corpus of Dutch medical text, and experimented with two approaches to abbreviation detection and resolution. Our corpus is composed of abstracts from two medical journals from the Low Countries in which approximately 65 percent (NTvG) and 48 percent (TvG) of the abbreviations have a corresponding full form in the abstract. Our first approach, a pattern-based system, consists of two steps: abbreviation detection and definition matching. This system has an average F-score of 0.82 for the detection of both defined and undefined abbreviations and an average F-score of 0.77 was obtained for the definitions. For our second approach, an SVM-based classifier was used on the preprocessed data sets, leading to an average F-score of 0.93 for the abbreviations; for the definitions an average F-score of 0.82 was obtained. @@ -4883,7 +4883,7 @@ MridulGupta VineetYadav SamarHusain - Dipti MisraSharma + Dipti MisraSharma Partial Parsing as a Method to Expedite Dependency Annotation of a <fixed-case>H</fixed-case>indi Treebank http://www.lrec-conf.org/proceedings/lrec2010/pdf/739_Paper.pdf The paper describes an approach to expedite the process of manual annotation of a Hindi dependency treebank which is currently under development. We propose a way by which consistency among a set of manual annotators could be improved. Furthermore, we show that our setup can also prove useful for evaluating when an inexperienced annotator is ready to start participating in the production of the treebank. We test our approach on sample sets of data obtained from an ongoing work on creation of this treebank. The results asserting our proposal are reported in this paper. We report results from a semi-automated approach of dependency annotation experiment. We find out the rate of agreement between annotators using Cohen’s Kappa. We also compare results with respect to the total time taken to annotate sample data-sets using a completely manual approach as opposed to a semi-automated approach. It is observed from the results that this semi-automated approach when carried out with experienced and trained human annotators improves the overall quality of treebank annotation and also speeds up the process. @@ -4898,10 +4898,10 @@ IkerLuengo - EvaNavas + EvaNavas IgorOdriozola IbonSaratxaga - InmaculadaHernaez + InmaculadaHernaez IñakiSainz DanielErro Modified <fixed-case>LTSE</fixed-case>-<fixed-case>VAD</fixed-case> Algorithm for Applications Requiring Reduced Silence Frame Misclassification @@ -4910,7 +4910,7 @@ luengo-etal-2010-modified - NancyIde + NancyIde KeithSuderman BrianSimms <fixed-case>ANC</fixed-case>2<fixed-case>G</fixed-case>o: A Web Application for Customized Corpus Creation @@ -4929,7 +4929,7 @@ kozawa-etal-2010-collection - NickRizzolo + NickRizzolo DanRoth Learning Based <fixed-case>J</fixed-case>ava for Rapid Development of <fixed-case>NLP</fixed-case> Systems http://www.lrec-conf.org/proceedings/lrec2010/pdf/747_Paper.pdf @@ -4938,7 +4938,7 @@ JorgeVivaldi - HoracioRodríguez + HoracioRodríguez Finding Domain Terms using <fixed-case>W</fixed-case>ikipedia http://www.lrec-conf.org/proceedings/lrec2010/pdf/748_Paper.pdf In this paper we present a new approach for obtaining the terminology of a given domain using the category and page structures of the Wikipedia in a language independent way. Our approach consists basically, for each domain, on navigating the Category graph of the Wikipedia starting from the root nodes associated to the domain. A heavy filtering mechanism is carried out for preventing as much as possible the inclusion of spurious categories. For each selected category all the pages belonging to it are then recovered and filtered. This procedure is iterate several times until achieving convergence. Both category names and page names are considered candidates to belong to the terminology of the domain. This approach has been applied to three broad coverage domains: astronomy, chemistry and medicine, and two languages, English and Spanish, showing a promising performance. @@ -4946,14 +4946,14 @@ ClaireBrierley - EricAtwell + EricAtwell <fixed-case>P</fixed-case>ro<fixed-case>POSEC</fixed-case>: A Prosody and <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> Annotated Spoken <fixed-case>E</fixed-case>nglish Corpus http://www.lrec-conf.org/proceedings/lrec2010/pdf/749_Paper.pdf We have previously reported on ProPOSEL, a purpose-built Prosody and PoS English Lexicon compatible with the Python Natural Language ToolKit. ProPOSEC is a new corpus research resource built using this lexicon, intended for distribution with the Aix-MARSEC dataset. ProPOSEC comprises multi-level parallel annotations, juxtaposing prosodic and syntactic information from different versions of the Spoken English Corpus, with canonical dictionary forms, in a query format optimized for Perl, Python, and text processing programs. The order and content of fields in the text file is as follows: (1) Aix-MARSEC file number; (2) word; (3) LOB PoS-tag; (4) C5 PoS-tag; (5) Aix SAM-PA phonetic transcription; (6) SAM-PA phonetic transcription from ProPOSEL; (7) syllable count; (8) lexical stress pattern; (9) default content or function word tag; (10) DISC stressed and syllabified phonetic transcription; (11) alternative DISC representation, incorporating lexical stress pattern; (12) nested arrays of phonemes and tonic stress marks from Aix. As an experimental dataset, ProPOSEC can be used to study correlations between these annotation tiers, where significant findings are then expressed as additional features for phrasing models integral to Text-to-Speech and Speech Recognition. As a training set, ProPOSEC can be used for machine learning tasks in Information Retrieval and Speech Understanding systems. brierley-atwell-2010-proposec - Margarita AlonsoRamos + Margarita AlonsoRamos LeoWanner OrsolyaVincze Gerard Casamayordel Bosque @@ -4975,14 +4975,14 @@ YuChen - AndreasEisele + AndreasEisele Integrating a Rule-based with a Hierarchical Translation System http://www.lrec-conf.org/proceedings/lrec2010/pdf/754_Paper.pdf Recent developments on hybrid systems that combine rule-based machine translation (RBMT) systems with statistical machine translation (SMT) generally neglect the fact that RBMT systems tend to produce more syntactically well-formed translations than data-driven systems. This paper proposes a method that alleviates this issue by preserving more useful structures produced by RBMT systems and utilizing them in a SMT system that operates on hierarchical structures instead of flat phrases alone. For our experiments, we use Joshua as the decoder. It is the first attempt towards a tighter integration of MT systems from different paradigms that both support hierarchical analysis. Preliminary results show consistent improvements over the previous approach. chen-eisele-2010-integrating - MassimoPoesio + MassimoPoesio OlgaUryupina YannickVersley Creating a Coreference Resolution System for <fixed-case>I</fixed-case>talian @@ -4991,9 +4991,9 @@ poesio-etal-2010-creating - OndřejBojar + OndřejBojar PavelStraňák - DanielZeman + DanielZeman Data Issues in <fixed-case>E</fixed-case>nglish-to-<fixed-case>H</fixed-case>indi Machine Translation http://www.lrec-conf.org/proceedings/lrec2010/pdf/756_Paper.pdf Statistical machine translation to morphologically richer languages is a challenging task and more so if the source and target languages differ in word order. Current state-of-the-art MT systems thus deliver mediocre results. Adding more parallel data often helps improve the results; if it doesn't, it may be caused by various problems such as different domains, bad alignment or noise in the new data. In this paper we evaluate the English-to-Hindi MT task from this data perspective. We discuss several available parallel data sources and provide cross-evaluation results on their combinations using two freely available statistical MT systems. We demonstrate various problems encountered in the data and describe automatic methods of data cleaning and normalization. We also show that the contents of two independently distributed data sets can unexpectedly overlap, which negatively affects translation quality. Together with the error analysis, we also present a new tool for viewing aligned corpora, which makes it easier to detect difficult parts in the data even for a developer not speaking the target language. @@ -5074,14 +5074,14 @@ DiegoDe Cao DaniloCroce - RobertoBasili + RobertoBasili Extensive Evaluation of a <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et-<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et mapping resource http://www.lrec-conf.org/proceedings/lrec2010/pdf/773_Paper.pdf Lexical resources are basic components of many text processing system devoted to information extraction, question answering or dialogue. In paste years many resources have been developed such as FrameNet and WordNet. FrameNet describes prototypical situations (i.e. Frames) while WordNet defines lexical meaning (senses) for the majority of English nouns, verbs, adjectives and adverbs. A major difference between FrameNet and WordNet refers to their coverage. Due of this lack of coverage, in recent years some approaches have been studied to make a bridge between this two resources, so a resource is used to extend the coverage of the other one. The nature of these approaches leave from supervised to supervised methods. The major problem is that there is not a standard in evaluation of the mapping. Each different work have tested own approach with a custom gold standard. This work give an extensive evaluation of the model proposed in (De Cao et al., 2008) using gold standard proposed in other works. Moreover this work give an empirical comparison between other available resources. As outcome of this work we also release the full mapping resource made according to the model proposed in (De Cao et al., 2008). de-cao-etal-2010-extensive - DjaméSeddah + DjaméSeddah Exploring the Spinal-<fixed-case>STIG</fixed-case> Model for Parsing <fixed-case>F</fixed-case>rench http://www.lrec-conf.org/proceedings/lrec2010/pdf/775_Paper.pdf We evaluate statistical parsing of French using two probabilistic models derived from the Tree Adjoining Grammar framework: a Stochastic Tree Insertion Grammars model (STIG) and a specific instance of this formalism, called Spinal Tree Insertion Grammar model which exhibits interesting properties with regard to data sparseness issues common to small treebanks such as the Paris 7 French Treebank. Using David Chiang’s STIG parser (Chiang, 2003), we present results of various experiments we conducted to explore those models for French parsing. The grammar induction makes use of a head percolation table tailored for the French Treebank and which is provided in this paper. Using two evaluation metrics, we found that the parsing performance of a STIG model is tied to the size of the underlying Tree Insertion Grammar, with a more compact grammar, a spinal STIG, outperforming a genuine STIG. We finally note that a ""spinal"" framework seems to emerge in the literature. Indeed, the use of vertical grammars such as Spinal STIG instead of horizontal grammars such as PCFGs, afflicted with well known data sparseness issues, seems to be a promising path toward better parsing performance. @@ -5089,7 +5089,7 @@ TommasoCaselli - IrinaProdanof + IrinaProdanof Annotating Event Anaphora: A Case Study http://www.lrec-conf.org/proceedings/lrec2010/pdf/776_Paper.pdf In recent years we have resgitered a renewed interest in event detection and temporal processing of text/discourse. TimeML (Pustejovsky et al., 2003a) has shed new lights on the notion of event and developed a new methodology for its annotation. On a parallel, works on anaphora resolution have developed a reliable methodology for the annotation and pointed out the core role of this phenomenon for the improvement of NLP systems. This paper tries to put together these two lines of research by describing a case study for the creation of an annotation scheme on event anaphora. We claim that this work could have consequences for the annotation of eventualities as proposed in TimeML and on the use of the tag and on the study of anaphora and its annotation. The annotation scheme and its guidelines have been developed on the basis of a coarse grained bottom up approach. In order to do this, we have performed a small sampling annotation which has highlighted shortcomings and open issues which need to be resolved. @@ -5097,11 +5097,11 @@ BenteMaegaard - MohamedAttia + MohamedAttia KhalidChoukri - OlivierHamon - StevenKrauwer - MustafaYaseen + OlivierHamon + StevenKrauwer + MustafaYaseen Cooperation for <fixed-case>A</fixed-case>rabic Language Resources and Tools — The <fixed-case>MEDAR</fixed-case> Project http://www.lrec-conf.org/proceedings/lrec2010/pdf/777_Paper.pdf The paper describes some of the work carried out within the European funded project MEDAR. The project has three streams of activity: the technical stream, the cooperation stream and the dissemination stream. MEDAR has first updated the existing surveys and BLARK for Arabic, and then the technical stream focused on machine translation. The consortium identified a number of freely available MT systems and then customized two versions of the famous MOSES package. The Consortium addressed the needs to package MOSES for English to Arabic (while the main MT stream is on Arabic to English). For performance assessment purposes, the partners produced test data that allowed carrying out an evaluation campaign with 5 different systems (including from outside the consortium) and two online ones. Both the MT baselines and the collected data will be made available via ELRA catalogue. The cooperation stream focuses mostly on the cooperation roadmap for Human Language Technologies for Arabic. Cooperation Roadmap for the region directed towards the Arabic HLT in general. It is the purpose of the roadmap to outline areas and priorities for collaboration, in terms of collaboration between EU countries and Arabic speaking countries, as well as cooperation in general: between countries, between universities, and last but not least between universities and industry. @@ -5126,7 +5126,7 @@ ChristineMeunier IrinaNesterenko BerthillePallaud - LaurentPrévot + LaurentPrévot BéatricePriego-Valverde StéphaneRauzy The <fixed-case>OTIM</fixed-case> Formal Annotation Model: A Preliminary Step before Annotation Scheme @@ -5137,7 +5137,7 @@ SanazJabbari MarkHepple - LouiseGuthrie + LouiseGuthrie Evaluating Lexical Substitution: Analysis and New Measures http://www.lrec-conf.org/proceedings/lrec2010/pdf/782_Paper.pdf Lexical substitution is the task of finding a replacement for a target word in a sentence so as to preserve, as closely as possible, the meaning of the original sentence. It has been proposed that lexical substitution be used as a basis for assessing the performance of word sense disambiguation systems, an idea realised in the English Lexical Substitution Task of SemEval-2007. In this paper, we examine the evaluation metrics used for the English Lexical Substitution Task and identify some problems that arise for them. We go on to propose some alternative measures for this purpose, that avoid these problems, and which in turn can be seen as redefining the key tasks that lexical substitution systems should be expected to perform. We hope that these new metrics will better serve to guide the development of lexical substitution systems in future work. One of the new metrics addresses how effective systems are in ranking substitution candidates, a key ability for lexical substitution systems, and we report some results concerning the assessment of systems produced by this measure as compared to the relevant measure from SemEval-2007. @@ -5145,7 +5145,7 @@ MehrnoushShamsfard - HakimehFadaei + HakimehFadaei ElhamFekri Extracting Lexico-conceptual Knowledge for Developing <fixed-case>P</fixed-case>ersian <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et http://www.lrec-conf.org/proceedings/lrec2010/pdf/784_Paper.pdf @@ -5154,7 +5154,7 @@ Paula VazLobo - David Martinsde Matos + David Martinsde Matos Fairy Tale Corpus Organization Using Latent Semantic Mapping and an Item-to-item Top-n Recommendation Algorithm http://www.lrec-conf.org/proceedings/lrec2010/pdf/786_Paper.pdf In this paper we present a fairy tale corpus that was semantically organized and tagged. The proposed method uses latent semantic mapping to represent the stories and a top-n item-to-item recommendation algorithm to define clusters of similar stories. Each story can be placed in more than one cluster and stories in the same cluster are related to the same concepts. The results were manually evaluated regarding the groupings as perceived by human judges. The evaluation resulted in a precision of 0.81, a recall of 0.69, and an f-measure of 0.75 when using tf*idf for word frequency. Our method is topic- and language-independent, and, contrary to traditional clustering methods, automatically defines the number of clusters based on the set of documents. This method can be used as a setup for traditional clustering or classification. The resulting corpus will be used for recommendation purposes, although it can also be used for emotion extraction, semantic role extraction, meaning extraction, text classification, among others. @@ -5162,7 +5162,7 @@ AlistairWillis - DavidKing + DavidKing DavidMorse AntonDil ChrisLyal @@ -5175,9 +5175,9 @@ LindaBrandschain DavidGraff - ChristopherCieri + ChristopherCieri KevinWalker - ChrisCaruso + ChrisCaruso AbbyNeely Greybeard Longitudinal Speech Study http://www.lrec-conf.org/proceedings/lrec2010/pdf/789_Paper.pdf @@ -5185,12 +5185,12 @@ brandschain-etal-2010-greybeard - FranciscoCampillo + FranciscoCampillo DanielaBraga Ana BelénMourín - CarmenGarcía-Mateo + CarmenGarcía-Mateo PedroSilva - Miguel SalesDias + Miguel SalesDias FranciscoMéndez Building High Quality Databases for Minority Languages such as <fixed-case>G</fixed-case>alician http://www.lrec-conf.org/proceedings/lrec2010/pdf/790_Paper.pdf @@ -5198,7 +5198,7 @@ campillo-etal-2010-building - William D.Lewis + William D.Lewis ChrisWendt DavidBullock Achieving Domain Specificity in <fixed-case>SMT</fixed-case> without Overt Siloing @@ -5209,9 +5209,9 @@ LindaBrandschain DavidGraff - ChrisCieri + ChrisCieri KevinWalker - ChrisCaruso + ChrisCaruso AbbyNeely Mixer 6 http://www.lrec-conf.org/proceedings/lrec2010/pdf/792_Paper.pdf @@ -5231,7 +5231,7 @@ AntonioToral LamiaTounsi MonicaMonachini - Josefvan Genabith + Josefvan Genabith An Automatically Built Named Entity Lexicon for <fixed-case>A</fixed-case>rabic http://www.lrec-conf.org/proceedings/lrec2010/pdf/797_Paper.pdf We have adapted and extended the automatic Multilingual, Interoperable Named Entity Lexicon approach to Arabic, using Arabic WordNet (AWN) and Arabic Wikipedia (AWK). First, we extract AWN’s instantiable nouns and identify the corresponding categories and hyponym subcategories in AWK. Then, we exploit Wikipedia inter-lingual links to locate correspondences between articles in ten different languages in order to identify Named Entities (NEs). We apply keyword search on AWK abstracts to provide for Arabic articles that do not have a correspondence in any of the other languages. In addition, we perform a post-processing step to fetch further NEs from AWK not reachable through AWN. Finally, we investigate diacritization using matching with geonames databases, MADA-TOKAN tools and different heuristics for restoring vowel marks of Arabic NEs. Using this methodology, we have extracted approximately 45,000 Arabic NEs and built, to the best of our knowledge, the largest, most mature and well-structured Arabic NE lexical resource to date. We have stored and organised this lexicon following the LMF ISO standard. We conduct a quantitative and qualitative evaluation against a manually annotated gold standard and achieve precision scores from 95.83% (with 66.13% recall) to 99.31% (with 61.45% recall) according to different values of a threshold. @@ -5239,7 +5239,7 @@ ZhiyiSong - StephanieStrassel + StephanieStrassel GaryKrug KazuakiMaeda Enhanced Infrastructure for Creation and Collection of Translation Resources @@ -5249,14 +5249,14 @@ EgoitzLaparra - GermanRigau + GermanRigau e<fixed-case>X</fixed-case>tended <fixed-case>W</fixed-case>ord<fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et http://www.lrec-conf.org/proceedings/lrec2010/pdf/799_Paper.pdf This paper presents a novel automatic approach to partially integrate FrameNet and WordNet. In that way we expect to extend FrameNet coverage, to enrich WordNet with frame semantic information and possibly to extend FrameNet to languages other than English. The method uses a knowledge-based Word Sense Disambiguation algorithm for matching the FrameNet lexical units to WordNet synsets. Specifically, we exploit a graph-based Word Sense Disambiguation algorithm that uses a large-scale knowledge-base derived from existing semantic resources. We have developed and tested additional versions of this algorithm showing substantial improvements over state-of-the-art results. Finally, we show some examples and figures of the resulting semantic resource. laparra-rigau-2010-extended - BarbaraPlank + BarbaraPlank Improved Statistical Measures to Assess Natural Language Parser Performance across Domains http://www.lrec-conf.org/proceedings/lrec2010/pdf/801_Paper.pdf We examine the performance of three dependency parsing systems, in particular, their performance variation across Wikipedia domains. We assess the performance variation of (i) Alpino, a deep grammar-based system coupled with a statistical disambiguation versus (ii) MST and Malt, two purely data-driven statistical dependency parsing systems. The question is how the performance of each parser correlates with simple statistical measures of the text (e.g. sentence length, unknown word rate, etc.). This would give us an idea of how sensitive the different systems are to domain shifts, i.e. which system is more in need for domain adaptation techniques. To this end, we extend the statistical measures used by Zhang and Wang (2009) for English and evaluate the systems on several Wikipedia domains by focusing on a freer word-order language, Dutch. The results confirm the general findings of Zhang and Wang (2009), i.e. different parsing systems have different sensitivity against various statistical measure of the text, where the highest correlation to parsing accuracy was found for the measure we added, sentence perplexity. @@ -5275,7 +5275,7 @@ CarlosRamisch AlineVillavicencio - ChristianBoitet + ChristianBoitet mwetoolkit: a Framework for Multiword Expression Identification http://www.lrec-conf.org/proceedings/lrec2010/pdf/803_Paper.pdf This paper presents the Multiword Expression Toolkit (mwetoolkit), an environment for type and language-independent MWE identification from corpora. The mwetoolkit provides a targeted list of MWE candidates, extracted and filtered according to a number of user-defined criteria and a set of standard statistical association measures. For generating corpus counts, the toolkit provides both a corpus indexation facility and a tool for integration with web search engines, while for evaluation, it provides validation and annotation facilities. The mwetoolkit also allows easy integration with a machine learning tool for the creation and application of supervised MWE extraction models if annotated data is available. In our experiment, the mwetoolkit was tested and evaluated in the context of MWE extraction in the biomedical domain. Our preliminary results show that the toolkit performs better than other approaches, especially concerning recall. Moreover, this first version can also be extended in several ways in order to improve the quality of the results. @@ -5315,7 +5315,7 @@ shamsfard-etal-2010-step - Drahomíra „johanka“Spoustová + Drahomíra „johanka“Spoustová MiroslavSpousta PavelPecina Building a Web Corpus of <fixed-case>C</fixed-case>zech @@ -5352,9 +5352,9 @@ de-luca-2010-corpus - RobertaCatizone + RobertaCatizone AlexieiDingli - RobertGaizauskas + RobertGaizauskas Using Dialogue Corpora to Extend Information Extraction Patterns for Natural Language Understanding of Dialogue http://www.lrec-conf.org/proceedings/lrec2010/pdf/818_Paper.pdf This paper examines how Natural Language Process (NLP) resources and online dialogue corpora can be used to extend coverage of Information Extraction (IE) templates in a Spoken Dialogue system. IE templates are used as part of a Natural Language Understanding module for identifying meaning in a user utterance. The use of NLP tools in Dialogue systems is a difficult task given 1) spoken dialogue is often not well-formed and 2) there is a serious lack of dialogue data. In spite of that, we have devised a method for extending IE patterns using standard NLP tools and available dialogue corpora found on the web. In this paper, we explain our method which includes using a set of NLP modules developed using GATE (a General Architecture for Text Engineering), as well as a general purpose editing tool that we built to facilitate the IE rule creation process. Lastly, we present directions for future work in this area. @@ -5362,7 +5362,7 @@ LamiaTounsi - Josefvan Genabith + Josefvan Genabith <fixed-case>A</fixed-case>rabic Parsing Using Grammar Transforms http://www.lrec-conf.org/proceedings/lrec2010/pdf/819_Paper.pdf We investigate Arabic Context Free Grammar parsing with dependency annotation comparing lexicalised and unlexicalised parsers. We study how morphosyntactic as well as function tag information percolation in the form of grammar transforms (Johnson, 1998, Kulick et al., 2006) affects the performance of a parser and helps dependency assignment. We focus on the three most frequent functional tags in the Arabic Penn Treebank: subjects, direct objects and predicates . We merge these functional tags with their phrasal categories and (where appropriate) percolate case information to the non-terminal (POS) category to train the parsers. We then automatically enrich the output of these parsers with full dependency information in order to annotate trees with Lexical Functional Grammar (LFG) f-structure equations with produce f-structures, i.e. attribute-value matrices approximating to basic predicate-argument-adjunct structure representations. We present a series of experiments evaluating how well lexicalized, history-based, generative (Bikel) as well as latent variable PCFG (Berkeley) parsers cope with the enriched Arabic data. We measure quality and coverage of both the output trees and the generated LFG f-structures. We show that joint functional and morphological information percolation improves both the recovery of trees as well as dependency results in the form of LFG f-structures. @@ -5378,7 +5378,7 @@ Na-RaeHan - JoelTetreault + JoelTetreault Soo-HwaLee Jin-YoungHa Using an Error-Annotated Learner Corpus to Develop an <fixed-case>ESL</fixed-case>/<fixed-case>EFL</fixed-case> Error Correction System @@ -5388,9 +5388,9 @@ IanMcGraw - Chia-yingLee + Chia-yingLee LeeHetherington - StephanieSeneff + StephanieSeneff JimGlass Collecting Voices from the Cloud http://www.lrec-conf.org/proceedings/lrec2010/pdf/822_Paper.pdf @@ -5399,7 +5399,7 @@ AurélienMax - Josep MariaCrego + Josep MariaCrego FrançoisYvon Contrastive Lexical Evaluation of Machine Translation http://www.lrec-conf.org/proceedings/lrec2010/pdf/823_Paper.pdf @@ -5407,8 +5407,8 @@ max-etal-2010-contrastive - ElaineUí Dhonnchadha - JosefVan Genabith + ElaineUí Dhonnchadha + JosefVan Genabith Partial Dependency Parsing for <fixed-case>I</fixed-case>rish http://www.lrec-conf.org/proceedings/lrec2010/pdf/824_Paper.pdf We present a partial dependency parser for Irish. Constraint Grammar (CG) based rules are used to annotate dependency relations and grammatical functions. Chunking is performed using a regular-expression grammar which operates on the dependency tagged sentences. As this is the first implementation of a parser for unrestricted Irish text (to our knowledge), there were no guidelines or precedents available. Therefore deciding what constitutes a syntactic unit, and how it should be annotated, accounts for a major part of the early development effort. Currently, all tokens in a sentence are tagged for grammatical function and local dependency. Long-distance dependencies, prepositional attachments or coordination are not handled, resulting in a partial dependency analysis. Evaluations show that the partial dependency analysis achieves an f-score of 93.60% on development data and 94.28% on unseen test data, while the chunker achieves an f-score of 97.20% on development data and 93.50% on unseen test data. @@ -5433,7 +5433,7 @@ SaraRosenthal WilliamLipovsky - KathleenMcKeown + KathleenMcKeown KapilThadani JacobAndreas Towards Semi-Automated Annotation for Prepositional Phrase Attachment @@ -5443,7 +5443,7 @@ PatriceLopez - LaurentRomary + LaurentRomary <fixed-case>GRISP</fixed-case>: A Massive Multilingual Terminological Database for Scientific and Technical Domains http://www.lrec-conf.org/proceedings/lrec2010/pdf/829_Paper.pdf The development of a multilingual terminology is a very long and costly process. We present the creation of a multilingual terminological database called GRISP covering multiple technical and scientific fields from various open resources. A crucial aspect is the merging of the different resources which is based in our proposal on the definition of a sound conceptual model, different domain mapping and the use of structural constraints and machine learning techniques for controlling the fusion process. The result is a massive terminological database of several millions terms, concepts, semantic relations and definitions. The accuracy of the concept merging between several resources have been evaluated following several methods. This resource has allowed us to improve significantly the mean average precision of an information retrieval system applied to a large collection of multilingual and multidomain patent documents. New specialized terminologies, not specifically created for text processing applications, can be aggregated and merged to GRISP with minimal manual efforts. @@ -5476,8 +5476,8 @@ murakami-etal-2010-language - KristinaVučković - ŽeljkoAgić + KristinaVučković + ŽeljkoAgić MarkoTadić Improving Chunking Accuracy on <fixed-case>C</fixed-case>roatian Texts by Morphosyntactic Tagging http://www.lrec-conf.org/proceedings/lrec2010/pdf/834_Paper.pdf @@ -5485,8 +5485,8 @@ vuckovic-etal-2010-improving - David K.Elson - Kathleen R.McKeown + David K.Elson + Kathleen R.McKeown Building a Bank of Semantically Encoded Narratives http://www.lrec-conf.org/proceedings/lrec2010/pdf/835_Paper.pdf We propose a methodology for a novel type of discourse annotation whose model is tuned to the analysis of a text as narrative. This is intended to be the basis of a “story bank” resource that would facilitate the automatic analysis of narrative structure and content. The methodology calls for annotators to construct propositions that approximate a reference text, by selecting predicates and arguments from among controlled vocabularies drawn from resources such as WordNet and VerbNet. Annotators then integrate the propositions into a conceptual graph that maps out the entire discourse; the edges represent temporal, causal and other relationships at the level of story content. Because annotators must identify the recurring objects and themes that appear in the text, they also perform coreference resolution and word sense disambiguation as they encode propositions. We describe a collection experiment and a method for determining inter-annotator agreement when multiple annotators encode the same short story. Finally, we describe ongoing work toward extending the method to integrate the annotator’s interpretations of character agency (the goals, plans and beliefs that are relevant, yet not explictly stated in the text). @@ -5500,8 +5500,8 @@ wong-2010-semantic - Bento CarlosDias-da-Silva - ArianiDi Felippo + Bento CarlosDias-da-Silva + ArianiDi Felippo <fixed-case>REBECA</fixed-case>: Turning <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Databases into “Ontolexicons” http://www.lrec-conf.org/proceedings/lrec2010/pdf/838_Paper.pdf In this paper we outline the design and present a sample of the REBECA bilingual lexical-conceptual database constructed by linking two monolingual lexical resources in which a set of lexicalized concepts of the North-American English database, the Princeton WordNet (WN.Pr) synsets, is aligned with its corresponding set of lexicalized concepts of the Brazilian Portuguese database, the Brazilian Portuguese WordNet synsets under construction, by means of the MultiNet-based interlingual schema, the concepts of which are the ones represented by the Princeton WordNet synsets. Implemented in the Protégé-OWL editor, the alignment of the two databases illustrates how wordnets can be turned into ontolexicons. At the current stage of development, the “wheeled-vehicle” conceptual domain was modeled to develop and to test REBECA’s design and contents, respectively. The collection of 205 ontological concepts worked out, i.e. REBECA´s alignment indexes, is exemplified in the “wheeled- vehicle” conceptual domain, e.g. [CAR], [RAILCAR], etc., and it was selected in the WN.Pr database, version 2.0. Future work includes the population of the database with more lexical data and other conceptual domains so that the intricacies of adding more concepts and devising the spreading or pruning the relationships between them can be properly evaluated. @@ -5534,7 +5534,7 @@ ParisaKordjamshidi MartijnVan Otterlo - Marie-FrancineMoens + Marie-FrancineMoens Spatial Role Labeling: Task Definition and Annotation Scheme http://www.lrec-conf.org/proceedings/lrec2010/pdf/846_Paper.pdf One of the essential functions of natural language is to talk about spatial relationships between objects. Linguistic constructs can express highly complex, relational structures of objects, spatial relations between them, and patterns of motion through spaces relative to some reference point. Learning how to map this information onto a formal representation from a text is a challenging problem. At present no well-defined framework for automatic spatial information extraction exists that can handle all of these issues. In this paper we introduce the task of spatial role labeling and propose an annotation scheme that is language-independent and facilitates the application of machine learning techniques. Our framework consists of a set of spatial roles based on the theory of holistic spatial semantics with the intent of covering all aspects of spatial concepts, including both static and dynamic spatial relations. We illustrate our annotation scheme with many examples throughout the paper, and in addition we highlight how to connect to spatial calculi such as region connection calculus and also how our approach fits into related work. @@ -5548,7 +5548,7 @@ russo-2010-discovering - KirilSimov + KirilSimov PetyaOsenova Constructing of an Ontology-based Lexicon for <fixed-case>B</fixed-case>ulgarian http://www.lrec-conf.org/proceedings/lrec2010/pdf/848_Paper.pdf @@ -5556,8 +5556,8 @@ simov-osenova-2010-constructing - Meghan LammieGlenn - Stephanie M.Strassel + Meghan LammieGlenn + Stephanie M.Strassel HaejoongLee KazuakiMaeda RamezZakhary @@ -5570,7 +5570,7 @@ ClaudiuMihăilă IustinaIlisei - DianaInkpen + DianaInkpen <fixed-case>R</fixed-case>omanian Zero Pronoun Distribution: A Comparative Study http://www.lrec-conf.org/proceedings/lrec2010/pdf/851_Paper.pdf Anaphora resolution is still a challenging research field in natural language processing, lacking a algorithm that correctly resolves anaphoric pronouns. Anaphoric zero pronouns pose an even greater challenge, since this category is not lexically realised. Thus, their resolution is conditioned by their prior identification stage. This paper reports on the distribution of zero pronouns in Romanian in various genres: encyclopaedic, legal, literary, and news-wire texts. For this purpose, the RoZP corpus has been created, containing almost 50000 tokens and 800 zero pronouns which are manually annotated. The distribution patterns are compared across genres, and exceptional cases are presented in order to facilitate the methodological process of developing a future zero pronoun identification and resolution algorithm. The evaluation results emphasise that zero pronouns appear frequently in Romanian, and their distribution depends largely on the genre. Additionally, possible features are revealed for their identification, and a search scope for the antecedent has been determined, increasing the chances of correct resolution. @@ -5586,18 +5586,18 @@ PrasanthKolachina SudheerKolachina - Anil KumarSingh + Anil KumarSingh SamarHusain ViswanathNaidu RajeevSangal - AksharBharati + AksharBharati Grammar Extraction from Treebanks for <fixed-case>H</fixed-case>indi and <fixed-case>T</fixed-case>elugu http://www.lrec-conf.org/proceedings/lrec2010/pdf/854_Paper.pdf Grammars play an important role in many Natural Language Processing (NLP) applications. The traditional approach to creating grammars manually, besides being labor-intensive, has several limitations. With the availability of large scale syntactically annotated treebanks, it is now possible to automatically extract an approximate grammar of a language in any of the existing formalisms from a corresponding treebank. In this paper, we present a basic approach to extract grammars from dependency treebanks of two Indian languages, Hindi and Telugu. The process of grammar extraction requires a generalization mechanism. Towards this end, we explore an approach which relies on generalization of argument structure over the verbs based on their syntactic similarity. Such a generalization counters the effect of data sparseness in the treebanks. A grammar extracted using this system can not only expand already existing knowledge bases for NLP tasks such as parsing, but also aid in the creation of grammars for languages where none exist. Further, we show that the grammar extraction process can help in identifying annotation errors and thus aid in the task of the treebank validation. kolachina-etal-2010-grammar - AndrejsVasiljevs + AndrejsVasiljevs KasparsBalodis Corpus Based Analysis for Multilingual Terminology Entry Compounding http://www.lrec-conf.org/proceedings/lrec2010/pdf/855_Paper.pdf @@ -5618,7 +5618,7 @@ maeda-etal-2010-technical - José M.García-Miguel + José M.García-Miguel GaelVaamonde Fita GonzálezDomínguez <fixed-case>ADESSE</fixed-case>, a Database with Syntactic and Semantic Annotation of a Corpus of <fixed-case>S</fixed-case>panish @@ -5629,14 +5629,14 @@ DavidGuthrie MarkHepple - WeiLiu + WeiLiu Efficient Minimal Perfect Hash Language Models http://www.lrec-conf.org/proceedings/lrec2010/pdf/860_Paper.pdf The availability of large collections of text have made it possible to build language models that incorporate counts of billions of n-grams. This paper proposes two new methods of efficiently storing large language models that allow O(1) random access and use significantly less space than all known approaches. We introduce two novel data structures that take advantage of the distribution of n-grams in corpora and make use of various numbers of minimal perfect hashes to compactly store language models containing full frequency counts of billions of n-grams using 2.5 Bytes per n-gram and language models of quantized probabilities using 2.26 Bytes per n-gram. These methods allow language processing applications to take advantage of much larger language models than previously was possible using the same hardware and we additionally describe how they can be used in a distributed environment to store even larger models. We show that our approaches are simple to implement and can easily be combined with pruning and quantization to achieve additional reductions in the size of the language model. guthrie-etal-2010-efficient - StephanieStrassel + StephanieStrassel DanAdams HenryGoldberg JonathanHerr @@ -5652,7 +5652,7 @@ HeatherSimpson - StephanieStrassel + StephanieStrassel RobertParker PaulMcNamee <fixed-case>W</fixed-case>ikipedia and the Web of Confusable Entities: Experience from Entity Linking Query Creation for <fixed-case>TAC</fixed-case> 2009 Knowledge Base Population @@ -5681,7 +5681,7 @@ ThepchaiSupnithi TanethRuangrajitpakorn - KanokornTrakultaweekool + KanokornTrakultaweekool PeerachetPorkaew <fixed-case>A</fixed-case>uto<fixed-case>T</fixed-case>ag<fixed-case>TCG</fixed-case> : A Framework for Automatic <fixed-case>T</fixed-case>hai <fixed-case>CG</fixed-case> Tagging http://www.lrec-conf.org/proceedings/lrec2010/pdf/868_Paper.pdf @@ -5698,21 +5698,21 @@ NoureddineLoukil KaisHaddar - AbdelmajidBenhamadou + AbdelmajidBenhamadou A Syntactic Lexicon for <fixed-case>A</fixed-case>rabic Verbs http://www.lrec-conf.org/proceedings/lrec2010/pdf/873_Paper.pdf In this paper, we present a modeling of a syntactic lexicon for Arabic verbs. The structure of the lexicon is based on the recently introduced ISO standard called the Lexical Markup Framework. This standard enables us to describe the lexical information in a versatile way using general guidelines and make possible to share the resources developed in compliance with it. We discuss the syntactic information associated to verbs and the model we propose to structure and represent the entries within the lexicon. To study the usability of the lexicon in a real application, we designed a rule-based system that translates a LMF syntactic resource into Type Description Language compliant resource. The rules are mapping information from LMF entries and types to TDL types. The generated lexicon is used as input for a previously written HPSG grammar for Arabic built within the Language Knowledge Builder platform. Finally, we discuss improvements in parsing results and possible perspectives of this work. loukil-etal-2010-syntactic - Girish NathJha + Girish NathJha The <fixed-case>TDIL</fixed-case> Program and the <fixed-case>I</fixed-case>ndian Langauge Corpora Intitiative (<fixed-case>ILCI</fixed-case>) http://www.lrec-conf.org/proceedings/lrec2010/pdf/874_Paper.pdf India is considered a linguistic ocean with 4 language families and 22 scheduled national languages, and 100 un-scheduled languages reported by the 2001 census. This puts tremendous pressures on the Indian government to not only have comprehensive language policies, but also to create resources for their maintenance and development. In the age of information technology, there is a greater need to have a fine balance between allocation of resources to each language keeping in view the political compulsions, electoral potential of a linguistic community and other issues. In this connection, the government of India through various ministries and a think tank consisting of eminent linguistics and policy makers has done a commendable job despite the obvious roadblocks. This paper describes the Indian government’s policies towards language development and maintenance in the age of technology through the Ministry of HRD through its various agencies and the Ministry of Communications & Information Technology (MCIT) through its dedicated program called TDIL (Technology Development for Indian Languages). The paper also describes some of the recent activities of the TDIL in general and in particular, an innovative corpora project called ILCI - Indian Languages Corpora Initiative. jha-2010-tdil - ŽeljkoAgić + ŽeljkoAgić NikolaLjubešić MarkoTadić Towards Sentiment Analysis of Financial Texts in <fixed-case>C</fixed-case>roatian @@ -5730,7 +5730,7 @@ AgataSavary - JakubWaszczuk + JakubWaszczuk AdamPrzepiórkowski Towards the Annotation of Named Entities in the <fixed-case>N</fixed-case>ational <fixed-case>C</fixed-case>orpus of <fixed-case>P</fixed-case>olish http://www.lrec-conf.org/proceedings/lrec2010/pdf/879_Paper.pdf @@ -5752,7 +5752,7 @@ SomaraSeng NicolasKuchmann-Beauger AnassTalby - Claudede Loupy + Claudede Loupy <fixed-case>OAL</fixed-case>: A <fixed-case>NLP</fixed-case> Architecture to Improve the Development of Linguistic Resources for <fixed-case>NLP</fixed-case> http://www.lrec-conf.org/proceedings/lrec2010/pdf/882_Paper.pdf The performance of most NLP applications relies upon the quality of linguistic resources. The creation, maintenance and enrichment of those resources are a labour-intensive task, especially when no tools are available. In this paper we present the NLP architecture OAL, designed to assist computational linguists in the whole process of the development of resources in an industrial context: from corpora compilation to quality assurance. To add new words more easily to the morphosyntactic lexica, a guesser that lemmatizes and assigns morphosyntactic tags as well as inflection paradigms to a new word has been developed. Moreover, different control mechanisms are set up to check the coherence and consistency of the resources. Today OAL manages resources in five European languages: French, English, Spanish, Italian and Polish. Chinese and Portuguese are in process. The development of OAL has followed an incremental strategy. At present, semantic lexica, a named entities guesser and a named entities phonetizer are being developed. @@ -5761,20 +5761,20 @@ KarelPala ChristianeFellbaum - SonjaBosch + SonjaBosch Lexical Resources for Noun Compounds in <fixed-case>C</fixed-case>zech, <fixed-case>E</fixed-case>nglish and <fixed-case>Z</fixed-case>ulu http://www.lrec-conf.org/proceedings/lrec2010/pdf/883_Paper.pdf In this paper we discuss noun compounding, a highly generative, productive process, in three distinct languages: Czech, English and Zulu. Derivational morphology presents a large grey area between regular, compositional and idiosyncratic, non-compositional word forms. The structural properties of compounds in each of the languages are reviewed and contrasted. Whereas English compounds are head-final and thus left-branching, Czech and Zulu compounds usually consist of a leftmost governing head and a rightmost dependent element. Semantic properties of compounds are discussed with special reference to semantic relations between compound members which cross-linguistically show universal patterns, but idiosyncratic, language specific compounds are also identified. The integration of compounds into lexical resources, and WordNets in particular, remains a challenge that needs to be considered in terms of the compounds’ syntactic idiosyncrasy and semantic compositionality. Experiments with processing compounds in Czech, English and Zulu are reported and partly evaluated. The obtained partial lists of the Czech, English and Zulu compounds are also described. pala-etal-2010-lexical - DietrichRebholz-Schuhmann - Antonio JoséJimeno Yepes - Erik M.van Mulligen + DietrichRebholz-Schuhmann + Antonio JoséJimeno Yepes + Erik M.van Mulligen NingKang - JanKors + JanKors DavidMilward - PeterCorbett + PeterCorbett EkaterinaBuyko KatrinTomanek ElenaBeisswanger @@ -5795,7 +5795,7 @@ Petra-MariaStrauß StefanScherer GeorgLayher - HolgerHoffmann + HolgerHoffmann Evaluation of the <fixed-case>PIT</fixed-case> Corpus Or What a Difference a Face Makes? http://www.lrec-conf.org/proceedings/lrec2010/pdf/890_Paper.pdf This paper presents the evaluation of the PIT Corpus of multi-party dialogues recorded in a Wizard-of-Oz environment. An evaluation has been performed with two different foci: First, a usability evaluation was used to take a look at the overall ratings of the system. A shortened version of the SASSI questionnaire, namely the SASSISV, and the well established AttrakDiff questionnaire assessing the hedonistic and pragmatic dimension of computer systems have been analysed. In a second evaluation, the user's gaze direction was analysed in order to assess the difference in the user's (gazing) behaviour if interacting with the computer versus the other dialogue partner. Recordings have been performed in different setups of the system, e.g. with and without avatar. Thus, the presented evaluation further focuses on the difference in the interaction caused by deploying an avatar. The quantitative analysis of the gazing behaviour has resulted in several encouraging significant differences. As a possible interpretation it could be argued that users are more attentive towards systems with an avatar - the difference a face makes. @@ -5803,7 +5803,7 @@ LukaNerima - EricWehrli + EricWehrli VioletaSeretan A Recursive Treatment of Collocations http://www.lrec-conf.org/proceedings/lrec2010/pdf/891_Paper.pdf @@ -5821,7 +5821,7 @@ TimoSowa FiorenzaArisio - LucaCristoforetti + LucaCristoforetti <fixed-case>DICIT</fixed-case>: Evaluation of a Distant-talking Speech Interface for Television http://www.lrec-conf.org/proceedings/lrec2010/pdf/894_Paper.pdf The EC-funded project DICIT developed distant-talking interfaces for interactive TV. The final DICIT prototype system processes multimodal user input by speech and remote control. It was designed to understand both natural language and command-and-control-style speech input. We conducted an evaluation campaign to examine the usability and performance of the prototype. The task-oriented evaluation involved naive test persons and consisted of a subjective part with a usability questionnaire and an objective part. We used three groups of objective metrics to assess the system: one group related to speech component performance, one related to interface design and user awareness, and a final group related to task-based effectiveness and usability. These metrics were acquired with a dedicated transcription and annotation tool. The evaluation revealed a quite positive subjective assessments of the system and reasonable objective results. We report how the objective metrics helped us to determine problems in specific areas and to distinguish design-related issues from technical problems. The metrics computed over modality-specific groups also show that speech input gives a usability advantage over remote control for certain types of tasks. @@ -5829,7 +5829,7 @@ ArianneReimerink - Pilar LeónAraúz + Pilar LeónAraúz Pedro J. MagañaRedondo <fixed-case>E</fixed-case>co<fixed-case>L</fixed-case>exicon: An Environmental <fixed-case>TKB</fixed-case> http://www.lrec-conf.org/proceedings/lrec2010/pdf/895_Paper.pdf @@ -5837,7 +5837,7 @@ reimerink-etal-2010-ecolexicon - José JoãoAlmeida + José JoãoAlmeida AndréSantos AlbertoSimões Bigorna – A Toolkit for Orthography Migration Challenges @@ -5847,15 +5847,15 @@ Jan JonaJavoršek - TomažErjavec + TomažErjavec Experimental Deployment of a Grid Virtual Organization for Human Language Technologies http://www.lrec-conf.org/proceedings/lrec2010/pdf/899_Paper.pdf We propose to create a grid virtual organization for human language technologies, at first chiefly with the task of enabling linguistic researches to use existing distributed computing facilities of the European grid infrastructure for more efficient processing of large data sets. After a brief overview of modern grid computing, a number of common use-cases of natural language processing tasks running on the grid are presented, notably corpus annotation with morpho-syntactic tagging (600+ million-word corpus annotated in less than a day), $n$-gram statistics processing of a corpus and creation of grid-backed web-accessible services with annotation and term-extraction as examples. Implementation considerations and common problems of using grid for this type of tasks are laid out. We conclude with an outline of a simple action plan for evolving the infrastructure created for these experiments into a fully functional Human Language Technology grid Virtual Organization with the goal of making the power of European grid infrastructure available to the linguistic community. javorsek-erjavec-2010-experimental - EricCharton - Juan-ManuelTorres-Moreno + EricCharton + Juan-ManuelTorres-Moreno <fixed-case>NLG</fixed-case>b<fixed-case>A</fixed-case>se: A Free Linguistic Resource for Natural Language Processing Systems http://www.lrec-conf.org/proceedings/lrec2010/pdf/900_Paper.pdf Availability of labeled language resources, such as annotated corpora and domain dependent labeled language resources is crucial for experiments in the field of Natural Language Processing. Most often, due to lack of resources, manual verification and annotation of electronic text material is a prerequisite for the development of NLP tools. In the context of under-resourced language, the lack of copora becomes a crucial problem because most of the research efforts are supported by organizations with limited funds. Using free, multilingual and highly structured corpora like Wikipedia to produce automatically labeled language resources can be an answer to those needs. This paper introduces NLGbAse, a multilingual linguistic resource built from the Wikipedia encyclopedic content. This system produces structured metadata which make possible the automatic annotation of corpora with syntactical and semantical labels. A metadata contains semantical and statistical informations related to an encyclopedic document. To validate our approach, we built and evaluated a Named Entity Recognition tool, trained with Wikipedia corpora annotated by our system. @@ -5870,9 +5870,9 @@ bosma-vossen-2010-bootstrapping - Jinho D.Choi - ClaireBonial - MarthaPalmer + Jinho D.Choi + ClaireBonial + MarthaPalmer <fixed-case>P</fixed-case>ropbank Instance Annotation Guidelines Using a Dedicated Editor, Jubilee http://www.lrec-conf.org/proceedings/lrec2010/pdf/903_Paper.pdf This paper gives guidelines of how to annotate Propbank instances using a dedicated editor, Jubilee. Propbank is a corpus in which the arguments of each verb predicate are annotated with their semantic roles in relation to the predicate. Propbank annotation also requires the choice of a sense ID for each predicate. Jubilee facilitates this annotation process by displaying several resources of syntactic and semantic information simultaneously: the syntactic structure of a sentence is displayed in the main frame, the available senses with their corresponding argument structures are displayed in another frame, all available Propbank arguments are displayed for the annotators choice, and example annotations of each sense of the predicate are available to the annotator for viewing. Easy access to each of these resources allows the annotator to quickly absorb and apply the necessary syntactic and semantic information pertinent to each predicate for consistent and efficient annotation. Jubilee has been successfully adapted to many Propbank projects in several universities. The tool runs platform independently, is light enough to run as an X11 application and supports multiple languages such as Arabic, Chinese, English, Hindi and Korean. @@ -5894,11 +5894,11 @@ nabende-2010-applying - AlexandraBalahur + AlexandraBalahur RalfSteinberger - MijailKabadjov + MijailKabadjov VanniZavarella - Erikvan der Goot + Erikvan der Goot MatinaHalkia BrunoPouliquen JenyaBelyaeva @@ -5923,11 +5923,11 @@ strunk-2010-enriching - Claudede Loupy + Claudede Loupy MarieGuégan - ChristelleAyache + ChristelleAyache SomaraSeng - Juan-Manuel TorresMoreno + Juan-Manuel TorresMoreno A <fixed-case>F</fixed-case>rench Human Reference Corpus for Multi-Document Summarization and Sentence Compression http://www.lrec-conf.org/proceedings/lrec2010/pdf/919_Paper.pdf This paper presents two corpora produced within the RPM2 project: a multi-document summarization corpus and a sentence compression corpus. Both corpora are in French. The first one is the only one we know in this language. It contains 20 topics with 20 documents each. A first set of 10 documents per topic is summarized and then the second set is used to produce an update summarization (new information). 4 annotators were involved and produced a total of 160 abstracts. The second corpus contains all the sentences of the first one. 4 annotators were asked to compress the 8432 sentences. This is the biggest corpus of compressed sentences we know, whatever the language. The paper provides some figures in order to compare the different annotators: compression rates, number of tokens per sentence, percentage of tokens kept according to their POS, position of dropped tokens in the sentence compression phase, etc. These figures show important differences from an annotator to the other. Another point is the different strategies of compression used according to the length of the sentence. @@ -5936,17 +5936,17 @@ FeiXia CarrieLewis - William D.Lewis + William D.Lewis The Problems of Language Identification within Hugely Multilingual Data Sets http://www.lrec-conf.org/proceedings/lrec2010/pdf/921_Paper.pdf As the data for more and more languages is finding its way into digital form, with an increasing amount of this data being posted to the Web, it has become possible to collect language data from the Web and create large multilingual resources, covering hundreds or even thousands of languages. ODIN, the Online Database of INterlinear text (Lewis, 2006), is such a resource. It currently consists of nearly 200,000 data points for over 1,000 languages, the data for which was harvested from linguistic documents on the Web. We identify a number of issues with language identification for such broad-coverage resources including the lack of training data, ambiguous language names, incomplete language code sets, and incorrect uses of language names and codes. After providing a short overview of existing language code sets maintained by the linguistic community, we discuss what linguists and the linguistic community can do to make the process of language identification easier. xia-etal-2010-problems - Rebecca J.Passonneau - AnsafSalleb-Aoussi + Rebecca J.Passonneau + AnsafSalleb-Aoussi VikasBhardwaj - NancyIde + NancyIde Word Sense Annotation of Polysemous Words by Multiple Annotators http://www.lrec-conf.org/proceedings/lrec2010/pdf/922_Paper.pdf We describe results of a word sense annotation task using WordNet, involving half a dozen well-trained annotators on ten polysemous words for three parts of speech. One hundred sentences for each word were annotated. Annotators had the same level of training and experience, but interannotator agreement (IA) varied across words. There was some effect of part of speech, with higher agreement on nouns and adjectives, but within the words for each part of speech there was wide variation. This variation in IA does not correlate with number of senses in the inventory, or the number of senses actually selected by annotators. In fact, IA was sometimes quite high for words with many senses. We claim that the IA variation is due to the word meanings, contexts of use, and individual differences among annotators. We find some correlation of IA with sense confusability as measured by a sense confusion threshhold (CT). Data mining for association rules on a flattened data representation indicating each annotator's sense choices identifies outliers for some words, and systematic differences among pairs of annotators on others. @@ -5960,17 +5960,17 @@ gasser-2010-expanding - Susan WindischBrown + Susan WindischBrown TravisRood - MarthaPalmer + MarthaPalmer Number or Nuance: Which Factors Restrict Reliable Word Sense Annotation? http://www.lrec-conf.org/proceedings/lrec2010/pdf/927_Paper.pdf This study attempts to pinpoint the factors that restrict reliable word sense annotation, focusing on the influence of the number of senses annotators use and the semantic granularity of those senses. Both of these factors may be possible causes of low interannotator agreement (ITA) when tagging with fine-grained word senses, and, consequently, low WSD system performance (Ng et al., 1999; Snyder & Palmer, 2004; Chklovski & Mihalcea, 2002). If number of senses is the culprit, modifying the task to show fewer senses at a time could improve annotator reliability. However, if overly nuanced distinctions are the problem, then more general, coarse-grained distinctions may be necessary for annotator success and may be all that is needed to supply systems with the types of distinctions that people make. We describe three experiments that explore the role of these factors in annotation performance. Our results indicate that of these two factors, only the granularity of the senses restricts interannotator agreement, with broader senses resulting in higher annotation reliability. brown-etal-2010-number - Joshua B.Gordon - Rebecca J.Passonneau + Joshua B.Gordon + Rebecca J.Passonneau An Evaluation Framework for Natural Language Understanding in Spoken Dialogue Systems http://www.lrec-conf.org/proceedings/lrec2010/pdf/928_Paper.pdf We present an evaluation framework to enable developers of information seeking, transaction based spoken dialogue systems to compare the robustness of natural language understanding (NLU) approaches across varying levels of word error rate and contrasting domains. We develop statistical and semantic parsing based approaches to dialogue act identification and concept retrieval. Voice search is used in each approach to ultimately query the database. Included in the framework is a method for developers to bootstrap a representative pseudo-corpus, which is used to estimate NLU performance in a new domain. We illustrate the relative merits of these NLU techniques by contrasting our statistical NLU approach with a semantic parsing method over two contrasting applications, our CheckItOut library system and the deployed Let’s Go Public! system, across four levels of word error rate. We find that with respect to both dialogue act identification and concept retrieval, our statistical NLU approach is more likely to robustly accommodate the freer form, less constrained utterances of CheckItOut at higher word error rates than is possible with semantic parsing. @@ -5995,15 +5995,15 @@ RashmiPrasad - AravindJoshi - BonnieWebber + AravindJoshi + BonnieWebber Exploiting Scope for Shallow Discourse Parsing http://www.lrec-conf.org/proceedings/lrec2010/pdf/935_Paper.pdf We present an approach to automatically identifying the arguments of discourse connectives based on data from the Penn Discourse Treebank. Of the two arguments of connectives, called Arg1 and Arg2, we focus on Arg1, which has proven more challenging to identify. Our approach employs a sentence-based representation of arguments, and distinguishes ""intra-sentential connectives"", which take both their arguments in the same sentence, from ""inter-sentential connectives"", whose arguments are found in different sentences. The latter are further distinguished by paragraph position into ""ParaInit"" connectives, which appear in a paragraph-initial sentence, and ""ParaNonInit"" connectives, which appear elsewhere. The paper focusses on predicting Arg1 of Inter-sentential ParaNonInit connectives, presenting a set of scope-based filters that reduce the search space for Arg1 from all the previous sentences in the paragraph to a subset of them. For cases where these filters do not uniquely identify Arg1, coreference-based heuristics are employed. Our analysis shows an absolute 3% performance improvement over the high baseline of 83.3% for identifying Arg1 of Inter-sentential ParaNonInit connectives. prasad-etal-2010-exploiting - PushpakBhattacharyya + PushpakBhattacharyya <fixed-case>I</fixed-case>ndo<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et http://www.lrec-conf.org/proceedings/lrec2010/pdf/939_Paper.pdf India is a multilingual country where machine translation and cross lingual search are highly relevant problems. These problems require large resources- like wordnets and lexicons- of high quality and coverage. Wordnets are lexical structures composed of synsets and semantic relations. Synsets are sets of synonyms. They are linked by semantic relations like hypernymy (is-a), meronymy (part-of), troponymy (manner-of) etc. IndoWordnet is a linked structure of wordnets of major Indian languages from Indo-Aryan, Dravidian and Sino-Tibetan families. These wordnets have been created by following the expansion approach from Hindi wordnet which was made available free for research in 2006. Since then a number of Indian languages have been creating their wordnets. In this paper we discuss the methodology, coverage, important considerations and multifarious benefits of IndoWordnet. Case studies are provided for Marathi, Sanskrit, Bodo and Telugu, to bring out the basic methodology of and challenges involved in the expansion approach. The guidelines the lexicographers follow for wordnet construction are enumerated. The difference between IndoWordnet and EuroWordnet also is discussed. @@ -6012,8 +6012,8 @@ KirkRoberts SrikanthGullapalli - Cosmin AdrianBejan - SandaHarabagiu + Cosmin AdrianBejan + SandaHarabagiu A Linguistic Resource for Semantic Parsing of Motion Events http://www.lrec-conf.org/proceedings/lrec2010/pdf/941_Paper.pdf This paper presents a corpus of annotated motion events and their event structure. We consider motion events triggered by a set of motion evoking words and contemplate both literal and figurative interpretations of them. Figurative motion events are extracted into the same event structure but are marked as figurative in the corpus. To represent the event structure of motion, we use the FrameNet annotation standard, which encodes motion in over 70 frames. In order to acquire a diverse set of texts that are different from FrameNet's, we crawled blog and news feeds for five different domains: sports, newswire, finance, military, and gossip. We then annotated these documents with an automatic FrameNet parser. Its output was manually corrected to account for missing and incorrect frames as well as missing and incorrect frame elements. The corpus, UTD-MotionEvent, may act as a resource for semantic parsing, detection of figurative language, spatial reasoning, and other tasks. @@ -6040,7 +6040,7 @@ ZygmuntVetulani MarekKubis - TomaszObrębski + TomaszObrębski <fixed-case>P</fixed-case>ol<fixed-case>N</fixed-case>et — <fixed-case>P</fixed-case>olish <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et: Data and Tools http://www.lrec-conf.org/proceedings/lrec2010/pdf/947_Paper.pdf This paper presents the PolNet-Polish WordNet project which aims at building a linguistically oriented ontology for Polish compatible with other WordNet projects such as Princeton WordNet, EuroWordNet and other similarly organized ontologies. The main idea behind this kind of ontologies is to use words related by synonymy to construct formal representations of concepts. In the paper we sketch the PolNet project methodology and implementation. We present data obtained so far, as well as the WQuery tool for querying and maintaining PolNet. WQuery is a query language that make use of data types based on synsets, word senses and various semantic relations which occur in wordnet-like lexical databases. The tool is particularly useful to deal with complex querying tasks like searching for cycles in semantic relations, finding isolated synsets or computing overall statistics. Both data and tools presented in this paper have been applied within an advanced AI system POLINT-112-SMS with emulated natural language competence, where they are used in the understanding subsystem. @@ -6056,21 +6056,21 @@ Youssef AïtOuguengay - AïchaBouhjar + AïchaBouhjar For Standardised <fixed-case>A</fixed-case>mazigh Linguistic Resources http://www.lrec-conf.org/proceedings/lrec2010/pdf/949_Paper.pdf Amazigh language and culture may well be viewed to have known an unprecedented booming in Morocco : more than a hundred- which are published by the Royal Institute of Amazigh Culture (IRCAM), an institution created in 2001 to preserve, promote and endorse Amazigh culture in all its dimensions. Crucially, publications in the Amazigh language would not have seen light without the valiant attempts to upgrade the language on the linguistic and technological levels. The central thrust of this contribution is to provide a vista about the whole range of actions carried out by IRCAM. Of prime utility to this presentation is what was accomplished to supply Amazigh with the necessary tools and corpora without which the Amazigh language would emphatically fail to have a place in the world of NITCs. After a brief description of the prime specificities that characterise the standardisation of Amazigh in Morocco, a retrospective on the basic computer tools now available for the processing of Amazigh will be set out. It is concluded that the homogenisation of a considerable number of corpora should, by right, be viewed as a strategic move and an incontrovertible prerequisite to the computerisation of Amazigh, ouguengay-bouhjar-2010-standardised - ChristopherCieri + ChristopherCieri KhalidChoukri NicolettaCalzolari - D. TerenceLangendoen + D. TerenceLangendoen JohannesLeveling - MarthaPalmer - NancyIde - JamesPustejovsky + MarthaPalmer + NancyIde + JamesPustejovsky A Road Map for Interoperable Language Resource Metadata http://www.lrec-conf.org/proceedings/lrec2010/pdf/951_Paper.pdf LRs remain expensive to create and thus rare relative to demand across languages and technology types. The accidental re-creation of an LR that already exists is a nearly unforgivable waste of scarce resources that is unfortunately not so easy to avoid. The number of catalogs the HLT researcher must search, with their different formats, make it possible to overlook an existing resource. This paper sketches the sources of this problem and outlines a proposal to rectify along with a new vision of LR cataloging that will to facilitates the documentation and exploitation of a much wider range of LRs than previously considered. @@ -6094,8 +6094,8 @@ gishri-etal-2010-lexicon - ChristopherCieri - MarkLiberman + ChristopherCieri + MarkLiberman Adapting to Trends in Language Resource Development: A Progress Report on <fixed-case>LDC</fixed-case> Activities http://www.lrec-conf.org/proceedings/lrec2010/pdf/954_Paper.pdf This paper describes changing needs among the communities that exploit language resources and recent LDC activities and publications that support those needs by providing greater volumes of data and associated resources in a growing inventory of languages with ever more sophisticated annotation. Specifically, it covers the evolving role of data centers with specific emphasis on the LDC, the publications released by the LDC in the two years since our last report and the sponsored research programs that provide LRs initially to participants in those programs but eventually to the larger HLT research communities and beyond. diff --git a/data/xml/L12.xml b/data/xml/L12.xml index f6d04bbd82..22b7e928d9 100644 --- a/data/xml/L12.xml +++ b/data/xml/L12.xml @@ -3,15 +3,15 @@ Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12) - NicolettaCalzolari - KhalidChoukri + NicolettaCalzolari + KhalidChoukri ThierryDeclerck Mehmet UğurDoğan - BenteMaegaard - JosephMariani - AsuncionMoreno - JanOdijk - SteliosPiperidis + BenteMaegaard + JosephMariani + AsuncionMoreno + JanOdijk + SteliosPiperidis European Language Resources Association (ELRA)
Istanbul, Turkey
May @@ -22,7 +22,7 @@ lrec-2012-international - KristiinaJokinen + KristiinaJokinen SilviTenjes Investigating Engagement - intercultural and technological aspects of the collection, analysis, and use of the <fixed-case>E</fixed-case>stonian Multiparty Conversational video data 2764–2769 @@ -47,8 +47,8 @@ burkhardt-2012-fast - PeterSpyns - ElisabethD’Halleweyn + PeterSpyns + ElisabethD’Halleweyn Smooth Sailing for <fixed-case>STEVIN</fixed-case> 1021–1028 http://www.lrec-conf.org/proceedings/lrec2012/pdf/112_Paper.pdf @@ -65,7 +65,7 @@ stein-usabaev-2012-automatic - XabierSaralegi + XabierSaralegi IkerManterola IñakiSan Vicente Building a <fixed-case>B</fixed-case>asque-<fixed-case>C</fixed-case>hinese Dictionary by Using <fixed-case>E</fixed-case>nglish as Pivot @@ -84,7 +84,7 @@ tang-chen-2012-mining - JohankaSpoustová + JohankaSpoustová MiroslavSpousta A High-Quality Web Corpus of <fixed-case>C</fixed-case>zech 311–315 @@ -102,7 +102,7 @@ DianaMaynard - Mark A.Greenwood + Mark A.Greenwood Large Scale Semantic Annotation, Indexing and Search at The National Archives 3487–3494 http://www.lrec-conf.org/proceedings/lrec2012/pdf/122_Paper.pdf @@ -110,8 +110,8 @@ maynard-greenwood-2012-large - Abdul-BaqueeSharaf - EricAtwell + Abdul-BaqueeSharaf + EricAtwell <fixed-case>Q</fixed-case>ur<fixed-case>A</fixed-case>na: Corpus of the <fixed-case>Q</fixed-case>uran annotated with Pronominal Anaphora 130–137 http://www.lrec-conf.org/proceedings/lrec2012/pdf/123_Paper.pdf @@ -120,7 +120,7 @@ ChristianScheible - HinrichSchütze + HinrichSchütze Bootstrapping Sentiment Labels For Unannotated Documents With Polarity <fixed-case>P</fixed-case>age<fixed-case>R</fixed-case>ank 1230–1234 http://www.lrec-conf.org/proceedings/lrec2012/pdf/124_Paper.pdf @@ -145,9 +145,9 @@ IñakiSainz DanielErro - EvaNavas - InmaHernáez - JonSanchez + EvaNavas + InmaHernáez + JonSanchez IbonSaratxaga IgorOdriozola Versatile Speech Databases for High Quality Synthesis for <fixed-case>B</fixed-case>asque @@ -157,10 +157,10 @@ sainz-etal-2012-versatile - HectorLlorens - LeonDerczynski - RobertGaizauskas - EstelaSaquete + HectorLlorens + LeonDerczynski + RobertGaizauskas + EstelaSaquete <fixed-case>TIMEN</fixed-case>: An Open Temporal Expression Normalisation Resource 3044–3051 http://www.lrec-conf.org/proceedings/lrec2012/pdf/128_Paper.pdf @@ -177,7 +177,7 @@ brooke-hirst-2012-measuring - PatrickSaint-Dizier + PatrickSaint-Dizier <fixed-case>DISLOG</fixed-case>: A logic-based language for processing discourse structures 2770–2777 http://www.lrec-conf.org/proceedings/lrec2012/pdf/130_Paper.pdf @@ -198,7 +198,7 @@ SarahBourse - PatrickSaint-Dizier + PatrickSaint-Dizier A Repository of Rules and Lexical Resources for Discourse Structure Analysis: the Case of Explanation Structures 2778–2785 http://www.lrec-conf.org/proceedings/lrec2012/pdf/137_Paper.pdf @@ -209,7 +209,7 @@ FloreBarcellini CamilleAlbert CorinneGrosse - PatrickSaint-Dizier + PatrickSaint-Dizier Risk Analysis and Prevention: <fixed-case>LELIE</fixed-case>, a Tool dedicated to Procedure and Requirement Authoring 698–705 http://www.lrec-conf.org/proceedings/lrec2012/pdf/139_Paper.pdf @@ -244,8 +244,8 @@ patejuk-przepiorkowski-2012-towards - KristiinaJokinen - GrahamWilcock + KristiinaJokinen + GrahamWilcock Constructive Interaction for Talking about Interesting Topics 404–410 http://www.lrec-conf.org/proceedings/lrec2012/pdf/151_Paper.pdf @@ -255,8 +255,8 @@ HilderPereira EderNovais - AndréMariotti - IvandréParaboni + AndréMariotti + IvandréParaboni Corpus-based Referring Expressions Generation 4004–4009 http://www.lrec-conf.org/proceedings/lrec2012/pdf/152_Paper.pdf @@ -265,7 +265,7 @@ EderNovais - IvandréParaboni + IvandréParaboni DouglasSilva <fixed-case>P</fixed-case>ortuguese Text Generation from Large Corpora 4010–4014 @@ -275,11 +275,11 @@ VolhaPetukhova - RodrigoAgerri + RodrigoAgerri MarkFishel SergioPenkale Arantzadel Pozo - Mirjam SepesyMaučec + Mirjam SepesyMaučec AndyWay PanayotaGeorgakopoulou MartinVolk @@ -312,9 +312,9 @@ LieveMacken - VeroniqueHoste - MariëlleLeijten - LuukVan Waes + VeroniqueHoste + MariëlleLeijten + LuukVan Waes From keystrokes to annotated process data: Enriching the output of Inputlog with linguistic information 2224–2229 http://www.lrec-conf.org/proceedings/lrec2012/pdf/161_Paper.pdf @@ -323,7 +323,7 @@ VerenaHenrich - ErhardHinrichs + ErhardHinrichs A Comparative Evaluation of Word Sense Disambiguation Algorithms for <fixed-case>G</fixed-case>erman 576–583 http://www.lrec-conf.org/proceedings/lrec2012/pdf/164_Paper.pdf @@ -344,7 +344,7 @@ varges-etal-2012-semscribe - ErhardHinrichs + ErhardHinrichs ThomasZastrow Automatic Annotation and Manual Evaluation of the Diachronic <fixed-case>G</fixed-case>erman Corpus <fixed-case>T</fixed-case>ü<fixed-case>B</fixed-case>a-<fixed-case>D</fixed-case>/<fixed-case>DC</fixed-case> 1622–1627 @@ -362,9 +362,9 @@ joubert-lafourcade-2012-new - HongsuckSeo + HongsuckSeo KyusongLee - Gary GeunbaeLee + Gary GeunbaeLee Soo-OkKweon Hae-RiKim Grammatical Error Annotation for <fixed-case>K</fixed-case>orean Learners of Spoken <fixed-case>E</fixed-case>nglish @@ -405,7 +405,7 @@ MartaTatu - DanMoldovan + DanMoldovan A Tool for Extracting Conversational Implicatures 2708–2715 http://www.lrec-conf.org/proceedings/lrec2012/pdf/175_Paper.pdf @@ -413,7 +413,7 @@ tatu-moldovan-2012-tool - DanMoldovan + DanMoldovan EduardoBlanco <fixed-case>P</fixed-case>olaris: Lymba’s Semantic Parser 66–72 @@ -450,7 +450,7 @@ VolhaPetukhova - HarryBunt + HarryBunt The coding and annotation of multimodal dialogue acts 1293–1300 http://www.lrec-conf.org/proceedings/lrec2012/pdf/180_Paper.pdf @@ -496,8 +496,8 @@ PiekVossen AttilaGörög - RubénIzquierdo - Antalvan den Bosch + RubénIzquierdo + Antalvan den Bosch <fixed-case>D</fixed-case>utch<fixed-case>S</fixed-case>em<fixed-case>C</fixed-case>or: Targeting the ideal sense-tagged corpus 584–589 http://www.lrec-conf.org/proceedings/lrec2012/pdf/187_Paper.pdf @@ -514,8 +514,8 @@ cartoni-meyer-2012-extracting - Abdul-BaqueeSharaf - EricAtwell + Abdul-BaqueeSharaf + EricAtwell <fixed-case>Q</fixed-case>ur<fixed-case>S</fixed-case>im: A corpus for evaluation of relatedness in short texts 2295–2302 http://www.lrec-conf.org/proceedings/lrec2012/pdf/190_Paper.pdf @@ -546,7 +546,7 @@ JyrkiNiemi - KristerLindén + KristerLindén Representing the Translation Relation in a Bilingual <fixed-case>W</fixed-case>ordnet 2439–2446 http://www.lrec-conf.org/proceedings/lrec2012/pdf/194_Paper.pdf @@ -554,7 +554,7 @@ niemi-linden-2012-representing - Marianna J.Martindale + Marianna J.Martindale Can Statistical Post-Editing with a Small Parallel Corpus Save a Weak <fixed-case>MT</fixed-case> Engine? 2138–2142 http://www.lrec-conf.org/proceedings/lrec2012/pdf/196_Paper.pdf @@ -562,7 +562,7 @@ martindale-2012-statistical - GülşenEryiğit + GülşenEryiğit The Impact of Automatic Morphological Analysis & Disambiguation on Dependency Parsing of <fixed-case>T</fixed-case>urkish 1960–1965 http://www.lrec-conf.org/proceedings/lrec2012/pdf/198_Paper.pdf @@ -594,7 +594,7 @@ Michael A.Roach JosephJohnson JoshGuthrie - Sanda M.Harabagiu + Sanda M.Harabagiu <fixed-case>E</fixed-case>mpa<fixed-case>T</fixed-case>weet: Annotating and Detecting Emotions on <fixed-case>T</fixed-case>witter 3806–3813 http://www.lrec-conf.org/proceedings/lrec2012/pdf/201_Paper.pdf @@ -643,7 +643,7 @@ GenevièveCaelen-Haumont - SethsereySam + SethsereySam Comparison between two models of language for the automatic phonetic labeling of an undocumented language of the <fixed-case>S</fixed-case>outh-<fixed-case>A</fixed-case>sia: the case of <fixed-case>M</fixed-case>o <fixed-case>P</fixed-case>iu 956–962 http://www.lrec-conf.org/proceedings/lrec2012/pdf/208_Paper.pdf @@ -653,7 +653,7 @@ CristinaBosco ManuelaSanguinetti - LeonardoLesmo + LeonardoLesmo The Parallel-<fixed-case>TUT</fixed-case>: a multilingual and multiformat treebank 1932–1938 http://www.lrec-conf.org/proceedings/lrec2012/pdf/209_Paper.pdf @@ -683,7 +683,7 @@ MarkusForsberg - TorbjörnLager + TorbjörnLager Cloud Logic Programming for Integrating Language Technology Resources 2935–2940 http://www.lrec-conf.org/proceedings/lrec2012/pdf/212_Paper.pdf @@ -691,7 +691,7 @@ forsberg-lager-2012-cloud - FabioTamburini + FabioTamburini MatiasMelandri <fixed-case>A</fixed-case>n<fixed-case>I</fixed-case>ta: a powerful morphological analyser for <fixed-case>I</fixed-case>talian 941–947 @@ -701,8 +701,8 @@ SylviaSpringorum - SabineSchulte im Walde - AntjeRoßdeutscher + SabineSchulte im Walde + AntjeRoßdeutscher Automatic classification of <fixed-case>G</fixed-case>erman <i>an</i> particle verbs 73–80 http://www.lrec-conf.org/proceedings/lrec2012/pdf/214_Paper.pdf @@ -710,10 +710,10 @@ springorum-etal-2012-automatic - RobertaCatizone - LouiseGuthrie + RobertaCatizone + LouiseGuthrie ArthurThomas - YorickWilks + YorickWilks <fixed-case>LIE</fixed-case>: Leadership, Influence and Expertise 3692–3696 http://www.lrec-conf.org/proceedings/lrec2012/pdf/215_Paper.pdf @@ -721,9 +721,9 @@ catizone-etal-2012-lie - ValentinaBartalesi Lenzi + ValentinaBartalesi Lenzi GiovanniMoretti - RacheleSprugnoli + RacheleSprugnoli <fixed-case>CAT</fixed-case>: the <fixed-case>CELCT</fixed-case> Annotation Tool 333–338 http://www.lrec-conf.org/proceedings/lrec2012/pdf/216_Paper.pdf @@ -734,7 +734,7 @@ YulanHe HassanSaif ZhongyuWei - Kam-FaiWong + Kam-FaiWong Quantising Opinions for Political Tweets Analysis 3901–3906 http://www.lrec-conf.org/proceedings/lrec2012/pdf/217_Paper.pdf @@ -744,8 +744,8 @@ RaduIon ElenaIrimia - DanŞtefănescu - DanTufiș + DanŞtefănescu + DanTufiș <fixed-case>ROMBAC</fixed-case>: The <fixed-case>R</fixed-case>omanian Balanced Annotated Corpus 339–344 http://www.lrec-conf.org/proceedings/lrec2012/pdf/218_Paper.pdf @@ -766,8 +766,8 @@ konstantopoulos-etal-2012-task - IsmaïlEl Maarouf - JeanneVillaneau + IsmaïlEl Maarouf + JeanneVillaneau A <fixed-case>F</fixed-case>rench Fairy Tale Corpus syntactically and semantically annotated 345–350 http://www.lrec-conf.org/proceedings/lrec2012/pdf/220_Paper.pdf @@ -776,7 +776,7 @@ RoserMorante - WalterDaelemans + WalterDaelemans <fixed-case>C</fixed-case>onan<fixed-case>D</fixed-case>oyle-neg: Annotation of negation cues and their scope in Conan Doyle stories 1563–1568 http://www.lrec-conf.org/proceedings/lrec2012/pdf/221_Paper.pdf @@ -796,8 +796,8 @@ ploch-etal-2012-gerned - SilviaVázquez - NúriaBel + SilviaVázquez + NúriaBel A Classification of Adjectives for Polarity Lexicons Enhancement 3557–3561 http://www.lrec-conf.org/proceedings/lrec2012/pdf/223_Paper.pdf @@ -805,9 +805,9 @@ vazquez-bel-2012-classification - Héctor MartínezAlonso - NúriaBel - Bolette SandfordPedersen + Héctor MartínezAlonso + NúriaBel + Bolette SandfordPedersen A voting scheme to detect semantic underspecification 569–575 http://www.lrec-conf.org/proceedings/lrec2012/pdf/225_Paper.pdf @@ -817,7 +817,7 @@ Aditi SharmaGrover AnnamartNieman - GerhardVan Huyssteen + GerhardVan Huyssteen JustusRoux Aspects of a Legal Framework for Language Resource Management 1035–1039 @@ -836,7 +836,7 @@ polakova-etal-2012-interplay - Heiki-JaanKaalep + Heiki-JaanKaalep KadriMuischnek Robust clause boundary identification for corpus annotation 1632–1636 @@ -894,9 +894,9 @@ seeker-kuhn-2012-making - Jorge Carrillode Albornoz + Jorge Carrillode Albornoz LauraPlaza - PabloGervás + PabloGervás <fixed-case>S</fixed-case>enti<fixed-case>S</fixed-case>ense: An easily scalable concept-based affective lexicon for sentiment analysis 3562–3567 http://www.lrec-conf.org/proceedings/lrec2012/pdf/236_Paper.pdf @@ -915,7 +915,7 @@ MajdiSawalha ClaireBrierley - EricAtwell + EricAtwell Predicting Phrase Breaks in Classical and <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic Text 3868–3872 http://www.lrec-conf.org/proceedings/lrec2012/pdf/239_Paper.pdf @@ -925,7 +925,7 @@ ClaireBrierley MajdiSawalha - EricAtwell + EricAtwell Open-Source Boundary-Annotated Corpus for <fixed-case>A</fixed-case>rabic Speech and Language Processing 1011–1016 http://www.lrec-conf.org/proceedings/lrec2012/pdf/240_Paper.pdf @@ -946,7 +946,7 @@ PhilipWebster VictoriaUren AndreaVarga - FabioCiravegna + FabioCiravegna Automatically Extracting Procedural Knowledge from Instructional Texts using Natural Language Processing 520–527 http://www.lrec-conf.org/proceedings/lrec2012/pdf/244_Paper.pdf @@ -963,7 +963,7 @@ FranciscoCosta - AntónioBranco + AntónioBranco <fixed-case>T</fixed-case>ime<fixed-case>B</fixed-case>ank<fixed-case>PT</fixed-case>: A <fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> Annotated Corpus of <fixed-case>P</fixed-case>ortuguese 3727–3734 http://www.lrec-conf.org/proceedings/lrec2012/pdf/246_Paper.pdf @@ -972,7 +972,7 @@ SanniNimb - Bolette SandfordPedersen + Bolette SandfordPedersen Towards a richer wordnet representation of properties 3452–3456 http://www.lrec-conf.org/proceedings/lrec2012/pdf/247_Paper.pdf @@ -1011,7 +1011,7 @@ kulick-etal-2012-developments - ChrisBiemann + ChrisBiemann Turk Bootstrap Word Sense Inventory 2.0: A Large-Scale Resource for Lexical Substitution 4038–4042 http://www.lrec-conf.org/proceedings/lrec2012/pdf/252_Paper.pdf @@ -1029,7 +1029,7 @@ alazard-etal-2012-multiphonia - AndreiPopescu-Belis + AndreiPopescu-Belis ThomasMeyer JeevanthiLiyanapathirana BrunoCartoni @@ -1042,7 +1042,7 @@ BonanMin - RalphGrishman + RalphGrishman Challenges in the Knowledge Base Population Slot Filling Task 1137–1142 http://www.lrec-conf.org/proceedings/lrec2012/pdf/256_Paper.pdf @@ -1051,7 +1051,7 @@ AlessandraZarcone - StefanRued + StefanRued Logical metonymies and qualia structures: an annotated database of logical metonymies for <fixed-case>G</fixed-case>erman 1799–1804 http://www.lrec-conf.org/proceedings/lrec2012/pdf/259_Paper.pdf @@ -1061,7 +1061,7 @@ Martina KatalinSzabó VeronikaVincze - IstvánNagy T. + IstvánNagy T. <fixed-case>H</fixed-case>un<fixed-case>O</fixed-case>r: A <fixed-case>H</fixed-case>ungarian—<fixed-case>R</fixed-case>ussian Parallel Corpus 2453–2458 http://www.lrec-conf.org/proceedings/lrec2012/pdf/262_Paper.pdf @@ -1081,7 +1081,7 @@ ElenaVolodina - Sofie JohanssonKokkinakis + Sofie JohanssonKokkinakis Introducing the <fixed-case>S</fixed-case>wedish Kelly-list, a new lexical e-resource for <fixed-case>S</fixed-case>wedish 1040–1046 http://www.lrec-conf.org/proceedings/lrec2012/pdf/264_Paper.pdf @@ -1089,8 +1089,8 @@ volodina-kokkinakis-2012-introducing - Valentin I.Spitkovsky - Angel X.Chang + Valentin I.Spitkovsky + Angel X.Chang A Cross-Lingual Dictionary for <fixed-case>E</fixed-case>nglish <fixed-case>W</fixed-case>ikipedia Concepts 3168–3175 http://www.lrec-conf.org/proceedings/lrec2012/pdf/266_Paper.pdf @@ -1099,7 +1099,7 @@ MartinMajliš - ZdeněkŽabokrtský + ZdeněkŽabokrtský Language Richness of the Web 2927–2934 http://www.lrec-conf.org/proceedings/lrec2012/pdf/267_Paper.pdf @@ -1127,7 +1127,7 @@ SigridKlerke - AndersSøgaard + AndersSøgaard <fixed-case>DS</fixed-case>im, a <fixed-case>D</fixed-case>anish Parallel Corpus for Text Simplification 4015–4018 http://www.lrec-conf.org/proceedings/lrec2012/pdf/270_Paper.pdf @@ -1135,8 +1135,8 @@ klerke-sogaard-2012-dsim - Magali SanchesDuran - Sandra MariaAluísio + Magali SanchesDuran + Sandra MariaAluísio <fixed-case>P</fixed-case>ropbank-Br: a <fixed-case>B</fixed-case>razilian Treebank annotated with semantic role labels 1862–1867 http://www.lrec-conf.org/proceedings/lrec2012/pdf/272_Paper.pdf @@ -1157,7 +1157,7 @@ AlineVillavicencio BeracahYankama MarcoIdiart - RobertBerwick + RobertBerwick A large scale annotated child language construction database 2370–2374 http://www.lrec-conf.org/proceedings/lrec2012/pdf/276_Paper.pdf @@ -1166,10 +1166,10 @@ XuansongLi - StephanieStrassel + StephanieStrassel StephenGrimes SafaIsmael - MohamedMaamouri + MohamedMaamouri AnnBies NianwenXue Parallel Aligned Treebanks at <fixed-case>LDC</fixed-case>: New Challenges Interfacing Existing Infrastructures @@ -1180,7 +1180,7 @@ XuansongLi - StephanieStrassel + StephanieStrassel HengJi KiraGriffitt JoeEllis @@ -1205,7 +1205,7 @@ ShotaYamasaki HirohisaFurukawa MasafumiNishida - KristiinaJokinen + KristiinaJokinen SeiichiYamamoto Multimodal Corpus of Multi-party Conversations in Second Language 416–421 @@ -1214,7 +1214,7 @@ yamasaki-etal-2012-multimodal - SatoshiSato + SatoshiSato Dictionary Look-up with Katakana Variant Recognition 249–255 http://www.lrec-conf.org/proceedings/lrec2012/pdf/282_Paper.pdf @@ -1222,8 +1222,8 @@ sato-2012-dictionary - Angel X.Chang - ChristopherManning + Angel X.Chang + ChristopherManning <fixed-case>SUT</fixed-case>ime: A library for recognizing and normalizing time expressions 3735–3740 http://www.lrec-conf.org/proceedings/lrec2012/pdf/284_Paper.pdf @@ -1245,7 +1245,7 @@ EleanorClark - KenjiAraki + KenjiAraki Two Database Resources for Processing Social Media <fixed-case>E</fixed-case>nglish Text 3790–3793 http://www.lrec-conf.org/proceedings/lrec2012/pdf/288_Paper.pdf @@ -1255,7 +1255,7 @@ MaristellaAgosti BirgitAlber - Giorgio MariaDi Nunzio + Giorgio MariaDi Nunzio MarcoDussin StefanRabanus AlessandraTomaselli @@ -1266,7 +1266,7 @@ agosti-etal-2012-curated - AlexandrosPapangelis + AlexandrosPapangelis VangelisKarkaletsis FilliaMakedon Evaluation of Online Dialogue Policy Learning Techniques @@ -1278,11 +1278,11 @@ AnoopKunchukuttan ShouryaRoy - PratikPatel + PratikPatel KushalLadha SomyaGupta - Mitesh M.Khapra - PushpakBhattacharyya + Mitesh M.Khapra + PushpakBhattacharyya Experiences in Resource Generation for Machine Translation through Crowdsourcing 384–391 http://www.lrec-conf.org/proceedings/lrec2012/pdf/292_Paper.pdf @@ -1290,9 +1290,9 @@ kunchukuttan-etal-2012-experiences - AitorGonzalez-Agirre + AitorGonzalez-Agirre EgoitzLaparra - GermanRigau + GermanRigau Multilingual Central Repository version 3.0 2525–2529 http://www.lrec-conf.org/proceedings/lrec2012/pdf/293_Paper.pdf @@ -1303,7 +1303,7 @@ EleftheriosAvramidis AljoschaBurchardt ChristianFedermann - MajaPopović + MajaPopović CindyTscherwinka DavidVilar Involving Language Professionals in the Evaluation of Machine Translation @@ -1313,10 +1313,10 @@ avramidis-etal-2012-involving - PaolaVelardi + PaolaVelardi RobertoNavigli StefanoFaralli - Juana MariaRuiz Martinez + Juana MariaRuiz Martinez A New Method for Evaluating Automatically Learned Terminological Taxonomies 1498–1504 http://www.lrec-conf.org/proceedings/lrec2012/pdf/295_Paper.pdf @@ -1326,7 +1326,7 @@ GloriaGagliardi Edoardo LombardiVallauri - FabioTamburini + FabioTamburini A topologic view of Topic and Focus marking in <fixed-case>I</fixed-case>talian 948–955 http://www.lrec-conf.org/proceedings/lrec2012/pdf/296_Paper.pdf @@ -1345,7 +1345,7 @@ ghosh-etal-2012-improving - Verginica BarbuMititelu + Verginica BarbuMititelu Adding Morpho-semantic Relations to the <fixed-case>R</fixed-case>omanian <fixed-case>W</fixed-case>ordnet 2596–2601 http://www.lrec-conf.org/proceedings/lrec2012/pdf/299_Paper.pdf @@ -1353,9 +1353,9 @@ mititelu-2012-adding - IoanaVasilescu - MartineAdda-Decker - LoriLamel + IoanaVasilescu + MartineAdda-Decker + LoriLamel Cross-lingual studies of <fixed-case>ASR</fixed-case> errors: paradigms for perceptual evaluations 3511–3518 http://www.lrec-conf.org/proceedings/lrec2012/pdf/300_Paper.pdf @@ -1363,8 +1363,8 @@ vasilescu-etal-2012-cross - Karin FribergHeppin - Maria ToporowskaGronostaj + Karin FribergHeppin + Maria ToporowskaGronostaj The Rocky Road towards a <fixed-case>S</fixed-case>wedish <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et - Creating <fixed-case>S</fixed-case>we<fixed-case>FN</fixed-case> 256–261 http://www.lrec-conf.org/proceedings/lrec2012/pdf/301_Paper.pdf @@ -1383,7 +1383,7 @@ PrzemyslawLenkiewicz - Binyam GebrekidanGebre + Binyam GebrekidanGebre OliverSchreer StefanoMasneri DanielSchneider @@ -1403,10 +1403,10 @@ seretan-2012-acquisition - KSaravanan + KSaravanan MonojitChoudhury - RaghavendraUdupa - AKumaran + RaghavendraUdupa + AKumaran An Empirical Study of the Occurrence and Co-Occurrence of Named Entities in Natural Language Corpora 3118–3125 http://www.lrec-conf.org/proceedings/lrec2012/pdf/305_Paper.pdf @@ -1426,7 +1426,7 @@ CarlosMorell JorgeVivaldi - NúriaBel + NúriaBel <fixed-case>I</fixed-case>ula2<fixed-case>S</fixed-case>tandoff: a tool for creating standoff documents for the <fixed-case>IULACT</fixed-case> 351–356 http://www.lrec-conf.org/proceedings/lrec2012/pdf/307_Paper.pdf @@ -1436,7 +1436,7 @@ AndréBittar CarolineHagège - VéroniqueMoriceau + VéroniqueMoriceau XavierTannier CharlesTeissèdre Temporal Annotation: A Proposal for Guidelines and an Experiment with Inter-annotator Agreement @@ -1457,7 +1457,7 @@ PatrickZiering - SinaZarrieß + SinaZarrieß JonasKuhn A Corpus-based Study of the <fixed-case>G</fixed-case>erman Recipient Passive 1637–1644 @@ -1467,7 +1467,7 @@ TomDe Smedt - WalterDaelemans + WalterDaelemans “Vreselijk mooi!” (terribly beautiful): A Subjectivity Lexicon for <fixed-case>D</fixed-case>utch Adjectives. 3568–3572 http://www.lrec-conf.org/proceedings/lrec2012/pdf/312_Paper.pdf @@ -1485,7 +1485,7 @@ XavierTannier - VéroniqueMoriceau + VéroniqueMoriceau BéatriceArnulphy RuixinHe Evolution of Event Designation in Media: Preliminary Study @@ -1495,12 +1495,12 @@ tannier-etal-2012-evolution - AnselmoPeñas - EduardHovy + AnselmoPeñas + EduardHovy PamelaForner - ÁlvaroRodrigo + ÁlvaroRodrigo RichardSutcliffe - CorinaForascu + CorinaForascu CarolineSporleder Evaluating Machine Reading Systems through Comprehension Tests 1143–1147 @@ -1511,7 +1511,7 @@ XinkaiWang PaulThompson - Jun’ichiTsujii + Jun’ichiTsujii SophiaAnaniadou Biomedical <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish <fixed-case>CLIR</fixed-case> Using an Extended <fixed-case>CM</fixed-case>e<fixed-case>SH</fixed-case> Resource to Expand Queries 1148–1155 @@ -1520,9 +1520,9 @@ wang-etal-2012-biomedical - AitorGonzález-Agirre + AitorGonzález-Agirre MauroCastillo - GermanRigau + GermanRigau A proposal for improving <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Domains 3457–3462 http://www.lrec-conf.org/proceedings/lrec2012/pdf/319_Paper.pdf @@ -1530,7 +1530,7 @@ gonzalez-agirre-etal-2012-proposal - Henkvan den Heuvel + Henkvan den Heuvel EricSanders RobinRutten StefScagliola @@ -1557,7 +1557,7 @@ Juan Pablo MartínezCortés JimO’Regan - FrancisTyers + FrancisTyers Free/Open Source Shallow-Transfer Based Machine Translation for <fixed-case>S</fixed-case>panish and <fixed-case>A</fixed-case>ragonese 2153–2157 http://www.lrec-conf.org/proceedings/lrec2012/pdf/326_Paper.pdf @@ -1575,8 +1575,8 @@ goldhahn-etal-2012-building - Thomas UlrichChristiansen - Peter JuelHenrichsen + Thomas UlrichChristiansen + Peter JuelHenrichsen Sense Meets Nonsense - Sense Meets Nonsense - a dual-layer <fixed-case>D</fixed-case>anish speech corpus for perception studies 3356–3361 http://www.lrec-conf.org/proceedings/lrec2012/pdf/330_Paper.pdf @@ -1584,9 +1584,9 @@ christiansen-henrichsen-2012-sense - Lluís-F.Hurtado - FernandoGarcía - EmilioSanchis + Lluís-F.Hurtado + FernandoGarcía + EmilioSanchis EncarnaSegarra The acquisition and dialog act labeling of the <fixed-case>EDECAN</fixed-case>-<fixed-case>SPORTS</fixed-case> corpus 1416–1420 @@ -1616,7 +1616,7 @@ ChristianSmith HenrikDanielsson - ArneJönsson + ArneJönsson A good space: Lexical predictors in word space evaluation 2530–2535 http://www.lrec-conf.org/proceedings/lrec2012/pdf/335_Paper.pdf @@ -1625,10 +1625,10 @@ JanBerka - OndřejBojar + OndřejBojar MarkFishel - MajaPopović - DanielZeman + MajaPopović + DanielZeman Automatic <fixed-case>MT</fixed-case> Error Analysis: Hjerson Helping Addicter 2158–2163 http://www.lrec-conf.org/proceedings/lrec2012/pdf/336_Paper.pdf @@ -1637,7 +1637,7 @@ DanielePighin - LluísMàrquez + LluísMàrquez JonathanMay An Analysis (and an Annotated Corpus) of User Responses to Machine Translation Output 1131–1136 @@ -1647,7 +1647,7 @@ MojganSeraji - BeátaMegyesi + BeátaMegyesi JoakimNivre A Basic Language Resource Kit for <fixed-case>P</fixed-case>ersian 2245–2252 @@ -1657,7 +1657,7 @@ AmitSangodkar - OmDamani + OmDamani Re-ordering Source Sentences for <fixed-case>SMT</fixed-case> 2164–2171 http://www.lrec-conf.org/proceedings/lrec2012/pdf/340_Paper.pdf @@ -1666,7 +1666,7 @@ AlexJudea - ViviNastase + ViviNastase MichaelStrube Concept-based Selectional Preferences and Distributional Representations from <fixed-case>W</fixed-case>ikipedia Articles 2985–2990 @@ -1675,7 +1675,7 @@ judea-etal-2012-concept - BehrangQasemiZadeh + BehrangQasemiZadeh PaulBuitelaar TianqiChen GeorgetaBordea @@ -1687,9 +1687,9 @@ OlivierGalibert - SophieRosset + SophieRosset CyrilGrouin - PierreZweigenbaum + PierreZweigenbaum LudovicQuintard Extended Named Entities Annotation on <fixed-case>OCR</fixed-case>ed Documents: From Corpus Constitution to Evaluation Campaign 3126–3131 @@ -1716,8 +1716,8 @@ vetulani-2012-wordnet - AnnetteRios - AnneGöhring + AnnetteRios + AnneGöhring A tree is a <fixed-case>B</fixed-case>aum is an árbol is a sach’a: Creating a trilingual treebank 1874–1879 http://www.lrec-conf.org/proceedings/lrec2012/pdf/350_Paper.pdf @@ -1726,7 +1726,7 @@ KseniyaZablotskaya - Fernando FernándezMartínez + Fernando FernándezMartínez WolfgangMinker Investigating Verbal Intelligence Using the <fixed-case>TF</fixed-case>-<fixed-case>IDF</fixed-case> Approach 1573–1576 @@ -1737,7 +1737,7 @@ KseniyaZablotskaya UmairRahim - Fernando FernándezMartínez + Fernando FernándezMartínez WolfgangMinker Relating Dominance of Dialogue Participants with their Verbal Intelligence Scores 1289–1292 @@ -1746,8 +1746,8 @@ zablotskaya-etal-2012-relating - SanjaŠtajner - RuslanMitkov + SanjaŠtajner + RuslanMitkov Diachronic Changes in Text Complexity in 20th Century <fixed-case>E</fixed-case>nglish Language: An <fixed-case>NLP</fixed-case> Approach 1577–1584 http://www.lrec-conf.org/proceedings/lrec2012/pdf/355_Paper.pdf @@ -1755,11 +1755,11 @@ stajner-mitkov-2012-diachronic - ÂngelaCosta + ÂngelaCosta TiagoLuís JoanaRibeiro - Ana CristinaMendes - LuísaCoheur + Ana CristinaMendes + LuísaCoheur An <fixed-case>E</fixed-case>nglish-<fixed-case>P</fixed-case>ortuguese parallel corpus of questions: translation guidelines and application in <fixed-case>SMT</fixed-case> 2172–2176 http://www.lrec-conf.org/proceedings/lrec2012/pdf/356_Paper.pdf @@ -1767,8 +1767,8 @@ costa-etal-2012-english - Peter JuelHenrichsen - MarcusUneson + Peter JuelHenrichsen + MarcusUneson <fixed-case>SMALLW</fixed-case>orlds – Multilingual Content-Controlled Monologues 3362–3368 http://www.lrec-conf.org/proceedings/lrec2012/pdf/357_Paper.pdf @@ -1808,7 +1808,7 @@ MariaAloni Andreasvan Cranenburgh - RaquelFernández + RaquelFernández MartaSznajder Building a Corpus of Indefinite Uses Annotated with Fine-grained Semantic Functions 1511–1515 @@ -1827,7 +1827,7 @@ gupta-etal-2012-mining - Marie-ClaudeL’Homme + Marie-ClaudeL’Homme JaninePimentel Capturing syntactico-semantic regularities among terms: An application of the <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et methodology to terminology 262–268 @@ -1837,8 +1837,8 @@ DanielePighin - LluísMàrquez - LluísFormiga + LluísMàrquez + LluísFormiga The <fixed-case>FAUST</fixed-case> Corpus of Adequacy Assessments for Real-World Machine Translation Output 29–35 http://www.lrec-conf.org/proceedings/lrec2012/pdf/370_Paper.pdf @@ -1848,7 +1848,7 @@ StevenBethard OleksandrKolomiyets - Marie-FrancineMoens + Marie-FrancineMoens Annotating Story Timelines as Temporal Dependency Structures 2721–2726 http://www.lrec-conf.org/proceedings/lrec2012/pdf/371_Paper.pdf @@ -1867,13 +1867,13 @@ seinturier-etal-2012-ontological - AntónioBranco + AntónioBranco CatarinaCarvalheiro SílviaPereira SaraSilveira - JoãoSilva + JoãoSilva SérgioCastro - JoãoGraça + JoãoGraça A <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank for <fixed-case>P</fixed-case>ortuguese: the <fixed-case>CINTIL</fixed-case>-<fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank 1516–1521 http://www.lrec-conf.org/proceedings/lrec2012/pdf/373_Paper.pdf @@ -1882,8 +1882,8 @@ MontseCuadros - LluísPadró - GermanRigau + LluísPadró + GermanRigau Highlighting relevant concepts from Topic Signatures 3841–3848 http://www.lrec-conf.org/proceedings/lrec2012/pdf/374_Paper.pdf @@ -1891,9 +1891,9 @@ cuadros-etal-2012-highlighting - RankaStanković + RankaStanković CvetanaKrstev - IvanObradović + IvanObradović AleksandraTrtovac MilošUtvić A tool for enhanced search of multilingual digital libraries of e-journals @@ -1905,8 +1905,8 @@ PedroFialho SérgioCurto - Ana CristinaMendes - LuísaCoheur + Ana CristinaMendes + LuísaCoheur Extending a wordnet framework for simplicity and scalability 3701–3705 http://www.lrec-conf.org/proceedings/lrec2012/pdf/376_Paper.pdf @@ -1915,7 +1915,7 @@ TommasoFornaciari - MassimoPoesio + MassimoPoesio <fixed-case>D</fixed-case>e<fixed-case>C</fixed-case>our: a corpus of <fixed-case>DE</fixed-case>ceptive statements in <fixed-case>I</fixed-case>talian <fixed-case>COUR</fixed-case>ts 1585–1590 http://www.lrec-conf.org/proceedings/lrec2012/pdf/377_Paper.pdf @@ -1924,11 +1924,11 @@ TeresaLynn - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu JenniferFoster - ElaineUí Dhonnchadha + ElaineUí Dhonnchadha MarkDras - Josefvan Genabith + Josefvan Genabith <fixed-case>I</fixed-case>rish Treebanking and Parsing: A Preliminary Evaluation 1939–1946 http://www.lrec-conf.org/proceedings/lrec2012/pdf/378_Paper.pdf @@ -1949,7 +1949,7 @@ Mehmet TalhaÇakmak SüleymanAcar - GülşenEryiğit + GülşenEryiğit Word Alignment for <fixed-case>E</fixed-case>nglish-<fixed-case>T</fixed-case>urkish Language Pair 2177–2180 http://www.lrec-conf.org/proceedings/lrec2012/pdf/380_Paper.pdf @@ -1958,7 +1958,7 @@ NellyBarbot - OlivierBoeffard + OlivierBoeffard ArnaudDelhay Comparing performance of different set-covering strategies for linguistic content optimization in speech corpora 969–974 @@ -1976,7 +1976,7 @@ Mohammad HoseynSheykholeslam - BehrouzMinaei-Bidgoli + BehrouzMinaei-Bidgoli HosseinJuzi A Framework for Spelling Correction in <fixed-case>P</fixed-case>ersian Language Using Noisy Channel Model 706–710 @@ -1985,7 +1985,7 @@ sheykholeslam-etal-2012-framework - GillesSérasset + GillesSérasset <fixed-case>D</fixed-case>bnary: <fixed-case>W</fixed-case>iktionary as a <fixed-case>LMF</fixed-case> based Multilingual <fixed-case>RDF</fixed-case> network 2466–2472 http://www.lrec-conf.org/proceedings/lrec2012/pdf/387_Paper.pdf @@ -1994,7 +1994,7 @@ Dae-LimChoi - Bong-WanKim + Bong-WanKim Yeon-WhoaKim Yong-JuLee YongnamUm @@ -2047,7 +2047,7 @@ NavaMaroto - Marie-ClaudeL’Homme + Marie-ClaudeL’Homme AmparoAlcina Semantic Relations Established by Specialized Processes Expressed by Nouns and Verbs: Identification in a Corpus by means of Syntactico-semantic Annotation 3814–3819 @@ -2056,7 +2056,7 @@ maroto-etal-2012-semantic - RiccardoDel Gratta + RiccardoDel Gratta FrancescaFrontini FrancescoRubino IreneRusso @@ -2071,7 +2071,7 @@ BenoîtWeber GenevièveCaelen-Haumont Binh HaiPham - Do-DatTran + Do-DatTran <fixed-case>MISTRAL</fixed-case>+: A Melody Intonation Speaker Tonal Range semi-automatic Analysis using variable Levels 963–968 http://www.lrec-conf.org/proceedings/lrec2012/pdf/397_Paper.pdf @@ -2088,7 +2088,7 @@ kaeshammer-demberg-2012-german - Helen KaiyunChen + Helen KaiyunChen Annotating a corpus of human interaction with prosodic profiles — focusing on <fixed-case>M</fixed-case>andarin repair/disfluency 986–990 http://www.lrec-conf.org/proceedings/lrec2012/pdf/399_Paper.pdf @@ -2108,9 +2108,9 @@ HongzhiXu - Helen KaiyunChen + Helen KaiyunChen Chu-RenHuang - QinLu + QinLu DingxuShi Tin-ShingChiu A Grammar-informed Corpus-based Sentence Database for Linguistic and Computational Studies @@ -2129,13 +2129,13 @@ sloetjes-somasundaram-2012-elan - Ching-ShengLin + Ching-ShengLin ZumrutAkcam SamiraShaikh - SharonSmall + SharonSmall KenStahl - TomekStrzalkowski - NickWebb + TomekStrzalkowski + NickWebb Revealing Contentious Concepts Across Social Groups 2838–2841 http://www.lrec-conf.org/proceedings/lrec2012/pdf/403_Paper.pdf @@ -2145,7 +2145,7 @@ FabrizioBorgia Claudia S.Bianchini - PatriceDalle + PatriceDalle MariaDe Marsico Resource production of written forms of Sign Languages by a user-centered editor, <fixed-case>SW</fixed-case>ift (<fixed-case>S</fixed-case>ign<fixed-case>W</fixed-case>riting improved fast transcriber) 3779–3784 @@ -2154,9 +2154,9 @@ borgia-etal-2012-resource - BalamuraliAR + BalamuraliAR AdityaJoshi - PushpakBhattacharyya + PushpakBhattacharyya Cost and Benefit of Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Senses for Sentiment Analysis 3090–3097 http://www.lrec-conf.org/proceedings/lrec2012/pdf/405_Paper.pdf @@ -2173,7 +2173,7 @@ BogdanSacaleanu - GünterNeumann + GünterNeumann An Adaptive Framework for Named Entity Combination 1244–1249 http://www.lrec-conf.org/proceedings/lrec2012/pdf/411_Paper.pdf @@ -2181,7 +2181,7 @@ sacaleanu-neumann-2012-adaptive - PhilippeLanglais + PhilippeLanglais PatrickDrouin AméliePaulus Eugénie RompréBrodeur @@ -2209,7 +2209,7 @@ rysova-2012-alternative - KikuoMaekawa + KikuoMaekawa Prediction of Non-Linguistic Information of Spontaneous Speech from the Prosodic Annotation: Evaluation of the <fixed-case>X</fixed-case>-<fixed-case>JT</fixed-case>o<fixed-case>BI</fixed-case> system 991–996 http://www.lrec-conf.org/proceedings/lrec2012/pdf/422_Paper.pdf @@ -2217,7 +2217,7 @@ maekawa-2012-prediction - Maria TeresaPazienza + Maria TeresaPazienza ArmandoStellato AndreaTurbati <fixed-case>PEARL</fixed-case>: <fixed-case>P</fixed-case>roj<fixed-case>E</fixed-case>ction of Annotations Rule Language, a Language for Projecting (<fixed-case>UIMA</fixed-case>) Annotations over <fixed-case>RDF</fixed-case> Knowledge Bases @@ -2236,10 +2236,10 @@ strotgen-gertz-2012-temporal - Monica LestariParamita - PaulClough + Monica LestariParamita + PaulClough AhmetAker - RobertGaizauskas + RobertGaizauskas Correlation between Similarity Measures for Inter-Language Linked <fixed-case>W</fixed-case>ikipedia Articles 790–797 http://www.lrec-conf.org/proceedings/lrec2012/pdf/426_Paper.pdf @@ -2270,13 +2270,13 @@ moneglia-etal-2012-imagact - DanielZeman + DanielZeman DavidMareček MartinPopel LoganathanRamasamy JanŠtěpánek - ZdeněkŽabokrtský - JanHajič + ZdeněkŽabokrtský + JanHajič <fixed-case>H</fixed-case>amle<fixed-case>DT</fixed-case>: To Parse or Not to Parse? 2735–2741 http://www.lrec-conf.org/proceedings/lrec2012/pdf/429_Paper.pdf @@ -2284,7 +2284,7 @@ zeman-etal-2012-hamledt - LluísPadró + LluísPadró EvgenyStanilovsky <fixed-case>F</fixed-case>ree<fixed-case>L</fixed-case>ing 3.0: Towards Wider Multilinguality 2473–2479 @@ -2293,8 +2293,8 @@ padro-stanilovsky-2012-freeling - MannyRayner - PierretteBouillon + MannyRayner + PierretteBouillon JohannaGerlach Evaluating Appropriateness Of System Responses In A Spoken <fixed-case>CALL</fixed-case> Game 2690–2694 @@ -2305,7 +2305,7 @@ MatthewFuchs NikosTsourakis - MannyRayner + MannyRayner A Scalable Architecture For Web Deployment of Spoken Dialogue Systems 1309–1314 http://www.lrec-conf.org/proceedings/lrec2012/pdf/436_Paper.pdf @@ -2313,7 +2313,7 @@ fuchs-etal-2012-scalable - DieterVan Uytvanck + DieterVan Uytvanck HermanStehouwer LariLampen Semantic metadata mapping in practice: the Virtual Language Observatory @@ -2323,10 +2323,10 @@ van-uytvanck-etal-2012-semantic - EiríkurRögnvaldsson - Anton KarlIngason + EiríkurRögnvaldsson + Anton KarlIngason Einar FreyrSigurðsson - JoelWallenberg + JoelWallenberg The <fixed-case>I</fixed-case>celandic Parsed Historical Corpus (<fixed-case>I</fixed-case>ce<fixed-case>P</fixed-case>a<fixed-case>HC</fixed-case>) 1977–1984 http://www.lrec-conf.org/proceedings/lrec2012/pdf/440_Paper.pdf @@ -2335,8 +2335,8 @@ AshwiniVaidya - Jinho D.Choi - MarthaPalmer + Jinho D.Choi + MarthaPalmer BhuvanaNarasimhan Empty Argument Insertion in the <fixed-case>H</fixed-case>indi <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank 1522–1526 @@ -2345,7 +2345,7 @@ vaidya-etal-2012-empty - PatrickParoubek + PatrickParoubek XavierTannier A Rough Set Formalization of Quantitative Evaluation with Ambiguity 2311–2317 @@ -2355,9 +2355,9 @@ EleftheriosAvramidis - Marta R.Costa-jussà + Marta R.Costa-jussà ChristianFedermann - Josefvan Genabith + Josefvan Genabith MaiteMelero PavelPecina A Richly Annotated, Multilingual Parallel Corpus for Hybrid Machine Translation @@ -2367,7 +2367,7 @@ avramidis-etal-2012-richly - TomažErjavec + TomažErjavec The goo300k corpus of historical <fixed-case>S</fixed-case>lovene 2257–2260 http://www.lrec-conf.org/proceedings/lrec2012/pdf/445_Paper.pdf @@ -2386,8 +2386,8 @@ MyriamRakho - ÉricLaporte - MatthieuConstant + ÉricLaporte + MatthieuConstant A new semantically annotated corpus with syntactic-semantic and cross-lingual senses 597–600 http://www.lrec-conf.org/proceedings/lrec2012/pdf/447_Paper.pdf @@ -2413,9 +2413,9 @@ nicolas-etal-2012-unsupervised - LeonDerczynski - HéctorLlorens - EstelaSaquete + LeonDerczynski + HéctorLlorens + EstelaSaquete Massively Increasing <fixed-case>TIMEX</fixed-case>3 Resources: A Transduction Approach 3754–3761 http://www.lrec-conf.org/proceedings/lrec2012/pdf/451_Paper.pdf @@ -2443,8 +2443,8 @@ sundberg-etal-2012-visualizing - Binyam GebrekidanGebre - PeterWittenburg + Binyam GebrekidanGebre + PeterWittenburg PrzemyslawLenkiewicz Towards Automatic Gesture Stroke Detection 231–235 @@ -2454,7 +2454,7 @@ RichardJohansson - Karin FribergHeppin + Karin FribergHeppin DimitriosKokkinakis Semantic Role Labeling with the <fixed-case>S</fixed-case>wedish <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et 3697–3700 @@ -2464,7 +2464,7 @@ LoganathanRamasamy - ZdeněkŽabokrtský + ZdeněkŽabokrtský <fixed-case>P</fixed-case>rague Dependency Style Treebank for <fixed-case>T</fixed-case>amil 1888–1894 http://www.lrec-conf.org/proceedings/lrec2012/pdf/456_Paper.pdf @@ -2472,8 +2472,8 @@ ramasamy-zabokrtsky-2012-prague - JörgTiedemann - Dorte HaltrupHansen + JörgTiedemann + Dorte HaltrupHansen LeneOffersgaard SussiOlsen MatthiasZumpe @@ -2484,9 +2484,9 @@ tiedemann-etal-2012-distributed - PatríciaGonçalves + PatríciaGonçalves RitaSantos - AntónioBranco + AntónioBranco Treebanking by Sentence and Tree Transformation: Building a Treebank to support Question Answering in <fixed-case>P</fixed-case>ortuguese 1895–1901 http://www.lrec-conf.org/proceedings/lrec2012/pdf/460_Paper.pdf @@ -2495,7 +2495,7 @@ DavidGraff - MohamedMaamouri + MohamedMaamouri Developing <fixed-case>LMF</fixed-case>-<fixed-case>XML</fixed-case> Bilingual Dictionaries for Colloquial <fixed-case>A</fixed-case>rabic Dialects 269–274 http://www.lrec-conf.org/proceedings/lrec2012/pdf/461_Paper.pdf @@ -2503,7 +2503,7 @@ graff-maamouri-2012-developing - JörgTiedemann + JörgTiedemann Parallel Data, Tools and Interfaces in <fixed-case>OPUS</fixed-case> 2214–2218 http://www.lrec-conf.org/proceedings/lrec2012/pdf/463_Paper.pdf @@ -2530,7 +2530,7 @@ mohamed-etal-2012-annotating - Julia MariaSchulz + Julia MariaSchulz DanielaBecks ChristaWomser-Hacker ThomasMandl @@ -2542,7 +2542,7 @@ Mathieu-HenriFalco - VéroniqueMoriceau + VéroniqueMoriceau AnneVilnat <fixed-case>K</fixed-case>itten: a tool for normalizing <fixed-case>HTML</fixed-case> and extracting its textual content 2261–2267 @@ -2553,7 +2553,7 @@ EmanuelDima ChristinaHoppermann - ErhardHinrichs + ErhardHinrichs ThorstenTrippel ClausZinn A Metadata Editor to Support the Description of Linguistic Resources @@ -2574,8 +2574,8 @@ EmanuelDima VerenaHenrich - ErhardHinrichs - MarieHinrichs + ErhardHinrichs + MarieHinrichs ChristinaHoppermann ThorstenTrippel ThomasZastrow @@ -2588,8 +2588,8 @@ MontserratArza - José M.García Miguel - FranciscoCampillo + José M.García Miguel + FranciscoCampillo Miguel Cuevas -Alonso A <fixed-case>G</fixed-case>alician Syntactic Corpus with Application to Intonation Modeling 1650–1654 @@ -2598,9 +2598,9 @@ arza-etal-2012-galician - TafseerAhmed + TafseerAhmed MiriamButt - AnnetteHautli + AnnetteHautli SebastianSulger A Reference Dependency Bank for Analyzing Complex Predicates 3145–3152 @@ -2632,7 +2632,7 @@ MariannaApidianaki - BenoîtSagot + BenoîtSagot Applying cross-lingual <fixed-case>WSD</fixed-case> to wordnet development 833–840 http://www.lrec-conf.org/proceedings/lrec2012/pdf/478_Paper.pdf @@ -2640,7 +2640,7 @@ apidianaki-sagot-2012-applying - PierretteBouillon + PierretteBouillon ElisabettaJezek ChiaraMelloni AuréliePicton @@ -2652,8 +2652,8 @@ MarkFishel - OndřejBojar - MajaPopović + OndřejBojar + MajaPopović <fixed-case>T</fixed-case>erra: a Collection of Translation Error-Annotated Corpora 7–14 http://www.lrec-conf.org/proceedings/lrec2012/pdf/481_Paper.pdf @@ -2693,11 +2693,11 @@ zablotskiy-etal-2012-speech - Luis JavierRodríguez-Fuentes - MikelPenagarikano + Luis JavierRodríguez-Fuentes + MikelPenagarikano AmparoVarona - MireiaDiez - GermánBordel + MireiaDiez + GermánBordel <fixed-case>KALAKA</fixed-case>-2: a <fixed-case>TV</fixed-case> Broadcast Speech Database for the Recognition of <fixed-case>I</fixed-case>berian Languages in Clean and Noisy Environments 99–105 http://www.lrec-conf.org/proceedings/lrec2012/pdf/486_Paper.pdf @@ -2756,9 +2756,9 @@ wang-etal-2012-ntusocialrec - GuillaumeGravier - GillesAdda - NiklasPaulsson + GuillaumeGravier + GillesAdda + NiklasPaulsson MatthieuCarré AudeGiraudel OlivierGalibert @@ -2769,9 +2769,9 @@ gravier-etal-2012-etape - NúriaBel + NúriaBel LaurenRomeo - MuntsaPadró + MuntsaPadró Automatic lexical semantic classification of nouns 1448–1455 http://www.lrec-conf.org/proceedings/lrec2012/pdf/496_Paper.pdf @@ -2800,12 +2800,12 @@ AnnaBraasch LinaHenriksen CsabaHuszka - AndersJohannsen + AndersJohannsen LarsKayser BenteMaegaard OleNorgaard - StefanSchulz - JürgenWedekind + StefanSchulz + JürgenWedekind Creation and use of Language Resources in a Question-Answering e<fixed-case>H</fixed-case>ealth System 2536–2542 http://www.lrec-conf.org/proceedings/lrec2012/pdf/504_Paper.pdf @@ -2813,7 +2813,7 @@ andersen-etal-2012-creation - Lina M.Rojas-Barahona + Lina M.Rojas-Barahona AlejandraLorenzo ClaireGardent Building and Exploiting a Corpus of Dialog Interactions between <fixed-case>F</fixed-case>rench Speaking Virtual and Human Agents @@ -2825,8 +2825,8 @@ MarionPotet EmmanuelleEsperança-Rodier - LaurentBesacier - HervéBlanchon + LaurentBesacier + HervéBlanchon Collection of a Large Database of <fixed-case>F</fixed-case>rench-<fixed-case>E</fixed-case>nglish <fixed-case>SMT</fixed-case> Output Corrections 4043–4048 http://www.lrec-conf.org/proceedings/lrec2012/pdf/506_Paper.pdf @@ -2845,7 +2845,7 @@ ElsLefever - VéroniqueHoste + VéroniqueHoste MartineDe Cock Discovering Missing <fixed-case>W</fixed-case>ikipedia Inter-language Links by means of Cross-lingual Word Sense Disambiguation 841–846 @@ -2855,7 +2855,7 @@ SaabMansour - HermannNey + HermannNey <fixed-case>A</fixed-case>rabic-Segmentation Combination Strategies for Statistical Machine Translation 3915–3920 http://www.lrec-conf.org/proceedings/lrec2012/pdf/509_Paper.pdf @@ -2863,22 +2863,22 @@ mansour-ney-2012-arabic - JanHajič - EvaHajičová - JarmilaPanevová - PetrSgall - OndřejBojar + JanHajič + EvaHajičová + JarmilaPanevová + PetrSgall + OndřejBojar SilvieCinková - EvaFučíková + EvaFučíková MarieMikulová PetrPajas JanPopelka - JiříSemecký - JanaŠindlerová + JiříSemecký + JanaŠindlerová JanŠtěpánek JosefToman - ZdeňkaUrešová - ZdeněkŽabokrtský + ZdeňkaUrešová + ZdeněkŽabokrtský Announcing <fixed-case>P</fixed-case>rague <fixed-case>C</fixed-case>zech-<fixed-case>E</fixed-case>nglish <fixed-case>D</fixed-case>ependency <fixed-case>T</fixed-case>reebank 2.0 3153–3160 http://www.lrec-conf.org/proceedings/lrec2012/pdf/510_Paper.pdf @@ -2887,7 +2887,7 @@ PaulFelt - EricRingger + EricRingger KevinSeppi KristianHeal RobbieHaertel @@ -2899,7 +2899,7 @@ felt-etal-2012-first - EminaKurtić + EminaKurtić BillWells Guy J.Brown TimothyKempton @@ -2911,8 +2911,8 @@ kurtic-etal-2012-corpus - AnttonGurrutxaga - IñakiAlegria + AnttonGurrutxaga + IñakiAlegria Measuring the compositionality of <fixed-case>NV</fixed-case> expressions in <fixed-case>B</fixed-case>asque by means of distributional similarity techniques 2389–2394 http://www.lrec-conf.org/proceedings/lrec2012/pdf/514_Paper.pdf @@ -2921,9 +2921,9 @@ JorgeVivaldi - Luis AdriánCabrera-Diego - GerardoSierra - MaríaPozzi + Luis AdriánCabrera-Diego + GerardoSierra + MaríaPozzi Using <fixed-case>W</fixed-case>ikipedia to Validate the Terminology found in a Corpus of Basic Textbooks 3820–3827 http://www.lrec-conf.org/proceedings/lrec2012/pdf/515_Paper.pdf @@ -2932,7 +2932,7 @@ JavierCaminero - Mari CarmenRodríguez + Mari CarmenRodríguez JeanVanderdonckt FabioPaternò JoergRett @@ -2946,8 +2946,8 @@ caminero-etal-2012-serenoa - AmaliaTodirascu - SebastianPadó + AmaliaTodirascu + SebastianPadó JenniferKrisch MaxKisselew UlrichHeid @@ -2958,14 +2958,14 @@ todirascu-etal-2012-french - MontserratMarimon + MontserratMarimon BeatrizFisas - NúriaBel + NúriaBel MartaVillegas JorgeVivaldi SergiTorner MercèLorente - SilviaVázquez + SilviaVázquez MartaVillegas The <fixed-case>IULA</fixed-case> Treebank 1920–1926 @@ -2995,7 +2995,7 @@ Md. Faisal MahbubChowdhury - AlbertoLavelli + AlbertoLavelli An Evaluation of the Effect of Automatic Preprocessing on Syntactic Parsing for Biomedical Relation Extraction 544–551 http://www.lrec-conf.org/proceedings/lrec2012/pdf/522_Paper.pdf @@ -3004,9 +3004,9 @@ HermanStehouwer - MatejDurco + MatejDurco EricAuer - DaanBroeder + DaanBroeder Federated Search: Towards a Common Search Infrastructure 3255–3259 http://www.lrec-conf.org/proceedings/lrec2012/pdf/524_Paper.pdf @@ -3015,8 +3015,8 @@ ElsaTolone - BenoîtSagot - ÉricVillemonte de La Clergerie + BenoîtSagot + ÉricVillemonte de La Clergerie Evaluating and improving syntactic lexica by plugging them within a parser 2742–2749 http://www.lrec-conf.org/proceedings/lrec2012/pdf/525_Paper.pdf @@ -3024,9 +3024,9 @@ tolone-etal-2012-evaluating - Jing GuangHan + Jing GuangHan EmerGilmartin - CelineDe Looze + CelineDe Looze BrianVaughan NickCampbell The Herme Database of Spontaneous Multimodal Human-Robot Dialogues @@ -3036,8 +3036,8 @@ han-etal-2012-herme - Víctor M.Sánchez-Cartagena - MiquelEsplà-Gomis + Víctor M.Sánchez-Cartagena + MiquelEsplà-Gomis Juan AntonioPérez-Ortiz Source-Language Dictionaries Help Non-Expert Users to Enlarge Target-Language Dictionaries for Machine Translation 3422–3429 @@ -3054,14 +3054,14 @@ schmidt-2012-exmaralda - HarryBunt + HarryBunt JanAlexandersson Jae-WoongChoe - Alex ChengyuFang - KoitiHasida + Alex ChengyuFang + KoitiHasida VolhaPetukhova - AndreiPopescu-Belis - DavidTraum + AndreiPopescu-Belis + DavidTraum <fixed-case>ISO</fixed-case> 24617-2: A semantically-based standard for dialogue annotation 430–437 http://www.lrec-conf.org/proceedings/lrec2012/pdf/530_Paper.pdf @@ -3079,11 +3079,11 @@ NataliaKonstantinova - Sheila C.M.de Sousa - Noa P.Cruz - Manuel J.Maña - MaiteTaboada - RuslanMitkov + Sheila C.M.de Sousa + Noa P.Cruz + Manuel J.Maña + MaiteTaboada + RuslanMitkov A review corpus annotated for negation, speculation and their scope 3190–3195 http://www.lrec-conf.org/proceedings/lrec2012/pdf/533_Paper.pdf @@ -3094,7 +3094,7 @@ ValerioBasile JohanBos KilianEvang - NoortjeVenhuizen + NoortjeVenhuizen Developing a large semantically annotated corpus 3196–3200 http://www.lrec-conf.org/proceedings/lrec2012/pdf/534_Paper.pdf @@ -3115,7 +3115,7 @@ EliasIosif MariaGiannoudaki - EricFosler-Lussier + EricFosler-Lussier AlexandrosPotamianos Associative and Semantic Features Extracted From Web-Harvested Corpora 2991–2998 @@ -3125,8 +3125,8 @@ MaaskeTreurniet - OrphéeDe Clercq - Henkvan den Heuvel + OrphéeDe Clercq + Henkvan den Heuvel NellekeOostdijk Collection of a corpus of <fixed-case>D</fixed-case>utch <fixed-case>SMS</fixed-case> 2268–2273 @@ -3147,7 +3147,7 @@ NikosTsourakis - MannyRayner + MannyRayner A Corpus for a Gesture-Controlled Mobile Spoken Dialogue System 1315–1322 http://www.lrec-conf.org/proceedings/lrec2012/pdf/539_Paper.pdf @@ -3157,9 +3157,9 @@ MarcPoch AntonioToral - OlivierHamon + OlivierHamon ValeriaQuochi - NúriaBel + NúriaBel Towards a User-Friendly Platform for Building Language Resources based on Web Services 1156–1163 http://www.lrec-conf.org/proceedings/lrec2012/pdf/543_Paper.pdf @@ -3167,9 +3167,9 @@ poch-etal-2012-towards - JohnMcCrae + JohnMcCrae ElenaMontiel-Ponsoda - PhilippCimiano + PhilippCimiano Collaborative semantic editing of linked data lexica 2619–2625 http://www.lrec-conf.org/proceedings/lrec2012/pdf/544_Paper.pdf @@ -3190,8 +3190,8 @@ WillemElbers - DaanBroeder - Dietervan Uytvanck + DaanBroeder + Dietervan Uytvanck Proper Language Resource Centers 3260–3263 http://www.lrec-conf.org/proceedings/lrec2012/pdf/547_Paper.pdf @@ -3212,7 +3212,7 @@ KarënFort - ClaireFrançois + ClaireFrançois OlivierGalibert MahaGhribi Analyzing the Impact of Prevalence on the Evaluation of a Manual Annotation Campaign @@ -3222,7 +3222,7 @@ fort-etal-2012-analyzing - DietmarRösner + DietmarRösner JörgFrommer RafaelFriesen MatthiasHaase @@ -3266,7 +3266,7 @@ bouamor-etal-2012-contrastive - MohamedMaamouri + MohamedMaamouri AnnBies SethKulick Expanding <fixed-case>A</fixed-case>rabic Treebank to Speech: Results from Broadcast News @@ -3288,7 +3288,7 @@ AnitaAlicante CristinaBosco AnnaCorazza - AlbertoLavelli + AlbertoLavelli A treebank-based study on the influence of <fixed-case>I</fixed-case>talian word order on parsing performance 1985–1992 http://www.lrec-conf.org/proceedings/lrec2012/pdf/561_Paper.pdf @@ -3297,9 +3297,9 @@ KallirroiGeorgila - AlanBlack + AlanBlack KenjiSagae - DavidTraum + DavidTraum Practical Evaluation of Human and Synthesized Speech for Virtual Human Dialogue Systems 3519–3526 http://www.lrec-conf.org/proceedings/lrec2012/pdf/562_Paper.pdf @@ -3332,7 +3332,7 @@ roche-2012-ontoterminology - DoniaScott + DoniaScott RossanoBarone RobKoeling Corpus Annotation as a Scientific Task @@ -3352,7 +3352,7 @@ mendes-etal-2012-dbpedia - Cheikh M. BambaDione + Cheikh M. BambaDione A Morphological Analyzer For <fixed-case>W</fixed-case>olof Using Finite-State Techniques 894–901 http://www.lrec-conf.org/proceedings/lrec2012/pdf/572_Paper.pdf @@ -3360,7 +3360,7 @@ dione-2012-morphological - Leonardo CampillosLlanos + Leonardo CampillosLlanos Designing a search interface for a <fixed-case>S</fixed-case>panish learner spoken corpus: the end-user’s evaluation 241–248 http://www.lrec-conf.org/proceedings/lrec2012/pdf/574_Paper.pdf @@ -3368,7 +3368,7 @@ llanos-2012-designing - Carla ParraEscartín + Carla ParraEscartín Design and compilation of a specialized <fixed-case>S</fixed-case>panish-<fixed-case>G</fixed-case>erman parallel corpus 2199–2206 http://www.lrec-conf.org/proceedings/lrec2012/pdf/577_Paper.pdf @@ -3386,8 +3386,8 @@ NizarHabash - MonaDiab - OwenRambow + MonaDiab + OwenRambow Conventional Orthography for Dialectal <fixed-case>A</fixed-case>rabic 711–718 http://www.lrec-conf.org/proceedings/lrec2012/pdf/579_Paper.pdf @@ -3395,9 +3395,9 @@ habash-etal-2012-conventional - DaanBroeder - Dietervan Uytvanck - MariaGavrilidou + DaanBroeder + Dietervan Uytvanck + MariaGavrilidou ThorstenTrippel MenzoWindhouwer Standardizing a Component Metadata Infrastructure @@ -3410,7 +3410,7 @@ AhmetAker MahmoudEl-Haj M-DyaaAlbakour - UdoKruschwitz + UdoKruschwitz Assessing Crowdsourcing Quality through Objective Tasks 1456–1461 http://www.lrec-conf.org/proceedings/lrec2012/pdf/583_Paper.pdf @@ -3418,7 +3418,7 @@ aker-etal-2012-assessing - SabineSchulte im Walde + SabineSchulte im Walde SusanneBorgwaldt RonnyJauch Association Norms of <fixed-case>G</fixed-case>erman Noun Compounds @@ -3438,7 +3438,7 @@ ambati-etal-2012-word - EckhardBick + EckhardBick HelianaMello AlessandroPanunzi TommasoRaso @@ -3461,10 +3461,10 @@ koeva-etal-2012-bulgarian - Rebecca J.Passonneau - Collin F.Baker + Rebecca J.Passonneau + Collin F.Baker ChristianeFellbaum - NancyIde + NancyIde The <fixed-case>MASC</fixed-case> Word Sense Corpus 3025–3030 http://www.lrec-conf.org/proceedings/lrec2012/pdf/589_Paper.pdf @@ -3474,8 +3474,8 @@ CristinaMota AlbertoSimões - CláudiaFreitas - LuísCosta + CláudiaFreitas + LuísCosta DianaSantos <fixed-case>P</fixed-case>ágico: Evaluating <fixed-case>W</fixed-case>ikipedia-based information retrieval in <fixed-case>P</fixed-case>ortuguese 2015–2022 @@ -3484,7 +3484,7 @@ mota-etal-2012-pagico - AlexandreDenis + AlexandreDenis IngridFalk ClaireGardent LauraPerez-Beltrachini @@ -3509,9 +3509,9 @@ RonArtstein JillianGerten AthanasiosKatsamanis - ShrikanthNarayanan + ShrikanthNarayanan AngelaNazarian - DavidTraum + DavidTraum The Twins Corpus of Museum Visitor Questions 2355–2361 http://www.lrec-conf.org/proceedings/lrec2012/pdf/595_Paper.pdf @@ -3530,10 +3530,10 @@ DoaaSamy - AntonioMoreno-Sandoval + AntonioMoreno-Sandoval ConchiBueno-Díaz - MartaGarrote-Salazar - José M.Guirao + MartaGarrote-Salazar + José M.Guirao Medical Term Extraction in an <fixed-case>A</fixed-case>rabic Medical Corpus 640–645 http://www.lrec-conf.org/proceedings/lrec2012/pdf/597_Paper.pdf @@ -3563,7 +3563,7 @@ MohammedAttia PavelPecina YounesSamih - Josefvan Genabith + Josefvan Genabith <fixed-case>A</fixed-case>rabic Word Generation and Modelling for Spell Checking 719–725 http://www.lrec-conf.org/proceedings/lrec2012/pdf/603_Paper.pdf @@ -3572,7 +3572,7 @@ YasuharuDen - HanaeKoiso + HanaeKoiso KatsuyaTakanashi NaoYoshida Annotation of response tokens and their triggering expressions in <fixed-case>J</fixed-case>apanese multi-party conversations @@ -3592,8 +3592,8 @@ TakahiroMiyajima - HideakiKikuchi - KatsuhikoShirai + HideakiKikuchi + KatsuhikoShirai ShigekiOkawa Method for Collection of Acted Speech Using Various Situation Scripts 1179–1182 @@ -3602,8 +3602,8 @@ miyajima-etal-2012-method - DaanBroeder - Dietervan Uytvanck + DaanBroeder + Dietervan Uytvanck GunterSenft Citing on-line Language Resources 1391–1394 @@ -3615,7 +3615,7 @@ MohammedAttia KhaledShaalan LamiaTounsi - Josefvan Genabith + Josefvan Genabith Automatic Extraction and Evaluation of <fixed-case>A</fixed-case>rabic <fixed-case>LFG</fixed-case> Resources 1947–1954 http://www.lrec-conf.org/proceedings/lrec2012/pdf/609_Paper.pdf @@ -3623,7 +3623,7 @@ attia-etal-2012-automatic - MatthieuConstant + MatthieuConstant IsabelleTellier Evaluating the Impact of External Lexical Resources into a <fixed-case>CRF</fixed-case>-based Multiword Segmenter and Part-of-Speech Tagger 646–650 @@ -3633,7 +3633,7 @@ BrettDrury - José JoãoAlmeida + José JoãoAlmeida The Minho Quotation Resource 2280–2285 http://www.lrec-conf.org/proceedings/lrec2012/pdf/611_Paper.pdf @@ -3705,7 +3705,7 @@ AhmetAker EvangelosKanoulas - RobertGaizauskas + RobertGaizauskas A light way to collect comparable corpora from the Web 15–20 http://www.lrec-conf.org/proceedings/lrec2012/pdf/626_Paper.pdf @@ -3714,7 +3714,7 @@ MaiteMelero - Marta R.Costa-Jussà + Marta R.Costa-Jussà JudithDomingo MontseMarquina MartíQuixal @@ -3725,8 +3725,8 @@ melero-etal-2012-holaaa - DanicaDamljanović - UdoKruschwitz + DanicaDamljanović + UdoKruschwitz M-DyaaAlbakour JohannPetrak MihaiLupu @@ -3738,7 +3738,7 @@ MarilisaAmoia - KerstinKunz + KerstinKunz EkaterinaLapshinova-Koltunski Coreference in Spoken vs. Written Texts: a Corpus-based Analysis 158–164 @@ -3747,9 +3747,9 @@ amoia-etal-2012-coreference - OlivierBoeffard + OlivierBoeffard LaureCharonnat - Sébastien LeMaguer + Sébastien LeMaguer DamienLolive Towards Fully Automatic Annotation of Audio Books for <fixed-case>TTS</fixed-case> 975–980 @@ -3760,7 +3760,7 @@ IanLewin ŞenayKafkas - DietrichRebholz-Schuhmann + DietrichRebholz-Schuhmann <fixed-case>C</fixed-case>entroids: Gold standards with distributional variation 3894–3900 http://www.lrec-conf.org/proceedings/lrec2012/pdf/633_Paper.pdf @@ -3777,7 +3777,7 @@ navarretta-paggio-2012-multimodal - DavidLewis + DavidLewis AlexanderO’Connor AndrzejZydroń GerdSjögren @@ -3810,9 +3810,9 @@ CostanzaNavarretta - ElisabethAhlsén + ElisabethAhlsén JensAllwood - KristiinaJokinen + KristiinaJokinen PatriziaPaggio Feedback in <fixed-case>N</fixed-case>ordic First-Encounters: a Comparative Study 2494–2499 @@ -3834,7 +3834,7 @@ YuChen - AndreasEisele + AndreasEisele <fixed-case>M</fixed-case>ulti<fixed-case>UN</fixed-case> v2: <fixed-case>UN</fixed-case> Documents with Multilingual Alignments 2500–2504 http://www.lrec-conf.org/proceedings/lrec2012/pdf/641_Paper.pdf @@ -3852,7 +3852,7 @@ GracindaCarvalho - David Martinsde Matos + David Martinsde Matos VitorRocio Building and Exploring Semantic Equivalences Resources 2038–2042 @@ -3861,7 +3861,7 @@ carvalho-etal-2012-building - Septina DianLarasati + Septina DianLarasati <fixed-case>IDENTIC</fixed-case> Corpus: Morphologically Enriched <fixed-case>I</fixed-case>ndonesian-<fixed-case>E</fixed-case>nglish Parallel Corpus 902–906 http://www.lrec-conf.org/proceedings/lrec2012/pdf/644_Paper.pdf @@ -3869,8 +3869,8 @@ larasati-2012-identic - OndřejBojar - ZdeněkŽabokrtský + OndřejBojar + ZdeněkŽabokrtský OndřejDušek PetraGaluščáková MartinMajliš @@ -3887,7 +3887,7 @@ KaisDukes - EricAtwell + EricAtwell <fixed-case>LAMP</fixed-case>: A Multimodal Web Platform for Collaborative Linguistic Analysis 3268–3275 http://www.lrec-conf.org/proceedings/lrec2012/pdf/646_Paper.pdf @@ -3904,7 +3904,7 @@ ogrodniczuk-lenart-2012-web - Casey ReddKennington + Casey ReddKennington MartinKay AnnemarieFriedrich Suffix Trees as Language Models @@ -3914,9 +3914,9 @@ kennington-etal-2012-suffix - Liviu P.Dinu + Liviu P.Dinu VladNiculae - Octavia-MariaŞulea + Octavia-MariaŞulea The <fixed-case>R</fixed-case>omanian Neuter Examined Through A Two-Gender N-Gram Classification System 907–910 http://www.lrec-conf.org/proceedings/lrec2012/pdf/651_Paper.pdf @@ -3926,7 +3926,7 @@ SoojeongEom MarkusDickinson - GrahamKatz + GrahamKatz Using semi-experts to derive judgments on word sense alignment: a pilot study 605–611 http://www.lrec-conf.org/proceedings/lrec2012/pdf/652_Paper.pdf @@ -3945,7 +3945,7 @@ DawnLawrie JamesMayfield PaulMcNamee - DouglasOard + DouglasOard Creating and Curating a Cross-Language Person-Entity Linking Collection 3106–3110 http://www.lrec-conf.org/proceedings/lrec2012/pdf/655_Paper.pdf @@ -3954,7 +3954,7 @@ MarcVerhagen - JamesPustejovsky + JamesPustejovsky The <fixed-case>TARSQI</fixed-case> Toolkit 2043–2048 http://www.lrec-conf.org/proceedings/lrec2012/pdf/656_Paper.pdf @@ -3974,7 +3974,7 @@ JonathanWright KiraGriffitt JoeEllis - StephanieStrassel + StephanieStrassel BrendanCallahan Annotation Trees: <fixed-case>LDC</fixed-case>’s customizable, extensible, scalable, annotation infrastructure 479–485 @@ -3992,7 +3992,7 @@ RaniaAl-Sabbagh - RoxanaGirju + RoxanaGirju <fixed-case>YADAC</fixed-case>: Yet another Dialectal <fixed-case>A</fixed-case>rabic Corpus 2882–2889 http://www.lrec-conf.org/proceedings/lrec2012/pdf/663_Paper.pdf @@ -4011,11 +4011,11 @@ clarke-etal-2012-nlp - LuísMarujo + LuísMarujo AnatoleGershman - JaimeCarbonell - RobertFrederking - João P.Neto + JaimeCarbonell + RobertFrederking + João P.Neto Supervised Topical Key Phrase Extraction of News Stories using Crowdsourcing, Light Filtering and Co-reference Normalization 399–403 http://www.lrec-conf.org/proceedings/lrec2012/pdf/672_Paper.pdf @@ -4034,8 +4034,8 @@ MartaRecasens - M. AntòniaMartí - ConstantinOrasan + M. AntòniaMartí + ConstantinOrasan Annotating Near-Identity from Coreference Disagreements 165–172 http://www.lrec-conf.org/proceedings/lrec2012/pdf/674_Paper.pdf @@ -4071,7 +4071,7 @@ schumann-2012-knowledge - GözdeÖzbal + GözdeÖzbal CarloStrapparava MarcoGuerini Brand Pitt: A Corpus to Explore the Art of Naming @@ -4081,8 +4081,8 @@ ozbal-etal-2012-brand - OrphéeDe Clercq - VeroniqueHoste + OrphéeDe Clercq + VeroniqueHoste PaolaMonachesi Evaluating automatic cross-domain <fixed-case>D</fixed-case>utch semantic role annotation 88–93 @@ -4093,9 +4093,9 @@ ThierryBazillon MelanieDeplano - FredericBechet + FredericBechet AlexisNasr - BenoitFavre + BenoitFavre Syntactic annotation of spontaneous speech: application to call-center conversation data 1338–1342 http://www.lrec-conf.org/proceedings/lrec2012/pdf/682_Paper.pdf @@ -4113,12 +4113,12 @@ hong-etal-2012-korean - FredericBechet + FredericBechet BenjaminMaza NicolasBigouroux ThierryBazillon - MarcEl-Bèze - RenatoDe Mori + MarcEl-Bèze + RenatoDe Mori EricArbillot <fixed-case>DECODA</fixed-case>: a call-centre human-human spoken conversation corpus 1343–1347 @@ -4169,15 +4169,15 @@ akiba-etal-2012-designing - AntonioMoreno-Sandoval - Leonardo CampillosLlanos + AntonioMoreno-Sandoval + Leonardo CampillosLlanos YangDong EmiTakamori - José M.Guirao + José M.Guirao PaulaGozalo ChiekoKimura KengoMatsui - MartaGarrote-Salazar + MartaGarrote-Salazar Spontaneous Speech Corpora for language learners of <fixed-case>S</fixed-case>panish, <fixed-case>C</fixed-case>hinese and <fixed-case>J</fixed-case>apanese 2695–2701 http://www.lrec-conf.org/proceedings/lrec2012/pdf/697_Paper.pdf @@ -4186,7 +4186,7 @@ AnthonyRousseau - PaulDeléglise + PaulDeléglise YannickEstève <fixed-case>TED</fixed-case>-<fixed-case>LIUM</fixed-case>: an Automatic Speech Recognition dedicated corpus 125–129 @@ -4203,11 +4203,11 @@ petasis-2012-sync3 - ValérieMapelli + ValérieMapelli VictoriaArranz MatthieuCarré HélèneMazo - DjamelMostefa + DjamelMostefa KhalidChoukri <fixed-case>ELRA</fixed-case> in the heart of a cooperative <fixed-case>HLT</fixed-case> world 55–59 @@ -4218,7 +4218,7 @@ PatrikLambert HolgerSchwenk - FrédéricBlain + FrédéricBlain Automatic Translation of Scientific Documents in the <fixed-case>HAL</fixed-case> Archive 3933–3936 http://www.lrec-conf.org/proceedings/lrec2012/pdf/703_Paper.pdf @@ -4239,7 +4239,7 @@ AudeGiraudel MatthieuCarré - ValérieMapelli + ValérieMapelli JulietteKahn OlivierGalibert LudovicQuintard @@ -4251,7 +4251,7 @@ AndreaGesmundo - TanjaSamardžić + TanjaSamardžić Lemmatising <fixed-case>S</fixed-case>erbian as Category Tagging with Bidirectional Sequence Classification 2103–2106 http://www.lrec-conf.org/proceedings/lrec2012/pdf/708_Paper.pdf @@ -4269,7 +4269,7 @@ DavidTavarez - EvaNavas + EvaNavas DanielErro IbonSaratxaga Strategies to Improve a Speaker Diarisation Tool @@ -4301,7 +4301,7 @@ AlistairConkie ThomasOkken Yeon-JunKim - GiuseppeDi Fabbrizio + GiuseppeDi Fabbrizio Building Text-To-Speech Voices in the Cloud 3317–3321 http://www.lrec-conf.org/proceedings/lrec2012/pdf/716_Paper.pdf @@ -4319,7 +4319,7 @@ DašaBerović - ŽeljkoAgić + ŽeljkoAgić MarkoTadić <fixed-case>C</fixed-case>roatian Dependency Treebank: Recent Development and Initial Experiments 1902–1906 @@ -4328,7 +4328,7 @@ berovic-etal-2012-croatian - TinaKluewer + TinaKluewer FeiyuXu PeterAdolphs HansUszkoreit @@ -4340,7 +4340,7 @@ MartaVillegas - NuriaBel + NuriaBel CarlosGonzalo AmparoMoreno NuriaSimelio @@ -4352,7 +4352,7 @@ PetyaOsenova - KirilSimov + KirilSimov LaskaLaskova StanislavaKancheva A Treebank-driven Creation of an <fixed-case>O</fixed-case>nto<fixed-case>V</fixed-case>alence Verb lexicon for <fixed-case>B</fixed-case>ulgarian @@ -4375,7 +4375,7 @@ ShaohuaYang HaiZhao XiaolinWang - Bao-liangLu + Bao-liangLu Spell Checking for <fixed-case>C</fixed-case>hinese 730–736 http://www.lrec-conf.org/proceedings/lrec2012/pdf/727_Paper.pdf @@ -4383,7 +4383,7 @@ yang-etal-2012-spell - ZahurulIslam + ZahurulIslam AlexanderMehler Customization of the <fixed-case>E</fixed-case>uroparl Corpus for Translation Studies 2505–2510 @@ -4393,7 +4393,7 @@ CarloStrapparava - RadaMihalcea + RadaMihalcea AlbertoBattocchi A Parallel Corpus of Music and Lyrics Annotated with Emotions 2343–2346 @@ -4404,7 +4404,7 @@ ElisaBianchi MirkoTavosanis - EmilianoGiovannetti + EmilianoGiovannetti Creation of a bottom-up corpus-based ontology for <fixed-case>I</fixed-case>talian Linguistics 2641–2647 http://www.lrec-conf.org/proceedings/lrec2012/pdf/732_Paper.pdf @@ -4422,13 +4422,13 @@ CarmenDayrell - ArnaldoCandido Jr. + ArnaldoCandido Jr. GabrielLima DaniloMachado Jr. AnnCopestake - ValériaFeltrim + ValériaFeltrim StellaTagnin - SandraAluisio + SandraAluisio Rhetorical Move Detection in <fixed-case>E</fixed-case>nglish Abstracts: Multi-label Sentence Classifiers and their Annotated Corpora 1604–1609 http://www.lrec-conf.org/proceedings/lrec2012/pdf/734_Paper.pdf @@ -4462,7 +4462,7 @@ lis-2012-polish - AnneliesBraffort + AnneliesBraffort LeïlaBoutora <fixed-case>DEGELS</fixed-case>1: A comparable corpus of <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage and co-speech gestures 2426–2429 @@ -4473,7 +4473,7 @@ MatildeGonzalez MichaelFilhol - ChristopheCollet + ChristopheCollet Semi-Automatic Sign Language Corpora Annotation using Lexical Representations of Signs 2430–2434 http://www.lrec-conf.org/proceedings/lrec2012/pdf/741_Paper.pdf @@ -4490,19 +4490,19 @@ iliev-genov-2012-expanding - AndrejsVasiļjevs + AndrejsVasiļjevs MarkusForsberg TatianaGornostay - Dorte HaltrupHansen + Dorte HaltrupHansen KristínJóhannsdóttir - GunnLyse - KristerLindén + GunnLyse + KristerLindén LeneOffersgaard SussiOlsen - BolettePedersen - EiríkurRögnvaldsson - IngunaSkadiņa - KoenraadDe Smedt + BolettePedersen + EiríkurRögnvaldsson + IngunaSkadiņa + KoenraadDe Smedt VilleOksanen RobertsRozis Creation of an Open Shared Language Resource Repository in the <fixed-case>N</fixed-case>ordic and <fixed-case>B</fixed-case>altic Countries @@ -4526,7 +4526,7 @@ MartinReynaert InekeSchuurman - VéroniqueHoste + VéroniqueHoste NellekeOostdijk Maartenvan Gompel Beyond <fixed-case>S</fixed-case>o<fixed-case>N</fixed-case>a<fixed-case>R</fixed-case>: towards the facilitation of large corpus building efforts @@ -4536,15 +4536,15 @@ reynaert-etal-2012-beyond - FabriceLefèvre - DjamelMostefa - LaurentBesacier + FabriceLefèvre + DjamelMostefa + LaurentBesacier YannickEstève - MatthieuQuignard + MatthieuQuignard NathalieCamelin - BenoitFavre + BenoitFavre BassamJabaian - Lina M.Rojas-Barahona + Lina M.Rojas-Barahona Leveraging study of robustness and portability of spoken language understanding systems across languages and domains: the <fixed-case>PORTMEDIA</fixed-case> corpora 1436–1442 http://www.lrec-conf.org/proceedings/lrec2012/pdf/751_Paper.pdf @@ -4554,7 +4554,7 @@ RahulAgarwal Bharat RamAmbati - Anil KumarSingh + Anil KumarSingh A <fixed-case>GUI</fixed-case> to Detect and Correct Errors in <fixed-case>H</fixed-case>indi Dependency Treebank 1907–1911 http://www.lrec-conf.org/proceedings/lrec2012/pdf/753_Paper.pdf @@ -4562,8 +4562,8 @@ agarwal-etal-2012-gui - JordiAtserias - MariaFuentes + JordiAtserias + MariaFuentes RogelioNazar IreneRenau Spell Checking in <fixed-case>S</fixed-case>panish: The Case of Diacritic Accents @@ -4589,7 +4589,7 @@ LiesbethAugustinus VincentVandeghinste - FrankVan Eynde + FrankVan Eynde Example-Based Treebank Querying 3161–3167 http://www.lrec-conf.org/proceedings/lrec2012/pdf/756_Paper.pdf @@ -4635,10 +4635,10 @@ bott-etal-2012-text - ElisabetComelles - JordiAtserias + ElisabetComelles + JordiAtserias VictoriaArranz - IreneCastellón + IreneCastellón <fixed-case>VERT</fixed-case>a: Linguistic features in <fixed-case>MT</fixed-case> evaluation 3944–3950 http://www.lrec-conf.org/proceedings/lrec2012/pdf/763_Paper.pdf @@ -4649,7 +4649,7 @@ AtroVoutilainen KristiinaMuhonen TanjaPurtonen - KristerLindén + KristerLindén Specifying Treebanks, Outsourcing Parsebanks: <fixed-case>F</fixed-case>inn<fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank 3 1927–1931 http://www.lrec-conf.org/proceedings/lrec2012/pdf/766_Paper.pdf @@ -4668,7 +4668,7 @@ NicolettaCalzolari - RiccardoDel Gratta + RiccardoDel Gratta GilFrancopoulo JosephMariani FrancescoRubino @@ -4681,8 +4681,8 @@ calzolari-etal-2012-lre - CorinaForăscu - DanTufiş + CorinaForăscu + DanTufiş <fixed-case>R</fixed-case>omanian <fixed-case>T</fixed-case>ime<fixed-case>B</fixed-case>ank: An Annotated Parallel Corpus for Temporal Information 3762–3766 http://www.lrec-conf.org/proceedings/lrec2012/pdf/770_Paper.pdf @@ -4690,7 +4690,7 @@ forascu-tufis-2012-romanian - MatteoNegri + MatteoNegri YasharMehdad AlessandroMarchetti DaniloGiampiccolo @@ -4702,8 +4702,8 @@ negri-etal-2012-chinese - Janne BondiJohannessen - JoelPriestley + Janne BondiJohannessen + JoelPriestley KristinHagen AndersNøklestad AndréLynum @@ -4715,10 +4715,10 @@ JonathonRead - DanFlickinger + DanFlickinger RebeccaDridan StephanOepen - LiljaØvrelid + LiljaØvrelid The <fixed-case>W</fixed-case>e<fixed-case>S</fixed-case>earch Corpus, Treebank, and Treecache – A Comprehensive Sample of User-Generated Content 1829–1835 http://www.lrec-conf.org/proceedings/lrec2012/pdf/774_Paper.pdf @@ -4728,7 +4728,7 @@ FrantišekCvrček KarelPala - PavelRychlý + PavelRychlý Legal electronic dictionary for <fixed-case>C</fixed-case>zech 283–287 http://www.lrec-conf.org/proceedings/lrec2012/pdf/775_Paper.pdf @@ -4739,7 +4739,7 @@ ThomasKaspersson ChristianSmith HenrikDanielsson - ArneJönsson + ArneJönsson This also affects the context - Errors in extraction based summaries 173–178 http://www.lrec-conf.org/proceedings/lrec2012/pdf/776_Paper.pdf @@ -4748,7 +4748,7 @@ ClaudiaSoria - NúriaBel + NúriaBel KhalidChoukri JosephMariani MonicaMonachini @@ -4784,7 +4784,7 @@ JörgFrommer BerndMichaelis - DietmarRösner + DietmarRösner AndreasWendemuth RafaelFriesen MatthiasHaase @@ -4802,7 +4802,7 @@ AttilaZséder GáborRecski - DánielVarga + DánielVarga AndrásKornai Rapid creation of large-scale corpora and frequency dictionaries 1462–1465 @@ -4824,7 +4824,7 @@ SafaIsmael StephenGrimes DavidDoermann - StephanieStrassel + StephanieStrassel Linguistic Resources for Handwriting Recognition and Translation Evaluation 3951–3955 http://www.lrec-conf.org/proceedings/lrec2012/pdf/785_Paper.pdf @@ -4841,10 +4841,10 @@ origlia-alfano-2012-prosomarker - HelmerStrik + HelmerStrik JozefColpaert Joostvan Doremalen - CatiaCucchiarini + CatiaCucchiarini The <fixed-case>DISCO</fixed-case> <fixed-case>ASR</fixed-case>-based <fixed-case>CALL</fixed-case> system: practicing <fixed-case>L</fixed-case>2 oral skills and beyond 2702–2707 http://www.lrec-conf.org/proceedings/lrec2012/pdf/787_Paper.pdf @@ -4853,7 +4853,7 @@ UtkuŞirin - RuketÇakıcı + RuketÇakıcı DenizZeyrek <fixed-case>METU</fixed-case> <fixed-case>T</fixed-case>urkish Discourse Bank Browser 2808–2812 @@ -4887,7 +4887,7 @@ quarteroni-etal-2012-evaluating - Maria TeresaPazienza + Maria TeresaPazienza NoemiScarpato ArmandoStellato Application of a Semantic Search Algorithm to Semi-Automatic <fixed-case>GUI</fixed-case> Generation @@ -4897,9 +4897,9 @@ pazienza-etal-2012-application - Vanja MladenKaran + Vanja MladenKaran JanŠnajder - Bojana DalbeloBašić + Bojana DalbeloBašić Evaluation of Classification Algorithms and Features for Collocation Extraction in <fixed-case>C</fixed-case>roatian 657–662 http://www.lrec-conf.org/proceedings/lrec2012/pdf/796_Paper.pdf @@ -4938,7 +4938,7 @@ BenjaminWeitz - UlrichSchäfer + UlrichSchäfer A Graphical Citation Browser for the <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology 1718–1722 http://www.lrec-conf.org/proceedings/lrec2012/pdf/805_Paper.pdf @@ -4956,7 +4956,7 @@ wattam-etal-2012-document - AnjaBelz + AnjaBelz AlbertGatt A Repository of Data and Evaluation Resources for Natural Language Generation 4027–4032 @@ -4984,7 +4984,7 @@ ThibaultMondary - AdelineNazarenko + AdelineNazarenko HaïfaZargayouna SabineBarreaux The Quaero Evaluation Initiative on Term Extraction @@ -5005,7 +5005,7 @@ RalfSteinberger - AndreasEisele + AndreasEisele SzymonKlocek SpyridonPilos PatrickSchlüter @@ -5017,7 +5017,7 @@ HebaElfardy - MonaDiab + MonaDiab Simplified guidelines for the creation of Large Scale Dialectal <fixed-case>A</fixed-case>rabic Annotations 371–378 http://www.lrec-conf.org/proceedings/lrec2012/pdf/815_Paper.pdf @@ -5027,8 +5027,8 @@ ChristianFedermann IoannaGiannopoulou - ChristianGirardi - OlivierHamon + ChristianGirardi + OlivierHamon DimitrisMavroeidis SalvatoreMinutoli MarcSchröder @@ -5080,11 +5080,11 @@ IgorOdriozola - EvaNavas - InmaHernaez + EvaNavas + InmaHernaez IñakiSainz IbonSaratxaga - JonSánchez + JonSánchez DanielErro Using an <fixed-case>ASR</fixed-case> database to design a pronunciation evaluation system in <fixed-case>B</fixed-case>asque 4122–4126 @@ -5099,8 +5099,8 @@ AndrzejZuczkowski CinziaBuldorini RicardoPietrobon - AlbertoLavelli - BernardoMagnini + AlbertoLavelli + BernardoMagnini A Corpus of Scientific Biomedical Texts Spanning over 168 Years Annotated for Uncertainty 2009–2014 http://www.lrec-conf.org/proceedings/lrec2012/pdf/823_Paper.pdf @@ -5108,7 +5108,7 @@ bongelli-etal-2012-corpus - DjamelMostefa + DjamelMostefa KhalidChoukri SylvieBrunessaux KarimBoudahmane @@ -5121,10 +5121,10 @@ TingLiu SamiraShaikh - TomekStrzalkowski - AaronBroadwell - JenniferStromer-Galley - SarahTaylor + TomekStrzalkowski + AaronBroadwell + JenniferStromer-Galley + SarahTaylor UmitBoz XiaoaiRen JingsiWu @@ -5138,10 +5138,10 @@ ŞenayKafkas IanLewin DavidMilward - Erikvan Mulligen - JanKors + Erikvan Mulligen + JanKors UdoHahn - DietrichRebholz-Schuhmann + DietrichRebholz-Schuhmann <fixed-case>CALBC</fixed-case>: Releasing the Final Corpora 2923–2926 http://www.lrec-conf.org/proceedings/lrec2012/pdf/827_Paper.pdf @@ -5150,13 +5150,13 @@ JordiAdell - AntonioBonafonte - AntonioCardenal - Marta R.Costa-Jussà - José A. R.Fonollosa + AntonioBonafonte + AntonioCardenal + Marta R.Costa-Jussà + José A. R.Fonollosa AsunciónMoreno - EvaNavas - Eduardo R.Banga + EvaNavas + Eduardo R.Banga <fixed-case>BUCEADOR</fixed-case>, a multi-language search engine for digital libraries 1705–1709 http://www.lrec-conf.org/proceedings/lrec2012/pdf/828_Paper.pdf @@ -5168,7 +5168,7 @@ LaskaLaskova StanislavaKancheva PetyaOsenova - KirilSimov + KirilSimov Linguistic Analysis Processing Line for <fixed-case>B</fixed-case>ulgarian 2959–2964 http://www.lrec-conf.org/proceedings/lrec2012/pdf/829_Paper.pdf @@ -5177,7 +5177,7 @@ JirkaHana - BarboraHladká + BarboraHladká Getting more data – Schoolkids as annotators 4049–4054 http://www.lrec-conf.org/proceedings/lrec2012/pdf/830_Paper.pdf @@ -5204,15 +5204,15 @@ StergosAfantenos - NicholasAsher - FarahBenamara + NicholasAsher + FarahBenamara MyriamBras - CécileFabre - MaiHo-dac + CécileFabre + MaiHo-dac Anne LeDraoulec PhilippeMuller - Marie-PaulePéry-Woodley - LaurentPrévot + Marie-PaulePéry-Woodley + LaurentPrévot JosetteRebeyrolles LudovicTanguy MarianneVergez-Couret @@ -5225,7 +5225,7 @@ AmaliaZahra - JulieCarson-Berndsen + JulieCarson-Berndsen <fixed-case>E</fixed-case>nglish to <fixed-case>I</fixed-case>ndonesian Transliteration to Support <fixed-case>E</fixed-case>nglish Pronunciation Practice 4132–4135 http://www.lrec-conf.org/proceedings/lrec2012/pdf/838_Paper.pdf @@ -5235,8 +5235,8 @@ KataGábor MariannaApidianaki - BenoîtSagot - ÉricVillemonte de La Clergerie + BenoîtSagot + ÉricVillemonte de La Clergerie Boosting the Coverage of a Semantic Lexicon by Automatically Extracted Event Nominalizations 1466–1473 http://www.lrec-conf.org/proceedings/lrec2012/pdf/839_Paper.pdf @@ -5246,7 +5246,7 @@ KhalidChoukri VictoriaArranz - OlivierHamon + OlivierHamon JungyeulPark Using the International Standard Language Resource Number: Practical and Technical Aspects 50–54 @@ -5258,7 +5258,7 @@ ClaireJaja DouglasBriesch JamalLaoudi - ClareVoss + ClareVoss Assessing Divergence Measures for Automated Document Routing in an Adaptive <fixed-case>MT</fixed-case> System 3963–3970 http://www.lrec-conf.org/proceedings/lrec2012/pdf/843_Paper.pdf @@ -5272,7 +5272,7 @@ OscarKoller UweZelle JustusPiater - HermannNey + HermannNey <fixed-case>RWTH</fixed-case>-<fixed-case>PHOENIX</fixed-case>-Weather: A Large Vocabulary Sign Language Recognition and Translation Corpus 3785–3789 http://www.lrec-conf.org/proceedings/lrec2012/pdf/844_Paper.pdf @@ -5282,9 +5282,9 @@ RoldanoCattoni FrancescoCorcoglioniti - ChristianGirardi - BernardoMagnini - LucianoSerafini + ChristianGirardi + BernardoMagnini + LucianoSerafini RobertoZanoli The <fixed-case>K</fixed-case>nowledge<fixed-case>S</fixed-case>tore: an Entity-Based Storage System 3639–3646 @@ -5313,7 +5313,7 @@ Young-MinKim - PatriceBellot + PatriceBellot ElodieFaath MarinDacos Annotated Bibliographical Reference Corpora in Digital Humanities @@ -5325,7 +5325,7 @@ MarieTahon AgnesDelaborde - LaurenceDevillers + LaurenceDevillers Corpus of Children Voices for Mid-level Markers and Affect Bursts Analysis 2366–2369 http://www.lrec-conf.org/proceedings/lrec2012/pdf/853_Paper.pdf @@ -5334,7 +5334,7 @@ SouhirGahbiche-Braham - HélèneBonneau-Maynard + HélèneBonneau-Maynard ThomasLavergne FrançoisYvon Joint Segmentation and <fixed-case>POS</fixed-case> Tagging for <fixed-case>A</fixed-case>rabic Using a <fixed-case>CRF</fixed-case>-based Classifier @@ -5362,7 +5362,7 @@ nakagawa-den-2012-annotation - YoshihikoHayashi + YoshihikoHayashi ChiharuNarawa Classifying Standard Linguistic Processing Functionalities based on Fundamental Data Operation Types 1169–1173 @@ -5372,10 +5372,10 @@ ÉvaSzékely - Joao PauloCabral + Joao PauloCabral MohamedAbou-Zleikha PeterCahill - JulieCarson-Berndsen + JulieCarson-Berndsen Evaluating expressive speech synthesis from audiobook corpora for conversational phrases 3335–3339 http://www.lrec-conf.org/proceedings/lrec2012/pdf/864_Paper.pdf @@ -5383,12 +5383,12 @@ szekely-etal-2012-evaluating - Juan MaríaGarrido + Juan MaríaGarrido YesikaLaplaza MontseMarquina AndreaPearman José GregorioEscalada - Miguel ÁngelRodríguez + Miguel ÁngelRodríguez AnaArmenta The <fixed-case>I</fixed-case>3<fixed-case>MEDIA</fixed-case> speech database: a trilingual annotated corpus for the analysis and synthesis of emotional speech 1197–1202 @@ -5397,7 +5397,7 @@ garrido-etal-2012-i3media - DavidElson + DavidElson <fixed-case>D</fixed-case>rama<fixed-case>B</fixed-case>ank: Annotating Agency in Narrative Discourse 2813–2819 http://www.lrec-conf.org/proceedings/lrec2012/pdf/866_Paper.pdf @@ -5406,7 +5406,7 @@ AlessioBosca - LucaDini + LucaDini MilenKouylekov MarcoTrevisan <fixed-case>L</fixed-case>inguagrid: a network of Linguistic and Semantic Services for the <fixed-case>I</fixed-case>talian Language. @@ -5427,7 +5427,7 @@ VictoriaArranz - OlivierHamon + OlivierHamon On the Way to a Legal Sharing of Web Applications in <fixed-case>NLP</fixed-case> 2965–2970 http://www.lrec-conf.org/proceedings/lrec2012/pdf/872_Paper.pdf @@ -5446,10 +5446,10 @@ DavidDoukhan - SophieRosset + SophieRosset AlbertRilliard Christophed’Alessandro - MartineAdda-Decker + MartineAdda-Decker Designing <fixed-case>F</fixed-case>rench Tale Corpora for Entertaining Text To Speech Synthesis 1003–1010 http://www.lrec-conf.org/proceedings/lrec2012/pdf/876_Paper.pdf @@ -5458,7 +5458,7 @@ HelenAristar-Dry - SebastianDrude + SebastianDrude MenzoWindhouwer JostGippert IrinaNevskaya @@ -5469,7 +5469,7 @@ aristar-dry-etal-2012-rendering - RonaldoMartins + RonaldoMartins Le Petit Prince in <fixed-case>UNL</fixed-case> 3201–3204 http://www.lrec-conf.org/proceedings/lrec2012/pdf/879_Paper.pdf @@ -5478,8 +5478,8 @@ AndreaVarga - DanielPreoţiuc-Pietro - FabioCiravegna + DanielPreoţiuc-Pietro + FabioCiravegna Unsupervised document zone identification using probabilistic graphical models 1610–1617 http://www.lrec-conf.org/proceedings/lrec2012/pdf/881_Paper.pdf @@ -5487,9 +5487,9 @@ varga-etal-2012-unsupervised - MariaFuentes - HoracioRodríguez - JordiTurmo + MariaFuentes + HoracioRodríguez + JordiTurmo Summarizing a multimodal set of documents in a Smart Room 2553–2558 http://www.lrec-conf.org/proceedings/lrec2012/pdf/882_Paper.pdf @@ -5498,9 +5498,9 @@ Gerardde Melo - Collin F.Baker - NancyIde - Rebecca J.Passonneau + Collin F.Baker + NancyIde + Rebecca J.Passonneau ChristianeFellbaum Empirical Comparisons of <fixed-case>MASC</fixed-case> Word Sense Annotations 3036–3043 @@ -5509,10 +5509,10 @@ de-melo-etal-2012-empirical - StephanieStrassel + StephanieStrassel AmandaMorris - JonathanFiscus - ChristopherCaruso + JonathanFiscus + ChristopherCaruso HaejoongLee PaulOver JamesFiumara @@ -5528,7 +5528,7 @@ DhouhaBouamor NasredineSemmar - PierreZweigenbaum + PierreZweigenbaum Identifying bilingual Multi-Word Expressions for Statistical Machine Translation 674–679 http://www.lrec-conf.org/proceedings/lrec2012/pdf/886_Paper.pdf @@ -5558,10 +5558,10 @@ rapp-etal-2012-identifying - SebastianDrude - DaanBroeder + SebastianDrude + DaanBroeder PaulTrilsbeek - PeterWittenburg + PeterWittenburg The Language Archive — a new hub for language resources 3264–3267 http://www.lrec-conf.org/proceedings/lrec2012/pdf/891_Paper.pdf @@ -5580,7 +5580,7 @@ khademian-etal-2012-holistic - NataliaLoukachevitch + NataliaLoukachevitch Automatic Term Recognition Needs Multiple Evidence 2401–2407 http://www.lrec-conf.org/proceedings/lrec2012/pdf/893_Paper.pdf @@ -5590,8 +5590,8 @@ SudheerKolachina RashmiPrasad - Dipti MisraSharma - AravindJoshi + Dipti MisraSharma + AravindJoshi Evaluation of Discourse Relation Annotation in the <fixed-case>H</fixed-case>indi Discourse Relation Bank 823–828 http://www.lrec-conf.org/proceedings/lrec2012/pdf/894_Paper.pdf @@ -5608,8 +5608,8 @@ IrinaTemnikova - ConstantinOrasan - RuslanMitkov + ConstantinOrasan + RuslanMitkov <fixed-case>CLCM</fixed-case> - A Linguistic Resource for Effective Simplification of Instructions in the Crisis Management Domain and its Evaluations 3007–3014 http://www.lrec-conf.org/proceedings/lrec2012/pdf/898_Paper.pdf @@ -5638,7 +5638,7 @@ Mohammad HosseinElahimanesh - BehrouzMinaei + BehrouzMinaei HosseinMalekinezhad Improving K-Nearest Neighbor Efficacy for <fixed-case>F</fixed-case>arsi Text Classification 1618–1621 @@ -5658,7 +5658,7 @@ ToshinobuOgiso MamoruKomachi YasuharuDen - YujiMatsumoto + YujiMatsumoto <fixed-case>U</fixed-case>ni<fixed-case>D</fixed-case>ic for Early Middle <fixed-case>J</fixed-case>apanese: a Dictionary for Morphological Analysis of Classical <fixed-case>J</fixed-case>apanese 911–915 http://www.lrec-conf.org/proceedings/lrec2012/pdf/906_Paper.pdf @@ -5678,7 +5678,7 @@ EgoitzLaparra - GermanRigau + GermanRigau PiekVossen Mapping <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et to the <fixed-case>K</fixed-case>yoto ontology 2584–2589 @@ -5698,7 +5698,7 @@ MariaEskevich - Gareth J.F.Jones + Gareth J.F.Jones MarthaLarson RoelandOrdelman Creating a Data Collection for Evaluating Rich Speech Retrieval @@ -5719,7 +5719,7 @@ ChristianChiarcos SebastianHellmann SebastianNordhoff - StevenMoran + StevenMoran RichardLittauer JudithEckle-Kohler IrynaGurevych @@ -5734,8 +5734,8 @@ MehdiManshadi - JamesAllen - MarySwift + JamesAllen + MarySwift An Annotation Scheme for Quantifier Scope Disambiguation 1546–1553 http://www.lrec-conf.org/proceedings/lrec2012/pdf/914_Paper.pdf @@ -5752,8 +5752,8 @@ EleftheriaAhtaridis - ChristopherCieri - DeniseDiPersio + ChristopherCieri + DeniseDiPersio <fixed-case>LDC</fixed-case> Language Resource Database: Building a Bibliographic Database 1723–1728 http://www.lrec-conf.org/proceedings/lrec2012/pdf/916_Paper.pdf @@ -5770,7 +5770,7 @@ giannoulis-potamianos-2012-hierarchical - Anil KumarSingh + Anil KumarSingh A Concise Query Language with Search and Transform Operations for Corpora with Multiple Levels of Annotation 1490–1497 http://www.lrec-conf.org/proceedings/lrec2012/pdf/919_Paper.pdf @@ -5790,7 +5790,7 @@ GideonKotzé VincentVandeghinste ScottMartens - JörgTiedemann + JörgTiedemann Large aligned treebanks for syntax-based machine translation 467–473 http://www.lrec-conf.org/proceedings/lrec2012/pdf/924_Paper.pdf @@ -5798,19 +5798,19 @@ kotze-etal-2012-large - IngunaSkadiņa + IngunaSkadiņa AhmetAker NikosMastropavlos FangzhongSu - DanTufis - MatejaVerlic - AndrejsVasiļjevs + DanTufis + MatejaVerlic + AndrejsVasiļjevs BogdanBabych - PaulClough - RobertGaizauskas + PaulClough + RobertGaizauskas NikosGlaros - Monica LestariParamita - MārcisPinnis + Monica LestariParamita + MārcisPinnis Collecting and Using Comparable Corpora for Statistical Machine Translation 438–445 http://www.lrec-conf.org/proceedings/lrec2012/pdf/925_Paper.pdf @@ -5819,7 +5819,7 @@ MaciejPiasecki - RadoslawRamocki + RadoslawRamocki MarekMaziarz Recognition of <fixed-case>P</fixed-case>olish Derivational Relations Based on Supervised Learning Scheme 916–922 @@ -5828,7 +5828,7 @@ piasecki-etal-2012-recognition - SílviaMoraes + SílviaMoraes VeraLima Combining Formal Concept Analysis and semantic information for building ontological structures from texts : an exploratory study 3653–3660 @@ -5854,7 +5854,7 @@ RoserMorante FrederikVaassen Jannekevan de Loo - WalterDaelemans + WalterDaelemans The Netlog Corpus. A Resource for the Study of <fixed-case>F</fixed-case>lemish <fixed-case>D</fixed-case>utch <fixed-case>I</fixed-case>nternet Language 1569–1572 http://www.lrec-conf.org/proceedings/lrec2012/pdf/938_Paper.pdf @@ -5863,8 +5863,8 @@ IskandarKeskes - FarahBenamara - Lamia HadrichBelguith + FarahBenamara + Lamia HadrichBelguith Clause-based Discourse Segmentation of <fixed-case>A</fixed-case>rabic Texts 2826–2832 http://www.lrec-conf.org/proceedings/lrec2012/pdf/939_Paper.pdf @@ -5874,7 +5874,7 @@ YuichirohMatsubayashi YusukeMiyao - AkikoAizawa + AkikoAizawa Building <fixed-case>J</fixed-case>apanese Predicate-argument Structure Corpus using Lexical Conceptual Structure 1554–1558 http://www.lrec-conf.org/proceedings/lrec2012/pdf/941_Paper.pdf @@ -5892,7 +5892,7 @@ OlgaUryupina - MassimoPoesio + MassimoPoesio Domain-specific vs. Uniform Modeling for Coreference Resolution 187–191 http://www.lrec-conf.org/proceedings/lrec2012/pdf/944_Paper.pdf @@ -5900,7 +5900,7 @@ uryupina-poesio-2012-domain - AlexandraBalahur + AlexandraBalahur Jesús M.Hermida Extending the <fixed-case>E</fixed-case>moti<fixed-case>N</fixed-case>et Knowledge Base to Improve the Automatic Detection of Implicitly Expressed Emotions from Text 1207–1214 @@ -5912,7 +5912,7 @@ ElsaTolone StavroulaVoyatzi ClaudeMartineau - MatthieuConstant + MatthieuConstant Extending the adverbial coverage of a <fixed-case>F</fixed-case>rench morphological lexicon 2856–2862 http://www.lrec-conf.org/proceedings/lrec2012/pdf/946_Paper.pdf @@ -5930,7 +5930,7 @@ cristea-etal-2012-reconstructing - MārcisPinnis + MārcisPinnis <fixed-case>L</fixed-case>atvian and <fixed-case>L</fixed-case>ithuanian Named Entity Recognition with <fixed-case>T</fixed-case>ilde<fixed-case>NER</fixed-case> 1258–1265 http://www.lrec-conf.org/proceedings/lrec2012/pdf/948_Paper.pdf @@ -5956,7 +5956,7 @@ PetyaOsenova - KirilSimov + KirilSimov The Political Speech Corpus of <fixed-case>B</fixed-case>ulgarian 1744–1747 http://www.lrec-conf.org/proceedings/lrec2012/pdf/956_Paper.pdf @@ -5965,7 +5965,7 @@ EricKow - AnjaBelz + AnjaBelz <fixed-case>LG</fixed-case>-Eval: A Toolkit for Creating Online Language Evaluation Experiments 4033–4037 http://www.lrec-conf.org/proceedings/lrec2012/pdf/957_Paper.pdf @@ -6015,9 +6015,9 @@ PauloFernandes LuceleneLopes - Carlos A.Prolo + Carlos A.Prolo AfonsoSales - RenataVieira + RenataVieira A Fast, Memory Efficient, Scalable and Multilingual Dictionary Retriever 2520–2524 http://www.lrec-conf.org/proceedings/lrec2012/pdf/966_Paper.pdf @@ -6026,7 +6026,7 @@ AndréSantos - José JoãoAlmeida + José JoãoAlmeida NunoCarvalho Structural alignment of plain text books 2069–2074 @@ -6036,7 +6036,7 @@ SenizDemir - İlknur DurgarEl-Kahlout + İlknur DurgarEl-Kahlout ErdemUnal HamzaKaya <fixed-case>T</fixed-case>urkish Paraphrase Corpus @@ -6046,8 +6046,8 @@ demir-etal-2012-turkish - Keith J.Miller - Elizabeth SchroederRicherson + Keith J.Miller + Elizabeth SchroederRicherson SarahMcLeod JamesFinley AaronSchein @@ -6060,7 +6060,7 @@ RyanGeorgi FeiXia - WilliamLewis + WilliamLewis Measuring the Divergence of Dependency Structures Cross-Linguistically to Improve Syntactic Projection Algorithms 771–778 http://www.lrec-conf.org/proceedings/lrec2012/pdf/971_Paper.pdf @@ -6077,7 +6077,7 @@ song-xia-2012-using - Joao PauloCabral + Joao PauloCabral MarkKane ZeeshanAhmed MohamedAbou-Zleikha @@ -6085,7 +6085,7 @@ AmaliaZahra KaluOgbureke PeterCahill - JulieCarson-Berndsen + JulieCarson-Berndsen StephanSchlögl Rapidly Testing the Interaction Model of a Pronunciation Training System via <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z 4136–4142 @@ -6094,8 +6094,8 @@ cabral-etal-2012-rapidly - PēterisPaikens - NormundsGrūzītis + PēterisPaikens + NormundsGrūzītis An implementation of a <fixed-case>L</fixed-case>atvian resource grammar in Grammatical Framework 1680–1685 http://www.lrec-conf.org/proceedings/lrec2012/pdf/976_Paper.pdf @@ -6116,7 +6116,7 @@ SilkeScheible Richard J.Whitt MartinDurrell - PaulBennett + PaulBennett <fixed-case>GATE</fixed-case>to<fixed-case>G</fixed-case>er<fixed-case>M</fixed-case>an<fixed-case>C</fixed-case>: A <fixed-case>GATE</fixed-case>-based Annotation Pipeline for Historical <fixed-case>G</fixed-case>erman 3611–3617 http://www.lrec-conf.org/proceedings/lrec2012/pdf/978_Paper.pdf @@ -6124,9 +6124,9 @@ scheible-etal-2012-gatetogermanc - JoãoSilva - LuísaCoheur - ÂngelaCosta + JoãoSilva + LuísaCoheur + ÂngelaCosta IsabelTrancoso Dealing with unknown words in statistical machine translation 3911–3981 @@ -6135,7 +6135,7 @@ silva-etal-2012-dealing - EricCharton + EricCharton MichelGagnon A disambiguation resource extracted from <fixed-case>W</fixed-case>ikipedia for semantic annotation 3665–3671 @@ -6155,7 +6155,7 @@ AlessandroLenci - SimonettaMontemagni + SimonettaMontemagni GiuliaVenturi Maria GraziaCutrullà Enriching the <fixed-case>ISST</fixed-case>-<fixed-case>TANL</fixed-case> Corpus with Semantic Frames @@ -6167,7 +6167,7 @@ KengoOhta MasatoshiTsuchiya - SeiichiNakagawa + SeiichiNakagawa Developing Partially-Transcribed Speech Corpus from Edited Transcriptions 3399–3404 http://www.lrec-conf.org/proceedings/lrec2012/pdf/987_Paper.pdf @@ -6206,8 +6206,8 @@ ChristianFedermann EleftheriosAvramidis - Marta R.Costa-jussà - Josefvan Genabith + Marta R.Costa-jussà + Josefvan Genabith MaiteMelero PavelPecina The <fixed-case>ML</fixed-case>4<fixed-case>HMT</fixed-case> Workshop on Optimising the Division of Labour in Hybrid Machine Translation @@ -6217,17 +6217,17 @@ federmann-etal-2012-ml4hmt - MariaGavrilidou - PennyLabropoulou - ElinaDesipri + MariaGavrilidou + PennyLabropoulou + ElinaDesipri SteliosPiperidis - HarisPapageorgiou + HarisPapageorgiou MonicaMonachini FrancescaFrontini ThierryDeclerck GilFrancopoulo VictoriaArranz - ValerieMapelli + ValerieMapelli The <fixed-case>META</fixed-case>-<fixed-case>SHARE</fixed-case> Metadata Schema for the Description of Language Resources 1090–1097 http://www.lrec-conf.org/proceedings/lrec2012/pdf/998_Paper.pdf @@ -6255,7 +6255,7 @@ MonicaGavrila - Waltherv. Hahn + Waltherv. Hahn CristinaVertan Same domain different discourse style - A case study on Language Resources for data-driven Machine Translation 3441–3446 @@ -6266,8 +6266,8 @@ VinodkumarPrabhakaran HuzaifaNeralwala - OwenRambow - MonaDiab + OwenRambow + MonaDiab Annotations for Power Relations on Email Threads 806–811 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1006_Paper.pdf @@ -6275,11 +6275,11 @@ prabhakaran-etal-2012-annotations - William J.Corvey + William J.Corvey SudhaVerma - SarahVieweg - MarthaPalmer - James H.Martin + SarahVieweg + MarthaPalmer + James H.Martin Foundations of a Multilayer Annotation Framework for <fixed-case>T</fixed-case>witter Communications During Crisis Events http://www.lrec-conf.org/proceedings/lrec2012/pdf/1008_Paper.pdf In times of mass emergency, vast amounts of data are generated via computer-mediated communication (CMC) that are difficult to manually collect and organize into a coherent picture. Yet valuable information is broadcast, and can provide useful insight into time- and safety-critical situations if captured and analyzed efficiently and effectively. We describe a natural language processing component of the EPIC (Empowering the Public with Information in Crisis) Project infrastructure, designed to extract linguistic and behavioral information from tweet text to aid in the task of information integration. The system incorporates linguistic annotation, in the form of Named Entity Tagging, as well as behavioral annotations to capture tweets contributing to situational awareness and analyze the information type of the tweet content. We show classification results and describe future integration of these classifiers in the larger EPIC infrastructure. @@ -6298,7 +6298,7 @@ StefanScherer GeorgLayher - JohnKane + JohnKane HeikoNeumann NickCampbell An audiovisual political speech analysis incorporating eye-tracking and perception data @@ -6308,7 +6308,7 @@ scherer-etal-2012-audiovisual - Chris IrwinDavis + Chris IrwinDavis <fixed-case>T</fixed-case>ajik-<fixed-case>F</fixed-case>arsi <fixed-case>P</fixed-case>ersian Transliteration Using Statistical Machine Translation 3988–3995 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1012_Paper.pdf @@ -6328,7 +6328,7 @@ AnnaRumshisky NickBotchan SophieKushkuley - JamesPustejovsky + JamesPustejovsky Word Sense Inventories by Non-Experts. 4055–4059 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1014_Paper.pdf @@ -6351,7 +6351,7 @@ DanielCapurro FeiXia LucyVanderwende - MelihaYetisgen-Yildiz + MelihaYetisgen-Yildiz Statistical Section Segmentation in Free-Text Clinical Records 2001–2008 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1016_Paper.pdf @@ -6386,10 +6386,10 @@ grezka-poudat-2012-building - EnekoAgirre + EnekoAgirre AnderBarrena - Oier Lopezde Lacalle - AitorSoroa + Oier Lopezde Lacalle + AitorSoroa SamuelFernando MarkStevenson Matching Cultural Heritage items to <fixed-case>W</fixed-case>ikipedia @@ -6401,7 +6401,7 @@ JohnVogel MarcVerhagen - JamesPustejovsky + JamesPustejovsky <fixed-case>ATLIS</fixed-case>: Identifying Locational Information in Text Automatically 612–616 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1022_Paper.pdf @@ -6474,8 +6474,8 @@ ferreira-etal-2012-common - GuidoBoella - Luigidi Caro + GuidoBoella + Luigidi Caro LlioHumphreys LivioRobaldo Leonvan der Torre @@ -6501,7 +6501,7 @@ DanielBauer HagenFürstenau - OwenRambow + OwenRambow The Dependency-Parsed <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Corpus 3861–3867 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1037_Paper.pdf @@ -6509,7 +6509,7 @@ bauer-etal-2012-dependency - MichaelRosner + MichaelRosner AlbertGatt AndrewAttard JanJoachimsen @@ -6521,7 +6521,7 @@ Emília GarciaCasademont - AntonioBonafonte + AntonioBonafonte AsunciónMoreno Building Synthetic Voices in the <fixed-case>META</fixed-case>-<fixed-case>NET</fixed-case> Framework 3322–3326 @@ -6533,7 +6533,7 @@ HidetsuguNanba ToshiyukiTakezawa KiyokoUchiyama - AkikoAizawa + AkikoAizawa Automatic Translation of Scholarly Terms into Patent Terms Using Synonym Extraction Techniques 3447–3451 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1043_Paper.pdf @@ -6542,7 +6542,7 @@ MarcoDinarelli - SophieRosset + SophieRosset Tree-Structured Named Entity Recognition on <fixed-case>OCR</fixed-case> Data: Analysis, Processing and Results 1266–1272 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1046_Paper.pdf @@ -6552,7 +6552,7 @@ JanPomikálek MilošJakubíček - PavelRychlý + PavelRychlý Building a 70 billion word corpus of <fixed-case>E</fixed-case>nglish from <fixed-case>C</fixed-case>lue<fixed-case>W</fixed-case>eb 502–506 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1047_Paper.pdf @@ -6595,7 +6595,7 @@ popescu-2012-buildind - WilliamBlack + WilliamBlack RobProcter StevenGray SophiaAnaniadou @@ -6607,7 +6607,7 @@ MuhammadAbdul-Mageed - MonaDiab + MonaDiab <fixed-case>AWATIF</fixed-case>: A Multi-Genre Corpus for <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic Subjectivity and Sentiment Analysis 3907–3914 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1057_Paper.pdf @@ -6617,7 +6617,7 @@ SunaoHara NorihideKitaoka - KazuyaTakeda + KazuyaTakeda Causal analysis of task completion errors in spoken music retrieval interactions 1365–1372 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1059_Paper.pdf @@ -6626,7 +6626,7 @@ KrešimirŠojat - Nives MikelićPreradović + Nives MikelićPreradović MarkoTadić Generation of Verbal Stems in Derivationally Rich Language 928–933 @@ -6639,7 +6639,7 @@ RogerGranada BrenoMeneghetti LeonardoCarvalho - RenataVieira + RenataVieira <fixed-case>C</fixed-case>orpus+<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et thesaurus generation for ontology enriching 3463–3467 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1062_Paper.pdf @@ -6674,7 +6674,7 @@ EmmaBarker - RobertGaizauskas + RobertGaizauskas Assessing the Comparability of News Texts 3996–4003 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1069_Paper.pdf @@ -6683,7 +6683,7 @@ Nur-HanaSamsudin - MarkLee + MarkLee Building Text-to-Speech Systems for Resource Poor Languages 3327–3334 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1070_Paper.pdf @@ -6710,7 +6710,7 @@ JenniferWilliams - GrahamKatz + GrahamKatz A New <fixed-case>T</fixed-case>witter Verb Lexicon for Natural Language Processing 293–298 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1076_Paper.pdf @@ -6718,9 +6718,9 @@ williams-katz-2012-new - JonathanWashington + JonathanWashington MirlanIpasov - FrancisTyers + FrancisTyers A finite-state morphological transducer for <fixed-case>K</fixed-case>yrgyz 934–940 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1077_Paper.pdf @@ -6728,9 +6728,9 @@ washington-etal-2012-finite - MarilynWalker - Jean FoxTree - PranavAnand + MarilynWalker + Jean FoxTree + PranavAnand RobAbbott JosephKing A Corpus for Research on Deliberation and Debate @@ -6752,7 +6752,7 @@ VerónicaPérez-Rosas CarmenBanea - RadaMihalcea + RadaMihalcea Learning Sentiment Lexicons in <fixed-case>S</fixed-case>panish 3077–3081 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1081_Paper.pdf @@ -6761,7 +6761,7 @@ ErwinFernandez-Ordoñez - RadaMihalcea + RadaMihalcea SamerHassan Unsupervised Word Sense Disambiguation with Multilingual Representations 847–851 @@ -6789,7 +6789,7 @@ KirkRoberts TravisGoodwin - Sanda M.Harabagiu + Sanda M.Harabagiu Annotating Spatial Containment Relations Between Events 3052–3059 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1091_Paper.pdf @@ -6799,7 +6799,7 @@ JacobAndreas SaraRosenthal - KathleenMcKeown + KathleenMcKeown Annotating Agreement and Disagreement in Threaded Discussion 818–822 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1095_Paper.pdf @@ -6832,7 +6832,7 @@ janssen-2012-neotag - HarryBunt + HarryBunt MichaelKipp VolhaPetukhova Using <fixed-case>D</fixed-case>i<fixed-case>AML</fixed-case> and <fixed-case>ANVIL</fixed-case> for multimodal dialogue annotations @@ -6860,7 +6860,7 @@ caselli-etal-2012-assigning - MarilynWalker + MarilynWalker GraceLin JenniferSawyer An Annotated Corpus of Film Dialogue for Learning and Characterizing Character Style @@ -6878,10 +6878,10 @@ bigi-2012-sppas-tool - ChristopherCieri + ChristopherCieri MarianReed - DeniseDiPersio - MarkLiberman + DeniseDiPersio + MarkLiberman Twenty Years of Language Resource Development and Distribution: A Progress Report on <fixed-case>LDC</fixed-case> Activities 60–65 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1117_Paper.pdf @@ -6898,12 +6898,12 @@ boruta-jastrzebska-2012-phonemic - SebastianStüker + SebastianStüker FlorianKraft ChristianMohr TeresaHerrmann EunahCho - AlexWaibel + AlexWaibel The <fixed-case>KIT</fixed-case> Lecture Corpus for Speech Translation 3409–3414 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1121_Paper.pdf @@ -6921,8 +6921,8 @@ bigi-etal-2012-orthographic - JamesPustejovsky - JessicaMoszkowicz + JamesPustejovsky + JessicaMoszkowicz The Role of Model Testing in Standards Development: The Case of <fixed-case>ISO</fixed-case>-Space 3060–3063 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1123_Paper.pdf @@ -6930,7 +6930,7 @@ pustejovsky-moszkowicz-2012-role - BenoîtSagot + BenoîtSagot RosaStern Aleda, a free large-scale entity database for <fixed-case>F</fixed-case>rench 1273–1276 @@ -6940,9 +6940,9 @@ MarcelloFederico - SebastianStüker + SebastianStüker LuisaBentivogli - MichaelPaul + MichaelPaul MauroCettolo TeresaHerrmann JanNiehues @@ -6954,7 +6954,7 @@ federico-etal-2012-iwslt - BenoîtSagot + BenoîtSagot DarjaFišer Cleaning noisy wordnets 3468–3472 @@ -6971,10 +6971,10 @@ hernandez-2012-tackling - DjaméSeddah - MarieCandito - BenoitCrabbé - Enrique HenestrozaAnguiano + DjaméSeddah + MarieCandito + BenoitCrabbé + Enrique HenestrozaAnguiano Ubiquitous Usage of a Broad Coverage <fixed-case>F</fixed-case>rench Corpus: Processing the <fixed-case>E</fixed-case>st <fixed-case>R</fixed-case>epublicain corpus 3249–3254 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1130_Paper.pdf @@ -6983,7 +6983,7 @@ ValérieHanoka - BenoîtSagot + BenoîtSagot <fixed-case>W</fixed-case>ordnet extension made simple: A multilingual lexicon-based approach using wiki resources 3473–3478 http://www.lrec-conf.org/proceedings/lrec2012/pdf/1131_Paper.pdf @@ -6991,7 +6991,7 @@ hanoka-sagot-2012-wordnet - ShyamAgrawal + ShyamAgrawal ShwetaSinha PoojaSingh JesperOlson diff --git a/data/xml/L14.xml b/data/xml/L14.xml index 2628a59806..d3194272b6 100644 --- a/data/xml/L14.xml +++ b/data/xml/L14.xml @@ -3,15 +3,15 @@ Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14) - NicolettaCalzolari - KhalidChoukri + NicolettaCalzolari + KhalidChoukri ThierryDeclerck HrafnLoftsson - BenteMaegaard - JosephMariani - AsuncionMoreno - JanOdijk - SteliosPiperidis + BenteMaegaard + JosephMariani + AsuncionMoreno + JanOdijk + SteliosPiperidis European Language Resources Association (ELRA)
Reykjavik, Iceland
May @@ -23,7 +23,7 @@ BenVerhoeven - WalterDaelemans + WalterDaelemans <fixed-case>CL</fixed-case>i<fixed-case>PS</fixed-case> Stylometry Investigation (<fixed-case>CSI</fixed-case>) corpus: A <fixed-case>D</fixed-case>utch corpus for the detection of age, gender, personality, sentiment and deception in text 3081–3085 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1_Paper.pdf @@ -59,7 +59,7 @@ claveau-kijak-2014-generating - KirilSimov + KirilSimov IlianaSimova GinkaIvanova MariaMateva @@ -81,7 +81,7 @@ solorio-etal-2014-sockpuppet - AndreBlessing + AndreBlessing JonasKuhn Textual Emigration Analysis (<fixed-case>TEA</fixed-case>) 2089–2093 @@ -102,7 +102,7 @@ AmelFraisse - PatrickParoubek + PatrickParoubek Toward a unifying model for Opinion, Sentiment and Emotion information extraction 3881–3886 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1010_Paper.pdf @@ -111,8 +111,8 @@ ThorstenTrippel - DaanBroeder - MatejDurco + DaanBroeder + MatejDurco OddrunOhren Towards automatic quality assessment of component metadata 3851–3856 @@ -121,11 +121,11 @@ trippel-etal-2014-towards - ClaireBonial + ClaireBonial JuliaBonn KathrynConger Jena D.Hwang - MarthaPalmer + MarthaPalmer <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank: Semantics of New Predicate Types 3013–3019 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1012_Paper.pdf @@ -163,10 +163,10 @@ kobylinski-2014-polita - SiddharthJain + SiddharthJain ArchnaBhatia AngeliqueRein - EduardHovy + EduardHovy A Corpus of Participant Roles in Contentious Discussions 1751–1756 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1019_Paper.pdf @@ -187,12 +187,12 @@ VeraCabarrão HelenaMoniz FernandoBatista - RicardoRibeiro - NunoMamede + RicardoRibeiro + NunoMamede HugoMeinedo IsabelTrancoso Ana IsabelMata - David Martinsde Matos + David Martinsde Matos Revising the annotation of a Broadcast News corpus: a linguistic approach 3908–3913 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1020_Paper.pdf @@ -211,7 +211,7 @@ takala-etal-2014-gold - DamirCavar + DamirCavar MalgorzataCavar Visualization of Language Relations and Families: <fixed-case>M</fixed-case>ulti<fixed-case>T</fixed-case>ree 698–701 @@ -222,7 +222,7 @@ GoranGlavaš JanŠnajder - Marie-FrancineMoens + Marie-FrancineMoens ParisaKordjamshidi <fixed-case>H</fixed-case>i<fixed-case>E</fixed-case>ve: A Corpus for Extracting Event Hierarchies from News Stories 3678–3683 @@ -231,7 +231,7 @@ glavas-etal-2014-hieve - JamesPustejovsky + JamesPustejovsky ZacharyYocum Image Annotation with <fixed-case>ISO</fixed-case>-Space: Distinguishing Content from Structure 426–431 @@ -241,10 +241,10 @@ OlivierGalibert - JeremyLeixa - GillesAdda + JeremyLeixa + GillesAdda KhalidChoukri - GuillaumeGravier + GuillaumeGravier The <fixed-case>ETAPE</fixed-case> speech processing evaluation 3995–3999 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1027_Paper.pdf @@ -262,7 +262,7 @@ kamholz-etal-2014-panlex - DanTufiş + DanTufiş Large <fixed-case>SMT</fixed-case> data-sets extracted from <fixed-case>W</fixed-case>ikipedia 656–663 http://www.lrec-conf.org/proceedings/lrec2014/pdf/103_Paper.pdf @@ -270,7 +270,7 @@ tufis-2014-large - Valeriade Paiva + Valeriade Paiva LivyReal AlexandreRademaker Gerardde Melo @@ -281,10 +281,10 @@ de-paiva-etal-2014-nomlex - ElisabetComelles - JordiAtserias + ElisabetComelles + JordiAtserias VictoriaArranz - IreneCastellón + IreneCastellón JordiSesé <fixed-case>VERT</fixed-case>a: Facing a Multilingual Experience of a Linguistically-based <fixed-case>MT</fixed-case> Evaluation 2701–2707 @@ -294,7 +294,7 @@ YifanHe - AdamMeyers + AdamMeyers Corpus and Method for Identifying Citations in Non-Academic Text 4316–4319 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1036_Paper.pdf @@ -304,7 +304,7 @@ VanessaLoza ShibamouliLahiri - RadaMihalcea + RadaMihalcea Po-HsiangLai Building a Dataset for Summarization and Keyword Extraction from Emails 2441–2446 @@ -322,7 +322,7 @@ schmidek-barbosa-2014-improving - JuanSoler Company + JuanSoler Company LeoWanner How to Use less Features and Reach Better Performance in Author Gender Identification 1315–1319 @@ -350,7 +350,7 @@ SándorSzeverényi ZsuzsaVárnai PaulTrilsbeek - TamásVáradi + TamásVáradi Languagesindanger.eu - Including Multimedia Language Resources to disseminate Knowledge and Create Educational Material on less-Resourced Languages 530–535 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1046_Paper.pdf @@ -368,7 +368,7 @@ andreeva-etal-2014-cross - BenoîtSagot + BenoîtSagot <fixed-case>D</fixed-case>e<fixed-case>L</fixed-case>ex, a freely-avaible, large-scale and linguistically grounded morphological lexicon for <fixed-case>G</fixed-case>erman 2778–2784 http://www.lrec-conf.org/proceedings/lrec2014/pdf/105_Paper.pdf @@ -377,7 +377,7 @@ PeterBaumann - JanetPierrehumbert + JanetPierrehumbert Using Resource-Rich Languages to Improve Morphological Analysis of Under-Resourced Languages 3355–3359 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1051_Paper.pdf @@ -388,7 +388,7 @@ ClaraBacciu Angelica LoDuca AndreaMarchetti - MaurizioTesconi + MaurizioTesconi Accommodations in Tuscany as Linked Data 3542–3545 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1052_Paper.pdf @@ -399,7 +399,7 @@ PrzemyslawLenkiewicz OlhaShkaravska TwanGoosen - DaanBroeder + DaanBroeder MenzoWindhouwer StephanieRoth OlofOlsson @@ -430,7 +430,7 @@ lonsdale-christensen-2014-combining - PavelSmrz + PavelSmrz JanKouril Semantic Search in Documents Enriched by <fixed-case>LOD</fixed-case>-based Annotations 3724–3727 @@ -440,7 +440,7 @@ ManuelFiorelli - Maria TeresaPazienza + Maria TeresaPazienza ArmandoStellato A Meta-data Driven Platform for Semi-automatic Configuration of Ontology Mediators 4178–4183 @@ -450,7 +450,7 @@ HuijingDeng - GrzegorzChrupała + GrzegorzChrupała Semantic approaches to software component retrieval with <fixed-case>E</fixed-case>nglish queries 3248–3252 http://www.lrec-conf.org/proceedings/lrec2014/pdf/106_Paper.pdf @@ -476,13 +476,13 @@ wolff-etal-2014-missed - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe TimothyDozat NataliaSilveira KatriHaverinen FilipGinter JoakimNivre - Christopher D.Manning + Christopher D.Manning Universal <fixed-case>S</fixed-case>tanford dependencies: A cross-linguistic typology 4585–4592 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1062_Paper.pdf @@ -491,10 +491,10 @@ ReidSwanson - StephanieLukin + StephanieLukin LukeEisenberg ThomasCorcoran - MarilynWalker + MarilynWalker Getting Reliable Annotations for Sarcasm in Online Dialogues 4250–4257 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1063_Paper.pdf @@ -505,7 +505,7 @@ JohannesHellrich SimonClematide UdoHahn - DietrichRebholz-Schuhmann + DietrichRebholz-Schuhmann Collaboratively Annotating Multilingual Parallel Corpora in the Biomedical Domain—some <fixed-case>MANTRA</fixed-case>s 4033–4040 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1064_Paper.pdf @@ -516,7 +516,7 @@ HeeyoungLee MihaiSurdeanu BillMacCartney - DanJurafsky + DanJurafsky On the Importance of Text Analysis for Stock Price Prediction 1170–1175 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1065_Paper.pdf @@ -537,7 +537,7 @@ AntonioSan Martín - Marie-ClaudeL’Homme + Marie-ClaudeL’Homme Definition patterns for predicative terms in specialized lexical resources 3748–3755 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1067_Paper.pdf @@ -580,9 +580,9 @@ YuriBizzoni FedericoBoschetti HarryDiakoff - RiccardoDel Gratta + RiccardoDel Gratta MonicaMonachini - GregoryCrane + GregoryCrane The Making of <fixed-case>A</fixed-case>ncient <fixed-case>G</fixed-case>reek <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et 1140–1147 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1071_Paper.pdf @@ -591,10 +591,10 @@ FeiXia - WilliamLewis - Michael WayneGoodman + WilliamLewis + Michael WayneGoodman JoshuaCrowgey - Emily M.Bender + Emily M.Bender Enriching <fixed-case>ODIN</fixed-case> 3151–3157 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1072_Paper.pdf @@ -602,7 +602,7 @@ xia-etal-2014-enriching - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu <fixed-case>T</fixed-case>urkish Treebank as a Gold Standard for Morphological Disambiguation and Its Influence on Parsing 3360–3365 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1073_Paper.pdf @@ -638,7 +638,7 @@ DanaDannélls - NormundsGruzitis + NormundsGruzitis Extracting a bilingual semantic grammar from <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et-annotated corpora 2466–2473 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1079_Paper.pdf @@ -677,7 +677,7 @@ VictoriaRosén PetterHaugereid MarthaThunes - Gyri S.Losnegaard + Gyri S.Losnegaard HelgeDyvik The Interplay Between Lexical and Syntactic Resources in Incremental Parsebanking 1617–1624 @@ -689,7 +689,7 @@ RafalRak JacobCarter AndrewRowley - Riza TheresaBatista-Navarro + Riza TheresaBatista-Navarro SophiaAnaniadou Interoperability and Customisation of Annotation Schemata in Argo 3837–3842 @@ -710,11 +710,11 @@ NataliaSilveira TimothyDozat - Marie-Catherinede Marneffe - SamuelBowman + Marie-Catherinede Marneffe + SamuelBowman MiriamConnor JohnBauer - ChrisManning + ChrisManning A Gold Standard Dependency Corpus for <fixed-case>E</fixed-case>nglish 2897–2904 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1089_Paper.pdf @@ -731,7 +731,7 @@ SimonScerri - Behrang Q.Zadeh + Behrang Q.Zadeh MaciejDabrowski IsmaelRivera Extracting Information for Context-aware Meeting Preparation @@ -742,8 +742,8 @@ KaiHong - JohnConroy - BenoitFavre + JohnConroy + BenoitFavre AlexKulesza HuiLin AniNenkova @@ -755,7 +755,7 @@ ZhiyiSong - StephanieStrassel + StephanieStrassel HaejoongLee KevinWalker JonathanWright @@ -777,7 +777,7 @@ VivianeMoreira AlineVillavicencio CarlosRamisch - Maria JoséFinatto + Maria JoséFinatto Comparing the Quality of Focused Crawlers and of the Translation Resources Obtained from them 3572–3578 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1095_Paper.pdf @@ -790,7 +790,7 @@ DirkHovy ArchnaBhatia ManaalFaruqui - ChrisDyer + ChrisDyer Augmenting <fixed-case>E</fixed-case>nglish Adjective Senses with Supersenses 4359–4365 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1096_Paper.pdf @@ -810,7 +810,7 @@ Timvor der Brück AlexanderMehler - ZahurulIslam + ZahurulIslam <fixed-case>C</fixed-case>ol<fixed-case>L</fixed-case>ex.en: Automatically Generating and Evaluating a Full-form Lexicon for <fixed-case>E</fixed-case>nglish 3756–3760 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1099_Paper.pdf @@ -850,7 +850,7 @@ AnthonyRousseau - PaulDeléglise + PaulDeléglise YannickEstève Enhancing the <fixed-case>TED</fixed-case>-<fixed-case>LIUM</fixed-case> Corpus with Selected Data for Language Modeling and More <fixed-case>TED</fixed-case> Talks 3935–3939 @@ -872,7 +872,7 @@ DarjaFišer AlešTavčar - TomažErjavec + TomažErjavec slo<fixed-case>WC</fixed-case>rowd: A crowdsourcing tool for lexicographic tasks 3471–3475 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1106_Paper.pdf @@ -891,9 +891,9 @@ ThomasFrançois - NùriaGala + NùriaGala PatrickWatrin - CédrickFairon + CédrickFairon <fixed-case>FLEL</fixed-case>ex: a graded lexical resource for <fixed-case>F</fixed-case>rench foreign learners 3766–3773 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1108_Paper.pdf @@ -901,12 +901,12 @@ francois-etal-2014-flelex - LucasHilgert + LucasHilgert LuceleneLopes ArturFreitas - RenataVieira + RenataVieira DeniseHogetop - AlineVanin + AlineVanin Building Domain Specific Bilingual Dictionaries 2772–2777 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1112_Paper.pdf @@ -915,7 +915,7 @@ GuillaumeWisniewski - NatalieKübler + NatalieKübler FrançoisYvon A Corpus of Machine Translation Errors Extracted from Translation Students Exercises 3585–3588 @@ -924,7 +924,7 @@ wisniewski-etal-2014-corpus - ClareVoss + ClareVoss StephenTratz JamalLaoudi DouglasBriesch @@ -957,9 +957,9 @@ DavidGraff KevinWalker - StephanieStrassel + StephanieStrassel XiaoyiMa - KarenJones + KarenJones AnnSawyer The <fixed-case>RATS</fixed-case> Collection: Supporting <fixed-case>HLT</fixed-case> Research with Degraded Audio Data 1970–1977 @@ -968,8 +968,8 @@ graff-etal-2014-rats - ChrisHokamp - RadaMihalcea + ChrisHokamp + RadaMihalcea PeterSchuelke Modeling Language Proficiency Using Implicit Feedback 3983–3986 @@ -981,8 +981,8 @@ KevinReschke MartinJankowiak MihaiSurdeanu - ChristopherManning - DanielJurafsky + ChristopherManning + DanielJurafsky Event Extraction Using Distant Supervision 4527–4531 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1127_Paper.pdf @@ -1009,8 +1009,8 @@ SanderWubben - Antalvan den Bosch - EmielKrahmer + Antalvan den Bosch + EmielKrahmer Creating and using large monolingual parallel corpora for sentential paraphrase generation 4292–4299 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1135_Paper.pdf @@ -1020,7 +1020,7 @@ Jayendra RakeshYeka PrasanthKolachina - Dipti MisraSharma + Dipti MisraSharma Benchmarking of <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi parallel corpora 1812–1818 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1137_Paper.pdf @@ -1050,7 +1050,7 @@ NathanGreen - Septina DianLarasati + Septina DianLarasati Votter Corpus: A Corpus of Social Polling Language 3693–3697 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1143_Paper.pdf @@ -1067,7 +1067,7 @@ chen-ng-2014-sinocoreferencer - MohamedMaamouri + MohamedMaamouri AnnBies SethKulick MichaelCiul @@ -1088,8 +1088,8 @@ scheffler-2014-german - HelenHastie - AnjaBelz + HelenHastie + AnjaBelz A Comparative Evaluation Methodology for <fixed-case>NLG</fixed-case> in Interactive Systems 4004–4011 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1147_Paper.pdf @@ -1105,7 +1105,7 @@ ohara-2014-relating - Hans-UlrichKrieger + Hans-UlrichKrieger ThierryDeclerck <fixed-case>TMO</fixed-case> — The Federated Ontology of the <fixed-case>T</fixed-case>rend<fixed-case>M</fixed-case>iner Project 4164–4171 @@ -1114,7 +1114,7 @@ krieger-declerck-2014-tmo - Gemma BelEnguix + Gemma BelEnguix ReinhardRapp MichaelZock A Graph-Based Approach for Computing Free Word Associations @@ -1135,7 +1135,7 @@ PaulFelt RobbieHaertel - EricRingger + EricRingger KevinSeppi <fixed-case>M</fixed-case>omresp: A <fixed-case>B</fixed-case>ayesian Model for Multi-Annotator Document Labeling 3704–3711 @@ -1145,7 +1145,7 @@ JessicaOuyang - KathyMcKeown + KathyMcKeown Towards Automatic Detection of Narrative Structure 4624–4631 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1154_Paper.pdf @@ -1155,7 +1155,7 @@ AnabelaBarreiro FernandoBatista - RicardoRibeiro + RicardoRibeiro HelenaMoniz IsabelTrancoso <fixed-case>O</fixed-case>pen<fixed-case>L</fixed-case>ogos Semantico-Syntactic Knowledge-Rich Bilingual Dictionaries @@ -1175,7 +1175,7 @@ ellendorff-etal-2014-using - RacheleSprugnoli + RacheleSprugnoli AlessandroLenci Crowdsourcing for the identification of event nominals: an experiment 1949–1955 @@ -1213,7 +1213,7 @@ sennrich-kunz-2014-zmorge - MonaDiab + MonaDiab MohamedAl-Badrashiny MaryamAminian MohammedAttia @@ -1250,7 +1250,7 @@ HeatherPon-Barry - StuartShieber + StuartShieber NicholasLongenbaugh Eliciting and Annotating Uncertainty in Spoken Language 1978–1983 @@ -1268,7 +1268,7 @@ gleize-grau-2014-hierarchical - MichaelRosner + MichaelRosner KurtSultana Automatic Methods for the Extension of a Bilingual Dictionary using Comparable Corpora 3790–3797 @@ -1295,7 +1295,7 @@ DanielaBraga HyongsilCho AmadeuFerreira - MiguelDias + MiguelDias Casa de la Lhéngua: a set of language resources and natural language processing tools for <fixed-case>M</fixed-case>irandese 536–540 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1174_Paper.pdf @@ -1317,7 +1317,7 @@ DorotheeBeermann ToreBruland Mary Esther KroppDakubu - MontserratMarimon + MontserratMarimon <fixed-case>M</fixed-case>ulti<fixed-case>V</fixed-case>al - towards a multilingual valence lexicon 2478–2485 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1179_Paper.pdf @@ -1338,10 +1338,10 @@ vacher-etal-2014-sweet - DavidLewis + DavidLewis RobBrennan LeroyFinn - DominicJones + DominicJones AlanMeehan DeclanO’Sullivan SebastianHellmann @@ -1353,8 +1353,8 @@ lewis-etal-2014-global - LiviuDinu - Alina MariaCiobanu + LiviuDinu + Alina MariaCiobanu On the <fixed-case>R</fixed-case>omance Languages Mutual Intelligibility 3313–3318 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1183_Paper.pdf @@ -1363,8 +1363,8 @@ AncaDinu - LiviuDinu - IonutSorodoc + LiviuDinu + IonutSorodoc Aggregation methods for efficient collocation detection 4041–4045 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1184_Paper.pdf @@ -1384,8 +1384,8 @@ JurisBorzovs IlzeIlziņa IvetaKeiša - MārcisPinnis - AndrejsVasiļjevs + MārcisPinnis + AndrejsVasiļjevs Terminology localization guidelines for the national scenario 4012–4017 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1189_Paper.pdf @@ -1395,7 +1395,7 @@ ClaireBrierley MajdiSawalha - EricAtwell + EricAtwell Tools for <fixed-case>A</fixed-case>rabic Natural Language Processing: a case study in qalqalah prosody 283–287 http://www.lrec-conf.org/proceedings/lrec2014/pdf/119_Paper.pdf @@ -1406,7 +1406,7 @@ Ana IsabelMata HelenaMoniz FernandoBatista - JuliaHirschberg + JuliaHirschberg Teenage and adult speech in school context: building and processing a corpus of <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese 3914–3919 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1193_Paper.pdf @@ -1416,9 +1416,9 @@ ArchnaBhatia MandySimons - LoriLevin + LoriLevin YuliaTsvetkov - ChrisDyer + ChrisDyer JordanBender A Unified Annotation Scheme for the Semantic/Pragmatic Components of Definiteness 910–916 @@ -1457,7 +1457,7 @@ ShikunZhang WangLing - ChrisDyer + ChrisDyer Dual Subtitles as Parallel Corpora 1869–1874 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1199_Paper.pdf @@ -1475,8 +1475,8 @@ GuiyaoKe - Pierre-FrancoisMarteau - GildasMenier + Pierre-FrancoisMarteau + GildasMenier Variations on quantitative comparability measures and their evaluations on synthetic <fixed-case>F</fixed-case>rench-<fixed-case>E</fixed-case>nglish comparable corpora 133–139 http://www.lrec-conf.org/proceedings/lrec2014/pdf/120_Paper.pdf @@ -1484,8 +1484,8 @@ ke-etal-2014-variations - LiviuDinu - Alina MariaCiobanu + LiviuDinu + Alina MariaCiobanu IoanaChitoran VladNiculae Using a machine learning model to assess the complexity of stress systems @@ -1512,7 +1512,7 @@ KevinBlack - EricRingger + EricRingger PaulFelt KevinSeppi KristianHeal @@ -1524,9 +1524,9 @@ black-etal-2014-evaluating - JonathanWashington + JonathanWashington IlnarSalimzyanov - FrancisTyers + FrancisTyers Finite-state morphological transducers for three Kypchak languages 3378–3385 http://www.lrec-conf.org/proceedings/lrec2014/pdf/1207_Paper.pdf @@ -1561,7 +1561,7 @@ schultz-schlippe-2014-globalphone - AlexandruCeausu + AlexandruCeausu SabineHunsicker Pre-ordering of phrase-based machine translation input in translation workflow 3589–3592 @@ -1571,7 +1571,7 @@ EmanueleDi Buccio - Giorgio MariaDi Nunzio + Giorgio MariaDi Nunzio GianmariaSilvello A Vector Space Model for Syntactic Distances Between Dialects 2486–2489 @@ -1602,7 +1602,7 @@ SpandanaGella CarloStrapparava - ViviNastase + ViviNastase Mapping <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Domains, <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Topics and <fixed-case>W</fixed-case>ikipedia Categories to Generate Multilingual Domain Specific Resources 1117–1121 http://www.lrec-conf.org/proceedings/lrec2014/pdf/122_Paper.pdf @@ -1620,7 +1620,7 @@ lee-etal-2014-annotating - Shyam SundarAgrawal + Shyam SundarAgrawal Abhimanue ShwetaBansal MinakshiMahajan @@ -1631,11 +1631,11 @@ agrawal-etal-2014-statistical - ChristopherCieri - DeniseDiPersio - MarkLiberman + ChristopherCieri + DeniseDiPersio + MarkLiberman AndreaMazzucchi - StephanieStrassel + StephanieStrassel JonathanWright New Directions for Language Resource Development and Distribution 1539–1546 @@ -1645,9 +1645,9 @@ JosephMariani - PatrickParoubek + PatrickParoubek GilFrancopoulo - OlivierHamon + OlivierHamon Rediscovering 15 Years of Discoveries in Language Resources and Evaluation: The <fixed-case>LREC</fixed-case> Anthology Analysis http://www.lrec-conf.org/proceedings/lrec2014/pdf/1228_Paper.pdf This paper aims at analyzing the content of the LREC conferences contained in the ELRA Anthology over the past 15 years (1998-2013). It follows similar exercises that have been conducted, such as the survey on the IEEE ICASSP conference series from 1976 to 1990, which served in the launching of the ESCA Eurospeech conference, a survey of the Association of Computational Linguistics (ACL) over 50 years of existence, which was presented at the ACL conference in 2012, or a survey over the 25 years (1987-2012) of the conferences contained in the ISCA Archive, presented at Interspeech 2013. It contains first an analysis of the evolution of the number of papers and authors over time, including the study of their gender, nationality and affiliation, and of the collaboration among authors. It then studies the funding sources of the research investigations that are reported in the papers. It conducts an analysis of the evolution of the research topics within the community over time. It finally looks at reuse and plagiarism in the papers. The survey shows the present trends in the conference series and in the Language Resources and Evaluation scientific community. Conducting this survey also demonstrated the importance of a clear and unique identification of authors, papers and other sources to facilitate the analysis. This survey is preliminary, as many other aspects also deserve attention. But we hope it will help better understanding and forging our community in the global village. @@ -1683,7 +1683,7 @@ angelov-2014-bootstrapping - MathieuMangeot + MathieuMangeot <fixed-case>M</fixed-case>otà<fixed-case>M</fixed-case>ot project: conversion of a <fixed-case>F</fixed-case>rench-<fixed-case>K</fixed-case>hmer published dictionary for building a multilingual lexical system 1024–1031 http://www.lrec-conf.org/proceedings/lrec2014/pdf/128_Paper.pdf @@ -1692,10 +1692,10 @@ SérgioCurto - Ana C.Mendes + Ana C.Mendes PedroCurto - LuísaCoheur - ÂngelaCosta + LuísaCoheur + ÂngelaCosta <fixed-case>JUST</fixed-case>.<fixed-case>ASK</fixed-case>, a <fixed-case>QA</fixed-case> system that learns to answer new questions from previous interactions 2603–2607 http://www.lrec-conf.org/proceedings/lrec2014/pdf/130_Paper.pdf @@ -1715,7 +1715,7 @@ MirjamErnestus LucieKočková-Amortová - PetrPollak + PetrPollak The Nijmegen Corpus of Casual <fixed-case>C</fixed-case>zech 365–370 http://www.lrec-conf.org/proceedings/lrec2014/pdf/134_Paper.pdf @@ -1741,7 +1741,7 @@ gruszczynski-ogrodniczuk-2014-digital - KristiinaJokinen + KristiinaJokinen Open-domain Interaction and Online Content in the <fixed-case>S</fixed-case>ami Language 517–522 http://www.lrec-conf.org/proceedings/lrec2014/pdf/143_Paper.pdf @@ -1767,7 +1767,7 @@ PaulFelt - EricRingger + EricRingger KevinSeppi KristianHeal Using Transfer Learning to Assist Exploratory Corpus Annotation @@ -1778,13 +1778,13 @@ JudithMuzerelle - AnaïsLefeuvre + AnaïsLefeuvre EmmanuelSchang Jean-YvesAntoine AurorePelletier DenisMaurel - IrisEshkol - JeanneVillaneau + IrisEshkol + JeanneVillaneau <fixed-case>ANCOR</fixed-case>_<fixed-case>C</fixed-case>entre, a large free spoken <fixed-case>F</fixed-case>rench coreference corpus: description of the resource and reliability measures 843–847 http://www.lrec-conf.org/proceedings/lrec2014/pdf/150_Paper.pdf @@ -1792,7 +1792,7 @@ muzerelle-etal-2014-ancor - AlexRudnick + AlexRudnick TaylorSkidmore AlbertoSamaniego MichaelGasser @@ -1803,7 +1803,7 @@ rudnick-etal-2014-guampa - DaanBroeder + DaanBroeder InekeSchuurman MenzoWindhouwer Experiences with the <fixed-case>ISO</fixed-case>cat Data Category Registry @@ -1832,7 +1832,7 @@ iida-tokunaga-2014-building - MatejĎurčo + MatejĎurčo MenzoWindhouwer The <fixed-case>CMD</fixed-case> Cloud 687–690 @@ -1881,7 +1881,7 @@ HiroakiShimizu GrahamNeubig SakrianiSakti - TomokiToda + TomokiToda SatoshiNakamura Collection of a Simultaneous Translation Corpus for Comparative Analysis 670–673 @@ -1903,7 +1903,7 @@ VolhaPetukhova AndreiMalchanau - HarryBunt + HarryBunt Interoperability of Dialogue Corpora through <fixed-case>ISO</fixed-case> 24617-2-based Querying 4407–4414 http://www.lrec-conf.org/proceedings/lrec2014/pdf/165_Paper.pdf @@ -1911,11 +1911,11 @@ petukhova-etal-2014-interoperability - CatiaCucchiarini + CatiaCucchiarini SteveBodnar Bart Penningde Vries Roelandvan Hout - HelmerStrik + HelmerStrik <fixed-case>ASR</fixed-case>-based <fixed-case>CALL</fixed-case> systems and learner speech data: new resources and opportunities for research and development in second language learning 2708–2714 http://www.lrec-conf.org/proceedings/lrec2014/pdf/168_Paper.pdf @@ -1952,8 +1952,8 @@ schmidt-2014-database - LiviuDinu - Alina MariaCiobanu + LiviuDinu + Alina MariaCiobanu Building a Dataset of Multilingual Cognates for the <fixed-case>R</fixed-case>omanian Lexicon 1038–1043 http://www.lrec-conf.org/proceedings/lrec2014/pdf/175_Paper.pdf @@ -1963,7 +1963,7 @@ GiuseppeRizzo Mariekevan Erp - RaphaëlTroncy + RaphaëlTroncy Benchmarking the Extraction and Disambiguation of Named Entities on the Semantic Web 4593–4600 http://www.lrec-conf.org/proceedings/lrec2014/pdf/176_Paper.pdf @@ -1973,17 +1973,17 @@ TingLiu KitCho - G. AaronBroadwell + G. AaronBroadwell SamiraShaikh - TomekStrzalkowski + TomekStrzalkowski JohnLien - SarahTaylor - LaurieFeldman + SarahTaylor + LaurieFeldman BorisYamrom - NickWebb + NickWebb UmitBoz IgnacioCases - Ching-shengLin + Ching-shengLin Automatic Expansion of the <fixed-case>MRC</fixed-case> Psycholinguistic Database Imageability Ratings 2800–2805 http://www.lrec-conf.org/proceedings/lrec2014/pdf/178_Paper.pdf @@ -1991,9 +1991,9 @@ liu-etal-2014-automatic-expansion - Riyaz AhmadBhat + Riyaz AhmadBhat Shahid MushtaqBhat - Dipti MisraSharma + Dipti MisraSharma Towards building a <fixed-case>K</fixed-case>ashmiri Treebank: Setting up the Annotation Pipeline 748–752 http://www.lrec-conf.org/proceedings/lrec2014/pdf/18_Paper.pdf @@ -2002,7 +2002,7 @@ OlgaUryupina - BarbaraPlank + BarbaraPlank AliakseiSeveryn AgataRotondi AlessandroMoschitti @@ -2024,9 +2024,9 @@ Arantzadel Pozo CarloAliprandi - AitorÁlvarez + AitorÁlvarez CarlosMendes - Joao P.Neto + Joao P.Neto SérgioPaulo NicolaPiccinini MatteoRaffaelli @@ -2056,9 +2056,9 @@ darwish-gao-2014-simple - Miguel B.Almeida + Miguel B.Almeida Mariana S. C.Almeida - André F. T.Martins + André F. T.Martins HelenaFigueira PedroMendes CláudiaPinto @@ -2090,7 +2090,7 @@ MohamedMorchid - GeorgesLinarès + GeorgesLinarès RichardDufour Characterizing and Predicting Bursty Events: The Buzz Case Study on <fixed-case>T</fixed-case>witter 2766–2771 @@ -2099,12 +2099,12 @@ morchid-etal-2014-characterizing - Hans-UlrichKrieger + Hans-UlrichKrieger ChristianSpurk HansUszkoreit FeiyuXu YiZhang - FrankMüller + FrankMüller ThomasTolxdorff Information Extraction from <fixed-case>G</fixed-case>erman Patient Records via Hybrid Parsing and Relation Extraction Strategies 2043–2048 @@ -2115,7 +2115,7 @@ DietmarSchabus MichaelPucher - PhilHoole + PhilHoole The <fixed-case>MMASCS</fixed-case> multi-modal annotated synchronous corpus of audio, video, facial motion and tongue motion data of normal, fast and slow speech 3411–3416 http://www.lrec-conf.org/proceedings/lrec2014/pdf/192_Paper.pdf @@ -2135,7 +2135,7 @@ JorisPelemans KrisDemuynck - HugoVan hamme + HugoVan hamme PatrickWambacq Speech Recognition Web Services for <fixed-case>D</fixed-case>utch 3041–3044 @@ -2144,9 +2144,9 @@ pelemans-etal-2014-speech - AngelaCosta + AngelaCosta TiagoLuís - LuísaCoheur + LuísaCoheur Translation errors from <fixed-case>E</fixed-case>nglish to <fixed-case>P</fixed-case>ortuguese: an annotated corpus 1231–1234 http://www.lrec-conf.org/proceedings/lrec2014/pdf/199_Paper.pdf @@ -2183,7 +2183,7 @@ SharidLoáiciga ThomasMeyer - AndreiPopescu-Belis + AndreiPopescu-Belis <fixed-case>E</fixed-case>nglish-<fixed-case>F</fixed-case>rench Verb Phrase Alignment in <fixed-case>E</fixed-case>uroparl for Tense Translation Modeling 674–681 http://www.lrec-conf.org/proceedings/lrec2014/pdf/205_Paper.pdf @@ -2192,7 +2192,7 @@ NellekeOostdijk - Henkvan den Heuvel + Henkvan den Heuvel The evolving infrastructure for language resources and the role for data scientists 608–612 http://www.lrec-conf.org/proceedings/lrec2014/pdf/206_Paper.pdf @@ -2254,7 +2254,7 @@ IngridHove IbrahimAlmajai VolkerDellwo - StevenMoran + StevenMoran A Crowdsourcing Smartphone Application for <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman: Putting Language Documentation in the Hands of the Users 3444–3447 http://www.lrec-conf.org/proceedings/lrec2014/pdf/214_Paper.pdf @@ -2263,8 +2263,8 @@ StevenBethard - PhilipOgren - LeeBecker + PhilipOgren + LeeBecker <fixed-case>C</fixed-case>lear<fixed-case>TK</fixed-case> 2.0: Design Patterns for Machine Learning in <fixed-case>UIMA</fixed-case> 3289–3293 http://www.lrec-conf.org/proceedings/lrec2014/pdf/218_Paper.pdf @@ -2275,8 +2275,8 @@ InèsZribi RahmaBoujelbane AbirMasmoudi - MariemEllouze - LamiaBelguith + MariemEllouze + LamiaBelguith NizarHabash A Conventional Orthography for <fixed-case>T</fixed-case>unisian <fixed-case>A</fixed-case>rabic 2355–2361 @@ -2313,12 +2313,12 @@ BlancaArias - NúriaBel + NúriaBel MercèLorente - MontserratMarimón + MontserratMarimón AlbaMilà JorgeVivaldi - MuntsaPadró + MuntsaPadró MarinaFomicheva ImanolLarrea Boosting the creation of a treebank @@ -2358,7 +2358,7 @@ WolfgangMaier MiriamKaeshammer PeterBaumann - SandraKübler + SandraKübler Discosuite - A parser test suite for <fixed-case>G</fixed-case>erman discontinuous structures 2905–2912 http://www.lrec-conf.org/proceedings/lrec2014/pdf/230_Paper.pdf @@ -2377,8 +2377,8 @@ GregorTitze VolhaBryl - CäciliaZirn - Simone PaoloPonzetto + CäciliaZirn + Simone PaoloPonzetto <fixed-case>DB</fixed-case>pedia Domains: augmenting <fixed-case>DB</fixed-case>pedia with domain information 1438–1442 http://www.lrec-conf.org/proceedings/lrec2014/pdf/233_Paper.pdf @@ -2412,7 +2412,7 @@ stein-2014-parsing - EckhardBick + EckhardBick <fixed-case>ML</fixed-case>-Optimization of Ported Constraint Grammars 4483–4487 http://www.lrec-conf.org/proceedings/lrec2014/pdf/24_Paper.pdf @@ -2421,17 +2421,17 @@ SamiraShaikh - TomekStrzalkowski + TomekStrzalkowski TingLiu - George AaronBroadwell + George AaronBroadwell BorisYamrom - SarahTaylor - LaurieFeldman + SarahTaylor + LaurieFeldman KitCho UmitBoz IgnacioCases YuliyaPeshkova - Ching-ShengLin + Ching-ShengLin A Multi-Cultural Repository of Automatically Discovered Linguistic and Conceptual Metaphors 2495–2500 http://www.lrec-conf.org/proceedings/lrec2014/pdf/241_Paper.pdf @@ -2450,8 +2450,8 @@ LianetSepúlveda Torres - Magali SanchesDuran - SandraAluísio + Magali SanchesDuran + SandraAluísio Generating a Lexicon of Errors in <fixed-case>P</fixed-case>ortuguese to Support an Error Identification System for <fixed-case>S</fixed-case>panish Native Learners 3952–3957 http://www.lrec-conf.org/proceedings/lrec2014/pdf/247_Paper.pdf @@ -2506,7 +2506,7 @@ MarieKopřivová HanaGoláňová - PetraKlimešová + PetraKlimešová DavidLukeš Mapping Diatopic and Diachronic Variation in Spoken <fixed-case>C</fixed-case>zech: The <fixed-case>ORTOFON</fixed-case> and <fixed-case>DIALEKT</fixed-case> Corpora 376–382 @@ -2540,7 +2540,7 @@ KatrinHein RémiLavalley LudwigLinhuber - SebastianStüker + SebastianStüker A Database of Freely Written Texts of <fixed-case>G</fixed-case>erman School Students for the Purpose of Automatic Spelling Error Classification 1212–1217 http://www.lrec-conf.org/proceedings/lrec2014/pdf/255_Paper.pdf @@ -2548,7 +2548,7 @@ berkling-etal-2014-database - ChristianHaenig + ChristianHaenig AndreasNiekler CarstenWuensch <fixed-case>PACE</fixed-case> Corpus: a multilingual corpus of Polarity-annotated textual data from the domains Automotive and <fixed-case>CE</fixed-case>llphone @@ -2560,10 +2560,10 @@ VeronikaVincze ViktorVarga - Katalin IlonaSimkó + Katalin IlonaSimkó JánosZsibrita ÁgostonNagy - RichárdFarkas + RichárdFarkas JánosCsirik <fixed-case>S</fixed-case>zeged Corpus 2.5: Morphological Modifications in a Manually <fixed-case>POS</fixed-case>-tagged <fixed-case>H</fixed-case>ungarian Corpus 1074–1078 @@ -2573,7 +2573,7 @@ Pierre AndréMénard - CarolineBarrière + CarolineBarrière Linked Open Data and Web Corpus Data for noun compound bracketing 702–709 http://www.lrec-conf.org/proceedings/lrec2014/pdf/263_Paper.pdf @@ -2583,7 +2583,7 @@ JoãoFreitas AntónioTeixeira - MiguelDias + MiguelDias Multimodal Corpora for Silent Speech Interaction 4507–4511 http://www.lrec-conf.org/proceedings/lrec2014/pdf/264_Paper.pdf @@ -2604,7 +2604,7 @@ EvelinaRennes - ArneJönsson + ArneJönsson The Impact of Cohesion Errors in Extraction Based Summaries 1575–1582 http://www.lrec-conf.org/proceedings/lrec2014/pdf/27_Paper.pdf @@ -2615,7 +2615,7 @@ LanjunZhou BinyangLi ZhongyuWei - Kam-FaiWong + Kam-FaiWong The <fixed-case>CUHK</fixed-case> Discourse <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank for <fixed-case>C</fixed-case>hinese: Annotating Explicit Discourse Connectives for the <fixed-case>C</fixed-case>hinese <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank 942–949 http://www.lrec-conf.org/proceedings/lrec2014/pdf/270_Paper.pdf @@ -2633,7 +2633,7 @@ sadamitsu-etal-2014-extraction - SandipanDandapat + SandipanDandapat DeclanGroves <fixed-case>MTW</fixed-case>atch: A Tool for the Analysis of Noisy Parallel Data 41–45 @@ -2644,7 +2644,7 @@ MartinRiedl RichardSteuer - ChrisBiemann + ChrisBiemann Distributed Distributional Similarities of <fixed-case>G</fixed-case>oogle <fixed-case>B</fixed-case>ooks Over the Centuries 1401–1405 http://www.lrec-conf.org/proceedings/lrec2014/pdf/274_Paper.pdf @@ -2657,9 +2657,9 @@ AsadMustafa RahilaParveen FarahAdeeba - TafseerAhmed Khan + TafseerAhmed Khan MiriamButt - AnnetteHautli + AnnetteHautli The <fixed-case>CLE</fixed-case> <fixed-case>U</fixed-case>rdu <fixed-case>POS</fixed-case> Tagset 2920–2925 http://www.lrec-conf.org/proceedings/lrec2014/pdf/275_Paper.pdf @@ -2668,7 +2668,7 @@ DarinaBenikova - ChrisBiemann + ChrisBiemann MarcReznicek <fixed-case>N</fixed-case>o<fixed-case>S</fixed-case>ta-<fixed-case>D</fixed-case> Named Entity Annotation for <fixed-case>G</fixed-case>erman: Guidelines and Dataset 2524–2531 @@ -2711,7 +2711,7 @@ przepiorkowski-etal-2014-walenty - BalamuraliA.R + BalamuraliA.R Can the Crowd be Controlled?: A Case Study on Crowd Sourcing and Automatic Validation of Completed Tasks based on User Modeling 189–195 http://www.lrec-conf.org/proceedings/lrec2014/pdf/28_Paper.pdf @@ -2729,7 +2729,7 @@ bogel-etal-2014-computational - MārcisPinnis + MārcisPinnis IlzeAuziņa KārlisGoba Designing the <fixed-case>L</fixed-case>atvian Speech Recognition Corpus @@ -2748,8 +2748,8 @@ PanotChaimongkol - AkikoAizawa - YukaTateisi + AkikoAizawa + YukaTateisi Corpus for Coreference Resolution on Scientific Papers 3187–3190 http://www.lrec-conf.org/proceedings/lrec2014/pdf/286_Paper.pdf @@ -2767,16 +2767,16 @@ falk-etal-2014-non - NancyUnderwood + NancyUnderwood BartoloméMesa-Lao - Mercedes GarcíaMartínez + Mercedes GarcíaMartínez MichaelCarl - VicentAlabau - JesúsGonzález-Rubio - Luis A.Leiva - GermánSanchis-Trilles - DanielOrtíz-Martínez - FranciscoCasacuberta + VicentAlabau + JesúsGonzález-Rubio + Luis A.Leiva + GermánSanchis-Trilles + DanielOrtíz-Martínez + FranciscoCasacuberta Evaluating the effects of interactivity in a post-editing workbench 553–559 http://www.lrec-conf.org/proceedings/lrec2014/pdf/289_Paper.pdf @@ -2823,7 +2823,7 @@ PatrikLambert - CarlosRodríguez-Penagos + CarlosRodríguez-Penagos Adapting Freely Available Resources to Build an Opinion Mining Pipeline in <fixed-case>P</fixed-case>ortuguese 2225–2228 http://www.lrec-conf.org/proceedings/lrec2014/pdf/293_Paper.pdf @@ -2834,7 +2834,7 @@ MilenaHnátková MichalKřen PavelProcházka - HanaSkoumalová + HanaSkoumalová The <fixed-case>SYN</fixed-case>-series corpora of written <fixed-case>C</fixed-case>zech 160–164 http://www.lrec-conf.org/proceedings/lrec2014/pdf/294_Paper.pdf @@ -2845,8 +2845,8 @@ LianeGuillou ChristianHardmeier AaronSmith - JörgTiedemann - BonnieWebber + JörgTiedemann + BonnieWebber <fixed-case>P</fixed-case>ar<fixed-case>C</fixed-case>or 1.0: A Parallel Pronoun-Coreference Corpus to Support Statistical <fixed-case>MT</fixed-case> 3191–3198 http://www.lrec-conf.org/proceedings/lrec2014/pdf/298_Paper.pdf @@ -2901,9 +2901,9 @@ Per ErikSolberg ArneSkjærholt - LiljaØvrelid + LiljaØvrelid KristinHagen - Janne BondiJohannessen + Janne BondiJohannessen The <fixed-case>N</fixed-case>orwegian Dependency Treebank 789–795 http://www.lrec-conf.org/proceedings/lrec2014/pdf/303_Paper.pdf @@ -2928,7 +2928,7 @@ laki-orosz-2014-efficient - PeterSpyns + PeterSpyns Remcovan Veenendaal A decade of <fixed-case>HLT</fixed-case> Agency activities in the Low Countries: from resource maintenance (<fixed-case>BLARK</fixed-case>) to service offerings (<fixed-case>BLAISE</fixed-case>) 2158–2165 @@ -2939,8 +2939,8 @@ EunahCho SarahFünfer - SebastianStüker - AlexWaibel + SebastianStüker + AlexWaibel A Corpus of Spontaneous Speech in Lectures: The <fixed-case>KIT</fixed-case> Lecture Corpus for Spoken Language Processing and Translation 1554–1559 http://www.lrec-conf.org/proceedings/lrec2014/pdf/311_Paper.pdf @@ -2958,7 +2958,7 @@ BegümErten - CemBozsahin + CemBozsahin DenizZeyrek <fixed-case>T</fixed-case>urkish Resources for Visual Word Recognition 2106–2110 @@ -2977,10 +2977,10 @@ MassimoMoneglia - SusanBrown + SusanBrown FrancescaFrontini GloriaGagliardi - FahadKhan + FahadKhan MonicaMonachini AlessandroPanunzi The <fixed-case>IMAGACT</fixed-case> Visual Ontology. An Extendable Multilingual Infrastructure for the representation of lexical encoding of Action @@ -3010,8 +3010,8 @@ BogdanLudusan MaartenVersteegh ArenJansen - GuillaumeGravier - Xuan-NgaCao + GuillaumeGravier + Xuan-NgaCao MarkJohnson EmmanuelDupoux Bridging the gap between speech technology and natural language processing: an evaluation toolbox for term discovery systems @@ -3021,7 +3021,7 @@ ludusan-etal-2014-bridging - DietmarRösner + DietmarRösner RafaelFriesen StephanGünther RicoAndrich @@ -3051,7 +3051,7 @@ alsop-nesi-2014-pragmatic - Dorte HaltrupHansen + Dorte HaltrupHansen LeneOffersgaard SussiOlsen Using <fixed-case>TEI</fixed-case>, <fixed-case>CMDI</fixed-case> and <fixed-case>ISO</fixed-case>cat in <fixed-case>CLARIN</fixed-case>-<fixed-case>DK</fixed-case> @@ -3072,7 +3072,7 @@ MirceaPetic - DanielaGîfu + DanielaGîfu Transliteration and alignment of parallel texts from <fixed-case>C</fixed-case>yrillic to <fixed-case>L</fixed-case>atin 1819–1823 http://www.lrec-conf.org/proceedings/lrec2014/pdf/328_Paper.pdf @@ -3082,7 +3082,7 @@ CorinaDima VerenaHenrich - ErhardHinrichs + ErhardHinrichs ChristinaHoppermann How to Tell a Schneemann from a Milchmann: An Annotation Scheme for Compound-Internal Relations 1194–1201 @@ -3101,7 +3101,7 @@ AnitaRácz - IstvánNagy T. + IstvánNagy T. VeronikaVincze 4<fixed-case>FX</fixed-case>: Light Verb Constructions in a Multilingual Parallel Corpus 710–715 @@ -3111,7 +3111,7 @@ FritzKliche - AndréBlessing + AndréBlessing UlrichHeid JonathanSonntag The e<fixed-case>I</fixed-case>dentity Text Exploration Workbench @@ -3169,7 +3169,7 @@ jansche-2014-computer - IsmailEl Maarouf + IsmailEl Maarouf JaneBradbury VítBaisa PatrickHanks @@ -3219,9 +3219,9 @@ schneider-2014-genitivdb - JanaŠindlerová - ZdeňkaUrešová - EvaFucikova + JanaŠindlerová + ZdeňkaUrešová + EvaFucikova Resources in Conflict: A Bilingual Valency Lexicon vs. a Bilingual Treebank vs. a Linguistic Theory 2490–2494 http://www.lrec-conf.org/proceedings/lrec2014/pdf/349_Paper.pdf @@ -3229,7 +3229,7 @@ sindlerova-etal-2014-resources - RoserSaurí + RoserSaurí JudithDomingo ToniBadia The <fixed-case>N</fixed-case>ew<fixed-case>S</fixed-case>o<fixed-case>M</fixed-case>e Corpus: A Unifying Opinion Annotation Framework across Genres and in Multiple Languages @@ -3258,7 +3258,7 @@ AndréBittar - LucaDini + LucaDini SigridMaurel MathieuRuhlmann The Dangerous Myth of the Star System @@ -3270,7 +3270,7 @@ HaiboLi MasatoHagiwara - QiLi + QiLi HengJi Comparison of the Impact of Word Segmentation on Name Tagging for <fixed-case>C</fixed-case>hinese and <fixed-case>J</fixed-case>apanese 2532–2536 @@ -3279,9 +3279,9 @@ li-etal-2014-comparison - Verginica BarbuMititelu + Verginica BarbuMititelu ElenaIrimia - DanTufiș + DanTufiș <fixed-case>C</fixed-case>o<fixed-case>R</fixed-case>o<fixed-case>L</fixed-case>a — The Reference Corpus of Contemporary <fixed-case>R</fixed-case>omanian Language 1235–1239 http://www.lrec-conf.org/proceedings/lrec2014/pdf/360_Paper.pdf @@ -3309,9 +3309,9 @@ MarcoMarelli StefanoMenini - MarcoBaroni + MarcoBaroni LuisaBentivogli - RaffaellaBernardi + RaffaellaBernardi RobertoZamparelli A <fixed-case>SICK</fixed-case> cure for the evaluation of compositional distributional semantic models 216–223 @@ -3333,7 +3333,7 @@ AnnikaHämäläinen JairoAvelar SilviaRodrigues - Miguel SalesDias + Miguel SalesDias ArturKolesiński TiborFegyó GézaNémeth @@ -3349,7 +3349,7 @@ MatteoAbrate Angelo MarioDel Grosso - EmilianoGiovannetti + EmilianoGiovannetti Angelica LoDuca DamianaLuzzi LorenzoMancini @@ -3384,10 +3384,10 @@ geer-keane-2014-exploring - NobalNiraula + NobalNiraula VasileRus RajendraBanjade - DanStefanescu + DanStefanescu WilliamBaggett BrentMorgan The <fixed-case>DARE</fixed-case> Corpus: A Resource for Anaphora Resolution in Dialogue Based Intelligent Tutoring Systems @@ -3397,7 +3397,7 @@ niraula-etal-2014-dare - MikelForcada + MikelForcada On the annotation of <fixed-case>TMX</fixed-case> translation memories for advanced leveraging in computer-aided translation 4374–4378 http://www.lrec-conf.org/proceedings/lrec2014/pdf/373_Paper.pdf @@ -3418,7 +3418,7 @@ AndreaMoro RobertoNavigli Francesco MariaTucci - Rebecca J.Passonneau + Rebecca J.Passonneau Annotating the <fixed-case>MASC</fixed-case> Corpus with <fixed-case>B</fixed-case>abel<fixed-case>N</fixed-case>et 4214–4219 http://www.lrec-conf.org/proceedings/lrec2014/pdf/375_Paper.pdf @@ -3427,7 +3427,7 @@ JasmijnBastings - KhalilSima’an + KhalilSima’an All Fragments Count in Parser Evaluation 78–82 L14-1324 @@ -3435,7 +3435,7 @@ bastings-simaan-2014-fragments - Juan MaríaGarrido + Juan MaríaGarrido YesikaLaplaza BenjaminKolz MiquelCornudella @@ -3448,7 +3448,7 @@ MojganSeraji CarinaJahani - BeátaMegyesi + BeátaMegyesi JoakimNivre A <fixed-case>P</fixed-case>ersian Treebank with <fixed-case>S</fixed-case>tanford Typed Dependencies 796–801 @@ -3458,7 +3458,7 @@ MarionBaranes - BenoîtSagot + BenoîtSagot A Language-independent Approach to Extracting Derivational Relations from an Inflectional Lexicon 2793–2799 http://www.lrec-conf.org/proceedings/lrec2014/pdf/379_Paper.pdf @@ -3476,14 +3476,14 @@ kucuk-etal-2014-named - AnneLacheret + AnneLacheret SylvainKahane - JulieBeliao + JulieBeliao AnneDister KimGerdes Jean-PhilippeGoldman NicolasObin - PaolaPietrandrea + PaolaPietrandrea AtanasTchobanov <fixed-case>R</fixed-case>hapsodie: a Prosodic-Syntactic Treebank for Spoken <fixed-case>F</fixed-case>rench 295–301 @@ -3492,11 +3492,11 @@ lacheret-etal-2014-rhapsodie - MontserratMarimon - NúriaBel + MontserratMarimon + NúriaBel BeatrizFisas BlancaArias - SilviaVázquez + SilviaVázquez JorgeVivaldi CarlosMorell MercèLorente @@ -3509,8 +3509,8 @@ MariaGoryainova CyrilGrouin - SophieRosset - IoanaVasilescu + SophieRosset + IoanaVasilescu Morpho-Syntactic Study of Errors from Speech Recognition System 3045–3049 http://www.lrec-conf.org/proceedings/lrec2014/pdf/383_Paper.pdf @@ -3519,10 +3519,10 @@ NianwenXue - OndřejBojar - JanHajič - MarthaPalmer - ZdeňkaUrešová + OndřejBojar + JanHajič + MarthaPalmer + ZdeňkaUrešová XiuhongZhang Not an Interlingua, But Close: Comparison of <fixed-case>E</fixed-case>nglish <fixed-case>AMR</fixed-case>s to <fixed-case>C</fixed-case>hinese and <fixed-case>C</fixed-case>zech 1765–1772 @@ -3531,7 +3531,7 @@ xue-etal-2014-interlingua - AdamMeyers + AdamMeyers GiancarloLee AngusGrieve-Smith YifanHe @@ -3546,7 +3546,7 @@ PrescottKlassen FeiXia LucyVanderwende - MelihaYetisgen + MelihaYetisgen Annotating Clinical Events in Text Snippets for Phenotype Detection 2753–2757 http://www.lrec-conf.org/proceedings/lrec2014/pdf/386_Paper.pdf @@ -3554,8 +3554,8 @@ klassen-etal-2014-annotating - PabloRuiz - AitorÁlvarez + PabloRuiz + AitorÁlvarez HaritzArzelus Phoneme Similarity Matrices to Improve Long Audio Alignment for Automatic Subtitling 437–442 @@ -3566,7 +3566,7 @@ Maria EvangeliaChatzimina CyrilGrouin - PierreZweigenbaum + PierreZweigenbaum Use of unsupervised word classes for entity recognition: Application to the detection of disorders in clinical reports 3264–3271 http://www.lrec-conf.org/proceedings/lrec2014/pdf/389_Paper.pdf @@ -3574,7 +3574,7 @@ chatzimina-etal-2014-use - EvaHajičová + EvaHajičová Three dimensions of the so-called “interoperability” of annotation schemes” 4559–4564 http://www.lrec-conf.org/proceedings/lrec2014/pdf/39_Paper.pdf @@ -3594,7 +3594,7 @@ DimitriosKokkinakis JyrkiNiemi SamHardwick - KristerLindén + KristerLindén LarsBorin <fixed-case>HFST</fixed-case>-<fixed-case>S</fixed-case>we<fixed-case>NER</fixed-case> — A New <fixed-case>NER</fixed-case> Resource for <fixed-case>S</fixed-case>wedish 2537–2543 @@ -3605,7 +3605,7 @@ MotazSaad DavidLanglois - KamelSmaïli + KamelSmaïli Building and Modelling Multilingual Subjective Corpora 3086–3091 http://www.lrec-conf.org/proceedings/lrec2014/pdf/392_Paper.pdf @@ -3644,8 +3644,8 @@ PaulaLopez-Otero - LauraDocio-Fernandez - CarmenGarcia-Mateo + LauraDocio-Fernandez + CarmenGarcia-Mateo Introducing a Framework for the Evaluation of Music Detection Tools 568–572 http://www.lrec-conf.org/proceedings/lrec2014/pdf/398_Paper.pdf @@ -3674,7 +3674,7 @@ EleftheriosAvramidis AljoschaBurchardt SabineHunsicker - MajaPopović + MajaPopović CindyTscherwinka DavidVilar HansUszkoreit @@ -3687,7 +3687,7 @@ MahmoudEl-Haj PaulRayson - SteveYoung + SteveYoung MartinWalker Detecting Document Structure in a Very Large Corpus of <fixed-case>UK</fixed-case> Financial Reports 1335–1338 @@ -3696,7 +3696,7 @@ el-haj-etal-2014-detecting - DanȘtefănescu + DanȘtefănescu RajendraBanjade VasileRus Latent Semantic Analysis Models on <fixed-case>W</fixed-case>ikipedia and <fixed-case>TASA</fixed-case> @@ -3709,27 +3709,27 @@ GeorgRehm HansUszkoreit SophiaAnaniadou - NúriaBel + NúriaBel AudronėBielevičienė LarsBorin - AntónioBranco + AntónioBranco GerhardBudin NicolettaCalzolari - WalterDaelemans - RadovanGarabík + WalterDaelemans + RadovanGarabík MarkoGrobelnik - CarmenGarcía-Mateo - Josefvan Genabith - JanHajič - InmaHernáez + CarmenGarcía-Mateo + Josefvan Genabith + JanHajič + InmaHernáez JohnJudge SvetlaKoeva SimonKrek CvetanaKrstev - KristerLindén - BernardoMagnini + KristerLindén + BernardoMagnini JosephMariani - JohnMcNaught + JohnMcNaught MaiteMelero MonicaMonachini AsunciónMoreno @@ -3738,16 +3738,16 @@ PiotrPęzik SteliosPiperidis AdamPrzepiórkowski - EiríkurRögnvaldsson - MichaelRosner - BolettePedersen - IngunaSkadiņa - KoenraadDe Smedt + EiríkurRögnvaldsson + MichaelRosner + BolettePedersen + IngunaSkadiņa + KoenraadDe Smedt MarkoTadić PaulThompson - DanTufiş - TamásVáradi - AndrejsVasiļjevs + DanTufiş + TamásVáradi + AndrejsVasiļjevs KadriVider JolantaZabarskaite The Strategic Impact of <fixed-case>META</fixed-case>-<fixed-case>NET</fixed-case> on the Regional, National and International Level @@ -3775,16 +3775,16 @@ schiel-kisler-2014-german - KoenraadDe Smedt - ErhardHinrichs - DetmarMeurers - IngunaSkadiņa - BolettePedersen + KoenraadDe Smedt + ErhardHinrichs + DetmarMeurers + IngunaSkadiņa + BolettePedersen CostanzaNavarretta - NúriaBel - KristerLindén - MarkétaLopatková - JanHajič + NúriaBel + KristerLindén + MarkétaLopatková + JanHajič GisleAndersen PrzemyslawLenkiewicz <fixed-case>CLARA</fixed-case>: A New Generation of Researchers in Common Language Resources and Their Applications @@ -3796,11 +3796,11 @@ NathanHartmann LucasAvanço - PedroBalage - MagaliDuran - Mariadas Graças Volpe Nunes + PedroBalage + MagaliDuran + Mariadas Graças Volpe Nunes ThiagoPardo - SandraAluísio + SandraAluísio A Large Corpus of Product Reviews in <fixed-case>P</fixed-case>ortuguese: Tackling Out-Of-Vocabulary Words 3865–3871 http://www.lrec-conf.org/proceedings/lrec2014/pdf/413_Paper.pdf @@ -3810,9 +3810,9 @@ AnoopKunchukuttan AbhijitMishra - RajenChatterjee - RiteshShah - PushpakBhattacharyya + RajenChatterjee + RiteshShah + PushpakBhattacharyya Shata-Anuvadak: Tackling Multiway Translation of <fixed-case>I</fixed-case>ndian Languages 1781–1787 http://www.lrec-conf.org/proceedings/lrec2014/pdf/414_Paper.pdf @@ -3820,8 +3820,8 @@ kunchukuttan-etal-2014-shata - ErhardHinrichs - StevenKrauwer + ErhardHinrichs + StevenKrauwer The <fixed-case>CLARIN</fixed-case> Research Infrastructure: Resources and Tools for e<fixed-case>H</fixed-case>umanities Scholars 1525–1531 http://www.lrec-conf.org/proceedings/lrec2014/pdf/415_Paper.pdf @@ -3830,9 +3830,9 @@ RenlongAi - MarcelaCharfuelan - WalterKasper - TinaKlüwer + MarcelaCharfuelan + WalterKasper + TinaKlüwer HansUszkoreit FeiyuXu SandraGasber @@ -3876,7 +3876,7 @@ HegeFromreide DirkHovy - AndersSøgaard + AndersSøgaard Crowdsourcing and annotating <fixed-case>NER</fixed-case> for <fixed-case>T</fixed-case>witter #drift 2544–2547 http://www.lrec-conf.org/proceedings/lrec2014/pdf/421_Paper.pdf @@ -3899,7 +3899,7 @@ AlainCouillault KarënFort - GillesAdda + GillesAdda Huguesde Mazancourt Evaluating corpora documentation with regards to the Ethics and Big Data Charter 4225–4229 @@ -3909,9 +3909,9 @@ AhmetAker - MonicaParamita + MonicaParamita EmmaBarker - RobertGaizauskas + RobertGaizauskas Bootstrapping Term Extractors for Multiple Languages 483–489 http://www.lrec-conf.org/proceedings/lrec2014/pdf/425_Paper.pdf @@ -3921,7 +3921,7 @@ ElsLefever MarjanVan de Kauter - VéroniqueHoste + VéroniqueHoste Evaluation of Automatic Hypernym Extraction from Technical Corpora in <fixed-case>E</fixed-case>nglish and <fixed-case>D</fixed-case>utch 490–497 http://www.lrec-conf.org/proceedings/lrec2014/pdf/426_Paper.pdf @@ -3950,7 +3950,7 @@ LiseRebout - PhillippeLanglais + PhillippeLanglais An Iterative Approach for Mining Parallel Sentences in a Comparable Corpus 648–655 http://www.lrec-conf.org/proceedings/lrec2014/pdf/43_Paper.pdf @@ -3977,10 +3977,10 @@ zaghouani-dukes-2014-crowdsourcing - HanaeKoiso + HanaeKoiso YasuharuDen Ken’yaNishikawa - KikuoMaekawa + KikuoMaekawa Design and development of an <fixed-case>RDB</fixed-case> version of the Corpus of Spontaneous <fixed-case>J</fixed-case>apanese 1471–1476 http://www.lrec-conf.org/proceedings/lrec2014/pdf/432_Paper.pdf @@ -4008,11 +4008,11 @@ PiekVossen - GermanRigau - LucianoSerafini + GermanRigau + LucianoSerafini PimStouten FrancisIrving - WillemVan Hage + WillemVan Hage <fixed-case>N</fixed-case>ews<fixed-case>R</fixed-case>eader: recording history from daily news streams 2000–2007 http://www.lrec-conf.org/proceedings/lrec2014/pdf/436_Paper.pdf @@ -4062,14 +4062,14 @@ ghayoomi-kuhn-2014-converting - IñakiAlegria + IñakiAlegria NoraAranberri - PereComas + PereComas VíctorFresno PabloGamallo - LluisPadró + LluisPadró IñakiSan Vicente - JordiTurmo + JordiTurmo ArkaitzZubiaga <fixed-case>T</fixed-case>weet<fixed-case>N</fixed-case>orm_es: an annotated corpus for <fixed-case>S</fixed-case>panish microtext normalization 2274–2278 @@ -4090,10 +4090,10 @@ RalfSteinberger MaudEhrmann MohamedEbrahim - LeonidaDella Rocca + LeonidaDella Rocca StefanoBucci EszterSimon - TamásVáradi + TamásVáradi Media monitoring and information extraction for the highly inflected agglutinative language <fixed-case>H</fixed-case>ungarian 2049–2056 http://www.lrec-conf.org/proceedings/lrec2014/pdf/449_Paper.pdf @@ -4101,7 +4101,7 @@ pajzs-etal-2014-media - VéroniqueMoriceau + VéroniqueMoriceau XavierTannier <fixed-case>F</fixed-case>rench Resources for Extraction and Normalization of Temporal Expressions with <fixed-case>H</fixed-case>eidel<fixed-case>T</fixed-case>ime 3239–3243 @@ -4120,7 +4120,7 @@ AngelinaIvanova - Gertjanvan Noord + Gertjanvan Noord Treelet Probabilities for <fixed-case>HPSG</fixed-case> Parsing and Error Correction 2887–2892 http://www.lrec-conf.org/proceedings/lrec2014/pdf/453_Paper.pdf @@ -4129,9 +4129,9 @@ AbirMasmoudi - Mariem EllouzeKhmekhem + Mariem EllouzeKhmekhem YannickEstève - Lamia HadrichBelguith + Lamia HadrichBelguith NizarHabash A Corpus and Phonetic Dictionary for <fixed-case>T</fixed-case>unisian <fixed-case>A</fixed-case>rabic Speech Recognition 306–310 @@ -4140,9 +4140,9 @@ masmoudi-etal-2014-corpus - Marie-ClaudeL’Homme + Marie-ClaudeL’Homme BenoîtRobichaud - Carlos SubiratsRüggeberg + Carlos SubiratsRüggeberg Discovering frames in specialized domains 1364–1371 http://www.lrec-conf.org/proceedings/lrec2014/pdf/455_Paper.pdf @@ -4150,13 +4150,13 @@ lhomme-etal-2014-discovering - LoriLevin + LoriLevin TerukoMitamura BrianMacWhinney DavidaFromm - JaimeCarbonell + JaimeCarbonell WestonFeely - RobertFrederking + RobertFrederking AnatoleGershman CarlosRamirez Resources for the Detection of Conventionalized Metaphors in Four Languages @@ -4174,7 +4174,7 @@ odijk-2014-clarin - Hugo GonçaloOliveira + Hugo GonçaloOliveira InêsCoelho PauloGomes Exploiting <fixed-case>P</fixed-case>ortuguese Lexical Knowledge Bases for Answering Open Domain Cloze Questions Automatically @@ -4184,10 +4184,10 @@ oliveira-etal-2014-exploiting - YukaTateisi + YukaTateisi YoShidahara YusukeMiyao - AkikoAizawa + AkikoAizawa Annotation of Computer Science Papers for Semantic Relation Extrac-tion 1423–1429 http://www.lrec-conf.org/proceedings/lrec2014/pdf/461_Paper.pdf @@ -4213,7 +4213,7 @@ JieJiang Gerardvan Loenhout Arantzadel Pozo - Mirjam SepesyMaučec + Mirjam SepesyMaučec AnjaTurner MartinVolk Machine Translation for Subtitling: A Large-Scale Evaluation @@ -4224,7 +4224,7 @@ ElisabettaJezek - BernardoMagnini + BernardoMagnini AnnaFeltracco AlessiaBianchini OctavianPopescu @@ -4244,7 +4244,7 @@ SubhabrataMukherjee - SachindraJoshi + SachindraJoshi Author-Specific Sentiment Aggregation for Polarity Prediction of Reviews 3092–3099 http://www.lrec-conf.org/proceedings/lrec2014/pdf/467_Paper.pdf @@ -4262,7 +4262,7 @@ jacquet-etal-2014-clustering - RichardSproat + RichardSproat BrunoCartoni HyunJeongChoe DavidHuynh @@ -4276,7 +4276,7 @@ sproat-etal-2014-database - Noushin RezapourAsheghi + Noushin RezapourAsheghi SergeSharoff KatjaMarkert Designing and Evaluating a Reliable Corpus of Web Genres via Crowd-Sourcing @@ -4286,7 +4286,7 @@ asheghi-etal-2014-designing - Héctor MartínezAlonso + Héctor MartínezAlonso LaurenRomeo Crowdsourcing as a preprocessing for complex semantic annotation tasks 229–234 @@ -4296,7 +4296,7 @@ MarcoTurchi - MatteoNegri + MatteoNegri Automatic Annotation of Machine Translation Datasets with Binary Quality Judgements 1788–1792 http://www.lrec-conf.org/proceedings/lrec2014/pdf/473_Paper.pdf @@ -4306,7 +4306,7 @@ MariannaApidianaki EmiliaVerzeni - DianaMcCarthy + DianaMcCarthy Semantic Clustering of Pivot Paraphrases 4270–4275 http://www.lrec-conf.org/proceedings/lrec2014/pdf/475_Paper.pdf @@ -4315,8 +4315,8 @@ DirkHovy - BarbaraPlank - AndersSøgaard + BarbaraPlank + AndersSøgaard When <fixed-case>POS</fixed-case> data sets don’t add up: Combatting sample bias 4472–4475 http://www.lrec-conf.org/proceedings/lrec2014/pdf/476_Paper.pdf @@ -4332,7 +4332,7 @@ shardlow-2014-open - MarkFinlayson + MarkFinlayson JeffryHalverson StevenCorman The N2 corpus: A semantically annotated collection of Islamist extremist stories @@ -4372,7 +4372,7 @@ JeaninJügler YvesLaprie OdileMella - BerndMöbius + BerndMöbius Designing a Bilingual Speech Corpus for <fixed-case>F</fixed-case>rench and <fixed-case>G</fixed-case>erman Language Learners: a Two-Step Process 1477–1482 http://www.lrec-conf.org/proceedings/lrec2014/pdf/484_Paper.pdf @@ -4399,13 +4399,13 @@ rapp-2014-using-word - MarieCandito + MarieCandito GuyPerrier BrunoGuillaume CorentinRibeyre KarënFort - DjaméSeddah - Éricde la Clergerie + DjaméSeddah + Éricde la Clergerie Deep Syntax Annotation of the Sequoia <fixed-case>F</fixed-case>rench Treebank 2298–2305 http://www.lrec-conf.org/proceedings/lrec2014/pdf/494_Paper.pdf @@ -4413,17 +4413,17 @@ candito-etal-2014-deep - MarieCandito + MarieCandito PascalAmsili LucieBarque - FarahBenamara + FarahBenamara Gaëlde Chalendar MarianneDjemaa PaulineHaas RichardHuyghe - Yvette YannickMathieu + Yvette YannickMathieu PhilippeMuller - BenoîtSagot + BenoîtSagot LaureVieu Developing a <fixed-case>F</fixed-case>rench <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et: Methodology and First results 1372–1379 @@ -4433,8 +4433,8 @@ MartaSabou - KalinaBontcheva - LeonDerczynski + KalinaBontcheva + LeonDerczynski ArnoScharl Corpus Annotation through Crowdsourcing: Towards Best Practice Guidelines 859–866 @@ -4463,7 +4463,7 @@ ThierryDeclerck - Hans-UlrichKrieger + Hans-UlrichKrieger Harmonization of <fixed-case>G</fixed-case>erman Lexical Resources for Opinion Mining 3872–3876 http://www.lrec-conf.org/proceedings/lrec2014/pdf/500_Paper.pdf @@ -4472,7 +4472,7 @@ MagdaŠevčíková - ZdeněkŽabokrtský + ZdeněkŽabokrtský Word-Formation Network for <fixed-case>C</fixed-case>zech 1087–1093 http://www.lrec-conf.org/proceedings/lrec2014/pdf/501_Paper.pdf @@ -4481,7 +4481,7 @@ JamieBost - JohannaMoore + JohannaMoore An Analysis of Older Users’ Interactions with Spoken Dialogue Systems 1176–1181 http://www.lrec-conf.org/proceedings/lrec2014/pdf/502_Paper.pdf @@ -4509,7 +4509,7 @@ MikaëlMorardo - ÉricVillemonte de la Clergerie + ÉricVillemonte de la Clergerie Towards an environment for the production and the validation of lexical semantic resources 867–874 http://www.lrec-conf.org/proceedings/lrec2014/pdf/507_Paper.pdf @@ -4527,8 +4527,8 @@ JillBoberg DavidDeVault StacyMarsella - DavidTraum - SkipRizzo + DavidTraum + SkipRizzo Louis-PhilippeMorency The Distress Analysis Interview Corpus of human and computer interviews 3123–3128 @@ -4539,7 +4539,7 @@ BrigitteBigi TatsuyaWatanabe - LaurentPrévot + LaurentPrévot Representing Multimodal Linguistic Annotated data 3386–3392 http://www.lrec-conf.org/proceedings/lrec2014/pdf/51_Paper.pdf @@ -4549,7 +4549,7 @@ TimurGilmanov OlgaScrivner - SandraKübler + SandraKübler <fixed-case>SWIFT</fixed-case> Aligner, A Multifunctional Tool for Parallel Corpora: Visualization, Word Alignment, and (Morpho)-Syntactic Cross-Language Transfer 2913–2919 http://www.lrec-conf.org/proceedings/lrec2014/pdf/510_Paper.pdf @@ -4560,7 +4560,7 @@ RosemaryOrr MarijnHuijbregts Roelandvan Beek - LisaTeunissen + LisaTeunissen KateBackhouse Davidvan Leeuwen Semi-automatic annotation of the <fixed-case>UCU</fixed-case> accents speech corpus @@ -4571,9 +4571,9 @@ DanielaAmaral - EvandroFonseca + EvandroFonseca LuceleneLopes - RenataVieira + RenataVieira Comparative Analysis of <fixed-case>P</fixed-case>ortuguese Named Entities Recognition Tools 2554–2558 http://www.lrec-conf.org/proceedings/lrec2014/pdf/513_Paper.pdf @@ -4593,10 +4593,10 @@ santos-etal-2014-corpus - GuntisBarzdins - DidzisGosko + GuntisBarzdins + DidzisGosko LauraRituma - PeterisPaikens + PeterisPaikens Using C5.0 and Exhaustive Search for Boosting Frame-Semantic Parsing Accuracy 4476–4482 http://www.lrec-conf.org/proceedings/lrec2014/pdf/515_Paper.pdf @@ -4606,7 +4606,7 @@ VerenaLyding LionelNicolas - EgonStemle + EgonStemle ‘inter<fixed-case>H</fixed-case>ist’ - an interactive visual interface for corpus exploration 635–641 http://www.lrec-conf.org/proceedings/lrec2014/pdf/517_Paper.pdf @@ -4626,7 +4626,7 @@ LisPereira ElgaStrafella - YujiMatsumoto + YujiMatsumoto Collocation or Free Combination? — Applying Machine Translation Techniques to identify collocations in <fixed-case>J</fixed-case>apanese 736–739 http://www.lrec-conf.org/proceedings/lrec2014/pdf/519_Paper.pdf @@ -4635,9 +4635,9 @@ AdamKilgarriff - PavelRychlý + PavelRychlý MilošJakubíček - VojtěchKovář + VojtěchKovář VítBaisa LuciaKocincová Extrinsic Corpus Evaluation with a Collocation Dictionary Task @@ -4647,7 +4647,7 @@ kilgarriff-etal-2014-extrinsic - DominiqueEstival + DominiqueEstival SteveCassidy FelicityCox DenisBurnham @@ -4664,7 +4664,7 @@ EmilyDanchik Michael T.Mordowanec HenriettaConrad - Noah A.Smith + Noah A.Smith Comprehensive Annotation of Multiword Expressions in a Social Web Corpus 455–461 http://www.lrec-conf.org/proceedings/lrec2014/pdf/521_Paper.pdf @@ -4700,10 +4700,10 @@ navarretta-lis-2014-transfer - MiquelEsplà-Gomis + MiquelEsplà-Gomis FilipKlubička NikolaLjubešić - SergioOrtiz-Rojas + SergioOrtiz-Rojas VassilisPapavassiliou ProkopisProkopidis Comparing two acquisition systems for automatically building an <fixed-case>E</fixed-case>nglish—<fixed-case>C</fixed-case>roatian parallel corpus from multilingual websites @@ -4714,7 +4714,7 @@ FabrizioGotti - PhillippeLanglais + PhillippeLanglais AtefehFarzindar Hashtag Occurrences, Layout and Translation: A Corpus-driven Analysis of Tweets Published by the <fixed-case>C</fixed-case>anadian Government 2254–2261 @@ -4735,7 +4735,7 @@ GiuseppeCastellucci DaniloCroce LucaIocchi - RobertoBasili + RobertoBasili DanieleNardi <fixed-case>H</fixed-case>u<fixed-case>RIC</fixed-case>: a Human Robot Interaction Corpus 4519–4526 @@ -4766,8 +4766,8 @@ MatějKorvas OndřejPlátek OndřejDušek - Lukᚎilka - FilipJurčíček + Lukᚎilka + FilipJurčíček Free <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>zech telephone speech corpus shared under the <fixed-case>CC</fixed-case>-<fixed-case>BY</fixed-case>-<fixed-case>SA</fixed-case> 3.0 license 4423–4428 http://www.lrec-conf.org/proceedings/lrec2014/pdf/535_Paper.pdf @@ -4804,7 +4804,7 @@ wroblewska-przepiorkowski-2014-projection - GianlucaLebani + GianlucaLebani VeronicaViola AlessandroLenci Bootstrapping an <fixed-case>I</fixed-case>talian <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et: data-driven analysis of verb alternations @@ -4815,7 +4815,7 @@ ArdaÇelebi - ArzucanÖzgür + ArzucanÖzgür Self-training a Constituency Parser using n-gram Trees 2893–2896 http://www.lrec-conf.org/proceedings/lrec2014/pdf/543_Paper.pdf @@ -4825,7 +4825,7 @@ BushraJawaid AmirKamran - OndřejBojar + OndřejBojar A Tagged Corpus and a Tagger for <fixed-case>U</fixed-case>rdu 2938–2943 http://www.lrec-conf.org/proceedings/lrec2014/pdf/544_Paper.pdf @@ -4834,7 +4834,7 @@ KostadinCholakov - ChrisBiemann + ChrisBiemann JudithEckle-Kohler IrynaGurevych Lexical Substitution Dataset for <fixed-case>G</fixed-case>erman @@ -4846,7 +4846,7 @@ LaurenRomeo SaraMendes - NúriaBel + NúriaBel A cascade approach for complex-type classification 4451–4458 http://www.lrec-conf.org/proceedings/lrec2014/pdf/546_Paper.pdf @@ -4855,10 +4855,10 @@ CédricLopez - FrédériqueSegond + FrédériqueSegond OlivierHondermarck PaoloCurtoni - LucaDini + LucaDini Generating a Resource for Products and Brandnames Recognition. Application to the Cosmetic Domain. 2559–2564 http://www.lrec-conf.org/proceedings/lrec2014/pdf/549_Paper.pdf @@ -4869,8 +4869,8 @@ LouiseDeléger Anne-LaureLigozat CyrilGrouin - PierreZweigenbaum - AurélieNévéol + PierreZweigenbaum + AurélieNévéol Annotation of specialized corpora using a comprehensive entity and relation scheme 1267–1274 http://www.lrec-conf.org/proceedings/lrec2014/pdf/552_Paper.pdf @@ -4889,7 +4889,7 @@ FrancescaFrontini ValeriaQuochi - SebastianPadó + SebastianPadó MonicaMonachini JasonUtt Polysemy Index for Nouns: an Experiment on <fixed-case>I</fixed-case>talian using the <fixed-case>PAROLE</fixed-case> <fixed-case>SIMPLE</fixed-case> <fixed-case>CLIPS</fixed-case> Lexical Database @@ -4910,8 +4910,8 @@ salama-etal-2014-youdacc - DanFlickinger - Emily M.Bender + DanFlickinger + Emily M.Bender StephanOepen Towards an Encyclopedia of Compositional Semantics: Documenting the Interface of the <fixed-case>E</fixed-case>nglish <fixed-case>R</fixed-case>esource <fixed-case>G</fixed-case>rammar 875–881 @@ -4960,7 +4960,7 @@ TeresaHerrmann JanNiehues - AlexWaibel + AlexWaibel Manual Analysis of Structurally Informed Reordering in <fixed-case>G</fixed-case>erman-<fixed-case>E</fixed-case>nglish Machine Translation 4379–4386 http://www.lrec-conf.org/proceedings/lrec2014/pdf/569_Paper.pdf @@ -4982,7 +4982,7 @@ MoritzWittmann MarionWeller - SabineSchulte im Walde + SabineSchulte im Walde Automatic Extraction of Synonyms for <fixed-case>G</fixed-case>erman Particle Verbs from Parallel Data with Distributional Similarity as a Re-Ranking Feature 1430–1437 http://www.lrec-conf.org/proceedings/lrec2014/pdf/574_Paper.pdf @@ -5053,8 +5053,8 @@ LaurenRomeo - GianlucaLebani - NúriaBel + GianlucaLebani + NúriaBel AlessandroLenci Choosing which to use? A study of distributional models for nominal lexical semantic classification 4366–4373 @@ -5067,7 +5067,7 @@ ChristophSchmidt OscarKoller MartinBellgardt - HermannNey + HermannNey Extensions of the Sign Language Recognition and Translation Corpus <fixed-case>RWTH</fixed-case>-<fixed-case>PHOENIX</fixed-case>-Weather 1911–1916 http://www.lrec-conf.org/proceedings/lrec2014/pdf/585_Paper.pdf @@ -5078,9 +5078,9 @@ SaraCandeias DirceCelorico JorgeProença - ArlindoVeiga + ArlindoVeiga CarlaLopes - FernandoPerdigão + FernandoPerdigão <fixed-case>HESITA</fixed-case>(te) in <fixed-case>P</fixed-case>ortuguese 1564–1567 http://www.lrec-conf.org/proceedings/lrec2014/pdf/587_Paper.pdf @@ -5096,9 +5096,9 @@ alansary-2014-muhit - MaddalenLopez de Lacalle + MaddalenLopez de Lacalle EgoitzLaparra - GermanRigau + GermanRigau Predicate Matrix: extending <fixed-case>S</fixed-case>em<fixed-case>L</fixed-case>ink through <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et mappings 903–909 http://www.lrec-conf.org/proceedings/lrec2014/pdf/589_Paper.pdf @@ -5106,7 +5106,7 @@ lopez-de-lacalle-etal-2014-predicate - AiTiAw + AiTiAw Sharifah MahaniAljunied NattadapornLertcheva SasiwimonKalunsima @@ -5120,7 +5120,7 @@ FeliceDell’Orletta GiuliaVenturi AndreaCimino - SimonettaMontemagni + SimonettaMontemagni <fixed-case>T</fixed-case>2<fixed-case>K</fixed-case>^2: a System for Automatically Extracting and Organizing Knowledge from Texts 2062–2070 http://www.lrec-conf.org/proceedings/lrec2014/pdf/590_Paper.pdf @@ -5141,13 +5141,13 @@ ArfathPasha MohamedAl-Badrashiny - MonaDiab + MonaDiab AhmedEl Kholy RamyEskander NizarHabash ManojPooleery - OwenRambow - RyanRoth + OwenRambow + RyanRoth <fixed-case>MADAMIRA</fixed-case>: A Fast, Comprehensive Tool for Morphological Analysis and Disambiguation of <fixed-case>A</fixed-case>rabic 1094–1101 http://www.lrec-conf.org/proceedings/lrec2014/pdf/593_Paper.pdf @@ -5165,8 +5165,8 @@ WestonFeely MehdiManshadi - RobertFrederking - LoriLevin + RobertFrederking + LoriLevin The <fixed-case>CMU</fixed-case> <fixed-case>METAL</fixed-case> <fixed-case>F</fixed-case>arsi <fixed-case>NLP</fixed-case> Approach 4052–4055 http://www.lrec-conf.org/proceedings/lrec2014/pdf/596_Paper.pdf @@ -5175,7 +5175,7 @@ BartDesmet - VéroniqueHoste + VéroniqueHoste Recognising suicidal messages in <fixed-case>D</fixed-case>utch social media 830–835 http://www.lrec-conf.org/proceedings/lrec2014/pdf/597_Paper.pdf @@ -5184,7 +5184,7 @@ MaximilianKöper - SabineSchulte im Walde + SabineSchulte im Walde A Rank-based Distance Measure to Detect Polysemy and to Determine Salient Vector-Space Features for <fixed-case>G</fixed-case>erman Prepositions 4459–4466 http://www.lrec-conf.org/proceedings/lrec2014/pdf/599_Paper.pdf @@ -5219,7 +5219,7 @@ BrunoGuillaume KarënFort GuyPerrier - PaulBédaride + PaulBédaride Mapping the Lexique des Verbes du Français (Lexicon of <fixed-case>F</fixed-case>rench Verbs) to a <fixed-case>NLP</fixed-case> lexicon using examples 2806–2810 http://www.lrec-conf.org/proceedings/lrec2014/pdf/602_Paper.pdf @@ -5227,10 +5227,10 @@ guillaume-etal-2014-mapping - AurélieNévéol + AurélieNévéol JulienGrosjean - StéfanDarmoni - PierreZweigenbaum + StéfanDarmoni + PierreZweigenbaum Language Resources for <fixed-case>F</fixed-case>rench in the Biomedical Domain 2146–2151 http://www.lrec-conf.org/proceedings/lrec2014/pdf/604_Paper.pdf @@ -5241,7 +5241,7 @@ AdrianeBoyd JirkaHana LionelNicolas - DetmarMeurers + DetmarMeurers KatrinWisniewski AndreaAbel KarinSchöne @@ -5268,7 +5268,7 @@ BushraJawaid - OndřejBojar + OndřejBojar Two-Step Machine Translation with Lattices 682–686 http://www.lrec-conf.org/proceedings/lrec2014/pdf/610_Paper.pdf @@ -5276,7 +5276,7 @@ jawaid-bojar-2014-two - BjörnSchuller + BjörnSchuller FelixFriedmann FlorianEyben The <fixed-case>M</fixed-case>unich Biovoice Corpus: Effects of Physical Exercising, Heart Rate, and Skin Conductance on Human Speech Production @@ -5297,7 +5297,7 @@ RaymondShen - HideakiKikuchi + HideakiKikuchi Estimation of Speaking Style in Speech Corpora Focusing on speech transcriptions 2747–2752 http://www.lrec-conf.org/proceedings/lrec2014/pdf/616_Paper.pdf @@ -5316,7 +5316,7 @@ TravisGoodwin - SandaHarabagiu + SandaHarabagiu Clinical Data-Driven Probabilistic Graph Processing 101–108 http://www.lrec-conf.org/proceedings/lrec2014/pdf/618_Paper.pdf @@ -5324,7 +5324,7 @@ goodwin-harabagiu-2014-clinical - MuntsaPadró + MuntsaPadró MarcoIdiart AlineVillavicencio CarlosRamisch @@ -5335,7 +5335,7 @@ padro-etal-2014-comparing - Cheikh M. BambaDione + Cheikh M. BambaDione Pruning the Search Space of the <fixed-case>W</fixed-case>olof <fixed-case>LFG</fixed-case> Grammar Using a Probabilistic and a Constraint Grammar Parser 2863–2870 http://www.lrec-conf.org/proceedings/lrec2014/pdf/62_Paper.pdf @@ -5344,8 +5344,8 @@ MahaAlthobaiti - UdoKruschwitz - MassimoPoesio + UdoKruschwitz + MassimoPoesio <fixed-case>A</fixed-case>ra<fixed-case>NLP</fixed-case>: a <fixed-case>J</fixed-case>ava-based Library for the Processing of <fixed-case>A</fixed-case>rabic Text. 4134–4138 http://www.lrec-conf.org/proceedings/lrec2014/pdf/621_Paper.pdf @@ -5355,7 +5355,7 @@ Jena D.Hwang AnnieZaenen - MarthaPalmer + MarthaPalmer Criteria for Identifying and Annotating Caused Motion Constructions in Corpus Data 1297–1304 http://www.lrec-conf.org/proceedings/lrec2014/pdf/624_Paper.pdf @@ -5363,7 +5363,7 @@ hwang-etal-2014-criteria - YoshihikoHayashi + YoshihikoHayashi Web-imageability of the Behavioral Features of Basic-level Concepts 3609–3614 http://www.lrec-conf.org/proceedings/lrec2014/pdf/627_Paper.pdf @@ -5372,7 +5372,7 @@ SteveCassidy - DominiqueEstival + DominiqueEstival TimothyJones DenisBurnham JaredBurghold @@ -5383,7 +5383,7 @@ cassidy-etal-2014-alveo - ChrisCuly + ChrisCuly MarcoPassarotti UllaKönig-Cardanobile A Compact Interactive Visualization of Dependency Treebank Query Results @@ -5416,7 +5416,7 @@ wu-etal-2014-illinoiscloudnlp - SatoshiSato + SatoshiSato Text Readability and Word Distribution in <fixed-case>J</fixed-case>apanese 2811–2815 http://www.lrec-conf.org/proceedings/lrec2014/pdf/633_Paper.pdf @@ -5433,7 +5433,7 @@ OctavianPopescu - MarthaPalmer + MarthaPalmer PatrickHanks Mapping <fixed-case>CPA</fixed-case> Patterns onto <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes Senses 882–889 @@ -5442,7 +5442,7 @@ popescu-etal-2014-mapping - Emily M.Bender + Emily M.Bender Language <fixed-case>C</fixed-case>o<fixed-case>LLAGE</fixed-case>: Grammatical Description with the <fixed-case>L</fixed-case>in<fixed-case>GO</fixed-case> Grammar Matrix 2447–2451 http://www.lrec-conf.org/proceedings/lrec2014/pdf/639_Paper.pdf @@ -5450,8 +5450,8 @@ bender-2014-language - Silvia RodríguezVázquez - PierretteBouillon + Silvia RodríguezVázquez + PierretteBouillon AntonBolfing Applying Accessibility-Oriented Controlled Language (<fixed-case>CL</fixed-case>) Rules to Improve Appropriateness of Text Alternatives for Images: an Exploratory Study 4139–4146 @@ -5492,7 +5492,7 @@ ZhengzhongLiu JunAraki - EduardHovy + EduardHovy TerukoMitamura Supervised Within-Document Event Coreference using Information Propagation 4539–4544 @@ -5523,9 +5523,9 @@ mori-neubig-2014-language - LucaCristoforetti + LucaCristoforetti MircoRavanelli - MaurizioOmologo + AlessandroSosi AlbertoAbad MartinHagmueller @@ -5539,7 +5539,7 @@ DanielHladek JanStas - JozefJuhar + JozefJuhar The <fixed-case>S</fixed-case>lovak Categorized News Corpus 1705–1708 http://www.lrec-conf.org/proceedings/lrec2014/pdf/656_Paper.pdf @@ -5577,8 +5577,8 @@ ganitkevitch-callison-burch-2014-multilingual - Mennovan Zaanen - Gerhardvan Huyssteen + Mennovan Zaanen + Gerhardvan Huyssteen SuzanneAussems ChrisEmmery RoaldEiselen @@ -5589,8 +5589,8 @@ van-zaanen-etal-2014-development - LluísPadró - ŽeljkoAgić + LluísPadró + ŽeljkoAgić XavierCarreras BlazFortuna EstebanGarcía-Cuesta @@ -5614,7 +5614,7 @@ MartaVillegas MaiteMelero - NúriaBel + NúriaBel Metadata as Linked Open Data: mapping disparate <fixed-case>XML</fixed-case> metadata registries into one <fixed-case>RDF</fixed-case>/<fixed-case>OWL</fixed-case> registry. 393–400 http://www.lrec-conf.org/proceedings/lrec2014/pdf/664_Paper.pdf @@ -5623,7 +5623,7 @@ GrégoireDétrez - Víctor M.Sánchez-Cartagena + Víctor M.Sánchez-Cartagena AarneRanta Sharing resources between free/open-source rule-based machine translation systems: Grammatical Framework and Apertium 4394–4400 @@ -5641,7 +5641,7 @@ DianaMaynard - MarkGreenwood + MarkGreenwood Who cares about Sarcastic Tweets? Investigating the Impact of Sarcasm on Sentiment Analysis. 4238–4243 http://www.lrec-conf.org/proceedings/lrec2014/pdf/67_Paper.pdf @@ -5649,9 +5649,9 @@ maynard-greenwood-2014-cares - XabierArtola + XabierArtola ZuhaitzBeloki - AitorSoroa + AitorSoroa A stream computing approach towards scalable <fixed-case>NLP</fixed-case> 8–13 http://www.lrec-conf.org/proceedings/lrec2014/pdf/670_Paper.pdf @@ -5678,13 +5678,13 @@ IrinaTemnikova - William A.Baumgartner Jr. - Negacy D.Hailu + William A.Baumgartner Jr. + Negacy D.Hailu IvelinaNikolova TonyMcEnery AdamKilgarriff GaliaAngelova - K. BretonnelCohen + K. BretonnelCohen Sublanguage Corpus Analysis Toolkit: A tool for assessing the representativeness and sublanguage characteristics of corpora 1714–1718 http://www.lrec-conf.org/proceedings/lrec2014/pdf/675_Paper.pdf @@ -5693,7 +5693,7 @@ VioletaSeretan - PierretteBouillon + PierretteBouillon JohannaGerlach A Large-Scale Evaluation of Pre-editing Strategies for Improving User-Generated Content Translation 1793–1799 @@ -5704,7 +5704,7 @@ SigrúnHelgadóttir HrafnLoftsson - EiríkurRögnvaldsson + EiríkurRögnvaldsson Correcting Errors in a New Gold Standard for Tagging <fixed-case>I</fixed-case>celandic Text 2944–2948 http://www.lrec-conf.org/proceedings/lrec2014/pdf/677_Paper.pdf @@ -5712,7 +5712,7 @@ helgadottir-etal-2014-correcting - BéatriceDaille + BéatriceDaille AmirHazem Semi-compositional Method for Synonym Extraction of Multi-Word Terms 1202–1207 @@ -5721,8 +5721,8 @@ daille-hazem-2014-semi - MatúšPleva - JozefJuhár + MatúšPleva + JozefJuhár <fixed-case>TUKE</fixed-case>-<fixed-case>BN</fixed-case>ews-<fixed-case>SK</fixed-case>: <fixed-case>S</fixed-case>lovak Broadcast News Corpus Construction and Evaluation 1709–1713 http://www.lrec-conf.org/proceedings/lrec2014/pdf/680_Paper.pdf @@ -5731,7 +5731,7 @@ CsabaOravecz - TamásVáradi + TamásVáradi BálintSass The <fixed-case>H</fixed-case>ungarian <fixed-case>G</fixed-case>igaword Corpus 1719–1723 @@ -5743,7 +5743,7 @@ KunalSachdeva RishabhSrivastava SambhavJain - DiptiSharma + DiptiSharma <fixed-case>H</fixed-case>indi to <fixed-case>E</fixed-case>nglish Machine Translation: Using Effective Selection in Multi-Model <fixed-case>SMT</fixed-case> 1807–1811 http://www.lrec-conf.org/proceedings/lrec2014/pdf/682_Paper.pdf @@ -5771,12 +5771,12 @@ wattam-etal-2014-experiences - YounggyunHahm + YounggyunHahm JungyeulPark - KyungtaeLim + KyungtaeLim YoungsikKim DosamHwang - Key-SunChoi + Key-SunChoi Named Entity Corpus Construction using <fixed-case>W</fixed-case>ikipedia and <fixed-case>DB</fixed-case>pedia Ontology 2565–2569 http://www.lrec-conf.org/proceedings/lrec2014/pdf/688_Paper.pdf @@ -5795,7 +5795,7 @@ callejas-etal-2014-model - ŽeljkoAgić + ŽeljkoAgić NikolaLjubešić The <fixed-case>SET</fixed-case>imes.<fixed-case>HR</fixed-case> Linguistically Annotated Corpus of <fixed-case>C</fixed-case>roatian 1724–1727 @@ -5827,7 +5827,7 @@ pho-etal-2014-multiple - ŽeljkoAgić + ŽeljkoAgić DašaBerović DanijelaMerkler MarkoTadić @@ -5847,7 +5847,7 @@ MasoodGhayoomi - KirilSimov + KirilSimov PetyaOsenova Constituency Parsing of <fixed-case>B</fixed-case>ulgarian: Word- vs Class-based Parsing 4056–4060 @@ -5857,7 +5857,7 @@ MaikePaetzel - David NicolasRacca + David NicolasRacca DavidDeVault A Multimodal Corpus of Rapid Dialogue Games 4189–4195 @@ -5870,7 +5870,7 @@ Julián DavidArias-Londoño Jesús FranciscoVargas-Bonilla María ClaudiaGonzález-Rátiva - ElmarNöth + ElmarNöth New <fixed-case>S</fixed-case>panish speech corpus database for the analysis of people suffering from <fixed-case>P</fixed-case>arkinson’s disease 342–347 http://www.lrec-conf.org/proceedings/lrec2014/pdf/7_Paper.pdf @@ -5887,9 +5887,9 @@ martens-passarotti-2014-thomas - PeterAnick + PeterAnick MarcVerhagen - JamesPustejovsky + JamesPustejovsky Identification of Technology Terms in Patents 2008–2014 http://www.lrec-conf.org/proceedings/lrec2014/pdf/701_Paper.pdf @@ -5908,7 +5908,7 @@ EduardBejček VáclavaKettnerová - MarkétaLopatková + MarkétaLopatková Automatic Mapping Lexical Resources: A Lexical Unit as the Keystone 2826–2832 http://www.lrec-conf.org/proceedings/lrec2014/pdf/704_Paper.pdf @@ -5943,7 +5943,7 @@ KeigoKubo ShoMatsumiya GrahamNeubig - TomokiToda + TomokiToda SatoshiNakamura FumihiroAdachi RyosukeIsotani @@ -5986,7 +5986,7 @@ bartolini-etal-2014-synsets - VidasDaudaravičius + VidasDaudaravičius Language Editing Dataset of Academic Texts 1738–1742 http://www.lrec-conf.org/proceedings/lrec2014/pdf/714_Paper.pdf @@ -6005,7 +6005,7 @@ YuichiIshimoto TomoyukiTsuchiya - HanaeKoiso + HanaeKoiso YasuharuDen Towards Automatic Transformation between Different Transcription Conventions: Prediction of Intonation Markers from Linguistic and Acoustic Features 311–315 @@ -6028,7 +6028,7 @@ CorineAstésano Ellen GurmanBard BrigitteBigi - LaurentPrévot + LaurentPrévot Aix Map Task corpus: The <fixed-case>F</fixed-case>rench multimodal corpus of task-oriented dialogue 2648–2652 http://www.lrec-conf.org/proceedings/lrec2014/pdf/719_Paper.pdf @@ -6048,7 +6048,7 @@ MilanRusko SakhiaDarjaa - MariánTrnka + MariánTrnka MariánRitomský RóbertSabo Alert!... Calm Down, There is Nothing to Worry About. Warning and Soothing Speech Synthesis. @@ -6067,9 +6067,9 @@ kordoni-simova-2014-multiword - ChristianGirardi - ManuelaSperanza - RacheleSprugnoli + ChristianGirardi + ManuelaSperanza + RacheleSprugnoli SaraTonelli <fixed-case>CROMER</fixed-case>: a Tool for Cross-Document Event and Entity Coreference 3204–3208 @@ -6078,10 +6078,10 @@ girardi-etal-2014-cromer - TiberiuBoroș + TiberiuBoroș AdrianaStan OliverWatts - Stefan DanielDumitrescu + Stefan DanielDumitrescu <fixed-case>RSS</fixed-case>-<fixed-case>TOBI</fixed-case> - A Prosodically Enhanced <fixed-case>R</fixed-case>omanian Speech Corpus 316–320 http://www.lrec-conf.org/proceedings/lrec2014/pdf/727_Paper.pdf @@ -6089,8 +6089,8 @@ boros-etal-2014-rss - ArtūrsZnotiņš - PēterisPaikens + ArtūrsZnotiņš + PēterisPaikens Coreference Resolution for <fixed-case>L</fixed-case>atvian 3209–3213 http://www.lrec-conf.org/proceedings/lrec2014/pdf/729_Paper.pdf @@ -6118,9 +6118,9 @@ ThomasLavergne - GillesAdda - MartineAdda-Decker - LoriLamel + GillesAdda + MartineAdda-Decker + LoriLamel Automatic language identity tagging on word and sentence-level in multilingual text sources: a case-study on <fixed-case>L</fixed-case>uxembourgish 3300–3304 http://www.lrec-conf.org/proceedings/lrec2014/pdf/732_Paper.pdf @@ -6128,10 +6128,10 @@ lavergne-etal-2014-automatic - OrphéeDe Clercq + OrphéeDe Clercq SarahSchulz BartDesmet - VéroniqueHoste + VéroniqueHoste Towards Shared Datasets for Normalization Research 1218–1223 http://www.lrec-conf.org/proceedings/lrec2014/pdf/733_Paper.pdf @@ -6149,11 +6149,11 @@ pecheux-etal-2014-rule - Luis JavierRodríguez-Fuentes - MikelPenagarikano + Luis JavierRodríguez-Fuentes + MikelPenagarikano AmparoVarona - MireiaDiez - GermánBordel + MireiaDiez + GermánBordel <fixed-case>KALAKA</fixed-case>-3: a database for the recognition of spoken <fixed-case>E</fixed-case>uropean languages on <fixed-case>Y</fixed-case>ou<fixed-case>T</fixed-case>ube audios 443–449 http://www.lrec-conf.org/proceedings/lrec2014/pdf/736_Paper.pdf @@ -6162,7 +6162,7 @@ AndrewGargett - JohnBarnden + JohnBarnden Mining Online Discussion Forums for Metaphors 2507–2512 http://www.lrec-conf.org/proceedings/lrec2014/pdf/737_Paper.pdf @@ -6179,8 +6179,8 @@ togia-copestake-2014-tagntext - CarmenGarcía-Mateo - AntonioCardenal + CarmenGarcía-Mateo + AntonioCardenal Xosé LuisRegueira Elisa FernándezRei MartaMartinez @@ -6213,7 +6213,7 @@ BriceIsableu SylvieGibet PierreDe Loor - Jean-ClaudeMartin + Jean-ClaudeMartin A Database of Full Body Virtual Interactions Annotated with Expressivity Scores 3505–3510 http://www.lrec-conf.org/proceedings/lrec2014/pdf/741_Paper.pdf @@ -6234,11 +6234,11 @@ IgorOdriozola - InmaHernaez - María InésTorres - Luis JavierRodriguez-Fuentes - MikelPenagarikano - EvaNavas + InmaHernaez + María InésTorres + Luis JavierRodriguez-Fuentes + MikelPenagarikano + EvaNavas <fixed-case>B</fixed-case>asque Speecon-like and <fixed-case>B</fixed-case>asque <fixed-case>S</fixed-case>peech<fixed-case>D</fixed-case>at <fixed-case>MDB</fixed-case>-600: speech databases for the development of <fixed-case>ASR</fixed-case> technology for <fixed-case>B</fixed-case>asque 2658–2665 http://www.lrec-conf.org/proceedings/lrec2014/pdf/744_Paper.pdf @@ -6257,8 +6257,8 @@ ColineClaude-Lachenaud - ÉricCharton - BenoîtOzell + ÉricCharton + BenoîtOzell MichelGagnon A multimodal interpreter for 3<fixed-case>D</fixed-case> visualization and animation of verbal concepts 3620–3627 @@ -6271,7 +6271,7 @@ AndreasMaier KorbinianRiedhammer UlrichEysholdt - ElmarNöth + ElmarNöth Erlangen-<fixed-case>CLP</fixed-case>: A Large Annotated Corpus of Speech from Children with Cleft Lip and Palate 2671–2674 http://www.lrec-conf.org/proceedings/lrec2014/pdf/748_Paper.pdf @@ -6292,7 +6292,7 @@ AnindyaRoy CamilleGuinaudeau HervéBredin - ClaudeBarras + ClaudeBarras <fixed-case>TVD</fixed-case>: A Reproducible and Multiply Aligned <fixed-case>TV</fixed-case> Series Dataset 418–425 http://www.lrec-conf.org/proceedings/lrec2014/pdf/751_Paper.pdf @@ -6339,9 +6339,9 @@ goto-etal-2014-crowdsourcing - Billy T.M.Wong + Billy T.M.Wong Ian C.Chow - Jonathan J.Webster + Jonathan J.Webster HengbinYan The Halliday Centre Tagger: An Online Platform for Semi-automatic Text Annotation and Analysis 1664–1667 @@ -6403,8 +6403,8 @@ KasiaBudzynska MathildeJanier - ChrisReed - PatrickSaint-Dizier + ChrisReed + PatrickSaint-Dizier ManfredStede OlenaYakorska A Model for Processing Illocutionary Structures and Argumentation in Debates @@ -6425,14 +6425,14 @@ DanielLuzzati CyrilGrouin - IoanaVasilescu - MartineAdda-Decker - EricBilinski + IoanaVasilescu + MartineAdda-Decker + EricBilinski NathalieCamelin JulietteKahn CaroleLailler - LoriLamel - SophieRosset + LoriLamel + SophieRosset Human annotation of <fixed-case>ASR</fixed-case> error regions: Is “gravity” a sharable concept for human annotators? 3050–3056 http://www.lrec-conf.org/proceedings/lrec2014/pdf/771_Paper.pdf @@ -6444,7 +6444,7 @@ LukaNerima LorenzaRusso MariaIvanova - EricWehrli + EricWehrli <fixed-case>S</fixed-case>wiss<fixed-case>A</fixed-case>dmin: A multilingual tagged parallel corpus of press releases 1832–1836 http://www.lrec-conf.org/proceedings/lrec2014/pdf/772_Paper.pdf @@ -6454,7 +6454,7 @@ AnnaVernerová VáclavaKettnerová - MarkétaLopatková + MarkétaLopatková To Pay or to Get Paid: Enriching a Valency Lexicon with Diatheses 2452–2459 http://www.lrec-conf.org/proceedings/lrec2014/pdf/773_Paper.pdf @@ -6478,9 +6478,9 @@ tian-etal-2014-um - RodrigoAgerri + RodrigoAgerri JosuBermudez - GermanRigau + GermanRigau <fixed-case>IXA</fixed-case> pipeline: Efficient and Ready to Use Multilingual <fixed-case>NLP</fixed-case> tools 3823–3828 http://www.lrec-conf.org/proceedings/lrec2014/pdf/775_Paper.pdf @@ -6498,7 +6498,7 @@ matsuyoshi-etal-2014-annotating - MohamedSherif + MohamedSherif SandroCoelho RicardoUsbeck SebastianHellmann @@ -6524,9 +6524,9 @@ SenkaDrobac - KristerLindén - TommiPirinen - MiikkaSilfverberg + KristerLindén + TommiPirinen + MiikkaSilfverberg Heuristic Hyper-minimization of Finite State Lexicons 3319–3324 http://www.lrec-conf.org/proceedings/lrec2014/pdf/784_Paper.pdf @@ -6535,15 +6535,15 @@ SteliosPiperidis - HarrisPapageorgiou + HarrisPapageorgiou ChristianSpurk GeorgRehm KhalidChoukri - OlivierHamon + OlivierHamon NicolettaCalzolari - Riccardodel Gratta - BernardoMagnini - ChristianGirardi + Riccardodel Gratta + BernardoMagnini + ChristianGirardi <fixed-case>META</fixed-case>-<fixed-case>SHARE</fixed-case>: One year after 1532–1538 http://www.lrec-conf.org/proceedings/lrec2014/pdf/786_Paper.pdf @@ -6552,7 +6552,7 @@ ClaudiaBaur - MannyRayner + MannyRayner NikosTsourakis Using a Serious Game to Collect a Child Learner Speech Corpus 2726–2732 @@ -6561,7 +6561,7 @@ baur-etal-2014-using - RiccardoDel Gratta + RiccardoDel Gratta GabriellaPardelli SaraGoggi The <fixed-case>LRE</fixed-case> Map disclosed @@ -6570,7 +6570,7 @@ del-gratta-etal-2014-lre - EvgenyStepanov + EvgenyStepanov GiuseppeRiccardi Ali OrkanBayer The Development of the Multilingual <fixed-case>LUNA</fixed-case> Corpus for Spoken Language System Porting @@ -6589,7 +6589,7 @@ MarcPoch - NúriaBel + NúriaBel SergioEspeja FelipeNavío Ranking Job Offers for Candidates: learning hidden knowledge from Big Data @@ -6600,7 +6600,7 @@ ValérieHanoka - BenoîtSagot + BenoîtSagot An Open-Source Heavily Multilingual Translation Graph Extracted from Wiktionaries and Parallel Corpora 3179–3186 http://www.lrec-conf.org/proceedings/lrec2014/pdf/792_Paper.pdf @@ -6626,7 +6626,7 @@ YvesScherrer - BenoîtSagot + BenoîtSagot A language-independent and fully unsupervised approach to lexicon induction and part-of-speech tagging for closely related languages 502–508 http://www.lrec-conf.org/proceedings/lrec2014/pdf/797_Paper.pdf @@ -6635,10 +6635,10 @@ DavidTavarez - EvaNavas + EvaNavas DanielErro IbonSaratxaga - InmaHernaez + InmaHernaez New bilingual speech databases for audio diarization 2666–2670 http://www.lrec-conf.org/proceedings/lrec2014/pdf/799_Paper.pdf @@ -6648,7 +6648,7 @@ MohamedMorchid RichardDufour - GeorgesLinarès + GeorgesLinarès A <fixed-case>LDA</fixed-case>-Based Topic Classification Approach From Highly Imperfect Automatic Transcriptions 1309–1314 http://www.lrec-conf.org/proceedings/lrec2014/pdf/8_Paper.pdf @@ -6656,7 +6656,7 @@ morchid-etal-2014-lda - Cristina SánchezMarco + Cristina SánchezMarco An open source part-of-speech tagger for <fixed-case>N</fixed-case>orwegian: Building on existing language resources 4111–4117 http://www.lrec-conf.org/proceedings/lrec2014/pdf/801_Paper.pdf @@ -6665,9 +6665,9 @@ AhmetAker - MonicaParamita - MārcisPinnis - RobertGaizauskas + MonicaParamita + MārcisPinnis + RobertGaizauskas Bilingual dictionaries for all <fixed-case>EU</fixed-case> languages 2839–2845 http://www.lrec-conf.org/proceedings/lrec2014/pdf/803_Paper.pdf @@ -6683,10 +6683,10 @@ reynaert-2014-synergy - RaphaelRubino + RaphaelRubino AntonioToral NikolaLjubešić - GemaRamírez-Sánchez + GemaRamírez-Sánchez Quality Estimation for Synthetic Parallel Data Generation 1843–1849 http://www.lrec-conf.org/proceedings/lrec2014/pdf/807_Paper.pdf @@ -6712,8 +6712,8 @@ MaudEhrmann FrancescoCecconi DanieleVannella - John PhilipMcCrae - PhilippCimiano + John PhilipMcCrae + PhilippCimiano RobertoNavigli Representing Multilingual Data as Linked Data: the Case of <fixed-case>B</fixed-case>abel<fixed-case>N</fixed-case>et 2.0 401–408 @@ -6735,9 +6735,9 @@ LinaHenriksen - Dorte HaltrupHansen + Dorte HaltrupHansen BenteMaegaard - Bolette SandfordPedersen + Bolette SandfordPedersen ClausPovlsen Encompassing a spectrum of <fixed-case>LT</fixed-case> users in the <fixed-case>CLARIN</fixed-case>-<fixed-case>DK</fixed-case> Infrastructure 2175–2181 @@ -6747,11 +6747,11 @@ AlexisNasr - FredericBechet - BenoitFavre + FredericBechet + BenoitFavre ThierryBazillon - JoseDeulofeu - AndreValli + JoseDeulofeu + AndreValli Automatically enriching spoken corpora with syntactic information for linguistic studies 854–858 http://www.lrec-conf.org/proceedings/lrec2014/pdf/816_Paper.pdf @@ -6770,7 +6770,7 @@ MariaSimi CristinaBosco - SimonettaMontemagni + SimonettaMontemagni Less is More? Towards a Reduced Inventory of Categories for Training a Parser for the <fixed-case>I</fixed-case>talian <fixed-case>S</fixed-case>tanford Dependencies 83–90 http://www.lrec-conf.org/proceedings/lrec2014/pdf/818_Paper.pdf @@ -6806,7 +6806,7 @@ sonntag-stede-2014-grapat - AntonioPareja-Lora + AntonioPareja-Lora GuillermoCárcamo-Escorza AliciaBallesteros-Calvo Standardisation and Interoperation of Morphosyntactic and Syntactic Annotation Tools for <fixed-case>S</fixed-case>panish and their Annotations @@ -6829,7 +6829,7 @@ JasonUtt SylviaSpringorum MaximilianKöper - SabineSchulte im Walde + SabineSchulte im Walde Fuzzy <fixed-case>V</fixed-case>-Measure - An Evaluation Method for Cluster Analyses of Ambiguous Data 581–587 http://www.lrec-conf.org/proceedings/lrec2014/pdf/829_Paper.pdf @@ -6841,7 +6841,7 @@ YunqingXia WeizhiWang RaymondLau - FangZheng + FangZheng Clustering tweets using<fixed-case>W</fixed-case>ikipedia concepts 2262–2267 http://www.lrec-conf.org/proceedings/lrec2014/pdf/83_Paper.pdf @@ -6868,7 +6868,7 @@ NikolaLjubešić DarjaFišer - TomažErjavec + TomažErjavec <fixed-case>T</fixed-case>weet<fixed-case>C</fixed-case>a<fixed-case>T</fixed-case>: a tool for building <fixed-case>T</fixed-case>witter corpora of smaller languages 2279–2283 http://www.lrec-conf.org/proceedings/lrec2014/pdf/834_Paper.pdf @@ -6876,13 +6876,13 @@ ljubesic-etal-2014-tweetcat - OndřejBojar + OndřejBojar VojtěchDiatka - PavelRychlý + PavelRychlý PavelStraňák - VítSuchomel + VítSuchomel AlešTamchyna - DanielZeman + DanielZeman <fixed-case>H</fixed-case>ind<fixed-case>E</fixed-case>n<fixed-case>C</fixed-case>orp - <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish and <fixed-case>H</fixed-case>indi-only Corpus for Machine Translation 3550–3555 http://www.lrec-conf.org/proceedings/lrec2014/pdf/835_Paper.pdf @@ -6890,9 +6890,9 @@ bojar-etal-2014-hindencorp - SilviaNecşulescu + SilviaNecşulescu SaraMendes - NúriaBel + NúriaBel Combining dependency information and generalization in a pattern-based approach to the classification of lexical-semantic relation instances 4308–4315 http://www.lrec-conf.org/proceedings/lrec2014/pdf/837_Paper.pdf @@ -6900,10 +6900,10 @@ necsulescu-etal-2014-combining - AimiliosChalamandaris - PirrosTsiakoulis + AimiliosChalamandaris + PirrosTsiakoulis SotirisKarabetsos - SpyrosRaptis + SpyrosRaptis Using Audio Books for Training a Text-to-Speech System 3076–3080 http://www.lrec-conf.org/proceedings/lrec2014/pdf/838_Paper.pdf @@ -6911,7 +6911,7 @@ chalamandaris-etal-2014-using - AgataCybulska + AgataCybulska PiekVossen Using a sledgehammer to crack a nut? Lexical diversity and event coreference resolution 4545–4552 @@ -6930,7 +6930,7 @@ MarcKupietz - HaraldLüngen + HaraldLüngen Recent Developments in <fixed-case>D</fixed-case>e<fixed-case>R</fixed-case>e<fixed-case>K</fixed-case>o 2378–2385 http://www.lrec-conf.org/proceedings/lrec2014/pdf/842_Paper.pdf @@ -6938,7 +6938,7 @@ kupietz-lungen-2014-recent - Shu-KaiHsieh + Shu-KaiHsieh Why <fixed-case>C</fixed-case>hinese Web-as-Corpus is Wacky? Or: How Big Data is Killing <fixed-case>C</fixed-case>hinese Corpus Linguistics 2386–2389 http://www.lrec-conf.org/proceedings/lrec2014/pdf/843_Paper.pdf @@ -6946,7 +6946,7 @@ hsieh-2014-chinese - TafseerAhmed Khan + TafseerAhmed Khan Automatic acquisition of <fixed-case>U</fixed-case>rdu nouns (along with gender and irregular plurals) 2846–2850 http://www.lrec-conf.org/proceedings/lrec2014/pdf/844_Paper.pdf @@ -6956,7 +6956,7 @@ ClareLlewellyn ClaireGrover - JonOberlander + JonOberlander EwanKlein Re-using an Argument Corpus to Aid in the Curation of Social Media Collections 462–468 @@ -6966,7 +6966,7 @@ RaivisSkadiņš - JörgTiedemann + JörgTiedemann RobertsRozis DaigaDeksne Billions of Parallel Words for Free: Building and Using the <fixed-case>EU</fixed-case> Bookshop Corpus @@ -6977,9 +6977,9 @@ IsaMaks - RubenIzquierdo + RubenIzquierdo FrancescaFrontini - RodrigoAgerri + RodrigoAgerri PiekVossen AndoniAzpeitia Generating Polarity Lexicons with <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et propagation in 5 languages @@ -6990,9 +6990,9 @@ MaraChinea Rios - GermánSanchis-Trilles - DanielOrtiz-Martínez - FranciscoCasacuberta + GermánSanchis-Trilles + DanielOrtiz-Martínez + FranciscoCasacuberta Online optimisation of log-linear weights in interactive machine translation 3556–3559 http://www.lrec-conf.org/proceedings/lrec2014/pdf/848_Paper.pdf @@ -7014,7 +7014,7 @@ RomanKlinger - PhilippCimiano + PhilippCimiano The <fixed-case>USAGE</fixed-case> review corpus for fine grained multi lingual opinion analysis 2211–2218 http://www.lrec-conf.org/proceedings/lrec2014/pdf/85_Paper.pdf @@ -7025,7 +7025,7 @@ NadjetBouayad-Agha AliciaBurga GerardCasamayor - JoanCodina + JoanCodina RogelioNazar LeoWanner An Exercise in Reuse of Resources: Adapting General Discourse Coreference Resolution for Detecting Lexical Chains in Patent Documentation @@ -7036,8 +7036,8 @@ BernardoSevero - CassiaTrojahn - RenataVieira + CassiaTrojahn + RenataVieira <fixed-case>VOAR</fixed-case>: A Visual and Integrated Ontology Alignment Environment 3671–3677 http://www.lrec-conf.org/proceedings/lrec2014/pdf/851_Paper.pdf @@ -7059,8 +7059,8 @@ ChahinezBenkoussas HussamHamdan - PatriceBellot - FrédéricBéchet + PatriceBellot + FrédéricBéchet ElodieFaath A Collection of Scholarly Book Reviews from the Platforms of electronic sources in Humanities and Social Sciences <fixed-case>O</fixed-case>pen<fixed-case>E</fixed-case>dition.org 4172–4177 @@ -7069,11 +7069,11 @@ benkoussas-etal-2014-collection - Anton KarlIngason + Anton KarlIngason HrafnLoftsson - EiríkurRögnvaldsson + EiríkurRögnvaldsson Einar FreyrSigurðsson - Joel C.Wallenberg + Joel C.Wallenberg Rapid Deployment of Phrase Structure Parsing for Related Languages: A Case Study of <fixed-case>I</fixed-case>nsular <fixed-case>S</fixed-case>candinavian 91–95 http://www.lrec-conf.org/proceedings/lrec2014/pdf/855_Paper.pdf @@ -7106,7 +7106,7 @@ MichaelStadtschnitzer JochenSchwenninger DanielStein - JoachimKoehler + JoachimKoehler Exploiting the large-scale <fixed-case>G</fixed-case>erman Broadcast Corpus to boost the Fraunhofer <fixed-case>IAIS</fixed-case> Speech Recognition System 3887–3890 http://www.lrec-conf.org/proceedings/lrec2014/pdf/858_Paper.pdf @@ -7114,7 +7114,7 @@ stadtschnitzer-etal-2014-exploiting - NataliaLoukachevitch + NataliaLoukachevitch AlekseyAlekseev Summarizing News Clusters on the Basis of Thematic Chains 1600–1607 @@ -7123,7 +7123,7 @@ loukachevitch-alekseev-2014-summarizing - Kilian A.Foth + Kilian A.Foth ArneKöhn NielsBeuck WolfgangMenzel @@ -7151,7 +7151,7 @@ JorgeGracia ElenaMontiel-Ponsoda DanielVila-Suero - GuadalupeAguado-de-Cea + GuadalupeAguado-de-Cea Enabling Language Resources to Expose Translations as Linked Data on the Web 409–413 http://www.lrec-conf.org/proceedings/lrec2014/pdf/863_Paper.pdf @@ -7178,7 +7178,7 @@ TatianaErekhinskaya MeghanaSatpute - DanMoldovan + DanMoldovan Multilingual e<fixed-case>X</fixed-case>tended <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Knowledge Base: Semantic Parsing and Translation of Glosses 2990–2994 http://www.lrec-conf.org/proceedings/lrec2014/pdf/866_Paper.pdf @@ -7196,7 +7196,7 @@ VerónicaPérez-Rosas - RadaMihalcea + RadaMihalcea AlexisNarvaez MihaiBurzo A Multimodal Dataset for Deception Detection @@ -7217,9 +7217,9 @@ AhmedAbdelali - FranciscoGuzman + FranciscoGuzman HassanSajjad - StephanVogel + StephanVogel The <fixed-case>AMARA</fixed-case> Corpus: Building Parallel Language Resources for the Educational Domain 1856–1862 http://www.lrec-conf.org/proceedings/lrec2014/pdf/877_Paper.pdf @@ -7227,10 +7227,10 @@ abdelali-etal-2014-amara - LaumaPretkalniņa - ArtūrsZnotiņš + LaumaPretkalniņa + ArtūrsZnotiņš LauraRituma - DidzisGoško + DidzisGoško Dependency parsing representation effects on the accuracy of semantic applications — an example of an inflective language 4074–4081 http://www.lrec-conf.org/proceedings/lrec2014/pdf/879_Paper.pdf @@ -7239,7 +7239,7 @@ GuiyaoKe - Pierre-FrancoisMarteau + Pierre-FrancoisMarteau Co-clustering of bilingual datasets as a mean for assisting the construction of thematic bilingual comparable corpora 1992–1999 http://www.lrec-conf.org/proceedings/lrec2014/pdf/88_Paper.pdf @@ -7288,7 +7288,7 @@ JetskeKlatter Roelandvan Hout - Henkvan den Heuvel + Henkvan den Heuvel PaulaFikkert AnneBaker Jande Jong @@ -7335,7 +7335,7 @@ PatrickLittell KaitlynPrice - LoriLevin + LoriLevin Morphological parsing of <fixed-case>S</fixed-case>wahili using crowdsourced lexical resources 3333–3339 http://www.lrec-conf.org/proceedings/lrec2014/pdf/896_Paper.pdf @@ -7343,7 +7343,7 @@ littell-etal-2014-morphological - EricCharton + EricCharton Marie-JeanMeurs LudovicJean-Louis MichelGagnon @@ -7356,7 +7356,7 @@ VictoriaArranz KhalidChoukri - ValérieMapelli + ValérieMapelli HélèneMazo <fixed-case>ELRA</fixed-case>’s Consolidated Services for the <fixed-case>HLT</fixed-case> Community 1511–1516 @@ -7366,7 +7366,7 @@ DaisukeKawahara - MarthaPalmer + MarthaPalmer Single Classifier Approach for Verb Sense Disambiguation based on Generalized Features 4210–4213 http://www.lrec-conf.org/proceedings/lrec2014/pdf/90_Paper.pdf @@ -7413,7 +7413,7 @@ alfano-etal-2014-volip - Carla ParraEscartín + Carla ParraEscartín Chasing the Perfect Splitter: A Comparison of Different Compound Splitting Tools 3340–3347 http://www.lrec-conf.org/proceedings/lrec2014/pdf/909_Paper.pdf @@ -7462,8 +7462,8 @@ JanMašek DavidMareček MartinPopel - DanielZeman - ZdeněkŽabokrtský + DanielZeman + ZdeněkŽabokrtský <fixed-case>H</fixed-case>amle<fixed-case>DT</fixed-case> 2.0: Thirty Dependency Treebanks Stanfordized 2334–2341 http://www.lrec-conf.org/proceedings/lrec2014/pdf/915_Paper.pdf @@ -7488,14 +7488,14 @@ MuhammadAbdul-Mageed - MonaDiab + MonaDiab <fixed-case>SANA</fixed-case>: A Large Scale Multi-Genre, Multi-Dialect Lexicon for <fixed-case>A</fixed-case>rabic Subjectivity and Sentiment Analysis http://www.lrec-conf.org/proceedings/lrec2014/pdf/919_Paper.pdf The computational treatment of subjectivity and sentiment in natural language is usually significantly improved by applying features exploiting lexical resources where entries are tagged with semantic orientation (e.g., positive, negative values). In spite of the fair amount of work on Arabic sentiment analysis over the past few years (e.g., (Abbasi et al., 2008; Abdul-Mageed et al., 2014; Abdul-Mageed et al., 2012; Abdul-Mageed and Diab, 2012a; Abdul-Mageed and Diab, 2012b; Abdul-Mageed et al., 2011a; Abdul-Mageed and Diab, 2011)), the language remains under-resourced as to these polarity repositories compared to the English language. In this paper, we report efforts to build and present SANA, a large-scale, multi-genre, multi-dialect multi-lingual lexicon for the subjectivity and sentiment analysis of the Arabic language and dialects. abdul-mageed-diab-2014-sana - BehrangZadeh + BehrangZadeh SiegfriedHandschuh Evaluation of Technology Term Recognition with Random Indexing http://www.lrec-conf.org/proceedings/lrec2014/pdf/920_Paper.pdf @@ -7504,7 +7504,7 @@ StefanBott - SabineSchulte im Walde + SabineSchulte im Walde Optimizing a Distributional Semantic Model for the Prediction of <fixed-case>G</fixed-case>erman Particle Verb Compositionality http://www.lrec-conf.org/proceedings/lrec2014/pdf/921_Paper.pdf In the work presented here we assess the degree of compositionality of German Particle Verbs with a Distributional Semantics Model which only relies on word window information and has no access to syntactic information as such. Our method only takes the lexical distributional distance between the Particle Verb to its Base Verb as a predictor for compositionality. We show that the ranking of distributional similarity correlates significantly with the ranking of human judgements on semantic compositionality for a series of Particle Verbs and the Base Verbs they are derived from. We also investigate the influence of further linguistic factors, such as the ambiguity and the overall frequency of the verbs and a syntactically separate occurrences of verbs and particles that causes difficulties for the correct lemmatization of Particle Verbs. We analyse in how far these factors may influence the success with which the compositionality of the Particle Verbs may be predicted. @@ -7519,10 +7519,10 @@ dey-fung-2014-hindi - NancyIde - JamesPustejovsky - ChristopherCieri - EricNyberg + NancyIde + JamesPustejovsky + ChristopherCieri + EricNyberg DiWang KeithSuderman MarcVerhagen @@ -7542,7 +7542,7 @@ christodoulides-etal-2014-dismo - Trang MaiXuan + Trang MaiXuan YoheiMurakami DonghuiLin ToruIshida @@ -7553,7 +7553,7 @@ KlimPeshkov - LaurentPrévot + LaurentPrévot Segmentation evaluation metrics, a comparison grounded on prosodic and discourse units http://www.lrec-conf.org/proceedings/lrec2014/pdf/931_Paper.pdf Knowledge on evaluation metrics and best practices of using them have improved fast in the recent years Fort et al. (2012). However, the advances concern mostly evaluation of classification related tasks. Segmentation tasks have received less attention. Nevertheless, there are crucial in a large number of linguistic studies. A range of metrics is available (F-score on boundaries, F-score on units, WindowDiff ((WD), Boundary Similarity (BS) but it is still relatively difficult to interpret these metrics on various linguistic segmentation tasks, such as prosodic and discourse segmentation. In this paper, we consider real segmented datasets (introduced in Peshkov et al. (2012)) as references which we deteriorate in different ways (random addition of boundaries, random removal boundaries, near-miss errors introduction). This provide us with various measures on controlled datasets and with an interesting benchmark for various linguistic segmentation tasks. @@ -7563,14 +7563,14 @@ AndreaAbel AivarsGlaznieks LionelNicolas - EgonStemle + EgonStemle <fixed-case>K</fixed-case>o<fixed-case>K</fixed-case>o: an <fixed-case>L</fixed-case>1 Learner Corpus for <fixed-case>G</fixed-case>erman http://www.lrec-conf.org/proceedings/lrec2014/pdf/934_Paper.pdf We introduce the KoKo corpus, a collection of German L1 learner texts annotated with learner errors, along with the methods and tools used in its construction and evaluation. The corpus contains both texts and corresponding survey information from 1,319 pupils and amounts to around 716,000 tokens. The evaluation of the performed transcriptions and annotations shows an accuracy of orthographic error annotations of approximately 80% as well as high accuracies of transcriptions (>99%), automatic tokenisation (>99%), sentence splitting (>96%) and POS-tagging (>94%). The KoKo corpus will be published at the end of 2014. It will be the first accessible linguistically annotated German L1 learner corpus and a valuable source for research on L1 learner language as well as for teachers of German as L1, in particular with regards to writing skills. abel-etal-2014-koko - PetraBarančíková + PetraBarančíková RudolfRosa AlešTamchyna Improving Evaluation of <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>zech <fixed-case>MT</fixed-case> through Paraphrasing @@ -7588,11 +7588,11 @@ faessler-etal-2014-disclose - Mitesh M.Khapra + Mitesh M.Khapra AnanthakrishnanRamanathan AnoopKunchukuttan KarthikVisweswariah - PushpakBhattacharyya + PushpakBhattacharyya When Transliteration Met Crowdsourcing : An Empirical Study of Transliteration via Crowdsourcing using Efficient, Non-redundant and Fair Quality Control http://www.lrec-conf.org/proceedings/lrec2014/pdf/94_Paper.pdf Sufficient parallel transliteration pairs are needed for training state of the art transliteration engines. Given the cost involved, it is often infeasible to collect such data using experts. Crowdsourcing could be a cheaper alternative, provided that a good quality control (QC) mechanism can be devised for this task. Most QC mechanisms employed in crowdsourcing are aggressive (unfair to workers) and expensive (unfair to requesters). In contrast, we propose a low-cost QC mechanism which is fair to both workers and requesters. At the heart of our approach, lies a rule based Transliteration Equivalence approach which takes as input a list of vowels in the two languages and a mapping of the consonants in the two languages. We empirically show that our approach outperforms other popular QC mechanisms (viz., consensus and sampling) on two vital parameters : (i) fairness to requesters (lower cost per correct transliteration) and (ii) fairness to workers (lower rate of rejecting correct answers). Further, as an extrinsic evaluation we use the standard NEWS 2010 test set and show that such quality controlled crowdsourced data compares well to expert data when used for training a transliteration engine. @@ -7601,13 +7601,13 @@ FrederikBaumgardt GiuseppeCelano - Gregory R.Crane + Gregory R.Crane StellaDee MaryamForadi EmilyFranzini GretaFranzini MonicaLent - MariaMoritz + MariaMoritz SimonaStoyanova Open Philology at the <fixed-case>U</fixed-case>niversity of <fixed-case>L</fixed-case>eipzig http://www.lrec-conf.org/proceedings/lrec2014/pdf/940_Paper.pdf @@ -7624,10 +7624,10 @@ NajehHajlaoui - DavidKolovratnik - JaakkoVäyrynen + DavidKolovratnik + JaakkoVäyrynen RalfSteinberger - DanielVarga + DanielVarga <fixed-case>DCEP</fixed-case> -Digital Corpus of the <fixed-case>E</fixed-case>uropean Parliament http://www.lrec-conf.org/proceedings/lrec2014/pdf/943_Paper.pdf We are presenting a new highly multilingual document-aligned parallel corpus called DCEP - Digital Corpus of the European Parliament. It consists of various document types covering a wide range of subject domains. With a total of 1.37 billion words in 23 languages (253 language pairs), gathered in the course of ten years, this is the largest single release of documents by a European Union institution. DCEP contains most of the content of the European Parliament’s official Website. It includes different document types produced between 2001 and 2012, excluding only the documents already exist in the Europarl corpus to avoid overlapping. We are presenting the typical acquisition steps of the DCEP corpus: data access, document alignment, sentence splitting, normalisation and tokenisation, and sentence alignment efforts. The sentence-level alignment is still in progress but based on some first experiments; we showed that DCEP is very useful for NLP applications, in particular for Statistical Machine Translation. @@ -7635,9 +7635,9 @@ JosephMariani - ChristopherCieri + ChristopherCieri GilFrancopoulo - PatrickParoubek + PatrickParoubek MarineDelaborde Facing the Identification Problem in Language-Related Scientific Data Analysis. http://www.lrec-conf.org/proceedings/lrec2014/pdf/945_Paper.pdf @@ -7646,7 +7646,7 @@ MarietteSoury - LaurenceDevillers + LaurenceDevillers Smile and Laughter in Human-Machine Interaction: a study of engagement http://www.lrec-conf.org/proceedings/lrec2014/pdf/947_Paper.pdf This article presents a corpus featuring adults playing games in interaction with machine trying to induce laugh. This corpus was collected during Interspeech 2013 in Lyon to study behavioral differences correlated to different personalities and cultures. We first present the collection protocol, then the corpus obtained and finally different quantitative and qualitative measures. Smiles and laughs are types of affect bursts which are defined as short emotional “non-speech” expressions. Here we correlate smile and laugh with personality traits and cultural background. Our final objective is to propose a measure of engagement deduced from those affect bursts. @@ -7654,8 +7654,8 @@ LivioRobaldo - GuidoBoella - LuigiDi Caro + GuidoBoella + LuigiDi Caro AndreaViolato Exploiting networks in Law http://www.lrec-conf.org/proceedings/lrec2014/pdf/95_Paper.pdf @@ -7688,7 +7688,7 @@ ThomasPellegrini VahidHedayati - AngelaCosta + AngelaCosta El-<fixed-case>WOZ</fixed-case>: a client-server wizard-of-oz interface http://www.lrec-conf.org/proceedings/lrec2014/pdf/959_Paper.pdf In this paper, we present a speech recording interface developed in the context of a project on automatic speech recognition for elderly native speakers of European Portuguese. In order to collect spontaneous speech in a situation of interaction with a machine, this interface was designed as a Wizard-of-Oz (WOZ) plateform. In this setup, users interact with a fake automated dialog system controled by a human wizard. It was implemented as a client-server application and the subjects interact with a talking head. The human wizard chooses pre-defined questions or sentences in a graphical user interface, which are then synthesized and spoken aloud by the avatar on the client side. A small spontaneous speech corpus was collected in a daily center. Eight speakers between 75 and 90 years old were recorded. They appreciated the interface and felt at ease with the avatar. Manual orthographic transcriptions were created for the total of about 45 minutes of speech. @@ -7697,7 +7697,7 @@ FeiCheng KevinDuh - YujiMatsumoto + YujiMatsumoto Parsing <fixed-case>C</fixed-case>hinese Synthetic Words with a Character-based Dependency Model http://www.lrec-conf.org/proceedings/lrec2014/pdf/96_Paper.pdf Synthetic word analysis is a potentially important but relatively unexplored problem in Chinese natural language processing. Two issues with the conventional pipeline methods involving word segmentation are (1) the lack of a common segmentation standard and (2) the poor segmentation performance on OOV words. These issues may be circumvented if we adopt the view of character-based parsing, providing both internal structures to synthetic words and global structure to sentences in a seamless fashion. However, the accuracy of synthetic word parsing is not yet satisfactory, due to the lack of research. In view of this, we propose and present experiments on several synthetic word parsers. Additionally, we demonstrate the usefulness of incorporating large unlabelled corpora and a dictionary for this task. Our parsers significantly outperform the baseline (a pipeline method). @@ -7705,10 +7705,10 @@ MohamedBen Jannet - MartineAdda-Decker + MartineAdda-Decker OlivierGalibert JulietteKahn - SophieRosset + SophieRosset <fixed-case>ETER</fixed-case> : a new metric for the evaluation of hierarchical named entity recognition http://www.lrec-conf.org/proceedings/lrec2014/pdf/960_Paper.pdf This paper addresses the question of hierarchical named entity evaluation. In particular, we focus on metrics to deal with complex named entity structures as those introduced within the QUAERO project. The intended goal is to propose a smart way of evaluating partially correctly detected complex entities, beyond the scope of traditional metrics. None of the existing metrics are fully adequate to evaluate the proposed QUAERO task involving entity detection, classification and decomposition. We are discussing the strong and weak points of the existing metrics. We then introduce a new metric, the Entity Tree Error Rate (ETER), to evaluate hierarchical and structured named entity detection, classification and decomposition. The ETER metric builds upon the commonly accepted SER metric, but it takes the complex entity structure into account by measuring errors not only at the slot (or complex entity) level but also at a basic (atomic) entity level. We are comparing our new metric to the standard one using first some examples and then a set of real data selected from the ETAPE evaluation results. @@ -7717,7 +7717,7 @@ JunAraki ZhengzhongLiu - EduardHovy + EduardHovy TerukoMitamura Detecting Subevent Structure for Event Coreference Resolution http://www.lrec-conf.org/proceedings/lrec2014/pdf/963_Paper.pdf @@ -7734,14 +7734,14 @@ shah-etal-2014-efficient - AlexandraBalahur + AlexandraBalahur MarcoTurchi RalfSteinberger - Jose-ManuelPerea-Ortega + Jose-ManuelPerea-Ortega GuillaumeJacquet DilekKüçük VanniZavarella - AdilEl Ghali + AdilEl Ghali Resource Creation and Evaluation for Multilingual Sentiment Analysis in Social Media Texts http://www.lrec-conf.org/proceedings/lrec2014/pdf/965_Paper.pdf This paper presents an evaluation of the use of machine translation to obtain and employ data for training multilingual sentiment classifiers. We show that the use of machine translated data obtained similar results as the use of native-speaker translations of the same data. Additionally, our evaluations pinpoint to the fact that the use of multilingual data, including that obtained through machine translation, leads to improved results in sentiment classification. Finally, we show that the performance of the sentiment classifiers built on machine translated data can be improved using original data from the target language and that even a small amount of such texts can lead to significant growth in the classification performance. @@ -7765,14 +7765,14 @@ RenlongAi - MarcelaCharfuelan + MarcelaCharfuelan <fixed-case>MAT</fixed-case>: a tool for <fixed-case>L</fixed-case>2 pronunciation errors annotation http://www.lrec-conf.org/proceedings/lrec2014/pdf/971_Paper.pdf In the area of Computer Assisted Language Learning(CALL), second language (L2) learners’ spoken data is an important resource for analysing and annotating typical L2 pronunciation errors. The annotation of L2 pronunciation errors in spoken data is not an easy task though, normally it requires manual annotation from trained linguists or phoneticians. In order to facilitate this task, in this paper, we present the MAT tool, a web-based tool intended to facilitate the annotation of L2 learners’ pronunciation errors at various levels. The tool has been designed taking into account recent studies on error detection in pronunciation training. It also aims at providing an easy and fast annotation process via a comprehensive and friendly user interface. The tool is based on the MARY TTS open source platform, from which it uses the components: text analyser (tokeniser, syllabifier, phonemiser), phonetic aligner and speech signal processor. Annotation results at sentence, word, syllable and phoneme levels are stored in XML format. The tool is currently under evaluation with a L2 learners’ spoken corpus recorded in the SPRINTER (Language Technology for Interactive, Multi-Media Online Language Learning) project. ai-charfuelan-2014-mat - KalliopiZervanou + KalliopiZervanou EliasIosif AlexandrosPotamianos Word Semantic Similarity for Morphologically Rich Languages @@ -7784,15 +7784,15 @@ JoshuaElliot LoganKearsley JasonHousley - AlanMelby + AlanMelby <fixed-case>L</fixed-case>ex<fixed-case>T</fixed-case>erm Manager: Design for an Integrated Lexicography and Terminology System http://www.lrec-conf.org/proceedings/lrec2014/pdf/975_Paper.pdf We present a design for a multi-modal database system for lexical information that can be accessed in either lexicographical or terminological views. The use of a single merged data model makes it easy to transfer common information between termbases and dictionaries, thus facilitating information sharing and re-use. Our combined model is based on the LMF and TMF metamodels for lexicographical and terminological databases and is compatible with both, thus allowing for the import of information from existing dictionaries and termbases, which may be transferred to the complementary view and re-exported. We also present a new Linguistic Configuration Model, analogous to a TBX XCS file, which can be used to specify multiple language-specific schemata for validating and understanding lexical information in a single database. Linguistic configurations are mutable and can be refined and evolved over time as understanding of documentary needs improves. The system is designed with a client-server architecture using the HTTP protocol, allowing for the independent implementation of multiple clients for specific use cases and easy deployment over the web. elliot-etal-2014-lexterm - DanielPeterson - MarthaPalmer + DanielPeterson + MarthaPalmer ShuminWu Focusing Annotation for Semantic Role Labeling http://www.lrec-conf.org/proceedings/lrec2014/pdf/977_Paper.pdf @@ -7810,16 +7810,16 @@ lapponi-etal-2014-road - PennyLabropoulou - ChristopherCieri - MariaGavrilidou + PennyLabropoulou + ChristopherCieri + MariaGavrilidou Developing a Framework for Describing Relations among Language Resources http://www.lrec-conf.org/proceedings/lrec2014/pdf/979_Paper.pdf In this paper, we study relations holding between language resources as implemented in activities concerned with their documentation. We envision the term “language resources” with an inclusive definition covering datasets (corpora, lexica, ontologies, grammars, etc.), tools (including web services, workflows, platforms etc.), related publications and documentation, specifications and guidelines. However, the scope of the paper is limited to relations holding for datasets and tools. The study fosuses on the META-SHARE infrastructure and the Linguistic Data Consortium and takes into account the ISOcat DCR relations. Based on this study, we propose a taxonomy of relations, discuss their semantics and provide specifications for their use in order to cater for semantic interoperability. Issues of granularity, redundancy in codification, naming conventions and semantics of the relations are presented. labropoulou-etal-2014-developing - Clémentde Groc + Clémentde Groc XavierTannier Evaluating Web-as-corpus Topical Document Retrieval with an Index of the <fixed-case>O</fixed-case>pen<fixed-case>D</fixed-case>irectory http://www.lrec-conf.org/proceedings/lrec2014/pdf/980_Paper.pdf @@ -7828,8 +7828,8 @@ SantanuPal - Sudip KumarNaskar - SivajiBandyopadhyay + Sudip KumarNaskar + SivajiBandyopadhyay Word Alignment-Based Reordering of Source Chunks in <fixed-case>PB</fixed-case>-<fixed-case>SMT</fixed-case> http://www.lrec-conf.org/proceedings/lrec2014/pdf/982_Paper.pdf Reordering poses a big challenge in statistical machine translation between distant language pairs. The paper presents how reordering between distant language pairs can be handled efficiently in phrase-based statistical machine translation. The problem of reordering between distant languages has been approached with prior reordering of the source text at chunk level to simulate the target language ordering. Prior reordering of the source chunks is performed in the present work by following the target word order suggested by word alignment. The testset is reordered using monolingual MT trained on source and reordered source. This approach of prior reordering of the source chunks was compared with pre-ordering of source words based on word alignments and the traditional approach of prior source reordering based on language-pair specific reordering rules. The effects of these reordering approaches were studied on an English–Bengali translation task, a language pair with different word order. From the experimental results it was found that word alignment based reordering of the source chunks is more effective than the other reordering approaches, and it produces statistically significant improvements over the baseline system on BLEU. On manual inspection we found significant improvements in terms of word alignments. @@ -7855,8 +7855,8 @@ yates-etal-2014-framework - ZdeňkaUrešová - JanHajič + ZdeňkaUrešová + JanHajič PavelPecina OndřejDušek Multilingual Test Sets for Machine Translation of Search Queries for Cross-Lingual Information Retrieval in the Medical Domain @@ -7875,9 +7875,9 @@ ngonga-ngomo-etal-2014-tool - Clémentde Groc + Clémentde Groc XavierTannier - Claudede Loupy + Claudede Loupy Thematic Cohesion: measuring terms discriminatory power toward themes http://www.lrec-conf.org/proceedings/lrec2014/pdf/991_Paper.pdf We present a new measure of thematic cohesion. This measure associates each term with a weight representing its discriminatory power toward a theme, this theme being itself expressed by a list of terms (a thematic lexicon). This thematic cohesion criterion can be used in many applications, such as query expansion, computer-assisted translation, or iterative construction of domain-specific lexicons and corpora. The measure is computed in two steps. First, a set of documents related to the terms is gathered from the Web by querying a Web search engine. Then, we produce an oriented co-occurrence graph, where vertices are the terms and edges represent the fact that two terms co-occur in a document. This graph can be interpreted as a recommendation graph, where two terms occurring in a same document means that they recommend each other. This leads to using a random walk algorithm that assigns a global importance value to each vertex of the graph. After observing the impact of various parameters on those importance values, we evaluate their correlation with retrieval effectiveness. @@ -7885,7 +7885,7 @@ TatianaGornostay - AndrejsVasiļjevs + AndrejsVasiļjevs Terminology Resources and Terminology Work Benefit from Cloud Services http://www.lrec-conf.org/proceedings/lrec2014/pdf/992_Paper.pdf This paper presents the concept of the innovative platform TaaS “Terminology as a Service”. TaaS brings the benefits of cloud services to the user, in order to foster the creation of terminology resources and to maintain their up-to-datedness by integrating automated data extraction and user-supported clean-up of raw terminological data and sharing user-validated terminology. The platform is based on cutting-edge technologies, provides single-access-point terminology services, and facilitates the establishment of emerging trends beyond conventional praxis and static models in terminology work. A cloud-based, user-oriented, collaborative, portable, interoperable, and multilingual platform offers such terminology services as terminology project creation and sharing, data collection for translation lookup, user document upload and management, terminology extraction customisation and execution, raw terminological data management, validated terminological data export and reuse, and other terminology services. @@ -7893,7 +7893,7 @@ MunshiAsadullah - PatrickParoubek + PatrickParoubek AnneVilnat Bidirectionnal converter between syntactic annotations : from <fixed-case>F</fixed-case>rench Treebank Dependencies to <fixed-case>PASSAGE</fixed-case> annotations, and back http://www.lrec-conf.org/proceedings/lrec2014/pdf/995_Paper.pdf @@ -7902,7 +7902,7 @@ MarcosZampieri - BinyamGebre + BinyamGebre <fixed-case>V</fixed-case>ar<fixed-case>C</fixed-case>lass: An Open-source Language Identification Tool for Language Varieties http://www.lrec-conf.org/proceedings/lrec2014/pdf/996_Paper.pdf This paper presents VarClass, an open-source tool for language identification available both to be downloaded as well as through a graphical user-friendly interface. The main difference of VarClass in comparison to other state-of-the-art language identification tools is its focus on language varieties. General purpose language identification tools do not take language varieties into account and our work aims to fill this gap. VarClass currently contains language models for over 27 languages in which 10 of them are language varieties. We report an average performance of over 90.5% accuracy in a challenging dataset. More language models will be included in the upcoming months. diff --git a/data/xml/L16.xml b/data/xml/L16.xml index e1d534b6c7..4ae22ab719 100644 --- a/data/xml/L16.xml +++ b/data/xml/L16.xml @@ -3,17 +3,17 @@ Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC'16) - NicolettaCalzolari - KhalidChoukri + NicolettaCalzolari + KhalidChoukri ThierryDeclerck SaraGoggi MarkoGrobelnik - BenteMaegaard - JosephMariani + BenteMaegaard + JosephMariani HeleneMazo - AsuncionMoreno - JanOdijk - SteliosPiperidis + AsuncionMoreno + JanOdijk + SteliosPiperidis European Language Resources Association (ELRA)
Portorož, Slovenia
May @@ -25,9 +25,9 @@ Evaluating Machine Translation in a Usage Scenario - RosaGaudio + RosaGaudio AljoschaBurchardt - AntónioBranco + AntónioBranco 1–8 In this document we report on a user-scenario-based evaluation aiming at assessing the performance of machine translation (MT) systems in a real context of use. We describe a sequel of experiments that has been performed to estimate the usefulness of MT and to test if improvements of MT technology lead to better performance in the usage scenario. One goal is to find the best methodology for evaluating the eventual benefit of a machine translation system in an application. The evaluation is based on the QTLeap corpus, a novel multilingual language resource that was collected through a real-life support service via chat. It is composed of naturally occurring utterances produced by users while interacting with a human technician providing answers. The corpus is available in eight different languages: Basque, Bulgarian, Czech, Dutch, English, German, Portuguese and Spanish. L16-1001 @@ -46,8 +46,8 @@ Enhancing Access to Online Education: Quality Machine Translation of <fixed-case>MOOC</fixed-case> Content ValiaKordoni - Antalvan den Bosch - Katia LidaKermanidis + Antalvan den Bosch + Katia LidaKermanidis VilelminiSosoni KostadinCholakov IrisHendrickx @@ -68,8 +68,8 @@ <fixed-case>PE</fixed-case>2rr Corpus: Manual Error Annotation of Automatically Pre-annotated <fixed-case>MT</fixed-case> Post-edits - MajaPopović - MihaelArčan + MajaPopović + MihaelArčan 27–32 We present a freely available corpus containing source language texts from different domains along with their automatically generated translations into several distinct morphologically rich languages, their post-edited versions, and error annotations of the performed post-edit operations. We believe that the corpus will be useful for many different applications. The main advantage of the approach used for creation of the corpus is the fusion of post-editing and error classification tasks, which have usually been seen as two independent tasks, although naturally they are not. We also show benefits of coupling automatic and manual error classification which facilitates the complex manual error annotation task as well as the development of automatic error classification tools. In addition, the approach facilitates annotation of language pair related issues. L16-1005 @@ -77,7 +77,7 @@ Sentiment Lexicons for <fixed-case>A</fixed-case>rabic Social Media - SaifMohammad + SaifMohammad MohammadSalameh SvetlanaKiritchenko 33–37 @@ -89,7 +89,7 @@ A Language Independent Method for Generating Large Scale Polarity Lexicons GiuseppeCastellucci DaniloCroce - RobertoBasili + RobertoBasili 38–45 Sentiment Analysis systems aims at detecting opinions and sentiments that are expressed in texts. Many approaches in literature are based on resources that model the prior polarity of words or multi-word expressions, i.e. a polarity lexicon. Such resources are defined by teams of annotators, i.e. a manual annotation is provided to associate emotional or sentiment facets to the lexicon entries. The development of such lexicons is an expensive and language dependent process, making them often not covering all the linguistic sentiment phenomena. Moreover, once a lexicon is defined it can hardly be adopted in a different language or even a different domain. In this paper, we present several Distributional Polarity Lexicons (DPLs), i.e. large-scale polarity lexicons acquired with an unsupervised methodology based on Distributional Models of Lexical Semantics. Given a set of heuristically annotated sentences from Twitter, we transfer the sentiment information from sentences to words. The approach is mostly unsupervised, and experimental evaluations on Sentiment Analysis tasks in two languages show the benefits of the generated resources. The generated DPLs are publicly available in English and Italian. L16-1007 @@ -110,7 +110,7 @@ A Comparison of Domain-based Word Polarity Estimation using different Word Embeddings AitorGarcía Pablos MontseCuadros - GermanRigau + GermanRigau 54–60 A key point in Sentiment Analysis is to determine the polarity of the sentiment implied by a certain word or expression. In basic Sentiment Analysis systems this sentiment polarity of the words is accounted and weighted in different ways to provide a degree of positivity/negativity. Currently words are also modelled as continuous dense vectors, known as word embeddings, which seem to encode interesting semantic knowledge. With regard to Sentiment Analysis, word embeddings are used as features to more complex supervised classification systems to obtain sentiment classifiers. In this paper we compare a set of existing sentiment lexicons and sentiment lexicon generation techniques. We also show a simple but effective technique to calculate a word polarity value for each word in a domain using existing continuous word embeddings generation methods. Further, we also show that word embeddings calculated on in-domain corpus capture the polarity better than the ones calculated on general-domain corpus. L16-1009 @@ -162,7 +162,7 @@ AlessiaBarbagli PietroLucisano FeliceDell’Orletta - SimonettaMontemagni + SimonettaMontemagni GiuliaVenturi 88–95 In this paper, we present the CItA corpus (Corpus Italiano di Apprendenti L1), a collection of essays written by Italian L1 learners collected during the first and second year of lower secondary school. The corpus was built in the framework of an interdisciplinary study jointly carried out by computational linguistics and experimental pedagogists and aimed at tracking the development of written language competence over the years and students’ background information. @@ -173,8 +173,8 @@ If You <fixed-case>E</fixed-case>ven Don’t Have a Bit of <fixed-case>B</fixed-case>ible: Learning Delexicalized <fixed-case>POS</fixed-case> Taggers ZhiweiYu DavidMareček - ZdeněkŽabokrtský - DanielZeman + ZdeněkŽabokrtský + DanielZeman 96–103 Part-of-speech (POS) induction is one of the most popular tasks in research on unsupervised NLP. Various unsupervised and semi-supervised methods have been proposed to tag an unseen language. However, many of them require some partial understanding of the target language because they rely on dictionaries or parallel corpora such as the Bible. In this paper, we propose a different method named delexicalized tagging, for which we only need a raw corpus of the target language. We transfer tagging models trained on annotated corpora of one or more resource-rich languages. We employ language-independent features such as word length, frequency, neighborhood entropy, character classes (alphabetic vs. numeric vs. punctuation) etc. We demonstrate that such features can, to certain extent, serve as predictors of the part of speech, represented by the universal POS tag. L16-1015 @@ -208,7 +208,7 @@ Towards a Multi-dimensional Taxonomy of Stories in Dialogue Kathryn J.Collins - DavidTraum + DavidTraum 118–124 In this paper, we present a taxonomy of stories told in dialogue. We based our scheme on prior work analyzing narrative structure and method of telling, relation to storyteller identity, as well as some categories particular to dialogue, such as how the story gets introduced. Our taxonomy currently has 5 major dimensions, with most having sub-dimensions - each dimension has an associated set of dimension-specific labels. We adapted an annotation tool for this taxonomy and have annotated portions of two different dialogue corpora, Switchboard and the Distress Analysis Interview Corpus. We present examples of some of the tags and concepts with stories from Switchboard, and some initial statistics of frequencies of the tags. L16-1018 @@ -216,12 +216,12 @@ <fixed-case>P</fixed-case>ento<fixed-case>R</fixed-case>ef: A Corpus of Spoken References in Task-oriented Dialogues - SinaZarrieß + SinaZarrieß JulianHough - CaseyKennington - RameshManuvinakurike + CaseyKennington + RameshManuvinakurike DavidDeVault - RaquelFernández + RaquelFernández DavidSchlangen 125–131 PentoRef is a corpus of task-oriented dialogues collected in systematically manipulated settings. The corpus is multilingual, with English and German sections, and overall comprises more than 20000 utterances. The dialogues are fully transcribed and annotated with referring expressions mapped to objects in corresponding visual scenes, which makes the corpus a rich resource for research on spoken referring expressions in generation and resolution. The corpus includes several sub-corpora that correspond to different dialogue situations where parameters related to interactivity, visual access, and verbal channel have been manipulated in systematic ways. The corpus thus lends itself to very targeted studies of reference in spontaneous dialogue. @@ -231,7 +231,7 @@ Transfer of Corpus-Specific Dialogue Act Annotation to <fixed-case>ISO</fixed-case> Standard: Is it worth it? Shammur AbsarChowdhury - EvgenyStepanov + EvgenyStepanov GiuseppeRiccardi 132–135 Spoken conversation corpora often adapt existing Dialogue Act (DA) annotation specifications, such as DAMSL, DIT++, etc., to task specific needs, yielding incompatible annotations; thus, limiting corpora re-usability. Recently accepted ISO standard for DA annotation – Dialogue Act Markup Language (DiAML) – is designed as domain and application independent. Moreover, the clear separation of dialogue dimensions and communicative functions, coupled with the hierarchical organization of the latter, allows for classification at different levels of granularity. However, re-annotating existing corpora with the new scheme might require significant effort. In this paper we test the utility of the ISO standard through comparative evaluation of the corpus-specific legacy and the semi-automatically transferred DiAML DA annotations on supervised dialogue act classification task. To test the domain independence of the resulting annotations, we perform cross-domain and data aggregation evaluation. Compared to the legacy annotation scheme, on the Italian LUNA Human-Human corpus, the DiAML annotation scheme exhibits better cross-domain and data aggregation classification performance, while maintaining comparable in-domain performance. @@ -241,7 +241,7 @@ <fixed-case>W</fixed-case>iki<fixed-case>C</fixed-case>oref: An <fixed-case>E</fixed-case>nglish Coreference-annotated Corpus of <fixed-case>W</fixed-case>ikipedia Articles AbbasGhaddar - PhillippeLanglais + PhillippeLanglais 136–142 This paper presents WikiCoref, an English corpus annotated for anaphoric relations, where all documents are from the English version of Wikipedia. Our annotation scheme follows the one of OntoNotes with a few disparities. We annotated each markable with coreference type, mention type and the equivalent Freebase topic. Since most similar annotation efforts concentrate on very specific types of written text, mainly newswire, there is a lack of resources for otherwise over-used Wikipedia texts. The corpus described in this paper addresses this issue. We present a freely available resource we initially devised for improving coreference resolution algorithms dedicated to Wikipedia texts. Our corpus has no restriction on the topics of the documents being annotated, and documents of various sizes have been considered for annotation. L16-1021 @@ -257,9 +257,9 @@ Adapting an Entity Centric Model for <fixed-case>P</fixed-case>ortuguese Coreference Resolution - EvandroFonseca - RenataVieira - AlineVanin + EvandroFonseca + RenataVieira + AlineVanin 150–154 This paper presents the adaptation of an Entity Centric Model for Portuguese coreference resolution, considering 10 named entity categories. The model was evaluated on named e using the HAREM Portuguese corpus and the results are 81.0% of precision and 58.3% of recall overall, the resulting system is freely available L16-1023 @@ -267,7 +267,7 @@ <fixed-case>IMS</fixed-case> <fixed-case>H</fixed-case>ot<fixed-case>C</fixed-case>oref <fixed-case>DE</fixed-case>: A Data-driven Co-reference Resolver for <fixed-case>G</fixed-case>erman - InaRoesiger + InaRoesiger JonasKuhn 155–160 This paper presents a data-driven co-reference resolution system for German that has been adapted from IMS HotCoref, a co-reference resolver for English. It describes the difficulties when resolving co-reference in German text, the adaptation process and the features designed to address linguistic challenges brought forth by German. We report performance on the reference dataset TüBa-D/Z and include a post-task SemEval 2010 evaluation, showing that the resolver achieves state-of-the-art performance. We also include ablation experiments that indicate that integrating linguistic features increases results. The paper also describes the steps and the format necessary to use the resolver on new texts. The tool is freely available for download. @@ -278,7 +278,7 @@ Coreference Annotation Scheme and Relation Types for <fixed-case>H</fixed-case>indi VandanMujadia PalashGupta - Dipti MisraSharma + Dipti MisraSharma 161–168 This paper describes a coreference annotation scheme, coreference annotation specific issues and their solutions through our proposed annotation scheme for Hindi. We introduce different co-reference relation types between continuous mentions of the same coreference chain such as “Part-of”, “Function-value pair” etc. We used Jaccard similarity based Krippendorff‘s’ alpha to demonstrate consistency in annotation scheme, annotation and corpora. To ease the coreference annotation process, we built a semi-automatic Coreference Annotation Tool (CAT). We also provide statistics of coreference annotation on Hindi Dependency Treebank (HDTB). L16-1025 @@ -299,8 +299,8 @@ Sieve-based Coreference Resolution in the Biomedical Domain DaneBell - GusHahn-Powell - Marco A.Valenzuela-Escárcega + GusHahn-Powell + Marco A.Valenzuela-Escárcega MihaiSurdeanu 177–183 We describe challenges and advantages unique to coreference resolution in the biomedical domain, and a sieve-based architecture that leverages domain knowledge for both entity and event coreference resolution. Domain-general coreference resolution algorithms perform poorly on biomedical documents, because the cues they rely on such as gender are largely absent in this domain, and because they do not encode domain-specific knowledge such as the number and type of participants required in chemical reactions. Moreover, it is difficult to directly encode this knowledge into most coreference resolution algorithms because they are not rule-based. Our rule-based architecture uses sequentially applied hand-designed “sieves”, with the output of each sieve informing and constraining subsequent sieves. This architecture provides a 3.2% increase in throughput to our Reach event extraction system with precision parallel to that of the stricter system that relies solely on syntactic patterns for extraction. @@ -322,7 +322,7 @@ Error Typology and Remediation Strategies for Requirements Written in <fixed-case>E</fixed-case>nglish by Non-Native Speakers MarieGarnier - PatrickSaint-Dizier + PatrickSaint-Dizier 190–197 In most international industries, English is the main language of communication for technical documents. These documents are designed to be as unambiguous as possible for their users. For international industries based in non-English speaking countries, the professionals in charge of writing requirements are often non-native speakers of English, who rarely receive adequate training in the use of English for this task. As a result, requirements can contain a relatively large diversity of lexical and grammatical errors, which are not eliminated by the use of guidelines from controlled languages. This article investigates the distribution of errors in a corpus of requirements written in English by native speakers of French. Errors are defined on the basis of grammaticality and acceptability principles, and classified using comparable categories. Results show a high proportion of errors in the Noun Phrase, notably through modifier stacking, and errors consistent with simplification strategies. Comparisons with similar corpora in other genres reveal the specificity of the distribution of errors in requirements. This research also introduces possible applied uses, in the form of strategies for the automatic detection of errors, and in-person training provided by certification boards in requirements authoring. L16-1029 @@ -388,7 +388,7 @@ AnaïsTack ThomasFrançois Anne-LaureLigozat - CédrickFairon + CédrickFairon 230–236 This study examines two possibilities of using the FLELex graded lexicon for the automated assessment of text complexity in French as a foreign language learning. From the lexical frequency distributions described in FLELex, we derive a single level of difficulty for each word in a parallel corpus of original and simplified texts. We then use this data to automatically address the lexical complexity of texts in two ways. On the one hand, we evaluate the degree of lexical simplification in manually simplified texts with respect to their original version. Our results show a significant simplification effect, both in the case of French narratives simplified for non-native readers and in the case of simplified Wikipedia texts. On the other hand, we define a predictive model which identifies the number of words in a text that are expected to be known at a particular learning level. We assess the accuracy with which these predictions are able to capture actual word knowledge as reported by Dutch-speaking learners of French. Our study shows that although the predictions seem relatively accurate in general (87.4% to 92.3%), they do not yet seem to cover the learners’ lack of knowledge very well. L16-1035 @@ -398,9 +398,9 @@ A Shared Task for Spoken <fixed-case>CALL</fixed-case>? ClaudiaBaur JohannaGerlach - MannyRayner - MartinRussell - HelmerStrik + MannyRayner + MartinRussell + HelmerStrik 237–244 We argue that the field of spoken CALL needs a shared task in order to facilitate comparisons between different groups and methodologies, and describe a concrete example of such a task, based on data collected from a speech-enabled online tool which has been used to help young Swiss German teens practise skills in English conversation. Items are prompt-response pairs, where the prompt is a piece of German text and the response is a recorded English audio file. The task is to label pairs as “accept” or “reject”, accepting responses which are grammatically and linguistically correct to match a set of hidden gold standard answers as closely as possible. Initial resources are provided so that a scratch system can be constructed with a minimal investment of effort, and in particular without necessarily using a speech recogniser. Training data for the task will be released in June 2016, and test data in January 2017. L16-1036 @@ -427,7 +427,7 @@ Evaluating Interactive System Adaptation - EdouardGeoffrois + EdouardGeoffrois 256–260 Enabling users of intelligent systems to enhance the system performance by providing feedback on their errors is an important need. However, the ability of systems to learn from user feedback is difficult to evaluate in an objective and comparative way. Indeed, the involvement of real users in the adaptation process is an impediment to objective evaluation. This issue can be solved by using an oracle approach, where users are simulated by oracles having access to the reference test data. Another difficulty is to find a meaningful metric despite the fact that system improvements depend on the feedback provided and on the system itself. A solution is to measure the minimal amount of information needed to correct all system errors. It can be shown that for any well defined non interactive task, the interactively supervised version of the task can be evaluated by combining such an oracle-based approach and a minimum supervision rate metric. This new evaluation protocol for adaptive systems is not only expected to drive progress for such systems, but also to pave the way for a specialisation of actors along the value chain of their technological development. L16-1039 @@ -435,7 +435,7 @@ Complementarity, <fixed-case>F</fixed-case>-score, and <fixed-case>NLP</fixed-case> Evaluation - LeonDerczynski + LeonDerczynski 261–266 This paper addresses the problem of quantifying the differences between entity extraction systems, where in general only a small proportion a document should be selected. Comparing overall accuracy is not very useful in these cases, as small differences in accuracy may correspond to huge differences in selections over the target minority class. Conventionally, one may use per-token complementarity to describe these differences, but it is not very useful when the set is heavily skewed. In such situations, which are common in information retrieval and entity recognition, metrics like precision and recall are typically used to describe performance. However, precision and recall fail to describe the differences between sets of objects selected by different decision strategies, instead just describing the proportional amount of correct and incorrect objects selected. This paper presents a method for measuring complementarity for precision, recall and F-score, quantifying the difference between entity extraction approaches. L16-1040 @@ -455,7 +455,7 @@ Evaluating a Topic Modelling Approach to Measuring Corpus Similarity RichardFothergill PaulCook - TimothyBaldwin + TimothyBaldwin 273–279 Web corpora are often constructed automatically, and their contents are therefore often not well understood. One technique for assessing the composition of such a web corpus is to empirically measure its similarity to a reference corpus whose composition is known. In this paper we evaluate a number of measures of corpus similarity, including a method based on topic modelling which has not been previously evaluated for this task. To evaluate these methods we use known-similarity corpora that have been previously used for this purpose, as well as a number of newly-constructed known-similarity corpora targeting differences in genre, topic, time, and region. Our findings indicate that, overall, the topic modelling approach did not improve on a chi-square method that had previously been found to work well for measuring corpus similarity. L16-1042 @@ -480,9 +480,9 @@ Building a Corpus of Errors and Quality in Machine Translation: Experiments on Error Impact - ÂngelaCosta + ÂngelaCosta RuiCorreia - LuísaCoheur + LuísaCoheur 288–292 In this paper we describe a corpus of automatic translations annotated with both error type and quality. The 300 sentences that we have selected were generated by Google Translate, Systran and two in-house Machine Translation systems that use Moses technology. The errors present on the translations were annotated with an error taxonomy that divides errors in five main linguistic categories (Orthography, Lexis, Grammar, Semantics and Discourse), reflecting the language level where the error is located. After the error annotation process, we accessed the translation quality of each sentence using a four point comprehension scale from 1 to 5. Both tasks of error and quality annotation were performed by two different annotators, achieving good levels of inter-annotator agreement. The creation of this corpus allowed us to use it as training data for a translation quality classifier. We concluded on error severity by observing the outputs of two machine learning classifiers: a decision tree and a regression model. L16-1044 @@ -492,7 +492,7 @@ Evaluating the Readability of Text Simplification Output for Readers with Cognitive Disabilities VictoriaYaneva IrinaTemnikova - RuslanMitkov + RuslanMitkov 293–299 This paper presents an approach for automatic evaluation of the readability of text simplification output for readers with cognitive disabilities. First, we present our work towards the development of the EasyRead corpus, which contains easy-to-read documents created especially for people with cognitive disabilities. We then compare the EasyRead corpus to the simplified output contained in the LocalNews corpus (Feng, 2009), the accessibility of which has been evaluated through reading comprehension experiments including 20 adults with mild intellectual disability. This comparison is made on the basis of 13 disability-specific linguistic features. The comparison reveals that there are no major differences between the two corpora, which shows that the EasyRead corpus is to a similar reading level as the user-evaluated texts. We also discuss the role of Simple Wikipedia (Zhu et al., 2010) as a widely-used accessibility benchmark, in light of our finding that it is significantly more complex than both the EasyRead and the LocalNews corpora. L16-1045 @@ -501,7 +501,7 @@ Word Embedding Evaluation and Combination SaharGhannay - BenoitFavre + BenoitFavre YannickEstève NathalieCamelin 300–305 @@ -513,7 +513,7 @@ Benchmarking multimedia technologies with the <fixed-case>CAMOMILE</fixed-case> platform: the case of Multimodal Person Discovery at <fixed-case>M</fixed-case>edia<fixed-case>E</fixed-case>val 2015 JohannPoignant HervéBredin - ClaudeBarras + ClaudeBarras MickaelStefas PierrickBruneau ThomasTamisier @@ -543,8 +543,8 @@ Odin’s Runes: A Rule Language for Information Extraction - Marco A.Valenzuela-Escárcega - GusHahn-Powell + Marco A.Valenzuela-Escárcega + GusHahn-Powell MihaiSurdeanu 322–329 Odin is an information extraction framework that applies cascades of finite state automata over both surface text and syntactic dependency graphs. Support for syntactic patterns allow us to concisely define relations that are otherwise difficult to express in languages such as Common Pattern Specification Language (CPSL), which are currently limited to shallow linguistic features. The interaction of lexical and syntactic automata provides robustness and flexibility when writing extraction rules. This paper describes Odin’s declarative language for writing these cascaded automata. @@ -554,7 +554,7 @@ A Classification-based Approach to Economic Event Detection in <fixed-case>D</fixed-case>utch News Text ElsLefever - VéroniqueHoste + VéroniqueHoste 330–335 Breaking news on economic events such as stock splits or mergers and acquisitions has been shown to have a substantial impact on the financial markets. As it is important to be able to automatically identify events in news items accurately and in a timely manner, we present in this paper proof-of-concept experiments for a supervised machine learning approach to economic event detection in newswire text. For this purpose, we created a corpus of Dutch financial news articles in which 10 types of company-specific economic events were annotated. We trained classifiers using various lexical, syntactic and semantic features. We obtain good results based on a basic set of shallow features, thus showing that this method is a viable approach for economic event detection in news text. L16-1051 @@ -564,7 +564,7 @@ Predictive Modeling: Guessing the <fixed-case>NLP</fixed-case> Terms of Tomorrow GilFrancopoulo JosephMariani - PatrickParoubek + PatrickParoubek 336–343 Predictive modeling, often called “predictive analytics” in a commercial context, encompasses a variety of statistical techniques that analyze historical and present facts to make predictions about unknown events. Often the unknown events are in the future, but prediction can be applied to any type of unknown whether it be in the past or future. In our case, we present some experiments applying predictive modeling to the usage of technical terms within the NLP domain. L16-1052 @@ -601,7 +601,7 @@ Won-TaeJoo Hyun-WooDo Chae-GyunLim - Key-SunChoi + Key-SunChoi Ho-JinChoi 356–359 Many emerging documents usually contain temporal information. Because the temporal information is useful for various applications, it became important to develop a system of extracting the temporal information from the documents. Before developing the system, it first necessary to define or design the structure of temporal information. In other words, it is necessary to design a language which defines how to annotate the temporal information. There have been some studies about the annotation languages, but most of them was applicable to only a specific target language (e.g., English). Thus, it is necessary to design an individual annotation language for each language. In this paper, we propose a revised version of Koreain Time Mark-up Language (K-TimeML), and also introduce a dataset, named Korean TimeBank, that is constructed basd on the K-TimeML. We believe that the new K-TimeML and Korean TimeBank will be used in many further researches about extraction of temporal information. @@ -616,7 +616,7 @@ StefanoFaralli RobertMeusel HeikoPaulheim - Simone PaoloPonzetto + Simone PaoloPonzetto 360–367 Hypernymy relations (those where an hyponym term shares a “isa” relationship with his hypernym) play a key role for many Natural Language Processing (NLP) tasks, e.g. ontology learning, automatically building or extending knowledge bases, or word sense disambiguation and induction. In fact, such relations may provide the basis for the construction of more complex structures such as taxonomies, or be used as effective background knowledge for many word understanding applications. We present a publicly available database containing more than 400 million hypernymy relations we extracted from the CommonCrawl web corpus. We describe the infrastructure we developed to iterate over the web corpus for extracting the hypernymy relations and store them effectively into a large database. This collection of relations represents a rich source of knowledge and may be useful for many researchers. We offer the tuple dataset for public download and an Application Programming Interface (API) to help other researchers programmatically query the database. L16-1056 @@ -645,7 +645,7 @@ Legal Text Interpretation: Identifying Hohfeldian Relations from Text - WimPeters + WimPeters AdamWyner 379–384 The paper investigates the extent of the support semi-automatic analysis can provide for the specific task of assigning Hohfeldian relations of Duty, using the General Architecture for Text Engineering tool for the automated extraction of Duty instances and the bearers of associated roles. The outcome of the analysis supports scholars in identifying Hohfeldian structures in legal text when performing close reading of the texts. A cyclic workflow involving automated annotation and expert feedback will incrementally increase the quality and coverage of the automatic extraction process, and increasingly reduce the amount of manual work required of the scholar. @@ -663,9 +663,9 @@ Finding Definitions in Large Corpora with <fixed-case>S</fixed-case>ketch <fixed-case>E</fixed-case>ngine - VojtěchKovář + VojtěchKovář MonikaMočiariková - PavelRychlý + PavelRychlý 391–394 The paper describes automatic definition finding implemented within the leading corpus query and management tool, Sketch Engine. The implementation exploits complex pattern-matching queries in the corpus query language (CQL) and the indexing mechanism of word sketches for finding and storing definition candidates throughout the corpus. The approach is evaluated for Czech and English corpora, showing that the results are usable in practice: precision of the tool ranges between 30 and 75 percent (depending on the major corpus text types) and we were able to extract nearly 2 million definition candidates from an English corpus with 1.4 billion words. The feature is embedded into the interface as a concordance filter, so that users can search for definitions of any query to the corpus, including very specific multi-word queries. The results also indicate that ordinary texts (unlike explanatory texts) contain rather low number of definitions, which is perhaps the most important problem with automatic definition finding in general. L16-1061 @@ -686,7 +686,7 @@ <fixed-case>NLP</fixed-case> and Public Engagement: The Case of the <fixed-case>I</fixed-case>talian School Reform TommasoCaselli GiovanniMoretti - RacheleSprugnoli + RacheleSprugnoli SaraTonelli DamienLanfrey Donatella SoldaKutzmann @@ -697,9 +697,9 @@ Evaluating Translation Quality and <fixed-case>CLIR</fixed-case> Performance of Query Sessions - XabierSaralegi - EnekoAgirre - IñakiAlegria + XabierSaralegi + EnekoAgirre + IñakiAlegria 407–411 This paper presents the evaluation of the translation quality and Cross-Lingual Information Retrieval (CLIR) performance when using session information as the context of queries. The hypothesis is that previous queries provide context that helps to solve ambiguous translations in the current query. We tested several strategies on the TREC 2010 Session track dataset, which includes query reformulations grouped by generalization, specification, and drifting types. We study the Basque to English direction, evaluating both the translation quality and CLIR performance, with positive results in both cases. The results show that the quality of translation improved, reducing error rate by 12% (HTER) when using session information, which improved CLIR results 5% (nDCG). We also provide an analysis of the improvements across the three kinds of sessions: generalization, specification, and drifting. Translation quality improved in all three types (generalization, specification, and drifting), and CLIR improved for generalization and specification sessions, preserving the performance in drifting sessions. L16-1064 @@ -752,7 +752,7 @@ “Who was Pietro Badoglio?” Towards a <fixed-case>QA</fixed-case> system for <fixed-case>I</fixed-case>talian History StefanoMenini - RacheleSprugnoli + RacheleSprugnoli AntonioUva 430–435 This paper presents QUANDHO (QUestion ANswering Data for italian HistOry), an Italian question answering dataset created to cover a specific domain, i.e. the history of Italy in the first half of the XX century. The dataset includes questions manually classified and annotated with Lexical Answer Types, and a set of question-answer pairs. This resource, freely available for research purposes, has been used to retrain a domain independent question answering system so to improve its performances in the domain of interest. Ongoing experiments on the development of a question classifier and an automatic tagger of Lexical Answer Types are also presented. @@ -762,8 +762,8 @@ A Document Repository for Social Media and Speech Conversations AdamFunk - RobertGaizauskas - BenoitFavre + RobertGaizauskas + BenoitFavre 436–440 We present a successfully implemented document repository REST service for flexible SCRUD (search, crate, read, update, delete) storage of social media conversations, using a GATE/TIPSTER-like document object model and providing a query language for document features. This software is currently being used in the SENSEI research project and will be published as open-source software before the project ends. It is, to the best of our knowledge, the first freely available, general purpose data repository to support large-scale multimodal (i.e., speech or text) conversation analytics. L16-1070 @@ -771,10 +771,10 @@ Towards a Linguistic Ontology with an Emphasis on Reasoning and Knowledge Reuse - ArtemisParvizi + ArtemisParvizi MattKohl - MeritxellGonzàlez - RoserSaurí + MeritxellGonzàlez + RoserSaurí 441–448 The Dictionaries division at Oxford University Press (OUP) is aiming to model, integrate, and publish lexical content for 100 languages focussing on digitally under-represented languages. While there are multiple ontologies designed for linguistic resources, none had adequate features for meeting our requirements, chief of which was the capability to losslessly capture diverse features of many different languages in a dictionary format, while supplying a framework for inferring relations like translation, derivation, etc., between the data. Building on valuable features of existing models, and working with OUP monolingual and bilingual dictionary datasets, we have designed and implemented a new linguistic ontology. The ontology has been reviewed by a number of computational linguists, and we are working to move more dictionary data into it. We have also developed APIs to surface the linked data to dictionary websites. L16-1071 @@ -797,11 +797,11 @@ The Language Application Grid and Galaxy - NancyIde + NancyIde KeithSuderman - JamesPustejovsky + JamesPustejovsky MarcVerhagen - ChristopherCieri + ChristopherCieri 457–462 The NSF-SI2-funded LAPPS Grid project is a collaborative effort among Brandeis University, Vassar College, Carnegie-Mellon University (CMU), and the Linguistic Data Consortium (LDC), which has developed an open, web-based infrastructure through which resources can be easily accessed and within which tailored language services can be efficiently composed, evaluated, disseminated and consumed by researchers, developers, and students across a wide variety of disciplines. The LAPPS Grid project recently adopted Galaxy (Giardine et al., 2005), a robust, well-developed, and well-supported front end for workflow configuration, management, and persistence. Galaxy allows data inputs and processing steps to be selected from graphical menus, and results are displayed in intuitive plots and summaries that encourage interactive workflows and the exploration of hypotheses. The Galaxy workflow engine provides significant advantages for deploying pipelines of LAPPS Grid web services, including not only means to create and deploy locally-run and even customized versions of the LAPPS Grid as well as running the LAPPS Grid in the cloud, but also access to a huge array of statistical and visualization tools that have been developed for use in genomics research. L16-1073 @@ -810,7 +810,7 @@ <fixed-case>ELRA</fixed-case> Activities and Services KhalidChoukri - ValérieMapelli + ValérieMapelli HélèneMazo VladimirPopescu 463–468 @@ -828,9 +828,9 @@ Humor in Collective Discourse: Unsupervised Funniness Detection in the New Yorker Cartoon Caption Contest - DragomirRadev - AmandaStent - JoelTetreault + DragomirRadev + AmandaStent + JoelTetreault AasishPappu AikateriniIliakopoulou AgustinChanfreau @@ -848,7 +848,7 @@ A Corpus of Text Data and Gaze Fixations from Autistic and Non-Autistic Adults VictoriaYaneva IrinaTemnikova - RuslanMitkov + RuslanMitkov 480–487 The paper presents a corpus of text data and its corresponding gaze fixations obtained from autistic and non-autistic readers. The data was elicited through reading comprehension testing combined with eye-tracking recording. The corpus consists of 1034 content words tagged with their POS, syntactic role and three gaze-based measures corresponding to the autistic and control participants. The reading skills of the participants were measured through multiple-choice questions and, based on the answers given, they were divided into groups of skillful and less-skillful readers. This division of the groups informs researchers on whether particular fixations were elicited from skillful or less-skillful readers and allows a fair between-group comparison for two levels of reading ability. In addition to describing the process of data collection and corpus development, we present a study on the effect that word length has on reading in autism. The corpus is intended as a resource for investigating the particular linguistic constructions which pose reading difficulties for people with autism and hopefully, as a way to inform future text simplification research intended for this population. L16-1077 @@ -877,7 +877,7 @@ An Empirical Study of <fixed-case>A</fixed-case>rabic Formulaic Sequence Extraction Methods AymanAlghamdi - EricAtwell + EricAtwell ClaireBrierley 502–506 This paper aims to implement what is referred to as the collocation of the Arabic keywords approach for extracting formulaic sequences (FSs) in the form of high frequency but semantically regular formulas that are not restricted to any syntactic construction or semantic domain. The study applies several distributional semantic models in order to automatically extract relevant FSs related to Arabic keywords. The data sets used in this experiment are rendered from a new developed corpus-based Arabic wordlist consisting of 5,189 lexical items which represent a variety of modern standard Arabic (MSA) genres and regions, the new wordlist being based on an overlapping frequency based on a comprehensive comparison of four large Arabic corpora with a total size of over 8 billion running words. Empirical n-best precision evaluation methods are used to determine the best association measures (AMs) for extracting high frequency and meaningful FSs. The gold standard reference FSs list was developed in previous studies and manually evaluated against well-established quantitative and qualitative criteria. The results demonstrate that the MI.log_f AM achieved the highest results in extracting significant FSs from the large MSA corpus, while the T-score association measure achieved the worst results. @@ -886,9 +886,9 @@ Rule-based Automatic Multi-word Term Extraction and Lemmatization - RankaStanković + RankaStanković CvetanaKrstev - IvanObradović + IvanObradović BiljanaLazić AleksandraTrtovac 507–514 @@ -908,7 +908,7 @@ A Lexical Resource of <fixed-case>H</fixed-case>ebrew Verb-Noun Multi-Word Expressions ChayaLiebeskind - YaakovHaCohen-Kerner + YaakovHaCohen-Kerner 522–527 A verb-noun Multi-Word Expression (MWE) is a combination of a verb and a noun with or without other words, in which the combination has a meaning different from the meaning of the words considered separately. In this paper, we present a new lexical resource of Hebrew Verb-Noun MWEs (VN-MWEs). The VN-MWEs of this resource were manually collected and annotated from five different web resources. In addition, we analyze the lexical properties of Hebrew VN-MWEs by classifying them to three types: morphological, syntactic, and semantic. These two contributions are essential for designing algorithms for automatic VN-MWEs extraction. The analysis suggests some interesting features of VN-MWEs for exploration. The lexical resource enables to sample a set of positive examples for Hebrew VN-MWEs. This set of examples can either be used for training supervised algorithms or as seeds in unsupervised bootstrapping algorithms. Thus, this resource is a first step towards automatic identification of Hebrew VN-MWEs, which is important for natural language understanding, generation and translation systems. L16-1083 @@ -919,7 +919,7 @@ GuillaumeJacquet MaudEhrmann RalfSteinberger - JaakkoVäyrynen + JaakkoVäyrynen 528–535 This paper reports on an approach and experiments to automatically build a cross-lingual multi-word entity resource. Starting from a collection of millions of acronym/expansion pairs for 22 languages where expansion variants were grouped into monolingual clusters, we experiment with several aggregation strategies to link these clusters across languages. Aggregation strategies make use of string similarity distances and translation probabilities and they are based on vector space and graph representations. The accuracy of the approach is evaluated against Wikipedia’s redirection and cross-lingual linking tables. The resulting multi-word entity resource contains 64,000 multi-word entities with unique identifiers and their 600,000 multilingual lexical variants. We intend to make this new resource publicly available. L16-1084 @@ -930,7 +930,7 @@ Marie-JeanMeurs HaydaAlmeida LudovicJean-Louis - EricCharton + EricCharton 536–540 This paper presents SemLinker, an open source system that discovers named entities, connects them to a reference knowledge base, and clusters them semantically. SemLinker relies on several modules that perform surface form generation, mutual disambiguation, entity clustering, and make use of two annotation engines. SemLinker was evaluated in the English Entity Discovery and Linking track of the Text Analysis Conference on Knowledge Base Population, organized by the US National Institute of Standards and Technology. Along with the SemLinker source code, we release our annotation files containing the discovered named entities, their types, and position across processed documents. L16-1085 @@ -942,7 +942,7 @@ GiuseppeRizzo Mariekevan Erp JulienPlu - RaphaëlTroncy + RaphaëlTroncy 541–548 More and more knowledge bases are publicly available as linked data. Since these knowledge bases contain structured descriptions of real-world entities, they can be exploited by entity linking systems that anchor entity mentions from text to the most relevant resources describing those entities. In this paper, we investigate adaptation of the entity linking task using contextual knowledge. The key intuition is that entity linking can be customized depending on the textual content, as well as on the application that would make use of the extracted information. We present an adaptive approach that relies on contextual knowledge from text to enhance the performance of ADEL, a hybrid linguistic and graph-based entity linking system. We evaluate our approach on a domain-specific corpus consisting of annotated WikiNews articles. L16-1086 @@ -952,7 +952,7 @@ Named Entity Recognition on <fixed-case>T</fixed-case>witter for <fixed-case>T</fixed-case>urkish using Semi-supervised Learning with Word Embeddings EdaOkur HakanDemir - ArzucanÖzgür + ArzucanÖzgür 549–555 Recently, due to the increasing popularity of social media, the necessity for extracting information from informal text types, such as microblog texts, has gained significant attention. In this study, we focused on the Named Entity Recognition (NER) problem on informal text types for Turkish. We utilized a semi-supervised learning approach based on neural networks. We applied a fast unsupervised method for learning continuous representations of words in vector space. We made use of these obtained word embeddings, together with language independent features that are engineered to work better on informal text types, for generating a Turkish NER system on microblog texts. We evaluated our Turkish NER system on Twitter messages and achieved better F-score performances than the published results of previously proposed NER systems on Turkish tweets. Since we did not employ any language dependent features, we believe that our method can be easily adapted to microblog texts in other morphologically rich languages. L16-1087 @@ -962,7 +962,7 @@ Entity Linking with a Paraphrase Flavor MariaPershina YifanHe - RalphGrishman + RalphGrishman 556–560 The task of Named Entity Linking is to link entity mentions in the document to their correct entries in a knowledge base and to cluster NIL mentions. Ambiguous, misspelled, and incomplete entity mention names are the main challenges in the linking process. We propose a novel approach that combines two state-of-the-art models ― for entity disambiguation and for paraphrase detection ― to overcome these challenges. We consider name variations as paraphrases of the same entity mention and adopt a paraphrase model for this task. Our approach utilizes a graph-based disambiguation model based on Personalized Page Rank, and then refines and clusters its output using the paraphrase similarity between entity mention strings. It achieves a competitive performance of 80.5% in B3+F clustering score on diagnostic TAC EDL 2014 data. L16-1088 @@ -981,7 +981,7 @@ <fixed-case>IRIS</fixed-case>: <fixed-case>E</fixed-case>nglish-<fixed-case>I</fixed-case>rish Machine Translation System - MihaelArcan + MihaelArcan CaoilfhionnLane Eoin ÓDroighneáin PaulBuitelaar @@ -1010,7 +1010,7 @@ Syntax-based Multi-system Machine Translation MatīssRikters - IngunaSkadiņa + IngunaSkadiņa 585–591 This paper describes a hybrid machine translation system that explores a parser to acquire syntactic chunks of a source sentence, translates the chunks with multiple online machine translation (MT) system application program interfaces (APIs) and creates output by combining translated chunks to obtain the best possible translation. The selection of the best translation hypothesis is performed by calculating the perplexity for each translated chunk. The goal of this approach is to enhance the baseline multi-system hybrid translation (MHyT) system that uses only a language model to select best translation from translations obtained with different APIs and to improve overall English ― Latvian machine translation quality over each of the individual MT APIs. The presented syntax-based multi-system translation (SyMHyT) system demonstrates an improvement in terms of BLEU and NIST scores compared to the baseline system. Improvements reach from 1.74 up to 2.54 BLEU points. L16-1093 @@ -1018,11 +1018,11 @@ Use of Domain-Specific Language Resources in Machine Translation - SanjaŠtajner + SanjaŠtajner AndreiaQuerido NunoRendeiro João AntónioRodrigues - AntónioBranco + AntónioBranco 592–598 In this paper, we address the problem of Machine Translation (MT) for a specialised domain in a language pair for which only a very small domain-specific parallel corpus is available. We conduct a series of experiments using a purely phrase-based SMT (PBSMT) system and a hybrid MT system (TectoMT), testing three different strategies to overcome the problem of the small amount of in-domain training data. Our results show that adding a small size in-domain bilingual terminology to the small in-domain training corpus leads to the best improvements of a hybrid MT system, while the PBSMT system achieves the best results by adding a combination of in-domain bilingual terminology and a larger out-of-domain corpus. We focus on qualitative human evaluation of the output of two best systems (one for each approach) and perform a systematic in-depth error analysis which revealed advantages of the hybrid MT system over the pure PBSMT system for this specific task. L16-1094 @@ -1032,10 +1032,10 @@ <fixed-case>CAT</fixed-case>a<fixed-case>L</fixed-case>og Online: Porting a Post-editing Tool to the Web SantanuPal MarcosZampieri - Sudip KumarNaskar - TapasNayak + Sudip KumarNaskar + TapasNayak MihaelaVela - Josefvan Genabith + Josefvan Genabith 599–604 This paper presents CATaLog online, a new web-based MT and TM post-editing tool. CATaLog online is a freeware software that can be used through a web browser and it requires only a simple registration. The tool features a number of editing and log functions similar to the desktop version of CATaLog enhanced with several new features that we describe in detail in this paper. CATaLog online is designed to allow users to post-edit both translation memory segments as well as machine translation output. The tool provides a complete set of log information currently not available in most commercial CAT tools. Log information can be used both for project management purposes as well as for the study of the translation process and translator’s productivity. L16-1095 @@ -1067,7 +1067,7 @@ Lexical Resources to Enrich <fixed-case>E</fixed-case>nglish <fixed-case>M</fixed-case>alayalam Machine Translation SreelekhaS - PushpakBhattacharyya + PushpakBhattacharyya 620–627 In this paper we present our work on the usage of lexical resources for the Machine Translation English and Malayalam. We describe a comparative performance between different Statistical Machine Translation (SMT) systems on top of phrase based SMT system as baseline. We explore different ways of utilizing lexical resources to improve the quality of English Malayalam statistical machine translation. In order to enrich the training corpus we have augmented the lexical resources in two ways (a) additional vocabulary and (b) inflected verbal forms. Lexical resources include IndoWordnet semantic relation set, lexical words and verb phrases etc. We have described case studies, evaluations and have given detailed error analysis for both Malayalam to English and English to Malayalam machine translation systems. We observed significant improvement in evaluations of translation quality. Lexical resources do help uplift performance when parallel corpora are scanty. L16-1098 @@ -1125,8 +1125,8 @@ HaoZhou YueZhang ShujianHuang - Xin-YuDai - JiajunChen + Xin-YuDai + JiajunChen 659–663 Greedy transition-based parsers are appealing for their very fast speed, with reasonably high accuracies. In this paper, we build a fast shift-reduce neural constituent parser by using a neural network to make local decisions. One challenge to the parsing speed is the large hidden and output layer sizes caused by the number of constituent labels and branching options. We speed up the parser by using a hierarchical output layer, inspired by the hierarchical log-bilinear neural language model. In standard WSJ experiments, the neural parser achieves an almost 2.4 time speed up (320 sen/sec) compared to a non-hierarchical baseline without significant accuracy loss (89.06 vs 89.13 F-score). L16-1104 @@ -1158,8 +1158,8 @@ Danielvan Niekerk InekeSchuurman VincentVandeghinste - FrankVan Eynde - Gerhardvan Huyssteen + FrankVan Eynde + Gerhardvan Huyssteen 677–682 Compared to well-resourced languages such as English and Dutch, natural language processing (NLP) tools for Afrikaans are still not abundant. In the context of the AfriBooms project, KU Leuven and the North-West University collaborated to develop a first, small treebank, a dependency parser, and an easy to use online linguistic search engine for Afrikaans for use by researchers and students in the humanities and social sciences. The search tool is based on a similar development for Dutch, i.e. GrETEL, a user-friendly search engine which allows users to query a treebank by means of a natural language example instead of a formal search instruction. L16-1107 @@ -1231,7 +1231,7 @@ <fixed-case>FABIOLE</fixed-case>, a Speech Database for Forensic Speaker Comparison MoezAjili - Jean-FrançoisBonastre + Jean-FrançoisBonastre JulietteKahn SolangeRossato GuillaumeBernard @@ -1252,7 +1252,7 @@ <fixed-case>AIMU</fixed-case>: Actionable Items for Meeting Understanding Yun-NungChen - DilekHakkani-Tür + DilekHakkani-Tür 739–743 With emerging conversational data, automated content analysis is needed for better data interpretation, so that it is accurately understood and can be effectively integrated and utilized in various applications. ICSI meeting corpus is a publicly released data set of multi-party meetings in an organization that has been released over a decade ago, and has been fostering meeting understanding research since then. The original data collection includes transcription of participant turns as well as meta-data annotations, such as disfluencies and dialog act tags. This paper presents an extended set of annotations for the ICSI meeting corpus with a goal of deeply understanding meeting conversations, where participant turns are annotated by actionable items that could be performed by an automated meeting assistant. In addition to the user utterances that contain an actionable item, annotations also include the arguments associated with the actionable item. The set of actionable items are determined by aligning human-human interactions to human-machine interactions, where a data annotation schema designed for a virtual personal assistant (human-machine genre) is adapted to the meetings domain (human-human genre). The data set is formed by annotating participants’ utterances in meetings with potential intents/actions considering their contexts. The set of actions target what could be accomplished by an automated meeting assistant, such as taking a note of action items that a participant commits to, or finding emails or topic related documents that were mentioned during the meeting. A total of 10 defined intents/actions are considered as actionable items in meetings. Turns that include actionable intents were annotated for 22 public ICSI meetings, that include a total of 21K utterances, segmented by speaker turns. Participants’ spoken turns, possible actions along with associated arguments and their vector representations as computed by convolutional deep structured semantic models are included in the data set for future research. We present a detailed statistical analysis of the data set and analyze the performance of applying convolutional deep structured semantic models for an actionable item detection task. The data is available at http://research.microsoft.com/projects/meetingunderstanding/. L16-1117 @@ -1261,7 +1261,7 @@ A Taxonomy of Specific Problem Classes in Text-to-Speech Synthesis: Comparing Commercial and Open Source Performance FelixBurkhardt - Uwe D.Reichel + Uwe D.Reichel 744–749 Current state-of-the-art speech synthesizers for domain-independent systems still struggle with the challenge of generating understandable and natural-sounding speech. This is mainly because the pronunciation of words of foreign origin, inflections and compound words often cannot be handled by rules. Furthermore there are too many of these for inclusion in exception dictionaries. We describe an approach to evaluating text-to-speech synthesizers with a subjective listening experiment. The focus is to differentiate between known problem classes for speech synthesizers. The target language is German but we believe that many of the described phenomena are not language specific. We distinguish the following problem categories: Normalization, Foreign linguistics, Natural writing, Language specific and General. Each of them is divided into five to three problem classes. Word lists for each of the above mentioned categories were compiled and synthesized by both a commercial and an open source synthesizer, both being based on the non-uniform unit-selection approach. The synthesized speech was evaluated by human judges using the Speechalyzer toolkit and the results are discussed. It shows that, as expected, the commercial synthesizer performs much better than the open-source one, and especially words of foreign origin were pronounced badly by both systems. L16-1118 @@ -1281,11 +1281,11 @@ A Singing Voice Database in <fixed-case>B</fixed-case>asque for Statistical Singing Synthesis of Bertsolaritza XabierSarasola - EvaNavas + EvaNavas DavidTavarez DanielErro IbonSaratxaga - InmaHernaez + InmaHernaez 756–759 This paper describes the characteristics and structure of a Basque singing voice database of bertsolaritza. Bertsolaritza is a popular singing style from Basque Country sung exclusively in Basque that is improvised and a capella. The database is designed to be used in statistical singing voice synthesis for bertsolaritza style. Starting from the recordings and transcriptions of numerous singers, diarization and phoneme alignment experiments have been made to extract the singing voice from the recordings and create phoneme alignments. This labelling processes have been performed applying standard speech processing techniques and the results prove that these techniques can be used in this specific singing style. L16-1120 @@ -1326,7 +1326,7 @@ Designing a Speech Corpus for the Development and Evaluation of Dictation Systems in <fixed-case>L</fixed-case>atvian - MārcisPinnis + MārcisPinnis AskarsSalimbajevs IlzeAuziņa 775–780 @@ -1340,7 +1340,7 @@ DirceCelorico SaraCandeias CarlaLopes - FernandoPerdigão + FernandoPerdigão 781–785 This paper introduces the LetsRead Corpus of European Portuguese read speech from 6 to 10 years old children. The motivation for the creation of this corpus stems from the inexistence of databases with recordings of reading tasks of Portuguese children with different performance levels and including all the common reading aloud disfluencies. It is also essential to develop techniques to fulfill the main objective of the LetsRead project: to automatically evaluate the reading performance of children through the analysis of reading tasks. The collected data amounts to 20 hours of speech from 284 children from private and public Portuguese schools, with each child carrying out two tasks: reading sentences and reading a list of pseudowords, both with varying levels of difficulty throughout the school grades. In this paper, the design of the reading tasks presented to children is described, as well as the collection procedure. Manually annotated data is analyzed according to disfluencies and reading performance. The considered word difficulty parameter is also confirmed to be suitable for the pseudoword reading tasks. L16-1125 @@ -1348,7 +1348,7 @@ The <fixed-case>BAS</fixed-case> Speech Data Repository - UweReichel + UweReichel FlorianSchiel ThomasKisler ChristophDraxler @@ -1363,8 +1363,8 @@ EmreYilmaz MarioGanzeboom LilianBeijer - CatiaCucchiarini - HelmerStrik + CatiaCucchiarini + HelmerStrik 792–795 We present a new Dutch dysarthric speech database containing utterances of neurological patients with Parkinson’s disease, traumatic brain injury and cerebrovascular accident. The speech content is phonetically and linguistically diversified by using numerous structured sentence and word lists. Containing more than 6 hours of mildly to moderately dysarthric speech, this database can be used for research on dysarthria and for developing and testing speech-to-text systems designed for medical applications. Current activities aimed at extending this database are also discussed. L16-1127 @@ -1401,9 +1401,9 @@ The <fixed-case>O</fixed-case>n<fixed-case>F</fixed-case>orum<fixed-case>S</fixed-case> corpus from the Shared Task on Online Forum Summarisation at <fixed-case>M</fixed-case>ulti<fixed-case>L</fixed-case>ing 2015 - MijailKabadjov - UdoKruschwitz - MassimoPoesio + MijailKabadjov + UdoKruschwitz + MassimoPoesio JosefSteinberger JorgeValderrama HugoZaragoza @@ -1414,8 +1414,8 @@ Automatic Enrichment of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with Common-Sense Knowledge - LuigiDi Caro - GuidoBoella + LuigiDi Caro + GuidoBoella 819–822 WordNet represents a cornerstone in the Computational Linguistics field, linking words to meanings (or senses) through a taxonomical representation of synsets, i.e., clusters of words with an equivalent meaning in a specific context often described by few definitions (or glosses) and examples. Most of the approaches to the Word Sense Disambiguation task fully rely on these short texts as a source of contextual information to match with the input text to disambiguate. This paper presents the first attempt to enrich synsets data with common-sense definitions, automatically retrieved from ConceptNet 5, and disambiguated accordingly to WordNet. The aim was to exploit the shared- and immediate-thinking nature of common-sense knowledge to extend the short but incredibly useful contextual information of the synsets. A manual evaluation on a subset of the entire result (which counts a total of almost 600K synset enrichments) shows a very high precision with an estimated good recall. L16-1132 @@ -1461,13 +1461,13 @@ The <fixed-case>S</fixed-case>em<fixed-case>D</fixed-case>a<fixed-case>X</fixed-case> Corpus ― Sense Annotations with Scalable Sense Inventories - BolettePedersen + BolettePedersen AnnaBraasch - AndersJohannsen - Héctor MartínezAlonso + AndersJohannsen + Héctor MartínezAlonso SanniNimb SussiOlsen - AndersSøgaard + AndersSøgaard Nicolai HartvigSørensen 842–847 We launch the SemDaX corpus which is a recently completed Danish human-annotated corpus available through a CLARIN academic license. The corpus includes approx. 90,000 words, comprises six textual domains, and is annotated with sense inventories of different granularity. The aim of the developed corpus is twofold: i) to assess the reliability of the different sense annotation schemes for Danish measured by qualitative analyses and annotation agreement scores, and ii) to serve as training and test data for machine learning algorithms with the practical purpose of developing sense taggers for Danish. To these aims, we take a new approach to human-annotated corpus resources by double annotating a much larger part of the corpus than what is normally seen: for the all-words task we double annotated 60% of the material and for the lexical sample task 100%. We include in the corpus not only the adjucated files, but also the diverging annotations. In other words, we consider not all disagreement to be noise, but rather to contain valuable linguistic information that can help us improve our annotation schemes and our learning algorithms. @@ -1489,7 +1489,7 @@ Multi-prototype <fixed-case>C</fixed-case>hinese Character Embedding YananLu YueZhang - DonghongJi + DonghongJi 855–859 Chinese sentences are written as sequences of characters, which are elementary units of syntax and semantics. Characters are highly polysemous in forming words. We present a position-sensitive skip-gram model to learn multi-prototype Chinese character embeddings, and explore the usefulness of such character embeddings to Chinese NLP tasks. Evaluation on character similarity shows that multi-prototype embeddings are significantly better than a single-prototype baseline. In addition, used as features in the Chinese NER task, the embeddings result in a 1.74% F-score improvement over a state-of-the-art baseline. L16-1138 @@ -1497,10 +1497,10 @@ A comparison of Named-Entity Disambiguation and Word Sense Disambiguation - AngelChang - Valentin I.Spitkovsky - Christopher D.Manning - EnekoAgirre + AngelChang + Valentin I.Spitkovsky + Christopher D.Manning + EnekoAgirre 860–867 Named Entity Disambiguation (NED) is the task of linking a named-entity mention to an instance in a knowledge-base, typically Wikipedia-derived resources like DBpedia. This task is closely related to word-sense disambiguation (WSD), where the mention of an open-class word is linked to a concept in a knowledge-base, typically WordNet. This paper analyzes the relation between two annotated datasets on NED and WSD, highlighting the commonalities and differences. We detail the methods to construct a NED system following the WSD word-expert approach, where we need a dictionary and one classifier is built for each target entity mention string. Constructing a dictionary for NED proved challenging, and although similarity and ambiguity are higher for NED, the results are also higher due to the larger number of training data, and the more crisp and skewed meaning differences. L16-1139 @@ -1510,7 +1510,7 @@ Leveraging <fixed-case>RDF</fixed-case> Graphs for Crossing Multiple Bilingual Dictionaries MartaVillegas MaiteMelero - NúriaBel + NúriaBel JorgeGracia 868–876 The experiments presented here exploit the properties of the Apertium RDF Graph, principally cycle density and nodes’ degree, to automatically generate new translation relations between words, and therefore to enrich existing bilingual dictionaries with new entries. Currently, the Apertium RDF Graph includes data from 22 Apertium bilingual dictionaries and constitutes a large unified array of linked lexical entries and translations that are available and accessible on the Web (http://linguistic.linkeddata.es/apertium/). In particular, its graph structure allows for interesting exploitation opportunities, some of which are addressed in this paper. Two ‘massive’ experiments are reported: in the first one, the original EN-ES translation set was removed from the Apertium RDF Graph and a new EN-ES version was generated. The results were compared against the previously removed EN-ES data and against the Concise Oxford Spanish Dictionary. In the second experiment, a new non-existent EN-FR translation set was generated. In this case the results were compared against a converted wiktionary English-French file. The results we got are really good and perform well for the extreme case of correlated polysemy. This lead us to address the possibility to use cycles and nodes degree to identify potential oddities in the source data. If cycle density proves efficient when considering potential targets, we can assume that in dense graphs nodes with low degree may indicate potential errors. @@ -1533,7 +1533,7 @@ FabricioChalub LivyReal AlexandreRademaker - Valeriade Paiva + Valeriade Paiva 885–891 This paper describes work on incorporating Princenton’s WordNet morphosemantics links to the fabric of the Portuguese OpenWordNet-PT. Morphosemantic links are relations between verbs and derivationally related nouns that are semantically typed (such as for tune-tuner ― in Portuguese “afinar-afinador” – linked through an “agent” link). Morphosemantic links have been discussed for Princeton’s WordNet for a while, but have not been added to the official database. These links are very useful, they help us to improve our Portuguese WordNet. Thus we discuss the integration of these links in our base and the issues we encountered with the integration. L16-1142 @@ -1563,14 +1563,14 @@ Large Multi-lingual, Multi-level and Multi-genre Annotation Corpus XuansongLi - MarthaPalmer + MarthaPalmer NianwenXue - LanceRamshaw - MohamedMaamouri + LanceRamshaw + MohamedMaamouri AnnBies KathrynConger StephenGrimes - StephanieStrassel + StephanieStrassel 906–913 High accuracy for automated translation and information retrieval calls for linguistic annotations at various language levels. The plethora of informal internet content sparked the demand for porting state-of-art natural language processing (NLP) applications to new social media as well as diverse language adaptation. Effort launched by the BOLT (Broad Operational Language Translation) program at DARPA (Defense Advanced Research Projects Agency) successfully addressed the internet information with enhanced NLP systems. BOLT aims for automated translation and linguistic analysis for informal genres of text and speech in online and in-person communication. As a part of this program, the Linguistic Data Consortium (LDC) developed valuable linguistic resources in support of the training and evaluation of such new technologies. This paper focuses on methodologies, infrastructure, and procedure for developing linguistic annotation at various language levels, including Treebank (TB), word alignment (WA), PropBank (PB), and co-reference (CoRef). Inspired by the OntoNotes approach with adaptations to the tasks to reflect the goals and scope of the BOLT project, this effort has introduced more annotation types of informal and free-style genres in English, Chinese and Egyptian Arabic. The corpus produced is by far the largest multi-lingual, multi-level and multi-genre annotation corpus of informal text and speech. L16-1145 @@ -1589,7 +1589,7 @@ <fixed-case>O</fixed-case>pen<fixed-case>S</fixed-case>ubtitles2016: Extracting Large Parallel Corpora from Movie and <fixed-case>TV</fixed-case> Subtitles PierreLison - JörgTiedemann + JörgTiedemann 923–929 We present a new major release of the OpenSubtitles collection of parallel corpora. The release is compiled from a large database of movie and TV subtitles and includes a total of 1689 bitexts spanning 2.6 billion sentences across 60 languages. The release also incorporates a number of enhancements in the preprocessing and alignment of the subtitles, such as the automatic correction of OCR errors and the use of meta-data to estimate the quality of each subtitle and score subtitle pairs. L16-1147 @@ -1598,8 +1598,8 @@ <fixed-case>L</fixed-case>ex<fixed-case>F</fixed-case>r: Adapting the <fixed-case>L</fixed-case>ex<fixed-case>I</fixed-case>t Framework to Build a Corpus-based <fixed-case>F</fixed-case>rench Subcategorization Lexicon GiuliaRambelli - GianlucaLebani - LaurentPrévot + GianlucaLebani + LaurentPrévot AlessandroLenci 930–937 This paper introduces LexFr, a corpus-based French lexical resource built by adapting the framework LexIt, originally developed to describe the combinatorial potential of Italian predicates. As in the original framework, the behavior of a group of target predicates is characterized by a series of syntactic (i.e., subcategorization frames) and semantic (i.e., selectional preferences) statistical information (a.k.a. distributional profiles) whose extraction process is mostly unsupervised. The first release of LexFr includes information for 2,493 verbs, 7,939 nouns and 2,628 adjectives. In these pages we describe the adaptation process and evaluated the final resource by comparing the information collected for 20 test verbs against the information available in a gold standard dictionary. In the best performing setting, we obtained 0.74 precision, 0.66 recall and 0.70 F-measure. @@ -1609,7 +1609,7 @@ Polarity Lexicon Building: to what Extent Is the Manual Effort Worth? Iñaki SanVicente - XabierSaralegi + XabierSaralegi 938–942 Polarity lexicons are a basic resource for analyzing the sentiments and opinions expressed in texts in an automated way. This paper explores three methods to construct polarity lexicons: translating existing lexicons from other languages, extracting polarity lexicons from corpora, and annotating sentiments Lexical Knowledge Bases. Each of these methods require a different degree of human effort. We evaluate how much manual effort is needed and to what extent that effort pays in terms of performance improvement. Experiment setup includes generating lexicons for Basque, and evaluating them against gold standard datasets in different domains. Results show that extracting polarity lexicons from corpora is the best solution for achieving a good performance with reasonable human effort. L16-1149 @@ -1620,7 +1620,7 @@ OuafaeNahli FrancescaFrontini MonicaMonachini - FahadKhan + FahadKhan ArsalanZarghili MustaphaKhalfi 943–950 @@ -1678,7 +1678,7 @@ Argument Mining: the Bottleneck of Knowledge and Language Resources - PatrickSaint-Dizier + PatrickSaint-Dizier 983–990 Given a controversial issue, argument mining from natural language texts (news papers, and any form of text on the Internet) is extremely challenging: domain knowledge is often required together with appropriate forms of inferences to identify arguments. This contribution explores the types of knowledge that are required and how they can be paired with reasoning schemes, language processing and language resources to accurately mine arguments. We show via corpus analysis that the Generative Lexicon, enhanced in different manners and viewed as both a lexicon and a domain knowledge representation, is a relevant approach. In this paper, corpus annotation for argument mining is first developed, then we show how the generative lexicon approach must be adapted and how it can be paired with language processing patterns to extract and specify the nature of arguments. Our approach to argument mining is thus knowledge driven. L16-1156 @@ -1687,7 +1687,7 @@ From Interoperable Annotations towards Interoperable Resources: A Multilingual Approach to the Analysis of Discourse EkaterinaLapshinova-Koltunski - Kerstin AnnaKunz + Kerstin AnnaKunz AnnaNedoluzhko 991–997 In the present paper, we analyse variation of discourse phenomena in two typologically different languages, i.e. in German and Czech. The novelty of our approach lies in the nature of the resources we are using. Advantage is taken of existing resources, which are, however, annotated on the basis of two different frameworks. We use an interoperable scheme unifying discourse phenomena in both frameworks into more abstract categories and considering only those phenomena that have a direct match in German and Czech. The discourse properties we focus on are relations of identity, semantic similarity, ellipsis and discourse relations. Our study shows that the application of interoperable schemes allows an exploitation of discourse-related phenomena analysed in different projects and on the basis of different frameworks. As corpus compilation and annotation is a time-consuming task, positive results of this experiment open up new paths for contrastive linguistics, translation studies and NLP, including machine translation. @@ -1696,7 +1696,7 @@ Falling silent, lost for words ... Tracing personal involvement in interviews with <fixed-case>D</fixed-case>utch war veterans - Henkvan den Heuvel + Henkvan den Heuvel NellekeOostdijk 998–1001 In sources used in oral history research (such as interviews with eye witnesses), passages where the degree of personal emotional involvement is found to be high can be of particular interest, as these may give insight into how historical events were experienced, and what moral dilemmas and psychological or religious struggles were encountered. In a pilot study involving a large corpus of interview recordings with Dutch war veterans, we have investigated if it is possible to develop a method for automatically identifying those passages where the degree of personal emotional involvement is high. The method is based on the automatic detection of exceptionally large silences and filled pause segments (using Automatic Speech Recognition), and cues taken from specific n-grams. The first results appear to be encouraging enough for further elaboration of the method. @@ -1705,9 +1705,9 @@ A Bilingual Discourse Corpus and Its Applications - YangLiu + YangLiu JiajunZhang - ChengqingZong + ChengqingZong YatingYang XiZhou 1002–1007 @@ -1727,7 +1727,7 @@ Corpus Resources for Dispute Mediation Discourse MathildeJanier - ChrisReed + ChrisReed 1014–1021 Dispute mediation is a growing activity in the resolution of conflicts, and more and more research emerge to enhance and better understand this (until recently) understudied practice. Corpus analyses are necessary to study discourse in this context; yet, little data is available, mainly because of its confidentiality principle. After proposing hints and avenues to acquire transcripts of mediation sessions, this paper presents the Dispute Mediation Corpus, which gathers annotated excerpts of mediation dialogues. Although developed as part of a project on argumentation, it is freely available and the text data can be used by anyone. This first-ever open corpus of mediation interactions can be of interest to scholars studying discourse, but also conflict resolution, argumentation, linguistics, communication, etc. We advocate for using and extending this resource that may be valuable to a large variety of domains of research, particularly those striving to enhance the study of the rapidly growing activity of dispute mediation. L16-1161 @@ -1745,10 +1745,10 @@ <fixed-case>P</fixed-case>ersona<fixed-case>B</fixed-case>ank: A Corpus of Personal Narratives and Their Story Intention Graphs - StephanieLukin + StephanieLukin KevinBowden CaseyBarackman - MarilynWalker + MarilynWalker 1026–1033 We present a new corpus, PersonaBank, consisting of 108 personal stories from weblogs that have been annotated with their Story Intention Graphs, a deep representation of the content of a story. We describe the topics of the stories and the basis of the Story Intention Graph representation, as well as the process of annotating the stories to produce the Story Intention Graphs and the challenges of adapting the tool to this new personal narrative domain. We also discuss how the corpus can be used in applications that retell the story using different styles of tellings, co-tellings, or as a content planner. L16-1163 @@ -1779,9 +1779,9 @@ Enhancing The <fixed-case>RATP</fixed-case>-<fixed-case>DECODA</fixed-case> Corpus With Linguistic Annotations For Performing A Large Range Of <fixed-case>NLP</fixed-case> Tasks CaroleLailler AnaïsLandeau - FrédéricBéchet + FrédéricBéchet YannickEstève - PaulDeléglise + PaulDeléglise 1047–1050 In this article, we present the RATP-DECODA Corpus which is composed by a set of 67 hours of speech from telephone conversations of a Customer Care Service (CCS). This corpus is already available on line at http://sldr.org/sldr000847/fr in its first version. However, many enhancements have been made in order to allow the development of automatic techniques to transcript conversations and to capture their meaning. These enhancements fall into two categories: firstly, we have increased the size of the corpus with manual transcriptions from a new operational day; secondly we have added new linguistic annotations to the whole corpus (either manually or through an automatic processing) in order to perform various linguistic tasks from syntactic and semantic parsing to dialog act tagging and dialog summarization. L16-1166 @@ -1792,7 +1792,7 @@ ManfredStede StergosAfantenos AndreasPeldszus - NicholasAsher + NicholasAsher JérémyPerret 1051–1058 We present the first corpus of texts annotated with two alternative approaches to discourse structure, Rhetorical Structure Theory (Mann and Thompson, 1988) and Segmented Discourse Representation Theory (Asher and Lascarides, 2003). 112 short argumentative texts have been analyzed according to these two theories. Furthermore, in previous work, the same texts have already been annotated for their argumentation structure, according to the scheme of Peldszus and Stede (2013). This corpus therefore enables studies of correlations between the two accounts of discourse structure, and between discourse and argumentation. We converted the three annotation formats to a common dependency tree format that enables to compare the structures, and we describe some initial findings. @@ -1801,7 +1801,7 @@ An Annotated Corpus of Direct Speech - JohnLee + JohnLee Chak YanYeung 1059–1063 We propose a scheme for annotating direct speech in literary texts, based on the Text Encoding Initiative (TEI) and the coreference annotation guidelines from the Message Understanding Conference (MUC). The scheme encodes the speakers and listeners of utterances in a text, as well as the quotative verbs that reports the utterances. We measure inter-annotator agreement on this annotation task. We then present statistics on a manually annotated corpus that consists of books from the New Testament. Finally, we visualize the corpus as a conversational network. @@ -1811,7 +1811,7 @@ Evaluating the Noisy Channel Model for the Normalization of Historical Texts: <fixed-case>B</fixed-case>asque, <fixed-case>S</fixed-case>panish and <fixed-case>S</fixed-case>lovene IzaskunEtxeberria - IñakiAlegria + IñakiAlegria LarraitzUria MansHulden 1064–1069 @@ -1830,7 +1830,7 @@ A Morphological Lexicon of <fixed-case>E</fixed-case>speranto with Morpheme Frequencies - EckhardBick + EckhardBick 1075–1078 This paper discusses the internal structure of complex Esperanto words (CWs). Using a morphological analyzer, possible affixation and compounding is checked for over 50,000 Esperanto lexemes against a list of 17,000 root words. Morpheme boundaries in the resulting analyses were then checked manually, creating a CW dictionary of 28,000 words, representing 56.4% of the lexicon, or 19.4% of corpus tokens. The error percentage of the EspGram morphological analyzer for new corpus CWs was 4.3% for types and 6.4% for tokens, with a recall of almost 100%, and wrong/spurious boundaries being more common than missing ones. For pedagogical purposes a morpheme frequency dictionary was constructed for a 16 million word corpus, confirming the importance of agglutinative derivational morphemes in the Esperanto lexicon. Finally, as a means to reduce the morphological ambiguity of CWs, we provide POS likelihoods for Esperanto suffixes. L16-1171 @@ -1848,7 +1848,7 @@ Giving Lexical Resources a Second Life: Démonette, a Multi-sourced Morpho-semantic Network for <fixed-case>F</fixed-case>rench NabilHathout - FiammettaNamer + FiammettaNamer 1084–1091 Démonette is a derivational morphological network designed for the description of French. Its original architecture enables its use as a formal framework for the description of morphological analyses and as a repository for existing lexicons. It is fed with a variety of resources, which all are already validated. The harmonization of their content into a unified format provides them a second life, in which they are enriched with new properties, provided these are deductible from their contents. Démonette is released under a Creative Commons license. It is usable for theoretical and descriptive research in morphology, as a source of experimental material for psycholinguistics, natural language processing (NLP) and information retrieval (IR), where it fills a gap, since French lacks a large-coverage derivational resources database. The article presents the integration of two existing lexicons into Démonette. The first is Verbaction, a lexicon of deverbal action nouns. The second is Lexeur, a database of agent nouns in -eur derived from verbs or from nouns. L16-1173 @@ -1883,7 +1883,7 @@ Encoding Adjective Scales for Fine-grained Resources CédricLopez - FrédériqueSegond + FrédériqueSegond ChristianeFellbaum 1109–1113 We propose an automatic approach towards determining the relative location of adjectives on a common scale based on their strength. We focus on adjectives expressing different degrees of goodness occurring in French product (perfumes) reviews. Using morphosyntactic patterns, we extract from the reviews short phrases consisting of a noun that encodes a particular aspect of the perfume and an adjective modifying that noun. We then associate each such n-gram with the corresponding product aspect and its related star rating. Next, based on the star scores, we generate adjective scales reflecting the relative strength of specific adjectives associated with a shared attribute of the product. An automatic ordering of the adjectives “correct” (correct), “sympa” (nice), “bon” (good) and “excellent” (excellent) according to their score in our resource is consistent with an intuitive scale based on human judgments. Our long-term objective is to generate different adjective scales in an empirical manner, which could allow the enrichment of lexical resources. @@ -1916,11 +1916,11 @@ <fixed-case>ANEW</fixed-case>+: Automatic Expansion and Validation of Affective Norms of Words Lexicons in Multiple Languages SamiraShaikh KitCho - TomekStrzalkowski - LaurieFeldman + TomekStrzalkowski + LaurieFeldman JohnLien TingLiu - George AaronBroadwell + George AaronBroadwell 1127–1132 In this article we describe our method of automatically expanding an existing lexicon of words with affective valence scores. The automatic expansion process was done in English. In addition, we describe our procedure for automatically creating lexicons in languages where such resources may not previously exist. The foreign languages we discuss in this paper are Spanish, Russian and Farsi. We also describe the procedures to systematically validate our newly created resources. The main contributions of this work are: 1) A general method for expansion and creation of lexicons with scores of words on psychological constructs such as valence, arousal or dominance; and 2) a procedure for ensuring validity of the newly constructed resources. L16-1180 @@ -1937,7 +1937,7 @@ Challenges of Evaluating Sentiment Analysis Tools on Social Media DianaMaynard - KalinaBontcheva + KalinaBontcheva 1142–1148 This paper discusses the challenges in carrying out fair comparative evaluations of sentiment analysis systems. Firstly, these are due to differences in corpus annotation guidelines and sentiment class distribution. Secondly, different systems often make different assumptions about how to interpret certain statements, e.g. tweets with URLs. In order to study the impact of these on evaluation results, this paper focuses on tweet sentiment analysis in particular. One existing and two newly created corpora are used, and the performance of four different sentiment analysis systems is reported; we make our annotated datasets and sentiment analysis applications publicly available. We see considerable variations in results across the different corpora, which calls into question the validity of many existing annotated datasets and evaluations, and we make some observations about both the systems and the datasets as a result. L16-1182 @@ -1946,8 +1946,8 @@ <fixed-case>E</fixed-case>mo<fixed-case>T</fixed-case>weet-28: A Fine-Grained Emotion Corpus for Sentiment Analysis Jasy Suet YanLiew - Howard R.Turtle - Elizabeth D.Liddy + Howard R.Turtle + Elizabeth D.Liddy 1149–1156 This paper describes EmoTweet-28, a carefully curated corpus of 15,553 tweets annotated with 28 emotion categories for the purpose of training and evaluating machine learning models for emotion classification. EmoTweet-28 is, to date, the largest tweet corpus annotated with fine-grained emotion categories. The corpus contains annotations for four facets of emotion: valence, arousal, emotion category and emotion cues. We first used small-scale content analysis to inductively identify a set of emotion categories that characterize the emotions expressed in microblog text. We then expanded the size of the corpus using crowdsourcing. The corpus encompasses a variety of examples including explicit and implicit expressions of emotions as well as tweets containing multiple emotions. EmoTweet-28 represents an important resource to advance the development and evaluation of more emotion-sensitive systems. L16-1183 @@ -1956,7 +1956,7 @@ Happy Accident: A Sentiment Composition Lexicon for Opposing Polarity Phrases SvetlanaKiritchenko - SaifMohammad + SaifMohammad 1157–1164 Sentiment composition is the determining of sentiment of a multi-word linguistic unit, such as a phrase or a sentence, based on its constituents. We focus on sentiment composition in phrases formed by at least one positive and at least one negative word ― phrases like ‘happy accident’ and ‘best winter break’. We refer to such phrases as opposing polarity phrases. We manually annotate a collection of opposing polarity phrases and their constituent single words with real-valued sentiment intensity scores using a method known as Best―Worst Scaling. We show that the obtained annotations are consistent. We explore the entries in the lexicon for linguistic regularities that govern sentiment composition in opposing polarity phrases. Finally, we list the current and possible future applications of the lexicon. L16-1184 @@ -1964,8 +1964,8 @@ Detecting Implicit Expressions of Affect from Text using Semantic Knowledge on Common Concept Properties - AlexandraBalahur - HristoTanev + AlexandraBalahur + HristoTanev 1165–1170 Emotions are an important part of the human experience. They are responsible for the adaptation and integration in the environment, offering, most of the time together with the cognitive system, the appropriate responses to stimuli in the environment. As such, they are an important component in decision-making processes. In today’s society, the avalanche of stimuli present in the environment (physical or virtual) makes people more prone to respond to stronger affective stimuli (i.e., those that are related to their basic needs and motivations ― survival, food, shelter, etc.). In media reporting, this is translated in the use of arguments (factual data) that are known to trigger specific (strong, affective) behavioural reactions from the readers. This paper describes initial efforts to detect such arguments from text, based on the properties of concepts. The final system able to retrieve and label this type of data from the news in traditional and social platforms is intended to be integrated Europe Media Monitor family of applications to detect texts that trigger certain (especially negative) reactions from the public, with consequences on citizen safety and security. L16-1185 @@ -1973,7 +1973,7 @@ Creating a General <fixed-case>R</fixed-case>ussian Sentiment Lexicon - NataliaLoukachevitch + NataliaLoukachevitch AnatoliiLevchik 1171–1176 The paper describes the new Russian sentiment lexicon - RuSentiLex. The lexicon was gathered from several sources: opinionated words from domain-oriented Russian sentiment vocabularies, slang and curse words extracted from Twitter, objective words with positive or negative connotations from a news collection. The words in the lexicon having different sentiment orientations in specific senses are linked to appropriate concepts of the thesaurus of Russian language RuThes. All lexicon entries are classified according to four sentiment categories and three sources of sentiment (opinion, emotion, or fact). The lexicon can serve as the first version for the construction of domain-specific sentiment lexicons or can be used for feature generation in machine-learning approaches. In this role, the RuSentiLex lexicon was utilized by the participants of the SentiRuEval-2016 Twitter reputation monitoring shared task and allowed them to achieve high results. @@ -2006,7 +2006,7 @@ Specialising Paragraph Vectors for Text Polarity Detection - FabioTamburini + FabioTamburini 1190–1195 This paper presents some experiments for specialising Paragraph Vectors, a new technique for creating text fragment (phrase, sentence, paragraph, text, ...) embedding vectors, for text polarity detection. The first extension regards the injection of polarity information extracted from a polarity lexicon into embeddings and the second extension aimed at inserting word order information into Paragraph Vectors. These two extensions, when training a logistic-regression classifier on the combined embeddings, were able to produce a relevant gain in performance when compared to the standard Paragraph Vector methods proposed by Le and Mikolov (2014). L16-1189 @@ -2016,7 +2016,7 @@ Evaluating Lexical Similarity to build Sentiment Similarity GrégoireJadi VincentClaveau - BéatriceDaille + BéatriceDaille LauraMonceaux 1196–1201 In this article, we propose to evaluate the lexical similarity information provided by word representations against several opinion resources using traditional Information Retrieval tools. Word representation have been used to build and to extend opinion resources such as lexicon, and ontology and their performance have been evaluated on sentiment analysis tasks. We question this method by measuring the correlation between the sentiment proximity provided by opinion resources and the semantic similarity provided by word representations using different correlation coefficients. We also compare the neighbors found in word representations and list of similar opinion words. Our results show that the proximity of words in state-of-the-art word representations is not very effective to build sentiment similarity. @@ -2029,7 +2029,7 @@ MelanieZaiß QiHan SteffenKoch - SabineSchulte im Walde + SabineSchulte im Walde 1202–1206 Vector space models and distributional information are widely used in NLP. The models typically rely on complex, high-dimensional objects. We present an interactive visualisation tool to explore salient lexical-semantic features of high-dimensional word objects and word similarities. Most visualisation tools provide only one low-dimensional map of the underlying data, so they are not capable of retaining the local and the global structure. We overcome this limitation by providing an additional trust-view to obtain a more realistic picture of the actual object distances. Additional tool options include the reference to a gold standard classification, the reference to a cluster analysis as well as listing the most salient (common) features for a selected subset of the words. L16-1191 @@ -2039,7 +2039,7 @@ <fixed-case>S</fixed-case>em<fixed-case>A</fixed-case>ligner: A Method and Tool for Aligning Chunks with Semantic Relation Types and Semantic Similarity Scores NabinMaharjan RajendraBanjade - Nobal BikramNiraula + Nobal BikramNiraula VasileRus 1207–1211 This paper introduces a ruled-based method and software tool, called SemAligner, for aligning chunks across texts in a given pair of short English texts. The tool, based on the top performing method at the Interpretable Short Text Similarity shared task at SemEval 2015, where it was used with human annotated (gold) chunks, can now additionally process plain text-pairs using two powerful chunkers we developed, e.g. using Conditional Random Fields. Besides aligning chunks, the tool automatically assigns semantic relations to the aligned chunks (such as EQUI for equivalent and OPPO for opposite) and semantic similarity scores that measure the strength of the semantic relation between the aligned chunks. Experiments show that SemAligner performs competitively for system generated chunks and that these results are also comparable to results obtained on gold chunks. SemAligner has other capabilities such as handling various input formats and chunkers as well as extending lookup resources. @@ -2057,7 +2057,7 @@ mwetoolkit+sem: Integrating Word Embeddings in the mwetoolkit for Semantic <fixed-case>MWE</fixed-case> Processing - SilvioCordeiro + SilvioCordeiro CarlosRamisch AlineVillavicencio 1221–1225 @@ -2077,7 +2077,7 @@ Extending Monolingual Semantic Textual Similarity Task to Multiple Cross-lingual Settings - YoshihikoHayashi + YoshihikoHayashi WentaoLuo 1233–1239 This paper describes our independent effort for extending the monolingual semantic textual similarity (STS) task setting to multiple cross-lingual settings involving English, Japanese, and Chinese. So far, we have adopted a “monolingual similarity after translation” strategy to predict the semantic similarity between a pair of sentences in different languages. With this strategy, a monolingual similarity method is applied after having (one of) the target sentences translated into a pivot language. Therefore, this paper specifically details the required and developed resources to implement this framework, while presenting our current results for English-Japanese-Chinese cross-lingual STS tasks that may exemplify the validity of the framework. @@ -2088,7 +2088,7 @@ Resources for building applications with Dependency <fixed-case>M</fixed-case>inimal <fixed-case>R</fixed-case>ecursion <fixed-case>S</fixed-case>emantics AnnCopestake GuyEmerson - Michael WayneGoodman + Michael WayneGoodman MaticHorvat AlexanderKuhnle EwaMuszyńska @@ -2139,7 +2139,7 @@ Collecting Language Resources for the <fixed-case>L</fixed-case>atvian e-Government Machine Translation Platform RobertsRozis - AndrejsVasiļjevs + AndrejsVasiļjevs RaivisSkadiņš 1270–1276 This paper describes corpora collection activity for building large machine translation systems for Latvian e-Government platform. We describe requirements for corpora, selection and assessment of data sources, collection of the public corpora and creation of new corpora from miscellaneous sources. Methodology, tools and assessment methods are also presented along with the results achieved, challenges faced and conclusions made. Several approaches to address the data scarceness are discussed. We summarize the volume of obtained corpora and provide quality metrics of MT systems trained on this data. Resulting MT systems for English-Latvian, Latvian English and Latvian Russian are integrated in the Latvian e-service portal and are freely available on website HUGO.LV. This paper can serve as a guidance for similar activities initiated in other countries, particularly in the context of European Language Resource Coordination action. @@ -2148,12 +2148,12 @@ <fixed-case>N</fixed-case>ederlab: Towards a Single Portal and Research Environment for Diachronic <fixed-case>D</fixed-case>utch Text Corpora - HennieBrugman + HennieBrugman MartinReynaert Nicolinevan der Sijs Renévan Stipriaan - ErikTjong Kim Sang - Antalvan den Bosch + ErikTjong Kim Sang + Antalvan den Bosch 1277–1281 The Nederlab project aims to bring together all digitized texts relevant to the Dutch national heritage, the history of the Dutch language and culture (circa 800 – present) in one user friendly and tool enriched open access web interface. This paper describes Nederlab halfway through the project period and discusses the collections incorporated, back-office processes, system back-end as well as the Nederlab Research Portal end-user web application. L16-1203 @@ -2184,7 +2184,7 @@ DiyiYang AaronHalfaker RobertKraut - EduardHovy + EduardHovy 1295–1299 In this work, we introduced a corpus for categorizing edit types in Wikipedia. This fine-grained taxonomy of edit types enables us to differentiate editing actions and find editor roles in Wikipedia based on their low-level edit types. To do this, we first created an annotated corpus based on 1,996 edits obtained from 953 article revisions and built machine-learning models to automatically identify the edit categories associated with edits. Building on this automated measurement of edit types, we then applied a graphical model analogous to Latent Dirichlet Allocation to uncover the latent roles in editors’ edit histories. Applying this technique revealed eight different roles editors play, such as Social Networker, Substantive Expert, etc. L16-1206 @@ -2196,7 +2196,7 @@ AidanKaplan RamyEskander NizarHabash - OwenRambow + OwenRambow 1300–1306 We present new language resources for Moroccan and Sanaani Yemeni Arabic. The resources include corpora for each dialect which have been morphologically annotated, and morphological analyzers for each dialect which are derived from these corpora. These are the first sets of resources for Moroccan and Yemeni Arabic. The resources will be made available to the public. L16-1207 @@ -2204,7 +2204,7 @@ Merging Data Resources for Inflectional and Derivational Morphology in <fixed-case>C</fixed-case>zech - ZdeněkŽabokrtský + ZdeněkŽabokrtský MagdaŠevčíková MilanStraka JonášVidra @@ -2229,7 +2229,7 @@ EleanorChodroff MatthewMaciejewski JanTrmal - SanjeevKhudanpur + SanjeevKhudanpur JohnGodfrey 1323–1327 The Mixer series of speech corpora were collected over several years, principally to support annual NIST evaluations of speaker recognition (SR) technologies. These evaluations focused on conversational speech over a variety of channels and recording conditions. One of the series, Mixer-6, added a new condition, read speech, to support basic scientific research on speaker characteristics, as well as technology evaluation. With read speech it is possible to make relatively precise measurements of phonetic events and features, which can be correlated with the performance of speaker recognition algorithms, or directly used in phonetic analysis of speaker variability. The read speech, as originally recorded, was adequate for large-scale evaluations (e.g., fixed-text speaker ID algorithms) but only marginally suitable for acoustic-phonetic studies. Numerous errors due largely to speaker behavior remained in the corpus, with no record of their locations or rate of occurrence. We undertook the effort to correct this situation with automatic methods supplemented by human listening and annotation. The present paper describes the tools and methods, resulting corrections, and some examples of the kinds of research studies enabled by these enhancements. @@ -2242,9 +2242,9 @@ FlorianHönig YueZhang SimoneHantke - AntonBatliner - ElmarNöth - BjörnSchuller + AntonBatliner + ElmarNöth + BjörnSchuller 1328–1332 In this paper, we describe a new database with audio recordings of non-native (L2) speakers of English, and the perceptual evaluation experiment conducted with native English speakers for assessing the prosody of each recording. These annotations are then used to compute the gold standard using different methods, and a series of regression experiments is conducted to evaluate their impact on the performance of a regression model predicting the degree of naturalness of L2 speech. Further, we compare the relevance of different feature groups modelling prosody in general (without speech tempo), speech rate and pauses modelling speech tempo (fluency), voice quality, and a variety of spectral features. We also discuss the impact of various fusion strategies on performance. Overall, our results demonstrate that the prosody of non-native speakers of English as L2 can be reliably assessed using supra-segmental audio features; prosodic features seem to be the most important ones. L16-1211 @@ -2261,7 +2261,7 @@ JeaninJügler YvesLaprie OdileMella - BerndMöbius + BerndMöbius FrankZimmerer 1333–1338 The IFCASL corpus is a French-German bilingual phonetic learner corpus designed, recorded and annotated in a project on individualized feedback in computer-assisted spoken language learning. The motivation for setting up this corpus was that there is no phonetically annotated and segmented corpus for this language pair of comparable of size and coverage. In contrast to most learner corpora, the IFCASL corpus incorporate data for a language pair in both directions, i.e. in our case French learners of German, and German learners of French. In addition, the corpus is complemented by two sub-corpora of native speech by the same speakers. The corpus provides spoken data by about 100 speakers with comparable productions, annotated and segmented on the word and the phone level, with more than 50% manually corrected data. The paper reports on inter-annotator agreement and the optimization of the acoustic models for forced speech-text alignment in exercises for computer-assisted pronunciation training. Example studies based on the corpus data with a phonetic focus include topics such as the realization of /h/ and glottal stop, final devoicing of obstruents, vowel quantity and quality, pitch range, and tempo. @@ -2270,7 +2270,7 @@ <fixed-case>LELIO</fixed-case>: An Auto-Adaptative System to Acquire Domain Lexical Knowledge in Technical Texts - PatrickSaint-Dizier + PatrickSaint-Dizier 1339–1345 In this paper, we investigate some language acquisition facets of an auto-adaptative system that can automatically acquire most of the relevant lexical knowledge and authoring practices for an application in a given domain. This is the LELIO project: producing customized LELIE solutions. Our goal, within the framework of LELIE (a system that tags language uses that do not follow the Constrained Natural Language principles), is to automate the long, costly and error prone lexical customization of LELIE to a given application domain. Technical texts being relatively restricted in terms of syntax and lexicon, results obtained show that this approach is feasible and relatively reliable. By auto-adaptative, we mean that the system learns from a sample of the application corpus the various lexical terms and uses crucial for LELIE to work properly (e.g. verb uses, fuzzy terms, business terms, stylistic patterns). A technical writer validation method is developed at each step of the acquisition. L16-1213 @@ -2366,7 +2366,7 @@ Semi-automatically Alignment of Predicates between Speech and <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes data NirajShrestha - Marie-FrancineMoens + Marie-FrancineMoens 1397–1401 Speech data currently receives a growing attention and is an important source of information. We still lack suitable corpora of transcribed speech annotated with semantic roles that can be used for semantic role labeling (SRL), which is not the case for written data. Semantic role labeling in speech data is a challenging and complex task due to the lack of sentence boundaries and the many transcription errors such as insertion, deletion and misspellings of words. In written data, SRL evaluation is performed at the sentence level, but in speech data sentence boundaries identification is still a bottleneck which makes evaluation more complex. In this work, we semi-automatically align the predicates found in transcribed speech obtained with an automatic speech recognizer (ASR) with the predicates found in the corresponding written documents of the OntoNotes corpus and manually align the semantic roles of these predicates thus obtaining annotated semantic frames in the speech data. This data can serve as gold standard alignments for future research in semantic role labeling of speech data. L16-1222 @@ -2410,18 +2410,18 @@ JohannPoignant MateuszBudnik HervéBredin - ClaudeBarras + ClaudeBarras MickaelStefas PierrickBruneau - GillesAdda - LaurentBesacier + GillesAdda + LaurentBesacier HazimEkenel GilFrancopoulo JavierHernando JosephMariani RamonMorros GeorgesQuénot - SophieRosset + SophieRosset ThomasTamisier 1421–1425 In this paper, we describe the organization and the implementation of the CAMOMILE collaborative annotation framework for multimodal, multimedia, multilingual (3M) data. Given the versatile nature of the analysis which can be performed on 3M data, the structure of the server was kept intentionally simple in order to preserve its genericity, relying on standard Web technologies. Layers of annotations, defined as data associated to a media fragment from the corpus, are stored in a database and can be managed through standard interfaces with authentication. Interfaces tailored specifically to the needed task can then be developed in an agile way, relying on simple but reliable services for the management of the centralized annotations. We then present our implementation of an active learning scenario for person annotation in video, relying on the CAMOMILE server; during a dry run experiment, the manual annotation of 716 speech segments was thus propagated to 3504 labeled tracks. The code of the CAMOMILE framework is distributed in open source. @@ -2454,7 +2454,7 @@ A Web Tool for Building Parallel Corpora of Spoken and Sign Languages AlexBecker - FabioKepler + FabioKepler SaraCandeias 1438–1445 In this paper we describe our work in building an online tool for manually annotating texts in any spoken language with SignWriting in any sign language. The existence of such tool will allow the creation of parallel corpora between spoken and sign languages that can be used to bootstrap the creation of efficient tools for the Deaf community. As an example, a parallel corpus between English and American Sign Language could be used for training Machine Learning models for automatic translation between the two languages. Clearly, this kind of tool must be designed in a way that it eases the task of human annotators, not only by being easy to use, but also by giving smart suggestions as the annotation progresses, in order to save time and effort. By building a collaborative, online, easy to use annotation tool for building parallel corpora between spoken and sign languages we aim at helping the development of proper resources for sign languages that can then be used in state-of-the-art models currently used in tools for spoken languages. There are several issues and difficulties in creating this kind of resource, and our presented tool already deals with some of them, like adequate text representation of a sign and many to many alignments between words and signs. @@ -2465,7 +2465,7 @@ Issues and Challenges in Annotating <fixed-case>U</fixed-case>rdu Action Verbs on the <fixed-case>IMAGACT</fixed-case>4<fixed-case>ALL</fixed-case> Platform SharminMuzaffar PitambarBehera - GirishJha + GirishJha 1446–1451 In South-Asian languages such as Hindi and Urdu, action verbs having compound constructions and serial verbs constructions pose serious problems for natural language processing and other linguistic tasks. Urdu is an Indo-Aryan language spoken by 51, 500, 0001 speakers in India. Action verbs that occur spontaneously in day-to-day communication are highly ambiguous in nature semantically and as a consequence cause disambiguation issues that are relevant and applicable to Language Technologies (LT) like Machine Translation (MT) and Natural Language Processing (NLP). IMAGACT4ALL is an ontology-driven web-based platform developed by the University of Florence for storing action verbs and their inter-relations. This group is currently collaborating with Jawaharlal Nehru University (JNU) in India to connect Indian languages on this platform. Action verbs are frequently used in both written and spoken discourses and refer to various meanings because of their polysemic nature. The IMAGACT4ALL platform stores each 3d animation image, each one of them referring to a variety of possible ontological types, which in turn makes the annotation task for the annotator quite challenging with regard to selecting verb argument structure having a range of probability distribution. The authors, in this paper, discuss the issues and challenges such as complex predicates (compound and conjunct verbs), ambiguously animated video illustrations, semantic discrepancies, and the factors of verb-selection preferences that have produced significant problems in annotating Urdu verbs on the IMAGACT ontology. L16-1230 @@ -2495,11 +2495,11 @@ The Event and Implied Situation Ontology (<fixed-case>ESO</fixed-case>): Application and Evaluation - RoxaneSegers + RoxaneSegers MarcoRospocher PiekVossen EgoitzLaparra - GermanRigau + GermanRigau Anne-LyseMinard 1463–1470 This paper presents the Event and Implied Situation Ontology (ESO), a manually constructed resource which formalizes the pre and post situations of events and the roles of the entities affected by an event. The ontology is built on top of existing resources such as WordNet, SUMO and FrameNet. The ontology is injected to the Predicate Matrix, a resource that integrates predicate and role information from amongst others FrameNet, VerbNet, PropBank, NomBank and WordNet. We illustrate how these resources are used on large document collections to detect information that otherwise would have remained implicit. The ontology is evaluated on two aspects: recall and precision based on a manually annotated corpus and secondly, on the quality of the knowledge inferred by the situation assertions in the ontology. Evaluation results on the quality of the system show that 50% of the events typed and enriched with ESO assertions are correct. @@ -2588,7 +2588,7 @@ Corpus vs. Lexicon Supervision in Morphosyntactic Tagging: the Case of <fixed-case>S</fixed-case>lovene NikolaLjubešić - TomažErjavec + TomažErjavec 1527–1531 In this paper we present a tagger developed for inflectionally rich languages for which both a training corpus and a lexicon are available. We do not constrain the tagger by the lexicon entries, allowing both for lexicon incompleteness and noisiness. By using the lexicon indirectly through features we allow for known and unknown words to be tagged in the same manner. We test our tagger on Slovene data, obtaining a 25% error reduction of the best previous results both on known and unknown words. Given that Slovene is, in comparison to some other Slavic languages, a well-resourced language, we perform experiments on the impact of token (corpus) vs. type (lexicon) supervision, obtaining useful insights in how to balance the effort of extending resources to yield better tagging results. L16-1242 @@ -2596,7 +2596,7 @@ Challenges and Solutions for Consistent Annotation of <fixed-case>V</fixed-case>ietnamese Treebank - QuyNguyen + QuyNguyen YusukeMiyao HaLe NganNguyen @@ -2608,7 +2608,7 @@ Correcting Errors in a Treebank Based on Tree Mining KantaSuzuki - YoshihideKato + YoshihideKato ShigekiMatsubara 1540–1545 This paper provides a new method to correct annotation errors in a treebank. The previous error correction method constructs a pseudo parallel corpus where incorrect partial parse trees are paired with correct ones, and extracts error correction rules from the parallel corpus. By applying these rules to a treebank, the method corrects errors. However, this method does not achieve wide coverage of error correction. To achieve wide coverage, our method adopts a different approach. In our method, we consider that an infrequent pattern which can be transformed to a frequent one is an annotation error pattern. Based on a tree mining technique, our method seeks such infrequent tree patterns, and constructs error correction rules each of which consists of an infrequent pattern and a corresponding frequent pattern. We conducted an experiment using the Penn Treebank. We obtained 1,987 rules which are not constructed by the previous method, and the rules achieved good precision. @@ -2618,8 +2618,8 @@ 4<fixed-case>C</fixed-case>ouv: A New Treebank for <fixed-case>F</fixed-case>rench PhilippeBlache - Grégoirede Montcheuil - LaurentPrévot + Grégoirede Montcheuil + LaurentPrévot StéphaneRauzy 1546–1551 The question of the type of text used as primary data in treebanks is of certain importance. First, it has an influence at the discourse level: an article is not organized in the same way as a novel or a technical document. Moreover, it also has consequences in terms of semantic interpretation: some types of texts can be easier to interpret than others. We present in this paper a new type of treebank which presents the particularity to answer to specific needs of experimental linguistic. It is made of short texts (book backcovers) that presents a strong coherence in their organization and can be rapidly interpreted. This type of text is adapted to short reading sessions, making it easy to acquire physiological data (e.g. eye movement, electroencepholagraphy). Such a resource offers reliable data when looking for correlations between computational models and human language processing. @@ -2632,8 +2632,8 @@ AndreiaQuerido MarisaCampos Rita ValadasPereira - JoãoSilva - AntónioBranco + JoãoSilva + AntónioBranco 1552–1557 This paper presents a new linguistic resource for the study and computational processing of Portuguese. CINTIL DependencyBank PREMIUM is a corpus of Portuguese news text, accurately manually annotated with a wide range of linguistic information (morpho-syntax, named-entities, syntactic function and semantic roles), making it an invaluable resource specially for the development and evaluation of data-driven natural language processing tools. The corpus is under active development, reaching 4,000 sentences in its current version. The paper also reports on the training and evaluation of a dependency parser over this corpus. CINTIL DependencyBank PREMIUM is freely-available for research purposes through META-SHARE. L16-1246 @@ -2664,7 +2664,7 @@ Win PaPa MasaoUtiyama AndrewFinch - EiichiroSumita + EiichiroSumita 1574–1578 This paper introduces the ALT project initiated by the Advanced Speech Translation Research and Development Promotion Center (ASTREC), NICT, Kyoto, Japan. The aim of this project is to accelerate NLP research for Asian languages such as Indonesian, Japanese, Khmer, Laos, Malay, Myanmar, Philippine, Thai and Vietnamese. The original resource for this project was English articles that were randomly selected from Wikinews. The project has so far created a corpus for Myanmar and will extend in scope to include other languages in the near future. A 20000-sentence corpus of Myanmar that has been manually translated from an English corpus has been word segmented, word aligned, part-of-speech tagged and constituency parsed by human annotators. In this paper, we present the implementation steps for creating the treebank in detail, including a description of the ALT web-based treebanking tool. Moreover, we report statistics on the annotation quality of the Myanmar treebank created so far. L16-1249 @@ -2672,7 +2672,7 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>N</fixed-case>orwegian - LiljaØvrelid + LiljaØvrelid PetterHohle 1579–1585 This article describes the conversion of the Norwegian Dependency Treebank to the Universal Dependencies scheme. This paper details the mapping of PoS tags, morphological features and dependency relations and provides a description of the structural changes made to NDT analyses in order to make it compliant with the UD guidelines. We further present PoS tagging and dependency parsing experiments which report first results for the processing of the converted treebank. The full converted treebank was made available with the 1.2 release of the UD treebanks. @@ -2682,9 +2682,9 @@ Fostering the Next Generation of <fixed-case>E</fixed-case>uropean Language Technology: Recent Developments ― Emerging Initiatives ― Challenges and Opportunities GeorgRehm - JanHajič - Josefvan Genabith - AndrejsVasiljevs + JanHajič + Josefvan Genabith + AndrejsVasiljevs 1586–1592 META-NET is a European network of excellence, founded in 2010, that consists of 60 research centres in 34 European countries. One of the key visions and goals of META-NET is a truly multilingual Europe, which is substantially supported and realised through language technologies. In this article we provide an overview of recent developments around the multilingual Europe topic, we also describe recent and upcoming events as well as recent and upcoming strategy papers. Furthermore, we provide overviews of two new emerging initiatives, the CEF.AT and ELRC activity on the one hand and the Cracking the Language Barrier federation on the other. The paper closes with several suggested next steps in order to address the current challenges and to open up new opportunities. L16-1251 @@ -2701,11 +2701,11 @@ Open Data Vocabularies for Assigning Usage Rights to Data Resources from Translation Projects - DavidLewis + DavidLewis KanizFatema - AlfredoMaldonado + AlfredoMaldonado BrianWalshe - ArturoCalvo + ArturoCalvo 1601–1609 An assessment of the intellectual property requirements for data used in machine-aided translation is provided based on a recent EC-funded legal review. This is compared against the capabilities offered by current linked open data standards from the W3C for publishing and sharing translation memories from translation projects, and proposals for adequately addressing the intellectual property needs of stakeholders in translation projects using open data vocabularies are suggested. L16-1253 @@ -2713,7 +2713,7 @@ Language Resource Citation: the <fixed-case>ISLRN</fixed-case> Dissemination and Further Developments - ValérieMapelli + ValérieMapelli VladimirPopescu LinLiu KhalidChoukri @@ -2724,8 +2724,8 @@ Trends in <fixed-case>HLT</fixed-case> Research: A Survey of <fixed-case>LDC</fixed-case>’s Data Scholarship Program - DeniseDiPersio - ChristopherCieri + DeniseDiPersio + ChristopherCieri 1614–1618 Since its inception in 2010, the Linguistic Data Consortium’s data scholarship program has awarded no cost grants in data to 64 recipients from 26 countries. A survey of the twelve cycles to date ― two awards each in the Fall and Spring semesters from Fall 2010 through Spring 2016 ― yields an interesting view into graduate program research trends in human language technology and related fields and the particular data sets deemed important to support that research. The survey also reveals regions in which such activity appears to be on a rise, including in Arabic-speaking regions and portions of the Americas and Asia. L16-1255 @@ -2746,7 +2746,7 @@ Towards a Corpus of Violence Acts in <fixed-case>A</fixed-case>rabic Social Media AymanAlhelbawy PoesioMassimo - UdoKruschwitz + UdoKruschwitz 1627–1631 In this paper we present a new corpus of Arabic tweets that mention some form of violent event, developed to support the automatic identification of Human Rights Abuse. The dataset was manually labelled for seven classes of violence using crowdsourcing. L16-1257 @@ -2755,8 +2755,8 @@ <fixed-case>T</fixed-case>wi<fixed-case>S</fixed-case>ty: A Multilingual <fixed-case>T</fixed-case>witter Stylometry Corpus for Gender and Personality Profiling BenVerhoeven - WalterDaelemans - BarbaraPlank + WalterDaelemans + BarbaraPlank 1632–1637 Personality profiling is the task of detecting personality traits of authors based on writing style. Several personality typologies exist, however, the Briggs-Myer Type Indicator (MBTI) is particularly popular in the non-scientific community, and many people use it to analyse their own personality and talk about the results online. Therefore, large amounts of self-assessed data on MBTI are readily available on social-media platforms such as Twitter. We present a novel corpus of tweets annotated with the MBTI personality type and gender of their author for six Western European languages (Dutch, German, French, Italian, Portuguese and Spanish). We outline the corpus creation and annotation, show statistics of the obtained data distributions and present first baselines on Myers-Briggs personality profiling and gender prediction for all six languages. L16-1258 @@ -2792,7 +2792,7 @@ SumireUematsu HiroshiKanayama ShinsukeMori - YujiMatsumoto + YujiMatsumoto 1651–1658 We present an attempt to port the international syntactic annotation scheme, Universal Dependencies, to the Japanese language in this paper. Since the Japanese syntactic structure is usually annotated on the basis of unique chunk-based dependencies, we first introduce word-based dependencies by using a word unit called the Short Unit Word, which usually corresponds to an entry in the lexicon UniDic. Porting is done by mapping the part-of-speech tagset in UniDic to the universal part-of-speech tagset, and converting a constituent-based treebank to a typed dependency tree. The conversion is not straightforward, and we discuss the problems that arose in the conversion and the current solutions. A treebank consisting of 10,000 sentences was built by converting the existent resources and currently released to the public. L16-1261 @@ -2801,17 +2801,17 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies v1: A Multilingual Treebank Collection JoakimNivre - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe FilipGinter YoavGoldberg - JanHajič - Christopher D.Manning + JanHajič + Christopher D.Manning RyanMcDonald SlavPetrov SampoPyysalo NataliaSilveira ReutTsarfaty - DanielZeman + DanielZeman 1659–1666 Cross-linguistically consistent annotation is necessary for sound comparative evaluation and cross-lingual learning experiments. It is also useful for multilingual system development and comparative linguistic studies. Universal Dependencies is an open community effort to create cross-linguistically consistent treebank annotation for many languages within a dependency-based lexicalist framework. In this paper, we describe v1 of the universal guidelines, the underlying design principles, and the currently available treebanks for 33 languages. L16-1262 @@ -2821,7 +2821,7 @@ Construction of an <fixed-case>E</fixed-case>nglish Dependency Corpus incorporating Compound Function Words AkihikoKato HiroyukiShindo - YujiMatsumoto + YujiMatsumoto 1667–1671 The recognition of multiword expressions (MWEs) in a sentence is important for such linguistic analyses as syntactic and semantic parsing, because it is known that combining an MWE into a single token improves accuracy for various NLP tasks, such as dependency parsing and constituency parsing. However, MWEs are not annotated in Penn Treebank. Furthermore, when converting word-based dependency to MWE-aware dependency directly, one could combine nodes in an MWE into a single node. Nevertheless, this method often leads to the following problem: A node derived from an MWE could have multiple heads and the whole dependency structure including MWE might be cyclic. Therefore we converted a phrase structure to a dependency structure after establishing an MWE as a single subtree. This approach can avoid an occurrence of multiple heads and/or cycles. In this way, we constructed an English dependency corpus taking into account compound function words, which are one type of MWEs that serve as functional expressions. In addition, we report experimental results of dependency parsing using a constructed corpus. L16-1263 @@ -2839,7 +2839,7 @@ A Dependency Treebank of the <fixed-case>C</fixed-case>hinese Buddhist Canon Tak-sumWong - JohnLee + JohnLee 1679–1683 We present a dependency treebank of the Chinese Buddhist Canon, which contains 1,514 texts with about 50 million Chinese characters. The treebank was created by an automatic parser trained on a smaller treebank, containing four manually annotated sutras (Lee and Kong, 2014). We report results on word segmentation, part-of-speech tagging and dependency parsing, and discuss challenges posed by the processing of medieval Chinese. In a case study, we exploit the treebank to examine verbs frequently associated with Buddha, and to analyze usage patterns of quotative verbs in direct speech. Our results suggest that certain quotative verbs imply status differences between the speaker and the listener. L16-1265 @@ -2848,7 +2848,7 @@ Automatic Biomedical Term Polysemy Detection Juan AntonioLossio-Ventura - ClementJonquet + ClementJonquet MathieuRoche MaguelonneTeisseire 1684–1688 @@ -2868,9 +2868,9 @@ Addressing the <fixed-case>MFS</fixed-case> Bias in <fixed-case>WSD</fixed-case> systems MartenPostma - RubenIzquierdo - EnekoAgirre - GermanRigau + RubenIzquierdo + EnekoAgirre + GermanRigau PiekVossen 1695–1700 Word Sense Disambiguation (WSD) systems tend to have a strong bias towards assigning the Most Frequent Sense (MFS), which results in high performance on the MFS but in a very low performance on the less frequent senses. We addressed the MFS bias in WSD systems by combining the output from a WSD system with a set of mostly static features to create a MFS classifier to decide when to and not to choose the MFS. The output from this MFS classifier, which is based on the Random Forest algorithm, is then used to modify the output from the original WSD system. We applied our classifier to one of the state-of-the-art supervised WSD systems, i.e. IMS, and to of the best state-of-the-art unsupervised WSD systems, i.e. UKB. Our main finding is that we are able to improve the system output in terms of choosing between the MFS and the less frequent senses. When we apply the MFS classifier to fine-grained WSD, we observe an improvement on the less frequent sense cases, whereas we maintain the overall recall. @@ -2879,7 +2879,7 @@ A Large-Scale Multilingual Disambiguation of Glosses - JoséCamacho-Collados + JoséCamacho-Collados ClaudioDelli Bovi AlessandroRaganato RobertoNavigli @@ -2940,7 +2940,7 @@ <fixed-case>S</fixed-case>ci<fixed-case>C</fixed-case>orp: A Corpus of <fixed-case>E</fixed-case>nglish Scientific Articles Annotated for Information Status Analysis - InaRoesiger + InaRoesiger 1743–1749 This paper presents SciCorp, a corpus of full-text English scientific papers of two disciplines, genetics and computational linguistics. The corpus comprises co-reference and bridging information as well as information status labels. Since SciCorp is annotated with both labels and the respective co-referent and bridging links, we believe it is a valuable resource for NLP researchers working on scientific articles or on applications such as co-reference resolution, bridging resolution or information status classification. The corpus has been reliably annotated by independent human coders with moderate inter-annotator agreement (average kappa = 0.71). In total, we have annotated 14 full papers containing 61,045 tokens and marked 8,708 definite noun phrases. The paper describes in detail the annotation scheme as well as the resulting corpus. The corpus is available for download in two different formats: in an offset-based format and for the co-reference annotations in the widely-used, tabular CoNLL-2012 format. L16-1275 @@ -2950,7 +2950,7 @@ Using lexical and Dependency Features to Disambiguate Discourse Connectives in <fixed-case>H</fixed-case>indi RohitJain HimanshuSharma - DiptiSharma + DiptiSharma 1750–1754 Discourse parsing is a challenging task in NLP and plays a crucial role in discourse analysis. To enable discourse analysis for Hindi, Hindi Discourse Relations Bank was created on a subset of Hindi TreeBank. The benefits of a discourse analyzer in automated discourse analysis, question summarization and question answering domains has motivated us to begin work on a discourse analyzer for Hindi. In this paper, we focus on discourse connective identification for Hindi. We explore various available syntactic features for this task. We also explore the use of dependency tree parses present in the Hindi TreeBank and study the impact of the same on the performance of the system. We report that the novel dependency features introduced have a higher impact on precision, in comparison to the syntactic features previously used for this task. In addition, we report a high accuracy of 96% for this task. L16-1276 @@ -3011,9 +3011,9 @@ Character-Level Neural Translation for Multilingual Media Monitoring in the <fixed-case>SUMMA</fixed-case> Project - GuntisBarzdins + GuntisBarzdins SteveRenals - DidzisGosko + DidzisGosko 1789–1793 The paper steps outside the comfort-zone of the traditional NLP tasks like automatic speech recognition (ASR) and machine translation (MT) to addresses two novel problems arising in the automated multilingual news monitoring: segmentation of the TV and radio program ASR transcripts into individual stories, and clustering of the individual stories coming from various sources and languages into storylines. Storyline clustering of stories covering the same events is an essential task for inquisitorial media monitoring. We address these two problems jointly by engaging the low-dimensional semantic representation capabilities of the sequence to sequence neural translation models. To enable joint multi-task learning for multilingual neural translation of morphologically rich languages we replace the attention mechanism with the sliding-window mechanism and operate the sequence to sequence neural translation model on the character-level rather than on the word-level. The story segmentation and storyline clustering problem is tackled by examining the low-dimensional vectors produced as a side-product of the neural translation process. The results of this paper describe a novel approach to the automatic story segmentation and storyline clustering problem. L16-1282 @@ -3023,7 +3023,7 @@ Exploring the Realization of Irony in <fixed-case>T</fixed-case>witter Data CynthiaVan Hee ElsLefever - VéroniqueHoste + VéroniqueHoste 1794–1799 Handling figurative language like irony is currently a challenging task in natural language processing. Since irony is commonly used in user-generated content, its presence can significantly undermine accurate analysis of opinions and sentiment in such texts. Understanding irony is therefore important if we want to push the state-of-the-art in tasks such as sentiment analysis. In this research, we present the construction of a Twitter dataset for two languages, being English and Dutch, and the development of new guidelines for the annotation of verbal irony in social media texts. Furthermore, we present some statistics on the annotated corpora, from which we can conclude that the detection of contrasting evaluations might be a good indicator for recognizing irony. L16-1283 @@ -3031,9 +3031,9 @@ Discriminating Similar Languages: Evaluations and Explorations - CyrilGoutte + CyrilGoutte SergeLéger - ShervinMalmasi + ShervinMalmasi MarcosZampieri 1800–1807 We present an analysis of the performance of machine learning classifiers on discriminating between similar languages and language varieties. We carried out a number of experiments using the results of the two editions of the Discriminating between Similar Languages (DSL) shared task. We investigate the progress made between the two tasks, estimate an upper bound on possible performance using ensemble and oracle combination, and provide learning curves to help us understand which languages are more challenging. A number of difficult sentences are identified and investigated further with human annotation @@ -3045,7 +3045,7 @@ LatifaAl-Sulaiti NoorhanAbbas ClaireBrierley - EricAtwell + EricAtwell AymanAlghamdi 1808–1812 Inspired by the Oxford Children’s Corpus, we have developed a prototype corpus of Arabic texts written and/or selected for children. Our Arabic Children’s Corpus of 2950 documents and nearly 2 million words has been collected manually from the web during a 3-month project. It is of high quality, and contains a range of different children’s genres based on sources located, including classic tales from The Arabian Nights, and popular fictional characters such as Goha. We anticipate that the current and subsequent versions of our corpus will lead to interesting studies in text classification, language use, and ideology in children’s texts. @@ -3064,7 +3064,7 @@ Learning Tone and Attribution for Financial Text Mining MahmoudEl-Haj PaulRayson - SteveYoung + SteveYoung AndrewMoore MartinWalker ThomasSchleicher @@ -3117,7 +3117,7 @@ Comparing the Level of Code-Switching in Corpora - BjörnGambäck + BjörnGambäck AmitavaDas 1850–1855 Social media texts are often fairly informal and conversational, and when produced by bilinguals tend to be written in several different languages simultaneously, in the same way as conversational speech. The recent availability of large social media corpora has thus also made large-scale code-switched resources available for research. The paper addresses the issues of evaluation and comparison these new corpora entail, by defining an objective measure of corpus level complexity of code-switched texts. It is also shown how this formal measure can be used in practice, by applying it to several code-switched corpora. @@ -3128,8 +3128,8 @@ Evaluation of the <fixed-case>KIT</fixed-case> Lecture Translation System MarkusMüller SarahFünfer - SebastianStüker - AlexWaibel + SebastianStüker + AlexWaibel 1856–1861 To attract foreign students is among the goals of the Karlsruhe Institute of Technology (KIT). One obstacle to achieving this goal is that lectures at KIT are usually held in German which many foreign students are not sufficiently proficient in, as, e.g., opposed to English. While the students from abroad are learning German during their stay at KIT, it is challenging to become proficient enough in it in order to follow a lecture. As a solution to this problem we offer our automatic simultaneous lecture translation. It translates German lectures into English in real time. While not as good as human interpreters, the system is available at a price that KIT can afford in order to offer it in potentially all lectures. In order to assess whether the quality of the system we have conducted a user study. In this paper we present this study, the way it was conducted and its results. The results indicate that the quality of the system has passed a threshold as to be able to support students in their studies. The study has helped to identify the most crucial weaknesses of the systems and has guided us which steps to take next. L16-1293 @@ -3137,7 +3137,7 @@ The <fixed-case>ACL</fixed-case> <fixed-case>RD</fixed-case>-<fixed-case>TEC</fixed-case> 2.0: A Language Resource for Evaluating Term Extraction and Entity Recognition Methods - BehrangQasemiZadeh + BehrangQasemiZadeh Anne-KathrinSchumann 1862–1868 This paper introduces the ACL Reference Dataset for Terminology Extraction and Classification, version 2.0 (ACL RD-TEC 2.0). The ACL RD-TEC 2.0 has been developed with the aim of providing a benchmark for the evaluation of term and entity recognition tasks based on specialised text from the computational linguistics domain. This release of the corpus consists of 300 abstracts from articles in the ACL Anthology Reference Corpus, published between 1978–2006. In these abstracts, terms (i.e., single or multi-word lexical units with a specialised meaning) are manually annotated. In addition to their boundaries in running text, annotated terms are classified into one of the seven categories method, tool, language resource (LR), LR product, model, measures and measurements, and other. To assess the quality of the annotations and to determine the difficulty of this annotation task, more than 171 of the abstracts are annotated twice, independently, by each of the two annotators. In total, 6,818 terms are identified and annotated in more than 1300 sentences, resulting in a specialised vocabulary made of 3,318 lexical forms, mapped to 3,471 concepts. We explain the development of the annotation guidelines and discuss some of the challenges we encountered in this annotation task. @@ -3164,7 +3164,7 @@ AljoschaBurchardt OndřejKlejch MartinPopel - MajaPopović + MajaPopović 1877–1882 This work addresses the need to aid Machine Translation (MT) development cycles with a complete workflow of MT evaluation methods. Our aim is to assess, compare and improve MT system variants. We hereby report on novel tools and practices that support various measures, developed in order to support a principled and informed approach of MT development. Our toolkit for automatic evaluation showcases quick and detailed comparison of MT system variants through automatic metrics and n-gram feedback, along with manual evaluation via edit-distance, error annotation and task-based feedback. L16-1296 @@ -3175,7 +3175,7 @@ OlivierGalibert Mohamed Ameur BenJannet JulietteKahn - SophieRosset + SophieRosset 1883–1889 Automatic Speech recognition (ASR) is one of the most widely used components in spoken language processing applications. ASR errors are of varying importance with respect to the application, making error analysis keys to improving speech processing applications. Knowing the most serious errors for the applicative case is critical to build better systems. In the context of Automatic Speech Recognition (ASR) used as a first step towards Named Entity Recognition (NER) in speech, error seriousness is usually determined by their frequency, due to the use of the WER as metric to evaluate the ASR output, despite the emergence of more relevant measures in the literature. We propose to use a different evaluation metric form the literature in order to classify ASR errors according to their seriousness for NER. Our results show that the ASR errors importance is ranked differently depending on the used evaluation metric. A more detailed analysis shows that the estimation of the error impact given by the ATENE metric is more adapted to the NER task than the estimation based only on the most used frequency metric WER. L16-1297 @@ -3185,7 +3185,7 @@ A Study of Reuse and Plagiarism in <fixed-case>LREC</fixed-case> papers GilFrancopoulo JosephMariani - PatrickParoubek + PatrickParoubek 1890–1897 The aim of this experiment is to present an easy way to compare fragments of texts in order to detect (supposed) results of copy & paste operations between articles in the domain of Natural Language Processing (NLP). The search space of the comparisons is a corpus labeled as NLP4NLP gathering a large part of the NLP field. The study is centered on LREC papers in both directions, first with an LREC paper borrowing a fragment of text from the collection, and secondly in the reverse direction with fragments of LREC documents borrowed and inserted in the collection. L16-1298 @@ -3203,7 +3203,7 @@ More than Word Cooccurrence: Exploring Support and Opposition in International Climate Negotiations with Semantic Parsing - PabloRuiz Fabo + PabloRuiz Fabo ClémentPlancq ThierryPoibeau 1902–1907 @@ -3213,9 +3213,9 @@ A Sequence Model Approach to Relation Extraction in <fixed-case>P</fixed-case>ortuguese - SandraCollovini + SandraCollovini GabrielMachado - RenataVieira + RenataVieira 1908–1912 The task of Relation Extraction from texts is one of the main challenges in the area of Information Extraction, considering the required linguistic knowledge and the sophistication of the language processing techniques employed. This task aims at identifying and classifying semantic relations that occur between entities recognized in a given text. In this paper, we evaluated a Conditional Random Fields classifier for the extraction of any relation descriptor occurring between named entities (Organisation, Person and Place categories), as well as pre-defined relation types between these entities in Portuguese texts. L16-1301 @@ -3225,7 +3225,7 @@ Evaluation Set for <fixed-case>S</fixed-case>lovak News Information Retrieval DanielHládek JanStaš - JozefJuhár + JozefJuhár 1913–1916 This work proposes an information retrieval evaluation set for the Slovak language. A set of 80 queries written in the natural language is given together with the set of relevant documents. The document set contains 3980 newspaper articles sorted into 6 categories. Each document in the result set is manually annotated for relevancy with its corresponding query. The evaluation set is mostly compatible with the Cranfield test collection using the same methodology for queries and annotation of relevancy. In addition to that it provides annotation for document title, author, publication date and category that can be used for evaluation of automatic document clustering and categorization. L16-1302 @@ -3250,9 +3250,9 @@ <fixed-case>T</fixed-case>erm<fixed-case>ITH</fixed-case>-Eval: a <fixed-case>F</fixed-case>rench Standard-Based Resource for Keyphrase Extraction Evaluation AdrienBougouin SabineBarreaux - LaurentRomary + LaurentRomary FlorianBoudin - BéatriceDaille + BéatriceDaille 1924–1927 Keyphrase extraction is the task of finding phrases that represent the important content of a document. The main aim of keyphrase extraction is to propose textual units that represent the most important topics developed in a document. The output keyphrases of automatic keyphrase extraction methods for test documents are typically evaluated by comparing them to manually assigned reference keyphrases. Each output keyphrase is considered correct if it matches one of the reference keyphrases. However, the choice of the appropriate textual unit (keyphrase) for a topic is sometimes subjective and evaluating by exact matching underestimates the performance. This paper presents a dataset of evaluation scores assigned to automatically extracted keyphrases by human evaluators. Along with the reference keyphrases, the manual evaluations can be used to validate new evaluation measures. Indeed, an evaluation measure that is highly correlated to the manual evaluation is appropriate for the evaluation of automatic keyphrase extraction methods. L16-1304 @@ -3360,7 +3360,7 @@ MatthiasSperber GrahamNeubig SatoshiNakamura - AlexWaibel + AlexWaibel 1986–1992 Computer-assisted transcription promises high-quality speech transcription at reduced costs. This is achieved by limiting human effort to transcribing parts for which automatic transcription quality is insufficient. Our goal is to improve the human transcription quality via appropriate user interface design. We focus on iterative interfaces that allow humans to solve tasks based on an initially given suggestion, in this case an automatic transcription. We conduct a user study that reveals considerable quality gains for three variations of iterative interfaces over a non-iterative from-scratch transcription interface. Our iterative interfaces included post-editing, confidence-enhanced post-editing, and a novel retyping interface. All three yielded similar quality on average, but we found that the proposed retyping interface was less sensitive to the difficulty of the segment, and superior when the automatic transcription of the segment contained relatively many errors. An analysis using mixed-effects models allows us to quantify these and other factors and draw conclusions over which interface design should be chosen in which circumstance. L16-1314 @@ -3394,7 +3394,7 @@ LinneHa MartinJansche KnotPipatsrisawat - RichardSproat + RichardSproat 2005–2010 We present a text-to-speech (TTS) system designed for the dialect of Bengali spoken in Bangladesh. This work is part of an ongoing effort to address the needs of under-resourced languages. We propose a process for streamlining the bootstrapping of TTS systems for under-resourced languages. First, we use crowdsourcing to collect the data from multiple ordinary speakers, each speaker recording small amount of sentences. Second, we leverage an existing text normalization system for a related language (Hindi) to bootstrap a linguistic front-end for Bangla. Third, we employ statistical techniques to construct multi-speaker acoustic models using Long Short-Term Memory Recurrent Neural Network (LSTM-RNN) and Hidden Markov Model (HMM) approaches. We then describe our experiments that show that the resulting TTS voices score well in terms of their perceived quality as measured by Mean Opinion Score (MOS) evaluations. L16-1317 @@ -3415,7 +3415,7 @@ Web Chat Conversations from Contact Centers: a Descriptive Study - GéraldineDamnati + GéraldineDamnati AleksandraGuerraz DelphineCharlet 2017–2021 @@ -3427,7 +3427,7 @@ Identification of Drug-Related Medical Conditions in Social Media FrançoisMorlane-Hondère CyrilGrouin - PierreZweigenbaum + PierreZweigenbaum 2022–2028 Monitoring social media has been shown to be an interesting approach for the early detection of drug adverse effects. In this paper, we describe a system which extracts medical entities in French drug reviews written by users. We focus on the identification of medical conditions, which is based on the concept of post-coordination: we first extract minimal medical-related entities (pain, stomach) then we combine them to identify complex ones (It was the worst [pain I ever felt in my stomach]). These two steps are respectively performed by two classifiers, the first being based on Conditional Random Fields and the second one on Support Vector Machines. The overall results of the minimal entity classifier are the following: P=0.926; R=0.849; F1=0.886. A thourough analysis of the feature set shows that, when combined with word lemmas, clusters generated by word2vec are the most valuable features. When trained on the output of the first classifier, the second classifier’s performances are the following: p=0.683;r=0.956;f1=0.797. The addition of post-processing rules did not add any significant global improvement but was found to modify the precision/recall ratio. L16-1320 @@ -3446,7 +3446,7 @@ A Corpus of <fixed-case>W</fixed-case>ikipedia Discussions: Over the Years, with Topic, Power and Gender Labels VinodkumarPrabhakaran - OwenRambow + OwenRambow 2034–2038 In order to gain a deep understanding of how social context manifests in interactions, we need data that represents interactions from a large community of people over a long period of time, capturing different aspects of social context. In this paper, we present a large corpus of Wikipedia Talk page discussions that are collected from a broad range of topics, containing discussions that happened over a period of 15 years. The dataset contains 166,322 discussion threads, across 1236 articles/topics that span 15 different topic categories or domains. The dataset also captures whether the post is made by an registered user or not, and whether he/she was an administrator at the time of making the post. It also captures the Wikipedia age of editors in terms of number of months spent as an editor, as well as their gender. This corpus will be a valuable resource to investigate a variety of computational sociolinguistics research questions regarding online social interactions. L16-1322 @@ -3455,8 +3455,8 @@ Phrase Detectives Corpus 1.0 Crowdsourced Anaphoric Coreference. JonChamberlain - MassimoPoesio - UdoKruschwitz + MassimoPoesio + UdoKruschwitz 2039–2046 Natural Language Engineering tasks require large and complex annotated datasets to build more advanced models of language. Corpora are typically annotated by several experts to create a gold standard; however, there are now compelling reasons to use a non-expert crowd to annotate text, driven by cost, speed and scalability. Phrase Detectives Corpus 1.0 is an anaphorically-annotated corpus of encyclopedic and narrative text that contains a gold standard created by multiple experts, as well as a set of annotations created by a large non-expert crowd. Analysis shows very good inter-expert agreement (kappa=.88-.93) but a more variable baseline crowd agreement (kappa=.52-.96). Encyclopedic texts show less agreement (and by implication are harder to annotate) than narrative texts. The release of this corpus is intended to encourage research into the use of crowds for text annotation and the development of more advanced, probabilistic language models, in particular for anaphoric coreference. L16-1323 @@ -3464,11 +3464,11 @@ Summ-it++: an Enriched Version of the Summ-it Corpus - EvandroFonseca + EvandroFonseca AndréAntonitsch - SandraCollovini + SandraCollovini DanielaAmaral - RenataVieira + RenataVieira AnnyFigueira 2047–2051 This paper presents Summ-it++, an enriched version the Summ-it corpus. In this new version, the corpus has received new semantic layers, named entity categories and relations between named entities, adding to the previous coreference annotation. In addition, we change the original Summ-it format to SemEval @@ -3479,7 +3479,7 @@ Towards Multiple Antecedent Coreference Resolution in Specialized Discourse AliciaBurga SergioCajal - JoanCodina-Filbà + JoanCodina-Filbà LeoWanner 2052–2057 Despite the popularity of coreference resolution as a research topic, the overwhelming majority of the work in this area focused so far on single antecedence coreference only. Multiple antecedent coreference (MAC) has been largely neglected. This can be explained by the scarcity of the phenomenon of MAC in generic discourse. However, in specialized discourse such as patents, MAC is very dominant. It seems thus unavoidable to address the problem of MAC resolution in the context of tasks related to automatic patent material processing, among them abstractive summarization, deep parsing of patents, construction of concept maps of the inventions, etc. We present the first version of an operational rule-based MAC resolution strategy for patent material that covers the three major types of MAC: (i) nominal MAC, (ii) MAC with personal / relative pronouns, and MAC with reflexive / reciprocal pronouns. The evaluation shows that our strategy performs well in terms of precision and recall. @@ -3493,7 +3493,7 @@ AntonellaBristot FedericaCavicchio KepaRodriguez - MassimoPoesio + MassimoPoesio 2058–2062 This paper presents a second release of the ARRAU dataset: a multi-domain corpus with thorough linguistically motivated annotation of anaphora and related phenomena. Building upon the first release almost a decade ago, a considerable effort had been invested in improving the data both quantitatively and qualitatively. Thus, we have doubled the corpus size, expanded the selection of covered phenomena to include referentiality and genericity and designed and implemented a methodology for enforcing the consistency of the manual annotation. We believe that the new release of ARRAU provides a valuable material for ongoing research in complex cases of coreference as well as for a variety of related tasks. The corpus is publicly available through LDC. L16-1326 @@ -3503,7 +3503,7 @@ An Annotated Corpus and Method for Analysis of Ad-Hoc Structures Embedded in Text EricYeh JohnNiekrasz - DayneFreitag + DayneFreitag RichardRohwer 2063–2070 We describe a method for identifying and performing functional analysis of structured regions that are embedded in natural language documents, such as tables or key-value lists. Such regions often encode information according to ad hoc schemas and avail themselves of visual cues in place of natural language grammar, presenting problems for standard information extraction algorithms. Unlike previous work in table extraction, which assumes a relatively noiseless two-dimensional layout, our aim is to accommodate a wide variety of naturally occurring structure types. Our approach has three main parts. First, we collect and annotate a a diverse sample of “naturally” occurring structures from several sources. Second, we use probabilistic text segmentation techniques, featurized by skip bigrams over spatial and token category cues, to automatically identify contiguous regions of structured text that share a common schema. Finally, we identify the records and fields within each structured region using a combination of distributional similarity and sequence alignment methods, guided by minimal supervision in the form of a single annotated record. We evaluate the last two components individually, and conclude with a discussion of further work. @@ -3534,7 +3534,7 @@ <fixed-case>NNB</fixed-case>locks: A Deep Learning Framework for Computational Linguistics Neural Network Models Frederico TommasiCaroli - AndréFreitas + AndréFreitas João Carlos Pereirada Silva SiegfriedHandschuh 2081–2085 @@ -3550,7 +3550,7 @@ EnricoGhidoni NorinaMarcello Rema RossiniFavretti - FabioTamburini + FabioTamburini 2086–2093 This paper presents some preliminary results of the OPLON project. It aimed at identifying early linguistic symptoms of cognitive decline in the elderly. This pilot study was conducted on a corpus composed of spontaneous speech sample collected from 39 subjects, who underwent a neuropsychological screening for visuo-spatial abilities, memory, language, executive functions and attention. A rich set of linguistic features was extracted from the digitalised utterances (at phonetic, suprasegmental, lexical, morphological and syntactic levels) and the statistical significance in pinpointing the pathological process was measured. Our results show remarkable trends for what concerns both the linguistic traits selection and the automatic classifiers building. L16-1331 @@ -3572,11 +3572,11 @@ Building Language Resources for Exploring Autism Spectrum Disorders JuliaParish-Morris - ChristopherCieri - MarkLiberman + ChristopherCieri + MarkLiberman LeilaBateman EmilyFerguson - Robert T.Schultz + Robert T.Schultz 2100–2107 Autism spectrum disorder (ASD) is a complex neurodevelopmental condition that would benefit from low-cost and reliable improvements to screening and diagnosis. Human language technologies (HLTs) provide one possible route to automating a series of subjective decisions that currently inform “Gold Standard” diagnosis based on clinical judgment. In this paper, we describe a new resource to support this goal, comprised of 100 20-minute semi-structured English language samples labeled with child age, sex, IQ, autism symptom severity, and diagnostic classification. We assess the feasibility of digitizing and processing sensitive clinical samples for data sharing, and identify areas of difficulty. Using the methods described here, we propose to join forces with researchers and clinicians throughout the world to establish an international repository of annotated language samples from individuals with ASD and related disorders. This project has the potential to improve the lives of individuals with ASD and their families by identifying linguistic features that could improve remote screening, inform personalized intervention, and promote advancements in clinically-oriented HLTs. L16-1333 @@ -3633,7 +3633,7 @@ AnnaFeltracco SimoneMagnolini ElisabettaJezek - BernardoMagnini + BernardoMagnini 2138–2144 We describe an experiment for the acquisition of opposition relations among Italian verb senses, based on a crowdsourcing methodology. The goal of the experiment is to discuss whether the types of opposition we distinguish (i.e. complementarity, antonymy, converseness and reversiveness) are actually perceived by the crowd. In particular, we collect data for Italian by using the crowdsourcing platform CrowdFlower. We ask annotators to judge the type of opposition existing among pairs of sentences -previously judged as opposite- that differ only for a verb: the verb in the first sentence is opposite of the verb in second sentence. Data corroborate the hypothesis that some opposition relations exclude each other, while others interact, being recognized as compatible by the contributors. L16-1339 @@ -3666,7 +3666,7 @@ Introducing the Weighted Trustability Evaluator for Crowdsourcing Exemplified by Speaker Likability Classification SimoneHantke ErikMarchi - BjörnSchuller + BjörnSchuller 2156–2161 Crowdsourcing is an arising collaborative approach applicable among many other applications to the area of language and speech processing. In fact, the use of crowdsourcing was already applied in the field of speech processing with promising results. However, only few studies investigated the use of crowdsourcing in computational paralinguistics. In this contribution, we propose a novel evaluator for crowdsourced-based ratings termed Weighted Trustability Evaluator (WTE) which is computed from the rater-dependent consistency over the test questions. We further investigate the reliability of crowdsourced annotations as compared to the ones obtained with traditional labelling procedures, such as constrained listening experiments in laboratories or in controlled environments. This comparison includes an in-depth analysis of obtainable classification performances. The experiments were conducted on the Speaker Likability Database (SLD) already used in the INTERSPEECH Challenge 2012, and the results lend further weight to the assumption that crowdsourcing can be applied as a reliable annotation source for computational paralinguistics given a sufficient number of raters and suited measurements of their reliability. L16-1342 @@ -3739,8 +3739,8 @@ That’ll Do Fine!: A Coarse Lexical Resource for <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi <fixed-case>MT</fixed-case>, Using Polylingual Topic Models DipteshKanojia AdityaJoshi - PushpakBhattacharyya - Mark JamesCarman + PushpakBhattacharyya + Mark JamesCarman 2199–2203 Parallel corpora are often injected with bilingual lexical resources for improved Indian language machine translation (MT). In absence of such lexical resources, multilingual topic models have been used to create coarse lexical resources in the past, using a Cartesian product approach. Our results show that for morphologically rich languages like Hindi, the Cartesian product approach is detrimental for MT. We then present a novel ‘sentential’ approach to use this coarse lexical resource from a multilingual topic model. Our coarse lexical resource when injected with a parallel corpus outperforms a system trained using parallel corpus and a good quality lexical resource. As demonstrated by the quality of our coarse lexical resource and its benefit to MT, we believe that our sentential approach to create such a resource will help MT for resource-constrained languages. L16-1349 @@ -3752,7 +3752,7 @@ ManabuYaguchi KiyotakaUchimoto MasaoUtiyama - EiichiroSumita + EiichiroSumita SadaoKurohashi HitoshiIsahara 2204–2208 @@ -3762,9 +3762,9 @@ Domain Adaptation in <fixed-case>MT</fixed-case> Using Titles in <fixed-case>W</fixed-case>ikipedia as a Parallel Corpus: Resources and Evaluation - GorkaLabaka - IñakiAlegria - KepaSarasola + GorkaLabaka + IñakiAlegria + KepaSarasola 2209–2213 This paper presents how an state-of-the-art SMT system is enriched by using an extra in-domain parallel corpora extracted from Wikipedia. We collect corpora from parallel titles and from parallel fragments in comparable articles from Wikipedia. We carried out an evaluation with a double objective: evaluating the quality of the extracted data and evaluating the improvement due to the domain-adaptation. We think this can be very useful for languages with limited amount of parallel corpora, where in-domain data is crucial to improve the performance of MT sytems. The experiments on the Spanish-English language pair improve a baseline trained with the Europarl corpus in more than 2 points of BLEU when translating in the Computer Science domain. L16-1351 @@ -3784,7 +3784,7 @@ Towards producing bilingual lexica from monolingual corpora JingyiHan - NúriaBel + NúriaBel 2222–2227 Bilingual lexica are the basis for many cross-lingual natural language processing tasks. Recent works have shown success in learning bilingual dictionary by taking advantages of comparable corpora and a diverse set of signals derived from monolingual corpora. In the present work, we describe an approach to automatically learn bilingual lexica by training a supervised classifier using word embedding-based vectors of only a few hundred translation equivalent word pairs. The word embedding representations of translation pairs were obtained from source and target monolingual corpora, which are not necessarily related. Our classifier is able to predict whether a new word pair is under a translation relation or not. We tested it on two quite distinct language pairs Chinese-Spanish and English-Spanish. The classifiers achieved more than 0.90 precision and recall for both language pairs in different evaluation scenarios. These results show a high potential for this method to be used in bilingual lexica production for language pairs with reduced amount of parallel or comparable corpora, in particular for phrase table expansion in Statistical Machine Translation systems. L16-1353 @@ -3793,7 +3793,7 @@ First Steps Towards Coverage-Based Sentence Alignment LuísGomes - Gabriel PereiraLopes + Gabriel PereiraLopes 2228–2231 In this paper, we introduce a coverage-based scoring function that discriminates between parallel and non-parallel sentences. When plugged into Bleualign, a state-of-the-art sentence aligner, our function improves both precision and recall of alignments over the originally proposed BLEU score. Furthermore, since our scoring function uses Moses phrase tables directly we avoid the need to translate the texts to be aligned, which is time-consuming and a potential source of alignment errors. L16-1354 @@ -3802,7 +3802,7 @@ Using the <fixed-case>TED</fixed-case> Talks to Evaluate Spoken Post-editing of Machine Translation JeevanthiLiyanapathirana - AndreiPopescu-Belis + AndreiPopescu-Belis 2232–2239 This paper presents a solution to evaluate spoken post-editing of imperfect machine translation output by a human translator. We compare two approaches to the combination of machine translation (MT) and automatic speech recognition (ASR): a heuristic algorithm and a machine learning method. To obtain a data set with spoken post-editing information, we use the French version of TED talks as the source texts submitted to MT, and the spoken English counterparts as their corrections, which are submitted to an ASR system. We experiment with various levels of artificial ASR noise and also with a state-of-the-art ASR system. The results show that the combination of MT with ASR improves over both individual outputs of MT and ASR in terms of BLEU scores, especially when ASR performance is low. L16-1355 @@ -3810,7 +3810,7 @@ Phrase Level Segmentation and Labelling of Machine Translation Errors - FrédéricBlain + FrédéricBlain VarvaraLogacheva LuciaSpecia 2240–2245 @@ -3820,7 +3820,7 @@ <fixed-case>S</fixed-case>ub<fixed-case>C</fixed-case>o: A Learner Translation Corpus of Human and Machine Subtitles - José ManuelMartínez MartínezUniversität des Saarlandes + José ManuelMartínez MartínezUniversität des Saarlandes MihaelaVelaUniversität des Saarlandes 2246–2254 In this paper, we present a freely available corpus of human and automatic translations of subtitles. The corpus comprises the original English subtitles (SRC), both human (HT) and machine translations (MT) into German, as well as post-editions (PE) of the MT output. HT and MT are annotated with errors. Moreover, human evaluation is included in HT, MT, and PE. Such a corpus is a valuable resource for both human and machine translation communities, enabling the direct comparison – in terms of errors and evaluation – between human and machine translations and post-edited machine translations. @@ -3843,7 +3843,7 @@ <fixed-case>JATE</fixed-case> 2.0: <fixed-case>J</fixed-case>ava Automatic Term Extraction with <fixed-case>A</fixed-case>pache <fixed-case>S</fixed-case>olr ZiqiZhang JieGao - FabioCiravegna + FabioCiravegna 2262–2269 Automatic Term Extraction (ATE) or Recognition (ATR) is a fundamental processing step preceding many complex knowledge engineering tasks. However, few methods have been implemented as public tools and in particular, available as open-source freeware. Further, little effort is made to develop an adaptable and scalable framework that enables customization, development, and comparison of algorithms under a uniform environment. This paper introduces JATE 2.0, a complete remake of the free Java Automatic Term Extraction Toolkit (Zhang et al., 2008) delivering new features including: (1) highly modular, adaptable and scalable ATE thanks to integration with Apache Solr, the open source free-text indexing and search platform; (2) an extended collection of state-of-the-art algorithms. We carry out experiments on two well-known benchmarking datasets and compare the algorithms along the dimensions of effectiveness (precision) and efficiency (speed and memory consumption). To the best of our knowledge, this is by far the only free ATE library offering a flexible architecture and the most comprehensive collection of algorithms. L16-1359 @@ -3860,7 +3860,7 @@ <fixed-case>T</fixed-case>ermo<fixed-case>PL</fixed-case> - a Flexible Tool for Terminology Extraction - MalgorzataMarciniak + MalgorzataMarciniak AgnieszkaMykowiecka PiotrRychlik 2278–2284 @@ -3870,7 +3870,7 @@ <fixed-case>G</fixed-case>ho<fixed-case>S</fixed-case>t-<fixed-case>NN</fixed-case>: A Representative Gold Standard of <fixed-case>G</fixed-case>erman Noun-Noun Compounds - SabineSchulte im Walde + SabineSchulte im Walde AnnaHätty StefanBott NanaKhvtisavrishvili @@ -3883,8 +3883,8 @@ <fixed-case>D</fixed-case>e<fixed-case>Q</fixed-case>ue: A Lexicon of Complex Prepositions and Conjunctions in <fixed-case>F</fixed-case>rench CarlosRamisch AlexisNasr - AndréValli - JoséDeulofeu + AndréValli + JoséDeulofeu 2293–2298 We introduce DeQue, a lexicon covering French complex prepositions (CPRE) like “à partir de” (from) and complex conjunctions (CCONJ) like “bien que” (although). The lexicon includes fine-grained linguistic description based on empirical evidence. We describe the general characteristics of CPRE and CCONJ in French, with special focus on syntactic ambiguity. Then, we list the selection criteria used to build the lexicon and the corpus-based methodology employed to collect entries. Finally, we quantify the ambiguity of each construction by annotating around 100 sentences randomly taken from the FRWaC. In addition to its theoretical value, the resource has many potential practical applications. We intend to employ DeQue for treebank annotation and to train a dependency parser that can takes complex constructions into account. L16-1363 @@ -3894,7 +3894,7 @@ <fixed-case>PARSEME</fixed-case> Survey on <fixed-case>MWE</fixed-case> Resources Gyri SmørdalLosnegaard FedericoSangati - Carla ParraEscartín + Carla ParraEscartín AgataSavary SaschaBargmann JohannaMonti @@ -3916,10 +3916,10 @@ Transfer-Based Learning-to-Rank Assessment of Medical Term Technicality DhouhaBouamor - Leonardo CampillosLlanos + Leonardo CampillosLlanos Anne-LaureLigozat - SophieRosset - PierreZweigenbaum + SophieRosset + PierreZweigenbaum 2312–2316 While measuring the readability of texts has been a long-standing research topic, assessing the technicality of terms has only been addressed more recently and mostly for the English language. In this paper, we train a learning-to-rank model to determine a specialization degree for each term found in a given list. Since no training data for this task exist for French, we train our system with non-lexical features on English data, namely, the Consumer Health Vocabulary, then apply it to French. The features include the likelihood ratio of the term based on specialized and lay language models, and tests for containing morphologically complex words. The evaluation of this approach is conducted on 134 terms from the UMLS Metathesaurus and 868 terms from the Eugloss thesaurus. The Normalized Discounted Cumulative Gain obtained by our system is over 0.8 on both test sets. Besides, thanks to the learning-to-rank approach, adding morphological features to the language model features improves the results on the Eugloss thesaurus. L16-1366 @@ -3929,7 +3929,7 @@ Example-based Acquisition of Fine-grained Collocation Resources SaraRodríguez-Fernández RobertoCarlini - Luis EspinosaAnke + Luis EspinosaAnke LeoWanner 2317–2322 Collocations such as “heavy rain” or “make [a] decision”, are combinations of two elements where one (the base) is freely chosen, while the choice of the other (collocate) is restricted, depending on the base. Collocations present difficulties even to advanced language learners, who usually struggle to find the right collocate to express a particular meaning, e.g., both “heavy” and “strong” express the meaning ‘intense’, but while “rain” selects “heavy”, “wind” selects “strong”. Lexical Functions (LFs) describe the meanings that hold between the elements of collocations, such as ‘intense’, ‘perform’, ‘create’, ‘increase’, etc. Language resources with semantically classified collocations would be of great help for students, however they are expensive to build, since they are manually constructed, and scarce. We present an unsupervised approach to the acquisition and semantic classification of collocations according to LFs, based on word embeddings in which, given an example of a collocation for each of the target LFs and a set of bases, the system retrieves a list of collocates for each base and LF. @@ -3939,7 +3939,7 @@ <fixed-case>MWE</fixed-case>s in Treebanks: From Survey to Guidelines VictoriaRosén - KoenraadDe Smedt + KoenraadDe Smedt Gyri SmørdalLosnegaard EduardBejček AgataSavary @@ -3953,7 +3953,7 @@ Multiword Expressions Dataset for <fixed-case>I</fixed-case>ndian Languages DhirendraSingh SudhaBhingardive - PushpakBhattacharyya + PushpakBhattacharyya 2331–2335 Multiword Expressions (MWEs) are used frequently in natural languages, but understanding the diversity in MWEs is one of the open problem in the area of Natural Language Processing. In the context of Indian languages, MWEs play an important role. In this paper, we present MWEs annotation dataset created for Indian languages viz., Hindi and Marathi. We extract possible MWE candidates using two repositories: 1) the POS-tagged corpus and 2) the IndoWordNet synsets. Annotation is done for two types of MWEs: compound nouns and light verb constructions. In the process of annotation, human annotators tag valid MWEs from these candidates based on the standard guidelines provided to them. We obtained 3178 compound nouns and 2556 light verb constructions in Hindi and 1003 compound nouns and 2416 light verb constructions in Marathi using two repositories mentioned before. This created resource is made available publicly and can be used as a gold standard for Hindi and Marathi MWE systems. L16-1369 @@ -3971,7 +3971,7 @@ <fixed-case>E</fixed-case>asy<fixed-case>T</fixed-case>ree: A Graphical Tool for Dependency Tree Annotation - AlexaLittle + AlexaLittle StephenTratz 2343–2347 This paper introduces EasyTree, a dynamic graphical tool for dependency tree annotation. Built in JavaScript using the popular D3 data visualization library, EasyTree allows annotators to construct and label trees entirely by manipulating graphics, and then export the corresponding data in JSON format. Human users are thus able to annotate in an intuitive way without compromising the machine-compatibility of the output. EasyTree has a number of features to assist annotators, including color-coded part-of-speech indicators and optional translation displays. It can also be customized to suit a wide range of projects; part-of-speech categories, edge labels, and many other settings can be edited from within the GUI. The system also utilizes UTF-8 encoding and properly handles both left-to-right and right-to-left scripts. By providing a user-friendly annotation tool, we aim to reduce time spent transforming data or learning to use the software, to improve the user experience for annotators, and to make annotation approachable even for inexperienced users. Unlike existing solutions, EasyTree is built entirely with standard web technologies–JavaScript, HTML, and CSS–making it ideal for web-based annotation efforts, including crowdsourcing efforts. @@ -4009,8 +4009,8 @@ Hard Time Parsing Questions: Building a <fixed-case>Q</fixed-case>uestion<fixed-case>B</fixed-case>ank for <fixed-case>F</fixed-case>rench - DjaméSeddah - MarieCandito + DjaméSeddah + MarieCandito 2366–2370 We present the French Question Bank, a treebank of 2600 questions. We show that classical parsing model performance drop while the inclusion of this data set is highly beneficial without harming the parsing of non-question data. when facing out-of- domain data with strong structural diver- gences. Two thirds being aligned with the QB (Judge et al., 2006) and being freely available, this treebank will prove useful to build robust NLP systems. L16-1375 @@ -4019,7 +4019,7 @@ Enhanced <fixed-case>E</fixed-case>nglish <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies: An Improved Representation for Natural Language Understanding Tasks SebastianSchuster - Christopher D.Manning + Christopher D.Manning 2371–2378 Many shallow natural language understanding tasks use dependency trees to extract relations between content words. However, strict surface-structure dependency trees tend to follow the linguistic structure of sentences too closely and frequently fail to provide direct relations between content words. To mitigate this problem, the original Stanford Dependencies representation also defines two dependency graph representations which contain additional and augmented relations that explicitly capture otherwise implicit relations between content words. In this paper, we revisit and extend these dependency graph representations in light of the recent Universal Dependencies (UD) initiative and provide a detailed account of an enhanced and an enhanced++ English UD representation. We further present a converter from constituency to basic, i.e., strict surface structure, UD trees, and a converter from basic UD trees to enhanced and enhanced++ English UD graphs. We release both converters as part of Stanford CoreNLP and the Stanford Parser. L16-1376 @@ -4028,10 +4028,10 @@ A <fixed-case>P</fixed-case>roposition <fixed-case>B</fixed-case>ank of <fixed-case>U</fixed-case>rdu MaazAnwar - Riyaz AhmadBhat - DiptiSharma + Riyaz AhmadBhat + DiptiSharma AshwiniVaidya - MarthaPalmer + MarthaPalmer Tafseer AhmedKhan 2379–2386 This paper describes our efforts for the development of a Proposition Bank for Urdu, an Indo-Aryan language. Our primary goal is the labeling of syntactic nodes in the existing Urdu dependency Treebank with specific argument labels. In essence, it involves annotation of predicate argument structures of both simple and complex predicates in the Treebank corpus. We describe the overall process of building the PropBank of Urdu. We discuss various statistics pertaining to the Urdu PropBank and the issues which the annotators encountered while developing the PropBank. We also discuss how these challenges were addressed to successfully expand the PropBank corpus. While reporting the Inter-annotator agreement between the two annotators, we show that the annotators share similar understanding of the annotation guidelines and of the linguistic phenomena present in the language. The present size of this Propbank is around 180,000 tokens which is double-propbanked by the two annotators for simple predicates. Another 100,000 tokens have been annotated for complex predicates of Urdu. @@ -4041,8 +4041,8 @@ <fixed-case>C</fixed-case>zech Legal Text Treebank 1.0 VincentKríž - BarboraHladká - ZdeňkaUrešová + BarboraHladká + ZdeňkaUrešová 2387–2392 We introduce a new member of the family of Prague dependency treebanks. The Czech Legal Text Treebank 1.0 is a morphologically and syntactically annotated corpus of 1,128 sentences. The treebank contains texts from the legal domain, namely the documents from the Collection of Laws of the Czech Republic. Legal texts differ from other domains in several language phenomena influenced by rather high frequency of very long sentences. A manual annotation of such sentences presents a new challenge. We describe a strategy and tools for this task. The resulting treebank can be explored in various ways. It can be downloaded from the LINDAT/CLARIN repository and viewed locally using the TrEd editor or it can be accessed on-line using the KonText and TreeQuery tools. L16-1378 @@ -4121,18 +4121,18 @@ The Open Linguistics Working Group: Developing the Linguistic Linked Open Data Cloud - John PhilipMcCrae + John PhilipMcCrae ChristianChiarcos FrancisBond - PhilippCimiano + PhilippCimiano ThierryDeclerck Gerardde Melo JorgeGracia SebastianHellmann BettinaKlimek - StevenMoran + StevenMoran PetyaOsenova - AntonioPareja-Lora + AntonioPareja-Lora JonathanPool 2435–2441 The Open Linguistics Working Group (OWLG) brings together researchers from various fields of linguistics, natural language processing, and information technology to present and discuss principles, case studies, and best practices for representing, publishing and linking linguistic data collections. A major outcome of our work is the Linguistic Linked Open Data (LLOD) cloud, an LOD (sub-)cloud of linguistic resources, which covers various linguistic databases, lexicons, corpora, terminologies, and metadata repositories. We present and summarize five years of progress on the development of the cloud and of advancements in open data in linguistics, and we describe recent community activities. The paper aims to serve as a guideline to orient and involve researchers with the community and/or Linguistic Linked Open Data. @@ -4174,7 +4174,7 @@ TimoPetmanson AlexanderTkachenko SvenLaur - Heiki-JaanKaalep + Heiki-JaanKaalep 2460–2466 Although there are many tools for natural language processing tasks in Estonian, these tools are very loosely interoperable, and it is not easy to build practical applications on top of them. In this paper, we introduce a new Python library for natural language processing in Estonian, which provides unified programming interface for various NLP components. The EstNLTK toolkit provides utilities for basic NLP tasks including tokenization, morphological analysis, lemmatisation and named entity recognition as well as offers more advanced features such as a clause segmentation, temporal expression extraction and normalization, verb chain detection, Estonian Wordnet integration and rule-based information extraction. Accompanied by a detailed API documentation and comprehensive tutorials, EstNLTK is suitable for a wide range of audience. We believe EstNLTK is mature enough to be used for developing NLP-backed systems both in industry and research. EstNLTK is freely available under the GNU GPL version 2+ license, which is standard for academic software. L16-1390 @@ -4232,8 +4232,8 @@ Data Management Plans and Data Centers - DeniseDiPersio - ChristopherCieri + DeniseDiPersio + ChristopherCieri DanielJaquette 2496–2501 Data management plans, data sharing plans and the like are now required by funders worldwide as part of research proposals. Concerned with promoting the notion of open scientific data, funders view such plans as the framework for satisfying the generally accepted requirements for data generated in funded research projects, among them that it be accessible, usable, standardized to the degree possible, secure and stable. This paper examines the origins of data management plans, their requirements and issues they raise for data centers and HLT resource development in general. @@ -4254,7 +4254,7 @@ Facilitating Metadata Interoperability in <fixed-case>CLARIN</fixed-case>-<fixed-case>DK</fixed-case> LeneOffersgaard - Dorte HaltrupHansen + Dorte HaltrupHansen 2510–2515 The issue for CLARIN archives at the metadata level is to facilitate the user’s possibility to describe their data, even with their own standard, and at the same time make these metadata meaningful for a variety of users with a variety of resource types, and ensure that the metadata are useful for search across all resources both at the national and at the European level. We see that different people from different research communities fill in the metadata in different ways even though the metadata was defined and documented. This has impacted when the metadata are harvested and displayed in different environments. A loss of information is at stake. In this paper we view the challenges of ensuring metadata interoperability through examples of propagation of metadata values from the CLARIN-DK archive to the VLO. We see that the CLARIN Community in many ways support interoperability, but argue that agreeing upon standards, making clear definitions of the semantics of the metadata and their content is inevitable for the interoperability to work successfully. The key points are clear and freely available definitions, accessible documentation and easily usable facilities and guidelines for the metadata creators. L16-1398 @@ -4262,11 +4262,11 @@ The <fixed-case>IPR</fixed-case>-cleared Corpus of Contemporary Written and Spoken <fixed-case>R</fixed-case>omanian Language - DanTufiș - Verginica BarbuMititelu + DanTufiș + Verginica BarbuMititelu ElenaIrimia - Ștefan DanielDumitrescu - TiberiuBoroș + Ștefan DanielDumitrescu + TiberiuBoroș 2516–2521 The article describes the current status of a large national project, CoRoLa, aiming at building a reference corpus for the contemporary Romanian language. Unlike many other national corpora, CoRoLa contains only - IPR cleared texts and speech data, obtained from some of the country’s most representative publishing houses, broadcasting agencies, editorial offices, newspapers and popular bloggers. For the written component 500 million tokens are targeted and for the oral one 300 hours of recordings. The choice of texts is done according to their functional style, domain and subdomain, also with an eye to the international practice. A metadata file (following the CMDI model) is associated to each text file. Collected texts are cleaned and transformed in a format compatible with the tools for automatic processing (segmentation, tokenization, lemmatization, part-of-speech tagging). The paper also presents up-to-date statistics about the structure of the corpus almost two years before its official launching. The corpus will be freely available for searching. Users will be able to download the results of their searches and those original files when not against stipulations in the protocols we have with text providers. L16-1399 @@ -4282,9 +4282,9 @@ LucieChlumská TomášJelínek DominikaKováříková - VladimírPetkevič + VladimírPetkevič PavelProcházka - HanaSkoumalová + HanaSkoumalová MichalŠkrabal PetrTruneček PavelVondřička @@ -4296,13 +4296,13 @@ <fixed-case>LREC</fixed-case> as a Graph: People and Resources in a Network - RiccardoDel Gratta + RiccardoDel Gratta FrancescaFrontini MonicaMonachini GabriellaPardelli IreneRusso RobertoBartolini - FahadKhan + FahadKhan ClaudiaSoria NicolettaCalzolari 2529–2532 @@ -4364,10 +4364,10 @@ A Finite-state Morphological Analyser for Tuvan - FrancisTyers + FrancisTyers AziyanaBayyr-ool AelitaSalchak - JonathanWashington + JonathanWashington 2562–2567 ~This paper describes the development of free/open-source finite-state morphological transducers for Tuvan, a Turkic language spoken in and around the Tuvan Republic in Russia. The finite-state toolkit used for the work is the Helsinki Finite-State Toolkit (HFST), we use the lexc formalism for modelling the morphotactics and twol formalism for modelling morphophonological alternations. We present a novel description of the morphological combinatorics of pseudo-derivational morphemes in Tuvan. An evaluation is presented which shows that the transducer has a reasonable coverage―around 93%―on freely-available corpora of the languages, and high precision―over 99%―on a manually verified test set. L16-1407 @@ -4378,11 +4378,11 @@ AndrejsSpektors IlzeAuzina RobertsDargis - NormundsGruzitis - PeterisPaikens - LaumaPretkalnina + NormundsGruzitis + PeterisPaikens + LaumaPretkalnina LauraRituma - BaibaSaulite + BaibaSaulite 2568–2571 We describe an extensive and versatile lexical resource for Latvian, an under-resourced Indo-European language, which we call Tezaurs (Latvian for ‘thesaurus’). It comprises a large explanatory dictionary of more than 250,000 entries that are derived from more than 280 external sources. The dictionary is enriched with phonetic, morphological, semantic and other annotations, as well as augmented by various language processing tools allowing for the generation of inflectional forms and pronunciation, for on-the-fly selection of corpus examples, for suggesting synonyms, etc. Tezaurs is available as a public and widely used web application for end-users, as an open data set for the use in language technology (LT), and as an API ― a set of web services for the integration into third-party applications. The ultimate goal of Tezaurs is to be the central computational lexicon for Latvian, bringing together all Latvian words and frequently used multi-word units and allowing for the integration of other LT resources and tools. L16-1408 @@ -4391,8 +4391,8 @@ A Finite-State Morphological Analyser for <fixed-case>S</fixed-case>indhi RaveeshMotlani - FrancisTyers - DiptiSharma + FrancisTyers + DiptiSharma 2572–2577 Morphological analysis is a fundamental task in natural-language processing, which is used in other NLP applications such as part-of-speech tagging, syntactic parsing, information retrieval, machine translation, etc. In this paper, we present our work on the development of free/open-source finite-state morphological analyser for Sindhi. We have used Apertium’s lttoolbox as our finite-state toolkit to implement the transducer. The system is developed using a paradigm-based approach, wherein a paradigm defines all the word forms and their morphological features for a given stem (lemma). We have evaluated our system on the Sindhi Wikipedia corpus and achieved a reasonable coverage of 81% and a precision of over 97%. L16-1409 @@ -4428,7 +4428,7 @@ Automatically Generated Affective Norms of Abstractness, Arousal, Imageability and Valence for 350 000 <fixed-case>G</fixed-case>erman Lemmas MaximilianKöper - SabineSchulte im Walde + SabineSchulte im Walde 2595–2598 This paper presents a collection of 350,000 German lemmatised words, rated on four psycholinguistic affective attributes. All ratings were obtained via a supervised learning algorithm that can automatically calculate a numerical rating of a word. We applied this algorithm to abstractness, arousal, imageability and valence. Comparison with human ratings reveals high correlation across all rating types. The full resource is publically available at: http://www.ims.uni-stuttgart.de/data/affective_norms/ L16-1413 @@ -4446,7 +4446,7 @@ A Framework for Cross-lingual/Node-wise Alignment of Lexical-Semantic Resources - YoshihikoHayashi + YoshihikoHayashi 2607–2613 Given lexical-semantic resources in different languages, it is useful to establish cross-lingual correspondences, preferably with semantic relation labels, between the concept nodes in these resources. This paper presents a framework for enabling a cross-lingual/node-wise alignment of lexical-semantic resources, where cross-lingual correspondence candidates are first discovered and ranked, and then classified by a succeeding module. Indeed, we propose that a two-tier classifier configuration is feasible for the second module: the first classifier filters out possibly irrelevant correspondence candidates and the second classifier assigns a relatively fine-grained semantic relation label to each of the surviving candidates. The results of Japanese-to-English alignment experiments using EDR Electronic Dictionary and Princeton WordNet are described to exemplify the validity of the proposal. L16-1415 @@ -4454,7 +4454,7 @@ Lexical Coverage Evaluation of Large-scale Multilingual Semantic Lexicons for Twelve Languages - ScottPiao + ScottPiao PaulRayson DawnArcher FrancescaBianchi @@ -4503,7 +4503,7 @@ A Large Rated Lexicon with <fixed-case>F</fixed-case>rench Medical Words NataliaGrabar - ThierryHamon + ThierryHamon 2643–2648 Patients are often exposed to medical terms, such as anosognosia, myelodysplastic, or hepatojejunostomy, that can be semantically complex and hardly understandable by non-experts in medicine. Hence, it is important to assess which words are potentially non-understandable and require further explanations. The purpose of our work is to build specific lexicon in which the words are rated according to whether they are understandable or non-understandable. We propose to work with medical words in French such as provided by an international medical terminology. The terms are segmented in single words and then each word is manually processed by three annotators. The objective is to assign each word into one of the three categories: I can understand, I am not sure, I cannot understand. The annotators do not have medical training nor they present specific medical problems. They are supposed to represent an average patient. The inter-annotator agreement is then computed. The content of the categories is analyzed. Possible applications in which this lexicon can be helpful are proposed and discussed. The rated lexicon is freely available for the research purposes. It is accessible online at http://natalia.grabar.perso.sfr.fr/rated-lexicon.html L16-1420 @@ -4520,7 +4520,7 @@ <fixed-case>V</fixed-case>erb<fixed-case>L</fixed-case>ex<fixed-case>P</fixed-case>or: a lexical resource with semantic roles for <fixed-case>P</fixed-case>ortuguese LeonardoZilio - Maria José BocornyFinatto + Maria José BocornyFinatto AlineVillavicencio 2656–2661 This paper presents a lexical resource developed for Portuguese. The resource contains sentences annotated with semantic roles. The sentences were extracted from two domains: Cardiology research papers and newspaper articles. Both corpora were analyzed with the PALAVRAS parser and subsequently processed with a subcategorization frames extractor, so that each sentence that contained at least one main verb was stored in a database together with its syntactic organization. The annotation was manually carried out by a linguist using an annotation interface. Both the annotated and non-annotated data were exported to an XML format, which is readily available for download. The reason behind exporting non-annotated data is that there is syntactic information collected from the parser annotation in the non-annotated data, and this could be useful for other researchers. The sentences from both corpora were annotated separately, so that it is possible to access sentences either from the Cardiology or from the newspaper corpus. The full resource presents more than seven thousand semantically annotated sentences, containing 192 different verbs and more than 15 thousand individual arguments and adjuncts. @@ -4529,10 +4529,10 @@ A Multilingual Predicate Matrix - MaddalenLopez de Lacalle + MaddalenLopez de Lacalle EgoitzLaparra ItziarAldabe - GermanRigau + GermanRigau 2662–2668 This paper presents the Predicate Matrix 1.3, a lexical resource resulting from the integration of multiple sources of predicate information including FrameNet, VerbNet, PropBank and WordNet. This new version of the Predicate Matrix has been extended to cover nominal predicates by adding mappings to NomBank. Similarly, we have integrated resources in Spanish, Catalan and Basque. As a result, the Predicate Matrix 1.3 provides a multilingual lexicon to allow interoperable semantic analysis in multiple languages. L16-1423 @@ -4559,8 +4559,8 @@ Enriching a <fixed-case>P</fixed-case>ortuguese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et using Synonyms from a Monolingual Dictionary AlbertoSimões - XavierGómez Guinovart - José JoãoAlmeida + XavierGómez Guinovart + José JoãoAlmeida 2682–2687 In this article we present an exploratory approach to enrich a WordNet-like lexical ontology with the synonyms present in a standard monolingual Portuguese dictionary. The dictionary was converted from PDF into XML and senses were automatically identified and annotated. This allowed us to extract them, independently of definitions, and to create sets of synonyms (synsets). These synsets were then aligned with WordNet synsets, both in the same language (Portuguese) and projecting the Portuguese terms into English, Spanish and Galician. This process allowed both the addition of new term variants to existing synsets, as to create new synsets for Portuguese. L16-1426 @@ -4589,7 +4589,7 @@ Aspect based Sentiment Analysis in <fixed-case>H</fixed-case>indi: Resource Creation and Evaluation Md ShadAkhtar AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 2703–2709 Due to the phenomenal growth of online product reviews, sentiment analysis (SA) has gained huge attention, for example, by online service providers. A number of benchmark datasets for a wide range of domains have been made available for sentiment analysis, especially in resource-rich languages. In this paper we assess the challenges of SA in Hindi by providing a benchmark setup, where we create an annotated dataset of high quality, build machine learning models for sentiment analysis in order to show the effective usage of the dataset, and finally make the resource available to the community for further advancement of research. The dataset comprises of Hindi product reviews crawled from various online sources. Each sentence of the review is annotated with aspect term and its associated sentiment. As classification algorithms we use Conditional Random Filed (CRF) and Support Vector Machine (SVM) for aspect term extraction and sentiment analysis, respectively. Evaluation results show the average F-measure of 41.07% for aspect term extraction and accuracy of 54.05% for sentiment classification. L16-1429 @@ -4615,7 +4615,7 @@ Discourse Structure and Dialogue Acts in Multiparty Dialogue: the <fixed-case>STAC</fixed-case> Corpus - NicholasAsher + NicholasAsher JulieHunter MathieuMorey BenamaraFarah @@ -4630,7 +4630,7 @@ GuillaumeDubuisson Duplessis VincentLetard Anne-LaureLigozat - SophieRosset + SophieRosset 2728–2735 This paper presents an automatic corpus-based process to author an open-domain conversational strategy usable both in chatterbot systems and as a fallback strategy for out-of-domain human utterances. Our approach is implemented on a corpus of television drama subtitles. This system is used as a chatterbot system to collect a corpus of 41 open-domain textual dialogues with 27 human participants. The general capabilities of the system are studied through objective measures and subjective self-reports in terms of understandability, repetition and coherence of the system responses selected in reaction to human utterances. Subjective evaluations of the collected dialogues are presented with respect to amusement, engagement and enjoyability. The main factors influencing those dimensions in our chatterbot experiment are discussed. L16-1433 @@ -4648,7 +4648,7 @@ Towards Automatic Identification of Effective Clues for Team Word-Guessing Games EliPincus - DavidTraum + DavidTraum 2741–2747 Team word-guessing games where one player, the clue-giver, gives clues attempting to elicit a target-word from another player, the receiver, are a popular form of entertainment and also used for educational purposes. Creating an engaging computational agent capable of emulating a talented human clue-giver in a timed word-guessing game depends on the ability to provide effective clues (clues able to elicit a correct guess from a human receiver). There are many available web resources and databases that can be mined for the raw material for clues for target-words; however, a large number of those clues are unlikely to be able to elicit a correct guess from a human guesser. In this paper, we propose a method for automatically filtering a clue corpus for effective clues for an arbitrary target-word from a larger set of potential clues, using machine learning on a set of features of the clues, including point-wise mutual information between a clue’s constituent words and a clue’s target-word. The results of the experiments significantly improve the average clue quality over previous approaches, and bring quality rates in-line with measures of human clue quality derived from a corpus of human-human interactions. The paper also introduces the data used to develop this method; audio recordings of people making guesses after having heard the clues being spoken by a synthesized voice. L16-1435 @@ -4669,7 +4669,7 @@ Using Contextual Information for Machine Translation Evaluation MarinaFomicheva - NúriaBel + NúriaBel 2755–2761 Automatic evaluation of Machine Translation (MT) is typically approached by measuring similarity between the candidate MT and a human reference translation. An important limitation of existing evaluation systems is that they are unable to distinguish candidate-reference differences that arise due to acceptable linguistic variation from the differences induced by MT errors. In this paper we present a new metric, UPF-Cobalt, that addresses this issue by taking into consideration the syntactic contexts of candidate and reference words. The metric applies a penalty when the words are similar but the contexts in which they occur are not equivalent. In this way, Machine Translations (MTs) that are different from the human translation but still essentially correct are distinguished from those that share high number of words with the reference but alter the meaning of the sentence due to translation errors. The results show that the method proposed is indeed beneficial for automatic MT evaluation. We report experiments based on two different evaluation tasks with various types of manual quality assessment. The metric significantly outperforms state-of-the-art evaluation systems in varying evaluation settings. L16-1437 @@ -4680,8 +4680,8 @@ João AntónioRodrigues NunoRendeiro AndreiaQuerido - SanjaŠtajner - AntónioBranco + SanjaŠtajner + AntónioBranco 2762–2765 The usual concern when opting for a rule-based or a hybrid machine translation (MT) system is how much effort is required to adapt the system to a different language pair or a new domain. In this paper, we describe a way of adapting an existing hybrid MT system to a new language pair, and show that such a system can outperform a standard phrase-based statistical machine translation system with an average of 10 persons/month of work. This is specifically important in the case of domain-specific MT for which there is not enough parallel data for training a statistical machine translation system. L16-1438 @@ -4700,7 +4700,7 @@ TakuyaMatsuzaki AkiraFujita NaoyaTodo - Noriko H.Arai + Noriko H.Arai 2771–2776 This paper reports on an experiment where 795 human participants answered to the questions taken from second language proficiency tests that were translated to their native language. The output of three machine translation systems and two different human translations were used as the test material. We classified the translation errors in the questions according to an error taxonomy and analyzed the participants’ response on the basis of the type and frequency of the translation errors. Through the analysis, we identified several types of errors that deteriorated most the accuracy of the participants’ answers, their confidence on the answers, and their overall evaluation of the translation quality. L16-1440 @@ -4710,9 +4710,9 @@ Word Sense-Aware Machine Translation: Including Senses as Contextual Features for Improved Translation Models StevenNeale LuísGomes - EnekoAgirre - Oier Lopezde Lacalle - AntónioBranco + EnekoAgirre + Oier Lopezde Lacalle + AntónioBranco 2777–2783 Although it is commonly assumed that word sense disambiguation (WSD) should help to improve lexical choice and improve the quality of machine translation systems, how to successfully integrate word senses into such systems remains an unanswered question. Some successful approaches have involved reformulating either WSD or the word senses it produces, but work on using traditional word senses to improve machine translation have met with limited success. In this paper, we build upon previous work that experimented on including word senses as contextual features in maxent-based translation models. Training on a large, open-domain corpus (Europarl), we demonstrate that this aproach yields significant improvements in machine translation from English to Portuguese. L16-1441 @@ -4720,8 +4720,8 @@ <fixed-case>S</fixed-case>uper<fixed-case>CAT</fixed-case>: The (New and Improved) Corpus Analysis Toolkit - K. BretonnelCohen - William A.Baumgartner Jr. + K. BretonnelCohen + William A.Baumgartner Jr. IrinaTemnikova 2784–2788 This paper reports SuperCAT, a corpus analysis toolkit. It is a radical extension of SubCAT, the Sublanguage Corpus Analysis Toolkit, from sublanguage analysis to corpus analysis in general. The idea behind SuperCAT is that representative corpora have no tendency towards closure―that is, they tend towards infinity. In contrast, non-representative corpora have a tendency towards closure―roughly, finiteness. SuperCAT focuses on general techniques for the quantitative description of the characteristics of any corpus (or other language sample), particularly concerning the characteristics of lexical distributions. Additionally, SuperCAT features a complete re-engineering of the previous SubCAT architecture. @@ -4783,7 +4783,7 @@ Detecting Optional Arguments of Verbs AndrásKornai - Dávid MárkNemeskey + Dávid MárkNemeskey GáborRecski 2815–2818 We propose a novel method for detecting optional arguments of Hungarian verbs using only positive data. We introduce a custom variant of collexeme analysis that explicitly models the noise in verb frames. Our method is, for the most part, unsupervised: we use the spectral clustering algorithm described in Brew and Schulte in Walde (2002) to build a noise model from a short, manually verified seed list of verbs. We experimented with both raw count- and context-based clusterings and found their performance almost identical. The code for our algorithm and the frame list are freely available at http://hlt.bme.hu/en/resources/tade. @@ -4821,8 +4821,8 @@ Sentence Similarity based on Dependency Tree Kernels for Multi-document Summarization Şaziye BetülÖzateş - ArzucanÖzgür - DragomirRadev + ArzucanÖzgür + DragomirRadev 2833–2838 We introduce an approach based on using the dependency grammar representations of sentences to compute sentence similarity for extractive multi-document summarization. We adapt and investigate the effects of two untyped dependency tree kernels, which have originally been proposed for relation extraction, to the multi-document summarization problem. In addition, we propose a series of novel dependency grammar based kernels to better represent the syntactic and semantic similarities among the sentences. The proposed methods incorporate the type information of the dependency relations for sentence similarity calculation. To our knowledge, this is the first study that investigates using dependency tree based sentence similarity for multi-document summarization. L16-1452 @@ -4851,7 +4851,7 @@ Recent Advances in Development of a Lexicon-Grammar of <fixed-case>P</fixed-case>olish: <fixed-case>P</fixed-case>ol<fixed-case>N</fixed-case>et 3.0 ZygmuntVetulani - GrażynaVetulani + GrażynaVetulani BartłomiejKochanowski 2851–2854 The granularity of PolNet (Polish Wordnet) is the main theoretical issue discussed in the paper. We describe the latest extension of PolNet including valency information of simple verbs and noun-verb collocations using manual and machine-assisted methods. Valency is defined to include both semantic and syntactic selectional restrictions. We assume the valency structure of a verb to be an index of meaning. Consistently we consider it an attribute of a synset. Strict application of this principle results in fine granularity of the verb section of the wordnet. Considering valency as a distinctive feature of synsets was an essential step to transform the initial PolNet (first intended as a lexical ontology) into a lexicon-grammar. For the present refinement of PolNet we assume that the category of language register is a part of meaning. The totality of PolNet 2.0 synsets is being revised in order to split the PolNet 2.0 synsets that contain different register words into register-uniform sub-synsets. We completed this operation for synsets that were used as values of semantic roles. The operation augmented the number of considered synsets by 29%. In the paper we report an extension of the class of collocation-based verb synsets. @@ -4868,7 +4868,7 @@ Improving corpus search via parsing - NataliaKlyueva + NataliaKlyueva PavelStraňák 2862–2866 In this paper, we describe an addition to the corpus query system Kontext that enables to enhance the search using syntactic attributes in addition to the existing features, mainly lemmas and morphological categories. We present the enhancements of the corpus query system itself, the attributes we use to represent syntactic structures in data, and some examples of querying the syntactically annotated corpora, such as treebanks in various languages as well as an automatically parsed large corpus. @@ -4890,7 +4890,7 @@ A <fixed-case>H</fixed-case>ungarian Sentiment Corpus Manually Annotated at Aspect Level Martina KatalinSzabó VeronikaVincze - Katalin IlonaSimkó + Katalin IlonaSimkó ViktorVarga ViktorHangya 2873–2878 @@ -4918,7 +4918,7 @@ Annotating Sentiment and Irony in the Online <fixed-case>I</fixed-case>talian Political Debate on #labuonascuola - MarcoStranisci + MarcoStranisci CristinaBosco Delia IrazúHernández Farías VivianaPatti @@ -4929,7 +4929,7 @@ <fixed-case>N</fixed-case>ile<fixed-case>UL</fixed-case>ex: A Phrase and Word Level Sentiment Lexicon for <fixed-case>E</fixed-case>gyptian and <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic - Samhaa R.El-Beltagy + Samhaa R.El-Beltagy 2900–2905 This paper presents NileULex, which is an Arabic sentiment lexicon containing close to six thousands Arabic words and compound phrases. Forty five percent of the terms and expressions in the lexicon are Egyptian or colloquial while fifty five percent are Modern Standard Arabic. While the collection of many of the terms included in the lexicon was done automatically, the actual addition of any term was done manually. One of the important criterions for adding terms to the lexicon, was that they be as unambiguous as possible. The result is a lexicon with a much higher quality than any translated variant or automatically constructed one. To demonstrate that a lexicon such as this can directly impact the task of sentiment analysis, a very basic machine learning based sentiment analyser that uses unigrams, bigrams, and lexicon based features was applied on two different Twitter datasets. The obtained results were compared to a baseline system that only uses unigrams and bigrams. The same lexicon based features were also generated using a publicly available translation of a popular sentiment lexicon. The experiments show that usage of the developed lexicon improves the results over both the baseline and the publicly available lexicon. L16-1463 @@ -4945,8 +4945,8 @@ Rude waiter but mouthwatering pastries! An exploratory study into <fixed-case>D</fixed-case>utch Aspect-Based Sentiment Analysis - OrphéeDe Clercq - VéroniqueHoste + OrphéeDe Clercq + VéroniqueHoste 2910–2917 The fine-grained task of automatically detecting all sentiment expressions within a given document and the aspects to which they refer is known as aspect-based sentiment analysis. In this paper we present the first full aspect-based sentiment analysis pipeline for Dutch and apply it to customer reviews. To this purpose, we collected reviews from two different domains, i.e. restaurant and smartphone reviews. Both corpora have been manually annotated using newly developed guidelines that comply to standard practices in the field. For our experimental pipeline we perceive aspect-based sentiment analysis as a task consisting of three main subtasks which have to be tackled incrementally: aspect term extraction, aspect category classification and polarity classification. First experiments on our Dutch restaurant corpus reveal that this is indeed a feasible approach that yields promising results. L16-1465 @@ -4955,7 +4955,7 @@ Building A Case-based Semantic <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Parallel Treebank HuaxingShi - TiejunZhao + TiejunZhao Keh-YihSu 2918–2924 We construct a case-based English-to-Chinese semantic constituent parallel Treebank for a Statistical Machine Translation (SMT) task by labelling each node of the Deep Syntactic Tree (DST) with our refined semantic cases. Since subtree span-crossing is harmful in tree-based SMT, DST is adopted to alleviate this problem. At the same time, we tailor an existing case set to represent bilingual shallow semantic relations more precisely. This Treebank is a part of a semantic corpus building project, which aims to build a semantic bilingual corpus annotated with syntactic, semantic cases and word senses. Data in our Treebank is from the news domain of Datum corpus. 4,000 sentence pairs are selected to cover various lexicons and part-of-speech (POS) n-gram patterns as much as possible. This paper presents the construction of this case Treebank. Also, we have tested the effect of adopting DST structure in alleviating subtree span-crossing. Our preliminary analysis shows that the compatibility between Chinese and English trees can be significantly increased by transforming the parse-tree into the DST. Furthermore, the human agreement rate in annotation is found to be acceptable (90% in English nodes, 75% in Chinese nodes). @@ -4967,7 +4967,7 @@ XuansongLi JenniferTracey StephenGrimes - StephanieStrassel + StephanieStrassel 2925–2930 Morphologically-rich languages pose problems for machine translation (MT) systems, including word-alignment errors, data sparsity and multiple affixes. Current alignment models at word-level do not distinguish words and morphemes, thus yielding low-quality alignment and subsequently affecting end translation quality. Models using morpheme-level alignment can reduce the vocabulary size of morphologically-rich languages and overcomes data sparsity. The alignment data based on smallest units reveals subtle language features and enhances translation quality. Recent research proves such morpheme-level alignment (MA) data to be valuable linguistic resources for SMT, particularly for languages with rich morphology. In support of this research trend, the Linguistic Data Consortium (LDC) created Uzbek-English and Turkish-English alignment data which are manually aligned at the morpheme level. This paper describes the creation of MA corpora, including alignment and tagging process and approaches, highlighting annotation challenges and specific features of languages with rich morphology. The light tagging annotation on the alignment layer adds extra value to the MA data, facilitating users in flexibly tailoring the data for various MT model training. L16-1467 @@ -4986,11 +4986,11 @@ <fixed-case>T</fixed-case>weet<fixed-case>MT</fixed-case>: A Parallel Microblog Corpus Iñaki SanVicente - IñakiAlegría + IñakiAlegría CristinaEspaña-Bonet PabloGamallo - Hugo GonçaloOliveira - Eva MartínezGarcia + Hugo GonçaloOliveira + Eva MartínezGarcia AntonioToral ArkaitzZubiaga NoraAranberri @@ -5002,8 +5002,8 @@ The Scielo Corpus: a Parallel Corpus of Scientific Publications for Biomedicine MarianaNeves - Antonio JimenoYepes - AurélieNévéol + Antonio JimenoYepes + AurélieNévéol 2942–2948 The biomedical scientific literature is a rich source of information not only in the English language, for which it is more abundant, but also in other languages, such as Portuguese, Spanish and French. We present the first freely available parallel corpus of scientific publications for the biomedical domain. Documents from the ”Biological Sciences” and ”Health Sciences” categories were retrieved from the Scielo database and parallel titles and abstracts are available for the following language pairs: Portuguese/English (about 86,000 documents in total), Spanish/English (about 95,000 documents) and French/English (about 2,000 documents). Additionally, monolingual data was also collected for all four languages. Sentences in the parallel corpus were automatically aligned and a manual analysis of 200 documents by native experts found that a minimum of 79% of sentences were correctly aligned in all language pairs. We demonstrate the utility of the corpus by running baseline machine translation experiments. We show that for all language pairs, a statistical machine translation system trained on the parallel corpora achieves performance that rivals or exceeds the state of the art in the biomedical domain. Furthermore, the corpora are currently being used in the biomedical task in the First Conference on Machine Translation (WMT’16). L16-1470 @@ -5012,9 +5012,9 @@ Producing Monolingual and Parallel Web Corpora at the Same Time - <fixed-case>S</fixed-case>pider<fixed-case>L</fixed-case>ing and Bitextor’s Love Affair NikolaLjubešić - MiquelEsplà-Gomis + MiquelEsplà-Gomis AntonioToral - Sergio OrtizRojas + Sergio OrtizRojas FilipKlubička 2949–2956 This paper presents an approach for building large monolingual corpora and, at the same time, extracting parallel data by crawling the top-level domain of a given language of interest. For gathering linguistically relevant data from top-level domains we use the SpiderLing crawler, modified to crawl data written in multiple languages. The output of this process is then fed to Bitextor, a tool for harvesting parallel data from a collection of documents. We call the system combining these two tools Spidextor, a blend of the names of its two crucial parts. We evaluate the described approach intrinsically by measuring the accuracy of the extracted bitexts from the Croatian top-level domain “.hr” and the Slovene top-level domain “.si”, and extrinsically on the English-Croatian language pair by comparing an SMT system built from the crawled data with third-party systems. We finally present parallel datasets collected with our approach for the English-Croatian, English-Finnish, English-Serbian and English-Slovene language pairs. @@ -5037,7 +5037,7 @@ Can Tweets Predict <fixed-case>TV</fixed-case> Ratings? BridgetSommerdijk EricSanders - Antalvan den Bosch + Antalvan den Bosch 2965–2970 We set out to investigate whether TV ratings and mentions of TV programmes on the Twitter social media platform are correlated. If such a correlation exists, Twitter may be used as an alternative source for estimating viewer popularity. Moreover, the Twitter-based rating estimates may be generated during the programme, or even before. We count the occurrences of programme-specific hashtags in an archive of Dutch tweets of eleven popular TV shows broadcast in the Netherlands in one season, and perform correlation tests. Overall we find a strong correlation of 0.82; the correlation remains strong, 0.79, if tweets are counted a half hour before broadcast time. However, the two most popular TV shows account for most of the positive effect; if we leave out the single and second most popular TV shows, the correlation drops to being moderate to weak. Also, within a TV show, correlations between ratings and tweet counts are mostly weak, while correlations between TV ratings of the previous and next shows are strong. In absence of information on previous shows, Twitter-based counts may be a viable alternative to classic estimation methods for TV ratings. Estimates are more reliable with more popular TV shows. L16-1473 @@ -5059,7 +5059,7 @@ Corpus for Customer Purchase Behavior Prediction in Social Media ShigeyukiSakaki - FrancineChen + FrancineChen MandyKorpusik Yan-YingChen 2976–2980 @@ -5070,7 +5070,7 @@ Segmenting Hashtags using Automatically Created Training Data ArdaÇelebi - ArzucanÖzgür + ArzucanÖzgür 2981–2985 Hashtags, which are commonly composed of multiple words, are increasingly used to convey the actual messages in tweets. Understanding what tweets are saying is getting more dependent on understanding hashtags. Therefore, identifying the individual words that constitute a hashtag is an important, yet a challenging task due to the abrupt nature of the language used in tweets. In this study, we introduce a feature-rich approach based on using supervised machine learning methods to segment hashtags. Our approach is unsupervised in the sense that instead of using manually segmented hashtags for training the machine learning classifiers, we automatically create our training data by using tweets as well as by automatically extracting hashtag segmentations from a large corpus. We achieve promising results with such automatically created noisy training data. L16-1476 @@ -5079,7 +5079,7 @@ Exploring Language Variation Across <fixed-case>E</fixed-case>urope - A Web-based Tool for Computational Sociolinguistics DirkHovy - AndersJohannsen + AndersJohannsen 2986–2989 Language varies not only between countries, but also along regional and socio-demographic lines. This variation is one of the driving factors behind language change. However, investigating language variation is a complex undertaking: the more factors we want to consider, the more data we need. Traditional qualitative methods are not well-suited to do this, an therefore restricted to isolated factors. This reduction limits the potential insights, and risks attributing undue importance to easily observed factors. While there is a large interest in linguistics to increase the quantitative aspect of such studies, it requires training in both variational linguistics and computational methods, a combination that is still not common. We take a first step here to alleviating the problem by providing an interface, www.languagevariation.com, to explore large-scale language variation along multiple socio-demographic factors – without programming knowledge. It makes use of large amounts of data and provides statistical analyses, maps, and interactive features that will enable scholars to explore language variation in a data-driven way. L16-1477 @@ -5140,14 +5140,14 @@ <fixed-case>QTL</fixed-case>eap <fixed-case>WSD</fixed-case>/<fixed-case>NED</fixed-case> Corpora: Semantic Annotation of Parallel Corpora in Six Languages ArantxaOtegi NoraAranberri - AntonioBranco - JanHajič + AntonioBranco + JanHajič MartinPopel - KirilSimov - EnekoAgirre + KirilSimov + EnekoAgirre PetyaOsenova RitaPereira - JoãoSilva + JoãoSilva StevenNeale 3023–3030 This work presents parallel corpora automatically annotated with several NLP tools, including lemma and part-of-speech tagging, named-entity recognition and classification, named-entity disambiguation, word-sense disambiguation, and coreference. The corpora comprise both the well-known Europarl corpus and a domain-specific question-answer troubleshooting corpus on the IT domain. English is common in all parallel corpora, with translations in five languages, namely, Basque, Bulgarian, Czech, Portuguese and Spanish. We describe the annotated corpora and the tools used for annotation, as well as annotation statistics for each language. These new resources are freely available and will help research on semantic processing for machine translation and cross-lingual transfer. @@ -5172,7 +5172,7 @@ JayaSaraswati LaxmiKashyap DhirendraSingh - PushpakBhattacharyya + PushpakBhattacharyya 3039–3043 Word Sense Disambiguation (WSD) is one of the open problems in the area of natural language processing. Various supervised, unsupervised and knowledge based approaches have been proposed for automatically determining the sense of a word in a particular context. It has been observed that such approaches often find it difficult to beat the WordNet First Sense (WFS) baseline which assigns the sense irrespective of context. In this paper, we present our work on creating the WFS baseline for Hindi language by manually ranking the synsets of Hindi WordNet. A ranking tool is developed where human experts can see the frequency of the word senses in the sense-tagged corpora and have been asked to rank the senses of a word by using this information and also his/her intuition. The accuracy of WFS baseline is tested on several standard datasets. F-score is found to be 60%, 65% and 55% on Health, Tourism and News datasets respectively. The created rankings can also be used in other NLP applications viz., Machine Translation, Information Retrieval, Text Summarization, etc. L16-1485 @@ -5189,12 +5189,12 @@ Impact of Automatic Segmentation on the Quality, Productivity and Self-reported Post-editing Effort of Intralingual Subtitles - AitorÁlvarez + AitorÁlvarez MarinaBalenciaga Arantzadel Pozo HaritzArzelus AnnaMatamala - Carlos-D.Martínez-Hinarejos + Carlos-D.Martínez-Hinarejos 3049–3053 This paper describes the evaluation methodology followed to measure the impact of using a machine learning algorithm to automatically segment intralingual subtitles. The segmentation quality, productivity and self-reported post-editing effort achieved with such approach are shown to improve those obtained by the technique based in counting characters, mainly employed for automatic subtitle segmentation currently. The corpus used to train and test the proposed automated segmentation method is also described and shared with the community, in order to foster further research in this area. L16-1487 @@ -5212,7 +5212,7 @@ Cross-validating Image Description Datasets and Evaluation Metrics JosiahWang - RobertGaizauskas + RobertGaizauskas 3059–3066 The task of automatically generating sentential descriptions of image content has become increasingly popular in recent years, resulting in the development of large-scale image description datasets and the proposal of various metrics for evaluating image description generation systems. However, not much work has been done to analyse and understand both datasets and the metrics. In this paper, we propose using a leave-one-out cross validation (LOOCV) process as a means to analyse multiply annotated, human-authored image description datasets and the various evaluation metrics, i.e. evaluating one image description against other human-authored descriptions of the same image. Such an evaluation process affords various insights into the image description datasets and evaluation metrics, such as the variations of image descriptions within and across datasets and also what the metrics capture. We compute and analyse (i) human upper-bound performance; (ii) ranked correlation between metric pairs across datasets; (iii) lower-bound performance by comparing a set of descriptions describing one image to another sentence not describing that image. Interesting observations are made about the evaluation metrics and image description datasets, and we conclude that such cross-validation methods are extremely useful for assessing and gaining insights into image description datasets and evaluation metrics for image descriptions. L16-1489 @@ -5230,7 +5230,7 @@ Benchmarking Lexical Simplification Systems - GustavoPaetzold + GustavoPaetzold LuciaSpecia 3074–3080 Lexical Simplification is the task of replacing complex words in a text with simpler alternatives. A variety of strategies have been devised for this challenge, yet there has been little effort in comparing their performance. In this contribution, we present a benchmarking of several Lexical Simplification systems. By combining resources created in previous work with automatic spelling and inflection correction techniques, we introduce BenchLS: a new evaluation dataset for the task. Using BenchLS, we evaluate the performance of solutions for various steps in the typical Lexical Simplification pipeline, both individually and jointly. This is the first time Lexical Simplification systems are compared in such fashion on the same data, and the findings introduce many contributions to the field, revealing several interesting properties of the systems evaluated. @@ -5250,9 +5250,9 @@ Extractive Summarization under Strict Length Constraints YasharMehdad - AmandaStent + AmandaStent KapilThadani - DragomirRadev + DragomirRadev YoussefBillawala KarolinaBuchner 3089–3093 @@ -5263,13 +5263,13 @@ What’s the Issue Here?: Task-based Evaluation of Reader Comment Summarization Systems EmmaBarker - MonicaParamita + MonicaParamita AdamFunk - EminaKurtic + EminaKurtic AhmetAker JonathanFoster MarkHepple - RobertGaizauskas + RobertGaizauskas 3094–3101 Automatic summarization of reader comments in on-line news is an extremely challenging task and a capability for which there is a clear need. Work to date has focussed on producing extractive summaries using well-known techniques imported from other areas of language processing. But are extractive summaries of comments what users really want? Do they support users in performing the sorts of tasks they are likely to want to perform with reader comments? In this paper we address these questions by doing three things. First, we offer a specification of one possible summary type for reader comment, based on an analysis of reader comment in terms of issues and viewpoints. Second, we define a task-based evaluation framework for reader comment summarization that allows summarization systems to be assessed in terms of how well they support users in a time-limited task of identifying issues and characterising opinion on issues in comments. Third, we describe a pilot evaluation in which we used the task-based evaluation framework to evaluate a prototype reader comment clustering and summarization system, demonstrating the viability of the evaluation framework and illustrating the sorts of insight such an evaluation affords. L16-1494 @@ -5287,7 +5287,7 @@ Bilingual Lexicon Extraction at the Morpheme Level Using Distributional Analysis AmirHazem - BéatriceDaille + BéatriceDaille 3110–3115 Bilingual lexicon extraction from comparable corpora is usually based on distributional methods when dealing with single word terms (SWT). These methods often treat SWT as single tokens without considering their compositional property. However, many SWT are compositional (composed of roots and affixes) and this information, if taken into account can be very useful to match translational pairs, especially for infrequent terms where distributional methods often fail. For instance, the English compound xenograft which is composed of the root xeno and the lexeme graft can be translated into French compositionally by aligning each of its elements (xeno with xéno and graft with greffe) resulting in the translation: xénogreffe. In this paper, we experiment several distributional modellings at the morpheme level that we apply to perform compositional translation to a subset of French and English compounds. We show promising results using distributional analysis at the root and affix levels. We also show that the adapted approach significantly improve bilingual lexicon extraction from comparable corpora compared to the approach at the word level. L16-1496 @@ -5321,7 +5321,7 @@ ZhenhaoHua YulianTamres-Rudnicky ArnabDash - AlexanderRudnicky + AlexanderRudnicky 3127–3132 Users will interact with an individual app on smart devices (e.g., phone, TV, car) to fulfill a specific goal (e.g. find a photographer), but users may also pursue more complex tasks that will span multiple domains and apps (e.g. plan a wedding ceremony). Planning and executing such multi-app tasks are typically managed by users, considering the required global context awareness. To investigate how users arrange domains/apps to fulfill complex tasks in their daily life, we conducted a user study on 14 participants to collect such data from their Android smart phones. This document 1) summarizes the techniques used in the data collection and 2) provides a brief statistical description of the data. This data guilds the future direction for researchers in the fields of conversational agent and personal assistant, etc. This data is available at http://AppDialogue.com. L16-1499 @@ -5364,7 +5364,7 @@ The <fixed-case>D</fixed-case>ialog<fixed-case>B</fixed-case>ank - HarryBunt + HarryBunt VolhaPetukhova AndreiMalchanau KarsWijnhoven @@ -5377,8 +5377,8 @@ Coordinating Communication in the Wild: The Artwalk Dialogue Corpus of Pedestrian Navigation and Mobile Referential Communication KrisLiu - JeanFox Tree - MarilynWalker + JeanFox Tree + MarilynWalker 3159–3166 The Artwalk Corpus is a collection of 48 mobile phone conversations between 24 pairs of friends and 24 pairs of strangers performing a novel, naturalistically-situated referential communication task. This task produced dialogues which, on average, are just under 40 minutes. The task requires the identification of public art while walking around and navigating pedestrian routes in the downtown area of Santa Cruz, California. The task involves a Director on the UCSC campus with access to maps providing verbal instructions to a Follower executing the task. The task provides a setting for real-world situated dialogic language and is designed to: (1) elicit entrainment and coordination of referring expressions between the dialogue participants, (2) examine the effect of friendship on dialogue strategies, and (3) examine how the need to complete the task while negotiating myriad, unanticipated events in the real world ― such as avoiding cars and other pedestrians ― affects linguistic coordination and other dialogue behaviors. Previous work on entrainment and coordinating communication has primarily focused on similar tasks in laboratory settings where there are no interruptions and no need to navigate from one point to another in a complex space. The corpus provides a general resource for studies on how coordinated task-oriented dialogue changes when we move outside the laboratory and into the world. It can also be used for studies of entrainment in dialogue, and the form and style of pedestrian instruction dialogues, as well as the effect of friendship on dialogic behaviors. L16-1504 @@ -5386,10 +5386,10 @@ Managing Linguistic and Terminological Variation in a Medical Dialogue System - Leonardo CampillosLlanos + Leonardo CampillosLlanos DhouhaBouamor - PierreZweigenbaum - SophieRosset + PierreZweigenbaum + SophieRosset 3167–3173 We introduce a dialogue task between a virtual patient and a doctor where the dialogue system, playing the patient part in a simulated consultation, must reconcile a specialized level, to understand what the doctor says, and a lay level, to output realistic patient-language utterances. This increases the challenges in the analysis and generation phases of the dialogue. This paper proposes methods to manage linguistic and terminological variation in that situation and illustrates how they help produce realistic dialogues. Our system makes use of lexical resources for processing synonyms, inflectional and derivational variants, or pronoun/verb agreement. In addition, specialized knowledge is used for processing medical roots and affixes, ontological relations and concept mapping, and for generating lay variants of terms according to the patient’s non-expert discourse. We also report the results of a first evaluation carried out by 11 users interacting with the system. We evaluated the non-contextual analysis module, which supports the Spoken Language Understanding step. The annotation of task domain entities obtained 91.8% of Precision, 82.5% of Recall, 86.9% of F-measure, 19.0% of Slot Error Rate, and 32.9% of Sentence Error Rate. L16-1505 @@ -5400,7 +5400,7 @@ AjdaGokcen EvanJaffe JohnseyErdmann - MichaelWhite + MichaelWhite DouglasDanforth 3174–3179 We present a corpus of virtual patient dialogues to which we have added manually annotated gold standard word alignments. Since each question asked by a medical student in the dialogues is mapped to a canonical, anticipated version of the question, the corpus implicitly defines a large set of paraphrase (and non-paraphrase) pairs. We also present a novel process for selecting the most useful data to annotate with word alignments and for ensuring consistent paraphrase status decisions. In support of this process, we have enhanced the earlier Edinburgh alignment tool (Cohn et al., 2008) and revised and extended the Edinburgh guidelines, in particular adding guidance intended to ensure that the word alignments are consistent with the overall paraphrase status decision. The finished corpus and the enhanced alignment tool are made freely available. @@ -5409,7 +5409,7 @@ A <fixed-case>CUP</fixed-case> of <fixed-case>C</fixed-case>o<fixed-case>F</fixed-case>ee: A large Collection of feedback Utterances Provided with communicative function annotations - LaurentPrévot + LaurentPrévot JanGorisch RoxaneBertrand 3180–3185 @@ -5421,7 +5421,7 @@ <fixed-case>P</fixed-case>alabras: Crowdsourcing Transcriptions of <fixed-case>L</fixed-case>2 Speech EricSanders PepiBurgos - CatiaCucchiarini + CatiaCucchiarini Roelandvan Hout 3186–3191 We developed a web application for crowdsourcing transcriptions of Dutch words spoken by Spanish L2 learners. In this paper we discuss the design of the application and the influence of metadata and various forms of feedback. Useful data were obtained from 159 participants, with an average of over 20 transcriptions per item, which seems a satisfactory result for this type of research. Informing participants about how many items they still had to complete, and not how many they had already completed, turned to be an incentive to do more items. Assigning participants a score for their performance made it more attractive for them to carry out the transcription task, but this seemed to influence their performance. We discuss possible advantages and disadvantages in connection with the aim of the research and consider possible lessons for designing future experiments. @@ -5430,7 +5430,7 @@ The <fixed-case>U</fixed-case>ppsala Corpus of Student Writings: Corpus Creation, Annotation, and Analysis - BeátaMegyesi + BeátaMegyesi JesperNäsman AnnePalmér 3192–3199 @@ -5460,7 +5460,7 @@ <fixed-case>F</fixed-case>rench Learners Audio Corpus of <fixed-case>G</fixed-case>erman Speech (<fixed-case>FLACGS</fixed-case>) JaneWottawa - MartineAdda-Decker + MartineAdda-Decker 3215–3219 The French Learners Audio Corpus of German Speech (FLACGS) was created to compare German speech production of German native speakers (GG) and French learners of German (FG) across three speech production tasks of increasing production complexity: repetition, reading and picture description. 40 speakers, 20 GG and 20 FG performed each of the three tasks, which in total leads to approximately 7h of speech. The corpus was manually transcribed and automatically aligned. Analysis that can be performed on this type of corpus are for instance segmental differences in the speech production of L2 learners compared to native speakers. We chose the realization of the velar nasal consonant engma. In spoken French, engma does not appear in a VCV context which leads to production difficulties in FG. With increasing speech production complexity (reading and picture description), engma is realized as engma + plosive by FG in over 50% of the cases. The results of a two way ANOVA with unequal sample sizes on the durations of the different realizations of engma indicate that duration is a reliable factor to distinguish between engma and engma + plosive in FG productions compared to the engma productions in GG in a VCV context. The FLACGS corpus allows to study L2 production and perception. L16-1512 @@ -5481,7 +5481,7 @@ IsabellHubert AnttiArppe JordanLachler - Eddie A.Santos + Eddie A.Santos 3227–3234 We are presenting our work on the creation of the first optical character recognition (OCR) model for Northern Haida, also known as Masset or Xaad Kil, a nearly extinct First Nations language spoken in the Haida Gwaii archipelago in British Columbia, Canada. We are addressing the challenges of training an OCR model for a language with an extensive, non-standard Latin character set as follows: (1) We have compared various training approaches and present the results of practical analyses to maximize recognition accuracy and minimize manual labor. An approach using just one or two pages of Source Images directly performed better than the Image Generation approach, and better than models based on three or more pages. Analyses also suggest that a character’s frequency is directly correlated with its recognition accuracy. (2) We present an overview of current OCR accuracy analysis tools available. (3) We have ported the once de-facto standardized OCR accuracy tools to be able to cope with Unicode input. Our work adds to a growing body of research on OCR for particularly challenging character sets, and contributes to creating the largest electronic corpus for this severely endangered language. L16-1514 @@ -5505,7 +5505,7 @@ Curation of <fixed-case>D</fixed-case>utch Regional Dictionaries - Henkvan den Heuvel + Henkvan den Heuvel EricSanders Nicolinevan der Sijs 3249–3255 @@ -5519,7 +5519,7 @@ IreneRusso ValeriaQuochi DavythHicks - AnttonGurrutxaga + AnttonGurrutxaga AnneliSarhimaa MattiTuomisto 3256–3260 @@ -5542,7 +5542,7 @@ MartijnWieling EvaSassolini SebastianaCucurullo - SimonettaMontemagni + SimonettaMontemagni 3265–3272 In this paper, we illustrate the integration of an online dialectometric tool, Gabmap, together with an online dialect atlas, the Atlante Lessicale Toscano (ALT-Web). By using a newly created url-based interface to Gabmap, ALT-Web is able to take advantage of the sophisticated dialect visualization and exploration options incorporated in Gabmap. For example, distribution maps showing the distribution in the Tuscan dialect area of a specific dialectal form (selected via the ALT-Web website) are easily obtainable. Furthermore, the complete ALT-Web dataset as well as subsets of the data (selected via the ALT-Web website) can be automatically uploaded and explored in Gabmap. By combining these two online applications, macro- and micro-analyses of dialectal data (respectively offered by Gabmap and ALT-Web) are effectively and dynamically combined. L16-1520 @@ -5550,7 +5550,7 @@ <fixed-case>LORELEI</fixed-case> Language Packs: Data, Tools, and Resources for Technology Development in Low Resource Languages - StephanieStrassel + StephanieStrassel JenniferTracey 3273–3280 In this paper, we describe the textual linguistic resources in nearly 3 dozen languages being produced by Linguistic Data Consortium for DARPA’s LORELEI (Low Resource Languages for Emergent Incidents) Program. The goal of LORELEI is to improve the performance of human language technologies for low-resource languages and enable rapid re-training of such technologies for new languages, with a focus on the use case of deployment of resources in sudden emergencies such as natural disasters. Representative languages have been selected to provide broad typological coverage for training, and surprise incident languages for testing will be selected over the course of the program. Our approach treats the full set of language packs as a coherent whole, maintaining LORELEI-wide specifications, tagsets, and guidelines, while allowing for adaptation to the specific needs created by each language. Each representative language corpus, therefore, both stands on its own as a resource for the specific language and forms part of a large multilingual resource for broader cross-language technology development. @@ -5559,8 +5559,8 @@ A Computational Perspective on the <fixed-case>R</fixed-case>omanian Dialects - Alina MariaCiobanu - Liviu P.Dinu + Alina MariaCiobanu + Liviu P.Dinu 3281–3285 In this paper we conduct an initial study on the dialects of Romanian. We analyze the differences between Romanian and its dialects using the Swadesh list. We analyze the predictive power of the orthographic and phonetic features of the words, building a classification problem for dialect identification. L16-1522 @@ -5589,7 +5589,7 @@ <fixed-case>WTF</fixed-case>-<fixed-case>LOD</fixed-case> - A New Resource for Large-Scale <fixed-case>NER</fixed-case> Evaluation LubomirOtrusina - PavelSmrz + PavelSmrz 3299–3302 This paper introduces the Web TextFull linkage to Linked Open Data (WTF-LOD) dataset intended for large-scale evaluation of named entity recognition (NER) systems. First, we present the process of collecting data from the largest publically-available textual corpora, including Wikipedia dumps, monthly runs of the CommonCrawl, and ClueWeb09/12. We discuss similarities and differences of related initiatives such as WikiLinks and WikiReverse. Our work primarily focuses on links from “textfull” documents (links surrounded by a text that provides a useful context for entity linking), de-duplication of the data and advanced cleaning procedures. Presented statistics demonstrate that the collected data forms one of the largest available resource of its kind. They also prove suitability of the result for complex NER evaluation campaigns, including an analysis of the most ambiguous name mentions appearing in the data. L16-1525 @@ -5621,7 +5621,7 @@ <fixed-case>ELMD</fixed-case>: An Automatically Generated Entity Linking Gold Standard Dataset in the Music Domain SergioOramas - Luis EspinosaAnke + Luis EspinosaAnke MohamedSordo HoracioSaggion XavierSerra @@ -5633,10 +5633,10 @@ Bridge-Language Capitalization Inference in <fixed-case>W</fixed-case>estern <fixed-case>I</fixed-case>ranian: <fixed-case>S</fixed-case>orani, <fixed-case>K</fixed-case>urmanji, Zazaki, and <fixed-case>T</fixed-case>ajik PatrickLittell - David R.Mortensen + David R.Mortensen KartikGoyal - ChrisDyer - LoriLevin + ChrisDyer + LoriLevin 3318–3324 In Sorani Kurdish, one of the most useful orthographic features in named-entity recognition – capitalization – is absent, as the language’s Perso-Arabic script does not make a distinction between uppercase and lowercase letters. We describe a system for deriving an inferred capitalization value from closely related languages by phonological similarity, and illustrate the system using several related Western Iranian languages. L16-1529 @@ -5658,7 +5658,7 @@ A Regional News Corpora for Contextualized Entity Discovery and Linking - AdrianBraşoveanu + AdrianBraşoveanu Lyndon J.B.Nixon AlbertWeichselbraun ArnoScharl @@ -5689,7 +5689,7 @@ Named Entity Resources - Overview and Outlook MaudEhrmann DamienNouvel - SophieRosset + SophieRosset 3349–3356 Recognition of real-world entities is crucial for most NLP applications. Since its introduction some twenty years ago, named entity processing has undergone a significant evolution with, among others, the definition of new tasks (e.g. entity linking) and the emergence of new types of data (e.g. speech transcriptions, micro-blogging). These pose certainly new challenges which affect not only methods and algorithms but especially linguistic resources. Where do we stand with respect to named entity resources? This paper aims at providing a systematic overview of named entity resources, accounting for qualities such as multilingualism, dynamicity and interoperability, and to identify shortfalls in order to guide future developments. L16-1534 @@ -5705,9 +5705,9 @@ Using Word Embeddings to Translate Named Entities - Octavia-MariaŞulea + Octavia-MariaŞulea SergiuNisioi - Liviu P.Dinu + Liviu P.Dinu 3362–3366 In this paper we investigate the usefulness of neural word embeddings in the process of translating Named Entities (NEs) from a resource-rich language to a language low on resources relevant to the task at hand, introducing a novel, yet simple way of obtaining bilingual word vectors. Inspired by observations in (Mikolov et al., 2013b), which show that training their word vector model on comparable corpora yields comparable vector space representations of those corpora, reducing the problem of translating words to finding a rotation matrix, and results in (Zou et al., 2013), which showed that bilingual word embeddings can improve Chinese Named Entity Recognition (NER) and English to Chinese phrase translation, we use the sentence-aligned English-French EuroParl corpora and show that word embeddings extracted from a merged corpus (corpus resulted from the merger of the two aligned corpora) can be used to NE translation. We extrapolate that word embeddings trained on merged parallel corpora are useful in Named Entity Recognition and Translation tasks for resource-poor languages. L16-1536 @@ -5747,7 +5747,7 @@ Can Topic Modelling benefit from Word Sense Information? AdrianaFerrugento - Hugo GonçaloOliveira + Hugo GonçaloOliveira AnaAlves FilipeRodrigues 3387–3393 @@ -5808,7 +5808,7 @@ Annotating and Detecting Medical Events in Clinical Notes PrescottKlassen FeiXia - MelihaYetisgen + MelihaYetisgen 3417–3421 Early detection and treatment of diseases that onset after a patient is admitted to a hospital, such as pneumonia, is critical to improving and reducing costs in healthcare. Previous studies (Tepper et al., 2013) showed that change-of-state events in clinical notes could be important cues for phenotype detection. In this paper, we extend the annotation schema proposed in (Klassen et al., 2014) to mark change-of-state events, diagnosis events, coordination, and negation. After we have completed the annotation, we build NLP systems to automatically identify named entities and medical events, which yield an f-score of 94.7% and 91.8%, respectively. L16-1545 @@ -5817,7 +5817,7 @@ Speech Synthesis of Code-Mixed Text SunayanaSitaram - Alan WBlack + Alan WBlack 3422–3428 Most Text to Speech (TTS) systems today assume that the input text is in a single language and is written in the same language that the text needs to be synthesized in. However, in bilingual and multilingual communities, code mixing or code switching occurs in speech, in which speakers switch between languages in the same utterance. Due to the popularity of social media, we now see code-mixing even in text in these multilingual communities. TTS systems capable of synthesizing such text need to be able to handle text that is written in multiple languages and scripts. Code-mixed text poses many challenges to TTS systems, such as language identification, spelling normalization and pronunciation modeling. In this work, we describe a preliminary framework for synthesizing code-mixed text. We carry out experiments on synthesizing code-mixed Hindi and English text. We find that there is a significant user preference for TTS systems that can correctly identify and pronounce words in different languages. L16-1546 @@ -5855,7 +5855,7 @@ Kai FredericEngelmann FlorianLier SimonSchulz - PhilippCimiano + PhilippCimiano FriederikeEyssel ThomasHermann FranzKummert @@ -5876,8 +5876,8 @@ Chung-NingChang KevinBowden MichaelNeff - JeanFox Tree - MarilynWalker + JeanFox Tree + MarilynWalker 3447–3454 Story-telling is a fundamental and prevalent aspect of human social behavior. In the wild, stories are told conversationally in social settings, often as a dialogue and with accompanying gestures and other nonverbal behavior. This paper presents a new corpus, the Story Dialogue with Gestures (SDG) corpus, consisting of 50 personal narratives regenerated as dialogues, complete with annotations of gesture placement and accompanying gesture forms. The corpus includes dialogues generated by human annotators, gesture annotations on the human generated dialogues, videos of story dialogues generated from this representation, video clips of each gesture used in the gesture annotations, and annotations of the original personal narratives with a deep representation of story called a Story Intention Graph. Our long term goal is the automatic generation of story co-tellings as animated dialogues from the Story Intention Graph. We expect this corpus to be a useful resource for researchers interested in natural language generation, intelligent virtual agents, generation of nonverbal behavior, and story and narrative representations. L16-1550 @@ -5901,8 +5901,8 @@ JacksonTolins KrisLiu MichaelNeff - MarilynWalker - JeanFox Tree + MarilynWalker + JeanFox Tree 3461–3468 We present a corpus of 44 human-agent verbal and gestural story retellings designed to explore whether humans would gesturally entrain to an embodied intelligent virtual agent. We used a novel data collection method where an agent presented story components in installments, which the human would then retell to the agent. At the end of the installments, the human would then retell the embodied animated agent the story as a whole. This method was designed to allow us to observe whether changes in the agent’s gestural behavior would result in human gestural changes. The agent modified its gestures over the course of the story, by starting out the first installment with gestural behaviors designed to manifest extraversion, and slowly modifying gestures to express introversion over time, or the reverse. The corpus contains the verbal and gestural transcripts of the human story retellings. The gestures were coded for type, handedness, temporal structure, spatial extent, and the degree to which the participants’ gestures match those produced by the agent. The corpus illustrates the variation in expressive behaviors produced by users interacting with embodied virtual characters, and the degree to which their gestures were influenced by the agent’s dynamic changes in personality-based expressive style. L16-1552 @@ -5913,8 +5913,8 @@ JacksonTolins KrisLiu YingyingWang - Jean E.Fox Tree - MarilynWalker + Jean E.Fox Tree + MarilynWalker MichaelNeff 3469–3476 This paper presents a new corpus, the Personality Dyads Corpus, consisting of multimodal data for three conversations between three personality-matched, two-person dyads (a total of 9 separate dialogues). Participants were selected from a larger sample to be 0.8 of a standard deviation above or below the mean on the Big-Five Personality extraversion scale, to produce an Extravert-Extravert dyad, an Introvert-Introvert dyad, and an Extravert-Introvert dyad. Each pair carried out conversations for three different tasks. The conversations were recorded using optical motion capture for the body and data gloves for the hands. Dyads’ speech was transcribed and the gestural and postural behavior was annotated with ANVIL. The released corpus includes personality profiles, ANVIL files containing speech transcriptions and the gestural annotations, and BVH files containing body and hand motion in 3D. @@ -5925,7 +5925,7 @@ Crowdsourcing Ontology Lexicons BettinaLanser ChristinaUnger - PhilippCimiano + PhilippCimiano 3477–3484 In order to make the growing amount of conceptual knowledge available through ontologies and datasets accessible to humans, NLP applications need access to information on how this knowledge can be verbalized in natural language. One way to provide this kind of information are ontology lexicons, which apart from the actual verbalizations in a given target language can provide further, rich linguistic information about them. Compiling such lexicons manually is a very time-consuming task and requires expertise both in Semantic Web technologies and lexicon engineering, as well as a very good knowledge of the target language at hand. In this paper we present an alternative approach to generating ontology lexicons by means of crowdsourcing: We use CrowdFlower to generate a small Japanese ontology lexicon for ten exemplary ontology elements from the DBpedia ontology according to a two-stage workflow, the main underlying idea of which is to turn the task of generating lexicon entries into a translation task; the starting point of this translation task is a manually created English lexicon for DBpedia. Comparison of the results to a manually created Japanese lexicon shows that the presented workflow is a viable option if an English seed lexicon is already available. L16-1554 @@ -5956,7 +5956,7 @@ Temporal Information Annotation: Crowd vs. Experts TommasoCaselli - RacheleSprugnoli + RacheleSprugnoli OanaInel 3502–3509 This paper describes two sets of crowdsourcing experiments on temporal information annotation conducted on two languages, i.e., English and Italian. The first experiment, launched on the CrowdFlower platform, was aimed at classifying temporal relations given target entities. The second one, relying on the CrowdTruth metric, consisted in two subtasks: one devoted to the recognition of events and temporal expressions and one to the detection and classification of temporal relations. The outcomes of the experiments suggest a valuable use of crowdsourcing annotations also for a complex task like Temporal Processing. @@ -5966,8 +5966,8 @@ A Tangled Web: The Faint Signals of Deception in Text - Boulder Lies and Truth Corpus (<fixed-case>BLT</fixed-case>-<fixed-case>C</fixed-case>) FrancoSalvetti - John B.Lowe - James H.Martin + John B.Lowe + James H.Martin 3510–3517 We present an approach to creating corpora for use in detecting deception in text, including a discussion of the challenges peculiar to this task. Our approach is based on soliciting several types of reviews from writers and was implemented using Amazon Mechanical Turk. We describe the multi-dimensional corpus of reviews built using this approach, available free of charge from LDC as the Boulder Lies and Truth Corpus (BLT-C). Challenges for both corpus creation and the deception detection include the fact that human performance on the task is typically at chance, that the signal is faint, that paid writers such as turkers are sometimes deceptive, and that deception is a complex human behavior; manifestations of deception depend on details of domain, intrinsic properties of the deceiver (such as education, linguistic competence, and the nature of the intention), and specifics of the deceptive act (e.g., lying vs. fabricating.) To overcome the inherent lack of ground truth, we have developed a set of semi-automatic techniques to ensure corpus validity. We present some preliminary results on the task of deception detection which suggest that the BLT-C is an improvement in the quality of resources available for this task. L16-1558 @@ -5975,7 +5975,7 @@ Finding Alternative Translations in a Large Corpus of Movie Subtitle - JörgTiedemann + JörgTiedemann 3518–3522 OpenSubtitles.org provides a large collection of user contributed subtitles in various languages for movies and TV programs. Subtitle translations are valuable resources for cross-lingual studies and machine translation research. A less explored feature of the collection is the inclusion of alternative translations, which can be very useful for training paraphrase systems or collecting multi-reference test suites for machine translation. However, differences in translation may also be due to misspellings, incomplete or corrupt data files, or wrongly aligned subtitles. This paper reports our efforts in recognising and classifying alternative subtitle translations with language independent techniques. We use time-based alignment with lexical re-synchronisation techniques and BLEU score filters and sort alternative translations into categories using edit distance metrics and heuristic rules. Our approach produces large numbers of sentence-aligned translation alternatives for over 50 languages provided via the OPUS corpus collection. L16-1559 @@ -5985,7 +5985,7 @@ Exploiting a Large Strongly Comparable Corpus ThierryEtchegoyhen AndoniAzpeitia - NaiaraPérez + NaiaraPérez 3523–3529 This article describes a large comparable corpus for Basque and Spanish and the methods employed to build a parallel resource from the original data. The EITB corpus, a strongly comparable corpus in the news domain, is to be shared with the research community, as an aid for the development and testing of methods in comparable corpora exploitation, and as basis for the improvement of data-driven machine translation systems for this language pair. Competing approaches were explored for the alignment of comparable segments in the corpus, resulting in the design of a simple method which outperformed a state-of-the-art method on the corpus test sets. The method we present is highly portable, computationally efficient, and significantly reduces deployment work, a welcome result for the exploitation of comparable corpora. L16-1560 @@ -6005,7 +6005,7 @@ <fixed-case>WAGS</fixed-case>: A Beautiful <fixed-case>E</fixed-case>nglish-<fixed-case>I</fixed-case>talian Benchmark Supporting Word Alignment Evaluation on Rare Words LuisaBentivogli MauroCettolo - M. AminFarajian + M. AminFarajian MarcelloFederico 3535–3542 This paper presents WAGS (Word Alignment Gold Standard), a novel benchmark which allows extensive evaluation of WA tools on out-of-vocabulary (OOV) and rare words. WAGS is a subset of the Common Test section of the Europarl English-Italian parallel corpus, and is specifically tailored to OOV and rare words. WAGS is composed of 6,715 sentence pairs containing 11,958 occurrences of OOV and rare words up to frequency 15 in the Europarl Training set (5,080 English words and 6,878 Italian words), representing almost 3% of the whole text. Since WAGS is focused on OOV/rare words, manual alignments are provided for these words only, and not for the whole sentences. Two off-the-shelf word aligners have been evaluated on WAGS, and results have been compared to those obtained on an existing benchmark tailored to full text alignment. The results obtained confirm that WAGS is a valuable resource, which allows a statistically sound evaluation of WA systems’ performance on OOV and rare words, as well as extensive data analyses. WAGS is publicly released under a Creative Commons Attribution license. @@ -6015,7 +6015,7 @@ Manual and Automatic Paraphrases for <fixed-case>MT</fixed-case> Evaluation AlešTamchyna - PetraBarančíková + PetraBarančíková 3543–3548 Paraphrasing of reference translations has been shown to improve the correlation with human judgements in automatic evaluation of machine translation (MT) outputs. In this work, we present a new dataset for evaluating English-Czech translation based on automatic paraphrases. We compare this dataset with an existing set of manually created paraphrases and find that even automatic paraphrases can improve MT evaluation. We have also propose and evaluate several criteria for selecting suitable reference translations from a larger set. L16-1563 @@ -6036,10 +6036,10 @@ HelgeDyvik PaulMeurer VictoriaRosén - KoenraadDe Smedt + KoenraadDe Smedt PetterHaugereid Gyri SmørdalLosnegaard - Gunn IngerLyse + Gunn IngerLyse MarthaThunes 3555–3562 We present NorGramBank, a treebank for Norwegian with highly detailed LFG analyses. It is one of many treebanks made available through the INESS treebanking infrastructure. NorGramBank was constructed as a parsebank, i.e. by automatically parsing a corpus, using the wide coverage grammar NorGram. One part consisting of 350,000 words has been manually disambiguated using computer-generated discriminants. A larger part of 50 M words has been stochastically disambiguated. The treebank is dynamic: by global reparsing at certain intervals it is kept compatible with the latest versions of the grammar and the lexicon, which are continually further developed in interaction with the annotators. A powerful query language, INESS Search, has been developed for search across formalisms in the INESS treebanks, including LFG c- and f-structures. Evaluation shows that the grammar provides about 85% of randomly selected sentences with good analyses. Agreement among the annotators responsible for manual disambiguation is satisfactory, but also suggests desirable simplifications of the grammar. @@ -6049,8 +6049,8 @@ Accurate Deep Syntactic Parsing of Graphs: The Case of <fixed-case>F</fixed-case>rench CorentinRibeyre - EricVillemonte de la Clergerie - DjaméSeddah + EricVillemonte de la Clergerie + DjaméSeddah 3563–3568 Parsing predicate-argument structures in a deep syntax framework requires graphs to be predicted. Argument structures represent a higher level of abstraction than the syntactic ones and are thus more difficult to predict even for highly accurate parsing models on surfacic syntax. In this paper we investigate deep syntax parsing, using a French data set (Ribeyre et al., 2014a). We demonstrate that the use of topologically different types of syntactic features, such as dependencies, tree fragments, spines or syntactic paths, brings a much needed context to the parser. Our higher-order parsing model, gaining thus up to 4 points, establishes the state of the art for parsing French deep syntactic structures. L16-1566 @@ -6061,7 +6061,7 @@ AbdelatiHawwari MohammedAttia MahmoudGhoneim - MonaDiab + MonaDiab 3569–3577 Idafa in traditional Arabic grammar is an umbrella construction that covers several phenomena including what is expressed in English as noun-noun compounds and Saxon and Norman genitives. Additionally, Idafa participates in some other constructions, such as quantifiers, quasi-prepositions, and adjectives. Identifying the various types of the Idafa construction (IC) is of importance to Natural Language processing (NLP) applications. Noun-Noun compounds exhibit special behavior in most languages impacting their semantic interpretation. Hence distinguishing them could have an impact on downstream NLP applications. The most comprehensive syntactic representation of the Arabic language is the LDC Arabic Treebank (ATB). In the ATB, ICs are not explicitly labeled and furthermore, there is no distinction between ICs of noun-noun relations and other traditional ICs. Hence, we devise a detailed syntactic and semantic typification process of the IC phenomenon in Arabic. We target the ATB as a platform for this classification. We render the ATB annotated with explicit IC labels but with the further semantic characterization which is useful for syntactic, semantic and cross language processing. Our typification of IC comprises 3 main syntactic IC types: FIC, GIC, and TIC, and they are further divided into 10 syntactic subclasses. The TIC group is further classified into semantic relations. We devise a method for automatic IC labeling and compare its yield against the CATiB treebank. Our evaluation shows that we achieve the same level of accuracy, but with the additional fine-grained classification into the various syntactic and semantic types. L16-1567 @@ -6103,7 +6103,7 @@ Elif AhsenAcar DenizZeyrek MurathanKurfalı - CemBozşahin + CemBozşahin 3600–3606 This study primarily aims to build a Turkish psycholinguistic database including three variables: word frequency, age of acquisition (AoA), and imageability, where AoA and imageability information are limited to nouns. We used a corpus-based approach to obtain information about the AoA variable. We built two corpora: a child literature corpus (CLC) including 535 books written for 3-12 years old children, and a corpus of transcribed children’s speech (CSC) at ages 1;4-4;8. A comparison between the word frequencies of CLC and CSC gave positive correlation results, suggesting the usability of the CLC to extract AoA information. We assumed that frequent words of the CLC would correspond to early acquired words whereas frequent words of a corpus of adult language would correspond to late acquired words. To validate AoA results from our corpus-based approach, a rated AoA questionnaire was conducted on adults. Imageability values were collected via a different questionnaire conducted on adults. We conclude that it is possible to deduce AoA information for high frequency words with the corpus-based approach. The results about low frequency words were inconclusive, which is attributed to the fact that corpus-based AoA information is affected by the strong negative correlation between corpus frequency and rated AoA. L16-1571 @@ -6112,7 +6112,7 @@ Domain-Specific Corpus Expansion with Focused Webcrawling SteffenRemus - ChrisBiemann + ChrisBiemann 3607–3611 This work presents a straightforward method for extending or creating in-domain web corpora by focused webcrawling. The focused webcrawler uses statistical N-gram language models to estimate the relatedness of documents and weblinks and needs as input only N-grams or plain texts of a predefined domain and seed URLs as starting points. Two experiments demonstrate that our focused crawler is able to stay focused in domain and language. The first experiment shows that the crawler stays in a focused domain, the second experiment demonstrates that language models trained on focused crawls obtain better perplexity scores on in-domain corpora. We distribute the focused crawler as open source software. L16-1572 @@ -6121,7 +6121,7 @@ Corpus-Based Diacritic Restoration for <fixed-case>S</fixed-case>outh <fixed-case>S</fixed-case>lavic Languages NikolaLjubešić - TomažErjavec + TomažErjavec DarjaFišer 3612–3616 In computer-mediated communication, Latin-based scripts users often omit diacritics when writing. Such text is typically easily understandable to humans but very difficult for computational processing because many words become ambiguous or unknown. Letter-level approaches to diacritic restoration generalise better and do not require a lot of training data but word-level approaches tend to yield better results. However, they typically rely on a lexicon which is an expensive resource, not covering non-standard forms, and often not available for less-resourced languages. In this paper we present diacritic restoration models that are trained on easy-to-acquire corpora. We test three different types of corpora (Wikipedia, general web, Twitter) for three South Slavic languages (Croatian, Serbian and Slovene) and evaluate them on two types of text: standard (Wikipedia) and non-standard (Twitter). The proposed approach considerably outperforms charlifter, so far the only open source tool available for this task. We make the best performing systems freely available. @@ -6130,7 +6130,7 @@ Automatic Recognition of Linguistic Replacements in Text Series Generated from Keystroke Logs - DanielCouto-Vale + DanielCouto-Vale StellaNeumann PaulaNiemietz 3617–3623 @@ -6143,7 +6143,7 @@ ElenaManishina BassamJabaian StéphaneHuet - FabriceLefèvre + FabriceLefèvre 3624–3631 As data-driven approaches started to make their way into the Natural Language Generation (NLG) domain, the need for automation of corpus building and extension became apparent. Corpus creation and extension in data-driven NLG domain traditionally involved manual paraphrasing performed by either a group of experts or with resort to crowd-sourcing. Building the training corpora manually is a costly enterprise which requires a lot of time and human resources. We propose to automate the process of corpus extension by integrating automatically obtained synonyms and paraphrases. Our methodology allowed us to significantly increase the size of the training corpus and its level of variability (the number of distinct tokens and specific syntactic structures). Our extension solutions are fully automatic and require only some initial validation. The human evaluation results confirm that in many cases native speakers favor the outputs of the model built on the extended corpus. L16-1575 @@ -6152,8 +6152,8 @@ Bilbo-Val: Automatic Identification of Bibliographical Zone in Papers AmalHtait - SebastienFournier - PatriceBellot + SebastienFournier + PatriceBellot 3632–3636 In this paper, we present the automatic annotation of bibliographical references’ zone in papers and articles of XML/TEI format. Our work is applied through two phases: first, we use machine learning technology to classify bibliographical and non-bibliographical paragraphs in papers, by means of a model that was initially created to differentiate between the footnotes containing or not containing bibliographical references. The previous description is one of BILBO’s features, which is an open source software for automatic annotation of bibliographic reference. Also, we suggest some methods to minimize the margin of error. Second, we propose an algorithm to find the largest list of bibliographical references in the article. The improvement applied on our model results an increase in the model’s efficiency with an Accuracy equal to 85.89. And by testing our work, we are able to achieve 72.23% as an average for the percentage of success in detecting bibliographical references’ zone. L16-1576 @@ -6164,7 +6164,7 @@ WajdiZaghouani HoudaBouamor AbdelatiHawwari - MonaDiab + MonaDiab OssamaObeid MahmoudGhoneim SawsanAlqahtani @@ -6178,7 +6178,7 @@ Applying the Cognitive Machine Translation Evaluation Approach to <fixed-case>A</fixed-case>rabic IrinaTemnikova WajdiZaghouani - StephanVogel + StephanVogel NizarHabash 3644–3651 The goal of the cognitive machine translation (MT) evaluation approach is to build classifiers which assign post-editing effort scores to new texts. The approach helps estimate fair compensation for post-editors in the translation industry by evaluating the cognitive difficulty of post-editing MT output. The approach counts the number of errors classified in different categories on the basis of how much cognitive effort they require in order to be corrected. In this paper, we present the results of applying an existing cognitive evaluation approach to Modern Standard Arabic (MSA). We provide a comparison of the number of errors and categories of errors in three MSA texts of different MT quality (without any language-specific adaptation), as well as a comparison between MSA texts and texts from three Indo-European languages (Russian, Spanish, and Bulgarian), taken from a previous experiment. The results show how the error distributions change passing from the MSA texts of worse MT quality to MSA texts of better MT quality, as well as a similarity in distinguishing the texts of better MT quality for all four languages. @@ -6187,7 +6187,7 @@ A Reading Comprehension Corpus for Machine Translation Evaluation - CarolinaScarton + CarolinaScarton LuciaSpecia 3652–3658 Effectively assessing Natural Language Processing output tasks is a challenge for research in the area. In the case of Machine Translation (MT), automatic metrics are usually preferred over human evaluation, given time and budget constraints. However, traditional automatic metrics (such as BLEU) are not reliable for absolute quality assessment of documents, often producing similar scores for documents translated by the same MT system. For scenarios where absolute labels are necessary for building models, such as document-level Quality Estimation, these metrics can not be fully trusted. In this paper, we introduce a corpus of reading comprehension tests based on machine translated documents, where we evaluate documents based on answers to questions by fluent speakers of the target language. We describe the process of creating such a resource, the experiment design and agreement between the test takers. Finally, we discuss ways to convert the reading comprehension test into document-level quality scores. @@ -6218,7 +6218,7 @@ <fixed-case>MARMOT</fixed-case>: A Toolkit for Translation Quality Estimation at the Word Level VarvaraLogacheva - ChrisHokamp + ChrisHokamp LuciaSpecia 3671–3674 We present Marmot~― a new toolkit for quality estimation (QE) of machine translation output. Marmot contains utilities targeted at quality estimation at the word and phrase level. However, due to its flexibility and modularity, it can also be extended to work at the sentence level. In addition, it can be used as a framework for extracting features and learning models for many common natural language processing tasks. The tool has a set of state-of-the-art features for QE, and new features can easily be added. The tool is open-source and can be downloaded from https://github.com/qe-team/marmot/ @@ -6266,10 +6266,10 @@ <fixed-case>GATE</fixed-case>-Time: Extraction of Temporal Expressions and Events - LeonDerczynski + LeonDerczynski JannikStrötgen DianaMaynard - Mark A.Greenwood + Mark A.Greenwood ManuelJung 3702–3708 GATE is a widely used open-source solution for text processing with a large user community. It contains components for several natural language processing tasks. However, temporal information extraction functionality within GATE has been rather limited so far, despite being a prerequisite for many application scenarios in the areas of natural language processing and information retrieval. This paper presents an integrated approach to temporal information processing. We take state-of-the-art tools in temporal expression and event recognition and bring them together to form an openly-available resource within the GATE infrastructure. GATE-Time provides annotation in the form of TimeML events and temporal expressions complying with this mature ISO standard for temporal semantic annotation of documents. Major advantages of GATE-Time are (i) that it relies on HeidelTime for temporal tagging, so that temporal expressions can be extracted and normalized in multiple languages and across different domains, (ii) it includes a modern, fast event recognition and classification tool, and (iii) that it can be combined with different linguistic pre-processing annotations, and is thus not bound to license restricted preprocessing components. @@ -6290,7 +6290,7 @@ JustinMott AnnBies ZhiyiSong - StephanieStrassel + StephanieStrassel 3717–3722 This paper introduces the parallel Chinese-English Entities, Relations and Events (ERE) corpora developed by Linguistic Data Consortium under the DARPA Deep Exploration and Filtering of Text (DEFT) Program. Original Chinese newswire and discussion forum documents are annotated for two versions of the ERE task. The texts are manually translated into English and then annotated for the same ERE tasks on the English translation, resulting in a rich parallel resource that has utility for performers within the DEFT program, for participants in NIST’s Knowledge Base Population evaluations, and for cross-language projection research more generally. L16-1589 @@ -6310,8 +6310,8 @@ An Empirical Exploration of Moral Foundations Theory in Partisan News Sources DeanFulgoni JordanCarpenter - LyleUngar - DanielPreoţiuc-Pietro + LyleUngar + DanielPreoţiuc-Pietro 3730–3736 News sources frame issues in different ways in order to appeal or control the perception of their readers. We present a large scale study of news articles from partisan sources in the US across a variety of different issues. We first highlight that differences between sides exist by predicting the political leaning of articles of unseen political bias. Framing can be driven by different types of morality that each group values. We emphasize differences in framing of different news building on the moral foundations theory quantified using hand crafted lexicons. Our results show that partisan sources frame political issues differently both in terms of words usage and through the moral foundations they relate to. L16-1591 @@ -6321,7 +6321,7 @@ Building a Dataset for Possessions Identification in Text CarmenBanea XiChen - RadaMihalcea + RadaMihalcea 3737–3740 Just as industrialization matured from mass production to customization and personalization, so has the Web migrated from generic content to public disclosures of one’s most intimately held thoughts, opinions and beliefs. This relatively new type of data is able to represent finer and more narrowly defined demographic slices. If until now researchers have primarily focused on leveraging personalized content to identify latent information such as gender, nationality, location, or age of the author, this study seeks to establish a structured way of extracting possessions, or items that people own or are entitled to, as a way to ultimately provide insights into people’s behaviors and characteristics. In order to promote more research in this area, we are releasing a set of 798 possessions extracted from blog genre, where possessions are marked at different confidence levels, as well as a detailed set of guidelines to help in future annotation studies. L16-1592 @@ -6330,7 +6330,7 @@ The Query of Everything: Developing Open-Domain, Natural-Language Queries for <fixed-case>BOLT</fixed-case> Information Retrieval KiraGriffitt - StephanieStrassel + StephanieStrassel 3741–3747 The DARPA BOLT Information Retrieval evaluations target open-domain natural-language queries over a large corpus of informal text in English, Chinese and Egyptian Arabic. We outline the goals of BOLT IR, comparing it with the prior GALE Distillation task. After discussing the properties of the BOLT IR corpus, we provide a detailed description of the query creation process, contrasting the summary query format presented to systems at run time with the full query format created by annotators. We describe the relevance criteria used to assess BOLT system responses, highlighting the evolution of the procedures used over the three evaluation phases. We provide a detailed review of the decision points model for relevance assessment introduced during Phase 2, and conclude with information about inter-assessor consistency achieved with the decision points assessment model. L16-1593 @@ -6340,7 +6340,7 @@ The Validation of <fixed-case>MRCPD</fixed-case> Cross-language Expansions on Imageability Ratings TingLiu KitCho - TomekStrzalkowski + TomekStrzalkowski SamiraShaikh MehrdadMirzaei 3748–3751 @@ -6396,7 +6396,7 @@ <fixed-case>PROMETHEUS</fixed-case>: A Corpus of Proverbs Annotated with Metaphors - GözdeÖzbal + GözdeÖzbal CarloStrapparava Serra SinemTekiroğlu 3787–3793 @@ -6407,7 +6407,7 @@ Corpus Annotation within the <fixed-case>F</fixed-case>rench <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et: a Domain-by-domain Methodology MarianneDjemaa - MarieCandito + MarieCandito PhilippeMuller LaureVieu 3794–3801 @@ -6424,7 +6424,7 @@ LotfiAbouda AgataSavary DenisMaurel - IrisEshkol + IrisEshkol DelphineBattistelli 3802–3806 This paper reports a critical analysis of the ISO TimeML standard, in the light of several experiences of temporal annotation that were conducted on spoken French. It shows that the norm suffers from weaknesses that should be corrected to fit a larger variety of needs inNLP and in corpus linguistics. We present our proposition of some improvements of the norm before it will be revised by the ISO Committee in 2017. These modifications concern mainly (1) Enrichments of well identified features of the norm: temporal function of TIMEX time expressions, additional types for TLINK temporal relations; (2) Deeper modifications concerning the units or features annotated: clarification between time and tense for EVENT units, coherence of representation between temporal signals (the SIGNAL unit) and TIMEX modifiers (the MOD feature); (3) A recommendation to perform temporal annotation on top of a syntactic (rather than lexical) layer (temporal annotation on a treebank). @@ -6435,7 +6435,7 @@ A General Framework for the Annotation of Causality Based on <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et LaureVieu PhilippeMuller - MarieCandito + MarieCandito MarianneDjemaa 3807–3813 We present here a general set of semantic frames to annotate causal expressions, with a rich lexicon in French and an annotated corpus of about 5000 instances of causal lexical items with their corresponding semantic frames. The aim of our project is to have both the largest possible coverage of causal phenomena in French, across all parts of speech, and have it linked to a general semantic framework such as FN, to benefit in particular from the relations between other semantic frames, e.g., temporal ones or intentional ones, and the underlying upper lexical ontology that enable some forms of reasoning. This is part of the larger ASFALDA French FrameNet project, which focuses on a few different notional domains which are interesting in their own right (Djemma et al., 2016), including cognitive positions and communication frames. In the process of building the French lexicon and preparing the annotation of the corpus, we had to remodel some of the frames proposed in FN based on English data, with hopefully more precise frame definitions to facilitate human annotation. This includes semantic clarifications of frames and frame elements, redundancy elimination, and added coverage. The result is arguably a significant improvement of the treatment of causality in FN itself. @@ -6453,7 +6453,7 @@ <fixed-case>S</fixed-case>pace<fixed-case>R</fixed-case>ef: A corpus of street-level geographic descriptions - JanaGötze + JanaGötze JohanBoye 3822–3827 This article describes SPACEREF, a corpus of street-level geographic descriptions. Pedestrians are walking a route in a (real) urban environment, describing their actions. Their position is automatically logged, their speech is manually transcribed, and their references to objects are manually annotated with respect to a crowdsourced geographic database. We describe how the data was collected and annotated, and how it has been used in the context of creating resources for an automatic pedestrian navigation system. @@ -6471,11 +6471,11 @@ Typed Entity and Relation Annotation on Computer Science Papers - YukaTateisi + YukaTateisi TomokoOhta SampoPyysalo YusukeMiyao - AkikoAizawa + AkikoAizawa 3836–3843 We describe our ongoing effort to establish an annotation scheme for describing the semantic structures of research articles in the computer science domain, with the intended use of developing search systems that can refine their results by the roles of the entities denoted by the query keys. In our scheme, mentions of entities are annotated with ontology-based types, and the roles of the entities are annotated as relations with other entities described in the text. So far, we have annotated 400 abstracts from the ACL anthology and the ACM digital library. In this paper, the scheme and the annotated dataset are described, along with the problems found in the course of annotation. We also show the results of automatic annotation and evaluate the corpus in a practical setting in application to topic extraction. L16-1607 @@ -6508,7 +6508,7 @@ ClaireLi SamLam BillyChiu - QinLu + QinLu MingleiLi DanXiong Roy ShingYu @@ -6521,7 +6521,7 @@ Collecting Resources in Sub-<fixed-case>S</fixed-case>aharan <fixed-case>A</fixed-case>frican Languages for Automatic Speech Recognition: a Case Study of <fixed-case>W</fixed-case>olof ElodieGauthier - LaurentBesacier + LaurentBesacier SylvieVoisin MichaelMelese Uriel PascalElingui @@ -6535,7 +6535,7 @@ JorisPelemans LyanVerwimp KrisDemuynck - HugoVan hamme + HugoVan hamme PatrickWambacq 3868–3871 In this paper we present SCALE, a new Python toolkit that contains two extensions to n-gram language models. The first extension is a novel technique to model compound words called Semantic Head Mapping (SHM). The second extension, Bag-of-Words Language Modeling (BagLM), bundles popular models such as Latent Semantic Analysis and Continuous Skip-grams. Both extensions scale to large data and allow the integration into first-pass ASR decoding. The toolkit is open source, includes working examples and can be found on http://github.com/jorispelemans/scale. @@ -6555,7 +6555,7 @@ <fixed-case>BAS</fixed-case> Speech Science Web Services - an Update of Current Developments ThomasKisler - UweReichel + UweReichel FlorianSchiel ChristophDraxler BernhardJackl @@ -6574,8 +6574,8 @@ JaimeFerreira EugénioRibeiro HelenaMoniz - David Martinsde Matos - RicardoRibeiro + David Martinsde Matos + RicardoRibeiro 3886–3892 This paper presents SPA, a web-based Speech Analytics platform that integrates several speech processing modules and that makes it possible to use them through the web. It was developed with the aim of facilitating the usage of the modules, without the need to know about software dependencies and specific configurations. Apart from being accessed by a web-browser, the platform also provides a REST API for easy integration with other applications. The platform is flexible, scalable, provides authentication for access restrictions, and was developed taking into consideration the time and effort of providing new services. The platform is still being improved, but it already integrates a considerable number of audio and text processing modules, including: Automatic transcription, speech disfluency classification, emotion detection, dialog act recognition, age and gender classification, non-nativeness detection, hyper-articulation detection, dialog act recognition, and two external modules for feature extraction and DTMF detection. This paper describes the SPA architecture, presents the already integrated modules, and provides a detailed description for the ones most recently integrated. L16-1615 @@ -6600,7 +6600,7 @@ JohnLawrence JoonsukPark KatarzynaBudzynska - ChrisReed + ChrisReed 3899–3906 Governments are increasingly utilising online platforms in order to engage with, and ascertain the opinions of, their citizens. Whilst policy makers could potentially benefit from such enormous feedback from society, they first face the challenge of making sense out of the large volumes of data produced. This creates a demand for tools and technologies which will enable governments to quickly and thoroughly digest the points being made and to respond accordingly. By determining the argumentative and dialogical structures contained within a debate, we are able to determine the issues which are divisive and those which attract agreement. This paper proposes a method of graph-based analytics which uses properties of graphs representing networks of arguments pro- & con- in order to automatically analyse issues which divide citizens about new regulations. By future application of the most recent advances in argument mining, the results reported here will have a chance to scale up to enable sense-making of the vast amount of feedback received from citizens on directions that policy should take. L16-1617 @@ -6609,7 +6609,7 @@ meta<fixed-case>TED</fixed-case>: a Corpus of Metadiscourse for Spoken Language RuiCorreia - NunoMamede + NunoMamede JorgeBaptista MaxineEskenazi 3907–3913 @@ -6641,7 +6641,7 @@ Focus Annotation of Task-based Data: A Comparison of Expert and Crowd-Sourced Annotation in a Reading Comprehension Corpus KordulaDe Kuthy RamonZiai - DetmarMeurers + DetmarMeurers 3928–3935 While the formal pragmatic concepts in information structure, such as the focus of an utterance, are precisely defined in theoretical linguistics and potentially very useful in conceptual and practical terms, it has turned out to be difficult to reliably annotate such notions in corpus data. We present a large-scale focus annotation effort designed to overcome this problem. Our annotation study is based on the tasked-based corpus CREG, which consists of answers to explicitly given reading comprehension questions. We compare focus annotation by trained annotators with a crowd-sourcing setup making use of untrained native speakers. Given the task context and an annotation process incrementally making the question form and answer type explicit, the trained annotators reach substantial agreement for focus annotation. Interestingly, the crowd-sourcing setup also supports high-quality annotation ― for specific subtypes of data. Finally, we turn to the question whether the relevance of focus annotation can be extrinsically evaluated. We show that automatic short-answer assessment significantly improves for focus annotated data. The focus annotated CREG corpus is freely available and constitutes the largest such resource for German. L16-1621 @@ -6649,10 +6649,10 @@ Homing in on <fixed-case>T</fixed-case>witter Users: Evaluating an Enhanced Geoparser for User Profile Locations - BeatriceAlex + BeatriceAlex ClareLlewellyn ClaireGrover - JonOberlander + JonOberlander RichardTobin 3936–3944 Twitter-related studies often need to geo-locate Tweets or Twitter users, identifying their real-world geographic locations. As tweet-level geotagging remains rare, most prior work exploited tweet content, timezone and network information to inform geolocation, or else relied on off-the-shelf tools to geolocate users from location information in their user profiles. However, such user location metadata is not consistently structured, causing such tools to fail regularly, especially if a string contains multiple locations, or if locations are very fine-grained. We argue that user profile location (UPL) and tweet location need to be treated as distinct types of information from which differing inferences can be drawn. Here, we apply geoparsing to UPLs, and demonstrate how task performance can be improved by adapting our Edinburgh Geoparser, which was originally developed for processing English text. We present a detailed evaluation method and results, including inter-coder agreement. We demonstrate that the optimised geoparser can effectively extract and geo-reference multiple locations at different levels of granularity with an F1-score of around 0.90. We also illustrate how geoparsed UPLs can be exploited for international information trade studies and country-level sentiment analysis. @@ -6661,7 +6661,7 @@ A Dataset for Detecting Stance in Tweets - SaifMohammad + SaifMohammad SvetlanaKiritchenko ParinazSobhani XiaodanZhu @@ -6673,7 +6673,7 @@ Emotion Analysis on <fixed-case>T</fixed-case>witter: The Hidden Challenge - LucaDini + LucaDini AndréBittar 3953–3958 In this paper, we present an experiment to detect emotions in tweets. Unlike much previous research, we draw the important distinction between the tasks of emotion detection in a closed world assumption (i.e. every tweet is emotional) and the complicated task of identifying emotional versus non-emotional tweets. Given an apparent lack of appropriately annotated data, we created two corpora for these tasks. We describe two systems, one symbolic and one based on machine learning, which we evaluated on our datasets. Our evaluation shows that a machine learning classifier performs best on emotion detection, while a symbolic approach is better for identifying relevant (i.e. emotional) tweets. @@ -6711,8 +6711,8 @@ Comprehensive and Consistent <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank Light Verb Annotation - ClaireBonial - MarthaPalmer + ClaireBonial + MarthaPalmer 3980–3985 Recent efforts have focused on expanding the annotation coverage of PropBank from verb relations to adjective and noun relations, as well as light verb constructions (e.g., make an offer, take a bath). While each new relation type has presented unique annotation challenges, ensuring consistent and comprehensive annotation of light verb constructions has proved particularly challenging, given that light verb constructions are semi-productive, difficult to define, and there are often borderline cases. This research describes the iterative process of developing PropBank annotation guidelines for light verb constructions, the current guidelines, and a comparison to related resources. L16-1628 @@ -6722,7 +6722,7 @@ Inconsistency Detection in Semantic Annotation NoraHollenstein NathanSchneider - BonnieWebber + BonnieWebber 3986–3990 Inconsistencies are part of any manually annotated corpus. Automatically finding these inconsistencies and correcting them (even manually) can increase the quality of the data. Past research has focused mainly on detecting inconsistency in syntactic annotation. This work explores new approaches to detecting inconsistency in semantic annotation. Two ranking methods are presented in this paper: a discrepancy ranking and an entropy ranking. Those methods are then tested and evaluated on multiple corpora annotated with multiword expressions and supersense labels. The results show considerable improvements in detecting inconsistency candidates over a random baseline. Possible applications of methods for inconsistency detection are improving the annotation procedure as well as the guidelines and correcting errors in completed annotations. L16-1629 @@ -6733,12 +6733,12 @@ StephanOepen MarcoKuhlmann YusukeMiyao - DanielZeman + DanielZeman SilvieCinková - DanFlickinger - JanHajič + DanFlickinger + JanHajič AngelinaIvanova - ZdeňkaUrešová + ZdeňkaUrešová 3991–3995 We announce a new language resource for research on semantic parsing, a large, carefully curated collection of semantic dependency graphs representing multiple linguistic traditions. This resource is called SDP~2016 and provides an update and extension to previous versions used as Semantic Dependency Parsing target representations in the 2014 and 2015 Semantic Evaluation Exercises. For a common core of English text, this third edition comprises semantic dependency graphs from four distinct frameworks, packaged in a unified abstract format and aligned at the sentence and token levels. SDP 2016 is the first general release of this resource and available for licensing from the Linguistic Data Consortium in May 2016. The data is accompanied by an open-source SDP utility toolkit and system results from previous contrastive parsing evaluations against these target representations. L16-1630 @@ -6756,7 +6756,7 @@ Endangered Language Documentation: Bootstrapping a Chatino Speech Corpus, Forced Aligner, <fixed-case>ASR</fixed-case> MalgorzataĆavar - DamirĆavar + DamirĆavar HilariaCruz 4004–4011 This project approaches the problem of language documentation and revitalization from a rather untraditional angle. To improve and facilitate language documentation of endangered languages, we attempt to use corpus linguistic methods and speech and language technologies to reduce the time needed for transcription and annotation of audio and video language recordings. The paper demonstrates this approach on the example of the endangered and seriously under-resourced variety of Eastern Chatino (CTP). We show how initial speech corpora can be created that can facilitate the development of speech and language technologies for under-resourced languages by utilizing Forced Alignment tools to time align transcriptions. Time-aligned transcriptions can be used to train speech corpora and utilize automatic speech recognition tools for the transcription and annotation of untranscribed data. Speech technologies can be used to reduce the time and effort necessary for transcription and annotation of large collections of audio and video recordings in digital language archives, addressing the transcription bottleneck problem that most language archives and many under-documented languages are confronted with. This approach can increase the availability of language resources from low-resourced and endangered languages to speech and language technology research and development. @@ -6786,7 +6786,7 @@ <fixed-case>E</fixed-case>nglish-to-<fixed-case>J</fixed-case>apanese Translation vs. Dictation vs. Post-editing: Comparing Translation Modes in a Multilingual Setting MichaelCarl - AkikoAizawa + AkikoAizawa MasaruYamada 4024–4031 Speech-enabled interfaces have the potential to become one of the most efficient and ergonomic environments for human-computer interaction and for text production. However, not much research has been carried out to investigate in detail the processes and strategies involved in the different modes of text production. This paper introduces and evaluates a corpus of more than 55 hours of English-to-Japanese user activity data that were collected within the ENJA15 project, in which translators were observed while writing and speaking translations (translation dictation) and during machine translation post-editing. The transcription of the spoken data, keyboard logging and eye-tracking data were recorded with Translog-II, post-processed and integrated into the CRITT Translation Process Research-DB (TPR-DB), which is publicly available under a creative commons license. The paper presents the ENJA15 data as part of a large multilingual Chinese, Danish, German, Hindi and Spanish translation process data collection of more than 760 translation sessions. It compares the ENJA15 data with the other language pairs and reviews some of its particularities. @@ -6824,8 +6824,8 @@ Interoperability of Annotation Schemes: Using the Pepper Framework to Display <fixed-case>AWA</fixed-case> Documents in the <fixed-case>ANNIS</fixed-case> Interface TalvanyCarlotto ZuhaitzBeloki - XabierArtola - AitorSoroa + XabierArtola + AitorSoroa 4049–4054 Natural language processing applications are frequently integrated to solve complex linguistic problems, but the lack of interoperability between these tools tends to be one of the main issues found in that process. That is often caused by the different linguistic formats used across the applications, which leads to attempts to both establish standard formats to represent linguistic information and to create conversion tools to facilitate this integration. Pepper is an example of the latter, as a framework that helps the conversion between different linguistic annotation formats. In this paper, we describe the use of Pepper to convert a corpus linguistically annotated by the annotation scheme AWA into the relANNIS format, with the ultimate goal of interacting with AWA documents through the ANNIS interface. The experiment converted 40 megabytes of AWA documents, allowed their use on the ANNIS interface, and involved making architectural decisions during the mapping from AWA into relANNIS using Pepper. The main issues faced during this process were due to technical issues mainly caused by the integration of the different systems and projects, namely AWA, Pepper and ANNIS. L16-1639 @@ -6835,9 +6835,9 @@ <fixed-case>SPLIT</fixed-case>: Smart Preprocessing (Quasi) Language Independent Tool MohamedAl-Badrashiny ArfathPasha - MonaDiab + MonaDiab NizarHabash - OwenRambow + OwenRambow WaelSalloum RamyEskander 4055–4060 @@ -6847,7 +6847,7 @@ <fixed-case>A</fixed-case>rchi<fixed-case>M</fixed-case>ob - A Corpus of Spoken <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman - TanjaSamardžić + TanjaSamardžić YvesScherrer ElviraGlaser 4061–4066 @@ -6874,7 +6874,7 @@ Graphical Annotation for Syntax-Semantics Mapping - KôitiHasida + KôitiHasida 4080–4084 A potential work item (PWI) for ISO standard (MAP) about linguistic annotation concerning syntax-semantics mapping is discussed. MAP is a framework for graphical linguistic annotation to specify a mapping (set of combinations) between possible syntactic and semantic structures of the annotated linguistic data. Just like a UML diagram, a MAP diagram is formal, in the sense that it accurately specifies such a mapping. MAP provides a diagrammatic sort of concrete syntax for linguistic annotation far easier to understand than textual concrete syntax such as in XML, so that it could better facilitate collaborations among people involved in research, standardization, and practical use of linguistic data. MAP deals with syntactic structures including dependencies, coordinations, ellipses, transsentential constructions, and so on. Semantic structures treated by MAP are argument structures, scopes, coreferences, anaphora, discourse relations, dialogue acts, and so forth. In order to simplify explicit annotations, MAP allows partial descriptions, and assumes a few general rules on correspondence between syntactic and semantic compositions. L16-1644 @@ -6913,7 +6913,7 @@ Modeling Language Change in Historical Corpora: The Case of <fixed-case>P</fixed-case>ortuguese MarcosZampieri - ShervinMalmasi + ShervinMalmasi MarkDras 4098–4104 This paper presents a number of experiments to model changes in a historical Portuguese corpus composed of literary texts for the purpose of temporal text classification. Algorithms were trained to classify texts with respect to their publication date taking into account lexical variation represented as word n-grams, and morphosyntactic variation represented by part-of-speech (POS) distribution. We report results of 99.8% accuracy using word unigram features with a Support Vector Machines classifier to predict the publication date of documents in time intervals of both one century and half a century. A feature analysis is performed to investigate the most informative features for this task and how they are linked to language change. @@ -6922,7 +6922,7 @@ “He Said She Said” ― a Male/Female Corpus of <fixed-case>P</fixed-case>olish - FilipGraliński + FilipGraliński ŁukaszBorchmann PiotrWierzchoń 4105–4110 @@ -6932,7 +6932,7 @@ <fixed-case>C</fixed-case>ohere: A Toolkit for Local Coherence - Karin SimSmith + Karin SimSmith WilkerAziz LuciaSpecia 4111–4114 @@ -6964,7 +6964,7 @@ Evaluating Unsupervised <fixed-case>D</fixed-case>utch Word Embeddings as a Linguistic Resource StéphanTulkens ChrisEmmery - WalterDaelemans + WalterDaelemans 4130–4136 Word embeddings have recently seen a strong increase in interest as a result of strong performance gains on a variety of tasks. However, most of this research also underlined the importance of benchmark datasets, and the difficulty of constructing these for a variety of language-specific tasks. Still, many of the datasets used in these tasks could prove to be fruitful linguistic resources, allowing for unique observations into language use and variability. In this paper we demonstrate the performance of multiple types of embeddings, created with both count and prediction-based architectures on a variety of corpora, in two language-specific tasks: relation evaluation, and dialect identification. For the latter, we compare unsupervised methods with a traditional, hand-crafted dictionary. With this research, we provide the embeddings themselves, the relation evaluation task benchmark for use in further research, and demonstrate how the benchmarked embeddings prove a useful unsupervised linguistic resource, effectively used in a downstream task. L16-1652 @@ -6991,7 +6991,7 @@ <fixed-case>EN</fixed-case>-<fixed-case>ES</fixed-case>-<fixed-case>CS</fixed-case>: An <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>panish Code-Switching <fixed-case>T</fixed-case>witter Corpus for Multilingual Sentiment Analysis DavidVilares - Miguel A.Alonso + Miguel A.Alonso CarlosGómez-Rodríguez 4149–4153 Code-switching texts are those that contain terms in two or more different languages, and they appear increasingly often in social media. The aim of this paper is to provide a resource to the research community to evaluate the performance of sentiment classification techniques on this complex multilingual environment, proposing an English-Spanish corpus of tweets with code-switching (EN-ES-CS CORPUS). The tweets are labeled according to two well-known criteria used for this purpose: SentiStrength and a trinary scale (positive, neutral and negative categories). Preliminary work on the resource is already done, providing a set of baselines for the research community. @@ -7001,7 +7001,7 @@ <fixed-case>S</fixed-case>em<fixed-case>R</fixed-case>el<fixed-case>D</fixed-case>ata ― Multilingual Contextual Annotation of Semantic Relations between Nominals: Dataset and Guidelines DarinaBenikova - ChrisBiemann + ChrisBiemann 4154–4161 Semantic relations play an important role in linguistic knowledge representation. Although their role is relevant in the context of written text, there is no approach or dataset that makes use of contextuality of classic semantic relations beyond the boundary of one sentence. We present the SemRelData dataset that contains annotations of semantic relations between nominals in the context of one paragraph. To be able to analyse the universality of this context notion, the annotation was performed on a multi-lingual and multi-genre corpus. To evaluate the dataset, it is compared to large, manually created knowledge resources in the respective languages. The comparison shows that knowledge bases not only have coverage gaps; they also do not account for semantic relations that are manifested in particular contexts only, yet still play an important role for text cohesion. L16-1656 @@ -7011,7 +7011,7 @@ A Multilingual, Multi-style and Multi-granularity Dataset for Cross-language Textual Similarity Detection JérémyFerrero FrédéricAgnès - LaurentBesacier + LaurentBesacier DidierSchwab 4162–4169 In this paper we describe our effort to create a dataset for the evaluation of cross-language textual similarity detection. We present preexisting corpora and their limits and we explain the various gathered resources to overcome these limits and build our enriched dataset. The proposed dataset is multilingual, includes cross-language alignment for different granularities (from chunk to document), is based on both parallel and comparable corpora and contains human and machine translated texts. Moreover, it includes texts written by multiple types of authors (from average to professionals). With the obtained dataset, we conduct a systematic and rigorous evaluation of several state-of-the-art cross-language textual similarity detection methods. The evaluation results are reviewed and discussed. Finally, dataset and scripts are made publicly available on GitHub: http://github.com/FerreroJeremy/Cross-Language-Dataset. @@ -7070,9 +7070,9 @@ <fixed-case>M</fixed-case>ulti<fixed-case>V</fixed-case>ec: a Multilingual and Multilevel Representation Learning Toolkit for <fixed-case>NLP</fixed-case> AlexandreBérard - ChristopheServan + ChristopheServan OlivierPietquin - LaurentBesacier + LaurentBesacier 4188–4192 We present MultiVec, a new toolkit for computing continuous representations for text at different granularity levels (word-level or sequences of words). MultiVec includes word2vec’s features, paragraph vector (batch and online) and bivec for bilingual distributed representations. MultiVec also includes different distance measures between words and sequences of words. The toolkit is written in C++ and is aimed at being fast (in the same order of magnitude as word2vec), easy to use, and easy to extend. It has been evaluated on several NLP tasks: the analogical reasoning task, sentiment analysis, and crosslingual document classification. L16-1662 @@ -7092,7 +7092,7 @@ A Corpus of Native, Non-native and Translated Texts SergiuNisioi EllaRabinovich - Liviu P.Dinu + Liviu P.Dinu ShulyWintner 4197–4201 We describe a monolingual English corpus of original and (human) translated texts, with an accurate annotation of speaker properties, including the original language of the utterances and the speaker’s country of origin. We thus obtain three sub-corpora of texts reflecting native English, non-native English, and English translated from a variety of European languages. This dataset will facilitate the investigation of similarities and differences between these kinds of sub-languages. Moreover, it will facilitate a unified comparative study of translations and language produced by (highly fluent) non-native speakers, two closely-related phenomena that have only been studied in isolation so far. @@ -7115,7 +7115,7 @@ <fixed-case>A</fixed-case>xolotl: a Web Accessible Parallel Corpus for <fixed-case>S</fixed-case>panish-<fixed-case>N</fixed-case>ahuatl XimenaGutierrez-Vasques - GerardoSierra + GerardoSierra Isaac HernandezPompa 4210–4214 This paper describes the project called Axolotl which comprises a Spanish-Nahuatl parallel corpus and its search interface. Spanish and Nahuatl are distant languages spoken in the same country. Due to the scarcity of digital resources, we describe the several problems that arose when compiling this corpus: most of our sources were non-digital books, we faced errors when digitizing the sources and there were difficulties in the sentence alignment process, just to mention some. The documents of the parallel corpus are not homogeneous, they were extracted from different sources, there is dialectal, diachronical, and orthographical variation. Additionally, we present a web search interface that allows to make queries through the whole parallel corpus, the system is capable to retrieve the parallel fragments that contain a word or phrase searched by a user in any of the languages. To our knowledge, this is the first Spanish-Nahuatl public available digital parallel corpus. We think that this resource can be useful to develop language technologies and linguistic studies for this language pair. @@ -7124,7 +7124,7 @@ A <fixed-case>T</fixed-case>urkish-<fixed-case>G</fixed-case>erman Code-Switching Corpus - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu 4215–4220 Bilingual communities often alternate between languages both in spoken and written communication. One such community, Germany residents of Turkish origin produce Turkish-German code-switching, by heavily mixing two languages at discourse, sentence, or word level. Code-switching in general, and Turkish-German code-switching in particular, has been studied for a long time from a linguistic perspective. Yet resources to study them from a more computational perspective are limited due to either small size or licence issues. In this work we contribute the solution of this problem with a corpus. We present a Turkish-German code-switching corpus which consists of 1029 tweets, with a majority of intra-sentential switches. We share different type of code-switching we have observed in our collection and describe our processing steps. The first step is data collection and filtering. This is followed by manual tokenisation and normalisation. And finally, we annotate data with word-level language identification information. The resulting corpus is available for research purposes. L16-1667 @@ -7143,7 +7143,7 @@ Creating a Large Multi-Layered Representational Repository of Linguistic Code Switched <fixed-case>A</fixed-case>rabic Data - MonaDiab + MonaDiab MahmoudGhoneim AbdelatiHawwari FahadAlGhamdi @@ -7195,8 +7195,8 @@ Multi-language Speech Collection for <fixed-case>NIST</fixed-case> <fixed-case>LRE</fixed-case> - KarenJones - StephanieStrassel + KarenJones + StephanieStrassel KevinWalker DavidGraff JonathanWright @@ -7218,7 +7218,7 @@ New Inflectional Lexicons and Training Corpora for Improved Morphosyntactic Annotation of <fixed-case>C</fixed-case>roatian and <fixed-case>S</fixed-case>erbian NikolaLjubešić FilipKlubička - ŽeljkoAgić + ŽeljkoAgić Ivo-PavaoJazbec 4264–4270 In this paper we present newly developed inflectional lexcions and manually annotated corpora of Croatian and Serbian. We introduce hrLex and srLex - two freely available inflectional lexicons of Croatian and Serbian - and describe the process of building these lexicons, supported by supervised machine learning techniques for lemma and paradigm prediction. Furthermore, we introduce hr500k, a manually annotated corpus of Croatian, 500 thousand tokens in size. We showcase the three newly developed resources on the task of morphosyntactic annotation of both languages by using a recently developed CRF tagger. We achieve best results yet reported on the task for both languages, beating the HunPos baseline trained on the same datasets by a wide margin. @@ -7258,7 +7258,7 @@ <fixed-case>UDP</fixed-case>ipe: Trainable Pipeline for Processing <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-<fixed-case>U</fixed-case> Files Performing Tokenization, Morphological Analysis, <fixed-case>POS</fixed-case> Tagging and Parsing MilanStraka - JanHajič + JanHajič JanaStraková 4290–4297 Automatic natural language processing of large texts often presents recurring challenges in multiple languages: even for most advanced tasks, the texts are first processed by basic processing steps – from tokenization to parsing. We present an extremely simple-to-use tool consisting of one binary and one model (per language), which performs these tasks for multiple languages without the need for any other external data. UDPipe, a pipeline processing CoNLL-U-formatted files, performs tokenization, morphological analysis, part-of-speech tagging, lemmatization and dependency parsing for nearly all treebanks of Universal Dependencies 1.2 (namely, the whole pipeline is currently available for 32 out of 37 treebanks). In addition, the pipeline is easily trainable with training data in CoNLL-U format (and in some cases also with additional raw corpora) and requires minimal linguistic knowledge on the users’ part. The training code is also released. @@ -7315,7 +7315,7 @@ <fixed-case>S</fixed-case>lang<fixed-case>N</fixed-case>et: A <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et like resource for <fixed-case>E</fixed-case>nglish Slang ShehzaadDhuliawala DipteshKanojia - PushpakBhattacharyya + PushpakBhattacharyya 4329–4332 We present a WordNet like structured resource for slang words and neologisms on the internet. The dynamism of language is often an indication that current language technology tools trained on today’s data, may not be able to process the language in the future. Our resource could be (1) used to augment the WordNet, (2) used in several Natural Language Processing (NLP) applications which make use of noisy data on the internet like Information Retrieval and Web Mining. Such a resource can also be used to distinguish slang word senses from conventional word senses. To stimulate similar innovations widely in the NLP community, we test the efficacy of our resource for detecting slang using standard bag of words Word Sense Disambiguation (WSD) algorithms (Lesk and Extended Lesk) for English data on the internet. L16-1686 @@ -7323,7 +7323,7 @@ Discovering Fuzzy Synsets from the Redundancy in Different Lexical-Semantic Resources - Hugo GonçaloOliveira + Hugo GonçaloOliveira FábioSantos 4333–4340 Although represented as such in wordnets, word senses are not discrete. To handle word senses as fuzzy objects, we exploit the graph structure of synonymy pairs acquired from different sources to discover synsets where words have different membership degrees that reflect confidence. Following this approach, a wide-coverage fuzzy thesaurus was discovered from a synonymy network compiled from seven Portuguese lexical-semantic resources. Based on a crowdsourcing evaluation, we can say that the quality of the obtained synsets is far from perfect but, as expected in a confidence measure, it increases significantly for higher cut-points on the membership and, at a certain point, reaches 100% correction rate. @@ -7349,7 +7349,7 @@ Ambiguity Diagnosis for Terms in Digital Humanities - BéatriceDaille + BéatriceDaille EvelyneJacquey GaëlLejeune Luis FelipeMelo @@ -7361,7 +7361,7 @@ Metrical Annotation of a Large Corpus of <fixed-case>S</fixed-case>panish Sonnets: Representation, Scansion and Evaluation - BorjaNavarro + BorjaNavarro MaríaRibes Lafoz NoeliaSánchez 4360–4364 @@ -7393,10 +7393,10 @@ Studying the Temporal Dynamics of Word Co-occurrences: An Application to Event Detection - DanielPreoţiuc-Pietro + DanielPreoţiuc-Pietro P. K.Srijith MarkHepple - TrevorCohn + TrevorCohn 4380–4387 Streaming media provides a number of unique challenges for computational linguistics. This paper studies the temporal variation in word co-occurrence statistics, with application to event detection. We develop a spectral clustering approach to find groups of mutually informative terms occurring in discrete time frames. Experiments on large datasets of tweets show that these groups identify key real world events as they occur in time, despite no explicit supervision. The performance of our method rivals state-of-the-art methods for event detection on F-score, obtaining higher recall at the expense of precision. L16-1694 @@ -7404,7 +7404,7 @@ <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic <fixed-case>N</fixed-case>etworks for Text Mining: A Qualitative and Empirical Comparison with Integer Linear Programming - Luis GerardoMojica de la Vega + Luis GerardoMojica de la Vega VincentNg 4388–4395 Joint inference approaches such as Integer Linear Programming (ILP) and Markov Logic Networks (MLNs) have recently been successfully applied to many natural language processing (NLP) tasks, often outperforming their pipeline counterparts. However, MLNs are arguably much less popular among NLP researchers than ILP. While NLP researchers who desire to employ these joint inference frameworks do not necessarily have to understand their theoretical underpinnings, it is imperative that they understand which of them should be applied under what circumstances. With the goal of helping NLP researchers better understand the relative strengths and weaknesses of MLNs and ILP; we will compare them along different dimensions of interest, such as expressiveness, ease of use, scalability, and performance. To our knowledge, this is the first systematic comparison of ILP and MLNs on an NLP task. @@ -7416,9 +7416,9 @@ Ayman AlZaatari Rim ElBallouli ShadyELbassouni - WassimEl-Hajj + WassimEl-Hajj HazemHajj - KhaledShaban + KhaledShaban NizarHabash EmadYahya 4396–4401 @@ -7439,7 +7439,7 @@ <fixed-case>QUEMDISSE</fixed-case>? Reported speech in <fixed-case>P</fixed-case>ortuguese - CláudiaFreitas + CláudiaFreitas BiancaFreitas DianaSantos 4410–4416 @@ -7450,8 +7450,8 @@ <fixed-case>MEANTIME</fixed-case>, the <fixed-case>N</fixed-case>ews<fixed-case>R</fixed-case>eader Multilingual Event and Time Corpus Anne-LyseMinard - ManuelaSperanza - RubenUrizar + ManuelaSperanza + RubenUrizar BegoñaAltuna Mariekevan Erp AnneleenSchoen @@ -7463,7 +7463,7 @@ The <fixed-case>ACQDIV</fixed-case> Database: <fixed-case>M</fixed-case>in(d)ing the Ambient Language - StevenMoran + StevenMoran 4423–4429 One of the most pressing questions in cognitive science remains unanswered: what cognitive mechanisms enable children to learn any of the world’s 7000 or so languages? Much discovery has been made with regard to specific learning mechanisms in specific languages, however, given the remarkable diversity of language structures (Evans and Levinson 2009, Bickel 2014) the burning question remains: what are the underlying processes that make language acquisition possible, despite substantial cross-linguistic variation in phonology, morphology, syntax, etc.? To investigate these questions, a comprehensive cross-linguistic database of longitudinal child language acquisition corpora from maximally diverse languages has been built. L16-1700 @@ -7473,9 +7473,9 @@ Summarizing Behaviours: An Experiment on the Annotation of Call-Centre Conversations MorenaDanieli Balamurali AR - EvgenyStepanov - BenoitFavre - FredericBechet + EvgenyStepanov + BenoitFavre + FredericBechet GiuseppeRiccardi 4430–4433 Annotating and predicting behavioural aspects in conversations is becoming critical in the conversational analytics industry. In this paper we look into inter-annotator agreement of agent behaviour dimensions on two call center corpora. We find that the task can be annotated consistently over time, but that subjectivity issues impacts the quality of the annotation. The reformulation of some of the annotated dimensions is suggested in order to improve agreement. @@ -7484,7 +7484,7 @@ Survey of Conversational Behavior: Towards the Design of a Balanced Corpus of Everyday <fixed-case>J</fixed-case>apanese Conversation - HanaeKoiso + HanaeKoiso TomoyukiTsuchiya RyokoWatanabe DaisukeYokomori @@ -7508,8 +7508,8 @@ <fixed-case>I</fixed-case>nternet Argument Corpus 2.0: An <fixed-case>SQL</fixed-case> schema for Dialogic Social Media and the Corpora to go with it RobAbbott BrianEcker - PranavAnand - MarilynWalker + PranavAnand + MarilynWalker 4445–4452 Large scale corpora have benefited many areas of research in natural language processing, but until recently, resources for dialogue have lagged behind. Now, with the emergence of large scale social media websites incorporating a threaded dialogue structure, content feedback, and self-annotation (such as stance labeling), there are valuable new corpora available to researchers. In previous work, we released the INTERNET ARGUMENT CORPUS, one of the first larger scale resources available for opinion sharing dialogue. We now release the INTERNET ARGUMENT CORPUS 2.0 (IAC 2.0) in the hope that others will find it as useful as we have. The IAC 2.0 provides more data than IAC 1.0 and organizes it using an extensible, repurposable SQL schema. The database structure in conjunction with the associated code facilitates querying from and combining multiple dialogically structured data sources. The IAC 2.0 schema provides support for forum posts, quotations, markup (bold, italic, etc), and various annotations, including Stanford CoreNLP annotations. We demonstrate the generalizablity of the schema by providing code to import the ConVote corpus. L16-1704 @@ -7539,7 +7539,7 @@ ChristianFäth HeikeRenner-Westermann FrankAbromeit - VanyaDimitrova + VanyaDimitrova 4463–4471 This paper introduces a novel research tool for the field of linguistics: The Lin|gu|is|tik web portal provides a virtual library which offers scientific information on every linguistic subject. It comprises selected internet sources and databases as well as catalogues for linguistic literature, and addresses an interdisciplinary audience. The virtual library is the most recent outcome of the Special Subject Collection Linguistics of the German Research Foundation (DFG), and also integrates the knowledge accumulated in the Bibliography of Linguistic Literature. In addition to the portal, we describe long-term goals and prospects with a special focus on ongoing efforts regarding an extension towards integrating language resources and Linguistic Linked Open Data. L16-1707 @@ -7560,7 +7560,7 @@ Designing A Long Lasting Linguistic Project: The Case Study of <fixed-case>ASI</fixed-case>t MaristellaAgosti EmanueleDi Buccio - Giorgio MariaDi Nunzio + Giorgio MariaDi Nunzio CeciliaPoletto EstherRinke 4479–4483 @@ -7570,7 +7570,7 @@ Global Open Resources and Information for Language and Linguistic Analysis (<fixed-case>GORILLA</fixed-case>) - DamirCavar + DamirCavar MalgorzataCavar LwinMoe 4484–4491 @@ -7611,8 +7611,8 @@ Two Architectures for Parallel Processing of Huge Amounts of Text MathijsKattenberg ZuhaitzBeloki - AitorSoroa - XabierArtola + AitorSoroa + XabierArtola AntskeFokkens PaulHuygen KeesVerstoep @@ -7633,7 +7633,7 @@ New Developments in the <fixed-case>LRE</fixed-case> Map VladimirPopescu LinLiu - RiccardoDel Gratta + RiccardoDel Gratta KhalidChoukri NicolettaCalzolari 4526–4530 @@ -7652,7 +7652,7 @@ The <fixed-case>ELRA</fixed-case> License Wizard - ValérieMapelli + ValérieMapelli VladimirPopescu LinLiu Meritxell FernándezBarrera @@ -7665,7 +7665,7 @@ Review on the Existing Language Resources for Languages of <fixed-case>F</fixed-case>rance ThibaultGrouas - ValérieMapelli + ValérieMapelli QuentinSamier 4539–4542 With the support of the DGLFLF, ELDA conducted an inventory of existing language resources for the regional languages of France. The main aim of this inventory was to assess the exploitability of the identified resources within technologies. A total of 2,299 Language Resources were identified. As a second step, a deeper analysis of a set of three language groups (Breton, Occitan, overseas languages) was carried out along with a focus of their exploitability within three technologies: automatic translation, voice recognition/synthesis and spell checkers. The survey was followed by the organisation of the TLRF2015 Conference which aimed to present the state of the art in the field of the Technologies for Regional Languages of France. The next step will be to activate the network of specialists built up during the TLRF conference and to begin the organisation of a second TLRF conference. Meanwhile, the French Ministry of Culture continues its actions related to linguistic diversity and technology, in particular through a project with Wikimedia France related to contributions to Wikipedia in regional languages, the upcoming new version of the “Corpus de la Parole” and the reinforcement of the DGLFLF’s Observatory of Linguistic Practices. @@ -7674,9 +7674,9 @@ Selection Criteria for Low Resource Language Programs - ChristopherCieri + ChristopherCieri MikeMaxwell - StephanieStrassel + StephanieStrassel JenniferTracey 4543–4549 This paper documents and describes the criteria used to select languages for study within programs that include low resource languages whether given that label or another similar one. It focuses on five US common task, Human Language Technology research and development programs in which the authors have provided information or consulting related to the choice of language. The paper does not describe the actual selection process which is the responsibility of program management and highly specific to a program’s individual goals and context. Instead it concentrates on the data and criteria that have been considered relevant previously with the thought that future program managers and their consultants may adapt these and apply them with different prioritization to future programs. @@ -7700,7 +7700,7 @@ EnricoSantus AlessandroLenci Tin-ShingChiu - QinLu + QinLu Chu-RenHuang 4557–4564 ROOT9 is a supervised system for the classification of hypernyms, co-hyponyms and random words that is derived from the already introduced ROOT13 (Santus et al., 2016). It relies on a Random Forest algorithm and nine unsupervised corpus-based features. We evaluate it with a 10-fold cross validation on 9,600 pairs, equally distributed among the three classes and involving several Parts-Of-Speech (i.e. adjectives, nouns and verbs). When all the classes are present, ROOT9 achieves an F1 score of 90.7%, against a baseline of 57.2% (vector cosine). When the classification is binary, ROOT9 achieves the following results against the baseline. hypernyms-co-hyponyms 95.7% vs. 69.8%, hypernyms-random 91.8% vs. 64.1% and co-hyponyms-random 97.8% vs. 79.4%. In order to compare the performance with the state-of-the-art, we have also evaluated ROOT9 in subsets of the Weeds et al. (2014) datasets, proving that it is in fact competitive. Finally, we investigated whether the system learns the semantic relation or it simply learns the prototypical hypernyms, as claimed by Levy et al. (2015). The second possibility seems to be the most likely, even though ROOT9 can be trained on negative examples (i.e., switched hypernyms) to drastically reduce this bias. @@ -7712,7 +7712,7 @@ EnricoSantus AlessandroLenci Tin-ShingChiu - QinLu + QinLu Chu-RenHuang 4565–4572 In this paper, we claim that Vector Cosine ― which is generally considered one of the most efficient unsupervised measures for identifying word similarity in Vector Space Models ― can be outperformed by a completely unsupervised measure that evaluates the extent of the intersection among the most associated contexts of two target words, weighting such intersection according to the rank of the shared contexts in the dependency ranked lists. This claim comes from the hypothesis that similar words do not simply occur in similar contexts, but they share a larger portion of their most relevant contexts compared to other related words. To prove it, we describe and evaluate APSyn, a variant of Average Precision that ― independently of the adopted parameters ― outperforms the Vector Cosine and the co-occurrence on the ESL and TOEFL test sets. In the best setting, APSyn reaches 0.73 accuracy on the ESL dataset and 0.70 accuracy in the TOEFL dataset, beating therefore the non-English US college applicants (whose average, as reported in the literature, is 64.50%) and several state-of-the-art approaches. @@ -7722,7 +7722,7 @@ Assessing the Potential of Metaphoricity of verbs using corpus data MarcoDel Tredici - NúriaBel + NúriaBel 4573–4577 The paper investigates the relation between metaphoricity and distributional characteristics of verbs, introducing POM, a corpus-derived index that can be used to define the upper bound of metaphoricity of any expression in which a given verb occurs. The work moves from the observation that while some verbs can be used to create highly metaphoric expressions, others can not. We conjecture that this fact is related to the number of contexts in which a verb occurs and to the frequency of each context. This intuition is modelled by introducing a method in which each context of a verb in a corpus is assigned a vector representation, and a clustering algorithm is employed to identify similar contexts. Eventually, the Standard Deviation of the relative frequency values of the clusters is computed and taken as the POM of the target verb. We tested POM in two experimental settings obtaining values of accuracy of 84% and 92%. Since we are convinced, along with (Shutoff, 2015), that metaphor detection systems should be concerned only with the identification of highly metaphoric expressions, we believe that POM could be profitably employed by these systems to a priori exclude expressions that, due to the verb they include, can only have low degrees of metaphoricity L16-1724 @@ -7751,7 +7751,7 @@ Towards Building Semantic Role Labeler for <fixed-case>I</fixed-case>ndian Languages MaazAnwar - DiptiSharma + DiptiSharma 4588–4595 We present a statistical system for identifying the semantic relationships or semantic roles for two major Indian Languages, Hindi and Urdu. Given an input sentence and a predicate/verb, the system first identifies the arguments pertaining to that verb and then classifies it into one of the semantic labels which can either be a DOER, THEME, LOCATIVE, CAUSE, PURPOSE etc. The system is based on 2 statistical classifiers trained on roughly 130,000 words for Urdu and 100,000 words for Hindi that were hand-annotated with semantic roles under the PropBank project for these two languages. Our system achieves an accuracy of 86% in identifying the arguments of a verb for Hindi and 75% for Urdu. At the subsequent task of classifying the constituents into their semantic roles, the Hindi system achieved 58% precision and 42% recall whereas Urdu system performed better and achieved 83% precision and 80% recall. Our study also allowed us to compare the usefulness of different linguistic features and feature combinations in the semantic role labeling task. We also examine the use of statistical syntactic parsing as feature in the role labeling task. L16-1727 @@ -7759,7 +7759,7 @@ A Framework for Automatic Acquisition of <fixed-case>C</fixed-case>roatian and <fixed-case>S</fixed-case>erbian Verb Aspect from Corpora - TanjaSamardžić + TanjaSamardžić MajaMiličević 4596–4601 Verb aspect is a grammatical and lexical category that encodes temporal unfolding and duration of events described by verbs. It is a potentially interesting source of information for various computational tasks, but has so far not been studied in much depth from the perspective of automatic processing. Slavic languages are particularly interesting in this respect, as they encode aspect through complex and not entirely consistent lexical derivations involving prefixation and suffixation. Focusing on Croatian and Serbian, in this paper we propose a novel framework for automatic classification of their verb types into a number of fine-grained aspectual classes based on the observable morphology of verb forms. In addition, we provide a set of around 2000 verbs classified based on our framework. This set can be used for linguistic research as well as for testing automatic classification on a larger scale. With minor adjustments the approach is also applicable to other Slavic languages @@ -7770,7 +7770,7 @@ Monolingual Social Media Datasets for Detecting Contradiction and Entailment PiroskaLendvai IsabelleAugenstein - KalinaBontcheva + KalinaBontcheva ThierryDeclerck 4602–4605 Entailment recognition approaches are useful for application domains such as information extraction, question answering or summarisation, for which evidence from multiple sentences needs to be combined. We report on a new 3-way judgement Recognizing Textual Entailment (RTE) resource that originates in the Social Media domain, and explain our semi-automatic creation method for the special purpose of information verification, which draws on manually established rumourous claims reported during crisis events. From about 500 English tweets related to 70 unique claims we compile and evaluate 5.4k RTE pairs, while continue automatizing the workflow to generate similar-sized datasets in other languages. @@ -7779,7 +7779,7 @@ <fixed-case>V</fixed-case>ox<fixed-case>ML</fixed-case>: A Visualization Modeling Language - JamesPustejovsky + JamesPustejovsky NikhilKrishnaswamy 4606–4613 We present the specification for a modeling language, VoxML, which encodes semantic knowledge of real-world objects represented as three-dimensional models, and of events and attributes related to and enacted over these objects. VoxML is intended to overcome the limitations of existing 3D visual markup languages by allowing for the encoding of a broad range of semantic knowledge that can be exploited by a variety of systems and platforms, leading to multimodal simulations of real-world scenarios using conceptual objects that represent their semantic values @@ -7797,7 +7797,7 @@ Embedding Open-domain Common-sense Knowledge from Text TravisGoodwin - SandaHarabagiu + SandaHarabagiu 4621–4628 Our ability to understand language often relies on common-sense knowledge ― background information the speaker can assume is known by the reader. Similarly, our comprehension of the language used in complex domains relies on access to domain-specific knowledge. Capturing common-sense and domain-specific knowledge can be achieved by taking advantage of recent advances in open information extraction (IE) techniques and, more importantly, of knowledge embeddings, which are multi-dimensional representations of concepts and relations. Building a knowledge graph for representing common-sense knowledge in which concepts discerned from noun phrases are cast as vertices and lexicalized relations are cast as edges leads to learning the embeddings of common-sense knowledge accounting for semantic compositionality as well as implied knowledge. Common-sense knowledge is acquired from a vast collection of blogs and books as well as from WordNet. Similarly, medical knowledge is learned from two large sets of electronic health records. The evaluation results of these two forms of knowledge are promising: the same knowledge acquisition methodology based on learning knowledge embeddings works well both for common-sense knowledge and for medical knowledge Interestingly, the common-sense knowledge that we have acquired was evaluated as being less neutral than than the medical knowledge, as it often reflected the opinion of the knowledge utterer. In addition, the acquired medical knowledge was evaluated as more plausible than the common-sense knowledge, reflecting the complexity of acquiring common-sense knowledge due to the pragmatics and economicity of language. L16-1732 @@ -7805,7 +7805,7 @@ Medical Concept Embeddings via Labeled Background Corpora - Eneldo LozaMencía + Eneldo LozaMencía Gerardde Melo JinseokNam 4629–4636 @@ -7851,7 +7851,7 @@ ShinsukeMori FumihikoTakahashi KatsutoshiItoyama - Hiroshi G.Okuno + Hiroshi G.Okuno 4652–4657 Binary file summaries/549.html matches L16-1737 @@ -7864,7 +7864,7 @@ CorinneFredouille BrigitteBigi LiseCrevier-Buchman - ElisabethDelais-Roussarie + ElisabethDelais-Roussarie LaurianneGeorgeton AlainGhio ImedLaaridh @@ -7886,7 +7886,7 @@ HansVan de Velde FrederikKampstra JoukeAlgra - Henkvan den Heuvel + Henkvan den Heuvel Davidvan Leeuwen 4666–4669 We present a new speech database containing 18.5 hours of annotated radio broadcasts in the Frisian language. Frisian is mostly spoken in the province Fryslan and it is the second official language of the Netherlands. The recordings are collected from the archives of Omrop Fryslan, the regional public broadcaster of the province Fryslan. The database covers almost a 50-year time span. The native speakers of Frisian are mostly bilingual and often code-switch in daily conversations due to the extensive influence of the Dutch language. Considering the longitudinal and code-switching nature of the data, an appropriate annotation protocol has been designed and the data is manually annotated with the orthographic transcription, speaker identities, dialect information, code-switching details and background noise/music information. @@ -7896,7 +7896,7 @@ The <fixed-case>SI</fixed-case> <fixed-case>TED</fixed-case>x-<fixed-case>UM</fixed-case> speech database: a new <fixed-case>S</fixed-case>lovenian Spoken Language Resource AndrejŽgank - Mirjam SepesyMaučec + Mirjam SepesyMaučec DarinkaVerdonik 4670–4673 This paper presents a new Slovenian spoken language resource built from TEDx Talks. The speech database contains 242 talks in total duration of 54 hours. The annotation and transcription of acquired spoken material was generated automatically, applying acoustic segmentation and automatic speech recognition. The development and evaluation subset was also manually transcribed using the guidelines specified for the Slovenian GOS corpus. The manual transcriptions were used to evaluate the quality of unsupervised transcriptions. The average word error rate for the SI TEDx-UM evaluation subset was 50.7%, with out of vocabulary rate of 24% and language model perplexity of 390. The unsupervised transcriptions contain 372k tokens, where 32k of them were different. @@ -7929,7 +7929,7 @@ JánStaš TomášKoctúr MartinLojka - JozefJuhár + JozefJuhár 4684–4687 In this paper, we introduce an extension of our previously released TUKE-BNews-SK corpus based on a semi-automatic annotation scheme. It firstly relies on the automatic transcription of the BN data performed by our Slovak large vocabulary continuous speech recognition system. The generated hypotheses are then manually corrected and completed by trained human annotators. The corpus is composed of 25 hours of fully-annotated spontaneous and prepared speech. In addition, we have acquired 900 hours of another BN data, part of which we plan to annotate semi-automatically. We present a preliminary corpus evaluation that gives very promising results. L16-1743 @@ -7938,7 +7938,7 @@ Generating a <fixed-case>Y</fixed-case>iddish Speech Corpus, Forced Aligner and Basic <fixed-case>ASR</fixed-case> System for the <fixed-case>AHEYM</fixed-case> Project MalgorzataĆavar - DamirĆavar + DamirĆavar Dov-BerKerler AnyaQuilitzsch 4688–4693 diff --git a/data/xml/L18.xml b/data/xml/L18.xml index 2406e96bc4..4752300436 100644 --- a/data/xml/L18.xml +++ b/data/xml/L18.xml @@ -4,19 +4,19 @@ Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018) L18-1 - NicolettaCalzolari - KhalidChoukri - ChristopherCieri + NicolettaCalzolari + KhalidChoukri + ChristopherCieri ThierryDeclerck SaraGoggi - KoitiHasida + KoitiHasida HitoshiIsahara - BenteMaegaard - JosephMariani + BenteMaegaard + JosephMariani HélèneMazo - AsuncionMoreno - JanOdijk - SteliosPiperidis + AsuncionMoreno + JanOdijk + SteliosPiperidis TakenobuTokunaga European Language Resources Association (ELRA)
Miyazaki, Japan
@@ -30,8 +30,8 @@ Augmenting Librispeech with <fixed-case>F</fixed-case>rench Translations: A Multimodal Corpus for Direct Speech Translation Evaluation Ali CanKocabiyikoglu - LaurentBesacier - OlivierKraif + LaurentBesacier + OlivierKraif L18-1001 kocabiyikoglu-etal-2018-augmenting @@ -40,7 +40,7 @@ ThierryEtchegoyhen AnnaFernández Torné AndoniAzpeitia - EvaMartínez Garcia + EvaMartínez Garcia AnnaMatamala L18-1002 etchegoyhen-etal-2018-evaluating @@ -54,9 +54,9 @@
<fixed-case>ESCAPE</fixed-case>: a Large-scale Synthetic Corpus for Automatic Post-Editing - MatteoNegri + MatteoNegri MarcoTurchi - RajenChatterjee + RajenChatterjee NicolaBertoldi L18-1004 negri-etal-2018-escape @@ -65,7 +65,7 @@ Evaluating Machine Translation Performance on <fixed-case>C</fixed-case>hinese Idioms with a Blacklist Method YutongShao RicoSennrich - BonnieWebber + BonnieWebber FedericoFancellu L18-1005 shao-etal-2018-evaluating @@ -86,8 +86,8 @@ Advances in Pre-Training Distributed Word Representations - TomasMikolov - EdouardGrave + TomasMikolov + EdouardGrave PiotrBojanowski ChristianPuhrsch ArmandJoulin @@ -96,10 +96,10 @@ Integrating <fixed-case>G</fixed-case>enerative <fixed-case>L</fixed-case>exicon Event Structures into <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et - Susan WindischBrown - JamesPustejovsky + Susan WindischBrown + JamesPustejovsky AnnieZaenen - MarthaPalmer + MarthaPalmer L18-1009 brown-etal-2018-integrating @@ -123,7 +123,7 @@ The Natural Stories Corpus RichardFutrell EdwardGibson - Harry J.Tily + Harry J.Tily IdanBlank AnastasiaVishnevetsky StevenPiantadosi @@ -133,21 +133,21 @@
Semi-automatic <fixed-case>K</fixed-case>orean <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Annotation over <fixed-case>KAIST</fixed-case> Treebank - YounggyunHahm + YounggyunHahm JiseongKim SunggooKwon - Key-SunChoi + Key-SunChoi L18-1013 hahm-etal-2018-semi Handling Normalization Issues for Part-of-Speech Tagging of Online Conversational Text - GéraldineDamnati + GéraldineDamnati JeremyAuguste AlexisNasr DelphineCharlet JohannesHeinecke - FrédéricBéchet + FrédéricBéchet L18-1014 damnati-etal-2018-handling @@ -174,17 +174,17 @@
Dialogue Structure Annotation for Multi-Floor Interaction - DavidTraum + DavidTraum CassidyHenry - StephanieLukin + StephanieLukin RonArtstein FelixGervits KimberlyPollard - ClaireBonial + ClaireBonial SuLei - ClareVoss + ClareVoss MatthewMarge - CoryHayes + CoryHayes SusanHill L18-1017 traum-etal-2018-dialogue @@ -222,14 +222,14 @@ Data Management Plan (<fixed-case>DMP</fixed-case>) for Language Data under the New General Da-ta Protection Regulation (<fixed-case>GDPR</fixed-case>) PawelKamocki - ValérieMapelli + ValérieMapelli KhalidChoukri L18-1021 kamocki-etal-2018-data We Are Depleting Our Research Subject as We Are Investigating It: In Language Technology, more Replication and Diversity Are Needed - AntónioBranco + AntónioBranco L18-1022 branco-2018-depleting @@ -244,7 +244,7 @@ Introducing <fixed-case>NIEUW</fixed-case>: Novel Incentives and Workflows for Eliciting Linguistic Data ChristopherCieri JamesFiumara - MarkLiberman + MarkLiberman ChrisCallison-Burch JonathanWright L18-1024 @@ -252,16 +252,16 @@ Three Dimensions of Reproducibility in Natural Language Processing - K. BretonnelCohen + K. BretonnelCohen JingboXia - PierreZweigenbaum + PierreZweigenbaum TiffanyCallahan OrinHargraves FosterGoss - NancyIde - AurélieNévéol + NancyIde + AurélieNévéol CyrilGrouin - Lawrence E.Hunter + Lawrence E.Hunter L18-1025 cohen-etal-2018-three @@ -274,13 +274,13 @@
Word Affect Intensities - SaifMohammad + SaifMohammad L18-1027 mohammad-2018-word Representation Mapping: A Novel Approach to Generate High-Quality Multi-Lingual Emotion Lexicons - SvenBuechel + SvenBuechel UdoHahn L18-1028 buechel-hahn-2018-representation @@ -298,7 +298,7 @@ Understanding Emotions: A Dataset of Tweets to Study Interactions between Affect Categories - SaifMohammad + SaifMohammad SvetlanaKiritchenko L18-1030 mohammad-kiritchenko-2018-understanding @@ -308,7 +308,7 @@ BonanMin MarjorieFreedman RogerBock - RalphWeischedel + RalphWeischedel L18-1031 min-etal-2018-ace @@ -344,7 +344,7 @@ Building Parallel Monolingual <fixed-case>G</fixed-case>an <fixed-case>C</fixed-case>hinese Dialects Corpus FanXu - MingwenWang + MingwenWang MaoxiLi L18-1036 xu-etal-2018-building @@ -373,17 +373,17 @@ A Lexical Tool for Academic Writing in <fixed-case>S</fixed-case>panish based on Expert and Novice Corpora - MarcosGarcía Salido + MarcosGarcía Salido MarcosGarcía MilkaVillayandre-Llamazares - MargaritaAlonso-Ramos + MargaritaAlonso-Ramos L18-1039 L18-1039.Supplementary.pdf garcia-salido-etal-2018-lexical Framing Named Entity Linking Error Types - AdrianBraşoveanu + AdrianBraşoveanu GiuseppeRizzo PhilippKuntschik AlbertWeichselbraun @@ -407,7 +407,7 @@ GarethOwen ClaireO’Donovan AndrewLeach - JohnMcNaught + JohnMcNaught SteveTurner SophiaAnaniadou L18-1042 @@ -415,10 +415,10 @@ Parallel Corpora for the Biomedical Domain - AurélieNévéol - AntonioJimeno Yepes + AurélieNévéol + AntonioJimeno Yepes MarianaNeves - KarinVerspoor + KarinVerspoor L18-1043 neveol-etal-2018-parallel @@ -436,7 +436,7 @@ Word Embedding Approach for Synonym Extraction of Multi-Word Terms AmirHazem - BéatriceDaille + BéatriceDaille L18-1045 hazem-daille-2018-word @@ -458,7 +458,7 @@ RuchitAgrawal VighneshChenthil Kumar VigneshwaranMuralidharan - DiptiSharma + DiptiSharma L18-1048 agrawal-etal-2018-beating
@@ -466,7 +466,7 @@ Sentence Level Temporality Detection using an Implicit Time-sensed Resource SabyasachiKamila AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya L18-1049 kamila-etal-2018-sentence
@@ -510,28 +510,28 @@ <fixed-case>SW</fixed-case>4<fixed-case>ALL</fixed-case>: a <fixed-case>CEFR</fixed-case> Classified and Aligned Corpus for Language Learning RodrigoWilkens LeonardoZilio - CédrickFairon + CédrickFairon L18-1055 wilkens-etal-2018-sw4all
Towards a Diagnosis of Textual Difficulties for Children with Dyslexia SolenQuiniou - BéatriceDaille + BéatriceDaille L18-1056 quiniou-daille-2018-towards Coreference Resolution in <fixed-case>F</fixed-case>ree<fixed-case>L</fixed-case>ing 4.0 - MontserratMarimon - LluísPadró - JordiTurmo + MontserratMarimon + LluísPadró + JordiTurmo L18-1057 marimon-etal-2018-coreference <fixed-case>BASHI</fixed-case>: A Corpus of <fixed-case>W</fixed-case>all <fixed-case>S</fixed-case>treet <fixed-case>J</fixed-case>ournal Articles Annotated with Bridging Links - InaRösiger + InaRösiger L18-1058 rosiger-2018-bashi @@ -554,13 +554,13 @@ VeronikaVincze KláraHegedűs AlexSliz-Nagy - RichárdFarkas + RichárdFarkas L18-1061 vincze-etal-2018-szegedkoref
A Corpus to Learn Refer-to-as Relations for Nominals - WasiAhmad + WasiAhmad Kai-WeiChang L18-1062 ahmad-chang-2018-corpus @@ -572,7 +572,7 @@ AlbertoTonon PhilippeCudré-Mauroux Djellel EddineDifallah - RaphaëlTroncy + RaphaëlTroncy GiuseppeRizzo L18-1063 plu-etal-2018-sanaphor @@ -581,7 +581,7 @@ <fixed-case>ANCOR</fixed-case>-<fixed-case>AS</fixed-case>: Enriching the <fixed-case>ANCOR</fixed-case> Corpus with Syntactic Annotations LoïcGrobol IsabelleTellier - Éricde la Clergerie + Éricde la Clergerie MarcoDinarelli FrédéricLandragin L18-1064 @@ -661,16 +661,16 @@ IrisHendrickx EiriniTakoulidou ThanasisNaskos - Katia LidaKermanidis + Katia LidaKermanidis VilelminiSosoni Hugode Vos MariaStasimioti - Mennovan Zaanen + Mennovan Zaanen PanayotaGeorgakopoulou ValiaKordoni - MajaPopovic + MajaPopovic MarkusEgg - Antalvan den Bosch + Antalvan den Bosch L18-1073 hendrickx-etal-2018-multilingual @@ -685,11 +685,11 @@ Translation Crowdsourcing: Creating a Multilingual Corpus of Online Educational Content VilelminiSosoni - Katia LidaKermanidis + Katia LidaKermanidis MariaStasimioti ThanasisNaskos EiriniTakoulidou - Mennovan Zaanen + Mennovan Zaanen SheilaCastilho PanayotaGeorgakopoulou ValiaKordoni @@ -706,7 +706,7 @@ <fixed-case>C</fixed-case>hinese Relation Classification using Long Short Term Memory Networks LinruiZhang - DanMoldovan + DanMoldovan L18-1077 zhang-moldovan-2018-chinese @@ -719,7 +719,7 @@ XiaoyanYu RuifengXu TengjiaoWang - Kam-faiWong + Kam-faiWong L18-1078 li-etal-2018-uir @@ -727,7 +727,7 @@ <fixed-case>E</fixed-case>vent<fixed-case>W</fixed-case>iki: A Knowledge Base of Major Events TaoGe LeiCui - BaobaoChang + BaobaoChang ZhifangSui FuruWei MingZhou @@ -737,7 +737,7 @@ Annotating Spin in Biomedical Scientific Publications : the case of Random Controlled Trials (<fixed-case>RCT</fixed-case>s) AnnaKoroleva - PatrickParoubek + PatrickParoubek L18-1080 koroleva-paroubek-2018-annotating @@ -777,8 +777,8 @@ JannikStrötgen Anne-LyseMinard LukasLange - ManuelaSperanza - BernardoMagnini + ManuelaSperanza + BernardoMagnini L18-1085 strotgen-etal-2018-krauts
@@ -790,8 +790,8 @@ TomRedman ChristosChristodoulopoulos VivekSrikumar - NicholasRizzolo - LevRatinov + NicholasRizzolo + LevRatinov GuanhengLuo QuangDo Chen-TseTsai @@ -862,8 +862,8 @@ Enriching Frame Representations with Distributionally Induced Senses StefanoFaralli AlexanderPanchenko - ChrisBiemann - Simone PaoloPonzetto + ChrisBiemann + Simone PaoloPonzetto L18-1093 faralli-etal-2018-enriching
@@ -945,7 +945,7 @@ VivianLi AndreiLopatenko DanielaStepanov - YoshihikoSuhara + YoshihikoSuhara Wang-ChiewTan YinzhanXu L18-1103 @@ -963,7 +963,7 @@ <fixed-case>B</fixed-case>log<fixed-case>S</fixed-case>et-<fixed-case>BR</fixed-case>: A <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese Blog Corpus HenriqueSantos ViniciusWoloszyn - RenataVieira + RenataVieira L18-1105 santos-etal-2018-blogset
@@ -978,7 +978,7 @@ GideonMendels VictorSoto AaronJaech - JuliaHirschberg + JuliaHirschberg L18-1107 mendels-etal-2018-collecting
@@ -993,14 +993,14 @@ A Taxonomy for In-depth Evaluation of Normalization for User Generated Content Robvan der Goot Rikvan Noord - Gertjanvan Noord + Gertjanvan Noord L18-1109 van-der-goot-etal-2018-taxonomy
Gaining and Losing Influence in Online Conversation ArunSharma - TomekStrzalkowski + TomekStrzalkowski L18-1110 sharma-strzalkowski-2018-gaining @@ -1021,7 +1021,7 @@
Correction of <fixed-case>OCR</fixed-case> Word Segmentation Errors in Articles from the <fixed-case>ACL</fixed-case> Collection through Neural Machine Translation Methods - ViviNastase + ViviNastase JulianHitschler L18-1113 nastase-hitschler-2018-correction @@ -1042,7 +1042,7 @@ <fixed-case>PDF</fixed-case>-to-Text Reanalysis for Linguistic Data Mining - Michael WayneGoodman + Michael WayneGoodman RyanGeorgi FeiXia L18-1116 @@ -1097,7 +1097,7 @@ Towards Continuous Dialogue Corpus Creation: writing to corpus and generating from it AndreiMalchanau VolhaPetukhova - HarryBunt + HarryBunt L18-1121 malchanau-etal-2018-towards @@ -1156,39 +1156,39 @@ Towards faithfully visualizing global linguistic diversity GarlandMcNew CurdinDerungs - StevenMoran + StevenMoran L18-1129 mcnew-etal-2018-towards
The <fixed-case>G</fixed-case>erma<fixed-case>P</fixed-case>arl Corpus of Parliamentary Protocols AndreasBlätte - AndreBlessing + AndreBlessing L18-1130 blatte-blessing-2018-germaparl Identifying Speakers and Addressees in Dialogues Extracted from Literary Fiction AdamEk - MatsWirén + MatsWirén RobertÖstling - KristinaN. Björkenstam - GintarėGrigonytė - SofiaGustafson Capková + KristinaN. Björkenstam + GintarėGrigonytė + SofiaGustafson Capková L18-1131 ek-etal-2018-identifying Word Embedding Evaluation Datasets and <fixed-case>W</fixed-case>ikipedia Title Embedding for <fixed-case>C</fixed-case>hinese Chi-YenChen - Wei-YunMa + Wei-YunMa L18-1132 chen-ma-2018-word An Automatic Learning of an <fixed-case>A</fixed-case>lgerian Dialect Lexicon by using Multilingual Word Embeddings AbidiKarima - KamelSmaïli + KamelSmaïli L18-1133 karima-smaili-2018-automatic @@ -1208,10 +1208,10 @@
Tools for Building an Interlinked Synonym Lexicon Network - ZdeňkaUrešová - EvaFučíková - EvaHajičová - JanHajič + ZdeňkaUrešová + EvaFučíková + EvaHajičová + JanHajič L18-1136 uresova-etal-2018-tools @@ -1267,15 +1267,15 @@ Exploiting Pre-Ordering for Neural Machine Translation YangZhao JiajunZhang - ChengqingZong + ChengqingZong L18-1143 zhao-etal-2018-exploiting
Improving a Multi-Source Neural Machine Translation Model with Corpus Extension for Low-Resource Languages - Gyu-HyeonChoi + Gyu-HyeonChoi Jong-HunShin - Young-KilKim + Young-KilKim L18-1144 choi-etal-2018-improving @@ -1284,8 +1284,8 @@ Zi-YiDou HaoZhou Shu-JianHuang - Xin-YuDai - Jia-JunChen + Xin-YuDai + Jia-JunChen L18-1145 dou-etal-2018-dynamic
@@ -1293,7 +1293,7 @@ One Sentence One Model for Neural Machine Translation XiaoqingLi JiajunZhang - ChengqingZong + ChengqingZong L18-1146 li-etal-2018-one
@@ -1301,7 +1301,7 @@ A Parallel Corpus of <fixed-case>A</fixed-case>rabic-<fixed-case>J</fixed-case>apanese News Articles GoInoue NizarHabash - YujiMatsumoto + YujiMatsumoto HiroyukiAoyama L18-1147 inoue-etal-2018-parallel @@ -1316,9 +1316,9 @@
Automatic Enrichment of Terminological Resources: the <fixed-case>IATE</fixed-case> <fixed-case>RDF</fixed-case> Example - MihaelArcan + MihaelArcan ElenaMontiel-Ponsoda - John P.McCrae + John P.McCrae PaulBuitelaar L18-1149 arcan-etal-2018-automatic @@ -1333,7 +1333,7 @@ Translating Web Search Queries into Natural Language Questions AdarshKumar - SandipanDandapat + SandipanDandapat SushilChordia L18-1151 kumar-etal-2018-translating @@ -1348,7 +1348,7 @@ Acquiring Verb Classes Through Bottom-Up Semantic Verb Clustering OlgaMajewska - DianaMcCarthy + DianaMcCarthy IvanVulić AnnaKorhonen L18-1153 @@ -1356,7 +1356,7 @@ Constructing High Quality Sense-specific Corpus and Word Embedding via Unsupervised Elimination of Pseudo Multi-sense - HaoyueShi + HaoyueShi XihaoWang YuqiSun JunfengHu @@ -1373,7 +1373,7 @@ Social Image Tags as a Source of Word Embeddings: A Task-oriented Evaluation MikaHasegawa TetsunoriKobayashi - YoshihikoHayashi + YoshihikoHayashi L18-1156 hasegawa-etal-2018-social @@ -1386,7 +1386,7 @@ Towards a <fixed-case>W</fixed-case>elsh Semantic Annotation System - ScottPiao + ScottPiao PaulRayson DawnKnight GarethWatkins @@ -1397,8 +1397,8 @@ Semantic Frame Parsing for Information Extraction : the <fixed-case>CALOR</fixed-case> corpus GabrielMarzinotto JeremyAuguste - FredericBechet - GeraldineDamnati + FredericBechet + GeraldineDamnati AlexisNasr L18-1159 marzinotto-etal-2018-semantic @@ -1414,7 +1414,7 @@ A Multi- versus a Single-classifier Approach for the Identification of Modality in the <fixed-case>P</fixed-case>ortuguese Language JoãoSequeira - TeresaGonçalves + TeresaGonçalves PauloQuaresma AmáliaMendes IrisHendrickx @@ -1445,8 +1445,8 @@ DenisTeslenko AlexanderPanchenko MikhailChernoskutov - ChrisBiemann - Simone PaoloPonzetto + ChrisBiemann + Simone PaoloPonzetto L18-1164 ustalov-etal-2018-unsupervised @@ -1455,8 +1455,8 @@ KijongHan SanghaNam JiseongKim - YounggyunHahm - Key-SunChoi + YounggyunHahm + Key-SunChoi L18-1165 han-etal-2018-unsupervised @@ -1471,7 +1471,7 @@ Retrofitting Word Representations for Unsupervised Sense Aware Word Similarities SteffenRemus - ChrisBiemann + ChrisBiemann L18-1167 remus-biemann-2018-retrofitting @@ -1488,8 +1488,8 @@ Text Annotation Graphs: Annotating Complex Natural Language Phenomena AngusForbes KristineLee - GusHahn-Powell - Marco A.Valenzuela-Escárcega + GusHahn-Powell + Marco A.Valenzuela-Escárcega MihaiSurdeanu L18-1169 forbes-etal-2018-text @@ -1497,7 +1497,7 @@ <fixed-case>M</fixed-case>anzanilla: An Image Annotation Tool for <fixed-case>TKB</fixed-case> Building ArianneReimerink - PilarLeón-Araúz + PilarLeón-Araúz L18-1170 reimerink-leon-arauz-2018-manzanilla @@ -1517,7 +1517,7 @@ <fixed-case>WASA</fixed-case>: A Web Application for Sequence Annotation FahadAlGhamdi - MonaDiab + MonaDiab L18-1173 alghamdi-diab-2018-wasa @@ -1533,7 +1533,7 @@ <fixed-case>PDFA</fixed-case>nno: a Web-based Linguistic Annotation Tool for <fixed-case>PDF</fixed-case> Documents HiroyukiShindo YoheiMunesada - YujiMatsumoto + YujiMatsumoto L18-1175 shindo-etal-2018-pdfanno @@ -1546,8 +1546,8 @@
An Annotation Language for Semantic Search of Legal Sources - AdelineNazarenko - FrançoisLevy + AdelineNazarenko + FrançoisLevy AdamWyner L18-1177 nazarenko-etal-2018-annotation @@ -1588,7 +1588,7 @@ <fixed-case>JESC</fixed-case>: <fixed-case>J</fixed-case>apanese-<fixed-case>E</fixed-case>nglish Subtitle Corpus ReidPryzant YoungjooChung - DanJurafsky + DanJurafsky DennyBritz L18-1182 pryzant-etal-2018-jesc @@ -1599,7 +1599,7 @@ GeorgesNeto BarbaraSilva DanielleMonteiro - IvandréParaboni + IvandréParaboni RafaelDias L18-1183 ramos-etal-2018-building @@ -1608,7 +1608,7 @@ Linguistic and Sociolinguistic Annotation of 17th Century <fixed-case>D</fixed-case>utch Letters MarijnSchraagen FeikeDietz - Marjovan Koppen + Marjovan Koppen L18-1184 schraagen-etal-2018-linguistic @@ -1629,7 +1629,7 @@ <fixed-case>ASAP</fixed-case>++: Enriching the <fixed-case>ASAP</fixed-case> Automated Essay Grading Dataset with Essay Attribute Scores SandeepMathias - PushpakBhattacharyya + PushpakBhattacharyya L18-1187 mathias-bhattacharyya-2018-asap @@ -1645,16 +1645,16 @@
The Reference Corpus of the Contemporary <fixed-case>R</fixed-case>omanian Language (<fixed-case>C</fixed-case>o<fixed-case>R</fixed-case>o<fixed-case>L</fixed-case>a) - VerginicaBarbu Mititelu - DanTufiș + VerginicaBarbu Mititelu + DanTufiș ElenaIrimia L18-1189 barbu-mititelu-etal-2018-reference A Corpus of Drug Usage Guidelines Annotated with Type of Advice - Sarah MasudPreum - Md. RizwanParvez + Sarah MasudPreum + Md. RizwanParvez Kai-WeiChang JohnStankovic L18-1190 @@ -1663,14 +1663,14 @@ <fixed-case>B</fixed-case>io<fixed-case>R</fixed-case>o: The Biomedical Corpus for the <fixed-case>R</fixed-case>omanian Language MariaMitrofan - DanTufiş + DanTufiş L18-1191 mitrofan-tufis-2018-bioro A Comparison Of Emotion Annotation Schemes And A New Annotated Data Set IanWood - John P.McCrae + John P.McCrae VladimirAndryushechkin PaulBuitelaar L18-1192 @@ -1681,7 +1681,7 @@ AnkushKhandelwal SahilSwami Syed S.Akhtar - ManishShrivastava + ManishShrivastava L18-1193 khandelwal-etal-2018-humor @@ -1713,7 +1713,7 @@ <fixed-case>W</fixed-case>iki<fixed-case>A</fixed-case>rt Emotions: An Annotated Dataset of Emotions Evoked by Art - SaifMohammad + SaifMohammad SvetlanaKiritchenko L18-1197 mohammad-kiritchenko-2018-wikiart @@ -1731,7 +1731,7 @@ Sentence and Clause Level Emotion Annotation, Detection, and Classification in a Multi-Genre Corpus ShabnamTafreshi - MonaDiab + MonaDiab L18-1199 tafreshi-diab-2018-sentence @@ -1739,7 +1739,7 @@ A <fixed-case>S</fixed-case>wedish Cookie-Theft Corpus DimitriosKokkinakis KristinaLundholm Fors - KathleenFraser + KathleenFraser ArtoNordlund L18-1200 kokkinakis-etal-2018-swedish @@ -1747,7 +1747,7 @@ Sharing Copies of Synthetic Clinical Corpora without Physical Distribution — A Case Study to Get Around <fixed-case>IPR</fixed-case>s and Privacy Constraints Featuring the <fixed-case>G</fixed-case>erman <fixed-case>JSYNCC</fixed-case> Corpus ChristinaLohr - SvenBuechel + SvenBuechel UdoHahn L18-1201 lohr-etal-2018-sharing @@ -1757,14 +1757,14 @@ RichardEckart de Castilho GiuliaDore ThomasMargoni - PennyLabropoulou + PennyLabropoulou IrynaGurevych L18-1202 eckart-de-castilho-etal-2018-legal <fixed-case>LREM</fixed-case>ap, a Song of Resources and Evaluation - RiccardoDel Gratta + RiccardoDel Gratta SaraGoggi GabriellaPardelli NicolettaCalzolari @@ -1773,7 +1773,7 @@ Metadata Collection Records for Language Resources - Henkvan den Heuvel + Henkvan den Heuvel ErwinKomen NellekeOostdijk L18-1204 @@ -1782,7 +1782,7 @@ Managing Public Sector Data for Multilingual Applications Development SteliosPiperidis - PennyLabropoulou + PennyLabropoulou MiltosDeligiannis MariaGiagkou L18-1205 @@ -1790,11 +1790,11 @@ Bridging the <fixed-case>LAPPS</fixed-case> <fixed-case>G</fixed-case>rid and <fixed-case>CLARIN</fixed-case> - ErhardHinrichs - NancyIde - JamesPustejovsky - JanHajič - MarieHinrichs + ErhardHinrichs + NancyIde + JamesPustejovsky + JanHajič + MarieHinrichs Mohammad FazlehElahi KeithSuderman MarcVerhagen @@ -1806,22 +1806,22 @@ Fluid Annotation: A Granularity-aware Annotation Tool for <fixed-case>C</fixed-case>hinese Word Fluidity - Shu-KaiHsieh + Shu-KaiHsieh Yu-HsiangTseng - Chih-YaoLee + Chih-YaoLee Chiung-YuChiang L18-1207 hsieh-etal-2018-fluid <fixed-case>E</fixed-case>-magyar – A Digital Language Processing System - TamásVáradi + TamásVáradi EszterSimon BálintSass IvánMittelholcz AttilaNovák BalázsIndig - RichárdFarkas + RichárdFarkas VeronikaVincze L18-1208 varadi-etal-2018-e @@ -1843,17 +1843,17 @@ <fixed-case>CLARIN</fixed-case>’s Key Resource Families DarjaFišer JakobLenardič - TomažErjavec + TomažErjavec L18-1210 fiser-etal-2018-clarins <fixed-case>I</fixed-case>ndra: A Word Embedding and Semantic Relatedness Server - Juliano EfsonSales + Juliano EfsonSales LeonardoSouza SiamakBarzegar BrianDavis - AndréFreitas + AndréFreitas SiegfriedHandschuh L18-1211 sales-etal-2018-indra @@ -1868,22 +1868,22 @@ <fixed-case>E</fixed-case>uropean Language Resource Coordination: Collecting Language Resources for Public Sector Multilingual Information Management AndreaLösch - ValérieMapelli + ValérieMapelli SteliosPiperidis - AndrejsVasiļjevs + AndrejsVasiļjevs LilliSmal ThierryDeclerck EileenSchnur KhalidChoukri - Josefvan Genabith + Josefvan Genabith L18-1213 losch-etal-2018-european Tilde <fixed-case>MT</fixed-case> Platform for Developing Client Specific <fixed-case>MT</fixed-case> Solutions - MārcisPinnis - AndrejsVasiļjevs - RihardsKalniņš + MārcisPinnis + AndrejsVasiļjevs + RihardsKalniņš RobertsRozis RaivisSkadiņš ValtersŠics @@ -1902,7 +1902,7 @@ Text Normalization Infrastructure that Scales to Hundreds of Language Varieties MasonChua Daanvan Esch - NoahCoccaro + NoahCoccaro EunjoonCho SujeetBhandari LibinJia @@ -1911,7 +1911,7 @@ <fixed-case>D</fixed-case>e<fixed-case>M</fixed-case>odify: A Dataset for Analyzing Contextual Constraints on Modifier Deletion - ViviNastase + ViviNastase DevonFritz AnetteFrank L18-1217 @@ -1934,14 +1934,14 @@ <fixed-case>SPADE</fixed-case>: Evaluation Dataset for Monolingual Phrase Alignment YukiArase - JunichiTsujii + JunichiTsujii L18-1220 arase-tsujii-2018-spade <fixed-case>ETPC</fixed-case> - A Paraphrase Identification Corpus Annotated with Extended Paraphrase Typology and Negation VenelinKovatchev - M. AntòniaMartí + M. AntòniaMartí MariaSalamó L18-1221 kovatchev-etal-2018-etpc @@ -1966,7 +1966,7 @@ Quantifying Qualitative Data for Understanding Controversial Issues MichaelWojatzki - SaifMohammad + SaifMohammad TorstenZesch SvetlanaKiritchenko L18-1224 @@ -1991,17 +1991,17 @@ Creating a Verb Synonym Lexicon Based on a Parallel Corpus - ZdeňkaUrešová - EvaFučíková - EvaHajičová - JanHajič + ZdeňkaUrešová + EvaFučíková + EvaHajičová + JanHajič L18-1227 uresova-etal-2018-creating Evaluation of Domain-specific Word Embeddings using Knowledge Resources FarhadNooralahzadeh - LiljaØvrelid + LiljaØvrelid Jan ToreLønning L18-1228 nooralahzadeh-etal-2018-evaluation @@ -2017,17 +2017,17 @@ Automatic <fixed-case>W</fixed-case>ordnet Mapping: from <fixed-case>C</fixed-case>ore<fixed-case>N</fixed-case>et to <fixed-case>P</fixed-case>rinceton <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et JiseongKim - YounggyunHahm + YounggyunHahm SunggooKwon - Key-SunChoi + Key-SunChoi L18-1230 kim-etal-2018-automatic The New <fixed-case>P</fixed-case>ropbank: Aligning <fixed-case>P</fixed-case>ropbank with <fixed-case>AMR</fixed-case> through <fixed-case>POS</fixed-case> Unification TimO’Gorman - SameerPradhan - MarthaPalmer + SameerPradhan + MarthaPalmer JuliaBonn KatieConger JamesGung @@ -2045,8 +2045,8 @@ The <fixed-case>F</fixed-case>rench-<fixed-case>A</fixed-case>lgerian Code-Switching Triggered audio corpus (<fixed-case>FACST</fixed-case>) AmazouzDjegdjiga - MartineAdda-Decker - LoriLamel + MartineAdda-Decker + LoriLamel L18-1233 djegdjiga-etal-2018-french @@ -2082,7 +2082,7 @@ Evaluating the <fixed-case>W</fixed-case>ords<fixed-case>E</fixed-case>ye Text-to-Scene System: Imaginative and Realistic Sentences MorganUlinski BobCoyne - JuliaHirschberg + JuliaHirschberg L18-1237 ulinski-etal-2018-evaluating @@ -2112,7 +2112,7 @@ <fixed-case>C</fixed-case>-<fixed-case>HTS</fixed-case>: A Concept-based Hierarchical Text Segmentation approach MostafaBayomi - SéamusLawless + SéamusLawless L18-1241 bayomi-lawless-2018-c @@ -2126,7 +2126,7 @@ A Corpus of Metaphor Novelty Scores for Syntactically-Related Word Pairs NatalieParde - RodneyNielsen + RodneyNielsen L18-1243 parde-nielsen-2018-corpus @@ -2135,8 +2135,8 @@ AlexanderPanchenko DmitryUstalov StefanoFaralli - Simone P.Ponzetto - ChrisBiemann + Simone P.Ponzetto + ChrisBiemann L18-1244 panchenko-etal-2018-improving @@ -2144,7 +2144,7 @@ Laying the Groundwork for Knowledge Base Population: Nine Years of Linguistic Resources for <fixed-case>TAC</fixed-case> <fixed-case>KBP</fixed-case> JeremyGetman JoeEllis - StephanieStrassel + StephanieStrassel ZhiyiSong JenniferTracey L18-1245 @@ -2152,7 +2152,7 @@ A Dataset for Inter-Sentence Relation Extraction using Distant Supervision - AngroshMandya + AngroshMandya DanushkaBollegala FransCoenen KatieAtkinson @@ -2164,7 +2164,7 @@ JakubNáplava MilanStraka PavelStraňák - JanHajič + JanHajič L18-1247 naplava-etal-2018-diacritics @@ -2172,7 +2172,7 @@ Ensemble <fixed-case>R</fixed-case>omanian Dependency Parsing with Neural Networks RaduIon ElenaIrimia - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu L18-1248 ion-etal-2018-ensemble @@ -2186,7 +2186,7 @@ Collection of Multimodal Dialog Data and Analysis of the Result of Annotation of Users’ Interest Level - MasahiroAraki + MasahiroAraki SayakaTomimasu MikioNakano KazunoriKomatani @@ -2212,7 +2212,7 @@ Chao-ChunHsu Sheng-YehChen Chuan-ChunKuo - Ting-HaoHuang + Ting-HaoHuang Lun-WeiKu L18-1252 hsu-etal-2018-emotionlines @@ -2258,7 +2258,7 @@ A Corpus of e<fixed-case>R</fixed-case>ulemaking User Comments for Measuring Evaluability of Arguments JoonsukPark - ClaireCardie + ClaireCardie L18-1257 park-cardie-2018-corpus @@ -2275,7 +2275,7 @@ Discourse Coherence Through the Lens of an Annotated Text Corpus: A Case Study - EvaHajičová + EvaHajičová JiříMírovský L18-1259 hajicova-mirovsky-2018-discourse @@ -2302,9 +2302,9 @@ <fixed-case>BDPROTO</fixed-case>: A Database of Phonological Inventories from Ancient and Reconstructed Languages EgidioMarsico - SebastienFlavier + SebastienFlavier AnnemarieVerkerk - StevenMoran + StevenMoran L18-1262 marsico-etal-2018-bdproto @@ -2320,7 +2320,7 @@ Building a Word Segmenter for <fixed-case>S</fixed-case>anskrit Overnight VikasReddy AmrithKrishna - VishnuSharma + VishnuSharma PrateekGupta VineethM R PawanGoyal @@ -2332,19 +2332,19 @@ KiraGriffitt JenniferTracey AnnBies - StephanieStrassel + StephanieStrassel L18-1265 griffitt-etal-2018-simple <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation of Constructions: The More We Include, the Better the Representation - ClaireBonial + ClaireBonial BiancaBadarau KiraGriffitt UlfHermjakob KevinKnight TimO’Gorman - MarthaPalmer + MarthaPalmer NathanSchneider L18-1266 bonial-etal-2018-abstract @@ -2415,7 +2415,7 @@ <fixed-case>O</fixed-case>pen<fixed-case>S</fixed-case>ubtitles2018: Statistical Rescoring of Sentence Alignments in Large, Noisy Parallel Corpora PierreLison - JörgTiedemann + JörgTiedemann MilenKouylekov L18-1275 lison-etal-2018-opensubtitles2018 @@ -2433,7 +2433,7 @@ <fixed-case>E</fixed-case>uro<fixed-case>G</fixed-case>ames16: Evaluating Change Detection in Online Conversation - CyrilGoutte + CyrilGoutte YunliWang FangmingLiao ZacharyZanussi @@ -2444,9 +2444,9 @@ A Deep Neural Network based Approach for Entity Extraction in Code-Mixed <fixed-case>I</fixed-case>ndian Social Media Text - DeepakGupta + DeepakGupta AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya L18-1278 gupta-etal-2018-deep @@ -2454,10 +2454,10 @@ <fixed-case>P</fixed-case>o<fixed-case>STWITA</fixed-case>-<fixed-case>UD</fixed-case>: an <fixed-case>I</fixed-case>talian <fixed-case>T</fixed-case>witter Treebank in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies ManuelaSanguinetti CristinaBosco - AlbertoLavelli - AlessandroMazzei + AlbertoLavelli + AlessandroMazzei OronzoAntonelli - FabioTamburini + FabioTamburini L18-1279 sanguinetti-etal-2018-postwita @@ -2478,8 +2478,8 @@ Towards an <fixed-case>ISO</fixed-case> Standard for the Annotation of Quantification - HarryBunt - JamesPustejovsky + HarryBunt + JamesPustejovsky KiyongLee L18-1282 bunt-etal-2018-towards @@ -2495,7 +2495,7 @@ A Gold Standard for Multilingual Automatic Term Extraction from Comparable Corpora: Term Structure and Translation Equivalents AylaRigouts Terryn - VéroniqueHoste + VéroniqueHoste ElsLefever L18-1284 rigouts-terryn-etal-2018-gold @@ -2504,7 +2504,7 @@ Handling Big Data and Sensitive Data Using <fixed-case>EUDAT</fixed-case>’s Generic Execution Framework and the <fixed-case>W</fixed-case>eb<fixed-case>L</fixed-case>icht Workflow Engine. ClausZinn WeiQui - MarieHinrichs + MarieHinrichs EmanuelDima AlexandrChernov L18-1285 @@ -2515,8 +2515,8 @@ AlexanderPanchenko EugenRuppert StefanoFaralli - Simone P.Ponzetto - ChrisBiemann + Simone P.Ponzetto + ChrisBiemann L18-1286 panchenko-etal-2018-building @@ -2528,7 +2528,7 @@ YusukeMiyao SumireUematsu ShinsukeMori - YujiMatsumoto + YujiMatsumoto MaiOmura YugoMurawaki L18-1287 @@ -2552,7 +2552,7 @@ Parse Me if You Can: Artificial Treebanks for Parsing Experiments on Elliptical Constructions KiraDroganova - DanielZeman + DanielZeman JennaKanerva FilipGinter L18-1290 @@ -2562,13 +2562,13 @@ Semi-Automatic Construction of Word-Formation Networks (for <fixed-case>P</fixed-case>olish and <fixed-case>S</fixed-case>panish) MateuszLango MagdaŠevčíková - ZdeněkŽabokrtský + ZdeněkŽabokrtský L18-1291 lango-etal-2018-semi A multilingual collection of <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-<fixed-case>U</fixed-case>-compatible morphological lexicons - BenoîtSagot + BenoîtSagot L18-1292 sagot-2018-multilingual @@ -2581,11 +2581,11 @@ EkaterinaVylomova PatrickXia ManaalFaruqui - Sabrina J.Mielke - AryaMcCarthy - SandraKübler + Sabrina J.Mielke + AryaMcCarthy + SandraKübler DavidYarowsky - JasonEisner + JasonEisner MansHulden L18-1293 @@ -2596,7 +2596,7 @@ A Computational Architecture for the Morphology of <fixed-case>U</fixed-case>pper <fixed-case>T</fixed-case>anana OlgaLovick ChristopherCox - MiikkaSilfverberg + MiikkaSilfverberg AnttiArppe MansHulden L18-1294 @@ -2620,13 +2620,13 @@ Measuring Innovation in Speech and Language Processing Publications. JosephMariani GilFrancopoulo - PatrickParoubek + PatrickParoubek L18-1297 mariani-etal-2018-measuring <fixed-case>PDF</fixed-case>digest: an Adaptable Layout-Aware <fixed-case>PDF</fixed-case>-to-<fixed-case>XML</fixed-case> Textual Content Extractor for Scientific Articles - DanielFerrés + DanielFerrés HoracioSaggion FrancescoRonzano ÀlexBravo @@ -2669,7 +2669,7 @@ XiaominChu FengJiang ShengXu - QiaomingZhu + QiaomingZhu L18-1302 chu-etal-2018-building @@ -2733,14 +2733,14 @@ Extending the gold standard for a lexical substitution task: is it worth it? LudovicTanguy - CécileFabre + CécileFabre LauraRivière L18-1310 tanguy-etal-2018-extending Lexical and Semantic Features for Cross-lingual Text Reuse Classification: an Experiment in <fixed-case>E</fixed-case>nglish and <fixed-case>L</fixed-case>atin Paraphrases - MariaMoritz + MariaMoritz DavidSteding L18-1311 moritz-steding-2018-lexical @@ -2757,7 +2757,7 @@ ZsanettFerenczi IvánMittelholcz EszterSimon - TamásVáradi + TamásVáradi L18-1313 ferenczi-etal-2018-evaluation @@ -2785,7 +2785,7 @@ GengyuWang SeungtaekChoi HyunsoukCho - ReinaldKim Amplayo + ReinaldKim Amplayo Seung-wonHwang L18-1316 yeo-etal-2018-visual @@ -2793,8 +2793,8 @@ Is it worth it? Budget-related evaluation metrics for model selection FilipKlubička - Giancarlo D.Salton - John D.Kelleher + Giancarlo D.Salton + John D.Kelleher L18-1317 klubicka-etal-2018-worth @@ -2802,7 +2802,7 @@ Automated Evaluation of Out-of-Context Errors PatrickHuber JanNiehues - AlexWaibel + AlexWaibel L18-1318 huber-etal-2018-automated @@ -2828,29 +2828,29 @@ <fixed-case>MI</fixed-case>s<fixed-case>A</fixed-case>: Multilingual “<fixed-case>I</fixed-case>s<fixed-case>A</fixed-case>” Extraction from Corpora StefanoFaralli ElsLefever - Simone PaoloPonzetto + Simone PaoloPonzetto L18-1321 faralli-etal-2018-misa
Biomedical term normalization of <fixed-case>EHR</fixed-case>s with <fixed-case>UMLS</fixed-case> - NaiaraPerez-Miguel + NaiaraPerez-Miguel MontseCuadros - GermanRigau + GermanRigau L18-1322 perez-miguel-etal-2018-biomedical Revisiting the Task of Scoring Open <fixed-case>IE</fixed-case> Relations WilliamLéchelle - PhilippeLanglais + PhilippeLanglais L18-1323 lechelle-langlais-2018-revisiting A supervised approach to taxonomy extraction using word embeddings RajdeepSarkar - John P.McCrae + John P.McCrae PaulBuitelaar L18-1324 sarkar-etal-2018-supervised @@ -2872,7 +2872,7 @@ Mining Biomedical Publications With The <fixed-case>LAPPS</fixed-case> <fixed-case>G</fixed-case>rid - NancyIde + NancyIde KeithSuderman Jin-DongKim L18-1327 @@ -2881,15 +2881,15 @@ An Initial Test Collection for Ranked Retrieval of <fixed-case>SMS</fixed-case> Conversations RashmiSankepally - Douglas W.Oard + Douglas W.Oard L18-1328 sankepally-oard-2018-initial <fixed-case>F</fixed-case>r<fixed-case>N</fixed-case>ews<fixed-case>L</fixed-case>ink : a corpus linking <fixed-case>TV</fixed-case> Broadcast News Segments and Press Articles NathalieCamelin - GéraldineDamnati - AbdessalamBouchekif + GéraldineDamnati + AbdessalamBouchekif AnaisLandeau DelphineCharlet YannickEstève @@ -2909,7 +2909,7 @@ LucieSkorkovská PetrNeduchal PavelIrcing - Josef V.Psutka + Josef V.Psutka MarekHrúz AlešPražák DanielSoutner @@ -2935,7 +2935,7 @@ The Effects of Unimodal Representation Choices on Multimodal Learning Fernando TadaoIto - Helenade Medeiros Caseli + Helenade Medeiros Caseli JanderMoreira L18-1334 ito-etal-2018-effects @@ -2943,7 +2943,7 @@ An Evaluation Framework for Multimodal Interaction NikhilKrishnaswamy - JamesPustejovsky + JamesPustejovsky L18-1335 krishnaswamy-pustejovsky-2018-evaluation @@ -2952,7 +2952,7 @@ AhmedAbdelali IrinaTemnikova SamyHedaya - StephanVogel + StephanVogel L18-1336 abdelali-etal-2018-waw @@ -2977,7 +2977,7 @@ CédricFayet ArnaudDelhay DamienLolive - Pierre-FrançoisMarteau + Pierre-FrançoisMarteau L18-1339 fayet-etal-2018-emo @@ -3038,13 +3038,13 @@ JayeolChun Na-RaeHan Jena D.Hwang - Jinho D.Choi + Jinho D.Choi L18-1347 chun-etal-2018-building Moving <fixed-case>TIGER</fixed-case> beyond Sentence-Level - AgnieszkaFalenska + AgnieszkaFalenska KerstinEckart JonasKuhn L18-1348 @@ -3059,9 +3059,9 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>A</fixed-case>mharic - Binyam EphremSeyoum + Binyam EphremSeyoum YusukeMiyao - Baye YimamMekonnen + Baye YimamMekonnen L18-1350 seyoum-etal-2018-universal @@ -3074,7 +3074,7 @@
Multilingual Dependency Parsing for Low-Resource Languages: Case Studies on North Saami and <fixed-case>K</fixed-case>omi-<fixed-case>Z</fixed-case>yrian - KyungTaeLim + KyungTaeLim NikoPartanen ThierryPoibeau L18-1352 @@ -3104,7 +3104,7 @@ NorikoKawahara MihoSakamoto YoshitakaUchida - YujiMatsumoto + YujiMatsumoto L18-1355 takaoka-etal-2018-sudachi @@ -3115,7 +3115,7 @@ YusukeKoyanagi NorikoIkeda HiroyukiShindo - YujiMatsumoto + YujiMatsumoto L18-1356 tanaka-etal-2018-chemical
@@ -3145,7 +3145,7 @@ JanOdijk AlexisDimitriadis Martijnvan der Klis - Marjovan Koppen + Marjovan Koppen MeieOtten Remcovan der Veen L18-1360 @@ -3153,7 +3153,7 @@
<fixed-case>B</fixed-case>aby<fixed-case>C</fixed-case>loud, a Technological Platform for Parents and Researchers - Xuân-NgaCao + Xuân-NgaCao CyrilleDakhlia PatriciaDel Carmen Mohamed-AmineJaouani @@ -3173,8 +3173,8 @@ Building a <fixed-case>TOCFL</fixed-case> Learner Corpus for <fixed-case>C</fixed-case>hinese Grammatical Error Diagnosis Lung-HaoLee - Yuen-HsienTseng - Li-PingChang + Yuen-HsienTseng + Li-PingChang L18-1363 lee-etal-2018-building @@ -3203,9 +3203,9 @@ Developing New Linguistic Resources and Tools for the <fixed-case>G</fixed-case>alician Language - RodrigoAgerri - XavierGómez Guinovart - GermanRigau + RodrigoAgerri + XavierGómez Guinovart + GermanRigau Miguel AnxoSolla Portela L18-1367 agerri-etal-2018-developing @@ -3215,7 +3215,7 @@ JordanLachler LeneAntonsen TrondTrosterud - SjurMoshagen + SjurMoshagen AnttiArppe L18-1368 lachler-etal-2018-modeling @@ -3225,7 +3225,7 @@ CaitlinRichter MatthewWickes DenizBeser - MitchMarcus + MitchMarcus L18-1369 richter-etal-2018-low @@ -3252,7 +3252,7 @@ RalfGrubenmann DonTuggener Piusvon Däniken - JanDeriu + JanDeriu MarkCieliebak L18-1372 grubenmann-etal-2018-sb @@ -3260,7 +3260,7 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>A</fixed-case>inu HajimeSenuma - AkikoAizawa + AkikoAizawa L18-1373 senuma-aizawa-2018-universal @@ -3297,16 +3297,16 @@
Evaluating <fixed-case>E</fixed-case>co<fixed-case>L</fixed-case>exi<fixed-case>CAT</fixed-case>: a Terminology-Enhanced <fixed-case>CAT</fixed-case> Tool - PilarLeón-Araúz + PilarLeón-Araúz ArianneReimerink L18-1377 leon-arauz-reimerink-2018-evaluating A <fixed-case>D</fixed-case>anish <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Lexicon and an Annotated Corpus Used for Training and Evaluating a Semantic Frame Classifier - BolettePedersen + BolettePedersen SanniNimb - AndersSøgaard + AndersSøgaard MareikeHartmann SussiOlsen L18-1378 @@ -3324,7 +3324,7 @@ <fixed-case>P</fixed-case>ronounc<fixed-case>UR</fixed-case>: An <fixed-case>U</fixed-case>rdu Pronunciation Lexicon Generator HarisBin Zia - Agha AliRaza + Agha AliRaza AwaisAthar L18-1380 bin-zia-etal-2018-pronouncur @@ -3332,22 +3332,22 @@ <fixed-case>S</fixed-case>im<fixed-case>L</fixed-case>ex-999 for <fixed-case>P</fixed-case>olish AgnieszkaMykowiecka - MałgorzataMarciniak + MałgorzataMarciniak PiotrRychlik L18-1381 mykowiecka-etal-2018-simlex Finely Tuned, 2 Billion Token Based Word Embeddings for <fixed-case>P</fixed-case>ortuguese - JoãoRodrigues - AntónioBranco + JoãoRodrigues + AntónioBranco L18-1382 rodrigues-branco-2018-finely <fixed-case>T</fixed-case>eanga: A Linked Data based platform for Natural Language Processing HousamZiad - John P.McCrae + John P.McCrae PaulBuitelaar L18-1383 ziad-etal-2018-teanga @@ -3355,7 +3355,7 @@ Automatic and Manual Web Annotations in an Infrastructure to handle Fake News and other Online Media Phenomena GeorgRehm - JulianMoreno-Schneider + JulianMoreno-Schneider PeterBourgonje L18-1384 rehm-etal-2018-automatic @@ -3389,7 +3389,7 @@ A Bird’s-eye View of Language Processing Projects at the <fixed-case>R</fixed-case>omanian Academy - DanTufiș + DanTufiș DanCristea L18-1388 tufis-cristea-2018-birds @@ -3411,8 +3411,8 @@ Collecting Language Resources from Public Administrations in the <fixed-case>N</fixed-case>ordic and <fixed-case>B</fixed-case>altic Countries - AndrejsVasiļjevs - RihardsKalniņš + AndrejsVasiļjevs + RihardsKalniņš RobertsRozis AivarsBērziņš L18-1391 @@ -3421,7 +3421,7 @@ <fixed-case>LI</fixed-case>dioms: A Multilingual Linked Idioms Data Set DiegoMoussallem - Mohamed AhmedSherif + Mohamed AhmedSherif DiegoEsteves MarcosZampieri Axel-CyrilleNgonga Ngomo @@ -3440,7 +3440,7 @@ Annotating <fixed-case>C</fixed-case>hinese Light Verb Constructions according to <fixed-case>PARSEME</fixed-case> guidelines MenghanJiang - NataliaKlyueva + NataliaKlyueva HongzhiXu Chu-RenHuang L18-1394 @@ -3450,7 +3450,7 @@ Using <fixed-case>E</fixed-case>nglish Baits to Catch <fixed-case>S</fixed-case>erbian Multi-Word Terminology CvetanaKrstev BranislavaŠandrih - RankaStanković + RankaStanković MiljanaMladenović L18-1395 krstev-etal-2018-using @@ -3459,29 +3459,29 @@ Construction of Large-scale <fixed-case>E</fixed-case>nglish Verbal Multiword Expression Annotated Corpus AkihikoKato HiroyukiShindo - YujiMatsumoto + YujiMatsumoto L18-1396 kato-etal-2018-construction <fixed-case>K</fixed-case>onbitzul: an <fixed-case>MWE</fixed-case>-specific database for <fixed-case>S</fixed-case>panish-<fixed-case>B</fixed-case>asque UxoaIñurrieta - ItziarAduriz - ArantzaDíaz de Ilarraza - GorkaLabaka - KepaSarasola + ItziarAduriz + ArantzaDíaz de Ilarraza + GorkaLabaka + KepaSarasola L18-1397 inurrieta-etal-2018-konbitzul A Multilingual Test Collection for the Semantic Search of Entity Categories - Juliano EfsonSales + Juliano EfsonSales SiamakBarzegar WellingtonFranco BernhardBermeitinger TiagoCunha BrianDavis - AndréFreitas + AndréFreitas SiegfriedHandschuh L18-1398 sales-etal-2018-multilingual @@ -3489,7 +3489,7 @@ Towards the Inference of Semantic Relations in Complex Nominals: a Pilot Study MelaniaCabezas-García - PilarLeón-Araúz + PilarLeón-Araúz L18-1399 cabezas-garcia-leon-arauz-2018-towards @@ -3503,8 +3503,8 @@ Improving a Neural-based Tagger for Multiword Expressions Identification - DušanVariš - NataliaKlyueva + DušanVariš + NataliaKlyueva L18-1401 varis-klyueva-2018-improving @@ -3526,7 +3526,7 @@ Improving Hate Speech Detection with Deep Learning Ensembles StevenZimmerman - UdoKruschwitz + UdoKruschwitz ChrisFox L18-1404 zimmerman-etal-2018-improving @@ -3540,7 +3540,7 @@ Can Domain Adaptation be Handled as Analogies? - NúriaBel + NúriaBel JoelPocostales L18-1406 bel-pocostales-2018-domain @@ -3549,13 +3549,13 @@ Author Profiling from <fixed-case>F</fixed-case>acebook Corpora FernandoHsieh RafaelDias - IvandréParaboni + IvandréParaboni L18-1407 hsieh-etal-2018-author Semantic Relatedness of <fixed-case>W</fixed-case>ikipedia Concepts – Benchmark Data and a Working Solution - LiatEin Dor + LiatEin Dor AlonHalfon YoavKantor RanLevy @@ -3586,10 +3586,10 @@ Finite-state morphological analysis for <fixed-case>G</fixed-case>agauz - FrancisTyers + FrancisTyers SevilayBayatli GüllüKaranfil - MemduhGökırmak + MemduhGökırmak Francis M.Tyers L18-1411 tyers-etal-2018-finite @@ -3604,7 +3604,7 @@ Morphology Injection for <fixed-case>E</fixed-case>nglish-<fixed-case>M</fixed-case>alayalam Statistical Machine Translation SreelekhaS - PushpakBhattacharyya + PushpakBhattacharyya L18-1413 s-bhattacharyya-2018-morphology @@ -3650,21 +3650,21 @@ <fixed-case>EMTC</fixed-case>: Multilabel Corpus in Movie Domain for Emotion Analysis in Conversational Text Phan Duc-Anh - YujiMatsumoto + YujiMatsumoto L18-1418 phan-matsumoto-2018-emtc Complex and Precise Movie and Book Annotations in <fixed-case>F</fixed-case>rench Language for Aspect Based Sentiment Analysis StefaniaPecore - JeanneVillaneau + JeanneVillaneau L18-1419 pecore-villaneau-2018-complex <fixed-case>L</fixed-case>ingmotif-lex: a Wide-coverage, State-of-the-art Lexicon for Sentiment Analysis - AntonioMoreno-Ortiz - ChantalPérez-Hernández + AntonioMoreno-Ortiz + ChantalPérez-Hernández L18-1420 moreno-ortiz-perez-hernandez-2018-lingmotif @@ -3688,7 +3688,7 @@ The <fixed-case>SSIX</fixed-case> Corpora: Three Gold Standard Corpora for Sentiment Analysis in <fixed-case>E</fixed-case>nglish, <fixed-case>S</fixed-case>panish and <fixed-case>G</fixed-case>erman Financial Microblogs ThomasGaillat ManelZarrouk - AndréFreitas + AndréFreitas BrianDavis L18-1423 gaillat-etal-2018-ssix @@ -3697,8 +3697,8 @@ Sarcasm Target Identification: Dataset and An Introductory Approach AdityaJoshi PranavGoel - PushpakBhattacharyya - MarkCarman + PushpakBhattacharyya + MarkCarman L18-1424 joshi-etal-2018-sarcasm @@ -3725,7 +3725,7 @@ <fixed-case>W</fixed-case>ord<fixed-case>K</fixed-case>it: a Python Package for Orthographic and Phonological Featurization StéphanTulkens DominiekSandra - WalterDaelemans + WalterDaelemans L18-1427 tulkens-etal-2018-wordkit @@ -3734,13 +3734,13 @@ DavidLukeš MarieKopřivová ZuzanaKomrsková - PetraPoukarová + PetraPoukarová L18-1428 lukes-etal-2018-pronunciation <fixed-case>E</fixed-case>pitran: Precision <fixed-case>G</fixed-case>2<fixed-case>P</fixed-case> for Many Languages - David R.Mortensen + David R.Mortensen SiddharthDalmia PatrickLittell L18-1429 @@ -3750,7 +3750,7 @@ A Multilingual Approach to Question Classification Aikaterini-LidaKalouli KatharinaKaiser - AnnetteHautli-Janisz + AnnetteHautli-Janisz Georg A.Kaiser MiriamButt L18-1430 @@ -3777,7 +3777,7 @@ <fixed-case>W</fixed-case>orld<fixed-case>T</fixed-case>ree: A Corpus of Explanation Graphs for Elementary Science Questions supporting Multi-hop Inference - PeterJansen + PeterJansen ElizabethWainwright StevenMarmorstein ClaytonMorrison @@ -3786,7 +3786,7 @@ Analysis of Implicit Conditions in Database Search Dialogues - Shun-yaFukunaga + Shun-yaFukunaga HitoshiNishikawa TakenobuTokunaga HikaruYokono @@ -3825,7 +3825,7 @@ AndreiDulceanu ThangLe Dinh WalterChang - TrungBui + TrungBui Doo SoonKim Manh ChienVu SeokhwanKim @@ -3836,23 +3836,23 @@ <fixed-case>B</fixed-case>io<fixed-case>R</fixed-case>ead: A New Dataset for Biomedical Reading Comprehension DimitrisPappas IonAndroutsopoulos - HarisPapageorgiou + HarisPapageorgiou L18-1439 pappas-etal-2018-bioread <fixed-case>MMQA</fixed-case>: A Multi-domain Multi-lingual Question-Answering Framework for <fixed-case>E</fixed-case>nglish and <fixed-case>H</fixed-case>indi - DeepakGupta + DeepakGupta SurabhiKumari AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya L18-1440 gupta-etal-2018-mmqa The First 100 Days: A Corpus Of Political Agendas on <fixed-case>T</fixed-case>witter NathanGreen - SeptinaLarasati + SeptinaLarasati L18-1441 green-larasati-2018-first @@ -3861,7 +3861,7 @@ ShwetaYadav AsifEkbal SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya L18-1442 yadav-etal-2018-medical @@ -3871,7 +3871,7 @@ FabioPoletto CristinaBosco VivianaPatti - MarcoStranisci + MarcoStranisci L18-1443 sanguinetti-etal-2018-italian @@ -3879,7 +3879,7 @@ A Large Multilingual and Multi-domain Dataset for Recommender Systems GiorgiaDi Tommaso StefanoFaralli - PaolaVelardi + PaolaVelardi L18-1444 di-tommaso-etal-2018-large @@ -3888,7 +3888,7 @@ RobVoigt DavidJurgens VinodkumarPrabhakaran - DanJurafsky + DanJurafsky YuliaTsvetkov L18-1445 voigt-etal-2018-rtgender @@ -3903,8 +3903,8 @@ Utilizing Large <fixed-case>T</fixed-case>witter Corpora to Create Sentiment Lexica ValerijFredriksen - BrageJahren - BjörnGambäck + BrageJahren + BjörnGambäck L18-1447 fredriksen-etal-2018-utilizing @@ -3968,7 +3968,7 @@ Discovering Canonical <fixed-case>I</fixed-case>ndian <fixed-case>E</fixed-case>nglish Accents: A Crowdsourcing-based Approach SunayanaSitaram - VarunManjunath + VarunManjunath VarunBharadwaj MonojitChoudhury KalikaBali @@ -3983,7 +3983,7 @@ TomokoKajiyama ShunsukeKozawa KiyotakaUchimoto - ShuichiItahashi + ShuichiItahashi L18-1456 ohsuga-etal-2018-extending @@ -3992,9 +3992,9 @@ KatrinSchweitzer KerstinEckart MarkusGärtner - AgnieszkaFalenska + AgnieszkaFalenska ArndtRiester - InaRösiger + InaRösiger AntjeSchweitzer SabrinaStehwien JonasKuhn @@ -4051,7 +4051,7 @@ EmmanuelJohnson AntonLeuski GaleLucas - DavidTraum + DavidTraum L18-1463 artstein-etal-2018-niki
@@ -4082,7 +4082,7 @@ A Semi-autonomous System for Creating a Human-Machine Interaction Corpus in Virtual Reality: Application to the <fixed-case>ACORFORM</fixed-case>ed System for Training Doctors to Break Bad News MagalieOchs PhilippeBlache - Grégoirede Montcheuil + Grégoirede Montcheuil Jean-MariePergandi JoraneSaubesty DanielFrancon @@ -4095,7 +4095,7 @@ SashiNovitasari Quoc TruongDo SakrianiSakti - DessiLestari + DessiLestari SatoshiNakamura L18-1468 novitasari-etal-2018-construction @@ -4114,7 +4114,7 @@ <fixed-case>TF</fixed-case>-<fixed-case>LM</fixed-case>: <fixed-case>T</fixed-case>ensor<fixed-case>F</fixed-case>low-based Language Modeling Toolkit LyanVerwimp - HugoVan hamme + HugoVan hamme PatrickWambacq L18-1470 verwimp-etal-2018-tf @@ -4145,28 +4145,28 @@ Reference production in human-computer interaction: Issues for Corpus-based Referring Expression Generation DanilloRocha - IvandréParaboni + IvandréParaboni L18-1474 rocha-paraboni-2018-reference Definite Description Lexical Choice: taking Speaker’s Personality into account AlexLan - IvandréParaboni + IvandréParaboni L18-1475 lan-paraboni-2018-definite Referring Expression Generation in time-constrained communication - AndréMariotti - IvandréParaboni + AndréMariotti + IvandréParaboni L18-1476 mariotti-paraboni-2018-referring Incorporating Semantic Attention in Video Description Generation NatsudaLaokulrat - NaoakiOkazaki + NaoakiOkazaki HidekiNakayama L18-1477 laokulrat-etal-2018-incorporating @@ -4183,7 +4183,7 @@ A Detailed Evaluation of Neural Sequence-to-Sequence Models for In-domain and Cross-domain Text Simplification - SanjaŠtajner + SanjaŠtajner SergiuNisioi L18-1479 stajner-nisioi-2018-detailed @@ -4193,17 +4193,17 @@ PiekVossen FilipIlievski MartenPostma - RoxaneSegers + RoxaneSegers L18-1480 vossen-etal-2018-dont <fixed-case>RDF</fixed-case>2<fixed-case>PT</fixed-case>: Generating <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese Texts from <fixed-case>RDF</fixed-case> Data DiegoMoussallem - ThiagoFerreira + ThiagoFerreira MarcosZampieri Maria ClaudiaCavalcanti - GeraldoXexéo + GeraldoXexéo MarianaNeves Axel-CyrilleNgonga Ngomo L18-1481 @@ -4219,7 +4219,7 @@ Up-cycling Data for Natural Language Generation AmyIsard - JonOberlander + JonOberlander ClaireGrover L18-1483 isard-etal-2018-cycling @@ -4242,14 +4242,14 @@ Annotating <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentations for <fixed-case>S</fixed-case>panish NoeliaMigueles-Abraira - RodrigoAgerri - ArantzaDiaz de Ilarraza + RodrigoAgerri + ArantzaDiaz de Ilarraza L18-1486 migueles-abraira-etal-2018-annotating Browsing the Terminological Structure of a Specialized Domain: A Method Based on Lexical Functions and their Classification - Marie-ClaudeL’Homme + Marie-ClaudeL’Homme BenoîtRobichaud NathaliePrévil L18-1487 @@ -4257,7 +4257,7 @@ Rollenwechsel-<fixed-case>E</fixed-case>nglish: a large-scale semantic role corpus - AsadSayeed + AsadSayeed PavelShkadzko VeraDemberg L18-1488 @@ -4267,8 +4267,8 @@ Towards a Standardized Dataset for Noun Compound Interpretation GirishkumarPonkiya KevinPatel - PushpakBhattacharyya - Girish KPalshikar + PushpakBhattacharyya + Girish KPalshikar L18-1489 ponkiya-etal-2018-towards @@ -4281,9 +4281,9 @@ <fixed-case>NL</fixed-case>2<fixed-case>B</fixed-case>ash: A Corpus and Semantic Parser for Natural Language Interface to the Linux Operating System - Xi VictoriaLin + Xi VictoriaLin ChenglongWang - LukeZettlemoyer + LukeZettlemoyer Michael D.Ernst L18-1491 lin-etal-2018-nl2bash @@ -4293,14 +4293,14 @@ CharlesWelch Jonathan K.Kummerfeld SongFeng - RadaMihalcea + RadaMihalcea L18-1492 welch-etal-2018-world Improved Transcription and Indexing of Oral History Interviews for Digital Humanities Research MichaelGref - JoachimKöhler + JoachimKöhler AlmutLeh L18-1493 gref-etal-2018-improved @@ -4368,7 +4368,7 @@ Creating New Language and Voice Components for the Updated <fixed-case>M</fixed-case>ary<fixed-case>TTS</fixed-case> Text-to-Speech Synthesis Platform IngmarSteiner - SébastienLe Maguer + SébastienLe Maguer L18-1501 steiner-le-maguer-2018-creating @@ -4394,7 +4394,7 @@ A New Annotated <fixed-case>P</fixed-case>ortuguese/<fixed-case>S</fixed-case>panish Corpus for the Multi-Sentence Compression Task ElvysLinhares Pontes - Juan-ManuelTorres-Moreno + Juan-ManuelTorres-Moreno StéphaneHuet Andréa CarneiroLinhares L18-1504 @@ -4412,8 +4412,8 @@ <fixed-case>TS</fixed-case>ix: A Human-involved-creation Dataset for Tweet Summarization Minh-TienNguyen Dac VietLai - Huy-TienNguyen - Le-MinhNguyen + Huy-TienNguyen + Le-MinhNguyen L18-1506 nguyen-etal-2018-tsix @@ -4450,7 +4450,7 @@ <fixed-case>P</fixed-case>yr<fixed-case>E</fixed-case>val: An Automated Method for Summary Content Analysis YanjunGao AndrewWarner - RebeccaPassonneau + RebeccaPassonneau L18-1511 gao-etal-2018-pyreval @@ -4465,10 +4465,10 @@ Semantic Equivalence Detection: Are Interrogatives Harder than Declaratives? - JoãoRodrigues + JoãoRodrigues ChakavehSaedi - AntónioBranco - JoãoSilva + AntónioBranco + JoãoSilva L18-1513 rodrigues-etal-2018-semantic @@ -4484,18 +4484,18 @@ <fixed-case>CLARIN</fixed-case>: Towards <fixed-case>FAIR</fixed-case> and Responsible Data Science Using Language Resources Franciskade Jong BenteMaegaard - KoenraadDe Smedt + KoenraadDe Smedt DarjaFišer - DieterVan Uytvanck + DieterVan Uytvanck L18-1515 de-jong-etal-2018-clarin From ‘Solved Problems’ to New Challenges: A Report on <fixed-case>LDC</fixed-case> Activities ChristopherCieri - MarkLiberman - StephanieStrassel - DeniseDiPersio + MarkLiberman + StephanieStrassel + DeniseDiPersio JonathanWright AndreaMazzucchi L18-1516 @@ -4503,7 +4503,7 @@ New directions in <fixed-case>ELRA</fixed-case> activities - ValérieMapelli + ValérieMapelli VictoriaArranz HélèneMazo PawelKamocki @@ -4540,7 +4540,7 @@ ElsLefever IrisHendrickx IljaCroijmans - Antalvan den Bosch + Antalvan den Bosch AsifaMajid L18-1521 lefever-etal-2018-discovering @@ -4580,8 +4580,8 @@ Reuben AFarrugia ClaudiaBorg Kenneth PCamilleri - MichaelRosner - Lonnekevan der Plas + MichaelRosner + Lonnekevan der Plas L18-1525 gatt-etal-2018-face2text @@ -4600,26 +4600,26 @@ JulieGlikman MathieuAvanzi ChristopheBenzitoun - PhilippeBoula de Mareüil + PhilippeBoula de Mareüil L18-1527 goldman-etal-2018-crowdsourcing
Improving Machine Translation of Educational Content via Crowdsourcing MaximilianaBehnke - Antonio ValerioMiceli Barone + Antonio ValerioMiceli Barone RicoSennrich VilelminiSosoni ThanasisNaskos EiriniTakoulidou MariaStasimioti - Mennovan Zaanen + Mennovan Zaanen SheilaCastilho FedericoGaspari PanayotaGeorgakopoulou ValiaKordoni MarkusEgg - Katia LidaKermanidis + Katia LidaKermanidis L18-1528 behnke-etal-2018-improving @@ -4636,7 +4636,7 @@ Evaluation Phonemic Transcription of Low-Resource Tonal Languages for Language Documentation OliverAdams - TrevorCohn + TrevorCohn GrahamNeubig HilariaCruz StevenBird @@ -4647,17 +4647,17 @@ A Very Low Resource Language Speech Corpus for Computational Language Documentation Experiments PierreGodard - GillesAdda - MartineAdda-Decker + GillesAdda + MartineAdda-Decker JuanBenjumea - LaurentBesacier + LaurentBesacier JamisonCooper-Leavitt - Guy-NoelKouarata - LoriLamel - HélèneMaynard + Guy-NoelKouarata + LoriLamel + HélèneMaynard MarkusMueller AnnieRialland - SebastianStueker + SebastianStueker FrançoisYvon MarcelyZanon-Boito L18-1531 @@ -4677,15 +4677,15 @@ Emmanuel-MosellyMakasso MarkusMüller JonasEngelmann - GillesAdda - AlexWaibel - SebastianStüker + GillesAdda + AlexWaibel + SebastianStüker L18-1533 hamlaoui-etal-2018-bulbasaa Researching Less-Resourced Languages – the <fixed-case>D</fixed-case>igi<fixed-case>S</fixed-case>ami Corpus - KristiinaJokinen + KristiinaJokinen L18-1534 jokinen-2018-researching @@ -4695,7 +4695,7 @@ NizarHabash MohammadSalameh WajdiZaghouani - OwenRambow + OwenRambow DanaAbdulrahim OssamaObeid SalamKhalifa @@ -4716,7 +4716,7 @@ Constructing a Lexicon of Relational Nouns EdwardNewell - Jackie C.K.Cheung + Jackie C.K.Cheung L18-1537 newell-cheung-2018-constructing @@ -4730,7 +4730,7 @@ Lexical Profiling of Environmental Corpora PatrickDrouin - Marie-ClaudeL’Homme + Marie-ClaudeL’Homme BenoîtRobichaud L18-1539 drouin-etal-2018-lexical @@ -4753,7 +4753,7 @@ Building a Knowledge Graph from Natural Language Definitions for Interpretable Text Entailment Recognition VivianSilva - AndréFreitas + AndréFreitas SiegfriedHandschuh L18-1542 silva-etal-2018-building @@ -4762,8 +4762,8 @@ Combining rule-based and embedding-based approaches to normalize textual entities with an ontology ArnaudFerré LouiseDeléger - PierreZweigenbaum - ClaireNédellec + PierreZweigenbaum + ClaireNédellec L18-1543 ferre-etal-2018-combining @@ -4774,7 +4774,7 @@ ArslenRemaci ChristopheGravier JonathonHare - FrederiqueLaforest + FrederiqueLaforest ElenaSimperl L18-1544 elsahar-etal-2018-rex @@ -4782,7 +4782,7 @@ Multilingual Parallel Corpus for Global Communication Plan KenjiImamura - EiichiroSumita + EiichiroSumita L18-1545 imamura-sumita-2018-multilingual @@ -4798,7 +4798,7 @@ <fixed-case>N</fixed-case>eg<fixed-case>P</fixed-case>ar: A parallel corpus annotated for negation QianchuLiu FedericoFancellu - BonnieWebber + BonnieWebber L18-1547 liu-etal-2018-negpar @@ -4806,7 +4806,7 @@ The <fixed-case>IIT</fixed-case> <fixed-case>B</fixed-case>ombay <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Parallel Corpus AnoopKunchukuttan PratikMehta - PushpakBhattacharyya + PushpakBhattacharyya L18-1548 kunchukuttan-etal-2018-iit @@ -4820,11 +4820,11 @@
Learning Word Vectors for 157 Languages - EdouardGrave + EdouardGrave PiotrBojanowski PrakharGupta ArmandJoulin - TomasMikolov + TomasMikolov L18-1550 grave-etal-2018-learning @@ -4833,9 +4833,9 @@ MilanStraka NikitaMediankin TomKocmi - ZdeněkŽabokrtský + ZdeněkŽabokrtský VojtěchHudeček - JanHajič + JanHajič L18-1551 straka-etal-2018-sumeczech
@@ -4848,8 +4848,8 @@
Text Simplification from Professionally Produced Corpora - CarolinaScarton - GustavoPaetzold + CarolinaScarton + GustavoPaetzold LuciaSpecia L18-1553 scarton-etal-2018-text @@ -4859,7 +4859,7 @@ JackyVisser RoryDuthie JohnLawrence - ChrisReed + ChrisReed L18-1554 visser-etal-2018-intertextual @@ -4880,12 +4880,12 @@
Building Named Entity Recognition Taggers via Parallel Corpora - RodrigoAgerri + RodrigoAgerri YilingChung ItziarAldabe NoraAranberri - GorkaLabaka - GermanRigau + GorkaLabaka + GermanRigau L18-1557 agerri-etal-2018-building @@ -4895,8 +4895,8 @@ AnnBies JustinMott XuansongLi - StephanieStrassel - ChristopherCaruso + StephanieStrassel + ChristopherCaruso L18-1558 song-etal-2018-cross
@@ -4906,7 +4906,7 @@ AmitraSalam SwatiTiwari AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya L18-1559 ghosal-etal-2018-tap
@@ -4928,14 +4928,14 @@ Annotating Educational Questions for Student Response Analysis AndreeaGodea - RodneyNielsen + RodneyNielsen L18-1562 godea-nielsen-2018-annotating Incorporating Global Contexts into Sentence Embedding for Relational Extraction at the Paragraph Level with Distant Supervision - Eun-kyungKim - Key-SunChoi + Eun-kyungKim + Key-SunChoi L18-1563 kim-choi-2018-incorporating @@ -4963,7 +4963,7 @@ Revisiting Distant Supervision for Relation Extraction TingsongJiang JingLiu - Chin-YewLin + Chin-YewLin ZhifangSui L18-1566 jiang-etal-2018-revisiting @@ -4986,7 +4986,7 @@ Comparison of Pun Detection Methods Using <fixed-case>J</fixed-case>apanese Pun Corpus MotokiYatsu - KenjiAraki + KenjiAraki L18-1569 yatsu-araki-2018-comparison @@ -5011,7 +5011,7 @@ Roelandvan Hout Nicolinevan der Sijs ErwinKomen - Henkvan den Heuvel + Henkvan den Heuvel L18-1572 van-hout-etal-2018-fast
@@ -5028,7 +5028,7 @@ NizarHabash FadhlEryani SalamKhalifa - OwenRambow + OwenRambow DanaAbdulrahim AlexanderErdmann ReemFaraj @@ -5047,9 +5047,9 @@
Automatic Identification of Maghreb Dialects Using a Dictionary-Based Approach - HoudaSaâdane + HoudaSaâdane HosniSeffih - ChristianFluhr + ChristianFluhr KhalidChoukri NasredineSemmar L18-1575 @@ -5104,8 +5104,8 @@ Automating Document Discovery in the Systematic Review Process: How to Use Chaff to Extract Wheat ChristopherNorman MariskaLeeflang - PierreZweigenbaum - AurélieNévéol + PierreZweigenbaum + AurélieNévéol L18-1582 norman-etal-2018-automating @@ -5113,7 +5113,7 @@ Two Multilingual Corpora Extracted from the Tenders Electronic Daily for Machine Learning and Machine Translation Applications. OussamaAhmia NicolasBéchet - Pierre-FrançoisMarteau + Pierre-FrançoisMarteau L18-1583 ahmia-etal-2018-two
@@ -5121,14 +5121,14 @@ Using Adversarial Examples in Natural Language Processing PetrBělohlávek OndřejPlátek - ZdeněkŽabokrtský + ZdeněkŽabokrtský MilanStraka L18-1584 belohlavek-etal-2018-using
Modeling Trolling in Social Media Conversations - Luis GerardoMojica de la Vega + Luis GerardoMojica de la Vega VincentNg L18-1585 mojica-de-la-vega-ng-2018-modeling @@ -5136,7 +5136,7 @@ Automatic Annotation of Semantic Term Types in the Complete <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology Reference Corpus Anne-KathrinSchumann - HéctorMartínez Alonso + HéctorMartínez Alonso L18-1586 schumann-martinez-alonso-2018-automatic @@ -5179,7 +5179,7 @@ ChristyLi YuchenWang KennethResnicow - RadaMihalcea + RadaMihalcea L18-1591 perez-rosas-etal-2018-analyzing @@ -5194,7 +5194,7 @@ Text Mining for History: first steps on building a large dataset SuemiHiguchi - CláudiaFreitas + CláudiaFreitas BrunoCuconato AlexandreRademaker L18-1593 @@ -5212,8 +5212,8 @@ Training and Adapting Multilingual <fixed-case>NMT</fixed-case> for Less-resourced and Morphologically Rich Languages MatīssRikters - MārcisPinnis - RihardsKrišlauks + MārcisPinnis + RihardsKrišlauks L18-1595 rikters-etal-2018-training @@ -5229,8 +5229,8 @@ Machine Translation of Low-Resource Spoken Dialects: Strategies for Normalizing <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman Pierre-EdouardHonnet - AndreiPopescu-Belis - ClaudiuMusat + AndreiPopescu-Belis + ClaudiuMusat MichaelBaeriswyl L18-1597 honnet-etal-2018-machine @@ -5294,7 +5294,7 @@ A Multilingual Dataset for Evaluating Parallel Sentence Extraction from Comparable Corpora - PierreZweigenbaum + PierreZweigenbaum SergeSharoff ReinhardRapp L18-1605 @@ -5322,11 +5322,11 @@ <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-<fixed-case>UL</fixed-case>: Universal Morphological Lattices for <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Parsing AmirMore - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu ÇağrıÇöltekin NizarHabash - BenoîtSagot - DjaméSeddah + BenoîtSagot + DjaméSeddah DimaTaji ReutTsarfaty L18-1608 @@ -5353,13 +5353,13 @@ Parser combinators for <fixed-case>T</fixed-case>igrinya and <fixed-case>O</fixed-case>romo morphology PatrickLittell - TomMcCoy + TomMcCoy Na-RaeHan ShrutiRijhwani ZaidSheikh - DavidMortensen + DavidMortensen TerukoMitamura - LoriLevin + LoriLevin L18-1611 littell-etal-2018-parser @@ -5379,17 +5379,17 @@ Baselines and Test Data for Cross-Lingual Inference - ŽeljkoAgić + ŽeljkoAgić NatalieSchluter L18-1614 agic-schluter-2018-baselines <fixed-case>CATS</fixed-case>: A Tool for Customized Alignment of Text Simplification Corpora - SanjaŠtajner + SanjaŠtajner MarcFranco-Salvador PaoloRosso - Simone PaoloPonzetto + Simone PaoloPonzetto L18-1615 stajner-etal-2018-cats @@ -5398,8 +5398,8 @@ Thanh-LeHa JanNiehues MatthiasSperber - Ngoc QuanPham - AlexanderWaibel + Ngoc QuanPham + AlexanderWaibel L18-1616 ha-etal-2018-kit @@ -5422,7 +5422,7 @@ BrianDavis ManelZarrouk SiegfriedHandschuh - AndreFreitas + AndreFreitas L18-1618 barzegar-etal-2018-semr @@ -5440,7 +5440,7 @@ DominiqueHuck ChristopheRey PhilippeReynés - SophieRosset + SophieRosset JeanSibille ThomasLavergne L18-1619 @@ -5459,7 +5459,7 @@ Web-based Annotation Tool for Inflectional Language Resources AbdulrahmanAlosaimy - EricAtwell + EricAtwell L18-1621 alosaimy-atwell-2018-web @@ -5525,7 +5525,7 @@
Exploring Conversational Language Generation for Rich Content about Hotels - MarilynWalker + MarilynWalker AlbrySmither ShereenOraby VrindavanHarrison @@ -5536,7 +5536,7 @@ Identification of Personal Information Shared in Chat-Oriented Dialogue SarahFillwock - DavidTraum + DavidTraum L18-1629 fillwock-traum-2018-identification @@ -5551,13 +5551,13 @@ Annotating Reflections for Health Behavior Change Therapy NishithaGuntakandla - RodneyNielsen + RodneyNielsen L18-1631 guntakandla-nielsen-2018-annotating Annotating Attribution Relations in <fixed-case>A</fixed-case>rabic - AmalAlsaif + AmalAlsaif TasniemAlyahya MadawiAlotaibi HudaAlmuzaini @@ -5572,7 +5572,7 @@ BrendanSpillane MariaO’Reilly KetongSu - ArturoCalvo + ArturoCalvo LoredanaCerrato KillianLevacher NickCampbell @@ -5634,7 +5634,7 @@ MilagroTeruel CristianCardellino FernandoCardellino - LauraAlonso Alemany + LauraAlonso Alemany SerenaVillata L18-1640 teruel-etal-2018-increasing @@ -5679,7 +5679,7 @@ Cross-linguistically Small World Networks are Ubiquitous in Child-directed Speech - StevenMoran + StevenMoran DanicaPajović SabineStoll L18-1646 @@ -5688,7 +5688,7 @@ <fixed-case>L</fixed-case>1-<fixed-case>L</fixed-case>2 Parallel Treebank of Learner <fixed-case>C</fixed-case>hinese: Overused and Underused Syntactic Structures KeyingLi - JohnLee + JohnLee L18-1647 li-lee-2018-l1 @@ -5696,13 +5696,13 @@ The Use of Text Alignment in Semi-Automatic Error Analysis: Use Case in the Development of the Corpus of the <fixed-case>L</fixed-case>atvian Language Learners RobertsDarģis IlzeAuziņa - KristīneLevāne-Petrova + KristīneLevāne-Petrova L18-1648 dargis-etal-2018-use Error annotation in a Learner Corpus of <fixed-case>P</fixed-case>ortuguese - Iriadel Río + Iriadel Río AmáliaMendes L18-1649 del-rio-mendes-2018-error @@ -5711,7 +5711,7 @@ An <fixed-case>SLA</fixed-case> Corpus Annotated with Pedagogically Relevant Grammatical Structures LeonardoZilio RodrigoWilkens - CédrickFairon + CédrickFairon L18-1650 zilio-etal-2018-sla @@ -5719,13 +5719,13 @@ Portable Spelling Corrector for a Less-Resourced Language: <fixed-case>A</fixed-case>mharic Andargachew MekonnenGezmu AndreasNürnberger - Binyam EphremSeyoum + Binyam EphremSeyoum L18-1651 gezmu-etal-2018-portable A Speaking Atlas of the Regional Languages of <fixed-case>F</fixed-case>rance - PhilippeBoula de Mareüil + PhilippeBoula de Mareüil AlbertRilliard FrédéricVernier L18-1652 @@ -5750,8 +5750,8 @@ <fixed-case>C</fixed-case>h<fixed-case>A</fixed-case>not: An Intelligent Annotation Tool for Indigenous and Highly Agglutinative Languages in <fixed-case>P</fixed-case>eru RodolfoMercado-Gonzales JoséPereira-Noriega - MarcoSobrevilla - ArturoOncevay + MarcoSobrevilla + ArturoOncevay L18-1655 mercado-gonzales-etal-2018-chanot @@ -5766,28 +5766,28 @@ <fixed-case>ASR</fixed-case> for Documenting Acutely Under-Resourced Indigenous Languages RobbieJimerson - EmilyPrud’hommeaux + EmilyPrud’hommeaux L18-1657 jimerson-prudhommeaux-2018-asr Building a Sentiment Corpus of Tweets in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese HenricoBrum - Maria das GraçasVolpe Nunes + Maria das GraçasVolpe Nunes L18-1658 brum-volpe-nunes-2018-building ‘Aye’ or ‘No’? Speech-level Sentiment Analysis of <fixed-case>H</fixed-case>ansard <fixed-case>UK</fixed-case> Parliamentary Debate Transcripts GavinAbercrombie - RizaBatista-Navarro + RizaBatista-Navarro L18-1659 abercrombie-batista-navarro-2018-aye Scalable Visualisation of Sentiment and Stance JonChamberlain - UdoKruschwitz + UdoKruschwitz OrlandHoeber L18-1660 chamberlain-etal-2018-scalable @@ -5795,7 +5795,7 @@ <fixed-case>N</fixed-case>o<fixed-case>R</fixed-case>e<fixed-case>C</fixed-case>: The <fixed-case>N</fixed-case>orwegian Review Corpus ErikVelldal - LiljaØvrelid + LiljaØvrelid Eivind AlexanderBergem CathrineStadsnes SamiaTouileb @@ -5818,8 +5818,8 @@ RodrigoLópez JuanjoséTenorio HéctorGómez - ArturoOncevay-Marcos - Marco A.Sobrevilla Cabezudo + ArturoOncevay-Marcos + Marco A.Sobrevilla Cabezudo L18-1663 penaloza-etal-2018-corpus @@ -5876,7 +5876,7 @@ Elicitation protocol and material for a corpus of long prepared monologues in Sign Language MichaelFilhol - Mohamed NassimeHadjadj + Mohamed NassimeHadjadj L18-1669 filhol-hadjadj-2018-elicitation @@ -5889,15 +5889,15 @@ Modeling <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage: a proposal for a semantically compositional system - Mohamed NassimeHadjadj + Mohamed NassimeHadjadj MichaelFilhol - AnneliesBraffort + AnneliesBraffort L18-1671 hadjadj-etal-2018-modeling Construction of the Corpus of Everyday <fixed-case>J</fixed-case>apanese Conversation: An Interim Report - HanaeKoiso + HanaeKoiso YasuharuDen YurikoIseki WakakoKashino @@ -5934,11 +5934,11 @@ Parallel Corpora in <fixed-case>M</fixed-case>boshi (<fixed-case>B</fixed-case>antu <fixed-case>C</fixed-case>25, <fixed-case>C</fixed-case>ongo-<fixed-case>B</fixed-case>razzaville) AnnieRialland - MartineAdda-Decker - Guy-NoëlKouarata - GillesAdda - LaurentBesacier - LoriLamel + MartineAdda-Decker + Guy-NoëlKouarata + GillesAdda + LaurentBesacier + LoriLamel ElodieGauthier PierreGodard JamisonCooper-Leavitt @@ -5947,7 +5947,7 @@ A Multimodal Corpus of Expert Gaze and Behavior during Phonetic Segmentation Tasks - ArifKhan + ArifKhan IngmarSteiner YusukeSugano AndreasBulling @@ -5969,7 +5969,7 @@ DamienLolive GaëlleVidal MarieTahon - ÉlisabethDelais-Roussarie + ÉlisabethDelais-Roussarie L18-1677 sini-etal-2018-synpaflex @@ -6008,15 +6008,15 @@ <fixed-case>VAST</fixed-case>: A Corpus of Video Annotation for Speech Technologies JenniferTracey - StephanieStrassel + StephanieStrassel L18-1682 tracey-strassel-2018-vast Edit me: A Corpus and a Framework for Understanding Natural Language Image Editing - RameshManuvinakurike + RameshManuvinakurike JacquelineBrixey - TrungBui + TrungBui WalterChang Doo SoonKim RonArtstein @@ -6028,14 +6028,14 @@ Enriching a Lexicon of Discourse Connectives with Corpus-based Data AnnaFeltracco ElisabettaJezek - BernardoMagnini + BernardoMagnini L18-1684 feltracco-etal-2018-enriching <fixed-case>S</fixed-case>im<fixed-case>PA</fixed-case>: A Sentence-Level Simplification Corpus for the Public Administration Domain - CarolinaScarton - GustavoPaetzold + CarolinaScarton + GustavoPaetzold LuciaSpecia L18-1685 scarton-etal-2018-simpa @@ -6067,7 +6067,7 @@ The <fixed-case>G</fixed-case>erman Reference Corpus <fixed-case>D</fixed-case>e<fixed-case>R</fixed-case>e<fixed-case>K</fixed-case>o: New Developments – New Opportunities MarcKupietz - HaraldLüngen + HaraldLüngen PawełKamocki AndreasWitt L18-1689 @@ -6077,7 +6077,7 @@ <fixed-case>R</fixed-case>isamálheild: A Very Large <fixed-case>I</fixed-case>celandic Text Corpus SteinþórSteingrímsson SigrúnHelgadóttir - EiríkurRögnvaldsson + EiríkurRögnvaldsson StarkaðurBarkarson JónGuðnason L18-1690 @@ -6086,14 +6086,14 @@ <fixed-case>T</fixed-case>ri<fixed-case>MED</fixed-case>: A Multilingual Terminological Database FedericaVezzani - Giorgio MariaDi Nunzio + Giorgio MariaDi Nunzio GenevièveHenrot L18-1691 vezzani-etal-2018-trimed Preparation and Usage of <fixed-case>X</fixed-case>hosa Lexicographical Data for a Multilingual, Federated Environment - SonjaBosch + SonjaBosch ThomasEckart BettinaKlimek DirkGoldhahn @@ -6104,7 +6104,7 @@ A Lexicon of Discourse Markers for <fixed-case>P</fixed-case>ortuguese – <fixed-case>LDM</fixed-case>-<fixed-case>PT</fixed-case> AmáliaMendes - Iriadel Rio + Iriadel Rio ManfredStede FelixDombek L18-1693 @@ -6112,7 +6112,7 @@ One Language to rule them all: modelling Morphological Patterns in a Large Scale <fixed-case>I</fixed-case>talian Lexicon with <fixed-case>SWRL</fixed-case> - FahadKhan + FahadKhan AndreaBellandi FrancescaFrontini MonicaMonachini @@ -6135,8 +6135,8 @@ <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-Shp: Towards the Building of a Lexical Database for a <fixed-case>P</fixed-case>eruvian Minority Language DiegoMaguiño-Valencia - ArturoOncevay-Marcos - Marco A.Sobrevilla Cabezudo + ArturoOncevay-Marcos + Marco A.Sobrevilla Cabezudo L18-1697 maguino-valencia-etal-2018-wordnet @@ -6151,7 +6151,7 @@ Transforming <fixed-case>W</fixed-case>ikipedia into a Large-Scale Fine-Grained Entity Type Corpus AbbasGhaddar - PhilippeLanglais + PhilippeLanglais L18-1699 ghaddar-langlais-2018-transforming @@ -6212,7 +6212,7 @@ <fixed-case>M</fixed-case>-<fixed-case>CNER</fixed-case>: A Corpus for <fixed-case>C</fixed-case>hinese Named Entity Recognition in Multi-Domains QiLu - YaoShengYang + YaoShengYang ZhenghuaLi WenliangChen MinZhang @@ -6225,7 +6225,7 @@ JiaqiWu ShereenOraby AmitaMisra - MarilynWalker + MarilynWalker L18-1707 bowden-etal-2018-slugnerds @@ -6246,12 +6246,12 @@ The <fixed-case>LIA</fixed-case> Treebank of Spoken <fixed-case>N</fixed-case>orwegian Dialects - LiljaØvrelid + LiljaØvrelid AndreKåsen KristinHagen AndersNøklestad Per ErikSolberg - Janne BondiJohannessen + Janne BondiJohannessen L18-1710 ovrelid-etal-2018-lia @@ -6274,19 +6274,19 @@ <fixed-case>C</fixed-case>zech Legal Text Treebank 2.0 VincentKríž - BarboraHladká + BarboraHladká L18-1713 kriz-hladka-2018-czech Creation of a Balanced State-of-the-Art Multilayer Corpus for <fixed-case>NLU</fixed-case> - NormundsGruzitis - LaumaPretkalnina - BaibaSaulite + NormundsGruzitis + LaumaPretkalnina + BaibaSaulite LauraRituma - GuntaNespore-Berzkalne - ArtursZnotins - PeterisPaikens + GuntaNespore-Berzkalne + ArtursZnotins + PeterisPaikens L18-1714 gruzitis-etal-2018-creation @@ -6301,8 +6301,8 @@ Adding Syntactic Annotations to Flickr30k Entities Corpus for Multimodal Ambiguous Prepositional-Phrase Attachment Resolution SebastienDelecraz AlexisNasr - FredericBechet - BenoitFavre + FredericBechet + BenoitFavre L18-1716 delecraz-etal-2018-adding @@ -6317,11 +6317,11 @@ Cheating a Parser to Death: Data-driven Cross-Treebank Annotation Transfer - DjaméSeddah - Ericde la Clergerie - BenoîtSagot - HéctorMartínez Alonso - MarieCandito + DjaméSeddah + Ericde la Clergerie + BenoîtSagot + HéctorMartínez Alonso + MarieCandito L18-1718 seddah-etal-2018-cheating @@ -6329,7 +6329,7 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies and Quantitative Typological Trends. A Case Study on Word Order ChiaraAlzetta FeliceDell’Orletta - SimonettaMontemagni + SimonettaMontemagni GiuliaVenturi L18-1719 alzetta-etal-2018-universal @@ -6343,7 +6343,7 @@ Interoperability of Language-related Information: Mapping the <fixed-case>BLL</fixed-case> Thesaurus to Lexvo and Glottolog - VanyaDimitrova + VanyaDimitrova ChristianFäth ChristianChiarcos HeikeRenner-Westermann @@ -6353,10 +6353,10 @@ Browsing and Supporting Pluricentric Global <fixed-case>W</fixed-case>ordnet, or just your <fixed-case>W</fixed-case>ordnet of Interest - AntónioBranco + AntónioBranco RubenBranco ChakavehSaedi - JoãoSilva + JoãoSilva L18-1722 branco-etal-2018-browsing @@ -6364,20 +6364,20 @@ Cross-checking <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>SUMO</fixed-case> Using Meronymy JavierÁlvez ItziarGonzalez-Dios - GermanRigau + GermanRigau L18-1723 alvez-etal-2018-cross Extended <fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et 2.0 – An Entity-Relation Common-Sense Representation Model - Wei-YunMa + Wei-YunMa Yueh-YinShih L18-1724 ma-shih-2018-extended The Circumstantial Event Ontology (<fixed-case>CEO</fixed-case>) and <fixed-case>ECB</fixed-case>+/<fixed-case>CEO</fixed-case>: an Ontology and Corpus for Implicit Causal Relations between Events - RoxaneSegers + RoxaneSegers TommasoCaselli PiekVossen L18-1725 @@ -6387,7 +6387,7 @@ Profiling Medical Journal Articles Using a Gene Ontology Semantic Tagger MahmoudEl-Haj PaulRayson - ScottPiao + ScottPiao JoKnight L18-1726 el-haj-etal-2018-profiling @@ -6403,7 +6403,7 @@ <fixed-case>I</fixed-case>ndian Language Wordnets and their Linkages with <fixed-case>P</fixed-case>rinceton <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et DipteshKanojia KevinPatel - PushpakBhattacharyya + PushpakBhattacharyya L18-1728 kanojia-etal-2018-indian diff --git a/data/xml/M91.xml b/data/xml/M91.xml index 8dd46a2d60..e29aac8344 100644 --- a/data/xml/M91.xml +++ b/data/xml/M91.xml @@ -12,25 +12,25 @@ Overview of the Third <fixed-case>M</fixed-case>essage <fixed-case>U</fixed-case>nderstanding <fixed-case>E</fixed-case>valuation and <fixed-case>C</fixed-case>onference - Beth M.Sundheim + Beth M.Sundheim M91-1001 sundheim-1991-overview <fixed-case>MUC</fixed-case>-3 Evaluation Metrics - NancyChinchor + NancyChinchor M91-1002 chinchor-1991-muc Comparing <fixed-case>MUCK</fixed-case>-<fixed-case>II</fixed-case> and <fixed-case>MUC</fixed-case>-3: Assessing the Difficulty of Different Tasks - LynetteHirschman + LynetteHirschman M91-1003 hirschman-1991-comparing <fixed-case>MUC</fixed-case>-3 Linguistic Phenomena Test Experiment - NancyChinchor + NancyChinchor M91-1004 chinchor-1991-muc-3 @@ -43,20 +43,20 @@
<fixed-case>BBN</fixed-case> <fixed-case>PLUM</fixed-case>: <fixed-case>MUC</fixed-case>-3 Test Results and Analysis - RalphWeischedel - DamarisAyuso - SeanBoisen - RobertIngria + RalphWeischedel + DamarisAyuso + SeanBoisen + RobertIngria JeffPalmucci M91-1006 weischedel-etal-1991-bbn <fixed-case>GE</fixed-case> <fixed-case>NLT</fixed-case>oolset: <fixed-case>MUC</fixed-case>-3 Test Results and Analysis - GeorgeKrupka + GeorgeKrupka LucjaIwariska - PaulJacobs - LisaRau + PaulJacobs + LisaRau M91-1007 krupka-etal-1991-ge @@ -68,7 +68,7 @@
<fixed-case>H</fixed-case>ughes <fixed-case>T</fixed-case>rainable <fixed-case>T</fixed-case>ext <fixed-case>S</fixed-case>kimmer: <fixed-case>MUC</fixed-case>-3 Test Results and Analysis - Charles P.Dolan + Charles P.Dolan Seth R.Goldman Thomas V.Cuda Alan M.Nakamura @@ -77,19 +77,19 @@ <fixed-case>ITP</fixed-case> <fixed-case>I</fixed-case>nterpretext System: <fixed-case>MUC</fixed-case>-3 Test Results and Analysis - KathleenDahlgren + KathleenDahlgren CarolLord HajimeWada - JoyceMcDowell - Edward P.Stabler, Jr. + JoyceMcDowell + Edward P.Stabler, Jr. M91-1010 dahlgren-etal-1991-itp <fixed-case>L</fixed-case>anguage <fixed-case>S</fixed-case>ystems, <fixed-case>I</fixed-case>nc.<fixed-case>MUC</fixed-case>-3 Test Results and Analysis - Christine A.Montgomery - Bonnie GloverStalls - Robert S.Belvin + Christine A.Montgomery + Bonnie GloverStalls + Robert S.Belvin Robert E.Stumberger M91-1011 montgomery-etal-1991-language @@ -103,9 +103,9 @@ <fixed-case>N</fixed-case>ew <fixed-case>Y</fixed-case>ork <fixed-case>U</fixed-case>niversity <fixed-case>PROTEUS</fixed-case> System: <fixed-case>MUC</fixed-case>-3 Test Results and Analysis - RalphGrishman + RalphGrishman JohnSterling - CatherineMacleod + CatherineMacleod M91-1013 grishman-etal-1991-new @@ -117,7 +117,7 @@
<fixed-case>SRI</fixed-case> <fixed-case>I</fixed-case>nternational’s <fixed-case>TACITUS</fixed-case> System: <fixed-case>MUC</fixed-case>-3 Test Results and Analysis - Jerry R.Hobbs + Jerry R.Hobbs M91-1015 hobbs-1991-sri @@ -133,16 +133,16 @@ CarlWeir RobinMcEntire BarrySilk - TimFinin + TimFinin M91-1017 weir-etal-1991-unisys
<fixed-case>U</fixed-case>niversity of <fixed-case>M</fixed-case>assachusetts: <fixed-case>MUC</fixed-case>-3 Test Results and Analysis - WendyLehnert - ClaireCardie - DavidFisher - EllenRiloff + WendyLehnert + ClaireCardie + DavidFisher + EllenRiloff RobertWilliams M91-1018 lehnert-etal-1991-university @@ -163,20 +163,20 @@ <fixed-case>BBN</fixed-case>: Description of the <fixed-case>PLUM</fixed-case> System as Used for<fixed-case>MUC</fixed-case>-3 - RalphWeischedel - DamarisAyuso - SeanBoisen - RobertIngria + RalphWeischedel + DamarisAyuso + SeanBoisen + RobertIngria JeffPalmucci M91-1021 weischedel-etal-1991-bbn-description <fixed-case>GE</fixed-case>: Description of the <fixed-case>NLT</fixed-case>oolset System as Used for <fixed-case>MUC</fixed-case>-3 - GeorgeKrupka - PaulJacobs - LisaRau - LucjaIwanska + GeorgeKrupka + PaulJacobs + LisaRau + LucjaIwanska M91-1022 krupka-etal-1991-ge-description @@ -188,7 +188,7 @@
<fixed-case>H</fixed-case>ughes <fixed-case>T</fixed-case>rainable <fixed-case>T</fixed-case>ext <fixed-case>S</fixed-case>kimmer: Description of the <fixed-case>TTS</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-3 - Charles P.Dolan + Charles P.Dolan Seth R.Goldman Thomas V.Cuda Alan M.Nakamura @@ -197,19 +197,19 @@ <fixed-case>ITP</fixed-case>: Description of the <fixed-case>I</fixed-case>nterpretext System as Used for <fixed-case>MUC</fixed-case>-3 - KathleenDahlgren + KathleenDahlgren CarolLord HajimeWada - JoyceMcDowell + JoyceMcDowell Jr.Edward P. Stabler M91-1025 dahlgren-etal-1991-itp-description <fixed-case>L</fixed-case>anguage <fixed-case>S</fixed-case>ystems, <fixed-case>I</fixed-case>nc. Description of the <fixed-case>DBG</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-3 - Christine A.Montgomery - Bonnie GloverStalls - Robert S.Belvin + Christine A.Montgomery + Bonnie GloverStalls + Robert S.Belvin Robert E.Stumberger M91-1026 montgomery-etal-1991-language-systems @@ -223,9 +223,9 @@ <fixed-case>N</fixed-case>ew <fixed-case>Y</fixed-case>ork <fixed-case>U</fixed-case>niversity: Description of the <fixed-case>PROTEUS</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-3 - RalphGrishman + RalphGrishman JohnSterling - CatherineMacleod + CatherineMacleod M91-1028 grishman-etal-1991-new-york @@ -237,7 +237,7 @@
<fixed-case>SRI</fixed-case> <fixed-case>I</fixed-case>nternational: Description of the <fixed-case>TACITUS</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-3 - Jerry R.Hobbs + Jerry R.Hobbs M91-1030 hobbs-1991-sri-international @@ -250,7 +250,7 @@ <fixed-case>U</fixed-case>nisys: Description of the <fixed-case>U</fixed-case>nisys System Used for <fixed-case>MUC</fixed-case>-3 CarlWeir - TimFinin + TimFinin RobinMcEntire BarrySilk M91-1032 @@ -258,10 +258,10 @@ <fixed-case>U</fixed-case>niversity of <fixed-case>M</fixed-case>assachusetts: Description of the <fixed-case>CIRCUS</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-3 - WendyLehnert - ClaireCardie - DavidFisher - EllenRiloff + WendyLehnert + ClaireCardie + DavidFisher + EllenRiloff RobertWilliams M91-1033 lehnert-etal-1991-university-massachusetts @@ -275,21 +275,21 @@ Data Extraction as Text Categorization: An Experiment With the <fixed-case>MUC</fixed-case>-3 Corpus - David D.Lewis + David D.Lewis M91-1035 lewis-1991-data Computational Aspects of Discourse in the Context of <fixed-case>MUC</fixed-case>-3 - LucjaIwanska - DouglasAppelt - DamarisAyuso + LucjaIwanska + DouglasAppelt + DamarisAyuso KathyDahlgren - Bonnie GloverStalls - RalphGrishman - GeorgeKrupka - ChristineMontgomery - EllenRiloff + Bonnie GloverStalls + RalphGrishman + GeorgeKrupka + ChristineMontgomery + EllenRiloff M91-1036 iwanska-etal-1991-computational diff --git a/data/xml/M92.xml b/data/xml/M92.xml index dd4bdac94d..a403edbbc0 100644 --- a/data/xml/M92.xml +++ b/data/xml/M92.xml @@ -12,65 +12,65 @@ Overview of the Fourth <fixed-case>M</fixed-case>essage <fixed-case>U</fixed-case>nderstanding <fixed-case>E</fixed-case>valuation and <fixed-case>C</fixed-case>onference - Beth M.Sundheim + Beth M.Sundheim M92-1001 sundheim-1992-overview <fixed-case>MUC</fixed-case>-4 Evaluation Metrics - NancyChinchor + NancyChinchor M92-1002 chinchor-1992-muc The Statistical Significance of the <fixed-case>MUC</fixed-case>-4 Results - NancyChinchor + NancyChinchor chinchor-1992-statistical Text Filtering in <fixed-case>B/IUC</fixed-case>-3 and <fixed-case>MUC</fixed-case>-4 - David D.Lewis + David D.Lewis Richard M.Tong M92-1004 lewis-tong-1992-text An Adjunct Test for Discourse Processing in <fixed-case>MUC</fixed-case>-4 - LynetteHirschman + LynetteHirschman M92-1005 hirschman-1992-adjunct <fixed-case>GE</fixed-case> Adjunct Test Report: Object-Oriented Design and Scoring for <fixed-case>MUC</fixed-case>-4 - GeorgeKrupka - LisaRau + GeorgeKrupka + LisaRau M92-1006 krupka-rau-1992-ge <fixed-case>BBN</fixed-case> <fixed-case>PLUM</fixed-case>: <fixed-case>MUC</fixed-case>-4 Test Results and Analysis - RalphWeischedel - DamarisAyuso - SeanBoisen - HeidiFox - HerbertGish - RobertIngria + RalphWeischedel + DamarisAyuso + SeanBoisen + HeidiFox + HerbertGish + RobertIngria M92-1007 weischedel-etal-1992-bbn <fixed-case>GE</fixed-case> <fixed-case>NLT</fixed-case>oolset: <fixed-case>MUC</fixed-case>-4 Test Results and Analysis - LisaRau - GeorgeKrupka - PaulJacobs + LisaRau + GeorgeKrupka + PaulJacobs M92-1008 rau-etal-1992-ge <fixed-case>TIPSTER</fixed-case> <fixed-case>SHOGUN</fixed-case> System (Joint <fixed-case>GE-CMU</fixed-case>): <fixed-case>MUC</fixed-case>-4 Test Results and Analysis - GeorgeKrupka - PaulJacobs - MichaelMauldin + GeorgeKrupka + PaulJacobs + MichaelMauldin ToddKaufmann IraSider M92-1009 @@ -79,17 +79,17 @@ <fixed-case>H</fixed-case>ughes <fixed-case>R</fixed-case>esearch <fixed-case>L</fixed-case>aboratories <fixed-case>T</fixed-case>rainable <fixed-case>T</fixed-case>ext <fixed-case>S</fixed-case>kimmer: <fixed-case>MUC</fixed-case>-4 Test Results and Analysis Stephanie E.August - Charles P.Dolan + Charles P.Dolan M92-1010 august-dolan-1992-hughes <fixed-case>L</fixed-case>anguage <fixed-case>S</fixed-case>ystems, <fixed-case>I</fixed-case>nc.<fixed-case>MUC</fixed-case>-4 Test Results and Analysis - Christine A.Montgomery - Bonnie GloverStalls + Christine A.Montgomery + Bonnie GloverStalls Robert R.Stumberger NaicongLi - Robert S.Belvin + Robert S.Belvin AlfredoArnaiz Susan B.Hirsh M92-1011 @@ -108,24 +108,24 @@ JohnBurger DennisConnolly SusanRoberts - MarcVilain + MarcVilain M92-1013 aberdeen-etal-1992-mitre <fixed-case>CRL</fixed-case>/<fixed-case>NMSU</fixed-case> and <fixed-case>B</fixed-case>randeis <fixed-case>M</fixed-case>uc<fixed-case>B</fixed-case>ruce: <fixed-case>MUC</fixed-case>-4 Test Results and Analysis - JimCowie - LouiseGuthrie - YorickWilks - JamesPustejovsky + JimCowie + LouiseGuthrie + YorickWilks + JamesPustejovsky M92-1014 cowie-etal-1992-crl <fixed-case>N</fixed-case>ew <fixed-case>Y</fixed-case>ork <fixed-case>U</fixed-case>niversity <fixed-case>PROTEUS</fixed-case> System: <fixed-case>MUC</fixed-case>-4 Test Results and Analysis - RalphGrishman + RalphGrishman JohnSterling - CatherineMacleod + CatherineMacleod M92-1015 grishman-etal-1992-new @@ -145,18 +145,18 @@ <fixed-case>SRA</fixed-case> <fixed-case>S</fixed-case>olomon: <fixed-case>MUC</fixed-case>-4 Test Results and Analysis ChinatsuAone - DougMcKee + DougMcKee SandyShinn - HatteBlejer + HatteBlejer M92-1018 aone-etal-1992-sra <fixed-case>SRI</fixed-case> <fixed-case>I</fixed-case>nternational <fixed-case>FASTUS</fixed-case> System <fixed-case>MUC</fixed-case>-4 Test Results and Analysis - Douglas E.Appelt - JohnBear - Jerry R.Hobbs - DavidIsrael + Douglas E.Appelt + JohnBear + Jerry R.Hobbs + DavidIsrael MabryTyson M92-1019 appelt-etal-1992-sri @@ -180,11 +180,11 @@ The <fixed-case>LINK</fixed-case> System: <fixed-case>MUC</fixed-case>-4 Test Results and Analysis - Steven L.Lytinen + Steven L.Lytinen SayanBhattacharyya Robert R.Burridge Peter M.Hastings - ChristianHuyck + ChristianHuyck Karen A.Lipinsky Eric S.McDaniel Karenann K.Terrell @@ -204,49 +204,49 @@ <fixed-case>BBN</fixed-case>: Description of the <fixed-case>PLUM</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-4 - DamarisAyuso - SeanBoisen - HeidiFox - HerbGish - RobertIngria - RalphWeischedel + DamarisAyuso + SeanBoisen + HeidiFox + HerbGish + RobertIngria + RalphWeischedel M92-1024 ayuso-etal-1992-bbn <fixed-case>GE</fixed-case> <fixed-case>NLT</fixed-case>oolset: Description of the System as Used for <fixed-case>MUC</fixed-case>-4 - GeorgeKrupka - PaulJacobs - LisaRau - LoisChilds + GeorgeKrupka + PaulJacobs + LisaRau + LoisChilds IraSider M92-1025 krupka-etal-1992-ge <fixed-case>GE</fixed-case>-<fixed-case>CMU</fixed-case>: Description of the <fixed-case>TIPSTER/SHOGUN</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-4 - PaulJacobs - GeorgeKrupka - LisaRau + PaulJacobs + GeorgeKrupka + LisaRau ToddKaufmann - MichaelMauldin + MichaelMauldin M92-1026 jacobs-etal-1992-ge <fixed-case>H</fixed-case>ughes <fixed-case>R</fixed-case>esearch <fixed-case>L</fixed-case>aboratories: Description of the <fixed-case>T</fixed-case>rainable <fixed-case>T</fixed-case>ext <fixed-case>S</fixed-case>kimmer Used for <fixed-case>MUC</fixed-case>-4 Stephanie E.August - Charles P.Dolan + Charles P.Dolan M92-1027 august-dolan-1992-hughes-research <fixed-case>L</fixed-case>anguage <fixed-case>S</fixed-case>ystems, <fixed-case>I</fixed-case>nc. Description of the <fixed-case>DBG</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-4 - Christine A.Montgomery - Bonnie GloverStalls + Christine A.Montgomery + Bonnie GloverStalls Robert E.Stumberger NaicongLi - Robert S.Belvin + Robert S.Belvin AlfredoArnaiz Susan B.Hirsh M92-1028 @@ -265,22 +265,22 @@ JohnBurger DennisConnolly SusanRoberts - MarcVilain + MarcVilain M92-1030 aberdeen-etal-1992-mitre-bedford <fixed-case>CRL</fixed-case>/<fixed-case>NMSU</fixed-case> and <fixed-case>B</fixed-case>randeis: Description of the <fixed-case>M</fixed-case>uc<fixed-case>B</fixed-case>ruce System as Used for <fixed-case>MUC</fixed-case>-4 - JimCowie - LouiseGuthrie - YorickWilks + JimCowie + LouiseGuthrie + YorickWilks M92-1031 cowie-etal-1992-crl-nmsu <fixed-case>N</fixed-case>ew <fixed-case>Y</fixed-case>ork <fixed-case>U</fixed-case>niversity Description of the <fixed-case>PROTEUS</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-4 - RalphGrishman - CatherineMacleod + RalphGrishman + CatherineMacleod JohnSterling M92-1032 grishman-etal-1992-new-york @@ -301,19 +301,19 @@ <fixed-case>SRA</fixed-case>: Description of the <fixed-case>S</fixed-case>olomon System as Used <fixed-case>F</fixed-case>or<fixed-case>MUC</fixed-case>-4 ChinatsuAone - DougMcKee + DougMcKee SandyShinn - HatteBlejer + HatteBlejer M92-1035 aone-etal-1992-sra-description <fixed-case>SRI</fixed-case> International: Description of the <fixed-case>FASTUS</fixed-case> System Used for <fixed-case>MUC</fixed-case>-4 - Jerry R.Hobbs - DouglasAppelt + Jerry R.Hobbs + DouglasAppelt MabryTyson - JohnBear - DavidIsrael + JohnBear + DavidIsrael M92-1036 hobbs-etal-1992-sri @@ -336,11 +336,11 @@ Description of the <fixed-case>LINK</fixed-case> System Used for <fixed-case>MUC</fixed-case>- 4 - Steven L.Lytinen + Steven L.Lytinen SayanBhattacharyya Robert R.Burridge Peter M.Hastings - ChristianHuyck + ChristianHuyck Karen A.Lipinsky Eric S.McDaniel Karenann K.Terrell diff --git a/data/xml/M93.xml b/data/xml/M93.xml index d3f2798ac5..0170bff6ac 100644 --- a/data/xml/M93.xml +++ b/data/xml/M93.xml @@ -13,14 +13,14 @@ Corpora and Data Preparation LynnCarlson - BoyanOnyshkevych + BoyanOnyshkevych Mary EllenOkurowski M93-1001 carlson-etal-1993-corpora Tasks, Domains, and Languages - BoyanOnyshkevych + BoyanOnyshkevych Mary EllenOkurowski LynnCarlson M93-1002 @@ -28,13 +28,13 @@ Template Design for Information Extraction - BoyanOnyshkevych + BoyanOnyshkevych M93-1003 onyshkevych-1993-template <fixed-case>TIPSTER</fixed-case>/<fixed-case>MUC</fixed-case>-5 Information Extraction System Evaluation - Beth M.Sundheim + Beth M.Sundheim M93-1004 sundheim-1993-tipster @@ -52,32 +52,32 @@ <fixed-case>MUC</fixed-case>-5 Evaluation Metrics - NancyChinchor - BethSundheim + NancyChinchor + BethSundheim M93-1007 chinchor-sundheim-1993-muc The Statistical Significance of the <fixed-case>MUC</fixed-case>-5 Results - NancyChinchor + NancyChinchor M93-1008 chinchor-1993-statistical The Generic Information Extraction System - Jerry R.Hobbs + Jerry R.Hobbs M93-1009 hobbs-1993-generic <fixed-case>BBN</fixed-case>: Description of the <fixed-case>PLUM</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-5 - RalphWeischedel - DamarisAyuso - SeanBoisen - HeidiFox - RobertIngria + RalphWeischedel + DamarisAyuso + SeanBoisen + HeidiFox + RobertIngria TomoyoshiMatsukawa - ConstantinePapageorgiou + ConstantinePapageorgiou DawnMacLaughlin MasaichiroKitagawa TsutomuSakai @@ -90,19 +90,19 @@ <fixed-case>GE</fixed-case>-<fixed-case>CMU</fixed-case>: Description of the <fixed-case>SHOGUN</fixed-case> System Used for <fixed-case>MUC</fixed-case>-5 - Paul S.Jacobs - GeorgeKrupka - LisaRau + Paul S.Jacobs + GeorgeKrupka + LisaRau M93-1011 jacobs-etal-1993-ge <fixed-case>L</fixed-case>anguage <fixed-case>S</fixed-case>ystems <fixed-case>I</fixed-case>nc: Description of the <fixed-case>DBG</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-51 - Christine A.Montgomery + Christine A.Montgomery Robert E.Stumberger - Bonnie GloverStalls + Bonnie GloverStalls NaicongLi - Robert S.Belvin + Robert S.Belvin Susan HirshLitenatsky M93-1012 montgomery-etal-1993-language @@ -113,34 +113,34 @@ JohnBurger DennisConnolly SusanRoberts - MarcVilain + MarcVilain M93-1013 aberdeen-etal-1993-mitre <fixed-case>NEC</fixed-case>: Description of the <fixed-case>VENIEX</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-5 KazunoriMuraki - ShinichiDoi - ShinichiAndo + ShinichiDoi + ShinichiAndo M93-1014 muraki-etal-1993-nec <fixed-case>CRL/B</fixed-case>randeis: Description of the <i> <fixed-case>D</fixed-case>iderot</i> System as Used for <fixed-case>MUC</fixed-case>-5 - JimCowie - LouiseGuthrie - WangJin + JimCowie + LouiseGuthrie + JinWang RongWang - TakahiroWakao - JamesPustejovsky + TakahiroWakao + JamesPustejovsky ScottWaterman M93-1015 cowie-etal-1993-crl <fixed-case>N</fixed-case>ew <fixed-case>Y</fixed-case>ork <fixed-case>U</fixed-case>niversity Description of the <fixed-case>PROTEUS</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-5 - RalphGrishman + RalphGrishman JohnSterling M93-1016 grishman-sterling-1993-new @@ -156,17 +156,17 @@ <fixed-case>SRA</fixed-case>: Description of the <fixed-case>S</fixed-case>olomon System as Used for <fixed-case>MUC</fixed-case>-5 ChinatsuAone SharonFlank - DougMcKee + DougMcKee PaulKrause M93-1018 aone-etal-1993-sra <fixed-case>SRI</fixed-case>: Description of the <fixed-case>JV</fixed-case>-<fixed-case>FASTUS</fixed-case> System Used for <fixed-case>MUC</fixed-case>-5 - Douglas E.Appelt - Jerry R.Hobbs - JohnBear - DavidIsrael + Douglas E.Appelt + Jerry R.Hobbs + JohnBear + DavidIsrael MegumiKameyama MabryTyson M93-1019 @@ -182,7 +182,7 @@ <fixed-case>UNISYS</fixed-case>: Description of the <fixed-case>CBAS</fixed-case> System Used for <fixed-case>MUC</fixed-case>-5 CarlWeir - RichFritzson + RichFritzson M93-1021 weir-fritzson-1993-unisys @@ -199,24 +199,24 @@ S.Soderland E.Riloff C.Cardie - J.Peterson + J.Peterson F.Feng M93-1023 lehnert-etal-1993-umass Description of the <fixed-case>LINK</fixed-case> System Used for <fixed-case>MUC</fixed-case>-5 - Steven L.Lytinen + Steven L.Lytinen Robert R.Burridge Peter M.Hastings - ChristianHuyck + ChristianHuyck M93-1024 lytinen-etal-1993-description <fixed-case>USC</fixed-case>: Description of the <fixed-case>SNAP</fixed-case> System Used for <fixed-case>MUC</fixed-case>-5 - DanMoldovan - SeunghoCha + DanMoldovan + SeunghoCha MinhwaChung TonyGallippi Kenneth J.Hendrickson @@ -228,15 +228,15 @@ <fixed-case>S</fixed-case>ussex <fixed-case>U</fixed-case>niversity: Description of the <fixed-case>S</fixed-case>ussex System Used for <fixed-case>MUC</fixed-case>-5 - RobertGaizauskas - LynneCahill - RogerEvans + RobertGaizauskas + LynneCahill + RogerEvans M93-1026 gaizauskas-etal-1993-sussex Summary of Workshop on Lexicons for Text Extraction - JamesPustejovsky + JamesPustejovsky M93-1027 pustejovsky-1993-summary @@ -248,25 +248,25 @@ Information Extraction for the Future - Paul S.Jacobs + Paul S.Jacobs M93-1029 jacobs-1993-information Topic Session on <fixed-case>DISCOURSE</fixed-case> - Damaris M.Ayuso + Damaris M.Ayuso M93-1030 ayuso-1993-topic Tools and Techniques for Rapid Porting - JoeMcCarthy + JoeMcCarthy M93-1031 mccarthy-1993-tools Information Extraction and Evaluation - Lisa F.Rau + Lisa F.Rau M93-1032 rau-1993-information diff --git a/data/xml/M95.xml b/data/xml/M95.xml index 27e5b9fcb7..71b9e4a39e 100644 --- a/data/xml/M95.xml +++ b/data/xml/M95.xml @@ -12,37 +12,37 @@ Design of the <fixed-case>MUC</fixed-case>-6 Evaluation - RalphGrishman - BethSundheim + RalphGrishman + BethSundheim M95-1001 grishman-sundheim-1995-design Overview of Results of the <fixed-case>MUC</fixed-case>-6 Evaluation - Beth M.Sundheim + Beth M.Sundheim M95-1002 sundheim-1995-overview FOUR SCORERS AND SEVEN YEARS AGO: The Scoring Method for <fixed-case>MUC</fixed-case>-6 - NancyChinchor + NancyChinchor GaryDungca M95-1003 chinchor-dungca-1995-four Statistical Significance of <fixed-case>MUC</fixed-case>-6 Results - NancyChinchor + NancyChinchor M95-1004 chinchor-1995-statistical A Model-Theoretic Coreference Scoring Scheme - MarcVilain + MarcVilain JohnBurger JohnAberdeen DennisConnolly - LynetteHirschman + LynetteHirschman M95-1005 vilain-etal-1995-model @@ -53,11 +53,11 @@
<fixed-case>U</fixed-case>niversity of <fixed-case>D</fixed-case>urham: Description of the <fixed-case>LOLITA</fixed-case> System as Used in <fixed-case>MUC</fixed-case>-6. - RichardMorgan - RobertoGarigliano + RichardMorgan + RobertoGarigliano PaulCallaghan SanjayPoria - MarkSmith + MarkSmith AgnieszkaUrbanowicz RussellCollingham MarcoCostantino @@ -74,9 +74,9 @@ <fixed-case>L</fixed-case>ockheed <fixed-case>M</fixed-case>artin: <fixed-case>LOUELLA PARSING</fixed-case>, An <fixed-case>NLT</fixed-case>oolset System for <fixed-case>MUC</fixed-case>-6 - LoisChilds - DebBrady - LouiseGuthrie + LoisChilds + DebBrady + LouiseGuthrie JoseFranco DanValdes-Dapena BillReid @@ -94,11 +94,11 @@ Description of the <fixed-case>UM</fixed-case>ass System as Used for <fixed-case>MUC</fixed-case>-6 - DavidFisher - StephenSoderland + DavidFisher + StephenSoderland JosephMcCarthy - FangfangFeng - WendyLehnert + FangfangFeng + WendyLehnert M95-1011 fisher-etal-1995-description @@ -106,22 +106,22 @@ <fixed-case>MITRE</fixed-case>: Description of the <fixed-case>A</fixed-case>lembic System Used for <fixed-case>MUC</fixed-case>-6 JohnAberdeen JohnBurger - DavidDay - LynetteHirschman - PatriciaRobinson - MarcVilain + DavidDay + LynetteHirschman + PatriciaRobinson + MarcVilain M95-1012 aberdeen-etal-1995-mitre
<fixed-case>CRL</fixed-case>/<fixed-case>NMSUD</fixed-case>escription of the <fixed-case>CRL</fixed-case>/<fixed-case>NMSU</fixed-case> Systems Used for <fixed-case>MUC</fixed-case>-6 - JimCowie + JimCowie M95-1013 cowie-1995-crl The <fixed-case>NYU</fixed-case> System for <fixed-case>MUC</fixed-case>-6 or Where’s the Syntax? - RalphGrishman + RalphGrishman M95-1014 grishman-1995-nyu @@ -129,18 +129,18 @@ <fixed-case>U</fixed-case>niversity of <fixed-case>P</fixed-case>ennsylvania: Description of the <fixed-case>U</fixed-case>niversity of <fixed-case>P</fixed-case>ennsylvania System Used for <fixed-case>MUC</fixed-case>-6 BreckBaldwin JeffReynar - MikeCollins - JasonEisner - AdwaitRatnaparkhi + MikeCollins + JasonEisner + AdwaitRatnaparkhi JosephRosenzweig AnoopSarkar - Srinivas + Srinivas M95-1015 baldwin-etal-1995-university
Description of the <fixed-case>SAIC DX</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-6 - Lance A.Miller + Lance A.Miller M95-1016 miller-1995-description @@ -156,18 +156,18 @@
<fixed-case>SRA</fixed-case>: Description of the <fixed-case>SRA</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-6 - George R.Krupka + George R.Krupka M95-1018 krupka-1995-sra <fixed-case>SRI</fixed-case> <fixed-case>I</fixed-case>nternational <fixed-case>FASTUS</fixed-case> <fixed-case>S</fixed-case>ystem<fixed-case>MUC</fixed-case>-6 Test Results and Analysis - Douglas E.Appelt - Jerry R.Hobbs - JohnBear - DavidIsrael + Douglas E.Appelt + Jerry R.Hobbs + JohnBear + DavidIsrael MegumiKameyama - AndyKehler + AndyKehler DavidMartin KarenMyers MabryTyson @@ -182,7 +182,7 @@ <fixed-case>W</fixed-case>ayne <fixed-case>S</fixed-case>tate <fixed-case>U</fixed-case>niversity: Description of the <fixed-case>UNO</fixed-case> Natural Language Processing System as Used for <fixed-case>MUC</fixed-case>-6 - LucjaIwanska + LucjaIwanska MaryCroll TaewanYoon MariaAdams diff --git a/data/xml/M98.xml b/data/xml/M98.xml index 7d9a10985f..b2e095f59b 100644 --- a/data/xml/M98.xml +++ b/data/xml/M98.xml @@ -12,7 +12,7 @@ Overview of <fixed-case>MUC</fixed-case>-7 - Nancy A.Chinchor + Nancy A.Chinchor M98-1001 chinchor-1998-overview @@ -40,7 +40,7 @@ <fixed-case>U</fixed-case>niversity of <fixed-case>D</fixed-case>urham: Description of the <fixed-case>LOLITA</fixed-case> system as Used in <fixed-case>MUC</fixed-case>-7 - RobertoGarigliano + RobertoGarigliano AgnieszkaUrbanowicz David J.Nettleton M98-1005 @@ -73,20 +73,20 @@ <fixed-case>BBN</fixed-case>: Description of the <fixed-case>SIFT</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-7 ScottMiller - MichaelCrystal - HeidiFox - LanceRamshaw - RichardSchwartz + MichaelCrystal + HeidiFox + LanceRamshaw + RichardSchwartz RebeccaStone - RalphWeischedel + RalphWeischedel The Annotation Group M98-1009 miller-etal-1998-bbn Description of <fixed-case>L</fixed-case>ockheed <fixed-case>M</fixed-case>artin’s <fixed-case>NLT</fixed-case>oolset as Applied to <fixed-case>MUC</fixed-case>-7 (<fixed-case>AATM</fixed-case>7) - DeborahBrady - LoisChilds + DeborahBrady + LoisChilds DavidCassel BobMagee NorrisHeintzelman @@ -97,7 +97,7 @@ <fixed-case>NYU</fixed-case>: Description of the Proteus/<fixed-case>PET</fixed-case> System as Used for <fixed-case>MUC</fixed-case>-7 <fixed-case>ST</fixed-case> RomanYangarber - RalphGrishman + RalphGrishman M98-1011 yangarber-grishman-1998-nyu @@ -112,7 +112,7 @@ <fixed-case>TASC</fixed-case>: Description of the <fixed-case>TASC</fixed-case> System Used for <fixed-case>MUC</fixed-case>-7 - TerryPatten + TerryPatten BerylHoffman MartinThurn M98-1013 @@ -120,7 +120,7 @@ <fixed-case>FACILE</fixed-case>: Description of the <fixed-case>NE</fixed-case> System Used for <fixed-case>MUC</fixed-case>-7 - William JBlack + William JBlack FabioRinaldi DavidMowatt M98-1014 @@ -128,7 +128,7 @@ <fixed-case>I</fixed-case>so<fixed-case>Q</fixed-case>uest Inc.: Description of the <fixed-case>N</fixed-case>et<fixed-case>O</fixed-case>wl™ Extractor System as Used for <fixed-case>MUC</fixed-case>-7 - George R.Krupka + George R.Krupka KevinHausman M98-1015 krupka-hausman-1998-isoquest @@ -155,7 +155,7 @@ AndrewBorthwick JohnSterling EugeneAgichtein - RalphGrishman + RalphGrishman M98-1018 borthwick-etal-1998-nyu @@ -202,7 +202,7 @@ Appendix <fixed-case>B</fixed-case>: <fixed-case>MUC</fixed-case>-7 Test Scores Introduction - NancyChinchor + NancyChinchor M98-1024 chinchor-1998-appendix @@ -218,7 +218,7 @@
Appendix <fixed-case>D</fixed-case>: <fixed-case>MUC</fixed-case>-7 Information Extraction Task Definition (version 5.1) - NancyChinchor + NancyChinchor ElaineMarsh M98-1027 chinchor-marsh-1998-appendix @@ -232,8 +232,8 @@ Appendix <fixed-case>F</fixed-case>: <fixed-case>MUC</fixed-case>-7 Coreference Task Definition (version 3.0) - LynetteHirschman - NancyChinchor + LynetteHirschman + NancyChinchor M98-1029 hirschman-chinchor-1998-appendix diff --git a/data/xml/N01.xml b/data/xml/N01.xml index 6ae9cb67b6..6d20d05334 100644 --- a/data/xml/N01.xml +++ b/data/xml/N01.xml @@ -12,23 +12,23 @@ Instance-Based Natural Language Generation SebastianVarges - ChrisMellish + ChrisMellish N01-1001 varges-mellish-2001-instance Corpus-based <fixed-case>NP</fixed-case> Modifier Generation HuaCheng - MassimoPoesio - RenateHenschel - ChrisMellish + MassimoPoesio + RenateHenschel + ChrisMellish N01-1002 cheng-etal-2001-corpus <fixed-case>SP</fixed-case>o<fixed-case>T</fixed-case>: A Trainable Sentence Planner - Marilyn A.Walker - OwenRambow + Marilyn A.Walker + OwenRambow MonicaRogati N01-1003 walker-etal-2001-spot @@ -44,14 +44,14 @@ AbrahamIttycheriah MartinFranz Wei-JingZhu - AdwaitRatnaparkhi + AdwaitRatnaparkhi N01-1005 ittycheriah-etal-2001-question Transformation Based Learning in the Fast Lane GraceNgai - RaduFlorian + RaduFlorian N01-1006 ngai-florian-2001-transformation @@ -63,9 +63,9 @@
Text and Knowledge Mining for Coreference Resolution - Sanda M.Harabagiu - Razvan C.Bunescu - Steven J.Maiorano + Sanda M.Harabagiu + Razvan C.Bunescu + Steven J.Maiorano N01-1008 harabagiu-etal-2001-text @@ -96,7 +96,7 @@ Class-Based Probability Estimation Using a Semantic Hierarchy StephenClark - DavidWeir + DavidWeir N01-1013 clark-weir-2001-class @@ -121,15 +121,15 @@
Generating Training Data for Medical Dictations - SergeyPakhomov + SergeyPakhomov MichaelSchonwetter - JoanBachenko + JoanBachenko N01-1017 pakhomov-etal-2001-generating A Finite-State Approach to Machine Translation - SrinivasBangalore + SrinivasBangalore GiuseppeRiccardi N01-1018 bangalore-riccardi-2001-finite @@ -142,20 +142,20 @@ Multipath Translation Lexicon Induction via Bridge Languages - Gideon S.Mann + Gideon S.Mann DavidYarowsky N01-1020 mann-yarowsky-2001-multipath A Probabilistic <fixed-case>E</fixed-case>arley Parser as a Psycholinguistic Model - JohnHale + JohnHale N01-1021 hale-2001-probabilistic Refining Tabular Parsers for <fixed-case>TAG</fixed-case>s - ÉricVillemonte de la Clergerie + ÉricVillemonte de la Clergerie villemonte-de-la-clergerie-2001-refining @@ -167,14 +167,14 @@ Knowledge-Free Induction of Inflectional Morphologies PatrickSchone - DanielJurafsky + DanielJurafsky N01-1024 schone-jurafsky-2001-knowledge Chunking with Support Vector Machines - TakuKudo - YujiMatsumoto + TakuKudo + YujiMatsumoto N01-1025 kudo-matsumoto-2001-chunking @@ -187,8 +187,8 @@ Identifying User Corrections Automatically in Spoken Dialogue Systems - JuliaHirschberg - DianeLitman + JuliaHirschberg + DianeLitman MarcSwerts N01-1027 hirschberg-etal-2001-identifying @@ -209,17 +209,17 @@ Do <fixed-case>CFG</fixed-case>-Based Language Models Need Agreement Constraints? - MannyRayner - GenevieveGorrell - Beth AnnHockey - JohnDowding + MannyRayner + GenevieveGorrell + Beth AnnHockey + JohnDowding JohanBoye N01-1030 rayner-etal-2001-cfg You’re Not From ’Round Here, Are You? Naive <fixed-case>B</fixed-case>ayes Detection of Non-Native Utterances - Laura MayfieldTomokiyo + Laura MayfieldTomokiyo RosieJones N01-1031 tomokiyo-jones-2001-youre diff --git a/data/xml/N03.xml b/data/xml/N03.xml index 5358ef989c..c69c91ab03 100644 --- a/data/xml/N03.xml +++ b/data/xml/N03.xml @@ -12,7 +12,7 @@ Effective Utterance Classification with Unsupervised Phonotactic Models - HiyanAlshawi + HiyanAlshawi 1–7 N03-1001 alshawi-2003-effective @@ -20,7 +20,7 @@ <fixed-case>J</fixed-case>apanese Named Entity Extraction with Redundant Morphological Analysis MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 8–15 N03-1002 asahara-matsumoto-2003-japanese @@ -35,7 +35,7 @@ In Question Answering, Two Heads Are Better Than One - JenniferChu-Carroll + JenniferChu-Carroll KrzysztofCzuba JohnPrager AbrahamIttycheriah @@ -45,8 +45,8 @@ Automatic Acquisition of Names Using Speak and Spell Mode in Spoken Dialogue Systems - GraceChung - StephanieSeneff + GraceChung + StephanieSeneff ChaoWang 32–39 N03-1005 @@ -54,7 +54,7 @@ Minimally Supervised Induction of Grammatical Gender - SilviuCucerzan + SilviuCucerzan DavidYarowsky 40–47 N03-1006 @@ -71,14 +71,14 @@ Latent Semantic Information in Maximum Entropy Language Models for Conversational Speech Recognition YonggangDeng - SanjeevKhudanpur + SanjeevKhudanpur 56–63 N03-1008 deng-khudanpur-2003-latent Simpler and More General Minimization for Weighted Finite-State Automata - JasonEisner + JasonEisner 64–71 N03-1009 eisner-2003-simpler @@ -92,9 +92,9 @@ Learning Semantic Constraints for the Automatic Discovery of Part-Whole Relations - RoxanaGirju - AdrianaBadulescu - DanMoldovan + RoxanaGirju + AdrianaBadulescu + DanMoldovan 80–87 N03-1011 girju-etal-2003-learning @@ -112,14 +112,14 @@ A Categorial Variation Database for <fixed-case>E</fixed-case>nglish NizarHabash - BonnieDorr + BonnieDorr 96–102 N03-1013 habash-dorr-2003-categorial Inducing History Representations for Broad Coverage Statistical Parsing - JamesHenderson + JamesHenderson 103–110 N03-1014 henderson-2003-inducing @@ -134,7 +134,7 @@ <fixed-case>A</fixed-case>* Parsing: Fast Exact <fixed-case>V</fixed-case>iterbi Parse Selection DanKlein - Christopher D.Manning + Christopher D.Manning 119–126 N03-1016 klein-manning-2003-parsing @@ -142,7 +142,7 @@ Statistical Phrase-Based Translation PhilippKoehn - Franz J.Och + Franz J.Och DanielMarcu 127–133 N03-1017 @@ -167,8 +167,8 @@ Automatic Evaluation of Summaries Using N-gram Co-occurrence Statistics - Chin-YewLin - EduardHovy + Chin-YewLin + EduardHovy 150–157 N03-1020 lin-hovy-2003-automatic @@ -182,10 +182,10 @@ <fixed-case>COGEX</fixed-case>: A Logic Prover for Question Answering - DanMoldovan - ChristineClark - SandaHarabagiu - SteveMaiorano + DanMoldovan + ChristineClark + SandaHarabagiu + SteveMaiorano 166–172 N03-1022 moldovan-etal-2003-cogex @@ -193,7 +193,7 @@ Weakly Supervised Natural Language Learning Without Redundant Views VincentNg - ClaireCardie + ClaireCardie 173–180 N03-1023 ng-cardie-2003-weakly @@ -219,7 +219,7 @@ Statistical Sentence Condensation using Ambiguity Packing and Stochastic Disambiguation Methods for <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar StefanRiezler - Tracy H.King + Tracy H.King RichardCrouch AnnieZaenen 197–204 @@ -244,7 +244,7 @@ Comma Restoration Using Constituency Information - Stuart M.Shieber + Stuart M.Shieber XiaopengTao 221–227 N03-1029 @@ -260,7 +260,7 @@ Example Selection for Bootstrapping Statistical Parsers - MarkSteedman + MarkSteedman RebeccaHwa StephenClark MilesOsborne @@ -275,8 +275,8 @@ Frequency Estimates for Statistical Word Similarity Measures - Egidio L.Terra - Charles L. A.Clarke + Egidio L.Terra + Charles L. A.Clarke 244–251 N03-1032 terra-clarke-2003-frequency @@ -285,7 +285,7 @@ Feature-Rich Part-of-Speech Tagging with a Cyclic Dependency Network KristinaToutanova DanKlein - Christopher D.Manning + Christopher D.Manning YoramSinger 252–259 N03-1033 @@ -293,7 +293,7 @@ Evaluating the Evaluation: A Case Study Using the <fixed-case>TREC</fixed-case> 2002 Question Answering Track - Ellen M.Voorhees + Ellen M.Voorhees 260–267 N03-1034 voorhees-2003-evaluating @@ -316,7 +316,7 @@ A Web-Trained Extraction Summarization System LiangZhou - EduardHovy + EduardHovy 284–290 N03-1037 zhou-hovy-2003-web @@ -335,7 +335,7 @@ Automating <fixed-case>XML</fixed-case> markup of text documents ShaziaAkhtar - Ronan G.Reilly + Ronan G.Reilly JohnDunnion 1–3 N03-2001 @@ -343,7 +343,7 @@ Factored Language Models and Generalized Parallel Backoff - Jeff A.Bilmes + Jeff A.Bilmes KatrinKirchhoff 4–6 N03-2002 @@ -352,8 +352,8 @@ Getting More Mileage from Web Text Sources for Conversational Speech Language Modeling using Class-Dependent Mixtures IvanBulyko - MariOstendorf - AndreasStolcke + MariOstendorf + AndreasStolcke 7–9 N03-2003 bulyko-etal-2003-getting @@ -361,14 +361,14 @@ Exploiting Diversity for Answering Questions JohnBurger - JohnHenderson + JohnHenderson 10–12 N03-2004 burger-henderson-2003-exploiting Story Link Detection and New Event Detection are Asymmetric - FrancineChen + FrancineChen AymanFarahat ThorstenBrants 13–15 @@ -378,8 +378,8 @@ Adaptation Using Out-of-Domain Corpus within <fixed-case>EBMT</fixed-case> TakaoDoi - EiichiroSumita - HirofumiYamamoto + EiichiroSumita + HirofumiYamamoto 16–18 N03-2006 doi-etal-2003-adaptation @@ -394,23 +394,23 @@ A Maximum Entropy Approach to <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Tagging MichaelFleischman - EduardHovy + EduardHovy 22–24 N03-2008 fleischman-hovy-2003-maximum Target Word Detection and Semantic Role Chunking using Support Vector Machines - KadriHacioglu - WayneWard + KadriHacioglu + WayneWard 25–27 N03-2009 hacioglu-ward-2003-target Question Classification with Support Vector Machines and Error Correcting Codes - KadriHacioglu - WayneWard + KadriHacioglu + WayneWard 28–30 N03-2010 hacioglu-ward-2003-question @@ -426,9 +426,9 @@ Detection Of Agreement vs. Disagreement In Meetings: Training With Unlabeled Data - DustinHillard - MariOstendorf - ElizabethShriberg + DustinHillard + MariOstendorf + ElizabethShriberg 34–36 N03-2012 hillard-etal-2003-detection @@ -437,7 +437,7 @@ Automatic Expansion of Equivalent Sentence Set Based on Syntactic Substitution KenjiImamura YasuhiroAkiba - EiichiroSumita + EiichiroSumita 37–39 N03-2013 imamura-etal-2003-automatic-expansion @@ -445,11 +445,11 @@ Identifying and Tracking Entity Mentions in a Maximum Entropy Framework AbrahamIttycheriah - LucianLita - NandaKambhatla - NicolasNicolov - SalimRoukos - MargoStys + LucianLita + NandaKambhatla + NicolasNicolov + SalimRoukos + MargoStys 40–42 N03-2014 ittycheriah-etal-2003-identifying @@ -481,8 +481,8 @@ Towards Emotion Prediction in Spoken Tutoring Dialogues - DianeLitman - KateForbes + DianeLitman + KateForbes ScottSilliman 52–54 N03-2018 @@ -503,7 +503,7 @@ TakashiNinomiya YusukeMiyao TomokoOhta - Jun’ichiTsujii + Jun’ichiTsujii 58–60 N03-2020 masuda-etal-2003-robust @@ -512,7 +512,7 @@ Precision and Recall of Machine Translation I. DanMelamed RyanGreen - Joseph P.Turian + Joseph P.Turian 61–63 N03-2021 melamed-etal-2003-precision @@ -520,15 +520,15 @@ Semantic Extraction with Wide-Coverage Lexical Resources BehrangMohit - SriniNarayanan + SriniNarayanan 64–66 N03-2022 mohit-narayanan-2003-semantic Category-based Pseudowords - Preslav I.Nakov - Marti A.Hearst + Preslav I.Nakov + Marti A.Hearst 67–69 N03-2023 nakov-hearst-2003-category @@ -536,7 +536,7 @@ References to Named Entities: a Corpus Study AniNenkova - KathleenMcKeown + KathleenMcKeown 70–72 N03-2024 nenkova-mckeown-2003-references @@ -546,21 +546,21 @@ ChengNiu WeiLi JihongDing - Rohini K.Srihari + Rohini K.Srihari 73–75 N03-2025 niu-etal-2003-bootstrapping Desparately Seeking <fixed-case>C</fixed-case>ebuano - Douglas W.Oard + Douglas W.Oard DavidDoermann - BonnieDorr + BonnieDorr DaqingHe PhilipResnik - AmyWeinberg + AmyWeinberg WilliamByrne - SanjeevKhudanpur + SanjeevKhudanpur DavidYarowsky AntonLeuski PhilippKoehn @@ -589,14 +589,14 @@ Automatic Derivation of Surface Text Patterns for a Maximum Entropy Based Question Answering System DeepakRavichandran AbrahamIttycheriah - SalimRoukos + SalimRoukos 85–87 N03-2029 ravichandran-etal-2003-automatic A Hybrid Approach to Content Analysis for Automatic Essay Grading - Carolyn P.Rose + Carolyn P.Rose AntonioRoque DumisizweBhembe KurtVanLehn @@ -608,7 +608,7 @@ Auditory-based Acoustic Distinctive Features and Spectral Cues for Robust Automatic Speech Recognition in Low-<fixed-case>SNR</fixed-case> Car Environments Sid-AhmedSelouani HeshamTolba - DouglasO’Shaughnessy + DouglasO’Shaughnessy 91–93 N03-2031 selouani-etal-2003-auditory @@ -617,7 +617,7 @@ Latent Semantic Analysis for Dialogue Act Classification RiccardoSerafin BarbaraDi Eugenio - MichaelGlass + MichaelGlass 94–96 N03-2032 serafin-etal-2003-latent @@ -626,8 +626,8 @@ Automatically Predicting Information Quality in News Documents RongTang Kwong BorNg - TomekStrzalkowski - Paul B.Kantor + TomekStrzalkowski + Paul B.Kantor 97–99 N03-2033 tang-etal-2003-automatically @@ -651,7 +651,7 @@ A Phrase-based Unigram Model for Statistical Machine Translation - ChristophTillmann + ChristophTillmann FeiXia 106–108 N03-2036 @@ -659,7 +659,7 @@ Evaluating Answers to Definition Questions - Ellen M.Voorhees + Ellen M.Voorhees 109–111 N03-2037 voorhees-2003-evaluating-answers @@ -766,16 +766,16 @@ <fixed-case>TIPS</fixed-case>: A Translingual Information Processing System - YaserAl-Onaizan - RaduFlorian + YaserAl-Onaizan + RaduFlorian MartinFranz - HanyHassan + HanyHassan Young-SukLee - J. ScottMcCarley + J. ScottMcCarley KishorePapineni - SalimRoukos - JeffreySorensen - ChristophTillmann + SalimRoukos + JeffreySorensen + ChristophTillmann ToddWard FeiXia 1–2 @@ -794,7 +794,7 @@ <fixed-case>DOGHED</fixed-case>: A Template-Based Generator for Multimodal Dialog Systems Targeting Heterogeneous Devices SongsakChannarukul - Susan W.McRoy + Susan W.McRoy Syed S.Ali 5–6 N03-4003 @@ -803,15 +803,15 @@ <fixed-case>TAP</fixed-case>-<fixed-case>XL</fixed-case>: An Automated Analyst’s Assistant SeanColbath - FrancisKubala + FrancisKubala 7–8 N03-4004 colbath-kubala-2003-tap A Spoken Dialogue Interface to a Geologist’s Field Assistant - JohnDowding - JamesHieronymus + JohnDowding + JamesHieronymus 9–10 N03-4005 dowding-hieronymus-2003-spoken @@ -819,8 +819,8 @@ <fixed-case>QCS</fixed-case>: A Tool for Querying, Clustering, and Summarizing Documents Daniel M.Dunlavy - JohnConroy - Dianne P.O’Leary + JohnConroy + Dianne P.O’Leary 11–12 N03-4006 dunlavy-etal-2003-qcs @@ -828,26 +828,26 @@ Demonstration of the <fixed-case>CROSSMARC</fixed-case> System VangelisKarkaletsis - Constantine D.Spyropoulos + Constantine D.Spyropoulos DimitrisSouflis ClaireGrover BenHachey - Maria TeresaPazienza + Maria TeresaPazienza MicheleVindigni EmmanuelCartier - JoseCoch + JoseCoch 13–14 N03-4007 karkaletsis-etal-2003-demonstration <fixed-case>C</fixed-case>olumbia’s Newsblaster: New Features and Future Directions - KathleenMcKeown + KathleenMcKeown ReginaBarzilay JohnChen - DavidElson - DavidEvans - JudithKlavans + DavidElson + DavidEvans + JudithKlavans AniNenkova BarrySchiffman SergeySigelman @@ -857,7 +857,7 @@ <fixed-case>W</fixed-case>ord<fixed-case>F</fixed-case>reak: An Open Tool for Linguistic Annotation - ThomasMorton + ThomasMorton JeremyLaCivita 17–18 N03-4009 @@ -865,8 +865,8 @@ <fixed-case>JAVELIN</fixed-case>: A Flexible, Planner-Based Architecture for Question Answering - EricNyberg - RobertFrederking + EricNyberg + RobertFrederking 19–20 N03-4010 nyberg-frederking-2003-javelin @@ -881,7 +881,7 @@ Automatic Extraction of Semantic Networks from Text using Leximancer - Andrew E.Smith + Andrew E.Smith 23–24 N03-4012 smith-2003-automatic @@ -890,7 +890,7 @@ pre-<fixed-case>CODIE</fixed-case>–Crosslingual On-Demand Information Extraction KiyoshiSudo SatoshiSekine - RalphGrishman + RalphGrishman 25–26 N03-4013 sudo-etal-2003-pre @@ -904,19 +904,19 @@ <fixed-case>S</fixed-case>peechalator: Two-Way Speech-to-Speech Translation in Your Hand - AlexWaibel + AlexWaibel AhmedBadran - Alan W.Black - RobertFrederking - DonnaGates - AlonLavie - LoriLevin + Alan W.Black + RobertFrederking + DonnaGates + AlonLavie + LoriLevin KevinLenzo - Laura MayfieldTomokiyo + Laura MayfieldTomokiyo JuergenReichert TanjaSchultz DorcasWallace - MonikaWoszczyna + MonikaWoszczyna JingZhang 29–30 N03-4015 @@ -933,8 +933,8 @@ Identifying Opinionated Sentences TheresaWilson - David R.Pierce - JanyceWiebe + David R.Pierce + JanyceWiebe 33–34 N03-4017 wilson-etal-2003-identifying @@ -959,7 +959,7 @@ Information Retrieval Systems as Integration Platforms for Language Technologies - Douglas W.Oard + Douglas W.Oard 2–2 N03-5002 oard-2003-information @@ -973,7 +973,7 @@ The State of the Art in Language Modeling - JoshuaGoodman + JoshuaGoodman 4–4 N03-5004 goodman-2003-state @@ -988,7 +988,7 @@ Annotation of Temporal and Event Expressions - JamesPustejovsky + JamesPustejovsky InderjeetMani 6–6 N03-5006 @@ -1003,7 +1003,7 @@ Optimization, Maxent Models, and Conditional Estimation without Magic - ChristopherManning + ChristopherManning DanKlein 8–8 N03-5008 diff --git a/data/xml/N04.xml b/data/xml/N04.xml index aa3c26b7ee..b39f822995 100644 --- a/data/xml/N04.xml +++ b/data/xml/N04.xml @@ -30,7 +30,7 @@ Cross-Document Coreference on a Large Scale Corpus Chung HeongGooi - JamesAllan + JamesAllan 9–16 N04-1002 gooi-allan-2004-cross @@ -47,15 +47,15 @@ A Salience-Based Approach to Gesture-Speech Alignment JacobEisenstein - C. MarioChristoudias + C. MarioChristoudias 25–32 N04-1004 eisenstein-christoudias-2004-salience Balancing data-driven and rule-based approaches in the context of a Multimodal Conversational System - SrinivasBangalore - MichaelJohnston + SrinivasBangalore + MichaelJohnston 33–40 N04-1005 bangalore-johnston-2004-balancing @@ -89,7 +89,7 @@ A Probabilistic Rasch Analysis of Question Answering Evaluations RenseLange JuanMoran - Warren R.Greiff + Warren R.Greiff LisaFerro 65–72 N04-1009 @@ -105,7 +105,7 @@ Sentence-Internal Prosody Does not Help Parsing the Way Punctuation Does - MichelleGregory + MichelleGregory MarkJohnson EugeneCharniak 81–88 @@ -122,11 +122,11 @@ Speed and Accuracy in Shallow and Deep Stochastic Parsing - RonKaplan + RonKaplan StefanRiezler - Tracy H.King - John T.Maxwell III - AlexVasserman + Tracy H.King + John T.Maxwell III + AlexVasserman RichardCrouch 97–104 N04-1013 @@ -158,8 +158,8 @@ Lattice-Based Search for Spoken Utterance Retrieval - MuratSaraclar - RichardSproat + MuratSaraclar + RichardSproat 129–136 N04-1017 saraclar-sproat-2004-lattice @@ -167,8 +167,8 @@ Detecting Structural Metadata with Decision Trees and Transformation-Based Learning JoungbumKim - Sarah E.Schwarm - MariOstendorf + Sarah E.Schwarm + MariOstendorf 137–144 N04-1018 kim-etal-2004-detecting @@ -176,7 +176,7 @@ Evaluating Content Selection in Summarization: The Pyramid Method AniNenkova - RebeccaPassonneau + RebeccaPassonneau 145–152 N04-1019 nenkova-passonneau-2004-evaluating @@ -191,19 +191,19 @@ A Smorgasbord of Features for Statistical Machine Translation - Franz JosefOch + Franz JosefOch DanielGildea - SanjeevKhudanpur + SanjeevKhudanpur AnoopSarkar KenjiYamada - AlexFraser + AlexFraser ShankarKumar LibinShen - DavidSmith + DavidSmith KatherineEng VirenJain ZhenJin - DragomirRadev + DragomirRadev 161–168 N04-1021 och-etal-2004-smorgasbord @@ -220,7 +220,7 @@ Discriminative Reranking for Machine Translation LibinShen AnoopSarkar - Franz JosefOch + Franz JosefOch 177–184 N04-1023 shen-etal-2004-discriminative @@ -245,8 +245,8 @@ Predicting Emotion in Spoken Dialogue from Multiple Knowledge Sources - KateForbes-Riley - DianeLitman + KateForbes-Riley + DianeLitman 201–208 N04-1026 forbes-riley-litman-2004-predicting @@ -277,11 +277,11 @@ Shallow Semantic Parsing using Support Vector Machines - Sameer S.Pradhan - Wayne H.Ward - KadriHacioglu - James H.Martin - DanJurafsky + Sameer S.Pradhan + Wayne H.Ward + KadriHacioglu + James H.Martin + DanJurafsky 233–240 N04-1030 pradhan-etal-2004-shallow @@ -298,7 +298,7 @@ Shallow Semantic Parsing of <fixed-case>C</fixed-case>hinese HonglinSun - DanielJurafsky + DanielJurafsky 249–256 N04-1032 sun-jurafsky-2004-shallow @@ -306,15 +306,15 @@ Improvements in Phrase-Based Statistical Machine Translation RichardZens - HermannNey + HermannNey 257–264 N04-1033 zens-ney-2004-improvements Improved Machine Translation Performance via Parallel Sentence Extraction from Comparable Corpora - Dragos StefanMunteanu - AlexanderFraser + Dragos StefanMunteanu + AlexanderFraser DanielMarcu 265–272 N04-1034 @@ -333,16 +333,16 @@ Improving Named Entity Translation Combining Phonetic and Semantic Similarities FeiHuang - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel 281–288 N04-1036 huang-etal-2004-improving The (Non)Utility of Predicate-Argument Frequencies for Pronoun Interpretation - AndrewKehler - DouglasAppelt + AndrewKehler + DouglasAppelt LaraTaylor AleksandrSimma 289–296 @@ -351,22 +351,22 @@ Unsupervised Learning of Contextual Role Knowledge for Coreference Resolution - DavidBean - EllenRiloff + DavidBean + EllenRiloff 297–304 N04-1038 bean-riloff-2004-unsupervised Exponential Priors for Maximum Entropy Models - JoshuaGoodman + JoshuaGoodman 305–312 N04-1039 goodman-2004-exponential Multiple Similarity Measures and Source-Pair Information in Story Link Detection - FrancineChen + FrancineChen AymanFarahat ThorstenBrants 313–320 @@ -423,7 +423,7 @@ Identifying Chemical Names in Biomedical Text: an Investigation of Substring Co-occurrence Based Approaches - AlexanderVasserman + AlexanderVasserman 7–12 N04-2002 vasserman-2004-identifying @@ -452,7 +452,7 @@ Automatic Article Restoration - JohnLee + JohnLee 31–36 N04-2006 lee-2004-automatic @@ -501,16 +501,16 @@ <fixed-case>C</fixed-case>olumbia Newsblaster: Multilingual News Summarization on the Web - David KirkEvans - Judith L.Klavans - Kathleen R.McKeown + David KirkEvans + Judith L.Klavans + Kathleen R.McKeown 1–4 N04-3001 evans-etal-2004-columbia <fixed-case>ITSPOKE</fixed-case>: An Intelligent Tutoring Spoken Dialogue System - Diane J.Litman + Diane J.Litman ScottSilliman 5–8 N04-3002 @@ -528,19 +528,19 @@ SusanneRiehemann DimitraVergyri JingZheng - ChristopherCuly + ChristopherCuly 9–12 N04-3003 precoda-etal-2004-limited <fixed-case>M</fixed-case>i<fixed-case>TAP</fixed-case> for <fixed-case>SARS</fixed-case> Detection - Laurie E.Damianos - SamuelBayer + Laurie E.Damianos + SamuelBayer Michael A.Chisholm - JohnHenderson - LynetteHirschman - WilliamMorgan + JohnHenderson + LynetteHirschman + WilliamMorgan MarcUbaldino GuidoZarrella James M.Wilson V @@ -551,13 +551,13 @@ Multilingual Video and Audio News Alerting - David D.Palmer + David D.Palmer PatrickBray MarcReichman KatherineRhodes NoahWhite AndrewMerlino - FrancisKubala + FrancisKubala 17–18 N04-3005 palmer-etal-2004-multilingual @@ -565,14 +565,14 @@ Open Text Semantic Parsing Using <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et and <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et LeiShi - RadaMihalcea + RadaMihalcea 19–22 N04-3006 shi-mihalcea-2004-open A Scaleable Multi-document Centroid-based Summarizer - DragomirRadev + DragomirRadev TimothyAllison MatthewCraig StankoDimitrov @@ -605,10 +605,10 @@ A <fixed-case>T</fixed-case>hai Speech Translation System for Medical Dialogs TanjaSchultz DorcasAlexander - Alan W.Black + Alan W.Black KayPeterson SinapornSuebvisai - AlexWaibel + AlexWaibel 34–35 N04-3010 schultz-etal-2004-thai @@ -656,15 +656,15 @@ <fixed-case>MMR</fixed-case>-based Feature Selection for Text Categorization ChangkiLee - Gary GeunbaeLee + Gary GeunbaeLee 5–8 N04-4002 lee-lee-2004-mmr Example-based Rescoring of Statistical Machine Translation Output - MichaelPaul - EiichiroSumita + MichaelPaul + EiichiroSumita SeiichiYamamoto 9–12 N04-4003 @@ -676,7 +676,7 @@ YasuhiroKodama TomohiroWatanabe HiromitsuNishizaki - SeiichiNakagawa + SeiichiNakagawa 13–16 N04-4004 utsuro-etal-2004-empirical @@ -692,7 +692,7 @@ Language Model Adaptation with <fixed-case>MAP</fixed-case> Estimation and the Perceptron Algorithm MichielBacchiani BrianRoark - MuratSaraclar + MuratSaraclar 21–24 N04-4006 bacchiani-etal-2004-language @@ -700,9 +700,9 @@ Advances in Children’s Speech Recognition within an Interactive Literacy Tutor AndreasHagen - BryanPellom + BryanPellom Sarelvan Vuuren - RonaldCole + RonaldCole 25–28 N04-4007 hagen-etal-2004-advances @@ -717,8 +717,8 @@ Competitive Self-Trained Pronoun Interpretation - AndrewKehler - DouglasAppelt + AndrewKehler + DouglasAppelt LaraTaylor AleksandrSimma 33–36 @@ -729,7 +729,7 @@ Using N-best lists for Named Entity Recognition from <fixed-case>C</fixed-case>hinese Speech LufengZhai PascaleFung - RichardSchwartz + RichardSchwartz MarineCarpuat DekaiWu 37–40 @@ -738,7 +738,7 @@ Performance Evaluation and Error Analysis for Multimodal Reference Resolution in a Conversation System - Joyce Y.Chai + Joyce Y.Chai ZaharPrasov PengyuHong 41–44 @@ -756,7 +756,7 @@ Web Search Intent Induction via Automatic Query Reformulation - HalDaumé III + HalDaumé III EricBrill 49–52 N04-4013 @@ -764,8 +764,8 @@ <fixed-case>HITIQA</fixed-case>: A Data Driven Approach to Interactive Analytical Question Answering - SharonSmall - TomekStrzalkowski + SharonSmall + TomekStrzalkowski 53–56 N04-4014 small-strzalkowski-2004-hitiqa @@ -781,15 +781,15 @@ Correction Grammars for Error Handling in a Speech Dialog System HirohikoSagawa TerukoMitamura - EricNyberg + EricNyberg 61–64 N04-4016 sagawa-etal-2004-correction A Comparison of Rule–Based and Statistical Methods for Semantic Language Modeling and Confidence Measurement - RuhiSrikaya - YuqingGao + RuhiSrikaya + YuqingGao MichaelPicheny 65–68 N04-4017 @@ -825,7 +825,7 @@ Feature-based Pronunciation Modeling for Speech Recognition KarenLivescu - JamesGlass + JamesGlass 81–84 N04-4021 livescu-glass-2004-feature @@ -833,14 +833,14 @@ Context-based Speech Recognition Error Detection and Correction ArupSarma - David D.Palmer + David D.Palmer 85–88 N04-4022 sarma-palmer-2004-context Feature Selection for Trainable Multilingual Broadcast News Segmentation - David D.Palmer + David D.Palmer MarcReichman ElyesYaich 89–92 @@ -849,31 +849,31 @@ Direct Maximization of Average Precision by Hill-Climbing, with a Comparison to a Maximum Entropy Approach - WilliamMorgan - WarrenGreiff - JohnHenderson + WilliamMorgan + WarrenGreiff + JohnHenderson 93–96 N04-4024 morgan-etal-2004-direct Automated Team Discourse Annotation and Performance Prediction Using <fixed-case>LSA</fixed-case> - Melanie J.Martin - Peter W.Foltz + Melanie J.Martin + Peter W.Foltz 97–100 N04-4025 martin-foltz-2004-automated A Unigram Orientation Model for Statistical Machine Translation - ChristophTillmann + ChristophTillmann 101–104 N04-4026 tillmann-2004-unigram Summarizing Email Threads - OwenRambow + OwenRambow LokeshShrestha JohnChen ChristyLaurdisen @@ -898,7 +898,7 @@ B.Schmidt-Nielsen KentWittenburg JosephWoelfel - Fang-FangZhang + Fang-FangZhang 113–116 N04-4029 divi-etal-2004-speech @@ -906,14 +906,14 @@ Nearly-Automated Metadata Hierarchy Creation EmiliaStoica - Marti A.Hearst + Marti A.Hearst 117–120 N04-4030 stoica-hearst-2004-nearly Computational Linkuistics: Word Triggers across Hyperlinks - DragomirRadev + DragomirRadev HongQi AdamWinkel DanielTam @@ -923,8 +923,8 @@ Parsing Conversational Speech Using Enhanced Segmentation - Jeremy G.Kahn - MariOstendorf + Jeremy G.Kahn + MariOstendorf CiprianChelba 125–128 N04-4032 @@ -947,54 +947,54 @@ Prosody-based Topic Segmentation for <fixed-case>M</fixed-case>andarin Broadcast News - Gina-AnneLevow + Gina-AnneLevow 137–140 N04-4035 levow-2004-prosody Parsing Arguments of Nominalizations in <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese - SameerPradhan + SameerPradhan HonglinSun - WayneWard - James H.Martin - DanielJurafsky + WayneWard + James H.Martin + DanielJurafsky 141–144 N04-4036 pradhan-etal-2004-parsing A Lightweight Semantic Chunker Based on Tagging - KadriHacioglu + KadriHacioglu 145–148 N04-4037 hacioglu-2004-lightweight Automatic Tagging of <fixed-case>A</fixed-case>rabic Text: From Raw Text to Base Phrase Chunks - MonaDiab - KadriHacioglu - DanielJurafsky + MonaDiab + KadriHacioglu + DanielJurafsky 149–152 N04-4038 diab-etal-2004-automatic Converting Text into Agent Animations: Assigning Gestures to Text - Yukiko I.Nakano + Yukiko I.Nakano MasashiOkamoto DaisukeKawahara QingLi - ToyoakiNishida + ToyoakiNishida 153–156 N04-4039 nakano-etal-2004-converting A Lexically-Driven Algorithm for Disfluency Detection - MatthewSnover - BonnieDorr - RichardSchwartz + MatthewSnover + BonnieDorr + RichardSchwartz 157–160 N04-4040 snover-etal-2004-lexically diff --git a/data/xml/N06.xml b/data/xml/N06.xml index 8a50889a33..420eff9a5d 100644 --- a/data/xml/N06.xml +++ b/data/xml/N06.xml @@ -4,9 +4,9 @@ Proceedings of the Human Language Technology Conference of the NAACL, Main Conference N06-1 - Robert C.Moore - JeffBilmes - JenniferChu-Carroll + Robert C.Moore + JeffBilmes + JenniferChu-Carroll MarkSanderson Association for Computational Linguistics
New York City, USA
@@ -70,9 +70,9 @@ Learning to recognize features of valid textual entailments BillMacCartney TrondGrenager - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe DanielCer - Christopher D.Manning + Christopher D.Manning 41–48 N06-1006 maccartney-etal-2006-learning @@ -94,7 +94,7 @@ Role of Local Context in Automatic Deidentification of Ungrammatical, Fragmented Text TawandaSibanda - OzlemUzuner + OzlemUzuner OzlemUzuner 65–73 N06-1009 @@ -103,7 +103,7 @@ Exploiting Domain Structure for Named Entity Recognition JingJiang - ChengXiangZhai + ChengXiangZhai 74–81 N06-1010 jiang-zhai-2006-exploiting @@ -127,8 +127,8 @@ A Maximum Entropy Approach to Combining Word Alignments - Necip FazilAyan - Bonnie J.Dorr + Necip FazilAyan + Bonnie J.Dorr 96–103 N06-1013 ayan-dorr-2006-maximum @@ -147,7 +147,7 @@ SimonLacoste-Julien BenTaskar DanKlein - Michael I.Jordan + Michael I.Jordan 112–119 N06-1015 lacoste-julien-etal-2006-word @@ -156,8 +156,8 @@ An Empirical Study of the Behavior of Active Learning for Word Sense Disambiguation JinyingChen AndrewSchein - LyleUngar - MarthaPalmer + LyleUngar + MarthaPalmer 120–127 N06-1016 chen-etal-2006-empirical @@ -172,8 +172,8 @@ Understanding Temporal Expressions in Emails BenjaminHan - DonnaGates - LoriLevin + DonnaGates + LoriLevin 136–143 N06-1018 han-etal-2006-understanding @@ -181,7 +181,7 @@ Partial Training for a Lexicalized-Grammar Parser StephenClark - JamesCurran + JamesCurran 144–151 N06-1019 clark-curran-2006-partial @@ -197,10 +197,10 @@ Multilingual Dependency Parsing using <fixed-case>B</fixed-case>ayes Point Machines - SimonCorston-Oliver + SimonCorston-Oliver AnthonyAue KevinDuh - EricRingger + EricRingger 160–167 N06-1021 corston-oliver-etal-2006-multilingual @@ -211,7 +211,7 @@ MarkJohnson MichaElsner JosephAusterweil - DavidEllis + DavidEllis IsaacHaxton CatherineHill R.Shrivaths @@ -234,14 +234,14 @@ Fully Parsing the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank RyanGabbard SethKulick - MitchellMarcus + MitchellMarcus 184–191 N06-1024 gabbard-etal-2006-fully Exploiting Semantic Role Labeling, <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>W</fixed-case>ikipedia for Coreference Resolution - Simone PaoloPonzetto + Simone PaoloPonzetto MichaelStrube 192–199 N06-1025 @@ -250,7 +250,7 @@ Identifying and Analyzing Judgment Opinions Soo-MinKim - EduardHovy + EduardHovy 200–207 N06-1026 kim-hovy-2006-identifying @@ -260,7 +260,7 @@ DonghuiFeng ErinShaw JihieKim - EduardHovy + EduardHovy 208–215 N06-1027 feng-etal-2006-learning @@ -275,7 +275,7 @@ Unsupervised and Semi-supervised Learning of Tone and Pitch Accent - Gina-AnneLevow + Gina-AnneLevow 224–231 N06-1029 levow-2006-unsupervised @@ -283,7 +283,7 @@ Learning Pronunciation Dictionaries: Language Complexity and Word Selection Strategies JohnKominek - Alan WBlack + Alan WBlack 232–239 N06-1030 kominek-black-2006-learning @@ -299,7 +299,7 @@ Grammatical Machine Translation StefanRiezler - John T.Maxwell III + John T.Maxwell III 248–255 N06-1032 riezler-maxwell-iii-2006-grammatical @@ -316,16 +316,16 @@ Modelling User Satisfaction and Student Learning in a Spoken Dialogue Tutoring System with Generic, Tutoring, and User Affect Parameters - KateForbes-Riley - DianeLitman + KateForbes-Riley + DianeLitman 264–271 N06-1034 forbes-riley-litman-2006-modelling Comparing the Utility of State Features in Spoken Dialogue Using Reinforcement Learning - JoelTetreault - DianeLitman + JoelTetreault + DianeLitman 272–279 N06-1035 tetreault-litman-2006-comparing @@ -383,7 +383,7 @@ Learning Morphological Disambiguation Rules for <fixed-case>T</fixed-case>urkish DenizYuret - FerhanTüre + FerhanTüre 328–334 N06-1042 yuret-ture-2006-learning @@ -425,7 +425,7 @@ GabrielMurray SteveRenals JeanCarletta - JohannaMoore + JohannaMoore 367–374 N06-1047 murray-etal-2006-incorporating @@ -468,7 +468,7 @@ TaoTao XuanhuiWang QiaozhuMei - ChengXiangZhai + ChengXiangZhai 407–414 N06-1052 tao-etal-2006-language @@ -485,8 +485,8 @@ A fast finite-state relaxation method for enforcing global constraints on sequence decoding - RoyTromble - JasonEisner + RoyTromble + JasonEisner 423–430 N06-1054 tromble-eisner-2006-fast @@ -501,7 +501,7 @@ Learning for Semantic Parsing with Statistical Machine Translation Yuk WahWong - RaymondMooney + RaymondMooney 439–446 N06-1056 wong-mooney-2006-learning @@ -509,9 +509,9 @@ <fixed-case>P</fixed-case>ara<fixed-case>E</fixed-case>val: Using Paraphrases to Evaluate Summaries Automatically LiangZhou - Chin-YewLin - Dragos StefanMunteanu - EduardHovy + Chin-YewLin + Dragos StefanMunteanu + EduardHovy 447–454 N06-1057 zhou-etal-2006-paraeval @@ -526,10 +526,10 @@ An Information-Theoretic Approach to Automatic Evaluation of Summaries - Chin-YewLin + Chin-YewLin GuihongCao JianfengGao - Jian-YunNie + Jian-YunNie 463–470 N06-1059 lin-etal-2006-information @@ -537,7 +537,7 @@ Cross Linguistic Name Matching in <fixed-case>E</fixed-case>nglish and <fixed-case>A</fixed-case>rabic AndrewFreeman - SherriCondon + SherriCondon ChristopherAckerman 471–478 N06-1060 @@ -545,7 +545,7 @@ Language Model-Based Document Clustering Using Random Walks - GüneşErkan + GüneşErkan 479–486 N06-1061 erkan-2006-language @@ -559,7 +559,7 @@ TeemuHirsimäki JannePylkkönen TanelAlumäe - MuratSaraclar + MuratSaraclar 487–494 N06-1062 kurimo-etal-2006-unlimited @@ -569,9 +569,9 @@ Proceedings of the Human Language Technology Conference of the NAACL, Companion Volume: Short Papers N06-2 - Robert C.Moore - JeffBilmes - JenniferChu-Carroll + Robert C.Moore + JeffBilmes + JenniferChu-Carroll MarkSanderson Association for Computational Linguistics
New York City, USA
@@ -594,10 +594,10 @@ The <fixed-case>MILE</fixed-case> Corpus for Less Commonly Taught Languages AlisonAlvarez - LoriLevin - RobertFrederking + LoriLevin + RobertFrederking SimonFung - DonnaGates + DonnaGates JeffGood 5–8 N06-2002 @@ -606,7 +606,7 @@ <fixed-case>M</fixed-case>useli: A Multi-Source Evidence Integration Approach to Topic Segmentation of Spontaneous Dialogue JaimeArguello - CarolynRosé + CarolynRosé 9–12 N06-2003 arguello-rose-2006-museli @@ -631,8 +631,8 @@ Class Model Adaptation for Speech Summarisation PierreChatain EdwardWhittaker - JoannaMrozinski - SadaokiFurui + JoannaMrozinski + SadaokiFurui 21–24 N06-2006 chatain-etal-2006-class @@ -640,9 +640,9 @@ Semi-supervised Relation Extraction with Label Propagation JinxiuChen - DonghongJi - Chew LimTan - ZhengyuNiu + DonghongJi + Chew LimTan + ZhengyuNiu 25–28 N06-2007 chen-etal-2006-semi @@ -656,7 +656,7 @@ Answering the question you wish they had asked: The impact of paraphrasing for Question Answering - PabloDuboue + PabloDuboue JenniferChu-Carroll 33–36 N06-2009 @@ -673,8 +673,8 @@ Spectral Clustering for Example Based Machine Translation RashmiGangadharaiah - RalfBrown - JaimeCarbonell + RalfBrown + JaimeCarbonell 41–44 N06-2011 gangadharaiah-etal-2006-spectral @@ -698,28 +698,28 @@ Agreement/Disagreement Classification: Exploiting Unlabeled Data using Contrast Classifiers SangyunHahn RichardLadner - MariOstendorf + MariOstendorf 53–56 N06-2014 hahn-etal-2006-agreement <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes: The 90% Solution - EduardHovy - MitchellMarcus - MarthaPalmer - LanceRamshaw - RalphWeischedel + EduardHovy + MitchellMarcus + MarthaPalmer + LanceRamshaw + RalphWeischedel 57–60 N06-2015 hovy-etal-2006-ontonotes Investigating Cross-Language Speech Retrieval for a Spontaneous Conversational Speech Collection - DianaInkpen + DianaInkpen MuathAlzghool GarethJones - DouglasOard + DouglasOard 61–64 N06-2016 inkpen-etal-2006-investigating @@ -736,8 +736,8 @@ SeokhwanKim YuSong KyungdukKim - Jeong-WonCha - Gary GeunbaeLee + Jeong-WonCha + Gary GeunbaeLee 69–72 N06-2018 kim-etal-2006-mmr @@ -768,8 +768,8 @@ Automatic Recognition of Personality in Conversation - FrançoisMairesse - MarilynWalker + FrançoisMairesse + MarilynWalker 85–88 N06-2022 mairesse-walker-2006-automatic @@ -777,7 +777,7 @@ Summarizing Speech Without Text Using Hidden <fixed-case>M</fixed-case>arkov Models SameerMaskey - JuliaHirschberg + JuliaHirschberg 89–92 N06-2023 maskey-hirschberg-2006-summarizing @@ -785,9 +785,9 @@ <fixed-case>NER</fixed-case> Systems that Suit User’s Preferences: Adjusting the Recall-Precision Trade-off for Entity Extraction EinatMinkov - RichardWang + RichardWang AnthonyTomasic - WilliamCohen + WilliamCohen 93–96 N06-2024 minkov-etal-2006-ner @@ -801,7 +801,7 @@ Accurate Parsing of the <fixed-case>P</fixed-case>roposition <fixed-case>B</fixed-case>ank - GabrieleMusillo + GabrieleMusillo PaolaMerlo 101–104 N06-2026 @@ -809,7 +809,7 @@ Using Semantic Authoring for <fixed-case>B</fixed-case>lissymbols Communication Boards - YaelNetzer + YaelNetzer MichaelElhadad 105–108 N06-2027 @@ -825,8 +825,8 @@ Exploiting Variant Corpora for Machine Translation - MichaelPaul - EiichiroSumita + MichaelPaul + EiichiroSumita 113–116 N06-2029 paul-sumita-2006-exploiting @@ -843,7 +843,7 @@ Computational Modelling of Structural Priming in Dialogue DavidReitter FrankKeller - Johanna D.Moore + Johanna D.Moore 121–124 N06-2031 reitter-etal-2006-computational @@ -851,7 +851,7 @@ Story Segmentation of Broadcast News in <fixed-case>E</fixed-case>nglish, <fixed-case>M</fixed-case>andarin and <fixed-case>A</fixed-case>rabic AndrewRosenberg - JuliaHirschberg + JuliaHirschberg 125–128 N06-2032 rosenberg-hirschberg-2006-story @@ -859,7 +859,7 @@ Parser Combination by Reparsing KenjiSagae - AlonLavie + AlonLavie 129–132 N06-2033 sagae-lavie-2006-parser @@ -876,7 +876,7 @@ Weblog Classification for Fast Splog Filtering: A <fixed-case>URL</fixed-case> Language Model Segmentation Approach FrancoSalvetti - NicolasNicolov + NicolasNicolov 137–140 N06-2035 salvetti-nicolov-2006-weblog @@ -885,7 +885,7 @@ Word Domain Disambiguation via Word Sense Disambiguation AntonioSanfilippo StephenTratz - MichelleGregory + MichelleGregory 141–144 N06-2036 sanfilippo-etal-2006-word @@ -893,8 +893,8 @@ Selecting relevant text subsets from web-data for building topic specific language models AbhinavSethy - PanayiotisGeorgiou - ShrikanthNarayanan + PanayiotisGeorgiou + ShrikanthNarayanan 145–148 N06-2037 sethy-etal-2006-selecting @@ -909,7 +909,7 @@ Unsupervised Induction of <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic Verb Classes NealSnider - MonaDiab + MonaDiab 153–156 N06-2039 snider-diab-2006-unsupervised @@ -917,16 +917,16 @@ Sentence Planning for Realtime Navigational Instruction LauraStoia - DonnaByron - DarlaShockley - EricFosler-Lussier + DonnaByron + DarlaShockley + EricFosler-Lussier 157–160 N06-2040 stoia-etal-2006-sentence Using the Web to Disambiguate Acronyms - EiichiroSumita + EiichiroSumita FumiakiSugaya 161–164 N06-2041 @@ -934,7 +934,7 @@ Word Pronunciation Disambiguation using the Web - EiichiroSumita + EiichiroSumita FumiakiSugaya 165–168 N06-2042 @@ -944,7 +944,7 @@ Illuminating Trouble Tickets with Sublanguage Theory SvetlanaSymonenko StevenRowe - Elizabeth D.Liddy + Elizabeth D.Liddy 169–172 N06-2043 symonenko-etal-2006-illuminating @@ -952,7 +952,7 @@ Evolving optimal inspectable strategies for spoken dialogue systems DaveToney - JohannaMoore + JohannaMoore OliverLemon 173–176 N06-2044 @@ -976,7 +976,7 @@ A Maximum Entropy Framework that Integrates Word Dependencies and Grammatical Relations for Reading Comprehension KuiXu - HelenMeng + HelenMeng FuliangWeng 185–188 N06-2047 @@ -994,7 +994,7 @@ Subword-based Tagging by Conditional Random Fields for <fixed-case>C</fixed-case>hinese Word Segmentation RuiqiangZhang GenichiroKikui - EiichiroSumita + EiichiroSumita 193–196 N06-2049 zhang-etal-2006-subword @@ -1011,7 +1011,7 @@ Bridging the Inflection Morphology Gap for <fixed-case>A</fixed-case>rabic Statistical Machine Translation AndreasZollmann AshishVenugopal - StephanVogel + StephanVogel 201–204 N06-2051 zollmann-etal-2006-bridging @@ -1023,7 +1023,7 @@ N06-3 MattHuenerfauth BoPang - MitchMarcus + MitchMarcus Association for Computational Linguistics
New York City, USA
June @@ -1050,7 +1050,7 @@
Can the <fixed-case>I</fixed-case>nternet help improve Machine Translation? - AriadnaFont Llitjós + AriadnaFont Llitjós 219–222 N06-3003 font-llitjos-2006-internet @@ -1078,21 +1078,21 @@ Document Representation and Multilevel Measures of Document Similarity - IrinaMatveeva + IrinaMatveeva 235–238 N06-3007 matveeva-2006-document Logical investigations on the adequacy of certain feature-based theories of natural language - AndersSøgaard + AndersSøgaard 239–242 N06-3008 sogaard-2006-logical A Hybrid Approach to Biomedical Named Entity Recognition and Semantic Role Labeling - Richard Tzong-HanTsai + Richard Tzong-HanTsai 243–246 N06-3009 tsai-2006-hybrid @@ -1109,8 +1109,8 @@ Proceedings of the Human Language Technology Conference of the NAACL, Companion Volume: Demonstrations N06-4 - AlexRudnicky - JohnDowding + AlexRudnicky + JohnDowding NatasaMilic-Frayling Association for Computational Linguistics
New York City, USA
@@ -1125,14 +1125,14 @@ <fixed-case>I</fixed-case>nfo<fixed-case>M</fixed-case>agnets: Making Sense of Corpus Data JaimeArguello - CarolynRosé + CarolynRosé 253–256 N06-4001 arguello-rose-2006-infomagnets From Pipedreams to Products, and Promise! - Janet M.Baker + Janet M.Baker Patri J.Pugliese 257–260 N06-4002 @@ -1165,7 +1165,7 @@ <fixed-case>K</fixed-case>nowtator: A Protégé plug-in for annotated corpus construction - Philip V.Ogren + Philip V.Ogren 273–275 N06-4006 ogren-2006-knowtator @@ -1221,8 +1221,8 @@ Proceedings of the Human Language Technology Conference of the NAACL, Companion Volume: Tutorial Abstracts N06-5 - ChrisManning - DougOard + ChrisManning + DougOard JimGlass Association for Computational Linguistics
New York City, USA
@@ -1236,23 +1236,23 @@ What‘s in a Name: Current Methods, Applications, and Evaluation in Multilingual Name Search and Matching - SherriCondon - KeithMiller + SherriCondon + KeithMiller 299–300 N06-5001 condon-miller-2006-whats Beyond <fixed-case>EM</fixed-case>: <fixed-case>B</fixed-case>ayesian Techniques for Human Language Technology Researchers - HalDaume III + HalDaume III 301–302 N06-5002 daume-iii-2006-beyond Graph-based Algorithms for Natural Language Processing and Information Retrieval - RadaMihalcea - DragomirRadev + RadaMihalcea + DragomirRadev 303–304 N06-5003 mihalcea-radev-2006-graph @@ -1260,7 +1260,7 @@ Automatic Spoken Document Processing for Retrieval and Browsing CiprianChelba - T. J.Hazen + T. J.Hazen 305–306 N06-5004 chelba-hazen-2006-automatic @@ -1268,14 +1268,14 @@ Tutorial on Inductive Semi-supervised Learning Methods: with Applicability to Natural Language Processing AnoopSarkar - GholamrezaHaffari + GholamrezaHaffari 307–308 N06-5005 sarkar-haffari-2006-tutorial Automatic Semantic Role Labeling - Scott Wen-tauYih + Scott Wen-tauYih KristinaToutanova 309–310 N06-5006 diff --git a/data/xml/N07.xml b/data/xml/N07.xml index 11dc0646ea..9c7e205369 100644 --- a/data/xml/N07.xml +++ b/data/xml/N07.xml @@ -4,10 +4,10 @@ Human Language Technologies 2007: The Conference of the North American Chapter of the Association for Computational Linguistics; Proceedings of the Main Conference N07-1 - CandaceSidner + CandaceSidner TanjaSchultz MatthewStone - ChengXiangZhai + ChengXiangZhai Association for Computational Linguistics
Rochester, New York
April @@ -21,8 +21,8 @@ Exploiting Acoustic and Syntactic Features for Prosody Labeling in a Maximum Entropy Framework Vivek KumarRangarajan Sridhar - SrinivasBangalore - ShrikanthNarayanan + SrinivasBangalore + ShrikanthNarayanan 1–8 N07-1001 rangarajan-sridhar-etal-2007-exploiting @@ -30,12 +30,12 @@ To Memorize or to Predict: Prominence labeling in Conversational Speech AniNenkova - JasonBrenier + JasonBrenier AnubhaKothari SashaCalhoun LauraWhitton DavidBeaver - DanJurafsky + DanJurafsky 9–16 N07-1002 nenkova-etal-2007-memorize @@ -43,7 +43,7 @@ Avoiding and Resolving Initiative Conflicts in Dialogue FanYang - Peter A.Heeman + Peter A.Heeman 17–24 N07-1003 yang-heeman-2007-avoiding @@ -51,7 +51,7 @@ What Decisions Have You Made?: Automatic Decision Detection in Meeting Conversations Pei-YunHsueh - Johanna D.Moore + Johanna D.Moore 25–32 N07-1004 hsueh-moore-2007-decisions @@ -85,7 +85,7 @@ Direct Translation Model 2 AbrahamIttycheriah - SalimRoukos + SalimRoukos 57–64 N07-1008 ittycheriah-roukos-2007-direct @@ -93,14 +93,14 @@ Structured Local Training and Biased Potential Functions for Conditional Random Fields with Application to Coreference Resolution YejinChoi - ClaireCardie + ClaireCardie 65–72 N07-1009 choi-cardie-2007-structured Coreference or Not: A Twin Model for Coreference Resolution - XiaoqiangLuo + XiaoqiangLuo 73–80 N07-1010 luo-2007-coreference @@ -118,15 +118,15 @@ Information Retrieval On Empty Fields VictorLavrenko XingYi - JamesAllan + JamesAllan 89–96 N07-1012 lavrenko-etal-2007-information Improving Diversity in Ranking using Absorbing Random Walks - XiaojinZhu - AndrewGoldberg + XiaojinZhu + AndrewGoldberg JurgenVan Gael DavidAndrzejewski 97–104 @@ -135,7 +135,7 @@ A Random Text Model for the Generation of Statistical Language Invariants - ChrisBiemann + ChrisBiemann 105–112 N07-1014 biemann-2007-random @@ -158,7 +158,7 @@ The Domain Restriction Hypothesis: Relating Term Similarity and Semantic Consistency - Alfio MassimilianoGliozzo + Alfio MassimilianoGliozzo MarcoPennacchiotti PatrickPantel 131–138 @@ -168,8 +168,8 @@ <fixed-case>B</fixed-case>ayesian Inference for <fixed-case>PCFG</fixed-case>s via <fixed-case>M</fixed-case>arkov Chain <fixed-case>M</fixed-case>onte <fixed-case>C</fixed-case>arlo MarkJohnson - ThomasGriffiths - SharonGoldwater + ThomasGriffiths + SharonGoldwater 139–146 N07-1018 johnson-etal-2007-bayesian @@ -192,7 +192,7 @@ Probabilistic Generation of Weather Forecast Texts - AnjaBelz + AnjaBelz 164–171 N07-1021 belz-2007-probabilistic @@ -200,7 +200,7 @@ Generation by Inverting a Semantic Parser that Uses Statistical Machine Translation Yuk WahWong - RaymondMooney + RaymondMooney 172–179 N07-1022 wong-mooney-2007-generation @@ -208,7 +208,7 @@ Lexicalized <fixed-case>M</fixed-case>arkov Grammars for Sentence Compression MichelGalley - KathleenMcKeown + KathleenMcKeown 180–187 N07-1023 galley-mckeown-2007-lexicalized @@ -222,7 +222,7 @@ Using <fixed-case>W</fixed-case>ikipedia for Automatic Word Sense Disambiguation - RadaMihalcea + RadaMihalcea 196–203 N07-1025 mihalcea-2007-using @@ -245,19 +245,19 @@ A Case For Shorter Queries, and Helping Users Create Them GiridharKumaran - JamesAllan + JamesAllan 220–227 N07-1028 kumaran-allan-2007-case Combining Outputs from Multiple Machine Translation Systems - Antti-VeikkoRosti - Necip FazilAyan + Antti-VeikkoRosti + Necip FazilAyan BingXiang SpyrosMatsoukas - RichardSchwartz - BonnieDorr + RichardSchwartz + BonnieDorr 228–235 N07-1029 rosti-etal-2007-combining @@ -273,7 +273,7 @@ Automating Creation of Hierarchical Faceted Metadata Structures EmiliaStoica - MartiHearst + MartiHearst MeganRichardson 244–251 N07-1031 @@ -282,8 +282,8 @@ Cross-Instance Tuning of Unsupervised Document Clustering Algorithms DamianosKarakos - JasonEisner - SanjeevKhudanpur + JasonEisner + SanjeevKhudanpur CareyPriebe 252–259 N07-1032 @@ -291,25 +291,25 @@ Using “Annotator Rationales” to Improve Machine Learning for Text Categorization - OmarZaidan - JasonEisner - ChristinePiatko + OmarZaidan + JasonEisner + ChristinePiatko 260–267 N07-1033 zaidan-etal-2007-using Combining Reinformation Learning with Information-State Update Rules - PeterHeeman + PeterHeeman 268–275 N07-1034 heeman-2007-combining Estimating the Reliability of <fixed-case>MDP</fixed-case> Policies: a Confidence Interval Approach - JoelTetreault - DanBohus - DianeLitman + JoelTetreault + DanBohus + DianeLitman 276–283 N07-1035 tetreault-etal-2007-estimating @@ -317,7 +317,7 @@ An Exploration of Eye Gaze in Spoken Language Processing for Multimodal Conversational Interfaces ShaolinQu - JoyceChai + JoyceChai 284–291 N07-1036 qu-chai-2007-exploration @@ -326,7 +326,7 @@ Extracting Semantic Orientations of Phrases from Dictionary HiroyaTakamura TakashiInui - ManabuOkumura + ManabuOkumura 292–299 N07-1037 takamura-etal-2007-extracting @@ -358,15 +358,15 @@ Combining Probability-Based Rankers for Action-Item Detection - Paul N.Bennett - Jaime G.Carbonell + Paul N.Bennett + Jaime G.Carbonell 324–331 N07-1041 bennett-carbonell-2007-combining Multi-Document Relationship Fusion via Constraints on Probabilistic Databases - GideonMann + GideonMann 332–339 N07-1042 mann-2007-multi @@ -390,7 +390,7 @@ Near-Synonym Choice in an Intelligent Thesaurus - DianaInkpen + DianaInkpen 356–363 N07-1045 inkpen-2007-near @@ -400,7 +400,7 @@ BingZhao NguyenBach IanLane - StephanVogel + StephanVogel 364–371 N07-1046 zhao-etal-2007-log @@ -424,8 +424,8 @@ VesaSiivola MattiVarjokallio EbruArisoy - MuratSaraçlar - AndreasStolcke + MuratSaraçlar + AndreasStolcke 380–387 N07-1048 creutz-etal-2007-analysis @@ -466,7 +466,7 @@ A Cascaded Machine Learning Approach to Interpreting Temporal Expressions DavidAhn Jorisvan Rantwijk - Maartende Rijke + Maartende Rijke 420–427 N07-1053 ahn-etal-2007-cascaded @@ -474,8 +474,8 @@ Building and Refining Rhetorical-Semantic Relation Models SashaBlair-Goldensohn - KathleenMcKeown - OwenRambow + KathleenMcKeown + OwenRambow 428–435 N07-1054 blair-goldensohn-etal-2007-building @@ -493,7 +493,7 @@ Randomized Decoding for Selection-and-Ordering Problems PawanDeshpande ReginaBarzilay - DavidKarger + DavidKarger 444–451 N07-1056 deshpande-etal-2007-randomized @@ -501,7 +501,7 @@ Multilingual Structural Projection across Interlinear Text FeiXia - WilliamLewis + WilliamLewis 452–459 N07-1057 xia-lewis-2007-multilingual @@ -519,7 +519,7 @@ Automatic Assessment of Student Translations for Foreign Language Tutoring ChaoWang - StephanieSeneff + StephanieSeneff 468–475 N07-1059 wang-seneff-2007-automatic @@ -543,7 +543,7 @@ Efficient Phrase-Table Representation for Machine Translation with Applications to Online <fixed-case>MT</fixed-case> and Speech Translation RichardZens - HermannNey + HermannNey 492–499 N07-1062 zens-ney-2007-efficient @@ -552,7 +552,7 @@ An Efficient Two-Pass Approach to Synchronous-<fixed-case>CFG</fixed-case> Driven Statistical <fixed-case>MT</fixed-case> AshishVenugopal AndreasZollmann - StephanVogel + StephanVogel 500–507 N07-1063 venugopal-etal-2007-efficient @@ -560,7 +560,7 @@ Statistical Phrase-Based Post-Editing MichelSimard - CyrilGoutte + CyrilGoutte PierreIsabelle 508–515 N07-1064 @@ -578,7 +578,7 @@ A Probabilistic Framework for Answer Selection in Question Answering JeongwooKo LuoSi - EricNyberg + EricNyberg 524–531 N07-1066 ko-etal-2007-probabilistic @@ -586,9 +586,9 @@ Question Answering Using Integrated Information Retrieval and Information Extraction BarrySchiffman - KathleenMcKeown - RalphGrishman - JamesAllan + KathleenMcKeown + RalphGrishman + JamesAllan 532–539 N07-1067 schiffman-etal-2007-question @@ -603,18 +603,18 @@ Can Semantic Roles Generalize Across Genres? - Szu-tingYi + Szu-tingYi EdwardLoper - MarthaPalmer + MarthaPalmer 548–555 N07-1069 yi-etal-2007-semantic Towards Robust Semantic Role Labeling - SameerPradhan - WayneWard - JamesMartin + SameerPradhan + WayneWard + JamesMartin 556–563 N07-1070 pradhan-etal-2007-towards @@ -624,8 +624,8 @@ PatrickPantel RahulBhagat BonaventuraCoppola - TimothyChklovski - EduardHovy + TimothyChklovski + EduardHovy 564–571 N07-1071 pantel-etal-2007-isp @@ -643,10 +643,10 @@ Human Language Technologies 2007: The Conference of the North American Chapter of the Association for Computational Linguistics; Companion Volume, Short Papers N07-2 - CandaceSidner + CandaceSidner TanjaSchultz MatthewStone - ChengXiangZhai + ChengXiangZhai Association for Computational Linguistics
Rochester, New York
April @@ -660,24 +660,24 @@ Comparing User Simulation Models For Dialog Strategy Learning HuaAi - JoelTetreault - DianeLitman + JoelTetreault + DianeLitman 1–4 N07-2001 ai-etal-2007-comparing-user Automatic Acquisition of Grammatical Types for Nouns - NúriaBel + NúriaBel SergioEspeja - MontserratMarimon + MontserratMarimon 5–8 N07-2002 bel-etal-2007-automatic <fixed-case>C</fixed-case>onquest—<fixed-case>A</fixed-case>n Open-Source Dialog System for Conferences - DanBohus + DanBohus SergioGrau Puerto DavidHuggins-Daines VenkateshKeri @@ -693,14 +693,14 @@ Joint Versus Independent Phonological Feature Models within <fixed-case>CRF</fixed-case> Phone Recognition IlanaBromberg JeremyMorris - EricFosler-Lussier + EricFosler-Lussier 13–16 N07-2004 bromberg-etal-2007-joint <fixed-case>K</fixed-case>-Best Suffix Arrays - KennethChurch + KennethChurch BoThiesson RobertRagno 17–20 @@ -710,8 +710,8 @@ Translation Model Pruning via Usage Statistics for Statistical Machine Translation MatthiasEck - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel 21–24 N07-2006 eck-etal-2007-translation @@ -735,7 +735,7 @@ Generalized Graphical Abstractions for Statistical Machine Translation KarimFilali - JeffBilmes + JeffBilmes 33–36 N07-2009 filali-bilmes-2007-generalized @@ -743,17 +743,17 @@ Situated Models of Meaning for Sports Video Retrieval MichaelFleischman - DebRoy + DebRoy 37–40 N07-2010 fleischman-roy-2007-situated Exploring Affect-Context Dependencies for Adaptive System Development - KateForbes-Riley + KateForbes-Riley MihaiRotaru - DianeLitman - JoelTetreault + DianeLitman + JoelTetreault 41–44 N07-2011 forbes-riley-etal-2007-exploring @@ -778,34 +778,34 @@ <fixed-case>A</fixed-case>rabic Diacritization through Full Morphological Tagging NizarHabash - OwenRambow + OwenRambow 53–56 N07-2014 habash-rambow-2007-arabic Are Very Large <fixed-case>N</fixed-case>-Best Lists Useful for <fixed-case>SMT</fixed-case>? - SašaHasan + SašaHasan RichardZens - HermannNey + HermannNey 57–60 N07-2015 hasan-etal-2007-large Relationship between Non-Projective Edges, Their Level Types, and Well-Nestedness - JiříHavelka + JiříHavelka 61–64 N07-2016 havelka-2007-relationship i<fixed-case>ROVER</fixed-case>: Improving System Combination with Classification - DustinHillard + DustinHillard BjoernHoffmeister - MariOstendorf - RalfSchlueter - HermannNey + MariOstendorf + RalfSchlueter + HermannNey 65–68 N07-2017 hillard-etal-2007-irover @@ -813,7 +813,7 @@ Clustered Sub-Matrix Singular Value Decomposition FangHuang - YorickWilks + YorickWilks 69–72 N07-2018 huang-wilks-2007-clustered @@ -821,14 +821,14 @@ Implicitly Supervised Language Model Adaptation for Meeting Transcription DavidHuggins-Daines - Alexander I.Rudnicky + Alexander I.Rudnicky 73–76 N07-2019 huggins-daines-rudnicky-2007-implicitly <fixed-case>ILR</fixed-case>-Based <fixed-case>MT</fixed-case> Comprehension Test with Multi-Level Questions - DouglasJones + DouglasJones MarthaHerzog HussnyIbrahim ArvindJairam @@ -841,8 +841,8 @@ Semi-Supervised Learning for Semantic Parsing using Support Vector Machines - RohitKate - RaymondMooney + RohitKate + RaymondMooney 81–84 N07-2021 kate-mooney-2007-semi @@ -850,8 +850,8 @@ Discriminative Alignment Training without Annotated Data for Machine Translation PatrikLambert - Rafael E.Banchs - Josep M.Crego + Rafael E.Banchs + Josep M.Crego 85–88 N07-2022 lambert-etal-2007-discriminative @@ -866,7 +866,7 @@ Detection of Non-Native Sentences Using Machine-Translated Training Data - JohnLee + JohnLee MingZhou XiaohuaLiu 93–96 @@ -899,7 +899,7 @@ Efficient Computation of Entropy Gradient for Semi-Supervised Conditional Random Fields - GideonMann + GideonMann AndrewMcCallum 109–112 N07-2028 @@ -907,8 +907,8 @@ Hybrid Document Indexing with Spectral Embedding - IrinaMatveeva - Gina-AnneLevow + IrinaMatveeva + Gina-AnneLevow 113–116 N07-2029 matveeva-levow-2007-hybrid @@ -922,7 +922,7 @@ <fixed-case>RH</fixed-case>: A Retro-Hybrid Parser - PaulaNewman + PaulaNewman 121–124 N07-2031 newman-2007-rh @@ -949,34 +949,34 @@ An Integrated Architecture for Speech-Input Multi-Target Machine Translation AliciaPérez M. TeresaGonzález - M. InésTorres - FranciscoCasacuberta + M. InésTorres + FranciscoCasacuberta 133–136 N07-2034 perez-etal-2007-integrated Analysis and System Combination of Phrase- and <fixed-case>N</fixed-case>-Gram-Based Statistical Machine Translation Systems - MartaR. Costa-jussà - Josep M.Crego + MartaR. Costa-jussà + Josep M.Crego DavidVilar - José A.R. Fonollosa - José B.Mariño - HermannNey + José A.R. Fonollosa + José B.Mariño + HermannNey 137–140 N07-2035 r-costa-jussa-etal-2007-analysis Stating with Certainty or Stating with Doubt: Intercoder Reliability Results for Manual Annotation of Epistemically Modalized Statements - Victoria L.Rubin + Victoria L.Rubin 141–144 N07-2036 rubin-2007-stating Joint Morphological-Lexical Language Modeling for Machine Translation - RuhiSarikaya + RuhiSarikaya YonggangDeng 145–148 N07-2037 @@ -988,14 +988,14 @@ BlaiseThomson KarlWeilhammer HuiYe - SteveYoung + SteveYoung 149–152 N07-2038 schatzmann-etal-2007-agenda Reversible Sound-to-Letter/Letter-to-Sound Modeling Based on Syllable Structure - StephanieSeneff + StephanieSeneff 153–156 N07-2039 seneff-2007-reversible @@ -1019,8 +1019,8 @@ Virtual Evidence for Training Speech Recognizers Using Partially Labeled Data - AmarnagSubramanya - JeffBilmes + AmarnagSubramanya + JeffBilmes 165–168 N07-2042 subramanya-bilmes-2007-virtual @@ -1038,8 +1038,8 @@ KeithTrnka DebraYarrington JohnMcCaw - Kathleen F.McCoy - ChristopherPennington + Kathleen F.McCoy + ChristopherPennington 173–176 N07-2044 trnka-etal-2007-effects @@ -1054,7 +1054,7 @@ Entity Extraction is a Boring Solved <fixed-case>P</fixed-case>roblem—<fixed-case>O</fixed-case>r is it? - MarcVilain + MarcVilain JenniferSu SuziLubar 181–184 @@ -1073,7 +1073,7 @@ Modifying <fixed-case>SO</fixed-case>-<fixed-case>PMI</fixed-case> for <fixed-case>J</fixed-case>apanese Weblog Opinion Mining by Using a Balancing Factor and Detecting Neutral Expressions GuangweiWang - KenjiAraki + KenjiAraki 189–192 N07-2048 wang-araki-2007-modifying @@ -1082,7 +1082,7 @@ Combined Use of Speaker- and Tone-Normalized Pitch Reset with Pause Duration for Automatic Story Segmentation in <fixed-case>M</fixed-case>andarin Broadcast News LeiXie ChuanLiu - HelenMeng + HelenMeng 193–196 N07-2049 xie-etal-2007-combined @@ -1114,8 +1114,8 @@ Selective Phrase Pair Extraction for Improved Statistical Machine Translation - LukeZettlemoyer - RobertMoore + LukeZettlemoyer + RobertMoore 209–212 N07-2053 zettlemoyer-moore-2007-selective @@ -1132,7 +1132,7 @@ A Semi-Automatic Evaluation Scheme: Automated Nuggetization for Manual Annotation LiangZhou NamheeKwon - EduardHovy + EduardHovy 217–220 N07-2055 zhou-etal-2007-semi @@ -1156,7 +1156,7 @@ Query Expansion Using Domain Information in Compounds - KarinFriberg + KarinFriberg 1–4 N07-3001 friberg-2007-query @@ -1170,7 +1170,7 @@ Creating a Knowledge Base from a Collaboratively Generated Encyclopedia - Simone PaoloPonzetto + Simone PaoloPonzetto 9–12 N07-3003 ponzetto-2007-creating @@ -1205,7 +1205,7 @@ Semantic Frames in <fixed-case>R</fixed-case>omanian Natural Language Processing Systems - Diana MarieTrandabăţ + Diana MarieTrandabăţ 29–32 N07-3008 trandabat-2007-semantic @@ -1219,7 +1219,7 @@ Unsupervised Natural Language Processing Using Graph Models - ChrisBiemann + ChrisBiemann 37–40 N07-3010 biemann-2007-unsupervised @@ -1230,8 +1230,8 @@ Proceedings of Human Language Technologies: The Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL-HLT) N07-4 BobCarpenter - AmandaStent - Jason D.Williams + AmandaStent + Jason D.Williams Association for Computational Linguistics
Rochester, New York, USA
April @@ -1244,12 +1244,12 @@ Demonstration of <fixed-case>PLOW</fixed-case>: A Dialogue System for One-Shot Task Learning - JamesAllen - NathanaelChambers + JamesAllen + NathanaelChambers GeorgeFerguson LucianGalescu HyuckchulJung - MarySwift + MarySwift WilliamTaysom 1–2 N07-4001 @@ -1268,12 +1268,12 @@ Adaptive Tutorial Dialogue Systems Using Deep <fixed-case>NLP</fixed-case> Techniques - Myroslava O.Dzikovska - Charles B.Callaway + Myroslava O.Dzikovska + Charles B.Callaway ElaineFarrow ManuelMarques-Pita ColinMatheson - Johanna D.Moore + Johanna D.Moore 5–6 N07-4003 dzikovska-etal-2007-adaptive @@ -1282,7 +1282,7 @@ <fixed-case>POSSLT</fixed-case>: A <fixed-case>K</fixed-case>orean to <fixed-case>E</fixed-case>nglish Spoken Language Translation System DonghyeonLee JonghoonLee - Gary GeunbaeLee + Gary GeunbaeLee 7–8 N07-4004 lee-etal-2007-posslt @@ -1294,7 +1294,7 @@ SimonTucker JonathanKilgour JeanCarletta - Johanna D.Moore + Johanna D.Moore SteveRenals 9–10 N07-4005 @@ -1309,7 +1309,7 @@ Spoken Dialogue Systems for Language Learning - StephanieSeneff + StephanieSeneff ChaoWang Chih-yuChao 13–14 @@ -1337,7 +1337,7 @@ <fixed-case>OMS</fixed-case>-<fixed-case>J</fixed-case>: An Opinion Mining System for <fixed-case>J</fixed-case>apanese Weblog Reviews Using a Combination of Supervised and Unsupervised Approaches GuangweiWang - KenjiAraki + KenjiAraki 19–20 N07-4010 wang-araki-2007-oms @@ -1345,7 +1345,7 @@ Learning to Find Transliteration on the Web Chien-ChengWu - Jason S.Chang + Jason S.Chang 21–22 N07-4011 wu-chang-2007-learning-find @@ -1367,7 +1367,7 @@ RohitMishra BrianLathrop ZhaoxiaZhang - HarryBratt + HarryBratt StanleyPeters 23–24 N07-4012 @@ -1378,16 +1378,16 @@ AlexanderYates MicheleBanko MatthewBroadhead - MichaelCafarella + MichaelCafarella OrenEtzioni - StephenSoderland + StephenSoderland 25–26 N07-4013 yates-etal-2007-textrunner The Hidden Information State Dialogue Manager: A Real-World <fixed-case>POMDP</fixed-case>-Based System - SteveYoung + SteveYoung JostSchatzmann BlaiseThomson KarlWeilhammer @@ -1405,7 +1405,7 @@ <fixed-case>V</fixed-case>oice-<fixed-case>R</fixed-case>ate: A Dialog System for Consumer Ratings - GeoffreyZweig + GeoffreyZweig Y.C.Ju PatrickNguyen DongYu @@ -1420,9 +1420,9 @@ Proceedings of the Human Language Technology Conference of the NAACL, Companion Volume: Tutorial Abstracts N07-5 - MartiHearst - Gina-AnneLevow - JamesAllan + MartiHearst + Gina-AnneLevow + JamesAllan Association for Computational Linguistics
Rochester, New York
April @@ -1435,21 +1435,21 @@ <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> - K. BretonnelCohen + K. BretonnelCohen 1–2 N07-5001 cohen-2007-bionlp Statistical Language Models for Information Retrieval - ChengXiangZhai + ChengXiangZhai 3–4 N07-5002 zhai-2007-statistical <fixed-case>A</fixed-case>rabic Dialect Processing Tutorial - MonaDiab + MonaDiab NizarHabash 5–6 N07-5003 diff --git a/data/xml/N09.xml b/data/xml/N09.xml index 3a5aa1e76a..88f5915d8c 100644 --- a/data/xml/N09.xml +++ b/data/xml/N09.xml @@ -4,10 +4,10 @@ Proceedings of Human Language Technologies: The 2009 Annual Conference of the North American Chapter of the Association for Computational Linguistics N09-1 - MariOstendorf - MichaelCollins - ShriNarayanan - Douglas W.Oard + MariOstendorf + MichaelCollins + ShriNarayanan + Douglas W.Oard LucyVanderwende Association for Computational Linguistics
Boulder, Colorado
@@ -30,8 +30,8 @@ Integrating Knowledge for Subjectivity Sense Labeling YawGyamfi - JanyceWiebe - RadaMihalcea + JanyceWiebe + RadaMihalcea CemAkkaya 10–18 N09-1002 @@ -39,12 +39,12 @@ A Study on Similarity and Relatedness Using Distributional and <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-based Approaches - EnekoAgirre + EnekoAgirre EnriqueAlfonseca - KeithHall - JanaKravalova - MariusPaşca - AitorSoroa + KeithHall + JanaKravalova + MariusPaşca + AitorSoroa 19–27 N09-1003 agirre-etal-2009-study @@ -85,15 +85,15 @@ YaozhongZhang TakuyaMatsuzaki YoshimasaTsuruoka - Jun’ichiTsujii + Jun’ichiTsujii 56–64 N09-1007 sun-etal-2009-discriminative Improved Reconstruction of Protolanguage Word Forms - AlexandreBouchard-Côté - Thomas L.Griffiths + AlexandreBouchard-Côté + Thomas L.Griffiths DanKlein 65–73 N09-1008 @@ -101,8 +101,8 @@ Shared Logistic Normal Distributions for Soft Parameter Tying in Unsupervised Grammar Induction - ShayCohen - Noah A.Smith + ShayCohen + Noah A.Smith 74–82 N09-1009 cohen-smith-2009-shared @@ -120,7 +120,7 @@ Efficiently Parsable Extensions to Tree-Local Multicomponent <fixed-case>TAG</fixed-case> RebeccaNesson - StuartShieber + StuartShieber 92–100 N09-1011 nesson-shieber-2009-efficiently @@ -137,7 +137,7 @@ Context-Dependent Alignment Models for Statistical Machine Translation JamieBrunning - Adriàde Gispert + Adriàde Gispert WilliamByrne 110–118 N09-1013 @@ -155,7 +155,7 @@ Intersecting Multilingual Data for Faster and Better Statistical Translations YuChen MartinKay - AndreasEisele + AndreasEisele 128–136 N09-1015 chen-etal-2009-intersecting @@ -171,16 +171,16 @@ The Role of Implicit Argumentation in Nominal <fixed-case>SRL</fixed-case> - MatthewGerber - JoyceChai - AdamMeyers + MatthewGerber + JoyceChai + AdamMeyers 146–154 N09-1017 gerber-etal-2009-role Jointly Identifying Predicates, Arguments and Senses using <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic - IvanMeza-Ruiz + IvanMeza-Ruiz SebastianRiedel 155–163 N09-1018 @@ -197,7 +197,7 @@ Hierarchical <fixed-case>D</fixed-case>irichlet Trees for Information Retrieval - GholamrezaHaffari + GholamrezaHaffari Yee WhyeTeh 173–181 N09-1020 @@ -259,8 +259,8 @@ Preference Grammars: Softening Syntactic Constraints to Improve Statistical Machine Translation AshishVenugopal AndreasZollmann - Noah A.Smith - StephanVogel + Noah A.Smith + StephanVogel 236–244 N09-1027 venugopal-etal-2009-preference @@ -270,7 +270,7 @@ PengXu JaehoKang MichaelRinggaard - FranzOch + FranzOch 245–253 N09-1028 xu-etal-2009-using @@ -278,20 +278,20 @@ Learning Bilingual Linguistic Reordering Model for Statistical Machine Translation Han-BinChen - Jian-ChengWu - Jason S.Chang + Jian-ChengWu + Jason S.Chang 254–262 N09-1029 chen-etal-2009-learning May All Your Wishes Come True: A Study of Wishes and How to Recognize Them - Andrew B.Goldberg + Andrew B.Goldberg NathanaelFillmore DavidAndrzejewski ZhitingXu BryanGibson - XiaojinZhu + XiaojinZhu 263–271 N09-1030 goldberg-etal-2009-may @@ -300,9 +300,9 @@ Predicting Risk from Financial Reports with Regression ShimonKogan DimitryLevin - Bryan R.Routledge + Bryan R.Routledge Jacob S.Sagi - Noah A.Smith + Noah A.Smith 272–280 N09-1031 kogan-etal-2009-predicting @@ -349,23 +349,23 @@ Improving nonparameteric <fixed-case>B</fixed-case>ayesian inference: experiments on unsupervised word segmentation with adaptor grammars MarkJohnson - SharonGoldwater + SharonGoldwater 317–325 N09-1036 johnson-goldwater-2009-improving Joint Parsing and Named Entity Recognition - Jenny RoseFinkel - Christopher D.Manning + Jenny RoseFinkel + Christopher D.Manning 326–334 N09-1037 finkel-manning-2009-joint Minimal-length linearizations for mildly context-sensitive dependency trees - Y. AlbertPark - RogerLevy + Y. AlbertPark + RogerLevy 335–343 N09-1038 park-levy-2009-minimal @@ -395,9 +395,9 @@ Global Models of Document Structure using Latent Permutations HarrChen - S.R.K.Branavan + S.R.K.Branavan ReginaBarzilay - David R.Karger + David R.Karger 371–379 N09-1042 chen-etal-2009-global @@ -413,10 +413,10 @@ Geo-Centric Language Models for Local Business Voice Search - AmandaStent + AmandaStent IlijaZeljković DiamantinoCaseiro - JayWilpon + JayWilpon 389–396 N09-1044 stent-etal-2009-geo @@ -425,21 +425,21 @@ Improving the <fixed-case>A</fixed-case>rabic Pronunciation Dictionary for Phone and Word Recognition with Linguistically-Based Pronunciation Rules FadiBiadsy NizarHabash - JuliaHirschberg + JuliaHirschberg 397–405 N09-1045 biadsy-etal-2009-improving Using a maximum entropy model to build segmentation lattices for <fixed-case>MT</fixed-case> - ChrisDyer + ChrisDyer 406–414 N09-1046 dyer-2009-using Active Learning for Statistical Phrase-based Machine Translation - GholamrezaHaffari + GholamrezaHaffari MaximRoy AnoopSarkar 415–423 @@ -449,8 +449,8 @@ Semi-Supervised Lexicon Mining from Parenthetical Expressions in Monolingual Web Pages XianchaoWu - NaoakiOkazaki - Jun’ichiTsujii + NaoakiOkazaki + Jun’ichiTsujii 424–432 N09-1048 wu-etal-2009-semi @@ -458,8 +458,8 @@ Hierarchical Phrase-Based Translation with Weighted Finite State Transducers GonzaloIglesias - Adriàde Gispert - EduardoR. Banga + Adriàde Gispert + EduardoR. Banga WilliamByrne 433–441 N09-1049 @@ -476,23 +476,23 @@ Performance Prediction for Exponential Language Models - StanleyChen + StanleyChen 450–458 N09-1051 chen-2009-performance Tied-Mixture Language Modeling in Continuous Space - RuhiSarikaya + RuhiSarikaya MohamedAfify - BrianKingsbury + BrianKingsbury 459–467 N09-1052 sarikaya-etal-2009-tied Shrinking Exponential Language Models - StanleyChen + StanleyChen 468–476 N09-1053 chen-2009-shrinking @@ -500,8 +500,8 @@ Predicting Response to Political Blog Posts with Topic Models TaeYano - William W.Cohen - Noah A.Smith + William W.Cohen + Noah A.Smith 477–485 N09-1054 yano-etal-2009-predicting @@ -535,7 +535,7 @@ Streaming for large scale <fixed-case>NLP</fixed-case>: Language Modeling AmitGoyal - HalDaumé III + HalDaumé III SureshVenkatasubramanian 512–520 N09-1058 @@ -562,16 +562,16 @@ CarlosGómez-Rodríguez MarcoKuhlmann GiorgioSatta - DavidWeir + DavidWeir 539–547 N09-1061 gomez-rodriguez-etal-2009-optimal Inducing Compact but Accurate Tree-Substitution Grammars - TrevorCohn - SharonGoldwater - PhilBlunsom + TrevorCohn + SharonGoldwater + PhilBlunsom 548–556 N09-1062 cohn-etal-2009-inducing @@ -601,13 +601,13 @@ Using Citations to Generate surveys of Scientific Paradigms - SaifMohammad - BonnieDorr + SaifMohammad + BonnieDorr MelissaEgan - AhmedHassan - PradeepMuthukrishan + AhmedHassan + PradeepMuthukrishan VahedQazvinian - DragomirRadev + DragomirRadev DavidZajic 584–592 N09-1066 @@ -615,15 +615,15 @@ Non-Parametric <fixed-case>B</fixed-case>ayesian Areal Linguistics - HalDaumé III + HalDaumé III 593–601 N09-1067 daume-iii-2009-non Hierarchical <fixed-case>B</fixed-case>ayesian Domain Adaptation - Jenny RoseFinkel - Christopher D.Manning + Jenny RoseFinkel + Christopher D.Manning 602–610 N09-1068 finkel-manning-2009-hierarchical @@ -656,9 +656,9 @@ Extracting Social Meaning: Identifying Interactional Style in Spoken Conversation - DanJurafsky + DanJurafsky RajeshRanganath - DanMcFarland + DanMcFarland 638–646 N09-1072 jurafsky-etal-2009-extracting @@ -673,7 +673,7 @@ Improved Syntactic Models for Parsing Speech with Repairs - TimMiller + TimMiller 656–664 N09-1074 miller-2009-improved @@ -681,7 +681,7 @@ A model of local coherence effects in human sentence processing as consequences of updates from bottom-up prior to posterior beliefs KlintonBicknell - RogerLevy + RogerLevy 665–673 N09-1075 bicknell-levy-2009-model @@ -691,10 +691,10 @@ Proceedings of Human Language Technologies: The 2009 Annual Conference of the North American Chapter of the Association for Computational Linguistics, Companion Volume: Short Papers N09-2 - MariOstendorf - MichaelCollins - ShriNarayanan - Douglas W.Oard + MariOstendorf + MichaelCollins + ShriNarayanan + Douglas W.Oard LucyVanderwende Association for Computational Linguistics
Boulder, Colorado
@@ -709,7 +709,7 @@ Cohesive Constraints in A Beam Search Phrase-based Decoder NguyenBach - StephanVogel + StephanVogel ColinCherry 1–4 N09-2001 @@ -726,7 +726,7 @@ Efficient Extraction of Oracle-best Translations from Hypergraphs ZhifeiLi - SanjeevKhudanpur + SanjeevKhudanpur 9–12 N09-2003 li-khudanpur-2009-efficient @@ -741,8 +741,8 @@ Comparison of Extended Lexicon Models in Search and Rescoring for <fixed-case>SMT</fixed-case> - SašaHasan - HermannNey + SašaHasan + HermannNey 17–20 N09-2005 hasan-ney-2009-comparison @@ -767,7 +767,7 @@ Large-scale Computation of Distributional Similarities for Queries EnriqueAlfonseca - KeithHall + KeithHall SilvanaHartmann 29–32 N09-2008 @@ -786,7 +786,7 @@ Identifying Types of Claims in Online Customer Reviews ShilpaArora MaheshJoshi - Carolyn P.Rosé + Carolyn P.Rosé 37–40 N09-2010 arora-etal-2009-identifying @@ -801,21 +801,21 @@ <fixed-case>TESLA</fixed-case>: A Tool for Annotating Geospatial Language Corpora - NateBlaylock + NateBlaylock BradleySwain - JamesAllen + JamesAllen 45–48 N09-2012 blaylock-etal-2009-tesla Modeling Dialogue Structure with Adjacency Pair Analysis and Hidden <fixed-case>M</fixed-case>arkov Models - Kristy ElizabethBoyer - RobertPhillips - Eun YoungHa + Kristy ElizabethBoyer + RobertPhillips + Eun YoungHa MichaelWallis MladenVouk - JamesLester + JamesLester 49–52 N09-2013 boyer-etal-2009-modeling @@ -825,7 +825,7 @@ KenjiSagae GwenChristian DavidDeVault - DavidTraum + DavidTraum 53–56 N09-2014 sagae-etal-2009-towards @@ -842,8 +842,8 @@ Learning <fixed-case>B</fixed-case>ayesian Networks for Semantic Frame Composition in a Spoken Dialog System Marie-JeanMeurs - FabriceLefèvre - Renatode Mori + FabriceLefèvre + Renatode Mori 61–64 N09-2016 meurs-etal-2009-learning @@ -851,7 +851,7 @@ Evaluation of a System for Noun Concepts Acquisition from Utterances about Images (<fixed-case>SINCA</fixed-case>) Using Daily Conversation Data YuzuUchida - KenjiAraki + KenjiAraki 65–68 N09-2017 uchida-araki-2009-evaluation @@ -859,14 +859,14 @@ Web and Corpus Methods for <fixed-case>M</fixed-case>alay Count Classifier Prediction JeremyNicholson - TimothyBaldwin + TimothyBaldwin 69–72 N09-2018 nicholson-baldwin-2009-web Minimum <fixed-case>B</fixed-case>ayes Risk Combination of Translation Hypotheses from Alternative Morphological Decompositions - Adriàde Gispert + Adriàde Gispert SamiVirpioja MikkoKurimo WilliamByrne @@ -877,8 +877,8 @@ Generating Synthetic Children’s Acoustic Models from Adult Models AndreasHagen - BryanPellom - KadriHacioglu + BryanPellom + KadriHacioglu 77–80 N09-2020 hagen-etal-2009-generating @@ -886,7 +886,7 @@ Detecting Pitch Accents at the Word, Syllable and Vowel Level AndrewRosenberg - JuliaHirschberg + JuliaHirschberg 81–84 N09-2021 rosenberg-hirschberg-2009-detecting @@ -903,17 +903,17 @@ Automatic Agenda Graph Construction from Human-Human Dialogs using Clustering Method CheongjaeLee - SangkeunJung + SangkeunJung KyungdukKim - Gary GeunbaeLee + Gary GeunbaeLee 89–92 N09-2023 lee-etal-2009-automatic A Simple Sentence-Level Extraction Algorithm for Comparable Data - ChristophTillmann - Jian-mingXu + ChristophTillmann + Jian-mingXu 93–96 N09-2024 tillmann-xu-2009-simple @@ -921,15 +921,15 @@ Learning Combination Features with <fixed-case>L</fixed-case>1 Regularization DaisukeOkanohara - Jun’ichiTsujii + Jun’ichiTsujii 97–100 N09-2025 okanohara-tsujii-2009-learning Multi-scale Personalization for Voice Search Applications - DanielBolaños - GeoffreyZweig + DanielBolaños + GeoffreyZweig PatrickNguyen 101–104 N09-2026 @@ -938,7 +938,7 @@ The Importance of Sub-Utterance Prosody in Predicting Level of Certainty HeatherPon-Barry - StuartShieber + StuartShieber 105–108 N09-2027 pon-barry-shieber-2009-importance @@ -961,7 +961,7 @@ Topic Identification Using <fixed-case>W</fixed-case>ikipedia Graph Centrality KinoCoursey - RadaMihalcea + RadaMihalcea 117–120 N09-2030 coursey-mihalcea-2009-topic @@ -969,15 +969,15 @@ Extracting Bilingual Dictionary from Comparable Corpora with Dependency Heterogeneity KunYu - JunichiTsujii + JunichiTsujii 121–124 N09-2031 yu-tsujii-2009-extracting Domain Adaptation with Artificial Data for Semantic Parsing of Speech - Lonnekevan der Plas - JamesHenderson + Lonnekevan der Plas + JamesHenderson PaolaMerlo 125–128 N09-2032 @@ -997,7 +997,7 @@ KazunoriKomatani KotaroFunakoshi TetsuyaOgata - Hiroshi G.Okuno + Hiroshi G.Okuno 133–136 N09-2034 katsumaru-etal-2009-speech @@ -1005,7 +1005,7 @@ Taking into Account the Differences between Actively and Passively Acquired Data: The Case of Active Learning with Support Vector Machines for Imbalanced Datasets MichaelBloodgood - K.Vijay-Shanker + K.Vijay-Shanker 137–140 N09-2035 bloodgood-vijay-shanker-2009-taking @@ -1020,9 +1020,9 @@ Evaluating the Syntactic Transformations in Gold Standard Corpora for Statistical Sentence Compression - Naman K.Gupta + Naman K.Gupta SourishChaudhuri - Carolyn P.Rosé + Carolyn P.Rosé 145–148 N09-2037 gupta-etal-2009-evaluating @@ -1033,11 +1033,11 @@ RogerHsiao MatthiasEck PaisarnCharoenpornsawat - StephanVogel + StephanVogel TanjaSchultz IanLane - AlexWaibel - AlanBlack + AlexWaibel + AlanBlack 149–152 N09-2038 bach-etal-2009-incremental @@ -1059,8 +1059,8 @@ Exploiting Named Entity Classes in <fixed-case>CCG</fixed-case> Surface Realization - RajakrishnanRajkumar - MichaelWhite + RajakrishnanRajkumar + MichaelWhite DominicEspinosa 161–164 N09-2041 @@ -1072,7 +1072,7 @@ YiChang ZhaohuiZheng DonaldMetzler - Jian-yunNie + Jian-yunNie 165–168 N09-2042 zhang-etal-2009-search @@ -1081,7 +1081,7 @@ A Local Tree Alignment-based Soft Pattern Matching Approach for Information Extraction SeokhwanKim MinwooJeong - Gary GeunbaeLee + Gary GeunbaeLee 169–172 N09-2043 kim-etal-2009-local @@ -1102,7 +1102,7 @@ LuisTari JörgHakenberg ChittaBaral - GracielaGonzalez + GracielaGonzalez 177–180 N09-2045 jonnalagadda-etal-2009-towards @@ -1110,18 +1110,18 @@ Improving <fixed-case>SCL</fixed-case> Model for Sentiment-Transfer Learning SongboTan - XueqiCheng + XueqiCheng 181–184 N09-2046 tan-cheng-2009-improving <fixed-case>MICA</fixed-case>: A Probabilistic Dependency Parser Based on Tree Insertion Grammars (Application Note) - SrinivasBangalore + SrinivasBangalore PierreBoullier AlexisNasr - OwenRambow - BenoîtSagot + OwenRambow + BenoîtSagot 185–188 N09-2047 bangalore-etal-2009-mica @@ -1129,7 +1129,7 @@ Lexical and Syntactic Adaptation and Their Impact in Deployed Spoken Dialog Systems SvetlanaStoyanchev - AmandaStent + AmandaStent 189–192 N09-2048 stoyanchev-stent-2009-lexical @@ -1145,15 +1145,15 @@ The independence of dimensions in multidimensional dialogue act annotation VolhaPetukhova - HarryBunt + HarryBunt 197–200 N09-2050 petukhova-bunt-2009-independence Improving Coreference Resolution by Using Conversational Metadata - XiaoqiangLuo - RaduFlorian + XiaoqiangLuo + RaduFlorian ToddWard 201–204 N09-2051 @@ -1179,16 +1179,16 @@ Improving A Simple Bigram <fixed-case>HMM</fixed-case> Part-of-Speech Tagger by Latent Annotation and Self-Training ZhongqiangHuang VladimirEidelman - MaryHarper + MaryHarper 213–216 N09-2054 huang-etal-2009-improving Statistical Post-Editing of a Rule-Based Machine Translation System - Antonio-L.Lagarda - VicentAlabau - FranciscoCasacuberta + Antonio-L.Lagarda + VicentAlabau + FranciscoCasacuberta RobertoSilva EnriqueDíaz-de-Liaño 217–220 @@ -1197,9 +1197,9 @@ On the Importance of Pivot Language Selection for Statistical Machine Translation - MichaelPaul - HirofumiYamamoto - EiichiroSumita + MichaelPaul + HirofumiYamamoto + EiichiroSumita SatoshiNakamura 221–224 N09-2056 @@ -1216,7 +1216,7 @@ Determining the position of adverbial phrases in <fixed-case>E</fixed-case>nglish HuayanZhong - AmandaStent + AmandaStent 229–232 N09-2058 zhong-stent-2009-determining @@ -1224,9 +1224,9 @@ Estimating and Exploiting the Entropy of Sense Distributions PengJin - DianaMcCarthy + DianaMcCarthy RobKoeling - JohnCarroll + JohnCarroll 233–236 N09-2059 jin-etal-2009-estimating @@ -1240,14 +1240,14 @@ Sentence Boundary Detection and the Problem with the <fixed-case>U</fixed-case>.<fixed-case>S</fixed-case>. - DanGillick + DanGillick 241–244 N09-2061 gillick-2009-sentence Quadratic Features and Deep Architectures for Chunking - JosephTurian + JosephTurian JamesBergstra YoshuaBengio 245–248 @@ -1263,7 +1263,7 @@ Combining Constituent Parsers - VictoriaFossum + VictoriaFossum KevinKnight 253–256 N09-2064 @@ -1272,7 +1272,7 @@ Recognising the Predicate-argument Structure of <fixed-case>T</fixed-case>agalog MeladelMistica - TimothyBaldwin + TimothyBaldwin 257–260 N09-2065 mistica-baldwin-2009-recognising @@ -1288,38 +1288,38 @@ Anchored Speech Recognition for Question Answering SibelYaman - GokhanTur + GokhanTur DimitraVergyri - DilekHakkani-Tur - MaryHarper - WenWang + DilekHakkani-Tur + MaryHarper + WenWang 265–268 N09-2067 yaman-etal-2009-anchored Score Distribution Based Term Specific Thresholding for Spoken Term Detection - DoğanCan - MuratSaraçlar + DoğanCan + MuratSaraçlar 269–272 N09-2068 can-saraclar-2009-score Automatic <fixed-case>C</fixed-case>hinese Abbreviation Generation Using Conditional Random Field - DongYang - Yi-chengPan - SadaokiFurui + DongYang + Yi-chengPan + SadaokiFurui 273–276 N09-2069 yang-etal-2009-automatic Fast decoding for open vocabulary spoken term detection - BhuvanaRamabhadran + BhuvanaRamabhadran AbhinavSethy JonathanMamou - BrianKingsbury + BrianKingsbury UpendraChaudhari 277–280 N09-2070 @@ -1328,7 +1328,7 @@ Tightly coupling Speech Recognition and Search TaniyaMishra - SrinivasBangalore + SrinivasBangalore 281–284 N09-2071 mishra-bangalore-2009-tightly @@ -1341,7 +1341,7 @@ UlrichGermann ChiragShah SvetlanaStoyanchev - Carolyn PensteinRosé + Carolyn PensteinRosé AnoopSarkar Association for Computational Linguistics
Boulder, Colorado
@@ -1356,8 +1356,8 @@ Classifier Combination Techniques Applied to Coreference Resolution SmitaVemulapalli - XiaoqiangLuo - John F.Pitrelli + XiaoqiangLuo + John F.Pitrelli ImedZitouni 1–6 N09-3001 @@ -1366,7 +1366,7 @@ Solving the “<fixed-case>W</fixed-case>ho’s <fixed-case>M</fixed-case>ark <fixed-case>J</fixed-case>ohnson <fixed-case>P</fixed-case>uzzle”: <fixed-case>I</fixed-case>nformation Extraction Based Cross Document Coreference JianHuang - Sarah M.Taylor + Sarah M.Taylor Jonathan L.Smith Konstantinos A.Fotiadis C. LeeGiles @@ -1377,7 +1377,7 @@ Exploring Topic Continuation Follow-up Questions using Machine Learning ManuelKirschner - RaffaellaBernardi + RaffaellaBernardi 13–18 N09-3003 kirschner-bernardi-2009-exploring @@ -1393,7 +1393,7 @@ Using Language Modeling to Select Useful Annotation Data DmitriyDligach - MarthaPalmer + MarthaPalmer 25–30 N09-3005 dligach-palmer-2009-using @@ -1409,7 +1409,7 @@ Building a Semantic Lexicon of <fixed-case>E</fixed-case>nglish Nouns via Bootstrapping TingQian BenjaminVan Durme - LenhartSchubert + LenhartSchubert 37–42 N09-3007 qian-etal-2009-building @@ -1434,7 +1434,7 @@ Interactive Annotation Learning with Indirect Feature Voting ShilpaArora - EricNyberg + EricNyberg 55–60 N09-3010 arora-nyberg-2009-interactive @@ -1443,7 +1443,7 @@ Loss-Sensitive Discriminative Training of Machine Transliteration Models KedarBellare KobyCrammer - DayneFreitag + DayneFreitag 61–65 N09-3011 bellare-etal-2009-loss @@ -1460,9 +1460,9 @@ Towards Building a Competitive Opinion Summarization System: <fixed-case>C</fixed-case>hallenges and Keys ElenaLloret - AlexandraBalahur - ManuelPalomar - AndrésMontoyo + AlexandraBalahur + ManuelPalomar + AndrésMontoyo 72–77 N09-3013 lloret-etal-2009-towards @@ -1485,7 +1485,7 @@ Modeling Letter-to-Phoneme Conversion as a Phrase Based Statistical Machine Translation Problem with <fixed-case>M</fixed-case>inimum <fixed-case>E</fixed-case>rror <fixed-case>R</fixed-case>ate Training TarakaRama - Anil KumarSingh + Anil KumarSingh SudheerKolachina 90–95 N09-3016 @@ -1505,7 +1505,7 @@ Proceedings of Human Language Technologies: The 2009 Annual Conference of the North American Chapter of the Association for Computational Linguistics, Companion Volume: Tutorial Abstracts N09-4 CiprianChelba - PaulKantor + PaulKantor BrianRoark Association for Computational Linguistics
Boulder, Colorado
@@ -1520,7 +1520,7 @@ Data Intensive Text Processing with <fixed-case>M</fixed-case>ap<fixed-case>R</fixed-case>educe JimmyLin - ChrisDyer + ChrisDyer 1–2 N09-4001 lin-dyer-2009-data @@ -1542,7 +1542,7 @@ Extracting World and Linguistic Knowledge from <fixed-case>W</fixed-case>ikipedia - Simone PaoloPonzetto + Simone PaoloPonzetto MichaelStrube 7–8 N09-4004 @@ -1550,7 +1550,7 @@ <fixed-case>O</fixed-case>pen<fixed-case>F</fixed-case>st: An Open-Source, Weighted Finite-State Transducer Library and its Applications to Speech and Language - MichaelRiley + MichaelRiley CyrilAllauzen MartinJansche 9–10 @@ -1559,7 +1559,7 @@ <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes: The 90% Solution - Sameer S.Pradhan + Sameer S.Pradhan NianwenXue 11–12 N09-4006 @@ -1567,9 +1567,9 @@ <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et overview, extensions, mappings and applications - KarinKipper Schuler + KarinKipper Schuler AnnaKorhonen - SusanBrown + SusanBrown 13–14 N09-4007 kipper-schuler-etal-2009-verbnet @@ -1577,7 +1577,7 @@ Writing Systems, Transliteration and Decipherment KevinKnight - RichardSproat + RichardSproat 15–16 N09-4008 knight-sproat-2009-writing @@ -1587,7 +1587,7 @@ Proceedings of Human Language Technologies: The 2009 Annual Conference of the North American Chapter of the Association for Computational Linguistics, Companion Volume: Demonstration Session N09-5 - MichaelJohnston + MichaelJohnston FredPopowich Association for Computational Linguistics
Boulder, Colorado
@@ -1610,16 +1610,16 @@ Building Conversational Agents with Basilica RohitKumar - Carolyn P.Rosé - Michael J.Witbrock + Carolyn P.Rosé + Michael J.Witbrock 5–8 N09-5002 kumar-etal-2009-building <fixed-case>STAT</fixed-case>: Speech Transcription Analysis Tool - Stephen A.Kunath - Steven H.Weinberger + Stephen A.Kunath + Steven H.Weinberger 9–12 N09-5003 kunath-weinberger-2009-stat diff --git a/data/xml/N10.xml b/data/xml/N10.xml index 82a78cdeb0..627e4e5842 100644 --- a/data/xml/N10.xml +++ b/data/xml/N10.xml @@ -4,9 +4,9 @@ Human Language Technologies: The 2010 Annual Conference of the North American Chapter of the Association for Computational Linguistics N10-1 - RonKaplan + RonKaplan JillBurstein - MaryHarper + MaryHarper GeraldPenn Association for Computational Linguistics
Los Angeles, California
@@ -28,9 +28,9 @@ Chart Mining-based Lexical Acquisition with Precision Grammars YiZhang - TimothyBaldwin + TimothyBaldwin ValiaKordoni - DavidMartinez + DavidMartinez JeremyNicholson 10–18 N10-1002 @@ -71,7 +71,7 @@ Qme! : A Speech-based Question-Answering system on Mobile Devices TaniyaMishra - SrinivasBangalore + SrinivasBangalore 55–63 N10-1007 mishra-bangalore-2010-qme @@ -79,8 +79,8 @@ Dialogue-Oriented Review Summary Generation for Spoken Dialogue Recommendation Systems JingjingLiu - StephanieSeneff - VictorZue + StephanieSeneff + VictorZue 64–72 N10-1008 liu-etal-2010-dialogue @@ -88,7 +88,7 @@ Minimally-Supervised Extraction of Entities from Text Advertisements SameerSingh - DustinHillard + DustinHillard ChrisLeggetter 73–81 N10-1009 @@ -96,7 +96,7 @@ Taxonomy Learning Using Word Sense Induction - Ioannis P.Klapaftis + Ioannis P.Klapaftis SureshManandhar 82–90 N10-1010 @@ -115,7 +115,7 @@ DavidNewman Jey HanLau KarlGrieser - TimothyBaldwin + TimothyBaldwin 100–108 N10-1012 newman-etal-2010-automatic @@ -123,7 +123,7 @@ Multi-Prototype Vector-Space Models of Word Meaning JosephReisinger - Raymond J.Mooney + Raymond J.Mooney 109–117 N10-1013 reisinger-mooney-2010-multi @@ -149,7 +149,7 @@ Learning Translation Boundaries for Phrase-Based Decoding - DeyiXiong + DeyiXiong MinZhang HaizhouLi 136–144 @@ -183,14 +183,14 @@ Unsupervised Modeling of <fixed-case>T</fixed-case>witter Conversations AlanRitter ColinCherry - BillDolan + BillDolan 172–180 N10-1020 ritter-etal-2010-unsupervised Streaming First Story Detection with application to <fixed-case>T</fixed-case>witter - SašaPetrović + SašaPetrović MilesOsborne VictorLavrenko 181–189 @@ -200,9 +200,9 @@ Unsupervised Model Adaptation using Information-Theoretic Criterion AriyaRastrow - FrederickJelinek + FrederickJelinek AbhinavSethy - BhuvanaRamabhadran + BhuvanaRamabhadran 190–197 N10-1022 rastrow-etal-2010-unsupervised @@ -227,7 +227,7 @@ CarolinaParada MarkDredze DenisFilimonov - FrederickJelinek + FrederickJelinek 216–224 N10-1025 parada-etal-2010-contextual @@ -242,7 +242,7 @@ Language Identification: The Long and the Short of the Matter - TimothyBaldwin + TimothyBaldwin MarcoLui 229–237 N10-1027 @@ -250,8 +250,8 @@ Inducing Synchronous Grammars with Slice Sampling - PhilBlunsom - TrevorCohn + PhilBlunsom + TrevorCohn 238–241 N10-1028 blunsom-cohn-2010-inducing @@ -259,7 +259,7 @@ Task-based Evaluation of Multiword Expressions: a Pilot Study in Statistical Machine Translation MarineCarpuat - MonaDiab + MonaDiab 242–245 N10-1029 carpuat-diab-2010-task @@ -276,7 +276,7 @@ Extending the <fixed-case>METEOR</fixed-case> Machine Translation Evaluation Metric to the Phrase Level MichaelDenkowski - AlonLavie + AlonLavie 250–253 N10-1031 denkowski-lavie-2010-extending @@ -290,7 +290,7 @@ Two monolingual parses are better than one (synchronous parse) - ChrisDyer + ChrisDyer 263–266 N10-1033 dyer-2010-two @@ -318,7 +318,7 @@ ZhengChen JonathanFeldman AntonioGonzalez - RalphGrishman + RalphGrishman VivekUpadhyay 285–288 N10-1036 @@ -328,7 +328,7 @@ Evaluation Metrics for the Lexical Substitution Task SanazJabbari MarkHepple - LouiseGuthrie + LouiseGuthrie 289–292 N10-1037 jabbari-etal-2010-evaluation @@ -338,7 +338,7 @@ MaheshJoshi DipanjanDas KevinGimpel - Noah A.Smith + Noah A.Smith 293–296 N10-1038 joshi-etal-2010-movie @@ -364,7 +364,7 @@ Putting the User in the Loop: Interactive Maximal Marginal Relevance for Query-Focused Summarization JimmyLin NitinMadnani - BonnieDorr + BonnieDorr 305–308 N10-1041 lin-etal-2010-putting @@ -389,7 +389,7 @@ Time-Efficient Creation of an Accurate Sentence Fusion Corpus - KathleenMcKeown + KathleenMcKeown SaraRosenthal KapilThadani ColemanMoore @@ -400,7 +400,7 @@ Towards Cross-Lingual Textual Entailment YasharMehdad - MatteoNegri + MatteoNegri MarcelloFederico 321–324 N10-1045 @@ -409,7 +409,7 @@ A Comparative Study of Word Co-occurrence for Term Clustering in Language Model-based Sentence Retrieval SaeedehMomtazi - SanjeevKhudanpur + SanjeevKhudanpur DietrichKlakow 325–328 N10-1046 @@ -432,7 +432,7 @@ The Simple Truth about Dependency and Phrase Structure Representations: An Opinion Piece - OwenRambow + OwenRambow 337–340 N10-1049 rambow-2010-simple @@ -448,11 +448,11 @@ Crowdsourcing the evaluation of a domain-adapted named entity recognition system - Asad B.Sayeed + Asad B.Sayeed Timothy J.Meyer Hieu C.Nguyen OliviaBuzek - AmyWeinberg + AmyWeinberg 345–348 N10-1051 sayeed-etal-2010-crowdsourcing @@ -501,7 +501,7 @@ Predicting Human-Targeted Translation Edit Rate via Untrained Human Annotators - Omar F.Zaidan + Omar F.Zaidan ChrisCallison-Burch 369–372 N10-1057 @@ -510,8 +510,8 @@ Improving Semantic Role Classification with Selectional Preferences BeñatZapirain - EnekoAgirre - LluísMàrquez + EnekoAgirre + LluísMàrquez MihaiSurdeanu 373–376 N10-1058 @@ -553,7 +553,7 @@ Extracting Parallel Sentences from Comparable Corpora using Document Level Alignment - Jason R.Smith + Jason R.Smith ChrisQuirk KristinaToutanova 403–411 @@ -571,9 +571,9 @@ Everybody loves a rich cousin: An empirical study of transliteration through bridge languages - Mitesh M.Khapra - AKumaran - PushpakBhattacharyya + Mitesh M.Khapra + AKumaran + PushpakBhattacharyya 420–428 N10-1065 khapra-etal-2010-everybody @@ -610,16 +610,16 @@ Distributed Training Strategies for the Structured Perceptron RyanMcDonald - KeithHall - GideonMann + KeithHall + GideonMann 456–464 N10-1069 mcdonald-etal-2010-distributed Term Weighting Schemes for <fixed-case>L</fixed-case>atent <fixed-case>D</fixed-case>irichlet <fixed-case>A</fixed-case>llocation - Andrew T.Wilson - Peter A.Chew + Andrew T.Wilson + Peter A.Chew 465–473 N10-1070 wilson-chew-2010-term @@ -628,9 +628,9 @@ Learning Dense Models of Query Similarity from User Click Logs FabioDe Bona StefanRiezler - KeithHall + KeithHall MassimilianoCiaramita - AmaçHerdaǧdelen + AmaçHerdaǧdelen MariaHolmqvist 474–482 N10-1071 @@ -648,17 +648,17 @@ Improving the Multilingual User Experience of <fixed-case>W</fixed-case>ikipedia Using Cross-Language Name Search - RaghavendraUdupa - Mitesh M.Khapra + RaghavendraUdupa + Mitesh M.Khapra 492–500 N10-1073 udupa-khapra-2010-improving Learning Words and Their Meanings from Unsegmented Child-directed Speech - Bevan K.Jones + Bevan K.Jones MarkJohnson - Michael C.Frank + Michael C.Frank 501–509 N10-1074 jones-etal-2010-learning @@ -666,7 +666,7 @@ Subword Variation in Text Message Classification RobertMunro - Christopher D.Manning + Christopher D.Manning 510–518 N10-1075 munro-manning-2010-subword @@ -675,7 +675,7 @@ Automatic Diacritization for Low-Resource Languages Using a Hybrid Word and Consonant <fixed-case>CMM</fixed-case> RobbieHaertel PeterMcClanahan - Eric K.Ringger + Eric K.Ringger 519–527 N10-1076 haertel-etal-2010-automatic @@ -697,9 +697,9 @@ Online Learning for Interactive Statistical Machine Translation - DanielOrtiz-Martínez - IsmaelGarcía-Varea - FranciscoCasacuberta + DanielOrtiz-Martínez + IsmaelGarcía-Varea + FranciscoCasacuberta 546–554 N10-1079 ortiz-martinez-etal-2010-online @@ -707,17 +707,17 @@ The Best Lexical Metric for Phrase-Based Statistical <fixed-case>MT</fixed-case> System Optimization DanielCer - Christopher D.Manning - DanielJurafsky + Christopher D.Manning + DanielJurafsky 555–563 N10-1080 cer-etal-2010-best Variational Inference for <fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars - Shay B.Cohen - David M.Blei - Noah A.Smith + Shay B.Cohen + David M.Blei + Noah A.Smith 564–572 N10-1081 cohen-etal-2010-variational @@ -725,7 +725,7 @@ Type-Based <fixed-case>MCMC</fixed-case> PercyLiang - Michael I.Jordan + Michael I.Jordan DanKlein 573–581 N10-1082 @@ -734,7 +734,7 @@ Painless Unsupervised Learning with Features TaylorBerg-Kirkpatrick - AlexandreBouchard-Côté + AlexandreBouchard-Côté JohnDeNero DanKlein 582–590 @@ -743,7 +743,7 @@ Linguistic Steganography Using Automatically Generated Paraphrases - Ching-YunChang + Ching-YunChang StephenClark 591–599 N10-1084 @@ -761,7 +761,7 @@ Good Question! Statistical Ranking for Question Generation MichaelHeilman - Noah A.Smith + Noah A.Smith 609–617 N10-1086 heilman-smith-2010-good @@ -769,7 +769,7 @@ Not All Seeds Are Equal: Measuring the Quality of Text Mining Seeds ZornitsaKozareva - EduardHovy + EduardHovy 618–626 N10-1087 kozareva-hovy-2010-seeds @@ -792,9 +792,9 @@ A Simple Approach for <fixed-case>HPSG</fixed-case> Supertagging Using Dependency Information - Yao-zhongZhang + Yao-zhongZhang TakuyaMatsuzaki - Jun’ichiTsujii + Jun’ichiTsujii 645–648 N10-1090 zhang-etal-2010-simple @@ -802,7 +802,7 @@ Ensemble Models for Dependency Parsing: Cheap and Good? MihaiSurdeanu - Christopher D.Manning + Christopher D.Manning 649–652 N10-1091 surdeanu-manning-2010-ensemble @@ -810,8 +810,8 @@ Enlarged Search Space for <fixed-case>SITG</fixed-case> Parsing GuillemGascó - Joan-AndreuSánchez - José-MiguelBenedí + Joan-AndreuSánchez + José-MiguelBenedí 653–656 N10-1092 gasco-etal-2010-enlarged @@ -821,7 +821,7 @@ PhaniGadde KaranJindal SamarHusain - Dipti MisraSharma + Dipti MisraSharma RajeevSangal 657–660 N10-1093 @@ -845,9 +845,9 @@ An Exploration of Off Topic Conversation - Whitney L.Cade + Whitney L.Cade Blair A.Lehman - AndrewOlney + AndrewOlney 669–672 N10-1096 cade-etal-2010-exploration @@ -855,7 +855,7 @@ Making Conversational Structure Explicit: Identification of Initiation-response Pairs within Online Discussions Yi-ChiaWang - Carolyn P.Rosé + Carolyn P.Rosé 673–676 N10-1097 wang-rose-2010-making @@ -863,7 +863,7 @@ Engaging learning groups using Social Interaction Strategies RohitKumar - Carolyn P.Rosé + Carolyn P.Rosé 677–680 N10-1098 kumar-rose-2010-engaging @@ -871,7 +871,7 @@ Using Entity-Based Features to Model Coherence in Student Essays JillBurstein - JoelTetreault + JoelTetreault SlavaAndreyev 681–684 N10-1099 @@ -881,7 +881,7 @@ Summarizing Microblogs Automatically BeauxSharifi Mark-AnthonyHutton - JugalKalita + JugalKalita 685–688 N10-1100 sharifi-etal-2010-summarizing @@ -890,7 +890,7 @@ Automatic Generation of Personalized Annotation Tags for <fixed-case>T</fixed-case>witter Users WeiWu BinZhang - MariOstendorf + MariOstendorf 689–692 N10-1101 wu-etal-2010-automatic @@ -916,8 +916,8 @@ A Hybrid Morphologically Decomposed Factored Language Models for <fixed-case>A</fixed-case>rabic <fixed-case>LVCSR</fixed-case> AmrEl-Desoky - RalfSchlüter - HermannNey + RalfSchlüter + HermannNey 701–704 N10-1104 el-desoky-etal-2010-hybrid @@ -925,7 +925,7 @@ Is <fixed-case>A</fixed-case>rabic Part of Speech Tagging Feasible Without Word Segmentation? EmadMohamed - SandraKübler + SandraKübler 705–708 N10-1105 mohamed-kubler-2010-arabic-part @@ -951,7 +951,7 @@ BinZhang BrianHutchinson WeiWu - MariOstendorf + MariOstendorf 717–720 N10-1108 zhang-etal-2010-extracting-phrase @@ -969,7 +969,7 @@ PreethiJyothi WilliamHartmann JeremyMorris - EricFosler-Lussier + EricFosler-Lussier 725–728 N10-1110 prabhavalkar-etal-2010-investigations @@ -987,7 +987,7 @@ Softmax-Margin <fixed-case>CRF</fixed-case>s: Training Log-Linear Models with Cost Functions KevinGimpel - Noah A.Smith + Noah A.Smith 733–736 N10-1112 gimpel-smith-2010-softmax @@ -995,8 +995,8 @@ Bitext-Based Resolution of <fixed-case>G</fixed-case>erman Subject-Object Ambiguities FlorianSchwarck - AlexanderFraser - HinrichSchütze + AlexanderFraser + HinrichSchütze 737–740 N10-1113 schwarck-etal-2010-bitext @@ -1018,9 +1018,9 @@ From Baby Steps to Leapfrog: How “Less is More” in Unsupervised Dependency Parsing - Valentin I.Spitkovsky - HiyanAlshawi - DanielJurafsky + Valentin I.Spitkovsky + HiyanAlshawi + DanielJurafsky 751–759 N10-1116 spitkovsky-etal-2010-baby @@ -1028,7 +1028,7 @@ Relaxed Marginal Inference and its Application to Dependency Parsing SebastianRiedel - David A.Smith + David A.Smith 760–768 N10-1117 riedel-smith-2010-relaxed @@ -1070,7 +1070,7 @@ An Unsupervised Aspect-Sentiment Model for Online Reviews SamuelBrody - NoemieElhadad + NoemieElhadad 804–812 N10-1122 brody-elhadad-2010-unsupervised @@ -1086,7 +1086,7 @@ Clinical Information Retrieval using Document and <fixed-case>PICO</fixed-case> Structure FlorianBoudin - Jian-YunNie + Jian-YunNie MartinDawes 822–830 N10-1124 @@ -1102,10 +1102,10 @@ Learning about Voice Search for Spoken Dialogue Systems - RebeccaPassonneau + RebeccaPassonneau Susan L.Epstein TizianaLigorio - Joshua B.Gordon + Joshua B.Gordon PravinBhutada 840–848 N10-1126 @@ -1120,7 +1120,7 @@ Context-free reordering, finite-state translation - ChrisDyer + ChrisDyer PhilipResnik 858–866 N10-1128 @@ -1130,7 +1130,7 @@ Improved Models of Distortion Cost for Statistical Machine Translation SpenceGreen MichelGalley - Christopher D.Manning + Christopher D.Manning 867–875 N10-1129 green-etal-2010-improved @@ -1144,7 +1144,7 @@ An extractive supervised two-stage method for sentence compression - DimitriosGalanis + DimitriosGalanis IonAndroutsopoulos 885–893 N10-1131 @@ -1154,7 +1154,7 @@ Interpretation and Transformation for Abstracting Conversations GabrielMurray GiuseppeCarenini - RaymondNg + RaymondNg 894–902 N10-1132 murray-etal-2010-interpretation @@ -1162,10 +1162,10 @@ Quantifying the Limits and Success of Extractive Summarization Systems Across Domains HakanCeylan - RadaMihalcea + RadaMihalcea UmutÖzertem ElenaLloret - ManuelPalomar + ManuelPalomar 903–911 N10-1133 ceylan-etal-2010-quantifying @@ -1173,7 +1173,7 @@ Multi-document Summarization via Budgeted Maximization of Submodular Functions HuiLin - JeffBilmes + JeffBilmes 912–920 N10-1134 lin-bilmes-2010-multi @@ -1181,7 +1181,7 @@ Cross-lingual Induction of Selectional Preferences with Bilingual Vector Spaces YvesPeirsman - SebastianPadó + SebastianPadó 921–929 N10-1135 peirsman-pado-2010-cross @@ -1199,7 +1199,7 @@ DipanjanDas NathanSchneider DesaiChen - Noah A.Smith + Noah A.Smith 948–956 N10-1138 das-etal-2010-probabilistic @@ -1210,7 +1210,7 @@ ShankarKumar WolfgangMacherey MehryarMohri - MichaelRiley + MichaelRiley 957–965 N10-1139 allauzen-etal-2010-expected @@ -1218,7 +1218,7 @@ Accurate Non-Hierarchical Phrase-Based Translation MichelGalley - Christopher D.Manning + Christopher D.Manning 966–974 N10-1140 galley-manning-2010-accurate @@ -1228,7 +1228,7 @@ JohnDeNero ShankarKumar CiprianChelba - FranzOch + FranzOch 975–983 N10-1141 denero-etal-2010-model @@ -1237,7 +1237,7 @@ Detecting Emails Containing Requests for Action AndrewLampert RobertDale - CecileParis + CecileParis 984–992 N10-1142 lampert-etal-2010-detecting @@ -1260,7 +1260,7 @@ Tree Edit Models for Recognizing Textual Entailments, Paraphrases, and Answers to Questions MichaelHeilman - Noah A.Smith + Noah A.Smith 1011–1019 N10-1145 heilman-smith-2010-tree @@ -1269,7 +1269,7 @@ Syntactic/Semantic Structures for Textual Entailment Recognition YasharMehdad AlessandroMoschitti - Fabio MassimoZanzotto + Fabio MassimoZanzotto 1020–1028 N10-1146 mehdad-etal-2010-syntactic @@ -1286,7 +1286,7 @@ Proceedings of the NAACL HLT 2010 Demonstration Session N10-2 - Carolyn PensteinRosé + Carolyn PensteinRosé Association for Computational Linguistics
Los Angeles, California
June @@ -1299,7 +1299,7 @@ <fixed-case>C</fixed-case>amtology: Intelligent Information Access for Science - TedBriscoe + TedBriscoe KarlHarrison AndrewNaish-Guzman AndyParker @@ -1314,7 +1314,7 @@ Summarizing Textual Information about Locations In a Geo-Spatial Information Display System CongxingCai - EduardHovy + EduardHovy 5–8 N10-2002 cai-hovy-2010-summarizing @@ -1323,17 +1323,17 @@ <fixed-case>P</fixed-case>hrasal: A Statistical Machine Translation Toolkit for Exploring New Model Features DanielCer MichelGalley - DanielJurafsky - Christopher D.Manning + DanielJurafsky + Christopher D.Manning 9–12 N10-2003 cer-etal-2010-phrasal Multilingual <fixed-case>P</fixed-case>ropbank Annotation Tools: Cornerstone and Jubilee - JinhoChoi - ClaireBonial - MarthaPalmer + JinhoChoi + ClaireBonial + MarthaPalmer 13–16 N10-2004 choi-etal-2010-multilingual @@ -1342,7 +1342,7 @@ <fixed-case>KSC</fixed-case>-<fixed-case>P</fixed-case>a<fixed-case>L</fixed-case>: A Peer Learning Agent that Encourages Students to take the Initiative CynthiaKersey BarbaraDi Eugenio - PamelaJordan + PamelaJordan SandraKatz 17–20 N10-2005 @@ -1376,7 +1376,7 @@ Interpretation of Partial Utterances in Virtual Human Dialogue Systems KenjiSagae DavidDeVault - DavidTraum + DavidTraum 33–36 N10-2009 sagae-etal-2010-interpretation @@ -1384,20 +1384,20 @@ Interactive Predictive Parsing using a Web-based Architecture RicardoSánchez-Sáez - Luis A.Leiva - Joan-AndreuSánchez - José-MiguelBenedí + Luis A.Leiva + Joan-AndreuSánchez + José-MiguelBenedí 37–40 N10-2010 sanchez-saez-etal-2010-interactive <fixed-case>SIMPLIFICA</fixed-case>: a tool for authoring simplified texts in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese guided by readability assessments - CarolinaScarton + CarolinaScarton MatheusOliveira - ArnaldoCandido Jr. + ArnaldoCandido Jr. CarolineGasperin - SandraAluísio + SandraAluísio 41–44 N10-2011 scarton-etal-2010-simplifica @@ -1408,7 +1408,7 @@ ChrisThrasher EvelyneViegas XiaolongLi - Bo-june PaulHsu + Bo-june PaulHsu 45–48 N10-2012 wang-etal-2010-overview @@ -1419,7 +1419,7 @@ Proceedings of the NAACL HLT 2010 Student Research Workshop N10-3 JuliaHockenmaier - DianeLitman + DianeLitman AdrianeBoyd MaheshJoshi FrankRudzicz @@ -1435,7 +1435,7 @@ Improving Syntactic Coordination Resolution using Language Modeling - PhilipOgren + PhilipOgren 1–6 N10-3001 ogren-2010-improving @@ -1487,7 +1487,7 @@ Temporal Relation Identification with Endpoints - Chong MinLee + Chong MinLee 40–45 N10-3008 lee-2010-temporal @@ -1520,7 +1520,7 @@ N10-4 JasonBaldwin PeterClark - GokhanTur + GokhanTur Association for Computational Linguistics
Los Angeles, California
June @@ -1534,7 +1534,7 @@ Data-Intensive Text Processing with <fixed-case>M</fixed-case>ap<fixed-case>R</fixed-case>educe JimmyLin - ChrisDyer + ChrisDyer 1–2 N10-4001 lin-dyer-2010-data @@ -1548,7 +1548,7 @@ Noisy Text Analytics - L. VenkataSubramaniam + L. VenkataSubramaniam 5–6 N10-4003 subramaniam-2010-noisy @@ -1563,8 +1563,8 @@ Integer Linear Programming in <fixed-case>NLP</fixed-case> - Constrained Conditional Models - Ming-WeiWang - NicholasRizzolo + Ming-WeiWang + NicholasRizzolo DanRoth 9–14 N10-4005 @@ -1579,7 +1579,7 @@ Computational psycholinguistics - RogerLevy + RogerLevy KlintonBicknell NathanielSmith 19–20 diff --git a/data/xml/N12.xml b/data/xml/N12.xml index b5bdbbed8b..9426614e6a 100644 --- a/data/xml/N12.xml +++ b/data/xml/N12.xml @@ -4,9 +4,9 @@ Proceedings of the 2012 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies N12-1 - EricFosler-Lussier - EllenRiloff - SrinivasBangalore + EricFosler-Lussier + EllenRiloff + SrinivasBangalore Association for Computational Linguistics
Montréal, Canada
June @@ -19,7 +19,7 @@ Multiple Narrative Disentanglement: Unraveling Infinite Jest - ByronWallace + ByronWallace 1–10 N12-1001 wallace-2012-multiple @@ -27,10 +27,10 @@ Acoustic-Prosodic Entrainment and Social Behavior RivkaLevitan - AgustínGravano + AgustínGravano LauraWillson - S̆tefanBen̆us̆ - JuliaHirschberg + S̆tefanBen̆us̆ + JuliaHirschberg AniNenkova 11–19 N12-1002 @@ -40,8 +40,8 @@ Identifying High-Level Organizational Elements in Argumentative Discourse NitinMadnani MichaelHeilman - JoelTetreault - MartinChodorow + JoelTetreault + MartinChodorow 20–28 N12-1003 madnani-etal-2012-identifying @@ -56,7 +56,7 @@ Continuous Space Translation Models with Neural Networks - Hai SonLe + Hai SonLe AlexandreAllauzen FrançoisYvon 39–48 @@ -68,11 +68,11 @@ RabihZbib ErikaMalchiodi JacobDevlin - DavidStallard + DavidStallard SpyrosMatsoukas - RichardSchwartz - JohnMakhoul - Omar F.Zaidan + RichardSchwartz + JohnMakhoul + Omar F.Zaidan ChrisCallison-Burch 49–59 N12-1006 @@ -82,9 +82,9 @@ Entity Clustering Across Languages SpenceGreen NicholasAndrews - Matthew R.Gormley + Matthew R.Gormley MarkDredze - Christopher D.Manning + Christopher D.Manning 60–69 N12-1007 green-etal-2012-entity @@ -100,15 +100,15 @@ Reference Scope Identification in Citing Sentences AmjadAbu-Jbara - DragomirRadev + DragomirRadev 80–90 N12-1009 abu-jbara-radev-2012-reference Intrinsic and Extrinsic Evaluation of an Automatic User Disengagement Detector for an Uncertainty-Adaptive Spoken Dialogue System - KateForbes-Riley - DianeLitman + KateForbes-Riley + DianeLitman HeatherFriedberg JoannaDrummond 91–102 @@ -136,15 +136,15 @@ Minimum-Risk Training of Approximate <fixed-case>CRF</fixed-case>-Based <fixed-case>NLP</fixed-case> Systems VeselinStoyanov - JasonEisner + JasonEisner 120–130 N12-1013 stoyanov-eisner-2012-minimum Unsupervised Learning on an Approximate Corpus - JasonSmith - JasonEisner + JasonSmith + JasonEisner 131–141 N12-1014 smith-eisner-2012-unsupervised @@ -161,7 +161,7 @@ Segmentation Similarity and Agreement ChrisFournier - DianaInkpen + DianaInkpen 152–161 N12-1016 fournier-inkpen-2012-segmentation @@ -185,15 +185,15 @@ Re-examining Machine Translation Metrics for Paraphrase Identification NitinMadnani - JoelTetreault - MartinChodorow + JoelTetreault + MartinChodorow 182–190 N12-1019 madnani-etal-2012-examining A Dependency Treebank of Classical <fixed-case>C</fixed-case>hinese Poems - JohnLee + JohnLee Yin HeiKong 191–199 N12-1020 @@ -201,8 +201,8 @@ Towards Effective Tutorial Feedback for Explanation Questions: A Dataset and Baselines - Myroslava O.Dzikovska - Rodney D.Nielsen + Myroslava O.Dzikovska + Rodney D.Nielsen ChrisBrew 200–210 N12-1021 @@ -211,7 +211,7 @@ Topical Segmentation: a Study of Human Performance and a New Measure of Quality. AnnaKazantseva - StanSzpakowicz + StanSzpakowicz 211–220 N12-1022 kazantseva-szpakowicz-2012-topical @@ -219,15 +219,15 @@ Structured Ramp Loss Minimization for Machine Translation KevinGimpel - Noah A.Smith + Noah A.Smith 221–231 N12-1023 gimpel-smith-2012-structured Implicitly Intersecting Weighted Automata using Dual Decomposition - Michael J.Paul - JasonEisner + Michael J.Paul + JasonEisner 232–242 N12-1024 paul-eisner-2012-implicitly @@ -268,8 +268,8 @@ Correcting Comma Errors in Learner Essays, and Restoring Commas in Newswire Text RossIsrael - JoelTetreault - MartinChodorow + JoelTetreault + MartinChodorow 284–294 N12-1029 israel-etal-2012-correcting @@ -277,7 +277,7 @@ The Challenges of Parsing <fixed-case>C</fixed-case>hinese with <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar DanielTse - James R.Curran + James R.Curran 295–304 N12-1030 tse-curran-2012-challenges @@ -294,7 +294,7 @@ Getting More from Morphology in Multilingual Dependency Parsing MattHohensee - Emily M.Bender + Emily M.Bender 315–326 N12-1032 hohensee-bender-2012-getting @@ -310,7 +310,7 @@ Using paraphrases for improving first story detection in news and <fixed-case>T</fixed-case>witter - SašaPetrović + SašaPetrović MilesOsborne VictorLavrenko 338–346 @@ -320,24 +320,24 @@ Insertion and Deletion Models for Statistical Machine Translation MatthiasHuck - HermannNey + HermannNey 347–351 N12-1035 huck-ney-2012-insertion <fixed-case>T</fixed-case>rans<fixed-case>A</fixed-case>head: A Computer-Assisted Translation and Writing Tool - Chung-chiHuang - Ping-cheYang - Keh-jiannChen - Jason S.Chang + Chung-chiHuang + Ping-cheYang + Keh-jiannChen + Jason S.Chang 352–356 N12-1036 huang-etal-2012-transahead-computer Correction Detection and Error Type Selection as an <fixed-case>ESL</fixed-case> Educational Aid - BenSwanson + BenSwanson ElifYamangil 357–361 N12-1037 @@ -346,7 +346,7 @@ Getting More from Segmentation Evaluation MartinScaiano - DianaInkpen + DianaInkpen 362–366 N12-1038 scaiano-inkpen-2012-getting @@ -362,8 +362,8 @@ Evaluating a Morphological Analyser of <fixed-case>I</fixed-case>nuktitut JeremyNicholson - TrevorCohn - TimothyBaldwin + TrevorCohn + TimothyBaldwin 372–376 N12-1040 nicholson-etal-2012-evaluating @@ -379,7 +379,7 @@ Towards Using <fixed-case>EEG</fixed-case> to Improve <fixed-case>ASR</fixed-case> Accuracy Yun-NungChen - Kai-MinChang + Kai-MinChang JackMostow 382–385 N12-1042 @@ -387,8 +387,8 @@ A Comparative Investigation of Morphological Language Modeling for the Languages of the <fixed-case>E</fixed-case>uropean <fixed-case>U</fixed-case>nion - ThomasMueller - HinrichSchuetze + ThomasMueller + HinrichSchuetze HelmutSchmid 386–395 N12-1043 @@ -412,8 +412,8 @@ Encouraging Consistent Translation Choices - FerhanTure - Douglas W.Oard + FerhanTure + Douglas W.Oard PhilipResnik 417–426 N12-1046 @@ -443,8 +443,8 @@ Parsing Time: Learning to Interpret Time Expressions GaborAngeli - ChristopherManning - DanielJurafsky + ChristopherManning + DanielJurafsky 446–455 N12-1049 angeli-etal-2012-parsing @@ -452,7 +452,7 @@ Fine-Grained Focus for Pinpointing Positive Implicit Meaning from Negated Statements EduardoBlanco - DanMoldovan + DanMoldovan 456–465 N12-1050 blanco-moldovan-2012-fine @@ -484,7 +484,7 @@ Vine Pruning for Efficient Multi-Pass Dependency Parsing - AlexanderRush + AlexanderRush SlavPetrov 498–507 N12-1054 @@ -494,7 +494,7 @@ Active Learning for Coreference Resolution FlorianLaws FlorianHeimerl - HinrichSchütze + HinrichSchütze 508–512 N12-1055 laws-etal-2012-active @@ -510,8 +510,8 @@ Predicting Overt Display of Power in Written Dialogs VinodkumarPrabhakaran - OwenRambow - MonaDiab + OwenRambow + MonaDiab 518–522 N12-1057 prabhakaran-etal-2012-predicting @@ -534,7 +534,7 @@ Improved Reordering for Shallow-n Grammar based Hierarchical Phrase-based Translation - BaskaranSankaran + BaskaranSankaran AnoopSarkar 533–537 N12-1060 @@ -569,7 +569,7 @@ How Text Segmentation Algorithms Gain from Topic Models MartinRiedl - ChrisBiemann + ChrisBiemann 553–557 N12-1064 riedl-biemann-2012-text @@ -584,7 +584,7 @@ Behavioral Factors in Interactive Training of Text Classifiers BurrSettles - XiaojinZhu + XiaojinZhu 563–567 N12-1066 @@ -610,7 +610,7 @@ Concavity and Initialization for Unsupervised Dependency Parsing KevinGimpel - Noah A.Smith + Noah A.Smith 577–581 N12-1069 gimpel-smith-2012-concavity @@ -618,7 +618,7 @@ Multimodal Grammar Implementation KatyaAlahverdzhieva - DanFlickinger + DanFlickinger AlexLascarides 582–586 N12-1070 @@ -626,15 +626,15 @@ Portable Features for Classifying Emotional Text - SaifMohammad + SaifMohammad 587–591 N12-1071 mohammad-2012-portable Stance Classification using Dialogic Properties of Persuasion - MarilynWalker - PranavAnand + MarilynWalker + PranavAnand RobAbbott RickyGrant 592–596 @@ -662,7 +662,7 @@ The Intelius Nickname Collection: Quantitative Analyses from Billions of Public Records - VitorCarvalho + VitorCarvalho YigitKiran AndrewBorthwick 607–610 @@ -671,7 +671,7 @@ A comparison of models of word meaning in context - GeorgianaDinu + GeorgianaDinu StefanThater SoerenLaue 611–615 @@ -680,7 +680,7 @@ Measuring Word Relatedness Using Heterogeneous Vector Space Models - Wen-tauYih + Wen-tauYih VahedQazvinian 616–620 N12-1077 @@ -698,7 +698,7 @@ Why Not Grab a Free Lunch? Mining Large Corpora for Parallel Sentences to Improve Translation Modeling - FerhanTure + FerhanTure JimmyLin 626–630 N12-1079 @@ -707,7 +707,7 @@ Summarization of Historical Articles Using Temporal Event Clustering JamesGung - JugalKalita + JugalKalita 631–635 N12-1080 gung-kalita-2012-summarization @@ -723,8 +723,8 @@ On The Feasibility of Open Domain Referring Expression Generation Using Large Scale Folksonomies FabiánPacheco - PabloDuboue - MartínDomínguez + PabloDuboue + MartínDomínguez 641–645 N12-1082 pacheco-etal-2012-feasibility @@ -733,7 +733,7 @@ Structured Event Retrieval over Microblog Archives DonaldMetzler CongxingCai - EduardHovy + EduardHovy 646–655 N12-1083 metzler-etal-2012-structured @@ -742,7 +742,7 @@ Learning from Bullying Traces in Social Media Jun-MingXu Kwang-SungJun - XiaojinZhu + XiaojinZhu AmyBellmore 656–666 N12-1084 @@ -750,10 +750,10 @@ Grammatical structures for word-level sentiment detection - AsadSayeed + AsadSayeed JordanBoyd-Graber BryanRusk - AmyWeinberg + AmyWeinberg 667–676 N12-1085 sayeed-etal-2012-grammatical @@ -761,7 +761,7 @@ Graph-Based Lexicon Expansion with Sparsity-Inducing Penalties DipanjanDas - Noah A.Smith + Noah A.Smith 677–687 N12-1086 das-smith-2012-graph @@ -778,7 +778,7 @@ Low-Dimensional Discriminative Reranking JagadeeshJagarlamudi - HalDaumé III + HalDaumé III 699–709 N12-1088 jagarlamudi-daume-iii-2012-low @@ -786,7 +786,7 @@ Autonomous Self-Assessment of Autocorrections: Exploring Text Message Dialogues TylerBaldwin - JoyceChai + JoyceChai 710–719 N12-1089 baldwin-chai-2012-autonomous @@ -803,14 +803,14 @@ Exploring Semi-Supervised Coreference Resolution of Medical Concepts using Semantic and Temporal Features PreethiRaghavan EricFosler-Lussier - AlbertLai + AlbertLai 731–741 N12-1091 raghavan-etal-2012-exploring Mind the Gap: Learning to Choose Gaps for Question Generation - LeeBecker + LeeBecker SumitBasu LucyVanderwende 742–751 @@ -835,9 +835,9 @@ KarlStratos KotaYamaguchi YejinChoi - HalDaumé III - AlexBerg - TamaraBerg + HalDaumé III + AlexBerg + TamaraBerg 762–772 N12-1094 dodge-etal-2012-detecting @@ -853,10 +853,10 @@ Shared Components Topic Models - Matthew R.Gormley + Matthew R.Gormley MarkDredze BenjaminVan Durme - JasonEisner + JasonEisner 783–792 N12-1096 gormley-etal-2012-shared @@ -864,8 +864,8 @@ Textual Predictors of Bill Survival in Congressional Committees TaeYano - Noah A.Smith - John D.Wilkerson + Noah A.Smith + John D.Wilkerson 793–802 N12-1097 yano-etal-2012-textual @@ -877,7 +877,7 @@ N12-2 RivkaLevitan MyleOtt - RogerLevy + RogerLevy AniNenkova Association for Computational Linguistics
Montréal, Canada
@@ -892,7 +892,7 @@ Finding the Right Supervisor: Expert-Finding in a University Domain FawazAlarfaj - UdoKruschwitz + UdoKruschwitz DavidHunter ChrisFox 1–6 @@ -901,7 +901,7 @@ Automatic <fixed-case>A</fixed-case>nimacy Classification - Samuel R.Bowman + Samuel R.Bowman HarshitChopra 7–10 N12-2002 @@ -984,7 +984,7 @@ Proceedings of the Demonstration Session at the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies N12-3 AriaHaghighi - YaserAl-Onaizan + YaserAl-Onaizan Association for Computational Linguistics
Montréal, Canada
June @@ -1000,14 +1000,14 @@ EnriqueFlores AlbertoBarrón-Cedeño PaoloRosso - LidiaMoreno + LidiaMoreno 1–4 N12-3001 flores-etal-2012-desocore
A Graphical User Interface for Feature-Based Opinion Mining - Pedro PauloBalage Filho + Pedro PauloBalage Filho CarolineBrun GilbertRondeau 5–8 @@ -1016,9 +1016,9 @@ Navigating Large Comment Threads with <fixed-case>C</fixed-case>o<fixed-case>F</fixed-case>i - ChristineDoran + ChristineDoran GuidoZarrella - John C.Henderson + John C.Henderson 9–12 N12-3003 doran-etal-2012-navigating @@ -1034,7 +1034,7 @@ An Interactive Humanoid Robot Exhibiting Flexible Sub-Dialogues HeribertoCuayáhuitl - IvanaKruijff-Korbayová + IvanaKruijff-Korbayová 17–20 N12-3005 cuayahuitl-kruijff-korbayova-2012-interactive @@ -1047,7 +1047,7 @@ HisamiSuzuki KristinaToutanova MichaelGamon - Wen-tauYih + Wen-tauYih ColinCherry LucyVanderwende 21–24 @@ -1057,7 +1057,7 @@ Incremental Speech Understanding in a Multi-Party Virtual Human Dialogue System DavidDeVault - DavidTraum + DavidTraum 25–28 N12-3007 devault-traum-2012-incremental @@ -1074,8 +1074,8 @@ <fixed-case>A</fixed-case>ttitude<fixed-case>M</fixed-case>iner: Mining Attitude from Online Discussions AmjadAbu-Jbara - AhmedHassan - DragomirRadev + AhmedHassan + DragomirRadev 33–36 N12-3009 abu-jbara-etal-2012-attitudeminer @@ -1084,7 +1084,7 @@ Tutorial Abstracts at the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies - RaduFlorian + RaduFlorian JacobEisenstein Association for Computational Linguistics
Montréal, Canada
@@ -1097,28 +1097,28 @@ 100 Things You Always Wanted to Know about Linguistics But Were Afraid to Ask* - Emily M.Bender + Emily M.Bender N12-4001 bender-2012-100 Structured Sparsity in Natural Language Processing: Models, Algorithms and Applications - André F. T.Martins + André F. T.Martins Mário A. T.Figueiredo - Noah A.Smith + Noah A.Smith N12-4002 martins-etal-2012-structured <fixed-case>A</fixed-case>rabic Dialect Processing Tutorial - MonaDiab + MonaDiab NizarHabash N12-4003 diab-habash-2012-arabic Natural Language Processing in <fixed-case>W</fixed-case>atson - Alfio M.Gliozzo + Alfio M.Gliozzo AdityaKalyanpur JamesFan N12-4004 diff --git a/data/xml/N13.xml b/data/xml/N13.xml index b9b1a51e96..4eb88764d3 100644 --- a/data/xml/N13.xml +++ b/data/xml/N13.xml @@ -5,7 +5,7 @@ Proceedings of the 2013 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies N13-1 LucyVanderwende - HalDaumé III + HalDaumé III KatrinKirchhoff Association for Computational Linguistics
Atlanta, Georgia
@@ -20,7 +20,7 @@ Model With Minimal Translation Units, But Decode With Phrases NadirDurrani - AlexanderFraser + AlexanderFraser HelmutSchmid 1–11 N13-1001 @@ -59,7 +59,7 @@ Multi-faceted Event Recognition with Bootstrapped Dictionaries RuihongHuang - EllenRiloff + EllenRiloff 41–51 N13-1005 huang-riloff-2013-multi @@ -69,7 +69,7 @@ Named Entity Recognition with Bilingual Constraints WanxiangChe MengqiuWang - Christopher D.Manning + Christopher D.Manning TingLiu 52–62 N13-1006 @@ -82,7 +82,7 @@ ChikaraHashimoto KentaroTorisawa TakaoKawai - Jun’ichiKazama + Jun’ichiKazama StijnDe Saeger 63–73 N13-1007 @@ -104,7 +104,7 @@ Extracting the Native Language Signal for Second Language Acquisition - BenSwanson + BenSwanson EugeneCharniak 85–94 N13-1009 @@ -113,7 +113,7 @@ An Analysis of Frequency- and Memory-Based Processing Costs - Martenvan Schijndel + Martenvan Schijndel WilliamSchuler 95–105 N13-1010 @@ -123,7 +123,7 @@ Cross-Lingual Semantic Similarity of Words as the Similarity of Their Semantic Word Responses IvanVulić - Marie-FrancineMoens + Marie-FrancineMoens 106–116 N13-1011 vulic-moens-2013-cross @@ -132,7 +132,7 @@ Combining multiple information types in <fixed-case>B</fixed-case>ayesian word segmentation GabrielDoyle - RogerLevy + RogerLevy 117–126 N13-1012 doyle-levy-2013-combining @@ -157,11 +157,11 @@ Experiments with Spectral Learning of Latent-Variable <fixed-case>PCFG</fixed-case>s - Shay B.Cohen + Shay B.Cohen KarlStratos - MichaelCollins - Dean P.Foster - LyleUngar + MichaelCollins + Dean P.Foster + LyleUngar 148–157 N13-1015 cohen-etal-2013-experiments @@ -178,7 +178,7 @@ Drug Extraction from the Web: Summarizing Drug Experiences with Multi-Dimensional Topic Models - Michael J.Paul + Michael J.Paul MarkDredze 168–178 N13-1017 @@ -189,8 +189,8 @@ Towards Topic Labeling with Phrase Entailment and Aggregation YasharMehdad GiuseppeCarenini - Raymond T.Ng - ShafiqJoty + Raymond T.Ng + ShafiqJoty 179–189 N13-1018 mehdad-etal-2013-towards @@ -210,8 +210,8 @@ Text Alignment for Real-Time Crowd Captioning IftekharNaim DanielGildea - WalterLasecki - Jeffrey P.Bigham + WalterLasecki + Jeffrey P.Bigham 201–210 N13-1020 naim-etal-2013-text @@ -221,7 +221,7 @@ Discriminative Joint Modeling of Lexical Variation and Acoustic Confusion for Automated Narrative Retelling Assessment MaiderLehr IzhakShafran - EmilyPrud’hommeaux + EmilyPrud’hommeaux BrianRoark 211–220 N13-1021 @@ -231,8 +231,8 @@ Using Out-of-Domain Data for Lexical Addressee Detection in Human-Human-Computer Dialog HeeyoungLee - AndreasStolcke - ElizabethShriberg + AndreasStolcke + ElizabethShriberg 221–229 N13-1022 lee-etal-2013-using @@ -242,8 +242,8 @@ Segmentation Strategies for Streaming Speech Translation Vivek KumarRangarajan Sridhar JohnChen - SrinivasBangalore - AndrejLjolje + SrinivasBangalore + AndrejLjolje RathinaveluChengalvarayan 230–238 N13-1023 @@ -254,7 +254,7 @@ Enforcing Subcategorization Constraints in a Parser Using Sub-parses Recombining Seyed AbolghasemMirroshandel AlexisNasr - BenoîtSagot + BenoîtSagot 239–247 N13-1024 mirroshandel-etal-2013-enforcing @@ -262,8 +262,8 @@ Large-Scale Discriminative Training for Statistical Machine Translation Using Held-Out Line Search JeffreyFlanigan - ChrisDyer - JaimeCarbonell + ChrisDyer + JaimeCarbonell 248–258 N13-1025 flanigan-etal-2013-large @@ -293,7 +293,7 @@ Improving Syntax-Augmented Machine Translation by Coarsening the Label Set GregHanneman - AlonLavie + AlonLavie 288–297 N13-1029 hanneman-lavie-2013-improving @@ -319,7 +319,7 @@ Improving reordering performance using higher order and structural features - Mitesh M.Khapra + Mitesh M.Khapra AnanthakrishnanRamanathan KarthikVisweswariah 315–324 @@ -377,10 +377,10 @@ Improved Part-of-Speech Tagging for Online Conversational Text with Word Clusters OlutobiOwoputi BrendanO’Connor - ChrisDyer + ChrisDyer KevinGimpel NathanSchneider - Noah A.Smith + Noah A.Smith 380–390 N13-1039 owoputi-etal-2013-improved @@ -388,7 +388,7 @@ Parser lexicalisation through self-learning MarekRei - TedBriscoe + TedBriscoe 391–400 N13-1040 rei-briscoe-2013-parser @@ -414,8 +414,8 @@ MarkYatskar SvitlanaVolkova AsliCelikyilmaz - BillDolan - LukeZettlemoyer + BillDolan + LukeZettlemoyer 416–425 N13-1043 yatskar-etal-2013-learning @@ -423,8 +423,8 @@ Morphological Analysis and Disambiguation for Dialectal <fixed-case>A</fixed-case>rabic NizarHabash - RyanRoth - OwenRambow + RyanRoth + OwenRambow RamyEskander NadiTomeh 426–432 @@ -433,8 +433,8 @@ Using a Supertagged Dependency Language Model to Select a Good Translation in System Combination - Wei-YunMa - KathleenMcKeown + Wei-YunMa + KathleenMcKeown 433–438 N13-1045 ma-mckeown-2013-using @@ -452,7 +452,7 @@ Better <fixed-case>T</fixed-case>witter Summaries? JoelJudd - JugalKalita + JugalKalita 445–449 N13-1047 judd-kalita-2013-better @@ -469,7 +469,7 @@ Automatic Morphological Enrichment of a Morphologically Underspecified Treebank SarahAlkuhlani NizarHabash - RyanRoth + RyanRoth 460–470 N13-1049 alkuhlani-etal-2013-automatic @@ -493,9 +493,9 @@ Approximate <fixed-case>PCFG</fixed-case> Parsing Using Tensor Decomposition - Shay B.Cohen + Shay B.Cohen GiorgioSatta - MichaelCollins + MichaelCollins 487–496 N13-1052 cohen-etal-2013-approximate @@ -503,7 +503,7 @@ Negative Deceptive Opinion Spam MyleOtt - ClaireCardie + ClaireCardie Jeffrey T.Hancock 497–501 N13-1053 @@ -513,7 +513,7 @@ Improving speech synthesis quality by reducing pitch peaks in the source recordings LuisinaViolante PabloRodríguez Zivic - AgustínGravano + AgustínGravano 502–506 N13-1054 violante-etal-2013-improving @@ -522,7 +522,7 @@ Robust Systems for Preposition Error Correction Using <fixed-case>W</fixed-case>ikipedia Revisions AoifeCahill NitinMadnani - JoelTetreault + JoelTetreault DianeNapolitano 507–517 N13-1055 @@ -539,7 +539,7 @@ Creating Reverse Bilingual Dictionaries Khang NhutLam - JugalKalita + JugalKalita 524–528 N13-1057 lam-kalita-2013-creating @@ -563,7 +563,7 @@ Modeling Syntactic and Semantic Structures in Hierarchical Phrase-based Translation - JunhuiLi + JunhuiLi PhilipResnik HalDaumé III 540–549 @@ -575,9 +575,9 @@ SethKulick AnnBies JustinMott - MohamedMaamouri - BeatriceSantorini - AnthonyKroch + MohamedMaamouri + BeatriceSantorini + AnthonyKroch 550–555 N13-1061 kulick-etal-2013-using @@ -592,7 +592,7 @@ Compound Embedding Features for Semi-supervised Learning MoYu - TiejunZhao + TiejunZhao DaxiangDong HaoTian DianhaiYu @@ -622,7 +622,7 @@ Processing Spontaneous Orthography RamyEskander NizarHabash - OwenRambow + OwenRambow NadiTomeh 585–595 N13-1066 @@ -632,14 +632,14 @@ Purpose and Polarity of Citation: Towards <fixed-case>NLP</fixed-case>-based Bibliometrics AmjadAbu-Jbara JeffersonEzra - DragomirRadev + DragomirRadev 596–606 N13-1067 abu-jbara-etal-2013-purpose Estimating effect size across datasets - AndersSøgaard + AndersSøgaard 607–611 N13-1068 sogaard-2013-estimating @@ -649,8 +649,8 @@ RabihZbib GretchenMarkiewicz SpyrosMatsoukas - RichardSchwartz - JohnMakhoul + RichardSchwartz + JohnMakhoul 612–616 N13-1069 zbib-etal-2013-systematic @@ -658,11 +658,11 @@ Down-stream effects of tree-to-dependency conversions JakobElming - AndersJohannsen + AndersJohannsen SigridKlerke EmanueleLapponi - HectorMartinez Alonso - AndersSøgaard + HectorMartinez Alonso + AndersSøgaard 617–626 N13-1070 elming-etal-2013-stream @@ -670,7 +670,7 @@ The Life and Death of Discourse Entities: Identifying Singleton Mentions MartaRecasens - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe ChristopherPotts 627–633 N13-1071 @@ -688,9 +688,9 @@ A Simple, Fast, and Effective Reparameterization of <fixed-case>IBM</fixed-case> Model 2 - ChrisDyer + ChrisDyer VictorChahuneau - Noah A.Smith + Noah A.Smith 644–648 N13-1073 dyer-etal-2013-simple @@ -699,7 +699,7 @@ Phrase Training Based Adaptation for Statistical Machine Translation SaabMansour - HermannNey + HermannNey 649–654 N13-1074 mansour-ney-2013-phrase @@ -709,7 +709,7 @@ Translation Acquisition Using Synonym Sets DanielAndrade MasaakiTsuchida - TakashiOnishi + TakashiOnishi KaiIshikawa 655–660 N13-1075 @@ -720,9 +720,9 @@ Supersense Tagging for <fixed-case>A</fixed-case>rabic: the <fixed-case>MT</fixed-case>-in-the-Middle Attack NathanSchneider BehrangMohit - ChrisDyer + ChrisDyer KemalOflazer - Noah A.Smith + Noah A.Smith 661–667 N13-1076 schneider-etal-2013-supersense @@ -730,7 +730,7 @@ <fixed-case>Z</fixed-case>ipfian corruptions for robust <fixed-case>POS</fixed-case> tagging - AndersSøgaard + AndersSøgaard 668–672 N13-1077 sogaard-2013-zipfian @@ -758,8 +758,8 @@ What’s in a Domain? Multi-Domain Learning for Multi-Attribute Data MaheshJoshi MarkDredze - William W.Cohen - Carolyn P.Rosé + William W.Cohen + Carolyn P.Rosé 685–690 N13-1080 joshi-etal-2013-whats @@ -767,7 +767,7 @@ An opinion about opinions about opinions: subjectivity and the aggregate reader - AsadSayeed + AsadSayeed 691–696 N13-1081 sayeed-2013-opinion @@ -777,7 +777,7 @@ An Examination of Regret in Bullying Tweets Jun-MingXu BenjaminBurchfiel - XiaojinZhu + XiaojinZhu AmyBellmore 697–702 N13-1082 @@ -786,10 +786,10 @@ A Cross-language Study on Automatic Speech Disfluency Detection - WenWang - AndreasStolcke + WenWang + AndreasStolcke JiahongYuan - MarkLiberman + MarkLiberman 703–708 N13-1083 wang-etal-2013-cross @@ -798,7 +798,7 @@ Distributional semantic models for the evaluation of disordered language MasoudRouhizadeh - EmilyPrud’hommeaux + EmilyPrud’hommeaux BrianRoark Janvan Santen 709–714 @@ -809,7 +809,7 @@ Atypical Prosodic Structure as an Indicator of Reading Level and Text Difficulty JulieMedero - MariOstendorf + MariOstendorf 715–720 N13-1085 medero-ostendorf-2013-atypical @@ -820,7 +820,7 @@ KaiWei YuzongLiu KatrinKirchhoff - JeffBilmes + JeffBilmes 721–726 N13-1086 wei-etal-2013-using @@ -829,7 +829,7 @@ Semi-Supervised Discriminative Language Modeling with Out-of-Domain Text Data ArdaÇelebi - MuratSaraçlar + MuratSaraçlar 727–732 N13-1087 celebi-saraclar-2013-semi @@ -839,7 +839,7 @@ More than meets the eye: Study of Human Cognition in Sense Annotation SalilJoshi DipteshKanojia - PushpakBhattacharyya + PushpakBhattacharyya 733–738 N13-1088 joshi-etal-2013-meets @@ -848,7 +848,7 @@ Improving Lexical Semantics for Sentential Semantics: Modeling Selectional Preference and Similar Words in a Latent Variable Model WeiweiGuo - MonaDiab + MonaDiab 739–745 N13-1089 guo-diab-2013-improving @@ -856,9 +856,9 @@ Linguistic Regularities in Continuous Space Word Representations - TomasMikolov - Wen-tauYih - GeoffreyZweig + TomasMikolov + Wen-tauYih + GeoffreyZweig 746–751 N13-1090 mikolov-etal-2013-linguistic @@ -887,7 +887,7 @@ Exploiting the Scope of Negations and Heterogeneous Features for Relation Extraction: A Case Study for Drug-Drug Interaction Extraction Md. Faisal MahbubChowdhury - AlbertoLavelli + AlbertoLavelli 765–771 N13-1093 chowdhury-lavelli-2013-exploiting @@ -896,7 +896,7 @@ Graph-Based Seed Set Expansion for Relation Extraction Using Random Walk Hitting Times JoelLang - JamesHenderson + JamesHenderson 772–776 N13-1094 lang-henderson-2013-graph @@ -905,7 +905,7 @@ Distant Supervision for Relation Extraction with an Incomplete Knowledge Base BonanMin - RalphGrishman + RalphGrishman LiWan ChangWang DavidGondek @@ -917,9 +917,9 @@ Measuring the Structural Importance through Rhetorical Structure Index NarineKokhlikyan - AlexWaibel + AlexWaibel YuqiZhang - Joy YingZhang + Joy YingZhang 783–788 N13-1096 kokhlikyan-etal-2013-measuring @@ -928,7 +928,7 @@ Separating Fact from Fear: Tracking Flu Infections on <fixed-case>T</fixed-case>witter AlexLamb - Michael J.Paul + Michael J.Paul MarkDredze 789–795 N13-1097 @@ -938,7 +938,7 @@ Differences in User Responses to a <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z versus Automated System JesseThomason - DianeLitman + DianeLitman 796–801 N13-1098 thomason-litman-2013-differences @@ -948,7 +948,7 @@ Improving the Quality of Minority Class Identification in Dialog Act Tagging AdinoyiOmuya VinodkumarPrabhakaran - OwenRambow + OwenRambow 802–807 N13-1099 omuya-etal-2013-improving @@ -993,7 +993,7 @@ Probabilistic Frame Induction - Jackie Chi KitCheung + Jackie Chi KitCheung HoifungPoon LucyVanderwende 837–846 @@ -1026,7 +1026,7 @@ Open Information Extraction with Tree Kernels YingXu Mi-YoungKim - KevinQuinn + KevinQuinn RandyGoebel DenilsonBarbosa 868–877 @@ -1038,11 +1038,11 @@ Finding What Matters in Questions - XiaoqiangLuo + XiaoqiangLuo HemaRaghavan VittorioCastelli SameerMaskey - RaduFlorian + RaduFlorian 878–887 N13-1108 luo-etal-2013-finding @@ -1053,7 +1053,7 @@ Hyun-JeSong JunhoGo Seong-BaePark - Se-YoungPark + Se-YoungPark 888–896 N13-1109 song-etal-2013-just @@ -1063,7 +1063,7 @@ Same Referent, Different Words: Unsupervised Mining of Opaque Coreferent Mentions MartaRecasens MatthewCan - DanielJurafsky + DanielJurafsky 897–906 N13-1110 recasens-etal-2013-referent @@ -1110,7 +1110,7 @@ Multi-Metric Optimization Using Ensemble Tuning - BaskaranSankaran + BaskaranSankaran AnoopSarkar KevinDuh 947–957 @@ -1122,7 +1122,7 @@ Grouping Language Model Boundary Words to Speed K–Best Extraction from Hypergraphs KennethHeafield PhilippKoehn - AlonLavie + AlonLavie 958–968 N13-1116 heafield-etal-2013-grouping @@ -1131,7 +1131,7 @@ A Systematic <fixed-case>B</fixed-case>ayesian Treatment of the <fixed-case>IBM</fixed-case> Alignment Models YarinGal - PhilBlunsom + PhilBlunsom 969–977 N13-1117 gal-blunsom-2013-systematic @@ -1149,7 +1149,7 @@ Three Knowledge-Free Methods for Automatic Lexical Chain Extraction SteffenRemus - ChrisBiemann + ChrisBiemann 989–999 N13-1119 remus-biemann-2013-three @@ -1158,10 +1158,10 @@ Combining Heterogeneous Models for Measuring Relational Similarity AlisaZhila - Wen-tauYih - ChristopherMeek - GeoffreyZweig - TomasMikolov + Wen-tauYih + ChristopherMeek + GeoffreyZweig + TomasMikolov 1000–1009 N13-1120 N13-1120.Presentation.pptx @@ -1235,7 +1235,7 @@ AdamVogel MaxBodoia ChristopherPotts - DanielJurafsky + DanielJurafsky 1072–1081 N13-1127 vogel-etal-2013-emergence @@ -1244,7 +1244,7 @@ Open Dialogue Management for Relational Databases BenHixon - Rebecca J.Passonneau + Rebecca J.Passonneau 1082–1091 N13-1128 hixon-passonneau-2013-open @@ -1253,7 +1253,7 @@ A method for the approximation of incremental understanding of explicit utterance meaning using predictive models in finite domains DavidDeVault - DavidTraum + DavidTraum 1092–1099 N13-1129 devault-traum-2013-method @@ -1271,7 +1271,7 @@ Labeling the Languages of Words in Mixed-Language Documents using Weakly Supervised Methods BenKing - StevenAbney + StevenAbney 1110–1119 N13-1131 king-abney-2013-labeling @@ -1282,7 +1282,7 @@ DirkHovy TaylorBerg-Kirkpatrick AshishVaswani - EduardHovy + EduardHovy 1120–1130 N13-1132 hovy-etal-2013-learning @@ -1291,7 +1291,7 @@ Supervised All-Words Lexical Substitution using Delexicalized Features GyörgySzarvas - ChrisBiemann + ChrisBiemann IrynaGurevych 1131–1141 N13-1133 @@ -1300,7 +1300,7 @@ A Tensor-based Factorization Model of Semantic Compositionality - TimVan de Cruys + TimVan de Cruys ThierryPoibeau AnnaKorhonen 1142–1151 @@ -1323,7 +1323,7 @@ Towards Coherent Multi-Document Summarization JanaraChristensen Mausam - StephenSoderland + StephenSoderland OrenEtzioni 1163–1173 N13-1136 @@ -1333,7 +1333,7 @@ Generating Expressions that Refer to Visible Objects MargaretMitchell - Keesvan Deemter + Keesvan Deemter EhudReiter 1174–1184 N13-1137 @@ -1361,8 +1361,8 @@ Knowledge-Rich Morphological Priors for <fixed-case>B</fixed-case>ayesian Language Models VictorChahuneau - Noah A.Smith - ChrisDyer + Noah A.Smith + ChrisDyer 1206–1215 N13-1140 chahuneau-etal-2013-knowledge @@ -1376,7 +1376,7 @@ AnnieLouis RichardSocher JuliaHockenmaier - Eric K.Ringger + Eric K.Ringger Association for Computational Linguistics
Atlanta, Georgia
June @@ -1389,7 +1389,7 @@ Critical Reflections on Evaluation Practices in Coreference Resolution - Gordana IlićHolen + Gordana IlićHolen 1–7 N13-2001 holen-2013-critical @@ -1397,7 +1397,7 @@ Reducing Annotation Effort on Unbalanced Corpus based on Cost Matrix WencanLuo - DianeLitman + DianeLitman JoelChan 8–15 N13-2002 @@ -1407,7 +1407,7 @@ A Machine Learning Approach to Automatic Term Extraction using a Rich Feature Set MerleyConrado ThiagoPardo - SolangeRezende + SolangeRezende 16–23 N13-2003 conrado-etal-2013-machine @@ -1430,7 +1430,7 @@ Ontology Label Translation - MihaelArcan + MihaelArcan PaulBuitelaar 40–46 N13-2006 @@ -1492,7 +1492,7 @@ Proceedings of the 2013 NAACL HLT Demonstration Session N13-3 - ChrisDyer + ChrisDyer DerrickHiggins Association for Computational Linguistics
Atlanta, Georgia
@@ -1517,7 +1517,7 @@ JustinSnyder RebeccaKnowles MarkDredze - MatthewGormley + MatthewGormley TravisWolfe 5–9 N13-3002 @@ -1525,10 +1525,10 @@
<fixed-case>TMT</fixed-case>prime: A Recommender System for <fixed-case>MT</fixed-case> and <fixed-case>TM</fixed-case> Integration - Aswarth AbhilashDara - SandipanDandapat + Aswarth AbhilashDara + SandipanDandapat DeclanGroves - Josefvan Genabith + Josefvan Genabith 10–13 N13-3003 dara-etal-2013-tmtprime @@ -1544,7 +1544,7 @@ A Web Application for the Diagnostic Evaluation of Machine Translation over Specific Linguistic Phenomena AntonioToral - SudipKumar Naskar + SudipKumar Naskar JorisVreeke FedericoGaspari DeclanGroves @@ -1562,9 +1562,9 @@ <fixed-case>UMLS</fixed-case>::<fixed-case>S</fixed-case>imilarity: Measuring the Relatedness and Similarity of Biomedical Concepts - BridgetMcInnes + BridgetMcInnes TedPedersen - SergueiPakhomov + SergueiPakhomov YingLiu GenevieveMelton-Meaux 28–31 @@ -1575,11 +1575,11 @@ <fixed-case>KELVIN</fixed-case>: a tool for automated knowledge base construction PaulMcNamee JamesMayfield - TimFinin + TimFinin TimOates DawnLawrie TanXu - DouglasOard + DouglasOard 32–35 N13-3008 mcnamee-etal-2013-kelvin @@ -1614,7 +1614,7 @@ Deep Learning for <fixed-case>NLP</fixed-case> (without Magic) RichardSocher - Christopher D.Manning + Christopher D.Manning 1–3 N13-4001 socher-manning-2013-deep @@ -1642,7 +1642,7 @@ Semantic Role Labeling - MarthaPalmer + MarthaPalmer IvanTitov ShuminWu 10–12 @@ -1654,11 +1654,11 @@ Spectral Learning Algorithms for Natural Language Processing - ShayCohen - MichaelCollins - DeanFoster + ShayCohen + MichaelCollins + DeanFoster KarlStratos - LyleUngar + LyleUngar 13–15 N13-4005 cohen-etal-2013-spectral @@ -1667,7 +1667,7 @@ Morphological, Syntactical and Semantic Knowledge in Statistical Machine Translation - MartaRuiz Costa-jussà + MartaRuiz Costa-jussà ChrisQuirk 16–18 N13-4006 diff --git a/data/xml/N15.xml b/data/xml/N15.xml index cfcc4a1fac..33f74ee670 100644 --- a/data/xml/N15.xml +++ b/data/xml/N15.xml @@ -4,8 +4,8 @@ Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies N15-1 - RadaMihalcea - JoyceChai + RadaMihalcea + JoyceChai AnoopSarkar 10.3115/v1/N15-1 Association for Computational Linguistics @@ -42,7 +42,7 @@ Improving unsupervised vector-space thematic fit evaluation via role-filler prototype clustering ClaytonGreenberg - AsadSayeed + AsadSayeed VeraDemberg 21–31 N15-1003 @@ -53,9 +53,9 @@ A Compositional and Interpretable Semantic Space AlonaFyshe LeilaWehbe - Partha P.Talukdar + Partha P.Talukdar BrianMurphy - Tom M.Mitchell + Tom M.Mitchell 32–41 N15-1004 10.3115/v1/N15-1004 @@ -79,7 +79,7 @@ Bharat RamAmbati TejaswiniDeoskar MarkJohnson - MarkSteedman + MarkSteedman 53–63 N15-1006 10.3115/v1/N15-1006 @@ -88,8 +88,8 @@ Because Syntax Does Matter: Improving Predicate-Argument Structures Parsing with Syntactic Features CorentinRibeyre - EricVillemonte de la Clergerie - DjaméSeddah + EricVillemonte de la Clergerie + DjaméSeddah 64–74 N15-1007 10.3115/v1/N15-1007 @@ -109,7 +109,7 @@ Young-BumKim MinwooJeong KarlStratos - RuhiSarikaya + RuhiSarikaya 84–92 N15-1009 10.3115/v1/N15-1009 @@ -119,7 +119,7 @@ Not All Character N-grams Are Created Equal: A Study in Authorship Attribution UpendraSapkota StevenBethard - ManuelMontes + ManuelMontes ThamarSolorio 93–102 N15-1010 @@ -128,7 +128,7 @@ Effective Use of Word Order for Text Categorization with Convolutional Neural Networks - RieJohnson + RieJohnson TongZhang 103–112 N15-1011 @@ -184,8 +184,8 @@ Combining Language and Vision with a Multimodal Skip-gram Model AngelikiLazaridou - Nghia ThePham - MarcoBaroni + Nghia ThePham + MarcoBaroni 153–163 N15-1016 10.3115/v1/N15-1016 @@ -194,7 +194,7 @@ Discriminative Unsupervised Alignment of Natural Language Instructions with Corresponding Video Segments IftekharNaim - Young C.Song + Young C.Song QiguangLiu LiangHuang HenryKautz @@ -209,7 +209,7 @@ <fixed-case>T</fixed-case>opic<fixed-case>C</fixed-case>heck: Interactive Alignment for Assessing Topic Model Stability JasonChuang Margaret E.Roberts - Brandon M.Stewart + Brandon M.Stewart RebeccaWeiss DustinTingley JustinGrimmer @@ -221,7 +221,7 @@ Inferring latent attributes of <fixed-case>T</fixed-case>witter users with label regularization - EhsanMohammady Ardehaly + EhsanMohammady Ardehaly AronCulotta 185–195 N15-1019 @@ -236,9 +236,9 @@ ChrisBrockett YangfengJi MargaretMitchell - Jian-YunNie + Jian-YunNie JianfengGao - BillDolan + BillDolan 196–205 N15-1020 10.3115/v1/N15-1020 @@ -257,7 +257,7 @@ Aligning Sentences from Standard <fixed-case>W</fixed-case>ikipedia to <fixed-case>S</fixed-case>imple <fixed-case>W</fixed-case>ikipedia WilliamHwang HannanehHajishirzi - MariOstendorf + MariOstendorf WeiWu 211–217 N15-1022 @@ -285,7 +285,7 @@ Spinning Straw into Gold: Using Free Text to Train Monolingual Alignment Models for Non-factoid Question Answering RebeccaSharp - PeterJansen + PeterJansen MihaiSurdeanu PeterClark 231–237 @@ -297,7 +297,7 @@ Personalized Page Rank for Named Entity Disambiguation MariaPershina YifanHe - RalphGrishman + RalphGrishman 238–243 N15-1026 10.3115/v1/N15-1026 @@ -346,7 +346,7 @@ Incrementally Tracking Reference in Human/Human Dialogue Using Linguistic and Extra-Linguistic Information - CaseyKennington + CaseyKennington RyuIida TakenobuTokunaga DavidSchlangen @@ -405,7 +405,7 @@ Interpreting Compound Noun Phrases Using Web Search Queries - MariusPaşca + MariusPaşca 335–344 N15-1037 10.3115/v1/N15-1037 @@ -413,10 +413,10 @@ Lexicon-Free Conversational Speech Recognition with Neural Networks - AndrewMaas + AndrewMaas ZiangXie - DanJurafsky - AndrewNg + DanJurafsky + AndrewNg 345–354 N15-1038 10.3115/v1/N15-1038 @@ -435,7 +435,7 @@ A Transition-based Algorithm for <fixed-case>AMR</fixed-case> Parsing ChuanWang NianwenXue - SameerPradhan + SameerPradhan 366–375 N15-1040 10.3115/v1/N15-1040 @@ -464,7 +464,7 @@ Latent Domain Word Alignment for Heterogeneous Corpora HoangCuong - KhalilSima’an + KhalilSima’an 398–408 N15-1043 10.3115/v1/N15-1043 @@ -472,17 +472,17 @@ Extracting Human Temporal Orientation from <fixed-case>F</fixed-case>acebook Language - H. AndrewSchwartz + H. AndrewSchwartz GregoryPark MaartenSap EvanWeingarten JohannesEichstaedt - MargaretKern + MargaretKern DavidStillwell MichalKosinski JonahBerger MartinSeligman - LyleUngar + LyleUngar 409–419 N15-1044 10.3115/v1/N15-1044 @@ -500,9 +500,9 @@ Using Summarization to Discover Argument Facets in Online Idealogical Dialog AmitaMisra - PranavAnand - Jean E.Fox Tree - MarilynWalker + PranavAnand + Jean E.Fox Tree + MarilynWalker 430–440 N15-1046 10.3115/v1/N15-1046 @@ -530,7 +530,7 @@ A Dynamic Programming Algorithm for Tree Trimming-based Text Summarization MasaakiNishino - NorihitoYasuda + NorihitoYasuda TsutomuHirao Shin-ichiMinato MasaakiNagata @@ -552,9 +552,9 @@ Corpus-based discovery of semantic intensity scales ChaitanyaShivade - Marie-Catherinede Marneffe - EricFosler-Lussier - Albert M.Lai + Marie-Catherinede Marneffe + EricFosler-Lussier + Albert M.Lai 483–493 N15-1051 10.3115/v1/N15-1051 @@ -564,7 +564,7 @@ Dialogue focus tracking for zero pronoun resolution SudhaRao AllysonEttinger - HalDaumé III + HalDaumé III PhilipResnik 494–503 N15-1052 @@ -593,8 +593,8 @@ Robust Morphological Tagging with Word Representations - ThomasMüller - HinrichSchuetze + ThomasMüller + HinrichSchuetze 526–536 N15-1055 10.3115/v1/N15-1055 @@ -634,7 +634,7 @@ <fixed-case>NASARI</fixed-case>: a Novel Approach to a Semantically-Aware Representation of Items - JoséCamacho-Collados + JoséCamacho-Collados Mohammad TaherPilehvar RobertoNavigli 567–577 @@ -645,7 +645,7 @@ Towards a standard evaluation method for grammatical error detection and correction MarianoFelice - TedBriscoe + TedBriscoe 578–587 N15-1060 10.3115/v1/N15-1060 @@ -654,7 +654,7 @@ Using Zero-Resource Spoken Term Discovery for Ranked Retrieval JeromeWhite - DouglasOard + DouglasOard ArenJansen JiaulPaik RashmiSankepally @@ -667,7 +667,7 @@ Constraint-Based Models of Lexical Borrowing YuliaTsvetkov WaleedAmmar - ChrisDyer + ChrisDyer 598–608 N15-1062 10.3115/v1/N15-1062 @@ -687,7 +687,7 @@ Jointly Modeling Inter-Slot Relations by Random Walk on Knowledge Graphs for Unsupervised Spoken Language Understanding Yun-NungChen William YangWang - AlexanderRudnicky + AlexanderRudnicky 619–629 N15-1064 10.3115/v1/N15-1064 @@ -705,8 +705,8 @@ Diamonds in the Rough: Event Extraction from Imperfect Microblog Data AnderIntxaurrondo - EnekoAgirre - OierLopez de Lacalle + EnekoAgirre + OierLopez de Lacalle MihaiSurdeanu 641–650 N15-1066 @@ -743,8 +743,8 @@ Ontologically Grounded Multi-sense Representation Learning for Semantic Vector Space Models Sujay KumarJauhar - ChrisDyer - EduardHovy + ChrisDyer + EduardHovy 683–693 N15-1070 10.3115/v1/N15-1070 @@ -771,9 +771,9 @@ Multitask Learning for Adaptive Quality Estimation of Automatically Transcribed Utterances - José G.C. de Souza + José G.C. de Souza HamedZamani - MatteoNegri + MatteoNegri MarcoTurchi DanieleFalavigna 714–724 @@ -785,7 +785,7 @@ Incorporating Word Correlation Knowledge into Topic Modeling PengtaoXie DiyiYang - EricXing + EricXing 725–734 N15-1074 10.3115/v1/N15-1074 @@ -806,7 +806,7 @@ JordanBoyd-Graber JeffreyLund KevinSeppi - EricRingger + EricRingger 746–755 N15-1076 10.3115/v1/N15-1076 @@ -814,7 +814,7 @@ Grounded Semantic Parsing for Complex Knowledge Extraction - Ankur P.Parikh + Ankur P.Parikh HoifungPoon KristinaToutanova 756–766 @@ -825,7 +825,7 @@ Sentiment after Translation: A Case-Study on <fixed-case>A</fixed-case>rabic Social Media Posts MohammadSalameh - SaifMohammad + SaifMohammad SvetlanaKiritchenko 767–777 N15-1078 @@ -845,8 +845,8 @@ Transforming Dependencies into Phrase Structures LingpengKong - Alexander M.Rush - Noah A.Smith + Alexander M.Rush + Noah A.Smith 788–798 N15-1080 10.3115/v1/N15-1080 @@ -874,7 +874,7 @@ Pragmatic Neural Language Modelling in Machine Translation PaulBaltescu - PhilBlunsom + PhilBlunsom 820–829 N15-1083 10.3115/v1/N15-1083 @@ -895,7 +895,7 @@ Semantic Grounding in Dialogue for Complex Problem Solving XiaolongLi - KristyBoyer + KristyBoyer 841–850 N15-1085 10.3115/v1/N15-1085 @@ -913,10 +913,10 @@ Sentence segmentation of aphasic speech - Kathleen C.Fraser + Kathleen C.Fraser NaamaBen-David GraemeHirst - NaidaGraham + NaidaGraham ElizabethRochon 862–871 N15-1087 @@ -926,7 +926,7 @@ Semantic parsing of speech using grammars learned with weak supervision JudithGaspers - PhilippCimiano + PhilippCimiano BrittaWrede 872–881 N15-1088 @@ -937,7 +937,7 @@ Early Gains Matter: A Case for Preferring Generative over Discriminative Crowdsourcing Models PaulFelt KevinBlack - EricRingger + EricRingger KevinSeppi RobbieHaertel 882–891 @@ -948,7 +948,7 @@ Optimizing Multivariate Performance Measures for Learning Relation Extraction Models - GholamrezaHaffari + GholamrezaHaffari AjayNagesh GaneshRamakrishnan 892–900 @@ -959,7 +959,7 @@ Convolutional Neural Network for Paraphrase Identification WenpengYin - HinrichSchütze + HinrichSchütze 901–911 N15-1091 10.3115/v1/N15-1091 @@ -992,7 +992,7 @@ Penalized Expectation Propagation for Graphical Models over Strings RyanCotterell - JasonEisner + JasonEisner 932–942 N15-1094 10.3115/v1/N15-1094 @@ -1020,8 +1020,8 @@ So similar and yet incompatible: Toward the automated identification of semantically compatible words - GermánKruszewski - MarcoBaroni + GermánKruszewski + MarcoBaroni 964–969 N15-1097 10.3115/v1/N15-1097 @@ -1031,7 +1031,7 @@ Do Supervised Distributional Methods Really Learn Lexical Inference Relations? OmerLevy SteffenRemus - ChrisBiemann + ChrisBiemann IdoDagan 970–976 N15-1098 @@ -1042,7 +1042,7 @@ A Word Embedding Approach to Predicting the Compositionality of Multiword Expressions BaharSalehi PaulCook - TimothyBaldwin + TimothyBaldwin 977–983 N15-1099 10.3115/v1/N15-1099 @@ -1072,7 +1072,7 @@ KatrinKirchhoff Yik-CheungTam ColleenRichey - WenWang + WenWang 995–1000 N15-1102 10.3115/v1/N15-1102 @@ -1080,7 +1080,7 @@ Continuous Adaptation to User Feedback for Statistical Machine Translation - FrédéricBlain + FrédéricBlain FethiBougares AmirHazem LoïcBarrault @@ -1103,7 +1103,7 @@ Fast and Accurate Preordering for <fixed-case>SMT</fixed-case> using Neural Networks - Adriàde Gispert + Adriàde Gispert GonzaloIglesias BillByrne 1012–1017 @@ -1173,7 +1173,7 @@ Socially-Informed Timeline Generation for Complex Events LuWang - ClaireCardie + ClaireCardie GalenMarchetti 1055–1065 N15-1112 @@ -1183,7 +1183,7 @@ Movie Script Summarization as Graph-based Scene Extraction - Philip JohnGorinski + Philip JohnGorinski MirellaLapata 1066–1076 N15-1113 @@ -1196,7 +1196,7 @@ JeffreyFlanigan SamThomson NormanSadeh - Noah A.Smith + Noah A.Smith 1077–1086 N15-1114 10.3115/v1/N15-1114 @@ -1237,7 +1237,7 @@ Injecting Logical Background Knowledge into Embeddings for Relation Extraction - TimRocktäschel + TimRocktäschel SameerSingh SebastianRiedel 1119–1129 @@ -1272,7 +1272,7 @@ High-Order Low-Rank Tensors for Semantic Role Labeling TaoLei YuanZhang - LluísMàrquez + LluísMàrquez AlessandroMoschitti ReginaBarzilay 1150–1160 @@ -1283,8 +1283,8 @@ Lexical Event Ordering with an Edge-Factored Model OmriAbend - Shay B.Cohen - MarkSteedman + Shay B.Cohen + MarkSteedman 1161–1171 N15-1122 10.3115/v1/N15-1122 @@ -1302,7 +1302,7 @@ Accurate Evaluation of Segment-level Machine Translation Metrics YvetteGraham - TimothyBaldwin + TimothyBaldwin NitikaMathur 1183–1191 N15-1124 @@ -1312,9 +1312,9 @@ Leveraging Small Multilingual Corpora for <fixed-case>SMT</fixed-case> Using Many Pivot Languages RajDabre - FabienCromieres + FabienCromieres SadaoKurohashi - PushpakBhattacharyya + PushpakBhattacharyya 1192–1202 N15-1125 10.3115/v1/N15-1125 @@ -1325,7 +1325,7 @@ DianYu HengJi SujianLi - Chin-YewLin + Chin-YewLin 1203–1208 N15-1126 10.3115/v1/N15-1126 @@ -1342,7 +1342,7 @@ Distributed Representations of Words to Guide Bootstrapped Entity Classifiers SonalGupta - Christopher D.Manning + Christopher D.Manning 1215–1220 N15-1128 10.3115/v1/N15-1128 @@ -1383,7 +1383,7 @@ DhirendraSingh RudramurthyV HanumantRedkar - PushpakBhattacharyya + PushpakBhattacharyya 1238–1243 N15-1132 10.3115/v1/N15-1132 @@ -1410,9 +1410,9 @@ Mining for unambiguous instances to adapt part-of-speech taggers to new domains DirkHovy - BarbaraPlank - HéctorMartínez Alonso - AndersSøgaard + BarbaraPlank + HéctorMartínez Alonso + AndersSøgaard 1256–1261 N15-1135 10.3115/v1/N15-1135 @@ -1420,7 +1420,7 @@ Clustering Sentences with Density Peaks for Multi-document Summarization - YangZhang + YangZhang YunqingXia YiLiu WenminWang @@ -1431,7 +1431,7 @@ Development of the Multilingual Semantic Annotation System - ScottPiao + ScottPiao FrancescaBianchi CarmenDayrell AngelaD’Egidio @@ -1453,8 +1453,8 @@ #<fixed-case>W</fixed-case>hy<fixed-case>IS</fixed-case>tayed, #<fixed-case>W</fixed-case>hy<fixed-case>IL</fixed-case>eft: Microblogging to Make Sense of Domestic Abuse NicolasSchrading - CeciliaOvesdotter Alm - RaymondPtucha + CeciliaOvesdotter Alm + RaymondPtucha ChristopherHoman 1281–1286 N15-1139 @@ -1464,7 +1464,7 @@ Morphological Word-Embeddings RyanCotterell - HinrichSchütze + HinrichSchütze 1287–1292 N15-1140 10.3115/v1/N15-1140 @@ -1474,7 +1474,7 @@ Recognizing Social Constructs from Textual Conversation SomakAditya ChittaBaral - NguyenHa Vo + NguyenHa Vo JoohyungLee JiepingYe ZawNaung @@ -1491,8 +1491,8 @@ Two/Too Simple Adaptations of <fixed-case>W</fixed-case>ord2<fixed-case>V</fixed-case>ec for Syntax Problems WangLing - ChrisDyer - Alan W.Black + ChrisDyer + Alan W.Black IsabelTrancoso 1299–1304 N15-1142 @@ -1502,7 +1502,7 @@ Estimating Numerical Attributes by Bringing Together Fragmentary Clues HiroyaTakamura - Jun’ichiTsujii + Jun’ichiTsujii 1305–1310 N15-1143 10.3115/v1/N15-1143 @@ -1512,8 +1512,8 @@ Unsupervised <fixed-case>POS</fixed-case> Induction with Word Embeddings Chu-ChengLin WaleedAmmar - ChrisDyer - LoriLevin + ChrisDyer + LoriLevin 1311–1316 N15-1144 10.3115/v1/N15-1144 @@ -1532,7 +1532,7 @@ <fixed-case>MPQA</fixed-case> 3.0: An Entity/Event-Level Sentiment Corpus LingjiaDeng - JanyceWiebe + JanyceWiebe 1323–1328 N15-1146 10.3115/v1/N15-1146 @@ -1569,7 +1569,7 @@ AndrewShin RyoheiSasano HiroyaTakamura - ManabuOkumura + ManabuOkumura 1345–1350 N15-1150 10.3115/v1/N15-1150 @@ -1586,10 +1586,10 @@ Learning to parse with <fixed-case>IAA</fixed-case>-weighted loss - HéctorMartínez Alonso - BarbaraPlank + HéctorMartínez Alonso + BarbaraPlank ArneSkjærholt - AndersSøgaard + AndersSøgaard 1357–1361 N15-1152 10.3115/v1/N15-1152 @@ -1599,8 +1599,8 @@ Exploiting Text and Network Context for Geolocation of Social Media Users AfshinRahimi DuyVu - TrevorCohn - TimothyBaldwin + TrevorCohn + TimothyBaldwin 1362–1367 N15-1153 10.3115/v1/N15-1153 @@ -1609,7 +1609,7 @@ Discriminative Phrase Embedding for Paraphrase Identification WenpengYin - HinrichSchütze + HinrichSchütze 1368–1373 N15-1154 10.3115/v1/N15-1154 @@ -1618,7 +1618,7 @@ Combining Word Embeddings and Feature Embeddings for Fine-grained Relation Extraction MoYu - Matthew R.Gormley + Matthew R.Gormley MarkDredze 1374–1379 N15-1155 @@ -1638,7 +1638,7 @@ Simple task-specific bilingual word embeddings StephanGouws - AndersSøgaard + AndersSøgaard 1386–1390 N15-1157 10.3115/v1/N15-1157 @@ -1665,7 +1665,7 @@ Large-Scale Native Language Identification with Cross-Corpus Evaluation - ShervinMalmasi + ShervinMalmasi MarkDras 1403–1409 N15-1160 @@ -1675,7 +1675,7 @@ <fixed-case>U</fixed-case>nediting: Detecting Disfluencies Without Careful Transcripts VictoriaZayats - MariOstendorf + MariOstendorf HannanehHajishirzi 1410–1415 N15-1161 @@ -1713,8 +1713,8 @@ Random Walks and Neural Network Language Models on Knowledge Bases JosuGoikoetxea - AitorSoroa - EnekoAgirre + AitorSoroa + EnekoAgirre 1434–1439 N15-1165 10.3115/v1/N15-1165 @@ -1743,7 +1743,7 @@ Extracting Information about Medication Use from Veterinary Discussions HaiboDing - EllenRiloff + EllenRiloff 1452–1458 N15-1168 10.3115/v1/N15-1168 @@ -1784,7 +1784,7 @@ Echoes of Persuasion: The Effect of Euphony in Persuasive Communication MarcoGuerini - GözdeÖzbal + GözdeÖzbal CarloStrapparava 1483–1493 N15-1172 @@ -1797,7 +1797,7 @@ HuijuanXu JeffDonahue MarcusRohrbach - RaymondMooney + RaymondMooney KateSaenko 1494–1504 N15-1173 @@ -1820,7 +1820,7 @@ SebastianMuehr PatrickLehnen StephanPeitz - HermannNey + HermannNey 1516–1526 N15-1175 10.3115/v1/N15-1175 @@ -1829,7 +1829,7 @@ Learning Translation Models from Monolingual Continuous Representations KaiZhao - HanyHassan + HanyHassan MichaelAuli 1527–1536 N15-1176 @@ -1839,7 +1839,7 @@ A Corpus and Model Integrating Multiword Expressions and Supersenses NathanSchneider - Noah A.Smith + Noah A.Smith 1537–1547 N15-1177 10.3115/v1/N15-1177 @@ -1856,7 +1856,7 @@ Do We Really Need Lexical Information? Towards a Top-down Approach to Sentiment Analysis of Product Reviews - YuliaOtmakhova + YuliaOtmakhova HyopilShin 1559–1568 N15-1179 @@ -1884,7 +1884,7 @@ Shared common ground influences information density in microblog texts GabrielDoyle - MichaelFrank + MichaelFrank 1587–1596 N15-1182 10.3115/v1/N15-1182 @@ -1892,7 +1892,7 @@ Hierarchic syntax improves reading time prediction - Martenvan Schijndel + Martenvan Schijndel WilliamSchuler 1597–1605 N15-1183 @@ -1904,9 +1904,9 @@ ManaalFaruqui JesseDodge Sujay KumarJauhar - ChrisDyer - EduardHovy - Noah A.Smith + ChrisDyer + EduardHovy + Noah A.Smith 1606–1615 N15-1184 10.3115/v1/N15-1184 @@ -1924,7 +1924,7 @@ Unsupervised Morphology Induction Using Word Embeddings RaduSoricut - FranzOch + FranzOch 1627–1637 N15-1186 10.3115/v1/N15-1186 @@ -1935,7 +1935,7 @@ Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Student Research Workshop N15-2 - DianaInkpen + DianaInkpen SmarandaMuresan ShibamouliLahiri KarenMazidi @@ -1961,7 +1961,7 @@ Reliable Lexical Simplification for Non-Native Speakers - GustavoPaetzold + GustavoPaetzold 9–16 N15-2002 10.3115/v1/N15-2002 @@ -1990,7 +1990,7 @@ Towards a Better Semantic Role Labeling of Complex Predicates GloriannaJagfeld - Lonnekevan der Plas + Lonnekevan der Plas 33–39 N15-2005 10.3115/v1/N15-2005 @@ -2057,7 +2057,7 @@ Relation Extraction from Community Generated Question-Answer Pairs DenisSavenkov - Wei-LwunLu + Wei-LwunLu JeffDalton EugeneAgichtein 96–102 @@ -2084,7 +2084,7 @@ Discourse and Document-level Information for Evaluating Language Output Tasks - CarolinaScarton + CarolinaScarton 118–125 N15-2016 10.3115/v1/N15-2016 @@ -2110,7 +2110,7 @@ Semantics-based Graph Approach to Complex Question-Answering TomaszJurczyk - Jinho D.Choi + Jinho D.Choi 140–146 N15-2019 10.3115/v1/N15-2019 @@ -2145,7 +2145,7 @@ Computational Exploration to Linguistic Structures of Future: Classification and Categorization AimingNi - Jinho D.Choi + Jinho D.Choi JasonShepard PhillipWolff 168–173 @@ -2158,9 +2158,9 @@ Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Demonstrations N15-3 - MattGerber + MattGerber CatherineHavasi - FinleyLacatusu + FinleyLacatusu 10.3115/v1/N15-3 Association for Computational Linguistics
Denver, Colorado
@@ -2176,7 +2176,7 @@ Two Practical <fixed-case>R</fixed-case>hetorical <fixed-case>S</fixed-case>tructure <fixed-case>T</fixed-case>heory Parsers MihaiSurdeanu TomHicks - Marco AntonioValenzuela-Escárcega + Marco AntonioValenzuela-Escárcega 1–5 N15-3001 10.3115/v1/N15-3001 @@ -2207,7 +2207,7 @@ XiangminFan MuhsinMenekse JingtaoWang - DianeLitman + DianeLitman 16–20 N15-3004 10.3115/v1/N15-3004 @@ -2216,7 +2216,7 @@ <fixed-case>RE</fixed-case>xtractor: a Robust Information Extractor VincentKríž - BarboraHladká + BarboraHladká 21–25 N15-3005 10.3115/v1/N15-3005 @@ -2235,7 +2235,7 @@ <fixed-case>ICE</fixed-case>: Rapid Information Extraction Customization for <fixed-case>NLP</fixed-case> Novices YifanHe - RalphGrishman + RalphGrishman 31–35 N15-3007 10.3115/v1/N15-3007 @@ -2255,7 +2255,7 @@ YusukeOda GrahamNeubig SakrianiSakti - TomokiToda + TomokiToda SatoshiNakamura 41–45 N15-3009 @@ -2264,7 +2264,7 @@ <fixed-case>ELCO</fixed-case>3: Entity Linking with Corpus Coherence Combining Open Source Annotators - PabloRuiz + PabloRuiz ThierryPoibeau FrédériqueMélanie 46–50 @@ -2285,7 +2285,7 @@ Visualizing Deep-Syntactic Parser Output - JuanSoler-Company + JuanSoler-Company MiguelBallesteros BerndBohnet SimonMille @@ -2298,7 +2298,7 @@ <fixed-case>WOLFE</fixed-case>: An <fixed-case>NLP</fixed-case>-friendly Declarative Machine Learning Stack SameerSingh - TimRocktäschel + TimRocktäschel LukeHewitt JasonNaradowsky SebastianRiedel @@ -2337,7 +2337,7 @@ Brahmi-Net: A transliteration and script conversion system for languages of the <fixed-case>I</fixed-case>ndian subcontinent AnoopKunchukuttan RatishPuduppully - PushpakBhattacharyya + PushpakBhattacharyya 81–85 N15-3017 10.3115/v1/N15-3017 @@ -2351,7 +2351,7 @@ NicholasAndrews JayDeYoung MaxThomas - Matthew R.Gormley + Matthew R.Gormley TravisWolfe CraigHarman BenjaminVan Durme @@ -2366,7 +2366,7 @@ HubertSoyer GoranTopić PontusStenetorp - AkikoAizawa + AkikoAizawa 91–95 N15-3019 10.3115/v1/N15-3019 @@ -2375,7 +2375,7 @@ Online Readability and Text Complexity Analysis with <fixed-case>T</fixed-case>ext<fixed-case>E</fixed-case>valuator DianeNapolitano - KathleenSheehan + KathleenSheehan RobertMundkowsky 96–100 N15-3020 @@ -2395,8 +2395,8 @@ <fixed-case>W</fixed-case>rite<fixed-case>A</fixed-case>head2: Mining Lexical Grammar Patterns for Assisted Writing - JimChang - JasonChang + JimChang + JasonChang 106–110 N15-3022 10.3115/v1/N15-3022 @@ -2409,7 +2409,7 @@ ByungsooKim SangdoHan HyosupShim - Gary GeunbaeLee + Gary GeunbaeLee 111–115 N15-3023 10.3115/v1/N15-3023 @@ -2418,8 +2418,8 @@ Using Word Semantics To Assist <fixed-case>E</fixed-case>nglish as a Second Language Learners MahmoudAzab - ChrisHokamp - RadaMihalcea + ChrisHokamp + RadaMihalcea 116–120 N15-3024 10.3115/v1/N15-3024 @@ -2445,7 +2445,7 @@ Hands-on Learning to Search for Structured Prediction - HalDaumé III + HalDaumé III JohnLangford Kai-WeiChang HeHe @@ -2458,7 +2458,7 @@ Crowdsourcing for <fixed-case>NLP</fixed-case> ChrisCallison-Burch - LyleUngar + LyleUngar ElliePavlick 2–3 N15-4002 @@ -2477,7 +2477,7 @@ Deep Learning and Continuous Representations for Natural Language Processing - Wen-tauYih + Wen-tauYih XiaodongHe JianfengGao 6–8 @@ -2498,10 +2498,10 @@ Getting the Roles Right: Using <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et in <fixed-case>NLP</fixed-case> - Collin F.Baker + Collin F.Baker NathanSchneider - Miriam R. L.Petruck - MichaelEllsworth + Miriam R. L.Petruck + MichaelEllsworth 10–12 N15-4006 10.3115/v1/N15-4006 diff --git a/data/xml/N16.xml b/data/xml/N16.xml index f5dd1a67e9..559b567642 100644 --- a/data/xml/N16.xml +++ b/data/xml/N16.xml @@ -6,7 +6,7 @@ N16-1 KevinKnight AniNenkova - OwenRambow + OwenRambow 10.18653/v1/N16-1 Association for Computational Linguistics
San Diego, California
@@ -30,7 +30,7 @@ Flexible Non-Terminals for Dependency Tree-to-Tree Reordering JohnRichardson - FabienCromières + FabienCromières ToshiakiNakazawa SadaoKurohashi 11–19 @@ -42,7 +42,7 @@ Selecting Syntactic, Non-redundant Segments in Active Learning for Machine Translation AkivaMiura GrahamNeubig - MichaelPaul + MichaelPaul SatoshiNakamura 20–29 N16-1003 @@ -89,8 +89,8 @@ A Low-Rank Approximation Approach to Learning Joint Embeddings of News Stories and Images for Timeline Summarization William YangWang YasharMehdad - Dragomir R.Radev - AmandaStent + Dragomir R.Radev + AmandaStent 58–68 N16-1008 10.18653/v1/N16-1008 @@ -111,7 +111,7 @@ WencanLuo FeiLiu ZitaoLiu - DianeLitman + DianeLitman 80–85 N16-1010 10.18653/v1/N16-1010 @@ -130,7 +130,7 @@ Abstractive Sentence Summarization with Attentive Recurrent Neural Networks SumitChopra MichaelAuli - Alexander M.Rush + Alexander M.Rush 93–98 N16-1012 10.18653/v1/N16-1012 @@ -140,7 +140,7 @@ Integer Linear Programming for Discourse Parsing JérémyPerret StergosAfantenos - NicholasAsher + NicholasAsher MathieuMorey 99–109 N16-1013 @@ -153,7 +153,7 @@ MichelGalley ChrisBrockett JianfengGao - BillDolan + BillDolan 110–119 N16-1014 10.18653/v1/N16-1014 @@ -162,12 +162,12 @@ Multi-domain Neural Network Language Generation for Spoken Dialogue Systems Tsung-HsienWen - MilicaGašić + MilicaGašić NikolaMrkšić - Lina M.Rojas-Barahona + Lina M.Rojas-Barahona Pei-HaoSu DavidVandyke - SteveYoung + SteveYoung 120–129 N16-1015 10.18653/v1/N16-1015 @@ -198,12 +198,12 @@ NikolaMrkšić DiarmuidÓ Séaghdha BlaiseThomson - MilicaGašić - Lina M.Rojas-Barahona + MilicaGašić + Lina M.Rojas-Barahona Pei-HaoSu DavidVandyke Tsung-HsienWen - SteveYoung + SteveYoung 142–148 N16-1018 10.18653/v1/N16-1018 @@ -215,8 +215,8 @@ QiaoziGao ChangsongLiu CaimingXiong - Song-ChunZhu - Joyce Y.Chai + Song-ChunZhu + Joyce Y.Chai 149–159 N16-1019 10.18653/v1/N16-1019 @@ -235,9 +235,9 @@ Bridge Correlational Neural Networks for Multilingual Multimodal Representation Learning JanarthananRajendran - Mitesh M.Khapra + Mitesh M.Khapra SarathChandar - BalaramanRavindran + BalaramanRavindran 171–181 N16-1021 10.18653/v1/N16-1021 @@ -265,10 +265,10 @@ Recurrent Neural Network Grammars - ChrisDyer + ChrisDyer AdhigunaKuncoro MiguelBallesteros - Noah A.Smith + Noah A.Smith 199–209 N16-1024 10.18653/v1/N16-1024 @@ -288,7 +288,7 @@ <fixed-case>LSTM</fixed-case> <fixed-case>CCG</fixed-case> Parsing MikeLewis KentonLee - LukeZettlemoyer + LukeZettlemoyer 221–231 N16-1026 10.18653/v1/N16-1026 @@ -334,7 +334,7 @@ MiguelBallesteros SandeepSubramanian KazuyaKawakami - ChrisDyer + ChrisDyer 260–270 N16-1030 10.18653/v1/N16-1030 @@ -342,7 +342,7 @@ Dynamic Feature Induction: The Last Gist to the State-of-the-Art - Jinho D.Choi + Jinho D.Choi 271–281 N16-1031 10.18653/v1/N16-1031 @@ -350,9 +350,9 @@ Drop-out Conditional Random Fields for <fixed-case>T</fixed-case>witter with Huge Mined Gazetteer - EunsukYang + EunsukYang Young-BumKim - RuhiSarikaya + RuhiSarikaya Yu-SeopKim 282–288 N16-1032 @@ -362,7 +362,7 @@ Joint Extraction of Events and Entities within a Document Context BishanYang - Tom M.Mitchell + Tom M.Mitchell 289–299 N16-1033 10.18653/v1/N16-1033 @@ -372,7 +372,7 @@ Joint Event Extraction via Recurrent Neural Networks Thien HuuNguyen KyunghyunCho - RalphGrishman + RalphGrishman 300–309 N16-1034 10.18653/v1/N16-1034 @@ -390,7 +390,7 @@ Recurrent Memory Networks for Language Modeling - KeTran + KeTran AriannaBisazza ChristofMonz 321–331 @@ -401,7 +401,7 @@ A Latent Variable Recurrent Neural Network for Discourse-Driven Language Models YangfengJi - GholamrezaHaffari + GholamrezaHaffari JacobEisenstein 332–342 N16-1037 @@ -411,7 +411,7 @@ Questioning Arbitrariness in Language: a Data-Driven Study of Conventional Iconicity EkaterinaAbramova - RaquelFernández + RaquelFernández 343–352 N16-1038 10.18653/v1/N16-1038 @@ -420,7 +420,7 @@ Distinguishing Literal and Non-Literal Usage of <fixed-case>G</fixed-case>erman Particle Verbs MaximilianKöper - SabineSchulte im Walde + SabineSchulte im Walde 353–362 N16-1039 10.18653/v1/N16-1039 @@ -447,7 +447,7 @@ Grammatical error correction using neural machine translation ZhengYuan - TedBriscoe + TedBriscoe 380–386 N16-1042 10.18653/v1/N16-1042 @@ -456,9 +456,9 @@ Multimodal Semantic Learning from Child-Directed Input AngelikiLazaridou - GrzegorzChrupała - RaquelFernández - MarcoBaroni + GrzegorzChrupała + RaquelFernández + MarcoBaroni 387–392 N16-1043 10.18653/v1/N16-1043 @@ -470,8 +470,8 @@ KaishengYao HuChen DongYu - Yi-ChengPan - Mei-YuhHwang + Yi-ChengPan + Mei-YuhHwang 393–399 N16-1044 10.18653/v1/N16-1044 @@ -479,7 +479,7 @@ Expectation-Regulated Neural Model for Event Mention Extraction - Ching-YunChang + Ching-YunChang ZhiyangTeng YueZhang 400–410 @@ -492,7 +492,7 @@ LemaoLiu MasaoUtiyama AndrewFinch - EiichiroSumita + EiichiroSumita 411–416 N16-1046 10.18653/v1/N16-1046 @@ -501,7 +501,7 @@ Psycholinguistic Features for Deceptive Role Detection in Werewolf CodrutaGirlea - RoxanaGirju + RoxanaGirju EyalAmir 417–422 N16-1047 @@ -510,8 +510,8 @@ Individual Variation in the Choice of Referential Form - ThiagoCastro Ferreira - EmielKrahmer + ThiagoCastro Ferreira + EmielKrahmer SanderWubben 423–427 N16-1048 @@ -522,7 +522,7 @@ Joint Learning Templates and Slots for Event Schema Induction LeiSha SujianLi - BaobaoChang + BaobaoChang ZhifangSui 428–434 N16-1049 @@ -531,7 +531,7 @@ Inferring Psycholinguistic Properties of Words - GustavoPaetzold + GustavoPaetzold LuciaSpecia 435–440 N16-1050 @@ -554,7 +554,7 @@ Shift-Reduce <fixed-case>CCG</fixed-case> Parsing using Neural Network Models Bharat RamAmbati TejaswiniDeoskar - MarkSteedman + MarkSteedman 447–453 N16-1052 10.18653/v1/N16-1052 @@ -563,7 +563,7 @@ Online Multilingual Topic Models with Multi-Level Hyperpriors KristeKrstovski - DavidSmith + DavidSmith Michael J.Kurtz 454–459 N16-1053 @@ -592,7 +592,7 @@ Bidirectional <fixed-case>RNN</fixed-case> for Medical Event Detection in Electronic Health Records - Abhyuday NJagannatha + Abhyuday NJagannatha HongYu 473–482 N16-1056 @@ -602,7 +602,7 @@ The Sensitivity of Topic Coherence Evaluation to Topic Cardinality Jey HanLau - TimothyBaldwin + TimothyBaldwin 483–487 N16-1057 10.18653/v1/N16-1057 @@ -612,7 +612,7 @@ Transition-Based Syntactic Linearization with Lookahead Features RatishPuduppully YueZhang - ManishShrivastava + ManishShrivastava 488–493 N16-1058 10.18653/v1/N16-1058 @@ -669,7 +669,7 @@ Learning Distributed Word Representations For Bidirectional <fixed-case>LSTM</fixed-case> Recurrent Neural Network PeiluWang YaoQian - Frank K.Soong + Frank K.Soong LeiHe HaiZhao 527–533 @@ -682,7 +682,7 @@ Ngoc ThangVu HeikeAdel PankajGupta - HinrichSchütze + HinrichSchütze 534–539 N16-1065 10.18653/v1/N16-1065 @@ -690,7 +690,7 @@ Building <fixed-case>C</fixed-case>hinese Affective Resources in Valence-Arousal Dimensions - Liang-ChihYu + Liang-ChihYu Lung-HaoLee ShuaiHao JinWang @@ -716,7 +716,7 @@ Structured Prediction with Output Embeddings for Semantic Image Annotation AriadnaQuattoni ArnauRamisa - Pranava SwaroopMadhyastha + Pranava SwaroopMadhyastha EdgarSimo-Serra FrancescMoreno-Noguer 552–557 @@ -782,7 +782,7 @@ Unsupervised Compound Splitting With Distributional Semantics Rivals Supervised Methods MartinRiedl - ChrisBiemann + ChrisBiemann 617–622 N16-1075 10.18653/v1/N16-1075 @@ -792,7 +792,7 @@ Weighting Finite-State Transductions With Neural Context PushpendreRastogi RyanCotterell - JasonEisner + JasonEisner 623–633 N16-1076 10.18653/v1/N16-1076 @@ -803,7 +803,7 @@ ManaalFaruqui YuliaTsvetkov GrahamNeubig - ChrisDyer + ChrisDyer 634–643 N16-1077 10.18653/v1/N16-1077 @@ -812,7 +812,7 @@ Towards Unsupervised and Language-independent Compound Splitting using Inflectional Morphological Transformations PatrickZiering - Lonnekevan der Plas + Lonnekevan der Plas 644–653 N16-1078 10.18653/v1/N16-1078 @@ -821,8 +821,8 @@ Phonological Pun-derstanding AaronJaech - RikKoncel-Kedziorski - MariOstendorf + RikKoncel-Kedziorski + MariOstendorf 654–663 N16-1079 10.18653/v1/N16-1079 @@ -832,7 +832,7 @@ A Joint Model of Orthography and Morphological Segmentation RyanCotterell TimVieira - HinrichSchütze + HinrichSchütze 664–669 N16-1080 10.18653/v1/N16-1080 @@ -853,8 +853,8 @@ Visualizing and Understanding Neural Models in <fixed-case>NLP</fixed-case> JiweiLi XinleiChen - EduardHovy - DanJurafsky + EduardHovy + DanJurafsky 681–691 N16-1082 10.18653/v1/N16-1082 @@ -871,9 +871,9 @@ Joint Learning with Global Inference for Comment Classification in Community Question Answering - ShafiqJoty - LluísMàrquez - PreslavNakov + ShafiqJoty + LluísMàrquez + PreslavNakov 703–713 N16-1084 10.18653/v1/N16-1084 @@ -904,9 +904,9 @@ Generation from <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation using Tree Transducers JeffreyFlanigan - ChrisDyer - Noah A.Smith - JaimeCarbonell + ChrisDyer + Noah A.Smith + JaimeCarbonell 731–739 N16-1087 10.18653/v1/N16-1087 @@ -933,9 +933,9 @@ Inter-document Contextual Language model - Quan HungTran + Quan HungTran IngridZukerman - GholamrezaHaffari + GholamrezaHaffari 762–766 N16-1090 10.18653/v1/N16-1090 @@ -945,7 +945,7 @@ Ultradense Word Embeddings by Orthogonal Transformation SaschaRothe SebastianEbert - HinrichSchütze + HinrichSchütze 767–777 N16-1091 10.18653/v1/N16-1091 @@ -985,7 +985,7 @@ Capturing Reliable Fine-Grained Sentiment Associations by Crowdsourcing and Best–Worst Scaling SvetlanaKiritchenko - Saif M.Mohammad + Saif M.Mohammad 811–817 N16-1095 10.18653/v1/N16-1095 @@ -993,8 +993,8 @@ Mapping Verbs in Different Languages to Knowledge Base Relations using Web Text as Interlingua - Derry TantiWijaya - Tom M.Mitchell + Derry TantiWijaya + Tom M.Mitchell 818–827 N16-1096 10.18653/v1/N16-1096 @@ -1004,7 +1004,7 @@ Comparing Convolutional Neural Networks to Traditional Models for Slot Filling HeikeAdel BenjaminRoth - HinrichSchütze + HinrichSchütze 828–838 N16-1097 10.18653/v1/N16-1097 @@ -1013,13 +1013,13 @@ A Corpus and Cloze Evaluation for Deeper Understanding of Commonsense Stories NasrinMostafazadeh - NathanaelChambers + NathanaelChambers XiaodongHe DeviParikh DhruvBatra LucyVanderwende PushmeetKohli - JamesAllen + JamesAllen 839–849 N16-1098 10.18653/v1/N16-1098 @@ -1029,7 +1029,7 @@ Dynamic Entity Representation with Max-pooling Improves Machine Reading SosukeKobayashi RanTian - NaoakiOkazaki + NaoakiOkazaki KentaroInui 850–855 N16-1099 @@ -1038,8 +1038,8 @@ Speed-Constrained Tuning for Statistical Machine Translation Using <fixed-case>B</fixed-case>ayesian Optimization - DanielBeck - Adriàde Gispert + DanielBeck + Adriàde Gispert GonzaloIglesias AurelienWaite BillByrne @@ -1060,12 +1060,12 @@ Incorporating Structural Alignment Biases into an Attentional Neural Translation Model - TrevorCohn + TrevorCohn Cong Duy VuHoang EkaterinaVymolova KaishengYao - ChrisDyer - GholamrezaHaffari + ChrisDyer + GholamrezaHaffari 876–885 N16-1102 10.18653/v1/N16-1102 @@ -1086,11 +1086,11 @@ Effective Crowd Annotation for Relation Extraction AngliLiu - StephenSoderland + StephenSoderland JonathanBragg Christopher H.Lin XiaoLing - Daniel S.Weld + Daniel S.Weld 897–906 N16-1104 10.18653/v1/N16-1104 @@ -1101,7 +1101,7 @@ Hee-GeunYoon Hyun-JeSong Seong-BaePark - Se-YoungPark + Se-YoungPark 907–916 N16-1105 10.18653/v1/N16-1105 @@ -1119,7 +1119,7 @@ <fixed-case>B</fixed-case>ayesian Supervised Domain Adaptation for Short Text Similarity - Md ArafatSultan + Md ArafatSultan JordanBoyd-Graber TamaraSumner 927–936 @@ -1138,11 +1138,11 @@ An Attentional Model for Speech Translation Without Transcription - LongDuong + LongDuong AntoniosAnastasopoulos DavidChiang StevenBird - TrevorCohn + TrevorCohn 949–959 N16-1109 10.18653/v1/N16-1109 @@ -1150,9 +1150,9 @@ Information Density and Quality Estimation Features as Translationese Indicators for Human Translation Classification - RaphaelRubino + RaphaelRubino EkaterinaLapshinova-Koltunski - Josefvan Genabith + Josefvan Genabith 960–970 N16-1110 10.18653/v1/N16-1110 @@ -1162,7 +1162,7 @@ Interpretese vs. Translationese: The Uniqueness of Human Strategies in Simultaneous Interpretation HeHe JordanBoyd-Graber - HalDaumé III + HalDaumé III 971–976 N16-1111 10.18653/v1/N16-1111 @@ -1194,8 +1194,8 @@ Learning Global Features for Coreference Resolution SamWiseman - Alexander M.Rush - Stuart M.Shieber + Alexander M.Rush + Stuart M.Shieber 994–1004 N16-1114 10.18653/v1/N16-1114 @@ -1203,7 +1203,7 @@ Search Space Pruning: A Simple Solution for Better Coreference Resolvers - Nafise SadatMoosavi + Nafise SadatMoosavi MichaelStrube 1005–1011 N16-1115 @@ -1214,7 +1214,7 @@ Unsupervised Ranking Model for Entity Coreference Resolution XuezheMa ZhengzhongLiu - EduardHovy + EduardHovy 1012–1018 N16-1116 10.18653/v1/N16-1116 @@ -1225,7 +1225,7 @@ MoYu MarkDredze RamanArora - Matthew R.Gormley + Matthew R.Gormley 1019–1029 N16-1117 10.18653/v1/N16-1117 @@ -1258,7 +1258,7 @@ Assessing Relative Sentence Complexity using an Incremental <fixed-case>CCG</fixed-case> Parser Bharat RamAmbati SivaReddy - MarkSteedman + MarkSteedman 1051–1057 N16-1120 10.18653/v1/N16-1120 @@ -1287,7 +1287,7 @@ Fast and Easy Short Answer Grading with High Accuracy - Md ArafatSultan + Md ArafatSultan CristobalSalazar TamaraSumner 1070–1075 @@ -1299,7 +1299,7 @@ Interlocking Phrases in Phrase-based Statistical Machine Translation YeKyaw Thu AndrewFinch - EiichiroSumita + EiichiroSumita 1076–1081 N16-1124 10.18653/v1/N16-1124 @@ -1308,12 +1308,12 @@ Eyes Don’t Lie: Predicting Machine Translation Quality Using Eye Movement HassanSajjad - FranciscoGuzmán + FranciscoGuzmán NadirDurrani AhmedAbdelali HoudaBouamor IrinaTemnikova - StephanVogel + StephanVogel 1082–1088 N16-1125 10.18653/v1/N16-1125 @@ -1331,8 +1331,8 @@ Deep Lexical Segmentation and Syntactic Parsing in the Easy-First Dependency Framework - MatthieuConstant - JosephLe Roux + MatthieuConstant + JosephLe Roux NadiTomeh 1095–1101 N16-1127 @@ -1342,7 +1342,7 @@ Sentiment Composition of Words with Opposing Polarities SvetlanaKiritchenko - Saif M.Mohammad + Saif M.Mohammad 1102–1108 N16-1128 10.18653/v1/N16-1128 @@ -1361,7 +1361,7 @@ Learning a <fixed-case>POS</fixed-case> tagger for <fixed-case>AAVE</fixed-case>-like language AnnaJørgensen DirkHovy - AndersSøgaard + AndersSøgaard 1115–1120 N16-1130 10.18653/v1/N16-1130 @@ -1379,7 +1379,7 @@ Bootstrapping Translation Detection and Sentence Extraction from Comparable Corpora KristeKrstovski - DavidSmith + DavidSmith 1127–1132 N16-1132 10.18653/v1/N16-1132 @@ -1388,7 +1388,7 @@ Discriminative Reranking for Grammatical Error Correction with Statistical Machine Translation TomoyaMizumoto - YujiMatsumoto + YujiMatsumoto 1133–1138 N16-1133 10.18653/v1/N16-1133 @@ -1397,7 +1397,7 @@ Patterns of Wisdom: Discourse-Level Style in Multi-Sentence Quotations KyleBooten - Marti A.Hearst + Marti A.Hearst 1139–1144 N16-1134 10.18653/v1/N16-1134 @@ -1414,7 +1414,7 @@ <fixed-case>MAWPS</fixed-case>: A Math Word Problem Repository - RikKoncel-Kedziorski + RikKoncel-Kedziorski SubhroRoy AidaAmini NateKushman @@ -1446,7 +1446,7 @@ <fixed-case>BIRA</fixed-case>: Improved Predictive Exchange Word Clustering JonDehdari LilingTan - Josefvan Genabith + Josefvan Genabith 1169–1174 N16-1139 10.18653/v1/N16-1139 @@ -1482,13 +1482,13 @@ Automatic Prediction of Linguistic Decline in Writings of Subjects with Degenerative Dementia - DavyWeissenbacher + DavyWeissenbacher Travis A.Johnson LauraWojtulewicz AmylouDueck DonaLocke RichardCaselli - GracielaGonzalez + GracielaGonzalez 1198–1207 N16-1143 10.18653/v1/N16-1143 @@ -1496,9 +1496,9 @@ Consensus Maximization Fusion of Probabilistic Information Extractors - MiguelRodríguez + MiguelRodríguez SeanGoldberg - Daisy ZheWang + Daisy ZheWang 1208–1216 N16-1144 10.18653/v1/N16-1144 @@ -1518,8 +1518,8 @@ Automatically Inferring Implicit Properties in Similes AshequlQadir - EllenRiloff - Marilyn A.Walker + EllenRiloff + Marilyn A.Walker 1223–1232 N16-1146 10.18653/v1/N16-1146 @@ -1527,7 +1527,7 @@ Visual Storytelling - Ting-Hao KennethHuang + Ting-Hao KennethHuang FrancisFerraro NasrinMostafazadeh IshanMisra @@ -1554,8 +1554,8 @@ ShanboCheng ShujianHuang HuadongChen - Xin-YuDai - JiajunChen + Xin-YuDai + JiajunChen 1240–1249 N16-1148 10.18653/v1/N16-1148 @@ -1564,8 +1564,8 @@ Incorporating Side Information into Recurrent Neural Network Language Models Cong Duy VuHoang - TrevorCohn - GholamrezaHaffari + TrevorCohn + GholamrezaHaffari 1250–1255 N16-1149 10.18653/v1/N16-1149 @@ -1583,7 +1583,7 @@ <tex-math>K</tex-math>-Embeddings: Learning Conceptual Embeddings for Words using Context - ThuyVu + ThuyVu D. StottParker 1262–1267 N16-1151 @@ -1608,7 +1608,7 @@ TommiJaakkola KaterynaTymoshenko AlessandroMoschitti - LluísMàrquez + LluísMàrquez 1279–1289 N16-1153 10.18653/v1/N16-1153 @@ -1625,10 +1625,10 @@ Multilingual Language Processing From Bytes - DanGillick + DanGillick CliffBrunk OriolVinyals - AmarnagSubramanya + AmarnagSubramanya 1296–1306 N16-1155 10.18653/v1/N16-1155 @@ -1668,9 +1668,9 @@ SakshiGupta RaveeshMotlani PiyushBansal - ManishShrivastava + ManishShrivastava RadhikaMamidi - Dipti M.Sharma + Dipti M.Sharma 1340–1345 N16-1159 10.18653/v1/N16-1159 @@ -1678,9 +1678,9 @@ Bilingual Learning of Multi-sense Embeddings with Discrete Autoencoders - SimonŠuster + SimonŠuster IvanTitov - Gertjanvan Noord + Gertjanvan Noord 1346–1356 N16-1160 10.18653/v1/N16-1160 @@ -1693,10 +1693,10 @@ ManaalFaruqui GuillaumeLample PatrickLittell - DavidMortensen - Alan WBlack - LoriLevin - ChrisDyer + DavidMortensen + Alan WBlack + LoriLevin + ChrisDyer 1357–1366 N16-1161 10.18653/v1/N16-1161 @@ -1733,7 +1733,7 @@ Cross-Domain Mining of Argumentative Text through Distant Supervision - KhalidAl-Khatib + KhalidAl-Khatib HenningWachsmuth MatthiasHagen JonasKöhler @@ -1745,7 +1745,7 @@ A Study of the Impact of Persuasive Argumentation in Political Debates - Amparo ElizabethCano-Basave + Amparo ElizabethCano-Basave YulanHe 1405–1413 N16-1166 @@ -1764,7 +1764,7 @@ Using Context to Predict the Purpose of Argumentative Writing Revisions FanZhang - DianeLitman + DianeLitman 1424–1430 N16-1168 10.18653/v1/N16-1168 @@ -1793,7 +1793,7 @@ AshequlQadir MichaelGamon PatrickPantel - Ahmed HassanAwadallah + Ahmed HassanAwadallah 1452–1462 N16-1171 10.18653/v1/N16-1171 @@ -1821,10 +1821,10 @@ Hierarchical Attention Networks for Document Classification ZichaoYang DiyiYang - ChrisDyer + ChrisDyer XiaodongHe AlexSmola - EduardHovy + EduardHovy 1480–1489 N16-1174 10.18653/v1/N16-1174 @@ -1854,7 +1854,7 @@ Dependency Sensitive Convolutional Neural Networks for Modeling Sentences and Documents RuiZhang HonglakLee - Dragomir R.Radev + Dragomir R.Radev 1512–1521 N16-1177 10.18653/v1/N16-1177 @@ -1864,7 +1864,7 @@ <fixed-case>MGNC</fixed-case>-<fixed-case>CNN</fixed-case>: A Simple Approach to Exploiting Multiple Word Embeddings for Sentence Classification YeZhang StephenRoller - Byron C.Wallace + Byron C.Wallace 1522–1527 N16-1178 10.18653/v1/N16-1178 @@ -1874,7 +1874,7 @@ Improving sentence compression by learning to predict gaze SigridKlerke YoavGoldberg - AndersSøgaard + AndersSøgaard 1528–1533 N16-1179 10.18653/v1/N16-1179 @@ -1886,7 +1886,7 @@ AnupamGuha SnigdhaChaturvedi JordanBoyd-Graber - HalDaumé III + HalDaumé III 1534–1544 N16-1180 10.18653/v1/N16-1180 @@ -1961,9 +1961,9 @@ Combining syntactic patterns and <fixed-case>W</fixed-case>ikipedia’s hierarchy of hyperlinks to extract meronym relations - Debela TesfayeGemechu + Debela TesfayeGemechu MichaelZock - SolomonTeferra + SolomonTeferra 29–36 N16-2005 10.18653/v1/N16-2005 @@ -2004,7 +2004,7 @@ FrancescaDelogu ClaytonGreenberg MindaugasMozuraitis - MatthewCrocker + MatthewCrocker 59–65 N16-2009 10.18653/v1/N16-2009 @@ -2013,7 +2013,7 @@ Explicit Argument Identification for Discourse Parsing In <fixed-case>H</fixed-case>indi: A Hybrid Pipeline RohitJain - DiptiSharma + DiptiSharma 66–72 N16-2010 10.18653/v1/N16-2010 @@ -2022,7 +2022,7 @@ Exploring Fine-Grained Emotion Detection in Tweets Jasy Suet YanLiew - Howard R.Turtle + Howard R.Turtle 73–80 N16-2011 10.18653/v1/N16-2011 @@ -2040,7 +2040,7 @@ Hateful Symbols or Hateful People? Predictive Features for Hate Speech Detection on <fixed-case>T</fixed-case>witter - ZeerakWaseem + ZeerakWaseem DirkHovy 88–93 N16-2013 @@ -2049,11 +2049,11 @@ Non-decreasing Sub-modular Function for Comprehensible Summarization - LittonJ Kurisinkel + LittonJ Kurisinkel PruthwikMishra VigneshwaranMuralidaran VasudevaVarma - DiptiMisra Sharma + DiptiMisra Sharma 94–101 N16-2014 10.18653/v1/N16-2014 @@ -2062,7 +2062,7 @@ Phylogenetic simulations over constraint-based grammar formalisms AndrewLamont - JonathanWashington + JonathanWashington 102–108 N16-2015 10.18653/v1/N16-2015 @@ -2092,7 +2092,7 @@ Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Demonstrations N16-3 JohnDeNero - MarkFinlayson + MarkFinlayson SravanaReddy 10.18653/v1/N16-3 Association for Computational Linguistics @@ -2117,7 +2117,7 @@ Instant Feedback for Increasing the Presence of Solutions in Peer Reviews HuyNguyen WentingXiong - DianeLitman + DianeLitman 6–10 N16-3002 10.18653/v1/N16-3002 @@ -2138,7 +2138,7 @@ i<fixed-case>A</fixed-case>ppraise: A Manual Machine Translation Evaluation Environment Supporting Eye-tracking AhmedAbdelali NadirDurrani - FranciscoGuzmán + FranciscoGuzmán 17–21 N16-3004 10.18653/v1/N16-3004 @@ -2172,7 +2172,7 @@ KallirroiGeorgila AntonLeuski AriShapiro - DavidTraum + DavidTraum 32–36 N16-3007 10.18653/v1/N16-3007 @@ -2182,7 +2182,7 @@ <fixed-case>A</fixed-case>rg<fixed-case>R</fixed-case>ewrite: A Web-based Revision Assistant for Argumentative Writings FanZhang RebeccaHwa - DianeLitman + DianeLitman Homa B.Hashemi 37–41 N16-3008 @@ -2193,7 +2193,7 @@ Scaling Up Word Clustering JonDehdari LilingTan - Josefvan Genabith + Josefvan Genabith 42–46 N16-3009 10.18653/v1/N16-3009 @@ -2201,7 +2201,7 @@ Task Completion Platform: A self-serve multi-domain goal oriented dialogue platform - PaulCrook + PaulCrook AlexMarin VipulAgarwal KhushbooAggarwal @@ -2223,7 +2223,7 @@ Jean-PhillipeRobichaud AlexandreRochette LoganStromberg - RuhiSarikaya + RuhiSarikaya 47–51 N16-3010 10.18653/v1/N16-3010 @@ -2241,7 +2241,7 @@ <fixed-case>L</fixed-case>ingo<fixed-case>T</fixed-case>urk: managing crowdsourced tasks for psycholinguistics FlorianPusse - AsadSayeed + AsadSayeed VeraDemberg 57–61 N16-3012 @@ -2274,7 +2274,7 @@ Cross-media Event Extraction and Recommendation DiLu - ClareVoss + ClareVoss FangboTao XiangRen RachelGuan @@ -2284,10 +2284,10 @@ HongzhiLi TaylorCassidy HengJi - Shih-fuChang + Shih-fuChang JiaweiHan WilliamWallace - JamesHendler + JamesHendler MeiSi LanceKaplan 72–76 @@ -2298,7 +2298,7 @@ <fixed-case>SODA</fixed-case>:Service Oriented Domain Adaptation Architecture for Microblog Categorization Himanshu SharadBhatt - SandipanDandapat + SandipanDandapat PeddamuthuBalaji ShouryaRoy SharmisthaJat @@ -2319,8 +2319,8 @@ KevinKilgour MatthiasSperber MohammedMediani - SebastianStüker - AlexWaibel + SebastianStüker + AlexWaibel 82–86 N16-3017 10.18653/v1/N16-3017 @@ -2342,10 +2342,10 @@ <fixed-case>K</fixed-case>athaa: A Visual Programming Framework for <fixed-case>NLP</fixed-case> Applications - Sharada PrasannaMohanty + Sharada PrasannaMohanty Nehal JWani - ManishSrivastava - Dipti MisraSharma + ManishSrivastava + Dipti MisraSharma 92–96 N16-3019 10.18653/v1/N16-3019 @@ -2353,7 +2353,7 @@ “Why Should <fixed-case>I</fixed-case> Trust You?”: Explaining the Predictions of Any Classifier - MarcoRibeiro + MarcoRibeiro SameerSingh CarlosGuestrin 97–101 @@ -2367,7 +2367,7 @@ Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Tutorial Abstracts N16-4 MohitBansal - Alexander M.Rush + Alexander M.Rush 10.18653/v1/N16-4 Association for Computational Linguistics
San Diego, California
@@ -2381,8 +2381,8 @@ <fixed-case>E</fixed-case>nglish <fixed-case>R</fixed-case>esource <fixed-case>S</fixed-case>emantics - DanFlickinger - Emily M.Bender + DanFlickinger + Emily M.Bender WoodleyPackard 1–5 N16-4001 @@ -2391,7 +2391,7 @@ Multilingual Multimodal Language Processing Using Neural Networks - Mitesh MKhapra + Mitesh MKhapra SarathChandar 6–7 N16-4002 @@ -2400,7 +2400,7 @@ Question Answering with Knowledge Base, Web and Beyond - Wen-tauYih + Wen-tauYih HaoMa 8–10 N16-4003 @@ -2420,7 +2420,7 @@ Scalable Statistical Relational Learning for <fixed-case>NLP</fixed-case> William YangWang - WilliamCohen + WilliamCohen 14–16 N16-4005 10.18653/v1/N16-4005 @@ -2428,8 +2428,8 @@ Statistical Machine Translation between Related Languages - PushpakBhattacharyya - Mitesh M.Khapra + PushpakBhattacharyya + Mitesh M.Khapra AnoopKunchukuttan 17–20 N16-4006 diff --git a/data/xml/N18.xml b/data/xml/N18.xml index 9422c64272..ff9da97de4 100644 --- a/data/xml/N18.xml +++ b/data/xml/N18.xml @@ -4,9 +4,9 @@ Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers) N18-1 - MarilynWalker + MarilynWalker HengJi - AmandaStent + AmandaStent 10.18653/v1/N18-1 Association for Computational Linguistics
New Orleans, Louisiana
@@ -24,7 +24,7 @@ YanruQu LihengChen JianShen - WeinanZhang + WeinanZhang ShaodianZhang YimeiGao GenGu @@ -52,7 +52,7 @@ Joint Bootstrapping Machines for High Confidence Relation Extraction PankajGupta BenjaminRoth - HinrichSchütze + HinrichSchütze 26–36 Semi-supervised bootstrapping techniques for relationship extraction from text iteratively expand a set of initial seed instances. Due to the lack of labeled data, a key challenge in bootstrapping is semantic drift: if a false positive instance is added during an iteration, then all following iterations are contaminated. We introduce BREX, a new bootstrapping method that protects against such contamination by highly effective confidence assessment. This is achieved by using entity and template seeds jointly (as opposed to just one as in previous work), by expanding entities and templates in parallel and in a mutually constraining fashion in each iteration and by introducing higherquality similarity measures for templates. Experimental results show that BREX achieves an F1 that is 0.13 (0.87 vs. 0.74) better than the state of the art for four relationships. N18-1003 @@ -63,7 +63,7 @@ A Deep Generative Model of Vowel Formant Typology RyanCotterell - JasonEisner + JasonEisner 37–46 What makes some types of languages more probable than others? For instance, we know that almost all spoken languages contain the vowel phoneme /i/; why should that be? The field of linguistic typology seeks to answer these questions and, thereby, divine the mechanisms that underlie human language. In our work, we tackle the problem of vowel system typology, i.e., we propose a generative probability model of which vowels a language contains. In contrast to previous work, we work directly with the acoustic information—the first two formant values—rather than modeling discrete sets of symbols from the international phonetic alphabet. We develop a novel generative probability model and report results on over 200 languages. N18-1004 @@ -73,10 +73,10 @@ Fortification of Neural Morphological Segmentation Models for Polysynthetic Minimal-Resource Languages - KatharinaKann + KatharinaKann Jesus ManuelMager Hois - Ivan VladimirMeza-Ruiz - HinrichSchütze + Ivan VladimirMeza-Ruiz + HinrichSchütze 47–57 Morphological segmentation for polysynthetic languages is challenging, because a word may consist of many individual morphemes and training data can be extremely scarce. Since neural sequence-to-sequence (seq2seq) models define the state of the art for morphological segmentation in high-resource settings and for (mostly) European languages, we first show that they also obtain competitive performance for Mexican polysynthetic languages in minimal-resource settings. We then propose two novel multi-task training approaches—one with, one without need for external unlabeled resources—, and two corresponding data augmentation methods, improving over the neural baseline for all languages. Finally, we explore cross-lingual transfer as a third way to fortify our neural model and show that we can train one single multi-lingual model for related languages while maintaining comparable or even improved performance, thus reducing the amount of parameters by close to 75%. We provide our morphological segmentation datasets for Mexicanero, Nahuatl, Wixarika and Yorem Nokki for future research. N18-1005 @@ -103,7 +103,7 @@ MohitBansal KevinGimpel KarenLivescu - MariOstendorf + MariOstendorf 69–81 In conversational speech, the acoustic signal provides cues that help listeners disambiguate difficult parses. For automatically parsing spoken utterances, we introduce a model that integrates transcribed text and acoustic-prosodic features using a convolutional neural network over energy and pitch trajectories coupled with an attention-based recurrent neural network that accepts text and prosodic features. We find that different types of acoustic-prosodic features are individually helpful, and together give statistically significant improvements in parse and disfluency detection F1 scores over a strong text-only baseline. For this study with known sentence boundaries, error analyses show that the main benefit of acoustic-prosodic features is in sentences with disfluencies, attachment decisions are most improved, and transcription errors obscure gains from prosody. N18-1007 @@ -139,7 +139,7 @@ ShivaniPoddar ByungsooJeon QinlanShen - CarolynRosé + CarolynRosé GrahamNeubig 103–116 We present a neural architecture for modeling argumentative dialogue that explicitly models the interplay between an Opinion Holder’s (OH’s) reasoning and a challenger’s argument, with the goal of predicting if the argument successfully changes the OH’s view. The model has two components: (1) vulnerable region detection, an attention model that identifies parts of the OH’s reasoning that are amenable to change, and (2) interaction encoding, which identifies the relationship between the content of the OH’s reasoning and that of the challenger’s argument. Based on evaluation on discussions from the Change My View forum on Reddit, the two components work together to predict an OH’s change in view, outperforming several baselines. A posthoc analysis suggests that sentences picked out by the attention model are addressed more frequently by successful arguments than by unsuccessful ones. @@ -150,7 +150,7 @@ Automatic Focus Annotation: Bringing Formal Pragmatics Alive in Analyzing the Information Structure of Authentic Data RamonZiai - DetmarMeurers + DetmarMeurers 117–128 Analyzing language in context, both from a theoretical and from a computational perspective, is receiving increased interest. Complementing the research in linguistics on discourse and information structure, in computational linguistics identifying discourse concepts was also shown to improve the performance of certain applications, for example, Short Answer Assessment systems (Ziai and Meurers, 2014). Building on the research that established detailed annotation guidelines for manual annotation of information structural concepts for written (Dipper et al., 2007; Ziai and Meurers, 2014) and spoken language data (Calhoun et al., 2010), this paper presents the first approach automating the analysis of focus in authentic written data. Our classification approach combines a range of lexical, syntactic, and semantic features to achieve an accuracy of 78.1% for identifying focus. N18-1011 @@ -160,7 +160,7 @@ Dear Sir or Madam, May <fixed-case>I</fixed-case> Introduce the <fixed-case>GYAFC</fixed-case> Dataset: Corpus, Benchmarks and Metrics for Formality Style Transfer SudhaRao - JoelTetreault + JoelTetreault 129–140 Style transfer is the task of automatically transforming a piece of text in one particular style into another. A major barrier to progress in this field has been a lack of training and evaluation datasets, as well as benchmarks and automatic metrics. In this work, we create the largest corpus for a particular stylistic transfer (formality) and show that techniques from the machine translation community can serve as strong baselines for future work. We also discuss challenges of using automatic metrics. N18-1012.Notes.pdf @@ -259,7 +259,7 @@ Zero-Shot Question Generation from Knowledge Graphs for Unseen Predicates and Entity Types HadyElsahar ChristopheGravier - FrederiqueLaforest + FrederiqueLaforest 218–228 We present a neural model for question generation from knowledge graphs triples in a “Zero-shot” setup, that is generating questions for predicate, subject types or object types that were not seen at training time. Our model leverages triples occurrences in the natural language corpus in a encoder-decoder architecture, paired with an original part-of-speech copy action mechanism to generate questions. Benchmark and human evaluation show that our model outperforms state-of-the-art on this task. N18-1020 @@ -308,7 +308,7 @@ Neural Automated Essay Scoring and Coherence Modeling for Adversarially Crafted Input YoumnaFarag HelenYannakoudakis - TedBriscoe + TedBriscoe 263–271 We demonstrate that current state-of-the-art approaches to Automated Essay Scoring (AES) are not well-suited to capturing adversarially crafted input of grammatical but incoherent sequences of sentences. We develop a neural model of local coherence that can effectively learn connectedness features between sentences, and propose a framework for integrating and jointly training the local coherence model with a state-of-the-art AES model. We evaluate our approach against a number of baselines and experimentally demonstrate its effectiveness on both the AES task and the task of flagging adversarial input, further contributing to the development of an approach that strengthens the validity of neural essay scoring models. N18-1024 @@ -339,7 +339,7 @@ Zero-Shot Sequence Labeling: Transferring Knowledge from Sentences to Tokens MarekRei - AndersSøgaard + AndersSøgaard 293–302 Can attention- or gradient-based visualization techniques be used to infer token-level labels for binary sequence tagging problems, using networks trained only on sentence-level labels? We construct a neural network architecture based on soft attention, train it as a binary sentence classifier and evaluate against token-level annotation on four different datasets. Inferring token labels from a network provides a method for quantitatively evaluating what the model is learning, along with generating useful feedback in assistance systems. Our results indicate that attention-based methods are able to predict token-level labels more accurately, compared to gradient-based methods, sometimes even rivaling the supervised oracle network. N18-1027 @@ -400,7 +400,7 @@ Improving Lexical Choice in Neural Machine Translation - ToanNguyen + ToanNguyen DavidChiang 334–343 We explore two solutions to the problem of mistranslating rare words in neural machine translation. First, we argue that the standard output layer, which computes the inner product of a vector representing the context with all possible output word embeddings, rewards frequent words disproportionately, and we propose to fix the norms of both vectors to a constant value. Second, we integrate a simple lexical module which is jointly trained with the rest of the model. We evaluate our approaches on eight language pairs with data sizes ranging from 100k to 8M words, and achieve improvements of up to +4.3 BLEU, surpassing phrase-based translation in nearly all settings. @@ -412,9 +412,9 @@ Universal Neural Machine Translation for Extremely Low Resource Languages JiataoGu - HanyHassan + HanyHassan JacobDevlin - Victor O.K.Li + Victor O.K.Li 344–354 In this paper, we propose a new universal machine translation approach focusing on languages with a limited amount of parallel data. Our proposed approach utilizes a transfer-learning approach to share lexical and sentence level representations across multiple source languages into one target language. The lexical part is shared through a Universal Lexical Representation to support multi-lingual word-level sharing. The sentence-level sharing is represented by a model of experts from all source languages that share the source encoders with all other languages. This enables the low-resource language to utilize the lexical and sentence representations of the higher resource languages. Our approach is able to achieve 23 BLEU on Romanian-English WMT2016 using a tiny parallel corpus of 6k sentences, compared to the 18 BLEU of strong baseline system which uses multi-lingual training and back-translation. Furthermore, we show that the proposed approach can achieve almost 20 BLEU on the same dataset through fine-tuning a pre-trained multi-lingual system in a zero-shot setting. N18-1032 @@ -454,7 +454,7 @@ LuWang NicholasBeauchamp SarahShugars - Kam-FaiWong + Kam-FaiWong 375–385 Millions of conversations are generated every day on social media platforms. With limited attention, it is challenging for users to select which discussions they would like to participate in. Here we propose a new method for microblog conversation recommendation. While much prior work has focused on post-level recommendation, we exploit both the conversational context, and user content and behavior preferences. We propose a statistical model that jointly captures: (1) topics for representing user interests and conversation content, and (2) discourse modes for describing user replying behavior and conversation dynamics. Experimental results on two Twitter datasets demonstrate that our system outperforms methods that only model content without considering discourse. N18-1035 @@ -505,8 +505,8 @@ Comparatives, Quantifiers, Proportions: a Multi-Task Model for the Learning of Quantities from Vision SandroPezzelle - Ionut-TeodorSorodoc - RaffaellaBernardi + Ionut-TeodorSorodoc + RaffaellaBernardi 419–430 The present work investigates whether different quantification mechanisms (set comparison, vague quantification, and proportional estimation) can be jointly learned from visual scenes by a multi-task computational model. The motivation is that, in humans, these processes underlie the same cognitive, non-symbolic ability, which allows an automatic estimation and comparison of set magnitudes. We show that when information about lower-complexity tasks is available, the higher-level proportional task becomes more accurate than when performed in isolation. Moreover, the multi-task model is able to generalize to unseen combinations of target/non-target objects. Consistently with behavioral evidence showing the interference of absolute number in the proportional task, the multi-task model no longer works when asked to provide the number of target objects in the scene. N18-1039 @@ -531,7 +531,7 @@ <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation for Paraphrase Detection FuadIssa MarcoDamonte - Shay B.Cohen + Shay B.Cohen XiaohuiYan YiChang 442–452 @@ -545,7 +545,7 @@ FabioPetroni VassilisPlachouras TimothyNugent - Jochen L.Leidner + Jochen L.Leidner 453–462 The widespread use of word embeddings is associated with the recent successes of many natural language processing (NLP) systems. The key approach of popular models such as word2vec and GloVe is to learn dense vector representations from the context of words. More recently, other approaches have been proposed that incorporate different types of contextual information, including topics, dependency relations, n-grams, and sentiment. However, these models typically integrate only limited additional contextual information, and often in ad hoc ways. In this work, we introduce attr2vec, a novel framework for jointly learning embeddings for words and contextual attributes based on factorization machines. We perform experiments with different types of contextual information. Our experimental results on a text classification task demonstrate that using attr2vec to jointly learn embeddings for words and Part-of-Speech (POS) tags improves results compared to learning the embeddings independently. Moreover, we use attr2vec to train dependency-based embeddings and we show that they exhibit higher similarity between functionally related words compared to traditional approaches. N18-1042 @@ -671,7 +671,7 @@ PalaashSawant SukantaSen AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 572–582 Efficient word representations play an important role in solving various problems related to Natural Language Processing (NLP), data mining, text mining etc. The issue of data sparsity poses a great challenge in creating efficient word representation model for solving the underlying problem. The problem is more intensified in resource-poor scenario due to the absence of sufficient amount of corpus. In this work we propose to minimize the effect of data sparsity by leveraging bilingual word embeddings learned through a parallel corpus. We train and evaluate Long Short Term Memory (LSTM) based architecture for aspect level sentiment classification. The neural network architecture is further assisted by the hand-crafted features for the prediction. We show the efficacy of the proposed model against state-of-the-art methods in two experimental setups i.e. multi-lingual and cross-lingual. N18-1053 @@ -720,8 +720,8 @@ ZiangXie GuillaumeGenthial StanleyXie - AndrewNg - DanJurafsky + AndrewNg + DanJurafsky 619–628 Translation-based methods for grammar correction that directly map noisy, ungrammatical text to their clean counterparts are able to correct a broad range of errors; however, such techniques are bottlenecked by the need for a large parallel corpus of noisy and clean sentence pairs. In this paper, we consider synthesizing parallel data by noising a clean monolingual corpus. While most previous approaches introduce perturbations using features computed from local context windows, we instead develop error generation processes using a neural sequence transduction model trained to translate clean examples to their noisy counterparts. Given a corpus of clean examples, we propose beam search noising procedures to synthesize additional noisy examples that human evaluators were nearly unable to discriminate from nonsynthesized examples. Surprisingly, when trained on additional data synthesized using our best-performing noising scheme, our model approaches the same performance as when trained on additional nonsynthesized data. N18-1057 @@ -732,7 +732,7 @@ Self-Training for Jointly Learning to Ask and Answer Questions MrinmayaSachan - EricXing + EricXing 629–640 Building curious machines that can answer as well as ask questions is an important challenge for AI. The two tasks of question answering and question generation are usually tackled separately in the NLP literature. At the same time, both require significant amounts of supervised data which is hard to obtain in many domains. To alleviate these issues, we propose a self-training method for jointly learning to ask as well as answer questions, leveraging unlabeled text along with labeled question answer pairs for learning. We evaluate our approach on four benchmark datasets: SQUAD, MS MARCO, WikiQA and TrecQA, and show significant improvements over a number of established baselines on both question answering and question generation tasks. We also achieved new state-of-the-art results on two competitive answer sentence selection tasks: WikiQA and TrecQA. N18-1058 @@ -769,7 +769,7 @@ SabyasachiKamila MohammedHasanuzzaman AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya AndyWay 663–674 Temporal orientation refers to an individual’s tendency to connect to the psychological concepts of past, present or future, and it affects personality, motivation, emotion, decision making and stress coping processes. The study of the social media users’ psycho-demographic attributes from the perspective of human temporal orientation can be of utmost interest and importance to the business and administrative decision makers as it can provide an extra precious information for them to make informed decisions. In this paper, we propose a very first study to demonstrate the association between the sentiment view of the temporal orientation of the users and their different psycho-demographic attributes by analyzing their tweets. We first create a temporal orientation classifier in a minimally supervised way which classifies each tweet of the users in one of the three temporal categories, namely past, present, and future. A deep Bi-directional Long Short Term Memory (BLSTM) is used for the tweet classification task. Our tweet classifier achieves an accuracy of 78.27% when tested on a manually created test set. We then determine the users’ overall temporal orientation based on their tweets on the social media. The sentiment is added to the tweets at the fine-grained level where each temporal tweet is given a sentiment with either of the positive, negative or neutral. Our experiment reveals that depending upon the sentiment view of temporal orientation, a user’s attributes vary. We finally measure the correlation between the users’ sentiment view of temporal orientation and their different psycho-demographic factors using regression. @@ -779,9 +779,9 @@ Querying Word Embeddings for Similarity and Relatedness - FatemehTorabi Asr + FatemehTorabi Asr RobertZinkov - MichaelJones + MichaelJones 675–684 Word embeddings obtained from neural network models such as Word2Vec Skipgram have become popular representations of word meaning and have been evaluated on a variety of word similarity and relatedness norming data. Skipgram generates a set of word and context embeddings, the latter typically discarded after training. We demonstrate the usefulness of context embeddings in predicting asymmetric association between words from a recently published dataset of production norms (Jouravlev & McRae, 2016). Our findings suggest that humans respond with words closer to the cue within the context embedding space (rather than the word embedding space), when asked to generate thematically related words. N18-1062 @@ -802,7 +802,7 @@ Entity Commonsense Representation for Neural Abstractive Summarization - Reinald KimAmplayo + Reinald KimAmplayo SeonjaeLim Seung-wonHwang 697–707 @@ -877,9 +877,9 @@ Automatic Stance Detection Using End-to-End Memory Networks MitraMohtarami RamyBaly - JamesGlass - PreslavNakov - LluísMàrquez + JamesGlass + PreslavNakov + LluísMàrquez AlessandroMoschitti 767–776 We present an effective end-to-end memory network model that jointly (i) predicts whether a given document can be considered as relevant evidence for a given claim, and (ii) extracts snippets of evidence that can be used to reason about the factuality of the target claim. Our model combines the advantages of convolutional and recurrent neural networks as part of a memory network. We further introduce a similarity matrix at the inference level of the memory network in order to extract snippets of evidence for input claims more accurately. Our experiments on a public benchmark dataset, FakeNewsChallenge, demonstrate the effectiveness of our approach. @@ -890,7 +890,7 @@ Collective Entity Disambiguation with Structured Gradient Tree Boosting YiYang - OzanIrsoy + OzanIrsoy Kazi ShefaetRahman 777–786 We present a gradient-tree-boosting-based structured learning model for jointly disambiguating named entities in a document. Gradient tree boosting is a widely used machine learning algorithm that underlies many top-performing natural language processing systems. Surprisingly, most works limit the use of gradient tree boosting as a tool for regular classification or regression problems, despite the structured nature of language. To the best of our knowledge, our work is the first one that employs the structured gradient tree boosting (SGTB) algorithm for collective entity disambiguation. By defining global features over previous disambiguation decisions and jointly modeling them with local features, our system is able to produce globally optimized entity assignments for mentions in a document. Exact inference is prohibitively expensive for our globally normalized model. To solve this problem, we propose Bidirectional Beam Search with Gold path (BiBSG), an approximate inference algorithm that is a variant of the standard beam search algorithm. BiBSG makes use of global information from both past and future to perform better local search. Experiments on standard benchmark datasets show that SGTB significantly improves upon published results. Specifically, SGTB outperforms the previous state-of-the-art neural system by near 1% absolute accuracy on the popular AIDA-CoNLL dataset. @@ -915,7 +915,7 @@ LijunWu LiZhao TaoQin - XueqiCheng + XueqiCheng Tie-YanLiu 799–808 Recurrent neural networks have achieved state-of-the-art results in many artificial intelligence tasks, such as language modeling, neural machine translation, speech recognition and so on. One of the key factors to these successes is big models. However, training such big models usually takes days or even weeks of time even if using tens of GPU cards. In this paper, we propose an efficient architecture to improve the efficiency of such RNN model training, which adopts the group strategy for recurrent layers, while exploiting the representation rearrangement strategy between layers as well as time steps. To demonstrate the advantages of our models, we conduct experiments on several datasets and tasks. The results show that our architecture achieves comparable or better accuracy comparing with baselines, with a much smaller number of parameters and at a much lower computational cost. @@ -978,7 +978,7 @@ Multimodal Named Entity Recognition for Short Social Media Posts SeungwhanMoon LeonardoNeves - VitorCarvalho + VitorCarvalho 852–860 We introduce a new task called Multimodal Named Entity Recognition (MNER) for noisy user-generated data such as tweets or Snapchat captions, which comprise short text with accompanying images. These social media posts often come in inconsistent or incomplete syntax and lexical notations with very limited surrounding textual contexts, bringing significant challenges for NER. To this end, we create a new dataset for MNER called SnapCaptions (Snapchat image-caption pairs submitted to public and crowd-sourced stories with fully annotated named entities). We then build upon the state-of-the-art Bi-LSTM word/character based NER models with 1) a deep image network which incorporates relevant visual context to augment textual information, and 2) a generic modality-attention module which learns to attenuate irrelevant modalities while amplifying @@ -994,7 +994,7 @@ Nested Named Entity Recognition Revisited ArzooKatiyar - ClaireCardie + ClaireCardie 861–871 We propose a novel recurrent neural network-based approach to simultaneously handle nested named entity recognition and nested entity mention detection. The model learns a hypergraph representation for nested entities using features extracted from a recurrent neural network. In evaluations on three standard data sets, we show that our approach significantly outperforms existing state-of-the-art methods, which are feature-based. The approach is also efficient: it operates linearly in the number of tokens and the number of possible output labels at any token. Finally, we present an extension of our model that jointly learns the head of each entity mention. N18-1079 @@ -1016,7 +1016,7 @@ Supervised Open Information Extraction GabrielStanovsky JulianMichael - LukeZettlemoyer + LukeZettlemoyer IdoDagan 885–895 We present data and methods that enable a supervised learning approach to Open Information Extraction (Open IE). Central to the approach is a novel formulation of Open IE as a sequence tagging problem, addressing challenges such as encoding multiple extractions for a predicate. We also develop a bi-LSTM transducer, extending recent deep Semantic Role Labeling models to extract Open IE tuples and provide confidence scores for tuning their precision-recall tradeoff. Furthermore, we show that the recently released Question-Answer Meaning Representation dataset can be automatically converted into an Open IE corpus which significantly increases the amount of available training data. Our supervised model outperforms the existing state-of-the-art Open IE systems on benchmark datasets. @@ -1069,7 +1069,7 @@ Neural Particle Smoothing for Sampling from Conditional Sequence Models Chu-ChengLin - JasonEisner + JasonEisner 929–941 We introduce neural particle smoothing, a sequential Monte Carlo method for sampling annotations of an input string from a given probability model. In contrast to conventional particle filtering algorithms, we train a proposal distribution that looks ahead to the end of the input string by means of a right-to-left LSTM. We demonstrate that this innovation can improve the quality of the sample. To motivate our formal choices, we explain how neural transduction models and our sampler can be viewed as low-dimensional but nonlinear approximations to working with HMMs over very large state spaces. N18-1085.Notes.pdf @@ -1080,7 +1080,7 @@ Neural Syntactic Generative Models with Exact Marginalization JanBuys - PhilBlunsom + PhilBlunsom 942–952 We present neural syntactic generative models with exact marginalization that support both dependency parsing and language modeling. Exact marginalization is made tractable through dynamic programming over shift-reduce parsing and minimal RNN-based feature sets. Our algorithms complement previous approaches by supporting batched training and enabling online computation of next word probabilities. For supervised dependency parsing, our model achieves a state-of-the-art result among generative approaches. We also report empirical results on unsupervised syntactic models and their role in language modeling. We find that our model formulation of latent dependencies with exact marginalization do not lead to better intrinsic language modeling performance than vanilla RNNs, and that parsing accuracy is not correlated with language modeling perplexity in stack-based models. N18-1086 @@ -1105,7 +1105,7 @@ WanxiangChe BingQin NathanSchneider - Noah A.Smith + Noah A.Smith 965–975 We study the problem of analyzing tweets with universal dependencies (UD). We extend the UD guidelines to cover special constructions in tweets that affect tokenization, part-of-speech tagging, and labeled dependencies. Using the extended guidelines, we create a new tweet treebank for English (Tweebank v2) that is four times larger than the (unlabeled) Tweebank v1 introduced by Kong et al. (2014). We characterize the disagreements between our annotators and show that it is challenging to deliver consistent annotation due to ambiguity in understanding and explaining tweets. Nonetheless, using the new treebank, we build a pipeline system to parse raw tweets into UD. To overcome the annotation noise without sacrificing computational efficiency, we propose a new method to distill an ensemble of 20 transition-based parsers into a single one. Our parser achieves an improvement of 2.2 in LAS over the un-ensembled baseline and outperforms parsers that are state-of-the-art on other treebanks in both accuracy and speed. N18-1088 @@ -1116,7 +1116,7 @@ Robust Multilingual Part-of-Speech Tagging via Adversarial Training MichihiroYasunaga JungoKasai - DragomirRadev + DragomirRadev 976–986 Adversarial training (AT) is a powerful regularization method for neural networks, aiming to achieve robustness to input perturbations. Yet, the specific effects of the robustness obtained from AT are still unclear in the context of natural language processing. In this paper, we propose and analyze a neural POS tagging model that exploits AT. In our experiments on the Penn Treebank WSJ corpus and the Universal Dependencies (UD) dataset (27 languages), we find that AT not only improves the overall tagging accuracy, but also 1) prevents over-fitting well in low resource languages and 2) boosts tagging accuracy for rare / unseen words. We also demonstrate that 3) the improved tagging performance by AT contributes to the downstream task of dependency parsing, and that 4) AT helps the model to learn cleaner word representations. 5) The proposed AT model is generally effective in different sequence labeling tasks. These positive results motivate further use of AT for natural language tasks. N18-1089 @@ -1125,10 +1125,10 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Parsing for <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Code-Switching - IrshadBhat - Riyaz A.Bhat - ManishShrivastava - DiptiSharma + IrshadBhat + Riyaz A.Bhat + ManishShrivastava + DiptiSharma 987–998 Code-switching is a phenomenon of mixing grammatical structures of two or more languages under varied social constraints. The code-switching data differ so radically from the benchmark corpora used in NLP community that the application of standard technologies to these data degrades their performance sharply. Unlike standard corpora, these data often need to go through additional processes such as language identification, normalization and/or back-transliteration for their efficient processing. In this paper, we investigate these indispensable processes and other problems associated with syntactic parsing of code-switching data and propose methods to mitigate their effects. In particular, we study dependency parsing of code-switching data of Hindi and English multilingual speakers from Twitter. We present a treebank of Hindi-English code-switching tweets under Universal Dependencies scheme and propose a neural stacking model for parsing that efficiently leverages the part-of-speech tag and syntactic tree annotations in the code-switching treebank and the preexisting Hindi and English treebanks. We also present normalization and back-transliteration models with a decoding process tailored for code-switching data. Results show that our neural stacking parser is 1.5% LAS points better than the augmented parsing model and 3.8% LAS points better than the one which uses first-best normalization and/or back-transliteration. N18-1090 @@ -1151,7 +1151,7 @@ Deep Generative Model for Joint Alignment and Word Representation MiguelRios WilkerAziz - KhalilSima’an + KhalilSima’an 1011–1023 This work exploits translation data as a source of semantically relevant learning signal for models of word representation. In particular, we exploit equivalence through translation as a form of distributional context and jointly learn how to embed and align with a deep generative model. Our EmbedAlign model embeds words in their complete observed context and learns by marginalisation of latent lexical alignments. Besides, it embeds words as posterior probability densities, rather than point estimates, which allows us to compare words in context using a measure of overlap between distributions (e.g. KL divergence). We investigate our model’s performance on a range of lexical semantics tasks achieving competitive results on several standard benchmarks including natural language inference, paraphrasing, and text similarity. N18-1092 @@ -1176,7 +1176,7 @@ Exploring the Role of Prior Beliefs for Argument Persuasion EsinDurmus - ClaireCardie + ClaireCardie 1035–1045 Public debate forums provide a common platform for exchanging opinions on a topic of interest. While recent studies in natural language processing (NLP) have provided empirical evidence that the language of the debaters and their patterns of interaction play a key role in changing the mind of a reader, research in psychology has shown that prior beliefs can affect our interpretation of an argument and could therefore constitute a competing alternative explanation for resistance to changing one’s stance. To study the actual effect of language use vs. prior beliefs on persuasion, we provide a new dataset and propose a controlled setting that takes into consideration two reader-level factors: political and religious ideology. We find that prior beliefs affected by these reader-level factors play a more important role than language use effects and argue that it is important to account for them in NLP studies of persuasion. N18-1094 @@ -1201,7 +1201,7 @@ Author Commitment and Social Power: Automatic Belief Tagging to Infer the Social Context of Interactions VinodkumarPrabhakaran PremkumarGaneshkumar - OwenRambow + OwenRambow 1057–1068 Understanding how social power structures affect the way we interact with one another is of great interest to social scientists who want to answer fundamental questions about human behavior, as well as to computer scientists who want to build automatic methods to infer the social contexts of interactions. In this paper, we employ advancements in extra-propositional semantics extraction within NLP to study how author commitment reflects the social context of an interactions. Specifically, we investigate whether the level of commitment expressed by individuals in an organizational interaction reflects the hierarchical power structures they are part of. We find that subordinates use significantly more instances of non-commitment than superiors. More importantly, we also find that subordinates attribute propositions to other agents more often than superiors do — an aspect that has not been studied before. Finally, we show that enriching lexical features with commitment labels captures important distinctions in social meanings. N18-1096 @@ -1222,7 +1222,7 @@ Deep Temporal-Recurrent-Replicated-Softmax for Topical Trends over Time PankajGupta SubburamRajaram - HinrichSchütze + HinrichSchütze BerntAndrassy 1079–1089 Dynamic topic modeling facilitates the identification of topical trends over time in temporal collections of unstructured documents. We introduce a novel unsupervised neural dynamic topic model named as Recurrent Neural Network-Replicated Softmax Model (RNNRSM), where the discovered topics at each time influence the topic discovery in the subsequent time steps. We account for the temporal ordering of documents by explicitly modeling a joint distribution of latent topical dependencies over time, using distributional estimators with temporal recurrent connections. Applying RNN-RSM to 19 years of articles on NLP research, we demonstrate that compared to state-of-the art topic models, RNNRSM shows better generalization, topic interpretation, evolution and trends. We also introduce a metric (named as SPAN) to quantify the capability of dynamic topic model to capture word evolution in topics over time. @@ -1235,7 +1235,7 @@ Lessons from the <fixed-case>B</fixed-case>ible on Modern Topics: Low-Resource Multilingual Topic Model Evaluation ShudongHao JordanBoyd-Graber - Michael J.Paul + Michael J.Paul 1090–1100 Multilingual topic models enable document analysis across languages through coherent multilingual summaries of the data. However, there is no standard and effective metric to evaluate the quality of multilingual topics. We introduce a new intrinsic evaluation of multilingual topic models that correlates well with human judgments of multilingual topic coherence as well as performance in downstream applications. Importantly, we also study evaluation for low-resource languages. Because standard metrics fail to accurately measure topic quality when robust external resources are unavailable, we propose an adaptation model that improves the accuracy and reliability of these metrics in low-resource settings. N18-1099 @@ -1260,7 +1260,7 @@ A Broad-Coverage Challenge Corpus for Sentence Understanding through Inference AdinaWilliams NikitaNangia - SamuelBowman + SamuelBowman 1112–1122 This paper introduces the Multi-Genre Natural Language Inference (MultiNLI) corpus, a dataset designed for use in the development and evaluation of machine learning models for sentence understanding. At 433k examples, this resource is one of the largest corpora available for natural language inference (a.k.a. recognizing textual entailment), improving upon available resources in both its coverage and difficulty. MultiNLI accomplishes this by offering data from ten distinct genres of written and spoken English, making it possible to evaluate systems on nearly the full complexity of the language, while supplying an explicit setting for evaluating cross-genre domain adaptation. In addition, an evaluation using existing machine learning models designed for the Stanford NLI corpus shows that it represents a substantially more difficult task than does that corpus, despite the two showing similar levels of inter-annotator agreement. N18-1101 @@ -1294,7 +1294,7 @@ Cross-Lingual <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation Parsing MarcoDamonte - Shay B.Cohen + Shay B.Cohen 1146–1155 Abstract Meaning Representation (AMR) research has mostly focused on English. We show that it is possible to use AMR annotations for English as a semantic representation for sentences written in other languages. We exploit an AMR parser for English and parallel corpora to learn AMR parsers for Italian, Spanish, German and Chinese. Qualitative analysis show that the new parsers overcome structural differences between the languages. We further propose a method to evaluate the parsers that does not require gold standard data in the target languages. This method highly correlates with the gold standard evaluation, obtaining a Pearson correlation coefficient of 0.95. N18-1104 @@ -1306,7 +1306,7 @@ Sentences with Gapping: Parsing and Reconstructing Elided Predicates SebastianSchuster JoakimNivre - Christopher D.Manning + Christopher D.Manning 1156–1168 Sentences with gapping, such as Paul likes coffee and Mary tea, lack an overt predicate to indicate the relation between two or more arguments. Surface syntax representations of such sentences are often produced poorly by parsers, and even if correct, not well suited to downstream natural language understanding tasks such as relation extraction that are typically designed to extract information from sentences with canonical clause structure. In this paper, we present two methods for parsing to a Universal Dependencies graph representation that explicitly encodes the elided material with additional nodes and edges. We find that both methods can reconstruct elided material from dependency trees with high accuracy when the parser correctly predicts the existence of a gap. We further demonstrate that one of our methods can be applied to other languages based on a case study on Swedish. N18-1105 @@ -1332,7 +1332,7 @@ RobertFrank PauliXu WilliamMerrill - OwenRambow + OwenRambow 1181–1194 We present a graph-based Tree Adjoining Grammar (TAG) parser that uses BiLSTMs, highway connections, and character-level CNNs. Our best end-to-end parser, which jointly performs supertagging, POS tagging, and parsing, outperforms the previously reported best results by more than 2.2 LAS and UAS points. The graph-based parsing architecture allows for global inference and rich feature representations for TAG parsing, alleviating the fundamental trade-off between transition-based and graph-based parsing systems. We also demonstrate that the proposed parser achieves state-of-the-art performance in the downstream tasks of Parsing Evaluation using Textual Entailments (PETE) and Unbounded Dependency Recovery. This provides further support for the claim that TAG is a viable formalism for problems that require rich structural analysis of sentences. N18-1107 @@ -1344,9 +1344,9 @@ Colorless Green Recurrent Networks Dream Hierarchically KristinaGulordava PiotrBojanowski - EdouardGrave + EdouardGrave TalLinzen - MarcoBaroni + MarcoBaroni 1195–1205 Recurrent neural networks (RNNs) achieved impressive results in a variety of linguistic processing tasks, suggesting that they can induce non-trivial properties of language. We investigate to what extent RNNs learn to track abstract hierarchical syntactic structure. We test whether RNNs trained with a generic language modeling objective in four languages (Italian, English, Hebrew, Russian) can predict long-distance number agreement in various constructions. We include in our evaluation nonsensical sentences where RNNs cannot rely on semantic or lexical cues (“The colorless green ideas I ate with the chair sleep furiously”), and, for Italian, we compare model performance to human intuitions. Our language-model-trained RNNs make reliable predictions about long-distance agreement, and do not lag much behind human performance. We thus bring support to the hypothesis that RNNs are not just shallow-pattern extractors, but they also acquire deeper grammatical competence. N18-1108 @@ -1375,8 +1375,8 @@ Early Text Classification Using Multi-Resolution Concept Representations Adrian PastorLópez-Monroy - Fabio A.González - ManuelMontes + Fabio A.González + ManuelMontes Hugo JairEscalante ThamarSolorio 1216–1225 @@ -1388,7 +1388,7 @@ Multinomial Adversarial Networks for Multi-Domain Text Classification XilunChen - ClaireCardie + ClaireCardie 1226–1240 Many text classification tasks are known to be highly domain-dependent. Unfortunately, the availability of training data can vary drastically across domains. Worse still, for some domains there may not be any annotated data at all. In this work, we propose a multinomial adversarial network (MAN) to tackle this real-world problem of multi-domain text classification (MDTC) in which labeled data may exist for multiple domains, but in insufficient amounts to train effective classifiers for one or more of the domains. We provide theoretical justifications for the MAN framework, proving that different instances of MANs are essentially minimizers of various f-divergence metrics (Ali and Silvey, 1966) among multiple probability distributions. MANs are thus a theoretically sound generalization of traditional adversarial networks that discriminate over two distributions. More specifically, for the MDTC task, MAN learns features that are invariant across multiple domains by resorting to its ability to reduce the divergence among the feature distributions of each domain. We present experimental results showing that MANs significantly outperform the prior art on the MDTC task. We also show that MANs achieve state-of-the-art performance for domains with no labeled data. N18-1111.Software.tgz @@ -1432,11 +1432,11 @@ The Context-Dependent Additive Recurrent Neural Net - Quan HungTran - TuanLai - GholamrezaHaffari + Quan HungTran + TuanLai + GholamrezaHaffari IngridZukerman - TrungBui + TrungBui HungBui 1274–1283 Contextual sequence mapping is one of the fundamental problems in Natural Language Processing (NLP). Here, instead of relying solely on the information presented in the text, the learning agents have access to a strong external signal given to assist the learning process. In this paper, we propose a novel family of Recurrent Neural Network unit: the Context-dependent Additive Recurrent Neural Network (CARNN) that is designed specifically to address this type of problem. The experimental results on public datasets in the dialog problem (Babi dialog Task 6 and Frame), contextual language model (Switchboard and Penn Tree Bank) and question answering (Trec QA) show that our novel CARNN-based architectures outperform previous methods. @@ -1449,8 +1449,8 @@ HuadongChen ShujianHuang DavidChiang - XinyuDai - JiajunChen + XinyuDai + JiajunChen 1284–1293 Natural language sentences, being hierarchical, can be represented at different levels of granularity, like words, subwords, or characters. But most neural machine translation systems require the sentence to be represented as a sequence at a single level of granularity. It can be difficult to determine which granularity is better for a particular translation task. In this paper, we improve the model by incorporating multiple levels of granularity. Specifically, we propose (1) an encoder with character attention which augments the (sub)word-level representation with character-level information; (2) a decoder with multiple attentions that enable the representations from different levels of granularity to control the translation cooperatively. Experiments on three translation tasks demonstrate that our proposed models outperform the standard word-based model, the subword-based model, and a strong character-based model. N18-1116 @@ -1497,7 +1497,7 @@ Guiding Neural Machine Translation with Retrieved Translation Pieces JingyiZhang MasaoUtiyama - EiichroSumita + EiichroSumita GrahamNeubig SatoshiNakamura 1325–1335 @@ -1533,7 +1533,7 @@ Neural Machine Translation for Bilingually Scarce Scenarios: a Deep Multi-Task Learning Approach PooryaZaremoodi - GholamrezaHaffari + GholamrezaHaffari 1356–1365 Neural machine translation requires large amount of parallel training text to learn a reasonable quality translation model. This is particularly inconvenient for language pairs for which enough parallel text is not available. In this paper, we use monolingual linguistic resources in the source side to address this challenging problem based on a multi-task learning approach. More specifically, we scaffold the machine translation task on auxiliary tasks including semantic parsing, syntactic parsing, and named-entity recognition. This effectively injects semantic and/or syntactic knowledge into the translation model, which would otherwise require a large amount of training bitext to learn from. We empirically analyze and show the effectiveness of our multitask learning approach on three translation tasks: English-to-French, English-to-Farsi, and English-to-Vietnamese. N18-1123 @@ -1542,10 +1542,10 @@ Self-Attentive Residual Decoder for Neural Machine Translation - LeslyMiculicich Werlen + LeslyMiculicich Werlen NikolaosPappas DhananjayRam - AndreiPopescu-Belis + AndreiPopescu-Belis 1366–1379 Neural sequence-to-sequence networks with attention have achieved remarkable performance for machine translation. One of the reasons for their effectiveness is their ability to capture relevant source-side contextual information at each time-step prediction through an attention mechanism. However, the target-side context is solely based on the sequence model which, in practice, is prone to a recency bias and lacks the ability to capture effectively non-sequential dependencies among words. To address this limitation, we propose a target-side-attentive residual recurrent network for decoding, where attention over previous words contributes directly to the prediction of the next word. The residual learning facilitates the flow of information from the distant past and is able to emphasize any of the previously translated words, hence it gains access to a wider context. The proposed model outperforms a neural MT baseline as well as a memory and self-attention network on three language pairs. The analysis of the attention learned by the decoder confirms that it emphasizes a wider context, and that it captures syntactic-like structures. N18-1124.Notes.pdf @@ -1569,7 +1569,7 @@ Context Sensitive Neural Lemmatization with <fixed-case>L</fixed-case>ematus TomsBergmanis - SharonGoldwater + SharonGoldwater 1391–1400 The main motivation for developing contextsensitive lemmatizers is to improve performance on unseen and ambiguous words. Yet previous systems have not carefully evaluated whether the use of context actually helps in these cases. We introduce Lematus, a lemmatizer based on a standard encoder-decoder architecture, which incorporates character-level sentence context. We evaluate its lemmatization accuracy across 20 languages in both a full data setting and a lower-resource setting with 10k training examples in each language. In both settings, we show that including context significantly improves results against a context-free version of the model. Context helps more for ambiguous words than for unseen words, though the latter has a greater effect on overall performance differences between languages. We also compare to three previous context-sensitive lemmatization systems, which all use pre-extracted edit trees as well as hand-selected features and/or additional sources of information such as tagged training data. Without using any of these, our context-sensitive model outperforms the best competitor system (Lemming) in the fulldata setting, and performs on par in the lowerresource setting. N18-1126 @@ -1580,7 +1580,7 @@ Modeling Noisiness to Recognize Named Entities using Multitask Neural Networks on Social Media GustavoAguilar Adrian PastorLópez-Monroy - FabioGonzález + FabioGonzález ThamarSolorio 1401–1412 Recognizing named entities in a document is a key task in many NLP applications. Although current state-of-the-art approaches to this task reach a high performance on clean text (e.g. newswire genres), those algorithms dramatically degrade when they are moved to noisy environments such as social media domains. We present two systems that address the challenges of processing social media data using character-level phonetics and phonology, word embeddings, and Part-of-Speech tags as features. The first model is a multitask end-to-end Bidirectional Long Short-Term Memory (BLSTM)-Conditional Random Field (CRF) network whose output layer contains two CRF classifiers. The second model uses a multitask BLSTM network as feature extractor that transfers the learning to a CRF classifier for the final prediction. Our systems outperform the current F1 scores of the state of the art on the Workshop on Noisy User-generated Text 2017 dataset by 2.45% and 3.69%, establishing a more suitable approach for social media environments. @@ -1612,7 +1612,7 @@ Using Morphological Knowledge in Open-Vocabulary Neural Language Models AustinMatthews GrahamNeubig - ChrisDyer + ChrisDyer 1435–1445 Languages with productive morphology pose problems for language models that generate words from a fixed vocabulary. Although character-based models allow any possible word type to be generated, they are linguistically naïve: they must discover that words exist and are delimited by spaces—basic linguistic facts that are built in to the structure of word-based models. We introduce an open-vocabulary language model that incorporates more sophisticated linguistic knowledge by predicting words using a mixture of three generative processes: (1) by generating words as a sequence of characters, (2) by directly generating full word forms, and (3) by generating words as a sequence of morphemes that are combined using a hand-written morphological analyzer. Experiments on Finnish, Turkish, and Russian show that our model outperforms character sequence models and other strong baselines on intrinsic and extrinsic measures. Furthermore, we show that our model learns to exploit morphological knowledge encoded in the analyzer, and, as a byproduct, it can perform effective unsupervised morphological disambiguation. N18-1130 @@ -1634,7 +1634,7 @@ <fixed-case>DR</fixed-case>-<fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case>: Dependent Reading Bidirectional <fixed-case>LSTM</fixed-case> for Natural Language Inference RezaGhaeini - Sadid A.Hasan + Sadid A.Hasan VivekDatla JoeyLiu KathyLee @@ -1682,7 +1682,7 @@ HaoPeng SamThomson SwabhaSwayamdipta - Noah A.Smith + Noah A.Smith 1492–1502 We present a new approach to learning a semantic parser from multiple datasets, even when the target semantic formalisms are drastically different and the underlying corpora do not overlap. We handle such “disjoint” data by treating annotations for unobserved formalisms as latent structured variables. Building on state-of-the-art baselines, we show improvements both in frame-semantic parsing and semantic dependency parsing by modeling them jointly. N18-1135 @@ -1733,7 +1733,7 @@ ParagJain AnirbanLaha KarthikSankaranarayanan - Mitesh M.Khapra + Mitesh M.Khapra 1539–1550 In this work, we focus on the task of generating natural language descriptions from a structured table of facts containing fields (such as nationality, occupation, etc) and values (such as Indian, actor, director, etc). One simple choice is to treat the table as a sequence of fields and values and then use a standard seq2seq model for this task. However, such a model is too generic and does not exploit task specific characteristics. For example, while generating descriptions from a table, a human would attend to information at two levels: (i) the fields (macro level) and (ii) the values within the field (micro level). Further, a human would continue attending to a field for a few timesteps till all the information from that field has been rendered and then never return back to this field (because there is nothing left to say about it). To capture this behavior we use (i) a fused bifocal attention mechanism which exploits and combines this micro and macro level information and (ii) a gated orthogonalization mechanism which tries to ensure that a field is remembered for a few time steps and then forgotten. We experiment with a recently released dataset which contains fact tables about people and their corresponding one line biographical descriptions in English. In addition, we also introduce two similar datasets for French and German. Our experiments show that the proposed model gives 21% relative improvement over a recently proposed state of the art method and 10% relative improvement over basic seq2seq models. The code and the datasets developed as a part of this work are publicly available on https://github.com/PrekshaNema25/StructuredData_To_Descriptions N18-1139 @@ -1742,8 +1742,8 @@ <fixed-case>C</fixed-case>li<fixed-case>CR</fixed-case>: a Dataset of Clinical Case Reports for Machine Reading Comprehension - SimonŠuster - WalterDaelemans + SimonŠuster + WalterDaelemans 1551–1563 We present a new dataset for machine comprehension in the medical domain. Our dataset uses clinical case reports with around 100,000 gap-filling queries about these cases. We apply several baselines and state-of-the-art neural readers to the dataset, and observe a considerable gap in performance (20% F1) between the best human and machine readers. We analyze the skills required for successful answering and show how reader performance varies depending on the applicable skills. We find that inferences using domain knowledge and object tracking are the most frequently required skills, and that recognizing omitted information and spatio-temporal reasoning are the most difficult for the machines. N18-1140 @@ -1781,7 +1781,7 @@ Supervised and Unsupervised Transfer Learning for Question Answering Yu-AnChung Hung-YiLee - JamesGlass + JamesGlass 1585–1594 Although transfer learning has been shown to be successful for tasks like object and speech recognition, its applicability to question answering (QA) has yet to be well-studied. In this paper, we conduct extensive experiments to investigate the transferability of knowledge learned from a source QA dataset to a target dataset using two QA models. The performance of both models on a TOEFL listening comprehension test (Tseng et al., 2016) and MCTest (Richardson et al., 2013) is significantly improved via a simple transfer learning technique from MovieQA (Tapaswi et al., 2016). In particular, one of the models achieves the state-of-the-art on all target datasets; for the TOEFL listening comprehension test, it outperforms the previous best model by 7%. Finally, we show that transfer learning is helpful even in unsupervised scenarios when correct answers for target QA dataset examples are not available. N18-1143 @@ -1790,10 +1790,10 @@ Tracking State Changes in Procedural Text: a Challenge Dataset and Models for Process Paragraph Comprehension - BhavanaDalvi + BhavanaDalvi LifuHuang NiketTandon - Wen-tauYih + Wen-tauYih PeterClark 1595–1604 We present a new dataset and models for comprehending paragraphs about processes (e.g., photosynthesis), an important genre of text describing a dynamic world. The new dataset, ProPara, is the first to contain natural (rather than machine-generated) text about a changing world along with a full annotation of entity states (location and existence) during those changes (81k datapoints). The end-task, tracking the location and existence of entities through the text, is challenging because the causal effects of actions are often implicit and need to be inferred. We find that previous models that have worked well on synthetic data achieve only mediocre performance on ProPara, and introduce two new neural models that exploit alternative mechanisms for state prediction, in particular using LSTM input encoding and span prediction. The new models improve accuracy by up to 19%. We are releasing the ProPara dataset and our models to the community. @@ -1817,8 +1817,8 @@ Deconfounded Lexicon Induction for Interpretable Social Science ReidPryzant KellyShen - DanJurafsky - StefanWagner + DanJurafsky + StefanWagner 1615–1625 NLP algorithms are increasingly used in computational social science to take linguistic observations and predict outcomes like human preferences or actions. Making these social models transparent and interpretable often requires identifying features in the input that predict outcomes while also controlling for potential confounds. We formalize this need as a new task: inducing a lexicon that is predictive of a set of target variables yet uncorrelated to a set of confounding variables. We introduce two deep learning algorithms for the task. The first uses a bifurcated architecture to separate the explanatory power of the text and confounds. The second uses an adversarial discriminator to force confound-invariant text encodings. Both elicit lexicons from learned weights and attentional scores. We use them to induce lexicons that are predictive of timely responses to consumer complaints (controlling for product), enrollment from course descriptions (controlling for subject), and sales from product descriptions (controlling for seller). In each domain our algorithms pick words that are associated with narrative persuasion; more predictive and less confound-related than those of standard @@ -1830,7 +1830,7 @@ Detecting Denial-of-Service Attacks from Social Media Text: Applying <fixed-case>NLP</fixed-case> to Computer Security - NathanaelChambers + NathanaelChambers BenFry JamesMcMasters 1626–1635 @@ -1842,7 +1842,7 @@ The Importance of Calibration for Estimating Proportions from Annotations DallasCard - Noah A.Smith + Noah A.Smith 1636–1646 Estimating label proportions in a target corpus is a type of measurement that is useful for answering certain types of social-scientific questions. While past work has described a number of relevant approaches, nearly all are based on an assumption which we argue is invalid for many problems, particularly when dealing with human annotations. In this paper, we identify and differentiate between two relevant data generating scenarios (intrinsic vs. extrinsic labels), introduce a simple but novel method which emphasizes the importance of calibration, and then analyze and experimentally validate the appropriateness of various methods for each of the two scenarios. N18-1148.Notes.pdf @@ -1854,10 +1854,10 @@ A Dataset of Peer Reviews (<fixed-case>P</fixed-case>eer<fixed-case>R</fixed-case>ead): Collection, Insights and <fixed-case>NLP</fixed-case> Applications DongyeopKang WaleedAmmar - BhavanaDalvi + BhavanaDalvi Madeleinevan Zuylen SebastianKohlmeier - EduardHovy + EduardHovy RoySchwartz 1647–1661 Peer reviewing is a central component in the scientific publishing process. We present the first public dataset of scientific peer reviews available for research purposes (PeerRead v1),1 providing an opportunity to study this important artifact. The dataset consists of 14.7K paper drafts and the corresponding accept/reject decisions in top-tier venues including ACL, NIPS and ICLR. The dataset also includes 10.7K textual peer reviews written by experts for a subset of the papers. We describe the data collection process and report interesting observed phenomena in the peer reviews. We also propose two novel NLP tasks based on this dataset and provide simple baseline models. In the first task, we show that simple models can predict whether a paper is accepted with up to 21% error reduction compared to the majority baseline. In the second task, we predict the numerical scores of review aspects and show that simple models can outperform the mean baseline for aspects with high variance such as ‘originality’ and ‘impact’. @@ -1965,7 +1965,7 @@ Ranking Sentences for Extractive Summarization with Reinforcement Learning ShashiNarayan - Shay B.Cohen + Shay B.Cohen MirellaLapata 1747–1759 Single document summarization is the task of producing a shorter version of a document while preserving its principal information content. In this paper we conceptualize extractive summarization as a sentence ranking task and propose a novel training algorithm which globally optimizes the ROUGE evaluation metric through a reinforcement learning objective. We use our algorithm to train a neural summarization model on the CNN and DailyMail datasets and demonstrate experimentally that it outperforms state-of-the-art extractive and abstractive systems when evaluated automatically and by humans. @@ -1985,7 +1985,7 @@ What’s This Movie About? A Joint Neural Network Architecture for Movie Content Analysis - Philip JohnGorinski + Philip JohnGorinski MirellaLapata 1770–1781 This work takes a first step toward movie content analysis by tackling the novel task of movie overview generation. Overviews are natural language texts that give a first impression of a movie, describing aspects such as its genre, plot, mood, or artistic style. We create a dataset that consists of movie scripts, attribute-value pairs for the movies’ aspects, as well as overviews, which we extract from an online database. We present a novel end-to-end model for overview generation, consisting of a multi-label encoder for identifying screenplay attributes, and an LSTM decoder to generate natural language sentences conditioned on the identified attributes. Automatic and human evaluation show that the encoder is able to reliably assign good labels for the movie’s attributes, and the overviews provide descriptions of the movie’s content which are informative and faithful. @@ -1996,7 +1996,7 @@ Which Scores to Predict in Sentence Regression for Text Summarization? MarkusZopf - EneldoLoza Mencía + EneldoLoza Mencía JohannesFürnkranz 1782–1791 The task of automatic text summarization is to generate a short text that summarizes the most important information in a given set of documents. Sentence regression is an emerging branch in automatic text summarizations. Its key idea is to estimate the importance of information via learned utility scores for individual sentences. These scores are then used for selecting sentences from the source documents, typically according to a greedy selection strategy. Recently proposed state-of-the-art models learn to predict ROUGE recall scores of individual sentences, which seems reasonable since the final summaries are evaluated according to ROUGE recall. In this paper, we show in extensive experiments that following this intuition leads to suboptimal results and that learning to predict ROUGE precision scores leads to better results. The crucial difference is to aim not at covering as much information as possible but at wasting as little space as possible in every greedy step. @@ -2035,7 +2035,7 @@ Learning to Disentangle Interleaved Conversational Threads with a <fixed-case>S</fixed-case>iamese Hierarchical Network and Similarity Ranking Jyun-YuJiang - FrancineChen + FrancineChen Yan-YingChen WeiWang 1812–1822 @@ -2072,7 +2072,7 @@ <fixed-case>ELDEN</fixed-case>: Improved Entity Linking Using Densified Knowledge Graphs PriyaRadhakrishnan - ParthaTalukdar + ParthaTalukdar VasudevaVarma 1844–1853 Entity Linking (EL) systems aim to automatically map mentions of an entity in text to the corresponding entity in a Knowledge Graph (KG). Degree of connectivity of an entity in the KG directly affects an EL system’s ability to correctly link mentions in text to the entity in KG. This causes many EL systems to perform well for entities well connected to other entities in KG, bringing into focus the role of KG density in EL. In this paper, we propose Entity Linking using Densified Knowledge Graphs (ELDEN). ELDEN is an EL system which first densifies the KG with co-occurrence statistics from a large text corpus, and then uses the densified KG to train entity embeddings. Entity similarity measured using these trained entity embeddings result in improved EL. ELDEN outperforms state-of-the-art EL system on benchmark datasets. Due to such densification, ELDEN performs well for sparsely connected entities in the KG too. ELDEN’s approach is simple, yet effective. We have made ELDEN’s code and data publicly available. @@ -2087,7 +2087,7 @@ HaiYe XinJiang ZhunchenLuo - WenhanChao + WenhanChao 1854–1864 In this paper, we propose to study the problem of court view generation from the fact description in a criminal case. The task aims to improve the interpretability of charge prediction systems and help automatic legal document generation. We formulate this task as a text-to-text natural language generation (NLG) problem. Sequence-to-sequence model has achieved cutting-edge performances in many NLG tasks. However, due to the non-distinctions of fact descriptions, it is hard for Seq2Seq model to generate charge-discriminative court views. In this work, we explore charge labels to tackle this issue. We propose a label-conditioned Seq2Seq model with attention for this problem, to decode court views conditioned on encoded charge labels. Experimental results show the effectiveness of our method. N18-1168 @@ -2114,7 +2114,7 @@ MohitIyyer JohnWieting KevinGimpel - LukeZettlemoyer + LukeZettlemoyer 1875–1885 We propose syntactically controlled paraphrase networks (SCPNs) and use them to generate adversarial examples. Given a sentence and a target syntactic form (e.g., a constituency parse), SCPNs are trained to produce a paraphrase of the sentence with the desired syntax. We show it is possible to create training data for this task by first doing backtranslation at a very large scale, and then using a parser to label the syntactic transformations that naturally occur during this process. Such data allows us to train a neural encoder-decoder model with extra inputs to specify the target syntax. A combination of automated and human evaluations show that SCPNs generate paraphrases that follow their target specifications without decreasing paraphrase quality when compared to baseline (uncontrolled) paraphrase systems. Furthermore, they are more capable of generating syntactically adversarial examples that both (1) “fool” pretrained models and (2) improve the robustness of these models to syntactic variation when used to augment their training data. N18-1170 @@ -2149,7 +2149,7 @@ Multi-Task Learning of Pairwise Sequence Classification Tasks over Disparate Label Spaces IsabelleAugenstein SebastianRuder - AndersSøgaard + AndersSøgaard 1896–1906 We combine multi-task learning and semi-supervised learning by inducing a joint embedding space between disparate label spaces and learning transfer functions between label embeddings, enabling us to jointly leverage unlabelled data and auxiliary, annotated datasets. We evaluate our approach on a variety of tasks with disparate label spaces. We outperform strong single and multi-task baselines and achieve a new state of the art for aspect-based and topic-based sentiment analysis. N18-1172 @@ -2159,7 +2159,7 @@ Word Emotion Induction for Multiple Languages as a Deep Multi-Task Learning Problem - SvenBuechel + SvenBuechel UdoHahn 1907–1918 Predicting the emotional value of lexical items is a well-known problem in sentiment analysis. While research has focused on polarity for quite a long time, meanwhile this early focus has been shifted to more expressive emotion representation models (such as Basic Emotions or Valence-Arousal-Dominance). This change resulted in a proliferation of heterogeneous formats and, in parallel, often small-sized, non-interoperable resources (lexicons and corpus annotations). In particular, the limitations in size hampered the application of deep learning methods in this area because they typically require large amounts of input data. We here present a solution to get around this language data bottleneck by rephrasing word emotion induction as a multi-task learning problem. In this approach, the prediction of each independent emotion dimension is considered as an individual task and hidden layers are shared between these dimensions. We investigate whether multi-task learning is more advantageous than single-task learning for emotion prediction by comparing our model against a wide range of alternative emotion and polarity induction methods featuring 9 typologically diverse languages and a total of 15 conditions. Our model turns out to outperform each one of them. Against all odds, the proposed deep learning approach yields the largest gain on the smallest data sets, merely composed of one thousand samples. @@ -2171,7 +2171,7 @@ Human Needs Categorization of Affective Events Using Labeled and Unlabeled Data HaiboDing - EllenRiloff + EllenRiloff 1919–1929 We often talk about events that impact us positively or negatively. For example “I got a job” is good news, but “I lost my job” is bad news. When we discuss an event, we not only understand its affective polarity but also the reason why the event is beneficial or detrimental. For example, getting or losing a job has affective polarity primarily because it impacts us financially. Our work aims to categorize affective events based upon human need categories that often explain people’s motivations and desires: PHYSIOLOGICAL, HEALTH, LEISURE, SOCIAL, FINANCIAL, COGNITION, and FREEDOM. We create classification models based on event expressions as well as models that use contexts surrounding event mentions. We also design a co-training model that learns from unlabeled data by simultaneously training event expression and event context classifiers in an iterative learning process. Our results show that co-training performs well, producing substantially better results than the individual classifiers. N18-1174 @@ -2197,7 +2197,7 @@ Linguistic Cues to Deception and Perceived Deception in Interview Dialogues Sarah ItaLevitan AngelMaredia - JuliaHirschberg + JuliaHirschberg 1941–1950 We explore deception detection in interview dialogues. We analyze a set of linguistic features in both truthful and deceptive responses to interview questions. We also study the perception of deception, identifying characteristics of statements that are perceived as truthful or deceptive by interviewers. Our analysis show significant differences between truthful and deceptive question responses, as well as variations in deception patterns across gender and native language. This analysis motivated our selection of features for machine learning experiments aimed at classifying globally deceptive speech. Our best classification performance is 72.74% F1-Score (about 17% better than human performance), which is achieved using a combination of linguistic features and individual traits. N18-1176 @@ -2219,9 +2219,9 @@ Hierarchical Structured Model for Fine-to-Coarse Manifesto Text Analysis - ShivashankarSubramanian - TrevorCohn - TimothyBaldwin + ShivashankarSubramanian + TrevorCohn + TimothyBaldwin 1964–1974 Election manifestos document the intentions, motives, and views of political parties. They are often used for analysing a party’s fine-grained position on a particular issue, as well as for coarse-grained positioning of a party on the left–right spectrum. In this paper we propose a two-stage model for automatically performing both levels of analysis over manifestos. In the first step we employ a hierarchical multi-task structured deep model to predict fine- and coarse-grained positions, and in the second step we perform post-hoc calibration of coarse-grained positions using probabilistic soft logic. We empirically show that the proposed model outperforms state-of-art approaches at both granularities using manifestos from twelve countries, written in ten different languages. N18-1178 @@ -2246,7 +2246,7 @@ Assessing Language Proficiency from Eye Movements in Reading YevgeniBerzak BorisKatz - RogerLevy + RogerLevy 1986–1996 We present a novel approach for determining learners’ second language proficiency which utilizes behavioral traces of eye movements during reading. Our approach provides stand-alone eyetracking based English proficiency scores which reflect the extent to which the learner’s gaze patterns in reading are similar to those of native English speakers. We show that our scores correlate strongly with standardized English proficiency tests. We also demonstrate that gaze information can be used to accurately predict the outcomes of such tests. Our approach yields the strongest performance when the test taker is presented with a suite of sentences for which we have eyetracking data from other readers. However, it remains effective even using eyetracking with sentences for which eye movement data have not been previously collected. By deriving proficiency as an automatic byproduct of eye movements during ordinary reading, our approach offers a potentially valuable new tool for second language proficiency assessment. More broadly, our results open the door to future methods for inferring reader characteristics from the behavioral traces of reading. N18-1180 @@ -2256,7 +2256,7 @@ Comparing Theories of Speaker Choice Using a Model of Classifier Production in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese MeilinZhan - RogerLevy + RogerLevy 1997–2005 Speakers often have more than one way to express the same meaning. What general principles govern speaker choice in the face of optionality when near semantically invariant alternation exists? Studies have shown that optional reduction in language is sensitive to contextual predictability, such that more predictable a linguistic unit is, the more likely it is to get reduced. Yet it is unclear whether these cases of speaker choice are driven by audience design versus toward facilitating production. Here we argue that for a different optionality phenomenon, namely classifier choice in Mandarin Chinese, Uniform Information Density and at least one plausible variant of availability-based production make opposite predictions regarding the relationship between the predictability of the upcoming material and speaker choices. In a corpus analysis of Mandarin Chinese, we show that the distribution of speaker choices supports the availability-based production account and not the Uniform Information Density. N18-1181 @@ -2266,7 +2266,7 @@ Spotting Spurious Data with Neural Networks HadiAmiri - TimothyMiller + TimothyMiller GuerganaSavova 2006–2016 Automatic identification of spurious instances (those with potentially wrong labels in datasets) can improve the quality of existing language resources, especially when annotations are obtained through crowdsourcing or automatically generated based on coded rankings. In this paper, we present effective approaches inspired by queueing theory and psychology of learning to automatically identify spurious instances in datasets. Our approaches discriminate instances based on their “difficulty to learn,” determined by a downstream learner. Our methods can be applied to any dataset assuming the existence of a neural network model for the target task of the dataset. Our best approach outperforms competing state-of-the-art baselines and has a MAP of 0.85 and 0.22 in identifying spurious instances in synthetic and carefully-crowdsourced real-world datasets respectively. @@ -2289,7 +2289,7 @@ MariaBarrett Ana ValeriaGonzález-Garduño LeaFrermann - AndersSøgaard + AndersSøgaard 2028–2038 When learning POS taggers and syntactic chunkers for low-resource languages, different resources may be available, and often all we have is a small tag dictionary, motivating type-constrained unsupervised induction. Even small dictionaries can improve the performance of unsupervised induction algorithms. This paper shows that performance can be further improved by including data that is readily available or can be easily obtained for most languages, i.e., eye-tracking, speech, or keystroke logs (or any combination thereof). We project information from all these data sources into shared spaces, in which the union of words is represented. For English unsupervised POS induction, the additional information, which is not required at test time, leads to an average error reduction on Ontonotes domains of 1.5% over systems augmented with state-of-the-art word embeddings. On Penn Treebank the best model achieves 5.4% error reduction over a word embeddings baseline. We also achieve significant improvements for syntactic chunk induction. Our analysis shows that improvements are even bigger when the available tag dictionaries are smaller. N18-1184 @@ -2300,7 +2300,7 @@ Challenging Reading Comprehension on Daily Conversation: Passage Completion on Multiparty Dialog KaixinMa TomaszJurczyk - Jinho D.Choi + Jinho D.Choi 2039–2048 This paper presents a new corpus and a robust deep learning architecture for a task in reading comprehension, passage completion, on multiparty dialog. Given a dialog in text and a passage containing factual descriptions about the dialog where mentions of the characters are replaced by blanks, the task is to fill the blanks with the most appropriate character names that reflect the contexts in the dialog. Since there is no dataset that challenges the task of passage completion in this genre, we create a corpus by selecting transcripts from a TV show that comprise 1,681 dialogs, generating passages for each dialog through crowdsourcing, and annotating mentions of characters in both the dialog and the passages. Given this dataset, we build a deep neural model that integrates rich feature extraction from convolutional neural networks into sequence modeling in recurrent neural networks, optimized by utterance and dialog level attentions. Our model outperforms the previous state-of-the-art model on this task in a different genre using bidirectional LSTM, showing a 13.0+% improvement for longer dialogs. Our analysis shows the effectiveness of the attention mechanisms and suggests a direction to machine comprehension on multiparty dialog. N18-1185 @@ -2321,8 +2321,8 @@ Dialogue Learning with Human Teaching and Feedback in End-to-End Trainable Task-Oriented Dialogue Systems BingLiu - GokhanTür - DilekHakkani-Tür + GokhanTür + DilekHakkani-Tür PararthShah LarryHeck 2060–2069 @@ -2335,7 +2335,7 @@ <fixed-case>LSDSCC</fixed-case>: a Large Scale Domain-Specific Conversational Corpus for Response Generation with Diversity Oriented Evaluation Metrics ZhenXu NanJiang - BingquanLiu + BingquanLiu WengeRong BowenWu BaoxunWang @@ -2361,9 +2361,9 @@ Factors Influencing the Surprising Instability of Word Embeddings - LauraWendlandt + LauraWendlandt Jonathan K.Kummerfeld - RadaMihalcea + RadaMihalcea 2092–2102 Despite the recent popularity of word embedding methods, there is only a small body of work exploring the limitations of these representations. In this paper, we consider one aspect of embedding spaces, namely their stability. We show that even relatively high frequency words (100-200 occurrences) are often unstable. We provide empirical evidence for how various factors contribute to the stability of word embeddings, and we analyze the effects of stability on downstream tasks. N18-1190 @@ -2453,7 +2453,7 @@ Object Counts! Bringing Explicit Detections Back into Image Captioning JosiahWang - Pranava SwaroopMadhyastha + Pranava SwaroopMadhyastha LuciaSpecia 2180–2193 The use of explicit object detectors as an intermediate step to image captioning – which used to constitute an essential stage in early work – is often bypassed in the currently dominant end-to-end approaches, where the language model is conditioned directly on a mid-level image embedding. We argue that explicit detections provide rich semantic information, and can thus be used as an interpretable representation to better understand why end-to-end image captioning systems work well. We provide an in-depth analysis of end-to-end image captioning by exploring a variety of cues that can be derived from such object detections. Our study reveals that end-to-end image captioning systems rely on matching image representations to generate captions, and that encoding the frequency, size and position of objects are complementary and all play a role in forming a good image representation. It also reveals that different object categories contribute in different ways towards image captioning. @@ -2480,7 +2480,7 @@ MaxSmith NoriyukiKojima JiaDeng - RadaMihalcea + RadaMihalcea 2206–2216 We propose a new model for speaker naming in movies that leverages visual, textual, and acoustic modalities in an unified optimization framework. To evaluate the performance of our model, we introduce a new dataset consisting of six episodes of the Big Bang Theory TV show and eighteen full movies covering different genres. Our experiments show that our multimodal model significantly outperforms several competitive baselines on the average weighted F-score metric. To demonstrate the effectiveness of our framework, we design an end-to-end memory network model that leverages our speaker naming model and achieves state-of-the-art results on the subtitles task of the MovieQA 2017 Challenge. N18-1200 @@ -2490,7 +2490,7 @@ Stacking with Auxiliary Features for Visual Question Answering Nazneen FatemaRajani - RaymondMooney + RaymondMooney 2217–2226 Visual Question Answering (VQA) is a well-known and challenging task that requires systems to jointly reason about natural language and vision. Deep learning models in various forms have been the standard for solving VQA. However, some of these VQA models are better at certain types of image-question pairs than other models. Ensembling VQA models intelligently to leverage their diverse expertise is, therefore, advantageous. Stacking With Auxiliary Features (SWAF) is an intelligent ensembling technique which learns to combine the results of multiple models using features of the current problem as context. We propose four categories of auxiliary features for ensembling for VQA. Three out of the four categories of features can be inferred from an image-question pair and do not require querying the component models. The fourth category of auxiliary features uses model-specific explanations. In this paper, we describe how we use these various categories of auxiliary features to improve performance for VQA. Using SWAF to effectively ensemble three recent systems, we obtain a new state-of-the-art. Our work also highlights the advantages of explainable AI models. N18-1201 @@ -2499,13 +2499,13 @@ Deep Contextualized Word Representations - Matthew E.Peters + Matthew E.Peters MarkNeumann MohitIyyer MattGardner ChristopherClark KentonLee - LukeZettlemoyer + LukeZettlemoyer 2227–2237 We introduce a new type of deep contextualized word representation that models both (1) complex characteristics of word use (e.g., syntax and semantics), and (2) how these uses vary across linguistic contexts (i.e., to model polysemy). Our word vectors are learned functions of the internal states of a deep bidirectional language model (biLM), which is pre-trained on a large text corpus. We show that these representations can be easily added to existing models and significantly improve the state of the art across six challenging NLP problems, including question answering, textual entailment and sentiment analysis. We also present an analysis showing that exposing the deep internals of the pre-trained network is crucial, allowing downstream models to mix different types of semi-supervision signals. N18-1202.Notes.pdf @@ -2531,7 +2531,7 @@ Neural Text Generation in Stories Using Entity Representations as Context ElizabethClark YangfengJi - Noah A.Smith + Noah A.Smith 2250–2260 We introduce an approach to neural text generation that explicitly represents entities mentioned in the text. Entity representations are vectors that are updated as the text proceeds; they are designed specifically for narrative text like fiction or news stories. Our experiments demonstrate that modeling entities offers a benefit in two automatic evaluations: mention generation (in which a model chooses which entity to mention next and which words to use in the mention) and selection between a correct next sentence and a distractor from later in the same story. We also conduct a human evaluation on automatically generated text in story contexts; this study supports our emphasis on entities and suggests directions for further research. N18-1204 @@ -2558,9 +2558,9 @@ Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 2 (Short Papers) N18-2 - MarilynWalker + MarilynWalker HengJi - AmandaStent + AmandaStent 10.18653/v1/N18-2 Association for Computational Linguistics
New Orleans, Louisiana
@@ -2611,10 +2611,10 @@ Integrating Stance Detection and Fact Checking in a Unified Corpus RamyBaly MitraMohtarami - JamesGlass - LluísMàrquez + JamesGlass + LluísMàrquez AlessandroMoschitti - PreslavNakov + PreslavNakov 21–27 A reasonable approach for fact checking a claim involves retrieving potentially relevant documents from different sources (e.g., news websites, social media, etc.), determining the stance of each document with respect to the claim, and finally making a prediction about the claim’s factuality by aggregating the strength of the stances, while taking the reliability of the source into account. Moreover, a fact checking system should be able to explain its decision by providing relevant extracts (rationales) from the documents. Yet, this setup is not directly supported by existing datasets, which treat fact checking, document retrieval, source credibility, stance detection and rationale extraction as independent tasks. In this paper, we support the interdependencies between these tasks as annotations in the same corpus. We implement this setup on an Arabic fact checking corpus, the first of its kind. N18-2004 @@ -2650,7 +2650,7 @@ BhuwanDhingra QiaoJin ZhilinYang - WilliamCohen + WilliamCohen RuslanSalakhutdinov 42–48 Many problems in NLP require aggregating information from multiple mentions of the same entity which may be far apart in the text. Existing Recurrent Neural Network (RNN) layers are biased towards short-term dependencies and hence not suited to such tasks. We present a recurrent layer which is instead biased towards coreferent dependencies. The layer uses coreference annotations extracted from an external system to connect entity mentions belonging to the same cluster. Incorporating this layer into a state-of-the-art reading comprehension model improves performance on three datasets – Wikihop, LAMBADA and the bAbi AI tasks – with large gains when training data is scarce. @@ -2661,7 +2661,7 @@ Automatic Dialogue Generation with Expressed Emotions ChenyangHuang - OsmarZaïane + OsmarZaïane AmineTrabelsi NouhaDziri 49–54 @@ -2718,7 +2718,7 @@ Sentence Simplification with Memory-Augmented Neural Networks - TuVu + TuVu BaotianHu TsendsurenMunkhdalai HongYu @@ -2731,7 +2731,7 @@ A Corpus of Non-Native Written <fixed-case>E</fixed-case>nglish Annotated for Metaphor BeataBeigman Klebanov - Chee Wee (Ben)Leong + Chee Wee (Ben)Leong MichaelFlor 86–91 We present a corpus of 240 argumentative essays written by non-native speakers of English annotated for metaphor. The corpus is made publicly available. We provide benchmark performance of state-of-the-art systems on this new corpus, and explore the relationship between writing proficiency and metaphor use. @@ -2770,8 +2770,8 @@ SwabhaSwayamdipta OmerLevy RoySchwartz - SamuelBowman - Noah A.Smith + SamuelBowman + Noah A.Smith 107–112 Large-scale datasets for natural language inference are created by presenting crowd workers with a sentence (premise), and asking them to generate three new sentences (hypotheses) that it entails, contradicts, or is logically neutral with respect to. We show that, in a significant portion of such data, this protocol leaves clues that make it possible to identify the label by looking only at the hypothesis, without observing the premise. Specifically, we show that a simple text categorization model can correctly classify the hypothesis alone in about 67% of SNLI (Bowman et. al, 2015) and 53% of MultiNLI (Williams et. al, 2017). Our analysis reveals that specific linguistic phenomena such as negation and vagueness are highly correlated with certain inference classes. Our findings suggest that the success of natural language inference models to date has been overestimated, and that the task remains a hard open problem. N18-2017 @@ -2781,7 +2781,7 @@ Humor Recognition Using Deep Learning Peng-YuChen - Von-WunSoo + Von-WunSoo 113–117 Humor is an essential but most fascinating element in personal communication. How to build computational models to discover the structures of humor, recognize humor and even generate humor remains a challenge and there have been yet few attempts on it. In this paper, we construct and collect four datasets with distinct joke types in both English and Chinese and conduct learning experiments on humor recognition. We implement a Convolutional Neural Network (CNN) with extensive filter size, number and Highway Networks to increase the depth of networks. Results show that our model outperforms in recognition of different types of humor with benchmarks collected in both English and Chinese languages on accuracy, precision, and recall in comparison to previous works. N18-2018 @@ -2848,7 +2848,7 @@ Analogies in Complex Verb Meaning Shifts: the Effect of Affect in Semantic Similarity Models MaximilianKöper - SabineSchulte im Walde + SabineSchulte im Walde 150–156 We present a computational model to detect and distinguish analogies in meaning shifts between German base and complex verbs. In contrast to corpus-based studies, a novel dataset demonstrates that “regular” shifts represent the smallest class. Classification experiments relying on a standard similarity model successfully distinguish between four types of shifts, with verb classes boosting the performance, and affective features for abstractness, emotion and sentiment representing the most salient indicators. N18-2024 @@ -2879,7 +2879,7 @@ Diachronic Usage Relatedness (<fixed-case>DUR</fixed-case>el): A Framework for the Annotation of Lexical Semantic Change DominikSchlechtweg - SabineSchulte im Walde + SabineSchulte im Walde StefanieEckmann 169–174 We propose a framework that extends synchronic polysemy annotation to diachronic changes in lexical meaning, to counteract the lack of resources for evaluating computational models of lexical semantic change. Our framework exploits an intuitive notion of semantic relatedness, and distinguishes between innovative and reductive meaning changes with high inter-annotator agreement. The resulting test set for German comprises ratings from five annotators for the relatedness of 1,320 use pairs across 22 target words. @@ -2918,7 +2918,7 @@ FabienneBraune ViktorHangya TobiasEder - AlexanderFraser + AlexanderFraser 188–193 Bilingual word embeddings are useful for bilingual lexicon induction, the task of mining translations of given words. Many studies have shown that bilingual word embeddings perform well for bilingual lexicon induction but they focused on frequent words in general domains. For many applications, bilingual lexicon induction of rare and domain-specific words is of critical importance. Therefore, we design a new task to evaluate bilingual word embeddings on rare words in different domains. We show that state-of-the-art approaches fail on this task and present simple new techniques to improve bilingual word embeddings for mining rare words. We release new gold standard datasets and code to stimulate research on this task. N18-2030 @@ -2938,7 +2938,7 @@ Introducing Two <fixed-case>V</fixed-case>ietnamese Datasets for Evaluating Semantic Models of (Dis-)Similarity and Relatedness Kim AnhNguyen - SabineSchulte im Walde + SabineSchulte im Walde Ngoc ThangVu 199–205 We present two novel datasets for the low-resource language Vietnamese to assess models of semantic similarity: ViCon comprises pairs of synonyms and antonyms across word classes, thus offering data to distinguish between similarity and dissimilarity. ViSim-400 provides degrees of similarity across five semantic relations, as rated by human judges. The two datasets are verified through standard co-occurrence and neural network models, showing results comparable to the respective English datasets. @@ -2949,7 +2949,7 @@ Lexical Substitution for Evaluating Compositional Distributional Models MajaBuljan - SebastianPadó + SebastianPadó JanŠnajder 206–211 Compositional Distributional Semantic Models (CDSMs) model the meaning of phrases and sentences in vector space. They have been predominantly evaluated on limited, artificial tasks such as semantic sentence similarity on hand-constructed datasets. This paper argues for lexical substitution (LexSub) as a means to evaluate CDSMs. LexSub is a more natural task, enables us to evaluate meaning composition at the level of individual words, and provides a common ground to compare CDSMs with dedicated LexSub models. We create a LexSub dataset for CDSM evaluation from a corpus with manual “all-words” LexSub annotation. Our experiments indicate that the Practical Lexical Function CDSM outperforms simple component-wise CDSMs and performs on par with the context2vec LexSub model using the same context. @@ -3006,7 +3006,7 @@ Sluice Resolution without Hand-Crafted Features over Brittle Syntax Trees OlaRønning DanielHardt - AndersSøgaard + AndersSøgaard 236–241 Sluice resolution in English is the problem of finding antecedents of wh-fronted ellipses. Previous work has relied on hand-crafted features over syntax trees that scale poorly to other languages and domains; in particular, to dialogue, which is one of the most interesting applications of sluice resolution. Syntactic information is arguably important for sluice resolution, but we show that multi-task learning with partial parsing as auxiliary tasks effectively closes the gap and buys us an additional 9% error reduction over previous work. Since we are not directly relying on features from partial parsers, our system is more robust to domain shifts, giving a 26% error reduction on embedded sluices in dialogue. N18-2038.Datasets.zip @@ -3049,8 +3049,8 @@ Letting Emotions Flow: Success Prediction by Modeling the Flow of Emotions in Books SurajMaharjan SudiptaKar - ManuelMontes - Fabio A.González + ManuelMontes + Fabio A.González ThamarSolorio 259–265 Books have the power to make us feel happiness, sadness, pain, surprise, or sorrow. An author’s dexterity in the use of these emotions captivates readers and makes it difficult for them to put the book down. In this paper, we model the flow of emotions over a book using recurrent neural networks and quantify its usefulness in predicting success in books. We obtained the best weighted F1-score of 69% for predicting books’ success in a multitask setting (simultaneously predicting success and genre of books). @@ -3077,7 +3077,7 @@ ShwetaYadav AsifEkbal SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya AmitSheth 271–277 In recent past, social media has emerged as an active platform in the context of healthcare and medicine. In this paper, we present a study where medical user’s opinions on health-related issues are analyzed to capture the medical sentiment at a blog level. The medical sentiments can be studied in various facets such as medical condition, treatment, and medication that characterize the overall health status of the user. Considering these facets, we treat analysis of this information as a multi-task classification problem. In this paper, we adopt a novel adversarial learning approach for our multi-task learning framework to learn the sentiment’s strengths expressed in a medical blog. Our evaluation shows promising results for our target tasks. @@ -3088,8 +3088,8 @@ Recurrent Entity Networks with Delayed Memory Update for Targeted Aspect-Based Sentiment Analysis FeiLiu - TrevorCohn - TimothyBaldwin + TrevorCohn + TimothyBaldwin 278–283 While neural networks have been shown to achieve impressive results for sentence-level sentiment analysis, targeted aspect-based sentiment analysis (TABSA) — extraction of fine-grained opinion polarity w.r.t. a pre-defined set of aspects — remains a difficult task. Motivated by recent advances in memory-augmented models for machine reading, we propose a novel architecture, utilising external “memory chains” with a delayed memory update mechanism to track entities. On a TABSA task, the proposed model demonstrates substantial improvements over state-of-the-art approaches, including those using external knowledge bases. N18-2045 @@ -3159,7 +3159,7 @@ VassilisPlachouras FabioPetroni TimothyNugent - Jochen L.Leidner + Jochen L.Leidner 315–320 Taxonomies are often used to look up the concepts they contain in text documents (for instance, to classify a document). The more comprehensive the taxonomy, the higher recall the application has that uses the taxonomy. In this paper, we explore automatic taxonomy augmentation with paraphrases. We compare two state-of-the-art paraphrase models based on Moses, a statistical Machine Translation system, and a sequence-to-sequence neural network, trained on a paraphrase datasets with respect to their abilities to add novel nodes to an existing taxonomy from the risk domain. We conduct component-based and task-based evaluations. Our results show that paraphrasing is a viable method to enrich a taxonomy with more terms, and that Moses consistently outperforms the sequence-to-sequence neural model. To the best of our knowledge, this is the first approach to augment taxonomies with paraphrases. N18-2051 @@ -3169,7 +3169,7 @@ A Laypeople Study on Terminology Identification across Domains and Task Definitions AnnaHätty - SabineSchulte im Walde + SabineSchulte im Walde 321–326 This paper introduces a new dataset of term annotation. Given that even experts vary significantly in their understanding of termhood, and that term identification is mostly performed as a binary task, we offer a novel perspective to explore the common, natural understanding of what constitutes a term: Laypeople annotate single-word and multi-word terms, across four domains and across four task definitions. Analyses based on inter-annotator agreement offer insights into differences in term specificity, term granularity and subtermhood. N18-2052 @@ -3190,8 +3190,8 @@ Cross-language Article Linking Using Cross-Encyclopedia Entity Embedding - Chun-KaiWu - Richard Tzong-HanTsai + Chun-KaiWu + Richard Tzong-HanTsai 334–339 Cross-language article linking (CLAL) is the task of finding corresponding article pairs of different languages across encyclopedias. This task is a difficult disambiguation problem in which one article must be selected among several candidate articles with similar titles and contents. Existing works focus on engineering text-based or link-based features for this task, which is a time-consuming job, and some of these features are only applicable within the same encyclopedia. In this paper, we address these problems by proposing cross-encyclopedia entity embedding. Unlike other works, our proposed method does not rely on known cross-language pairs. We apply our method to CLAL between English Wikipedia and Chinese Baidu Baike. Our features improve performance relative to the baseline by 29.62%. Tested 30 times, our system achieved an average improvement of 2.76% over the current best system (26.86% over baseline), a statistically significant result. N18-2054 @@ -3200,7 +3200,7 @@ Identifying the Most Dominant Event in a News Article by Mining Event Coreference Relations - Prafulla KumarChoubey + Prafulla KumarChoubey KaushikRaju RuihongHuang 340–345 @@ -3235,7 +3235,7 @@ Semi-Supervised Event Extraction with Paraphrase Clusters JamesFerguson ColinLockard - DanielWeld + DanielWeld HannanehHajishirzi 359–364 Supervised event extraction systems are limited in their accuracy due to the lack of available training data. We present a method for self-training event extraction systems by bootstrapping additional training data. This is done by taking advantage of the occurrence of multiple mentions of the same event instances across newswire articles from multiple sources. If our system can make a high-confidence extraction of some mentions in such a cluster, it can then acquire diverse training examples by adding the other mentions as well. Our experiments show significant performance improvements on multiple event extractors over ACE 2005 and TAC-KBP 2015 datasets. @@ -3260,9 +3260,9 @@ Syntactic Patterns Improve Information Extraction for Medical Search RomaPatel YinfeiYang - IainMarshall + IainMarshall AniNenkova - ByronWallace + ByronWallace 371–377 Medical professionals search the published literature by specifying the type of patients, the medical intervention(s) and the outcome measure(s) of interest. In this paper we demonstrate how features encoding syntactic patterns improve the performance of state-of-the-art sequence tagging models (both neural and linear) for information extraction of these medically relevant categories. We present an analysis of the type of patterns exploited and of the semantic space induced for these, i.e., the distributed representations learned for identified multi-token patterns. We show that these learned representations differ substantially from those of the constituent unigrams, suggesting that the patterns capture contextual information that is otherwise lost. N18-2060 @@ -3271,7 +3271,7 @@ Syntactically Aware Neural Architectures for Definition Extraction - LuisEspinosa-Anke + LuisEspinosa-Anke StevenSchockaert 378–385 Automatically identifying definitional knowledge in text corpora (Definition Extraction or DE) is an important task with direct applications in, among others, Automatic Glossary Generation, Taxonomy Learning, Question Answering and Semantic Search. It is generally cast as a binary classification problem between definitional and non-definitional sentences. In this paper we present a set of neural architectures combining Convolutional and Recurrent Neural Networks, which are further enriched by incorporating linguistic information via syntactic dependencies. Our experimental results in the task of sentence classification, on two benchmarking DE datasets (one generic, one domain-specific), show that these models obtain consistent state of the art results. Furthermore, we demonstrate that models trained on clean Wikipedia-like definitions can successfully be applied to more noisy domain-specific corpora. @@ -3362,7 +3362,7 @@ Defoiling Foiled Image Captions - Pranava SwaroopMadhyastha + Pranava SwaroopMadhyastha JosiahWang LuciaSpecia 433–438 @@ -3375,7 +3375,7 @@ Pragmatically Informative Image Captioning with Character-Level Inference ReubenCohn-Gordon - NoahGoodman + NoahGoodman ChristopherPotts 439–443 We combine a neural image captioner with a Rational Speech Acts (RSA) model to make a system that is pragmatically informative: its objective is to produce captions that are not merely true but also distinguish their inputs from similar images. Previous attempts to combine RSA with neural image captioning require an inference which normalizes over the entire set of possible utterances. This poses a serious problem of efficiency, previously solved by sampling a small subset of possible utterances. We instead solve this problem by implementing a version of RSA which operates at the level of characters (“a”, “b”, “c”, ...) during the unrolling of the caption. We find that the utterance-level effect of referential captions can be obtained with only character-level decisions. Finally, we introduce an automatic method for testing the performance of pragmatic speaker models, and show that our model outperforms a non-pragmatic baseline as well as a word-level RSA captioner. @@ -3444,8 +3444,8 @@ What’s in a Domain? Learning Domain-Robust Text Representations using Adversarial Training YitongLi - TimothyBaldwin - TrevorCohn + TimothyBaldwin + TrevorCohn 474–479 Most real world language problems require learning from heterogenous corpora, raising the problem of learning robust models which generalise well to both similar (in domain) and dissimilar (out of domain) instances to those seen in @@ -3491,7 +3491,7 @@ FahimDalvi NadirDurrani HassanSajjad - StephanVogel + StephanVogel 493–499 We address the problem of simultaneous translation by modifying the Neural MT decoder to operate with dynamically built encoder and attention. We propose a tunable agent which decides the best segmentation strategy for a user-defined BLEU loss and Average Proportion (AP) constraint. Our agent outperforms previously proposed Wait-if-diff and Wait-if-worse agents (Cho and Esipova, 2016) on BLEU with a lower latency. Secondly we proposed data-driven changes to Neural MT training to better match the incremental decoding framework. N18-2079.Notes.pdf @@ -3511,7 +3511,7 @@ Neural Machine Translation Decoding with Terminology Constraints EvaHasler - Adriàde Gispert + Adriàde Gispert GonzaloIglesias BillByrne 506–512 @@ -3524,7 +3524,7 @@ On the Evaluation of Semantic Phenomena in Neural Machine Translation Using Natural Language Inference AdamPoliak YonatanBelinkov - JamesGlass + JamesGlass BenjaminVan Durme 513–523 We propose a process for investigating the extent to which sentence representations arising from neural machine translation (NMT) systems encode distinct semantic phenomena. We use these representations as features to train a natural language inference (NLI) classifier based on datasets recast from existing semantic annotations. In applying this process to a representative NMT system, we find its encoder appears most suited to supporting inferences at the syntax-semantics interface, as compared to anaphora resolution requiring world knowledge. We conclude with a discussion on the merits and potential deficiencies of the existing process, and how it may be improved and extended as a broader framework for evaluating semantic coverage @@ -3560,8 +3560,8 @@ Are All Languages Equally Hard to Language-Model? RyanCotterell - Sabrina J.Mielke - JasonEisner + Sabrina J.Mielke + JasonEisner BrianRoark 536–541 For general modeling methods applied to diverse languages, a natural question is: how well should we expect our models to work on languages with differing typological profiles? In this work, we develop an evaluation framework for fair cross-linguistic comparison of language models, using translated text so that all models are asked to predict approximately the same information. We then conduct a study on 21 languages, demonstrating that in some languages, the textual expression of the information is harder to predict with both n-gram and LSTM language models. We show complex inflectional morphology to be a cause of performance differences among languages. @@ -3586,8 +3586,8 @@ Unsupervised Disambiguation of Syncretism in Inflected Lexicons RyanCotterell ChristoKirov - Sabrina J.Mielke - JasonEisner + Sabrina J.Mielke + JasonEisner 548–553 Lexical ambiguity makes it difficult to compute useful statistics of a corpus. A given word form might represent any of several morphological feature bundles. One can, however, use unsupervised learning (as in EM) to fit a model that probabilistically disambiguates word forms. We present such an approach, which employs a neural network to smoothly model a prior distribution over feature bundles (even rare ones). Although this basic model does not consider a token’s context, that very property allows it to operate on a simple list of unigram type counts, partitioning each count among different analyses of that unigram. We discuss evaluation metrics for this novel task and report results on 5 languages. N18-2087 @@ -3613,7 +3613,7 @@ GabrielStanovsky LuhengHe IdoDagan - LukeZettlemoyer + LukeZettlemoyer 560–568 We introduce Question-Answer Meaning Representations (QAMRs), which represent the predicate-argument structure of a sentence as a set of question-answer pairs. We develop a crowdsourcing scheme to show that QAMRs can be labeled with very little training, and gather a dataset with over 5,000 sentences and 100,000 questions. A qualitative analysis demonstrates that the crowd-generated question-answer pairs cover the vast majority of predicate-argument relationships in existing datasets (including PropBank, NomBank, and QA-SRL) along with many previously under-resourced ones, including implicit arguments and relations. We also report baseline models for question generation and answering, and summarize a recent approach for using QAMR labels to improve an Open IE system. These results suggest the freely available QAMR data and annotation scheme should support significant future work. N18-2089 @@ -3660,7 +3660,7 @@ ZifanLi ZilinZhang RuiZhang - DragomirRadev + DragomirRadev 588–594 Interacting with relational databases through natural language helps users with any background easily query and analyze a vast amount of data. This requires a system that understands users’ questions and converts them to SQL queries automatically. In this paper, we present a novel approach TypeSQL which formats the problem as a slot filling task in a more reasonable way. In addition, TypeSQL utilizes type information to better understand rare entities and numbers in the questions. We experiment this idea on the WikiSQL dataset and outperform the prior art by 6% in much shorter time. We also show that accessing the content of databases can significantly improve the performance when users’ queries are not well-formed. TypeSQL can reach 82.6% accuracy, a 17.5% absolute improvement compared to the previous content-sensitive model. N18-2093 @@ -3671,7 +3671,7 @@ Community Member Retrieval on Social Media Using Textual Information AaronJaech ShobhitHathi - MariOstendorf + MariOstendorf 595–601 This paper addresses the problem of community membership detection using only text features in a scenario where a small number of positive labeled examples defines the community. The solution introduces an unsupervised proxy task for learning user embeddings: user re-identification. Experiments with 16 different communities show that the resulting embeddings are more effective for community membership identification than common unsupervised representations. N18-2094 @@ -3684,7 +3684,7 @@ YinfeiYang JunZhou XiaolongLi - Forrest ShengBao + Forrest ShengBao 602–607 With the growing amount of reviews in e-commerce websites, it is critical to assess the helpfulness of reviews and recommend them accordingly to consumers. Recent studies on review helpfulness require plenty of labeled samples for each domain/category of interests. However, such an approach based on close-world assumption is not always practical, especially for domains with limited reviews or the “out-of-vocabulary” problem. Therefore, we propose a convolutional neural network (CNN) based model which leverages both word-level and character-based representations. To transfer knowledge between domains, we further extend our model to jointly model different domains with auxiliary domain discriminators. On the Amazon product review dataset, our approach significantly outperforms the state of the art in terms of both accuracy and cross-domain robustness. N18-2095 @@ -3707,7 +3707,7 @@ ArmanCohan FranckDernoncourt Doo SoonKim - TrungBui + TrungBui SeokhwanKim WalterChang NazliGoharian @@ -3723,7 +3723,7 @@ AnirbanLaha KarthikSankaranarayanan PrekshaNema - Mitesh M.Khapra + Mitesh M.Khapra ShreyasShetty 622–627 Structured data summarization involves generation of natural language summaries from structured input data. In this work, we consider summarizing structured data occurring in the form of tables as they are prevalent across a wide variety of domains. We formulate the standard table summarization problem, which deals with tables conforming to a single predefined schema. To this end, we propose a mixed hierarchical attention based encoder-decoder model which is able to leverage the structure in addition to the content of the tables. Our experiments on the publicly available weathergov dataset show around 18 BLEU (around 30%) improvement over the current state-of-the-art. @@ -3736,7 +3736,7 @@ YouxuanJiang CatherineFinegan-Dollak Jonathan K.Kummerfeld - WalterLasecki + WalterLasecki 628–633 Most summarization research focuses on summarizing the entire given text, but in practice readers are often interested in only one aspect of the document or conversation. We propose targeted summarization as an umbrella category for summarization tasks that intentionally consider only parts of the input data. This covers query-based summarization, update summarization, and a new task we propose where the goal is to summarize a particular aspect of a document. However, collecting data for this new task is hard because directly asking annotators (e.g., crowd workers) to write summaries leads to data with low accuracy when there are a large number of facts to include. We introduce a novel crowdsourcing workflow, Pin-Refine, that allows us to collect high-quality summaries for our task, a necessary step for the development of automatic systems. N18-2099.Datasets.zip @@ -3748,7 +3748,7 @@ <fixed-case>K</fixed-case>ey2<fixed-case>V</fixed-case>ec: Automatic Ranked Keyphrase Extraction from Scientific Articles using Phrase Embeddings DebanjanMahata JohnKuriakose - Rajiv RatnShah + Rajiv RatnShah RogerZimmermann 634–639 Keyphrase extraction is a fundamental task in natural language processing that facilitates mapping of documents to a set of representative phrases. In this paper, we present an unsupervised technique (Key2Vec) that leverages phrase embeddings for ranking keyphrases extracted from scientific articles. Specifically, we propose an effective way of processing text documents for training multi-word phrase embeddings that are used for thematic representation of scientific articles and ranking of keyphrases extracted from them using theme-weighted PageRank. Evaluations are performed on benchmark datasets producing state-of-the-art results. @@ -3763,7 +3763,7 @@ HadyElsahar PavlosVougiouklis ChristopheGravier - FrédériqueLaforest + FrédériqueLaforest JonathonHare ElenaSimperl 640–645 @@ -3841,7 +3841,7 @@ Higher-Order Coreference Resolution with Coarse-to-Fine Inference KentonLee LuhengHe - LukeZettlemoyer + LukeZettlemoyer 687–692 We introduce a fully-differentiable approximation to higher-order inference for coreference resolution. Our approach uses the antecedent distribution from a span-ranking architecture as an attention mechanism to iteratively refine span representations. This enables the model to softly consider multiple hops in the predicted clusters. To alleviate the computational cost of this iterative process, we introduce a coarse-to-fine approach that incorporates a less accurate but more efficient bilinear factor, enabling more aggressive pruning without hurting accuracy. Compared to the existing state-of-the-art span-ranking approach, our model significantly improves accuracy on the English OntoNotes benchmark, while being far more computationally efficient. N18-2108 @@ -3887,9 +3887,9 @@ PawełBudzianowski Pei-HaoSu StefanUltes - Lina M.Rojas-Barahona + Lina M.Rojas-Barahona Bo-HsiangTseng - MilicaGašić + MilicaGašić 714–719 Reinforcement learning (RL) is a promising approach to solve dialogue policy optimisation. Traditional RL algorithms, however, fail to scale to large domains due to the curse of dimensionality. We propose a novel Dialogue Management architecture, based on Feudal RL, which decomposes the decision into two steps; a first step where a master policy selects a subset of primitive actions, and a second step where a primitive action is chosen from the selected subset. The structural information included in the domain ontology is used to abstract the dialogue state space, taking the decisions at each step using different parts of the abstracted state. This, combined with an information sharing mechanism between slots, increases the scalability to large domains. We show that an implementation of this approach, based on Deep-Q Networks, significantly outperforms previous state of the art in several dialogue domains and environments, without the need of any additional reward signal. N18-2112 @@ -3899,7 +3899,7 @@ Evaluating Historical Text Normalization Systems: How Well Do They Generalize? AlexanderRobertson - SharonGoldwater + SharonGoldwater 720–725 We highlight several issues in the evaluation of historical text normalization systems that make it hard to tell how well these systems would actually work in practice—i.e., for new datasets or languages; in comparison to more naïve systems; or as a preprocessing step for downstream NLP tools. We illustrate these issues and exemplify our proposed evaluation practices by comparing two neural models against a naïve baseline system. We show that the neural models generalize well to unseen words in tests on five languages; nevertheless, they provide no clear benefit over the naïve baseline for downstream POS tagging of an English historical collection. We conclude that future work should include more rigorous evaluation, including both intrinsic and extrinsic measures where possible. N18-2113 @@ -3922,7 +3922,7 @@ Po-SenHuang ChenglongWang RishabhSingh - Wen-tauYih + Wen-tauYih XiaodongHe 732–738 In conventional supervised training, a model is trained to fit all the training examples. However, having a monolithic model may not always be the best strategy, as examples could vary widely. In this work, we explore a different learning protocol that treats each example as a unique pseudo-task, by reducing the original learning problem to a few-shot meta-learning scenario with the help of a domain-dependent relevance function. When evaluated on the WikiSQL dataset, our approach leads to faster convergence and achieves 1.1%–5.4% absolute accuracy gains over the non-meta-learning counterparts. @@ -3945,7 +3945,7 @@ Role-specific Language Models for Processing Recorded Neuropsychological Exams TukaAl Hanai RhodaAu - JamesGlass + JamesGlass 746–752 Neuropsychological examinations are an important screening tool for the presence of cognitive conditions (e.g. Alzheimer’s, Parkinson’s Disease), and require a trained tester to conduct the exam through spoken interactions with the subject. While audio is relatively easy to record, it remains a challenge to automatically diarize (who spoke when?), decode (what did they say?), and assess a subject’s cognitive health. This paper demonstrates a method to determine the cognitive health (impaired or not) of 92 subjects, from audio that was diarized using an automatic speech recognition system trained on TED talks and on the structured language used by testers and subjects. Using leave-one-out cross validation and logistic regression modeling we show that even with noisily decoded data (81% WER) we can still perform accurate enough diarization (0.02% confusion rate) to determine the cognitive state of a subject (0.76 AUC). N18-2117 @@ -3979,7 +3979,7 @@ Learning to Color from Language - VarunManjunatha + VarunManjunatha MohitIyyer JordanBoyd-Graber LarryDavis @@ -4050,8 +4050,8 @@ Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 3 (Industry Papers) N18-3 - SrinivasBangalore - JenniferChu-Carroll + SrinivasBangalore + JenniferChu-Carroll YunyaoLi 10.18653/v1/N18-3 Association for Computational Linguistics @@ -4068,7 +4068,7 @@ Scalable Wide and Deep Learning for Computer Assisted Coding MarilisaAmoia FrankDiehl - JesusGimenez + JesusGimenez JoelPinto RaphaelSchumann FabianStemmer @@ -4084,8 +4084,8 @@ Neural Network based Extreme Classification and Similarity Models for Product Matching KashifShah - SelcukKopru - Jean-DavidRuvini + SelcukKopru + Jean-DavidRuvini 8–15 Matching a seller listed item to an appropriate product has become a fundamental and one of the most significant step for e-commerce platforms for product based experience. It has a huge impact on making the search effective, search engine optimization, providing product reviews and product price estimation etc. along with many other advantages for a better user experience. As significant and vital it has become, the challenge to tackle the complexity has become huge with the exponential growth of individual and business sellers trading millions of products everyday. We explored two approaches; classification based on shallow neural network and similarity based on deep siamese network. These models outperform the baseline by more than 5% in term of accuracy and are capable of extremely efficient training and inference. N18-3002 @@ -4098,7 +4098,7 @@ Young-BumKim DongchanKim Joo-KyungKim - RuhiSarikaya + RuhiSarikaya 16–24 Intelligent personal digital assistants (IPDAs), a popular real-life application with spoken language understanding capabilities, can cover potentially thousands of overlapping domains for natural language understanding, and the task of finding the best domain to handle an utterance becomes a challenging problem on a large scale. In this paper, we propose a set of efficient and scalable shortlisting-reranking neural models for effective large-scale domain classification for IPDAs. The shortlisting stage focuses on efficiently trimming all domains down to a list of k-best candidate domains, and the reranking stage performs a list-wise reranking of the initial k-best domains with additional contextual information. We show the effectiveness of our approach with extensive experiments on 1,500 IPDA domains. N18-3003 @@ -4135,9 +4135,9 @@ Bootstrapping a Neural Conversational Agent with Dialogue Self-Play, Crowdsourcing and On-Line Reinforcement Learning PararthShah - DilekHakkani-Tür + DilekHakkani-Tür BingLiu - GokhanTür + GokhanTür 41–51 End-to-end neural models show great promise towards building conversational agents that are trained from data and on-line experience using supervised and reinforcement learning. However, these models require a large corpus of dialogues to learn effectively. For goal-oriented dialogues, such datasets are expensive to collect and annotate, since each task involves a separate schema and database of entities. Further, the Wizard-of-Oz approach commonly used for dialogue collection does not provide sufficient coverage of salient dialogue flows, which is critical for guaranteeing an acceptable task completion rate in consumer-facing conversational agents. In this paper, we study a recently proposed approach for building an agent for arbitrary tasks by combining dialogue self-play and crowd-sourcing to generate fully-annotated dialogues with diverse and natural utterances. We discuss the advantages of this approach for industry applications of conversational agents, wherein an agent can be rapidly bootstrapped to deploy in front of users and further optimized via interactive learning from actual users of the system. N18-3006 @@ -4148,7 +4148,7 @@ Quality Estimation for Automatically Generated Titles of e<fixed-case>C</fixed-case>ommerce Browse Pages NicolaUeffing - José G.C. de Souza + José G.C. de Souza GregorLeusch 52–59 At eBay, we are automatically generating a large amount of natural language titles for eCommerce browse pages using machine translation (MT) technology. While automatic approaches can generate millions of titles very fast, they are prone to errors. We therefore develop quality estimation (QE) methods which can automatically detect titles with low quality in order to prevent them from going live. In this paper, we present different approaches: The first one is a Random Forest (RF) model that explores hand-crafted, robust features, which are a mix of established features commonly used in Machine Translation Quality Estimation (MTQE) and new features developed specifically for our task. The second model is based on Siamese Networks (SNs) which embed the metadata input sequence and the generated title in the same space and do not require hand-crafted features at all. We thoroughly evaluate and compare those approaches on in-house data. While the RF models are competitive for scenarios with smaller amounts of training data and somewhat more robust, they are clearly outperformed by the SN models when the amount of training data is larger. @@ -4158,7 +4158,7 @@ Atypical Inputs in Educational Applications - Su-YounYoon + Su-YounYoon AoifeCahill AnastassiaLoukina KlausZechner @@ -4188,7 +4188,7 @@ LauraChiticariu MarinaDanilevsky YunyaoLi - FrederickReiss + FrederickReiss HuaiyuZhu 76–83 The rise of enterprise applications over unstructured and semi-structured documents poses new challenges to text understanding systems across multiple dimensions. We present SystemT, a declarative text understanding system that addresses these challenges and has been deployed in a wide range of enterprise applications. We highlight the design considerations and decisions behind SystemT in addressing the needs of the enterprise setting. We also summarize the impact of SystemT on business and education. @@ -4213,12 +4213,12 @@ KyleLo TylerMurray Hsu-HanOoi - MatthewPeters + MatthewPeters JoannaPower SamSkjonsberg - Lucy LuWang + Lucy LuWang ChrisWilhelm - ZhengYuan + ZhengYuan Madeleinevan Zuylen OrenEtzioni 84–91 @@ -4244,7 +4244,7 @@ Accelerating <fixed-case>NMT</fixed-case> Batched Beam Decoding with <fixed-case>LMBR</fixed-case> Posteriors for Deployment GonzaloIglesias WilliamTambellini - AdriàDe Gispert + AdriàDe Gispert EvaHasler BillByrne 106–113 @@ -4267,7 +4267,7 @@ From dictations to clinical reports using machine translation - GregoryFinley + GregoryFinley WaelSalloum NajmehSadoughi ErikEdwards @@ -4275,7 +4275,7 @@ NicoAxtmann MichaelBrenndoerfer MarkMiller - DavidSuendermann-Oeft + DavidSuendermann-Oeft 121–128 A typical workflow to document clinical encounters entails dictating a summary, running speech recognition, and post-processing the resulting text into a formatted letter. Post-processing entails a host of transformations including punctuation restoration, truecasing, marking sections and headers, converting dates and numerical expressions, parsing lists, etc. In conventional implementations, most of these tasks are accomplished by individual modules. We introduce a novel holistic approach to post-processing that relies on machine callytranslation. We show how this technique outperforms an alternative conventional system—even learning to correct speech recognition errors during post-processing—while being much simpler to maintain. N18-3015 @@ -4298,7 +4298,7 @@ Selecting Machine-Translated Data for Quick Bootstrapping of a Natural Language Understanding System JudithGaspers PennyKaranasou - RajenChatterjee + RajenChatterjee 137–144 This paper investigates the use of Machine Translation (MT) to bootstrap a Natural Language Understanding (NLU) system for a new language for the use case of a large-scale voice-controlled device. The goal is to decrease the cost and time needed to get an annotated corpus for the new language, while still having a large enough coverage of user requests. Different methods of filtering MT data in order to keep utterances that improve NLU performance and language-specific post-processing methods are investigated. These methods are tested in a large-scale NLU task with translating around 10 millions training utterances from English to German. The results show a large improvement for using MT data over a grammar-based and over an in-house data collection baseline, while reducing the manual effort greatly. Both filtering and post-processing approaches improve results further. N18-3017 @@ -4345,8 +4345,8 @@ A Novel Approach to Part Name Discovery in Noisy Text - Nobal BikramNiraula - DanielWhyatt + Nobal BikramNiraula + DanielWhyatt AnneKao 170–176 As a specialized example of information extraction, part name extraction is an area that presents unique challenges. Part names are typically multi-word terms longer than two words. There is little consistency in how terms are described in noisy free text, with variations spawned by typos, ad hoc abbreviations, acronyms, and incomplete names. This makes search and analyses of parts in these data extremely challenging. In this paper, we present our algorithm, PANDA (Part Name Discovery Analytics), based on a unique method that exploits statistical, linguistic and machine learning techniques to discover part names in noisy text such as that in manufacturing quality documentation, supply chain management records, service communication logs, and maintenance reports. Experiments show that PANDA is scalable and outperforms existing techniques significantly. @@ -4425,7 +4425,7 @@ Document-based Recommender System for Job Postings using Dense Representations AhmedElsafty MartinRiedl - ChrisBiemann + ChrisBiemann 216–224 Job boards and professional social networks heavily use recommender systems in order to better support users in exploring job advertisements. Detecting the similarity between job advertisements is important for job recommendation systems as it allows, for example, the application of item-to-item based recommendations. In this work, we research the usage of dense vector representations to enhance a large-scale job recommendation system and to rank German job advertisements regarding their similarity. We follow a two-folded evaluation scheme: (1) we exploit historic user interactions to automatically create a dataset of similar jobs that enables an offline evaluation. (2) In addition, we conduct an online A/B test and evaluate the best performing method on our platform reaching more than 1 million users. We achieve the best results by combining job titles with full-text job descriptions. In particular, this method builds dense document representation using words of the titles to weigh the importance of words of the full-text description. In the online evaluation, this approach allows us to increase the click-through rate on job recommendations for active users by 8.0%. N18-3027 @@ -4437,7 +4437,7 @@ Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Student Research Workshop N18-4 - Silvio RicardoCordeiro + Silvio RicardoCordeiro ShereenOraby UmashanthiPavalanathan KyeongminRim @@ -4467,7 +4467,7 @@ Combining Abstractness and Language-specific Theoretical Indicators for Detecting Non-Literal Usage of <fixed-case>E</fixed-case>stonian Particle Verbs EleriAedmaa MaximilianKöper - SabineSchulte im Walde + SabineSchulte im Walde 9–16 This paper presents two novel datasets and a random-forest classifier to automatically predict literal vs. non-literal language usage for a highly frequent type of multi-word expression in a low-resource language, i.e., Estonian. We demonstrate the value of language-specific indicators induced from theoretical linguistic research, which outperform a high majority baseline when combined with language-independent features of non-literal language (such as abstractness). N18-4002 @@ -4488,7 +4488,7 @@ AliEmami AdamTrischler KaheerSuleman - Jackie Chi KitCheung + Jackie Chi KitCheung 25–31 We introduce an automatic system that performs well on two common-sense reasoning tasks, the Winograd Schema Challenge (WSC) and the Choice of Plausible Alternatives (COPA). Problem instances from these tasks require diverse, complex forms of inference and knowledge to solve. Our method uses a knowledge-hunting module to gather text from the web, which serves as evidence for candidate problem resolutions. Given an input problem, our system generates relevant queries to send to a search engine. It extracts and classifies knowledge from the returned results and weighs it to make a resolution. Our approach improves F1 performance on the WSC by 0.16 over the previous best and is competitive with the state-of-the-art on COPA, demonstrating its general applicability. N18-4004 @@ -4518,7 +4518,7 @@ Learning Word Embeddings for Data Sparse and Sentiment Rich Data Sets - PrathushaKameswara Sarma + PrathushaKameswara Sarma 46–53 This research proposal describes two algorithms that are aimed at learning word embeddings for data sparse and sentiment rich data sets. The goal is to use word embeddings adapted for domain specific data sets in downstream applications such as sentiment classification. The first approach learns word embeddings in a supervised fashion via SWESA (Supervised Word Embeddings for Sentiment Analysis), an algorithm for sentiment analysis on data sets that are of modest size. SWESA leverages document labels to jointly learn polarity-aware word embeddings and a classifier to classify unseen documents. In the second approach domain adapted (DA) word embeddings are learned by exploiting the specificity of domain specific data sets and the breadth of generic word embeddings. The new embeddings are formed by aligning corresponding word vectors using Canonical Correlation Analysis (CCA) or the related nonlinear Kernel CCA. Experimental results on binary sentiment classification tasks using both approaches for standard data sets are presented. N18-4007 @@ -4563,7 +4563,7 @@ Towards Generating Personalized Hospitalization Summaries SabitaAcharya BarbaraDi Eugenio - AndrewBoyd + AndrewBoyd RichardCameron KarenDunn Lopez PamelaMartyn-Nemeth @@ -4589,7 +4589,7 @@ <fixed-case>L</fixed-case>ist<fixed-case>O</fixed-case>ps: A Diagnostic Dataset for Latent Tree Learning NikitaNangia - SamuelBowman + SamuelBowman 92–99 Latent tree learning models learn to parse a sentence without syntactic supervision, and use that parse to build the sentence representation. Existing work on such models has shown that, while they perform well on tasks like sentence classification, they do not learn grammars that conform to any plausible semantic or syntactic formalism (Williams et al., 2018a). Studying the parsing ability of such models in natural language can be challenging due to the inherent complexities of natural language, like having several valid parses for a single sentence. In this paper we introduce ListOps, a toy dataset created to study the parsing ability of latent tree models. ListOps sequences are in the style of prefix arithmetic. The dataset is designed to have a single correct parsing strategy that a system needs to learn to succeed at the task. We show that the current leading latent tree models are unable to learn to parse and succeed at ListOps. These models achieve accuracies worse than purely sequential RNNs. N18-4013 @@ -4632,7 +4632,7 @@ Training a Ranking Function for Open-Domain Question Answering Phu MonHtut - SamuelBowman + SamuelBowman KyunghyunCho 120–127 In recent years, there have been amazing advances in deep learning methods for machine reading. In machine reading, the machine reader has to extract the answer from the given ground truth paragraph. Recently, the state-of-the-art machine reading models achieve human level performance in SQuAD which is a reading comprehension-style question answering (QA) task. The success of machine reading has inspired researchers to combine Information Retrieval with machine reading to tackle open-domain QA. However, these systems perform poorly compared to reading comprehension-style QA because it is difficult to retrieve the pieces of paragraphs that contain the answer to the question. In this study, we propose two neural network rankers that assign scores to different passages based on their likelihood of containing the answer to a given question. Additionally, we analyze the relative importance of semantic similarity and word level relevance matching in open-domain QA. @@ -4646,7 +4646,7 @@ AdityaBohra VinaySingh Syed SarfarazAkhtar - ManishShrivastava + ManishShrivastava 128–135 Emotion Prediction is a Natural Language Processing (NLP) task dealing with detection and classification of emotions in various monolingual and bilingual texts. While some work has been done on code-mixed social media text and in emotion prediction separately, our work is the first attempt which aims at identifying the emotion associated with Hindi-English code-mixed social media text. In this paper, we analyze the problem of emotion identification in code-mixed content and present a Hindi-English code-mixed corpus extracted from twitter and annotated with the associated emotion. For every tweet in the dataset, we annotate the source language of all the words present, and also the causal language of the expressed emotion. Finally, we propose a supervised classification system which uses various machine learning techniques for detecting the emotion associated with the text using a variety of character level, word level, and lexicon based features. N18-4018 @@ -4658,8 +4658,8 @@ McKennaTornblad LukeLapresi ChristopherHoman - RaymondPtucha - CeciliaOvesdotter Alm + RaymondPtucha + CeciliaOvesdotter Alm 136–143 While labor issues and quality assurance in crowdwork are increasingly studied, how annotators make sense of texts and how they are personally impacted by doing so are not. We study these questions via a narrative-sorting annotation task, where carefully selected (by sequentiality, topic, emotional content, and length) collections of tweets serve as examples of everyday storytelling. As readers process these narratives, we measure their facial expressions, galvanic skin response, and self-reported reactions. From the perspective of annotator well-being, a reassuring outcome was that the sorting task did not cause a measurable stress response, however readers reacted to humor. In terms of sensemaking, readers were more confident when sorting sequential, target-topical, and highly emotional tweets. As crowdsourcing becomes more common, this research sheds light onto the perceptive capabilities and emotional impact of human readers. N18-4019 @@ -4722,7 +4722,7 @@ An automated medical scribe for documenting clinical encounters - GregoryFinley + GregoryFinley ErikEdwards AmandaRobinson MichaelBrenndoerfer @@ -4730,7 +4730,7 @@ JamesFone NicoAxtmann MarkMiller - DavidSuendermann-Oeft + DavidSuendermann-Oeft 11–15 A medical scribe is a clinical professional who charts patient–physician encounters in real time, relieving physicians of most of their administrative burden and substantially increasing productivity and job satisfaction. We present a complete implementation of an automated medical scribe. Our system can serve either as a scalable, standardized, and economical alternative to human scribes; or as an assistive tool for them, providing a first draft of a report along with a convenient means to modify it. This solution is, to our knowledge, the first automated scribe ever presented and relies upon multiple speech and language technologies, including speaker diarization, medical speech recognition, knowledge extraction, and natural language generation. N18-5003 @@ -4739,7 +4739,7 @@ <fixed-case>CL</fixed-case> Scholar: The <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology Knowledge Graph Miner - MayankSingh + MayankSingh PradeepDogga SohanPatro DhirajBarnwal @@ -4776,8 +4776,8 @@ IsraaJaradat PepaGencheva AlbertoBarrón-Cedeño - LluísMàrquez - PreslavNakov + LluísMàrquez + PreslavNakov 26–30 We present ClaimRank, an online system for detecting check-worthy claims. While originally trained on political debates, the system can work for any kind of text, e.g., interviews or just regular news articles. Its aim is to facilitate manual fact-checking efforts by prioritizing the claims that fact-checkers should consider first. ClaimRank supports both Arabic and English, it is trained on actual annotations from nine reputable fact-checking organizations (PolitiFact, FactCheck, ABC, CNN, NPR, NYT, Chicago Tribune, The Guardian, and Washington Post), and thus it can mimic the claim selection strategies for each and any of them, as well as for the union of them all. N18-5006 @@ -4841,7 +4841,7 @@ Madly Ambiguous: A Game for Learning about Structural Ambiguity and Why It’s Hard for Computers AjdaGokcen EthanHill - MichaelWhite + MichaelWhite 51–55 Madly Ambiguous is an open source, online game aimed at teaching audiences of all ages about structural ambiguity and why it’s hard for computers. After a brief introduction to structural ambiguity, users are challenged to complete a sentence in a way that tricks the computer into guessing an incorrect interpretation. Behind the scenes are two different NLP-based methods for classifying the user’s input, one representative of classic rule-based approaches to disambiguation and the other representative of recent neural network approaches. Qualitative feedback from the system’s use in online, classroom, and science museum settings indicates that it is engaging and successful in conveying the intended take home messages. A demo of Madly Ambiguous can be played at http://madlyambiguous.osu.edu. N18-5011 @@ -4876,7 +4876,7 @@ Generating Continuous Representations of Medical Texts GrahamSpinks - Marie-FrancineMoens + Marie-FrancineMoens 66–70 We present an architecture that generates medical texts while learning an informative, continuous representation with discriminative features. During training the input to the system is a dataset of captions for medical X-Rays. The acquired continuous representations are of particular interest for use in many machine learning techniques where the discrete and high-dimensional nature of textual input is an obstacle. We use an Adversarially Regularized Autoencoder to create realistic text in both an unconditional and conditional setting. We show that this technique is applicable to medical texts which often contain syntactic and domain-specific shorthands. A quantitative evaluation shows that we achieve a lower model perplexity than a traditional LSTM generator. N18-5014 @@ -4914,7 +4914,7 @@ Yu-WenLiu Liang-ChihLiu Chuan-JuWang - Ming-FengTsai + Ming-FengTsai 81–85 This paper presents a web-based information system, RiskFinder, for facilitating the analyses of soft and hard information in financial reports. In particular, the system broadens the analyses from the word level to sentence level, which makes the system useful for practitioner communities and unprecedented among financial academics. The proposed system has four main components: 1) a Form 10-K risk-sentiment dataset, consisting of a set of risk-labeled financial sentences and pre-trained sentence embeddings; 2) metadata, including basic information on each company that published the Form 10-K financial report as well as several relevant financial measures; 3) an interface that highlights risk-related sentences in the financial reports based on the latest sentence embedding techniques; 4) a visualization of financial time-series data for a corresponding company. This paper also conducts some case studies to showcase that the system can be of great help in capturing valuable insight within large amounts of textual information. The system is now online available at https://cfda.csie.org/RiskFinder/. N18-5017 @@ -4927,7 +4927,7 @@ DavidSalter LukeDeLuccia KilhoSon - Mohamed R.Amer + Mohamed R.Amer AmirTamrakar 86–90 We demonstrate an intelligent conversational agent system designed for advancing human-machine collaborative tasks. The agent is able to interpret a user’s communicative intent from both their verbal utterances and non-verbal behaviors, such as gestures. The agent is also itself able to communicate both with natural language and gestures, through its embodiment as an avatar thus facilitating natural symmetric multi-modal interactions. We demonstrate two intelligent agents with specialized skills in the Blocks World as use-cases of our system. @@ -4956,8 +4956,8 @@ ElizabethClark AriHoltzman YejinChoi - Noah A.Smith - MariOstendorf + Noah A.Smith + MariOstendorf 96–100 We present Sounding Board, a social chatbot that won the 2017 Amazon Alexa Prize. The system architecture consists of several components including spoken language processing, dialogue management, language generation, and content management, with emphasis on user-centric and content-driven design. We also share insights gained from large-scale online logs based on 160,000 conversations with real-world users. N18-5020 @@ -4970,7 +4970,7 @@ Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Tutorial Abstracts N18-6 MohitBansal - RebeccaPassonneau + RebeccaPassonneau 10.18653/v1/N18-6 Association for Computational Linguistics
New Orleans, Louisiana
@@ -5017,8 +5017,8 @@
The interplay between lexical resources and Natural Language Processing - JoseCamacho-Collados - LuisEspinosa Anke + JoseCamacho-Collados + LuisEspinosa Anke Mohammad TaherPilehvar 17–23 Incorporating linguistic, world and common sense knowledge into AI/NLP systems is currently an important research area, with several open problems and challenges. At the same time, processing and storing this knowledge in lexical resources is not a straightforward task. We propose to address these complementary goals from two methodological perspectives: the use of NLP methods to help the process of constructing and enriching lexical resources and the use of lexical resources for improving NLP applications. This tutorial may be useful for two main types of audience: those working on language resources who are interested in becoming acquainted with automatic NLP techniques, with the end goal of speeding and/or easing up the process of resource curation; and on the other hand, researchers in NLP who would like to benefit from the knowledge of lexical resources to improve their systems and models. diff --git a/data/xml/N19.xml b/data/xml/N19.xml index 140a8fda19..cdbd2c02ed 100644 --- a/data/xml/N19.xml +++ b/data/xml/N19.xml @@ -31,11 +31,11 @@ The emergence of number and syntax units in <fixed-case>LSTM</fixed-case> language models YairLakretz - GermanKruszewski - TheoDesbordes + GermanKruszewski + TheoDesbordes DieuwkeHupkes StanislasDehaene - MarcoBaroni + MarcoBaroni 11–20 Recent work has shown that LSTMs trained on a generic language modeling objective capture syntax-sensitive generalizations such as long-distance number agreement. We have however no mechanistic understanding of how they accomplish this remarkable feat. Some have conjectured it depends on heuristics that do not truly take hierarchical structure into account. We present here a detailed study of the inner mechanics of number tracking in LSTMs at the single neuron level. We discover that long-distance number information is largely managed by two “number units”. Importantly, the behaviour of these units is partially controlled by other units independently shown to track syntactic structure. We conclude that LSTMs are, to some extent, implementing genuinely syntactic processing mechanisms, paving the way to a more general understanding of grammatical encoding in LSTMs. N19-1002 @@ -61,7 +61,7 @@ TakashiMorita PengQian MiguelBallesteros - RogerLevy + RogerLevy 32–42 We investigate the extent to which the behavior of neural network language models reflects incremental representations of syntactic state. To do so, we employ experimental methodologies which were originally developed in the field of psycholinguistics to study syntactic representation in the human mind. We examine neural network model behavior on sets of artificial sentences containing a variety of syntactically complex structures. These sentences not only test whether the networks have a representation of syntactic state, they also reveal the specific lexical cues that networks use to update these states. We test four models: two publicly available LSTM sequence models of English (Jozefowicz et al., 2016; Gulordava et al., 2018) trained on large datasets; an RNN Grammar (Dyer et al., 2016) trained on a small, parsed dataset; and an LSTM trained on the same small corpus as the RNNG. We find evidence for basic syntactic state representations in all models, but only the models trained on large datasets are sensitive to subtle lexical cues signaling changes in syntactic state. N19-1004 @@ -72,7 +72,7 @@ Understanding language-elicited <fixed-case>EEG</fixed-case> data by predicting it from a fine-tuned language model DanSchwartz - TomMitchell + TomMitchell 43–57 Electroencephalography (EEG) recordings of brain activity taken while participants read or listen to language are widely used within the cognitive neuroscience and psycholinguistics communities as a tool to study language comprehension. Several time-locked stereotyped EEG responses to word-presentations – known collectively as event-related potentials (ERPs) – are thought to be markers for semantic or syntactic processes that take place during comprehension. However, the characterization of each individual ERP in terms of what features of a stream of language trigger the response remains controversial. Improving this characterization would make ERPs a more useful tool for studying language comprehension. We take a step towards better understanding the ERPs by finetuning a language model to predict them. This new approach to analysis shows for the first time that all of the ERPs are predictable from embeddings of a stream of language. Prior work has only found two of the ERPs to be predictable. In addition to this analysis, we examine which ERPs benefit from sharing parameters during joint training. We find that two pairs of ERPs previously identified in the literature as being related to each other benefit from joint training, while several other pairs of ERPs that benefit from joint training are suggestive of potential relationships. Extensions of this analysis that further examine what kinds of information in the model embeddings relate to each ERP have the potential to elucidate the processes involved in human language comprehension. N19-1005 @@ -86,7 +86,7 @@ HermanKamper KarenLivescu AdamLopez - SharonGoldwater + SharonGoldwater 58–68 We present a simple approach to improve direct speech-to-text translation (ST) when the source language is low-resource: we pre-train the model on a high-resource automatic speech recognition (ASR) task, and then fine-tune its parameters for ST. We demonstrate that our approach is effective by pre-training on 300 hours of English ASR data to improve Spanish English ST from 10.8 to 20.2 BLEU when only 20 hours of Spanish-English ST training data are available. Through an ablation study, we find that the pre-trained encoder (acoustic model) accounts for most of the improvement, despite the fact that the shared language in these tasks is the target language text, not the source language audio. Applying this insight, we show that pre-training on ASR helps ST even when the ASR language differs from both source and target ST languages: pre-training on French ASR also improves Spanish-English ST. Finally, we show that the approach improves performance on a true low-resource task: pre-training on a combination of English ASR and French ASR improves Mboshi-French ST, where only 4 hours of data are available, from 3.5 to 7.1 BLEU. N19-1006 @@ -109,7 +109,7 @@ Giving Attention to the Unexpected: Using Prosody Innovations in Disfluency Detection VickyZayats - MariOstendorf + MariOstendorf 86–95 Disfluencies in spontaneous speech are known to be associated with prosodic disruptions. However, most algorithms for disfluency detection use only word transcripts. Integrating prosodic cues has proved difficult because of the many sources of variability affecting the acoustic correlates. This paper introduces a new approach to extracting acoustic-prosodic cues using text-based distributional prediction of acoustic cues to derive vector z-score features (innovations). We explore both early and late fusion techniques for integrating text and prosody, showing gains over a high-accuracy text-only model. N19-1008 @@ -169,7 +169,7 @@ <fixed-case>A</fixed-case>nswer-based <fixed-case>A</fixed-case>dversarial <fixed-case>T</fixed-case>raining for <fixed-case>G</fixed-case>enerating <fixed-case>C</fixed-case>larification <fixed-case>Q</fixed-case>uestions SudhaRao - HalDaumé III + HalDaumé III 143–155 We present an approach for generating clarification questions with the goal of eliciting new information that would make the given textual context more complete. We propose that modeling hypothetical answers (to clarification questions) as latent variables can guide our approach into generating more useful clarification questions. We develop a Generative Adversarial Network (GAN) where the generator is a sequence-to-sequence model and the discriminator is a utility function that models the value of updating the context with the answer to the clarification question. We evaluate on two datasets, using both automatic metrics and human judgments of usefulness, specificity and relevance, showing that our approach outperforms both a retrieval-based model and ablations that exclude the utility model and the adversarial training. N19-1013 @@ -232,7 +232,7 @@ Discontinuous Constituency Parsing with a Stack-Free Transition System and a Dynamic Oracle MaximinCoavoux - Shay B.Cohen + Shay B.Cohen 204–217 We introduce a novel transition system for discontinuous constituency parsing. Instead of storing subtrees in a stack –i.e. a data structure with linear-time sequential access– the proposed system uses a set of parsing items, with constant-time random access. This change makes it possible to construct any discontinuous constituency tree in exactly 4n–2 transitions for a sentence of length n. At each parsing step, the parser considers every item in the set to be combined with a focus item and to construct a new constituent in a bottom-up fashion. The parsing strategy is based on the assumption that most syntactic structures can be parsed incrementally and that the set –the memory of the parser– remains reasonably small on average. Moreover, we introduce a provably correct dynamic oracle for the new transition system, and present the first experiments in discontinuous constituency parsing using a dynamic oracle. Our parser obtains state-of-the-art results on three English and German discontinuous treebanks. N19-1018 @@ -252,7 +252,7 @@ <fixed-case>CCG</fixed-case> Parsing Algorithm with Incremental Tree Rotation MilošStanojević - MarkSteedman + MarkSteedman 228–239 The main obstacle to incremental sentence processing arises from right-branching constituent structures, which are present in the majority of English sentences, as well as optional constituents that adjoin on the right, such as right adjuncts and right conjuncts. In CCG, many right-branching derivations can be replaced by semantically equivalent left-branching incremental derivations. The problem of right-adjunction is more resistant to solution, and has been tackled in the past using revealing-based approaches that often rely either on the higher-order unification over lambda terms (Pareschi and Steedman,1987) or heuristics over dependency representations that do not cover the whole CCGbank (Ambati et al., 2015). We propose a new incremental parsing algorithm for CCG following the same revealing tradition of work but having a purely syntactic approach that does not depend on access to a distinct level of semantic representation. This algorithm can cover the whole CCGbank, with greater incrementality and accuracy than previous proposals. N19-1020 @@ -286,7 +286,7 @@ Evaluating Composition Models for Verb Phrase Elliptical Sentence Embeddings GijsWijnholds - MehrnooshSadrzadeh + MehrnooshSadrzadeh 261–271 Ellipsis is a natural language phenomenon where part of a sentence is missing and its information must be recovered from its surrounding context, as in “Cats chase dogs and so do foxes.”. Formal semantics has different methods for resolving ellipsis and recovering the missing information, but the problem has not been considered for distributional semantics, where words have vector embeddings and combinations thereof provide embeddings for sentences. In elliptical sentences these combinations go beyond linear as copying of elided information is necessary. In this paper, we develop different models for embedding VP-elliptical sentences. We extend existing verb disambiguation and sentence similarity datasets to ones containing elliptical phrases and evaluate our models on these datasets for a variety of non-linear combinations and their linear counterparts. We compare results of these compositional models to state of the art holistic sentence encoders. Our results show that non-linear addition and a non-linear tensor-based composition outperform the naive non-compositional baselines and the linear models, and that sentence encoders perform well on sentence similarity, but not on verb disambiguation. N19-1023 @@ -297,8 +297,8 @@ Neural Finite-State Transducers: Beyond Rational Relations Chu-ChengLin HaoZhu - Matthew R.Gormley - JasonEisner + Matthew R.Gormley + JasonEisner 272–283 We introduce neural finite state transducers (NFSTs), a family of string transduction models defining joint and conditional probability distributions over pairs of strings. The probability of a string pair is obtained by marginalizing over all its accepting paths in a finite state transducer. In contrast to ordinary weighted FSTs, however, each path is scored using an arbitrary function such as a recurrent neural network, which breaks the usual conditional independence assumption (Markov property). NFSTs are more powerful than previous finite-state models with neural features (Rastogi et al., 2016.) We present training and inference algorithms for locally and globally normalized variants of NFSTs. In experiments on different transduction tasks, they compete favorably against seq2seq models while offering interpretable paths that correspond to hard monotonic alignments. N19-1024 @@ -405,7 +405,7 @@ Vector of Locally-Aggregated Word Embeddings (<fixed-case>VLAWE</fixed-case>): A Novel Document-level Representation Radu TudorIonescu - AndreiButnaru + AndreiButnaru 363–369 In this paper, we propose a novel representation for text documents based on aggregating word embedding vectors into document embeddings. Our approach is inspired by the Vector of Locally-Aggregated Descriptors used for image representation, and it works as follows. First, the word embeddings gathered from a collection of documents are clustered by k-means in order to learn a codebook of semnatically-related word embeddings. Each word embedding is then associated to its nearest cluster centroid (codeword). The Vector of Locally-Aggregated Word Embeddings (VLAWE) representation of a document is then computed by accumulating the differences between each codeword vector and each word vector (from the document) associated to the respective codeword. We plug the VLAWE representation, which is learned in an unsupervised manner, into a classifier and show that it is useful for a diverse set of text classification tasks. We compare our approach with a broad range of recent state-of-the-art methods, demonstrating the effectiveness of our approach. Furthermore, we obtain a considerable improvement on the Movie Review data set, reporting an accuracy of 93.3%, which represents an absolute gain of 10% over the state-of-the-art approach. N19-1033 @@ -419,7 +419,7 @@ DeepanwayGhosal SoujanyaPoria AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 370–379 Related tasks often have inter-dependence on each other and perform better when solved in a joint framework. In this paper, we present a deep multi-task learning framework that jointly performs sentiment and emotion analysis both. The multi-modal inputs (i.e. text, acoustic and visual frames) of a video convey diverse and distinctive information, and usually do not have equal contribution in the decision making. We propose a context-level inter-modal attention framework for simultaneously predicting the sentiment and expressed emotions of an utterance. We evaluate our proposed approach on CMU-MOSEI dataset for multi-modal sentiment and emotion analysis. Evaluation results suggest that multi-task learning framework offers improvement over the single-task framework. The proposed approach reports new state-of-the-art performance for both sentiment analysis and emotion analysis. N19-1034 @@ -475,7 +475,7 @@ Simplified Neural Unsupervised Domain Adaptation - TimothyMiller + TimothyMiller 414–419 Unsupervised domain adaptation (UDA) is the task of training a statistical model on labeled data from a source domain to achieve better performance on data from a target domain, with access to only unlabeled data in the target domain. Existing state-of-the-art UDA approaches use neural networks to learn representations that are trained to predict the values of subset of important features called “pivot features” on combined data from the source and target domains. In this work, we show that it is possible to improve on existing neural domain adaptation algorithms by 1) jointly training the representation learner with the task learner; and 2) removing the need for heuristically-selected “pivot features.” Our results show competitive performance with a simpler model. N19-1039 @@ -508,7 +508,7 @@ Lost in Machine Translation: A Method to Reduce Meaning Loss ReubenCohn-Gordon - NoahGoodman + NoahGoodman 437–441 A desideratum of high-quality translation systems is that they preserve meaning, in the sense that two sentences with different meanings should not translate to one and the same sentence in another language. However, state-of-the-art systems often fail in this regard, particularly in cases where the source and target languages partition the “meaning space” in different ways. For instance, “I cut my finger.” and “I cut my finger off.” describe different states of the world but are translated to French (by both Fairseq and Google Translate) as “Je me suis coupé le doigt.”, which is ambiguous as to whether the finger is detached. More generally, translation systems are typically many-to-one (non-injective) functions from source to target language, which in many cases results in important distinctions in meaning being lost in translation. Building on Bayesian models of informative utterance production, we present a method to define a less ambiguous translation system in terms of an underlying pre-trained neural sequence-to-sequence model. This method increases injectivity, resulting in greater preservation of meaning as measured by improvement in cycle-consistency, without impeding translation quality (measured by BLEU score). N19-1042 @@ -557,7 +557,7 @@ LemaoLiu XintongLi ConghuiZhu - TiejunZhao + TiejunZhao ShumingShi 466–477 Multilayer architectures are currently the gold standard for large-scale neural machine translation. Existing works have explored some methods for understanding the hidden representations, however, they have not sought to improve the translation quality rationally according to their understanding. Towards understanding for performance improvement, we first artificially construct a sequence of nested relative tasks and measure the feature generalization ability of the learned hidden representation over these tasks. Based on our understanding, we then propose to regularize the layer-wise representations with all tree-induced tasks. To overcome the computational bottleneck resulting from the large number of regularization terms, we design efficient approximation methods by selecting a few coarse-to-fine tasks for regularization. Extensive experiments on two widely-used datasets demonstrate the proposed methods only lead to small extra overheads in training but no additional overheads in testing, and achieve consistent improvements (up to +1.3 BLEU) compared to the state-of-the-art translation model. @@ -582,7 +582,7 @@ Attentive Mimicking: Better Word Embeddings by Attending to Informative Contexts TimoSchick - HinrichSchütze + HinrichSchütze 489–494 Learning high-quality embeddings for rare words is a hard problem because of sparse context information. Mimicking (Pinter et al., 2017) has been proposed as a solution: given embeddings learned by a standard algorithm, a model is first trained to reproduce embeddings of frequent words from their surface form and then used to compute embeddings for rare words. In this paper, we introduce attentive mimicking: the mimicking model is given access not only to a word’s surface form, but also to all available contexts and learns to attend to the most informative and reliable contexts for computing an embedding. In an evaluation on four tasks, we show that attentive mimicking outperforms previous work for both rare and medium-frequency words. Thus, compared to previous work, attentive mimicking improves embeddings for a much larger part of the vocabulary, including the medium-frequency range. N19-1048 @@ -605,7 +605,7 @@ Big <fixed-case>B</fixed-case>i<fixed-case>RD</fixed-case>: A Large, Fine-Grained, Bigram Relatedness Dataset for Examining Semantic Composition ShimaAsaadi - SaifMohammad + SaifMohammad SvetlanaKiritchenko 505–516 Bigrams (two-word sequences) hold a special place in semantic composition research since they are the smallest unit formed by composing words. A semantic relatedness dataset that includes bigrams will thus be useful in the development of automatic methods of semantic composition. However, existing relatedness datasets only include pairs of unigrams (single words). Further, existing datasets were created using rating scales and thus suffer from limitations such as in consistent annotations and scale region bias. In this paper, we describe how we created a large, fine-grained, bigram relatedness dataset (BiRD), using a comparative annotation technique called Best–Worst Scaling. Each of BiRD’s 3,345 English term pairs involves at least one bigram. We show that the relatedness scores obtained are highly reliable (split-half reliability r= 0.937). We analyze the data to obtain insights into bigram semantic relatedness. Finally, we present benchmark experiments on using the relatedness dataset as a testbed to evaluate simple unsupervised measures of semantic composition. BiRD is made freely available to foster further research on how meaning can be represented and how meaning can be composed. @@ -658,7 +658,7 @@ <fixed-case>IMHO</fixed-case> Fine-Tuning Improves Claim Detection TuhinChakrabarty ChristopherHidey - KathyMcKeown + KathyMcKeown 558–563 Claims are the central component of an argument. Detecting claims across different domains or data sets can often be challenging due to their varying conceptualization. We propose to alleviate this problem by fine-tuning a language model using a Reddit corpus of 5.5 million opinionated claims. These claims are self-labeled by their authors using the internet acronyms IMO/IMHO (in my (humble) opinion). Empirical results show that using this approach improves the state of art performance across four benchmark argumentation data sets by an average of 4 absolute F1 points in claim detection. As these data sets include diverse domains such as social media and student essays this improvement demonstrates the robustness of fine-tuning on this novel corpus. N19-1054 @@ -694,7 +694,7 @@ Improving Dialogue State Tracking by Discerning the Relevant Context SanujSharma - Prafulla KumarChoubey + Prafulla KumarChoubey RuihongHuang 576–581 A typical conversation comprises of multiple turns between participants where they go back and forth between different topics. At each user turn, dialogue state tracking (DST) aims to estimate user’s goal by processing the current utterance. However, in many turns, users implicitly refer to the previous goal, necessitating the use of relevant dialogue history. Nonetheless, distinguishing relevant history is challenging and a popular method of using dialogue recency for that is inefficient. We, therefore, propose a novel framework for DST that identifies relevant historical context by referring to the past utterances where a particular slot-value changes and uses that together with weighted system utterance to identify the relevant context. Specifically, we use the current user utterance and the most recent system utterance to determine the relevance of a system utterance. Empirical analyses show that our method improves joint goal accuracy by 2.75% and 2.36% on WoZ 2.0 and Multi-WoZ restaurant domain datasets respectively over the previous state-of-the-art GLAD model. @@ -754,7 +754,7 @@ <fixed-case>B</fixed-case>lack is to Criminal as <fixed-case>C</fixed-case>aucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings ThomasManzini LimYao Chong - Alan WBlack + Alan WBlack YuliaTsvetkov 615–621 Online texts - across genres, registers, domains, and styles - are riddled with human stereotypes, expressed in overt or subtle ways. Word embeddings, trained on these texts, perpetuate and amplify these stereotypes, and propagate biases to machine learning models that use word embeddings as features. In this work, we propose a method to debias word embeddings in multiclass settings such as race and religion, extending the work of (Bolukbasi et al., 2016) from the binary setting, such as binary gender. Next, we propose a novel methodology for the evaluation of multiclass debiasing. We demonstrate that our multiclass debiasing is robust and maintains the efficacy in standard NLP tasks. @@ -769,7 +769,7 @@ ChandlerMay AlexWang ShikhaBordia - Samuel R.Bowman + Samuel R.Bowman RachelRudinger 622–628 The Word Embedding Association Test shows that GloVe and word2vec word embeddings exhibit human-like implicit biases based on gender, race, and other social constructs (Caliskan et al., 2017). Meanwhile, research on learning reusable text representations has begun to explore sentence-level texts, with some sentence encoders seeing enthusiastic adoption. Accordingly, we extend the Word Embedding Association Test to measure bias in sentence encoders. We then test several sentence encoders, including state-of-the-art methods such as ELMo and BERT, for the social biases studied in prior work and two important biases that are difficult or impossible to test at the word level. We observe mixed results including suspicious patterns of sensitivity that suggest the test’s assumptions may not hold in general. We conclude by proposing directions for future work on measuring bias in sentence encoders. @@ -799,7 +799,7 @@ <fixed-case>C</fixed-case>ombining <fixed-case>S</fixed-case>entiment <fixed-case>L</fixed-case>exica with a <fixed-case>M</fixed-case>ulti-<fixed-case>V</fixed-case>iew <fixed-case>V</fixed-case>ariational <fixed-case>A</fixed-case>utoencoder Alexander MiserlisHoyle LawrenceWolf-Sonkin - HannaWallach + HannaWallach RyanCotterell IsabelleAugenstein 635–640 @@ -863,7 +863,7 @@ Keyphrase Generation: A Text Summarization Struggle ErionÇano - OndřejBojar + OndřejBojar 666–672 Authors’ keyphrases assigned to scientific articles are essential for recognizing content and topic aspects. Most of the proposed supervised and unsupervised methods for keyphrase generation are unable to produce terms that are valuable but do not appear in the text. In this paper, we explore the possibility of considering the keyphrase string as an abstractive summary of the title and the abstract. First, we collect, process and release a large dataset of scientific paper metadata that contains 2.2 million records. Then we experiment with popular text summarization neural architectures. Despite using advanced deep learning models, large quantities of data and many days of computation, our systematic evaluation on four test datasets reveals that the explored text summarization methods could not produce better keyphrases than the simpler unsupervised methods, or the existing supervised ones. N19-1070 @@ -931,8 +931,8 @@ JungoKasai DanFriedman RobertFrank - DragomirRadev - OwenRambow + DragomirRadev + OwenRambow 701–709 We introduce a new syntax-aware model for dependency-based semantic role labeling that outperforms syntax-agnostic models for English and Spanish. We use a BiLSTM to tag the text with supertags extracted from dependency parses, and we feed these supertags, along with words and parts of speech, into a deep highway BiLSTM for semantic role labeling. Our model combines the strengths of earlier models that performed SRL on the basis of a full dependency parse with more recent models that use no syntactic information at all. Our local and non-ensemble model achieves state-of-the-art performance on the CoNLL 09 English and Spanish datasets. SRL models benefit from syntactic information, and we show that supertagging is a simple, powerful, and robust way to incorporate syntax into a neural SRL system. N19-1075 @@ -1139,7 +1139,7 @@ HiteshGolchha MauajamaFirdaus AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 851–860 In this paper, we propose an effective deep learning framework for inducing courteous behavior in customer care responses. The interaction between a customer and the customer care representative contributes substantially to the overall customer experience. Thus it is imperative for customer care agents and chatbots engaging with humans to be personal, cordial and emphatic to ensure customer satisfaction and retention. Our system aims at automatically transforming neutral customer care responses into courteous replies. Along with stylistic transfer (of courtesy), our system ensures that responses are coherent with the conversation history, and generates courteous expressions consistent with the emotional state of the customer. Our technique is based on a reinforced pointer-generator model for the sequence to sequence task. The model is also conditioned on a hierarchically encoded and emotionally aware conversational context. We use real interactions on Twitter between customer care professionals and aggrieved customers to create a large conversational dataset having both forms of agent responses: ‘generic’ and ‘courteous’. We perform quantitative and qualitative analyses on established and task-specific metrics, both automatic and human evaluation based. Our evaluation shows that the proposed models can generate emotionally-appropriate courteous expressions while preserving the content. Experimental results also prove that our proposed approach performs better than the baseline models. N19-1091 @@ -1392,11 +1392,11 @@ Linguistic Knowledge and Transferability of Contextual Representations - Nelson F.Liu + Nelson F.Liu MattGardner YonatanBelinkov - Matthew E.Peters - Noah A.Smith + Matthew E.Peters + Noah A.Smith 1073–1094 Contextual word representations derived from large-scale neural language models are successful across a diverse set of NLP tasks, suggesting that they encode useful and transferable features of language. To shed light on the linguistic knowledge they capture, we study the representations produced by several recent pretrained contextualizers (variants of ELMo, the OpenAI transformer language model, and BERT) with a suite of sixteen diverse probing tasks. We find that linear models trained on top of frozen contextual representations are competitive with state-of-the-art task-specific models in many cases, but fail on tasks requiring fine-grained linguistic knowledge (e.g., conjunct identification). To investigate the transferability of contextual word representations, we quantify differences in the transferability of individual layers within contextualizers, especially between recurrent neural networks (RNNs) and transformers. For instance, higher layers of RNNs are more task-specific, while transformer layers do not exhibit the same monotonic trend. In addition, to better understand what makes contextual word representations transferable, we compare language model pretraining with eleven supervised pretraining tasks. For any given task, pretraining on a closely related task yields better performance than language model pretraining (which is better on average) when the pretraining dataset is fixed. However, language model pretraining on more data gives the best results. N19-1112 @@ -1417,10 +1417,10 @@ Unsupervised Recurrent Neural Network Grammars YoonKim - AlexanderRush + AlexanderRush LeiYu AdhigunaKuncoro - ChrisDyer + ChrisDyer GáborMelis 1105–1117 Recurrent neural network grammars (RNNG) are generative models of language which jointly model syntax and surface structure by incrementally generating a syntax tree and sentence in a top-down, left-to-right order. Supervised RNNGs achieve strong language modeling and parsing performance, but require an annotated corpus of parse trees. In this work, we experiment with unsupervised learning of RNNGs. Since directly marginalizing over the space of latent trees is intractable, we instead apply amortized variational inference. To maximize the evidence lower bound, we develop an inference network parameterized as a neural CRF constituency parser. On language modeling, unsupervised RNNGs perform as well their supervised counterparts on benchmarks in English and Chinese. On constituency grammar induction, they are competitive with recent neural language models that induce tree structures from words through attention mechanisms. @@ -1432,7 +1432,7 @@ Cooperative Learning of Disjoint Syntax and Semantics SerhiiHavrylov - GermánKruszewski + GermánKruszewski ArmandJoulin 1118–1128 There has been considerable attention devoted to models that learn to jointly infer an expression’s syntactic structure and its semantics. Yet, Nangia and Bowman (2018) has recently shown that the current best systems fail to learn the correct parsing strategy on mathematical expressions generated from a simple context-free grammar. In this work, we present a recursive model inspired by Choi et al. (2018) that reaches near perfect accuracy on this task. Our model is composed of two separated modules for syntax and semantics. They are cooperatively trained with standard continuous and discrete optimisation schemes. Our model does not require any linguistic structure for supervision, and its recursive nature allows for out-of-domain generalisation. Additionally, our approach performs competitively on several natural language tasks, such as Natural Language Inference and Sentiment Analysis. @@ -1486,7 +1486,7 @@ OtiliaStretcu GrahamNeubig BarnabasPoczos - TomMitchell + TomMitchell 1162–1172 Current state-of-the-art NMT systems use large neural networks that are not only slow to train, but also often require many heuristics and optimization tricks, such as specialized learning rate schedules and large batch sizes. This is undesirable as it requires extensive hyperparameter tuning. In this paper, we propose a curriculum learning framework for NMT that reduces training time, reduces the need for specialized heuristics or large batch sizes, and results in overall better performance. Our framework consists of a principled way of deciding which training samples are shown to the model at different times during training, based on the estimated difficulty of a sample and the current competence of the model. Filtering training samples in this manner prevents the model from getting stuck in bad local optima, making it converge faster and reach a better solution than the common approach of uniformly sampling training examples. Furthermore, the proposed method can be easily applied to existing NMT models by simply modifying their input data pipelines. We show that our framework can help improve the training time and the performance of both recurrent neural network models and Transformers, achieving up to a 70% decrease in training time, while at the same time obtaining accuracy improvements of up to 2.2 BLEU. N19-1119 @@ -1509,7 +1509,7 @@ Consistency by Agreement in Zero-Shot Neural Machine Translation MaruanAl-Shedivat - AnkurParikh + AnkurParikh 1184–1197 Generalization and reliability of multilingual translation often highly depend on the amount of available parallel data for each language pair of interest. In this paper, we focus on zero-shot generalization—a challenging setup that tests models on translation directions they have not been optimized for at training time. To solve the problem, we (i) reformulate multilingual translation as probabilistic inference, (ii) define the notion of zero-shot consistency and show why standard training often results in models unsuitable for zero-shot tasks, and (iii) introduce a consistent agreement-based training method that encourages the model to produce equivalent translations of parallel sentences in auxiliary languages. We test our multilingual NMT models on multiple public zero-shot translation benchmarks (IWSLT17, UN corpus, Europarl) and show that agreement-based learning often results in 2-3 BLEU zero-shot improvement over strong baselines without any loss in performance on supervised translation directions. N19-1121 @@ -1561,12 +1561,12 @@ Jointly Optimizing Diversity and Relevance in Neural Response Generation XiangGao - SungjinLee + SungjinLee YizheZhang ChrisBrockett MichelGalley JianfengGao - BillDolan + BillDolan 1229–1238 Although recent neural conversation models have shown great potential, they often generate bland and generic responses. While various approaches have been explored to diversify the output of the conversation model, the improvement often comes at the cost of decreased relevance. In this paper, we propose a SpaceFusion model to jointly optimize diversity and relevance that essentially fuses the latent space of a sequence-to-sequence model and that of an autoencoder model by leveraging novel regularization terms. As a result, our approach induces a latent space in which the distance and direction from the predicted response vector roughly match the relevance and diversity, respectively. This property also lends itself well to an intuitive visualization of the latent space. Both automatic and human evaluation results demonstrate that the proposed approach brings significant improvement compared to strong baselines in both diversity and relevance. N19-1125 @@ -1601,7 +1601,7 @@ <fixed-case>W</fixed-case>i<fixed-case>C</fixed-case>: the Word-in-Context Dataset for Evaluating Context-Sensitive Meaning Representations Mohammad TaherPilehvar - JoseCamacho-Collados + JoseCamacho-Collados 1267–1273 By design, word embeddings are unable to model the dynamic nature of words’ semantics, i.e., the property of words to correspond to potentially different meanings. To address this limitation, dozens of specialized meaning representation techniques such as sense or contextualized embeddings have been proposed. However, despite the popularity of research on this topic, very few evaluation benchmarks exist that specifically focus on the dynamic semantics of words. In this paper we show that existing models have surpassed the performance ceiling of the standard evaluation dataset for the purpose, i.e., Stanford Contextual Word Similarity, and highlight its shortcomings. To address the lack of a suitable benchmark, we put forward a large-scale Word in Context dataset, called WiC, based on annotations curated by experts, for generic evaluation of context-sensitive representations. WiC is released in https://pilehvar.github.io/wic/. N19-1128 @@ -1674,9 +1674,9 @@ Adaptation of Hierarchical Structured Models for Speech Act Recognition in Asynchronous Conversation - TasnimMohiuddin + TasnimMohiuddin Thanh-TungNguyen - ShafiqJoty + ShafiqJoty 1326–1336 We address the problem of speech act recognition (SAR) in asynchronous conversations (forums, emails). Unlike synchronous conversations (e.g., meetings, phone), asynchronous domains lack large labeled datasets to train an effective SAR model. In this paper, we propose methods to effectively leverage abundant unlabeled conversational data and the available labeled data from synchronous domains. We carry out our research in three main steps. First, we introduce a neural architecture based on hierarchical LSTMs and conditional random fields (CRF) for SAR, and show that our method outperforms existing methods when trained on in-domain data only. Second, we improve our initial SAR models by semi-supervised learning in the form of pretrained word embeddings learned from a large unlabeled conversational corpus. Finally, we employ adversarial training to improve the results further by leveraging the labeled data from synchronous domains and by explicitly modeling the distributional shift in two domains. N19-1134 @@ -1710,7 +1710,7 @@ Multi-Channel Convolutional Neural Network for <fixed-case>T</fixed-case>witter Emotion and Sentiment Recognition JumayelIslam - Robert E.Mercer + Robert E.Mercer LuXiao 1355–1365 The advent of micro-blogging sites has paved the way for researchers to collect and analyze huge volumes of data in recent years. Twitter, being one of the leading social networking sites worldwide, provides a great opportunity to its users for expressing their states of mind via short messages which are called tweets. The urgency of identifying emotions and sentiments conveyed through tweets has led to several research works. It provides a great way to understand human psychology and impose a challenge to researchers to analyze their content easily. In this paper, we propose a novel use of a multi-channel convolutional neural architecture which can effectively use different emotion and sentiment indicators such as hashtags, emoticons and emojis that are present in the tweets and improve the performance of emotion and sentiment identification. We also investigate the incorporation of different lexical features in the neural network model and its effect on the emotion and sentiment identification task. We analyze our model on some standard datasets and compare its effectiveness with existing techniques. @@ -1775,7 +1775,7 @@ MareikeHartmann TallulahJansen IsabelleAugenstein - AndersSøgaard + AndersSøgaard 1401–1407 In online discussion fora, speakers often make arguments for or against something, say birth control, by highlighting certain aspects of the topic. In social science, this is referred to as issue framing. In this paper, we introduce a new issue frame annotated corpus of online discussions. We explore to what extent models trained to detect issue frames in newswire and social media can be transferred to the domain of discussion fora, using a combination of multi-task and adversarial training, assuming only unlabeled training data in the target domain. N19-1142 @@ -1795,8 +1795,8 @@ Predicting the Type and Target of Offensive Posts in Social Media MarcosZampieri - ShervinMalmasi - PreslavNakov + ShervinMalmasi + PreslavNakov SaraRosenthal NouraFarra RiteshKumar @@ -1831,7 +1831,7 @@ Relation Extraction using Explicit Context Conditioning - GauravSingh + GauravSingh ParminderBhatia 1442–1447 Relation extraction (RE) aims to label relations between groups of marked entities in raw text. Most current RE models learn context-aware representations of the target entities that are then used to establish relation between them. This works well for intra-sentence RE, and we call them first-order relations. However, this methodology can sometimes fail to capture complex and long dependencies. To address this, we hypothesize that at times the target entities can be connected via a context token. We refer to such indirect relations as second-order relations, and describe an efficient implementation for computing them. These second-order relation scores are then combined with first-order relation scores to obtain final relation scores. Our empirical results show that the proposed method leads to state-of-the-art performance over two biomedical datasets. @@ -1843,7 +1843,7 @@ Conversation Model Fine-Tuning for Classifying Client Utterances in Counseling Dialogues SungjoonPark DonghyunKim - AliceOh + AliceOh 1448–1459 The recent surge of text-based online counseling applications enables us to collect and analyze interactions between counselors and clients. A dataset of those interactions can be used to learn to automatically classify the client utterances into categories that help counselors in diagnosing client status and predicting counseling outcome. With proper anonymization, we collect counselor-client dialogues, define meaningful categories of client utterances with professional counselors, and develop a novel neural network model for classifying the client utterances. The central idea of our model, ConvMFiT, is a pre-trained conversation model which consists of a general language model built from an out-of-domain corpus and two role-specific language models built from unlabeled in-domain dialogues. The classification result shows that ConvMFiT outperforms state-of-the-art comparison models. Further, the attention weights in the learned model confirm that the model finds expected linguistic patterns for each category. N19-1148 @@ -1852,10 +1852,10 @@ Using Similarity Measures to Select Pretraining Data for <fixed-case>NER</fixed-case> - XiangDai + XiangDai SarvnazKarimi BenHachey - CecileParis + CecileParis 1460–1470 Word vectors and Language Models (LMs) pretrained on a large amount of unlabelled data can dramatically improve various Natural Language Processing (NLP) tasks. However, the measure and impact of similarity between pretraining data and target task data are left to intuition. We propose three cost-effective measures to quantify different aspects of similarity between source pretraining and target task data. We demonstrate that these measures are good predictors of the usefulness of pretrained models for Named Entity Recognition (NER) over 30 data pairs. Results also suggest that pretrained LMs are more effective and more predictable than pretrained word vectors, but pretrained word vectors are better when pretraining data is dissimilar. N19-1149 @@ -1867,7 +1867,7 @@ YinfeiYang OshinAgarwal ChrisTar - Byron C.Wallace + Byron C.Wallace AniNenkova 1471–1480 Modern NLP systems require high-quality annotated data. For specialized domains, expert annotations may be prohibitively expensive; the alternative is to rely on crowdsourcing to reduce costs at the risk of introducing noise. In this paper we demonstrate that directly modeling instance difficulty can be used to improve model performance and to route instances to appropriate annotators. Our difficulty prediction model combines two learned representations: a ‘universal’ encoder trained on out of domain data, and a task-specific encoder. Experiments on a complex biomedical information extraction task using expert and lay annotators show that: (i) simply excluding from the training data instances predicted to be difficult yields a small boost in performance; (ii) using difficulty scores to weight instances during training provides further, consistent gains; (iii) assigning instances predicted to be difficult to domain experts is an effective strategy for task routing. Further, our experiments confirm the expectation that for such domain-specific tasks expert annotations are of much higher quality and preferable to obtain if practical and that augmenting small amounts of expert data with a larger set of lay annotations leads to further improvements in model performance. @@ -1880,7 +1880,7 @@ Mario EzraAragón Adrian PastorLópez-Monroy Luis CarlosGonzález-Gurrola - ManuelMontes-y-Gómez + ManuelMontes-y-Gómez 1481–1486 Nowadays social media platforms are the most popular way for people to share information, from work issues to personal matters. For example, people with health disorders tend to share their concerns for advice, support or simply to relieve suffering. This provides a great opportunity to proactively detect these users and refer them as soon as possible to professional help. We propose a new representation called Bag of Sub-Emotions (BoSE), which represents social media documents by a set of fine-grained emotions automatically generated using a lexical resource of emotions and subword embeddings. The proposed representation is evaluated in the task of depression detection. The results are encouraging; the usage of fine-grained emotions improved the results from a representation based on the core emotions and obtained competitive results in comparison to state of the art approaches. N19-1151 @@ -1890,8 +1890,8 @@ A Silver Standard Corpus of Human Phenotype-Gene Relations DianaSousa - AndreLamurias - Francisco M.Couto + AndreLamurias + Francisco M.Couto 1487–1492 Human phenotype-gene relations are fundamental to fully understand the origin of some phenotypic abnormalities and their associated diseases. Biomedical literature is the most comprehensive source of these relations, however, we need Relation Extraction tools to automatically recognize them. Most of these tools require an annotated corpus and to the best of our knowledge, there is no corpus available annotated with human phenotype-gene relations. This paper presents the Phenotype-Gene Relations (PGR) corpus, a silver standard corpus of human phenotype and gene annotations and their relations. The corpus consists of 1712 abstracts, 5676 human phenotype annotations, 13835 gene annotations, and 4283 relations. We generated this corpus using Named-Entity Recognition tools, whose results were partially evaluated by eight curators, obtaining a precision of 87.01%. By using the corpus we were able to obtain promising results with two state-of-the-art deep learning tools, namely 78.05% of precision. The PGR corpus was made publicly available to the research community. N19-1152 @@ -1901,7 +1901,7 @@ Improving Lemmatization of Non-Standard Languages with Joint Learning - EnriqueManjavacas + EnriqueManjavacas ÁkosKádár MikeKestemont 1493–1503 @@ -1916,7 +1916,7 @@ FahimDalvi HassanSajjad YonatanBelinkov - PreslavNakov + PreslavNakov 1504–1516 Recent work has shown that contextualized word representations derived from neural machine translation are a viable alternative to such from simple word predictions tasks. This is because the internal understanding that needs to be built in order to be able to translate from one language to another is much more comprehensive. Unfortunately, computational and memory limitations as of present prevent NMT models from using large word vocabularies, and thus alternatives such as subword units (BPE and morphological segmentations) and characters have been used. Here we study the impact of using different kinds of units on the quality of the resulting representations when used to model morphology, syntax, and semantics. We found that while representations derived from subwords are slightly better for modeling syntax, character-based representations are superior for modeling morphology and are also more robust to noisy input. N19-1154 @@ -1949,8 +1949,8 @@ Quantifying the morphosyntactic content of Brown Clusters - Manuel R.Ciosici - LeonDerczynski + Manuel R.Ciosici + LeonDerczynski IraAssent 1541–1550 Brown and Exchange word clusters have long been successfully used as word representations in Natural Language Processing (NLP) systems. Their success has been attributed to their seeming ability to represent both semantic and syntactic information. Using corpora representing several language families, we test the hypothesis that Brown and Exchange word clusters are highly effective at encoding morphosyntactic information. Our experiments show that word clusters are highly capable at distinguishing Parts of Speech. We show that increases in Average Mutual Information, the clustering algorithms’ optimization goal, are highly correlated with improvements in encoding of morphosyntactic information. Our results provide empirical evidence that downstream NLP systems addressing tasks dependent on morphosyntactic information can benefit from word cluster features. @@ -1962,7 +1962,7 @@ Analyzing <fixed-case>B</fixed-case>ayesian Crosslingual Transfer in Topic Models ShudongHao - Michael J.Paul + Michael J.Paul 1551–1565 We introduce a theoretical analysis of crosslingual transfer in probabilistic topic models. By formulating posterior inference through Gibbs sampling as a process of language transfer, we propose a new measure that quantifies the loss of knowledge across languages during this process. This measure enables us to derive a PAC-Bayesian bound that elucidates the factors affecting model quality, both during training and in downstream applications. We provide experimental validation of the analysis on a diverse set of five languages, and discuss best practices for data collection and model design based on our analysis. N19-1158 @@ -2053,7 +2053,7 @@ Text Processing Like Humans Do: Visually Attacking and Shielding <fixed-case>NLP</fixed-case> Systems SteffenEger - Gözde GülŞahin + Gözde GülŞahin AndreasRücklé Ji-UngLee ClaudiaSchulz @@ -2106,7 +2106,7 @@ Unifying Human and Statistical Evaluation for Natural Language Generation - Tatsunori B.Hashimoto + Tatsunori B.Hashimoto HughZhang PercyLiang 1689–1701 @@ -2132,7 +2132,7 @@ <fixed-case>A</fixed-case>n Empirical Investigation of Global and Local Normalization for Recurrent Neural Sequence Models Using a Continuous Relaxation to Beam Search KartikGoyal - ChrisDyer + ChrisDyer TaylorBerg-Kirkpatrick 1724–1733 Globally normalized neural sequence models are considered superior to their locally normalized equivalents because they may ameliorate the effects of label bias. However, when considering high-capacity neural parametrizations that condition on the whole input sequence, both model classes are theoretically equivalent in terms of the distributions they are capable of representing. Thus, the practical advantage of global normalization in the context of modern neural methods remains unclear. In this paper, we attempt to shed light on this problem through an empirical study. We extend an approach for search-aware training via a continuous relaxation of beam search (Goyal et al., 2017b) in order to enable training of globally normalized recurrent sequence models through simple backpropagation. We then use this technique to conduct an empirical study of the interaction between global normalization, high-capacity encoders, and search-aware optimization. We observe that in the context of inexact search, globally normalized neural models are still more effective than their locally normalized counterparts. Further, since our training approach is sensitive to warm-starting with pre-trained models, we also propose a novel initialization strategy based on self-normalization for pre-training globally normalized models. We perform analysis of our approach on two tasks: CCG supertagging and Machine Translation, and demonstrate the importance of global normalization under different conditions while using search-aware training. @@ -2165,7 +2165,7 @@ Fixed That for You: Generating Contrastive Claims with Semantic Edits ChristopherHidey - KathyMcKeown + KathyMcKeown 1756–1767 Understanding contrastive opinions is a key component of argument generation. Central to an argument is the claim, a statement that is in dispute. Generating a counter-argument then requires generating a response in contrast to the main claim of the original argument. To generate contrastive claims, we create a corpus of Reddit comment pairs self-labeled by posters using the acronym FTFY (fixed that for you). We then train neural models on these pairs to edit the original claim and produce a new claim with a different view. We demonstrate significant improvement over a sequence-to-sequence baseline in BLEU score and a human evaluation for fluency, coherence, and contrast. N19-1174 @@ -2176,7 +2176,7 @@ Box of Lies: Multimodal Deception Detection in Dialogues FelixSoldner VerónicaPérez-Rosas - RadaMihalcea + RadaMihalcea 1768–1777 Deception often takes place during everyday conversations, yet conversational dialogues remain largely unexplored by current work on automatic deception detection. In this paper, we address the task of detecting multimodal deceptive cues during conversational dialogues. We introduce a multimodal dataset containing deceptive conversations between participants playing the Box of Lies game from The Tonight Show Starring Jimmy Fallon, in which they try to guess whether an object description provided by their opponent is deceptive or not. We conduct annotations of multimodal communication behaviors, including facial and linguistic behaviors, and derive several learning features based on these annotations. Initial classification experiments show promising results, performing well above both a random and a human baseline, and reaching up to 69% accuracy in distinguishing deceptive and truthful behaviors. N19-1175 @@ -2185,12 +2185,12 @@ A Crowdsourced Corpus of Multiple Judgments and Disagreement on Anaphoric Interpretation - MassimoPoesio + MassimoPoesio JonChamberlain SilviuPaun JuntaoYu AlexandraUma - UdoKruschwitz + UdoKruschwitz 1778–1789 We present a corpus of anaphoric information (coreference) crowdsourced through a game-with-a-purpose. The corpus, containing annotations for about 108,000 markables, is one of the largest corpora for coreference for English, and one of the largest crowdsourced NLP corpora, but its main feature is the large number of judgments per markable: 20 on average, and over 2.2M in total. This characteristic makes the corpus a unique resource for the study of disagreements on anaphoric interpretation. A second distinctive feature is its rich annotation scheme, covering singletons, expletives, and split-antecedent plurals. Finally, the corpus also comes with labels inferred using a recently proposed probabilistic model of annotation for coreference. The labels are of high quality and make it possible to successfully train a state of the art coreference resolver, including training on singletons and non-referring expressions. The annotation model can also result in more than one label, or no label, being proposed for a markable, thus serving as a baseline method for automatically identifying ambiguous markables. A preliminary analysis of the results is presented. N19-1176 @@ -2222,7 +2222,7 @@ Modeling Document-level Causal Structures for Event Causal Relation Identification LeiGao - Prafulla KumarChoubey + Prafulla KumarChoubey RuihongHuang 1808–1817 We aim to comprehensively identify all the event causal relations in a document, both within a sentence and across sentences, which is important for reconstructing pivotal event structures. The challenges we identified are two: 1) event causal relations are sparse among all possible event pairs in a document, in addition, 2) few causal relations are explicitly stated. Both challenges are especially true for identifying causal relations between events across sentences. To address these challenges, we model rich aspects of document-level causal structures for achieving comprehensive causal relation identification. The causal structures include heavy involvements of document-level main events in causal relations as well as several types of fine-grained constraints that capture implications from certain sentential syntactic relations and discourse relations as well as interactions between event causal relations and event coreference relations. Our experimental results show that modeling the global and fine-grained aspects of causal structures using Integer Linear Programming (ILP) greatly improves the performance of causal relation identification, especially in identifying cross-sentence causal relations. @@ -2296,9 +2296,9 @@ Word Embedding-Based Automatic <fixed-case>MT</fixed-case> Evaluation Metric using Word Position Information - HiroshiEchizen’ya - KenjiAraki - EduardHovy + HiroshiEchizen’ya + KenjiAraki + EduardHovy 1874–1883 We propose a new automatic evaluation metric for machine translation. Our proposed metric is obtained by adjusting the Earth Mover’s Distance (EMD) to the evaluation task. The EMD measure is used to obtain the distance between two probability distributions consisting of some signatures having a feature and a weight. We use word embeddings, sentence-level tf-idf, and cosine similarity between two word embeddings, respectively, as the features, weight, and the distance between two features. Results show that our proposed metric can evaluate machine translation based on word meaning. Moreover, for distance, cosine similarity and word position information are used to address word-order differences. We designate this metric as Word Embedding-Based automatic MT evaluation using Word Position Information (WE_WPI). A meta-evaluation using WMT16 metrics shared task set indicates that our WE_WPI achieves the highest correlation with human judgment among several representative metrics. N19-1186 @@ -2321,7 +2321,7 @@ GeertHeyman BregtVerreet IvanVulić - Marie-FrancineMoens + Marie-FrancineMoens 1890–1902 Recent research has discovered that a shared bilingual word embedding space can be induced by projecting monolingual word embedding spaces from two languages using a self-learning paradigm without any bilingual supervision. However, it has also been shown that for distant language pairs such fully unsupervised self-learning methods are unstable and often get stuck in poor local optima due to reduced isomorphism between starting monolingual spaces. In this work, we propose a new robust framework for learning unsupervised multilingual word embeddings that mitigates the instability issues. We learn a shared multilingual embedding space for a variable number of languages by incrementally adding new languages one by one to the current multilingual space. Through the gradual language addition the method can leverage the interdependencies between the new language and all other languages in the current multilingual space. We find that it is beneficial to project more distant languages later in the iterative process. Our fully unsupervised multilingual embedding spaces yield results that are on par with the state-of-the-art methods in the bilingual lexicon induction (BLI) task, and simultaneously obtain state-of-the-art scores on two downstream tasks: multilingual document classification and multilingual dependency parsing, outperforming even supervised baselines. This finding also accentuates the need to establish evaluation protocols for cross-lingual word embeddings beyond the omnipresent intrinsic BLI task in future work. N19-1188 @@ -2369,8 +2369,8 @@ Hao-RanWei ShujianHuang RanWang - Xin-yuDai - JiajunChen + Xin-yuDai + JiajunChen 1932–1941 Current predominant neural machine translation (NMT) models often have a deep structure with large amounts of parameters, making these models hard to train and easily suffering from over-fitting. A common practice is to utilize a validation set to evaluate the training process and select the best checkpoint. Average and ensemble techniques on checkpoints can lead to further performance improvement. However, as these methods do not affect the training process, the system performance is restricted to the checkpoints generated in original training procedure. In contrast, we propose an online knowledge distillation method. Our method on-the-fly generates a teacher model from checkpoints, guiding the training process to obtain better performance. Experiments on several datasets and language pairs show steady improvement over a strong self-attention-based baseline system. We also provide analysis on data-limited setting against over-fitting. Furthermore, our method leads to an improvement in a machine reading experiment as well. N19-1192 @@ -2437,7 +2437,7 @@ SohamGhosh AnuvaAgarwal ZaranaParekh - AlexanderHauptmann + AlexanderHauptmann 1984–1990 The task of retrieving clips within videos based on a given natural language query requires cross-modal reasoning over multiple frames. Prior approaches such as sliding window classifiers are inefficient, while text-clip similarity driven ranking-based approaches such as segment proposal networks are far more complicated. In order to select the most relevant video clip corresponding to the given text description, we propose a novel extractive approach that predicts the start and end frames by leveraging cross-modal interactions between the text and video - this removes the need to retrieve and re-rank multiple proposal segments. Using recurrent networks we encode the two modalities into a joint representation which is then used in different variants of start-end frame predictor networks. Through extensive experimentation and ablative analysis, we demonstrate that our simple and elegant approach significantly outperforms state of the art on two datasets and has comparable performance on a third. N19-1198 @@ -2471,8 +2471,8 @@ Subword-Level Language Identification for Intra-Word Code-Switching ManuelMager - ÖzlemÇetinoğlu - KatharinaKann + ÖzlemÇetinoğlu + KatharinaKann 2005–2011 Language identification for code-switching (CS), the phenomenon of alternating between two or more languages in conversations, has traditionally been approached under the assumption of a single language per token. However, if at least one language is morphologically rich, a large number of words can be composed of morphemes from more than one language (intra-word CS). In this paper, we extend the language identification task to the subword-level, such that it includes splitting mixed words while tagging each part with a language ID. We further propose a model for this task, which is based on a segmental recurrent neural network. In experiments on a new Spanish–Wixarika dataset and on an adapted German–Turkish dataset, our proposed model performs slightly better than or roughly on par with our best baseline, respectively. Considering only mixed words, however, it strongly outperforms all baselines. N19-1201 @@ -2482,10 +2482,10 @@ <fixed-case>M</fixed-case>u<fixed-case>ST</fixed-case>-<fixed-case>C</fixed-case>: a <fixed-case>M</fixed-case>ultilingual <fixed-case>S</fixed-case>peech <fixed-case>T</fixed-case>ranslation <fixed-case>C</fixed-case>orpus - Mattia A.Di Gangi + Mattia A.Di Gangi RoldanoCattoni LuisaBentivogli - MatteoNegri + MatteoNegri MarcoTurchi 2012–2017 Current research on spoken language translation (SLT) has to confront with the scarcity of sizeable and publicly available training corpora. This problem hinders the adoption of neural end-to-end approaches, which represent the state of the art in the two parent tasks of SLT: automatic speech recognition and machine translation. To fill this gap, we created MuST-C, a multilingual speech translation corpus whose size and quality will facilitate the training of end-to-end systems for SLT from English into 8 languages. For each target language, MuST-C comprises at least 385 hours of audio recordings from English TED Talks, which are automatically aligned at the sentence level with their manual transcriptions and translations. Together with a description of the corpus creation methodology (scalable to add new data and cover new languages), we provide an empirical verification of its quality and SLT results computed with a state-of-the-art approach on each language direction. @@ -2499,9 +2499,9 @@ Contextualization of Morphological Inflection EkaterinaVylomova RyanCotterell - TrevorCohn - TimothyBaldwin - JasonEisner + TrevorCohn + TimothyBaldwin + JasonEisner 2018–2024 Critical to natural language generation is the production of correctly inflected text. In this paper, we isolate the task of predicting a fully inflected sentence from its partially lemmatized version. Unlike traditional morphological inflection or surface realization, our task input does not provide “gold” tags that specify what morphological features to realize on each lemmatized word; rather, such features must be inferred from sentential context. We develop a neural hybrid graphical model that explicitly reconstructs morphological features before predicting the inflected forms, and compare this to a system that directly predicts the inflected forms without relying on any morphological annotation. We experiment on several typologically diverse languages from the Universal Dependencies treebanks, showing the utility of incorporating linguistically-motivated latent variables into NLP models. N19-1203 @@ -2513,7 +2513,7 @@ A Robust Abstractive System for Cross-Lingual Summarization JessicaOuyang BoyaSong - KathyMcKeown + KathyMcKeown 2025–2031 We present a robust neural abstractive summarization system for cross-lingual summarization. We construct summarization corpora for documents automatically translated from three low-resource languages, Somali, Swahili, and Tagalog, using machine translation and the New York Times summarization corpus. We train three language-specific abstractive summarizers and evaluate on documents originally written in the source languages, as well as on a fourth, unseen language: Arabic. Our systems achieve significantly higher fluency than a standard copy-attention summarizer on automatically translated input documents, as well as comparable content selection. N19-1204 @@ -2525,8 +2525,8 @@ ChunpengMa AkihiroTamura MasaoUtiyama - EiichiroSumita - TiejunZhao + EiichiroSumita + TiejunZhao 2032–2037 The explicit use of syntactic information has been proved useful for neural machine translation (NMT). However, previous methods resort to either tree-structured neural networks or long linearized sequences, both of which are inefficient. Neural syntactic distance (NSD) enables us to represent a constituent tree using a sequence whose length is identical to the number of words in the sentence. NSD has been used for constituent parsing, but not in machine translation. We propose five strategies to improve NMT with NSD. Experiments show that it is not trivial to improve NMT with NSD; however, the proposed strategies are shown to improve translation performance of the baseline model (+2.1 (En–Ja), +1.3 (Ja–En), +1.2 (En–Ch), and +1.0 (Ch–En) BLEU). N19-1205 @@ -2590,8 +2590,8 @@ Short-Term Meaning Shift: A Distributional Exploration MarcoDel Tredici - RaquelFernández - GemmaBoleda + RaquelFernández + GemmaBoleda 2069–2075 We present the first exploration of meaning shift over short periods of time in online communities using distributional representations. We create a small annotated dataset and use it to assess the performance of a standard model for meaning shift detection on short-term meaning shift. We find that the model has problems distinguishing meaning shift from referential phenomena, and propose a measure of contextual variability to remedy this. N19-1210 @@ -2663,8 +2663,8 @@ RamyBaly GeorgiKaradzhov AbdelrhmanSaleh - JamesGlass - PreslavNakov + JamesGlass + PreslavNakov 2109–2116 In the context of fake news, bias, and propaganda, we study two important but relatively under-explored problems: (i) trustworthiness estimation (on a 3-point scale) and (ii) political ideology detection (left/right bias on a 7-point scale) of entire news outlets, as opposed to evaluating individual articles. In particular, we propose a multi-task ordinal regression framework that models the two problems jointly. This is motivated by the observation that hyper-partisanship is often linked to low trustworthiness, e.g., appealing to emotions rather than sticking to the facts, while center media tend to be generally more impartial and trustworthy. We further use several auxiliary tasks, modeling centrality, hyper-partisanship, as well as left-vs.-right bias on a coarse-grained scale. The evaluation results show sizable performance gains by the joint models over models that target the problems in isolation. N19-1216 @@ -2757,7 +2757,7 @@ A Crowdsourced Frame Disambiguation Corpus with Ambiguity AncaDumitrache LoraAroyo - ChrisWelty + ChrisWelty 2164–2170 We present a resource for the task of FrameNet semantic frame disambiguation of over 5,000 word-sentence pairs from the Wikipedia corpus. The annotations were collected using a novel crowdsourcing approach with multiple workers per sentence to capture inter-annotator disagreement. In contrast to the typical approach of attributing the best single frame to each word, we provide a list of frames with disagreement-based scores that express the confidence with which each frame applies to the word. This is based on the idea that inter-annotator disagreement is at least partly caused by ambiguity that is inherent to the text and frames. We have found many examples where the semantics of individual frames overlap sufficiently to make them acceptable alternatives for interpreting a sentence. We have argued that ignoring this ambiguity creates an overly arbitrary target for training and evaluating natural language processing systems - if humans cannot agree, why would we expect the correct answer from a machine to be any different? To process this data we also utilized an expanded lemma-set provided by the Framester system, which merges FN with WordNet to enhance coverage. Our dataset includes annotations of 1,000 sentence-word pairs whose lemmas are not part of FN. Finally we present metrics for evaluating frame disambiguation systems that account for ambiguity. N19-1224 @@ -2769,9 +2769,9 @@ Inoculation by Fine-Tuning: A Method for Analyzing Challenge Datasets - Nelson F.Liu + Nelson F.Liu RoySchwartz - Noah A.Smith + Noah A.Smith 2171–2179 Several datasets have recently been constructed to expose brittleness in models trained on existing benchmarks. While model performance on these challenge datasets is significantly lower compared to the original benchmark, it is unclear what particular weaknesses they reveal. For example, a challenge dataset may be difficult because it targets phenomena that current models cannot capture, or because it simply exploits blind spots in a model’s specific training set. We introduce inoculation by fine-tuning, a new analysis method for studying challenge datasets by exposing models (the metaphorical patient) to a small amount of data from the challenge dataset (a metaphorical pathogen) and assessing how well they can adapt. We apply our method to analyze the NLI “stress tests” (Naik et al., 2018) and the Adversarial SQuAD dataset (Jia and Liang, 2017). We show that after slight exposure, some of these datasets are no longer challenging, while others remain difficult. Our results indicate that failures on challenge datasets may lead to very different conclusions about models, training datasets, and the challenge datasets themselves. N19-1225 @@ -2851,7 +2851,7 @@ YukunFeng BrianJoseph BéatriceJoyeux-Prunel - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe 2223–2234 Scholars in inter-disciplinary fields like the Digital Humanities are increasingly interested in semantic annotation of specialized corpora. Yet, under-resourced languages, imperfect or noisily structured data, and user-specific classification tasks make it difficult to meet their needs using off-the-shelf models. Manual annotation of large corpora from scratch, meanwhile, can be prohibitively expensive. Thus, we propose an active learning solution for named entity recognition, attempting to maximize a custom model’s improvement per additional unit of manual annotation. Our system robustly handles any domain or user-defined label set and requires no external resources, enabling quality named entity recognition for Humanities corpora where such resources are not available. Evaluating on typologically disparate languages and datasets, we reduce required annotation by 20-60% and greatly outperform a competitive active learning baseline. N19-1231 @@ -2896,8 +2896,8 @@ Neural Text Generation from Rich Semantic Representations ValerieHajdik JanBuys - Michael WayneGoodman - Emily M.Bender + Michael WayneGoodman + Emily M.Bender 2259–2266 We propose neural models to generate high-quality text from structured representations based on Minimal Recursion Semantics (MRS). MRS is a rich semantic representation that encodes more precise semantic detail than other representations such as Abstract Meaning Representation (AMR). We show that a sequence-to-sequence model that maps a linearization of Dependency MRS, a graph-based representation of MRS, to text can achieve a BLEU score of 66.11 when trained on gold data. The performance of the model can be improved further using a high-precision, broad coverage grammar-based parser to generate a large silver training corpus, achieving a final BLEU score of 77.17 on the full test set, and 83.37 on the subset of test data most closely matching the silver data domain. Our results suggest that MRS-based representations are a good choice for applications that need both structured semantics and the ability to produce natural language text as output. N19-1235 @@ -2930,7 +2930,7 @@ <fixed-case>T</fixed-case>ext <fixed-case>G</fixed-case>eneration from <fixed-case>K</fixed-case>nowledge <fixed-case>G</fixed-case>raphs with <fixed-case>G</fixed-case>raph <fixed-case>T</fixed-case>ransformers - RikKoncel-Kedziorski + RikKoncel-Kedziorski DhanushBekal YiLuan MirellaLapata @@ -2944,7 +2944,7 @@ Open Information Extraction from Question-Answer Pairs NikitaBhutani - YoshihikoSuhara + YoshihikoSuhara Wang-ChiewTan AlonHalevy H. V.Jagadish @@ -2979,7 +2979,7 @@ HuXu BingLiu LeiShu - PhilipYu + PhilipYu 2324–2335 Question-answering plays an important role in e-commerce as it allows potential customers to actively seek crucial information about products or services to help their purchase decision making. Inspired by the recent success of machine reading comprehension (MRC) on formal documents, this paper explores the potential of turning customer reviews into a large source of knowledge that can be exploited to answer user questions. We call this problem Review Reading Comprehension (RRC). To the best of our knowledge, no existing work has been done on RRC. In this work, we first build an RRC dataset called ReviewRC based on a popular benchmark for aspect-based sentiment analysis. Since ReviewRC has limited training examples for RRC (and also for aspect-based sentiment analysis), we then explore a novel post-training approach on the popular language model BERT to enhance the performance of fine-tuning of BERT for RRC. To show the generality of the approach, the proposed post-training is also applied to some other review-based tasks such as aspect extraction and aspect sentiment classification in aspect-based sentiment analysis. Experimental results demonstrate that the proposed post-training is highly effective. N19-1242 @@ -3004,12 +3004,12 @@ Be Consistent! Improving Procedural Text Comprehension using Label Consistency XinyaDu - BhavanaDalvi + BhavanaDalvi NiketTandon AntoineBosselut - Wen-tauYih + Wen-tauYih PeterClark - ClaireCardie + ClaireCardie 2347–2356 Our goal is procedural text comprehension, namely tracking how the properties of entities (e.g., their location) change with time given a procedural text (e.g., a paragraph about photosynthesis, a recipe). This task is challenging as the world is changing throughout the text, and despite recent advances, current systems still struggle with this task. Our approach is to leverage the fact that, for many procedural texts, multiple independent descriptions are readily available, and that predictions from them should be consistent (label consistency). We present a new learning framework that leverages label consistency during training, allowing consistency bias to be built into the model. Evaluation on a standard benchmark dataset for procedural text, ProPara (Dalvi et al., 2018), shows that our approach significantly improves prediction performance (F1) over prior state-of-the-art systems. N19-1244 @@ -3021,7 +3021,7 @@ AidaAmini SaadiaGabriel ShanchuanLin - RikKoncel-Kedziorski + RikKoncel-Kedziorski YejinChoi HannanehHajishirzi 2357–2367 @@ -3047,7 +3047,7 @@ An Encoding Strategy Based Word-Character <fixed-case>LSTM</fixed-case> for <fixed-case>C</fixed-case>hinese <fixed-case>NER</fixed-case> - WeiLiu + WeiLiu TonggeXu QinghuaXu JiayuSong @@ -3076,7 +3076,7 @@ <fixed-case>SC</fixed-case>-<fixed-case>LSTM</fixed-case>: Learning Task-Specific Representations in Multi-Task Learning for Sequence Labeling PengLu TingBai - PhilippeLanglais + PhilippeLanglais 2396–2406 Multi-task learning (MTL) has been studied recently for sequence labeling. Typically, auxiliary tasks are selected specifically in order to improve the performance of a target task. Jointly learning multiple tasks in a way that benefit all of them simultaneously can increase the utility of MTL. In order to do so, we propose a new LSTM cell which contains both shared parameters that can learn from all tasks, and task-specific parameters that can learn task-specific information. We name it a Shared-Cell Long-Short Term Memory SC-LSTM. Experimental results on three sequence labeling benchmarks (named-entity recognition, text chunking, and part-of-speech tagging) demonstrate the effectiveness of our SC-LSTM cell. N19-1249 @@ -3099,7 +3099,7 @@ OphélieLacroix MarekRei HelenYannakoudakis - AndersSøgaard + AndersSøgaard 2418–2427 While rule-based detection of subject-verb agreement (SVA) errors is sensitive to syntactic parsing errors and irregularities and exceptions to the main rules, neural sequential labelers have a tendency to overfit their training data. We observe that rule-based error generation is less sensitive to syntactic parsing errors and irregularities than error detection and explore a simple, yet efficient approach to getting the best of both worlds: We train neural sequential labelers on the combination of large volumes of silver standard data, obtained through rule-based error generation, and gold standard data. We show that our simple protocol leads to more robust detection of SVA errors on both in-domain and out-of-domain data, as well as in the context of other errors and long-distance dependencies; and across four standard benchmarks, the induced model on average achieves a new state of the art. N19-1251 @@ -3120,10 +3120,10 @@ On Difficulties of Cross-Lingual Transfer with Order Differences: A Case Study on Dependency Parsing - WasiAhmad + WasiAhmad ZhisongZhang XuezheMa - EduardHovy + EduardHovy Kai-WeiChang NanyunPeng 2440–2452 @@ -3150,7 +3150,7 @@ Hong-YouChen Chin-HuaHu LeilaWehbe - Shou-DeLin + Shou-DeLin 2465–2474 Unsupervised document representation learning is an important task providing pre-trained features for NLP applications. Unlike most previous work which learn the embedding based on self-prediction of the surface of text, we explicitly exploit the inter-document information and directly model the relations of documents in embedding space with a discriminative network and a novel objective. Extensive experiments on both small and large public datasets show the competitiveness of the proposed method. In evaluations on standard document classification, our model has errors that are 5 to 13% lower than state-of-the-art unsupervised embedding models. The reduction in error is even more pronounced in scarce label setting. N19-1255 @@ -3173,7 +3173,7 @@ <fixed-case>Z</fixed-case>ero-Shot Cross-Lingual Opinion Target Extraction SoufianJebbara - PhilippCimiano + PhilippCimiano 2486–2495 Aspect-based sentiment analysis involves the recognition of so called opinion target expressions (OTEs). To automatically extract OTEs, supervised learning algorithms are usually employed which are trained on manually annotated corpora. The creation of these corpora is labor-intensive and sufficiently large datasets are therefore usually only available for a very narrow selection of languages and domains. In this work, we address the lack of available annotated data for specific languages by proposing a zero-shot cross-lingual approach for the extraction of opinion target expressions. We leverage multilingual word embeddings that share a common vector space across various languages and incorporate these into a convolutional neural network architecture for OTE extraction. Our experiments with 5 languages give promising results: We can successfully train a model on annotated data of a source language and perform accurate prediction on a target language without ever using any annotated samples in that target language. Depending on the source and target language pairs, we reach performances in a zero-shot regime of up to 77% of a model trained on target language data. Furthermore, we can increase this performance up to 87% of a baseline model trained on target language data by performing cross-lingual learning from multiple source languages. N19-1257 @@ -3199,9 +3199,9 @@ Target-oriented Opinion Words Extraction with Target-fused Neural Sequence Labeling ZhifangFan ZhenWu - Xin-YuDai + Xin-YuDai ShujianHuang - JiajunChen + JiajunChen 2509–2518 Opinion target extraction and opinion words extraction are two fundamental subtasks in Aspect Based Sentiment Analysis (ABSA). Recently, many methods have made progress on these two tasks. However, few works aim at extracting opinion targets and opinion words as pairs. In this paper, we propose a novel sequence labeling subtask for ABSA named TOWE (Target-oriented Opinion Words Extraction), which aims at extracting the corresponding opinion words for a given opinion target. A target-fused sequence labeling neural network model is designed to perform this task. The opinion target information is well encoded into context by an Inward-Outward LSTM. Then left and right contexts of the opinion target and the global context are combined to find the corresponding opinion words. We build four datasets for TOWE based on several popular ABSA benchmarks from laptop and restaurant reviews. The experimental results show that our proposed model outperforms the other compared methods significantly. We believe that our work may not only be helpful for downstream sentiment analysis task, but can also be used for pair-wise opinion summarization. N19-1259 @@ -3224,7 +3224,7 @@ Automatic learner summary assessment for reading comprehension MenglinXia EkaterinaKochmar - TedBriscoe + TedBriscoe 2532–2542 Automating the assessment of learner summary provides a useful tool for assessing learner reading comprehension. We present a summarization task for evaluating non-native reading comprehension and propose three novel approaches to automatically assess the learner summaries. We evaluate our models on two datasets we created and show that our models outperform traditional approaches that rely on exact word match on this task. Our best model produces quality assessments close to professional examiners. N19-1261 @@ -3244,7 +3244,7 @@ Text Generation with Exemplar-based Adaptive Decoding HaoPeng - AnkurParikh + AnkurParikh ManaalFaruqui BhuwanDhingra DipanjanDas @@ -3272,9 +3272,9 @@ AashishVenkatesh TimBaumgärtner EliaBruni - BarbaraPlank - RaffaellaBernardi - RaquelFernández + BarbaraPlank + RaffaellaBernardi + RaquelFernández 2578–2587 We propose a grounded dialogue state encoder which addresses a foundational issue on how to integrate visual grounding with dialogue system components. As a test-bed, we focus on the GuessWhat?! game, a two-player game where the goal is to identify an object in a complex visual scene by asking a sequence of yes/no questions. Our visually-grounded encoder leverages synergies between guessing and asking questions, as it is trained jointly using multi-task learning. We further enrich our model via a cooperative learning regime. We show that the introduction of both the joint architecture and cooperative learning lead to accuracy improvements over the baseline system. We compare our approach to an alternative system which extends the baseline with reinforcement learning. Our in-depth analysis shows that the linguistic skills of the two models differ dramatically, despite approaching comparable performance levels. This points at the importance of analyzing the linguistic output of competing systems beyond numeric comparison solely based on task success. N19-1265 @@ -3334,7 +3334,7 @@ KaiSun DianYu DongYu - ClaireCardie + ClaireCardie 2633–2643 Reading strategies have been shown to improve comprehension levels, especially for readers lacking adequate prior knowledge. Just as the process of knowledge accumulation is time-consuming for human readers, it is resource-demanding to impart rich general domain knowledge into a deep language model via pre-training. Inspired by reading strategies identified in cognitive science, and given limited computational resources - just a pre-trained model and a fixed number of training instances - we propose three general strategies aimed to improve non-extractive machine reading comprehension (MRC): (i) BACK AND FORTH READING that considers both the original and reverse order of an input sequence, (ii) HIGHLIGHTING, which adds a trainable embedding to the text embedding of tokens that are relevant to the question and candidate answers, and (iii) SELF-ASSESSMENT that generates practice questions and candidate answers directly from the text in an unsupervised manner. By fine-tuning a pre-trained language model (Radford et al., 2018) with our proposed strategies on the largest general domain multiple-choice MRC dataset RACE, we obtain a 5.8% absolute increase in accuracy over the previous best result achieved by the same pre-trained model fine-tuned on RACE without the use of strategies. We further fine-tune the resulting model on a target MRC task, leading to an absolute improvement of 6.2% in average accuracy over previous state-of-the-art approaches on six representative non-extractive MRC datasets from different domains (i.e., ARC, OpenBookQA, MCTest, SemEval-2018 Task 11, ROCStories, and MultiRC). These results demonstrate the effectiveness of our proposed strategies and the versatility and general applicability of our fine-tuned models that incorporate these strategies. Core code is available at https://github.com/nlpdata/strategy/. N19-1270 @@ -3372,8 +3372,8 @@ PradeepDasigi MattGardner ShikharMurty - LukeZettlemoyer - EduardHovy + LukeZettlemoyer + EduardHovy 2669–2680 Training semantic parsers from question-answer pairs typically involves searching over an exponentially large space of logical forms, and an unguided search can easily be misled by spurious logical forms that coincidentally evaluate to the correct answer. We propose a novel iterative training algorithm that alternates between searching for consistent logical forms and maximizing the marginal likelihood of the retrieved ones. This training scheme lets us iteratively train models that provide guidance to subsequent ones to search for logical forms of increasing complexity, thus dealing with the problem of spuriousness. We evaluate these techniques on two hard datasets: WikiTableQuestions (WTQ) and Cornell Natural Language Visual Reasoning (NLVR), and show that our training algorithm outperforms the previous best systems, on WTQ in a comparable setting, and on NLVR with significantly less supervision. N19-1273 @@ -3398,7 +3398,7 @@ ShivaTaslimipoor SamanehKouchaki Le AnHa - RuslanMitkov + RuslanMitkov 2692–2698 We introduce a new method to tag Multiword Expressions (MWEs) using a linguistically interpretable language-independent deep learning architecture. We specifically target discontinuity, an under-explored aspect that poses a significant challenge to computational treatment of MWEs. Two neural architectures are explored: Graph Convolutional Network (GCN) and multi-head self-attention. GCN leverages dependency parse information, and self-attention attends to long-range relations. We finally propose a combined model that integrates complementary information from both, through a gating mechanism. The experiments on a standard multilingual dataset for verbal MWEs show that our model outperforms the baselines not only in the case of discontinuous MWEs but also in overall F-score. N19-1275 @@ -3409,7 +3409,7 @@ Incorporating Word Attention into Character-Based Word Segmentation ShoheiHigashiyama MasaoUtiyama - EiichiroSumita + EiichiroSumita MasaoIdeuchi YoshiakiOida YoheiSakamoto @@ -3424,7 +3424,7 @@ <fixed-case>VCWE</fixed-case>: Visual Character-Enhanced Word Embeddings ChiSun XipengQiu - XuanjingHuang + XuanjingHuang 2710–2719 Chinese is a logographic writing system, and the shape of Chinese characters contain rich syntactic and semantic information. In this paper, we propose a model to learn Chinese word embeddings via three-level composition: (1) a convolutional neural network to extract the intra-character compositionality from the visual shape of a character; (2) a recurrent neural network with self-attention to compose character representation into word embeddings; (3) the Skip-Gram framework to capture non-compositionality directly from the contextual information. Evaluations demonstrate the superior performance of our model on four tasks: word similarity, sentiment analysis, named entity recognition and part-of-speech tagging. N19-1277 @@ -3446,7 +3446,7 @@ Improving Cross-Domain <fixed-case>C</fixed-case>hinese Word Segmentation with Word Embeddings YuxiaoYe - WeikangLi + WeikangLi YueZhang LikunQiu JianSun @@ -3460,7 +3460,7 @@ Neural Semi-<fixed-case>M</fixed-case>arkov Conditional Random Fields for Robust Character-Based Part-of-Speech Tagging ApostolosKemos HeikeAdel - HinrichSchütze + HinrichSchütze 2736–2743 Character-level models of tokens have been shown to be effective at dealing with within-token noise and out-of-vocabulary words. However, they often still rely on correct token boundaries. In this paper, we propose to eliminate the need for tokenizers with an end-to-end character-level semi-Markov conditional random field. It uses neural networks for its character and segment representations. We demonstrate its effectiveness in multilingual settings and when token boundaries are noisy: It matches state-of-the-art part-of-speech taggers for various languages and significantly outperforms them on a noisy English version of a benchmark dataset. Our code and the noisy dataset are publicly available at http://cistern.cis.lmu.de/semiCRF. N19-1280 @@ -3503,7 +3503,7 @@ A Dynamic Speaker Model for Conversational Interactions HaoCheng HaoFang - MariOstendorf + MariOstendorf 2772–2785 Individual differences in speakers are reflected in their language use as well as in their interests and opinions. Characterizing these differences can be useful in human-computer interaction, as well as analysis of human-human conversations. In this work, we introduce a neural model for learning a dynamically updated speaker embedding in a conversational context. Initial model training is unsupervised, using context-sensitive language generation as an objective, with the context being the conversation history. Further fine-tuning can leverage task-dependent supervised training. The learned neural representation of speakers is shown to be useful for content ranking in a socialbot and dialog act prediction in human-human conversations. N19-1284 @@ -3514,7 +3514,7 @@ Fluent Translations from Disfluent Speech in End-to-End Speech Translation ElizabethSalesky MatthiasSperber - AlexanderWaibel + AlexanderWaibel 2786–2792 Spoken language translation applications for speech suffer due to conversational speech phenomena, particularly the presence of disfluencies. With the rise of end-to-end speech translation models, processing steps such as disfluency removal that were previously an intermediate step between speech recognition and machine translation need to be incorporated into model architectures. We use a sequence-to-sequence model to translate from noisy, disfluent speech to fluent text with disfluencies removed using the recently collected ‘copy-edited’ references for the Fisher Spanish-English dataset. We are able to directly generate fluent translations and introduce considerations about how to evaluate success on this task. This work provides a baseline for a new task, implicitly removing disfluencies in end-to-end translation of conversational speech. N19-1285 @@ -3526,7 +3526,7 @@ Van-HienTran Van-ThuyPhi HiroyukiShindo - YujiMatsumoto + YujiMatsumoto 2793–2798 Recently, relation classification has gained much success by exploiting deep neural networks. In this paper, we propose a new model effectively combining Segment-level Attention-based Convolutional Neural Networks (SACNNs) and Dependency-based Recurrent Neural Networks (DepRNNs). While SACNNs allow the model to selectively focus on the important information segment from the raw sequence, DepRNNs help to handle the long-distance relations from the shortest dependency path of relation entities. Experiments on the SemEval-2010 Task 8 dataset show that our model is comparable to the state-of-the-art without using any external lexical features. N19-1286 @@ -3537,8 +3537,8 @@ Document-Level Event Factuality Identification via Adversarial Neural Network ZhongQian PeifengLi - QiaomingZhu - GuodongZhou + QiaomingZhu + GuodongZhou 2799–2809 Document-level event factuality identification is an important subtask in event factuality and is crucial for discourse understanding in Natural Language Processing (NLP). Previous studies mainly suffer from the scarcity of suitable corpus and effective methods. To solve these two issues, we first construct a corpus annotated with both document- and sentence-level event factuality information on both English and Chinese texts. Then we present an LSTM neural network based on adversarial training with both intra- and inter-sequence attentions to identify document-level event factuality. Experimental results show that our neural network model can outperform various baselines on the constructed corpus. N19-1287 @@ -3673,8 +3673,8 @@ A Richer-but-Smarter Shortest Dependency Path with Attentive Augmentation for Relation Extraction Duy-CatCan - Hoang-QuynhLe - Quang-ThuyHa + Hoang-QuynhLe + Quang-ThuyHa NigelCollier 2902–2912 To extract the relationship between two entities in a sentence, two common approaches are (1) using their shortest dependency path (SDP) and (2) using an attention model to capture a context-based representation of the sentence. Each approach suffers from its own disadvantage of either missing or redundant information. In this work, we propose a novel model that combines the advantages of these two approaches. This is based on the basic information in the SDP enhanced with information selected by several attention mechanisms with kernel filters, namely RbSP (Richer-but-Smarter SDP). To exploit the representation behind the RbSP structure effectively, we develop a combined deep neural model with a LSTM network on word sequences and a CNN on RbSP. Experimental results on the SemEval-2010 dataset demonstrate improved performance over competitive baselines. The data and source code are available at https://github.com/catcd/RbSP. @@ -3699,7 +3699,7 @@ KentonLee Ming-WeiChang TomKwiatkowski - MichaelCollins + MichaelCollins KristinaToutanova 2924–2936 In this paper we study yes/no questions that are naturally occurring — meaning that they are generated in unprompted and unconstrained settings. We build a reading comprehension dataset, BoolQ, of such questions, and show that they are unexpectedly challenging. They often query for complex, non-factoid information, and require difficult entailment-like inference to solve. We also explore the effectiveness of a range of transfer learning baselines. We find that transferring from entailment data is more effective than transferring from paraphrase or extractive QA data, and that it, surprisingly, continues to be very beneficial even when starting from massive pre-trained language models such as BERT. Our best method trains BERT on MultiNLI and then re-trains it on our train set. It achieves 80.4% accuracy compared to 90% accuracy of human annotators (and 62% majority-baseline), leaving a significant gap for future work. @@ -3752,7 +3752,7 @@ JamesZou JesseShapiro MatthewGentzkow - DanJurafsky + DanJurafsky 2970–3005 We provide an NLP framework to uncover four linguistic dimensions of political polarization in social media: topic choice, framing, affect and illocutionary force. We quantify these aspects with existing lexical methods, and propose clustering of tweet embeddings as a means to identify salient topics for analysis across events; human evaluations show that our approach generates more cohesive topics than traditional LDA-based models. We apply our methods to study 4.4M tweets on 21 mass shootings. We provide evidence that the discussion of these events is highly polarized politically and that this polarization is primarily driven by partisan differences in framing rather than topic choice. We identify framing devices, such as grounding and the contrasting use of the terms “terrorist” and “crazy”, that contribute to polarization. Results pertaining to topic choice, affect and illocutionary force suggest that Republicans focus more on the shooter and event-specific facts (news) while Democrats focus more on the victims and call for policy changes. Our work contributes to a deeper understanding of the way group divisions manifest in language and to computational methods for studying them. N19-1304 @@ -3806,7 +3806,7 @@ DaveWadden LuhengHe AmyShah - MariOstendorf + MariOstendorf HannanehHajishirzi 3036–3046 We introduce a general framework for several information extraction tasks that share span representations using dynamically constructed span graphs. The graphs are dynamically constructed by selecting the most confident entity spans and linking these nodes with confidence-weighted relation types and coreferences. The dynamic span graph allow coreference and relation type confidences to propagate through the graph to iteratively refine the span representations. This is unlike previous multi-task frameworks for information extraction in which the only interaction between tasks is in the shared first-layer LSTM. Our framework significantly outperforms state-of-the-art on multiple information extraction tasks across multiple datasets reflecting different domains. We further observe that the span enumeration approach is good at detecting nested span entities, with significant F1 score improvement on the ACE dataset. @@ -3819,7 +3819,7 @@ <fixed-case>O</fixed-case>pen<fixed-case>C</fixed-case>eres: <fixed-case>W</fixed-case>hen Open Information Extraction Meets the Semi-Structured Web ColinLockard PrashantShiralkar - Xin LunaDong + Xin LunaDong 3047–3056 Open Information Extraction (OpenIE), the problem of harvesting triples from natural language text whose predicate relations are not aligned to any pre-defined ontology, has been a popular subject of research for the last decade. However, this research has largely ignored the vast quantity of facts available in semi-structured webpages. In this paper, we define the problem of OpenIE from semi-structured websites to extract such facts, and present an approach for solving it. We also introduce a labeled evaluation dataset to motivate research in this area. Given a semi-structured website and a set of seed facts for some relations existing on its pages, we employ a semi-supervised label propagation technique to automatically create training data for the relations present on the site. We then use this training data to learn a classifier for relation extraction. Experimental results of this method on our new benchmark dataset obtained a precision of over 70%. A larger scale extraction experiment on 31 websites in the movie vertical resulted in the extraction of over 2 million triples. N19-1309 @@ -3841,7 +3841,7 @@ Neural Machine Translation of Text from Non-Native Speakers AntoniosAnastasopoulos AlisonLui - Toan Q.Nguyen + Toan Q.Nguyen DavidChiang 3070–3080 Neural Machine Translation (NMT) systems are known to degrade when confronted with noisy data, especially when the system is trained only on clean data. In this paper, we show that augmenting training data with sentences containing artificially-introduced grammatical errors can make the system more robust to such errors. In combination with an automatic grammar error correction system, we can recover 1.0 BLEU out of 2.4 BLEU lost due to grammatical errors. We also present a set of Spanish translations of the JFLEG grammar error correction corpus, which allows for testing NMT robustness to real grammatical errors. @@ -3864,8 +3864,8 @@ Selective Attention for Context-aware Neural Machine Translation SameenMaruf - André F. T.Martins - GholamrezaHaffari + André F. T.Martins + GholamrezaHaffari 3092–3102 Despite the progress made in sentence-level NMT, current systems still fall short at achieving fluent, good quality translation for a full document. Recent works in context-aware NMT consider only a few previous sentences as context and may not scale to entire documents. To this end, we propose a novel and scalable top-down approach to hierarchical attention for context-aware NMT which uses sparse attention to selectively focus on relevant sentences in the document context and then attends to key words in those sentences. We also propose single-level attention approaches based on sentence or word-level information in the context. The document-level context representation, produced from these attention modules, is integrated into the encoder or decoder of the Transformer model depending on whether we use monolingual or bilingual context. Our experiments and evaluation on English-German datasets in different document MT settings show that our selective attention approach not only significantly outperforms context-agnostic baselines but also surpasses context-aware baselines in most cases. N19-1313 @@ -4023,9 +4023,9 @@ Exploiting Noisy Data in Distant Supervision Relation Classification KaijiaYang LiangHe - Xin-yuDai + Xin-yuDai ShujianHuang - JiajunChen + JiajunChen 3216–3225 Distant supervision has obtained great progress on relation classification task. However, it still suffers from noisy labeling problem. Different from previous works that underutilize noisy data which inherently characterize the property of classification, in this paper, we propose RCEND, a novel framework to enhance Relation Classification by Exploiting Noisy Data. First, an instance discriminator with reinforcement learning is designed to split the noisy data into correctly labeled data and incorrectly labeled data. Second, we learn a robust relation classifier in semi-supervised learning way, whereby the correctly and incorrectly labeled data are treated as labeled and unlabeled data respectively. The experimental results show that our method outperforms the state-of-the-art models. N19-1325 @@ -4037,7 +4037,7 @@ AleksandraPiktus Necati BoraEdizel PiotrBojanowski - EdouardGrave + EdouardGrave RuiFerreira FabrizioSilvestri 3226–3234 @@ -4049,10 +4049,10 @@ Learning Relational Representations by Analogy using Hierarchical <fixed-case>S</fixed-case>iamese Networks GaetanoRossiello - AlfioGliozzo + AlfioGliozzo RobertFarrell - NicolasFauceglia - MichaelGlass + NicolasFauceglia + MichaelGlass 3235–3245 We address relation extraction as an analogy problem by proposing a novel approach to learn representations of relations expressed by their textual mentions. In our assumption, if two pairs of entities belong to the same relation, then those two pairs are analogous. Following this idea, we collect a large set of analogous pairs by matching triples in knowledge bases with web-scale corpora through distant supervision. We leverage this dataset to train a hierarchical siamese network in order to learn entity-entity embeddings which encode relational information through the different linguistic paraphrasing expressing the same relation. We evaluate our model in a one-shot learning task by showing a promising generalization capability in order to classify unseen relation types, which makes this approach suitable to perform automatic knowledge base population with minimal supervision. Moreover, the model can be used to generate pre-trained embeddings which provide a valuable signal when integrated into an existing neural-based model by outperforming the state-of-the-art methods on a downstream relation extraction task. N19-1327 @@ -4095,7 +4095,7 @@ Continual Learning for Sentence Representations Using Conceptors TianlinLiu - LyleUngar + LyleUngar JoãoSedoc 3274–3279 Distributed representations of sentences have become ubiquitous in natural language processing tasks. In this paper, we consider a continual learning scenario for sentence representations: Given a sequence of corpora, we aim to optimize the sentence encoder with respect to the new corpus while maintaining its accuracy on the old corpora. To address this problem, we propose to initialize sentence encoders with the help of corpus-independent features, and then sequentially update sentence encoders using Boolean operations of conceptor matrices to learn corpus-dependent features. We evaluate our approach on semantic textual similarity tasks and show that our proposed sentence encoder can continually learn features from new corpora while retaining its competence on previously encountered corpora. @@ -4136,7 +4136,7 @@ PengQian RichardFutrell MiguelBallesteros - RogerLevy + RogerLevy 3302–3312 State-of-the-art LSTM language models trained on large corpora learn sequential contingencies in impressive detail, and have been shown to acquire a number of non-local grammatical dependencies with some success. Here we investigate whether supervision with hierarchical structure enhances learning of a range of grammatical dependencies, a question that has previously been addressed only for subject-verb agreement. Using controlled experimental methods from psycholinguistics, we compare the performance of word-based LSTM models versus Recurrent Neural Network Grammars (RNNGs) (Dyer et al. 2016) which represent hierarchical syntactic structure and use neural control to deploy it in left-to-right processing, on two classes of non-local grammatical dependencies in English—Negative Polarity licensing and Filler-Gap Dependencies—tested in a range of configurations. Using the same training data for both models, we find that the RNNG outperforms the LSTM on both types of grammatical dependencies and even learns many of the Island Constraints on the filler-gap dependency. Structural supervision thus provides data efficiency advantages over purely string-based training of neural language models in acquiring human-like generalizations about non-local grammatical dependencies. N19-1334 @@ -4202,7 +4202,7 @@ Better, Faster, Stronger Sequence Tagging Constituent Parsers DavidVilares MostafaAbdou - AndersSøgaard + AndersSøgaard 3372–3383 Sequence tagging models for constituent parsing are faster, but less accurate than other types of parsers. In this work, we address the following weaknesses of such constituent parsers: (a) high error rates around closing brackets of long constituents, (b) large label sets, leading to sparsity, and (c) error propagation arising from greedy decoding. To effectively close brackets, we train a model that learns to switch between tagging schemes. To reduce sparsity, we decompose the label set and use multi-task learning to jointly learn to predict sublabels. Finally, we mitigate issues from greedy decoding through auxiliary losses and sentence-level fine-tuning with policy gradient. Combining these techniques, we clearly surpass the performance of sequence tagging constituent parsers on the English and Chinese Penn Treebanks, and reduce their parsing time even further. On the SPMRL datasets, we observe even greater improvements across the board, including a new state of the art on Basque, Hebrew, Polish and Swedish. N19-1341 @@ -4223,7 +4223,7 @@ Decomposed Local Models for Coordinate Structure Parsing HirokiTeranishi HiroyukiShindo - YujiMatsumoto + YujiMatsumoto 3394–3403 We propose a simple and accurate model for coordination boundary identification. Our model decomposes the task into three sub-tasks during training; finding a coordinator, identifying inside boundaries of a pair of conjuncts, and selecting outside boundaries of it. For inference, we make use of probabilities of coordinators and conjuncts in the CKY parsing to find the optimal combination of coordinate structures. Experimental results demonstrate that our model achieves state-of-the-art results, ensuring that the global structure of coordinations is consistent. N19-1343 @@ -4318,7 +4318,7 @@ Mining Discourse Markers for Unsupervised Sentence Representation Learning DamienSileo - TimVan De Cruys + TimVan De Cruys CamillePradel PhilippeMuller 3477–3486 @@ -4333,7 +4333,7 @@ WenhuChen YuSu YilinShen - ZhiyuChen + ZhiyuChen XifengYan William YangWang 3487–3497 @@ -4396,7 +4396,7 @@ <fixed-case>A</fixed-case>ttention is not <fixed-case>E</fixed-case>xplanation SarthakJain - Byron C.Wallace + Byron C.Wallace 3543–3556 Attention mechanisms have seen wide adoption in neural NLP models. In addition to improving predictive performance, these are often touted as affording transparency: models equipped with attention provide a distribution over attended-to input units, and this is often presented (at least implicitly) as communicating the relative importance of inputs. However, it is unclear what relationship exists between attention weights and model outputs. In this work we perform extensive experiments across a variety of NLP tasks that aim to assess the degree to which attention weights provide meaningful “explanations” for predictions. We find that they largely do not. For example, learned attention weights are frequently uncorrelated with gradient-based measures of feature importance, and one can identify very different attention distributions that nonetheless yield equivalent predictions. Our findings show that standard attention modules do not provide meaningful explanations and should not be treated as though they do. N19-1357 @@ -4429,8 +4429,8 @@ Context Dependent Semantic Parsing over Temporally Structured Data - CharlesChen - RazvanBunescu + CharlesChen + RazvanBunescu 3576–3585 We describe a new semantic parsing setting that allows users to query the system using both natural language questions and actions within a graphical user interface. Multiple time series belonging to an entity of interest are stored in a database and the user interacts with the system to obtain a better understanding of the entity’s state and behavior, entailing sequences of actions and questions whose answers may depend on previous factual or navigational interactions. We design an LSTM-based encoder-decoder architecture that models context dependency through copying mechanisms and multiple levels of attention over inputs and previous outputs. When trained to predict tokens using supervised learning, the proposed architecture substantially outperforms standard sequence generation baselines. Training the architecture using policy gradient leads to further improvements in performance, reaching a sequence-level accuracy of 88.7% on artificial data and 74.8% on real data. N19-1360 @@ -4455,8 +4455,8 @@ MandarJoshi EunsolChoi OmerLevy - DanielWeld - LukeZettlemoyer + DanielWeld + LukeZettlemoyer 3597–3608 Reasoning about implied relationships (e.g. paraphrastic, common sense, encyclopedic) between pairs of words is crucial for many cross-sentence inference problems. This paper proposes new methods for learning and using embeddings of word pairs that implicitly represent background knowledge about such relationships. Our pairwise embeddings are computed as a compositional function of each word’s representation, which is learned by maximizing the pointwise mutual information (PMI) with the contexts in which the the two words co-occur. We add these representations to the cross-sentence attention layer of existing inference models (e.g. BiDAF for QA, ESIM for NLI), instead of extending or replacing existing word embeddings. Experiments show a gain of 2.7% on the recently released SQuAD 2.0 and 1.3% on MultiNLI. Our representations also aid in better generalization with gains of around 6-7% on adversarial SQuAD datasets, and 8.8% on the adversarial entailment test set by Glockner et al. (2018). N19-1362 @@ -4468,7 +4468,7 @@ AshutoshKumar SatwikBhattamishra ManikBhandari - ParthaTalukdar + ParthaTalukdar 3609–3619 Inducing diversity in the task of paraphrasing is an important problem in NLP with applications in data augmentation and conversational agents. Previous paraphrasing approaches have mainly focused on the issue of generating semantically similar paraphrases while paying little attention towards diversity. In fact, most of the methods rely solely on top-k beam search sequences to obtain a set of paraphrases. The resulting set, however, contains many structurally similar sentences. In this work, we focus on the task of obtaining highly diverse paraphrases while not compromising on paraphrasing quality. We provide a novel formulation of the problem in terms of monotone submodular function maximization, specifically targeted towards the task of paraphrasing. Additionally, we demonstrate the effectiveness of our method for data augmentation on multiple tasks such as intent classification and paraphrase recognition. In order to drive further research, we have made the source code available. N19-1363 @@ -4482,8 +4482,8 @@ DiyiYang JiaaoChen ZichaoYang - DanJurafsky - EduardHovy + DanJurafsky + EduardHovy 3620–3630 Modeling what makes a request persuasive - eliciting the desired response from a reader - is critical to the study of propaganda, behavioral economics, and advertising. Yet current models can’t quantify the persuasiveness of requests or extract successful persuasive strategies. Building on theories of persuasion, we propose a neural network to quantify persuasiveness and identify the persuasive strategies in advocacy requests. Our semi-supervised hierarchical neural network model is supervised by the number of people persuaded to take actions and partially supervised at the sentence level with human-labeled rhetorical strategies. Our method outperforms several baselines, uncovers persuasive strategies - offering increased interpretability of persuasive speech - and has applications for other situations with document-level supervision but only partial sentence supervision. N19-1364 @@ -4501,7 +4501,7 @@ OliviaLi SandhiniAgarwal Joshua D.Greene - DanJurafsky + DanJurafsky ChristopherPotts LauriKarttunen 3631–3648 @@ -4513,7 +4513,7 @@ Structural Neural Encoders for <fixed-case>AMR</fixed-case>-to-text Generation MarcoDamonte - Shay B.Cohen + Shay B.Cohen 3649–3658 AMR-to-text generation is a problem recently introduced to the NLP community, in which the goal is to generate sentences from Abstract Meaning Representation (AMR) graphs. Sequence-to-sequence models can be used to this end by converting the AMR graphs to strings. Approaching the problem while working directly with graphs requires the use of graph-to-sequence models that encode the AMR graph into a vector representation. Such encoding has been shown to be beneficial in the past, and unlike sequential encoding, it allows us to explicitly capture reentrant structures in the AMR graphs. We investigate the extent to which reentrancies (nodes with multiple parents) have an impact on AMR-to-text generation by comparing graph encoders to tree encoders, where reentrancies are not preserved. We show that improvements in the treatment of reentrancies and long-range dependencies contribute to higher overall scores for graph encoders. Our best model achieves 24.40 BLEU on LDC2015E86, outperforming the state of the art by 1.1 points and 24.54 BLEU on LDC2017T10, outperforming the state of the art by 1.24 points. N19-1366 @@ -4523,7 +4523,7 @@ Multilingual prediction of <fixed-case>A</fixed-case>lzheimer’s disease through domain adaptation and concept-based language modelling - Kathleen C.Fraser + Kathleen C.Fraser NicklasLinz BaiLi KristinaLundholm Fors @@ -4554,7 +4554,7 @@ <fixed-case>NLP</fixed-case> Whack-A-Mole: <fixed-case>C</fixed-case>hallenges in Cross-Domain Temporal Expression Extraction AmyOlex LukeMaffey - BridgetMcInnes + BridgetMcInnes 3682–3692 Incorporating domain knowledge is vital in building successful natural language processing (NLP) applications. Many times, cross-domain application of a tool results in poor performance as the tool does not account for domain-specific attributes. The clinical domain is challenging in this aspect due to specialized medical terms and nomenclature, shorthand notation, fragmented text, and a variety of writing styles used by different medical units. Temporal resolution is an NLP task that, in general, is domain-agnostic because temporal information is represented using a limited lexicon. However, domain-specific aspects of temporal resolution are present in clinical texts. Here we explore parsing issues that arose when running our system, a tool built on Newswire text, on clinical notes in the THYME corpus. Many parsing issues were straightforward to correct; however, a few code changes resulted in a cascading series of parsing errors that had to be resolved before an improvement in performance was observed, revealing the complexity temporal resolution and rule-based parsing. Our system now outperforms current state-of-the-art systems on the THYME corpus with little change in its performance on Newswire texts. N19-1369 @@ -4577,7 +4577,7 @@ EricLehman JayDeYoung ReginaBarzilay - Byron C.Wallace + Byron C.Wallace 3705–3717 How do we know if a particular medical treatment actually works? Ideally one would consult all available evidence from relevant clinical trials. Unfortunately, such results are primarily disseminated in natural language scientific articles, imposing substantial burden on those trying to make sense of them. In this paper, we present a new task and corpus for making this unstructured published scientific evidence actionable. The task entails inferring reported findings from a full-text article describing randomized controlled trials (RCT) with respect to a given intervention, comparator, and outcome of interest, e.g., inferring if a given article provides evidence supporting the use of aspirin to reduce risk of stroke, as compared to placebo. We present a new corpus for this task comprising 10,000+ prompts coupled with full-text articles describing RCTs. Results using a suite of baseline models — ranging from heuristic (rule-based) approaches to attentive neural architectures — demonstrate the difficulty of the task, which we believe largely owes to the lengthy, technical input texts. To facilitate further work on this important, challenging problem we make the corpus, documentation, a website and leaderboard, and all source code for baselines and evaluation publicly available. N19-1371 @@ -4598,7 +4598,7 @@ <fixed-case>D</fixed-case>ialogue <fixed-case>A</fixed-case>ct <fixed-case>C</fixed-case>lassification with <fixed-case>C</fixed-case>ontext-<fixed-case>A</fixed-case>ware <fixed-case>S</fixed-case>elf-<fixed-case>A</fixed-case>ttention VipulRaheja - JoelTetreault + JoelTetreault 3727–3733 Recent work in Dialogue Act classification has treated the task as a sequence labeling problem using hierarchical deep neural networks. We build on this prior work by leveraging the effectiveness of a context-aware self-attention mechanism coupled with a hierarchical recurrent neural network. We conduct extensive evaluations on standard Dialogue Act classification datasets and show significant improvement over state-of-the-art results on the Switchboard Dialogue Act (SwDA) Corpus. We also investigate the impact of different utterance-level representation learning methods and show that our method is effective at capturing utterance-level semantic text representations while maintaining high accuracy. N19-1373 @@ -4623,7 +4623,7 @@ RevanthGangi Reddy DanishContractor DineshRaghu - SachindraJoshi + SachindraJoshi 3744–3754 Recent end-to-end task oriented dialog systems use memory architectures to incorporate external knowledge in their dialogs. Current work makes simplifying assumptions about the structure of the knowledge base, such as the use of triples to represent knowledge, and combines dialog utterances (context) as well as knowledge base (KB) results as part of the same memory. This causes an explosion in the memory size, and makes the reasoning over memory harder. In addition, such a memory design forces hierarchical properties of the data to be fit into a triple structure of memory. This requires the memory reader to infer relationships across otherwise connected attributes. In this paper we relax the strong assumptions made by existing architectures and separate memories used for modeling dialog context and KB results. Instead of using triples to store KB results, we introduce a novel multi-level memory architecture consisting of cells for each query and their corresponding results. The multi-level memory first addresses queries, followed by results and finally each key-value pair within a result. We conduct detailed experiments on three publicly available task oriented dialog data sets and we find that our method conclusively outperforms current state-of-the-art models. We report a 15-25% increase in both entity F1 and BLEU scores. N19-1375 @@ -4646,7 +4646,7 @@ Top-Down Structurally-Constrained Neural Response Generation with Lexicalized Probabilistic Context-Free Grammar WenchaoDu - Alan WBlack + Alan WBlack 3762–3771 We consider neural language generation under a novel problem setting: generating the words of a sentence according to the order of their first appearance in its lexicalized PCFG parse tree, in a depth-first, left-to-right manner. Unlike previous tree-based language generation methods, our approach is both (i) top-down and (ii) explicitly generating syntactic structure at the same time. In addition, our method combines neural model with symbolic approach: word choice at each step is constrained by its predicted syntactic function. We applied our model to the task of dialog response generation, and found it significantly improves over sequence-to-sequence baseline, in terms of diversity and relevance. We also investigated the effect of lexicalization on language generation, and found that lexicalization schemes that give priority to content words have certain advantages over those focusing on dependency relations. N19-1377 @@ -4657,9 +4657,9 @@ What do Entity-Centric Models Learn? Insights from Entity Linking in Multi-Party Dialogue LauraAina CarinaSilberer - Ionut-TeodorSorodoc + Ionut-TeodorSorodoc MatthijsWestera - GemmaBoleda + GemmaBoleda 3772–3783 Humans use language to refer to entities in the external world. Motivated by this, in recent years several models that incorporate a bias towards learning entity representations have been proposed. Such entity-centric models have shown empirical success, but we still know little about why. In this paper we analyze the behavior of two recently proposed entity-centric models in a referential task, Entity Linking in Multi-party Dialogue (SemEval 2018 Task 4). We show that these models outperform the state of the art on this task, and that they do better on lower frequency entities than a counterpart model that is not entity-centric, with the same model size. We argue that making models entity-centric naturally fosters good architectural decisions. However, we also show that these models do not really build entity representations and that they make poor use of linguistic context. These negative results underscore the need for model analysis, to test whether the motivations for particular architectures are borne out in how models behave when deployed. N19-1378 @@ -4671,7 +4671,7 @@ HanLi JihwanLee SidharthMudgal - RuhiSarikaya + RuhiSarikaya Young-BumKim 3784–3794 Domain classification is the task to map spoken language utterances to one of the natural language understanding domains in intelligent personal digital assistants (IPDAs). This is observed in mainstream IPDAs in industry and third-party domains are developed to enhance the capability of the IPDAs. As more and more new domains are developed very frequently, how to continuously accommodate the new domains still remains challenging. Moreover, if one wants to use personalized information dynamically for better domain classification, it is infeasible to directly adopt existing continual learning approaches. In this paper, we propose CoNDA, a neural-based approach for continuous domain adaption with normalization and regularization. Unlike existing methods that often conduct full model parameter update, CoNDA only updates the necessary parameters in the model for the new domains. Empirical evaluation shows that CoNDA achieves high accuracy on both the accommodated new domains and the existing known domains for which input samples come with personal information, and outperforms the baselines by a large margin. @@ -4696,7 +4696,7 @@ NouhaDziri EhsanKamalloo KoryMathewson - OsmarZaiane + OsmarZaiane 3806–3812 Evaluating open-domain dialogue systems is difficult due to the diversity of possible correct answers. Automatic metrics such as BLEU correlate weakly with human annotations, resulting in a significant bias across different models and datasets. Some researchers resort to human judgment experimentation for assessing response quality, which is expensive, time consuming, and not scalable. Moreover, judges tend to evaluate a small number of dialogues, meaning that minor differences in evaluation configuration may lead to dissimilar results. In this paper, we present interpretable metrics for evaluating topic coherence by making use of distributed sentence representations. Furthermore, we introduce calculable approximations of human judgment based on conversational coherence by adopting state-of-the-art entailment techniques. Results show that our metrics can be used as a surrogate for human judgment, making it easy to evaluate dialogue systems on large-scale datasets and allowing an unbiased estimate for the quality of the responses. N19-1381 @@ -4706,7 +4706,7 @@ On Knowledge distillation from complex networks for response prediction SiddharthaArora - Mitesh M.Khapra + Mitesh M.Khapra Harish G.Ramaswamy 3813–3822 Recent advances in Question Answering have lead to the development of very complex models which compute rich representations for query and documents by capturing all pairwise interactions between query and document words. This makes these models expensive in space and time, and in practice one has to restrict the length of the documents that can be fed to these models. Such models have also been recently employed for the task of predicting dialog responses from available background documents (e.g., Holl-E dataset). However, here the documents are longer, thereby rendering these complex models infeasible except in select restricted settings. In order to overcome this, we use standard simple models which do not capture all pairwise interactions, but learn to emulate certain characteristics of a complex teacher network. Specifically, we first investigate the conicity of representations learned by a complex model and observe that it is significantly lower than that of simpler models. Based on this insight, we modify the simple architecture to mimic this characteristic. We go further by using knowledge distillation approaches, where the simple model acts as a student and learns to match the output from the complex teacher network. We experiment with the Holl-E dialog data set and show that by mimicking characteristics and matching outputs from a teacher, even a simple network can give improved performance. @@ -4738,7 +4738,7 @@ Low-Resource Syntactic Transfer with Unsupervised Source Reordering Mohammad SadeghRasooli - MichaelCollins + MichaelCollins 3845–3856 We describe a cross-lingual transfer method for dependency parsing that takes into account the problem of word order differences between source and target languages. Our model only relies on the Bible, a considerably smaller parallel data than the commonly used parallel data in transfer methods. We use the concatenation of projected trees from the Bible corpus, and the gold-standard treebanks in multiple source languages along with cross-lingual word representations. We demonstrate that reordering the source treebanks before training on them for a target language improves the accuracy of languages outside the European language family. Our experiments on 68 treebanks (38 languages) in the Universal Dependencies corpus achieve a high accuracy for all languages. Among them, our experiments on 16 treebanks of 12 non-European languages achieve an average UAS absolute improvement of 3.3% over a state-of-the-art method. N19-1385 @@ -4747,8 +4747,8 @@ Revisiting Adversarial Autoencoder for Unsupervised Word Translation with Cycle Consistency and Improved Training - TasnimMohiuddin - ShafiqJoty + TasnimMohiuddin + ShafiqJoty 3857–3867 Adversarial training has shown impressive success in learning bilingual dictionary without any parallel data by mapping monolingual embeddings to a shared space. However, recent work has shown superior performance for non-adversarial methods in more challenging language pairs. In this work, we revisit adversarial autoencoder for unsupervised word translation and propose two novel extensions to it that yield more stable training and improved results. Our method includes regularization terms to enforce cycle consistency and input reconstruction, and puts the target encoders as an adversary against the corresponding discriminator. Extensive experimentations with European, non-European and low-resource languages show that our method is more robust and achieves better performance than recently proposed adversarial and non-adversarial approaches. N19-1386 @@ -4759,7 +4759,7 @@ Addressing word-order Divergence in Multilingual Neural Machine Translation for extremely Low Resource Languages RudraMurthy AnoopKunchukuttan - PushpakBhattacharyya + PushpakBhattacharyya 3868–3873 Transfer learning approaches for Neural Machine Translation (NMT) train a NMT model on an assisting language-target language pair (parent model) which is later fine-tuned for the source language-target language pair of interest (child model), with the target language being the same. In many cases, the assisting language has a different word order from the source language. We show that divergent word order adversely limits the benefits from transfer learning when little to no parallel corpus between the source and target language is available. To bridge this divergence, we propose to pre-order the assisting language sentences to match the word order of the source language and train the parent model. Our experiments on many language pairs show that bridging the word order gap leads to significant improvement in the translation quality in extremely low-resource scenarios. N19-1387 @@ -4801,7 +4801,7 @@ Context-Aware Cross-Lingual Mapping HananAldarmaki - MonaDiab + MonaDiab 3906–3911 Cross-lingual word vectors are typically obtained by fitting an orthogonal matrix that maps the entries of a bilingual dictionary from a source to a target vector space. Word vectors, however, are most commonly used for sentence or document-level representations that are calculated as the weighted average of word embeddings. In this paper, we propose an alternative to word-level mapping that better reflects sentence-level cross-lingual similarity. We incorporate context in the transformation matrix by directly mapping the averaged embeddings of aligned sentences in a parallel corpus. We also implement cross-lingual mapping of deep contextualized word embeddings using parallel sentences with word alignments. In our experiments, both approaches resulted in cross-lingual sentence embeddings that outperformed context-independent word mapping in sentence translation retrieval. Furthermore, the sentence-level transformation could be used for word-level mapping without loss in word translation quality. N19-1391 @@ -4812,7 +4812,7 @@ Polyglot Contextual Representations Improve Crosslingual Transfer PhoebeMulcaire JungoKasai - Noah A.Smith + Noah A.Smith 3912–3918 We introduce Rosita, a method to produce multilingual contextual word representations by training a single language model on text from multiple languages. Our method combines the advantages of contextual word representations with those of multilingual representation learning. We produce language models from dissimilar language pairs (English/Arabic and English/Chinese) and use them in dependency parsing, semantic role labeling, and named entity recognition, with comparisons to monolingual and non-contextual variants. Our results provide further evidence for the benefits of polyglot learning, in which representations are shared across multiple languages. N19-1392 @@ -4824,7 +4824,7 @@ ManonScholivet FranckDary AlexisNasr - BenoitFavre + BenoitFavre CarlosRamisch 3919–3930 The existence of universal models to describe the syntax of languages has been debated for decades. The availability of resources such as the Universal Dependencies treebanks and the World Atlas of Language Structures make it possible to study the plausibility of universal grammar from the perspective of dependency parsing. Our work investigates the use of high-level language descriptions in the form of typological features for multilingual dependency parsing. Our experiments on multilingual parsing for 40 languages show that typological information can indeed guide parsers to share information between similar languages beyond simple language identification. @@ -4856,7 +4856,7 @@ <fixed-case>U</fixed-case>nderstanding the <fixed-case>B</fixed-case>ehaviour of <fixed-case>N</fixed-case>eural <fixed-case>A</fixed-case>bstractive <fixed-case>S</fixed-case>ummarizers using <fixed-case>C</fixed-case>ontrastive <fixed-case>E</fixed-case>xamples KrtinKumar - Jackie Chi KitCheung + Jackie Chi KitCheung 3949–3954 Neural abstractive summarizers generate summary texts using a language model conditioned on the input source text, and have recently achieved high ROUGE scores on benchmark summarization datasets. We investigate how they achieve this performance with respect to human-written gold-standard abstracts, and whether the systems are able to understand deeper syntactic and semantic structures. We generate a set of contrastive summaries which are perturbed, deficient versions of human-written summaries, and test whether existing neural summarizers score them more highly than the human-written summaries. We analyze their performance on different datasets and find that these systems fail to understand the source text, in a majority of the cases. N19-1396 @@ -4865,12 +4865,12 @@ Jointly Extracting and Compressing Documents with Summary State Representations - AfonsoMendes + AfonsoMendes ShashiNarayan SebastiãoMiranda ZitaMarinho - André F. T.Martins - Shay B.Cohen + André F. T.Martins + Shay B.Cohen 3955–3966 We present a new neural model for text summarization that first extracts sentences from a document and then compresses them. The pro-posed model offers a balance that sidesteps thedifficulties in abstractive methods while gener-ating more concise summaries than extractivemethods. In addition, our model dynamically determines the length of the output summary based on the gold summaries it observes during training and does not require length constraints typical to extractive summarization. The model achieves state-of-the-art results on the CNN/DailyMail and Newsroom datasets, improving over current extractive and abstractive methods. Human evaluations demonstratethat our model generates concise and informa-tive summaries. We also make available a new dataset of oracle compressive summaries derived automatically from the CNN/DailyMailreference summaries. N19-1397 @@ -4883,7 +4883,7 @@ Sanjeev KumarKarn MarkBuckley UlliWaltinger - HinrichSchütze + HinrichSchütze 3967–3977 In this work, we define the task of teaser generation and provide an evaluation benchmark and baseline systems for the process of generating teasers. A teaser is a short reading suggestion for an article that is illustrative and includes curiosity-arousing elements to entice potential readers to read particular news items. Teasers are one of the main vehicles for transmitting news to social media users. We compile a novel dataset of teasers by systematically accumulating tweets and selecting those that conform to the teaser definition. We have compared a number of neural abstractive architectures on the task of teaser generation and the overall best performing system is See et al. seq2seq with pointer network. N19-1398 @@ -4917,7 +4917,7 @@ Positional Encoding to Control Output Sequence Length ShoTakase - NaoakiOkazaki + NaoakiOkazaki 3999–4004 Neural encoder-decoder models have been successful in natural language generation tasks. However, real applications of abstractive summarization must consider an additional constraint that a generated summary should not exceed a desired length. In this paper, we propose a simple but effective extension of a sinusoidal positional encoding (Vaswani et al., 2017) so that a neural encoder-decoder model preserves the length constraint. Unlike previous studies that learn length embeddings, the proposed method can generate a text of any length even if the target length is unseen in training data. The experimental results show that the proposed method is able not only to control generation length but also improve ROUGE scores. N19-1401 @@ -4936,7 +4936,7 @@ Using Natural Language Relations between Answer Choices for Machine Comprehension - RajkumarPujari + RajkumarPujari DanGoldwasser 4010–4015 While evaluating an answer choice for Reading Comprehension task, other answer choices available for the question and the answers of related questions about the same paragraph often provide valuable information. In this paper, we propose a method to leverage the natural language relations between the answer choices, such as entailment and contradiction, to improve the performance of machine comprehension. We use a stand-alone question answering (QA) system to perform QA task and a Natural Language Inference (NLI) system to identify the relations between the choice pairs. Then we perform inference using an Integer Linear Programming (ILP)-based relational framework to re-evaluate the decisions made by the standalone QA system in light of the relations identified by the NLI system. We also propose a multitask learning model that learns both the tasks jointly. @@ -5115,7 +5115,7 @@ Data Augmentation for Context-Sensitive Neural Lemmatization Using Inflection Tables and Raw Text TomsBergmanis - SharonGoldwater + SharonGoldwater 4119–4128 Lemmatization aims to reduce the sparse data problem by relating the inflected forms of a word to its dictionary form. Using context can help, both for unseen and ambiguous words. Yet most context-sensitive approaches require full lemma-annotated sentences for training, which may be scarce or unavailable in low-resource languages. In addition (as shown here), in a low-resource setting, a lemmatizer can learn more from n labeled examples of distinct words (types) than from n (contiguous) labeled tokens, since the latter contain far fewer distinct types. To combine the efficiency of type-based learning with the benefits of context, we propose a way to train a context-sensitive lemmatizer with little or no labeled corpus data, using inflection tables from the UniMorph project and raw text examples from Wikipedia that provide sentence contexts for the unambiguous UniMorph examples. Despite these being unambiguous examples, the model successfully generalizes from them, leading to improved results (both overall, and especially on unseen words) in comparison to a baseline that does not use context. N19-1418 @@ -5126,7 +5126,7 @@ <fixed-case>A</fixed-case> Structural Probe for Finding Syntax in Word Representations JohnHewitt - Christopher D.Manning + Christopher D.Manning 4129–4138 Recent work has improved our ability to detect linguistic knowledge in word representations. However, current methods for detecting syntactic knowledge do not test whether syntax trees are represented in their entirety. In this work, we propose a structural probe, which evaluates whether syntax trees are embedded in a linear transformation of a neural network’s word representation space. The probe identifies a linear transformation under which squared L2 distance encodes the distance between words in the parse tree, and one in which squared L2 norm encodes depth in the parse tree. Using our probe, we show that such transformations exist for both ELMo and BERT but not in baselines, providing evidence that entire syntax trees are embedded implicitly in deep models’ vector geometry. N19-1419 @@ -5161,7 +5161,7 @@ Probing the Need for Visual Context in Multimodal Machine Translation OzanCaglayan - PranavaMadhyastha + PranavaMadhyastha LuciaSpecia LoïcBarrault 4159–4170 @@ -5189,7 +5189,7 @@ What’s in a Name? <fixed-case>R</fixed-case>educing Bias in Bios without Access to Protected Attributes AlexeyRomanov MariaDe-Arteaga - HannaWallach + HannaWallach JenniferChayes ChristianBorgs AlexandraChouldechova @@ -5238,7 +5238,7 @@ Locale-agnostic Universal Domain Classification Model in Spoken Language Understanding JihwanLee - RuhiSarikaya + RuhiSarikaya Young-BumKim 9–15 In this paper, we introduce an approach for leveraging available data across multiple locales sharing the same language to 1) improve domain classification model accuracy in Spoken Language Understanding and user experience even if new locales do not have sufficient data and 2) reduce the cost of scaling the domain classifier to a large number of locales. We propose a locale-agnostic universal domain classification model based on selective multi-task learning that learns a joint representation of an utterance over locales with different sets of domains and allows locales to share knowledge selectively depending on the domains. The experimental results demonstrate the effectiveness of our approach on domain classification task in the scenario of multiple locales with imbalanced data and disparate domain sets. The proposed approach outperforms other baselines models especially when classifying locale-specific domains and also low-resourced domains. @@ -5271,7 +5271,7 @@ SavithaRamasamy PavitraKrishnaswamy Wai LengChow - Nancy F.Chen + Nancy F.Chen 24–31 Data for human-human spoken dialogues for research and development are currently very limited in quantity, variety, and sources; such data are even scarcer in healthcare. In this work, we investigate fast prototyping of a dialogue comprehension system by leveraging on minimal nurse-to-patient conversations. We propose a framework inspired by nurse-initiated clinical symptom monitoring conversations to construct a simulated human-human dialogue dataset, embodying linguistic characteristics of spoken interactions like thinking aloud, self-contradiction, and topic drift. We then adopt an established bidirectional attention pointer network on this simulated dataset, achieving more than 80% F1 score on a held-out test set from real-world nurse-to-patient conversations. The ability to automatically comprehend conversations in the healthcare domain by exploiting only limited data has implications for improving clinical workflows through red flag symptom detection and triaging capabilities. We demonstrate the feasibility for efficient and effective extraction, retrieval and comprehension of symptom checking information discussed in multi-turn human-human spoken conversations. N19-2004 @@ -5343,7 +5343,7 @@ YaoWan XiumingPan YuGong - Philip S.Yu + Philip S.Yu 64–72 Nowadays, more and more customers browse and purchase products in favor of using mobile E-Commerce Apps such as Taobao and Amazon. Since merchants are usually inclined to describe redundant and over-informative product titles to attract attentions from customers, it is important to concisely display short product titles on limited screen of mobile phones. To address this discrepancy, previous studies mainly consider textual information of long product titles and lacks of human-like view during training and evaluation process. In this paper, we propose a Multi-Modal Generative Adversarial Network (MM-GAN) for short product title generation in E-Commerce, which innovatively incorporates image information and attribute tags from product, as well as textual information from original long titles. MM-GAN poses short title generation as a reinforcement learning process, where the generated titles are evaluated by the discriminator in a human-like view. Extensive experiments on a large-scale E-Commerce dataset demonstrate that our algorithm outperforms other state-of-the-art methods. Moreover, we deploy our model into a real-world online E-Commerce environment and effectively boost the performance of click through rate and click conversion rate by 1.66% and 1.87%, respectively. N19-2009 @@ -5379,8 +5379,8 @@ Active Learning for New Domains in Natural Language Understanding StanislavPeshterliev JohnKearney - AbhyudayJagannatha - ImreKiss + AbhyudayJagannatha + ImreKiss SpyrosMatsoukas 90–96 We explore active learning (AL) for improving the accuracy of new domains in a natural language understanding (NLU) system. We propose an algorithm called Majority-CRF that uses an ensemble of classification models to guide the selection of relevant utterances, as well as a sequence labeling model to help prioritize informative examples. Experiments with three domains show that Majority-CRF achieves 6.6%-9% relative error rate reduction compared to random sampling with the same annotation budget, and statistically significant improvements compared to other AL approaches. Additionally, case studies with human-in-the-loop AL on six new domains show 4.6%-9% improvement on an existing NLU system. @@ -5402,7 +5402,7 @@ Are the Tools up to the Task? an Evaluation of Commercial Dialog Tools in Developing Conversational Enterprise-grade Dialog Systems - MarieMeteer + MarieMeteer MeghanHickey CarmiRothberg DavidNahamoo @@ -5441,13 +5441,13 @@ Extraction of Message Sequence Charts from Software Use-Case Descriptions - GirishPalshikar + GirishPalshikar NitinRamrakhiyani SangameshwarPatil SachinPawar SwapnilHingmire VasudevaVarma - PushpakBhattacharyya + PushpakBhattacharyya 130–137 Software Requirement Specification documents provide natural language descriptions of the core functional requirements as a set of use-cases. Essentially, each use-case contains a set of actors and sequences of steps describing the interactions among them. Goals of use-case reviews and analyses include their correctness, completeness, detection of ambiguities, prototyping, verification, test case generation and traceability. Message Sequence Chart (MSC) have been proposed as a expressive, rigorous yet intuitive visual representation of use-cases. In this paper, we describe a linguistic knowledge-based approach to extract MSCs from use-cases. Compared to existing techniques, we extract richer constructs of the MSC notation such as timers, conditions and alt-boxes. We apply this tool to extract MSCs from several real-life software use-case descriptions and show that it performs better than the existing techniques. We also discuss the benefits and limitations of the extracted MSCs to meet the above goals. N19-2017 @@ -5482,7 +5482,7 @@ LahariPoddar LeonardoNeves WilliamBrendel - LuisMarujo + LuisMarujo SergeyTulyakov PradeepKaruturi 157–165 @@ -5494,9 +5494,9 @@ Robust Semantic Parsing with Adversarial Learning for Domain Generalization GabrielMarzinotto - GéraldineDamnati - FrédéricBéchet - BenoîtFavre + GéraldineDamnati + FrédéricBéchet + BenoîtFavre 166–173 This paper addresses the issue of generalization for Semantic Parsing in an adversarial framework. Building models that are more robust to inter-document variability is crucial for the integration of Semantic Parsing technologies in real applications. The underlying question throughout this study is whether adversarial learning can be used to train models on a higher level of abstraction in order to increase their robustness to lexical and stylistic variations. We propose to perform Semantic Parsing with a domain classification adversarial task, covering various use-cases with or without explicit knowledge of the domain. The strategy is first evaluated on a French corpus of encyclopedic documents, annotated with FrameNet, in an information retrieval perspective. This corpus constitutes a new public benchmark, gathering documents from various thematic domains and various sources. We show that adversarial learning yields improved results when using explicit domain classification as the adversarial task. We also propose an unsupervised domain discovery approach that yields equivalent improvements. The latter is also evaluated on a PropBank Semantic Role Labeling task on the CoNLL-2005 benchmark and is shown to increase the model’s generalization capabilities on out-of-domain data. N19-2021 @@ -5561,7 +5561,7 @@ In Other News: a Bi-style Text-to-speech Model for Synthesizing Newscaster Voice with Limited Data NishantPrateek MateuszŁajszczak - RobertoBarra-Chicote + RobertoBarra-Chicote ThomasDrugman JaimeLorenzo-Trueba ThomasMerritt @@ -5603,7 +5603,7 @@ N19-3 SudiptaKar FarahNadeem - LauraBurdick + LauraBurdick GregDurrett Na-RaeHan Association for Computational Linguistics @@ -5630,7 +5630,7 @@ Identifying and Reducing Gender Bias in Word-Level Language Models ShikhaBordia - Samuel R.Bowman + Samuel R.Bowman 7–15 Many text corpora exhibit socially problematic biases, which can be propagated or amplified in the models trained on such data. For example, doctor cooccurs more frequently with male pronouns than female pronouns. In this study we (i) propose a metric to measure gender bias; (ii) measure bias in a text corpus and the text generated from a recurrent neural network language model trained on the text corpus; (iii) propose a regularization loss term for the language model that minimizes the projection of encoder-trained embeddings onto an embedding subspace that encodes gender; (iv) finally, evaluate efficacy of our proposed method on reducing gender bias. We find this regularization method to be effective in reducing gender bias up to an optimal weight assigned to the loss term, beyond which the model becomes unstable as the perplexity increases. We replicate this study on three training corpora—Penn Treebank, WikiText-2, and CNN/Daily Mail—resulting in similar conclusions. N19-3002 @@ -5691,7 +5691,7 @@ A Bag-of-concepts Model Improves Relation Extraction in a Narrow Knowledge Domain with Limited Data JiyuChen - KarinVerspoor + KarinVerspoor ZenanZhai 43–52 This paper focuses on a traditional relation extraction task in the context of limited annotated data and a narrow knowledge domain. We explore this task with a clinical corpus consisting of 200 breast cancer follow-up treatment letters in which 16 distinct types of relations are annotated. We experiment with an approach to extracting typed relations called window-bounded co-occurrence (WBC), which uses an adjustable context window around entity mentions of a relevant type, and compare its performance with a more typical intra-sentential co-occurrence baseline. We further introduce a new bag-of-concepts (BoC) approach to feature engineering based on the state-of-the-art word embeddings and word synonyms. We demonstrate the competitiveness of BoC by comparing with methods of higher complexity, and explore its effectiveness on this small dataset. @@ -5766,7 +5766,7 @@ Data Augmentation by Data Noising for Open-vocabulary Slots in Spoken Language Understanding Hwa-YeonKim Yoon-HyungRoh - Young-KilKim + Young-KilKim 97–102 One of the main challenges in Spoken Language Understanding (SLU) is dealing with ‘open-vocabulary’ slots. Recently, SLU models based on neural network were proposed, but it is still difficult to recognize the slots of unknown words or ‘open-vocabulary’ slots because of the high cost of creating a manually tagged SLU dataset. This paper proposes data noising, which reflects the characteristics of the ‘open-vocabulary’ slots, for data augmentation. We applied it to an attention based bi-directional recurrent neural network (Liu and Lane, 2016) and experimented with three datasets: Airline Travel Information System (ATIS), Snips, and MIT-Restaurant. We achieved performance improvements of up to 0.57% and 3.25 in intent prediction (accuracy) and slot filling (f1-score), respectively. Our method is advantageous because it does not require additional memory and it can be applied simultaneously with the training process of the model. N19-3014 @@ -5776,7 +5776,7 @@ Expectation and Locality Effects in the Prediction of Disfluent Fillers and Repairs in <fixed-case>E</fixed-case>nglish Speech SamvitDammalapati - RajakrishnanRajkumar + RajakrishnanRajkumar SumeetAgarwal 103–109 This study examines the role of three influential theories of language processing, viz., Surprisal Theory, Uniform Information Density (UID) hypothesis and Dependency Locality Theory (DLT), in predicting disfluencies in speech production. To this end, we incorporate features based on lexical surprisal, word duration and DLT integration and storage costs into logistic regression classifiers aimed to predict disfluencies in the Switchboard corpus of English conversational speech. We find that disfluencies occur in the face of upcoming difficulties and speakers tend to handle this by lessening cognitive load before disfluencies occur. Further, we see that reparandums behave differently from disfluent fillers possibly due to the lessening of the cognitive load also happening in the word choice of the reparandum, i.e., in the disfluency itself. While the UID hypothesis does not seem to play a significant role in disfluency prediction, lexical surprisal and DLT costs do give promising results in explaining language production. Further, we also find that as a means to lessen cognitive load for upcoming difficulties speakers take more time on words preceding disfluencies, making duration a key element in understanding disfluencies. @@ -5798,7 +5798,7 @@ A Pregroup Representation of Word Order Alternation Using <fixed-case>H</fixed-case>indi Syntax AlokDebnath - ManishShrivastava + ManishShrivastava 125–135 Pregroup calculus has been used for the representation of free word order languages (Sanskrit and Hungarian), using a construction called precyclicity. However, restricted word order alternation has not been handled before. This paper aims at introducing and formally expressing three methods of representing word order alternation in the pregroup representation of any language. This paper describes the word order alternation patterns of Hindi, and creates a basic pregroup representation for the language. In doing so, the shortcoming of correct reductions for ungrammatical sentences due to the current apparatus is highlighted, and the aforementioned methods are invoked for a grammatically accurate representation of restricted word order alternation. The replicability of these methods is explained in the representation of adverbs and prepositional phrases in English. N19-3017 @@ -5852,7 +5852,7 @@ Abbreviation Explorer - an interactive system for pre-evaluation of Unsupervised Abbreviation Disambiguation - Manuel R.Ciosici + Manuel R.Ciosici IraAssent 1–5 We present Abbreviation Explorer, a system that supports interactive exploration of abbreviations that are challenging for Unsupervised Abbreviation Disambiguation (UAD). Abbreviation Explorer helps to identify long-forms that are easily confused, and to pinpoint likely causes such as limitations of normalization, language switching, or inconsistent typing. It can also support determining which long-forms would benefit from additional input text for unsupervised abbreviation disambiguation. The system provides options for creating corrective rules that merge redundant long-forms with identical meaning. The identified rules can be easily applied to the already existing vector spaces used by UAD to improve disambiguation performance, while also avoiding the cost of retraining. @@ -5876,10 +5876,10 @@ Enabling Search and Collaborative Assembly of Causal Interactions Extracted from Multilingual and Multi-domain Free Text George C. G.Barbosa ZechyWong - GusHahn-Powell + GusHahn-Powell DaneBell RebeccaSharp - Marco A.Valenzuela-Escárcega + Marco A.Valenzuela-Escárcega MihaiSurdeanu 12–17 Many of the most pressing current research problems (e.g., public health, food security, or climate change) require multi-disciplinary collaborations. In order to facilitate this process, we propose a system that incorporates multi-domain extractions of causal interactions into a single searchable knowledge graph. Our system enables users to search iteratively over direct and indirect connections in this knowledge graph, and collaboratively build causal models in real time. To enable the aggregation of causal information from multiple languages, we extend an open-domain machine reader to Portuguese. The new Portuguese reader extracts over 600 thousand causal statements from 120 thousand Portuguese publications with a precision of 62%, which demonstrates the value of mining multilingual scientific information. @@ -5901,7 +5901,7 @@ Learning to Respond to Mixed-code Queries using Bilingual Word Embeddings Chia-FangHo - JasonChang + JasonChang Jhih-JieChen ChingyuYang 24–28 @@ -5945,7 +5945,7 @@ BenjaminGyori KeithAlcock EgoitzLaparra - Marco A.Valenzuela-Escárcega + Marco A.Valenzuela-Escárcega AjayNagesh VikasYadav JohnBachman @@ -5999,7 +5999,7 @@ DaphneIppolito ArunKirubarajan JaiThirani - LyleUngar + LyleUngar ChrisCallison-Burch 60–65 Open-domain dialog systems (i.e. chatbots) are difficult to evaluate. The current best practice for analyzing and comparing these dialog systems is the use of human judgments. However, the lack of standardization in evaluation procedures, and the fact that model parameters and code are rarely published hinder systematic human evaluation experiments. We introduce a unified framework for human evaluation of chatbots that augments existing tools and provides a web-based hub for researchers to share and compare their dialog systems. Researchers can submit their trained models to the ChatEval web interface and obtain comparisons with baselines and prior work. The evaluation code is open-source to ensure standardization and transparency. In addition, we introduce open-source baseline models and evaluation datasets. ChatEval can be found at https://chateval.org. @@ -6040,7 +6040,7 @@ WeiFang BrianXu MitraMohtarami - JamesGlass + JamesGlass 78–83 We present FAKTA which is a unified framework that integrates various components of a fact-checking process: document retrieval from media sources with various types of reliability, stance detection of documents with respect to given claims, evidence extraction, and linguistic analysis. FAKTA predicts the factuality of given claims and provides evidence at the document and sentence level to explain its predictions. N19-4014 @@ -6051,7 +6051,7 @@ i<fixed-case>C</fixed-case>omposer: An Automatic Songwriting System for <fixed-case>C</fixed-case>hinese Popular Music Hsin-PeiLee Jhih-ShengFang - Wei-YunMa + Wei-YunMa 84–88 In this paper, we introduce iComposer, an interactive web-based songwriting system designed to assist human creators by greatly simplifying music production. iComposer automatically creates melodies to accompany any given text. It also enables users to generate a set of lyrics given arbitrary melodies. iComposer is based on three sequence-to-sequence models, which are used to predict melody, rhythm, and lyrics, respectively. Songs generated by iComposer are compared with human-composed and randomly-generated ones in a subjective test, the experimental results of which demonstrate the capability of the proposed system to write pleasing melodies and meaningful lyrics at a level similar to that of humans. N19-4015 @@ -6073,7 +6073,7 @@ <fixed-case>LT</fixed-case> Expertfinder: An Evaluation Framework for Expert Finding Methods TimFischer SteffenRemus - ChrisBiemann + ChrisBiemann 98–104 Expert finding is the task of ranking persons for a predefined topic or search query. Finding experts for a specified area is an important task and has attracted much attention in the information retrieval community. Most approaches for this task are evaluated in a supervised fashion, which depend on predefined topics of interest as well as gold standard expert rankings. Famous representatives of such datasets are enriched versions of DBLP provided by the ArnetMiner projet or the W3C Corpus of TREC. However, manually ranking experts can be considered highly subjective and detailed rankings are hardly distinguishable. Evaluating these datasets does not necessarily guarantee a good or bad performance of the system. Particularly for dynamic systems, where topics are not predefined but formulated as a search query, we believe a more informative approach is to perform user studies for directly comparing different methods in the same view. In order to accomplish this in a user-friendly way, we present the LT Expert Finder web-application, which is equipped with various query-based expert finding methods that can be easily extended, a detailed expert profile view, detailed evidence in form of relevant documents and statistics, and an evaluation component that allows the qualitative comparison between different rankings. N19-4017 @@ -6098,7 +6098,7 @@ YingLin JosephHoover SpencerWhitehead - ClareVoss + ClareVoss MortezaDehghani HengJi 110–115 @@ -6135,9 +6135,9 @@ Visualizing Inferred Morphotactic Systems - HaleyLepp + HaleyLepp OlgaZamaraeva - Emily M.Bender + Emily M.Bender 127–131 We present a web-based system that facilitates the exploration of complex morphological patterns found in morphologically very rich languages. The need for better understanding of such patterns is urgent for linguistics and important for cross-linguistically applicable natural language processing. In this paper we give an overview of the system architecture and describe a sample case study on Abui [abz], a Trans-New Guinea language spoken in Indonesia. N19-4022 @@ -6148,11 +6148,11 @@ A <fixed-case>R</fixed-case>esearch <fixed-case>P</fixed-case>latform for <fixed-case>M</fixed-case>ulti-<fixed-case>R</fixed-case>obot <fixed-case>D</fixed-case>ialogue with <fixed-case>H</fixed-case>umans MatthewMarge StephenNogar - Cory J.Hayes - Stephanie M.Lukin + Cory J.Hayes + Stephanie M.Lukin JesseBloecker EricHolder - ClareVoss + ClareVoss 132–137 This paper presents a research platform that supports spoken dialogue interaction with multiple robots. The demonstration showcases our crafted MultiBot testing scenario in which users can verbally issue search, navigate, and follow instructions to two robotic teammates: a simulated ground robot and an aerial robot. This flexible language and robotic platform takes advantage of existing tools for speech recognition and dialogue management that are compatible with new domains, and implements an inter-agent communication protocol (tactical behavior specification), where verbal instructions are encoded for tasks assigned to the appropriate robot. N19-4023 @@ -6202,7 +6202,7 @@ Deep Learning for Natural Language Inference - SamuelBowman + SamuelBowman XiaodanZhu 6–8 This tutorial discusses cutting-edge research on NLI, including recent advance on dataset development, cutting-edge deep learning models, and highlights from recent research on using NLI to understand capabilities and limits of deep learning models for language understanding and reasoning. @@ -6226,7 +6226,7 @@ Transfer Learning in Natural Language Processing SebastianRuder - Matthew E.Peters + Matthew E.Peters SwabhaSwayamdipta ThomasWolf 15–18 @@ -6240,7 +6240,7 @@ Language Learning and Processing in People and Machines AidaNematzadeh RichardFutrell - RogerLevy + RogerLevy 19–21 The goal of this tutorial is to bring the fields of computational linguistics and computational cognitive science closer: we will introduce different stages of language acquisition and their parallel problems in NLP. As an example, one of the early challenges children face is mapping the meaning of word labels (such as “cat”) to their referents (the furry animal in the living room). Word learning is similar to the word alignment problem in machine translation. We explain the current computational models of language acquisition, their limitations, and how the insights from these models can be incorporated into NLP applications. Moreover, we discuss how we can take advantage of the cognitive science of language in computational linguistics: for example, by designing cognitively-motivated evaluations task or buildings language-learning inductive biases into our models. N19-5005 diff --git a/data/xml/O00.xml b/data/xml/O00.xml index e4f7bf85d1..7a211ab892 100644 --- a/data/xml/O00.xml +++ b/data/xml/O00.xml @@ -5,7 +5,7 @@ Proceedings of Research on Computational Linguistics Conference XIII O00-1 Lee-FengChien - Kuang-HuaChen + Kuang-HuaChen The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Taipei, Taiwan
August @@ -32,9 +32,9 @@
The Improving Techniques for Disambiguating Non-alphabet Sense Categories - Feng-LongHwang - Ming-ShingYu - Min-JerWu + Feng-LongHwang + Ming-ShingYu + Min-JerWu 67–86 O00-1003 hwang-etal-2000-improving @@ -42,7 +42,7 @@ Building A <fixed-case>C</fixed-case>hinese Text Summarizer with Phrasal Chunks and Domain Knowledge WeiquanLiu - JoeZhou + JoeZhou 87–96 O00-1004 liu-zhou-2000-building @@ -91,7 +91,7 @@ 具有累進學習能力之貝氏預測法則在汽車語音辨識之應用 (<fixed-case>B</fixed-case>ayesian Predictive Classification with Incremental Learning Capability for Car Speech Recognition) [In <fixed-case>C</fixed-case>hinese] - Jen-TzungChien + Jen-TzungChien Guo-HongLiao 179–197 chien-liao-2000-ju @@ -99,7 +99,7 @@ 結合麥克風陣列及模型調整技術之遠距離語音辨識系統 (Far-Distant Speech Recognition System Using Combined Techniques of Microphone Array and Model Adaptation)[In <fixed-case>C</fixed-case>hinese] Jain-RayLai - Jen-TzungChien + Jen-TzungChien 199–213 O00-1011 lai-chien-2000-jie @@ -128,8 +128,8 @@ 漢語動詞辭彙語意分析:表達模式與研究方法 (A Lexical-Semantic Analysis of <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Verbs: Representation and Methodology) [In <fixed-case>C</fixed-case>hinese] - Li-LiChang - Keh-JiannChen + Li-LiChang + Keh-JiannChen Chu-RenHuang 1–18 O00-2001 @@ -139,10 +139,10 @@ The Module-Attribute Representation of Verbal Semantics: From Semantic to Argument Structure Chu-RenHuang KathleenAhrens - Li-LiChang - Keh-JiannChen - Mei-ChunLiu - Mei-ChihTsai + Li-LiChang + Keh-JiannChen + Mei-ChunLiu + Mei-ChihTsai 19–46 O00-2002 huang-etal-2000-module @@ -151,17 +151,17 @@ What Can Near Synonyms Tell Us Lian-ChengChief Chu-RenHuang - Keh-JiannChen - Mei-ChihTsai - Li-LiChang + Keh-JiannChen + Mei-ChihTsai + Li-LiChang 47–60 O00-2003 chief-etal-2000-near Alternation Across Semantic Fields: A Study on <fixed-case>M</fixed-case>andarin Verbs of Emotion - Li-LiChang - Keh-JiannChen + Li-LiChang + Keh-JiannChen Chu-RenHuang 61–80 O00-2004 @@ -169,9 +169,9 @@ When Endpoint Meets Endpoint: A Corpus-based Lexical Semantic Study of <fixed-case>M</fixed-case>andarin Verbs of Throwing - Mei-ChunLiu + Mei-ChunLiu Chu-RenHuang - CharlesLee + CharlesLee Ching-YiLee 81–96 O00-2005 @@ -191,7 +191,7 @@ Adaptive Word Sense Disambiguation Using Lexical Knowledge in a Machine-readable Dictionary - Jen NanChen + Jen NanChen 1–42 O00-3001 chen-2000-adaptive @@ -206,15 +206,15 @@ <fixed-case>J</fixed-case>apanese-<fixed-case>C</fixed-case>hinese Cross-Language Information Retrieval: An Interlingua Approach - Md. MarufHasan - YujiMatsumoto + Md. MarufHasan + YujiMatsumoto 59–86 O00-3003 hasan-matsumoto-2000-japanese Compiling <fixed-case>T</fixed-case>aiwanese Learner Corpus of <fixed-case>E</fixed-case>nglish - Rebecca Hsue-HuehShih + Rebecca Hsue-HuehShih 87–100 O00-3004 shih-2000-compiling diff --git a/data/xml/O01.xml b/data/xml/O01.xml index c7d0a64524..5e19c0cb30 100644 --- a/data/xml/O01.xml +++ b/data/xml/O01.xml @@ -5,7 +5,7 @@ Proceedings of Research on Computational Linguistics Conference XIV O01-1 Chung-HsienWu - Jen-TzungChien + Jen-TzungChien The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Tainan, Taiwan
August @@ -17,8 +17,8 @@ 新聞文件摘要之研究 (Text Summarization on News) [In <fixed-case>C</fixed-case>hinese] - Hsiang-PinLee - Su-JinKer + Hsiang-PinLee + Su-JinKer 23–42 O01-1001 lee-ker-2001-xin @@ -34,7 +34,7 @@ 多篇文件自動摘要系統 (Multi-Document Summarization System) [In <fixed-case>C</fixed-case>hinese] Jian-ChengShen - Jason S.Chang + Jason S.Chang 65–87 O01-1003 shen-chang-2001-duo @@ -66,9 +66,9 @@ Optimiztion of <fixed-case>HMM</fixed-case> by the Tabu Search Algorithm Xiao-danMei - Sheng-heSun + Sheng-heSun Jeng-shuangPan - Tsong-yiChen + Tsong-yiChen 147–153 O01-1007 mei-etal-2001-optimiztion @@ -84,10 +84,10 @@ 中文語料庫構建及管理系統設計 (Design of Management System for <fixed-case>C</fixed-case>hinese Corpus Construction) [In <fixed-case>C</fixed-case>hinese] - Wei-YunMa + Wei-YunMa Yu-MingHsieh Chang-HuaYang - Keh-JiannChen + Keh-JiannChen 175–191 O01-1009 ma-etal-2001-zhong @@ -95,9 +95,9 @@ Design, Compilation and Processing of <fixed-case>CUC</fixed-case>all: A Set of <fixed-case>C</fixed-case>antonese Spoken Language Corpora Collected Over Telephone Networks W.K.Lo - P.C.Ching + P.C.Ching TanLee - HelenMeng + HelenMeng 193–212 O01-1010 lo-etal-2001-design @@ -112,17 +112,17 @@ 中文動詞自動分類研究 (Automatic Classification of <fixed-case>C</fixed-case>hinese Unknown Verbs) [In <fixed-case>C</fixed-case>hinese] - Hui-hsinTseng + Hui-hsinTseng Chao-LinLiu - Zhao MingGao - Keh-JiannChen + Zhao MingGao + Keh-JiannChen 253–272 O01-1012 tseng-etal-2001-zhong 統計式片語翻譯模型(A Statistical Model of Terminology Translation) [In <fixed-case>C</fixed-case>hinese] - Jason S.Chang + Jason S.Chang Ta-weiYu 273–297 O01-1013 @@ -130,7 +130,7 @@ Metaphor, Inference, and Conceptualisation : On the Development of <fixed-case>V</fixed-case>-diao Construction in <fixed-case>M</fixed-case>andarin - Wei-lunLu + Wei-lunLu 299–316 O01-1014 lu-2001-metaphor @@ -159,7 +159,7 @@ Improving Translation Selection with a New Translation Model Trained by Independent Monolingual Corpora MingZhou YuanDing - ChangningHuang + ChangningHuang 1–26 O01-2001 zhou-etal-2001-improving @@ -167,7 +167,7 @@ The Use of Clustering Techniques for Language Modeling <fixed-case>V</fixed-case> Application to <fixed-case>A</fixed-case>sian Language JianfengGao - Joshua T.Goodman + Joshua T.Goodman JiangboMiao 27–60 O01-2002 @@ -183,11 +183,11 @@ Automatic Translation Template Acquisition Based on Bilingual Structure Alignment - YajuanLu + YajuanLu MingZhou ShengLi - ChangningHuang - TiejunZhao + ChangningHuang + TiejunZhao 83–108 O01-2004 lu-etal-2001-automatic @@ -216,7 +216,7 @@ Metaphorical Transfer and Pragmatic Strengthening: On the Development of <fixed-case>V</fixed-case>-diao in <fixed-case>M</fixed-case>andarin - Louis Wei-lunLu + Louis Wei-lunLu 1–10 O01-3001 lu-2001-metaphorical @@ -240,9 +240,9 @@ 統計式片語翻譯模型 (Statistical Translation Model for Phrases) [In <fixed-case>C</fixed-case>hinese] - Jason S.Chang + Jason S.Chang DavidYu - Chun-JunLee + Chun-JunLee 43–64 O01-3004 chang-etal-2001-tong diff --git a/data/xml/O02.xml b/data/xml/O02.xml index 29d5200a54..f0000f8c71 100644 --- a/data/xml/O02.xml +++ b/data/xml/O02.xml @@ -13,17 +13,17 @@ 以構詞律與相似法為本的中文動詞自動分類研究 (A Hybrid Approach for Automatic Classification of <fixed-case>C</fixed-case>hinese Unknown Verbs) [In <fixed-case>C</fixed-case>hinese] - Hui-HsinTseng + Hui-HsinTseng Chao-LinLiu - Zhao-MingGao - Keh-JiannChen + Zhao-MingGao + Keh-JiannChen 1–28 tseng-etal-2002-yi Word Sense Disambiguation and Sense-Based <fixed-case>NV</fixed-case> Event Frame Identifier Jia-LinTsai - Wen-LianHsu + Wen-LianHsu Jeng-WoeiSu 29–46 O02-1002 @@ -32,7 +32,7 @@ 一種基於知網的語義排歧模型研究 (A Study of Semantic Disambiguation Based on <fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et) [In <fixed-case>C</fixed-case>hinese] XiaofengYang - TangqiuLi + TangqiuLi 47–78 O02-1003 yang-li-2002-yi @@ -41,7 +41,7 @@ 基於文本概念和k<fixed-case>NN</fixed-case>的跨語種文本過濾 (Cross-Language Text Filtering Based on Text Concepts and k<fixed-case>NN</fixed-case>) [In <fixed-case>C</fixed-case>hinese WeifengSu ShaoziLi - TanqiuLi + TanqiuLi WenjianYou 79–90 O02-1004 @@ -70,7 +70,7 @@ A Study on Word Similarity using Context Vector Models - Keh-JiannChen + Keh-JiannChen Jia-MingYou 37–58 O02-2002 diff --git a/data/xml/O03.xml b/data/xml/O03.xml index 8a8747acd6..4bc299a35c 100644 --- a/data/xml/O03.xml +++ b/data/xml/O03.xml @@ -4,7 +4,7 @@ Proceedings of Research on Computational Linguistics Conference XV O03-1 - Jason J.Chang + Jason J.Chang Hsien-ChinLiou The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Hsinchu, Taiwan
@@ -51,14 +51,14 @@
從語域及借詞觀點探討台語文寫作風格 (Discussion on <fixed-case>T</fixed-case>aiwanese Writing Style from The Viewpoint of Register and Loanword) [In <fixed-case>C</fixed-case>hinese] - Un-GianIunn + Un-GianIunn 73–86 O03-1005 iunn-2003-cong <fixed-case>ECONOMY</fixed-case> <fixed-case>IS</fixed-case> <fixed-case>A</fixed-case> <fixed-case>PERSON</fixed-case>: A <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Corpora and Ontological-based Comparison Using the <fixed-case>C</fixed-case>onceptual <fixed-case>M</fixed-case>apping <fixed-case>M</fixed-case>odel - Siaw-FongChung + Siaw-FongChung KathleenAhrens Chu-RenHuang 87–110 @@ -75,7 +75,7 @@ 文件自我擴展於自動分類之應用 (Application of Document Self-Expansion to Text Categorization) [In <fixed-case>C</fixed-case>hinese] - Yuen-HsienTseng + Yuen-HsienTseng Da-WeiJuang 129–141 O03-1008 @@ -85,7 +85,7 @@ Auto-Discovery of <fixed-case>NVEF</fixed-case> Word-Pairs in <fixed-case>C</fixed-case>hinese Jia-LinTsai GladysHsieh - Wen-LianHsu + Wen-LianHsu 143–160 O03-1009 tsai-etal-2003-auto @@ -93,7 +93,7 @@ Reliable and Cost-Effective <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case>-Tagging Yu-FangTsai - Keh-JiannChen + Keh-JiannChen 161–173 O03-1010 tsai-chen-2003-reliable @@ -102,16 +102,16 @@ <fixed-case>C</fixed-case>hinese Word Auto-Confirmation Agent Jia-LinTsai Cheng-LungSung - Wen-LianHsu + Wen-LianHsu 175–191 O03-1011 tsai-etal-2003-chinese <fixed-case>M</fixed-case>encius: A <fixed-case>C</fixed-case>hinese Named Entity Recognizer Using Hybrid Model - Tzong-HanTsai + Tzong-HanTsai Shih-HungWu - Wen-LianHsu + Wen-LianHsu 193–209 O03-1012 tsai-etal-2003-mencius @@ -171,14 +171,14 @@ Extracting Verb-Noun Collocations from Text - Jia YanJian + Jia YanJian 295–302 O03-2003 jian-2003-extracting Bilingual Sentence Alignment Based on Punctuation Marks - Kevin C.Yeh + Kevin C.Yeh 303–312 O03-2004 yeh-2003-bilingual @@ -207,9 +207,9 @@ Interleaving Text and Punctuations for Bilingual Sub-sentential Alignment - Wen-ChiHsie - KevinYeh - Jason S.Chang + Wen-ChiHsie + KevinYeh + Jason S.Chang Thomas C.Chuang 327–333 O03-3002 @@ -218,16 +218,16 @@ Restoration of Case Information in All-Cap <fixed-case>E</fixed-case>nglish Broadcast Transcription Yu-TingLiang - Jian-ChenWu + Jian-ChenWu 335–337 O03-3003 liang-wu-2003-restoration Using Punctuations and Lengths for Bilingual Sub-sentential Alignment - Wen-ChiHsien - KevinYeh - Jason S.Chang + Wen-ChiHsien + KevinYeh + Jason S.Chang Thomas C.Chuang 339–345 O03-3004 @@ -235,16 +235,16 @@ <fixed-case>T</fixed-case>otal<fixed-case>R</fixed-case>ecall: A Bilingual Concordance in National Digital Learning Project - <fixed-case>CANDLE</fixed-case> - Jian-ChengWu + Jian-ChengWu Wen-ChiShei - Jason S.Chang + Jason S.Chang 347–353 O03-3005 wu-etal-2003-totalrecall Unsupervised Word Segmentation Without Dictionary - Jason S.Chang + Jason S.Chang TracyLin 355–359 O03-3006 @@ -252,9 +252,9 @@ 盲胞有聲書語音查詢系統 (A Speech-enabled Talking Book Retrieval System for the Blind) [In <fixed-case>C</fixed-case>hinese] - Cheng-YuanLin + Cheng-YuanLin Ming-FengHsieh - Jyh-ShingJang + Jyh-ShingJang 361–367 O03-3007 lin-etal-2003-mang @@ -263,7 +263,7 @@ 線上新聞語音檢索系統 (Online New Retrieval Based on Speech Input) [In <fixed-case>C</fixed-case>hinese] Jiang-ChunChen Jui-LinLo - Jyh-ShingJang + Jyh-ShingJang 369–376 O03-3008 chen-etal-2003-xian @@ -303,7 +303,7 @@ Extension of <fixed-case>Z</fixed-case>ipf’s Law to Word and Character N-grams for <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese - Le QuanHa + Le QuanHa E. I.Sicilia-Garcia JiMing F. J.Smith @@ -337,7 +337,7 @@ Hua-PingZhang QunLiu Hong-KuiYu - Xue-QiCheng + Xue-QiCheng ShuoBai 29–60 O03-5002 @@ -345,7 +345,7 @@ Building A <fixed-case>C</fixed-case>hinese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Via Class-Based Translation Model - Jason S.Chang + Jason S.Chang TracyLin Geeng-NengYou Thomas C.Chuang @@ -364,7 +364,7 @@ 從詞網出發的中文複合名詞的語意表達 (Learning the Semantic Meaning of a <fixed-case>C</fixed-case>hinese Compound from the <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et) [In <fixed-case>C</fixed-case>hinese] - Sue-JinKer + Sue-JinKer 93-108 O03-5005 ker-2003-cong diff --git a/data/xml/O04.xml b/data/xml/O04.xml index 3ee5485ad2..f177e4a86a 100644 --- a/data/xml/O04.xml +++ b/data/xml/O04.xml @@ -5,7 +5,7 @@ Proceedings of the 16th Conference on Computational Linguistics and Speech Processing O04-1 Lee-FengChien - Hsin-MinWang + Hsin-MinWang The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Taipei, Taiwan
September @@ -28,7 +28,7 @@ 聚集事後機率線性迴歸調適演算法應用於語音辨識 (Aggregate a Posteriori Linear Regression for Speech Recognition) [In <fixed-case>C</fixed-case>hinese] Chih-HsienHuang Yii-KaiWang - Jen-TzungChien + Jen-TzungChien 11–20 O04-1002 huang-etal-2004-ju @@ -45,7 +45,7 @@ A Noise Estimator with Rapid Adaptation in Variable-Level Noisy Environments Bing-FeiWu - Kun-ChingWang + Kun-ChingWang Lung-YiKuo 33–38 O04-1004 @@ -86,7 +86,7 @@ Applying Meaningful Word-Pair Identifier to the <fixed-case>C</fixed-case>hinese Syllable-to-Word Conversion Problem Jia-LinTsai Tien-JienChiang - Wen-LianHsu + Wen-LianHsu 79–88 O04-1009 tsai-etal-2004-applying @@ -111,7 +111,7 @@ 利用自然語言處理技術自動產生英文克漏詞試題之研究 (A Study on Natural Language Processing Aided Grneration of Multiple-Choice Cloze Items) [In <fixed-case>C</fixed-case>hinese] Chun-HungWang Chao-LinLiu - Zhao MingGao + Zhao MingGao 111–120 O04-1012 wang-etal-2004-li-yong @@ -127,9 +127,9 @@ 現代漢語複合動詞之詞首詞尾研究 (Compositional Semantics of <fixed-case>M</fixed-case>andarin Affix Verbs) [In <fixed-case>C</fixed-case>hinese] - Chih-mingChiu + Chih-mingChiu Ji-ChinLo - Keh-JiannChen + Keh-JiannChen 131–139 O04-1014 chiu-etal-2004-xian @@ -138,7 +138,7 @@ 語法規律的抽取及普遍化與精確化的研究 (Grammar Extraction, Generalization and Specialization) [In <fixed-case>C</fixed-case>hinese] Yu-MingHsieh Duen-ChiYang - Keh-JiannChen + Keh-JiannChen 141–150 O04-1015 hsieh-etal-2004-yu @@ -161,14 +161,14 @@ Functional Distinction between Zai and Zhengzai in <fixed-case>M</fixed-case>andarin: Evidence from Collocations Tsi-chunLin - Mei-chunLiu + Mei-chunLiu 169–175 O04-1018 lin-liu-2004-functional 中文手機新聞簡訊自動摘要 (Automated Summarization for <fixed-case>C</fixed-case>hinese News Brief Service in Cellular Phones) [In <fixed-case>C</fixed-case>hinese] - Yuen-HsienTseng + Yuen-HsienTseng 177–189 O04-1019 tseng-2004-zhong @@ -176,9 +176,9 @@ Using the Web as Corpus for Un-supervised Learning in Question Answering Yi-ChiaWang - Jian-ChengWu + Jian-ChengWu TyneLiang - Jason S.Chang + Jason S.Chang 191–198 O04-1020 wang-etal-2004-using @@ -194,7 +194,7 @@ 具相關資訊回饋能力之貝氏混合式機率檢索模型 (Using Relevance Feedback in <fixed-case>B</fixed-case>ayesian Probabilistic Mixture Retrieval Model) [In <fixed-case>C</fixed-case>hinese] - Jen-TzungChien + Jen-TzungChien Duen-ChiYang 209–218 O04-1022 @@ -212,13 +212,13 @@ 華台雙語發音變異性之語音辨識研究及<fixed-case>PDA</fixed-case>之應用 (The study of pronunciation variations in <fixed-case>M</fixed-case>andarin and <fixed-case>T</fixed-case>aiwanese and its application in <fixed-case>PDA</fixed-case>) [In <fixed-case>C</fixed-case>hinese] - Dau-chengLyu + Dau-chengLyu Hong-WenHsien Yung-XianLee Zhong-IngLiou - Chun-NanHsu + Chun-NanHsu Yung-JienChiang - Ren-yuanLyu + Ren-yuanLyu 229–238 O04-1024 lyu-etal-2004-hua @@ -227,8 +227,8 @@ 以語音辨識與評分輔助口說英文學習 (Spoken <fixed-case>E</fixed-case>nglish Learning Based on Speech Recognition and Assessment) [In <fixed-case>C</fixed-case>hinese] Jiang-ChunChen Jui-LinLo - Jyh-ShingJang - Chun-JenLee + Jyh-ShingJang + Chun-JenLee 239–248 O04-1025 chen-etal-2004-yi @@ -243,9 +243,9 @@ Collocational Translation Memory Extraction Based on Statistical and Linguistic Information - Jia-YanJian + Jia-YanJian Yu-ChiaChang - Jason S.Chang + Jason S.Chang 257–264 O04-1027 jian-etal-2004-collocational @@ -262,7 +262,7 @@ 結合統計與語言訊息的混合式中英雙語句對應演算法 (Combining Linguistic and Statistical Information in <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Bilingual Sentence Alignment) [In <fixed-case>C</fixed-case>hinese] Yu-ChunLin - Zhao MingGao + Zhao MingGao 273–283 O04-1029 lin-gao-2004-jie @@ -286,10 +286,10 @@ The Construction of a <fixed-case>C</fixed-case>hinese Named Entity Tagged Corpus: <fixed-case>CNEC</fixed-case>1.0 Cheng-WeiShih - Tzong-HanTsai + Tzong-HanTsai Shih-HungWu Chiu-ChenHsieh - Wen-LianHsu + Wen-LianHsu 305–313 O04-1032 shih-etal-2004-construction @@ -305,7 +305,7 @@ 仿趙氏音高尺度之基週軌跡正規化方法及其應用 (A Pitch-Contour Normalization Method Following Zhao’s Pitch Scale and Its Application) [In <fixed-case>C</fixed-case>hinese] - Hung-yanGu + Hung-yanGu Hsiao-FenChang Chun HsinWu 325–334 @@ -314,10 +314,10 @@ 基於反轉檔查找與最佳片段選取演算法的中文語音合成系統 (A <fixed-case>M</fixed-case>andarin Text-to-speech System based on Inverted File Indexing and Unit Selection) [In <fixed-case>C</fixed-case>hinese] - Cheng YuanLin + Cheng YuanLin Ming-FengHsieh Kuan-TingChen - Jyh-ShingJang + Jyh-ShingJang 335–344 O04-1035 lin-etal-2004-ji @@ -325,7 +325,7 @@ Improved Prosody Module in a Text-to-Speech System Wen-WeiLiao - Jia-LinShen + Jia-LinShen 345–354 O04-1036 liao-shen-2004-improved @@ -365,7 +365,7 @@ Bilingual Collocation Extraction Based on Syntactic and Statistical Analyses Chien-ChengWu - Jason S.Chang + Jason S.Chang 1–20 O04-2001 wu-chang-2004-bilingual @@ -382,18 +382,18 @@ Auto-Generation of <fixed-case>NVEF</fixed-case> Knowledge in <fixed-case>C</fixed-case>hinese Jia-LinTsai GladysHsieh - Wen-LianHsu + Wen-LianHsu 41–64 O04-2003 tsai-etal-2004-auto <fixed-case>M</fixed-case>encius: A <fixed-case>C</fixed-case>hinese Named Entity Recognizer Using the Maximum Entropy-based Hybrid Model - Tzong-HanTsai + Tzong-HanTsai Shih-HungWu Cheng-WeiLee Cheng-WeiShih - Wen-LianHsu + Wen-LianHsu 65–82 O04-2004 tsai-etal-2004-mencius @@ -401,7 +401,7 @@ Reliable and Cost-Effective Pos-Tagging Yu-FangTsai - Keh-JiannChen + Keh-JiannChen 83–96 O04-2005 tsai-chen-2004-reliable @@ -416,7 +416,7 @@ The Properties and Further Applications of <fixed-case>C</fixed-case>hinese Frequent Strings Yih-JengLin - Ming-ShingYu + Ming-ShingYu 113–128 O04-2007 lin-yu-2004-properties @@ -435,9 +435,9 @@ Toward Constructing A Multilingual Speech Corpus for <fixed-case>T</fixed-case>aiwanese (<fixed-case>M</fixed-case>in-nan), <fixed-case>H</fixed-case>akka, and <fixed-case>M</fixed-case>andarin - Ren-YuanLyu + Ren-YuanLyu Min-SiongLiang - Yuang-ChinChiang + Yuang-ChinChiang 1–12 O04-3001 lyu-etal-2004-toward @@ -454,7 +454,7 @@ Latent Semantic Language Modeling and Smoothing - Jen-TzungChien + Jen-TzungChien Meng-SungWu Hua-JuiPeng 29–44 diff --git a/data/xml/O05.xml b/data/xml/O05.xml index d235665e3d..7d65516f1e 100644 --- a/data/xml/O05.xml +++ b/data/xml/O05.xml @@ -5,7 +5,7 @@ Proceedings of the 17th Conference on Computational Linguistics and Speech Processing O05-1 Chung-HsienWu - Jen-TzungChien + Jen-TzungChien Wen-HsiangLu The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Tainan, Taiwan
@@ -39,7 +39,7 @@ Yuan-FuLiao Zhi-XianZhuang Zi-HeChen - Yau-TarngJuang + Yau-TarngJuang 37–53 O05-1003 liao-etal-2005-jie @@ -89,8 +89,8 @@
日本學生學習華語的聲調偏誤分析:以二字調為例 (Tonal errors of <fixed-case>J</fixed-case>apanese students learning <fixed-case>C</fixed-case>hinese: A study of disyllabic words) [In <fixed-case>C</fixed-case>hinese] - Ke-JiaZhang - Li-MeiChen + Ke-JiaZhang + Li-MeiChen 125–139 O05-1009 zhang-chen-2005-ri @@ -100,7 +100,7 @@ Chia-YinChen Ming HsienKo Tzu-WeiWu - Jason S.Chang + Jason S.Chang 155–163 O05-1010 chen-etal-2005-fast @@ -116,7 +116,7 @@ 使用韻律階層及大量詞彙的中文文轉音系統 (A <fixed-case>M</fixed-case>andarin Text-to-Speech System Using Prosodic Hierarchy and a Large Number of Words) [In <fixed-case>C</fixed-case>hinese] - Ming-ShingYu + Ming-ShingYu Tang-YuZhang Tsan-HuangShiu Yu-HerTsai @@ -136,7 +136,7 @@ 閩南語語句基週軌跡產生: 兩種模型之混合與比較 (<fixed-case>M</fixed-case>in-<fixed-case>N</fixed-case>an Sentence Pitch-contour Generation: Mixing and Comparison of Two Kinds of Models) [In <fixed-case>C</fixed-case>hinese] - Hung-YanGu + Hung-YanGu WeiHuang 213–225 O05-1014 @@ -147,16 +147,16 @@ BinYan Zhe-MingLu Jeng-ShyangPan - Sheng-HeSun + Sheng-HeSun 239–246 O05-1015 yan-etal-2005-statistical A Probe into Ambiguities of Determinative-Measure Compounds - Shih-MinLi + Shih-MinLi Su-ChuLin - Keh-JiannChen + Keh-JiannChen 247–255 O05-1016 li-etal-2005-probe @@ -166,8 +166,8 @@ Shih-HungWu Cheng-WeiShih Chia-WeiWu - Tzong-HanTsai - Wen-LianHsu + Tzong-HanTsai + Wen-LianHsu 257–271 O05-1017 wu-etal-2005-applying @@ -182,10 +182,10 @@ 台語變調系統實作研究 (A Study on Implementation of <fixed-case>T</fixed-case>aiwanese Tone Sandhi System) [In <fixed-case>C</fixed-case>hinese] - Ún-giân + Ún-giân Sheng-anLi - Kiãt-gãkLâu - Cheng-yanKao + Kiãt-gãkLâu + Cheng-yanKao 293–304 O05-1019 iu-etal-2005-tai @@ -193,8 +193,8 @@ 利用雙語學術名詞庫抽取中英字詞互譯及詞義解歧 (Sense Extraction and Disambiguation for <fixed-case>C</fixed-case>hinese Words from Bilingual Terminology Bank) [In <fixed-case>C</fixed-case>hinese] Ming-HongBai - Keh-JiannChen - Jason S.Chang + Keh-JiannChen + Jason S.Chang 305–316 O05-1020 bai-etal-2005-li @@ -202,7 +202,7 @@ 利用向量支撐機辨識中文基底名詞組的初步研究 (A Preliminary Study on <fixed-case>C</fixed-case>hinese Base <fixed-case>NP</fixed-case> Detection using <fixed-case>SVM</fixed-case>) [In <fixed-case>C</fixed-case>hinese] Hsi-WeiChang - Zhao MingGao + Zhao MingGao Chao-LinLiu 317–331 O05-1021 @@ -219,7 +219,7 @@ 國語廣播新聞語料轉述系統之效能評估 (Evaluation of <fixed-case>M</fixed-case>andarin Broadcast News Transcription System) [In <fixed-case>C</fixed-case>hinese] Lung-HsunChang - Yih-ruWang + Yih-ruWang Sin-HorngChen 347–360 O05-1023 @@ -246,7 +246,7 @@ 基於統計與佚代的中英雙語詞及小句對應演算法 (An Iterative Algorithm for Bilingual <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Word and Clause Alignment based on Statistics) [In <fixed-case>C</fixed-case>hinese] Tzu-HuanHuang - Zhao MingGao + Zhao MingGao 385–395 O05-1026 huang-gao-2005-ji @@ -254,7 +254,7 @@ 電視新聞語料場景的自動切割與分類 (Automatic Scene Segmentation and Classification in Television News Database) [In <fixed-case>C</fixed-case>hinese] Bo-SyuanZiang - Ren-YuanLyu + Ren-YuanLyu Bor-HoYaung Hong-WenHsien 397–409 @@ -303,7 +303,7 @@ Reduced N-Grams for <fixed-case>C</fixed-case>hinese Evaluation - Le QuanHa + Le QuanHa R.Seymour P.Hanna F. J.Smith @@ -316,7 +316,7 @@ Jui-FengYeh Chung-HsienWu Ming-JunChen - Liang-ChihYu + Liang-ChihYu 35–52 O05-2003 yeh-etal-2005-automated @@ -324,8 +324,8 @@ <fixed-case>C</fixed-case>hinese Main Verb Identification: From Specification to Realization Bing-GongDing - Chang-NingHuang - De-GenHuang + Chang-NingHuang + De-GenHuang 53–94 O05-2004 ding-etal-2005-chinese @@ -333,7 +333,7 @@ Aligning Parallel Bilingual Corpora Statistically with Punctuation Criteria Thomas C.Chuang - Kevin C.Yeh + Kevin C.Yeh 95–122 O05-2005 chuang-yeh-2005-aligning @@ -341,7 +341,7 @@ Similarity Based <fixed-case>C</fixed-case>hinese Synonym Collocation Extraction WanyinLi - QinLu + QinLu RuifengXu 123–144 O05-2006 @@ -361,8 +361,8 @@ Automatic Segmentation and Labeling for <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Speech Corpora for Concatenation-based <fixed-case>TTS</fixed-case> - Cheng-YuanLin - Jyh-Shing RogerJang + Cheng-YuanLin + Jyh-Shing RogerJang Kuan-TingChen 145–166 O05-3001 @@ -385,7 +385,7 @@ <fixed-case>MATBN</fixed-case>: A <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Broadcast News Corpus - Hsin-MinWang + Hsin-MinWang BerlinChen Jen-WeiKuo Shih-SianCheng @@ -399,7 +399,7 @@ Chung-HsienYang Jhing-FaWang Chung-HsienWu - Jen-TzungChien + Jen-TzungChien 237–250 O05-3005 wang-etal-2005-taicar @@ -407,7 +407,7 @@ Design and Development of a Bilingual Reading Comprehension Corpus KuiXu - HelenMeng + HelenMeng 251–276 O05-3006 xu-meng-2005-design @@ -438,7 +438,7 @@ Using Lexical Constraints to Enhance the Quality of Computer-Generated Multiple-Choice Cloze Items Chao-LinLiu Chun-HungWang - Zhao-MingGao + Zhao-MingGao 303-328 O05-4001 liu-etal-2005-using @@ -446,9 +446,9 @@ Collocational Translation Memory Extraction Based on Statistical and Linguistic Information Thomas C.Chuang - Jia-YanJian + Jia-YanJian Yu-ChiaChang - Jason S.Chang + Jason S.Chang 329–346 O05-4002 chuang-etal-2005-collocational @@ -465,19 +465,19 @@ Modeling Pronunciation Variation for Bi-Lingual <fixed-case>M</fixed-case>andarin/<fixed-case>T</fixed-case>aiwanese Speech Recognition - Dau-ChengLyu - Ren-YuanLyu - Yuang-ChinChiang - Chun-NanHsu + Dau-ChengLyu + Ren-YuanLyu + Yuang-ChinChiang + Chun-NanHsu 363–380 O05-4004 lyu-etal-2005-modeling <fixed-case>C</fixed-case>hinese Word Segmentation by Classification of Characters - Chooi-LingGoh + Chooi-LingGoh MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 381–396 O05-4005 goh-etal-2005-chinese @@ -485,7 +485,7 @@ The Design and Construction of the <fixed-case>P</fixed-case>oly<fixed-case>U</fixed-case> Shallow Treebank RuifengXu - QinLu + QinLu YinLi WanyinLi 397–416 @@ -509,16 +509,16 @@ Chu-RenHuang Chun-LingChen Cui-XiaWeng - Hsiang-PingLee + Hsiang-PingLee Yong-XiangChen - Keh-JiannChen + Keh-JiannChen 417–430 O05-5001 huang-etal-2005-sinica From Frame to Subframe: Collocational Asymmetry in <fixed-case>M</fixed-case>andarin Verbs of Conversation - Mei-ChunLiu + Mei-ChunLiu Chun EdisonChang 431–444 O05-5002 @@ -526,9 +526,9 @@ Feature Representations and Logical Compatibility between Temporal Adverbs and Aspects - Shih-MinLi + Shih-MinLi Su-ChuLin - Keh-JiannChen + Keh-JiannChen 445–458 O05-5003 li-etal-2005-feature @@ -544,7 +544,7 @@ An Unsupervised Approach to <fixed-case>C</fixed-case>hinese Word Sense Disambiguation Based on Hownet HaoChen TingtingHe - DonghongJi + DonghongJi ChangqinQuan 473–482 O05-5005 @@ -552,7 +552,7 @@ 以句式為本的多義詞詞義辨識 (Word Sense Disambiguation Based on Syntactic Construction) [In <fixed-case>C</fixed-case>hinese] - Mei-ChihTsai + Mei-ChihTsai 483–494 O05-5006 tsai-2005-yi @@ -575,15 +575,15 @@ A Synchronous Corpus-Based Study on the Usage and Perception of Judgement Terms in the Pan-<fixed-case>C</fixed-case>hinese Context - Oi YeeKwong - Benjamin K.Tsou + Oi YeeKwong + Benjamin K.Tsou 519–532 O05-5009 kwong-tsou-2005-synchronous 《人民日報》語料庫命名實体分類的研究 (The <fixed-case>C</fixed-case>hinese Named Entity Categorization Based on the People’s Daily Corpus) [In <fixed-case>C</fixed-case>hinese] - YingJuXia + YingJuXia HaoYu FumihitoNishino 533–542 @@ -602,7 +602,7 @@ Source Domains as Concept Domains in Metaphorical Expressions - Siaw-FongChung + Siaw-FongChung KathleenAhrens Chu-RenHuang 553–570 diff --git a/data/xml/O06.xml b/data/xml/O06.xml index 8903799725..15d7690e10 100644 --- a/data/xml/O06.xml +++ b/data/xml/O06.xml @@ -4,7 +4,7 @@ Proceedings of the 18th Conference on Computational Linguistics and Speech Processing O06-1 - Yih-RuWang + Yih-RuWang Zaho-MingGao The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Hsinchu, Taiwan
@@ -26,7 +26,7 @@
中文動詞名物化判斷的統計式模型設計 (A Stochastic Model for Prediction of Deverbal Nouns in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese) [In <fixed-case>C</fixed-case>hinese] - Wei-YunMa + Wei-YunMa Chu-RenHuang 29–40 O06-1002 @@ -34,7 +34,7 @@ 大規模詞彙語意關係自動標示之初步研究: 以中文詞網(<fixed-case>C</fixed-case>hinese <fixed-case>W</fixed-case>ordnet)為例 (A Preliminary Study on Large-scale Automatic Labeling of Lexical Semantic Relations: A Case study of <fixed-case>C</fixed-case>hinese <fixed-case>W</fixed-case>ordnet) [In <fixed-case>C</fixed-case>hinese] - Shu-KaiHsieh + Shu-KaiHsieh PetrŠimon Chu-RenHuang 41–51 @@ -54,14 +54,14 @@ Improve Parsing Performance by Self-Learning Yu-MingHsieh Duen-ChiYang - Keh-JiannChen + Keh-JiannChen 63–76 O06-1005 hsieh-etal-2006-improve 國語雙字語詞聲調評分系統 (A Scoring System for <fixed-case>M</fixed-case>andarin Tones Uttered in Disyllabic Words) [In <fixed-case>C</fixed-case>hinese] - Hung-YanGu + Hung-YanGu Shih-YanSun Hsiao-FenChang 77–89 @@ -70,7 +70,7 @@ 一種用於網路電話之遺失封包補償方法 (A Packet Loss Concealment Method for Voice over <fixed-case>IP</fixed-case> ) [In <fixed-case>C</fixed-case>hinese] - Hung-YanGu + Hung-YanGu Zia-SinChen 91–110 O06-1007 @@ -123,7 +123,7 @@ 鑑別性事前資訊應用於強健性語音辨識 (Robust Speech Recognition Using Discriminative Prior Statistics) [In <fixed-case>C</fixed-case>hinese] Chuan-WeiTing Bo-ShuWu - Jen-TzungChien + Jen-TzungChien 189–204 O06-1013 ting-etal-2006-jian @@ -135,7 +135,7 @@ Zi-HeChen Zhi-RenZeng Yuan-FuLiao - Yau-TangJuang + Yau-TangJuang 205–219 O06-1014 chang-etal-2006-jie @@ -189,8 +189,8 @@ Learning to Parse Bilingual Sentences Using Bilingual Corpus and Monolingual <fixed-case>CFG</fixed-case> - Chung-ChiHuang - Jason S.Chang + Chung-ChiHuang + Jason S.Chang 329–351 O06-1021 huang-chang-2006-learning @@ -236,7 +236,7 @@ Modeling <fixed-case>C</fixed-case>antonese Pronunciation Variations for Large-Vocabulary Continuous Speech Recognition TanLee PatgiKam - Frank K.Soong + Frank K.Soong 17–36 O06-2002 lee-etal-2006-modeling @@ -244,8 +244,8 @@ A Maximum Entropy Approach for Semantic Language Modeling Chuang-HuaChueh - Hsin-MinWang - Jen-TzungChien + Hsin-MinWang + Jen-TzungChien 37–56 O06-2003 chueh-etal-2006-maximum @@ -274,7 +274,7 @@ Voice Activity Detection Based on Auto-Correlation Function Using Wavelet Transform and Teager Energy Operator Bing-FeiWu - Kun-ChingWang + Kun-ChingWang 87–100 O06-2006 wu-wang-2006-voice @@ -294,7 +294,7 @@ Two-Fold Filtering for <fixed-case>C</fixed-case>hinese Subcategorization Acquisition with Diathesis Alternations Used as Heuristic Information XiwuHan - TiejunZhao + TiejunZhao 101–114 O06-3001 han-zhao-2006-two @@ -302,9 +302,9 @@ <fixed-case>C</fixed-case>hinese Chunking Based on Maximum Entropy <fixed-case>M</fixed-case>arkov Models Guang-LuSun - Chang-NingHuang - Xiao-LongWang - Zhi-MingXu + Chang-NingHuang + Xiao-LongWang + Zhi-MingXu 115–136 O06-3002 sun-etal-2006-chinese @@ -313,7 +313,7 @@ A Structural-Based Approach to <fixed-case>C</fixed-case>antonese-<fixed-case>E</fixed-case>nglish Machine Translation YanWu XiukunLi - CaesarLun + CaesarLun 137–158 O06-3003 wu-etal-2006-structural @@ -351,7 +351,7 @@ An Empirical Study of Word Error Minimization Approaches for <fixed-case>M</fixed-case>andarin Large Vocabulary Continuous Speech Recognition Jen-WeiKuo Shih-HungLiu - Hsin-MinWang + Hsin-MinWang BerlinChen 201–222 O06-4002 @@ -360,26 +360,26 @@ Sense Extraction and Disambiguation for <fixed-case>C</fixed-case>hinese Words from Bilingual Terminology Bank Ming-HongBai - Keh-JiannChen - Jason S.Chang + Keh-JiannChen + Jason S.Chang 223–244 O06-4003 bai-etal-2006-sense A Probe into Ambiguities of Determinative-Measure Compounds - Shih-MinLi + Shih-MinLi Su-ChuLin - Chia-HungTai - Keh-JiannChen + Chia-HungTai + Keh-JiannChen 245–280 O06-4004 li-etal-2006-probe Tonal Errors of <fixed-case>J</fixed-case>apanese Students Learning <fixed-case>C</fixed-case>hinese: A Study of Disyllabic Words - Ke-JiaChang - Li-MeiChen + Ke-JiaChang + Li-MeiChen Nien-ChenLee 281–296 O06-4005 @@ -387,10 +387,10 @@ Performance Analysis and Visualization of Machine Translation Evaluation - JianminYao - YunqianQu + JianminYao + YunqianQu QiangLv - QiaomingZhu + QiaomingZhu JingZhang 297–314 O06-4006 @@ -420,7 +420,7 @@ Multiply Quantified Internally Headed Relative Clause in <fixed-case>J</fixed-case>apanese: A Skolem Term Based Approach RuiOtake - KeiYoshimoto + KeiYoshimoto 333-348 O06-5002 otake-yoshimoto-2006-multiply @@ -429,7 +429,7 @@ Data Management in <fixed-case>QRL</fixed-case>ex, an Online Aid System for Volunteer Translators’ YoucefBey KyoKageura - ChristianBoitet + ChristianBoitet 349–376 O06-5003 bey-etal-2006-data @@ -445,7 +445,7 @@ A Pragmatic <fixed-case>C</fixed-case>hinese Word Segmentation Approach Based on Mixing Models WeiJiang YiGuan - Xiao-LongWang + Xiao-LongWang 393–416 O06-5005 jiang-etal-2006-pragmatic diff --git a/data/xml/O07.xml b/data/xml/O07.xml index bbed4fc677..9b554220ed 100644 --- a/data/xml/O07.xml +++ b/data/xml/O07.xml @@ -4,7 +4,7 @@ Proceedings of the 19th Conference on Computational Linguistics and Speech Processing O07-1 - Kuang-HuaChen + Kuang-HuaChen BerlinChen The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Taipei, Taiwan
@@ -28,7 +28,7 @@ 貝氏主題混合資訊檢索模型 (<fixed-case>B</fixed-case>ayesian Topic Mixture Model for Information Retrieval) [In <fixed-case>C</fixed-case>hinese] Meng-SungWu Hsuan-JuiHsu - Jen-TzungChien + Jen-TzungChien 21–35 O07-1002 wu-etal-2007-bei @@ -36,9 +36,9 @@ <fixed-case>K</fixed-case>orean-<fixed-case>C</fixed-case>hinese Cross-Language Information Retrieval Based on Extension of Dictionaries and Transliteration Yu-ChunWang - Tzong-Han RichardTsai + Tzong-Han RichardTsai Hsu-ChunYen - Wen-LianHsu + Wen-LianHsu 37–44 O07-1003 wang-etal-2007-korean @@ -46,7 +46,7 @@ 加成性雜訊環境下運用特徵參數統計補償法於強健性語音辨識 (Feature Statistics Compensation for Robust Speech Recognition in Additive Noise Environments) [In <fixed-case>C</fixed-case>hinese] Tsung-hsuehHsieh - Jeih-weihHung + Jeih-weihHung 45–59 O07-1004 hsieh-hung-2007-jia @@ -70,15 +70,15 @@ 端點偵測技術在強健語音參數擷取之研究 (Study of the Voice Activity Detection Techniques for Robust Speech Feature Extraction) [In <fixed-case>C</fixed-case>hinese] - Wen-HsiangTu - Jeih-weihHung + Wen-HsiangTu + Jeih-weihHung 89–102 O07-1007 tu-hung-2007-duan 從不同韻律格式驗證階層式韻律架構並兼論對語音科技的應用 (One Base Form of Discourse Prosody Goes a Long Way– Evidence of Sytle Dependent Contribution and Possible Applilcation to Technology Development) [In <fixed-case>C</fixed-case>hinese] - Chiu-YuTseng + Chiu-YuTseng Zhao-YuSu 103–115 O07-1008 @@ -86,10 +86,10 @@ 多語聲學單位分類之最佳化研究 (The Study of Acoustic Model Clustering in Multilingual Speech Recognition) [In <fixed-case>C</fixed-case>hinese] - Dau-chengLyu - Ren-yuanLyu + Dau-chengLyu + Ren-yuanLyu Yung-JienChiang - Chun-nanHsu + Chun-nanHsu 117–130 O07-1009 lyu-etal-2007-duo @@ -97,7 +97,7 @@ 詞義辨識:機器學習演算法特徵的選取與組合 (Feature Selections in Word Sense Disambiguation) [In <fixed-case>C</fixed-case>hinese] Shao HangKao - Zhao MingGao + Zhao MingGao 131–144 O07-1010 kao-gao-2007-ci @@ -106,7 +106,7 @@ Word Translation Disambiguation via Dependency (利用依存關係之辭彙翻譯) Meng-ChinHsiao Kun-JuYang - Jason S.Chang + Jason S.Chang 145–159 O07-1011 hsiao-etal-2007-word @@ -114,9 +114,9 @@ Knowledge Representation for Interrogatives in <fixed-case>E</fixed-case>-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et Shu-LingHuang - You-ShanChung + You-ShanChung Yueh-YinShih - Keh-JiannChen + Keh-JiannChen 161–175 O07-1012 huang-etal-2007-knowledge @@ -133,7 +133,7 @@ 基於統計方法之中文搭配詞自動擷取 (<fixed-case>C</fixed-case>hinese Collocation Extracting Automation Based on Statistical Methods) [In <fixed-case>C</fixed-case>hinese] Tsui-YunChang - Su-JinKer + Su-JinKer 191–203 O07-1014 chang-ker-2007-ji @@ -150,16 +150,16 @@ 混合語言之語音的語言辨認 (Language Identification on Code-Switching Speech) [In <fixed-case>C</fixed-case>hinese] Chyng-LeeiChu - Dau-chengLyu - Ren-yuanLyu + Dau-chengLyu + Ren-yuanLyu 219–231 O07-1016 chu-etal-2007-hun 基於<fixed-case>HNM</fixed-case> 之國語音節信號的合成方法 (An <fixed-case>HNM</fixed-case> Based Method for Synthesizing <fixed-case>M</fixed-case>andarin Syllable Signal) [In <fixed-case>C</fixed-case>hinese] - Hung-yanGu - Yen-zuoZhou + Hung-yanGu + Yen-zuoZhou 233–243 O07-1017 gu-zhou-2007-ji @@ -176,7 +176,7 @@ ROCLING 2007 Poster Papers O07-2 - Kuang-HuaChen + Kuang-HuaChen BerlinChen The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Taipei, Taiwan
@@ -189,7 +189,7 @@ 中文詞彙語意資料的整合與擷取:詞彙語意學的觀點 (Extraction and Integration of <fixed-case>C</fixed-case>hinese Lexical Semantic Information) [In <fixed-case>C</fixed-case>hinese] - Zhao MingGao + Zhao MingGao 257–271 O07-2001 gao-2007-zhong @@ -198,7 +198,7 @@ 中文單詞之韻律模式研究 (A Study on Prosodic Modeling for Isolated <fixed-case>M</fixed-case>andarin Words) [In <fixed-case>C</fixed-case>hinese] Chi-FengChen Chen-YuChiang - Yih-RuWang + Yih-RuWang Sin-HorngChen 273–286 O07-2002 @@ -206,7 +206,7 @@ 以中文十億詞語料庫為基礎之兩岸詞彙對比研究 (A Study of Lexical Differences between <fixed-case>C</fixed-case>hina and <fixed-case>T</fixed-case>aiwan based on the <fixed-case>C</fixed-case>hinese <fixed-case>G</fixed-case>igaword Corpus) [In <fixed-case>C</fixed-case>hinese] - Jia-FeiHung + Jia-FeiHung Chu-RenHuang Ming-WeiXu 287–301 @@ -215,7 +215,7 @@ <fixed-case>VOT</fixed-case> productions of word-initial stops in <fixed-case>M</fixed-case>andarin and <fixed-case>E</fixed-case>nglish: A cross-language study - Li-meiChen + Li-meiChen Kuan-YiChao Jui-FengPeng 303–317 @@ -224,16 +224,16 @@ 台灣共通語言 (<fixed-case>T</fixed-case>aiwan Common Language) [In <fixed-case>C</fixed-case>hinese] - Ming-ShingYu + Ming-ShingYu 319–333 O07-2005 yu-2007-tai 中文詞義全文標記語料庫之設計與雛形製作 (Design and Prototype of a Fully Sense-tagged Corpus) [In <fixed-case>C</fixed-case>hinese] - Su-JinKer + Su-JinKer Chu-RenHuang - Jia-FeiHung + Jia-FeiHung Shih-yinLiu Hui-LingChien I-LiSu @@ -271,7 +271,7 @@ 應用文件重排序與局部查詢擴展於中文文件檢索之研究 (Improving Retrieval Effectiveness by Document Reranking and Local Expansion) [In <fixed-case>C</fixed-case>hinese] Wen-ChiWang - Bor-ShenLin + Bor-ShenLin 391–405 O07-2010 wang-lin-2007-ying @@ -279,7 +279,7 @@ 針對數學與科學教育領域之電腦輔助英中試題翻譯系統 (An Exploration of Computer Assisted Translation of Test Items for Mathematics and Sciences) [In <fixed-case>C</fixed-case>hinese] Ming-ShinLu - Zhao MingGao + Zhao MingGao Chao-LinLiu Chun-YenChang 407–421 @@ -289,7 +289,7 @@ Word sense induction using independent component analysis PetrŠimon - Jia-FeiHong + Jia-FeiHong 423–433 O07-2012 simon-hong-2007-word @@ -385,15 +385,15 @@ YiHu RuzhanLu YuquanChen - JianyongDuan + JianyongDuan 107–126 O07-4001 hu-etal-2007-using-generative An Empirical Study of Non-Stationary Ngram Model and its Smoothing Techniques - JinghuiXiao - BingquanLiu + JinghuiXiao + BingquanLiu XiaolongWang 127–154 O07-4002 @@ -419,7 +419,7 @@ Improve Parsing Performance by Self-Learning Yu-MingHsieh Duen-ChiYang - Keh-JiannChen + Keh-JiannChen 195–216 O07-4005 hsieh-etal-2007-improve @@ -458,7 +458,7 @@ A Novel Characterization of the Alternative Hypothesis Using Kernel Discriminant Analysis for <fixed-case>LLR</fixed-case>-Based Speaker Verification Yi-HsiangChao - Hsin-MinWang + Hsin-MinWang Ruei-ChuanChang 255–272 O07-5002 @@ -469,7 +469,7 @@ NenghengZheng TanLee NingWang - P. C.Ching + P. C.Ching 273–290 O07-5003 zheng-etal-2007-integrating @@ -478,7 +478,7 @@ Performance of Discriminative <fixed-case>HMM</fixed-case> Training in Noise JunDu PengLiu - Frank K.Soong + Frank K.Soong Jian-LaiZhou Ren-HuaWang 291–302 @@ -490,15 +490,15 @@ ToshiyukiTakezawa GenichiroKikui MasahideMizushima - EiichiroSumita + EiichiroSumita 303–324 O07-5005 takezawa-etal-2007-multilingual Exploiting <fixed-case>P</fixed-case>inyin Constraints in <fixed-case>P</fixed-case>inyin-to-Character Conversion Task: a Class-Based Maximum Entropy <fixed-case>M</fixed-case>arkov Model Approach - JinghuiXiao - BingquanLiu + JinghuiXiao + BingquanLiu XiaolongWang 325–348 O07-5006 @@ -518,19 +518,19 @@ Modeling <fixed-case>T</fixed-case>aiwanese <fixed-case>S</fixed-case>outhern-<fixed-case>M</fixed-case>in Tone Sandhi Using Rule-Based Methods - Un-GianIunn - Kiat-GakLau + Un-GianIunn + Kiat-GakLau Hong-GiauTan-Tenn Sheng-AnLee - Cheng-YanKao + Cheng-YanKao 349–370 O07-6001 iunn-etal-2007-modeling A System Framework for Integrated Synthesis of <fixed-case>M</fixed-case>andarin, <fixed-case>M</fixed-case>in-<fixed-case>N</fixed-case>an, and <fixed-case>H</fixed-case>akka Speech - Hung-YanGu - Yan-ZuoZhou + Hung-YanGu + Yan-ZuoZhou Huang-LiangLiau 371–390 O07-6002 @@ -543,7 +543,7 @@ Xiang-RuiZhong Zhen-FengLiang Hsiu-MinYu - Yih-RuWang + Yih-RuWang Sin-HorngChen 391–410 O07-6003 @@ -559,7 +559,7 @@ Automatic Pronunciation Assessment for <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese: Approaches and System Overview Jiang-ChunChen - Jyh-Shing RogerJang + Jyh-Shing RogerJang Te-LuTsai 443–458 O07-6005 @@ -568,7 +568,7 @@ A Knowledge-Based Approach for Unsupervised <fixed-case>C</fixed-case>hinese Coreference Resolution GraceNgai - Chi-ShingWang + Chi-ShingWang 459–484 O07-6006 ngai-wang-2007-knowledge diff --git a/data/xml/O08.xml b/data/xml/O08.xml index ca28cacd29..d8159b5484 100644 --- a/data/xml/O08.xml +++ b/data/xml/O08.xml @@ -17,37 +17,37 @@ Measuring Text Readability by Lexical Relations Retrieved from <fixed-case>W</fixed-case>ordnet - Shu-yenLin - Cheng-chaoSu - Yu-daLai - Li-chinYang - Shu-kaiHsieh + Shu-yenLin + Cheng-chaoSu + Yu-daLai + Li-chinYang + Shu-kaiHsieh 1–17 O08-1001 lin-etal-2008-measuring A Semantic Composition Method for Deriving Sense Representations of Determinative-Measure Compounds in <fixed-case>E</fixed-case>-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et - Chia-hungTai + Chia-hungTai Shu-LingHuang - Keh-JiannChen + Keh-JiannChen 18–37 O08-1002 tai-etal-2008-semantic-composition A Thesaurus-Based Semantic Classification of <fixed-case>E</fixed-case>nglish Collocations - Chung-chiHuang - Chiung-huiTseng + Chung-chiHuang + Chiung-huiTseng Kate H.Kao - Jason S.Chang + Jason S.Chang 38–52 O08-1003 huang-etal-2008-thesaurus 以<fixed-case>F</fixed-case>ujisaki模型驗證連續語流中字調及韻律詞對應於階層性韻律架構<fixed-case>HPG</fixed-case>的意義 (<fixed-case>M</fixed-case>andarin Discourse Prosody Other than Tones and Intonation – Decomposing the F0 Constitution by Prosodic Hierarchy with the Fujisaki Model) [In <fixed-case>C</fixed-case>hinese] - Chiu-YuTseng + Chiu-YuTseng Zhao-YuSu 53–65 O08-1004 @@ -55,7 +55,7 @@ 基於<fixed-case>ANN</fixed-case>之頻譜演進模型及其於國語語音合成之應用 (An <fixed-case>ANN</fixed-case> based Spectrum-progression Model and Its Application to <fixed-case>M</fixed-case>andarin Speech Synthesis) [In <fixed-case>C</fixed-case>hinese] - Hung-YanGu + Hung-YanGu Chang-YiWu 66–77 O08-1005 @@ -73,8 +73,8 @@ 調變頻譜正規化法使用於強健語音辨識之研究 (Study of Modulation Spectrum Normalization Techniques for Robust Speech Recognition) [In <fixed-case>C</fixed-case>hinese] Chih-ChengWang - Wen-hsiangTu - Jeih-weihHung + Wen-hsiangTu + Jeih-weihHung 93–107 O08-1007 wang-etal-2008-diao @@ -94,14 +94,14 @@ Chu-RenHuang Ting-ShuoYo PetrŠimon - Shu-KaiHsieh + Shu-KaiHsieh 123–136 O08-1009 huang-etal-2008-realistic 國台語無聲調拼音輸入法實作 (An Implementation of Toneless Input for <fixed-case>M</fixed-case>andarin and <fixed-case>T</fixed-case>aiwanese) [In <fixed-case>C</fixed-case>hinese] - Ming-ShingYu + Ming-ShingYu Cheng-RongTsai 137–150 O08-1010 @@ -117,11 +117,11 @@ 利用統計方法及中文訓練資料處理台語文詞性標記 (Modeling <fixed-case>T</fixed-case>aiwanese <fixed-case>POS</fixed-case> tagging with statistical methods and <fixed-case>M</fixed-case>andarin training data) [In <fixed-case>C</fixed-case>hinese] - Un-GianIunn - Chia-hungTai - Kiat-gakLau - Keh-JiannChen - Cheng YanKao + Un-GianIunn + Chia-hungTai + Kiat-gakLau + Keh-JiannChen + Cheng YanKao 166–179 O08-1012 iunn-etal-2008-li @@ -129,8 +129,8 @@ 中文名詞組的辨識:監督式與半監督式學習法的實驗 (<fixed-case>C</fixed-case>hinese <fixed-case>NP</fixed-case> Chunking: Experiments with Supervised,and Semisupervised Learning) [In <fixed-case>C</fixed-case>hinese] Yen HsiLin - Zhao MingGao - Cheng YanKao + Zhao MingGao + Cheng YanKao 180–193 O08-1013 lin-etal-2008-zhong @@ -138,8 +138,8 @@ 強健性語音辨識中能量相關特徵之改良式正規化技術的研究 (Study of the Improved Normalization Techniques of Energy-Related Features for Robust Speech Recognition) [In <fixed-case>C</fixed-case>hinese] Chi-anPan - Wen-hsiangTu - Jeih-weihHung + Wen-hsiangTu + Jeih-weihHung 194–208 O08-1014 pan-etal-2008-qiang @@ -154,16 +154,16 @@ Robust Voice Activity Detection Based on Discrete Wavelet Transform - Kun-ChingWang + Kun-ChingWang 216–228 O08-1016 wang-2008-robust 組合式倒頻譜統計正規化法於強健性語音辨識之研究 (Associative Cepstral Statistics Normalization Techniques for Robust Speech Recognition) [In <fixed-case>C</fixed-case>hinese] - Wen-hsiangTu + Wen-hsiangTu Kuang-chiehWu - Jeih-weihHung + Jeih-weihHung 229–243 O08-1017 tu-etal-2008-zu @@ -207,16 +207,16 @@ Automatic labeling of troponymy for <fixed-case>C</fixed-case>hinese verbs Chiao-ShanLo - Yi-RungChen + Yi-RungChen Chih-YuLin - Shu-KaiHsieh + Shu-KaiHsieh 284–292 O08-2004 lo-etal-2008-automatic 電腦輔助中學程度漢英翻譯習作環境之建置 (Computer Assisted Learning of <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Translation for Middle Schoolers) [In <fixed-case>C</fixed-case>hinese] - Min HuaLai + Min HuaLai Chao-LinLiu 293–307 O08-2005 @@ -242,7 +242,7 @@ 多領域文件集之詞彙概念擴展與知識架構之建立 (Conceptual Expansion and Ontological Mapping of Multi-domain Documents) [In <fixed-case>C</fixed-case>hinese] Yong-XiangChen Xiu-LingKe - Keh-JiannChen + Keh-JiannChen Chu-RenHuang 338–350 O08-2008 @@ -261,7 +261,7 @@ Chih-HaoYeh Wei-ChiTsai Yu-ChunWang - Richard Tzong-HanTsai + Richard Tzong-HanTsai 366–372 O08-2010 yeh-etal-2008-generating @@ -282,7 +282,7 @@ Exploring Shallow Answer Ranking Features in Cross-Lingual and Monolingual Factoid Question Answering Cheng-WeiLee Yi-HsunLee - Wen-LianHsu + Wen-LianHsu 1–26 O08-3001 lee-etal-2008-exploring @@ -290,9 +290,9 @@ Two Approaches for Multilingual Question Answering: Merging Passages vs. Merging Answers Rita M.Aceves-Pérez - ManuelMontes-y-Gómez - LuisVillaseñor-Pineda - L. AlfonsoUreña-López + ManuelMontes-y-Gómez + LuisVillaseñor-Pineda + L. AlfonsoUreña-López 27–40 O08-3002 aceves-perez-etal-2008-two @@ -357,7 +357,7 @@ A Study on Consistency Checking Method of Part-Of-Speech Tagging for <fixed-case>C</fixed-case>hinese Corpora HuZhang - JiahengZheng + JiahengZheng 157–170 O08-4002 zhang-zheng-2008-study @@ -366,7 +366,7 @@ Constructing a Temporal Relation Tagged Corpus of <fixed-case>C</fixed-case>hinese Based on Dependency Structure Analysis YuchangCheng MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 171–196 O08-4003 cheng-etal-2008-constructing @@ -381,7 +381,7 @@ A Cross-Linguistic Study of Voice Onset Time in Stop Consonant Productions Kuan-YiChao - Li-meiChen + Li-meiChen 215–232 O08-4005 chao-chen-2008-cross @@ -389,8 +389,8 @@ Data Driven Approaches to Phonetic Transcription with Integration of Automatic Speech Recognition and Grapheme-to-Phoneme for Spoken Buddhist Sutra Min-SiongLiang - Ren-YuanLyu - Yuang-ChinChiang + Ren-YuanLyu + Yuang-ChinChiang 233-254 O08-4006 liang-etal-2008-data @@ -409,7 +409,7 @@ Knowledge Representation and Sense Disambiguation for Interrogatives in <fixed-case>E</fixed-case>-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et Shu-LingHuang - Keh-JiannChen + Keh-JiannChen 255-278 O08-5001 huang-chen-2008-knowledge @@ -434,8 +434,8 @@ An <fixed-case>HNM</fixed-case> Based Scheme for Synthesizing <fixed-case>M</fixed-case>andarin Syllable Signal - Hung-YanGu - Yan-ZuoZhou + Hung-YanGu + Yan-ZuoZhou 327–342 O08-5004 gu-zhou-2008-hnm @@ -452,10 +452,10 @@ Acoustic Model Optimization for Multilingual Speech Recognition - Dau-ChengLyu - Chun-NanHsu - Yuang-ChinChiang - Ren-YuanLyu + Dau-ChengLyu + Chun-NanHsu + Yuang-ChinChiang + Ren-YuanLyu 363–386 O08-5006 lyu-etal-2008-acoustic @@ -484,10 +484,10 @@ Corpus Cleanup of Mistaken Agreement Using Word Sense Disambiguation - Liang-ChihYu + Liang-ChihYu Chung-HsienWu Jui-FengYeh - EduardHovy + EduardHovy 405–420 O08-6002 yu-etal-2008-corpus diff --git a/data/xml/O09.xml b/data/xml/O09.xml index cbea71080c..a84abb9450 100644 --- a/data/xml/O09.xml +++ b/data/xml/O09.xml @@ -5,7 +5,7 @@ Proceedings of the 21st Conference on Computational Linguistics and Speech Processing O09-1 June-JeiKuo - Jeih-WeihHung + Jeih-WeihHung The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Taichung, Taiwan
September @@ -43,7 +43,7 @@ 強健性語音辨識中分頻段調變頻譜補償之研究 (A Study of Sub-band Modulation Spectrum Compensation for Robust Speech Recognition) [In <fixed-case>C</fixed-case>hinese] Sheng-yuanHuang - Wen-hsiangTu + Wen-hsiangTu Jeih-weihHung 39–52 O09-1004 @@ -74,7 +74,7 @@ 中英文專利文書之文句對列 (Sentence alignment of <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese patent documents) [In <fixed-case>C</fixed-case>hinese] Kan-WenTien - Yuen-HsienTseng + Yuen-HsienTseng Chao-LinLiu 85–100 O09-1007 @@ -82,7 +82,7 @@ 意見持有者辨識之研究 (A Study on Identification of Opinion Holders) [In <fixed-case>C</fixed-case>hinese] - Chia-YingLee + Chia-YingLee Lun-WeiKu Hsin-HsiChen 101–114 @@ -92,7 +92,7 @@ Tonal effects on voice onset time: Stops in <fixed-case>M</fixed-case>andarin and <fixed-case>H</fixed-case>akka (聲調對嗓音起始時間的影響:以國語和客語為研究對象) Jui-FengPeng - Li-meiChen + Li-meiChen Yi-YunLin 115–124 O09-1009 @@ -102,7 +102,7 @@ Latent Prosody Model-Assisted <fixed-case>M</fixed-case>andarin Accent Identification Yuan-FuLiao Shuan-ChenYeh - Ming-FengTsai + Ming-FengTsai Wei-HsiungTing Sen-ChiaChang 125–136 @@ -112,14 +112,14 @@ 高解析度之國語類音素單元端點自動標示 (Sample-based Phone-like Unit Automatic Labeling in <fixed-case>M</fixed-case>andarin Speech) [In <fixed-case>C</fixed-case>hinese] You-YuLin - Yih-RuWang + Yih-RuWang 137–150 O09-1011 lin-wang-2009-gao 基於離散倒頻譜之頻譜包絡估計架構及其於語音轉換之應用 (A Discrete-cepstrum Based Spectrum-envelope Estimation Scheme and Its Application to Voice Transformation) [In <fixed-case>C</fixed-case>hinese] - Hung-YanGu + Hung-YanGu Song-FongTsai 151–164 O09-1012 @@ -131,7 +131,7 @@ Chao-LinLiu Wei-TiKuo Ying-TseSun - Min-HuaLai + Min-HuaLai 165–178 O09-1013 huang-etal-2009-dian @@ -154,9 +154,9 @@ Minimally Supervised Question Classification and Answering based on <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>W</fixed-case>ikipedia - JosephChang + JosephChang Tzu-HsiYen - Tzong-HanTsai + Tzong-HanTsai 209–222 O09-1016 chang-etal-2009-minimally @@ -183,7 +183,7 @@ 強健性語音辨識中基於小波轉換之分頻統計補償技術的研究 (A Study of Sub-band Feature Statistics Compensation Techniques Based on a Discrete Wavelet Transform for Robust Speech Recognition) [In <fixed-case>C</fixed-case>hinese] Hao-tengFan - Wen-HsiangTu + Wen-HsiangTu Jeih-weihHung 251–264 O09-1019 @@ -192,7 +192,7 @@ 併合式倒頻譜統計正規化技術於強健性語音辨識之研究 (A Study of Hybrid-based Cepstral Statistics Normalization Techniques for Robust Speech Recognition) [In <fixed-case>C</fixed-case>hinese] Guan-minHe - Wen-HsiangTu + Wen-HsiangTu Jeih-weihHung 265–278 O09-1020 @@ -214,7 +214,7 @@ 專利雙語語料之中、英對照詞自動擷取 (Automatic Term Pair Extraction from Bilingual Patent Corpus) [In <fixed-case>C</fixed-case>hinese] - Yuen-HsienTseng + Yuen-HsienTseng Chao-LinLiu Ze-JingChuang 279–292 @@ -285,7 +285,7 @@ Voice Activity Detection Using Spectral Entropy in Bark-Scale Wavelet Domain - Kun-chingWang + Kun-chingWang Tzuen-linHou Chuin-liChin 385–398 @@ -294,12 +294,12 @@ 讓格書寫以及台華互譯初探 (<fixed-case>L</fixed-case>ang<fixed-case>G</fixed-case>eh Orthography and an Initial Study of Statistical Translation Between <fixed-case>T</fixed-case>aiwanese and <fixed-case>M</fixed-case>andarin) [In <fixed-case>C</fixed-case>hinese] - Yuang-ChinChiang + Yuang-ChinChiang Pei-ChiYang Shu-ChinLin Chun-huangChang Ming-TatKo - Ren-YuanLyu + Ren-YuanLyu Meng ChangChen 399–414 O09-2010 @@ -319,44 +319,44 @@ Fertility-based Source-Language-biased Inversion Transduction Grammar for Word Alignment - Chung-ChiHuang - Jason S.Chang + Chung-ChiHuang + Jason S.Chang 1–18 O09-3001 huang-chang-2009-fertility Automatic Sense Derivation for Determinative-Measure Compounds under the Framework of <fixed-case>E</fixed-case>-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et - Chia-HungTai + Chia-HungTai Jia-ZenFan Shu-LingHuang - Keh-JiannChen + Keh-JiannChen 19–44 O09-3002 tai-etal-2009-automatic Assessing Text Readability Using Hierarchical Lexical Relations Retrieved from <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et - Shu-YenLin - Cheng-ChaoSu - Yu-DaLai - Li-ChinYang - Shu-KaiHsieh + Shu-YenLin + Cheng-ChaoSu + Yu-DaLai + Li-ChinYang + Shu-KaiHsieh 45–84 O09-3003 lin-etal-2009-assessing Summarization Assistant for News Brief Services on Cellular Phones - Yuen-HsienTseng + Yuen-HsienTseng 85–104 O09-3004 tseng-2009-summarization Study of Associative Cepstral Statistics Normalization Techniques for Robust Speech Recognition in Additive Noise Environments - Wen-HsiangTu - Jeih-weihHung + Wen-HsiangTu + Jeih-weihHung 105–132 O09-3005 tu-hung-2009-study @@ -367,7 +367,7 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 14, Number 2, June 2009-Special Issue on Computer Assisted Language Learning O09-4 Chao-LinLiu - Zhao-MingGao + Zhao-MingGao June 2009 ijclclp @@ -378,7 +378,7 @@ Speech-Based Interactive Games for Language Learning: Reading, Translation, and Question-Answering YushiXu - StephanieSeneff + StephanieSeneff O09-4001 xu-seneff-2009-speech @@ -403,7 +403,7 @@ A Corpus-based Study on Figurative Language through the <fixed-case>C</fixed-case>hinese Five Elements and Body Part Terms - Siaw-FongChung + Siaw-FongChung O09-4005 chung-2009-corpus @@ -412,8 +412,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 14, Number 3, September 2009 O09-5 - Yuen-HsienTseng - Kuang-HuaChen + Yuen-HsienTseng + Kuang-HuaChen September 2009 ijclclp @@ -423,20 +423,20 @@ Modeling <fixed-case>T</fixed-case>aiwanese <fixed-case>POS</fixed-case> Tagging Using Statistical Methods and <fixed-case>M</fixed-case>andarin Training Data - Un-GianIunn + Un-GianIunn Jia-hungTai - Kiat-GakLau - Cheng-yanKao - Keh-jiannChen + Kiat-GakLau + Cheng-yanKao + Keh-jiannChen O09-5001 iunn-etal-2009-modeling A Thesaurus-Based Semantic Classification of <fixed-case>E</fixed-case>nglish Collocations - Chung-ChiHuang + Chung-ChiHuang Kate H.Kao - Chiung-HuiTseng - Jason S.Chang + Chiung-HuiTseng + Jason S.Chang O09-5002 huang-etal-2009-thesaurus @@ -444,14 +444,14 @@ Automatic Recognition of <fixed-case>C</fixed-case>antonese-<fixed-case>E</fixed-case>nglish Code-Mixing Speech Joyce Y. C.Chan HouweiCao - P. C.Ching + P. C.Ching TanLee O09-5003 chan-etal-2009-automatic Corpus, Lexicon, and Construction: A Quantitative Corpus Approach to <fixed-case>M</fixed-case>andarin Possessive Construction - Cheng-HsienChen + Cheng-HsienChen O09-5004 chen-2009-corpus @@ -460,8 +460,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 14, Number 4, December 2009 O09-6 - Yuen-HsienTseng - Kuang-HuaChen + Yuen-HsienTseng + Kuang-HuaChen December 2009 ijclclp @@ -472,22 +472,22 @@ Tonal Effects on Voice Onset Time Jui-FengPeng - Li-meiChen + Li-meiChen Chia-ChengLee O09-6001 peng-etal-2009-tonal-effects A Discrete-cepstrum Based Spectrum-envelope Estimation Scheme and Its Example Application of Voice Transformation - Hung-YanGu - Sung-FengTsai + Hung-YanGu + Sung-FengTsai O09-6002 gu-tsai-2009-discrete Identification of Opinion Holders Lun-WeiKu - Chia-YingLee + Chia-YingLee Hsin-HsiChen O09-6003 ku-etal-2009-identification diff --git a/data/xml/O10.xml b/data/xml/O10.xml index a5f68936b3..67a3287528 100644 --- a/data/xml/O10.xml +++ b/data/xml/O10.xml @@ -4,7 +4,7 @@ Proceedings of the 22nd Conference on Computational Linguistics and Speech Processing (ROCLING 2010) Shih-HungWu - Jeih-weihHung + Jeih-weihHung The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Nantou, Taiwan
September @@ -80,7 +80,7 @@ 基於對照表以及語言模型之簡繁字體轉換 (<fixed-case>C</fixed-case>hinese Characters Conversion System based on Lookup Table and Language Model) [In <fixed-case>C</fixed-case>hinese] Min-HsiangLi Shih-HungWu - Ping-cheYang + Ping-cheYang TsunKu 113–127 O10-1008 @@ -91,17 +91,17 @@ Chao-ShainnHuang Yu-ChiChang Chao-LinLiu - Yuan-HsienTseng + Yuan-HsienTseng 128–142 O10-1009 huang-etal-2010-yi-gong
Term Contributed Boundary Feature using Conditional Random Fields for <fixed-case>C</fixed-case>hinese Word Segmentation Task - Tian-JianJiang + Tian-JianJiang Shih-HungLiu Cheng-LungSung - Wen-LianHsu + Wen-LianHsu 143–156 O10-1010 jiang-etal-2010-term @@ -111,7 +111,7 @@ Jyun-WeiHuang Chia PeiKao Chun-YuChen - Tzong-HanTsai + Tzong-HanTsai 157–171 O10-1011 huang-etal-2010-ji @@ -123,7 +123,7 @@ Chang-AnShih Yen-ChingHsu Pei-YuHsu - Shu-KaiHsieh + Shu-KaiHsieh 172–183 O10-1012 chen-etal-2010-classifying @@ -134,7 +134,7 @@ Chien-LiangChen Chun-ChiehLiu Chao-LinLiu - Von-WunSoo + Von-WunSoo 184–198 O10-1013 sun-etal-2010-zhong @@ -160,9 +160,9 @@ 可變速中文文字轉語音系統 (Variable Speech Rate <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Text-to-Speech System) [In <fixed-case>C</fixed-case>hinese] - Qi-QuanHuang + Qi-QuanHuang Chen-YuChiang - Yih-RuWang + Yih-RuWang Hsiu-MinYu Sin-HorngChen 222–235 @@ -172,7 +172,7 @@ 進階式調變頻譜補償法於強健性語音辨識之研究 (Advanced Modulation Spectrum Compensation Techniques for Robust Speech Recognition) [In <fixed-case>C</fixed-case>hinese] Wei-JeihYeh - Wen-hsiangTu + Wen-hsiangTu Jeih-weihHung 236–250 O10-1017 @@ -193,10 +193,10 @@ Identifying Correction Rules for Auto Editing - AntaHuang + AntaHuang Tsung-TingKuo Ying-ChunLai - Shou-deLin + Shou-deLin 251–265 O10-2001 huang-etal-2010-identifying @@ -204,24 +204,24 @@ 台灣學生英文寫作冠詞錯誤分析 (<fixed-case>E</fixed-case>nglish article errors in <fixed-case>T</fixed-case>aiwanese college students’ <fixed-case>EFL</fixed-case> writing) Neil EdwardBarrett - Li-meiChen + Li-meiChen 266–280 O10-2002 barrett-chen-2010-tai 結合<fixed-case>HMM</fixed-case> 頻譜模型與<fixed-case>ANN</fixed-case> 韻律模型之國語語音合成系統 (A <fixed-case>M</fixed-case>andarin Speech Synthesis System Combining <fixed-case>HMM</fixed-case> Spectrum Model and <fixed-case>ANN</fixed-case> Prosody Model) [In <fixed-case>C</fixed-case>hinese] - Hung-YanGu + Hung-YanGu Ming-YenLai - Sung-FungTsai + Sung-FungTsai 281–295 O10-2003 gu-etal-2010-jie 美國專利書「獨立項數」之搭配詞初探: 以<fixed-case>L</fixed-case>exis<fixed-case>N</fixed-case>exis 法律資料庫為例 (Collocation Features of Independent Claim in <fixed-case>U</fixed-case>.<fixed-case>S</fixed-case>. Patent Documents: Information Retrieval from <fixed-case>L</fixed-case>exis<fixed-case>N</fixed-case>exis) - Hsin-HungLin - Ching-yuHsieh + Hsin-HungLin + Ching-yuHsieh 296–310 O10-2004 lin-hsieh-2010-mei @@ -244,7 +244,7 @@ 結合音長與發音特徵於<fixed-case>GTB</fixed-case> 之腔調化語音辨識 (Accented Speech Recognition based on Gradient Tree Boosting with Duration and Articulation Features) [In <fixed-case>C</fixed-case>hinese] - Ming-chinYen + Ming-chinYen Po-SanLai Jui-FengYeh 335–349 @@ -254,7 +254,7 @@ Discerning Emotions of Bloggers based on Topics – a Supervised Coreference Approach in <fixed-case>B</fixed-case>engali DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 350–363 O10-2008 das-bandyopadhyay-2010-discerning @@ -262,17 +262,17 @@ 應用直方圖均化於統計式未知詞萃取之研究 (Histogram Equalization for Statistical Unknown Word Extraction) [In <fixed-case>C</fixed-case>hinese] Yi-CongChen - Bor-ShenLin + Bor-ShenLin 364–378 O10-2009 chen-lin-2010-ying Qualia Modification in Noun-Noun Compounds: A Cross-Language Survey - Chih-yaoLee + Chih-yaoLee Chia-haoChang Wei-chiehHsu - Shu-kaiHsieh + Shu-kaiHsieh 379–390 O10-2010 lee-etal-2010-qualia @@ -282,8 +282,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 15, Number 1, March 2010 O10-3 - Yuen-HsienTseng - Kuang-huaChen + Yuen-HsienTseng + Kuang-huaChen March 2010 ijclclp @@ -304,7 +304,7 @@ Min-HsiangLi Shih-HungWu Yi-ChingZeng - Ping-cheYang + Ping-cheYang TsunKu O10-3002 li-etal-2010-ji-yu @@ -320,7 +320,7 @@ Cross-Validation and Minimum Generation Error based Decision Tree Pruning for <fixed-case>HMM</fixed-case>-based Speech Synthesis HengLu Zhen-HuaLing - Li-RongDai + Li-RongDai Ren-HuaWang O10-3004 lu-etal-2010-cross @@ -330,8 +330,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 15, Number 2, June 2010 O10-4 - Yuen-HsienTseng - Kuang-huaChen + Yuen-HsienTseng + Kuang-huaChen June 2010 ijclclp @@ -342,13 +342,13 @@ A <fixed-case>P</fixed-case>unjabi to <fixed-case>H</fixed-case>indi Machine Transliteration System Gurpreet SinghJosan - Gurpreet SinghLehal + Gurpreet SinghLehal O10-4001 josan-lehal-2010-punjabi A Posteriori Individual Word Language Models for <fixed-case>V</fixed-case>ietnamese Language - Le QuanHa + Le QuanHa Tran Thi ThuVan Hoang TienLong Nguyen HuuTinh @@ -361,7 +361,7 @@ Improving the Template Generation for <fixed-case>C</fixed-case>hinese Character Error Detection with Confusion Sets Yong-ZhiChen Shih-HungWu - Ping-cheYang + Ping-cheYang TsunKu O10-4003 chen-etal-2010-improving @@ -369,9 +369,9 @@ 以最佳化及機率分佈標記形聲字聲符之研究 (Annotating Phonetic Component of <fixed-case>C</fixed-case>hinese Characters Using Constrained Optimization and Pronunciation Distribution) [In <fixed-case>C</fixed-case>hinese] Chia-HuiChang - Shu-YenLin + Shu-YenLin Shu-YingLi - Meng-FengTsai + Meng-FengTsai Shu-PingLi Hsiang-MeiLiao Chih-WenSun @@ -384,8 +384,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 15, Number 3-4, September/December 2010 O10-5 - Yuen-HsienTseng - Kuang-huaChen + Yuen-HsienTseng + Kuang-huaChen September/December 2010 ijclclp @@ -403,7 +403,7 @@ Word Sense Disambiguation Using Multiple Contextual Features - Liang-ChihYu + Liang-ChihYu Chung-HsienWu Jui-FengYeh O10-5002 @@ -419,10 +419,10 @@ Discovering Correction Rules for Auto Editing - An-TaHuang + An-TaHuang Tsung-TingKuo Ying-ChunLai - Shou-DeLin + Shou-DeLin O10-5004 huang-etal-2010-discovering diff --git a/data/xml/O11.xml b/data/xml/O11.xml index ade309c131..069b2c9f6d 100644 --- a/data/xml/O11.xml +++ b/data/xml/O11.xml @@ -5,7 +5,7 @@ Proceedings of the 23rd Conference on Computational Linguistics and Speech Processing (ROCLING 2011) O11-1 Wei-HoTsai - Liang-ChihYu + Liang-ChihYu The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Taipei, Taiwan
September @@ -29,8 +29,8 @@ Compensating the Speech Features via Discrete Cosine Transform for Robust Speech Recognition (基於離散餘弦轉換之語音特徵的強健性補償法) Hsin-JuHsieh - Wen-hsiangTu - Jeih-weihHung + Wen-hsiangTu + Jeih-weihHung 21–42 O11-1002 hsieh-etal-2011-compensating @@ -54,11 +54,11 @@ 片語式機器翻譯中未知詞與落單字的問題探討 (Learning to Deal with the <fixed-case>OOV</fixed-case> Problem in Phrase-based <fixed-case>MT</fixed-case> System) [In <fixed-case>C</fixed-case>hinese] Ming-ChuanChiang - Chung-ChiHuang + Chung-ChiHuang “He,Ho-Ching”Yen - Shih-TingHuang + Shih-TingHuang Chun-ShengChang - Ping-CheYang + Ping-CheYang TsunKu 79–93 O11-1005 @@ -76,11 +76,11 @@ Unsupervised Overlapping Feature Selection for Conditional Random Fields Learning in <fixed-case>C</fixed-case>hinese Word Segmentation - Ting-haoYang - Tian-JianJiang - Chan-hungKuo - Richard Tzong-hanTsai - Wen-lianHsu + Ting-haoYang + Tian-JianJiang + Chan-hungKuo + Richard Tzong-hanTsai + Wen-lianHsu 109–122 O11-1007 yang-etal-2011-unsupervised @@ -97,8 +97,8 @@ 動補結構的及物性及修飾對象 (Transitivity of a <fixed-case>C</fixed-case>hinese Verb-result Compound and Affected Argument of the Result Verb) [In <fixed-case>C</fixed-case>hinese] - You-shanChung - Keh-JiannChen + You-shanChung + Keh-JiannChen 139–150 O11-1009 chung-chen-2011-dong @@ -115,7 +115,7 @@ 聲符部件排序與形聲字發音規則探勘 (Pronunciation Rules Discovery for Picto-Phonetic <fixed-case>C</fixed-case>hinese Characters) [In <fixed-case>C</fixed-case>hinese] Chia-HuiChang - Shu-YenLin + Shu-YenLin 166–178 O11-1011 chang-lin-2011-sheng @@ -126,7 +126,7 @@ Jing-ChenYang Yu-YunChang Yu-WenLiu - Shu-KaiHsieh + Shu-KaiHsieh 179–193 O11-1012 wang-etal-2011-frequency @@ -150,7 +150,7 @@ Wen-YiChu Yu-ChenKao BerlinChen - Jeih-WeihHung + Jeih-WeihHung 194–206 O11-2001 chu-etal-2011-ji @@ -166,8 +166,8 @@ 使用分段式<fixed-case>GMM</fixed-case> 及自動<fixed-case>GMM</fixed-case> 挑選之語音轉換方法 (A Voice Conversion Method Using Segmental <fixed-case>GMM</fixed-case>s and Automatic <fixed-case>GMM</fixed-case> Selection) [In <fixed-case>C</fixed-case>hinese] - Hung-YanGu - Sung-FungTsai + Hung-YanGu + Sung-FungTsai 216–226 O11-2003 gu-tsai-2011-shi @@ -220,7 +220,7 @@ 結合語言模型與網路知識源於列印前檢查 (Print Pickets Combined Language Models and Knowledge Resources in Web) [In <fixed-case>C</fixed-case>hinese] Yu-JuiHuang - Ming-ChinYen + Ming-ChinYen Guan-HueiWu Yao-YiWang Jui-FengYeh @@ -230,7 +230,7 @@ Diagnosing Discoursal Organization in Learner Writing via Conjunctive Adverbials (診斷學習者英語寫作篇章結構:以篇章連接副詞為例) Tung-yuKao - Li-meiChen + Li-meiChen 310–322 O11-2010 kao-chen-2011-diagnosing @@ -255,7 +255,7 @@ Chaio-WenHsieh Wei-HsuanLin Chun-YiLiu - Liang-ChihYu + Liang-ChihYu 349–360 O11-2013 wu-etal-2011-duo @@ -265,8 +265,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 16, Number 1-2, March/June 2011 O11-3 - Yuen-HsienTseng - Kuang-huaChen + Yuen-HsienTseng + Kuang-huaChen March/June 2011 ijclclp @@ -310,8 +310,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 16, Number 3-4, September/December 2011 O11-4 - Yuen-HsienTseng - Kuang-huaChen + Yuen-HsienTseng + Kuang-huaChen September/December 2011 ijclclp @@ -322,20 +322,20 @@ <fixed-case>E</fixed-case>nglish Article Errors in <fixed-case>T</fixed-case>aiwanese College Students’ <fixed-case>EFL</fixed-case> Writing Neil EdwardBarrett - Li-meiChen + Li-meiChen O11-4001 barrett-chen-2011-english 基於辭典辭彙釋義之多階層釋義關聯程度計量-以「目」字部為例 (A Measurement of Multi-Level Semantic Relations among <fixed-case>M</fixed-case>andarin Lexemes with Radical mu4: A Study based on Dictionary Explanations) - F. Y. AugustChao - Siaw-FongChung + F. Y. AugustChao + Siaw-FongChung O11-4002 chao-chung-2011-ji Histogram Equalization on Statistical Approaches for <fixed-case>C</fixed-case>hinese Unknown Word Extraction - Bor-ShenLin + Bor-ShenLin Yi-CongChen O11-4003 lin-chen-2011-histogram @@ -349,8 +349,8 @@ Characteristics of Independent Claim: A Corpus-Linguistic Approach to Contemporary <fixed-case>E</fixed-case>nglish Patents - Darren Hsin-HungLin - Shelley Ching-YuHsieh + Darren Hsin-HungLin + Shelley Ching-YuHsieh O11-4005 lin-hsieh-2011-characteristics diff --git a/data/xml/O12.xml b/data/xml/O12.xml index 84f9256c16..757a4f5573 100644 --- a/data/xml/O12.xml +++ b/data/xml/O12.xml @@ -4,8 +4,8 @@ Proceedings of the 24th Conference on Computational Linguistics and Speech Processing (ROCLING 2012) O12-1 - Richard Tzong-HanTsai - Liang-ChihYu + Richard Tzong-HanTsai + Liang-ChihYu The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Chung-Li, Taiwan
September @@ -19,7 +19,7 @@ 改良式統計圖等化法強鍵性語音辨識之研究 (Improved Histogram Equalization Methods for Robust Speech Recognition) [In <fixed-case>C</fixed-case>hinese] Hsin-JuHsieh - Jeih-weihHung + Jeih-weihHung BerlinChen 1–2 O12-1001 @@ -27,7 +27,7 @@ 以線性多變量迴歸來對映分段後音框之語音轉換方法 (A Voice Conversion Method Mapping Segmented Frames with Linear Multivariate Regression) [In <fixed-case>C</fixed-case>hinese] - Hung-YanGu + Hung-YanGu Jia-WeiChang Zan-WeiWang 3–14 @@ -36,7 +36,7 @@ Acoustic Variability in the Speech of Children with Cerebral Palsy - Li-meiChen + Li-meiChen Han-chihNi Tzu-WenKuo Kuei-LingHsu @@ -47,7 +47,7 @@ 領域相關詞彙極性分析及文件情緒分類之研究 (Domain Dependent Word Polarity Analysis for Sentiment Classification) [In <fixed-case>C</fixed-case>hinese] Ho-ChengYu - Ting-HaoHuang + Ting-HaoHuang Hsin-HsiChen 30–31 O12-1004 @@ -63,9 +63,9 @@ Associating Collocations with <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Senses Using Hybrid Models - Yi-ChunChen + Yi-ChunChen Tzu-XiYen - Jason S.Chang + Jason S.Chang 47–60 O12-1006 chen-etal-2012-associating @@ -73,7 +73,7 @@ Measuring Individual Differences in Word Recognition: The Role of Individual Lexical Behaviors Hsin-NiLin - Shu-KaiHsieh + Shu-KaiHsieh Shiao-HuiChan 61–74 O12-1007 @@ -92,7 +92,7 @@ 基於決策樹演算法之台語連音變調預估模組 (A Prediction Module for <fixed-case>T</fixed-case>aiwanese Tone Sandhi Based on the Decision Tree Algorithm) [In <fixed-case>C</fixed-case>hinese] Neng-HuangPan - Ming-ShingYu + Ming-ShingYu Pei-ChunTsai 92–101 O12-1009 @@ -101,10 +101,10 @@ 台語文字與語音語料庫之建置 (Development of a <fixed-case>T</fixed-case>aiwanese Speech and Text Corpus) [In <fixed-case>C</fixed-case>hinese] Tzu-YuLiao - Ren-yuanLyu + Ren-yuanLyu Ming-TatKo - Yuang-chinChiang - Jyh-ShingJang + Yuang-chinChiang + Jyh-ShingJang 102–111 O12-1010 liao-etal-2012-tai @@ -133,15 +133,15 @@ Tung-JiaChang Chueh-AnYen Chao-JuChen - Shou-deLin + Shou-deLin 140–141 O12-1013 lin-etal-2012-li Detecting and Correcting Syntactic Errors in Machine Translation Using Feature-Based <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - Wei-YunMa - KathleenMcKeown + Wei-YunMa + KathleenMcKeown 142–143 O12-1014 ma-mckeown-2012-detecting @@ -206,8 +206,8 @@ Disambiguating Main <fixed-case>POS</fixed-case> tags for <fixed-case>T</fixed-case>urkish RaziehEhsani Muzaffer EgeAlper - GülşenEryiğit - EşrefAdali + GülşenEryiğit + EşrefAdali 202–213 O12-1021 ehsani-etal-2012-disambiguating @@ -216,9 +216,9 @@ 台語朗讀資料庫之自動切音技術應用於音文同步有聲書之建立 (Automatic Time Alignment for a <fixed-case>T</fixed-case>aiwanese Read Speech Corpus and its Application to Constructing Audiobooks with Text-Speech Synchronization) [In <fixed-case>C</fixed-case>hinese] Wei-jayHuang Jhih-rouLin - Ren-yuanLyu - Yuang-chinChiang - Jyh-Shing RogerJang + Ren-yuanLyu + Yuang-chinChiang + Jyh-Shing RogerJang Ming-TatKo 214–230 O12-1022 @@ -237,9 +237,9 @@ Translating Collocation using Monolingual and Parallel Corpus Ming-ZhuanJiang Tzu-XiYen - Chung-ChiHuang - Mei-HuaChen - Jason S.Chang + Chung-ChiHuang + Mei-HuaChen + Jason S.Chang 246–260 O12-1024 jiang-etal-2012-translating @@ -250,7 +250,7 @@ BilelElayeb IbrahimBounhas FabriceEvrard - Narjès Bellamine BenSaoud + Narjès Bellamine BenSaoud 261–275 O12-1025 khiroun-etal-2012-possibilistic @@ -258,7 +258,7 @@ 利用關聯式規則解決台語文轉音系統中一詞多音之歧異 (Applying Association Rules in Solving the Polysemy Problem in a <fixed-case>C</fixed-case>hinese to <fixed-case>T</fixed-case>aiwanese <fixed-case>TTS</fixed-case> System) [In <fixed-case>C</fixed-case>hinese] Yih-JengLin - Ming-ShingYu + Ming-ShingYu Wei-LunLi 276–291 O12-1026 @@ -269,7 +269,7 @@ Yu-HaoLin Yu-LanLiu Tzu-XiYen - Jason S.Chang + Jason S.Chang 292–306 O12-1027 lin-etal-2012-context @@ -278,7 +278,7 @@ Implementation of <fixed-case>M</fixed-case>alayalam Morphological Analyzer Based on Hybrid Approach Vinod P.M JayanV - Bhadran V.K + Bhadran V.K 307–317 O12-1028 m-etal-2012-implementation @@ -290,7 +290,7 @@ SwapanDebbarma DipankarDas AmitavaDas - SivajiBandyopadhyay + SivajiBandyopadhyay 318–325 O12-1029 patra-etal-2012-light @@ -299,9 +299,9 @@ 台語關鍵詞辨識之實作與比較 (Implementation and Comparison of Keyword Spotting for <fixed-case>T</fixed-case>aiwanese) [In <fixed-case>C</fixed-case>hinese] Chung-CheWang Che-HsuanChou - Liang-YuChen + Liang-YuChen Yu-JheLi - Jyh-ShingJang + Jyh-ShingJang Hsun-ChengHu Shih-PengLin You-LianHuang @@ -328,7 +328,7 @@ Collaborative Annotation and Visualization of Functional and Discourse Structures HengbinYan - JonathanWebster + JonathanWebster 366–374 O12-1033 yan-webster-2012-collaborative @@ -339,7 +339,7 @@ Shih-HungWu Liang-PuChen Wen-TaiHsieh - Seng-Cho T.Chou + Seng-Cho T.Chou 375–384 O12-1034 yang-etal-2012-ji @@ -349,8 +349,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 17, Number 1, March 2012 O12-2 - Yuen-HsienTseng - Kuang-huaChen + Yuen-HsienTseng + Kuang-huaChen March 2012 ijclclp @@ -370,8 +370,8 @@ 可變速中文文字轉語音系統 (Variable Speech Rate <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Text-to-Speech System) [In <fixed-case>C</fixed-case>hinese] Chen-YuChiang - Qi-QuanHuang - Yih-RuWang + Qi-QuanHuang + Yih-RuWang Hsiu-MinYu Sin-HorngChen O12-2002 @@ -379,7 +379,7 @@ The Polysemy Problem, an Important Issue in a <fixed-case>C</fixed-case>hinese to <fixed-case>T</fixed-case>aiwanese <fixed-case>TTS</fixed-case> System - Ming-ShingYu + Ming-ShingYu Yih-JengLin O12-2003 yu-lin-2012-polysemy @@ -396,7 +396,7 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 17, Number 2, June 2012—Special Issue on Selected Papers from ROCLING XXIII O12-3 - Liang-ChihYu + Liang-ChihYu Wei-HoTsai June 2012 @@ -407,8 +407,8 @@ Transitivity of a <fixed-case>C</fixed-case>hinese Verb-Result Compound and Affected Argument of the Result Verb - You-shanChung - Keh-JiannChen + You-shanChung + Keh-JiannChen O12-3001 chung-chen-2012-transitivity @@ -426,7 +426,7 @@ Jing-ChenYang Yu-YunChang Yu-WenLiu - Shu-KaiHsieh + Shu-KaiHsieh O12-3003 wang-etal-2012-frequency @@ -441,8 +441,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 17, Number 3, September 2012 O12-4 - Yuen-HsienTseng - Kuang-huaChen + Yuen-HsienTseng + Kuang-huaChen September 2012 ijclclp @@ -461,8 +461,8 @@ 聲符部件排序與形聲字發音規則探勘 (Phonetic Component Ranking and Pronunciation Rules Discovery for Picto-Phonetic <fixed-case>C</fixed-case>hinese Characters) [In <fixed-case>C</fixed-case>hinese] Chia-HuiChang - Shu-YenLin - Meng-FengTsai + Shu-YenLin + Meng-FengTsai Shu-PingLi Hsiang-MeiLiao Norden E.Huang @@ -471,12 +471,12 @@ Enhancement of Feature Engineering for Conditional Random Field Learning in <fixed-case>C</fixed-case>hinese Word Segmentation Using Unlabeled Data - Mike Tian-JianJiang + Mike Tian-JianJiang Cheng-WeiShih - Ting-HaoYang - Chan-HungKuo - Richard Tzong-HanTsai - Wen-LianHsu + Ting-HaoYang + Chan-HungKuo + Richard Tzong-HanTsai + Wen-LianHsu O12-4003 jiang-etal-2012-enhancement @@ -500,11 +500,11 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 17, Number 4, December 2012-Special Issue on Selected Papers from ROCLING XXIV O12-5 - Liang-ChihYu - Richard Tzong-HanTsai + Liang-ChihYu + Richard Tzong-HanTsai Chia-PingChen Cheng-ZenYang - Shu-KaiHsieh + Shu-KaiHsieh December 2012 ijclclp @@ -514,8 +514,8 @@ Detecting and Correcting Syntactic Errors in Machine Translation Using Feature-Based <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - Wei-YunMa - KathleenMcKeown + Wei-YunMa + KathleenMcKeown O12-5001 ma-mckeown-2012-detecting-correcting @@ -530,7 +530,7 @@ 領域相關詞彙極性分析及文件情緒分類之研究 (Domain Dependent Word Polarity Analysis for Sentiment Classification) [In <fixed-case>C</fixed-case>hinese] Ho-ChengYu - Ting-Hao KennethHuang + Ting-Hao KennethHuang Hsin-HsiChen O12-5003 yu-etal-2012-ling-yu @@ -542,14 +542,14 @@ Tung-JiaChang Chueh-AnYen Chao-JuChen - Shou-deLin + Shou-deLin O12-5004 lin-etal-2012-li-yong-ji 語音辨識使用統計圖等化方法 (Speech Recognition Leveraging Histogram Equalization Methods) [In <fixed-case>C</fixed-case>hinese] Hsin-JuHsieh - Jeih-weihHung + Jeih-weihHung BerlinChen O12-5005 hsieh-etal-2012-yu diff --git a/data/xml/O13.xml b/data/xml/O13.xml index 892ebc6cbc..56c0688ccc 100644 --- a/data/xml/O13.xml +++ b/data/xml/O13.xml @@ -5,7 +5,7 @@ Proceedings of the 25th Conference on Computational Linguistics and Speech Processing (ROCLING 2013) O13-1 Hung-DuenYang - Wen-LianHsu + Wen-LianHsu Chia-PingChen The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Kaohsiung, Taiwan
@@ -21,7 +21,7 @@ 改良語句模型技術於節錄式語音摘要之研究 (Improved Sentence Modeling Techniques for Extractive Speech Summarization) [In <fixed-case>C</fixed-case>hinese] Shih-HungLiu Kuan-YuChen - Hsin-MinWang + Hsin-MinWang Wen-LianHsu BerlinChen 5–21 @@ -32,7 +32,7 @@ 分頻式調變頻譜分解於強健性語音辨識 (Sub-band modulation spectrum factorization in robust speech recognition) [In <fixed-case>C</fixed-case>hinese] Hao-tengFan Yi-zhangCai - Jeih-weihHung + Jeih-weihHung 22–36 O13-1002 fan-etal-2013-fen @@ -41,9 +41,9 @@ 使用語音評分技術輔助台語語料的驗證 (Using Speech Assessment Technique for the Validation of <fixed-case>T</fixed-case>aiwanese Speech Corpus) [In <fixed-case>C</fixed-case>hinese] Yu-JheLi Chung-CheWang - Liang-YuChen - Jyh-Shing RogerJang - Ren-YuanLyu + Liang-YuChen + Jyh-Shing RogerJang + Ren-YuanLyu 37–38 O13-1003 li-etal-2013-shi @@ -58,18 +58,18 @@
機器翻譯為本的中文拼字改錯系統 (<fixed-case>C</fixed-case>hinese Spelling Checker Based on Statistical Machine Translation) - Hsun-wenChiu - Jian-chengWu - Jason S.Chang + Hsun-wenChiu + Jian-chengWu + Jason S.Chang 53–55 O13-1005 chiu-etal-2013-ji Detecting <fixed-case>E</fixed-case>nglish Grammatical Errors based on Machine Translation - JimChang - JianchengWu - Jason S.Chang + JimChang + JianchengWu + Jason S.Chang 56–58 O13-1006 chang-etal-2013-detecting @@ -102,7 +102,7 @@ 基於音段式<fixed-case>LMR</fixed-case> 對映之語音轉換方法的改進 (Improving of Segmental <fixed-case>LMR</fixed-case>-Mapping Based Voice Conversion Methods) [In <fixed-case>C</fixed-case>hinese] - Hung-YanGu + Hung-YanGu Jia-WeiChang 90–104 O13-1010 @@ -130,8 +130,8 @@ A Semantic-Based Approach to Noun-Noun Compound Interpretation - You-shanChung - Keh-JiannChen + You-shanChung + Keh-JiannChen 122–123 O13-1013 chung-chen-2013-semantic @@ -148,7 +148,7 @@ 雜訊環境下應用線性估測編碼於特徵時序列之強健性語音辨識 (Employing linear prediction coding in feature time sequences for robust speech recognition in noisy environments) [In <fixed-case>C</fixed-case>hinese] Hao-tengFan Wen-yuTseng - Jeih-weihHung + Jeih-weihHung 139–140 O13-1015 fan-etal-2013-za @@ -197,7 +197,7 @@ 主要漢字形聲字發音規則探勘與視覺化 (Primary <fixed-case>C</fixed-case>hinese Semantic-Phonetic Compounds Pronunciation Rules Mining and Visualization) [In <fixed-case>C</fixed-case>hinese] Chien-HuiHsu - Meng-FengTsai + Meng-FengTsai Chia-HuiChang Hsiang-MeiLiao Shu-PingLi @@ -209,7 +209,7 @@ 語料庫導向之方位短句於固定框架的共現概念統計分析 (A Corpus-driven Pattern Analysis in Locative Phrases: A Statistical Comparison of Co-appearing Concepts in Fixed Frames) [In <fixed-case>C</fixed-case>hinese] August F.Y.Chao - Siaw-FongChung + Siaw-FongChung 198–210 O13-1021 chao-chung-2013-yu @@ -217,7 +217,7 @@ A simple real-word error detection and correction using local word bigram and trigram PratipSamanta - Bidyut B.Chaudhuri + Bidyut B.Chaudhuri 211–220 O13-1022 samanta-chaudhuri-2013-simple @@ -226,7 +226,7 @@ 結合關鍵詞驗證及語者驗證之雲端身份驗證系統 (A Cloud Speaker Authentication System Based on Keyword Verification and Speaker Verification) [In <fixed-case>C</fixed-case>hinese] Yi-ChinChiu Chuan-YenFan - Bor-ShenLin + Bor-ShenLin 221–235 O13-1023 chiu-etal-2013-jie @@ -235,7 +235,7 @@ Causing Emotion in Collocation:An Exploratory Data Analysis Pei-YuLu Yu-YunChang - Shu-KaiHsieh + Shu-KaiHsieh 236–249 O13-1024 lu-etal-2013-causing @@ -243,15 +243,15 @@ Observing Features of <fixed-case>PTT</fixed-case> Neologisms: A Corpus-driven Study with N-gram Model Tsun-JuiLiu - Shu-KaiHsieh - LaurentPrevot + Shu-KaiHsieh + LaurentPrevot 250–259 O13-1025 liu-etal-2013-observing Variability in vowel formant frequencies of children with cerebral palsy - Li-meiChen + Li-meiChen Yung-ChiehLin Wei ChenHsu Fang-hsinLiao @@ -290,8 +290,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 18, Number 1, March 2013 - Yuen-HsienTseng - Kuang-huaChen + Yuen-HsienTseng + Kuang-huaChen March 2013 rocling @@ -308,9 +308,9 @@ Learning to Find Translations and Transliterations on the Web based on Conditional Random Fields - Joseph Z.Chang - Jason S.Chang - Jyh-Shing RogerJang + Joseph Z.Chang + Jason S.Chang + Jyh-Shing RogerJang O13-2002 chang-etal-2013-learning @@ -323,7 +323,7 @@ Emotion Co-referencing - Emotional Expression, Holder, and Topic DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay O13-2004 das-bandyopadhyay-2013-emotion @@ -331,7 +331,7 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 18, Number 2, June 2013-Special Issue on Chinese Lexical Resources: Theories and Applications - Shu-KaiHsieh + Shu-KaiHsieh June 2013 rocling @@ -350,15 +350,15 @@ 以中文十億詞語料庫為基礎之兩岸詞彙對比研究 (Cross-Strait Lexical Differences: A Comparative Study based on <fixed-case>C</fixed-case>hinese <fixed-case>G</fixed-case>igaword Corpus) [In <fixed-case>C</fixed-case>hinese] - Jia-FeiHong + Jia-FeiHong Chu-RenHuang O13-3002 hong-huang-2013-yi 基於字典釋義關聯方法的同義詞概念擷取:以《同義詞詞林(擴展版)》為例 (A Definition-based Shared-concept Extraction within Groups of <fixed-case>C</fixed-case>hinese Synonyms: A Study Utilizing the Extended <fixed-case>C</fixed-case>hinese Synonym Forest) [In <fixed-case>C</fixed-case>hinese] - F. Y. AugustChao - Siaw-FongChung + F. Y. AugustChao + Siaw-FongChung O13-3003 chao-chung-2013-ji @@ -398,7 +398,7 @@ Acoustic Correlates of Contrastive Stress in Compound Words versus Verbal Phrase in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese WeilinShen - JacquelineVaissière + JacquelineVaissière FrédéricIsel O13-4003 shen-etal-2013-acoustic @@ -444,24 +444,24 @@ Integrating Dictionary and Web N-grams for <fixed-case>C</fixed-case>hinese Spell Checking - Jian-chengWu - Hsun-wenChiu - Jason S.Chang + Jian-chengWu + Hsun-wenChiu + Jason S.Chang O13-5002 wu-etal-2013-integrating Correcting Serial Grammatical Errors based on N-grams and Syntax - Jian-chengWu - JimChang - Jason S.Chang + Jian-chengWu + JimChang + Jason S.Chang O13-5003 wu-etal-2013-correcting A Semantic-Based Approach to Noun-Noun Compound Interpretation - You-shanChung - Keh-JiannChen + You-shanChung + Keh-JiannChen O13-5004 chung-chen-2013-semantic-based @@ -477,15 +477,15 @@ 使用語音評分技術輔助台語語料的驗證 (Using Speech Assessment Technique for the Validation of <fixed-case>T</fixed-case>aiwanese Speech Corpus) [In <fixed-case>C</fixed-case>hinese] Yu-JheLi Chung-CheWang - Liang-YuChen - Jyh-Shing RogerJang - Ren-YuanLyu + Liang-YuChen + Jyh-Shing RogerJang + Ren-YuanLyu O13-5006 li-etal-2013-shi-yong 基於音段式<fixed-case>LMR</fixed-case>對映之語音轉換方法的改進 (Improving of Segmental <fixed-case>LMR</fixed-case>-Mapping Based Voice Conversion Method) [In <fixed-case>C</fixed-case>hinese] - Hung-YanGu + Hung-YanGu Jia-WeiChang O13-5007 gu-chang-2013-ji-yu @@ -494,7 +494,7 @@ 雜訊環境下應用線性估測編碼於特徵時序列之強健性語音辨識 (Employing Linear Prediction Coding in Feature Time Sequences for Robust Speech Recognition in Noisy Environments) [In <fixed-case>C</fixed-case>hinese] Hao-tengFan Wen-yuTseng - Jeih-weihHung + Jeih-weihHung O13-5008 fan-etal-2013-za-xun diff --git a/data/xml/O14.xml b/data/xml/O14.xml index 209c712c56..247ffdd906 100644 --- a/data/xml/O14.xml +++ b/data/xml/O14.xml @@ -6,7 +6,7 @@ O14-1 Jing-YangJou Chia-HuiChang - Hsin-MinWang + Hsin-MinWang The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Jhongli, Taiwan
October @@ -20,7 +20,7 @@ 運用概念模型化技術於中文大詞彙連續語音辨識之語言模型調適 (Leveraging Concept Modeling Techniques for Language Model Adaptation in <fixed-case>M</fixed-case>andarin Large Vocabulary Continuous Speech Recognition) [In <fixed-case>C</fixed-case>hinese] Po-HanHao - Su-ChengChen + Su-ChengChen BerlinChen 1–2 O14-1001 @@ -33,7 +33,7 @@ Yu-LunHsieh BerlinChen Hsin-MinWang - Wen-LianHsu + Wen-LianHsu 3–20 O14-1002 liu-etal-2014-tan @@ -65,8 +65,8 @@ 中文轉客文文轉音系統中的客語斷詞處理之研究 (Research on <fixed-case>H</fixed-case>akka Word Segmentation Processes in <fixed-case>C</fixed-case>hinese-to-<fixed-case>H</fixed-case>akka <fixed-case>T</fixed-case>ext-to-<fixed-case>S</fixed-case>peech System )[In <fixed-case>C</fixed-case>hinese] Hsin-WeiLin - Feng-LongHuang - Ming-ShingYu + Feng-LongHuang + Ming-ShingYu Yih-JengLin 58–77 O14-1006 @@ -74,7 +74,7 @@ 基於發音知識以建構頻譜<fixed-case>HMM</fixed-case> 之國語語音合成方法 (A <fixed-case>M</fixed-case>andarin Speech Synthesis Method Using Articulation-knowledge Based Spectral <fixed-case>HMM</fixed-case> Structure)[In <fixed-case>C</fixed-case>hinese] - Hung-YanGu + Hung-YanGu Ming-YenLai Wei-SiangHong Yan-HuaChen @@ -85,8 +85,8 @@ Some Prosodic Characteristics of <fixed-case>T</fixed-case>aiwan <fixed-case>E</fixed-case>nglish Accent Chao-yuSu - Chiu-yuTseng - Jyh-ShingRoger Jang + Chiu-yuTseng + Jyh-ShingRoger Jang 89–90 O14-1008 su-etal-2014-prosodic @@ -102,7 +102,7 @@ Public Opinion Toward <fixed-case>CSSTA</fixed-case>: A Text Mining Approach Yi-AnWu - Shu-KaiHsieh + Shu-KaiHsieh 94–95 O14-1010 wu-hsieh-2014-public @@ -111,7 +111,7 @@ Towards automatic enrichment of standardized electronic dictionaries by semantic classes BilelGargouri ImenElleuch - Abdelmajid-LinBen Hamadou + Abdelmajid-LinBen Hamadou 96–109 O14-1011 gargouri-etal-2014-towards @@ -134,7 +134,7 @@ Sketching the Dependency Relations of Words in <fixed-case>C</fixed-case>hinese ShihMeng-Hsien - Shu-KaiHsieh + Shu-KaiHsieh 139–152 O14-1014 shih-hsieh-2014-sketching @@ -151,10 +151,10 @@ 學術論文簡介的自動文步分析與寫作提示 (Automatic Move Analysis of Research Articles for Assisting Writing)[In <fixed-case>C</fixed-case>hinese] Guan-ChengHuang - Jian-ChengWu + Jian-ChengWu Hsiang-LingHsu Tzu-HsiYen - Jason S.Chang + Jason S.Chang 163–164 O14-1016 huang-etal-2014-xue @@ -164,7 +164,7 @@ Jia-GuuLin Jyh-BinShiau ChangEn Pu - Chia-LongWu + Chia-LongWu 165–174 O14-1017 lin-etal-2014-yi @@ -178,7 +178,7 @@ Testing Distributional Hypothesis in Patent Translation - Hsin-HungLin + Hsin-HungLin YvesLepage 185–192 O14-1019 @@ -186,7 +186,7 @@ Spectrum Analysis of Cry Sounds in Preterm and Full-Term Infants - Li-meiChen + Li-meiChen Yu-HsuanYang Chyi-HerLin Yuh-JyhLin @@ -211,8 +211,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 19, Number 1, March 2014 - Yuen-HsienTseng - Kuang-huaChen + Yuen-HsienTseng + Kuang-huaChen March 2014 ijclclp @@ -224,15 +224,15 @@ A Novel Approach for Handling Unknown Word Problem in <fixed-case>C</fixed-case>hinese-<fixed-case>V</fixed-case>ietnamese Machine Translation PhuocTran - DienDinh + DienDinh O14-2001 tran-dinh-2014-novel Joint Learning of Entity Linking Constraints Using a <fixed-case>M</fixed-case>arkov-<fixed-case>L</fixed-case>ogic Network Hong-JieDai - Richard Tzong-HanTsai - Wen-LianHsu + Richard Tzong-HanTsai + Wen-LianHsu O14-2002 dai-etal-2014-joint @@ -253,8 +253,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 19, Number 2, June 2014 - Yuen-HsienTseng - Kuang-huaChen + Yuen-HsienTseng + Kuang-huaChen June 2014 ijclclp @@ -265,7 +265,7 @@ Social Metaphor Detection via Topical Analysis - Ting-Hao KennethHuang + Ting-Hao KennethHuang O14-3001 huang-2014-social @@ -284,13 +284,13 @@ Shu-LingHuang Yu-MingHsieh Su-ChuLin - Keh-JiannChen + Keh-JiannChen O14-3003 huang-etal-2014-resolving 不同母語背景華語學習者的用詞特徵:以語料庫為本的研究 (Salient Linguistic Features of <fixed-case>C</fixed-case>hinese Learners with Different L1s: A Corpus-based Study) [In <fixed-case>C</fixed-case>hinese] - Li-pingChang + Li-pingChang O14-3004 chang-2014-bu @@ -298,8 +298,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 19, Number 3, September 2014 - Yuen-HsienTseng - Kuang-huaChen + Yuen-HsienTseng + Kuang-huaChen September 2014 ijclclp @@ -314,7 +314,7 @@ SachiKato HikariKonishi MizuhoImada - KikuoMaekawa + KikuoMaekawa O14-4001 asahara-etal-2014-bccwj @@ -322,7 +322,7 @@ Transliteration Extraction from Classical <fixed-case>C</fixed-case>hinese Buddhist Literature Using Conditional Random Fields with Language Models Yu-ChunWang Karol Chia-TienChang - Richard Tzong-HanTsai + Richard Tzong-HanTsai JiehHsiang O14-4002 wang-etal-2014-transliteration @@ -339,7 +339,7 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 19, Number 4, December 2014 - Special Issue on Selected Papers from ROCLING XXVI - Jen-TzungChien + Jen-TzungChien Hung-YuKao Chia-HuiChang December @@ -360,24 +360,24 @@ Public Opinion Toward <fixed-case>CSSTA</fixed-case>: A Text Mining Approach Yi-AnWu - Shu-KaiHsieh + Shu-KaiHsieh O14-5002 wu-hsieh-2014-public-opinion 學術論文簡介的自動文步分析與寫作提示 (Automatic Move Analysis of Research Articles for Assisting Writing) [In <fixed-case>C</fixed-case>hinese] Guan-ChengHuang - Jian-ChengWu + Jian-ChengWu Hsiang-LingHsu Tzu-HsiYen - Jason S.Chang + Jason S.Chang O14-5003 huang-etal-2014-xue-shu 使用概念資訊於中文大詞彙連續語音辨識之研究 (Exploring Concept Information for <fixed-case>M</fixed-case>andarin Large Vocabulary Continuous Speech Recognition) [In <fixed-case>C</fixed-case>hinese] Po-HanHao - Ssu-ChengChen + Ssu-ChengChen BerlinChen O14-5004 hao-etal-2014-shi @@ -385,14 +385,14 @@ Some Prosodic Characteristics of <fixed-case>T</fixed-case>aiwan <fixed-case>E</fixed-case>nglish Accent Chao-yuSu - Chiu-yuTseng - Jyh-Shing RogerJang + Chiu-yuTseng + Jyh-Shing RogerJang O14-5005 su-etal-2014-prosodic-characteristics Quantitative Assessment of Cry in Term and Preterm Infants: Long-Time Average Spectrum Analysis - Li-meiChen + Li-meiChen O14-5006 chen-2014-quantitative diff --git a/data/xml/O15.xml b/data/xml/O15.xml index d8c076594e..2d9ee20165 100644 --- a/data/xml/O15.xml +++ b/data/xml/O15.xml @@ -5,8 +5,8 @@ Proceedings of the 27th Conference on Computational Linguistics and Speech Processing (ROCLING 2015) O15-1 Sin-HorngChen - Hsin-MinWang - Jen-TzungChien + Hsin-MinWang + Jen-TzungChien The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Hsinchu, Taiwan
October @@ -30,7 +30,7 @@
使用詞向量表示與概念資訊於中文大詞彙連續語音辨識之語言模型調適(Exploring Word Embedding and Concept Information for Language Model Adaptation in <fixed-case>M</fixed-case>andarin Large Vocabulary Continuous Speech Recognition) [In <fixed-case>C</fixed-case>hinese] - Ssu-ChengChen + Ssu-ChengChen Kuan-YuChen Hsiao-TsungHung BerlinChen @@ -51,8 +51,8 @@ 以自然語言處理方法研發智慧型客語無聲調拼音輸入法 (Smart Toneless <fixed-case>P</fixed-case>inyin Input Method for <fixed-case>H</fixed-case>akka Based on Natural Language Processing) [In <fixed-case>C</fixed-case>hinese] Hsin-WeiLin - Ming-ShingYu - Feng-LongHuang + Ming-ShingYu + Feng-LongHuang Jiun-WeiWei 27–42 O15-1004 @@ -63,7 +63,7 @@ Chao-LinLiu Chun-NingChang Chu-TingHsu - Wen-HuiCheng + Wen-HuiCheng HongsuWang Wei-YunChiu 43–57 @@ -77,7 +77,7 @@ Chao-ChunLiang Kuang-YiHsu Shen-YunMiao - Wei-YunMa + Wei-YunMa Lun-WenKu Churn-JungLiau Keh-YihSu @@ -108,7 +108,7 @@ 基於貝氏定理自動分析語料庫與標定文步 (A <fixed-case>B</fixed-case>ayesian approach to determine move tags in corpus) [In <fixed-case>C</fixed-case>hinese] Jia-LienHsu Chiung-WenChang - Jason S.Chang + Jason S.Chang 87–99 O15-1009 hsu-etal-2015-ji @@ -130,7 +130,7 @@ Ming-HanYang Hsiao-TsungHung YuwenHsiung - Yao-TingHung + Yao-TingHung BerlinChen 103–120 O15-1011 @@ -138,8 +138,8 @@ 透過語音特徵建構基於堆疊稀疏自編碼器演算法之婚姻治療中夫妻互動行為量表自動化評分系統(Automating Behavior Coding for Distressed Couples Interactions Based on Stacked Sparse Autoencoder Framework using Speech-acoustic Features)[In <fixed-case>C</fixed-case>hinese] - Po-HsuanChen - Chi-ChunLee + Po-HsuanChen + Chi-ChunLee 121–122 O15-1012 chen-lee-2015-tou @@ -186,7 +186,7 @@ 基於 <fixed-case>W</fixed-case>ord2<fixed-case>V</fixed-case>ec 詞向量的網路情緒文和流行音樂媒合方法之研究(Matching <fixed-case>I</fixed-case>nternet Mood Essays with Pop-Music Based on <fixed-case>W</fixed-case>ord2<fixed-case>V</fixed-case>ec)[In <fixed-case>C</fixed-case>hinese] Pin-ChuWen Yi-LinTsai - Tzong-HanTsai + Tzong-HanTsai 167–179 O15-1017 wen-etal-2015-ji @@ -228,7 +228,7 @@ The word complexity measure (<fixed-case>WCM</fixed-case>) in early phonological development: A longitudinal study from birth to three years old - Li-meiChen + Li-meiChen Yi-HsiangLiu 233–247 O15-1022 @@ -252,7 +252,7 @@ 結合<fixed-case>ANN</fixed-case>、全域變異數與真實軌跡挑選之基週軌跡產生方法(A Pitch-contour Generation Method Combining <fixed-case>ANN</fixed-case> Prediction,Global Variance Matching, and Real-contour Selection)[In <fixed-case>C</fixed-case>hinese] - Hung-YanGu + Hung-YanGu Kai-WeiJiang HaoWang 277–288 @@ -263,7 +263,7 @@ 運用<fixed-case>P</fixed-case>ython結合語音辨識及合成技術於自動化音文同步之實作(A Python Implementation of Automatic Speech-text Synchronization Using Speech Recognition and Text-to-Speech Technology)[In <fixed-case>C</fixed-case>hinese] ChunHanLai Chao-KaiChang - Ren-YuanLyu + Ren-YuanLyu 289–305 O15-1026 lai-etal-2015-yun @@ -281,8 +281,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 20, Number 1, June 2015-Special Issue on Chinese as a Foreign Language Lung-HaoLee - Liang-ChihYu - Li-PingChang + Liang-ChihYu + Li-PingChang June 2015 ijclclp @@ -305,7 +305,7 @@ QiaoZhang ShuiyuanZhang JianpengHou - XueqiCheng + XueqiCheng O15-2002 xiong-etal-2015-hanspeller @@ -319,8 +319,8 @@ Automatically Detecting Syntactic Errors in Sentences Writing by Learners of <fixed-case>C</fixed-case>hinese as a Foreign Language Tao-HsingChang - Yao-TingSung - Jia-FeiHong + Yao-TingSung + Jia-FeiHong O15-2004 chang-etal-2015-automatically @@ -353,8 +353,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 20, Number 2, December 2015 - Special Issue on Selected Papers from ROCLING XXVII Hung-YuKao - Yih-RuWang - Jen-TzongChien + Yih-RuWang + Jen-TzongChien December 2015 ijclclp @@ -370,7 +370,7 @@ Kuang-YiHsu Chien-TsungHuang Shen-YunMiao - Wei-YunMa + Wei-YunMa Lun-WeiKu Churn-JungLiau Keh-YihSu @@ -397,7 +397,7 @@ Kai-WunShih Kuan-YuChen Shih-HungLiu - Hsin-MinWang + Hsin-MinWang BerlinChen O15-3004 shih-etal-2015-jie @@ -407,15 +407,15 @@ Ting-HaoChang Hsiao-TsungHung Kuan-YuChen - Hsin-MinWang + Hsin-MinWang BerlinChen O15-3005 chang-etal-2015-diao-bian 透過語音特徵建構基於堆疊稀疏自編碼器演算法之婚姻治療中夫妻互動行為量表自動化評分系統 (Automating Behavior Coding for Distressed Couples Interactions Based on Stacked Sparse Autoencoder Framework using Speech-acoustic Features) [In <fixed-case>C</fixed-case>hinese] - Po-HsuanChen - Chi-ChunLee + Po-HsuanChen + Chi-ChunLee O15-3006 chen-lee-2015-tou-guo diff --git a/data/xml/O16.xml b/data/xml/O16.xml index b76f2e8123..9853619d67 100644 --- a/data/xml/O16.xml +++ b/data/xml/O16.xml @@ -5,7 +5,7 @@ Proceedings of the 28th Conference on Computational Linguistics and Speech Processing (ROCLING 2016) O16-1 Chung-HsienWu - Yuen-HsienTseng + Yuen-HsienTseng Hung-YuKao The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Tainan, Taiwan
@@ -61,9 +61,9 @@ 「<fixed-case>V</fixed-case>到」結構的合分詞及語意區分(Word segmentation and sense representation for <fixed-case>V</fixed-case>-dao structure in <fixed-case>C</fixed-case>hinese)[In <fixed-case>C</fixed-case>hinese] Shu-LingHuang - Shi-MinLi + Shi-MinLi Ming-HongBai - Jian-ChengWu + Jian-ChengWu Ying-NiWang Qing-LongLin 22–34 @@ -84,7 +84,7 @@ 基於詞語分布均勻度的核心詞彙選擇之研究(A Study on Dispersion Measures for Core Vocabulary Compilation )[In <fixed-case>C</fixed-case>hinese] Ming-HongBai - Jian-ChengWu + Jian-ChengWu Ying-NiChien Shu-LingHuang Ching-LungLin @@ -95,7 +95,7 @@ 什麼時候「認真就輸了」?——語料庫中「認真」一詞的語意變化(Do We Lose When Being Serious? —<fixed-case>C</fixed-case>hange in Meaning of the Word “Renzen(認真)” in Corpora) Pei-YiChen - Siaw-FongChung + Siaw-FongChung 52–81 O16-1008 chen-chung-2016-shi @@ -107,7 +107,7 @@ Chia-ChenLee Shao-ManLee Guan-WeiLi - Shu-KaiHsieh + Shu-KaiHsieh 82–99 O16-1009 huang-etal-2016-crowdsourcing @@ -115,7 +115,7 @@ 基於相依詞向量的剖析結果重估與排序(N-best Parse Rescoring Based on Dependency-Based Word Embeddings) Yu-MingHsieh - Wei-YunMa + Wei-YunMa 100–102 O16-1010 hsieh-ma-2016-ji @@ -134,8 +134,8 @@ Yu-LunHsieh Shih-HungLiu Kuan-YuChen - Hsin-MinWang - Wen-LianHsu + Hsin-MinWang + Wen-LianHsu BerlinChen 115–128 O16-1012 @@ -146,7 +146,7 @@ Kuan-HungChen Shu-HanLiao Yuan-FuLiao - Yih-RuWang + Yih-RuWang 129–130 O16-1013 chen-etal-2016-ji @@ -171,8 +171,8 @@ Speech Intelligibility and the Production of Fricative and Affricate among <fixed-case>M</fixed-case>andarin-speaking Children with Cerebral Palsy - Chin-TingLiu - Li-meiChen + Chin-TingLiu + Li-meiChen Yu-ChingLin Chia-FangCheng Hui-chenChang @@ -183,7 +183,7 @@ 網路新興語言&耍’之語意辨析:以批踢踢語料庫為本(On the semantic analysis of the verb shua3 in <fixed-case>T</fixed-case>aiwan <fixed-case>M</fixed-case>andarin: The <fixed-case>PTT</fixed-case> corpus-based study)[In <fixed-case>C</fixed-case>hinese] Hsueh-yingHu - Siaw-FongChung + Siaw-FongChung 164–180 O16-1017 hu-chung-2016-wang @@ -209,7 +209,7 @@ Computing Sentiment Scores of Verb Phrases for <fixed-case>V</fixed-case>ietnamese Thien KhaiTran - Tuoi ThiPhan + Tuoi ThiPhan 204–213 O16-1020 tran-phan-2016-computing @@ -243,7 +243,7 @@ 基於深層類神經網路及表示學習技術之文件可讀性分類(Classification of Text Readability Based on Deep Neural Network and Representation Learning Techniques)[In <fixed-case>C</fixed-case>hinese] Hou-ChiangTseng Hsiao-TsungHung - Yao-TingSung + Yao-TingSung BerlinChen 255–270 O16-1024 @@ -255,7 +255,7 @@ Jheng-HuaHuang Rui-JiaZhong Liang-PuChen - Ping-CheYang + Ping-CheYang 271–283 O16-1025 hung-etal-2016-ming @@ -270,7 +270,7 @@ Sarcasm Detection in <fixed-case>C</fixed-case>hinese Using a Crowdsourced Corpus Shih-KaiLin - Shu-KaiHsieh + Shu-KaiHsieh 299–310 O16-1027 lin-hsieh-2016-sarcasm @@ -295,9 +295,9 @@ 中文近義詞的偵測與判別(Detection and Discrimination of <fixed-case>C</fixed-case>hinese Near-synonyms)[In <fixed-case>C</fixed-case>hinese] - Shih-MinLi + Shih-MinLi Ming-HongBai - Jian-ChengWu + Jian-ChengWu Shu-LingHuang Ching-LungLin 342–351 @@ -308,7 +308,7 @@ 構建一個中文國小數學文字問題語料庫(Building a Corpus for Developing the <fixed-case>C</fixed-case>hinese Elementary School Math Word Problem Solver)[In <fixed-case>C</fixed-case>hinese] Shen-YunMiao Su-ChuLin - Wei-YunMa + Wei-YunMa Keh-YihSu 352–371 O16-1031 @@ -326,7 +326,7 @@ 基於多模態主動式學習法進行需備標記樣本之挑選用於候用校長評鑑之自動化評分系統建置(A Multimodal Active Learning Approach toward Identifying Samples to Label during the Development of Automatic Oral Presentation Assessment System for Pre-service Principals Certification Program)[In <fixed-case>C</fixed-case>hinese] Hung-ChingSun - Chi-ChunLee + Chi-ChunLee 387–401 O16-1033 sun-lee-2016-ji @@ -345,8 +345,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 21, Number 1, June 2016 - Yuen-HsienTseng - Kuang-HuaChen + Yuen-HsienTseng + Kuang-HuaChen June 2016 ijclclp @@ -369,7 +369,7 @@ Yung-ChunChang Chun-HanChu Chien ChinChen - Wen-LianHsu + Wen-LianHsu O16-2002 chang-etal-2016-linguistic @@ -378,7 +378,7 @@ Yu-YangHuang RuiYan Tsung-TingKuo - Shou-DeLin + Shou-DeLin O16-2003 huang-etal-2016-enriching @@ -393,8 +393,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 21, Number 2, December 2016 - Yuen-HsienTseng - Jen-TzungChien + Yuen-HsienTseng + Jen-TzungChien December 2016 ijclclp @@ -406,7 +406,7 @@ 基於詞語分布均勻度的核心詞彙選擇 (A Study on Dispersion Measures for Core Vocabulary Compilation) [In <fixed-case>C</fixed-case>hinese] Ming-HongBai - Jian-ChengWu + Jian-ChengWu Ying-NiChien Shu-LingHuang Ching-LungLin @@ -416,7 +416,7 @@ N-best Rescoring for Parsing Based on Dependency-Based Word Embeddings Yu-MingHsieh - Wei-YunMa + Wei-YunMa O16-3002 hsieh-ma-2016-n @@ -445,7 +445,7 @@ Kuan-HungChen Shu-HanLiao Yuan-FuLiao - Yih-RuWang + Yih-RuWang O16-3005 chen-etal-2016-ji-yu-zi diff --git a/data/xml/O17.xml b/data/xml/O17.xml index f3d6070a1b..9030efd6cd 100644 --- a/data/xml/O17.xml +++ b/data/xml/O17.xml @@ -54,7 +54,7 @@ A Novel Trajectory-based Spatial-Temporal Spectral Features for Speech Emotion Recognition Chun-MinChang Wei-ChengLin - Chi-ChunLee + Chi-ChunLee 52–52 O17-1005 chang-etal-2017-novel @@ -72,14 +72,14 @@ Exploring Lavender Tongue from Social Media Texts[In <fixed-case>C</fixed-case>hinese] Hsiao-HanWu - Shu-KaiHsieh + Shu-KaiHsieh 68–80 O17-1007 wu-hsieh-2017-exploring 手機平台 <fixed-case>APP</fixed-case> 之四縣客語輸入法的研發 (Research and Implementation of <fixed-case>S</fixed-case>ixian <fixed-case>H</fixed-case>akka <fixed-case>P</fixed-case>inyin Input Method for Mobile Cell <fixed-case>APP</fixed-case>) [In <fixed-case>C</fixed-case>hinese] - Feng-LongHuang + Feng-LongHuang Kuei-SenLiu Sheng-YiTseng 81–100 @@ -91,7 +91,7 @@ Shih-KuangLee Syu-SiangWang YuTsao - Jeih-weihHung + Jeih-weihHung 101–113 O17-1009 lee-etal-2017-duo @@ -101,7 +101,7 @@ Yu-DingLu Hung-ShinLee YuTsao - Hsin-MinWang + Hsin-MinWang 114–115 O17-1010 lu-etal-2017-ji @@ -109,7 +109,7 @@ 探究不同領域文件之可讀性分析 (Exploring Readability Analysis on Multi-Domain Texts) [In <fixed-case>C</fixed-case>hinese] Hou-ChiangTseng - Yao-TingSung + Yao-TingSung BerlinChen 116–118 O17-1011 @@ -119,8 +119,8 @@ 基於i-vector與<fixed-case>PLDA</fixed-case>並使用<fixed-case>GMM</fixed-case>-<fixed-case>HMM</fixed-case>強制對位之自動語者分段標記系統 (Speaker Diarization based on <fixed-case>I</fixed-case>-vector <fixed-case>PLDA</fixed-case> Scoring and using <fixed-case>GMM</fixed-case>-<fixed-case>HMM</fixed-case> Forced Alignment) [In <fixed-case>C</fixed-case>hinese] Cheng-Jo RayChang Hung-ShinLee - Hsin-MinWang - Jyh-Shing RogerJang + Hsin-MinWang + Jyh-Shing RogerJang 119–135 O17-1012 chang-etal-2017-ji @@ -131,7 +131,7 @@ Chun-MinChang Yu-ShuoLiu Shiuan-KaiKao - Chi-ChunLee + Chi-ChunLee 136–147 O17-1013 chou-etal-2017-amplifying @@ -139,9 +139,9 @@ Question Retrieval with Distributed Representations and Participant Reputation in Community Question Answering SamWeng - Kevin Chun-KaiWu + Kevin Chun-KaiWu Yu-ChunWang - Richard Tzong-HanTsai + Richard Tzong-HanTsai 148–148 O17-1014 weng-etal-2017-question @@ -152,7 +152,7 @@ Ying-WenChen BerlinChen Kuan-YuChen - Hsin-MinWang + Hsin-MinWang 149–151 O17-1015 lo-etal-2017-shi @@ -160,7 +160,7 @@ Toward Contextual Valence Shifters in <fixed-case>V</fixed-case>ietnamese Reviews Thien KhaiTran - Tuoi ThiPhan + Tuoi ThiPhan 152–159 O17-1016 tran-phan-2017-toward @@ -213,7 +213,7 @@ 應用詞向量於語言樣式探勘之研究 (Mining Language Patterns Using Word Embeddings) [In <fixed-case>C</fixed-case>hinese] XiangXiao Shao-ZhenYe - Liang-ChihYu + Liang-ChihYu K. RobertLai 230–243 O17-1022 @@ -239,7 +239,7 @@ <fixed-case>SUT</fixed-case> System Description for Anti-Spoofing 2017 Challenge MohammadAdiban - HosseinSameti + HosseinSameti NoushinMaghsoodi SajjadShahsavari 264–275 @@ -249,7 +249,7 @@ <fixed-case>SUT</fixed-case> Submission for <fixed-case>NIST</fixed-case> 2016 Speaker Recognition Evaluation: Description and Analysis HosseinZeinali - HosseinSameti + HosseinSameti NoushinMaghsoodi 276–286 O17-1026 @@ -295,7 +295,7 @@ YuTsao Ying-HuiLai Hsiang-PingHsu - Chia-LungWu + Chia-LungWu 323–331 O17-1030 wang-etal-2017-yi-ruan @@ -324,8 +324,8 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 22, Number 1, June 2017 - Yuen-HsienTseng - Jen-TzungChien + Yuen-HsienTseng + Jen-TzungChien June 2017 ijclclp @@ -340,15 +340,15 @@ Kuan-YuChen Kai-WunShih BerlinChen - Hsin-MinWang - Wen-LianHsu + Hsin-MinWang + Wen-LianHsu O17-2001 liu-etal-2017-dang 反義詞「多」和「少」在數量名結構中的不對稱現象--以語料庫為本的分析 (The Asymmetric Occurences of <i>Dou1</i> and <i>Shao3</i> in the [Numeral + Measure Word/Classifier + Noun] Construction: A Corpus-based Analysis) [In <fixed-case>C</fixed-case>hinese] Wei-YuChen - Siaw-FongChung + Siaw-FongChung O17-2002 chen-chung-2017-fan @@ -365,7 +365,7 @@ International Journal of Computational Linguistics & Chinese Language Processing, Volume 22, Number 2, December 2017-Special Issue on Selected Papers from ROCLING XXIX - Chi-Chun (Jeremy)Lee + Chi-Chun (Jeremy)Lee Cheng-ZenYang December 2017 @@ -387,7 +387,7 @@ Tien-HongLo Ying-WenChen Kuan-YuChen - Hsin-MinWang + Hsin-MinWang BerlinChen O17-3002 lo-etal-2017-yu @@ -395,9 +395,9 @@ Question Retrieval with Distributed Representations and Participant Reputation in Community Question Answering SamWeng - Chun-KaiWu + Chun-KaiWu Yu-ChunWang - Richard Tzong-HanTsai + Richard Tzong-HanTsai O17-3003 weng-etal-2017-question-retrieval @@ -405,7 +405,7 @@ 探究使用基於類神經網路之特徵於文本可讀性分類 (Exploring the Use of Neural Network based Features for Text Readability Classification) [In <fixed-case>C</fixed-case>hinese] Hou-ChiangTseng BerlinChen - Yao-TingSung + Yao-TingSung O17-3004 tseng-etal-2017-tan-jiu @@ -419,12 +419,12 @@ 基於鑑別式自編碼解碼器之錄音回放攻擊偵測系統 (A Replay Spoofing Detection System Based on Discriminative Autoencoders) [In <fixed-case>C</fixed-case>hinese] - Chia-LungWu + Chia-LungWu Hsiang-PingHsu Yu-DingLu YuTsao Hung-ShinLee - Hsin-MinWang + Hsin-MinWang O17-3006 wu-etal-2017-ji diff --git a/data/xml/O18.xml b/data/xml/O18.xml index bfa04542bc..816ff76bce 100644 --- a/data/xml/O18.xml +++ b/data/xml/O18.xml @@ -4,9 +4,9 @@ Proceedings of the 30th Conference on Computational Linguistics and Speech Processing (ROCLING 2018) O18-1 - Chi-Chun (Jeremy)Lee + Chi-Chun (Jeremy)Lee Cheng-ZenYang - Jen-TzungChien + Jen-TzungChien The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Hsinchu, Taiwan
October @@ -20,7 +20,7 @@ 基於數字文本相關之語者驗證系統的研究與實作 (Study and Implementation on Digit-related Speaker Verification) [In <fixed-case>C</fixed-case>hinese] Chung-HungChou - Jyh-Shing RogerJang + Jyh-Shing RogerJang Shan-WenHsiao 1–15 O18-1001 @@ -30,7 +30,7 @@ Isolated and Ensemble Audio Preprocessing Methods for Detecting Adversarial Examples against Automatic Speech Recognition KrishanRajaratnam KunalShah - JugalKalita + JugalKalita 16–30 O18-1002 rajaratnam-etal-2018-isolated @@ -38,7 +38,7 @@ 使用性別資訊於語者驗證系統之研究與實作 (A study and implementation on Speaker Verification System using Gender Information) [In <fixed-case>C</fixed-case>hinese] Yu-JuiSu - Jyh-Shing RogerJang + Jyh-Shing RogerJang Po-ChengChan 31–45 O18-1003 @@ -58,7 +58,7 @@ 繁體中文依存句法剖析器 (Traditional <fixed-case>C</fixed-case>hinese Dependency Parser) [In <fixed-case>C</fixed-case>hinese] Yen-HsuanLee - Yih-RuWang + Yih-RuWang 61–75 O18-1005 lee-wang-2018-fan @@ -97,7 +97,7 @@ Chen-ChouLo Hsin-TeHwang YuTsao - Hsin-MinWang + Hsin-MinWang 96–110 O18-1009 huang-etal-2018-wavenet @@ -115,7 +115,7 @@ 使用長短期記憶類神經網路建構中文語音辨識器之研究 (A study on <fixed-case>M</fixed-case>andarin speech recognition using Long Short-Term Memory neural network) [In <fixed-case>C</fixed-case>hinese] Chien-hungLai - Yih-RuWang + Yih-RuWang 114–115 O18-1011 lai-wang-2018-shi @@ -124,7 +124,7 @@ 探索結合快速文本及卷積神經網路於可讀性模型之建立 (Exploring Combination of <fixed-case>F</fixed-case>ast<fixed-case>T</fixed-case>ext and Convolutional Neural Networks for Building Readability Models) [In <fixed-case>C</fixed-case>hinese] Hou-ChiangTseng BerlinChen - Yao-TingSung + Yao-TingSung 116–125 O18-1012 tseng-etal-2018-tan @@ -147,7 +147,7 @@ 智慧手機客語拼音輸入法之研發-以臺灣海陸腔為例 (Research and Implementation of <fixed-case>H</fixed-case>akka <fixed-case>P</fixed-case>inyin Input Method for Mobile Cell - An Example of <fixed-case>T</fixed-case>aiwan <fixed-case>H</fixed-case>io<fixed-case>L</fixed-case>iuk Accent) [In <fixed-case>C</fixed-case>hinese] - Feng-LongHuang + Feng-LongHuang Ming-ChanLiu 142–156 O18-1015 @@ -156,17 +156,17 @@ 以深層類神經網路標記中文階層式多標籤語意概念 (Hierarchical Multi-Label <fixed-case>C</fixed-case>hinese Word Semantic Labeling using Deep Neural Network ) [In <fixed-case>C</fixed-case>hinese] Wei-ChiehChou - Yih-RuWang + Yih-RuWang 157–157 O18-1016 chou-wang-2018-yi <fixed-case>LENA</fixed-case> computerized automatic analysis of speech development from birth to three - Li-MeiChen + Li-MeiChen D. KimbroughOller Chia-ChengLee - Chin-Ting JimboLiu + Chin-Ting JimboLiu 158–168 O18-1017 chen-etal-2018-lena @@ -260,7 +260,7 @@ Shih-YingChang Tsu-JinChiu Ming-ChiaoTsai - Jason S.Chang + Jason S.Chang 276–285 O18-1027 chen-etal-2018-jie diff --git a/data/xml/O88.xml b/data/xml/O88.xml index a5af2b066e..5dcf40e2a6 100644 --- a/data/xml/O88.xml +++ b/data/xml/O88.xml @@ -4,7 +4,7 @@ Proceedings of Rocling I Computational Linguistics Conference I O88-1 - Keh-JiannChen + Keh-JiannChen Chu-RenHuang The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Nantou, Taiwan
@@ -25,7 +25,7 @@
漢語的時間詞組和語言剖析 (The Temporal Expressions of <fixed-case>M</fixed-case>andarin and Language Parsing) [In <fixed-case>C</fixed-case>hinese] - Li-pingChang + Li-pingChang 73–86 O88-1002 chang-1988-han @@ -48,7 +48,7 @@ A New Approach to Quality Text Generation - Jason S.Chang + Jason S.Chang Hwei-MingKou 163–177 O88-1005 @@ -65,9 +65,9 @@ The Parsing Environment for <fixed-case>M</fixed-case>andarin Syntax I-PengLin - Shuan-fanHuang + Shuan-fanHuang Hsin-HsiChen - Ka-WaiChui + Ka-WaiChui 211-214 O88-1007 lin-etal-1988-parsing diff --git a/data/xml/O89.xml b/data/xml/O89.xml index 3e36f4a100..d16cb9d9a0 100644 --- a/data/xml/O89.xml +++ b/data/xml/O89.xml @@ -5,7 +5,7 @@ Proceedings of Rocling II Computational Linguistics Conference II O89-1 Chu-RenHuang - Keh-JiannChen + Keh-JiannChen The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Nantou, Taiwan
September @@ -19,7 +19,7 @@ The Identification Of Thematic Roles In Parsing <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Keh-jiannChen Chu-RenHuang - Li-pingChang + Li-pingChang 123–145 O89-1001 chen-etal-1989-identification @@ -35,9 +35,9 @@
<fixed-case>NTUMT</fixed-case> Strategy for Prepositional Phrase Attachment - Ka-WaiChui + Ka-WaiChui Yia-pingLin - Shuan-FanHuang + Shuan-FanHuang I-PengLin 163–186 O89-1003 @@ -46,7 +46,7 @@ Systemic Generation of <fixed-case>C</fixed-case>hinese Sentences Hwei-MingKuo - Jason S.Chang + Jason S.Chang 189–212 O89-1004 kuo-chang-1989-systemic @@ -77,7 +77,7 @@ Parsing <fixed-case>E</fixed-case>nglish Conjunctions And Comparatives Using The Wait-And-See Strategy Rey-LongLiu - Von-WunSoo + Von-WunSoo 291–310 O89-1008 liu-soo-1989-parsing diff --git a/data/xml/O90.xml b/data/xml/O90.xml index 09c79f96a3..a88918d696 100644 --- a/data/xml/O90.xml +++ b/data/xml/O90.xml @@ -4,8 +4,8 @@ Proceedings of Rocling III Computational Linguistics Conference III O90-1 - Jason J.Chang - Von-WunSoo + Jason J.Chang + Von-WunSoo The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Taipei, Taiwan
September @@ -63,7 +63,7 @@ 詞彙訊息的層次表達與管理 (Hierarchical Representation of Word Information and Management) [In <fixed-case>C</fixed-case>hinese] Lee-FengChien - Keh-JiannChen + Keh-JiannChen 297–310 O90-1006 chien-chen-1990-ci @@ -95,7 +95,7 @@ An application of statistical optimization with dynamic programming to phonemic-input-to-character conversion for <fixed-case>C</fixed-case>hinese - RichardSproat + RichardSproat 379–390 O90-1010 sproat-1990-application diff --git a/data/xml/O91.xml b/data/xml/O91.xml index fc7b27214c..0ce975d27b 100644 --- a/data/xml/O91.xml +++ b/data/xml/O91.xml @@ -17,7 +17,7 @@ 連接詞的語法表達模式-以中文訊息格位語法(<fixed-case>ICG</fixed-case>)為本的表達形式 (The Grammar Representation of Conjunctions – a Representation Based on <fixed-case>ICG</fixed-case>) [In <fixed-case>C</fixed-case>hinese] Wen-JenWei - Keh-JiannChen + Keh-JiannChen 79–95 O91-1001 wei-chen-1991-lian @@ -34,7 +34,7 @@ Determinative-Measure Compounds in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Formation Rules and Parser Implementation Ruo-pingMo Yao-JungYang - Keh-JiannChen + Keh-JiannChen Chu-RenHuang 111–134 O91-1003 @@ -42,9 +42,9 @@ 限制式滿足及機率最佳化的中文斷詞方法 (<fixed-case>C</fixed-case>hinese Word Segmentation based on Constraint satisfaction and Statistical Optimization) [In <fixed-case>C</fixed-case>hinese] - Jason S.Chang + Jason S.Chang Zhi-DaChen - Shun-DerChen + Shun-DerChen 147–165 O91-1004 chang-etal-1991-xian @@ -59,27 +59,27 @@ Lexicon-Driven Transfer In <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Machine Translation Chung-TengSun - Jyun-ShengChang + Jyun-ShengChang 193–214 O91-1006 sun-chang-1991-lexicon Automatic <fixed-case>C</fixed-case>hinese Text Generation Based On Inference Trees - Hing-LungLin - Benjamin K.T’sou - Hing-CheungHo - Bong-YeungLai - Suen CaesarLun + Hing-LungLin + Benjamin K.T’sou + Hing-CheungHo + Bong-YeungLai + Suen CaesarLun Chi-YuenChoi - Chun-yuKit + Chun-yuKit 215–236 O91-1007 lin-etal-1991-automatic A Trace & Unification Grammar For <fixed-case>C</fixed-case>hinese - Hans UlrichBlock + Hans UlrichBlock PingPeng 237–255 O91-1008 @@ -88,7 +88,7 @@ Constructing A Phrase Structure Grammar By Incorporating Linguistic Knowledge And Statistical Log-Likelihood Ratio Keh-YihSu - Yu-LingHsu + Yu-LingHsu ClaireSaillard 257–275 O91-1009 @@ -99,7 +99,7 @@ Hsien-ChinLiou Hui-LiHsu Yong-ChangHuang - Von-WunSoo + Von-WunSoo 277–302 O91-1010 liou-etal-1991-development @@ -107,7 +107,7 @@ Training A Recurrent Neural Network to Parse Syntactically Ambiguous and Ill-Formed Sentences Ssu-LiangLin - Von-WunSoo + Von-WunSoo 303–317 O91-1011 lin-soo-1991-training diff --git a/data/xml/O92.xml b/data/xml/O92.xml index 1cf153dc06..13d1bfd963 100644 --- a/data/xml/O92.xml +++ b/data/xml/O92.xml @@ -17,7 +17,7 @@ Discrimination Oriented Probabilistic Tagging Yi-ChungLin - Tung-HuiChiang + Tung-HuiChiang Keh-YihSu 87–96 O92-1001 @@ -26,14 +26,14 @@ Acquisition of Unbounded Dependency Using Explanation-Based Learning Rey-LongLiu - Von-WunSoo + Von-WunSoo 99–119 O92-1002 liu-soo-1992-acquisition Statistical Models for Word Segmentation And Unknown Word Resolution - Tung-HuiChiang + Tung-HuiChiang Jing-ShinChang Ming-YuLin Keh-YihSu @@ -44,7 +44,7 @@ A Modular and Statistical Approach to Machine Translation Dah-YihWang - Jyun-ShengChang + Jyun-ShengChang 149-175 O92-1004 wang-chang-1992-modular @@ -54,7 +54,7 @@ Marie MeiliYeh Chih-ChenTang Chu-RenHuang - Keh-JiannChen + Keh-JiannChen 179–193 O92-1005 yeh-etal-1992-han @@ -69,10 +69,10 @@ Reduplication In <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese: Their Formation Rules, Syntactic Behavior And <fixed-case>ICG</fixed-case> Representation - Feng-yiChen + Feng-yiChen Ruo-pingMo Chu-RenHuang - Keh-JiannChen + Keh-JiannChen 217–233 O92-1007 chen-etal-1992-reduplication diff --git a/data/xml/O93.xml b/data/xml/O93.xml index f562ef8312..fba662665f 100644 --- a/data/xml/O93.xml +++ b/data/xml/O93.xml @@ -4,7 +4,7 @@ Proceedings of Rocling VI Computational Linguistics Conference VI O93-1 - Keh-JiannChen + Keh-JiannChen Chu-RenHuang The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Nantou, Taiwan
@@ -18,7 +18,7 @@ Automatic Clustering of <fixed-case>C</fixed-case>hinese Characters and Words Chao-HuangChang - Cheng-DerChen + Cheng-DerChen 57–78 O93-1001 chang-chen-1993-automatic @@ -33,7 +33,7 @@ A Probabilistic Chunker - Kuang-huaChen + Kuang-huaChen Hsin-HsiChen 99-117 O93-1003 @@ -42,7 +42,7 @@ A Preliminary Study On Unknown Word Problem In <fixed-case>C</fixed-case>hinese Word Segmentation Ming-YuLin - Tung-HuiChiang + Tung-HuiChiang Keh-YihSu 119–141 O93-1004 @@ -53,7 +53,7 @@ Sung-ChenLin Lee-FengChien Keh-JiannChen - Lin-ShanLee + Lin-ShanLee 143–160 O93-1005 lin-etal-1993-guo @@ -61,7 +61,7 @@ Corpus-based Automatic Rule Selection in Designing a Grammar Checker Yuan-LingLiu - Shih-pingWang + Shih-pingWang Keh-YihSu 161–171 O93-1006 @@ -70,16 +70,16 @@ 中文辭彙岐義之研究─斷詞與詞性標示 (The Resolution of Lexicon Ambiguity in <fixed-case>C</fixed-case>hinese - Segmentation and Tagging) [In <fixed-case>C</fixed-case>hinese] Tsai-YenPeng - Jason S.Chang + Jason S.Chang 173–193 O93-1007 peng-chang-1993-zhong 從中文語料庫中自動選取連續國語語音特性平衡句的方法 (Automatic Selection of Phonetically Rich Sentences from A <fixed-case>C</fixed-case>hinese Text Corpus) [In <fixed-case>C</fixed-case>hinese] - Hsin-MinWang + Hsin-MinWang Yuan-ChengChang - Lin-ShanLee + Lin-ShanLee 195–206 O93-1008 wang-etal-1993-cong @@ -94,7 +94,7 @@ 中文文件自動分類之研究 (A Study of Document Auto-Classification in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese) [In <fixed-case>C</fixed-case>hinese] - Un-GianIunn + Un-GianIunn Ching-CyunHsien Shu-MeiChen Keh-JiannChen @@ -120,7 +120,7 @@ <fixed-case>FAWRMT</fixed-case>: With Special Emphasis On Grammar Designs And Partitioned Parsing Andy Wong ManHon - Suen CaesarLun + Suen CaesarLun 235–258 O93-2001 hon-lun-1993-fawrmt @@ -128,7 +128,7 @@ Toward Discourse-guided Theta-grid Chart Parsing for Madarin <fixed-case>C</fixed-case>hinese – A Preliminary Report Koong H. C.Lin - Von-WunSoo + Von-WunSoo 259–270 O93-2002 lin-soo-1993-toward @@ -137,8 +137,8 @@ Developing a <fixed-case>C</fixed-case>hinese Module in <fixed-case>UNITRAN</fixed-case> ZhibiaoWu Loke SooHsu - MarthaPalmer - Chew LimTan + MarthaPalmer + Chew LimTan 271–284 O93-2003 wu-etal-1993-developing diff --git a/data/xml/O94.xml b/data/xml/O94.xml index a0a740f947..78551feb94 100644 --- a/data/xml/O94.xml +++ b/data/xml/O94.xml @@ -18,16 +18,16 @@ Yuan-ChengChang Sung-ChenLin Lee-FengChien - Keh-JiannChen - Lin-ShanLee + Keh-JiannChen + Lin-ShanLee 17–34 chang-etal-1994-guo Yanhui (宴會), a Softwre Based High Performance <fixed-case>M</fixed-case>andarin Text-To-Speech System JohnChoi - Hsiao-WuenHon - Jean-LucLebrun + Hsiao-WuenHon + Jean-LucLebrun Sun-PinLee GarethLoudon Viet-HoangPhan @@ -52,9 +52,9 @@ A Practical Tagger for <fixed-case>C</fixed-case>hinese Corpora - Keh-jiannChen + Keh-jiannChen Shing-HuanLiu - Li-pingChang + Li-pingChang Yeh-HaoChin 111-126 O94-1005 @@ -62,14 +62,14 @@ Automatic Terminology Extraction For Thematic Corpus Based On Subterm Co-Occurrence - Chun-yuKit + Chun-yuKit 127–134 O94-1006 kit-1994-automatic An Estimation of the Entropy of <fixed-case>C</fixed-case>hinese – A New Approach to Constructing Class-based n-gram Models - Jyun-shengChang + Jyun-shengChang Yuh-JuhLin 149–169 O94-1007 @@ -77,7 +77,7 @@ Some Issues on Applying <fixed-case>SA</fixed-case>-class Bigram Language Models - Chun-JenLee + Chun-JenLee Keh-HwaShyu Eng-FongHuang Bor-ShennJeng @@ -87,7 +87,7 @@ A Text Conversion System Between Simplified and Complex <fixed-case>C</fixed-case>hinese Characters Based on <fixed-case>OCR</fixed-case> Approaches - Chun-JenLee + Chun-JenLee Keh-HwaShyu Eng-FongHuang Bor-ShennJeng @@ -133,7 +133,7 @@ 使用新式注音鍵盤及複合馬可夫語言模型之中文輸入系統 (A <fixed-case>C</fixed-case>hinese-character Inputting System Using a New Type of Phonetic Keyboard and a Compound <fixed-case>M</fixed-case>arkov Language Model) [In <fixed-case>C</fixed-case>hinese] - Hung-yanGu + Hung-yanGu Jr-yiauChen 253–262 O94-2002 @@ -148,7 +148,7 @@ <fixed-case>C</fixed-case>hinese-Word Segmentation Based On Maximal-Matching And Bigram Techniques - Wing-PongLuk + Wing-PongLuk 273–282 O94-2004 luk-1994-chinese diff --git a/data/xml/O95.xml b/data/xml/O95.xml index cee1198166..7ebab9f815 100644 --- a/data/xml/O95.xml +++ b/data/xml/O95.xml @@ -25,14 +25,14 @@ 適合大量中文文件全文檢索的索引及資料壓縮技術 (Full-text Indexing and Data Compression for <fixed-case>C</fixed-case>hinese Documents) [In <fixed-case>C</fixed-case>hinese] Lee-FengChien - Hung-yanGu + Hung-yanGu 31–42 O95-1002 chien-gu-1995-shi The New Generation <fixed-case>B</fixed-case>ehavior<fixed-case>T</fixed-case>ran: Design Philosophy And System Architecture - Yu-Ling UnaHsu + Yu-Ling UnaHsu Keh-YihSu 65–79 O95-1003 @@ -59,7 +59,7 @@ WenjieLi HaihuaPan MingZhou - Kam-FaiWong + Kam-FaiWong VincentLum 137–153 O95-1006 @@ -74,7 +74,7 @@ A Unifying Approach To Segmentation Of <fixed-case>C</fixed-case>hinese And Its Application To Text Retrieval - Jian-YunNie + Jian-YunNie XiaoboRen MartinBrisebois 175–190 diff --git a/data/xml/O96.xml b/data/xml/O96.xml index a75a21e86d..440389e66d 100644 --- a/data/xml/O96.xml +++ b/data/xml/O96.xml @@ -18,16 +18,16 @@ 中文連音二字詞之語音合成 (Coarticulation of Two-Syllable Words in <fixed-case>M</fixed-case>andarin Speech Synthesis) [In <fixed-case>C</fixed-case>hinese] Jun-WenHwang - Ming-ShingYu + Ming-ShingYu Shyh-YangHwang - Ming-JerWu + Ming-JerWu 37–60 O96-1001 hwang-etal-1996-zhong 時間比例基週波形內差 – 一個國語音節信號合成之新方法 (Time-Proportionated Interpolation of Pitch Waveforms – A New Method for <fixed-case>M</fixed-case>andarin Syllable-Signal Synthesis) [In <fixed-case>C</fixed-case>hinese] - Hung-yanGu + Hung-yanGu Wen-lungShiu 61–84 O96-1002 @@ -70,8 +70,8 @@ Sung-ChenLin Jyi-LungTsai Lee-FengChien - Keh-JiannChen - Lin-ShanLee + Keh-JiannChen + Lin-ShanLee 159–182 O96-1006 lin-etal-1996-guo @@ -87,15 +87,15 @@ A Preliminary Study of Disambiguating <fixed-case>VO</fixed-case>-and <fixed-case>VN</fixed-case>-Constructions Using Selection Preferences - Kok-WeeGan + Kok-WeeGan 233–253 O96-1008 gan-1996-preliminary 語料庫在辭典編輯上的運用 (The Application of Language Corpus on Dictionary Editing) [In <fixed-case>C</fixed-case>hinese] - Li-LiChang - Keh-JiannChen + Li-LiChang + Keh-JiannChen Chu-RenHuang 255–279 O96-1009 @@ -103,18 +103,18 @@ 語料庫為本的語義訊息抽取與辨析以近義詞研究為例 (Synonym Discrimination Based on Corpus) [In <fixed-case>C</fixed-case>hinese] - Mei-ChihTsai + Mei-ChihTsai Chu-RenHuang - Keh-JiannChen + Keh-JiannChen 281–293 O96-1010 tsai-etal-1996-yu 介詞翻譯法則的自動擷取 (Learning to Translate <fixed-case>E</fixed-case>nglish Prepositions) [In <fixed-case>C</fixed-case>hinese] - Jason S.Chang + Jason S.Chang Ruei-HungHsu - Huey-ChyunChen + Huey-ChyunChen 295–320 O96-1011 chang-etal-1996-jie @@ -148,7 +148,7 @@ Issues in Text-to-Speech Conversion for <fixed-case>M</fixed-case>andarin ChilinShih - RichardSproat + RichardSproat 37–86 O96-2002 shih-sproat-1996-issues @@ -157,7 +157,7 @@ A <fixed-case>M</fixed-case>andarin Text-to-Speech System Sin-HorngChen Shaw-HwaHwang - Yih-RuWang + Yih-RuWang 87–100 O96-2003 chen-etal-1996-mandarin @@ -165,7 +165,7 @@ An Overview of Corpus-Based Statistics-Oriented (<fixed-case>CBSO</fixed-case>) Techniques for Natural Language Processing Keh-YihSu - Tung-HuiChiang + Tung-HuiChiang Jing-ShinChang 101–158 O96-2004 @@ -173,7 +173,7 @@ A Hybrid Approach to Machine Translation System Design - Kuang-HuaChen + Kuang-HuaChen Hsin-HsiChen 159–182 O96-2005 @@ -181,7 +181,7 @@ A Model for Robust <fixed-case>C</fixed-case>hinese Parser - Keh-JiannChen + Keh-JiannChen 183–204 O96-2006 chen-1996-model diff --git a/data/xml/O97.xml b/data/xml/O97.xml index 10a900b921..5fa274390f 100644 --- a/data/xml/O97.xml +++ b/data/xml/O97.xml @@ -4,9 +4,9 @@ Proceedings of the 10th Research on Computational Linguistics International Conference O97-1 - Keh-JiannChen + Keh-JiannChen Chu-RenHuang - RichardSproat + RichardSproat The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
Taipei, Taiwan
August @@ -19,7 +19,7 @@ Meaning Representation and Meaning Instantiation for <fixed-case>C</fixed-case>hinese Nominals KathleenAhrens - Li-liChang + Li-liChang Keh-JiannChen Chu-RenHuang 4–18 @@ -36,7 +36,7 @@ Towards a Representation of Verbal Semantics – An Approach Based on Near Synonyms - Mei-chihTsai + Mei-chihTsai Chu-RenHuang Keh-jiannChen KathleenAhrens @@ -48,15 +48,15 @@ Word Sense Disambiguation Based on The Information Theory HoLee Dae-HoBaek - Hae-ChangRim + Hae-ChangRim 49–58 O97-1004 lee-etal-1997-word An Agreement Error Correction Method Based on a Multicriteria Approach: An Application to <fixed-case>A</fixed-case>rabic Language - Belguith HadrichLamia - Ben HamadouAbdelmajid + LamiaBelguith Hadrich + AbdelmajidBen Hamadou AloulouChafik 59–75 O97-1005 @@ -113,16 +113,16 @@ Analyzing the Complexity of a Domain With Respect To An Information Extraction Task AmitBagga - Alan W.Biermann + Alan W.Biermann 175–194 O97-1012 bagga-biermann-1997-analyzing Human Judgment as a Basis for Evaluation of Discourse-Connective-based Full-text Abstraction in <fixed-case>C</fixed-case>hinese - Benjamin K.T’sou - Hing-LungLin - Tom B. Y.Lai + Benjamin K.T’sou + Hing-LungLin + Tom B. Y.Lai 195–208 O97-1013 tsou-etal-1997-human @@ -131,7 +131,7 @@ An Assessment on Character-based <fixed-case>C</fixed-case>hinese News Filtering Using Latent Semantic Indexing Shih-HungWu Pey-ChingYang - Von-WunSoo + Von-WunSoo 209–223 O97-1014 wu-etal-1997-assessment @@ -141,7 +141,7 @@ Tai-HsuanHo Kae-CherngYang Juei-SungLin - Lin-ShanLee + Lin-ShanLee 287–299 O97-1015 ho-etal-1997-integrating @@ -179,7 +179,7 @@ Kae-CherngYang Tai-HsuanHo Juei-SungLin - Lin-ShanLee + Lin-ShanLee 335–344 O97-1019 yang-etal-1997-truncation @@ -188,14 +188,14 @@ Recognizing <fixed-case>K</fixed-case>orean Unknown Proper Nouns by Using Automatically Extracted Lexical Clues Bong-RaePark Young-SookHwang - Hae-ChangRim + Hae-ChangRim 345–356 O97-1020 park-etal-1997-recognizing Logical Operators and Quantifiers in Natural Language - Shin-ichiroKamei + Shin-ichiroKamei KazunoriMuraki 357–367 O97-1021 @@ -204,7 +204,7 @@ <fixed-case>C</fixed-case>hinese Text Compression Using <fixed-case>C</fixed-case>hinese Language Information Processing [In <fixed-case>C</fixed-case>hinese] JunGao - XixianChen + XixianChen 368–379 O97-1022 gao-chen-1997-chinese @@ -237,7 +237,7 @@ Rejection in Speech Recognition Based on <fixed-case>CDCPM</fixed-case>s MingxingXu - FangZheng + FangZheng WenhuWu 412–419 O97-1026 @@ -267,10 +267,10 @@ <fixed-case>C</fixed-case>hinese Word Segmentation and Part-of-Speech Tagging in One Step - Tom B.Y.Lai + Tom B.Y.Lai MaosongSun - Benjamin K.T’sou - S. CaesarLun + Benjamin K.T’sou + S. CaesarLun 229–236 O97-2002 lai-etal-1997-chinese @@ -278,15 +278,15 @@ Corpus-Based <fixed-case>C</fixed-case>hinese Text Summarization System Jun-JieLi - Key-SunChoi + Key-SunChoi 237–241 O97-2003 li-choi-1997-corpus A Study on the Portability of a Grammatical Inference System - Hsue-HuehShih - SteveYoung + Hsue-HuehShih + SteveYoung 242–246 O97-2004 shih-young-1997-study @@ -338,7 +338,7 @@ The Description of the Intra-State Feature Space in Speech Recognition - FangZheng + FangZheng MingxingXu WenhuWu 272–276 @@ -374,7 +374,7 @@ Computational Tools and Resources for Linguistic Studies - Yu-Ling UnaHsu + Yu-Ling UnaHsu Jing-ShinChang Keh-YihSu 1–40 @@ -397,8 +397,8 @@ A Synchronous <fixed-case>C</fixed-case>hinese Language Corpus from Different Speech Communities: Construction and Applications - Benjamin K.T’sou - Hing-LungLin + Benjamin K.T’sou + Hing-LungLin GodfreyLiu TerenceChan JeromeHu @@ -455,17 +455,17 @@ Segmentation Standard for <fixed-case>C</fixed-case>hinese Natural Language Processing Chu-RenHuang - Keh-JiannChen - Li-LiChang - Feng-YiChen + Keh-JiannChen + Li-LiChang + Feng-YiChen 47–62 O97-4003 huang-etal-1997-segmentation Aligning More Words with High Precision for Small Bilingual Corpora - Sue J.Ker - Jason S.Chang + Sue J.Ker + Jason S.Chang 63–96 O97-4004 ker-chang-1997-aligning diff --git a/data/xml/O98.xml b/data/xml/O98.xml index 49f20bba0f..3f0d8f71cf 100644 --- a/data/xml/O98.xml +++ b/data/xml/O98.xml @@ -26,15 +26,15 @@ 以語境判定中文未知詞詞類的方法 (Guessing Parts-Of-Speech For <fixed-case>C</fixed-case>hinese Unknown Words Using Context Information) [In <fixed-case>C</fixed-case>hinese] Ming-HongBai - Chao-JanChen - Keh-JiannChen + Chao-JanChen + Keh-JiannChen 47–62 O98-1002 bai-etal-1998-yi 應用動態、靜待辭典以加速鍵盤輸入中文之方法 (A Dynamic-and Static-Dictionaries Based Method for Accelerating <fixed-case>C</fixed-case>hinese-Character Inputting with Keyboard) [In <fixed-case>C</fixed-case>hinese] - Hung-yanGu + Hung-yanGu Chung-ChiehYang 73–86 O98-1003 @@ -43,26 +43,26 @@ Quantitative Criteria for Computational <fixed-case>C</fixed-case>hinese Lexicography Chu-RenHuang - Zhao-mingGao + Zhao-mingGao Claude C.C.Shen - Keh-JiannChen + Keh-JiannChen 87–108 O98-1004 huang-etal-1998-quantitative Speaker-Independent Continuous <fixed-case>M</fixed-case>andarin Speech Recognition Under Telephone Environments - Jia-LinShen - Ying-ChiehTu - Po-YuLiang - Lin-ShanLee + Jia-LinShen + Ying-ChiehTu + Po-YuLiang + Lin-ShanLee 119–137 O98-1005 shen-etal-1998-speaker A Large-Vocabulary <fixed-case>T</fixed-case>aiwanese (<fixed-case>M</fixed-case>in-nan) Speech Recognition System Based on Inter-syllabic Initial-Final Modeling and Lexicon-Tree Search - Ren-YuanLyu + Ren-YuanLyu Yuang-JinChiang Ren-JouFang Wen-PingHsieh @@ -72,7 +72,7 @@ Using Keyword Spotting and Utterance Verification to a Prank Call Rejection System - Chun-JenLee + Chun-JenLee Eng-FongHuang Jung-KueiChen 151–162 @@ -84,7 +84,7 @@ Chun-LiangChen Bo-RenBai Lee-FengChien - Lin-ShanLee + Lin-ShanLee 189–203 O98-1008 chen-etal-1998-cpat @@ -131,14 +131,14 @@ 結合統計與規則的多層次中文斷詞系統 (A hierarchical <fixed-case>C</fixed-case>hinese word segmentation system based on statistical and rule-based methods) [In <fixed-case>C</fixed-case>hinese] Chung-ChenChen - Wen-LianHsu + Wen-LianHsu 63–72 O98-2001 chen-hsu-1998-jie The Design of Sem-Syn Initial Grammar in <fixed-case>C</fixed-case>hinese Grammatical Inference - Hsue-HuehShih + Hsue-HuehShih 109–118 O98-2002 shih-1998-design @@ -155,9 +155,9 @@ A Way to Extract Unknown Words Without Dictionary from <fixed-case>C</fixed-case>hinese Corpus and Its Applications Yih-JengLin - Ming-ShingYu + Ming-ShingYu Shyh-YangHwang - Ming-JerWu + Ming-JerWu 217–226 O98-2004 lin-etal-1998-way @@ -177,14 +177,14 @@ Analyzing the Performance of Message Understanding Systems AmitBagga - Alan W.Biermann + Alan W.Biermann 1–26 O98-3001 bagga-biermann-1998-analyzing Unknown Word Detection for <fixed-case>C</fixed-case>hinese by a Corpus-based Learning Method - Keh-JiannChen + Keh-JiannChen Ming-HongBai 27–44 O98-3002 @@ -193,8 +193,8 @@ Meaning Representation and Meaning Instantiation for <fixed-case>C</fixed-case>hinese Nominals KathleenAhrens - Li-LiChang - Ke-JiannChen + Li-LiChang + Ke-JiannChen Chu-RenHuang 45–60 O98-3003 @@ -202,9 +202,9 @@ Towards a Representation of Verbal Semantics – An Approach Based on Near-Synonyms - Mei-ChihTsai + Mei-ChihTsai Chu-RenHuang - Keh-JiannChen + Keh-JiannChen KathleenAhrens 61–74 O98-3004 @@ -220,10 +220,10 @@ Human Judgment as a Basis for Evaluation of Discourse-Connective-Based Full-Text Abstraction in <fixed-case>C</fixed-case>hinese - Benjamin K.T’sou - Hing-LungLin - Tom B. Y.Lai - Samuel W. K.Chan + Benjamin K.T’sou + Hing-LungLin + Tom B. Y.Lai + Samuel W. K.Chan 101–116 O98-3006 tsou-etal-1998-human @@ -242,15 +242,15 @@ Senses and Texts - YorickWilks + YorickWilks 1–16 O98-4001 wilks-1998-senses Information Extraction: Beyond Document Retrieval - RobertGaizauskas - YorickWilks + RobertGaizauskas + YorickWilks 17–60 O98-4002 gaizauskas-wilks-1998-information @@ -259,7 +259,7 @@ An Assessment of Character-based <fixed-case>C</fixed-case>hinese News Filtering Using Latent Semantic Indexing Shih-HungWu Pey-ChingYang - Von-WunSoo + Von-WunSoo 61–78 O98-4003 wu-etal-1998-assessment @@ -273,7 +273,7 @@ Statistical Analysis of <fixed-case>M</fixed-case>andarin Acoustic Units and Automatic Extraction of Phonetically Rich Sentences Based Upon a Very Large <fixed-case>C</fixed-case>hinese Text Corpus - Hsin-minWang + Hsin-minWang 93–114 O98-4005 wang-1998-statistical diff --git a/data/xml/O99.xml b/data/xml/O99.xml index d76b5a162e..5f8661369d 100644 --- a/data/xml/O99.xml +++ b/data/xml/O99.xml @@ -35,9 +35,9 @@ Semantic Classification for Patterns Containing Non-Text Symbols in <fixed-case>M</fixed-case>andarin Text - Feng-LongHwang - Ming-shingYu - Ming-JerWu + Feng-LongHwang + Ming-shingYu + Ming-JerWu Shyh-YangHwang 55–66 O99-1003 @@ -45,15 +45,15 @@ 動詞詞構與語法功能互動初探 (An Explorative Study on the Interaction Between Verb Compound Constructions and Syntactic Functions) [In <fixed-case>C</fixed-case>hinese] - Li-LiChang - Keh-JiannChen + Li-LiChang + Keh-JiannChen 67–85 O99-1004 chang-chen-1999-dong Semantic Representation of Verbal Information – A Case from <fixed-case>M</fixed-case>andarin Verbs of Judging - Mei-ChunLiu + Mei-ChunLiu Chu-RenHuang Jia-YingLee 87–100 @@ -62,15 +62,15 @@ 階層式文件自動分類之特徵選取研究 (A Study on Feature Selection in Hierarchical Text Classification) [In <fixed-case>C</fixed-case>hinese] - Su-JinKer - Jen-NanChen + Su-JinKer + Jen-NanChen 137–149 O99-1006 ker-chen-1999-jie Automatically Controlled-Vocabulary Indexing for Text Retrieval - Kuang-HuaChen + Kuang-HuaChen Chien-TinWu 171–185 O99-1007 @@ -78,7 +78,7 @@ A New Syllable-based Approach for Retrieving <fixed-case>M</fixed-case>andarin Spoken Documents Using Short Speech Queries - Hsin-minWang + Hsin-minWang 187–202 O99-1008 wang-1999-new @@ -103,7 +103,7 @@ 音框同步之雜訊補償方法在汽車語音辨識之應用 (Frame Synchronous Noise Compensation for Car Speech Recognition) [In <fixed-case>C</fixed-case>hinese] - Jen-TzungChien + Jen-TzungChien Ming-ShunLin 239–251 O99-1011 @@ -125,7 +125,7 @@ An Analytical Study of Transformational Tagging for <fixed-case>C</fixed-case>hinese Text - HelenMeng + HelenMeng Chun WahIp 101-122 O99-2001 @@ -160,10 +160,10 @@ Telephony Based Speaker-Independent Continuous <fixed-case>M</fixed-case>andarin Syllable Recognition - Jia-linShen - Ying-chiehTu - Po-yuLiang - Lin-shanLee + Jia-linShen + Ying-chiehTu + Po-yuLiang + Lin-shanLee 1–24 O99-3001 shen-etal-1999-telephony @@ -207,7 +207,7 @@ A Model for Word Sense Disambiguation JuanziLi - ChangningHuang + ChangningHuang 1–20 O99-4001 li-huang-1999-model @@ -223,7 +223,7 @@ 基於知網的常識知識標注 (General Knowledge Annotation Based on How-net) [In <fixed-case>C</fixed-case>hinese] - Kok WeeGan + Kok WeeGan Wai MunTham 39–86 O99-4003 @@ -231,9 +231,9 @@ 中文句結構樹資料庫的構建 (<fixed-case>S</fixed-case>inica <fixed-case>T</fixed-case>reebank) [In <fixed-case>C</fixed-case>hinese] - Feng-YiChen + Feng-YiChen Pi-FangTsai - Keh-JiannChen + Keh-JiannChen Chu-RenHunag 87–104 O99-4004 diff --git a/data/xml/P00.xml b/data/xml/P00.xml index a10ec20966..55b21a9aa6 100644 --- a/data/xml/P00.xml +++ b/data/xml/P00.xml @@ -14,7 +14,7 @@ Invited Talk: Processes that Shape Conversation and their Implications for Computational Linguistics - Susan E.Brennan + Susan E.Brennan 10.3115/1075218.1075219 1–11 P00-1001 @@ -22,7 +22,7 @@ Invited Talk: Generic <fixed-case>NLP</fixed-case> Technologies: Language, Knowledge and Information Extraction - Jun’ichiTsujii + Jun’ichiTsujii 10.3115/1075218.1075220 12–22 P00-1002 @@ -30,7 +30,7 @@ Invited Talk: Spoken Language Technology: Where Do We Go From Here? - Roger K.Moore + Roger K.Moore 10.3115/1075218.1075221 22–22 P00-1003 @@ -38,8 +38,8 @@ Translation with Cascaded Finite State Transducers - StephanVogel - HermannNey + StephanVogel + HermannNey 10.3115/1075218.1075222 23–30 P00-1004 @@ -47,8 +47,8 @@ Phrase-Pattern-based <fixed-case>K</fixed-case>orean to <fixed-case>E</fixed-case>nglish Machine Translation using Two Level Translation Pattern Selection - Jung-jaeKim - Key-SunChoi + Jung-jaeKim + Key-SunChoi Young-SoogChae 10.3115/1075218.1075223 31–36 @@ -99,7 +99,7 @@ Tagging Unknown Proper Names Using Decision Trees - FrédéricBéchet + FrédéricBéchet AlexisNasr FranckGenet 10.3115/1075218.1075229 @@ -109,7 +109,7 @@ The Order of Prenominal Adjectives in Natural Language Generation - RobertMalouf + RobertMalouf 10.3115/1075218.1075230 85–92 P00-1012 @@ -137,7 +137,7 @@ A Unified Statistical Model for the Identification of <fixed-case>E</fixed-case>nglish <fixed-case>B</fixed-case>ase<fixed-case>NP</fixed-case> EndongXun - ChangningHuang + ChangningHuang MingZhou 10.3115/1075218.1075233 109–116 @@ -155,7 +155,7 @@ Using Existing Systems to Supplement Small Amounts of Annotated Grammatical Relations Training Data - AlexanderYeh + AlexanderYeh 10.3115/1075218.1075235 126–132 P00-1017 @@ -171,7 +171,7 @@ Can Nominal Expressions Achieve Multiple Goals?: An Empirical Study - PamelaJordan + PamelaJordan 10.3115/1075218.1075237 142–149 P00-1019 @@ -180,7 +180,7 @@ An Empirical Study of the Influence of Argument Conciseness on Argument Effectiveness GiuseppeCarenini - Johanna D.Moore + Johanna D.Moore 10.3115/1075218.1075238 150–157 P00-1020 @@ -188,7 +188,7 @@ Multi-Agent Explanation Strategies in Real-Time Domains - KumikoTanaka-Ishii + KumikoTanaka-Ishii IanFrank 10.3115/1075218.1075239 158–165 @@ -197,8 +197,8 @@ A Computational Approach to Zero-pronouns in <fixed-case>S</fixed-case>panish - AntonioFerrández - JesúsPeral + AntonioFerrández + JesúsPeral 10.3115/1075218.1075240 166–172 P00-1022 @@ -206,7 +206,7 @@ Coreference for <fixed-case>NLP</fixed-case> Applications - Thomas S.Morton + Thomas S.Morton 10.3115/1075218.1075241 173–180 P00-1023 @@ -214,8 +214,8 @@ Learning Attribute Selections for Non-Pronominal Expressions - PamelaJordan - MarilynWalker + PamelaJordan + MarilynWalker 10.3115/1075218.1075242 181–190 P00-1024 @@ -232,7 +232,7 @@ A Morphologically Sensitive Clustering Algorithm for Identifying <fixed-case>A</fixed-case>rabic Roots - Anne N.De Roeck + Anne N.De Roeck WaleedAl-Fares 10.3115/1075218.1075244 199–206 @@ -259,7 +259,7 @@ Inducing Probabilistic Syllable Classes Using Multivariate Clustering KarinMüller - BerndMöbius + BerndMöbius DetlefPrescher 10.3115/1075218.1075247 225–232 @@ -269,7 +269,7 @@ Modeling Local Context for Pitch Accent Prediction ShimeiPan - JuliaHirschberg + JuliaHirschberg 10.3115/1075218.1075248 233–240 P00-1030 @@ -278,7 +278,7 @@ A New Statistical Approach To <fixed-case>C</fixed-case>hinese <fixed-case>P</fixed-case>inyin Input ZhengChen - Kai-FuLee + Kai-FuLee 10.3115/1075218.1075249 241–247 P00-1031 @@ -288,7 +288,7 @@ Automatic Detecting/Correcting Errors in <fixed-case>C</fixed-case>hinese Text by an Approximate Word-Matching Algorithm LeiZhang MingZhou - ChangningHuang + ChangningHuang HaihuaPan 10.3115/1075218.1075250 248–254 @@ -297,8 +297,8 @@ Dependency-based Syntactic Analysis of <fixed-case>C</fixed-case>hinese and Annotation of Parsed Corpus - Tom B.Y.Lai - ChangningHuang + Tom B.Y.Lai + ChangningHuang 10.3115/1075218.1075251 255–262 P00-1033 @@ -307,8 +307,8 @@ Part-of-Speech Tagging Based on Hidden <fixed-case>M</fixed-case>arkov Model Assuming Joint Independence Sang-ZooLee - Jun’ichiTsujii - Hae-ChangRim + Jun’ichiTsujii + Hae-ChangRim 10.3115/1075218.1075252 263–269 P00-1034 @@ -316,7 +316,7 @@ Language Independent, Minimally Supervised Induction of Lexical Probabilities - SilviuCucerzan + SilviuCucerzan DavidYarowsky 10.3115/1075218.1075253 270–277 @@ -334,7 +334,7 @@ An Improved Error Model for Noisy Channel Spelling Correction EricBrill - Robert C.Moore + Robert C.Moore 10.3115/1075218.1075255 286–293 P00-1037 @@ -342,8 +342,8 @@ Query-Relevant Summarization using <fixed-case>FAQ</fixed-case>s - AdamBerger - Vibhu O.Mittal + AdamBerger + Vibhu O.Mittal 10.3115/1075218.1075256 294–301 P00-1038 @@ -371,8 +371,8 @@ Headline Generation Based on Statistical Translation MicheleBanko - Vibhu O.Mittal - Michael J.Witbrock + Vibhu O.Mittal + Michael J.Witbrock 10.3115/1075218.1075259 318–325 P00-1041 @@ -404,7 +404,7 @@ Difficulty Indices for the Named Entity Task in <fixed-case>J</fixed-case>apanese ChikashiNobata SatoshiSekine - Jun’ichiTsujii + Jun’ichiTsujii 10.3115/1075218.1075262 344–351 P00-1044 @@ -412,7 +412,7 @@ Memory-Efficient and Thread-Safe Quasi-Destructive Graph Unification - Marcel P.van Lohuizen + Marcel P.van Lohuizen 10.3115/1075218.1075263 352–359 P00-1045 @@ -439,8 +439,8 @@ Hidden <fixed-case>M</fixed-case>arkov Model-Based <fixed-case>K</fixed-case>orean Part-of-Speech Tagging Considering High Agglutinativity, Word-Spacing, and Lexical Correlativity Sang-ZooLee - Jun’ichiTsujii - Hae-ChangRim + Jun’ichiTsujii + Hae-ChangRim 10.3115/1075218.1075266 384–391 P00-1048 @@ -457,7 +457,7 @@ <fixed-case>C</fixed-case>hinese-<fixed-case>K</fixed-case>orean Word Alignment Based on Linguistic Comparison Jin-XiaHuang - Key-SunChoi + Key-SunChoi 10.3115/1075218.1075268 392–399 P00-1050 @@ -487,7 +487,7 @@ A Hierarchical Account of Referential Accessibility - NancyIde + NancyIde DanCristea 10.3115/1075218.1075271 416–424 @@ -496,7 +496,7 @@ Lexical Transfer Using a Vector-Space Model - EiichiroSumita + EiichiroSumita 10.3115/1075218.1075272 425–431 P00-1054 @@ -505,7 +505,7 @@ Using Confidence Bands for Parallel Texts Alignment AntónioRibeiro - GabrielLopes + GabrielLopes JoãoMexia 10.3115/1075218.1075273 432–439 @@ -514,8 +514,8 @@ Improved Statistical Alignment Models - Franz JosefOch - HermannNey + Franz JosefOch + HermannNey 10.3115/1075218.1075274 440–447 P00-1056 @@ -541,8 +541,8 @@ Corpus-Based Lexical Choice in Natural Language Generation - SrinivasBangalore - OwenRambow + SrinivasBangalore + OwenRambow 10.3115/1075218.1075277 464–471 P00-1059 @@ -580,9 +580,9 @@ Term Recognition Using Technical Dictionary Hierarchy - Jong-HoonOh - KyungSoonLee - Key-SunChoi + Jong-HoonOh + KyungSoonLee + Key-SunChoi 10.3115/1075218.1075281 496–503 P00-1063 @@ -601,7 +601,7 @@ Automatic Labeling of Semantic Roles DanielGildea - DanielJurafsky + DanielJurafsky 10.3115/1075218.1075283 512–520 P00-1065 @@ -623,7 +623,7 @@ MingZhou JianfengGao EndongXun - ChangningHuang + ChangningHuang 10.3115/1075218.1075285 529–536 P00-1067 @@ -631,7 +631,7 @@ Diagnostic Processing of <fixed-case>J</fixed-case>apanese for Computer-Assisted Second Language Learning - Jun’ichiKakegawa + Jun’ichiKakegawa HisayukiKanda EitaroFujioka MakotoItami @@ -645,7 +645,7 @@ Word Sense Disambiguation by Learning from Unlabeled Data Seong-BaePark Byoung-TakZhang - Yung TaekKim + Yung TaekKim 10.3115/1075218.1075287 547–554 P00-1069 @@ -653,8 +653,8 @@ Importance of Pronominal Anaphora Resolution in Question Answering Systems - José L.Vicedo - AntonioFerrández + José L.Vicedo + AntonioFerrández 10.3115/1075218.1075288 555–562 P00-1070 @@ -662,11 +662,11 @@ The Structure and Performance of an Open-Domain Question Answering System - DanMoldovan - SandaHarabagiu - MariusPasca - RadaMihalcea - RoxanaGirju + DanMoldovan + SandaHarabagiu + MariusPasca + RadaMihalcea + RoxanaGirju RichardGoodrum VasileRus 10.3115/1075218.1075289 @@ -677,7 +677,7 @@ Dimension-Reduced Estimation of Word Co-occurrence Probability KilyounKim - Key-SunChoi + Key-SunChoi 10.3115/1075218.1075290 571–578 P00-1072 @@ -686,7 +686,7 @@ Distribution-Based Pruning of Backoff Language Models JianfengGao - Kai-FuLee + Kai-FuLee 10.3115/1075218.1075291 579–588 P00-1073 @@ -694,7 +694,7 @@ Panel: Computational Linguistics Research on <fixed-case>P</fixed-case>hilippine Languages - Rachel Edita O.Roxas + Rachel Edita O.Roxas AllanBorra 10.3115/1075218.1075292 1–2 @@ -719,7 +719,7 @@ Panel: Computational Linguistics in <fixed-case>I</fixed-case>ndia: An Overview - AksharBharati + AksharBharati VineetChaitanya RajeevSangal 10.3115/1075218.1075295 @@ -740,7 +740,7 @@ Panel: Computational Linguistics in <fixed-case>M</fixed-case>alaysia - ZaharinYusoff + ZaharinYusoff 10.3115/1075218.1075297 1–2 P00-1079 diff --git a/data/xml/P01.xml b/data/xml/P01.xml index c4be77b3e9..cda1f07cfd 100644 --- a/data/xml/P01.xml +++ b/data/xml/P01.xml @@ -23,10 +23,10 @@ Invited Talk: Processing Broadcast Audio for Information Access Jean-LucGauvain - LoriLamel - GillesAdda - MartineAdda-Decker - ClaudeBarras + LoriLamel + GillesAdda + MartineAdda-Decker + ClaudeBarras LangzhouChen Yannickde Kercadio 10.3115/1073012.1073014 @@ -36,8 +36,8 @@ Improvement of a Whole Sentence Maximum Entropy Language Model Using Grammatical Features - Fredy A.Amaya - José MiguelBenedí + Fredy A.Amaya + José MiguelBenedí 10.3115/1073012.1073015 10–17 P01-1003 @@ -45,7 +45,7 @@ Low-cost, High-performance Translation Retrieval: Dumber is Better - TimothyBaldwin + TimothyBaldwin 10.3115/1073012.1073016 18–25 P01-1004 @@ -62,8 +62,8 @@ Evaluation Tool for Rule-based Anaphora Resolution Methods - CatalinaBarbu - RuslanMitkov + CatalinaBarbu + RuslanMitkov 10.3115/1073012.1073018 34–41 P01-1006 @@ -71,10 +71,10 @@ Guided Parsing of Range Concatenation Languages - FrançoisBarthélemy + FrançoisBarthélemy PierreBoullier PhilippeDeschamp - ÉricVillemonte de la Clergerie + ÉricVillemonte de la Clergerie 10.3115/1073012.1073019 42–49 P01-1007 @@ -83,7 +83,7 @@ Extracting Paraphrases from a Parallel Corpus ReginaBarzilay - Kathleen R.McKeown + Kathleen R.McKeown 10.3115/1073012.1073020 50–57 P01-1008 @@ -118,8 +118,8 @@ Detecting Problematic Turns in Human-Machine Interactions: Rule-induction Versus Memory-based Learning Approaches - Antalvan den Bosch - EmielKrahmer + Antalvan den Bosch + EmielKrahmer MarcSwerts 10.3115/1073012.1073024 82–89 @@ -140,7 +140,7 @@ JillBurstein DanielMarcu SlavaAndreyev - MartinChodorow + MartinChodorow 10.3115/1073012.1073026 98–105 P01-1014 @@ -148,13 +148,13 @@ From <fixed-case>RAGS</fixed-case> to <fixed-case>RICHES</fixed-case>: Exploiting the Potential of a Flexible Generation Architecture - LynneCahill - JohnCarroll - RogerEvans - DanielPaiva + LynneCahill + JohnCarroll + RogerEvans + DanielPaiva RichardPower - DoniaScott - Keesvan Deemter + DoniaScott + Keesvan Deemter 10.3115/1073012.1073027 106–113 P01-1015 @@ -163,9 +163,9 @@ Non-Verbal Cues for Discourse Structure JustineCassell - YukikoNakano - Timothy W.Bickmore - Candace L.Sidner + YukikoNakano + Timothy W.Bickmore + Candace L.Sidner CharlesRich 10.3115/1073012.1073028 114–123 @@ -193,7 +193,7 @@ An Algebra for Semantic Construction in Constraint-based Grammars AnnCopestake AlexLascarides - DanFlickinger + DanFlickinger 10.3115/1073012.1073031 140–147 P01-1019 @@ -201,7 +201,7 @@ A Machine Learning Approach to the Automatic Evaluation of Machine Translation - SimonCorston-Oliver + SimonCorston-Oliver MichaelGamon ChrisBrockett 10.3115/1073012.1073032 @@ -219,10 +219,10 @@ Practical Issues in Compiling Typed Unification Grammars for Speech Recognition - JohnDowding - Beth AnnHockey - Jean MarkGawron - ChristopherCuly + JohnDowding + Beth AnnHockey + Jean MarkGawron + ChristopherCuly 10.3115/1073012.1073034 164–171 P01-1022 @@ -230,8 +230,8 @@ Empirically Estimating Order Constraints for Content Planning in Generation - Pablo A.Duboue - Kathleen R.McKeown + Pablo A.Duboue + Kathleen R.McKeown 10.3115/1073012.1073035 172–179 P01-1023 @@ -266,10 +266,10 @@ Refined Lexicon Models for Statistical Machine Translation using a Maximum Entropy Approach - IsmaelGarcía-Varea - Franz J.Och - HermannNey - FranciscoCasacuberta + IsmaelGarcía-Varea + Franz J.Och + HermannNey + FranciscoCasacuberta 10.3115/1073012.1073039 204–211 P01-1027 @@ -296,7 +296,7 @@ Fast Decoding and Optimal Decoding for Machine Translation UlrichGermann - MichaelJahr + MichaelJahr KevinKnight DanielMarcu KenjiYamada @@ -319,7 +319,7 @@ Mapping Lexical Entries in a Verbs Database to <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Senses RebeccaGreen LisaPearl - Bonnie J.Dorr + Bonnie J.Dorr PhilipResnik 10.3115/1073012.1073044 244–251 @@ -345,11 +345,11 @@ Serial Combination of Rules and Statistics: A Case Study in <fixed-case>C</fixed-case>zech Tagging - JanHajic + JanHajic PavelKrbec - PavelKveton - KarelOliva - VladimirPetkevic + PavelKveton + KarelOliva + VladimirPetkevic 10.3115/1073012.1073047 268–275 P01-1035 @@ -357,8 +357,8 @@ Topic-focus and Salience - EvaHajicová - PetrSgall + EvaHajicová + PetrSgall 10.3115/1073012.1073048 276–281 P01-1036 @@ -366,15 +366,15 @@ The Role of Lexico-Semantic Feedback in Open-Domain Textual Question-Answering - SandaHarabagiu - DanMoldovan - MariusPasca - RadaMihalcea + SandaHarabagiu + DanMoldovan + MariusPasca + RadaMihalcea MihaiSurdeanu RazvanBunsecu - RoxanaGirju + RoxanaGirju VasileRus - PaulMorarescu + PaulMorarescu 10.3115/1073012.1073049 282–289 P01-1037 @@ -383,7 +383,7 @@ Generation of <fixed-case>VP</fixed-case> Ellipsis: A Corpus-Based Approach DanielHardt - OwenRambow + OwenRambow 10.3115/1073012.1073050 290–297 P01-1038 @@ -391,8 +391,8 @@ Information Extraction from Voicemail - JingHuang - GeoffreyZweig + JingHuang + GeoffreyZweig MukundPadmanabhan 10.3115/1073012.1073051 298–305 @@ -401,8 +401,8 @@ A Common Framework for Syntactic Annotation - NancyIde - LaurentRomary + NancyIde + LaurentRomary 10.3115/1073012.1073052 306–313 P01-1040 @@ -435,7 +435,7 @@ Parsing with Treebank Grammars: Empirical Bounds, Theoretical Models, and the Structure of the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank DanKlein - Christopher D.Manning + Christopher D.Manning 10.3115/1073012.1073056 338–345 P01-1044 @@ -443,8 +443,8 @@ From Chunks to Function-Argument Structure: A Similarity-Based Approach - SandraKübler - Erhard W.Hinrichs + SandraKübler + Erhard W.Hinrichs 10.3115/1073012.1073057 346–353 P01-1045 @@ -471,8 +471,8 @@ Predicting User Reactions to System Error - DianeLitman - JuliaHirschberg + DianeLitman + JuliaHirschberg MarcSwerts 10.3115/1073012.1073060 370–377 @@ -482,7 +482,7 @@ Building Semantic Perceptron Net for Topic Spotting JiminLiu - Tat-SengChua + Tat-SengChua 10.3115/1073012.1073061 378–385 P01-1049 @@ -498,8 +498,8 @@ Error Profiling: Toward a Model of <fixed-case>E</fixed-case>nglish Acquisition for Deaf Learners - Lisa N.Michaud - Kathleen F.McCoy + Lisa N.Michaud + Kathleen F.McCoy 10.3115/1073012.1073063 394–401 P01-1051 @@ -507,7 +507,7 @@ Logic Form Transformation of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and its Applicability to Question Answering - DanMoldovan + DanMoldovan VasileRus 10.3115/1073012.1073064 402–409 @@ -537,7 +537,7 @@ FrancisWolinski GeorgiosPaliouras VangelisKarkaletsis - Constantine D.Spyropoulos + Constantine D.Spyropoulos 10.3115/1073012.1073067 426–433 P01-1055 @@ -545,9 +545,9 @@ Evaluating a Trainable Sentence Planner for a Spoken Dialogue System - OwenRambow + OwenRambow MonicaRogati - Marilyn A.Walker + Marilyn A.Walker 10.3115/1073012.1073068 434–441 P01-1056 @@ -577,7 +577,7 @@ Producing Biographical Summaries: Combining Linguistic Knowledge with Corpus Statistics BarrySchiffman InderjeetMani - KristianConcepcion + KristianConcepcion 10.3115/1073012.1073071 458–465 P01-1059 @@ -603,8 +603,8 @@ Incremental Construction of Compact Acyclic <fixed-case>NFA</fixed-case>s Kyriakos N.Sgarbas - Nikos D.Fakotakis - George K.Kokkinakis + Nikos D.Fakotakis + George K.Kokkinakis 10.3115/1073012.1073074 482–489 P01-1062 @@ -612,7 +612,7 @@ A <fixed-case>B</fixed-case>ayesian Model For Morpheme and Paradigm Identification - Matthew G.Snover + Matthew G.Snover Michael R.Brent 10.3115/1073012.1073075 490–498 @@ -630,7 +630,7 @@ A Generic Approach to Parallel Chart Parsing with an Application to <fixed-case>L</fixed-case>in<fixed-case>GO</fixed-case> - Marcel P.Van Lohuizen + Marcel P.Van Lohuizen 10.3115/1073012.1073077 507–514 P01-1065 @@ -638,9 +638,9 @@ Quantitative and Qualitative Evaluation of Darpa Communicator Spoken Dialogue Systems - Marilyn A.Walker - RebeccaPassonneau - Julie E.Boland + Marilyn A.Walker + RebeccaPassonneau + Julie E.Boland 10.3115/1073012.1073078 515–522 P01-1066 @@ -657,7 +657,7 @@ Multi-Class Composite N-gram Language Model for Spoken Language Processing Using Multiple Word Clusters - HirofumiYamamoto + HirofumiYamamoto ShuntaroIsogai YoshinoriSagisaka 10.3115/1073012.1073080 @@ -668,8 +668,8 @@ Text Chunking using Regularized Winnow TongZhang - FredDamerau - DavidJohnson + FredDamerau + DavidJohnson 10.3115/1073012.1073081 539–546 P01-1069 diff --git a/data/xml/P02.xml b/data/xml/P02.xml index fb159db204..449595e8de 100644 --- a/data/xml/P02.xml +++ b/data/xml/P02.xml @@ -18,7 +18,7 @@ Parameter Estimation for Probabilistic Finite-State Transducers - JasonEisner + JasonEisner 10.3115/1073083.1073085 1–8 P02-1001 @@ -26,7 +26,7 @@ Sequential Conditional Generalized Iterative Scaling - JoshuaGoodman + JoshuaGoodman 10.3115/1073083.1073086 9–16 P02-1002 @@ -44,9 +44,9 @@ Machine-learned contexts for linguistic operations in <fixed-case>G</fixed-case>erman sentence realization MichaelGamon - EricRingger - SimonCorston-Oliver - RobertMoore + EricRingger + SimonCorston-Oliver + RobertMoore 10.3115/1073083.1073089 25–32 P02-1004 @@ -54,9 +54,9 @@ Performance Issues and Error Analysis in an Open-Domain Question Answering System - DanMoldovan - MariusPasca - SandaHarabagiu + DanMoldovan + MariusPasca + SandaHarabagiu MihaiSurdeanu 10.3115/1073083.1073091 33–40 @@ -66,7 +66,7 @@ Learning surface text patterns for a Question Answering System DeepakRavichandran - EduardHovy + EduardHovy 10.3115/1073083.1073092 41–47 P02-1006 @@ -82,7 +82,7 @@ Phonological Comprehension and the Compilation of <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory - JasonEisner + JasonEisner 10.3115/1073083.1073095 56–63 P02-1008 @@ -106,7 +106,7 @@ Resolving Pronominal Reference to Abstract Entities - Donna K.Byron + Donna K.Byron 10.3115/1073083.1073099 80–87 P02-1011 @@ -114,8 +114,8 @@ Pronominalization in Generated Discourse and Dialogue - Charles B.Callaway - James C.Lester + Charles B.Callaway + James C.Lester 10.3115/1073083.1073100 88–95 P02-1012 @@ -132,7 +132,7 @@ Improving Machine Learning Approaches to Coreference Resolution VincentNg - ClaireCardie + ClaireCardie 10.3115/1073083.1073102 104–111 P02-1014 @@ -150,8 +150,8 @@ Active Learning for Statistical Natural Language Parsing MinTang - XiaoqiangLuo - SalimRoukos + XiaoqiangLuo + SalimRoukos 10.3115/1073083.1073105 120–127 P02-1016 @@ -160,7 +160,7 @@ A Generative Constituent-Context Model for Improved Grammar Induction DanKlein - Christopher D.Manning + Christopher D.Manning 10.3115/1073083.1073106 128–135 P02-1017 @@ -177,7 +177,7 @@ Pronunciation Modeling for Improved Spelling Correction KristinaToutanova - RobertMoore + RobertMoore 10.3115/1073083.1073109 144–151 P02-1019 @@ -185,10 +185,10 @@ Measuring Text Reuse - PaulClough - RobertGaizauskas - Scott S.L.Piao - YorickWilks + PaulClough + RobertGaizauskas + Scott S.L.Piao + YorickWilks 10.3115/1073083.1073110 152–159 P02-1020 @@ -196,7 +196,7 @@ Semi-Supervised Maximum Entropy Based Approach to Acronym and Abbreviation Normalization in Medical Texts - SergueiPakhomov + SergueiPakhomov 10.3115/1073083.1073111 160–167 P02-1021 @@ -204,9 +204,9 @@ <fixed-case>GATE</fixed-case>: an Architecture for Development of Robust <fixed-case>HLT</fixed-case> applications - HamishCunningham + HamishCunningham DianaMaynard - KalinaBontcheva + KalinaBontcheva ValentinTablan 10.3115/1073083.1073112 168–175 @@ -225,7 +225,7 @@ Exploring Asymmetric Clustering for Statistical Language Modeling JianfengGao - JoshuaGoodman + JoshuaGoodman GuihongCao HangLi 10.3115/1073083.1073115 @@ -237,7 +237,7 @@ A Study on Richer Syntactic Dependencies for Structured Language Modeling PengXu CiprianChelba - FrederickJelinek + FrederickJelinek 10.3115/1073083.1073116 191–198 P02-1025 @@ -268,7 +268,7 @@ NobuhiroKaji DaisukeKawahara SadaoKurohashi - SatoshiSato + SatoshiSato 10.3115/1073083.1073120 215–222 P02-1028 @@ -276,7 +276,7 @@ Inducing <fixed-case>G</fixed-case>erman Semantic Verb Classes from Purely Syntactic Subcategorisation Information - SabineSchulte im Walde + SabineSchulte im Walde ChrisBrew 10.3115/1073083.1073121 223–230 @@ -285,7 +285,7 @@ Scaling Context Space - JamesCurran + JamesCurran MarcMoens 10.3115/1073083.1073123 231–238 @@ -295,7 +295,7 @@ The Necessity of Parsing for Predicate Argument Recognition DanielGildea - MarthaPalmer + MarthaPalmer 10.3115/1073083.1073124 239–246 P02-1031 @@ -304,8 +304,8 @@ The Descent of Hierarchy, and Selection in Relational Semantics BarbaraRosario - MartiHearst - CharlesFillmore + MartiHearst + CharlesFillmore 10.3115/1073083.1073125 247–254 P02-1032 @@ -313,7 +313,7 @@ An Unsupervised Method for Word Sense Tagging using Parallel Corpora - MonaDiab + MonaDiab PhilipResnik 10.3115/1073083.1073126 255–262 @@ -322,7 +322,7 @@ New Ranking Algorithms for Parsing and Tagging: Kernels over Discrete Structures, and the Voted Perceptron - MichaelCollins + MichaelCollins NigelDuffy 10.3115/1073083.1073128 263–270 @@ -332,10 +332,10 @@ Parsing the <fixed-case>W</fixed-case>all <fixed-case>S</fixed-case>treet <fixed-case>J</fixed-case>ournal using a <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar and Discriminative Estimation Techniques StefanRiezler - Tracy H.King - Ronald M.Kaplan + Tracy H.King + Ronald M.Kaplan RichardCrouch - John T.Maxwell III + John T.Maxwell III MarkJohnson 10.3115/1073083.1073129 271–278 @@ -361,8 +361,8 @@ Discriminative Training and Maximum Entropy Models for Statistical Machine Translation - Franz JosefOch - HermannNey + Franz JosefOch + HermannNey 10.3115/1073083.1073133 295–302 Best Paper @@ -381,7 +381,7 @@ <fixed-case>B</fixed-case>leu: a Method for Automatic Evaluation of Machine Translation KishorePapineni - SalimRoukos + SalimRoukos ToddWard Wei-JingZhu 10.3115/1073083.1073135 @@ -393,7 +393,7 @@ Coupling <fixed-case>CCG</fixed-case> and Hybrid Logic Dependency Semantics JasonBaldridge - Geert-JanKruijff + Geert-JanKruijff 10.3115/1073083.1073137 319–326 P02-1041 @@ -403,7 +403,7 @@ Building Deep Dependency Structures using a Wide-Coverage <fixed-case>CCG</fixed-case> Parser StephenClark JuliaHockenmaier - MarkSteedman + MarkSteedman 10.3115/1073083.1073138 327–334 P02-1042 @@ -412,7 +412,7 @@ Generative Models for Statistical Parsing with <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar JuliaHockenmaier - MarkSteedman + MarkSteedman 10.3115/1073083.1073139 335–342 P02-1043 @@ -429,7 +429,7 @@ Applying Co-Training to Reference Resolution - ChristophMueller + ChristophMueller StefanRapp MichaelStrube 10.3115/1073083.1073142 @@ -439,7 +439,7 @@ Bootstrapping - StevenAbney + StevenAbney 10.3115/1073083.1073143 360–367 P02-1046 @@ -456,14 +456,14 @@ <fixed-case>MATCH</fixed-case>: An Architecture for Multimodal Dialogue Systems - MichaelJohnston - SrinivasBangalore - GunaranjanVasireddy - AmandaStent + MichaelJohnston + SrinivasBangalore + GunaranjanVasireddy + AmandaStent PatrickEhlen - MarilynWalker - SteveWhittaker - PreetamMaloor + MarilynWalker + SteveWhittaker + PreetamMaloor 10.3115/1073083.1073146 376–383 P02-1048 @@ -471,9 +471,9 @@ What’s the Trouble: Automatically Identifying Problematic Dialogues in <fixed-case>DARPA</fixed-case> Communicator Dialogue Systems - Helen WrightHastie + Helen WrightHastie RashmiPrasad - MarilynWalker + MarilynWalker 10.3115/1073083.1073147 384–391 P02-1049 @@ -483,7 +483,7 @@ Evaluating Translational Correspondence using Annotation Projection RebeccaHwa PhilipResnik - AmyWeinberg + AmyWeinberg OkanKolak 10.3115/1073083.1073149 392–399 @@ -492,7 +492,7 @@ Translating Named Entities Using Monolingual and Bilingual Resources - YaserAl-Onaizan + YaserAl-Onaizan KevinKnight 10.3115/1073083.1073150 400–408 @@ -502,7 +502,7 @@ Using Similarity Scoring to Improve the Bilingual Dictionary for Sub-sentential Alignment KatharinaProbst - RalfBrown + RalfBrown 10.3115/1073083.1073151 409–416 P02-1052 @@ -510,7 +510,7 @@ Thumbs Up or Thumbs Down? Semantic Orientation Applied to Unsupervised Classification of Reviews - PeterTurney + PeterTurney 10.3115/1073083.1073153 417–424 P02-1053 @@ -518,10 +518,10 @@ Is It the Right Answer? Exploiting Web Redundancy for Answer Validation - BernardoMagnini - MatteoNegri + BernardoMagnini + MatteoNegri RobertoPrevete - HristoTanev + HristoTanev 10.3115/1073083.1073154 425–432 P02-1054 @@ -529,7 +529,7 @@ Shallow Parsing on the Basis of Words Only: A Case Study - Antalvan den Bosch + Antalvan den Bosch SabineBuchholz 10.3115/1073083.1073156 433–440 @@ -540,16 +540,16 @@ An Integrated Archictecture for Shallow and Deep Processing BertholdCrysmann AnetteFrank - BerndKiefer - StefanMueller - GuenterNeumann + BerndKiefer + StefanMueller + GuenterNeumann JakubPiskorski - UlrichSchaefer + UlrichSchaefer MelanieSiegel HansUszkoreit FeiyuXu MarkusBecker - Hans-UlrichKrieger + Hans-UlrichKrieger 10.3115/1073083.1073157 441–448 P02-1056 @@ -557,7 +557,7 @@ A Noisy-Channel Model for Document Compression - HalDaume III + HalDaume III DanielMarcu 10.3115/1073083.1073159 449–456 @@ -566,8 +566,8 @@ From Single to Multi-document Summarization - Chin-YewLin - EduardHovy + Chin-YewLin + EduardHovy 10.3115/1073083.1073160 457–464 P02-1058 @@ -576,7 +576,7 @@ Supervised Ranking in Open-Domain Text Summarization TadashiNomoto - YujiMatsumoto + YujiMatsumoto 10.3115/1073083.1073161 465–472 P02-1059 @@ -584,7 +584,7 @@ Named Entity Recognition using an <fixed-case>HMM</fixed-case>-based Chunk Tagger - GuoDongZhou + GuoDongZhou JianSu 10.3115/1073083.1073163 473–480 @@ -602,7 +602,7 @@ Ranking Algorithms for Named Entity Extraction: Boosting and the <fixed-case>V</fixed-case>oted<fixed-case>P</fixed-case>erceptron - MichaelCollins + MichaelCollins 10.3115/1073083.1073165 489–496 P02-1062 @@ -611,8 +611,8 @@ Revision Learning and its Application to Part-of-Speech Tagging TetsujiNakagawa - TakuKudo - YujiMatsumoto + TakuKudo + YujiMatsumoto 10.3115/1073083.1073167 497–504 P02-1063 diff --git a/data/xml/P03.xml b/data/xml/P03.xml index 242b1d5230..76c39e40ce 100644 --- a/data/xml/P03.xml +++ b/data/xml/P03.xml @@ -16,7 +16,7 @@ Offline Strategies for Online Question Answering: Answering Questions Before They Are Asked MichaelFleischman - EduardHovy + EduardHovy AbdessamadEchihabi 10.3115/1075096.1075097 1–7 @@ -26,7 +26,7 @@ Using Predicate-Argument Structures for Information Extraction MihaiSurdeanu - SandaHarabagiu + SandaHarabagiu JohnWilliams PaulAarseth 10.3115/1075096.1075098 @@ -45,8 +45,8 @@ Fast Methods for Kernel-Based Text Analysis - TakuKudo - YujiMatsumoto + TakuKudo + YujiMatsumoto 10.3115/1075096.1075100 24–31 P03-1004 @@ -141,8 +141,8 @@ AnetteFrank MarkusBecker BertholdCrysmann - BerndKiefer - UlrichSchäfer + BerndKiefer + UlrichSchäfer 10.3115/1075096.1075110 104–111 P03-1014 @@ -167,7 +167,7 @@ Constructing Semantic Space Models from Parsed Corpora - SebastianPadó + SebastianPadó MirellaLapata 10.3115/1075096.1075113 128–135 @@ -185,7 +185,7 @@ A Comparative Study on Reordering Constraints in Statistical Machine Translation RichardZens - HermannNey + HermannNey 10.3115/1075096.1075115 144–151 P03-1019 @@ -193,10 +193,10 @@ t<fixed-case>R</fixed-case>u<fixed-case>E</fixed-case>cas<fixed-case>I</fixed-case>ng - Lucian VladLita - AbeIttycheriah - SalimRoukos - NandaKambhatla + Lucian VladLita + AbeIttycheriah + SalimRoukos + NandaKambhatla 10.3115/1075096.1075116 152–159 P03-1020 @@ -204,7 +204,7 @@ Minimum Error Rate Training in Statistical Machine Translation - Franz JosefOch + Franz JosefOch 10.3115/1075096.1075117 160–167 P03-1021 @@ -213,7 +213,7 @@ A Machine Learning Approach to Pronoun Resolution in Spoken Dialogue MichaelStrube - ChristophMüller + ChristophMüller 10.3115/1075096.1075118 168–175 P03-1022 @@ -222,9 +222,9 @@ Coreference Resolution Using Competition Learning Approach XiaofengYang - GuodongZhou + GuodongZhou JianSu - Chew LimTan + Chew LimTan 10.3115/1075096.1075119 176–183 P03-1023 @@ -258,7 +258,7 @@ Recognizing Expressions of Commonsense Psychology in <fixed-case>E</fixed-case>nglish Text - AndrewGordon + AndrewGordon AbeKazemzadeh AnishNair MilenaPetrova @@ -271,7 +271,7 @@ Closing the Gap: Learning-Based Information Extraction Rivaling Knowledge-Engineering Methods Hai LeongChieu Hwee TouNg - Yoong KeokLee + Yoong KeokLee 10.3115/1075096.1075124 216–223 P03-1028 @@ -281,7 +281,7 @@ An Improved Extraction Pattern Representation Model for Automatic <fixed-case>IE</fixed-case> Pattern Acquisition KiyoshiSudo SatoshiSekine - RalphGrishman + RalphGrishman 10.3115/1075096.1075125 224–231 P03-1029 @@ -290,7 +290,7 @@ Optimizing Story Link Detection is not Equivalent to Optimizing New Event Detection AymanFarahat - FrancineChen + FrancineChen ThorstenBrants 10.3115/1075096.1075126 232–239 @@ -310,8 +310,8 @@ Extracting Key Semantic Terms from <fixed-case>C</fixed-case>hinese Speech Query for Web Searches GangWang - Tat-SengChua - Yong-ChengWang + Tat-SengChua + Yong-ChengWang 10.3115/1075096.1075128 248–255 P03-1032 @@ -322,7 +322,7 @@ KazunoriKomatani ShinichiUeno TatsuyaKawahara - Hiroshi G.Okuno + Hiroshi G.Okuno 10.3115/1075096.1075129 256–263 P03-1033 @@ -330,7 +330,7 @@ Integrating Discourse Markers into a Pipelined Natural Language Generation Architecture - Charles B.Callaway + Charles B.Callaway 10.3115/1075096.1075130 264–271 P03-1034 @@ -340,7 +340,7 @@ Improved Source-Channel Models for <fixed-case>C</fixed-case>hinese Word Segmentation JianfengGao MuLi - Chang-NingHuang + Chang-NingHuang 10.3115/1075096.1075131 272–279 P03-1035 @@ -365,8 +365,8 @@ Self-Organizing <fixed-case>M</fixed-case>arkov Models and Their Application to Part-of-Speech Tagging Jin-DongKim - Hae-ChangRim - Jun’ichiTsujii + Hae-ChangRim + Jun’ichiTsujii 10.3115/1075096.1075134 296–302 P03-1038 @@ -375,8 +375,8 @@ Chunk-Based Statistical Translation TaroWatanabe - EiichiroSumita - Hiroshi G.Okuno + EiichiroSumita + Hiroshi G.Okuno 10.3115/1075096.1075135 303–310 P03-1039 @@ -394,8 +394,8 @@ Effective Phrase Translation Extraction from Alignment Models AshishVenugopal - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel 10.3115/1075096.1075137 319–326 P03-1041 @@ -416,7 +416,7 @@ ChengNiu WeiLi JihongDing - RohiniSrihari + RohiniSrihari 10.3115/1075096.1075139 335–342 P03-1043 @@ -458,7 +458,7 @@ Evaluation Challenges in Large-Scale Document Summarization - Dragomir R.Radev + Dragomir R.Radev SimoneTeufel HoracioSaggion WaiLam @@ -466,7 +466,7 @@ HongQi ArdaÇelebi DanyuLiu - ElliottDrabek + ElliottDrabek 10.3115/1075096.1075144 375–382 P03-1048 @@ -474,7 +474,7 @@ Analysis of Source Identified Text Corpora: Exploring the Statistics of the Reused Text and Authorship - AkikoAizawa + AkikoAizawa 10.3115/1075096.1075145 383–390 P03-1049 @@ -483,7 +483,7 @@ Unsupervised Learning of <fixed-case>A</fixed-case>rabic Stemming Using a Parallel Corpus MonicaRogati - ScottMcCarley + ScottMcCarley YimingYang 10.3115/1075096.1075146 391–398 @@ -494,9 +494,9 @@ Language Model Based <fixed-case>A</fixed-case>rabic Word Segmentation Young-SukLee KishorePapineni - SalimRoukos + SalimRoukos OssamaEmam - HanyHassan + HanyHassan 10.3115/1075096.1075147 399–406 P03-1051 @@ -504,7 +504,7 @@ Acquiring Vocabulary for Predictive Text Entry through Dynamic Reuse of a Small User Corpus - KumikoTanaka-Ishii + KumikoTanaka-Ishii DaichiHayakawa MasatoTakeichi 10.3115/1075096.1075148 @@ -523,7 +523,7 @@ Accurate Unlexicalized Parsing DanKlein - Christopher D.Manning + Christopher D.Manning 10.3115/1075096.1075150 423–430 Best Paper @@ -541,8 +541,8 @@ Is it Harder to Parse <fixed-case>C</fixed-case>hinese, or the <fixed-case>C</fixed-case>hinese Treebank? - RogerLevy - Christopher D.Manning + RogerLevy + Christopher D.Manning 10.3115/1075096.1075152 439–446 P03-1056 @@ -551,8 +551,8 @@ Feedback Cleaning of Machine Translation Rules Using Automatic Evaluation KenjiImamura - EiichiroSumita - YujiMatsumoto + EiichiroSumita + YujiMatsumoto 10.3115/1075096.1075153 447–454 P03-1057 @@ -570,7 +570,7 @@ Learning the Countability of <fixed-case>E</fixed-case>nglish Nouns from Corpus Data - TimothyBaldwin + TimothyBaldwin FrancisBond 10.3115/1075096.1075155 463–470 @@ -580,8 +580,8 @@ A Syllable Based Word Recognition Model for <fixed-case>K</fixed-case>orean Noun Extraction Do-GilLee - Hae-ChangRim - Heui-SeokLim + Hae-ChangRim + Heui-SeokLim 10.3115/1075096.1075156 471–478 P03-1060 @@ -603,9 +603,9 @@ Learning to Predict Pitch Accents and Prosodic Boundaries in <fixed-case>D</fixed-case>utch ErwinMarsi MartinReynaert - Antalvan den Bosch - WalterDaelemans - VéroniqueHoste + Antalvan den Bosch + WalterDaelemans + VéroniqueHoste 10.3115/1075096.1075158 489–496 P03-1062 @@ -623,7 +623,7 @@ A <fixed-case>SN</fixed-case>o<fixed-case>W</fixed-case> Based Supertagger with Application to <fixed-case>NP</fixed-case> Chunking LibinShen - Aravind K.Joshi + Aravind K.Joshi 10.3115/1075096.1075160 505–512 P03-1064 @@ -635,7 +635,7 @@ XiuhongZhang ChengNiu YuankaiJiang - Rohini K.Srihari + Rohini K.Srihari 10.3115/1075096.1075161 513–520 P03-1065 @@ -662,7 +662,7 @@ Towards a Resource for Lexical Semantics: A Large <fixed-case>G</fixed-case>erman Corpus with Extensive Semantic Annotation KatrinErk AndreaKowalski - SebastianPadó + SebastianPadó ManfredPinkal 10.3115/1075096.1075164 537–544 @@ -679,7 +679,7 @@ Towards a Model of Face-to-Face Grounding - YukikoNakano + YukikoNakano GabeReinstein TomStocky JustineCassell @@ -691,9 +691,9 @@ Discourse Segmentation of Multi-Party Conversation MichelGalley - Kathleen R.McKeown - EricFosler-Lussier - HongyanJing + Kathleen R.McKeown + EricFosler-Lussier + HongyanJing 10.3115/1075096.1075167 562–569 P03-1071 @@ -727,7 +727,7 @@ On the Applicability of Global Index Grammars - José M.Castaño + José M.Castaño 10.3115/1075178.1075180 15–22 P03-2003 @@ -759,7 +759,7 @@ A Novel Approach to Semantic Indexing Based on Concept - Bo-YeongKang + Bo-YeongKang 10.3115/1075178.1075184 44–49 P03-2007 @@ -791,7 +791,7 @@ Semantic Classification of <fixed-case>C</fixed-case>hinese Unknown Words - HuihsinTseng + HuihsinTseng 10.3115/1075178.1075188 72–79 P03-2011 @@ -832,7 +832,7 @@ <fixed-case>K</fixed-case>iwi: A Multilingual Usage Consultation Tool based on <fixed-case>I</fixed-case>nternet Searching - KumikoTanaka-Ishii + KumikoTanaka-Ishii MasatoYamamoto HiroshiNakagawa 10.3115/1075178.1075192 @@ -862,10 +862,10 @@ Integrating Information Extraction and Automatic Hyperlinking StephanBusemann - WitoldDrozdzynski - Hans-UlrichKrieger + WitoldDrozdzynski + Hans-UlrichKrieger JakubPiskorski - UlrichSchaefer + UlrichSchaefer HansUszkoreit FeiyuXu 10.3115/1075178.1075195 @@ -875,7 +875,7 @@ Automatic Collection of Related Terms from the Web - SatoshiSato + SatoshiSato YasuhiroSasaki 10.3115/1075178.1075196 121–124 @@ -885,8 +885,8 @@ i<fixed-case>N</fixed-case>e<fixed-case>ATS</fixed-case>: Interactive Multi-Document Summarization AntonLeuski - Chin-YewLin - EduardHovy + Chin-YewLin + EduardHovy 10.3115/1075178.1075197 125–128 P03-2021 @@ -917,12 +917,12 @@ A Limited-Domain <fixed-case>E</fixed-case>nglish to <fixed-case>J</fixed-case>apanese Medical Speech Translator Built Using <fixed-case>REGULUS</fixed-case> 2 - MannyRayner - PierretteBouillon + MannyRayner + PierretteBouillon VolVan Dalsem III HitoshiIsahara KyokoKanzaki - Beth AnnHockey + Beth AnnHockey 10.3115/1075178.1075200 137–140 P03-2024 @@ -932,7 +932,7 @@ Bilingual Terminology Acquisition from Comparable Corpora and Phrasal Translation to Cross-Language Information Retrieval FatihaSadat MasatoshiYoshikawa - ShunsukeUemura + ShunsukeUemura 10.3115/1075178.1075201 141–144 P03-2025 @@ -980,7 +980,7 @@ Word Sense Disambiguation Using Pairwise Alignment KoichiYamashita KeiichiYoshida - YukihiroItoh + YukihiroItoh 10.3115/1075178.1075205 157–160 P03-2029 @@ -988,7 +988,7 @@ The <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Data and Software - Collin F.Baker + Collin F.Baker HiroakiSato 10.3115/1075178.1075206 161–164 @@ -999,7 +999,7 @@ Automatic Acquisition of Named Entity Tagged Corpus from World Wide Web JoohuiAn SeungwooLee - Gary GeunbaeLee + Gary GeunbaeLee 10.3115/1075178.1075207 165–168 P03-2031 @@ -1019,10 +1019,10 @@ A Debug Tool for Practical Grammar Development AkaneYakushiji - YukaTateisi + YukaTateisi YusukeMiyao NaokiYoshinaga - Jun’ichiTsujii + Jun’ichiTsujii 10.3115/1075178.1075209 173–176 P03-2033 @@ -1030,7 +1030,7 @@ A Speech Interface for Open-Domain Question-Answering - EdwardSchofield + EdwardSchofield ZhipingZheng 10.3115/1075178.1075210 177–180 @@ -1051,7 +1051,7 @@ Comparison between <fixed-case>CFG</fixed-case> Filtering Techniques for <fixed-case>LTAG</fixed-case> and <fixed-case>HPSG</fixed-case> NaokiYoshinaga KentaroTorisawa - Jun’ichiTsujii + Jun’ichiTsujii 10.3115/1075178.1075212 185–188 P03-2036 @@ -1060,7 +1060,7 @@ Automatic Detection of Grammar Elements that Decrease Readability MasatoshiTsuchiya - SatoshiSato + SatoshiSato 10.3115/1075178.1075213 189–192 P03-2037 @@ -1068,12 +1068,12 @@ An Intelligent Procedure Assistant Built Using <fixed-case>REGULUS</fixed-case> 2 and <fixed-case>ALTERF</fixed-case> - MannyRayner - Beth AnnHockey + MannyRayner + Beth AnnHockey JimHieronymus - JohnDowding - GregAist - SusanaEarly + JohnDowding + GregAist + SusanaEarly 10.3115/1075178.1075214 193–196 P03-2038 @@ -1081,9 +1081,9 @@ <fixed-case>C</fixed-case>hinese Unknown Word Identification Using Character-based Tagging and Chunking - Chooi LingGoh + Chooi LingGoh MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 10.3115/1075178.1075215 197–200 P03-2039 @@ -1091,11 +1091,11 @@ <fixed-case>T</fixed-case>otal<fixed-case>R</fixed-case>ecall: A Bilingual Concordance for Computer Assisted Translation and Language Learning - Jian-ChengWu - Kevin C.Yeh + Jian-ChengWu + Kevin C.Yeh Thomas C.Chuang Wen-ChiShei - Jason S.Chang + Jason S.Chang 10.3115/1075178.1075216 201–204 P03-2040 @@ -1103,7 +1103,7 @@ Learning Non-Isomorphic Tree Mappings for Machine Translation - JasonEisner + JasonEisner 10.3115/1075178.1075217 205–208 P03-2041 diff --git a/data/xml/P04.xml b/data/xml/P04.xml index 67af34de73..8edef5c473 100644 --- a/data/xml/P04.xml +++ b/data/xml/P04.xml @@ -14,7 +14,7 @@ Optimization in Multimodal Interpretation - Joyce Y.Chai + Joyce Y.Chai PengyuHong Michelle X.Zhou ZaharPrasov @@ -43,7 +43,7 @@ Analysis of Mixed Natural and Symbolic Input in Mathematical Dialogs MagdalenaWolska - IvanaKruijff-Korbayová + IvanaKruijff-Korbayová 10.3115/1218955.1218959 25–32 P04-1004 @@ -60,7 +60,7 @@ Attention Shifting for Parsing Speech - Keith B.Hall + Keith B.Hall MarkJohnson 10.3115/1218955.1218961 40–46 @@ -70,8 +70,8 @@ Discriminative Language Modeling with Conditional Random Fields and the Perceptron Algorithm BrianRoark - MuratSaraclar - MichaelCollins + MuratSaraclar + MichaelCollins MarkJohnson 10.3115/1218955.1218962 47–54 @@ -82,7 +82,7 @@ Statistical Modeling for Unit Selection in Speech Synthesis MehryarMohri CyrilAllauzen - MichaelRiley + MichaelRiley 10.3115/1218955.1218963 55–62 P04-1008 @@ -90,7 +90,7 @@ Developing a Flexible Spoken Dialog System Using Simulation - GraceChung + GraceChung 10.3115/1218955.1218964 63–70 P04-1009 @@ -99,11 +99,11 @@ Data-Driven Strategies for an Automated Dialogue System HildaHardy - TomekStrzalkowski + TomekStrzalkowski MinWu - CristianUrsu - NickWebb - AlanBiermann + CristianUrsu + NickWebb + AlanBiermann R. BryceInouye AshleyMcKenzie 10.3115/1218955.1218965 @@ -113,9 +113,9 @@ Trainable Sentence Planning for Complex Information Presentations in Spoken Dialog Systems - AmandaStent + AmandaStent RashmiPrasad - MarilynWalker + MarilynWalker 10.3115/1218955.1218966 79–86 P04-1011 @@ -123,7 +123,7 @@ User Expertise Modeling and Adaptivity in a Speech-Based <fixed-case>E</fixed-case>-Mail System - KristiinaJokinen + KristiinaJokinen KariKanto 10.3115/1218955.1218967 87–94 @@ -132,7 +132,7 @@ Discriminative Training of a Neural Network Statistical Parser - JamesHenderson + JamesHenderson 10.3115/1218955.1218968 95–102 P04-1013 @@ -141,7 +141,7 @@ Parsing the <fixed-case>WSJ</fixed-case> Using <fixed-case>CCG</fixed-case> and Log-Linear Models StephenClark - James R.Curran + James R.Curran 10.3115/1218955.1218969 103–110 P04-1014 @@ -149,7 +149,7 @@ Incremental Parsing with the Perceptron Algorithm - MichaelCollins + MichaelCollins BrianRoark 10.3115/1218955.1218970 111–118 @@ -170,8 +170,8 @@ Improving Pronoun Resolution by Incorporating Coreferential Information of Candidates XiaofengYang JianSu - GuodongZhou - Chew-LimTan + GuodongZhou + Chew-LimTan 10.3115/1218955.1218972 127–134 P04-1017 @@ -179,11 +179,11 @@ A Mention-Synchronous Coreference Resolution Algorithm Based On the Bell Tree - XiaoqiangLuo - AbeIttycheriah - HongyanJing - NandaKambhatla - SalimRoukos + XiaoqiangLuo + AbeIttycheriah + HongyanJing + NandaKambhatla + SalimRoukos 10.3115/1218955.1218973 135–142 P04-1018 @@ -191,10 +191,10 @@ Learning to Resolve Bridging References - MassimoPoesio + MassimoPoesio RahulMehta AxelMaroudas - JanetHitzeman + JanetHitzeman 10.3115/1218955.1218974 143–150 P04-1019 @@ -220,7 +220,7 @@ Collocation Translation Acquisition Using Monolingual Corpora - Yajuan + Yajuan MingZhou 10.3115/1218955.1218977 167–174 @@ -249,7 +249,7 @@ Extracting Regulatory Gene Expression Networks From Pubmed JasminSaric - Lars J.Jensen + Lars J.Jensen PeerBork RossitzaOuzounova IsabelRojas @@ -260,7 +260,7 @@ Linguistic Profiling for Authorship Recognition and Verification - Hansvan Halteren + Hansvan Halteren 10.3115/1218955.1218981 199–206 P04-1026 @@ -268,10 +268,10 @@ An Empirical Study of Information Synthesis Task - EnriqueAmigo + EnriqueAmigo JulioGonzalo - VictorPeinado - AnselmoPeñas + VictorPeinado + AnselmoPeñas FelisaVerdejo 10.3115/1218955.1218982 207–214 @@ -280,7 +280,7 @@ Mining Metalinguistic Activity in Corpora to Create Lexical Resources Using Information Extraction Techniques: the <fixed-case>MOP</fixed-case> System - Carlos RodriguezPenagos + Carlos RodriguezPenagos 10.3115/1218955.1218983 215–222 P04-1028 @@ -327,7 +327,7 @@ Learning with Unlabeled Data for Text Categorization Using a Bootstrapping and a Feature Projection Technique YoungjoongKo - JungyunSeo + JungyunSeo 10.3115/1218955.1218988 255–262 P04-1033 @@ -354,10 +354,10 @@ Finding Predominant Word Senses in Untagged Text - DianaMcCarthy + DianaMcCarthy RobKoeling JulieWeeds - JohnCarroll + JohnCarroll 10.3115/1218955.1218991 279–286 Best Paper @@ -377,7 +377,7 @@ <fixed-case>C</fixed-case>hinese Verb Sense Discrimination Using an <fixed-case>EM</fixed-case> Clustering Model with Rich Linguistic Features JinyingChen - MarthaPalmer + MarthaPalmer 10.3115/1218955.1218993 295–302 P04-1038 @@ -385,7 +385,7 @@ Relieving the data Acquisition Bottleneck in Word Sense Disambiguation - MonaDiab + MonaDiab 10.3115/1218955.1218994 303–310 P04-1039 @@ -394,7 +394,7 @@ Enriching the Output of a Parser Using Memory-based Learning ValentinJijkoun - Maartende Rijke + Maartende Rijke 10.3115/1218955.1218995 311–318 P04-1040 @@ -405,7 +405,7 @@ AoifeCahill MichaelBurke RuthO’Donovan - Josefvan Genabith + Josefvan Genabith AndyWay 10.3115/1218955.1218996 319–326 @@ -414,8 +414,8 @@ Deep Dependencies from Context-Free Statistical Parsers: Correcting the Surface Dependency Approximation - RogerLevy - ChristopherManning + RogerLevy + ChristopherManning 10.3115/1218955.1218997 327–334 P04-1042 @@ -440,8 +440,8 @@ Predicting Student Emotions in Computer-Human Tutoring Dialogues - Diane J.Litman - KateForbes-Riley + Diane J.Litman + KateForbes-Riley 10.3115/1218955.1219000 351–358 P04-1045 @@ -460,7 +460,7 @@ RuthO’Donovan MichaelBurke AoifeCahill - Josefvan Genabith + Josefvan Genabith AndyWay 10.3115/1218955.1219002 367–374 @@ -470,7 +470,7 @@ Inducing Frame Semantic Verb Classes from <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>LDOCE</fixed-case> RebeccaGreen - Bonnie J.Dorr + Bonnie J.Dorr PhilipResnik 10.3115/1218955.1219003 375–382 @@ -489,9 +489,9 @@ Evaluating Centering-Based Metrics of Coherence NikiforosKaramanis - MassimoPoesio - ChrisMellish - JonOberlander + MassimoPoesio + ChrisMellish + JonOberlander 10.3115/1218955.1219005 391–398 P04-1050 @@ -520,7 +520,7 @@ Discovering Relations among Named Entities from Large Corpora TakaakiHasegawa SatoshiSekine - RalphGrishman + RalphGrishman 10.3115/1218955.1219008 415–422 P04-1053 @@ -529,7 +529,7 @@ Dependency Tree Kernels for Relation Extraction AronCulotta - JeffreySorensen + JeffreySorensen 10.3115/1218955.1219009 423–429 P04-1054 @@ -538,7 +538,7 @@ Classifying Semantic Relations in Bioscience Texts BarbaraRosario - MartiHearst + MartiHearst 10.3115/1218955.1219010 430–437 P04-1055 @@ -546,8 +546,8 @@ Collective Information Extraction with Relational <fixed-case>M</fixed-case>arkov Networks - RazvanBunescu - RaymondMooney + RazvanBunescu + RaymondMooney 10.3115/1218955.1219011 438–445 P04-1056 @@ -555,7 +555,7 @@ Error Mining for Wide-Coverage Grammar Engineering - Gertjanvan Noord + Gertjanvan Noord 10.3115/1218955.1219012 446–453 P04-1057 @@ -564,7 +564,7 @@ Alternative approaches for Generating Bodies of Grammar Rules GabrielInfante-Lopez - Maartende Rijke + Maartende Rijke 10.3115/1218955.1219013 454–461 P04-1058 @@ -575,7 +575,7 @@ JianfengGao AndiWu MuLi - Chang-NingHuang + Chang-NingHuang HongqiaoLi XinsongXia HaoweiQin @@ -595,7 +595,7 @@ Corpus-Based Induction of Syntactic Structure: Models of Dependency and Constituency DanKlein - ChristopherManning + ChristopherManning 10.3115/1218955.1219016 478–485 P04-1061 @@ -603,8 +603,8 @@ Annealing Techniques For Unsupervised Statistical Language Learning - Noah A.Smith - JasonEisner + Noah A.Smith + JasonEisner 10.3115/1218955.1219017 486–493 P04-1062 @@ -620,9 +620,9 @@ Aligning words using matrix factorisation - CyrilGoutte + CyrilGoutte KenjiYamada - EricGaussier + EricGaussier 10.3115/1218955.1219019 502–509 P04-1064 @@ -631,7 +631,7 @@ <fixed-case>FSA</fixed-case>: An Efficient and Flexible <fixed-case>C</fixed-case>++ Toolkit for Finite State Automata Using On-Demand Computation StephanKanthak - HermannNey + HermannNey 10.3115/1218955.1219020 510–517 P04-1065 @@ -639,7 +639,7 @@ Improving <fixed-case>IBM</fixed-case> Word Alignment Model 1 - Robert C.Moore + Robert C.Moore 10.3115/1218955.1219021 518–525 P04-1066 @@ -647,7 +647,7 @@ A Geometric View on Bilingual Lexicon Extraction from Comparable Corpora - EricGaussier + EricGaussier J.M.Renders I.Matveeva C.Goutte @@ -708,7 +708,7 @@ Question Answering Using Constraint Satisfaction: <fixed-case>QA</fixed-case>-By-Dossier-With-Contraints JohnPrager - JenniferChu-Carroll + JenniferChu-Carroll KrzysztofCzuba 10.3115/1218955.1219028 574–581 @@ -718,7 +718,7 @@ Applying Machine Learning to <fixed-case>C</fixed-case>hinese Temporal Relation Resolution WenjieLi - Kam-FaiWong + Kam-FaiWong GuihongCao ChunfaYuan 10.3115/1218955.1219029 @@ -731,8 +731,8 @@ DanShen JieZhang JianSu - GuodongZhou - Chew-LimTan + GuodongZhou + Chew-LimTan 10.3115/1218955.1219030 589–596 P04-1075 @@ -742,7 +742,7 @@ Weakly Supervised Learning for Cross-document Person Name Disambiguation Supported by Information Extraction ChengNiu WeiLi - Rohini K.Srihari + Rohini K.Srihari 10.3115/1218955.1219031 597–604 P04-1076 @@ -750,8 +750,8 @@ Automatic Evaluation of Machine Translation Quality Using Longest Common Subsequence and Skip-Bigram Statistics - Chin-YewLin - Franz JosefOch + Chin-YewLin + Franz JosefOch 10.3115/1218955.1219032 605–612 P04-1077 @@ -777,9 +777,9 @@ Learning Word Sense With Feature Selection and Order Identification Capabilities - Zheng-YuNiu - Dong-HongJi - Chew-LimTan + Zheng-YuNiu + Dong-HongJi + Chew-LimTan 10.3115/1218955.1219035 629–636 P04-1080 @@ -824,9 +824,9 @@ Identifying Agreement and Disagreement in Conversational Speech: Use of <fixed-case>B</fixed-case>ayesian Networks to Model Pragmatic Dependencies MichelGalley - KathleenMcKeown - JuliaHirschberg - ElizabethShriberg + KathleenMcKeown + JuliaHirschberg + ElizabethShriberg 10.3115/1218955.1219040 669–676 P04-1085 @@ -834,7 +834,7 @@ Using Conditional Random Fields to Predict Pitch Accents in Conversational Speech - MichelleGregory + MichelleGregory YaseminAltun 10.3115/1218955.1219041 677–683 @@ -889,7 +889,7 @@ Searching for Topics in a Large Collection of Texts MartinHolub - JiříSemecký + JiříSemecký JiříDiviš 13–18 P04-2003 @@ -976,7 +976,7 @@ <fixed-case>T</fixed-case>rans<fixed-case>T</fixed-case>ype2 - An Innovative Computer-Assisted Translation System JoséEsteban JoséLorenzo - Antonio S.Valderrábanos + Antonio S.Valderrábanos GuyLapalme 94–97 P04-3001 @@ -1000,10 +1000,10 @@ Subsentential Translation Memory for Computer Assisted Writing and Translation - Jian-ChengWu + Jian-ChengWu Thomas C.Chuang Wen-ChiShei - Jason S.Chang + Jason S.Chang 106–109 P04-3004 wu-etal-2004-subsentential @@ -1034,7 +1034,7 @@ Interactive grammar development with <fixed-case>WCDG</fixed-case> - Kilian A.Foth + Kilian A.Foth MichaelDaum WolfgangMenzel 122–125 @@ -1043,7 +1043,7 @@ Wide Coverage Symbolic Surface Realization - CharlesCallaway + CharlesCallaway 126–129 P04-3009 callaway-2004-wide @@ -1051,7 +1051,7 @@ Part-of-Speech Tagging Considering Surface Form for an Agglutinative Language Do-GilLee - Hae-ChangRim + Hae-ChangRim 130–133 P04-3010 lee-rim-2004-part @@ -1066,21 +1066,21 @@ Corpus representativeness for syntactic information acquisition - NúriaBel + NúriaBel 138–141 P04-3012 bel-2004-corpus Exploiting Unannotated Corpora for Tagging and Chunking - Rie KubotaAndo + Rie KubotaAndo 142–145 P04-3013 ando-2004-exploiting Improving Bitext Word Alignments via Syntax-based Reordering of <fixed-case>E</fixed-case>nglish - Elliott FrancoDrabek + Elliott FrancoDrabek DavidYarowsky 146–149 P04-3014 @@ -1097,10 +1097,10 @@ Knowledge intensive e-mail summarization in <fixed-case>CARPANTA</fixed-case> - LauraAlonso - IreneCastellón - BernardinoCasas - LluísPadró + LauraAlonso + IreneCastellón + BernardinoCasas + LluísPadró 154–157 P04-3016 alonso-etal-2004-knowledge @@ -1108,48 +1108,48 @@ Finding Anchor Verbs for Biomedical <fixed-case>IE</fixed-case> Using Predicate-Argument Structures AkaneYakushiji - YukaTateisi + YukaTateisi YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 158–161 P04-3017 yakushiji-etal-2004-finding Resource Analysis for Question Answering - Lucian VladLita + Lucian VladLita Warren A.Hunt - EricNyberg + EricNyberg 162–165 P04-3018 lita-etal-2004-resource <fixed-case>TANGO</fixed-case>: Bilingual Collocational Concordancer - Jia-YanJian + Jia-YanJian Yu-ChiaChang - Jason S.Chang + Jason S.Chang 166–169 P04-3019 jian-etal-2004-tango Graph-based Ranking Algorithms for Sentence Extraction, Applied to Text Summarization - RadaMihalcea + RadaMihalcea 170–173 P04-3020 mihalcea-2004-graph Compiling Boostexter Rules into a Finite-state Transducer - SrinivasBangalore + SrinivasBangalore 174–177 P04-3021 bangalore-2004-compiling Combining Lexical, Syntactic, and Semantic Features with Maximum Entropy Models for Information Extraction - NandaKambhatla + NandaKambhatla 178–181 P04-3022 kambhatla-2004-combining @@ -1185,8 +1185,8 @@ Automatic clustering of collocation for detecting practical sense boundary - SaimShin - Key-SunChoi + SaimShin + Key-SunChoi 198–201 P04-3027 shin-choi-2004-automatic @@ -1194,7 +1194,7 @@ Co-training for Predicting Emotions with Spoken Dialogue Data BeatrizMaeireizo - DianeLitman + DianeLitman RebeccaHwa 202–205 P04-3028 @@ -1211,7 +1211,7 @@ Wysiwym with wider coverage RichardPower - RogerEvans + RogerEvans 210–213 P04-3030 power-evans-2004-wysiwym @@ -1226,17 +1226,17 @@ <fixed-case>D</fixed-case>yna: A Language for Weighted Dynamic Programming - JasonEisner + JasonEisner EricGoldlust - Noah A.Smith + Noah A.Smith 218–221 P04-3032 eisner-etal-2004-dyna <fixed-case>MATCH</fixed-case>kiosk: A Multimodal Interactive City Guide - MichaelJohnston - SrinivasBangalore + MichaelJohnston + SrinivasBangalore 222–225 P04-3033 johnston-bangalore-2004-matchkiosk @@ -1245,7 +1245,7 @@ Fragments and Text Categorization JanBlaták EvaMráková - LubosPopelínsky + LubosPopelínsky 226–229 P04-3034 blatak-etal-2004-fragments diff --git a/data/xml/P05.xml b/data/xml/P05.xml index 8930a297d5..420d78d3d0 100644 --- a/data/xml/P05.xml +++ b/data/xml/P05.xml @@ -19,7 +19,7 @@ A High-Performance Semi-Supervised Learning Method for Text Chunking - RieAndo + RieAndo TongZhang 1–9 P05-1001 @@ -28,8 +28,8 @@ Scaling Conditional Random Fields Using Error-Correcting Codes - TrevorCohn - AndrewSmith + TrevorCohn + AndrewSmith MilesOsborne 10–17 P05-1002 @@ -38,8 +38,8 @@ Logarithmic Opinion Pools for Conditional Random Fields - AndrewSmith - TrevorCohn + AndrewSmith + TrevorCohn MilesOsborne 18–25 P05-1003 @@ -48,7 +48,7 @@ Supersense Tagging of Unknown Nouns Using Semantic Similarity - JamesCurran + JamesCurran 26–33 P05-1004 10.3115/1219840.1219844 @@ -65,8 +65,8 @@ The Role of Semantic Roles in Disambiguating Verb Senses - Hoa TrangDang - MarthaPalmer + Hoa TrangDang + MarthaPalmer 42–49 P05-1006 10.3115/1219840.1219846 @@ -77,8 +77,8 @@ BarbaraDi Eugenio DavideFossati DanYu - SusanHaller - MichaelGlass + SusanHaller + MichaelGlass 50–57 P05-1007 10.3115/1219840.1219847 @@ -86,8 +86,8 @@ Empirically-based Control of Natural Language Generation - Daniel S.Paiva - RogerEvans + Daniel S.Paiva + RogerEvans 58–65 P05-1008 10.3115/1219840.1219848 @@ -106,7 +106,7 @@ Probabilistic <fixed-case>CFG</fixed-case> with Latent Annotations TakuyaMatsuzaki YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 75–82 P05-1010 10.3115/1219840.1219850 @@ -115,7 +115,7 @@ Probabilistic Disambiguation Models for Wide-Coverage <fixed-case>HPSG</fixed-case> Parsing YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 83–90 P05-1011 10.3115/1219840.1219851 @@ -142,7 +142,7 @@ The Distributional Inclusion Hypotheses and Lexical Entailment - MaayanGeffet + MaayanGeffet IdoDagan 107–114 P05-1014 @@ -170,7 +170,7 @@ Extracting Semantic Orientations of Words using Spin Model HiroyaTakamura TakashiInui - ManabuOkumura + ManabuOkumura 133–140 P05-1017 10.3115/1219840.1219857 @@ -205,7 +205,7 @@ Improving Pronoun Resolution Using Statistics-Based Semantic Compatibility Information XiaofengYang JianSu - Chew LimTan + Chew LimTan 165–172 P05-1021 10.3115/1219840.1219861 @@ -222,7 +222,7 @@ Data-Defined Kernels for Parse Reranking Derived from Probabilistic Models - JamesHenderson + JamesHenderson IvanTitov 181–188 P05-1023 @@ -231,7 +231,7 @@ Boosting-based Parse Reranking with Subtree Features - TakuKudo + TakuKudo JunSuzuki HidekiIsozaki 189–196 @@ -242,7 +242,7 @@ Automatic Measurement of Syntactic Development in Child Language KenjiSagae - AlonLavie + AlonLavie BrianMacWhinney 197–204 P05-1025 @@ -251,10 +251,10 @@ Experiments with Interactive Question-Answering - SandaHarabagiu + SandaHarabagiu AndrewHickl JohnLehmann - DanMoldovan + DanMoldovan 205–214 P05-1026 10.3115/1219840.1219866 @@ -271,10 +271,10 @@ Exploring and Exploiting the Limited Utility of Captions in Recognizing Intention in Information Graphics StephanieElzer - SandraCarberry + SandraCarberry DanielChester SenizDemir - NancyGreen + NancyGreen IngridZukerman KeithTrnka 223–230 @@ -285,7 +285,7 @@ Scaling up from Dialogue to Multilogue: Some Principles and Benchmarks JonathanGinzburg - RaquelFernández + RaquelFernández 231–238 P05-1029 10.3115/1219840.1219869 @@ -294,7 +294,7 @@ Implications for Generating Clarification Requests in Task-Oriented Dialogues VerenaRieser - JohannaMoore + JohannaMoore 239–246 P05-1030 10.3115/1219840.1219870 @@ -339,9 +339,9 @@ <fixed-case>QARLA</fixed-case>: A Framework for the Evaluation of Text Summarization Systems - EnriqueAmigó + EnriqueAmigó JulioGonzalo - AnselmoPeñas + AnselmoPeñas FelisaVerdejo 280–289 P05-1035 @@ -360,7 +360,7 @@ Digesting Virtual “Geek” Culture: The Summarization of Technical <fixed-case>I</fixed-case>nternet Relay Chats LiangZhou - EduardHovy + EduardHovy 298–305 P05-1037 10.3115/1219840.1219877 @@ -386,7 +386,7 @@ Detecting Errors in Discontinuous Structural Annotation MarkusDickinson - W. DetmarMeurers + W. DetmarMeurers 322–329 P05-1040 10.3115/1219840.1219880 @@ -406,7 +406,7 @@ A Dynamic <fixed-case>B</fixed-case>ayesian Framework to Model Context and Memory in Edit Distance Learning: An Application to Pronunciation Classification KarimFilali - JeffBilmes + JeffBilmes 338–345 P05-1042 10.3115/1219840.1219882 @@ -422,8 +422,8 @@ Contrastive Estimation: Training Log-Linear Models on Unlabeled Data - Noah A.Smith - JasonEisner + Noah A.Smith + JasonEisner 354–362 P05-1044 10.3115/1219840.1219884 @@ -431,9 +431,9 @@ Incorporating Non-local Information into Information Extraction Systems by <fixed-case>G</fixed-case>ibbs Sampling - Jenny RoseFinkel + Jenny RoseFinkel TrondGrenager - ChristopherManning + ChristopherManning 363–370 P05-1045 10.3115/1219840.1219885 @@ -443,7 +443,7 @@ Unsupervised Learning of Field Segmentation Models for Information Extraction TrondGrenager DanKlein - ChristopherManning + ChristopherManning 371–378 P05-1046 10.3115/1219840.1219886 @@ -452,7 +452,7 @@ A Semantic Approach to <fixed-case>IE</fixed-case> Pattern Induction MarkStevenson - MarkGreenwood + MarkGreenwood 379–386 P05-1047 10.3115/1219840.1219887 @@ -469,9 +469,9 @@ Word Sense Disambiguation Using Label Propagation Based Semi-Supervised Learning - Zheng-YuNiu - Dong-HongJi - Chew LimTan + Zheng-YuNiu + Dong-HongJi + Chew LimTan 395–402 P05-1049 10.3115/1219840.1219889 @@ -479,8 +479,8 @@ Domain Kernels for Word Sense Disambiguation - AlfioGliozzo - ClaudioGiuliano + AlfioGliozzo + ClaudioGiuliano CarloStrapparava 403–410 P05-1050 @@ -490,7 +490,7 @@ Improving Name Tagging by Reference Resolution and Relation Detection HengJi - RalphGrishman + RalphGrishman 411–418 P05-1051 10.3115/1219840.1219891 @@ -499,7 +499,7 @@ Extracting Relations with Integrated Information Using Kernel Methods ShubinZhao - RalphGrishman + RalphGrishman 419–426 P05-1052 10.3115/1219840.1219892 @@ -507,7 +507,7 @@ Exploring Various Knowledge in Relation Extraction - GuoDongZhou + GuoDongZhou JianSu JieZhang MinZhang @@ -519,7 +519,7 @@ A Quantitative Analysis of Lexical Differences Between Genders in Telephone Conversations ConstantinosBoulis - MariOstendorf + MariOstendorf 435–442 P05-1054 10.3115/1219840.1219894 @@ -537,9 +537,9 @@ Using Conditional Random Fields for Sentence Boundary Detection in Speech YangLiu - AndreasStolcke - ElizabethShriberg - MaryHarper + AndreasStolcke + ElizabethShriberg + MaryHarper 451–458 P05-1056 10.3115/1219840.1219896 @@ -576,7 +576,7 @@ Multi-Field Information Extraction and Cross-Document Fusion - GideonMann + GideonMann DavidYarowsky 483–490 P05-1060 @@ -590,7 +590,7 @@ SethKulick ScottWinters YangJin - PeteWhite + PeteWhite 491–498 P05-1061 10.3115/1219840.1219901 @@ -608,9 +608,9 @@ Discriminative Syntactic Language Modeling for Speech Recognition - MichaelCollins + MichaelCollins BrianRoark - MuratSaraclar + MuratSaraclar 507–514 P05-1063 10.3115/1219840.1219903 @@ -627,8 +627,8 @@ Reading Level Assessment Using Support Vector Machines and Statistical Language Models - SarahSchwarm - MariOstendorf + SarahSchwarm + MariOstendorf 523–530 P05-1065 10.3115/1219840.1219905 @@ -636,9 +636,9 @@ Clause Restructuring for Statistical Machine Translation - MichaelCollins + MichaelCollins PhilippKoehn - IvonaKučerová + IvonaKučerová 531–540 P05-1066 10.3115/1219840.1219906 @@ -647,7 +647,7 @@ Machine Translation Using Probabilistic Synchronous Dependency Insertion Grammars YuanDing - MarthaPalmer + MarthaPalmer 541–548 P05-1067 10.3115/1219840.1219907 @@ -664,7 +664,7 @@ A Localized Prediction Model for Statistical Machine Translation - ChristophTillmann + ChristophTillmann TongZhang 557–564 P05-1069 @@ -683,7 +683,7 @@ <fixed-case>A</fixed-case>rabic Tokenization, Part-of-Speech Tagging and Morphological Disambiguation in One Fell Swoop NizarHabash - OwenRambow + OwenRambow 573–580 P05-1071 10.3115/1219840.1219911 @@ -691,11 +691,11 @@ Semantic Role Labeling Using Different Syntactic Views - SameerPradhan - WayneWard - KadriHacioglu - JamesMartin - DanielJurafsky + SameerPradhan + WayneWard + KadriHacioglu + JamesMartin + DanielJurafsky 581–588 P05-1072 10.3115/1219840.1219912 @@ -705,7 +705,7 @@ Joint Learning Improves Semantic Role Labeling KristinaToutanova AriaHaghighi - ChristopherManning + ChristopherManning 589–596 P05-1073 10.3115/1219840.1219913 @@ -732,7 +732,7 @@ Automatic Acquisition of Adjectival Subcategorization from Corpora JeremyYallop AnnaKorhonen - TedBriscoe + TedBriscoe 614–621 P05-1076 10.3115/1219840.1219916 @@ -742,7 +742,7 @@ Randomized Algorithms and <fixed-case>NLP</fixed-case>: Using Locality Sensitive Hash Functions for High Speed Noun Clustering DeepakRavichandran PatrickPantel - EduardHovy + EduardHovy 622–629 P05-1077 10.3115/1219840.1219917 @@ -844,14 +844,14 @@ Phrase Linguistic Classification and Generalization for Improving Statistical Machine Translation - Adriàde Gispert + Adriàde Gispert 67–72 P05-2012 de-gispert-2005-phrase Automatic Induction of a <fixed-case>CCG</fixed-case> Grammar for <fixed-case>T</fixed-case>urkish - RukenÇakıcı + RukenÇakıcı 73–78 P05-2013 cakici-2005-automatic @@ -872,14 +872,14 @@ Dependency-Based Statistical Machine Translation - HeidiFox + HeidiFox 91–96 P05-2016 fox-2005-dependency Minimalist Parsing of Subjects Displaced from Embedded Clauses in Free Word Order Languages - Asad B.Sayeed + Asad B.Sayeed 97–102 P05-2017 sayeed-2005-minimalist @@ -901,7 +901,7 @@ Learning Information Structure in the <fixed-case>P</fixed-case>rague <fixed-case>T</fixed-case>reebank - OanaPostolache + OanaPostolache 115–120 P05-2020 postolache-2005-learning @@ -923,7 +923,7 @@ An Unsupervised System for Identifying <fixed-case>E</fixed-case>nglish Inclusions in <fixed-case>G</fixed-case>erman Text - BeatriceAlex + BeatriceAlex 133–138 P05-2023 alex-2005-unsupervised @@ -969,7 +969,7 @@ An Information-State Approach to Collaborative Reference DavidDeVault - NataliaKariaeva + NataliaKariaeva AnubhaKothari IrisOved MatthewStone @@ -1018,7 +1018,7 @@ Descriptive Question Answering in Encyclopedia Hyo-Jung O.Lee Hyeon-JinKim - Myung-GilJang + Myung-GilJang 21–24 P05-3006 10.3115/1225753.1225759 @@ -1026,7 +1026,7 @@ High Throughput Modularized <fixed-case>NLP</fixed-case> System for Clinical Text - SergueiPakhomov + SergueiPakhomov JamesBuntrock PatrickDuffy 25–28 @@ -1036,8 +1036,8 @@ A Voice Enabled Procedure Browser for the International Space Station - MannyRayner - Beth A.Hockey + MannyRayner + Beth A.Hockey NikosChatzichrisafis KimFarrell Jean-MichelRenders @@ -1057,9 +1057,9 @@ Learning Source-Target Surface Patterns for Web-based Terminology Translation - Jian-ChengWu + Jian-ChengWu TracyLin - Jason S.Chang + Jason S.Chang 37–40 P05-3010 10.3115/1225753.1225763 @@ -1076,10 +1076,10 @@ Multimodal Generation in the <fixed-case>COMIC</fixed-case> Dialogue System - Mary E.Foster - MichaelWhite - AndreaSetzer - RobertaCatizone + Mary E.Foster + MichaelWhite + AndreaSetzer + RobertaCatizone 45–48 P05-3012 10.3115/1225753.1225765 @@ -1087,7 +1087,7 @@ Language Independent Extractive Summarization - RadaMihalcea + RadaMihalcea 49–52 P05-3013 10.3115/1225753.1225766 @@ -1095,8 +1095,8 @@ <fixed-case>S</fixed-case>ense<fixed-case>L</fixed-case>earner: Word Sense Disambiguation for All Words in Unrestricted Text - RadaMihalcea - AndrasCsomai + RadaMihalcea + AndrasCsomai 53–56 P05-3014 10.3115/1225753.1225767 @@ -1124,10 +1124,10 @@ Supporting Annotation Layers for Natural Language Processing - PreslavNakov - ArielSchwartz + PreslavNakov + ArielSchwartz BrianWolf - MartiHearst + MartiHearst 65–68 P05-3017 10.3115/1225753.1225770 @@ -1164,13 +1164,13 @@ Automating Temporal Annotation with <fixed-case>TARSQI</fixed-case> MarcVerhagen InderjeetMani - RoserSauri + RoserSauri JessicaLittman RobertKnippen - Seok B.Jang + Seok B.Jang AnnaRumshisky JohnPhillips - JamesPustejovsky + JamesPustejovsky 81–84 P05-3021 10.3115/1225753.1225774 @@ -1178,15 +1178,15 @@ Two Diverse Systems Built using Generic Components for Spoken Dialogue (Recent Progress on <fixed-case>TRIPS</fixed-case>) - JamesAllen + JamesAllen GeorgeFerguson - AmandaStent - ScottStoness - MarySwift + AmandaStent + ScottStoness + MarySwift LucianGalescu - NathanChambers - EllenCampana - GregoryAist + NathanChambers + EllenCampana + GregoryAist 85–88 P05-3022 10.3115/1225753.1225775 @@ -1194,16 +1194,16 @@ <fixed-case>T</fixed-case>ransonics: A Practical Speech-to-Speech Translator for <fixed-case>E</fixed-case>nglish-<fixed-case>F</fixed-case>arsi Medical Dialogs - RobertBelvin + RobertBelvin EmilEttelaie SudeepGandhe - PanayiotisGeorgiou + PanayiotisGeorgiou KevinKnight DanielMarcu ScottMillward - ShrikanthNarayanan + ShrikanthNarayanan HowardNeely - DavidTraum + DavidTraum 89–92 P05-3023 10.3115/1225753.1225776 @@ -1211,7 +1211,7 @@ The Wild Thing - KenChurch + KenChurch BoThiesson 93–96 P05-3024 @@ -1231,7 +1231,7 @@ Multi-Engine Machine Translation Guided by Explicit Word Matching ShyamsundarJayaraman - AlonLavie + AlonLavie 101–104 P05-3026 10.3115/1225753.1225779 @@ -1248,7 +1248,7 @@ A Flexible Stand-Off Data Model with Query Language for Multi-Level Annotation - ChristophMueller + ChristophMueller 109–112 P05-3028 10.3115/1225753.1225781 diff --git a/data/xml/P06.xml b/data/xml/P06.xml index 4e0d3156dc..472b60fb84 100644 --- a/data/xml/P06.xml +++ b/data/xml/P06.xml @@ -4,8 +4,8 @@ Proceedings of the 21st International Conference on Computational Linguistics and 44th Annual Meeting of the Association for Computational Linguistics P06-1 - NicolettaCalzolari - ClaireCardie + NicolettaCalzolari + ClaireCardie PierreIsabelle Association for Computational Linguistics
Sydney, Australia
@@ -29,8 +29,8 @@
Going Beyond <fixed-case>AER</fixed-case>: An Extensive Analysis of Word Alignments and Their Impact on <fixed-case>MT</fixed-case> - Necip FazilAyan - Bonnie J.Dorr + Necip FazilAyan + Bonnie J.Dorr 9–16 P06-1002 10.3115/1220175.1220177 @@ -40,7 +40,7 @@ Unsupervised Topic Modelling for Multi-Party Spoken Discourse MatthewPurver Konrad P.Körding - Thomas L.Griffiths + Thomas L.Griffiths Joshua B.Tenenbaum 17–24 P06-1003 @@ -69,7 +69,7 @@ Kernel-Based Pronoun Resolution with Structured Syntactic Knowledge XiaofengYang JianSu - Chew LimTan + Chew LimTan 41–48 P06-1006 10.3115/1220175.1220181 @@ -96,8 +96,8 @@ Discriminative Word Alignment with Conditional Random Fields - PhilBlunsom - TrevorCohn + PhilBlunsom + TrevorCohn 65–72 P06-1009 10.3115/1220175.1220184 @@ -105,9 +105,9 @@ Named Entity Transliteration with Comparable Corpora - RichardSproat + RichardSproat TaoTao - ChengXiangZhai + ChengXiangZhai 73–80 P06-1010 10.3115/1220175.1220185 @@ -115,7 +115,7 @@ Extracting Parallel Sub-Sentential Fragments from Non-Parallel Corpora - Dragos StefanMunteanu + Dragos StefanMunteanu DanielMarcu 81–88 P06-1011 @@ -160,7 +160,7 @@ Modeling Commonality among Related Classes in Relation Extraction - GuoDongZhou + GuoDongZhou JianSu MinZhang 121–128 @@ -171,9 +171,9 @@ Relation Extraction Using Label Propagation Based Semi-Supervised Learning JinxiuChen - DonghongJi - Chew LimTan - ZhengyuNiu + DonghongJi + Chew LimTan + ZhengyuNiu 129–136 P06-1017 10.3115/1220175.1220192 @@ -189,7 +189,7 @@ Partially Specified Signatures: A Vehicle for Grammar Modularity - YaelCohen-Sygal + YaelCohen-Sygal ShulyWintner 145–152 P06-1019 @@ -198,7 +198,7 @@ Morphology-Syntax Interface for <fixed-case>T</fixed-case>urkish <fixed-case>LFG</fixed-case> - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu KemalOflazer 153–160 P06-1020 @@ -207,16 +207,16 @@ <fixed-case>PCFG</fixed-case>s with Syntactic and Prosodic Indicators of Speech Repairs - JohnHale + JohnHale IzhakShafran LisaYung - Bonnie J.Dorr - MaryHarper + Bonnie J.Dorr + MaryHarper AnnaKrasnyanskaya MatthewLease YangLiu BrianRoark - MatthewSnover + MatthewSnover RobinStewart 161–168 P06-1021 @@ -227,7 +227,7 @@ Dependency Parsing of <fixed-case>J</fixed-case>apanese Spoken Monologue Based on Clause Boundaries TomohiroOhno ShigekiMatsubara - HidekiKashioka + HidekiKashioka TakehikoMaruyama YasuyoshiInagaki 169–176 @@ -255,7 +255,7 @@ Dependencies between Student State and Speech Recognition Problems in Spoken Tutoring Dialogues MihaiRotaru - Diane J.Litman + Diane J.Litman 193–200 P06-1025 10.3115/1220175.1220200 @@ -263,9 +263,9 @@ Learning the Structure of Task-Driven Human-Human Dialogs - SrinivasBangalore - GiuseppeDi Fabbrizio - AmandaStent + SrinivasBangalore + GiuseppeDi Fabbrizio + AmandaStent 201–208 P06-1026 10.3115/1220175.1220201 @@ -326,7 +326,7 @@ Correcting <fixed-case>ESL</fixed-case> Errors Using Phrasal <fixed-case>SMT</fixed-case> Techniques ChrisBrockett - William B.Dolan + William B.Dolan MichaelGamon 249–256 P06-1032 @@ -347,7 +347,7 @@ Learning to Generate Naturalistic Utterances Using Reviews in Spoken Dialogue Systems RyuichiroHigashinaka RashmiPrasad - Marilyn A.Walker + Marilyn A.Walker 265–272 P06-1034 10.3115/1220175.1220209 @@ -355,7 +355,7 @@ Measuring Language Divergence by Intra-Lexical Comparison - T. MarkEllison + T. MarkEllison SimonKirby 273–280 P06-1035 @@ -373,7 +373,7 @@ Guiding a Constraint Dependency Parser with Supertags - Kilian A.Foth + Kilian A.Foth TomasBy WolfgangMenzel 289–296 @@ -392,7 +392,7 @@ <fixed-case>B</fixed-case>ayesian Query-Focused Summarization - HalDaumé III + HalDaumé III DanielMarcu 305–312 P06-1039 @@ -401,7 +401,7 @@ Expressing Implicit Semantic Relations without Supervision - Peter D.Turney + Peter D.Turney 313–320 P06-1040 10.3115/1220175.1220215 @@ -409,7 +409,7 @@ Hybrid Parsing: Using Probabilistic Models as Predictors for a Symbolic Parser - Kilian A.Foth + Kilian A.Foth WolfgangMenzel 321–328 P06-1041 @@ -418,8 +418,8 @@ Error Mining in Parsing Results - BenoîtSagot - Éricde la Clergerie + BenoîtSagot + Éricde la Clergerie 329–336 P06-1042 10.3115/1220175.1220217 @@ -458,7 +458,7 @@ Scaling Distributional Similarity to Large Corpora JamesGorman - James R.Curran + James R.Curran 361–368 P06-1046 10.3115/1220175.1220221 @@ -468,7 +468,7 @@ Extractive Summarization using Inter- and Intra- Event Relevance WenjieLi MingliWu - QinLu + QinLu WeiXu ChunfaYuan 369–376 @@ -488,7 +488,7 @@ A Bottom-Up Approach to Sentence Ordering for Multi-Document Summarization DanushkaBollegala - NaoakiOkazaki + NaoakiOkazaki MitsuruIshizuka 385–392 P06-1049 @@ -498,8 +498,8 @@ Learning Event Durations from Event Descriptions FengPan - RutuMulkar - Jerry R.Hobbs + RutuMulkar + Jerry R.Hobbs 393–400 P06-1050 10.3115/1220175.1220225 @@ -507,7 +507,7 @@ Automatic Learning of Textual Entailments with Cross-Pair Similarities - Fabio MassimoZanzotto + Fabio MassimoZanzotto AlessandroMoschitti 401–408 P06-1051 @@ -557,7 +557,7 @@ Semi-Supervised Learning of Partial Cognates Using Bilingual Bootstrapping OanaFrunza - DianaInkpen + DianaInkpen 441–448 P06-1056 10.3115/1220175.1220231 @@ -567,7 +567,7 @@ Direct Word Sense Matching for Lexical Substitution IdoDagan OrenGlickman - AlfioGliozzo + AlfioGliozzo EfratMarmorshtein CarloStrapparava 449–456 @@ -579,7 +579,7 @@ An Equivalent Pseudoword Solution to <fixed-case>C</fixed-case>hinese Word Sense Disambiguation ZhimaoLu HaifengWang - JianminYao + JianminYao TingLiu ShengLi 457–464 @@ -592,7 +592,7 @@ DaisukeOkanohara YusukeMiyao YoshimasaTsuruoka - Jun’ichiTsujii + Jun’ichiTsujii 465–472 P06-1059 10.3115/1220175.1220234 @@ -600,9 +600,9 @@ Factorizing Complex Models: A Case Study in Mention Detection - RaduFlorian - HongyanJing - NandaKambhatla + RaduFlorian + HongyanJing + NandaKambhatla ImedZitouni 473–480 P06-1060 @@ -633,7 +633,7 @@ <fixed-case>Q</fixed-case>uestion<fixed-case>B</fixed-case>ank: Creating a Corpus of Parse-Annotated Questions JohnJudge AoifeCahill - Josefvan Genabith + Josefvan Genabith 497–504 P06-1063 10.3115/1220175.1220238 @@ -649,8 +649,8 @@ Improved Discriminative Bilingual Word Alignment - Robert C.Moore - Wen-tauYih + Robert C.Moore + Wen-tauYih AndreasBode 513–520 P06-1065 @@ -659,7 +659,7 @@ Maximum Entropy Based Phrase Reordering Model for Statistical Machine Translation - DeyiXiong + DeyiXiong QunLiu ShouxunLin 521–528 @@ -669,7 +669,7 @@ Distortion Models for Statistical Machine Translation - YaserAl-Onaizan + YaserAl-Onaizan KishorePapineni 529–536 P06-1067 @@ -679,7 +679,7 @@ A Study on Automatically Extracted Keywords in Text Categorization AnetteHulth - Beáta B.Megyesi + Beáta B.Megyesi 537–544 P06-1068 10.3115/1220175.1220243 @@ -697,7 +697,7 @@ Exploiting Comparable Corpora and Bilingual Dictionaries for Cross-Language Text Categorization - AlfioGliozzo + AlfioGliozzo CarloStrapparava 553–560 P06-1070 @@ -716,8 +716,8 @@ Annealing Structural Bias in Multilingual Weighted Grammar Induction - Noah A.Smith - JasonEisner + Noah A.Smith + JasonEisner 569–576 P06-1072 10.3115/1220175.1220247 @@ -726,8 +726,8 @@ Maximum Entropy Based Restoration of <fixed-case>A</fixed-case>rabic Diacritics ImedZitouni - Jeffrey S.Sorensen - RuhiSarikaya + Jeffrey S.Sorensen + RuhiSarikaya 577–584 P06-1073 10.3115/1220175.1220248 @@ -738,7 +738,7 @@ YuanhuaLv LeSun JunlinZhang - Jian-YunNie + Jian-YunNie WanChen WeiZhang 585–592 @@ -787,7 +787,7 @@ Exploiting Syntactic Patterns as Clues in Zero-Anaphora Resolution RyuIida KentaroInui - YujiMatsumoto + YujiMatsumoto 625–632 P06-1079 10.3115/1220175.1220254 @@ -797,7 +797,7 @@ Self-Organizing n-gram Model for Automatic Word Spacing Seong-BaePark Yoon-ShikTae - Se-YoungPark + Se-YoungPark 633–640 P06-1080 10.3115/1220175.1220255 @@ -806,9 +806,9 @@ Concept Unification of Terms in Different Languages for <fixed-case>IR</fixed-case> QingLi - Sung-HyonMyaeng + Sung-HyonMyaeng YunJin - Bo-yeongKang + Bo-yeongKang 641–648 P06-1081 10.3115/1220175.1220256 @@ -816,7 +816,7 @@ Word Alignment in <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Parallel Corpus Using Recency-Vector Approach: Some Studies - NiladriChatterjee + NiladriChatterjee SaumyaAgrawal 649–656 P06-1082 @@ -844,8 +844,8 @@ Contextual Dependencies in Unsupervised Word Segmentation - SharonGoldwater - Thomas L.Griffiths + SharonGoldwater + Thomas L.Griffiths MarkJohnson 673–680 P06-1085 @@ -855,7 +855,7 @@ <fixed-case>MAGEAD</fixed-case>: A Morphological Analyzer and Generator for the <fixed-case>A</fixed-case>rabic Dialects NizarHabash - OwenRambow + OwenRambow 681–688 P06-1086 10.3115/1220175.1220261 @@ -873,7 +873,7 @@ Multi-Tagging for Lexicalized-Grammar Parsing - James R.Curran + James R.Curran StephenClark DavidVadas 697–704 @@ -884,7 +884,7 @@ Guessing Parts-of-Speech of Unknown Words Using Global Information TetsujiNakagawa - YujiMatsumoto + YujiMatsumoto 705–712 P06-1089 10.3115/1220175.1220264 @@ -903,7 +903,7 @@ A Discriminative Global Training Algorithm for Statistical <fixed-case>MT</fixed-case> - ChristophTillmann + ChristophTillmann TongZhang 721–728 P06-1091 @@ -923,7 +923,7 @@ Automatic Generation of Domain Models for Call-Centers from Noisy Transcriptions ShouryaRoy - L VenkataSubramaniam + L VenkataSubramaniam 737–744 P06-1093 10.3115/1220175.1220268 @@ -931,9 +931,9 @@ Proximity in Context: An Empirically Grounded Computational Model of Proximity for Processing Topological Spatial Expressions - John D.Kelleher - Geert-Jan M.Kruijff - Fintan J.Costello + John D.Kelleher + Geert-Jan M.Kruijff + Fintan J.Costello 745–752 P06-1094 10.3115/1220175.1220269 @@ -943,9 +943,9 @@ Machine Learning of Temporal Relations InderjeetMani MarcVerhagen - BenWellner - Chong MinLee - JamesPustejovsky + BenWellner + Chong MinLee + JamesPustejovsky 753–760 P06-1095 10.3115/1220175.1220270 @@ -954,7 +954,7 @@ An End-to-End Discriminative Approach to Machine Translation PercyLiang - AlexandreBouchard-Côté + AlexandreBouchard-Côté DanKlein BenTaskar 761–768 @@ -964,7 +964,7 @@ Semi-Supervised Training for Statistical Word Alignment - AlexanderFraser + AlexanderFraser DanielMarcu 769–776 P06-1097 @@ -1002,8 +1002,8 @@ Semantic Taxonomy Induction from Heterogenous Evidence RionSnow - DanielJurafsky - Andrew Y.Ng + DanielJurafsky + Andrew Y.Ng 801–808 P06-1101 10.3115/1220175.1220276 @@ -1011,9 +1011,9 @@ Names and Similarities on the Web: Fact Extraction in the Fast Lane - MariusPaşca + MariusPaşca DekangLin - JeffreyBigham + JeffreyBigham AndreiLifchits AlpaJain 809–816 @@ -1035,7 +1035,7 @@ MinZhang JieZhang JianSu - GuoDongZhou + GuoDongZhou 825–832 P06-1104 10.3115/1220175.1220279 @@ -1044,7 +1044,7 @@ <fixed-case>J</fixed-case>apanese Dependency Parsing Using Co-Occurrence Information and a Combination of Case Elements TakeshiAbekawa - ManabuOkumura + ManabuOkumura 833–840 P06-1105 10.3115/1220175.1220280 @@ -1061,9 +1061,9 @@ Discovering Asymmetric Entailment Relations between Verbs Using Selectional Preferences - Fabio MassimoZanzotto + Fabio MassimoZanzotto MarcoPennacchiotti - Maria TeresaPazienza + Maria TeresaPazienza 849–856 P06-1107 10.3115/1220175.1220282 @@ -1072,7 +1072,7 @@ Event Extraction in a Plot Advice Agent HarryHalpin - Johanna D.Moore + Johanna D.Moore 857–864 P06-1108 10.3115/1220175.1220283 @@ -1088,7 +1088,7 @@ Advances in Discriminative Parsing - JosephTurian + JosephTurian I. DanMelamed 873–880 P06-1110 @@ -1116,7 +1116,7 @@ Question Answering with Lexical Chains Propagating Verb Arguments AdrianNovischi - DanMoldovan + DanMoldovan 897–904 P06-1113 10.3115/1220175.1220288 @@ -1124,7 +1124,7 @@ Methods for Using Textual Entailment in Open-Domain Question Answering - SandaHarabagiu + SandaHarabagiu AndrewHickl 905–912 P06-1114 @@ -1133,8 +1133,8 @@ Using String-Kernels for Learning Semantic Parsers - Rohit J.Kate - Raymond J.Mooney + Rohit J.Kate + Raymond J.Mooney 913–920 P06-1115 10.3115/1220175.1220290 @@ -1160,7 +1160,7 @@ Multilingual Legal Terminology on the Jibiki Platform: The <fixed-case>L</fixed-case>ex<fixed-case>ALP</fixed-case> Project - GillesSérasset + GillesSérasset FrancisBrunet-Manquat ElenaChiocchetti 937–944 @@ -1171,9 +1171,9 @@ Leveraging Reusability: Cost-Effective Lexical Acquisition for Large-Scale Ontology Translation G. CraigMurray - Bonnie J.Dorr + Bonnie J.Dorr JimmyLin - JanHajič + JanHajič PavelPecina 945–952 P06-1119 @@ -1183,7 +1183,7 @@ Accurate Collocation Extraction Using a Multilingual Parser VioletaSeretan - EricWehrli + EricWehrli 953–960 P06-1120 10.3115/1220175.1220295 @@ -1233,7 +1233,7 @@ A Phonetic-Based Approach to <fixed-case>C</fixed-case>hinese Chat Text Normalization YunqingXia - Kam-FaiWong + Kam-FaiWong WenjieLi 993–1000 P06-1125 @@ -1269,7 +1269,7 @@ YoshimasaTsuruoka KazuhiroYoshida TakashiNinomiya - Jun’ichiTsujii + Jun’ichiTsujii 1017–1024 P06-1128 10.3115/1220175.1220303 @@ -1279,7 +1279,7 @@ Exploring Distributional Similarity Based Models for Query Spelling Correction MuLi MuhuaZhu - YangZhang + YangZhang MingZhou 1025–1032 P06-1129 @@ -1289,7 +1289,7 @@ Robust <fixed-case>PCFG</fixed-case>-Based Generation Using Automatically Acquired <fixed-case>LFG</fixed-case> Approximations AoifeCahill - Josefvan Genabith + Josefvan Genabith 1033–1040 P06-1130 10.3115/1220175.1220305 @@ -1297,8 +1297,8 @@ Incremental Generation of Spatial Referring Expressions in Situated Dialog - John D.Kelleher - Geert-Jan M.Kruijff + John D.Kelleher + Geert-Jan M.Kruijff 1041–1048 P06-1131 10.3115/1220175.1220306 @@ -1316,7 +1316,7 @@ Are These Documents Written from Different Perspectives? A Test of Different Perspectives Based on Statistical Distribution Divergence Wei-HaoLin - AlexanderHauptmann + AlexanderHauptmann 1057–1064 P06-1133 10.3115/1220175.1220308 @@ -1324,8 +1324,8 @@ Word Sense and Subjectivity - JanyceWiebe - RadaMihalcea + JanyceWiebe + RadaMihalcea 1065–1072 P06-1134 10.3115/1220175.1220309 @@ -1334,8 +1334,8 @@ Improving <fixed-case>QA</fixed-case> Accuracy by Question Inversion JohnPrager - PabloDuboue - JenniferChu-Carroll + PabloDuboue + JenniferChu-Carroll 1073–1080 P06-1135 10.3115/1220175.1220310 @@ -1381,7 +1381,7 @@ Learning to Say It Well: Reranking Realizations by Predicted Synthesis Quality CrystalNakatsu - MichaelWhite + MichaelWhite 1113–1120 P06-1140 10.3115/1220175.1220315 @@ -1390,7 +1390,7 @@ An Effective Two-Stage Model for Exploiting Non-Local Dependencies in Named Entity Recognition VijayKrishnan - Christopher D.Manning + Christopher D.Manning 1121–1128 P06-1141 10.3115/1220175.1220316 @@ -1408,7 +1408,7 @@ <fixed-case>P</fixed-case>unjabi Machine Transliteration - M.G. AbbasMalik + M.G. AbbasMalik 1137–1144 P06-1143 10.3115/1220175.1220318 @@ -1417,8 +1417,8 @@ Multilingual Document Clustering: An Heuristic Approach Based on Cognate Named Entities SotoMontalvo - RaquelMartínez - ArantzaCasillas + RaquelMartínez + ArantzaCasillas VíctorFresno 1145–1152 P06-1144 @@ -1430,7 +1430,7 @@ TaichiNoro TakashiInui HiroyaTakamura - ManabuOkumura + ManabuOkumura 1153–1160 P06-1145 10.3115/1220175.1220320 @@ -1438,7 +1438,7 @@ Optimal Constituent Alignment with Edge Covers for Semantic Projection - SebastianPadó + SebastianPadó MirellaLapata 1161–1168 P06-1146 @@ -1448,7 +1448,7 @@ Utilizing Co-Occurrence of Answers in Question Answering MinWu - TomekStrzalkowski + TomekStrzalkowski 1169–1176 P06-1147 10.3115/1220175.1220322 @@ -1472,11 +1472,11 @@ Using Machine Learning Techniques to Build a Comma Checker for <fixed-case>B</fixed-case>asque - IñakiAlegria + IñakiAlegria BertolArrieta - ArantzaDiaz de Ilarraza + ArantzaDiaz de Ilarraza EliIzagirre - MontseMaritxalar + MontseMaritxalar 1–8 P06-2001 alegria-etal-2006-using @@ -1485,18 +1485,18 @@ A Rote Extractor with Edit Distance-Based Generalisation and Multi-Corpora Precision Calculation EnriqueAlfonseca PabloCastells - ManabuOkumura - MariaRuiz-Casado + ManabuOkumura + MariaRuiz-Casado 9–16 P06-2002 alfonseca-etal-2006-rote <fixed-case>MT</fixed-case> Evaluation: Human-Like vs. Human Acceptable - EnriqueAmigó - JesúsGiménez + EnriqueAmigó + JesúsGiménez JulioGonzalo - LluísMàrquez + LluísMàrquez 17–24 P06-2003 amigo-etal-2006-mt @@ -1504,14 +1504,14 @@ The Effect of Corpus Size in Combining Supervised and Unsupervised Training for Disambiguation MichaelaAtterer - HinrichSchütze + HinrichSchütze 25–32 P06-2004 atterer-schutze-2006-effect A Phrase-Based Statistical Model for <fixed-case>SMS</fixed-case> Text Normalization - AiTiAw + AiTiAw MinZhang JuanXiao JianSu @@ -1521,8 +1521,8 @@ Evaluating the Accuracy of an Unlexicalized Statistical Parser on the <fixed-case>PARC</fixed-case> <fixed-case>D</fixed-case>ep<fixed-case>B</fixed-case>ank - TedBriscoe - JohnCarroll + TedBriscoe + JohnCarroll 41–48 P06-2006 briscoe-carroll-2006-evaluating @@ -1539,8 +1539,8 @@ Towards Conversational <fixed-case>QA</fixed-case>: Automatic Identification of Problematic Situations and User Intent - Joyce Y.Chai - ChenZhang + Joyce Y.Chai + ChenZhang TylerBaldwin 57–64 P06-2008 @@ -1576,9 +1576,9 @@ Unsupervised Relation Disambiguation Using Spectral Clustering JinxiuChen - DonghongJi - Chew LimTan - ZhengyuNiu + DonghongJi + Chew LimTan + ZhengyuNiu 89–96 P06-2012 chen-etal-2006-unsupervised @@ -1626,8 +1626,8 @@ Using Machine-Learning to Assign Function Labels to Parser Output for <fixed-case>S</fixed-case>panish - GrzegorzChrupała - Josefvan Genabith + GrzegorzChrupała + Josefvan Genabith 136–143 P06-2018 chrupala-van-genabith-2006-using @@ -1642,16 +1642,16 @@ Topic-Focused Multi-Document Summarization Using an Approximate Oracle Score - John M.Conroy - Judith D.Schlesinger - Dianne P.O’Leary + John M.Conroy + Judith D.Schlesinger + Dianne P.O’Leary 152–159 P06-2020 conroy-etal-2006-topic Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et to Automatically Deduce Relations between Words in Noun-Noun Compounds - Fintan J.Costello + Fintan J.Costello TonyVeale SimonDunne 160–167 @@ -1663,16 +1663,16 @@ CassandreCreswell Matthew J.Beal JohnChen - Thomas L.Cornell + Thomas L.Cornell LarsNilsson - Rohini K.Srihari + Rohini K.Srihari 168–175 P06-2022 creswell-etal-2006-automatically A Bio-Inspired Approach for Multi-Word Expression Extraction - JianyongDuan + JianyongDuan RuzhanLu WeilinWu YiHu @@ -1691,8 +1691,8 @@ A Modified Joint Source-Channel Model for Transliteration AsifEkbal - Sudip KumarNaskar - SivajiBandyopadhyay + Sudip KumarNaskar + SivajiBandyopadhyay 191–198 P06-2025 ekbal-etal-2006-modified @@ -1710,7 +1710,7 @@ Automatic Creation of Domain Templates ElenaFilatova VasileiosHatzivassiloglou - KathleenMcKeown + KathleenMcKeown 207–214 P06-2027 filatova-etal-2006-automatic @@ -1718,16 +1718,16 @@ Using Lexical Dependency and Ontological Knowledge to Improve a Detailed Syntactic and Semantic Tagger of <fixed-case>E</fixed-case>nglish AndrewFinch - EzraBlack + EzraBlack Young-SookHwang - EiichiroSumita + EiichiroSumita 215–222 P06-2028 finch-etal-2006-using The Benefit of Stochastic <fixed-case>PP</fixed-case> Attachment to a Rule-Based Parser - Kilian A.Foth + Kilian A.Foth WolfgangMenzel 223–230 P06-2029 @@ -1760,7 +1760,7 @@ Conceptual Coherence in the Generation of Referring Expressions AlbertGatt - Keesvan Deemter + Keesvan Deemter 255–262 P06-2033 gatt-van-deemter-2006-conceptual @@ -1768,7 +1768,7 @@ Discriminative Reranking for Semantic Parsing RuifangGe - Raymond J.Mooney + Raymond J.Mooney 263–270 P06-2034 ge-mooney-2006-discriminative @@ -1792,8 +1792,8 @@ Low-Cost Enrichment of <fixed-case>S</fixed-case>panish <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with Automatically Translated Glosses: Combining General and Specialized Models - JesúsGiménez - LluísMàrquez + JesúsGiménez + LluísMàrquez 287–294 P06-2037 gimenez-marquez-2006-low @@ -1801,7 +1801,7 @@ Speeding Up Full Syntactic Parsing by Leveraging Partial Parsing Decisions ElliotGlaysher - DanMoldovan + DanMoldovan 295–300 P06-2038 glaysher-moldovan-2006-speeding @@ -1809,14 +1809,14 @@ Parsing Aligned Parallel Corpus by Projecting Syntactic Relations from Annotated Source Corpus ShaillyGoyal - NiladriChatterjee + NiladriChatterjee 301–308 P06-2039 goyal-chatterjee-2006-parsing Reduced n-gram Models for <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese Corpora - Le QHa + Le QHa PHanna D WStewart F JSmith @@ -1846,7 +1846,7 @@ Improving <fixed-case>E</fixed-case>nglish Subcategorization Acquisition with Diathesis Alternations as Heuristic Information XiwuHan - TiejunZhao + TiejunZhao XingshangFu 331–336 P06-2043 @@ -1874,7 +1874,7 @@ <fixed-case>J</fixed-case>apanese Idiom Recognition: Drawing a Line between Literal and Idiomatic Meanings ChikaraHashimoto - SatoshiSato + SatoshiSato TakehitoUtsuro 353–360 P06-2046 @@ -1882,7 +1882,7 @@ Graph Branch Algorithm: An Optimum Tree Search Method for Scored Dependency Graph with Arc Co-Occurrence Constraints - HidekiHirakawa + HidekiHirakawa 361–368 P06-2047 hirakawa-2006-graph @@ -1905,7 +1905,7 @@ When Conset Meets Synset: A Preliminary Survey of an Ontological Lexical Resource Based on <fixed-case>C</fixed-case>hinese Characters - Shu-KaiHsieh + Shu-KaiHsieh Chu-RenHuang 385–390 P06-2050 @@ -1934,7 +1934,7 @@ SanazJabbari BenAllison DavidGuthrie - LouiseGuthrie + LouiseGuthrie 407–411 P06-2053 jabbari-etal-2006-towards @@ -1942,7 +1942,7 @@ Exploiting Non-Local Features for Spoken Language Understanding MinwooJeong - Gary GeunbaeLee + Gary GeunbaeLee 412–419 P06-2054 jeong-lee-2006-exploiting @@ -1950,7 +1950,7 @@ Analysis and Repair of Name Tagger Errors HengJi - RalphGrishman + RalphGrishman 420–427 P06-2055 ji-grishman-2006-analysis @@ -1958,7 +1958,7 @@ Unsupervised Segmentation of <fixed-case>C</fixed-case>hinese Text by Use of Branching Entropy ZhihuiJin - KumikoTanaka-Ishii + KumikoTanaka-Ishii 428–435 P06-2056 jin-tanaka-ishii-2006-unsupervised @@ -1989,7 +1989,7 @@ Minority Vote: At-Least-N Voting Improves Recall for Extracting Relations - NandaKambhatla + NandaKambhatla 460–466 P06-2060 kambhatla-2006-minority @@ -1998,7 +1998,7 @@ Integration of Speech to Computer-Assisted Translation Using Finite-State Automata ShahramKhadivi RichardZens - HermannNey + HermannNey 467–474 P06-2061 khadivi-etal-2006-integration @@ -2013,7 +2013,7 @@ Automatic Identification of Pro and Con Reasons in Online Reviews Soo-MinKim - EduardHovy + EduardHovy 483–490 P06-2063 kim-hovy-2006-automatic @@ -2021,7 +2021,7 @@ Interpreting Semantic Relations in Noun Compounds via Verb Semantics Su NamKim - TimothyBaldwin + TimothyBaldwin 491–498 P06-2064 kim-baldwin-2006-interpreting @@ -2078,8 +2078,8 @@ Discriminating Image Senses by Clustering with Multimodal Features NicolasLoeff - Cecilia OvesdotterAlm - David A.Forsyth + Cecilia OvesdotterAlm + David A.Forsyth 547–554 P06-2071 loeff-etal-2006-discriminating @@ -2094,9 +2094,9 @@ Segmented and Unsegmented Dialogue-Act Annotation with Statistical Dialogue Models - Carlos D.Martínez Hinarejos - RamónGranell - José MiguelBenedí + Carlos D.Martínez Hinarejos + RamónGranell + José MiguelBenedí 563–570 P06-2073 martinez-hinarejos-etal-2006-segmented @@ -2105,7 +2105,7 @@ <fixed-case>ARE</fixed-case>: Instance Splitting Strategies for Dependency Relation-Based Information Extraction MstislavMaslennikov Hai-KiatGoh - Tat-SengChua + Tat-SengChua 571–578 P06-2074 maslennikov-etal-2006-instance @@ -2114,7 +2114,7 @@ Integrating Pattern-Based and Distributional Similarity Methods for Lexical Entailment Acquisition ShacharMirkin IdoDagan - MaayanGeffet + MaayanGeffet 579–586 P06-2075 mirkin-etal-2006-integrating @@ -2142,7 +2142,7 @@ An Automatic Method for Summary Evaluation Using Multiple Evaluation Results by a Manual Method HidetsuguNanba - ManabuOkumura + ManabuOkumura 603–610 P06-2078 nanba-okumura-2006-automatic @@ -2158,7 +2158,7 @@ Semantic Parsing with Structured <fixed-case>SVM</fixed-case> Ensemble Classification Models - Le-MinhNguyen + Le-MinhNguyen AkiraShimazu Xuan-HieuPhan 619–626 @@ -2167,7 +2167,7 @@ Whose Thumb Is It Anyway? Classifying Author Personality from Weblog Text - JonOberlander + JonOberlander ScottNowson 627–634 P06-2081 @@ -2182,7 +2182,7 @@ A Term Recognition Approach to Acronym Recognition - NaoakiOkazaki + NaoakiOkazaki SophiaAnaniadou 643–650 P06-2083 @@ -2217,7 +2217,7 @@ PatrickRuch ImadTbahriti JulienGobeill - Alan R.Aronson + Alan R.Aronson 675–682 P06-2087 ruch-etal-2006-argumentative @@ -2234,7 +2234,7 @@ A Best-First Probabilistic Shift-Reduce Parser KenjiSagae - AlonLavie + AlonLavie 691–698 P06-2089 sagae-lavie-2006-best @@ -2243,7 +2243,7 @@ Implementing a Characterization of Genre for Automatic Genre Identification of Web Pages MarinaSantini RichardPower - RogerEvans + RogerEvans 699–706 P06-2090 santini-etal-2006-implementing @@ -2253,7 +2253,7 @@ ManabuSato DaisukeBekki YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 707–714 P06-2091 sato-etal-2006-translating @@ -2268,7 +2268,7 @@ Continuous Space Language Models for Statistical Machine Translation HolgerSchwenk - DanielDechelotte + DanielDechelotte Jean-LucGauvain 723–730 P06-2093 @@ -2285,7 +2285,7 @@ Using Comparable Corpora to Solve Problems Difficult for Human Translators SergeSharoff BogdanBabych - AnthonyHartley + AnthonyHartley 739–746 P06-2095 sharoff-etal-2006-using-comparable @@ -2293,9 +2293,9 @@ Adding Syntax to Dynamic Programming for Aligning Comparable Texts for the Generation of Paraphrases SiweiShen - Dragomir R.Radev + Dragomir R.Radev AgamPatel - GüneşErkan + GüneşErkan 747–754 P06-2096 shen-etal-2006-adding @@ -2311,7 +2311,7 @@ Exact Decoding for Jointly Labeling and Chunking Sequences NobuyukiShimizu - AndrewHaas + AndrewHaas 763–770 P06-2098 shimizu-haas-2006-exact @@ -2328,16 +2328,16 @@ Morphological Richness Offsets Resource Demand – Experiences in Constructing a <fixed-case>POS</fixed-case> Tagger for <fixed-case>H</fixed-case>indi SmritiSingh KuhooGupta - ManishShrivastava - PushpakBhattacharyya + ManishShrivastava + PushpakBhattacharyya 779–786 P06-2100 singh-etal-2006-morphological Minimum Risk Annealing for Training Log-Linear Models - David A.Smith - JasonEisner + David A.Smith + JasonEisner 787–794 P06-2101 smith-eisner-2006-minimum @@ -2345,7 +2345,7 @@ Unsupervised Induction of <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic Verb Classes Using Syntactic Frames and <fixed-case>LSA</fixed-case> NealSnider - MonaDiab + MonaDiab 795–802 P06-2102 snider-diab-2006-unsupervised-induction @@ -2361,7 +2361,7 @@ A Comparison of Alternative Parse Tree Paths for Labeling Semantic Roles ReidSwanson - Andrew S.Gordon + Andrew S.Gordon 811–818 P06-2104 swanson-gordon-2006-comparison @@ -2369,7 +2369,7 @@ A Logic-Based Semantic Approach to Recognizing Textual Entailment MartaTatu - DanMoldovan + DanMoldovan 819–826 P06-2105 tatu-moldovan-2006-logic @@ -2379,13 +2379,13 @@ TakenobuTokunaga VirachSornlertlamvanich ThatsaneeCharoenporn - NicolettaCalzolari + NicolettaCalzolari MonicaMonachini ClaudiaSoria Chu-RenHuang - YingJuXia + YingJuXia HaoYu - LaurentPrevot + LaurentPrevot KiyoakiShirai 827–834 P06-2106 @@ -2394,7 +2394,7 @@ Statistical Phrase-Based Models for Interactive Computer-Assisted Translation JesúsTomás - FranciscoCasacuberta + FranciscoCasacuberta 835–841 P06-2107 tomas-casacuberta-2006-statistical @@ -2411,7 +2411,7 @@ YuyaUnno TakashiNinomiya YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 850–857 P06-2109 unno-etal-2006-trimming @@ -2426,8 +2426,8 @@ Finding Synonyms Using Automatic Word Alignment and Measures of Distributional Similarity - Lonnekevan der Plas - JörgTiedemann + Lonnekevan der Plas + JörgTiedemann 866–873 P06-2111 van-der-plas-tiedemann-2006-finding @@ -2446,7 +2446,7 @@ Ye-YiWang AlexAcero MilindMahajan - JohnLee + JohnLee 882–889 P06-2113 wang-etal-2006-combining @@ -2489,7 +2489,7 @@ Aligning Features with Sense Distinction Dimensions NianwenXue JinyingChen - MarthaPalmer + MarthaPalmer 921–928 P06-2118 xue-etal-2006-aligning @@ -2497,7 +2497,7 @@ Word Sense Disambiguation Using Lexical Cohesion in the Context DongqiangYang - David M. W.Powers + David M. W.Powers 929–936 P06-2119 yang-powers-2006-word @@ -2513,7 +2513,7 @@ <fixed-case>HAL</fixed-case>-Based Cascaded Model for Variable-Length Semantic Pattern Induction from Psychiatry Web Resources - Liang-ChihYu + Liang-ChihYu Chung-HsienWu Fong-LinJang 945–952 @@ -2532,7 +2532,7 @@ Subword-Based Tagging for Confidence-Dependent <fixed-case>C</fixed-case>hinese Word Segmentation RuiqiangZhang GenichiroKikui - EiichiroSumita + EiichiroSumita 961–968 P06-2123 zhang-etal-2006-subword-based @@ -2540,7 +2540,7 @@ <fixed-case>B</fixed-case>i<fixed-case>TAM</fixed-case>: Bilingual Topic <fixed-case>A</fixed-case>d<fixed-case>M</fixed-case>ixture Models for Word Alignment BingZhao - Eric P.Xing + Eric P.Xing 969–976 P06-2124 zhao-xing-2006-bitam @@ -2581,14 +2581,14 @@ Unsupervised Part-of-Speech Tagging Employing Efficient Graph Clustering - ChrisBiemann + ChrisBiemann 7–12 P06-3002 biemann-2006-unsupervised Sub-Sentential Alignment Using Substring Co-Occurrence Counts - FabienCromieres + FabienCromieres 13–18 P06-3003 cromieres-2006-sub @@ -2682,7 +2682,7 @@ Proceedings of the COLING/ACL 2006 Interactive Presentation Sessions P06-4 - JamesCurran + JamesCurran Association for Computational Linguistics
Sydney, Australia
July @@ -2698,7 +2698,7 @@ <fixed-case>FAST</fixed-case> – An Automatic Generation System for Grammar Tests Chia-YinChen Hsien-ChinLiou - Jason S.Chang + Jason S.Chang 1–4 P06-4001 10.3115/1225403.1225404 @@ -2706,8 +2706,8 @@
Is It Correct? – Towards Web-Based Evaluation of Automatic Natural Language Phrase Generation - Calkin S.Montero - KenjiAraki + Calkin S.Montero + KenjiAraki 5–8 P06-4002 10.3115/1225403.1225405 @@ -2715,12 +2715,12 @@ <fixed-case>L</fixed-case>e<fixed-case>XF</fixed-case>low: A System for Cross-Fertilization of Computational Lexicons - MaurizioTesconi + MaurizioTesconi AndreaMarchetti FrancescaBertagna MonicaMonachini ClaudiaSoria - NicolettaCalzolari + NicolettaCalzolari 9–12 P06-4003 10.3115/1225403.1225406 @@ -2746,8 +2746,8 @@ KazuhiroYoshida TadayoshiHara Jin-DongKim - YukaTateisi - Jun’ichiTsujii + YukaTateisi + Jun’ichiTsujii 17–20 P06-4005 10.3115/1225403.1225408 @@ -2766,7 +2766,7 @@ AndrewHickl PatrickWang JohnLehmann - SandaHarabagiu + SandaHarabagiu 25–28 P06-4007 10.3115/1225403.1225410 @@ -2775,11 +2775,11 @@ <fixed-case>K</fixed-case>-<fixed-case>QARD</fixed-case>: A Practical <fixed-case>K</fixed-case>orean Question Answering Framework for Restricted Domain Young-InSong - HooJungChung + HooJungChung Kyoung-SooHan - JooYoungLee - Hae-ChangRim - Jae-WonLee + JooYoungLee + Hae-ChangRim + Jae-WonLee 29–32 P06-4008 10.3115/1225403.1225411 @@ -2787,7 +2787,7 @@ An Intermediate Representation for the Interpretation of Temporal Expressions - PawełMazur + PawełMazur RobertDale 33–36 P06-4009 @@ -2805,10 +2805,10 @@ Computational Analysis of Move Structures in Academic Abstracts - Jien-ChenWu + Jien-ChenWu Yu-ChiaChang Hsien-ChinLiou - Jason S.Chang + Jason S.Chang 41–44 P06-4011 10.3115/1225403.1225414 @@ -2816,8 +2816,8 @@ <fixed-case>L</fixed-case>ex<fixed-case>N</fixed-case>et: A Graphical Environment for Graph-Based <fixed-case>NLP</fixed-case> - Dragomir R.Radev - GüneşErkan + Dragomir R.Radev + GüneşErkan AnthonyFader PatrickJordan SiweiShen @@ -2832,8 +2832,8 @@ MaritaAilomaa MiroslavMelichar AgnesLisowska - MartinRajman - SusanArmstrong + MartinRajman + SusanArmstrong 49–52 P06-4013 10.3115/1225403.1225416 @@ -2853,9 +2853,9 @@ TilmanBecker PeterPoller JanSchehl - NateBlaylock + NateBlaylock CiprianGerstenberger - IvanaKruijff-Korbayová + IvanaKruijff-Korbayová 57–60 P06-4015 10.3115/1225403.1225418 @@ -2863,7 +2863,7 @@ <fixed-case>T</fixed-case>wic<fixed-case>P</fixed-case>en: Hand-held Scanner and Translation Software for non-Native Readers - EricWehrli + EricWehrli 61–64 P06-4016 10.3115/1225403.1225419 @@ -2891,7 +2891,7 @@ <fixed-case>O</fixed-case>utilex, a Linguistic Platform for Text Processing OlivierBlanc - MatthieuConstant + MatthieuConstant 73–76 P06-4019 10.3115/1225403.1225422 @@ -2899,8 +2899,8 @@ The Second Release of the <fixed-case>RASP</fixed-case> System - TedBriscoe - JohnCarroll + TedBriscoe + JohnCarroll RebeccaWatson 77–80 P06-4020 diff --git a/data/xml/P07.xml b/data/xml/P07.xml index 8f77114dba..a63d82ec20 100644 --- a/data/xml/P07.xml +++ b/data/xml/P07.xml @@ -5,7 +5,7 @@ Proceedings of the 45th Annual Meeting of the Association of Computational Linguistics P07-1 AnnieZaenen - Antalvan den Bosch + Antalvan den Bosch Association for Computational Linguistics
Prague, Czech Republic
June @@ -19,14 +19,14 @@ Guiding Statistical Word Alignment Models With Prior Knowledge YonggangDeng - YuqingGao + YuqingGao 1–8 P07-1001 deng-gao-2007-guiding A Discriminative Syntactic Word Order Model for Machine Translation - Pi-ChuanChang + Pi-ChuanChang KristinaToutanova 9–16 P07-1002 @@ -43,7 +43,7 @@ Transductive learning for statistical machine translation NicolaUeffing - GholamrezaHaffari + GholamrezaHaffari AnoopSarkar 25–32 P07-1004 @@ -62,7 +62,7 @@ Learning Expressive Models for Word Sense Disambiguation LuciaSpecia MarkStevenson - Maria das GraçasVolpe Nunes + Maria das GraçasVolpe Nunes 41–48 P07-1006 specia-etal-2007-learning @@ -85,7 +85,7 @@ A <fixed-case>B</fixed-case>ayesian Model for Discovering Typological Implications - HalDaumé III + HalDaumé III LyleCampbell 65–72 P07-1009 @@ -94,7 +94,7 @@ A discriminative language model with pseudo-negative samples DaisukeOkanohara - Jun’ichiTsujii + Jun’ichiTsujii 73–80 P07-1010 okanohara-tsujii-2007-discriminative @@ -111,10 +111,10 @@ On the role of context and prosody in the interpretation of ‘okay’ - AgustínGravano - StefanBenus + AgustínGravano + StefanBenus HéctorChávez - JuliaHirschberg + JuliaHirschberg LaurenWilcox 800–807 P07-1101 @@ -123,14 +123,14 @@ Predicting Success in Dialogue DavidReitter - Johanna D.Moore + Johanna D.Moore 808–815 P07-1102 reitter-moore-2007-predicting Resolving It, This, and That in Unrestricted Multi-Party Dialog - ChristophMüller + ChristophMüller 816–823 P07-1103 muller-2007-resolving @@ -148,7 +148,7 @@ Grammar Approximation by Representative Sublanguage: A New Model for Language Learning SmarandaMuresan - OwenRambow + OwenRambow 832–839 P07-1105 muresan-rambow-2007-grammar @@ -192,15 +192,15 @@ GaoCong MingZhou ZhongyangXiong - JohnLee - Chin-YewLin + JohnLee + Chin-YewLin 81–88 P07-1011 sun-etal-2007-detecting Benefits of the ‘Massively Parallel Rosetta Stone’: Cross-Language Information Retrieval with over 30 Languages - PeterChew + PeterChew AhmedAbdelali 872–879 P07-1110 @@ -216,7 +216,7 @@ Automatic Acquisition of Ranked Qualia Structures from the Web - PhilippCimiano + PhilippCimiano JohannaWenderoth 888–895 P07-1112 @@ -234,7 +234,7 @@ Words and Echoes: Assessing and Mitigating the Non-Randomness Problem in Word Frequency Distribution Modeling - MarcoBaroni + MarcoBaroni StefanEvert 904–911 P07-1114 @@ -243,7 +243,7 @@ A System for Large-Scale Acquisition of Verbal, Nominal and Adjectival Subcategorization Frames from Corpora JuditaPreiss - TedBriscoe + TedBriscoe AnnaKorhonen 912–919 P07-1115 @@ -258,7 +258,7 @@ Using Mazurkiewicz Trace Languages for Partition-Based Morphology - FrançoisBarthélemy + FrançoisBarthélemy 928–935 P07-1117 barthelemy-2007-using @@ -267,7 +267,7 @@ Much ado about nothing: A social network model of <fixed-case>R</fixed-case>ussian paradigmatic gaps RobertDaland Andrea D.Sims - JanetPierrehumbert + JanetPierrehumbert 936–943 P07-1118 daland-etal-2007-much @@ -299,7 +299,7 @@ Learning Synchronous Grammars for Semantic Parsing with Lambda Calculus Yuk WahWong - RaymondMooney + RaymondMooney 960–967 P07-1121 wong-mooney-2007-learning @@ -315,9 +315,9 @@ Learning Multilingual Subjective Language via Cross-Lingual Projections - RadaMihalcea + RadaMihalcea CarmenBanea - JanyceWiebe + JanyceWiebe 976–983 P07-1123 mihalcea-etal-2007-learning @@ -333,7 +333,7 @@ Weakly Supervised Learning for Hedge Classification in Scientific Literature BenMedlock - TedBriscoe + TedBriscoe 992–999 P07-1125 medlock-briscoe-2007-weakly @@ -341,7 +341,7 @@ Text Analysis for Automatic Image Annotation KoenDeschacht - Marie-FrancineMoens + Marie-FrancineMoens 1000–1007 P07-1126 deschacht-moens-2007-text @@ -358,17 +358,17 @@ Combining Multiple Knowledge Sources for Dialogue Segmentation in Multimedia Archives Pei-YunHsueh - Johanna D.Moore + Johanna D.Moore 1016–1023 P07-1128 hsueh-moore-2007-combining Topic Analysis for Psychiatric Document Retrieval - Liang-ChihYu + Liang-ChihYu Chung-HsienWu - Chin-YewLin - EduardHovy + Chin-YewLin + EduardHovy Chia-LingLin 1024–1031 P07-1129 @@ -386,7 +386,7 @@ What to be? - Electronic Career Guidance Based on Semantic Relatedness IrynaGurevych - ChristofMüller + ChristofMüller TorstenZesch 1032–1039 P07-1130 @@ -394,9 +394,9 @@ Extracting Social Networks and Biographical Facts From Conversational Speech Transcripts - HongyanJing - NandaKambhatla - SalimRoukos + HongyanJing + NandaKambhatla + SalimRoukos 1040–1047 P07-1131 jing-etal-2007-extracting @@ -413,9 +413,9 @@ Multilingual Transliteration Using Feature based Phonetic Method - Su-YounYoon - Kyoung-YoungKim - RichardSproat + Su-YounYoon + Kyoung-YoungKim + RichardSproat 112–119 P07-1015 yoon-etal-2007-multilingual @@ -423,7 +423,7 @@ Semantic Transliteration of Personal Names HaizhouLi - Khe ChaiSim + Khe ChaiSim Jin-SheaKuo MinghuiDong 120–127 @@ -442,7 +442,7 @@ Assisting Translators in Indirect Lexical Transfer BogdanBabych - AnthonyHartley + AnthonyHartley SergeSharoff OlgaMudraya 136–143 @@ -459,7 +459,7 @@ Statistical Machine Translation through Global Lexical Selection and Sentence Reconstruction - SrinivasBangalore + SrinivasBangalore PatrickHaffner StephanKanthak 152–159 @@ -498,7 +498,7 @@ Generalizing semantic role annotations across syntactically similar verbs - AndrewGordon + AndrewGordon ReidSwanson 192–199 P07-1025 @@ -508,9 +508,9 @@ A Grammar-driven Convolution Tree Kernel for Semantic Role Classification MinZhang WanxiangChe - AitiAw - Chew LimTan - GuodongZhou + AitiAw + Chew LimTan + GuodongZhou TingLiu ShengLi 200–207 @@ -552,7 +552,7 @@ Adding Noun Phrase Structure to the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank DavidVadas - JamesCurran + JamesCurran 240–247 P07-1031 vadas-curran-2007-adding @@ -560,14 +560,14 @@ Formalism-Independent Parser Evaluation with <fixed-case>CCG</fixed-case> and <fixed-case>D</fixed-case>ep<fixed-case>B</fixed-case>ank StephenClark - JamesCurran + JamesCurran 248–255 P07-1032 clark-curran-2007-formalism Frustratingly Easy Domain Adaptation - HalDaumé III + HalDaumé III 256–263 P07-1033 daume-iii-2007-frustratingly @@ -575,16 +575,16 @@ Instance Weighting for Domain Adaptation in <fixed-case>NLP</fixed-case> JingJiang - ChengXiangZhai + ChengXiangZhai 264–271 P07-1034 jiang-zhai-2007-instance The Infinite Tree - Jenny RoseFinkel + Jenny RoseFinkel TrondGrenager - Christopher D.Manning + Christopher D.Manning 272–279 P07-1035 finkel-etal-2007-infinite @@ -592,7 +592,7 @@ Guiding Semi-Supervision with Constraint-Driven Learning Ming-WeiChang - LevRatinov + LevRatinov DanRoth 280–287 P07-1036 @@ -600,8 +600,8 @@ Supertagged Phrase-Based Statistical Machine Translation - HanyHassan - KhalilSima’an + HanyHassan + KhalilSima’an AndyWay 288–295 P07-1037 @@ -626,9 +626,9 @@ Improved Word-Level System Combination for Machine Translation - Antti-VeikkoRosti + Antti-VeikkoRosti SpyrosMatsoukas - RichardSchwartz + RichardSchwartz 312–319 P07-1040 rosti-etal-2007-improved @@ -678,7 +678,7 @@ The Utility of a Graphical Representation of Discourse Structure in Spoken Dialogue Systems MihaiRotaru - DianeLitman + DianeLitman 360–367 P07-1046 rotaru-litman-2007-utility @@ -686,7 +686,7 @@ Automated Vocabulary Acquisition and Interpretation in Multimodal Conversational Systems YiLiu - JoyceChai + JoyceChai RongJin 368–375 P07-1047 @@ -694,8 +694,8 @@ A Multimodal Interface for Access to Content in the Home - MichaelJohnston - Luis FernandoD’Haro + MichaelJohnston + Luis FernandoD’Haro MichelleLevine BernardRenger 376–383 @@ -711,7 +711,7 @@ K-best Spanning Tree Parsing - KeithHall + KeithHall 392–399 P07-1050 hall-2007-k @@ -787,9 +787,9 @@ Statistical Machine Translation for Query Expansion in Answer Retrieval StefanRiezler - AlexanderVasserman + AlexanderVasserman IoannisTsochantaridis - VibhuMittal + VibhuMittal YiLiu 464–471 P07-1059 @@ -797,7 +797,7 @@ A Computational Model of Text Reuse in Ancient Literary Texts - JohnLee + JohnLee 472–479 P07-1060 lee-2007-computational @@ -819,8 +819,8 @@ <fixed-case>PERSONAGE</fixed-case>: Personality Generation for Dialogue - FrançoisMairesse - MarilynWalker + FrançoisMairesse + MarilynWalker 496–503 P07-1063 mairesse-walker-2007-personage @@ -830,7 +830,7 @@ IgorMalioutov AlexPark ReginaBarzilay - JamesGlass + JamesGlass 504–511 P07-1064 malioutov-etal-2007-making @@ -869,7 +869,7 @@ Generating a Table-of-Contents - S. R. K.Branavan + S. R. K.Branavan PawanDeshpande ReginaBarzilay 544–551 @@ -895,15 +895,15 @@ Improving the Interpretation of Noun Phrases with Cross-linguistic Information - RoxanaGirju + RoxanaGirju 568–575 P07-1072 girju-2007-improving Learning to Extract Relations from the Web using Minimal Supervision - RazvanBunescu - RaymondMooney + RazvanBunescu + RaymondMooney 576–583 P07-1073 bunescu-mooney-2007-learning @@ -920,7 +920,7 @@ A Multi-resolution Framework for Information Extraction from Free Text MstislavMaslennikov - Tat-SengChua + Tat-SengChua 592–599 P07-1075 maslennikov-chua-2007-multi @@ -935,7 +935,7 @@ Beyond Projectivity: Multilingual Evaluation of Constraints and Measures on Non-Projective Structures - JiříHavelka + JiříHavelka 608–615 P07-1077 havelka-2007-beyond @@ -952,7 +952,7 @@ <fixed-case>HPSG</fixed-case> Parsing with Shallow Dependency Constraints KenjiSagae YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 624–631 P07-1079 sagae-etal-2007-hpsg @@ -960,7 +960,7 @@ Constituent Parsing with Incremental Sigmoid Belief Networks IvanTitov - JamesHenderson + JamesHenderson 632–639 P07-1080 titov-henderson-2007-constituent @@ -994,7 +994,7 @@ Bilingual Terminology Mining - Using Brain, not brawn comparable corpora EmmanuelMorin - BéatriceDaille + BéatriceDaille KoichiTakeuchi KyoKageura 664–671 @@ -1022,7 +1022,7 @@ JieTang HangLi Hwee TouNg - TiejunZhao + TiejunZhao 688–695 P07-1087 zhu-etal-2007-unified @@ -1069,7 +1069,7 @@ Machine Translation by Triangulation: Making Effective Use of Multi-Parallel Corpora - TrevorCohn + TrevorCohn MirellaLapata 728–735 P07-1092 @@ -1084,7 +1084,7 @@ A fully <fixed-case>B</fixed-case>ayesian approach to unsupervised part-of-speech tagging - SharonGoldwater + SharonGoldwater TomGriffiths 744–751 P07-1094 @@ -1092,9 +1092,9 @@ Computationally Efficient <fixed-case>M</fixed-case>-Estimation of Log-Linear Structure Models - Noah A.Smith + Noah A.Smith Douglas L.Vail - John D.Lafferty + John D.Lafferty 752–759 P07-1095 smith-etal-2007-computationally @@ -1103,14 +1103,14 @@ Guided Learning for Bidirectional Sequence Classification LibinShen GiorgioSatta - AravindJoshi + AravindJoshi 760–767 P07-1096 shen-etal-2007-guided Different Structures for Evaluating Answers to Complex Questions: Pyramids Won’t Topple, and Neither Will Human Assessors - Hoa TrangDang + Hoa TrangDang JimmyLin 768–775 P07-1097 @@ -1120,7 +1120,7 @@ Exploiting Syntactic and Shallow Semantic Kernels for Question Answer Classification AlessandroMoschitti SilviaQuarteroni - RobertoBasili + RobertoBasili SureshManandhar 776–783 P07-1098 @@ -1130,7 +1130,7 @@ Language-independent Probabilistic Answer Ranking for Question Answering JeongwooKo TerukoMitamura - EricNyberg + EricNyberg 784–791 P07-1099 ko-etal-2007-language @@ -1173,7 +1173,7 @@ AkitoshiOkumura TakahiroIkeda ToshihiroNishizawa - Shin-ichiAndo + Shin-ichiAndo FumihiroAdachi 9–12 P07-2003 @@ -1197,19 +1197,19 @@ Multilingual Ontological Analysis of <fixed-case>E</fixed-case>uropean Directives - GianmariaAjani - GuidoBoella - LeonardoLesmo - AlessandroMazzei - PiercarloRossi + GianmariaAjani + GuidoBoella + LeonardoLesmo + AlessandroMazzei + PiercarloRossi 21–24 P07-2006 ajani-etal-2007-multilingual <fixed-case>NICT</fixed-case>-<fixed-case>ATR</fixed-case> Speech-to-Speech Translation System - EiichiroSumita - TohruShimizu + EiichiroSumita + TohruShimizu SatoshiNakamura 25–28 P07-2007 @@ -1218,14 +1218,14 @@ zipf<fixed-case>R</fixed-case>: Word Frequency Modeling in <fixed-case>R</fixed-case> StefanEvert - MarcoBaroni + MarcoBaroni 29–32 P07-2008 evert-baroni-2007-zipfr Linguistically Motivated Large-Scale <fixed-case>NLP</fixed-case> with <fixed-case>C</fixed-case>&<fixed-case>C</fixed-case> and Boxer - JamesCurran + JamesCurran StephenClark JohanBos 33–36 @@ -1236,17 +1236,17 @@ Don’t worry about metaphor: affect detection for conversational agents CatherineSmith TimothyRumbell - JohnBarnden - RobertHendley - MarkLee - AlanWallington + JohnBarnden + RobertHendley + MarkLee + AlanWallington 37–40 P07-2010 smith-etal-2007-dont An efficient algorithm for building a distributional thesaurus (and other <fixed-case>S</fixed-case>ketch <fixed-case>E</fixed-case>ngine developments) - PavelRychlý + PavelRychlý AdamKilgarriff 41–44 P07-2011 @@ -1254,15 +1254,15 @@ Semantic enrichment of journal articles using chemical named entity recognition - Colin R.Batchelor - Peter T.Corbett + Colin R.Batchelor + Peter T.Corbett 45–48 P07-2012 batchelor-corbett-2007-semantic An <fixed-case>API</fixed-case> for Measuring the Relatedness of Words in <fixed-case>W</fixed-case>ikipedia - Simone PaoloPonzetto + Simone PaoloPonzetto MichaelStrube 49–52 P07-2013 @@ -1277,9 +1277,9 @@ Support Vector Machines for Query-focused Summarization trained and evaluated on Pyramid data - MariaFuentes + MariaFuentes EnriqueAlfonseca - HoracioRodríguez + HoracioRodríguez 57–60 P07-2015 fuentes-etal-2007-support @@ -1287,8 +1287,8 @@ A Joint Statistical Model for Simultaneous Word Spacing and Spelling Error Correction for <fixed-case>K</fixed-case>orean HyungjongNoh - Jeong-WonCha - Gary GeunbaeLee + Jeong-WonCha + Gary GeunbaeLee 61–64 P07-2016 noh-etal-2007-joint @@ -1306,8 +1306,8 @@ Rethinking <fixed-case>C</fixed-case>hinese Word Segmentation: Tokenization, Character Classification, or Wordbreak Identification Chu-RenHuang PetrŠimon - Shu-KaiHsieh - LaurentPrévot + Shu-KaiHsieh + LaurentPrévot 69–72 P07-2018 huang-etal-2007-rethinking @@ -1316,7 +1316,7 @@ A Feature Based Approach to Leveraging Context for Classifying Newsgroup Style Discussion Segments Yi-ChiaWang MaheshJoshi - CarolynRosé + CarolynRosé 73–76 P07-2019 wang-etal-2007-feature @@ -1355,7 +1355,7 @@ Generating Usable Formats for Metadata and Annotations in a Large Meeting Corpus - AndreiPopescu-Belis + AndreiPopescu-Belis PaulaEstrella 93–96 P07-2024 @@ -1375,7 +1375,7 @@ Minimum <fixed-case>B</fixed-case>ayes Risk Decoding for <fixed-case>BLEU</fixed-case> NicolaEhling RichardZens - HermannNey + HermannNey 101–104 P07-2026 ehling-etal-2007-minimum @@ -1384,25 +1384,25 @@ Disambiguating Between Generic and Referential “You” in Dialog SurabhiGupta MatthewPurver - DanJurafsky + DanJurafsky 105–108 P07-2027 gupta-etal-2007-disambiguating On the formalization of Invariant Mappings for Metaphor Interpretation - RodrigoAgerri - JohnBarnden - MarkLee - AlanWallington + RodrigoAgerri + JohnBarnden + MarkLee + AlanWallington 109–112 P07-2028 agerri-etal-2007-formalization Real-Time Correction of Closed-Captions - PatrickCardinal - GillesBoulianne + PatrickCardinal + GillesBoulianne MichelComeau MaryseBoisvert 113–116 @@ -1420,7 +1420,7 @@ Predicting Evidence of Understanding by Monitoring User’s Task Manipulation in Multimodal Conversations - YukikoNakano + YukikoNakano KazuyoshiMurata MikaEnomoto YoshikoArimoto @@ -1473,7 +1473,7 @@ A Linguistic Service Ontology for Language Infrastructures - YoshihikoHayashi + YoshihikoHayashi 145–148 P07-2037 hayashi-2007-linguistic @@ -1489,7 +1489,7 @@ Automatic Discovery of Named Entity Variants: Grammar-driven Approaches to Non-Alphabetical Transliterations Chu-RenHuang PetrŠimon - Shu-KaiHsieh + Shu-KaiHsieh 153–156 P07-2039 huang-etal-2007-automatic @@ -1516,23 +1516,23 @@ Extracting Hypernym Pairs from the Web - ErikTjong Kim Sang + ErikTjong Kim Sang 165–168 P07-2042 tjong-kim-sang-2007-extracting An <fixed-case>OWL</fixed-case> Ontology for <fixed-case>HPSG</fixed-case> - GrahamWilcock + GrahamWilcock 169–172 P07-2043 wilcock-2007-owl Classifying Temporal Relations Between Events - NathanaelChambers + NathanaelChambers ShanWang - DanJurafsky + DanJurafsky 173–176 P07-2044 chambers-etal-2007-classifying @@ -1549,8 +1549,8 @@ WadeShen ChristineMoran RichardZens - ChrisDyer - OndřejBojar + ChrisDyer + OndřejBojar AlexandraConstantin EvanHerbst 177–180 @@ -1560,7 +1560,7 @@ Boosting Statistical Machine Translation by Lemmatization and Linear Interpolation RuiqiangZhang - EiichiroSumita + EiichiroSumita 181–184 P07-2046 zhang-sumita-2007-boosting @@ -1570,15 +1570,15 @@ MaofuLiu WenjieLi MingliWu - QinLu + QinLu 185–188 P07-2047 liu-etal-2007-extractive Machine Translation between <fixed-case>T</fixed-case>urkic Languages - Ahmet CüneydTantuğ - EşrefAdali + Ahmet CüneydTantuğ + EşrefAdali KemalOflazer 189–192 P07-2048 @@ -1588,7 +1588,7 @@ Measuring Importance and Query Relevance in Topic-focused Multi-document Summarization SurabhiGupta AniNenkova - DanJurafsky + DanJurafsky 193–196 P07-2049 gupta-etal-2007-measuring @@ -1598,7 +1598,7 @@ MasatoshiTsuchiya AyuPurwarianti ToshiyukiWakita - SeiichiNakagawa + SeiichiNakagawa 197–200 P07-2050 tsuchiya-etal-2007-expanding @@ -1620,7 +1620,7 @@ Poster paper: <fixed-case>H</fixed-case>un<fixed-case>P</fixed-case>os – an open source trigram tagger - PéterHalácsy + PéterHalácsy AndrásKornai CsabaOravecz 209–212 @@ -1629,8 +1629,8 @@ Extending <fixed-case>MARIE</fixed-case>: an N-gram-based <fixed-case>SMT</fixed-case> decoder - Josep M.Crego - José B.Mariño + Josep M.Crego + José B.Mariño 213–216 P07-2054 crego-marino-2007-extending @@ -1645,7 +1645,7 @@ Automatic Part-of-Speech Tagging for <fixed-case>B</fixed-case>engali: An Approach for Morphologically Rich Languages in a Poor Resource Scenario - SandipanDandapat + SandipanDandapat SudeshnaSarkar AnupamBasu 221–224 @@ -1656,7 +1656,7 @@ <fixed-case>J</fixed-case>apanese Dependency Parsing Using Sequential Labeling for Semi-spoken Language KenjiImamura GenichiroKikui - NorihitoYasuda + NorihitoYasuda 225–228 P07-2057 imamura-etal-2007-japanese @@ -1666,9 +1666,9 @@ Proceedings of the ACL 2007 Student Research Workshop P07-3 - ChrisBiemann + ChrisBiemann VioletaSeretan - EllenRiloff + EllenRiloff Association for Computational Linguistics
Prague, Czech Republic
June diff --git a/data/xml/P08.xml b/data/xml/P08.xml index 81d3d3cae8..d5a2801ee3 100644 --- a/data/xml/P08.xml +++ b/data/xml/P08.xml @@ -4,10 +4,10 @@ Proceedings of ACL-08: HLT P08-1 - Johanna D.Moore + Johanna D.Moore SimoneTeufel - JamesAllan - SadaokiFurui + JamesAllan + SadaokiFurui Association for Computational Linguistics
Columbus, Ohio
June @@ -37,7 +37,7 @@
Weakly-Supervised Acquisition of Open-Domain Classes and Class Attributes from Web Documents and Query Logs - MariusPaşca + MariusPaşca BenjaminVan Durme 19–27 P08-1003 @@ -61,10 +61,10 @@ Task-oriented Evaluation of Syntactic Parsers and Their Representations YusukeMiyao - RuneSætre + RuneSætre KenjiSagae TakuyaMatsuzaki - Jun’ichiTsujii + Jun’ichiTsujii 46–54 P08-1006 miyao-etal-2008-task @@ -79,7 +79,7 @@ Contradictions and Justifications: Extensions to the Textual Entailment Task - Ellen M.Voorhees + Ellen M.Voorhees 63–71 P08-1008 voorhees-2008-contradictions @@ -95,7 +95,7 @@ Phrase Table Training for Precision and Recall: What Makes a Good Phrase and a Good Phrase Pair? YonggangDeng JiaXu - YuqingGao + YuqingGao 81–88 P08-1010 deng-etal-2008-phrase @@ -115,7 +115,7 @@ <fixed-case>B</fixed-case>ayesian Learning of Non-Compositional Phrases with Synchronous Parsing HaoZhang ChrisQuirk - Robert C.Moore + Robert C.Moore DanielGildea 97–105 P08-1012 @@ -142,7 +142,7 @@ Grounded Language Modeling for Automatic Speech Recognition of Sports Video MichaelFleischman - DebRoy + DebRoy 121–129 P08-1015 fleischman-roy-2008-grounded @@ -165,7 +165,7 @@ Selecting Query Term Alternations for Web Search by Exploiting Query Contexts GuihongCao StephenRobertson - Jian-YunNie + Jian-YunNie 148–155 P08-1018 cao-etal-2008-selecting @@ -174,7 +174,7 @@ Searching Questions by Identifying Question Topic and Question Focus HuizhongDuan YunboCao - Chin-YewLin + Chin-YewLin YongYu 156–164 P08-1019 @@ -182,16 +182,16 @@ Trainable Generation of Big-Five Personality Styles through Data-Driven Parameter Estimation - FrançoisMairesse - MarilynWalker + FrançoisMairesse + MarilynWalker 165–173 P08-1020 mairesse-walker-2008-trainable Correcting Misuse of Verb Forms - JohnLee - StephanieSeneff + JohnLee + StephanieSeneff 174–182 P08-1021 lee-seneff-2008-correcting @@ -199,8 +199,8 @@ <fixed-case>H</fixed-case>ypertagging: Supertagging for Surface Realization with <fixed-case>CCG</fixed-case> DominicEspinosa - MichaelWhite - DennisMehay + MichaelWhite + DennisMehay 183–191 P08-1022 espinosa-etal-2008-hypertagging @@ -216,8 +216,8 @@ A Discriminative Latent Variable Model for Statistical Machine Translation - PhilBlunsom - TrevorCohn + PhilBlunsom + TrevorCohn MilesOsborne 200–208 P08-1024 @@ -260,7 +260,7 @@ Exploiting Feature Hierarchy for Transfer Learning in Named Entity Recognition AndrewArnold RameshNallapati - William W.Cohen + William W.Cohen 245–253 P08-1029 arnold-etal-2008-exploiting @@ -268,14 +268,14 @@ Refining Event Extraction through Cross-Document Inference HengJi - RalphGrishman + RalphGrishman 254–262 P08-1030 ji-grishman-2008-refining Learning Document-Level Semantic Properties from Free-Text Annotations - S.R.K.Branavan + S.R.K.Branavan HarrChen JacobEisenstein ReginaBarzilay @@ -323,16 +323,16 @@ Improving Parsing and <fixed-case>PP</fixed-case> Attachment Performance with Sense Information - EnekoAgirre - TimothyBaldwin - DavidMartinez + EnekoAgirre + TimothyBaldwin + DavidMartinez 317–325 P08-1037 agirre-etal-2008-improving A Logical Basis for the <fixed-case>D</fixed-case> Combinator and Normal Form in <fixed-case>CCG</fixed-case> - FrederickHoyt + FrederickHoyt JasonBaldridge 326–334 P08-1038 @@ -341,7 +341,7 @@ Parsing Noun Phrase Structure with <fixed-case>CCG</fixed-case> DavidVadas - James R.Curran + James R.Curran 335–343 P08-1039 vadas-curran-2008-parsing @@ -357,7 +357,7 @@ Summarizing Emails with Conversational Cohesion and Subjectivity GiuseppeCarenini - Raymond T.Ng + Raymond T.Ng XiaodongZhou 353–361 P08-1041 @@ -380,9 +380,9 @@ Which Words Are Hard to Recognize? Prosodic, Lexical, and Disfluency Factors that Increase <fixed-case>ASR</fixed-case> Error Rates - SharonGoldwater - DanJurafsky - Christopher D.Manning + SharonGoldwater + DanJurafsky + Christopher D.Manning 380–388 P08-1044 goldwater-etal-2008-words @@ -391,7 +391,7 @@ Name Translation in Statistical Machine Translation - Learning When to Transliterate UlfHermjakob KevinKnight - HalDaumé III + HalDaumé III 389–397 P08-1045 hermjakob-etal-2008-name @@ -405,7 +405,7 @@ Inducing Gazetteers for Named Entity Recognition by Large-Scale Clustering of Dependency Relations - Jun’ichiKazama + Jun’ichiKazama KentaroTorisawa 407–415 P08-1047 @@ -414,7 +414,7 @@ Evaluating <fixed-case>R</fixed-case>oget‘s Thesauri AlistairKennedy - StanSzpakowicz + StanSzpakowicz 416–424 P08-1048 kennedy-szpakowicz-2008-evaluating @@ -437,7 +437,7 @@ Collecting a Why-Question Corpus for Development and Evaluation of an Automatic <fixed-case>QA</fixed-case>-System - JoannaMrozinski + JoannaMrozinski EdwardWhittaker SadaokiFurui 443–451 @@ -446,8 +446,8 @@ Solving Relational Similarity Problems Using the Web as a Corpus - PreslavNakov - Marti A.Hearst + PreslavNakov + Marti A.Hearst 452–460 P08-1052 nakov-hearst-2008-solving @@ -455,7 +455,7 @@ Combining Speech Retrieval Results with Generalized Additive Models J. ScottOlsson - Douglas W.Oard + Douglas W.Oard 461–469 P08-1053 olsson-oard-2008-combining @@ -470,8 +470,8 @@ Intensional Summaries as Cooperative Responses in Dialogue: Automation and Evaluation - JosephPolifroni - MarilynWalker + JosephPolifroni + MarilynWalker 479–487 P08-1055 polifroni-walker-2008-intensional @@ -487,7 +487,7 @@ Combining <fixed-case>EM</fixed-case> Training and the <fixed-case>MDL</fixed-case> Principle for an Automatic Verb Classification Incorporating Selectional Preferences - SabineSchulte im Walde + SabineSchulte im Walde ChristianHying ChristianScheible HelmutSchmid @@ -544,8 +544,8 @@ Robustness and Generalization of Role Sets: <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank vs. <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et BeñatZapirain - EnekoAgirre - LluísMàrquez + EnekoAgirre + LluísMàrquez 550–558 P08-1063 zapirain-etal-2008-robustness @@ -554,9 +554,9 @@ A Tree Sequence Alignment-based Tree-to-Tree Translation Model MinZhang HongfeiJiang - AitiAw + AitiAw HaizhouLi - Chew LimTan + Chew LimTan ShengLi 559–567 P08-1064 @@ -575,7 +575,7 @@ A New String-to-Dependency Machine Translation Algorithm with a Target Dependency Language Model LibinShen JinxiXu - RalphWeischedel + RalphWeischedel 577–585 P08-1066 shen-etal-2008-new @@ -591,7 +591,7 @@ Simple Semi-supervised Dependency Parsing TerryKoo XavierCarreras - MichaelCollins + MichaelCollins 595–603 P08-1068 koo-etal-2008-simple @@ -600,7 +600,7 @@ Optimal <tex-math>k</tex-math>-arization of Synchronous <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar RebeccaNesson GiorgioSatta - Stuart M.Shieber + Stuart M.Shieber 604–612 P08-1069 nesson-etal-2008-optimal @@ -617,7 +617,7 @@ Assessing Dialog System User Simulation Evaluation Measures Using Human Judges HuaAi - Diane J.Litman + Diane J.Litman 622–629 P08-1071 ai-litman-2008-assessing @@ -625,8 +625,8 @@ Robust Dialog Management with N-Best Hypotheses Using Dialog Examples and Agenda CheongjaeLee - SangkeunJung - Gary GeunbaeLee + SangkeunJung + Gary GeunbaeLee 630–637 P08-1072 lee-etal-2008-robust @@ -641,8 +641,8 @@ Phrase Chunking Using Entropy Guided Transformation Learning - Ruy LuizMilidiú - Cícero Nogueirados Santos + Ruy LuizMilidiú + Cícero Nogueirados Santos Julio C.Duarte 647–655 P08-1074 @@ -650,8 +650,8 @@ Learning Bigrams from Unigrams - XiaojinZhu - Andrew B.Goldberg + XiaojinZhu + Andrew B.Goldberg MichaelRabbat RobertNowak 656–664 @@ -694,9 +694,9 @@ Improving Search Results Quality by Customizing Summary Lengths - MichaelKaisser - Marti A.Hearst - John B.Lowe + MichaelKaisser + Marti A.Hearst + John B.Lowe 701–709 P08-1080 kaisser-etal-2008-improving @@ -705,7 +705,7 @@ Using Conditional Random Fields to Extract Contexts and Answers of Questions from Online Forums ShilinDing GaoCong - Chin-YewLin + Chin-YewLin XiaoyanZhu 710–718 P08-1081 @@ -785,15 +785,15 @@ Unsupervised Learning of Narrative Event Chains - NathanaelChambers - DanJurafsky + NathanaelChambers + DanJurafsky 789–797 P08-1090 chambers-jurafsky-2008-unsupervised Semantic Role Labeling Systems for <fixed-case>A</fixed-case>rabic using Kernel Methods - MonaDiab + MonaDiab AlessandroMoschitti DanielePighin 798–806 @@ -803,7 +803,7 @@ An Unsupervised Approach to Biography Production Using <fixed-case>W</fixed-case>ikipedia FadiBiadsy - JuliaHirschberg + JuliaHirschberg ElenaFilatova 807–815 P08-1092 @@ -812,7 +812,7 @@ Generating Impact-Based Summaries for Scientific Literature QiaozhuMei - ChengXiangZhai + ChengXiangZhai 816–824 P08-1093 mei-zhai-2008-generating @@ -838,7 +838,7 @@ XiaofengYang JianSu JunLang - Chew LimTan + Chew LimTan TingLiu ShengLi 843–851 @@ -866,7 +866,7 @@ Generalized Expectation Criteria for Semi-Supervised Learning of Conditional Random Fields - Gideon S.Mann + Gideon S.Mann AndrewMcCallum 870–878 P08-1099 @@ -893,7 +893,7 @@ WenbinJiang LiangHuang QunLiu - Yajuan + Yajuan 897–904 P08-1102 jiang-etal-2008-cascaded @@ -922,15 +922,15 @@ Credibility Improves Topical Blog Post Retrieval WouterWeerkamp - Maartende Rijke + Maartende Rijke 923–931 P08-1105 weerkamp-de-rijke-2008-credibility Linguistically Motivated Features for Enhanced Back-of-the-Book Indexing - AndrasCsomai - RadaMihalcea + AndrasCsomai + RadaMihalcea 932–940 P08-1106 csomai-mihalcea-2008-linguistically @@ -938,7 +938,7 @@ Resolving Personal Names in Email Using Context Expansion TamerElsayed - Douglas W.Oard + Douglas W.Oard GalileoNamata 941–949 P08-1107 @@ -954,9 +954,9 @@ Efficient, Feature-based, Conditional Random Field Parsing - Jenny RoseFinkel + Jenny RoseFinkel AlexKleeman - Christopher D.Manning + Christopher D.Manning 959–967 P08-1109 finkel-etal-2008-efficient @@ -964,15 +964,15 @@ A Deductive Approach to Dependency Parsing CarlosGómez-Rodríguez - JohnCarroll - DavidWeir + JohnCarroll + DavidWeir 968–976 P08-1110 gomez-rodriguez-etal-2008-deductive Evaluating a Crosslinguistic Grammar Resource: A Case Study of <fixed-case>W</fixed-case>ambaya - Emily M.Bender + Emily M.Bender 977–985 P08-1111 bender-2008-evaluating @@ -980,7 +980,7 @@ Better Alignments = Better Translations? KuzmanGanchev - João V.Graça + João V.Graça BenTaskar 986–993 P08-1112 @@ -991,7 +991,7 @@ DekangLin ShaojunZhao BenjaminVan Durme - MariusPaşca + MariusPaşca 994–1002 P08-1113 lin-etal-2008-mining @@ -1006,7 +1006,7 @@ Generalizing Word Lattice Translation - ChristopherDyer + ChristopherDyer SmarandaMuresan PhilipResnik 1012–1020 @@ -1037,9 +1037,9 @@ Finding Contradictions in Text - Marie-Catherinede Marneffe - Anna N.Rafferty - Christopher D.Manning + Marie-Catherinede Marneffe + Anna N.Rafferty + Christopher D.Manning 1039–1047 P08-1118 de-marneffe-etal-2008-finding @@ -1047,8 +1047,8 @@ Semantic Class Learning from the Web with Hyponym Pattern Linkage Graphs ZornitsaKozareva - EllenRiloff - EduardHovy + EllenRiloff + EduardHovy 1048–1056 P08-1119 kozareva-etal-2008-semantic @@ -1058,10 +1058,10 @@ Proceedings of ACL-08: HLT, Short Papers P08-2 - Johanna D.Moore + Johanna D.Moore SimoneTeufel - JamesAllan - SadaokiFurui + JamesAllan + SadaokiFurui Association for Computational Linguistics
Columbus, Ohio
June @@ -1075,7 +1075,7 @@ Language Dynamics and Capitalization using Maximum Entropy FernandoBatista - NunoMamede + NunoMamede IsabelTrancoso 1–4 P08-2001 @@ -1084,7 +1084,7 @@ Surprising Parser Actions and Reading Difficulty Marisa FerraraBoston - John T.Hale + John T.Hale ReinholdKliegl ShravanVasishth 5–8 @@ -1094,7 +1094,7 @@ Improving the Performance of the Random Walk Model for Answering Complex Questions YlliasChali - ShafiqJoty + ShafiqJoty 9–12 P08-2003 chali-joty-2008-improving @@ -1109,8 +1109,8 @@ Extractive Summaries for Educational Science Content Sebastiande la Chica - FaisalAhmad - James H.Martin + FaisalAhmad + James H.Martin TamaraSumner 17–20 P08-2005 @@ -1135,7 +1135,7 @@ Novel Semantic Features for Verb Sense Disambiguation DmitriyDligach - MarthaPalmer + MarthaPalmer 29–32 P08-2008 dligach-palmer-2008-novel @@ -1143,7 +1143,7 @@ <fixed-case>I</fixed-case>celandic Data Driven Part of Speech Tagging MarkDredze - JoelWallenberg + JoelWallenberg 33–36 P08-2009 dredze-wallenberg-2008-icelandic @@ -1166,8 +1166,8 @@ Enforcing Transitivity in Coreference Resolution - Jenny RoseFinkel - Christopher D.Manning + Jenny RoseFinkel + Christopher D.Manning 45–48 P08-2012 finkel-manning-2008-enforcing @@ -1198,7 +1198,7 @@ Combined One Sense Disambiguation of Abbreviations - YaakovHaCohen-Kerner + YaakovHaCohen-Kerner ArielKass ArielPeretz 61–64 @@ -1208,7 +1208,7 @@ Assessing the Costs of Sampling Methods in Active Learning for Annotation RobbieHaertel - EricRingger + EricRingger KevinSeppi JamesCarroll PeterMcClanahan @@ -1226,7 +1226,7 @@ Mixture Model <fixed-case>POMDP</fixed-case>s for Efficient Handling of Uncertainty in Dialogue Management - JamesHenderson + JamesHenderson OliverLemon 73–76 P08-2019 @@ -1234,13 +1234,13 @@ Recent Improvements in the <fixed-case>CMU</fixed-case> Large Scale <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish <fixed-case>SMT</fixed-case> System - Almut SiljaHildebrand + Almut SiljaHildebrand KayRottmann MohamedNoamany QuinGao SanjikaHewavitharana NguyenBach - StephanVogel + StephanVogel 77–80 P08-2020 hildebrand-etal-2008-recent @@ -1248,8 +1248,8 @@ Machine Translation System Combination using <fixed-case>ITG</fixed-case>-based Alignments DamianosKarakos - JasonEisner - SanjeevKhudanpur + JasonEisner + SanjeevKhudanpur MarkusDreyer 81–84 P08-2021 @@ -1269,7 +1269,7 @@ PengZhang FuruWei YuexianHou - QinLu + QinLu 89–92 P08-2023 li-etal-2008-novel @@ -1300,7 +1300,7 @@ A Unified Syntactic Model for Parsing Fluent and Disfluent Speech - TimMiller + TimMiller WilliamSchuler 105–108 P08-2027 @@ -1309,7 +1309,7 @@ The Good, the Bad, and the Unknown: Morphosyllabic Sentiment Tagging of Unseen Words KaroMoilanen - StephenPulman + StephenPulman 109–112 P08-2028 moilanen-pulman-2008-good @@ -1324,10 +1324,10 @@ <fixed-case>A</fixed-case>rabic Morphological Tagging, Diacritization, and Lemmatization Using Lexeme Models and Feature Ranking - RyanRoth - OwenRambow + RyanRoth + OwenRambow NizarHabash - MonaDiab + MonaDiab CynthiaRudin 117–120 P08-2030 @@ -1336,7 +1336,7 @@ Using Automatically Transcribed Dialogs to Learn User Models in a Spoken Dialog System UmarSyed - JasonWilliams + JasonWilliams 121–124 P08-2031 syed-williams-2008-using @@ -1345,7 +1345,7 @@ Robust Extraction of Named Entity Including Unfamiliar Word MasatoshiTsuchiya ShinyaHida - SeiichiNakagawa + SeiichiNakagawa 125–128 P08-2032 tsuchiya-etal-2008-robust @@ -1353,7 +1353,7 @@ In-Browser Summarisation: Generating Elaborative Summaries Biased Towards the Reading Context StephenWan - CécileParis + CécileParis 129–132 P08-2033 wan-paris-2008-browser @@ -1362,7 +1362,7 @@ Lyric-based Song Sentiment Classification with Sentiment Vector Space Model YunqingXia LinlinWang - Kam-FaiWong + Kam-FaiWong MingxingXu 133–136 P08-2034 @@ -1385,7 +1385,7 @@ Event Matching Using the Transitive Closure of Dependency Relations - Daniel M.Bikel + Daniel M.Bikel VittorioCastelli 145–148 P08-2037 @@ -1393,9 +1393,9 @@ A Linguistically Annotated Reordering Model for <fixed-case>BTG</fixed-case>-based Statistical Machine Translation - DeyiXiong + DeyiXiong MinZhang - AitiAw + AitiAw HaizhouLi 149–152 P08-2038 @@ -1405,7 +1405,7 @@ Segmentation for <fixed-case>E</fixed-case>nglish-to-<fixed-case>A</fixed-case>rabic Statistical Machine Translation IbrahimBadr RabihZbib - JamesGlass + JamesGlass 153–156 P08-2039 badr-etal-2008-segmentation @@ -1414,7 +1414,7 @@ Exploiting N-best Hypotheses for <fixed-case>SMT</fixed-case> Self-Enhancement BoxingChen MinZhang - AitiAw + AitiAw HaizhouLi 157–160 P08-2040 @@ -1432,7 +1432,7 @@ Unsupervised Learning of Acoustic Sub-word Units BalakrishnanVaradarajan - SanjeevKhudanpur + SanjeevKhudanpur EmmanuelDupoux 165–168 P08-2042 @@ -1441,8 +1441,8 @@ High Frequency Word Entrainment in Spoken Dialogue AniNenkova - AgustínGravano - JuliaHirschberg + AgustínGravano + JuliaHirschberg 169–172 P08-2043 nenkova-etal-2008-high @@ -1458,7 +1458,7 @@ Learning Semantic Links from a Corpus of Parallel Temporal and Causal Relations StevenBethard - James H.Martin + James H.Martin 177–180 P08-2045 bethard-martin-2008-learning @@ -1466,8 +1466,8 @@ Evolving New Lexical Association Measures Using Genetic Programming JanŠnajder - BojanaDalbelo Bašić - SašaPetrović + BojanaDalbelo Bašić + SašaPetrović IvanSikirić 181–184 P08-2046 @@ -1492,7 +1492,7 @@ Query-based Sentence Fusion is Better Defined and Leads to More Preferred Results than Generic Sentence Fusion - EmielKrahmer + EmielKrahmer ErwinMarsi Paulvan Pelt 193–196 @@ -1501,7 +1501,7 @@ Intrinsic vs. Extrinsic Evaluation Measures for Referring Expression Generation - AnjaBelz + AnjaBelz AlbertGatt 197–200 P08-2050 @@ -1518,7 +1518,7 @@ <fixed-case>F</fixed-case>ast<fixed-case>S</fixed-case>um: Fast and Accurate Query-based Multi-document Summarization FrankSchilder - RavikumarKondadadi + RavikumarKondadadi 205–208 P08-2052 schilder-kondadadi-2008-fastsum @@ -1533,7 +1533,7 @@ Unlexicalised Hidden Variable Models of Split Dependency Grammars - Gabriele AntonioMusillo + Gabriele AntonioMusillo PaolaMerlo 213–216 P08-2054 @@ -1551,7 +1551,7 @@ Adapting a <fixed-case>WSJ</fixed-case>-Trained Parser to Grammatically Noisy Text JenniferFoster JoachimWagner - Josefvan Genabith + Josefvan Genabith 221–224 P08-2056 foster-etal-2008-adapting @@ -1559,8 +1559,8 @@ Enriching Spoken Language Translation with Dialog Acts Vivek KumarRangarajan Sridhar - SrinivasBangalore - ShrikanthNarayanan + SrinivasBangalore + ShrikanthNarayanan 225–228 P08-2057 rangarajan-sridhar-etal-2008-enriching @@ -1571,7 +1571,7 @@ HyunjungLee Choong-NyoungSeon HarksooKim - JungyunSeo + JungyunSeo 229–232 P08-2058 kim-etal-2008-speakers @@ -1594,10 +1594,10 @@ Extracting a Representation from Text for Semantic Analysis - Rodney D.Nielsen - WayneWard - James H.Martin - MarthaPalmer + Rodney D.Nielsen + WayneWard + James H.Martin + MarthaPalmer 241–244 P08-2061 nielsen-etal-2008-extracting @@ -1613,7 +1613,7 @@ Choosing Sense Distinctions for <fixed-case>WSD</fixed-case>: Psycholinguistic Evidence - Susan WindischBrown + Susan WindischBrown 249–252 P08-2063 brown-2008-choosing @@ -1630,7 +1630,7 @@ Multi-domain Sentiment Classification ShoushanLi - ChengqingZong + ChengqingZong 257–260 P08-2065 li-zong-2008-multi @@ -1638,7 +1638,7 @@ Evaluating Word Prediction: Framing Keystroke Savings KeithTrnka - KathleenMcCoy + KathleenMcCoy 261–264 P08-2066 trnka-mccoy-2008-evaluating @@ -1647,7 +1647,7 @@ Pairwise Document Similarity in Large Collections with <fixed-case>M</fixed-case>ap<fixed-case>R</fixed-case>educe TamerElsayed JimmyLin - DouglasOard + DouglasOard 265–268 P08-2067 elsayed-etal-2008-pairwise @@ -1739,7 +1739,7 @@ An Unsupervised Vector Approach to Biomedical Term Disambiguation: Integrating <fixed-case>UMLS</fixed-case> and <fixed-case>M</fixed-case>edline - BridgetMcInnes + BridgetMcInnes 49–54 P08-3009 mcinnes-2008-unsupervised @@ -1782,7 +1782,7 @@ Demonstration of a <fixed-case>POMDP</fixed-case> Voice Dialer - JasonWilliams + JasonWilliams 1–4 P08-4001 williams-2008-demonstration @@ -1798,11 +1798,11 @@ <fixed-case>BART</fixed-case>: A Modular Toolkit for Coreference Resolution YannickVersley - Simone PaoloPonzetto - MassimoPoesio + Simone PaoloPonzetto + MassimoPoesio VladimirEidelman AlanJern - JasonSmith + JasonSmith XiaofengYang AlessandroMoschitti 9–12 @@ -1819,7 +1819,7 @@ Interactive <fixed-case>ASR</fixed-case> Error Correction for Touchscreen Devices DavidHuggins-Daines - Alexander I.Rudnicky + Alexander I.Rudnicky 17–19 P08-4005 huggins-daines-rudnicky-2008-interactive @@ -1836,7 +1836,7 @@ MoonyoungKang SourishChaudhuri MaheshJoshi - Carolyn P.Rosé + Carolyn P.Rosé 24–27 P08-4007 kang-etal-2008-side @@ -1845,7 +1845,7 @@ <fixed-case>M</fixed-case>odel<fixed-case>T</fixed-case>alker <fixed-case>V</fixed-case>oice <fixed-case>R</fixed-case>ecorder—<fixed-case>A</fixed-case>n Interface System for Recording a Corpus of Speech for Synthesis DebraYarrington JohnGray - ChrisPennington + ChrisPennington H. TimothyBunnell AllegraCornaglia JasonLilley @@ -1857,7 +1857,7 @@ The <fixed-case>Q</fixed-case>u<fixed-case>AL</fixed-case>i<fixed-case>M</fixed-case> Question Answering Demo: Supplementing Answers with Paragraphs drawn from <fixed-case>W</fixed-case>ikipedia - MichaelKaisser + MichaelKaisser 32–35 P08-4009 kaisser-2008-qualim @@ -1868,7 +1868,7 @@ Tutorial Abstracts of ACL-08: HLT P08-5 AniNenkova - MarilynWalker + MarilynWalker EugeneAgichtein Association for Computational Linguistics
Columbus, Ohio
@@ -1893,7 +1893,7 @@ Building Practical Spoken Dialog Systems AntoineRaux BrianLangner - Alan WBlack + Alan WBlack MaxineEskenazi 2 P08-5002 @@ -1902,7 +1902,7 @@ Semi-Supervised Learning for Natural Language Processing JohnBlitzer - Xiaojin JerryZhu + Xiaojin JerryZhu 3 P08-5003 blitzer-zhu-2008-semi @@ -1916,7 +1916,7 @@ Speech Technology: From Research to the Industry of Human-Machine Communication - RobertoPieraccini + RobertoPieraccini 5 P08-5005 pieraccini-2008-speech diff --git a/data/xml/P09.xml b/data/xml/P09.xml index 079b46f867..2d9d3760c9 100644 --- a/data/xml/P09.xml +++ b/data/xml/P09.xml @@ -6,7 +6,7 @@ P09-1 Keh-YihSu JianSu - JanyceWiebe + JanyceWiebe HaizhouLi Association for Computational Linguistics
Suntec, Singapore
@@ -33,7 +33,7 @@ Investigations on Word Senses and Word Usages KatrinErk - DianaMcCarthy + DianaMcCarthy NicholasGaylord 10–18 P09-1002 @@ -42,8 +42,8 @@ A Comparative Study on Generalization of Semantic Roles in <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et YuichirohMatsubayashi - NaoakiOkazaki - Jun’ichiTsujii + NaoakiOkazaki + Jun’ichiTsujii 19–27 P09-1003 matsubayashi-etal-2009-comparative @@ -59,8 +59,8 @@ <fixed-case>B</fixed-case>rutus: A Semantic Role Labeling System Incorporating <fixed-case>CCG</fixed-case>, <fixed-case>CFG</fixed-case>, and Dependency Features - StephenBoxwell - DennisMehay + StephenBoxwell + DennisMehay ChrisBrew 37–45 P09-1005 @@ -68,7 +68,7 @@ Exploiting Heterogeneous Treebanks for Parsing - Zheng-YuNiu + Zheng-YuNiu HaifengWang HuaWu 46–54 @@ -79,15 +79,15 @@ Cross Language Dependency Parsing using a Bilingual Lexicon HaiZhao YanSong - ChunyuKit - GuodongZhou + ChunyuKit + GuodongZhou 55–63 P09-1007 zhao-etal-2009-cross Topological Field Parsing of <fixed-case>G</fixed-case>erman - Jackie Chi KitCheung + Jackie Chi KitCheung GeraldPenn 64–72 P09-1008 @@ -104,9 +104,9 @@ Reinforcement Learning for Mapping Instructions to Actions - S.R.K.Branavan + S.R.K.Branavan HarrChen - LukeZettlemoyer + LukeZettlemoyer ReginaBarzilay 82–90 P09-1010 @@ -115,7 +115,7 @@ Learning Semantic Correspondences with Less Supervision PercyLiang - MichaelJordan + MichaelJordan DanKlein 91–99 P09-1011 @@ -133,7 +133,7 @@ Knowing the Unseen: Estimating Vocabulary Size over Unseen Samples SumaBhat - RichardSproat + RichardSproat 109–117 P09-1013 bhat-sproat-2009-knowing @@ -185,8 +185,8 @@ Efficient Minimum Error Rate Training and Minimum <fixed-case>B</fixed-case>ayes-Risk Decoding for Translation Hypergraphs and Lattices ShankarKumar WolfgangMacherey - ChrisDyer - FranzOch + ChrisDyer + FranzOch 163–171 P09-1019 kumar-etal-2009-efficient @@ -196,15 +196,15 @@ HuiZhang MinZhang HaizhouLi - AitiAw - Chew LimTan + AitiAw + Chew LimTan 172–180 P09-1020 zhang-etal-2009-forest Active Learning for Multilingual Statistical Machine Translation - GholamrezaHaffari + GholamrezaHaffari AnoopSarkar 181–189 P09-1021 @@ -220,7 +220,7 @@ Summarizing Definition from <fixed-case>W</fixed-case>ikipedia ShirenYe - Tat-SengChua + Tat-SengChua JieLu 199–207 P09-1023 @@ -278,11 +278,11 @@ Compiling a Massive, Multilingual Dictionary via Probabilistic Inference Mausam - StephenSoderland + StephenSoderland OrenEtzioni - DanielWeld - MichaelSkinner - JeffBilmes + DanielWeld + MichaelSkinner + JeffBilmes 262–270 P09-1030 mausam-etal-2009-compiling @@ -306,25 +306,25 @@ Abstraction and Generalisation in Semantic Role Labels: <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank, <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et or both? PaolaMerlo - LonnekeVan Der Plas + LonnekeVan Der Plas 288–296 P09-1033 merlo-van-der-plas-2009-abstraction Robust Machine Translation Evaluation with Entailment Features - SebastianPadó + SebastianPadó MichelGalley - DanJurafsky - Christopher D.Manning + DanJurafsky + Christopher D.Manning 297–305 P09-1034 pado-etal-2009-robust The Contribution of Linguistic Features to Automatic Machine Translation Evaluation - EnriqueAmigó - JesúsGiménez + EnriqueAmigó + JesúsGiménez JulioGonzalo FelisaVerdejo 306–314 @@ -333,9 +333,9 @@ A Syntax-Driven Bracketing Model for Phrase-Based Translation - DeyiXiong + DeyiXiong MinZhang - AitiAw + AitiAw HaizhouLi 315–323 P09-1036 @@ -362,9 +362,9 @@ Concise Integer Linear Programming Formulations for Dependency Parsing - AndréMartins - NoahSmith - EricXing + AndréMartins + NoahSmith + EricXing 342–350 P09-1039 martins-etal-2009-concise @@ -379,7 +379,7 @@ Semi-supervised Learning of Dependency Parsers using Generalized Expectation Criteria GregoryDruck - GideonMann + GideonMann AndrewMcCallum 360–368 P09-1041 @@ -414,7 +414,7 @@ Reducing Semantic Drift with Bagging and Distributional Similarity TaraMcIntosh - James R.Curran + James R.Curran 396–404 P09-1045 mcintosh-curran-2009-reducing @@ -424,7 +424,7 @@ KatsumasaYoshikawa SebastianRiedel MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 405–413 P09-1046 yoshikawa-etal-2009-jointly @@ -432,7 +432,7 @@ Profile Based Cross-Document Coreference Using Kernelized Fuzzy Relational Clustering JianHuang - Sarah M.Taylor + Sarah M.Taylor Jonathan L.Smith Konstantinos A.Fotiadis C. LeeGiles @@ -443,18 +443,18 @@ Who, What, When, Where, Why? Comparing Multiple Approaches to the Cross-Lingual 5<fixed-case>W</fixed-case> Task KristenParton - Kathleen R.McKeown + Kathleen R.McKeown BobCoyne - Mona T.Diab - RalphGrishman - DilekHakkani-Tür - MaryHarper + Mona T.Diab + RalphGrishman + DilekHakkani-Tür + MaryHarper HengJi - Wei YunMa - AdamMeyers + Wei YunMa + AdamMeyers SaraStolbach AngSun - GokhanTur + GokhanTur WeiXu SibelYaman 423–431 @@ -463,7 +463,7 @@ Bilingual Co-Training for Monolingual Hyponymy-Relation Acquisition - Jong-HoonOh + Jong-HoonOh KiyotakaUchimoto KentaroTorisawa 432–440 @@ -472,8 +472,8 @@ Automatic Set Instance Extraction using the Web - Richard C.Wang - William W.Cohen + Richard C.Wang + William W.Cohen 441–449 P09-1050 wang-cohen-2009-automatic @@ -500,7 +500,7 @@ Paraphrase Identification as Probabilistic Quasi-Synchronous Recognition DipanjanDas - Noah A.Smith + Noah A.Smith 468–476 P09-1053 das-smith-2009-paraphrase @@ -508,7 +508,7 @@ Stochastic Gradient Descent Training for <fixed-case>L</fixed-case>1-regularized Log-linear Models with Cumulative Penalty YoshimasaTsuruoka - Jun’ichiTsujii + Jun’ichiTsujii SophiaAnaniadou 477–485 P09-1054 @@ -542,7 +542,7 @@ An Error-Driven Word-Character Hybrid Model for Joint <fixed-case>C</fixed-case>hinese Word Segmentation and <fixed-case>POS</fixed-case> Tagging CanasaiKruengkrai KiyotakaUchimoto - Jun’ichiKazama + Jun’ichiKazama YiouWang KentaroTorisawa HitoshiIsahara @@ -588,7 +588,7 @@ Improving Tree-to-Tree Translation with Packed Forests YangLiu - Yajuan + Yajuan QunLiu 558–566 P09-1063 @@ -627,16 +627,16 @@ Variational Decoding for Statistical Machine Translation ZhifeiLi - JasonEisner - SanjeevKhudanpur + JasonEisner + SanjeevKhudanpur 593–601 P09-1067 li-etal-2009-variational Unsupervised Learning of Narrative Schemas and their Participants - NathanaelChambers - DanJurafsky + NathanaelChambers + DanJurafsky 602–610 P09-1068 chambers-jurafsky-2009-unsupervised @@ -644,7 +644,7 @@ Learning a Compositional Semantic Parser using an Existing Syntactic Parser RuifangGe - RaymondMooney + RaymondMooney 611–619 P09-1069 ge-mooney-2009-learning @@ -652,7 +652,7 @@ Latent Variable Models of Concept-Attribute Attachment JosephReisinger - MariusPaşca + MariusPaşca 620–628 P09-1070 reisinger-pasca-2009-latent @@ -660,17 +660,17 @@ The <fixed-case>C</fixed-case>hinese Aspect Generation Based on Aspect Selection Functions GuowenYang - JohnBateman + JohnBateman 629–637 P09-1071 yang-bateman-2009-chinese Quantitative modeling of the neural representation of adjective-noun phrases to account for f<fixed-case>MRI</fixed-case> activation - Kai-min K.Chang + Kai-min K.Chang Vladimir L.Cherkassky - Tom M.Mitchell - Marcel AdamJust + Tom M.Mitchell + Marcel AdamJust 638–646 P09-1072 chang-etal-2009-quantitative @@ -679,7 +679,7 @@ Capturing Salience with a Trainable Cache Model for Zero-anaphora Resolution RyuIida KentaroInui - YujiMatsumoto + YujiMatsumoto 647–655 P09-1073 iida-etal-2009-capturing @@ -688,8 +688,8 @@ Conundrums in Noun Phrase Coreference Resolution: Making Sense of the State-of-the-Art VeselinStoyanov NathanGilbert - ClaireCardie - EllenRiloff + ClaireCardie + EllenRiloff 656–664 P09-1074 stoyanov-etal-2009-conundrums @@ -704,7 +704,7 @@ Genre distinctions for discourse in the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank - BonnieWebber + BonnieWebber 674–682 P09-1076 webber-2009-genre @@ -722,7 +722,7 @@ A Framework of Feature Selection Methods for Text Categorization ShoushanLi RuiXia - ChengqingZong + ChengqingZong Chu-RenHuang 692–700 P09-1078 @@ -774,7 +774,7 @@ What lies beneath: Semantic and syntactic analysis of manually reconstructed spontaneous speech ErinFitzgerald - FrederickJelinek + FrederickJelinek RobertFrank 746–754 P09-1084 @@ -782,9 +782,9 @@ Discriminative Lexicon Adaptation for Improved Character Accuracy - A New Direction in <fixed-case>C</fixed-case>hinese Language Modeling - Yi-chengPan - Lin-shanLee - SadaokiFurui + Yi-chengPan + Lin-shanLee + SadaokiFurui 755–763 P09-1085 pan-etal-2009-discriminative @@ -801,16 +801,16 @@ Quadratic-Time Dependency Parsing for Machine Translation MichelGalley - Christopher D.Manning + Christopher D.Manning 773–781 P09-1087 galley-manning-2009-quadratic A <fixed-case>G</fixed-case>ibbs Sampler for Phrasal Synchronous Grammar Induction - PhilBlunsom - TrevorCohn - ChrisDyer + PhilBlunsom + TrevorCohn + ChrisDyer MilesOsborne 782–790 P09-1088 @@ -833,7 +833,7 @@ AnanthakrishnanRamanathan HansrajChoudhary AvishekGhosh - PushpakBhattacharyya + PushpakBhattacharyya 800–808 P09-1090 ramanathan-etal-2009-case @@ -842,7 +842,7 @@ Dependency Based <fixed-case>C</fixed-case>hinese Sentence Realization WeiHe HaifengWang - YuqingGuo + YuqingGuo TingLiu 809–816 P09-1091 @@ -885,11 +885,11 @@ <fixed-case>SMS</fixed-case> based Interface for <fixed-case>FAQ</fixed-case> Retrieval - GovindKothari + GovindKothari SumitNegi - Tanveer A.Faruquie + Tanveer A.Faruquie Venkatesan T.Chakaravarthy - L. VenkataSubramaniam + L. VenkataSubramaniam 852–860 P09-1096 kothari-etal-2009-sms @@ -915,7 +915,7 @@ Comparing Objective and Subjective Measures of Usability in a Human-Robot Dialogue System - Mary EllenFoster + Mary EllenFoster ManuelGiuliani AloisKnoll 879–887 @@ -925,7 +925,7 @@ Setting Up User Action Probabilities in User Simulations for Dialog System Development HuaAi - DianeLitman + DianeLitman 888–896 P09-1100 ai-litman-2009-setting @@ -940,8 +940,8 @@ Robust Approach to Abbreviating Terms: A Discriminative Latent Variable Model with Global Information XuSun - NaoakiOkazaki - Jun’ichiTsujii + NaoakiOkazaki + Jun’ichiTsujii 905–913 P09-1102 sun-etal-2009-robust @@ -950,7 +950,7 @@ A non-contiguous Tree Sequence Alignment-based Model for Statistical Machine Translation JunSun MinZhang - Chew LimTan + Chew LimTan 914–922 P09-1103 sun-etal-2009-non @@ -977,7 +977,7 @@ BoxingChen MinZhang HaizhouLi - AitiAw + AitiAw 941–948 P09-1106 chen-etal-2009-comparative @@ -1005,15 +1005,15 @@ KazuoHara MasashiShimbo HideharuOkuma - YujiMatsumoto + YujiMatsumoto 967–975 P09-1109 hara-etal-2009-coordinate Learning Context-Dependent Mappings from Sentences to Logical Form - LukeZettlemoyer - MichaelCollins + LukeZettlemoyer + MichaelCollins 976–984 P09-1110 zettlemoyer-collins-2009-learning @@ -1039,7 +1039,7 @@ MikeMintz StevenBills RionSnow - DanielJurafsky + DanielJurafsky 1003–1011 P09-1113 mintz-etal-2009-distant @@ -1054,7 +1054,7 @@ Unsupervised Relation Extraction by Mining <fixed-case>W</fixed-case>ikipedia Texts Using Information from the Web YulanYan - NaoakiOkazaki + NaoakiOkazaki YutakaMatsuo ZhengluYang MitsuruIshizuka @@ -1082,7 +1082,7 @@ Word or Phrase? Learning Which Unit to Stress for Information Retrieval Young-InSong Jung-TaeLee - Hae-ChangRim + Hae-ChangRim 1048–1056 P09-1118 song-etal-2009-word @@ -1091,7 +1091,7 @@ A Generative Blog Post Retrieval Model that Uses Query Expansion based on External Collections WouterWeerkamp KrisztianBalog - Maartende Rijke + Maartende Rijke 1057–1065 P09-1119 weerkamp-etal-2009-generative @@ -1109,7 +1109,7 @@ WeiGao JohnBlitzer MingZhou - Kam-FaiWong + Kam-FaiWong 1075–1083 P09-1121 gao-etal-2009-exploiting @@ -1121,7 +1121,7 @@ P09-2 Keh-YihSu JianSu - JanyceWiebe + JanyceWiebe HaizhouLi Association for Computational Linguistics
Suntec, Singapore
@@ -1136,8 +1136,8 @@ Variational Inference for Grammar Induction with Prior Knowledge - ShayCohen - Noah A.Smith + ShayCohen + Noah A.Smith 1–4 P09-2001 cohen-smith-2009-variational @@ -1147,7 +1147,7 @@ HideharuOkuma KazuoHara MasashiShimbo - YujiMatsumoto + YujiMatsumoto 5–8 P09-2002 okuma-etal-2009-bypassed @@ -1171,17 +1171,17 @@ Hybrid Approach to User Intention Modeling for Dialog Simulation - SangkeunJung + SangkeunJung CheongjaeLee KyungdukKim - Gary GeunbaeLee + Gary GeunbaeLee 17–20 P09-2005 jung-etal-2009-hybrid Homophones and Tonal Patterns in <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Transliteration - Oi YeeKwong + Oi YeeKwong 21–24 P09-2006 kwong-2009-homophones @@ -1190,7 +1190,7 @@ Capturing Errors in Written <fixed-case>C</fixed-case>hinese Words Chao-LinLiu Kan-WenTien - Min-HuaLai + Min-HuaLai Yi-HsuanChuang Shih-HungWu 25–28 @@ -1203,8 +1203,8 @@ Do-GilLee Jung-TaeLee PontusStenetorp - Jun’ichiTsujii - Hae-ChangRim + Jun’ichiTsujii + Hae-ChangRim 29–32 P09-2008 cho-etal-2009-novel @@ -1214,14 +1214,14 @@ NavanathSaharia DhrubajyotiDas UtpalSharma - JugalKalita + JugalKalita 33–36 P09-2009 saharia-etal-2009-part Improving data-driven dependency parsing using large-scale <fixed-case>LFG</fixed-case> grammars - LiljaØvrelid + LiljaØvrelid JonasKuhn KathrinSpreyer 37–40 @@ -1230,7 +1230,7 @@ Incremental Parsing with Monotonic Adjoining Operation - YoshihideKato + YoshihideKato ShigekiMatsubara 41–44 P09-2011 @@ -1255,7 +1255,7 @@ Comparing the Accuracy of <fixed-case>CCG</fixed-case> and <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank Parsers StephenClark - James R.Curran + James R.Curran 53–56 P09-2014 clark-curran-2009-comparing @@ -1293,7 +1293,7 @@ LiliKotlerman IdoDagan IdanSzpektor - MaayanZhitomirsky-Geffet + MaayanZhitomirsky-Geffet 69–72 P09-2018 kotlerman-etal-2009-directional @@ -1301,8 +1301,8 @@ Generalizing over Lexical Features: Selectional Preferences for Semantic Role Classification BeñatZapirain - EnekoAgirre - LluísMàrquez + EnekoAgirre + LluísMàrquez 73–76 P09-2019 zapirain-etal-2009-generalizing @@ -1311,15 +1311,15 @@ A Syntactic and Lexical-Based Discourse Segmenter MilanTofiloski JulianBrooke - MaiteTaboada + MaiteTaboada 77–80 P09-2020 tofiloski-etal-2009-syntactic Realistic Grammar Error Simulation using <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic - SungjinLee - Gary GeunbaeLee + SungjinLee + Gary GeunbaeLee 81–84 P09-2021 lee-lee-2009-realistic @@ -1336,7 +1336,7 @@ Predicting Barge-in Utterance Errors by using Implicitly-Supervised <fixed-case>ASR</fixed-case> Accuracy and Barge-in Rate per User KazunoriKomatani - Alexander I.Rudnicky + Alexander I.Rudnicky 89–92 P09-2023 komatani-rudnicky-2009-predicting @@ -1359,9 +1359,9 @@ Leveraging Structural Relations for Fluent Compressions at Multiple Compression Rates SourishChaudhuri - Naman K.Gupta - Noah A.Smith - Carolyn P.Rosé + Naman K.Gupta + Noah A.Smith + Carolyn P.Rosé 101–104 P09-2026 chaudhuri-etal-2009-leveraging @@ -1376,7 +1376,7 @@ Using Generation for Grammar Analysis and Error Detection - MichaelGoodman + MichaelGoodman FrancisBond 109–112 P09-2028 @@ -1386,7 +1386,7 @@ An Integrated Multi-document Summarization Approach based on Word Hierarchical Representation YouOuyang WenjieLi - QinLu + QinLu 113–116 P09-2029 ouyang-etal-2009-integrated @@ -1404,7 +1404,7 @@ Reducing <fixed-case>SMT</fixed-case> Rule Table with Monolingual Key Phrase ZhongjunHe YaoMeng - Yajuan + Yajuan HaoYu QunLiu 121–124 @@ -1414,8 +1414,8 @@ A Statistical Machine Translation Model Based on a Synthetic Synchronous Grammar HongfeiJiang - MuyunYang - TiejunZhao + MuyunYang + TiejunZhao ShengLi BoWang 125–128 @@ -1460,7 +1460,7 @@ Hidden <fixed-case>M</fixed-case>arkov Tree Model in Dependency-based Machine Translation - ZdeněkŽabokrtský + ZdeněkŽabokrtský MartinPopel 145–148 P09-2037 @@ -1469,7 +1469,7 @@ Word to Sentence Level Emotion Tagging for <fixed-case>B</fixed-case>engali Blogs DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 149–152 P09-2038 das-bandyopadhyay-2009-word @@ -1484,18 +1484,18 @@ Opinion and Generic Question Answering Systems: a Performance Analysis - AlexandraBalahur + AlexandraBalahur EsterBoldrini - AndrésMontoyo - PatricioMartínez-Barco + AndrésMontoyo + PatricioMartínez-Barco 157–160 P09-2040 balahur-etal-2009-opinion Automatic Satire Detection: Are You Having a Laugh? - ClintBurfoot - TimothyBaldwin + ClintBurfoot + TimothyBaldwin 161–164 P09-2041 burfoot-baldwin-2009-automatic @@ -1504,7 +1504,7 @@ Hierarchical Multi-Label Text Categorization with Global Margin Maximization XipengQiu WenjunGao - XuanjingHuang + XuanjingHuang 165–168 P09-2042 qiu-etal-2009-hierarchical @@ -1512,7 +1512,7 @@ Toward finer-grained sentiment identification in product reviews through linguistic and ontological analyses Hye-JinMin - Jong C.Park + Jong C.Park 169–172 P09-2043 min-park-2009-toward @@ -1536,18 +1536,18 @@ An Ontology-Based Approach for Key Phrase Extraction ChauQ. Nguyen - TuoiT. Phan + TuoiT. Phan 181–184 P09-2046 q-nguyen-t-phan-2009-ontology Query Segmentation Based on Eigenspace Similarity - ChaoZhang + ChaoZhang NanSun XiaHu TingzhuHuang - Tat-SengChua + Tat-SengChua 185–188 P09-2047 zhang-etal-2009-query @@ -1584,7 +1584,7 @@ Mining Association Language Patterns for Negative Life Event Classification - Liang-ChihYu + Liang-ChihYu Chien-LungChan Chung-HsienWu Chao-ChengLin @@ -1606,7 +1606,7 @@ Play the Language: Play Coreference - BarboraHladká + BarboraHladká JiříMírovský PavelSchlesinger 209–212 @@ -1616,8 +1616,8 @@ <fixed-case>C</fixed-case>hinese Term Extraction Using Different Types of Relevance YuhangYang - TiejunZhao - QinLu + TiejunZhao + QinLu DequanZheng HaoYu 213–216 @@ -1626,8 +1626,8 @@ i<fixed-case>C</fixed-case>hi: a bilingual dictionary generating tool - IstvánVarga - ShoichiYokoyama + IstvánVarga + ShoichiYokoyama 217–220 P09-2055 varga-yokoyama-2009-ichi @@ -1635,14 +1635,14 @@ <fixed-case>CAT</fixed-case>i<fixed-case>B</fixed-case>: The <fixed-case>C</fixed-case>olumbia <fixed-case>A</fixed-case>rabic Treebank NizarHabash - RyanRoth + RyanRoth 221–224 P09-2056 habash-roth-2009-catib A Beam-Search Extraction Algorithm for Comparable Data - ChristophTillmann + ChristophTillmann 225–228 P09-2057 tillmann-2009-beam @@ -1659,7 +1659,7 @@ Bridging Morpho-Syntactic Gap between Source and Target Sentences for <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Statistical Machine Translation GumwonHong Seung-WookLee - Hae-ChangRim + Hae-ChangRim 233–236 P09-2059 hong-etal-2009-bridging @@ -1684,7 +1684,7 @@ Syntax is from <fixed-case>M</fixed-case>ars while Semantics from <fixed-case>V</fixed-case>enus! Insights from Spectral Analysis of Distributional Similarity Networks - ChrisBiemann + ChrisBiemann MonojitChoudhury AnimeshMukherjee 245–248 @@ -1703,7 +1703,7 @@ Prediction of Thematic Rank for Structured Semantic Role Labeling - WeiweiSun + WeiweiSun ZhifangSui MengWang 253–256 @@ -1712,8 +1712,8 @@ Transfer Learning, Feature Selection and Word Sense Disambiguation - Paramveer S.Dhillon - Lyle H.Ungar + Paramveer S.Dhillon + Lyle H.Ungar 257–260 P09-2065 dhillon-ungar-2009-transfer @@ -1728,16 +1728,16 @@ Automatic Story Segmentation using a <fixed-case>B</fixed-case>ayesian Decision Framework for Statistical Models of Lexical Chain Features - Wai-KitLo + Wai-KitLo WenyingXiong - HelenMeng + HelenMeng 265–268 P09-2067 lo-etal-2009-automatic Investigating Pitch Accent Recognition in Non-native Speech - Gina-AnneLevow + Gina-AnneLevow 269–272 P09-2068 levow-2009-investigating @@ -1746,14 +1746,14 @@ A Stochastic Finite-State Morphological Parser for <fixed-case>T</fixed-case>urkish HaşimSak TungaGüngör - MuratSaraçlar + MuratSaraçlar 273–276 P09-2069 sak-etal-2009-stochastic Parsing Speech Repair without Specialized Grammar Symbols - TimMiller + TimMiller LuanNguyen WilliamSchuler 277–280 @@ -1763,8 +1763,8 @@ Efficient Inference of <fixed-case>CRF</fixed-case>s for Large-Scale Natural Language Data MinwooJeong - Chin-YewLin - Gary GeunbaeLee + Chin-YewLin + Gary GeunbaeLee 281–284 P09-2071 jeong-etal-2009-efficient @@ -1788,7 +1788,7 @@ <fixed-case>M</fixed-case>arkov Random Topic Fields - HalDaumé III + HalDaumé III 293–296 P09-2074 daume-iii-2009-markov @@ -1807,12 +1807,12 @@ Validating the web-based evaluation of <fixed-case>NLG</fixed-case> systems AlexanderKoller KristinaStriegnitz - DonnaByron + DonnaByron JustineCassell RobertDale SaraDalzel-Job - JohannaMoore - JonOberlander + JohannaMoore + JonOberlander 301–304 P09-2076 koller-etal-2009-validating @@ -1826,7 +1826,7 @@ The Lie Detector: Explorations in the Automatic Recognition of Deceptive Language - RadaMihalcea + RadaMihalcea CarloStrapparava 309–312 P09-2078 @@ -1835,7 +1835,7 @@ Generalizing Dependency Features for Opinion Mining MaheshJoshi - CarolynPenstein-Rosé + CarolynPenstein-Rosé 313–316 P09-2079 joshi-penstein-rose-2009-generalizing @@ -1844,7 +1844,7 @@ Graph Ranking for Sentiment Transfer QiongWu SongboTan - XueqiCheng + XueqiCheng 317–320 P09-2080 wu-etal-2009-graph @@ -1853,7 +1853,7 @@ The Contribution of Stylistic Information to Content-based Mobile Spam Filtering Dae-NeungSohn Jung-TaeLee - Hae-ChangRim + Hae-ChangRim 321–324 P09-2081 sohn-etal-2009-contribution @@ -1870,25 +1870,25 @@ Do Automatic Annotation Techniques Have Any Impact on Supervised Complex Question Answering? YlliasChali - SadidHasan - ShafiqJoty + SadidHasan + ShafiqJoty 329–332 P09-2083 chali-etal-2009-automatic Where’s the Verb? Correcting Machine Translation During Question Answering - Wei-YunMa - KathyMcKeown + Wei-YunMa + KathyMcKeown 333–336 P09-2084 ma-mckeown-2009-wheres A Note on the Implementation of Hierarchical <fixed-case>D</fixed-case>irichlet Processes - PhilBlunsom - TrevorCohn - SharonGoldwater + PhilBlunsom + TrevorCohn + SharonGoldwater MarkJohnson 337–340 P09-2085 @@ -1906,14 +1906,14 @@ Modeling Morphologically Rich Languages Using Split Words and Unstructured Dependencies DenizYuret - ErgunBiçici + ErgunBiçici 345–348 P09-2087 yuret-bicici-2009-modeling Improved Smoothing for N-gram Language Models Based on Ordinary Counts - Robert C.Moore + Robert C.Moore ChrisQuirk 349–352 P09-2088 @@ -1922,21 +1922,21 @@ Updating a Name Tagger Using Contemporary Unlabeled Data CristinaMota - RalphGrishman + RalphGrishman 353–356 P09-2089 mota-grishman-2009-updating <fixed-case>A</fixed-case>rabic Cross-Document Coreference Resolution - AsadSayeed + AsadSayeed TamerElsayed NikeshGarera DavidAlexander TanXu - DougOard + DougOard DavidYarowsky - ChristinePiatko + ChristinePiatko 357–360 P09-2090 sayeed-etal-2009-arabic @@ -1945,7 +1945,7 @@ The Impact of Query Refinement in the Web People Search Task JavierArtiles JulioGonzalo - EnriqueAmigó + EnriqueAmigó 361–364 P09-2091 artiles-etal-2009-impact @@ -1954,7 +1954,7 @@ Composite Kernels For Relation Extraction FrankReichartz HannesKorte - GerhardPaass + GerhardPaass 365–368 P09-2092 reichartz-etal-2009-composite @@ -1974,8 +1974,8 @@ P09-3 BrianRoark GraceNgai - Davis Muhajereen D.Dimalen - Jenny RoseFinkel + Davis Muhajereen D.Dimalen + Jenny RoseFinkel BlaiseThomson Association for Computational Linguistics
Suntec, Singapore
@@ -1997,9 +1997,9 @@
Insights into Non-projectivity in <fixed-case>H</fixed-case>indi - PrashanthMannem - HimaniChaudhry - AksharBharati + PrashanthMannem + HimaniChaudhry + AksharBharati 10–17 P09-3002 mannem-etal-2009-insights @@ -2074,7 +2074,7 @@ Creating a Gold Standard for Sentence Clustering in Multi-Document Summarization - JohannaGeiss + JohannaGeiss 96–104 P09-3012 geiss-2009-creating @@ -2084,8 +2084,8 @@ Proceedings of the ACL-IJCNLP 2009 Software Demonstrations P09-4 - Gary GeunbaeLee - SabineSchulte im Walde + Gary GeunbaeLee + SabineSchulte im Walde Association for Computational Linguistics
Suntec, Singapore
August @@ -2112,12 +2112,12 @@
<fixed-case>LX</fixed-case>-Center: a center of online linguistic services - AntónioBranco + AntónioBranco FranciscoCosta EduardoFerreira PedroMartins FilipeNunes - JoãoSilva + JoãoSilva SaraSilveira 5–8 P09-4002 @@ -2125,8 +2125,8 @@ A Tool for Deep Semantic Encoding of Narrative Texts - David K.Elson - Kathleen R.McKeown + David K.Elson + Kathleen R.McKeown 9–12 P09-4003 elson-mckeown-2009-tool @@ -2148,9 +2148,9 @@ <fixed-case>MARS</fixed-case>: Multilingual Access and Retrieval System with Enhanced Query Translation and Document Retrieval - LianhauLee - AitiAw - ThuyVu + LianhauLee + AitiAw + ThuyVu Sharifah AljuniedMahani MinZhang HaizhouLi @@ -2162,21 +2162,21 @@ Demonstration of <fixed-case>J</fixed-case>oshua: An Open Source Toolkit for Parsing-based Machine Translation ZhifeiLi ChrisCallison-Burch - ChrisDyer + ChrisDyer JuriGanitkevitch - SanjeevKhudanpur + SanjeevKhudanpur LaneSchwartz Wren N. G.Thornton JonathanWeese - Omar F.Zaidan + Omar F.Zaidan 25–28 P09-4007 li-etal-2009-demonstration <fixed-case>W</fixed-case>iki<fixed-case>BABEL</fixed-case>: A <fixed-case>W</fixed-case>iki-style Platform for Creation of Parallel Data - AKumaran - KSaravanan + AKumaran + KSaravanan NarenDatha BAshok VikramDendi @@ -2205,7 +2205,7 @@ SebastianVarges SilviaQuarteroni GiuseppeRiccardi - Alexei V.Ivanov + Alexei V.Ivanov PierluigiRoberti 41–44 P09-4011 @@ -2216,8 +2216,8 @@ Tutorial Abstracts of ACL-IJCNLP 2009 P09-5 - DianaMcCarthy - ChengqingZong + DianaMcCarthy + ChengqingZong Association for Computational Linguistics
Suntec, Singapore
August @@ -2232,7 +2232,7 @@ Fundamentals of <fixed-case>C</fixed-case>hinese Language Processing Chu-RenHuang - QinLu + QinLu 1 P09-5001 huang-lu-2009-fundamentals @@ -2247,7 +2247,7 @@ Semantic Role Labeling: Past, Present and Future - LluísMàrquez + LluísMàrquez 3 P09-5003 marquez-2009-semantic @@ -2268,8 +2268,8 @@ State-of-the-art <fixed-case>NLP</fixed-case> Approaches to Coreference Resolution: Theory and Practical Recipes - Simone PaoloPonzetto - MassimoPoesio + Simone PaoloPonzetto + MassimoPoesio 6 P09-5006 ponzetto-poesio-2009-state diff --git a/data/xml/P10.xml b/data/xml/P10.xml index 06ae476425..a2da7c64d0 100644 --- a/data/xml/P10.xml +++ b/data/xml/P10.xml @@ -4,8 +4,8 @@ Proceedings of the 48th Annual Meeting of the Association for Computational Linguistics P10-1 - JanHajič - SandraCarberry + JanHajič + SandraCarberry StephenClark JoakimNivre Association for Computational Linguistics @@ -21,7 +21,7 @@ Efficient Third-Order Dependency Parsers TerryKoo - MichaelCollins + MichaelCollins 1–11 P10-1001 koo-collins-2010-efficient @@ -37,7 +37,7 @@ Bitext Dependency Parsing with Bilingual Subtree Constraints WenliangChen - Jun’ichiKazama + Jun’ichiKazama KentaroTorisawa 21–29 P10-1003 @@ -76,7 +76,7 @@ Learning to Adapt to Unknown Users: Referring Expression Generation in Spoken Dialogue Systems - SrinivasanJanarthanam + SrinivasanJanarthanam OliverLemon 69–78 P10-1008 @@ -92,7 +92,7 @@ The Human Language Project: Building a Universal Corpus of the World’s Languages - StevenAbney + StevenAbney StevenBird 88–97 P10-1010 @@ -108,8 +108,8 @@ Automatic Evaluation Method for Machine Translation Using Noun-Phrase Chunking - HiroshiEchizen-ya - KenjiAraki + HiroshiEchizen-ya + KenjiAraki 108–117 P10-1012 echizen-ya-araki-2010-automatic @@ -117,7 +117,7 @@ Open Information Extraction Using <fixed-case>W</fixed-case>ikipedia FeiWu - Daniel S.Weld + Daniel S.Weld 118–127 P10-1013 wu-weld-2010-open @@ -128,7 +128,7 @@ RajasekarKrishnamurthy YunyaoLi SriramRaghavan - FrederickReiss + FrederickReiss ShivakumarVaithyanathan 128–137 P10-1014 @@ -136,9 +136,9 @@ Extracting Social Networks from Literary Fiction - DavidElson + DavidElson NicholasDames - KathleenMcKeown + KathleenMcKeown 138–147 P10-1015 elson-etal-2010-extracting @@ -162,8 +162,8 @@ “Was It Good? It Was Provocative.” Learning the Meaning of Scalar Adjectives - Marie-Catherinede Marneffe - Christopher D.Manning + Marie-Catherinede Marneffe + Christopher D.Manning ChristopherPotts 167–176 P10-1018 @@ -171,15 +171,15 @@ Importance-Driven Turn-Bidding for Spoken Dialogue Systems - EthanSelfridge - PeterHeeman + EthanSelfridge + PeterHeeman 177–185 P10-1019 selfridge-heeman-2010-importance Entity-Based Local Coherence Modelling Using Topological Fields - Jackie Chi KitCheung + Jackie Chi KitCheung GeraldPenn 186–195 P10-1020 @@ -198,7 +198,7 @@ Rebanking <fixed-case>CCG</fixed-case>bank for Improved <fixed-case>NP</fixed-case> Interpretation MatthewHonnibal - James R.Curran + James R.Curran JohanBos 207–215 P10-1022 @@ -207,7 +207,7 @@ <fixed-case>B</fixed-case>abel<fixed-case>N</fixed-case>et: Building a Very Large Multilingual Semantic Network RobertoNavigli - Simone PaoloPonzetto + Simone PaoloPonzetto 216–225 P10-1023 navigli-ponzetto-2010-babelnet @@ -225,14 +225,14 @@ DaniloCroce CristinaGiannone PaoloAnnesi - RobertoBasili + RobertoBasili 237–246 P10-1025 croce-etal-2010-towards A <fixed-case>B</fixed-case>ayesian Method for Robust Estimation of Distributional Similarities - Jun’ichiKazama + Jun’ichiKazama StijnDe Saeger KowKuroda MasakiMurata @@ -245,7 +245,7 @@ Recommendation in <fixed-case>I</fixed-case>nternet Forums and Blogs JiaWang QingLi - Yuanzhu PeterChen + Yuanzhu PeterChen ZhangxiLin 257–265 P10-1027 @@ -264,16 +264,16 @@ Inducing Domain-Specific Semantic Class Taggers from (Almost) Nothing RuihongHuang - EllenRiloff + EllenRiloff 275–285 P10-1029 huang-riloff-2010-inducing Learning 5000 Relational Extractors - RaphaelHoffmann + RaphaelHoffmann CongleZhang - Daniel S.Weld + Daniel S.Weld 286–295 P10-1030 hoffmann-etal-2010-learning @@ -290,7 +290,7 @@ Exploring Syntactic Structural Features for Sub-Tree Alignment Using Bilingual Tree Kernels JunSun MinZhang - Chew LimTan + Chew LimTan 306–315 P10-1032 sun-etal-2010-exploring @@ -308,7 +308,7 @@ Fine-Grained Tree-to-String Translation Rule Extraction XianchaoWu TakuyaMatsuzaki - Jun’ichiTsujii + Jun’ichiTsujii 325–334 P10-1034 wu-etal-2010-fine @@ -327,7 +327,7 @@ JessikaRoesner TimDawborn JamesHaggerty - James R.Curran + James R.Curran StephenClark 345–355 P10-1036 @@ -359,8 +359,8 @@ Word Representations: A Simple and General Method for Semi-Supervised Learning - JosephTurian - Lev-ArieRatinov + JosephTurian + Lev-ArieRatinov YoshuaBengio 384–394 ACL 2020 Test of Time Award (10 year) @@ -369,8 +369,8 @@ Identifying Text Polarity Using Random Walks - AhmedHassan - Dragomir R.Radev + AhmedHassan + Dragomir R.Radev 395–403 P10-1041 hassan-radev-2010-identifying @@ -387,8 +387,8 @@ Employing Personal/Impersonal Views in Supervised and Semi-Supervised Sentiment Classification ShoushanLi Chu-RenHuang - GuodongZhou - Sophia Yat MeiLee + GuodongZhou + Sophia Yat MeiLee 414–423 P10-1043 li-etal-2010-employing @@ -411,8 +411,8 @@ Improving the Use of Pseudo-Words for Evaluating Selectional Preferences - NathanaelChambers - DanielJurafsky + NathanaelChambers + DanielJurafsky 445–453 P10-1046 chambers-jurafsky-2010-improving @@ -429,7 +429,7 @@ <fixed-case>H</fixed-case>indi-to-<fixed-case>U</fixed-case>rdu Machine Translation through Transliteration NadirDurrani HassanSajjad - AlexanderFraser + AlexanderFraser HelmutSchmid 465–474 P10-1048 @@ -439,7 +439,7 @@ Training Phrase Translation Models with Leaving-One-Out JoernWuebker ArneMauser - HermannNey + HermannNey 475–484 P10-1049 wuebker-etal-2010-training @@ -481,7 +481,7 @@ Optimal Rank Reduction for Linear Context-Free Rewriting Systems with Fan-Out Two - BenoîtSagot + BenoîtSagot GiorgioSatta 525–533 P10-1054 @@ -508,7 +508,7 @@ Identifying Non-Explicit Citing Sentences for Citation-Based Summarization. VahedQazvinian - Dragomir R.Radev + Dragomir R.Radev 555–564 P10-1057 qazvinian-radev-2010-identifying @@ -533,7 +533,7 @@ Generating Focused Topic-Specific Sentiment Lexicons ValentinJijkoun - Maartende Rijke + Maartende Rijke WouterWeerkamp 585–594 P10-1060 @@ -550,7 +550,7 @@ Error Detection for Statistical Machine Translation Using Linguistic Features - DeyiXiong + DeyiXiong MinZhang HaizhouLi 604–611 @@ -569,7 +569,7 @@ Bridging <fixed-case>SMT</fixed-case> and <fixed-case>TM</fixed-case> with Translation Recommendation YifanHe YanjunMa - Josefvan Genabith + Josefvan Genabith AndyWay 622–630 P10-1064 @@ -578,7 +578,7 @@ On Jointly Recognizing and Aligning Bilingual Named Entities YufengChen - ChengqingZong + ChengqingZong Keh-YihSu 631–639 P10-1065 @@ -596,7 +596,7 @@ Comparable Entity Mining from Comparative Questions ShashaLi - Chin-YewLin + Chin-YewLin Young-InSong ZhoujunLi 650–658 @@ -613,7 +613,7 @@ Temporal Information Processing of a New Language: Fast Porting with Minimal Resources FranciscoCosta - AntónioBranco + AntónioBranco 671–677 P10-1069 costa-branco-2010-temporal @@ -621,7 +621,7 @@ A Taxonomy, Dataset, and Classifier for Automatic Noun Compound Interpretation StephenTratz - EduardHovy + EduardHovy 678–687 P10-1070 tratz-hovy-2010-taxonomy @@ -643,17 +643,17 @@ Kernel Based Discourse Relation Recognition with Temporal Ordering Information - WenTingWang + WenTingWang JianSu - Chew LimTan + Chew LimTan 710–719 P10-1073 wang-etal-2010-kernel Hierarchical Joint Learning: Improving Joint Parsing and Named Entity Recognition with Non-Jointly Labeled Data - Jenny RoseFinkel - Christopher D.Manning + Jenny RoseFinkel + Christopher D.Manning 720–728 P10-1074 finkel-manning-2010-hierarchical @@ -696,8 +696,8 @@ A Hybrid Rule/Model-Based Finite-State Framework for Normalizing <fixed-case>SMS</fixed-case> Messages RichardBeaufort SophieRoekhaut - Louise-AmélieCougnon - CédrickFairon + Louise-AmélieCougnon + CédrickFairon 770–779 P10-1079 beaufort-etal-2010-hybrid @@ -713,7 +713,7 @@ Using Document Level Cross-Event Inference to Improve Event Extraction ShashaLiao - RalphGrishman + RalphGrishman 789–797 P10-1081 liao-grishman-2010-using @@ -728,7 +728,7 @@ Learning to Follow Navigational Directions AdamVogel - DanielJurafsky + DanielJurafsky 806–814 P10-1083 vogel-jurafsky-2010-learning @@ -736,7 +736,7 @@ A Hybrid Hierarchical Model for Multi-Document Summarization AsliCelikyilmaz - DilekHakkani-Tur + DilekHakkani-Tur 815–824 P10-1084 celikyilmaz-hakkani-tur-2010-hybrid @@ -838,7 +838,7 @@ <fixed-case>B</fixed-case>ayesian Synchronous Tree-Substitution Grammar Induction and Its Application to Sentence Compression ElifYamangil - Stuart M.Shieber + Stuart M.Shieber 937–947 P10-1096 yamangil-shieber-2010-bayesian @@ -965,7 +965,7 @@ WolfgangSeeker InesRehbein JonasKuhn - Josefvan Genabith + Josefvan Genabith 1087–1097 P10-1111 seeker-etal-2010-hard @@ -980,8 +980,8 @@ Joint Syntactic and Semantic Parsing of <fixed-case>C</fixed-case>hinese - JunhuiLi - GuodongZhou + JunhuiLi + GuodongZhou Hwee TouNg 1108–1117 P10-1113 @@ -999,7 +999,7 @@ Cross-Lingual Latent Topic Extraction DuoZhang QiaozhuMei - ChengXiangZhai + ChengXiangZhai 1128–1137 P10-1115 zhang-etal-2010-cross @@ -1033,7 +1033,7 @@ A Rational Model of Eye Movement Control in Reading KlintonBicknell - RogerLevy + RogerLevy 1168–1178 P10-1119 bicknell-levy-2010-rational @@ -1068,7 +1068,7 @@ Assessing the Role of Discourse References in Entailment Inference ShacharMirkin IdoDagan - SebastianPadó + SebastianPadó 1209–1219 P10-1123 mirkin-etal-2010-assessing @@ -1086,8 +1086,8 @@ Modeling Semantic Relevance for Question-Answer Pairs in Web Social Communities BaoxunWang XiaolongWang - ChengjieSun - BingquanLiu + ChengjieSun + BingquanLiu LinSun 1230–1238 P10-1125 @@ -1104,7 +1104,7 @@ Generating Image Descriptions Using Dependency Relational Patterns AhmetAker - RobertGaizauskas + RobertGaizauskas 1250–1258 P10-1127 aker-gaizauskas-2010-generating @@ -1120,8 +1120,8 @@ Reading between the Lines: Learning to Map High-Level Instructions to Commands - S.R.K.Branavan - LukeZettlemoyer + S.R.K.Branavan + LukeZettlemoyer ReginaBarzilay 1268–1277 P10-1129 @@ -1129,9 +1129,9 @@ Profiting from Mark-Up: Hyper-Text Annotations for Guided Parsing - Valentin I.Spitkovsky - DanielJurafsky - HiyanAlshawi + Valentin I.Spitkovsky + DanielJurafsky + HiyanAlshawi 1278–1287 P10-1130 spitkovsky-etal-2010-profiting @@ -1164,7 +1164,7 @@ Learning Word-Class Lattices for Definition and Hypernym Extraction RobertoNavigli - PaolaVelardi + PaolaVelardi 1318–1327 P10-1134 navigli-velardi-2010-learning @@ -1186,9 +1186,9 @@ Multilingual Pseudo-Relevance Feedback: Performance Study of Assisting Languages - Manoj KumarChinnakotla + Manoj KumarChinnakotla KarthikRaman - PushpakBhattacharyya + PushpakBhattacharyya 1346–1356 P10-1137 chinnakotla-etal-2010-multilingual @@ -1207,7 +1207,7 @@ BinyangLi LanjunZhou ShiFeng - Kam-FaiWong + Kam-FaiWong 1367–1375 P10-1139 li-etal-2010-unified @@ -1237,8 +1237,8 @@ Unsupervised Event Coreference Resolution with Rich Linguistic Features - CosminBejan - SandaHarabagiu + CosminBejan + SandaHarabagiu 1412–1422 P10-1143 bejan-harabagiu-2010-unsupervised @@ -1246,7 +1246,7 @@ Coreference Resolution across Corpora: Languages, Coding Schemes, and Preprocessing Information MartaRecasens - EduardHovy + EduardHovy 1423–1432 P10-1144 recasens-hovy-2010-coreference @@ -1278,14 +1278,14 @@ Detecting Experiences from Weblogs Keun ChanPark YoonjaeJeong - Sung HyonMyaeng + Sung HyonMyaeng 1464–1472 P10-1148 park-etal-2010-detecting Experiments in Graph-Based Semi-Supervised Learning Methods for Class-Instance Acquisition - Partha PratimTalukdar + Partha PratimTalukdar FernandoPereira 1473–1481 P10-1149 @@ -1294,7 +1294,7 @@ Learning Arguments and Supertypes of Semantic Relations Using Recursive Patterns ZornitsaKozareva - EduardHovy + EduardHovy 1482–1491 P10-1150 kozareva-hovy-2010-learning @@ -1309,8 +1309,8 @@ <fixed-case>V</fixed-case>iterbi Training for <fixed-case>PCFG</fixed-case>s: Hardness Results and Competitiveness of Uniform Initialization - ShayCohen - Noah A.Smith + ShayCohen + Noah A.Smith 1502–1511 P10-1152 cohen-smith-2010-viterbi @@ -1327,7 +1327,7 @@ Knowledge-Rich Word Sense Disambiguation Rivaling Supervised Systems - Simone PaoloPonzetto + Simone PaoloPonzetto RobertoNavigli 1522–1531 P10-1154 @@ -1335,10 +1335,10 @@ All Words Domain Adapted <fixed-case>WSD</fixed-case>: Finding a Middle Ground between Supervision and Unsupervision - MiteshKhapra + MiteshKhapra AnupKulkarni SaurabhSohoney - PushpakBhattacharyya + PushpakBhattacharyya 1532–1541 P10-1155 khapra-etal-2010-words @@ -1346,20 +1346,20 @@ Combining Orthogonal Monolingual and Multilingual Sources of Evidence for All Words <fixed-case>WSD</fixed-case> WeiweiGuo - MonaDiab + MonaDiab 1542–1551 P10-1156 guo-diab-2010-combining Phrase-Based Statistical Language Generation Using Graphical Models and Active Learning - FrançoisMairesse - MilicaGašić - FilipJurčíček + FrançoisMairesse + MilicaGašić + FilipJurčíček SimonKeizer BlaiseThomson KaiYu - SteveYoung + SteveYoung 1552–1561 P10-1157 mairesse-etal-2010-phrase @@ -1382,8 +1382,8 @@ Beyond <fixed-case>N</fixed-case>om<fixed-case>B</fixed-case>ank: A Study of Implicit Arguments for Nominal Predicates - MatthewGerber - JoyceChai + MatthewGerber + JoyceChai 1583–1592 P10-1160 gerber-chai-2010-beyond @@ -1393,8 +1393,8 @@ Proceedings of the ACL 2010 Conference Short Papers P10-2 - JanHajič - SandraCarberry + JanHajič + SandraCarberry StephenClark JoakimNivre Association for Computational Linguistics @@ -1409,9 +1409,9 @@ Paraphrase Lattice for Statistical Machine Translation - TakashiOnishi + TakashiOnishi MasaoUtiyama - EiichiroSumita + EiichiroSumita 1–5 P10-2001 onishi-etal-2010-paraphrase @@ -1422,7 +1422,7 @@ DongdongZhang MuLi MingZhou - TiejunZhao + TiejunZhao 6–11 P10-2002 cui-etal-2010-joint @@ -1431,7 +1431,7 @@ Learning Lexicalized Reordering Models from Reordering Graphs JinsongSu YangLiu - YajuanLv + YajuanLv HaitaoMi QunLiu 12–16 @@ -1441,7 +1441,7 @@ Filtering Syntactic Constraints for Statistical Machine Translation HailongCao - EiichiroSumita + EiichiroSumita 17–21 P10-2004 cao-sumita-2010-filtering @@ -1458,7 +1458,7 @@ Efficient Path Counting Transducers for Minimum <fixed-case>B</fixed-case>ayes-Risk Decoding of Statistical Machine Translation Lattices GraemeBlackwood - Adriàde Gispert + Adriàde Gispert WilliamByrne 27–32 P10-2006 @@ -1476,15 +1476,15 @@ Authorship Attribution Using Probabilistic Context-Free Grammars SindhuRaghavan AdrianaKovashka - RaymondMooney + RaymondMooney 38–42 P10-2008 raghavan-etal-2010-authorship The Impact of Interpretation Problems on Tutorial Dialogue - Myroslava O.Dzikovska - Johanna D.Moore + Myroslava O.Dzikovska + Johanna D.Moore NatalieSteinhauser GwendolynCampbell 43–48 @@ -1494,7 +1494,7 @@ The Prevalence of Descriptive Referring Expressions in News and Narrative RaquelHervás - MarkFinlayson + MarkFinlayson 49–54 P10-2010 hervas-finlayson-2010-prevalence @@ -1502,7 +1502,7 @@ Preferences versus Adaptation during Referring Expression Generation MartijnGoudbeek - EmielKrahmer + EmielKrahmer 55–59 P10-2011 goudbeek-krahmer-2010-preferences @@ -1516,17 +1516,17 @@ The Manually Annotated Sub-Corpus: A Community Resource for and by the People - NancyIde - CollinBaker + NancyIde + CollinBaker ChristianeFellbaum - RebeccaPassonneau + RebeccaPassonneau 68–73 P10-2013 ide-etal-2010-manually Correcting Errors in a Treebank Based on Synchronous Tree Substitution Grammar - YoshihideKato + YoshihideKato ShigekiMatsubara 74–79 P10-2014 @@ -1536,7 +1536,7 @@ Evaluating Machine Translations Using m<fixed-case>NCD</fixed-case> MarcusDobrinkat TeroTapiovaara - JaakkoVäyrynen + JaakkoVäyrynen KimmoKettunen 80–85 P10-2015 @@ -1544,7 +1544,7 @@ Tackling Sparse Data Issue in Machine Translation Evaluation - OndřejBojar + OndřejBojar KamilKos DavidMareček 86–91 @@ -1554,7 +1554,7 @@ Exemplar-Based Models for Word Meaning in Context KatrinErk - SebastianPadó + SebastianPadó 92–97 P10-2017 erk-pado-2010-exemplar @@ -1563,14 +1563,14 @@ A Structured Model for Joint Learning of Argument Roles and Predicate Senses YotaroWatanabe MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 98–102 P10-2018 watanabe-etal-2010-structured Semantics-Driven Shallow Parsing for <fixed-case>C</fixed-case>hinese Semantic Role Labeling - WeiweiSun + WeiweiSun 103–108 P10-2019 sun-2010-semantics @@ -1584,10 +1584,10 @@ Automatic Collocation Suggestion in Academic Writing - Jian-ChengWu + Jian-ChengWu Yu-ChiaChang TerukoMitamura - Jason S.Chang + Jason S.Chang 115–119 P10-2021 wu-etal-2010-automatic-collocation @@ -1595,7 +1595,7 @@ Event-Based Hyperspace Analogue to Language for Query Expansion TingxuYan - TamsinMaxwell + TamsinMaxwell DaweiSong YuexianHou PengZhang @@ -1606,8 +1606,8 @@ Automatically Generating Term Frequency Induced Taxonomies KarinMurthy - Tanveer AFaruquie - L VenkataSubramaniam + Tanveer AFaruquie + L VenkataSubramaniam Hima PrasadK MukeshMohania 126–131 @@ -1633,7 +1633,7 @@ Better Filtration and Augmentation for Hierarchical Phrase-Based Translation Rules ZhiyangWang - YajuanLv + YajuanLv QunLiu Young-SookHwang 142–146 @@ -1643,7 +1643,7 @@ Fixed Length Word Suffix for Factored Statistical Machine Translation NargesSharif Razavian - StephanVogel + StephanVogel 147–150 P10-2027 sharif-razavian-vogel-2010-fixed @@ -1659,9 +1659,9 @@ Coreference Resolution with Reconcile VeselinStoyanov - ClaireCardie + ClaireCardie NathanGilbert - EllenRiloff + EllenRiloff DavidButtler DavidHysom 156–161 @@ -1679,16 +1679,16 @@ Improving <fixed-case>C</fixed-case>hinese Semantic Role Labeling with Rich Syntactic Features - WeiweiSun + WeiweiSun 168–172 P10-2031 sun-2010-improving Balancing User Effort and Translation Error in Interactive Machine Translation via Confidence Measures - JesúsGonzález-Rubio - DanielOrtiz-Martínez - FranciscoCasacuberta + JesúsGonzález-Rubio + DanielOrtiz-Martínez + FranciscoCasacuberta 173–177 P10-2032 gonzalez-rubio-etal-2010-balancing @@ -1714,7 +1714,7 @@ Tree-Based Deterministic Dependency Parsing — An Application to <fixed-case>N</fixed-case>ivre’s Method — KotaroKitagawa - KumikoTanaka-Ishii + KumikoTanaka-Ishii 189–193 P10-2035 kitagawa-tanaka-ishii-2010-tree @@ -1723,7 +1723,7 @@ Sparsity in Dependency Grammar Induction JenniferGillenwater KuzmanGanchev - JoãoGraça + JoãoGraça FernandoPereira BenTaskar 194–199 @@ -1741,7 +1741,7 @@ Simple Semi-Supervised Training of Part-Of-Speech Taggers - AndersSøgaard + AndersSøgaard 205–208 P10-2038 sogaard-2010-simple @@ -1767,16 +1767,16 @@ Intelligent Selection of Language Model Training Data - Robert C.Moore - WilliamLewis + Robert C.Moore + WilliamLewis 220–224 P10-2041 moore-lewis-2010-intelligent Blocked Inference in <fixed-case>B</fixed-case>ayesian Tree Substitution Grammars - TrevorCohn - PhilBlunsom + TrevorCohn + PhilBlunsom 225–230 P10-2042 cohn-blunsom-2010-blocked @@ -1792,15 +1792,15 @@ Optimizing Question Answering Accuracy by Maximizing Log-Likelihood Matthias H.Heie - Edward W. D.Whittaker - SadaokiFurui + Edward W. D.Whittaker + SadaokiFurui 236–240 P10-2044 heie-etal-2010-optimizing Generating Entailment Rules from <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et - RoniBen Aharon + RoniBen Aharon IdanSzpektor IdoDagan 241–246 @@ -1827,7 +1827,7 @@ Cross Lingual Adaptation: An Experiment on Sentiment Classifications BinWei - ChristopherPal + ChristopherPal 258–262 P10-2048 wei-pal-2010-cross @@ -1843,16 +1843,16 @@ Hierarchical Sequential Learning for Extracting Opinions and Their Attributes YejinChoi - ClaireCardie + ClaireCardie 269–274 P10-2050 choi-cardie-2010-hierarchical Jointly Optimizing a Two-Step Conditional Random Field Model for Machine Transliteration and Its Fast Decoding Algorithm - DongYang - PaulDixon - SadaokiFurui + DongYang + PaulDixon + SadaokiFurui 275–280 P10-2051 yang-etal-2010-jointly @@ -1861,7 +1861,7 @@ <fixed-case>A</fixed-case>rabic Named Entity Recognition: Using Features Extracted from Noisy Data YassineBenajiba ImedZitouni - MonaDiab + MonaDiab PaoloRosso 281–285 P10-2052 @@ -1870,7 +1870,7 @@ Extracting Sequences from the Web AnthonyFader - StephenSoderland + StephenSoderland OrenEtzioni 286–290 P10-2053 @@ -1905,7 +1905,7 @@ Decision Detection Using Hierarchical Graphical Models - Trung H.Bui + Trung H.Bui StanleyPeters 307–312 P10-2057 @@ -1913,7 +1913,7 @@ Using Speech to Reply to <fixed-case>SMS</fixed-case> Messages While Driving: An In-Car Simulator User Study - Yun-ChengJu + Yun-ChengJu TimPaek 313–317 P10-2058 @@ -1949,7 +1949,7 @@ Automatically Generating Annotator Rationales to Improve Sentiment Classification AinurYessenalina YejinChoi - ClaireCardie + ClaireCardie 336–341 P10-2062 yessenalina-etal-2010-automatically @@ -1971,9 +1971,9 @@ Using Parse Features for Preposition Selection and Error Detection - JoelTetreault + JoelTetreault JenniferFoster - MartinChodorow + MartinChodorow 353–358 P10-2065 tetreault-etal-2010-using @@ -1983,7 +1983,7 @@ Xiao-LiLi LeiZhang BingLiu - See-KiongNg + See-KiongNg 359–364 P10-2066 li-etal-2010-distributional @@ -1991,8 +1991,8 @@ Active Learning-Based Elicitation for Semi-Supervised Word Alignment VamshiAmbati - StephanVogel - JaimeCarbonell + StephanVogel + JaimeCarbonell 365–370 P10-2067 ambati-etal-2010-active-learning @@ -2008,8 +2008,8 @@ Learning Better Data Representation Using Inference-Driven Metric Learning - Paramveer S.Dhillon - Partha PratimTalukdar + Paramveer S.Dhillon + Partha PratimTalukdar KobyCrammer 377–381 P10-2069 @@ -2019,7 +2019,7 @@ Wrapping up a Summary: From Representation to Generation JosefSteinberger MarcoTurchi - MijailKabadjov + MijailKabadjov RalfSteinberger NelloCristianini 382–386 @@ -2034,8 +2034,8 @@ SenizDemir JanRaab NilsReiter - MarketaLopatkova - TomekStrzalkowski + MarketaLopatkova + TomekStrzalkowski Association for Computational Linguistics
Uppsala, Sweden
July @@ -2159,7 +2159,7 @@
Adapting Self-Training for Semantic Role Labeling - RasoulSamad Zadeh Kaljahi + RasoulSamad Zadeh Kaljahi 91–96 P10-3016 samad-zadeh-kaljahi-2010-adapting @@ -2190,7 +2190,7 @@ Proceedings of the ACL 2010 System Demonstrations P10-4 - SandraKübler + SandraKübler Association for Computational Linguistics
Uppsala, Sweden
July @@ -2203,11 +2203,11 @@ Grammar Prototyping and Testing with the <fixed-case>L</fixed-case>in<fixed-case>GO</fixed-case> Grammar Matrix Customization System - Emily M.Bender + Emily M.Bender ScottDrellishak AntskeFokkens - Michael WayneGoodman - Daniel P.Mills + Michael WayneGoodman + Daniel P.Mills LauriePoulson SafiyyahSaleem 1–6 @@ -2216,12 +2216,12 @@ cdec: A Decoder, Alignment, and Learning Framework for Finite-State and Context-Free Translation Models - ChrisDyer + ChrisDyer AdamLopez JuriGanitkevitch JonathanWeese - FerhanTure - PhilBlunsom + FerhanTure + PhilBlunsom HendraSetiawan VladimirEidelman PhilipResnik @@ -2231,12 +2231,12 @@ Beetle <fixed-case>II</fixed-case>: A System for Tutoring and Computational Linguistics Experimentation - Myroslava O.Dzikovska - Johanna D.Moore + Myroslava O.Dzikovska + Johanna D.Moore NatalieSteinhauser GwendolynCampbell ElaineFarrow - Charles B.Callaway + Charles B.Callaway 13–18 P10-4003 dzikovska-etal-2010-beetle @@ -2244,15 +2244,15 @@ <fixed-case>G</fixed-case>ern<fixed-case>E</fixed-case>di<fixed-case>T</fixed-case>: A Graphical Tool for <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et Development VerenaHenrich - ErhardHinrichs + ErhardHinrichs 19–24 P10-4004 henrich-hinrichs-2010-gernedit-graphical <fixed-case>W</fixed-case>eb<fixed-case>L</fixed-case>icht: Web-Based <fixed-case>LRT</fixed-case> Services for <fixed-case>G</fixed-case>erman - ErhardHinrichs - MarieHinrichs + ErhardHinrichs + MarieHinrichs ThomasZastrow 25–29 P10-4005 @@ -2268,7 +2268,7 @@ Talking <fixed-case>NPC</fixed-case>s in a Virtual Game World - TinaKlüwer + TinaKlüwer PeterAdolphs FeiyuXu HansUszkoreit @@ -2280,7 +2280,7 @@ An Open-Source Package for Recognizing Textual Entailment MilenKouylekov - MatteoNegri + MatteoNegri 42–47 P10-4008 kouylekov-negri-2010-open @@ -2298,7 +2298,7 @@ SimonKing HuiLiang KeiichiroOura - LakshmiSaheer + LakshmiSaheer MattShannon SayakiShiota JileiTian @@ -2308,7 +2308,7 @@ Hunting for the Black Swan: Risk Mining from Text - JochenLeidner + JochenLeidner FrankSchilder 54–59 P10-4010 @@ -2317,7 +2317,7 @@ Speech-Driven Access to the Deep Web on Mobile Devices TaniyaMishra - SrinivasBangalore + SrinivasBangalore 60–65 P10-4011 mishra-bangalore-2010-speech @@ -2333,8 +2333,8 @@ Demonstration of a Prototype for a Conversational Companion for Reminiscing about Images - YorickWilks - RobertaCatizone + YorickWilks + RobertaCatizone AlexieiDingli WeiweiCheng 72–77 @@ -2354,7 +2354,7 @@ Proceedings of the 48th Annual Meeting of the Association for Computational Linguistics: Tutorial Abstracts P10-5 - LluísMàrquez + LluísMàrquez HaifengWang Association for Computational Linguistics
Uppsala, Sweden
@@ -2370,7 +2370,7 @@ Wide-Coverage <fixed-case>NLP</fixed-case> with Linguistically Expressive Grammars JuliaHockenmaier YusukeMiyao - Josefvan Genabith + Josefvan Genabith 1 P10-5001 hockenmaier-etal-2010-wide @@ -2385,7 +2385,7 @@
Discourse Structure: Theory, Practice and Use - BonnieWebber + BonnieWebber MarkusEgg ValiaKordoni 3 @@ -2394,21 +2394,21 @@ Annotation - EduardHovy + EduardHovy 4 P10-5004 hovy-2010-annotation From Structured Prediction to Inverse Reinforcement Learning - HalDaumé III + HalDaumé III 5 P10-5005 daume-iii-2010-structured Semantic Parsing: The Task, the State of the Art and the Future - Rohit J.Kate + Rohit J.Kate Yuk WahWong 6 P10-5006 diff --git a/data/xml/P11.xml b/data/xml/P11.xml index 91cba3e629..feacd2d9ee 100644 --- a/data/xml/P11.xml +++ b/data/xml/P11.xml @@ -5,8 +5,8 @@ Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies P11-1 DekangLin - YujiMatsumoto - RadaMihalcea + YujiMatsumoto + RadaMihalcea Association for Computational Linguistics
Portland, Oregon, USA
June @@ -20,7 +20,7 @@ A Word-Class Approach to Labeling <fixed-case>PSCFG</fixed-case> Rules for Machine Translation AndreasZollmann - StephanVogel + StephanVogel 1–11 P11-1001 zollmann-vogel-2011-word @@ -37,7 +37,7 @@ Effective Use of Function Words for Rule Generalization in Forest-Based Translation XianchaoWu TakuyaMatsuzaki - Jun’ichiTsujii + Jun’ichiTsujii 22–31 P11-1003 wu-etal-2011-effective @@ -77,8 +77,8 @@ Exact Decoding of Syntactic Translation Models through <fixed-case>L</fixed-case>agrangian Relaxation - Alexander M.Rush - MichaelCollins + Alexander M.Rush + MichaelCollins 72–82 P11-1008 rush-collins-2011-exact @@ -102,8 +102,8 @@ Joint Annotation of Search Queries MichaelBendersky - W. BruceCroft - David A.Smith + W. BruceCroft + David A.Smith 102–111 P11-1011 P11-1011.Datasets.zip @@ -114,7 +114,7 @@ PengCai WeiGao AoyingZhou - Kam-FaiWong + Kam-FaiWong 112–122 P11-1012 cai-etal-2011-query @@ -131,19 +131,19 @@ Using Multiple Sources to Construct a Sentiment Sensitive Thesaurus for Cross-Domain Sentiment Classification DanushkaBollegala - DavidWeir - JohnCarroll + DavidWeir + JohnCarroll 132–141 P11-1014 bollegala-etal-2011-using Learning Word Vectors for Sentiment Analysis - Andrew L.Maas + Andrew L.Maas Raymond E.Daly Peter T.Pham DanHuang - Andrew Y.Ng + Andrew Y.Ng ChristopherPotts 142–150 P11-1015 @@ -155,7 +155,7 @@ MoYu MingZhou XiaohuaLiu - TiejunZhao + TiejunZhao 151–160 P11-1016 jiang-etal-2011-target @@ -171,9 +171,9 @@ Multi-Modal Annotation of Quest Games in Second Life - SharonGower Small - JenniferStrommer-Galley - TomekStrzalkowski + SharonGower Small + JenniferStrommer-Galley + TomekStrzalkowski 171–179 P11-1018 gower-small-etal-2011-multi @@ -181,7 +181,7 @@ A New Dataset and Method for Automatically Grading <fixed-case>ESOL</fixed-case> Texts HelenYannakoudakis - TedBriscoe + TedBriscoe BenMedlock 180–189 P11-1019 @@ -190,7 +190,7 @@ Collecting Highly Parallel Data for Paraphrase Evaluation DavidChen - WilliamDolan + WilliamDolan 190–200 P11-1020 P11-1020.Datasets.txt @@ -210,7 +210,7 @@ <fixed-case>G</fixed-case>oodness: A Method for Measuring Machine Translation Confidence NguyenBach FeiHuang - YaserAl-Onaizan + YaserAl-Onaizan 211–219 P11-1022 bach-etal-2011-goodness @@ -257,7 +257,7 @@ Learning to Win by Reading Manuals in a <fixed-case>M</fixed-case>onte-<fixed-case>C</fixed-case>arlo Framework - S.R.K.Branavan + S.R.K.Branavan DavidSilver ReginaBarzilay 268–277 @@ -275,7 +275,7 @@ Local Histograms of Character N-grams for Authorship Attribution Hugo JairEscalante ThamarSolorio - ManuelMontes-y-Gómez + ManuelMontes-y-Gómez 288–298 P11-1030 escalante-etal-2011-local @@ -283,7 +283,7 @@ Word Maturity: Computational Modeling of Word Knowledge KirillKireyev - Thomas KLandauer + Thomas KLandauer 299–308 P11-1031 kireyev-landauer-2011-word @@ -292,7 +292,7 @@ Finding Deceptive Opinion Spam by Any Stretch of the Imagination MyleOtt YejinChoi - ClaireCardie + ClaireCardie Jeffrey T.Hancock 309–319 ACL 2021 Test of Time Award (10 year) @@ -304,8 +304,8 @@ Joint Bilingual Sentiment Classification with Unlabeled Parallel Corpora BinLu ChenhaoTan - ClaireCardie - Benjamin K.Tsou + ClaireCardie + Benjamin K.Tsou 320–330 P11-1033 lu-etal-2011-joint @@ -321,7 +321,7 @@ Contrasting Opposing Views of News Articles on Contentious Issues SouneilPark - KyungSoonLee + KyungSoonLee JunehwaSong 340–349 P11-1035 @@ -349,7 +349,7 @@ Lexical Normalisation of Short Text Messages: Makn Sens a #twitter BoHan - TimothyBaldwin + TimothyBaldwin 368–378 P11-1038 P11-1038.Software.tar.bz2 @@ -358,7 +358,7 @@ Topical Keyphrase Extraction from <fixed-case>T</fixed-case>witter - XinZhao + XinZhao JingJiang JingHe YangSong @@ -389,10 +389,10 @@ Unsupervised Word Alignment with Arbitrary Features - ChrisDyer - Jonathan H.Clark - AlonLavie - Noah A.Smith + ChrisDyer + Jonathan H.Clark + AlonLavie + Noah A.Smith 409–419 P11-1042 dyer-etal-2011-unsupervised @@ -408,7 +408,7 @@ An Algorithm for Unsupervised Transliteration Mining with an Application to Word Alignment HassanSajjad - AlexanderFraser + AlexanderFraser HelmutSchmid 430–439 P11-1044 @@ -419,7 +419,7 @@ Beam-Width Prediction for Efficient Context-Free Parsing NathanBodenstab AaronDunlop - KeithHall + KeithHall BrianRoark 440–449 P11-1045 @@ -430,7 +430,7 @@ PierluigiCrescenzi DanielGildea AndreaMarino - GianlucaRossi + GianlucaRossi GiorgioSatta 450–459 P11-1046 @@ -455,7 +455,7 @@ Jointly Learning to Extract and Compress TaylorBerg-Kirkpatrick - DanGillick + DanGillick DanKlein 481–490 P11-1049 @@ -464,7 +464,7 @@ Discovery of Topically Coherent Sentences for Extractive Summarization AsliCelikyilmaz - DilekHakkani-Tür + DilekHakkani-Tür 491–499 P11-1050 celikyilmaz-hakkani-tur-2011-discovery @@ -472,7 +472,7 @@ Coherent Citation-Based Summarization of Scientific Papers AmjadAbu-Jbara - DragomirRadev + DragomirRadev 500–509 P11-1051 abu-jbara-radev-2011-coherent @@ -480,7 +480,7 @@ A Class of Submodular Functions for Document Summarization HuiLin - JeffBilmes + JeffBilmes 510–520 P11-1052 lin-bilmes-2011-class @@ -488,7 +488,7 @@ Semi-supervised Relation Extraction with Large-scale Word Clustering AngSun - RalphGrishman + RalphGrishman SatoshiSekine 521–529 P11-1053 @@ -506,11 +506,11 @@ Knowledge-Based Weak Supervision for Information Extraction of Overlapping Relations - RaphaelHoffmann + RaphaelHoffmann CongleZhang XiaoLing - LukeZettlemoyer - Daniel S.Weld + LukeZettlemoyer + Daniel S.Weld 541–550 P11-1055 hoffmann-etal-2011-knowledge @@ -525,10 +525,10 @@ Together We Can: Bilingual Bootstrapping for <fixed-case>WSD</fixed-case> - Mitesh M.Khapra + Mitesh M.Khapra SalilJoshi ArindamChatterjee - PushpakBhattacharyya + PushpakBhattacharyya 561–569 P11-1057 khapra-etal-2011-together @@ -536,7 +536,7 @@ Which Noun Phrases Denote Which Concepts? JayantKrishnamurthy - TomMitchell + TomMitchell 570–580 P11-1058 krishnamurthy-mitchell-2011-noun @@ -544,7 +544,7 @@ Semantic Representation of Negation Using Focus Detection EduardoBlanco - DanMoldovan + DanMoldovan 581–589 P11-1059 blanco-moldovan-2011-semantic @@ -552,7 +552,7 @@ Learning Dependency-Based Compositional Semantics PercyLiang - MichaelJordan + MichaelJordan DanKlein 590–599 P11-1060 @@ -594,7 +594,7 @@ An Unsupervised Model for Joint Phrase Alignment and Extraction GrahamNeubig TaroWatanabe - EiichiroSumita + EiichiroSumita ShinsukeMori TatsuyaKawahara 632–641 @@ -604,7 +604,7 @@ Learning Hierarchical Translation Structure with Linguistic Annotations MarkosMylonakis - KhalilSima’an + KhalilSima’an 642–652 P11-1065 mylonakis-simaan-2011-learning @@ -685,7 +685,7 @@ N-Best Rescoring Based on Pitch-accent Patterns Je HunJeon - WenWang + WenWang YangLiu 732–741 P11-1074 @@ -702,7 +702,7 @@ Learning to Grade Short Answer Questions using Semantic Similarity Measures and Dependency Graph Alignments MichaelMohler - RazvanBunescu + RazvanBunescu RadaMihalcea 752–762 P11-1076 @@ -712,7 +712,7 @@ Age Prediction in Blogs: A Study of Style, Content, and Online Behavior in Pre- and Post-Social Media Generations SaraRosenthal - KathleenMcKeown + KathleenMcKeown 763–772 P11-1077 rosenthal-mckeown-2011-age @@ -730,7 +730,7 @@ Bootstrapping coreference resolution using word associations HamidrezaKobdani - HinrichSchuetze + HinrichSchuetze MichaelSchiehlen HansKamp 783–792 @@ -740,7 +740,7 @@ Large-Scale Cross-Document Coreference Using Distributed Inference and Hierarchical Models SameerSingh - AmarnagSubramanya + AmarnagSubramanya FernandoPereira AndrewMcCallum 793–803 @@ -750,7 +750,7 @@ A Cross-Lingual <fixed-case>ILP</fixed-case> Solution to Zero Anaphora Resolution RyuIida - MassimoPoesio + MassimoPoesio 804–813 P11-1081 iida-poesio-2011-cross @@ -784,7 +784,7 @@ Learning to Transform and Select Elementary Trees for Improved Syntax-based Machine Translations BingZhao Young-SukLee - XiaoqiangLuo + XiaoqiangLuo LiuLi 846–855 P11-1085 @@ -802,8 +802,8 @@ A Hierarchical <fixed-case>P</fixed-case>itman-<fixed-case>Y</fixed-case>or Process <fixed-case>HMM</fixed-case> for Unsupervised Part of Speech Induction - PhilBlunsom - TrevorCohn + PhilBlunsom + TrevorCohn 865–874 P11-1087 blunsom-cohn-2011-hierarchical @@ -811,16 +811,16 @@ Using Deep Morphology to Improve Automatic Error Detection in <fixed-case>A</fixed-case>rabic Handwriting Recognition NizarHabash - RyanRoth + RyanRoth 875–884 P11-1088 habash-roth-2011-using A Discriminative Model for Joint Morphological Disambiguation and Dependency Parsing - JohnLee + JohnLee JasonNaradowsky - David A.Smith + David A.Smith 885–894 P11-1089 lee-etal-2011-discriminative @@ -861,8 +861,8 @@ Automated Whole Sentence Grammar Correction Using a Noisy Channel Model - Y. AlbertPark - RogerLevy + Y. AlbertPark + RogerLevy 934–944 P11-1094 park-levy-2011-automated @@ -877,7 +877,7 @@ Simple supervised document geolocation with geodesic grids - BenjaminWing + BenjaminWing JasonBaldridge 955–964 P11-1096 @@ -885,18 +885,18 @@ <fixed-case>P</fixed-case>iggyback: Using Search Engines for Robust Cross-Domain Named Entity Recognition - StefanRüd + StefanRüd MassimilianoCiaramita JensMüller - HinrichSchütze + HinrichSchütze 965–975 P11-1097 rud-etal-2011-piggyback Template-Based Information Extraction without the Templates - NathanaelChambers - DanJurafsky + NathanaelChambers + DanJurafsky 976–986 P11-1098 chambers-jurafsky-2011-template @@ -920,7 +920,7 @@ Underspecifying and Predicting Voice for Surface Realisation Ranking - SinaZarrieß + SinaZarrieß AoifeCahill JonasKuhn 1007–1017 @@ -930,7 +930,7 @@ Recognizing Authority in Dialogue with an Integer Linear Programming Constrained Model ElijahMayfield - CarolynPenstein Rosé + CarolynPenstein Rosé 1018–1026 P11-1102 mayfield-penstein-rose-2011-recognizing @@ -958,14 +958,14 @@ A Joint Sequence Translation Model with Integrated Reordering NadirDurrani HelmutSchmid - AlexanderFraser + AlexanderFraser 1045–1054 P11-1105 durrani-etal-2011-joint Integrating surprisal and uncertain-input models in online sentence comprehension: formal techniques and empirical results - RogerLevy + RogerLevy 1055–1065 P11-1106 levy-2011-integrating @@ -993,7 +993,7 @@ ChikaraHashimoto KentaroTorisawa StijnDe Saeger - Jun’ichiKazama + Jun’ichiKazama SadaoKurohashi 1087–1097 P11-1109 @@ -1002,7 +1002,7 @@ Learning From Collective Human Behavior to Introduce Diversity in Lexical Choice VahedQazvinian - Dragomir R.Radev + Dragomir R.Radev 1098–1108 P11-1110 qazvinian-radev-2011-learning @@ -1028,9 +1028,9 @@ YuHong JianfengZhang BinMa - JianminYao - GuodongZhou - QiaomingZhu + JianminYao + GuodongZhou + QiaomingZhu 1127–1136 P11-1113 hong-etal-2011-using @@ -1038,7 +1038,7 @@ Peeling Back the Layers: Detecting Event Role Fillers in Secondary Contexts RuihongHuang - EllenRiloff + EllenRiloff 1137–1147 P11-1114 huang-riloff-2011-peeling @@ -1046,7 +1046,7 @@ Knowledge Base Population: Successful Approaches and Challenges HengJi - RalphGrishman + RalphGrishman 1148–1158 P11-1115 ji-grishman-2011-knowledge @@ -1057,7 +1057,7 @@ ShumingShi JingLiu ShuqiSun - Chin-YewLin + Chin-YewLin 1159–1168 P11-1116 zhang-etal-2011-nonlinear @@ -1065,7 +1065,7 @@ A Pronoun Anaphora Resolution System based on Factorial Hidden <fixed-case>M</fixed-case>arkov Models DingchengLi - TimMiller + TimMiller WilliamSchuler 1169–1178 P11-1117 @@ -1082,11 +1082,11 @@ An Affect-Enriched Dialogue Act Classification Model for Task-Oriented Dialogue - KristyBoyer - JosephGrafsgaard - Eun YoungHa - RobertPhillips - JamesLester + KristyBoyer + JosephGrafsgaard + Eun YoungHa + RobertPhillips + JamesLester 1190–1199 P11-1119 boyer-etal-2011-affect @@ -1094,7 +1094,7 @@ Fine-Grained Class Label Markup of Search Queries JosephReisinger - MariusPaşca + MariusPaşca 1200–1209 P11-1120 reisinger-pasca-2011-fine @@ -1110,7 +1110,7 @@ Crowdsourcing Translation: Professional Quality from Non-Professionals - Omar F.Zaidan + Omar F.Zaidan ChrisCallison-Burch 1220–1229 P11-1122 @@ -1119,7 +1119,7 @@ A Statistical Tree Annotator and Its Applications - XiaoqiangLuo + XiaoqiangLuo BingZhao 1230–1238 P11-1123 @@ -1130,7 +1130,7 @@ YanjunMa YifanHe AndyWay - Josefvan Genabith + Josefvan Genabith 1239–1248 P11-1124 ma-etal-2011-consistent @@ -1138,7 +1138,7 @@ Machine Translation System Combination by Confusion Forest TaroWatanabe - EiichiroSumita + EiichiroSumita 1249–1257 P11-1125 watanabe-sumita-2011-machine @@ -1154,9 +1154,9 @@ Minimum <fixed-case>B</fixed-case>ayes-risk System Combination - JesúsGonzález-Rubio - AlfonsJuan - FranciscoCasacuberta + JesúsGonzález-Rubio + AlfonsJuan + FranciscoCasacuberta 1268–1277 P11-1127 gonzalez-rubio-etal-2011-minimum @@ -1165,14 +1165,14 @@ Adjoining Tree-to-String Translation YangLiu QunLiu - Yajuan + Yajuan 1278–1287 P11-1128 liu-etal-2011-adjoining Enhancing Language Models in Statistical Machine Translation with Backward N-grams and Mutual Information Triggers - DeyiXiong + DeyiXiong MinZhang HaizhouLi 1288–1297 @@ -1181,7 +1181,7 @@ Translating from Morphologically Complex Languages: A Paraphrase-Based Approach - PreslavNakov + PreslavNakov Hwee TouNg 1298–1307 P11-1130 @@ -1191,7 +1191,7 @@ Gappy Phrasal Alignment By Agreement MohitBansal ChrisQuirk - RobertMoore + RobertMoore 1308–1317 P11-1131 bansal-etal-2011-gappy @@ -1215,7 +1215,7 @@ Using Bilingual Parallel Corpora for Cross-Lingual Textual Entailment YasharMehdad - MatteoNegri + MatteoNegri MarcelloFederico 1336–1345 P11-1134 @@ -1225,7 +1225,7 @@ Using Large Monolingual and Bilingual Corpora to Improve Coordination Disambiguation ShaneBergsma DavidYarowsky - KennethChurch + KennethChurch 1346–1355 P11-1135 P11-1135.Datasets.tar.bz2 @@ -1244,15 +1244,15 @@ Discovering Sociolinguistic Associations with Structured Sparsity JacobEisenstein - Noah A.Smith - Eric P.Xing + Noah A.Smith + Eric P.Xing 1365–1374 P11-1137 eisenstein-etal-2011-discovering Local and Global Algorithms for Disambiguation to <fixed-case>W</fixed-case>ikipedia - LevRatinov + LevRatinov DanRoth DougDowney MikeAnderson @@ -1262,7 +1262,7 @@ A Stacked Sub-Word Model for Joint <fixed-case>C</fixed-case>hinese Word Segmentation and Part-of-Speech Tagging - WeiweiSun + WeiweiSun 1385–1394 P11-1139 sun-2011-stacked @@ -1272,8 +1272,8 @@ KlausMacherey AndrewDai DavidTalbot - AshokPopat - FranzOch + AshokPopat + FranzOch 1395–1404 P11-1140 macherey-etal-2011-language @@ -1287,8 +1287,8 @@ A Simple Measure to Assess Non-response - AnselmoPeñas - AlvaroRodrigo + AnselmoPeñas + AlvaroRodrigo 1415–1424 P11-1142 penas-rodrigo-2011-simple @@ -1304,7 +1304,7 @@ Semi-Supervised Frame-Semantic Parsing for Unknown Predicates DipanjanDas - Noah A.Smith + Noah A.Smith 1435–1444 P11-1144 das-smith-2011-semi @@ -1320,7 +1320,7 @@ Unsupervised Learning of Semantic Relation Composition EduardoBlanco - DanMoldovan + DanMoldovan 1456–1465 P11-1146 blanco-moldovan-2011-unsupervised @@ -1329,15 +1329,15 @@ Unsupervised Discovery of Domain-Specific Knowledge from Text DirkHovy ChunliangZhang - EduardHovy - AnselmoPeñas + EduardHovy + AnselmoPeñas 1466–1475 P11-1147 hovy-etal-2011-unsupervised Latent Semantic Word Sense Induction and Disambiguation - TimVan de Cruys + TimVan de Cruys MariannaApidianaki 1476–1485 P11-1148 @@ -1358,23 +1358,23 @@ JianxingYu Zheng-JunZha MengWang - Tat-SengChua + Tat-SengChua 1496–1505 P11-1150 yu-etal-2011-aspect Collective Classification of Congressional Floor-Debate Transcripts - ClintonBurfoot + ClintonBurfoot StevenBird - TimothyBaldwin + TimothyBaldwin 1506–1515 P11-1151 burfoot-etal-2011-collective Integrating history-length interpolation and classes in language modeling - HinrichSchütze + HinrichSchütze 1516–1525 P11-1152 schutze-2011-integrating @@ -1383,7 +1383,7 @@ Structural Topic Model for Latent Topical Structure Analysis HongningWang DuoZhang - ChengXiangZhai + ChengXiangZhai 1526–1535 P11-1153 wang-etal-2011-structural @@ -1393,7 +1393,7 @@ Jey HanLau KarlGrieser DavidNewman - TimothyBaldwin + TimothyBaldwin 1536–1545 P11-1154 lau-etal-2011-automatic @@ -1417,8 +1417,8 @@ Effective Measures of Domain Similarity for Parsing - BarbaraPlank - Gertjanvan Noord + BarbaraPlank + Gertjanvan Noord 1566–1576 P11-1157 plank-van-noord-2011-effective @@ -1435,15 +1435,15 @@ Improving <fixed-case>A</fixed-case>rabic Dependency Parsing with Form-based and Functional Morphological Features YuvalMarton NizarHabash - OwenRambow + OwenRambow 1586–1596 P11-1159 marton-etal-2011-improving Partial Parsing from Bitext Projections - PrashanthMannem - AswarthDara + PrashanthMannem + AswarthDara 1597–1606 P11-1160 P11-1160.Software.tar.bz2 @@ -1451,7 +1451,7 @@ Ranking Class Labels Using Query Sessions - MariusPaşca + MariusPaşca 1607–1615 P11-1161 pasca-2011-ranking @@ -1459,7 +1459,7 @@ Insights from Network Structure for Text Mining ZornitsaKozareva - EduardHovy + EduardHovy 1616–1625 P11-1162 kozareva-hovy-2011-insights @@ -1468,7 +1468,7 @@ Event Extraction as Dependency Parsing DavidMcClosky MihaiSurdeanu - ChristopherManning + ChristopherManning 1626–1635 P11-1163 mcclosky-etal-2011-event @@ -1487,8 +1487,8 @@ Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies P11-2 DekangLin - YujiMatsumoto - RadaMihalcea + YujiMatsumoto + RadaMihalcea Association for Computational Linguistics
Portland, Oregon, USA
June @@ -1502,7 +1502,7 @@ Lexicographic Semirings for Exact Automata Encoding of Sequence Models BrianRoark - RichardSproat + RichardSproat IzhakShafran 1–5 P11-2001 @@ -1511,7 +1511,7 @@ Good Seed Makes a Good Crop: Accelerating Active Learning Using Language Modeling DmitriyDligach - MarthaPalmer + MarthaPalmer 6–10 P11-2002 dligach-palmer-2011-good @@ -1519,7 +1519,7 @@ Temporal Restricted Boltzmann Machines for Dependency Parsing NikhilGarg - JamesHenderson + JamesHenderson 11–17 P11-2003 garg-henderson-2011-temporal @@ -1552,7 +1552,7 @@ The <fixed-case>A</fixed-case>rabic Online Commentary Dataset: an Annotated Dataset of Informal <fixed-case>A</fixed-case>rabic with High Dialectal Content - Omar F.Zaidan + Omar F.Zaidan ChrisCallison-Burch 37–41 P11-2007 @@ -1565,19 +1565,19 @@ NathanSchneider BrendanO’Connor DipanjanDas - DanielMills + DanielMills JacobEisenstein MichaelHeilman DaniYogatama JeffreyFlanigan - Noah A.Smith + Noah A.Smith 42–47 P11-2008 gimpel-etal-2011-part Semi-supervised condensed nearest neighbor for part-of-speech tagging - AndersSøgaard + AndersSøgaard 48–52 P11-2009 sogaard-2011-semi @@ -1642,7 +1642,7 @@ That’s What She Said: Double Entendre Identification - ChloéKiddon + ChloéKiddon YuriyBrun 89–94 P11-2016 @@ -1666,7 +1666,7 @@ Subjective Natural Language Problems: Motivations, Applications, Characterizations, and Implications - CeciliaOvesdotter Alm + CeciliaOvesdotter Alm 107–112 P11-2019 ovesdotter-alm-2011-subjective @@ -1674,8 +1674,8 @@ Entrainment in Speech Preceding Backchannels. RivkaLevitan - AgustínGravano - JuliaHirschberg + AgustínGravano + JuliaHirschberg 113–117 P11-2020 levitan-etal-2011-entrainment @@ -1683,7 +1683,7 @@ Question Detection in Spoken Conversations Using Textual Conversations AnnaMargolis - MariOstendorf + MariOstendorf 118–124 P11-2021 margolis-ostendorf-2011-question @@ -1718,8 +1718,8 @@ A Corpus of Scope-disambiguated <fixed-case>E</fixed-case>nglish Text MehdiManshadi - JamesAllen - MarySwift + JamesAllen + MarySwift 141–146 P11-2025 manshadi-etal-2011-corpus @@ -1727,15 +1727,15 @@ From Bilingual Dictionaries to Interlingual Document Representations JagadeeshJagarlamudi - HalDaumé III - RaghavendraUdupa + HalDaumé III + RaghavendraUdupa 147–152 P11-2026 jagarlamudi-etal-2011-bilingual <fixed-case>AM</fixed-case>-<fixed-case>FM</fixed-case>: A Semantic Framework for Translation Quality Assessment - Rafael E.Banchs + Rafael E.Banchs HaizhouLi 153–158 P11-2027 @@ -1744,7 +1744,7 @@ Automatic Evaluation of <fixed-case>C</fixed-case>hinese Translation Output: Word-Level or Character-Level? MaoxiLi - ChengqingZong + ChengqingZong Hwee TouNg 159–164 P11-2028 @@ -1761,17 +1761,17 @@ Word Alignment via Submodular Maximization over Matroids HuiLin - JeffBilmes + JeffBilmes 170–175 P11-2030 lin-bilmes-2011-word Better Hypothesis Testing for Statistical Machine Translation: Controlling for Optimizer Instability - Jonathan H.Clark - ChrisDyer - AlonLavie - Noah A.Smith + Jonathan H.Clark + ChrisDyer + AlonLavie + Noah A.Smith 176–181 P11-2031 clark-etal-2011-better @@ -1779,7 +1779,7 @@ <fixed-case>B</fixed-case>ayesian Word Alignment for Statistical Machine Translation CoşkunMermer - MuratSaraçlar + MuratSaraçlar 182–187 P11-2032 P11-2032.Software.txt @@ -1796,8 +1796,8 @@ Reversible Stochastic Attribute-Value Grammars Daniëlde Kok - BarbaraPlank - Gertjanvan Noord + BarbaraPlank + Gertjanvan Noord 194–199 P11-2034 de-kok-etal-2011-reversible @@ -1839,14 +1839,14 @@ Query Snowball: A Co-occurrence-based Approach to Multi-document Summarization for Question Answering HajimeMorita TetsuyaSakai - ManabuOkumura + ManabuOkumura 223–229 P11-2039 morita-etal-2011-query Discrete vs. Continuous Rating Scales for Language Evaluation in <fixed-case>NLP</fixed-case> - AnjaBelz + AnjaBelz EricKow 230–235 P11-2040 @@ -1881,7 +1881,7 @@ Optimal and Syntactically-Informed Decoding for Monolingual Phrase-Based Alignment KapilThadani - KathleenMcKeown + KathleenMcKeown 254–259 P11-2044 thadani-mckeown-2011-optimal @@ -1889,7 +1889,7 @@ Can Document Selection Help Semi-supervised Learning? A Case Study On Event Extraction ShashaLiao - RalphGrishman + RalphGrishman 260–265 P11-2045 liao-grishman-2011-document @@ -1898,8 +1898,8 @@ Relation Guided Bootstrapping of Semantic Lexicons TaraMcIntosh LarsYencken - James R.Curran - TimothyBaldwin + James R.Curran + TimothyBaldwin 266–270 P11-2046 mcintosh-etal-2011-relation @@ -1908,7 +1908,7 @@ Model-Portability Experiments for Textual Temporal Analysis OleksandrKolomiyets StevenBethard - Marie-FrancineMoens + Marie-FrancineMoens 271–276 P11-2047 kolomiyets-etal-2011-model @@ -1934,7 +1934,7 @@ Coreference for Learning to Extract Relations: Yes <fixed-case>V</fixed-case>irginia, Coreference Matters RyanGabbard MarjorieFreedman - RalphWeischedel + RalphWeischedel 288–293 P11-2050 gabbard-etal-2011-coreference @@ -1942,16 +1942,16 @@ Corpus Expansion for Statistical Machine Translation with Semantic Role Label Substitution Rules QinGao - StephanVogel + StephanVogel 294–298 P11-2051 gao-vogel-2011-corpus Scaling up Automatic Cross-Lingual Semantic Role Annotation - Lonnekevan der Plas + Lonnekevan der Plas PaolaMerlo - JamesHenderson + JamesHenderson 299–304 P11-2052 P11-2052.Datasets.txt @@ -1960,10 +1960,10 @@ Towards Tracking Semantic Change by Visual Analytics ChristianRohrdantz - AnnetteHautli + AnnetteHautli ThomasMayer MiriamButt - Daniel A.Keim + Daniel A.Keim FransPlank 305–310 P11-2053 @@ -1973,8 +1973,8 @@ Improving Classification of Medical Assertions in Clinical Notes YoungjunKim - EllenRiloff - StéphaneMeystre + EllenRiloff + StéphaneMeystre 311–316 P11-2054 kim-etal-2011-improving @@ -1982,7 +1982,7 @@ <fixed-case>P</fixed-case>ara<fixed-case>S</fixed-case>ense or How to Use Parallel Corpora for Word Sense Disambiguation ElsLefever - VéroniqueHoste + VéroniqueHoste MartineDe Cock 317–322 P11-2055 @@ -1994,7 +1994,7 @@ AshishVaswani StephenTratz DavidChiang - EduardHovy + EduardHovy 323–328 P11-2056 hovy-etal-2011-models @@ -2021,7 +2021,7 @@ MarjorieFreedman AlexBaron VasinPunyakanok - RalphWeischedel + RalphWeischedel 341–345 P11-2059 freedman-etal-2011-language @@ -2029,7 +2029,7 @@ Automatic Detection and Correction of Errors in Dependency Treebanks AlexanderVolokh - GünterNeumann + GünterNeumann 346–350 P11-2060 volokh-neumann-2011-automatic @@ -2037,7 +2037,7 @@ Temporal Evaluation NaushadUzZaman - JamesAllen + JamesAllen 351–356 P11-2061 P11-2061.Software.zip @@ -2054,7 +2054,7 @@ <fixed-case>NULEX</fixed-case>: An Open-License Broad Coverage Lexicon CliftonMcFate - KennethForbus + KennethForbus 363–367 P11-2063 P11-2063.Datasets.tar.bz2 @@ -2062,14 +2062,14 @@ <fixed-case>E</fixed-case>ven the Abstract have Color: Consensus in Word-Colour Associations - SaifMohammad + SaifMohammad 368–373 P11-2064 mohammad-2011-even Detection of Agreement and Disagreement in Broadcast Conversations - WenWang + WenWang SibelYaman KristinPrecoda ColleenRichey @@ -2081,8 +2081,8 @@ Dealing with Spurious Ambiguity in Learning <fixed-case>ITG</fixed-case>-based Word Alignment ShujianHuang - StephanVogel - JiajunChen + StephanVogel + JiajunChen 379–383 P11-2066 huang-etal-2011-dealing @@ -2099,9 +2099,9 @@ Improving On-line Handwritten Recognition using Translation Models in Multimodal Interactive Machine Translation - VicentAlabau + VicentAlabau AlbertoSanchis - FranciscoCasacuberta + FranciscoCasacuberta 389–394 P11-2068 alabau-etal-2011-improving @@ -2127,7 +2127,7 @@ Domain Adaptation for Machine Translation by Mining Unseen Words - HalDaumé III + HalDaumé III JagadeeshJagarlamudi 407–412 P11-2071 @@ -2169,9 +2169,9 @@ Reordering Constraint Based on Document-Level Context - TakashiOnishi + TakashiOnishi MasaoUtiyama - EiichiroSumita + EiichiroSumita 434–438 P11-2076 onishi-etal-2011-reordering @@ -2188,7 +2188,7 @@ On-line Language Model Biasing for Statistical Machine Translation SankaranarayananAnanthakrishnan RohitPrasad - PremNatarajan + PremNatarajan 445–449 P11-2078 ananthakrishnan-etal-2011-line @@ -2197,9 +2197,9 @@ Reordering Modeling using Weighted Alignment Matrices WangLing TiagoLuís - JoãoGraça + JoãoGraça IsabelTrancoso - LuísaCoheur + LuísaCoheur 450–454 P11-2079 ling-etal-2011-reordering @@ -2224,16 +2224,16 @@ “<fixed-case>I</fixed-case> Thou Thee, Thou Traitor”: Predicting Formal vs. Informal Address in <fixed-case>E</fixed-case>nglish Literature ManaalFaruqui - SebastianPadó + SebastianPadó 467–472 P11-2082 faruqui-pado-2011-thou Clustering Comparable Corpora For Bilingual Lexicon Extraction - BoLi - EricGaussier - AkikoAizawa + BoLi + EricGaussier + AkikoAizawa 473–478 P11-2083 li-etal-2011-clustering @@ -2242,7 +2242,7 @@ Identifying Word Translations from Comparable Corpora Using Latent Topic Models IvanVulić WimDe Smet - Marie-FrancineMoens + Marie-FrancineMoens 479–484 P11-2084 vulic-etal-2011-identifying @@ -2253,7 +2253,7 @@ LixingXie ZhiyuanLiu MaosongSun - YangZhang + YangZhang LiyunRu 485–490 P11-2085 @@ -2262,7 +2262,7 @@ Automatic Assessment of Coverage Quality in Intelligence Reports SamuelBrody - PaulKantor + PaulKantor 491–495 P11-2086 brody-kantor-2011-automatic @@ -2271,7 +2271,7 @@ Putting it Simply: a Context-Aware Approach to Lexical Simplification OrBiran SamuelBrody - NoémieElhadad + NoémieElhadad 496–501 P11-2087 biran-etal-2011-putting @@ -2279,7 +2279,7 @@ Automatically Predicting Peer-Review Helpfulness WentingXiong - DianeLitman + DianeLitman 502–507 P11-2088 P11-2088.Datasets.zip @@ -2288,8 +2288,8 @@ They Can Help: Using Crowdsourcing to Improve the Evaluation of Grammatical Error Detection Systems NitinMadnani - MartinChodorow - JoelTetreault + MartinChodorow + JoelTetreault AllaRozovskaya 508–513 P11-2089 @@ -2314,8 +2314,8 @@ Improved Modeling of Out-Of-Vocabulary Words Using Morphological Classes - ThomasMueller - HinrichSchuetze + ThomasMueller + HinrichSchuetze 524–528 P11-2092 mueller-schuetze-2011-improved @@ -2334,7 +2334,7 @@ Nonparametric <fixed-case>B</fixed-case>ayesian Machine Transliteration with Synchronous <fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars YunHuang MinZhang - Chew LimTan + Chew LimTan 534–539 P11-2094 huang-etal-2011-nonparametric @@ -2350,7 +2350,7 @@ An Empirical Evaluation of Data-Driven Paraphrase Generation Techniques DonaldMetzler - EduardHovy + EduardHovy ChunliangZhang 546–551 P11-2096 @@ -2378,7 +2378,7 @@ PaulaCarvalho LuísSarmento JorgeTeixeira - Mário J.Silva + Mário J.Silva 564–568 P11-2099 carvalho-etal-2011-liars @@ -2411,7 +2411,7 @@ Subjectivity and Sentiment Analysis of <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic MuhammadAbdul-Mageed - MonaDiab + MonaDiab MohammedKorayem 587–591 P11-2103 @@ -2419,10 +2419,10 @@ Identifying the Semantic Orientation of Foreign Words - AhmedHassan + AhmedHassan AmjadAbu-Jbara RahulJha - DragomirRadev + DragomirRadev 592–597 P11-2104 hassan-etal-2011-identifying @@ -2430,7 +2430,7 @@ Hierarchical Text Classification with Latent Concepts XipengQiu - XuanjingHuang + XuanjingHuang ZhaoLiu JinlongZhou 598–602 @@ -2449,7 +2449,7 @@ Predicting Relative Prominence in Noun-Noun Compounds TaniyaMishra - SrinivasBangalore + SrinivasBangalore 609–613 P11-2107 mishra-bangalore-2011-predicting @@ -2457,7 +2457,7 @@ Contrasting Multi-Lingual Prosodic Cues to Predict Verbal Feedback for Rapport SiweiWang - Gina-AnneLevow + Gina-AnneLevow 614–619 P11-2108 wang-levow-2011-contrasting @@ -2465,7 +2465,7 @@ Generalized Interpolation in Decision Tree <fixed-case>LM</fixed-case> DenisFilimonov - MaryHarper + MaryHarper 620–624 P11-2109 filimonov-harper-2011-generalized @@ -2522,9 +2522,9 @@ Does Size Matter – How Much Data is Required to Train a <fixed-case>REG</fixed-case> Algorithm? - MariëtTheune + MariëtTheune RuudKoolen - EmielKrahmer + EmielKrahmer SanderWubben 660–664 P11-2116 @@ -2532,7 +2532,7 @@ <fixed-case>S</fixed-case>imple <fixed-case>E</fixed-case>nglish <fixed-case>W</fixed-case>ikipedia: A New Text Simplification Task - WilliamCoster + WilliamCoster DavidKauchak 665–669 P11-2117 @@ -2542,7 +2542,7 @@ A Hierarchical Model of Web Summaries YvesPetinot - KathleenMcKeown + KathleenMcKeown KapilThadani 670–675 P11-2118 @@ -2559,15 +2559,15 @@ Data point selection for cross-language adaptation of dependency parsers - AndersSøgaard + AndersSøgaard 682–686 P11-2120 sogaard-2011-data Getting the Most out of Transition-based Dependency Parsing - Jinho D.Choi - MarthaPalmer + Jinho D.Choi + MarthaPalmer 687–692 P11-2121 choi-palmer-2011-getting @@ -2583,9 +2583,9 @@ Improving Dependency Parsing with Semantic Classes - EnekoAgirre + EnekoAgirre KepaBengoetxea - KoldoGojenola + KoldoGojenola JoakimNivre 699–703 P11-2123 @@ -2601,7 +2601,7 @@ An Ensemble Model that Combines Syntactic and Semantic Clustering for Discriminative Dependency Parsing - GholamrezaHaffari + GholamrezaHaffari MarziehRazavi AnoopSarkar 710–714 @@ -2640,8 +2640,8 @@ Proceedings of the ACL 2011 Student Session P11-3 - SasaPetrovic - EthanSelfridge + SasaPetrovic + EthanSelfridge EmilyPitler MilesOsborne ThamarSolorio @@ -2688,7 +2688,7 @@ Extracting and Classifying <fixed-case>U</fixed-case>rdu Multiword Expressions - AnnetteHautli + AnnetteHautli SebastianSulger 24–29 P11-3005 @@ -2704,7 +2704,7 @@ Syntax-based Statistical Machine Translation using Tree Automata and Tree Transducers - Daniel EmilioBeck + Daniel EmilioBeck 36–40 P11-3007 beck-2011-syntax @@ -2834,15 +2834,15 @@ <fixed-case>H</fixed-case>indi to <fixed-case>P</fixed-case>unjabi Machine Translation System VishalGoyal - GurpreetSingh Lehal + GurpreetSingh Lehal 1–6 P11-4001 goyal-singh-lehal-2011-hindi The <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology Searchbench - UlrichSchäfer - BerndKiefer + UlrichSchäfer + BerndKiefer ChristianSpurk JörgSteffen RuiWang @@ -2860,7 +2860,7 @@ A Mobile Touchable Application for Online Topic Graph Extraction and Exploration of Web Content - GünterNeumann + GünterNeumann SvenSchmeier 20–25 P11-4004 @@ -2868,10 +2868,10 @@ <fixed-case>E</fixed-case>d<fixed-case>I</fixed-case>t: A Broad-Coverage Grammar Checker Using Pattern Grammar - Chung-ChiHuang - Mei-HuaChen - Shih-TingHuang - Jason S.Chang + Chung-ChiHuang + Mei-HuaChen + Shih-TingHuang + Jason S.Chang 26–31 P11-4005 huang-etal-2011-edit @@ -2881,7 +2881,7 @@ Cheng-TeLi Chien-YuanWang Chien-LinTseng - Shou-DeLin + Shou-DeLin 32–37 P11-4006 li-etal-2011-memetube @@ -2913,7 +2913,7 @@ Dr Sentiment Knows Everything! AmitavaDas - SivajiBandyopadhyay + SivajiBandyopadhyay 50–55 P11-4009 das-bandyopadhyay-2011-dr @@ -2928,18 +2928,18 @@ Prototyping virtual instructors from human-human corpora LucianaBenotti - AlexandreDenis + AlexandreDenis 62–67 P11-4011 benotti-denis-2011-prototyping An Interactive Machine Translation System with Online Learning - DanielOrtiz-Martínez - Luis A.Leiva - VicentAlabau - IsmaelGarcía-Varea - FranciscoCasacuberta + DanielOrtiz-Martínez + Luis A.Leiva + VicentAlabau + IsmaelGarcía-Varea + FranciscoCasacuberta 68–73 P11-4012 ortiz-martinez-etal-2011-interactive @@ -2956,7 +2956,7 @@ A Speech-based Just-in-Time Retrieval System using Semantic Search - AndreiPopescu-Belis + AndreiPopescu-Belis MajidYazdani AlexandreNanchen Philip N.Garner @@ -2967,10 +2967,10 @@ <fixed-case>MACAON</fixed-case> An <fixed-case>NLP</fixed-case> Tool Suite for Processing Word Lattices AlexisNasr - FrédéricBéchet + FrédéricBéchet Jean-FrançoisRey - BenoîtFavre - JosephLe Roux + BenoîtFavre + JosephLe Roux 86–91 P11-4015 nasr-etal-2011-macaon @@ -2996,7 +2996,7 @@ An Efficient Indexer for Large N-Gram Corpora HakanCeylan - RadaMihalcea + RadaMihalcea 103–108 P11-4018 ceylan-mihalcea-2011-efficient @@ -3004,7 +3004,7 @@ <fixed-case>S</fixed-case>ystem<fixed-case>T</fixed-case>: A Declarative Information Extraction System YunyaoLi - FrederickReiss + FrederickReiss LauraChiticariu 109–114 P11-4019 @@ -3014,8 +3014,8 @@ <fixed-case>S</fixed-case>ci<fixed-case>S</fixed-case>umm: A Multi-Document Summarization System for Scientific Articles NitinAgarwal Ravi ShankarReddy - KiranGvr - Carolyn PensteinRosé + KiranGvr + Carolyn PensteinRosé 115–120 P11-4020 agarwal-etal-2011-scisumm @@ -3023,7 +3023,7 @@ <fixed-case>C</fixed-case>lairlib: A Toolkit for Natural Language Processing, Information Retrieval, and Network Analysis AmjadAbu-Jbara - DragomirRadev + DragomirRadev 121–126 P11-4021 abu-jbara-radev-2011-clairlib @@ -3031,8 +3031,8 @@ <fixed-case>C</fixed-case>-Feel-It: A Sentiment Analyzer for Micro-blogs AdityaJoshi - BalamuraliAR - PushpakBhattacharyya + BalamuraliAR + PushpakBhattacharyya RajatMohanty 127–132 P11-4022 @@ -3044,7 +3044,7 @@ Cheng-LunYang Bo-NianChen Yen-KaiWang - Shou-DeLin + Shou-DeLin 133–138 P11-4023 weng-etal-2011-imass @@ -3077,7 +3077,7 @@ Beyond Structured Prediction: Inverse Reinforcement Learning - HalDaumé III + HalDaumé III 1 P11-5001 daume-iii-2011-beyond @@ -3086,7 +3086,7 @@ Formal and Empirical Grammatical Inference JeffreyHeinz Colinde la Higuera - Mennovan Zannen + Mennovan Zannen 2 P11-5002 heinz-etal-2011-formal @@ -3102,7 +3102,7 @@ Web Search Queries as a Corpus - MariusPaşca + MariusPaşca 4 P11-5004 pasca-2011-web @@ -3111,15 +3111,15 @@ Rich Prior Knowledge in Learning for Natural Language Processing GregoryDruck KuzmanGanchev - JoãoGraça + JoãoGraça 5 P11-5005 druck-etal-2011-rich Dual Decomposition for Natural Language Processing - MichaelCollins - Alexander M.Rush + MichaelCollins + Alexander M.Rush 6 P11-5006 collins-rush-2011-dual diff --git a/data/xml/P12.xml b/data/xml/P12.xml index cea56821b4..8a416c8e9d 100644 --- a/data/xml/P12.xml +++ b/data/xml/P12.xml @@ -5,10 +5,10 @@ Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) P12-1 HaizhouLi - Chin-YewLin + Chin-YewLin MilesOsborne - Gary GeunbaeLee - Jong C.Park + Gary GeunbaeLee + Jong C.Park Association for Computational Linguistics
Jeju Island, Korea
July @@ -34,7 +34,7 @@ Joint Feature Selection in Distributed Stochastic Learning for Large-Scale Discriminative Training in <fixed-case>SMT</fixed-case> PatrickSimianer StefanRiezler - ChrisDyer + ChrisDyer 11–21 P12-1002 simianer-etal-2012-joint @@ -52,15 +52,15 @@
Probabilistic Integration of Partial Lexical Information for Noise Robust Haptic Voice Recognition - Khe ChaiSim + Khe ChaiSim 31–39 P12-1004 sim-2012-probabilistic A Nonparametric <fixed-case>B</fixed-case>ayesian Approach to Acoustic Model Discovery - Chia-yingLee - JamesGlass + Chia-yingLee + JamesGlass 40–49 P12-1005 lee-glass-2012-nonparametric @@ -103,14 +103,14 @@ Extracting Narrative Timelines as Temporal Dependency Structures OleksandrKolomiyets StevenBethard - Marie-FrancineMoens + Marie-FrancineMoens 88–97 P12-1010 kolomiyets-etal-2012-extracting Labeling Documents with Timestamps: Learning from their Time Expressions - NathanaelChambers + NathanaelChambers 98–106 P12-1011 chambers-2012-labeling @@ -118,9 +118,9 @@ Temporally Anchored Relation Extraction GuillermoGarrido - AnselmoPeñas + AnselmoPeñas BernardoCabaleiro - ÁlvaroRodrigo + ÁlvaroRodrigo 107–116 P12-1012 garrido-etal-2012-temporally @@ -137,7 +137,7 @@ Learning High-Level Planning from Text - S.R.K.Branavan + S.R.K.Branavan NateKushman TaoLei ReginaBarzilay @@ -148,9 +148,9 @@ Distributional Semantics in Technicolor EliaBruni - GemmaBoleda - MarcoBaroni - Nam-KhanhTran + GemmaBoleda + MarcoBaroni + Nam-KhanhTran 136–145 P12-1015 bruni-etal-2012-distributional @@ -167,7 +167,7 @@ Deciphering Foreign Language by Combining Language Models and Context Vectors MalteNuhn ArneMauser - HermannNey + HermannNey 156–164 P12-1017 nuhn-etal-2012-deciphering @@ -186,7 +186,7 @@ Fast Syntactic Analysis for Statistical Language Modeling via Substructure Sharing and Uptraining AriyaRastrow MarkDredze - SanjeevKhudanpur + SanjeevKhudanpur 175–183 P12-1019 rastrow-etal-2012-fast @@ -194,7 +194,7 @@ Bootstrapping a Unified Model of Lexical and Phonetic Acquisition MichaElsner - SharonGoldwater + SharonGoldwater JacobEisenstein 184–193 P12-1020 @@ -211,7 +211,7 @@ Discriminative Strategies to Integrate Multiword Expression Recognition and Parsing - MatthieuConstant + MatthieuConstant AnthonySigogne PatrickWatrin 204–212 @@ -229,18 +229,18 @@ Spectral Learning of Latent-Variable <fixed-case>PCFG</fixed-case>s - Shay B.Cohen + Shay B.Cohen KarlStratos - MichaelCollins - Dean P.Foster - LyleUngar + MichaelCollins + Dean P.Foster + LyleUngar 223–231 P12-1024 cohen-etal-2012-spectral Reducing Approximation and Estimation Errors for <fixed-case>C</fixed-case>hinese Lexical Processing with Heterogeneous Annotations - WeiweiSun + WeiweiSun XiaojunWan 232–241 P12-1025 @@ -249,7 +249,7 @@ Capturing Paradigmatic and Syntagmatic Lexical Relations: Towards Accurate <fixed-case>C</fixed-case>hinese Part-of-Speech Tagging - WeiweiSun + WeiweiSun HansUszkoreit 242–252 P12-1026 @@ -268,8 +268,8 @@ Verb Classification using Distributional Similarity in Syntactic and Semantic Structures DaniloCroce AlessandroMoschitti - RobertoBasili - MarthaPalmer + RobertoBasili + MarthaPalmer 263–272 P12-1028 croce-etal-2012-verb @@ -332,7 +332,7 @@ A Joint Model for Discovery of Aspects in Utterances AsliCelikyilmaz - DilekHakkani-Tur + DilekHakkani-Tur 330–338 P12-1035 celikyilmaz-hakkani-tur-2012-joint @@ -348,7 +348,7 @@ Learning to “Read Between the Lines” using <fixed-case>B</fixed-case>ayesian Logic Programs SindhuRaghavan - RaymondMooney + RaymondMooney HyeonseoKu 349–358 P12-1037 @@ -358,8 +358,8 @@ Collective Generation of Natural Image Descriptions PolinaKuznetsova VicenteOrdonez - AlexanderBerg - TamaraBerg + AlexanderBerg + TamaraBerg YejinChoi 359–368 P12-1038 @@ -394,8 +394,8 @@ Subgroup Detection in Ideological Discussions AmjadAbu-Jbara PradeepDasigi - MonaDiab - DragomirRadev + MonaDiab + DragomirRadev 399–409 P12-1042 abu-jbara-etal-2012-subgroup @@ -462,7 +462,7 @@ A Statistical Model for Unsupervised and Semi-supervised Transliteration Mining HassanSajjad - AlexanderFraser + AlexanderFraser HelmutSchmid 469–477 P12-1049 @@ -478,9 +478,9 @@ Semantic Parsing with <fixed-case>B</fixed-case>ayesian Tree Transducers - BevanJones + BevanJones MarkJohnson - SharonGoldwater + SharonGoldwater 488–496 P12-1051 jones-etal-2012-semantic @@ -488,7 +488,7 @@ Dependency Hashing for n-best <fixed-case>CCG</fixed-case> Parsing DominickNg - James R.Curran + James R.Curran 497–505 P12-1052 ng-curran-2012-dependency @@ -573,7 +573,7 @@ WenChan XiangdongZhou WeiWang - Tat-SengChua + Tat-SengChua 582–591 P12-1061 chan-etal-2012-community @@ -588,10 +588,10 @@ Computational Approaches to Sentence Completion - GeoffreyZweig - John C.Platt - ChristopherMeek - Christopher J.C.Burges + GeoffreyZweig + John C.Platt + ChristopherMeek + Christopher J.C.Burges AinurYessenalina QiangLiu 601–610 @@ -605,7 +605,7 @@ BoLong Jean-FrancoisCrespo AnleiDong - SathiyaKeerthi + SathiyaKeerthi Su-LinWu 611–619 P12-1064 @@ -649,7 +649,7 @@ KatsuhikoHayashi TaroWatanabe MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 657–665 P12-1069 hayashi-etal-2012-head @@ -675,7 +675,7 @@ A Probabilistic Model for Canonicalizing Named Entity Mentions DaniYogatama YanchuanSim - Noah A.Smith + Noah A.Smith 685–693 P12-1072 yogatama-etal-2012-probabilistic @@ -691,7 +691,7 @@ A Computational Approach to the Automation of Creative Naming - GözdeÖzbal + GözdeÖzbal CarloStrapparava 703–711 P12-1074 @@ -720,7 +720,7 @@ RémyKessler XavierTannier CarolineHagège - VéroniqueMoriceau + VéroniqueMoriceau AndréBittar 730–739 P12-1077 @@ -739,7 +739,7 @@ A Topic Similarity Model for Hierarchical Phrase-based Translation XinyanXiao - DeyiXiong + DeyiXiong MinZhang QunLiu ShouxunLin @@ -767,7 +767,7 @@ Semi-supervised Dependency Parsing using Lexical Affinities Seyed AbolghasemMirroshandel AlexisNasr - JosephLe Roux + JosephLe Roux 777–785 P12-1082 mirroshandel-etal-2012-semi @@ -793,7 +793,7 @@ Structuring <fixed-case>E</fixed-case>-Commerce Inventory KarinMauge KhashRohanimanesh - Jean-DavidRuvini + Jean-DavidRuvini 805–814 P12-1085 mauge-etal-2012-structuring @@ -830,7 +830,7 @@ Discriminative Learning for Joint Template Filling EinatMinkov - LukeZettlemoyer + LukeZettlemoyer 845–853 P12-1089 minkov-zettlemoyer-2012-discriminative @@ -847,7 +847,7 @@ Modeling Sentences in the Latent Space WeiweiGuo - MonaDiab + MonaDiab 864–872 P12-1091 @@ -856,10 +856,10 @@ Improving Word Representations via Global Context and Multiple Word Prototypes - EricHuang + EricHuang RichardSocher - ChristopherManning - AndrewNg + ChristopherManning + AndrewNg 873–882 P12-1092 huang-etal-2012-improving @@ -868,7 +868,7 @@ Exploiting Social Information in Grounded Language Learning via Grammatical Reduction MarkJohnson KatherineDemuth - MichaelFrank + MichaelFrank 883–891 P12-1093 johnson-etal-2012-exploiting @@ -885,7 +885,7 @@ Modeling the Translation of Predicate-Argument Structure for <fixed-case>SMT</fixed-case> - DeyiXiong + DeyiXiong MinZhang HaizhouLi 902–911 @@ -923,7 +923,7 @@ Mixing Multiple Translation Models in Statistical Machine Translation MajidRazmara GeorgeFoster - BaskaranSankaran + BaskaranSankaran AnoopSarkar 940–949 P12-1099 @@ -950,7 +950,7 @@ Text Segmentation by Language Using Minimum Description Length HiroshiYamaguchi - KumikoTanaka-Ishii + KumikoTanaka-Ishii 969–978 P12-1102 yamaguchi-tanaka-ishii-2012-text @@ -978,7 +978,7 @@ Polarity Consistency Checking for Sentiment Dictionaries EduardDragut HongWang - ClementYu + ClementYu PrasadSistla WeiyiMeng 997–1005 @@ -998,8 +998,8 @@ Sentence Simplification by Monolingual Machine Translation SanderWubben - Antalvan den Bosch - EmielKrahmer + Antalvan den Bosch + EmielKrahmer 1015–1024 P12-1107 wubben-etal-2012-sentence @@ -1010,7 +1010,7 @@ Jeong-WooSon Tae-GilNoh Seong-BaePark - Sang-JoLee + Sang-JoLee 1025–1034 P12-1108 song-etal-2012-cost @@ -1029,7 +1029,7 @@ JunHatori TakuyaMatsuzaki YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 1045–1053 P12-1110 hatori-etal-2012-incremental @@ -1037,7 +1037,7 @@ Exploring Deterministic Constraints: from a Constrained <fixed-case>E</fixed-case>nglish <fixed-case>POS</fixed-case> Tagger to an Efficient <fixed-case>ILP</fixed-case> Solution to <fixed-case>C</fixed-case>hinese Word Segmentation QiuyeZhao - MitchMarcus + MitchMarcus 1054–1062 P12-1111 zhao-marcus-2012-exploring @@ -1048,10 +1048,10 @@ Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers) P12-2 HaizhouLi - Chin-YewLin + Chin-YewLin MilesOsborne - Gary GeunbaeLee - Jong C.Park + Gary GeunbaeLee + Jong C.Park Association for Computational Linguistics
Jeju Island, Korea
July @@ -1065,7 +1065,7 @@ Higher-order Constituent Parsing and Parser Combination XiaoChen - ChunyuKit + ChunyuKit 1–5 P12-2001 chen-kit-2012-higher @@ -1082,7 +1082,7 @@ A Comparison of <fixed-case>C</fixed-case>hinese Parsers for <fixed-case>S</fixed-case>tanford Dependencies WanxiangChe - ValentinSpitkovsky + ValentinSpitkovsky TingLiu 11–16 P12-2003 @@ -1107,7 +1107,7 @@ Fast and Scalable Decoding with Language Model Look-Ahead for Phrase-based Statistical Machine Translation JoernWuebker - HermannNey + HermannNey RichardZens 28–32 P12-2006 @@ -1115,10 +1115,10 @@ Head-Driven Hierarchical Phrase-based Translation - JunhuiLi + JunhuiLi ZhaopengTu - GuodongZhou - Josefvan Genabith + GuodongZhou + Josefvan Genabith 33–37 P12-2007 li-etal-2012-head @@ -1133,7 +1133,7 @@ A Novel Burst-based Text Representation Model for Scalable Event Detection - XinZhao + XinZhao RishanChen KaiFan HongfeiYan @@ -1164,7 +1164,7 @@ Self-Disclosure and Relationship Strength in <fixed-case>T</fixed-case>witter Conversations JinYeongBak SuinKim - AliceOh + AliceOh 60–64 P12-2012 P12-2012.Presentation.pdf @@ -1174,7 +1174,7 @@ Genre Independent Subgroup Detection in Online Discussion Threads: A Study of Implicit Attitude using Textual Latent Semantics PradeepDasigi WeiweiGuo - MonaDiab + MonaDiab 65–69 P12-2013 P12-2013.Datasets.zip @@ -1183,8 +1183,8 @@ Learning to Temporally Order Medical Events in Clinical Text PreethiRaghavan - AlbertLai - EricFosler-Lussier + AlbertLai + EricFosler-Lussier 70–74 P12-2014 raghavan-etal-2012-learning-temporally @@ -1206,7 +1206,7 @@ Using Rejuvenation to Improve Particle Filtering for <fixed-case>B</fixed-case>ayesian Word Segmentation - BenjaminBörschinger + BenjaminBörschinger MarkJohnson 85–89 P12-2017 @@ -1214,8 +1214,8 @@ Baselines and Bigrams: Simple, Good Sentiment and Topic Classification - SidaWang - ChristopherManning + SidaWang + ChristopherManning 90–94 P12-2018 wang-manning-2012-baselines @@ -1243,7 +1243,7 @@ Robust Conversion of <fixed-case>CCG</fixed-case> Derivations to Phrase Structure Trees Jonathan K.Kummerfeld DanKlein - James R.Curran + James R.Curran 105–109 P12-2021 P12-2021.Software.zip @@ -1252,7 +1252,7 @@ Estimating Compact Yet Rich Tree Insertion Grammars ElifYamangil - StuartShieber + StuartShieber 110–114 P12-2022 yamangil-shieber-2012-estimating @@ -1269,7 +1269,7 @@ Detecting Semantic Equivalence and Information Disparity in Cross-lingual Documents YasharMehdad - MatteoNegri + MatteoNegri MarcelloFederico 120–124 P12-2024 @@ -1285,9 +1285,9 @@ Learning to Find Translations and Transliterations on the Web - Joseph Z.Chang - Jason S.Chang - Roger Jyh-ShingJang + Joseph Z.Chang + Jason S.Chang + Roger Jyh-ShingJang 130–134 P12-2026 chang-etal-2012-learning @@ -1303,7 +1303,7 @@ Learning the Latent Semantics of a Concept from its Definition WeiweiGuo - MonaDiab + MonaDiab 140–144 P12-2028 P12-2028.Datasets.zip @@ -1312,7 +1312,7 @@ Unsupervised Semantic Role Induction with Global Role Ordering NikhilGarg - JamesHenderson + JamesHenderson 145–149 P12-2029 garg-henderson-2012-unsupervised @@ -1339,7 +1339,7 @@ ApoorvAgarwal AdinoyiOmuya AaronHarnly - OwenRambow + OwenRambow 161–165 P12-2032 agarwal-etal-2012-comprehensive @@ -1393,7 +1393,7 @@ Native Language Detection with Tree Substitution Grammars - BenjaminSwanson + BenjaminSwanson EugeneCharniak 193–197 P12-2038 @@ -1403,14 +1403,14 @@ Tense and Aspect Error Correction for <fixed-case>ESL</fixed-case> Learners Using Global Context ToshikazuTajiri MamoruKomachi - YujiMatsumoto + YujiMatsumoto 198–202 P12-2039 tajiri-etal-2012-tense Movie-<fixed-case>D</fixed-case>i<fixed-case>C</fixed-case>: a Movie Dialogue Corpus for Research and Development - Rafael E.Banchs + Rafael E.Banchs 203–207 P12-2040 banchs-2012-movie @@ -1442,7 +1442,7 @@ Extracting and modeling durations for habits and events from <fixed-case>T</fixed-case>witter JenniferWilliams - GrahamKatz + GrahamKatz 223–227 P12-2044 williams-katz-2012-extracting @@ -1452,7 +1452,7 @@ JoelNothman MatthewHonnibal BenHachey - James R.Curran + James R.Curran 228–232 P12-2045 nothman-etal-2012-event @@ -1470,7 +1470,7 @@ Using Search-Logs to Improve Query Tagging KuzmanGanchev - KeithHall + KeithHall RyanMcDonald SlavPetrov 238–242 @@ -1486,8 +1486,8 @@ A Corpus of Textual Revisions in Second Language Writing - JohnLee - JonathanWebster + JohnLee + JonathanWebster 248–252 P12-2049 lee-webster-2012-corpus @@ -1497,7 +1497,7 @@ NathanSchneider BehrangMohit KemalOflazer - Noah A.Smith + Noah A.Smith 253–258 P12-2050 P12-2050.Datasets.zip @@ -1505,8 +1505,8 @@ Word Epoch Disambiguation: Finding How Words Change Over Time - RadaMihalcea - ViviNastase + RadaMihalcea + ViviNastase 259–263 P12-2051 mihalcea-nastase-2012-word @@ -1527,7 +1527,7 @@ PeiYang WeiGao QiTan - Kam-FaiWong + Kam-FaiWong 270–274 P12-2053 yang-etal-2012-information @@ -1555,9 +1555,9 @@ Enhancing Statistical Machine Translation with Character Alignment NingXi GuangchaoTang - XinyuDai + XinyuDai ShujianHuang - JiajunChen + JiajunChen 285–290 P12-2056 xi-etal-2012-enhancing @@ -1568,7 +1568,7 @@ DongdongZhang MuLi MingZhou - Hae-ChangRim + Hae-ChangRim 291–295 P12-2057 lee-etal-2012-translation @@ -1577,15 +1577,15 @@ Heuristic Cube Pruning in Linear Time AndreaGesmundo GiorgioSatta - JamesHenderson + JamesHenderson 296–300 P12-2058 gesmundo-etal-2012-heuristic Combining Word-Level and Character-Level Models for Machine Translation Between Closely-Related Languages - PreslavNakov - JörgTiedemann + PreslavNakov + JörgTiedemann 301–305 P12-2059 nakov-tiedemann-2012-combining @@ -1602,7 +1602,7 @@ Post-ordering by Parsing for <fixed-case>J</fixed-case>apanese-<fixed-case>E</fixed-case>nglish Statistical Machine Translation IsaoGoto MasaoUtiyama - EiichiroSumita + EiichiroSumita 311–316 P12-2061 goto-etal-2012-post @@ -1619,10 +1619,10 @@ Unsupervised Morphology Rivals Supervised Morphology for <fixed-case>A</fixed-case>rabic <fixed-case>MT</fixed-case> - DavidStallard + DavidStallard JacobDevlin MichaelKayser - Yoong KeokLee + Yoong KeokLee ReginaBarzilay 322–327 P12-2063 @@ -1630,7 +1630,7 @@ A Meta Learning Approach to Grammatical Error Correction - HongsuckSeo + HongsuckSeo JonghoonLee SeokhwanKim KyusongLee @@ -1653,7 +1653,7 @@ ZhaopengTu YifanHe JenniferFoster - Josefvan Genabith + Josefvan Genabith QunLiu ShouxunLin 338–343 @@ -1666,7 +1666,7 @@ San-ChuanHung Wei-ShihLin NanyunPeng - Shou-DeLin + Shou-DeLin Wei-FenLin 344–348 P12-2067 @@ -1679,7 +1679,7 @@ KatsumasaYoshikawa RyuIida TsutomuHirao - ManabuOkumura + ManabuOkumura 349–353 P12-2068 yoshikawa-etal-2012-sentence @@ -1695,17 +1695,17 @@ Assessing the Effect of Inconsistent Assessors on Summarization Evaluation KarolinaOwczarzak - Peter A.Rankel - Hoa TrangDang - John M.Conroy + Peter A.Rankel + Hoa TrangDang + John M.Conroy 359–362 P12-2070 owczarzak-etal-2012-assessing Fast and Robust Part-of-Speech Tagging Using Dynamic Model Selection - Jinho D.Choi - MarthaPalmer + Jinho D.Choi + MarthaPalmer 363–367 P12-2071 choi-palmer-2012-fast @@ -1713,7 +1713,7 @@ Lemmatisation as a Tagging Task AndreaGesmundo - TanjaSamardžić + TanjaSamardžić 368–372 P12-2072 gesmundo-samardzic-2012-lemmatisation @@ -1737,7 +1737,7 @@ Unsupervized Word Segmentation: the Case for <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese PierreMagistry - BenoîtSagot + BenoîtSagot 383–387 P12-2075 magistry-sagot-2012-unsupervized @@ -1773,7 +1773,7 @@ Wei-JieHuang Chia-RuChou Yu-LinTzeng - Chia-YingLee + Chia-YingLee Chao-LinLiu 1–6 P12-3001 @@ -1810,22 +1810,22 @@ langid.py: An Off-the-shelf Language Identification Tool MarcoLui - TimothyBaldwin + TimothyBaldwin 25–30 P12-3005 lui-baldwin-2012-langid Personalized Normalization for a Multilingual Chat System - Ai TiAw - Lian HauLee + Ai TiAw + Lian HauLee 31–36 P12-3006 aw-lee-2012-personalized <fixed-case>IRIS</fixed-case>: a Chat-oriented Dialogue System based on the Vector Space Model - Rafael E.Banchs + Rafael E.Banchs HaizhouLi 37–42 P12-3007 @@ -1833,16 +1833,16 @@ <fixed-case>L</fixed-case>ets<fixed-case>MT</fixed-case>!: Cloud-Based Platform for Do-It-Yourself Machine Translation - AndrejsVasiļjevs + AndrejsVasiļjevs RaivisSkadiņš - JörgTiedemann + JörgTiedemann 43–48 P12-3008 vasiljevs-etal-2012-letsmt A Web-based Evaluation Framework for Spatial Instruction-Giving Systems - SrinivasanJanarthanam + SrinivasanJanarthanam OliverLemon XingkunLiu 49–54 @@ -1853,8 +1853,8 @@ <fixed-case>DOMCAT</fixed-case>: A Bilingual Concordancer for Domain-Specific Computer Assisted Translation Ming-HongBai Yu-MingHsieh - Keh-JiannChen - Jason S.Chang + Keh-JiannChen + Jason S.Chang 55–60 P12-3010 bai-etal-2012-domcat @@ -1862,10 +1862,10 @@ The <fixed-case>O</fixed-case>pen<fixed-case>G</fixed-case>rm open-source finite-state grammar software libraries BrianRoark - RichardSproat + RichardSproat CyrilAllauzen - MichaelRiley - JeffreySorensen + MichaelRiley + JeffreySorensen TerryTai 61–66 P12-3011 @@ -1874,7 +1874,7 @@ Multilingual <fixed-case>WSD</fixed-case> with Just a Few Lines of Code: the <fixed-case>B</fixed-case>abel<fixed-case>N</fixed-case>et <fixed-case>API</fixed-case> RobertoNavigli - Simone PaoloPonzetto + Simone PaoloPonzetto 67–72 P12-3012 navigli-ponzetto-2012-multilingual @@ -1907,12 +1907,12 @@ <fixed-case>ACCURAT</fixed-case> Toolkit for Multi-Level Alignment and Information Extraction from Comparable Corpora - MārcisPinnis + MārcisPinnis RaduIon - DanŞtefănescu + DanŞtefănescu FangzhongSu - IngunaSkadiņa - AndrejsVasiļjevs + IngunaSkadiņa + AndrejsVasiļjevs BogdanBabych 91–96 P12-3016 @@ -1940,7 +1940,7 @@ YunyaoLi LauraChiticariu HuahaiYang - FrederickReiss + FrederickReiss ArnaldoCarreno-fuentes 109–114 P12-3019 @@ -1949,10 +1949,10 @@ A System for Real-time <fixed-case>T</fixed-case>witter Sentiment Analysis of 2012 <fixed-case>U</fixed-case>.<fixed-case>S</fixed-case>. Presidential Election Cycle HaoWang - DoganCan + DoganCan AbeKazemzadeh FrançoisBar - ShrikanthNarayanan + ShrikanthNarayanan 115–120 P12-3020 wang-etal-2012-system @@ -1970,7 +1970,7 @@ <fixed-case>A</fixed-case>kamon: An Open Source Toolkit for Tree/Forest-Based Statistical Machine Translation XianchaoWu TakuyaMatsuzaki - Jun’ichiTsujii + Jun’ichiTsujii 127–132 P12-3022 wu-etal-2012-akamon @@ -1978,16 +1978,16 @@ Subgroup Detector: A System for Detecting Subgroups in Online Discussions AmjadAbu-Jbara - DragomirRadev + DragomirRadev 133–138 P12-3023 abu-jbara-radev-2012-subgroup A Graphical Interface for <fixed-case>MT</fixed-case> Evaluation and Error Analysis - MeritxellGonzàlez - JesúsGiménez - LluísMàrquez + MeritxellGonzàlez + JesúsGiménez + LluísMàrquez 139–144 P12-3024 gonzalez-etal-2012-graphical @@ -1997,7 +1997,7 @@ Wan-YuLin NanyunPeng Chun-ChaoYen - Shou-deLin + Shou-deLin 145–150 P12-3025 lin-etal-2012-online @@ -2012,11 +2012,11 @@ <fixed-case>FLOW</fixed-case>: A First-Language-Oriented Writing Assistant System - Mei-HuaChen - Shih-TingHuang - Hung-TingHsieh - Ting-HuiKao - Jason S.Chang + Mei-HuaChen + Shih-TingHuang + Hung-TingHsieh + Ting-HuiKao + Jason S.Chang 157–162 P12-3027 chen-etal-2012-flow @@ -2026,7 +2026,7 @@ Wen-TaiHsieh Chen-MingWu TsunKu - Seng-cho T.Chou + Seng-cho T.Chou 163–168 P12-3028 hsieh-etal-2012-social @@ -2062,7 +2062,7 @@ Qualitative Modeling of Spatial Prepositions and Motion Expressions InderjeetMani - JamesPustejovsky + JamesPustejovsky 1 P12-4001 P12-4001.Presentation.pdf @@ -2078,7 +2078,7 @@ Topic Models, Latent Space Models, Sparse Coding, and All That: A Systematic Understanding of Probabilistic Semantic Extraction in Large Corpus - EricXing + EricXing 3 P12-4003 P12-4003.Presentation.pdf @@ -2086,9 +2086,9 @@ Multilingual Subjectivity and Sentiment Analysis - RadaMihalcea + RadaMihalcea CarmenBanea - JanyceWiebe + JanyceWiebe 4 P12-4004 P12-4004.Presentation.pdf @@ -2098,7 +2098,7 @@ Deep Learning for <fixed-case>NLP</fixed-case> (without Magic) RichardSocher YoshuaBengio - Christopher D.Manning + Christopher D.Manning 5 P12-4005 P12-4005.Presentation.pdf @@ -2106,8 +2106,8 @@ Graph-based Semi-Supervised Learning Algorithms for <fixed-case>NLP</fixed-case> - AmarSubramanya - Partha PratimTalukdar + AmarSubramanya + Partha PratimTalukdar 6 P12-4006 P12-4006.Presentation.pdf diff --git a/data/xml/P13.xml b/data/xml/P13.xml index 80680b5b12..4c04a1404c 100644 --- a/data/xml/P13.xml +++ b/data/xml/P13.xml @@ -4,9 +4,9 @@ Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) P13-1 - HinrichSchuetze + HinrichSchuetze PascaleFung - MassimoPoesio + MassimoPoesio Association for Computational Linguistics
Sofia, Bulgaria
August @@ -27,7 +27,7 @@ Integrating Translation Memory into Phrase-Based Machine Translation during Decoding KunWang - ChengqingZong + ChengqingZong Keh-YihSu 11–21 P13-1002 @@ -42,7 +42,7 @@ Modelling Annotator Bias with Multi-task <fixed-case>G</fixed-case>aussian Processes: An Application to Machine Translation Quality Estimation - TrevorCohn + TrevorCohn LuciaSpecia 32–42 P13-1004 @@ -52,7 +52,7 @@ Smoothed marginal distribution constraints for language modeling BrianRoark CyrilAllauzen - MichaelRiley + MichaelRiley 43–52 P13-1005 roark-etal-2013-smoothed @@ -69,14 +69,14 @@ Plurality, Negation, and Quantification:Towards Comprehensive Quantifier Scope Disambiguation MehdiManshadi DanielGildea - JamesAllen + JamesAllen 64–72 P13-1007 manshadi-etal-2013-plurality Joint Event Extraction via Structured Prediction with Global Features - QiLi + QiLi HengJi LiangHuang 73–82 @@ -103,7 +103,7 @@ Recognizing Rare Social Phenomena in Conversation: Empowerment Detection in Support Group Chatrooms ElijahMayfield DavidAdamson - CarolynPenstein Rosé + CarolynPenstein Rosé 104–113 P13-1011 mayfield-etal-2013-recognizing @@ -149,7 +149,7 @@ Distortion Model Considering Rich Context for Statistical Machine Translation IsaoGoto MasaoUtiyama - EiichiroSumita + EiichiroSumita AkihiroTamura SadaoKurohashi 155–165 @@ -171,8 +171,8 @@ Microblogs as Parallel Corpora WangLing GuangXiang - ChrisDyer - AlanBlack + ChrisDyer + AlanBlack IsabelTrancoso 176–186 P13-1018 @@ -189,8 +189,8 @@ Fast and Robust Compressive Summarization with Dual Decomposition and Multi-Task Learning - MiguelAlmeida - AndréMartins + MiguelAlmeida + AndréMartins 196–206 P13-1020 almeida-martins-2013-fast @@ -207,7 +207,7 @@ Adapting Discriminative Reranking to Grounded Language Learning JoohyunKim - RaymondMooney + RaymondMooney 218–227 P13-1022 kim-mooney-2013-adapting @@ -225,7 +225,7 @@ WeiweiGuo HaoLi HengJi - MonaDiab + MonaDiab 239–249 P13-1024 guo-etal-2013-linking @@ -234,7 +234,7 @@ A computational approach to politeness with application to social factors CristianDanescu-Niculescu-Mizil MoritzSudhof - DanJurafsky + DanJurafsky JureLeskovec ChristopherPotts 250–259 @@ -276,10 +276,10 @@ A Context Free <fixed-case>TAG</fixed-case> Variant - BenSwanson + BenSwanson ElifYamangil EugeneCharniak - StuartShieber + StuartShieber 302–310 P13-1030 swanson-etal-2013-context @@ -287,9 +287,9 @@ Fast and Adaptive Online Training of Feature-Rich Translation Models SpenceGreen - SidaWang + SidaWang DanielCer - Christopher D.Manning + Christopher D.Manning 311–321 P13-1031 green-etal-2013-fast @@ -298,7 +298,7 @@ Advancements in Reordering Models for Statistical Machine Translation MinweiFeng Jan-ThorstenPeter - HermannNey + HermannNey 322–332 P13-1032 feng-etal-2013-advancements @@ -306,7 +306,7 @@ A <fixed-case>M</fixed-case>arkov Model of Machine Translation using Non-parametric <fixed-case>B</fixed-case>ayesian Inference YangFeng - TrevorCohn + TrevorCohn 333–342 P13-1033 feng-cohn-2013-markov @@ -323,7 +323,7 @@ Learning Latent Personas of Film Characters DavidBamman BrendanO’Connor - Noah A.Smith + Noah A.Smith 352–361 P13-1035 bamman-etal-2013-learning @@ -338,7 +338,7 @@ Automatic Interpretation of the <fixed-case>E</fixed-case>nglish Possessive StephenTratz - EduardHovy + EduardHovy 372–381 P13-1037 tratz-hovy-2013-automatic @@ -348,7 +348,7 @@ KatsumaNarisawa YotaroWatanabe JuntaMizuno - NaoakiOkazaki + NaoakiOkazaki KentaroInui 382–391 P13-1038 @@ -356,7 +356,7 @@ Probabilistic Domain Modelling With Contextualized Distributional Semantic Vectors - Jackie Chi KitCheung + Jackie Chi KitCheung GeraldPenn 392–401 P13-1039 @@ -365,8 +365,8 @@ Extracting bilingual terminologies from comparable corpora AhmetAker - MonicaParamita - RobGaizauskas + MonicaParamita + RobGaizauskas 402–411 P13-1040 aker-etal-2013-extracting @@ -374,16 +374,16 @@ The Haves and the Have-Nots: Leveraging Unlabelled Corpora for Sentiment Analysis KashyapPopat - BalamuraliA.R - PushpakBhattacharyya - GholamrezaHaffari + BalamuraliA.R + PushpakBhattacharyya + GholamrezaHaffari 412–422 P13-1041 popat-etal-2013-haves Large-scale Semantic Parsing via Schema Matching and Lexicon Extension - QingqingCai + QingqingCai AlexanderYates 423–433 P13-1042 @@ -402,8 +402,8 @@ Nonconvex Global Optimization for Latent-Variable Models - Matthew R.Gormley - JasonEisner + Matthew R.Gormley + JasonEisner 444–454 P13-1044 gormley-eisner-2013-nonconvex @@ -412,8 +412,8 @@ Parsing with Compositional Vector Grammars RichardSocher JohnBauer - Christopher D.Manning - Andrew Y.Ng + Christopher D.Manning + Andrew Y.Ng 455–465 P13-1045 socher-etal-2013-parsing @@ -421,26 +421,26 @@ Discriminative state tracking for spoken dialog systems AngelikiMetallinou - DanBohus - JasonWilliams + DanBohus + JasonWilliams 466–475 P13-1046 metallinou-etal-2013-discriminative Leveraging Synthetic Discourse Data via Multi-task Learning for Implicit Discourse Relation Recognition - ManLan + ManLan YuXu - ZhengyuNiu + ZhengyuNiu 476–485 P13-1047 lan-etal-2013-leveraging Combining Intra- and Multi-sentential Rhetorical Parsing for Document-level Discourse Analysis - ShafiqJoty + ShafiqJoty GiuseppeCarenini - RaymondNg + RaymondNg YasharMehdad 486–496 P13-1048 @@ -456,9 +456,9 @@ Feature-Based Selection of Dependency Paths in Ad Hoc Information Retrieval - K. TamsinMaxwell - JonOberlander - W. BruceCroft + K. TamsinMaxwell + JonOberlander + W. BruceCroft 507–516 P13-1050 maxwell-etal-2013-feature @@ -468,8 +468,8 @@ MartinPopel DavidMareček JanŠtěpánek - DanielZeman - ZdeněkŽabokrtský + DanielZeman + ZdeněkŽabokrtský 517–527 P13-1051 popel-etal-2013-coordination @@ -486,7 +486,7 @@ Collective Annotation of Linguistic Resources: Basic Principles and a Formal Model UlleEndriss - RaquelFernández + RaquelFernández 539–549 P13-1053 endriss-fernandez-2013-collective @@ -495,16 +495,16 @@ <fixed-case>P</fixed-case>ar<fixed-case>G</fixed-case>ram<fixed-case>B</fixed-case>ank: The <fixed-case>P</fixed-case>ar<fixed-case>G</fixed-case>ram Parallel Treebank SebastianSulger MiriamButt - Tracy HollowayKing + Tracy HollowayKing PaulMeurer TiborLaczkó GyörgyRákosi - Cheikh BambaDione + Cheikh BambaDione HelgeDyvik VictoriaRosén - KoenraadDe Smedt + KoenraadDe Smedt AgnieszkaPatejuk - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu I WayanArka MeladelMistica 550–560 @@ -539,8 +539,8 @@ Using subcategorization knowledge to improve case prediction for translation to <fixed-case>G</fixed-case>erman MarionWeller - AlexanderFraser - SabineSchulte im Walde + AlexanderFraser + SabineSchulte im Walde 593–603 P13-1058 weller-etal-2013-using @@ -550,8 +550,8 @@ HaiboLi JingZheng HengJi - QiLi - WenWang + QiLi + WenWang 604–614 P13-1059 li-etal-2013-name @@ -559,7 +559,7 @@ Decipherment Complexity in 1:1 Substitution Ciphers MalteNuhn - HermannNey + HermannNey 615–621 P13-1060 nuhn-ney-2013-decipherment @@ -567,7 +567,7 @@ Non-Monotonic Sentence Alignment via Semisupervised Learning XiaojunQuan - ChunyuKit + ChunyuKit YanSong 622–630 P13-1061 @@ -594,7 +594,7 @@ Bridging Languages through Etymology: The case of cross language text categorization - ViviNastase + ViviNastase CarloStrapparava 651–659 P13-1064 @@ -625,9 +625,9 @@ Large tagset labeling using Feed Forward Neural Networks. Case study on <fixed-case>R</fixed-case>omanian Language - TiberiuBoros + TiberiuBoros RaduIon - DanTufis + DanTufis 692–700 P13-1068 boros-etal-2013-large @@ -686,7 +686,7 @@ Discriminative Learning with Natural Annotations: Word Segmentation as a Case Study WenbinJiang MengSun - Yajuan + Yajuan YatingYang QunLiu 761–769 @@ -705,8 +705,8 @@ An Infinite Hierarchical <fixed-case>B</fixed-case>ayesian Model of Phrasal Translation - TrevorCohn - GholamrezaHaffari + TrevorCohn + GholamrezaHaffari 780–790 P13-1077 cohn-haffari-2013-infinite @@ -715,8 +715,8 @@ Additive Neural Networks for Statistical Machine Translation LemaoLiu TaroWatanabe - EiichiroSumita - TiejunZhao + EiichiroSumita + TiejunZhao 791–801 P13-1078 liu-etal-2013-additive @@ -725,8 +725,8 @@ Hierarchical Phrase Table Combination for Machine Translation ConghuiZhu TaroWatanabe - EiichiroSumita - TiejunZhao + EiichiroSumita + TiejunZhao 802–810 P13-1079 zhu-etal-2013-hierarchical @@ -744,7 +744,7 @@ Enlisting the Ghost: Modeling Empty Categories for Machine Translation BingXiang - XiaoqiangLuo + XiaoqiangLuo BowenZhou 822–831 P13-1081 @@ -763,9 +763,9 @@ Part-of-Speech Induction in Dependency Trees for Statistical Machine Translation AkihiroTamura TaroWatanabe - EiichiroSumita + EiichiroSumita HiroyaTakamura - ManabuOkumura + ManabuOkumura 841–851 P13-1083 tamura-etal-2013-part @@ -792,7 +792,7 @@ Semantic Frames to Predict Stock Price Movement BoyiXie - Rebecca J.Passonneau + Rebecca J.Passonneau LeonWu Germán G.Creamer 873–883 @@ -812,7 +812,7 @@ The Role of Syntax in Vector Space Models of Compositional Semantics Karl MoritzHermann - PhilBlunsom + PhilBlunsom 894–904 P13-1088 hermann-blunsom-2013-role @@ -829,9 +829,9 @@ Semi-Supervised Semantic Tagging of Conversational Understanding using <fixed-case>M</fixed-case>arkov Topic Regression AsliCelikyilmaz - DilekHakkani-Tur - GokhanTur - RuhiSarikaya + DilekHakkani-Tur + GokhanTur + RuhiSarikaya 914–923 P13-1090 celikyilmaz-etal-2013-semi @@ -842,7 +842,7 @@ JacobAndreas DanielBauer Karl MoritzHermann - BevanJones + BevanJones KevinKnight 924–932 P13-1091 @@ -884,7 +884,7 @@ Utterance-Level Multimodal Sentiment Analysis VerónicaPérez-Rosas - RadaMihalcea + RadaMihalcea Louis-PhilippeMorency 973–982 P13-1096 @@ -893,8 +893,8 @@ Probabilistic Sense Sentiment Similarity through Hidden Emotions MitraMohtarami - ManLan - Chew LimTan + ManLan + Chew LimTan 983–992 P13-1097 mohtarami-etal-2013-probabilistic @@ -902,8 +902,8 @@ A user-centric model of voting intention from Social Media VasileiosLampos - DanielPreoţiuc-Pietro - TrevorCohn + DanielPreoţiuc-Pietro + TrevorCohn 993–1003 P13-1098 P13-1098.Poster.pdf @@ -932,14 +932,14 @@ HajimeMorita RyoheiSasano HiroyaTakamura - ManabuOkumura + ManabuOkumura 1023–1032 P13-1101 morita-etal-2013-subtree The effect of non-tightness on <fixed-case>B</fixed-case>ayesian estimation of <fixed-case>PCFG</fixed-case>s - Shay B.Cohen + Shay B.Cohen MarkJohnson 1033–1041 P13-1102 @@ -960,7 +960,7 @@ Transition-based Dependency Parsing with Selectional Branching - Jinho D.Choi + Jinho D.Choi AndrewMcCallum 1052–1062 P13-1104 @@ -969,7 +969,7 @@ Bilingually-Guided Monolingual Dependency Grammar Induction KaiLiu - Yajuan + Yajuan WenbinJiang QunLiu 1063–1072 @@ -980,7 +980,7 @@ Joint Word Alignment and Bilingual Named Entity Recognition Using Dual Decomposition MengqiuWang WanxiangChe - Christopher D.Manning + Christopher D.Manning 1073–1082 P13-1106 wang-etal-2013-joint @@ -1001,8 +1001,8 @@ Learning to Extract International Relations from Political Context BrendanO’Connor - Brandon M.Stewart - Noah A.Smith + Brandon M.Stewart + Noah A.Smith 1094–1104 P13-1108 oconnor-etal-2013-learning @@ -1011,7 +1011,7 @@ Graph Propagation for Paraphrasing Out-of-Vocabulary Words in Statistical Machine Translation MajidRazmara MaryamSiahbani - GholamrezaHaffari + GholamrezaHaffari AnoopSarkar 1105–1115 P13-1109 @@ -1031,7 +1031,7 @@ FeifeiZhai JiajunZhang YuZhou - ChengqingZong + ChengqingZong 1127–1136 P13-1111 zhai-etal-2013-handling @@ -1076,7 +1076,7 @@ <fixed-case>I</fixed-case>mp<fixed-case>A</fixed-case>r: A Deterministic Algorithm for Implicit Semantic Role Labelling EgoitzLaparra - GermanRigau + GermanRigau 1180–1189 P13-1116 laparra-rigau-2013-impar @@ -1091,9 +1091,9 @@ <fixed-case>DE</fixed-case>riv<fixed-case>B</fixed-case>ase: Inducing and Evaluating a Derivational Morphology Resource for <fixed-case>G</fixed-case>erman - BrittaZeller + BrittaZeller JanŠnajder - SebastianPadó + SebastianPadó 1201–1211 P13-1118 zeller-etal-2013-derivbase @@ -1120,7 +1120,7 @@ Towards Robust Abstractive Multi-Document Summarization: A Caseframe Analysis of Centrality and Domain - Jackie Chi KitCheung + Jackie Chi KitCheung GeraldPenn 1233–1242 P13-1121 @@ -1138,7 +1138,7 @@ Conditional Random Fields for Responsive Surface Realisation using Global Features NinaDethlefs - HelenHastie + HelenHastie HeribertoCuayáhuitl OliverLemon 1254–1263 @@ -1158,7 +1158,7 @@ Cut the noise: Mutually reinforcing reordering and alignments for improved machine translation KarthikVisweswariah - Mitesh M.Khapra + Mitesh M.Khapra AnanthakrishnanRamanathan 1275–1284 P13-1125 @@ -1208,7 +1208,7 @@ Language Acquisition and Probabilistic Models: keeping it simple AlineVillavicencio MarcoIdiart - RobertBerwick + RobertBerwick IgorMalioutov 1321–1330 P13-1130 @@ -1252,9 +1252,9 @@ Dirt Cheap Web-Scale Parallel Text from the <fixed-case>C</fixed-case>ommon <fixed-case>C</fixed-case>rawl - Jason R.Smith - HerveSaint-Amand - MagdalenaPlamada + Jason R.Smith + HerveSaint-Amand + MagdalenaPlamada PhilippKoehn ChrisCallison-Burch AdamLopez @@ -1267,8 +1267,8 @@ LuWang HemaRaghavan VittorioCastelli - RaduFlorian - ClaireCardie + RaduFlorian + ClaireCardie 1384–1394 P13-1136 wang-etal-2013-sentence @@ -1276,15 +1276,15 @@ Domain-Independent Abstract Generation for Focused Meeting Summarization LuWang - ClaireCardie + ClaireCardie 1395–1405 P13-1137 wang-cardie-2013-domain A Statistical <fixed-case>NLG</fixed-case> Framework for Aggregated Planning and Realization - RaviKondadadi - BlakeHowald + RaviKondadadi + BlakeHowald FrankSchilder 1406–1415 P13-1138 @@ -1301,7 +1301,7 @@ Learning a Phrase-based Translation Model from Monolingual Data with Application to Domain Adaptation JiajunZhang - ChengqingZong + ChengqingZong 1425–1434 P13-1140 zhang-zong-2013-learning @@ -1309,7 +1309,7 @@ <fixed-case>S</fixed-case>ense<fixed-case>S</fixed-case>potting: Never let your parallel data tie you to an old domain MarineCarpuat - HalDaumé III + HalDaumé III KatharineHenry AnnIrvine JagadeeshJagarlamudi @@ -1320,7 +1320,7 @@ <fixed-case>BRAINSUP</fixed-case>: Brainstorming Support for Creative Sentence Generation - GözdeÖzbal + GözdeÖzbal DanielePighin CarloStrapparava 1446–1455 @@ -1346,15 +1346,15 @@ Argument Inference from Relevant Event Mentions in <fixed-case>C</fixed-case>hinese Argument Extraction PeifengLi - QiaomingZhu - GuodongZhou + QiaomingZhu + GuodongZhou 1477–1487 P13-1145 li-etal-2013-argument Fine-grained Semantic Typing of Emerging Entities - NdapandulaNakashole + NdapandulaNakashole TomaszTylenda GerhardWeikum 1488–1497 @@ -1363,7 +1363,7 @@ Embedding Semantic Similarity in Tree Kernels for Domain Adaptation of Relation Extraction - BarbaraPlank + BarbaraPlank AlessandroMoschitti 1498–1507 P13-1147 @@ -1371,7 +1371,7 @@ A joint model of word segmentation and phonological variation for <fixed-case>E</fixed-case>nglish word-final /t/-deletion - BenjaminBörschinger + BenjaminBörschinger MarkJohnson KatherineDemuth 1508–1516 @@ -1383,7 +1383,7 @@ AngelikiLazaridou MarcoMarelli RobertoZamparelli - MarcoBaroni + MarcoBaroni 1517–1526 P13-1149 lazaridou-etal-2013-compositional @@ -1405,7 +1405,7 @@ Combining Referring Expression Generation and Surface Realization: A Corpus-Based Investigation of Architectures - SinaZarrieß + SinaZarrieß JonasKuhn 1547–1557 P13-1152 @@ -1422,14 +1422,14 @@ Beam Search for Solving Substitution Ciphers MalteNuhn JulianSchamper - HermannNey + HermannNey 1568–1576 P13-1154 nuhn-etal-2013-beam Social Text Normalization using Contextual Graph Random Walks - HanyHassan + HanyHassan ArulMenezes 1577–1586 P13-1155 @@ -1437,8 +1437,8 @@ Integrating Phrase-based Reordering Features into a Chart-based Decoder for Machine Translation - ThuyLinhNguyen - StephanVogel + ThuyLinhNguyen + StephanVogel 1587–1596 P13-1156 nguyen-vogel-2013-integrating @@ -1454,7 +1454,7 @@ Paraphrase-Driven Learning for Open Question Answering AnthonyFader - LukeZettlemoyer + LukeZettlemoyer OrenEtzioni 1608–1618 P13-1158 @@ -1462,13 +1462,13 @@ Aid is Out There: Looking for Help from Tweets during a Large Scale Disaster - IstvánVarga + IstvánVarga MotokiSano KentaroTorisawa ChikaraHashimoto KiyonoriOhtake TakaoKawai - Jong-HoonOh + Jong-HoonOh StijnDe Saeger 1619–1629 P13-1159 @@ -1486,7 +1486,7 @@ Joint Inference for Fine-grained Opinion Extraction BishanYang - ClaireCardie + ClaireCardie 1640–1649 P13-1161 yang-cardie-2013-joint @@ -1495,21 +1495,21 @@ Linguistic Models for Analyzing and Detecting Biased Language MartaRecasens CristianDanescu-Niculescu-Mizil - DanJurafsky + DanJurafsky 1650–1659 P13-1162 recasens-etal-2013-linguistic Evaluating a City Exploration Dialogue System with Integrated Question-Answering and Pedestrian Navigation - SrinivasanJanarthanam + SrinivasanJanarthanam OliverLemon PhilBartie TiphaineDalmas AnnaDickinson XingkunLiu WilliamMackaness - BonnieWebber + BonnieWebber 1660–1668 P13-1163 janarthanam-etal-2013-evaluating @@ -1519,8 +1519,8 @@ SvitlanaVolkova PallaviChoudhury ChrisQuirk - BillDolan - LukeZettlemoyer + BillDolan + LukeZettlemoyer 1669–1679 P13-1164 volkova-etal-2013-lightly @@ -1577,7 +1577,7 @@ Why-Question Answering using Intra- and Inter-Sentential Causal Relations - Jong-HoonOh + Jong-HoonOh KentaroTorisawa ChikaraHashimoto MotokiSano @@ -1589,9 +1589,9 @@ Question Answering Using Enhanced Lexical Semantic Models - Wen-tauYih + Wen-tauYih Ming-WeiChang - ChristopherMeek + ChristopherMeek AndrzejPastusiak 1744–1753 P13-1171 @@ -1633,9 +1633,9 @@ Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers) P13-2 - HinrichSchuetze + HinrichSchuetze PascaleFung - MassimoPoesio + MassimoPoesio Association for Computational Linguistics
Sofia, Bulgaria
August @@ -1664,9 +1664,9 @@
A Tale about <fixed-case>PRO</fixed-case> and Monsters - PreslavNakov - FranciscoGuzmán - StephanVogel + PreslavNakov + FranciscoGuzmán + StephanVogel 12–17 P13-2003 nakov-etal-2013-tale @@ -1706,7 +1706,7 @@ Natural Language Models for Predicting Programming Comments DanaMovshovitz-Attias - William W.Cohen + William W.Cohen 35–40 P13-2007 movshovitz-attias-cohen-2013-natural @@ -1732,10 +1732,10 @@ A relatedness benchmark to test the role of determiners in compositional distributional semantics - RaffaellaBernardi - GeorgianaDinu + RaffaellaBernardi + GeorgianaDinu MarcoMarelli - MarcoBaroni + MarcoBaroni 53–57 P13-2010 bernardi-etal-2013-relatedness @@ -1748,7 +1748,7 @@ BinyangLi LanjunZhou YulanHe - Kam-FaiWong + Kam-FaiWong 58–62 P13-2011 wei-etal-2013-empirical @@ -1773,7 +1773,7 @@ Aggregated Word Pair Features for Implicit Discourse Relation Disambiguation OrBiran - KathleenMcKeown + KathleenMcKeown 69–73 P13-2013 biran-mckeown-2013-aggregated @@ -1782,7 +1782,7 @@ Implicatures and Nested Beliefs in Approximate Decentralized-<fixed-case>POMDP</fixed-case>s AdamVogel ChristopherPotts - DanJurafsky + DanJurafsky 74–80 P13-2014 vogel-etal-2013-implicatures @@ -1790,7 +1790,7 @@ Domain-Specific Coreference Resolution with Lexicalized Features NathanGilbert - EllenRiloff + EllenRiloff 81–86 P13-2015 gilbert-riloff-2013-domain @@ -1812,12 +1812,12 @@ YoavGoldberg DipanjanDas KuzmanGanchev - KeithHall + KeithHall SlavPetrov HaoZhang OscarTäckström ClaudiaBedini - NúriaBertomeu Castelló + NúriaBertomeu Castelló JungmeeLee 92–97 P13-2017 @@ -1828,7 +1828,7 @@ An Empirical Examination of Challenges in <fixed-case>C</fixed-case>hinese Parsing Jonathan K.Kummerfeld DanielTse - James R.Curran + James R.Curran DanKlein 98–103 P13-2018 @@ -1856,7 +1856,7 @@ Arguments and Modifiers from the Learner’s Perspective LeonBergen EdwardGibson - Timothy J.O’Donnell + Timothy J.O’Donnell 115–119 P13-2021 bergen-etal-2013-arguments @@ -1865,7 +1865,7 @@ Benefactive/Malefactive Event and Writer Attitude Annotation LingjiaDeng YoonjungChoi - JanyceWiebe + JanyceWiebe 120–125 P13-2022 deng-etal-2013-benefactive @@ -1880,9 +1880,9 @@ A Decade of Automatic Content Evaluation of News Summaries: Reassessing the State of the Art - Peter A.Rankel - John M.Conroy - Hoa TrangDang + Peter A.Rankel + John M.Conroy + Hoa TrangDang AniNenkova 131–136 P13-2024 @@ -1897,7 +1897,7 @@ Automated Pyramid Scoring of Summaries using Distributional Semantics - Rebecca J.Passonneau + Rebecca J.Passonneau EmilyChen WeiweiGuo DoloresPerin @@ -1909,15 +1909,15 @@ Are Semantically Coherent Topic Models Useful for Ad Hoc Information Retrieval? RomainDeveaud EricSanJuan - PatriceBellot + PatriceBellot 148–152 P13-2027 deveaud-etal-2013-semantically Post-Retrieval Clustering Using Third-Order Similarity Measures - José G.Moreno - GaëlDias + José G.Moreno + GaëlDias GuillaumeCleuziou 153–158 P13-2028 @@ -1984,7 +1984,7 @@ Naresh KumarElluru AnandaswarupVadapalli RaghavendraElluru - HemaMurthy + HemaMurthy KishorePrahallad 196–200 P13-2035 @@ -2021,7 +2021,7 @@ <fixed-case>T</fixed-case>opic<fixed-case>S</fixed-case>pam: a Topic-Model based approach for spam detection JiweiLi - ClaireCardie + ClaireCardie SujianLi 217–221 P13-2039 @@ -2037,7 +2037,7 @@ Unsupervised joke generation from big data - SašaPetrović + SašaPetrović DavidMatthews 228–232 P13-2041 @@ -2046,7 +2046,7 @@ Modeling of term-distance and term-occurrence information for improving n-gram language model performance Tze YuangChong - Rafael E.Banchs + Rafael E.Banchs Eng SiongChng HaizhouLi 233–237 @@ -2076,7 +2076,7 @@ Random Walk Factoid Annotation for Collective Discourse BenKing RahulJha - DragomirRadev + DragomirRadev RobertMankoff 249–254 P13-2045 @@ -2085,8 +2085,8 @@ Identifying <fixed-case>E</fixed-case>nglish and <fixed-case>H</fixed-case>ungarian Light Verb Constructions: A Contrastive Approach VeronikaVincze - IstvánNagy T. - RichárdFarkas + IstvánNagy T. + RichárdFarkas 255–261 P13-2046 vincze-etal-2013-identifying @@ -2105,7 +2105,7 @@ <fixed-case>I</fixed-case>ndo<fixed-case>N</fixed-case>et: A Multilingual Lexical Knowledge Network for <fixed-case>I</fixed-case>ndian Languages BrijeshBhatt LahariPoddar - PushpakBhattacharyya + PushpakBhattacharyya 268–272 P13-2048 bhatt-etal-2013-indonet @@ -2145,7 +2145,7 @@ Anna LisaGentile IsabelleAugenstein EvaBlomqvist - FabioCiravegna + FabioCiravegna 289–293 P13-2052 zhang-etal-2013-mining @@ -2156,14 +2156,14 @@ Lay-KiSoon Tek YongLim Enya KongTang - BaliRanaivo-Malançon + BaliRanaivo-Malançon 294–299 P13-2053 lim-etal-2013-context <fixed-case>S</fixed-case>orani <fixed-case>K</fixed-case>urdish versus <fixed-case>K</fixed-case>urmanji <fixed-case>K</fixed-case>urdish: An Empirical Comparison - KyumarsSheykh Esmaili + KyumarsSheykh Esmaili ShahinSalavati 300–305 P13-2054 @@ -2173,7 +2173,7 @@ Enhanced and Portable Dependency Projection Algorithms Using Interlinear Glossed Text RyanGeorgi FeiXia - William D.Lewis + William D.Lewis 306–311 P13-2055 georgi-etal-2013-enhanced @@ -2181,7 +2181,7 @@ Cross-lingual Projections between Languages from Different Families MoYu - TiejunZhao + TiejunZhao YalongBai HaoTian DianhaiYu @@ -2209,8 +2209,8 @@ Sign Language Lexical Recognition With Propositional Dynamic Logic - ArturoCuriel - ChristopheCollet + ArturoCuriel + ChristopheCollet 328–333 P13-2059 curiel-collet-2013-sign @@ -2237,7 +2237,7 @@ Automatically Predicting Sentence Translation Difficulty AbhijitMishra - PushpakBhattacharyya + PushpakBhattacharyya MichaelCarl 346–351 P13-2062 @@ -2265,7 +2265,7 @@ Stem Translation with Affix-Based Rule Selection for Agglutinative Languages ZhiyangWang - Yajuan + Yajuan MengSun QunLiu 364–369 @@ -2276,7 +2276,7 @@ A Novel Translation Framework Based on <fixed-case>R</fixed-case>hetorical <fixed-case>S</fixed-case>tructure <fixed-case>T</fixed-case>heory MeiTu YuZhou - ChengqingZong + ChengqingZong 370–374 P13-2066 tu-etal-2013-novel @@ -2294,9 +2294,9 @@ Bilingual Lexical Cohesion Trigger Model for Document-Level Machine Translation GuoshengBen - DeyiXiong + DeyiXiong ZhiyangTeng - Yajuan + Yajuan QunLiu 382–386 P13-2068 @@ -2313,7 +2313,7 @@ A Tightly-coupled Unsupervised Clustering and Bilingual Alignment Model for Transliteration TingtingLi - TiejunZhao + TiejunZhao AndrewFinch ChunyueZhang 393–398 @@ -2323,7 +2323,7 @@ Can <fixed-case>M</fixed-case>arkov Models Over Minimal Translation Units Help Phrase-Based <fixed-case>SMT</fixed-case>? NadirDurrani - AlexanderFraser + AlexanderFraser HelmutSchmid HieuHoang PhilippKoehn @@ -2334,7 +2334,7 @@ Learning Non-linear Features for Machine Translation Using Gradient Boosting Machines KristinaToutanova - Byung-GyuAhn + Byung-GyuAhn 406–411 P13-2072 toutanova-ahn-2013-learning @@ -2378,14 +2378,14 @@ Latent Semantic Tensor Indexing for Community-based Question Answering XipengQiu LeTian - XuanjingHuang + XuanjingHuang 434–439 P13-2077 qiu-etal-2013-latent Measuring semantic content in distributional vectors - AurélieHerbelot + AurélieHerbelot MohanGanesalingam 440–445 P13-2078 @@ -2413,7 +2413,7 @@ Sentence Level Dialect Identification in <fixed-case>A</fixed-case>rabic HebaElfardy - MonaDiab + MonaDiab 456–461 P13-2081 elfardy-diab-2013-sentence @@ -2433,7 +2433,7 @@ HuiyingLi MrinmayaSachan ShashankSrivastava - EduardHovy + EduardHovy 467–473 P13-2083 goyal-etal-2013-structured-distributional @@ -2484,7 +2484,7 @@ Generating Recommendation Dialogs by Extracting Information from User Reviews KevinReschke AdamVogel - DanJurafsky + DanJurafsky 499–504 P13-2089 reschke-etal-2013-generating @@ -2502,7 +2502,7 @@ Joint Modeling of News Reader’s and Comment Writer’s Emotions HuanhuanLiu ShoushanLi - GuodongZhou + GuodongZhou Chu-RenHuang PeifengLi 511–515 @@ -2511,8 +2511,8 @@ An annotated corpus of quoted opinions in news articles - TimO’Keefe - James R.Curran + TimO’Keefe + James R.Curran PeterAshwell IrenaKoprinska 516–520 @@ -2525,7 +2525,7 @@ TaoWang XueleiHu ShoushanLi - ChengqingZong + ChengqingZong 521–525 P13-2093 xia-etal-2013-dual @@ -2539,8 +2539,8 @@ Extracting Definitions and Hypernym Relations relying on Syntactic Dependencies and Support Vector Machines - GuidoBoella - LuigiDi Caro + GuidoBoella + LuigiDi Caro 532–537 P13-2095 boella-di-caro-2013-extracting @@ -2549,16 +2549,16 @@ Neighbors Help: Bilingual Unsupervised <fixed-case>WSD</fixed-case> Using Context SudhaBhingardive SamiullaShaikh - PushpakBhattacharyya + PushpakBhattacharyya 538–542 P13-2096 bhingardive-etal-2013-neighbors Reducing Annotation Effort for Quality Estimation via Active Learning - DanielBeck + DanielBeck LuciaSpecia - TrevorCohn + TrevorCohn 543–548 P13-2097 beck-etal-2013-reducing @@ -2567,10 +2567,10 @@ Reranking with Linguistic and Semantic Features for <fixed-case>A</fixed-case>rabic Optical Character Recognition NadiTomeh NizarHabash - RyanRoth + RyanRoth NouraFarra PradeepDasigi - MonaDiab + MonaDiab 549–555 P13-2098 tomeh-etal-2013-reranking @@ -2604,7 +2604,7 @@ A System for Summarizing Scientific Topics Starting from Keywords RahulJha AmjadAbu-Jbara - DragomirRadev + DragomirRadev 572–577 P13-2102 jha-etal-2013-system @@ -2628,7 +2628,7 @@ Iterative Transformation of Annotation Guidelines for Constituency Parsing XiangLi WenbinJiang - Yajuan + Yajuan QunLiu 591–596 P13-2105 @@ -2637,7 +2637,7 @@ Nonparametric <fixed-case>B</fixed-case>ayesian Inference and Efficient Parsing for Tree-adjoining Grammars ElifYamangil - Stuart M.Shieber + Stuart M.Shieber 597–603 P13-2106 yamangil-shieber-2013-nonparametric @@ -2646,7 +2646,7 @@ Using <fixed-case>CCG</fixed-case> categories to improve <fixed-case>H</fixed-case>indi dependency parsing Bharat RamAmbati TejaswiniDeoskar - MarkSteedman + MarkSteedman 604–609 P13-2107 ambati-etal-2013-using @@ -2654,16 +2654,16 @@ The Effect of Higher-Order Dependency Features in Discriminative Phrase-Structure Parsing GregCoppola - MarkSteedman + MarkSteedman 610–616 P13-2108 coppola-steedman-2013-effect Turning on the Turbo: Fast Third-Order Non-Projective Turbo Parsers - AndréMartins - MiguelAlmeida - Noah A.Smith + AndréMartins + MiguelAlmeida + Noah A.Smith 617–622 P13-2109 martins-etal-2013-turning @@ -2671,7 +2671,7 @@ A Lattice-based Framework for Joint <fixed-case>C</fixed-case>hinese Word Segmentation, <fixed-case>POS</fixed-case> Tagging and Parsing ZhiguoWang - ChengqingZong + ChengqingZong NianwenXue 623–627 P13-2110 @@ -2688,7 +2688,7 @@ Simpler unsupervised <fixed-case>POS</fixed-case> tagging with bilingual projections - LongDuong + LongDuong PaulCook StevenBird PavelPecina @@ -2698,15 +2698,15 @@ Part-of-speech tagging with antagonistic adversaries - AndersSøgaard + AndersSøgaard 640–644 P13-2113 sogaard-2013-part Temporal Signals Help Label Temporal Relations - LeonDerczynski - RobertGaizauskas + LeonDerczynski + RobertGaizauskas 645–650 P13-2114 derczynski-gaizauskas-2013-temporal @@ -2714,7 +2714,7 @@ Diverse Keyword Extraction from Conversations MaryamHabibi - AndreiPopescu-Belis + AndreiPopescu-Belis 651–657 P13-2115 habibi-popescu-belis-2013-diverse @@ -2731,9 +2731,9 @@ Filling Knowledge Base Gaps for Distant Supervision of Relation Extraction WeiXu - RaphaelHoffmann + RaphaelHoffmann LeZhao - RalphGrishman + RalphGrishman 665–670 P13-2117 xu-etal-2013-filling @@ -2741,7 +2741,7 @@ Joint Apposition Extraction with Syntactic and Semantic Constraints WillRadford - James R.Curran + James R.Curran 671–677 P13-2118 radford-curran-2013-joint @@ -2758,7 +2758,7 @@ Mapping Source to Target Strings without Alignment by Analogical Learning: A Case Study with Transliteration - PhillippeLanglais + PhillippeLanglais 684–689 P13-2120 langlais-2013-mapping @@ -2767,7 +2767,7 @@ Scalable Modified <fixed-case>K</fixed-case>neser-<fixed-case>N</fixed-case>ey Language Model Estimation KennethHeafield IvanPouzyrevsky - Jonathan H.Clark + Jonathan H.Clark PhilippKoehn 690–696 P13-2121 @@ -2776,9 +2776,9 @@ Incremental Topic-Based Translation Model Adaptation for Conversational Spoken Language Translation SanjikaHewavitharana - DennisMehay + DennisMehay SankaranarayananAnanthakrishnan - PremNatarajan + PremNatarajan 697–701 P13-2122 hewavitharana-etal-2013-incremental @@ -2797,7 +2797,7 @@ A Learner Corpus-based Approach to Verb Suggestion for <fixed-case>ESL</fixed-case> YuSawai MamoruKomachi - YujiMatsumoto + YujiMatsumoto 708–713 P13-2124 sawai-etal-2013-learner @@ -2822,18 +2822,18 @@ Annotation of regular polysemy and underspecification - HéctorMartínez Alonso - BoletteSandford Pedersen - NúriaBel + HéctorMartínez Alonso + BoletteSandford Pedersen + NúriaBel 725–730 P13-2127 martinez-alonso-etal-2013-annotation Derivational Smoothing for Syntactic Distributional Semantics - SebastianPadó + SebastianPadó JanŠnajder - BrittaZeller + BrittaZeller 731–735 P13-2128 pado-etal-2013-derivational @@ -2841,7 +2841,7 @@ Diathesis alternation approximation for verb clustering LinSun - DianaMcCarthy + DianaMcCarthy AnnaKorhonen 736–741 P13-2129 @@ -2850,7 +2850,7 @@ Outsourcing <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et to the Crowd MarcoFossati - ClaudioGiuliano + ClaudioGiuliano SaraTonelli 742–747 P13-2130 @@ -2877,7 +2877,7 @@ Context Vector Disambiguation for Bilingual Lexicon Extraction from Comparable Corpora DhouhaBouamor NasredineSemmar - PierreZweigenbaum + PierreZweigenbaum 759–764 P13-2133 bouamor-etal-2013-context @@ -2885,18 +2885,18 @@ The Effects of Lexical Resource Quality on Preference Violation Detection JesseDunietz - LoriLevin - JaimeCarbonell + LoriLevin + JaimeCarbonell 765–770 P13-2134 dunietz-etal-2013-effects Exploiting Qualitative Information from Automatic Word Alignment for Cross-lingual <fixed-case>NLP</fixed-case> Tasks - José G.C.de Souza - MiquelEsplà-Gomis + José G.C.de Souza + MiquelEsplà-Gomis MarcoTurchi - MatteoNegri + MatteoNegri 771–776 P13-2135 de-souza-etal-2013-exploiting @@ -2904,7 +2904,7 @@ An Information Theoretic Approach to Bilingual Word Clustering ManaalFaruqui - ChrisDyer + ChrisDyer 777–783 P13-2136 faruqui-dyer-2013-information @@ -2912,8 +2912,8 @@ Building and Evaluating a Distributional Memory for <fixed-case>C</fixed-case>roatian JanŠnajder - SebastianPadó - ŽeljkoAgić + SebastianPadó + ŽeljkoAgić 784–789 P13-2137 snajder-etal-2013-building @@ -2922,8 +2922,8 @@ Generalizing Image Captions for Image-Text Parallel Corpus PolinaKuznetsova VicenteOrdonez - AlexanderBerg - TamaraBerg + AlexanderBerg + TamaraBerg YejinChoi 790–796 P13-2138 @@ -2980,8 +2980,8 @@ Identifying Opinion Subgroups in <fixed-case>A</fixed-case>rabic Online Discussions AmjadAbu-Jbara BenKing - MonaDiab - DragomirRadev + MonaDiab + DragomirRadev 829–835 P13-2144 abu-jbara-etal-2013-identifying @@ -2989,10 +2989,10 @@ Extracting Events with Informal Temporal References in Personal Histories in Online Communities MiaomiaoWen - ZeyuZheng + ZeyuZheng HyejuJang GuangXiang - CarolynPenstein Rosé + CarolynPenstein Rosé 836–842 P13-2145 wen-etal-2013-extracting @@ -3000,7 +3000,7 @@ Multimodal <fixed-case>DBN</fixed-case> for Predicting High-Quality Answers in c<fixed-case>QA</fixed-case> portals HaifengHu - BingquanLiu + BingquanLiu BaoxunWang MingLiu XiaolongWang @@ -3011,7 +3011,7 @@ Bi-directional Inter-dependencies of Subjective Expressions and Targets and their Value for a Joint Model RomanKlinger - PhilippCimiano + PhilippCimiano 848–854 P13-2147 klinger-cimiano-2013-bi @@ -3019,7 +3019,7 @@ Identifying Sentiment Words Using an Optimization-based Model without Seed Words HongliangYu - Zhi-HongDeng + Zhi-HongDeng ShiyingxueLi 855–859 P13-2148 @@ -3029,7 +3029,7 @@ Detecting Turnarounds in Sentiment Analysis: Thwarting AnkitRamteke AkshatMalu - PushpakBhattacharyya + PushpakBhattacharyya J. SakethaNath 860–865 P13-2149 @@ -3053,7 +3053,7 @@ Word surprisal predicts N400 amplitude during reading - Stefan L.Frank + Stefan L.Frank Leun J.Otten GiuliaGalli GabriellaVigliocco @@ -3064,7 +3064,7 @@ Computerized Analysis of a Verbal Fluency Test James O.Ryan - SergueiPakhomov + SergueiPakhomov SusanMarino CharlesBernick SarahBanks @@ -3089,9 +3089,9 @@ AnikDey SebastianKrause IvelinaNikolova - EvaVecchi + EvaVecchi StevenBethard - Preslav I.Nakov + Preslav I.Nakov FeiyuXu Association for Computational Linguistics
Sofia, Bulgaria
@@ -3106,7 +3106,7 @@ Categorization of <fixed-case>T</fixed-case>urkish News Documents with Morphological Analysis Burak KerimAkkuş - RuketCakici + RuketCakici 1–8 P13-3001 akkus-cakici-2013-categorization @@ -3136,7 +3136,7 @@ Survey on parsing three dependency representations for <fixed-case>E</fixed-case>nglish AngelinaIvanova StephanOepen - LiljaØvrelid + LiljaØvrelid 31–37 P13-3005 ivanova-etal-2013-survey @@ -3161,7 +3161,7 @@ Automated Collocation Suggestion for <fixed-case>J</fixed-case>apanese Second Language Learners LisPereira ErlynManguilimotan - YujiMatsumoto + YujiMatsumoto 52–58 P13-3008 pereira-etal-2013-automated @@ -3175,7 +3175,7 @@ Topic Modeling Based Classification of Clinical Reports - EfsunSarioglu + EfsunSarioglu KabirYadav Hyeong-AhChoi 67–73 @@ -3223,7 +3223,7 @@ ShoTakase AkikoMurakami MikiEnoki - NaoakiOkazaki + NaoakiOkazaki KentaroInui 110–116 P13-3016 @@ -3263,7 +3263,7 @@ Simple, readable sub-sentences SigridKlerke - AndersSøgaard + AndersSøgaard 142–149 P13-3021 klerke-sogaard-2013-simple @@ -3278,7 +3278,7 @@ Robust multilingual statistical morphological generation models OndřejDušek - FilipJurčíček + FilipJurčíček 158–164 P13-3023 dusek-jurcicek-2013-robust @@ -3322,7 +3322,7 @@ Seid MuhieYimam IrynaGurevych RichardEckart de Castilho - ChrisBiemann + ChrisBiemann 1–6 P13-4001 yimam-etal-2013-webanno @@ -3331,7 +3331,7 @@ A Stacking-based Approach to <fixed-case>T</fixed-case>witter User Geolocation Prediction BoHan PaulCook - TimothyBaldwin + TimothyBaldwin 7–12 P13-4002 han-etal-2013-stacking @@ -3339,7 +3339,7 @@ An Open Source Toolkit for Quantitative Historical Linguistics Johann-MattisList - StevenMoran + StevenMoran 13–18 P13-4003 list-moran-2013-open @@ -3347,9 +3347,9 @@ <fixed-case>A</fixed-case>nno<fixed-case>M</fixed-case>arket: An Open Cloud Platform for <fixed-case>NLP</fixed-case> ValentinTablan - KalinaBontcheva + KalinaBontcheva IanRoberts - HamishCunningham + HamishCunningham MarinDimitrov 19–24 P13-4004 @@ -3357,17 +3357,17 @@ Detecting Event-Related Links and Sentiments from Social Media Texts - AlexandraBalahur - HristoTanev + AlexandraBalahur + HristoTanev 25–30 P13-4005 balahur-tanev-2013-detecting <fixed-case>DISSECT</fixed-case> - <fixed-case>DIS</fixed-case>tributional <fixed-case>SE</fixed-case>mantics Composition Toolkit - GeorgianaDinu - Nghia ThePham - MarcoBaroni + GeorgianaDinu + Nghia ThePham + MarcoBaroni 31–36 P13-4006 dinu-etal-2013-dissect @@ -3387,7 +3387,7 @@ Extending an interoperable platform to facilitate the creation of multilingual and multimodal <fixed-case>NLP</fixed-case> applications GeorgiosKontonatsios PaulThompson - Riza TheresaBatista-Navarro + Riza TheresaBatista-Navarro ClaudiuMihăilă IoannisKorkontzelos SophiaAnaniadou @@ -3399,7 +3399,7 @@ <fixed-case>F</fixed-case>udan<fixed-case>NLP</fixed-case>: A Toolkit for <fixed-case>C</fixed-case>hinese Natural Language Processing XipengQiu QiZhang - XuanjingHuang + XuanjingHuang 49–54 P13-4009 qiu-etal-2013-fudannlp @@ -3418,10 +3418,10 @@ Meet <fixed-case>EDGAR</fixed-case>, a tutoring agent at <fixed-case>MONSERRATE</fixed-case> PedroFialho - LuísaCoheur + LuísaCoheur SérgioCurto PedroCláudio - ÂngelaCosta + ÂngelaCosta AlbertoAbad HugoMeinedo IsabelTrancoso @@ -3452,8 +3452,8 @@ <fixed-case>Q</fixed-case>u<fixed-case>E</fixed-case>st - A translation quality estimation framework LuciaSpecia KashifShah - Jose G.C.de Souza - TrevorCohn + Jose G.C.de Souza + TrevorCohn 79–84 P13-4014 specia-etal-2013-quest @@ -3496,7 +3496,7 @@ A Visual Analytics System for Cluster Exploration AndreasLamprecht - AnnetteHautli + AnnetteHautli ChristianRohrdantz TinaBögel 109–114 @@ -3546,10 +3546,10 @@ <fixed-case>L</fixed-case>inggle: a Web-scale Linguistic Search Engine for Words in Context JoanneBoisson - Ting-HuiKao - Jian-ChengWu + Ting-HuiKao + Jian-ChengWu Tzu-HsiYen - Jason S.Chang + Jason S.Chang 139–144 P13-4024 boisson-etal-2013-linggle @@ -3565,13 +3565,13 @@ <fixed-case>PATHS</fixed-case>: A System for Accessing Cultural Heritage Collections - EnekoAgirre + EnekoAgirre NikolaosAletras - PaulClough + PaulClough SamuelFernando PaulaGoodale - MarkHall - AitorSoroa + MarkHall + AitorSoroa MarkStevenson 151–156 P13-4026 @@ -3591,8 +3591,8 @@ VasileRus MihaiLintean RajendraBanjade - NobalNiraula - DanStefanescu + NobalNiraula + DanStefanescu 163–168 P13-4028 rus-etal-2013-semilar @@ -3602,8 +3602,8 @@ KapilaPonnamperuma AdvaithSiddharthan ChengZeng - ChrisMellish - Renévan der Wal + ChrisMellish + Renévan der Wal 169–174 P13-4029 ponnamperuma-etal-2013-tag2blog @@ -3611,19 +3611,19 @@ <fixed-case>T</fixed-case>rans<fixed-case>D</fixed-case>oop: A Map-Reduce based Crowdsourced Translation for Complex Domain AnoopKunchukuttan - RajenChatterjee + RajenChatterjee ShouryaRoy AbhijitMishra - PushpakBhattacharyya + PushpakBhattacharyya 175–180 P13-4030 kunchukuttan-etal-2013-transdoop t<fixed-case>SEARCH</fixed-case>: Flexible and Fast Search over Automatic Translations for Improved Quality/Error Analysis - MeritxellGonzàlez + MeritxellGonzàlez LauraMascarell - LluísMàrquez + LluísMàrquez 181–186 P13-4031 gonzalez-etal-2013-tsearch @@ -3632,7 +3632,7 @@ <fixed-case>VSEM</fixed-case>: An open library for visual semantics representation EliaBruni UlisseBordignon - AdamLiska + AdamLiska JasperUijlings IrinaSergienya 187–192 @@ -3643,7 +3643,7 @@ <fixed-case>D</fixed-case>ocent: A Document-Level Decoder for Phrase-Based Statistical Machine Translation ChristianHardmeier SaraStymne - JörgTiedemann + JörgTiedemann JoakimNivre 193–198 P13-4033 @@ -3653,7 +3653,7 @@ Mr. <fixed-case>MIRA</fixed-case>: Open-Source Large-Margin Structured Learning on <fixed-case>M</fixed-case>ap<fixed-case>R</fixed-case>educe VladimirEidelman KeWu - FerhanTure + FerhanTure PhilipResnik JimmyLin 199–204 @@ -3666,7 +3666,7 @@ Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Tutorials) P13-5 JohanBos - KeithHall + KeithHall Association for Computational Linguistics
Sofia, Bulgaria
August @@ -3680,7 +3680,7 @@ Visual Features for Linguists: Basic image analysis techniques for multimodally-curious <fixed-case>NLP</fixed-case>ers EliaBruni - MarcoBaroni + MarcoBaroni 1 P13-5001 bruni-baroni-2013-visual @@ -3689,7 +3689,7 @@ Semantic Parsing with <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammars YoavArtzi NicholasFitzGerald - LukeZettlemoyer + LukeZettlemoyer 2 P13-5002 artzi-etal-2013-semantic @@ -3703,7 +3703,7 @@ Exploiting Social Media for Natural Language Processing: Bridging the Gap between Language-centric and Real-world Applications - Simone PaoloPonzetto + Simone PaoloPonzetto AndreaZielinski 5–6 P13-5004 diff --git a/data/xml/P14.xml b/data/xml/P14.xml index acdf203fda..f2f5202bdc 100644 --- a/data/xml/P14.xml +++ b/data/xml/P14.xml @@ -53,7 +53,7 @@ Discovering Latent Structure in Task-Oriented Dialogues KeZhai - Jason D.Williams + Jason D.Williams 36–46 P14-1004 10.3115/v1/P14-1004 @@ -73,7 +73,7 @@ Multilingual Models for Compositional Distributed Semantics Karl MoritzHermann - PhilBlunsom + PhilBlunsom 58–68 P14-1006 10.3115/v1/P14-1006 @@ -83,7 +83,7 @@ Simple Negation Scope Resolution through Deep Parsing: A Semantic Solution to a Semantic Problem WoodleyPackard - Emily M.Bender + Emily M.Bender JonathonRead StephanOepen RebeccaDridan @@ -109,8 +109,8 @@ A practical and linguistically-motivated approach to compositional distributional semantics DenisPaperno - Nghia ThePham - MarcoBaroni + Nghia ThePham + MarcoBaroni 90–99 P14-1009 10.3115/v1/P14-1009 @@ -136,7 +136,7 @@ ShujieLiu MuLi MingZhou - ChengqingZong + ChengqingZong 111–121 P14-1011 10.3115/v1/P14-1011 @@ -164,7 +164,7 @@ QimingChen MuLi MingZhou - MuyunYang + MuyunYang 133–143 P14-1013 10.3115/v1/P14-1013 @@ -186,7 +186,7 @@ Unsupervised Solution Post Identification from Discussion Forums - DeepakP + DeepakP KarthikVisweswariah 155–164 P14-1015 @@ -199,7 +199,7 @@ Weakly Supervised User Profile Extraction from <fixed-case>T</fixed-case>witter JiweiLi AlanRitter - EduardHovy + EduardHovy 165–174 P14-1016 10.3115/v1/P14-1016 @@ -281,9 +281,9 @@ Don’t count, predict! A systematic comparison of context-counting vs. context-predicting semantic vectors - MarcoBaroni - GeorgianaDinu - GermánKruszewski + MarcoBaroni + GeorgianaDinu + GermánKruszewski 238–247 P14-1023 10.3115/v1/P14-1023 @@ -295,8 +295,8 @@ YuliaTsvetkov LeonidBoytsov AnatoleGershman - EricNyberg - ChrisDyer + EricNyberg + ChrisDyer 248–258 P14-1024 10.3115/v1/P14-1024 @@ -308,9 +308,9 @@ Learning Word Sense Distributions, Detecting Unattested Senses and Identifying Novel Senses Using Topic Models Jey HanLau PaulCook - DianaMcCarthy + DianaMcCarthy SpandanaGella - TimothyBaldwin + TimothyBaldwin 259–270 P14-1025 10.3115/v1/P14-1025 @@ -322,7 +322,7 @@ Learning to Automatically Solve Algebra Word Problems NateKushman YoavArtzi - LukeZettlemoyer + LukeZettlemoyer ReginaBarzilay 271–281 P14-1026 @@ -347,7 +347,7 @@ Max-Margin Tensor Neural Network for <fixed-case>C</fixed-case>hinese Word Segmentation WenzhePei TaoGe - BaobaoChang + BaobaoChang 293–303 P14-1028 10.3115/v1/P14-1028 @@ -359,7 +359,7 @@ An Empirical Study on the Effect of Negation Words on Sentiment XiaodanZhu HongyuGuo - SaifMohammad + SaifMohammad SvetlanaKiritchenko 304–313 P14-1029 @@ -382,7 +382,7 @@ Context-aware Learning for Sentence-level Sentiment Analysis with Posterior Regularization BishanYang - ClaireCardie + ClaireCardie 325–335 P14-1031 10.3115/v1/P14-1031 @@ -431,7 +431,7 @@ A <fixed-case>B</fixed-case>ayesian Mixed Effects Model of Literary Character DavidBamman TedUnderwood - Noah A.Smith + Noah A.Smith 370–379 P14-1035 10.3115/v1/P14-1035 @@ -444,7 +444,7 @@ YunboCao XiaojiangHuang HengJi - Chin-YewLin + Chin-YewLin 380–390 P14-1036 10.3115/v1/P14-1036 @@ -463,7 +463,7 @@ Incremental Joint Extraction of Entity Mentions and Relations - QiLi + QiLi HengJi 402–412 P14-1038 @@ -474,7 +474,7 @@ That’s Not What <fixed-case>I</fixed-case> Meant! Using Parsers to Avoid Structural Ambiguities in Generated Text ManjuanDuan - MichaelWhite + MichaelWhite 413–423 P14-1039 10.3115/v1/P14-1039 @@ -508,7 +508,7 @@ Grammatical Relations in <fixed-case>C</fixed-case>hinese: <fixed-case>GB</fixed-case>-Ground Extraction and Data-Driven Parsing - WeiweiSun + WeiweiSun YantaoDu XinKou ShuoyangDing @@ -547,7 +547,7 @@ Predicting the relevance of distributional semantic similarity with contextual information PhilippeMuller - CécileFabre + CécileFabre ClémentineAdam 479–488 P14-1045 @@ -559,9 +559,9 @@ Interpretable Semantic Vectors from a Joint Model of Brain- and Text- Based Meaning AlonaFyshe - Partha P.Talukdar + Partha P.Talukdar BrianMurphy - Tom M.Mitchell + Tom M.Mitchell 489–499 P14-1046 10.3115/v1/P14-1046 @@ -571,7 +571,7 @@ Single-Agent vs. Multi-Agent Techniques for Concurrent Reinforcement Learning of Negotiation Dialogue Policies KallirroiGeorgila ClaireNelson - DavidTraum + DavidTraum 500–510 P14-1047 10.3115/v1/P14-1047 @@ -591,8 +591,8 @@ Negation Focus Identification with Contextual Discourse Information BoweiZou - GuodongZhou - QiaomingZhu + GuodongZhou + QiaomingZhu 522–530 P14-1049 10.3115/v1/P14-1049 @@ -615,7 +615,7 @@ <fixed-case>R</fixed-case>e<fixed-case>N</fixed-case>ew: A Semi-Supervised Framework for Generating Domain-Specific Lexicons and Sentiment Analysis ZheZhang - Munindar P.Singh + Munindar P.Singh 542–551 P14-1051 10.3115/v1/P14-1051 @@ -655,11 +655,11 @@ Bilingual Active Learning for Relation Classification via Pseudo Parallel Corpora - LonghuaQian + LonghuaQian HaotianHui Ya’nanHu - GuodongZhou - QiaomingZhu + GuodongZhou + QiaomingZhu 582–592 P14-1055 10.3115/v1/P14-1055 @@ -691,8 +691,8 @@ Learning to Predict Distributions of Words Across Domains DanushkaBollegala - DavidWeir - JohnCarroll + DavidWeir + JohnCarroll 613–623 P14-1058 10.3115/v1/P14-1058 @@ -701,8 +701,8 @@ How to make words with vectors: Phrase generation in distributional semantics - GeorgianaDinu - MarcoBaroni + GeorgianaDinu + MarcoBaroni 624–633 P14-1059 10.3115/v1/P14-1059 @@ -711,7 +711,7 @@ Vector space semantics with frequency-driven motifs ShashankSrivastava - EduardHovy + EduardHovy 634–643 P14-1060 10.3115/v1/P14-1060 @@ -721,8 +721,8 @@ Lexical Inference over Multi-Word Predicates: A Distributional Approach OmriAbend - Shay B.Cohen - MarkSteedman + Shay B.Cohen + MarkSteedman 644–654 P14-1061 10.3115/v1/P14-1061 @@ -731,8 +731,8 @@ A Convolutional Neural Network for Modelling Sentences NalKalchbrenner - EdwardGrefenstette - PhilBlunsom + EdwardGrefenstette + PhilBlunsom 655–665 P14-1062 10.3115/v1/P14-1062 @@ -742,7 +742,7 @@ Online Learning in Tensor Space YuanCao - SanjeevKhudanpur + SanjeevKhudanpur 666–675 P14-1063 10.3115/v1/P14-1063 @@ -752,7 +752,7 @@ Graph-based Semi-Supervised Learning of Translation Models from Monolingual Data AvneeshSaluja - HanyHassan + HanyHassan KristinaToutanova ChrisQuirk 676–686 @@ -763,10 +763,10 @@ Using Discourse Structure Improves Machine Translation Evaluation - FranciscoGuzmán - ShafiqJoty - LluísMàrquez - PreslavNakov + FranciscoGuzmán + ShafiqJoty + LluísMàrquez + PreslavNakov 687–698 P14-1065 10.3115/v1/P14-1065 @@ -777,7 +777,7 @@ Learning Continuous Phrase Representations for Translation Modeling JianfengGao XiaodongHe - Wen-tauYih + Wen-tauYih LiDeng 699–709 P14-1066 @@ -788,8 +788,8 @@ Adaptive Quality Estimation for Machine Translation MarcoTurchi AntoniosAnastasopoulos - José G.C. de Souza - MatteoNegri + José G.C. de Souza + MatteoNegri 710–720 P14-1067 10.3115/v1/P14-1067 @@ -817,8 +817,8 @@ Strategies for Contiguous Multiword Expression Analysis and Dependency Parsing - MarieCandito - MatthieuConstant + MarieCandito + MatthieuConstant 743–753 P14-1070 10.3115/v1/P14-1070 @@ -849,7 +849,7 @@ Robust Entity Clustering via Phylogenetic Inference NicholasAndrews - JasonEisner + JasonEisner MarkDredze 775–785 P14-1073 @@ -862,7 +862,7 @@ Linguistic Structured Sparsity in Text Categorization DaniYogatama - Noah A.Smith + Noah A.Smith 786–796 P14-1074 10.3115/v1/P14-1074 @@ -883,7 +883,7 @@ Robust Domain Adaptation for Relation Extraction via Clustering Consistency Minh LuanNguyen Ivor W.Tsang - Kian Ming A.Chai + Kian Ming A.Chai Hai LeongChieu 807–817 P14-1076 @@ -921,8 +921,8 @@ DeliZhao QiangZhou ZhiyuanLiu - Thomas FangZheng - Edward Y.Chang + Thomas FangZheng + Edward Y.Chang 839–849 P14-1079 10.3115/v1/P14-1079 @@ -934,7 +934,7 @@ Enhancing Grammatical Cohesion: Generating Transitional Expressions for <fixed-case>SMT</fixed-case> MeiTu YuZhou - ChengqingZong + ChengqingZong 850–860 P14-1080 10.3115/v1/P14-1080 @@ -944,9 +944,9 @@ Adaptive <fixed-case>HTER</fixed-case> Estimation for Document-Specific <fixed-case>MT</fixed-case> Post-Editing FeiHuang - Jian-MingXu + Jian-MingXu AbrahamIttycheriah - SalimRoukos + SalimRoukos 861–870 P14-1081 10.3115/v1/P14-1081 @@ -956,7 +956,7 @@ Translation Assistance by Translation of <fixed-case>L</fixed-case>1 Fragments in an <fixed-case>L</fixed-case>2 Context Maartenvan Gompel - Antalvan den Bosch + Antalvan den Bosch 871–880 P14-1082 10.3115/v1/P14-1082 @@ -991,7 +991,7 @@ Hierarchical Summarization: Scaling Up Multi-Document Summarization JanaraChristensen - StephenSoderland + StephenSoderland GaganBansal Mausam 902–912 @@ -1014,7 +1014,7 @@ Exploiting Timelines to Enhance Multi-document Summarization - Jun-PingNg + Jun-PingNg YanChen Min-YenKan ZhoujunLi @@ -1063,7 +1063,7 @@ JunweiBao NanDuan MingZhou - TiejunZhao + TiejunZhao 967–976 P14-1091 10.3115/v1/P14-1091 @@ -1073,7 +1073,7 @@ Discourse Complements Lexical Semantics for Non-factoid Answer Reranking - PeterJansen + PeterJansen MihaiSurdeanu PeterClark 977–986 @@ -1089,8 +1089,8 @@ KentaroTorisawa JulienKloetzer MotokiSano - IstvánVarga - Jong-HoonOh + IstvánVarga + Jong-HoonOh YutakaKidawara 987–997 P14-1093 @@ -1103,9 +1103,9 @@ Cross-narrative Temporal Ordering of Medical Events PreethiRaghavan - EricFosler-Lussier - NoémieElhadad - Albert M.Lai + EricFosler-Lussier + NoémieElhadad + Albert M.Lai 998–1008 P14-1094 10.3115/v1/P14-1094 @@ -1114,8 +1114,8 @@ Language-Aware Truth Assessment of Fact Candidates - NdapandulaNakashole - Tom M.Mitchell + NdapandulaNakashole + Tom M.Mitchell 1009–1019 P14-1095 10.3115/v1/P14-1095 @@ -1127,7 +1127,7 @@ SunnyMitra RitwikMitra MartinRiedl - ChrisBiemann + ChrisBiemann AnimeshMukherjee PawanGoyal 1020–1029 @@ -1140,8 +1140,8 @@ A Step-wise Usage-based Method for Inducing Polysemy-aware Verb Classes DaisukeKawahara - Daniel W.Peterson - MarthaPalmer + Daniel W.Peterson + MarthaPalmer 1030–1040 P14-1097 10.3115/v1/P14-1097 @@ -1163,8 +1163,8 @@ A Provably Correct Learning Algorithm for Latent-Variable <fixed-case>PCFG</fixed-case>s - Shay B.Cohen - MichaelCollins + Shay B.Cohen + MichaelCollins 1052–1061 P14-1099 10.3115/v1/P14-1099 @@ -1173,9 +1173,9 @@ Spectral Unsupervised Parsing with Additive Tree Metrics - Ankur P.Parikh - Shay B.Cohen - Eric P.Xing + Ankur P.Parikh + Shay B.Cohen + Eric P.Xing 1062–1072 P14-1100 10.3115/v1/P14-1100 @@ -1187,8 +1187,8 @@ Weak semantic context helps phonetic learning in a model of infant language acquisition StellaFrank - Naomi H.Feldman - SharonGoldwater + Naomi H.Feldman + SharonGoldwater 1073–1083 P14-1101 10.3115/v1/P14-1101 @@ -1198,7 +1198,7 @@ Bootstrapping into Filler-Gap: An Acquisition Story - Martenvan Schijndel + Martenvan Schijndel MichaElsner 1084–1093 P14-1102 @@ -1211,7 +1211,7 @@ Nonparametric Learning of Phonological Constraints in <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory GabrielDoyle KlintonBicknell - RogerLevy + RogerLevy 1094–1103 P14-1103 10.3115/v1/P14-1103 @@ -1245,10 +1245,10 @@ A Unified Model for Soft Linguistic Reordering Constraints in Statistical Machine Translation - JunhuiLi + JunhuiLi YuvalMarton PhilipResnik - HalDaumé III + HalDaumé III 1123–1133 P14-1106 10.3115/v1/P14-1106 @@ -1304,7 +1304,7 @@ Low-Resource Semantic Role Labeling - Matthew R.Gormley + Matthew R.Gormley MargaretMitchell BenjaminVan Durme MarkDredze @@ -1316,7 +1316,7 @@ Joint Syntactic and Semantic Parsing with <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar JayantKrishnamurthy - Tom M.Mitchell + Tom M.Mitchell 1188–1198 P14-1112 10.3115/v1/P14-1112 @@ -1339,9 +1339,9 @@ Probabilistic Soft Logic for Semantic Textual Similarity - IslamBeltagy + IslamBeltagy KatrinErk - RaymondMooney + RaymondMooney 1210–1219 P14-1114 10.3115/v1/P14-1114 @@ -1351,7 +1351,7 @@ Abstractive Summarization of Spoken and Written Conversations Based on Phrasal Queries YasharMehdad GiuseppeCarenini - Raymond T.Ng + Raymond T.Ng 1220–1230 P14-1115 10.3115/v1/P14-1115 @@ -1361,7 +1361,7 @@ Comparing Multi-label Classification with Reinforcement Learning for Summarisation of Time-series Data DimitraGkatzia - HelenHastie + HelenHastie OliverLemon 1231–1240 P14-1116 @@ -1383,7 +1383,7 @@ AliakseiSeveryn AlessandroMoschitti OlgaUryupina - BarbaraPlank + BarbaraPlank KatjaFilippova 1252–1261 P14-1118 @@ -1436,7 +1436,7 @@ Shallow Analysis Based Assessment of Syntactic Complexity for Automated Speech Scoring SumaBhat HuichaoXue - Su-YounYoon + Su-YounYoon 1305–1315 P14-1123 10.3115/v1/P14-1123 @@ -1446,7 +1446,7 @@ Can You Repeat That? Using Word Repetition to Improve Spoken Term Detection JonathanWintrode - SanjeevKhudanpur + SanjeevKhudanpur 1316–1325 P14-1124 10.3115/v1/P14-1124 @@ -1480,7 +1480,7 @@ Mohammad SadeghRasooli ThomasLippincott NizarHabash - OwenRambow + OwenRambow 1349–1359 P14-1127 10.3115/v1/P14-1127 @@ -1505,8 +1505,8 @@ RabihZbib ZhongqiangHuang ThomasLamar - RichardSchwartz - JohnMakhoul + RichardSchwartz + JohnMakhoul 1370–1380 P14-1129 10.3115/v1/P14-1129 @@ -1529,7 +1529,7 @@ <fixed-case>C</fixed-case>o<fixed-case>S</fixed-case>im<fixed-case>R</fixed-case>ank: A Flexible & Efficient Graph-Theoretic Similarity Measure SaschaRothe - HinrichSchütze + HinrichSchütze 1392–1402 P14-1131 10.3115/v1/P14-1131 @@ -1541,7 +1541,7 @@ Is this a wampimuk? Cross-modal mapping between distributional semantics and the visual world AngelikiLazaridou EliaBruni - MarcoBaroni + MarcoBaroni 1403–1414 P14-1132 10.3115/v1/P14-1132 @@ -1564,9 +1564,9 @@ A Discriminative Graph-Based Parser for the <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation JeffreyFlanigan SamThomson - JaimeCarbonell - ChrisDyer - Noah A.Smith + JaimeCarbonell + ChrisDyer + Noah A.Smith 1426–1436 P14-1134 10.3115/v1/P14-1134 @@ -1578,7 +1578,7 @@ KentonLee YoavArtzi JesseDodge - LukeZettlemoyer + LukeZettlemoyer 1437–1447 P14-1135 10.3115/v1/P14-1135 @@ -1602,7 +1602,7 @@ A Sense-Based Translation Model for Statistical Machine Translation - DeyiXiong + DeyiXiong MinZhang 1459–1469 P14-1137 @@ -1614,7 +1614,7 @@ Recurrent Neural Networks for Word Alignment Model AkihiroTamura TaroWatanabe - EiichiroSumita + EiichiroSumita 1470–1480 P14-1138 10.3115/v1/P14-1138 @@ -1625,9 +1625,9 @@ A Constrained <fixed-case>V</fixed-case>iterbi Relaxation for Bidirectional Word Alignment Yin-WenChang - Alexander M.Rush + Alexander M.Rush JohnDeNero - MichaelCollins + MichaelCollins 1481–1490 P14-1139 10.3115/v1/P14-1139 @@ -1651,7 +1651,7 @@ Predicting Instructor’s Intervention in <fixed-case>MOOC</fixed-case> forums SnigdhaChaturvedi DanGoldwasser - HalDaumé III + HalDaumé III 1501–1511 P14-1141 10.3115/v1/P14-1141 @@ -1724,8 +1724,8 @@ Towards a General Rule for Identifying Deceptive Opinion Spam JiweiLi MyleOtt - ClaireCardie - EduardHovy + ClaireCardie + EduardHovy 1566–1576 P14-1147 10.3115/v1/P14-1147 @@ -1763,9 +1763,9 @@ Biases in Predicting the Human Language Model - Alex B.Fine + Alex B.Fine Austin F.Frank - T. FlorianJaeger + T. FlorianJaeger BenjaminVan Durme 7–12 P14-2002 @@ -1777,7 +1777,7 @@ ChangsongLiu LanboShe RuiFang - Joyce Y.Chai + Joyce Y.Chai 13–18 P14-2003 10.3115/v1/P14-2003 @@ -1786,7 +1786,7 @@ A Composite Kernel Approach for Dialog Topic Tracking with Structured Domain Knowledge from <fixed-case>W</fixed-case>ikipedia SeokhwanKim - Rafael E.Banchs + Rafael E.Banchs HaizhouLi 19–23 P14-2004 @@ -1796,10 +1796,10 @@ An Extension of <fixed-case>BLANC</fixed-case> to System Mentions - XiaoqiangLuo - SameerPradhan + XiaoqiangLuo + SameerPradhan MartaRecasens - EduardHovy + EduardHovy 24–29 P14-2005 10.3115/v1/P14-2005 @@ -1808,10 +1808,10 @@ Scoring Coreference Partitions of Predicted Mentions: A Reference Implementation - SameerPradhan - XiaoqiangLuo + SameerPradhan + XiaoqiangLuo MartaRecasens - EduardHovy + EduardHovy VincentNg MichaelStrube 30–35 @@ -1824,7 +1824,7 @@ AdityaJoshi AbhijitMishra NivvedanSenthamilselvan - PushpakBhattacharyya + PushpakBhattacharyya 36–41 P14-2007 10.3115/v1/P14-2007 @@ -1834,7 +1834,7 @@ Improving Citation Polarity Classification with Product Reviews CharlesJochim - HinrichSchütze + HinrichSchütze 42–48 P14-2008 10.3115/v1/P14-2008 @@ -1876,7 +1876,7 @@ Employing Word Representations and Regularization for Domain Adaptation of Relation Extraction Thien HuuNguyen - RalphGrishman + RalphGrishman 68–74 P14-2012 10.3115/v1/P14-2012 @@ -1886,7 +1886,7 @@ Graph Ranking for Collective Named Entity Disambiguation AymanAlhelbawy - RobertGaizauskas + RobertGaizauskas 75–80 P14-2013 10.3115/v1/P14-2013 @@ -1896,11 +1896,11 @@ Descending-Path Convolution Kernel for Syntactic Structures ChenLin - TimothyMiller + TimothyMiller AlvinKho StevenBethard DmitriyDligach - SameerPradhan + SameerPradhan GuerganaSavova 81–86 P14-2014 @@ -1920,8 +1920,8 @@ Automatic Detection of Multilingual Dictionaries on the Web - GintarėGrigonytė - TimothyBaldwin + GintarėGrigonytė + TimothyBaldwin 93–98 P14-2016 10.3115/v1/P14-2016 @@ -1930,8 +1930,8 @@ Automatic Detection of Cognates Using Orthographic Alignment - Alina MariaCiobanu - Liviu P.Dinu + Alina MariaCiobanu + Liviu P.Dinu 99–105 P14-2017 10.3115/v1/P14-2017 @@ -1944,7 +1944,7 @@ Automatically constructing <fixed-case>W</fixed-case>ordnet Synsets Khang NhutLam FerasAl Tarouti - JugalKalita + JugalKalita 106–111 P14-2018 10.3115/v1/P14-2018 @@ -1974,7 +1974,7 @@ Robust Logistic Regression using Shift Parameters JulieTibshirani - Christopher D.Manning + Christopher D.Manning 124–129 P14-2021 10.3115/v1/P14-2021 @@ -1984,7 +1984,7 @@ Faster Phrase-Based Decoding by Refining Feature State KennethHeafield MichaelKayser - Christopher D.Manning + Christopher D.Manning 130–135 P14-2022 10.3115/v1/P14-2022 @@ -2013,7 +2013,7 @@ Simple extensions and <fixed-case>POS</fixed-case> Tags for a reparameterised <fixed-case>IBM</fixed-case> Model 2 DouweGelling - TrevorCohn + TrevorCohn 150–154 P14-2025 10.3115/v1/P14-2025 @@ -2023,7 +2023,7 @@ Dependency-based Pre-ordering for <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Machine Translation JingshengCai MasaoUtiyama - EiichiroSumita + EiichiroSumita YujieZhang 155–160 P14-2026 @@ -2060,7 +2060,7 @@ NitinMadnani MelissaLopez MatthewMulholland - JoelTetreault + JoelTetreault 174–180 P14-2029 10.3115/v1/P14-2029 @@ -2096,7 +2096,7 @@ Two Knives Cut Better Than One: <fixed-case>C</fixed-case>hinese Word Segmentation with Dual Decomposition MengqiuWang RobVoigt - Christopher D.Manning + Christopher D.Manning 193–198 P14-2032 10.3115/v1/P14-2032 @@ -2117,7 +2117,7 @@ Word Segmentation of Informal <fixed-case>A</fixed-case>rabic with Domain Adaptation WillMonroe SpenceGreen - Christopher D.Manning + Christopher D.Manning 206–211 P14-2034 10.3115/v1/P14-2034 @@ -2130,7 +2130,7 @@ Resolving Lexical Ambiguity in Tensor Regression Models of Meaning DimitriKartsaklis NalKalchbrenner - MehrnooshSadrzadeh + MehrnooshSadrzadeh 212–217 P14-2035 10.3115/v1/P14-2035 @@ -2140,7 +2140,7 @@ A Novel Content Enriching Model for Microblog Using News Corpus YunlunYang - ZhihongDeng + ZhihongDeng HongliangYu 218–223 P14-2036 @@ -2152,7 +2152,7 @@ Learning Bilingual Word Representations by Marginalizing Alignments TomášKočiský Karl MoritzHermann - PhilBlunsom + PhilBlunsom 224–229 P14-2037 10.3115/v1/P14-2037 @@ -2161,7 +2161,7 @@ Detecting Retries of Voice Search Queries RivkaLevitan - DavidElson + DavidElson 230–235 P14-2038 10.3115/v1/P14-2038 @@ -2213,9 +2213,9 @@ Part-of-Speech Tagging using Conditional Random Fields: Exploiting Sub-Label Dependencies for Improved Accuracy - MiikkaSilfverberg + MiikkaSilfverberg TeemuRuokolainen - KristerLindén + KristerLindén MikkoKurimo 259–264 P14-2043 @@ -2228,7 +2228,7 @@ KairitSirts JacobEisenstein MichaElsner - SharonGoldwater + SharonGoldwater 265–271 P14-2044 10.3115/v1/P14-2044 @@ -2238,7 +2238,7 @@ Improving the Recognizability of Syntactic Relations Using Contextualized Examples AditiMuralidharan - Marti A.Hearst + Marti A.Hearst 272–277 P14-2045 10.3115/v1/P14-2045 @@ -2283,7 +2283,7 @@ Improving sparse word similarity models with asymmetric measures - Jean MarkGawron + Jean MarkGawron 296–301 P14-2049 10.3115/v1/P14-2049 @@ -2319,7 +2319,7 @@ YutaKikuchi TsutomuHirao HiroyaTakamura - ManabuOkumura + ManabuOkumura MasaakiNagata 315–320 P14-2052 @@ -2331,7 +2331,7 @@ Linguistic Considerations in Automatic Question Generation KarenMazidi - Rodney D.Nielsen + Rodney D.Nielsen 321–326 P14-2053 10.3115/v1/P14-2053 @@ -2363,7 +2363,7 @@ Predicting Power Relations between Participants in Written Dialog from a Single Thread VinodkumarPrabhakaran - OwenRambow + OwenRambow 339–344 P14-2056 10.3115/v1/P14-2056 @@ -2384,7 +2384,7 @@ Automation and Evaluation of the Keyword Method for Second Language Learning - GözdeÖzbal + GözdeÖzbal DanielePighin CarloStrapparava 352–357 @@ -2408,7 +2408,7 @@ Hippocratic Abbreviation Expansion BrianRoark - RichardSproat + RichardSproat 364–369 P14-2060 10.3115/v1/P14-2060 @@ -2418,10 +2418,10 @@ Unsupervised Feature Learning for Visual Sign Language Identification - Binyam GebrekidanGebre + Binyam GebrekidanGebre OnnoCrasborn - PeterWittenburg - SebastianDrude + PeterWittenburg + SebastianDrude TomHeskes 370–376 P14-2061 @@ -2433,8 +2433,8 @@ Experiments with crowdsourced re-annotation of a <fixed-case>POS</fixed-case> tagging data set DirkHovy - BarbaraPlank - AndersSøgaard + BarbaraPlank + AndersSøgaard 377–382 P14-2062 10.3115/v1/P14-2062 @@ -2445,7 +2445,7 @@ Building Sentiment Lexicons for All Major Languages YanqingChen - StevenSkiena + StevenSkiena 383–389 P14-2063 10.3115/v1/P14-2063 @@ -2467,8 +2467,8 @@ The <fixed-case>V</fixed-case>erb<fixed-case>C</fixed-case>orner Project: Findings from Phase 1 of crowd-sourcing a semantic decomposition of verbs Joshua K.Hartshorne - ClaireBonial - MarthaPalmer + ClaireBonial + MarthaPalmer 397–402 P14-2065 10.3115/v1/P14-2065 @@ -2492,7 +2492,7 @@ Determiner-Established Deixis to Communicative Artifacts in Pedagogical Text ShomirWilson - JonOberlander + JonOberlander 409–414 P14-2067 10.3115/v1/P14-2067 @@ -2545,7 +2545,7 @@ Cross-cultural Deception Detection VerónicaPérez-Rosas - RadaMihalcea + RadaMihalcea 440–445 P14-2072 10.3115/v1/P14-2072 @@ -2604,7 +2604,7 @@ Mutual Disambiguation for Entity Linking - EricCharton + EricCharton Marie-JeanMeurs LudovicJean-Louis MichelGagnon @@ -2626,7 +2626,7 @@ Learning Translational and Knowledge-based Similarities from Relevance Rankings for Cross-Language Retrieval ShigehikoSchamoni FelixHieber - ArtemSokolov + ArtemSokolov StefanRiezler 488–494 P14-2080 @@ -2637,7 +2637,7 @@ Two-Stage Hashing for Fast Document Retrieval HaoLi - WeiLiu + WeiLiu HengJi 495–500 P14-2081 @@ -2648,7 +2648,7 @@ An Annotation Framework for Dense Event Ordering TaylorCassidy BillMcDowell - NathanaelChambers + NathanaelChambers StevenBethard 501–506 P14-2082 @@ -2658,9 +2658,9 @@ Linguistically debatable or just plain wrong? - BarbaraPlank + BarbaraPlank DirkHovy - AndersSøgaard + AndersSøgaard 507–511 P14-2083 10.3115/v1/P14-2083 @@ -2669,7 +2669,7 @@ Humans Require Context to Infer Ironic Intent (so Computers Probably do, too) - Byron C.Wallace + Byron C.Wallace Do KookChoe LauraKertz EugeneCharniak @@ -2692,7 +2692,7 @@ Combining Word Patterns and Discourse Markers for Paradigmatic Relation Classification MichaelRoth - SabineSchulte im Walde + SabineSchulte im Walde 524–530 P14-2086 10.3115/v1/P14-2086 @@ -2733,7 +2733,7 @@ YusukeOda GrahamNeubig SakrianiSakti - TomokiToda + TomokiToda SatoshiNakamura 551–556 P14-2090 @@ -2742,7 +2742,7 @@ A joint inference of deep case analysis and zero subject generation for <fixed-case>J</fixed-case>apanese-to-<fixed-case>E</fixed-case>nglish statistical machine translation - TakuKudo + TakuKudo HiroshiIchikawa HidetoKazawa 557–562 @@ -2767,7 +2767,7 @@ YuHong HaoLiu XingWang - JianminYao + JianminYao 569–573 P14-2093 10.3115/v1/P14-2093 @@ -2777,7 +2777,7 @@ Refinements to Interactive Translation Prediction Based on Search Graphs PhilippKoehn CharaTsoukala - HerveSaint-Amand + HerveSaint-Amand 574–578 P14-2094 10.3115/v1/P14-2094 @@ -2796,8 +2796,8 @@ Cross-language and Cross-encyclopedia Article Linking Using Mixed-language Topic Model and Hypernym Translation Yu-ChunWang - Chun-KaiWu - Richard Tzong-HanTsai + Chun-KaiWu + Richard Tzong-HanTsai 586–591 P14-2096 10.3115/v1/P14-2096 @@ -2830,7 +2830,7 @@ RohanRamanath FeiLiu NormanSadeh - Noah A.Smith + Noah A.Smith 605–610 P14-2099 10.3115/v1/P14-2099 @@ -2842,7 +2842,7 @@ Yu-YangHuang RuiYan Tsung-TingKuo - Shou-DeLin + Shou-DeLin 611–617 P14-2100 10.3115/v1/P14-2100 @@ -2850,7 +2850,7 @@ Automatic Labelling of Topic Models Learned from <fixed-case>T</fixed-case>witter by Summarisation - Amparo ElizabethCano Basave + Amparo ElizabethCano Basave YulanHe RuifengXu 618–624 @@ -2863,7 +2863,7 @@ Stochastic Contextual Edit Distance and Probabilistic <fixed-case>FST</fixed-case>s RyanCotterell NanyunPeng - JasonEisner + JasonEisner 625–630 P14-2102 10.3115/v1/P14-2102 @@ -2893,9 +2893,9 @@ Semantic Parsing for Single-Relation Question Answering - Wen-tauYih + Wen-tauYih XiaodongHe - ChristopherMeek + ChristopherMeek 643–648 P14-2105 10.3115/v1/P14-2105 @@ -2906,10 +2906,10 @@ On <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Semantic Classes and Dependency Parsing KepaBengoetxea - EnekoAgirre + EnekoAgirre JoakimNivre YueZhang - KoldoGojenola + KoldoGojenola 649–655 P14-2106 10.3115/v1/P14-2106 @@ -2928,8 +2928,8 @@ The <fixed-case>P</fixed-case>enn Parsed Corpus of <fixed-case>M</fixed-case>odern <fixed-case>B</fixed-case>ritish <fixed-case>E</fixed-case>nglish: First Parsing Results and Analysis SethKulick - AnthonyKroch - BeatriceSantorini + AnthonyKroch + BeatriceSantorini 662–667 P14-2108 10.3115/v1/P14-2108 @@ -2941,9 +2941,9 @@ SethKulick AnnBies JustinMott - AnthonyKroch - BeatriceSantorini - MarkLiberman + AnthonyKroch + BeatriceSantorini + MarkLiberman 668–673 P14-2109 10.3115/v1/P14-2109 @@ -2964,7 +2964,7 @@ Normalizing tweets with edit scripts and recurrent neural embeddings - GrzegorzChrupała + GrzegorzChrupała 680–686 P14-2111 10.3115/v1/P14-2111 @@ -2986,7 +2986,7 @@ A Piece of My Mind: A Sentiment Analysis Approach for Online Dispute Detection LuWang - ClaireCardie + ClaireCardie 693–699 P14-2113 10.3115/v1/P14-2113 @@ -2997,7 +2997,7 @@ A Simple <fixed-case>B</fixed-case>ayesian Modelling Approach to Event Extraction from <fixed-case>T</fixed-case>witter DeyuZhou - LiangyuChen + LiangyuChen YulanHe 700–705 P14-2114 @@ -3060,7 +3060,7 @@ MariaPershina BonanMin WeiXu - RalphGrishman + RalphGrishman 732–738 P14-2119 10.3115/v1/P14-2119 @@ -3096,7 +3096,7 @@ XiaolinWang MasaoUtiyama AndrewFinch - EiichiroSumita + EiichiroSumita 752–758 P14-2122 10.3115/v1/P14-2122 @@ -3107,7 +3107,7 @@ <fixed-case>EM</fixed-case> Decipherment for Large Vocabularies MalteNuhn - HermannNey + HermannNey 759–764 P14-2123 10.3115/v1/P14-2123 @@ -3132,7 +3132,7 @@ HebaElfardy LindaAlamir-Salloum NizarHabash - MonaDiab + MonaDiab 772–778 P14-2125 10.3115/v1/P14-2125 @@ -3144,7 +3144,7 @@ FeifeiZhai JiajunZhang YuZhou - ChengqingZong + ChengqingZong 779–784 P14-2126 10.3115/v1/P14-2126 @@ -3156,7 +3156,7 @@ KaiZhao LiangHuang HaitaoMi - AbeIttycheriah + AbeIttycheriah 785–790 P14-2127 10.3115/v1/P14-2127 @@ -3234,8 +3234,8 @@ Distributed Representations of Geographically Situated Language DavidBamman - ChrisDyer - Noah A.Smith + ChrisDyer + Noah A.Smith 828–834 P14-2134 10.3115/v1/P14-2134 @@ -3260,7 +3260,7 @@ Bilingual Event Extraction: a Case Study on Trigger Type Determination ZhuZhu ShoushanLi - GuodongZhou + GuodongZhou RuiXia 842–847 P14-2136 @@ -3294,7 +3294,7 @@ Cross-lingual Opinion Analysis via Negative Transfer Detection LinGui RuifengXu - QinLu + QinLu JunXu JianXu BinLiu @@ -3328,7 +3328,7 @@ <fixed-case>B</fixed-case>ayesian Kernel Methods for Natural Language Processing - DanielBeck + DanielBeck 1–9 P14-3001 10.3115/v1/P14-3001 @@ -3355,7 +3355,7 @@ Analyzing Positions and Topics in Political Discussions of the <fixed-case>G</fixed-case>erman Bundestag - CäciliaZirn + CäciliaZirn 26–33 P14-3004 10.3115/v1/P14-3004 @@ -3365,7 +3365,7 @@ A Mapping-Based Approach for General Formal Human Computer Interaction Using Natural Language VincentLetard - SophieRosset + SophieRosset GabrielIllouz 34–40 P14-3005 @@ -3375,7 +3375,7 @@ An Exploration of Embeddings for Generalized Phrases WenpengYin - HinrichSchütze + HinrichSchütze 41–47 P14-3006 10.3115/v1/P14-3006 @@ -3419,7 +3419,7 @@ Open Information Extraction for <fixed-case>S</fixed-case>panish Language based on Syntactic Constraints AlisaZhila - AlexanderGelbukh + AlexanderGelbukh 78–85 P14-3011 10.3115/v1/P14-3011 @@ -3447,7 +3447,7 @@ Proceedings of 52nd Annual Meeting of the Association for Computational Linguistics: System Demonstrations P14-5 - KalinaBontcheva + KalinaBontcheva ZhuJingbo 10.3115/v1/P14-5 Association for Computational Linguistics @@ -3462,11 +3462,11 @@ Cross-Lingual Information to the Rescue in Keyword Extraction - Chung-ChiHuang + Chung-ChiHuang MaxineEskenazi - JaimeCarbonell + JaimeCarbonell Lun-WeiKu - Ping-CheYang + Ping-CheYang 1–6 P14-5001 10.3115/v1/P14-5001 @@ -3488,7 +3488,7 @@ Open-Source Tools for Morphology, Lemmatization, <fixed-case>POS</fixed-case> Tagging and Named Entity Recognition JanaStraková MilanStraka - JanHajič + JanHajič 13–18 P14-5003 10.3115/v1/P14-5003 @@ -3497,7 +3497,7 @@ Community Evaluation and Exchange of Word Vectors at wordvectors.org ManaalFaruqui - ChrisDyer + ChrisDyer 19–24 P14-5004 10.3115/v1/P14-5004 @@ -3508,7 +3508,7 @@ XianjunDai YuanchaoLiu XiaolongWang - BingquanLiu + BingquanLiu 25–30 P14-5005 10.3115/v1/P14-5005 @@ -3538,7 +3538,7 @@ IadhOunis YulanHe TomJackson - FabioCiravegna + FabioCiravegna AnnO’Brien 37–42 P14-5007 @@ -3547,13 +3547,13 @@ The Excitement Open Platform for Textual Inferences - BernardoMagnini + BernardoMagnini RobertoZanoli IdoDagan KathrinEichler - GuenterNeumann + GuenterNeumann Tae-GilNoh - SebastianPado + SebastianPado AsherStern OmerLevy 43–48 @@ -3566,8 +3566,8 @@ MorganUlinski AnushaBalakrishnan BobCoyne - JuliaHirschberg - OwenRambow + JuliaHirschberg + OwenRambow 49–54 P14-5009 10.3115/v1/P14-5009 @@ -3575,10 +3575,10 @@ The <fixed-case>S</fixed-case>tanford <fixed-case>C</fixed-case>ore<fixed-case>NLP</fixed-case> Natural Language Processing Toolkit - ChristopherManning + ChristopherManning MihaiSurdeanu JohnBauer - JennyFinkel + JennyFinkel StevenBethard DavidMcClosky 55–60 @@ -3610,7 +3610,7 @@ A Rule-Augmented Statistical Phrase-based Translation System Cong Duy VuHoang - AiTiAw + AiTiAw Nhung T. H.Nguyen 73–78 P14-5013 @@ -3620,7 +3620,7 @@ <fixed-case>K</fixed-case>yoto<fixed-case>EBMT</fixed-case>: An Example-Based Dependency-to-Dependency Translation Framework JohnRichardson - FabienCromières + FabienCromières ToshiakiNakazawa SadaoKurohashi 79–84 @@ -3643,7 +3643,7 @@ Automatic Annotation Suggestions and Custom Annotation Layers in <fixed-case>W</fixed-case>eb<fixed-case>A</fixed-case>nno Seid MuhieYimam - ChrisBiemann + ChrisBiemann RichardEckart de Castilho IrynaGurevych 91–96 @@ -3656,7 +3656,7 @@ BinyangLi LanjunZhou ZhongyuWei - Kam-faiWong + Kam-faiWong RuifengXu YunqingXia 97–102 @@ -3700,8 +3700,8 @@ Simplified Dependency Annotations with <fixed-case>GFL</fixed-case>-<fixed-case>W</fixed-case>eb Michael T.Mordowanec NathanSchneider - ChrisDyer - Noah A.Smith + ChrisDyer + Noah A.Smith 121–126 P14-5021 10.3115/v1/P14-5021 @@ -3712,7 +3712,7 @@ Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics: Tutorials P14-6 - AlexFraser + AlexFraser YangLiu 10.3115/v1/P14-6 Association for Computational Linguistics @@ -3727,8 +3727,8 @@ <fixed-case>G</fixed-case>aussian Processes for Natural Language Processing - TrevorCohn - DanielPreoţiuc-Pietro + TrevorCohn + DanielPreoţiuc-Pietro NeilLawrence 1–3 P14-6001 @@ -3767,10 +3767,10 @@ New Directions in Vector Space Models of Meaning - EdwardGrefenstette + EdwardGrefenstette Karl MoritzHermann - GeorgianaDinu - PhilBlunsom + GeorgianaDinu + PhilBlunsom 8 P14-6005 10.3115/v1/P14-6005 @@ -3780,8 +3780,8 @@ Structured Belief Propagation for <fixed-case>NLP</fixed-case> - MatthewGormley - JasonEisner + MatthewGormley + JasonEisner 9–10 P14-6006 10.3115/v1/P14-6006 @@ -3789,7 +3789,7 @@ Semantics, Discourse and Statistical Machine Translation - DeyiXiong + DeyiXiong MinZhang 11–12 P14-6007 diff --git a/data/xml/P15.xml b/data/xml/P15.xml index 0494fd799f..b33543bf5c 100644 --- a/data/xml/P15.xml +++ b/data/xml/P15.xml @@ -4,7 +4,7 @@ Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 1: Long Papers) P15-1 - ChengqingZong + ChengqingZong MichaelStrube 10.3115/v1/P15-1 Association for Computational Linguistics @@ -31,9 +31,9 @@ Addressing the Rare Word Problem in Neural Machine Translation - ThangLuong + ThangLuong IlyaSutskever - QuocLe + QuocLe OriolVinyals WojciechZaremba 11–19 @@ -61,8 +61,8 @@ JacobDevlin ThomasLamar RabihZbib - RichardSchwartz - JohnMakhoul + RichardSchwartz + JohnMakhoul 31–41 P15-1004 10.3115/v1/P15-1004 @@ -79,11 +79,11 @@ Text to 3<fixed-case>D</fixed-case> Scene Generation with Rich Lexical Grounding - AngelChang + AngelChang WillMonroe ManolisSavva ChristopherPotts - Christopher D.Manning + Christopher D.Manning 53–62 P15-1006 10.3115/v1/P15-1006 @@ -92,7 +92,7 @@ <fixed-case>M</fixed-case>ulti<fixed-case>G</fixed-case>ran<fixed-case>CNN</fixed-case>: An Architecture for General Matching of Text Chunks on Multiple Levels of Granularity WenpengYin - HinrichSchütze + HinrichSchütze 63–73 P15-1007 10.3115/v1/P15-1007 @@ -101,7 +101,7 @@ Weakly Supervised Models of Aspect-Sentiment for Online Course Discussion Forums ArtiRamesh - ShachiH. Kumar + ShachiH. Kumar JamesFoulds LiseGetoor 74–83 @@ -152,7 +152,7 @@ JamesFoulds BertHuang LiseGetoor - MarilynWalker + MarilynWalker 116–125 P15-1012 10.3115/v1/P15-1012 @@ -174,7 +174,7 @@ JiafengGuo YanyanLan JunXu - XueqiCheng + XueqiCheng 136–145 P15-1014 10.3115/v1/P15-1014 @@ -219,7 +219,7 @@ VidhoonViswanathan Nazneen FatemaRajani YinonBentor - RaymondMooney + RaymondMooney 177–187 P15-1018 10.3115/v1/P15-1018 @@ -241,7 +241,7 @@ YusukeOda GrahamNeubig SakrianiSakti - TomokiToda + TomokiToda SatoshiNakamura 198–207 P15-1020 @@ -258,8 +258,8 @@ Online Multitask Learning for Machine Translation Quality Estimation - José G.C. de Souza - MatteoNegri + José G.C. de Souza + MatteoNegri ElisaRicci MarcoTurchi 219–228 @@ -270,7 +270,7 @@ A Context-Aware Topic Model for Statistical Machine Translation JinsongSu - DeyiXiong + DeyiXiong YangLiu XianpeiHan HongyuLin @@ -285,7 +285,7 @@ Learning Answer-Entailing Structures for Machine Comprehension MrinmayaSachan KumarDubey - EricXing + EricXing MatthewRichardson 239–249 P15-1024 @@ -318,8 +318,8 @@ Hubness and Pollution: Delving into Cross-Space Mapping for Zero-Shot Learning AngelikiLazaridou - GeorgianaDinu - MarcoBaroni + GeorgianaDinu + MarcoBaroni 270–280 P15-1027 10.3115/v1/P15-1027 @@ -328,8 +328,8 @@ A Generalisation of Lexical Functions for Composition in Distributional Semantics AntoineBride - TimVan de Cruys - NicholasAsher + TimVan de Cruys + NicholasAsher 281–291 P15-1028 10.3115/v1/P15-1028 @@ -338,7 +338,7 @@ Simple Learning and Compositional Application of Perceptually Grounded Word Meanings for Incremental Reference Resolution - CaseyKennington + CaseyKennington DavidSchlangen 292–301 P15-1029 @@ -358,7 +358,7 @@ An Effective Neural Network Model for Graph-based Dependency Parsing WenzhePei TaoGe - BaobaoChang + BaobaoChang 313–322 P15-1031 10.3115/v1/P15-1031 @@ -368,7 +368,7 @@ Structured Training for Neural Network Transition-Based Parsing DavidWeiss ChrisAlberti - MichaelCollins + MichaelCollins SlavPetrov 323–333 P15-1032 @@ -378,11 +378,11 @@ Transition-Based Dependency Parsing with Stack Long Short-Term Memory - ChrisDyer + ChrisDyer MiguelBallesteros WangLing AustinMatthews - Noah A.Smith + Noah A.Smith 334–343 P15-1033 10.3115/v1/P15-1033 @@ -392,7 +392,7 @@ Leveraging Linguistic Structure For Open Domain Information Extraction GaborAngeli Melvin JoseJohnson Premkumar - Christopher D.Manning + Christopher D.Manning 344–354 P15-1034 10.3115/v1/P15-1034 @@ -401,7 +401,7 @@ Joint Information Extraction and Reasoning: A Scalable Statistical Relational Learning Approach William YangWang - William W.Cohen + William W.Cohen 355–364 P15-1035 10.3115/v1/P15-1035 @@ -409,8 +409,8 @@ A Knowledge-Intensive Model for Prepositional Phrase Attachment - NdapandulaNakashole - Tom M.Mitchell + NdapandulaNakashole + Tom M.Mitchell 365–375 P15-1036 10.3115/v1/P15-1036 @@ -428,9 +428,9 @@ It Depends: Dependency Parser Comparison Using A Web-based Evaluation Tool - Jinho D.Choi - JoelTetreault - AmandaStent + Jinho D.Choi + JoelTetreault + AmandaStent 387–396 P15-1038 10.3115/v1/P15-1038 @@ -455,7 +455,7 @@ CláudiaPinto HelenaFigueira PedroMendes - André F. T.Martins + André F. T.Martins 408–418 P15-1040 10.3115/v1/P15-1040 @@ -476,10 +476,10 @@ Learning Bilingual Sentiment Word Embeddings for Cross-language Sentiment Classification - HuiWeiZhou + HuiWeiZhou LongChen FulinShi - DegenHuang + DegenHuang 430–440 P15-1042 10.3115/v1/P15-1042 @@ -491,7 +491,7 @@ CatherineFinegan-Dollak BenKing ReedCoke - DragomirRadev + DragomirRadev 441–450 P15-1043 10.3115/v1/P15-1043 @@ -500,7 +500,7 @@ Training a Natural Language Generator From Unaligned Data OndřejDušek - FilipJurčíček + FilipJurčíček 451–461 P15-1044 10.3115/v1/P15-1044 @@ -511,7 +511,7 @@ RuiSun YueZhang MeishanZhang - DonghongJi + DonghongJi 462–472 P15-1045 10.3115/v1/P15-1045 @@ -521,7 +521,7 @@ New Transfer Learning Techniques for Disparate Label Sets Young-BumKim KarlStratos - RuhiSarikaya + RuhiSarikaya MinwooJeong 473–482 P15-1046 @@ -533,7 +533,7 @@ Yun-NungChen William YangWang AnatoleGershman - AlexanderRudnicky + AlexanderRudnicky 483–494 P15-1047 10.3115/v1/P15-1047 @@ -545,7 +545,7 @@ ShuangzhiWu DongdongZhang MingZhou - TiejunZhao + TiejunZhao 495–503 P15-1048 10.3115/v1/P15-1048 @@ -607,7 +607,7 @@ RamakrishnaBairi RishabhIyer GaneshRamakrishnan - JeffBilmes + JeffBilmes 553–563 P15-1054 10.3115/v1/P15-1054 @@ -618,7 +618,7 @@ NikosVoskarides EdgarMeij ManosTsagkias - Maartende Rijke + Maartende Rijke WouterWeerkamp 564–574 P15-1055 @@ -631,7 +631,7 @@ WenzhePei HengJi SujianLi - BaobaoChang + BaobaoChang ZhifangSui 575–585 P15-1056 @@ -645,7 +645,7 @@ HongzhaoHuang XiaomanPan SujianLi - Chin-YewLin + Chin-YewLin HengJi KevinKnight ZhenWen @@ -693,7 +693,7 @@ Classifying Relations by Ranking with Convolutional Neural Networks - Cícerodos Santos + Cícerodos Santos BingXiang BowenZhou 626–634 @@ -704,8 +704,8 @@ Semantic Representations for Domain Adaptation: A Case Study on the Tree Kernel-based Method for Relation Extraction Thien HuuNguyen - BarbaraPlank - RalphGrishman + BarbaraPlank + RalphGrishman 635–644 P15-1062 10.3115/v1/P15-1062 @@ -725,8 +725,8 @@ Negation and Speculation Identification in <fixed-case>C</fixed-case>hinese Language BoweiZou - QiaomingZhu - GuodongZhou + QiaomingZhu + GuodongZhou 656–665 P15-1064 10.3115/v1/P15-1064 @@ -736,7 +736,7 @@ Learning Relational Features with Backward Random Walks NiLao EinatMinkov - WilliamCohen + WilliamCohen 666–675 P15-1065 10.3115/v1/P15-1065 @@ -777,7 +777,7 @@ Knowledge Portability with Semantic Expansion of Ontology Labels - MihaelArcan + MihaelArcan MarcoTurchi PaulBuitelaar 708–718 @@ -806,7 +806,7 @@ A Unified Multilingual Semantic Representation of Concepts - JoséCamacho-Collados + JoséCamacho-Collados Mohammad TaherPilehvar RobertoNavigli 741–751 @@ -825,7 +825,7 @@ Vector-space calculation of semantic surprisal for predicting word pronunciation duration - AsadSayeed + AsadSayeed StefanFischer VeraDemberg 763–773 @@ -857,7 +857,7 @@ <fixed-case>G</fixed-case>aussian <fixed-case>LDA</fixed-case> for Topic Models with Word Embeddings RajarshiDas ManzilZaheer - ChrisDyer + ChrisDyer 795–804 P15-1077 10.3115/v1/P15-1077 @@ -865,10 +865,10 @@ Pairwise Neural Machine Translation Evaluation - FranciscoGuzmán - ShafiqJoty - LluísMàrquez - PreslavNakov + FranciscoGuzmán + ShafiqJoty + LluísMàrquez + PreslavNakov 805–814 P15-1078 10.3115/v1/P15-1078 @@ -888,8 +888,8 @@ Non-linear Learning for Statistical Machine Translation ShujianHuang HuadongChen - Xin-YuDai - JiajunChen + Xin-YuDai + JiajunChen 825–835 P15-1080 10.3115/v1/P15-1080 @@ -900,7 +900,7 @@ QingDou AshishVaswani KevinKnight - ChrisDyer + ChrisDyer 836–845 P15-1081 10.3115/v1/P15-1081 @@ -909,7 +909,7 @@ Non-projective Dependency-based Pre-Reordering with Recurrent Neural Network for Machine Translation - Antonio ValerioMiceli-Barone + Antonio ValerioMiceli-Barone GiuseppeAttardi 846–856 P15-1082 @@ -939,7 +939,7 @@ Language to Code: Learning Semantic Parsers for If-This-Then-That Recipes ChrisQuirk - RaymondMooney + RaymondMooney MichelGalley 878–888 P15-1085 @@ -958,7 +958,7 @@ The <fixed-case>NL</fixed-case>2<fixed-case>KR</fixed-case> Platform for building Natural Language Translation Systems - NguyenVo + NguyenVo ArindamMitra ChittaBaral 899–908 @@ -1017,7 +1017,7 @@ HirokiOuchi HiroyukiShindo KevinDuh - YujiMatsumoto + YujiMatsumoto 961–970 P15-1093 10.3115/v1/P15-1093 @@ -1025,10 +1025,10 @@ Jointly optimizing word representations for lexical and sentential tasks with the <fixed-case>C</fixed-case>-<fixed-case>PHRASE</fixed-case> model - Nghia ThePham - GermánKruszewski + Nghia ThePham + GermánKruszewski AngelikiLazaridou - MarcoBaroni + MarcoBaroni 971–981 P15-1094 10.3115/v1/P15-1094 @@ -1038,7 +1038,7 @@ Robust Subgraph Generation Improves <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation Parsing KeenonWerling GaborAngeli - Christopher D.Manning + Christopher D.Manning 982–991 P15-1095 10.3115/v1/P15-1095 @@ -1046,7 +1046,7 @@ Environment-Driven Lexicon Induction for High-Level Instructions - Dipendra KumarMisra + Dipendra KumarMisra KejiaTao PercyLiang AshutoshSaxena @@ -1089,7 +1089,7 @@ Sparse, Contextually Informed Models for Irony Detection: Exploiting User Communities, Entities and Sentiment - Byron C.Wallace + Byron C.Wallace Do KookChoe EugeneCharniak 1035–1044 @@ -1102,7 +1102,7 @@ ShoushanLi LeiHuang RongWang - GuodongZhou + GuodongZhou 1045–1053 P15-1101 10.3115/v1/P15-1101 @@ -1112,7 +1112,7 @@ Co-training for Semi-supervised Sentiment Classification Based on Dual-view Bags-of-words Representation RuiXia ChengWang - Xin-YuDai + Xin-YuDai TaoLi 1054–1063 P15-1102 @@ -1130,10 +1130,10 @@ Learning Word Representations from Scarce and Noisy Data with Embedding Subspaces - RamonF. Astudillo + RamonF. Astudillo SilvioAmir WangLing - MárioSilva + MárioSilva IsabelTrancoso 1074–1084 P15-1104 @@ -1153,7 +1153,7 @@ Driving <fixed-case>ROVER</fixed-case> with Segment-based <fixed-case>ASR</fixed-case> Quality Estimation ShahabJalalvand - MatteoNegri + MatteoNegri DanieleFalavigna MarcoTurchi 1095–1105 @@ -1164,8 +1164,8 @@ A Hierarchical Neural Autoencoder for Paragraphs and Documents JiweiLi - ThangLuong - DanJurafsky + ThangLuong + DanJurafsky 1106–1115 P15-1107 10.3115/v1/P15-1107 @@ -1175,8 +1175,8 @@ Joint Dependency Parsing and Multiword Expression Tokenization AlexisNasr CarlosRamisch - JoséDeulofeu - AndréValli + JoséDeulofeu + AndréValli 1116–1126 P15-1108 10.3115/v1/P15-1108 @@ -1206,7 +1206,7 @@ Identifying Cascading Errors using Constraints in Dependency Parsing DominickNg - James R.Curran + James R.Curran 1148–1158 P15-1111 10.3115/v1/P15-1111 @@ -1217,7 +1217,7 @@ ChenxiZhu XipengQiu XinchiChen - XuanjingHuang + XuanjingHuang 1159–1168 P15-1112 10.3115/v1/P15-1112 @@ -1226,7 +1226,7 @@ Transition-based Neural Constituent Parsing TaroWatanabe - EiichiroSumita + EiichiroSumita 1169–1179 P15-1113 10.3115/v1/P15-1113 @@ -1263,7 +1263,7 @@ HaoZhou YueZhang ShujianHuang - JiajunChen + JiajunChen 1213–1222 P15-1117 10.3115/v1/P15-1117 @@ -1292,7 +1292,7 @@ Can Natural Language Processing Become Natural Language Coaching? - Marti A.Hearst + Marti A.Hearst 1245–1252 P15-1120 10.3115/v1/P15-1120 @@ -1330,7 +1330,7 @@ Model-based Word Embeddings from Decompositions of Count Matrices KarlStratos - MichaelCollins + MichaelCollins DanielHsu 1282–1291 P15-1124 @@ -1343,7 +1343,7 @@ PoyaoHuang YuntianDeng YingkaiGao - EricXing + EricXing 1292–1300 P15-1125 10.3115/v1/P15-1125 @@ -1353,7 +1353,7 @@ Orthogonality of Syntax and Semantics within Distributional Spaces JeffMitchell - MarkSteedman + MarkSteedman 1301–1310 P15-1126 10.3115/v1/P15-1126 @@ -1365,7 +1365,7 @@ Scalable Semantic Parsing with Partial Ontologies EunsolChoi TomKwiatkowski - LukeZettlemoyer + LukeZettlemoyer 1311–1320 P15-1127 10.3115/v1/P15-1127 @@ -1373,7 +1373,7 @@ Semantic Parsing via Staged Query Graph Generation: Question Answering with Knowledge Base - Wen-tauYih + Wen-tauYih Ming-WeiChang XiaodongHe JianfengGao @@ -1398,7 +1398,7 @@ Predicting Polarities of Tweets by Composing Word Embeddings with Long Short-Term Memory XinWang YuanchaoLiu - ChengjieSun + ChengjieSun BaoxunWang XiaolongWang 1343–1353 @@ -1420,7 +1420,7 @@ QiaoQian BoTian MinlieHuang - YangLiu + YangLiu XuanZhu XiaoyanZhu 1365–1374 @@ -1431,8 +1431,8 @@ A convex and feature-rich discriminative approach to dependency grammar induction - ÉdouardGrave - NoémieElhadad + ÉdouardGrave + NoémieElhadad 1375–1384 P15-1133 10.3115/v1/P15-1133 @@ -1460,7 +1460,7 @@ Entity-Centric Coreference Resolution with Model Stacking KevinClark - Christopher D.Manning + Christopher D.Manning 1405–1415 P15-1136 10.3115/v1/P15-1136 @@ -1469,8 +1469,8 @@ Learning Anaphoricity and Antecedent Ranking Features for Coreference Resolution SamWiseman - Alexander M.Rush - StuartShieber + Alexander M.Rush + StuartShieber JasonWeston 1416–1426 P15-1137 @@ -1479,7 +1479,7 @@ Transferring Coreference Resolvers with Posterior Regularization - André F. T.Martins + André F. T.Martins 1427–1437 P15-1138 10.3115/v1/P15-1138 @@ -1499,7 +1499,7 @@ <fixed-case>KB</fixed-case>-<fixed-case>LDA</fixed-case>: Jointly Learning a Knowledge Base of Hierarchy, Relations, and Facts DanaMovshovitz-Attias - William W.Cohen + William W.Cohen 1449–1459 P15-1140 10.3115/v1/P15-1140 @@ -1510,10 +1510,10 @@ A Computationally Efficient Algorithm for Learning Topical Collocation Models ZhendongZhao LanDu - BenjaminBörschinger - John KPate + BenjaminBörschinger + John KPate MassimilianoCiaramita - MarkSteedman + MarkSteedman MarkJohnson 1460–1469 P15-1141 @@ -1545,8 +1545,8 @@ ManaalFaruqui YuliaTsvetkov DaniYogatama - ChrisDyer - Noah A.Smith + ChrisDyer + Noah A.Smith 1491–1500 P15-1144 10.3115/v1/P15-1144 @@ -1584,7 +1584,7 @@ Parsing as Reduction DanielFernández-González - André F. T.Martins + André F. T.Martins 1523–1533 P15-1147 10.3115/v1/P15-1147 @@ -1604,7 +1604,7 @@ A Data-Driven, Factorization Parser for <fixed-case>CCG</fixed-case> Dependency Structures YantaoDu - WeiweiSun + WeiweiSun XiaojunWan 1545–1555 P15-1149 @@ -1617,7 +1617,7 @@ Improved Semantic Representations From Tree-Structured Long Short-Term Memory Networks Kai ShengTai RichardSocher - Christopher D.Manning + Christopher D.Manning 1556–1566 P15-1150 10.3115/v1/P15-1150 @@ -1652,7 +1652,7 @@ YiLiao WaiLam WeiweiGuo - RebeccaPassonneau + RebeccaPassonneau 1587–1597 P15-1153 10.3115/v1/P15-1153 @@ -1660,7 +1660,7 @@ Joint Graphical Models for Date Selection in Timeline Summarization - GiangTran + GiangTran EelcoHerder KatjaMarkert 1598–1607 @@ -1671,7 +1671,7 @@ Predicting Salient Updates for Disaster Summarization ChrisKedzie - KathleenMcKeown + KathleenMcKeown FernandoDiaz 1608–1617 P15-1155 @@ -1702,8 +1702,8 @@ Why discourse affects speakers’ choice of referring expressions NahoOrita ElianaVornov - NaomiFeldman - HalDaumé III + NaomiFeldman + HalDaumé III 1639–1649 P15-1158 10.3115/v1/P15-1158 @@ -1724,7 +1724,7 @@ Who caught a cold ? - Identifying the subject of a symptom ShinKanouchi MamoruKomachi - NaoakiOkazaki + NaoakiOkazaki EijiAramaki HiroshiIshikawa 1660–1670 @@ -1736,7 +1736,7 @@ Weakly Supervised Role Identification in Teamwork Interactions DiyiYang MiaomiaoWen - CarolynRosé + CarolynRosé 1671–1680 P15-1161 10.3115/v1/P15-1161 @@ -1745,9 +1745,9 @@ Deep Unordered Composition Rivals Syntactic Methods for Text Classification MohitIyyer - VarunManjunatha + VarunManjunatha JordanBoyd-Graber - HalDaumé III + HalDaumé III 1681–1691 P15-1162 10.3115/v1/P15-1162 @@ -1776,12 +1776,12 @@ Inverted indexing for cross-lingual <fixed-case>NLP</fixed-case> - AndersSøgaard - ŽeljkoAgić - HéctorMartínez Alonso - BarbaraPlank + AndersSøgaard + ŽeljkoAgić + HéctorMartínez Alonso + BarbaraPlank BerndBohnet - AndersJohannsen + AndersJohannsen 1713–1722 P15-1165 10.3115/v1/P15-1165 @@ -1802,7 +1802,7 @@ Accurate Linear-Time <fixed-case>C</fixed-case>hinese Word Segmentation via Embedding Matching JianqiangMa - ErhardHinrichs + ErhardHinrichs 1733–1743 P15-1167 10.3115/v1/P15-1167 @@ -1814,7 +1814,7 @@ XinchiChen XipengQiu ChenxiZhu - XuanjingHuang + XuanjingHuang 1744–1753 P15-1168 10.3115/v1/P15-1168 @@ -1822,7 +1822,7 @@ An analysis of the user occupational class through <fixed-case>T</fixed-case>witter content - DanielPreoţiuc-Pietro + DanielPreoţiuc-Pietro VasileiosLampos NikolaosAletras 1754–1764 @@ -1865,7 +1865,7 @@ <fixed-case>A</fixed-case>uto<fixed-case>E</fixed-case>xtend: Extending Word Embeddings to Embeddings for Synsets and Lexemes SaschaRothe - HinrichSchütze + HinrichSchütze 1793–1803 P15-1173 10.3115/v1/P15-1173 @@ -1884,7 +1884,7 @@ Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 2: Short Papers) P15-2 - ChengqingZong + ChengqingZong MichaelStrube 10.3115/v1/P15-2 Association for Computational Linguistics @@ -1900,7 +1900,7 @@ A Framework for the Construction of Monolingual and Cross-lingual Word Similarity Datasets - JoséCamacho-Collados + JoséCamacho-Collados Mohammad TaherPilehvar RobertoNavigli 1–7 @@ -1913,7 +1913,7 @@ On metric embedding for boosting semantic similarity computations JulienSubercaze ChristopheGravier - FrederiqueLaforest + FrederiqueLaforest 8–14 P15-2002 10.3115/v1/P15-2002 @@ -1932,9 +1932,9 @@ A Multitask Objective to Inject Lexical Contrast into Distributional Semantics - Nghia ThePham + Nghia ThePham AngelikiLazaridou - MarcoBaroni + MarcoBaroni 21–26 P15-2004 10.3115/v1/P15-2004 @@ -1945,7 +1945,7 @@ ShoushanLi LeiHuang JingjingWang - GuodongZhou + GuodongZhou 27–31 P15-2005 10.3115/v1/P15-2005 @@ -1964,7 +1964,7 @@ YinfeiYang YaoweiYan MinghuiQiu - ForrestBao + ForrestBao 38–44 P15-2007 10.3115/v1/P15-2007 @@ -2007,7 +2007,7 @@ Simplifying Lexical Simplification: Do We Need Simplified Corpora? GoranGlavaš - SanjaŠtajner + SanjaŠtajner 63–68 P15-2011 10.3115/v1/P15-2011 @@ -2015,9 +2015,9 @@ <fixed-case>Z</fixed-case>oom: a corpus of natural language descriptions of map locations - RominaAltamirano - ThiagoFerreira - IvandréParaboni + RominaAltamirano + ThiagoFerreira + IvandréParaboni LucianaBenotti 69–75 P15-2012 @@ -2026,7 +2026,7 @@ Generating overspecified referring expressions: the role of discrimination - IvandréParaboni + IvandréParaboni MichelleGalindo DouglasIacovelli 76–82 @@ -2036,7 +2036,7 @@ Using prosodic annotations to improve coreference resolution of spoken text - InaRoesiger + InaRoesiger ArndtRiester 83–88 P15-2014 @@ -2055,7 +2055,7 @@ <fixed-case>I</fixed-case> do not disagree: leveraging monolingual alignment to detect disagreement in dialogue AjdaGokcen - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe 94–99 P15-2016 10.3115/v1/P15-2016 @@ -2069,7 +2069,7 @@ SaurabhGupta LiDeng XiaodongHe - GeoffreyZweig + GeoffreyZweig MargaretMitchell 100–105 P15-2017 @@ -2081,7 +2081,7 @@ SemihYagcioglu ErkutErdem AykutErdem - RuketCakici + RuketCakici 106–111 P15-2018 10.3115/v1/P15-2018 @@ -2090,7 +2090,7 @@ Learning language through pictures - GrzegorzChrupała + GrzegorzChrupała ÁkosKádár AfraAlishahi 112–118 @@ -2113,7 +2113,7 @@ Lexicon Stratification for Translating Out-of-Vocabulary Words YuliaTsvetkov - ChrisDyer + ChrisDyer 125–131 P15-2021 10.3115/v1/P15-2021 @@ -2145,7 +2145,7 @@ TakuyaMatsuzaki AkiraFujita NaoyaTodo - Noriko H.Arai + Noriko H.Arai 145–149 P15-2024 10.3115/v1/P15-2024 @@ -2162,9 +2162,9 @@ Exploring the Planet of the <fixed-case>APE</fixed-case>s: a Comparative Study of State-of-the-art Methods for <fixed-case>MT</fixed-case> Automatic Post-Editing - RajenChatterjee + RajenChatterjee MarionWeller - MatteoNegri + MatteoNegri MarcoTurchi 156–161 P15-2026 @@ -2175,7 +2175,7 @@ Efficient Learning for Undirected Topic Models JiataoGu - Victor O.K.Li + Victor O.K.Li 162–167 P15-2027 10.3115/v1/P15-2027 @@ -2204,7 +2204,7 @@ Non-Linear Text Regression with a Deep Convolutional Neural Network ZsoltBitvai - TrevorCohn + TrevorCohn 180–185 P15-2030 10.3115/v1/P15-2030 @@ -2223,7 +2223,7 @@ Pre-training of Hidden-Unit <fixed-case>CRF</fixed-case>s Young-BumKim KarlStratos - RuhiSarikaya + RuhiSarikaya 192–198 P15-2032 10.3115/v1/P15-2032 @@ -2252,9 +2252,9 @@ Measuring idiosyncratic interests in children with autism MasoudRouhizadeh - EmilyPrud’hommeaux + EmilyPrud’hommeaux Janvan Santen - RichardSproat + RichardSproat 212–217 P15-2035 10.3115/v1/P15-2035 @@ -2265,9 +2265,9 @@ MeghanaKshirsagar SamThomson NathanSchneider - JaimeCarbonell - Noah A.Smith - ChrisDyer + JaimeCarbonell + Noah A.Smith + ChrisDyer 218–224 P15-2036 10.3115/v1/P15-2036 @@ -2308,7 +2308,7 @@ <fixed-case>KL</fixed-case>cpos3 - a Language Similarity Measure for Delexicalized Parser Transfer RudolfRosa - ZdeněkŽabokrtský + ZdeněkŽabokrtský 243–249 P15-2040 10.3115/v1/P15-2040 @@ -2337,7 +2337,7 @@ Synthetic Word Parsing Improves <fixed-case>C</fixed-case>hinese Word Segmentation FeiCheng KevinDuh - YujiMatsumoto + YujiMatsumoto 262–267 P15-2043 10.3115/v1/P15-2043 @@ -2345,9 +2345,9 @@ If all you have is a bit of the <fixed-case>B</fixed-case>ible: Learning <fixed-case>POS</fixed-case> taggers for truly low-resource languages - ŽeljkoAgić + ŽeljkoAgić DirkHovy - AndersSøgaard + AndersSøgaard 268–272 P15-2044 10.3115/v1/P15-2044 @@ -2356,8 +2356,8 @@ Improving distant supervision using inference learning RolandRoller - EnekoAgirre - AitorSoroa + EnekoAgirre + AitorSoroa MarkStevenson 273–278 P15-2045 @@ -2393,7 +2393,7 @@ Embedding Methods for Fine Grained Entity Type Classification DaniYogatama - DanielGillick + DanielGillick NevenaLazic 291–296 P15-2048 @@ -2432,7 +2432,7 @@ The Users Who Say ‘Ni’: Audience Identification in <fixed-case>C</fixed-case>hinese-language Restaurant Reviews RobVoigt - DanJurafsky + DanJurafsky 314–319 P15-2052 10.3115/v1/P15-2052 @@ -2508,7 +2508,7 @@ Document Level Time-anchoring for <fixed-case>T</fixed-case>ime<fixed-case>L</fixed-case>ine Extraction EgoitzLaparra ItziarAldabe - GermanRigau + GermanRigau 358–364 P15-2059 10.3115/v1/P15-2059 @@ -2517,7 +2517,7 @@ Event Detection and Domain Adaptation with Convolutional Neural Networks Thien HuuNguyen - RalphGrishman + RalphGrishman 365–371 P15-2060 10.3115/v1/P15-2060 @@ -2527,7 +2527,7 @@ Seed-Based Event Trigger Labeling: How far can event descriptions get us? OferBronstein IdoDagan - QiLi + QiLi HengJi AnetteFrank 372–376 @@ -2607,7 +2607,7 @@ <fixed-case>TR</fixed-case>9856: A Multi-word Term Relatedness Benchmark RanLevy - LiatEin-Dor + LiatEin-Dor ShayHummel RutyRinott NoamSlonim @@ -2631,8 +2631,8 @@ Automatic Discrimination between Cognates and Borrowings - Alina MariaCiobanu - Liviu P.Dinu + Alina MariaCiobanu + Liviu P.Dinu 431–437 P15-2071 10.3115/v1/P15-2071 @@ -2641,10 +2641,10 @@ The Media Frames Corpus: Annotations of Frames Across Issues DallasCard - Amber E.Boydstun - Justin H.Gross + Amber E.Boydstun + Justin H.Gross PhilipResnik - Noah A.Smith + Noah A.Smith 438–444 P15-2072 10.3115/v1/P15-2072 @@ -2661,7 +2661,7 @@ ChrisQuirk MargaretMitchell JianfengGao - BillDolan + BillDolan 445–450 P15-2073 10.3115/v1/P15-2073 @@ -2669,8 +2669,8 @@ <fixed-case>T</fixed-case>ibetan Unknown Word Identification from News Corpora for Supporting Lexicon-based <fixed-case>T</fixed-case>ibetan Word Segmentation - MinghuaNuo - HuidanLiu + MinghuaNuo + HuidanLiu CongjunLong JianWu 451–457 @@ -2691,7 +2691,7 @@ Non-distributional Word Vector Representations ManaalFaruqui - ChrisDyer + ChrisDyer 464–469 P15-2076 10.3115/v1/P15-2076 @@ -2710,7 +2710,7 @@ Dependency length minimisation effects in short spans: a large-scale analysis of adjective placement in complex noun phrases KristinaGulordava PaolaMerlo - BenoitCrabbé + BenoitCrabbé 477–482 P15-2078 10.3115/v1/P15-2078 @@ -2719,7 +2719,7 @@ Tagging Performance Correlates with Author Age DirkHovy - AndersSøgaard + AndersSøgaard 483–488 P15-2079 10.3115/v1/P15-2079 @@ -2741,9 +2741,9 @@ The Fixed-Size Ordinally-Forgetting Encoding Method for Neural Network Language Models ShiLiangZhang HuiJiang - MingBinXu + MingBinXu JunFengHou - LiRongDai + LiRongDai 495–500 P15-2081 10.3115/v1/P15-2081 @@ -2782,8 +2782,8 @@ Point Process Modelling of Rumour Dynamics in Social Media MichalLukasik - TrevorCohn - KalinaBontcheva + TrevorCohn + KalinaBontcheva 518–523 P15-2085 10.3115/v1/P15-2085 @@ -2801,7 +2801,7 @@ <fixed-case>MT</fixed-case> Quality Estimation for Computer-assisted Translation: Does it Really Help? MarcoTurchi - MatteoNegri + MatteoNegri MarcelloFederico 530–535 P15-2087 @@ -2824,7 +2824,7 @@ Learning Word Reorderings for Hierarchical Phrase-based Statistical Machine Translation JingyiZhang MasaoUtiyama - EiichroSumita + EiichroSumita HaiZhao 542–548 P15-2089 @@ -2835,7 +2835,7 @@ <fixed-case>UNRAVEL</fixed-case>—<fixed-case>A</fixed-case> Decipherment Toolkit MalteNuhn JulianSchamper - HermannNey + HermannNey 549–553 P15-2090 10.3115/v1/P15-2090 @@ -2877,7 +2877,7 @@ AkivaMiura GrahamNeubig SakrianiSakti - TomokiToda + TomokiToda SatoshiNakamura 573–577 P15-2094 @@ -2896,7 +2896,7 @@ Automatic Identification of Age-Appropriate Ratings of Song Lyrics AnggiMaulidyani - RuliManurung + RuliManurung 583–587 P15-2096 10.3115/v1/P15-2096 @@ -2907,7 +2907,7 @@ CourtneyNapoles KeisukeSakaguchi MattPost - JoelTetreault + JoelTetreault 588–593 P15-2097 10.3115/v1/P15-2097 @@ -2930,7 +2930,7 @@ Automatic Detection of Sentence Fragments Chak YanYeung - JohnLee + JohnLee 599–603 P15-2099 10.3115/v1/P15-2099 @@ -2940,9 +2940,9 @@ A Computational Approach to Automatic Prediction of Drunk-Texting AdityaJoshi AbhijitMishra - BalamuraliAR - PushpakBhattacharyya - Mark J.Carman + BalamuraliAR + PushpakBhattacharyya + Mark J.Carman 604–608 P15-2100 10.3115/v1/P15-2100 @@ -2986,8 +2986,8 @@ <fixed-case>T</fixed-case>witter User Geolocation Using a Unified Text and Network Prediction Model AfshinRahimi - TrevorCohn - TimothyBaldwin + TrevorCohn + TimothyBaldwin 630–636 P15-2104 10.3115/v1/P15-2104 @@ -2995,15 +2995,15 @@ Automatic Keyword Extraction on <fixed-case>T</fixed-case>witter - LuísMarujo + LuísMarujo WangLing IsabelTrancoso - ChrisDyer - Alan W.Black + ChrisDyer + Alan W.Black AnatoleGershman - DavidMartins de Matos - JoãoNeto - JaimeCarbonell + DavidMartins de Matos + JoãoNeto + JaimeCarbonell 637–643 P15-2105 10.3115/v1/P15-2105 @@ -3013,10 +3013,10 @@ Towards a Contextual Pragmatic Model to Detect Irony in Tweets JihenKaroui - FarahBenamara Zitoune - VéroniqueMoriceau + FarahBenamara Zitoune + VéroniqueMoriceau NathalieAussenac-Gilles - LamiaHadrich Belguith + LamiaHadrich Belguith 644–650 P15-2106 10.3115/v1/P15-2106 @@ -3047,7 +3047,7 @@ The Discovery of Natural Typing Annotations: User-produced Potential <fixed-case>C</fixed-case>hinese Word Delimiters DakuiZhang YuMao - YangLiu + YangLiu HanshiWang ChuyuanWei ShipingTang @@ -3060,7 +3060,7 @@ One Tense per Scene: Predicting Tense in <fixed-case>C</fixed-case>hinese Conversations TaoGe HengJi - BaobaoChang + BaobaoChang ZhifangSui 668–673 P15-2110 @@ -3083,7 +3083,7 @@ Rhetoric Map of an Answer to Compound Queries BorisGalitsky DmitryIlvovsky - Sergey O.Kuznetsov + Sergey O.Kuznetsov 681–686 P15-2112 10.3115/v1/P15-2112 @@ -3094,9 +3094,9 @@ AlbertoBarrón-Cedeño SimoneFilice GiovanniDa San Martino - ShafiqJoty - LluísMàrquez - PreslavNakov + ShafiqJoty + LluísMàrquez + PreslavNakov AlessandroMoschitti 687–693 P15-2113 @@ -3105,7 +3105,7 @@ Learning Hybrid Representations to Retrieve Semantically Equivalent Questions - Cícerodos Santos + Cícerodos Santos LucianoBarbosa DashaBogdanova BiancaZadrozny @@ -3128,7 +3128,7 @@ A Long Short-Term Memory Model for Answer Sentence Selection in Question Answering DiWang - EricNyberg + EricNyberg 707–712 P15-2116 10.3115/v1/P15-2116 @@ -3149,7 +3149,7 @@ Bilingual Word Embeddings from Non-Parallel Document-Aligned Data Applied to Bilingual Lexicon Induction IvanVulić - Marie-FrancineMoens + Marie-FrancineMoens 719–725 P15-2118 10.3115/v1/P15-2118 @@ -3211,7 +3211,7 @@ Harnessing Context Incongruity for Sarcasm Detection AdityaJoshi VinitaSharma - PushpakBhattacharyya + PushpakBhattacharyya 757–762 P15-2124 10.3115/v1/P15-2124 @@ -3222,7 +3222,7 @@ ZhongqingWang SophiaLee ShoushanLi - GuodongZhou + GuodongZhou 763–768 P15-2125 10.3115/v1/P15-2125 @@ -3234,7 +3234,7 @@ MohammadAl Boni KeiraZhou HongningWang - Matthew S.Gerber + Matthew S.Gerber 769–774 P15-2126 10.3115/v1/P15-2126 @@ -3246,7 +3246,7 @@ Cen-ChiehChen Yu-LunHsieh Chien ChinChen - Wen-LianHsu + Wen-LianHsu 775–780 P15-2127 10.3115/v1/P15-2127 @@ -3262,7 +3262,7 @@ Predicting Valence-Arousal Ratings of Words Using a Weighted Graph Method - Liang-ChihYu + Liang-ChihYu JinWang K. RobertLai Xue-jieZhang @@ -3276,11 +3276,11 @@ NikolaMrkšić DiarmuidÓ Séaghdha BlaiseThomson - MilicaGašić + MilicaGašić Pei-HaoSu DavidVandyke Tsung-HsienWen - SteveYoung + SteveYoung 794–799 P15-2130 10.3115/v1/P15-2130 @@ -3300,7 +3300,7 @@ Young-BumKim KarlStratos XiaohuLiu - RuhiSarikaya + RuhiSarikaya 806–811 P15-2132 10.3115/v1/P15-2132 @@ -3321,7 +3321,7 @@ A Simultaneous Recognition Framework for the Spoken Language Understanding Module of Intelligent Personal Assistant Software on Smart Phones ChangsuLee YoungjoongKo - JungyunSeo + JungyunSeo 818–822 P15-2134 10.3115/v1/P15-2134 @@ -3329,8 +3329,8 @@ A Deeper Exploration of the Standard <fixed-case>PB</fixed-case>-<fixed-case>SMT</fixed-case> Approach to Text Simplification and its Evaluation - SanjaŠtajner - HannahBéchara + SanjaŠtajner + HannahBéchara HoracioSaggion 823–828 P15-2135 @@ -3362,7 +3362,7 @@ Unsupervised extractive summarization via coverage maximization with syntactic and semantic concepts NatalieSchluter - AndersSøgaard + AndersSøgaard 840–844 P15-2138 10.3115/v1/P15-2138 @@ -3370,8 +3370,8 @@ Low Resource Dependency Parsing: Cross-lingual Parameter Sharing in a Neural Network Parser - LongDuong - TrevorCohn + LongDuong + TrevorCohn StevenBird PaulCook 845–850 @@ -3383,7 +3383,7 @@ Semantic Structure Analysis of Noun Phrases using <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation YuichiroSawai HiroyukiShindo - YujiMatsumoto + YujiMatsumoto 851–856 P15-2140 10.3115/v1/P15-2140 @@ -3393,7 +3393,7 @@ Boosting Transition-based <fixed-case>AMR</fixed-case> Parsing with Refined Actions and Auxiliary Analyzers ChuanWang NianwenXue - SameerPradhan + SameerPradhan 857–862 P15-2141 10.3115/v1/P15-2141 @@ -3402,7 +3402,7 @@ Generative Incremental Dependency Parsing with Neural Networks JanBuys - PhilBlunsom + PhilBlunsom 863–869 P15-2142 10.3115/v1/P15-2142 @@ -3437,8 +3437,8 @@ Kuan-YuChen AngelinaIvanova ElliePavlick - EmilyBender - Chin-YewLin + EmilyBender + Chin-YewLin StephanOepen 10.3115/v1/P15-3 Association for Computational Linguistics @@ -3466,9 +3466,9 @@ Leveraging Compounds to Improve Noun Phrase Translation from <fixed-case>C</fixed-case>hinese and <fixed-case>G</fixed-case>erman XiaoPu LauraMascarell - AndreiPopescu-Belis + AndreiPopescu-Belis MarkFishel - Ngoc-QuangLuong + Ngoc-QuangLuong MartinVolk 8–15 P15-3002 @@ -3486,7 +3486,7 @@ Transition-based Dependency <fixed-case>DAG</fixed-case> Parsing Using Dynamic Oracles AlperTokgöz - GülşenEryiǧit + GülşenEryiǧit 22–27 P15-3004 10.3115/v1/P15-3004 @@ -3497,7 +3497,7 @@ YoshiakiKitagawa MamoruKomachi EijiAramaki - NaoakiOkazaki + NaoakiOkazaki HiroshiIshikawa 28–34 P15-3005 @@ -3553,7 +3553,7 @@ <fixed-case>IMI</fixed-case> — A Multilingual Semantic Annotation Environment FrancisBond LuísMorgado da Costa - Tuấn Anh + Tuấn Anh 7–12 P15-4002 10.3115/v1/P15-4002 @@ -3574,7 +3574,7 @@ SimoneFilice GiuseppeCastellucci DaniloCroce - RobertoBasili + RobertoBasili 19–24 P15-4004 10.3115/v1/P15-4004 @@ -3679,7 +3679,7 @@ <fixed-case>LEX</fixed-case>enstein: A Framework for Lexical Simplification - GustavoPaetzold + GustavoPaetzold LuciaSpecia 85–90 P15-4015 @@ -3695,7 +3695,7 @@ KaiHakala ChenLi PontusStenetorp - Lars JuhlJensen + Lars JuhlJensen 91–96 P15-4016 10.3115/v1/P15-4016 @@ -3703,8 +3703,8 @@ A Data Sharing and Annotation Service Infrastructure - SteliosPiperidis - DimitriosGalanis + SteliosPiperidis + DimitriosGalanis JuliBakagianni SokratisSofianopoulos 97–102 @@ -3717,7 +3717,7 @@ EugenRuppert ManuelKaufmann MartinRiedl - ChrisBiemann + ChrisBiemann 103–108 P15-4018 10.3115/v1/P15-4018 @@ -3740,8 +3740,8 @@ Multi-level Translation Quality Prediction with <fixed-case>Q</fixed-case>u<fixed-case>E</fixed-case>st++ LuciaSpecia - GustavoPaetzold - CarolinaScarton + GustavoPaetzold + CarolinaScarton 115–120 P15-4020 10.3115/v1/P15-4020 @@ -3758,8 +3758,8 @@ A Domain-independent Rule-based Framework for Event Extraction - Marco A.Valenzuela-Escárcega - GusHahn-Powell + Marco A.Valenzuela-Escárcega + GusHahn-Powell MihaiSurdeanu ThomasHicks 127–132 @@ -3780,10 +3780,10 @@ <fixed-case>W</fixed-case>rite<fixed-case>A</fixed-case>head: Mining Grammar Patterns in Corpora for Assisted Writing Tzu-HsiYen - Jian-ChengWu - JimChang + Jian-ChengWu + JimChang JoanneBoisson - JasonChang + JasonChang 139–144 P15-4024 10.3115/v1/P15-4024 @@ -3805,7 +3805,7 @@ Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing: Tutorial Abstracts P15-5 - EnekoAgirre + EnekoAgirre KevinDuh 10.3115/v1/P15-5 Association for Computational Linguistics @@ -3831,8 +3831,8 @@ Structured Belief Propagation for <fixed-case>NLP</fixed-case> - Matthew R.Gormley - JasonEisner + Matthew R.Gormley + JasonEisner 5–6 P15-5002 10.3115/v1/P15-5002 @@ -3840,8 +3840,8 @@ Sentiment and Belief: How to Think about, Represent, and Annotate Private States - OwenRambow - JanyceWiebe + OwenRambow + JanyceWiebe 7–11 P15-5003 10.3115/v1/P15-5003 @@ -3863,7 +3863,7 @@ GuillaumeBouchard JasonNaradowsky SebastianRiedel - TimRocktäschel + TimRocktäschel AndreasVlachos 16–18 P15-5005 diff --git a/data/xml/P16.xml b/data/xml/P16.xml index e5e7833be9..542704f06f 100644 --- a/data/xml/P16.xml +++ b/data/xml/P16.xml @@ -5,7 +5,7 @@ Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) P16-1 KatrinErk - Noah A.Smith + Noah A.Smith 10.18653/v1/P16-1 Association for Computational Linguistics
Berlin, Germany
@@ -69,7 +69,7 @@ DiLu XiaomanPan NimaPourdamghani - Shih-FuChang + Shih-FuChang HengJi KevinKnight 54–65 @@ -82,8 +82,8 @@ JoernWuebker SpenceGreen JohnDeNero - SašaHasan - Minh-ThangLuong + SašaHasan + Minh-ThangLuong 66–75 P16-1007 10.18653/v1/P16-1007 @@ -124,7 +124,7 @@ Incremental Acquisition of Verb Hypothesis Space towards Physical World Interaction LanboShe - JoyceChai + JoyceChai 108–117 P16-1011 10.18653/v1/P16-1011 @@ -133,7 +133,7 @@ Language Transfer Learning for Supervised Lexical Substitution GeroldHintz - ChrisBiemann + ChrisBiemann 118–129 P16-1012 10.18653/v1/P16-1012 @@ -145,7 +145,7 @@ ManaalFaruqui WangLing BrianMacWhinney - ChrisDyer + ChrisDyer 130–139 P16-1013 10.18653/v1/P16-1013 @@ -176,7 +176,7 @@ A Transition-Based System for Joint Lexical and Syntactic Analysis - MatthieuConstant + MatthieuConstant JoakimNivre 161–171 P16-1016 @@ -186,7 +186,7 @@ Neural Greedy Constituent Parsing with Dynamic Oracles MaximinCoavoux - BenoîtCrabbé + BenoîtCrabbé 172–182 P16-1017 10.18653/v1/P16-1017 @@ -205,9 +205,9 @@ Idiom Token Classification using Sentential Distributed Semantics - GiancarloSalton + GiancarloSalton RobertRoss - JohnKelleher + JohnKelleher 194–204 P16-1019 10.18653/v1/P16-1019 @@ -229,7 +229,7 @@ QinlanShen MichaelMiller SeungwhanMoon - CarolynRosé + CarolynRosé 216–225 P16-1021 10.18653/v1/P16-1021 @@ -251,7 +251,7 @@ Intrinsic Subspace Evaluation of Word Embedding Representations YadollahYaghoobzadeh - HinrichSchütze + HinrichSchütze 236–246 P16-1023 10.18653/v1/P16-1023 @@ -272,9 +272,9 @@ TaylorCassidy XiaochengFeng HengJi - Clare R.Voss + Clare R.Voss JiaweiHan - AvirupSil + AvirupSil 258–268 P16-1025 10.18653/v1/P16-1025 @@ -293,7 +293,7 @@ Using Sentence-Level <fixed-case>LSTM</fixed-case> Language Models for Script Inference KarlPichotta - Raymond J.Mooney + Raymond J.Mooney 279–289 P16-1027 10.18653/v1/P16-1027 @@ -332,7 +332,7 @@ Bi-Transferring Deep Neural Networks for Domain Adaptation GuangyouZhou ZhiwenXie - Jimmy XiangjiHuang + Jimmy XiangjiHuang TingtingHe 322–332 P16-1031 @@ -343,7 +343,7 @@ Document-level Sentiment Inference with Social, Faction, and Discourse Context EunsolChoi HannahRashkin - LukeZettlemoyer + LukeZettlemoyer YejinChoi 333–343 P16-1032 @@ -367,7 +367,7 @@ Dependency Parsing with Bounded Block Degree and Well-nestedness via <fixed-case>L</fixed-case>agrangian Relaxation and Branch-and-Bound CaioCorro - JosephLe Roux + JosephLe Roux MathieuLacroix AntoineRozenknop RobertoWolfler Calvo @@ -390,8 +390,8 @@ Together we stand: <fixed-case>S</fixed-case>iamese Networks for Similar Question Retrieval ArpitaDas HarishYenala - ManojChinnakotla - ManishShrivastava + ManojChinnakotla + ManishShrivastava 378–387 P16-1036 10.18653/v1/P16-1036 @@ -401,7 +401,7 @@ News Citation Recommendation with Implicit and Explicit Semantics HaoPeng JingLiu - Chin-YewLin + Chin-YewLin 388–398 P16-1037 10.18653/v1/P16-1037 @@ -452,7 +452,7 @@ Combining Natural Logic and Shallow Reasoning for Question Answering GaborAngeli Neha NayakKennard - Christopher D.Manning + Christopher D.Manning 442–452 P16-1042 10.18653/v1/P16-1042 @@ -461,7 +461,7 @@ Easy Questions First? A Case Study on Curriculum Learning for Question Answering MrinmayaSachan - EricXing + EricXing 453–463 P16-1043 10.18653/v1/P16-1043 @@ -470,7 +470,7 @@ Improved Representation Learning for Question Answer Matching MingTan - Cicerodos Santos + Cicerodos Santos BingXiang BowenZhou 464–473 @@ -481,8 +481,8 @@ Tables as Semi-structured Knowledge for Question Answering Sujay KumarJauhar - PeterTurney - EduardHovy + PeterTurney + EduardHovy 474–483 P16-1045 10.18653/v1/P16-1045 @@ -502,7 +502,7 @@ Neural Networks For Negation Scope Detection FedericoFancellu AdamLopez - BonnieWebber + BonnieWebber 495–504 P16-1047 10.18653/v1/P16-1047 @@ -511,7 +511,7 @@ <fixed-case>CSE</fixed-case>: Conceptual Sentence Embeddings based on Attention Model YashenWang - HeyanHuang + HeyanHuang ChongFeng QiangZhou JiahuiGu @@ -538,7 +538,7 @@ Investigating the Sources of Linguistic Alignment in Conversation GabrielDoyle - Michael C.Frank + Michael C.Frank 526–536 P16-1050 10.18653/v1/P16-1050 @@ -574,8 +574,8 @@ Towards more variation in text generation: Developing and evaluating variation models for choice of referential form - ThiagoCastro Ferreira - EmielKrahmer + ThiagoCastro Ferreira + EmielKrahmer SanderWubben 568–577 P16-1054 @@ -593,8 +593,8 @@ Generating Factoid Questions With Recurrent Neural Networks: The 30<fixed-case>M</fixed-case> Factoid Question-Answer Corpus - Iulian VladSerban - AlbertoGarcía-Durán + Iulian VladSerban + AlbertoGarcía-Durán CaglarGulcehre SungjinAhn SarathChandar @@ -608,8 +608,8 @@ Latent Predictor Networks for Code Generation WangLing - PhilBlunsom - EdwardGrefenstette + PhilBlunsom + EdwardGrefenstette Karl MoritzHermann TomášKočiský FuminWang @@ -621,7 +621,7 @@ Easy Things First: Installments Improve Referring Expression Generation for Objects in Photographs - SinaZarrieß + SinaZarrieß DavidSchlangen 610–620 P16-1058 @@ -633,7 +633,7 @@ AmirGloberson NevenaLazic SoumenChakrabarti - AmarnagSubramanya + AmarnagSubramanya MichaelRinggaard FernandoPereira 621–631 @@ -643,7 +643,7 @@ Which Coreference Evaluation Metric Do You Trust? A Proposal for a Link-based Entity Aware Metric - Nafise SadatMoosavi + Nafise SadatMoosavi MichaelStrube 632–642 P16-1060 @@ -654,7 +654,7 @@ Improving Coreference Resolution by Learning Entity-Level Distributed Representations KevinClark - Christopher D.Manning + Christopher D.Manning 643–653 P16-1061 10.18653/v1/P16-1061 @@ -666,7 +666,7 @@ ReedCoke RuiZhang XiangyiYe - DragomirRadev + DragomirRadev 654–665 P16-1062 10.18653/v1/P16-1062 @@ -676,7 +676,7 @@ Generative Topic Embedding: a Continuous Representation of Documents ShaohuaLi - Tat-SengChua + Tat-SengChua JunZhu ChunyanMiao 666–675 @@ -768,7 +768,7 @@ Extracting token-level signals of syntactic processing from f<fixed-case>MRI</fixed-case> - with an application to <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> induction JoachimBingel MariaBarrett - AndersSøgaard + AndersSøgaard 747–755 P16-1071 10.18653/v1/P16-1071 @@ -808,7 +808,7 @@ Constrained Multi-Task Learning for Automated Essay Scoring RonanCummins MengZhang - TedBriscoe + TedBriscoe 789–799 P16-1075 10.18653/v1/P16-1075 @@ -856,8 +856,8 @@ LucieFlekova JordanCarpenter SalvatoreGiorgi - LyleUngar - DanielPreoţiuc-Pietro + LyleUngar + DanielPreoţiuc-Pietro 843–854 P16-1080 10.18653/v1/P16-1080 @@ -879,7 +879,7 @@ JonathanGordon LinhongZhu AramGalstyan - PremNatarajan + PremNatarajan GullyBurns 866–875 P16-1082 @@ -901,9 +901,9 @@ How well do Computers Solve Math Word Problems? Large-Scale Dataset Construction and Evaluation DanqingHuang ShumingShi - Chin-YewLin + Chin-YewLin JianYin - Wei-YingMa + Wei-YingMa 887–896 P16-1084 10.18653/v1/P16-1084 @@ -923,7 +923,7 @@ Text Understanding with the Attention Sum Reader Network RudolfKadlec MartinSchmid - OndrejBajgar + OndrejBajgar JanKleindienst 908–918 P16-1086 @@ -933,7 +933,7 @@ Investigating <fixed-case>LSTM</fixed-case>s for Joint Extraction of Opinion Entities and Relations ArzooKatiyar - ClaireCardie + ClaireCardie 919–929 P16-1087 10.18653/v1/P16-1087 @@ -941,7 +941,7 @@ Transition-Based Left-Corner Parsing for Identifying <fixed-case>PTB</fixed-case>-Style Nonlocal Dependencies - YoshihideKato + YoshihideKato ShigekiMatsubara 930–940 P16-1088 @@ -952,7 +952,7 @@ <fixed-case>S</fixed-case>iamese <fixed-case>CBOW</fixed-case>: Optimizing Word Embeddings for Sentence Representations TomKenter AlexeyBorisov - Maartende Rijke + Maartende Rijke 941–951 P16-1089 10.18653/v1/P16-1089 @@ -971,7 +971,7 @@ Exploring Convolutional and Recurrent Neural Networks in Sequential Labelling for Dialogue Topic Tracking SeokhwanKim - RafaelBanchs + RafaelBanchs HaizhouLi 963–973 P16-1091 @@ -989,7 +989,7 @@ A <fixed-case>CALL</fixed-case> System for Learning Preposition Usage - JohnLee + JohnLee DonaldSturgeon MengqiLuo 984–993 @@ -1004,7 +1004,7 @@ ChrisBrockett GeorgiosSpithourakis JianfengGao - BillDolan + BillDolan 994–1003 P16-1094 10.18653/v1/P16-1094 @@ -1047,7 +1047,7 @@ PengfeiLiu XipengQiu JifanChen - XuanjingHuang + XuanjingHuang 1034–1043 P16-1098 10.18653/v1/P16-1098 @@ -1057,7 +1057,7 @@ Understanding Discourse on Work and Job-Related Well-Being in Public Social Media TongLiu ChristopherHoman - CeciliaOvesdotter Alm + CeciliaOvesdotter Alm MeganLytle AnnMarie White HenryKautz @@ -1068,8 +1068,8 @@ Achieving Open Vocabulary Neural Machine Translation with Hybrid Word-Character Models - Minh-ThangLuong - Christopher D.Manning + Minh-ThangLuong + Christopher D.Manning 1054–1063 P16-1100 10.18653/v1/P16-1100 @@ -1078,7 +1078,7 @@ End-to-end Sequence Labeling via Bi-directional <fixed-case>LSTM</fixed-case>-<fixed-case>CNN</fixed-case>s-<fixed-case>CRF</fixed-case> XuezheMa - EduardHovy + EduardHovy 1064–1074 P16-1101 10.18653/v1/P16-1101 @@ -1100,8 +1100,8 @@ Synthesizing Compound Words for Machine Translation AustinMatthews EvaSchlinger - AlonLavie - ChrisDyer + AlonLavie + ChrisDyer 1085–1094 P16-1103 10.18653/v1/P16-1103 @@ -1114,7 +1114,7 @@ DipteshKanojia SeemaNagar KuntalDey - PushpakBhattacharyya + PushpakBhattacharyya 1095–1104 P16-1104 10.18653/v1/P16-1104 @@ -1140,7 +1140,7 @@ Context-aware Argumentative Relation Mining HuyNguyen - DianeLitman + DianeLitman 1127–1137 P16-1107 10.18653/v1/P16-1107 @@ -1180,8 +1180,8 @@ Predicting the Rise and Fall of Scientific Topics from Trends in their Rhetorical Framing VinodkumarPrabhakaran William L.Hamilton - DanMcFarland - DanJurafsky + DanMcFarland + DanJurafsky 1170–1180 P16-1111 10.18653/v1/P16-1111 @@ -1218,8 +1218,8 @@ Resolving References to Objects in Photographs using the Words-As-Classifiers Model DavidSchlangen - SinaZarrieß - CaseyKennington + SinaZarrieß + CaseyKennington 1213–1223 P16-1115 10.18653/v1/P16-1115 @@ -1229,9 +1229,9 @@ <fixed-case>RBPB</fixed-case>: Regularization-Based Pattern Balancing Method for Event Extraction LeiSha JingLiu - Chin-YewLin + Chin-YewLin SujianLi - BaobaoChang + BaobaoChang ZhifangSui 1224–1234 P16-1116 @@ -1270,7 +1270,7 @@ Bilingual Segmented Topic Model AkihiroTamura - EiichiroSumita + EiichiroSumita 1266–1276 P16-1120 10.18653/v1/P16-1120 @@ -1279,7 +1279,7 @@ Learning Semantically and Additively Compositional Distributional Representations RanTian - NaoakiOkazaki + NaoakiOkazaki KentaroInui 1277–1287 P16-1121 @@ -1313,7 +1313,7 @@ JingLiu YuanfeiLuo BinWang - Chin-YewLin + Chin-YewLin 1308–1318 P16-1124 10.18653/v1/P16-1124 @@ -1332,16 +1332,16 @@ The Creation and Analysis of a Website Privacy Policy Corpus ShomirWilson FlorianSchaub - Aswarth AbhilashDara + Aswarth AbhilashDara FrederickLiu SushainCherivirala PedroGiovanni Leon MadsSchaarup Andersen SebastianZimmeck - Kanthashree MysoreSathyendra + Kanthashree MysoreSathyendra N. CameronRussell Thomas B.Norton - EduardHovy + EduardHovy JoelReidenberg NormanSadeh 1330–1340 @@ -1362,7 +1362,7 @@ Learning Word Meta-Embeddings WenpengYin - HinrichSchütze + HinrichSchütze 1351–1360 P16-1128 10.18653/v1/P16-1128 @@ -1371,7 +1371,7 @@ Towards Constructing Sports News from Live Text Commentary JianminZhang - Jin-geYao + Jin-geYao XiaojunWan 1361–1371 P16-1129 @@ -1384,7 +1384,7 @@ A Continuous Space Rule Selection Model for Syntax-based Statistical Machine Translation JingyiZhang MasaoUtiyama - EiichroSumita + EiichroSumita GrahamNeubig SatoshiNakamura 1372–1381 @@ -1409,8 +1409,8 @@ YueZhang ShujianHuang JunshengZhou - Xin-YuDai - JiajunChen + Xin-YuDai + JiajunChen 1393–1402 P16-1132 10.18653/v1/P16-1132 @@ -1442,7 +1442,7 @@ Identifying Causal Relations Using Parallel <fixed-case>W</fixed-case>ikipedia Articles ChristopherHidey - KathyMcKeown + KathyMcKeown 1424–1433 P16-1135 10.18653/v1/P16-1135 @@ -1453,8 +1453,8 @@ Compositional Learning of Embeddings for Relation Paths in Knowledge Base and Text KristinaToutanova - Xi VictoriaLin - Wen-tauYih + Xi VictoriaLin + Wen-tauYih HoifungPoon ChrisQuirk 1434–1444 @@ -1485,11 +1485,11 @@ A Fast Unified Model for Parsing and Sentence Understanding - Samuel R.Bowman + Samuel R.Bowman JonGauthier AbhinavRastogi RaghavGupta - Christopher D.Manning + Christopher D.Manning ChristopherPotts 1466–1477 P16-1139 @@ -1500,7 +1500,7 @@ Investigating Language Universal and Specific Properties in Word Embeddings PengQian XipengQiu - XuanjingHuang + XuanjingHuang 1478–1488 P16-1140 10.18653/v1/P16-1140 @@ -1510,7 +1510,7 @@ Diachronic Word Embeddings Reveal Statistical Laws of Semantic Change William L.Hamilton JureLeskovec - DanJurafsky + DanJurafsky 1489–1501 P16-1141 10.18653/v1/P16-1141 @@ -1527,10 +1527,10 @@ <fixed-case>L</fixed-case>ex<fixed-case>S</fixed-case>em<fixed-case>T</fixed-case>m: A Semantic Dataset Based on All-words Unsupervised Sense Distribution Learning - AndrewBennett - TimothyBaldwin + AndrewBennett + TimothyBaldwin Jey HanLau - DianaMcCarthy + DianaMcCarthy FrancisBond 1513–1524 P16-1143 @@ -1540,14 +1540,14 @@ The <fixed-case>LAMBADA</fixed-case> dataset: Word prediction requiring a broad discourse context DenisPaperno - GermánKruszewski + GermánKruszewski AngelikiLazaridou - Ngoc QuanPham - RaffaellaBernardi + Ngoc QuanPham + RaffaellaBernardi SandroPezzelle - MarcoBaroni - GemmaBoleda - RaquelFernández + MarcoBaroni + GemmaBoleda + RaquelFernández 1525–1534 P16-1144 10.18653/v1/P16-1144 @@ -1571,7 +1571,7 @@ Optimizing Spectral Learning for Parsing ShashiNarayan - Shay B.Cohen + Shay B.Cohen 1546–1556 P16-1146 10.18653/v1/P16-1146 @@ -1627,7 +1627,7 @@ Learning Structured Predictors from Bandit Feedback for Interactive <fixed-case>NLP</fixed-case> - ArtemSokolov + ArtemSokolov JuliaKreutzer ChristopherLo StefanRiezler @@ -1644,7 +1644,7 @@ JianfengGao LihongLi LiDeng - MariOstendorf + MariOstendorf 1621–1630 P16-1153 10.18653/v1/P16-1153 @@ -1656,7 +1656,7 @@ JiataoGu ZhengdongLu HangLi - Victor O.K.Li + Victor O.K.Li 1631–1640 P16-1154 10.18653/v1/P16-1154 @@ -1675,8 +1675,8 @@ Morphological Smoothing and Extrapolation of Word Embeddings RyanCotterell - HinrichSchütze - JasonEisner + HinrichSchütze + JasonEisner 1651–1660 P16-1156 10.18653/v1/P16-1156 @@ -1687,7 +1687,7 @@ Cross-lingual Models of Word Embeddings: An Empirical Comparison ShyamUpadhyay ManaalFaruqui - ChrisDyer + ChrisDyer DanRoth 1661–1670 P16-1157 @@ -1698,8 +1698,8 @@ Take and Took, Gaggle and Goose, Book and Read: Evaluating the Utility of Vector Differences for Lexical Relation Learning EkaterinaVylomova LauraRimell - TrevorCohn - TimothyBaldwin + TrevorCohn + TimothyBaldwin 1671–1682 P16-1158 10.18653/v1/P16-1158 @@ -1732,8 +1732,8 @@ Target-Side Context for Discriminative Models in Statistical Machine Translation AlešTamchyna - AlexanderFraser - OndřejBojar + AlexanderFraser + OndřejBojar MarcinJunczys-Dowmunt 1704–1714 P16-1161 @@ -1757,7 +1757,7 @@ QiZhang PengfeiLiu XipengQiu - XuanjingHuang + XuanjingHuang 1726–1735 P16-1163 10.18653/v1/P16-1163 @@ -1767,7 +1767,7 @@ Model Architectures for Quotation Detection ChristianScheible RomanKlinger - SebastianPadó + SebastianPadó 1736–1745 P16-1164 10.18653/v1/P16-1164 @@ -1776,7 +1776,7 @@ Speech Act Modeling of Written Asynchronous Conversations with Task-Specific Embeddings and Conditional Structured Models - ShafiqJoty + ShafiqJoty EnamulHoque 1746–1756 P16-1165 @@ -1821,7 +1821,7 @@ YuntianDeng MrinmayaSachan ZhichengYan - EricXing + EricXing 1791–1801 P16-1169 10.18653/v1/P16-1169 @@ -1845,7 +1845,7 @@ QiaoziGao MalcolmDoering ShaohuaYang - JoyceChai + JoyceChai 1814–1824 P16-1171 10.18653/v1/P16-1171 @@ -1884,7 +1884,7 @@ AdithyaRenduchintala RebeccaKnowles PhilippKoehn - JasonEisner + JasonEisner 1859–1869 P16-1175 10.18653/v1/P16-1175 @@ -1907,7 +1907,7 @@ HadiAmiri PhilipResnik JordanBoyd-Graber - HalDaumé III + HalDaumé III 1882–1892 P16-1177 10.18653/v1/P16-1177 @@ -1927,8 +1927,8 @@ Alleviating Poor Context with Background Knowledge for Named Entity Disambiguation AnderBarrena - AitorSoroa - EnekoAgirre + AitorSoroa + EnekoAgirre 1903–1912 P16-1179 10.18653/v1/P16-1179 @@ -1938,7 +1938,7 @@ Mining Paraphrasal Typed Templates from a Plain Text Corpus OrBiran TerraBlevins - KathleenMcKeown + KathleenMcKeown 1913–1923 P16-1180 10.18653/v1/P16-1180 @@ -1947,7 +1947,7 @@ How to Train Dependency Parsers with Inexact Search for Joint Sentence Boundary Detection and Parsing of Entire Documents AndersBjörkelund - AgnieszkaFaleńska + AgnieszkaFaleńska WolfgangSeeker JonasKuhn 1924–1934 @@ -2010,7 +2010,7 @@ Predicting the Compositionality of Nominal Compounds: Giving Word Embeddings a Hard Time - SilvioCordeiro + SilvioCordeiro CarlosRamisch MarcoIdiart AlineVillavicencio @@ -2042,7 +2042,7 @@ Jointly Learning to Embed and Predict with Multiple Languages Daniel C.Ferreira - André F. T.Martins + André F. T.Martins Mariana S. C.Almeida 2019–2028 P16-1190 @@ -2072,8 +2072,8 @@ A Vector Space for Distributional Semantics for Entailment - JamesHenderson - DianaPopa + JamesHenderson + DianaPopa 2052–2062 P16-1193 10.18653/v1/P16-1193 @@ -2095,7 +2095,7 @@ SrinivasanIyer IoannisKonstas AlvinCheung - LukeZettlemoyer + LukeZettlemoyer 2073–2083 P16-1195 10.18653/v1/P16-1195 @@ -2138,7 +2138,7 @@ MingLiao WeiGao YulanHe - Kam-FaiWong + Kam-FaiWong 2114–2123 P16-1199 10.18653/v1/P16-1199 @@ -2213,7 +2213,7 @@ A New Psychometric-inspired Evaluation Metric for <fixed-case>C</fixed-case>hinese Word Segmentation PengQian XipengQiu - XuanjingHuang + XuanjingHuang 2185–2194 P16-1206 10.18653/v1/P16-1206 @@ -2251,7 +2251,7 @@ Domain Adaptation for Authorship Attribution: Improved Structural Correspondence Learning UpendraSapkota ThamarSolorio - ManuelMontes + ManuelMontes StevenBethard 2226–2235 P16-1210 @@ -2261,7 +2261,7 @@ A Corpus-Based Analysis of Canonical Word Order of <fixed-case>J</fixed-case>apanese Double Object Constructions RyoheiSasano - ManabuOkumura + ManabuOkumura 2236–2244 P16-1211 10.18653/v1/P16-1211 @@ -2284,8 +2284,8 @@ One for All: Towards Language Independent Named Entity Linking - AvirupSil - RaduFlorian + AvirupSil + RaduFlorian 2255–2264 P16-1213 10.18653/v1/P16-1213 @@ -2305,7 +2305,7 @@ Composing Distributed Representations of Relational Patterns ShoTakase - NaoakiOkazaki + NaoakiOkazaki KentaroInui 2276–2286 P16-1215 @@ -2334,7 +2334,7 @@ Graph-based Dependency Parsing with Bidirectional <fixed-case>LSTM</fixed-case> WenhuiWang - BaobaoChang + BaobaoChang 2306–2315 P16-1218 10.18653/v1/P16-1218 @@ -2386,7 +2386,7 @@ A Thorough Examination of the <fixed-case>CNN</fixed-case>/<fixed-case>D</fixed-case>aily <fixed-case>M</fixed-case>ail Reading Comprehension Task DanqiChen JasonBolton - Christopher D.Manning + Christopher D.Manning 2358–2367 P16-1223 10.18653/v1/P16-1223 @@ -2394,9 +2394,9 @@ Learning Language Games through Interaction - Sida I.Wang + Sida I.Wang PercyLiang - Christopher D.Manning + Christopher D.Manning 2368–2378 P16-1224 10.18653/v1/P16-1224 @@ -2405,7 +2405,7 @@ Finding Non-Arbitrary Form-Meaning Systematicity Using String-Metric Learning for Kernel Regression E. DarioGutiérrez - RogerLevy + RogerLevy BenjaminBergen 2379–2388 P16-1225 @@ -2437,8 +2437,8 @@ ZhitingHu XuezheMa ZhengzhongLiu - EduardHovy - EricXing + EduardHovy + EricXing 2410–2420 P16-1228 10.18653/v1/P16-1228 @@ -2457,13 +2457,13 @@ On-line Active Reward Learning for Policy Optimisation in Spoken Dialogue Systems Pei-HaoSu - MilicaGašić + MilicaGašić NikolaMrkšić - Lina M.Rojas-Barahona + Lina M.Rojas-Barahona StefanUltes DavidVandyke Tsung-HsienWen - SteveYoung + SteveYoung 2431–2441 P16-1230 10.18653/v1/P16-1230 @@ -2478,7 +2478,7 @@ AlessandroPresta KuzmanGanchev SlavPetrov - MichaelCollins + MichaelCollins 2442–2452 P16-1231 10.18653/v1/P16-1231 @@ -2490,7 +2490,7 @@ Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers) P16-2 KatrinErk - Noah A.Smith + Noah A.Smith 10.18653/v1/P16-2 Association for Computational Linguistics
Berlin, Germany
@@ -2505,7 +2505,7 @@ Transition-based dependency parsing with topological fields Daniëlde Kok - ErhardHinrichs + ErhardHinrichs 1–7 P16-2001 10.18653/v1/P16-2001 @@ -2515,7 +2515,7 @@ Scalable Semi-Supervised Query Classification Using Matrix Sketching Young-BumKim KarlStratos - RuhiSarikaya + RuhiSarikaya 8–13 P16-2002 10.18653/v1/P16-2002 @@ -2571,7 +2571,7 @@ Sequence-to-Sequence Generation for Spoken Dialogue via Deep Syntax Trees and Strings OndřejDušek - FilipJurčíček + FilipJurčíček 45–51 P16-2008 10.18653/v1/P16-2008 @@ -2590,7 +2590,7 @@ Joint Word Segmentation and Phonetic Category Induction MichaElsner StephanieAntetomaso - NaomiFeldman + NaomiFeldman 59–65 P16-2010 10.18653/v1/P16-2010 @@ -2632,7 +2632,7 @@ Cross-lingual projection for class-based language models BeatGfeller VladSchogol - KeithHall + KeithHall 83–88 P16-2014 10.18653/v1/P16-2014 @@ -2663,8 +2663,8 @@ Semantic classifications for detection of verb metaphors BeataBeigman Klebanov - Chee WeeLeong - E. DarioGutierrez + Chee WeeLeong + E. DarioGutierrez EkaterinaShutova MichaelFlor 101–106 @@ -2675,7 +2675,7 @@ Recognizing Salient Entities in Shopping Queries ZornitsaKozareva - QiLi + QiLi KeZhai WeiweiGuo 107–111 @@ -2687,7 +2687,7 @@ Leveraging Lexical Resources for Learning Entity Embeddings in Multi-Relational Data TengLong RyanLowe - Jackie Chi KitCheung + Jackie Chi KitCheung DoinaPrecup 112–117 P16-2019 @@ -2709,7 +2709,7 @@ Vocabulary Manipulation for Neural Machine Translation HaitaoMi ZhiguoWang - AbeIttycheriah + AbeIttycheriah 124–129 P16-2021 10.18653/v1/P16-2021 @@ -2732,8 +2732,8 @@ Improving cross-domain n-gram language modelling with skipgrams LouisOnrust - Antalvan den Bosch - HugoVan hamme + Antalvan den Bosch + HugoVan hamme 137–142 P16-2023 10.18653/v1/P16-2023 @@ -2760,7 +2760,7 @@ How Naked is the Naked Truth? A Multilingual Lexicon of Nominal Compound Compositionality CarlosRamisch - SilvioCordeiro + SilvioCordeiro LeonardoZilio MarcoIdiart AlineVillavicencio @@ -2771,8 +2771,8 @@ An Open Web Platform for Rule-Based Speech-to-Sign Translation - MannyRayner - PierretteBouillon + MannyRayner + PierretteBouillon SarahEbling JohannaGerlach IreneStrasly @@ -2786,7 +2786,7 @@ Word Alignment without <fixed-case>NULL</fixed-case> Words PhilipSchulz WilkerAziz - KhalilSima’an + KhalilSima’an 169–174 P16-2028 10.18653/v1/P16-2028 @@ -2821,7 +2821,7 @@ IvanVulić DouweKiela StephenClark - Marie-FrancineMoens + Marie-FrancineMoens 188–194 P16-2031 10.18653/v1/P16-2031 @@ -2839,9 +2839,9 @@ The Value of Semantic Parse Labeling for Knowledge Base Question Answering - Wen-tauYih + Wen-tauYih MatthewRichardson - ChrisMeek + ChrisMeek Ming-WeiChang JinaSuh 201–206 @@ -2868,8 +2868,8 @@ The red one!: On learning to refer to things based on discriminative properties AngelikiLazaridou - Nghia ThePham - MarcoBaroni + Nghia ThePham + MarcoBaroni 213–218 P16-2035 10.18653/v1/P16-2035 @@ -2887,7 +2887,7 @@ Dimensional Sentiment Analysis Using a Regional <fixed-case>CNN</fixed-case>-<fixed-case>LSTM</fixed-case> Model JinWang - Liang-ChihYu + Liang-ChihYu K. RobertLai XuejieZhang 225–230 @@ -2897,7 +2897,7 @@ Deep multi-task learning with low level tasks supervised at lower layers - AndersSøgaard + AndersSøgaard YoavGoldberg 231–235 P16-2038 @@ -2918,8 +2918,8 @@ An Entity-Focused Approach to Generating Company Descriptions GavinSaldanha OrBiran - KathleenMcKeown - AlfioGliozzo + KathleenMcKeown + AlfioGliozzo 243–248 P16-2040 10.18653/v1/P16-2040 @@ -2939,7 +2939,7 @@ Automatic Semantic Classification of <fixed-case>G</fixed-case>erman Preposition Types: Comparing Hard and Soft Clustering Approaches across Features MaximilianKöper - SabineSchulte im Walde + SabineSchulte im Walde 256–263 P16-2042 10.18653/v1/P16-2042 @@ -2962,7 +2962,7 @@ ZhongZhou DylanFitzpatrick MichaelMuehl - WilliamCohen + WilliamCohen 269–274 P16-2044 10.18653/v1/P16-2044 @@ -2981,9 +2981,9 @@ A Neural Network based Approach to Automatic Post-Editing SantanuPal - Sudip KumarNaskar + Sudip KumarNaskar MihaelaVela - Josefvan Genabith + Josefvan Genabith 281–286 P16-2046 10.18653/v1/P16-2046 @@ -2992,7 +2992,7 @@ An Unsupervised Method for Automatic Translation Memory Cleaning MasoudJalili Sabet - MatteoNegri + MatteoNegri MarcoTurchi EduardBarbu 287–292 @@ -3004,7 +3004,7 @@ Exponentially Decaying Bag-of-Words Input Features for Feed-Forward Neural Network in Statistical Machine Translation Jan-ThorstenPeter WeiyueWang - HermannNey + HermannNey 293–298 P16-2048 10.18653/v1/P16-2048 @@ -3027,7 +3027,7 @@ ElsLefever IljaCroijmans AsifaMajid - Antalvan den Bosch + Antalvan den Bosch 306–312 P16-2050 10.18653/v1/P16-2050 @@ -3036,8 +3036,8 @@ Exploring Stylistic Variation with Age and Income on <fixed-case>T</fixed-case>witter LucieFlekova - DanielPreoţiuc-Pietro - LyleUngar + DanielPreoţiuc-Pietro + LyleUngar 313–319 P16-2051 10.18653/v1/P16-2051 @@ -3047,7 +3047,7 @@ Finding Optimists and Pessimists on <fixed-case>T</fixed-case>witter XianzhiRuan StevenWilson - RadaMihalcea + RadaMihalcea 320–325 P16-2052 10.18653/v1/P16-2052 @@ -3075,7 +3075,7 @@ Text Simplification as Tree Labeling JoachimBingel - AndersSøgaard + AndersSøgaard 337–343 P16-2055 10.18653/v1/P16-2055 @@ -3085,7 +3085,7 @@ Bootstrapped Text-level Named Entity Recognition for Literature JulianBrooke AdamHammond - TimothyBaldwin + TimothyBaldwin 344–350 P16-2056 10.18653/v1/P16-2056 @@ -3101,8 +3101,8 @@ Character-based Neural Machine Translation - Marta R.Costa-jussà - José A. R.Fonollosa + Marta R.Costa-jussà + José A. R.Fonollosa 357–361 P16-2058 10.18653/v1/P16-2058 @@ -3141,7 +3141,7 @@ A Latent Concept Topic Model for Robust Topic Inference Using Word Embeddings WeihuaHu - Jun’ichiTsujii + Jun’ichiTsujii 380–386 P16-2062 10.18653/v1/P16-2062 @@ -3162,9 +3162,9 @@ MichalLukasik P. K.Srijith DuyVu - KalinaBontcheva + KalinaBontcheva ArkaitzZubiaga - TrevorCohn + TrevorCohn 393–398 P16-2064 10.18653/v1/P16-2064 @@ -3173,7 +3173,7 @@ Hunting for Troll Comments in News Community Forums TodorMihaylov - PreslavNakov + PreslavNakov 399–405 P16-2065 10.18653/v1/P16-2065 @@ -3191,8 +3191,8 @@ Multilingual Part-of-Speech Tagging with Bidirectional Long Short-Term Memory Models and Auxiliary Loss - BarbaraPlank - AndersSøgaard + BarbaraPlank + AndersSøgaard YoavGoldberg 412–418 P16-2067 @@ -3213,7 +3213,7 @@ One model, two languages: training bilingual parsers with harmonized treebanks DavidVilares CarlosGómez-Rodríguez - Miguel A.Alonso + Miguel A.Alonso 425–431 P16-2069 10.18653/v1/P16-2069 @@ -3258,7 +3258,7 @@ Integrating Distributional Lexical Contrast into Word Embeddings for Antonym-Synonym Distinction Kim AnhNguyen - SabineSchulte im Walde + SabineSchulte im Walde Ngoc ThangVu 454–459 P16-2074 @@ -3267,9 +3267,9 @@ Machine Translation Evaluation Meets Community Question Answering - FranciscoGuzmán - LluísMàrquez - PreslavNakov + FranciscoGuzmán + LluísMàrquez + PreslavNakov 460–466 P16-2075 10.18653/v1/P16-2075 @@ -3279,7 +3279,7 @@ Science Question Answering using Instructional Materials MrinmayaSachan KumarDubey - EricXing + EricXing 467–473 P16-2076 10.18653/v1/P16-2076 @@ -3310,7 +3310,7 @@ Machine Comprehension using Rich Semantic Representations MrinmayaSachan - EricXing + EricXing 486–492 P16-2079 10.18653/v1/P16-2079 @@ -3330,7 +3330,7 @@ Semantics-Driven Recognition of Collocations Using Word Embeddings SaraRodríguez-Fernández - LuisEspinosa-Anke + LuisEspinosa-Anke RobertoCarlini LeoWanner 499–505 @@ -3350,7 +3350,7 @@ Word Embedding Calculus in Meaningful Ultradense Subspaces SaschaRothe - HinrichSchütze + HinrichSchütze 512–517 P16-2083 10.18653/v1/P16-2083 @@ -3379,7 +3379,7 @@ FrancesYung KevinDuh TakuKomura - YujiMatsumoto + YujiMatsumoto 531–536 P16-2086 10.18653/v1/P16-2086 @@ -3418,8 +3418,8 @@ Single-Model Encoder-Decoder with Explicit Morphological Representation for Reinflection - KatharinaKann - HinrichSchütze + KatharinaKann + HinrichSchütze 555–560 P16-2090 10.18653/v1/P16-2090 @@ -3427,9 +3427,9 @@ Joint part-of-speech and dependency projection from multiple sources - AndersJohannsen - ŽeljkoAgić - AndersSøgaard + AndersJohannsen + ŽeljkoAgić + AndersSøgaard 561–566 P16-2091 10.18653/v1/P16-2091 @@ -3458,7 +3458,7 @@ MariaBarrett JoachimBingel FrankKeller - AndersSøgaard + AndersSøgaard 579–584 P16-2094 10.18653/v1/P16-2094 @@ -3477,7 +3477,7 @@ The Social Impact of Natural Language Processing DirkHovy - Shannon L.Spruit + Shannon L.Spruit 591–598 P16-2096 10.18653/v1/P16-2096 @@ -3566,8 +3566,8 @@ Significance of an Accurate Sandhi-Splitter in Shallow Parsing of <fixed-case>D</fixed-case>ravidian Languages - DevadathV V - Dipti MisraSharma + DevadathV V + Dipti MisraSharma 37–42 P16-3006 10.18653/v1/P16-3006 @@ -3577,7 +3577,7 @@ Improving Topic Model Clustering of Newspaper Comments for Summarisation ClareLlewellyn ClaireGrover - JonOberlander + JonOberlander 43–50 P16-3007 10.18653/v1/P16-3007 @@ -3594,7 +3594,7 @@ Robust Co-occurrence Quantification for Lexical Distributional Semantics DmitrijsMilajevs - MehrnooshSadrzadeh + MehrnooshSadrzadeh MatthewPurver 58–64 P16-3009 @@ -3628,7 +3628,7 @@ Improving Dependency Parsing Using Sentence Clause Charts VincentKríž - BarboraHladká + BarboraHladká 86–92 P16-3013 10.18653/v1/P16-3013 @@ -3690,7 +3690,7 @@ <fixed-case>QA</fixed-case>-It: Classifying Non-Referential It for Question Answer Pairs TimothyLee AlexLutz - Jinho D.Choi + Jinho D.Choi 132–137 P16-3020 10.18653/v1/P16-3020 @@ -3699,7 +3699,7 @@ Building a Corpus for <fixed-case>J</fixed-case>apanese Wikification with Fine-Grained Entity Classes DavaajavJargalsaikhan - NaoakiOkazaki + NaoakiOkazaki KojiMatsuda KentaroInui 138–144 @@ -3722,7 +3722,7 @@ Proceedings of ACL-2016 System Demonstrations P16-4 - SameerPradhan + SameerPradhan MariannaApidianaki 10.18653/v1/P16-4 Association for Computational Linguistics @@ -3748,7 +3748,7 @@ Online Information Retrieval for Language Learning MariaChinkina MadeeswaranKannan - DetmarMeurers + DetmarMeurers 7–12 P16-4002 10.18653/v1/P16-4002 @@ -3757,7 +3757,7 @@ Terminology Extraction with Term Variant Detection DamienCram - BéatriceDaille + BéatriceDaille 13–18 P16-4003 10.18653/v1/P16-4003 @@ -3790,7 +3790,7 @@ A Web-framework for <fixed-case>ODIN</fixed-case> Annotation RyanGeorgi - Michael WayneGoodman + Michael WayneGoodman FeiXia 31–36 P16-4006 @@ -3818,9 +3818,9 @@ <fixed-case>T</fixed-case>ransc<fixed-case>R</fixed-case>ater: a Tool for Automatic Speech Recognition Quality Estimation ShahabJalalvand - MatteoNegri + MatteoNegri MarcoTurchi - José G.C. de Souza + José G.C. de Souza DanieleFalavigna Mohammed R. H.Qwaider 43–48 @@ -3831,9 +3831,9 @@ <fixed-case>TM</fixed-case>op: a Tool for Unsupervised Translation Memory Cleaning MasoudJalili Sabet - MatteoNegri + MatteoNegri MarcoTurchi - José G.C. de Souza + José G.C. de Souza MarcelloFederico 49–54 P16-4009 @@ -3861,7 +3861,7 @@ <fixed-case>O</fixed-case>pen<fixed-case>D</fixed-case>ial: A Toolkit for Developing Spoken Dialogue Systems with Probabilistic Rules PierreLison - CaseyKennington + CaseyKennington 67–72 P16-4012 10.18653/v1/P16-4012 @@ -3905,7 +3905,7 @@ <fixed-case>M</fixed-case>e<fixed-case>TA</fixed-case>: A Unified Toolkit for Text Retrieval and Analysis SeanMassung ChaseGeigle - ChengXiangZhai + ChengXiangZhai 91–96 P16-4016 10.18653/v1/P16-4016 @@ -3943,7 +3943,7 @@ Personalized Exercises for Preposition Learning - JohnLee + JohnLee MengqiLuo 115–120 P16-4020 @@ -3953,8 +3953,8 @@ My Science <fixed-case>T</fixed-case>utor—<fixed-case>L</fixed-case>earning Science with a Conversational Virtual Tutor SameerPradhan - RonCole - WayneWard + RonCole + WayneWard 121–126 P16-4021 10.18653/v1/P16-4021 @@ -3963,8 +3963,8 @@ pigeo: A Python Geotagging Tool AfshinRahimi - TrevorCohn - TimothyBaldwin + TrevorCohn + TimothyBaldwin 127–132 P16-4022 10.18653/v1/P16-4022 @@ -3975,7 +3975,7 @@ AdithyaRenduchintala RebeccaKnowles PhilippKoehn - JasonEisner + JasonEisner 133–138 P16-4023 10.18653/v1/P16-4023 @@ -3983,7 +3983,7 @@ <fixed-case>R</fixed-case>oleo: Visualising Thematic Fit Spaces on the Web - AsadSayeed + AsadSayeed XudongHong VeraDemberg 139–144 @@ -4012,7 +4012,7 @@ <fixed-case>L</fixed-case>i<fixed-case>M</fixed-case>o<fixed-case>SIN</fixed-case>e Pipeline: Multilingual <fixed-case>UIMA</fixed-case>-based <fixed-case>NLP</fixed-case> Platform OlgaUryupina - BarbaraPlank + BarbaraPlank GianniBarlacchi Francisco J.Valverde Albacete ManosTsagkias @@ -4033,7 +4033,7 @@ AlexanderPanchenko FranziskaLehmann UliFahrer - ChrisBiemann + ChrisBiemann KathrinBallweg 163–168 P16-4028 @@ -4067,7 +4067,7 @@ All material associated to the tutorial will be available at http://multimo <fixed-case>NLP</fixed-case> Approaches to Computational Argumentation NoamSlonim IrynaGurevych - ChrisReed + ChrisReed BennoStein Argumentation and debating represent primary intellectual activities of the human mind. People in all societies argue and debate, not only to convince others of their own opinions but also in order to explore the differences between multiple perspectives and conceptualizations, and to learn from this exploration. The process of reaching a resolution on controversial topics typically does not follow a simple sequence of purely logical steps. Rather it involves a wide variety of complex and interwoven actions. Presumably, pros and cons are identified, considered, and weighed, via cognitive processes that often involve persuasion and emotions, which are inherently harder to formalize from a computational perspective. @@ -4101,7 +4101,7 @@ For each of these, the state of the art and open challenges are presented. The t Semantic Representations of Word Senses and Concepts - JoséCamacho-Collados + JoséCamacho-Collados IgnacioIacobacci RobertoNavigli MohammadTaher Pilehvar @@ -4112,9 +4112,9 @@ This tutorial will first provide a brief overview of the recent literature conce Neural Machine Translation - ThangLuong + ThangLuong KyunghyunCho - Christopher D.Manning + Christopher D.Manning Neural Machine Translation (NMT) is a simple new architecture for getting machines to learn to translate. Despite being relatively new (Kalchbrenner and Blunsom, 2013; Cho et al., 2014; Sutskever et al., 2014), NMT has already shown promising results, achieving state-of-the-art performances for various language pairs (Luong et al, 2015a; Jean et al, 2015; Luong et al, 2015b; Sennrich et al., 2016; Luong and Manning, 2016). While many of these NMT papers were presented to the ACL community, research and practice of NMT are only at their beginning stage. This tutorial would be a great opportunity for the whole community of machine translation and natural language processing to learn more about a very promising new approach to MT. This tutorial has four parts. In the first part, we start with an overview of MT approaches, including: (a) traditional methods that have been dominant over the past twenty years and (b) recent hybrid models with the use of neural network components. From these, we motivate why an end-to-end approach like neural machine translation is needed. The second part introduces a basic instance of NMT. We start out with a discussion of recurrent neural networks, including the back-propagation-through-time algorithm and stochastic gradient descent optimizers, as these are the foundation on which NMT builds. We then describe in detail the basic sequence-to-sequence architecture of NMT (Cho et al., 2014; Sutskever et al., 2014), the maximum likelihood training approach, and a simple beam-search decoder to produce translations. @@ -4148,7 +4148,7 @@ The goal of this tutorial is to offer an introduction to the basic concepts of g <fixed-case>M</fixed-case>eta<fixed-case>N</fixed-case>et: Repository, Identification System, and Applications - Miriam R LPetruck + Miriam R LPetruck Ellen KDodge The ubiquity of metaphor in language (Lakoff and Johnson 1980) has served as impetus for cognitive linguistic approaches to the study of language, mind, and the study of mind (e.g. Thibodeau & Boroditsky 2011). While native speakers use metaphor naturally and easily, the treatment and interpretation of metaphor in computational systems remains challenging because such systems have not succeeded in developing ways to recognize the semantic elements that define metaphor. This tutorial demonstrates MetaNet's frame-based semantic analyses, and their informing of MetaNet's automatic metaphor identification system. Participants will gain a complete understanding of the theoretical basis and the practical workings of MetaNet, and acquire relevant information about the Frame Semantics basis of that knowledge base and the way that FrameNet handles the widespread phenomenon of metaphor in language. The tutorial is geared to researchers and practitioners of language technology, not necessarily experts in metaphor analysis or knowledgeable about either FrameNet or MetaNet, but who are interested in natural language processing tasks that involve automatic metaphor processing, or could benefit from exposure to tools and resources that support frame-based deep semantic, analyses of language, including metaphor as a widespread phenomenon in human language. petruck-dodge-2016-metanet diff --git a/data/xml/P17.xml b/data/xml/P17.xml index 7975ee129d..38366e7331 100644 --- a/data/xml/P17.xml +++ b/data/xml/P17.xml @@ -21,7 +21,7 @@ Adversarial Multi-task Learning for Text Classification PengfeiLiu XipengQiu - XuanjingHuang + XuanjingHuang 1–10 P17-1001 10.18653/v1/P17-1001 @@ -46,8 +46,8 @@ Neural Symbolic Machines: Learning Semantic Parsers on <fixed-case>F</fixed-case>reebase with Weak Supervision ChenLiang JonathanBerant - QuocLe - Kenneth D.Forbus + QuocLe + Kenneth D.Forbus NiLao 23–33 P17-1003 @@ -88,7 +88,7 @@ NikolaMrkšić RoiReichart DiarmuidÓ Séaghdha - SteveYoung + SteveYoung AnnaKorhonen 56–68 P17-1006 @@ -139,7 +139,7 @@ two word-vectors results in a vector that is only a small angle away from the ve TingLiu YimingCui QingyuYin - Wei-NanZhang + Wei-NanZhang ShijinWang GuopingHu 102–111 @@ -196,7 +196,7 @@ two word-vectors results in a vector that is only a small angle away from the ve SrinivasanIyer MarkYatskar YejinChoi - LukeZettlemoyer + LukeZettlemoyer 146–157 P17-1014 P17-1014.Presentation.pdf @@ -209,8 +209,8 @@ two word-vectors results in a vector that is only a small angle away from the ve Program Induction by Rationale Generation: Learning to Solve and Explain Algebraic Word Problems WangLing DaniYogatama - ChrisDyer - PhilBlunsom + ChrisDyer + PhilBlunsom 158–167 P17-1015 10.18653/v1/P17-1015 @@ -248,7 +248,7 @@ two word-vectors results in a vector that is only a small angle away from the ve WenhuiWang NanYang FuruWei - BaobaoChang + BaobaoChang MingZhou 189–198 P17-1018 @@ -312,7 +312,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Obtaining referential word meanings from visual and distributional information: Experiments on object naming - SinaZarrieß + SinaZarrieß DavidSchlangen 243–254 P17-1023 @@ -326,10 +326,10 @@ two word-vectors results in a vector that is only a small angle away from the ve RaviShekhar SandroPezzelle YauhenKlimovich - AurélieHerbelot + AurélieHerbelot MoinNabi EnverSangineto - RaffaellaBernardi + RaffaellaBernardi 255–265 P17-1024 10.18653/v1/P17-1024 @@ -352,7 +352,7 @@ two word-vectors results in a vector that is only a small angle away from the ve <fixed-case>A</fixed-case>* <fixed-case>CCG</fixed-case> Parsing with a Supertag and Dependency Factored Model MasashiYoshikawa HiroshiNoji - YujiMatsumoto + YujiMatsumoto 277–287 P17-1026 10.18653/v1/P17-1026 @@ -374,7 +374,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Aggregating and Predicting Sequence Labels from Crowd Annotations An ThanhNguyen - ByronWallace + ByronWallace Junyi JessyLi AniNenkova MatthewLease @@ -416,7 +416,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Learning attention for historical text normalization by learning to pronounce MarcelBollmann JoachimBingel - AndersSøgaard + AndersSøgaard 332–344 P17-1031 10.18653/v1/P17-1031 @@ -430,7 +430,7 @@ two word-vectors results in a vector that is only a small angle away from the ve DaniloCroce SimoneFilice GiuseppeCastellucci - RobertoBasili + RobertoBasili 345–354 P17-1032 10.18653/v1/P17-1032 @@ -441,8 +441,8 @@ two word-vectors results in a vector that is only a small angle away from the ve Topically Driven Neural Language Model Jey HanLau - TimothyBaldwin - TrevorCohn + TimothyBaldwin + TrevorCohn 355–365 P17-1033 10.18653/v1/P17-1033 @@ -467,7 +467,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Learning Cognitive Features from Gaze Data for Sentiment and Sarcasm Classification using Convolutional Neural Network AbhijitMishra KuntalDey - PushpakBhattacharyya + PushpakBhattacharyya 377–387 P17-1035 10.18653/v1/P17-1035 @@ -492,7 +492,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Other Topics You May Also Agree or Disagree: Modeling Inter-Topic Preferences using Tweets and Matrix Factorization AkiraSasaki KazuakiHanawa - NaoakiOkazaki + NaoakiOkazaki KentaroInui 398–408 P17-1037 @@ -555,8 +555,8 @@ two word-vectors results in a vector that is only a small angle away from the ve Learning bilingual word embeddings with (almost) no bilingual data MikelArtetxe - GorkaLabaka - EnekoAgirre + GorkaLabaka + EnekoAgirre 451–462 P17-1042 P17-1042.Presentation.pdf @@ -568,7 +568,7 @@ two word-vectors results in a vector that is only a small angle away from the ve <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation Parsing using <fixed-case>LSTM</fixed-case> Recurrent Neural Networks WilliamFoland - James H.Martin + James H.Martin 463–472 P17-1043 10.18653/v1/P17-1043 @@ -581,7 +581,7 @@ two word-vectors results in a vector that is only a small angle away from the ve LuhengHe KentonLee MikeLewis - LukeZettlemoyer + LukeZettlemoyer 473–483 P17-1044 10.18653/v1/P17-1044 @@ -596,7 +596,7 @@ two word-vectors results in a vector that is only a small angle away from the ve XiujunLi JianfengGao Yun-NungChen - FaisalAhmed + FaisalAhmed LiDeng 484–495 P17-1045 @@ -622,7 +622,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Learning Word-Like Units from Joint Audio-Visual Analysis DavidHarwath - JamesGlass + JamesGlass 506–517 P17-1047 10.18653/v1/P17-1047 @@ -634,7 +634,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Joint <fixed-case>CTC</fixed-case>/attention decoding for end-to-end speech recognition TakaakiHori ShinjiWatanabe - JohnHershey + JohnHershey 518–529 P17-1048 10.18653/v1/P17-1048 @@ -684,7 +684,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Deep Pyramid Convolutional Neural Networks for Text Categorization - RieJohnson + RieJohnson TongZhang 562–570 P17-1052 @@ -698,7 +698,7 @@ two word-vectors results in a vector that is only a small angle away from the ve MoYu WenpengYin Kazi SaidulHasan - Cicerodos Santos + Cicerodos Santos BingXiang BowenZhou 571–581 @@ -752,7 +752,7 @@ two word-vectors results in a vector that is only a small angle away from the ve GabrielDoyle AmirGoldberg SameerSrivastava - MichaelFrank + MichaelFrank 603–612 P17-1056 10.18653/v1/P17-1056 @@ -762,7 +762,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Representations of language in a model of visually grounded speech signal - GrzegorzChrupała + GrzegorzChrupała LiekeGelderloos AfraAlishahi 613–622 @@ -826,9 +826,9 @@ two word-vectors results in a vector that is only a small angle away from the ve Hybrid Code Networks: practical and efficient end-to-end dialog control with supervised and reinforcement learning - Jason D.Williams - KavoshAsadi - GeoffreyZweig + Jason D.Williams + KavoshAsadi + GeoffreyZweig 665–677 P17-1062 10.18653/v1/P17-1062 @@ -851,12 +851,12 @@ two word-vectors results in a vector that is only a small angle away from the ve Modeling Source Syntax for Neural Machine Translation - JunhuiLi - DeyiXiong + JunhuiLi + DeyiXiong ZhaopengTu MuhuaZhu MinZhang - GuodongZhou + GuodongZhou 688–697 P17-1064 10.18653/v1/P17-1064 @@ -882,7 +882,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Detect Rumors in Microblog Posts Using Propagation Structure via Kernel Learning JingMa WeiGao - Kam-FaiWong + Kam-FaiWong 708–717 P17-1066 P17-1066.Presentation.pdf @@ -894,7 +894,7 @@ two word-vectors results in a vector that is only a small angle away from the ve <fixed-case>E</fixed-case>mo<fixed-case>N</fixed-case>et: Fine-Grained Emotion Detection with Gated Recurrent Neural Networks MuhammadAbdul-Mageed - LyleUngar + LyleUngar 718–728 P17-1067 10.18653/v1/P17-1067 @@ -904,10 +904,10 @@ two word-vectors results in a vector that is only a small angle away from the ve Beyond Binary Labels: Political Ideology Prediction of <fixed-case>T</fixed-case>witter Users - DanielPreoţiuc-Pietro + DanielPreoţiuc-Pietro YeLiu DanielHopkins - LyleUngar + LyleUngar 729–740 P17-1068 P17-1068.Presentation.pdf @@ -918,7 +918,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Leveraging Behavioral and Social Information for Weakly Supervised Collective Classification of Political Discourse on <fixed-case>T</fixed-case>witter - KristenJohnson + KristenJohnson DiJin DanGoldwasser 741–752 @@ -959,7 +959,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Friendships, Rivalries, and Trysts: Characterizing Relations between Ideas in Texts ChenhaoTan DallasCard - Noah A.Smith + Noah A.Smith 773–783 P17-1072 10.18653/v1/P17-1072 @@ -984,7 +984,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Automatic Annotation and Evaluation of Error Types for Grammatical Error Correction ChristopherBryant MarianoFelice - TedBriscoe + TedBriscoe 793–805 P17-1074 10.18653/v1/P17-1074 @@ -997,7 +997,7 @@ two word-vectors results in a vector that is only a small angle away from the ve SakuSugawara YusukeKido HikaruYokono - AkikoAizawa + AkikoAizawa 806–817 P17-1075 10.18653/v1/P17-1075 @@ -1019,7 +1019,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Semantic Dependency Parsing via Book Embedding - WeiweiSun + WeiweiSun JunjieCao XiaojunWan 828–838 @@ -1063,7 +1063,7 @@ two word-vectors results in a vector that is only a small angle away from the ve NadirDurrani FahimDalvi HassanSajjad - JamesGlass + JamesGlass 861–872 P17-1080 10.18653/v1/P17-1080 @@ -1090,7 +1090,7 @@ two word-vectors results in a vector that is only a small angle away from the ve A Multidimensional Lexicon for Interpersonal Stancetaking UmashanthiPavalanathan JimFitzpatrick - ScottKiesling + ScottKiesling JacobEisenstein 884–895 P17-1082 @@ -1115,7 +1115,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Apples to Apples: Learning Semantics of Common Entities Through a Novel Comprehension Task OmidBakhshandeh - JamesAllen + JamesAllen 906–916 P17-1084 10.18653/v1/P17-1084 @@ -1126,7 +1126,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Going out on a limb: Joint Extraction of Entity Mentions and Relations without Dependency Trees ArzooKatiyar - ClaireCardie + ClaireCardie 917–928 P17-1085 10.18653/v1/P17-1085 @@ -1136,10 +1136,10 @@ two word-vectors results in a vector that is only a small angle away from the ve Naturalizing a Programming Language via Interactive Learning - Sida I.Wang + Sida I.Wang SamuelGinn PercyLiang - Christopher D.Manning + Christopher D.Manning 929–938 P17-1086 10.18653/v1/P17-1086 @@ -1151,8 +1151,8 @@ two word-vectors results in a vector that is only a small angle away from the ve Semantic Word Clusters Using Signed Spectral Clustering JoãoSedoc JeanGallier - DeanFoster - LyleUngar + DeanFoster + LyleUngar 939–949 P17-1087 10.18653/v1/P17-1087 @@ -1174,7 +1174,7 @@ two word-vectors results in a vector that is only a small angle away from the ve QizheXie XuezheMa ZihangDai - EduardHovy + EduardHovy 950–962 P17-1088 10.18653/v1/P17-1088 @@ -1187,7 +1187,7 @@ two word-vectors results in a vector that is only a small angle away from the ve IoannisKonstas AlvinCheung JayantKrishnamurthy - LukeZettlemoyer + LukeZettlemoyer 963–973 P17-1089 10.18653/v1/P17-1089 @@ -1212,7 +1212,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Argument Mining with Structured <fixed-case>SVM</fixed-case>s and <fixed-case>RNN</fixed-case>s VladNiculae JoonsukPark - ClaireCardie + ClaireCardie 985–995 P17-1091 10.18653/v1/P17-1091 @@ -1223,7 +1223,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Neural Discourse Structure for Text Categorization YangfengJi - Noah A.Smith + Noah A.Smith 996–1005 P17-1092 10.18653/v1/P17-1092 @@ -1238,7 +1238,7 @@ two word-vectors results in a vector that is only a small angle away from the ve ZhisongZhang HaiZhao ZhitingHu - EricXing + EricXing 1006–1017 P17-1093 10.18653/v1/P17-1093 @@ -1262,7 +1262,7 @@ two word-vectors results in a vector that is only a small angle away from the ve NicholasAndrews MarkDredze BenjaminVan Durme - JasonEisner + JasonEisner 1029–1039 P17-1095 10.18653/v1/P17-1095 @@ -1276,7 +1276,7 @@ two word-vectors results in a vector that is only a small angle away from the ve ZhilinYang JunjieHu RuslanSalakhutdinov - WilliamCohen + WilliamCohen 1040–1050 P17-1096 10.18653/v1/P17-1096 @@ -1316,9 +1316,9 @@ two word-vectors results in a vector that is only a small angle away from the ve Diversity driven attention model for query-based abstractive summarization PrekshaNema - Mitesh M.Khapra + Mitesh M.Khapra AnirbanLaha - BalaramanRavindran + BalaramanRavindran 1063–1072 P17-1098 10.18653/v1/P17-1098 @@ -1329,8 +1329,8 @@ two word-vectors results in a vector that is only a small angle away from the ve Get To The Point: Summarization with Pointer-Generator Networks AbigailSee - Peter J.Liu - Christopher D.Manning + Peter J.Liu + Christopher D.Manning 1073–1083 P17-1099 10.18653/v1/P17-1099 @@ -1378,7 +1378,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Towards an Automatic <fixed-case>T</fixed-case>uring Test: Learning to Evaluate Dialogue Responses RyanLowe MichaelNoseworthy - Iulian VladSerban + Iulian VladSerban NicolasAngelard-Gontier YoshuaBengio JoellePineau @@ -1455,7 +1455,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Probabilistic Typology: Deep Generative Models of Vowel Inventories RyanCotterell - JasonEisner + JasonEisner 1182–1192 P17-1109 10.18653/v1/P17-1109 @@ -1468,7 +1468,7 @@ two word-vectors results in a vector that is only a small angle away from the ve XinchiChen ZhanShi XipengQiu - XuanjingHuang + XuanjingHuang 1193–1203 P17-1110 10.18653/v1/P17-1110 @@ -1491,7 +1491,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Robust Incremental Neural Semantic Graph Parsing JanBuys - PhilBlunsom + PhilBlunsom 1215–1226 P17-1112 10.18653/v1/P17-1112 @@ -1515,7 +1515,7 @@ two word-vectors results in a vector that is only a small angle away from the ve A Local Detection Approach for Named Entity Recognition and Mention Detection - MingbinXu + MingbinXu HuiJiang SedtawutWatcharawittayakul 1237–1247 @@ -1569,10 +1569,10 @@ two word-vectors results in a vector that is only a small angle away from the ve Enriching Complex Networks with Word Embeddings for Detecting Mild Cognitive Impairment from Speech Transcripts LeandroSantos Edilson AnselmoCorrêa Júnior - OsvaldoOliveira Jr - DiegoAmancio + OsvaldoOliveira Jr + DiegoAmancio LetíciaMansur - SandraAluísio + SandraAluísio 1284–1296 P17-1118 10.18653/v1/P17-1118 @@ -1603,7 +1603,7 @@ two word-vectors results in a vector that is only a small angle away from the ve A Neural Local Coherence Model DatTien Nguyen - ShafiqJoty + ShafiqJoty 1320–1330 P17-1121 10.18653/v1/P17-1121 @@ -1614,7 +1614,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Data-Driven Broad-Coverage Grammars for Opinionated Natural Language Generation (<fixed-case>ONLG</fixed-case>) TomerCagan - Stefan L.Frank + Stefan L.Frank ReutTsarfaty 1331–1341 P17-1122 @@ -1627,7 +1627,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Learning to Ask: Neural Question Generation for Reading Comprehension XinyaDu JunruShao - ClaireCardie + ClaireCardie 1342–1352 P17-1123 10.18653/v1/P17-1123 @@ -1708,7 +1708,7 @@ two word-vectors results in a vector that is only a small angle away from the ve A Constituent-Centric Neural Architecture for Reading Comprehension PengtaoXie - EricXing + EricXing 1405–1414 P17-1129 10.18653/v1/P17-1129 @@ -1728,7 +1728,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Understanding and Predicting Empathic Behavior in Counseling Therapy VerónicaPérez-Rosas - RadaMihalcea + RadaMihalcea KennethResnicow SatinderSingh LawrenceAn @@ -1741,7 +1741,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Leveraging Knowledge Bases in <fixed-case>LSTM</fixed-case>s for Improving Machine Reading BishanYang - TomMitchell + TomMitchell 1436–1446 P17-1132 10.18653/v1/P17-1132 @@ -1762,7 +1762,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Unsupervised Text Segmentation Based on Native Language Characteristics - ShervinMalmasi + ShervinMalmasi MarkDras MarkJohnson LanDu @@ -1776,8 +1776,8 @@ two word-vectors results in a vector that is only a small angle away from the ve Weakly Supervised Cross-Lingual Named Entity Recognition via Effective Annotation and Representation Projection JianNi - GeorgianaDinu - RaduFlorian + GeorgianaDinu + RaduFlorian 1470–1480 P17-1135 10.18653/v1/P17-1135 @@ -1787,7 +1787,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Context Sensitive Lemmatization Using Two Successive Bidirectional Gated Recurrent Networks AbhisekChakrabarty - Onkar ArunPandit + Onkar ArunPandit UtpalGarain 1481–1491 P17-1136 @@ -1800,8 +1800,8 @@ two word-vectors results in a vector that is only a small angle away from the ve Learning to Create and Reuse Words in Open-Vocabulary Neural Language Modeling KazuyaKawakami - ChrisDyer - PhilBlunsom + ChrisDyer + PhilBlunsom 1492–1502 P17-1137 10.18653/v1/P17-1137 @@ -1812,7 +1812,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Bandit Structured Prediction for Neural Sequence-to-Sequence Learning JuliaKreutzer - ArtemSokolov + ArtemSokolov StefanRiezler 1503–1513 P17-1138 @@ -1848,7 +1848,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Lexically Constrained Decoding for Sequence Generation Using Grid Beam Search - ChrisHokamp + ChrisHokamp QunLiu 1535–1546 P17-1141 @@ -1887,7 +1887,7 @@ two word-vectors results in a vector that is only a small angle away from the ve FanZhang Homa B.Hashemi RebeccaHwa - DianeLitman + DianeLitman 1568–1578 P17-1144 10.18653/v1/P17-1144 @@ -1898,7 +1898,7 @@ two word-vectors results in a vector that is only a small angle away from the ve <fixed-case>W</fixed-case>atset: Automatic Induction of Synsets from a Graph of Synonyms DmitryUstalov AlexanderPanchenko - ChrisBiemann + ChrisBiemann 1579–1590 P17-1145 10.18653/v1/P17-1145 @@ -1909,7 +1909,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Neural Modeling of Multi-Predicate Interactions for <fixed-case>J</fixed-case>apanese Predicate Argument Structure Analysis HirokiOuchi HiroyukiShindo - YujiMatsumoto + YujiMatsumoto 1591–1600 P17-1146 10.18653/v1/P17-1146 @@ -1920,8 +1920,8 @@ two word-vectors results in a vector that is only a small angle away from the ve <fixed-case>T</fixed-case>rivia<fixed-case>QA</fixed-case>: A Large Scale Distantly Supervised Challenge Dataset for Reading Comprehension MandarJoshi EunsolChoi - DanielWeld - LukeZettlemoyer + DanielWeld + LukeZettlemoyer 1601–1611 P17-1147 10.18653/v1/P17-1147 @@ -1955,7 +1955,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Interactive Learning of Grounded Verb Semantics towards Human-Robot Communication LanboShe - JoyceChai + JoyceChai 1634–1644 P17-1150 10.18653/v1/P17-1150 @@ -1965,7 +1965,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Multimodal Word Distributions BenAthiwaratkun - AndrewWilson + AndrewWilson 1645–1656 P17-1151 10.18653/v1/P17-1151 @@ -1979,7 +1979,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Zhen-HuaLing SiWei HuiJiang - DianaInkpen + DianaInkpen 1657–1668 P17-1152 10.18653/v1/P17-1152 @@ -1993,7 +1993,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Victor R.Martínez NikolaosMalandrakis KaranSingla - ShrikanthNarayanan + ShrikanthNarayanan 1669–1678 P17-1153 10.18653/v1/P17-1153 @@ -2089,7 +2089,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Semi-supervised sequence tagging with bidirectional language models - Matthew E.Peters + Matthew E.Peters WaleedAmmar ChandraBhagavatula RussellPower @@ -2127,7 +2127,7 @@ two word-vectors results in a vector that is only a small angle away from the ve DiarmuidÓ Séaghdha Tsung-HsienWen BlaiseThomson - SteveYoung + SteveYoung 1777–1788 P17-1163 10.18653/v1/P17-1163 @@ -2150,9 +2150,9 @@ two word-vectors results in a vector that is only a small angle away from the ve Topical Coherence in <fixed-case>LDA</fixed-case>-based Models through Induced Segmentation HesamAmoualian WeiLu - EricGaussier + EricGaussier GeorgiosBalikas - Massih R.Amini + Massih R.Amini MarianneClausel 1799–1809 P17-1165 @@ -2163,7 +2163,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Jointly Extracting Relations with Class Ties via Effective Deep Ranking HaiYe - WenhanChao + WenhanChao ZhunchenLuo ZhoujunLi 1810–1820 @@ -2175,7 +2175,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Search-based Neural Structured Learning for Sequential Question Answering MohitIyyer - Wen-tauYih + Wen-tauYih Ming-WeiChang 1821–1831 P17-1167 @@ -2189,7 +2189,7 @@ two word-vectors results in a vector that is only a small angle away from the ve BhuwanDhingra HanxiaoLiu ZhilinYang - WilliamCohen + WilliamCohen RuslanSalakhutdinov 1832–1846 P17-1168 @@ -2215,7 +2215,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Towards a Seamless Integration of Word Senses into Downstream <fixed-case>NLP</fixed-case> Applications Mohammad TaherPilehvar - JoseCamacho-Collados + JoseCamacho-Collados RobertoNavigli NigelCollier 1857–1869 @@ -2240,7 +2240,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Learning to Skim Text Adams WeiYu HongraeLee - QuocLe + QuocLe 1880–1890 P17-1172 10.18653/v1/P17-1172 @@ -2288,7 +2288,7 @@ two word-vectors results in a vector that is only a small angle away from the ve YunChen YangLiu YongCheng - Victor O.K.Li + Victor O.K.Li 1925–1935 P17-1176 10.18653/v1/P17-1176 @@ -2300,7 +2300,7 @@ two word-vectors results in a vector that is only a small angle away from the ve HuadongChen ShujianHuang DavidChiang - JiajunChen + JiajunChen 1936–1945 P17-1177 10.18653/v1/P17-1177 @@ -2362,9 +2362,9 @@ two word-vectors results in a vector that is only a small angle away from the ve One-Shot Neural Cross-Lingual Transfer for Paradigm Completion - KatharinaKann + KatharinaKann RyanCotterell - HinrichSchütze + HinrichSchütze 1993–2003 P17-1182 10.18653/v1/P17-1182 @@ -2411,7 +2411,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Deep Multitask Learning for Semantic Dependency Parsing HaoPeng SamThomson - Noah A.Smith + Noah A.Smith 2037–2048 P17-1186 10.18653/v1/P17-1186 @@ -2448,7 +2448,7 @@ two word-vectors results in a vector that is only a small angle away from the ve A Progressive Learning Approach to <fixed-case>C</fixed-case>hinese <fixed-case>SRL</fixed-case> Using Heterogeneous Data QiaolinXia LeiSha - BaobaoChang + BaobaoChang ZhifangSui 2069–2077 P17-1189 @@ -2470,8 +2470,8 @@ two word-vectors results in a vector that is only a small angle away from the ve Ontology-Aware Token Embeddings for Prepositional Phrase Attachment PradeepDasigi WaleedAmmar - ChrisDyer - EduardHovy + ChrisDyer + EduardHovy 2089–2098 P17-1191 10.18653/v1/P17-1191 @@ -2481,7 +2481,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Identifying 1950s <fixed-case>A</fixed-case>merican Jazz Musicians: Fine-Grained <fixed-case>I</fixed-case>s<fixed-case>A</fixed-case> Extraction via Modifier Composition ElliePavlick - MariusPaşca + MariusPaşca 2099–2109 P17-1192 10.18653/v1/P17-1192 @@ -2494,7 +2494,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Parsing to 1-Endpoint-Crossing, Pagenumber-2 Graphs JunjieCao ShengHuang - WeiweiSun + WeiweiSun XiaojunWan 2110–2120 P17-1193 @@ -2520,7 +2520,7 @@ two word-vectors results in a vector that is only a small angle away from the ve TakumiIto HidenaoIwane HirokazuAnai - Noriko H.Arai + Noriko H.Arai 2131–2141 P17-1195 10.18653/v1/P17-1195 @@ -2572,7 +2572,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Lexical Features in Coreference Resolution: To be Used With Caution - Nafise SadatMoosavi + Nafise SadatMoosavi MichaelStrube 14–19 P17-2003 @@ -2584,7 +2584,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Alternative Objective Functions for Training <fixed-case>MT</fixed-case> Evaluation Metrics MilošStanojević - KhalilSima’an + KhalilSima’an 20–25 P17-2004 10.18653/v1/P17-2004 @@ -2606,7 +2606,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Vector space models for evaluating semantic fluency in autism - EmilyPrud’hommeaux + EmilyPrud’hommeaux Janvan Santen DouglasGliner 32–37 @@ -2643,7 +2643,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Incorporating Dialectal Variability for Socially Equitable Language Identification DavidJurgens YuliaTsvetkov - DanJurafsky + DanJurafsky 51–57 P17-2009 10.18653/v1/P17-2009 @@ -2656,7 +2656,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Evaluating Compound Splitters Extrinsically with Textual Entailment GloriannaJagfeld PatrickZiering - Lonnekevan der Plas + Lonnekevan der Plas 58–63 P17-2010 10.18653/v1/P17-2010 @@ -2690,11 +2690,11 @@ two word-vectors results in a vector that is only a small angle away from the ve On the Distribution of Lexical Features at Multiple Levels of Analysis FatemehAlmodaresi - LyleUngar + LyleUngar VivekKulkarni MohsenZakeri SalvatoreGiorgi - H. AndrewSchwartz + H. AndrewSchwartz 79–84 P17-2013 10.18653/v1/P17-2013 @@ -2705,9 +2705,9 @@ two word-vectors results in a vector that is only a small angle away from the ve Exploring Neural Text Simplification Models SergiuNisioi - SanjaŠtajner - Simone PaoloPonzetto - Liviu P.Dinu + SanjaŠtajner + Simone PaoloPonzetto + Liviu P.Dinu 85–91 P17-2014 10.18653/v1/P17-2014 @@ -2728,9 +2728,9 @@ two word-vectors results in a vector that is only a small angle away from the ve Sentence Alignment Methods for Improving Text Simplification Systems - SanjaŠtajner + SanjaŠtajner MarcFranco-Salvador - Simone PaoloPonzetto + Simone PaoloPonzetto PaoloRosso HeinerStuckenschmidt 97–102 @@ -2744,7 +2744,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Understanding Task Design Trade-offs in Crowdsourced Paraphrase Collection YouxuanJiang Jonathan K.Kummerfeld - Walter S.Lasecki + Walter S.Lasecki 103–109 P17-2017 10.18653/v1/P17-2017 @@ -2757,7 +2757,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Arc-swift: A Novel Transition System for Dependency Parsing PengQi - Christopher D.Manning + Christopher D.Manning 110–117 P17-2018 10.18653/v1/P17-2018 @@ -2782,7 +2782,7 @@ two word-vectors results in a vector that is only a small angle away from the ve WeiyueWang TamerAlkhouli DeruiZhu - HermannNey + HermannNey 125–131 P17-2020 10.18653/v1/P17-2020 @@ -2807,8 +2807,8 @@ two word-vectors results in a vector that is only a small angle away from the ve LenaReed JiaqiWu ShereenOraby - PranavAnand - MarilynWalker + PranavAnand + MarilynWalker 141–147 P17-2022 10.18653/v1/P17-2022 @@ -2832,7 +2832,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Exploiting Domain Knowledge via Grouped Weight Sharing with Application to Text Categorization YeZhang MatthewLease - Byron C.Wallace + Byron C.Wallace 155–160 P17-2024 10.18653/v1/P17-2024 @@ -2915,7 +2915,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Attention Strategies for Multi-Source Sequence-to-Sequence Learning JindřichLibovický - JindřichHelcl + JindřichHelcl 196–202 P17-2031 10.18653/v1/P17-2031 @@ -2937,8 +2937,8 @@ two word-vectors results in a vector that is only a small angle away from the ve A Neural Model for User Geolocation and Lexical Dialectology AfshinRahimi - TrevorCohn - TimothyBaldwin + TrevorCohn + TimothyBaldwin 209–216 P17-2033 10.18653/v1/P17-2033 @@ -2964,7 +2964,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Neural Architecture for Temporal Relation Extraction: A <fixed-case>B</fixed-case>i-<fixed-case>LSTM</fixed-case> Approach for Detecting Narrative Containers JulienTourille OlivierFerret - AurélieNévéol + AurélieNévéol XavierTannier 224–230 P17-2035 @@ -2990,7 +2990,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Cross-lingual and cross-domain discourse segmentation of entire documents ChloéBraud OphélieLacroix - AndersSøgaard + AndersSøgaard 237–243 P17-2037 10.18653/v1/P17-2037 @@ -3076,7 +3076,7 @@ two word-vectors results in a vector that is only a small angle away from the ve ShunHasegawa YutaKikuchi HiroyaTakamura - ManabuOkumura + ManabuOkumura 281–286 P17-2044 10.18653/v1/P17-2044 @@ -3185,7 +3185,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Multi-Task Learning of Keyphrase Boundary Classification IsabelleAugenstein - AndersSøgaard + AndersSøgaard 341–346 P17-2054 10.18653/v1/P17-2054 @@ -3234,7 +3234,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Differentiable Scheduled Sampling for Credit Assignment KartikGoyal - ChrisDyer + ChrisDyer TaylorBerg-Kirkpatrick 366–371 P17-2058 @@ -3256,7 +3256,7 @@ two word-vectors results in a vector that is only a small angle away from the ve LongZhou WenpengHu JiajunZhang - ChengqingZong + ChengqingZong 378–384 P17-2060 10.18653/v1/P17-2060 @@ -3288,7 +3288,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Feature Hashing for Language and Dialect Identification - ShervinMalmasi + ShervinMalmasi MarkDras 399–403 P17-2063 @@ -3345,7 +3345,7 @@ two word-vectors results in a vector that is only a small angle away from the ve <fixed-case>E</fixed-case>nglish Multiword Expression-aware Dependency Parsing Including Named Entities AkihikoKato HiroyukiShindo - YujiMatsumoto + YujiMatsumoto 427–432 P17-2068 10.18653/v1/P17-2068 @@ -3357,7 +3357,7 @@ two word-vectors results in a vector that is only a small angle away from the ve ThomasKober JulieWeeds JeremyReffin - DavidWeir + DavidWeir 433–440 P17-2069 10.18653/v1/P17-2069 @@ -3391,7 +3391,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Methodical Evaluation of <fixed-case>A</fixed-case>rabic Word Embeddings MohammedElrazzaz ShadyElbassuoni - KhaledShaban + KhaledShaban ChadiHelwe 454–458 P17-2072 @@ -3414,7 +3414,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Best-Worst Scaling More Reliable than Rating Scales: A Case Study on Sentiment Intensity Annotation SvetlanaKiritchenko - SaifMohammad + SaifMohammad 465–470 P17-2074 10.18653/v1/P17-2074 @@ -3423,11 +3423,11 @@ two word-vectors results in a vector that is only a small angle away from the ve Demographic Inference on <fixed-case>T</fixed-case>witter using Recursive Neural Networks - Sunghwan MacKim + Sunghwan MacKim QiongkaiXu LizhenQu StephenWan - CécileParis + CécileParis 471–477 P17-2075 10.18653/v1/P17-2075 @@ -3438,7 +3438,7 @@ two word-vectors results in a vector that is only a small angle away from the ve <fixed-case>T</fixed-case>witter Demographic Classification Using Deep Multi-modal Multi-task Learning PrashanthVijayaraghavan SoroushVosoughi - DebRoy + DebRoy 478–483 P17-2076 10.18653/v1/P17-2076 @@ -3463,7 +3463,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Parser Adaptation for Social Media by Integrating Normalization Robvan der Goot - Gertjanvan Noord + Gertjanvan Noord 491–497 P17-2078 10.18653/v1/P17-2078 @@ -3497,7 +3497,7 @@ two word-vectors results in a vector that is only a small angle away from the ve WenjieLi ShuziNiu YangZhao - AkikoAizawa + AkikoAizawa GuopingLong 504–509 P17-2080 @@ -3529,8 +3529,8 @@ two word-vectors results in a vector that is only a small angle away from the ve A Generative Attentional Neural Network Model for Dialogue Act Classification - Quan HungTran - GholamrezaHaffari + Quan HungTran + GholamrezaHaffari IngridZukerman 524–529 P17-2083 @@ -3551,7 +3551,7 @@ two word-vectors results in a vector that is only a small angle away from the ve List-only Entity Linking YingLin - Chin-YewLin + Chin-YewLin HengJi 536–541 P17-2085 @@ -3563,7 +3563,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Improving Native Language Identification by Using Spelling Errors LingzhenChen CarloStrapparava - ViviNastase + ViviNastase 542–546 P17-2086 10.18653/v1/P17-2086 @@ -3596,7 +3596,7 @@ two word-vectors results in a vector that is only a small angle away from the ve RuiWang AndrewFinch MasaoUtiyama - EiichiroSumita + EiichiroSumita 560–566 P17-2089 10.18653/v1/P17-2089 @@ -3632,7 +3632,7 @@ two word-vectors results in a vector that is only a small angle away from the ve ShujianHuang XiaohuaLiu HangLi - JiajunChen + JiajunChen 580–586 P17-2092 10.18653/v1/P17-2092 @@ -3642,7 +3642,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Model Transfer for Tagging Low-resource Languages using a Bilingual Dictionary MengFang - TrevorCohn + TrevorCohn 587–593 P17-2093 10.18653/v1/P17-2093 @@ -3653,7 +3653,7 @@ two word-vectors results in a vector that is only a small angle away from the ve <fixed-case>E</fixed-case>uro<fixed-case>S</fixed-case>ense: Automatic Harvesting of Multilingual Sense Annotations from Parallel Text ClaudioDelli Bovi - JoseCamacho-Collados + JoseCamacho-Collados AlessandroRaganato RobertoNavigli 594–600 @@ -3669,7 +3669,7 @@ two word-vectors results in a vector that is only a small angle away from the ve NadirDurrani AhmedAbdelali YonatanBelinkov - StephanVogel + StephanVogel 601–607 P17-2095 10.18653/v1/P17-2095 @@ -3768,8 +3768,8 @@ two word-vectors results in a vector that is only a small angle away from the ve AllegraLarche AnthonyJanocko KevinZembroski - H AndrewSchwartz - LyleUngar + H AndrewSchwartz + LyleUngar 654–658 P17-2103 10.18653/v1/P17-2103 @@ -3815,7 +3815,7 @@ two word-vectors results in a vector that is only a small angle away from the ve How (not) to train a dependency parser: The curious case of jackknifing part-of-speech taggers - ŽeljkoAgić + ŽeljkoAgić NatalieSchluter 679–684 P17-2107 @@ -3831,7 +3831,7 @@ two word-vectors results in a vector that is only a small angle away from the ve AllysonEttinger SpandanaGella MatthieuLabeau - Cecilia OvesdotterAlm + Cecilia OvesdotterAlm MarineCarpuat MarkDredze Association for Computational Linguistics @@ -3919,7 +3919,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Text-based Speaker Identification on Multiparty Dialogues Using Multi-document Convolutional Neural Networks KaixinMa CatherineXiao - Jinho D.Choi + Jinho D.Choi 49–55 P17-3009 ma-etal-2017-text @@ -3955,7 +3955,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Word Embedding for Response-To-Text Assessment of Evidence HaoranZhang - DianeLitman + DianeLitman 75–81 P17-3013 zhang-litman-2017-word @@ -3970,7 +3970,7 @@ two word-vectors results in a vector that is only a small angle away from the ve <fixed-case>S</fixed-case>occ<fixed-case>E</fixed-case>val: An Annotation Schema for Rating Soccer Players JoseRamirez - MatthewGarber + MatthewGarber XinhaoWang 89–94 P17-3015 @@ -3978,7 +3978,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Accent Adaptation for the Air Traffic Control Domain - MatthewGarber + MatthewGarber MeitalSinger ChristopherWard 95–99 @@ -4004,7 +4004,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Fast Forward Through Opportunistic Incremental Meaning Representation Construction PetrBabkin - SergeiNirenburg + SergeiNirenburg 114–119 P17-3019 babkin-nirenburg-2017-fast @@ -4065,7 +4065,7 @@ two word-vectors results in a vector that is only a small angle away from the ve AnitaRamm SharidLoáiciga AnnemarieFriedrich - AlexanderFraser + AlexanderFraser 1–6 P17-4001 P17-4001.Poster.pdf @@ -4073,17 +4073,17 @@ two word-vectors results in a vector that is only a small angle away from the ve Automating Biomedical Evidence Synthesis: <fixed-case>R</fixed-case>obot<fixed-case>R</fixed-case>eviewer - IainMarshall + IainMarshall JoëlKuiper EdwardBanner - Byron C.Wallace + Byron C.Wallace 7–12 P17-4002 marshall-etal-2017-automating <fixed-case>B</fixed-case>enben: A <fixed-case>C</fixed-case>hinese Intelligent Conversational Robot - Wei-NanZhang + Wei-NanZhang TingLiu BingQin YuZhang @@ -4146,11 +4146,11 @@ two word-vectors results in a vector that is only a small angle away from the ve Interactive Visual Analysis of Transcribed Multi-Party Discourse MennatallahEl-Assady - AnnetteHautli-Janisz + AnnetteHautli-Janisz ValentinGold MiriamButt KatharinaHolzinger - DanielKeim + DanielKeim 49–54 P17-4009 el-assady-etal-2017-interactive @@ -4195,7 +4195,7 @@ two word-vectors results in a vector that is only a small angle away from the ve YoonKim YuntianDeng JeanSenellart - AlexanderRush + AlexanderRush 67–72 P17-4012 klein-etal-2017-opennmt @@ -4203,7 +4203,7 @@ two word-vectors results in a vector that is only a small angle away from the ve <fixed-case>P</fixed-case>y<fixed-case>D</fixed-case>ial: A Multi-domain Statistical Dialogue System Toolkit StefanUltes - Lina M.Rojas-Barahona + Lina M.Rojas-Barahona Pei-HaoSu DavidVandyke DonghoKim @@ -4211,8 +4211,8 @@ two word-vectors results in a vector that is only a small angle away from the ve PawełBudzianowski NikolaMrkšić Tsung-HsienWen - MilicaGašić - SteveYoung + MilicaGašić + SteveYoung 73–78 P17-4013 ultes-etal-2017-pydial @@ -4256,8 +4256,8 @@ two word-vectors results in a vector that is only a small angle away from the ve Swanson linking revisited: Accelerating literature-based discovery across domains using a conceptual influence graph - GusHahn-Powell - Marco A.Valenzuela-Escárcega + GusHahn-Powell + Marco A.Valenzuela-Escárcega MihaiSurdeanu 103–108 P17-4018 @@ -4297,7 +4297,7 @@ two word-vectors results in a vector that is only a small angle away from the ve Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics: Tutorial Abstracts P17-5 - MajaPopović + MajaPopović JordanBoyd-Graber Association for Computational Linguistics
Vancouver, Canada
@@ -4314,7 +4314,7 @@ two word-vectors results in a vector that is only a small angle away from the ve HoifungPoon ChrisQuirk KristinaToutanova - Wen-tauYih + Wen-tauYih 1–2 P17-5001 We will introduce precision medicine and showcase the vast opportunities for NLP in this burgeoning field with great societal impact. We will review pressing NLP problems, state-of-the art methods, and important applications, as well as datasets, medical resources, and practical issues. The tutorial will provide an accessible overview of biomedicine, and does not presume knowledge in biology or healthcare. The ultimate goal is to reduce the entry barrier for NLP researchers to contribute to this exciting domain. @@ -4334,7 +4334,7 @@ This tutorial builds upon a recent course taught at Carnegie Mellon University d Deep Learning for Semantic Composition XiaodanZhu - EdwardGrefenstette + EdwardGrefenstette 6–7 P17-5003 @@ -254,7 +254,7 @@
Obtaining Reliable Human Ratings of Valence, Arousal, and Dominance for 20,000 <fixed-case>E</fixed-case>nglish Words - SaifMohammad + SaifMohammad 174–184 Words play a central role in language and thought. Factor analysis studies have shown that the primary dimensions of meaning are valence, arousal, and dominance (VAD). We present the NRC VAD Lexicon, which has human ratings of valence, arousal, and dominance for more than 20,000 English words. We use Best–Worst Scaling to obtain fine-grained scores and address issues of annotation consistency that plague traditional rating scale methods of annotation. We show that the ratings obtained are vastly more reliable than those in existing lexicons. We also show that there exist statistically significant differences in the shared understanding of valence, arousal, and dominance across demographic variables such as age, gender, and personality. P18-1017 @@ -270,16 +270,16 @@ VivekSrikumar JakobPrange AustinBlodgett - Sarah R.Moeller + Sarah R.Moeller AviramStern - AdiBitan + AdiBitan OmriAbend 185–196 Semantic relations are often signaled with prepositional or possessive marking—but extreme polysemy bedevils their analysis and automatic interpretation. We introduce a new annotation scheme, corpus, and task for the disambiguation of prepositions and possessives in English. Unlike previous approaches, our annotations are comprehensive with respect to types and tokens of these markers; use broadly applicable supersense classes rather than fine-grained dictionary definitions; unite prepositions and possessives under the same class inventory; and distinguish between a marker’s lexical contribution and the role it marks in the context of a predicate or scene. Strong interannotator agreement rates, as well as encouraging disambiguation results with established supervised methods, speak to the viability of the scheme and task. P18-1018 P18-1018.Notes.pdf - @@ -289,9 +289,9 @@ Junyi JessyLi RomaPatel YinfeiYang - IainMarshall + IainMarshall AniNenkova - ByronWallace + ByronWallace 197–207 We present a corpus of 5,000 richly annotated abstracts of medical articles describing clinical randomized controlled trials. Annotations include demarcations of text spans that describe the Patient population enrolled, the Interventions studied and to what they were Compared, and the Outcomes measured (the ‘PICO’ elements). These spans are further annotated at a more granular level, e.g., individual interventions within them are marked and mapped onto a structured medical vocabulary. We acquired annotations from a diverse set of workers with varying levels of expertise and cost. We describe our data collection process and the corpus itself in detail. We then outline a set of challenging NLP tasks that would aid searching of the medical literature and the practice of evidence-based medicine. P18-1019 @@ -356,7 +356,7 @@ <fixed-case>L</fixed-case>ink<fixed-case>NB</fixed-case>ed: Multi-Graph Representation Learning with Entity Linkage RakshitTrivedi BunyaminSisman - Xin LunaDong + Xin LunaDong ChristosFaloutsos JunMa HongyuanZha @@ -383,9 +383,9 @@
Graph-to-Sequence Learning using Gated Graph Neural Networks - DanielBeck - GholamrezaHaffari - TrevorCohn + DanielBeck + GholamrezaHaffari + TrevorCohn 273–283 Many NLP applications can be framed as a graph-to-sequence learning problem. Previous work proposing neural architectures on graph-to-sequence obtained promising results compared to grammar-based approaches but still rely on linearisation heuristics and/or standard recurrent networks to achieve the best performance. In this work propose a new model that encodes the full structural information contained in the graph. Our architecture couples the recently proposed Gated Graph Neural Networks with an input transformation that allows nodes and edges to have their own hidden representations, while tackling the parameter explosion problem present in previous work. Experimental results shows that our model outperforms strong baselines in generation from AMR graphs and syntax-based neural machine translation. P18-1026 @@ -398,7 +398,7 @@ UrvashiKhandelwal HeHe PengQi - DanJurafsky + DanJurafsky 284–294 We know very little about how neural language models (LM) use prior linguistic context. In this paper, we investigate the role of context in an LSTM LM, through ablation studies. Specifically, we analyze the increase in perplexity when prior context words are shuffled, replaced, or dropped. On two standard datasets, Penn Treebank and WikiText-2, we find that the model is capable of using about 200 tokens of context on average, but sharply distinguishes nearby context (recent 50 tokens) from the distant history. The model is highly sensitive to the order of words within the most recent sentence, but ignores word order in the long-range context (beyond 50 tokens), suggesting the distant past is modeled only as a rough semantic field or topic. We further find that the neural caching model (Grave et al., 2017b) especially helps the LSTM to copy words from within this distant context. Overall, our analysis not only provides a better understanding of how neural LMs use their context, but also sheds light on recent success from cache-based models. P18-1027 @@ -411,7 +411,7 @@ Bridging <fixed-case>CNN</fixed-case>s, <fixed-case>RNN</fixed-case>s, and Weighted Finite-State Machines RoySchwartz SamThomson - Noah A.Smith + Noah A.Smith 295–305 Recurrent and convolutional neural networks comprise two distinct families of models that have proven to be useful for encoding natural language utterances. In this paper we present SoPa, a new model that aims to bridge these two approaches. SoPa combines neural representation learning with weighted finite-state automata (WFSAs) to learn a soft version of traditional surface patterns. We show that SoPa is an extension of a one-layer CNN, and that such CNNs are equivalent to a restricted version of SoPa, and accordingly, to a restricted form of WFSA. Empirically, on three text classification tasks, SoPa is comparable or better than both a BiLSTM (RNN) baseline and a CNN baseline, and is particularly useful in small data settings. P18-1028 @@ -424,7 +424,7 @@ Zero-shot Learning of Classifiers from Natural Language Quantification ShashankSrivastava IgorLabutov - TomMitchell + TomMitchell 306–316 Humans can efficiently learn new concepts using language. We present a framework through which a set of explanations of a concept can be used to learn a classifier without access to any labeled examples. We use semantic parsing to map explanations to probabilistic assertions grounded in latent class labels and observed attributes of unlabeled data, and leverage the differential semantics of linguistic quantifiers (e.g., ‘usually’ vs ‘always’) to drive model training. Experiments on three domains show that the learned classifiers outperform previous approaches for learning with limited data, and are comparable with fully supervised classifiers trained from a small number of labeled examples. P18-1029 @@ -458,7 +458,7 @@ Evaluating neural network explanation methods using hybrid documents and morphosyntactic agreement NinaPoerner - HinrichSchütze + HinrichSchütze BenjaminRoth 340–350 The behavior of deep neural networks (DNNs) is hard to understand. This makes it necessary to explore post hoc explanation methods. We conduct the first comprehensive evaluation of explanation methods for NLP. To this end, we design two novel evaluation paradigms that cover two important classes of NLP problems: small context and large context problems. Both paradigms require no manual annotation and are therefore broadly applicable. We also introduce LIMSSE, an explanation method inspired by LIME that is designed for NLP. We show empirically that LIMSSE, LRP and DeepLIFT are the most effective explanation methods and recommend them for explaining DNNs in NLP. @@ -476,7 +476,7 @@ KarthikRamanathan SeshSadasivam RuiZhang - DragomirRadev + DragomirRadev 351–360 To be informative, an evaluation must measure how well systems generalize to realistic unseen data. We identify limitations of and propose improvements to current evaluations of text-to-SQL systems. First, we compare human-generated and automatically generated questions, characterizing properties of queries necessary for real-world applications. To facilitate evaluation on multiple datasets, we release standardized and improved versions of seven existing datasets and one new text-to-SQL dataset. Second, we show that the current division of data into training and test sets measures robustness to variations in the way questions are asked, but only partially tests how well systems generalize to new queries; therefore, we propose a complementary dataset split for evaluation of future work. Finally, we demonstrate how the common practice of anonymizing variables during evaluation removes an important challenge of the task. Our observations highlight key difficulties, and our methodology enables effective measurement of future development. P18-1033 @@ -517,8 +517,8 @@ Character-Level Models versus Morphology in Semantic Role Labeling - Gözde GülŞahin - MarkSteedman + Gözde GülŞahin + MarkSteedman 386–396 Character-level models have become a popular approach specially for their accessibility and ability to handle unseen data. However, little is known on their ability to reveal the underlying morphological structure of a word, which is a crucial skill for high-level semantic analysis tasks, such as semantic role labeling (SRL). In this work, we train various types of SRL models that use word, character and morphology level information and analyze how performance of characters compare to words and morphology for several languages. We conduct an in-depth error analysis for each morphological typology and analyze the strengths and limitations of character-level models that relate to out-of-domain data, training data size, long range dependencies and model complexity. Our exhaustive analyses shed light on important characteristics of character-level models and their semantic capability. P18-1036 @@ -541,7 +541,7 @@ Accurate <fixed-case>SHRG</fixed-case>-Based Semantic Parsing YufeiChen - WeiweiSun + WeiweiSun XiaojunWan 408–418 We demonstrate that an SHRG-based parser can produce semantic graphs much more accurately than previously shown, by relating synchronous production rules to the syntacto-semantic composition process. Our parser achieves an accuracy of 90.35 for EDS (89.51 for DMRS) in terms of elementary dependency match, which is a 4.87 (5.45) point improvement over the best existing data-driven model, indicating, in our view, the importance of linguistically-informed derivation for data-driven semantic parsing. This accuracy is equivalent to that of English Resource Grammar guided models, suggesting that (recurrent) neural network models are able to effectively learn deep linguistic knowledge from annotations. @@ -554,8 +554,8 @@ Using Intermediate Representations to Solve Math Word Problems DanqingHuang - Jin-GeYao - Chin-YewLin + Jin-GeYao + Chin-YewLin QingyuZhou JianYin 419–428 @@ -569,7 +569,7 @@ Discourse Representation Structure Parsing JiangmingLiu - Shay B.Cohen + Shay B.Cohen MirellaLapata 429–439 We introduce an open-domain neural semantic parser which generates formal meaning representations in the style of Discourse Representation Theory (DRT; Kamp and Reyle 1993). We propose a method which transforms Discourse Representation Structures (DRSs) to trees and develop a structure-aware model which decomposes the decoding process into three stages: basic DRS structure prediction, condition prediction (i.e., predicates and relations), and referent prediction (i.e., variables). Experimental results on the Groningen Meaning Bank (GMB) show that our model outperforms competitive baselines by a wide margin. @@ -614,7 +614,7 @@ HannahRashkin MaartenSap EmilyAllaway - Noah A.Smith + Noah A.Smith YejinChoi 463–473 We investigate a new commonsense inference task: given an event described in a short free-form text (“X drinks coffee in the morning”), a system reasons about the likely intents (“X wants to stay awake”) and reactions (“X feels alert”) of the event’s participants. To support this study, we construct a new crowdsourced corpus of 25,000 event phrases covering a diverse range of everyday events and situations. We report baseline performance on this task, demonstrating that neural encoder-decoder models can successfully compose embedding representations of previously unseen events and reason about the likely intents and reactions of the event participants. In addition, we demonstrate how commonsense inference on people’s intents and reactions can help unveil the implicit gender inequality prevalent in modern movie scripts. @@ -638,7 +638,7 @@ Improving Event Coreference Resolution by Modeling Correlations between Event Coreference Chains and Document Topic Structures - Prafulla KumarChoubey + Prafulla KumarChoubey RuihongHuang 485–495 This paper proposes a novel approach for event coreference resolution that models correlations between event coreference chains and document topical structures through an Integer Linear Programming formulation. We explicitly model correlations between the main event chains of a document with topic transition sentences, inter-coreference chain correlations, event mention distributional characteristics and sub-event structure, and use them with scores obtained from a local coreference relation classifier for jointly resolving multiple event chains in a document. Our experiments across KBP 2016 and 2017 datasets suggest that each of the structures contribute to improving event coreference resolution performance. @@ -678,8 +678,8 @@ YuHong WenxuanZhou JingliZhang - GuodongZhou - QiaomingZhu + GuodongZhou + QiaomingZhu 515–526 Due to the ability of encoding and mapping semantic information into a high-dimensional latent feature space, neural networks have been successfully used for detecting events to a certain extent. However, such a feature space can be easily contaminated by spurious features inherent in event detection. In this paper, we propose a self-regulated learning approach by utilizing a generative adversarial network to generate spurious features. On the basis, we employ a recurrent network to eliminate the fakes. Detailed experiments on the ACE 2005 and TAC-KBP 2015 corpora show that our proposed method is highly effective and adaptable. P18-1048 @@ -726,8 +726,8 @@ Coherence Modeling of Asynchronous Conversations: A Neural Entity Grid Approach - ShafiqJoty - Muhammad TasnimMohiuddin + ShafiqJoty + Muhammad TasnimMohiuddin DatTien Nguyen 558–568 We propose a novel coherence model for written asynchronous conversations (e.g., forums, emails), and show its applications in coherence assessment and thread reconstruction tasks. We conduct our research in two steps. First, we propose improvements to the recently proposed neural entity grid model by lexicalizing its entity transitions. Then, we extend the model to asynchronous conversations by incorporating the underlying conversational structure in the entity grid representation and feature computation. Our model achieves state of the art results on standard coherence assessment tasks in monologue and conversations outperforming existing models. We also demonstrate its effectiveness in reconstructing thread structures. @@ -741,7 +741,7 @@ Deep Reinforcement Learning for <fixed-case>C</fixed-case>hinese Zero Pronoun Resolution QingyuYin YuZhang - Wei-NanZhang + Wei-NanZhang TingLiu William YangWang 569–578 @@ -786,14 +786,14 @@ <fixed-case>T</fixed-case>utorial<fixed-case>B</fixed-case>ank: A Manually-Collected Corpus for Prerequisite Chains, Survey Extraction and Resource Recommendation - AlexanderFabbri + AlexanderFabbri IreneLi PrawatTrairatvorakul YijiaoHe WeitaiTing RobertTung CaitlinWesterfield - DragomirRadev + DragomirRadev 611–620 The field of Natural Language Processing (NLP) is growing rapidly, with new research published daily along with an abundance of tutorials, codebases and other online resources. In order to learn this dynamic field or stay up-to-date on the latest research, students as well as educators and researchers must constantly sift through multiple sources to find valuable, relevant information. To address this situation, we introduce TutorialBank, a new, publicly available dataset which aims to facilitate NLP education and research. We have manually collected and categorized over 5,600 resources on NLP as well as the related fields of Artificial Intelligence (AI), Machine Learning (ML) and Information Retrieval (IR). Our dataset is notably the largest manually-picked corpus of resources intended for NLP education which does not include only academic papers. Additionally, we have created both a search engine and a command-line tool for the resources and have annotated the corpus to include lists of research topics, relevant resources for each topic, prerequisite relations among topics, relevant sub-parts of individual resources, among other annotations. We are releasing the dataset and present several avenues for further research. P18-1057 @@ -846,7 +846,7 @@ FuruWei ShaohanHuang MingZhou - TiejunZhao + TiejunZhao 654–663 Sentence scoring and sentence selection are two main steps in extractive document summarization systems. However, previous works treat them as two separated subtasks. In this paper, we present a novel end-to-end neural network framework for extractive document summarization by jointly learning to score and select sentences. It first reads the document sentences with a hierarchical encoder to obtain the representation of sentences. Then it builds the output summary by extracting sentences one by one. Different from previous methods, our approach integrates the selection strategy into the scoring model, which directly predicts the relative importance given previously selected sentences. Experiments on the CNN/Daily Mail dataset show that the proposed framework significantly outperforms the state-of-the-art extractive summarization models. P18-1061 @@ -906,9 +906,9 @@ Mining Cross-Cultural Differences and Similarities in Social Media - Bill YuchenLin - Frank F.Xu - KennyZhu + Bill YuchenLin + Frank F.Xu + KennyZhu Seung-wonHwang 709–719 Cross-cultural differences and similarities are common in cross-lingual natural language understanding, especially for research in social media. For instance, people of distinct cultures often hold different opinions on a single named entity. Also, understanding slang terms across languages requires knowledge of cross-cultural similarities. In this paper, we study the problem of computing such cross-cultural differences and similarities. We present a lightweight yet effective approach, and evaluate it on two novel tasks: 1) mining cross-cultural differences of named entities and 2) finding similar terms for slang across languages. Experimental results show that our framework substantially outperforms a number of baseline methods on both tasks. The framework could be useful for machine translation applications and research in computational social science. @@ -919,7 +919,7 @@ Classification of Moral Foundations in Microblog Political Discourse - KristenJohnson + KristenJohnson DanGoldwasser 720–730 Previous works in computer science, as well as political and social science, have shown correlation in text between political ideologies and the moral foundations expressed within that text. Additional work has shown that policy frames, which are used by politicians to bias the public towards their stance on an issue, are also correlated with political ideology. Based on these associations, this work takes a first step towards modeling both the language and how politicians frame issues on Twitter, in order to predict the moral foundations that are used by politicians to express their stances on issues. The contributions of this work includes a dataset annotated for the moral foundations, annotation guidelines, and probabilistic graphical models which show the usefulness of jointly modeling abstract political slogans, as opposed to the unigrams of previous works, with policy frames for the prediction of the morality underlying political tweets. @@ -980,7 +980,7 @@ On the Limitations of Unsupervised Bilingual Dictionary Induction - AndersSøgaard + AndersSøgaard SebastianRuder IvanVulić 778–788 @@ -994,8 +994,8 @@ A robust self-learning method for fully unsupervised cross-lingual mappings of word embeddings MikelArtetxe - GorkaLabaka - EnekoAgirre + GorkaLabaka + EnekoAgirre 789–798 Recent work has managed to learn cross-lingual word embeddings without parallel data by mapping monolingual embeddings to a shared space through adversarial training. However, their evaluation has focused on favorable conditions, using comparable corpora or closely-related languages, and we show that they often fail in more realistic scenarios. This work proposes an alternative approach based on a fully unsupervised initialization that explicitly exploits the structural similarity of the embeddings, and a robust self-learning algorithm that iteratively improves this solution. Our method succeeds in all tested scenarios and obtains the best published results in standard datasets, even surpassing previous supervised systems. Our implementation is released as an open source project at https://github.com/artetxem/vecmap. P18-1073 @@ -1022,8 +1022,8 @@ Two Methods for Domain Adaptation of Bilingual Tasks: Delightfully Simple and Broadly Applicable ViktorHangya FabienneBraune - AlexanderFraser - HinrichSchütze + AlexanderFraser + HinrichSchütze 810–820 Bilingual tasks, such as bilingual lexicon induction and cross-lingual classification, are crucial for overcoming data sparsity in the target language. Resources required for such tasks are often out-of-domain, thus domain adaptation is an important problem here. We make two contributions. First, we test a delightfully simple method for domain adaptation of bilingual word embeddings. We evaluate these embeddings on two bilingual tasks involving different domains: cross-lingual twitter sentiment classification and medical bilingual lexicon induction. Second, we tailor a broadly applicable semi-supervised classification method from computer vision to these tasks. We show that this method also helps in low-resource setups. Using both methods together we achieve large improvements over our baselines, by using only additional unlabeled data. P18-1075 @@ -1071,15 +1071,15 @@ Semantically Equivalent Adversarial Rules for Debugging <fixed-case>NLP</fixed-case> models - Marco TulioRibeiro + Marco TulioRibeiro SameerSingh CarlosGuestrin 856–865 Complex machine learning models for NLP are often brittle, making different predictions for input instances that are extremely similar semantically. To automatically detect this behavior for individual instances, we present semantically equivalent adversaries (SEAs) – semantic-preserving perturbations that induce changes in the model’s predictions. We generalize these adversaries into semantically equivalent adversarial rules (SEARs) – simple, universal replacement rules that induce adversaries on many instances. We demonstrate the usefulness and flexibility of SEAs and SEARs by detecting bugs in black-box state-of-the-art models for three domains: machine comprehension, visual question-answering, and sentiment analysis. Via user studies, we demonstrate that we generate high-quality local adversaries for more instances than humans, and that SEARs induce four times as many mistakes as the bugs discovered by human experts. SEARs are also actionable: retraining models using data augmentation significantly reduces bugs, while maintaining accuracy. P18-1079 P18-1079.Notes.pdf - @@ -1088,7 +1088,7 @@ ShrimaiPrabhumoye YuliaTsvetkov RuslanSalakhutdinov - Alan WBlack + Alan WBlack 866–876 Style transfer is the task of rephrasing the text to contain specific stylistic properties without changing the intent or affect within the context. This paper introduces a new method for automatic style transfer. We first learn a latent representation of the input sentence which is grounded in a language translation model in order to better preserve the meaning of the sentence while reducing stylistic properties. Then adversarial generation techniques are used to make the output match the desired style. We evaluate this technique on three different style transformations: sentiment, gender and political slant. Compared to two state-of-the-art style transfer modeling techniques we show improvements both in automatic evaluation of style transfer and in manual evaluation of meaning preservation and fluency. P18-1080 @@ -1153,7 +1153,7 @@ Illustrative Language Understanding: Large-Scale Visual Grounding with Image Search - JamieKiros + JamieKiros WilliamChan GeoffreyHinton 922–933 @@ -1167,7 +1167,7 @@ What Action Causes This? Towards Naive Physical Action-Effect Prediction QiaoziGao ShaohuaYang - JoyceChai + JoyceChai LucyVanderwende 934–945 Despite recent advances in knowledge representation, automated reasoning, and machine learning, artificial agents still lack the ability to understand basic action-effect relations regarding the physical world, for example, the action of cutting a cucumber most likely leads to the state where the cucumber is broken apart into smaller pieces. If artificial agents (e.g., robots) ever become our partners in joint tasks, it is critical to empower them with such action-effect understanding so that they can reason about the state of the world and plan for actions. Towards this goal, this paper introduces a new task on naive physical action-effect prediction, which addresses the relations between concrete actions (expressed in the form of verb-noun pairs) and their effects on the state of the physical world as depicted by images. We collected a dataset for this task and developed an approach that harnesses web image data through distant supervision to facilitate learning for action-effect prediction. Our empirical results have shown that web data can be used to complement a small number of seed examples (e.g., three examples for each action) for model learning. This opens up possibilities for agents to learn physical action-effect relations for tasks at hand through communication with humans with a few examples. @@ -1208,8 +1208,8 @@ Identifying Transferable Information Across Domains for Cross-domain Sentiment Classification RakshaSharma - PushpakBhattacharyya - SandipanDandapat + PushpakBhattacharyya + SandipanDandapat Himanshu SharadBhatt 968–978 Getting manually labeled data in each domain is always an expensive and a time consuming task. Cross-domain sentiment analysis has emerged as a demanding concept where a labeled source domain facilitates a sentiment classifier for an unlabeled target domain. However, polarity orientation (positive or negative) and the significance of a word to express an opinion often differ from one domain to another domain. Owing to these differences, cross-domain sentiment classification is still a challenging task. In this paper, we propose that words that do not change their polarity and significance represent the transferable (usable) information across domains for cross-domain sentiment classification. We present a novel approach based on χ2 test and cosine-similarity between context vector of words to identify polarity preserving significant words across domains. Furthermore, we show that a weighted ensemble of the classifiers enhances the cross-domain classification performance. @@ -1255,7 +1255,7 @@ Working Memory Networks: Augmenting Memory Networks with a Relational Reasoning Module JuanPavez - HéctorAllende + HéctorAllende HéctorAllende-Cid 1000–1009 During the last years, there has been a lot of interest in achieving some kind of complex reasoning using deep neural networks. To do that, models like Memory Networks (MemNNs) have combined external memory storages and attention mechanisms. These architectures, however, lack of more complex reasoning mechanisms that could allow, for instance, relational reasoning. Relation Networks (RNs), on the other hand, have shown outstanding results in relational reasoning tasks. Unfortunately, their computational cost grows quadratically with the number of memories, something prohibitive for larger problems. To solve these issues, we introduce the Working Memory Network, a MemNN architecture with a novel working memory storage and reasoning module. Our model retains the relational reasoning abilities of the RN while reducing its computational complexity from quadratic to linear. We tested our model on the text QA dataset bAbI and the visual QA dataset NLVR. In the jointly trained bAbI-10k, we set a new state-of-the-art, achieving a mean error of less than 0.5%. Moreover, a simple ensemble of two of our models solves all 20 tasks in the joint version of the benchmark. @@ -1268,7 +1268,7 @@ Reasoning with Sarcasm by Reading In-Between YiTay - Anh TuanLuu + Anh TuanLuu Siu CheungHui JianSu 1010–1020 @@ -1308,7 +1308,7 @@ Strong Baselines for Neural Semi-Supervised Learning under Domain Shift SebastianRuder - BarbaraPlank + BarbaraPlank 1044–1054 Novel neural models have been proposed in recent years for learning under domain shift. Most models, however, only evaluate on a single task, on proprietary datasets, or compare to weak baselines, which makes comparison of models difficult. In this paper, we re-evaluate classic general-purpose bootstrapping approaches in the context of neural networks under domain shifts vs. recent neural approaches and propose a novel multi-task tri-training method that reduces the time and space complexity of classic tri-training. Extensive experiments on two benchmarks for part-of-speech tagging and sentiment analysis are negative: while our novel method establishes a new state-of-the-art for sentiment analysis, it does not fare consistently the best. More importantly, we arrive at the somewhat surprising conclusion that classic tri-training, with some additions, outperforms the state-of-the-art for NLP. Hence classic approaches constitute an important and strong baseline. P18-1096 @@ -1333,7 +1333,7 @@ A Neural Architecture for Automated <fixed-case>ICD</fixed-case> Coding PengtaoXie - EricXing + EricXing 1066–1076 The International Classification of Diseases (ICD) provides a hierarchy of diagnostic codes for classifying diseases. Medical coding – which assigns a subset of ICD codes to a patient visit – is a mandatory process that is crucial for patient care and billing. Manual coding is time-consuming, expensive, and error prone. In this paper, we build a neural architecture for automated coding. It takes the diagnosis descriptions (DDs) of a patient as inputs and selects the most relevant ICD codes. This architecture contains four major ingredients: (1) tree-of-sequences LSTM encoding of code descriptions (CDs), (2) adversarial learning for reconciling the different writing styles of DDs and CDs, (3) isotonic constraints for incorporating the importance order among the assigned codes, and (4) attentional matching for performing many-to-one and one-to-many mappings from DDs to CDs. We demonstrate the effectiveness of the proposed methods on a clinical datasets with 59K patient visits. P18-1098 @@ -1344,7 +1344,7 @@ Domain Adaptation with Adversarial Training and Graph Embeddings FirojAlam - ShafiqJoty + ShafiqJoty MuhammadImran 1077–1087 The success of deep neural networks (DNNs) is heavily dependent on the availability of labeled data. However, obtaining labeled data is a big challenge in many real-world problems. In such scenarios, a DNN model can leverage labeled and unlabeled data from a related domain, but it has to deal with the shift in data distributions between the source and the target domains. In this paper, we study the problem of classifying social media posts during a crisis event (e.g., Earthquake). For that, we use labeled and unlabeled data from past similar events (e.g., Flood) and unlabeled data for the current event. We propose a novel model that performs adversarial learning based domain adaptation to deal with distribution drifts and graph based semi-supervised learning to leverage unlabeled data within a single unified deep learning framework. Our experiments with two real-world crisis datasets collected from Twitter demonstrate significant improvements over several baselines. @@ -1389,7 +1389,7 @@ YixingFan YanyanLan JunXu - XueqiCheng + XueqiCheng 1108–1117 In conversation, a general response (e.g., “I don’t know”) could correspond to a large variety of input utterances. Previous generative conversational models usually employ a single model to learn the relationship between different utterance-response pairs, thus tend to favor general and trivial responses which appear frequently. To address this problem, we propose a novel controlled response generation mechanism to handle different utterance-response relationships in terms of specificity. Specifically, we introduce an explicit specificity control variable into a sequence-to-sequence model, which interacts with the usage representation of words through a Gaussian Kernel layer, to guide the model to generate responses at different specificity levels. We describe two ways to acquire distant labels for the specificity control variable in learning. Empirical studies show that our model can significantly outperform the state-of-the-art response generation models under both automatic and human evaluations. P18-1102 @@ -1405,15 +1405,15 @@ DaxiangDong YiLiu YingChen - Wayne XinZhao + Wayne XinZhao DianhaiYu HuaWu 1118–1127 Human generates responses relying on semantic and functional dependencies, including coreference relation, among dialogue elements and their context. In this paper, we investigate matching a response with its multi-turn context using dependency information based entirely on attention. Our solution is inspired by the recently proposed Transformer in machine translation (Vaswani et al., 2017) and we extend the attention mechanism in two ways. First, we construct representations of text segments at different granularities solely with stacked self-attention. Second, we try to extract the truly matched segment pairs with attention across the context and response. We jointly introduce those two kinds of attention in one uniform neural network. Experiments on two large-scale multi-turn response selection tasks show that our proposed model significantly outperforms the state-of-the-art models. P18-1103 P18-1103.Software.zip - @@ -1432,13 +1432,13 @@ <fixed-case>T</fixed-case>aylor’s law for Human Linguistic Sequences TatsuruKobayashi - KumikoTanaka-Ishii + KumikoTanaka-Ishii 1138–1148 Taylor’s law describes the fluctuation characteristics underlying a system in which the variance of an event within a time span grows by a power law with respect to the mean. Although Taylor’s law has been applied in many natural and social systems, its application for language has been scarce. This article describes a new way to quantify Taylor’s law in natural language and conducts Taylor analysis of over 1100 texts across 14 languages. We found that the Taylor exponents of natural language written texts exhibit almost the same value. The exponent was also compared for other language-related data, such as the child-directed speech, music, and programming languages. The results show how the Taylor exponent serves to quantify the fundamental structural complexity underlying linguistic time series. The article also shows the applicability of these findings in evaluating language models. P18-1105 P18-1105.Notes.pdf - @@ -1499,7 +1499,7 @@ Extending a Parser to Distant Domains Using a Few Dozen Partially Annotated Examples VidurJoshi - MatthewPeters + MatthewPeters MarkHopkins 1190–1199 We revisit domain adaptation for parsers in the neural era. First we show that recent advances in word representations greatly diminish the need for domain adaptation when the target domain is syntactically similar to the source domain. As evidence, we train a parser on the Wall Street Journal alone that achieves over 90% F1 on the Brown corpus. For more syntactically distant domains, we provide a simple way to adapt a parser using only dozens of partial annotations. For instance, we increase the percentage of error-free geometry-domain parses in a held-out set from 45% to 73% using approximately five dozen training examples. In the process, we demonstrate a new state-of-the-art single model result on the Wall Street Journal test set of 94.3%. This is an absolute increase of 1.7% over the previous state-of-the-art of 92.6%. @@ -1565,7 +1565,7 @@ A Stochastic Decoder for Neural Machine Translation PhilipSchulz WilkerAziz - TrevorCohn + TrevorCohn 1243–1252 The process of translation is ambiguous, in that there are typically many valid translations for a given sentence. This gives rise to significant variation in parallel corpora, however, most current models of machine translation do not account for this variation, instead treating the problem as a deterministic process. To this end, we present a deep generative model of machine translation which incorporates a chain of latent variables, in order to account for local lexical and syntactic variation in parallel corpora. We provide an in-depth analysis of the pitfalls encountered in variational inference for training deep generative models. Experiments on several different language pairs demonstrate that the model consistently improves over strong baselines. P18-1115 @@ -1579,8 +1579,8 @@ ChunpengMa AkihiroTamura MasaoUtiyama - TiejunZhao - EiichiroSumita + TiejunZhao + EiichiroSumita 1253–1263 Tree-based neural machine translation (NMT) approaches, although achieved impressive performance, suffer from a major drawback: they only use the 1-best parse tree to direct the translation, which potentially introduces translation mistakes due to parsing errors. For statistical machine translation (SMT), forest-based methods have been proven to be effective for solving this problem, while for NMT this kind of approach has not been attempted. This paper proposes a forest-based NMT method that translates a linearized packed forest under a simple sequence-to-sequence framework (i.e., a forest-to-sequence NMT model). The BLEU score of the proposed method is higher than that of the sequence-to-sequence NMT, tree-based NMT, and forest-based SMT systems. P18-1116 @@ -1609,7 +1609,7 @@ Document Context Neural Machine Translation with Memory Networks SameenMaruf - GholamrezaHaffari + GholamrezaHaffari 1275–1284 We present a document-level neural machine translation model which takes both source and target document context into account using memory networks. We model the problem as a structured prediction problem with interdependencies among the observed and hidden variables, i.e., the source sentences and their unobserved target translations in the document. The resulting structured prediction problem is tackled with a neural translation model equipped with two memory components, one each for the source and target side, to capture the documental interdependencies. We train the model end-to-end, and propose an iterative decoding algorithm based on block coordinate descent. Experimental results of English translations from French, German, and Estonian documents show that our model is effective in exploiting both source and target document context, and statistically significantly outperforms the previous work in terms of BLEU and METEOR. P18-1118 @@ -1635,7 +1635,7 @@ Learning Prototypical Goal Activities for Locations TianyuJiang - EllenRiloff + EllenRiloff 1297–1307 People go to different places to engage in activities that reflect their goals. For example, people go to restaurants to eat, libraries to study, and churches to pray. We refer to an activity that represents a common reason why people typically go to a location as a prototypical goal activity (goal-act). Our research aims to learn goal-acts for specific locations using a text corpus and semi-supervised learning. First, we extract activities and locations that co-occur in goal-oriented syntactic patterns. Next, we create an activity profile matrix and apply a semi-supervised label propagation algorithm to iteratively revise the activity strengths for different locations using a small set of labeled data. We show that this approach outperforms several baseline methods when judged against goal-acts identified by human annotators. P18-1120 @@ -1675,7 +1675,7 @@ GauravPandey DanishContractor VineetKumar - SachindraJoshi + SachindraJoshi 1329–1338 In this paper we present the Exemplar Encoder-Decoder network (EED), a novel conversation model that learns to utilize similar examples from training data to generate responses. Similar conversation examples (context-response pairs) from training data are retrieved using a traditional TF-IDF based retrieval model and the corresponding responses are used by our decoder to generate the ground truth response. The contribution of each retrieved response is weighed by the similarity of corresponding context with the input context. As a result, our model learns to assign higher similarity scores to those retrieved contexts whose responses are crucial for generating the final response. We present detailed experiments on two large data sets and we find that our method out-performs state of the art sequence to sequence generative models on several recently proposed evaluation metrics. P18-1123 @@ -1717,13 +1717,13 @@ Are <fixed-case>BLEU</fixed-case> and Meaning Representation in Opposition? OndřejCífka - OndřejBojar + OndřejBojar 1362–1371 One of possible ways of obtaining continuous-space sentence representations is by training neural machine translation (NMT) systems. The recent attention mechanism however removes the single point in the neural network from which the source sentence representation can be extracted. We propose several variations of the attentive NMT architecture bringing this meeting point back. Empirical evaluation suggests that the better the translation quality, the worse the learned sentence representations serve in a wide range of classification and similarity tasks. P18-1126 P18-1126.Notes.pdf - @@ -1775,13 +1775,13 @@ JingzhouLiu NanyunPeng GrahamNeubig - EduardHovy + EduardHovy 1403–1414 We introduce a novel architecture for dependency parsing: stack-pointer networks (StackPtr). Combining pointer networks (Vinyals et al., 2015) with an internal stack, the proposed model first reads and encodes the whole sentence, then builds the dependency tree top-down (from root-to-leaf) in a depth-first fashion. The stack tracks the status of the depth-first search and the pointer networks select one child for the word at the top of the stack at each step. The StackPtr parser benefits from the information of whole sentence and all previously derived subtree structures, and removes the left-to-right restriction in classical transition-based parsers. Yet the number of steps for building any (non-projective) parse tree is linear in the length of the sentence just as other transition-based parsers, yielding an efficient decoding algorithm with O(n^2) time complexity. We evaluate our model on 29 treebanks spanning 20 languages and different dependency annotation schemas, and achieve state-of-the-art performances on 21 of them P18-1130 P18-1130.Notes.zip - @@ -1800,11 +1800,11 @@ <fixed-case>LSTM</fixed-case>s Can Learn Syntax-Sensitive Dependencies Well, But Modeling Structure Makes Them Better AdhigunaKuncoro - ChrisDyer - JohnHale + ChrisDyer + JohnHale DaniYogatama StephenClark - PhilBlunsom + PhilBlunsom 1426–1436 Language exhibits hierarchical structure, but recent work using a subject-verb agreement diagnostic argued that state-of-the-art language models, LSTMs, fail to learn long-range syntax sensitive dependencies. Using the same diagnostic, we show that, in fact, LSTMs do succeed in learning such dependencies—provided they have enough capacity. We then explore whether models that have access to explicit syntactic information learn agreement more effectively, and how the way in which this structural information is incorporated into the model impacts performance. We find that the mere presence of syntactic information does not improve accuracy, but when model architecture is determined by syntax, number agreement is improved. Further, we find that the choice of how syntactic structure is built affects how well number agreement is learned: top-down construction outperforms left-corner and bottom-up variants in capturing non-local structural dependencies. P18-1132 @@ -1869,7 +1869,7 @@ YanyanLan JiafengGuo JunXu - XueqiCheng + XueqiCheng 1479–1488 Sequence to sequence (Seq2Seq) models have been widely used for response generation in the area of conversation. However, the requirements for different conversation scenarios are distinct. For example, customer service requires the generated responses to be specific and accurate, while chatbot prefers diverse responses so as to attract different users. The current Seq2Seq model fails to meet these diverse requirements, by using a general average likelihood as the optimization criteria. As a result, it usually generates safe and commonplace responses, such as ‘I don’t know’. In this paper, we propose two tailored optimization criteria for Seq2Seq to different conversation scenarios, i.e., the maximum generated likelihood for specific-requirement scenario, and the conditional value-at-risk for diverse-requirement scenario. Experimental results on the Ubuntu dialogue corpus (Ubuntu service scenario) and Chinese Weibo dataset (social chatbot scenario) show that our proposed models not only satisfies diverse requirements for different scenarios, but also yields better performances against traditional Seq2Seq models in terms of both metric-based and human evaluations. P18-1137 @@ -1922,8 +1922,8 @@ PhilippDufter MengjieZhao MartinSchmitt - AlexanderFraser - HinrichSchütze + AlexanderFraser + HinrichSchütze 1520–1530 We present a new method for estimating vector space representations of words: embedding learning by concept induction. We test this method on a highly parallel corpus and learn semantic representations of words in 1259 different languages in a single common space. An extensive experimental evaluation on crosslingual word similarity and sentiment analysis indicates that concept-based multilingual embedding learning performs better than previous approaches. P18-1141 @@ -1951,7 +1951,7 @@ GayatriBhat MonojitChoudhury SunayanaSitaram - SandipanDandapat + SandipanDandapat KalikaBali 1543–1553 Training language models for Code-mixed (CM) language is known to be a difficult problem because of lack of data compounded by the increased confusability due to the presence of more than one language. We present a computational technique for creation of grammatically valid artificial CM data based on the Equivalence Constraint Theory. We show that when training examples are sampled appropriately from this synthetic data and presented in certain order (aka training curriculum) along with monolingual and real CM data, it can significantly reduce the perplexity of an RNN-based language model. We also show that randomly generated CM data does not help in decreasing the perplexity of the LMs. @@ -1988,7 +1988,7 @@ Higher-order Relation Schema Induction using Tensor Factorization with Back-off and Aggregation MadhavNimishakavi ManishGupta - ParthaTalukdar + ParthaTalukdar 1575–1584 Relation Schema Induction (RSI) is the problem of identifying type signatures of arguments of relations from unlabeled text. Most of the previous work in this area have focused only on binary RSI, i.e., inducing only the subject and object type signatures per relation. However, in practice, many relations are high-order, i.e., they have more than two arguments and inducing type signatures of all arguments is necessary. For example, in the sports domain, inducing a schema win(WinningPlayer, OpponentPlayer, Tournament, Location) is more informative than inducing just win(WinningPlayer, OpponentPlayer). We refer to this problem as Higher-order Relation Schema Induction (HRSI). In this paper, we propose Tensor Factorization with Back-off and Aggregation (TFBA), a novel framework for the HRSI problem. To the best of our knowledge, this is the first attempt at inducing higher-order relation schemata from unlabeled text. Using the experimental analysis on three real world datasets we show how TFBA helps in dealing with sparsity and induce higher-order schemata. P18-1146 @@ -1998,8 +1998,8 @@ Discovering Implicit Knowledge with Unary Relations - MichaelGlass - AlfioGliozzo + MichaelGlass + AlfioGliozzo 1585–1594 State-of-the-art relation extraction approaches are only able to recognize relationships between mentions of entity arguments stated explicitly in the text and typically localized to the same sentence. However, the vast majority of relations are either implicit or not sententially localized. This is a major problem for Knowledge Base Population, severely limiting recall. In this paper we propose a new methodology to identify relations between two entities, consisting of detecting a very large number of unary relations, and using them to infer missing entities. We describe a deep learning architecture able to learn thousands of such relations very efficiently by using a common deep learning based representation. Our approach largely outperforms state of the art relation extraction technology on a newly introduced web scale knowledge base population benchmark, that we release to the research community. P18-1147 @@ -2023,7 +2023,7 @@ ShikharVashishth Shib SankarDasgupta Swayambhu NathRay - ParthaTalukdar + ParthaTalukdar 1605–1615 Document date is essential for many important tasks, such as document retrieval, summarization, event detection, etc. While existing approaches for these tasks assume accurate knowledge of the document date, this is not always available, especially for arbitrary documents from the Web. Document Dating is a challenging problem which requires inference over the temporal structure of the document. Prior document dating systems have largely relied on handcrafted features while ignoring such document-internal structures. In this paper, we propose NeuralDater, a Graph Convolutional Network (GCN) based document dating approach which jointly exploits syntactic and temporal graph structures of document in a principled way. To the best of our knowledge, this is the first application of deep learning for the problem of document dating. Through extensive experiments on real-world datasets, we find that NeuralDater significantly outperforms state-of-the-art baseline by 19% absolute (45% relative) accuracy points. P18-1149 @@ -2046,7 +2046,7 @@ <fixed-case>GTR</fixed-case>-<fixed-case>LSTM</fixed-case>: A Triple Encoder for Sentence Generation from <fixed-case>RDF</fixed-case> Data - Bayu DistiawanTrisedya + Bayu DistiawanTrisedya JianzhongQi RuiZhang WeiWang @@ -2089,7 +2089,7 @@ Learning to Generate Move-by-Move Commentary for Chess Games from Large-Scale Social Forum Data HarshJhamtani VarunGangal - EduardHovy + EduardHovy GrahamNeubig TaylorBerg-Kirkpatrick 1661–1671 @@ -2103,7 +2103,7 @@ From Credit Assignment to Entropy Regularization: Two New Algorithms for Neural Sequence Prediction ZihangDai QizheXie - EduardHovy + EduardHovy 1672–1682 In this work, we study the credit assignment problem in reward augmented maximum likelihood (RAML) learning, and establish a theoretical equivalence between the token-level counterpart of RAML and the entropy regularized reinforcement learning. Inspired by the connection, we propose two sequence prediction algorithms, one extending RAML with fine-grained credit assignment and the other improving Actor-Critic with a systematic entropy regularization. On two benchmark datasets, we show the proposed algorithms outperform RAML and Actor-Critic respectively, providing new alternatives to sequence prediction. P18-1155 @@ -2116,7 +2116,7 @@ <fixed-case>D</fixed-case>uo<fixed-case>RC</fixed-case>: Towards Complex Language Understanding with Paraphrased Reading Comprehension AmritaSaha RahulAralikatte - Mitesh M.Khapra + Mitesh M.Khapra KarthikSankaranarayanan 1683–1693 We propose DuoRC, a novel dataset for Reading Comprehension (RC) that motivates several new challenges for neural approaches in language understanding beyond those offered by existing RC datasets. DuoRC contains 186,089 unique question-answer pairs created from a collection of 7680 pairs of movie plots where each pair in the collection reflects two versions of the same movie - one from Wikipedia and the other from IMDb - written by two different authors. We asked crowdsourced workers to create questions from one version of the plot and a different set of workers to extract or synthesize answers from the other version. This unique characteristic of DuoRC where questions and answers are created from different versions of a document narrating the same underlying story, ensures by design, that there is very little lexical overlap between the questions created from one version and the segments containing the answer in the other version. Further, since the two versions have different levels of plot detail, narration style, vocabulary, etc., answering questions from the second version requires deeper language understanding and incorporating external background knowledge. Additionally, the narrative style of passages arising from movie plots (as opposed to typical descriptive passages in existing datasets) exhibits the need to perform complex reasoning over events across multiple sentences. Indeed, we observe that state-of-the-art neural RC models which have achieved near human performance on the SQuAD dataset, even when coupled with traditional NLP techniques to address the challenges presented in DuoRC exhibit very poor performance (F1 score of 37.42% on DuoRC v/s 86% on SQuAD dataset). This opens up several interesting research avenues wherein DuoRC could complement other RC datasets to explore novel neural approaches for studying language understanding. @@ -2219,10 +2219,10 @@ Attention Focusing for Neural Machine Translation by Bridging Source and Target Embeddings ShaohuiKuang - JunhuiLi - AntónioBranco + JunhuiLi + AntónioBranco WeihuaLuo - DeyiXiong + DeyiXiong 1767–1776 In neural machine translation, a source sequence of words is encoded into a vector from which a target sequence is generated in the decoding phase. Differently from statistical machine translation, the associations between source words and their possible target counterparts are not explicitly stored. Source and target words are at the two ends of a long information processing procedure, mediated by hidden states at both the source encoding and the target decoding phases. This makes it possible that a source word is incorrectly translated into a target word that is not any of its admissible equivalent counterparts in the target language. In this paper, we seek to somewhat shorten the distance between source and target words in that procedure, and thus strengthen their association, by means of a method we term bridging source and target word embeddings. We experiment with three strategies: (1) a source-side bridging model, where source word embeddings are moved one step closer to the output target sequence; (2) a target-side bridging model, which explores the more relevant source word embeddings for the prediction of the target sequence; and (3) a direct bridging model, which directly connects source and target word embeddings seeking to minimize errors in the translation of ones by the others. Experiments and analysis presented in this paper demonstrate that the proposed bridging models are able to significantly improve quality of both sentence translation, in general, and alignment and translation of individual source words with target words, in particular. P18-1164 @@ -2246,7 +2246,7 @@ Accelerating Neural Transformer via an Average Attention Network BiaoZhang - DeyiXiong + DeyiXiong JinsongSu 1789–1798 With parallelizable attention networks, the neural Transformer is very fast to train. However, due to the auto-regressive architecture and self-attention in the decoder, the decoding procedure becomes slow. To alleviate this issue, we propose an average attention network as an alternative to the self-attention network in the decoder of the neural Transformer. The average attention network consists of two layers, with an average layer that models dependencies on previous positions and a gating layer that is stacked over the average layer to enhance the expressiveness of the proposed attention network. We apply this network on the decoder part of the neural Transformer to replace the original target-side self-attention model. With masking tricks and dynamic programming, our model enables the neural Transformer to decode sentences over four times faster than its original version with almost no loss in training time and translation performance. We conduct a series of experiments on WMT17 translation tasks, where on 6 different language pairs, we obtain robust and consistent speed-ups in decoding. @@ -2341,13 +2341,13 @@ Backpropagating through Structured Argmax using a <fixed-case>SPIGOT</fixed-case> HaoPeng SamThomson - Noah A.Smith + Noah A.Smith 1863–1873 We introduce structured projection of intermediate gradients (SPIGOT), a new method for backpropagating through neural networks that include hard-decision structured predictions (e.g., parsing) in intermediate layers. SPIGOT requires no marginal inference, unlike structured attention networks and reinforcement learning-inspired solutions. Like so-called straight-through estimators, SPIGOT defines gradient-like quantities associated with intermediate nondifferentiable operations, allowing backpropagation before and after them; SPIGOT’s proxy aims to ensure that, after a parameter update, the intermediate structure will remain well-formed. We experiment on two structured NLP pipelines: syntactic-then-semantic dependency parsing, and semantic parsing followed by sentiment classification. We show that training with SPIGOT leads to a larger improvement on the downstream task than a modularly-trained pipeline, the straight-through estimator, and structured attention, reaching a new state of the art on semantic dependency parsing. P18-1173 P18-1173.Notes.pdf - @@ -2355,7 +2355,7 @@ Learning How to Actively Learn: A Deep Imitation Learning Approach MingLiu WrayBuntine - GholamrezaHaffari + GholamrezaHaffari 1874–1883 Heuristic-based active learning (AL) methods are limited when the data distribution of the underlying learning problems vary. We introduce a method that learns an AL “policy” using “imitation learning” (IL). Our IL-based approach makes use of an efficient and effective “algorithmic expert”, which provides the policy learner with good actions in the encountered AL situations. The AL strategy is then learned with a feedforward network, mapping situations to most informative query datapoints. We evaluate our method on two different tasks: text classification and named entity recognition. Experimental results show that our IL-based AL strategy is more effective than strong previous methods using heuristics and reinforcement learning. P18-1174 @@ -2398,7 +2398,7 @@ Harvesting Paragraph-level Question-Answer Pairs from <fixed-case>W</fixed-case>ikipedia XinyaDu - ClaireCardie + ClaireCardie 1907–1917 We study the task of generating from Wikipedia articles question-answer pairs that cover content beyond a single sentence. We propose a neural network approach that incorporates coreference knowledge via a novel gating mechanism. As compared to models that only take into account sentence-level information (Heilman and Smith, 2010; Du et al., 2017; Zhou et al., 2017), we find that the linguistic knowledge introduced by the coreference representation aids question generation significantly, producing models that outperform the current state-of-the-art. We apply our system (composed of an answer span extraction system and the passage-level QG system) to the 10,000 top ranking Wikipedia articles and create a corpus of over one million question-answer pairs. We provide qualitative analysis for the this large-scale generated corpus from Wikipedia. P18-1177 @@ -2428,14 +2428,14 @@ Language Generation via <fixed-case>DAG</fixed-case> Transduction YajieYe - WeiweiSun + WeiweiSun XiaojunWan 1928–1937 A DAG automaton is a formal device for manipulating graphs. By augmenting a DAG automaton with transduction rules, a DAG transducer has potential applications in fundamental NLP tasks. In this paper, we propose a novel DAG transducer to perform graph-to-program transformation. The target structure of our transducer is a program licensed by a declarative programming language rather than linguistic structures. By executing such a program, we can easily get a surface string. Our transducer is designed especially for natural language generation (NLG) from type-logical semantic graphs. Taking Elementary Dependency Structures, a format of English Resource Semantics, as input, our NLG system achieves a BLEU-4 score of 68.07. This remarkable result demonstrates the feasibility of applying a DAG transducer to resolve NLG, as well as the effectiveness of our design. P18-1179 P18-1179.Software.zip - @@ -2455,26 +2455,26 @@ Deep-speare: A joint neural model of poetic language, meter and rhyme Jey HanLau - TrevorCohn - TimothyBaldwin + TrevorCohn + TimothyBaldwin JulianBrooke AdamHammond 1948–1958 In this paper, we propose a joint architecture that captures language, rhyme and meter for sonnet modelling. We assess the quality of generated poems using crowd and expert judgements. The stress and rhyme models perform very well, as generated poems are largely indistinguishable from human-written poems. Expert evaluation, however, reveals that a vanilla language model captures meter implicitly, and that machine-generated poems still underperform in terms of readability and emotion. Our research shows the importance expert evaluation for poetry generation, and that future research should look beyond rhyme/meter and focus on poetic language. P18-1181 P18-1181.Notes.pdf - <fixed-case>N</fixed-case>eural<fixed-case>REG</fixed-case>: An end-to-end approach to referring expression generation - ThiagoCastro Ferreira + ThiagoCastro Ferreira DiegoMoussallem ÁkosKádár SanderWubben - EmielKrahmer + EmielKrahmer 1959–1969 Traditionally, Referring Expression Generation (REG) models first decide on the form and then on the content of references to discourse entities in text, typically relying on features such as salience and grammatical function. In this paper, we present a new approach (NeuralREG), relying on deep neural networks, which makes decisions about form and content in one go without explicit feature extraction. Using a delexicalized version of the WebNLG corpus, we show that the neural model substantially improves over two strong baselines. P18-1182 @@ -2486,13 +2486,13 @@ Stock Movement Prediction from Tweets and Historical Prices YumoXu - Shay B.Cohen + Shay B.Cohen 1970–1979 Stock movement prediction is a challenging problem: the market is highly stochastic, and we make temporally-dependent predictions from chaotic data. We treat these three complexities and present a novel deep generative model jointly exploiting text and price signals for this task. Unlike the case with discriminative or topic modeling, our model introduces recurrent, continuous latent variables for a better treatment of stochasticity, and uses neural variational inference to address the intractable posterior inference. We also provide a hybrid objective with temporal auxiliary to flexibly capture predictive dependencies. We demonstrate the state-of-the-art performance of our proposed model on a new stock movement prediction dataset which we collected. P18-1183 P18-1183.Notes.pdf - @@ -2500,7 +2500,7 @@ Rumor Detection on <fixed-case>T</fixed-case>witter with Tree-structured Recursive Neural Networks JingMa WeiGao - Kam-FaiWong + Kam-FaiWong 1980–1989 Automatic rumor detection is technically very challenging. In this work, we try to learn discriminative features from tweets content by following their non-sequential propagation structure and generate more powerful representations for identifying different type of rumors. We propose two recursive neural models based on a bottom-up and a top-down tree-structured neural networks for rumor representation learning and classification, which naturally conform to the propagation layout of tweets. Results on two public Twitter datasets demonstrate that our recursive neural models 1) achieve much better performance than state-of-the-art approaches; 2) demonstrate superior capacity on detecting rumors at very early stage. P18-1184 @@ -2513,7 +2513,7 @@ Visual Attention Model for Name Tagging in Multimodal Social Media DiLu LeonardoNeves - VitorCarvalho + VitorCarvalho NingZhang HengJi 1990–1999 @@ -2528,7 +2528,7 @@ Multimodal Named Entity Disambiguation for Noisy Social Media Posts SeungwhanMoon LeonardoNeves - VitorCarvalho + VitorCarvalho 2000–2008 We introduce the new Multimodal Named Entity Disambiguation (MNED) task for multimodal social media posts such as Snapchat or Instagram captions, which are composed of short captions with accompanying images. Social media posts bring significant challenges for disambiguation tasks because 1) ambiguity not only comes from polysemous entities, but also from inconsistent or incomplete notations, 2) very limited context is provided with surrounding words, and 3) there are many emerging entities often unseen during training. To this end, we build a new dataset called SnapCaptionsKB, a collection of Snapchat image captions submitted to public and crowd-sourced stories, with named entity mentions fully annotated and linked to entities in an external knowledge base. We then build a deep zeroshot multimodal network for MNED that 1) extracts contexts from both text and image, and 2) predicts correct entity in the knowledge graph embeddings space, allowing for zeroshot disambiguation of entities unseen in training set as well. The proposed model significantly outperforms the state-of-the-art text-only NED models, showing efficacy and potentials of the MNED task. P18-1186 @@ -2539,14 +2539,14 @@ Semi-supervised User Geolocation via Graph Convolutional Networks AfshinRahimi - TrevorCohn - TimothyBaldwin + TrevorCohn + TimothyBaldwin 2009–2019 Social media user geolocation is vital to many applications such as event detection. In this paper, we propose GCN, a multiview geolocation model based on Graph Convolutional Networks, that uses both text and network context. We compare GCN to the state-of-the-art, and to two baselines we propose, and show that our model achieves or is competitive with the state-of-the-art over three benchmark geolocation datasets when sufficient supervision is available. We also evaluate GCN under a minimal supervision scenario, and show it outperforms baselines. We find that highway network gates are essential for controlling the amount of useful neighbourhood expansion in GCN. P18-1187 P18-1187.Notes.pdf - @@ -2555,7 +2555,7 @@ ShashiNarayan RonaldCardenas NikosPapasarantopoulos - Shay B.Cohen + Shay B.Cohen MirellaLapata JiangshengYu YiChang @@ -2563,8 +2563,8 @@ Document modeling is essential to a variety of natural language understanding tasks. We propose to use external information to improve document modeling for problems that can be framed as sentence extraction. We develop a framework composed of a hierarchical document encoder and an attention-based extractor with attention over external information. We evaluate our model on extractive document summarization (where the external information is image captions and the title of the document) and answer selection (where the external information is a question). We show that our model consistently outperforms strong baselines, in terms of both informativeness and fluency (for CNN document summarization) and achieves state-of-the-art results for answer selection on WikiQA and NewsQA. P18-1188 P18-1188.Notes.pdf - @@ -2605,13 +2605,13 @@ NicholasFitzGerald JulianMichael LuhengHe - LukeZettlemoyer + LukeZettlemoyer 2051–2060 We present a new large-scale corpus of Question-Answer driven Semantic Role Labeling (QA-SRL) annotations, and the first high-quality QA-SRL parser. Our corpus, QA-SRL Bank 2.0, consists of over 250,000 question-answer pairs for over 64,000 sentences across 3 domains and was gathered with a new crowd-sourcing scheme that we show has high precision and good recall at modest cost. We also present neural models for two QA-SRL subtasks: detecting argument spans for a predicate and generating questions to label the semantic relationship. The best models achieve question accuracy of 82.6% and span-level accuracy of 77.6% (under human evaluation) on the full pipelined QA-SRL prediction task. They can also, as we show, be used to gather additional annotations at low cost. P18-1191 P18-1191.Notes.pdf - @@ -2661,14 +2661,14 @@ Token-level and sequence-level loss smoothing for <fixed-case>RNN</fixed-case> language models MahaElbayad - LaurentBesacier + LaurentBesacier JakobVerbeek 2094–2103 Despite the effectiveness of recurrent neural network language models, their maximum likelihood estimation suffers from two limitations. It treats all sentences that do not match the ground truth as equally poor, ignoring the structure of the output space. Second, it suffers from ’exposure bias’: during training tokens are predicted given ground-truth sequences, while at test time prediction is conditioned on generated output sequences. To overcome these limitations we build upon the recent reward augmented maximum likelihood approach that encourages the model to predict sentences that are close to the ground truth according to a given performance metric. We extend this approach to token-level loss smoothing, and propose improvements to the sequence-level smoothing approach. Our experiments on two different tasks, image captioning and machine translation, show that token-level and sequence-level loss smoothing are complementary, and significantly improve results. P18-1195 P18-1195.Notes.pdf - @@ -2699,16 +2699,16 @@ What you can cram into a single $&!#* vector: Probing sentence embeddings for linguistic properties AlexisConneau - GermanKruszewski + GermanKruszewski GuillaumeLample LoïcBarrault - MarcoBaroni + MarcoBaroni 2126–2136 Although much effort has recently been devoted to training high-quality sentence embeddings, we still have a poor understanding of what they are capturing. “Downstream” tasks, often based on sentence classification, are commonly used to evaluate the quality of sentence representations. The complexity of the tasks makes it however difficult to infer what kind of information is present in the representations. We introduce here 10 probing tasks designed to capture simple linguistic features of sentences, and we use them to study embeddings generated by three different encoders trained in eight distinct ways, uncovering intriguing properties of both encoders and training methods. P18-1198 P18-1198.Notes.pdf - @@ -2745,7 +2745,7 @@ KyunghyunCho IdoDagan SebastianRiedel - ClareVoss + ClareVoss 2160–2170 Most previous supervised event extraction methods have relied on features derived from manual annotations, and thus cannot be applied to new event types without extra annotation effort. We take a fresh look at event extraction and model it as a generic grounding problem: mapping each event mention to a specific type in a target event ontology. We design a transferable architecture of structural and compositional neural networks to jointly represent and map event mentions and types into a shared semantic space. Based on this new framework, we can select, for each event mention, the event type which is semantically closest in this space as its type. By leveraging manual annotations available for a small set of existing event types, our framework can be applied to new unseen event types without additional manual annotations. When tested on 23 unseen event types, our zero-shot framework, without manual annotations, achieved performance comparable to a supervised model trained from 3,000 sentences annotated with 500 event mentions. P18-1201 @@ -2772,7 +2772,7 @@ XiujunLi JianfengGao JingjingLiu - Kam-FaiWong + Kam-FaiWong 2182–2192 Training a task-completion dialogue agent via reinforcement learning (RL) is costly because it requires many interactions with real users. One common alternative is to use a user simulator. However, a user simulator usually lacks the language complexity of human interlocutors and the biases in its design may tend to degrade the agent. To address these issues, we present Deep Dyna-Q, which to our knowledge is the first deep RL framework that integrates planning for task-completion dialogue policy learning. We incorporate into the dialogue agent a model of the environment, referred to as the world model, to mimic real user response and generate simulated experience. During dialogue policy learning, the world model is constantly updated with real user experience to approach real user behavior, and in turn, the dialogue agent is optimized using both real experience and simulated experience. The effectiveness of our approach is demonstrated on a movie-ticket booking task in both simulated and human-in-the-loop settings. P18-1203 @@ -2816,7 +2816,7 @@ Young-BumKim DongchanKim AnjishnuKumar - RuhiSarikaya + RuhiSarikaya 2214–2224 In this paper, we explore the task of mapping spoken language utterances to one of thousands of natural language understanding domains in intelligent personal digital assistants (IPDAs). This scenario is observed in mainstream IPDAs in industry that allow third parties to develop thousands of new domains to augment built-in first party domains to rapidly increase domain coverage and overall IPDA capabilities. We propose a scalable neural model architecture with a shared encoder, a novel attention mechanism that incorporates personalization information and domain-specific classifiers that solves the problem efficiently. Our architecture is designed to efficiently accommodate incremental domain additions achieving two orders of magnitude speed up compared to full model retraining. We consider the practical constraints of real-time production systems, and design to minimize memory footprint and runtime latency. We demonstrate that incorporating personalization significantly improves domain classification accuracy in a setting with thousands of overlapping domains. P18-1206 @@ -2875,13 +2875,13 @@ HannahRohde AlexanderJohnson NathanSchneider - BonnieWebber + BonnieWebber 2257–2267 Theories of discourse coherence posit relations between discourse segments as a key feature of coherent text. Our prior work suggests that multiple discourse relations can be simultaneously operative between two segments for reasons not predicted by the literature. Here we test how this joint presence can lead participants to endorse seemingly divergent conjunctions (e.g., BUT and SO) to express the link they see between two segments. These apparent divergences are not symptomatic of participant naivety or bias, but arise reliably from the concurrent availability of multiple relations between segments – some available through explicit signals and some via inference. We believe that these new results can both inform future progress in theoretical work on discourse coherence and lead to higher levels of performance in discourse parsing. P18-1210 P18-1210.Notes.pdf - @@ -2932,7 +2932,7 @@ ChenliangLi WeiZhou FengJi - YuDuan + YuDuan HaiqingChen 2300–2310 In the era of big data, focused analysis for diverse topics with a short response time becomes an urgent demand. As a fundamental task, information filtering therefore becomes a critical necessity. In this paper, we propose a novel deep relevance model for zero-shot document filtering, named DAZER. DAZER estimates the relevance between a document and a category by taking a small set of seed words relevant to the category. With pre-trained word embeddings from a large external corpus, DAZER is devised to extract the relevance signals by modeling the hidden feature interactions in the word embedding space. The relevance signals are extracted through a gated convolutional process. The gate mechanism controls which convolution filters output the relevance signals in a category dependent manner. Experiments on two document collections of two different tasks (i.e., topic categorization and sentiment analysis) demonstrate that DAZER significantly outperforms the existing alternative solutions, including the state-of-the-art deep relevance ranking models. @@ -2974,7 +2974,7 @@ QianqianXie YanchunZhang HuaWang - XiuzhenZhang + XiuzhenZhang JiminHuang GangTian 2332–2340 @@ -3007,7 +3007,7 @@ KevinPatel SamarthAgrawal AbhijitMishra - PushpakBhattacharyya + PushpakBhattacharyya 2352–2362 Predicting a reader’s rating of text quality is a challenging task that involves estimating different subjective aspects of the text, like structure, clarity, etc. Such subjective aspects are better handled using cognitive information. One such source of cognitive information is gaze behaviour. In this paper, we show that gaze behaviour does indeed help in effectively predicting the rating of text quality. To do this, we first we model text quality as a function of three properties - organization, coherence and cohesion. Then, we demonstrate how capturing gaze behaviour helps in predicting each of these properties, and hence the overall quality, by reporting improvements obtained by adding gaze features to traditional textual features for score prediction. We also hypothesize that if a reader has fully understood the text, the corresponding gaze behaviour would give a better indication of the assigned rating, as opposed to partial understanding. Our experiments validate this hypothesis by showing greater agreement between the given rating and the predicted rating when the reader has a full understanding of the text. P18-1219 @@ -3019,7 +3019,7 @@ Multi-Input Attention for Unsupervised <fixed-case>OCR</fixed-case> Correction RuiDong - DavidSmith + DavidSmith 2363–2372 We propose a novel approach to OCR post-correction that exploits repeated texts in large corpora both as a source of noisy target outputs for unsupervised training and as a source of evidence when decoding. A sequence-to-sequence model with attention is applied for single-input correction, and a new decoder with multi-input attention averaging is developed to search for consensus among multiple sequences. We design two ways of training the correction model without human annotation, either training to match noisily observed textual variants or bootstrapping from a uniform error model. On two corpora of historical newspapers and books, we show that these unsupervised techniques cut the character and word error rates nearly in half on single inputs and, with the addition of multi-input decoding, can rival supervised methods. P18-1220 @@ -3029,7 +3029,7 @@ Building Language Models for Text with Named Entities - Md RizwanParvez + Md RizwanParvez SaikatChakraborty BaishakhiRay Kai-WeiChang @@ -3044,7 +3044,7 @@ hyperdoc2vec: Distributed Representations of Hypertext Documents JialongHan YanSong - Wayne XinZhao + Wayne XinZhao ShumingShi HaisongZhang 2384–2394 @@ -3072,7 +3072,7 @@ QianChen XiaodanZhu Zhen-HuaLing - DianaInkpen + DianaInkpen SiWei 2406–2417 Modeling natural language inference is a very challenging task. With the availability of large annotated data, it has recently become feasible to train complex models such as neural-network-based inference models, which have shown to achieve the state-of-the-art performance. Although there exist relatively large annotated data, can machines learn all knowledge needed to perform natural language inference (NLI) from these data? If not, how can neural-network-based NLI models benefit from external knowledge and how to build NLI models to leverage it? In this paper, we enrich the state-of-the-art neural natural language inference models with external knowledge. We demonstrate that the proposed models improve neural NLI models to achieve the state-of-the-art performance on the SNLI and MultiNLI datasets. @@ -3086,7 +3086,7 @@ DongyeopKang TusharKhot AshishSabharwal - EduardHovy + EduardHovy 2418–2428 We consider the problem of learning textual entailment models with limited supervision (5K-10K training examples), and present two complementary approaches for it. First, we propose knowledge-guided adversarial example generators for incorporating large lexical resources in entailment models via only a handful of rule templates. Second, to make the entailment model—a discriminator—more robust, we propose the first GAN-style approach for training it using a natural language example generator that iteratively adjusts to the discriminator’s weaknesses. We demonstrate effectiveness using two entailment datasets, where the proposed methods increase accuracy by 4.7% on SciTail and by 2.8% on a 1% sub-sample of SNLI. Notably, even a single hand-written rule, negate, improves the accuracy of negation examples in SNLI by 6.1%. P18-1225 @@ -3101,7 +3101,7 @@ JeongminByun SionBaek YongseokCho - AliceOh + AliceOh 2429–2438 Research on distributed word representations is focused on widely-used languages such as English. Although the same methods can be used for other languages, language-specific knowledge can enhance the accuracy and richness of word vector representations. In this paper, we look at improving distributed word representations for Korean using knowledge about the unique linguistic structure of Korean. Specifically, we decompose Korean words into the jamo-level, beyond the character-level, allowing a systematic use of subword information. To evaluate the vectors, we develop Korean test sets for word similarity and analogy and make them publicly available. The results show that our simple method outperforms word2vec and character-level Skip-Grams on semantic and syntactic similarity and analogy tasks and contributes positively toward downstream NLP tasks such as sentiment analysis. P18-1226 @@ -3156,7 +3156,7 @@ FuliLuo TianyuLiu QiaolinXia - BaobaoChang + BaobaoChang ZhifangSui 2473–2482 Word Sense Disambiguation (WSD) aims to identify the correct meaning of polysemous words in the particular context. Lexical resources like WordNet which are proved to be of great help for WSD in the knowledge-based methods. However, previous neural networks for WSD always rely on massive labeled data (context), ignoring lexical resources like glosses (sense definitions). In this paper, we integrate the context and glosses of the target word into a unified framework in order to make full use of both labeled data and lexical knowledge. Therefore, we propose GAS: a gloss-augmented WSD neural network which jointly encodes the context and glosses of the target word. GAS models the semantic relationship between the context and the gloss in an improved memory network framework, which breaks the barriers of the previous supervised methods and knowledge-based methods. We further extend the original gloss of word sense via its semantic relations in WordNet to enrich the gloss information. The experimental results show that our model outperforms the state-of-the-art systems on several English all-words WSD datasets. @@ -3169,7 +3169,7 @@ Bilingual Sentiment Embeddings: Joint Projection of Sentiment Across Languages JeremyBarnes RomanKlinger - SabineSchulte im Walde + SabineSchulte im Walde 2483–2493 Sentiment analysis in low-resource languages suffers from a lack of annotated corpora to estimate high-performing models. Machine translation and bilingual word embeddings provide some relief through cross-lingual sentiment approaches. However, they either require large amounts of parallel data or do not sufficiently capture sentiment information. We introduce Bilingual Sentiment Embeddings (BLSE), which jointly represent sentiment information in a source and target language. This model only requires a small bilingual lexicon, a source-language corpus annotated for sentiment, and monolingual word embeddings for each language. We perform experiments on three language combinations (Spanish, Catalan, Basque) for sentence-level cross-lingual sentiment classification and find that our model significantly outperforms state-of-the-art methods on four out of six experimental setups, as well as capturing complementary information to machine translation. Our analysis of the resulting embedding space provides evidence that it represents sentiment information in the resource-poor target language without any annotated data in that language. P18-1231 @@ -3195,7 +3195,7 @@ MinlongPeng QiZhang Yu-gangJiang - XuanjingHuang + XuanjingHuang 2505–2513 The task of adopting a model with good performance to a target domain that is different from the source domain used for training has received considerable attention in sentiment analysis. Most existing approaches mainly focus on learning representations that are domain-invariant in both the source and target domains. Few of them pay attention to domain-specific information, which should also be informative. In this work, we propose a method to simultaneously extract domain specific and invariant representations and train a classifier on each of the representation, respectively. And we introduce a few target domain labeled data for learning domain-specific information. To effectively utilize the target domain labeled data, we train the domain invariant representation based classifier with both the source and target domain labeled data and train the domain-specific representation based classifier with only the target domain labeled data. These two classifiers then boost each other in a co-training style. Extensive sentiment analysis experiments demonstrated that the proposed method could achieve better performance than state-of-the-art methods. P18-1233 @@ -3216,7 +3216,7 @@ A Helping Hand: Transfer Learning for Deep Sentiment Analysis - XinDong + XinDong Gerardde Melo 2524–2534 Deep convolutional neural networks excel at sentiment polarity classification, but tend to require substantial amounts of training data, which moreover differs quite significantly between domains. In this work, we present an approach to feed generic cues into the training process of such networks, leading to better generalization abilities given limited training data. We propose to induce sentiment embeddings via supervision on extrinsic data, which are then fed into the model via a dedicated memory-based component. We observe significant gains in effectiveness on a range of different datasets in seven different languages. @@ -3226,7 +3226,7 @@ Cold-Start Aware User and Product Attention for Sentiment Classification - Reinald KimAmplayo + Reinald KimAmplayo JihyeokKim SuaSung Seung-wonHwang @@ -3239,7 +3239,7 @@ Modeling Deliberative Argumentation Strategies on <fixed-case>W</fixed-case>ikipedia - KhalidAl-Khatib + KhalidAl-Khatib HenningWachsmuth KevinLang JakobHerpel @@ -3270,21 +3270,21 @@ DaphneIppolito BrendanCallahan RenoKriz - Derry TantiWijaya + Derry TantiWijaya ChrisCallison-Burch 2566–2576 We conduct the most comprehensive study to date into translating words via images. To facilitate research on the task, we introduce a large-scale multilingual corpus of images, each labeled with the word it represents. Past datasets have been limited to only a few high-resource languages and unrealistically easy translation settings. In contrast, we have collected by far the largest available dataset for this task, with images for approximately 10,000 words in each of 100 languages. We run experiments on a dozen high resource languages and 20 low resources languages, demonstrating the effect of word concreteness and part-of-speech on translation quality. %We find that while image features work best for concrete nouns, they are sometimes effective on other parts of speech. To improve image-based translation, we introduce a novel method of predicting word concreteness from images, which improves on a previous state-of-the-art unsupervised technique. This allows us to predict when image-based translation may be effective, enabling consistent improvements to a state-of-the-art text-based word translation system. Our code and the Massively Multilingual Image Dataset (MMID) are available at http://multilingual-images.org/. P18-1239 P18-1239.Poster.pdf - 10.18653/v1/P18-1239 P18-1239.Notes.pdf + 10.18653/v1/P18-1239 hewitt-etal-2018-learning On the Automatic Generation of Medical Imaging Reports BaoyuJing PengtaoXie - EricXing + EricXing 2577–2586 Medical imaging is widely used in clinical practice for diagnosis and treatment. Report-writing can be error-prone for unexperienced physicians, and time-consuming and tedious for experienced physicians. To address these issues, we study the automatic generation of medical imaging reports. This task presents several challenges. First, a complete report contains multiple heterogeneous forms of information, including findings and tags. Second, abnormal regions in medical images are difficult to identify. Third, the reports are typically long, containing multiple sentences. To cope with these challenges, we (1) build a multi-task learning framework which jointly performs the prediction of tags and the generation of paragraphs, (2) propose a co-attention mechanism to localize regions containing abnormalities and generate narrations for them, (3) develop a hierarchical LSTM model to generate long paragraphs. We demonstrate the effectiveness of the proposed methods on two publicly available dataset. P18-1240 @@ -3310,7 +3310,7 @@ Think Visually: Question Answering through Virtual Imagery AnkitGoyal - JianWang + JianWang JiaDeng 2598–2608 In this paper, we study the problem of geometric reasoning (a form of visual reasoning) in the context of question-answering. We introduce Dynamic Spatial Memory Network (DSMN), a new deep network architecture that specializes in answering questions that admit latent visual representations, and learns to generate and reason over such representations. Further, we propose two synthetic benchmarks, FloorPlanQA and ShapeIntersection, to evaluate the geometric reasoning capability of QA systems. Experimental results validate the effectiveness of our proposed DSMN for visual thinking tasks. @@ -3338,7 +3338,7 @@ TakaakiHori ShinjiWatanabe JonathanLe Roux - John R.Hershey + John R.Hershey 2620–2630 Recently, there has been growing interest in multi-speaker speech recognition, where the utterances of multiple speakers are recognized from their mixture. Promising techniques have been proposed for this task, but earlier works have required additional training data such as isolated source signals or senone alignments for effective learning. In this paper, we propose a new sequence-to-sequence framework to directly decode multiple label sequences from a single speech sequence by unifying source separation and speech recognition functions in an end-to-end manner. We further propose a new objective function to improve the contrast between the hidden vectors to avoid generating similar hypotheses. Experimental results show that the model is directly able to learn a mapping from a speech mixture to multiple label sequences, achieving 83.1% relative improvement compared to a model trained without the proposed objective. Interestingly, the results are comparable to those produced by previous end-to-end works featuring explicit separation and recognition modules. P18-1244 @@ -3351,7 +3351,7 @@ A Structured Variational Autoencoder for Contextual Morphological Inflection LawrenceWolf-Sonkin JasonNaradowsky - Sabrina J.Mielke + Sabrina J.Mielke RyanCotterell 2631–2641 Statistical morphological inflectors are typically trained on fully supervised, type-level data. One remaining open research question is the following: How can we effectively exploit raw, token-level data to improve their performance? To this end, we introduce a novel generative latent-variable model for the semi-supervised learning of inflection generation. To enable posterior inference over the latent variables, we derive an efficient variational inference procedure based on the wake-sleep algorithm. We experiment on 23 languages, using the Universal Dependencies corpora in a simulated low-resource setting, and find improvements of over 10% absolute accuracy in some cases. @@ -3381,7 +3381,7 @@ Neural Factor Graph Models for Cross-lingual Morphological Tagging ChaitanyaMalaviya - Matthew R.Gormley + Matthew R.Gormley GrahamNeubig 2653–2663 Morphological analysis involves predicting the syntactic traits of a word (e.g. POS: Noun, Case: Acc, Gender: Fem). Previous work in morphological tagging improves performance for low-resource languages (LRLs) through cross-lingual training with a high-resource language (HRL) from the same family, but is limited by the strict, often false, assumption that tag sets exactly overlap between the HRL and LRL. In this paper we propose a method for cross-lingual morphological tagging that aims to improve information sharing between languages by relaxing this assumption. The proposed model uses factorial conditional random fields with neural network potentials, making it possible to (1) utilize the expressive power of neural network representations to smooth over superficial differences in the surface forms, (2) model pairwise and transitive relationships between tags, and (3) accurately generate tag sets that are unseen or rare in the training data. Experiments on four languages from the Universal Dependencies Treebank demonstrate superior tagging accuracies over existing cross-lingual approaches. @@ -3418,7 +3418,7 @@ Pre- and In-Parsing Models for Neural Empty Category Detection YufeiChen YuanyuanZhao - WeiweiSun + WeiweiSun XiaojunWan 2687–2696 Motivated by the positive impact of empty category on syntactic parsing, we study neural models for pre- and in-parsing detection of empty category, which has not previously been investigated. We find several non-obvious facts: (a) BiLSTM can capture non-local contextual information which is essential for detecting empty categories, (b) even with a BiLSTM, syntactic information is still able to enhance the detection, and (c) automatic detection of empty categories improves parsing quality for overt words. Our neural ECD models outperform the prior state-of-the-art by significant margins. @@ -3472,8 +3472,8 @@ Finding syntax in human encephalography with beam search - JohnHale - ChrisDyer + JohnHale + ChrisDyer AdhigunaKuncoro JonathanBrennan 2727–2736 @@ -3486,13 +3486,13 @@ Learning to Ask Good Questions: Ranking Clarification Questions using Neural Expected Value of Perfect Information SudhaRao - HalDaumé III + HalDaumé III 2737–2746 Inquiry is fundamental to communication, and machines cannot effectively collaborate with humans unless they can ask questions. In this work, we build a neural network model for the task of ranking clarification questions. Our model is inspired by the idea of expected value of perfect information: a good question is one whose expected answer will be useful. We study this problem using data from StackExchange, a plentiful online resource in which people routinely ask clarifying questions to posts so that they can better offer assistance to the original poster. We create a dataset of clarification questions consisting of 77K posts paired with a clarification question (and answer) from three domains of StackExchange: askubuntu, unix and superuser. We evaluate our model on 500 samples of this dataset against expert human judgments and demonstrate significant improvements over controlled baselines. P18-1255 P18-1255.Notes.pdf - @@ -3501,7 +3501,7 @@ AndreCianflone YulanFeng JadKabbara - Jackie Chi KitCheung + Jackie Chi KitCheung 2747–2755 We introduce the novel task of predicting adverbial presupposition triggers, which is useful for natural language generation tasks such as summarization and dialogue systems. We introduce two new corpora, derived from the Penn Treebank and the Annotated English Gigaword dataset and investigate the use of a novel attention mechanism tailored to this task. Our attention mechanism augments a baseline recurrent neural network without the need for additional trainable parameters, minimizing the added computational cost of our mechanism. We demonstrate that this model statistically outperforms our baselines. P18-1256 @@ -3553,7 +3553,7 @@ Deep <fixed-case>RNN</fixed-case>s Encode Soft Hierarchical Syntax TerraBlevins OmerLevy - LukeZettlemoyer + LukeZettlemoyer 14–19 We present a set of experiments to demonstrate that deep recurrent neural networks (RNNs) learn internal representations that capture soft hierarchical notions of syntax from highly varied supervision. We consider four syntax tasks at different depths of the parse tree; for each word, we predict its part of speech as well as the first (parent), second (grandparent) and third level (great-grandparent) constituent labels that appear above it. These predictions are made from representations produced at different depths in networks that are pretrained with one of four objectives: dependency parsing, semantic role labeling, machine translation, or language modeling. In every case, we find a correspondence between network depth and syntactic depth, suggesting that a soft syntactic hierarchy emerges. This effect is robust across all conditions, indicating that the models encode significant amounts of syntax even in the absence of an explicit syntactic training supervision. P18-2003 @@ -3575,8 +3575,8 @@ Towards Robust and Privacy-preserving Text Representations YitongLi - TimothyBaldwin - TrevorCohn + TimothyBaldwin + TrevorCohn 25–30 Written text often provides sufficient clues to identify the author, their gender, age, and other important attributes. Consequently, the authorship of training and evaluation corpora can have unforeseen impacts, including differing model performance for different user groups, as well as privacy implications. In this paper, we propose an approach to explicitly obscure important author characteristics at training time, such that representations learned are invariant to these attributes. Evaluating on two tasks, we show that this leads to increased privacy in the learned representations, as well as more robust models to varying evaluation conditions, including out-of-domain corpora. P18-2005 @@ -3599,7 +3599,7 @@ Domain Adapted Word Embeddings for Improved Sentiment Classification - PrathushaK Sarma + PrathushaK Sarma YingyuLiang BillSethares 37–42 @@ -3612,11 +3612,11 @@ Active learning for deep semantic parsing - LongDuong + LongDuong HadiAfshar - DominiqueEstival + DominiqueEstival GlenPink - PhilipCohen + PhilipCohen MarkJohnson 43–48 Semantic parsing requires training data that is expensive and slow to collect. We apply active learning to both traditional and “overnight” data collection approaches. We show that it is possible to obtain good training hyperparameters from seed data which is only a small fraction of the full dataset. We show that uncertainty sampling based on least confidence score is competitive in traditional data collection but not applicable for overnight collection. We propose several active learning strategies for overnight data collection and show that different example selection strategies per domain perform best. @@ -3628,7 +3628,7 @@ Learning Thematic Similarity Metric from Article Sections Using Triplet Networks - LiatEin Dor + LiatEin Dor YosiMass AlonHalfon EladVenezian @@ -3647,8 +3647,8 @@ DmitryUstalov AlexanderPanchenko AndreyKutuzov - ChrisBiemann - Simone PaoloPonzetto + ChrisBiemann + Simone PaoloPonzetto 55–62 We use dependency triples automatically extracted from a Web-scale corpus to perform unsupervised semantic frame induction. We cast the frame induction problem as a triclustering problem that is a generalization of clustering for triadic data. Our replicable benchmarks demonstrate that the proposed graph-based approach, Triframes, shows state-of-the art results on this task on a FrameNet-derived dataset and performing on par with competitive methods on a verb class clustering task. P18-2010 @@ -3662,9 +3662,9 @@ SangameshwarPatil SachinPawar SwapnilHingmire - GirishPalshikar + GirishPalshikar VasudevaVarma - PushpakBhattacharyya + PushpakBhattacharyya 63–68 Identification of distinct and independent participants (entities of interest) in a narrative is an important task for many NLP applications. This task becomes challenging because these participants are often referred to using multiple aliases. In this paper, we propose an approach based on linguistic knowledge for identification of aliases mentioned using proper nouns, pronouns or noun phrases with common noun headword. We use Markov Logic Network (MLN) to encode the linguistic knowledge for identification of aliases. We evaluate on four diverse history narratives of varying complexity. Our approach performs better than the state-of-the-art approach as well as a combination of standard named entity recognition and coreference resolution techniques. P18-2011 @@ -3714,7 +3714,7 @@ Van-ThuyPhi JoanSantoso MasashiShimbo - YujiMatsumoto + YujiMatsumoto 89–95 This paper addresses the tasks of automatic seed selection for bootstrapping relation extraction, and noise reduction for distantly supervised relation extraction. We first point out that these tasks are related. Then, inspired by ranking relation instances and patterns computed by the HITS algorithm, and selecting cluster centroids using the K-means, LSA, or NMF method, we propose methods for selecting the initial seeds from an existing resource, or reducing the level of noise in the distantly labeled data. Experiments show that our proposed methods achieve a better performance than the baseline systems in both tasks. P18-2015 @@ -3723,9 +3723,9 @@ Automatic Extraction of Commonsense <fixed-case>L</fixed-case>ocated<fixed-case>N</fixed-case>ear Knowledge - Frank F.Xu - Bill YuchenLin - KennyZhu + Frank F.Xu + Bill YuchenLin + KennyZhu 96–101 LocatedNear relation is a kind of commonsense knowledge describing two physical objects that are typically found near each other in real life. In this paper, we study how to automatically extract such relationship through a sentence-level relation classifier and aggregating the scores of entity pairs from a large corpus. Also, we release two benchmark datasets for evaluation and future research. P18-2016 @@ -3736,10 +3736,10 @@ Neural Coreference Resolution with Deep Biaffine Attention by Joint Mention Detection and Mention Clustering RuiZhang - CíceroNogueira dos Santos + CíceroNogueira dos Santos MichihiroYasunaga BingXiang - DragomirRadev + DragomirRadev 102–107 Coreference resolution aims to identify in a text all mentions that refer to the same real world entity. The state-of-the-art end-to-end neural coreference model considers all text spans in a document as potential mentions and learns to link an antecedent for each possible mention. In this paper, we propose to improve the end-to-end coreference resolution system by (1) using a biaffine attention model to get antecedent scores for each possible mention, and (2) jointly optimizing the mention detection accuracy and mention clustering accuracy given the mention cluster labels. Our model achieves the state-of-the-art performance on the CoNLL-2012 shared task English test set. P18-2017 @@ -3761,7 +3761,7 @@ Some of Them Can be Guessed! Exploring the Effect of Linguistic Context in Predicting Quantifiers SandroPezzelle ShaneSteinert-Threlkeld - RaffaellaBernardi + RaffaellaBernardi JakubSzymanik 114–119 We study the role of linguistic context in predicting quantifiers (‘few’, ‘all’). We collect crowdsourced data from human participants and test various models in a local (single-sentence) and a global context (multi-sentence) condition. Models significantly out-perform humans in the former setting and are only slightly better in the latter. While human performance improves with more linguistic context (especially on proportional quantifiers), model performance suffers. Models are very effective in exploiting lexical and morpho-syntactic patterns; humans are better at genuinely understanding the meaning of the (global) context. @@ -3773,7 +3773,7 @@ A Named Entity Recognition Shootout for <fixed-case>G</fixed-case>erman MartinRiedl - SebastianPadó + SebastianPadó 120–125 We ask how to practically build a model for German named entity recognition (NER) that performs at the state of the art for both contemporary and historical texts, i.e., a big-data and a small-data scenario. The two best-performing model families are pitted against each other (linear-chain CRFs and BiLSTM) to observe the trade-off between expressiveness and data requirements. BiLSTM outperforms the CRF when large datasets are available and performs inferior for the smallest dataset. BiLSTMs profit substantially from transfer learning, which enables them to be trained on multiple corpora, resulting in a new state-of-the-art model for German NER on two contemporary German corpora (CoNLL 2003 and GermEval 2014) and two historic corpora. P18-2020 @@ -3783,11 +3783,11 @@ A dataset for identifying actionable feedback in collaborative software development - Benjamin S.Meyers + Benjamin S.Meyers NuthanMunaiah - EmilyPrud’hommeaux + EmilyPrud’hommeaux AndrewMeneely - CeciliaOvesdotter Alm + CeciliaOvesdotter Alm JosephineWolff Pradeep K.Murukannaiah 126–131 @@ -3799,9 +3799,9 @@ <fixed-case>SNAG</fixed-case>: Spoken Narratives and Gaze Dataset PreethiVaidyanathan - Emily T.Prud’hommeaux + Emily T.Prud’hommeaux Jeff B.Pelz - Cecilia O.Alm + Cecilia O.Alm 132–137 Humans rely on multiple sensory modalities when examining and reasoning over images. In this paper, we describe a new multimodal dataset that consists of gaze measurements and spoken descriptions collected in parallel during an image inspection task. The task was performed by multiple participants on 100 general-domain images showing everyday objects and activities. We demonstrate the usefulness of the dataset by applying an existing visual-linguistic data fusion framework in order to label important image regions with appropriate linguistic labels. P18-2022 @@ -3886,7 +3886,7 @@ A Language Model based Evaluator for Sentence Compression YangZhao ZhiyuanLuo - AkikoAizawa + AkikoAizawa 170–175 We herein present a language-model-based evaluator for deletion-based sentence compression and view this task as a series of deletion-and-evaluation operations using the evaluator. More specifically, the evaluator is a syntactic neural language model that is first built by learning the syntactic and structural collocation among words. Subsequently, a series of trial-and-error deletion operations are conducted on the source sentences via a reinforcement learning framework to obtain the best target compression. An empirical study shows that the proposed model can effectively generate more readable compression, comparable or superior to several strong baselines. Furthermore, we introduce a 200-sentence test set for a large-scale dataset, setting a new baseline for the future research. P18-2028 @@ -3908,9 +3908,9 @@ Content-based Popularity Prediction of Online Petitions Using a Deep Regression Model - ShivashankarSubramanian - TimothyBaldwin - TrevorCohn + ShivashankarSubramanian + TimothyBaldwin + TrevorCohn 182–188 Online petitions are a cost-effective way for citizens to collectively engage with policy-makers in a democracy. Predicting the popularity of a petition — commonly measured by its signature count — based on its textual content has utility for policymakers as well as those posting the petition. In this work, we model this task using CNN regression with an auxiliary ordinal regression objective. We demonstrate the effectiveness of our proposed approach using UK and US government petition datasets. P18-2030 @@ -3919,7 +3919,7 @@ Fighting Offensive Language on Social Media with Unsupervised Text Style Transfer - CiceroNogueira dos Santos + CiceroNogueira dos Santos IgorMelnyk InkitPadhi 189–194 @@ -3932,7 +3932,7 @@ Diachronic degradation of language models: Insights from social media KokilJaidka NiyatiChhaya - LyleUngar + LyleUngar 195–200 Natural languages change over time because they evolve to the needs of their users and the socio-technological environment. This study investigates the diachronic accuracy of pre-trained language models for downstream tasks in machine learning and user profiling. It asks the question: given that the social media platform and its users remain the same, how is language changing over time? How can these differences be used to track the changes in the affect around a particular topic? To our knowledge, this is the first study to show that it is possible to measure diachronic semantic drifts within social media and within the span of a few years. P18-2032 @@ -3947,9 +3947,9 @@ BaolinPeng HuaixiaoTou TingChen - XuanjingHuang - Kam-faiWong - XiangyingDai + XuanjingHuang + Kam-faiWong + XiangyingDai 201–207 In this paper, we make a move to build a dialogue system for automatic diagnosis. We first build a dataset collected from an online medical forum by extracting symptoms from both patients’ self-reports and conversational data between patients and doctors. Then we propose a task-oriented dialogue system framework to make diagnosis for patients automatically, which can converse with patients to collect additional symptoms beyond their self-reports. Experimental results on our dataset show that additional symptoms extracted from conversation can greatly improve the accuracy for disease identification and our dialogue system is able to collect these symptoms automatically and make a better diagnosis. P18-2033 @@ -3965,7 +3965,7 @@ WeiZhou JunHuang HaiqingChen - BruceCroft + BruceCroft WeiLin 208–213 Building multi-turn information-seeking conversation systems is an important and challenging research topic. Although several advanced neural text matching models have been proposed for this task, they are generally not efficient for industrial applications. Furthermore, they rely on a large amount of labeled data, which may not be available in real-world applications. To alleviate these problems, we study transfer learning for multi-turn information seeking conversations in this paper. We first propose an efficient and effective multi-turn conversation model based on convolutional neural networks. After that, we extend our model to adapt the knowledge learned from a resource-rich domain to enhance the performance. Finally, we deployed our model in an industrial chatbot called AliMe Assist and observed a significant improvement over the existing online model. @@ -3976,8 +3976,8 @@ A Multi-task Approach to Learning Multilingual Representations KaranSingla - DoganCan - ShrikanthNarayanan + DoganCan + ShrikanthNarayanan 214–220 We present a novel multi-task modeling approach to learning multilingual distributed representations of text. Our system learns word and sentence embeddings jointly by training a multilingual skip-gram model together with a cross-lingual sentence similarity model. Our architecture can transparently use both monolingual and sentence aligned bilingual corpora to learn multilingual embeddings, thus covering a vocabulary significantly larger than the vocabulary of the bilingual corpora alone. Our model shows competitive performance in a standard cross-lingual document classification task. We also show the effectiveness of our method in a limited resource scenario. P18-2035 @@ -3987,7 +3987,7 @@ Characterizing Departures from Linearity in Word Translation - NdapaNakashole + NdapaNakashole RaphaelFlauger 221–227 We investigate the behavior of maps learned by machine translation methods. The maps translate words by projecting between word embedding spaces of different languages. We locally approximate these maps using linear maps, and find that they vary across the word embedding space. This demonstrates that the underlying maps are non-linear. Importantly, we show that the locally linear maps vary by an amount that is tightly correlated with the distance between the neighborhoods on which they are trained. Our results can be used to test non-linear methods, and to drive the design of more accurate maps for word translation. @@ -4083,7 +4083,7 @@ <fixed-case>CNN</fixed-case> for Text-Based Multiple Choice Question Answering AkshayChaturvedi - OnkarPandit + OnkarPandit UtpalGarain 272–277 The task of Question Answering is at the very core of machine comprehension. In this paper, we propose a Convolutional Neural Network (CNN) model for text-based multiple choice question answering where questions are based on a particular article. Given an article and a multiple choice question, our model assigns a score to each question-option tuple and chooses the final option accordingly. We test our model on Textbook Question Answering (TQA) and SciQ dataset. Our model outperforms several LSTM-based baseline models on the two datasets. @@ -4096,8 +4096,8 @@ Narrative Modeling with Memory Chains and Semantic Supervision FeiLiu - TrevorCohn - TimothyBaldwin + TrevorCohn + TimothyBaldwin 278–284 Story comprehension requires a deep semantic understanding of the narrative, making it a challenging task. Inspired by previous studies on ROC Story Cloze Test, we propose a novel method, tracking various semantic aspects with external neural memory chains while encouraging each to focus on a particular semantic aspect. Evaluated on the task of story ending prediction, our model demonstrates superior performance to a collection of competitive baselines, setting a new state of the art. P18-2045 @@ -4136,7 +4136,7 @@ Dynamic Sentence Sampling for Efficient Training of Neural Machine Translation RuiWang MasaoUtiyama - EiichiroSumita + EiichiroSumita 298–304 Traditional Neural machine translation (NMT) involves a fixed training procedure where each sentence is sampled once during each epoch. In reality, some sentences are well-learned during the initial few epochs; however, using this approach, the well-learned sentences would continue to be trained along with those sentences that were not well learned for 10-30 epochs, which results in a wastage of time. Here, we propose an efficient method to dynamically sample the sentences in order to accelerate the NMT training. In this approach, a weight is assigned to each sentence based on the measured difference between the training costs of two iterations. Further, in each epoch, a certain percentage of sentences are dynamically sampled according to their weights. Empirical results based on the NIST Chinese-to-English and the WMT English-to-German tasks show that the proposed method can significantly accelerate the NMT training and improve the NMT performance. P18-2048 @@ -4170,7 +4170,7 @@ Multi-representation ensembles and delayed <fixed-case>SGD</fixed-case> updates improve syntax-based <fixed-case>NMT</fixed-case> DanielleSaunders FelixStahlberg - Adriàde Gispert + Adriàde Gispert BillByrne 319–325 We explore strategies for incorporating target syntax into Neural Machine Translation. We specifically focus on syntax in ensembles containing multiple sentence representations. We formulate beam search over such ensembles using WFSTs, and describe a delayed SGD update training procedure that is especially effective for long representations like linearized syntax. Our approach gives state-of-the-art performance on a difficult Japanese-English task. @@ -4255,13 +4255,13 @@ LuhengHe KentonLee OmerLevy - LukeZettlemoyer + LukeZettlemoyer 364–369 Recent BIO-tagging-based neural semantic role labeling models are very high performing, but assume gold predicates as part of the input and cannot incorporate span-level features. We propose an end-to-end approach for jointly predicting all predicates, arguments spans, and the relations between them. The model makes independent decisions about what relationship, if any, holds between every possible word-span pair, and learns contextualized span representations that provide rich, shared input features for each decision. Experiments demonstrate that this approach sets a new state of the art on PropBank SRL without gold predicates. P18-2058 P18-2058.Notes.pdf - @@ -4269,13 +4269,13 @@ Sparse and Constrained Attention for Neural Machine Translation ChaitanyaMalaviya PedroFerreira - André F. T.Martins + André F. T.Martins 370–376 In neural machine translation, words are sometimes dropped from the source or generated repeatedly in the translation. We explore novel strategies to address the coverage problem that change only the attention transformation. Our approach allocates fertilities to source words, used to bound the attention each word can receive. We experiment with various sparse and constrained attention transformations and propose a new one, constrained sparsemax, shown to be differentiable and sparse. Empirical evaluation is provided in three languages pairs. P18-2059 P18-2059.Notes.pdf - @@ -4285,7 +4285,7 @@ DeruiZhu TamerAlkhouli ZixuanGan - HermannNey + HermannNey 377–382 Attention-based neural machine translation (NMT) models selectively focus on specific source positions to produce a translation, which brings significant improvements over pure encoder-decoder sequence-to-sequence models. This work investigates NMT while replacing the attention component. We study a neural hidden Markov model (HMM) consisting of neural network-based alignment and lexicon models, which are trained jointly using the forward-backward algorithm. We show that the attention component can be effectively replaced by the neural network alignment model and the neural HMM approach is able to provide comparable performance with the state-of-the-art attention-based models on the WMT 2017 German↔English and Chinese→English translation tasks. P18-2060 @@ -4300,7 +4300,7 @@ NikolaLjubešić IanMatroos MalvinaNissim - BarbaraPlank + BarbaraPlank 383–389 Gender prediction has typically focused on lexical and social network features, yielding good performance, but making systems highly language-, topic-, and platform dependent. Cross-lingual embeddings circumvent some of these limitations, but capture gender-specific style less. We propose an alternative: bleaching text, i.e., transforming lexical strings into more abstract features. This study provides evidence that such features allow for better transfer across languages. Moreover, we present a first study on the ability of humans to perform cross-lingual gender prediction. We find that human predictive power proves similar to that of our bleached models, and both perform better than lexical models. P18-2061 @@ -4324,8 +4324,8 @@ Neural Cross-Lingual Coreference Resolution And Its Application To Entity Linking GourabKundu - AviSil - RaduFlorian + AviSil + RaduFlorian WaelHamza 395–400 We propose an entity-centric neural crosslingual coreference model that builds on multi-lingual embeddings and language independent features. We perform both intrinsic and extrinsic evaluations of our model. In the intrinsic evaluation, we show that our model, when trained on English and tested on Chinese and Spanish, achieves competitive results to the models trained directly on Chinese and Spanish respectively. In the extrinsic evaluation, we show that our English model helps achieve superior entity linking accuracy on Chinese and Spanish test sets than the top 2015 TAC system without using any annotated data from Chinese or Spanish. @@ -4338,7 +4338,7 @@ Judicious Selection of Training Data in Assisting Language for Multilingual Neural <fixed-case>NER</fixed-case> RudraMurthy AnoopKunchukuttan - PushpakBhattacharyya + PushpakBhattacharyya 401–406 Multilingual learning for Neural Named Entity Recognition (NNER) involves jointly training a neural network for multiple languages. Typically, the goal is improving the NER performance of one of the languages (the primary language) using the other assisting languages. We show that the divergence in the tag distributions of the common named entities between the primary and assisting languages can reduce the effectiveness of multilingual learning. To alleviate this problem, we propose a metric based on symmetric KL divergence to filter out the highly divergent training instances in the assisting language. We empirically show that our data selection strategy improves NER performance in many languages, including those with very limited training data. P18-2064 @@ -4364,7 +4364,7 @@ YueZhao XiaolongJin YuanzhuoWang - XueqiCheng + XueqiCheng 414–419 Document-level information is very important for event detection even at sentence level. In this paper, we propose a novel Document Embedding Enhanced Bi-RNN model, called DEEB-RNN, to detect events in sentences. This model first learns event detection oriented embeddings of documents through a hierarchical and supervised attention based RNN, which pays word-level attention to event triggers and sentence-level attention to those sentences containing events. It then uses the learned document embedding to enhance another bidirectional RNN model to identify event triggers and their types in sentences. Through experiments on the ACE-2005 dataset, we demonstrate the effectiveness and merits of the proposed DEEB-RNN model via comparison with state-of-the-art methods. P18-2066 @@ -4402,7 +4402,7 @@ Large-Scale Multi-Domain Belief Tracking with Knowledge Sharing OsmanRamadan PawełBudzianowski - MilicaGašić + MilicaGašić 432–437 Robust dialogue belief tracking is a key component in maintaining good quality dialogue systems. The tasks that dialogue systems are trying to solve are becoming increasingly complex, requiring scalability to multi-domain, semantically rich dialogues. However, most current approaches have difficulty scaling up with domains because of the dependency of the model parameters on the dialogue ontology. In this paper, a novel approach is introduced that fully utilizes semantic similarity between dialogue utterances and the ontology terms, allowing the information to be shared across domains. The evaluation is performed on a recently collected multi-domain dialogues dataset, one order of magnitude larger than currently available corpora. Our model demonstrates great capability in handling multi-domain dialogues, simultaneously outperforming existing state-of-the-art models in single-domain dialogue tracking tasks. P18-2069 @@ -4444,13 +4444,13 @@ MarkJohnson PeterAnderson MarkDras - MarkSteedman + MarkSteedman 450–455 Because obtaining training data is often the most difficult part of an NLP or ML project, we develop methods for predicting how much data is required to achieve a desired test accuracy by extrapolating results from models trained on a small pilot training dataset. We model how accuracy varies as a function of training size on subsets of the pilot data, and use that model to predict how much training data would be required to achieve the desired accuracy. We introduce a new performance extrapolation task to evaluate how well different extrapolations predict accuracy on larger training sets. We show that details of hyperparameter optimisation and the extrapolation models can have dramatic effects in a document classification task. We believe this is an important first step in developing methods for estimating the resources required to meet specific engineering performance targets. P18-2072 P18-2072.Notes.pdf - @@ -4471,13 +4471,13 @@ Do Neural Network Cross-Modal Mappings Really Bridge Modalities? GuillemCollell - Marie-FrancineMoens + Marie-FrancineMoens 462–468 Feed-forward networks are widely used in cross-modal applications to bridge modalities by mapping distributed vectors of one modality to the other, or to a shared space. The predicted vectors are then used to perform e.g., retrieval or labeling. Thus, the success of the whole system relies on the ability of the mapping to make the neighborhood structure (i.e., the pairwise similarities) of the predicted vectors akin to that of the target vectors. However, whether this is achieved has not been investigated yet. Here, we propose a new similarity measure and two ad hoc experiments to shed light on this issue. In three cross-modal benchmarks we learn a large number of language-to-vision and vision-to-language neural network mappings (up to five layers) using a rich diversity of image and text features and loss functions. Our results reveal that, surprisingly, the neighborhood structure of the predicted vectors consistently resembles more that of the input vectors than that of the target vectors. In a second experiment, we further show that untrained nets do not significantly disrupt the neighborhood (i.e., semantic) structure of the input vectors. P18-2074 P18-2074.Notes.pdf - @@ -4509,7 +4509,7 @@ Simpler but More Accurate Semantic Dependency Parsing TimothyDozat - Christopher D.Manning + Christopher D.Manning 484–490 While syntactic dependency annotations concentrate on the surface or functional structure of a sentence, semantic dependency annotations aim to capture between-word relationships that are more closely related to the meaning of a sentence, using graph-structured representations. We extend the LSTM-based syntactic parser of Dozat and Manning (2017) to train on and generate these graph structures. The resulting system on its own achieves state-of-the-art performance, beating the previous, substantially more complex state-of-the-art system by 0.6% labeled F1. Adding linguistically richer input representations pushes the margin even higher, allowing us to beat it by 1.9% labeled F1. P18-2077 @@ -4521,7 +4521,7 @@ Simplified Abugidas ChenchenDing MasaoUtiyama - EiichiroSumita + EiichiroSumita 491–495 An abugida is a writing system where the consonant letters represent syllables with a default vowel and other vowels are denoted by diacritics. We investigate the feasibility of recovering the original text written in an abugida after omitting subordinate diacritics and merging consonant letters with similar phonetic values. This is crucial for developing more efficient input methods by reducing the complexity in abugidas. Four abugidas in the southern Brahmic family, i.e., Thai, Burmese, Khmer, and Lao, were studied using a newswire 20,000-sentence dataset. We compared the recovery performance of a support vector machine and an LSTM-based recurrent neural network, finding that the abugida graphemes could be recovered with 94% - 97% accuracy at the top-1 level and 98% - 99% at the top-4 level, even after omitting most diacritics (10 - 30 types) and merging the remaining 30 - 50 characters into 21 graphemes. P18-2078 @@ -4546,7 +4546,7 @@ Automated essay scoring with string kernels and word embeddings MădălinaCozma - AndreiButnaru + AndreiButnaru Radu TudorIonescu 503–509 In this work, we present an approach based on combining string kernels and word embeddings for automatic essay scoring. String kernels capture the similarity among strings based on counting common character n-grams, which are a low-level yet powerful type of feature, demonstrating state-of-the-art results in various text classification tasks such as Arabic dialect identification or native language identification. To our best knowledge, we are the first to apply string kernels to automatically score essays. We are also the first to combine them with a high-level semantic feature representation, namely the bag-of-super-word-embeddings. We report the best performance on the Automated Student Assessment Prize data set, in both in-domain and cross-domain settings, surpassing recent state-of-the-art deep learning approaches. @@ -4615,7 +4615,7 @@ End-Task Oriented Textual Entailment via Deep Explorations of Inter-Sentence Interactions WenpengYin DanRoth - HinrichSchütze + HinrichSchütze 540–545 This work deals with SciTail, a natural entailment challenge derived from a multi-choice question answering problem. The premises and hypotheses in SciTail were generated with no awareness of each other, and did not specifically aim at the entailment task. This makes it more challenging than other entailment data sets and more directly useful to the end-task – question answering. We propose DEISTE (deep explorations of inter-sentence interactions for textual entailment) for this entailment task. Given word-to-word interactions between the premise-hypothesis pair (P, H), DEISTE consists of: (i) a parameter-dynamic convolution to make important words in P and H play a dominant role in learnt representations; and (ii) a position-aware attentive convolution to encode the representation and position information of the aligned word pairs. Experiments show that DEISTE gets ≈5% improvement over prior state of the art and that the pretrained DEISTE on SciTail generalizes well on RTE-5. P18-2086 @@ -4664,7 +4664,7 @@ <fixed-case>GNEG</fixed-case>: Graph-Based Negative Sampling for word2vec ZhengZhang - PierreZweigenbaum + PierreZweigenbaum 566–571 Negative sampling is an important component in word2vec for distributed word representation learning. We hypothesize that taking into account global, corpus-level information and generating a different noise distribution for each target word better satisfies the requirements of negative examples for each training word than the original frequency-based distribution. In this purpose we pre-compute word co-occurrence statistics from the corpus and apply to it network algorithms such as random walk. We test this hypothesis through a set of experiments whose results show that our approach boosts the word analogy task by about 5% and improves the performance on word similarity tasks by about 1% compared to the skip-gram negative sampling baseline. P18-2090 @@ -4716,7 +4716,7 @@ HuXu BingLiu LeiShu - Philip S.Yu + Philip S.Yu 592–598 One key task of fine-grained sentiment analysis of product reviews is to extract product aspects or features that users have expressed opinions on. This paper focuses on supervised aspect extraction using deep learning. Unlike other highly sophisticated supervised deep learning models, this paper proposes a novel and yet simple CNN model employing two types of pre-trained embeddings for aspect extraction: general-purpose embeddings and domain-specific embeddings. Without using any additional supervision, this model achieves surprisingly good results, outperforming state-of-the-art sophisticated existing methods. To our knowledge, this paper is the first to report such double embeddings based CNN model for aspect extraction and achieve very good results. P18-2094 @@ -4727,7 +4727,7 @@ Will it Blend? Blending Weak and Strong Labeled Data in a Neural Network for Argumentation Mining EyalShnarch - CarlosAlzate + CarlosAlzate LenaDankin MartinGleize YufangHou @@ -4851,7 +4851,7 @@ Adaptive Knowledge Sharing in Multi-Task Learning: Improving Low-Resource Neural Machine Translation PooryaZaremoodi WrayBuntine - GholamrezaHaffari + GholamrezaHaffari 656–661 Neural Machine Translation (NMT) is notorious for its need for large amounts of bilingual data. An effective approach to compensate for this requirement is Multi-Task Learning (MTL) to leverage different linguistic resources as a source of inductive bias. Current MTL architectures are based on the Seq2Seq transduction, and (partially) share different components of the models among the tasks. However, this MTL approach often suffers from task interference and is not able to fully capture commonalities among subsets of tasks. We address this issue by extending the recurrent units with multiple “blocks” along with a trainable “routing network”. The routing network enables adaptive collaboration by dynamic sharing of blocks conditioned on the task at hand, input, and model state. Empirical evaluation of two low-resource translation tasks, English to Vietnamese and Farsi, show +1 BLEU score improvements compared to strong baselines. P18-2104 @@ -4879,7 +4879,7 @@ Polyglot Semantic Role Labeling PhoebeMulcaire SwabhaSwayamdipta - Noah A.Smith + Noah A.Smith 667–672 Previous approaches to multilingual semantic dependency parsing treat languages independently, without exploiting the similarities between semantic structures across languages. We experiment with a new approach where we combine resources from different languages in the CoNLL 2009 shared task to build a single polyglot semantic dependency parser. Notwithstanding the absence of parallel data, and the dissimilarity in annotations between languages, our approach results in improvement in parsing performance on several languages over a monolingual baseline. Analysis of the polyglot models’ performance provides a new understanding of the similarities and differences between languages in the shared task. P18-2106 @@ -4931,7 +4931,7 @@ Examining Temporality in Document Classification XiaoleiHuang - Michael J.Paul + Michael J.Paul 694–699 Many corpora span broad periods of time. Language processing models trained during one time period may not work well in future time periods, and the best model may depend on specific times of year (e.g., people might describe hotels differently in reviews during the winter versus the summer). This study investigates how document classifiers trained on documents from certain time intervals perform on documents from other time intervals, considering both seasonal intervals (intervals that repeat across years, e.g., winter) and non-seasonal intervals (e.g., specific years). We show experimentally that classification performance varies over time, and that performance can be improved by using a standard domain adaptation approach to adjust for changes in time. P18-2110 @@ -4943,7 +4943,7 @@ Personalized Language Model for Query Auto-Completion AaronJaech - MariOstendorf + MariOstendorf 700–705 Query auto-completion is a search engine feature whereby the system suggests completed queries as the user types. Recently, the use of a recurrent neural network language model was suggested as a method of generating query completions. We show how an adaptable language model can be used to generate personalized completions and how the model can use online updating to make predictions for users not seen during training. The personalized predictions are significantly better than a baseline that uses no user information. P18-2111 @@ -4966,14 +4966,14 @@ Learning Simplifications for Specific Target Audiences - CarolinaScarton + CarolinaScarton LuciaSpecia 712–718 Text simplification (TS) is a monolingual text-to-text transformation task where an original (complex) text is transformed into a target (simpler) text. Most recent work is based on sequence-to-sequence neural models similar to those used for machine translation (MT). Different from MT, TS data comprises more elaborate transformations, such as sentence splitting. It can also contain multiple simplifications of the same original text targeting different audiences, such as school grade levels. We explore these two features of TS to build models tailored for specific grade levels. Our approach uses a standard sequence-to-sequence architecture where the original sequence is annotated with information about the target audience and/or the (predicted) type of simplification operation. We show that it outperforms state-of-the-art TS approaches (up to 3 and 12 BLEU and SARI points, respectively), including when training data for the specific complex-simple combination of grade levels is not available, i.e. zero-shot learning. P18-2113 P18-2113.Notes.pdf - @@ -5008,7 +5008,7 @@ OmerLevy KentonLee NicholasFitzGerald - LukeZettlemoyer + LukeZettlemoyer 732–739 LSTMs were introduced to combat vanishing gradients in simple RNNs by augmenting them with gated additive recurrent connections. We present an alternative view to explain the success of LSTMs: the gates themselves are versatile recurrent models that provide more representational power than previously appreciated. We do this by decoupling the LSTM’s gates from the embedded simple RNN, producing a new class of RNNs where the recurrence computes an element-wise weighted sum of context-independent functions of the input. Ablations on a range of problems demonstrate that the gating mechanism alone performs as well as an LSTM in most settings, strongly suggesting that the gates are doing much more in practice than just alleviating vanishing gradients. P18-2116 @@ -5047,7 +5047,7 @@ Tackling the Story Ending Biases in The Story Cloze Test RishiSharma - JamesAllen + JamesAllen OmidBakhshandeh NasrinMostafazadeh 752–757 @@ -5074,15 +5074,15 @@ Pretraining Sentiment Classifiers with Unlabeled Dialog Data - ToruShimizu + ToruShimizu NobuyukiShimizu HayatoKobayashi 764–770 The huge cost of creating labeled training data is a common problem for supervised learning tasks such as sentiment classification. Recent studies showed that pretraining with unlabeled data via a language model can improve the performance of classification models. In this paper, we take the concept a step further by using a conditional language model, instead of a language model. Specifically, we address a sentiment classification task for a tweet analysis service as a case study and propose a pretraining strategy with unlabeled dialog data (tweet-reply pairs) via an encoder-decoder model. Experimental results show that our strategy can improve the performance of sentiment classifiers and outperform several state-of-the-art strategies including language model pretraining. P18-2121 P18-2121.Notes.pdf - @@ -5102,7 +5102,7 @@ Cross-Target Stance Classification with Self-Attention Networks ChangXu - CécileParis + CécileParis SuryaNepal RossSparks 778–783 @@ -5147,7 +5147,7 @@ JeniyaTabassum RobVoigt WanxiangChe - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe MalvinaNissim Association for Computational Linguistics
Melbourne, Australia
@@ -5212,7 +5212,7 @@
Recognizing Complex Entity Mentions: A Review and Future Directions - XiangDai + XiangDai 37–44 Standard named entity recognizers can effectively recognize entity mentions that consist of contiguous tokens and do not overlap with each other. However, in practice, there are many domains, such as the biomedical domain, in which there are nested, overlapping, and discontinuous entity mentions. These complex mentions cannot be directly recognized by conventional sequence tagging models because they may break the assumptions based on which sequence tagging techniques are built. We review the existing methods which are revised to tackle complex entity mentions and categorize them as tokenlevel and sentence-level approaches. We then identify the research gap, and discuss some directions that we are exploring. P18-3006 @@ -5332,7 +5332,7 @@ Exploring Chunk Based Templates for Generating a subset of <fixed-case>E</fixed-case>nglish Text NikhileshBhatnagar - ManishShrivastava + ManishShrivastava RadhikaMamidi 120–126 Natural Language Generation (NLG) is a research task which addresses the automatic generation of natural language text representative of an input non-linguistic collection of knowledge. In this paper, we address the task of the generation of grammatical sentences in an isolated context given a partial bag-of-words which the generated sentence must contain. We view the task as a search problem (a problem of choice) involving combinations of smaller chunk based templates extracted from a training corpus to construct a complete sentence. To achieve that, we propose a fitness function which we use in conjunction with an evolutionary algorithm as the search procedure to arrive at a potentially grammatical sentence (modeled by the fitness score) which satisfies the input constraints. @@ -5353,7 +5353,7 @@ Alignment Analysis of Sequential Segmentation of Lexicons to Improve Automatic Cognate Detection - PranavA + PranavA 134–140 Ranking functions in information retrieval are often used in search engines to extract the relevant answers to the query. This paper makes use of this notion of information retrieval and applies onto the problem domain of cognate detection. The main contributions of this paper are: (1) positional tokenization, which incorporates the sequential notion; (2) graphical error modelling, which calculates the morphological shifts. The current research work only distinguishes whether a pair of words are cognates or not. However, we also study if we could predict a possible cognate from the given input. Our study shows that language modelling based retrieval functions with positional tokenization and error modelling tend to give better results than competing baselines. P18-3019 @@ -5372,7 +5372,7 @@ Automatic Spelling Correction for Resource-Scarce Languages using Deep Learning PravallikaEtoori - ManojChinnakotla + ManojChinnakotla RadhikaMamidi 146–152 Spelling correction is a well-known task in Natural Language Processing (NLP). Automatic spelling correction is important for many NLP applications like web search engines, text summarization, sentiment analysis etc. Most approaches use parallel data of noisy and correct word mappings from different sources as training data for automatic spelling correction. Indic languages are resource-scarce and do not have such parallel data due to low volume of queries and non-existence of such prior implementations. In this paper, we show how to build an automatic spelling corrector for resource-scarce languages. We propose a sequence-to-sequence deep learning model which trains end-to-end. We perform experiments on synthetic datasets created for Indic languages, Hindi and Telugu, by incorporating the spelling mistakes committed at character level. A comparative evaluation shows that our model is competitive with the existing spell checking and correction techniques for Indic languages. @@ -5385,7 +5385,7 @@ PayalKhullar KonigariRachna MukulHase - ManishShrivastava + ManishShrivastava 153–158 This paper presents a system that automatically generates multiple, natural language questions using relative pronouns and relative adverbs from complex English sentences. Our system is syntax-based, runs on dependency parse information of a single-sentence input, and achieves high accuracy in terms of syntactic correctness, semantic adequacy, fluency and uniqueness. One of the key advantages of our system, in comparison with other rule-based approaches, is that we nearly eliminate the chances of getting a wrong wh-word in the generated question, by fetching the requisite wh-word from the input sentence itself. Depending upon the input, we generate both factoid and descriptive type questions. To the best of our information, the exploitation of wh-pronouns and wh-adverbs to generate questions is novel in the Automatic Question Generation task. P18-3022 @@ -5428,7 +5428,7 @@ <fixed-case>N</fixed-case>ovel<fixed-case>P</fixed-case>erspective: Identifying Point of View Characters LyndonWhite RobertoTogneri - WeiLiu + WeiLiu MohammedBennamoun 7–12 We present NovelPerspective: a tool to allow consumers to subset their digital literature, based on point of view (POV) character. Many novels have multiple main characters each with their own storyline running in parallel. A well-known example is George R. R. Martin’s novel: “A Game of Thrones”, and others from that series. Our tool detects the main character that each section is from the POV of, and allows the user to generate a new ebook with only those sections. This gives consumers new options in how they consume their media; allowing them to pursue the storylines sequentially, or skip chapters about characters they find boring. We present two heuristic-based baselines, and two machine learning based methods for the detection of the main character. @@ -5464,8 +5464,8 @@ PasqualeMinervini IsabelleAugenstein JohannesWelbl - TimRocktäschel - MatkoBošnjak + TimRocktäschel + MatkoBošnjak JeffMitchell ThomasDemeester TimDettmers @@ -5491,9 +5491,9 @@ <fixed-case>N</fixed-case>ext<fixed-case>G</fixed-case>en <fixed-case>AML</fixed-case>: Distributed Deep Learning based Language Technologies to Augment Anti Money Laundering Investigation - JingguangHan + JingguangHan UtsabBarman - JeremiahHayes + JeremiahHayes JinhuaDu EdwardBurgin DadongWan @@ -5532,7 +5532,7 @@ Sentence Suggestion of <fixed-case>J</fixed-case>apanese Functional Expressions for <fixed-case>C</fixed-case>hinese-speaking Learners JunLiu HiroyukiShindo - YujiMatsumoto + YujiMatsumoto 56–61 We present a computer-assisted learning system, Jastudy, which is particularly designed for Chinese-speaking learners of Japanese as a second language (JSL) to learn Japanese functional expressions with suggestion of appropriate example sentences. The system automatically recognizes Japanese functional expressions using a free Japanese morphological analyzer MeCab, which is retrained on a new Conditional Random Fields (CRF) model. In order to select appropriate example sentences, we apply a pairwise-based machine learning tool, Support Vector Machine for Ranking (SVMrank) to estimate the complexity of the example sentences using Japanese–Chinese homographs as an important feature. In addition, we cluster the example sentences that contain Japanese functional expressions with two or more meanings and usages, based on part-of-speech, conjugation forms of verbs and semantic attributes, using the K-means clustering algorithm in Scikit-Learn. Experimental results demonstrate the effectiveness of our approach. P18-4010 @@ -5553,12 +5553,12 @@ <fixed-case>SANTO</fixed-case>: A Web-based Annotation Tool for Ontology-driven Slot Filling - MatthiasHartung + MatthiasHartung Hendrikter Horst FrankGrimm TimDiekmann RomanKlinger - PhilippCimiano + PhilippCimiano 68–73 Supervised machine learning algorithms require training data whose generation for complex relation extraction tasks tends to be difficult. Being optimized for relation extraction at sentence level, many annotation tools lack in facilitating the annotation of relational structures that are widely spread across the text. This leads to non-intuitive and cumbersome visualizations, making the annotation process unnecessarily time-consuming. We propose SANTO, an easy-to-use, domain-adaptive annotation tool specialized for complex slot filling tasks which may involve problems of cardinality and referential grounding. The web-based architecture enables fast and clearly structured annotation for multiple users in parallel. Relational structures are formulated as templates following the conceptualization of an underlying ontology. Further, import and export procedures of standard formats enable interoperability with external sources and tools. P18-4012 @@ -5598,16 +5598,16 @@ <fixed-case>S</fixed-case>cout<fixed-case>B</fixed-case>ot: A Dialogue System for Collaborative Navigation - Stephanie M.Lukin + Stephanie M.Lukin FelixGervits - Cory J.Hayes + Cory J.Hayes PoojaMoolchandani AntonLeuski John G.Rogers III CarlosSanchez Amaro MatthewMarge - Clare R.Voss - DavidTraum + Clare R.Voss + DavidTraum 93–98 ScoutBot is a dialogue interface to physical and simulated robots that supports collaborative exploration of environments. The demonstration will allow users to issue unconstrained spoken language commands to ScoutBot. ScoutBot will prompt for clarification if the user’s instruction needs additional input. It is trained on human-robot dialogue collected from Wizard-of-Oz experiments, where robot responses were initiated by a human wizard in previous interactions. The demonstration will show a simulated ground robot (Clearpath Jackal) in a simulated environment supported by ROS (Robot Operating System). P18-4016 @@ -5618,8 +5618,8 @@ The <fixed-case>SUMMA</fixed-case> Platform: A Scalable Infrastructure for Multi-lingual Multi-media Monitoring UlrichGermann RenārsLiepins - GuntisBarzdins - DidzisGosko + GuntisBarzdins + DidzisGosko SebastiãoMiranda DavidNogueira 99–104 @@ -5661,7 +5661,7 @@ UlrichGermann Alham FikriAji NikolayBogoychev - André F. T.Martins + André F. T.Martins AlexandraBirch 116–121 We present Marian, an efficient and self-contained Neural Machine Translation framework with an integrated automatic differentiation engine based on dynamic computation graphs. Marian is written entirely in C++. We describe the design of the encoder-decoder framework and demonstrate that a research-friendly toolkit can achieve high training and translation speed. @@ -5679,7 +5679,7 @@ NickolayBushkov OlgaGureenkova TarasKhakhulin - YuriKuratov + YuriKuratov DenisKuznetsov AlexeyLitinsky VarvaraLogacheva @@ -5701,7 +5701,7 @@ <fixed-case>RETURNN</fixed-case> as a Generic Flexible Neural Toolkit with Application to Translation and Speech Recognition AlbertZeyer TamerAlkhouli - HermannNey + HermannNey 128–133 We compare the fast training and decoding speed of RETURNN of attention models for translation, due to fast CUDA LSTM kernels, and a fast pure TensorFlow beam search decoder. We show that a layer-wise pretraining scheme for recurrent attention models gives over 1% BLEU improvement absolute and it allows to train deeper recurrent encoder networks. Promising preliminary results on max. expected BLEU training are presented. We are able to train state-of-the-art models for translation and end-to-end models for speech recognition and show results on WMT 2017 and Switchboard. The flexibility of RETURNN allows a fast research feedback loop to experiment with alternative architectures, and its generality allows to use it on a wide range of applications. P18-4022 @@ -5751,7 +5751,7 @@ 100 Things You Always Wanted to Know about Semantics & Pragmatics But Were Afraid to Ask - Emily M.Bender + Emily M.Bender 1 Meaning is a fundamental concept in Natural Language Processing (NLP), given its aim to build systems that mean what they say to you, and understand what you say to them. In order for NLP to scale beyond partial, task-specific solutions, it must be informed by what is known about how humans use language to express and understand communicative intents. The purpose of this tutorial is to present a selection of useful information about semantics and pragmatics, as understood in linguistics, in a way that’s accessible to and useful for NLP practitioners with minimal (or even no) prior training in linguistics. The tutorial content is based on a manuscript in progress I am co-authoring with Prof. Alex Lascarides of the University of Edinburgh. P18-5001 @@ -5805,7 +5805,7 @@ PradeepDasigi SrinivasanIyer AlaneSuhr - LukeZettlemoyer + LukeZettlemoyer 17–18 Semantic parsing, the study of translating natural language utterances into machine-executable programs, is a well-established research area and has applications in question answering, instruction following, voice assistants, and code generation. In the last two years, the models used for semantic parsing have changed dramatically with the introduction of neural encoder-decoder methods that allow us to rethink many of the previous assumptions underlying semantic parsing. We aim to inform those already interested in semantic parsing research of these new developments in the field, as well as introduce the topic as an exciting research area to those who are unfamiliar with it. Current approaches for neural semantic parsing share several similarities with neural machine translation, but the key difference between the two fields is that semantic parsing translates natural language into a formal language, while machine translation translates it into a different natural language. The formal language used in semantic parsing allows for constrained decoding, where the model is constrained to only produce outputs that are valid formal statements. We will describe the various approaches researchers have taken to do this. We will also discuss the choice of formal languages used by semantic parsers, and describe why much recent work has chosen to use standard programming languages instead of more linguistically-motivated representations. We will then describe a particularly challenging setting for semantic parsing, where there is additional context or interaction that the parser must take into account when translating natural language to formal language, and give an overview of recent work in this direction. Finally, we will introduce some tools available in AllenNLP for doing semantic parsing research. P18-5006 @@ -5825,10 +5825,10 @@ Multi-lingual Entity Discovery and Linking - AviSil + AviSil HengJi DanRoth - Silviu-PetruCucerzan + Silviu-PetruCucerzan 22–29 The primary goals of this tutorial are to review the framework of cross-lingual EL and motivate it as a broad paradigm for the Information Extraction task. We will start by discussing the traditional EL techniques and metrics and address questions relevant to the adequacy of these to across domains and languages. We will then present more recent approaches such as Neural EL, discuss the basic building blocks of a state-of-the-art neural EL system and analyze some of the current results on English EL. We will then proceed to Cross-lingual EL and discuss methods that work across languages. In particular, we will discuss and compare multiple methods that make use of multi-lingual word embeddings. We will also present EL methods that work for both name tagging and linking in very low resource languages. Finally, we will discuss the uses of cross-lingual EL in a variety of applications like search engines and commercial product selling applications. Also, contrary to the 2014 EL tutorial, we will also focus on Entity Discovery which is an essential component of EL. P18-5008 diff --git a/data/xml/P19.xml b/data/xml/P19.xml index 3b36f126f1..2ed5396792 100644 --- a/data/xml/P19.xml +++ b/data/xml/P19.xml @@ -5,8 +5,8 @@ Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics P19-1 AnnaKorhonen - DavidTraum - LluísMàrquez + DavidTraum + LluísMàrquez Association for Computational Linguistics
Florence, Italy
July @@ -67,7 +67,7 @@ Do Neural Dialog Systems Use the Conversation History Effectively? An Empirical Study ChinnadhuraiSankar SandeepSubramanian - ChrisPal + ChrisPal SarathChandar YoshuaBengio 32–37 @@ -80,7 +80,7 @@ Boosting Dialog Response Generation WenchaoDu - Alan WBlack + Alan WBlack 38–43 Neural models have become one of the most important approaches to dialog response generation. However, they still tend to generate the most common and generic responses in the corpus all the time. To address this problem, we designed an iterative training process and ensemble method based on boosting. We combined our method with different training and decoding paradigms as the base model, including mutual-information-based decoding and reward-augmented maximum likelihood learning. Empirical results show that our approach can significantly improve the diversity and relevance of the responses generated by all base models, backed by objective measurements and human evaluation. P19-1005 @@ -107,7 +107,7 @@ Semantic Parsing with Dual Learning RuishengCao SuZhu - ChenLiu + ChenLiu JieyuLi KaiYu 51–64 @@ -174,7 +174,7 @@ The (Non-)Utility of Structural Features in <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case>-based Dependency Parsers - AgnieszkaFalenska + AgnieszkaFalenska JonasKuhn 117–128 Classical non-neural dependency parsers put considerable effort on the design of feature functions. Especially, they benefit from information coming from structural features, such as features drawn from neighboring tokens in the dependency tree. In contrast, their BiLSTM-based successors achieve state-of-the-art performance without explicit information about the structural context. In this paper we aim to answer the question: How much structural context are the BiLSTM representations able to capture implicitly? We show that features drawn from partial subtrees become redundant when the BiLSTMs are used. We provide a deep insight into information flow in transition- and graph-based neural architectures to demonstrate where the implicit information comes from when the parsers make their decisions. Finally, with model ablations we demonstrate that the structural context is not only present in the models, but it significantly influences their performance. @@ -214,7 +214,7 @@ Massively Multilingual Transfer for <fixed-case>NER</fixed-case> AfshinRahimi YuanLi - TrevorCohn + TrevorCohn 151–164 In cross-lingual transfer, NLP models over one or more source languages are applied to a low-resource target language. While most prior work has used a single source model or a few carefully selected models, here we consider a “massive” setting with many such models. This setting raises the problem of poor transfer, particularly from distant languages. We propose two techniques for modulating the transfer, suitable for zero-shot or few-shot learning, respectively. Evaluating on named entity recognition, we show that our techniques are much more effective than strong baselines, including standard ensembling, and our unsupervised method rivals oracle selection of the single best individual model. P19-1015 @@ -258,7 +258,7 @@ BarunPatra Joel Ruben AntonyMoniz SarthakGarg - Matthew R.Gormley + Matthew R.Gormley GrahamNeubig 184–193 Recent work on bilingual lexicon induction (BLI) has frequently depended either on aligned bilingual lexicons or on distribution matching, often with an assumption about the isometry of the two spaces. We propose a technique to quantitatively estimate this assumption of the isometry between two embedding spaces and empirically show that this assumption weakens as the languages in question become increasingly etymologically distant. We then propose Bilingual Lexicon Induction with Semi-Supervision (BLISS) — a semi-supervised approach that relaxes the isometric assumption while leveraging both limited aligned bilingual lexicons and a larger set of unaligned word embeddings, as well as a novel hubness filtering technique. Our proposed method obtains state of the art results on 15 of 18 language pairs on the MUSE dataset, and does particularly well when the embedding spaces don’t appear to be isometric. In addition, we also show that adding supervision stabilizes the learning procedure, and is effective even with minimal supervision. @@ -271,8 +271,8 @@ An Effective Approach to Unsupervised Machine Translation MikelArtetxe - GorkaLabaka - EnekoAgirre + GorkaLabaka + EnekoAgirre 194–203 While machine translation has traditionally relied on large amounts of parallel corpora, a recent research line has managed to train both Neural Machine Translation (NMT) and Statistical Machine Translation (SMT) systems using monolingual corpora only. In this paper, we identify and address several deficiencies of existing unsupervised SMT approaches by exploiting subword information, developing a theoretically well founded unsupervised tuning method, and incorporating a joint refinement procedure. Moreover, we use our improved SMT system to initialize a dual NMT model, which is further fine-tuned through on-the-fly back-translation. Together, we obtain large improvements over the previous state-of-the-art in unsupervised machine translation. For instance, we get 22.5 BLEU points in English-to-German WMT 2014, 5.5 points more than the previous best unsupervised system, and 0.5 points more than the (supervised) shared task winner back in 2014. P19-1019 @@ -308,7 +308,7 @@ Domain Adaptive Inference for Neural Machine Translation DanielleSaunders FelixStahlberg - Adriàde Gispert + Adriàde Gispert BillByrne 222–228 We investigate adaptive ensemble weighting for Neural Machine Translation, addressing the case of improving performance on a new and potentially unknown domain without sacrificing performance on the original domain. We adapt sequentially across two Spanish-English and three English-German tasks, comparing unregularized fine-tuning, L2 and Elastic Weight Consolidation. We then report a novel scheme for adaptive NMT ensemble decoding by extending Bayesian Interpolation with source information, and report strong improvements across test domains without access to the domain label. @@ -319,7 +319,7 @@ Neural Relation Extraction for Knowledge Base Enrichment - Bayu DistiawanTrisedya + Bayu DistiawanTrisedya GerhardWeikum JianzhongQi RuiZhang @@ -403,7 +403,7 @@ You Only Need Attention to Traverse Trees MahtabAhmed Muhammad RifayatSamee - Robert E.Mercer + Robert E.Mercer 316–322 In recent NLP research, a topic of interest is universal sentence encoding, sentence representations that can be used in any supervised task. At the word sequence level, fully attention-based models suffer from two problems: a quadratic increase in memory consumption with respect to the sentence length and an inability to capture and use syntactic information. Recursive neural nets can extract very good syntactic information by traversing a tree structure. To this end, we propose Tree Transformer, a model that captures phrase level syntax for constituency trees as well as word-level dependencies for dependency trees by doing recursive traversal only with attention. Evaluation of this model on four tasks gets noteworthy results compared to the standard transformer and LSTM-based models as well as tree-structured LSTMs. Ablation studies to find whether positional information is inherently encoded in the trees and which type of attention is suitable for doing the recursive traversal are provided. P19-1030 @@ -426,7 +426,7 @@ Adaptive Attention Span in Transformers SainbayarSukhbaatar - EdouardGrave + EdouardGrave PiotrBojanowski ArmandJoulin 331–335 @@ -454,7 +454,7 @@ Automatic Domain Adaptation Outperforms Manual Domain Adaptation for Predicting Financial Outcomes MarinaSedinkina NikolasBreitkopf - HinrichSchütze + HinrichSchütze 346–359 In this paper, we automatically create sentiment dictionaries for predicting financial outcomes. We compare three approaches: (i) manual adaptation of the domain-general dictionary H4N, (ii) automatic adaptation of H4N and (iii) a combination consisting of first manual, then automatic adaptation. In our experiments, we demonstrate that the automatically adapted sentiment dictionary outperforms the previous state of the art in predicting the financial outcomes excess return and volatility. In particular, automatic adaptation performs better than manual adaptation. In our analysis, we find that annotation based on an expert’s a priori belief about a word’s meaning can be incorrect – annotation should be performed based on the word’s contexts in the target domain instead. P19-1034 @@ -552,7 +552,7 @@ This Email Could Save Your Life: Introducing the Task of Email Subject Line Generation RuiZhang - JoelTetreault + JoelTetreault 446–456 Given the overwhelming number of emails, an effective subject line becomes essential to better inform the recipient of the email’s content. In this paper, we propose and study the task of email subject line generation: automatically generating an email subject line from the email body. We create the first dataset for this task and find that email subject line generation favor extremely abstractive summary which differentiates it from news headline generation or news single document summarization. We then develop a novel deep learning method and compare it to several baselines as well as recent state-of-the-art text summarization systems. We also investigate the efficacy of several automatic metrics based on correlations with human judgments and propose a new automatic evaluation metric. Our system outperforms competitive baselines given both automatic and human evaluations. To our knowledge, this is the first work to tackle the problem of effective email subject line generation. P19-1043 @@ -576,7 +576,7 @@ Adversarial Attention Modeling for Multi-dimensional Emotion Regression SuyangZhu ShoushanLi - GuodongZhou + GuodongZhou 471–480 In this paper, we propose a neural network-based approach, namely Adversarial Attention Network, to the task of multi-dimensional emotion regression, which automatically rates multiple emotion dimension scores for an input text. Especially, to determine which words are valuable for a particular emotion dimension, an attention layer is trained to weight the words in an input sequence. Furthermore, adversarial training is employed between two attention layers to learn better word weights via a discriminator. In particular, a shared attention layer is incorporated to learn public word weights between two emotion dimensions. Empirical evaluation on the EMOBANK corpus shows that our approach achieves notable improvements in r-values on both EMOBANK Reader’s and Writer’s multi-dimensional emotion regression tasks in all domains over the state-of-the-art baselines. P19-1045 @@ -597,7 +597,7 @@ Modeling Financial Analysts’ Decision Making via the Pragmatics and Semantics of Earnings Calls KatherineKeith - AmandaStent + AmandaStent 493–503 Every fiscal quarter, companies hold earnings calls in which company executives respond to questions from analysts. After these calls, analysts often change their price target recommendations, which are used in equity re- search reports to help investors make deci- sions. In this paper, we examine analysts’ decision making behavior as it pertains to the language content of earnings calls. We identify a set of 20 pragmatic features of analysts’ questions which we correlate with analysts’ pre-call investor recommendations. We also analyze the degree to which semantic and pragmatic features from an earnings call complement market data in predicting analysts’ post-call changes in price targets. Our results show that earnings calls are moderately predictive of analysts’ decisions even though these decisions are influenced by a number of other factors including private communication with company executives and market conditions. A breakdown of model errors indicates disparate performance on calls from different market sectors. P19-1047 @@ -620,7 +620,7 @@ Decompositional Argument Mining: A General Purpose Approach for Argument Graph Construction DebelaGemechu - ChrisReed + ChrisReed 516–526 This work presents an approach decomposing propositions into four functional components and identify the patterns linking those components to determine argument structure. The entities addressed by a proposition are target concepts and the features selected to make a point about the target concepts are aspects. A line of reasoning is followed by providing evidence for the points made about the target concepts via aspects. Opinions on target concepts and opinions on aspects are used to support or attack the ideas expressed by target concepts and aspects. The relations between aspects, target concepts, opinions on target concepts and aspects are used to infer the argument relations. Propositions are connected iteratively to form a graph structure. The approach is generic in that it is not tuned for a specific corpus and evaluated on three different corpora from the literature: AAEC, AMT, US2016G1tv and achieved an F score of 0.79, 0.77 and 0.64, respectively. P19-1049 @@ -634,7 +634,7 @@ NavonilMajumder GautamNaik ErikCambria - RadaMihalcea + RadaMihalcea 527–536 Emotion recognition in conversations is a challenging task that has recently gained popularity due to its potential applications. Until now, however, a large-scale multimodal multi-party emotional conversational database containing more than two speakers per dialogue was missing. Thus, we propose the Multimodal EmotionLines Dataset (MELD), an extension and enhancement of EmotionLines. MELD contains about 13,000 utterances from 1,433 dialogues from the TV-series Friends. Each utterance is annotated with emotion and sentiment labels, and encompasses audio, visual and textual modalities. We propose several strong multimodal baselines and show the importance of contextual and multimodal information for emotion recognition in conversations. The full dataset is available for use at http://affective-meld.github.io. P19-1050 @@ -646,7 +646,7 @@ MinghaoHu YuxingPeng ZhenHuang - DongshengLi + DongshengLi YiweiLv 537–546 Open-domain targeted sentiment analysis aims to detect opinion targets along with their sentiment polarities from a sentence. Prior work typically formulates this task as a sequence tagging problem. However, such formulation suffers from problems such as huge search space and sentiment inconsistency. To address these problems, we propose a span-based extract-then-classify framework, where multiple opinion targets are directly extracted from the sentence under the supervision of target span boundaries, and corresponding polarities are then classified using their span representations. We further investigate three approaches under this framework, namely the pipeline, joint, and collapsed models. Experiments on three benchmark datasets show that our approach consistently outperforms the sequence tagging baseline. Moreover, we find that the pipeline model achieves the best performance compared with the other two models. @@ -720,7 +720,7 @@ A Corpus for Modeling User and Language Effects in Argumentation on Online Debating EsinDurmus - ClaireCardie + ClaireCardie 602–607 Existing argumentation datasets have succeeded in allowing researchers to develop computational methods for analyzing the content, structure and linguistic features of argumentative text. They have been much less successful in fostering studies of the effect of “user” traits — characteristics and beliefs of the participants — on the debate/argument outcome as this type of user information is generally not available. This paper presents a dataset of 78,376 debates generated over a 10-year period along with surprisingly comprehensive participant profiles. We also complete an example study using the dataset to analyze the effect of selected user traits on the debate outcome in comparison to the linguistic features typically employed in studies of this kind. P19-1057 @@ -732,8 +732,8 @@ ShengXu PeifengLi FangKong - QiaomingZhu - GuodongZhou + QiaomingZhu + GuodongZhou 608–618 In the literature, most of the previous studies on English implicit discourse relation recognition only use sentence-level representations, which cannot provide enough semantic information in Chinese due to its unique paratactic characteristics. In this paper, we propose a topic tensor network to recognize Chinese implicit discourse relations with both sentence-level and topic-level representations. In particular, besides encoding arguments (discourse units) using a gated convolutional network to obtain sentence-level representations, we train a simplified topic model to infer the latent topic-level representations. Moreover, we feed the two pairs of representations to two factored tensor networks, respectively, to capture both the sentence-level interactions and topic-level relevance using multi-slice tensors. Experimentation on CDTB, a Chinese discourse corpus, shows that our proposed model significantly outperforms several state-of-the-art baselines in both micro and macro F1-scores. P19-1058 @@ -743,7 +743,7 @@ Learning from Omission BillMcDowell - NoahGoodman + NoahGoodman 619–628 Pragmatic reasoning allows humans to go beyond the literal meaning when interpret- ing language in context. Previous work has shown that such reasoning can improve the performance of already-trained language understanding systems. Here, we explore whether pragmatic reasoning during training can improve the quality of learned meanings. Our experiments on reference game data show that end-to-end pragmatic training produces more accurate utterance interpretation models, especially when data is sparse and language is complex. P19-1059 @@ -765,7 +765,7 @@ SoniaBadene KateThompson Jean-PierreLorré - NicholasAsher + NicholasAsher 640–645 This paper investigates the advantages and limits of data programming for the task of learning discourse structure. The data programming paradigm implemented in the Snorkel framework allows a user to label training data using expert-composed heuristics, which are then transformed via the “generative step” into probability distributions of the class labels given the training candidates. These results are later generalized using a discriminative model. Snorkel’s attractive promise to create a large amount of annotated data from a smaller set of training data by unifying the output of a set of heuristics has yet to be used for computationally difficult tasks, such as that of discourse attachment, in which one must decide where a given discourse unit attaches to other units in a text in order to form a coherent discourse structure. Although approaching this problem using Snorkel requires significant modifications to the structure of the heuristics, we show that weak supervision methods can be more than competitive with classical supervised learning approaches to the attachment problem. P19-1061 @@ -787,7 +787,7 @@ Know What You Don’t Know: Modeling a Pragmatic Speaker that Refers to Objects of Unknown Categories - SinaZarrieß + SinaZarrieß DavidSchlangen 654–659 Zero-shot learning in Language & Vision is the task of correctly labelling (or naming) objects of novel categories. Another strand of work in L&V aims at pragmatically informative rather than “correct” object descriptions, e.g. in reference games. We combine these lines of research and model zero-shot reference games, where a speaker needs to successfully refer to a novel object in an image. Inspired by models of “rational speech acts”, we extend a neural generator to become a pragmatic speaker reasoning about uncertain object categories. As a result of this reasoning, the generator produces fewer nouns and names of distractor categories as compared to a literal speaker. We show that this conversational strategy for dealing with novel objects often improves communicative success, in terms of resolution accuracy of an automatic listener. @@ -813,7 +813,7 @@ KevinBowden JiaqiWu WenCui - MarilynWalker + MarilynWalker 666–672 Discourse relation identification has been an active area of research for many years, and the challenge of identifying implicit relations remains largely an unsolved task, especially in the context of an open-domain dialogue system. Previous work primarily relies on a corpora of formal text which is inherently non-dialogic, i.e., news and journals. This data however is not suitable to handle the nuances of informal dialogue nor is it capable of navigating the plethora of valid topics present in open-domain dialogue. In this paper, we designed a novel discourse relation identification pipeline specifically tuned for open-domain dialogue systems. We firstly propose a method to automatically extract the implicit discourse relation argument pairs and labels from a dataset of dialogic turns, resulting in a novel corpus of discourse relation pairs; the first of its kind to attempt to identify the discourse relations connecting the dialogic turns in open-domain discourse. Moreover, we have taken the first steps to leverage the dialogue features unique to our task to further improve the identification of such relations by performing feature ablation and incorporating dialogue features to enhance the state-of-the-art model. P19-1065 @@ -838,7 +838,7 @@ TengLong Avishek JoeyBose YanshuaiCao - Jackie Chi KitCheung + Jackie Chi KitCheung 678–687 Coherence is an important aspect of text quality and is crucial for ensuring its readability. One important limitation of existing coherence models is that training on one domain does not easily generalize to unseen categories of text. Previous work advocates for generative models for cross-domain generalization, because for discriminative models, the space of incoherent sentence orderings to discriminate against during training is prohibitively large. In this work, we propose a local discriminative neural model with a much smaller negative sampling space that can efficiently learn against incorrect orderings. The proposed coherence model is simple in structure, yet it significantly outperforms previous state-of-art methods on a standard benchmark dataset on the Wall Street Journal corpus, as well as in multiple new challenging settings of transfer to unseen categories of discourse on Wikipedia articles. P19-1067 @@ -847,7 +847,7 @@ <fixed-case>MOROCO</fixed-case>: The <fixed-case>M</fixed-case>oldavian and <fixed-case>R</fixed-case>omanian Dialectal Corpus - AndreiButnaru + AndreiButnaru Radu TudorIonescu 688–698 In this work, we introduce the MOldavian and ROmanian Dialectal COrpus (MOROCO), which is freely available for download at https://github.com/butnaruandrei/MOROCO. The corpus contains 33564 samples of text (with over 10 million tokens) collected from the news domain. The samples belong to one of the following six topics: culture, finance, politics, science, sports and tech. The data set is divided into 21719 samples for training, 5921 samples for validation and another 5924 samples for testing. For each sample, we provide corresponding dialectal and category labels. This allows us to perform empirical studies on several classification tasks such as (i) binary discrimination of Moldavian versus Romanian text samples, (ii) intra-dialect multi-class categorization by topic and (iii) cross-dialect multi-class categorization by topic. We perform experiments using a shallow approach based on string kernels, as well as a novel deep approach based on character-level convolutional neural networks containing Squeeze-and-Excitation blocks. We also present and analyze the most discriminative features of our best performing model, before and after named entity removal. @@ -895,7 +895,7 @@ DominikSchlechtweg AnnaHätty MarcoDel Tredici - SabineSchulte im Walde + SabineSchulte im Walde 732–746 We perform an interdisciplinary large-scale evaluation for detecting lexical semantic divergences in a diachronic and in a synchronic task: semantic sense changes across time, and semantic sense changes across domains. Our work addresses the superficialness and lack of comparison in assessing models of diachronic lexical change, by bringing together and extending benchmark models on a common state-of-the-art evaluation task. In addition, we demonstrate that the same evaluation task and modelling approaches can successfully be utilised for the synchronic detection of domain-specific sense divergences in the field of term extraction. P19-1072 @@ -905,9 +905,9 @@ <fixed-case>E</fixed-case>rrudite: Scalable, Reproducible, and Testable Error Analysis TongshuangWu - Marco TulioRibeiro + Marco TulioRibeiro JeffreyHeer - DanielWeld + DanielWeld 747–763 Though error analysis is crucial to understanding and improving NLP models, the common practice of manual, subjective categorization of a small sample of errors can yield biased and incomplete conclusions. This paper codifies model and task agnostic principles for informative error analysis, and presents Errudite, an interactive tool for better supporting this process. First, error groups should be precisely defined for reproducibility; Errudite supports this with an expressive domain-specific language. Second, to avoid spurious conclusions, a large set of instances should be analyzed, including both positive and negative examples; Errudite enables systematic grouping of relevant instances with filtering queries. Third, hypotheses about the cause of errors should be explicitly tested; Errudite supports this via automated counterfactual rewriting. We validate our approach with a user study, finding that Errudite (1) enables users to perform high quality and reproducible error analyses with less effort, (2) reveals substantial ambiguities in prior published error analyses practices, and (3) enhances the error analysis experience by allowing users to test and revise prior beliefs. P19-1073 @@ -963,9 +963,9 @@ ChrisMadge JuntaoYu JonChamberlain - UdoKruschwitz + UdoKruschwitz SilviuPaun - MassimoPoesio + MassimoPoesio 797–807 One of the key steps in language resource creation is the identification of the text segments to be annotated, or markables, which depending on the task may vary from nominal chunks for named entity resolution to (potentially nested) noun phrases in coreference resolution (or mentions) to larger text segments in text segmentation. Markable identification is typically carried out semi-automatically, by running a markable identifier and correcting its output by hand–which is increasingly done via annotators recruited through crowdsourcing and aggregating their responses. In this paper, we present a method for identifying markables for coreference annotation that combines high-performance automatic markable detectors with checking with a Game-With-A-Purpose (GWAP) and aggregation using a Bayesian annotation model. The method was evaluated both on news data and data from a variety of other genres and results in an improvement on F1 of mention boundaries of over seven percentage points when compared with a state-of-the-art, domain-independent automatic mention detector, and almost three points over an in-domain mention detector. One of the key contributions of our proposal is its applicability to the case in which markables are nested, as is the case with coreference markables; but the GWAP and several of the proposed markable detectors are task and language-independent and are thus applicable to a variety of other annotation scenarios. P19-1077 @@ -1005,7 +1005,7 @@ AnushaBalakrishnan JinfengRao KartikeyaUpasani - MichaelWhite + MichaelWhite RajenSubba 831–844 Generating fluent natural language responses from structured semantic representations is a critical step in task-oriented conversational systems. Avenues like the E2E NLG Challenge have encouraged the development of neural approaches, particularly sequence-to-sequence (Seq2Seq) models for this problem. The semantic representations used, however, are often underspecified, which places a higher burden on the generation model for sentence planning, and also limits the extent to which generated responses can be controlled in a live system. In this paper, we (1) propose using tree-structured semantic representations, like those used in traditional rule-based NLG systems, for better discourse-level structuring and sentence-level planning; (2) introduce a challenging dataset using this representation for the weather domain; (3) introduce a constrained decoding approach for Seq2Seq models that leverages this representation to improve semantic correctness; and (4) demonstrate promising results on our dataset and the E2E dataset. @@ -1058,9 +1058,9 @@ Don’t Take the Premise for Granted: Mitigating Artifacts in Natural Language Inference YonatanBelinkov AdamPoliak - StuartShieber + StuartShieber BenjaminVan Durme - AlexanderRush + AlexanderRush 877–891 Natural Language Inference (NLI) datasets often contain hypothesis-only biases—artifacts that allow models to achieve non-trivial performance without learning whether a premise entails a hypothesis. We propose two probabilistic methods to build models that are more robust to such biases and better transfer across datasets. In contrast to standard approaches to NLI, our methods predict the probability of a premise given a hypothesis and NLI label, discouraging models from ignoring the premise. We evaluate our methods on synthetic and existing NLI datasets by training on datasets containing biases and testing on datasets containing no (or different) hypothesis-only biases. Our results indicate that these methods can make NLI models more robust to dataset-specific artifacts, transferring better than a baseline architecture in 9 out of 12 NLI datasets. Additionally, we provide an extensive analysis of the interplay of our methods with known biases in NLI datasets, as well as the effects of encouraging models to ignore biases and fine-tuning on target datasets. P19-1084 @@ -1088,7 +1088,7 @@ <fixed-case>S</fixed-case>her<fixed-case>LI</fixed-case>i<fixed-case>C</fixed-case>: A Typed Event-Focused Lexical Inference Benchmark for Evaluating Natural Language Inference MartinSchmitt - HinrichSchütze + HinrichSchütze 902–914 We present SherLIiC, a testbed for lexical inference in context (LIiC), consisting of 3985 manually annotated inference rule candidates (InfCands), accompanied by (i) ~960k unlabeled InfCands, and (ii) ~190k typed textual relations between Freebase entities extracted from the large entity-linked corpus ClueWeb09. Each InfCand consists of one of these relations, expressed as a lemmatized dependency path, and two argument placeholders, each linked to one or more Freebase types. Due to our candidate selection process based on strong distributional evidence, SherLIiC is much harder than existing testbeds because distributional evidence is of little utility in the classification of InfCands. We also show that, due to its construction, many of SherLIiC’s correct InfCands are novel and missing from existing rule bases. We evaluate a large number of strong baselines on SherLIiC, ranging from semantic vector space models to state of the art neural models of natural language inference (NLI). We show that SherLIiC poses a tough challenge to existing NLI systems. P19-1086 @@ -1116,7 +1116,7 @@ VerónicaPérez-Rosas XinyiWu KennethResnicow - RadaMihalcea + RadaMihalcea 926–935 The quality of a counseling intervention relies highly on the active collaboration between clients and counselors. In this paper, we explore several linguistic aspects of the collaboration process occurring during counseling conversations. Specifically, we address the differences between high-quality and low-quality counseling. Our approach examines participants’ turn-by-turn interaction, their linguistic alignment, the sentiment expressed by speakers during the conversation, as well as the different topics being discussed. Our results suggest important language differences in low- and high-quality counseling, which we further use to derive linguistic features able to capture the differences between the two groups. These features are then used to build automatic classifiers that can predict counseling quality with accuracies of up to 88%. P19-1088 @@ -1270,7 +1270,7 @@ TakuyaMakino TomoyaIwakura HiroyaTakamura - ManabuOkumura + ManabuOkumura 1039–1048 We propose a global optimization method under length constraint (GOLC) for neural text summarization models. GOLC increases the probabilities of generating summaries that have high evaluation scores, ROUGE in this paper, within a desired length. We compared GOLC with two optimization methods, a maximum log-likelihood and a minimum risk training, on CNN/Daily Mail and a Japanese single document summarization data set of The Mainichi Shimbun Newspapers. The experimental results show that a state-of-the-art neural summarization model optimized with GOLC generates fewer overlength summaries while maintaining the fastest processing speed; only 6.70% overlength summaries on CNN/Daily and 7.8% on long summary of Mainichi, compared to the approximately 20% to 50% on CNN/Daily Mail and 10% to 30% on Mainichi with the other optimization methods. We also demonstrate the importance of the generation of in-length summaries for post-editing with the dataset Mainich that is created with strict length constraints. The ex- perimental results show approximately 30% to 40% improved post-editing time by use of in-length summaries. P19-1099 @@ -1284,7 +1284,7 @@ PengfeiLiu DanqingWang XipengQiu - XuanjingHuang + XuanjingHuang 1049–1058 The recent years have seen remarkable success in the use of deep neural networks on text summarization. However, there is no clear understanding of why they perform so well, or how they might be improved. In this paper, we seek to better understand how neural extractive summarization systems could benefit from different types of model architectures, transferable knowledge and learning schemas. Besides, we find an effective way to improve the current framework and achieve the state-of-the-art result on CNN/DailyMail by a large margin based on our observations and analysis. Hopefully, our work could provide more hints for future research on extractive summarization. P19-1100 @@ -1305,11 +1305,11 @@ Multi-News: A Large-Scale Multi-Document Summarization Dataset and Abstractive Hierarchical Model - AlexanderFabbri + AlexanderFabbri IreneLi TianweiShe SuyiLi - DragomirRadev + DragomirRadev 1074–1084 Automatic generation of summaries from multiple news articles is a valuable tool as the number of online publications grows rapidly. Single document summarization (SDS) systems have benefited from advances in neural encoder-decoder model thanks to the availability of large datasets. However, multi-document summarization (MDS) of news articles has been limited to datasets of a couple of hundred examples. In this paper, we introduce Multi-News, the first large-scale MDS news dataset. Additionally, we propose an end-to-end model which incorporates a traditional extractive summarization model with a standard SDS model and achieves competitive results on MDS datasets. We benchmark several methods on Multi-News and hope that this work will promote advances in summarization in the multi-document setting. P19-1102 @@ -1361,7 +1361,7 @@ TirthankarGhosal RajeevVerma AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 1120–1130 Automatically validating a research artefact is one of the frontiers in Artificial Intelligence (AI) that directly brings it close to competing with human intellect and intuition. Although criticised sometimes, the existing peer review system still stands as the benchmark of research validation. The present-day peer review process is not straightforward and demands profound domain knowledge, expertise, and intelligence of human reviewer(s), which is somewhat elusive with the current state of AI. However, the peer review texts, which contains rich sentiment information of the reviewer, reflecting his/her overall attitude towards the research in the paper, could be a valuable entity to predict the acceptance or rejection of the manuscript under consideration. Here in this work, we investigate the role of reviewer sentiment embedded within peer review texts to predict the peer review outcome. Our proposed deep neural architecture takes into account three channels of information: the paper, the corresponding reviews, and review’s polarity to predict the overall recommendation score as well as the final decision. We achieve significant performance improvement over the baselines (∼ 29% error reduction) proposed in a recently released dataset of peer reviews. An AI of this kind could assist the editors/program chairs as an additional layer of confidence, especially when non-responding/missing reviewers are frequent in present day peer review. P19-1106 @@ -1385,7 +1385,7 @@ AdityaJoshi SarvnazKarimi RossSparks - CecileParis + CecileParis 1142–1147 Personal health mention detection deals with predicting whether or not a given sentence is a report of a health condition. Past work mentions errors in this prediction when symptom words, i.e., names of symptoms of interest, are used in a figurative sense. Therefore, we combine a state-of-the-art figurative usage detection with CNN-based personal health mention detection. To do so, we present two methods: a pipeline-based approach and a feature augmentation-based approach. The introduction of figurative usage detection results in an average improvement of 2.21% F-score of personal health mention detection, in the case of the feature augmentation-based approach. This paper demonstrates the promise of using figurative usage detection to improve personal health mention detection. P19-1108 @@ -1418,10 +1418,10 @@ Poetry to Prose Conversion in <fixed-case>S</fixed-case>anskrit as a Linearisation Task: A Case for Low-Resource Languages AmrithKrishna - VishnuSharma + VishnuSharma BishalSantra AishikChakraborty - PavankumarSatuluri + PavankumarSatuluri PawanGoyal 1160–1166 The word ordering in a Sanskrit verse is often not aligned with its corresponding prose order. Conversion of the verse to its corresponding prose helps in better comprehension of the construction. Owing to the resource constraints, we formulate this task as a word ordering (linearisation) task. In doing so, we completely ignore the word arrangement at the verse side. kāvya guru, the approach we propose, essentially consists of a pipeline of two pretraining steps followed by a seq2seq model. The first pretraining step learns task-specific token embeddings from pretrained embeddings. In the next step, we generate multiple possible hypotheses for possible word arrangements of the input %using another pretraining step. We then use them as inputs to a neural seq2seq model for the final prediction. We empirically show that the hypotheses generated by our pretraining step result in predictions that consistently outperform predictions based on the original order in the verse. Overall, kāvya guru outperforms current state of the art models in linearisation for the poetry to prose conversion task in Sanskrit. @@ -1458,7 +1458,7 @@ Context-specific Language Modeling for Human Trafficking Detection from Online Advertisements SaeidehShahrokh Esfahani - Michael J.Cafarella + Michael J.Cafarella MaziyarBaran Pouyan GregoryDeAngelo ElenaEneva @@ -1473,8 +1473,8 @@ Self-Attentional Models for Lattice Inputs MatthiasSperber GrahamNeubig - Ngoc-QuanPham - AlexWaibel + Ngoc-QuanPham + AlexWaibel 1185–1197 Lattices are an efficient and effective method to encode ambiguity of upstream systems in natural language processing tasks, for example to compactly capture multiple speech recognition hypotheses, or to represent multiple linguistic analyses. Previous work has extended recurrent neural networks to model lattice inputs and achieved improvements in various tasks, but these models suffer from very slow computation speeds. This paper extends the recently proposed paradigm of self-attention to handle lattice inputs. Self-attention is a sequence modeling technique that relates inputs to one another by computing pairwise similarities and has gained popularity for both its strong results and its computational efficiency. To extend such models to handle lattices, we introduce probabilistic reachability masks that incorporate lattice structure into the model and support lattice scores if available. We also propose a method for adapting positional embeddings to lattice structures. We apply the proposed model to a speech translation task and find that it outperforms all examined baselines while being much faster to compute than previous neural lattice models during both training and inference. P19-1115 @@ -1499,7 +1499,7 @@ JiajunZhang FeifeiZhai JingfangXu - ChengqingZong + ChengqingZong 1213–1223 Multilingual neural machine translation (Multi-NMT) with one encoder-decoder model has made remarkable progress due to its simple deployment. However, this multilingual translation paradigm does not make full use of language commonality and parameter sharing between encoder and decoder. Furthermore, this kind of paradigm cannot outperform the individual models trained on bilingual corpus in most cases. In this paper, we propose a compact and language-sensitive method for multilingual translation. To maximize parameter sharing, we first present a universal representor to replace both encoder and decoder models. To make the representor sensitive for specific languages, we further introduce language-sensitive embedding, attention, and discriminator with the ability to enhance model performance. We verify our methods on various translation scenarios, including one-to-many, many-to-many and zero-shot. Extensive experiments demonstrate that our proposed methods remarkably outperform strong standard multilingual translation systems on WMT and IWSLT datasets. Moreover, we find that our model is especially helpful in low-resource and zero-shot translation scenarios. P19-1117 @@ -1509,7 +1509,7 @@ Unsupervised Parallel Sentence Extraction with Parallel Segment Detection Helps Machine Translation ViktorHangya - AlexanderFraser + AlexanderFraser 1224–1234 Mining parallel sentences from comparable corpora is important. Most previous work relies on supervised systems, which are trained on parallel data, thus their applicability is problematic in low-resource scenarios. Recent developments in building unsupervised bilingual word embeddings made it possible to mine parallel sentences based on cosine similarities of source and target language words. We show that relying only on this information is not enough, since sentences often have similar words but different meanings. We detect continuous parallel segments in sentence pair candidates and rely on them when mining parallel sentences. We show better mining accuracy on three language pairs in a standard shared task on artificial data. We also provide the first experiments showing that parallel sentences mined from real life sources improve unsupervised MT. Our code is available, we hope it will be used to support low-resource MT research. P19-1118 @@ -1522,8 +1522,8 @@ RuiWang KehaiChen MasaoUtiyama - EiichiroSumita - TiejunZhao + EiichiroSumita + TiejunZhao 1235–1245 Unsupervised bilingual word embedding (UBWE), together with other technologies such as back-translation and denoising, has helped unsupervised neural machine translation (UNMT) achieve remarkable results in several language pairs. In previous methods, UBWE is first trained using non-parallel monolingual corpora and then this pre-trained UBWE is used to initialize the word embedding in the encoder and decoder of UNMT. That is, the training of UBWE and UNMT are separate. In this paper, we first empirically investigate the relationship between UBWE and UNMT. The empirical findings show that the performance of UNMT is significantly affected by the performance of UBWE. Thus, we propose two methods that train UNMT with UBWE agreement. Empirical results on several language pairs show that the proposed methods significantly outperform conventional UNMT. P19-1119 @@ -1534,7 +1534,7 @@ Effective Cross-lingual Transfer of Neural Machine Translation Models without Shared Vocabularies YunsuKim YingboGao - HermannNey + HermannNey 1246–1257 Transfer learning or multilingual model is essential for low-resource neural machine translation (NMT), but the applicability is limited to cognate languages by sharing their vocabularies. This paper shows effective techniques to transfer a pretrained NMT model to a new, unrelated language without shared vocabularies. We relieve the vocabulary mismatch by using cross-lingual word embedding, train a more language-agnostic encoder by injecting artificial noises, and generate synthetic data easily from the pretraining data without back-translation. Our methods do not require restructuring the vocabulary or retraining the model. We improve plain NMT transfer by up to +5.1% BLEU in five low-resource translation tasks, outperforming multilingual joint training by a large margin. We also provide extensive ablation studies on pretrained embedding, synthetic data, vocabulary size, and parameter freezing for a better understanding of NMT transfer. P19-1120 @@ -1546,7 +1546,7 @@ JiataoGu YongWang KyunghyunCho - Victor O.K.Li + Victor O.K.Li 1258–1268 Zero-shot translation, translating between language pairs on which a Neural Machine Translation (NMT) system has never been trained, is an emergent property when training the system in multilingual settings. However, naive training for zero-shot NMT easily fails, and is sensitive to hyper-parameter setting. The performance typically lags far behind the more conventional pivot-based approach which translates twice using a third language as a pivot. In this work, we address the degeneracy problem due to capturing spurious correlations by quantitatively analyzing the mutual information between language IDs of the source and decoded sentences. Inspired by this analysis, we propose to use two simple but effective approaches: (1) decoder pre-training; (2) back-translation. These methods show significant improvement (4 22 BLEU points) over the vanilla zero-shot translation on three challenging multilingual datasets, and achieve similar or better results than the pivot-based approach. P19-1121 @@ -1622,7 +1622,7 @@ Global Textual Relation Embedding for Relational Understanding - ZhiyuChen + ZhiyuChen HanwenZha HongleiLiu WenhuChen @@ -1640,7 +1640,7 @@ YankaiLin ZhiyuanLiu JieFu - Tat-SengChua + Tat-SengChua MaosongSun 1331–1339 In this paper, we propose a novel graph neural network with generated parameters (GP-GNNs). The parameters in the propagation module, i.e. the transition matrices used in message passing procedure, are produced by a generator taking natural language sentences as inputs. We verify GP-GNNs in relation extraction from text, both on bag- and instance-settings. Experimental results on a human-annotated dataset and two distantly supervised datasets show that multi-hop reasoning mechanism yields significant improvements. We also perform a qualitative analysis to demonstrate that our model could discover more accurate relations by multi-hop relational reasoning. @@ -1667,7 +1667,7 @@ Exploiting Entity <fixed-case>BIO</fixed-case> Tag Embeddings and Multi-task Learning for Relation Extraction with Imbalanced Data WeiYe - BoLi + BoLi RuiXie ZhonghaoSheng LongChen @@ -1685,7 +1685,7 @@ YuanbinWu MingGong DaxinJiang - ManLan + ManLan ShiliangSun NanDuan 1361–1370 @@ -1748,7 +1748,7 @@ <fixed-case>G</fixed-case>raph<fixed-case>R</fixed-case>el: Modeling Text as Relational Graphs for Joint Entity and Relation Extraction Tsu-JuiFu Peng-HsuanLi - Wei-YunMa + Wei-YunMa 1409–1418 In this paper, we present GraphRel, an end-to-end relation extraction model which uses graph convolutional networks (GCNs) to jointly learn named entities and relations. In contrast to previous baselines, we consider the interaction between named entities and relations via a 2nd-phase relation-weighted GCN to better extract relations. Linear and dependency structures are both used to extract both sequential and regional features of the text, and a complete word graph is further utilized to extract implicit features among all word pairs of the text. With the graph-based approach, the prediction for overlapping relations is substantially improved over previous sequential approaches. We evaluate GraphRel on two public datasets: NYT and WebNLG. Results show that GraphRel maintains high precision while increasing recall substantially. Also, GraphRel outperforms previous work by 3.2% and 5.8% (F1 score), achieving a new state-of-the-art for relation extraction. P19-1136 @@ -1779,9 +1779,9 @@ YaliangLi NanDu XianWu - WeiFan + WeiFan FenglongMa - PhilipYu + PhilipYu 1430–1440 This paper presents a novel framework, MGNER, for Multi-Grained Named Entity Recognition where multiple entities or entity mentions in a sentence could be non-overlapping or totally nested. Different from traditional approaches regarding NER as a sequential labeling task and annotate entities consecutively, MGNER detects and recognizes entities on multiple granularities: it is able to recognize named entities without explicitly assuming non-overlapping or totally nested structures. MGNER consists of a Detector that examines all possible word segments and a Classifier that categorizes entities. In addition, contextual information and a self-attention mechanism are utilized throughout the framework to improve the NER performance. Experimental results show that MGNER outperforms current state-of-the-art baselines up to 4.4% in terms of the F1 score among nested/non-overlapping NER tasks. P19-1138 @@ -1810,7 +1810,7 @@ ChengjiangLi ZhiyuanLiu JuanziLi - Tat-SengChua + Tat-SengChua 1452–1461 Entity alignment typically suffers from the issues of structural heterogeneity and limited seed alignments. In this paper, we propose a novel Multi-channel Graph Neural Network model (MuGNN) to learn alignment-oriented knowledge graph (KG) embeddings by robustly encoding two KGs via multiple channels. Each channel encodes KGs via different relation weighting schemes with respect to self-attention towards KG completion and cross-KG attention for pruning exclusive entities respectively, which are further combined via pooling techniques. Moreover, we also infer and transfer rule knowledge for completing two KGs consistently. MuGNN is expected to reconcile the structural differences of two KGs, and thus make better use of seed alignments. Extensive experiments on five publicly available datasets demonstrate our superior performance (5% Hits@1 up on average). Source code and data used in the experiments can be accessed at https://github.com/thunlp/MuGNN . P19-1140 @@ -1844,7 +1844,7 @@ Training Hybrid Language Models by Marginalizing over Segmentations - EdouardGrave + EdouardGrave SainbayarSukhbaatar PiotrBojanowski ArmandJoulin @@ -1859,7 +1859,7 @@ HongyinLuo LanJiang YonatanBelinkov - JamesGlass + JamesGlass 1483–1493 Common language models typically predict the next word given the context. In this work, we propose a method that improves language modeling by learning to align the given context and the following phrase. The model does not require any linguistic annotation of phrase segmentation. Instead, we define syntactic heights and phrase segmentation rules, enabling the model to automatically induce phrases, recognize their task-specific heads, and generate phrase embeddings in an unsupervised learning manner. Our method can easily be applied to language models with different network architectures since an independent module is used for phrase induction and context-phrase alignment, and no change is required in the underlying language modeling network. Experiments have shown that our model outperformed several strong baseline models on different data sets. We achieved a new state-of-the-art performance of 17.4 perplexity on the Wikitext-103 dataset. Additionally, visualizing the outputs of the phrase induction module showed that our model is able to learn approximate phrase-level structural knowledge without any annotation. P19-1144 @@ -1870,7 +1870,7 @@ Lightweight and Efficient Neural Natural Language Processing with Quaternion Networks YiTay AstonZhang - Anh TuanLuu + Anh TuanLuu JinfengRao ShuaiZhang ShuohangWang @@ -1886,7 +1886,7 @@ Sparse Sequence-to-Sequence Models BenPeters VladNiculae - André F. T.Martins + André F. T.Martins 1504–1519 Sequence-to-sequence models are a powerful workhorse of NLP. Most variants employ a softmax transformation in both their attention mechanism and output layer, leading to dense alignments and strictly positive output probabilities. This density is wasteful, making models less interpretable and assigning probability mass to many implausible outputs. In this paper, we propose sparse sequence-to-sequence models, rooted in a new family of \alpha-entmax transformations, which includes softmax and sparsemax as particular cases, and is sparse for any \alpha > 1. We provide fast algorithms to evaluate these transformations and their gradients, which scale well for large vocabulary sizes. Our models are able to produce sparse alignments and to assign nonzero probability to a short list of plausible outputs, sometimes rendering beam search exact. Experiments on morphological inflection and machine translation reveal consistent gains over dense models. P19-1146 @@ -1899,7 +1899,7 @@ MinhaoCheng Da-ChengJuan WeiWei - Wen-LianHsu + Wen-LianHsu Cho-JuiHsieh 1520–1529 This work examines the robustness of self-attentive neural networks against adversarial input perturbations. Specifically, we investigate the attention and feature extraction mechanisms of state-of-the-art recurrent neural networks and self-attentive architectures for sentiment analysis, entailment and machine translation under adversarial attacks. We also propose a novel attack algorithm for generating more natural adversarial examples that could mislead neural models but not humans. Experimental results show that, compared to recurrent neural models, self-attentive models are more robust against adversarial perturbation. In addition, we provide theoretical explanations for their superior robustness to support our claims. @@ -2004,7 +2004,7 @@ Better Character Language Modeling through Morphology TerraBlevins - LukeZettlemoyer + LukeZettlemoyer 1606–1613 We incorporate morphological supervision into character language models (CLMs) via multitasking and show that this addition improves bits-per-character (BPC) performance across 24 languages, even when the morphology data and language modeling data are disjoint. Analyzing the CLMs shows that inflected words benefit more from explicitly modeling morphology than uninflected words, and that morphological supervision improves performance even as the amount of language modeling data grows. We then transfer morphological supervision across languages to improve performance in the low-resource setting. P19-1156 @@ -2015,7 +2015,7 @@ Historical Text Normalization with Delayed Rewards SimonFlachs MarcelBollmann - AndersSøgaard + AndersSøgaard 1614–1619 Training neural sequence-to-sequence models with simple token-level log-likelihood is now a standard approach to historical text normalization, albeit often outperformed by phrase-based models. Policy gradient training enables direct optimization for exact matches, and while the small datasets in historical text normalization are prohibitive of from-scratch reinforcement learning, we show that policy gradient fine-tuning leads to significant improvements across the board. Policy gradient training, in particular, leads to more accurate normalizations for long or unseen words. P19-1157 @@ -2026,7 +2026,7 @@ Stochastic Tokenization with a Language Model for Neural Text Classification TatsuyaHiraoka HiroyukiShindo - YujiMatsumoto + YujiMatsumoto 1620–1629 For unsegmented languages such as Japanese and Chinese, tokenization of a sentence has a significant impact on the performance of text classification. Sentences are usually segmented with words or subwords by a morphological analyzer or byte pair encoding and then encoded with word (or subword) representations for neural networks. However, segmentation is potentially ambiguous, and it is unclear whether the segmented tokens achieve the best performance for the target task. In this paper, we propose a method to simultaneously learn tokenization and text classification to address these problems. Our model incorporates a language model for unsupervised tokenization into a text classifier and then trains both models simultaneously. To make the model robust against infrequent tokens, we sampled segmentation for each sentence stochastically during training, which resulted in improved performance of text classification. We conducted experiments on sentiment analysis as a text classification task and show that our method achieves better performance than previous methods. P19-1158 @@ -2066,8 +2066,8 @@ Counterfactual Data Augmentation for Mitigating Gender Stereotypes in Languages with Rich Morphology RanZmigrod - Sabrina J.Mielke - HannaWallach + Sabrina J.Mielke + HannaWallach RyanCotterell 1651–1661 Gender stereotypes are manifest in most of the world’s languages and are consequently propagated or amplified by NLP systems. Although research has focused on mitigating gender stereotypes in English, the approaches that are commonly employed produce ungrammatical sentences in morphologically rich languages. We present a novel approach for converting between masculine-inflected and feminine-inflected sentences in such languages. For Spanish and Hebrew, our approach achieves F1 scores of 82% and 73% at the level of tags and accuracies of 90% and 87% at the level of forms. By evaluating our approach using four different languages, we show that, on average, it reduces gender stereotyping by a factor of 2.5 without any sacrifice to grammaticality. @@ -2098,7 +2098,7 @@ DallasCard SaadiaGabriel YejinChoi - Noah A.Smith + Noah A.Smith 1668–1678 We investigate how annotators’ insensitivity to differences in dialect can lead to racial bias in automatic hate speech detection models, potentially amplifying harm against minority populations. We first uncover unexpected correlations between surface markers of African American English (AAE) and ratings of toxicity in several widely-used hate speech datasets. Then, we show that models trained on these corpora acquire and propagate these biases, such that AAE tweets and tweets by self-identified African Americans are up to two times more likely to be labelled as offensive compared to others. Finally, we propose *dialect* and *race priming* as ways to reduce the racial bias in annotation, showing that when annotators are made explicitly aware of an AAE tweet’s dialect they are significantly less likely to label the tweet as offensive. P19-1163 @@ -2109,8 +2109,8 @@ Evaluating Gender Bias in Machine Translation GabrielStanovsky - Noah A.Smith - LukeZettlemoyer + Noah A.Smith + LukeZettlemoyer 1679–1684 We present the first challenge set and evaluation protocol for the analysis of gender bias in machine translation (MT). Our approach uses two recent coreference resolution datasets composed of English sentences which cast participants into non-stereotypical gender roles (e.g., “The doctor asked the nurse to help her in the operation”). We devise an automatic gender bias evaluation method for eight target languages with grammatical gender, based on morphological analysis (e.g., the use of female inflection for the word “doctor”). Our analyses show that four popular industrial MT systems and two recent state-of-the-art academic MT models are significantly prone to gender-biased translation errors for all tested target languages. Our data and code are publicly available at https://github.com/gabrielStanovsky/mt_gender. P19-1164 @@ -2145,7 +2145,7 @@ Unsupervised Discovery of Gendered Language through Latent-Variable Modeling Alexander MiserlisHoyle LawrenceWolf-Sonkin - HannaWallach + HannaWallach IsabelleAugenstein RyanCotterell 1706–1716 @@ -2200,7 +2200,7 @@ Meaning to Form: Measuring Systematicity as Information TiagoPimentel - Arya D.McCarthy + Arya D.McCarthy DamianBlasi BrianRoark RyanCotterell @@ -2239,7 +2239,7 @@ KehaiChen RuiWang MasaoUtiyama - EiichiroSumita + EiichiroSumita 1787–1799 The reordering model plays an important role in phrase-based statistical machine translation. However, there are few works that exploit the reordering information in neural machine translation. In this paper, we propose a reordering mechanism to learn the reordering embedding of a word based on its contextual information. These learned reordering embeddings are stacked together with self-attention networks to learn sentence representation for machine translation. The reordering mechanism can be easily integrated into both the encoder and the decoder in the Transformer translation system. Experimental results on WMT’14 English-to-German, NIST Chinese-to-English, and WAT Japanese-to-English translation tasks demonstrate that the proposed methods can significantly improve the performance of the Transformer. P19-1174 @@ -2290,7 +2290,7 @@ Self-Supervised Neural Machine Translation DanaRuiter CristinaEspaña-Bonet - Josefvan Genabith + Josefvan Genabith 1828–1834 We present a simple new method where an emergent NMT system is used for simultaneously selecting training data and learning internal NMT representations. This is done in a self-supervised way without parallel data, in such a way that both tasks enhance each other during training. The method is language independent, introduces no additional hyper-parameters, and achieves BLEU scores of 29.21 (en2fr) and 27.36 (fr2en) on newstest2014 using English and French Wikipedia data for training. P19-1178 @@ -2302,7 +2302,7 @@ Exploring Phoneme-Level Speech Representations for End-to-End Speech Translation ElizabethSalesky MatthiasSperber - Alan WBlack + Alan WBlack 1835–1841 Previous work on end-to-end translation from speech has primarily used frame-level features as speech representations, which creates longer, sparser sequences than text. We show that a naive method to create compressed phoneme-like speech representations is far more effective and efficient for translation than traditional frame-level speech features. Specifically, we generate phoneme labels for speech frames and average consecutive frames with the same label to create shorter, higher-level source sequences for translation. We see improvements of up to 5 BLEU on both our high and low resource language pairs, with a reduction in training time of 60%. Our improvements hold across multiple data sizes and two language pairs. P19-1179 @@ -2312,7 +2312,7 @@ Visually Grounded Neural Syntax Acquisition - HaoyueShi + HaoyueShi JiayuanMao KevinGimpel KarenLivescu @@ -2343,7 +2343,7 @@ HaoTan FranckDernoncourt ZheLin - TrungBui + TrungBui MohitBansal 1873–1883 Describing images with text is a fundamental problem in vision-language research. Current studies in this domain mostly focus on single image captioning. However, in various real applications (e.g., image editing, difference interpretation, and retrieval), generating relational captions for two images, can also be very useful. This important problem has not been explored mostly due to lack of datasets and effective models. To push forward the research in this direction, we first introduce a new language-guided image editing dataset that contains a large number of real image pairs with corresponding editing instructions. We then propose a new relational speaker model based on an encoder-decoder architecture with static relational attention and sequential multi-head attention. We also extend the model with dynamic relational attention, which calculates visual alignment while decoding. Our models are evaluated on our newly collected and two public datasets consisting of image pairs annotated with relationship sentences. Experimental results, based on both automatic and human evaluation, demonstrate that our model outperforms all baselines and existing methods on all the datasets. @@ -2373,7 +2373,7 @@ EceTakmaz LiekeGelderloos EliaBruni - RaquelFernández + RaquelFernández 1895–1910 This paper introduces the PhotoBook dataset, a large-scale collection of visually-grounded, task-oriented dialogues in English designed to investigate shared dialogue history accumulating during conversation. Taking inspiration from seminal work on dialogue analysis, we propose a data-collection task formulated as a collaborative game prompting two online participants to refer to images utilising both their visual context as well as previously established referring expressions. We provide a detailed description of the task setup and a thorough analysis of the 2,500 dialogues collected. To further illustrate the novel features of the dataset, we propose a baseline model for reference resolution which uses a simple method to take into account shared information accumulated in a reference chain. Our results show that this information is particularly important to resolve later descriptions and underline the need to develop more sophisticated models of common ground in dialogue interaction. P19-1184 @@ -2397,8 +2397,8 @@ Semi-supervised Stochastic Multi-Domain Learning using Variational Inference YitongLi - TimothyBaldwin - TrevorCohn + TimothyBaldwin + TrevorCohn 1923–1934 Supervised models of NLP rely on large collections of text which closely resemble the intended testing setting. Unfortunately matching text is often not available in sufficient quantity, and moreover, within any domain of text, data is often highly heterogenous. In this paper we propose a method to distill the important domain signal as part of a multi-domain learning system, using a latent variable model in which parts of a neural model are stochastically gated based on the inferred domain. We compare the use of discrete versus continuous latent variables, operating in a domain-supervised or a domain semi-supervised setting, where the domain is known only for a subset of training inputs. We show that our model leads to substantial performance improvements over competitive benchmark domain adaptation methods, including methods using adversarial learning. P19-1186 @@ -2444,7 +2444,7 @@ Generating Long and Informative Reviews with Aspect-Aware Coarse-to-Fine Decoding JunyiLi - Wayne XinZhao + Wayne XinZhao Ji-RongWen YangSong 1969–1979 @@ -2505,7 +2505,7 @@ PengchengYang JieZhou YutongTan - BaobaoChang + BaobaoChang ZhifangSui XuSun 2013–2022 @@ -2620,7 +2620,7 @@ EijiAramaki IchiroKobayashi YusukeMiyao - NaoakiOkazaki + NaoakiOkazaki HiroyaTakamura 2102–2113 We propose a data-to-text generation model with two modules, one for tracking and the other for text generation. Our tracking module selects and keeps track of salient information and memorizes which record has been mentioned. Our generation module generates a summary conditioned on the state of tracking module. Our proposed model is considered to simulate the human-like writing process that gradually selects the information by determining the intermediate variables while writing the summary. In addition, we also explore the effectiveness of the writer information for generations. Experimental results show that our proposed model outperforms existing models in all evaluation metrics even without writer information. Incorporating writer information further improves the performance, contributing to content planning and surface realization. @@ -2731,7 +2731,7 @@ Adversarial Domain Adaptation Using Artificial Titles for Abstractive Title Generation - FrancineChen + FrancineChen Yan-YingChen 2197–2203 A common issue in training a deep learning, abstractive summarization model is lack of a large set of training summaries. This paper examines techniques for adapting from a labeled source domain to an unlabeled target domain in the context of an encoder-decoder model for text generation. In addition to adversarial domain adaptation (ADA), we introduce the use of artificial titles and sequential training to capture the grammatical style of the unlabeled target domain. Evaluation on adapting to/from news articles and Stack Exchange posts indicates that the use of these techniques can boost performance for both unsupervised adaptation as well as fine-tuning with limited target data. @@ -2793,7 +2793,7 @@ YangZhao XiaoyuShen WeiBi - AkikoAizawa + AkikoAizawa 2235–2240 Multi-sentence compression (MSC) aims to generate a grammatical but reduced compression from multiple input sentences while retaining their key information. Previous dominating approach for MSC is the extraction-based word graph approach. A few variants further leveraged lexical substitution to yield more abstractive compression. However, two limitations exist. First, the word graph approach that simply concatenates fragments from multiple sentences may yield non-fluent or ungrammatical compression. Second, lexical substitution is often inappropriate without the consideration of context information. To tackle the above-mentioned issues, we present a neural rewriter for multi-sentence compression that does not need any parallel corpus. Empirical studies have shown that our approach achieves comparable results upon automatic evaluation and improves the grammaticality of compression based on human evaluation. A parallel corpus with more than 140,000 (sentence group, compression) pairs is also constructed as a by-product for future research. P19-1216 @@ -2853,7 +2853,7 @@ MinghaoHu YuxingPeng ZhenHuang - DongshengLi + DongshengLi 2285–2295 This paper considers the reading comprehension task in which multiple documents are given as input. Prior work has shown that a pipeline of retriever, reader, and reranker can improve the overall performance. However, the pipeline system is inefficient since the input is re-encoded within each module, and is unable to leverage upstream components to help downstream training. In this work, we present RE^3QA, a unified question answering model that combines context retrieving, reading comprehension, and answer reranking to predict the final answer. Unlike previous pipelined approaches, RE^3QA shares contextualized text representation across different components, and is carefully designed to use high-quality upstream outputs (e.g., retrieved context or candidate answers) for directly supervising downstream modules (e.g., the reader or the reranker). As a result, the whole network can be trained end-to-end to avoid the context inconsistency problem. Experiments show that our model outperforms the pipelined baseline and achieves state-of-the-art results on two versions of TriviaQA and two variants of SQuAD. P19-1221 @@ -2873,7 +2873,7 @@ <fixed-case>E</fixed-case>3: Entailment-driven Extracting and Editing for Conversational Machine Reading VictorZhong - LukeZettlemoyer + LukeZettlemoyer 2310–2320 Conversational machine reading systems help users answer high-level questions (e.g. determine if they qualify for particular government benefits) when they do not know the exact rules by which the determination is made (e.g. whether they need certain income levels or veteran status). The key challenge is that these rules are only provided in the form of a procedural text (e.g. guidelines from government website) which the system must read to figure out what to ask the user. We present a new conversational machine reading model that jointly extracts a set of decision rules from the procedural text while reasoning about which are entailed by the conversational history and which still need to be edited to create questions for the user. On the recently introduced ShARC conversational machine reading dataset, our Entailment-driven Extract and Edit network (E3) achieves a new state-of-the-art, outperforming existing systems as well as a new BERT-based baseline. In addition, by explicitly highlighting which information still needs to be gathered, E3 provides a more explainable alternative to prior work. We release source code for our models and experiments at https://github.com/vzhong/e3. P19-1223 @@ -2940,8 +2940,8 @@ Compound Probabilistic Context-Free Grammars for Grammar Induction YoonKim - ChrisDyer - AlexanderRush + ChrisDyer + AlexanderRush 2369–2385 We study a formalization of the grammar induction problem that models sentences as being generated by a compound probabilistic context free grammar. In contrast to traditional formulations which learn a single stochastic grammar, our context-free rule probabilities are modulated by a per-sentence continuous latent variable, which induces marginal dependencies beyond the traditional context-free assumptions. Inference in this context-dependent grammar is performed by collapsed variational inference, in which an amortized variational posterior is placed on the continuous variable, and the latent trees are marginalized with dynamic programming. Experiments on English and Chinese show the effectiveness of our approach compared to recent state-of-the-art methods for grammar induction from words with neural language models. P19-1228 @@ -2978,7 +2978,7 @@ XiaoyuXing QiZhang JinlanFu - XuanjingHuang + XuanjingHuang 2409–2419 In this work, we explore the way to perform named entity recognition (NER) using only unlabeled data and named entity dictionaries. To this end, we formulate the task as a positive-unlabeled (PU) learning problem and accordingly propose a novel PU learning algorithm to perform the task. We prove that the proposed algorithm can unbiasedly and consistently estimate the task loss as if there is fully labeled data. A key feature of the proposed method is that it does not require the dictionaries to label every entity within a sentence, and it even does not require the dictionaries to label all of the words constituting an entity. This greatly reduces the requirement on the quality of the dictionaries and makes our method generalize well with quite simple dictionaries. Empirical studies on four public NER datasets demonstrate the effectiveness of our proposed method. We have published the source code at https://github.com/v-mipeng/LexiconNER. P19-1231 @@ -2988,7 +2988,7 @@ Multi-Task Semantic Dependency Parsing with Policy Gradient for Learning Easy-First Strategies ShuheiKurita - AndersSøgaard + AndersSøgaard 2420–2430 In Semantic Dependency Parsing (SDP), semantic relations form directed acyclic graphs, rather than trees. We propose a new iterative predicate selection (IPS) algorithm for SDP. Our IPS algorithm combines the graph-based and transition-based parsing approaches in order to handle multiple semantic head words. We train the IPS model using a combination of multi-task learning and task-specific policy gradient training. Trained this way, IPS achieves a new state of the art on the SemEval 2015 Task 18 datasets. Furthermore, we observe that policy gradient training learns an easy-first strategy. P19-1232 @@ -3001,7 +3001,7 @@ YijinLiu FandongMeng JinchaoZhang - JinanXu + JinanXu YufengChen JieZhou 2431–2441 @@ -3014,7 +3014,7 @@ Unsupervised Learning of <fixed-case>PCFG</fixed-case>s with Normalizing Flow LifengJin FinaleDoshi-Velez - TimothyMiller + TimothyMiller LaneSchwartz WilliamSchuler 2442–2452 @@ -3048,7 +3048,7 @@ Graph-based Dependency Parsing with Graph Neural Networks TaoJi YuanbinWu - ManLan + ManLan 2475–2485 We investigate the problem of efficiently incorporating high-order features into neural graph-based dependency parsing. Instead of explicitly extracting high-order features from intermediate parse trees, we develop a more powerful dependency tree node representation which captures high-order information concisely and efficiently. We use graph neural networks (GNNs) to learn the representations and discuss several new configurations of GNN’s updating and aggregation functions. Experiments on PTB show that our parser achieves the best UAS and LAS on PTB (96.0%, 94.3%) among systems without using any external resources. P19-1237 @@ -3059,8 +3059,8 @@ Wide-Coverage Neural <fixed-case>A</fixed-case>* Parsing for <fixed-case>M</fixed-case>inimalist <fixed-case>G</fixed-case>rammars JohnTorr MilošStanojević - MarkSteedman - Shay B.Cohen + MarkSteedman + Shay B.Cohen 2486–2505 Minimalist Grammars (Stabler, 1997) are a computationally oriented, and rigorous formalisation of many aspects of Chomsky’s (1995) Minimalist Program. This paper presents the first ever application of this formalism to the task of realistic wide-coverage parsing. The parser uses a linguistically expressive yet highly constrained grammar, together with an adaptation of the A* search algorithm currently used in CCG parsing (Lewis and Steedman, 2014; Lewis et al., 2016), with supertag probabilities provided by a bi-LSTM neural network supertagger trained on MGbank, a corpus of MG derivation trees. We report on some promising initial experimental results for overall dependency recovery as well as on the recovery of certain unbounded long distance dependencies. Finally, although like other MG parsers, ours has a high order polynomial worst case time complexity, we show that in practice its expected time complexity is cubic in the length of the sentence. The parser is publicly available. P19-1238 @@ -3096,7 +3096,7 @@ #<fixed-case>Y</fixed-case>ou<fixed-case>T</fixed-case>oo? Detection of Personal Recollections of Sexual Harassment on Social Media ArijitGhosh Chowdhury RamitSawhney - Rajiv RatnShah + Rajiv RatnShah DebanjanMahata 2527–2537 The availability of large-scale online social data, coupled with computational methods can help us answer fundamental questions relat- ing to our social lives, particularly our health and well-being. The #MeToo trend has led to people talking about personal experiences of harassment more openly. This work at- tempts to aggregate such experiences of sex- ual abuse to facilitate a better understanding of social media constructs and to bring about social change. It has been found that disclo- sure of abuse has positive psychological im- pacts. Hence, we contend that such informa- tion can leveraged to create better campaigns for social change by analyzing how users react to these stories and to obtain a better insight into the consequences of sexual abuse. We use a three part Twitter-Specific Social Media Lan- guage Model to segregate personal recollec- tions of sexual harassment from Twitter posts. An extensive comparison with state-of-the-art generic and specific models along with a de- tailed error analysis explores the merit of our proposed model. @@ -3108,7 +3108,7 @@ Multi-task Pairwise Neural Ranking for Hashtag Segmentation MounicaMaddela WeiXu - DanielPreoţiuc-Pietro + DanielPreoţiuc-Pietro 2538–2549 Hashtags are often employed on social media and beyond to add metadata to a textual utterance with the goal of increasing discoverability, aiding search, or providing additional semantics. However, the semantic content of hashtags is not straightforward to infer as these represent ad-hoc conventions which frequently include multiple words joined together and can include abbreviations and unorthodox spellings. We build a dataset of 12,594 hashtags split into individual segments and propose a set of approaches for hashtag segmentation by framing it as a pairwise ranking problem between candidate segmentations. Our novel neural approaches demonstrate 24.6% error reduction in hashtag segmentation accuracy compared to the current state-of-the-art method. Finally, we demonstrate that a deeper understanding of hashtag semantics obtained through segmentation is useful for downstream applications such as sentiment analysis, for which we achieved a 2.6% increase in average recall on the SemEval 2017 sentiment analysis dataset. P19-1242 @@ -3130,8 +3130,8 @@ Sentence-Level Evidence Embedding for Claim Verification with Hierarchical Attention Networks JingMa WeiGao - ShafiqJoty - Kam-FaiWong + ShafiqJoty + Kam-FaiWong 2561–2571 Claim verification is generally a task of verifying the veracity of a given claim, which is critical to many downstream applications. It is cumbersome and inefficient for human fact-checkers to find consistent pieces of evidence, from which solid verdict could be inferred against the claim. In this paper, we propose a novel end-to-end hierarchical attention network focusing on learning to represent coherent evidence as well as their semantic relatedness with the claim. Our model consists of three main components: 1) A coherence-based attention layer embeds coherent evidence considering the claim and sentences from relevant articles; 2) An entailment-based attention layer attends on sentences that can semantically infer the claim on top of the first attention; and 3) An output layer predicts the verdict based on the embedded evidence. Experimental results on three public benchmark datasets show that our proposed model outperforms a set of state-of-the-art baselines. P19-1244 @@ -3141,7 +3141,7 @@ Predicting Human Activities from User-Generated Content StevenWilson - RadaMihalcea + RadaMihalcea 2572–2582 The activities we do are linked to our interests, personality, political preferences, and decisions we make about the future. In this paper, we explore the task of predicting human activities from user-generated content. We collect a dataset containing instances of social media users writing about a range of everyday activities. We then use a state-of-the-art sentence embedding framework tailored to recognize the semantics of human activities and perform an automatic clustering of these activities. We train a neural network model to make predictions about which clusters contain activities that were performed by a given user based on the text of their previous posts and self-description. Additionally, we explore the degree to which incorporating inferred user traits into our model helps with this prediction task. P19-1245 @@ -3173,7 +3173,7 @@ Fine-Grained Spoiler Detection from Large-Scale Review Corpora MengtingWan RishabhMisra - NdapaNakashole + NdapaNakashole JulianMcAuley 2605–2610 This paper presents computational approaches for automatically detecting critical plot twists in reviews of media products. First, we created a large-scale book review dataset that includes fine-grained spoiler annotations at the sentence-level, as well as book and (anonymized) user information. Second, we carefully analyzed this dataset, and found that: spoiler language tends to be book-specific; spoiler distributions vary greatly across books and review authors; and spoiler sentences tend to jointly appear in the latter part of reviews. Third, inspired by these findings, we developed an end-to-end neural network architecture to detect spoiler sentences in review corpora. Quantitative and qualitative results demonstrate that the proposed method substantially outperforms existing baselines. @@ -3196,7 +3196,7 @@ Dataset Creation for Ranking Constructive News Comments SoichiroFujita HayatoKobayashi - ManabuOkumura + ManabuOkumura 2619–2626 Ranking comments on an online news service is a practically important task for the service provider, and thus there have been many studies on this task. However, most of them considered users’ positive feedback, such as “Like”-button clicks, as a quality measure. In this paper, we address directly evaluating the quality of comments on the basis of “constructiveness,” separately from user feedback. To this end, we create a new dataset including 100K+ Japanese comments with constructiveness scores (C-scores). Our experiments clarify that C-scores are not always related to users’ positive feedback, and the performance of pairwise ranking models tends to be enhanced by the variation of comments rather than articles. P19-1250 @@ -3270,10 +3270,10 @@ A Simple Recipe towards Reducing Hallucination in Neural Surface Realisation FengNie - Jin-GeYao + Jin-GeYao JinpengWang RongPan - Chin-YewLin + Chin-YewLin 2673–2679 Recent neural language generation systems often hallucinate contents (i.e., producing irrelevant or contradicted facts), especially when trained on loosely corresponding pairs of the input structure and text. To mitigate this issue, we propose to integrate a language understanding module for data refinement with self-training iterations to effectively induce strong equivalence between the input data and the paired text. Experiments on the E2E challenge dataset show that our proposed framework can reduce more than 50% relative unaligned noise from the original data-text pairs. A vanilla sequence-to-sequence neural NLG model trained on the refined data has improved on content correctness compared with the current state-of-the-art ensemble generator. P19-1256 @@ -3284,7 +3284,7 @@ Cross-Modal Commentator: Automatic Machine Commenting Based on Cross-Modal Information PengchengYang - ZhihanZhang + ZhihanZhang FuliLuo LeiLi ChengyangHuang @@ -3327,7 +3327,7 @@ Multi-hop Reading Comprehension across Multiple Documents by Reasoning over Heterogeneous Graphs MingTu GuangtaoWang - JingHuang + JingHuang YunTang XiaodongHe BowenZhou @@ -3380,7 +3380,7 @@ Sentence Mover’s Similarity: Automatic Evaluation for Multi-Sentence Texts ElizabethClark AsliCelikyilmaz - Noah A.Smith + Noah A.Smith 2748–2760 For evaluating machine-generated texts, automatic methods hold the promise of avoiding collection of human judgments, which can be expensive and time-consuming. The most common automatic metrics, like BLEU and ROUGE, depend on exact word matching, an inflexible approach for measuring semantic similarity. We introduce methods based on sentence mover’s similarity; our automatic metrics evaluate text in a continuous space using word and sentence embeddings. We find that sentence-based metrics correlate with human judgments significantly better than ROUGE, both on machine-generated summaries (average length of 3.4 sentences) and human-authored essays (average length of 7.5). We also show that sentence mover’s similarity can be used as a reward when learning a generation model via reinforcement learning; we present both automatic and human evaluations of summaries learned in this way, finding that our approach outperforms ROUGE. P19-1264 @@ -3447,8 +3447,8 @@ Putting Evaluation in Context: Contextual Embeddings Improve Machine Translation Evaluation NitikaMathur - TimothyBaldwin - TrevorCohn + TimothyBaldwin + TrevorCohn 2799–2808 Accurate, automatic evaluation of machine translation is critical for system tuning, and evaluating progress in the field. We proposed a simple unsupervised metric, and additional supervised metrics which rely on contextual word embeddings to encode the translation and reference sentences. We find that these models rival or surpass all existing metrics in the WMT 2017 sentence-level and system-level tracks, and our trained model has a substantially higher correlation with human judgements than all existing metrics on the WMT 2017 to-English sentence level dataset. P19-1269 @@ -3461,7 +3461,7 @@ XingshanZeng JingLi LuWang - Kam-FaiWong + Kam-FaiWong 2809–2818 As the online world continues its exponential growth, interpersonal communication has come to play an increasingly central role in opinion formation and change. In order to help users better engage with each other online, we study a challenging problem of re-entry prediction foreseeing whether a user will come back to a conversation they once participated in. We hypothesize that both the context of the ongoing conversations and the users’ previous chatting history will affect their continued interests in future engagement. Specifically, we propose a neural framework with three main layers, each modeling context, user history, and interactions between them, to explore how the conversation context and user chatting history jointly result in their re-entry behavior. We experiment with two large-scale datasets collected from Twitter and Reddit. Results show that our proposed framework with bi-attention achieves an F1 score of 61.1 on Twitter conversations, outperforming the state-of-the-art methods from previous work. P19-1270 @@ -3485,7 +3485,7 @@ Categorizing and Inferring the Relationship between the Text and Image of <fixed-case>T</fixed-case>witter Posts AlakanandaVempala - DanielPreoţiuc-Pietro + DanielPreoţiuc-Pietro 2830–2840 Text in social media posts is frequently accompanied by images in order to provide content, supply context, or to express feelings. This paper studies how the meaning of the entire tweet is composed through the relationship between its textual content and its image. We build and release a data set of image tweets annotated with four classes which express whether the text or the image provides additional information to the other modality. We show that by combining the text and image information, we can build a machine learning approach that accurately distinguishes between the relationship types. Further, we derive insights into how these relationships are materialized through text and image content analysis and how they are impacted by user demographic traits. These methods can be used in several downstream applications including pre-training image tagging models, collecting distantly supervised data for image captioning, and can be directly used in end-user applications to optimize screen estate. P19-1272 @@ -3496,8 +3496,8 @@ Who Sides with Whom? Towards Computational Construction of Discourse Networks for Political Debates - SebastianPadó - AndreBlessing + SebastianPadó + AndreBlessing NicoBlokker ErenayDayanik SebastianHaunss @@ -3512,7 +3512,7 @@ Analyzing Linguistic Differences between Owner and Staff Attributed Tweets - DanielPreoţiuc-Pietro + DanielPreoţiuc-Pietro RitaDevlin Marier 2848–2853 Research on social media has to date assumed that all posts from an account are authored by the same person. In this study, we challenge this assumption and study the linguistic differences between posts signed by the account owner or attributed to their staff. We introduce a novel data set of tweets posted by U.S. politicians who self-reported their tweets using a signature. We analyze the linguistic topics and style features that distinguish the two types of tweets. Predictive results show that we are able to predict owner and staff attributed tweets with good accuracy, even when not using any training data from that account. @@ -3536,7 +3536,7 @@ Open Domain Event Extraction Using Neural Latent Variable Models XiaoLiu - HeyanHuang + HeyanHuang YueZhang 2860–2871 We consider open domain event extraction, the task of extracting unconstraint types of events from news clusters. A novel latent variable neural model is constructed, which is scalable to very large corpus. A dataset is collected and manually annotated, with task-specific evaluation metrics being designed. Results show that the proposed unsupervised model gives better performance compared to the state-of-the-art method for event schema induction. @@ -3611,7 +3611,7 @@ Is Attention Interpretable? SofiaSerrano - Noah A.Smith + Noah A.Smith 2931–2951 Attention mechanisms have recently boosted performance on a range of NLP tasks. Because attention layers explicitly weight input components’ representations, it is also often assumed that attention can be used to identify information that models found important (e.g., specific contextualized word tokens). We test whether that assumption holds by manipulating attention weights in already-trained text classification models and analyzing the resulting differences in their predictions. While we observe some ways in which higher attention weights correlate with greater impact on model predictions, we also find many ways in which this does not hold, i.e., where gradient-based rankings of attention weights better predict their effects than their magnitudes. We conclude that while attention noisily predicts input components’ overall importance to a model, it is by no means a fail-safe indicator. P19-1282 @@ -3621,7 +3621,7 @@ Correlating Neural and Symbolic Representations of Language - GrzegorzChrupała + GrzegorzChrupała AfraAlishahi 2952–2962 Analysis methods which enable us to better understand the representations and functioning of neural models of language are increasingly needed as deep learning becomes the dominant approach in NLP. Here we present two methods based on Representational Similarity Analysis (RSA) and Tree Kernels (TK) which allow us to directly quantify how strongly the information encoded in neural activation patterns corresponds to information represented by symbolic structures such as syntax trees. We first validate our methods on the case of a simple synthetic language for arithmetic expressions with clearly defined syntax and semantics, and show that they exhibit the expected pattern of results. We then our methods to correlate neural representations of English sentences with their constituency parse trees. @@ -3646,8 +3646,8 @@ ZihangDai ZhilinYang YimingYang - JaimeCarbonell - QuocLe + JaimeCarbonell + QuocLe RuslanSalakhutdinov 2978–2988 Transformers have a potential of learning longer-term dependency, but are limited by a fixed-length context in the setting of language modeling. We propose a novel neural architecture Transformer-XL that enables learning dependency beyond a fixed length without disrupting temporal coherence. It consists of a segment-level recurrence mechanism and a novel positional encoding scheme. Our method not only enables capturing longer-term dependency, but also resolves the context fragmentation problem. As a result, Transformer-XL learns dependency that is 80% longer than RNNs and 450% longer than vanilla Transformers, achieves better performance on both short and long sequences, and is up to 1,800+ times faster than vanilla Transformers during evaluation. Notably, we improve the state-of-the-art results of bpc/perplexity to 0.99 on enwiki8, 1.08 on text8, 18.3 on WikiText-103, 21.8 on One Billion Word, and 54.5 on Penn Treebank (without finetuning). When trained only on WikiText-103, Transformer-XL manages to generate reasonably coherent, novel text articles with thousands of tokens. Our code, pretrained models, and hyperparameters are available in both Tensorflow and PyTorch. @@ -3662,7 +3662,7 @@ JunjieHu MengzhouXia GrahamNeubig - JaimeCarbonell + JaimeCarbonell 2989–3001 It has been previously noted that neural machine translation (NMT) is very sensitive to domain shift. In this paper, we argue that this is a dual effect of the highly lexicalized nature of NMT, resulting in failure for sentences with large numbers of unknown words, and lack of supervision for domain-specific words. To remedy this problem, we propose an unsupervised adaptation method which fine-tunes a pre-trained out-of-domain NMT model using a pseudo-in-domain corpus. Specifically, we perform lexicon induction to extract an in-domain lexicon, and construct a pseudo-parallel in-domain corpus by performing word-for-word back-translation of monolingual in-domain target sentences. In five domains over twenty pairwise adaptation settings and two model architectures, our method achieves consistent improvements without using any in-domain parallel sentences, improving up to 14 BLEU over unadapted models, and up to 2 BLEU over strong back-translation baselines. P19-1286 @@ -3717,7 +3717,7 @@ Look Harder: A Neural Machine Translation Model with Hard Attention - Sathish ReddyIndurthi + Sathish ReddyIndurthi InsooChung SanghaKim 3037–3043 @@ -3742,7 +3742,7 @@ A Simple and Effective Approach to Automatic Post-Editing with Transfer Learning Gonçalo M.Correia - André F. T.Martins + André F. T.Martins 3050–3056 Automatic post-editing (APE) seeks to automatically refine the output of a black-box machine translation (MT) system through human post-edits. APE systems are usually trained by complementing human post-edited data with large, artificial data generated through back-translations, a time-consuming process often no easier than training a MT system from scratch. in this paper, we propose an alternative where we fine-tune pre-trained BERT models on both the encoder and decoder of an APE system, exploring several parameter sharing strategies. By only training on a dataset of 23K sentences for 3 hours on a single GPU we obtain results that are competitive with systems that were trained on 5M artificial sentences. When we add this artificial data our method obtains state-of-the-art results. P19-1292 @@ -3764,10 +3764,10 @@ Training Neural Machine Translation to Apply Terminology Constraints - GeorgianaDinu + GeorgianaDinu PrashantMathur MarcelloFederico - YaserAl-Onaizan + YaserAl-Onaizan 3063–3068 This paper proposes a novel method to inject custom terminology into neural machine translation at run time. Previous works have mainly proposed modifications to the decoding algorithm in order to constrain the output to include run-time-provided target terms. While being effective, these constrained decoding methods add, however, significant computational overhead to the inference step, and, as we show in this paper, can be brittle when tested in realistic conditions. In this paper we approach the problem by training a neural MT system to learn how to use custom terminology when provided with the input. Comparative experiments show that our method is not only more effective than a state-of-the-art implementation of constrained decoding, but is also as fast as constraint-free decoding. P19-1294 @@ -3793,9 +3793,9 @@ RuiWang KehaiChen MasaoUtiyama - EiichiroSumita + EiichiroSumita MinZhang - TiejunZhao + TiejunZhao 3076–3082 The training objective of neural machine translation (NMT) is to minimize the loss between the words in the translated sentences and those in the references. In NMT, there is a natural correspondence between the source sentence and the target sentence. However, this relationship has only been represented using the entire neural network and the training objective is computed in word-level. In this paper, we propose a sentence-level agreement module to directly minimize the difference between the representation of source and target sentence. The proposed agreement module can be integrated into NMT as an additional training objective function and can also be used to enhance the representation of the source sentences. Empirical results on the NIST Chinese-to-English and WMT English-to-German tasks show the proposed agreement module can significantly improve the NMT performance. P19-1296 @@ -3807,7 +3807,7 @@ SukantaSen Kamal KumarGupta AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 3083–3089 In this paper, we propose a multilingual unsupervised NMT scheme which jointly trains multiple languages with a shared encoder and multiple decoders. Our approach is based on denoising autoencoding of each language and back-translating between English and multiple non-English languages. This results in a universal encoder which can encode any language participating in training into an inter-lingual representation, and language-specific decoders. Our experiments using only monolingual corpora show that multilingual unsupervised model performs better than the separately trained bilingual models achieving improvement of up to 1.48 BLEU points on WMT test sets. We also observe that even if we do not train the network for all possible translation directions, the network is still able to translate in a many-to-many fashion leveraging encoder’s ability to generate interlingual representation. P19-1297 @@ -3830,10 +3830,10 @@ Multi-Source Cross-Lingual Model Transfer: Learning What to Share XilunChen - Ahmed HassanAwadallah - HanyHassan + Ahmed HassanAwadallah + HanyHassan WeiWang - ClaireCardie + ClaireCardie 3098–3112 Modern NLP applications have enjoyed a great boost utilizing neural networks models. Such deep neural models, however, are not applicable to most human languages due to the lack of annotated training data for various NLP tasks. Cross-lingual transfer learning (CLTL) is a viable method for building NLP models for a low-resource target language by leveraging labeled data from other (source) languages. In this work, we focus on the multilingual transfer setting where training data in multiple source languages is leveraged to further boost target language performance. Unlike most existing methods that rely only on language-invariant features for CLTL, our approach coherently utilizes both language-invariant and language-specific features at instance level. Our model leverages adversarial networks to learn language-invariant features, and mixture-of-experts models to dynamically exploit the similarity between the target language and each individual source language. This enables our model to learn effectively what to share between various languages in the multilingual setup. Moreover, when coupled with unsupervised multilingual embeddings, our model can operate in a zero-resource setting where neither target language training data nor cross-lingual resources are available. Our model achieves significant performance gains over prior art, as shown in an extensive set of experiments over multiple text classification and sequence tagging tasks including a large-scale industry dataset. P19-1299 @@ -3844,7 +3844,7 @@ Unsupervised Multilingual Word Embedding with Limited Resources using Neural Language Models TakashiWada TomoharuIwata - YujiMatsumoto + YujiMatsumoto 3113–3124 Recently, a variety of unsupervised methods have been proposed that map pre-trained word embeddings of different languages into the same space without any parallel data. These methods aim to find a linear transformation based on the assumption that monolingual word embeddings are approximately isomorphic between languages. However, it has been demonstrated that this assumption holds true only on specific conditions, and with limited resources, the performance of these methods decreases drastically. To overcome this problem, we propose a new unsupervised multilingual embedding method that does not rely on such assumption and performs well under resource-poor scenarios, namely when only a small amount of monolingual data (i.e., 50k sentences) are available, or when the domains of monolingual data are different across languages. Our proposed model, which we call ‘Multilingual Neural Language Models’, shares some of the network parameters among multiple languages, and encodes sentences of multiple languages into the same space. The model jointly learns word embeddings of different languages in the same space, and generates multilingual embeddings without any parallel data or pre-training. Our experiments on word alignment tasks have demonstrated that, on the low-resource condition, our model substantially outperforms existing unsupervised and even supervised methods trained with 500 bilingual pairs of words. Our model also outperforms unsupervised methods given different-domain corpora across languages. Our code is publicly available. P19-1300 @@ -3929,10 +3929,10 @@ CaitlinWesterfield SungrokShim GarrettBingham - AlexanderFabbri + AlexanderFabbri WilliamHu NehaVerma - DragomirRadev + DragomirRadev 3173–3179 In this paper, we propose to boost low-resource cross-lingual document retrieval performance with deep bilingual query-document representations. We match queries and documents in both source and target languages with four components, each of which is implemented as a term interaction-based deep neural network with cross-lingual word embeddings as input. By including query likelihood scores as extra features, our model effectively learns to rerank the retrieved documents by using a small number of relevance labels for low-resource language pairs. Due to the shared cross-lingual word embedding space, the model can also be directly applied to another language pair without any training label. Experimental results on the Material dataset show that our model outperforms the competitive translation-based baselines on English-Swahili, English-Tagalog, and English-Somali cross-lingual information retrieval tasks. P19-1306 @@ -3977,7 +3977,7 @@ <fixed-case>JW</fixed-case>300: A Wide-Coverage Parallel Corpus for Low-Resource Languages - ŽeljkoAgić + ŽeljkoAgić IvanVulić 3204–3210 Viable cross-lingual transfer critically depends on the availability of parallel texts. Shortage of such resources imposes a development and evaluation bottleneck in multilingual processing. We introduce JW300, a parallel corpus of over 300 languages with around 100 thousand parallel sentences per language pair on average. In this paper, we present the resource and showcase its utility in experiments with cross-lingual word embedding induction and multi-source part-of-speech projection. @@ -4052,7 +4052,7 @@ DimaPuzyrev AlexanderPanchenko PawanGoyal - ChrisBiemann + ChrisBiemann AnimeshMukherjee 3263–3274 The compositionality degree of multiword expressions indicates to what extent the meaning of a phrase can be derived from the meaning of its constituents and their grammatical relations. Prediction of (non)-compositionality is a task that has been frequently addressed with distributional semantic models. We introduce a novel technique to blend hierarchical information with distributional information for predicting compositionality. In particular, we use hypernymy information of the multiword and its constituents encoded in the form of the recently introduced Poincaré embeddings in addition to the distributional information to detect compositionality for noun phrases. Using a weighted average of the distributional similarity and a Poincaré similarity function, we obtain consistent and substantial, statistically significant improvement across three gold standard datasets over state-of-the-art models based on distributional information only. Unlike traditional approaches that solely use an unsupervised setting, we have also framed the problem as a supervised task, obtaining comparable improvements. Further, we publicly release our Poincaré embeddings, which are trained on the output of handcrafted lexical-syntactic patterns on a large corpus. @@ -4073,8 +4073,8 @@ Relational Word Embeddings - JoseCamacho-Collados - LuisEspinosa Anke + JoseCamacho-Collados + LuisEspinosa Anke StevenSchockaert 3286–3296 While word embeddings have been shown to implicitly encode various forms of attributional knowledge, the extent to which they capture relational information is far more limited. In previous work, this limitation has been addressed by incorporating relational knowledge from external knowledge bases when learning the word embedding. Such strategies may not be optimal, however, as they are limited by the coverage of available resources and conflate similarity with other forms of relatedness. As an alternative, in this paper we propose to encode relational knowledge in a separate word embedding, which is aimed to be complementary to a given standard word embedding. This relational word embedding is still learned from co-occurrence statistics, and can thus be used even when no external knowledge base is available. Our analysis shows that relational word vectors do indeed capture information that is complementary to what is encoded in standard word embeddings. @@ -4099,7 +4099,7 @@ PrateekYadav PiyushRai ChiranjibBhattacharyya - ParthaTalukdar + ParthaTalukdar 3308–3318 Word embeddings have been widely adopted across several NLP applications. Most existing word embedding methods utilize sequential context of a word to learn its embedding. While there have been some attempts at utilizing syntactic context of a word, such methods result in an explosion of the vocabulary size. In this paper, we overcome this problem by proposing SynGCN, a flexible Graph Convolution based method for learning word embeddings. SynGCN utilizes the dependency context of a word without increasing the vocabulary size. Word embeddings learned by SynGCN outperform existing methods on various intrinsic and extrinsic tasks and provide an advantage when used with ELMo. We also propose SemGCN, an effective framework for incorporating diverse semantic knowledge for further enhancing learned word representations. We make the source code of both models available to encourage reproducible research. P19-1320 @@ -4143,7 +4143,7 @@ Putting Words in Context: <fixed-case>LSTM</fixed-case> Language Models and Lexical Ambiguity LauraAina KristinaGulordava - GemmaBoleda + GemmaBoleda 3342–3348 In neural network models of language, words are commonly represented using context-invariant representations (word embeddings) which are then put in context in the hidden layers. Since words are often ambiguous, representing the contextually relevant information is not trivial. We investigate how an LSTM language model deals with lexical ambiguity in English, designing a method to probe its hidden representations for lexical and contextual information about words. We find that both types of information are represented to a large extent, but also that there is room for improvement for contextual information. P19-1324 @@ -4155,7 +4155,7 @@ AndreyKutuzov MohammadDorgham OleksiyOliynyk - ChrisBiemann + ChrisBiemann AlexanderPanchenko 3349–3355 Graph measures, such as node distances, are inefficient to compute. We explore dense vector representations as an effective way to approximate the same information. We introduce a simple yet efficient and effective approach for learning graph embeddings. Instead of directly operating on the graph structure, our method takes structural measures of pairwise node similarities into account and learns dense node representations reflecting user-defined graph distance measures, such as e.g. the shortest path distance or distance measures that take information beyond the graph structure into account. We demonstrate a speed-up of several orders of magnitude when predicting word similarity by vector operations on our embeddings as opposed to directly computing the respective path-based measures, while outperforming various other graph embeddings on semantic similarity and word sense disambiguation tasks. @@ -4203,8 +4203,8 @@ Exploring Numeracy in Word Embeddings AakankshaNaik AbhilashaRavichander - CarolynRose - EduardHovy + CarolynRose + EduardHovy 3374–3380 Word embeddings are now pervasive across NLP subfields as the de-facto method of forming text representataions. In this work, we show that existing embedding models are inadequate at constructing representations that capture salient aspects of mathematical meaning for numbers, which is important for language understanding. Numbers are ubiquitous and frequently appear in text. Inspired by cognitive studies on how humans perceive numbers, we develop an analysis framework to test how well word embeddings capture two essential properties of numbers: magnitude (e.g. 3<4) and numeration (e.g. 3=three). Our experiments reveal that most models capture an approximate notion of magnitude, but are inadequate at capturing numeration. We hope that our observations provide a starting point for the development of methods which better capture numeracy in NLP systems. P19-1329 @@ -4229,7 +4229,7 @@ YueDong ZichaoLi MehdiRezagholizadeh - Jackie Chi KitCheung + Jackie Chi KitCheung 3393–3402 We present the first sentence simplification model that learns explicit edit operations (ADD, DELETE, and KEEP) via a neural programmer-interpreter approach. Most current neural sentence simplification systems are variants of sequence-to-sequence models adopted from machine translation. These methods learn to simplify sentences as a byproduct of the fact that they are trained on complex-simple sentence pairs. By contrast, our neural programmer-interpreter is directly trained to predict explicit edit operations on targeted parts of the input sentence, resembling the way that humans perform simplification and revision. Our model outperforms previous state-of-the-art neural sentence simplification models (without external knowledge) by large margins on three benchmark text simplification corpora in terms of SARI (+0.95 WikiLarge, +1.89 WikiSmall, +1.41 Newsela), and is judged by humans to produce overall better and simpler output sentences. P19-1331 @@ -4254,7 +4254,7 @@ Transforming Complex Sentences into a Semantic Hierarchy ChristinaNiklaus MatthiasCetto - AndréFreitas + AndréFreitas SiegfriedHandschuh 3415–3427 We present an approach for recursively splitting and rephrasing complex English sentences into a novel semantic hierarchy of simplified sentences, with each of them presenting a more regular structure that may facilitate a wide variety of artificial intelligence tasks, such as machine translation (MT) or information extraction (IE). Using a set of hand-crafted transformation rules, input sentences are recursively transformed into a two-layered hierarchical representation in the form of core sentences and accompanying contexts that are linked via rhetorical relations. In this way, the semantic relationship of the decomposed constituents is preserved in the output, maintaining its interpretability for downstream applications. Both a thorough manual analysis and automatic evaluation across three datasets from two different domains demonstrate that the proposed syntactic simplification approach outperforms the state of the art in structural text simplification. Moreover, an extrinsic evaluation shows that when applying our framework as a preprocessing step the performance of state-of-the-art Open IE systems can be improved by up to 346% in precision and 52% in recall. To enable reproducible research, all code is provided online. @@ -4266,7 +4266,7 @@ Right for the Wrong Reasons: Diagnosing Syntactic Heuristics in Natural Language Inference - R. ThomasMcCoy + R. ThomasMcCoy ElliePavlick TalLinzen 3428–3448 @@ -4311,10 +4311,10 @@ Scalable Syntax-Aware Language Models Using Knowledge Distillation AdhigunaKuncoro - ChrisDyer + ChrisDyer LauraRimell StephenClark - PhilBlunsom + PhilBlunsom 3472–3484 Prior work has shown that, on small amounts of training data, syntactic neural language models learn structurally sensitive generalisations more successfully than sequential language models. However, their computational complexity renders scaling difficult, and it remains an open question whether structural biases are still necessary when sequential models have access to ever larger amounts of training data. To answer this question, we introduce an efficient knowledge distillation (KD) technique that transfers knowledge from a syntactic language model trained on a small corpus to an LSTM language model, hence enabling the LSTM to develop a more structurally sensitive representation of the larger training data it learns from. On targeted syntactic evaluations, we find that, while sequential LSTMs perform much better than previously reported, our proposed technique substantially improves on this baseline, yielding a new state of the art. Our findings and analysis affirm the importance of structural biases, even in models that learn from large amounts of data. P19-1337 @@ -4339,7 +4339,7 @@ AparnaGarimella CarmenBanea DirkHovy - RadaMihalcea + RadaMihalcea 3493–3498 Several linguistic studies have shown the prevalence of various lexical and grammatical patterns in texts authored by a person of a particular gender, but models for part-of-speech tagging and dependency parsing have still not adapted to account for these differences. To address this, we annotate the Wall Street Journal part of the Penn Treebank with the gender information of the articles’ authors, and build taggers and parsers trained on this data that show performance differences in text written by men and women. Further analyses reveal numerous part-of-speech tags and syntactic relations whose prediction performances benefit from the prevalence of a specific gender in the training data. The results underscore the importance of accounting for gendered differences in syntactic tasks, and outline future venues for developing more accurate taggers and parsers. We release our data to the research community. P19-1339 @@ -4362,7 +4362,7 @@ A Multilingual <fixed-case>BPE</fixed-case> Embedding Space for Universal Sentiment Lexicon Induction MengjieZhao - HinrichSchütze + HinrichSchütze 3506–3517 We present a new method for sentiment lexicon induction that is designed to be applicable to the entire range of typological diversity of the world’s languages. We evaluate our method on Parallel Bible Corpus+ (PBC+), a parallel corpus of 1593 languages. The key idea is to use Byte Pair Encodings (BPEs) as basic units for multilingual embeddings. Through zero-shot transfer from English sentiment, we learn a seed lexicon for each language in the domain of PBC+. Through domain adaptation, we then generalize the domain-specific lexicon to a general one. We show – across typologically diverse languages in PBC+ – good quality of seed and general-domain sentiment lexicons by intrinsic and extrinsic and by automatic and human evaluation. We make freely available our code, seed sentiment lexicons for all 1593 languages and induced general-domain sentiment lexicons for 200 languages. P19-1341 @@ -4415,7 +4415,7 @@ XiaozhongLiu LuoSi MinZhang - GuodongZhou + GuodongZhou 3548–3557 In the literature, existing studies on aspect sentiment classification (ASC) focus on individual non-interactive reviews. This paper extends the research to interactive reviews and proposes a new research task, namely Aspect Sentiment Classification towards Question-Answering (ASC-QA), for real-world applications. This new task aims to predict sentiment polarities for specific aspects from interactive QA style reviews. In particular, a high-quality annotated corpus is constructed for ASC-QA to facilitate corresponding research. On this basis, a Reinforced Bidirectional Attention Network (RBAN) approach is proposed to address two inherent challenges in ASC-QA, i.e., semantic matching between question and answer, and data noise. Experimental results demonstrate the great advantage of the proposed approach to ASC-QA against several state-of-the-art baselines. P19-1345 @@ -4455,7 +4455,7 @@ Generating Question Relevant Captions to Aid Visual Question Answering JialinWu ZeyuanHu - RaymondMooney + RaymondMooney 3585–3594 Visual question answering (VQA) and image captioning require a shared body of general knowledge connecting language and vision. We present a novel approach to better VQA performance that exploits this connection by jointly generating captions that are targeted to help answer a specific visual question. The model is trained using an existing caption dataset by automatically determining question-relevant captions using an online gradient-based method. Experimental results on the VQA v2 challenge demonstrates that our approach obtains state-of-the-art VQA performance (e.g. 68.4% in the Test-standard set using a single model) by simultaneously generating question-relevant captions. P19-1348 @@ -4467,7 +4467,7 @@ Multi-grained Attention with Object-level Grounding for Visual Question Answering PingpingHuang JianhuiHuang - YuqingGuo + YuqingGuo MinQiao YongZhu 3595–3600 @@ -4480,9 +4480,9 @@ Psycholinguistics Meets Continual Learning: Measuring Catastrophic Forgetting in Visual Question Answering ClaudioGreco - BarbaraPlank - RaquelFernández - RaffaellaBernardi + BarbaraPlank + RaquelFernández + RaffaellaBernardi 3601–3605 We study the issue of catastrophic forgetting in the context of neural multimodal approaches to Visual Question Answering (VQA). Motivated by evidence from psycholinguistics, we devise a set of linguistically-informed VQA tasks, which differ by the types of questions involved (Wh-questions and polar questions). We test what impact task difficulty has on continual learning, and whether the order in which a child acquires question types facilitates computational models. Our results show that dramatic forgetting is at play and that task difficulty and order matter. Two well-known current continual learning methods mitigate the problem only to a limiting degree. P19-1350 @@ -4558,8 +4558,8 @@ What Does <fixed-case>BERT</fixed-case> Learn about the Structure of Language? GaneshJawahar - BenoîtSagot - DjaméSeddah + BenoîtSagot + DjaméSeddah 3651–3657 BERT is a recent language representation model that has surprisingly performed well in diverse language understanding benchmarks. This result indicates the possibility that BERT networks capture structural information about language. In this work, we provide novel support for this claim by performing a series of experiments to unpack the elements of English language structure learned by BERT. Our findings are fourfold. BERT’s phrasal representation captures the phrase-level information in the lower layers. The intermediate layers of BERT compose a rich hierarchy of linguistic information, starting with surface features at the bottom, syntactic features in the middle followed by semantic features at the top. BERT requires deeper layers while tracking subject-verb agreement to handle long-term dependency problem. Finally, the compositional scheme underlying BERT mimics classical, tree-like structures. P19-1356 @@ -4583,7 +4583,7 @@ Learning from Dialogue after Deployment: Feed Yourself, Chatbot! BradenHancock AntoineBordes - Pierre-EmmanuelMazare + Pierre-EmmanuelMazare JasonWeston 3667–3684 The majority of conversations a dialogue agent sees over its lifetime occur after it has already been trained and deployed, leaving a vast store of potential training signal untapped. In this work, we propose the self-feeding chatbot, a dialogue agent with the ability to extract new training examples from the conversations it participates in. As our agent engages in conversation, it also estimates user satisfaction in its responses. When the conversation appears to be going well, the user’s responses become new training examples to imitate. When the agent believes it has made a mistake, it asks for feedback; learning to predict the feedback that will be given improves the chatbot’s dialogue abilities further. On the PersonaChat chit-chat dataset with over 131k training examples, we find that learning from dialogue with a self-feeding chatbot significantly improves performance, regardless of the amount of traditional supervision. @@ -4597,7 +4597,7 @@ XiaoqingZheng LuLiu MuXu - XuanjingHuang + XuanjingHuang 3685–3695 It is desirable for dialog systems to have capability to express specific emotions during a conversation, which has a direct, quantifiable impact on improvement of their usability and user satisfaction. After a careful investigation of real-life conversation data, we found that there are at least two ways to express emotions with language. One is to describe emotional states by explicitly using strong emotional words; another is to increase the intensity of the emotional experiences by implicitly combining neutral words in distinct ways. We propose an emotional dialogue system (EmoDS) that can generate the meaningful responses with a coherent structure for a post, and meanwhile express the desired emotion explicitly or implicitly within a unified framework. Experimental results showed EmoDS performed better than the baselines in BLEU, diversity and the quality of emotional expression. P19-1359 @@ -4622,8 +4622,8 @@ WeikangWang JiajunZhang QianLi - Mei-YuhHwang - ChengqingZong + Mei-YuhHwang + ChengqingZong ZhifeiLi 3710–3720 Clarifying user needs is essential for existing task-oriented dialogue systems. However, in real-world applications, developers can never guarantee that all possible user demands are taken into account in the design phase. Consequently, existing systems will break down when encountering unconsidered user needs. To address this problem, we propose a novel incremental learning framework to design task-oriented dialogue systems, or for short Incremental Dialogue System (IDS), without pre-defining the exhaustive list of user needs. Specifically, we introduce an uncertainty estimation module to evaluate the confidence of giving correct responses. If there is high confidence, IDS will provide responses to users. Otherwise, humans will be involved in the dialogue process, and IDS can learn from human intervention through an online learning module. To evaluate our method, we propose a new dataset which simulates unanticipated user needs in the deployment stage. Experiments show that IDS is robust to unconsidered user actions, and can update itself online by smartly selecting only the most effective training data, and hence attains better performance with less annotation cost. @@ -4638,7 +4638,7 @@ YanyanLan LiangPang JiafengGuo - XueqiCheng + XueqiCheng 3721–3730 In multi-turn dialogue generation, response is usually related with only a few contexts. Therefore, an ideal model should be able to detect these relevant contexts and produce a suitable response accordingly. However, the widely used hierarchical recurrent encoder-decoder models just treat all the contexts indiscriminately, which may hurt the following response generation process. Some researchers try to use the cosine similarity or the traditional attention mechanism to find the relevant contexts, but they suffer from either insufficient relevance assumption or position bias problem. In this paper, we propose a new model, named ReCoSa, to tackle this problem. Firstly, a word level LSTM encoder is conducted to obtain the initial representation of each context. Then, the self-attention mechanism is utilized to update both the context and masked response representation. Finally, the attention weights between each context and response representations are computed and used in the further decoding process. Experimental results on both Chinese customer services dataset and English Ubuntu dialogue dataset show that ReCoSa significantly outperforms baseline models, in terms of both metric-based and human evaluations. Further analysis on attention shows that the detected relevant contexts by ReCoSa are highly coherent with human’s understanding, validating the correctness and interpretability of ReCoSa. P19-1362 @@ -4687,7 +4687,7 @@ Retrieval-Enhanced Adversarial Training for Neural Response Generation QingfuZhu LeiCui - Wei-NanZhang + Wei-NanZhang FuruWei TingLiu 3763–3773 @@ -4789,13 +4789,13 @@ A Large-Scale Corpus for Conversation Disentanglement Jonathan K.Kummerfeld Sai R.Gouravajhala - Joseph J.Peper + Joseph J.Peper VigneshAthreya ChulakaGunasekara JatinGanhotra Siva SankalpPatel Lazaros CPolymenakos - WalterLasecki + WalterLasecki 3846–3856 Disentangling conversations mixed together in a single stream of messages is a difficult task, made harder by the lack of large manually annotated datasets. We created a new dataset of 77,563 messages manually annotated with reply-structure graphs that both disentangle conversations and define internal conversation structure. Our data is 16 times larger than all previously released datasets combined, the first to include adjudication of annotation disagreements, and the first to include context. We use our data to re-examine prior work, in particular, finding that 89% of conversations in a widely used dialogue corpus are either missing messages or contain extra messages. Our manually-annotated data presents an opportunity to develop robust data-driven methods for conversation disentanglement, which will help advance dialogue research. P19-1374 @@ -4819,7 +4819,7 @@ Are we there yet? Encoder-decoder neural networks as cognitive models of <fixed-case>E</fixed-case>nglish past tense inflection MariaCorkery YevgenMatusevych - SharonGoldwater + SharonGoldwater 3868–3877 The cognitive mechanisms needed to account for the English past tense have long been a subject of debate in linguistics and cognitive science. Neural network models were proposed early on, but were shown to have clear flaws. Recently, however, Kirov and Cotterell (2018) showed that modern encoder-decoder (ED) models overcome many of these flaws. They also presented evidence that ED models demonstrate humanlike performance in a nonce-word task. Here, we look more closely at the behaviour of their model in this task. We find that (1) the model exhibits instability across multiple simulations in terms of its correlation with human data, and (2) even when results are aggregated across simulations (treating each simulation as an individual human participant), the fit to the human data is not strong—worse than an older rule-based model. These findings hold up through several alternative training regimes and evaluation measures. Although other neural architectures might do better, we conclude that there is still insufficient evidence to claim that neural nets are a good cognitive model for this task. P19-1376 @@ -4829,7 +4829,7 @@ A Spreading Activation Framework for Tracking Conceptual Complexity of Texts IoanaHulpuș - SanjaŠtajner + SanjaŠtajner HeinerStuckenschmidt 3878–3887 We propose an unsupervised approach for assessing conceptual complexity of texts, based on spreading activation. Using DBpedia knowledge graph as a proxy to long-term memory, mentioned concepts become activated and trigger further activation as the text is sequentially traversed. Drawing inspiration from psycholinguistic theories of reading comprehension, we model memory processes such as semantic priming, sentence wrap-up, and forgetting. We show that our models capture various aspects of conceptual text complexity and significantly outperform current state of the art. @@ -4862,7 +4862,7 @@ Miss Tools and Mr Fruit: Emergent Communication in Agents Learning about Object Affordances DianeBouchacourt - MarcoBaroni + MarcoBaroni 3909–3918 Recent research studies communication emergence in communities of deep network agents assigned a joint task, hoping to gain insights on human language evolution. We propose here a new task capturing crucial aspects of the human environment, such as natural object affordances, and of human conversation, such as full symmetry among the participants. By conducting a thorough pragmatic and semantic analysis of the emergent protocol, we show that the agents solve the shared task through genuine bilateral, referential communication. However, the agents develop multiple idiolects, which makes us conclude that full symmetry is not a sufficient condition for a common language to emerge. P19-1380 @@ -4873,7 +4873,7 @@ <fixed-case>CNN</fixed-case>s found to jump around more skillfully than <fixed-case>RNN</fixed-case>s: Compositional Generalization in Seq2seq Convolutional Networks RobertoDessì - MarcoBaroni + MarcoBaroni 3919–3923 Lake and Baroni (2018) introduced the SCAN dataset probing the ability of seq2seq models to capture compositional generalizations, such as inferring the meaning of “jump around” 0-shot from the component words. Recurrent networks (RNNs) were found to completely fail the most challenging generalization cases. We test here a convolutional network (CNN) on these tasks, reporting hugely improved performance with respect to RNNs. Despite the big improvement, the CNN has however not induced systematic rules, suggesting that the difference between compositional and non-compositional behaviour is not clear-cut. P19-1381 @@ -4895,7 +4895,7 @@ Is Word Segmentation Child’s Play in All Languages? Georgia R.Loukatou - StevenMoran + StevenMoran DamianBlasi SabineStoll AlejandrinaCristia @@ -4912,7 +4912,7 @@ LawrenceWolf-Sonkin SabineStoll BalthasarBickel - MarcoBaroni + MarcoBaroni 3938–3943 Embedding a clause inside another (“the girl [who likes cars [that run fast]] has arrived”) is a fundamental resource that has been argued to be a key driver of linguistic expressiveness. As such, it plays a central role in fundamental debates on what makes human language unique, and how they might have evolved. Empirical evidence on the prevalence and the limits of embeddings has however been based on either laboratory setups or corpus data of relatively limited size. We introduce here a collection of large, dependency-parsed written corpora in 17 languages, that allow us, for the first time, to capture clausal embedding through dependency graphs and assess their distribution. Our results indicate that there is no evidence for hard constraints on embedding depth: the tail of depth distributions is heavy. Moreover, although deeply embedded clauses tend to be shorter, suggesting processing load issues, complex sentences with many embeddings do not display a bias towards less deep embeddings. Taken together, the results suggest that deep embeddings are not disfavoured in written language. More generally, our study illustrates how resources and methods from latest-generation big-data NLP can provide new perspectives on fundamental questions in theoretical linguistics. P19-1384 @@ -4937,7 +4937,7 @@ AdamTrischler KaheerSuleman HannesSchulz - Jackie Chi KitCheung + Jackie Chi KitCheung 3952–3961 We introduce a new benchmark for coreference resolution and NLI, KnowRef, that targets common-sense understanding and world knowledge. Previous coreference resolution tasks can largely be solved by exploiting the number and gender of the antecedents, or have been handcrafted and do not reflect the diversity of naturally occurring text. We present a corpus of over 8,000 annotated text passages with ambiguous pronominal anaphora. These instances are both challenging and realistic. We show that various coreference systems, whether rule-based, feature-rich, or neural, perform significantly worse on the task than humans, who display high inter-annotator agreement. To explain this performance gap, we show empirically that state-of-the art models often fail to capture context, instead relying on the gender or number of candidate antecedents to make a decision. We then use problem-specific insights to propose a data-augmentation trick called antecedent switching to alleviate this tendency in models. Finally, we show that antecedent switching yields promising results on other tasks as well: we use it to achieve state-of-the-art results on the GAP coreference task. P19-1386 @@ -4997,7 +4997,7 @@ Crowdsourcing and Validating Event-focused Emotion Corpora for <fixed-case>G</fixed-case>erman and <fixed-case>E</fixed-case>nglish EnricaTroiano - SebastianPadó + SebastianPadó RomanKlinger 4005–4011 Sentiment analysis has a range of corpora available across multiple languages. For emotion analysis, the situation is more limited, which hinders potential research on crosslingual modeling and the development of predictive models for other languages. In this paper, we fill this gap for German by constructing deISEAR, a corpus designed in analogy to the well-established English ISEAR emotion dataset. Motivated by Scherer’s appraisal theory, we implement a crowdsourcing experiment which consists of two steps. In step 1, participants create descriptions of emotional events for a given emotion. In step 2, five annotators assess the emotion expressed by the texts. We show that transferring an emotion classification model from the original English ISEAR to the German crowdsourced deISEAR via machine translation does not, on average, cause a performance drop. @@ -5009,10 +5009,10 @@ Pay Attention when you Pay the Bills. A Multilingual Corpus with Dependency-based and Semantic Annotation of Collocations. MarcosGarcia - MarcosGarcía Salido - SusanaSotelo + MarcosGarcía Salido + SusanaSotelo EstelaMosqueira - MargaritaAlonso-Ramos + MargaritaAlonso-Ramos 4012–4019 This paper presents a new multilingual corpus with semantic annotation of collocations in English, Portuguese, and Spanish. The whole resource contains 155k tokens and 1,526 collocations labeled in context. The annotated examples belong to three syntactic relations (adjective-noun, verb-object, and nominal compounds), and represent 58 lexical functions in the Meaning-Text Theory (e.g., Oper, Magn, Bon, etc.). Each collocation was annotated by three linguists and the final resource was revised by a team of experts. The resulting corpus can serve as a basis to evaluate different approaches for collocation identification, which in turn can be useful for different NLP tasks such as natural language understanding or natural language generation. P19-1392 @@ -5093,7 +5093,7 @@ Hubless Nearest Neighbor Search for Bilingual Lexicon Induction JiajiHuang QiangQiu - KennethChurch + KennethChurch 4072–4080 Bilingual Lexicon Induction (BLI) is the task of translating words from corpora in two languages. Recent advances in BLI work by aligning the two word embedding spaces. Following that, a key step is to retrieve the nearest neighbor (NN) in the target space given the source word. However, a phenomenon called hubness often degrades the accuracy of NN. Hubness appears as some data points, called hubs, being extra-ordinarily close to many of the other data points. Reducing hubness is necessary for retrieval tasks. One successful example is Inverted SoFtmax (ISF), recently proposed to improve NN. This work proposes a new method, Hubless Nearest Neighbor (HNN), to mitigate hubness. HNN differs from NN by imposing an additional equal preference assumption. Moreover, the HNN formulation explains why ISF works as well as it does. Empirical results demonstrate that HNN outperforms NN, ISF and other state-of-the-art. For reproducibility and follow-ups, we have published all code. P19-1399 @@ -5113,10 +5113,10 @@ Learning How to Active Learn by Dreaming - Thuy-TrangVu + Thuy-TrangVu MingLiu DinhPhung - GholamrezaHaffari + GholamrezaHaffari 4091–4101 Heuristic-based active learning (AL) methods are limited when the data distribution of the underlying learning problems vary. Recent data-driven AL policy learning methods are also restricted to learn from closely related domains. We introduce a new sample-efficient method that learns the AL policy directly on the target domain of interest by using wake and dream cycles. Our approach interleaves between querying the annotation of the selected datapoints to update the underlying student learner and improving AL policy using simulation where the current student learner acts as an imperfect annotator. We evaluate our method on cross-domain and cross-lingual text classification and named entity recognition tasks. Experimental results show that our dream-based AL policy training strategy is more effective than applying the pretrained policy without further fine-tuning and better than the existing strong baseline methods that use heuristics or reinforcement learning. P19-1401 @@ -5138,7 +5138,7 @@ Neural Temporality Adaptation for Document Classification: Diachronic Word Embeddings and Domain Adaptation Models XiaoleiHuang - Michael J.Paul + Michael J.Paul 4113–4123 Language usage can change across periods of time, but document classifiers models are usually trained and tested on corpora spanning multiple years without considering temporal variations. This paper describes two complementary ways to adapt classifiers to shifts across time. First, we show that diachronic word embeddings, which were originally developed to study language change, can also improve document classification, and we show a simple method for constructing this type of embedding. Second, we propose a time-driven neural classification model inspired by methods for domain adaptation. Experiments on six corpora show how these methods can make classifiers more robust over time. P19-1403 @@ -5184,7 +5184,7 @@ RyanBenmalek MadianKhabsa SumaDesu - ClaireCardie + ClaireCardie MicheleBanko 4157–4167 We introduce the Scratchpad Mechanism, a novel addition to the sequence-to-sequence (seq2seq) neural network architecture and demonstrate its effectiveness in improving the overall fluency of seq2seq models for natural language generation tasks. By enabling the decoder at each time step to write to all of the encoder output layers, Scratchpad can employ the encoder as a “scratchpad” memory to keep track of what has been generated so far and thereby guide future generation. We evaluate Scratchpad in the context of three well-studied natural language generation tasks — Machine Translation, Question Generation, and Text Summarization — and obtain state-of-the-art or comparable performance on standard datasets for each task. Qualitative assessments in the form of human judgements (question generation), attention visualization (MT), and sample output (summarization) provide further evidence of the ability of Scratchpad to generate fluent and expressive output. @@ -5194,9 +5194,9 @@ Using Automatically Extracted Minimum Spans to Disentangle Coreference Evaluation from Boundary Detection - Nafise SadatMoosavi + Nafise SadatMoosavi LeoBorn - MassimoPoesio + MassimoPoesio MichaelStrube 4168–4178 The common practice in coreference resolution is to identify and evaluate the maximum span of mentions. The use of maximum spans tangles coreference evaluation with the challenges of mention boundary detection like prepositional phrase attachment. To address this problem, minimum spans are manually annotated in smaller corpora. However, this additional annotation is costly and therefore, this solution does not scale to large corpora. In this paper, we propose the MINA algorithm for automatically extracting minimum spans to benefit from minimum span evaluation in all corpora. We show that the extracted minimum spans by MINA are consistent with those that are manually annotated by experts. Our experiments show that using minimum spans is in particular important in cross-dataset coreference evaluation, in which detected mention boundaries are noisier due to domain shift. We have integrated MINA into https://github.com/ns-moosavi/coval for reporting standard coreference scores based on both maximum and automatically detected minimum spans. @@ -5223,7 +5223,7 @@ A Unified Linear-Time Framework for Sentence-Level Discourse Parsing XiangLin - ShafiqJoty + ShafiqJoty PrathyushaJwalapuram M SaifulBari 4190–4200 @@ -5250,7 +5250,7 @@ Do You Know That Florence Is Packed with Visitors? Evaluating State-of-the-art Models of Speaker Commitment NanjiangJiang - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe 4208–4213 When a speaker, Mary, asks “Do you know that Florence is packed with visitors?”, we take her to believe that Florence is packed with visitors, but not if she asks “Do you think that Florence is packed with visitors?”. Inferring speaker commitment (aka event factuality) is crucial for information extraction and question answering. Here, we explore the hypothesis that linguistic deficits drive the error patterns of existing speaker commitment models by analyzing the linguistic correlates of model error on a challenging naturalistic dataset. We evaluate two state-of-the-art speaker commitment models on the CommitmentBank, an English dataset of naturally occurring discourses. The CommitmentBank is annotated with speaker commitment towards the content of the complement (“Florence is packed with visitors” in our example) of clause-embedding verbs (“know”, “think”) under four entailment-canceling environments (negation, modal, question, conditional). A breakdown of items by linguistic features reveals asymmetrical error patterns: while the models achieve good performance on some classes (e.g., negation), they fail to generalize to the diverse linguistic constructions (e.g., conditionals) in natural language, highlighting directions for improvement. P19-1412 @@ -5272,7 +5272,7 @@ Open-Domain Why-Question Answering with Adversarial Learning to Encode Answer Texts - Jong-HoonOh + Jong-HoonOh KazumaKadowaki JulienKloetzer RyuIida @@ -5307,7 +5307,7 @@ SameerSingh MattGardner HannanehHajishirzi - LukeZettlemoyer + LukeZettlemoyer 4249–4257 Multi-hop reading comprehension (RC) questions are challenging because they require reading and reasoning over multiple paragraphs. We argue that it can be difficult to construct large multi-hop RC datasets. For example, even highly compositional questions can be answered with a single hop if they target specific entity types, or the facts needed to answer them are redundant. Our analysis is centered on HotpotQA, where we show that single-hop reasoning can solve much more of the dataset than previously thought. We introduce a single-hop BERT-based RC model that achieves 67 F1—comparable to state-of-the-art multi-hop models. We also design an evaluation setting where humans are not shown all of the necessary paragraphs for the intended multi-hop reasoning but can still answer over 80% of questions. Together with detailed error analysis, these results suggest there should be an increasing focus on the role of evidence in multi-hop reasoning and possibly even a shift towards information retrieval style evaluations with large and diverse evidence collections. P19-1416 @@ -5469,8 +5469,8 @@ <fixed-case>A</fixed-case>uto<fixed-case>ML</fixed-case> Strategy Based on Grammatical Evolution: A Case Study about Knowledge Discovery from Text SuilanEstevez-Velarde - YoanGutiérrez - AndrésMontoyo + YoanGutiérrez + AndrésMontoyo YudiviánAlmeida-Cruz 4356–4365 The process of extracting knowledge from natural language text poses a complex problem that requires both a combination of machine learning techniques and proper feature selection. Recent advances in Automatic Machine Learning (AutoML) provide effective tools to explore large sets of algorithms, hyper-parameters and features to find out the most suitable combination of them. This paper proposes a novel AutoML strategy based on probabilistic grammatical evolution, which is evaluated on the health domain by facing the knowledge discovery challenge in Spanish text documents. Our approach achieves state-of-the-art results and provides interesting insights into the best combination of parameters and algorithms to use when dealing with this challenge. Source code is provided for the research community. @@ -5565,7 +5565,7 @@ ShiyuChang MoYu ConghuiZhu - TiejunZhao + TiejunZhao 4418–4429 Natural Language Sentence Matching (NLSM) has gained substantial attention from both academics and the industry, and rich public datasets contribute a lot to this process. However, biased datasets can also hurt the generalization performance of trained models and give untrustworthy evaluation results. For many NLSM datasets, the providers select some pairs of sentences into the datasets, and this sampling procedure can easily bring unintended pattern, i.e., selection bias. One example is the QuoraQP dataset, where some content-independent naive features are unreasonably predictive. Such features are the reflection of the selection bias and termed as the “leakage features.” In this paper, we investigate the problem of selection bias on six NLSM datasets and find that four out of them are significantly biased. We further propose a training and evaluation framework to alleviate the bias. Experimental results on QuoraQP suggest that the proposed framework can improve the generalization ability of trained models, and give more trustworthy evaluation results for real-world adoptions. P19-1435 @@ -5579,7 +5579,7 @@ MinjoonSeo JinhyukLee TomKwiatkowski - AnkurParikh + AnkurParikh AliFarhadi HannanehHajishirzi 4430–4441 @@ -5617,7 +5617,7 @@ JanHula PatrickXia RaghavendraPappagari - R. ThomasMcCoy + R. ThomasMcCoy RomaPatel NajoungKim IanTenney @@ -5626,15 +5626,15 @@ ShuningJin BerlinChen BenjaminVan Durme - EdouardGrave + EdouardGrave ElliePavlick - Samuel R.Bowman + Samuel R.Bowman 4465–4476 Natural language understanding has recently seen a surge of progress with the use of sentence encoders like ELMo (Peters et al., 2018a) and BERT (Devlin et al., 2019) which are pretrained on variants of language modeling. We conduct the first large-scale systematic study of candidate pretraining tasks, comparing 19 different tasks both as alternatives and complements to language modeling. Our primary results support the use language modeling, especially when combined with pretraining on additional labeled-data tasks. However, our results are mixed across pretraining tasks and show some concerning trends: In ELMo’s pretrain-then-freeze paradigm, random baselines are worryingly strong and results vary strikingly across target tasks. In addition, fine-tuning BERT on an intermediate task often negatively impacts downstream transfer. In a more positive trend, we see modest gains from multitask training, suggesting the development of more sophisticated multitask and transfer learning techniques as an avenue for further research. P19-1439 P19-1439.Supplementary.pdf - 10.18653/v1/P19-1439 P19-1439.Poster.pdf + 10.18653/v1/P19-1439 wang-etal-2019-tell @@ -5665,7 +5665,7 @@ <fixed-case>D</fixed-case>is<fixed-case>S</fixed-case>ent: Learning Sentence Representations from Explicit Discourse Relations AllenNie ErinBennett - NoahGoodman + NoahGoodman 4497–4510 Learning effective representations of sentences is one of the core missions of natural language understanding. Existing models either train on a vast amount of text, or require costly, manually curated sentence relation datasets. We show that with dependency parsing and rule-based rubrics, we can curate a high quality sentence relation task by leveraging explicit discourse relations. We show that our curated dataset provides an excellent signal for learning vector representations of sentence meaning, representing relations that can only be determined when the meanings of two sentences are combined. We demonstrate that the automatically curated corpus allows a bidirectional LSTM sentence encoder to yield high quality sentence embeddings and can serve as a supervised fine-tuning dataset for larger models such as BERT. Our fixed sentence embeddings achieve high performance on a variety of transfer tasks, including SentEval, and we achieve state-of-the-art results on Penn Discourse Treebank’s implicit relation prediction task. P19-1442 @@ -5678,7 +5678,7 @@ RuiZhang MichihiroYasunaga Yi ChernTan - Xi VictoriaLin + Xi VictoriaLin SuyiLi HeyangEr IreneLi @@ -5692,7 +5692,7 @@ VincentZhang CaimingXiong RichardSocher - DragomirRadev + DragomirRadev 4511–4523 We present SParC, a dataset for cross-domainSemanticParsing inContext that consists of 4,298 coherent question sequences (12k+ individual questions annotated with SQL queries). It is obtained from controlled user interactions with 200 complex databases over 138 domains. We provide an in-depth analysis of SParC and show that it introduces new challenges compared to existing datasets. SParC demonstrates complex contextual dependencies, (2) has greater semantic diversity, and (3) requires generalization to unseen domains due to its cross-domain nature and the unseen databases at test time. We experiment with two state-of-the-art text-to-SQL models adapted to the context-dependent, cross-domain setup. The best model obtains an exact match accuracy of 20.2% over all questions and less than10% over all interaction sequences, indicating that the cross-domain setting and the con-textual phenomena of the dataset present significant challenges for future research. The dataset, baselines, and leaderboard are released at https://yale-lily.github.io/sparc. P19-1443 @@ -5760,7 +5760,7 @@ Human vs. Muppet: A Conservative Estimate of Human Performance on the <fixed-case>GLUE</fixed-case> Benchmark NikitaNangia - Samuel R.Bowman + Samuel R.Bowman 4566–4575 The GLUE benchmark (Wang et al., 2019b) is a suite of language understanding tasks which has seen dramatic progress in the past year, with average performance moving from 70.0 at launch to 83.9, state of the art at the time of writing (May 24, 2019). Here, we measure human performance on the benchmark, in order to learn whether significant headroom remains for further progress. We provide a conservative estimate of human performance on the benchmark through crowdsourcing: Our annotators are non-experts who must learn each task from a brief set of instructions and 20 examples. In spite of limited training, these annotators robustly outperform the state of the art on six of the nine GLUE tasks and achieve an average score of 87.1. Given the fast pace of progress however, the headroom we observe is quite limited. To reproduce the data-poor setting that our annotators must learn in, we also train the BERT model (Devlin et al., 2019) in limited-data regimes, and conclude that low-resource sentence classification remains a challenge for modern neural network approaches to text understanding. P19-1449 @@ -5784,8 +5784,8 @@ TahiraNaseem AbhishekShah HuiWan - RaduFlorian - SalimRoukos + RaduFlorian + SalimRoukos MiguelBallesteros 4586–4592 Our work involves enriching the Stack-LSTM transition-based AMR parser (Ballesteros and Al-Onaizan, 2017) by augmenting training with Policy Learning and rewarding the Smatch score of sampled graphs. In addition, we also combined several AMR-to-text alignments with an attention mechanism and we supplemented the parser with pre-processed concept identification, named entities and contextualized embeddings. We achieve a highly competitive performance that is comparable to the best published results. We show an in-depth study ablating each of the new components of the parser. @@ -5833,7 +5833,7 @@ DevamanyuHazarika VerónicaPérez-Rosas RogerZimmermann - RadaMihalcea + RadaMihalcea SoujanyaPoria 4619–4629 Sarcasm is often expressed through several verbal and non-verbal cues, e.g., a change of tone, overemphasis in a word, a drawn-out syllable, or a straight looking face. Most of the recent work in sarcasm detection has been carried out on textual data. In this paper, we argue that incorporating multimodal cues can improve the automatic classification of sarcasm. As a first step towards enabling the development of multimodal approaches for sarcasm detection, we propose a new sarcasm dataset, Multimodal Sarcasm Detection Dataset (MUStARD), compiled from popular TV shows. MUStARD consists of audiovisual utterances annotated with sarcasm labels. Each utterance is accompanied by its context of historical utterances in the dialogue, which provides additional information on the scenario where the utterance occurs. Our initial results show that the use of multimodal information can reduce the relative error rate of sarcasm detection by up to 12.9% in F-score when compared to the use of individual modalities. The full dataset is publicly available for use at https://github.com/soujanyaporia/MUStARD. @@ -5846,7 +5846,7 @@ Determining Relative Argument Specificity and Stance for Complex Argumentative Structures EsinDurmus FaisalLadhak - ClaireCardie + ClaireCardie 4630–4641 Systems for automatic argument generation and debate require the ability to (1) determine the stance of any claims employed in the argument and (2) assess the specificity of each claim relative to the argument context. Existing work on understanding claim specificity and stance, however, has been limited to the study of argumentative structures that are relatively shallow, most often consisting of a single claim that directly supports or opposes the argument thesis. In this paper, we tackle these tasks in the context of complex arguments on a diverse set of topics. In particular, our dataset consists of manually curated argument trees for 741 controversial topics covering 95,312 unique claims; lines of argument are generally of depth 2 to 6. We find that as the distance between a pair of claims increases along the argument path, determining the relative specificity of a pair of claims becomes easier and determining their relative stance becomes harder. P19-1456 @@ -5887,7 +5887,7 @@ Recognising Agreement and Disagreement between Stances with Reason Comparing Networks ChangXu - CecileParis + CecileParis SuryaNepal RossSparks 4665–4671 @@ -5899,7 +5899,7 @@ Toward Comprehensive Understanding of a Sentiment Based on Human Motives NaokiOtani - EduardHovy + EduardHovy 4672–4677 In sentiment detection, the natural language processing community has focused on determining holders, facets, and valences, but has paid little attention to the reasons for sentiment decisions. Our work considers human motives as the driver for human sentiments and addresses the problem of motive detection as the first step. Following a study in psychology, we define six basic motives that cover a wide range of topics appearing in review texts, annotate 1,600 texts in restaurant and laptop domains with the motives, and report the performance of baseline methods on this new dataset. We also show that cross-domain transfer learning boosts detection performance, which indicates that these universal motives exist across different domains. P19-1461 @@ -5974,7 +5974,7 @@ Neural Network Alignment for Sentential Paraphrases JessicaOuyang - KathyMcKeown + KathyMcKeown 4724–4735 We present a monolingual alignment system for long, sentence- or clause-level alignments, and demonstrate that systems designed for word- or short phrase-based alignment are ill-suited for these longer alignments. Our system is capable of aligning semantically similar spans of arbitrary length. We achieve significantly higher recall on aligning phrases of four or more words and outperform state-of-the- art aligners on the long alignments in the MSR RTE corpus. P19-1467 @@ -5984,9 +5984,9 @@ Duality of Link Prediction and Entailment Graph Induction Mohammad JavadHosseini - Shay B.Cohen + Shay B.Cohen MarkJohnson - MarkSteedman + MarkSteedman 4736–4746 Link prediction and entailment graph induction are often treated as different problems. In this paper, we show that these two problems are actually complementary. We train a link prediction model on a knowledge graph of assertions extracted from raw text. We propose an entailment score that exploits the new facts discovered by the link prediction model, and then form entailment graphs between relations. We further use the learned entailments to predict improved link prediction scores. Our results show that the two tasks can benefit from each other. The new entailment score outperforms prior state-of-the-art results on a standard entialment dataset and the new link prediction scores show improvements over the raw link prediction scores. P19-1468 @@ -6021,7 +6021,7 @@ Detecting Subevents using Discourse and Narrative Features MohammedAldawsari - MarkFinlayson + MarkFinlayson 4780–4790 Recognizing the internal structure of events is a challenging language processing task of great importance for text understanding. We present a supervised model for automatically identifying when one event is a subevent of another. Building on prior work, we introduce several novel features, in particular discourse and narrative features, that significantly improve upon prior state-of-the-art performance. Error analysis further demonstrates the utility of these features. We evaluate our model on the only two annotated corpora with event hierarchies: HiEve and the Intelligence Community corpus. No prior system has been evaluated on both corpora. Our model outperforms previous systems on both corpora, achieving 0.74 BLANC F1 on the Intelligence Community corpus and 0.70 F1 on the HiEve corpus, respectively a 15 and 5 percentage point improvement over previous models. P19-1471 @@ -6062,7 +6062,7 @@ ShantanuAcharya AlexanderOssa ArneKöhn - ChrisBiemann + ChrisBiemann AlexanderPanchenko 4811–4817 We introduce the use of Poincaré embeddings to improve existing state-of-the-art approaches to domain-specific taxonomy induction from text as a signal for both relocating wrong hyponym terms within a (pre-induced) taxonomy as well as for attaching disconnected terms in a taxonomy. This method substantially improves previous state-of-the-art results on the SemEval-2016 Task 13 on taxonomy extraction. We demonstrate the superiority of Poincaré embeddings over distributional semantic representations, supporting the hypothesis that they can better capture hierarchical lexical-semantic relationships than embeddings in the Euclidean space. @@ -6173,10 +6173,10 @@ Handling Divergent Reference Texts when Evaluating Table-to-Text Generation BhuwanDhingra ManaalFaruqui - AnkurParikh + AnkurParikh Ming-WeiChang DipanjanDas - WilliamCohen + WilliamCohen 4884–4895 Automatically constructed datasets for generating text from semi-structured data (tables), such as WikiBio, often contain reference texts that diverge from the information in the corresponding semi-structured data. We show that metrics which rely solely on the reference texts, such as BLEU and ROUGE, show poor correlation with human judgments when those references diverge. We propose a new metric, PARENT, which aligns n-grams from the reference and generated texts to the semi-structured data before computing their precision and recall. Through a large scale human evaluation study of table-to-text models for WikiBio, we show that PARENT correlates with human judgments better than existing text generation metrics. We also adapt and evaluate the information extraction based evaluation proposed by Wiseman et al (2017), and show that PARENT has comparable correlation to it, while being easier to use. We show that PARENT is also applicable when the reference texts are elicited from humans using the data from the WebNLG challenge. P19-1483 @@ -6212,7 +6212,7 @@ Simple and Effective Curriculum Pointer-Generator Networks for Reading Comprehension over Long Narratives YiTay ShuohangWang - Anh TuanLuu + Anh TuanLuu JieFu Minh C.Phan XingdiYuan @@ -6255,7 +6255,7 @@ A Resource-Free Evaluation Metric for Cross-Lingual Word Embeddings Based on Graph Modularity YoshinariFujinuma JordanBoyd-Graber - Michael J.Paul + Michael J.Paul 4952–4962 Cross-lingual word embeddings encode the meaning of words from different languages into a shared low-dimensional space. An important requirement for many downstream tasks is that word similarity should be independent of language—i.e., word vectors within one language should not be more similar to each other than to words in another language. We measure this characteristic using modularity, a network measurement that measures the strength of clusters in a graph. Modularity has a moderate to strong correlation with three downstream tasks, even though modularity is based only on the structure of embeddings and does not require any external resources. We show through experiments that modularity can serve as an intrinsic validation metric to improve unsupervised cross-lingual word embeddings, particularly on distant language pairs in low-resource settings. P19-1489 @@ -6266,7 +6266,7 @@ Multilingual and Cross-Lingual Graded Lexical Entailment IvanVulić - Simone PaoloPonzetto + Simone PaoloPonzetto GoranGlavaš 4963–4974 Grounded in cognitive linguistics, graded lexical entailment (GR-LE) is concerned with fine-grained assertions regarding the directional hierarchical relationships between concepts on a continuous scale. In this paper, we present the first work on cross-lingual generalisation of GR-LE relation. Starting from HyperLex, the only available GR-LE dataset in English, we construct new monolingual GR-LE datasets for three other languages, and combine those to create a set of six cross-lingual GR-LE datasets termed CL-HYPERLEX. We next present a novel method dubbed CLEAR (Cross-Lingual Lexical Entailment Attract-Repel) for effectively capturing graded (and binary) LE, both monolingually in different languages as well as across languages (i.e., on CL-HYPERLEX). Coupled with a bilingual dictionary, CLEAR leverages taxonomic LE knowledge in a resource-rich language (e.g., English) and propagates it to other languages. Supported by cross-lingual LE transfer, CLEAR sets competitive baseline performance on three new monolingual GR-LE datasets and six cross-lingual GR-LE datasets. In addition, we show that CLEAR outperforms current state-of-the-art on binary cross-lingual LE detection by a wide margin for diverse language pairs. @@ -6278,11 +6278,11 @@ What Kind of Language Is Hard to Language-Model? - Sabrina J.Mielke + Sabrina J.Mielke RyanCotterell KyleGorman BrianRoark - JasonEisner + JasonEisner 4975–4989 How language-agnostic are current state-of-the-art NLP tools? Are there some types of language that are easier to model with current methods? In prior work (Cotterell et al., 2018) we attempted to address this question for language modeling, and observed that recurrent neural network language models do not perform equally well over all the high-resource European languages found in the Europarl corpus. We speculated that inflectional morphology may be the primary culprit for the discrepancy. In this paper, we extend these earlier experiments to cover 69 languages from 13 language families using a multilingual Bible corpus. Methodologically, we introduce a new paired-sample multiplicative mixed-effects model to obtain language difficulty coefficients from at-least-pairwise parallel corpora. In other words, the model is aware of inter-sentence variation and can handle missing data. Exploiting this model, we show that “translationese” is not any easier to model than natively written language in a fair comparison. Trying to answer the question of what features difficult languages have in common, we try and fail to reproduce our earlier (Cotterell et al., 2018) observation about morphological complexity and instead reveal far simpler statistics of the data that seem to drive complexity in a much larger sample. P19-1491 @@ -6297,9 +6297,9 @@ Analyzing the Limitations of Cross-lingual Word Embedding Mappings AitorOrmazabal MikelArtetxe - GorkaLabaka - AitorSoroa - EnekoAgirre + GorkaLabaka + AitorSoroa + EnekoAgirre 4990–4995 Recent research in cross-lingual word embeddings has almost exclusively focused on offline methods, which independently train word embeddings in different languages and map them to a shared space through linear transformations. While several authors have questioned the underlying isomorphism assumption, which states that word embeddings in different languages have approximately the same structure, it is not clear whether this is an inherent limitation of mapping approaches or a more general issue when learning cross-lingual embeddings. So as to answer this question, we experiment with parallel corpora, which allows us to compare offline mapping to an extension of skip-gram that jointly learns both embedding spaces. We observe that, under these ideal conditions, joint learning yields to more isomorphic embeddings, is less sensitive to hubness, and obtains stronger results in bilingual lexicon induction. We thus conclude that current mapping methods do have strong limitations, calling for further research to jointly learn cross-lingual embeddings with a weaker cross-lingual signal. P19-1492 @@ -6322,8 +6322,8 @@ Bilingual Lexicon Induction through Unsupervised Machine Translation MikelArtetxe - GorkaLabaka - EnekoAgirre + GorkaLabaka + EnekoAgirre 5002–5007 A recent research line has obtained strong results on bilingual lexicon induction by aligning independently trained word embeddings in two languages and using the resulting cross-lingual embeddings to induce word translation pairs through nearest neighbor or related retrieval methods. In this paper, we propose an alternative approach to this problem that builds on the recent work on unsupervised machine translation. This way, instead of directly inducing a bilingual lexicon from cross-lingual embeddings, we use them to build a phrase-table, combine it with a language model, and use the resulting machine translation system to generate a synthetic parallel corpus, from which we extract the bilingual lexicon using statistical word alignment techniques. As such, our method can work with any word embedding and cross-lingual mapping technique, and it does not require any additional resource besides the monolingual corpus used to train the embeddings. When evaluated on the exact same cross-lingual embeddings, our proposed method obtains an average improvement of 6 accuracy points over nearest neighbor and 4 points over CSLS retrieval, establishing a new state-of-the-art in the standard MUSE dataset. P19-1494 @@ -6333,7 +6333,7 @@ Automatically Identifying Complaints in Social Media - DanielPreoţiuc-Pietro + DanielPreoţiuc-Pietro MihaelaGaman NikolaosAletras 5008–5019 @@ -6377,7 +6377,7 @@ Tree <fixed-case>LSTM</fixed-case>s with Convolution Units to Predict Stance and Rumor Veracity in Social Media Conversations SumeetKumar - KathleenCarley + KathleenCarley 5047–5058 Learning from social-media conversations has gained significant attention recently because of its applications in areas like rumor detection. In this research, we propose a new way to represent social-media conversations as binarized constituency trees that allows comparing features in source-posts and their replies effectively. Moreover, we propose to use convolution units in Tree LSTMs that are better at learning patterns in features obtained from the source and reply posts. Our Tree LSTM models employ multi-task (stance + rumor) learning and propagate the useful stance signal up in the tree for rumor classification at the root node. The proposed models achieve state-of-the-art performance, outperforming the current best model by 12% and 15% on F1-macro for rumor-veracity classification and stance classification tasks respectively. P19-1498 @@ -6433,7 +6433,7 @@ Simple Unsupervised Summarization by Contextual Matching JiaweiZhou - AlexanderRush + AlexanderRush 5101–5106 We propose an unsupervised method for sentence summarization using only language modeling. The approach employs two language models, one that is generic (i.e. pretrained), and the other that is specific to the target domain. We show that by using a product-of-experts criteria these are enough for maintaining continuous contextual matching while maintaining output fluency. Experiments on both abstractive and extractive sentence summarization data sets show promising results of our method without being exposed to any paired data. P19-1503 @@ -6457,7 +6457,7 @@ Morphological Irregularity Correlates with Frequency ShijieWu RyanCotterell - TimothyO’Donnell + TimothyO’Donnell 5117–5126 We present a study of morphological irregularity. Following recent work, we define an information-theoretic measure of irregularity based on the predictability of forms in a language. Using a neural transduction model, we estimate this quantity for the forms in 28 languages. We first present several validatory and exploratory analyses of irregularity. We then show that our analyses provide evidence for a correlation between irregularity and frequency: higher frequency items are more likely to be irregular and irregular items are more likely be highly frequent. To our knowledge, this result is the first of its breadth and confirms longstanding proposals from the linguistics literature. The correlation is more robust when aggregated at the level of whole paradigms—providing support for models of linguistic structure in which inflected forms are unified by abstract underlying stems or lexemes. P19-1505 @@ -6483,8 +6483,8 @@ Relating Simple Sentence Representations in Deep Neural Networks and the Brain SharmisthaJat HaoTang - ParthaTalukdar - TomMitchell + ParthaTalukdar + TomMitchell 5137–5154 What is the relationship between sentence representations learned by deep recurrent models against those encoded by the brain? Is there any correspondence between hidden layers of these recurrent models and brain regions when processing sentences? Can these deep models be used to synthesize brain data which can then be utilized in other extrinsic tasks? We investigate these questions using sentences with simple syntax and semantics (e.g., The bone was eaten by the dog.). We consider multiple neural network architectures, including recently proposed ELMo and BERT. We use magnetoencephalography (MEG) brain recording data collected from human subjects when they were reading these simple sentences. Overall, we find that BERT’s activations correlate the best with MEG brain data. We also find that the deep network representation can be used to generate brain data from new sentences to augment existing brain data. To the best of our knowledge, this is the first work showing that the MEG brain recording when reading a word in a sentence can be used to distinguish earlier words in the sentence. Our exploration is also the first to use deep neural network representations to generate synthetic brain data and to show that it helps in improving subsequent stimuli decoding task accuracy. P19-1507 @@ -6512,7 +6512,7 @@ EugeneKharitonov AlessandroLazaric EmmanuelDupoux - MarcoBaroni + MarcoBaroni 5166–5175 Sequence-processing neural networks led to remarkable progress on many NLP tasks. As a consequence, there has been increasing interest in understanding to what extent they process language as humans do. We aim here to uncover which biases such models display with respect to “natural” word-order constraints. We train models to communicate about paths in a simple gridworld, using miniature languages that reflect or violate various natural language trends, such as the tendency to avoid redundancy or to minimize long-distance dependencies. We study how the controlled characteristics of our miniature languages affect individual learning and their stability across multiple network generations. The results draw a mixed picture. On the one hand, neural networks show a strong tendency to avoid long-distance dependencies. On the other hand, there is no clear preference for the efficient, non-redundant encoding of information that is widely attested in natural language. We thus suggest inoculating a notion of “effort” into neural networks, as a possible way to make their linguistic behavior more human-like. P19-1509 @@ -6524,11 +6524,11 @@ <fixed-case>NNE</fixed-case>: A Dataset for Nested Named Entity Recognition in <fixed-case>E</fixed-case>nglish Newswire NickyRingland - XiangDai + XiangDai BenHachey SarvnazKarimi - CecileParis - James R.Curran + CecileParis + James R.Curran 5176–5181 Named entity recognition (NER) is widely used in natural language processing applications and downstream tasks. However, most NER tools target flat annotation from popular datasets, eschewing the semantic information available in nested entity mentions. We describe NNE—a fine-grained, nested named entity dataset over the full Wall Street Journal portion of the Penn Treebank (PTB). Our annotation comprises 279,795 mentions of 114 entity types with up to 6 layers of nesting. We hope the public release of this large dataset for English newswire will encourage development of new techniques for nested NER. P19-1510 @@ -6582,9 +6582,9 @@ Scaling up Open Tagging from Tens to Thousands: Comprehension Empowered Attribute Value Extraction from Product Title HuiminXu WentingWang - XinMao + XinMao XinyuJiang - ManLan + ManLan 5214–5223 Supplementing product information by extracting attribute values from title is a crucial task in e-Commerce domain. Previous studies treat each attribute only as an entity type and build one set of NER tags (e.g., BIO) for each of them, leading to a scalability issue which unfits to the large sized attribute system in real world e-Commerce. In this work, we propose a novel approach to support value extraction scaling up to thousands of attributes without losing performance: (1) We propose to regard attribute as a query and adopt only one global set of BIO tags for any attributes to reduce the burden of attribute tag or model explosion; (2) We explicitly model the semantic representations for attribute and title, and develop an attention mechanism to capture the interactive semantic relations in-between to enforce our framework to be attribute comprehensive. We conduct extensive experiments in real-life datasets. The results show that our model not only outperforms existing state-of-the-art NER tagging models, but also is robust and generates promising results for up to 8,906 attributes. P19-1514 @@ -6606,7 +6606,7 @@ ShwetaYadav AsifEkbal SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya 5234–5245 The mining of adverse drug reaction (ADR) has a crucial role in the pharmacovigilance. The traditional ways of identifying ADR are reliable but time-consuming, non-scalable and offer a very limited amount of ADR relevant information. With the unprecedented growth of information sources in the forms of social media texts (Twitter, Blogs, Reviews etc.), biomedical literature, and Electronic Medical Records (EMR), it has become crucial to extract the most pertinent ADR related information from these free-form texts. In this paper, we propose a neural network inspired multi- task learning framework that can simultaneously extract ADRs from various sources. We adopt a novel adversarial learning-based approach to learn features across multiple ADR information sources. Unlike the other existing techniques, our approach is capable to extracting fine-grained information (such as ‘Indications’, ‘Symptoms’, ‘Finding’, ‘Disease’, ‘Drug’) which provide important cues in pharmacovigilance. We evaluate our proposed approach on three publicly available real- world benchmark pharmacovigilance datasets, a Twitter dataset from PSB 2016 Social Me- dia Shared Task, CADEC corpus and Medline ADR corpus. Experiments show that our unified framework achieves state-of-the-art performance on individual tasks associated with the different benchmark datasets. This establishes the fact that our proposed approach is generic, which enables it to achieve high performance on the diverse datasets. P19-1516 @@ -6642,8 +6642,8 @@ ChenweiZhang YaliangLi NanDu - WeiFan - PhilipYu + WeiFan + PhilipYu 5259–5267 Being able to recognize words as slots and detect the intent of an utterance has been a keen issue in natural language understanding. The existing works either treat slot filling and intent detection separately in a pipeline manner, or adopt joint models which sequentially label slots while summarizing the utterance-level intent without explicitly preserving the hierarchical relationship among words, slots, and intents. To exploit the semantic hierarchy for effective modeling, we propose a capsule-based neural network model which accomplishes slot filling and intent detection via a dynamic routing-by-agreement schema. A re-routing schema is proposed to further synergize the slot filling performance using the inferred intent representation. Experiments on two real-world datasets show the effectiveness of our model when compared with other alternative model architectures, as well as existing natural language understanding services. P19-1519 @@ -6679,7 +6679,7 @@ DaweiFeng LinboQiao ZhigangKan - DongshengLi + DongshengLi 5284–5294 Traditional approaches to the task of ACE event extraction usually depend on manually annotated data, which is often laborious to create and limited in size. Therefore, in addition to the difficulty of event extraction itself, insufficient training data hinders the learning process as well. To promote event extraction, we first propose an event extraction model to overcome the roles overlap problem by separating the argument prediction in terms of roles. Moreover, to address the problem of insufficient training data, we propose a method to automatically generate labeled data by editing prototypes and screen out generated samples by ranking the quality. Experiments on the ACE2005 dataset demonstrate that our extraction model can surpass most existing extraction methods. Besides, incorporating our generation method exhibits further significant improvement. It obtains new state-of-the-art results on the event extraction task, including pushing the F1 score of trigger classification to 81.1%, and the F1 score of argument classification to 58.9%. P19-1522 @@ -6700,8 +6700,8 @@ Towards Improving Neural Named Entity Recognition with Gazetteers TianyuLiu - Jin-GeYao - Chin-YewLin + Jin-GeYao + Chin-YewLin 5301–5307 Most of the recently proposed neural models for named entity recognition have been purely data-driven, with a strong emphasis on getting rid of the efforts for collecting external resources or designing hand-crafted features. This could increase the chance of overfitting since the models cannot access any supervision signal beyond the small amount of annotated data, limiting their power to generalize beyond the annotated entities. In this work, we show that properly utilizing external gazetteers could benefit segmental neural NER models. We add a simple module on the recently proposed hybrid semi-Markov CRF architecture and observe some promising results. P19-1524 @@ -6712,7 +6712,7 @@ Span-Level Model for Relation Extraction KalpitDixit - YaserAl-Onaizan + YaserAl-Onaizan 5308–5314 Relation Extraction is the task of identifying entity mention spans in raw text and then identifying relations between pairs of the entity mentions. Recent approaches for this span-level task have been token-level models which have inherent limitations. They cannot easily define and implement span-level features, cannot model overlapping entity mentions and have cascading errors due to the use of sequential decoding. To address these concerns, we present a model which directly models all possible spans and performs joint entity mention detection and relation extraction. We report a new state-of-the-art performance of 62.83 F1 (prev best was 60.49) on the ACE2005 dataset. P19-1525 @@ -6734,7 +6734,7 @@ Neural Architectures for Nested <fixed-case>NER</fixed-case> through Linearization JanaStraková MilanStraka - JanHajic + JanHajic 5326–5331 We propose two neural network architectures for nested named entity recognition (NER), a setting in which named entities may overlap and also be labeled with more than one label. We encode the nested labels using a linearized scheme. In our first proposed approach, the nested labels are modeled as multilabels corresponding to the Cartesian product of the nested labels in a standard LSTM-CRF architecture. In the second one, the nested NER is viewed as a sequence-to-sequence problem, in which the input sequence consists of the tokens and output sequence of the labels, using hard attention on the word whose label is being predicted. The proposed methods outperform the nested NER state of the art on four corpora: ACE-2004, ACE-2005, GENIA and Czech CNEC. We also enrich our architectures with the recently published contextual embeddings: ELMo, BERT and Flair, reaching further improvements for the four nested entity corpora. In addition, we report flat NER state-of-the-art results for CoNLL-2002 Dutch and Spanish and for CoNLL-2003 English. P19-1527 @@ -6768,7 +6768,7 @@ <fixed-case>PTB</fixed-case> Graph Parsing with Tree Approximation - YoshihideKato + YoshihideKato ShigekiMatsubara 5344–5349 The Penn Treebank (PTB) represents syntactic structures as graphs due to nonlocal dependencies. This paper proposes a method that approximates PTB graph-structured representations by trees. By our approximation method, we can reduce nonlocal dependency identification and constituency parsing into single tree-based parsing. An experimental result demonstrates that our approximation method with an off-the-shelf tree-based constituency parser significantly outperforms the previous methods in nonlocal dependency identification. @@ -6888,7 +6888,7 @@ ChrisBrockett XiaodongLiu XiangGao - BillDolan + BillDolan YejinChoi JianfengGao 5427–5436 @@ -6902,7 +6902,7 @@ HardikChauhan MauajamaFirdaus AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 5437–5447 Multimodal dialogue systems have opened new frontiers in the traditional goal-oriented dialogue systems. The state-of-the-art dialogue systems are primarily based on unimodal sources, predominantly the text, and hence cannot capture the information present in the other sources such as videos, audios, images etc. With the availability of large scale multimodal dialogue dataset (MMD) (Saha et al., 2018) on the fashion domain, the visual appearance of the products is essential for understanding the intention of the user. Without capturing the information from both the text and image, the system will be incapable of generating correct and desirable responses. In this paper, we propose a novel position and attribute aware attention mechanism to learn enhanced image representation conditioned on the user utterance. Our evaluation shows that the proposed model can generate appropriate responses while preserving the position and attribute information. Experimental results also prove that our proposed approach attains superior performance compared to the baseline models, and outperforms the state-of-the-art approaches on text similarity based evaluation metrics. P19-1540 @@ -6914,7 +6914,7 @@ HeBai YuZhou JiajunZhang - ChengqingZong + ChengqingZong 5448–5453 Dialogue contexts are proven helpful in the spoken language understanding (SLU) system and they are typically encoded with explicit memory representations. However, most of the previous models learn the context memory with only one objective to maximizing the SLU performance, leaving the context memory under-exploited. In this paper, we propose a new dialogue logistic inference (DLI) task to consolidate the context memory jointly with SLU in the multi-task framework. DLI is defined as sorting a shuffled dialogue session into its original logical order and shares the same memory encoder and retrieval mechanism as the SLU model. Our experimental results show that various popular contextual SLU models can benefit from our approach, and improvements are quite impressive, especially in slot filling. P19-1541 @@ -6937,7 +6937,7 @@ Reading Turn by Turn: Hierarchical Attention Architecture for Spoken Dialogue Comprehension ZhengyuanLiu - NancyChen + NancyChen 5460–5466 Comprehending multi-turn spoken conversations is an emerging research area, presenting challenges different from reading comprehension of passages due to the interactive nature of information exchange from at least two speakers. Unlike passages, where sentences are often the default semantic modeling unit, in multi-turn conversations, a turn is a topically coherent unit embodied with immediately relevant context, making it a linguistically intuitive segment for computationally modeling verbal interactions. Therefore, in this work, we propose a hierarchical attention neural network architecture, combining turn-level and word-level attention mechanisms, to improve spoken dialogue comprehension performance. Experiments are conducted on a multi-turn conversation dataset, where nurses inquire and discuss symptom information with patients. We empirically show that the proposed approach outperforms standard attention baselines, achieves more efficient learning outcomes, and is more robust to lengthy and out-of-distribution test samples. P19-1543 @@ -6984,7 +6984,7 @@ DarshShah RaghavGupta AmirFayazi - DilekHakkani-Tur + DilekHakkani-Tur 5484–5490 Task-oriented dialog systems increasingly rely on deep learning-based slot filling models, usually needing extensive labeled training data for target domains. Often, however, little to no target domain training data may be available, or the training and target domain schemas may be misaligned, as is common for web forms on similar websites. Prior zero-shot slot filling models use slot descriptions to learn concepts, but are not robust to misaligned schemas. We propose utilizing both the slot description and a small number of examples of slot values, which may be easily available, to learn semantic representations of slots which are transferable across domains and robust to misaligned schemas. Our approach outperforms state-of-the-art models on two multi-domain datasets, especially in the low-data setting. P19-1547 @@ -7036,7 +7036,7 @@ Neural-based <fixed-case>C</fixed-case>hinese Idiom Recommendation for Enhancing Elegance in Essay Writing YuanchaoLiu BoPang - BingquanLiu + BingquanLiu 5522–5526 Although the proper use of idioms can enhance the elegance of writing, the active use of various expressions is a challenge because remembering idioms is difficult. In this study, we address the problem of idiom recommendation by leveraging a neural machine translation framework, in which we suppose that idioms are written with one pseudo target language. Two types of real-life datasets are collected to support this study. Experimental results show that the proposed approach achieves promising performance compared with other baseline methods. P19-1552 @@ -7070,7 +7070,7 @@ LijunWu YingceXia TaoQin - XueqiCheng + XueqiCheng WengangZhou Tie-YanLiu 5539–5544 @@ -7083,7 +7083,7 @@ Reversing Gradients in Adversarial Domain Adaptation for Question Deduplication and Textual Entailment Tasks AnushKamath SparshGupta - VitorCarvalho + VitorCarvalho 5545–5550 Adversarial domain adaptation has been recently proposed as an effective technique for textual matching tasks, such as question deduplication. Here we investigate the use of gradient reversal on adversarial domain adaptation to explicitly learn both shared and unshared (domain specific) representations between two textual domains. In doing so, gradient reversal learns features that explicitly compensate for domain mismatch, while still distilling domain specific knowledge that can improve target domain accuracy. We evaluate reversing gradients for adversarial adaptation on multiple domains, and demonstrate that it significantly outperforms other methods on question deduplication as well as on recognizing textual entailment (RTE) tasks, achieving up to 7% absolute boost in base model accuracy on some datasets. P19-1556 @@ -7145,7 +7145,7 @@ Combating Adversarial Misspellings with Robust Word Recognition DanishPruthi BhuwanDhingra - Zachary C.Lipton + Zachary C.Lipton 5582–5591 To combat adversarial spelling mistakes, we propose placing a word recognition model in front of the downstream classifier. Our word recognition models build upon the RNN semi-character architecture, introducing several new backoff strategies for handling rare and unseen words. Trained to recognize words corrupted by random adds, drops, swaps, and keyboard mistakes, our method achieves 32% relative (and 3.3% absolute) error reduction over the vanilla semi-character model. Notably, our pipeline confers robustness on the downstream classifier, outperforming both adversarial training and off-the-shelf spell checkers. Against a BERT model fine-tuned for sentiment analysis, a single adversarially-chosen character attack lowers accuracy from 90.3% to 45.8%. Our defense restores accuracy to 75%. Surprisingly, better word recognition does not always entail greater robustness. Our analysis reveals that robustness also depends upon a quantity that we denote the sensitivity. P19-1561 @@ -7156,7 +7156,7 @@ An Empirical Investigation of Structured Output Modeling for Graph-based Neural Dependency Parsing ZhisongZhang XuezheMa - EduardHovy + EduardHovy 5592–5598 In this paper, we investigate the aspect of structured output modeling for the state-of-the-art graph-based neural dependency parser (Dozat and Manning, 2017). With evaluations on 14 treebanks, we empirically show that global output-structured models can generally obtain better performance, especially on the metric of sentence-level Complete Match. However, probably because neural models already learn good global views of the inputs, the improvement brought by structured output modeling is modest. P19-1562 @@ -7184,7 +7184,7 @@ Multimodal Transformer Networks for End-to-End Video-Grounded Dialogue Systems HungLe DoyenSahoo - NancyChen + NancyChen StevenHoi 5612–5623 Developing Video-Grounded Dialogue Systems (VGDS), where a dialogue is conducted based on visual and audio aspects of a given video, is significantly more challenging than traditional image or text-grounded dialogue systems because (1) feature space of videos span across multiple picture frames, making it difficult to obtain semantic information; and (2) a dialogue agent must perceive and process information from different modalities (audio, video, caption, etc.) to obtain a comprehensive understanding. Most existing work is based on RNNs and sequence-to-sequence architectures, which are not very effective for capturing complex long-term dependencies (like in videos). To overcome this, we propose Multimodal Transformer Networks (MTN) to encode videos and incorporate information from different modalities. We also propose query-aware attention through an auto-encoder to extract query-aware features from non-text modalities. We develop a training procedure to simulate token-level decoding to improve the quality of generated responses during inference. We get state of the art performance on Dialogue System Technology Challenge 7 (DSTC7). Our model also generalizes to another multimodal visual-grounded dialogue task, and obtains promising performance. @@ -7199,7 +7199,7 @@ TianchengZhao ChenyanXiong XiaodanLiang - EricXing + EricXing ZhitingHu 5624–5634 Many real-world open-domain conversation applications have specific goals to achieve during open-ended chats, such as recommendation, psychotherapy, education, etc. We study the problem of imposing conversational goals on open-domain chat agents. In particular, we want a conversational system to chat naturally with human and proactively guide the conversation to a designated target subject. The problem is challenging as no public data is available for learning such a target-guided strategy. We propose a structured approach that introduces coarse-grained keywords to control the intended content of system responses. We then attain smooth conversation transition through turn-level supervised learning, and drive the conversation towards the target with discourse-level constraints. We further derive a keyword-augmented conversation dataset for the study. Quantitative and human evaluations show our system can produce meaningful and effective conversations, significantly improving over other approaches @@ -7243,7 +7243,7 @@ SawanKumar SharmisthaJat KaranSaxena - ParthaTalukdar + ParthaTalukdar 5670–5681 Word Sense Disambiguation (WSD) is a long-standing but open problem in Natural Language Processing (NLP). WSD corpora are typically small in size, owing to an expensive annotation process. Current supervised WSD methods treat senses as discrete labels and also resort to predicting the Most-Frequent-Sense (MFS) for words unseen during training. This leads to poor performance on rare and unseen senses. To overcome this challenge, we propose Extended WSD Incorporating Sense Embeddings (EWISE), a supervised model to perform WSD by predicting over a continuous sense embedding space as opposed to a discrete label space. This allows EWISE to generalize over both seen and unseen senses, thus achieving generalized zero-shot learning. To obtain target sense embeddings, EWISE utilizes sense definitions. EWISE learns a novel sentence encoder for sense definitions by using WordNet relations and also ConvE, a recently proposed knowledge graph embedding method. We also compare EWISE against other sentence encoders pretrained on large corpora to generate definition embeddings. EWISE achieves new state-of-the-art WSD performance. P19-1568 @@ -7254,7 +7254,7 @@ Language Modelling Makes Sense: Propagating Representations through <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for Full-Coverage Word Sense Disambiguation DanielLoureiro - AlípioJorge + AlípioJorge 5682–5691 Contextual embeddings represent a new generation of semantic representations learned from Neural Language Modelling (NLM) that addresses the issue of meaning conflation hampering traditional word embeddings. In this work, we show that contextual embeddings can be used to achieve unprecedented gains in Word Sense Disambiguation (WSD) tasks. Our approach focuses on creating sense-level embeddings with full-coverage of WordNet, and without recourse to explicit knowledge of sense distributions or task-specific modelling. As a result, a simple Nearest Neighbors (k-NN) method using our representations is able to consistently surpass the performance of previous systems using powerful neural sequencing models. We also analyse the robustness of our approach when ignoring part-of-speech and lemma features, requiring disambiguation against the full sense inventory, and revealing shortcomings to be improved. Finally, we explore applications of our sense embeddings for concept-level analyses of contextual embeddings and their respective NLMs. P19-1569 @@ -7317,10 +7317,10 @@ Probing for Semantic Classes: Diagnosing the Meaning Content of Word Embeddings YadollahYaghoobzadeh - KatharinaKann - T. J.Hazen - EnekoAgirre - HinrichSchütze + KatharinaKann + T. J.Hazen + EnekoAgirre + HinrichSchütze 5740–5753 Word embeddings typically represent different meanings of a word in a single conflated vector. Empirical analysis of embeddings of ambiguous words is currently limited by the small size of manually annotated resources and by the fact that word senses are treated as unrelated individual concepts. We present a large dataset based on manual Wikipedia annotations and word senses, where word senses from different words are related by semantic classes. This is the basis for novel diagnostic tests for an embedding’s content: we probe word embeddings for semantic classes and analyze the embedding space by classifying embeddings into semantic classes. Our main findings are: (i) Information about a sense is generally represented well in a single-vector embedding – if the sense is frequent. (ii) A classifier can accurately predict whether a word is single-sense or multi-sense, based only on its embedding. (iii) Although rare senses are not well represented in single-vector embeddings, this does not have negative impact on an NLP application whose performance depends on frequent senses. P19-1574 @@ -7332,7 +7332,7 @@ Deep Neural Model Inspection and Comparison via Functional Neuron Pathways JamesFiacco SamridhiChoudhary - CarolynRose + CarolynRose 5754–5764 We introduce a general method for the interpretation and comparison of neural models. The method is used to factor a complex neural model into its functional components, which are comprised of sets of co-firing neurons that cut across layers of the network architecture, and which we call neural pathways. The function of these pathways can be understood by identifying correlated task level and linguistic heuristics in such a way that this knowledge acts as a lens for approximating what the network has learned to apply to its intended task. As a case study for investigating the utility of these pathways, we present an examination of pathways identified in models trained for two standard tasks, namely Named Entity Recognition and Recognizing Textual Entailment. P19-1575 @@ -7342,7 +7342,7 @@ Collocation Classification with Unsupervised Relation Vectors - LuisEspinosa Anke + LuisEspinosa Anke StevenSchockaert LeoWanner 5765–5772 @@ -7354,7 +7354,7 @@ Corpus-based Check-up for Thesaurus - NataliaLoukachevitch + NataliaLoukachevitch 5773–5779 In this paper we discuss the usefulness of applying a checking procedure to existing thesauri. The procedure is based on the analysis of discrepancies of corpus-based and thesaurus-based word similarities. We applied the procedure to more than 30 thousand words of the Russian wordnet and found some serious errors in word sense description, including inaccurate relationships and missing senses of ambiguous words. P19-1577 @@ -7405,7 +7405,7 @@ Better <fixed-case>OOV</fixed-case> Translation with Bilingual Terminology Mining MatthiasHuck ViktorHangya - AlexanderFraser + AlexanderFraser 5809–5815 Unseen words, also called out-of-vocabulary words (OOVs), are difficult for machine translation. In neural machine translation, byte-pair encoding can be used to represent OOVs, but they are still often incorrectly translated. We improve the translation of OOVs in NMT using easy-to-obtain monolingual data. We look for OOVs in the text to be translated and translate them using simple-to-construct bilingual word embeddings (BWEs). In our MT experiments we take the 5-best candidates, which is motivated by intrinsic mining experiments. Using all five of the proposed target language words as queries we mine target-language sentences. We then back-translate, forcing the back-translation of each of the five proposed target-language OOV-translation-candidates to be the original source-language OOV. We show that by using this synthetic data to fine-tune our system the translation of OOVs can be dramatically improved. In our experiments we use a system trained on Europarl and mine sentences containing medical terms from monolingual data. P19-1581 @@ -7442,7 +7442,7 @@ MaxFriedrich ArneKöhn GregorWiedemann - ChrisBiemann + ChrisBiemann 5829–5839 De-identification is the task of detecting protected health information (PHI) in medical text. It is a critical step in sanitizing electronic health records (EHR) to be shared for research. Automatic de-identification classifiers can significantly speed up the sanitization process. However, obtaining a large and diverse dataset to train such a classifier that works well across many types of medical text poses a challenge as privacy laws prohibit the sharing of raw medical records. We introduce a method to create privacy-preserving shareable representations of medical text (i.e. they contain no PHI) that does not require expensive manual pseudonymization. These representations can be shared between organizations to create unified datasets for training de-identification models. Our representation allows training a simple LSTM-CRF de-identification model to an F1 score of 97.4%, which is comparable to a strong baseline that exposes private information in its representation. A robust, widely available de-identification classifier based on our representation could potentially enable studies for which de-identification would otherwise be too costly. P19-1584 @@ -7517,7 +7517,7 @@ SuchinGururangan TamDang DallasCard - Noah A.Smith + Noah A.Smith 5880–5894 We introduce VAMPIRE, a lightweight pretraining framework for effective text classification when data and computing resources are limited. We pretrain a unigram document model as a variational autoencoder on in-domain, unlabeled data and use its internal states as features in a downstream classifier. Empirically, we show the relative strength of VAMPIRE against computationally expensive contextual embeddings and other popular semi-supervised baselines under low resource settings. We also find that fine-tuning to in-domain data is crucial to achieving decent performance from contextual embeddings when working with limited supervision. We accompany this paper with code to pretrain and use VAMPIRE embeddings in downstream tasks. P19-1590 @@ -7554,7 +7554,7 @@ The Referential Reader: A Recurrent Entity Network for Anaphora Resolution FeiLiu - LukeZettlemoyer + LukeZettlemoyer JacobEisenstein 5918–5925 We present a new architecture for storing and accessing entity mentions during online text processing. While reading the text, entity references are identified, and may be stored by either updating or overwriting a cell in a fixed-length memory. The update operation implies coreference with the other mentions that are stored in the same cell; the overwrite operation causes these mentions to be forgotten. By encoding the memory operations as differentiable gates, it is possible to train the model end-to-end, using both a supervised anaphora resolution objective as well as a supplementary language modeling objective. Evaluation on a dataset of pronoun-name anaphora demonstrates strong performance with purely incremental text processing. @@ -7577,10 +7577,10 @@ <fixed-case>BAM</fixed-case>! Born-Again Multi-Task Networks for Natural Language Understanding KevinClark - Minh-ThangLuong + Minh-ThangLuong UrvashiKhandelwal - Christopher D.Manning - Quoc V.Le + Christopher D.Manning + Quoc V.Le 5931–5937 It can be challenging to train multi-task neural networks that outperform or even match their single-task counterparts. To help address this, we propose using knowledge distillation where single-task models teach a multi-task model. We enhance this training with teacher annealing, a novel method that gradually transitions the model from distillation to supervised learning, helping the multi-task model surpass its single-task teachers. We evaluate our approach by multi-task fine-tuning BERT on the GLUE benchmark. Our method consistently improves over standard single-task and multi-task training. P19-1595 @@ -7593,7 +7593,7 @@ ShereenOraby VrindavanHarrison AbteenEbrahimi - MarilynWalker + MarilynWalker 5938–5951 Neural natural language generation (NNLG) from structured meaning representations has become increasingly popular in recent years. While we have seen progress with generating syntactically correct utterances that preserve semantics, various shortcomings of NNLG systems are clear: new tasks require new training data which is not available or straightforward to acquire, and model outputs are simple and may be dull and repetitive. This paper addresses these two critical challenges in NNLG by: (1) scalably (and at no cost) creating training datasets of parallel meaning representations and reference texts with rich style markup by using data from freely available and naturally descriptive user reviews, and (2) systematically exploring how the style markup enables joint control of semantic and stylistic aspects of neural model output. We present YelpNLG, a corpus of 300,000 rich, parallel meaning representations and highly stylistically varied reference texts spanning different restaurant attributes, and describe a novel methodology that can be scalably reused to generate NLG datasets for other domains. The experiments show that the models control important aspects, including lexical choice of adjectives, output length, and sentiment, allowing the models to successfully hit multiple style targets without sacrificing semantics. P19-1596 @@ -7614,8 +7614,8 @@ <fixed-case>B</fixed-case>arack’s Wife Hillary: Using Knowledge Graphs for Fact-Aware Language Modeling RobertLogan - Nelson F.Liu - Matthew E.Peters + Nelson F.Liu + Matthew E.Peters MattGardner SameerSingh 5962–5971 @@ -7642,7 +7642,7 @@ FuliLuo PengchengYang WeiWu - BaobaoChang + BaobaoChang ZhifangSui 5985–5996 The comprehensive descriptions for factual attribute-value tables, which should be accurate, informative and loyal, can be very helpful for end users to understand the structured data in this form. However previous neural generators might suffer from key attributes missing, less informative and groundless information problems, which impede the generation of high-quality comprehensive descriptions for tables. To relieve these problems, we first propose force attention (FA) method to encourage the generator to pay more attention to the uncovered attributes to avoid potential key attributes missing. Furthermore, we propose reinforcement learning for information richness to generate more informative as well as more loyal descriptions for tables. In our experiments, we utilize the widely used WIKIBIO dataset as a benchmark. Besides, we create WB-filter based on WIKIBIO to test our model in the simulated user-oriented scenarios, in which the generated descriptions should accord with particular user interests. Experimental results show that our model outperforms the state-of-the-art baselines on both automatic and human evaluation. @@ -7655,7 +7655,7 @@ NingDai JianzeLiang XipengQiu - XuanjingHuang + XuanjingHuang 5997–6007 Disentangling the content and style in the latent space is prevalent in unpaired text style transfer. However, two major issues exist in most of the current neural models. 1) It is difficult to completely strip the style information from the semantics for a sentence. 2) The recurrent neural network (RNN) based encoder and decoder, mediated by the latent representation, cannot well deal with the issue of the long-term dependency, resulting in poor preservation of non-stylistic semantic content. In this paper, we propose the Style Transformer, which makes no assumption about the latent representation of source sentence and equips the power of attention mechanism in Transformer to achieve better style transfer and better content preservation. P19-1601 @@ -7670,8 +7670,8 @@ LeiLi LiliMou OlgaVechtomova - Xin-yuDai - JiajunChen + Xin-yuDai + JiajunChen 6008–6019 Variational auto-encoders (VAEs) are widely used in natural language generation due to the regularization of the latent space. However, generating sentences from the continuous latent space does not explicitly model the syntactic information. In this paper, we propose to generate sentences from disentangled syntactic and semantic spaces. Our proposed method explicitly models syntactic information in the VAE’s latent space by using the linearized tree sequence, leading to better performance of language generation. Additionally, the advantage of sampling in the disentangled syntactic and semantic latent spaces enables us to perform novel applications, such as the unsupervised paraphrase generation and syntax transfer generation. Experimental results show that our proposed model achieves similar or better performance in various tasks, compared with state-of-the-art related work. P19-1602 @@ -7684,7 +7684,7 @@ DamaiDai PengchengYang TianyuLiu - BaobaoChang + BaobaoChang ZhifangSui XuSun 6020–6026 @@ -7718,8 +7718,8 @@ Storyboarding of Recipes: Grounded Contextual Generation KhyathiChandu - EricNyberg - Alan WBlack + EricNyberg + Alan WBlack 6040–6046 Information need of humans is essentially multimodal in nature, enabling maximum exploitation of situated context. We introduce a dataset for sequential procedural (how-to) text generation from images in cooking domain. The dataset consists of 16,441 cooking recipes with 160,479 photos associated with different steps. We setup a baseline motivated by the best performing model in terms of human evaluation for the Visual Story Telling (ViST) task. In addition, we introduce two models to incorporate high level structure learnt by a Finite State Machine (FSM) in neural sequential generation process by: (1) Scaffolding Structure in Decoder (SSiD) (2) Scaffolding Structure in Loss (SSiL). Our best performing model (SSiL) achieves a METEOR score of 0.31, which is an improvement of 0.6 over the baseline model. We also conducted human evaluation of the generated grounded recipes, which reveal that 61% found that our proposed (SSiL) model is better than the baseline model in terms of overall recipes. We also discuss analysis of the output highlighting key important NLP issues for prospective directions. P19-1606 @@ -7798,7 +7798,7 @@ Multi-hop Reading Comprehension through Question Decomposition and Rescoring SewonMin VictorZhong - LukeZettlemoyer + LukeZettlemoyer HannanehHajishirzi 6097–6109 Multi-hop Reading Comprehension (RC) requires reasoning and aggregation across several paragraphs. We propose a system for multi-hop RC that decomposes a compositional question into simpler sub-questions that can be answered by off-the-shelf single-hop RC models. Since annotations for such decomposition are expensive, we recast subquestion generation as a span prediction problem and show that our method, trained using only 400 labeled examples, generates sub-questions that are as effective as human-authored sub-questions. We also introduce a new global rescoring approach that considers each decomposition (i.e. the sub-questions and their answers) to select the best final answer, greatly improving overall performance. Our experiments on HotpotQA show that this approach achieves the state-of-the-art results, while providing explainable evidence for its decision making in the form of sub-questions. @@ -7838,7 +7838,7 @@ ZaixiangZheng JianbingZhang XiaohuiYan - JiajunChen + JiajunChen 6130–6139 Relation detection is a core step in many natural language process applications including knowledge base question answering. Previous efforts show that single-fact questions could be answered with high accuracy. However, one critical problem is that current approaches only get high accuracy for questions whose relations have been seen in the training data. But for unseen relations, the performance will drop rapidly. The main reason for this problem is that the representations for unseen relations are missing. In this paper, we propose a simple mapping method, named representation adapter, to learn the representation mapping for both seen and unseen relations based on previously learned relation embedding. We employ the adversarial objective and the reconstruction objective to improve the mapping performance. We re-organize the popular SimpleQuestion dataset to reveal and evaluate the problem of detecting unseen relations. Experiments show that our method can greatly improve the performance of unseen relations while the performance for those seen part is kept comparable to the state-of-the-art. P19-1616 @@ -7852,7 +7852,7 @@ YanruQu HaoZhou LeiLi - WeinanZhang + WeinanZhang YongYu 6140–6150 Text-based question answering (TBQA) has been studied extensively in recent years. Most existing approaches focus on finding the answer to a question within a single paragraph. However, many difficult questions require multiple supporting evidence from scattered text among two or more documents. In this paper, we propose Dynamically Fused Graph Network (DFGN), a novel method to answer those questions requiring multiple scattered evidence and reasoning over them. Inspired by human’s step-by-step reasoning behavior, DFGN includes a dynamic fusion layer that starts from the entities mentioned in the given query, explores along the entity graph dynamically built from the text, and gradually finds relevant supporting entities from the given documents. We evaluate DFGN on HotpotQA, a public TBQA dataset requiring multi-hop reasoning. DFGN achieves competitive results on the public board. Furthermore, our analysis shows DFGN produces interpretable reasoning chains. @@ -7866,7 +7866,7 @@ PasqualeMinervini JannesMünchmeyer UlfLeser - TimRocktäschel + TimRocktäschel 6151–6161 Rule-based models are attractive for various tasks because they inherently lead to interpretable and explainable decisions and can easily incorporate prior knowledge. However, such systems are difficult to apply to problems involving natural language, due to its large linguistic variability. In contrast, neural models can cope very well with ambiguity by learning distributed representations of words and their composition from data, but lead to models that are difficult to interpret. In this paper, we describe a model combining neural networks with logic programming in a novel manner for solving multi-hop reasoning tasks over natural language. Specifically, we propose to use an Prolog prover which we extend to utilize a similarity function over pretrained sentence encoders. We fine-tune the representations for the similarity function via backpropagation. This leads to a system that can apply rule-based reasoning to natural language, and induce domain-specific natural language rules from training data. We evaluate the proposed system on two different question answering tasks, showing that it outperforms two baselines – BiDAF (Seo et al., 2016a) and FastQA( Weissenborn et al., 2017) on a subset of the WikiHop corpus and achieves competitive results on the MedHop data set (Welbl et al., 2017). P19-1618 @@ -7893,7 +7893,7 @@ DanielAndor EmilyPitler JacobDevlin - MichaelCollins + MichaelCollins 6168–6173 We introduce a novel method of generating synthetic question answering corpora by combining models of question generation and answer extraction, and by filtering the results to ensure roundtrip consistency. By pretraining on the resulting corpora we obtain significant improvements on SQuAD2 and NQ, establishing a new state-of-the-art on the latter. Our synthetic data generation models, for both question generation and answer extraction, can be fully reproduced by finetuning a publicly available BERT model on the extractive subsets of SQuAD2 and NQ. We also describe a more powerful variant that does full sequence-to-sequence pretraining for question generation, obtaining exact match and F1 at less than 0.1% and 0.4% from human performance on SQuAD2. P19-1620 @@ -7903,7 +7903,7 @@ Are Red Roses Red? Evaluating Consistency of Question-Answering Models - Marco TulioRibeiro + Marco TulioRibeiro CarlosGuestrin SameerSingh 6174–6184 @@ -7989,7 +7989,7 @@ Discourse Representation Parsing for Sentences and Documents JiangmingLiu - Shay B.Cohen + Shay B.Cohen MirellaLapata 6248–6262 We introduce a novel semantic parsing task based on Discourse Representation Theory (DRT; Kamp and Reyle 1993). Our model operates over Discourse Representation Tree Structures which we formally define for sentences and documents. We present a general framework for parsing discourse structures of arbitrary length and granularity. We achieve this with a neural model equipped with a supervised hierarchical attention mechanism and a linguistically-motivated copy strategy. Experimental results on sentence- and document-level benchmarks show that our model outperforms competitive baselines by a wide margin. @@ -8098,7 +8098,7 @@ Encouraging Paragraph Embeddings to Remember Sentence Identity Improves Classification - TuVu + TuVu MohitIyyer 6331–6338 While paragraph embedding models are remarkably effective for downstream classification tasks, what they learn and encode into a single vector remains opaque. In this paper, we investigate a state-of-the-art paragraph embedding method proposed by Zhang et al. (2017) and discover that it cannot reliably tell whether a given sentence occurs in the input paragraph or not. We formulate a sentence content task to probe for this basic linguistic property and find that even a much simpler bag-of-words method has no trouble solving it. This result motivates us to replace the reconstruction-based objective of Zhang et al. (2017) with our sentence content probe objective in a semi-supervised setting. Despite its simplicity, our objective improves over paragraph reconstruction in terms of (1) downstream classification accuracies on benchmark datasets, (2) faster training, and (3) better generalization ability. @@ -8111,7 +8111,7 @@ A Multi-Task Architecture on Relevance-based Neural Query Translation Sheikh MuhammadSarwar HamedBonab - JamesAllan + JamesAllan 6339–6344 We describe a multi-task learning approach to train a Neural Machine Translation (NMT) model with a Relevance-based Auxiliary Task (RAT) for search query translation. The translation process for Cross-lingual Information Retrieval (CLIR) task is usually treated as a black box and it is performed as an independent step. However, an NMT model trained on sentence-level parallel data is not aware of the vocabulary distribution of the retrieval corpus. We address this problem and propose a multi-task learning architecture that achieves 16% improvement over a strong baseline on Italian-English query-document dataset. We show using both quantitative and qualitative analysis that our model generates balanced and precise translations with the regularization effect it achieves from multi-task learning paradigm. P19-1639 @@ -8159,9 +8159,9 @@ Identifying Visible Actions in Lifestyle Vlogs OanaIgnat - LauraBurdick + LauraBurdick JiaDeng - RadaMihalcea + RadaMihalcea 6406–6417 We consider the task of identifying human actions visible in online videos. We focus on the widely spread genre of lifestyle vlogs, which consist of videos of people performing actions while verbally describing them. Our goal is to identify if actions mentioned in the speech description of a video are visually present. We construct a dataset with crowdsourced manual annotations of visible actions, and introduce a multimodal algorithm that leverages information derived from visual and linguistic clues to automatically infer which actions are visible in a video. P19-1643 @@ -8188,8 +8188,8 @@ Learning to Discover, Ground and Use Words with Segmental Neural Language Models KazuyaKawakami - ChrisDyer - PhilBlunsom + ChrisDyer + PhilBlunsom 6429–6441 We propose a segmental neural language model that combines the generalization power of neural networks with the ability to discover word-like units that are latent in unsegmented character sequences. In contrast to previous segmentation models that treat word segmentation as an isolated task, our model unifies word discovery, learning how words fit together to form sentences, and, by conditioning the model on visual context, how words’ meanings ground in representations of nonlinguistic modalities. Experiments show that the unconditional model learns predictive distributions better than character LSTM models, discovers words competitively with nonparametric Bayesian word segmentation models, and that modeling language conditional on visual context improves performance on both. P19-1645 @@ -8212,7 +8212,7 @@ Symbolic Inductive Bias for Visually Grounded Learning of Spoken Language - GrzegorzChrupała + GrzegorzChrupała 6452–6462 A widespread approach to processing spoken language is to first automatically transcribe it into text. An alternative is to use an end-to-end approach: recent works have proposed to learn semantic embeddings of spoken language from images with spoken captions, without an intermediate transcription step. We propose to use multitask learning to exploit existing transcribed speech within the end-to-end setting. We describe a three-task architecture which combines the objectives of matching spoken captions with corresponding images, speech with text, and text with images. We show that the addition of the speech/text task leads to substantial performance improvements on image retrieval when compared to training the speech/image task in isolation. We conjecture that this is due to a strong inductive bias transcribed speech provides to the model, and offer supporting evidence for this. P19-1647 @@ -8278,7 +8278,7 @@ ZhihaoFan ZhongyuWei SiyuanWang - XuanjingHuang + XuanjingHuang 6514–6524 Image Captioning aims at generating a short description for an image. Existing research usually employs the architecture of CNN-RNN that views the generation as a sequential decision-making process and the entire dataset vocabulary is used as decoding space. They suffer from generating high frequent n-gram with irrelevant words. To tackle this problem, we propose to construct an image-grounded vocabulary, based on which, captions are generated with limitation and guidance. In specific, a novel hierarchical structure is proposed to construct the vocabulary incorporating both visual information and relations among words. For generation, we propose a word-aware RNN cell incorporating vocabulary information into the decoding process directly. Reinforce algorithm is employed to train the generator using constraint vocabulary as action space. Experimental results on MS COCO and Flickr30k show the effectiveness of our framework compared to some state-of-the-art models. P19-1652 @@ -8288,7 +8288,7 @@ Distilling Translations with Visual Awareness JuliaIve - PranavaMadhyastha + PranavaMadhyastha LuciaSpecia 6525–6538 Previous work on multimodal machine translation has shown that visual information is only needed in very specific cases, for example in the presence of ambiguous words where the textual context is not sufficient. As a consequence, models tend to learn to ignore this information. We propose a translate-and-refine approach to this problem where images are only used by a second stage decoder. This approach is trained jointly to generate a good first draft translation and to improve over this draft by (i) making better use of the target language textual context (both left and right-side contexts) and (ii) making use of visual context. This approach leads to the state of the art results. Additionally, we show that it has the ability to recover from erroneous or missing words in the source language. @@ -8298,7 +8298,7 @@ <fixed-case>VIFIDEL</fixed-case>: Evaluating the Visual Fidelity of Image Descriptions - PranavaMadhyastha + PranavaMadhyastha JosiahWang LuciaSpecia 6539–6550 @@ -8340,7 +8340,7 @@ Show, Describe and Conclude: On Exploiting the Structure Information of Chest <fixed-case>X</fixed-case>-ray Reports BaoyuJing ZeyaWang - EricXing + EricXing 6570–6580 Chest X-Ray (CXR) images are commonly used for clinical screening and diagnosis. Automatically writing reports for these images can considerably lighten the workload of radiologists for summarizing descriptive findings and conclusive impressions. The complex structures between and within sections of the reports pose a great challenge to the automatic report generation. Specifically, the section Impression is a diagnostic summarization over the section Findings; and the appearance of normality dominates each section over that of abnormality. Existing studies rarely explore and consider this fundamental structure information. In this work, we propose a novel framework which exploits the structure information between and within report sections for generating CXR imaging reports. First, we propose a two-stage strategy that explicitly models the relationship between Findings and Impression. Second, we design a novel co-operative multi-agent system that implicitly captures the imbalanced distribution between abnormality and normality. Experiments on two CXR report datasets show that our method achieves state-of-the-art performance in terms of various evaluation metrics. Our results expose that the proposed approach is able to generate high-quality medical reports through integrating the structure information. P19-1657 @@ -8352,7 +8352,7 @@ Ting-YaoHsu Chieh-YangHuang Yen-ChiaHsu - Ting-HaoHuang + Ting-HaoHuang 6581–6586 We introduce the first dataset for human edits of machine-generated visual stories and explore how these collected edits may be used for the visual story post-editing task. The dataset ,VIST-Edit, includes 14,905 human-edited versions of 2,981 machine-generated visual stories. The stories were generated by two state-of-the-art visual storytelling models, each aligned to 5 human-edited versions. We establish baselines for the task, showing how a relatively small set of human edits can be leveraged to boost the performance of large visual storytelling models. We also discuss the weak correlation between automatic evaluation scores and human ratings, motivating the need for new automatic metrics. P19-1658 @@ -8450,7 +8450,7 @@ Towards <fixed-case>T</fixed-case>urkish <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation ZahraAzin - GülşenEryiğit + GülşenEryiğit 43–47 Using rooted, directed and labeled graphs, Abstract Meaning Representation (AMR) abstracts away from syntactic features such as word order and does not annotate every constituent in a sentence. AMR has been specified for English and was not supposed to be an Interlingua. However, several studies strived to overcome divergences in the annotations between English AMRs and those of their target languages by refining the annotation specification. Following this line of research, we have started to build the first Turkish AMR corpus by hand-annotating 100 sentences of the Turkish translation of the novel “The Little Prince” and comparing the results with the English AMRs available for the same corpus. The next step is to prepare the Turkish AMR annotation specification for training future annotators. P19-2006 @@ -8489,7 +8489,7 @@ NinaHosseini-Kivanani Juan CamiloVásquez-Correa ManfredStede - ElmarNöth + ElmarNöth 74–80 Speech deficits are common symptoms amongParkinson’s Disease (PD) patients. The automatic assessment of speech signals is promising for the evaluation of the neurological state and the speech quality of the patients. Recently, progress has been made in applying machine learning and computational methods to automatically evaluate the speech of PD patients. In the present study, we plan to analyze the speech signals of PD patients and healthy control (HC) subjects in three different languages: German, Spanish, and Czech, with the aim to identify biomarkers to discriminate between PD patients and HC subjects and to evaluate the neurological state of the patients. Therefore, the main contribution of this study is the automatic classification of PD patients and HC subjects in different languages with focusing on phonation, articulation, and prosody. We will focus on an intelligibility analysis based on automatic speech recognition systems trained on these three languages. This is one of the first studies done that considers the evaluation of the speech of PD patients in different languages. The purpose of this research proposal is to build a model that can discriminate PD and HC subjects even when the language used for train and test is different. P19-2010 @@ -8498,7 +8498,7 @@ Natural Language Generation: Recently Learned Lessons, Directions for Semantic Representation-based Approaches, and the Case of <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese Language - Marco AntonioSobrevilla Cabezudo + Marco AntonioSobrevilla Cabezudo ThiagoPardo 81–88 This paper presents a more recent literature review on Natural Language Generation. In particular, we highlight the efforts for Brazilian Portuguese in order to show the available resources and the existent approaches for this language. We also focus on the approaches for generation from semantic representations (emphasizing the Abstract Meaning Representation formalism) as well as their advantages and limitations, including possible future directions. @@ -8537,7 +8537,7 @@ Paraphrases as Foreign Languages in Multilingual Neural Machine Translation ZhongZhou MatthiasSperber - AlexanderWaibel + AlexanderWaibel 113–122 Paraphrases, rewordings of the same semantic meaning, are useful for improving generalization and translation. Unlike previous works that only explore paraphrases at the word or phrase level, we use different translations of the whole training data that are consistent in structure as paraphrases at the corpus level. We treat paraphrases as foreign languages, tag source sentences with paraphrase labels, and train on parallel paraphrases in the style of multilingual Neural Machine Translation (NMT). Our multi-paraphrase NMT that trains only on two languages outperforms the multilingual baselines. Adding paraphrases improves the rare word translation and increases entropy and diversity in lexical choice. Adding the source paraphrases boosts performance better than adding the target ones, while adding both lifts performance further. We achieve a BLEU score of 57.2 for French-to-English translation using 24 corpus-level paraphrases of the Bible, which outperforms the multilingual baselines and is +34.7 above the single-source single-target NMT baseline. P19-2015 @@ -8558,8 +8558,8 @@ Unsupervised Pretraining for Neural Machine Translation Using Elastic Weight Consolidation - DušanVariš - OndřejBojar + DušanVariš + OndřejBojar 130–135 This work presents our ongoing research of unsupervised pretraining in neural machine translation (NMT). In our method, we initialize the weights of the encoder and decoder with two language models that are trained with monolingual data and then fine-tune the model on parallel data using Elastic Weight Consolidation (EWC) to avoid forgetting of the original language modeling task. We compare the regularization by EWC with the previous work that focuses on regularization by language modeling objectives. The positive result is that using EWC with the decoder achieves BLEU scores similar to the previous work. However, the model converges 2-3 times faster and does not require the original unlabeled training data during the fine-tuning stage. In contrast, the regularization using EWC is less effective if the original and new tasks are not closely related. We show that initializing the bidirectional NMT encoder with a left-to-right language model and forcing the model to remember the original left-to-right language modeling task limits the learning capacity of the encoder for the whole bidirectional context. P19-2017 @@ -8603,7 +8603,7 @@ From Brain Space to Distributional Space: The Perilous Journeys of f<fixed-case>MRI</fixed-case> Decoding GosseMinnema - AurélieHerbelot + AurélieHerbelot 155–161 Recent work in cognitive neuroscience has introduced models for predicting distributional word meaning representations from brain imaging data. Such models have great potential, but the quality of their predictions has not yet been thoroughly evaluated from a computational linguistics point of view. Due to the limited size of available brain imaging datasets, standard quality metrics (e.g. similarity judgments and analogies) cannot be used. Instead, we investigate the use of several alternative measures for evaluating the predicted distributional space against a corpus-derived distributional space. We show that a state-of-the-art decoder, while performing impressively on metrics that are commonly used in cognitive neuroscience, performs unexpectedly poorly on our metrics. To address this, we propose strategies for improving the model’s performance. Despite returning promising results, our experiments also demonstrate that much work remains to be done before distributional representations can reliably be predicted from brain data. P19-2021 @@ -8614,7 +8614,7 @@ Towards Incremental Learning of Word Embeddings Using Context Informativeness AlexandreKabbach KristinaGulordava - AurélieHerbelot + AurélieHerbelot 162–168 In this paper, we investigate the task of learning word embeddings from very sparse data in an incremental, cognitively-plausible way. We focus on the notion of ‘informativeness’, that is, the idea that some content is more valuable to the learning process than other. We further highlight the challenges of online learning and argue that previous systems fall short of implementing incrementality. Concretely, we incorporate informativeness in a previously proposed model of nonce learning, using it for context selection and learning rate modulation. We test our system on the task of learning new words from definitions, as well as on the task of learning new words from potentially uninformative contexts. We demonstrate that informativeness is crucial to obtaining state-of-the-art performance in a truly incremental setup. P19-2022 @@ -8647,7 +8647,7 @@ Vamshi KrishnaSrirangam Appidi AbhinavReddy VinaySingh - ManishShrivastava + ManishShrivastava 183–189 Named Entity Recognition(NER) is one of the important tasks in Natural Language Processing(NLP) and also is a subtask of Information Extraction. In this paper we present our work on NER in Telugu-English code-mixed social media data. Code-Mixing, a progeny of multilingualism is a way in which multilingual people express themselves on social media by using linguistics units from different languages within a sentence or speech context. Entity Extraction from social media data such as tweets(twitter) is in general difficult due to its informal nature, code-mixed data further complicates the problem due to its informal, unstructured and incomplete information. We present a Telugu-English code-mixed corpus with the corresponding named entity tags. The named entities used to tag data are Person(‘Per’), Organization(‘Org’) and Location(‘Loc’). We experimented with the machine learning models Conditional Random Fields(CRFs), Decision Trees and BiLSTMs on our corpus which resulted in a F1-score of 0.96, 0.94 and 0.95 respectively. P19-2025 @@ -8658,7 +8658,7 @@ Joint Learning of Named Entity Recognition and Entity Linking Pedro HenriqueMartins ZitaMarinho - André F. T.Martins + André F. T.Martins 190–196 Named entity recognition (NER) and entity linking (EL) are two fundamentally related tasks, since in order to perform EL, first the mentions to entities have to be detected. However, most entity linking approaches disregard the mention detection part, assuming that the correct mentions have been previously detected. In this paper, we perform joint learning of NER and EL to leverage their relatedness and obtain a more robust and generalisable system. For that, we introduce a model inspired by the Stack-LSTM approach. We observe that, in fact, doing multi-task learning of NER and EL improves the performance in both tasks when comparing with models trained with individual objectives. Furthermore, we achieve results competitive with the state-of-the-art in both NER and EL. P19-2026 @@ -8741,8 +8741,8 @@ From Bilingual to Multilingual Neural Machine Translation by Incremental Training CarlosEscolano - Marta R.Costa-jussà - José A. R.Fonollosa + Marta R.Costa-jussà + José A. R.Fonollosa 236–242 Multilingual Neural Machine Translation approaches are based on the use of task specific models and the addition of one more language can only be done by retraining the whole system. In this work, we propose a new training schedule that allows the system to scale to more languages without modification of the previous components based on joint training and language-independent encoder/decoder modules allowing for zero-shot translation. This work in progress shows close results to state-of-the-art in the WMT task. P19-2033 @@ -8787,7 +8787,7 @@ Normalizing Non-canonical <fixed-case>T</fixed-case>urkish Texts Using Machine Translation Approaches TalhaÇolakoğlu UmutSulubacak - Ahmet CüneydTantuğ + Ahmet CüneydTantuğ 267–272 With the growth of the social web, user-generated text data has reached unprecedented sizes. Non-canonical text normalization provides a way to exploit this as a practical source of training data for language processing systems. The state of the art in Turkish text normalization is composed of a token level pipeline of modules, heavily dependent on external linguistic resources and manually defined rules. Instead, we propose a fully automated, context-aware machine translation approach with fewer stages of processing. Experiments with various implementations of our approach show that we are able to surpass the current best-performing system by a large margin. P19-2037 @@ -8799,7 +8799,7 @@ ArijitGhosh Chowdhury AniketDidolkar RamitSawhney - Rajiv RatnShah + Rajiv RatnShah 273–280 The rapid widespread of social media has lead to some undesirable consequences like the rapid increase of hateful content and offensive language. Religious Hate Speech, in particular, often leads to unrest and sometimes aggravates to violence against people on the basis of their religious affiliations. The richness of the Arabic morphology and the limited available resources makes this task especially challenging. The current state-of-the-art approaches to detect hate speech in Arabic rely entirely on textual (lexical and semantic) cues. Our proposed methodology contends that leveraging Community-Interaction can better help us profile hate speech content on social media. Our proposed ARHNet (Arabic Religious Hate Speech Net) model incorporates both Arabic Word Embeddings and Social Network Graphs for the detection of religious hate speech. P19-2038 @@ -8812,7 +8812,7 @@ RohanMishra Pradyumna PrakharSinha RamitSawhney - Rajiv RatnShah + Rajiv RatnShah 281–287 Analyzing polarities and sentiments inherent in political speeches and debates poses an important problem today. This experiment aims to address this issue by analyzing publicly-available Hansard transcripts of the debates conducted in the UK Parliament. Our proposed approach, which uses community-based graph information to augment hand-crafted features based on topic modeling and emotion detection on debate transcripts, currently surpasses the benchmark results on the same dataset. Such sentiment classification systems could prove to be of great use in today’s politically turbulent times, for public knowledge of politicians’ stands on various relevant issues proves vital for good governance and citizenship. The experiments also demonstrate that continuous feature representations learned from graphs can improve performance on sentiment classification tasks significantly. P19-2039 @@ -8835,7 +8835,7 @@ HichamEl Boukkouri OlivierFerret ThomasLavergne - PierreZweigenbaum + PierreZweigenbaum 295–301 Using pre-trained word embeddings in conjunction with Deep Learning models has become the “de facto” approach in Natural Language Processing (NLP). While this usually yields satisfactory results, off-the-shelf word embeddings tend to perform poorly on texts from specialized domains such as clinical reports. Moreover, training specialized word representations from scratch is often either impossible or ineffective due to the lack of large enough in-domain data. In this work, we focus on the clinical domain for which we study embedding strategies that rely on general-domain resources only. We show that by combining off-the-shelf contextual embeddings (ELMo) with static word2vec embeddings trained on a small in-domain corpus built from the task data, we manage to reach and sometimes outperform representations learned from a large corpus in the medical domain. P19-2041 @@ -8865,7 +8865,7 @@ Improving Neural Entity Disambiguation with Graph Embeddings ÖzgeSevgili AlexanderPanchenko - ChrisBiemann + ChrisBiemann 315–322 Entity Disambiguation (ED) is the task of linking an ambiguous entity mention to a corresponding entry in a knowledge base. Current methods have mostly focused on unstructured text data to learn representations of entities, however, there is structured information in the knowledge base itself that should be useful to disambiguate entities. In this work, we propose a method that uses graph embeddings for integrating structured information from the knowledge base with unstructured information from text-based representations. Our experiments confirm that graph embeddings trained on a graph of hyperlinks between Wikipedia articles improve the performances of simple feed-forward neural ED model and a state-of-the-art neural ED system. P19-2044 @@ -8876,7 +8876,7 @@ Hierarchical Multi-label Classification of Text with Capsule Networks RamiAly SteffenRemus - ChrisBiemann + ChrisBiemann 323–330 Capsule networks have been shown to demonstrate good performance on structured data in the area of visual inference. In this paper we apply and compare simple shallow capsule networks for hierarchical multi-label text classification and show that they can perform superior to other neural networks, such as CNNs and LSTMs, and non-neural network architectures such as SVMs. For our experiments, we use the established Web of Science (WOS) dataset and introduce a new real-world scenario dataset, the BlurbGenreCollection (BGC). Our results confirm the hypothesis that capsule networks are especially advantageous for rare events and structurally diverse categories, which we attribute to their ability to combine latent encoded information. P19-2045 @@ -8886,7 +8886,7 @@ Convolutional Neural Networks for Financial Text Regression NeşatDereli - MuratSaraclar + MuratSaraclar 331–337 Forecasting financial volatility of a publicly-traded company from its annual reports has been previously defined as a text regression problem. Recent studies use a manually labeled lexicon to filter the annual reports by keeping sentiment words only. In order to remove the lexicon dependency without decreasing the performance, we replace bag-of-words model word features by word embedding vectors. Using word vectors increases the number of parameters. Considering the increase in number of parameters and excessive lengths of annual reports, a convolutional neural network model is proposed and transfer learning is applied. Experimental results show that the convolutional neural network model provides more accurate volatility predictions than lexicon based models. P19-2046 @@ -8920,7 +8920,7 @@ Scheduled Sampling for Transformers TsvetomilaMihaylova - André F. T.Martins + André F. T.Martins 351–356 Scheduled sampling is a technique for avoiding one of the known problems in sequence-to-sequence generation: exposure bias. It consists of feeding the model a mix of the teacher forced embeddings and the model predictions from the previous step in training time. The technique has been used for improving model performance with recurrent neural networks (RNN). In the Transformer model, unlike the RNN, the generation of a new word attends to the full sentence generated so far, not only to the last word, and it is not straightforward to apply the scheduled sampling technique. We propose some structural changes to allow scheduled sampling to be applied to Transformer architectures, via a two-pass decoding strategy. Experiments on two language pairs achieve performance close to a teacher-forcing baseline and show that this technique is promising for further exploration. P19-2049 @@ -8930,7 +8930,7 @@ <fixed-case>BREAKING</fixed-case>! Presenting Fake News Corpus for Automated Fact Checking ArchitaPathak - RohiniSrihari + RohiniSrihari 357–362 Popular fake news articles spread faster than mainstream articles on the same topic which renders manual fact checking inefficient. At the same time, creating tools for automatic detection is as challenging due to lack of dataset containing articles which present fake or manipulated stories as compelling facts. In this paper, we introduce manually verified corpus of compelling fake and questionable news articles on the USA politics, containing around 700 articles from Aug-Nov, 2016. We present various analyses on this corpus and finally implement classification model based on linguistic features. This work is still in progress as we plan to extend the dataset in the future and use it for our approach towards automated fake news detection. 10.18653/v1/P19-2050 @@ -8954,7 +8954,7 @@ Yash KumarLal VaibhavKumar MrinalDhar - ManishShrivastava + ManishShrivastava PhilippKoehn 371–377 Code-mixing is the phenomenon of mixing the vocabulary and syntax of multiple languages in the same sentence. It is an increasingly common occurrence in today’s multilingual society and poses a big challenge when encountered in different downstream tasks. In this paper, we present a hybrid architecture for the task of Sentiment Analysis of English-Hindi code-mixed data. Our method consists of three components, each seeking to alleviate different issues. We first generate subword level representations for the sentences using a CNN architecture. The generated representations are used as inputs to a Dual Encoder Network which consists of two different BiLSTMs - the Collective and Specific Encoder. The Collective Encoder captures the overall sentiment of the sentence, while the Specific Encoder utilizes an attention mechanism in order to focus on individual sentiment-bearing sub-words. This, combined with a Feature Network consisting of orthographic features and specially trained word embeddings, achieves state-of-the-art results - 83.54% accuracy and 0.827 F1 score - on a benchmark dataset. @@ -9057,7 +9057,7 @@ Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: System Demonstrations P19-3 - Marta R.Costa-jussà + Marta R.Costa-jussà EnriqueAlfonseca Association for Computational Linguistics
Florence, Italy
@@ -9096,9 +9096,9 @@ WolfgangJentner FabianSperrle RitaSevastjanova - AnnetteHautli-Janisz + AnnetteHautli-Janisz MiriamButt - DanielKeim + DanielKeim 13–18 We present a modular framework for the rapid-prototyping of linguistic, web-based, visual analytics applications. Our framework gives developers access to a rich set of machine learning and natural language processing steps, through encapsulating them into micro-services and combining them into a computational pipeline. This processing pipeline is auto-configured based on the requirements of the visualization front-end, making the linguistic processing and visualization design, detached independent development tasks. This paper describes the constellation and modality of our framework, which continues to support the efficient development of various human-in-the-loop, linguistic visual analytics research techniques and applications. P19-3003 @@ -9178,7 +9178,7 @@
An adaptable task-oriented dialog system for stand-alone embedded devices - LongDuong + LongDuong Vu Cong DuyHoang Tuyen QuangPham Yu-HengHong @@ -9196,9 +9196,9 @@ <fixed-case>A</fixed-case>lpaca<fixed-case>T</fixed-case>ag: An Active Learning-based Crowd Annotation Framework for Sequence Tagging - Bill YuchenLin + Bill YuchenLin Dong-HoLee - Frank F.Xu + Frank F.Xu OuyuLan XiangRen 58–63 @@ -9209,7 +9209,7 @@ <fixed-case>C</fixed-case>onv<fixed-case>L</fixed-case>ab: Multi-Domain End-to-End Dialog System Platform - SungjinLee + SungjinLee QiZhu RyuichiTakanobu ZhengZhang @@ -9229,12 +9229,12 @@ Demonstration of a Neural Machine Translation System with Online Learning for Translators MiguelDomingo - MercedesGarcía-Martínez + MercedesGarcía-Martínez AmandoEstela Pastor LaurentBié AlexanderHelle - ÁlvaroPeris - FranciscoCasacuberta + ÁlvaroPeris + FranciscoCasacuberta ManuelHerranz Pérez 70–74 We present a demonstration of our system, which implements online learning for neural machine translation in a production environment. These techniques allow the system to continuously learn from the corrections provided by the translators. We implemented an end-to-end platform integrating our machine translation servers to one of the most common user interfaces for professional translators: SDL Trados Studio. We pretend to save post-editing effort as the machine is continuously learning from its mistakes and adapting the models to a specific domain or user style. @@ -9245,7 +9245,7 @@ <fixed-case>FASTD</fixed-case>ial: Abstracting Dialogue Policies for Fast Development of Task Oriented Agents Serra SinemTekiroglu - BernardoMagnini + BernardoMagnini MarcoGuerini 75–80 We present a novel abstraction framework called FASTDial for designing task oriented dialogue agents, built on top of the OpenDial toolkit. This framework is meant to facilitate prototyping and development of dialogue systems from scratch also by non tech savvy especially when limited training data is available. To this end, we use a generic and simple frame-slots data-structure with pre-defined dialogue policies that allows for fast design and implementation at the price of some flexibility reduction. Moreover, it allows for minimizing programming effort and domain expert training time, by hiding away many implementation details. We provide a system demonstration screencast video in the following link: https://vimeo.com/329840716 @@ -9255,8 +9255,8 @@ A Neural, Interactive-predictive System for Multimodal Sequence to Sequence Tasks - ÁlvaroPeris - FranciscoCasacuberta + ÁlvaroPeris + FranciscoCasacuberta 81–86 We present a demonstration of a neural interactive-predictive system for tackling multimodal sequence to sequence tasks. The system generates text predictions to different sequence to sequence tasks: machine translation, image and video captioning. These predictions are revised by a human agent, who introduces corrections in the form of characters. The system reacts to each correction, providing alternative hypotheses, compelling with the feedback provided by the user. The final objective is to reduce the human effort required during this correction process. This system is implemented following a client-server architecture. For accessing the system, we developed a website, which communicates with the neural model, hosted in a local server. From this website, the different tasks can be tackled following the interactive–predictive framework. We open-source all the code developed for building this system. The demonstration in hosted in http://casmacat.prhlt.upv.es/interactive-seq2seq. P19-3014 @@ -9315,12 +9315,12 @@ An Environment for Relational Annotation of Political Debates - AndreBlessing + AndreBlessing NicoBlokker SebastianHaunss JonasKuhn GabriellaLapesa - SebastianPadó + SebastianPadó 105–110 This paper describes the MARDY corpus annotation environment developed for a collaboration between political science and computational linguistics. The tool realizes the complete workflow necessary for annotating a large newspaper text collection with rich information about claims (demands) raised by politicians and other actors, including claim and actor spans, relations, and polarities. In addition to the annotation GUI, the tool supports the identification of relevant documents, text pre-processing, user management, integration of external knowledge bases, annotation comparison and merging, statistical analysis, and the incorporation of machine learning models as “pseudo-annotators”. P19-3018 @@ -9331,7 +9331,7 @@ <fixed-case>GLTR</fixed-case>: Statistical Detection and Visualization of Generated Text SebastianGehrmann HendrikStrobelt - AlexanderRush + AlexanderRush 111–116 The rapid improvement of language models has raised the specter of abuse of text generation systems. This progress motivates the development of simple methods for detecting generated text that can be used by non-experts. In this work, we introduce GLTR, a tool to support humans in detecting whether a text was generated by a model. GLTR applies a suite of baseline statistical methods that can detect generation artifacts across multiple sampling schemes. In a human-subjects study, we show that the annotation scheme provided by GLTR improves the human detection-rate of fake text from 54% to 72% without any prior training. GLTR is open-source and publicly deployed, and has already been widely used to detect generated outputs. P19-3019 @@ -9340,11 +9340,11 @@ <fixed-case>O</fixed-case>pen<fixed-case>K</fixed-case>iwi: An Open Source Framework for Quality Estimation - FabioKepler + FabioKepler JonayTrénous MarcosTreviso MiguelVera - André F. T.Martins + André F. T.Martins 117–122 We introduce OpenKiwi, a Pytorch-based open source framework for translation quality estimation. OpenKiwi supports training and testing of word-level and sentence-level quality estimation systems, implementing the winning systems of the WMT 2015–18 quality estimation campaigns. We benchmark OpenKiwi on two datasets from WMT 2018 (English-German SMT and NMT), yielding state-of-the-art performance on the word-level tasks and near state-of-the-art in the sentence-level tasks. P19-3020 @@ -9365,7 +9365,7 @@ MichelGalley ChrisBrockett TulasiMenon - BillDolan + BillDolan 123–128 The Intelligent Conversation Engine: Code and Pre-trained Systems (Microsoft Icecaps) is an upcoming open-source natural language processing repository. Icecaps wraps TensorFlow functionality in a modular component-based architecture, presenting an intuitive and flexible paradigm for constructing sophisticated learning setups. Capabilities include multitask learning between models with shared parameters, upgraded language model decoding features, a range of built-in architectures, and a user-friendly data processing pipeline. The system is targeted toward conversational tasks, exploring diverse response generation, coherence, and knowledge grounding. Icecaps also provides pre-trained conversational models that can be either used directly or loaded for fine-tuning or bootstrapping other models; these models power an online demo of our framework. P19-3021 @@ -9390,7 +9390,7 @@ YunyaoLi EserKandogan YiweiYang - WalterLasecki + WalterLasecki 135–140 While the role of humans is increasingly recognized in machine learning community, representation of and interaction with models in current human-in-the-loop machine learning (HITL-ML) approaches are too low-level and far-removed from human’s conceptual models. We demonstrate HEIDL, a prototype HITL-ML system that exposes the machine-learned model through high-level, explainable linguistic expressions formed of predicates representing semantic structure of text. In HEIDL, human’s role is elevated from simply evaluating model predictions to interpreting and even updating the model logic directly by enabling interaction with rule predicates themselves. Raising the currency of interaction to such semantic levels calls for new interaction paradigms between humans and machines that result in improved productivity for text analytics model development process. Moreover, by involving humans in the process, the human-machine co-created models generalize better to unseen data as domain experts are able to instill their expertise by extrapolating from what has been learned by automated algorithms from few labelled data. P19-3023 @@ -9403,7 +9403,7 @@ BeataBeigman Klebanov AnastassiaLoukina BinodGyawali - PatrickLange + PatrickLange JohnSabatini MichaelFlor 141–146 @@ -9454,7 +9454,7 @@ XiaodanLiang WanrongZhu DevendraSachan - EricXing + EricXing 159–164 We introduce Texar, an open-source toolkit aiming to support the broad set of text generation tasks that transform any inputs into natural language, such as machine translation, summarization, dialog, content manipulation, and so forth. With the design goals of modularity, versatility, and extensibility in mind, Texar extracts common patterns underlying the diverse tasks and methodologies, creates a library of highly reusable modules and functionalities, and allows arbitrary model architectures and algorithmic paradigms. In Texar, model architecture, inference, and learning processes are properly decomposed. Modules at a high concept level can be freely assembled or plugged in/swapped out. Texar is thus particularly suitable for researchers and practitioners to do fast prototyping and experimentation. The versatile toolkit also fosters technique sharing across different text generation tasks. Texar supports both TensorFlow and PyTorch, and is released under Apache License 2.0 at https://www.texar.io. P19-3027 @@ -9500,7 +9500,7 @@ PhilippHeidenreich AlexanderBondarenko MatthiasHagen - ChrisBiemann + ChrisBiemann AlexanderPanchenko 195–200 We present TARGER, an open source neural argument mining framework for tagging arguments in free input texts and for keyword-based retrieval of arguments from an argument-tagged web-scale corpus. The currently available models are pre-trained on three recent argument mining datasets and enable the use of neural argument mining without any reproducibility effort on the user’s side. The open source code ensures portability to other domains and use cases. @@ -9524,7 +9524,7 @@ Wen-BinHan Jhih-JieChen ChingyuYang - JasonChang + JasonChang 207–212 We introduce a method for generating suggestions on a given sentence for improving the proficiency level. In our approach, the sentence is transformed into a sequence of grammatical elements aimed at providing suggestions of more advanced grammar elements based on originals. The method involves parsing the sentence, identifying grammatical elements, and ranking related elements to recommend a higher level of grammatical element. We present a prototype tutoring system, Level-Up, that applies the method to English learners’ essays in order to assist them in writing and reading. Evaluation on a set of essays shows that our method does assist user in writing. P19-3033 @@ -9541,7 +9541,7 @@ Kai-WenTuan Chung-TingTsai Wen-BinHan - JasonChang + JasonChang 213–218 We introduce a system aimed at improving and expanding second language learners’ English vocabulary. In addition to word definitions, we provide rich lexical information such as collocations and grammar patterns for target words. We present Linggle Booster that takes an article, identifies target vocabulary, provides lexical information, and generates a quiz on target words. Linggle Booster also links named-entity to corresponding Wikipedia pages. Evaluation on a set of target words shows that the method have reasonably good performance in terms of generating useful and information for learning vocabulary. P19-3034 @@ -9553,7 +9553,7 @@ Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Tutorial Abstracts P19-4 - PreslavNakov + PreslavNakov AlexisPalmer Association for Computational Linguistics
Florence, Italy
@@ -9567,7 +9567,7 @@ Latent Structure Models for Natural Language Processing - André F. T.Martins + André F. T.Martins TsvetomilaMihaylova NikitaNangia VladNiculae @@ -9582,7 +9582,7 @@ Graph-Based Meaning Representations: Design and Processing AlexanderKoller StephanOepen - WeiweiSun + WeiweiSun 6–11 This tutorial is on representing and processing sentence meaning in the form of labeled directed graphs. The tutorial will (a) briefly review relevant background in formal and linguistic semantics; (b) semi-formally define a unified abstract view on different flavors of semantic graphs and associated terminology; (c) survey common frameworks for graph-based meaning representation and available graph banks; and (d) offer a technical overview of a representative selection of different parsing approaches. P19-4002 @@ -9591,9 +9591,9 @@ Discourse Analysis and Its Applications - ShafiqJoty + ShafiqJoty GiuseppeCarenini - RaymondNg + RaymondNg GabrielMurray 12–17 Discourse processing is a suite of Natural Language Processing (NLP) tasks to uncover linguistic structures from texts at several levels, which can support many downstream applications. This involves identifying the topic structure, the coherence structure, the coreference structure, and the conversation structure for conversational discourse. Taken together, these structures can inform text summarization, machine translation, essay scoring, sentiment analysis, information extraction, question answering, and thread recovery. The tutorial starts with an overview of basic concepts in discourse analysis – monologue vs. conversation, synchronous vs. asynchronous conversation, and key linguistic structures in discourse analysis. We also give an overview of linguistic structures and corresponding discourse analysis tasks that discourse researchers are generally interested in, as well as key applications on which these discourse structures have an impact. @@ -9605,7 +9605,7 @@ Computational Analysis of Political Texts: Bridging Research Efforts Across Communities GoranGlavaš FedericoNanni - Simone PaoloPonzetto + Simone PaoloPonzetto 18–23 In the last twenty years, political scientists started adopting and developing natural language processing (NLP) methods more actively in order to exploit text as an additional source of data in their analyses. Over the last decade the usage of computational methods for analysis of political texts has drastically expanded in scope, allowing for a sustained growth of the text-as-data community in political science. In political science, NLP methods have been extensively used for a number of analyses types and tasks, including inferring policy position of actors from textual evidence, detecting topics in political texts, and analyzing stylistic aspects of political texts (e.g., assessing the role of language ambiguity in framing the political agenda). Just like in numerous other domains, much of the work on computational analysis of political texts has been enabled and facilitated by the development of resources such as, the topically coded electoral programmes (e.g., the Manifesto Corpus) or topically coded legislative texts (e.g., the Comparative Agenda Project). Political scientists created resources and used available NLP methods to process textual data largely in isolation from the NLP community. At the same time, NLP researchers addressed closely related tasks such as election prediction, ideology classification, and stance detection. In other words, these two communities have been largely agnostic of one another, with NLP researchers mostly unaware of interesting applications in political science and political scientists not applying cutting-edge NLP methodology to their problems. The main goal of this tutorial is to systematize and analyze the body of research work on political texts from both communities. We aim to provide a gentle, all-round introduction to methods and tasks related to computational analysis of political texts. Our vision is to bring the two research communities closer to each other and contribute to faster and more significant developments in this interdisciplinary research area. P19-4004 @@ -9614,7 +9614,7 @@ <fixed-case>W</fixed-case>ikipedia as a Resource for Text Analysis and Retrieval - MariusPasca + MariusPasca 24 This tutorial examines the role of Wikipedia in tasks related to text analysis and retrieval. Text analysis tasks, which take advantage of Wikipedia, include coreference resolution, word sense and entity disambiguation and information extraction. In information retrieval, a better understanding of the structure and meaning of queries helps in matching queries against documents, clustering search results, answer and entity retrieval and retrieving knowledge panels for queries asking about popular entities. P19-4005 @@ -9623,7 +9623,7 @@ Deep <fixed-case>B</fixed-case>ayesian Natural Language Processing - Jen-TzungChien + Jen-TzungChien 25–30 This introductory tutorial addresses the advances in deep Bayesian learning for natural language with ubiquitous applications ranging from speech recognition to document summarization, text classification, text segmentation, information extraction, image caption generation, sentence generation, dialogue control, sentiment classification, recommendation system, question answering and machine translation, to name a few. Traditionally, “deep learning” is taken to be a learning process where the inference or optimization is based on the real-valued deterministic model. The “semantic structure” in words, sentences, entities, actions and documents drawn from a large vocabulary may not be well expressed or correctly optimized in mathematical logic or computer programs. The “distribution function” in discrete or continuous latent variable model for natural language may not be properly decomposed or estimated. This tutorial addresses the fundamentals of statistical models and neural networks, and focus on a series of advanced Bayesian models and deep models including hierarchical Dirichlet process, Chinese restaurant process, hierarchical Pitman-Yor process, Indian buffet process, recurrent neural network, long short-term memory, sequence-to-sequence model, variational auto-encoder, generative adversarial network, attention mechanism, memory-augmented neural network, skip neural network, stochastic neural network, predictive state neural network and policy neural network. We present how these models are connected and why they work for a variety of applications on symbolic and complex patterns in natural language. The variational inference and sampling method are formulated to tackle the optimization for complicated models. The word and sentence embeddings, clustering and co-clustering are merged with linguistic and semantic constraints. A series of case studies and domain applications are presented to tackle different issues in deep Bayesian processing, learning and understanding. At last, we will point out a number of directions and outlooks for future studies. P19-4006 @@ -9633,7 +9633,7 @@ Unsupervised Cross-Lingual Representation Learning SebastianRuder - AndersSøgaard + AndersSøgaard IvanVulić 31–38 In this tutorial, we provide a comprehensive survey of the exciting recent work on cutting-edge weakly-supervised and unsupervised cross-lingual word representations. After providing a brief history of supervised cross-lingual word representations, we focus on: 1) how to induce weakly-supervised and unsupervised cross-lingual word representations in truly resource-poor settings where bilingual supervision cannot be guaranteed; 2) critical examinations of different training conditions and requirements under which unsupervised algorithms can and cannot work effectively; 3) more robust methods for distant language pairs that can mitigate instability issues and low performance for distant language pairs; 4) how to comprehensively evaluate such representations; and 5) diverse applications that benefit from cross-lingual word representations (e.g., MT, dialogue, cross-lingual sequence labeling and structured prediction applications, cross-lingual IR). @@ -9644,7 +9644,7 @@ Advances in Argument Mining KatarzynaBudzynska - ChrisReed + ChrisReed 39–42 This course aims to introduce students to an exciting and dynamic area that has witnessed remarkable growth over the past 36 months. Argument mining builds on opinion mining, sentiment analysis and related to tasks to automatically extract not just *what* people think, but *why* they hold the opinions they do. From being largely beyond the state of the art barely five years ago, there are now many hundreds of papers on the topic, millions of dollars of commercial and research investment, and the 6th ACL workshop on the topic will be in Florence in 2019. The tutors have delivered tutorials on argument mining at ACL 2016, at IJCAI 2016 and at ESSLLI 2017; for ACL 2019, we have developed a tutorial that provides a synthesis of the major advances in the area over the past three years. P19-4008 diff --git a/data/xml/P79.xml b/data/xml/P79.xml index 9b8533ef75..b6658aef5a 100644 --- a/data/xml/P79.xml +++ b/data/xml/P79.xml @@ -23,7 +23,7 @@ Towards a Self-Extending Parser - Jaime G.Carbonell + Jaime G.Carbonell 10.3115/982163.982166 3–7 P79-1002 @@ -47,7 +47,7 @@ Toward a Computational Theory of Speech Perception - JonathanAll + JonathanAll 10.3115/982163.982169 17–17 P79-1005 @@ -55,8 +55,8 @@ Ungrammaticality and Extra-Grammaticality in Natural Language Understanding Systems - Stan C.Kwasny - Norman K.Sondheimer + Stan C.Kwasny + Norman K.Sondheimer 10.3115/982163.982170 19–23 P79-1006 @@ -64,7 +64,7 @@ Generalized Augmented Transitiom Network Grammars for Generation From Semantic Networks - Stuart C.Shapiro + Stuart C.Shapiro 10.3115/982163.982171 25–29 P79-1007 @@ -72,7 +72,7 @@ Knowledge Organization and Application: Brief Comments on Papers in the Session - Aravind K.Joshi + Aravind K.Joshi 10.3115/982163.982173 31–31 P79-1008 @@ -105,7 +105,7 @@ A Snapshot of <fixed-case>KDS</fixed-case>: A Knowledge Delivery System James A.Moore - William C.Mann + William C.Mann 10.3115/982163.982177 51–52 P79-1012 @@ -129,7 +129,7 @@ Discourse: Codes and Clues in Contexts - Jane J.Robinson + Jane J.Robinson 10.3115/982163.982181 65–65 P79-1015 @@ -137,7 +137,7 @@ Paraphrasing Using Given and New Information in a Question-Answer System - Kathleen R.McKeown + Kathleen R.McKeown 10.3115/982163.982182 67–72 P79-1016 @@ -153,7 +153,7 @@ The Role Of Focussing in Interpretation of Pronouns - Candace L.Sidner + Candace L.Sidner 10.3115/982163.982184 77–78 P79-1018 @@ -170,7 +170,7 @@ Design for Dialogue Comprehension - William C.Mann + William C.Mann 10.3115/982163.982186 83–84 P79-1020 @@ -178,7 +178,7 @@ Plans, Inference, and Indirect Speech Acts - James F.Allen + James F.Allen 10.3115/982163.982187 85–87 P79-1021 @@ -203,7 +203,7 @@ An Application of Automated Language Understanding Techniques to the Generation of Data Base Elements GeorgetteSilva - ChristineMontgomery + ChristineMontgomery DonDwiggins 10.3115/982163.982191 95–97 @@ -211,7 +211,7 @@ Response Generation in Question - Answering Systems - RalphGrishman + RalphGrishman 10.3115/982163.982192 99–101 P79-1025 @@ -228,7 +228,7 @@ Prospects for Computer-Assisted Dialect Adaption David J.Weber - William C.Mann + William C.Mann 10.3115/982163.982194 109–110 P79-1027 diff --git a/data/xml/P80.xml b/data/xml/P80.xml index 8c6cdbd4e3..cfae10ef5d 100644 --- a/data/xml/P80.xml +++ b/data/xml/P80.xml @@ -23,7 +23,7 @@ Understanding Scene Descriptions as Event Simulations - David L.Waltz + David L.Waltz 10.3115/981436.981439 7–11 P80-1002 @@ -39,7 +39,7 @@ Metaphor - A Key to Extensible Semantic Analysis - Jaime G.Carbonell + Jaime G.Carbonell 10.3115/981436.981441 17–21 P80-1004 @@ -55,7 +55,7 @@ Interactive Discourse: Influence of Problem Context Panel Chair’s Introduction - BarbaraGrosz + BarbaraGrosz 10.3115/981436.981444 25–25 P80-1006 @@ -71,7 +71,7 @@ Signalling the Interpretation of Indirect Speech Acts - Philip R.Cohen + Philip R.Cohen 10.3115/981436.981446 29–30 P80-1008 @@ -79,7 +79,7 @@ Parasession on Topics in Interactive Discourse Influence of the Problem Context - Aravind K.Joshi + Aravind K.Joshi 10.3115/981436.981447 31–33 P80-1009 @@ -104,7 +104,7 @@ Phrase Structure Trees Bear More Fruit Than You Would Have Thought - Aravind K.Joshi + Aravind K.Joshi Leon S.Levy 10.3115/981436.981451 41–42 @@ -121,7 +121,7 @@ Computational Analogues of Constraints on Grammars: A Model of Syntactic Acquisition - Robert CregarBerwick + Robert CregarBerwick 10.3115/981436.981453 49–53 P80-1014 @@ -129,7 +129,7 @@ A Linear-time Model of Language Production: some psychological implications (extended abstract) - David D.McDonald + David D.McDonald 10.3115/981436.981454 55–57 P80-1015 @@ -137,7 +137,7 @@ Problem Solving Applied to Language Generation - Douglas E.Appelt + Douglas E.Appelt 10.3115/981436.981455 59–63 P80-1016 @@ -145,7 +145,7 @@ Interactive Discourse: Influence of the Social Context: Panel Chair’s Introduction - Jerry R.Hobbs + Jerry R.Hobbs 10.3115/981436.981457 65–66 P80-1017 @@ -187,7 +187,7 @@ The Computer as an Active Communication Medium - John C.Thomas + John C.Thomas 10.3115/981436.981462 83–86 P80-1022 @@ -211,7 +211,7 @@ If The Parser Fails - Ralph M.Weischedel + Ralph M.Weischedel John E.Black 10.3115/981436.981466 95–95 @@ -238,7 +238,7 @@ On Parsing Strategies and Closure - KennethChurch + KennethChurch 10.3115/981436.981469 107–111 P80-1028 @@ -273,7 +273,7 @@ Interactive Discourse: Looking to the Future: Panel Chair’s Introduction - Bonnie LynnWebber + Bonnie LynnWebber 10.3115/981436.981474 127–127 P80-1032 @@ -289,7 +289,7 @@ Future Prospects for Computational Linguistics - Gary G.Hendrix + Gary G.Hendrix 10.3115/981436.981476 131–135 P80-1034 @@ -367,7 +367,7 @@ Real Reading Behavior RobertThibadeau - MarcelJust + MarcelJust PatriciaCarpenter 10.3115/981436.981486 159–162 diff --git a/data/xml/P81.xml b/data/xml/P81.xml index e927152db6..6834cb1d4c 100644 --- a/data/xml/P81.xml +++ b/data/xml/P81.xml @@ -23,7 +23,7 @@ Computational Complexity and <fixed-case>L</fixed-case>exical <fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar - Robert C.Berwick + Robert C.Berwick 10.3115/981923.981926 7–12 P81-1002 @@ -31,7 +31,7 @@ Corepresentational Grammar and Parsing <fixed-case>E</fixed-case>nglish Comparatives - KarenRyan + KarenRyan 10.3115/981923.981927 13–18 P81-1003 @@ -48,7 +48,7 @@ <fixed-case>PHONY</fixed-case>: A Heuristic Phonological Analyzer - Lee A.Becker + Lee A.Becker 10.3115/981923.981929 23–27 P81-1005 @@ -56,7 +56,7 @@ Evaluation of Natural Language Interfaces to Database Systems: A Panel Discussion - Norman K.Sondheimer + Norman K.Sondheimer 10.3115/981923.981931 29–29 P81-1006 @@ -72,7 +72,7 @@ Selective Planning of Interface Evaluations - William C.Mann + William C.Mann 10.3115/981923.981933 33–34 P81-1008 @@ -88,7 +88,7 @@ What Makes Evaluation Hard? - HarryTennant + HarryTennant 10.3115/981923.981935 37–38 P81-1010 @@ -104,7 +104,7 @@ Two Discourse Generators - William C.Mann + William C.Mann 10.3115/981923.981938 43–47 P81-1012 @@ -112,7 +112,7 @@ A Grammar and a Lexicon for a Text-Production System - Christian M.I.M.Matthiessen + Christian M.I.M.Matthiessen 10.3115/981923.981939 49–55 P81-1013 @@ -120,7 +120,7 @@ Language Production: the Source of the Dictionary - David D.McDonald + David D.McDonald 10.3115/981923.981940 57–62 P81-1014 @@ -144,7 +144,7 @@ What’s Necessary to Hide?: Modeling Action Verbs - James F.Allen + James F.Allen 10.3115/981923.981944 77–81 P81-1017 @@ -152,7 +152,7 @@ A Rule-based Conversation Participant - Robert E.Frederking + Robert E.Frederking 10.3115/981923.981945 83–87 P81-1018 @@ -168,7 +168,7 @@ Perspectives on Parsing Issues - Jane J.Robinson + Jane J.Robinson 10.3115/981923.981948 95–95 P81-1020 @@ -176,8 +176,8 @@ Some Issues in Parsing and Natural Language Understanding - Robert J.Bobrow - Bonnie L.Webber + Robert J.Bobrow + Bonnie L.Webber 10.3115/981923.981949 97–99 P81-1021 @@ -185,7 +185,7 @@ Parsing - RalphGrishman + RalphGrishman 10.3115/981923.981950 101–101 P81-1022 @@ -193,7 +193,7 @@ A View of Parsing - Ronald M.Kaplan + Ronald M.Kaplan 10.3115/981923.981951 103–104 P81-1023 @@ -209,7 +209,7 @@ Presupposition and Implicature in Model-Theoretic Pragmatics - Douglas B.Moran + Douglas B.Moran 10.3115/981923.981954 107–108 P81-1025 @@ -225,7 +225,7 @@ A Situation Semantics Approach to the Analysis of Speech Acts - David AndreoffEvans + David AndreoffEvans 10.3115/981923.981956 113–116 P81-1027 @@ -233,7 +233,7 @@ Problems in Logical Form - Robert C.Moore + Robert C.Moore 10.3115/981923.981957 117–124 P81-1028 @@ -241,7 +241,7 @@ A Case for Rule-Driven Semantic Processing - MarthaPalmer + MarthaPalmer 10.3115/981923.981958 125–131 P81-1029 @@ -266,7 +266,7 @@ Dynamic Strategy Selection in Flexible Parsing - Jaime G.Carbonell + Jaime G.Carbonell Philip J.Hayes 10.3115/981923.981962 143–147 @@ -283,8 +283,8 @@ Controlled Transformational Sentence Generation - MadeleineBates - RobertIngria + MadeleineBates + RobertIngria 10.3115/981923.981964 153–158 P81-1034 @@ -292,7 +292,7 @@ Transportable Natural-Language Interfaces to Databases - Gary G.Hendrix + Gary G.Hendrix William H.Lewis 10.3115/981923.981965 159–165 @@ -301,7 +301,7 @@ Chart Parsing and Rule Schemata in <fixed-case>PSG</fixed-case> - HenryThompson + HenryThompson 10.3115/981923.981966 167–172 P81-1036 diff --git a/data/xml/P82.xml b/data/xml/P82.xml index 0ff858a81c..14f4a6f4de 100644 --- a/data/xml/P82.xml +++ b/data/xml/P82.xml @@ -15,8 +15,8 @@ Translating <fixed-case>E</fixed-case>nglish Into Logical Form - Stanley J.Rosenschein - Stuart M.Shieber + Stanley J.Rosenschein + Stuart M.Shieber 10.3115/981251.981253 1–8 P82-1001 @@ -24,7 +24,7 @@ Linguistic and Computational Semantics - Brian CantwellSmith + Brian CantwellSmith 10.3115/981251.981254 9–15 P82-1002 @@ -32,7 +32,7 @@ The Representation of Inconsistent Information in a Dynamic Model-Theoretic Semantics - Douglas B.Moran + Douglas B.Moran 10.3115/981251.981255 16–18 P82-1003 @@ -40,7 +40,7 @@ What’s in a Semantic Network? - James F.Allen + James F.Allen Alan M.Frisch 10.3115/981251.981256 19–27 @@ -49,7 +49,7 @@ Dependencies of Discourse Structure on the Modality of Communication: Telephone vs. Teletype - Philip R.Cohen + Philip R.Cohen ScottFertig KathyStarr 10.3115/981251.981258 @@ -67,7 +67,7 @@ Natural-Language Access to Databases–Theoretical/Technical Issues - Robert C.Moore + Robert C.Moore 10.3115/981251.981261 44–45 P82-1007 @@ -75,7 +75,7 @@ Transportable Natural-Language Interfaces: Problems and Techniques - Barbara J.Grosz + Barbara J.Grosz 10.3115/981251.981262 46–50 P82-1008 @@ -91,7 +91,7 @@ <fixed-case>E</fixed-case>nglish Words and Data Bases: How to Bridge the Gap - Remko J. H.Scha + Remko J. H.Scha 10.3115/981251.981264 57–59 P82-1010 @@ -107,7 +107,7 @@ Issues in Natural Language Access to Databases From a Logic Programming Perspective - David H. D.Warren + David H. D.Warren 10.3115/981251.981266 63–66 P82-1012 @@ -124,14 +124,14 @@ Processing <fixed-case>E</fixed-case>nglish With a Generalized Phrase Structure Grammar - Jean MarkGawron + Jean MarkGawron JonathanKing JohnLamping EgonLoebner E. AnnePaulson - Geoffrey K.Pullum + Geoffrey K.Pullum Ivan A.Sag - ThomasWasow + ThomasWasow 10.3115/981251.981269 74–81 P82-1014 @@ -139,7 +139,7 @@ Experience with an Easily Computed Metric for Ranking Alternative Parses - George E.Heidorn + George E.Heidorn 10.3115/981251.981270 82–84 P82-1015 @@ -147,8 +147,8 @@ An Improved Heuristic for Ellipsis Processing - Ralph M.Weischedel - Norman K.Sondheimer + Ralph M.Weischedel + Norman K.Sondheimer 10.3115/981251.981271 85–88 P82-1016 @@ -204,7 +204,7 @@ Twenty Years of Reflections - Aravind K.Joshi + Aravind K.Joshi 10.3115/981251.981279 102–102 P82-1023 @@ -220,7 +220,7 @@ Reflections on Twenty Years of the <fixed-case>ACL</fixed-case> - JonathanAllen + JonathanAllen 10.3115/981251.981281 104–106 P82-1025 @@ -228,7 +228,7 @@ On the Present - Norman K.Sondheimer + Norman K.Sondheimer 10.3115/981251.981282 107–107 P82-1026 @@ -236,7 +236,7 @@ Planning Natural Language Referring Expressions - Douglas E.Appelt + Douglas E.Appelt 10.3115/981251.981284 108–112 P82-1027 @@ -244,7 +244,7 @@ The Text System for Natural Language Generation: An Overview - Kathleen R.McKeown + Kathleen R.McKeown 10.3115/981251.981285 113–120 P82-1028 @@ -252,7 +252,7 @@ Augmenting a Database Knowledge Representation for Natural Language Generation - Kathleen F.McCoy + Kathleen F.McCoy 10.3115/981251.981286 121–128 P82-1029 @@ -261,7 +261,7 @@ Salience: The Key to the Selection Problem in Natural Language Generation E. JeffreyConklin - David D.McDonald + David D.McDonald 10.3115/981251.981287 129–135 P82-1030 @@ -269,8 +269,8 @@ A Knowledge Engineering Approach to Natural Language Understanding - Stuart C.Shapiro - Jeannette G.Neal + Stuart C.Shapiro + Jeannette G.Neal 10.3115/981251.981289 136–144 P82-1031 @@ -286,7 +286,7 @@ Building Non-Normative Systems - The Search for Robustness: An Overview - Mitchell P.Marcus + Mitchell P.Marcus 10.3115/981251.981292 152–152 P82-1033 @@ -294,7 +294,7 @@ Design Dimensions for Non-Normative Understanding Systems - Robert J.Bobrow + Robert J.Bobrow 10.3115/981251.981293 153–156 P82-1034 @@ -310,8 +310,8 @@ On the Linguistic Character of Non-Standard Input - Anthony S.Kroch - DonaldHindle + Anthony S.Kroch + DonaldHindle 10.3115/981251.981295 161–163 P82-1036 @@ -319,7 +319,7 @@ Ill-Formed and Non-Standard Language Problems - StanKwasny + StanKwasny 10.3115/981251.981296 164–166 P82-1037 @@ -327,7 +327,7 @@ “Natural Language Texts Are Not Necessarily Grammatical and Unambiguous or <fixed-case>E</fixed-case>ven Complete.” - Lance A.Miller + Lance A.Miller 10.3115/981251.981297 167–168 P82-1038 diff --git a/data/xml/P83.xml b/data/xml/P83.xml index 5326f7b923..0d6ee6cf5f 100644 --- a/data/xml/P83.xml +++ b/data/xml/P83.xml @@ -15,7 +15,7 @@ Context-Freeness and the Computer Processing of Human Languages - Geoffrey K.Pullum + Geoffrey K.Pullum 10.3115/981311.981313 1–6 P83-1001 @@ -23,7 +23,7 @@ Factoring Recursion and Dependencies: An Aspect of <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars (<fixed-case>TAG</fixed-case>) and a Comparison of Some Formal Properties of <fixed-case>TAG</fixed-case>s, <fixed-case>GPSG</fixed-case>s, <fixed-case>PLG</fixed-case>s, and <fixed-case>LPG</fixed-case>s - Aravind K.Joshi + Aravind K.Joshi 10.3115/981311.981314 7–15 P83-1002 @@ -31,7 +31,7 @@ Crossed Serial Dependencies: A low-power parseable extension to <fixed-case>GPSG</fixed-case> - HenryThompson + HenryThompson 10.3115/981311.981315 16–21 P83-1003 @@ -39,10 +39,10 @@ Formal Constraints on Metarules - Stuart M.Shieber + Stuart M.Shieber Swan U.Stucky HansUszkoreit - Jane J.Robinson + Jane J.Robinson 10.3115/981311.981316 22–27 P83-1004 @@ -50,7 +50,7 @@ A Prolegomenon to Situation Semantics - David J.Israel + David J.Israel 10.3115/981311.981318 28–37 P83-1005 @@ -66,8 +66,8 @@ Providing a Unified Account of Definite Noun Phrases in Discourse - Barbara J.Grosz - Aravind K.Joshi + Barbara J.Grosz + Aravind K.Joshi ScottWeinstein 10.3115/981311.981320 44–50 @@ -84,7 +84,7 @@ An Improper Treatment of Quantification in Ordinary <fixed-case>E</fixed-case>nglish - Jerry R.Hobbs + Jerry R.Hobbs 10.3115/981311.981322 57–63 P83-1009 @@ -100,7 +100,7 @@ <fixed-case>TELEGRAM</fixed-case>: A Grammar Formalism for Language Planning - Douglas E.Appelt + Douglas E.Appelt 10.3115/981311.981325 74–78 P83-1011 @@ -108,7 +108,7 @@ An Overview of the <fixed-case>N</fixed-case>igel Text Generation Grammar - William C.Mann + William C.Mann 10.3115/981311.981326 79–84 P83-1012 @@ -116,7 +116,7 @@ Automatic Recognition of Intonation Patterns - Janet B.Pierrehumbert + Janet B.Pierrehumbert 10.3115/981311.981328 85–90 P83-1013 @@ -124,7 +124,7 @@ A Finite-State Parser for Use in Speech Recognition - Kenneth W.Church + Kenneth W.Church 10.3115/981311.981329 91–97 P83-1014 @@ -132,7 +132,7 @@ On the Mathematical Properties of Linguistic Theories - C. RaymondPerrault + C. RaymondPerrault 10.3115/981311.981331 98–105 P83-1015 @@ -148,7 +148,7 @@ Sentence Disambiguation by a Shift-Reduce Parsing Technique - Stuart M.Shieber + Stuart M.Shieber 10.3115/981311.981334 113–118 P83-1017 @@ -156,8 +156,8 @@ Syntactic Constraints and Efficient Parsability - Robert C.Berwick - Amy S.Weinberg + Robert C.Berwick + Amy S.Weinberg 10.3115/981311.981335 119–122 P83-1018 @@ -165,7 +165,7 @@ Deterministic Parsing of Syntactic Non-fluencies - DonaldHindle + DonaldHindle 10.3115/981311.981336 123–128 P83-1019 @@ -173,8 +173,8 @@ <fixed-case>D</fixed-case>-Theory: Talking about Talking about Trees - Mitchell P.Marcus - DonaldHindle + Mitchell P.Marcus + DonaldHindle Margaret M.Fleck 10.3115/981311.981337 129–136 @@ -183,8 +183,8 @@ Parsing as Deduction - Fernando C. N.Pereira - David H. D.Warren + Fernando C. N.Pereira + David H. D.Warren 10.3115/981311.981338 137–144 P83-1021 @@ -200,7 +200,7 @@ Menu-Based Natural Language Understanding - Harry R.Tennant + Harry R.Tennant Kenneth M.Ross Richard M.Saenz Craig W.Thompson @@ -220,7 +220,7 @@ Discourse Pragmatics and Ellipsis Resolution in Task-Oriented Natural Language Interfaces - Jaime G.Carbonell + Jaime G.Carbonell 10.3115/981311.981343 164–168 P83-1025 diff --git a/data/xml/P84.xml b/data/xml/P84.xml index 1bf256bcc1..3d1e6115f0 100644 --- a/data/xml/P84.xml +++ b/data/xml/P84.xml @@ -25,8 +25,8 @@ Conveying Implicit Content in Narrative Summaries Malcolm E.Cook - Wendy G.Lehnert - David D.McDonald + Wendy G.Lehnert + David D.McDonald 10.3115/980491.980493 5–7 P84-1002 @@ -34,7 +34,7 @@ Transforming <fixed-case>E</fixed-case>nglish Interfaces to Other Natural Languages: An Experiment With <fixed-case>P</fixed-case>ortuguese - Gabriel PereiraLopes + Gabriel PereiraLopes 10.3115/980491.980494 8–10 P84-1003 @@ -58,7 +58,7 @@ Bounded Context Parsing and Easy Learnability - Robert C.Berwick + Robert C.Berwick 10.3115/980491.980497 20–23 P84-1006 @@ -66,7 +66,7 @@ The Representation of Constituent Structures for Finite-State Parsing - D. TerenceLangendoen + D. TerenceLangendoen YedidyahLangsam 10.3115/980491.980498 24–27 @@ -109,7 +109,7 @@ Toward a Redefinition of Yes/No Questions - JuliaHirschberg + JuliaHirschberg 10.3115/980491.980503 48–51 P84-1012 @@ -117,7 +117,7 @@ The Syntax and Semantics of User-Defined Modifiers in Transportable Natural Language Processor - Bruce W.Ballard + Bruce W.Ballard 10.3115/980491.980504 52–56 P84-1013 @@ -142,7 +142,7 @@ Quasi-Indexical Reference in Propositional Semantic Networks William J.Rapaport - Stuart C.Shapiro + Stuart C.Shapiro 10.3115/980491.980507 65–70 P84-1016 @@ -199,10 +199,10 @@ Automated Determination of Sublanguage Syntactic Usage - RalphGrishman - Ngo ThanhNhan + RalphGrishman + Ngo ThanhNhan ElaineMarsh - LynetteHirschman + LynetteHirschman 10.3115/980491.980514 96–100 P84-1023 @@ -210,9 +210,9 @@ Semantic Interpretation Using <fixed-case>KL</fixed-case>-<fixed-case>ONE</fixed-case> - Norman K.Sondheimer - Ralph M.Weischedel - Robert J.Bobrow + Norman K.Sondheimer + Ralph M.Weischedel + Robert J.Bobrow 10.3115/980491.980515 101–107 P84-1024 @@ -221,7 +221,7 @@ Two Theories for Computing the Logical Form of Mass Expressions Francis JeffryPelletier - Lenhart K.Schubert + Lenhart K.Schubert 10.3115/980491.980516 108–111 P84-1025 @@ -229,7 +229,7 @@ Syntactic and Semantic Parsability - Geoffrey K.Pullum + Geoffrey K.Pullum 10.3115/980491.980517 112–122 P84-1026 @@ -237,8 +237,8 @@ The Semantics of Grammar Formalisms Seen as Computer Languages - Fernando C. N.Pereira - Stuart M.Shieber + Fernando C. N.Pereira + Stuart M.Shieber 10.3115/980491.980518 123–129 P84-1027 @@ -246,7 +246,7 @@ The Resolution of Quantificational Ambiguity in the <fixed-case>TENDUM</fixed-case> System - HarryBunt + HarryBunt 10.3115/980491.980519 130–133 P84-1028 @@ -254,9 +254,9 @@ Preventing False Inferences - AravindJoshi - BonnieWebber - Ralph M.Weischedel + AravindJoshi + BonnieWebber + Ralph M.Weischedel 10.3115/980491.980520 134–138 P84-1029 @@ -264,8 +264,8 @@ Problem Localization Strategies for Pramatics Processing in Natural-Language Front Ends - Lance A.Ramshaw - Ralph M.Weischedel + Lance A.Ramshaw + Ralph M.Weischedel 10.3115/980491.980521 139–143 P84-1030 @@ -273,7 +273,7 @@ A Connectionist Model of Some Aspects of Anaphor Resolution - Ronan G.Reilly + Ronan G.Reilly 10.3115/980491.980522 144–149 P84-1031 @@ -317,7 +317,7 @@ Detecting Patterns in a Lexical Data Base - NicolettaCalzolari + NicolettaCalzolari 10.3115/980491.980527 170–173 P84-1036 @@ -341,7 +341,7 @@ Panel: Natural Language and Databases, Again - KarenSparck Jones + KarenSparck Jones 10.3115/980491.980530 182–183 P84-1039 @@ -349,7 +349,7 @@ There Still Is Gold in the Database Mine - MadeleineBates + MadeleineBates 10.3115/980491.980531 184–185 P84-1040 @@ -357,7 +357,7 @@ Is There Natural Language after Data Bases? - Jaime G.Carbonell + Jaime G.Carbonell 10.3115/980491.980532 186–187 P84-1041 @@ -365,7 +365,7 @@ Panel on Natural Language and Databases - Daniel P.Flickinger + Daniel P.Flickinger 10.3115/980491.980533 188–189 P84-1042 @@ -373,7 +373,7 @@ Natural Language for Exert Systems: Comparisons with Database Systems - Kathleen R.McKeown + Kathleen R.McKeown 10.3115/980491.980534 190–193 P84-1043 @@ -389,7 +389,7 @@ Understanding Pragmatically Ill-Formed Input - M. SandraCarberry + M. SandraCarberry 10.3115/980491.980536 200–206 P84-1045 @@ -397,7 +397,7 @@ Referring as Requesting - Philip R.Cohen + Philip R.Cohen 10.3115/980491.980537 207–211 P84-1046 @@ -413,7 +413,7 @@ Combining Functionality and Object-Orientedness for Natural Language Processing - ToyoakiNishida + ToyoakiNishida ShujiDoshita 10.3115/980491.980539 218–221 @@ -423,7 +423,7 @@ Use of Heuristic Knowledge in <fixed-case>C</fixed-case>hinese Language Analysis YimingYang - ToyoakiNishida + ToyoakiNishida ShujiDoshita 10.3115/980491.980540 222–225 @@ -466,7 +466,7 @@ On Parsing Preferences - Lenhart K.Schubert + Lenhart K.Schubert 10.3115/980491.980545 247–250 P84-1054 @@ -491,9 +491,9 @@ Analysis Grammar of <fixed-case>J</fixed-case>apanese in the <fixed-case>M</fixed-case>u-project - A Procedural Approach to Analysis Grammar - Jun-ichiTsujii - Jun-ichiNakamura - MakotoNagao + Jun-ichiTsujii + Jun-ichiNakamura + MakotoNagao 10.3115/980491.980548 267–274 P84-1057 @@ -509,7 +509,7 @@ Building a Large Knowledge Base for a Natural Language System - Jerry R.Hobbs + Jerry R.Hobbs 10.3115/980491.980550 283–286 P84-1059 @@ -526,7 +526,7 @@ Inferencing on Linguistically Based Semantic Structures - EvaHajičová + EvaHajičová MilenaHnátková 10.3115/980491.980552 291–297 @@ -543,8 +543,8 @@ A Plan Recognition Model for Clarification Subdialogues - Diane J.Litman - James F.Allen + Diane J.Litman + James F.Allen 10.3115/980491.980554 302–311 P84-1063 @@ -561,7 +561,7 @@ Using Focus to Generate Complex and Simple Sentences Marcia A.Derr - Kathleen R.McKeown + Kathleen R.McKeown 10.3115/980491.980556 319–326 P84-1065 @@ -569,7 +569,7 @@ A Rational Reconstruction of the <fixed-case>PROTEUS</fixed-case> Sentence Planner - GraemeRitchie + GraemeRitchie 10.3115/980491.980557 327–329 P84-1066 @@ -577,7 +577,7 @@ Software Tools for the Environment of a Computer Aided Translation System - DanielBachut + DanielBachut NelsonVerastegui 10.3115/980491.980558 330–333 @@ -587,7 +587,7 @@ Design of a Machine Translation System for a Sublanguage BeatBuchmann - SusanWarwick + SusanWarwick PatrickShann 10.3115/980491.980559 334–337 @@ -596,9 +596,9 @@ Grammar Writing System (<fixed-case>GRADE</fixed-case>) of <fixed-case>M</fixed-case>u-Machine Translation Project and its Characteristics - Jun-ichiNakamura - Jun-ichiTsujii - MakotoNagao + Jun-ichiNakamura + Jun-ichiTsujii + MakotoNagao 10.3115/980491.980560 338–343 P84-1069 @@ -630,7 +630,7 @@ <fixed-case>LR</fixed-case> Parsers For Natural Languages - MasaruTomita + MasaruTomita 10.3115/980491.980564 354–357 P84-1073 @@ -646,7 +646,7 @@ The Design of a Computer Language for Linguistic Information - Stuart M.Shieber + Stuart M.Shieber 10.3115/980491.980566 362–366 P84-1075 @@ -654,7 +654,7 @@ Discourse Structures for Text Generation - William C.Mann + William C.Mann 10.3115/980491.980567 367–375 P84-1076 @@ -662,7 +662,7 @@ Semantic Rule Based Text Generation - Michael L.Mauldin + Michael L.Mauldin 10.3115/980491.980568 376–380 P84-1077 @@ -670,7 +670,7 @@ Controlling Lexical Substitution in Computer Text Generation - RobertGranville + RobertGranville 10.3115/980491.980569 381–384 P84-1078 @@ -688,9 +688,9 @@ Two-Way Finite Automata and Dependency Grammar: A Parsing Method for Inflectional Free Word Order Languages - EsaNelimarkka - HarriJappinen - AarnoLehtola + EsaNelimarkka + HarriJappinen + AarnoLehtola 10.3115/980491.980571 389–392 P84-1080 @@ -698,7 +698,7 @@ Interruptable Transition Networks - SergeiNirenburg + SergeiNirenburg 10.3115/980491.980572 393–397 P84-1081 @@ -706,7 +706,7 @@ Automatic Construction of Discourse Representation Structures - FranzGuenthner + FranzGuenthner 10.3115/980491.980573 398–401 P84-1082 @@ -731,7 +731,7 @@ A Syntactic Approach to Discourse Semantics LiviaPolanyi - RemkoScha + RemkoScha 10.3115/980491.980576 413–419 P84-1085 @@ -739,9 +739,9 @@ Dealing With Incompleteness of Linguistic Knowledge in Language Translation – Transfer and Generation Stage of <fixed-case>M</fixed-case>u Machine Translation Project - MakotoNagao - ToyoakiNishida - Jun-ichiTsujii + MakotoNagao + ToyoakiNishida + Jun-ichiTsujii 10.3115/980491.980577 420–427 P84-1086 @@ -767,7 +767,7 @@ Coping with Extragrammaticality - Jaime G.Carbonell + Jaime G.Carbonell Philip J.Hayes 10.3115/980491.980580 437–443 @@ -776,7 +776,7 @@ Correcting Object-Related Misconceptions: How Should The System Respond? - Kathleen F.McCoy + Kathleen F.McCoy 10.3115/980491.980581 444–447 P84-1090 @@ -792,7 +792,7 @@ From <fixed-case>HOPE</fixed-case> en <fixed-case>I</fixed-case>’<fixed-case>ESPERANCE</fixed-case> On the Role of Computational Neurolinguistics in Cross-Language Studies - Helen M.Gigley + Helen M.Gigley 10.3115/980491.980583 452–456 P84-1092 @@ -816,7 +816,7 @@ Machine-Readable Dictionaries, Lexical Data Bases and the Lexical System - NicolettaCalzolari + NicolettaCalzolari 10.3115/980491.980586 460–460 P84-1095 @@ -848,8 +848,8 @@ Transfer in a Multilingual <fixed-case>MT</fixed-case> System - StevenKrauwer - Louisdes Tombe + StevenKrauwer + Louisdes Tombe 10.3115/980491.980590 464–467 P84-1099 @@ -857,7 +857,7 @@ Expert Systems and Other New Techniques in <fixed-case>MT</fixed-case> Systems - ChristianBoitet + ChristianBoitet ReneGerber 10.3115/980491.980591 468–471 @@ -875,7 +875,7 @@ Disambiguating Grammatically Ambiguous Sentences By Asking - MasaruTomita + MasaruTomita 10.3115/980491.980593 475–480 P84-1102 @@ -910,8 +910,8 @@ <fixed-case>NA</fixed-case>tural Language driven Image Generation - GiovanniAdorni - MauroDi Manzo + GiovanniAdorni + MauroDi Manzo FaustoGiunchiglia 10.3115/980491.980597 495–500 @@ -978,7 +978,7 @@ Interpreting Syntactically Ill-Formed Sentences - LeonardoLesmo + LeonardoLesmo PietroTorasso 10.3115/980491.980605 534–539 diff --git a/data/xml/P85.xml b/data/xml/P85.xml index ee1821ac7b..d038a39dda 100644 --- a/data/xml/P85.xml +++ b/data/xml/P85.xml @@ -65,7 +65,7 @@ Speech Acts and Rationality - Philip R.Cohen + Philip R.Cohen Hector J.Levesque 10.3115/981210.981217 49–60 @@ -74,7 +74,7 @@ Ontological Promiscuity - Jerry R.Hobbs + Jerry R.Hobbs 10.3115/981210.981218 60–69 P85-1008 @@ -90,7 +90,7 @@ The Computational Difficulty of <fixed-case>ID/LP</fixed-case> Parsing - G. EdwardBarton + G. EdwardBarton 10.3115/981210.981220 76–81 P85-1010 @@ -98,8 +98,8 @@ Some Computational Properties of <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - K.Vijay-Shankar - Aravind K.Joshi + K.Vijay-Shankar + Aravind K.Joshi 10.3115/981210.981221 82–93 P85-1011 @@ -107,8 +107,8 @@ <fixed-case>TAG</fixed-case>’s as a Grammatical Formalism for Generation - David D.McDonald - James D.Pustejovsky + David D.McDonald + James D.Pustejovsky 10.3115/981210.981222 94–103 P85-1012 @@ -116,7 +116,7 @@ Modular Logic Grammars - Michael C.McCord + Michael C.McCord 10.3115/981210.981223 104–117 P85-1013 @@ -125,7 +125,7 @@ New Approaches to Parsing Conjunctions Using <fixed-case>P</fixed-case>rolog SandiwayFong - Robert C.Berwick + Robert C.Berwick 10.3115/981210.981224 118–126 P85-1014 @@ -149,7 +149,7 @@ A Structure-Sharing Representation for Unification-Based Grammar Formalisms - Fernando C. N.Pereira + Fernando C. N.Pereira 10.3115/981210.981227 137–144 P85-1017 @@ -157,7 +157,7 @@ Using Restriction to Extend Parsing Algorithms for Complex-Feature-Based Formalisms - Stuart M.Shieber + Stuart M.Shieber 10.3115/981210.981228 145–152 P85-1018 @@ -175,7 +175,7 @@ Movement in Active Production Networks - Mark A.Jones + Mark A.Jones Alan S.Driscoll 10.3115/981210.981230 161–166 @@ -185,7 +185,7 @@ Parsing <fixed-case>H</fixed-case>ead-<fixed-case>D</fixed-case>riven <fixed-case>P</fixed-case>hrase <fixed-case>S</fixed-case>tructure <fixed-case>G</fixed-case>rammar DerekProudian - CarlPollard + CarlPollard 10.3115/981210.981231 167–171 P85-1021 @@ -194,7 +194,7 @@ A Computational Semantics for Natural Language Lewis G.Creary - Carl J.Pollard + Carl J.Pollard 10.3115/981210.981232 172–179 P85-1022 @@ -202,7 +202,7 @@ Analysis of Conjunctions in a Rule-Based Parser - LeonardoLesmo + LeonardoLesmo PietroTorasso 10.3115/981210.981233 180–187 @@ -211,7 +211,7 @@ A Pragmatics-Based Approach to Understanding Intersentential Ellipsis - SandraCarberry + SandraCarberry 10.3115/981210.981234 188–197 P85-1024 @@ -219,7 +219,7 @@ Some Pragmatic Issues in the Planning of Definite and Indefinite Noun Phrases - Douglas E.Appelt + Douglas E.Appelt 10.3115/981210.981235 198–203 P85-1025 @@ -251,7 +251,7 @@ Description Strategies for Naive and Expert Users - Cecile L.Paris + Cecile L.Paris 10.3115/981210.981239 238–245 P85-1029 @@ -259,7 +259,7 @@ Stress Assignment in Letter to Sound Rules for Speech Synthesis - KennethChurch + KennethChurch 10.3115/981210.981240 246–253 P85-1030 @@ -280,9 +280,9 @@ Structure-Sharing in Lexical Representation - DanielFlickinger - CarlPollard - ThomasWasow + DanielFlickinger + CarlPollard + ThomasWasow 10.3115/981210.981242 262–267 P85-1032 @@ -290,7 +290,7 @@ A Tool Kit for Lexicon Building - Thomas E.Ahlswede + Thomas E.Ahlswede 10.3115/981210.981243 268–276 P85-1033 @@ -299,7 +299,7 @@ Using an On-Line Dictionary to Find Rhyming Words and Pronunciations for Unknown Words Roy J.Byrd - Martin S.Chodorow + Martin S.Chodorow 10.3115/981210.981244 277–283 P85-1034 @@ -308,7 +308,7 @@ Towards a Self-Extending Lexicon UriZernik - Michael G.Dyer + Michael G.Dyer 10.3115/981210.981245 284–292 P85-1035 @@ -316,7 +316,7 @@ Grammatical Analysis by Computer of the <fixed-case>L</fixed-case>ancaster-<fixed-case>O</fixed-case>slo/<fixed-case>B</fixed-case>ergen (<fixed-case>LOB</fixed-case>) Corpus of <fixed-case>B</fixed-case>ritish <fixed-case>E</fixed-case>nglish Texts - AndrewDavid + AndrewDavid 10.3115/981210.981246 293–298 P85-1036 @@ -324,9 +324,9 @@ Extracting Semantic Hierarchies From a Large On-Line Dictionary - Martin S.Chodorow + Martin S.Chodorow Roy J.Byrd - George E.Heidorn + George E.Heidorn 10.3115/981210.981247 299–304 P85-1037 @@ -350,7 +350,7 @@ Grammar Viewed as a Functioning Part of a Cognitive System - Helen M.Gigley + Helen M.Gigley 10.3115/981210.981250 324–332 P85-1040 diff --git a/data/xml/P86.xml b/data/xml/P86.xml index daf57c3e99..48a8a4bc96 100644 --- a/data/xml/P86.xml +++ b/data/xml/P86.xml @@ -15,7 +15,7 @@ Tutorial Abstracts - RalphGrishman + RalphGrishman 10.3115/981131.981132 1–1 P86-1001 @@ -23,7 +23,7 @@ Bringing Natural Language Processing to the Microcomputer Market: The Story of <fixed-case>Q&A</fixed-case> - Gary G.Hendrix + Gary G.Hendrix 10.3115/981131.981133 2–2 P86-1002 @@ -31,7 +31,7 @@ Time and Tense in <fixed-case>E</fixed-case>nglish - Mary P.Harper + Mary P.Harper EugeneCharniak 10.3115/981131.981134 3–9 @@ -40,12 +40,12 @@ Recovering Implicit Information - Martha S.Palmer - Deborah A.Dahl + Martha S.Palmer + Deborah A.Dahl Rebecca J.Schiffman - LynetteHirschman - MarciaLinebarger - JohnDowding + LynetteHirschman + MarciaLinebarger + JohnDowding 10.3115/981131.981135 10–19 P86-1004 @@ -53,7 +53,7 @@ Semantic Acquisition In <fixed-case>TELI</fixed-case>: A Transportable, User-Customized Natural Language Processor - Bruce W.Ballard + Bruce W.Ballard Douglas E.Stumberger 10.3115/981131.981136 20–29 @@ -62,7 +62,7 @@ Computational Complexity of Current <fixed-case>GPSG</fixed-case> Theory - Eric SvenRistad + Eric SvenRistad 10.3115/981131.981137 30–39 P86-1006 @@ -70,7 +70,7 @@ Defining Natural Language Grammars in <fixed-case>GPSG</fixed-case> - Eric SvenRistad + Eric SvenRistad 10.3115/981131.981138 40–44 P86-1007 @@ -78,7 +78,7 @@ Constraint Propagation in <fixed-case>K</fixed-case>immo Systems - G. EdwardBarton + G. EdwardBarton 10.3115/981131.981139 45–52 P86-1008 @@ -86,7 +86,7 @@ Computational Complexity in Two-Level Morphology - G. EdwardBarton + G. EdwardBarton 10.3115/981131.981140 53–59 P86-1009 @@ -103,7 +103,7 @@ The Relationship Between <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars And Head Grammars D. J.Weir - K.Vijay-Shanker + K.Vijay-Shanker A. K.Joshi 10.3115/981131.981142 67–74 @@ -129,7 +129,7 @@ Copying in Natural Languages, Context-Freeness, and Queue Grammars - AlexisManaster-Ramer + AlexisManaster-Ramer 10.3115/981131.981145 85–89 P86-1014 @@ -138,7 +138,7 @@ A Model of Revision in Natural Language Generation Marie M.Vaughan - David D.McDonald + David D.McDonald 10.3115/981131.981146 90–96 P86-1015 @@ -146,7 +146,7 @@ The <fixed-case>ROMPER</fixed-case> System: Responding to Object-Related Misconceptions using Perspective - Kathleen F.McCoy + Kathleen F.McCoy 10.3115/981131.981147 97–105 P86-1016 @@ -154,7 +154,7 @@ Encoding and Acquiring Meanings for Figurative Phrases - Michael G.Dyer + Michael G.Dyer UriZernik 10.3115/981131.981148 106–111 @@ -164,8 +164,8 @@ Semantically Significant Patterns in Dictionary Definitions JudithMarkowitz - ThomasAhlswede - MarthaEvens + ThomasAhlswede + MarthaEvens 10.3115/981131.981149 112–119 P86-1018 @@ -174,7 +174,7 @@ Computer Methods for Morphological Analysis Roy J.Byrd - Judith L.Klavans + Judith L.Klavans MarkAronoff FrankAnshen 10.3115/981131.981150 @@ -192,8 +192,8 @@ The Intonational Structuring of Discourse - JuliaHirschberg - JanetPierrehumbert + JuliaHirschberg + JanetPierrehumbert 10.3115/981131.981152 136–144 P86-1021 @@ -201,8 +201,8 @@ The Contribution of Parsing to Prosodic Phrasing in an Experimental Text-to-Speech System - JoanBachenko - EileenFitzpatrick + JoanBachenko + EileenFitzpatrick C. E.Wright 10.3115/981131.981153 145–155 @@ -211,7 +211,7 @@ Morphological Decomposition and Stress Assignment for Speech Synthesis - KennethChurch + KennethChurch 10.3115/981131.981154 156–164 P86-1023 @@ -231,7 +231,7 @@ <fixed-case>J</fixed-case>apanese Prosodic Phrasing and Intonation Synthesis Mary E.Beckman - Janet B.Pierrehumbert + Janet B.Pierrehumbert 10.3115/981131.981156 173–180 P86-1025 @@ -239,7 +239,7 @@ Questions about Connectionist Models of Natural Language - MarkLiberman + MarkLiberman 10.3115/981131.981168 181–183 P86-1026 @@ -255,7 +255,7 @@ Connectionist Models for Natural Language Processing Program - David L.Waltz + David L.Waltz 10.3115/981131.981170 185–185 P86-1028 @@ -296,7 +296,7 @@ Linguistic Coherence: A Plan-Based Alternative - Diane J.Litman + Diane J.Litman 10.3115/981131.981161 215–223 P86-1033 @@ -313,7 +313,7 @@ Commonsense Metaphysics and Lexical Semantics - Jerry R.Hobbs + Jerry R.Hobbs WilliamCroft ToddDavies DouglasEdwards @@ -325,7 +325,7 @@ A Terminological Simplification Transformation for Natural Language Question-Answering Systems - David G.Stallard + David G.Stallard 10.3115/981131.981164 241–246 P86-1036 @@ -333,7 +333,7 @@ Some Uses of Higher-Order Logic in Computational Linguistics - Dale A.Miller + Dale A.Miller GopalanNadathur 10.3115/981131.981165 247–256 @@ -342,7 +342,7 @@ A Logical Semantics for Feature Structures - Robert T.Kasper + Robert T.Kasper 10.3115/981131.981166 257–266 P86-1038 @@ -350,7 +350,7 @@ What Should Machine Translation Be? - John S.White + John S.White 10.3115/981131.981172 267–267 P86-1039 diff --git a/data/xml/P87.xml b/data/xml/P87.xml index 99df74f2ca..9816d4cb99 100644 --- a/data/xml/P87.xml +++ b/data/xml/P87.xml @@ -16,7 +16,7 @@ Temporal Ontology in Natural Language MarcMoens - MarkSteedman + MarkSteedman 10.3115/981175.981176 1–7 P87-1001 @@ -24,7 +24,7 @@ A Compositional Semantics of Temporal Expressions in <fixed-case>E</fixed-case>nglish - Erhard W.Hinrichs + Erhard W.Hinrichs 10.3115/981175.981177 8–15 P87-1002 @@ -32,7 +32,7 @@ Situations and Intervals - Rebecca J.Passonneau + Rebecca J.Passonneau 10.3115/981175.981178 16–24 P87-1003 @@ -48,9 +48,9 @@ An Environment for Acquiring Semantic Information - Damaris M.Ayuso + Damaris M.Ayuso VardaShaked - Ralph M.Weischedel + Ralph M.Weischedel 10.3115/981175.981180 32–40 P87-1005 @@ -94,7 +94,7 @@ Constituent-Based Morphological Parsing: A New Approach to the Problem of Word-Recognition. - RichardSproat + RichardSproat BarbaraBrunson 10.3115/981175.981185 65–72 @@ -112,7 +112,7 @@ A Lazy way to Chart-Parse with Categorial Grammars RemoPareschi - MarkSteedman + MarkSteedman 10.3115/981175.981187 81–88 P87-1012 @@ -121,7 +121,7 @@ A Logical Version of Functional Grammar William C.Rounds - AlexisManaster-Ramer + AlexisManaster-Ramer 10.3115/981175.981188 89–96 P87-1013 @@ -129,8 +129,8 @@ Functional Unification Grammar Revisited - Kathleen R.McKeown - Cecile L.Paris + Kathleen R.McKeown + Cecile L.Paris 10.3115/981175.981189 97–103 P87-1014 @@ -138,9 +138,9 @@ Characterizing Structural Descriptions Produced by Various Grammatical Formalisms - K.Vijay-Shanker - David J.Weir - Aravind K.Joshi + K.Vijay-Shanker + David J.Weir + Aravind K.Joshi 10.3115/981175.981190 104–111 P87-1015 @@ -157,7 +157,7 @@ Context-Freeness of the Language Accepted by <fixed-case>M</fixed-case>arcus’ Parser - RNozohoor-Farshi + RNozohoor-Farshi 10.3115/981175.981192 117–122 P87-1017 @@ -175,9 +175,9 @@ Nominalizations in <fixed-case>PUNDIT</fixed-case> - Deborah A.Dahl - Martha S.Palmer - Rebecca J.Passonneau + Deborah A.Dahl + Martha S.Palmer + Rebecca J.Passonneau 10.3115/981175.981194 131–139 P87-1019 @@ -185,8 +185,8 @@ Toward Treating <fixed-case>E</fixed-case>nglish Nominals Correctly - Richard W.Sproat - Mark Y.Liberman + Richard W.Sproat + Mark Y.Liberman 10.3115/981175.981195 140–146 P87-1020 @@ -194,7 +194,7 @@ The Interpretation of Tense in Discourse - Bonnie LynnWebber + Bonnie LynnWebber 10.3115/981175.981196 147–154 P87-1021 @@ -202,9 +202,9 @@ A Centering Approach to Pronouns - Susan E.Brennan + Susan E.Brennan Marilyn W.Friedman - Carl J.Pollard + Carl J.Pollard 10.3115/981175.981197 155–162 P87-1022 @@ -212,8 +212,8 @@ Now Let’s Talk About Now; Identifying Cue Phrases Intonationally - JuliaHirschberg - DianeLitman + JuliaHirschberg + DianeLitman 10.3115/981175.981198 163–171 P87-1023 @@ -221,7 +221,7 @@ On the Acquisition of Lexical Entries: The Perceptual Origin of Thematic Relations - JamesPustejovsky + JamesPustejovsky 10.3115/981175.981199 172–178 P87-1024 @@ -229,7 +229,7 @@ The Logical Analysis of Lexical Ambiguity - DavidStallard + DavidStallard 10.3115/981175.981200 179–185 P87-1025 @@ -238,7 +238,7 @@ <fixed-case>FLUSH</fixed-case>: A Flexible Lexicon Design David J.Besemer - Paul S.Jacobs + Paul S.Jacobs 10.3115/981175.981201 186–192 P87-1026 @@ -246,10 +246,10 @@ The Derivation of a Grammatically Indexed Lexicon from the Longman Dictionary of Contemporary <fixed-case>E</fixed-case>nglish - BranBoguraev - TedBriscoe - JohnCarroll - DavidCarter + BranBoguraev + TedBriscoe + JohnCarroll + DavidCarter ClaireGrover 10.3115/981175.981202 193–200 @@ -258,8 +258,8 @@ Lexical Selection in the Process of Language Generation - JamesPustejovsky - SergeiNirenburg + JamesPustejovsky + SergeiNirenburg 10.3115/981175.981203 201–206 P87-1028 @@ -269,7 +269,7 @@ Constraints on the Generation of Adjunct Clauses Alison K.Huettner Marie M.Vaughan - David D.McDonald + David D.McDonald 10.3115/981175.981204 207–214 P87-1029 @@ -301,7 +301,7 @@ A Unification Method for Disjunctive Feature Descriptions - Robert T.Kasper + Robert T.Kasper 10.3115/981175.981208 235–242 P87-1033 @@ -309,7 +309,7 @@ Revised Generalized Phrase Structure Grammar - Eric SvenRistad + Eric SvenRistad 10.3115/981175.981209 243–250 P87-1034 diff --git a/data/xml/P88.xml b/data/xml/P88.xml index aacaa706fc..9f74a23ed5 100644 --- a/data/xml/P88.xml +++ b/data/xml/P88.xml @@ -16,7 +16,7 @@ Adapting an <fixed-case>E</fixed-case>nglish Morphological Analyzer for <fixed-case>F</fixed-case>rench Roy J.Byrd - EvelyneTzoukermann + EvelyneTzoukermann 10.3115/982023.982024 1–6 P88-1001 @@ -24,10 +24,10 @@ Sentence Fragments Regular Structures - Marcia C.Linebarger - Deborah A.Dahl - LynetteHirschman - Rebecca J.Passonneau + Marcia C.Linebarger + Deborah A.Dahl + LynetteHirschman + Rebecca J.Passonneau 10.3115/982023.982025 7–16 P88-1002 @@ -35,8 +35,8 @@ Multi-Level Plurals and Distributivity - RemkoScha - DavidStallard + RemkoScha + DavidStallard 10.3115/982023.982026 17–24 P88-1003 @@ -45,7 +45,7 @@ The Interpretation of Relational Nouns Josde Bruin - RemkoScha + RemkoScha 10.3115/982023.982027 25–32 P88-1004 @@ -53,7 +53,7 @@ Quantifier Scoping in the <fixed-case>SRI</fixed-case> Core Language Engine - Douglas B.Moran + Douglas B.Moran 10.3115/982023.982028 33–40 P88-1005 @@ -61,7 +61,7 @@ A General Computational Treatment of Comparatives for Natural Language Question Answering - Bruce W.Ballard + Bruce W.Ballard 10.3115/982023.982029 41–48 P88-1006 @@ -69,7 +69,7 @@ Parsing and Interpreting Comparatives - MannyRayner + MannyRayner AmelieBanks 10.3115/982023.982030 49–60 @@ -95,7 +95,7 @@ An Integrated Framework for Semantic and Pragmatic Interpretation Martha E.Pollack - Fernando C.N.Pereira + Fernando C.N.Pereira 10.3115/982023.982033 75–86 P88-1010 @@ -112,7 +112,7 @@ Interpretation as Abduction - Jerry R.Hobbs + Jerry R.Hobbs MarkStickel PaulMartin DouglasEdwards @@ -125,7 +125,7 @@ <fixed-case>P</fixed-case>roject <fixed-case>A</fixed-case>pril --- A Progress Report RobinHaigh GeoffreySampson - EricAtwell + EricAtwell 10.3115/982023.982036 104–112 P88-1013 @@ -133,7 +133,7 @@ Discourse Deixis: Reference to Discourse Segments - Bonnie LynnWebber + Bonnie LynnWebber 10.3115/982023.982037 113–122 P88-1014 @@ -141,7 +141,7 @@ Cues and control in Expert-Client Dialogues - SteveWhittaker + SteveWhittaker PhilStenton 10.3115/982023.982038 123–130 @@ -150,7 +150,7 @@ A Computational Theory of Perspective and Reference in Narrative - Janyce M.Wiebe + Janyce M.Wiebe William J.Rapaport 10.3115/982023.982039 131–138 @@ -170,7 +170,7 @@ Aspects of Clause Politeness in <fixed-case>J</fixed-case>apanese: An Extended Inquiry Semantics Treatment - John A.Bateman + John A.Bateman 10.3115/982023.982041 147–154 P88-1018 @@ -180,8 +180,8 @@ Experiences With an On-Line Translating Dialogue System SeijiMiike KoichiHasebe - HaroldSomers - Shin-yaAmano + HaroldSomers + Shin-yaAmano 10.3115/982023.982042 155–162 P88-1019 @@ -189,7 +189,7 @@ Planning Coherent Multisentential Text - Eduard H.Hovy + Eduard H.Hovy 10.3115/982023.982043 163–169 P88-1020 @@ -197,7 +197,7 @@ A Practical Nonmonotonic Theory for Reasoning about Speech Acts - DouglasAppelt + DouglasAppelt KurtKonolige 10.3115/982023.982044 170–178 @@ -206,7 +206,7 @@ Two Types of Planning in Language Generation - Eduard H.Hovy + Eduard H.Hovy 10.3115/982023.982045 179–186 P88-1022 @@ -214,8 +214,8 @@ Assigning Intonational Features in Synthesized Spoken Directions - James RaymondDavis - JuliaHirschberg + James RaymondDavis + JuliaHirschberg 10.3115/982023.982046 187–193 P88-1023 @@ -231,7 +231,7 @@ Syntactic Approaches to Automatic Book Indexing - GerardSalton + GerardSalton 10.3115/982023.982048 204–210 P88-1025 @@ -239,7 +239,7 @@ Lexicon and grammar in probabilistic tagging of written <fixed-case>E</fixed-case>nglish. - Andrew DavidBeale + Andrew DavidBeale 10.3115/982023.982049 211–216 P88-1026 @@ -247,8 +247,8 @@ Parsing vs. Text Processing in the Analysis of Dictionary Definitions - ThomasAhlswede - MarthaEvens + ThomasAhlswede + MarthaEvens 10.3115/982023.982050 217–224 P88-1027 @@ -264,7 +264,7 @@ Conditional Descriptions in Functional Unification Grammar - Robert T.Kasper + Robert T.Kasper 10.3115/982023.982052 233–240 P88-1029 @@ -280,7 +280,7 @@ Graph-structured Stack and Natural Language Parsing - MasaruTomita + MasaruTomita 10.3115/982023.982054 249–257 P88-1031 @@ -289,7 +289,7 @@ An <fixed-case>E</fixed-case>arley-Type Parsing Algorithm for <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars YvesSchabes - Aravind K.Joshi + Aravind K.Joshi 10.3115/982023.982055 258–269 P88-1032 @@ -305,8 +305,8 @@ <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammars: Generative Power and Relationship to Linear Context-Free Rewriting Systems - David J.Weir - Aravind K.Joshi + David J.Weir + Aravind K.Joshi 10.3115/982023.982057 278–285 P88-1034 @@ -314,8 +314,8 @@ Unification of Disjunctive Feature Descriptions - AndreasEisele - JochenDorre + AndreasEisele + JochenDorre 10.3115/982023.982058 286–294 P88-1035 diff --git a/data/xml/P89.xml b/data/xml/P89.xml index 14edcfdff8..73ba46cf9d 100644 --- a/data/xml/P89.xml +++ b/data/xml/P89.xml @@ -23,10 +23,10 @@ A Semantic-Head-Driven Generation Algorithm for Unification-Based Formalisms - Stuart M.Shieber - Gertjanvan Noord - Robert C.Moore - Fernando C. N.Pereira + Stuart M.Shieber + Gertjanvan Noord + Robert C.Moore + Fernando C. N.Pereira 10.3115/981623.981625 7–17 P89-1002 @@ -35,7 +35,7 @@ A Three-Valued Interpretation of Negation in Feature Structure Descriptions AnujDawar - K.Vijay-Shanker + K.Vijay-Shanker 10.3115/981623.981626 18–24 P89-1003 @@ -43,7 +43,7 @@ Logical Forms in the Core Language Engine - HiyanAlshawi + HiyanAlshawi Janvan Eijck 10.3115/981623.981627 25–32 @@ -52,7 +52,7 @@ Unification-Based Semantic Interpretation - Robert C.Moore + Robert C.Moore 10.3115/981623.981628 33–41 P89-1005 @@ -61,7 +61,7 @@ Reference to Locations Lewis G.Creary - J. MarkGawron + J. MarkGawron JohnNerbonne 10.3115/981623.981629 42–50 @@ -70,7 +70,7 @@ Getting at Discourse Referents - Rebecca J.Passonneau + Rebecca J.Passonneau 10.3115/981623.981630 51–59 P89-1007 @@ -94,7 +94,7 @@ Word Association Norms, Mutual Information, and Lexicography - Kenneth WardChurch + Kenneth WardChurch PatrickHanks 10.3115/981623.981633 76–83 @@ -103,7 +103,7 @@ Lexical Access in Connected Speech Recognition - TedBriscoe + TedBriscoe 10.3115/981623.981634 84–90 P89-1011 @@ -111,8 +111,8 @@ Dictionaries, Dictionary Grammars and Dictionary Entry Parsing - Mary S.Neff - Branimir K.Boguraev + Mary S.Neff + Branimir K.Boguraev 10.3115/981623.981635 91–101 P89-1012 @@ -120,7 +120,7 @@ Some Chart-Based Techniques for Parsing Ill-Formed Input - Chris S.Mellish + Chris S.Mellish 10.3115/981623.981636 102–109 P89-1013 @@ -128,9 +128,9 @@ On Representing Governed Prepositions and Handling “Incorrect” and Novel Prepositions - Hatte R.Blejer + Hatte R.Blejer SharonFlank - AndrewKehler + AndrewKehler 10.3115/981623.981637 110–117 P89-1014 @@ -138,7 +138,7 @@ Acquiring Disambiguation Rules From Text - DonaldHindle + DonaldHindle 10.3115/981623.981638 118–125 P89-1015 @@ -146,8 +146,8 @@ The Effects of Interaction on Spoken Discourse - Sharon L.Oviatt - Philip R.Cohen + Sharon L.Oviatt + Philip R.Cohen 10.3115/981623.981639 126–134 P89-1016 @@ -172,7 +172,7 @@ A Calculus for Semantic Composition and Scoping - Fernando C.N.Pereira + Fernando C.N.Pereira 10.3115/981623.981642 152–160 P89-1019 @@ -180,7 +180,7 @@ A General Computational Treatment Of The Comparative - CarolFriedman + CarolFriedman 10.3115/981623.981643 161–168 P89-1020 @@ -188,7 +188,7 @@ The Lexical Semantics of Comparative Expressions in a Multi-Level Semantic Processor - Duane E.Olawsky + Duane E.Olawsky 10.3115/981623.981644 169–176 P89-1021 @@ -197,7 +197,7 @@ Automatic Acquisition of the Lexical Semantics of Verbs From Sentence Frames MortWebster - MitchMarcus + MitchMarcus 10.3115/981623.981645 177–184 P89-1022 @@ -205,8 +205,8 @@ Computer Aided Interpretation of Lexical Cooccurrences - PaolaVelardi - Maria TeresaPazienza + PaolaVelardi + Maria TeresaPazienza 10.3115/981623.981646 185–192 P89-1023 @@ -214,7 +214,7 @@ A Hybrid Approach to Representation in the <fixed-case>J</fixed-case>anus Natural Language Processor - Ralph M.Weischedel + Ralph M.Weischedel 10.3115/981623.981647 193–202 P89-1024 @@ -222,8 +222,8 @@ Planning Text for Advisory Dialogues - Johanna D.Moore - Cecile L.Paris + Johanna D.Moore + Cecile L.Paris 10.3115/981623.981648 203–211 P89-1025 @@ -231,8 +231,8 @@ Two Constraints on Speech Act Ambiguity - Elizabeth A.Hinkelman - James F.Allen + Elizabeth A.Hinkelman + James F.Allen 10.3115/981623.981649 212–219 P89-1026 @@ -240,8 +240,8 @@ Treatment of Long Distance Dependencies in <fixed-case>LFG</fixed-case> and <fixed-case>TAG</fixed-case>: Functional Uncertainty in <fixed-case>LFG</fixed-case> Is a Corollary in <fixed-case>TAG</fixed-case> - Aravind K.Joshi - K.Vijay-Shanker + Aravind K.Joshi + K.Vijay-Shanker 10.3115/981623.981650 220–227 P89-1027 @@ -257,7 +257,7 @@ A Generalization of the Offline Parsable Grammars - AndrewHaas + AndrewHaas 10.3115/981623.981652 237–242 P89-1029 @@ -265,7 +265,7 @@ Discourse Entities in <fixed-case>J</fixed-case>anus - Damaris M.Ayuso + Damaris M.Ayuso 10.3115/981623.981653 243–250 P89-1030 @@ -273,7 +273,7 @@ Evaluating Discourse Processing Algorithms - Marilyn A.Walker + Marilyn A.Walker 10.3115/981623.981654 251–261 P89-1031 @@ -282,7 +282,7 @@ A Computational Mechanism for Pronominal Reference Robert J. P.Ingria - DavidStallard + DavidStallard 10.3115/981623.981655 262–271 P89-1032 @@ -301,7 +301,7 @@ ClaireGardent Gabriel G.Bias Pierre-FrangoisJurie - KarineBaschung + KarineBaschung 10.3115/981623.981657 280–287 P89-1034 diff --git a/data/xml/P90.xml b/data/xml/P90.xml index 82e95578ad..862b75215e 100644 --- a/data/xml/P90.xml +++ b/data/xml/P90.xml @@ -15,8 +15,8 @@ Polynomial Time Parsing of <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammars - K.Vijay-Shanker - David J.Weir + K.Vijay-Shanker + David J.Weir 10.3115/981823.981824 1–8 P90-1001 @@ -24,7 +24,7 @@ Structure and Intonation in Spoken Language Understanding - MarkSteedman + MarkSteedman 10.3115/981823.981825 9–16 P90-1002 @@ -32,8 +32,8 @@ Prosody, Syntax and Parsing - JohnBear - PattiPrice + JohnBear + PattiPrice 10.3115/981823.981826 17–22 P90-1003 @@ -42,7 +42,7 @@ Empirical Study of Predictive Powers of Simple Attachment Schemes for Post-modifier Prepositional Phrases GregWhittemore - KathleenFerrara + KathleenFerrara HansBrunner 10.3115/981823.981827 23–30 @@ -67,9 +67,9 @@ Transforming Syntactic Graphs Into Semantic Graphs - Hae-ChangRim + Hae-ChangRim Robert F.Simmons - JungyunSeo + JungyunSeo 10.3115/981823.981830 47–53 P90-1007 @@ -85,7 +85,7 @@ Designer Definites in Logical Form - Mary P.Harper + Mary P.Harper 10.3115/981823.981832 62–69 P90-1009 @@ -93,8 +93,8 @@ Mixed Initiative in Dialogue: An Investigation into Discourse Segmentation - MarilynWalker - SteveWhittaker + MarilynWalker + SteveWhittaker 10.3115/981823.981833 70–78 P90-1010 @@ -102,7 +102,7 @@ Performatives in a Rationally Based Speech Act Theory - Philip R.Cohen + Philip R.Cohen Hector J.Levesque 10.3115/981823.981834 79–88 @@ -111,7 +111,7 @@ Normal State Implicature - Nancy L.Green + Nancy L.Green 10.3115/981823.981835 89–96 P90-1012 @@ -151,7 +151,7 @@ Solving Thematic Divergences in Machine Translation - BonnieDorr + BonnieDorr 10.3115/981823.981840 127–134 P90-1017 @@ -160,7 +160,7 @@ A Syntactic Filter on Pronominal Anaphora for Slot Grammar ShalomLappin - MichaelMcCord + MichaelMcCord 10.3115/981823.981841 135–142 P90-1018 @@ -225,9 +225,9 @@ Asymmetry in Parsing and Generating with Unification Grammars: Case Studies From <fixed-case>ELU</fixed-case> - GrahamRussell - JohnCarroll - SusanWarwick + GrahamRussell + JohnCarroll + SusanWarwick 10.3115/981823.981849 205–211 P90-1026 @@ -235,7 +235,7 @@ Automated Inversion of Logic Grammars for Generation - TomekStrzalkowski + TomekStrzalkowski PingPeng 10.3115/981823.981850 212–219 @@ -253,9 +253,9 @@ Multiple Underlying Systems: Translating User Requests into Programs to Produce Answers - Robert J.Bobrow + Robert J.Bobrow PhilipResnik - Ralph M.Weischedel + Ralph M.Weischedel 10.3115/981823.981852 227–234 P90-1029 @@ -263,7 +263,7 @@ Computational structure of generative phonology and its relation to language comprehension. - Eric SvenRistad + Eric SvenRistad 10.3115/981823.981853 235–242 P90-1030 @@ -271,7 +271,7 @@ Parsing the <fixed-case>LOB</fixed-case> Corpus - Carl G.de Marcken + Carl G.de Marcken 10.3115/981823.981854 243–251 P90-1031 @@ -279,8 +279,8 @@ Automatically Extracting and Representing Collocations for Language Generation - Frank A.Smadja - Kathleen R.McKeown + Frank A.Smadja + Kathleen R.McKeown 10.3115/981823.981855 252–259 P90-1032 @@ -288,7 +288,7 @@ Disamibiguating and Interpreting Verb Definitions - YaelRavin + YaelRavin 10.3115/981823.981856 260–267 P90-1033 @@ -296,7 +296,7 @@ Noun Classification From Predicate-Argument Structures - DonaldHindle + DonaldHindle 10.3115/981823.981857 268–275 P90-1034 @@ -305,7 +305,7 @@ Deterministic Left to Right Parsing of Tree Adjoining Languages YvesSchabes - K.Vijay-Shanker + K.Vijay-Shanker 10.3115/981823.981858 276–283 P90-1035 @@ -321,7 +321,7 @@ Lexical and Syntactic Rules in a <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar - AnneAbeille + AnneAbeille 10.3115/981823.981860 292–298 P90-1037 diff --git a/data/xml/P91.xml b/data/xml/P91.xml index 38ed3e3ce4..0f63d5f5f1 100644 --- a/data/xml/P91.xml +++ b/data/xml/P91.xml @@ -24,7 +24,7 @@ Inclusion, Disjointness and Choice: The Logic of Linguistic Classification BobCarpenter - CarlPollard + CarlPollard 10.3115/981344.981346 9–16 P91-1002 @@ -43,7 +43,7 @@ Toward a Plan-Based Understanding Model for Mixed-Initiative Dialogues HiroakiKitano - CarolVan Ess-Dykema + CarolVan Ess-Dykema 10.3115/981344.981348 25–32 P91-1004 @@ -51,7 +51,7 @@ An Algorithm for Plan Recognition in Collaborative Discourse - Karen E.Lochbaum + Karen E.Lochbaum 10.3115/981344.981349 33–38 P91-1005 @@ -59,7 +59,7 @@ A Three-Level Model for Plan Exploration - Lance A.Ramshaw + Lance A.Ramshaw 10.3115/981344.981350 39–46 P91-1006 @@ -68,7 +68,7 @@ A Tripartite Plan-Based Model of Dialogue LynnLambert - SandraCarberry + SandraCarberry 10.3115/981344.981351 47–54 P91-1007 @@ -77,7 +77,7 @@ Discourse Relations and Defeasible Knowledge AlexLascarides - NicholasAsher + NicholasAsher 10.3115/981344.981352 55–62 P91-1008 @@ -85,7 +85,7 @@ Some Facts About Centers, Indexicals, and Demonstratives - Rebecca J.Passonneau + Rebecca J.Passonneau 10.3115/981344.981353 63–70 P91-1009 @@ -93,7 +93,7 @@ Type-Raising and Directionality in Combinatory Grammar - MarkSteedman + MarkSteedman 10.3115/981344.981354 71–78 P91-1010 @@ -109,7 +109,7 @@ Compose-Reduce Parsing - Henry S.Thompson + Henry S.Thompson MikeDixon JohnLamping 10.3115/981344.981356 @@ -135,7 +135,7 @@ Head Corner Parsing for Discontinuous Constituency - Gertjanvan Noord + Gertjanvan Noord 10.3115/981344.981359 114–121 P91-1015 @@ -171,9 +171,9 @@ Subject-Dependent Co-Occurrence and Word Sense Disambiguation Joe A.Guthriee - LouiseGuthrie + LouiseGuthrie HomaAidinejad - YorickWilks + YorickWilks 10.3115/981344.981363 146–152 P91-1019 @@ -182,7 +182,7 @@ A System for Translating Locative Prepositions From <fixed-case>E</fixed-case>nglish Into <fixed-case>F</fixed-case>rench NathalieJapkowicz - Janyce M.Wiebe + Janyce M.Wiebe 10.3115/981344.981364 153–160 P91-1020 @@ -190,10 +190,10 @@ Translation by Quasi Logical Form Transfer - HiyanAlshawi - DavidCarter - MannyRayner - BjornGamback + HiyanAlshawi + DavidCarter + MannyRayner + BjornGamback 10.3115/981344.981365 161–168 P91-1021 @@ -201,9 +201,9 @@ Aligning Sentences in Parallel Corpora - Peter F.Brown - Jennifer C.Lai - Robert L.Mercer + Peter F.Brown + Jennifer C.Lai + Robert L.Mercer 10.3115/981344.981366 169–176 P91-1022 @@ -211,8 +211,8 @@ A Program for Aligning Sentences in Bilingual Corpora - William A.Gale - Kenneth W.Church + William A.Gale + Kenneth W.Church 10.3115/981344.981367 177–184 P91-1023 @@ -220,7 +220,7 @@ Experiments and Prospects of Example-Based Machine Translation - EiichiroSumita + EiichiroSumita HitoshiIida 10.3115/981344.981368 185–192 @@ -255,9 +255,9 @@ Multiple Default Inheritance in a Unification-Based Lexicon - GrahamRussell - JohnCarroll - SusanWarwick-Armstrong + GrahamRussell + JohnCarroll + SusanWarwick-Armstrong 10.3115/981344.981372 215–221 P91-1028 @@ -273,7 +273,7 @@ Structural Ambiguity and Lexical Relations - DonaldHindle + DonaldHindle MatsRooth 10.3115/981344.981374 229–236 @@ -290,7 +290,7 @@ Finite-State Approximation of Phrase Structure Grammars - Fernando C. N.Pereira + Fernando C. N.Pereira 10.3115/981344.981376 246–255 P91-1032 @@ -298,7 +298,7 @@ Feature Logic With Weak Subsumption Constraints - JochenDorre + JochenDorre 10.3115/981344.981377 256–263 P91-1033 @@ -306,10 +306,10 @@ Word-Sense Disambiguation Using Statistical Methods - Peter F.Brown - Stephen A.Della Pietra - Vincent J.Della Pietra - Robert L.Mercer + Peter F.Brown + Stephen A.Della Pietra + Vincent J.Della Pietra + Robert L.Mercer 10.3115/981344.981378 264–270 P91-1034 @@ -317,7 +317,7 @@ A Stochastic Process for Word Frequency Distributions - HaraldBaayen + HaraldBaayen 10.3115/981344.981379 271–278 P91-1035 @@ -325,7 +325,7 @@ From N-Grams to Collocations: An Evaluation of <fixed-case>X</fixed-case>tract - Frank A.Smadja + Frank A.Smadja 10.3115/981344.981380 279–284 P91-1036 @@ -333,7 +333,7 @@ Predicting Intonational Phrasing From Text - Michelle Q.Wang + Michelle Q.Wang 10.3115/981344.981381 285–292 P91-1037 @@ -342,8 +342,8 @@ A Preference-first Language Processor: Integrating the Unification Grammar and <fixed-case>M</fixed-case>arkov Language Model for Speech Recognition Applications Lee-FengChien - K. J.Chen - Lin-ShanLee + K. J.Chen + Lin-ShanLee 10.3115/981344.981382 293–298 P91-1038 @@ -351,7 +351,7 @@ Factorization of Language Constraints in Speech Recognition - RobertoPieraccini + RobertoPieraccini Chin-HuiLee 10.3115/981344.981383 299–306 @@ -376,7 +376,7 @@ Unification With Lazy Non-Redundant Copying - Martin C.Emele + Martin C.Emele 10.3115/981344.981386 323–330 P91-1042 @@ -441,7 +441,7 @@ Collaborating on Referring Expressions - Peter A.Heeman + Peter A.Heeman 10.3115/981344.981395 345–346 P91-1050 @@ -465,7 +465,7 @@ Resolving a Pragmatic Prepositional Phrase Attachment Ambiguity - Christine H.Nakatani + Christine H.Nakatani 10.3115/981344.981398 351–352 P91-1053 @@ -489,7 +489,7 @@ An Incremental Connectionist Phrase Structure Parser - JamesHenderson + JamesHenderson 10.3115/981344.981401 357–358 P91-1056 diff --git a/data/xml/P92.xml b/data/xml/P92.xml index 5e24ff35b5..f67111aae6 100644 --- a/data/xml/P92.xml +++ b/data/xml/P92.xml @@ -16,8 +16,8 @@ Inferring Discourse Relations in Context AlexLascarides - NicholasAsher - JonOberlander + NicholasAsher + JonOberlander 10.3115/981967.981968 1–8 P92-1001 @@ -42,7 +42,7 @@ The Representation of Multimodal User Interface Dialogues Using Discourse Pegs - SusannLuperfoy + SusannLuperfoy 10.3115/981967.981971 22–31 P92-1004 @@ -50,7 +50,7 @@ Monotonic Semantic Interpretation - HiyanAlshawi + HiyanAlshawi RichardCrouch 10.3115/981967.981972 32–39 @@ -59,7 +59,7 @@ Efficiency, Robustness and Accuracy in picky Chart Parsing - David M.Magerman + David M.Magerman CarlWeir 10.3115/981967.981973 40–47 @@ -68,8 +68,8 @@ A Functional Approach to Generation with <fixed-case>TAG</fixed-case> - Kathleen F.McCoy - K.Vijay-Shanker + Kathleen F.McCoy + K.Vijay-Shanker GijooYang 10.3115/981967.981974 48–55 @@ -78,9 +78,9 @@ Integrating Multiple Knowledge Sources for Detection and Correction of Repairs in Human-Computer Dialog - JohnBear - JohnDowding - ElizabethShriberg + JohnBear + JohnDowding + ElizabethShriberg 10.3115/981967.981975 56–63 P92-1008 @@ -88,8 +88,8 @@ Conversational Implicatures in Indirect Replies - NancyGreen - SandraCarberry + NancyGreen + SandraCarberry 10.3115/981967.981976 64–71 P92-1009 @@ -98,7 +98,7 @@ Reasoning with Descriptions of Trees JamesRogers - K.Vijay-Shanker + K.Vijay-Shanker 10.3115/981967.981977 72–80 P92-1010 @@ -106,8 +106,8 @@ Comparing Two Grammar-Based Generation Algorithms: A Case Study - MiroslavMartinovic - TomekStrzalkowski + MiroslavMartinovic + TomekStrzalkowski 10.3115/981967.981978 81–88 P92-1011 @@ -123,7 +123,7 @@ Accommodating Context Change - Bonnie LynnWebber + Bonnie LynnWebber BreckBaldwin 10.3115/981967.981980 96–103 @@ -132,7 +132,7 @@ Information Retrieval Using Robust Natural Language Processing - TomekStrzalkowski + TomekStrzalkowski BarbaraVauthey 10.3115/981967.981981 104–111 @@ -167,7 +167,7 @@ Linear Context-Free Rewriting Systems and Deterministic Tree-Walking Transducers - David J.Weir + David J.Weir 10.3115/981967.981985 136–143 P92-1018 @@ -175,7 +175,7 @@ A Connectionist Parser for Structure Unification Grammar - James B.Henderson + James B.Henderson 10.3115/981967.981986 144–151 P92-1019 @@ -184,7 +184,7 @@ Would <fixed-case>I</fixed-case> Lie to You? Modelling Misrepresentation and Context in Dialogue CarlGutwin - GordonMcCalla + GordonMcCalla 10.3115/981967.981987 152–158 P92-1020 @@ -192,7 +192,7 @@ Lattice-Based Word Identification in <fixed-case>CLARE</fixed-case> - David M.Carter + David M.Carter 10.3115/981967.981988 159–166 P92-1021 @@ -201,7 +201,7 @@ An Alternative Conception of Tree-Adjoining Derivation YvesSchabes - Stuart M.Shieber + Stuart M.Shieber 10.3115/981967.981989 167–176 P92-1022 @@ -219,9 +219,9 @@ Development and Evaluation of a Broad-Coverage Probabilistic Grammar of <fixed-case>E</fixed-case>nglish-Language Computer Manuals - EzraBlack - JohnLafferty - SalimRoukos + EzraBlack + JohnLafferty + SalimRoukos 10.3115/981967.981991 185–192 P92-1024 @@ -230,7 +230,7 @@ Modeling Negotiation Subdialogues LynnLambert - SandraCarberry + SandraCarberry 10.3115/981967.981992 193–200 P92-1025 @@ -248,7 +248,7 @@ A Unification-Based Semantic Interpretation for Coordinate Constructs - Jong C.Park + Jong C.Park 10.3115/981967.981994 209–215 P92-1027 @@ -256,7 +256,7 @@ Corpus-Based Acquisition of Relative Pronoun Disambiguation Heuristics - ClaireCardie + ClaireCardie 10.3115/981967.981995 216–223 P92-1028 @@ -266,7 +266,7 @@ Association-based Natural Language Processing with Neural Networks KimuraKazuhiro UzuokaTakashi - AmanoSin-ya + Sin-yaAmano 10.3115/981967.981996 224–231 P92-1029 @@ -275,7 +275,7 @@ Tense Trees as the “Fine Structure” of Discourse Chung HeeHwang - Lenhart K.Schubert + Lenhart K.Schubert 10.3115/981967.981997 232–240 P92-1030 @@ -291,8 +291,8 @@ Estimating Upper and Lower Bounds on the Performance of Word-Sense Disambiguation Programs - WilliamGale - Kenneth WardChurch + WilliamGale + Kenneth WardChurch DavidYarowsky 10.3115/981967.981999 249–256 @@ -301,7 +301,7 @@ A Parameterized Approach to Integrating Aspect With Lexical-Semantics for Machine Translation - Bonnie J.Dorr + Bonnie J.Dorr 10.3115/981967.982000 257–264 P92-1033 @@ -310,7 +310,7 @@ Using Classification to Generate Text EhudReiter - ChrisMellish + ChrisMellish 10.3115/981967.982001 265–272 P92-1034 @@ -398,7 +398,7 @@ Information States as First Class Citizens - JorgenVilladsen + JorgenVilladsen 10.3115/981967.982013 303–305 P92-1045 @@ -414,8 +414,8 @@ Metonymy: Reassessment, Survey of Acceptability, and Its Treatment in a Machine Translation System - Shin-ichiroKamei - TakahiroWakao + Shin-ichiroKamei + TakahiroWakao 10.3115/981967.982015 309–311 P92-1047 @@ -423,7 +423,7 @@ A Basis for a Formalization of Linguistic Style - Stephen J.Green + Stephen J.Green 10.3115/981967.982016 312–314 P92-1048 @@ -431,7 +431,7 @@ Elaboration in Object Descriptions Through Examples - Vibhu O.Mittal + Vibhu O.Mittal 10.3115/981967.982017 315–317 P92-1049 @@ -447,7 +447,7 @@ Generating a Specific Class of Metaphors - Mark AlanJones + Mark AlanJones 10.3115/981967.982019 321–323 P92-1051 diff --git a/data/xml/P93.xml b/data/xml/P93.xml index 0421d1769a..e0c4815796 100644 --- a/data/xml/P93.xml +++ b/data/xml/P93.xml @@ -15,7 +15,7 @@ Char_align: A Program for Aligning Parallel Texts at the Character Level - Kenneth WardChurch + Kenneth WardChurch 10.3115/981574.981575 1–8 P93-1001 @@ -23,7 +23,7 @@ Aligning Sentences in Bilingual Corpora Using Lexical Information - Stanley F.Chen + Stanley F.Chen 10.3115/981574.981576 9–16 P93-1002 @@ -39,7 +39,7 @@ Structural Matching of Parallel Texts - YujiMatsumoto + YujiMatsumoto TakehitoUtsuro HiroyukiIshimoto 10.3115/981574.981578 @@ -49,12 +49,12 @@ Towards History-based Grammars: Using Richer Models for Probabilistic Parsing - EzraBlack - FredJelinek - JohnLafrerty - David M.Magerman + EzraBlack + FredJelinek + JohnLafrerty + David M.Magerman RobertMercer - SalimRoukos + SalimRoukos 10.3115/981574.981579 31–37 P93-1005 @@ -63,7 +63,7 @@ Using Bracketed Parses to Evaluate a Grammar Checking Application Richard H.Wojcik - PhilipHarrison + PhilipHarrison JohnBremer 10.3115/981574.981580 38–45 @@ -72,8 +72,8 @@ A Speech-First Model for Repair Detection and Correction - ChristineNakatani - JuliaHirschberg + ChristineNakatani + JuliaHirschberg 10.3115/981574.981581 46–53 P93-1007 @@ -81,13 +81,13 @@ <fixed-case>GEMINI</fixed-case>: A Natural Language System for Spoken-Language Understanding - JohnDowding - Jean MarkGawron - DougAppelt - JohnBear + JohnDowding + Jean MarkGawron + DougAppelt + JohnBear LynnCherny - RobertMoore - DouglasMoran + RobertMoore + DouglasMoran 10.3115/981574.981582 54–61 P93-1008 @@ -95,7 +95,7 @@ The Effect of Establishing Coherence in Ellipsis and Anaphora Resolution - AndrewKehler + AndrewKehler 10.3115/981574.981583 62–69 P93-1009 @@ -104,8 +104,8 @@ Temporal Centering MegumiKameyama - RebeccaPassonneau - MassimoPoesio + RebeccaPassonneau + MassimoPoesio 10.3115/981574.981584 70–77 P93-1010 @@ -113,7 +113,7 @@ Assigning a Semantic Scope to Operators - MassimoPoesio + MassimoPoesio 10.3115/981574.981585 78–86 P93-1011 @@ -121,7 +121,7 @@ Two Kinds of Metonymy - DavidStallard + DavidStallard 10.3115/981574.981586 87–94 P93-1012 @@ -129,7 +129,7 @@ Planning Multimodal Discourse - WolfgangWahlster + WolfgangWahlster 10.3115/981574.981587 95–96 P93-1013 @@ -137,9 +137,9 @@ A Unification-Based Parser for Relational Grammar - David E.Johnson - AdamMeyers - Lawrence S.Moss + David E.Johnson + AdamMeyers + Lawrence S.Moss 10.3115/981574.981588 97–104 P93-1014 @@ -147,7 +147,7 @@ Parsing Free Word Order Languages in the <fixed-case>P</fixed-case>aninian Framework - AksharBharati + AksharBharati RajeevSangal 10.3115/981574.981589 105–111 @@ -185,7 +185,7 @@ Feature-Based Allomorphy - Hans-UlrichKrieger + Hans-UlrichKrieger HannesPirker 10.3115/981574.981593 140–147 @@ -194,8 +194,8 @@ Intention-Based Segmentation: Human Reliability and Correlation With Linguistic Cues - Rebecca J.Passonneau - Diane J.Litman + Rebecca J.Passonneau + Diane J.Litman 10.3115/981574.981594 148–155 P93-1020 @@ -204,7 +204,7 @@ A Language-Independent Anaphora Resolution System for Understanding Multilingual Texts ChinatsuAone - DouglasMcKee + DouglasMcKee 10.3115/981574.981595 156–163 P93-1021 @@ -223,7 +223,7 @@ Towards the Automatic Identification of Adjectival Scales: Clustering Adjectives According to Meaning VasileiosHatzivassiloglou - Kathleen R.McKeown + Kathleen R.McKeown 10.3115/981574.981597 172–182 P93-1023 @@ -283,7 +283,7 @@ Quantificational Domains and Recursive Contexts - BarbaraPartee + BarbaraPartee 10.3115/981574.981604 224–225 P93-1030 @@ -291,7 +291,7 @@ Tailoring Lexical Choice to the User’s Vocabulary in Multimedia Explanation Generation - KathleenMcKeown + KathleenMcKeown JacquesRobin MichaelTanenblatt 10.3115/981574.981605 @@ -301,7 +301,7 @@ Automatic Acquisition of a Large Sub Categorization Dictionary From Corpora - Christopher D.Manning + Christopher D.Manning 10.3115/981574.981606 235–242 P93-1032 @@ -310,7 +310,7 @@ An Empirical Study on Thematic Knowledge Acquisition Based on Syntactic Clues and Heuristics Rey-LongLiu - Von-wunSoo + Von-wunSoo 10.3115/981574.981607 243–250 P93-1033 @@ -318,7 +318,7 @@ Part-of-Speech Induction From Scratch - HinrichSchütze + HinrichSchütze 10.3115/981574.981608 251–258 P93-1034 @@ -360,7 +360,7 @@ Responding to User Queries in a Collaborative Environment - JenniferChu + JenniferChu 10.3115/981574.981614 280–282 P93-1039 @@ -368,7 +368,7 @@ The Imperfective Paradox and Trajectory-of-Motion Events - MichaelWhite + MichaelWhite 10.3115/981574.981615 283–285 P93-1040 @@ -416,7 +416,7 @@ Integrating Word Boundary Identification With Sentence Understanding - Kok WeeGan + Kok WeeGan 10.3115/981574.981621 301–303 P93-1046 diff --git a/data/xml/P94.xml b/data/xml/P94.xml index de23e0b009..2c6a758090 100644 --- a/data/xml/P94.xml +++ b/data/xml/P94.xml @@ -15,8 +15,8 @@ Discourse Obligations in Dialogue Processing - David R.Traum - James F.Allen + David R.Traum + James F.Allen 10.3115/981732.981733 1–8 P94-1001 @@ -24,7 +24,7 @@ Multi-Paragraph Segmentation Expository Text - Marti A.Hearst + Marti A.Hearst 10.3115/981732.981734 9–16 P94-1002 @@ -44,9 +44,9 @@ Hidden Understanding Models of Natural Language ScottMiller - RobertBobrow - RobertIngria - RichardSchwartz + RobertBobrow + RobertIngria + RichardSchwartz 10.3115/981732.981736 25–32 P94-1004 @@ -54,7 +54,7 @@ From Strings to Trees to Strings to Trees ... (Abstract) - Aravind K.Joshi + Aravind K.Joshi 10.3115/981732.981737 33–33 P94-1005 @@ -62,7 +62,7 @@ Intentions and Information in Discourse - NicholasAsher + NicholasAsher AlexLascarides 10.3115/981732.981738 34–41 @@ -79,7 +79,7 @@ Common Topics and Coherent Situations: Interpreting Ellipsis in the Context of Discourse Inference - AndrewKehler + AndrewKehler 10.3115/981732.981740 50–57 P94-1008 @@ -87,8 +87,8 @@ A Hybrid Reasoning Model for Indirect Answers - NancyGreen - SandraCarberry + NancyGreen + SandraCarberry 10.3115/981732.981741 58–65 P94-1009 @@ -96,9 +96,9 @@ A Stochastic Finite-State Word-Segmentation Algorithm for <fixed-case>C</fixed-case>hinese - RichardSproat + RichardSproat ChilinShih - WilliamGale + WilliamGale NancyChang 10.3115/981732.981742 66–73 @@ -107,7 +107,7 @@ Precise N-Gram Probabilities From Stochastic Context-Free Grammars - AndreasStolcke + AndreasStolcke JonathanSegal 10.3115/981732.981743 74–79 @@ -150,10 +150,10 @@ Interleaving Syntax and Semantics in an Efficient Bottom-Up Parser - JohnDowding - RobertMoore + JohnDowding + RobertMoore FrancoisAndry - DouglasMoran + DouglasMoran 10.3115/981732.981748 110–116 P94-1016 @@ -178,7 +178,7 @@ Verb Semantics and Lexical Selection ZhibiaoWu - MarthaPalmer + MarthaPalmer 10.3115/981732.981751 133–138 P94-1019 @@ -186,8 +186,8 @@ Word-Sense Disambiguation Using Decomposable Models - RebeccaBruce - JanyceWiebe + RebeccaBruce + JanyceWiebe 10.3115/981732.981752 139–146 P94-1020 @@ -196,7 +196,7 @@ Constraint-Based Categorial Grammar GosseBouma - Gertjanvan Noord + Gertjanvan Noord 10.3115/981732.981753 147–154 P94-1021 @@ -212,7 +212,7 @@ On Determining the Consistency of Partial Descriptions of Trees - Thomas L.Cornell + Thomas L.Cornell 10.3115/981732.981755 163–170 P94-1023 @@ -221,7 +221,7 @@ A <fixed-case>M</fixed-case>arkov Language Learning Model for Finite Parameter Spaces ParthaNiyogi - Robert C.Berwick + Robert C.Berwick 10.3115/981732.981756 171–180 P94-1024 @@ -229,7 +229,7 @@ Part-of-Speech Tagging Using a Variable Memory <fixed-case>M</fixed-case>arkov Model - HinrichSchuetze + HinrichSchuetze YoramSinger 10.3115/981732.981757 181–187 @@ -287,7 +287,7 @@ Extracting Noun Phrases from Large-Scale Texts: A Hybrid Approach and Its Automatic Evaluation - Kuang-huaChen + Kuang-huaChen Hsin-HsiChen 10.3115/981732.981764 234–241 @@ -324,7 +324,7 @@ Multiset-Valued Linear Index Grammars: Imposing Dominance Constraints on Derivations - OwenRambow + OwenRambow 10.3115/981732.981768 263–270 P94-1036 @@ -359,7 +359,7 @@ Relating Complexity to Practical Performance in Parsing With Wide-Coverage Unification Grammars - JohnCarroll + JohnCarroll 10.3115/981732.981772 287–294 P94-1040 @@ -367,8 +367,8 @@ Detecting and Correcting Speech Repairs - PeterHeeman - JamesAllen + PeterHeeman + JamesAllen 10.3115/981732.981773 295–302 P94-1041 @@ -377,7 +377,7 @@ A Computational View of the Cognitive Semantics of Spatial Prepositions PatrickOlivier - Jun-ichiTsujii + Jun-ichiTsujii 10.3115/981732.981774 303–309 P94-1042 @@ -385,7 +385,7 @@ Reaping the Benefits of Interactive Syntax and Semantics - KaviMahesh + KaviMahesh 10.3115/981732.981776 310–312 P94-1043 @@ -401,7 +401,7 @@ An Integrated Heuristic Scheme for Partial Parse Evaluation - AlonLavie + AlonLavie 10.3115/981732.981778 316–318 P94-1045 @@ -409,7 +409,7 @@ Temporal Relations: Reference or Discourse Coherence? - AndrewKehler + AndrewKehler 10.3115/981732.981779 319–321 P94-1046 @@ -449,9 +449,9 @@ Automatic Alignment in Parallel Corpora - HarrisPapageorgiou - LambrosCranias - SteliosPiperidis + HarrisPapageorgiou + LambrosCranias + SteliosPiperidis 10.3115/981732.981784 334–336 P94-1051 diff --git a/data/xml/P95.xml b/data/xml/P95.xml index 7c81015203..cad76d2aff 100644 --- a/data/xml/P95.xml +++ b/data/xml/P95.xml @@ -16,8 +16,8 @@ Learning Phonological Rule Probabilities from Speech Corpora with Exploratory Computational Phonology GaryTajchman - DanielJurafsky - EricFosler + DanielJurafsky + EricFosler 10.3115/981658.981659 1–8 P95-1001 @@ -26,7 +26,7 @@ Automatic Induction of Finite State Transducers for Simple Phonological Rules DanielGildea - DanielJurafsky + DanielJurafsky 10.3115/981658.981660 9–15 P95-1002 @@ -43,7 +43,7 @@ A Morphographemic Model for Error Correction in Nonconcatenative Strings TanyaBowden - George AntonKiraz + George AntonKiraz 10.3115/981658.981662 24–30 P95-1004 @@ -51,10 +51,10 @@ Discourse Processing of Dialogues with Multiple Threads - Carolyn PensteinRosé + Carolyn PensteinRosé BarbaraDi Eugenio - Lori S.Levin - CarolVan Ess-Dykema + Lori S.Levin + CarolVan Ess-Dykema 10.3115/981658.981663 31–38 P95-1005 @@ -86,7 +86,7 @@ User-Defined Nonmonotonicity in Unification-Based Formalisms - LenaStromback + LenaStromback 10.3115/981658.981667 63–69 P95-1009 @@ -94,7 +94,7 @@ Features and Agreement - SamBayer + SamBayer MarkJohnson 10.3115/981658.981668 70–76 @@ -103,9 +103,9 @@ Encoding <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars with a Nonmonotonic Inheritance Hierarchy - RogerEvans + RogerEvans GeraldGazdar - DavidWeir + DavidWeir 10.3115/981658.981669 77–84 P95-1011 @@ -113,8 +113,8 @@ Compiling <fixed-case>HPSG</fixed-case> type constraints into definite clause programs - ThiloGotz - Walt DetmarMeurers + ThiloGotz + Walt DetmarMeurers 10.3115/981658.981670 85–91 P95-1012 @@ -122,10 +122,10 @@ Compilation of <fixed-case>HPSG</fixed-case> to <fixed-case>TAG</fixed-case> - RobertKasper - BerndKiefer + RobertKasper + BerndKiefer KlausNetter - K.Vijay-Shanker + K.Vijay-Shanker 10.3115/981658.981671 92–99 P95-1013 @@ -134,7 +134,7 @@ Memoization of Coroutined Constraints MarkJohnson - JochenDorre + JochenDorre 10.3115/981658.981672 100–107 P95-1014 @@ -142,8 +142,8 @@ Combining Multiple Knowledge Sources for Discourse Segmentation - Diane J.Litman - Rebecca J.Passonneau + Diane J.Litman + Rebecca J.Passonneau 10.3115/981658.981673 108–115 P95-1015 @@ -152,7 +152,7 @@ Utilizing Statistical Dialogue Act Processing in Verbrnobil NorbertReithinger - ElisabethMaier + ElisabethMaier 10.3115/981658.981674 116–121 P95-1016 @@ -170,7 +170,7 @@ Investigating Cue Selection and Placement in Tutorial Discourse MeganMoser - Johanna D.Moore + Johanna D.Moore 10.3115/981658.981676 130–135 P95-1018 @@ -178,8 +178,8 @@ Response Generation in Collaborative Negotiation - JenniferChu-Carroll - SandraCarberry + JenniferChu-Carroll + SandraCarberry 10.3115/981658.981677 136–143 P95-1019 @@ -196,9 +196,9 @@ <fixed-case>D</fixed-case>-Tree Grammars - OwenRambow - K.Vijay-Shanker - DavidWeir + OwenRambow + K.Vijay-Shanker + DavidWeir 10.3115/981658.981679 151–158 P95-1021 @@ -206,7 +206,7 @@ The intersection of Finite State Automata and Definite Clause Grammars - Gertjanvan Noord + Gertjanvan Noord 10.3115/981658.981680 159–165 P95-1022 @@ -224,7 +224,7 @@ Extraposition via Complex Domain Formation AndreasKathol - CarlPollard + CarlPollard 10.3115/981658.981682 174–180 P95-1024 @@ -250,7 +250,7 @@ A Quantitative Evaluation of Linguistic Tests for the Automatic Prediction of Semantic Markedness VasileiosHatzivassiloglou - KathleenMcKeown + KathleenMcKeown 10.3115/981658.981685 197–204 P95-1027 @@ -258,7 +258,7 @@ Quantifier Scope and Constituency - Jong C.Park + Jong C.Park 10.3115/981658.981686 205–212 P95-1028 @@ -274,7 +274,7 @@ New Techniques for Context Modeling - Eric SvenRistad + Eric SvenRistad Robert G.Thomas 10.3115/981658.981688 220–227 @@ -283,7 +283,7 @@ <fixed-case>B</fixed-case>ayesian Grammar Induction for Language Modeling - Stanley F.Chen + Stanley F.Chen 10.3115/981658.981689 228–235 P95-1031 @@ -318,7 +318,7 @@ An Efficient Generation Algorithm for Lexicalist <fixed-case>MT</fixed-case> VictorPoznanski John L.Beaven - PeteWhitelock + PeteWhitelock 10.3115/981658.981693 261–267 P95-1035 @@ -326,8 +326,8 @@ Some Novel Applications of Explanation-Based Learning to Parsing <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - B.Srinivas - Aravind K.Joshi + B.Srinivas + Aravind K.Joshi 10.3115/981658.981694 268–275 P95-1036 @@ -335,7 +335,7 @@ Statistical Decision-Tree Models for Parsing - David M.Magerman + David M.Magerman 10.3115/981658.981695 276–283 P95-1037 @@ -407,7 +407,7 @@ Knowledge-based Automatic Topic Identification - Chin-YewLin + Chin-YewLin 10.3115/981658.981705 308–310 P95-1046 @@ -415,7 +415,7 @@ Acquiring a Lexicon from Unsegmented Speech - Carlde Marcken + Carlde Marcken 10.3115/981658.981706 311–313 P95-1047 @@ -432,7 +432,7 @@ Mapping Scrambled <fixed-case>K</fixed-case>orean Sentences into <fixed-case>E</fixed-case>nglish Using Synchronous <fixed-case>TAG</fixed-case>s - Hyun S.Park + Hyun S.Park 10.3115/981658.981708 317–319 P95-1049 diff --git a/data/xml/P96.xml b/data/xml/P96.xml index a145fe2373..c504f9e6c6 100644 --- a/data/xml/P96.xml +++ b/data/xml/P96.xml @@ -32,8 +32,8 @@ Noun Phrase Analysis in Large Unrestricted Text for Information Retrieval - David A.Evans - ChengxiangZhai + David A.Evans + ChengxiangZhai 10.3115/981863.981866 17–24 P96-1003 @@ -50,9 +50,9 @@ From Submit to Submitted via Submission: On Lexical Rules in Large-Scale Lexicon Acquisition EvelyneViegas - BoyanOnyshkevych + BoyanOnyshkevych VictorRaskin - SergeiNirenburg + SergeiNirenburg 10.3115/981863.981868 32–39 P96-1005 @@ -78,9 +78,9 @@ A Fully Statistical Approach to Natural Language Interfaces ScottMiller - DavidStallard - RobertBobrow - RichardSchwartz + DavidStallard + RobertBobrow + RichardSchwartz 10.3115/981863.981871 55–61 P96-1008 @@ -88,9 +88,9 @@ A Robust System for Natural Spoken Dialogue - James F.Allen + James F.Allen Bradford W.Miller - Eric K.Ringger + Eric K.Ringger TeresaSikorski 10.3115/981863.981872 62–70 @@ -108,7 +108,7 @@ Efficient Normal-Form Parsing for <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar - JasonEisner + JasonEisner 10.3115/981863.981874 79–86 P96-1011 @@ -124,7 +124,7 @@ Parsing with Semidirectional <fixed-case>L</fixed-case>ambek Grammar is <fixed-case>NP</fixed-case>-Complete - JochenDörre + JochenDörre 10.3115/981863.981876 95–100 P96-1013 @@ -148,7 +148,7 @@ Synchronous Models of Language - OwenRambow + OwenRambow GiorgioSatta 10.3115/981863.981879 116–123 @@ -158,7 +158,7 @@ Coordination as a Direct Process AugustaMela - ChristopheFouqueré + ChristopheFouqueré 10.3115/981863.981880 124–130 P96-1017 @@ -175,8 +175,8 @@ An Iterative Algorithm to Build <fixed-case>C</fixed-case>hinese Language Models - XiaoqiangLuo - SalimRoukos + XiaoqiangLuo + SalimRoukos 10.3115/981863.981882 139–143 P96-1019 @@ -200,7 +200,7 @@ <fixed-case>SEMHE</fixed-case>: A Generalised Two-level System - George AntonKiraz + George AntonKiraz 10.3115/981863.981885 159–166 P96-1022 @@ -208,7 +208,7 @@ Head Automata and Bilingual Tiling: Translation with Minimal Representations (Invited Talk) - HiyanAlshawi + HiyanAlshawi 10.3115/981863.981886 167–176 P96-1023 @@ -216,7 +216,7 @@ Parsing Algorithms and Metrics - JoshuaGoodman + JoshuaGoodman 10.3115/981863.981887 177–183 P96-1024 @@ -224,7 +224,7 @@ A New Statistical Parser Based on Bigram Lexical Dependencies - Michael JohnCollins + Michael JohnCollins 10.3115/981863.981888 184–191 P96-1025 @@ -232,8 +232,8 @@ Two Sources of Control Over the Generation of Software Instructions - AnthonyHartley - CecileParis + AnthonyHartley + CecileParis 10.3115/981863.981889 192–199 P96-1026 @@ -257,8 +257,8 @@ Compilation of Weighted Finite-State Transducers from Decision Trees - RichardSproat - MichaelRiley + RichardSproat + MichaelRiley 10.3115/981863.981892 215–222 P96-1029 @@ -266,8 +266,8 @@ Fast Parsing Using Pruning and Grammar Specialization - MannyRayner - DavidCarter + MannyRayner + DavidCarter 10.3115/981863.981893 223–230 P96-1030 @@ -276,7 +276,7 @@ An Efficient Compiler for Weighted Rewrite Rules MehryarMohri - RichardSproat + RichardSproat 10.3115/981863.981894 231–238 P96-1031 @@ -310,7 +310,7 @@ Resolving Anaphors in Embedded Sentences - SalihaAzzam + SalihaAzzam 10.3115/981863.981898 263–268 P96-1035 @@ -327,7 +327,7 @@ Mechanisms for Mixed-Initiative Human-Computer Collaborative Discourse - Curry I.Guinn + Curry I.Guinn 10.3115/981863.981900 278–285 P96-1037 @@ -335,8 +335,8 @@ A Prosodic Analysis of Discourse Segments in Direction-Giving Monologues - JuliaHirschberg - Christine H.Nakatani + JuliaHirschberg + Christine H.Nakatani 10.3115/981863.981901 286–293 P96-1038 @@ -360,8 +360,8 @@ An Empirical Study of Smoothing Techniques for Language Modeling - Stanley F.Chen - JoshuaGoodman + Stanley F.Chen + JoshuaGoodman 10.3115/981863.981904 310–318 P96-1041 @@ -386,7 +386,7 @@ Linguistic Structure as Composition and Perturbation - Carlde Marcken + Carlde Marcken 10.3115/981863.981907 335–341 P96-1044 @@ -394,7 +394,7 @@ Generating an <fixed-case>LTAG</fixed-case> out of a Principle-based Hierarchical Representation - Marie-HeleneCandito + Marie-HeleneCandito 10.3115/981863.981909 342–344 P96-1045 @@ -402,7 +402,7 @@ Using Parsed Corpora for Structural Disambiguation in the <fixed-case>TRAINS</fixed-case> Domain - MarkCore + MarkCore 10.3115/981863.981910 345–347 P96-1046 @@ -418,7 +418,7 @@ Using Textual Clues to Improve Metaphor Processing - StephaneFerrari + StephaneFerrari 10.3115/981863.981912 351–353 P96-1048 @@ -426,7 +426,7 @@ On Reversing the Generation Process in <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory - J. EricFosler + J. EricFosler 10.3115/981863.981913 354–356 P96-1049 @@ -442,7 +442,7 @@ An Application of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et to Prepositional Attachment - Sanda M.Harabagiu + Sanda M.Harabagiu 10.3115/981863.981915 360–362 P96-1051 @@ -458,7 +458,7 @@ Using Terminological Knowledge Representation Languages to Manage Linguistic Resources - Pamela W.Jordan + Pamela W.Jordan 10.3115/981863.981917 366–368 P96-1053 diff --git a/data/xml/P97.xml b/data/xml/P97.xml index e2dce1ae6e..107f6c9c5f 100644 --- a/data/xml/P97.xml +++ b/data/xml/P97.xml @@ -16,8 +16,8 @@ Interleaving Universal Principles and Relational Constraints over Typed Feature Logic - ThiloGotz - DetmarMeurers + ThiloGotz + DetmarMeurers 10.3115/976909.979618 1–8 P97-1001 @@ -34,7 +34,7 @@ Three Generative, Lexicalised Models for Statistical Parsing ACL 2022 25-Year Test of Time - MichaelCollins + MichaelCollins 10.3115/976909.979620 16–23 P97-1003 @@ -43,8 +43,8 @@ Expansion of Multi-Word Terms for Indexing and Retrieval Using Morphology and Syntax ChristianJacquemin - Judith L.Klavans - EvelyneTzoukermann + Judith L.Klavans + EvelyneTzoukermann 10.3115/976909.979621 24–31 P97-1004 @@ -54,7 +54,7 @@ Automatic Detection of Text Genre BrettKessler GeoffreyNunberg - HinrichSchutze + HinrichSchutze 10.3115/976909.979622 32–38 P97-1005 @@ -71,9 +71,9 @@ Combining Unsupervised Lexical Knowledge Methods for Word Sense Disambiguation - GermanRigau - JordiAtserias - EnekoAgirre + GermanRigau + JordiAtserias + EnekoAgirre 10.3115/976909.979624 48–55 P97-1007 @@ -108,7 +108,7 @@ Learning Features that Predict Cue Usage BarbaraDi Eugenio - Johanna D.Moore + Johanna D.Moore MassimoPaolucci 10.3115/976909.979628 80–87 @@ -118,7 +118,7 @@ Expectations in Incremental Discourse Processing DanCristea - BonnieWebber + BonnieWebber 10.3115/976909.979629 88–95 P97-1012 @@ -152,8 +152,8 @@ Ambiguity Resolution for Machine Translation of Telegraphic Messages Young-SukLee - CliffordWeinstein - StephanieSeneff + CliffordWeinstein + StephanieSeneff DineshTummala 10.3115/976909.979633 120–127 @@ -189,8 +189,8 @@ Deriving Verbal and Compositonal Lexical Aspect for <fixed-case>NLP</fixed-case> Applications - Bonnie J.Dorr - Mari BromanOlsen + Bonnie J.Dorr + Mari BromanOlsen 10.3115/976909.979637 151–158 P97-1020 @@ -200,7 +200,7 @@ A <fixed-case>DOP</fixed-case> Model for Semantic Interpretation RemkoBonnema RensBod - RemkoScha + RemkoScha 10.3115/976909.979638 159–167 P97-1021 @@ -208,9 +208,9 @@ Fertility Models for Statistical Natural Language Understanding - StephenDella Pietra - MarkEpstein - SalimRoukos + StephenDella Pietra + MarkEpstein + SalimRoukos ToddWard 10.3115/976909.979639 168–173 @@ -220,7 +220,7 @@ Predicting the Semantic Orientation of Adjectives VasileiosHatzivassiloglou - Kathleen R.McKeown + Kathleen R.McKeown 10.3115/976909.979640 174–181 P97-1023 @@ -228,7 +228,7 @@ Independence Assumptions Considered Harmful - AlexanderFranz + AlexanderFranz 10.3115/976909.979641 182–189 P97-1024 @@ -245,7 +245,7 @@ Sentence Planning as Description Using <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar MatthewStone - ChristineDoran + ChristineDoran 10.3115/976909.979643 198–205 P97-1026 @@ -261,7 +261,7 @@ Applying Explanation-based Learning to Control and Speeding-up Natural Language Generation - GunterNeumann + GunterNeumann 10.3115/976909.979645 214–221 P97-1028 @@ -270,7 +270,7 @@ Morphological Disambiguation by Voting Constraints KemalOflazer - GokhanTur + GokhanTur 10.3115/976909.979646 222–229 P97-1029 @@ -279,7 +279,7 @@ Mistake-Driven Mixture of Hierarchical Tag Context Trees MasahikoHaruno - YujiMatsumoto + YujiMatsumoto 10.3115/976909.979647 230–237 P97-1030 @@ -287,8 +287,8 @@ A Flexible <fixed-case>POS</fixed-case> Tagger Using an Automatically Acquired Language Model - LluisMarquez - LluisPadro + LluisMarquez + LluisPadro 10.3115/976909.979648 238–245 P97-1031 @@ -305,8 +305,8 @@ Intonational Boundaries, Speech Repairs, and Discourse Markers: Modeling Spoken Dialog - Peter A.Heeman - James F.Allen + Peter A.Heeman + James F.Allen 10.3115/976909.979650 254–261 P97-1033 @@ -314,8 +314,8 @@ Tracking Initiative in Collaborative Dialogue Interactions - JenniferChu-Carroll - Michael K.Brown + JenniferChu-Carroll + Michael K.Brown 10.3115/976909.979651 262–270 P97-1034 @@ -323,9 +323,9 @@ <fixed-case>PARADISE</fixed-case>: A Framework for Evaluating Spoken Dialogue Agents - Marilyn A.Walker - Diane J.Litman - Candace A.Kamm + Marilyn A.Walker + Diane J.Litman + Candace A.Kamm AliciaAbella 10.3115/976909.979652 271–280 @@ -334,10 +334,10 @@ Unification-based Multimodal Integration - MichaelJohnston - Philip R.Cohen - DavidMcGee - Sharon L.Oviatt + MichaelJohnston + Philip R.Cohen + DavidMcGee + Sharon L.Oviatt James A.Pittman IraSmith 10.3115/976909.979653 @@ -347,9 +347,9 @@ A <fixed-case>DP</fixed-case>-based Search Using Monotone Alignments in Statistical Translation - ChristophTillmann - StephanVogel - HermannNey + ChristophTillmann + StephanVogel + HermannNey AlexZubiaga 10.3115/976909.979654 289–296 @@ -358,7 +358,7 @@ An Alignment Method for Noisy Parallel Corpora based on Image Processing Techniques - Jason S.Chang + Jason S.Chang Mathis H.Chen 10.3115/976909.979655 297–304 @@ -375,7 +375,7 @@ Efficient Generation in Primitive <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory - JasonEisner + JasonEisner 10.3115/976909.979657 313–320 P97-1040 @@ -383,7 +383,7 @@ A Trainable Rule-Based Algorithm for Word Segmentation - David D.Palmer + David D.Palmer 10.3115/976909.979658 321–328 P97-1041 @@ -391,7 +391,7 @@ Compiling Regular Formalisms with Rule Features into Finite-State Automata - George AntonKiraz + George AntonKiraz 10.3115/976909.979659 329–336 P97-1042 @@ -417,7 +417,7 @@ Automatic Extraction of Aspectual Information from a Monolingual Corpus AkiraOishi - YujiMatsumoto + YujiMatsumoto 10.3115/976909.979662 352–359 P97-1045 @@ -425,7 +425,7 @@ A Comparison of Head Transducers and Transfer for a Limited Domain Translation Application - HiyanAlshawi + HiyanAlshawi Adam L.Buchsbaum FeiXia 10.3115/976909.979663 @@ -436,7 +436,7 @@ Decoding Algorithm in Statistical Machine Translation Ye-YiWang - AlexWaibel + AlexWaibel 10.3115/976909.979664 366–372 P97-1047 @@ -445,8 +445,8 @@ A Model of Lexical Attraction and Repulsion DougBeeferman - AdamBerger - JohnLafferty + AdamBerger + JohnLafferty 10.3115/976909.979665 373–380 P97-1048 @@ -454,7 +454,7 @@ Hierarchical Non-Emitting <fixed-case>M</fixed-case>arkov Models - Eric SvenRistad + Eric SvenRistad Robert G.Thomas 10.3115/976909.979666 381–385 @@ -463,7 +463,7 @@ Efficient Construction of Underspecified Semantics under Massive Ambiguity - JochenDörre + JochenDörre 10.3115/976909.979667 386–393 P97-1050 @@ -471,8 +471,8 @@ A Theory of Parallelism and the Case of <fixed-case>VP</fixed-case> Ellipsis - Jerry R.Hobbs - AndrewKehler + Jerry R.Hobbs + AndrewKehler 10.3115/976909.979668 394–401 P97-1051 @@ -480,7 +480,7 @@ On Interpreting <fixed-case>F</fixed-case>-Structures as <fixed-case>UDRS</fixed-case>s - Josefvan Genabith + Josefvan Genabith RichardCrouch 10.3115/976909.979669 402–409 @@ -499,7 +499,7 @@ Co-Evolution of Language and of the Language Acquisition Device - TedBriscoe + TedBriscoe 10.3115/976909.979671 418–427 P97-1054 @@ -516,7 +516,7 @@ Memory-Based Learning: Using Similarity for Smoothing JakubZavrel - WalterDaelemans + WalterDaelemans 10.3115/976909.979673 436–443 P97-1056 @@ -525,7 +525,7 @@ String Transformation Learning GiorgioSatta - John C.Henderson + John C.Henderson 10.3115/976909.979674 444–451 P97-1057 @@ -533,7 +533,7 @@ Approximating Context-Free Grammars with a Finite-State Calculus - Edmund GrimleyEvans + Edmund GrimleyEvans 10.3115/976909.979675 452–459 P97-1058 @@ -541,7 +541,7 @@ Finite State Transducers Approximating Hidden <fixed-case>M</fixed-case>arkov Models - AndreKempe + AndreKempe 10.3115/976909.979676 460–467 P97-1059 @@ -569,7 +569,7 @@ Learning Parse and Translation Decisions from Examples with Rich Context UlfHermjakob - Raymond J.Mooney + Raymond J.Mooney 10.3115/976909.979679 482–489 P97-1062 @@ -609,7 +609,7 @@ Choosing the Word Most Typical in Context Using a Lexical Co-occurrence Network - PhilipEdmonds + PhilipEdmonds 10.3115/976909.979684 507–509 P97-1067 @@ -617,7 +617,7 @@ Improving Translation through Contextual Information - MaiteTaboada + MaiteTaboada 10.3115/976909.979685 510–512 P97-1068 @@ -641,7 +641,7 @@ Contrastive accent in a data-to-speech system - MarietTheune + MarietTheune 10.3115/976909.979688 519–521 P97-1071 @@ -649,7 +649,7 @@ Towards resolution of bridging descriptions - RenataVieira + RenataVieira SimoneTeufel 10.3115/976909.979689 522–524 diff --git a/data/xml/P98.xml b/data/xml/P98.xml index a24bec262f..f174811ab8 100644 --- a/data/xml/P98.xml +++ b/data/xml/P98.xml @@ -16,7 +16,7 @@ A Quasi-Dependency Model for the Structural Analysis of <fixed-case>C</fixed-case>hinese <fixed-case>B</fixed-case>ase<fixed-case>NP</fixed-case>s ZhaoJun - HuangChangning + ChangningHuang 10.3115/980845.980847 1–7 P98-1001 @@ -32,9 +32,9 @@ Towards a Single Proposal in Spelling Correction - EnekoAgirre - KoldoGojenola - KepaSarasola + EnekoAgirre + KoldoGojenola + KepaSarasola AtroVoutilainen 10.3115/980845.980850 22–28 @@ -53,7 +53,7 @@ Parole et Prduction automatique: le module de reconnaissance <fixed-case>RAPHAEL</fixed-case> - MohammadAkbar + MohammadAkbar JeanCaelen 10.3115/980845.980852 36–40 @@ -62,8 +62,8 @@ Automatic Acquisition of Hierarchical Transduction Models for Machine Translation - HiyanAlshawi - SrinivasBangalore + HiyanAlshawi + SrinivasBangalore ShonaDouglas 10.3115/980845.980853 41–47 @@ -110,9 +110,9 @@ Evaluating a Focus-Based Approach to Anaphora Resolution - SalihaAzzam - KevinHumphreys - RobertGaizauskas + SalihaAzzam + KevinHumphreys + RobertGaizauskas 10.3115/980845.980858 74–78 P98-1011 @@ -129,9 +129,9 @@ The <fixed-case>B</fixed-case>erkeley <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Project - Collin F.Baker - Charles J.Fillmore - John B.Lowe + Collin F.Baker + Charles J.Fillmore + John B.Lowe 10.3115/980845.980860 86–90 P98-1013 @@ -149,7 +149,7 @@ Semi-Automatic Recognition of Noun Modifier Relationships KenBarker - StanSzpakowicz + StanSzpakowicz 10.3115/980845.980862 96–102 P98-1015 @@ -157,7 +157,7 @@ <fixed-case>R</fixed-case>edundancy: Helping Semantic Disambiguation - CarolineBarriere + CarolineBarriere 10.3115/980845.980863 103–109 P98-1016 @@ -192,9 +192,9 @@ Trigger-Pair Predictors in Parsing and Tagging - EzraBlack + EzraBlack AndrewFinch - HidekiKashioka + HidekiKashioka 10.3115/980845.980867 131–137 P98-1020 @@ -211,7 +211,7 @@ A Probabilistic Corpus-Driven Model for Lexical-Functional Analysis RensBod - RonaldKaplan + RonaldKaplan 10.3115/980845.980869 145–151 P98-1022 @@ -221,7 +221,7 @@ Anchoring Floating Quantifiers in <fixed-case>J</fixed-case>apanese-to-<fixed-case>E</fixed-case>nglish Machine Translation FrancisBond DanielaKurz - SatoshiShirai + SatoshiShirai 10.3115/980845.980870 152–159 P98-1023 @@ -230,8 +230,8 @@ Managing Information at Linguistic Interfaces JohanBos - C.J.Rupp - BiankaBuschbeck-Wolf + C.J.Rupp + BiankaBuschbeck-Wolf MichaelDorna 10.3115/980845.980871 160–166 @@ -240,7 +240,7 @@ Deriving the Predicate-Argument Structure for a Free Word Order Language - CemBozsahin + CemBozsahin 10.3115/980845.980872 167–173 P98-1025 @@ -256,7 +256,7 @@ The Logical Structure of Binding - AntonioBranco + AntonioBranco 10.3115/980845.980874 181–185 P98-1027 @@ -265,9 +265,9 @@ Beyond N -Grams: Can Linguistic Sophistication Improve Language Modeling? EricBrill - RaduFlorian - John C.Henderson - LidiaMangu + RaduFlorian + John C.Henderson + LidiaMangu 10.3115/980845.980875 186–190 P98-1028 @@ -292,9 +292,9 @@ Named Entity Scoring for Speech Input - John D.Burger - DavidPalmer - LynetteHirschman + John D.Burger + DavidPalmer + LynetteHirschman 10.3115/980845.980878 201–205 P98-1031 @@ -306,8 +306,8 @@ KarenKukich SusanneWolff ChiLu - MartinChodorow - LisaBraden-Harder + MartinChodorow + LisaBraden-Harder Mary DeeHarris 10.3115/980845.980879 206–210 @@ -316,7 +316,7 @@ Building Parallel <fixed-case>LTAG</fixed-case> for <fixed-case>F</fixed-case>rench and <fixed-case>I</fixed-case>talian - Marie-HeleneCandito + Marie-HeleneCandito 10.3115/980845.980880 211–217 P98-1033 @@ -324,8 +324,8 @@ Error-Driven Pruning of Treebank Grammars for Base Noun Phrase Identification - ClaireCardie - DavidPierce + ClaireCardie + DavidPierce 10.3115/980845.980881 218–224 P98-1034 @@ -334,7 +334,7 @@ Exploiting Syntactic Structure for Language Modeling CiprianChelba - FrederickJelinek + FrederickJelinek 10.3115/980845.980882 225–231 P98-1035 @@ -353,8 +353,8 @@ A Concept-based Adaptive Approach to Word Sense Disambiguation - Jen NanChen - Jason S.Chang + Jen NanChen + Jason S.Chang 10.3115/980845.980884 237–243 P98-1037 @@ -362,7 +362,7 @@ <fixed-case>PAT</fixed-case>-Trees with the Deletion Function as the Learning Device for Linguistic Patterns - Keh-JiannChen + Keh-JiannChen WenTsuei Lee-FengChien 10.3115/980845.980885 @@ -373,12 +373,12 @@ Hybrid Approaches to Improvement of Translation Quality in Web-based <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Machine Translation Sung-KwonChoi - Han-MinJung + Han-MinJung Chul-MinSim TaewanKim Dong-InPark Jun-SikPark - Key-SunChoi + Key-SunChoi 10.3115/980845.980886 251–255 P98-1039 @@ -386,7 +386,7 @@ Dialogue Management in Vector-Based Call Routing - JenniferChu-Carroll + JenniferChu-Carroll BobCarpenter 10.3115/980845.980887 256–262 @@ -396,7 +396,7 @@ Machine Translation vs. Dictionary Term Translation - a Comparison for <fixed-case>E</fixed-case>nglish-<fixed-case>J</fixed-case>apanese News Article Alignment NigelCollier - HidekiHirakawa + HidekiHirakawa AkiraKumano 10.3115/980845.980888 263–267 @@ -407,7 +407,7 @@ An Experiment in Hybrid Dictionary and Statistical Sentence Alignment NigelCollier KenjiOno - HidekiHirakawa + HidekiHirakawa 10.3115/980845.980889 268–274 P98-1042 @@ -424,8 +424,8 @@ Veins Theory: A Model of Global Discourse Cohesion and Coherence DanCristea - NancyIde - LaurentRomary + NancyIde + LaurentRomary 10.3115/980845.980891 281–285 P98-1044 @@ -435,7 +435,7 @@ Automatic Semantic Tagging of Unknown Proper Names AlessandroCucchiarelli DaniloLuzi - PaolaVelardi + PaolaVelardi 10.3115/980845.980892 286–292 P98-1045 @@ -443,9 +443,9 @@ Investigating Regular Sense Extensions based on Intersective <fixed-case>L</fixed-case>evin Classes - Hoa TrangDang - KarinKipper - MarthaPalmer + Hoa TrangDang + KarinKipper + MarthaPalmer JosephRosenzweig 10.3115/980845.980893 293–299 @@ -484,11 +484,11 @@ <fixed-case>M</fixed-case>ultext-<fixed-case>E</fixed-case>ast: Parallel and Comparable Corpora and Lexicons for Six <fixed-case>C</fixed-case>entral and <fixed-case>E</fixed-case>astern <fixed-case>E</fixed-case>uropean Languages LudmilaDimitrova - TomazErjavec - NancyIde - Heiki JaanKaalep - VladimirPetkevic - DanTufis + TomazErjavec + NancyIde + Heiki JaanKaalep + VladimirPetkevic + DanTufis 10.3115/980845.980897 315–319 P98-1050 @@ -496,9 +496,9 @@ Error Driven Word Sense Disambiguation - LucaDini - VittorioDi Tomaso - FrederiqueSegond + LucaDini + VittorioDi Tomaso + FrederiqueSegond 10.3115/980845.980898 320–324 P98-1051 @@ -507,9 +507,9 @@ An Empirical Investigation of Proposals in Collaborative Dialogues BarbaraDi Eugenio - Pamela W.Jordan - Johanna D.Moore - Richmond H.Thomason + Pamela W.Jordan + Johanna D.Moore + Richmond H.Thomason 10.3115/980845.980899 325–329 P98-1052 @@ -533,8 +533,8 @@ A Text Input Front-end Processor as an Information Access Platform - ShinichiDoi - Shin-ichiroKamei + ShinichiDoi + Shin-ichiroKamei KiyoshiYamabana 10.3115/980845.980902 336–340 @@ -545,8 +545,8 @@ Syntactic and Semantic Transfer with <fixed-case>F</fixed-case>-Structures MichaelDorna AnetteFrank - Josefvan Genabith - Martin C.Emele + Josefvan Genabith + Martin C.Emele 10.3115/980845.980903 341–347 P98-1056 @@ -574,7 +574,7 @@ Spelling Correction using Context Mohammad AliElmi - MarthaEvens + MarthaEvens 10.3115/980845.980906 360–364 P98-1059 @@ -582,7 +582,7 @@ Ambiguity Preserving Machine Translation using Packed Representations - Martin C.Emele + Martin C.Emele MichaelDorna 10.3115/980845.980907 365–371 @@ -591,8 +591,8 @@ A Structure-sharing Parser for Lexicalized Grammars - RogerEvans - DavidWeir + RogerEvans + DavidWeir 10.3115/980845.980908 372–378 P98-1061 @@ -624,9 +624,9 @@ Anaphor Resolution In Unrestricted Texts With Partial Parsing - AntonioFerrandez - ManuelPalomar - LidiaMoreno + AntonioFerrandez + ManuelPalomar + LidiaMoreno 10.3115/980845.980911 385–391 P98-1064 @@ -652,7 +652,7 @@ Toward General-Purpose Learning for Information Extraction - DayneFreitag + DayneFreitag 10.3115/980845.980914 404–408 P98-1067 @@ -696,7 +696,7 @@ Semantic-Head Based Resolution of Scopal Ambiguities - BjornGamback + BjornGamback JohanBos 10.3115/980845.980919 433–437 @@ -707,7 +707,7 @@ Vers l’utilisation des méthodes formelles pour le développement de linguiciels BilelGargouri MohamedJmaiel - AbdelmajidBen Hamadou + AbdelmajidBen Hamadou 10.3115/980845.980920 438–443 P98-1073 @@ -715,7 +715,7 @@ Flow Network Models for Word Alignment and Terminology Extraction from Bilingual Corpora - EricGaussier + EricGaussier 10.3115/980845.980921 444–450 P98-1074 @@ -723,8 +723,8 @@ Growing Semantic Grammars - MarsalGavalda - AlexWaibel + MarsalGavalda + AlexWaibel 10.3115/980845.980922 451–456 P98-1075 @@ -740,7 +740,7 @@ Efficient Linear Logic Meaning Assembly - VineetGupta + VineetGupta JohnLamping 10.3115/980845.980924 464–470 @@ -767,8 +767,8 @@ Tagging Inflective Languages: Prediction of Morphological Categories for a Rich Structured Tagset - JanHajič - BarboraHladká + JanHajič + BarboraHladká 10.3115/980845.980927 483–490 P98-1080 @@ -776,9 +776,9 @@ Improving Data Driven Wordclass Tagging by System Combination - Hansvan Halteren + Hansvan Halteren JakubZavrel - WalterDaelemans + WalterDaelemans 10.3115/980845.980928 491–497 P98-1081 @@ -786,8 +786,8 @@ A Step towards the Detection of Semantic Variants of Terms in Technical Documents - ThierryHamon - AdelineNazarenko + ThierryHamon + AdelineNazarenko CecileGros 10.3115/980845.980929 498–504 @@ -797,7 +797,7 @@ Using Decision Trees to Construct a Practical Parser MasahikoHaruno - SatoshiShirai + SatoshiShirai YoshifumiOoyama 10.3115/980845.980930 505–511 @@ -807,7 +807,7 @@ Integrating Text Plans for Conciseness and Coherence TerrenceHarvey - SandraCarberry + SandraCarberry 10.3115/980845.980931 512–518 P98-1084 @@ -834,7 +834,7 @@ A Connectionist Architecture for Learning to Parse - JamesHenderson + JamesHenderson PeterLane 10.3115/980845.980934 531–537 @@ -859,8 +859,8 @@ Long Distance Pronominalisation and Global Focus - JanetHitzeman - MassimoPoesio + JanetHitzeman + MassimoPoesio 10.3115/980845.980937 550–556 P98-1090 @@ -876,7 +876,7 @@ Terminological Variation, a Means of Identifying Research Topics from Texts - FideliaIbekwe-SanJuan + FideliaIbekwe-SanJuan 10.3115/980845.980939 564–570 P98-1092 @@ -903,7 +903,7 @@ Exploring the Characteristics of Multi-party Dialogues - MasatoIshizaki + MasatoIshizaki TsuneakiKato 10.3115/980845.980942 583–589 @@ -912,8 +912,8 @@ Robust Interaction through Partial Interpretation and Dialogue Management - ArneJönsson - LenaStrömbäck + ArneJönsson + LenaStrömbäck 10.3115/980845.980943 590–594 P98-1096 @@ -929,9 +929,9 @@ Combining a <fixed-case>C</fixed-case>hinese Thesaurus with a <fixed-case>C</fixed-case>hinese Dictionary - DonghongJi - JunpingGong - ChangningHuang + DonghongJi + JunpingGong + ChangningHuang 10.3115/980845.980945 600–606 P98-1098 @@ -939,8 +939,8 @@ Combining Multiple, Large-Scale Resources in a Reusable Lexicon for Natural Language Generation - HongyanJing - KathleenMcKeown + HongyanJing + KathleenMcKeown 10.3115/980845.980946 607–613 P98-1099 @@ -948,8 +948,8 @@ Text Segmentation Using Reiteration and Collocation - Amanda C.Jobbins - Lindsay J.Evett + Amanda C.Jobbins + Lindsay J.Evett 10.3115/980845.980947 614–618 P98-1100 @@ -965,7 +965,7 @@ Unification-based Multimodal Parsing - MichaelJohnston + MichaelJohnston 10.3115/980845.980949 624–630 P98-1102 @@ -973,7 +973,7 @@ Context Management with Topics for Spoken Dialogue Systems - KristiinaJokinen + KristiinaJokinen HidekiTanaka AkioYokoo 10.3115/980845.980950 @@ -1001,7 +1001,7 @@ Pseudo-Projectivity, A Polynomially Parsable Non-Projective Dependency Grammar SylvainKahane AlexisNasr - OwenRambow + OwenRambow 10.3115/980845.980953 646–652 P98-1106 @@ -1010,7 +1010,7 @@ A Method for Correcting Errors in Speech Recognition using the Statistical Features of Character Co-occurrence SatoshiKaki - EiichiroSumita + EiichiroSumita HitoshiIida 10.3115/980845.980954 653–657 @@ -1019,11 +1019,11 @@ Use of Mutual Information Based Character Clusters in Dictionary-less Morphological Analysis of <fixed-case>J</fixed-case>apanese - HidekiKashioka + HidekiKashioka YasuhiroKawata YumikoKinjo AndrewFinch - Ezra W.Black + Ezra W.Black 10.3115/980845.980955 658–662 P98-1108 @@ -1031,7 +1031,7 @@ Know When to Hold '<fixed-case>E</fixed-case>m: Shuffling Deterministically in a Parser for Nonconcatenative Grammars - Robert T.Kasper + Robert T.Kasper MikeCalcagno Paul C.Davis 10.3115/980845.980956 @@ -1051,7 +1051,7 @@ Unlimited Vocabulary Grapheme to Phoneme Conversion for <fixed-case>K</fixed-case>orean <fixed-case>TTS</fixed-case> ByeongchangKim WonIlLee - GeunbaeLee + GeunbaeLee Jong-HyeokLee 10.3115/980845.980958 675–679 @@ -1060,7 +1060,7 @@ Role of Verbs in Document Analysis - Judith L.Klavans + Judith L.Klavans Min-YenKan 10.3115/980845.980959 680–686 @@ -1069,7 +1069,7 @@ A Flexible Example-Based Parser Based on the <fixed-case>SSTC</fixed-case> - Mosleh HmoudAl-Adhaileh + Mosleh HmoudAl-Adhaileh TangEnya Kong 10.3115/980845.980960 687–693 @@ -1091,8 +1091,8 @@ Compacting the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank Grammar AlexanderKrotov MarkHepple - RobertGaizauskas - YorickWilks + RobertGaizauskas + YorickWilks 10.3115/980845.980962 699–703 P98-1115 @@ -1100,7 +1100,7 @@ Generation that Exploits Corpus-Based Statistical Knowledge - IreneLangkilde + IreneLangkilde KevinKnight 10.3115/980845.980963 704–710 @@ -1110,9 +1110,9 @@ Methods and Practical Issues in Evaluating Alignment Techniques - PhilippeLanglais + PhilippeLanglais MichelSimard - JeanVeronis + JeanVeronis 10.3115/980845.980964 711–717 P98-1117 @@ -1120,8 +1120,8 @@ A Framework for Customizable Generation of Hypertext Presentations - BenoitLavoie - OwenRambow + BenoitLavoie + OwenRambow 10.3115/980845.980965 718–722 P98-1118 @@ -1130,7 +1130,7 @@ Automatic Acquisition of Language Model based on Head-Dependent Relation between Words SeungmiLee - Key-SunChoi + Key-SunChoi 10.3115/980845.980966 723–727 P98-1119 @@ -1154,7 +1154,7 @@ Characterizing and Recognizing Spoken Corrections in Human-Computer Dialogue - Gina-AnneLevow + Gina-AnneLevow 10.3115/980845.980969 736–742 P98-1122 @@ -1195,9 +1195,9 @@ Identifying Syntactic Role of Antecedent in <fixed-case>K</fixed-case>orean Relative Clause using Corpus and Thesaurus Informationes - Hui-FengLi + Hui-FengLi Jong-HyeokLee - GeunbaeLee + GeunbaeLee 10.3115/980691.980694 756–762 P98-2125 @@ -1206,9 +1206,9 @@ A Test Environment for Natural Language Understanding Systems LiLi - Deborah A.Dahl - Lewis M.Norton - Marcia C.Linebarger + Deborah A.Dahl + Lewis M.Norton + Marcia C.Linebarger DongdongChen 10.3115/980691.980695 763–767 @@ -1234,9 +1234,9 @@ Evaluating Response Strategies in a Web-Based Spoken Dialogue Agent - Diane J.Litman + Diane J.Litman ShimeiPan - Marilyn A.Walker + Marilyn A.Walker 10.3115/980691.980698 780–786 P98-2129 @@ -1245,7 +1245,7 @@ Formal Aspects and Parsing Issues of Dependency Theory VincenzoLombardo - LeonardoLesmo + LeonardoLesmo 10.3115/980691.980699 787–793 P98-2130 @@ -1253,11 +1253,11 @@ A Multi-Neuro Tagger Using Variable Lengths of Contexts - SusannLuperFoy + SusannLuperFoy DanLoehr DavidDuff - KeithMiller - FlorenceReeder + KeithMiller + FlorenceReeder LisaHarper QingMa HitoshiIsahara @@ -1271,7 +1271,7 @@ TakakiMakino MinoruYoshida KentaroTorisawa - Jun’ichiTsujii + Jun’ichiTsujii 10.3115/980691.980702 807–811 P98-2132 @@ -1279,9 +1279,9 @@ Bitext Correspondences through Rich Mark-up - RaquelMartinez + RaquelMartinez JosebaAbaitua - ArantzaCasillas + ArantzaCasillas 10.3115/980691.980703 812–818 P98-2134 @@ -1289,7 +1289,7 @@ Discourse Cues for Broadcast News Segmentation - Mark T.Maybury + Mark T.Maybury 10.3115/980691.980704 819–822 P98-2135 @@ -1297,9 +1297,9 @@ Confirmation in Multimodal Systems - David R.McGee - Phil R.Cohen - SharonOviatt + David R.McGee + Phil R.Cohen + SharonOviatt 10.3115/980691.980705 823–829 P98-2136 @@ -1326,11 +1326,11 @@ Deriving Transfer Rules from Dominance-Preserving Alignments - AdamMeyers + AdamMeyers RomanYangarber - RalphGrishman - CatherineMacleod - AntonioMoreno-Sandoval + RalphGrishman + CatherineMacleod + AntonioMoreno-Sandoval 10.3115/980691.980708 843–847 P98-2139 @@ -1365,7 +1365,7 @@ Robust Pronoun Resolution with Limited Knowledge - RuslanMitkov + RuslanMitkov 10.3115/980691.980712 869–875 P98-2143 @@ -1375,7 +1375,7 @@ <fixed-case>HPSG</fixed-case>-Style Underspecified <fixed-case>J</fixed-case>apanese Grammar with Wide Coverage YutakaMitsuishi KentaroTorisawa - Jun’ichiTsujii + Jun’ichiTsujii 10.3115/980691.980713 876–880 P98-2144 @@ -1385,7 +1385,7 @@ Text Segmentation with Multiple Surface Linguistic Cues HajimeMochizuki TakeoHonda - ManabuOkumura + ManabuOkumura 10.3115/980691.980714 881–885 P98-2145 @@ -1402,7 +1402,7 @@ Dynamic Compilation of Weighted Context-Free Grammars MehryarMohri - Fernando C.N.Pereira + Fernando C.N.Pereira 10.3115/980691.980716 891–897 P98-2147 @@ -1411,7 +1411,7 @@ A Stochastic Language Model using Dependency and its Improvement by Word Clustering ShinsukeMori - MakotoNagao + MakotoNagao 10.3115/980691.980717 898–904 P98-2148 @@ -1429,7 +1429,7 @@ An Estimate of Referent of Noun Phrases in <fixed-case>J</fixed-case>apanese Sentences MasakiMurata - MakotoNagao + MakotoNagao 10.3115/980691.980719 912–916 P98-2150 @@ -1438,7 +1438,7 @@ Automatic Text Summarization Based on the Global Document Annotation KatashiNagao - KoitiHasida + KoitiHasida 10.3115/980691.980720 917–921 P98-2151 @@ -1474,7 +1474,7 @@ Constituent-based Accent Prediction - Christine H.Nakatani + Christine H.Nakatani 10.3115/980691.980724 939–945 P98-2155 @@ -1513,7 +1513,7 @@ An Efficient Parallel Substrate for Typed Feature Structures on Shared Memory Parallel Machines TakashiNinomiya KentaroTorisawa - Jun’ichiTsujii + Jun’ichiTsujii 10.3115/980691.980728 968–974 P98-2159 @@ -1521,7 +1521,7 @@ Universal Grammar and Lexis for Quick Ramp-Up of <fixed-case>MT</fixed-case> Systems - SergeiNirenburg + SergeiNirenburg VictorRaskin 10.3115/980691.980729 975–979 @@ -1530,8 +1530,8 @@ Integration of Large-Scale Linguistic Resources in a Natural Language Understanding System - Lewis M.Norton - Deborah A.Dahl + Lewis M.Norton + Deborah A.Dahl LiLi Katharine P.Beals 10.3115/980691.980730 @@ -1541,7 +1541,7 @@ Improving Statistical Natural Language Translation with Categories and Rules - Franz JosefOch + Franz JosefOch HansWeber 10.3115/980691.980731 985–989 @@ -1551,7 +1551,7 @@ Recognition of the Coherence Relation between Te-linked Clauses AkiraOishi - YujiMatsumoto + YujiMatsumoto 10.3115/980691.980732 990–996 P98-2163 @@ -1559,8 +1559,8 @@ On the Evaluation and Comparison of Taggers: the Effect of Noise in Testing Corpora - LluisPadro - LluisMarquez + LluisPadro + LluisMarquez 10.3115/980691.980733 997–1002 P98-2164 @@ -1569,7 +1569,7 @@ Learning Intonation Rules for Concept to Speech Generation ShimeiPan - KathleenMcKeown + KathleenMcKeown 10.3115/980691.980734 1003–1009 P98-2165 @@ -1577,8 +1577,8 @@ Possessive Pronominal Anaphor Resolution in <fixed-case>P</fixed-case>ortuguese Written Texts - IvandreParaboni - Vera Lucia Strubede Lima + IvandreParaboni + Vera Lucia Strubede Lima 10.3115/980691.980735 1010–1014 P98-2166 @@ -1589,7 +1589,7 @@ JunsikPark Jung-GooKang WookHur - Key-SunChoi + Key-SunChoi 10.3115/980691.980736 1015–1019 P98-2167 @@ -1626,7 +1626,7 @@ HannesPirker GeorgNiklfeld JohannesMatiasek - HaraldTrost + HaraldTrost 10.3115/980691.980740 1041–1045 P98-2171 @@ -1634,7 +1634,7 @@ Reference Resolution beyond Coreference: a Conceptual Frame and its Application - AndreiPopescu-Belis + AndreiPopescu-Belis IsabelleRobba GerardSabah 10.3115/980691.980741 @@ -1645,7 +1645,7 @@ Multilingual Authoring using Feedback Texts RichardPower - DoniaScott + DoniaScott 10.3115/980691.980742 1053–1059 P98-2173 @@ -1654,7 +1654,7 @@ Practical Glossing by Prioritised Tiling VictorPoznanski - PeteWhitelock + PeteWhitelock JanIJdens SteffanCorley 10.3115/980691.980743 @@ -1664,7 +1664,7 @@ An Intelligent Multi-Dictionary Environment - GaborPrbszeky + GaborPrbszeky 10.3115/980691.980744 1067–1071 P98-2175 @@ -1672,7 +1672,7 @@ Learning Correlations between Linguistic Indicators and Semantic Constraints: Reuse of Context-Dependent Descriptions of Entities - Dragomir R.Radev + Dragomir R.Radev 10.3115/980691.980745 1072–1078 P98-2176 @@ -1680,7 +1680,7 @@ Statistical Models for Unsupervised Prepositional Phrase Attachment - AdwaitRatnaparkhi + AdwaitRatnaparkhi 10.3115/980691.980746 1079–1085 P98-2177 @@ -1697,8 +1697,8 @@ Generating the Structure of Argument - ChrisReed - DerekLong + ChrisReed + DerekLong 10.3115/980691.980748 1091–1097 P98-2179 @@ -1707,7 +1707,7 @@ <fixed-case>M</fixed-case>ind<fixed-case>N</fixed-case>et: Acquiring and Structuring Semantic Information from Text Stephen D.Richardson - William B.Dolan + William B.Dolan LucyVanderwende 10.3115/980691.980749 1098–1102 @@ -1716,9 +1716,9 @@ Building Accurate Semantic Taxonomies from Monolingual <fixed-case>MRD</fixed-case>s - GermanRigau - HoracioRodriguez - EnekoAgirre + GermanRigau + HoracioRodriguez + EnekoAgirre 10.3115/980691.980750 1103–1109 P98-2181 @@ -1744,7 +1744,7 @@ How Verb Subcategorization Frequencies are Affected by Corpus Choice DouglasRoland - DanielJurafsky + DanielJurafsky 10.3115/980691.980753 1122–1128 P98-2184 @@ -1752,8 +1752,8 @@ An Interactive Domain Independent Approach to Robust Dialogue Interpretation - Carolyn PensteinRose - Lori S.Levin + Carolyn PensteinRose + Lori S.Levin 10.3115/980691.980754 1129–1135 P98-2185 @@ -1770,7 +1770,7 @@ A <fixed-case>G</fixed-case>enerative <fixed-case>L</fixed-case>exicon Perspective for Adjectival Modification - PatrickSaint-Dizier + PatrickSaint-Dizier 10.3115/980691.980756 1143–1149 P98-2187 @@ -1778,9 +1778,9 @@ Dialogue Act Tagging with Transformation-Based Learning - KenSamuel - SandraCarberry - K.Vijay-Shanker + KenSamuel + SandraCarberry + K.Vijay-Shanker 10.3115/980691.980757 1150–1156 P98-2188 @@ -1854,7 +1854,7 @@ Recognizing Syntactic Errors in the Writing of Second Language Learners DavidSchneider - Kathleen F.McCoy + Kathleen F.McCoy 10.3115/980691.980765 1198–1204 P98-2196 @@ -1863,8 +1863,8 @@ Transforming Lattices into Non-deterministic Automata with Optional Null Arcs MarkSeligman - ChristianBoitet - BoubakerMeddeb-Hamrouni + ChristianBoitet + BoubakerMeddeb-Hamrouni 10.3115/980691.980766 1205–1211 P98-2197 @@ -1888,7 +1888,7 @@ Similarity Metrics for Aligning Children’s Articulation Data - Harold L.Somers + Harold L.Somers 10.3115/980691.980769 1227–1232 P98-2200 @@ -1897,7 +1897,7 @@ A Connectionist Approach to Prepositional Phrase Attachment for Real World Texts Josep M.Sopena - AgustiLLoberas + AgustiLLoberas Joan L.Moliner 10.3115/980691.980770 1233–1237 @@ -1932,9 +1932,9 @@ Summarization-based Query Expansion in Information Retrieval - TomekStrzalkowski + TomekStrzalkowski JinWang - BowdenWise + BowdenWise 10.3115/980691.980774 1258–1264 P98-2205 @@ -1944,7 +1944,7 @@ <fixed-case>C</fixed-case>hinese Word Segmentation without Using Lexicon and Hand-crafted Training Data MaosongSun DayangShen - Benjamin K.Tsou + Benjamin K.Tsou 10.3115/980691.980775 1265–1271 P98-2206 @@ -1971,8 +1971,8 @@ Reactive Content Selection in the Generation of Real-time Soccer Commentary - KumikoTanaka-Ishii - KoitiHasida + KumikoTanaka-Ishii + KoitiHasida ItsukiNoda 10.3115/980691.980778 1282–1288 @@ -1983,7 +1983,7 @@ Idiomatic Object Usage and Support Verbs PasiTapanainen JussiPiitulainen - TimoJarvinen + TimoJarvinen 10.3115/980691.980779 1289–1293 P98-2210 @@ -2019,7 +2019,7 @@ General-to-Specific Model Selection for Subcategorization Preference TakehitoUtsuro TakashiMiyata - YujiMatsumoto + YujiMatsumoto 10.3115/980691.980783 1314–1320 P98-2214 @@ -2037,7 +2037,7 @@ The Computational Lexical Semantics of Syntagmatic Expressions EvelyneViegas StephenBeale - SergeiNirenburg + SergeiNirenburg 10.3115/980691.980785 1328–1332 P98-2216 @@ -2045,8 +2045,8 @@ A tabular interpretation of a class of 2-Stack Automata - EricVillemonte de la Clergerie - MiguelAlonso Pardo + EricVillemonte de la Clergerie + MiguelAlonso Pardo 10.3115/980691.980786 1333–1339 P98-2217 @@ -2054,7 +2054,7 @@ Project for production of closed-caption <fixed-case>TV</fixed-case> programs for the hearing impaired - TakahiroWakao + TakahiroWakao 10.3115/980691.980787 1340–1344 P98-2218 @@ -2062,7 +2062,7 @@ Learning Optimal Dialogue Strategies: A Case Study of a Spoken Dialogue Agent for Email - Marilyn A.Walker + Marilyn A.Walker 10.3115/980691.980788 1345–1351 P98-2219 @@ -2071,7 +2071,7 @@ Automatic <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese name transliteration for development of multilingual resources StephenWan - Cornelia MariaVerspoor + Cornelia MariaVerspoor 10.3115/980691.980789 1352–1356 P98-2220 @@ -2080,7 +2080,7 @@ Modeling with Structures in Statistical Machine translation Ye-YiWang - AlexWaibel + AlexWaibel 10.3115/980691.980790 1357–1363 P98-2221 @@ -2106,7 +2106,7 @@ Diagram Understanding Using Integration of Layout Information and Textual Information YasuhikoWatanabe - MakotoNagao + MakotoNagao 10.3115/980691.980793 1374–1380 P98-2224 @@ -2117,7 +2117,7 @@ YasuhikoWatanabe YoshihiroOkada KengoKaneji - MakotoNagao + MakotoNagao 10.3115/980691.980794 1381–1387 P98-2225 @@ -2125,7 +2125,7 @@ Translating Idioms - EricWehrli + EricWehrli 10.3115/980691.980795 1388–1392 P98-2226 @@ -2133,7 +2133,7 @@ Head-Driven Generation with <fixed-case>HPSG</fixed-case> - GrahamWilcock + GrahamWilcock 10.3115/980691.980796 1393–1397 P98-2227 @@ -2141,7 +2141,7 @@ Word Sense Disambiguation using Optimised Combinations of Knowledge Sources - YorickWilks + YorickWilks MarkStevenson 10.3115/980691.980797 1398–1402 @@ -2150,7 +2150,7 @@ A Model for Robust Processing of Spontaneous Speech by Integrating Viable Fragments - Karsten L.Worm + Karsten L.Worm 10.3115/980691.980798 1403–1407 P98-2229 @@ -2190,7 +2190,7 @@ Feasibility Study for Ellipsis Resolution in Dialogues by Machine-Learning Technique KazuhideYamamoto - EiichiroSumita + EiichiroSumita 10.3115/980691.980802 1428–1435 P98-2233 @@ -2198,8 +2198,8 @@ Some Properties of Preposition and Subordinate Conjunction Attachments - Alexander S.Yeh - Marc B.Vilain + Alexander S.Yeh + Marc B.Vilain 10.3115/980691.980803 1436–1442 P98-2234 @@ -2207,7 +2207,7 @@ Evaluation of Importance of Sentences based on Connectivity to Title - TakehikoYoshimi + TakehikoYoshimi ToshiyukiOkunishi TakahiroYamaji YojiFukumochi @@ -2227,7 +2227,7 @@ Using Chunk Based Partial Parsing of Spontaneous Speech in Unrestricted Domains for Reducing Word Error Rate in Speech Recognition KlausZechner - AlexWaibel + AlexWaibel 10.3115/980691.980806 1453–1459 P98-2237 @@ -2243,8 +2243,8 @@ Word Association and <fixed-case>MI-T</fixed-case>rigger-based Language Modeling - GuoDongZhou - KimTengLua + GuoDongZhou + KimTengLua 10.3115/980691.980808 1465–1471 P98-2239 @@ -2252,7 +2252,7 @@ Discovering Phonotactic Finite-State Automata by Genetic Search - AnjaBelz + AnjaBelz 10.3115/980691.980810 1472–1474 P98-2240 @@ -2260,8 +2260,8 @@ A Preliminary Model of Centering in Dialog - DonnaByron - AmandaStent + DonnaByron + AmandaStent 10.3115/980691.980811 1475–1477 P98-2241 @@ -2293,7 +2293,7 @@ Bridging the Gap between Dictionary and Thesaurus - Oi YeeKwong + Oi YeeKwong 10.3115/980691.980815 1487–1489 P98-2245 @@ -2309,7 +2309,7 @@ Detecting Verbal Participation in Diathesis Alternations - DianaMcCarthy + DianaMcCarthy AnnaKorhonen 10.3115/980691.980817 1493–1495 diff --git a/data/xml/P99.xml b/data/xml/P99.xml index 44afe77ac1..b54045ca07 100644 --- a/data/xml/P99.xml +++ b/data/xml/P99.xml @@ -15,7 +15,7 @@ Untangling Text Data Mining - Marti A.Hearst + Marti A.Hearst 10.3115/1034678.1034679 3–10 P99-1001 @@ -23,7 +23,7 @@ Automatic Speech Recognition and Its Application to Information Extraction - SadaokiFurui + SadaokiFurui 10.3115/1034678.1034680 11–20 P99-1002 @@ -55,10 +55,10 @@ Discourse Relations: A Structural and Presuppositional Account Using Lexicalised <fixed-case>TAG</fixed-case> - BonnieWebber + BonnieWebber AlistairKnott MatthewStone - AravindJoshi + AravindJoshi 10.3115/1034678.1034695 41–48 P99-1006 @@ -152,7 +152,7 @@ Using aggregation for selecting content when generating referring expressions - John A.Bateman + John A.Bateman 10.3115/1034678.1034706 127–134 P99-1017 @@ -169,7 +169,7 @@ Bilingual <fixed-case>H</fixed-case>ebrew-<fixed-case>E</fixed-case>nglish Generation of Possessives and Partitives: Raising the Input Abstraction Level - Yael DahanNetzer + Yael DahanNetzer MichaelElhadad 10.3115/1034678.1034708 144–151 @@ -178,8 +178,8 @@ A Method for Word Sense Disambiguation of Unrestricted Text - RadaMihalcea - Dan I.Moldovan + RadaMihalcea + Dan I.Moldovan 10.3115/1034678.1034709 152–158 P99-1020 @@ -195,7 +195,7 @@ Dynamic Nonlocal Language Modeling via Hierarchical Topic-Based Adaptation - RaduFlorian + RaduFlorian DavidYarowsky 10.3115/1034678.1034711 167–174 @@ -205,7 +205,7 @@ A Second-Order Hidden <fixed-case>M</fixed-case>arkov Model for Part-of-Speech Tagging Scott M.Thede - Mary P.Harper + Mary P.Harper 10.3115/1034678.1034712 175–182 P99-1023 @@ -213,11 +213,11 @@ The <fixed-case>C</fixed-case>ommand<fixed-case>T</fixed-case>alk Spoken Dialogue System - AmandaStent - JohnDowding - Jean MarkGawron + AmandaStent + JohnDowding + Jean MarkGawron Elizabeth OwenBratt - RobertMoore + RobertMoore 10.3115/1034678.1034713 183–190 P99-1024 @@ -226,7 +226,7 @@ Construct Algebra: Analytical Dialog Management AliciaAbella - Allen L.Gorin + Allen L.Gorin 10.3115/1034678.1034714 191–199 P99-1025 @@ -246,7 +246,7 @@ Should we Translate the Documents or the Queries in Cross-language Information Retrieval? - J. ScottMcCarley + J. ScottMcCarley 10.3115/1034678.1034716 208–214 P99-1027 @@ -264,9 +264,9 @@ Using Mutual Information to Resolve Query Translation Ambiguities and Query Term Weighting - Myung-GilJang - Sung HyonMyaeng - Se YoungPark + Myung-GilJang + Sung HyonMyaeng + Se YoungPark 10.3115/1034678.1034718 223–229 P99-1029 @@ -276,7 +276,7 @@ Analysis System of Speech Acts and Discourse Structures Using Maximum Entropy Model Won SeugChoi Jeong-MiCho - JungyunSeo + JungyunSeo 10.3115/1034678.1034719 230–237 P99-1030 @@ -284,7 +284,7 @@ Measuring Conformity to Discourse Routines in Decision-Making Interactions - Sherri L.Condon + Sherri L.Condon Claude G.Cech William R.Edwards 10.3115/1034678.1034720 @@ -294,9 +294,9 @@ Development and Use of a Gold-Standard Data Set for Subjectivity Classifications - Janyce M.Wiebe - Rebecca F.Bruce - Thomas P.O’Hara + Janyce M.Wiebe + Rebecca F.Bruce + Thomas P.O’Hara 10.3115/1034678.1034721 246–253 P99-1032 @@ -312,8 +312,8 @@ A Unification-based Approach to Morpho-syntactic Parsing of Agglutinative and Other (Highly) Inflectional Languages - GaborProszeky - BalazsKis + GaborProszeky + BalazsKis 10.3115/1034678.1034723 261–268 P99-1034 @@ -341,8 +341,8 @@ Memory-Based Morphological Analysis - Antalvan den Bosch - WalterDaelemans + Antalvan den Bosch + WalterDaelemans 10.3115/1034678.1034726 285–292 P99-1037 @@ -359,7 +359,7 @@ Alternating Quantifier Scope in <fixed-case>CCG</fixed-case> - MarkSteedman + MarkSteedman 10.3115/1034678.1034728 301–308 P99-1039 @@ -367,9 +367,9 @@ Automatic Detection of Poor Speech Recognition at the Dialogue Level - Diane J.Litman - Marilyn A.Walker - Michael S.Kearns + Diane J.Litman + Marilyn A.Walker + Michael S.Kearns 10.3115/1034678.1034729 309–316 P99-1040 @@ -385,10 +385,10 @@ Deep Read: A Reading Comprehension System - LynetteHirschman + LynetteHirschman MarcLight - EricBreck - John D.Burger + EricBreck + John D.Burger 10.3115/1034678.1034731 325–332 P99-1042 @@ -398,7 +398,7 @@ Mixed Language Query Disambiguation PascaleFung LiuXiaohu - CheungChi Shun + Chi ShunCheung 10.3115/1034678.1034732 333–340 P99-1043 @@ -414,8 +414,8 @@ Less is more: Eliminating index terms from subordinate clauses - Simon H.Corston-Oliver - William B.Dolan + Simon H.Corston-Oliver + William B.Dolan 10.3115/1034678.1034734 349–356 P99-1045 @@ -439,8 +439,8 @@ Corpus-Based Identification of Non-Anaphoric Noun Phrases - David L.Bean - EllenRiloff + David L.Bean + EllenRiloff 10.3115/1034678.1034737 373–380 P99-1048 @@ -476,7 +476,7 @@ W.Kasper B.Kiefer H.-U.Krieger - C. J.Rupp + C. J.Rupp K. L.Worm 10.3115/1034678.1034741 405–412 @@ -485,8 +485,8 @@ A Syntactic Framework for Speech Repairs and Other Disruptions - Mark G.Core - Lenhart K.Schubert + Mark G.Core + Lenhart K.Schubert 10.3115/1034678.1034742 413–420 P99-1053 @@ -503,7 +503,7 @@ A Selectionist Theory of Language Acquisition - Charles D.Yang + Charles D.Yang 10.3115/1034678.1034744 429–435 P99-1055 @@ -521,7 +521,7 @@ Learning to Recognize Tables in Free Text Hwee TouNg - Chung YongLim + Chung YongLim Jessica Li TengKoo 10.3115/1034678.1034746 443–450 @@ -539,7 +539,7 @@ Efficient Parsing for Bilexical Context-Free Grammars and Head Automaton Grammars - JasonEisner + JasonEisner GiorgioSatta 10.3115/1034678.1034748 457–464 @@ -556,10 +556,10 @@ A Bag of Useful Techniques for Efficient and Robust Parsing - BerndKiefer - Hans-UlrichKrieger - JohnCarroll - RobMalouf + BerndKiefer + Hans-UlrichKrieger + JohnCarroll + RobMalouf 10.3115/1034678.1034750 473–480 P99-1061 @@ -593,10 +593,10 @@ A Statistical Parser for <fixed-case>C</fixed-case>zech - MichaelCollins - JanHajic - LanceRamshaw - ChristophTillmann + MichaelCollins + JanHajic + LanceRamshaw + ChristophTillmann 10.3115/1034678.1034754 505–512 P99-1065 @@ -641,7 +641,7 @@ Relating Probabilistic Grammars and Automata - StevenAbney + StevenAbney DavidMcAllester FernandoPereira 10.3115/1034678.1034759 @@ -652,7 +652,7 @@ Information Fusion in the Context of Multi-Document Summarization ReginaBarzilay - Kathleen R.McKeown + Kathleen R.McKeown MichaelElhadad 10.3115/1034678.1034760 550–557 @@ -719,7 +719,7 @@ Analysis of Syntax-Based Pronoun Resolution Methods - Joel R.Tetreault + Joel R.Tetreault 10.3115/1034678.1034688 602–605 P99-1079 @@ -751,7 +751,7 @@ Modeling Filled Pauses in Medical Dictations - Sergey V.Pakhomov + Sergey V.Pakhomov 10.3115/1034678.1034692 619–624 P99-1083 diff --git a/data/xml/Q13.xml b/data/xml/Q13.xml index 1239c280e3..d3ba13b428 100644 --- a/data/xml/Q13.xml +++ b/data/xml/Q13.xml @@ -4,7 +4,7 @@ Transactions of the Association for Computational Linguistics, Volume 1 LinDekang - CollinsMichael + MichaelCollins MIT Press
Cambridge, MA
2013 @@ -31,7 +31,7 @@ Finding Optimal 1-Endpoint-Crossing Trees EmilyPitler SampathKannan - MitchellMarcus + MitchellMarcus 10.1162/tacl_a_00206 Dependency parsing algorithms capable of producing the types of crossing dependencies seen in natural language sentences have traditionally been orders of magnitude slower than algorithms for projective trees. For 95.8–99.8% of dependency parses in various natural language treebanks, whenever an edge is crossed, the edges that cross it all have a common vertex. The optimal dependency tree that satisfies this 1-Endpoint-Crossing property can be found with an O(n4) parsing algorithm that recursively combines forests over intervals with one exterior point. 1-Endpoint-Crossing trees also have natural connections to linguistics and another class of graphs that has been studied in NLP. 13–24 @@ -66,7 +66,7 @@ Weakly Supervised Learning of Semantic Parsers for Mapping Instructions to Actions YoavArtzi - LukeZettlemoyer + LukeZettlemoyer 10.1162/tacl_a_00209 The context in which language is used provides a strong signal for learning to recover its meaning. In this paper, we show it can be used within a grounded CCG semantic parsing approach that learns a joint model of meaning and context for interpreting and executing natural language instructions, using various types of weak supervision. The joint nature provides crucial benefits by allowing situated cues, such as the set of visible objects, to directly influence learning. It also enables algorithms that learn while executing instructions, for example by trying to replicate human actions. Experiments on a benchmark navigational dataset demonstrate strong performance under differing forms of supervision, including correctly executing 60% more instruction sets relative to the previous state of the art. 49–62 @@ -77,8 +77,8 @@ Unsupervised Dependency Parsing with Acoustic Cues - John KPate - SharonGoldwater + John KPate + SharonGoldwater 10.1162/tacl_a_00210 Unsupervised parsing is a difficult task that infants readily perform. Progress has been made on this task using text-based models, but few computational approaches have considered how infants might benefit from acoustic cues. This paper explores the hypothesis that word duration can help with learning syntax. We describe how duration information can be incorporated into an unsupervised Bayesian dependency parser whose only other source of information is the words themselves (without punctuation or parts of speech). Our results, evaluated on both adult-directed and child-directed utterances, show that using word duration can improve parse quality relative to words-only baselines. These results support the idea that acoustic cues provide useful evidence about syntactic structure for language-learning infants, and motivate the use of word duration cues in NLP tasks with speech. 63–74 @@ -142,7 +142,7 @@ Efficient Stacked Dependency Parsing by Forest Reranking KatsuhikoHayashi ShuheiKondo - YujiMatsumoto + YujiMatsumoto 10.1162/tacl_a_00216 This paper proposes a discriminative forest reranking algorithm for dependency parsing that can be seen as a form of efficient stacked parsing. A dynamic programming shift-reduce parser produces a packed derivation forest which is then scored by a discriminative reranker, using the 1-best tree output by the shift-reduce parser as guide features in addition to third-order graph-based features. To improve efficiency and accuracy, this paper also proposes a novel shift-reduce parser that eliminates the spurious ambiguity of arc-standard transition systems. Testing on the English Penn Treebank data, forest reranking gave a state-of-the-art unlabeled dependency accuracy of 93.12. 139–150 @@ -171,7 +171,7 @@ LeahHanson BeenishJamil MatthiasLee - Ya-TingLin + Ya-TingLin HenryPao FatimaRivera LeiliShahriyari @@ -191,7 +191,7 @@ Combined Distributional and Logical Semantics MikeLewis - MarkSteedman + MarkSteedman 10.1162/tacl_a_00219 We introduce a new approach to semantics which combines the benefits of distributional and formal logical semantics. Distributional models have been successful in modelling the meanings of content words, but logical semantics is necessary to adequately represent many function words. We follow formal semantics in mapping language to logical representations, but differ in that the relational constants used are induced by offline distributional clustering at the level of predicate-argument structure. Our clustering algorithm is highly scalable, allowing us to run on corpora the size of Gigaword. Different senses of a word are disambiguated based on their induced types. We outperform a variety of existing approaches on a wide-coverage question answering task, and demonstrate the ability to make complex multi-sentence inferences involving quantifiers on the FraCaS suite. 179–192 @@ -211,7 +211,7 @@ Dual Coordinate Descent Algorithms for Efficient Large Margin Structured Prediction Ming-WeiChang - Wen-tauYih + Wen-tauYih 10.1162/tacl_a_00221 Due to the nature of complex NLP problems, structured prediction algorithms have been important modeling tools for a wide range of tasks. While there exists evidence showing that linear Structural Support Vector Machine (SSVM) algorithm performs better than structured Perceptron, the SSVM algorithm is still less frequently chosen in the NLP community because of its relatively slow training speed. In this paper, we propose a fast and easy-to-implement dual coordinate descent algorithm for SSVMs. Unlike algorithms such as Perceptron and stochastic gradient descent, our method keeps track of dual variables and updates the weight vector more aggressively. As a result, this training process is as efficient as existing online learning methods, and yet derives consistently better models, as evaluated on four benchmark NLP datasets for part-of-speech tagging, named-entity recognition and dependency parsing. 207–218 @@ -222,7 +222,7 @@ Joint Arc-factored Parsing of Syntactic and Semantic Dependencies XavierLluís XavierCarreras - LluísMàrquez + LluísMàrquez 10.1162/tacl_a_00222 In this paper we introduce a joint arc-factored model for syntactic and semantic dependency parsing. The semantic role labeler predicts the full syntactic paths that connect predicates with their arguments. This process is framed as a linear assignment task, which allows to control some well-formedness constraints. For the syntactic part, we define a standard arc-factored dependency model that predicts the full syntactic tree. Finally, we employ dual decomposition techniques to produce consistent syntactic and predicate-argument structures while searching over a large space of syntactic configurations. In experiments on the CoNLL-2009 English benchmark we observe very competitive results. 219–230 @@ -245,7 +245,7 @@ FeifeiZhai JiajunZhang YuZhou - ChengqingZong + ChengqingZong 10.1162/tacl_a_00224 In current research, most tree-based translation models are built directly from parse trees. In this study, we go in another direction and build a translation model with an unsupervised tree structure derived from a novel non-parametric Bayesian model. In the model, we utilize synchronous tree substitution grammars (STSG) to capture the bilingual mapping between language pairs. To train the model efficiently, we develop a Gibbs sampler with three novel Gibbs operators. The sampler is capable of exploring the infinite space of tree structures by performing local changes on the tree nodes. Experimental results show that the string-to-tree translation system using our Bayesian tree structures significantly outperforms the strong baseline string-to-tree system using parse trees. 243–254 @@ -255,7 +255,7 @@ Minimally-Supervised Morphological Segmentation using <fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars KairitSirts - SharonGoldwater + SharonGoldwater 10.1162/tacl_a_00225 This paper explores the use of Adaptor Grammars, a nonparametric Bayesian modelling framework, for minimally supervised morphological segmentation. We compare three training methods: unsupervised training, semi-supervised training, and a novel model selection method. In the model selection method, we train unsupervised Adaptor Grammars using an over-articulated metagrammar, then use a small labelled data set to select which potential morph boundaries identified by the metagrammar should be returned in the final output. We evaluate on five languages and show that semi-supervised training provides a boost over unsupervised training, while the model selection method yields the best average results over all languages and is competitive with state-of-the-art semi-supervised systems. Moreover, this method provides the potential to tune performance according to different evaluation metrics or downstream tasks. 255–266 @@ -285,7 +285,7 @@ Large-scale Word Alignment Using Soft Dependency Cohesion Constraints ZhiguoWang - ChengqingZong + ChengqingZong 10.1162/tacl_a_00228 Dependency cohesion refers to the observation that phrases dominated by disjoint dependency subtrees in the source language generally do not overlap in the target language. It has been verified to be a useful constraint for word alignment. However, previous work either treats this as a hard constraint or uses it as a feature in discriminative models, which is ineffective for large-scale tasks. In this paper, we take dependency cohesion as a soft constraint, and integrate it into a generative model for large-scale word alignment experiments. We also propose an approximate EM algorithm and a Gibbs sampling algorithm to estimate model parameters in an unsupervised manner. Experiments on large-scale Chinese-English translation tasks demonstrate that our model achieves improvements in both alignment quality and translation quality. 291–300 @@ -294,7 +294,7 @@ Data-driven, <fixed-case>PCFG</fixed-case>-based and Pseudo-<fixed-case>PCFG</fixed-case>-based Models for <fixed-case>C</fixed-case>hinese Dependency Parsing - WeiweiSun + WeiweiSun XiaojunWan 10.1162/tacl_a_00229 We present a comparative study of transition-, graph- and PCFG-based models aimed at illuminating more precisely the likely contribution of CFGs in improving Chinese dependency parsing accuracy, especially by combining heterogeneous models. Inspired by the impact of a constituency grammar on dependency parsing, we propose several strategies to acquire pseudo CFGs only from dependency annotations. Compared to linguistic grammars learned from rich phrase-structure treebanks, well designed pseudo grammars achieve similar parsing accuracy and have equivalent contributions to parser ensemble. Moreover, pseudo grammars increase the diversity of base models; therefore, together with all other models, further improve system combination. Based on automatic POS tagging, our final model achieves a UAS of 87.23%, resulting in a significant improvement of the state of the art. @@ -304,8 +304,8 @@ Parsing entire discourses as very long strings: Capturing topic continuity in grounded language learning - Minh-ThangLuong - Michael C.Frank + Minh-ThangLuong + Michael C.Frank MarkJohnson 10.1162/tacl_a_00230 Grounded language learning, the task of mapping from natural language to a representation of meaning, has attracted more and more interest in recent years. In most work on this topic, however, utterances in a conversation are treated independently and discourse structure information is largely ignored. In the context of language acquisition, this independence assumption discards cues that are important to the learner, e.g., the fact that consecutive utterances are likely to share the same referent (Frank et al., 2013). The current paper describes an approach to the problem of simultaneously modeling grounded language at the sentence and discourse levels. We combine ideas from parsing and grammar induction to produce a parser that can handle long input strings with thousands of tokens, creating parse trees that represent full discourses. By casting grounded language learning as a grammatical inference task, we use our parser to extend the work of Johnson et al. (2012), investigating the importance of discourse continuity in children’s language acquisition and its interaction with social cues. Our model boosts performance in a language acquisition task and yields good discourse segmentations compared with human annotators. @@ -335,7 +335,7 @@ Distributional Semantics Beyond Words: Supervised Learning of Analogy and Paraphrase - Peter D.Turney + Peter D.Turney 10.1162/tacl_a_00233 There have been several efforts to extend distributional semantics beyond individual words, to measure the similarity of word pairs, phrases, and sentences (briefly, tuples; ordered sets of words, contiguous or noncontiguous). One way to extend beyond words is to compare two tuples using a function that combines pairwise similarities between the component words in the tuples. A strength of this approach is that it works with both relational similarity (analogy) and compositional similarity (paraphrase). However, past work required hand-coding the combination function for different tasks. The main contribution of this paper is that combination functions are generated by supervised learning. We achieve state-of-the-art results in measuring relational similarity between word pairs (SAT analogies and SemEval 2012 Task 2) and measuring compositional similarity between noun-modifier phrases and unigrams (multiple-choice paraphrase questions). 353–366 @@ -345,7 +345,7 @@ Modeling Missing Data in Distant Supervision for Information Extraction AlanRitter - LukeZettlemoyer + LukeZettlemoyer Mausam OrenEtzioni 10.1162/tacl_a_00234 @@ -357,7 +357,7 @@ Data-Driven Metaphor Recognition and Explanation HongsongLi - Kenny Q.Zhu + Kenny Q.Zhu HaixunWang 10.1162/tacl_a_00235 Recognizing metaphors and identifying the source-target mappings is an important task as metaphorical text poses a big challenge for machine reading. To address this problem, we automatically acquire a metaphor knowledge base and an isA knowledge base from billions of web pages. Using the knowledge bases, we develop an inference mechanism to recognize and explain the metaphors in the text. To our knowledge, this is the first purely data-driven approach of probabilistic metaphor acquisition, recognition, and explanation. Our results shows that it significantly outperforms other state-of-the-art methods in recognizing and explaining metaphors. @@ -390,10 +390,10 @@ Joint Morphological and Syntactic Analysis for Richly Inflected Languages BerndBohnet JoakimNivre - IgorBoguslavsky - RichárdFarkas + IgorBoguslavsky + RichárdFarkas FilipGinter - JanHajič + JanHajič 10.1162/tacl_a_00238 Joint morphological and syntactic analysis has been proposed as a way of improving parsing accuracy for richly inflected languages. Starting from a transition-based model for joint part-of-speech tagging and dependency parsing, we explore different ways of integrating morphological features into the model. We also investigate the use of rule-based morphological analyzers to provide hard or soft lexical constraints and the use of word clusters to tackle the sparsity of lexical features. Evaluation on five morphologically rich languages (Czech, Finnish, German, Hungarian, and Russian) shows consistent improvements in both morphological and syntactic accuracy for joint prediction over a pipeline model, with further improvements thanks to lexical constraints and word clusters. The final results improve the state of the art in dependency parsing for all languages. 415–428 @@ -405,8 +405,8 @@ AnnIrvine JohnMorgan MarineCarpuat - HalDaumé III - DragosMunteanu + HalDaumé III + DragosMunteanu 10.1162/tacl_a_00239 We develop two techniques for analyzing the effect of porting a machine translation system to a new domain. One is a macro-level analysis that measures how domain shift affects corpus-level evaluation; the second is a micro-level analysis for word-level errors. We apply these methods to understand what happens when a Parliament-trained phrase-based machine translation system is applied in four very different domains: news, medical texts, scientific articles and movie subtitles. We present quantitative and qualitative experiments that highlight opportunities for future research in domain adaptation for machine translation. 429–440 diff --git a/data/xml/Q14.xml b/data/xml/Q14.xml index 41daf276db..f77ae8f20a 100644 --- a/data/xml/Q14.xml +++ b/data/xml/Q14.xml @@ -4,7 +4,7 @@ Transactions of the Association for Computational Linguistics, Volume 2 LinDekang - CollinsMichael + MichaelCollins LeeLillian MIT Press
Cambridge, MA
@@ -19,7 +19,7 @@ Heterogeneous Networks and Their Applications: Scientometrics, Name Disambiguation, and Topic Modeling BenKing RahulJha - Dragomir R.Radev + Dragomir R.Radev 10.1162/tacl_a_00161 We present heterogeneous networks as a way to unify lexical networks with relational data. We build a unified ACL Anthology network, tying together the citation, author collaboration, and term-cooccurence networks with affiliation and venue relations. This representation proves to be convenient and allows problems such as name disambiguation, topic modeling, and the measurement of scientific impact to be easily solved using only this network and off-the-shelf graph algorithms. 1–14 @@ -29,7 +29,7 @@ <fixed-case>FLORS</fixed-case>: Fast and Simple Domain Adaptation for Part-of-Speech Tagging TobiasSchnabel - HinrichSchütze + HinrichSchütze 10.1162/tacl_a_00162 We present FLORS, a new part-of-speech tagger for domain adaptation. FLORS uses robust representations that work especially well for unknown words and for known words with unseen tags. FLORS is simpler and faster than previous domain adaptation methods, yet it has significantly better accuracy than several baselines. 15–26 @@ -41,7 +41,7 @@ Automatic Detection and Language Identification of Multilingual Documents MarcoLui Jey HanLau - TimothyBaldwin + TimothyBaldwin 10.1162/tacl_a_00163 Language identification is the task of automatically detecting the language(s) present in a document based on the content of the document. In this work, we address the problem of detecting documents that contain text from more than one language (multilingual documents). We introduce a method that is able to detect that a document is multilingual, identify the languages present, and estimate their relative proportions. We demonstrate the effectiveness of our method over synthetic data, as well as real-world multilingual documents collected from the web. 27–40 @@ -61,7 +61,7 @@ Cross-lingual Projected Expectation Regularization for Weakly Supervised Learning MengqiuWang - Christopher D.Manning + Christopher D.Manning 10.1162/tacl_a_00165 We consider a multilingual weakly supervised learning scenario where knowledge from annotated corpora in a resource-rich language is transferred via bitext to guide the learning in other languages. Past approaches project labels across bitext and use them as features or gold labels for training. We propose a new method that projects model expectations rather than labels, which facilities transfer of model uncertainty across language boundaries. We encode expectations as constraints and train a discriminative CRF model using Generalized Expectation Criteria (Mann and McCallum, 2010). Evaluated on standard Chinese-English and German-English NER datasets, our method demonstrates F1 scores of 64% and 60% when no labeled data is used. Attaining the same accuracy with supervised CRFs requires 12k and 1.5k labeled sentences. Furthermore, when combined with labeled examples, our method yields significant improvements over state-of-the-art supervised methods, achieving best reported numbers to date on Chinese OntoNotes and German CoNLL-03 datasets. 55–66 @@ -98,7 +98,7 @@ Exploring the Role of Stress in <fixed-case>B</fixed-case>ayesian Word Segmentation using <fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars - BenjaminBörschinger + BenjaminBörschinger MarkJohnson 10.1162/tacl_a_00168 Stress has long been established as a major cue in word segmentation for English infants. We show that enabling a current state-of-the-art Bayesian word segmentation model to take advantage of stress cues noticeably improves its performance. We find that the improvements range from 10 to 4%, depending on both the use of phonotactic cues and, to a lesser extent, the amount of evidence available to the learner. We also find that in particular early on, stress cues are much more useful for our model than phonotactic cues by themselves, consistent with the finding that children do seem to use stress cues before they use phonotactic cues. Finally, we study how the model’s knowledge about stress patterns evolves over time. We not only find that our model correctly acquires the most frequent patterns relatively quickly but also that the Unique Stress Constraint that is at the heart of a previously proposed model does not need to be built in but can be acquired jointly with word segmentation. @@ -146,14 +146,14 @@ William F.Styler IV StevenBethard SeanFinan - MarthaPalmer - SameerPradhan + MarthaPalmer + SameerPradhan Piet Cde Groen BradErickson - TimothyMiller + TimothyMiller ChenLin GuerganaSavova - JamesPustejovsky + JamesPustejovsky 10.1162/tacl_a_00172 This article discusses the requirements of a formal specification for the annotation of temporal information in clinical narratives. We discuss the implementation and extension of ISO-TimeML for annotating a corpus of clinical notes, known as the THYME corpus. To reflect the information task and the heavily inference-based reasoning demands in the domain, a new annotation guideline has been developed, “the THYME Guidelines to ISO-TimeML (THYME-TimeML)”. To clarify what relations merit annotation, we distinguish between linguistically-derived and inferentially-derived temporal orderings in the text. We also apply a top performing TempEval 2013 system against this new resource to measure the difficulty of adapting systems to the clinical domain. The corpus is available to the community and has been proposed for use in a SemEval 2015 task. 143–154 @@ -182,7 +182,7 @@ MirjamSimantzik GrahamNeubig SatoshiNakamura - AlexWaibel + AlexWaibel 10.1162/tacl_a_00174 In this paper, we study the problem of manually correcting automatic annotations of natural language in as efficient a manner as possible. We introduce a method for automatically segmenting a corpus into chunks such that many uncertain labels are grouped into the same chunk, while human supervision can be omitted altogether for other segments. A tradeoff must be found for segment sizes. Choosing short segments allows us to reduce the number of highly confident labels that are supervised by the annotator, which is useful because these labels are often already correct and supervising correct labels is a waste of effort. In contrast, long segments reduce the cognitive effort due to context switches. Our method helps find the segmentation that optimizes supervision efficiency by defining user models to predict the cost and utility of supervising each segment and solving a constrained optimization problem balancing these contradictory objectives. A user study demonstrates noticeable gains over pre-segmented, confidence-ordered baselines on two natural language processing tasks: speech transcription and word segmentation. 169–180 @@ -194,9 +194,9 @@ Dynamic Language Models for Streaming Text DaniYogatama ChongWang - Bryan R.Routledge - Noah A.Smith - Eric P.Xing + Bryan R.Routledge + Noah A.Smith + Eric P.Xing 10.1162/tacl_a_00175 We present a probabilistic language model that captures temporal dynamics and conditions on arbitrary non-linguistic context features. These context features serve as important indicators of language changes that are otherwise difficult to capture using text data by itself. We learn our model in an efficient online fashion that is scalable for large, streaming data. With five streaming datasets from two different genres—economics news articles and social media—we evaluate our model on the task of sequential language modeling. Our model consistently outperforms competing models. 181–192 @@ -207,8 +207,8 @@ Discriminative Lexical Semantic Segmentation with Gaps: Running the <fixed-case>MWE</fixed-case> Gamut NathanSchneider EmilyDanchik - ChrisDyer - Noah A.Smith + ChrisDyer + Noah A.Smith 10.1162/tacl_a_00176 We present a novel representation, evaluation measure, and supervised models for the task of identifying the multiword expressions (MWEs) in a sentence, resulting in a lexical semantic segmentation. Our approach generalizes a standard chunking representation to encode MWEs containing gaps, thereby enabling efficient sequence tagging algorithms for feature-rich discriminative models. Experiments on a new dataset of English web text offer the first linguistically-driven evaluation of MWE identification with truly heterogeneous expression types. Our statistical sequence model greatly outperforms a lookup-based segmentation procedure, achieving nearly 60% F1 for MWE identification. 193–206 @@ -219,9 +219,9 @@ Grounded Compositional Semantics for Finding and Describing Images with Sentences RichardSocher AndrejKarpathy - Quoc V.Le - Christopher D.Manning - Andrew Y.Ng + Quoc V.Le + Christopher D.Manning + Andrew Y.Ng 10.1162/tacl_a_00177 Previous work on Recursive Neural Networks (RNNs) shows that these models can produce compositional feature vectors for accurately representing and classifying sentences or images. However, the sentence vectors of previous models cannot accurately represent visually grounded meaning. We introduce the DT-RNN model which uses dependency trees to embed sentences into a vector space in order to retrieve images that are described by those sentences. Unlike previous RNN-based models which use constituency trees, DT-RNNs naturally focus on the action and agents in a sentence. They are better able to abstract from the details of word order and syntactic expression. DT-RNNs outperform other recursive and recurrent neural networks, kernelized CCA and a bag-of-words baseline on the tasks of finding an image that fits a sentence description and vice versa. They also give more similar representations to sentences that describe the same image. 207–218 @@ -230,7 +230,7 @@ Back to Basics for Monolingual Alignment: Exploiting Word Similarity and Contextual Evidence - Md ArafatSultan + Md ArafatSultan StevenBethard TamaraSumner 10.1162/tacl_a_00178 @@ -254,7 +254,7 @@ Crosslingual and Multilingual Construction of Syntax-Based Vector Space Models JasonUtt - SebastianPadó + SebastianPadó 10.1162/tacl_a_00180 Syntax-based distributional models of lexical semantics provide a flexible and linguistically adequate representation of co-occurrence information. However, their construction requires large, accurately parsed corpora, which are unavailable for most languages. In this paper, we develop a number of methods to overcome this obstacle. We describe (a) a crosslingual approach that constructs a syntax-based model for a new language requiring only an English resource and a translation lexicon; and (b) multilingual approaches that combine crosslingual with monolingual information, subject to availability. We evaluate on two lexical semantic benchmarks in German and Croatian. We find that the models exhibit complementary profiles: crosslingual models yield higher accuracies while monolingual models provide better coverage. In addition, we show that simple multilingual models can successfully combine their strengths. 245–258 @@ -273,7 +273,7 @@ Dense Event Ordering with a Multi-Pass Architecture - NathanaelChambers + NathanaelChambers TaylorCassidy BillMcDowell StevenBethard @@ -308,7 +308,7 @@ The Benefits of a Model of Annotation - Rebecca J.Passonneau + Rebecca J.Passonneau BobCarpenter 10.1162/tacl_a_00185 Standard agreement measures for interannotator reliability are neither necessary nor sufficient to ensure a high quality corpus. In a case study of word sense annotation, conventional methods for evaluating labels from trained annotators are contrasted with a probabilistic annotation model applied to crowdsourced data. The annotation model provides far more information, including a certainty measure for each gold standard label; the crowdsourced data was collected at less than half the cost of the conventional approach. @@ -320,7 +320,7 @@ Improved <fixed-case>CCG</fixed-case> Parsing with Semi-supervised Supertagging MikeLewis - MarkSteedman + MarkSteedman 10.1162/tacl_a_00186 Current supervised parsers are limited by the size of their labelled training data, making improving them with unlabelled data an important goal. We show how a state-of-the-art CCG parser can be enhanced, by predicting lexical categories using unsupervised vector-space embeddings of words. The use of word embeddings enables our model to better generalize from the labelled data, and allows us to accurately assign lexical categories without depending on a POS-tagger. Our approach leads to substantial improvements in dependency parsing results over the standard supervised CCG parser when evaluated on Wall Street Journal (0.8%), Wikipedia (1.8%) and biomedical (3.4%) text. We compare the performance of two recently proposed approaches for classification using a wide variety of word embeddings. We also give a detailed error analysis demonstrating where using embeddings outperforms traditional feature sets, and showing how including POS features can decrease accuracy. 327–338 @@ -341,7 +341,7 @@ <fixed-case>T</fixed-case>ree<fixed-case>T</fixed-case>alk: Composition and Compression of Trees for Image Descriptions PolinaKuznetsova VicenteOrdonez - Tamara L.Berg + Tamara L.Berg YejinChoi 10.1162/tacl_a_00188 We present a new tree based approach to composing expressive image descriptions that makes use of naturally occuring web images with captions. We investigate two related tasks: image caption generalization and generation, where the former is an optional subtask of the latter. The high-level idea of our approach is to harvest expressive phrases (as tree fragments) from existing image descriptions, then to compose a new description by selectively combining the extracted (and optionally pruned) tree fragments. Key algorithmic components are tree composition and compression, both integrating tree structure with sequence structure. Our proposed system attains significantly better performance than previous approaches for both image caption generalization and generation. In addition, our work is the first to show the empirical benefit of automatically generalized captions for composing natural image descriptions. @@ -352,7 +352,7 @@ Unsupervised Discovery of Biographical Structure from Text DavidBamman - Noah A.Smith + Noah A.Smith 10.1162/tacl_a_00189 We present a method for discovering abstract event classes in biographies, based on a probabilistic latent-variable model. Taking as input timestamped text, we exploit latent correlations among events to learn a set of event classes (such as Born, Graduates High School, and Becomes Citizen), along with the typical times in a person’s life when those events occur. In a quantitative evaluation at the task of predicting a person’s age for a given event, we find that our generative model outperforms a strong linear regression baseline, along with simpler variants of the model that ablate some features. The abstract event classes that we learn allow us to perform a large-scale analysis of 242,970 Wikipedia biographies. Though it is known that women are greatly underrepresented on Wikipedia—not only as editors (Wikipedia, 2011) but also as subjects of articles (Reagle and Rhue, 2011)—we find that there is a bias in their characterization as well, with biographies of women containing significantly more emphasis on events of marriage and divorce than biographies of men. 363–376 @@ -363,7 +363,7 @@ Large-scale Semantic Parsing without Question-Answer Pairs SivaReddy MirellaLapata - MarkSteedman + MarkSteedman 10.1162/tacl_a_00190 In this paper we introduce a novel semantic parsing approach to query Freebase in natural language without requiring manual annotations or question-answer pairs. Our key insight is to represent natural language via semantic graphs whose topology shares many commonalities with Freebase. Given this representation, we conceptualize semantic parsing as a graph matching problem. Our model converts sentences to semantic graphs using CCG and subsequently grounds them to Freebase guided by denotations as a form of weak supervision. Evaluation experiments on a subset of the Free917 and WebQuestions benchmark datasets show our semantic parser improves over the state of the art. 377–392 @@ -372,9 +372,9 @@ Locally Non-Linear Learning for Statistical Machine Translation via Discretization and Structured Regularization - Jonathan H.Clark - ChrisDyer - AlonLavie + Jonathan H.Clark + ChrisDyer + AlonLavie 10.1162/tacl_a_00191 Linear models, which support efficient learning and inference, are the workhorses of statistical machine translation; however, linear decision rules are less attractive from a modeling perspective. In this work, we introduce a technique for learning arbitrary, rule-local, non-linear feature transforms that improve model expressivity, but do not sacrifice the efficient inference and learning associated with linear models. To demonstrate the value of our technique, we discard the customary log transform of lexical probabilities and drop the phrasal translation probability in favor of raw counts. We observe that our algorithm learns a variation of a log transform that leads to better translation quality compared to the explicit log transform. We conclude that non-linear responses play an important role in SMT, an observation that we hope will inform the efforts of feature engineers. 393–404 @@ -406,7 +406,7 @@ WeiXu AlanRitter ChrisCallison-Burch - William B.Dolan + William B.Dolan YangfengJi 10.1162/tacl_a_00194 We present MultiP (Multi-instance Learning Paraphrase Model), a new model suited to identify paraphrases within the short messages on Twitter. We jointly model paraphrase relations between word and sentence pairs and assume only sentence-level annotations during learning. Using this principled latent variable model alone, we achieve the performance competitive with a state-of-the-art method which combines a latent space model with a feature-based supervised classifier. Our model also captures lexically divergent paraphrases that differ from yet complement previous methods; combining our model with previous work significantly outperforms the state-of-the-art. In addition, we present a novel annotation methodology that has allowed us to crowdsource a paraphrase corpus from Twitter. We make this new dataset available to the research community. @@ -429,7 +429,7 @@ Online <fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars with Hybrid Inference KeZhai JordanBoyd-Graber - Shay B.Cohen + Shay B.Cohen 10.1162/tacl_a_00196 Adaptor grammars are a flexible, powerful formalism for defining nonparametric, unsupervised models of grammar productions. This flexibility comes at the cost of expensive inference. We address the difficulty of inference through an online algorithm which uses a hybrid of Markov chain Monte Carlo and variational inference. We show that this inference strategy improves scalability without sacrificing performance on unsupervised word segmentation and topic modeling tasks. 465–476 @@ -460,7 +460,7 @@ Joint Modeling of Opinion Expression Extraction and Attribute Classification BishanYang - ClaireCardie + ClaireCardie 10.1162/tacl_a_00199 In this paper, we study the problems of opinion expression extraction and expression-level polarity and intensity classification. Traditional fine-grained opinion analysis systems address these problems in isolation and thus cannot capture interactions among the textual spans of opinion expressions and their opinion-related properties. We present two types of joint approaches that can account for such interactions during 1) both learning and inference or 2) only during inference. Extensive experiments on a standard dataset demonstrate that our approaches provide substantial improvements over previously published results. By analyzing the results, we gain some insight into the advantages of different joint models. 505–516 diff --git a/data/xml/Q15.xml b/data/xml/Q15.xml index d891b5446d..6267f79918 100644 --- a/data/xml/Q15.xml +++ b/data/xml/Q15.xml @@ -3,7 +3,7 @@ Transactions of the Association for Computational Linguistics, Volume 3 - CollinsMichael + MichaelCollins LeeLillian MIT Press
Cambridge, MA
@@ -48,7 +48,7 @@
<fixed-case>S</fixed-case>prite: Generalizing Topic Models with Structured Priors - Michael J.Paul + Michael J.Paul MarkDredze 10.1162/tacl_a_00121 We introduce Sprite, a family of topic models that incorporates structure into model priors as a function of underlying components. The structured priors can be constrained to model topic hierarchies, factorizations, correlations, and supervision, allowing Sprite to be tailored to particular settings. We demonstrate this flexibility by constructing a Sprite-based model to jointly infer topic hierarchies and author perspective, which we apply to corpora of political debates and online reviews. We show that the model learns intuitive topics, outperforming several other topic models at predictive tasks. @@ -62,7 +62,7 @@ MohitBansal KevinGimpel Brian D.Ziebart - Clement T.Yu + Clement T.Yu 10.1162/tacl_a_00122 Word sense induction (WSI) seeks to automatically discover the senses of a word in a corpus via unsupervised methods. We propose a sense-topic model for WSI, which treats sense and topic as two separate latent variables to be inferred jointly. Topics are informed by the entire document, while senses are informed by the local context surrounding the ambiguous word. We also discuss unsupervised ways of enriching the original corpus in order to improve model performance, including using neural word embeddings and external corpora to expand the context of each data instance. We demonstrate significant improvements over the previous state-of-the-art, achieving the best results reported to date on the SemEval-2013 WSI task. 59–71 @@ -104,8 +104,8 @@ Exploiting Parallel News Streams for Unsupervised Event Extraction CongleZhang - StephenSoderland - Daniel S.Weld + StephenSoderland + Daniel S.Weld 10.1162/tacl_a_00127 Most approaches to relation extraction, the task of extracting ground facts from natural language text, are based on machine learning and thus starved by scarce training data. Manual annotation is too expensive to scale to a comprehensive set of relations. Distant supervision, which automatically creates training data, only works with relations that already populate a knowledge base (KB). Unfortunately, KBs such as FreeBase rarely cover event relations (e.g. “person travels to location”). Thus, the problem of extracting a wide range of events — e.g., from news streams — is an important, open challenge. This paper introduces NewsSpike-RE, a novel, unsupervised algorithm that discovers event relations and then learns to extract them. NewsSpike-RE uses a novel probabilistic graphical model to cluster sentences describing similar events from parallel news streams. These clusters then comprise training data for the extractor. Our evaluation shows that NewsSpike-RE generates high quality training sentences and learns extractors that perform much better than rival approaches, more than doubling the area under a precision-recall curve compared to Universal Schemas. 117–129 @@ -157,9 +157,9 @@ From Visual Attributes to Adjectives through Decompositional Distributional Semantics AngelikiLazaridou - GeorgianaDinu - AdamLiska - MarcoBaroni + GeorgianaDinu + AdamLiska + MarcoBaroni 10.1162/tacl_a_00132 As automated image analysis progresses, there is increasing interest in richer linguistic annotation of pictures, with attributes of objects (e.g., furry, brown…) attracting most attention. By building on the recent “zero-shot learning” approach, and paying attention to the linguistic nature of attributes as noun modifiers, and specifically adjectives, we show that it is possible to tag images with attribute-denoting adjectives even when no training data containing the relevant annotation are available. Our approach relies on two key observations. First, objects can be seen as bundles of attributes, typically expressed as adjectival modifiers (a dog is something furry, brown, etc.), and thus a function trained to map visual representations of objects to nominal labels can implicitly learn to map attributes to adjectives. Second, objects and attributes come together in pictures (the same thing is a dog and it is brown). We can thus achieve better attribute (and object) label retrieval by treating images as “visual phrases”, and decomposing their linguistic representation into an attribute-denoting adjective and an object-denoting noun. Our approach performs comparably to a method exploiting manual attribute annotation, it out-performs various competitive alternatives in both attribute and object annotation, and it automatically constructs attribute-centric representations that significantly improve performance in supervised object recognition. 183–196 @@ -169,8 +169,8 @@ Higher-order Lexical Semantic Models for Non-factoid Answer Reranking DanielFried - PeterJansen - GustaveHahn-Powell + PeterJansen + GustaveHahn-Powell MihaiSurdeanu PeterClark 10.1162/tacl_a_00133 @@ -203,8 +203,8 @@ Combining Minimally-supervised Methods for <fixed-case>A</fixed-case>rabic Named Entity Recognition MahaAlthobaiti - UdoKruschwitz - MassimoPoesio + UdoKruschwitz + MassimoPoesio 10.1162/tacl_a_00136 Supervised methods can achieve high performance on NLP tasks, such as Named Entity Recognition (NER), but new annotations are required for every new domain and/or genre change. This has motivated research in minimally supervised methods such as semi-supervised learning and distant learning, but neither technique has yet achieved performance levels comparable to those of supervised methods. Semi-supervised methods tend to have very high precision but comparatively low recall, whereas distant learning tends to achieve higher recall but lower precision. This complementarity suggests that better results may be obtained by combining the two types of minimally supervised methods. In this paper we present a novel approach to Arabic NER using a combination of semi-supervised and distant learning techniques. We trained a semi-supervised NER classifier and another one using distant learning techniques, and then combined them using a variety of classifier combination schemes, including the Bayesian Classifier Combination (BCC) procedure recently proposed for sentiment analysis. According to our results, the BCC model leads to an increase in performance of 8 percentage points over the best base classifiers. 243–255 @@ -214,7 +214,7 @@ Learning a Compositional Semantics for <fixed-case>F</fixed-case>reebase with an Open Predicate Vocabulary JayantKrishnamurthy - Tom M.Mitchell + Tom M.Mitchell 10.1162/tacl_a_00137 We present an approach to learning a model-theoretic semantics for natural language tied to Freebase. Crucially, our approach uses an open predicate vocabulary, enabling it to produce denotations for phrases such as “Republican front-runner from Texas” whose semantics cannot be represented using the Freebase schema. Our approach directly converts a sentence’s syntactic CCG parse into a logical form containing predicates derived from the words in the sentence, assigning each word a consistent semantics across sentences. This logical form is evaluated against a learned probabilistic database that defines a distribution over denotations for each textual predicate. A training phase produces this probabilistic database using a corpus of entity-linked text and probabilistic matrix factorization with a novel ranking objective function. We evaluate our approach on a compositional question answering task where it outperforms several competitive baselines. We also compare our approach against manually annotated Freebase queries, finding that our open predicate vocabulary enables us to answer many questions that Freebase cannot. 257–270 @@ -225,7 +225,7 @@ Domain Adaptation for Syntactic and Semantic Dependency Parsing Using Deep Belief Networks HaitongYang TaoZhuang - ChengqingZong + ChengqingZong 10.1162/tacl_a_00138 In current systems for syntactic and semantic dependency parsing, people usually define a very high-dimensional feature space to achieve good performance. But these systems often suffer severe performance drops on out-of-domain test data due to the diversity of features of different domains. This paper focuses on how to relieve this domain adaptation problem with the help of unlabeled target domain data. We propose a deep learning method to adapt both syntactic and semantic parsers. With additional unlabeled target domain data, our method can learn a latent feature representation (LFR) that is beneficial to both domains. Experiments on English data in the CoNLL 2009 shared task show that our method largely reduced the performance drop on out-of-domain test data. Moreover, we get a Macro F1 score that is 2.32 points higher than the best system in the CoNLL 2009 shared task in out-of-domain tests. 271–282 @@ -264,7 +264,7 @@ Design Challenges for Entity Linking XiaoLing SameerSingh - Daniel S.Weld + Daniel S.Weld 10.1162/tacl_a_00141 Recent research on entity linking (EL) has introduced a plethora of promising techniques, ranging from deep neural networks to joint inference. But despite numerous papers there is surprisingly little understanding of the state of the art in EL. We attack this confusion by analyzing differences between several versions of the EL problem and presenting a simple yet effective, modular, unsupervised system, called Vinculum, for entity linking. We conduct an extensive evaluation on nine data sets, comparing Vinculum with two state-of-the-art systems, and elucidate key aspects of the system that include mention extraction, candidate generation, entity type prediction, entity coreference, and coherence. 315–328 @@ -301,7 +301,7 @@ A Graph-based Lattice Dependency Parser for Joint Morphological Segmentation and Syntactic Analysis WolfgangSeeker - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu 10.1162/tacl_a_00144 Space-delimited words in Turkish and Hebrew text can be further segmented into meaningful units, but syntactic and semantic context is necessary to predict segmentation. At the same time, predicting correct syntactic structures relies on correct segmentation. We present a graph-based lattice dependency parser that operates on morphological lattices to represent different segmentations and morphological analyses for a given input sentence. The lattice parser predicts a dependency tree over a path in the lattice and thus solves the joint task of segmentation, morphological analysis, and syntactic parsing. We conduct experiments on the Turkish and the Hebrew treebank and show that the joint model outperforms three state-of-the-art pipeline systems on both data sets. Our work corroborates findings from constituency lattice parsing for Hebrew and presents the first results for full lattice parsing on Turkish. 359–373 @@ -310,9 +310,9 @@ Deriving <fixed-case>B</fixed-case>oolean structures from distributional vectors - GermanKruszewski + GermanKruszewski DenisPaperno - MarcoBaroni + MarcoBaroni 10.1162/tacl_a_00145 Corpus-based distributional semantic models capture degrees of semantic relatedness among the words of very large vocabularies, but have problems with logical phenomena such as entailment, that are instead elegantly handled by model-theoretic approaches, which, in turn, do not scale up. We combine the advantages of the two views by inducing a mapping from distributional vectors of words (or sentences) into a Boolean structure of the kind in which natural language terms are assumed to denote. We evaluate this Boolean Distributional Semantic Model (BDSM) on recognizing entailment between words and sentences. The method achieves results comparable to a state-of-the-art SVM, degrades more gracefully when less training data are available and displays interesting qualitative properties. 375–388 @@ -322,9 +322,9 @@ Unsupervised Lexicon Discovery from Acoustic Input - Chia-yingLee - Timothy J.O’Donnell - JamesGlass + Chia-yingLee + Timothy J.O’Donnell + JamesGlass 10.1162/tacl_a_00146 We present a model of unsupervised phonological lexicon discovery—the problem of simultaneously learning phoneme-like and word-like units from acoustic input. Our model builds on earlier models of unsupervised phone-like unit discovery from acoustic data (Lee and Glass, 2012), and unsupervised symbolic lexicon discovery using the Adaptor Grammar framework (Johnson et al., 2006), integrating these earlier approaches using a probabilistic model of phonological variation. We show that the model is competitive with state-of-the-art spoken term discovery systems, and present analyses exploring the model’s behavior and the kinds of linguistic structures it learns. 389–403 @@ -357,7 +357,7 @@ Modeling Word Forms Using Latent Underlying Morphs and Phonology RyanCotterell NanyunPeng - JasonEisner + JasonEisner 10.1162/tacl_a_00149 The observed pronunciations or spellings of words are often explained as arising from the “underlying forms” of their morphemes. These forms are latent strings that linguists try to reconstruct by hand. We propose to reconstruct them automatically at scale, enabling generalization to new words. Given some surface word types of a concatenative language along with the abstract morpheme sequences that they express, we show how to recover consistent underlying forms for these morphemes, together with the (stochastic) phonology that maps each concatenation of underlying forms to a surface form. Our technique involves loopy belief propagation in a natural directed graphical model whose variables are unknown strings and whose conditional distributions are encoded as finite-state machines with trainable weights. We define training and evaluation paradigms for the task of surface word prediction, and report results on subsets of 7 languages. 433–447 @@ -376,8 +376,8 @@ Learning Structural Kernels for Natural Language Processing - DanielBeck - TrevorCohn + DanielBeck + TrevorCohn ChristianHardmeier LuciaSpecia 10.1162/tacl_a_00151 @@ -404,9 +404,9 @@ Approximation-Aware Dependency Parsing by Belief Propagation - Matthew R.Gormley + Matthew R.Gormley MarkDredze - JasonEisner + JasonEisner 10.1162/tacl_a_00153 We show how to train the fast dependency parser of Smith and Eisner (2008) for improved accuracy. This parser can consider higher-order interactions among edges while retaining O(n3) runtime. It outputs the parse with maximum expected recall—but for speed, this expectation is taken under a posterior distribution that is constructed only approximately, using loopy belief propagation through structured factors. We show how to adjust the model parameters to compensate for the errors introduced by this approximation, by following the gradient of the actual loss on training data. We find this gradient by back-propagation. That is, we treat the entire parser (approximations and all) as a differentiable circuit, as others have done for loopy CRFs (Domke, 2010; Stoyanov et al., 2011; Domke, 2011; Stoyanov and Eisner, 2012). The resulting parser obtains higher accuracy with fewer iterations of belief propagation than one trained by conditional log-likelihood. 489–501 @@ -417,7 +417,7 @@ <fixed-case>P</fixed-case>lato: A Selective Context Model for Entity Resolution NevenaLazic - AmarnagSubramanya + AmarnagSubramanya MichaelRinggaard FernandoPereira 10.1162/tacl_a_00154 @@ -429,7 +429,7 @@ A Hierarchical Distance-dependent <fixed-case>B</fixed-case>ayesian Model for Event Coreference Resolution BishanYang - ClaireCardie + ClaireCardie PeterFrazier 10.1162/tacl_a_00155 We present a novel hierarchical distance-dependent Bayesian model for event coreference resolution. While existing generative models for event coreference resolution are completely unsupervised, our model allows for the incorporation of pairwise distances between event mentions — information that is widely used in supervised coreference models to guide the generative clustering processing for better event clustering both within and across documents. We model the distances between event mentions using a feature-rich learnable distance function and encode them as Bayesian priors for nonparametric clustering. Experiments on the ECB+ corpus show that our model outperforms state-of-the-art methods for both within- and cross-document event coreference resolution. @@ -473,7 +473,7 @@ PhilipArthur GrahamNeubig SakrianiSakti - TomokiToda + TomokiToda SatoshiNakamura 10.1162/tacl_a_00159 We propose a new method for semantic parsing of ambiguous and ungrammatical input, such as search queries. We do so by building on an existing semantic parsing framework that uses synchronous context free grammars (SCFG) to jointly model the input sentence and output meaning representation. We generalize this SCFG framework to allow not one, but multiple outputs. Using this formalism, we construct a grammar that takes an ambiguous input string and jointly maps it into both a meaning representation and a natural language paraphrase that is less ambiguous than the original input. This paraphrase can be used to disambiguate the meaning representation via verification using a language model that calculates the probability of each paraphrase. @@ -483,7 +483,7 @@ Parsing Algebraic Word Problems into Equations - RikKoncel-Kedziorski + RikKoncel-Kedziorski HannanehHajishirzi AshishSabharwal OrenEtzioni diff --git a/data/xml/Q16.xml b/data/xml/Q16.xml index a54bbd369b..187dab1e18 100644 --- a/data/xml/Q16.xml +++ b/data/xml/Q16.xml @@ -64,7 +64,7 @@ An Empirical Analysis of Formality in Online Communication ElliePavlick - JoelTetreault + JoelTetreault 10.1162/tacl_a_00083 This paper presents an empirical study of linguistic formality. We perform an analysis of humans’ perceptions of formality in four different genres. These findings are used to develop a statistical model for predicting formality, which is evaluated under different feature settings and genres. We apply our model to an investigation of formality in online discussion forums, and present findings consistent with theories of formality and linguistic coordination. 61–74 @@ -96,7 +96,7 @@ Adapting to All Domains at Once: Rewarding Domain Invariance in <fixed-case>SMT</fixed-case> HoangCuong - KhalilSima’an + KhalilSima’an IvanTitov 10.1162/tacl_a_00086 Existing work on domain adaptation for statistical machine translation has consistently assumed access to a small sample from the test distribution (target domain) at training time. In practice, however, the target domain may not be known at training time or it may change to match user needs. In such situations, it is natural to push the system to make safer choices, giving higher preference to domain-invariant translations, which work well across domains, over risky domain-specific alternatives. We encode this intuition by (1) inducing latent subdomains from the training data only; (2) introducing features which measure how specialized phrases are to individual induced sub-domains; (3) estimating feature weights on out-of-domain data (rather than on the target domain). We conduct experiments on three language pairs and a number of different domains. We observe consistent improvements over a baseline which does not explicitly reward domain invariance. @@ -106,9 +106,9 @@ A Joint Model for Answer Sentence Ranking and Answer Extraction - Md ArafatSultan + Md ArafatSultan VittorioCastelli - RaduFlorian + RaduFlorian 10.1162/tacl_a_00087 Answer sentence ranking and answer extraction are two key challenges in question answering that have traditionally been treated in isolation, i.e., as independent tasks. In this article, we (1) explain how both tasks are related at their core by a common quantity, and (2) propose a simple and intuitive joint probabilistic model that addresses both via joint computation but task-specific application of that quantity. In our experiments with two TREC datasets, our joint model substantially outperforms state-of-the-art systems in both tasks. 113–125 @@ -119,10 +119,10 @@ Transforming Dependency Structures to Logical Forms for Semantic Parsing SivaReddy OscarTäckström - MichaelCollins + MichaelCollins TomKwiatkowski DipanjanDas - MarkSteedman + MarkSteedman MirellaLapata 10.1162/tacl_a_00088 The strongly typed syntax of grammar formalisms such as CCG, TAG, LFG and HPSG offers a synchronous framework for deriving syntactic structures and semantic logical forms. In contrast—partly due to the lack of a strong type system—dependency structures are easy to annotate and have become a widely used form of syntactic analysis for many languages. However, the lack of a type system makes a formal mechanism for deriving logical forms from dependency structures challenging. We address this by introducing a robust system based on the lambda calculus for deriving neo-Davidsonian logical forms from dependency trees. These logical forms are then used for semantic parsing of natural language to Freebase. Experiments on the Free917 and Web-Questions datasets show that our representation is superior to the original dependency trees and that it outperforms a CCG-based representation on this task. Compared to prior work, we obtain the strongest result to date on Free917 and competitive results on WebQuestions. @@ -155,7 +155,7 @@ KeisukeSakaguchi CourtneyNapoles MattPost - JoelTetreault + JoelTetreault 10.1162/tacl_a_00091 The field of grammatical error correction (GEC) has grown substantially in recent years, with research directed at both evaluation metrics and improved system performance against those metrics. One unvisited assumption, however, is the reliance of GEC evaluation on error-coded corpora, which contain specific labeled corrections. We examine current practices and show that GEC’s reliance on such corpora unnaturally constrains annotation and automatic evaluation, resulting in (a) sentences that do not sound acceptable to native speakers and (b) system rankings that do not correlate with human judgments. In light of this, we propose an alternate approach that jettisons costly error coding in favor of unannotated, whole-sentence rewrites. We compare the performance of existing metrics over different gold-standard annotations, and show that automatic evaluation with our new annotation scheme has very strong correlation with expert rankings (ρ = 0.82). As a result, we advocate for a fundamental and necessary shift in the goal of GEC, from correcting small, labeled error types, to producing text that has native fluency. 169–182 @@ -207,7 +207,7 @@ Unsupervised Part-Of-Speech Tagging with Anchor Hidden <fixed-case>M</fixed-case>arkov Models KarlStratos - MichaelCollins + MichaelCollins DanielHsu 10.1162/tacl_a_00096 We tackle unsupervised part-of-speech (POS) tagging by learning hidden Markov models (HMMs) that are particularly well-suited for the problem. These HMMs, which we call anchor HMMs, assume that each tag is associated with at least one word that can have no other tag, which is a relatively benign condition for POS tagging (e.g., “the” is a word that appears only under the determiner tag). We exploit this assumption and extend the non-negative matrix factorization framework of Arora et al. (2013) to design a consistent estimator for anchor HMMs. In experiments, our algorithm is competitive with strong baselines such as the clustering method of Brown et al. (1992) and the log-linear model of Berg-Kirkpatrick et al. (2010). Furthermore, it produces an interpretable model in which hidden states are automatically lexicalized by words. @@ -218,7 +218,7 @@ <fixed-case>ABCNN</fixed-case>: Attention-Based Convolutional Neural Network for Modeling Sentence Pairs WenpengYin - HinrichSchütze + HinrichSchütze BingXiang BowenZhou 10.1162/tacl_a_00097 @@ -230,7 +230,7 @@ Word Embeddings as Metric Recovery in Semantic Spaces - Tatsunori B.Hashimoto + Tatsunori B.Hashimoto DavidAlvarez-Melis Tommi S.Jaakkola 10.1162/tacl_a_00098 @@ -251,12 +251,12 @@ Multilingual Projection for Parsing Truly Low-Resource Languages - ŽeljkoAgić - AndersJohannsen - BarbaraPlank - HéctorMartínez Alonso + ŽeljkoAgić + AndersJohannsen + BarbaraPlank + HéctorMartínez Alonso NatalieSchluter - AndersSøgaard + AndersSøgaard 10.1162/tacl_a_00100 We propose a novel approach to cross-lingual part-of-speech tagging and dependency parsing for truly low-resource languages. Our annotation projection-based approach yields tagging and parsing models for over 100 languages. All that is needed are freely available parallel texts, and taggers and parsers for resource-rich languages. The empirical evaluation across 30 test languages shows that our method consistently provides top-level accuracies, close to established upper bounds, and outperforms several competitive baselines. 301–312 @@ -349,7 +349,7 @@ Encoding Prior Knowledge with Eigenword Embeddings DominiqueOsborne ShashiNarayan - Shay B.Cohen + Shay B.Cohen 10.1162/tacl_a_00108 Canonical correlation analysis (CCA) is a method for reducing the dimension of data represented using two views. It has been previously used to derive word embeddings, where one view indicates a word, and the other view indicates its context. We describe a way to incorporate prior knowledge into CCA, give a theoretical justification for it, and test it by deriving word embeddings and evaluating them on a myriad of datasets. 417–430 @@ -361,8 +361,8 @@ WaleedAmmar GeorgeMulcaire MiguelBallesteros - ChrisDyer - Noah A.Smith + ChrisDyer + Noah A.Smith 10.1162/tacl_a_00109 We train one multilingual model for dependency parsing and use it to parse sentences in several languages. The parsing model uses (i) multilingual word clusters and embeddings; (ii) token-level language information; and (iii) language-specific features (fine-grained POS tags). This input representation enables the parser not only to parse effectively in multiple languages, but also to generalize across languages based on linguistic universals and typological similarities, making it more effective to learn from limited annotations. Our parser’s performance compares favorably to strong baselines in a range of data scenarios, including when the target language has a large treebank, a small treebank, or no treebank for training. 431–444 @@ -396,8 +396,8 @@ Fast, Small and Exact: Infinite-order Language Modelling with Compressed Suffix Trees EhsanShareghi MatthiasPetri - GholamrezaHaffari - TrevorCohn + GholamrezaHaffari + TrevorCohn 10.1162/tacl_a_00112 Efficient methods for storing and querying are critical for scaling high-order m-gram language models to large corpora. We propose a language model based on compressed suffix trees, a representation that is highly compact and can be easily held in memory, while supporting queries needed in computing language model probabilities on-the-fly. We present several optimisations which improve query runtimes up to 2500×, despite only incurring a modest increase in construction time and memory usage. For large corpora and high Markov orders, our method is highly competitive with the state-of-the-art KenLM package. It imposes much lower memory requirements, often by orders of magnitude, and has runtimes that are either similar (for training) or comparable (for querying). 477–490 @@ -408,7 +408,7 @@ The Galactic Dependencies Treebanks: Getting More Data by Synthesizing New Languages DingquanWang - JasonEisner + JasonEisner 10.1162/tacl_a_00113 We release Galactic Dependencies 1.0—a large set of synthetic languages not found on Earth, but annotated in Universal Dependencies format. This new resource aims to provide training and development data for NLP methods that aim to adapt to unfamiliar languages. Each synthetic treebank is produced from a real treebank by stochastically permuting the dependents of nouns and/or verbs to match the word order of other real languages. We discuss the usefulness, realism, parsability, perplexity, and diversity of the synthetic languages. As a simple demonstration of the use of Galactic Dependencies, we consider single-source transfer, which attempts to parse a real target language using a parser trained on a “nearby” source language. We find that including synthetic source languages somewhat increases the diversity of the source pool, which significantly improves results for most target languages. 491–505 @@ -419,7 +419,7 @@ Minimally Supervised Number Normalization KyleGorman - RichardSproat + RichardSproat 10.1162/tacl_a_00114 We propose two models for verbalizing numbers, a key component in speech recognition and synthesis systems. The first model uses an end-to-end recurrent neural network. The second model, drawing inspiration from the linguistics literature, uses finite-state transducers constructed with a minimal amount of training data. While both models achieve near-perfect performance, the latter model can be trained using several orders of magnitude less data than the former, making it particularly useful for low-resource languages. 507–519 @@ -451,9 +451,9 @@ Utilizing Temporal Information for Taxonomy Construction - Luu AnhTuan + Luu AnhTuan Siu CheungHui - See KiongNg + See KiongNg 10.1162/tacl_a_00117 Taxonomies play an important role in many applications by organizing domain knowledge into a hierarchy of ‘is-a’ relations between terms. Previous work on automatic construction of taxonomies from text documents either ignored temporal information or used fixed time periods to discretize the time series of documents. In this paper, we propose a time-aware method to automatically construct and effectively maintain a taxonomy from a given series of documents preclustered for a domain of interest. The method extracts temporal information from the documents and uses a timestamp contribution function to score the temporal relevance of the evidence from source texts when identifying the taxonomic relations for constructing the taxonomy. Experimental results show that our proposed method outperforms the state-of-the-art methods by increasing F-measure up to 7%–20%. Furthermore, the proposed method can incrementally update the taxonomy by adding fresh relations from new data and removing outdated relations using an information decay function. It thus avoids rebuilding the whole taxonomy from scratch for every update and keeps the taxonomy effectively up-to-date in order to track the latest information trends in the rapidly evolving domain. 551–564 diff --git a/data/xml/Q17.xml b/data/xml/Q17.xml index 6db3ae34f0..3417f659fd 100644 --- a/data/xml/Q17.xml +++ b/data/xml/Q17.xml @@ -32,10 +32,10 @@ Visually Grounded and Textual Semantic Models Differentially Decode Brain Activity Associated with Concrete and Abstract Nouns - Andrew J.Anderson + Andrew J.Anderson DouweKiela StephenClark - MassimoPoesio + MassimoPoesio 10.1162/tacl_a_00043 Important advances have recently been made using computational semantic models to decode brain activity patterns associated with concepts; however, this work has almost exclusively focused on concrete nouns. How well these models extend to decoding abstract nouns is largely unknown. We address this question by applying state-of-the-art computational models to decode functional Magnetic Resonance Imaging (fMRI) activity patterns, elicited by participants reading and imagining a diverse set of both concrete and abstract nouns. One of the models we use is linguistic, exploiting the recent word2vec skipgram approach trained on Wikipedia. The second is visually grounded, using deep convolutional neural networks trained on Google Images. Dual coding theory considers concrete concepts to be encoded in the brain both linguistically and visually, and abstract concepts only linguistically. Splitting the fMRI data according to human concreteness ratings, we indeed observe that both models significantly decode the most concrete nouns; however, accuracy is significantly greater using the text-based models for the most abstract nouns. More generally this confirms that current computational models are sufficiently advanced to assist in investigating the representational structure of abstract concepts in the brain. 17–30 @@ -48,7 +48,7 @@ AshutoshModi IvanTitov VeraDemberg - AsadSayeed + AsadSayeed ManfredPinkal 10.1162/tacl_a_00044 Recent research in psycholinguistics has provided increasing evidence that humans predict upcoming content. Prediction also affects perception and might be a key to robustness in human language processing. In this paper, we investigate the factors that affect human prediction by building a computational model that can predict upcoming discourse referents based on linguistic knowledge alone vs. linguistic knowledge jointly with common-sense knowledge in the form of scripts. We find that script knowledge significantly improves model estimates of human predictions. In a second study, we test the highly controversial hypothesis that predictability influences referring expression type but do not find evidence for such an effect. @@ -70,7 +70,7 @@ A Polynomial-Time Dynamic Programming Algorithm for Phrase-Based Decoding with a Fixed Distortion Limit Yin-WenChang - MichaelCollins + MichaelCollins 10.1162/tacl_a_00046 Decoding of phrase-based translation models in the general case is known to be NP-complete, by a reduction from the traveling salesman problem (Knight, 1999). In practice, phrase-based systems often impose a hard distortion limit that limits the movement of phrases during translation. However, the impact on complexity after imposing such a constraint is not well studied. In this paper, we describe a dynamic programming algorithm for phrase-based decoding with a fixed distortion limit. The runtime of the algorithm is O(nd!lhd+1) where n is the sentence length, d is the distortion limit, l is a bound on the number of phrases starting at any position in the sentence, and h is related to the maximum number of target language translations for any source word. The algorithm makes use of a novel representation that gives a new perspective on decoding of phrase-based models. 59–71 @@ -84,7 +84,7 @@ RichardFutrell AdamAlbright PeterGraff - Timothy J.O’Donnell + Timothy J.O’Donnell 10.1162/tacl_a_00047 We present a probabilistic model of phonotactics, the set of well-formed phoneme sequences in a language. Unlike most computational models of phonotactics (Hayes and Wilson, 2008; Goldsmith and Riggle, 2012), we take a fully generative approach, modeling a process where forms are built up out of subparts by phonologically-informed structure building operations. We learn an inventory of subparts by applying stochastic memoization (Johnson et al., 2007; Goodman et al., 2008) to a generative process for phonemes structured as an and-or graph, based on concepts of feature hierarchy from generative phonology (Clements, 1985; Dresher, 2009). Subparts are combined in a way that allows tier-based feature interactions. We evaluate our models’ ability to capture phonotactic distributions in the lexicons of 14 languages drawn from the WOLEX corpus (Graff, 2012). Our full model robustly assigns higher probabilities to held-out forms than a sophisticated N-gram model for all languages. We also present novel analyses that probe model behavior in more detail. 73–86 @@ -112,7 +112,7 @@ HoifungPoon ChrisQuirk KristinaToutanova - Wen-tauYih + Wen-tauYih 10.1162/tacl_a_00049 Past work in relation extraction has focused on binary relations in single sentences. Recent NLP inroads in high-value domains have sparked interest in the more general setting of extracting n-ary relations that span multiple sentences. In this paper, we explore a general relation extraction framework based on graph long short-term memory networks (graph LSTMs) that can be easily extended to cross-sentence n-ary relation extraction. The graph formulation provides a unified way of exploring different LSTM approaches and incorporating various intra-sentential and inter-sentential dependencies, such as sequential, syntactic, and discourse relations. A robust contextual representation is learned for the entities, which serves as input to the relation classifier. This simplifies handling of relations with arbitrary arity, and enables multi-task learning with related relations. We evaluate this framework in two important precision medicine settings, demonstrating its effectiveness with both conventional supervised learning and distant supervision. Cross-sentence extraction produced larger knowledge bases. and multi-task learning significantly improved extraction accuracy. A thorough analysis of various LSTM approaches yielded useful insight the impact of linguistic analysis on extraction accuracy. 101–115 @@ -123,8 +123,8 @@ Automatically Tagging Constructions of Causation and Their Slot-Fillers JesseDunietz - LoriLevin - JaimeCarbonell + LoriLevin + JaimeCarbonell 10.1162/tacl_a_00050 This paper explores extending shallow semantic parsing beyond lexical-unit triggers, using causal relations as a test case. Semantic parsing becomes difficult in the face of the wide variety of linguistic realizations that causation can take on. We therefore base our approach on the concept of constructions from the linguistic paradigm known as Construction Grammar (CxG). In CxG, a construction is a form/function pairing that can rely on arbitrary linguistic and semantic features. Rather than codifying all aspects of each construction’s form, as some attempts to employ CxG in NLP have done, we propose methods that offload that problem to machine learning. We describe two supervised approaches for tagging causal constructions and their arguments. Both approaches combine automatically induced pattern-matching rules with statistical classifiers that learn the subtler parameters of the constructions. Our results show that these approaches are promising: they significantly outperform naïve baselines for both construction recognition and cause and effect head matches. 117–133 @@ -134,9 +134,9 @@ Enriching Word Vectors with Subword Information PiotrBojanowski - EdouardGrave + EdouardGrave ArmandJoulin - TomasMikolov + TomasMikolov 10.1162/tacl_a_00051 Continuous word representations, trained on large unlabeled corpora are useful for many natural language processing tasks. Popular models that learn such representations ignore the morphology of words, by assigning a distinct vector to each word. This is a limitation, especially for languages with large vocabularies and many rare words. In this paper, we propose a new approach based on the skipgram model, where each word is represented as a bag of character n-grams. A vector representation is associated to each character n-gram; words being represented as the sum of these representations. Our method is fast, allowing to train models on large corpora quickly and allows us to compute word representations for words that did not appear in the training data. We evaluate our word representations on nine different languages, both on word similarity and analogy tasks. By comparing to recently proposed morphological word representations, we show that our vectors achieve state-of-the-art performance on these tasks. 135–146 @@ -147,7 +147,7 @@ Fine-Grained Prediction of Syntactic Typology: Discovering Latent Structure with Supervised Learning DingquanWang - JasonEisner + JasonEisner 10.1162/tacl_a_00052 We show how to predict the basic word-order facts of a novel language given only a corpus of part-of-speech (POS) sequences. We predict how often direct objects follow their verbs, how often adjectives follow their nouns, and in general the directionalities of all dependency relations. Such typological properties could be helpful in grammar induction. While such a problem is usually regarded as unsupervised learning, our innovation is to treat it as supervised learning, using a large collection of realistic synthetic languages as training data. The supervised learner must identify surface features of a language’s POS sequence (hand-engineered or neural features) that correlate with the language’s deeper structure (latent trees). In the experiment, we show: 1) Given a small set of real languages, it helps to add many synthetic languages to the training data. 2) Our system is robust even when the POS sequences include noise. 3) Our system on this task outperforms a grammar induction baseline by a large margin. 147–161 @@ -182,7 +182,7 @@ Joint Modeling of Topics, Citations, and Topical Authority in Academic Corpora JooyeonKim DongwooKim - AliceOh + AliceOh 10.1162/tacl_a_00055 Much of scientific progress stems from previously published findings, but searching through the vast sea of scientific publications is difficult. We often rely on metrics of scholarly authority to find the prominent authors but these authority indices do not differentiate authority based on research topics. We present Latent Topical-Authority Indexing (LTAI) for jointly modeling the topics, citations, and topical authority in a corpus of academic papers. Compared to previous models, LTAI differs in two main aspects. First, it explicitly models the generative process of the citations, rather than treating the citations as given. Second, it models each author’s influence on citations of a paper based on the topics of the cited papers, as well as the citing papers. We fit LTAI into four academic corpora: CORA, Arxiv Physics, PNAS, and Citeseer. We compare the performance of LTAI against various baselines, starting with the latent Dirichlet allocation, to the more advanced models including author-link topic model and dynamic author citation topic model. The results show that LTAI achieves improved accuracy over other similar models when predicting words, citations and authors of publications. 191–204 @@ -192,11 +192,11 @@ Pushing the Limits of Translation Quality Estimation - André F. T.Martins + André F. T.Martins MarcinJunczys-Dowmunt - Fabio N.Kepler - RamónAstudillo - ChrisHokamp + Fabio N.Kepler + RamónAstudillo + ChrisHokamp RomanGrundkiewicz 10.1162/tacl_a_00056 Translation quality estimation is a task of growing importance in NLP, due to its potential to reduce post-editing human effort in disruptive ways. However, this potential is currently limited by the relatively low accuracy of existing systems. In this paper, we achieve remarkable improvements by exploiting synergies between the related tasks of word-level quality estimation and automatic post-editing. First, we stack a new, carefully engineered, neural model into a rich feature-based word-level quality estimation system. Then, we use the output of an automatic post-editing system as an extra feature, obtaining striking results on WMT16: a word-level FMULT1 score of 57.47% (an absolute gain of +7.95% over the current state of the art), and a Pearson correlation score of 65.56% for sentence-level HTER prediction (an absolute gain of +13.36%). @@ -220,7 +220,7 @@ Domain-Targeted, High Precision Knowledge Extraction - BhavanaDalvi Mishra + BhavanaDalvi Mishra NiketTandon PeterClark 10.1162/tacl_a_00058 @@ -243,7 +243,7 @@ Learning to Prune: Exploring the Frontier of Fast and Accurate Parsing TimVieira - JasonEisner + JasonEisner 10.1162/tacl_a_00060 Pruning hypotheses during dynamic programming is commonly used to speed up inference in settings such as parsing. Unlike prior work, we train a pruning policy under an objective that measures end-to-end performance: we search for a fast and accurate policy. This poses a difficult machine learning problem, which we tackle with the lols algorithm. lols training must continually compute the effects of changing pruning decisions: we show how to make this efficient in the constituency parsing setting, via dynamic programming and change propagation algorithms. We find that optimizing end-to-end performance in this way leads to a better Pareto frontier—i.e., parsers which are more accurate for a given runtime. 263–278 @@ -254,7 +254,7 @@ Cross-Lingual Syntactic Transfer with Limited Resources Mohammad SadeghRasooli - MichaelCollins + MichaelCollins 10.1162/tacl_a_00061 We describe a simple but effective method for cross-lingual syntactic transfer of dependency parsers, in the scenario where a large amount of translation data is not available. This method makes use of three steps: 1) a method for deriving cross-lingual word clusters, which can then be used in a multilingual parser; 2) a method for transferring lexical information from a target language to source language treebanks; 3) a method for integrating these steps with the density-driven annotation projection method of Rasooli and Collins (2015). Experiments show improvements over the state-of-the-art in several languages used in previous work, in a setting where the only source of translation data is the Bible, a considerably smaller corpus than the Europarl corpus used in previous work. Results using the Europarl corpus as a source of translation data show additional improvements over the results of Rasooli and Collins (2015). We conclude with results on 38 datasets from the Universal Dependencies corpora. 279–293 @@ -280,9 +280,9 @@ DiarmuidÓ Séaghdha IraLeviant RoiReichart - MilicaGašić + MilicaGašić AnnaKorhonen - SteveYoung + SteveYoung 10.1162/tacl_a_00063 We present Attract-Repel, an algorithm for improving the semantic quality of word vectors by injecting constraints extracted from lexical resources. Attract-Repel facilitates the use of constraints from mono- and cross-lingual resources, yielding semantically specialized cross-lingual vector spaces. Our evaluation shows that the method can make use of existing cross-lingual lexicons to construct high-quality vector spaces for a plethora of different languages, facilitating semantic transfer from high- to lower-resource ones. The effectiveness of our approach is demonstrated with state-of-the-art results on semantic similarity datasets in six languages. We next show that Attract-Repel-specialized vectors boost performance in the downstream task of dialogue state tracking (DST) across multiple languages. Finally, we show that cross-lingual vector spaces produced by our algorithm facilitate the training of multilingual DST models, which brings further performance improvements. 309–324 @@ -293,7 +293,7 @@ Colors in Context: A Pragmatic Neural Model for Grounded Language Understanding WillMonroe Robert X.D.Hawkins - Noah D.Goodman + Noah D.Goodman ChristopherPotts 10.1162/tacl_a_00064 We present a model of pragmatic referring expression interpretation in a grounded communication task (identifying colors from descriptions) that draws upon predictions from two recurrent neural network classifiers, a speaker and a listener, unified by a recursive pragmatic reasoning framework. Experiments show that this combined pragmatic model interprets color descriptions more accurately than the classifiers from which it is built, and that much of this improvement results from combining the speaker and listener perspectives. We observe that pragmatic reasoning helps primarily in the hardest cases: when the model must distinguish very similar colors, or when few utterances adequately express the target color. Our findings make use of a newly-collected corpus of human utterances in color reference games, which exhibit a variety of pragmatic behaviors. We also show that the embedded speaker model reproduces many of these pragmatic behaviors. @@ -306,7 +306,7 @@ <fixed-case>G</fixed-case>oogle’s Multilingual Neural Machine Translation System: Enabling Zero-Shot Translation MelvinJohnson MikeSchuster - Quoc V.Le + Quoc V.Le MaximKrikun YonghuiWu ZhifengChen @@ -384,7 +384,7 @@ Evaluating Low-Level Speech Features Against Human Perceptual Data CaitlinRichter - Naomi H.Feldman + Naomi H.Feldman HariniSalgado ArenJansen 10.1162/tacl_a_00071 @@ -408,7 +408,7 @@ Unsupervised Acquisition of Comprehensive Multiword Lexicons using Competition in an n-gram Lattice JulianBrooke JanŠnajder - TimothyBaldwin + TimothyBaldwin 10.1162/tacl_a_00073 We present a new model for acquiring comprehensive multiword lexicons from large corpora based on competition among n-gram candidates. In contrast to the standard approach of simple ranking by association measure, in our model n-grams are arranged in a lattice structure based on subsumption and overlap relationships, with nodes inhibiting other nodes in their vicinity when they are selected as a lexical item. We show how the configuration of such a lattice can be optimized tractably, and demonstrate using annotations of sampled n-grams that our method consistently outperforms alternatives by at least 0.05 F-score across several corpora and languages. 455–470 diff --git a/data/xml/Q18.xml b/data/xml/Q18.xml index 2e363410f8..5056929662 100644 --- a/data/xml/Q18.xml +++ b/data/xml/Q18.xml @@ -19,7 +19,7 @@ Whodunnit? Crime Drama as a Case for Natural Language Understanding LeaFrermann - Shay B.Cohen + Shay B.Cohen MirellaLapata 10.1162/tacl_a_00001 In this paper we argue that crime drama exemplified in television programs such as CSI: Crime Scene Investigation is an ideal testbed for approximating real-world natural language understanding and the complex inferences associated with it. We propose to treat crime drama as a new inference task, capitalizing on the fact that each episode poses the same basic question (i.e., who committed the crime) and naturally provides the answer when the perpetrator is revealed. We develop a new dataset based on CSI episodes, formalize perpetrator identification as a sequence labeling problem, and develop an LSTM-based model which learns from multi-modal data. Experimental results show that an incremental inference strategy is key to making accurate guesses as well as learning from representations fusing textual, visual, and acoustic input. @@ -42,7 +42,7 @@ Joint Semantic Synthesis and Morphological Analysis of the Derived Word RyanCotterell - HinrichSchütze + HinrichSchütze 10.1162/tacl_a_00003 Much like sentences are composed of words, words themselves are composed of smaller units. For example, the English word questionably can be analyzed as question+able+ly. However, this structural decomposition of the word does not directly give us a semantic representation of the word’s meaning. Since morphology obeys the principle of compositionality, the semantics of the word can be systematically derived from the meaning of its parts. In this work, we propose a novel probabilistic model of word formation that captures both the analysis of a word w into its constituent segments and the synthesis of the meaning of w from the meanings of those segments. Our model jointly learns to segment words into morphemes and compose distributional semantic vectors of those morphemes. We experiment with the model on English CELEX data and German DErivBase (Zeller et al., 2013) data. We show that jointly modeling semantics increases both segmentation accuracy and morpheme F1 by between 3% and 5%. Additionally, we investigate different models of vector composition, showing that recurrent neural networks yield an improvement over simple additive models. Finally, we study the degree to which the representations correspond to a linguist’s notion of morphological productivity. 33–48 @@ -88,7 +88,7 @@ Towards Evaluating Narrative Quality In Student Writing SwapnaSomasundaran MichaelFlor - MartinChodorow + MartinChodorow HillaryMolloy BinodGyawali LauraMcCulla @@ -113,7 +113,7 @@ Conversation Modeling on <fixed-case>R</fixed-case>eddit Using a Graph-Structured <fixed-case>LSTM</fixed-case> VictoriaZayats - MariOstendorf + MariOstendorf 10.1162/tacl_a_00009 This paper presents a novel approach for modeling threaded discussions on social media using a graph-structured bidirectional LSTM (long-short term memory) which represents both hierarchical and temporal conversation structure. In experiments with a task of predicting popularity of comments in Reddit discussions, the proposed model outperforms a node-independent architecture for different sets of input features. Analyses show a benefit to the model over the full course of the discussion, improving detection in both early and late stages. Further, the use of language cues with the bidirectional tree state updates helps with identifying controversial comments. 121–132 @@ -124,7 +124,7 @@ Learning Representations Specialized in Spatial Knowledge: Leveraging Language and Vision GuillemCollell - Marie-FrancineMoens + Marie-FrancineMoens 10.1162/tacl_a_00010 Spatial understanding is crucial in many real-world problems, yet little progress has been made towards building representations that capture spatial knowledge. Here, we move one step forward in this direction and learn such representations by leveraging a task consisting in predicting continuous 2D spatial arrangements of objects given object-relationship-object instances (e.g., “cat under chair”) and a simple neural network model that learns the task from annotated images. We show that the model succeeds in this task and, furthermore, that it is capable of predicting correct spatial arrangements for unseen objects if either CNN features or word embeddings of the objects are provided. The differences between visual and linguistic features are discussed. Next, to evaluate the spatial representations learned in the previous task, we introduce a task and a dataset consisting in a set of crowdsourced human ratings of spatial similarity for object pairs. We find that both CNN (convolutional neural network) features and word embeddings predict human judgments of similarity well and that these vectors can be further specialized in spatial knowledge if we update them when training the model that predicts spatial arrangements of objects. Overall, this paper paves the way towards building distributed spatial representations, contributing to the understanding of spatial expressions in language. 133–144 @@ -137,8 +137,8 @@ HaoZhou ShujianHuang LiliMou - XinyuDai - JiajunChen + XinyuDai + JiajunChen ZhaopengTu 10.1162/tacl_a_00011 Existing neural machine translation systems do not explicitly model what has been translated and what has not during the decoding phase. To address this problem, we propose a novel mechanism that separates the source information into two parts: translated Past contents and untranslated Future contents, which are modeled by two additional recurrent layers. The Past and Future contents are fed to both the attention model and the decoder states, which provides Neural Machine Translation (NMT) systems with the knowledge of translated and untranslated contents. Experimental results show that the proposed approach significantly improves the performance in Chinese-English, German-English, and English-German translation tasks. Specifically, the proposed model outperforms the conventional coverage model in terms of both the translation quality and the alignment error rate. @@ -172,7 +172,7 @@ Unsupervised Word Mapping Using Structural Similarities in Monolingual Embeddings HananAldarmaki MaheshMohan - MonaDiab + MonaDiab 10.1162/tacl_a_00014 Most existing methods for automatic bilingual dictionary induction rely on prior alignments between the source and target languages, such as parallel corpora or seed dictionaries. For many language pairs, such supervised alignments are not readily available. We propose an unsupervised approach for learning a bilingual dictionary for a pair of languages given their independently-learned monolingual word embeddings. The proposed method exploits local and global structures in monolingual vector spaces to align them such that similar words are mapped to each other. We show empirically that the performance of bilingual correspondents that are learned using our proposed unsupervised method is comparable to that of using supervised bilingual correspondents from a seed dictionary. 185–196 @@ -194,7 +194,7 @@ Unsupervised Grammar Induction with Depth-bounded <fixed-case>PCFG</fixed-case> LifengJin FinaleDoshi-Velez - TimothyMiller + TimothyMiller WilliamSchuler LaneSchwartz 10.1162/tacl_a_00016 @@ -228,7 +228,7 @@ Do latent tree learning models identify meaningful structure in sentences? AdinaWilliams AndrewDrozdov - Samuel R.Bowman + Samuel R.Bowman 10.1162/tacl_a_00019 Recent work on the problem of latent tree learning has made it possible to train neural networks that learn to both parse a sentence and use the resulting parse to interpret the sentence, all without exposure to ground-truth parse trees at training time. Surprisingly, these models often perform better at sentence understanding tasks than models that use parse trees from conventional parsers. This paper aims to investigate what these latent tree learning models learn. We replicate two such models in a shared codebase and find that (i) only one of these models outperforms conventional tree-structured models on sentence classification, (ii) its parsing strategies are not especially consistent across random restarts, (iii) the parses it produces tend to be shallower than standard Penn Treebank (PTB) parses, and (iv) they do not resemble those of PTB or any other semantic or syntactic formalism that the authors are aware of. 253–267 @@ -261,9 +261,9 @@ Leveraging Orthographic Similarity for Multilingual Neural Transliteration AnoopKunchukuttan - MiteshKhapra + MiteshKhapra GurneetSingh - PushpakBhattacharyya + PushpakBhattacharyya 10.1162/tacl_a_00022 We address the task of joint training of transliteration models for multiple language pairs (multilingual transliteration). This is an instance of multitask learning, where individual tasks (language pairs) benefit from sharing knowledge with related tasks. We focus on transliteration involving related tasks i.e., languages sharing writing systems and phonetic properties (orthographically similar languages). We propose a modified neural encoder-decoder model that maximizes parameter sharing across language pairs in order to effectively leverage orthographic similarity. We show that multilingual transliteration significantly outperforms bilingual transliteration in different scenarios (average increase of 58% across a variety of languages we experimented with). We also show that multilingual transliteration models can generalize well to languages/language pairs not encountered during training and hence perform well on the zeroshot transliteration task. We show that further improvements can be achieved by using phonetic feature input. 303–316 @@ -274,11 +274,11 @@ The <fixed-case>N</fixed-case>arrative<fixed-case>QA</fixed-case> Reading Comprehension Challenge TomášKočiský JonathanSchwarz - PhilBlunsom - ChrisDyer + PhilBlunsom + ChrisDyer Karl MoritzHermann GáborMelis - EdwardGrefenstette + EdwardGrefenstette 10.1162/tacl_a_00023 Reading comprehension (RC)—in contrast to information retrieval—requires integrating information and reasoning about events, entities, and their relations across a full document. Question answering is conventionally used to assess RC ability, in both artificial agents and children learning to read. However, existing RC datasets and tasks are dominated by questions that can be solved by selecting answers using superficial information (e.g., local context similarity or global term frequency); they thus fail to test for the essential integrative aspect of RC. To encourage progress on deeper comprehension of language, we present a new dataset and set of tasks in which the reader must answer questions about stories by reading entire books or movie scripts. These tasks are designed so that successfully answering their questions requires understanding the underlying narrative rather than relying on shallow pattern matching or salience. We show that although humans solve the tasks easily, standard RC models struggle on the tasks presented here. We provide an analysis of the dataset and the challenges it presents. 317–328 @@ -335,8 +335,8 @@ DavidJurgens SrijanKumar RaineHoover - DanMcFarland - DanJurafsky + DanMcFarland + DanJurafsky 10.1162/tacl_a_00028 Citations have long been used to characterize the state of a scientific field and to identify influential works. However, writers use citations for different purposes, and this varied purpose influences uptake by future scholars. Unfortunately, our understanding of how scholars use and frame citations has been limited to small-scale manual citation analysis of individual papers. We perform the largest behavioral study of citations to date, analyzing how scientific works frame their contributions through different types of citations and how this framing affects the field as a whole. We introduce a new dataset of nearly 2,000 citations annotated for their function, and use it to develop a state-of-the-art classifier and label the papers of an entire field: Natural Language Processing. We then show how differences in framing affect scientific uptake and reveal the evolution of the publication venues and the field as a whole. We demonstrate that authors are sensitive to discourse structure and publication venue when citing, and that how a paper frames its work through citations is predictive of the citation count it will receive. Finally, we use changes in citation framing to show that the field of NLP is undergoing a significant increase in consensus. 391–406 @@ -372,7 +372,7 @@ Generating Sentences by Editing Prototypes KelvinGuu - Tatsunori B.Hashimoto + Tatsunori B.Hashimoto YonatanOren PercyLiang 10.1162/tacl_a_00030 @@ -404,7 +404,7 @@ PrateekVerma NelsonMorgan Jennifer L.Eberhardt - DanJurafsky + DanJurafsky 10.1162/tacl_a_00031 We apply computational dialog methods to police body-worn camera footage to model conversations between police officers and community members in traffic stops. Relying on the theory of institutional talk, we develop a labeling scheme for police speech during traffic stops, and a tagger to detect institutional dialog acts (Reasons, Searches, Offering Help) from transcribed text at the turn (78% F-score) and stop (89% F-score) level. We then develop speech recognition and segmentation algorithms to detect these acts at the stop level from raw camera audio (81% F-score, with even higher accuracy for crucial acts like conveying the reason for the stop). We demonstrate that the dialog structures produced by our tagger could reveal whether officers follow law enforcement norms like introducing themselves, explaining the reason for the stop, and asking permission for searches. This work may therefore inform and aid efforts to ensure the procedural justice of police-community interactions. 467–481 @@ -428,7 +428,7 @@ Low-Rank <fixed-case>RNN</fixed-case> Adaptation for Context-Aware Language Modeling AaronJaech - MariOstendorf + MariOstendorf 10.1162/tacl_a_00035 A context-aware language model uses location, user and/or domain metadata (context) to adapt its predictions. In neural language models, context information is typically represented as an embedding and it is given to the RNN as an additional input, which has been shown to be useful in many applications. We introduce a more powerful mechanism for using context to adapt an RNN by letting the context vector control a low-rank transformation of the recurrent layer weight matrix. Experiments show that allowing a greater fraction of the model parameters to be adjusted has benefits in terms of perplexity and classification for several different types of context. 497–510 @@ -448,7 +448,7 @@ Planning, Inference and Pragmatics in Sequential Language Games FereshteKhani - Noah D.Goodman + Noah D.Goodman PercyLiang 10.1162/tacl_a_00037 We study sequential language games in which two players, each with private information, communicate to achieve a common goal. In such games, a successful player must (i) infer the partner’s private information from the partner’s messages, (ii) generate messages that are most likely to help with the goal, and (iii) reason pragmatically about the partner’s strategy. We propose a model that captures all three characteristics and demonstrate their importance in capturing human behavior on a new goal-oriented dataset we collected using crowdsourcing. @@ -459,7 +459,7 @@ Probabilistic Verb Selection for Data-to-Text Generation DellZhang - JiahaoYuan + JiahaoYuan XiaolingWang AdamFoster 10.1162/tacl_a_00038 @@ -474,7 +474,7 @@ XilunChen YuSun BenAthiwaratkun - ClaireCardie + ClaireCardie KilianWeinberger 10.1162/tacl_a_00039 In recent years great success has been achieved in sentiment classification for English, thanks in part to the availability of copious annotated resources. Unfortunately, most languages do not enjoy such an abundance of labeled data. To tackle the sentiment classification problem in low-resource languages without adequate annotated data, we propose an Adversarial Deep Averaging Network (ADAN1) to transfer the knowledge learned from labeled data on a resource-rich source language to low-resource languages where only unlabeled data exist. ADAN has two discriminative branches: a sentiment classifier and an adversarial language discriminator. Both branches take input from a shared feature extractor to learn hidden representations that are simultaneously indicative for the classification task and invariant across languages. Experiments on Chinese and Arabic sentiment classification demonstrate that ADAN significantly outperforms state-of-the-art systems. @@ -489,8 +489,8 @@ BobCarpenter JonChamberlain DirkHovy - UdoKruschwitz - MassimoPoesio + UdoKruschwitz + MassimoPoesio 10.1162/tacl_a_00040 The analysis of crowdsourced annotations in natural language processing is concerned with identifying (1) gold standard labels, (2) annotator accuracies and biases, and (3) item difficulties and error patterns. Traditionally, majority voting was used for 1, and coefficients of agreement for 2 and 3. Lately, model-based analysis of corpus annotations have proven better at all three tasks. But there has been relatively little work comparing them on the same datasets. This paper aims to fill this gap by analyzing six models of annotation, covering different approaches to annotator ability, item difficulty, and parameter pooling (tying) across annotators and items. We evaluate these models along four aspects: comparison to gold labels, predictive accuracy for new annotations, annotator characterization, and item difficulty, using four datasets with varying degrees of noise in the form of random (spammy) annotators. We conclude with guidelines for model selection, application, and implementation. 571–585 @@ -500,7 +500,7 @@ Data Statements for Natural Language Processing: Toward Mitigating System Bias and Enabling Better Science - Emily M.Bender + Emily M.Bender BatyaFriedman 10.1162/tacl_a_00041 In this paper, we propose data statements as a design solution and professional practice for natural language processing technologists, in both research and development. Through the adoption and widespread use of data statements, the field can begin to address critical scientific and ethical issues that result from the use of data from certain populations in the development of technology for other populations. We present a form that data statements can take and explore the implications of adopting them as part of regular practice. We argue that data statements will help alleviate issues related to exclusion and bias in language technology, lead to better precision in claims about how natural language processing research can generalize and thus better engineering results, protect companies from public embarrassment, and ultimately lead to language technology that meets its users in their own preferred linguistic style and furthermore does not misrepresent them to others. @@ -536,8 +536,8 @@ Integrating Weakly Supervised Word Sense Disambiguation into Neural Machine Translation XiaoPu NikolaosPappas - JamesHenderson - AndreiPopescu-Belis + JamesHenderson + AndreiPopescu-Belis 10.1162/tacl_a_00242 This paper demonstrates that word sense disambiguation (WSD) can improve neural machine translation (NMT) by widening the source context considered when modeling the senses of potentially ambiguous words. We first introduce three adaptive clustering algorithms for WSD, based on k-means, Chinese restaurant processes, and random walks, which are then applied to large word contexts represented in a low-rank space and evaluated on SemEval shared-task data. We then learn word vectors jointly with sense vectors defined by our best WSD method, within a state-of-the-art NMT system. We show that the concatenation of these vectors, and the use of a sense selection mechanism based on the weighted average of sense vectors, outperforms several baselines including sense-aware ones. This is demonstrated by translation on five language pairs. The improvements are more than 1 BLEU point over strong NMT baselines, +4% accuracy over all ambiguous nouns and verbs, or +20% when scored manually over several challenging words. 635–649 @@ -559,7 +559,7 @@ Surface Statistics of an Unknown Language Indicate How to Parse It DingquanWang - JasonEisner + JasonEisner 10.1162/tacl_a_00248 We introduce a novel framework for delexicalized dependency parsing in a new language. We show that useful features of the target language can be extracted automatically from an unparsed corpus, which consists only of gold part-of-speech (POS) sequences. Providing these features to our neural parser enables it to parse sequences like those in the corpus. Strikingly, our system has no supervision in the target language. Rather, it is a multilingual system that is trained end-to-end on a variety of other languages, so it learns a feature extractor that works well. We show experimentally across multiple languages: (1) Features computed from the unparsed corpus improve parsing accuracy. (2) Including thousands of synthetic languages in the training yields further improvement. (3) Despite being computed from unparsed corpora, our learned task-specific features beat previous work’s interpretable typological features that require parsed corpora or expert categorization of the language. Our best method improved attachment scores on held-out test languages by an average of 5.6 percentage points over past work that does not inspect the unparsed data (McDonald et al., 2011), and by 20.7 points over past “grammar induction” work that does not use training languages (Naseem et al., 2010). 667–685 @@ -569,7 +569,7 @@ Attentive Convolution: Equipping <fixed-case>CNN</fixed-case>s with <fixed-case>RNN</fixed-case>-style Attention Mechanisms WenpengYin - HinrichSchütze + HinrichSchütze 10.1162/tacl_a_00249 In NLP, convolutional neural networks (CNNs) have benefited less than recurrent neural networks (RNNs) from attention mechanisms. We hypothesize that this is because the attention in CNNs has been mainly implemented as attentive pooling (i.e., it is applied to pooling) rather than as attentive convolution (i.e., it is integrated into convolution). Convolution is the differentiator of CNNs in that it can powerfully model the higher-level representation of a word by taking into account its local fixed-size context in the input text tx. In this work, we propose an attentive convolution network, ATTCONV. It extends the context scope of the convolution operation, deriving higher-level features for a word not only from local context, but also from information extracted from nonlocal context by the attention mechanism commonly used in RNNs. This nonlocal context can come (i) from parts of the input text tx that are distant or (ii) from extra (i.e., external) contexts ty. Experiments on sentence modeling with zero-context (sentiment analysis), single-context (textual entailment) and multiple-context (claim verification) demonstrate the effectiveness of ATTCONV in sentence representation learning with the incorporation of context. In particular, attentive convolution outperforms attentive pooling and is a strong competitor to popular attentive RNNs.1 687–702 @@ -579,12 +579,12 @@ Learning Typed Entailment Graphs with Global Soft Constraints Mohammad JavadHosseini - NathanaelChambers + NathanaelChambers SivaReddy Xavier R.Holt - Shay B.Cohen + Shay B.Cohen MarkJohnson - MarkSteedman + MarkSteedman 10.1162/tacl_a_00250 This paper presents a new method for learning typed entailment graphs from text. We extract predicate-argument structures from multiple-source news corpora, and compute local distributional similarity scores to learn entailments between predicates with typed arguments (e.g., person contracted disease). Previous work has used transitivity constraints to improve local decisions, but these constraints are intractable on large graphs. We instead propose a scalable method that learns globally consistent similarity scores based on new soft constraints that consider both the structures across typed entailment graphs and inside each graph. Learning takes only a few hours to run over 100K predicates and our results show large improvements over local similarity scores on two entailment data sets. We further show improvements over paraphrases and entailments from the Paraphrase Database, and prior state-of-the-art entailment graphs. We show that the entailment graphs improve performance in a downstream task. 703–717 diff --git a/data/xml/Q19.xml b/data/xml/Q19.xml index 9beb2729d4..d2b703a620 100644 --- a/data/xml/Q19.xml +++ b/data/xml/Q19.xml @@ -55,7 +55,7 @@ Analysis Methods in Neural Language Processing: A Survey YonatanBelinkov - JamesGlass + JamesGlass 10.1162/tacl_a_00254 The field of natural language processing has seen impressive progress in recent years, with neural network models replacing many of the traditional systems. A plethora of new models have been proposed, many of which are thought to be opaque compared to their feature-rich counterparts. This has led researchers to analyze, interpret, and evaluate neural networks in novel and more fine-grained ways. In this survey paper, we review analysis methods in neural language processing, categorize them according to prominent research trends, highlight existing limitations, and point to potential directions for future work. 49–72 @@ -65,8 +65,8 @@ Unlexicalized Transition-based Discontinuous Constituency Parsing MaximinCoavoux - BenoîtCrabbé - Shay B.Cohen + BenoîtCrabbé + Shay B.Cohen 10.1162/tacl_a_00255 Lexicalized parsing models are based on the assumptions that (i) constituents are organized around a lexical head and (ii) bilexical statistics are crucial to solve ambiguities. In this paper, we introduce an unlexicalized transition-based parser for discontinuous constituency structures, based on a structure-label transition system and a bi-LSTM scoring system. We compare it with lexicalized parsing models in order to address the question of lexicalization in the context of discontinuous constituency parsing. Our experiments show that unlexicalized models systematically achieve higher results than lexicalized models, and provide additional empirical evidence that lexicalization is not necessary to achieve strong parsing results. Our best unlexicalized model sets a new state of the art on English and German discontinuous constituency treebanks. We further provide a per-phenomenon analysis of its errors on discontinuous constituents. 73–89 @@ -77,7 +77,7 @@ Synchronous Bidirectional Neural Machine Translation LongZhou JiajunZhang - ChengqingZong + ChengqingZong 10.1162/tacl_a_00256 Existing approaches to neural machine translation (NMT) generate the target language sequence token-by-token from left to right. However, this kind of unidirectional decoding framework cannot make full use of the target-side future contexts which can be produced in a right-to-left decoding direction, and thus suffers from the issue of unbalanced outputs. In this paper, we introduce a synchronous bidirectional–neural machine translation (SB-NMT) that predicts its outputs using left-to-right and right-to-left decoding simultaneously and interactively, in order to leverage both of the history and future information at the same time. Specifically, we first propose a new algorithm that enables synchronous bidirectional decoding in a single model. Then, we present an interactive decoding model in which left-to-right (right-to-left) generation does not only depend on its previously generated outputs, but also relies on future contexts predicted by right-to-left (left-to-right) decoding. We extensively evaluate the proposed SB-NMT model on large-scale NIST Chinese–English, WMT14 English–German, and WMT18 Russian–English translation tasks. Experimental results demonstrate that our model achieves significant improvements over the strong Transformer model by 3.92, 1.49, and 1.04 BLEU points, respectively, and obtains the state-of-the-art performance on Chinese–English and English–German translation tasks. 91–105 @@ -102,7 +102,7 @@ Rotational Unit of Memory: A Novel Representation Unit for <fixed-case>RNN</fixed-case>s with Scalable Applications RumenDangovski LiJing - PreslavNakov + PreslavNakov MićoTatalović MarinSoljačić 10.1162/tacl_a_00258 @@ -114,7 +114,7 @@ <fixed-case>GILE</fixed-case>: A Generalized Input-Label Embedding for Text Classification NikolaosPappas - JamesHenderson + JamesHenderson 10.1162/tacl_a_00259 Neural text classification models typically treat output labels as categorical variables that lack description and semantics. This forces their parametrization to be dependent on the label set size, and, hence, they are unable to scale to large label sets and generalize to unseen ones. Existing joint input-label text models overcome these issues by exploiting label descriptions, but they are unable to capture complex label relationships, have rigid parametrization, and their gains on unseen labels happen often at the expense of weak performance on the labels seen during training. In this paper, we propose a new input-label model that generalizes over previous such models, addresses their limitations, and does not compromise performance on seen labels. The model consists of a joint nonlinear input-label embedding with controllable capacity and a joint-space-dependent classification unit that is trained with cross-entropy loss to optimize classification performance. We evaluate models on full-resource and low- or zero-resource text classification of multilingual news and biomedical text with a large label set. Our model outperforms monolingual and multilingual models that do not leverage label semantics and previous joint input-label space models in both scenarios. 139–155 @@ -162,7 +162,7 @@ Categorical Metadata Representation for Customized Text Classification JihyeokKim - Reinald KimAmplayo + Reinald KimAmplayo KyungjaeLee SuaSung MinjiSeo @@ -181,7 +181,7 @@ JianshuChen DongYu YejinChoi - ClaireCardie + ClaireCardie 10.1162/tacl_a_00264 We present DREAM, the first dialogue-based multiple-choice reading comprehension data set. Collected from English as a Foreign Language examinations designed by human experts to evaluate the comprehension level of Chinese learners of English, our data set contains 10,197 multiple-choice questions for 6,444 dialogues. In contrast to existing reading comprehension data sets, DREAM is the first to focus on in-depth multi-turn multi-party dialogue understanding. DREAM is likely to present significant challenges for existing reading comprehension systems: 84% of answers are non-extractive, 85% of questions require reasoning beyond a single sentence, and 34% of questions also involve commonsense knowledge. We apply several popular neural reading comprehension models that primarily exploit surface information within the text and find them to, at best, just barely outperform a rule-based approach. We next investigate the effects of incorporating dialogue structure and different kinds of general world knowledge into both rule-based and (neural and non-neural) machine learning-based reading comprehension models. Experimental results on the DREAM data set show the effectiveness of dialogue structure and general world knowledge. DREAM is available at https://dataset.org/dream/. 217–231 @@ -204,7 +204,7 @@ <fixed-case>C</fixed-case>o<fixed-case>QA</fixed-case>: A Conversational Question Answering Challenge SivaReddy DanqiChen - Christopher D.Manning + Christopher D.Manning 10.1162/tacl_a_00266 Humans gather information through conversations involving a series of interconnected questions and answers. For machines to assist in information gathering, it is therefore essential to enable them to answer conversational questions. We introduce CoQA, a novel dataset for building Conversational Question Answering systems. Our dataset contains 127k questions with answers, obtained from 8k conversations about text passages from seven diverse domains. The questions are conversational, and the answers are free-form text with their corresponding evidence highlighted in the passage. We analyze CoQA in depth and show that conversational questions have challenging phenomena not present in existing reading comprehension datasets (e.g., coreference and pragmatic reasoning). We evaluate strong dialogue and reading comprehension models on CoQA. The best system obtains an F1 score of 65.4%, which is 23.4 points behind human performance (88.8%), indicating that there is ample room for improvement. We present CoQA as a challenge to the community at https://stanfordnlp.github.io/coqa. 249–266 @@ -253,7 +253,7 @@ MatthiasSperber GrahamNeubig JanNiehues - AlexWaibel + AlexWaibel 10.1162/tacl_a_00270 Speech translation has traditionally been approached through cascaded models consisting of a speech recognizer trained on a corpus of transcribed speech, and a machine translation system trained on parallel texts. Several recent works have shown the feasibility of collapsing the cascade into a single, direct model that can be trained in an end-to-end fashion on a corpus of translated speech. However, experiments are inconclusive on whether the cascade or the direct model is stronger, and have only been conducted under the unrealistic assumption that both are trained on equal amounts of data, ignoring other available speech recognition and machine translation corpora. In this paper, we demonstrate that direct speech translation models require more data to perform well than cascaded models, and although they allow including auxiliary data through multi-task training, they are poor at exploiting such data, putting them at a severe disadvantage. As a remedy, we propose the use of end- to-end trainable models with two attention mechanisms, the first establishing source speech to source text alignments, the second modeling source to target text alignment. We show that such models naturally decompose into multi-task–trainable recognition and translation tasks and propose an attention-passing technique that alleviates error propagation issues in a previous formulation of a model with two attention stages. Our proposed model outperforms all examined baselines and is able to exploit auxiliary training data much more effectively than direct attentional models. 313–325 @@ -266,7 +266,7 @@ RyanCotterell ChristoKirov MansHulden - JasonEisner + JasonEisner 10.1162/tacl_a_00271 We quantify the linguistic complexity of different languages’ morphological systems. We verify that there is a statistically significant empirical trade-off between paradigm size and irregularity: A language’s inflectional paradigms may be either large in size or highly irregular, but never both. We define a new measure of paradigm irregularity based on the conditional entropy of the surface realization of a paradigm— how hard it is to jointly predict all the word forms in a paradigm from the lemma. We estimate irregularity by training a predictive model. Our measurements are taken on large morphological paradigms from 36 typologically diverse languages. 327–342 @@ -288,7 +288,7 @@ A Generative Model for Punctuation in Dependency Trees Xiang LisaLi DingquanWang - JasonEisner + JasonEisner 10.1162/tacl_a_00273 Treebanks traditionally treat punctuation marks as ordinary words, but linguists have suggested that a tree’s “true” punctuation marks are not observed (Nunberg, 1990). These latent “underlying” marks serve to delimit or separate constituents in the syntax tree. When the tree’s yield is rendered as a written sentence, a string rewriting mechanism transduces the underlying marks into “surface” marks, which are part of the observed (surface) string but should not be regarded as part of the tree. We formalize this idea in a generative model of punctuation that admits efficient dynamic programming. We train it without observing the underlying marks, by locally maximizing the incomplete data likelihood (similarly to the EM algorithm). When we use the trained model to reconstruct the tree’s underlying punctuation, the results appear plausible across 5 languages, and in particular are consistent with Nunberg’s analysis of English. We show that our generative model can be used to beat baselines on punctuation restoration. Also, our reconstruction of a sentence’s underlying punctuation lets us appropriately render the surface punctuation (via our trained underlying-to-surface mechanism) when we syntactically transform the sentence. 357–373 @@ -345,7 +345,7 @@ CorinaDima Daniëlde Kok NeeleWitte - ErhardHinrichs + ErhardHinrichs 10.1162/tacl_a_00275 Composition models of distributional semantics are used to construct phrase representations from the representations of their words. Composition models are typically situated on two ends of a spectrum. They either have a small number of parameters but compose all phrases in the same way, or they perform word-specific compositions at the cost of a far larger number of parameters. In this paper we propose transformation weighting (TransWeight), a composition model that consistently outperforms existing models on nominal compounds, adjective-noun phrases, and adverb-adjective phrases in English, German, and Dutch. TransWeight drastically reduces the number of parameters needed compared with the best model in the literature by composing similar words in the same way. 437–451 @@ -358,8 +358,8 @@ TomKwiatkowski JennimariaPalomaki OliviaRedfield - MichaelCollins - AnkurParikh + MichaelCollins + AnkurParikh ChrisAlberti DanielleEpstein IlliaPolosukhin @@ -371,7 +371,7 @@ Ming-WeiChang Andrew M.Dai JakobUszkoreit - QuocLe + QuocLe SlavPetrov 10.1162/tacl_a_00276 We present the Natural Questions corpus, a question answering data set. Questions consist of real anonymized, aggregated queries issued to the Google search engine. An annotator is presented with a question along with a Wikipedia page from the top 5 search results, and annotates a long answer (typically a paragraph) and a short answer (one or more entities) if present on the page, or marks null if no long/short answer is present. The public release consists of 307,373 training examples with single annotations; 7,830 examples with 5-way annotations for development data; and a further 7,842 examples with 5-way annotated sequestered as test data. We present experiments validating quality of the data. We also describe analysis of 25-way annotations on 302 examples, giving insights into human variability on the annotation task. We introduce robust metrics for the purposes of evaluating question answering systems; demonstrate high human upper bounds on these metrics; and establish baseline results using competitive methods drawn from related literature. @@ -383,7 +383,7 @@ Tabula Nearly Rasa: Probing the Linguistic Knowledge of Character-level Neural Language Models Trained on Unsegmented Text MichaelHahn - MarcoBaroni + MarcoBaroni 10.1162/tacl_a_00283 Recurrent neural networks (RNNs) have reached striking performance in many natural language processing tasks. This has renewed interest in whether these generic sequence processing devices are inducing genuine linguistic knowledge. Nearly all current analytical studies, however, initialize the RNNs with a vocabulary of known words, and feed them tokenized input during training. We present a multi-lingual study of the linguistic knowledge encoded in RNNs trained as character-level language models, on input data with word boundaries removed. These networks face a tougher and more cognitively realistic task, having to discover any useful linguistic unit from scratch based on input statistics. The results show that our “near tabula rasa” RNNs are mostly able to solve morphological, syntactic and semantic tasks that intuitively presuppose word-level knowledge, and indeed they learned, to some extent, to track word boundaries. Our study opens the door to speculations about the necessity of an explicit, rigid word lexicon in language learning and usage. 467–484 @@ -393,7 +393,7 @@ Graph Convolutional Network with Sequential Attention for Goal-Oriented Dialogue Systems SumanBanerjee - Mitesh M.Khapra + Mitesh M.Khapra 10.1162/tacl_a_00284 Domain-specific goal-oriented dialogue systems typically require modeling three types of inputs, namely, (i) the knowledge-base associated with the domain, (ii) the history of the conversation, which is a sequence of utterances, and (iii) the current utterance for which the response needs to be generated. While modeling these inputs, current state-of-the-art models such as Mem2Seq typically ignore the rich structure inherent in the knowledge graph and the sentences in the conversation context. Inspired by the recent success of structure-aware Graph Convolutional Networks (GCNs) for various NLP tasks such as machine translation, semantic role labeling, and document dating, we propose a memory-augmented GCN for goal-oriented dialogues. Our model exploits (i) the entity relation graph in a knowledge-base and (ii) the dependency graph associated with an utterance to compute richer representations for words and entities. Further, we take cognizance of the fact that in certain situations, such as when the conversation is in a code-mixed language, dependency parsers may not be available. We show that in such situations we could use the global word co-occurrence graph to enrich the representations of utterances. We experiment with four datasets: (i) the modified DSTC2 dataset, (ii) recently released code-mixed versions of DSTC2 dataset in four languages, (iii) Wizard-of-Oz style CAM676 dataset, and (iv) Wizard-of-Oz style MultiWOZ dataset. On all four datasets our method outperforms existing methods, on a wide range of evaluation metrics. 485–500 @@ -425,7 +425,7 @@ Measuring Online Debaters’ Persuasive Skill from Text over Time KelvinLuu ChenhaoTan - Noah A.Smith + Noah A.Smith 10.1162/tacl_a_00281 Online debates allow people to express their persuasive abilities and provide exciting opportunities for understanding persuasion. Prior studies have focused on studying persuasion in debate content, but without accounting for each debater’s history or exploring the progression of a debater’s persuasive ability. We study debater skill by modeling how participants progress over time in a collection of debates from Debate.org. We build on a widely used model of skill in two-player games and augment it with linguistic features of a debater’s content. We show that online debaters’ skill levels do tend to improve over time. Incorporating linguistic profiles leads to more robust skill estimation than winning records alone. Notably, we find that an interaction feature combining uncertainty cues (hedging) with terms strongly associated with either side of a particular debate (fightin’ words) is more predictive than either feature on its own, indicating the importance of fine- grained linguistic features. 537–550 @@ -435,8 +435,8 @@ Enabling Robust Grammatical Error Correction in New Domains: Data Sets, Metrics, and Analyses CourtneyNapoles - MariaNădejde - JoelTetreault + MariaNădejde + JoelTetreault 10.1162/tacl_a_00282 Until now, grammatical error correction (GEC) has been primarily evaluated on text written by non-native English speakers, with a focus on student essays. This paper enables GEC development on text written by native speakers by providing a new data set and metric. We present a multiple-reference test corpus for GEC that includes 4,000 sentences in two new domains (formal and informal writing by native English speakers) and 2,000 sentences from a diverse set of non-native student writing. We also collect human judgments of several GEC systems on this new test set and perform a meta-evaluation, assessing how reliable automatic metrics are across these domains. We find that commonly used GEC metrics have inconsistent performance across domains, and therefore we propose a new ensemble metric that is robust on all three domains of text. 551–566 @@ -490,7 +490,7 @@ Neural Network Acceptability Judgments AlexWarstadt AmanpreetSingh - Samuel R.Bowman + Samuel R.Bowman 10.1162/tacl_a_00290 This paper investigates the ability of artificial neural networks to judge the grammatical acceptability of a sentence, with the goal of testing their linguistic competence. We introduce the Corpus of Linguistic Acceptability (CoLA), a set of 10,657 English sentences labeled as grammatical or ungrammatical from published linguistics literature. As baselines, we train several recurrent neural network models on acceptability classification, and find that our models outperform unsupervised models by Lau et al. (2016) on CoLA. Error-analysis on specific grammatical phenomena reveals that both Lau et al.’s models and ours learn systematic generalizations like subject-verb-object order. However, all models we test perform far below human level on a wide range of grammatical constructions. 625–641 diff --git a/data/xml/R09.xml b/data/xml/R09.xml index 64f20bf275..e894ca6fd4 100644 --- a/data/xml/R09.xml +++ b/data/xml/R09.xml @@ -5,7 +5,7 @@ Proceedings of the International Conference RANLP-2009 R09-1 GaliaAngelova - RuslanMitkov + RuslanMitkov Association for Computational Linguistics
Borovets, Bulgaria
September @@ -27,7 +27,7 @@ Summary Generation for Toponym-referenced Images using Object Type Language Models AhmetAker - RobertGaizauskas + RobertGaizauskas 6–11 R09-1002 aker-gaizauskas-2009-summary @@ -35,17 +35,17 @@ Prepositional Phrase Attachment in Shallow Parsing VincentVan Asch - WalterDaelemans + WalterDaelemans 12–17 R09-1003 van-asch-daelemans-2009-prepositional A Comparative Study of Open Domain and Opinion Question Answering Systems for Factual and Opinionated Queries - AlexandraBalahur + AlexandraBalahur EsterBoldrini - AndrésMontoyo - PatricioMartínez-Barco + AndrésMontoyo + PatricioMartínez-Barco 18–22 R09-1004 balahur-etal-2009-comparative @@ -60,7 +60,7 @@ Unsupervised Knowledge Extraction for Taxonomies of Concepts from <fixed-case>W</fixed-case>ikipedia EduardBarbu - MassimoPoesio + MassimoPoesio 28–32 R09-1006 barbu-poesio-2009-unsupervised @@ -68,7 +68,7 @@ Exploring Treebank Transformations in Dependency Parsing KepaBengoetxea - KoldoGojenola + KoldoGojenola 33–38 R09-1007 bengoetxea-gojenola-2009-exploring @@ -92,7 +92,7 @@ Cross-Linguistic Sentiment Analysis: From <fixed-case>E</fixed-case>nglish to <fixed-case>S</fixed-case>panish JulianBrooke MilanTofiloski - MaiteTaboada + MaiteTaboada 50–54 R09-1010 brooke-etal-2009-cross @@ -108,16 +108,16 @@ Combining Finite State and Corpus-based Techniques for Unknown Word Prediction KostadinCholakov - Gertjanvan Noord + Gertjanvan Noord 60–64 R09-1012 cholakov-van-noord-2009-combining Prototype-based Active Learning for Lemmatization - WalterDaelemans - Hendrik J.Groenewald - Gerhard B.van Huyssteen + WalterDaelemans + Hendrik J.Groenewald + Gerhard B.van Huyssteen 65–70 R09-1013 daelemans-etal-2009-prototype @@ -142,7 +142,7 @@ Singular Value Decomposition for Feature Selection in Taxonomy Learning FrancescaFallucchi - Fabio MassimoZanzotto + Fabio MassimoZanzotto 82–87 R09-1016 fallucchi-zanzotto-2009-singular @@ -164,7 +164,7 @@ Exploiting the Use of Prior Probabilities for Passage Retrieval in Question Answering - SuryaGanesh + SuryaGanesh VasudevaVarma 99–102 R09-1019 @@ -172,7 +172,7 @@ Exploiting Structure and Content of <fixed-case>W</fixed-case>ikipedia for Query Expansion in the Context - SuryaGanesh + SuryaGanesh VasudevaVarma 103–106 R09-1020 @@ -189,10 +189,10 @@ Feature-Rich Named Entity Recognition for <fixed-case>B</fixed-case>ulgarian Using Conditional Random Fields GeorgiGeorgiev - PreslavNakov + PreslavNakov KuzmanGanchev PetyaOsenova - KirilSimov + KirilSimov 113–117 R09-1022 georgiev-etal-2009-feature @@ -207,15 +207,15 @@ Learning to Identify Educational Materials SamerHassan - RadaMihalcea + RadaMihalcea 123–127 R09-1024 hassan-mihalcea-2009-learning Lexicalized Semi-incremental Dependency Parsing - HanyHassan - KhalilSima’an + HanyHassan + KhalilSima’an AndyWay 128–134 R09-1025 @@ -246,7 +246,7 @@ Detection of Opinions and Facts. A Cognitive Approach Yann VigileHoareau - AdilEl-Ghali + AdilEl-Ghali CharlesTijus 150–154 R09-1029 @@ -254,9 +254,9 @@ Evaluating the Impact of Morphosyntactic Ambiguity in Grammatical Error Detection - ArantzaDíaz de Ilarraza - KoldoGojenola - MaiteOronoz + ArantzaDíaz de Ilarraza + KoldoGojenola + MaiteOronoz 155–160 R09-1030 diaz-de-ilarraza-etal-2009-evaluating @@ -271,7 +271,7 @@ Cross-document Event Extraction and Tracking: Task, Evaluation, Techniques and Challenges HengJi - RalphGrishman + RalphGrishman ZhengChen PrashantGupta 166–172 @@ -281,7 +281,7 @@ Co-Parsing with Competitive Models LidiaKhmylko - Kilian A.Foth + Kilian A.Foth WolfgangMenzel 173–179 R09-1033 @@ -314,7 +314,7 @@ Semi-Supervised Learning for Word Sense Disambiguation: Quality vs. Quantity - SandraKübler + SandraKübler DesislavaZhekova 197–202 R09-1037 @@ -322,7 +322,7 @@ Treelex Meets Adjectival Tables - AnnaKupść + AnnaKupść 203–207 R09-1038 kupsc-2009-treelex @@ -330,7 +330,7 @@ Integrating <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et using a Knowledge-based Word Sense Disambiguation Algorithm EgoitzLaparra - GermanRigau + GermanRigau 208–213 R09-1039 laparra-rigau-2009-integrating @@ -345,9 +345,9 @@ Using Semantic Networks to Identify Temporal Expressions from Semantic Roles - HectorLlorens - BorjaNavarro - EstelaSaquete + HectorLlorens + BorjaNavarro + EstelaSaquete 219–224 R09-1041 llorens-etal-2009-using @@ -355,7 +355,7 @@ The Design of an Experiment in Anaphora Resolution for Referring Expressions Generation Diego Jesusde Lucena - IvandréParaboni + IvandréParaboni 225–229 R09-1042 de-lucena-paraboni-2009-design @@ -396,7 +396,7 @@ Diacritization for Real-World <fixed-case>A</fixed-case>rabic Texts EmadMohamed - SandraKübler + SandraKübler 251–257 R09-1047 mohamed-kubler-2009-diacritization @@ -404,7 +404,7 @@ Multi-entity Sentiment Scoring KaroMoilanen - StephenPulman + StephenPulman 258–263 R09-1048 moilanen-pulman-2009-multi @@ -412,7 +412,7 @@ A Morphological and Syntactic Wide-coverage Lexicon for <fixed-case>S</fixed-case>panish: The Leffe Miguel A.Molinero - BenoîtSagot + BenoîtSagot LionelNicolas 264–269 R09-1049 @@ -429,7 +429,7 @@ Dependency Parsing and Semantic Role Labeling as a Single Task RoserMorante VincentVan Asch - Antalvan den Bosch + Antalvan den Bosch 275–280 R09-1051 morante-etal-2009-dependency @@ -438,7 +438,7 @@ Structured Output Learning with Polynomial Kernel HajimeMorita HiroyaTakamura - ManabuOkumura + ManabuOkumura 281–286 R09-1052 morita-etal-2009-structured @@ -456,15 +456,15 @@ Unsupervised Extraction of False <fixed-case>F</fixed-case>riends from Parallel Bi-Texts Using the Web as a Corpus SvetlinNakov - PreslavNakov - ElenaPaskaleva + PreslavNakov + ElenaPaskaleva 292–298 R09-1054 nakov-etal-2009-unsupervised Evaluating Term Extraction - AdelineNazarenko + AdelineNazarenko HaïfaZargayouna 299–304 R09-1055 @@ -472,7 +472,7 @@ Question Answering over Structured Data: an Entailment-Based Approach to Question Analysis - MatteoNegri + MatteoNegri MilenKouylekov 305–311 R09-1056 @@ -480,8 +480,8 @@ A Semi-supervised Approach for Generating a Table-of-Contents - Viet CuongNguyen - Le MinhNguyen + Viet CuongNguyen + Le MinhNguyen AkiraShimazu 312–317 R09-1057 @@ -491,10 +491,10 @@ Towards Efficient Production of Linguistic Resources: the <fixed-case>V</fixed-case>ictoria Project LionelNicolas Miguel A.Molinero - BenoîtSagot + BenoîtSagot ElenaTrigo - Éricde La Clergerie - MiguelAlonso Pardo + Éricde La Clergerie + MiguelAlonso Pardo JacquesFarré Joan MiquelVergés 318–323 @@ -506,24 +506,24 @@ RafaelOliveira EderNovais RobertoAraujo - IvandréParaboni + IvandréParaboni 324–329 R09-1059 oliveira-etal-2009-classification Interactive Machine Translation Based on Partial Statistical Phrase-based Alignments - DanielOrtiz-Martínez - IsmaelGarcía-Varea - FranciscoCasacuberta + DanielOrtiz-Martínez + IsmaelGarcía-Varea + FranciscoCasacuberta 330–336 R09-1060 ortiz-martinez-etal-2009-interactive Topic Modeling of Research Fields: An Interdisciplinary Perspective - MichaelPaul - RoxanaGirju + MichaelPaul + RoxanaGirju 337–342 R09-1061 paul-girju-2009-topic @@ -538,7 +538,7 @@ Comparing Statistical Similarity Measures for Stylistic Multivariate Analysis MariusPopescu - Liviu P.Dinu + Liviu P.Dinu 349–354 R09-1063 popescu-dinu-2009-comparing @@ -546,7 +546,7 @@ From Bag of Languages to Family Trees From Noisy Corpus TarakaRama - Anil KumarSingh + Anil KumarSingh 355–359 R09-1064 rama-singh-2009-bag @@ -554,7 +554,7 @@ Language-Independent Sentiment Analysis Using Subjectivity and Positional Information VeselinRaychev - PreslavNakov + PreslavNakov 360–364 R09-1065 raychev-nakov-2009-language @@ -591,7 +591,7 @@ Identifying Semantic Relations in Context: Near-misses and Overlaps AllaRozovskaya - RoxanaGirju + RoxanaGirju 381–387 R09-1069 rozovskaya-girju-2009-identifying @@ -599,8 +599,8 @@ Statistical Confidence Measures for Probabilistic Parsing RicardoSánchez-Sáez - Joan-AndreuSánchez - José-MiguelBenedí Ruíz + Joan-AndreuSánchez + José-MiguelBenedí Ruíz 388–392 R09-1070 sanchez-saez-etal-2009-statistical @@ -624,7 +624,7 @@ Combining Lexical Resources for Contextual Synonym Expansion RaviSinha - RadaMihalcea + RadaMihalcea 404–410 R09-1073 sinha-mihalcea-2009-combining @@ -632,7 +632,7 @@ String Distance-Based Stemming of the Highly Inflected <fixed-case>C</fixed-case>roatian Language JanŠnajder - BojanaDalbelo Bašić + BojanaDalbelo Bašić 411–415 R09-1074 snajder-dalbelo-basic-2009-string @@ -655,7 +655,7 @@ <fixed-case>A</fixed-case>mharic Part-of-Speech Tagger for Factored Language Modeling - Martha YifiruTachbelie + Martha YifiruTachbelie WolfgangMenzel 428–433 R09-1077 @@ -664,8 +664,8 @@ Improving Unsegmented Statistical Dialogue Act Labelling VicentTamarit - Carlos-D.Martínez-Hinarejos - José MiguelBenedí Ruíz + Carlos-D.Martínez-Hinarejos + José MiguelBenedí Ruíz 434–440 R09-1078 tamarit-etal-2009-improving @@ -673,7 +673,7 @@ Three Issues in Cross-Language Frame Information Transfer SaraTonelli - EmanuelePianta + EmanuelePianta 441–448 R09-1079 tonelli-pianta-2009-three @@ -681,9 +681,9 @@ A Study on Linking <fixed-case>W</fixed-case>ikipedia Categories to <fixed-case>W</fixed-case>ordnet Synsets using Text Similarity AntonioToral - ÓscarFerrández - EnekoAgirre - RafaelMuñoz + ÓscarFerrández + EnekoAgirre + RafaelMuñoz 449–454 R09-1080 toral-etal-2009-study @@ -700,16 +700,16 @@ A Method to Restrict the Blow-up of Hypotheses of a Non-disambiguated JernejVičič PetrHomola - VladislavKuboň + VladislavKuboň 460–464 R09-1082 vicic-etal-2009-method Sources of Performance in <fixed-case>CRF</fixed-case> Transfer Training: a Business Name-tagging Case Study - MarcVilain + MarcVilain JonathanHuggins - BenWellner + BenWellner 465–470 R09-1083 vilain-etal-2009-sources @@ -725,7 +725,7 @@ Instance Sampling Methods for Pronoun Resolution HolgerWunsch - SandraKübler + SandraKübler RachaelCantrell 478–483 R09-1085 @@ -743,8 +743,8 @@ Too Many Mammals: Improving the Diversity of Automatically Recognized Terms ZiqiZhang LeiXia - Mark A.Greenwood - JoséIria + Mark A.Greenwood + JoséIria 490–495 R09-1087 zhang-etal-2009-many @@ -770,7 +770,7 @@ Effect of Minimal Semantics on Dependency Parsing Bharat RamAmbati - PujithaGade + PujithaGade ChaitanyaGSK SamarHusain 1–5 @@ -786,7 +786,7 @@ A Study of Machine Learning Algorithms for Recognizing Textual Entailment - Julio JavierCastillo + Julio JavierCastillo 12–17 R09-2003 castillo-2009-study @@ -856,7 +856,7 @@ Exploring Context Variation and Lexicon Coverage in Projection-based Approach for Term Translation - RaphaëlRubino + RaphaëlRubino 66–70 R09-2012 rubino-2009-exploring diff --git a/data/xml/R11.xml b/data/xml/R11.xml index 45a75b63a6..62444f12b8 100644 --- a/data/xml/R11.xml +++ b/data/xml/R11.xml @@ -4,7 +4,7 @@ Proceedings of the International Conference Recent Advances in Natural Language Processing 2011 R11-1 - RuslanMitkov + RuslanMitkov GaliaAngelova Association for Computational Linguistics
Hissar, Bulgaria
@@ -18,7 +18,7 @@ Extracting <fixed-case>STRIPS</fixed-case> Representations of Actions and Events - AvirupSil + AvirupSil AlexanderYates 1–8 R11-1001 @@ -27,7 +27,7 @@ Acquiring Topic Features to improve Event Extraction: in Pre-selected and Balanced Collections ShashaLiao - RalphGrishman + RalphGrishman 9–16 R11-1002 liao-grishman-2011-acquiring @@ -52,14 +52,14 @@ Knowledge-Poor Approach to Shallow Parsing: Contribution of Unsupervised Part-of-Speech Induction MarieGuégan - Claudede Loupy + Claudede Loupy 33–40 R11-1005 guegan-de-loupy-2011-knowledge Fast Domain Adaptation for Part of Speech Tagging for Dialogues - SandraKübler + SandraKübler EricBaucom 41–48 R11-1006 @@ -69,14 +69,14 @@ Using a Morphological Database to Increase the Accuracy in <fixed-case>POS</fixed-case> Tagging HrafnLoftsson SigrúnHelgadóttir - EiríkurRögnvaldsson + EiríkurRögnvaldsson 49–55 R11-1007 loftsson-etal-2011-using Actions Speak Louder than Words: Evaluating Parsers in the Context of Natural Language Understanding Systems for Human-Robot Interaction - SandraKübler + SandraKübler RachaelCantrell MatthiasScheutz 56–62 @@ -118,7 +118,7 @@ Enriching a statistical machine translation system trained on small parallel corpora with rule-based bilingual phrases - Víctor M.Sánchez-Cartagena + Víctor M.Sánchez-Cartagena FelipeSánchez-Martínez Juan AntonioPérez-Ortiz 90–96 @@ -127,7 +127,7 @@ Assessing the Post-Editing Effort for Automatic and Semi-Automatic Translations of <fixed-case>DVD</fixed-case> Subtitles - SheilaC. M. de Sousa + SheilaC. M. de Sousa WilkerAziz LuciaSpecia 97–103 @@ -138,9 +138,9 @@ <fixed-case>JRC</fixed-case>-<fixed-case>NAMES</fixed-case>: A Freely Available, Highly Multilingual Named Entity Resource RalfSteinberger BrunoPouliquen - MijailKabadjov + MijailKabadjov JenyaBelyaeva - Erikvan der Goot + Erikvan der Goot 104–110 R11-1015 steinberger-etal-2011-jrc @@ -211,7 +211,7 @@ Noun Compound and Named Entity Recognition and their Usability in Keyphrase Extraction - IstvánNagy T. + IstvánNagy T. GáborBerend VeronikaVincze 162–169 @@ -235,8 +235,8 @@ Cross-Domain <fixed-case>D</fixed-case>utch Coreference Resolution - OrphéeDe Clercq - VéroniqueHoste + OrphéeDe Clercq + VéroniqueHoste IrisHendrickx 186–193 R11-1026 @@ -245,7 +245,7 @@ Finding the Best Approach for Multi-lingual Text Summarisation: A Comparative Analysis ElenaLloret - ManuelPalomar + ManuelPalomar 194–201 R11-1027 lloret-palomar-2011-finding @@ -253,7 +253,7 @@ Automatically Creating General-Purpose Opinion Summaries from Text VeselinStoyanov - ClaireCardie + ClaireCardie 202–209 R11-1028 stoyanov-cardie-2011-automatically @@ -270,7 +270,7 @@ Temporal Relation Extraction Using Expectation Maximization Seyed AbolghasemMirroshandel - GholamrezaGhassem-Sani + GholamrezaGhassem-Sani 218–225 R11-1030 mirroshandel-ghassem-sani-2011-temporal @@ -286,9 +286,9 @@ Improving <fixed-case>WSD</fixed-case> using <fixed-case>ISR</fixed-case>-<fixed-case>WN</fixed-case> with Relevant Semantic Trees and <fixed-case>S</fixed-case>em<fixed-case>C</fixed-case>or Senses Frequency - YoanGutiérrez - SoniaVázquez - AndrésMontoyo + YoanGutiérrez + SoniaVázquez + AndrésMontoyo 233–239 R11-1032 gutierrez-etal-2011-improving @@ -296,8 +296,8 @@ Investigating Advanced Techniques for Document Content Similarity Applied to External Plagiarism Analysis DanielMicol - RafaelMuñoz - ÓscarFerrández + RafaelMuñoz + ÓscarFerrández 240–246 R11-1033 micol-etal-2011-investigating @@ -305,7 +305,7 @@ Using Cognates in a <fixed-case>F</fixed-case>rench-<fixed-case>R</fixed-case>omanian Lexical Alignment System: A Comparative Study MirabelaNavlea - AmaliaTodiraşcu + AmaliaTodiraşcu 247–253 R11-1034 navlea-todirascu-2011-using @@ -315,19 +315,19 @@ JosefSteinberger JenyaBelyaeva JonathanCrawley - LeonidaDella-Rocca + LeonidaDella-Rocca MohamedEbrahim MaudEhrmann - MijailKabadjov + MijailKabadjov RalfSteinberger - Erikvan der Goot + Erikvan der Goot 254–260 R11-1035 steinberger-etal-2011-highly Singletons and Coreference Resolution Evaluation - SandraKübler + SandraKübler DesislavaZhekova 261–267 R11-1036 @@ -335,7 +335,7 @@ Modelling Entity Instantiations - AndrewMcKinlay + AndrewMcKinlay KatjaMarkert 268–274 R11-1037 @@ -360,7 +360,7 @@ Multiword Expressions and Named Entities in the Wiki50 Corpus VeronikaVincze - IstvánNagy T. + IstvánNagy T. GáborBerend 289–295 R11-1040 @@ -368,9 +368,9 @@ Towards the Automatic Merging of Lexical Resources: Automatic Mapping - MuntsaPadró - NúriaBel - SilviaNecsulescu + MuntsaPadró + NúriaBel + SilviaNecsulescu 296–301 R11-1041 padro-etal-2011-towards @@ -394,14 +394,14 @@ Cultural Configuration of <fixed-case>W</fixed-case>ikipedia: measuring Autoreferentiality in Different Languages MarcMiquel Ribé - HoracioRodríguez + HoracioRodríguez 316–322 R11-1044 miquel-ribe-rodriguez-2011-cultural Combining Relational and Attributional Similarity for Semantic Relation Classification - PreslavNakov + PreslavNakov ZornitsaKozareva 323–330 R11-1045 @@ -410,7 +410,7 @@ In Search of Missing Arguments: A Linguistic Approach JosefRuppenhofer - PhilipGorinski + PhilipGorinski CarolineSporleder 331–338 R11-1046 @@ -418,8 +418,8 @@ Enlarging Monolingual Dictionaries for Machine Translation with Active Learning and Non-Expert Users - MiquelEsplà-Gomis - Víctor M.Sánchez-Cartagena + MiquelEsplà-Gomis + Víctor M.Sánchez-Cartagena Juan AntonioPérez-Ortiz 339–346 R11-1047 @@ -436,7 +436,7 @@ Adaptability of Lexical Acquisition for Large-scale Grammars KostadinCholakov - Gertjanvan Noord + Gertjanvan Noord ValiaKordoni YiZhang 355–362 @@ -446,8 +446,8 @@ Integration of Data from a Syntactic Lexicon into Generative and Discriminative Probabilistic Parsers AnthonySigogne - MatthieuConstant - ÉricLaporte + MatthieuConstant + ÉricLaporte 363–370 R11-1050 sigogne-etal-2011-integration @@ -456,7 +456,7 @@ Pattern Learning for Event Extraction using Monolingual Statistical Machine Translation MarcoTurchi VanniZavarella - HristoTanev + HristoTanev 371–377 R11-1051 turchi-etal-2011-pattern @@ -504,7 +504,7 @@ A Semi-Automatic, Iterative Method for Creating a Domain-Specific Treebank CorinaDima - ErhardHinrichs + ErhardHinrichs 413–419 R11-1057 dima-hinrichs-2011-semi @@ -512,7 +512,7 @@ Determining Immediate Constituents of Compounds in <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et VerenaHenrich - ErhardHinrichs + ErhardHinrichs 420–426 R11-1058 henrich-hinrichs-2011-determining @@ -530,7 +530,7 @@ A Contextual Classification Strategy for Polarity Analysis of Direct Quotations from Financial News BrettDrury - GaëlDias + GaëlDias LuísTorgo 434–440 R11-1060 @@ -572,7 +572,7 @@ Towards <fixed-case>M</fixed-case>inimal <fixed-case>R</fixed-case>ecursion <fixed-case>S</fixed-case>emantics over <fixed-case>B</fixed-case>ulgarian Dependency Parsing - KirilSimov + KirilSimov PetyaOsenova 471–478 R11-1065 @@ -632,8 +632,8 @@ Evaluating the Robustness of <fixed-case>E</fixed-case>moti<fixed-case>B</fixed-case>log for Sentiment Analysis and Opinion Mining EsterBoldrini JaviFernández - José ManuelGómez - PatricioMartínez-Barco + José ManuelGómez + PatricioMartínez-Barco 521–526 R11-1072 boldrini-etal-2011-evaluating @@ -641,8 +641,8 @@ Hybrid System For Plagiarism Detection Javier R.Bru - PatricioMartínez-Barco - RafaelMuñoz + PatricioMartínez-Barco + RafaelMuñoz 527–532 R11-1073 bru-etal-2011-hybrid @@ -650,19 +650,19 @@ Data-Driven Approach Using Semantics for Recognizing and Classifying <fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> Events in <fixed-case>I</fixed-case>talian TommasoCaselli - HectorLlorens - BorjaNavarro-Colorado - EstelaSaquete + HectorLlorens + BorjaNavarro-Colorado + EstelaSaquete 533–538 R11-1074 caselli-etal-2011-data Can Alternations Be Learned? A Machine Learning Approach To <fixed-case>R</fixed-case>omanian Verb Conjugation - Liviu P.Dinu + Liviu P.Dinu EmilIonescu VladNiculae - Octavia-MariaŞulea + Octavia-MariaŞulea 539–544 R11-1075 dinu-etal-2011-alternations @@ -671,7 +671,7 @@ A New Representation Model for the Automatic Recognition and Translation of <fixed-case>A</fixed-case>rabic Named Entities with <fixed-case>N</fixed-case>oo<fixed-case>J</fixed-case> HélaFehri KaisHaddar - AbdelmajidBen hamadou + AbdelmajidBen hamadou 545–550 R11-1076 fehri-etal-2011-new @@ -702,7 +702,7 @@ An algorithm of Identifying Semantic Arguments of a Verb From Structured Data MinhuaHuang - Robert M.Haralick + Robert M.Haralick 568–573 R11-1080 huang-haralick-2011-algorithm @@ -734,9 +734,9 @@ A Hybrid Approach for Event Extraction and Event Actor Identification - Anup KumarKolya + Anup KumarKolya AsifEkbal - SivajiBandyopadhyay + SivajiBandyopadhyay 592–597 R11-1084 kolya-etal-2011-hybrid @@ -778,7 +778,7 @@ Domain-Dependent Identification of Multiword Expressions - IstvánNagy T. + IstvánNagy T. VeronikaVincze GáborBerend 622–627 @@ -812,7 +812,7 @@ Finding Negative Key Phrases for <fixed-case>I</fixed-case>nternet Advertising Campaigns using <fixed-case>W</fixed-case>ikipedia MartinScaiano - DianaInkpen + DianaInkpen 648–653 R11-1093 scaiano-inkpen-2011-finding @@ -844,7 +844,7 @@ <fixed-case>W</fixed-case>ordnets: State of the Art and Perspectives. Case Study: the <fixed-case>R</fixed-case>omanian <fixed-case>W</fixed-case>ordnet - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu 672–677 R11-1097 barbu-mititelu-2011-wordnets @@ -879,10 +879,10 @@ The <fixed-case>RST</fixed-case> <fixed-case>S</fixed-case>panish Treebank On-line Interface - Iriada Cunha - Juan-ManuelTorres-Moreno - GerardoSierra - Luis-AdriánCabrera-Diego + Iriada Cunha + Juan-ManuelTorres-Moreno + GerardoSierra + Luis-AdriánCabrera-Diego Brenda-GabrielaCastro-Rolón Juan-MiguelRolland Bartilotti 698–703 @@ -899,8 +899,8 @@ Multiple Evidence for Term Extraction in Broad Domains - BorisDobrov - NataliaLoukachevitch + BorisDobrov + NataliaLoukachevitch 710–715 R11-1103 dobrov-loukachevitch-2011-multiple @@ -908,8 +908,8 @@ Language Modeling for Document Selection in Question Answering NicolasFoucault - GillesAdda - SophieRosset + GillesAdda + SophieRosset 716–720 R11-1104 foucault-etal-2011-language @@ -941,9 +941,9 @@ Bilingual Experiments with an <fixed-case>A</fixed-case>rabic-<fixed-case>E</fixed-case>nglish Corpus for Opinion Mining MohammedRushdi-Saleh - M. TeresaMartín-Valdivia - L. AlfonsoUreña-López - José M.Perea-Ortega + M. TeresaMartín-Valdivia + L. AlfonsoUreña-López + José M.Perea-Ortega 740–745 R11-1108 rushdi-saleh-etal-2011-bilingual @@ -954,7 +954,7 @@ WalterKoza JosukaDíaz-Labrador JosebaAbaitua - SolangeOliveira Rezende + SolangeOliveira Rezende ThiagoPardo ZulemaSolana 746–751 @@ -980,10 +980,10 @@ Recognition and Classification of Numerical Entities in <fixed-case>B</fixed-case>asque AnderSoraluze - IñakiAlegria - OlatzAnsa + IñakiAlegria + OlatzAnsa OlatzArregi - XabierArregi + XabierArregi 764–769 R11-1112 soraluze-etal-2011-recognition @@ -992,9 +992,9 @@ Multilingual Entity-Centered Sentiment Analysis Evaluated by Parallel Corpora JosefSteinberger PolinaLenkova - MijailKabadjov + MijailKabadjov RalfSteinberger - Erikvan der Goot + Erikvan der Goot 770–775 R11-1113 steinberger-etal-2011-multilingual @@ -1038,7 +1038,7 @@ Domain-Dependent Detection of Light Verb Constructions - István T.Nagy + István T.Nagy GáborBerend GyörgyMóra VeronikaVincze @@ -1055,7 +1055,7 @@ Towards a Better Exploitation of the Brown ‘Family’ Corpora in Diachronic Studies of <fixed-case>B</fixed-case>ritish and <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish Language Varieties - SanjaŠtajner + SanjaŠtajner 17–24 R11-2003 stajner-2011-towards @@ -1081,7 +1081,7 @@ Inter-domain Opinion Phrase Extraction Based on Feature Augmentation GáborBerend - István T.Nagy + István T.Nagy GyörgyMóra VeronikaVincze 41–47 @@ -1133,7 +1133,7 @@ Initial Experiments with Multilingual Extraction of Rhetoric Figures by means of <fixed-case>PERL</fixed-case>-compatible Regular Expressions - Daniel DevatmanHromada + Daniel DevatmanHromada 85–90 R11-2013 hromada-2011-initial @@ -1149,7 +1149,7 @@ Heterogeneous Natural Language Processing Tools via Language Processing Chains - DimanKaragiozov + DimanKaragiozov 97–102 R11-2015 karagiozov-2011-heterogeneous @@ -1179,7 +1179,7 @@ Automatic Acquisition of Possible Contexts for Low-Frequent Words - SilviaNecsulescu + SilviaNecsulescu 121–126 R11-2019 necsulescu-2011-automatic @@ -1193,7 +1193,7 @@ Towards Cross-Language Word Sense Disambiguation for <fixed-case>Q</fixed-case>uechua - AlexRudnick + AlexRudnick 133–138 R11-2021 rudnick-2011-towards @@ -1201,7 +1201,7 @@ Annotating Negation and Speculation: the Case of the Review Domain NataliaKonstantinova - SheilaC. M. de Sousa + SheilaC. M. de Sousa 139–144 R11-2022 konstantinova-c-m-de-sousa-2011-annotating diff --git a/data/xml/R13.xml b/data/xml/R13.xml index 6dd39520bc..d641d549ee 100644 --- a/data/xml/R13.xml +++ b/data/xml/R13.xml @@ -4,9 +4,9 @@ Proceedings of the International Conference Recent Advances in Natural Language Processing RANLP 2013 R13-1 - RuslanMitkov + RuslanMitkov GaliaAngelova - KalinaBontcheva + KalinaBontcheva INCOMA Ltd. Shoumen, BULGARIA
Hissar, Bulgaria
September @@ -20,8 +20,8 @@ <fixed-case>ASMA</fixed-case>: A System for Automatic Segmentation and Morpho-Syntactic Disambiguation of <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic MuhammadAbdul-Mageed - MonaDiab - SandraKübler + MonaDiab + SandraKübler 1–8 R13-1001 abdul-mageed-etal-2013-asma @@ -39,7 +39,7 @@ TanveerAli MarinaSokolova DavidSchramm - DianaInkpen + DianaInkpen 18–24 R13-1003 ali-etal-2013-opinion @@ -56,8 +56,8 @@ A Semi-supervised Learning Approach to <fixed-case>A</fixed-case>rabic Named Entity Recognition MahaAlthobaiti - UdoKruschwitz - MassimoPoesio + UdoKruschwitz + MassimoPoesio 32–40 R13-1005 althobaiti-etal-2013-semi @@ -76,7 +76,7 @@ Improving Sentiment Analysis in <fixed-case>T</fixed-case>witter Using Multilingual Machine Translated Data - AlexandraBalahur + AlexandraBalahur MarcoTurchi 49–55 R13-1007 @@ -86,7 +86,7 @@ Domain Adaptation for Parsing EricBaucom LeviKing - SandraKübler + SandraKübler 56–64 R13-1008 baucom-etal-2013-domain @@ -112,9 +112,9 @@ <fixed-case>T</fixed-case>wit<fixed-case>IE</fixed-case>: An Open-Source Information Extraction Pipeline for Microblog Text KalinaBontcheva - LeonDerczynski + LeonDerczynski AdamFunk - MarkGreenwood + MarkGreenwood DianaMaynard NirajAswani 83–90 @@ -123,7 +123,7 @@ A unified lexical processing framework based on the Margin Infused Relaxed Algorithm. A case study on the <fixed-case>R</fixed-case>omanian Language - TiberiuBoroș + TiberiuBoroș 91–97 R13-1012 boros-2013-unified @@ -149,8 +149,8 @@ Recognising and Interpreting Named Temporal Expressions MatteoBrucato - LeonDerczynski - HectorLlorens + LeonDerczynski + HectorLlorens KalinaBontcheva Christian S.Jensen 113–121 @@ -175,9 +175,9 @@ Temporal Text Classification for <fixed-case>R</fixed-case>omanian Novels set in the Past - Alina MariaCiobanu - Liviu P.Dinu - Octavia-MariaŞulea + Alina MariaCiobanu + Liviu P.Dinu + Octavia-MariaŞulea AncaDinu VladNiculae 136–140 @@ -186,8 +186,8 @@ A Dictionary-Based Approach for Evaluating Orthographic Methods in Cognates Identification - Alina MariaCiobanu - Liviu PetrisorDinu + Alina MariaCiobanu + Liviu PetrisorDinu 141–147 R13-1019 ciobanu-dinu-2013-dictionary @@ -202,7 +202,7 @@ Semantic Relations between Events and their Time, Locations and Participants for Event Coreference Resolution - AgataCybulska + AgataCybulska PiekVossen 156–163 R13-1021 @@ -211,9 +211,9 @@ Sense Clustering Using <fixed-case>W</fixed-case>ikipedia BharathDandala - ChrisHokamp - RadaMihalcea - RazvanBunescu + ChrisHokamp + RadaMihalcea + RazvanBunescu 164–171 R13-1022 dandala-etal-2013-sense @@ -228,11 +228,11 @@ Normalization of <fixed-case>D</fixed-case>utch User-Generated Content - OrphéeDe Clercq + OrphéeDe Clercq SarahSchulz BartDesmet ElsLefever - VéroniqueHoste + VéroniqueHoste 179–188 R13-1024 de-clercq-etal-2013-normalization @@ -240,7 +240,7 @@ Linguistic Profiling of Texts Across Textual Genres and Readability Levels. An Exploratory Study on <fixed-case>I</fixed-case>talian Fictional Prose FeliceDell’Orletta - SimonettaMontemagni + SimonettaMontemagni GiuliaVenturi 189–197 R13-1025 @@ -248,7 +248,7 @@ <fixed-case>T</fixed-case>witter Part-of-Speech Tagging for All: Overcoming Sparse and Noisy Data - LeonDerczynski + LeonDerczynski AlanRitter SamClark KalinaBontcheva @@ -268,8 +268,8 @@ Sequence Tagging for Verb Conjugation in <fixed-case>R</fixed-case>omanian - LiviuDinu - Octavia-MariaŞulea + LiviuDinu + Octavia-MariaŞulea VladNiculae 215–220 R13-1028 @@ -278,17 +278,17 @@ A Tagging Approach to Identify Complex Constituents for Text Simplification IustinDornescu - RichardEvans - ConstantinOrăsan + RichardEvans + ConstantinOrăsan 221–229 R13-1029 dornescu-etal-2013-tagging Automatic Evaluation Metric for Machine Translation that is Independent of Sentence Length - HiroshiEchizen’ya - KenjiAraki - EduardHovy + HiroshiEchizen’ya + KenjiAraki + EduardHovy 230–236 R13-1030 echizenya-etal-2013-automatic @@ -296,18 +296,18 @@ Acronym recognition and processing in 22 languages MaudEhrmann - LeonidaDella Rocca + LeonidaDella Rocca RalfSteinberger - HristoTannev + HristoTannev 237–244 R13-1031 ehrmann-etal-2013-acronym An Evaluation Summary Method Based on a Combination of Content and Linguistic Metrics - SamiraEllouze + SamiraEllouze MaherJaoua - LamiaHadrich Belguith + LamiaHadrich Belguith 245–251 R13-1032 ellouze-etal-2013-evaluation-summary @@ -323,8 +323,8 @@ Temporal Relation Classification in <fixed-case>P</fixed-case>ersian and <fixed-case>E</fixed-case>nglish contexts - MahbanehEshaghzadeh Torbati - GholamrezaGhassem-sani + MahbanehEshaghzadeh Torbati + GholamrezaGhassem-sani Seyed AbolghasemMirroshandel YadollahYaghoobzadeh NeginKarimi Hosseini @@ -334,7 +334,7 @@ The Extended Lexicon: Language Processing as Lexical Description - RogerEvans + RogerEvans 270–276 R13-1035 evans-2013-extended @@ -342,7 +342,7 @@ Did <fixed-case>I</fixed-case> really mean that? Applying automatic summarisation techniques to formative feedback DeboraField - StephenPulman + StephenPulman NicolasVan Labeke DeniseWhitelock JohnRichardson @@ -354,7 +354,7 @@ Matching sets of parse trees for answering multi-sentence questions BorisGalitsky DmitryIlvovsky - Sergei O.Kuznetsov + Sergei O.Kuznetsov FedorStrok 285–293 R13-1037 @@ -419,7 +419,7 @@ Unsupervised Induction of <fixed-case>A</fixed-case>rabic Root and Pattern Lexicons using Machine Learning BilalKhaliq - JohnCarroll + JohnCarroll 350–356 R13-1045 khaliq-carroll-2013-unsupervised @@ -428,7 +428,7 @@ Towards Domain Adaptation for Parsing Web Data MohammadKhan MarkusDickinson - SandraKübler + SandraKübler 357–364 R13-1046 khan-etal-2013-towards @@ -436,7 +436,7 @@ Capturing Anomalies in the Choice of Content Words in Compositional Distributional Semantic Space EkaterinaKochmar - TedBriscoe + TedBriscoe 365–372 R13-1047 kochmar-briscoe-2013-capturing @@ -468,7 +468,7 @@ Confidence Estimation for Knowledge Base Population XiangLi - RalphGrishman + RalphGrishman 396–401 R13-1051 li-grishman-2013-confidence @@ -477,8 +477,8 @@ Towards Fine-grained Citation Function Classification XiangLi YifanHe - AdamMeyers - RalphGrishman + AdamMeyers + RalphGrishman 402–407 R13-1052 li-etal-2013-towards @@ -534,7 +534,7 @@ MarekMaziarz MaciejPiasecki EwaRudnicka - StanSzpakowicz + StanSzpakowicz 443–452 R13-1058 maziarz-etal-2013-beyond @@ -542,14 +542,14 @@ History Based Unsupervised Data Oriented Parsing MohsenMesgar - GholamrezaGhasem-Sani + GholamrezaGhasem-Sani 453–459 R13-1059 mesgar-ghasem-sani-2013-history Contrasting and Corroborating Citations in Journal Articles - AdamMeyers + AdamMeyers 460–466 R13-1060 meyers-2013-contrasting @@ -557,7 +557,7 @@ <fixed-case>CCG</fixed-case> Categories for Distributional Semantic Models ParamitaMirza - RaffaellaBernardi + RaffaellaBernardi 467–474 R13-1061 mirza-bernardi-2013-ccg @@ -584,26 +584,26 @@ Improving Web 2.0 Opinion Mining Systems Using Text Normalisation Techniques AlejandroMosquera - PalomaMoreda Pozo + PalomaMoreda Pozo 491–495 R13-1064 mosquera-moreda-pozo-2013-improving Identifying Social and Expressive Factors in Request Texts Using Transaction/Sequence Model - DašaMunková + DašaMunková MichalMunk - ZuzanaFráterová + ZuzanaFráterová 496–503 R13-1065 munkova-etal-2013-identifying Parameter Optimization for Statistical Machine Translation: It Pays to Learn from Hard Examples - PreslavNakov + PreslavNakov FahadAl Obaidli - FranciscoGuzmán - StephanVogel + FranciscoGuzmán + StephanVogel 504–510 R13-1066 nakov-etal-2013-parameter @@ -620,7 +620,7 @@ High-Accuracy Phrase Translation Acquisition Through Battle-Royale Selection LionelNicolas - Egon W.Stemle + Egon W.Stemle KlaraKranebitter VerenaLyding 516–524 @@ -639,7 +639,7 @@ A clustering approach for translationese identification SergiuNisioi - Liviu P.Dinu + Liviu P.Dinu 532–538 R13-1070 nisioi-dinu-2013-clustering @@ -663,7 +663,7 @@ Information Spreading in Expanding <fixed-case>W</fixed-case>ordnet Hypernymy Structure MaciejPiasecki - RadosławRamocki + RadosławRamocki MichałKaliński 553–561 R13-1073 @@ -671,7 +671,7 @@ Context Independent Term Mapper for <fixed-case>E</fixed-case>uropean Languages - MārcisPinnis + MārcisPinnis 562–570 R13-1074 pinnis-2013-context @@ -704,7 +704,7 @@ A Combined Pattern-based and Distributional Approach for Automatic Hypernym Detection in <fixed-case>D</fixed-case>utch. GwendolijnSchropp ElsLefever - VéroniqueHoste + VéroniqueHoste 593–600 R13-1078 schropp-etal-2013-combined @@ -730,7 +730,7 @@ A New Approach to the <fixed-case>POS</fixed-case> Tagging Problem Using Evolutionary Computation Ana PaulaSilva ArlindoSilva - IreneRodrigues + IreneRodrigues 619–625 R13-1081 silva-etal-2013-new @@ -758,7 +758,7 @@ GiovanniStilo MorenoDe Vincenzi Alberto E.Tozzi - PaolaVelardi + PaolaVelardi 640–648 R13-1084 stilo-etal-2013-automated @@ -774,9 +774,9 @@ Measuring Closure Properties of Patent Sublanguages IrinaTemnikova - NegacyHailu + NegacyHailu GaliaAngelova - K. BretonnelCohen + K. BretonnelCohen 659–666 R13-1086 temnikova-etal-2013-measuring @@ -785,17 +785,17 @@ Closure Properties of <fixed-case>B</fixed-case>ulgarian Clinical Text IrinaTemnikova IvelinaNikolova - William A.Baumgartner + William A.Baumgartner GaliaAngelova - K. BretonnelCohen + K. BretonnelCohen 667–675 R13-1087 temnikova-etal-2013-closure Analyzing the Use of Character-Level Translation with Sparse and Noisy Datasets - JörgTiedemann - PreslavNakov + JörgTiedemann + PreslavNakov 676–684 R13-1088 tiedemann-nakov-2013-analyzing @@ -812,20 +812,20 @@ Introducing a Corpus of Human-Authored Dialogue Summaries in <fixed-case>P</fixed-case>ortuguese - NortonTrevisan Roman + NortonTrevisan Roman PaulPiwek - AriadneM. B. Rizzoni Carvalho - AlexandreRossi Alvares + AriadneM. B. Rizzoni Carvalho + AlexandreRossi Alvares 692–701 R13-1090 trevisan-roman-etal-2013-introducing <fixed-case>W</fixed-case>ikipedia as an <fixed-case>SMT</fixed-case> Training Corpus - DanTufiș + DanTufiș RaduIon - ȘtefanDumitrescu - DanȘtefănescu + ȘtefanDumitrescu + DanȘtefănescu 702–709 R13-1091 tufis-etal-2013-wikipedia @@ -833,7 +833,7 @@ <fixed-case>D</fixed-case>utch<fixed-case>S</fixed-case>em<fixed-case>C</fixed-case>or: in quest of the ideal sense-tagged corpus PiekVossen - RubénIzquierdo + RubénIzquierdo AttilaGörög 710–718 R13-1092 @@ -843,7 +843,7 @@ Towards detecting anomalies in the content of standardized <fixed-case>LMF</fixed-case> dictionaries WafaWali BilelGargouri - AbdelmajidBen Hamadou + AbdelmajidBen Hamadou 719–726 R13-1093 wali-etal-2013-towards @@ -882,7 +882,7 @@ Machine Learning for Mention Head Detection in Multilingual Coreference Resolution DesislavaZhekova - SandraKübler + SandraKübler 747–754 R13-1097 zhekova-kubler-2013-machine @@ -891,7 +891,7 @@ Combining <fixed-case>POS</fixed-case> Tagging, Dependency Parsing and Coreferential Resolution for <fixed-case>B</fixed-case>ulgarian ValentinZhikov GeorgiGeorgiev - KirilSimov + KirilSimov PetyaOsenova 755–762 R13-1098 @@ -901,7 +901,7 @@ magyarlanc: A Tool for Morphological and Dependency Parsing of <fixed-case>H</fixed-case>ungarian JánosZsibrita VeronikaVincze - RichárdFarkas + RichárdFarkas 763–771 R13-1099 zsibrita-etal-2013-magyarlanc @@ -969,7 +969,7 @@ Detecting Negated and Uncertain Information in Biomedical and Review Texts - Noa P.Cruz Díaz + Noa P.Cruz Díaz 45–50 R13-2007 cruz-diaz-2013-detecting @@ -990,7 +990,7 @@ Towards Definition Extraction Using Conditional Random Fields - LuisEspinosa Anke + LuisEspinosa Anke 63–70 R13-2010 espinosa-anke-2013-towards @@ -998,14 +998,14 @@ Event-Centered Simplification of News Stories GoranGlavaš - SanjaŠtajner + SanjaŠtajner 71–78 R13-2011 glavas-stajner-2013-event Random Projection and Geometrization of String Distance Metrics - DanielHromada + DanielHromada 79–85 R13-2012 hromada-2013-random diff --git a/data/xml/R15.xml b/data/xml/R15.xml index 15dee4540d..b9a6aae7b9 100644 --- a/data/xml/R15.xml +++ b/data/xml/R15.xml @@ -4,9 +4,9 @@ Proceedings of the International Conference Recent Advances in Natural Language Processing R15-1 - RuslanMitkov + RuslanMitkov GaliaAngelova - KalinaBontcheva + KalinaBontcheva INCOMA Ltd. Shoumen, BULGARIA
Hissar, Bulgaria
September @@ -38,7 +38,7 @@ Automatic Construction of a <fixed-case>TMF</fixed-case> Terminological Database using a Transducer Cascade ChihebeddineAmmar KaisHaddar - LaurentRomary + LaurentRomary 17–23 R15-1003 ammar-etal-2015-automatic @@ -55,7 +55,7 @@ Predicting the quality of questions on <fixed-case>S</fixed-case>tackoverflow AntoanetaBaltadzhieva - GrzegorzChrupała + GrzegorzChrupała 32–40 R15-1005 baltadzhieva-chrupala-2015-predicting @@ -81,7 +81,7 @@ <fixed-case>D</fixed-case>an<fixed-case>P</fixed-case>roof: Pedagogical Spell and Grammar Checking for <fixed-case>D</fixed-case>anish - EckhardBick + EckhardBick 55–62 R15-1008 bick-2015-danproof @@ -98,7 +98,7 @@ KaiCao XiangLi MiaoFan - RalphGrishman + RalphGrishman 72–77 R15-1010 cao-etal-2015-improving @@ -107,7 +107,7 @@ Improving Event Detection with Dependency Regularization KaiCao XiangLi - RalphGrishman + RalphGrishman 78–83 R15-1011 cao-etal-2015-improving-event @@ -117,7 +117,7 @@ DanielCastro Castro YaritzaAdame Arcia MaríaPelaez Brioso - RafaelMuñoz Guillena + RafaelMuñoz Guillena 84–90 R15-1012 castro-castro-etal-2015-authorship @@ -125,15 +125,15 @@ Coreference Resolution to Support <fixed-case>IE</fixed-case> from <fixed-case>I</fixed-case>ndian Classical Music Forums JoeCheri - PushpakBhattacharyya + PushpakBhattacharyya 91–96 R15-1013 cheri-bhattacharyya-2015-coreference Readability Assessment of Translated Texts - Alina MariaCiobanu - Liviu P.Dinu + Alina MariaCiobanu + Liviu P.Dinu FlaviuPepelea 97–103 R15-1014 @@ -149,24 +149,24 @@ Tune Your Brown Clustering, Please - LeonDerczynski + LeonDerczynski SeanChester - KennethBøgh + KennethBøgh 110–117 R15-1016 derczynski-etal-2015-tune Temporal Relation Classification using a Model of Tense and Aspect - LeonDerczynski - RobertGaizauskas + LeonDerczynski + RobertGaizauskas 118–122 R15-1017 derczynski-gaizauskas-2015-temporal Efficient Named Entity Annotation through Pre-empting - LeonDerczynski + LeonDerczynski KalinaBontcheva 123–130 R15-1018 @@ -191,7 +191,7 @@ Cross-lingual Synonymy Overlap AncaDinu - Liviu P.Dinu + Liviu P.Dinu Ana SabinaUban 147–152 R15-1021 @@ -199,9 +199,9 @@ Barbecued Opakapaka: Using Semantic Preferences for Ontology Population - IsmailEl Maarouf + IsmailEl Maarouf GeorgianaMarsic - ConstantinOrăsan + ConstantinOrăsan 153–159 R15-1022 el-maarouf-etal-2015-barbecued @@ -220,14 +220,14 @@ Using the Textual Content of the <fixed-case>LMF</fixed-case>-Normalized Dictionaries for Identifying and Linking the Syntactic Behaviors to the Meanings ImenElleuch BilelGargouri - AbdelmajidBen Hamadou + AbdelmajidBen Hamadou 168–175 R15-1024 elleuch-etal-2015-using Weakly Supervised Definition Extraction - LuisEspinosa-Anke + LuisEspinosa-Anke HoracioSaggion FrancescoRonzano 176–185 @@ -239,7 +239,7 @@ MiaoFan KaiCao YifanHe - RalphGrishman + RalphGrishman 186–191 R15-1026 fan-etal-2015-jointly @@ -248,7 +248,7 @@ Distributional Semantics for Resolving Bridging Mentions TimFeuerbach MartinRiedl - ChrisBiemann + ChrisBiemann 192–199 R15-1027 feuerbach-etal-2015-distributional @@ -257,7 +257,7 @@ Text Classification into Abstract Classes Based on Discourse Structure BorisGalitsky DmitryIlvovsky - Sergey O.Kuznetsov + Sergey O.Kuznetsov 200–207 R15-1028 galitsky-etal-2015-text @@ -296,7 +296,7 @@ Part-of-Speech Tagging for Code-Mixed <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi <fixed-case>T</fixed-case>witter and <fixed-case>F</fixed-case>acebook Chat Messages AnupamJamatia - BjörnGambäck + BjörnGambäck AmitavaDas 239–248 R15-1033 @@ -306,7 +306,7 @@ Sentiment Analysis in <fixed-case>T</fixed-case>witter for <fixed-case>M</fixed-case>acedonian DameJovanoski VenoPachovski - PreslavNakov + PreslavNakov 249–257 R15-1034 jovanoski-etal-2015-sentiment @@ -322,7 +322,7 @@ Fine-Grained Sentiment Analysis for Movie Reviews in <fixed-case>B</fixed-case>ulgarian BorislavKapukaranov - PreslavNakov + PreslavNakov 266–274 R15-1036 kapukaranov-nakov-2015-fine @@ -367,7 +367,7 @@ Learning Agglutinative Morphology of <fixed-case>I</fixed-case>ndian Languages with Linguistically Motivated <fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars ArunKumar - LluísPadró + LluísPadró AntoniOliver 307–312 R15-1041 @@ -385,7 +385,7 @@ Automatically Identifying Periodic Social Events from <fixed-case>T</fixed-case>witter FlorianKunneman - AntalVan den Bosch + AntalVan den Bosch 320–328 R15-1043 kunneman-van-den-bosch-2015-automatically @@ -394,7 +394,7 @@ Collecting and Evaluating Lexical Polarity with A Game With a Purpose MathieuLafourcade AlainJoubert - NathalieLe Brun + NathalieLe Brun 329–337 R15-1044 lafourcade-etal-2015-collecting @@ -437,7 +437,7 @@ Predicting the Level of Text Standardness in User-generated Content NikolaLjubešić DarjaFišer - TomažErjavec + TomažErjavec JakaČibej DafneMarko SenjaPollak @@ -449,9 +449,9 @@ Predicting Inflectional Paradigms and Lemmata of Unknown Words for Semi-automatic Expansion of Morphological Lexicons NikolaLjubešić - MiquelEsplà-Gomis + MiquelEsplà-Gomis FilipKlubička - NivesMikelić Preradović + NivesMikelić Preradović 379–387 R15-1050 ljubesic-etal-2015-predicting-inflectional @@ -459,7 +459,7 @@ Predicting Correlations Between Lexical Alignments and Semantic Inferences SimoneMagnolini - BernardoMagnini + BernardoMagnini 388–397 R15-1051 magnolini-magnini-2015-predicting @@ -475,7 +475,7 @@ <fixed-case>N</fixed-case>orwegian Native Language Identification - ShervinMalmasi + ShervinMalmasi MarkDras IrinaTemnikova 404–412 @@ -492,8 +492,8 @@ Pattern Construction for Extracting Domain Terminology YusneyMarrero García - PalomaMoreda Pozo - RafaelMuñoz-Guillena + PalomaMoreda Pozo + RafaelMuñoz-Guillena 420–426 R15-1055 marrero-garcia-etal-2015-pattern @@ -501,7 +501,7 @@ A Procedural Definition of Multi-word Lexical Units MarekMaziarz - StanSzpakowicz + StanSzpakowicz MaciejPiasecki 427–435 R15-1056 @@ -509,7 +509,7 @@ Semi-Supervised Never-Ending Learning in Rhetorical Relation Identification - Erick GalaniMaziero + Erick GalaniMaziero GraemeHirst Thiago Alexandre SalgueiroPardo 436–442 @@ -521,7 +521,7 @@ TodorMihaylov IvanKoychev GeorgiGeorgiev - PreslavNakov + PreslavNakov 443–450 R15-1058 mihaylov-etal-2015-exposing @@ -539,7 +539,7 @@ Statistical Machine Translation Improvement based on Phrase Selection CyrineNasri ChirazLatiri - KamelSmaili + KamelSmaili 458–464 R15-1060 nasri-etal-2015-statistical @@ -571,7 +571,7 @@ A Comparative Study of Different Sentiment Lexica for Sentiment Analysis of Tweets - CanberkOzdemir + CanberkOzdemir SabineBergler 488–496 R15-1064 @@ -608,7 +608,7 @@ A New Approach to Automated Text Readability Classification based on Concept Indexing with Integrated Part-of-Speech n-gram Features AbigailRazon - JohnBarnden + JohnBarnden 521–528 R15-1068 razon-barnden-2015-new @@ -671,23 +671,23 @@ Evaluating the Impact of Using a Domain-specific Bilingual Lexicon on the Performance of a Hybrid Machine Translation Approach NasredineSemmar OthmanZennaki - MeriamaLaib + MeriamaLaib 579–587 R15-1075 semmar-etal-2015-evaluating Hierarchical Topic Structuring: From Dense Segmentation to Topically Focused Fragments via Burst Analysis - Anca-RoxanaSimon + Anca-RoxanaSimon PascaleSébillot - GuillaumeGravier + GuillaumeGravier 588–595 R15-1076 simon-etal-2015-hierarchical Improving Word Sense Disambiguation with Linguistic Knowledge from a Sense Annotated Treebank - KirilSimov + KirilSimov AlexanderPopov PetyaOsenova 596–603 @@ -704,7 +704,7 @@ Translating from Original to Simplified Sentences using <fixed-case>M</fixed-case>oses: When does it Actually Work? - SanjaŠtajner + SanjaŠtajner HoracioSaggion 611–617 R15-1079 @@ -712,7 +712,7 @@ Automatic Text Simplification for <fixed-case>S</fixed-case>panish: Comparative Evaluation of Various Simplification Strategies - SanjaŠtajner + SanjaŠtajner IacerCalixto HoracioSaggion 618–626 @@ -722,7 +722,7 @@ Towards Multilingual Event Extraction Evaluation: A Case Study for the <fixed-case>C</fixed-case>zech Language JosefSteinberger - HristoTanev + HristoTanev 627–635 R15-1081 steinberger-tanev-2015-towards @@ -747,7 +747,7 @@ Training Automatic Transliteration Models on <fixed-case>DBP</fixed-case>edia Data VelislavaTodorova - KirilSimov + KirilSimov 654–662 R15-1084 todorova-simov-2015-training @@ -756,7 +756,7 @@ <fixed-case>A</fixed-case>rabic-<fixed-case>E</fixed-case>nglish Semantic Word Class Alignment to Improve Statistical Machine Translation InesTurki Khemakhem SalmaJamoussi - AbdelmajidBen Hamadou + AbdelmajidBen Hamadou 663–671 R15-1085 turki-khemakhem-etal-2015-arabic @@ -769,8 +769,8 @@ JulieMennes BartDesmet GuyDe Pauw - WalterDaelemans - VeroniqueHoste + WalterDaelemans + VeroniqueHoste 672–680 R15-1086 van-hee-etal-2015-detection @@ -794,7 +794,7 @@ Six Good Predictors of Autistic Text Comprehension VictoriaYaneva - RichardEvans + RichardEvans 697–706 R15-1089 yaneva-evans-2015-six @@ -818,7 +818,7 @@ A Large <fixed-case>W</fixed-case>ordnet-based Sentiment Lexicon for <fixed-case>P</fixed-case>olish MonikaZaśko-Zielińska MaciejPiasecki - StanSzpakowicz + StanSzpakowicz 721–730 R15-1092 zasko-zielinska-etal-2015-large @@ -826,7 +826,7 @@ Named Entity Recognition of Persons’ Names in <fixed-case>A</fixed-case>rabic Tweets OmniaZayed - SamhaaEl-Beltagy + SamhaaEl-Beltagy 731–738 R15-1093 zayed-el-beltagy-2015-named @@ -834,14 +834,14 @@ One Tree is not Enough: Cross-lingual Accumulative Structure Transfer for Semantic Indeterminacy PatrickZiering - Lonnekevan der Plas + Lonnekevan der Plas 739–746 R15-1094 ziering-van-der-plas-2015-one Lost in Discussion? Tracking Opinion Groups in Complex Political Discussions by the Example of the <fixed-case>FOMC</fixed-case> Meeting Transcriptions - CäciliaZirn + CäciliaZirn RobertMeusel HeinerStuckenschmidt 747–753 diff --git a/data/xml/R17.xml b/data/xml/R17.xml index 250160589a..c34259ff70 100644 --- a/data/xml/R17.xml +++ b/data/xml/R17.xml @@ -3,7 +3,7 @@ Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017 - RuslanMitkov + RuslanMitkov GaliaAngelova INCOMA Ltd.
Varna, Bulgaria
@@ -28,7 +28,7 @@
What Sentence are you Referring to and Why? Identifying Cited Sentences in Scientific Literature - AhmedAbuRa’ed + AhmedAbuRa’ed LuisChiruzzo HoracioSaggion 9–17 @@ -40,7 +40,7 @@ A Comparison of Feature-Based and Neural Scansion of Poetry ManexAgirrezabal - IñakiAlegria + IñakiAlegria MansHulden 18–23 10.26615/978-954-452-049-6_003 @@ -52,7 +52,7 @@ <fixed-case>P</fixed-case>ersian-<fixed-case>S</fixed-case>panish Low-Resource Statistical Machine Translation Through <fixed-case>E</fixed-case>nglish as Pivot Language BenyaminAhmadnia JavierSerrano - GholamrezaHaffari + GholamrezaHaffari 24–30 10.26615/978-954-452-049-6_004 https://doi.org/10.26615/978-954-452-049-6_004 @@ -62,8 +62,8 @@ Simple Open Stance Classification for Rumour Analysis AhmetAker - LeonDerczynski - KalinaBontcheva + LeonDerczynski + KalinaBontcheva 31–39 10.26615/978-954-452-049-6_005 https://doi.org/10.26615/978-954-452-049-6_005 @@ -176,8 +176,8 @@ Fast and Accurate Decision Trees for Natural Language Processing Tasks - TiberiuBoros - Stefan DanielDumitrescu + TiberiuBoros + Stefan DanielDumitrescu SoniaPipa 103–110 10.26615/978-954-452-049-6_016 @@ -198,7 +198,7 @@ Building Chatbots from Forum Data: Model Selection Using Question Answering Metrics MartinBoyanov - PreslavNakov + PreslavNakov AlessandroMoschitti GiovanniDa San Martino IvanKoychev @@ -233,8 +233,8 @@ Role-based model for Named Entity Recognition PabloCalleja RaúlGarcía-Castro - GuadalupeAguado-de-Cea - AsunciónGómez-Pérez + GuadalupeAguado-de-Cea + AsunciónGómez-Pérez 149–156 10.26615/978-954-452-049-6_021 https://doi.org/10.26615/978-954-452-049-6_021 @@ -244,9 +244,9 @@ Towards the Improvement of Automatic Emotion Pre-annotation with Polarity and Subjective Information LeaCanales - WalterDaelemans + WalterDaelemans EsterBoldrini - PatricioMartínez-Barco + PatricioMartínez-Barco 157–163 10.26615/978-954-452-049-6_022 https://doi.org/10.26615/978-954-452-049-6_022 @@ -256,7 +256,7 @@ Underspecification in Natural Language Understanding for Dialog Automation JohnChen - SrinivasBangalore + SrinivasBangalore 164–170 10.26615/978-954-452-049-6_023 https://doi.org/10.26615/978-954-452-049-6_023 @@ -289,7 +289,7 @@ Towards Replicability in Parsing DanielDakota - SandraKübler + SandraKübler 185–194 10.26615/978-954-452-049-6_026 https://doi.org/10.26615/978-954-452-049-6_026 @@ -309,7 +309,7 @@ On the stylistic evolution from communism to democracy: <fixed-case>S</fixed-case>olomon <fixed-case>M</fixed-case>arcus study case AncaDinu - Liviu P.Dinu + Liviu P.Dinu BogdanDumitru 201–207 10.26615/978-954-452-049-6_028 @@ -356,9 +356,9 @@ Opinion Mining in Social Networks versus Electoral Polls JaviFernández - FernandoLlopis - YoanGutiérrez - PatricioMartínez-Barco + FernandoLlopis + YoanGutiérrez + PatricioMartínez-Barco ÁlvaroDíez 231–237 10.26615/978-954-452-049-6_032 @@ -370,7 +370,7 @@ Corpus Creation and Initial <fixed-case>SMT</fixed-case> Experiments between <fixed-case>S</fixed-case>panish and <fixed-case>S</fixed-case>hipibo-konibo Ana-PaulaGalarreta AndrésMelgar - ArturoOncevay + ArturoOncevay 238–244 10.26615/978-954-452-049-6_033 https://doi.org/10.26615/978-954-452-049-6_033 @@ -411,8 +411,8 @@ A Context-Aware Approach for Detecting Worth-Checking Claims in Political Debates PepaGencheva - PreslavNakov - LluísMàrquez + PreslavNakov + LluísMàrquez AlbertoBarrón-Cedeño IvanKoychev 267–276 @@ -434,8 +434,8 @@ Natural Language Processing Technologies for Document Profiling AntonioGuillén - YoanGutiérrez - RafaelMuñoz + YoanGutiérrez + RafaelMuñoz 284–290 10.26615/978-954-452-049-6_039 https://doi.org/10.26615/978-954-452-049-6_039 @@ -477,7 +477,7 @@ Non-Deterministic Segmentation for <fixed-case>C</fixed-case>hinese Lattice Parsing HaiHu DanielDakota - SandraKübler + SandraKübler 316–324 10.26615/978-954-452-049-6_043 https://doi.org/10.26615/978-954-452-049-6_043 @@ -498,7 +498,7 @@ We Built a Fake News / Click Bait Filter: What Happened Next Will Blow Your Mind! GeorgiKaradzhov PepaGencheva - PreslavNakov + PreslavNakov IvanKoychev 334–343 10.26615/978-954-452-049-6_045 @@ -509,8 +509,8 @@ Fully Automated Fact Checking Using External Sources GeorgiKaradzhov - PreslavNakov - LluísMàrquez + PreslavNakov + LluísMàrquez AlbertoBarrón-Cedeño IvanKoychev 344–353 @@ -522,7 +522,7 @@ Making Travel Smarter: Extracting Travel Information From Email Itineraries Using Named Entity Recognition DivyanshKaushik - ShashankGupta + ShashankGupta ChakradharRaju ReubenDias SanjibGhosh @@ -546,7 +546,7 @@ Domain Control for Neural Machine Translation CatherineKobus - JosepCrego + JosepCrego JeanSenellart 372–378 10.26615/978-954-452-049-6_049 @@ -557,7 +557,7 @@ Curriculum Learning and Minibatch Bucketing in Neural Machine Translation TomKocmi - OndřejBojar + OndřejBojar 379–386 10.26615/978-954-452-049-6_050 https://doi.org/10.26615/978-954-452-049-6_050 @@ -607,7 +607,7 @@ Extracting semantic relations via the combination of inferences, schemas and cooccurrences MathieuLafourcade - NathalieLe Brun + NathalieLe Brun 417–423 10.26615/978-954-452-049-6_055 https://doi.org/10.26615/978-954-452-049-6_055 @@ -618,7 +618,7 @@ If mice were reptiles, then reptiles could be mammals or How to detect errors in the <fixed-case>J</fixed-case>eux<fixed-case>D</fixed-case>e<fixed-case>M</fixed-case>ots lexical network? MathieuLafourcade AlainJoubert - NathalieLe Brun + NathalieLe Brun 424–430 10.26615/978-954-452-049-6_056 https://doi.org/10.26615/978-954-452-049-6_056 @@ -658,7 +658,7 @@ Summarizing World Speak : A Preliminary Graph Based Approach NikhilLondhe - RohiniSrihari + RohiniSrihari 452–458 10.26615/978-954-452-049-6_060 https://doi.org/10.26615/978-954-452-049-6_060 @@ -667,7 +667,7 @@ Human Associations Help to Detect Conventionalized Multiword Expressions - NataliaLoukachevitch + NataliaLoukachevitch AnastasiaGerasimova 459–466 10.26615/978-954-452-049-6_061 @@ -677,7 +677,7 @@ Detecting Hate Speech in Social Media - ShervinMalmasi + ShervinMalmasi MarcosZampieri 467–472 10.26615/978-954-452-049-6_062 @@ -733,7 +733,7 @@ A Domain and Language Independent Named Entity Classification Approach Based on Profiles and Local Information IsabelMoreno María TeresaRomá-Ferri - PalomaMoreda Pozo + PalomaMoreda Pozo 510–518 10.26615/978-954-452-049-6_067 https://doi.org/10.26615/978-954-452-049-6_067 @@ -743,7 +743,7 @@ Similarity Based Genre Identification for <fixed-case>POS</fixed-case> Tagging Experts & Dependency Parsing AtreyeeMukherjee - SandraKübler + SandraKübler 519–526 10.26615/978-954-452-049-6_068 https://doi.org/10.26615/978-954-452-049-6_068 @@ -772,8 +772,8 @@ Robust Tuning Datasets for Statistical Machine Translation - PreslavNakov - StephanVogel + PreslavNakov + StephanVogel 543–550 10.26615/978-954-452-049-6_071 https://doi.org/10.26615/978-954-452-049-6_071 @@ -782,9 +782,9 @@ Do Not Trust the Trolls: Predicting Credibility in Community Question Answering Forums - PreslavNakov + PreslavNakov TsvetomilaMihaylova - LluísMàrquez + LluísMàrquez YashkumarShiroya IvanKoychev 551–560 @@ -796,7 +796,7 @@ <fixed-case>B</fixed-case>ulgarian-<fixed-case>E</fixed-case>nglish and <fixed-case>E</fixed-case>nglish-<fixed-case>B</fixed-case>ulgarian Machine Translation: System Design and Evaluation PetyaOsenova - KirilSimov + KirilSimov 561–568 10.26615/978-954-452-049-6_073 https://doi.org/10.26615/978-954-452-049-6_073 @@ -902,9 +902,9 @@ Idiom Type Identification with Smoothed Lexical Features and a Maximum Margin Classifier - GiancarloSalton + GiancarloSalton RobertRoss - JohnKelleher + JohnKelleher 642–651 10.26615/978-954-452-049-6_083 https://doi.org/10.26615/978-954-452-049-6_083 @@ -914,7 +914,7 @@ A Calibration Method for Evaluation of Sentiment Analysis F. SharmilaSatthar - RogerEvans + RogerEvans GuldenUchyigit 652–660 10.26615/978-954-452-049-6_084 @@ -925,7 +925,7 @@ Building Multiword Expressions Bilingual Lexicons for Domain Adaptation of an Example-Based Machine Translation System NasredineSemmar - MariamaLaib + MariamaLaib 661–670 10.26615/978-954-452-049-6_085 https://doi.org/10.26615/978-954-452-049-6_085 @@ -946,7 +946,7 @@ Towards Lexical Chains for Knowledge-Graph-based Word Embeddings - KirilSimov + KirilSimov SvetlaBoytcheva PetyaOsenova 679–685 @@ -992,8 +992,8 @@ Large-scale news entity sentiment analysis RalfSteinberger StefanieHegele - HristoTanev - LeonidaDella Rocca + HristoTanev + LeonidaDella Rocca 707–715 10.26615/978-954-452-049-6_091 https://doi.org/10.26615/978-954-452-049-6_091 @@ -1002,10 +1002,10 @@ Predicting the Law Area and Decisions of <fixed-case>F</fixed-case>rench <fixed-case>S</fixed-case>upreme <fixed-case>C</fixed-case>ourt Cases - Octavia-MariaŞulea + Octavia-MariaŞulea MarcosZampieri MihaelaVela - Josefvan Genabith + Josefvan Genabith 716–722 10.26615/978-954-452-049-6_092 https://doi.org/10.26615/978-954-452-049-6_092 @@ -1024,7 +1024,7 @@ Multi-entity sentiment analysis using entity-level feature extraction and word embeddings approach ColmSweeney - DeepakPadmanabhan + DeepakPadmanabhan 733–740 10.26615/978-954-452-049-6_094 https://doi.org/10.26615/978-954-452-049-6_094 @@ -1137,9 +1137,9 @@ Multilingual and Cross-Lingual Complex Word Identification Seid MuhieYimam - SanjaŠtajner + SanjaŠtajner MartinRiedl - ChrisBiemann + ChrisBiemann 813–822 10.26615/978-954-452-049-6_104 https://doi.org/10.26615/978-954-452-049-6_104 @@ -1168,7 +1168,7 @@ Using <fixed-case>NLP</fixed-case> for Enhancing Second Language Acquisition LeonardoZilio RodrigoWilkens - CédrickFairon + CédrickFairon 839–846 10.26615/978-954-452-049-6_107 https://doi.org/10.26615/978-954-452-049-6_107 diff --git a/data/xml/R19.xml b/data/xml/R19.xml index 3aecde9e69..f3ba5be13a 100644 --- a/data/xml/R19.xml +++ b/data/xml/R19.xml @@ -4,7 +4,7 @@ Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019) R19-1 - RuslanMitkov + RuslanMitkov GaliaAngelova INCOMA Ltd.
Varna, Bulgaria
@@ -41,7 +41,7 @@ Bilingual Low-Resource Neural Machine Translation with Round-Tripping: The Case of <fixed-case>P</fixed-case>ersian-<fixed-case>S</fixed-case>panish BenyaminAhmadnia - BonnieDorr + BonnieDorr 18–24 The quality of Neural Machine Translation (NMT), as a data-driven approach, massively depends on quantity, quality, and relevance of the training dataset. Such approaches have achieved promising results for bilingually high-resource scenarios but are inadequate for low-resource conditions. This paper describes a round-trip training approach to bilingual low-resource NMT that takes advantage of monolingual datasets to address training data scarcity, thus augmenting translation quality. We conduct detailed experiments on Persian-Spanish as a bilingually low-resource scenario. Experimental results demonstrate that this competitive approach outperforms the baselines. R19-1003 @@ -51,7 +51,7 @@ Enhancing Phrase-Based Statistical Machine Translation by Learning Phrase Representations Using Long Short-Term Memory Network BenyaminAhmadnia - BonnieDorr + BonnieDorr 25–32 Phrases play a key role in Machine Translation (MT). In this paper, we apply a Long Short-Term Memory (LSTM) model over conventional Phrase-Based Statistical MT (PBSMT). The core idea is to use an LSTM encoder-decoder to score the phrase table generated by the PBSMT decoder. Given a source sequence, the encoder and decoder are jointly trained in order to maximize the conditional probability of a target sequence. Analytically, the performance of a PBSMT system is enhanced by using the conditional probabilities of phrase pairs computed by an LSTM encoder-decoder as an additional feature in the existing log-linear model. We compare the performance of the phrase tables in the PBSMT to the performance of the proposed LSTM and observe its positive impact on translation quality. We construct a PBSMT model using the Moses decoder and enrich the Language Model (LM) utilizing an external dataset. We then rank the phrase tables using an LSTM-based encoder-decoder. This method produces a gain of up to 3.14 BLEU score on the test set. R19-1004 @@ -82,7 +82,7 @@ Supervised Morphological Segmentation Using Rich Annotated Lexicon EbrahimAnsari - ZdeněkŽabokrtský + ZdeněkŽabokrtský MohammadMahmoudi HamidHaghdoost JonášVidra @@ -94,7 +94,7 @@ Combining Lexical Substitutes in Neural Word Sense Induction - NikolayArefyev + NikolayArefyev BorisSheludko AlexanderPanchenko 62–70 @@ -107,7 +107,7 @@ Detecting Clitics Related Orthographic Errors in <fixed-case>T</fixed-case>urkish UgurcanArikan OnurGungor - SuzanUskudarli + SuzanUskudarli 71–76 For the spell correction task, vocabulary based methods have been replaced with methods that take morphological and grammar rules into account. However, such tools are fairly immature, and, worse, non-existent for many low resource languages. Checking only if a word is well-formed with respect to the morphological rules of a language may produce false negatives due to the ambiguity resulting from the presence of numerous homophonic words. In this work, we propose an approach to detect and correct the “de/da” clitic errors in Turkish text. Our model is a neural sequence tagger trained with a synthetically constructed dataset consisting of positive and negative samples. The model’s performance with this dataset is presented according to different word embedding configurations. The model achieved an F1 score of 86.67% on a synthetically constructed dataset. We also compared the model’s performance on a manually curated dataset of challenging samples that proved superior to other spelling correctors with 71% accuracy compared to the second-best (Google Docs) with and accuracy of 34%. R19-1009 @@ -129,8 +129,8 @@ Diachronic Analysis of Entities by Exploiting <fixed-case>W</fixed-case>ikipedia Page revisions PierpaoloBasile AnnalinaCaputo - SeamusLawless - GiovanniSemeraro + SeamusLawless + GiovanniSemeraro 84–91 In the last few years, the increasing availability of large corpora spanning several time periods has opened new opportunities for the diachronic analysis of language. This type of analysis can bring to the light not only linguistic phenomena related to the shift of word meanings over time, but it can also be used to study the impact that societal and cultural trends have on this language change. This paper introduces a new resource for performing the diachronic analysis of named entities built upon Wikipedia page revisions. This resource enables the analysis over time of changes in the relations between entities (concepts), surface forms (words), and the contexts surrounding entities and surface forms, by analysing the whole history of Wikipedia internal links. We provide some useful use cases that prove the impact of this resource on diachronic studies and delineate some possible future usage. R19-1011 @@ -153,7 +153,7 @@ MeriemBeloucif Ana ValeriaGonzalez MarcelBollmann - AndersSøgaard + AndersSøgaard 102–111 Neural machine translation models have little inductive bias, which can be a disadvantage in low-resource scenarios. Neural models have to be trained on large amounts of data and have been shown to perform poorly when only limited data is available. We show that using naive regularization methods, based on sentence length, punctuation and word frequencies, to penalize translations that are very different from the input sentences, consistently improves the translation quality across multiple low-resource languages. We experiment with 12 language pairs, varying the training data size between 17k to 230k sentence pairs. Our best regularizer achieves an average increase of 1.5 BLEU score and 1.0 TER score across all the language pairs. For example, we achieve a BLEU score of 26.70 on the IWSLT15 English–Vietnamese translation task simply by using relative differences in punctuation as a regularizer. R19-1013 @@ -183,7 +183,7 @@ Evaluating the Consistency of Word Embeddings from Small Data JelkeBloem AntskeFokkens - AurélieHerbelot + AurélieHerbelot 132–141 In this work, we address the evaluation of distributional semantic models trained on smaller, domain-specific texts, specifically, philosophical text. Specifically, we inspect the behaviour of models using a pre-trained background space in learning. We propose a measure of consistency which can be used as an evaluation metric when no in-domain gold-standard data is available. This measure simply computes the ability of a model to learn similar embeddings from different parts of some homogeneous data. We show that in spite of being a simple evaluation, consistency actually depends on various combinations of factors, including the nature of the data itself, the model used to train the semantic space, and the frequency of the learnt terms, both in the background space and in the in-domain data of interest. R19-1016 @@ -237,7 +237,7 @@ Classifying Author Intention for Writer Feedback in Related Work ArleneCasey - BonnieWebber + BonnieWebber DorotaGlowacka 178–187 The ability to produce high-quality publishable material is critical to academic success but many Post-Graduate students struggle to learn to do so. While recent years have seen an increase in tools designed to provide feedback on aspects of writing, one aspect that has so far been neglected is the Related Work section of academic research papers. To address this, we have trained a supervised classifier on a corpus of 94 Related Work sections and evaluated it against a manually annotated gold standard. The classifier uses novel features pertaining to citation types and co-reference, along with patterns found from studying Related Works. We show that these novel features contribute to classifier performance with performance being favourable compared to other similar works that classify author intentions and consider feedback for academic writing. @@ -271,7 +271,7 @@ Personality-dependent Neural Text Summarization PabloCosta - IvandréParaboni + IvandréParaboni 205–212 In Natural Language Generation systems, personalization strategies - i.e, the use of information about a target author to generate text that (more) closely resembles human-produced language - have long been applied to improve results. The present work addresses one such strategy - namely, the use of Big Five personality information about the target author - applied to the case of abstractive text summarization using neural sequence-to-sequence models. Initial results suggest that having access to personality information does lead to more accurate (or human-like) text summaries, and paves the way for more robust systems of this kind. R19-1024 @@ -324,7 +324,7 @@ Detecting Toxicity in News Articles: Application to <fixed-case>B</fixed-case>ulgarian YoanDinkov IvanKoychev - PreslavNakov + PreslavNakov 247–258 Online media aim for reaching ever bigger audience and for attracting ever longer attention span. This competition creates an environment that rewards sensational, fake, and toxic news. To help limit their spread and impact, we propose and develop a news toxicity detector that can recognize various types of toxic content. While previous research primarily focused on English, here we target Bulgarian. We created a new dataset by crawling a website that for five years has been collecting Bulgarian news articles that were manually categorized into eight toxicity groups. Then we trained a multi-class classifier with nine categories: eight toxic and one non-toxic. We experimented with different representations based on ElMo, BERT, and XLM, as well as with a variety of domain-specific features. Due to the small size of our dataset, we created a separate model for each feature type, and we ultimately combined these models into a meta-classifier. The evaluation results show an accuracy of 59.0% and a macro-F1 score of 39.7%, which represent sizable improvements over the majority-class baseline (Acc=30.3%, macro-F1=5.2%). R19-1029 @@ -355,11 +355,11 @@ Demo Application for <fixed-case>LETO</fixed-case>: Learning Engine Through Ontologies SuilanEstevez-Velarde - AndrésMontoyo + AndrésMontoyo YudivianAlmeida-Cruz - YoanGutiérrez + YoanGutiérrez AlejandroPiad-Morffis - RafaelMuñoz + RafaelMuñoz 276–284 The massive amount of multi-formatted information available on the Web necessitates the design of software systems that leverage this information to obtain knowledge that is valid and useful. The main challenge is to discover relevant information and continuously update, enrich and integrate knowledge from various sources of structured and unstructured data. This paper presents the Learning Engine Through Ontologies(LETO) framework, an architecture for the continuous and incremental discovery of knowledge from multiple sources of unstructured and structured data. We justify the main design decision behind LETO’s architecture and evaluate the framework’s feasibility using the Internet Movie Data Base(IMDB) and Twitter as a practical application. R19-1032 @@ -368,8 +368,8 @@ Sentence Simplification for Semantic Role Labelling and Information Extraction - RichardEvans - ConstantinOrasan + RichardEvans + ConstantinOrasan 285–294 In this paper, we report on the extrinsic evaluation of an automatic sentence simplification method with respect to two NLP tasks: semantic role labelling (SRL) and information extraction (IE). The paper begins with our observation of challenges in the intrinsic evaluation of sentence simplification systems, which motivates the use of extrinsic evaluation of these systems with respect to other NLP tasks. We describe the two NLP systems and the test data used in the extrinsic evaluation, and present arguments and evidence motivating the integration of a sentence simplification step as a means of improving the accuracy of these systems. Our evaluation reveals that their performance is improved by the simplification step: the SRL system is better able to assign semantic roles to the majority of the arguments of verbs and the IE system is better able to identify fillers for all IE template slots. R19-1033 @@ -379,7 +379,7 @@ <fixed-case>O</fixed-case>llo<fixed-case>B</fixed-case>ot - Towards A Text-Based <fixed-case>A</fixed-case>rabic Health Conversational Agent: Evaluation and Results AhmedFadhil - AhmedAbuRa’ed + AhmedAbuRa’ed 295–303 We introduce OlloBot, an Arabic conversational agent that assists physicians and supports patients with the care process. It doesn’t replace the physicians, instead provides health tracking and support and assists physicians with the care delivery through a conversation medium. The current model comprises healthy diet, physical activity, mental health, in addition to food logging. Not only OlloBot tracks user daily food, it also offers useful tips for healthier living. We will discuss the design, development and testing of OlloBot, and highlight the findings and limitations arose from the testing. R19-1034 @@ -445,7 +445,7 @@ LauraFranzoi AndreaSgarro AncaDinu - Liviu P.Dinu + Liviu P.Dinu 345–352 In this paper, we present new methods for language classification which put to good use both syntax and fuzzy tools, and are capable of dealing with irrelevant linguistic features (i.e. features which should not contribute to the classification) and even inconsistent features (which do not make sense for specific languages). We introduce a metric distance, based on the generalized Steinhaus transform, which allows one to deal jointly with irrelevance and inconsistency. To evaluate our methods, we test them on a syntactic data set, due to the linguist G. Longobardi and his school. We obtain phylogenetic trees which sometimes outperform the ones obtained by Atkinson and Gray. R19-1040 @@ -579,7 +579,7 @@ Beyond <fixed-case>E</fixed-case>nglish-Only Reading Comprehension: Experiments in Zero-shot Multilingual Transfer for <fixed-case>B</fixed-case>ulgarian MomchilHardalov IvanKoychev - PreslavNakov + PreslavNakov 447–459 Recently, reading comprehension models achieved near-human performance on large-scale datasets such as SQuAD, CoQA, MS Macro, RACE, etc. This is largely due to the release of pre-trained contextualized representations such as BERT and ELMo, which can be fine-tuned for the target task. Despite those advances and the creation of more challenging datasets, most of the work is still done for English. Here, we study the effectiveness of multilingual BERT fine-tuned on large-scale English datasets for reading comprehension (e.g., for RACE), and we apply it to Bulgarian multiple-choice reading comprehension. We propose a new dataset containing 2,221 questions from matriculation exams for twelfth grade in various subjects —history, biology, geography and philosophy—, and 412 additional questions from online quizzes in history. While the quiz authors gave no relevant context, we incorporate knowledge from Wikipedia, retrieving documents matching the combination of question + each answer option. Moreover, we experiment with different indexing and pre-training strategies. The evaluation results show accuracy of 42.23%, which is well above the baseline of 24.89%. R19-1053 @@ -642,7 +642,7 @@ TatsuyaIshigaki HidetakaKamigaito HiroyaTakamura - ManabuOkumura + ManabuOkumura 497–506 Discourse relations between sentences are often represented as a tree, and the tree structure provides important information for summarizers to create a short and coherent summary. However, current neural network-based summarizers treat the source document as just a sequence of sentences and ignore the tree-like discourse structure inherent in the document. To incorporate the information of a discourse tree structure into the neural network-based summarizers, we propose a discourse-aware neural extractive summarizer which can explicitly take into account the discourse dependency tree structure of the source document. Our discourse-aware summarizer can jointly learn the discourse structure and the salience score of a sentence by using novel hierarchical attention modules, which can be trained on automatically parsed discourse dependency trees. Experimental results showed that our model achieved competitive or better performances against state-of-the-art models in terms of ROUGE scores on the DailyMail dataset. We further conducted manual evaluations. The results showed that our approach also gained the coherence of the output summaries. R19-1059 @@ -682,7 +682,7 @@ Using Syntax to Resolve <fixed-case>NPE</fixed-case> in <fixed-case>E</fixed-case>nglish PayalKhullar AllenAntony - ManishShrivastava + ManishShrivastava 534–540 This paper describes a novel, syntax-based system for automatic detection and resolution of Noun Phrase Ellipsis (NPE) in English. The system takes in free input English text, detects the site of nominal elision, and if present, selects potential antecedent candidates. The rules are built using the syntactic information on ellipsis and its antecedent discussed in previous theoretical linguistics literature on NPE. Additionally, we prepare a curated dataset of 337 sentences from well-known, reliable sources, containing positive and negative samples of NPE. We split this dataset into two parts, and use one part to refine our rules and the other to test the performance of our final system. We get an F1-score of 76.47% for detection and 70.27% for NPE resolution on the testset. To the best of our knowledge, ours is the first system that detects and resolves NPE in English. The curated dataset used for this task, albeit small, covers a wide variety of NPE cases and will be made public for future work. R19-1063 @@ -722,7 +722,7 @@ A Qualitative Evaluation Framework for Paraphrase Identification VenelinKovatchev - M. AntoniaMarti + M. AntoniaMarti MariaSalamo JavierBeltran 568–577 @@ -757,8 +757,8 @@ Question Similarity in Community Question Answering: A Systematic Exploration of Preprocessing Methods and Models FlorianKunneman Thiago CastroFerreira - EmielKrahmer - Antalvan den Bosch + EmielKrahmer + Antalvan den Bosch 593–601 Community Question Answering forums are popular among Internet users, and a basic problem they encounter is trying to find out if their question has already been posed before. To address this issue, NLP researchers have developed methods to automatically detect question-similarity, which was one of the shared tasks in SemEval. The best performing systems for this task made use of Syntactic Tree Kernels or the SoftCosine metric. However, it remains unclear why these methods seem to work, whether their performance can be improved by better preprocessing methods and what kinds of errors they (and other methods) make. In this paper, we therefore systematically combine and compare these two approaches with the more traditional BM25 and translation-based models. Moreover, we analyze the impact of preprocessing steps (lowercasing, suppression of punctuation and stop words removal) and word meaning similarity based on different distributions (word translation probability, Word2Vec, fastText and ELMo) on the performance of the task. We conduct an error analysis to gain insight into the differences in performance between the system set-ups. The implementation is made publicly available from https://github.com/fkunneman/DiscoSumo/tree/master/ranlp. R19-1070 @@ -777,7 +777,7 @@ Resolving Pronouns for a Resource-Poor Language, <fixed-case>M</fixed-case>alayalam Using Resource-Rich Language, <fixed-case>T</fixed-case>amil. - SobhaLalitha Devi + SobhaLalitha Devi 611–618 In this paper we give in detail how a resource rich language can be used for resolving pronouns for a less resource language. The source language, which is resource rich language in this study, is Tamil and the resource poor language is Malayalam, both belonging to the same language family, Dravidian. The Pronominal resolution developed for Tamil uses CRFs. Our approach is to leverage the Tamil language model to test Malayalam data and the processing required for Malayalam data is detailed. The similarity at the syntactic level between the languages is exploited in identifying the features for developing the Tamil language model. The word form or the lexical item is not considered as a feature for training the CRFs. Evaluation on Malayalam Wikipedia data shows that our approach is correct and the results, though not as good as Tamil, but comparable. R19-1072 @@ -831,8 +831,8 @@ PilarLópez Úbeda Flor MiriamPlaza del Arco Manuel CarlosDíaz Galiano - L. AlfonsoUrena Lopez - MaiteMartin + L. AlfonsoUrena Lopez + MaiteMartin 655–663 Mental health is one of the main concerns of today’s society. Early detection of symptoms can greatly help people with mental disorders. People are using social networks more and more to express emotions, sentiments and mental states. Thus, the treatment of this information using NLP technologies can be applied to the automatic detection of mental problems such as eating disorders. However, the first step to solving the problem should be to provide a corpus in order to evaluate our systems. In this paper, we specifically focus on detecting anorexia messages on Twitter. Firstly, we have generated a new corpus of tweets extracted from different accounts including anorexia and non-anorexia messages in Spanish. The corpus is called SAD: Spanish Anorexia Detection corpus. In order to validate the effectiveness of the SAD corpus, we also propose several machine learning approaches for automatically detecting anorexia symptoms in the corpus. The good results obtained show that the application of textual classification methods is a promising option for developing this kind of system demonstrating that these tools could be used by professionals to help in the early detection of mental problems. R19-1077 @@ -869,8 +869,8 @@ SurajMaharjan DeepthiMave PrashaShrestha - ManuelMontes - Fabio A.González + ManuelMontes + Fabio A.González ThamarSolorio 684–692 An author’s way of presenting a story through his/her writing style has a great impact on whether the story will be liked by readers or not. In this paper, we learn representations for authors of literary texts together with representations for character n-grams annotated with their functional roles. We train a neural character n-gram based language model using an external corpus of literary texts and transfer learned representations for use in downstream tasks. We show that augmenting the knowledge from external works of authors produces results competitive with other style-based methods for book likability prediction, genre classification, and authorship attribution. @@ -917,8 +917,8 @@ Semantic Language Model for <fixed-case>T</fixed-case>unisian Dialect AbirMasmoudi RimLaatar - MariemEllouze - LamiaHadrich Belguith + MariemEllouze + LamiaHadrich Belguith 720–729 In this paper, we describe the process of creating a statistical Language Model (LM) for the Tunisian Dialect. Indeed, this work is part of the realization of Automatic Speech Recognition (ASR) system for the Tunisian Railway Transport Network. Since our eld of work has been limited, there are several words with similar behaviors (semantic for example) but they do not have the same appearance probability; their class groupings will therefore be possible. For these reasons, we propose to build an n-class LM that is based mainly on the integration of purely semantic data. Indeed, each class represents an abstraction of similar labels. In order to improve the sequence labeling task, we proposed to use a discriminative algorithm based on the Conditional Random Field (CRF) model. To better judge our choice of creating an n-class word model, we compared the created model with the 3-gram type model on the same test corpus of evaluation. Additionally, to assess the impact of using the CRF model to perform the semantic labelling task in order to construct semantic classes, we compared the n-class created model with using the CRF in the semantic labelling task and the n- class model without using the CRF in the semantic labelling task. The drawn comparison of the predictive power of the n-class model obtained by applying the CRF model in the semantic labelling is that it is better than the other two models presenting the highest value of its perplexity. R19-1084 @@ -928,7 +928,7 @@ Automatic diacritization of <fixed-case>T</fixed-case>unisian dialect text using Recurrent Neural Network AbirMasmoudi - MariemEllouze + MariemEllouze LamiaHadrich belguith 730–739 The absence of diacritical marks in the Arabic texts generally leads to morphological, syntactic and semantic ambiguities. This can be more blatant when one deals with under-resourced languages, such as the Tunisian dialect, which suffers from unavailability of basic tools and linguistic resources, like sufficient amount of corpora, multilingual dictionaries, morphological and syntactic analyzers. Thus, this language processing faces greater challenges due to the lack of these resources. The automatic diacritization of MSA text is one of the various complex problems that can be solved by deep neural networks today. Since the Tunisian dialect is an under-resourced language of MSA and as there are a lot of resemblance between both languages, we suggest to investigate a recurrent neural network (RNN) for this dialect diacritization problem. This model will be compared to our previous models models CRF and SMT (CITATION) based on the same dialect corpus. We can experimentally show that our model can achieve better outcomes (DER of 10.72%), as compared to the two models CRF (DER of 20.25%) and SMT (DER of 33.15%). @@ -939,8 +939,8 @@ Comparing <fixed-case>MT</fixed-case> Approaches for Text Normalization ClaudiaMatos Veliz - OrpheeDe Clercq - VeroniqueHoste + OrpheeDe Clercq + VeroniqueHoste 740–749 One of the main characteristics of social media data is the use of non-standard language. Since NLP tools have been trained on traditional text material their performance drops when applied to social media data. One way to overcome this is to first perform text normalization. In this work, we apply text normalization to noisy English and Dutch text coming from different social media genres: text messages, message board posts and tweets. We consider the normalization task as a Machine Translation problem and test the two leading paradigms: statistical and neural machine translation. For SMT we explore the added value of varying background corpora for training the language model. For NMT we have a look at data augmentation since the parallel datasets we are working with are limited in size. Our results reveal that when relying on SMT to perform the normalization it is beneficial to use a background corpus that is close to the genre you are normalizing. Regarding NMT, we find that the translations - or normalizations - coming out of this model are far from perfect and that for a low-resource language like Dutch adding additional training data works better than artificially augmenting the data. R19-1086 @@ -951,7 +951,7 @@ Sentiment and Emotion Based Representations for Fake Reviews Detection AlimuddinMelleng AnnaJurek-Loughrey - DeepakP + DeepakP 750–757 Fake reviews are increasingly prevalent across the Internet. They can be unethical as well as harmful. They can affect businesses and mislead individual customers. As the opinions on the Web are increasingly used the detection of fake reviews has become more and more critical. In this study, we explore the effectiveness of sentiment and emotions based representations for the task of building machine learning models for fake review detection. We perform empirical studies over three real world datasets and demonstrate that improved data representation can be achieved by combining sentiment and emotion extraction methods, as well as by performing sentiment and emotion analysis on a part-by-part basis by segmenting the reviews. R19-1087 @@ -972,9 +972,9 @@ Community Perspective on Replicability in Natural Language Processing MargotMieskes KarënFort - AurélieNévéol + AurélieNévéol CyrilGrouin - KevinCohen + KevinCohen 768–775 With recent efforts in drawing attention to the task of replicating and/or reproducing results, for example in the context of COLING 2018 and various LREC workshops, the question arises how the NLP community views the topic of replicability in general. Using a survey, in which we involve members of the NLP community, we investigate how our community perceives this topic, its relevance and options for improvement. Based on over two hundred participants, the survey results confirm earlier observations, that successful reproducibility requires more than having access to code and data. Additionally, the results show that the topic has to be tackled from the authors’, reviewers’ and community’s side. R19-1089 @@ -1060,7 +1060,7 @@ Summary Refinement through Denoising - Nikola I.Nikolov + Nikola I.Nikolov AlessandroCalmanovici RichardHahnloser 837–843 @@ -1071,7 +1071,7 @@ Large-Scale Hierarchical Alignment for Data-driven Text Rewriting - Nikola I.Nikolov + Nikola I.Nikolov RichardHahnloser 844–853 We propose a simple unsupervised method for extracting pseudo-parallel monolingual sentence pairs from comparable corpora representative of two different text styles, such as news articles and scientific papers. Our approach does not require a seed parallel corpus, but instead relies solely on hierarchical search over pre-trained embeddings of documents and sentences. We demonstrate the effectiveness of our method through automatic and extrinsic evaluation on text simplification from the normal to the Simple Wikipedia. We show that pseudo-parallel sentences extracted with our method not only supplement existing parallel data, but can even lead to competitive performance on their own. @@ -1093,7 +1093,7 @@ From Image to Text in Sentiment Analysis via Regression and Deep Learning DanielaOnita - Liviu P.Dinu + Liviu P.Dinu AdrianaBirlutiu 862–868 Images and text represent types of content which are used together for conveying user emotions in online social networks. These contents are usually associated with a sentiment category. In this paper, we investigate an approach for mapping images to text for three types of sentiment categories: positive, neutral and negative. The mapping from images to text is performed using a Kernel Ridge Regression model. We considered two types of image features: i) RGB pixel-values features, and ii) features extracted with a deep learning approach. The experimental evaluation was performed on a Twitter data set containing both text and images and the sentiment associated with these. The experimental results show a difference in performance for different sentiment categories, in particular the mapping that we propose performs better for the positive sentiment category in comparison with the neutral and negative ones. Furthermore, the experimental results show that the more complex deep learning features perform better than the RGB pixel-value features for all sentiment categories and for larger training sets. @@ -1104,7 +1104,7 @@ Building a Morphological Analyser for <fixed-case>L</fixed-case>az EsraOnal - FrancisTyers + FrancisTyers 869–877 This study is an attempt to contribute to documentation and revitalization efforts of endangered Laz language, a member of South Caucasian language family mainly spoken on northeastern coastline of Turkey. It constitutes the first steps to create a general computational model for word form recognition and production for Laz by building a rule-based morphological analyser using Helsinki Finite-State Toolkit (HFST). The evaluation results show that the analyser has a 64.9% coverage over a corpus collected for this study with 111,365 tokens. We have also performed an error analysis on randomly selected 100 tokens from the corpus which are not covered by the analyser, and these results show that the errors mostly result from Turkish words in the corpus and missing stems in our lexicon. R19-1101 @@ -1115,7 +1115,7 @@ Term Based Semantic Clusters for Very Short Text Classification JasperPaalman ShantanuMullick - KalliopiZervanou + KalliopiZervanou YingqianZhang 878–887 Very short texts, such as tweets and invoices, present challenges in classification. Although term occurrences are strong indicators of content, in very short texts, the sparsity of these texts makes it difficult to capture important semantic relationships. A solution calls for a method that not only considers term occurrence, but also handles sparseness well. In this work, we introduce such an approach, the Term Based Semantic Clusters (TBSeC) that employs terms to create distinctive semantic concept clusters. These clusters are ranked using a semantic similarity function which in turn defines a semantic feature space that can be used for text classification. Our method is evaluated in an invoice classification task. Compared to well-known content representation methods the proposed method performs competitively. @@ -1126,7 +1126,7 @@ Quotation Detection and Classification with a Corpus-Agnostic Model SeanPapay - SebastianPadó + SebastianPadó 888–894 The detection of quotations (i.e., reported speech, thought, and writing) has established itself as an NLP analysis task. However, state-of-the-art models have been developed on the basis of specific corpora and incorpo- rate a high degree of corpus-specific assumptions and knowledge, which leads to fragmentation. In the spirit of task-agnostic modeling, we present a corpus-agnostic neural model for quotation detection and evaluate it on three corpora that vary in language, text genre, and structural assumptions. The model (a) approaches the state-of-the-art on the corpora when using established feature sets and (b) shows reasonable performance even when us- ing solely word forms, which makes it applicable for non-standard (i.e., historical) corpora. R19-1103 @@ -1149,11 +1149,11 @@ A Neural Network Component for Knowledge-Based Semantic Representations of Text AlejandroPiad-Morffis - RafaelMuñoz - YoanGutiérrez + RafaelMuñoz + YoanGutiérrez YudivianAlmeida-Cruz SuilanEstevez-Velarde - AndrésMontoyo + AndrésMontoyo 904–911 This paper presents Semantic Neural Networks (SNNs), a knowledge-aware component based on deep learning. SNNs can be trained to encode explicit semantic knowledge from an arbitrary knowledge base, and can subsequently be combined with other deep learning architectures. At prediction time, SNNs provide a semantic encoding extracted from the input data, which can be exploited by other neural network components to build extended representation models that can face alternative problems. The SNN architecture is defined in terms of the concepts and relations present in a knowledge base. Based on this architecture, a training procedure is developed. Finally, an experimental setup is presented to illustrate the behaviour and performance of a SNN for a specific NLP problem, in this case, opinion mining for the classification of movie reviews. R19-1105 @@ -1164,7 +1164,7 @@ Toponym Detection in the Bio-Medical Domain: A Hybrid Approach with Deep Learning AlistairPlum TharinduRanasinghe - ConstantinOrasan + ConstantinOrasan 912–921 This paper compares how different machine learning classifiers can be used together with simple string matching and named entity recognition to detect locations in texts. We compare five different state-of-the-art machine learning classifiers in order to predict whether a sentence contains a location or not. Following this classification task, we use a string matching algorithm with a gazetteer to identify the exact index of a toponym within the sentence. We evaluate different approaches in terms of machine learning classifiers, text pre-processing and location extraction on the SemEval-2019 Task 12 dataset, compiled for toponym resolution in the bio-medical domain. Finally, we compare the results with our system that was previously submitted to the SemEval-2019 task evaluation. R19-1106 @@ -1174,7 +1174,7 @@ Combining <fixed-case>PBSMT</fixed-case> and <fixed-case>NMT</fixed-case> Back-translated Data for Efficient <fixed-case>NMT</fixed-case> AlbertoPoncelas - MajaPopović + MajaPopović DimitarShterionov GideonMaillette de Buy Wenniger AndyWay @@ -1209,7 +1209,7 @@ Know Your Graph. State-of-the-Art Knowledge-Based <fixed-case>WSD</fixed-case> AlexanderPopov - KirilSimov + KirilSimov PetyaOsenova 949–958 This paper introduces several improvements over the current state of the art in knowledge-based word sense disambiguation. Those innovations are the result of modifying and enriching a knowledge base created originally on the basis of WordNet. They reflect several separate but connected strategies: manipulating the shape and the content of the knowledge base, assigning weights over the relations in the knowledge base, and the addition of new relations to it. The main contribution of the paper is to demonstrate that the previously proposed knowledge bases organize linguistic and world knowledge suboptimally for the task of word sense disambiguation. In doing so, the paper also establishes a new state of the art for knowledge-based approaches. Its best models are competitive in the broader context of supervised systems as well. @@ -1219,7 +1219,7 @@ Are ambiguous conjunctions problematic for machine translation? - MajaPopović + MajaPopović SheilaCastilho 959–966 The translation of ambiguous words still poses challenges for machine translation. In this work, we carry out a systematic quantitative analysis regarding the ability of different machine translation systems to disambiguate the source language conjunctions “but” and “and”. We evaluate specialised test sets focused on the translation of these two conjunctions. The test sets contain source languages that do not distinguish different variants of the given conjunction, whereas the target languages do. In total, we evaluate the conjunction “but” on 20 translation outputs, and the conjunction “and” on 10. All machine translation systems almost perfectly recognise one variant of the target conjunction, especially for the source conjunction “but”. The other target variant, however, represents a challenge for machine translation systems, with accuracy varying from 50% to 95% for “but” and from 20% to 57% for “and”. The major error for all systems is replacing the correct target variant with the opposite one. @@ -1265,7 +1265,7 @@ Enhancing Unsupervised Sentence Similarity Methods with Deep Contextualised Word Representations TharinduRanasinghe - ConstantinOrasan + ConstantinOrasan RuslanMitkov 994–1003 Calculating Semantic Textual Similarity (STS) plays a significant role in many applications such as question answering, document summarisation, information retrieval and information extraction. All modern state of the art STS methods rely on word embeddings one way or another. The recently introduced contextualised word embeddings have proved more effective than standard word embeddings in many natural language processing tasks. This paper evaluates the impact of several contextualised word embeddings on unsupervised STS methods and compares it with the existing supervised/unsupervised STS methods for different datasets in different languages and different domains @@ -1276,7 +1276,7 @@ Semantic Textual Similarity with <fixed-case>S</fixed-case>iamese Neural Networks TharinduRanasinghe - ConstantinOrasan + ConstantinOrasan RuslanMitkov 1004–1011 Calculating the Semantic Textual Similarity (STS) is an important research area in natural language processing which plays a significant role in many applications such as question answering, document summarisation, information retrieval and information extraction. This paper evaluates Siamese recurrent architectures, a special type of neural networks, which are used here to measure STS. Several variants of the architecture are compared with existing methods @@ -1288,7 +1288,7 @@ Analysing the Impact of Supervised Machine Learning on Automatic Term Extraction: <fixed-case>HAMLET</fixed-case> vs <fixed-case>T</fixed-case>ermo<fixed-case>S</fixed-case>tat AylaRigouts Terryn PatrickDrouin - VeroniqueHoste + VeroniqueHoste ElsLefever 1012–1021 Traditional approaches to automatic term extraction do not rely on machine learning (ML) and select the top n ranked candidate terms or candidate terms above a certain predefined cut-off point, based on a limited number of linguistic and statistical clues. However, supervised ML approaches are gaining interest. Relatively little is known about the impact of these supervised methodologies; evaluations are often limited to precision, and sometimes recall and f1-scores, without information about the nature of the extracted candidate terms. Therefore, the current paper presents a detailed and elaborate analysis and comparison of a traditional, state-of-the-art system (TermoStat) and a new, supervised ML approach (HAMLET), using the results obtained for the same, manually annotated, Dutch corpus about dressage. @@ -1299,7 +1299,7 @@ Distant Supervision for Sentiment Attitude Extraction NicolayRusnachenko - NataliaLoukachevitch + NataliaLoukachevitch ElenaTutubalina 1022–1030 News articles often convey attitudes between the mentioned subjects, which is essential for understanding the described situation. In this paper, we describe a new approach to distant supervision for extracting sentiment attitudes between named entities mentioned in texts. Two factors (pair-based and frame-based) were used to automatically label an extensive news collection, dubbed as RuAttitudes. The latter became a basis for adaptation and training convolutional architectures, including piecewise max pooling and full use of information across different sentences. The results show that models, trained with RuAttitudes, outperform ones that were trained with only supervised learning approach and achieve 13.4% increase in F1-score on RuSentRel collection. @@ -1311,7 +1311,7 @@ Self-Attentional Models Application in Task-Oriented Dialogue Generation Systems MansourSaffar Mehrjardi AmineTrabelsi - Osmar R.Zaiane + Osmar R.Zaiane 1031–1040 Self-attentional models are a new paradigm for sequence modelling tasks which differ from common sequence modelling methods, such as recurrence-based and convolution-based sequence learning, in the way that their architecture is only based on the attention mechanism. Self-attentional models have been used in the creation of the state-of-the-art models in many NLP task such as neural machine translation, but their usage has not been explored for the task of training end-to-end task-oriented dialogue generation systems yet. In this study, we apply these models on the DSTC2 dataset for training task-oriented chatbots. Our finding shows that self-attentional models can be exploited to create end-to-end task-oriented chatbots which not only achieve higher evaluation scores compared to recurrence-based models, but also do so more efficiently. R19-1119 @@ -1321,9 +1321,9 @@ Whom to Learn From? Graph- vs. Text-based Word Embeddings MałgorzataSalawa - AntónioBranco + AntónioBranco RubenBranco - JoãoAntónio Rodrigues + JoãoAntónio Rodrigues ChakavehSaedi 1041–1051 Vectorial representations of meaning can be supported by empirical data from diverse sources and obtained with diverse embedding approaches. This paper aims at screening this experimental space and reports on an assessment of word embeddings supported (i) by data in raw texts vs. in lexical graphs, (ii) by lexical information encoded in association- vs. inference-based graphs, and obtained (iii) by edge reconstruction- vs. matrix factorisation vs. random walk-based graph embedding methods. The results observed with these experiments indicate that the best solutions with graph-based word embeddings are very competitive, consistently outperforming mainstream text-based ones. @@ -1333,8 +1333,8 @@ Persistence pays off: Paying Attention to What the <fixed-case>LSTM</fixed-case> Gating Mechanism Persists - GiancarloSalton - JohnKelleher + GiancarloSalton + JohnKelleher 1052–1059 Recurrent Neural Network Language Models composed of LSTM units, especially those augmented with an external memory, have achieved state-of-the-art results in Language Modeling. However, these models still struggle to process long sequences which are more likely to contain long-distance dependencies because of information fading. In this paper we demonstrate an effective mechanism for retrieving information in a memory augmented LSTM LM based on attending to information in memory in proportion to the number of timesteps the LSTM gating mechanism persisted the information. R19-1121 @@ -1355,7 +1355,7 @@ Moral Stance Recognition and Polarity Classification from <fixed-case>T</fixed-case>witter and Elicited Text WesleySantos - IvandréParaboni + IvandréParaboni 1069–1075 We introduce a labelled corpus of stances about moral issues for the Brazilian Portuguese language, and present reference results for both the stance recognition and polarity classification tasks. The corpus is built from Twitter and further expanded with data elicited through crowd sourcing and labelled by their own authors. Put together, the corpus and reference results are expected to be taken as a baseline for further studies in the field of stance recognition and polarity classification from text. R19-1123 @@ -1396,9 +1396,9 @@ A Morpho-Syntactically Informed <fixed-case>LSTM</fixed-case>-<fixed-case>CRF</fixed-case> Model for Named Entity Recognition LiliaSimeonova - KirilSimov + KirilSimov PetyaOsenova - PreslavNakov + PreslavNakov 1104–1113 We propose a morphologically informed model for named entity recognition, which is based on LSTM-CRF architecture and combines word embeddings, Bi-LSTM character embeddings, part-of-speech (POS) tags, and morphological information. While previous work has focused on learning from raw word input, using word and character embeddings only, we show that for morphologically rich languages, such as Bulgarian, access to POS information contributes more to the performance gains than the detailed morphological information. Thus, we show that named entity recognition needs only coarse-grained POS tags, but at the same time it can benefit from simultaneously using some POS information of different granularity. Our evaluation results over a standard dataset show sizeable improvements over the state-of-the-art for Bulgarian NER. R19-1127 @@ -1408,7 +1408,7 @@ Named Entity Recognition in Information Security Domain for <fixed-case>R</fixed-case>ussian AnastasiiaSirotina - NataliaLoukachevitch + NataliaLoukachevitch 1114–1120 In this paper we discuss the named entity recognition task for Russian texts related to cybersecurity. First of all, we describe the problems that arise in course of labeling unstructured texts from information security domain. We introduce guidelines for human annotators, according to which a corpus has been marked up. Then, a CRF-based system and different neural architectures have been implemented and applied to the corpus. The named entity recognition systems have been evaluated and compared to determine the most efficient one. R19-1128 @@ -1437,8 +1437,8 @@ Automated Text Simplification as a Preprocessing Step for Machine Translation into an Under-resourced Language - SanjaŠtajner - MajaPopović + SanjaŠtajner + MajaPopović 1141–1150 In this work, we investigate the possibility of using fully automatic text simplification system on the English source in machine translation (MT) for improving its translation into an under-resourced language. We use the state-of-the-art automatic text simplification (ATS) system for lexically and syntactically simplifying source sentences, which are then translated with two state-of-the-art English-to-Serbian MT systems, the phrase-based MT (PBMT) and the neural MT (NMT). We explore three different scenarios for using the ATS in MT: (1) using the raw output of the ATS; (2) automatically filtering out the sentences with low grammaticality and meaning preservation scores; and (3) performing a minimal manual correction of the ATS output. Our results show improvement in fluency of the translation regardless of the chosen scenario, and difference in success of the three scenarios depending on the MT approach used (PBMT or NMT) with regards to improving translation fluency and post-editing effort. R19-1131 @@ -1450,7 +1450,7 @@ KennethSteimel DanielDakota YueChen - SandraKübler + SandraKübler 1151–1160 Abusive language detection has received much attention in the last years, and recent approaches perform the task in a number of different languages. We investigate which factors have an effect on multilingual settings, focusing on the compatibility of data and annotations. In the current paper, we focus on English and German. Our findings show large differences in performance between the two languages. We find that the best performance is achieved by different classification algorithms. Sampling to address class imbalance issues is detrimental for German and beneficial for English. The only similarity that we find is that neither data set shows clear topics when we compare the results of topic modeling to the gold standard. Based on our findings, we can conclude that a multilingual optimization of classifiers is not possible even in settings where comparable data sets are used. R19-1132 @@ -1481,7 +1481,7 @@ A Quantum-Like Approach to Word Sense Disambiguation - FabioTamburini + FabioTamburini 1176–1185 This paper presents a novel algorithm for Word Sense Disambiguation (WSD) based on Quantum Probability Theory. The Quantum WSD algorithm requires concepts representations as vectors in the complex domain and thus we have developed a technique for computing complex word and sentence embeddings based on the Paragraph Vectors algorithm. Despite the proposed method is quite simple and that it does not require long training phases, when it is evaluated on a standardized benchmark for this task it exhibits state-of-the-art (SOTA) performances. R19-1135 @@ -1503,7 +1503,7 @@ Text-Based Joint Prediction of Numeric and Categorical Attributes of Entities in Knowledge Bases VThejas AbhijeetGupta - SebastianPadó + SebastianPadó 1194–1202 Collaboratively constructed knowledge bases play an important role in information systems, but are essentially always incomplete. Thus, a large number of models has been developed for Knowledge Base Completion, the task of predicting new attributes of entities given partial descriptions of these entities. Virtually all of these models either concentrate on numeric attributes (<Italy,GDP,2T$>) or they concentrate on categorical attributes (<Tim Cook,chairman,Apple>). In this paper, we propose a simple feed-forward neural architecture to jointly predict numeric and categorical attributes based on embeddings learned from textual occurrences of the entities in question. Following insights from multi-task learning, our hypothesis is that due to the correlations among attributes of different kinds, joint prediction improves over separate prediction. Our experiments on seven FreeBase domains show that this hypothesis is true of the two attribute types: we find substantial improvements for numeric attributes in the joint model, while performance remains largely unchanged for categorical attributes. Our analysis indicates that this is the case because categorical attributes, many of which describe membership in various classes, provide useful ‘background knowledge’ for numeric prediction, while this is true to a lesser degree in the inverse direction. R19-1137 @@ -1541,7 +1541,7 @@ Cross-Lingual Word Embeddings for Morphologically Rich Languages AhmetÜstün GosseBouma - Gertjanvan Noord + Gertjanvan Noord 1222–1228 Cross-lingual word embedding models learn a shared vector space for two or more languages so that words with similar meaning are represented by similar vectors regardless of their language. Although the existing models achieve high performance on pairs of morphologically simple languages, they perform very poorly on morphologically rich languages such as Turkish and Finnish. In this paper, we propose a morpheme-based model in order to increase the performance of cross-lingual word embeddings on morphologically rich languages. Our model includes a simple extension which enables us to exploit morphemes for cross-lingual mapping. We applied our model for the Turkish-Finnish language pair on the bilingual word translation task. Results show that our model outperforms the baseline models by 2% in the nearest neighbour ranking. R19-1140 @@ -1552,9 +1552,9 @@ It Takes Nine to Smell a Rat: Neural Multi-Task Learning for Check-Worthiness Prediction SlavenaVasileva PepaAtanasova - LluísMàrquez + LluísMàrquez AlbertoBarrón-Cedeño - PreslavNakov + PreslavNakov 1229–1239 We propose a multi-task deep-learning approach for estimating the check-worthiness of claims in political debates. Given a political debate, such as the 2016 US Presidential and Vice-Presidential ones, the task is to predict which statements in the debate should be prioritized for fact-checking. While different fact-checking organizations would naturally make different choices when analyzing the same debate, we show that it pays to learn from multiple sources simultaneously (PolitiFact, FactCheck, ABC, CNN, NPR, NYT, Chicago Tribune, The Guardian, and Washington Post) in a multi-task learning setup, even when a particular source is chosen as a target to imitate. Our evaluation shows state-of-the-art results on a standard dataset for the task of check-worthiness prediction. R19-1141 @@ -1653,7 +1653,7 @@ Bigger versus Similar: Selecting a Background Corpus for First Story Detection Based on Distributional Similarity FeiWang Robert J.Ross - John D.Kelleher + John D.Kelleher 1312–1320 The current state of the art for First Story Detection (FSD) are nearest neighbour-based models with traditional term vector representations; however, one challenge faced by FSD models is that the document representation is usually defined by the vocabulary and term frequency from a background corpus. Consequently, the ideal background corpus should arguably be both large-scale to ensure adequate term coverage, and similar to the target domain in terms of the language distribution. However, given these two factors cannot always be mutually satisfied, in this paper we examine whether the distributional similarity of common terms is more important than the scale of common terms for FSD. As a basis for our analysis we propose a set of metrics to quantitatively measure the scale of common terms and the distributional similarity between corpora. Using these metrics we rank different background corpora relative to a target corpus. We also apply models based on different background corpora to the FSD task. Our results show that term distributional similarity is more predictive of good FSD performance than the scale of common terms; and, thus we demonstrate that a smaller recent domain-related corpus will be more suitable than a very large-scale general corpus for FSD. R19-1150 @@ -1706,7 +1706,7 @@ A Survey of the Perceived Text Adaptation Needs of Adults with Autism VictoriaYaneva - ConstantinOrasan + ConstantinOrasan Le AnHa NataliaPonomareva 1356–1363 @@ -1741,7 +1741,7 @@ <fixed-case>T</fixed-case>urkish Tweet Classification with Transformer Encoder Atıf EmreYüksel Yaşar AlimTürkmen - ArzucanÖzgür + ArzucanÖzgür BernaAltınel 1380–1387 Short-text classification is a challenging task, due to the sparsity and high dimensionality of the feature space. In this study, we aim to analyze and classify Turkish tweets based on their topics. Social media jargon and the agglutinative structure of the Turkish language makes this classification task even harder. As far as we know, this is the first study that uses a Transformer Encoder for short text classification in Turkish. The model is trained in a weakly supervised manner, where the training data set has been labeled automatically. Our results on the test set, which has been manually labeled, show that performing morphological analysis improves the classification performance of the traditional machine learning algorithms Random Forest, Naive Bayes, and Support Vector Machines. Still, the proposed approach achieves an F-score of 89.3 % outperforming those algorithms by at least 5 points. diff --git a/data/xml/S01.xml b/data/xml/S01.xml index fd4db78542..c169af9363 100644 --- a/data/xml/S01.xml +++ b/data/xml/S01.xml @@ -18,7 +18,7 @@ <fixed-case>SENSEVAL</fixed-case>-2: Overview - PhilipEdmonds + PhilipEdmonds ScottCotton 1–5 S01-1001 @@ -26,10 +26,10 @@ The <fixed-case>B</fixed-case>asque Task: Did Systems Perform in the Upperbound? - EnekoAgirre + EnekoAgirre ElenaGarcia - MikelLersundi - DavidMartinez + MikelLersundi + DavidMartinez EliPociello 9–12 S01-1002 @@ -38,7 +38,7 @@ <fixed-case>D</fixed-case>utch Word Sense Disambiguation: Data and Preliminary Results IrisHendrickx - Antalvan den Bosch + Antalvan den Bosch 13–16 S01-1003 hendrickx-van-den-bosch-2001-dutch @@ -52,11 +52,11 @@ <fixed-case>E</fixed-case>nglish Tasks: All-Words and Verb Lexical Sample - MarthaPalmer + MarthaPalmer ChristianeFellbaum ScottCotton LaurenDelfs - Hoa TrangDang + Hoa TrangDang 21–24 S01-1005 palmer-etal-2001-english @@ -65,7 +65,7 @@ Sensiting Inflectionality: <fixed-case>E</fixed-case>stonian Task for <fixed-case>SENSEVAL</fixed-case>-2 NeemeKahusk HeiliOrav - HaldurÕim + HaldurÕim 25–28 S01-1006 kahusk-etal-2001-sensiting @@ -74,7 +74,7 @@ The <fixed-case>I</fixed-case>talian Lexical Sample Task FrancescaBertagna ClaudiaSoria - NicolettaCalzolari + NicolettaCalzolari 29–32 S01-1007 bertagna-etal-2001-italian @@ -95,9 +95,9 @@ Framework and Results for the <fixed-case>S</fixed-case>panish <fixed-case>SENSEVAL</fixed-case> - GermanRigau - MarionaTaulé - AnaFernandez + GermanRigau + MarionaTaulé + AnaFernandez JulioGonzalo 41–44 S01-1010 @@ -121,7 +121,7 @@ The <fixed-case>J</fixed-case>apanese Translation Task: Lexical and Structural Perspectives - TimothyBaldwin + TimothyBaldwin AtsushiOkazaki TakenobuTokunaga HozumiTanaka @@ -141,7 +141,7 @@ Probabilistic Network Models for Word Sense Disambiguation GeraldChao - Michael G.Dyer + Michael G.Dyer 63–66 S01-1015 chao-dyer-2001-probabilistic @@ -149,8 +149,8 @@ Improving <fixed-case>WSD</fixed-case> with Multi-Level View of Context Monitored by Similarity Measure EricCrestan - MarcEl-Bèze - Claudede Loupy + MarcEl-Bèze + Claudede Loupy 67–70 S01-1016 crestan-etal-2001-improving @@ -158,15 +158,15 @@ Using <fixed-case>L</fixed-case>azy<fixed-case>B</fixed-case>oosting for Word Sense Disambiguation GerardEscudero - LluísMàrquez - GermanRigau + LluísMàrquez + GermanRigau 71–74 S01-1017 escudero-etal-2001-using The <fixed-case>UNED</fixed-case> Systems at <fixed-case>SENSEVAL</fixed-case>-2 - DavidFernández-Amorós + DavidFernández-Amorós JulioGonzalo FelisaVerdejo 75–78 @@ -182,9 +182,9 @@ Classifier Optimization and Combination in the <fixed-case>E</fixed-case>nglish All Words Task - VéroniqueHoste + VéroniqueHoste AnneKool - WalterDaelemans + WalterDaelemans 83–86 S01-1020 hoste-etal-2001-classifier @@ -194,7 +194,7 @@ H. TolgaIlhan Sepandar D.Kamvar DanKlein - Christopher D.Manning + Christopher D.Manning KristinaToutanova 87–90 S01-1021 @@ -210,7 +210,7 @@ <fixed-case>ATR</fixed-case>-<fixed-case>SLT</fixed-case> System for <fixed-case>SENSEVAL</fixed-case>-2 <fixed-case>J</fixed-case>apanese Translation Task TadashiKumano - HidekiKashioka + HidekiKashioka HidekiTanaka 95–98 S01-1023 @@ -218,7 +218,7 @@ Sense and Deduction: The Power of Peewees Applied to the <fixed-case>SENSEVAL</fixed-case>-2 <fixed-case>S</fixed-case>wedish Lexical Sample Task - TorbjörnLager + TorbjörnLager NataliaZinovjeva 99–102 S01-1024 @@ -227,9 +227,9 @@ Primitive-Based Word Sense Disambiguation for <fixed-case>SENSEVAL</fixed-case>-2 LimBeng Tat - ZaharinYusoff - TangEnya Kong - GuoCheng Ming + ZaharinYusoff + Enya KongTang + Cheng MingGuo 103–106 S01-1025 lim-etal-2001-primitive @@ -243,26 +243,26 @@ Using Domain Information for Word Sense Disambiguation - BernardoMagnini + BernardoMagnini CarloStrapparava GiovanniPezzulo - AlfioGliozzo + AlfioGliozzo 111–114 S01-1027 magnini-etal-2001-using Decision Lists for <fixed-case>E</fixed-case>nglish and <fixed-case>B</fixed-case>asque - DavidMartinez - EnekoAgirre + DavidMartinez + EnekoAgirre 115–118 S01-1028 martinez-agirre-2001-decision Disambiguating Noun and Verb Senses Using Automatically Acquired Selectional Preferences - DianaMcCarthy - JohnCarroll + DianaMcCarthy + JohnCarroll JuditaPreiss 119–122 S01-1029 @@ -278,16 +278,16 @@ Pattern Learning and Active Feature Selection for Word Sense Disambiguation - Rada F.Mihalcea - Dan I.Moldovan + Rada F.Mihalcea + Dan I.Moldovan 127–130 S01-1031 mihalcea-moldovan-2001-pattern <fixed-case>T</fixed-case>he <fixed-case>U</fixed-case>niversity of <fixed-case>A</fixed-case>licante Word Sense Disambiguation System - AndrésMontoyo - ArmandoSuárez + AndrésMontoyo + ArmandoSuárez 131–134 S01-1032 montoyo-suarez-2001-university @@ -321,7 +321,7 @@ <fixed-case>KUNLP</fixed-case> system using Classification Information Model at <fixed-case>SENSEVAL</fixed-case>-2 Hee-CheolSeo Sang-ZooLee - Hae-ChangRim + Hae-ChangRim HoLee 147–150 S01-1036 @@ -356,8 +356,8 @@ <fixed-case>T</fixed-case>he <fixed-case>J</fixed-case>ohn <fixed-case>H</fixed-case>opkins <fixed-case>SENSEVAL</fixed-case>-2 System Descriptions DavidYarowsky - SilviuCucerzan - RaduFlorian + SilviuCucerzan + RaduFlorian CharlesSchafer RichardWicentowski 163–166 diff --git a/data/xml/S07.xml b/data/xml/S07.xml index db4eaa5f3d..562c8e0e46 100644 --- a/data/xml/S07.xml +++ b/data/xml/S07.xml @@ -4,8 +4,8 @@ Proceedings of the Fourth International Workshop on Semantic Evaluations (SemEval-2007) S07-1 - EnekoAgirre - LluísMàrquez + EnekoAgirre + LluísMàrquez RichardWicentowski Association for Computational Linguistics
Prague, Czech Republic
@@ -20,10 +20,10 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 01: Evaluating <fixed-case>WSD</fixed-case> on Cross-Language Information Retrieval EnekoAgirre - BernardoMagnini - OierLopez de Lacalle + BernardoMagnini + OierLopez de Lacalle ArantxaOtegi - GermanRigau + GermanRigau PiekVossen 1–6 S07-1001 @@ -32,18 +32,18 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 02: Evaluating Word Sense Induction and Discrimination Systems EnekoAgirre - AitorSoroa + AitorSoroa 7–12 S07-1002 agirre-soroa-2007-semeval <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 04: Classification of Semantic Relations between Nominals - RoxanaGirju - PreslavNakov - ViviNastase - StanSzpakowicz - PeterTurney + RoxanaGirju + PreslavNakov + ViviNastase + StanSzpakowicz + PeterTurney DenizYuret 13–18 S07-1003 @@ -86,16 +86,16 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 09: Multilevel Semantic Annotation of <fixed-case>C</fixed-case>atalan and <fixed-case>S</fixed-case>panish LluísMàrquez - LuisVillarejo + LuisVillarejo M. A.Martí - MarionaTaulé + MarionaTaulé 42–47 S07-1008 marquez-etal-2007-semeval <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 10: <fixed-case>E</fixed-case>nglish Lexical Substitution Task - DianaMcCarthy + DianaMcCarthy RobertoNavigli 48–53 S07-1009 @@ -111,7 +111,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 12: <fixed-case>T</fixed-case>urkish Lexical Sample Task - ZeynepOrhan + ZeynepOrhan EmineÇelik DemirgüçNeslihan 59–63 @@ -130,7 +130,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 14: Affective Text CarloStrapparava - RadaMihalcea + RadaMihalcea 70–74 S07-1013 strapparava-mihalcea-2007-semeval @@ -138,11 +138,11 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 15: <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val Temporal Relation Identification MarcVerhagen - RobertGaizauskas + RobertGaizauskas FrankSchilder MarkHepple - GrahamKatz - JamesPustejovsky + GrahamKatz + JamesPustejovsky 75–80 S07-1014 verhagen-etal-2007-semeval @@ -150,37 +150,37 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 16: Evaluation of Wide Coverage Knowledge Resources MontseCuadros - GermanRigau + GermanRigau 81–86 S07-1015 cuadros-rigau-2007-semeval <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task-17: <fixed-case>E</fixed-case>nglish Lexical Sample, <fixed-case>SRL</fixed-case> and All Words - SameerPradhan + SameerPradhan EdwardLoper DmitriyDligach - MarthaPalmer + MarthaPalmer 87–92 S07-1016 pradhan-etal-2007-semeval <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 18: <fixed-case>A</fixed-case>rabic Semantic Labeling - MonaDiab + MonaDiab MusaAlkhalifa SabryElKateb ChristianeFellbaum AousMansouri - MarthaPalmer + MarthaPalmer 93–98 S07-1017 diab-etal-2007-semeval <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 Task 19: Frame Semantic Structure Extraction - CollinBaker - MichaelEllsworth + CollinBaker + MichaelEllsworth KatrinErk 99–104 S07-1018 @@ -189,7 +189,7 @@ <fixed-case>AUG</fixed-case>: A combined classification and clustering approach for web people disambiguation ElsLefever - VéroniqueHoste + VéroniqueHoste TimurFayruzov 105–108 S07-1019 @@ -197,7 +197,7 @@ <fixed-case>CITYU</fixed-case>-<fixed-case>HIF</fixed-case>: <fixed-case>WSD</fixed-case> with Human-Informed Feature Preference - Oi YeeKwong + Oi YeeKwong 109–112 S07-1020 kwong-2007-cityu @@ -228,7 +228,7 @@ <fixed-case>CU</fixed-case>-<fixed-case>COMSEM</fixed-case>: Exploring Rich Features for Unsupervised Web Personal Name Disambiguation YingChen - James H.Martin + James H.Martin 125–128 S07-1024 chen-martin-2007-cu @@ -236,14 +236,14 @@ <fixed-case>CU</fixed-case>-<fixed-case>TMP</fixed-case>: Temporal Relation Classification Using Syntactic and Semantic Features StevenBethard - James H.Martin + James H.Martin 129–132 S07-1025 bethard-martin-2007-cu <fixed-case>CUNIT</fixed-case>: A Semantic Role Labeling System for <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic - MonaDiab + MonaDiab AlessandroMoschitti DanielePighin 133–136 @@ -253,25 +253,25 @@ <fixed-case>DFKI</fixed-case>2: An Information Extraction Based Approach to People Disambiguation AndreaHeyl - GünterNeumann + GünterNeumann 137–140 S07-1027 heyl-neumann-2007-dfki2 <fixed-case>FBK</fixed-case>-<fixed-case>IRST</fixed-case>: Kernel Methods for Semantic Relation Extraction - ClaudioGiuliano - AlbertoLavelli + ClaudioGiuliano + AlbertoLavelli DanielePighin - LorenzaRomano + LorenzaRomano 141–144 S07-1028 giuliano-etal-2007-fbk <fixed-case>FBK</fixed-case>-irst: Lexical Substitution Task Exploiting Domain and Syntagmatic Coherence - ClaudioGiuliano - AlfioGliozzo + ClaudioGiuliano + AlfioGliozzo CarloStrapparava 145–148 S07-1029 @@ -294,19 +294,19 @@ <fixed-case>GPLSI</fixed-case>: Word Coarse-grained Disambiguation aided by Basic Level Concepts - RubénIzquierdo - ArmandoSuárez - GermanRigau + RubénIzquierdo + ArmandoSuárez + GermanRigau 157–160 S07-1032 izquierdo-etal-2007-gplsi <fixed-case>GYDER</fixed-case>: Maxent Metonymy Resolution - RichárdFarkas + RichárdFarkas EszterSimon GyörgySzarvas - DánielVarga + DánielVarga 161–164 S07-1033 farkas-etal-2007-gyder @@ -324,9 +324,9 @@ <fixed-case>HIT</fixed-case>-<fixed-case>WSD</fixed-case>: Using Search Engine for Multilingual <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Lexical Sample Task - PengYuanLiu - TieJunZhao - MuYunYang + PengYuanLiu + TieJunZhao + MuYunYang 169–172 S07-1035 liu-etal-2007-hit @@ -344,9 +344,9 @@ <fixed-case>I</fixed-case>2<fixed-case>R</fixed-case>: Three Systems for Word Sense Discrimination, <fixed-case>C</fixed-case>hinese Word Sense Disambiguation, and <fixed-case>E</fixed-case>nglish Word Sense Disambiguation - Zheng-YuNiu - Dong-HongJi - Chew-LimTan + Zheng-YuNiu + Dong-HongJi + Chew-LimTan 177–182 S07-1037 niu-etal-2007-i2r @@ -364,7 +364,7 @@ IrisHendrickx RoserMorante CarolineSporleder - Antalvan den Bosch + Antalvan den Bosch 187–190 S07-1039 hendrickx-etal-2007-ilk @@ -373,7 +373,7 @@ <fixed-case>IRST</fixed-case>-<fixed-case>BP</fixed-case>: Preposition Disambiguation based on Chain Clarifying Relationships Contexts OctavianPopescu SaraTonelli - EmanuelePianta + EmanuelePianta 191–194 S07-1040 popescu-etal-2007-irst @@ -381,7 +381,7 @@ <fixed-case>IRST</fixed-case>-<fixed-case>BP</fixed-case>: Web People Search Using Name Entities OctavianPopescu - BernardoMagnini + BernardoMagnini 195–198 S07-1041 popescu-magnini-2007-irst @@ -397,8 +397,8 @@ <fixed-case>JU</fixed-case>-<fixed-case>SKNSB</fixed-case>: Extended <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Based <fixed-case>WSD</fixed-case> on the <fixed-case>E</fixed-case>nglish All-Words Task at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-1 - Sudip KumarNaskar - SivajiBandyopadhyay + Sudip KumarNaskar + SivajiBandyopadhyay 203–206 S07-1043 naskar-bandyopadhyay-2007-ju @@ -412,8 +412,8 @@ <fixed-case>LCC</fixed-case>-<fixed-case>SRN</fixed-case>: <fixed-case>LCC</fixed-case>’s <fixed-case>SRN</fixed-case> System for <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2007 Task 4 - AdrianaBadulescu - MunirathnamSrikanth + AdrianaBadulescu + MunirathnamSrikanth 215–218 S07-1045 badulescu-srikanth-2007-lcc @@ -421,7 +421,7 @@ <fixed-case>LCC</fixed-case>-<fixed-case>TE</fixed-case>: A Hybrid Approach to Temporal Relation Identification in News Text CongminMin - MunirathnamSrikanth + MunirathnamSrikanth AbrahamFowler 219–222 S07-1046 @@ -430,8 +430,8 @@ <fixed-case>LCC</fixed-case>-<fixed-case>WSD</fixed-case>: System Description for <fixed-case>E</fixed-case>nglish Coarse Grained All Words Task at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2007 AdrianNovischi - MuirathnamSrikanth - AndrewBennett + MuirathnamSrikanth + AndrewBennett 223–226 S07-1047 novischi-etal-2007-lcc @@ -447,16 +447,16 @@ <fixed-case>MELB</fixed-case>-<fixed-case>KB</fixed-case>: Nominal Classification as Noun Compound Interpretation Su NamKim - TimothyBaldwin + TimothyBaldwin 231–236 S07-1049 kim-baldwin-2007-melb <fixed-case>MELB</fixed-case>-<fixed-case>MKB</fixed-case>: Lexical Substitution system based on Relatives in Context - DavidMartinez + DavidMartinez Su NamKim - TimothyBaldwin + TimothyBaldwin 237–240 S07-1050 martinez-etal-2007-melb @@ -464,7 +464,7 @@ <fixed-case>MELB</fixed-case>-<fixed-case>YB</fixed-case>: Preposition Sense Disambiguation Using Rich Semantic Features PatrickYe - TimothyBaldwin + TimothyBaldwin 241–244 S07-1051 ye-baldwin-2007-melb @@ -473,14 +473,14 @@ <fixed-case>NAIST</fixed-case>.<fixed-case>J</fixed-case>apan: Temporal Relation Identification Using Dependency Parsed Tree YuchangCheng MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 245–248 S07-1052 cheng-etal-2007-naist <fixed-case>NUS</fixed-case>-<fixed-case>ML</fixed-case>:Improving Word Sense Disambiguation Using Topic Features - Jun FuCai + Jun FuCai Wee SunLee Yee WhyeTeh 249–252 @@ -517,7 +517,7 @@ <fixed-case>PNNL</fixed-case>: A Supervised Maximum Entropy Approach to Word Sense Disambiguation StephenTratz AntonioSanfilippo - MichelleGregory + MichelleGregory AlanChappell ChristianPosse PaulWhitney @@ -540,7 +540,7 @@ <fixed-case>PU</fixed-case>-<fixed-case>BCD</fixed-case>: Exponential Family Models for the Coarse- and Fine-Grained All-Words Tasks JonathanChang MiroslavDudík - DavidBlei + DavidBlei 272–276 S07-1059 chang-etal-2007-pu @@ -548,7 +548,7 @@ <fixed-case>PUTOP</fixed-case>: Turning Predominant Senses into a Topic Model for Word Sense Disambiguation JordanBoyd-Graber - DavidBlei + DavidBlei 277–281 S07-1060 boyd-graber-blei-2007-putop @@ -556,7 +556,7 @@ <fixed-case>RACAI</fixed-case>: Meaning Affinity Models RaduIon - DanTufiş + DanTufiş 282–287 S07-1061 ion-tufis-2007-racai @@ -565,7 +565,7 @@ <fixed-case>RTV</fixed-case>: Tree Kernels for Thematic Role Classification DanielePighin AlessandroMoschitti - RobertoBasili + RobertoBasili 288–291 S07-1062 pighin-etal-2007-rtv @@ -614,7 +614,7 @@ <fixed-case>S</fixed-case>ussx: <fixed-case>WSD</fixed-case> using Automatically Acquired Predominant Senses RobKoeling - DianaMcCarthy + DianaMcCarthy 314–317 S07-1068 koeling-mccarthy-2007-sussx @@ -622,7 +622,7 @@ <fixed-case>TITPI</fixed-case>: Web People Search Task Using Semi-Supervised Clustering Approach KazunariSugiyama - ManabuOkumura + ManabuOkumura 318–321 S07-1069 sugiyama-okumura-2007-titpi @@ -638,7 +638,7 @@ Tor, <fixed-case>T</fixed-case>or<fixed-case>M</fixed-case>d: Distributional Profiles of Concepts for Unsupervised Word Sense Disambiguation - SaifMohammad + SaifMohammad GraemeHirst PhilipResnik 326–333 @@ -648,9 +648,9 @@ <fixed-case>UA</fixed-case>-<fixed-case>ZBSA</fixed-case>: A Headline Emotion Classification through Web Information ZornitsaKozareva - BorjaNavarro - SoniaVázquez - AndrésMontoyo + BorjaNavarro + SoniaVázquez + AndrésMontoyo 334–337 S07-1072 kozareva-etal-2007-ua @@ -658,8 +658,8 @@ <fixed-case>UA</fixed-case>-<fixed-case>ZSA</fixed-case>: Web Page Clustering on the basis of Name Disambiguation ZornitsaKozareva - SoniaVazquez - AndresMontoyo + SoniaVazquez + AndresMontoyo 338–341 S07-1073 kozareva-etal-2007-ua-zsa @@ -667,7 +667,7 @@ <fixed-case>UBC</fixed-case>-<fixed-case>ALM</fixed-case>: Combining k-<fixed-case>NN</fixed-case> with <fixed-case>SVD</fixed-case> for <fixed-case>WSD</fixed-case> EnekoAgirre - OierLopez de Lacalle + OierLopez de Lacalle 342–345 S07-1074 agirre-lopez-de-lacalle-2007-ubc @@ -675,17 +675,17 @@ <fixed-case>UBC</fixed-case>-<fixed-case>AS</fixed-case>: A Graph Based Unsupervised System for Induction and Classification EnekoAgirre - AitorSoroa + AitorSoroa 346–349 S07-1075 agirre-soroa-2007-ubc <fixed-case>UBC</fixed-case>-<fixed-case>UMB</fixed-case>: Combining unsupervised and supervised systems for all-words <fixed-case>WSD</fixed-case> - DavidMartinez - TimothyBaldwin + DavidMartinez + TimothyBaldwin EnekoAgirre - OierLopez de Lacalle + OierLopez de Lacalle 350–353 S07-1076 martinez-etal-2007-ubc @@ -719,15 +719,15 @@ <fixed-case>UCB</fixed-case>: System Description for <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val Task #4 - PreslavNakov - MartiHearst + PreslavNakov + MartiHearst 366–369 S07-1080 nakov-hearst-2007-ucb <fixed-case>UCD</fixed-case>-<fixed-case>FC</fixed-case>: Deducing semantic relations using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et senses that occur frequently in a database of noun-noun compounds - Fintan J.Costello + Fintan J.Costello 370–373 S07-1081 costello-2007-ucd @@ -749,7 +749,7 @@ <fixed-case>UCM</fixed-case>3: Classification of Semantic Relations between Nominals using Sequential Minimal Optimization - IsabelSegura Bedmar + IsabelSegura Bedmar DoaaSamy Jose L.Martinez 382–385 @@ -763,7 +763,7 @@ BrantChee AndrewFister AllaRozovskaya - RoxanaGirju + RoxanaGirju 386–389 S07-1085 beamer-etal-2007-uiuc @@ -790,7 +790,7 @@ Marcode Gemmis Anna LisaGentile PasqualeLops - GiovanniSemeraro + GiovanniSemeraro 398–401 S07-1088 basile-etal-2007-uniba @@ -805,8 +805,8 @@ <fixed-case>UNT</fixed-case>-Yahoo: <fixed-case>S</fixed-case>uper<fixed-case>S</fixed-case>ense<fixed-case>L</fixed-case>earner: Combining <fixed-case>S</fixed-case>ense<fixed-case>L</fixed-case>earner with <fixed-case>S</fixed-case>uper<fixed-case>S</fixed-case>ense and other Coarse Semantic Features - RadaMihalcea - AndrasCsomai + RadaMihalcea + AndrasCsomai MassimilianoCiaramita 406–409 S07-1090 @@ -815,17 +815,17 @@ <fixed-case>UNT</fixed-case>: <fixed-case>S</fixed-case>ub<fixed-case>F</fixed-case>inder: Combining Knowledge Sources for Automatic Lexical Substitution SamerHassan - AndrasCsomai + AndrasCsomai CarmenBanea RaviSinha - RadaMihalcea + RadaMihalcea 410–413 S07-1091 hassan-etal-2007-unt <fixed-case>UOY</fixed-case>: A Hypergraph Model For Word Sense Induction & Disambiguation - IoannisKlapaftis + IoannisKlapaftis SureshManandhar 414–417 S07-1092 @@ -848,16 +848,16 @@ <fixed-case>UPC</fixed-case>: Experiments with Joint Learning within <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val Task 9 LluísMàrquez - LluísPadró + LluísPadró MihaiSurdeanu - LuisVillarejo + LuisVillarejo 426–429 S07-1095 marquez-etal-2007-upc <fixed-case>UPV</fixed-case>-<fixed-case>SI</fixed-case>: Word Sense Induction using Self Term Expansion - DavidPinto + DavidPinto PaoloRosso HéctorJiménez-Salazar 430–433 @@ -875,8 +875,8 @@ <fixed-case>USFD</fixed-case>: Preliminary Exploration of Features and Classifiers for the <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2007 Task MarkHepple - AndreaSetzer - RobertGaizauskas + AndreaSetzer + RobertGaizauskas 438–441 S07-1098 hepple-etal-2007-usfd @@ -884,7 +884,7 @@ <fixed-case>USP</fixed-case>-<fixed-case>IBM</fixed-case>-1 and <fixed-case>USP</fixed-case>-<fixed-case>IBM</fixed-case>-2: The <fixed-case>ILP</fixed-case>-based Systems for Lexical Sample <fixed-case>WSD</fixed-case> in <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2007 LuciaSpecia - Mariadas Graças + Mariadas Graças VolpeNunes AshwinSrinivasan GaneshRamakrishnan @@ -903,14 +903,14 @@ <fixed-case>UTD</fixed-case>-<fixed-case>HLT</fixed-case>-<fixed-case>CG</fixed-case>: Semantic Architecture for Metonymy Resolution and Classification of Nominal Relations CristinaNicolae GabrielNicolae - SandaHarabagiu + SandaHarabagiu 454–459 S07-1101 nicolae-etal-2007-utd <fixed-case>UTD</fixed-case>-<fixed-case>SRL</fixed-case>: A Pipeline Architecture for Extracting Frame Semantic Structures - Cosmin AdrianBejan + Cosmin AdrianBejan ChrisHathaway 460–463 S07-1102 @@ -930,14 +930,14 @@ <fixed-case>UVA</fixed-case>: Language Modeling Techniques for Web People Search KrisztianBalog LeifAzzopardi - Maartende Rijke + Maartende Rijke 468–471 S07-1104 balog-etal-2007-uva <fixed-case>UVAVU</fixed-case>: <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Similarity and Lexical Patterns for Semantic Relation Classification - Willem Robertvan Hage + Willem Robertvan Hage SophiaKatrenko 472–475 S07-1105 @@ -946,14 +946,14 @@ <fixed-case>U</fixed-case>of<fixed-case>L</fixed-case>: Word Sense Disambiguation Using Lexical Cohesion YlliasChali - Shafiq R.Joty + Shafiq R.Joty 476–479 S07-1106 chali-joty-2007-uofl <fixed-case>WIT</fixed-case>: Web People Search Disambiguation using Random Walks - JoséIria + JoséIria LeiXia ZiqiZhang 480–483 diff --git a/data/xml/S10.xml b/data/xml/S10.xml index d648768067..c224ad0a40 100644 --- a/data/xml/S10.xml +++ b/data/xml/S10.xml @@ -19,12 +19,12 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 1: Coreference Resolution in Multiple Languages MartaRecasens - LluísMàrquez + LluísMàrquez EmiliSapena - M. AntòniaMartí - MarionaTaulé - VéroniqueHoste - MassimoPoesio + M. AntòniaMartí + MarionaTaulé + VéroniqueHoste + MassimoPoesio YannickVersley 1–8 S10-1001 @@ -32,9 +32,9 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 2: Cross-Lingual Lexical Substitution - RadaMihalcea + RadaMihalcea RaviSinha - DianaMcCarthy + DianaMcCarthy 9–14 S10-1002 mihalcea-etal-2010-semeval @@ -42,7 +42,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 3: Cross-Lingual Word Sense Disambiguation ElsLefever - VeroniqueHoste + VeroniqueHoste 15–20 S10-1003 lefever-hoste-2010-semeval @@ -52,14 +52,14 @@ Su NamKim OlenaMedelyan Min-YenKan - TimothyBaldwin + TimothyBaldwin 21–26 S10-1004 kim-etal-2010-semeval <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 7: Argument Selection and Coercion - JamesPustejovsky + JamesPustejovsky AnnaRumshisky AlexPlotnick ElisabettaJezek @@ -74,12 +74,12 @@ IrisHendrickx Su NamKim ZornitsaKozareva - PreslavNakov + PreslavNakov DiarmuidÓ Séaghdha - SebastianPadó + SebastianPadó MarcoPennacchiotti - LorenzaRomano - StanSzpakowicz + LorenzaRomano + StanSzpakowicz 33–38 S10-1006 hendrickx-etal-2010-semeval @@ -88,9 +88,9 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2 Task 9: The Interpretation of Noun Compounds Using Paraphrasing Verbs and Prepositions CristinaButnariu Su NamKim - PreslavNakov + PreslavNakov DiarmuidÓ Séaghdha - StanSzpakowicz + StanSzpakowicz TonyVeale 39–44 S10-1007 @@ -101,8 +101,8 @@ JosefRuppenhofer CarolineSporleder RoserMorante - CollinBaker - MarthaPalmer + CollinBaker + MarthaPalmer 45–50 S10-1008 ruppenhofer-etal-2010-semeval @@ -119,9 +119,9 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 13: <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2 MarcVerhagen - RoserSaurí + RoserSaurí TommasoCaselli - JamesPustejovsky + JamesPustejovsky 57–62 S10-1010 verhagen-etal-2010-semeval @@ -129,16 +129,16 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 14: Word Sense Induction &Disambiguation SureshManandhar - IoannisKlapaftis + IoannisKlapaftis DmitriyDligach - SameerPradhan + SameerPradhan 63–68 S10-1011 manandhar-etal-2010-semeval <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task: <fixed-case>J</fixed-case>apanese <fixed-case>WSD</fixed-case> - ManabuOkumura + ManabuOkumura KiyoakiShirai KanakoKomiya HikaruYokono @@ -148,14 +148,14 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 17: All-Words Word Sense Disambiguation on a Specific Domain - EnekoAgirre - OierLopez de Lacalle + EnekoAgirre + OierLopez de Lacalle ChristianeFellbaum - Shu-KaiHsieh - MaurizioTesconi + Shu-KaiHsieh + MaurizioTesconi MonicaMonachini PiekVossen - RoxanneSegers + RoxanneSegers 75–80 S10-1013 agirre-etal-2010-semeval @@ -186,8 +186,8 @@ <fixed-case>R</fixed-case>elax<fixed-case>C</fixed-case>or: A Global Relaxation Labeling Approach to Coreference Resolution EmiliSapena - LluísPadró - JordiTurmo + LluísPadró + JordiTurmo 88–91 S10-1017 sapena-etal-2010-relaxcor @@ -195,7 +195,7 @@ <fixed-case>SUCRE</fixed-case>: A Modular System for Coreference Resolution HamidrezaKobdani - HinrichSchütze + HinrichSchütze 92–95 S10-1018 kobdani-schutze-2010-sucre @@ -203,7 +203,7 @@ <fixed-case>UBIU</fixed-case>: A Language-Independent System for Coreference Resolution DesislavaZhekova - SandraKübler + SandraKübler 96–99 S10-1019 zhekova-kubler-2010-ubiu @@ -218,10 +218,10 @@ <fixed-case>BART</fixed-case>: A Multilingual Anaphora Resolution System SamuelBroscheit - MassimoPoesio - Simone PaoloPonzetto - Kepa JosebaRodriguez - LorenzaRomano + MassimoPoesio + Simone PaoloPonzetto + Kepa JosebaRodriguez + LorenzaRomano OlgaUryupina YannickVersley RobertoZanoli @@ -233,18 +233,18 @@ <fixed-case>TANL</fixed-case>-1: Coreference Resolution by Parse Analysis and Similarity Clustering GiuseppeAttardi MariaSimi - StefanoDei Rossi + StefanoDei Rossi 108–111 S10-1022 attardi-etal-2010-tanl <fixed-case>FCC</fixed-case>: Modeling Probabilities with <fixed-case>GIZA</fixed-case>++ for Task 2 and 3 of <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2 - DarnesVilariño Ayala + DarnesVilariño Ayala CarlosBalderas Posada - David EduardoPinto Avendaño - MiguelRodríguez Hernández - SaulLeón Silverio + David EduardoPinto Avendaño + MiguelRodríguez Hernández + SaulLeón Silverio 112–116 S10-1023 vilarino-ayala-etal-2010-fcc @@ -269,7 +269,7 @@ <fixed-case>COLEPL</fixed-case> and <fixed-case>COLSLM</fixed-case>: An Unsupervised <fixed-case>WSD</fixed-case> Approach to Multilingual Lexical Substitution, Tasks 2 and 3 <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2010 WeiweiGuo - MonaDiab + MonaDiab 129–133 S10-1026 guo-diab-2010-colepl @@ -277,7 +277,7 @@ <fixed-case>UHD</fixed-case>: Cross-Lingual Word Sense Disambiguation Using Multilingual Co-Occurrence Graphs CarinaSilberer - Simone PaoloPonzetto + Simone PaoloPonzetto 134–137 S10-1027 silberer-ponzetto-2010-uhd @@ -286,8 +286,8 @@ <fixed-case>OWNS</fixed-case>: Cross-lingual Word Sense Disambiguation Using Weighted Overlap Counts and <fixed-case>W</fixed-case>ordnet Based Similarity Measures LiptaMahapatra MeeraMohan - MiteshKhapra - PushpakBhattacharyya + MiteshKhapra + PushpakBhattacharyya 138–141 S10-1028 mahapatra-etal-2010-owns @@ -312,7 +312,7 @@ <fixed-case>DFKI</fixed-case> <fixed-case>K</fixed-case>ey<fixed-case>WE</fixed-case>: Ranking Keyphrases Extracted from Scientific Articles KathrinEichler - GünterNeumann + GünterNeumann 150–153 S10-1031 eichler-neumann-2010-dfki @@ -343,14 +343,14 @@ <fixed-case>WINGNUS</fixed-case>: Keyphrase Extraction Utilizing Document Logical Structure Thuy DungNguyen - Minh-ThangLuong + Minh-ThangLuong 166–169 S10-1035 nguyen-luong-2010-wingnus <fixed-case>KX</fixed-case>: A Flexible System for Keyphrase e<fixed-case>X</fixed-case>traction - EmanuelePianta + EmanuelePianta SaraTonelli 170–173 S10-1036 @@ -359,7 +359,7 @@ <fixed-case>BUAP</fixed-case>: An Unsupervised Approach to Automatic Keyphrase Extraction from Scientific Articles RobertoOrtiz - DavidPinto + DavidPinto MireyaTovar HéctorJiménez-Salazar 174–177 @@ -370,7 +370,7 @@ <fixed-case>UNPMC</fixed-case>: Naive Approach to Extract Keyphrases from Scientific Articles JungyeulPark Jong GunLee - BéatriceDaille + BéatriceDaille 178–181 S10-1038 park-etal-2010-unpmc @@ -388,14 +388,14 @@ <fixed-case>SZTERGAK</fixed-case> : Feature Engineering for Keyphrase Extraction GáborBerend - RichárdFarkas + RichárdFarkas 186–189 S10-1040 berend-farkas-2010-sztergak <fixed-case>KP</fixed-case>-Miner: Participation in <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2 - Samhaa R.El-Beltagy + Samhaa R.El-Beltagy AhmedRafea 190–193 S10-1041 @@ -403,7 +403,7 @@ <fixed-case>U</fixed-case>v<fixed-case>T</fixed-case>: The <fixed-case>U</fixed-case>v<fixed-case>T</fixed-case> Term Extraction System in the Keyphrase Extraction Task - KalliopiZervanou + KalliopiZervanou 194–197 S10-1042 zervanou-2010-uvt @@ -417,7 +417,7 @@ <fixed-case>FBK</fixed-case>_<fixed-case>NK</fixed-case>: A <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-Based System for Multi-Way Classification of Semantic Relations - MatteoNegri + MatteoNegri MilenKouylekov 202–205 S10-1044 @@ -428,7 +428,7 @@ SantanuPal ParthaPakray DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 206–209 S10-1045 pal-etal-2010-ju @@ -444,7 +444,7 @@ <fixed-case>FBK</fixed-case>-<fixed-case>IRST</fixed-case>: Semantic Relation Extraction Using <fixed-case>C</fixed-case>yc KaterynaTymoshenko - ClaudioGiuliano + ClaudioGiuliano 214–217 S10-1047 tymoshenko-giuliano-2010-fbk @@ -461,7 +461,7 @@ <fixed-case>ISI</fixed-case>: Automatic Classification of Relations Between Nominals Using a Maximum Entropy Classifier StephenTratz - EduardHovy + EduardHovy 222–225 S10-1049 tratz-hovy-2010-isi @@ -469,9 +469,9 @@ <fixed-case>ECNU</fixed-case>: Effective Semantic Relations Classification without Complicated Features or Multiple External Corpora YuanChen - ManLan + ManLan JianSu - Zhi MinZhou + Zhi MinZhou YuXu 226–229 S10-1050 @@ -489,7 +489,7 @@ <fixed-case>UCD</fixed-case>-<fixed-case>PN</fixed-case>: Selecting General Paraphrases Using Conditional Probability PaulNulty - FintanCostello + FintanCostello 234–237 S10-1052 nulty-costello-2010-ucd @@ -504,7 +504,7 @@ <fixed-case>UBA</fixed-case>: Using Automatic Translation and <fixed-case>W</fixed-case>ikipedia for Cross-Lingual Lexical Substitution PierpaoloBasile - GiovanniSemeraro + GiovanniSemeraro 242–247 S10-1054 basile-semeraro-2010-uba @@ -512,7 +512,7 @@ <fixed-case>HUMB</fixed-case>: Automatic Key Term Extraction from Scientific Articles in <fixed-case>GROBID</fixed-case> PatriceLopez - LaurentRomary + LaurentRomary 248–251 S10-1055 lopez-romary-2010-humb @@ -520,7 +520,7 @@ <fixed-case>UTDM</fixed-case>et: Combining <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and Corpus Data for Argument Coercion Detection KirkRoberts - SandaHarabagiu + SandaHarabagiu 252–255 S10-1056 roberts-harabagiu-2010-utdmet @@ -528,7 +528,7 @@ <fixed-case>UTD</fixed-case>: Classifying Semantic Relations by Combining Lexical and Semantic Resources BryanRink - SandaHarabagiu + SandaHarabagiu 256–259 S10-1057 rink-harabagiu-2010-utd @@ -545,7 +545,7 @@ DesaiChen NathanSchneider DipanjanDas - Noah A.Smith + Noah A.Smith 264–267 S10-1059 chen-etal-2010-semafor @@ -569,16 +569,16 @@ <fixed-case>TRIPS</fixed-case> and <fixed-case>TRIOS</fixed-case> System for <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2: Extracting Temporal Information from Text NaushadUzZaman - JamesAllen + JamesAllen 276–283 S10-1062 uzzaman-allen-2010-trips <fixed-case>TIPS</fixed-case>em (<fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>panish): Evaluating <fixed-case>CRF</fixed-case>s and Semantic Roles in <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2 - HectorLlorens - EstelaSaquete - BorjaNavarro + HectorLlorens + EstelaSaquete + BorjaNavarro 284–291 S10-1063 llorens-etal-2010-tipsem @@ -586,7 +586,7 @@ <fixed-case>C</fixed-case>ity<fixed-case>U</fixed-case>-<fixed-case>DAC</fixed-case>: Disambiguating Sentiment-Ambiguous Adjectives within Context BinLu - Benjamin K.Tsou + Benjamin K.Tsou 292–295 S10-1064 lu-tsou-2010-cityu @@ -594,7 +594,7 @@ <fixed-case>VENSES</fixed-case>++: Adapting a deep semantic processing system to the identification of null instantiations SaraTonelli - RodolfoDelmonte + RodolfoDelmonte 296–299 S10-1065 tonelli-delmonte-2010-venses @@ -609,9 +609,9 @@ <fixed-case>PKU</fixed-case>_<fixed-case>HIT</fixed-case>: An Event Detection System Based on Instances Expansion and Rich Syntactic Features ShiqiLi - PengyuanLiu - TiejunZhao - QinLu + PengyuanLiu + TiejunZhao + QinLu HanjingLi 304–307 S10-1067 @@ -620,7 +620,7 @@ 372:Comparing the Benefit of Different Dependency Parsers for Textual Entailment Using Syntactic Constraints Only AlexanderVolokh - GünterNeumann + GünterNeumann 308–312 S10-1068 volokh-neumann-2010-372 @@ -630,14 +630,14 @@ DominickNg James W.D.Constable MatthewHonnibal - James R.Curran + James R.Curran 313–316 S10-1069 ng-etal-2010-schwa <fixed-case>ID 392:TERSEO + T2T3</fixed-case> Transducer. A systems for Recognizing and Normalizing <fixed-case>TIMEX3</fixed-case> - EstelaSaquete Boro + EstelaSaquete Boro 317–320 S10-1070 saquete-boro-2010-id @@ -653,7 +653,7 @@ <fixed-case>KUL</fixed-case>: Recognition and Normalization of Temporal Expressions OleksandrKolomiyets - Marie-FrancineMoens + Marie-FrancineMoens 325–328 S10-1072 kolomiyets-moens-2010-kul @@ -661,7 +661,7 @@ <fixed-case>UC</fixed-case>3<fixed-case>M</fixed-case> System: Determining the Extent, Type and Value of Time Expressions in <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2 María TeresaVicente-Díez - JuliánMoreno Schneider + JuliánMoreno Schneider PalomaMartínez 329–332 S10-1073 @@ -671,7 +671,7 @@ <fixed-case>E</fixed-case>dinburgh-<fixed-case>LTG</fixed-case>: <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2 System Description ClaireGrover RichardTobin - BeatriceAlex + BeatriceAlex KateByrne 333–336 S10-1074 @@ -679,27 +679,27 @@ <fixed-case>USFD</fixed-case>2: Annotating Temporal Expresions and <fixed-case>TLINK</fixed-case>s for <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2 - LeonDerczynski - RobertGaizauskas + LeonDerczynski + RobertGaizauskas 337–340 S10-1075 derczynski-gaizauskas-2010-usfd2 <fixed-case>NCSU</fixed-case>: Modeling Temporal Relations with <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic and Lexical Ontology - EunHa + EunHa AlokBaikadi CarlyleLicata - JamesLester + JamesLester 341–344 S10-1076 ha-etal-2010-ncsu <fixed-case>JU</fixed-case>_<fixed-case>CSE</fixed-case>_<fixed-case>TEMP</fixed-case>: A First Step towards Evaluating Events, Time Expressions and Temporal Relations - AnupKumar Kolya + AnupKumar Kolya AsifEkbal - SivajiBandyopadhyay + SivajiBandyopadhyay 345–350 S10-1077 kumar-kolya-etal-2010-ju @@ -747,10 +747,10 @@ <fixed-case>P</fixed-case>eng<fixed-case>Y</fixed-case>uan@<fixed-case>PKU</fixed-case>: Extracting Infrequent Sense Instance with the Same N-Gram Pattern for the <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 15 - Peng-YuanLiu + Peng-YuanLiu Shi-WenYu ShuiLiu - Tie-JunZhao + Tie-JunZhao 371–374 S10-1083 liu-etal-2010-pengyuan @@ -759,7 +759,7 @@ <fixed-case>RALI</fixed-case>: Automatic Weighting of Text Window Distances BernardBrosseau-Villeneuve NorikoKando - Jian-YunNie + Jian-YunNie 375–378 S10-1084 brosseau-villeneuve-etal-2010-rali @@ -787,7 +787,7 @@ <fixed-case>IIITH</fixed-case>: Domain Specific Word Sense Disambiguation SivaReddy AbhilashInumella - DianaMcCarthy + DianaMcCarthy MarkStevenson 387–391 S10-1087 @@ -795,7 +795,7 @@ <fixed-case>UCF</fixed-case>-<fixed-case>WS</fixed-case>: Domain Word Sense Disambiguation Using Web Selectors - Hansen A.Schwartz + Hansen A.Schwartz FernandoGomez 392–395 S10-1088 @@ -815,9 +815,9 @@ <fixed-case>GPLSI</fixed-case>-<fixed-case>IXA</fixed-case>: Using Semantic Classes to Acquire Monosemous Training Examples from Domain Texts - RubénIzquierdo - ArmandoSuárez - GermanRigau + RubénIzquierdo + ArmandoSuárez + GermanRigau 402–406 S10-1090 izquierdo-etal-2010-gplsi @@ -836,21 +836,21 @@ <fixed-case>RACAI</fixed-case>: Unsupervised <fixed-case>WSD</fixed-case> Experiments @ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2, Task 17 RaduIon - DanŞtefănescu + DanŞtefănescu 411–416 S10-1092 ion-stefanescu-2010-racai <fixed-case>K</fixed-case>yoto: An Integrated System for Specific Domain <fixed-case>WSD</fixed-case> - AitorSoroa - EnekoAgirre - OierLopez de Lacalle + AitorSoroa + EnekoAgirre + OierLopez de Lacalle WauterBosma PiekVossen MonicaMonachini JessieLo - Shu-KaiHsieh + Shu-KaiHsieh 417–420 S10-1093 soroa-etal-2010-kyoto @@ -858,19 +858,19 @@ <fixed-case>CFILT</fixed-case>: Resource Conscious Approaches for All-Words Domain Specific <fixed-case>WSD</fixed-case> AnupKulkarni - MiteshKhapra + MiteshKhapra SaurabhSohoney - PushpakBhattacharyya + PushpakBhattacharyya 421–426 S10-1094 kulkarni-etal-2010-cfilt <fixed-case>UMCC</fixed-case>-<fixed-case>DLSI</fixed-case>: Integrative Resource for Disambiguation Task - YoanGutiérrez Vázquez - AntonioFernandez Orquín - AndrésMontoyo Guijarro - SoniaVázquez Pérez + YoanGutiérrez Vázquez + AntonioFernandez Orquín + AndrésMontoyo Guijarro + SoniaVázquez Pérez 427–432 S10-1095 gutierrez-vazquez-etal-2010-umcc @@ -885,7 +885,7 @@ <fixed-case>T</fixed-case>witter Based System: Using <fixed-case>T</fixed-case>witter for Disambiguating Sentiment Ambiguous Adjectives AlexanderPak - PatrickParoubek + PatrickParoubek 436–439 S10-1097 pak-paroubek-2010-twitter-based @@ -900,8 +900,8 @@ <fixed-case>O</fixed-case>p<fixed-case>AL</fixed-case>: Applying Opinion Mining Techniques for the Disambiguation of Sentiment Ambiguous Adjectives in <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2 Task 18 - AlexandraBalahur - AndrésMontoyo + AlexandraBalahur + AndrésMontoyo 444–447 S10-1099 balahur-montoyo-2010-opal @@ -910,7 +910,7 @@ <fixed-case>HITSZ</fixed-case>_<fixed-case>CITYU</fixed-case>: Combine Collocation, Context Words and Neighboring Sentence Sentiment in Sentiment Adjectives Disambiguation RuifengXu JunXu - ChunyuKit + ChunyuKit 448–451 S10-1100 xu-etal-2010-hitsz diff --git a/data/xml/S12.xml b/data/xml/S12.xml index 01c3cc7623..5cfab83dfe 100644 --- a/data/xml/S12.xml +++ b/data/xml/S12.xml @@ -4,9 +4,9 @@ *SEM 2012: The First Joint Conference on Lexical and Computational Semantics – Volume 1: Proceedings of the main conference and the shared task, and Volume 2: Proceedings of the Sixth International Workshop on Semantic Evaluation (SemEval 2012) S12-1 - EnekoAgirre + EnekoAgirre JohanBos - MonaDiab + MonaDiab SureshManandhar YuvalMarton DenizYuret @@ -31,7 +31,7 @@ Adaptive Clustering for Coreference Resolution with Deterministic Rules and Web-Based Language Models - RazvanBunescu + RazvanBunescu 11–19 S12-1002 bunescu-2012-adaptive @@ -40,7 +40,7 @@ Measuring Semantic Relatedness using Multilingual Representations SamerHassan CarmenBanea - RadaMihalcea + RadaMihalcea 20–29 S12-1003 hassan-etal-2012-measuring @@ -48,8 +48,8 @@ Towards Building a Multilingual Semantic Network: Identifying Interlingual Links in <fixed-case>W</fixed-case>ikipedia BharathDandala - RadaMihalcea - RazvanBunescu + RadaMihalcea + RazvanBunescu 30–37 S12-1004 dandala-etal-2012-towards @@ -66,7 +66,7 @@ The Use of Granularity in Rhetorical Relation Prediction - BlakeHowald + BlakeHowald MarthaAbramson 44–48 S12-1006 @@ -83,7 +83,7 @@ Detecting Text Reuse with Modified and Weighted N-grams Rao Muhammad AdeelNawab MarkStevenson - PaulClough + PaulClough 54–58 S12-1008 nawab-etal-2012-detecting @@ -108,9 +108,9 @@ Learning Semantics and Selectional Preference of Adjective-Noun Pairs Karl MoritzHermann - ChrisDyer - PhilBlunsom - StephenPulman + ChrisDyer + PhilBlunsom + StephenPulman 70–74 S12-1011 hermann-etal-2012-learning @@ -126,7 +126,7 @@ Towards a Flexible Semantics: Colour Terms in Collaborative Reference Tasks BertBaumgaertner - RaquelFernández + RaquelFernández MatthewStone 80–84 S12-1013 @@ -160,16 +160,16 @@ Combining resources for <fixed-case>MWE</fixed-case>-token classification RichardFothergill - TimothyBaldwin + TimothyBaldwin 100–104 S12-1017 fothergill-baldwin-2012-combining Annotating Preferences in Negotiation Dialogues - AnaïsCadilhac - NicholasAsher - FarahBenamara + AnaïsCadilhac + NicholasAsher + FarahBenamara 105–113 S12-1018 S12-1018e1 @@ -178,8 +178,8 @@ Selecting Corpus-Semantic Models for Neurolinguistic Decoding BrianMurphy - ParthaTalukdar - TomMitchell + ParthaTalukdar + TomMitchell 114–123 S12-1019 murphy-etal-2012-selecting @@ -194,8 +194,8 @@ An Unsupervised Ranking Model for Noun-Noun Compositionality Karl MoritzHermann - PhilBlunsom - StephenPulman + PhilBlunsom + StephenPulman 132–141 S12-1021 hermann-etal-2012-unsupervised @@ -203,15 +203,15 @@ Expanding the Range of Tractable Scope-Underspecified Semantic Representations MehdiManshadi - JamesAllen + JamesAllen 142–150 S12-1022 manshadi-allen-2012-expanding Regular polysemy: A distributional model - GemmaBoleda - SebastianPadó + GemmaBoleda + SebastianPadó JasonUtt 151–160 S12-1023 @@ -237,7 +237,7 @@ Unsupervised Induction of a Syntax-Semantics Lexicon Using Iterative Refinement HagenFürstenau - OwenRambow + OwenRambow 180–188 S12-1026 furstenau-rambow-2012-unsupervised @@ -252,7 +252,7 @@ Ensemble-based Semantic Lexicon Induction for Semantic Tagging AshequlQadir - EllenRiloff + EllenRiloff 199–208 S12-1028 qadir-riloff-2012-ensemble @@ -260,8 +260,8 @@ An Exact Dual Decomposition Algorithm for Shallow Semantic Parsing with Constraints DipanjanDas - André F. T.Martins - Noah A.Smith + André F. T.Martins + Noah A.Smith 209–217 S12-1029 das-etal-2012-exact @@ -276,10 +276,10 @@ The Effects of Semantic Annotations on Precision Parse Ranking - AndrewMacKinlay + AndrewMacKinlay RebeccaDridan - DianaMcCarthy - TimothyBaldwin + DianaMcCarthy + TimothyBaldwin 228–236 S12-1031 mackinlay-etal-2012-effects @@ -295,7 +295,7 @@ #Emotional Tweets - SaifMohammad + SaifMohammad 246–255 S12-1033 mohammad-2012-emotional @@ -327,7 +327,7 @@ <fixed-case>UCM</fixed-case>-<fixed-case>I</fixed-case>: A Rule-based Syntactic Approach for Resolving the Scope of Negation - JorgeCarrillo de Albornoz + JorgeCarrillo de Albornoz LauraPlaza AlbertoDíaz MiguelBallesteros @@ -340,8 +340,8 @@ MiguelBallesteros AlbertoDíaz VirginiaFrancisco - PabloGervás - JorgeCarrillo de Albornoz + PabloGervás + JorgeCarrillo de Albornoz LauraPlaza 288–293 S12-1038 @@ -360,7 +360,7 @@ ValerioBasile JohanBos KilianEvang - NoortjeVenhuizen + NoortjeVenhuizen 301–309 S12-1040 basile-etal-2012-ugroningen @@ -369,7 +369,7 @@ <fixed-case>U</fixed-case>i<fixed-case>O</fixed-case>1: Constituent-Based Discriminative Ranking for Negation Resolution JonathonRead ErikVelldal - LiljaØvrelid + LiljaØvrelid StephanOepen 310–318 S12-1041 @@ -379,7 +379,7 @@ <fixed-case>U</fixed-case>i<fixed-case>O</fixed-case> 2: Sequence-labeling Negation Using Dependency Features EmanueleLapponi ErikVelldal - LiljaØvrelid + LiljaØvrelid JonathonRead 319–327 S12-1042 @@ -388,14 +388,14 @@ <fixed-case>UM</fixed-case>ichigan: A Conditional Random Field Model for Resolving the Scope of Negation AmjadAbu-Jbara - DragomirRadev + DragomirRadev 328–334 S12-1043 abu-jbara-radev-2012-umichigan <fixed-case>UW</fixed-case>ashington: Negation Resolution using Machine Learning Methods - James PaulWhite + James PaulWhite 335–339 S12-1044 white-2012-uwashington @@ -411,7 +411,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2012 Task 1: <fixed-case>E</fixed-case>nglish Lexical Simplification LuciaSpecia Sujay KumarJauhar - RadaMihalcea + RadaMihalcea 347–355 S12-1046 specia-etal-2012-semeval @@ -419,8 +419,8 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2012 Task 2: Measuring Degrees of Relational Similarity DavidJurgens - SaifMohammad - PeterTurney + SaifMohammad + PeterTurney KeithHolyoak 356–364 S12-1047 @@ -430,7 +430,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2012 Task 3: Spatial Role Labeling ParisaKordjamshidi StevenBethard - Marie-FrancineMoens + Marie-FrancineMoens 365–373 S12-1048 kordjamshidi-etal-2012-semeval @@ -458,14 +458,14 @@ EnekoAgirre DanielCer MonaDiab - AitorGonzalez-Agirre + AitorGonzalez-Agirre 385–393 S12-1051 agirre-etal-2012-semeval <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2012 Task 7: Choice of Plausible Alternatives: An Evaluation of Commonsense Causal Reasoning - AndrewGordon + AndrewGordon ZornitsaKozareva MelissaRoemmele 394–398 @@ -474,7 +474,7 @@ <fixed-case>S</fixed-case>emeval-2012 Task 8: Cross-lingual Textual Entailment for Content Synchronization - MatteoNegri + MatteoNegri AlessandroMarchetti YasharMehdad LuisaBentivogli @@ -485,10 +485,10 @@ <fixed-case>EMNLP</fixed-case>@<fixed-case>CPH</fixed-case>: Is frequency all there is to simplicity? - AndersJohannsen - HéctorMartínez + AndersJohannsen + HéctorMartínez SigridKlerke - AndersSøgaard + AndersSøgaard 408–412 S12-1054 johannsen-etal-2012-emnlp @@ -496,7 +496,7 @@ <fixed-case>UTD</fixed-case>: Determining Relational Similarity Using Lexical Patterns BryanRink - SandaHarabagiu + SandaHarabagiu 413–418 S12-1055 rink-harabagiu-2012-utd @@ -504,7 +504,7 @@ <fixed-case>UTD</fixed-case>-<fixed-case>S</fixed-case>p<fixed-case>RL</fixed-case>: A Joint Approach to Spatial Role Labeling KirkRoberts - SandaHarabagiu + SandaHarabagiu 419–424 S12-1056 roberts-harabagiu-2012-utd @@ -513,8 +513,8 @@ <fixed-case>MIXCD</fixed-case>: System Description for Evaluating <fixed-case>C</fixed-case>hinese Word Similarity at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2012 YingjieZhang BinLi - XinyuDai - JiajunChen + XinyuDai + JiajunChen 425–429 S12-1057 zhang-etal-2012-mixcd @@ -531,7 +531,7 @@ <fixed-case>UKP</fixed-case>: Computing Semantic Textual Similarity by Combining Multiple Content Similarity Measures DanielBär - ChrisBiemann + ChrisBiemann IrynaGurevych TorstenZesch 435–440 @@ -542,9 +542,9 @@ <fixed-case>T</fixed-case>ake<fixed-case>L</fixed-case>ab: Systems for Measuring Semantic Text Similarity FraneŠarić GoranGlavaš - Vanja MladenKaran + Vanja MladenKaran JanŠnajder - BojanaDalbelo Bašić + BojanaDalbelo Bašić 441–448 S12-1060 saric-etal-2012-takelab @@ -553,15 +553,15 @@ Soft Cardinality: A Parameterized Similarity Function for Text Comparison SergioJimenez ClaudiaBecerra - AlexanderGelbukh + AlexanderGelbukh 449–453 S12-1061 jimenez-etal-2012-soft <fixed-case>UNED</fixed-case>: Improving Text Similarity Measures without Human Assessments - EnriqueAmigó - JesúsGiménez + EnriqueAmigó + JesúsGiménez JulioGonzalo FelisaVerdejo 454–460 @@ -573,14 +573,14 @@ TravisGoodwin BryanRink KirkRoberts - SandaHarabagiu + SandaHarabagiu 461–466 S12-1063 goodwin-etal-2012-utdhlt <fixed-case>HDU</fixed-case>: Cross-lingual Textual Entailment with <fixed-case>SMT</fixed-case> Features - KatharinaWäschle + KatharinaWäschle SaschaFendrich 467–471 S12-1064 @@ -588,9 +588,9 @@ <fixed-case>UA</fixed-case>lacant: Using Online Machine Translation for Cross-Lingual Textual Entailment - MiquelEsplà-Gomis + MiquelEsplà-Gomis FelipeSánchez-Martínez - Mikel L.Forcada + Mikel L.Forcada 472–476 S12-1065 espla-gomis-etal-2012-ualacant @@ -639,10 +639,10 @@ <fixed-case>BUAP</fixed-case>: A First Approximation to Relational Similarity Measuring MireyaTovar J. AlejandroReyes - AzucenaMontes - DarnesVilariño - DavidPinto - SaulLeón + AzucenaMontes + DarnesVilariño + DavidPinto + SaulLeón 502–505 S12-1071 tovar-etal-2012-buap @@ -651,9 +651,9 @@ <fixed-case>Z</fixed-case>hou qiaoli: A divide-and-conquer strategy for semantic dependency parsing ZhouQiaoli ZhangLing - LiuFei - CaiDongfeng - ZhangGuiping + LiuFei + DongfengCai + GuipingZhang 506–513 S12-1072 zhou-etal-2012-zhou @@ -671,8 +671,8 @@ GuangchaoTang BinLi ShuaishuaiXu - XinyuDai - JiajunChen + XinyuDai + JiajunChen 519–523 S12-1074 tang-etal-2012-nju @@ -680,7 +680,7 @@ <fixed-case>P</fixed-case>oly<fixed-case>UCOMP</fixed-case>: Combining Semantic Vectors with Skip bigrams for Semantic Textual Similarity JianXu - QinLu + QinLu ZhengzhongLiu 524–528 S12-1075 @@ -696,7 +696,7 @@ <fixed-case>S</fixed-case>bdlrhmn: A Rule-based Human Interpretation System for Semantic Textual Similarity Task - SamirAbdelRahman + SamirAbdelRahman CatherineBlake 536–542 S12-1077 @@ -704,7 +704,7 @@ <fixed-case>LIMSI</fixed-case>: Learning Semantic Similarity by Selecting Random Word Subsets - ArtemSokolov + ArtemSokolov 543–546 S12-1078 sokolov-2012-limsi @@ -728,7 +728,7 @@ <fixed-case>DSS</fixed-case>: Text Similarity Using Lexical Alignments of Form, Distributional Semantics and Grammatical Relations - DianaMcCarthy + DianaMcCarthy SpandanaGella SivaReddy 557–564 @@ -750,8 +750,8 @@ <fixed-case>JU</fixed-case>_<fixed-case>CSE</fixed-case>_<fixed-case>NLP</fixed-case>: Multi-grade Classification of Semantic Similarity between Text Pairs SnehasisNeogi ParthaPakray - SivajiBandyopadhyay - AlexanderGelbukh + SivajiBandyopadhyay + AlexanderGelbukh 571–574 S12-1083 neogi-etal-2012-ju @@ -759,7 +759,7 @@ <fixed-case>T</fixed-case>iantianzhu7:System Description of Semantic Textual Similarity (<fixed-case>STS</fixed-case>) in the <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2012 (Task 6) ZhuTiantian - LanMan + ManLan 575–578 S12-1084 zhu-lan-2012-tiantianzhu7 @@ -785,7 +785,7 @@ <fixed-case>UNIBA</fixed-case>: Distributional Semantics for Textual Similarity AnnalinaCaputo PierpaoloBasile - GiovanniSemeraro + GiovanniSemeraro 591–596 S12-1087 caputo-etal-2012-uniba @@ -795,14 +795,14 @@ DaniloCroce PaoloAnnesi ValerioStorch - RobertoBasili + RobertoBasili 597–602 S12-1088 croce-etal-2012-unitor <fixed-case>S</fixed-case>aarland: Vector-based models of semantic textual similarity - GeorgianaDinu + GeorgianaDinu StefanThater 603–607 S12-1089 @@ -810,16 +810,16 @@ <fixed-case>UMCC</fixed-case>_<fixed-case>DLSI</fixed-case>: Multidimensional Lexical-Semantic Textual Similarity - AntonioFernández - YoanGutiérrez + AntonioFernández + YoanGutiérrez HéctorDávila AlexanderChávez AndyGonzález RainelEstrada YenierCastañeda - SoniaVázquez - AndrésMontoyo - RafaelMuñoz + SoniaVázquez + AndrésMontoyo + RafaelMuñoz 608–616 S12-1090 fernandez-etal-2012-umcc @@ -834,8 +834,8 @@ <fixed-case>FBK</fixed-case>: Machine Translation Evaluation and Word Similarity metrics for Semantic Textual Similarity - José GuilhermeCamargo de Souza - MatteoNegri + José GuilhermeCamargo de Souza + MatteoNegri YasharMehdad 624–630 S12-1092 @@ -844,10 +844,10 @@ <fixed-case>FCC</fixed-case>: Three Approaches for Semantic Textual Similarity MayaCarrillo - DarnesVilariño - DavidPinto + DarnesVilariño + DavidPinto MireyaTovar - SaulLeón + SaulLeón EstebanCastillo 631–634 S12-1093 @@ -858,7 +858,7 @@ CarmenBanea SamerHassan MichaelMohler - RadaMihalcea + RadaMihalcea 635–642 S12-1094 banea-etal-2012-unt @@ -896,14 +896,14 @@ janardhan: Semantic Textual Similarity using Universal Networking Language graph matching JanardhanSingh ArindamBhattacharya - PushpakBhattacharyya + PushpakBhattacharyya 662–666 S12-1098 singh-etal-2012-janardhan <fixed-case>SAGAN</fixed-case>: An approach to Semantic Textual Similarity based on Textual Entailment - JulioCastillo + JulioCastillo PaulaEstrella 667–672 S12-1099 @@ -921,8 +921,8 @@ <fixed-case>P</fixed-case>enn: Using Word Similarities to better Estimate Sentence Similarity SnehaJha - Hansen A.Schwartz - LyleUngar + Hansen A.Schwartz + LyleUngar 679–683 S12-1101 jha-etal-2012-penn @@ -931,7 +931,7 @@ Soft Cardinality + <fixed-case>ML</fixed-case>: Learning Adaptive Similarity Functions for Cross-lingual Textual Entailment SergioJimenez ClaudiaBecerra - AlexanderGelbukh + AlexanderGelbukh 684–688 S12-1102 jimenez-etal-2012-soft-cardinality @@ -940,8 +940,8 @@ <fixed-case>JU</fixed-case>_<fixed-case>CSE</fixed-case>_<fixed-case>NLP</fixed-case>: Language Independent Cross-lingual Textual Entailment System SnehasisNeogi ParthaPakray - SivajiBandyopadhyay - AlexanderGelbukh + SivajiBandyopadhyay + AlexanderGelbukh 689–695 S12-1103 neogi-etal-2012-ju-cse @@ -949,7 +949,7 @@ <fixed-case>CELI</fixed-case>: An Experiment with Cross Language Textual Entailment MilenKouylekov - LucaDini + LucaDini AlessioBosca MarcoTrevisan 696–700 @@ -959,18 +959,18 @@ <fixed-case>FBK</fixed-case>: Cross-Lingual Textual Entailment Without Translation YasharMehdad - MatteoNegri - José GuilhermeC. de Souza + MatteoNegri + José GuilhermeC. de Souza 701–705 S12-1105 mehdad-etal-2012-fbk <fixed-case>BUAP</fixed-case>: Lexical and Semantic Similarity for Cross-lingual Textual Entailment - DarnesVilariño - DavidPinto + DarnesVilariño + DavidPinto MireyaTovar - SaulLeón + SaulLeón EstebanCastillo 706–709 S12-1106 @@ -994,7 +994,7 @@ <fixed-case>SAGAN</fixed-case>: A Machine Translation Approach for Cross-Lingual Textual Entailment - JulioCastillo + JulioCastillo MarinaCardenas 721–726 S12-1109 diff --git a/data/xml/S13.xml b/data/xml/S13.xml index 233fea72b0..565a93a539 100644 --- a/data/xml/S13.xml +++ b/data/xml/S13.xml @@ -4,9 +4,9 @@ Second Joint Conference on Lexical and Computational Semantics (*SEM), Volume 1: Proceedings of the Main Conference and the Shared Task: Semantic Textual Similarity S13-1 - MonaDiab - TimBaldwin - MarcoBaroni + MonaDiab + TimBaldwin + MarcoBaroni Association for Computational Linguistics
Atlanta, Georgia, USA
June @@ -19,19 +19,19 @@ Towards a Formal Distributional Semantics: Simulating Logical Calculi with Tensors - EdwardGrefenstette + EdwardGrefenstette 1–10 S13-1001 grefenstette-2013-towards <fixed-case>M</fixed-case>ontague Meets <fixed-case>M</fixed-case>arkov: Deep Semantics with Probabilistic Logical Form - IslamBeltagy + IslamBeltagy CuongChau - GemmaBoleda + GemmaBoleda DanGarrette KatrinErk - RaymondMooney + RaymondMooney 11–21 S13-1002 beltagy-etal-2013-montague @@ -39,18 +39,18 @@ Coarse to Fine Grained Sense Disambiguation in <fixed-case>W</fixed-case>ikipedia HuiShen - RazvanBunescu - RadaMihalcea + RazvanBunescu + RadaMihalcea 22–31 S13-1003 shen-etal-2013-coarse *<fixed-case>SEM</fixed-case> 2013 shared task: Semantic Textual Similarity - EnekoAgirre + EnekoAgirre DanielCer MonaDiab - AitorGonzalez-Agirre + AitorGonzalez-Agirre WeiweiGuo 32–43 S13-1004 @@ -59,8 +59,8 @@ <fixed-case>UMBC</fixed-case>_<fixed-case>EBIQUITY</fixed-case>-<fixed-case>CORE</fixed-case>: Semantic Textual Similarity Systems LushanHan - AbhayL. Kashyap - TimFinin + AbhayL. Kashyap + TimFinin JamesMayfield JonathanWeese 44–52 @@ -80,7 +80,7 @@ <fixed-case>UNITOR</fixed-case>-<fixed-case>CORE</fixed-case>_<fixed-case>TYPED</fixed-case>: Combining Text Similarity and Semantic Filters through <fixed-case>SV</fixed-case> Regression DaniloCroce ValerioStorch - RobertoBasili + RobertoBasili 59–65 S13-1007 croce-etal-2013-unitor @@ -91,7 +91,7 @@ HansMoen LarsBungum GlebSizov - BjörnGambäck + BjörnGambäck AndréLynum 66–73 S13-1008 @@ -128,7 +128,7 @@ <fixed-case>P</fixed-case>oly<fixed-case>UCOMP</fixed-case>-<fixed-case>CORE</fixed-case>_<fixed-case>TYPED</fixed-case>: Computing Semantic Textual Similarity using Overlapped Senses JianXu - QinLu + QinLu 90–95 S13-1012 xu-lu-2013-polyucomp @@ -147,7 +147,7 @@ EliasIosif VassilikiProkopi AlexandrosPotamianos - ShrikanthNarayanan + ShrikanthNarayanan 103–108 S13-1014 malandrakis-etal-2013-deeppurple @@ -156,12 +156,12 @@ <fixed-case>UMCC</fixed-case>_<fixed-case>DLSI</fixed-case>: Textual Similarity based on Lexical-Semantic features AlexanderChávez HéctorDávila - YoanGutiérrez + YoanGutiérrez ArmandoCollazo - José I.Abreu - AntonioFernández Orquín - AndrésMontoyo - RafaelMuñoz + José I.Abreu + AntonioFernández Orquín + AndrésMontoyo + RafaelMuñoz 109–118 S13-1015 chavez-etal-2013-umcc @@ -169,7 +169,7 @@ <fixed-case>BUT</fixed-case>-<fixed-case>TYPED</fixed-case>: Using domain knowledge for computing typed similarity LubomirOtrusina - PavelSmrz + PavelSmrz 119–123 S13-1016 otrusina-smrz-2013-typed @@ -177,17 +177,17 @@ <fixed-case>ECNUCS</fixed-case>: Measuring Short Text Semantic Equivalence Using Multiple Similarity Measurements ZhuTiantian - ManLan + LanMan 124–131 S13-1017 zhu-man-2013-ecnucs <fixed-case>UBC</fixed-case>_<fixed-case>UOS</fixed-case>-<fixed-case>TYPED</fixed-case>: Regression for typed-similarity - EnekoAgirre + EnekoAgirre NikolaosAletras - AitorGonzalez-Agirre - GermanRigau + AitorGonzalez-Agirre + GermanRigau MarkStevenson 132–137 S13-1018 @@ -204,10 +204,10 @@ <fixed-case>UPC</fixed-case>-<fixed-case>CORE</fixed-case>: What Can Machine Translation Evaluation Metrics and <fixed-case>W</fixed-case>ikipedia Do for Estimating Semantic Textual Similarity? AlbertoBarrón-Cedeño - LluísMàrquez - MariaFuentes - HoracioRodríguez - JordiTurmo + LluísMàrquez + MariaFuentes + HoracioRodríguez + JordiTurmo 143–147 S13-1020 barron-cedeno-etal-2013-upc @@ -232,8 +232,8 @@ <fixed-case>LIPN</fixed-case>-<fixed-case>CORE</fixed-case>: Semantic Text Similarity using n-grams, <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et, Syntactic Analysis, <fixed-case>ESA</fixed-case> and Information Retrieval based Features DavideBuscaldi - JosephLe Roux - Jorge J.García Flores + JosephLe Roux + Jorge J.García Flores AdrianPopescu 162–168 S13-1023 @@ -243,14 +243,14 @@ <fixed-case>UNIBA</fixed-case>-<fixed-case>CORE</fixed-case>: Combining Strategies for Semantic Textual Similarity AnnalinaCaputo PierpaoloBasile - GiovanniSemeraro + GiovanniSemeraro 169–175 S13-1024 caputo-etal-2013-uniba <fixed-case>DLS</fixed-case>@<fixed-case>CU</fixed-case>-<fixed-case>CORE</fixed-case>: A Simple Machine Learning Model of Semantic Textual Similarity - Md.Sultan + Md.Sultan StevenBethard TamaraSumner 176–180 @@ -278,7 +278,7 @@ <fixed-case>SOFTCARDINALITY</fixed-case>-<fixed-case>CORE</fixed-case>: Improving Text Overlap with Distributional Measures for Semantic Textual Similarity SergioJimenez ClaudiaBecerra - AlexanderGelbukh + AlexanderGelbukh 194–201 S13-1028 jimenez-etal-2013-softcardinality @@ -306,7 +306,7 @@ <fixed-case>CFILT</fixed-case>-<fixed-case>CORE</fixed-case>: Semantic Textual Similarity using Universal Networking Language AvishekDan - PushpakBhattacharyya + PushpakBhattacharyya 216–220 S13-1031 dan-bhattacharyya-2013-cfilt @@ -319,9 +319,9 @@ SamerHassan MichaelMohler BishanYang - ClaireCardie - RadaMihalcea - JanWiebe + ClaireCardie + RadaMihalcea + JanWiebe 221–228 S13-1032 banea-etal-2013-cpn @@ -329,17 +329,17 @@ <fixed-case>INAOE</fixed-case>_<fixed-case>UPV</fixed-case>-<fixed-case>CORE</fixed-case>: Extracting Word Associations from Document Corpora to estimate Semantic Textual Similarity FernandoSánchez-Vega - ManuelMontes-y-Gómez + ManuelMontes-y-Gómez PaoloRosso - LuisVillaseñor-Pineda + LuisVillaseñor-Pineda 229–233 S13-1033 sanchez-vega-etal-2013-inaoe <fixed-case>CNGL</fixed-case>-<fixed-case>CORE</fixed-case>: Referential Translation Machines for Measuring Semantic Similarity - ErgunBiçici - Josefvan Genabith + ErgunBiçici + Josefvan Genabith 234–240 S13-1034 bicici-van-genabith-2013-cngl @@ -363,15 +363,15 @@ More Words and Bigger Pictures - DavidForsyth + DavidForsyth 254 S13-1037 forsyth-2013-words Exploring Vector Space Models to Predict the Compositionality of <fixed-case>G</fixed-case>erman Noun-Noun Compounds - SabineSchulte im Walde - StefanMüller + SabineSchulte im Walde + StefanMüller StefanRoller 255–265 S13-1038 @@ -403,14 +403,14 @@ Choosing the Right Words: Characterizing and Reducing Error of the Word Count Approach - Hansen AndrewSchwartz + Hansen AndrewSchwartz JohannesEichstaedt EduardoBlanco LukaszDziurzynski - Margaret L.Kern + Margaret L.Kern StephanieRamones MartinSeligman - LyleUngar + LyleUngar 296–305 S13-1042 schwartz-etal-2013-choosing @@ -433,7 +433,7 @@ Semantic Parsing <fixed-case>F</fixed-case>reebase: Towards Open-domain Semantic Parsing - QingqingCai + QingqingCai AlexanderYates 328–338 S13-1045 @@ -459,11 +459,11 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 1: <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-3: Evaluating Time Expressions, Events, and Temporal Relations NaushadUzZaman - HectorLlorens - LeonDerczynski - JamesAllen + HectorLlorens + LeonDerczynski + JamesAllen MarcVerhagen - JamesPustejovsky + JamesPustejovsky 1–9 S13-2001 uzzaman-etal-2013-semeval @@ -487,14 +487,14 @@ <fixed-case>ATT</fixed-case>1: Temporal Annotation Using Big Windows and Rich Syntactic and Semantic Features HyuckchulJung - AmandaStent + AmandaStent 20–24 S13-2004 jung-stent-2013-att1 <fixed-case>S</fixed-case>emeval-2013 Task 8: Cross-lingual Textual Entailment for Content Synchronization - MatteoNegri + MatteoNegri AlessandroMarchetti YasharMehdad LuisaBentivogli @@ -507,7 +507,7 @@ <fixed-case>SOFTCARDINALITY</fixed-case>: Learning to Identify Directional Cross-Lingual Entailment from Cardinalities and <fixed-case>SMT</fixed-case> SergioJimenez ClaudiaBecerra - AlexanderGelbukh + AlexanderGelbukh 34–38 S13-2006 jimenez-etal-2013-softcardinality-learning @@ -516,8 +516,8 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 5: Evaluating Phrasal Semantics IoannisKorkontzelos TorstenZesch - Fabio MassimoZanzotto - ChrisBiemann + Fabio MassimoZanzotto + ChrisBiemann 39–47 S13-2007 korkontzelos-etal-2013-semeval @@ -533,7 +533,7 @@ <fixed-case>M</fixed-case>an<fixed-case>TIME</fixed-case>: Temporal expression identification and normalization in the <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-3 challenge MicheleFilannino GavinBrown - GoranNenadic + GoranNenadic 53–57 S13-2009 filannino-etal-2013-mantime @@ -541,33 +541,33 @@ <fixed-case>FSS</fixed-case>-<fixed-case>T</fixed-case>im<fixed-case>E</fixed-case>x for <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-3: Extracting Temporal Information from Text VanniZavarella - HristoTanev + HristoTanev 58–63 S13-2010 zavarella-tanev-2013-fss <fixed-case>JU</fixed-case>_<fixed-case>CSE</fixed-case>: A <fixed-case>CRF</fixed-case> Based Approach to Annotation of Temporal Expression, Event and Temporal Relations - Anup KumarKolya + Anup KumarKolya AmitavaKundu RajdeepGupta AsifEkbal - SivajiBandyopadhyay + SivajiBandyopadhyay 64–72 S13-2011 kolya-etal-2013-ju <fixed-case>N</fixed-case>avy<fixed-case>T</fixed-case>ime: Event and Time Ordering from Raw Text - NathanaelChambers + NathanaelChambers 73–77 S13-2012 chambers-2013-navytime <fixed-case>SUT</fixed-case>ime: Evaluation in <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-3 - AngelChang - Christopher D.Manning + AngelChang + Christopher D.Manning 78–82 S13-2013 chang-manning-2013-sutime @@ -575,7 +575,7 @@ <fixed-case>KUL</fixed-case>: Data-driven Approach to Temporal Parsing of Newswire Articles OleksandrKolomiyets - Marie-FrancineMoens + Marie-FrancineMoens 83–87 S13-2014 kolomiyets-moens-2013-kul @@ -593,20 +593,20 @@ <fixed-case>UMCC</fixed-case>_<fixed-case>DLSI</fixed-case>-(<fixed-case>EPS</fixed-case>): Paraphrases Detection Based on Semantic Distance HéctorDávila - AntonioFernández Orquín + AntonioFernández Orquín AlexanderChávez - YoanGutiérrez + YoanGutiérrez ArmandoCollazo - José I.Abreu - AndrésMontoyo - RafaelMuñoz + José I.Abreu + AndrésMontoyo + RafaelMuñoz 93–97 S13-2016 davila-etal-2013-umcc <fixed-case>MELODI</fixed-case>: Semantic Similarity of Words and Compositional Phrases using Latent Vector Weighting - TimVan de Cruys + TimVan de Cruys StergosAfantenos PhilippeMuller 98–102 @@ -634,7 +634,7 @@ <fixed-case>UNAL</fixed-case>: Discriminating between Literal and Figurative Phrasal Usage Using Distributional Statistics and <fixed-case>POS</fixed-case> tags SergioJimenez ClaudiaBecerra - AlexanderGelbukh + AlexanderGelbukh 114–117 S13-2020 jimenez-etal-2013-unal @@ -642,19 +642,19 @@ <fixed-case>ECNUCS</fixed-case>: Recognizing Cross-lingual Textual Entailment Using Multiple Text Similarity and Text Difference Measures JiangZhao - ManLan - Zheng-YuNiu + ManLan + Zheng-YuNiu 118–123 S13-2021 zhao-etal-2013-ecnucs <fixed-case>BUAP</fixed-case>: N-gram based Feature Evaluation for the Cross-Lingual Textual Entailment Task - DarnesVilariño - DavidPinto - SaúlLeón + DarnesVilariño + DavidPinto + SaúlLeón YuridianaAlemán - HelenaGómez + HelenaGómez 124–127 S13-2022 vilarino-etal-2013-buap @@ -662,7 +662,7 @@ <fixed-case>ALTN</fixed-case>: Word Alignment Features for Cross-lingual Textual Entailment MarcoTurchi - MatteoNegri + MatteoNegri 128–132 S13-2023 turchi-negri-2013-altn @@ -671,7 +671,7 @@ <fixed-case>U</fixed-case>melb: Cross-lingual Textual Entailment with Word Alignment and String Similarity Features YvetteGraham BaharSalehi - TimothyBaldwin + TimothyBaldwin 133–137 S13-2024 graham-etal-2013-umelb @@ -680,9 +680,9 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 4: Free Paraphrases of Noun Compounds IrisHendrickx ZornitsaKozareva - PreslavNakov + PreslavNakov DiarmuidÓ Séaghdha - StanSzpakowicz + StanSzpakowicz TonyVeale 138–143 S13-2025 @@ -690,7 +690,7 @@ <fixed-case>MELODI</fixed-case>: A Supervised Distributional Approach for Free Paraphrasing of Noun Compounds - TimVan de Cruys + TimVan de Cruys StergosAfantenos PhilippeMuller 144–147 @@ -717,7 +717,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 10: Cross-lingual Word Sense Disambiguation ElsLefever - VéroniqueHoste + VéroniqueHoste 158–166 S13-2029 lefever-hoste-2013-semeval @@ -732,7 +732,7 @@ <fixed-case>HLTDI</fixed-case>: <fixed-case>CL</fixed-case>-<fixed-case>WSD</fixed-case> Using <fixed-case>M</fixed-case>arkov Random Fields for <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 10 - AlexRudnick + AlexRudnick CanLiu MichaelGasser 171–177 @@ -749,7 +749,7 @@ <fixed-case>WSD</fixed-case>2: Parameter optimisation for Memory-based Cross-Lingual Word-Sense Disambiguation Maartenvan Gompel - Antalvan den Bosch + Antalvan den Bosch 183–187 S13-2033 van-gompel-van-den-bosch-2013-wsd2 @@ -798,7 +798,7 @@ unimelb: Topic Modelling-based Word Sense Induction for Web Snippet Clustering Jey HanLau PaulCook - TimothyBaldwin + TimothyBaldwin 217–221 S13-2039 lau-etal-2013-unimelb @@ -816,26 +816,26 @@ <fixed-case>GETALP</fixed-case> System : Propagation of a <fixed-case>L</fixed-case>esk Measure through an Ant Colony Algorithm DidierSchwab AndonTchechmedjiev - JérômeGoulian + JérômeGoulian MohammadNasiruddin - GillesSérasset - HervéBlanchon + GillesSérasset + HervéBlanchon 232–240 S13-2041 schwab-etal-2013-getalp <fixed-case>UMCC</fixed-case>_<fixed-case>DLSI</fixed-case>: Reinforcing a Ranking Algorithm with Sense Frequencies and Multidimensional Semantic Resources to solve Multilingual Word Sense Disambiguation - YoanGutiérrez + YoanGutiérrez YenierCastañeda AndyGonzález RainelEstrada Dennys D.Piug - Jose I.Abreu + Jose I.Abreu RogerPérez - AntonioFernández Orquín - AndrésMontoyo - RafaelMuñoz + AntonioFernández Orquín + AndrésMontoyo + RafaelMuñoz FrancCamara 241–249 S13-2042 @@ -853,7 +853,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 3: Spatial Role Labeling OleksandrKolomiyets ParisaKordjamshidi - Marie-FrancineMoens + Marie-FrancineMoens StevenBethard 255–262 S13-2044 @@ -861,15 +861,15 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 7: The Joint Student Response Analysis and 8th Recognizing Textual Entailment Challenge - MyroslavaDzikovska - RodneyNielsen + MyroslavaDzikovska + RodneyNielsen ChrisBrew ClaudiaLeacock DaniloGiampiccolo LuisaBentivogli PeterClark IdoDagan - Hoa TrangDang + Hoa TrangDang 263–274 S13-2045 dzikovska-etal-2013-semeval @@ -886,7 +886,7 @@ <fixed-case>SOFTCARDINALITY</fixed-case>: Hierarchical Text Overlap for Student Response Analysis SergioJimenez ClaudiaBecerra - AlexanderGelbukh + AlexanderGelbukh 280–284 S13-2047 jimenez-etal-2013-softcardinality-hierarchical @@ -904,7 +904,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 13: Word Sense Induction for Graded and Non-Graded Senses DavidJurgens - IoannisKlapaftis + IoannisKlapaftis 290–299 S13-2049 jurgens-klapaftis-2013-semeval @@ -923,14 +923,14 @@ unimelb: Topic Modelling-based Word Sense Induction Jey HanLau PaulCook - TimothyBaldwin + TimothyBaldwin 307–311 S13-2051 lau-etal-2013-unimelb-topic <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 2: Sentiment Analysis in <fixed-case>T</fixed-case>witter - PreslavNakov + PreslavNakov SaraRosenthal ZornitsaKozareva VeselinStoyanov @@ -942,7 +942,7 @@ <fixed-case>NRC</fixed-case>-<fixed-case>C</fixed-case>anada: Building the State-of-the-Art in Sentiment Analysis of Tweets - SaifMohammad + SaifMohammad SvetlanaKiritchenko XiaodanZhu 321–327 @@ -959,7 +959,7 @@ <fixed-case>AVAYA</fixed-case>: Sentiment Analysis on <fixed-case>T</fixed-case>witter with Self-Training and Polarity Lexicon Expansion - LeeBecker + LeeBecker GeorgeErhart DavidSkiba ValentineMatula @@ -969,7 +969,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 Task 9 : Extraction of Drug-Drug Interactions from Biomedical Texts (<fixed-case>DDIE</fixed-case>xtraction 2013) - IsabelSegura-Bedmar + IsabelSegura-Bedmar PalomaMartínez MaríaHerrero-Zazo 341–350 @@ -979,14 +979,14 @@ <fixed-case>FBK</fixed-case>-irst : A Multi-Phase Kernel Based Approach for Drug-Drug Interaction Detection and Classification that Exploits Linguistic Information Md. Faisal MahbubChowdhury - AlbertoLavelli + AlbertoLavelli 351–355 S13-2057 chowdhury-lavelli-2013-fbk <fixed-case>WBI</fixed-case>-<fixed-case>NER</fixed-case>: The impact of domain-specific features on the performance of identifying and classifying mentions of drugs - TimRocktäschel + TimRocktäschel TorstenHuber MichaelWeidlich UlfLeser @@ -1009,7 +1009,7 @@ GiuseppeCastellucci SimoneFilice DaniloCroce - RobertoBasili + RobertoBasili 369–374 S13-2060 castellucci-etal-2013-unitor @@ -1026,7 +1026,7 @@ u<fixed-case>O</fixed-case>ttawa: System description for <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2013 Task 2 Sentiment Analysis in <fixed-case>T</fixed-case>witter HamidPoursepanj JoshWeissbock - DianaInkpen + DianaInkpen 380–383 S13-2062 poursepanj-etal-2013-uottawa @@ -1045,7 +1045,7 @@ <fixed-case>USNA</fixed-case>: A Dual-Classifier Approach to Contextual Sentiment Analysis GaneshHarihara EugeneYang - NathanaelChambers + NathanaelChambers 390–394 S13-2064 harihara-etal-2013-usna @@ -1062,10 +1062,10 @@ <fixed-case>SINAI</fixed-case>: Machine Learning and Emotion of the Crowd for Sentiment Analysis in Microblogs - EugenioMartínez-Cámara + EugenioMartínez-Cámara ArturoMontejo-Ráez - M. TeresaMartín-Valdivia - L. AlfonsoUreña-López + M. TeresaMartín-Valdivia + L. AlfonsoUreña-López 402–407 S13-2066 martinez-camara-etal-2013-sinai @@ -1074,7 +1074,7 @@ <fixed-case>ECNUCS</fixed-case>: A Surface Information Based System Description of Sentiment Analysis in <fixed-case>T</fixed-case>witter in the <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2013 (Task 2) ZhuTiantian ZhangFangxi - ManLan + LanMan 408–413 S13-2067 zhu-etal-2013-ecnucs @@ -1089,7 +1089,7 @@ [<fixed-case>LVIC</fixed-case>-<fixed-case>LIMSI</fixed-case>]: Using Syntactic Features and Multi-polarity Words for Sentiment Analysis in <fixed-case>T</fixed-case>witter MorganeMarchand - AlexandruGinsca + AlexandruGinsca RomaricBesançon OlivierMesnard 418–424 @@ -1108,7 +1108,7 @@ <fixed-case>NTNU</fixed-case>: Domain Semi-Independent Short Message Sentiment Classification ØyvindSelmer MikaelBrevik - BjörnGambäck + BjörnGambäck LarsBungum 430–437 S13-2071 @@ -1119,21 +1119,21 @@ NikolaosMalandrakis AbeKazemzadeh AlexandrosPotamianos - ShrikanthNarayanan + ShrikanthNarayanan 438–442 S13-2072 malandrakis-etal-2013-sail <fixed-case>UMCC</fixed-case>_<fixed-case>DLSI</fixed-case>-(<fixed-case>SA</fixed-case>): Using a ranking algorithm and informal features to solve Sentiment Analysis in <fixed-case>T</fixed-case>witter - YoanGutiérrez + YoanGutiérrez AndyGonzález RogerPérez - José I.Abreu - AntonioFernández Orquín + José I.Abreu + AntonioFernández Orquín AlejandroMosquera - AndrésMontoyo - RafaelMuñoz + AndrésMontoyo + RafaelMuñoz FrancCamara 443–449 S13-2073 @@ -1149,15 +1149,15 @@ Experiments with <fixed-case>DB</fixed-case>pedia, <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>S</fixed-case>enti<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et as resources for sentiment analysis in micro-blogging HussamHamdan - FredericBéchet - PatriceBellot + FredericBéchet + PatriceBellot 455–459 S13-2075 hamdan-etal-2013-experiments <fixed-case>OPTWIMA</fixed-case>: Comparing Knowledge-rich and Knowledge-poor Approaches for Sentiment Analysis in Short Informal Texts - AlexandraBalahur + AlexandraBalahur 460–465 S13-2076 balahur-2013-optwima @@ -1167,7 +1167,7 @@ Md. Faisal MahbubChowdhury MarcoGuerini SaraTonelli - AlbertoLavelli + AlbertoLavelli 466–470 S13-2077 chowdhury-etal-2013-fbk @@ -1178,7 +1178,7 @@ RahimDehkharghani BerrinYanikoglu DilekTapucu - YucelSaygin + YucelSaygin 471–477 S13-2078 gezici-etal-2013-su @@ -1186,20 +1186,20 @@ <fixed-case>C</fixed-case>olumbia <fixed-case>NLP</fixed-case>: Sentiment Detection of Subjective Phrases in Social Media SaraRosenthal - KathyMcKeown + KathyMcKeown 478–482 S13-2079 rosenthal-mckeown-2013-columbia <fixed-case>FBM</fixed-case>: Combining lexicon-based <fixed-case>ML</fixed-case> and heuristics for Social Media Polarities - CarlosRodríguez-Penagos - JordiAtserias Batalla - JoanCodina-Filbà + CarlosRodríguez-Penagos + JordiAtserias Batalla + JoanCodina-Filbà DavidGarcía-Narbona JensGrivolla PatrikLambert - RoserSaurí + RoserSaurí 483–489 S13-2080 rodriguez-penagos-etal-2013-fbm @@ -1209,8 +1209,8 @@ SilvioMoreira JoãoFilgueiras BrunoMartins - FranciscoCouto - Mário J.Silva + FranciscoCouto + Mário J.Silva 490–494 S13-2081 moreira-etal-2013-reaction @@ -1219,7 +1219,7 @@ <fixed-case>IITB</fixed-case>-Sentiment-Analysts: Participation in Sentiment Analysis in <fixed-case>T</fixed-case>witter <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2013 Task KaranChawla AnkitRamteke - PushpakBhattacharyya + PushpakBhattacharyya 495–500 S13-2082 chawla-etal-2013-iitb @@ -1228,8 +1228,8 @@ <fixed-case>SSA</fixed-case>-<fixed-case>UO</fixed-case>: Unsupervised Sentiment Analysis in <fixed-case>T</fixed-case>witter ReynierOrtega Bueno AdrianFonseca Bruzón - YoanGutiérrez - AndrésMontoyo + YoanGutiérrez + AndrésMontoyo 501–507 S13-2083 ortega-bueno-etal-2013-ssa @@ -1259,7 +1259,7 @@ <fixed-case>C</fixed-case>ode<fixed-case>X</fixed-case>: Combining an <fixed-case>SVM</fixed-case> Classifier and Character N-gram Language Models for Sentiment Analysis on <fixed-case>T</fixed-case>witter Text QiHan JunfeiGuo - HinrichSchuetze + HinrichSchuetze 520–524 S13-2086 han-etal-2013-codex @@ -1284,7 +1284,7 @@ <fixed-case>U</fixed-case>o<fixed-case>M</fixed-case>: Using Explicit Semantic Analysis for Classifying Sentiments SapnaNegi - MichaelRosner + MichaelRosner 535–538 S13-2089 negi-rosner-2013-uom @@ -1309,7 +1309,7 @@ <fixed-case>SZTE</fixed-case>-<fixed-case>NLP</fixed-case>: Sentiment Detection on <fixed-case>T</fixed-case>witter Messages ViktorHangya GáborBerend - RichárdFarkas + RichárdFarkas 549–553 S13-2092 hangya-etal-2013-szte @@ -1318,8 +1318,8 @@ <fixed-case>BOUNCE</fixed-case>: Sentiment Classification in <fixed-case>T</fixed-case>witter using Rich Feature Sets NadinKökciyan ArdaÇelebi - ArzucanÖzgür - SuzanÜsküdarlı + ArzucanÖzgür + SuzanÜsküdarlı 554–561 S13-2093 kokciyan-etal-2013-bounce @@ -1327,7 +1327,7 @@ nlp.cs.aueb.gr: Two Stage Sentiment Analysis ProdromosMalakasiotis - Rafael MichaelKarampatsis + Rafael MichaelKarampatsis KonstantinaMakrynioti JohnPavlopoulos 562–567 @@ -1336,7 +1336,7 @@ <fixed-case>NILC</fixed-case>_<fixed-case>USP</fixed-case>: A Hybrid System for Sentiment Analysis in <fixed-case>T</fixed-case>witter Messages - PedroBalage Filho + PedroBalage Filho ThiagoPardo 568–572 S13-2095 @@ -1346,7 +1346,7 @@ <fixed-case>UNITOR</fixed-case>-<fixed-case>HMM</fixed-case>-<fixed-case>TK</fixed-case>: Structured Kernel-based learning for Spatial Role Labeling EmanueleBastianelli DaniloCroce - RobertoBasili + RobertoBasili DanieleNardi 573–579 S13-2096 @@ -1355,16 +1355,16 @@ <fixed-case>EHU</fixed-case>-<fixed-case>ALM</fixed-case>: Similarity-Feature Based Approach for Student Response Analysis ItziarAldabe - MontseMaritxalar - OierLopez de Lacalle + MontseMaritxalar + OierLopez de Lacalle 580–584 S13-2097 aldabe-etal-2013-ehu <fixed-case>CNGL</fixed-case>: Grading Student Answers by Acts of Translation - ErgunBiçici - Josefvan Genabith + ErgunBiçici + Josefvan Genabith 585–591 S13-2098 bicici-van-genabith-2013-cngl-grading @@ -1372,7 +1372,7 @@ <fixed-case>C</fixed-case>eli: <fixed-case>EDITS</fixed-case> and Generic Text Pair Classification MilenKouylekov - LucaDini + LucaDini AlessioBosca MarcoTrevisan 592–597 @@ -1401,21 +1401,21 @@ NielsOtt RamonZiai MichaelHahn - DetmarMeurers + DetmarMeurers 608–616 S13-2102 ott-etal-2013-comet <fixed-case>UC</fixed-case>3<fixed-case>M</fixed-case>: A kernel-based approach to identify and classify <fixed-case>DDI</fixed-case>s in bio-medical texts. - DanielSanchez-Cisneros + DanielSanchez-Cisneros 617–621 S13-2103 sanchez-cisneros-2013-uc3m <fixed-case>UEM</fixed-case>-<fixed-case>UC</fixed-case>3<fixed-case>M</fixed-case>: An Ontology-based named entity recognition system for biomedical texts. - DanielSanchez-Cisneros + DanielSanchez-Cisneros FernandoAparicio Gali 622–627 S13-2104 @@ -1425,7 +1425,7 @@ <fixed-case>WBI</fixed-case>-<fixed-case>DDI</fixed-case>: Drug-Drug Interaction Extraction using Majority Voting PhilippeThomas MarianaNeves - TimRocktäschel + TimRocktäschel UlfLeser 628–635 S13-2105 @@ -1436,12 +1436,12 @@ ArmandoCollazo AlbertoCeballo Dennys D.Puig - YoanGutiérrez - José I.Abreu + YoanGutiérrez + José I.Abreu RogerPérez - AntonioFernández Orquín - AndrésMontoyo - RafaelMuñoz + AntonioFernández Orquín + AndrésMontoyo + RafaelMuñoz FrancCamara 636–643 S13-2106 @@ -1468,7 +1468,7 @@ <fixed-case>LASIGE</fixed-case>: using Conditional Random Fields and <fixed-case>C</fixed-case>h<fixed-case>EBI</fixed-case> ontology TiagoGrego FranciscoPinto - Francisco M.Couto + Francisco M.Couto 660–666 S13-2109 grego-etal-2013-lasige @@ -1476,7 +1476,7 @@ <fixed-case>UWM</fixed-case>-<fixed-case>TRIADS</fixed-case>: Classifying Drug-Drug Interactions with Two-Stage <fixed-case>SVM</fixed-case> and Post-Processing MajidRastegar-Mojarad - Richard D.Boyce + Richard D.Boyce RashmiPrasad 667–674 S13-2110 @@ -1484,18 +1484,18 @@ <fixed-case>SCAI</fixed-case>: Extracting drug-drug interactions using a rich feature vector - TamaraBobić + TamaraBobić JulianeFluck - MartinHofmann-Apitius + MartinHofmann-Apitius 675–683 S13-2111 bobic-etal-2013-scai <fixed-case>UC</fixed-case>olorado_<fixed-case>SOM</fixed-case>: Extraction of Drug-Drug Interactions from Biomedical Text using Knowledge-rich and Knowledge-poor Features - NegacyHailu - Lawrence E.Hunter - K. BretonnelCohen + NegacyHailu + Lawrence E.Hunter + K. BretonnelCohen 684–688 S13-2112 hailu-etal-2013-ucolorado diff --git a/data/xml/S14.xml b/data/xml/S14.xml index d2a50d657e..8273c26364 100644 --- a/data/xml/S14.xml +++ b/data/xml/S14.xml @@ -20,11 +20,11 @@ More or less supervised supersense tagging of <fixed-case>T</fixed-case>witter - AndersJohannsen + AndersJohannsen DirkHovy - HéctorMartínez Alonso - BarbaraPlank - AndersSøgaard + HéctorMartínez Alonso + BarbaraPlank + AndersSøgaard 1–11 S14-1001 10.3115/v1/S14-1001 @@ -34,7 +34,7 @@ Generating a Word-Emotion Lexicon from #Emotional Tweets AnilBandhakavi NirmalieWiratunga - DeepakP + DeepakP StewartMassie 12–21 S14-1002 @@ -101,7 +101,7 @@ An analysis of textual inference in <fixed-case>G</fixed-case>erman customer emails KathrinEichler AleksandraGabryszak - GünterNeumann + GünterNeumann 69–74 S14-1009 10.3115/v1/S14-1009 @@ -140,9 +140,9 @@ Compositional Distributional Semantics Models in Chunk-based Smoothed Tree Kernels - Nghia ThePham + Nghia ThePham LorenzoFerrone - Fabio MassimoZanzotto + Fabio MassimoZanzotto 93–98 S14-1013 10.3115/v1/S14-1013 @@ -150,7 +150,7 @@ Generating Simulations of Motion Events from Verbal Descriptions - JamesPustejovsky + JamesPustejovsky NikhilKrishnaswamy 99–109 S14-1014 @@ -162,7 +162,7 @@ MarkYatskar MichelGalley LucyVanderwende - LukeZettlemoyer + LukeZettlemoyer 110–120 S14-1015 10.3115/v1/S14-1015 @@ -201,7 +201,7 @@ Vagueness and Learning: A Type-Theoretic Approach - RaquelFernández + RaquelFernández StaffanLarsson 151–159 S14-1019 @@ -212,7 +212,7 @@ Contrasting Syntagmatic and Paradigmatic Relations: Insights from Distributional Semantic Models GabriellaLapesa StefanEvert - SabineSchulte im Walde + SabineSchulte im Walde 160–170 S14-1020 10.3115/v1/S14-1020 @@ -220,8 +220,8 @@ Dead parrots make bad pets: Exploring modifier effects in noun phrases - GermánKruszewski - MarcoBaroni + GermánKruszewski + MarcoBaroni 171–181 S14-1021 10.3115/v1/S14-1021 @@ -230,7 +230,7 @@ Syntactic Transfer Patterns of <fixed-case>G</fixed-case>erman Particle Verbs and their Impact on Lexical Semantics StefanBott - SabineSchulte im Walde + SabineSchulte im Walde 182–192 S14-1022 10.3115/v1/S14-1022 @@ -241,7 +241,7 @@ Proceedings of the 8th International Workshop on Semantic Evaluation (SemEval 2014) S14-2 - PreslavNakov + PreslavNakov TorstenZesch 10.3115/v1/S14-2 Association for Computational Linguistics @@ -258,8 +258,8 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2014 Task 1: Evaluation of Compositional Distributional Semantic Models on Full Sentences through Semantic Relatedness and Textual Entailment MarcoMarelli LuisaBentivogli - MarcoBaroni - RaffaellaBernardi + MarcoBaroni + RaffaellaBernardi StefanoMenini RobertoZamparelli 1–8 @@ -291,9 +291,9 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2014 Task 4: Aspect Based Sentiment Analysis MariaPontiki - DimitrisGalanis + DimitrisGalanis JohnPavlopoulos - HarrisPapageorgiou + HarrisPapageorgiou IonAndroutsopoulos SureshManandhar 27–35 @@ -305,9 +305,9 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2014 Task 5 - <fixed-case>L</fixed-case>2 Writing Assistant Maartenvan Gompel IrisHendrickx - Antalvan den Bosch + Antalvan den Bosch ElsLefever - VéroniqueHoste + VéroniqueHoste 36–44 S14-2005 10.3115/v1/S14-2005 @@ -323,9 +323,9 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2014 Task 7: Analysis of Clinical Text - SameerPradhan - NoémieElhadad - WendyChapman + SameerPradhan + NoémieElhadad + WendyChapman SureshManandhar GuerganaSavova 54–62 @@ -338,9 +338,9 @@ StephanOepen MarcoKuhlmann YusukeMiyao - DanielZeman - DanFlickinger - JanHajič + DanielZeman + DanFlickinger + JanHajič AngelinaIvanova YiZhang 63–72 @@ -361,16 +361,16 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2014 Task 10: Multilingual Semantic Textual Similarity - EnekoAgirre + EnekoAgirre CarmenBanea - ClaireCardie + ClaireCardie DanielCer - MonaDiab - AitorGonzalez-Agirre + MonaDiab + AitorGonzalez-Agirre WeiweiGuo - RadaMihalcea - GermanRigau - JanyceWiebe + RadaMihalcea + GermanRigau + JanyceWiebe 81–91 S14-2010 10.3115/v1/S14-2010 @@ -387,8 +387,8 @@ <fixed-case>A</fixed-case>lpage: Transition-based Semantic Graph Parsing with Syntactic Features CorentinRibeyre - EricVillemonte de la Clergerie - DjaméSeddah + EricVillemonte de la Clergerie + DjaméSeddah 97–103 S14-2012 10.3115/v1/S14-2012 @@ -410,7 +410,7 @@ SvetlanaStoyanchev HyuckchulJung JohnChen - SrinivasBangalore + SrinivasBangalore 109–113 S14-2014 10.3115/v1/S14-2014 @@ -418,7 +418,7 @@ <fixed-case>AUEB</fixed-case>: Two Stage Sentiment Analysis of Social Network Messages - Rafael MichaelKarampatsis + Rafael MichaelKarampatsis JohnPavlopoulos ProdromosMalakasiotis 114–118 @@ -428,8 +428,8 @@ Bielefeld <fixed-case>SC</fixed-case>: Orthonormal Topic Modelling for Grammar Induction - John PhilipMcCrae - PhilippCimiano + John PhilipMcCrae + PhilippCimiano 119–122 S14-2016 10.3115/v1/S14-2016 @@ -459,7 +459,7 @@ <fixed-case>B</fixed-case>ioinformatics<fixed-case>UA</fixed-case>: Concept Recognition in Clinical Narratives Using a Modular and Highly Efficient Text Processing Framework SérgioMatos TiagoNunes - José LuísOliveira + José LuísOliveira 135–139 S14-2019 10.3115/v1/S14-2019 @@ -476,9 +476,9 @@ <fixed-case>BUAP</fixed-case>: Evaluating Compositional Distributional Semantic Models on Full Sentences through Semantic Relatedness and Textual Entailment - SaúlLeón - DarnesVilariño - DavidPinto + SaúlLeón + DarnesVilariño + DavidPinto MireyaTovar BeatrizBeltrán 145–148 @@ -488,9 +488,9 @@ <fixed-case>BUAP</fixed-case>: Evaluating Features for Multilingual and Cross-Level Semantic Textual Similarity - DarnesVilariño - DavidPinto - SaúlLeón + DarnesVilariño + DavidPinto + SaúlLeón MireyaTovar BeatrizBeltrán 149–153 @@ -500,9 +500,9 @@ <fixed-case>BUAP</fixed-case>: Polarity Classification of Short Texts - DavidPinto - DarnesVilariño - SaulLeón + DavidPinto + DarnesVilariño + SaulLeón MiguelJasso CupertinoLucero 154–159 @@ -523,7 +523,7 @@ JoãoLeal SaraPinto AnaBento - HugoGonçalo Oliveira + HugoGonçalo Oliveira PauloGomes 166–170 S14-2025 @@ -548,8 +548,8 @@ JesseDodge SwabhaSwayamdipta NathanSchneider - ChrisDyer - Noah A.Smith + ChrisDyer + Noah A.Smith 176–180 S14-2027 10.3115/v1/S14-2027 @@ -581,7 +581,7 @@ <fixed-case>CNRC</fixed-case>-<fixed-case>TMT</fixed-case>: Second Language Writing Assistant System Description - CyrilGoutte + CyrilGoutte MichelSimard MarineCarpuat 192–197 @@ -592,7 +592,7 @@ <fixed-case>C</fixed-case>olumbia <fixed-case>NLP</fixed-case>: Sentiment Detection of Sentences and Subjective Phrases in Social Media SaraRosenthal - KathyMcKeown + KathyMcKeown ApoorvAgarwal 198–202 S14-2031 @@ -624,12 +624,12 @@ Copenhagen-Malmö: Tree Approximations of Semantic Parsing Problems NatalieSchluter - AndersSøgaard + AndersSøgaard JakobElming DirkHovy - BarbaraPlank - HéctorMartínez Alonso - AndersJohanssen + BarbaraPlank + HéctorMartínez Alonso + AndersJohanssen SigridKlerke 213–217 S14-2034 @@ -650,7 +650,7 @@ <fixed-case>DCU</fixed-case>: Aspect-based Polarity Classification for <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val Task 4 JoachimWagner PiyushArora - SantiagoCortes + SantiagoCortes UtsabBarman DashaBogdanova JenniferFoster @@ -663,7 +663,7 @@ <fixed-case>DIT</fixed-case>: Summarisation and Semantic Expansion in Evaluating Semantic Similarity MagdalenaKacmajor - John D.Kelleher + John D.Kelleher 230–234 S14-2037 10.3115/v1/S14-2037 @@ -680,7 +680,7 @@ <fixed-case>DLS</fixed-case>@<fixed-case>CU</fixed-case>: Sentence Similarity from Word Alignment - Md ArafatSultan + Md ArafatSultan StevenBethard TamaraSumner 241–246 @@ -700,7 +700,7 @@ <fixed-case>ECNU</fixed-case>: A Combination Method and Multiple Features for Aspect Extraction and Sentiment Polarity Classification FangxiZhang ZhihuaZhang - ManLan + ManLan 252–258 S14-2041 10.3115/v1/S14-2041 @@ -709,7 +709,7 @@ <fixed-case>ECNU</fixed-case>: Expression- and Message-level Sentiment Orientation Classification in <fixed-case>T</fixed-case>witter Using Multiple Effective Features JiangZhao - ManLan + ManLan TiantianZhu 259–264 S14-2042 @@ -719,7 +719,7 @@ <fixed-case>ECNU</fixed-case>: Leveraging on Ensemble of Heterogeneous Features and Information Enrichment for Cross Level Semantic Similarity Estimation TiantianZhu - ManLan + ManLan 265–270 S14-2043 10.3115/v1/S14-2043 @@ -729,7 +729,7 @@ <fixed-case>ECNU</fixed-case>: One Stone Two Birds: Ensemble of Heterogenous Measures for Semantic Relatedness and Textual Entailment JiangZhao TiantianZhu - ManLan + ManLan 271–277 S14-2044 10.3115/v1/S14-2044 @@ -771,9 +771,9 @@ <fixed-case>GPLSI</fixed-case>: Supervised Sentiment Analysis in <fixed-case>T</fixed-case>witter using Skipgrams JaviFernández - YoanGutiérrez - Jose ManuelGómez - PatricioMartínez-Barco + YoanGutiérrez + Jose ManuelGómez + PatricioMartínez-Barco 294–299 S14-2048 10.3115/v1/S14-2048 @@ -782,7 +782,7 @@ ha<fixed-case>LF</fixed-case>: Comparing a Pure <fixed-case>CDSM</fixed-case> Approach with a Standard Machine Learning System for <fixed-case>RTE</fixed-case> LorenzoFerrone - Fabio MassimoZanzotto + Fabio MassimoZanzotto 300–304 S14-2049 10.3115/v1/S14-2049 @@ -790,10 +790,10 @@ <fixed-case>H</fixed-case>ul<fixed-case>T</fixed-case>ech: A General Purpose System for Cross-Level Semantic Similarity based on Anchor Web Counts - Jose G.Moreno + Jose G.Moreno RumenMoraliyski AsmaBerrezoug - GaëlDias + GaëlDias 305–308 S14-2050 10.3115/v1/S14-2050 @@ -809,7 +809,7 @@ <fixed-case>IITP</fixed-case>: A Supervised Approach for Disorder Mention Detection and Disambiguation - Utpal KumarSikdar + Utpal KumarSikdar AsifEkbal SriparnaSaha 314–318 @@ -819,7 +819,7 @@ <fixed-case>IITP</fixed-case>: Supervised Machine Learning for Aspect based Sentiment Analysis - Deepak KumarGupta + Deepak KumarGupta AsifEkbal 319–323 S14-2053 @@ -848,7 +848,7 @@ In-House: An Ensemble of Pre-Existing Off-the-Shelf Parsers YusukeMiyao StephanOepen - DanielZeman + DanielZeman 335–340 S14-2056 10.3115/v1/S14-2056 @@ -857,7 +857,7 @@ <fixed-case>I</fixed-case>ndian Institute of Technology-Patna: Sentiment Analysis in <fixed-case>T</fixed-case>witter VikramSingh - Arif Md.Khan + Arif Md.Khan AsifEkbal 341–345 S14-2057 @@ -897,11 +897,11 @@ <fixed-case>IUCL</fixed-case>: Combining Information Sources for <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val Task 5 - AlexRudnick + AlexRudnick LeviKing CanLiu MarkusDickinson - SandraKübler + SandraKübler 356–360 S14-2060 10.3115/v1/S14-2060 @@ -909,10 +909,10 @@ <fixed-case>I</fixed-case>xa<fixed-case>M</fixed-case>ed: Applying Freeling and a Perceptron Sequential Tagger at the Shared Task on Analyzing Clinical Texts - KoldoGojenola - MaiteOronoz + KoldoGojenola + MaiteOronoz AliciaPérez - ArantzaCasillas + ArantzaCasillas 361–365 S14-2061 10.3115/v1/S14-2061 @@ -931,9 +931,9 @@ <fixed-case>JU</fixed-case>_<fixed-case>CSE</fixed-case>: A Conditional Random Field (<fixed-case>CRF</fixed-case>) Based Approach to Aspect Based Sentiment Analysis Braja GopalPatra - SoumikMandal + SoumikMandal DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 370–374 S14-2063 10.3115/v1/S14-2063 @@ -943,7 +943,7 @@ <fixed-case>JU</fixed-case>-Evora: A Graph Based Cross-Level Semantic Similarity Analysis using Discourse Information SwarnenduGhosh NibaranDas - TeresaGonçalves + TeresaGonçalves PauloQuaresma 375–379 S14-2064 @@ -988,10 +988,10 @@ <fixed-case>LIPN</fixed-case>: Introducing a new Geographical Context Similarity Measure and a Statistical Similarity Measure based on the Bhattacharyya coefficient DavideBuscaldi - JorgeGarcía Flores - JosephLe Roux + JorgeGarcía Flores + JosephLe Roux NadiTomeh - BelémPriego Sanchez + BelémPriego Sanchez 400–405 S14-2069 10.3115/v1/S14-2069 @@ -1001,9 +1001,9 @@ <fixed-case>LT</fixed-case>3: Sentiment Classification in User-Generated Content Using a Rich Feature Set CynthiaVan Hee MarjanVan de Kauter - OrphéeDe Clercq + OrphéeDe Clercq ElsLefever - VéroniqueHoste + VéroniqueHoste 406–410 S14-2070 10.3115/v1/S14-2070 @@ -1013,9 +1013,9 @@ <fixed-case>L</fixed-case>y<fixed-case>S</fixed-case>: Porting a <fixed-case>T</fixed-case>witter Sentiment Analysis Approach from <fixed-case>S</fixed-case>panish to <fixed-case>E</fixed-case>nglish DavidVilares MiguelHermo - Miguel A.Alonso + Miguel A.Alonso CarlosGómez-Rodríguez - YeraiDoval + YeraiDoval 411–415 S14-2071 10.3115/v1/S14-2071 @@ -1023,13 +1023,13 @@ Meerkat Mafia: Multilingual and Cross-Level Semantic Textual Similarity Systems - AbhayKashyap + AbhayKashyap LushanHan RobertoYus JenniferSleeman TaneeyaSatyapanich SunilGandhi - TimFinin + TimFinin 416–423 S14-2072 10.3115/v1/S14-2072 @@ -1039,7 +1039,7 @@ <fixed-case>M</fixed-case>ind<fixed-case>L</fixed-case>ab-<fixed-case>UNAL</fixed-case>: Comparing Metamap and <fixed-case>T</fixed-case>-mapper for Medical Concept Extraction in <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2014 Task 7 AlejandroRiveros MariaDe-Arteaga - FabioGonzález + FabioGonzález SergioJimenez HenningMüller 424–427 @@ -1049,10 +1049,10 @@ <fixed-case>NILC</fixed-case>_<fixed-case>USP</fixed-case>: An Improved Hybrid System for Sentiment Analysis in <fixed-case>T</fixed-case>witter Messages - PedroBalage Filho + PedroBalage Filho LucasAvanço ThiagoPardo - Maria das GraçasVolpe Nunes + Maria das GraçasVolpe Nunes 428–432 S14-2074 10.3115/v1/S14-2074 @@ -1060,7 +1060,7 @@ <fixed-case>NILC</fixed-case>_<fixed-case>USP</fixed-case>: Aspect Extraction using Semantic Labels - PedroBalage Filho + PedroBalage Filho ThiagoPardo 433–436 S14-2075 @@ -1072,7 +1072,7 @@ SvetlanaKiritchenko XiaodanZhu ColinCherry - SaifMohammad + SaifMohammad 437–442 S14-2076 10.3115/v1/S14-2076 @@ -1082,7 +1082,7 @@ <fixed-case>NRC</fixed-case>-<fixed-case>C</fixed-case>anada-2014: Recent Improvements in the Sentiment Analysis of Tweets XiaodanZhu SvetlanaKiritchenko - SaifMohammad + SaifMohammad 443–447 S14-2077 10.3115/v1/S14-2077 @@ -1092,7 +1092,7 @@ <fixed-case>NTNU</fixed-case>: Measuring Semantic Similarity with Sublexical Feature Representations and Soft Cardinality AndréLynum ParthaPakray - BjörnGambäck + BjörnGambäck SergioJimenez 448–453 S14-2078 @@ -1101,7 +1101,7 @@ <fixed-case>OPI</fixed-case>: <fixed-case>S</fixed-case>emeval-2014 Task 3 System Description - MarekKozlowski + MarekKozlowski 454–458 S14-2079 10.3115/v1/S14-2079 @@ -1111,7 +1111,7 @@ <fixed-case>P</fixed-case>eking: Profiling Syntactic Tree Parsing Techniques for Semantic Graph Parsing YantaoDu FanZhang - WeiweiSun + WeiweiSun XiaojunWan 459–464 S14-2080 @@ -1120,7 +1120,7 @@ <fixed-case>P</fixed-case>otsdam: Semantic Dependency Parsing by Bidirectional Graph-Tree Transformations and Syntactic Parsing - ŽeljkoAgić + ŽeljkoAgić AlexanderKoller 465–470 S14-2081 @@ -1129,7 +1129,7 @@ <fixed-case>P</fixed-case>riberam: A Turbo Semantic Parser with Second Order Features - André F. T.Martins + André F. T.Martins Mariana S. C.Almeida 471–476 S14-2082 @@ -1138,8 +1138,8 @@ <fixed-case>R</fixed-case>el<fixed-case>A</fixed-case>gent: Entity Detection and Normalization for Diseases in Clinical Records: a Linguistically Driven Approach - SvRamanan - SenthilNathan + SvRamanan + SenthilNathan 477–481 S14-2083 10.3115/v1/S14-2083 @@ -1156,7 +1156,7 @@ <fixed-case>RTM</fixed-case>-<fixed-case>DCU</fixed-case>: Referential Translation Machines for Semantic Similarity - ErgunBiçici + ErgunBiçici AndyWay 487–496 S14-2085 @@ -1186,9 +1186,9 @@ <fixed-case>SAIL</fixed-case>-<fixed-case>GRS</fixed-case>: Grammar Induction for Spoken Dialogue Systems using <fixed-case>CF</fixed-case>-<fixed-case>IRF</fixed-case> Rule Similarity - KalliopiZervanou + KalliopiZervanou NikolaosMalandrakis - ShrikanthNarayanan + ShrikanthNarayanan 508–511 S14-2088 10.3115/v1/S14-2088 @@ -1201,7 +1201,7 @@ ColinVaz Jesse JamesBisogni AlexandrosPotamianos - ShrikanthNarayanan + ShrikanthNarayanan 512–516 S14-2089 10.3115/v1/S14-2089 @@ -1212,7 +1212,7 @@ NaveenNandan DanielDahlmeier AkritiVij - NishthaMalhotra + NishthaMalhotra 517–521 S14-2090 10.3115/v1/S14-2090 @@ -1221,7 +1221,7 @@ <fixed-case>SAP</fixed-case>-<fixed-case>RI</fixed-case>: <fixed-case>T</fixed-case>witter Sentiment Analysis in Two Days AkritiVij - NishtaMalhotra + NishtaMalhotra NaveenNandan DanielDahlmeier 522–526 @@ -1232,7 +1232,7 @@ <fixed-case>S</fixed-case>eem<fixed-case>G</fixed-case>o: Conditional Random Fields Labeling and Maximum Entropy Classification for Aspect Based Sentiment Analysis PengfeiLiu - HelenMeng + HelenMeng 527–531 S14-2092 10.3115/v1/S14-2092 @@ -1253,7 +1253,7 @@ <fixed-case>S</fixed-case>ensible: <fixed-case>L</fixed-case>2 Translation Assistance by Emulating the Manual Post-Editing Process LilingTan Anne-KathrinSchumann - Jose M.M.Martinez + Jose M.M.Martinez FrancisBond 541–545 S14-2094 @@ -1281,7 +1281,7 @@ <fixed-case>S</fixed-case>hrd<fixed-case>L</fixed-case>ite: Semantic Parsing Using a Handmade Grammar - PeterLjunglöf + PeterLjunglöf 556–559 S14-2097 10.3115/v1/S14-2097 @@ -1291,9 +1291,9 @@ <fixed-case>S</fixed-case>im<fixed-case>C</fixed-case>ompass: Using Deep Learning Word Embeddings to Assess Cross-level Similarity CarmenBanea DiChen - RadaMihalcea - ClaireCardie - JanyceWiebe + RadaMihalcea + ClaireCardie + JanyceWiebe 560–565 S14-2098 10.3115/v1/S14-2098 @@ -1301,10 +1301,10 @@ <fixed-case>SINAI</fixed-case>: Voting System for Aspect Based Sentiment Analysis - Salud MaríaJiménez-Zafra - EugenioMartínez-Cámara - MaiteMartin - L. AlfonsoUreña-López + Salud MaríaJiménez-Zafra + EugenioMartínez-Cámara + MaiteMartin + L. AlfonsoUreña-López 566–571 S14-2099 10.3115/v1/S14-2099 @@ -1312,10 +1312,10 @@ <fixed-case>SINAI</fixed-case>: Voting System for <fixed-case>T</fixed-case>witter Sentiment Analysis - EugenioMartínez-Cámara - Salud MaríaJiménez-Zafra - MaiteMartin - L. AlfonsoUreña-López + EugenioMartínez-Cámara + Salud MaríaJiménez-Zafra + MaiteMartin + L. AlfonsoUreña-López 572–577 S14-2100 10.3115/v1/S14-2100 @@ -1342,7 +1342,7 @@ <fixed-case>SSMT</fixed-case>:A Machine Translation Evaluation View To Paragraph-to-Sentence Semantic Similarity PingpingHuang - BaobaoChang + BaobaoChang 585–589 S14-2102 10.3115/v1/S14-2102 @@ -1367,8 +1367,8 @@ Supervised Methods for Aspect-Based Sentiment Analysis HussamHamdan - PatriceBellot - FredericBéchet + PatriceBellot + FredericBéchet 596–600 S14-2104 10.3115/v1/S14-2104 @@ -1386,7 +1386,7 @@ Synalp-Empathic: A Valence Shifting Hybrid System for Sentiment Analysis - AlexandreDenis + AlexandreDenis SamuelCruz-Lara NadiaBellalem LotfiBellalem @@ -1399,8 +1399,8 @@ <fixed-case>SZTE</fixed-case>-<fixed-case>NLP</fixed-case>: Aspect level opinion mining exploiting syntactic cues ViktorHangya GáborBerend - IstvánVarga - RichárdFarkas + IstvánVarga + RichárdFarkas 610–614 S14-2107 10.3115/v1/S14-2107 @@ -1409,7 +1409,7 @@ <fixed-case>SZTE</fixed-case>-<fixed-case>NLP</fixed-case>: Clinical Text Analysis with Named Entity Recognition MelindaKatona - RichárdFarkas + RichárdFarkas 615–618 S14-2108 10.3115/v1/S14-2108 @@ -1417,9 +1417,9 @@ <fixed-case>TCDSCSS</fixed-case>: Dimensionality Reduction to Evaluate Texts of Varying Lengths - an <fixed-case>IR</fixed-case> Approach - Arun KumarJayapal + Arun KumarJayapal MartinEmms - JohnKelleher + JohnKelleher 619–623 S14-2109 10.3115/v1/S14-2109 @@ -1455,8 +1455,8 @@ The Impact of Z_score on <fixed-case>T</fixed-case>witter Sentiment Analysis HussamHamdan - PatriceBellot - FredericBéchet + PatriceBellot + FredericBéchet 636–641 S14-2113 10.3115/v1/S14-2113 @@ -1475,7 +1475,7 @@ Think Positive: Towards <fixed-case>T</fixed-case>witter Sentiment Analysis from Scratch - Cícerodos Santos + Cícerodos Santos 647–651 S14-2115 10.3115/v1/S14-2115 @@ -1483,7 +1483,7 @@ <fixed-case>T</fixed-case>hink<fixed-case>M</fixed-case>iners: Disorder Recognition using Conditional Random Fields and Distributional Semantics - AnkurParikh + AnkurParikh AvineshPVS JoyMustafi LalitAgarwalla @@ -1529,10 +1529,10 @@ <fixed-case>TUGAS</fixed-case>: Exploiting unlabelled data for <fixed-case>T</fixed-case>witter sentiment analysis SilvioAmir - Miguel B.Almeida + Miguel B.Almeida BrunoMartins JoãoFilgueiras - Mário J.Silva + Mário J.Silva 673–677 S14-2120 10.3115/v1/S14-2120 @@ -1599,7 +1599,7 @@ AndréLeal DiogoGonçalves BrunoMartins - Francisco M.Couto + Francisco M.Couto 711–715 S14-2127 10.3115/v1/S14-2127 @@ -1609,10 +1609,10 @@ <fixed-case>UMCC</fixed-case>_<fixed-case>DLSI</fixed-case>_<fixed-case>S</fixed-case>em<fixed-case>S</fixed-case>im: Multilingual System for Measuring Semantic Textual Similarity AlexanderChávez HéctorDávila - YoanGutiérrez - AntonioFernández-Orquín - AndrésMontoyo - RafaelMuñoz + YoanGutiérrez + AntonioFernández-Orquín + AndrésMontoyo + RafaelMuñoz 716–721 S14-2128 10.3115/v1/S14-2128 @@ -1624,10 +1624,10 @@ ArmandoCollazo ElvisCrego Jorge L.Garcia - YoanGutiérrez - DavidTomás - AndrésMontoyo - RafaelMuñoz + YoanGutiérrez + DavidTomás + AndrésMontoyo + RafaelMuñoz 722–726 S14-2129 10.3115/v1/S14-2129 @@ -1638,9 +1638,9 @@ Pedro AnielSánchez-Mirabal YarelisRuano Torres SuilenHernández Alvarado - YoanGutiérrez - AndrésMontoyo - RafaelMuñoz + YoanGutiérrez + AndrésMontoyo + RafaelMuñoz 727–731 S14-2130 10.3115/v1/S14-2130 @@ -1651,7 +1651,7 @@ SergioJimenez GeorgeDueñas JuliaBaquero - AlexanderGelbukh + AlexanderGelbukh 732–742 S14-2131 10.3115/v1/S14-2131 @@ -1671,7 +1671,7 @@ <fixed-case>UNIBA</fixed-case>: Combining Distributional Semantic Models and Word Sense Disambiguation for Textual Similarity PierpaoloBasile AnnalinaCaputo - GiovanniSemeraro + GiovanniSemeraro 748–753 S14-2133 10.3115/v1/S14-2133 @@ -1692,7 +1692,7 @@ GiuseppeCastellucci SimoneFilice DaniloCroce - RobertoBasili + RobertoBasili 761–767 S14-2135 10.3115/v1/S14-2135 @@ -1715,8 +1715,8 @@ ReynierOrtega Bueno AdrianFonseca Bruzón CarlosMuñiz Cuza - YoanGutiérrez - AndrésMontoyo + YoanGutiérrez + AndrésMontoyo 773–778 S14-2137 10.3115/v1/S14-2137 @@ -1733,9 +1733,9 @@ <fixed-case>U</fixed-case>o<fixed-case>W</fixed-case>: <fixed-case>NLP</fixed-case> techniques developed at the <fixed-case>U</fixed-case>niversity of <fixed-case>W</fixed-case>olverhampton for Semantic Similarity and Textual Entailment RohitGupta - HannaBéchara - IsmailEl Maarouf - ConstantinOrăsan + HannaBéchara + IsmailEl Maarouf + ConstantinOrăsan 785–789 S14-2139 10.3115/v1/S14-2139 @@ -1751,11 +1751,11 @@ <fixed-case>UT</fixed-case>exas: Natural Language Semantics using Distributional Semantics and Probabilistic Logic - IslamBeltagy + IslamBeltagy StephenRoller - GemmaBoleda + GemmaBoleda KatrinErk - RaymondMooney + RaymondMooney 796–801 S14-2141 10.3115/v1/S14-2141 @@ -1805,7 +1805,7 @@ <fixed-case>UWM</fixed-case>: Applying an Existing Trainable Semantic Parser to Parse Robotic Spatial Commands - RohitKate + RohitKate 823–827 S14-2146 10.3115/v1/S14-2146 @@ -1814,7 +1814,7 @@ <fixed-case>UWM</fixed-case>: Disorder Mention Extraction from Clinical Text Using <fixed-case>CRF</fixed-case>s and Normalization Using Learned Edit Distance Patterns OmidGhiasvand - RohitKate + RohitKate 828–832 S14-2147 10.3115/v1/S14-2147 @@ -1824,7 +1824,7 @@ <fixed-case>V</fixed-case>3: Unsupervised Generation of Domain Aspect Terms for Aspect Based Sentiment Analysis AitorGarcía-Pablos MontseCuadros - GermanRigau + GermanRigau 833–837 S14-2148 10.3115/v1/S14-2148 @@ -1833,7 +1833,7 @@ <fixed-case>XRCE</fixed-case>: Hybrid Classification for Aspect-based Sentiment Analysis CarolineBrun - Diana NicoletaPopa + Diana NicoletaPopa ClaudeRoux 838–842 S14-2149 diff --git a/data/xml/S15.xml b/data/xml/S15.xml index f5edc6e07f..a95b8db703 100644 --- a/data/xml/S15.xml +++ b/data/xml/S15.xml @@ -4,8 +4,8 @@ Proceedings of the Fourth Joint Conference on Lexical and Computational Semantics S15-1 - MarthaPalmer - GemmaBoleda + MarthaPalmer + GemmaBoleda PaoloRosso 10.18653/v1/S15-1 Association for Computational Linguistics @@ -59,7 +59,7 @@ Combining Seemingly Incompatible Corpora for Implicit Semantic Role Labeling Parvin SadatFeizabadi - SebastianPadó + SebastianPadó 40–50 S15-1005 10.18653/v1/S15-1005 @@ -76,8 +76,8 @@ A Methodology for Word Sense Disambiguation at 90% based on large-scale <fixed-case>C</fixed-case>rowd<fixed-case>S</fixed-case>ourcing - OierLopez de Lacalle - EnekoAgirre + OierLopez de Lacalle + EnekoAgirre 61–70 S15-1007 10.18653/v1/S15-1007 @@ -86,7 +86,7 @@ Learning Structures of Negations from Flat Annotations VinodkumarPrabhakaran - BranimirBoguraev + BranimirBoguraev 71–81 S15-1008 10.18653/v1/S15-1008 @@ -96,22 +96,22 @@ A New Dataset and Evaluation for Belief/Factuality VinodkumarPrabhakaran TomasBy - JuliaHirschberg - OwenRambow + JuliaHirschberg + OwenRambow SamiraShaikh - TomekStrzalkowski + TomekStrzalkowski JenniferTracey MichaelArrigo RupayanBasu MicahClark AdamDalton - MonaDiab - LouiseGuthrie + MonaDiab + LouiseGuthrie AnnaProkofieva - StephanieStrassel + StephanieStrassel GregoryWerner - YorickWilks - JanyceWiebe + YorickWilks + JanyceWiebe 82–91 S15-1009 10.18653/v1/S15-1009 @@ -131,8 +131,8 @@ Combining Mention Context and Hyperlinks from <fixed-case>W</fixed-case>ikipedia for Named Entity Disambiguation AnderBarrena - AitorSoroa - EnekoAgirre + AitorSoroa + EnekoAgirre 101–105 S15-1011 10.18653/v1/S15-1011 @@ -144,7 +144,7 @@ Collective Document Classification with Implicit Inter-document Semantic Relationships ClintBurford StevenBird - TimothyBaldwin + TimothyBaldwin 106–116 S15-1012 10.18653/v1/S15-1012 @@ -154,7 +154,7 @@ <fixed-case>SGR</fixed-case>ank: Combining Statistical and Graphical Methods to Improve the State of the Art in Unsupervised Keyphrase Extraction SoheilDanesh TamaraSumner - James H.Martin + James H.Martin 117–126 S15-1013 10.18653/v1/S15-1013 @@ -173,7 +173,7 @@ Ideological Perspective Detection Using Semantic Features HebaElfardy - MonaDiab + MonaDiab ChrisCallison-Burch 137–146 S15-1015 @@ -182,8 +182,8 @@ Mapping Different Rhetorical Relation Annotations: A Proposal - FarahBenamara - MaiteTaboada + FarahBenamara + MaiteTaboada 147–152 S15-1016 10.18653/v1/S15-1016 @@ -193,7 +193,7 @@ Dissecting the Practical Lexical Function Model for Compositional Distributional Semantics AbhijeetGupta JasonUtt - SebastianPadó + SebastianPadó 153–158 S15-1017 10.18653/v1/S15-1017 @@ -213,7 +213,7 @@ AndrásKornai JuditÁcs MártonMakrai - Dávid MárkNemeskey + Dávid MárkNemeskey KatalinPajkossy GáborRecski 165–175 @@ -223,12 +223,12 @@ Extending a Single-Document Summarizer to Multi-Document: a Hierarchical Approach - LuísMarujo - RicardoRibeiro - DavidMartins de Matos - JoãoNeto + LuísMarujo + RicardoRibeiro + DavidMartins de Matos + JoãoNeto AnatoleGershman - JaimeCarbonell + JaimeCarbonell 176–181 S15-1020 10.18653/v1/S15-1020 @@ -236,10 +236,10 @@ Reading Between the Lines: Overcoming Data Sparsity for Accurate Classification of Lexical Relationships - SilviaNecşulescu + SilviaNecşulescu SaraMendes DavidJurgens - NúriaBel + NúriaBel RobertoNavigli 182–192 S15-1021 @@ -249,10 +249,10 @@ Multi-Level Alignments As An Extensible Representation Basis for Textual Entailment Algorithms Tae-GilNoh - SebastianPadó + SebastianPadó VeredShwartz IdoDagan - ViviNastase + ViviNastase KathrinEichler LiliKotlerman MeniAdler @@ -266,9 +266,9 @@ SamuelRitter CotieLong DenisPaperno - MarcoBaroni + MarcoBaroni MatthewBotvinick - AdeleGoldberg + AdeleGoldberg 199–204 S15-1023 10.18653/v1/S15-1023 @@ -288,7 +288,7 @@ Combining Open Source Annotators for Entity Linking through Weighted Voting - PabloRuiz + PabloRuiz ThierryPoibeau 211–215 S15-1025 @@ -297,8 +297,8 @@ Automatic Generation of a Lexical Resource to support Semantic Role Labeling in <fixed-case>P</fixed-case>ortuguese - MagaliSanches Duran - SandraAluísio + MagaliSanches Duran + SandraAluísio 216–221 S15-1026 10.18653/v1/S15-1026 @@ -356,7 +356,7 @@ Incremental Semantic Construction Using Normal Form <fixed-case>CCG</fixed-case> Derivation - YoshihideKato + YoshihideKato ShigekiMatsubara 269–278 S15-1032 @@ -366,7 +366,7 @@ Dependency-Based Semantic Role Labeling using Convolutional Neural Networks WilliamFoland - JamesMartin + JamesMartin 279–288 S15-1033 10.18653/v1/S15-1033 @@ -385,7 +385,7 @@ Resolving Discourse-Deictic Pronouns: A Two-Stage Approach to Do It Sujay KumarJauhar RaulGuerra - EdgarGonzàlez Pellicer + EdgarGonzàlez Pellicer MartaRecasens 299–308 S15-1035 @@ -406,7 +406,7 @@ Proceedings of the 9th International Workshop on Semantic Evaluation (SemEval 2015) S15-2 - PreslavNakov + PreslavNakov TorstenZesch DanielCer DavidJurgens @@ -425,7 +425,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 1: Paraphrase and Semantic Similarity in <fixed-case>T</fixed-case>witter (<fixed-case>PIT</fixed-case>) WeiXu ChrisCallison-Burch - BillDolan + BillDolan 1–11 S15-2001 10.18653/v1/S15-2001 @@ -434,8 +434,8 @@ <fixed-case>MITRE</fixed-case>: Seven Systems for Semantic Similarity in Tweets GuidoZarrella - JohnHenderson - Elizabeth M.Merkhofer + JohnHenderson + Elizabeth M.Merkhofer LauraStrickhart 12–17 S15-2002 @@ -444,9 +444,9 @@ <fixed-case>CICBUAP</fixed-case>nlp: Graph-Based Approach for Answer Selection in Community Question Answering Task - HelenaGomez - DarnesVilariño - DavidPinto + HelenaGomez + DarnesVilariño + DavidPinto GrigoriSidorov 18–22 S15-2003 @@ -475,7 +475,7 @@ <fixed-case>ECNU</fixed-case>: Leveraging Word Embeddings to Boost Performance for Paraphrase in <fixed-case>T</fixed-case>witter JiangZhao - ManLan + ManLan 34–39 S15-2006 10.18653/v1/S15-2006 @@ -484,7 +484,7 @@ <fixed-case>ROB</fixed-case>: Using Semantic Meaning to Recognize Paraphrases Robvan der Goot - Gertjanvan Noord + Gertjanvan Noord 40–44 S15-2007 10.18653/v1/S15-2007 @@ -493,7 +493,7 @@ <fixed-case>AMRITA</fixed-case>_<fixed-case>CEN</fixed-case>@<fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015: Paraphrase Detection for <fixed-case>T</fixed-case>witter using Unsupervised Feature Learning with Recursive Autoencoders MahalakshmiShanumuga Sundaram - Anand KumarMadasamy + Anand KumarMadasamy SomanKotti Padannayil 45–50 S15-2008 @@ -504,7 +504,7 @@ <fixed-case>E</fixed-case>biquity: Paraphrase and Semantic Similarity in <fixed-case>T</fixed-case>witter using Skipgrams TaneeyaSatyapanich HangGao - TimFinin + TimFinin 51–55 S15-2009 10.18653/v1/S15-2009 @@ -512,7 +512,7 @@ <fixed-case>RTM</fixed-case>-<fixed-case>DCU</fixed-case>: Predicting Semantic Similarity with Referential Translation Machines - ErgunBiçici + ErgunBiçici 56–63 S15-2010 10.18653/v1/S15-2010 @@ -529,12 +529,12 @@ <fixed-case>TKLBLIIR</fixed-case>: Detecting <fixed-case>T</fixed-case>witter Paraphrases with <fixed-case>T</fixed-case>weeting<fixed-case>J</fixed-case>ay - Vanja MladenKaran + Vanja MladenKaran GoranGlavaš JanŠnajder - BojanaDalbelo Bašić + BojanaDalbelo Bašić IvanVulić - Marie-FrancineMoens + Marie-FrancineMoens 70–74 S15-2012 10.18653/v1/S15-2012 @@ -542,7 +542,7 @@ <fixed-case>CDTDS</fixed-case>: Predicting Paraphrases in <fixed-case>T</fixed-case>witter via Support Vector Regression - Rafael MichaelKarampatsis + Rafael MichaelKarampatsis 75–79 S15-2013 10.18653/v1/S15-2013 @@ -550,8 +550,8 @@ yi<fixed-case>G</fixed-case>ou: A Semantic Text Similarity Computing System Based on <fixed-case>SVM</fixed-case> - YangLiu - ChengjieSun + YangLiu + ChengjieSun LeiLin XiaolongWang 80–84 @@ -562,9 +562,9 @@ <fixed-case>USAAR</fixed-case>-<fixed-case>SHEFFIELD</fixed-case>: Semantic Textual Similarity with Deep Regression and Machine Translation Evaluation Metrics LilingTan - CarolinaScarton + CarolinaScarton LuciaSpecia - Josefvan Genabith + Josefvan Genabith 85–89 S15-2015 10.18653/v1/S15-2015 @@ -573,7 +573,7 @@ <fixed-case>T</fixed-case>r<fixed-case>WP</fixed-case>: Text Relatedness using Word and Phrase Relatedness Md Rashadul HasanRakib - AminulIslam + AminulIslam EvangelosMilios 90–95 S15-2016 @@ -582,13 +582,13 @@ <fixed-case>M</fixed-case>ini<fixed-case>E</fixed-case>xperts: An <fixed-case>SVM</fixed-case> Approach for Measuring Semantic Textual Similarity - HannaBéchara + HannaBéchara HernaniCosta ShivaTaslimipoor RohitGupta - ConstantinOrasan - GloriaCorpas Pastor - RuslanMitkov + ConstantinOrasan + GloriaCorpas Pastor + RuslanMitkov 96–101 S15-2017 10.18653/v1/S15-2017 @@ -628,8 +628,8 @@ <fixed-case>ECNU</fixed-case>: Using Traditional Similarity Measurements and Word Embedding for Semantic Textual Similarity Estimation JiangZhao - ManLan - Jun FengTian + ManLan + Jun FengTian 117–122 S15-2021 10.18653/v1/S15-2021 @@ -658,8 +658,8 @@ <fixed-case>SOPA</fixed-case>: Random Forests Regression for the Semantic Textual Similarity task DavideBuscaldi - JorgeGarcía Flores - Ivan V.Meza + JorgeGarcía Flores + Ivan V.Meza IsaacRodríguez 132–137 S15-2024 @@ -678,7 +678,7 @@ <fixed-case>DCU</fixed-case>: Using Distributional Semantics and Domain Adaptation for the Semantic Textual Similarity <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 2 PiyushArora - ChrisHokamp + ChrisHokamp JenniferFoster GarethJones 143–147 @@ -688,7 +688,7 @@ <fixed-case>DLS</fixed-case>@<fixed-case>CU</fixed-case>: Sentence Similarity from Word Alignment and Semantic Vector Composition - Md ArafatSultan + Md ArafatSultan StevenBethard TamaraSumner 148–153 @@ -699,7 +699,7 @@ <fixed-case>FCICU</fixed-case>: The Integration between Sense-Based Kernel and Surface-Based Methods to Measure Semantic Textual Similarity BasmaHassan - SamirAbdelRahman + SamirAbdelRahman ReemBahgat 154–158 S15-2028 @@ -710,8 +710,8 @@ <fixed-case>AZMAT</fixed-case>: Sentence Similarity Using Associative Matrices EvanJaffe LifengJin - DavidKing - Martenvan Schijndel + DavidKing + Martenvan Schijndel 159–163 S15-2029 10.18653/v1/S15-2029 @@ -720,10 +720,10 @@ <fixed-case>N</fixed-case>e<fixed-case>R</fixed-case>o<fixed-case>S</fixed-case>im: A System for Measuring and Interpreting Semantic Textual Similarity RajendraBanjade - Nobal BikramNiraula + Nobal BikramNiraula NabinMaharjan VasileRus - DanStefanescu + DanStefanescu MihaiLintean DipeshGautam 164–171 @@ -736,7 +736,7 @@ LushanHan JustinMartineau DoreenCheng - ChristopherThomas + ChristopherThomas 172–177 S15-2031 10.18653/v1/S15-2031 @@ -744,11 +744,11 @@ <fixed-case>UBC</fixed-case>: Cubes for <fixed-case>E</fixed-case>nglish Semantic Textual Similarity and Supervised Approaches for Interpretable <fixed-case>STS</fixed-case> - EnekoAgirre - AitorGonzalez-Agirre + EnekoAgirre + AitorGonzalez-Agirre IñigoLopez-Gazpio - MontseMaritxalar - GermanRigau + MontseMaritxalar + GermanRigau LarraitzUria 178–183 S15-2032 @@ -759,7 +759,7 @@ <fixed-case>ASAP</fixed-case>-<fixed-case>II</fixed-case>: From the Alignment of Phrases to Textual Similarity AnaAlves DavidSimões - Hugo GonçaloOliveira + Hugo GonçaloOliveira AdrianaFerrugento 184–189 S15-2033 @@ -768,8 +768,8 @@ <fixed-case>TATO</fixed-case>: Leveraging on Multiple Strategies for Semantic Textual Similarity - Tu ThanhVu - Quan HungTran + Tu ThanhVu + Quan HungTran Son BaoPham 190–195 S15-2034 @@ -801,8 +801,8 @@ GiovanniDa San Martino AlessandroMoschitti KareemDarwish - LluísMàrquez - ShafiqJoty + LluísMàrquez + ShafiqJoty WalidMagdy 203–209 S15-2036 @@ -823,10 +823,10 @@ <fixed-case>JAIST</fixed-case>: Combining multiple features for Answer Selection in Community Question Answering - Quan HungTran - Vu DucTran - Tu ThanhVu - Minh LeNguyen + Quan HungTran + Vu DucTran + Tu ThanhVu + Minh LeNguyen Son BaoPham 215–219 S15-2038 @@ -849,7 +849,7 @@ RehamMohamed MahaRagab HebaAbdelnasser - NagwaM. El-Makky + NagwaM. El-Makky MarwanTorki 226–230 S15-2040 @@ -869,8 +869,8 @@ <fixed-case>ECNU</fixed-case>: Using Multiple Sources of <fixed-case>CQA</fixed-case>-based Information for Answers Selection and <fixed-case>YES</fixed-case>/<fixed-case>NO</fixed-case> Response Inference LiangYi - JianXiangWang - ManLan + JianXiangWang + ManLan 236–241 S15-2042 10.18653/v1/S15-2042 @@ -891,7 +891,7 @@ <fixed-case>C</fixed-case>o<fixed-case>M</fixed-case>i<fixed-case>C</fixed-case>: Adapting a Short Answer Assessment System for Answer Selection - BjörnRudzewitz + BjörnRudzewitz RamonZiai 247–251 S15-2044 @@ -900,19 +900,19 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 2: Semantic Textual Similarity, <fixed-case>E</fixed-case>nglish, <fixed-case>S</fixed-case>panish and Pilot on Interpretability - EnekoAgirre + EnekoAgirre CarmenBanea - ClaireCardie + ClaireCardie DanielCer - MonaDiab - AitorGonzalez-Agirre + MonaDiab + AitorGonzalez-Agirre WeiweiGuo IñigoLopez-Gazpio - MontseMaritxalar - RadaMihalcea - GermanRigau + MontseMaritxalar + RadaMihalcea + GermanRigau LarraitzUria - JanyceWiebe + JanyceWiebe 252–263 S15-2045 10.18653/v1/S15-2045 @@ -920,7 +920,7 @@ <fixed-case>E</fixed-case>x<fixed-case>B</fixed-case> Themis: Extensive Feature Extraction from Word Alignments for Semantic Textual Similarity - ChristianHänig + ChristianHänig RobertRemus XoseDe La Puente 264–268 @@ -931,7 +931,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 3: Answer Selection in Community Question Answering PreslavNakov - LluísMàrquez + LluísMàrquez WalidMagdy AlessandroMoschitti JimGlass @@ -945,8 +945,8 @@ <fixed-case>V</fixed-case>ector<fixed-case>SLU</fixed-case>: A Continuous Word Vector Approach to Answer Selection in Community Question Answering Systems YonatanBelinkov MitraMohtarami - ScottCyphers - JamesGlass + ScottCyphers + JamesGlass 282–287 S15-2048 10.18653/v1/S15-2048 @@ -972,11 +972,11 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 14: Analysis of Clinical Text - NoémieElhadad - SameerPradhan + NoémieElhadad + SameerPradhan SharonGorman SureshManandhar - WendyChapman + WendyChapman GuerganaSavova 303–310 S15-2051 @@ -1002,7 +1002,7 @@ VítBaisa JaneBradbury SilvieCinková - IsmaïlEl Maarouf + IsmaïlEl Maarouf AdamKilgarriff OctavianPopescu 315–324 @@ -1042,7 +1042,7 @@ <fixed-case>EBL</fixed-case>-Hope: Multilingual Word Sense Disambiguation Using a Hybrid Knowledge-Based Technique Eniafe FestusAyetiran - GuidoBoella + GuidoBoella 340–344 S15-2057 10.18653/v1/S15-2057 @@ -1051,7 +1051,7 @@ <fixed-case>VUA</fixed-case>-background : When to Use Background Information to Perform Word Sense Disambiguation MartenPostma - RubenIzquierdo + RubenIzquierdo PiekVossen 345–349 S15-2058 @@ -1062,7 +1062,7 @@ <fixed-case>T</fixed-case>eam<fixed-case>UFAL</fixed-case>: <fixed-case>WSD</fixed-case>+<fixed-case>EL</fixed-case> as Document Retrieval PetrFanta RomanSudarikov - OndřejBojar + OndřejBojar 350–354 S15-2059 10.18653/v1/S15-2059 @@ -1070,7 +1070,7 @@ <fixed-case>EL</fixed-case>92: Entity Linking Combining Open Source Annotators via Weighted Voting - PabloRuiz + PabloRuiz ThierryPoibeau 355–359 S15-2060 @@ -1081,7 +1081,7 @@ <fixed-case>UNIBA</fixed-case>: Combining Distributional Semantic Models and Sense Distribution for Multilingual All-Words Sense Disambiguation and Entity Linking PierpaoloBasile AnnalinaCaputo - GiovanniSemeraro + GiovanniSemeraro 360–364 S15-2061 10.18653/v1/S15-2061 @@ -1098,7 +1098,7 @@ <fixed-case>T</fixed-case>eam<fixed-case>HCMUS</fixed-case>: Analysis of Clinical Text NghiaHuynh - QuocHo + QuocHo 370–374 S15-2063 10.18653/v1/S15-2063 @@ -1124,7 +1124,7 @@ <fixed-case>UWM</fixed-case>: A Simple Baseline Method for Identifying Attributes of Disease and Disorder Mentions in Clinical Text OmidGhiasvand - RohitKate + RohitKate 385–388 S15-2066 10.18653/v1/S15-2066 @@ -1165,7 +1165,7 @@ <fixed-case>UL</fixed-case>isboa: Recognition and Normalization of Medical Concepts AndréLeal BrunoMartins - FranciscoCouto + FranciscoCouto 406–411 S15-2070 10.18653/v1/S15-2070 @@ -1199,7 +1199,7 @@ <fixed-case>B</fixed-case>ioinformatics<fixed-case>UA</fixed-case>: Machine Learning and Rule-Based Recognition of Disorders and Clinical Attributes from Patient Notes SérgioMatos JoséSequeira - José LuísOliveira + José LuísOliveira 422–426 S15-2073 10.18653/v1/S15-2073 @@ -1219,7 +1219,7 @@ <fixed-case>CMILLS</fixed-case>: Adapting Semantic Role Labeling Features to Dependency Parsing ChadMills - Gina-AnneLevow + Gina-AnneLevow 433–437 S15-2075 10.18653/v1/S15-2075 @@ -1248,7 +1248,7 @@ SaraRosenthal PreslavNakov SvetlanaKiritchenko - SaifMohammad + SaifMohammad AlanRitter VeselinStoyanov 451–463 @@ -1272,7 +1272,7 @@ TonyVeale PaoloRosso EkaterinaShutova - JohnBarnden + JohnBarnden AntonioReyes 470–478 S15-2080 @@ -1281,7 +1281,7 @@ <fixed-case>CL</fixed-case>a<fixed-case>C</fixed-case>-<fixed-case>S</fixed-case>enti<fixed-case>P</fixed-case>ipe: <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val2015 Subtasks 10 <fixed-case>B</fixed-case>,<fixed-case>E</fixed-case>, and Task 11 - CanberkÖzdemir + CanberkÖzdemir SabineBergler 479–485 S15-2081 @@ -1291,8 +1291,8 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 12: Aspect Based Sentiment Analysis MariaPontiki - DimitrisGalanis - HarisPapageorgiou + DimitrisGalanis + HarisPapageorgiou SureshManandhar IonAndroutsopoulos 486–495 @@ -1321,8 +1321,8 @@ <fixed-case>DIEGOL</fixed-case>ab: An Approach for Message-level Sentiment Classification in <fixed-case>T</fixed-case>witter AbeedSarker AzadehNikfarjam - DavyWeissenbacher - GracielaGonzalez + DavyWeissenbacher + GracielaGonzalez 510–514 S15-2085 10.18653/v1/S15-2085 @@ -1354,7 +1354,7 @@ <fixed-case>CIS</fixed-case>-positive: A Combination of Convolutional Neural Networks and Support Vector Machines for Sentiment Analysis in <fixed-case>T</fixed-case>witter SebastianEbert Ngoc ThangVu - HinrichSchütze + HinrichSchütze 527–532 S15-2088 10.18653/v1/S15-2088 @@ -1366,7 +1366,7 @@ TamaraÁlvarez-López JonathanJuncal-Martínez EnriqueCosta-Montenegro - Francisco JavierGonzález-Castaño + Francisco JavierGonzález-Castaño 533–538 S15-2089 10.18653/v1/S15-2089 @@ -1395,9 +1395,9 @@ <fixed-case>R</fixed-case>ose<fixed-case>M</fixed-case>erry: A Baseline Message-level Sentiment Classification System - HuizhiLiang + HuizhiLiang RichardFothergill - TimothyBaldwin + TimothyBaldwin 551–555 S15-2092 10.18653/v1/S15-2092 @@ -1407,7 +1407,7 @@ <fixed-case>UDLAP</fixed-case>: Sentiment Analysis Using a Graph-Based Representation EstebanCastillo OfeliaCervantes - DarnesVilariño + DarnesVilariño DavidBáez AlfredoSánchez 556–560 @@ -1419,7 +1419,7 @@ <fixed-case>ECNU</fixed-case>: Multi-level Sentiment Analysis on <fixed-case>T</fixed-case>witter Using Traditional Linguistic Features and Word Embedding Features ZhihuaZhang GuoshunWu - ManLan + ManLan 561–567 S15-2094 10.18653/v1/S15-2094 @@ -1428,8 +1428,8 @@ <fixed-case>L</fixed-case>sislif: Feature Extraction and Label Weighting for Sentiment Analysis in <fixed-case>T</fixed-case>witter HussamHamdan - PatriceBellot - FredericBechet + PatriceBellot + FredericBechet 568–573 S15-2095 10.18653/v1/S15-2095 @@ -1439,7 +1439,7 @@ <fixed-case>EL</fixed-case>i<fixed-case>RF</fixed-case>: A <fixed-case>SVM</fixed-case> Approach for <fixed-case>SA</fixed-case> tasks in <fixed-case>T</fixed-case>witter at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 MayteGiménez FerranPla - Lluís-F.Hurtado + Lluís-F.Hurtado 574–581 S15-2096 10.18653/v1/S15-2096 @@ -1477,7 +1477,7 @@ <fixed-case>IITPS</fixed-case>em<fixed-case>E</fixed-case>val: Sentiment Discovery from 140 Characters - AyushKumar + AyushKumar VamsiKrishna AsifEkbal 601–607 @@ -1491,7 +1491,7 @@ MartinJaggi DominicEgger PascalJulmy - LeonDerczynski + LeonDerczynski MarkCieliebak 608–612 S15-2101 @@ -1501,10 +1501,10 @@ <fixed-case>INESC</fixed-case>-<fixed-case>ID</fixed-case>: A Regression Model for Large Scale <fixed-case>T</fixed-case>witter Sentiment Lexicon Induction SilvioAmir - RamonF. Astudillo + RamonF. Astudillo WangLing BrunoMartins - Mario J.Silva + Mario J.Silva IsabelTrancoso 613–618 S15-2102 @@ -1574,11 +1574,11 @@ <fixed-case>INESC</fixed-case>-<fixed-case>ID</fixed-case>: Sentiment Analysis without Hand-Coded Features or Linguistic Resources using Embedding Subspaces - RamonF. Astudillo + RamonF. Astudillo SilvioAmir WangLing BrunoMartins - Mario J.Silva + Mario J.Silva IsabelTrancoso 652–656 S15-2109 @@ -1608,7 +1608,7 @@ YuxiaoZhang GaoyanOu TengjiaoWang - Kam-faiWong + Kam-faiWong 664–668 S15-2111 10.18653/v1/S15-2111 @@ -1651,7 +1651,7 @@ <fixed-case>LT</fixed-case>3: Sentiment Analysis of Figurative Tweets: piece of cake #<fixed-case>N</fixed-case>ot<fixed-case>R</fixed-case>eally CynthiaVan Hee ElsLefever - VéroniqueHoste + VéroniqueHoste 684–688 S15-2115 10.18653/v1/S15-2115 @@ -1681,8 +1681,8 @@ <fixed-case>CPH</fixed-case>: Sentiment analysis of Figurative Language on <fixed-case>T</fixed-case>witter #easypeasy #not SarahMcGillion - HéctorMartínez Alonso - BarbaraPlank + HéctorMartínez Alonso + BarbaraPlank 699–703 S15-2118 10.18653/v1/S15-2118 @@ -1712,7 +1712,7 @@ <fixed-case>V</fixed-case>3: Unsupervised Aspect Based Sentiment Analysis for <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val2015 Task 12 AitorGarcía-Pablos MontseCuadros - GermanRigau + GermanRigau 714–718 S15-2121 10.18653/v1/S15-2121 @@ -1720,10 +1720,10 @@ <fixed-case>LT</fixed-case>3: Applying Hybrid Terminology Extraction to Aspect-Based Sentiment Analysis - OrphéeDe Clercq + OrphéeDe Clercq MarjanVan de Kauter ElsLefever - VéroniqueHoste + VéroniqueHoste 719–724 S15-2122 10.18653/v1/S15-2122 @@ -1740,10 +1740,10 @@ <fixed-case>SINAI</fixed-case>: Syntactic Approach for Aspect-Based Sentiment Analysis - Salud M.Jiménez-Zafra - EugenioMartínez-Cámara - M. TeresaMartín-Valdivia - L. AlfonsoUreña-López + Salud M.Jiménez-Zafra + EugenioMartínez-Cámara + M. TeresaMartín-Valdivia + L. AlfonsoUreña-López 730–735 S15-2124 10.18653/v1/S15-2124 @@ -1752,7 +1752,7 @@ <fixed-case>ECNU</fixed-case>: Extracting Effective Features from Multiple Sequential Sentences for Target-dependent Sentiment Analysis in Reviews ZhihuaZhang - ManLan + ManLan 736–741 S15-2125 10.18653/v1/S15-2125 @@ -1772,8 +1772,8 @@ <fixed-case>E</fixed-case>li<fixed-case>X</fixed-case>a: A Modular and Flexible <fixed-case>ABSA</fixed-case> Platform IñakiSan Vicente - XabierSaralegi - RodrigoAgerri + XabierSaralegi + RodrigoAgerri 748–752 S15-2127 10.18653/v1/S15-2127 @@ -1782,8 +1782,8 @@ <fixed-case>L</fixed-case>sislif: <fixed-case>CRF</fixed-case> and Logistic Regression for Opinion Target Extraction and Sentiment Polarity Analysis HussamHamdan - PatriceBellot - FredericBechet + PatriceBellot + FredericBechet 753–758 S15-2128 10.18653/v1/S15-2128 @@ -1810,7 +1810,7 @@ <fixed-case>TJU</fixed-case>de<fixed-case>M</fixed-case>: A Combination Classifier for Aspect Category Detection and Sentiment Polarity Classification ZhifeiZhang - Jian-YunNie + Jian-YunNie HonglingWang 772–777 S15-2131 @@ -1820,13 +1820,13 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 4: <fixed-case>T</fixed-case>ime<fixed-case>L</fixed-case>ine: Cross-Document Event Ordering Anne-LyseMinard - ManuelaSperanza - EnekoAgirre + ManuelaSperanza + EnekoAgirre ItziarAldabe Mariekevan Erp - BernardoMagnini - GermanRigau - RubénUrizar + BernardoMagnini + GermanRigau + RubénUrizar 778–786 S15-2132 10.18653/v1/S15-2132 @@ -1845,12 +1845,12 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 5: <fixed-case>QA</fixed-case> <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val - Evaluating Temporal Information Understanding with Question Answering - HectorLlorens - NathanaelChambers + HectorLlorens + NathanaelChambers NaushadUzZaman NasrinMostafazadeh - JamesAllen - JamesPustejovsky + JamesAllen + JamesPustejovsky 792–800 S15-2134 10.18653/v1/S15-2134 @@ -1868,9 +1868,9 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 6: Clinical <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val StevenBethard - LeonDerczynski + LeonDerczynski GuerganaSavova - JamesPustejovsky + JamesPustejovsky MarcVerhagen 806–814 S15-2136 @@ -1880,10 +1880,10 @@ <fixed-case>B</fixed-case>lu<fixed-case>L</fixed-case>ab: Temporal Information Extraction for the 2015 Clinical <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val Challenge SumithraVelupillai - Danielle LMowery - SamirAbdelrahman + Danielle LMowery + SamirAbdelrahman LeeChristensen - WendyChapman + WendyChapman 815–819 S15-2137 10.18653/v1/S15-2137 @@ -1891,8 +1891,8 @@ <fixed-case>GPLSIUA</fixed-case>: Combining Temporal Information and Topic Modeling for Cross-Document Event Ordering - BorjaNavarro - EstelaSaquete + BorjaNavarro + EstelaSaquete 820–824 S15-2138 10.18653/v1/S15-2138 @@ -1923,9 +1923,9 @@ <fixed-case>UFPRS</fixed-case>heffield: Contrasting Rule-based and Support Vector Machine Approaches to Time Expression Identification in Clinical <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val HeglerTissot - GenevieveGorrell + GenevieveGorrell AngusRoberts - LeonDerczynski + LeonDerczynski Marcos DidonetDel Fabro 835–839 S15-2141 @@ -1955,9 +1955,9 @@ <fixed-case>AMBRA</fixed-case>: A Ranking Approach to Temporal Text Classification MarcosZampieri - Alina MariaCiobanu + Alina MariaCiobanu VladNiculae - Liviu P.Dinu + Liviu P.Dinu 851–855 S15-2144 10.18653/v1/S15-2144 @@ -2002,9 +2002,9 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2015 Task 8: <fixed-case>S</fixed-case>pace<fixed-case>E</fixed-case>val - JamesPustejovsky + JamesPustejovsky ParisaKordjamshidi - Marie-FrancineMoens + Marie-FrancineMoens AaronLevine SethDworman ZacharyYocum @@ -2046,11 +2046,11 @@ StephanOepen MarcoKuhlmann YusukeMiyao - DanielZeman + DanielZeman SilvieCinková - DanFlickinger - JanHajič - ZdeňkaUrešová + DanFlickinger + JanHajič + ZdeňkaUrešová 915–926 S15-2153 10.18653/v1/S15-2153 @@ -2061,7 +2061,7 @@ YantaoDu FanZhang XunZhang - WeiweiSun + WeiweiSun XiaojunWan 927–931 S15-2154 @@ -2072,7 +2072,7 @@ <fixed-case>USAAR</fixed-case>-<fixed-case>WLV</fixed-case>: Hypernym Generation with Deep Neural Nets LilingTan RohitGupta - Josefvan Genabith + Josefvan Genabith 932–937 S15-2155 10.18653/v1/S15-2155 @@ -2081,7 +2081,7 @@ <fixed-case>NTNU</fixed-case>: An Unsupervised Knowledge Approach for Taxonomy Extraction BamfaCeesay - WenJuan Hou + WenJuan Hou 938–943 S15-2156 10.18653/v1/S15-2156 @@ -2097,7 +2097,7 @@ <fixed-case>TALN</fixed-case>-<fixed-case>UPF</fixed-case>: Taxonomy Learning Exploiting <fixed-case>CRF</fixed-case>-Based Hypernym Extraction on Encyclopedic Definitions - LuisEspinosa-Anke + LuisEspinosa-Anke HoracioSaggion FrancescoRonzano 949–954 @@ -2109,7 +2109,7 @@ <fixed-case>QASSIT</fixed-case>: A Pretopological Framework for the Automatic Construction of Lexical Taxonomies from Raw Texts GuillaumeCleuziou DavideBuscaldi - GaelDias + GaelDias VincentLevorato ChristineLargeron 955–959 @@ -2119,9 +2119,9 @@ <fixed-case>R</fixed-case>iga: from <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et to Semantic Frames with C6.0 Rules - GuntisBarzdins - PeterisPaikens - DidzisGosko + GuntisBarzdins + PeterisPaikens + DidzisGosko 960–964 S15-2160 10.18653/v1/S15-2160 @@ -2140,7 +2140,7 @@ <fixed-case>L</fixed-case>isbon: Evaluating <fixed-case>T</fixed-case>urbo<fixed-case>S</fixed-case>emantic<fixed-case>P</fixed-case>arser on Multiple Languages and Out-of-Domain Data Mariana S. C.Almeida - André F. T.Martins + André F. T.Martins 970–973 S15-2162 10.18653/v1/S15-2162 diff --git a/data/xml/S16.xml b/data/xml/S16.xml index 3c992c1e81..5faf65f0f1 100644 --- a/data/xml/S16.xml +++ b/data/xml/S16.xml @@ -7,7 +7,7 @@ MarineCarpuat DanielCer DavidJurgens - PreslavNakov + PreslavNakov TorstenZesch 10.18653/v1/S16-1 Association for Computational Linguistics @@ -35,23 +35,23 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 5: Aspect Based Sentiment Analysis MariaPontiki - DimitrisGalanis - HarisPapageorgiou + DimitrisGalanis + HarisPapageorgiou IonAndroutsopoulos SureshManandhar MohammadAL-Smadi MahmoudAl-Ayyoub YanyanZhao BingQin - OrphéeDe Clercq - VéroniqueHoste + OrphéeDe Clercq + VéroniqueHoste MariannaApidianaki XavierTannier - NataliaLoukachevitch + NataliaLoukachevitch EvgeniyKotelnikov - NuriaBel - Salud MaríaJiménez-Zafra - GülşenEryiğit + NuriaBel + Salud MaríaJiménez-Zafra + GülşenEryiğit 19–30 S16-1002 10.18653/v1/S16-1002 @@ -59,7 +59,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 6: Detecting Stance in Tweets - SaifMohammad + SaifMohammad SvetlanaKiritchenko ParinazSobhani XiaodanZhu @@ -72,7 +72,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 7: Determining Sentiment Intensity of <fixed-case>E</fixed-case>nglish and <fixed-case>A</fixed-case>rabic Phrases SvetlanaKiritchenko - SaifMohammad + SaifMohammad MohammadSalameh 42–51 S16-1004 @@ -103,7 +103,7 @@ <fixed-case>S</fixed-case>te<fixed-case>M</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Applying Active Learning to Improve Sentiment Classification StefanRäbiger MishalKazmi - YücelSaygın + YücelSaygın PeterSchüller MyraSpiliopoulou 64–70 @@ -114,7 +114,7 @@ <fixed-case>I</fixed-case>2<fixed-case>RNTU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Classifier Fusion for Polarity Classification in <fixed-case>T</fixed-case>witter ZhengchenZhang - ChenZhang + ChenZhang FuxiangWu Dong-YanHuang WeisiLin @@ -127,8 +127,8 @@ <fixed-case>L</fixed-case>y<fixed-case>S</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Exploiting Neural Activation Values for <fixed-case>T</fixed-case>witter Sentiment Classification and Quantification DavidVilares - YeraiDoval - Miguel A.Alonso + YeraiDoval + Miguel A.Alonso CarlosGómez-Rodríguez 79–84 S16-1009 @@ -138,7 +138,7 @@ <fixed-case>T</fixed-case>wi<fixed-case>SE</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: <fixed-case>T</fixed-case>witter Sentiment Classification GeorgiosBalikas - Massih-RezaAmini + Massih-RezaAmini 85–91 S16-1010 10.18653/v1/S16-1010 @@ -174,9 +174,9 @@ <fixed-case>NTNUS</fixed-case>ent<fixed-case>E</fixed-case>val at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Combining General Classifiers for Fast <fixed-case>T</fixed-case>witter Sentiment Analysis - Brage EkrollJahren + Brage EkrollJahren ValerijFredriksen - BjörnGambäck + BjörnGambäck LarsBungum 103–108 S16-1014 @@ -187,7 +187,7 @@ <fixed-case>UDLAP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Sentiment Quantification Using a Graph Based Representation EstebanCastillo OfeliaCervantes - DarnesVilariño + DarnesVilariño DavidBáez 109–114 S16-1015 @@ -200,7 +200,7 @@ TamaraÁlvarez-López MilagrosFernández-Gavilanes EnriqueCosta-Montenegro - Francisco JavierGonzález-Castaño + Francisco JavierGonzález-Castaño 115–119 S16-1016 10.18653/v1/S16-1016 @@ -245,10 +245,10 @@ <fixed-case>CICBUAP</fixed-case>nlp at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4-A: Discovering <fixed-case>T</fixed-case>witter Polarity using Enhanced Embeddings - HelenaGomez - DarnesVilariño + HelenaGomez + DarnesVilariño GrigoriSidorov - DavidPinto Avendaño + DavidPinto Avendaño 145–148 S16-1021 10.18653/v1/S16-1021 @@ -273,8 +273,8 @@ FilipposKokkinos EliasIosif NikolaosMalandrakis - HarisPapageorgiou - ShrikanthNarayanan + HarisPapageorgiou + ShrikanthNarayanan AlexandrosPotamianos 155–163 S16-1023 @@ -313,8 +313,8 @@ <fixed-case>UNIMELB</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Tasks 4<fixed-case>A</fixed-case> and 4<fixed-case>B</fixed-case>: An Ensemble of Neural Networks and a <fixed-case>W</fixed-case>ord2<fixed-case>V</fixed-case>ec Based Model for Sentiment Classification StevenXu - HuiZhiLiang - TimothyBaldwin + HuiZhiLiang + TimothyBaldwin 183–189 S16-1027 10.18653/v1/S16-1027 @@ -331,7 +331,7 @@ <fixed-case>DSIC</fixed-case>-<fixed-case>ELIRF</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Message Polarity Classification in <fixed-case>T</fixed-case>witter using a Support Vector Machine Approach VíctorMartinez Morant - LLuís-F.Hurtado + LLuís-F.Hurtado FerranPla 198–201 S16-1029 @@ -341,7 +341,7 @@ <fixed-case>SENSEI</fixed-case>-<fixed-case>LIF</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Polarity embedding fusion for robust sentiment analysis MickaelRouvier - BenoitFavre + BenoitFavre 202–208 S16-1030 10.18653/v1/S16-1030 @@ -350,7 +350,7 @@ <fixed-case>D</fixed-case>iego<fixed-case>L</fixed-case>ab16 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Sentiment Analysis in <fixed-case>T</fixed-case>witter using Centroids, Clusters, and Sentiment Lexicons AbeedSarker - GracielaGonzalez + GracielaGonzalez 209–214 S16-1031 10.18653/v1/S16-1031 @@ -360,7 +360,7 @@ <fixed-case>VCU</fixed-case>-<fixed-case>TSA</fixed-case> at <fixed-case>S</fixed-case>emeval-2016 Task 4: Sentiment Analysis in <fixed-case>T</fixed-case>witter GerardBriones KasunAmarasinghe - BridgetMcInnes + BridgetMcInnes 215–219 S16-1032 10.18653/v1/S16-1032 @@ -394,9 +394,9 @@ <fixed-case>INESC</fixed-case>-<fixed-case>ID</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4-A: Reducing the Problem of Out-of-Embedding Words SilvioAmir - RamonF. Astudillo + RamonF. Astudillo WangLing - Mário J.Silva + Mário J.Silva IsabelTrancoso 238–242 S16-1036 @@ -410,7 +410,7 @@ EduardApostol OctavianCiobanu AdrianIftene - DianaTrandabăţ + DianaTrandabăţ 243–246 S16-1037 10.18653/v1/S16-1037 @@ -422,7 +422,7 @@ Marius-ValentinHrişca MihailGliga DianaDarabană - DianaTrandabăţ + DianaTrandabăţ AdrianIftene 247–250 S16-1038 @@ -432,10 +432,10 @@ <fixed-case>YZU</fixed-case>-<fixed-case>NLP</fixed-case> Team at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Ordinal Sentiment Classification Using a Recurrent Convolutional Network YunchaoHe - Liang-ChihYu + Liang-ChihYu Chin-ShengYang K. RobertLai - WeiyiLiu + WeiyiLiu 251–255 S16-1039 10.18653/v1/S16-1039 @@ -445,7 +445,7 @@ <fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: An Empirical Investigation of Traditional <fixed-case>NLP</fixed-case> Features and Word Embedding Features for Sentence-level and Topic-level Sentiment Analysis in <fixed-case>T</fixed-case>witter YunxiaoZhou ZhihuaZhang - ManLan + ManLan 256–261 S16-1040 10.18653/v1/S16-1040 @@ -453,7 +453,7 @@ <fixed-case>OPAL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: the Challenge of Porting a Sentiment Analysis System to the “Real” World - AlexandraBalahur + AlexandraBalahur 262–265 S16-1041 10.18653/v1/S16-1041 @@ -472,7 +472,7 @@ <fixed-case>N</fixed-case>ile<fixed-case>TMRG</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 5: Deep Convolutional Neural Networks for Aspect Category and Sentiment Extraction TalaatKhalil - Samhaa R.El-Beltagy + Samhaa R.El-Beltagy 271–276 S16-1043 10.18653/v1/S16-1043 @@ -531,7 +531,7 @@ JonathanJuncal-Martínez MilagrosFernández-Gavilanes EnriqueCosta-Montenegro - Francisco JavierGonzález-Castaño + Francisco JavierGonzález-Castaño 306–311 S16-1049 10.18653/v1/S16-1049 @@ -552,7 +552,7 @@ <fixed-case>AKTSKI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 5: Aspect Based Sentiment Analysis for Consumer Reviews ShubhamPateria - PrafullaChoubey + PrafullaChoubey 318–324 S16-1051 10.18653/v1/S16-1051 @@ -582,7 +582,7 @@ Fatih SametÇetin EzgiYıldırım CanÖzbey - GülşenEryiğit + GülşenEryiğit 337–341 S16-1054 10.18653/v1/S16-1054 @@ -620,7 +620,7 @@ <fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 5: Extracting Effective Features from Relevant Fragments in Sentence for Aspect-Based Sentiment Analysis in Reviews MengxiaoJiang ZhihuaZhang - ManLan + ManLan 361–366 S16-1058 10.18653/v1/S16-1058 @@ -669,7 +669,7 @@ <fixed-case>USFD</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 6: Any-Target Stance Detection on <fixed-case>T</fixed-case>witter with Autoencoders IsabelleAugenstein AndreasVlachos - KalinaBontcheva + KalinaBontcheva 389–393 S16-1063 10.18653/v1/S16-1063 @@ -688,7 +688,7 @@ AndrewLamont MananPancholi KennethSteimel - SandraKübler + SandraKübler 394–400 S16-1064 10.18653/v1/S16-1064 @@ -699,7 +699,7 @@ YukiIgarashi HiroyaKomatsu SosukeKobayashi - NaoakiOkazaki + NaoakiOkazaki KentaroInui 401–407 S16-1065 @@ -720,7 +720,7 @@ PrashanthVijayaraghavan IvanSysoev SoroushVosoughi - DebRoy + DebRoy 413–419 S16-1067 10.18653/v1/S16-1067 @@ -732,7 +732,7 @@ BrianEcker TheodoreHandleman NicolasHahn - MarilynWalker + MarilynWalker 420–427 S16-1068 10.18653/v1/S16-1068 @@ -750,7 +750,7 @@ <fixed-case>CU</fixed-case>-<fixed-case>GWU</fixed-case> Perspective at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 6: Ideological Stance Detection in Informal Text HebaElfardy - MonaDiab + MonaDiab 434–439 S16-1070 10.18653/v1/S16-1070 @@ -760,7 +760,7 @@ <fixed-case>JU</fixed-case>_<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 6: Detecting Stance in Tweets using Support Vector Machines Braja GopalPatra DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 440–444 S16-1071 10.18653/v1/S16-1071 @@ -771,7 +771,7 @@ HenrikBøhler PetterAsla ErwinMarsi - RuneSætre + RuneSætre 445–450 S16-1072 10.18653/v1/S16-1072 @@ -780,7 +780,7 @@ <fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2016 Task 6: Relevant or Not? Supportive or Not? A Two-step Learning System for Automatic Detecting Stance in Tweets ZhihuaZhang - ManLan + ManLan 451–457 S16-1073 10.18653/v1/S16-1073 @@ -803,7 +803,7 @@ IvanPaljak FilipČulinović FilipBoltužić - Vanja MladenKaran + Vanja MladenKaran DomagojAlagić JanŠnajder 464–468 @@ -814,8 +814,8 @@ <fixed-case>LSIS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 7: Using Web Search Engines for <fixed-case>E</fixed-case>nglish and <fixed-case>A</fixed-case>rabic Unsupervised Sentiment Intensity Prediction AmalHtait - SebastienFournier - PatriceBellot + SebastienFournier + PatriceBellot 469–473 S16-1076 10.18653/v1/S16-1076 @@ -842,7 +842,7 @@ <fixed-case>N</fixed-case>ile<fixed-case>TMRG</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 7: Deriving Prior Polarities for <fixed-case>A</fixed-case>rabic Sentiment Terms - Samhaa R.El-Beltagy + Samhaa R.El-Beltagy 486–490 S16-1079 10.18653/v1/S16-1079 @@ -852,7 +852,7 @@ <fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 7: An Enhanced Supervised Learning Method for Lexicon Sentiment Intensity Ranking FeixiangWang ZhihuaZhang - ManLan + ManLan 491–496 S16-1080 10.18653/v1/S16-1080 @@ -860,14 +860,14 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Semantic Textual Similarity, Monolingual and Cross-Lingual Evaluation - EnekoAgirre + EnekoAgirre CarmenBanea DanielCer - MonaDiab - AitorGonzalez-Agirre - RadaMihalcea - GermanRigau - JanyceWiebe + MonaDiab + AitorGonzalez-Agirre + RadaMihalcea + GermanRigau + JanyceWiebe 497–511 S16-1081 10.18653/v1/S16-1081 @@ -875,11 +875,11 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 2: Interpretable Semantic Textual Similarity - EnekoAgirre - AitorGonzalez-Agirre + EnekoAgirre + AitorGonzalez-Agirre IñigoLopez-Gazpio - MontseMaritxalar - GermanRigau + MontseMaritxalar + GermanRigau LarraitzUria 512–524 S16-1082 @@ -889,7 +889,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 3: Community Question Answering PreslavNakov - LluísMàrquez + LluísMàrquez AlessandroMoschitti WalidMagdy HamdyMubarak @@ -905,7 +905,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 10: Detecting Minimal Semantic Units and their Meanings (<fixed-case>D</fixed-case>i<fixed-case>MSUM</fixed-case>) NathanSchneider DirkHovy - AndersJohannsen + AndersJohannsen MarineCarpuat 546–559 S16-1084 @@ -914,7 +914,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2016 Task 11: Complex Word Identification - GustavoPaetzold + GustavoPaetzold LuciaSpecia 560–569 S16-1085 @@ -924,9 +924,9 @@ <fixed-case>FBK</fixed-case> <fixed-case>HLT</fixed-case>-<fixed-case>MT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Cross-lingual Semantic Similarity Measurement Using Quality Estimation Features and Compositional Bilingual Word Embeddings DuyguAtaman - José G.C. de Souza + José G.C. de Souza MarcoTurchi - MatteoNegri + MatteoNegri 570–576 S16-1086 10.18653/v1/S16-1086 @@ -985,12 +985,12 @@ <fixed-case>USFD</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Putting different State-of-the-Arts into a Box AhmetAker - FredericBlain + FredericBlain AndresDuque MarinaFomicheva - JuricaSeva + JuricaSeva KashifShah - DanielBeck + DanielBeck 609–613 S16-1092 10.18653/v1/S16-1092 @@ -1010,8 +1010,8 @@ <fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Leveraging Word Embedding From Macro and Micro Views to Boost Performance for Semantic Textual Similarity - JunfengTian - ManLan + JunfengTian + ManLan 621–627 S16-1094 10.18653/v1/S16-1094 @@ -1020,9 +1020,9 @@ <fixed-case>SAARSHEFF</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Semantic Textual Similarity with Machine Translation Evaluation Metrics and (e<fixed-case>X</fixed-case>treme) Boosted Tree Ensembles LilingTan - CarolinaScarton + CarolinaScarton LuciaSpecia - Josefvan Genabith + Josefvan Genabith 628–633 S16-1095 10.18653/v1/S16-1095 @@ -1030,12 +1030,12 @@ <fixed-case>WOLVESAAR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Replicating the Success of Monolingual Word Alignment and Neural Embeddings for Semantic Textual Similarity - HannahBechara + HannahBechara RohitGupta LilingTan - ConstantinOrăsan - RuslanMitkov - Josefvan Genabith + ConstantinOrăsan + RuslanMitkov + Josefvan Genabith 634–639 S16-1096 10.18653/v1/S16-1096 @@ -1065,7 +1065,7 @@ <fixed-case>DLS</fixed-case>@<fixed-case>CU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Supervised Models of Sentence Similarity - Md ArafatSultan + Md ArafatSultan StevenBethard TamaraSumner 650–655 @@ -1075,7 +1075,7 @@ <fixed-case>DCU</fixed-case>-<fixed-case>SEM</fixed-case>aniacs at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Synthetic Paragram Embeddings for Semantic Textual Similarity - ChrisHokamp + ChrisHokamp PiyushArora 656–662 S16-1100 @@ -1085,7 +1085,7 @@ <fixed-case>GWU</fixed-case> <fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Shared Task 1: Matrix Factorization for Crosslingual <fixed-case>STS</fixed-case> HananAldarmaki - MonaDiab + MonaDiab 663–667 S16-1101 10.18653/v1/S16-1101 @@ -1094,7 +1094,7 @@ <fixed-case>CNRC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Experiments in Crosslingual Semantic Textual Similarity Chi-kiuLo - CyrilGoutte + CyrilGoutte MichelSimard 668–673 S16-1102 @@ -1115,7 +1115,7 @@ <fixed-case>U</fixed-case>o<fixed-case>B</fixed-case>-<fixed-case>UK</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: A Flexible and Extendable System for Semantic Text Similarity using Types, Surprise and Phrase Linking HarishTayyar Madabushi MarkBuhagiar - MarkLee + MarkLee 680–685 S16-1104 10.18653/v1/S16-1104 @@ -1124,7 +1124,7 @@ <fixed-case>BIT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Sentence Similarity Based on Alignments and Vector with the Weight of Information Content HaoWu - HeyanHuang + HeyanHuang WenpengLu 686–690 S16-1105 @@ -1153,7 +1153,7 @@ SandipSarkar DipankarDas ParthaPakray - AlexanderGelbukh + AlexanderGelbukh 702–705 S16-1108 10.18653/v1/S16-1108 @@ -1161,8 +1161,8 @@ <fixed-case>A</fixed-case>mrita_<fixed-case>CEN</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Semantic Relation from Word Embeddings in Higher Dimension - BarathiGanesh HB - Anand KumarM + BarathiGanesh HB + Anand KumarM SomanKP 706–711 S16-1109 @@ -1171,7 +1171,7 @@ <fixed-case>NUIG</fixed-case>-<fixed-case>UNLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Soft Alignment and Deep Learning for Semantic Textual Similarity - John PhilipMcCrae + John PhilipMcCrae KartikAsooja NitishAggarwal PaulBuitelaar @@ -1183,8 +1183,8 @@ <fixed-case>NORMAS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: <fixed-case>SEMSIM</fixed-case>: A Multi-Feature Approach to Semantic Text Similarity KolawoleAdebayo - LuigiDi Caro - GuidoBoella + LuigiDi Caro + GuidoBoella 718–725 S16-1111 10.18653/v1/S16-1111 @@ -1193,9 +1193,9 @@ <fixed-case>LIPN</fixed-case>-<fixed-case>IIMAS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Random Forest Regression Experiments on Align-and-Differentiate and Word Embeddings penalizing strategies Oscar WilliamLightgow Serrano - Ivan VladimirMeza Ruiz + Ivan VladimirMeza Ruiz Albert ManuelOrozco Camacho - JorgeGarcia Flores + JorgeGarcia Flores DavideBuscaldi 726–731 S16-1112 @@ -1244,7 +1244,7 @@ <fixed-case>RTM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Predicting Semantic Similarity with Referential Translation Machines and Related Statistics - ErgunBiçici + ErgunBiçici 758–764 S16-1117 10.18653/v1/S16-1117 @@ -1255,7 +1255,7 @@ <fixed-case>D</fixed-case>al<fixed-case>GTM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 1: Importance-Aware Compositional Approach to Short Text Similarity JieMei - AminulIslam + AminulIslam EvangelosMilios 765–770 S16-1118 @@ -1265,8 +1265,8 @@ i<fixed-case>UBC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 2: <fixed-case>RNN</fixed-case>s and <fixed-case>LSTM</fixed-case>s for interpretable <fixed-case>STS</fixed-case> IñigoLopez-Gazpio - EnekoAgirre - MontseMaritxalar + EnekoAgirre + MontseMaritxalar 771–776 S16-1119 10.18653/v1/S16-1119 @@ -1275,8 +1275,8 @@ Rev at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 2: Aligning Chunks by Lexical, Part of Speech and Semantic Equivalence PingTan - KarinVerspoor - TimothyMiller + KarinVerspoor + TimothyMiller 777–782 S16-1120 10.18653/v1/S16-1120 @@ -1286,7 +1286,7 @@ <fixed-case>FBK</fixed-case>-<fixed-case>HLT</fixed-case>-<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 2: A Multitask, Deep Learning Approach for Interpretable Semantic Textual Similarity SimoneMagnolini AnnaFeltracco - BernardoMagnini + BernardoMagnini 783–789 S16-1121 10.18653/v1/S16-1121 @@ -1303,7 +1303,7 @@ <fixed-case>VENSESEVAL</fixed-case> at <fixed-case>S</fixed-case>emeval-2016 Task 2 i<fixed-case>STS</fixed-case> - with a full-fledged rule-based approach - RodolfoDelmonte + RodolfoDelmonte 796–802 S16-1123 10.18653/v1/S16-1123 @@ -1324,7 +1324,7 @@ <fixed-case>DTS</fixed-case>im at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 2: Interpreting Similarity of Texts Based on Automated Chunking, Chunk Alignment and Semantic Relation Prediction RajendraBanjade NabinMaharjan - Nobal BikramNiraula + Nobal BikramNiraula VasileRus 809–813 S16-1125 @@ -1348,8 +1348,8 @@ AmrGomaa AshrafMahgoub HanyAhmed - MohsenRashwan - HazemRaafat + MohsenRashwan + HazemRaafat EslamKamal AhmadAl Sallab 822–827 @@ -1365,7 +1365,7 @@ YuZhang TaoLei KfirBar - ScottCyphers + ScottCyphers JimGlass 828–835 S16-1128 @@ -1404,12 +1404,12 @@ <fixed-case>U</fixed-case>ni<fixed-case>M</fixed-case>elb at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 3: Identifying Similar Questions by combining a <fixed-case>CNN</fixed-case> with String Similarity Measures - TimothyBaldwin - HuizhiLiang + TimothyBaldwin + HuizhiLiang BaharSalehi DorisHoogeveen YitongLi - LongDuong + LongDuong 851–856 S16-1131 10.18653/v1/S16-1131 @@ -1446,7 +1446,7 @@ <fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 3: Exploring Traditional Method and Deep Learning Method for Question Retrieval and Answer Ranking in Community Question Answering GuoshunWu - ManLan + ManLan 872–878 S16-1135 10.18653/v1/S16-1135 @@ -1463,9 +1463,9 @@ <fixed-case>MTE</fixed-case>-<fixed-case>NN</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 3: Can Machine Translation Evaluation Help Community Question Answering? - FranciscoGuzmán + FranciscoGuzmán PreslavNakov - LluísMàrquez + LluísMàrquez 887–895 S16-1137 10.18653/v1/S16-1137 @@ -1476,7 +1476,7 @@ AlbertoBarrón-Cedeño DanieleBonadiman GiovanniDa San Martino - ShafiqJoty + ShafiqJoty AlessandroMoschitti FahadAl Obaidli SalvatoreRomeo @@ -1497,7 +1497,7 @@ <fixed-case>UFRGS</fixed-case>&<fixed-case>LIF</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 10: Rule-Based <fixed-case>MWE</fixed-case> Identification and Predominant-Supersense Tagging - SilvioCordeiro + SilvioCordeiro CarlosRamisch AlineVillavicencio 910–917 @@ -1509,7 +1509,7 @@ <fixed-case>WHUN</fixed-case>lp at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task <fixed-case>D</fixed-case>i<fixed-case>MSUM</fixed-case>: A Pilot Study in Detecting Minimal Semantic Units and their Meanings using Supervised Models XinTang FeiLi - DonghongJi + DonghongJi 918–924 S16-1141 10.18653/v1/S16-1141 @@ -1527,7 +1527,7 @@ <fixed-case>UW</fixed-case>-<fixed-case>CSE</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 10: Detecting Multiword Expressions and Supersenses using Double-Chained Conditional Random Fields Mohammad JavadHosseini - Noah A.Smith + Noah A.Smith Su-InLee 931–936 S16-1143 @@ -1549,7 +1549,7 @@ AndreasScherbakov EkaterinaVylomova FeiLiu - TimothyBaldwin + TimothyBaldwin 946–952 S16-1145 10.18653/v1/S16-1145 @@ -1565,7 +1565,7 @@ <fixed-case>USAAR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: Complex Word Identification with Sense Entropy and Sentence Perplexity - José ManuelMartínez Martínez + José ManuelMartínez Martínez LilingTan 958–962 S16-1147 @@ -1582,7 +1582,7 @@ <fixed-case>SV</fixed-case>000gg at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: Heavy Gauge Complex Word Identification with System Voting - GustavoPaetzold + GustavoPaetzold LuciaSpecia 969–974 S16-1149 @@ -1592,8 +1592,8 @@ <fixed-case>M</fixed-case>elbourne at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2016 Task 11: Classifying Type-level Word Complexity using Random Forests with Corpus and Word List Features JulianBrooke - AlexandraUitdenbogerd - TimothyBaldwin + AlexandraUitdenbogerd + TimothyBaldwin 975–981 S16-1150 10.18653/v1/S16-1150 @@ -1613,7 +1613,7 @@ NiloyMukherjee Braja GopalPatra DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 986–990 S16-1152 10.18653/v1/S16-1152 @@ -1621,7 +1621,7 @@ <fixed-case>MAZA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: Detecting Lexical Complexity Using a Decision Stump Meta-Classifier - ShervinMalmasi + ShervinMalmasi MarcosZampieri 991–995 S16-1153 @@ -1630,7 +1630,7 @@ <fixed-case>LTG</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: Complex Word Identification with Classifier Ensembles - ShervinMalmasi + ShervinMalmasi MarkDras MarcosZampieri 996–1000 @@ -1642,7 +1642,7 @@ <fixed-case>M</fixed-case>ac<fixed-case>S</fixed-case>aar at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: <fixed-case>Z</fixed-case>ipfian and Character Features for <fixed-case>C</fixed-case>omplex<fixed-case>W</fixed-case>ord Identification MarcosZampieri LilingTan - Josefvan Genabith + Josefvan Genabith 1001–1005 S16-1155 10.18653/v1/S16-1155 @@ -1650,7 +1650,7 @@ Garuda & <fixed-case>B</fixed-case>hasha at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: Complex Word Identification Using Aggregated Learning Models - PrafullaChoubey + PrafullaChoubey ShubhamPateria 1006–1010 S16-1156 @@ -1660,8 +1660,8 @@ <fixed-case>TALN</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: Modelling Complex Words by Contextual, Lexical and Semantic Features FrancescoRonzano - AhmedAbura’ed - LuisEspinosa-Anke + AhmedAbura’ed + LuisEspinosa-Anke HoracioSaggion 1011–1016 S16-1157 @@ -1680,7 +1680,7 @@ <fixed-case>A</fixed-case>mrita<fixed-case>CEN</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: Complex Word Identification using Word Embedding SanjayS.P - AnandKumar M + AnandKumar M SomanK P 1022–1027 S16-1159 @@ -1691,7 +1691,7 @@ <fixed-case>C</fixed-case>oastal<fixed-case>CPH</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 11: The importance of designing your Neural Networks right JoachimBingel NatalieSchluter - HéctorMartínez Alonso + HéctorMartínez Alonso 1028–1033 S16-1160 10.18653/v1/S16-1160 @@ -1735,8 +1735,8 @@ StevenBethard GuerganaSavova Wei-TeChen - LeonDerczynski - JamesPustejovsky + LeonDerczynski + JamesPustejovsky MarcVerhagen 1052–1062 S16-1165 @@ -1809,7 +1809,7 @@ SimoneFilice DaniloCroce AlessandroMoschitti - RobertoBasili + RobertoBasili 1116–1123 S16-1172 10.18653/v1/S16-1172 @@ -1817,7 +1817,7 @@ <fixed-case>S</fixed-case>wiss<fixed-case>C</fixed-case>heese at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 4: Sentiment Classification Using an Ensemble of Convolutional Neural Networks with Distant Supervision - JanDeriu + JanDeriu MauriceGonzenbach FatihUzdilli AurelienLucchi @@ -1830,11 +1830,11 @@ <fixed-case>IIT</fixed-case>-<fixed-case>TUDA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 5: Beyond Sentiment Lexicon: Combining Domain Dependency and Distributional Semantics Features for Aspect Based Sentiment Analysis - AyushKumar + AyushKumar SarahKohail AmitKumar AsifEkbal - ChrisBiemann + ChrisBiemann 1129–1135 S16-1174 10.18653/v1/S16-1174 @@ -1844,7 +1844,7 @@ <fixed-case>LIMSI</fixed-case>-<fixed-case>COT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 12: Temporal relation identification using a pipeline of classifiers JulienTourille OlivierFerret - AurélieNévéol + AurélieNévéol XavierTannier 1136–1142 S16-1175 @@ -1853,8 +1853,8 @@ <fixed-case>RIGA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 8: Impact of <fixed-case>S</fixed-case>match Extensions and Character-Level Neural Translation on <fixed-case>AMR</fixed-case> Parsing Accuracy - GuntisBarzdins - DidzisGosko + GuntisBarzdins + DidzisGosko 1143–1147 S16-1176 10.18653/v1/S16-1176 @@ -1902,7 +1902,7 @@ <fixed-case>CAMR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 8: An Extended Transition-based <fixed-case>AMR</fixed-case> Parser ChuanWang - SameerPradhan + SameerPradhan XiaomanPan HengJi NianwenXue @@ -1934,7 +1934,7 @@ <fixed-case>CLIP</fixed-case>@<fixed-case>UMD</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 8: Parser for <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation using Learning to Search SudhaRao YogarshiVyas - HalDaumé III + HalDaumé III PhilipResnik 1190–1196 S16-1184 @@ -1944,7 +1944,7 @@ <fixed-case>CU</fixed-case>-<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 8: <fixed-case>AMR</fixed-case> Parsing using <fixed-case>LSTM</fixed-case>-based Recurrent Neural Networks WilliamFoland - James H.Martin + James H.Martin 1197–1201 S16-1185 10.18653/v1/S16-1185 @@ -1953,9 +1953,9 @@ <fixed-case>CMU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 8: Graph-based <fixed-case>AMR</fixed-case> Parsing with Infinite Ramp Loss JeffreyFlanigan - ChrisDyer - Noah A.Smith - JaimeCarbonell + ChrisDyer + Noah A.Smith + JaimeCarbonell 1202–1206 S16-1186 10.18653/v1/S16-1186 @@ -1994,7 +1994,7 @@ <fixed-case>LIMSI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 12: machine-learning and temporal information to identify clinical events and time expressions CyrilGrouin - VéroniqueMoriceau + VéroniqueMoriceau 1225–1230 S16-1190 10.18653/v1/S16-1190 @@ -2041,7 +2041,7 @@ <fixed-case>U</fixed-case>tah<fixed-case>BMI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 12: Extracting Temporal Information from Clinical Text AbdulrahmanKhalifa SumithraVelupillai - StephaneMeystre + StephaneMeystre 1256–1262 S16-1195 10.18653/v1/S16-1195 @@ -2050,12 +2050,12 @@ <fixed-case>ULISBOA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 12: Extraction of temporal expressions, clinical events and relations using <fixed-case>IBE</fixed-case>nt MarciaBarros - AndreLamurias + AndreLamurias GonçaloFigueiro MartaAntunes JoanaTeixeira AlexandrePinheiro - Francisco M.Couto + Francisco M.Couto 1263–1267 S16-1196 10.18653/v1/S16-1196 @@ -2081,7 +2081,7 @@ <fixed-case>KUL</fixed-case>euven-<fixed-case>LIIR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2016 Task 12: Detecting Narrative Containment in Clinical Records ArtuurLeeuwenberg - Marie-FrancineMoens + Marie-FrancineMoens 1280–1285 S16-1199 10.18653/v1/S16-1199 @@ -2093,7 +2093,7 @@ DamienDe Meyere PatrickWatrin AndréBittar - CédrickFairon + CédrickFairon 1286–1291 S16-1200 10.18653/v1/S16-1200 @@ -2105,7 +2105,7 @@ HuaXu JingqiWang YaoyunZhang - SungrimMoon + SungrimMoon JunXu YonghuiWu 1292–1297 @@ -2125,7 +2125,7 @@ <fixed-case>USAAR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 13: Hyponym Endocentricity LilingTan FrancisBond - Josefvan Genabith + Josefvan Genabith 1303–1309 S16-1203 10.18653/v1/S16-1203 @@ -2143,7 +2143,7 @@ <fixed-case>QASSIT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 13: On the integration of Semantic Vectors in Pretopological Spaces for Lexical Taxonomy Acquisition GuillaumeCleuziou - Jose G.Moreno + Jose G.Moreno 1315–1319 S16-1205 10.18653/v1/S16-1205 @@ -2156,9 +2156,9 @@ EugenRuppert SteffenRemus HubertNaets - CédrickFairon - Simone PaoloPonzetto - ChrisBiemann + CédrickFairon + Simone PaoloPonzetto + ChrisBiemann 1320–1327 S16-1206 10.18653/v1/S16-1206 @@ -2174,7 +2174,7 @@ <fixed-case>TALN</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 14: Semantic Taxonomy Enrichment Via Sense-Based Embeddings - LuisEspinosa-Anke + LuisEspinosa-Anke FrancescoRonzano HoracioSaggion 1332–1336 @@ -2184,8 +2184,8 @@ <fixed-case>MS</fixed-case>ejr<fixed-case>K</fixed-case>u at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 14: Taxonomy Enrichment by Evidence Ranking - MichaelSchlichtkrull - HéctorMartínez Alonso + MichaelSchlichtkrull + HéctorMartínez Alonso 1337–1341 S16-1209 10.18653/v1/S16-1209 @@ -2193,7 +2193,7 @@ Deftor at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2016 Task 14: Taxonomy enrichment using definition vectors - HristoTanev + HristoTanev AgataRotondi 1342–1345 S16-1210 @@ -2211,7 +2211,7 @@ <fixed-case>VCU</fixed-case> at <fixed-case>S</fixed-case>emeval-2016 Task 14: Evaluating definitional-based similarity measure for semantic taxonomy enrichment - BridgetMcInnes + BridgetMcInnes 1351–1355 S16-1212 10.18653/v1/S16-1212 @@ -2223,7 +2223,7 @@ Proceedings of the Fifth Joint Conference on Lexical and Computational Semantics S16-2 ClaireGardent - RaffaellaBernardi + RaffaellaBernardi IvanTitov 10.18653/v1/S16-2 Association for Computational Linguistics @@ -2257,9 +2257,9 @@ Metaphor as a Medium for Emotion: An Empirical Study - SaifMohammad + SaifMohammad EkaterinaShutova - PeterTurney + PeterTurney 23–33 S16-2003 10.18653/v1/S16-2003 @@ -2268,7 +2268,7 @@ High-Fidelity Lexical Axiom Construction from Verb Glosses GeneKim - LenhartSchubert + LenhartSchubert 34–44 S16-2004 10.18653/v1/S16-2004 @@ -2324,9 +2324,9 @@ Improving Zero-Shot-Learning for <fixed-case>G</fixed-case>erman Particle Verbs by using Training-Space Restrictions and Local Scaling MaximilianKöper - SabineSchulte im Walde + SabineSchulte im Walde MaxKisselew - SebastianPadó + SebastianPadó 91–96 S16-2010 10.18653/v1/S16-2010 @@ -2345,9 +2345,9 @@ Leveraging <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et to build Corpus-Specific Verb Clusters - DanielPeterson + DanielPeterson JordanBoyd-Graber - MarthaPalmer + MarthaPalmer DaisukeKawahara 102–107 S16-2012 @@ -2376,7 +2376,7 @@ Linguistic Style Accommodation in Disagreements Elisevan der Pol SharonGieske - RaquelFernández + RaquelFernández 120–124 S16-2015 10.18653/v1/S16-2015 @@ -2386,7 +2386,7 @@ Unsupervised Text Segmentation Using Semantic Relatedness Graphs GoranGlavaš FedericoNanni - Simone PaoloPonzetto + Simone PaoloPonzetto 125–130 S16-2016 10.18653/v1/S16-2016 @@ -2398,7 +2398,7 @@ GillesJacobs VincentVandeghinste InekeSchuurman - FrankVan Eynde + FrankVan Eynde 131–135 S16-2017 10.18653/v1/S16-2017 @@ -2426,7 +2426,7 @@ The Role of Modifier and Head Properties in Predicting the Compositionality of <fixed-case>E</fixed-case>nglish and <fixed-case>G</fixed-case>erman Noun-Noun Compounds: A Vector-Space Perspective - SabineSchulte im Walde + SabineSchulte im Walde AnnaHätty StefanBott 148–158 @@ -2437,7 +2437,7 @@ Detecting Stance in Tweets And Analyzing its Interaction with Sentiment ParinazSobhani - SaifMohammad + SaifMohammad SvetlanaKiritchenko 159–169 S16-2021 @@ -2457,8 +2457,8 @@ You and me... in a vector space: modelling individual speakers with distributional semantics - AurélieHerbelot - BehrangQasemiZadeh + AurélieHerbelot + BehrangQasemiZadeh 179–188 S16-2023 10.18653/v1/S16-2023 @@ -2466,7 +2466,7 @@ Random Positive-Only Projections: <fixed-case>PPMI</fixed-case>-Enabled Incremental Semantic Space Construction - BehrangQasemiZadeh + BehrangQasemiZadeh LauraKallmeyer 189–198 S16-2024 @@ -2475,8 +2475,8 @@ A Compositional-Distributional Semantic Model for Searching Complex Entity Categories - Juliano EfsonSales - AndréFreitas + Juliano EfsonSales + AndréFreitas BrianDavis SiegfriedHandschuh 199–208 @@ -2488,7 +2488,7 @@ Approximating Givenness in Content Assessment through Distributional Semantics RamonZiai KordulaDe Kuthy - DetmarMeurers + DetmarMeurers 209–218 S16-2026 10.18653/v1/S16-2026 diff --git a/data/xml/S17.xml b/data/xml/S17.xml index b9ab9e2c06..0e3b6d0bb5 100644 --- a/data/xml/S17.xml +++ b/data/xml/S17.xml @@ -4,9 +4,9 @@ Proceedings of the 6th Joint Conference on Lexical and Computational Semantics (*SEM 2017) S17-1 - NancyIde - AurélieHerbelot - LluísMàrquez + NancyIde + AurélieHerbelot + LluísMàrquez 10.18653/v1/S17-1 Association for Computational Linguistics
Vancouver, Canada
@@ -20,9 +20,9 @@ What Analogies Reveal about Word Vectors and their Compositionality - GregoryFinley + GregoryFinley StephanieFarmer - SergueiPakhomov + SergueiPakhomov 1–11 S17-1001 10.18653/v1/S17-1001 @@ -75,7 +75,7 @@ Deep Learning Models For Multiword Expression Identification WaseemGharbieh - VirendrakumarBhavsar + VirendrakumarBhavsar PaulCook 54–64 S17-1006 @@ -85,7 +85,7 @@ Emotion Intensities in Tweets - SaifMohammad + SaifMohammad FelipeBravo-Marquez 65–77 S17-1007 @@ -143,8 +143,8 @@ Distributed Prediction of Relations for Entities: The Easy, The Difficult, and The Impossible AbhijeetGupta - GemmaBoleda - SebastianPadó + GemmaBoleda + SebastianPadó 104–109 S17-1012 10.18653/v1/S17-1012 @@ -156,7 +156,7 @@ AngelMaredia KaraSchechtman Sarah ItaLevitan - JuliaHirschberg + JuliaHirschberg 110–114 S17-1013 10.18653/v1/S17-1013 @@ -167,7 +167,7 @@ Does Free Word Order Hurt? Assessing the Practical Lexical Function Model for <fixed-case>C</fixed-case>roatian ZoranMedić JanŠnajder - SebastianPadó + SebastianPadó 115–120 S17-1014 S17-1014.Poster.pdf @@ -298,7 +298,7 @@ Embedded Semantic Lexicon Induction with Joint Global and Local Optimization Sujay KumarJauhar - EduardHovy + EduardHovy 209–219 S17-1025 10.18653/v1/S17-1025 @@ -321,7 +321,7 @@ Classifying Semantic Clause Types: Modeling Context and Genre Characteristics with Recurrent Neural Networks and Attention MariaBecker MichaelStaniek - ViviNastase + ViviNastase AlexisPalmer AnetteFrank 230–240 @@ -332,8 +332,8 @@ Predictive Linguistic Features of Schizophrenia - EfsunSarioglu Kayi - MonaDiab + EfsunSarioglu Kayi + MonaDiab LucaPauselli MichaelCompton GlenCoppersmith @@ -346,7 +346,7 @@ Learning to Solve Geometry Problems from Natural Language Demonstrations in Textbooks MrinmayaSachan - EricXing + EricXing 251–261 S17-1029 10.18653/v1/S17-1029 @@ -355,11 +355,11 @@ Ways of Asking and Replying in Duplicate Question Detection - JoãoAntónio Rodrigues + JoãoAntónio Rodrigues ChakavehSaedi VladislavMaraev - JoãoSilva - AntónioBranco + JoãoSilva + AntónioBranco 262–270 S17-1030 10.18653/v1/S17-1030 @@ -374,7 +374,7 @@ StevenBethard MarineCarpuat MariannaApidianaki - Saif M.Mohammad + Saif M.Mohammad DanielCer DavidJurgens 10.18653/v1/S17-2 @@ -391,8 +391,8 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Semantic Textual Similarity Multilingual and Crosslingual Focused Evaluation DanielCer - MonaDiab - EnekoAgirre + MonaDiab + EnekoAgirre IñigoLopez-Gazpio LuciaSpecia 1–14 @@ -409,7 +409,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 2: Multilingual and Cross-lingual Semantic Word Similarity - JoseCamacho-Collados + JoseCamacho-Collados Mohammad TaherPilehvar NigelCollier RobertoNavigli @@ -421,13 +421,13 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 3: Community Question Answering - PreslavNakov + PreslavNakov DorisHoogeveen - LluísMàrquez + LluísMàrquez AlessandroMoschitti HamdyMubarak - TimothyBaldwin - KarinVerspoor + TimothyBaldwin + KarinVerspoor 27–48 S17-2003 10.18653/v1/S17-2003 @@ -448,7 +448,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 7: Detection and Interpretation of <fixed-case>E</fixed-case>nglish Puns TristanMiller - ChristianHempelmann + ChristianHempelmann IrynaGurevych 58–68 S17-2005 @@ -458,8 +458,8 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 8: <fixed-case>R</fixed-case>umour<fixed-case>E</fixed-case>val: Determining rumour veracity and support for rumours - LeonDerczynski - KalinaBontcheva + LeonDerczynski + KalinaBontcheva MariaLiakata RobProcter GeraldineWong Sak Hoi @@ -473,7 +473,7 @@ <fixed-case>BIT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Using Semantic Information Space to Evaluate Semantic Textual Similarity HaoWu - HeyanHuang + HeyanHuang PingJian YuhangGuo ChaoSu @@ -498,12 +498,12 @@ <fixed-case>IIT</fixed-case>-<fixed-case>UHH</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 3: Exploring Multiple Features for Community Question Answering and Implicit Dialogue Identification TitasNandi - ChrisBiemann + ChrisBiemann Seid MuhieYimam - DeepakGupta + DeepakGupta SarahKohail AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 90–97 S17-2009 10.18653/v1/S17-2009 @@ -536,7 +536,7 @@ <fixed-case>C</fixed-case>ompi<fixed-case>LIG</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Cross-Language Plagiarism Detection Methods for Semantic Textual Similarity JérémyFerrero - LaurentBesacier + LaurentBesacier DidierSchwab FrédéricAgnès 109–114 @@ -574,7 +574,7 @@ <fixed-case>FCICU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Sense-Based Language Independent Semantic Textual Similarity Approach BasmaHassan - SamirAbdelRahman + SamirAbdelRahman ReemBahgat IbrahimFarag 125–129 @@ -651,9 +651,9 @@ <fixed-case>ITNLP</fixed-case>-<fixed-case>A</fixed-case>i<fixed-case>KF</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Rich Features Based <fixed-case>SVR</fixed-case> for Semantic Textual Similarity Computing WenjieLiu - ChengjieSun + ChengjieSun LeiLin - BingquanLiu + BingquanLiu 159–163 S17-2022 10.18653/v1/S17-2022 @@ -684,7 +684,7 @@ <fixed-case>STS</fixed-case>-<fixed-case>UHH</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Scoring Semantic Textual Similarity Using Supervised and Unsupervised Ensemble SarahKohail Amr RekabySalama - ChrisBiemann + ChrisBiemann 175–179 S17-2025 10.18653/v1/S17-2025 @@ -703,8 +703,8 @@ <fixed-case>MITRE</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Simple Semantic Similarity - JohnHenderson - ElizabethMerkhofer + JohnHenderson + ElizabethMerkhofer LauraStrickhart GuidoZarrella 185–190 @@ -715,9 +715,9 @@ <fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Leverage Kernel-based Traditional <fixed-case>NLP</fixed-case> features and Neural Networks to Build a Universal Model for Multilingual and Cross-lingual Semantic Textual Similarity - JunfengTian + JunfengTian ZhihengZhou - ManLan + ManLan YuanbinWu 191–197 S17-2028 @@ -731,9 +731,9 @@ MahakGoindani ChangLi DiJin - Kristen MarieJohnson + Kristen MarieJohnson XiaoZhang - Maria LeonorPacheco + Maria LeonorPacheco DanGoldwasser 198–202 S17-2029 @@ -743,7 +743,7 @@ <fixed-case>RTM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Referential Translation Machines for Predicting Semantic Similarity - ErgunBiçici + ErgunBiçici 203–207 S17-2030 10.18653/v1/S17-2030 @@ -753,7 +753,7 @@ <fixed-case>LIPN</fixed-case>-<fixed-case>IIMAS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 1: Subword Embeddings, Attention Recurrent Neural Networks and Cross Word Alignment for Semantic Textual Similarity IgnacioArroyo-Fernández - Ivan VladimirMeza Ruiz + Ivan VladimirMeza Ruiz 208–212 S17-2031 10.18653/v1/S17-2031 @@ -764,7 +764,7 @@ <fixed-case>L</fixed-case>2<fixed-case>F</fixed-case>/<fixed-case>INESC</fixed-case>-<fixed-case>ID</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Tasks 1 and 2: Lexical and semantic features in word and textual similarity PedroFialho HugoPatinho Rodrigues - LuísaCoheur + LuísaCoheur PauloQuaresma 213–219 S17-2032 @@ -810,7 +810,7 @@ YutengZhang PingJian ShuminShi - HeyanHuang + HeyanHuang 235–238 S17-2036 10.18653/v1/S17-2036 @@ -842,7 +842,7 @@ <fixed-case>HHU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 2: Fast Hash-Based Embeddings for Semantic Word Similarity Assessment - BehrangQasemiZadeh + BehrangQasemiZadeh LauraKallmeyer 250–255 S17-2039 @@ -878,11 +878,11 @@ Răzvan-GabrielRotari IonuțHulub ȘtefanOprea - MihaelaPlămadă-Onofrei - Alina BeatriceLorenţ + MihaelaPlămadă-Onofrei + Alina BeatriceLorenţ RalucaPreisler AdrianIftene - DianaTrandabăț + DianaTrandabăț 267–270 S17-2042 10.18653/v1/S17-2042 @@ -904,9 +904,9 @@ <fixed-case>UPC</fixed-case>-<fixed-case>USMBA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 3: Combining multiple approaches for <fixed-case>CQA</fixed-case> for <fixed-case>A</fixed-case>rabic YassineEl Adlouni ImaneLahbari - HoracioRodríguez + HoracioRodríguez MohammedMeknassi - Said OuatikEl Alaoui + Said OuatikEl Alaoui NoureddineEnnahnahi 275–279 S17-2044 @@ -930,7 +930,7 @@ <fixed-case>M</fixed-case>o<fixed-case>RS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 3: Easy to use <fixed-case>SVM</fixed-case> in Ranking Tasks Miguel J.Rodrigues - Francisco M.Couto + Francisco M.Couto 287–291 S17-2046 10.18653/v1/S17-2046 @@ -986,7 +986,7 @@ <fixed-case>S</fixed-case>im<fixed-case>B</fixed-case>ow at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 3: Soft-Cosine Semantic Similarity between Questions for Community Question Answering DelphineCharlet - GéraldineDamnati + GéraldineDamnati 315–319 S17-2051 10.18653/v1/S17-2051 @@ -1020,7 +1020,7 @@ <fixed-case>S</fixed-case>wiss<fixed-case>A</fixed-case>lps at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 3: Attention-based Convolutional Neural Network for Community Question Answering - Jan MilanDeriu + Jan MilanDeriu MarkCieliebak 334–338 S17-2054 @@ -1033,7 +1033,7 @@ FilipŠaina ToniKukurin LukrecijaPuljić - Vanja MladenKaran + Vanja MladenKaran JanŠnajder 339–343 S17-2055 @@ -1044,7 +1044,7 @@ <fixed-case>GW</fixed-case>_<fixed-case>QA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 3: Question Answer Re-ranking on <fixed-case>A</fixed-case>rabic Fora NadaAlmarwani - MonaDiab + MonaDiab 344–348 S17-2056 10.18653/v1/S17-2056 @@ -1091,7 +1091,7 @@ <fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 3: Using Traditional and Deep Learning Methods to Address Community Question Answering Task GuoshunWu YixuanSheng - ManLan + ManLan YuanbinWu 365–369 S17-2060 @@ -1177,14 +1177,14 @@ #<fixed-case>W</fixed-case>ar<fixed-case>T</fixed-case>eam at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 6: Using Neural Networks for Discovering Humorous Tweets - Iuliana AlexandraFleșcan-Lovin-Arseni - Ramona AndreeaTurcu + Iuliana AlexandraFleșcan-Lovin-Arseni + Ramona AndreeaTurcu CristinaSîrbu LarisaAlexa Sandra MariaAmarandei NichitaHerciu ConstantinScutaru - DianaTrandabăț + DianaTrandabăț AdrianIftene 407–410 S17-2068 @@ -1253,7 +1253,7 @@ <fixed-case>EL</fixed-case>i<fixed-case>RF</fixed-case>-<fixed-case>UPV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 7: Pun Detection and Interpretation - Lluís-F.Hurtado + Lluís-F.Hurtado EncarnaSegarra FerranPla PascualCarrasco @@ -1286,7 +1286,7 @@ <fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 7: Using Supervised and Unsupervised Methods to Detect and Locate <fixed-case>E</fixed-case>nglish Puns YuhuanXiu - ManLan + ManLan YuanbinWu 453–456 S17-2078 @@ -1328,7 +1328,7 @@ <fixed-case>N</fixed-case>ile<fixed-case>TMRG</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 8: Determining Rumour and Veracity Support for Rumours on <fixed-case>T</fixed-case>witter. OmarEnayet - Samhaa R.El-Beltagy + Samhaa R.El-Beltagy 470–474 S17-2082 10.18653/v1/S17-2082 @@ -1360,9 +1360,9 @@ <fixed-case>DFKI</fixed-case>-<fixed-case>DKT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 8: Rumour Detection and Classification using Cascading Heuristics - AnkitSrivastava + AnkitSrivastava GeorgRehm - JulianMoreno Schneider + JulianMoreno Schneider 486–490 S17-2085 10.18653/v1/S17-2085 @@ -1372,7 +1372,7 @@ <fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 8: Rumour Evaluation Using Effective Features and Supervised Ensemble Models FeixiangWang - ManLan + ManLan YuanbinWu 491–496 S17-2086 @@ -1386,7 +1386,7 @@ SunnyNarayan Md ShadAkhtar AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 497–501 S17-2087 10.18653/v1/S17-2087 @@ -1397,7 +1397,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: Sentiment Analysis in <fixed-case>T</fixed-case>witter SaraRosenthal NouraFarra - PreslavNakov + PreslavNakov 502–518 S17-2088 10.18653/v1/S17-2088 @@ -1407,9 +1407,9 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 5: Fine-Grained Sentiment Analysis on Financial Microblogs and News KeithCortis - AndréFreitas + AndréFreitas TobiasDaudert - ManuelaHuerlimann + ManuelaHuerlimann ManelZarrouk SiegfriedHandschuh BrianDavis @@ -1444,9 +1444,9 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 11: End-User Development using Natural Language - JulianoSales + JulianoSales SiegfriedHandschuh - AndréFreitas + AndréFreitas 556–564 S17-2092 10.18653/v1/S17-2092 @@ -1457,8 +1457,8 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 12: Clinical <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val StevenBethard GuerganaSavova - MarthaPalmer - JamesPustejovsky + MarthaPalmer + JamesPustejovsky 565–572 S17-2093 10.18653/v1/S17-2093 @@ -1499,7 +1499,7 @@ The <fixed-case>AI</fixed-case>2 system at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 10 (<fixed-case>S</fixed-case>cience<fixed-case>IE</fixed-case>): semi-supervised end-to-end entity and relation extraction WaleedAmmar - Matthew E.Peters + Matthew E.Peters ChandraBhagavatula RussellPower 592–596 @@ -1513,7 +1513,7 @@ JulienTourille OlivierFerret XavierTannier - AurélieNévéol + AurélieNévéol 597–602 S17-2098 10.18653/v1/S17-2098 @@ -1531,8 +1531,8 @@ AhmadAl Sallab HazemHajj NizarHabash - KhaledShaban - WassimEl-Hajj + KhaledShaban + WassimEl-Hajj 603–610 S17-2099 10.18653/v1/S17-2099 @@ -1584,10 +1584,10 @@ <fixed-case>SINAI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: User based classification - Salud MaríaJiménez-Zafra + Salud MaríaJiménez-Zafra ArturoMontejo-Ráez - MaiteMartin - L. AlfonsoUreña-López + MaiteMartin + L. AlfonsoUreña-López 634–639 S17-2104 10.18653/v1/S17-2104 @@ -1597,7 +1597,7 @@ <fixed-case>HLP</fixed-case>@<fixed-case>UP</fixed-case>enn at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4<fixed-case>A</fixed-case>: A simple, self-optimizing text classification system combining dense and sparse vectors AbeedSarker - GracielaGonzalez + GracielaGonzalez 640–643 S17-2105 10.18653/v1/S17-2105 @@ -1616,7 +1616,7 @@ <fixed-case>S</fixed-case>enti<fixed-case>ME</fixed-case>++ at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: Stacking State-of-the-Art Classifiers to Enhance Sentiment Classification - RaphaëlTroncy + RaphaëlTroncy EnricoPalumbo EfstratiosSygkounas GiuseppeRizzo @@ -1638,7 +1638,7 @@ <fixed-case>TWINA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: <fixed-case>T</fixed-case>witter Sentiment Analysis with Ensemble Gradient Boost Tree Classifier - Naveen KumarLaskari + Naveen KumarLaskari Suresh KumarSanampudi 659–663 S17-2109 @@ -1651,7 +1651,7 @@ HalaMulki HatemHaddad MouradGridach - IsmailBabaoglu + IsmailBabaoglu 664–669 S17-2110 10.18653/v1/S17-2110 @@ -1677,8 +1677,8 @@ EliasIosif NikolaosMalandrakis ElisavetPalogiannidi - HarisPapageorgiou - ShrikanthNarayanan + HarisPapageorgiou + ShrikanthNarayanan AlexandrosPotamianos 675–682 S17-2112 @@ -1741,7 +1741,7 @@ <fixed-case>SSN</fixed-case>_<fixed-case>MLRG</fixed-case>1 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: Sentiment Analysis in <fixed-case>T</fixed-case>witter Using Multi-Kernel <fixed-case>G</fixed-case>aussian Process Classifier Angel DeborahS S MiltonRajendram - T TMirnalinee + T TMirnalinee 709–712 S17-2118 10.18653/v1/S17-2118 @@ -1763,8 +1763,8 @@ <fixed-case>LSIS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: Using Adapted Sentiment Similarity Seed Words For <fixed-case>E</fixed-case>nglish and <fixed-case>A</fixed-case>rabic Tweet Polarity Classification AmalHtait - SébastienFournier - PatriceBellot + SébastienFournier + PatriceBellot 718–722 S17-2120 10.18653/v1/S17-2120 @@ -1775,7 +1775,7 @@ <fixed-case>EL</fixed-case>i<fixed-case>RF</fixed-case>-<fixed-case>UPV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: Sentiment Analysis using Deep Learning José-ÁngelGonzález FerranPla - Lluís-F.Hurtado + Lluís-F.Hurtado 723–727 S17-2121 10.18653/v1/S17-2121 @@ -1863,7 +1863,7 @@ <fixed-case>T</fixed-case>opic<fixed-case>T</fixed-case>hunder at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: Sentiment Classification Using a Convolutional Neural Network with Distant Supervision SimonMüller TobiasHuonder - JanDeriu + JanDeriu MarkCieliebak 766–770 S17-2129 @@ -1875,7 +1875,7 @@ <fixed-case>INGEOTEC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2017 Task 4: A <fixed-case>B</fixed-case>4<fixed-case>MSA</fixed-case> Ensemble based on Genetic Programming for <fixed-case>T</fixed-case>witter Sentiment Analysis SabinoMiranda-Jiménez MarioGraff - Eric SaditTellez + Eric SaditTellez DanielaMoctezuma 771–776 S17-2130 @@ -1886,8 +1886,8 @@ <fixed-case>BUSEM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4<fixed-case>A</fixed-case> Sentiment Analysis with Word Embedding and Long Short Term Memory <fixed-case>RNN</fixed-case> Approaches DegerAyata - MuratSaraclar - ArzucanOzgur + MuratSaraclar + ArzucanOzgur 777–783 S17-2131 10.18653/v1/S17-2131 @@ -1909,7 +1909,7 @@ <fixed-case>N</fixed-case>ile<fixed-case>TMRG</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: <fixed-case>A</fixed-case>rabic Sentiment Analysis - Samhaa R.El-Beltagy + Samhaa R.El-Beltagy MonaEl Kalamawy Abu BakrSoliman 790–795 @@ -1942,11 +1942,11 @@ <fixed-case>UCSC</fixed-case>-<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: Sense n-grams for Sentiment Analysis in <fixed-case>T</fixed-case>witter - JoséAbreu + JoséAbreu IvánCastro ClaudiaMartínez SebastiánOliva - YoanGutiérrez + YoanGutiérrez 807–811 S17-2136 10.18653/v1/S17-2136 @@ -1958,7 +1958,7 @@ of average. Among 39 submissions to this task, we ranked 10th.
<fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 4: Evaluating Effective Features on Machine Learning Methods for <fixed-case>T</fixed-case>witter Message Polarity Classification YunxiaoZhou - ManLan + ManLan YuanbinWu 812–816 S17-2137 @@ -1984,7 +1984,7 @@ of average. Among 39 submissions to this task, we ranked 10th.
<fixed-case>SSN</fixed-case>_<fixed-case>MLRG</fixed-case>1 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 5: Fine-Grained Sentiment Analysis Using Multiple Kernel <fixed-case>G</fixed-case>aussian Process Regression Model Angel DeborahS S MiltonRajendram - T TMirnalinee + T TMirnalinee 823–826 S17-2139 10.18653/v1/S17-2139 @@ -2127,7 +2127,7 @@ of average. Among 39 submissions to this task, we ranked 10th.
<fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 5: An Ensemble of Regression Algorithms with Effective Features for Fine-Grained Sentiment Analysis in Financial Domain MengxiaoJiang - ManLan + ManLan YuanbinWu 888–893 S17-2152 @@ -2141,8 +2141,8 @@ of average. Among 39 submissions to this task, we ranked 10th.
AbhishekSethi Md ShadAkhtar AsifEkbal - ChrisBiemann - PushpakBhattacharyya + ChrisBiemann + PushpakBhattacharyya 894–898 S17-2153 10.18653/v1/S17-2153 @@ -2155,7 +2155,7 @@ of average. Among 39 submissions to this task, we ranked 10th.
ShobhitBhatnagar Md ShadAkhtar AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 899–903 S17-2154 10.18653/v1/S17-2154 @@ -2187,7 +2187,7 @@ of average. Among 39 submissions to this task, we ranked 10th. <fixed-case>O</fixed-case>xford at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 9: Neural <fixed-case>AMR</fixed-case> Parsing with Pointer-Augmented Attention JanBuys - PhilBlunsom + PhilBlunsom 914–919 S17-2157 10.18653/v1/S17-2157 @@ -2208,9 +2208,9 @@ of average. Among 39 submissions to this task, we ranked 10th. <fixed-case>RIGOTRIO</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 9: Combining Machine Learning and Grammar Engineering for <fixed-case>AMR</fixed-case> Parsing and Generation - NormundsGruzitis - DidzisGosko - GuntisBarzdins + NormundsGruzitis + DidzisGosko + GuntisBarzdins 924–928 S17-2159 10.18653/v1/S17-2159 @@ -2240,10 +2240,10 @@ of average. Among 39 submissions to this task, we ranked 10th. <fixed-case>NTNU</fixed-case>-1@<fixed-case>S</fixed-case>cience<fixed-case>IE</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 10: Identifying and Labelling Keyphrases with Conditional Random Fields ErwinMarsi - Utpal KumarSikdar - CristinaMarco + Utpal KumarSikdar + CristinaMarco BiswanathBarik - RuneSætre + RuneSætre 938–941 S17-2162 10.18653/v1/S17-2162 @@ -2265,7 +2265,7 @@ of average. Among 39 submissions to this task, we ranked 10th. <fixed-case>LABDA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 10: Extracting Keyphrases from Scientific Publications by combining the <fixed-case>BANNER</fixed-case> tool and the <fixed-case>UMLS</fixed-case> Semantic Network - IsabelSegura-Bedmar + IsabelSegura-Bedmar CristóbalColón-Ruiz PalomaMartínez 947–950 @@ -2278,7 +2278,7 @@ of average. Among 39 submissions to this task, we ranked 10th. The <fixed-case>NTNU</fixed-case> System at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 10: Extracting Keyphrases and Relations from Scientific Publications Using Multiple Conditional Random Fields Lung-HaoLee Kuei-ChingLee - Yuen-HsienTseng + Yuen-HsienTseng 951–955 S17-2165 10.18653/v1/S17-2165 @@ -2321,7 +2321,7 @@ of average. Among 39 submissions to this task, we ranked 10th. <fixed-case>LABDA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 10: Relation Classification between keyphrases via Convolutional Neural Network VíctorSuárez-Paniagua - IsabelSegura-Bedmar + IsabelSegura-Bedmar PalomaMartínez 969–972 S17-2169 @@ -2432,11 +2432,11 @@ of average. Among 39 submissions to this task, we ranked 10th. <fixed-case>ULISBOA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 12: Extraction and classification of temporal expressions and events - AndreLamurias + AndreLamurias DianaSousa SofiaPereira - LukaClarke - Francisco M.Couto + LukaClarke + Francisco M.Couto 1019–1023 S17-2179 10.18653/v1/S17-2179 @@ -2457,7 +2457,7 @@ of average. Among 39 submissions to this task, we ranked 10th. <fixed-case>KUL</fixed-case>euven-<fixed-case>LIIR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2017 Task 12: Cross-Domain Temporal Information Extraction from Clinical Records ArtuurLeeuwenberg - Marie-FrancineMoens + Marie-FrancineMoens 1030–1034 S17-2181 10.18653/v1/S17-2181 diff --git a/data/xml/S18.xml b/data/xml/S18.xml index 1b326f2ee9..af318d7271 100644 --- a/data/xml/S18.xml +++ b/data/xml/S18.xml @@ -5,7 +5,7 @@ Proceedings of the 12th International Workshop on Semantic Evaluation S18-1 MariannaApidianaki - Saif M.Mohammad + Saif M.Mohammad JonathanMay EkaterinaShutova StevenBethard @@ -48,9 +48,9 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2018 Task 2: Multilingual Emoji Prediction FrancescoBarbieri - JoseCamacho-Collados + JoseCamacho-Collados FrancescoRonzano - LuisEspinosa-Anke + LuisEspinosa-Anke MiguelBallesteros ValerioBasile VivianaPatti @@ -75,7 +75,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 3: Irony Detection in <fixed-case>E</fixed-case>nglish Tweets CynthiaVan Hee ElsLefever - VéroniqueHoste + VéroniqueHoste 39–50 This paper presents the first shared task on irony detection: given a tweet, automatic natural language processing systems should determine whether the tweet is ironic (Task A) and which type of irony (if any) is expressed (Task B). The ironic tweets were collected using irony-related hashtags (i.e. #irony, #sarcasm, #not) and were subsequently manually annotated to minimise the amount of noise in the corpus. Prior to distributing the data, hashtags that were used to collect the tweets were removed from the corpus. For both tasks, a training corpus of 3,834 tweets was provided, as well as a test set containing 784 tweets. Our shared tasks received submissions from 43 teams for the binary classification Task A and from 31 teams for the multiclass Task B. The highest classification scores obtained for both subtasks are respectively F1= 0.71 and F1= 0.51 and demonstrate that fine-grained irony classification is much more challenging than binary irony detection. S18-1005 @@ -98,7 +98,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val 2018 Task 4: Character Identification on Multiparty Dialogues - Jinho D.Choi + Jinho D.Choi Henry Y.Chen 57–64 Character identification is a task of entity linking that finds the global entity of each personal mention in multiparty dialogue. For this task, the first two seasons of the popular TV show Friends are annotated, comprising a total of 448 dialogues, 15,709 mentions, and 401 entities. The personal mentions are detected from nominals referring to certain characters in the show, and the entities are collected from the list of all characters in those two seasons of the show. This task is challenging because it requires the identification of characters that are mentioned but may not be active during the conversation. Among 90+ participants, four of them submitted their system outputs and showed strengths in different aspects about the task. Thorough analyses of the distributed datasets, system outputs, and comparative studies are also provided. To facilitate the momentum, we create an open-source project for this task and publicly release a larger and cleaner dataset, hoping to support researchers for more enhanced modeling. @@ -110,9 +110,9 @@ <fixed-case>AMORE</fixed-case>-<fixed-case>UPF</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 4: <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case> with Entity Library LauraAina CarinaSilberer - Ionut-TeodorSorodoc + Ionut-TeodorSorodoc MatthijsWestera - GemmaBoleda + GemmaBoleda 65–69 This paper describes our winning contribution to SemEval 2018 Task 4: Character Identification on Multiparty Dialogues. It is a simple, standard model with one key innovation, an entity library. Our results show that this innovation greatly facilitates the identification of infrequent characters. Because of the generic nature of our model, this finding is potentially relevant to any task that requires the effective learning from sparse or imbalanced data. S18-1008 @@ -147,7 +147,7 @@ DongfangXu AhmedElsayed StevenBethard - MarthaPalmer + MarthaPalmer 88–96 This paper presents the outcomes of the Parsing Time Normalization shared task held within SemEval-2018. The aim of the task is to parse time expressions into the compositional semantic graphs of the Semantically Compositional Annotation of Time Expressions (SCATE) schema, which allows the representation of a wider variety of time expressions than previous approaches. Two tracks were included, one to evaluate the parsing of individual components of the produced graphs, in a classic information extraction way, and another one to evaluate the quality of the time intervals resulting from the interpretation of those graphs. Though 40 participants registered for the task, only one team submitted output, achieving 0.55 F1 in Track 1 (parsing) and 0.70 F1 in Track 2 (intervals). S18-1011 @@ -159,7 +159,7 @@ AmyOlex LukeMaffey NicholasMorgan - BridgetMcInnes + BridgetMcInnes 97–101 Temporal information extraction is a challenging task. Here we describe Chrono, a hybrid rule-based and machine learning system that identifies temporal expressions in text and normalizes them into the SCATE schema. After minor parsing logic adjustments, Chrono has emerged as the top performing system for SemEval 2018 Task 6: Parsing Time Normalizations. S18-1012 @@ -201,8 +201,8 @@ <fixed-case>LT</fixed-case>3 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: A classifier chain to detect emotions in tweets LunaDe Bruyne - OrphéeDe Clercq - VéroniqueHoste + OrphéeDe Clercq + VéroniqueHoste 123–127 This paper presents an emotion classification system for English tweets, submitted for the SemEval shared task on Affect in Tweets, subtask 5: Detecting Emotions. The system combines lexicon, n-gram, style, syntactic and semantic features. For this multi-class multi-label problem, we created a classifier chain. This is an ensemble of eleven binary classifiers, one for each possible emotion category, where each model gets the predictions of the preceding models as additional features. The predicted labels are combined to get a multi-label representation of the predictions. Our system was ranked eleventh among thirty five participating teams, with a Jaccard accuracy of 52.0% and macro- and micro-average F1-scores of 49.3% and 64.0%, respectively. S18-1016 @@ -212,9 +212,9 @@ <fixed-case>SINAI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Emotion Recognition in Tweets Flor MiriamPlaza-del-Arco - Salud MaríaJiménez-Zafra - MaiteMartin - L. AlfonsoUreña-López + Salud MaríaJiménez-Zafra + MaiteMartin + L. AlfonsoUreña-López 128–132 Emotion classification is a new task that combines several disciplines including Artificial Intelligence and Psychology, although Natural Language Processing is perhaps the most challenging area. In this paper, we describe our participation in SemEval-2018 Task1: Affect in Tweets. In particular, we have participated in EI-oc, EI-reg and E-c subtasks for English and Spanish languages. S18-1017 @@ -247,7 +247,7 @@ <fixed-case>INGEOTEC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: <fixed-case>E</fixed-case>vo<fixed-case>MSA</fixed-case> and μ<fixed-case>TC</fixed-case> for Sentiment Analysis MarioGraff SabinoMiranda-Jiménez - Eric S.Tellez + Eric S.Tellez DanielaMoctezuma 146–150 This paper describes our participation in Affective Tweets task for emotional intensity and sentiment intensity subtasks for English, Spanish, and Arabic languages. We used two approaches, μTC and EvoMSA. The first one is a generic text categorization and regression system; and the second one, a two-stage architecture for Sentiment Analysis. Both approaches are multilingual and domain independent. @@ -258,7 +258,7 @@ Epita at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Sentiment Analysis Using Transfer Learning Approach GuillaumeDaval-Frerot - AbdesselamBouchekif + AbdesselamBouchekif AnatoleMoreau 151–155 In this paper we present our system for detecting valence task. The major issue was to apply a state-of-the-art system despite the small dataset provided: the system would quickly overfit. The main idea of our proposal is to use transfer learning, which allows to avoid learning from scratch. Indeed, we start to train a first model to predict if a tweet is positive, negative or neutral. For this we use an external dataset which is larger and similar to the target dataset. Then, the pre-trained model is re-used as the starting point to train a new model that classifies a tweet into one of the seven various levels of sentiment intensity. Our system, trained using transfer learning, achieves 0.776 and 0.763 respectively for Pearson correlation coefficient and weighted quadratic kappa metrics on the subtask evaluation dataset. @@ -289,9 +289,9 @@ Tw-<fixed-case>S</fixed-case>t<fixed-case>AR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Preprocessing Impact on Multi-label Emotion Classification HalaMulki - ChediBechikh Ali + ChediBechikh Ali HatemHaddad - IsmailBabaoğlu + IsmailBabaoğlu 167–171 In this paper, we describe our contribution in SemEval-2018 contest. We tackled task 1 “Affect in Tweets”, subtask E-c “Detecting Emotions (multi-label classification)”. A multilabel classification system Tw-StAR was developed to recognize the emotions embedded in Arabic, English and Spanish tweets. To handle the multi-label classification problem via traditional classifiers, we employed the binary relevance transformation strategy while a TF-IDF scheme was used to generate the tweets’ features. We investigated using single and combinations of several preprocessing tasks to further improve the performance. The results showed that specific combinations of preprocessing tasks could significantly improve the evaluation measures. This has been later emphasized by the official results as our system ranked 3rd for both Arabic and Spanish datasets and 14th for the English dataset. S18-1024 @@ -310,11 +310,11 @@ <fixed-case>E</fixed-case>mo<fixed-case>I</fixed-case>ntens Tracker at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Emotional Intensity Levels in #Tweets - Ramona-AndreeaTurcu + Ramona-AndreeaTurcu Sandra MariaAmarandei - Iuliana-AlexandraFlescan-Lovin-Arseni - DanielaGifu - DianaTrandabat + Iuliana-AlexandraFlescan-Lovin-Arseni + DanielaGifu + DianaTrandabat 177–180 The „Affect in Tweets” task is centered on emotions categorization and evaluation matrix using multi-language tweets (English and Spanish). In this research, SemEval Affect dataset was preprocessed, categorized, and evaluated accordingly (precision, recall, and accuracy). The system described in this paper is based on the implementation of supervised machine learning (Naive Bayes, KNN and SVM), deep learning (NN Tensor Flow model), and decision trees algorithms. S18-1026 @@ -325,7 +325,7 @@ u<fixed-case>O</fixed-case>ttawa at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Self-Attentive Hybrid <fixed-case>GRU</fixed-case>-Based Network AhmedHusseini Orabi MahmoudHusseini Orabi - DianaInkpen + DianaInkpen DavidVan Bruwaene 181–185 We propose a novel attentive hybrid GRU-based network (SAHGN), which we used at SemEval-2018 Task 1: Affect in Tweets. Our network has two main characteristics, 1) has the ability to internally optimize its feature representation using attention mechanisms, and 2) provides a hybrid representation using a character level Convolutional Neural Network (CNN), as well as a self-attentive word-level encoder. The key advantage of our model is its ability to signify the relevant and important information that enables self-optimization. Results are reported on the valence intensity regression task. @@ -412,7 +412,7 @@ <fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Emotion Intensity Prediction Using Effective Features and Machine Learning Models HuiminXu - ManLan + ManLan YuanbinWu 231–235 This paper describes our submissions to SemEval 2018 task 1. The task is affect intensity prediction in tweets, including five subtasks. We participated in all subtasks of English tweets. We extracted several traditional NLP, sentiment lexicon, emotion lexicon and domain specific features from tweets, adopted supervised machine learning algorithms to perform emotion intensity prediction. @@ -428,7 +428,7 @@ AlaaMaarouf RaslanKain HazemHajj - WassimEl-Hajj + WassimEl-Hajj 236–244 While significant progress has been achieved for Opinion Mining in Arabic (OMA), very limited efforts have been put towards the task of Emotion mining in Arabic. In fact, businesses are interested in learning a fine-grained representation of how users are feeling towards their products or services. In this work, we describe the methods used by the team Emotion Mining in Arabic (EMA), as part of the SemEval-2018 Task 1 for Affect Mining for Arabic tweets. EMA participated in all 5 subtasks. For the five tasks, several preprocessing steps were evaluated and eventually the best system included diacritics removal, elongation adjustment, replacement of emojis by the corresponding Arabic word, character normalization and light stemming. Moreover, several features were evaluated along with different classification and regression techniques. For the 5 subtasks, word embeddings feature turned out to perform best along with Ensemble technique. EMA achieved the 1st place in subtask 5, and 3rd place in subtasks 1 and 3. S18-1036 @@ -443,7 +443,7 @@ AthanasiaKolovou GeorgiosParaskevopoulos NikolaosEllinas - ShrikanthNarayanan + ShrikanthNarayanan AlexandrosPotamianos 245–255 In this paper we present deep-learning models that submitted to the SemEval-2018 Task 1 competition: “Affect in Tweets”. We participated in all subtasks for English tweets. We propose a Bi-LSTM architecture equipped with a multi-layer self attention mechanism. The attention mechanism improves the model performance and allows us to identify salient words in tweets, as well as gain insight into the models making them more interpretable. Our model utilizes a set of word2vec word embeddings trained on a large collection of 550 million Twitter messages, augmented by a set of word affective features. Due to the limited amount of task-specific training data, we opted for a transfer learning approach by pretraining the Bi-LSTMs on the dataset of Semeval 2017, Task 4A. The proposed approach ranked 1st in Subtask E “Multi-Label Emotion Classification”, 2nd in Subtask A “Emotion Intensity Regression” and achieved competitive results in other subtasks. @@ -547,7 +547,7 @@ Zewen at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: An Ensemble Model for Affect Prediction in Tweets ZewenChi - HeyanHuang + HeyanHuang JianguiChen HaoWu RanWei @@ -561,8 +561,8 @@ Amrita_student at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Distributed Representation of Social Media Text for Affects in Tweets Nidhin AUnnithan ShaliniK. - BarathiGanesh H. B. - AnandKumar M + BarathiGanesh H. B. + AnandKumar M SomanK. P. 319–323 In this paper we did an analysis of “Affects in Tweets” which was one of the task conducted by semeval 2018. Task was to build a model which is able to do regression and classification of different emotions from the given tweets data set. We developed a base model for all the subtasks using distributed representation (Doc2Vec) and applied machine learning techniques for classification and regression. Distributed representation is an unsupervised algorithm which is capable of learning fixed length feature representation from variable length texts. Machine learning techniques used for regression is ’Linear Regression’ while ’Random Forest Tree’ is used for classification purpose. Empirical results obtained for all the subtasks by our model are shown in this paper. @@ -575,7 +575,7 @@ Angel DeborahS RajalakshmiS S MiltonRajendram - MirnalineeT T + MirnalineeT T 324–328 The system developed by the SSN MLRG1 team for Semeval-2018 task 1 on affect in tweets uses rule based feature selection and one-hot encoding to generate the input feature vector. Multilayer Perceptron was used to build the model for emotion intensity ordinal classification, sentiment analysis ordinal classification and emotion classfication subtasks. Support Vector Machine was used to build the model for emotion intensity regression and sentiment intensity regression subtasks. S18-1048 @@ -585,8 +585,8 @@ <fixed-case>CENNLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Constrained Vector Space Model in Affects in Tweets NaveenJ R - BarathiGanesh H. B. - AnandKumar M + BarathiGanesh H. B. + AnandKumar M SomanK P 329–333 This paper discusses on task 1, “Affect in Tweets” sharedtask, conducted in SemEval-2018. This task comprises of various subtasks, which required participants to analyse over different emotions and sentiments based on the provided tweet data and also measure the intensity of these emotions for subsequent subtasks. Our approach in these task was to come up with a model on count based representation and use machine learning techniques for regression and classification related tasks. In this work, we use a simple bag of words technique for supervised text classification model as to compare, that even with some advance distributed representation models we can still achieve significant accuracy. Further, fine tuning on various parameters for the bag of word, representation model we acquired better scores over various other baseline models (Vinayan et al.) participated in the sharedtask. @@ -597,8 +597,8 @@ <fixed-case>T</fixed-case>eam<fixed-case>CEN</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Global Vectors Representation in Emotion Detection AnonGeorge - BarathiGanesh H. B. - AnandKumar M + BarathiGanesh H. B. + AnandKumar M SomanK P 334–338 Emotions are a way of expressing human sentiments. In the modern era, social media is a platform where we convey our emotions. These emotions can be joy, anger, sadness and fear. Understanding the emotions from the written sentences is an interesting part in knowing about the writer. In the amount of digital language shared through social media, a considerable amount of data reflects the sentiment or emotion towards some product, person and organization. Since these texts are from users with diverse social aspects, these texts can be used to enrich the application related to the business intelligence. More than the sentiment, identification of intensity of the sentiment will enrich the performance of the end application. In this paper we experimented the intensity prediction as a text classification problem that evaluates the distributed representation text using aggregated sum and dimensionality reduction of the glove vectors of the words present in the respective texts . @@ -620,7 +620,7 @@ Mutux at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Exploring Impacts of Context Information On Emotion Detection PanDu - Jian-YunNie + Jian-YunNie 345–349 This paper describes MuTuX, our system that is designed for task 1-5a, emotion classification analysis of tweets on SemEval2018. The system aims at exploring the potential of context information of terms for emotion analysis. A Recurrent Neural Network is adopted to capture the context information of terms in tweets. Only term features and the sequential relations are used in our system. The results submitted ranks 16th out of 35 systems on the task of emotion detection in English-language tweets. S18-1052 @@ -669,7 +669,7 @@ <fixed-case>UIUC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 1: Recognizing Affect with Ensemble Models Abhishek AvinashNarwekar - RoxanaGirju + RoxanaGirju 377–384 Our submission to the SemEval-2018 Task1: Affect in Tweets shared task competition is a supervised learning model relying on standard lexicon features coupled with word embedding features. We used an ensemble of diverse models, including random forests, gradient boosted trees, and linear models, corrected for training-development set mismatch. We submitted the system’s output for subtasks 1 (emotion intensity prediction), 2 (emotion ordinal classification), 3 (valence intensity regression) and 4 (valence ordinal classification), for English tweets. We placed 25th, 19th, 24th and 15th in the four subtasks respectively. The baseline considered was an SVM (Support Vector Machines) model with linear kernel on the lexicon and embedding based features. Our system’s final performance measured in Pearson correlation scores outperformed the baseline by a margin of 2.2% to 14.6% across all tasks. S18-1057 @@ -720,9 +720,9 @@ The Dabblers at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 2: Multilingual Emoji Prediction LarisaAlexa - AlinaLorenț - DanielaGîfu - DianaTrandabăț + AlinaLorenț + DanielaGîfu + DianaTrandabăț 405–409 The “Multilingual Emoji Prediction” task focuses on the ability of predicting the correspondent emoji for a certain tweet. In this paper, we investigate the relation between words and emojis. In order to do that, we used supervised machine learning (Naive Bayes) and deep learning (Recursive Neural Network). S18-1062 @@ -788,7 +788,7 @@ Peperomia at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 2: Vector Similarity Based Approach for Emoji Prediction JingChen - DechuanYang + DechuanYang XilianLi WeiChen TengjiaoWang @@ -801,8 +801,8 @@ <fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 2: Leverage Traditional <fixed-case>NLP</fixed-case> Features and Neural Networks Methods to Address <fixed-case>T</fixed-case>witter Emoji Prediction Task XingwuLu - XinMao - ManLan + XinMao + ManLan YuanbinWu 433–437 This paper describes our submissions to Task 2 in SemEval 2018, i.e., Multilingual Emoji Prediction. We first investigate several traditional Natural Language Processing (NLP) features, and then design several deep learning models. For subtask 1: Emoji Prediction in English, we combine two different methods to represent tweet, i.e., supervised model using traditional features and deep learning model. For subtask 2: Emoji Prediction in Spanish, we only use deep learning model. @@ -896,7 +896,7 @@ <fixed-case>S</fixed-case>ynt<fixed-case>NN</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 2: is Syntax Useful for Emoji Prediction? Embedding Syntactic Trees in Multi Layer Perceptrons - Fabio MassimoZanzotto + Fabio MassimoZanzotto AndreaSantilli 477–481 In this paper, we present SyntNN as a way to include traditional syntactic models in multilayer neural networks used in the task of Semeval Task 2 of emoji prediction. The model builds on the distributed tree embedder also known as distributed tree kernel. Initial results are extremely encouraging but additional analysis is needed to overcome the problem of overfitting. @@ -918,8 +918,8 @@ <fixed-case>CENNLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 2: Enhanced Distributed Representation of Text using Target Classes for Emoji Prediction Representation NaveenJ R HariharanV - BarathiGanesh H. B. - AnandKumar M + BarathiGanesh H. B. + AnandKumar M SomanK P 486–490 Emoji is one of the “fastest growing language ” in pop-culture, especially in social media and it is very unlikely for its usage to decrease. These are generally used to bring an extra level of meaning to the texts, posted on social media platforms. Providing such an added info, gives more insights to the plain text, arising to hidden interpretation within the text. This paper explains our analysis on Task 2, ” Multilingual Emoji Prediction” sharedtask conducted by Semeval-2018. In the task, a predicted emoji based on a piece of Twitter text are labelled under 20 different classes (most commonly used emojis) where these classes are learnt and further predicted are made for unseen Twitter text. In this work, we have experimented and analysed emojis predicted based on Twitter text, as a classification problem where the entailing emoji is considered as a label for every individual text data. We have implemented this using distributed representation of text through fastText. Also, we have made an effort to demonstrate how fastText framework can be useful in case of emoji prediction. This task is divide into two subtask, they are based on dataset presented in two different languages English and Spanish. @@ -956,7 +956,7 @@ <fixed-case>LIS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 2: Mixing Word Embeddings and Bag of Features for Multilingual Emoji Prediction GaëlGuibon MagalieOchs - PatriceBellot + PatriceBellot 502–506 In this paper we present the system submitted to the SemEval2018 task2 : Multilingual Emoji Prediction. Our system approaches both languages as being equal by first; considering word embeddings associated to automatically computed features of different types, then by applying bagging algorithm RandomForest to predict the emoji of a tweet. S18-1081 @@ -1021,7 +1021,7 @@ <fixed-case>IIIDYT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 3: Irony detection in <fixed-case>E</fixed-case>nglish tweets EdisonMarrese-Taylor - SuzanaIlic + SuzanaIlic JorgeBalazs HelmutPrendinger YutakaMatsuo @@ -1059,8 +1059,8 @@ <fixed-case>WLV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 3: Dissecting Tweets in Search of Irony OmidRohanian ShivaTaslimipoor - RichardEvans - RuslanMitkov + RichardEvans + RuslanMitkov 553–559 This paper describes the systems submitted to SemEval 2018 Task 3 “Irony detection in English tweets” for both subtasks A and B. The first system leveraging a combination of sentiment, distributional semantic, and text surface features is ranked third among 44 teams according to the official leaderboard of the subtask A. The second system with slightly different representation of the features ranked ninth in subtask B. We present a method that entails decomposing tweets into separate parts. Searching for contrast within the constituents of a tweet is an integral part of our system. We embrace an extensive definition of contrast which leads to a vast coverage in detecting ironic content. S18-1090 @@ -1079,7 +1079,7 @@ <fixed-case>EL</fixed-case>i<fixed-case>RF</fixed-case>-<fixed-case>UPV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Tasks 1 and 3: Affect and Irony Detection in Tweets José-ÁngelGonzález - Lluís-F.Hurtado + Lluís-F.Hurtado FerranPla 565–569 This paper describes the participation of ELiRF-UPV team at tasks 1 and 3 of Semeval-2018. We present a deep learning based system that assembles Convolutional Neural Networks and Long Short-Term Memory neural networks. This system has been used with slight modifications for the two tasks addressed both for English and Spanish. Finally, the results obtained in the competition are reported and discussed. @@ -1134,7 +1134,7 @@ <fixed-case>INAOE</fixed-case>-<fixed-case>UPV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 3: An Ensemble Approach for Irony Detection in <fixed-case>T</fixed-case>witter Delia IrazúHernández Farías FernandoSánchez-Vega - ManuelMontes-y-Gómez + ManuelMontes-y-Gómez PaoloRosso 594–599 This paper describes an ensemble approach to the SemEval-2018 Task 3. The proposed method is composed of two renowned methods in text classification together with a novel approach for capturing ironic content by exploiting a tailored lexicon for irony detection. We experimented with different ensemble settings. The obtained results show that our method has a good performance for detecting the presence of ironic content in Twitter. @@ -1146,7 +1146,7 @@ <fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 3: Exploration on Irony Detection from Tweets via Machine Learning and Deep Learning Methods ZhenghangYin FeixiangWang - ManLan + ManLan WentingWang 600–606 The paper describes our submissions to task 3 in SemEval-2018. There are two subtasks: Subtask A is a binary classification task to determine whether a tweet is ironic, and Subtask B is a fine-grained classification task including four classes. To address them, we explored supervised machine learning method alone and in combination with neural networks. @@ -1204,7 +1204,7 @@ RajalakshmiS Angel DeborahS S MiltonRajendram - MirnalineeT T + MirnalineeT T 633–637 Sentiment analysis plays an important role in E-commerce. Identifying ironic and sarcastic content in text plays a vital role in inferring the actual intention of the user, and is necessary to increase the accuracy of sentiment analysis. This paper describes the work on identifying the irony level in twitter texts. The system developed by the SSN MLRG1 team in SemEval-2018 for task 3 (irony detection) uses rule based approach for feature selection and MultiLayer Perceptron (MLP) technique to build the model for multiclass irony classification subtask, which classifies the given text into one of the four class labels. S18-1103 @@ -1215,7 +1215,7 @@ <fixed-case>NLPRL</fixed-case>-<fixed-case>IITBHU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 3: Combining Linguistic Features and Emoji pre-trained <fixed-case>CNN</fixed-case> for Irony Detection in Tweets HarshRangwani DevangKulshreshtha - AnilKumar Singh + AnilKumar Singh 638–642 This paper describes our participation in SemEval 2018 Task 3 on Irony Detection in Tweets. We combine linguistic features with pre-trained activations of a neural network. The CNN is trained on the emoji prediction task. We combine the two feature sets and feed them into an XGBoost Classifier for classification. Subtask-A involves classification of tweets into ironic and non-ironic instances whereas Subtask-B involves classification of the tweet into - non-ironic, verbal irony, situational irony or other verbal irony. It is observed that combining features from these two different feature spaces improves our system results. We leverage the SMOTE algorithm to handle the problem of class imbalance in Subtask-B. Our final model achieves an F1-score of 0.65 and 0.47 on Subtask-A and Subtask-B respectively. Our system ranks 4th on both tasks respectively, outperforming the baseline by 6% on Subtask-A and 14% on Subtask-B. S18-1104 @@ -1289,7 +1289,7 @@ KataGábor DavideBuscaldi Anne-KathrinSchumann - BehrangQasemiZadeh + BehrangQasemiZadeh HaïfaZargayouna ThierryCharnois 679–688 @@ -1336,9 +1336,9 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 9: Hypernym Discovery - JoseCamacho-Collados + JoseCamacho-Collados ClaudioDelli Bovi - LuisEspinosa-Anke + LuisEspinosa-Anke SergioOramas TommasoPasini EnricoSantus @@ -1354,7 +1354,7 @@ <fixed-case>CRIM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 9: A Hybrid Approach to Hypernym Discovery GabrielBernier-Colborne - CarolineBarrière + CarolineBarrière 725–731 This report describes the system developed by the CRIM team for the hypernym discovery task at SemEval 2018. This system exploits a combination of supervised projection learning and unsupervised pattern-based hypernym discovery. It was ranked first on the 3 sub-tasks for which we submitted results. S18-1116 @@ -1434,7 +1434,7 @@ <fixed-case>L</fixed-case>ight<fixed-case>R</fixed-case>el at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 7: Lightweight and Fast Relation Classification TylerRenslow - GünterNeumann + GünterNeumann 778–782 We present LightRel, a lightweight and fast relation classifier. Our goal is to develop a high baseline for different relation extraction tasks. By defining only very few data-internal, word-level features and external knowledge sources in the form of word clusters and word embeddings, we train a fast and simple linear classifier S18-1123 @@ -1453,7 +1453,7 @@ The <fixed-case>UWNLP</fixed-case> system at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 7: Neural Relation Extraction Model with Selectively Incorporated Concept Embeddings YiLuan - MariOstendorf + MariOstendorf HannanehHajishirzi 788–792 This paper describes our submission for SemEval 2018 Task 7 shared task on semantic relation extraction and classification in scientific papers. Our model is based on the end-to-end relation extraction model of (Miwa and Bansal, 2016) with several enhancements such as character-level encoding attention mechanism on selecting pretrained concept candidate embeddings. Our official submission ranked the second in relation classification task (Subtask 1.1 and Subtask 2 Senerio 2), and the first in the relation extraction task (Subtask 2 Scenario 1). @@ -1464,8 +1464,8 @@ <fixed-case>UC</fixed-case>3<fixed-case>M</fixed-case>-<fixed-case>NII</fixed-case> Team at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 7: Semantic Relation Classification in Scientific Papers via Convolutional Neural Network VíctorSuárez-Paniagua - IsabelSegura-Bedmar - AkikoAizawa + IsabelSegura-Bedmar + AkikoAizawa 793–797 This paper reports our participation for SemEval-2018 Task 7 on extraction and classification of relationships between entities in scientific papers. Our approach is based on the use of a Convolutional Neural Network (CNN) trained on350 abstract with manually annotated entities and relations. Our hypothesis is that this deep learning model can be applied to extract and classify relations between entities for scientific papers at the same time. We use the Part-of-Speech and the distances to the target entities as part of the embedding for each word and we blind all the entities by marker names. In addition, we use sampling techniques to overcome the imbalance issues of this dataset. Our architecture obtained an F1-score of 35.4% for the relation extraction task and 18.5% for the relation classification task with a basic configuration of the one step CNN. S18-1126 @@ -1488,7 +1488,7 @@ <fixed-case>SIRIUS</fixed-case>-<fixed-case>LTG</fixed-case>-<fixed-case>U</fixed-case>i<fixed-case>O</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 7: Convolutional Neural Networks with Shortest Dependency Paths for Semantic Relation Extraction and Classification in Scientific Papers FarhadNooralahzadeh - LiljaØvrelid + LiljaØvrelid Jan ToreLønning 805–810 This article presents the SIRIUS-LTG-UiO system for the SemEval 2018 Task 7 on Semantic Relation Extraction and Classification in Scientific Papers. First we extract the shortest dependency path (sdp) between two entities, then we introduce a convolutional neural network (CNN) which takes the shortest dependency path embeddings as input and performs relation classification with differing objectives for each subtask of the shared task. This approach achieved overall F1 scores of 76.7 and 83.2 for relation classification on clean and noisy data, respectively. Furthermore, for combined relation extraction and classification on clean data, it obtained F1 scores of 37.4 and 33.6 for each phase. Our system ranks 3rd in all three sub-tasks of the shared task. @@ -1541,7 +1541,7 @@ YuanXu JingyiZhang AnneLauscher - Simone PaoloPonzetto + Simone PaoloPonzetto 826–830 Large repositories of scientific literature call for the development of robust methods to extract information from scholarly papers. This problem is addressed by the SemEval 2018 Task 7 on extracting and classifying relations found within scientific publications. In this paper, we present a feature-based and a deep learning-based approach to the task and discuss the results of the system runs that we submitted for evaluation. S18-1132 @@ -1598,7 +1598,7 @@ <fixed-case>S</fixed-case>ci<fixed-case>REL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 7: A System for Semantic Relation Extraction and Classification DarshiniMahendran ChathurikaBrahmana - BridgetMcInnes + BridgetMcInnes 853–857 This paper describes our system, SciREL (Scientific abstract RELation extraction system), developed for the SemEval 2018 Task 7: Semantic Relation Extraction and Classification in Scientific Papers. We present a feature-vector based system to extract explicit semantic relation and classify them. Our system is trained in the ACL corpus (BIrd et al., 2008) that contains annotated abstracts given by the task organizers. When an abstract with annotated entities is given as the input into our system, it extracts the semantic relations through a set of defined features and classifies them into one of the given six categories of relations through feature engineering and a learned model. For the best combination of features, our system SciREL obtained an F-measure of 20.03 on the official test corpus which includes 150 abstracts in the relation classification Subtask 1.1. In this paper, we provide an in-depth error analysis of our results to prevent duplication of research efforts in the development of future systems S18-1137 @@ -1608,8 +1608,8 @@ <fixed-case>NTNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 7: Classifier Ensembling for Semantic Relation Identification and Classification in Scientific Papers BiswanathBarik - Utpal KumarSikdar - BjörnGambäck + Utpal KumarSikdar + BjörnGambäck 858–862 The paper presents NTNU’s contribution to SemEval-2018 Task 7 on relation identification and classification. The class weights and parameters of five alternative supervised classifiers were optimized through grid search and cross-validation. The outputs of the classifiers were combined through voting for the final prediction. A wide variety of features were explored, with the most informative identified by feature selection. The best setting achieved F1 scores of 47.4% and 66.0% in the relation classification subtasks 1.1 and 1.2. For relation identification and classification in subtask 2, it achieved F1 scores of 33.9% and 17.0%, S18-1138 @@ -1659,7 +1659,7 @@ ShimeiPan YoungjaPark AnupamJoshi - TimFinin + TimFinin 878–884 We describe the systems developed by the UMBC team for 2018 SemEval Task 8, SecureNLP (Semantic Extraction from CybersecUrity REports using Natural Language Processing). We participated in three of the sub-tasks: (1) classifying sentences as being relevant or irrelevant to malware, (2) predicting token labels for sentences, and (4) predicting attribute labels from the Malware Attribute Enumeration and Characterization vocabulary for defining malware characteristics. We achieve F1 score of 50.34/18.0 (dev/test), 22.23 (test-data), and 31.98 (test-data) for Task1, Task2 and Task2 respectively. We also make our cybersecurity embeddings publicly available at http://bit.ly/cyber2vec. S18-1142 @@ -1680,9 +1680,9 @@ <fixed-case>F</fixed-case>lytxt_<fixed-case>NTNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 8: Identifying and Classifying Malware Text Using Conditional Random Fields and Naïve <fixed-case>B</fixed-case>ayes Classifiers - Utpal KumarSikdar + Utpal KumarSikdar BiswanathBarik - BjörnGambäck + BjörnGambäck 890–893 Cybersecurity risks such as malware threaten the personal safety of users, but to identify malware text is a major challenge. The paper proposes a supervised learning approach to identifying malware sentences given a document (subTask1 of SemEval 2018, Task 8), as well as to classifying malware tokens in the sentences (subTask2). The approach achieved good results, ranking second of twelve participants for both subtasks, with F-scores of 57% for subTask1 and 28% for subTask2. S18-1144 @@ -1700,10 +1700,10 @@ <fixed-case>A</fixed-case>pollo at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 9: Detecting Hypernymy Relations Using Syntactic Dependencies - MihaelaOnofrei + MihaelaOnofrei IonuțHulub - DianaTrandabăț - DanielaGîfu + DianaTrandabăț + DanielaGîfu 898–902 This paper presents the participation of Apollo’s team in the SemEval-2018 Task 9 “Hypernym Discovery”, Subtask 1: “General-Purpose Hypernym Discovery”, which tries to produce a ranked list of hypernyms for a specific term. We propose a novel approach for automatic extraction of hypernymy relations from a corpus by using dependency patterns. We estimated that the application of these patterns leads to a higher score than using the traditional lexical patterns. S18-1146 @@ -1760,7 +1760,7 @@ <fixed-case>ADAPT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 9: Skip-Gram Word Embeddings for Unsupervised Hypernym Discovery in Specialised Corpora - AlfredoMaldonado + AlfredoMaldonado FilipKlubička 924–927 This paper describes a simple but competitive unsupervised system for hypernym discovery. The system uses skip-gram word embeddings with negative sampling, trained on specialised corpora. Candidate hypernyms for an input word are predicted based based on cosine similarity scores. Two sets of word embedding models were trained separately on two specialised corpora: a medical corpus and a music industry corpus. Our system scored highest in the medical domain among the competing unsupervised systems but performed poorly on the music industry domain. Our system does not depend on any external data other than raw specialised corpora. @@ -1839,8 +1839,8 @@ <fixed-case>ALB</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 10: A System for Capturing Discriminative Attributes BogdanDumitru - Alina MariaCiobanu - Liviu P.Dinu + Alina MariaCiobanu + Liviu P.Dinu 963–967 Semantic difference detection attempts to capture whether a word is a discriminative attribute between two other words. For example, the discriminative feature red characterizes the first word from the (apple, banana) pair, but not the second. Modeling semantic difference is essential for language understanding systems, as it provides useful information for identifying particular aspects of word senses. This paper describes our system implementation (the ALB system of the NLP@Unibuc team) for the 10th task of the SemEval 2018 workshop, “Capturing Discriminative Attributes”. We propose a method for semantic difference detection that uses an SVM classifier with features based on co-occurrence counts and shallow semantic parsing, achieving 0.63 F1 score in the competition. S18-1158 @@ -1850,7 +1850,7 @@ <fixed-case>EL</fixed-case>i<fixed-case>RF</fixed-case>-<fixed-case>UPV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 10: Capturing Discriminative Attributes with Knowledge Graphs and <fixed-case>W</fixed-case>ikipedia José-ÁngelGonzález - Lluís-F.Hurtado + Lluís-F.Hurtado EncarnaSegarra FerranPla 968–971 @@ -1864,8 +1864,8 @@ ShivaTaslimipoor OmidRohanian Le AnHa - GloriaCorpas Pastor - RuslanMitkov + GloriaCorpas Pastor + RuslanMitkov 972–976 This paper describes the system submitted to SemEval 2018 shared task 10 ‘Capturing Dicriminative Attributes’. We use a combination of knowledge-based and co-occurrence features to capture the semantic difference between two words in relation to an attribute. We define scores based on association measures, ngram counts, word similarity, and ConceptNet relations. The system is ranked 4th (joint) on the official leaderboard of the task. S18-1160 @@ -1875,7 +1875,7 @@ <fixed-case>UNAM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 10: Unsupervised Semantic Discriminative Attribute Identification in Neural Word Embedding Cones IgnacioArroyo-Fernández - IvanMeza + IvanMeza Carlos-FranciscoMéndez-Cruz 977–984 In this paper we report an unsupervised method aimed to identify whether an attribute is discriminative for two words (which are treated as concepts, in our particular case). To this end, we use geometrically inspired vector operations underlying unsupervised decision functions. These decision functions operate on state-of-the-art neural word embeddings of the attribute and the concepts. The main idea can be described as follows: if attribute q discriminates concept a from concept b, then q is excluded from the feature set shared by these two concepts: the intersection. That is, the membership q\in (a\cap b) does not hold. As a,b,q are represented with neural word embeddings, we tested vector operations allowing us to measure membership, i.e. fuzzy set operations (t-norm, for fuzzy intersection, and t-conorm, for fuzzy union) and the similarity between q and the convex cone described by a and b. @@ -1898,7 +1898,7 @@ <fixed-case>B</fixed-case>om<fixed-case>J</fixed-case>i at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 10: Combining Vector-, Pattern- and Graph-based Information to Identify Discriminative Attributes EnricoSantus - ChrisBiemann + ChrisBiemann EmmanueleChersoni 990–994 This paper describes BomJi, a supervised system for capturing discriminative attributes in word pairs (e.g. yellow as discriminative for banana over watermelon). The system relies on an XGB classifier trained on carefully engineered graph-, pattern- and word embedding-based features. It participated in the SemEval-2018 Task 10 on Capturing Discriminative Attributes, achieving an F1 score of 0.73 and ranking 2nd out of 26 participant systems. @@ -1918,7 +1918,7 @@ <fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 10: Evaluating Simple but Effective Features on Machine Learning Methods for Semantic Difference Detection YunxiaoZhou - ManLan + ManLan YuanbinWu 999–1002 This paper describes the system we submitted to Task 10 (Capturing Discriminative Attributes) in SemEval 2018. Given a triple (word1, word2, attribute), this task is to predict whether it exemplifies a semantic difference or not. We design and investigate several word embedding features, PMI features and WordNet features together with supervised machine learning methods to address this task. Officially released results show that our system ranks above average. @@ -1929,7 +1929,7 @@ <fixed-case>A</fixed-case>mrita<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 10: Capturing discriminative attributes using convolution neural network over global vector representation. VivekVinayan - AnandKumar M + AnandKumar M SomanK P 1003–1007 The “Capturing Discriminative Attributes” sharedtask is the tenth task, conjoint with SemEval2018. The task is to predict if a word can capture distinguishing attributes of one word from another. We use GloVe word embedding, pre-trained on openly sourced corpus for this task. A base representation is initially established over varied dimensions. These representations are evaluated based on validation scores over two models, first on an SVM based classifier and second on a one dimension CNN model. The scores are used to further develop the representation with vector combinations, by considering various distance measures. These measures correspond to offset vectors which are concatenated as features, mainly to improve upon the F1score, with the best accuracy. The features are then further tuned on the validation scores, to achieve highest F1score. Our evaluation narrowed down to two representations, classified on CNN models, having a total dimension length of 1204 & 1203 for the final submissions. Of the two, the latter feature representation delivered our best F1score of 0.658024 (as per result). @@ -1996,7 +1996,7 @@ <fixed-case>EL</fixed-case>i<fixed-case>RF</fixed-case>-<fixed-case>UPV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 11: Machine Comprehension using Commonsense Knowledge José-ÁngelGonzález - Lluís-F.Hurtado + Lluís-F.Hurtado EncarnaSegarra FerranPla 1034–1037 @@ -2030,7 +2030,7 @@ <fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 11: Using Deep Learning Method to Address Machine Comprehension Task YixuanSheng - ManLan + ManLan YuanbinWu 1048–1052 This paper describes the system we submitted to the Task 11 in SemEval 2018, i.e., Machine Comprehension using Commonsense Knowledge. Given a passage and some questions that each have two candidate answers, this task requires the participate system to select out one answer meet the meaning of original text or commonsense knowledge from the candidate answers. For this task, we use a deep learning method to obtain final predict answer by calculating relevance of choices representations and question-aware document representation. @@ -2040,7 +2040,7 @@ <fixed-case>CSR</fixed-case>eader at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 11: Multiple Choice Question Answering as Textual Entailment - ZhengpingJiang + ZhengpingJiang QiSun 1053–1057 In this document we present an end-to-end machine reading comprehension system that solves multiple choice questions with a textual entailment perspective. Since some of the knowledge required is not explicitly mentioned in the text, we try to exploit commonsense knowledge by using pretrained word embeddings during contextual embeddings and by dynamically generating a weighted representation of related script knowledge. In the model two kinds of prediction structure are ensembled, and the final accuracy of our system is 10 percent higher than the naiive baseline. @@ -2071,7 +2071,7 @@ <fixed-case>IUCM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 11: Similar-Topic Texts as a Comprehension Knowledge Source SofiaReznikova - LeonDerczynski + LeonDerczynski 1068–1072 This paper describes the IUCM entry at SemEval-2018 Task 11, on machine comprehension using commonsense knowledge. First, clustering and topic modeling are used to divide given texts into topics. Then, during the answering phase, other texts of the same topic are retrieved and used as commonsense knowledge. Finally, the answer is selected. While clustering itself shows good results, finding an answer proves to be more challenging. This paper reports the results of system evaluation and suggests potential improvements. S18-1179 @@ -2090,8 +2090,8 @@ <fixed-case>MITRE</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 11: Commonsense Reasoning without Commonsense Knowledge - ElizabethMerkhofer - JohnHenderson + ElizabethMerkhofer + JohnHenderson DavidBloom LauraStrickhart GuidoZarrella @@ -2115,9 +2115,9 @@ <fixed-case>ITNLP</fixed-case>-<fixed-case>ARC</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 12: Argument Reasoning Comprehension with Attention WenjieLiu - ChengjieSun + ChengjieSun LeiLin - BingquanLiu + BingquanLiu 1089–1093 Reasoning is a very important topic and has many important applications in the field of natural language processing. Semantic Evaluation (SemEval) 2018 Task 12 “The Argument Reasoning Comprehension” committed to research natural language reasoning. In this task, we proposed a novel argument reasoning comprehension system, ITNLP-ARC, which use Neural Networks technology to solve this problem. In our system, the LSTM model is involved to encode both the premise sentences and the warrant sentences. The attention model is used to merge the two premise sentence vectors. Through comparing the similarity between the attention vector and each of the two warrant vectors, we choose the one with higher similarity as our system’s final answer. S18-1183 @@ -2126,8 +2126,8 @@ <fixed-case>ECNU</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 12: An End-to-End Attention-based Neural Network for the Argument Reasoning Comprehension Task - JunfengTian - ManLan + JunfengTian + ManLan YuanbinWu 1094–1098 This paper presents our submissions to SemEval 2018 Task 12: the Argument Reasoning Comprehension Task. We investigate an end-to-end attention-based neural network to represent the two lexically close candidate warrants. On the one hand, we extract their different parts as attention vectors to obtain distinguishable representations. On the other hand, we use their surrounds (i.e., claim, reason, debate context) as another attention vectors to get contextual representations, which work as final clues to select the correct warrant. Our model achieves 60.4% accuracy and ranks 3rd among 22 participating systems. @@ -2194,9 +2194,9 @@ <fixed-case>U</fixed-case>ni<fixed-case>M</fixed-case>elb at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 12: Generative Implication using <fixed-case>LSTM</fixed-case>s, <fixed-case>S</fixed-case>iamese Networks and Semantic Representations with Synonym Fuzzing AnirudhJoshi - TimBaldwin + TimBaldwin Richard O.Sinnott - CecileParis + CecileParis 1124–1128 This paper describes a warrant classification system for SemEval 2018 Task 12, that attempts to learn semantic representations of reasons, claims and warrants. The system consists of 3 stacked LSTMs: one for the reason, one for the claim, and one shared Siamese Network for the 2 candidate warrants. Our main contribution is to force the embeddings into a shared feature space using vector operations, semantic similarity classification, Siamese networks, and multi-task learning. In doing so, we learn a form of generative implication, in encoding implication interrelationships between reasons, claims, and the associated correct and incorrect warrants. We augment the limited data in the task further by utilizing WordNet synonym “fuzzing”. When applied to SemEval 2018 Task 12, our system performs well on the development data, and officially ranked 8th among 21 teams. S18-1190 @@ -2206,7 +2206,7 @@ Joker at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2018 Task 12: The Argument Reasoning Comprehension with Neural Attention Sui Guobin - Chao Wenhan + WenhanChao Luo Zhunchen 1129–1132 This paper describes a classification system that participated in the SemEval-2018 Task 12: The Argument Reasoning Comprehension Task. Briefly the task can be described as that a natural language “argument” is what we have, with reason, claim, and correct and incorrect warrants, and we need to choose the correct warrant. In order to make fully understand of the semantic information of the sentences, we proposed a neural network architecture with attention mechanism to achieve this goal. Besides we try to introduce keywords into the model to improve accuracy. Finally the proposed system achieved 5th place among 22 participating systems @@ -2269,7 +2269,7 @@ Resolving Event Coreference with Supervised Representation Learning and Clustering-Oriented Regularization KianKenyon-Dean - Jackie Chi KitCheung + Jackie Chi KitCheung DoinaPrecup 1–10 We present an approach to event coreference resolution by developing a general framework for clustering that uses supervised representation learning. We propose a neural network architecture with novel Clustering-Oriented Regularization (CORE) terms in the objective function. These terms encourage the model to create embeddings of event mentions that are amenable to clustering. We then use agglomerative clustering on these embeddings to build event coreference chains. For both within- and cross-document coreference on the ECB+ corpus, our model obtains better results than models that require significantly more pre-annotated information. This work provides insight and motivating results for a new general approach to solving coreference and clustering problems with representation learning. @@ -2280,7 +2280,7 @@ Learning distributed event representations with a multi-task approach XudongHong - AsadSayeed + AsadSayeed VeraDemberg 11–21 Human world knowledge contains information about prototypical events and their participants and locations. In this paper, we train the first models using multi-task learning that can both predict missing event participants and also perform semantic role classification based on semantic plausibility. Our best-performing model is an improvement over the previous state-of-the-art on thematic fit modelling tasks. The event embeddings learned by the model can additionally be used effectively in an event similarity task, also outperforming the state-of-the-art. @@ -2290,7 +2290,7 @@ Assessing Meaning Components in <fixed-case>G</fixed-case>erman Complex Verbs: A Collection of Source-Target Domains and Directionality - SabineSchulte im Walde + SabineSchulte im Walde MaximilianKöper SylviaSpringorum 22–32 @@ -2325,7 +2325,7 @@ Examining Gender and Race Bias in Two Hundred Sentiment Analysis Systems SvetlanaKiritchenko - SaifMohammad + SaifMohammad 43–53 Automatic machine learning systems can inadvertently accentuate and perpetuate inappropriate human biases. Past work on examining inappropriate biases has largely focused on just individual systems. Further, there is no benchmark dataset for examining inappropriate biases in systems. Here for the first time, we present the Equity Evaluation Corpus (EEC), which consists of 8,640 English sentences carefully chosen to tease out biases towards certain races and genders. We use the dataset to examine 219 automatic sentiment analysis systems that took part in a recent shared task, SemEval-2018 Task 1 ‘Affect in Tweets’. We find that several of the systems show statistically significant bias; that is, they consistently provide slightly higher sentiment intensity predictions for one race or one gender. We make the EEC freely available. S18-2005 @@ -2356,7 +2356,7 @@ Quantitative Semantic Variation in the Contexts of Concrete and Abstract Words DanielaNaumann DiegoFrassinelli - SabineSchulte im Walde + SabineSchulte im Walde 76–85 Across disciplines, researchers are eager to gain insight into empirical features of abstract vs. concrete concepts. In this work, we provide a detailed characterisation of the distributional nature of abstract and concrete words across 16,620 English nouns, verbs and adjectives. Specifically, we investigate the following questions: (1) What is the distribution of concreteness in the contexts of concrete and abstract target words? (2) What are the differences between concrete and abstract words in terms of contextual semantic diversity? (3) How does the entropy of concrete and abstract word contexts differ? Overall, our studies show consistent differences in the distributional representation of concrete and abstract words, thus challenging existing theories of cognition and providing a more fine-grained description of their nature. S18-2008 @@ -2368,7 +2368,7 @@ GilbertBadaro HusseinJundi HazemHajj - WassimEl-Hajj + WassimEl-Hajj 86–93 Nowadays, social media have become a platform where people can easily express their opinions and emotions about any topic such as politics, movies, music, electronic products and many others. On the other hand, politicians, companies, and businesses are interested in analyzing automatically people’s opinions and emotions. In the last decade, a lot of efforts has been put into extracting sentiment polarity from texts. Recently, the focus has expanded to also cover emotion recognition from texts. In this work, we expand an existing emotion lexicon, DepecheMood, by leveraging semantic knowledge from English WordNet (EWN). We create an expanded lexicon, EmoWordNet, consisting of 67K terms aligned with EWN, almost 1.8 times the size of DepecheMood. We also evaluate EmoWordNet in an emotion recognition task using SemEval 2007 news headlines dataset and we achieve an improvement compared to the use of DepecheMood. EmoWordNet is publicly available to speed up research in the field on http://oma-project.com. S18-2009 @@ -2389,7 +2389,7 @@ How Gender and Skin Tone Modifiers Affect Emoji Semantics in <fixed-case>T</fixed-case>witter FrancescoBarbieri - JoseCamacho-Collados + JoseCamacho-Collados 101–106 In this paper we analyze the use of emojis in social media with respect to gender and skin tone. By gathering a dataset of over twenty two million tweets from United States some findings are clearly highlighted after performing a simple frequency-based analysis. Moreover, we carry out a semantic analysis on the usage of emojis and their modifiers (e.g. gender and skin tone) by embedding all words, emojis and modifiers into the same vector space. Our analyses reveal that some stereotypes related to the skin color and gender seem to be reflected on the use of these modifiers. For example, emojis representing hand gestures are more widely utilized with lighter skin tones, and the usage across skin tones differs significantly. At the same time, the vector corresponding to the male modifier tends to be semantically close to emojis related to business or technology, whereas their female counterparts appear closer to emojis about love or makeup. S18-2011 @@ -2420,7 +2420,7 @@ Learning Patient Representations from Text DmitriyDligach - TimothyMiller + TimothyMiller 119–123 Mining electronic health records for patients who satisfy a set of predefined criteria is known in medical informatics as phenotyping. Phenotyping has numerous applications such as outcome prediction, clinical trial recruitment, and retrospective studies. Supervised machine learning for phenotyping typically relies on sparse patient representations such as bag-of-words. We consider an alternative that involves learning patient representations. We develop a neural network model for learning patient representations and show that the learned representations are general enough to obtain state-of-the-art performance on a standard comorbidity detection task. S18-2014 @@ -2440,8 +2440,8 @@ Coarse Lexical Frame Acquisition at the Syntax–Semantics Interface Using a Latent-Variable <fixed-case>PCFG</fixed-case> Model LauraKallmeyer - BehrangQasemiZadeh - Jackie Chi KitCheung + BehrangQasemiZadeh + Jackie Chi KitCheung 130–141 We present a method for unsupervised lexical frame acquisition at the syntax–semantics interface. Given a set of input strings derived from dependency parses, our method generates a set of clusters that resemble lexical frame structures. Our work is motivated not only by its practical applications (e.g., to build, or expand the coverage of lexical frame databases), but also to gain linguistic insight into frame structures with respect to lexical distributions in relation to grammatical structures. We model our task using a hierarchical Bayesian network and employ tools and methods from latent variable probabilistic context free grammars (L-PCFGs) for statistical inference and parameter fitting, for which we propose a new split and merge procedure. We show that our model outperforms several baselines on a portion of the Wall Street Journal sentences that we have newly annotated for evaluation purposes. S18-2016 @@ -2489,7 +2489,7 @@ Integrating Multiplicative Features into Supervised Distributional Methods for Lexical Entailment - TuVu + TuVu VeredShwartz 160–166 Supervised distributional methods are applied successfully in lexical entailment, but recent work questioned whether these methods actually learn a relation between two words. Specifically, Levy et al. (2015) claimed that linear classifiers learn only separate properties of each word. We suggest a cheap and easy way to boost the performance of these methods by integrating multiplicative features into commonly used representations. We provide an extensive evaluation with different classifiers and evaluation setups, and suggest a suitable evaluation setup for the task, eliminating biases existing in previous ones. @@ -2558,7 +2558,7 @@ Agree or Disagree: Predicting Judgments on Nuanced Assertions MichaelWojatzki TorstenZesch - SaifMohammad + SaifMohammad SvetlanaKiritchenko 214–224 Being able to predict whether people agree or disagree with an assertion (i.e. an explicit, self-contained statement) has several applications ranging from predicting how many people will like or dislike a social media post to classifying posts based on whether they are in accordance with a particular point of view. We formalize this as two NLP tasks: predicting judgments of (i) individuals and (ii) groups based on the text of the assertion and previous judgments. We evaluate a wide range of approaches on a crowdsourced data set containing over 100,000 judgments on over 2,000 assertions. We find that predicting individual judgments is a hard task with our best results only slightly exceeding a majority baseline, but that judgments of groups can be more reliably predicted using a Siamese neural network, which outperforms all other approaches by a wide margin. @@ -2580,7 +2580,7 @@ Putting Semantics into Semantic Roles - JamesAllen + JamesAllen Choh ManTeng 235–244 While there have been many proposals for theories of semantic roles over the years, these models are mostly justified by intuition and the only evaluation methods have been inter-annotator agreement. We explore three different ideas for providing more rigorous theories of semantic roles. These ideas give rise to more objective criteria for designing role sets, and lend themselves to some experimental evaluation. We illustrate the discussion by examining the semantic roles in TRIPS. @@ -2591,7 +2591,7 @@ Measuring Frame Instance Relatedness ValerioBasile - RoqueLopez Condori + RoqueLopez Condori ElenaCabrio 245–254 Frame semantics is a well-established framework to represent the meaning of natural language in computational terms. In this work, we aim to propose a quantitative measure of relatedness between pairs of frame instances. We test our method on a dataset of sentence pairs, highlighting the correlation between our metric and human judgments of semantic similarity. Furthermore, we propose an application of our measure for clustering frame instances to extract prototypical knowledge from natural language. @@ -2624,7 +2624,7 @@ Multiplicative Tree-Structured Long Short-Term Memory Networks for Semantic Representations - Nam KhanhTran + Nam KhanhTran WeiweiCheng 276–286 Tree-structured LSTMs have shown advantages in learning semantic representations by exploiting syntactic information. Most existing methods model tree structures by bottom-up combinations of constituent nodes using the same shared compositional function and often making use of input word information only. The inability to capture the richness of compositionality makes these models lack expressive power. In this paper, we propose multiplicative tree-structured LSTMs to tackle this problem. Our model makes use of not only word information but also relation information between words. It is more expressive, as different combination functions can be used for each child node. In addition to syntactic trees, we also investigate the use of Abstract Meaning Representation in tree-structured models, in order to incorporate both syntactic and semantic information from the sentence. Experimental results on common NLP tasks show the proposed models lead to better sentence representation and AMR brings benefits in complex tasks. diff --git a/data/xml/S19.xml b/data/xml/S19.xml index a1961b1730..baafe7e447 100644 --- a/data/xml/S19.xml +++ b/data/xml/S19.xml @@ -4,7 +4,7 @@ Proceedings of the Eighth Joint Conference on Lexical and Computational Semantics (*SEM 2019) S19-1 - RadaMihalcea + RadaMihalcea EkaterinaShutova Lun-WeiKu KilianEvang @@ -23,7 +23,7 @@ <fixed-case>SUR</fixed-case>el: A Gold Standard for Incorporating Meaning Shifts into Term Extraction AnnaHätty DominikSchlechtweg - SabineSchulte im Walde + SabineSchulte im Walde 1–8 We introduce SURel, a novel dataset with human-annotated meaning shifts between general-language and domain-specific contexts. We show that meaning shifts of term candidates cause errors in term extraction, and demonstrate that the SURel annotation reflects these errors. Furthermore, we illustrate that SURel enables us to assess optimisations of term extraction techniques when incorporating meaning shifts. S19-1001 @@ -32,7 +32,7 @@ Word Usage Similarity Estimation with Sentence Representations and Automatic Substitutes - AinaGarí Soler + AinaGarí Soler MariannaApidianaki AlexandreAllauzen 9–21 @@ -54,7 +54,7 @@ Composition of Sentence Embeddings: Lessons from Statistical Relational Learning DamienSileo - TimVan De Cruys + TimVan De Cruys CamillePradel PhilippeMuller 33–43 @@ -78,7 +78,7 @@ Scalable Cross-Lingual Transfer of Neural Sentence Embeddings HananAldarmaki - MonaDiab + MonaDiab 51–60 We develop and investigate several cross-lingual alignment approaches for neural sentence embedding models, such as the supervised inference classifier, InferSent, and sequential encoder-decoder models. We evaluate three alignment frameworks applied to these models: joint modeling, representation transfer learning, and sentence mapping, using parallel text to guide the alignment. Our results support representation transfer as a scalable approach for modular cross-lingual alignment of neural sentence embeddings, where we observe better performance compared to joint models in intrinsic and extrinsic evaluations, particularly with smaller sets of parallel data. S19-1006 @@ -88,7 +88,7 @@ Second-order contexts from lexical substitutes for few-shot learning of word representations QianchuLiu - DianaMcCarthy + DianaMcCarthy AnnaKorhonen 61–67 There is a growing awareness of the need to handle rare and unseen words in word representation modelling. In this paper, we focus on few-shot learning of emerging concepts that fully exploits only a few available contexts. We introduce a substitute-based context representation technique that can be applied on an existing word embedding space. Previous context-based approaches to modelling unseen words only consider bag-of-word first-order contexts, whereas our method aggregates contexts as second-order substitutes that are produced by a sequence-aware sentence completion model. We experimented with three tasks that aim to test the modelling of emerging concepts. We found that these tasks show different emphasis on first and second order contexts, and our substitute-based method achieves superior performance on naturally-occurring contexts from corpora. @@ -156,7 +156,7 @@ Deconstructing multimodality: visual properties and visual context in human semantic processing ChristopherDavis LuanaBulat - Anita LillaVero + Anita LillaVero EkaterinaShutova 118–124 Multimodal semantic models that extend linguistic representations with additional perceptual input have proved successful in a range of natural language processing (NLP) tasks. Recent research has successfully used neural methods to automatically create visual representations for words. However, these works have extracted visual features from complete images, and have not examined how different kinds of visual information impact performance. In contrast, we construct multimodal models that differentiate between internal visual properties of the objects and their external visual context. We evaluate the models on the task of decoding brain activity associated with the meanings of nouns, demonstrating their advantage over those based on complete images. @@ -169,7 +169,7 @@ AndreyKutuzov MohammadDorgham OleksiyOliynyk - ChrisBiemann + ChrisBiemann AlexanderPanchenko 125–135 We present path2vec, a new approach for learning graph embeddings that relies on structural measures of pairwise node similarities. The model learns representations for nodes in a dense space that approximate a given user-defined graph distance measure, such as e.g. the shortest path distance or distance measures that take information beyond the graph structure into account. Evaluation of the proposed model on semantic similarity and word sense disambiguation tasks, using various WordNet-based similarity measures, show that our approach yields competitive results, outperforming strong graph embedding baselines. The model is computationally efficient, being orders of magnitude faster than the direct computation of graph-based distances. @@ -180,7 +180,7 @@ Neural User Factor Adaptation for Text Classification: Learning to Generalize Across Author Demographics XiaoleiHuang - Michael J.Paul + Michael J.Paul 136–146 Language use varies across different demographic factors, such as gender, age, and geographic location. However, most existing document classification methods ignore demographic variability. In this study, we examine empirically how text data can vary across four demographic factors: gender, age, country, and region. We propose a multitask neural model to account for demographic variations via adversarial training. In experiments on four English-language social media datasets, we find that classification performance improves when adapting for user factors. S19-1015 @@ -189,7 +189,7 @@ Abstract Graphs and Abstract Paths for Knowledge Graph Completion - ViviNastase + ViviNastase BhushanKotnis 147–157 Knowledge graphs, which provide numerous facts in a machine-friendly format, are incomplete. Information that we induce from such graphs – e.g. entity embeddings, relation representations or patterns – will be affected by the imbalance in the information captured in the graph – by biasing representations, or causing us to miss potential patterns. To partially compensate for this situation we describe a method for representing knowledge graphs that capture an intensional representation of the original extensional information. This representation is very compact, and it abstracts away from individual links, allowing us to find better path candidates, as shown by the results of link prediction using this information. @@ -252,7 +252,7 @@ Improving Human Needs Categorization of Events with Semantic Classification HaiboDing - EllenRiloff + EllenRiloff ZheFeng 198–204 Human Needs categories have been used to characterize the reason why an affective event is positive or negative. For example, “I got the flu” and “I got fired” are both negative (undesirable) events, but getting the flu is a Health problem while getting fired is a Financial problem. Previous work created learning models to assign events to Human Needs categories based on their words and contexts. In this paper, we introduce an intermediate step that assigns words to relevant semantic concepts. We create lightly supervised models that learn to label words with respect to 10 semantic concepts associated with Human Needs categories, and incorporate these labels as features for event categorization. Our results show that recognizing relevant semantic concepts improves both the recall and precision of Human Needs categorization for events. @@ -299,12 +299,12 @@ AdamPoliak AlexWang PatrickXia - R. ThomasMcCoy + R. ThomasMcCoy IanTenney AlexisRoss TalLinzen BenjaminVan Durme - Samuel R.Bowman + Samuel R.Bowman ElliePavlick 235–249 We introduce a set of nine challenge tasks that test for the understanding of function words. These tasks are created by structurally mutating sentences from existing datasets to target the comprehension of specific types of function words (e.g., prepositions, wh-words). Using these probing tasks, we explore the effects of various pretraining objectives for sentence encoders (e.g., language modeling, CCG supertagging and natural language inference (NLI)) on the learned representations. Our results show that pretraining on CCG—our most syntactic objective—performs the best on average across our probing tasks, suggesting that syntactic knowledge helps function word comprehension. Language modeling also shows strong performance, supporting its widespread use for pretraining state-of-the-art NLP models. Overall, no pretraining objective dominates across the board, and our function word probing tasks highlight several intuitive differences between pretraining objectives, e.g., that NLI helps the comprehension of negation. @@ -333,9 +333,9 @@ On Adversarial Removal of Hypothesis-only Bias in Natural Language Inference YonatanBelinkov AdamPoliak - StuartShieber + StuartShieber BenjaminVan Durme - AlexanderRush + AlexanderRush 256–262 Popular Natural Language Inference (NLI) datasets have been shown to be tainted by hypothesis-only biases. Adversarial learning may help models ignore sensitive biases and spurious correlations in data. We evaluate whether adversarial learning can be used in NLI to encourage models to learn representations free of hypothesis-only biases. Our analyses indicate that the representations learned via adversarial learning may be less biased, with only small drops in NLI accuracy. S19-1028 @@ -357,9 +357,9 @@ Target Based Speech Act Classification in Political Campaign Text - ShivashankarSubramanian - TrevorCohn - TimothyBaldwin + ShivashankarSubramanian + TrevorCohn + TimothyBaldwin 273–282 We study pragmatics in political campaign text, through analysis of speech acts and the target of each utterance. We propose a new annotation schema incorporating domain-specific speech acts, such as commissive-action, and present a novel annotated corpus of media releases and speech transcripts from the 2016 Australian election cycle. We show how speech acts and target referents can be modeled as sequential classification, and evaluate several techniques, exploiting contextualized word representations, semi-supervised learning, task dependencies and speaker meta-data. S19-1030 @@ -401,10 +401,10 @@ S19-2 JonathanMay EkaterinaShutova - AurelieHerbelot + AurelieHerbelot XiaodanZhu MariannaApidianaki - Saif M.Mohammad + Saif M.Mohammad Association for Computational Linguistics
Minneapolis, Minnesota, USA
June @@ -446,11 +446,11 @@
<fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 2: Unsupervised Lexical Frame Induction - BehrangQasemiZadeh - Miriam R. L.Petruck + BehrangQasemiZadeh + Miriam R. L.Petruck ReginaStodden LauraKallmeyer - MarieCandito + MarieCandito 16–30 This paper presents Unsupervised Lexical Frame Induction, Task 2 of the International Workshop on Semantic Evaluation in 2019. Given a set of prespecified syntactic forms in context, the task requires that verbs and their arguments be clustered to resemble semantic frame structures. Results are useful in identifying polysemous words, i.e., those whose frame structures are not easily distinguished, as well as discerning semantic relations of the arguments. Evaluation of unsupervised frame induction methods fell into two tracks: Task A) Verb Clustering based on FrameNet 1.7; and B) Argument Clustering, with B.1) based on FrameNet’s core frame elements, and B.2) on VerbNet 3.2 semantic roles. The shared task attracted nine teams, of whom three reported promising results. This paper describes the task and its data, reports on methods and resources that these systems used, and offers a comparison to human annotation. S19-2003 @@ -459,7 +459,7 @@ Neural <fixed-case>GRANN</fixed-case>y at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 2: A combined approach for better modeling of semantic relationships in semantic frame induction - NikolayArefyev + NikolayArefyev BorisSheludko AdisDavletov DmitryKharchev @@ -487,7 +487,7 @@ <fixed-case>ANA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 3: Contextual Emotion detection in Conversations through hierarchical <fixed-case>LSTM</fixed-case>s and <fixed-case>BERT</fixed-case> ChenyangHuang AmineTrabelsi - OsmarZaïane + OsmarZaïane 49–53 This paper describes the system submitted by ANA Team for the SemEval-2019 Task 3: EmoContext. We propose a novel Hierarchi- cal LSTMs for Contextual Emotion Detection (HRLCE) model. It classifies the emotion of an utterance given its conversational con- text. The results show that, in this task, our HRCLE outperforms the most recent state-of- the-art text classification framework: BERT. We combine the results generated by BERT and HRCLE to achieve an overall score of 0.7709 which ranked 5th on the final leader board of the competition among 165 Teams. S19-2006 @@ -524,7 +524,7 @@ <fixed-case>FERMI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Using Sentence embeddings to Identify Hate Speech Against Immigrants and Women in <fixed-case>T</fixed-case>witter VijayasaradhiIndurthi BakhtiyarSyed - ManishShrivastava + ManishShrivastava NikhilChakravartula ManishGupta VasudevaVarma @@ -537,8 +537,8 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Identifying and Categorizing Offensive Language in Social Media (<fixed-case>O</fixed-case>ffens<fixed-case>E</fixed-case>val) MarcosZampieri - ShervinMalmasi - PreslavNakov + ShervinMalmasi + PreslavNakov SaraRosenthal NouraFarra RiteshKumar @@ -565,7 +565,7 @@ ShengHuang Abdul RafaeKhan ShengqiangZhang - WeiweiSun + WeiweiSun JiaXu 92–96 This paper describes the systems of the CUNY-PKU team in SemEval 2019 Task 1: Cross-lingual Semantic Parsing with UCCA. We introduce a novel model by applying a cascaded MLP and BiLSTM model. Then, we ensemble multiple system-outputs by reparsing. In particular, we introduce a new decoding algorithm for building the UCCA representation. Our system won the first place in one track (French-20K-Open), second places in four tracks (English-Wiki-Open, English-20K-Open, German-20K-Open, and German-20K-Closed), and third place in one track (English-20K-Closed), among all seven tracks. @@ -600,7 +600,7 @@ <fixed-case>M</fixed-case>ask<fixed-case>P</fixed-case>arse@Deskin at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 1: Cross-lingual <fixed-case>UCCA</fixed-case> Semantic Parsing using Recursive Masked Sequence Tagging GabrielMarzinotto JohannesHeinecke - GéraldineDamnati + GéraldineDamnati 107–112 This paper describes our recursive system for SemEval-2019 Task 1: Cross-lingual Semantic Parsing with UCCA. Each recursive step consists of two parts. We first perform semantic parsing using a sequence tagger to estimate the probabilities of the UCCA categories in the sentence. Then, we apply a decoding policy which interprets these probabilities and builds the graph nodes. Parsing is done recursively, we perform a first inference on the sentence to extract the main scenes and links and then we recursively apply our model on the sentence using a masking features that reflects the decisions made in previous steps. Process continues until the terminal nodes are reached. We chose a standard neural tagger and we focus on our recursive parsing strategy and on the cross lingual transfer problem to develop a robust model for the French language, using only few training samples S19-2015 @@ -632,9 +632,9 @@ <fixed-case>HHMM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 2: Unsupervised Frame Induction using Contextualized Word Embeddings SabaAnwar DmitryUstalov - NikolayArefyev - Simone PaoloPonzetto - ChrisBiemann + NikolayArefyev + Simone PaoloPonzetto + ChrisBiemann AlexanderPanchenko 125–129 We present our system for semantic frame induction that showed the best performance in Subtask B.1 and finished as the runner-up in Subtask A of the SemEval 2019 Task 2 on unsupervised semantic frame induction (Qasem-iZadeh et al., 2019). Our approach separates this task into two independent steps: verb clustering using word and their context embeddings and role labeling by combining these embeddings with syntactical features. A simple combination of these steps shows very competitive results and can be extended to process other datasets and languages. @@ -646,11 +646,11 @@ <fixed-case>L</fixed-case>2<fixed-case>F</fixed-case>/<fixed-case>INESC</fixed-case>-<fixed-case>ID</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 2: Unsupervised Lexical Semantic Frame Induction using Contextualized Word Representations EugénioRibeiro VâniaMendonça - RicardoRibeiro - DavidMartins de Matos + RicardoRibeiro + DavidMartins de Matos AlbertoSardinha Ana LúciaSantos - LuísaCoheur + LuísaCoheur 130–136 Building large datasets annotated with semantic information, such as FrameNet, is an expensive process. Consequently, such resources are unavailable for many languages and specific domains. This problem can be alleviated by using unsupervised approaches to induce the frames evoked by a collection of documents. That is the objective of the second task of SemEval 2019, which comprises three subtasks: clustering of verbs that evoke the same frame and clustering of arguments into both frame-specific slots and semantic roles. We approach all the subtasks by applying a graph clustering algorithm on contextualized embedding representations of the verbs and arguments. Using such representations is appropriate in the context of this task, since they provide cues for word-sense disambiguation. Thus, they can be used to identify different frames evoked by the same words. Using this approach we were able to outperform all of the baselines reported for the task on the test set in terms of Purity F1, as well as in terms of BCubed F1 in most cases. S19-2019 @@ -668,7 +668,7 @@ <fixed-case>CA</fixed-case>i<fixed-case>RE</fixed-case>_<fixed-case>HKUST</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 3: Hierarchical Attention for Dialogue Emotion Classification - Genta IndraWinata + Genta IndraWinata AndreaMadotto ZhaojiangLin JaminShin @@ -728,7 +728,7 @@ Ana ValeriaGonzález VictorPetrén Bach Hansen JoachimBingel - AndersSøgaard + AndersSøgaard 169–174 This work describes the system presented by the CoAStaL Natural Language Processing group at University of Copenhagen. The main system we present uses the same attention mechanism presented in (Yang et al., 2016). Our overall model architecture is also inspired by their hierarchical classification model and adapted to deal with classification in dialogue by encoding information at the turn level. We use different encodings for each turn to create a more expressive representation of dialogue context which is then fed into our classifier. We also define a custom preprocessing step in order to deal with language commonly used in interactions across many social media outlets. Our proposed system achieves a micro F1 score of 0.7340 on the test set and shows significant gains in performance compared to a system using dialogue level encoding. S19-2026 @@ -778,7 +778,7 @@ <fixed-case>EL</fixed-case>i<fixed-case>RF</fixed-case>-<fixed-case>UPV</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 3: Snapshot Ensemble of Hierarchical Convolutional Neural Networks for Contextual Emotion Detection José-ÁngelGonzález - Lluís-F.Hurtado + Lluís-F.Hurtado FerranPla 195–199 This paper describes the approach developed by the ELiRF-UPV team at SemEval 2019 Task 3: Contextual Emotion Detection in Text. We have developed a Snapshot Ensemble of 1D Hierarchical Convolutional Neural Networks to extract features from 3-turn conversations in order to perform contextual emotion detection in text. This Snapshot Ensemble is obtained by averaging the models selected by a Genetic Algorithm that optimizes the evaluation measure. The proposed ensemble obtains better results than a single model and it obtains competitive and promising results on Contextual Emotion Detection in Text. @@ -818,7 +818,7 @@ <fixed-case>EPITA</fixed-case>-<fixed-case>ADAPT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 3: Detecting emotions in textual conversations using deep learning models combination - AbdessalamBouchekif + AbdessalamBouchekif PraveenJoshi LatifaBouchekif HaithemAfli @@ -849,7 +849,7 @@ <fixed-case>GWU</fixed-case> <fixed-case>NLP</fixed-case> Lab at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 3 : <fixed-case>E</fixed-case>mo<fixed-case>C</fixed-case>ontext: Effectiveness of<fixed-case>C</fixed-case>ontextual Information in Models for Emotion Detection in<fixed-case>S</fixed-case>entence-level at Multi-genre Corpus ShabnamTafreshi - MonaDiab + MonaDiab 230–235 In this paper we present an emotion classifier models that submitted to the SemEval-2019 Task 3 : EmoContext. Our approach is a Gated Recurrent Neural Network (GRU) model with attention layer is bootstrapped with contextual information and trained with a multigenre corpus, which is combination of several popular emotional data sets. We utilize different word embeddings to empirically select the most suited embedding to represent our features. Our aim is to build a robust emotion classifier that can generalize emotion detection, which is to learn emotion cues in a noisy training environment. To fulfill this aim we train our model with a multigenre emotion corpus, this way we leverage from having more training set. We achieved overall %56.05 f1-score and placed 144. Given our aim and noisy training environment, the results are anticipated. S19-2038 @@ -861,7 +861,7 @@ ArikPamnani RajatGoel JayeshChoudhari - MayankSingh + MayankSingh 236–240 Recent advancements in Internet and Mobile infrastructure have resulted in the development of faster and efficient platforms of communication. These platforms include speech, facial and text-based conversational mediums. Majority of these are text-based messaging platforms. Development of Chatbots that automatically understand latent emotions in the textual message is a challenging task. In this paper, we present an automatic emotion detection system that aims to detect the emotion of a person textually conversing with a chatbot. We explore deep learning techniques such as CNN and LSTM based neural networks and outperformed the baseline score by 14%. The trained model and code are kept in public domain. S19-2039 @@ -1012,9 +1012,9 @@ <fixed-case>SINAI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 3: Using affective features for emotion classification in textual conversations Flor MiriamPlaza-del-Arco - M. DoloresMolina-González - MaiteMartin - L. AlfonsoUreña-López + M. DoloresMolina-González + MaiteMartin + L. AlfonsoUreña-López 307–311 Detecting emotions in textual conversation is a challenging problem in absence of nonverbal cues typically associated with emotion, like fa- cial expression or voice modulations. How- ever, more and more users are using message platforms such as WhatsApp or Telegram. For this reason, it is important to develop systems capable of understanding human emotions in textual conversations. In this paper, we carried out different systems to analyze the emotions of textual dialogue from SemEval-2019 Task 3: EmoContext for English language. Our main contribution is the integration of emotional and sentimental features in the classification using the SVM algorithm. S19-2053 @@ -1048,7 +1048,7 @@ <fixed-case>SWAP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 3: Emotion detection in conversations through Tweets, <fixed-case>CNN</fixed-case> and <fixed-case>LSTM</fixed-case> deep neural networks MarcoPolignano Marcode Gemmis - GiovanniSemeraro + GiovanniSemeraro 324–329 Emotion detection from user-generated contents is growing in importance in the area of natural language processing. The approach we proposed for the EmoContext task is based on the combination of a CNN and an LSTM using a concatenation of word embeddings. A stack of convolutional neural networks (CNN) is used for capturing the hierarchical hidden relations among embedding features. Meanwhile, a long short-term memory network (LSTM) is used for capturing information shared among words of the sentence. Each conversation has been formalized as a list of word embeddings, in particular during experimental runs pre-trained Glove and Google word embeddings have been evaluated. Surface lexical features have been also considered, but they have been demonstrated to be not usefully for the classification in this specific task. The final system configuration achieved a micro F1 score of 0.7089. The python code of the system is fully available at https://github.com/marcopoli/EmoContext2019 S19-2056 @@ -1061,7 +1061,7 @@ AngeloBasile MarcFranco-Salvador NehaPawar - SanjaŠtajner + SanjaŠtajner MaraChinea Rios YassineBenajiba 330–334 @@ -1106,7 +1106,7 @@ <fixed-case>T</fixed-case>okyo<fixed-case>T</fixed-case>ech_<fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 3: Emotion-related Symbols in Emotion Detection ZhishenYang SamVijlbrief - NaoakiOkazaki + NaoakiOkazaki 350–354 This paper presents our contextual emotion detection system in approaching the SemEval2019 shared task 3: EmoContext: Contextual Emotion Detection in Text. This system cooperates with an emotion detection neural network method (Poria et al., 2017), emoji2vec (Eisner et al., 2016) embedding, word2vec embedding (Mikolov et al., 2013), and our proposed emoticon and emoji preprocessing method. The experimental results demonstrate the usefulness of our emoticon and emoji prepossessing method, and representations of emoticons and emoji contribute model’s emotion detection. S19-2061 @@ -1175,7 +1175,7 @@ IqraAmeer Muhammad Hammad FahimSiddiqui GrigoriSidorov - AlexanderGelbukh + AlexanderGelbukh 382–386 In recent years, the use of social media has in-creased incredibly. Social media permits Inter-net users a friendly platform to express their views and opinions. Along with these nice and distinct communication chances, it also allows bad things like usage of hate speech. Online automatic hate speech detection in various aspects is a significant scientific problem. This paper presents the Instituto Politécnico Nacional (Mexico) approach for the Semeval 2019 Task-5 [Hateval 2019] (Basile et al., 2019) competition for Multilingual Detection of Hate Speech on Twitter. The goal of this paper is to detect (A) Hate speech against immigrants and women, (B) Aggressive behavior and target classification, both for English and Spanish. In the proposed approach, we used a bag of words model with preprocessing (stem-ming and stop words removal). We submitted two different systems with names: (i) CIC-1 and (ii) CIC-2 for Hateval 2019 shared task. We used TF values in the first system and TF-IDF for the second system. The first system, CIC-1 got 2nd rank in subtask B for both English and Spanish languages with EMR score of 0.568 for English and 0.675 for Spanish. The second system, CIC-2 was ranked 4th in sub-task A and 1st in subtask B for Spanish language with a macro-F1 score of 0.727 and EMR score of 0.705 respectively. S19-2067 @@ -1209,7 +1209,7 @@ <fixed-case>GSI</fixed-case>-<fixed-case>UPM</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Semantic Similarity and Word Embeddings for Multilingual Detection of Hate Speech Against Immigrants and Women on <fixed-case>T</fixed-case>witter DiegoBenito OscarAraque - Carlos A.Iglesias + Carlos A.Iglesias 396–403 This paper describes the GSI-UPM system for SemEval-2019 Task 5, which tackles multilingual detection of hate speech on Twitter. The main contribution of the paper is the use of a method based on word embeddings and semantic similarity combined with traditional paradigms, such as n-grams, TF-IDF and POS. This combination of several features is fine-tuned through ablation tests, demonstrating the usefulness of different features. While our approach outperforms baseline classifiers on different sub-tasks, the best of our submitted runs reached the 5th position on the Spanish sub-task A. S19-2070 @@ -1255,7 +1255,7 @@ <fixed-case>JCTDHS</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Detection of Hate Speech in Tweets using Deep Learning Methods, Character N-gram Features, and Preprocessing Methods - YaakovHaCohen-Kerner + YaakovHaCohen-Kerner ElyashivShayovitz ShalomRochman EliCahn @@ -1281,7 +1281,7 @@ <fixed-case>LT</fixed-case>3 at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Multilingual Detection of Hate Speech Against Immigrants and Women in <fixed-case>T</fixed-case>witter (hat<fixed-case>E</fixed-case>val) NinaBauwelinck GillesJacobs - VéroniqueHoste + VéroniqueHoste ElsLefever 436–440 This paper describes our contribution to the SemEval-2019 Task 5 on the detection of hate speech against immigrants and women in Twitter (hatEval). We considered a supervised classification-based approach to detect hate speech in English tweets, which combines a variety of standard lexical and syntactic features with specific features for capturing offensive language. Our experimental results show good classification performance on the training data, but a considerable drop in recall on the held-out test set. @@ -1306,7 +1306,7 @@ Luis EnriqueArgota Vega Jorge CarlosReyes-Magaña HelenaGómez-Adorno - GemmaBel-Enguix + GemmaBel-Enguix 447–452 This paper presents our approach to the Task 5 of Semeval-2019, which aims at detecting hate speech against immigrants and women in Twitter. The task consists of two sub-tasks, in Spanish and English: (A) detection of hate speech and (B) classification of hateful tweets as aggressive or not, and identification of the target harassed as individual or group. We used linguistically motivated features and several types of n-grams (words, characters, functional words, punctuation symbols, POS, among others). For task A, we trained a Support Vector Machine using a combinatorial framework, whereas for task B we followed a multi-labeled approach using the Random Forest classifier. Our approach achieved the highest F1-score in sub-task A for the Spanish language. S19-2079 @@ -1315,11 +1315,11 @@ <fixed-case>MITRE</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Transfer Learning for Multilingual Hate Speech Detection - AbigailGertner - JohnHenderson - ElizabethMerkhofer + AbigailGertner + JohnHenderson + ElizabethMerkhofer AmyMarsh - BenWellner + BenWellner GuidoZarrella 453–459 This paper describes MITRE’s participation in SemEval-2019 Task 5, HatEval: Multilingual detection of hate speech against immigrants and women in Twitter. The techniques explored range from simple bag-of-ngrams classifiers to neural architectures with varied attention mechanisms. We describe several styles of transfer learning from auxiliary tasks, including a novel method for adapting pre-trained BERT models to Twitter data. Logistic regression ties the systems together into an ensemble submitted for evaluation. The resulting system was used to produce predictions for all four HatEval subtasks, achieving the best mean rank of all teams that participated in all four conditions. @@ -1360,9 +1360,9 @@ <fixed-case>SINAI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Ensemble learning to detect hate speech against inmigrants and women in <fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>panish tweets Flor MiriamPlaza-del-Arco - M. DoloresMolina-González - MaiteMartin - L. AlfonsoUreña-López + M. DoloresMolina-González + MaiteMartin + L. AlfonsoUreña-López 476–479 Misogyny and xenophobia are some of the most important social problems. With the in- crease in the use of social media, this feeling ofhatred towards women and immigrants can be more easily expressed, therefore it can cause harmful effects on social media users. For this reason, it is important to develop systems ca- pable of detecting hateful comments automatically. In this paper, we describe our system to analyze the hate speech in English and Spanish tweets against Immigrants and Women as part of our participation in SemEval-2019 Task 5: hatEval. Our main contribution is the integration of three individual algorithms of predic- tion in a model based on Vote ensemble classifier. S19-2084 @@ -1372,7 +1372,7 @@ <fixed-case>SINAI</fixed-case>-<fixed-case>DL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Recurrent networks and data augmentation by paraphrasing ArturoMontejo-Ráez - Salud MaríaJiménez-Zafra + Salud MaríaJiménez-Zafra Miguel A.García-Cumbreras Manuel CarlosDíaz-Galiano 480–483 @@ -1396,8 +1396,8 @@ The binary trio at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Multitarget Hate Speech Detection in Tweets PatriciaChiril - FarahBenamara Zitoune - VéroniqueMoriceau + FarahBenamara Zitoune + VéroniqueMoriceau AbhishekKumar 489–493 The massive growth of user-generated web content through blogs, online forums and most notably, social media networks, led to a large spreading of hatred or abusive messages which have to be moderated. This paper proposes a supervised approach to hate speech detection towards immigrants and women in English tweets. Several models have been developed ranging from feature-engineering approaches to neural ones. @@ -1430,9 +1430,9 @@ Tw-<fixed-case>S</fixed-case>t<fixed-case>AR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: N-gram embeddings for Hate Speech Detection in Multilingual Tweets HalaMulki - ChediBechikh Ali + ChediBechikh Ali HatemHaddad - IsmailBabaoğlu + IsmailBabaoğlu 503–507 In this paper, we describe our contribution in SemEval-2019: subtask A of task 5 “Multilingual detection of hate speech against immigrants and women in Twitter (HatEval)”. We developed two hate speech detection model variants through Tw-StAR framework. While the first model adopted one-hot encoding ngrams to train an NB classifier, the second generated and learned n-gram embeddings within a feedforward neural network. For both models, specific terms, selected via MWT patterns, were tagged in the input data. With two feature types employed, we could investigate the ability of n-gram embeddings to rival one-hot n-grams. Our results showed that in English, n-gram embeddings outperformed one-hot ngrams. However, representing Spanish tweets by one-hot n-grams yielded a slightly better performance compared to that of n-gram embeddings. The official ranking indicated that Tw-StAR ranked 9th for English and 20th for Spanish. S19-2090 @@ -1442,10 +1442,10 @@ <fixed-case>UA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Setting A Strong Linear Baseline for Hate Speech Detection CarlosPerelló - DavidTomás + DavidTomás AlbertoGarcia-Garcia JoseGarcia-Rodriguez - JoseCamacho-Collados + JoseCamacho-Collados 508–513 This paper describes the system developed at the University of Alicante (UA) for the SemEval 2019 Task 5: Shared Task on Multilingual Detection of Hate. The purpose of this work is to build a strong baseline for hate speech detection, using a traditional machine learning approach with standard textual features, which could serve in a near future as a reference to compare with deep learning systems. We participated in both task A (Hate Speech Detection against Immigrants and Women) and task B (Aggressive behavior and Target Classification). Despite its simplicity, our system obtained a remarkable F1-score of 72.5 (sixth highest) and an accuracy of 73.6 (second highest) in Spanish (task A), outperforming more complex neural models from a total of 40 participant systems. S19-2091 @@ -1465,9 +1465,9 @@ <fixed-case>UTFPR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Hate Speech Identification with Recurrent Neural Networks - Gustavo HenriquePaetzold + Gustavo HenriquePaetzold MarcosZampieri - ShervinMalmasi + ShervinMalmasi 519–523 In this paper we revisit the problem of automatically identifying hate speech in posts from social media. We approach the task using a system based on minimalistic compositional Recurrent Neural Networks (RNN). We tested our approach on the SemEval-2019 Task 5: Multilingual Detection of Hate Speech Against Immigrants and Women in Twitter (HatEval) shared task dataset. The dataset made available by the HatEval organizers contained English and Spanish posts retrieved from Twitter annotated with respect to the presence of hateful content and its target. In this paper we present the results obtained by our system in comparison to the other entries in the shared task. Our system achieved competitive performance ranking 7th in sub-task A out of 62 systems in the English track. S19-2093 @@ -1477,7 +1477,7 @@ Vista.ue at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 5: Single Multilingual Hate Speech Detection Model KashyapRaiyani - TeresaGonçalves + TeresaGonçalves PauloQuaresma VitorNogueira 524–528 @@ -1545,7 +1545,7 @@ <fixed-case>CAM</fixed-case>sterdam at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Neural and graph-based feature extraction for the identification of offensive tweets GuyAglionby - ChrisDavis + ChrisDavis PushkarMishra AndrewCaines HelenYannakoudakis @@ -1562,7 +1562,7 @@ <fixed-case>CN</fixed-case>-<fixed-case>HIT</fixed-case>-<fixed-case>MI</fixed-case>.<fixed-case>T</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Offensive Language Identification Based on <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case> with Double Attention YaojieZhang BingXu - TiejunZhao + TiejunZhao 564–570 Offensive language has become pervasive in social media. In Offensive Language Identification tasks, it may be difficult to predict accurately only according to the surface words. So we try to dig deeper semantic information of text. This paper presents use an attention-based two layers bidirectional longshort memory neural network (BiLSTM) for semantic feature extraction. Additionally, a residual connection mechanism is used to synthesize two different deep features, and an emoji attention mechanism is used to extract semantic information of emojis in text. We participated in three sub-tasks of SemEval 2019 Task 6 as CN-HIT-MI.T team. Our macro-averaged F1-score in sub-task A is 0.768, ranking 28/103. We got 0.638 in sub-task B, ranking 30/75. In sub-task C, we got 0.549, ranking 22/65. We also tried some other methods of not submitting results. S19-2101 @@ -1650,7 +1650,7 @@ Fermi at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Identifying and Categorizing Offensive Language in Social Media using Sentence Embeddings VijayasaradhiIndurthi BakhtiyarSyed - ManishShrivastava + ManishShrivastava ManishGupta VasudevaVarma 611–616 @@ -1662,7 +1662,7 @@ Ghmerti at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: A Deep Word- and Character-based Approach to Offensive Language Identification EhsanDoostmohammadi - HosseinSameti + HosseinSameti AliSaffar 617–621 This paper presents the models submitted by Ghmerti team for subtasks A and B of the OffensEval shared task at SemEval 2019. OffensEval addresses the problem of identifying and categorizing offensive language in social media in three subtasks; whether or not a content is offensive (subtask A), whether it is targeted (subtask B) towards an individual, a group, or other entities (subtask C). The proposed approach includes character-level Convolutional Neural Network, word-level Recurrent Neural Network, and some preprocessing. The performance achieved by the proposed model is 77.93% macro-averaged F1-score. @@ -1697,8 +1697,8 @@ Hope at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Mining social media language to discover offensive language Gabriel FlorentinPatras Diana FlorinaLungu - DanielaGifu - DianaTrandabat + DanielaGifu + DianaTrandabat 635–638 User’s content share through social media has reached huge proportions nowadays. However, along with the free expression of thoughts on social media, people risk getting exposed to various aggressive statements. In this paper, we present a system able to identify and classify offensive user-generated content. S19-2113 @@ -1719,7 +1719,7 @@ <fixed-case>JCTICOL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Classifying Offensive Language in Social Media using Deep Learning Methods, Word/Character N-gram Features, and Preprocessing Methods - YaakovHaCohen-Kerner + YaakovHaCohen-Kerner ZivBen-David GalDidi EliCahn @@ -1757,7 +1757,7 @@ PreetiMukherjee MainakPal SomnathBanerjee - Sudip KumarNaskar + Sudip KumarNaskar 662–667 This paper describes our system submissions as part of our participation (team name: JU_ETCE_17_21) in the SemEval 2019 shared task 6: “OffensEval: Identifying and Catego- rizing Offensive Language in Social Media”. We participated in all the three sub-tasks: i) Sub-task A: offensive language identification, ii) Sub-task B: automatic categorization of of- fense types, and iii) Sub-task C: offense target identification. We employed machine learn- ing as well as deep learning approaches for the sub-tasks. We employed Convolutional Neural Network (CNN) and Recursive Neu- ral Network (RNN) Long Short-Term Memory (LSTM) with pre-trained word embeddings. We used both word2vec and Glove pre-trained word embeddings. We obtained the best F1- score using CNN based model for sub-task A, LSTM based model for sub-task B and Lo- gistic Regression based model for sub-task C. Our best submissions achieved 0.7844, 0.5459 and 0.48 F1-scores for sub-task A, sub-task B and sub-task C respectively. S19-2118 @@ -1767,7 +1767,7 @@ <fixed-case>KMI</fixed-case>-Coling at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Exploring N-grams for Offensive Language detection PriyaRani - Atul Kr.Ojha + Atul Kr.Ojha 668–671 In this paper, we present the system description of Offensive language detection tool which is developed by the KMI_Coling under the OffensEval Shared task. The OffensEval Shared Task was conducted in SemEval 2019 workshop. To develop the system, we have explored n-grams up to 8-gram and trained three different namely A, B and C systems for three different subtasks within the OffensEval task which achieves 79.76%, 87.91% and 44.37% accuracy respectively. The task was completed using the dataset provided to us by the OffensEval organisers was the part of OLID dataset. It consists of 13,240 tweets extracted from twitter and were annotated at three levels using crowdsourcing. S19-2119 @@ -1803,7 +1803,7 @@ HaiminZhang KaranUppal YamanKumar - Rajiv RatnShah + Rajiv RatnShah SimraShahid LaibaMehnaz SarthakAnand @@ -1827,7 +1827,7 @@ <fixed-case>NIT</fixed-case>_<fixed-case>A</fixed-case>gartala_<fixed-case>NLP</fixed-case>_<fixed-case>T</fixed-case>eam at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: An Ensemble Approach to Identifying and Categorizing Offensive Language in <fixed-case>T</fixed-case>witter Social Media Corpora Steve DurairajSwamy AnupamJamatia - BjörnGambäck + BjörnGambäck AmitavaDas 696–703 The paper describes the systems submitted to OffensEval (SemEval 2019, Task 6) on ‘Identifying and Categorizing Offensive Language in Social Media’ by the ‘NIT_Agartala_NLP_Team’. A Twitter annotated dataset of 13,240 English tweets was provided by the task organizers to train the individual models, with the best results obtained using an ensemble model composed of six different classifiers. The ensemble model produced macro-averaged F1-scores of 0.7434, 0.7078 and 0.4853 on Subtasks A, B, and C, respectively. The paper highlights the overall low predictive nature of various linguistic features and surface level count features, as well as the limitations of a traditional machine learning approach when compared to a Deep Learning counterpart. @@ -1884,9 +1884,9 @@ <fixed-case>SINAI</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Incorporating lexicon knowledge into <fixed-case>SVM</fixed-case> learning to identify and categorize offensive language in social media Flor MiriamPlaza-del-Arco - M. DoloresMolina-González - MaiteMartin - L. AlfonsoUreña-López + M. DoloresMolina-González + MaiteMartin + L. AlfonsoUreña-López 735–738 Offensive language has an impact across society. The use of social media has aggravated this issue among online users, causing suicides in the worst cases. For this reason, it is important to develop systems capable of identifying and detecting offensive language in text automatically. In this paper, we developed a system to classify offensive tweets as part of our participation in SemEval-2019 Task 6: OffensEval. Our main contribution is the integration of lexical features in the classification using the SVM algorithm. S19-2129 @@ -1908,7 +1908,7 @@ Stop <fixed-case>P</fixed-case>ropag<fixed-case>H</fixed-case>ate at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Tasks 5 and 6: Are abusive language classification results reproducible? PaulaFortuna - JuanSoler-Company + JuanSoler-Company SérgioNunes 745–752 This paper summarizes the participation of Stop PropagHate team at SemEval 2019. Our approach is based on replicating one of the most relevant works on the literature, using word embeddings and LSTM. After circumventing some of the problems of the original code, we found poor results when applying it to the HatEval contest (F1=0.45). We think this is due mainly to inconsistencies in the data of this contest. Finally, for the OffensEval the classifier performed well (F1=0.74), proving to have a better performance for offense detection than for hate speech. @@ -1925,7 +1925,7 @@ GeetikaB DyaneswaranS S MiltonRajendram - MirnalineeT T + MirnalineeT T 753–758 Task 6 of SemEval 2019 involves identifying and categorizing offensive language in social media. The systems developed by TECHSSN team uses multi-level classification techniques. We have developed two systems. In the first system, the first level of classification is done by a multi-branch 2D CNN classifier with Google’s pre-trained Word2Vec embedding and the second level of classification by string matching technique supported by offensive and bad words dictionary. The second system uses a multi-branch 1D CNN classifier with Glove pre-trained embedding layer for the first level of classification and string matching for the second level of classification. Input data with a probability of less than 0.70 in the first level are passed on to the second level. The misclassified examples are classified correctly in the second level. S19-2132 @@ -1978,7 +1978,7 @@ <fixed-case>UHH</fixed-case>-<fixed-case>LT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Supervised vs. Unsupervised Transfer Learning for Offensive Language Detection GregorWiedemann EugenRuppert - ChrisBiemann + ChrisBiemann 782–787 We present a neural network based approach of transfer learning for offensive language detection. For our system, we compare two types of knowledge transfer: supervised and unsupervised pre-training. Supervised pre-training of our bidirectional GRU-3-CNN architecture is performed as multi-task learning of parallel training of five different tasks. The selected tasks are supervised classification problems from public NLP resources with some overlap to offensive language such as sentiment detection, emoji classification, and aggressive language classification. Unsupervised transfer learning is performed with a thematic clustering of 40M unlabeled tweets via LDA. Based on this dataset, pre-training is performed by predicting the main topic of a tweet. Results indicate that unsupervised transfer from large datasets performs slightly better than supervised training on small ‘near target category’ datasets. In the SemEval Task, our system ranks 14 out of 103 participants. S19-2137 @@ -1989,7 +1989,7 @@ <fixed-case>UM</fixed-case>-<fixed-case>IU</fixed-case>@<fixed-case>LING</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Identifying Offensive Tweets Using <fixed-case>BERT</fixed-case> and <fixed-case>SVM</fixed-case>s JianZhu ZuoyuTian - SandraKübler + SandraKübler 788–795 This paper describes the UM-IU@LING’s system for the SemEval 2019 Task 6: Offens-Eval. We take a mixed approach to identify and categorize hate speech in social media. In subtask A, we fine-tuned a BERT based classifier to detect abusive content in tweets, achieving a macro F1 score of 0.8136 on the test data, thus reaching the 3rd rank out of 103 submissions. In subtasks B and C, we used a linear SVM with selected character n-gram features. For subtask C, our system could identify the target of abuse with a macro F1 score of 0.5243, ranking it 27th out of 65 submissions. S19-2138 @@ -2008,7 +2008,7 @@ <fixed-case>UTFPR</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 6: Relying on Compositionality to Find Offense - Gustavo HenriquePaetzold + Gustavo HenriquePaetzold 801–805 We present the UTFPR system for the OffensEval shared task of SemEval 2019: A character-to-word-to-sentence compositional RNN model trained exclusively over the training data provided by the organizers. We find that, although not very competitive for the task at hand, it offers a robust solution to the orthographic irregularity inherent to tweets. S19-2140 @@ -2078,7 +2078,7 @@ YeJiang JohannPetrak XingyiSong - KalinaBontcheva + KalinaBontcheva DianaMaynard 840–844 This paper describes the participation of team “bertha-von-suttner” in the SemEval2019 task 4 Hyperpartisan News Detection task. Our system uses sentence representations from averaged word embeddings generated from the pre-trained ELMo model with Convolutional Neural Networks and Batch Normalization for predicting hyperpartisan news. The final predictions were generated from the averaged predictions of an ensemble of models. With this architecture, our system ranked in first place, based on accuracy, the official scoring metric. @@ -2088,13 +2088,13 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 7: <fixed-case>R</fixed-case>umour<fixed-case>E</fixed-case>val, Determining Rumour Veracity and Support for Rumours - GenevieveGorrell + GenevieveGorrell ElenaKochkina MariaLiakata AhmetAker ArkaitzZubiaga - KalinaBontcheva - LeonDerczynski + KalinaBontcheva + LeonDerczynski 845–854 Since the first RumourEval shared task in 2017, interest in automated claim validation has greatly increased, as the danger of “fake news” has become a mainstream concern. However automated support for rumour verification remains in its infancy. It is therefore important that a shared task in this area continues to provide a focus for effort, which is likely to increase. Rumour verification is characterised by the need to consider evolving conversations and news updates to reach a verdict on a rumour’s veracity. As in RumourEval 2017 we provided a dataset of dubious posts and ensuing conversations in social media, annotated both for stance and veracity. The social media rumours stem from a variety of breaking news stories and the dataset is expanded to include Reddit as well as new Twitter posts. There were two concrete tasks; rumour stance prediction and rumour verification, which we present in detail along with results achieved by participants. We received 22 system submissions (a 70% increase from RumourEval 2017) many of which used state-of-the-art methodology to tackle the challenges involved. S19-2147 @@ -2119,7 +2119,7 @@ PepaAtanasova RamyBaly MitraMohtarami - PreslavNakov + PreslavNakov 860–869 We present SemEval-2019 Task 8 on Fact Checking in Community Question Answering Forums, which features two subtasks. Subtask A is about deciding whether a question asks for factual information vs. an opinion/advice vs. just socializing. Subtask B asks to predict whether an answer to a factual question is true, false or not a proper answer. We received 17 official submissions for subtask A and 11 official submissions for Subtask B. For subtask A, all systems improved over the majority class baseline. For Subtask B, all systems were below a majority class baseline, but several systems were very close to it. The leaderboard and the data from the competition can be found at http://competitions.codalab.org/competitions/20022. S19-2149 @@ -2170,7 +2170,7 @@ CristianPetrescu-Prahova GabrielStanovsky HannanehHajishirzi - RikKoncel-Kedziorski + RikKoncel-Kedziorski 893–899 We report on the SemEval 2019 task on math question answering. We provided a question set derived from Math SAT practice exams, including 2778 training questions and 1082 test questions. For a significant subset of these questions, we also provided SMT-LIB logical form annotations and an interpreter that could solve these logical forms. Systems were evaluated based on the percentage of correctly answered questions. The top system correctly answered 45% of the test questions, a considerable improvement over the 17% random guessing baseline. S19-2153 @@ -2190,11 +2190,11 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 12: Toponym Resolution in Scientific Papers - DavyWeissenbacher + DavyWeissenbacher ArjunMagge KarenO’Connor MatthewScotch - GracielaGonzalez-Hernandez + GracielaGonzalez-Hernandez 907–916 We present the SemEval-2019 Task 12 which focuses on toponym resolution in scientific articles. Given an article from PubMed, the task consists of detecting mentions of names of places, or toponyms, and mapping the mentions to their corresponding entries in GeoNames.org, a database of geospatial locations. We proposed three subtasks. In Subtask 1, we asked participants to detect all toponyms in an article. In Subtask 2, given toponym mentions as input, we asked participants to disambiguate them by linking them to entries in GeoNames. In Subtask 3, we asked participants to perform both the detection and the disambiguation steps for all toponyms. A total of 29 teams registered, and 8 teams submitted a system run. We summarize the corpus and the tools created for the challenge. They are freely available at https://competitions.codalab.org/competitions/19948. We also analyze the methods, the results and the errors made by the competing systems with a focus on toponym disambiguation. S19-2155 @@ -2232,7 +2232,7 @@ <fixed-case>C</fixed-case>ardiff <fixed-case>U</fixed-case>niversity at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 4: Linguistic Features for Hyperpartisan News Detection CarlaPérez-Almendros - LuisEspinosa-Anke + LuisEspinosa-Anke StevenSchockaert 929–933 This paper summarizes our contribution to the Hyperpartisan News Detection task in SemEval 2019. We experiment with two different approaches: 1) an SVM classifier based on word vector averages and hand-crafted linguistic features, and 2) a BiLSTM-based neural text classifier trained on a filtered training set. Surprisingly, despite their different nature, both approaches achieve an accuracy of 0.74. The main focus of this paper is to further analyze the remarkable fact that a simple feature-based approach can perform on par with modern neural classifiers. We also highlight the effectiveness of our filtering strategy for training the neural network on a large but noisy training set. @@ -2264,7 +2264,7 @@ Doris <fixed-case>M</fixed-case>artin at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 4: Hyperpartisan News Detection with Generic Semi-supervised Features - RodrigoAgerri + RodrigoAgerri 944–948 In this paper we describe our participation to the Hyperpartisan News Detection shared task at SemEval 2019. Motivated by the late arrival of Doris Martin, we test a previously developed document classification system which consists of a combination of clustering features implemented on top of some simple shallow local features. We show how leveraging distributional features obtained from large in-domain unlabeled data helps to easily and quickly develop a reasonably good performing system for detecting hyperpartisan news. The system and models generated for this task are publicly available. S19-2161 @@ -2350,7 +2350,7 @@ Rouletabille at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 4: Neural Network Baseline for Identification of Hyperpartisan Publishers - Jose G.Moreno + Jose G.Moreno YoannPitarch KarenPinel-Sauvagnat GillesHubert @@ -2435,7 +2435,7 @@ DanielShaprin GiovanniDa San Martino AlbertoBarrón-Cedeño - PreslavNakov + PreslavNakov 1012–1015 We describe the system submitted by the Jack Ryder team to SemEval-2019 Task 4 on Hyperpartisan News Detection. The task asked participants to predict whether a given article is hyperpartisan, i.e., extreme-left or extreme-right. We proposed an approach based on BERT with fine-tuning, which was ranked 7th out 28 teams on the distantly supervised dataset, where all articles from a hyperpartisan/non-hyperpartisan news outlet are considered to be hyperpartisan/non-hyperpartisan. On a manually annotated test dataset, where human annotators double-checked the labels, we were ranked 29th out of 42 teams. S19-2176 @@ -2501,8 +2501,8 @@ AlbertoBarrón-Cedeño GiovanniDa San Martino MitraMohtarami - PreslavNakov - JamesGlass + PreslavNakov + JamesGlass 1041–1046 We describe our submission to SemEval-2019 Task 4 on Hyperpartisan News Detection. We rely on a variety of engineered features originally used to detect propaganda. This is based on the assumption that biased messages are propagandistic and promote a particular political cause or viewpoint. In particular, we trained a logistic regression model with features ranging from simple bag of words to vocabulary richness and text readability. Our system achieved 72.9% accuracy on the manually annotated testset, and 60.8% on the test data that was obtained with distant supervision. Additional experiments showed that significant performance gains can be achieved with better feature pre-processing. S19-2182 @@ -2616,7 +2616,7 @@ <fixed-case>BUT</fixed-case>-<fixed-case>FIT</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 7: Determining the Rumour Stance with Pre-Trained Deep Bidirectional Transformers MartinFajcik - PavelSmrz + PavelSmrz LukasBurget 1097–1104 This paper describes our system submitted to SemEval 2019 Task 7: RumourEval 2019: Determining Rumour Veracity and Support for Rumours, Subtask A (Gorrell et al., 2019). The challenge focused on classifying whether posts from Twitter and Reddit support, deny, query, or comment a hidden rumour, truthfulness of which is the topic of an underlying discussion thread. We formulate the problem as a stance classification, determining the rumour stance of a post with respect to the previous thread post and the source thread post. The recent BERT architecture was employed to build an end-to-end system which has reached the F1 score of 61.67 % on the provided test data. Without any hand-crafted feature, the system finished at the 2nd place in the competition, only 0.2 % behind the winner. @@ -2650,7 +2650,7 @@ <fixed-case>GWU</fixed-case> <fixed-case>NLP</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 7: Hybrid Pipeline for Rumour Veracity and Stance Classification on Social Media SardarHamidian - MonaDiab + MonaDiab 1115–1119 Social media plays a crucial role as the main resource news for information seekers online. However, the unmoderated feature of social media platforms lead to the emergence and spread of untrustworthy contents which harm individuals or even societies. Most of the current automated approaches for automatically determining the veracity of a rumor are not generalizable for novel emerging topics. This paper describes our hybrid system comprising rules and a machine learning model which makes use of replied tweets to identify the veracity of the source tweet. The proposed system in this paper achieved 0.435 F-Macro in stance classification, and 0.262 F-macro and 0.801 RMSE in rumor verification tasks in Task7 of SemEval 2019. S19-2195 @@ -2660,10 +2660,10 @@ <fixed-case>SINAI</fixed-case>-<fixed-case>DL</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 7: Data Augmentation and Temporal Expressions Miguel A.García-Cumbreras - Salud MaríaJiménez-Zafra + Salud MaríaJiménez-Zafra ArturoMontejo-Ráez Manuel CarlosDíaz-Galiano - EstelaSaquete + EstelaSaquete 1120–1124 This paper describes the participation of the SINAI-DL team at RumourEval (Task 7 in SemEval 2019, subtask A: SDQC). SDQC addresses the challenge of rumour stance classification as an indirect way of identifying potential rumours. Given a tweet with several replies, our system classifies each reply into either supporting, denying, questioning or commenting on the underlying rumours. We have applied data augmentation, temporal expressions labelling and transfer learning with a four-layer neural classifier. We achieve an accuracy of 0.715 with the official run over reply tweets. S19-2196 @@ -2720,7 +2720,7 @@ <fixed-case>DOMLIN</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 8: Automated Fact Checking exploiting Ratings in Community Question Answering Forums DominikStammbach StalinVaranasi - GuenterNeumann + GuenterNeumann 1149–1154 In the following, we describe our system developed for the Semeval2019 Task 8. We fine-tuned a BERT checkpoint on the qatar living forum dump and used this checkpoint to train a number of models. Our hand-in for subtask A consists of a fine-tuned classifier from this BERT checkpoint. For subtask B, we first have a classifier deciding whether a comment is factual or non-factual. If it is factual, we retrieve intra-forum evidence and using this evidence, have a classifier deciding the comment’s veracity. We trained this classifier on ratings which we crawled from qatarliving.com S19-2201 @@ -2742,7 +2742,7 @@ Fermi at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 8: An elementary but effective approach to Question Discernment in Community <fixed-case>QA</fixed-case> Forums BakhtiyarSyed VijayasaradhiIndurthi - ManishShrivastava + ManishShrivastava ManishGupta VasudevaVarma 1160–1164 @@ -2832,7 +2832,7 @@ <fixed-case>INRIA</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 9: Suggestion Mining Using <fixed-case>SVM</fixed-case> with Handcrafted Features IliaMarkov - EricVillemonte de la Clergerie + EricVillemonte de la Clergerie 1204–1207 We present the INRIA approach to the suggestion mining task at SemEval 2019. The task consists of two subtasks: suggestion mining under single-domain (Subtask A) and cross-domain (Subtask B) settings. We used the Support Vector Machines algorithm trained on handcrafted features, function words, sentiment features, digits, and verbs for Subtask A, and handcrafted features for Subtask B. Our best run archived a F1-score of 51.18% on Subtask A, and ranked in the top ten of the submissions for Subtask B with 73.30% F1-score. S19-2211 @@ -2857,7 +2857,7 @@ SimraShahid HaiminZhang YamanKumar - RajivShah + RajivShah KaranUppal 1213–1217 In this paper we present our approach to tackle the Suggestion Mining from Online Reviews and Forums Sub-Task A. Given a review, we are asked to predict whether the review consists of a suggestion or not. Our model is based on Universal Language Model Fine-tuning for Text Classification. We apply various pre-processing techniques before training the language and the classification model. We further provide analysis of the model. Our team ranked 10th out of 34 participants, achieving an F1 score of 0.7011. @@ -2903,7 +2903,7 @@ RajalakshmiS AngelSuseelan S MiltonRajendram - MirnalineeT T + MirnalineeT T 1237–1241 This paper describes the work on mining the suggestions from online reviews and forums. Opinion mining detects whether the comments are positive, negative or neutral, while suggestion mining explores the review content for the possible tips or advice. The system developed by SSN-SPARKS team in SemEval-2019 for task 9 (suggestion mining) uses a rule-based approach for feature selection, SMOTE technique for data augmentation and deep learning technique (Convolutional Neural Network) for classification. We have compared the results with Random Forest classifier (RF) and MultiLayer Perceptron (MLP) model. Results show that the CNN model performs better than other models for both the subtasks. S19-2217 @@ -2925,7 +2925,7 @@ Team <fixed-case>T</fixed-case>aurus at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 9: Expert-informed pattern recognition for suggestion mining NellekeOostdijk - Hansvan Halteren + Hansvan Halteren 1247–1253 This paper presents our submissions to SemEval-2019 Task9, Suggestion Mining. Our system is one in a series of systems in which we compare an approach using expert-defined rules with a comparable one using machine learning. We target tasks with a syntactic or semantic component that might be better described by a human understanding the task than by a machine learner only able to count features. For Semeval-2019 Task 9, the expert rules clearly outperformed our machine learning model when training and testing on equally balanced testsets. S19-2219 @@ -2936,10 +2936,10 @@ <fixed-case>T</fixed-case>his<fixed-case>I</fixed-case>s<fixed-case>C</fixed-case>ompetition at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 9: <fixed-case>BERT</fixed-case> is unstable for out-of-domain samples CheoneumPark JuaeKim - Hyeon-guLee - Reinald KimAmplayo + Hyeon-guLee + Reinald KimAmplayo HarksooKim - JungyunSeo + JungyunSeo ChangkiLee 1254–1261 This paper describes our system, Joint Encoders for Stable Suggestion Inference (JESSI), for the SemEval 2019 Task 9: Suggestion Mining from Online Reviews and Forums. JESSI is a combination of two sentence encoders: (a) one using multiple pre-trained word embeddings learned from log-bilinear regression (GloVe) and translation (CoVe) models, and (b) one on top of word encodings from a pre-trained deep bidirectional transformer (BERT). We include a domain adversarial training module when training for out-of-domain samples. Our experiments show that while BERT performs exceptionally well for in-domain samples, several runs of the model show that it is unstable for out-of-domain samples. The problem is mitigated tremendously by (1) combining BERT with a non-BERT encoder, and (2) using an RNN-based classifier on top of BERT. Our final models obtained second place with 77.78% F-Score on Subtask A (i.e. in-domain) and achieved an F-Score of 79.59% on Subtask B (i.e. out-of-domain), even without using any additional external data. @@ -3028,8 +3028,8 @@ AlistairPlum TharinduRanasinghe PabloCalleja - ConstantinOrăsan - RuslanMitkov + ConstantinOrăsan + RuslanMitkov 1297–1301 This article describes the system submitted by the RGCL-WLV team to the SemEval 2019 Task 12: Toponym resolution in scientific papers. The system detects toponyms using a bootstrapped machine learning (ML) approach which classifies names identified using gazetteers extracted from the GeoNames geographical database. The paper evaluates the performance of several ML classifiers, as well as how the gazetteers influence the accuracy of the system. Several runs were submitted. The highest precision achieved for one of the submissions was 89%, albeit it at a relatively low recall of 49%. S19-2228 @@ -3063,7 +3063,7 @@ <fixed-case>U</fixed-case>ni<fixed-case>M</fixed-case>elb at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2019 Task 12: Multi-model combination for toponym resolution HaonanLi MinghanWang - TimothyBaldwin + TimothyBaldwin MartinTomko MariaVasardani 1313–1318 diff --git a/data/xml/S98.xml b/data/xml/S98.xml index 104ecc46d3..11d060e8ca 100644 --- a/data/xml/S98.xml +++ b/data/xml/S98.xml @@ -5,7 +5,7 @@ Proceedings of the Pilot SENSEVAL S98-1 AdamKilgarriff - MarthaPalmer + MarthaPalmer Association for Computational Linguistics
Hermonceux Castle, Sussex, UK
September diff --git a/data/xml/T75.xml b/data/xml/T75.xml index 5498ddf69d..3ad799163a 100644 --- a/data/xml/T75.xml +++ b/data/xml/T75.xml @@ -4,7 +4,7 @@ Theoretical Issues in Natural Language Processing: Supplement B.L.Nash-Webber - RogerSchank + RogerSchank tinlap 1975 @@ -37,7 +37,7 @@
What Makes <fixed-case>S</fixed-case>am Run? Script Based Techniques for Question Answering - WendyLehnert + WendyLehnert T75-1005 lehnert-1975-makes @@ -51,7 +51,7 @@ Theoretical Issues in Natural Language Processing B.L.Nash-Webber - RogerSchank + RogerSchank tinlap 1975 @@ -61,13 +61,13 @@ Augmented Phrase Structure Grammars - George E.Heidorn + George E.Heidorn T75-2001 heidorn-1975-augmented Diagnosis as a Notion of Grammar - MitchellMarcus + MitchellMarcus T75-2002 marcus-1975-diagnosis @@ -109,7 +109,7 @@
Primitives and Words - YorickWilks + YorickWilks T75-2009 wilks-1975-primitives @@ -145,7 +145,7 @@
Speaking With Manytongues: Some Problems in Modeling Speakers of Actual Discourse - John H.Clippinger, Jr. + John H.Clippinger, Jr. T75-2015 clippinger-jr-1975-speaking @@ -157,8 +157,8 @@
A Formalism for Relating Lexical and Pragmatic Information: Its Relevance to Recognition and Generation - Aravind K.Joshi - Stanley J.Rosenschein + Aravind K.Joshi + Stanley J.Rosenschein T75-2017 joshi-rosenschein-1975-formalism @@ -199,19 +199,19 @@
Considerations for Computational Theories of Speaking: Seven Things Speakers Do - John H.Clippinger, Jr. + John H.Clippinger, Jr. T75-2024 clippinger-jr-1975-considerations <fixed-case>IMPROVING</fixed-case> <fixed-case>METHODOLOGY</fixed-case> in Natural Language Processing - William C.Mann + William C.Mann T75-2025 mann-1975-improving Methodology in <fixed-case>AI</fixed-case> and Natural Language Understanding - YorickWilks + YorickWilks T75-2026 wilks-1975-methodology diff --git a/data/xml/T78.xml b/data/xml/T78.xml index 79bdf8ea05..0a24068c52 100644 --- a/data/xml/T78.xml +++ b/data/xml/T78.xml @@ -3,7 +3,7 @@ Theoretical Issues in Natural Language Processing-2 - David L.Waltz + David L.Waltz tinlap 1978 @@ -19,7 +19,7 @@
What Makes Something “Ad Hoc” - Roger C.Schank + Roger C.Schank T78-1002 schank-1978-makes-something @@ -37,13 +37,13 @@
Taxonomic Lattice Structures for Situation Recognition - William A.Woods + William A.Woods T78-1005 woods-1978-taxonomic-lattice Description Formation and Discourse Model Synthesis - Bonnie LynnWebber + Bonnie LynnWebber T78-1006 webber-1978-description-formation @@ -62,7 +62,7 @@
Subsequent Reference: Syntactic and Rhetorical Constraints - David D.McDonald + David D.McDonald T78-1009 mcdonald-1978-subsequent-reference @@ -74,19 +74,19 @@
Bound Variables and Other Anaphors - Barbara H.Partee + Barbara H.Partee T78-1011 partee-1978-bound-variables The Use of Focus as a Tool for Disambiguation of Definite Noun Phrases - Candace L.Sidner + Candace L.Sidner T78-1012 sidner-1978-use-focus Focusing in Dialog - Barbara J.Grosz + Barbara J.Grosz T78-1013 grosz-1978-focusing-dialog @@ -110,8 +110,8 @@
Speech Acts as a Basis for Understanding Dialogue Coherence - C. RaymondPerrault - James F.Allen + C. RaymondPerrault + James F.Allen T78-1017 perrault-allen-1978-speech @@ -123,7 +123,7 @@
Intentlonallty and Human Conversations - Jaime G.Carbonell Jr + Jaime G.Carbonell Jr T78-1019 carbonell-jr-1978-intentlonallty-human @@ -160,13 +160,13 @@
Semantic Primitives in Language and Vision - YorickWilks + YorickWilks T78-1025 wilks-1978-semantic-primitives A Note on Partial Match of Descriptions. Can One Simultaneously Question (Retrieve) and Inform (Update)? - Aravind K.Joshi + Aravind K.Joshi T78-1026 joshi-1978-note-partial @@ -196,7 +196,7 @@
Path-Based and Node-Based Inference in Semantic Networks - Stuart C.Shapiro + Stuart C.Shapiro T78-1031 shapiro-1978-path-based @@ -216,13 +216,13 @@
A Computational Account of Some Constraints on Language - MitchellMarcus + MitchellMarcus T78-1034 marcus-1978-computational-account Remarks on Processing, Constraints, and the Lexicon - ThomasWasow + ThomasWasow T78-1035 wasow-1978-remarks-processing diff --git a/data/xml/T87.xml b/data/xml/T87.xml index 09a5845b8a..bd59cba3b9 100644 --- a/data/xml/T87.xml +++ b/data/xml/T87.xml @@ -3,7 +3,7 @@ Theoretical Issues in Natural Language Processing 3 - YorickWilks + YorickWilks tinlap 1987 @@ -28,7 +28,7 @@
The Definitional Power of Words - Branimir K.Boguraev + Branimir K.Boguraev T87-1004 boguraev-1987-definitional @@ -40,19 +40,19 @@
World Knowledge and Word Meaning - Jerry R.Hobbs + Jerry R.Hobbs T87-1006 hobbs-1987-world The Boundary Between Word Knowledge and World Knowledge - JudyKegl + JudyKegl T87-1007 kegl-1987-boundary Information, Unification and Locality - Fernando C. N.Pereira + Fernando C. N.Pereira T87-1008 pereira-1987-information @@ -70,7 +70,7 @@
Unification and Some New Grammatical Formalisms - Aravind K.Joshi + Aravind K.Joshi T87-1011 joshi-1987-unification @@ -82,7 +82,7 @@
Connectionist Models: Not Just a Notational Variant Not a Panacea - David L.Waltz + David L.Waltz T87-1013 waltz-1987-connectionist @@ -106,25 +106,25 @@
Possible Implications of Connectionism - Wendy G.Lehnert + Wendy G.Lehnert T87-1017 lehnert-1987-possible Whither Discourse and Speech Acts? - Barbara J.Grosz + Barbara J.Grosz T87-1018 grosz-1987-whither NO TITLE - JuliaHirschberg + JuliaHirschberg T87-1019 hirschberg-1987-title Towards a Semantic Theory of Discourse - C. RaymondPerrault + C. RaymondPerrault T87-1020 perrault-1987-towards @@ -142,14 +142,14 @@
Natural Language Processing: What’s Really Involved? - RogerSchank + RogerSchank AlexKass T87-1023 schank-kass-1987-natural The Rate of Progress in Natural Language Processing - Norman K.Sondheimer + Norman K.Sondheimer T87-1024 sondheimer-1987-rate @@ -173,13 +173,13 @@
On Formal Versus Commonsense Semantics - DavidIsrael + DavidIsrael T87-1028 israel-1987-formal They say it’s a new sort of engine: but the <fixed-case>SUMP</fixed-case>’s still there - KarenSparck Jones + KarenSparck Jones T87-1029 sparck-jones-1987-say @@ -191,19 +191,19 @@
Reference and Pragmatic Identification - Douglas E.Appelt + Douglas E.Appelt T87-1031 appelt-1987-reference Determiners, Entities, and Contexts - Deborah A.Dahl + Deborah A.Dahl T87-1032 dahl-1987-determiners Position Paper: Event Reference - Bonnie LynnWebber + Bonnie LynnWebber T87-1033 webber-1987-position @@ -254,37 +254,37 @@
Generation - A New Frontier of Natural Language Processing? - Aravind K.Joshi + Aravind K.Joshi T87-1041 joshi-1987-generation Bidirectional Grammars and the Design of Natural Language Generation Systems - Douglas E.Appelt + Douglas E.Appelt T87-1042 appelt-1987-bidirectional Limits on the human sentence generator - Anthony S.Kroch + Anthony S.Kroch T87-1043 kroch-1987-limits “No Better, but no Worse, than People” - David D.McDonald + David D.McDonald T87-1044 mcdonald-1987-better What is Special About Natural Language Generation Research? - William C.Mann + William C.Mann T87-1045 mann-1987-special Generation Systems Should Choose Their Words - MitchellMarcus + MitchellMarcus T87-1046 marcus-1987-generation diff --git a/data/xml/U03.xml b/data/xml/U03.xml index 05a2ddb123..95ec9e8aa1 100644 --- a/data/xml/U03.xml +++ b/data/xml/U03.xml @@ -33,7 +33,7 @@ Application of search algorithms to natural language processing TakeshiMatsumoto - David M. W.Powers + David M. W.Powers GeoffJarrad 22–29 U03-1003 @@ -65,7 +65,7 @@ The Ins and Outs of <fixed-case>D</fixed-case>utch noun countability classification - TimothyBaldwin + TimothyBaldwin Leonoorvan der Beek 53–60 U03-1007 @@ -98,7 +98,7 @@ Performance metrics for word sense disambiguation - TrevorCohn + TrevorCohn 86–93 U03-1011 cohn-2003-performance @@ -107,7 +107,7 @@ Straight to the point: Discovering themes for summary generation StephenWan MarkDras - CecileParis + CecileParis RobertDale 94–101 U03-1012 @@ -123,7 +123,7 @@ Towards semantic-based overlap measures for question-answering - DiegoMollá + DiegoMollá 110–117 U03-1014 molla-2003-towards @@ -147,7 +147,7 @@ Document classification in structured military messages OliverCarr - DominiqueEstival + DominiqueEstival 134–142 U03-1017 carr-estival-2003-document diff --git a/data/xml/U04.xml b/data/xml/U04.xml index 65e5643c41..ca37b51382 100644 --- a/data/xml/U04.xml +++ b/data/xml/U04.xml @@ -5,7 +5,7 @@ Proceedings of the Australasian Language Technology Workshop 2004 U04-1 AshAsudeh - CecileParis + CecileParis StephenWan
Sydney, Australia
December @@ -19,14 +19,14 @@ Complex, Corpus-Driven, Syntactic Features for Word Sense Disambiguation AriChanen - JonPatrick + JonPatrick 1-8 U04-1001 chanen-patrick-2004-complex <fixed-case>A</fixed-case>nswerfinder: Question Answering by Combining Lexical, Syntactic and Semantic Information - DiegoMolla + DiegoMolla MaryGardiner 9-16 U04-1002 @@ -35,14 +35,14 @@ Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Domains In A Supervised Learning Word Sense Disambiguation System DavidBell - JonPatrick + JonPatrick 17-24 U04-1003 bell-patrick-2004-using Using a Trie-based Structure for Question Analysis - Luiz Augusto SangoiPizzato + Luiz Augusto SangoiPizzato 25-31 U04-1004 pizzato-2004-using @@ -58,7 +58,7 @@ Thin Parsing: A Balance between Wide Scale Parsing and Chunking - JonPatrick + JonPatrick Pham HongNguyen 39-46 U04-1006 @@ -108,7 +108,7 @@ Intelligent Multi Media Presentation of information in a semi-immersive Command and Control environment CecileParis NathalieColineau - DominiqueEstival + DominiqueEstival 85-92 U04-1012 paris-etal-2004-intelligent @@ -116,7 +116,7 @@ Selecting Systemic Features for Text Classification CaseyWhitelaw - JonPatrick + JonPatrick 93-100 U04-1013 whitelaw-patrick-2004-selecting @@ -132,7 +132,7 @@ Maximum Entropy <fixed-case>M</fixed-case>arkov Models for Semantic Role Labelling - PhilBlunsom + PhilBlunsom 109-116 U04-1015 blunsom-2004-maximum @@ -185,7 +185,7 @@ Differentiating Types of Verb Particle Constructions - JonPatrick + JonPatrick JeremyFletcher 163-170 U04-1022 diff --git a/data/xml/U05.xml b/data/xml/U05.xml index 2973fba0dc..6d3311b371 100644 --- a/data/xml/U05.xml +++ b/data/xml/U05.xml @@ -4,9 +4,9 @@ Proceedings of the Australasian Language Technology Workshop 2005 U05-1 - TimothyBaldwin - JamesCurran - Mennovan Zaanen + TimothyBaldwin + JamesCurran + Mennovan Zaanen
Sydney, Australia
December 2005 @@ -18,14 +18,14 @@ Dimensions of Deep Grammar Validation - DanFlickinger + DanFlickinger 1–3 U05-1001 flickinger-2005-dimensions Text Summarization: News and Beyond - KathyMcKeown + KathyMcKeown 4 U05-1002 mckeown-2005-text @@ -39,7 +39,7 @@ Disambiguating Conjunctions in Named Entities - PawelMazur + PawelMazur RobertDale 7–14 U05-1004 @@ -47,7 +47,7 @@ Learning of Graph Rules for Question Answering - DiegoMolla + DiegoMolla Mennovan Zaanen 15–23 U05-1005 @@ -71,7 +71,7 @@ <fixed-case>POS</fixed-case> Tagging with a More Informative Tagset - AndrewMacKinlay + AndrewMacKinlay TimothyBaldwin 40–48 U05-1008 @@ -95,14 +95,14 @@ Using Diverse Information Sources to Retrieve Samples of Low Density Languages - AndrewMacKinlay + AndrewMacKinlay 64–70 U05-1011 mackinlay-2005-using Faking it: Synthetic Text-to-speech Synthesis for Under-resourced Languages – Experimental Design - HaroldSomers + HaroldSomers 71–77 U05-1012 somers-2005-faking @@ -133,8 +133,8 @@ Extracting Exact Answers using a Meta Question Answering System - Luiz AugustoPizzato - DiegoMolla + Luiz AugustoPizzato + DiegoMolla 105–112 U05-1016 pizzato-molla-2005-extracting @@ -157,7 +157,7 @@ Round-trip Translation: What Is It Good For? - HaroldSomers + HaroldSomers 127–133 U05-1019 somers-2005-round @@ -165,7 +165,7 @@ Evaluating the Utility of Appraisal Hierarchies as a Method for Sentiment Classification JeremyFletcher - JonPatrick + JonPatrick 134–142 U05-1020 fletcher-patrick-2005-evaluating @@ -189,7 +189,7 @@ Paraphrase Identification by Text Canonicalization YitaoZhang - JonPatrick + JonPatrick 160–166 U05-1023 zhang-patrick-2005-paraphrase @@ -204,10 +204,10 @@ Automatic Induction of a <fixed-case>POS</fixed-case> Tagset for <fixed-case>I</fixed-case>talian - RaffaellaBernardi - AndreaBolognesi - CorradoSeidenari - FabioTamburini + RaffaellaBernardi + AndreaBolognesi + CorradoSeidenari + FabioTamburini 176–183 U05-1025 bernardi-etal-2005-automatic @@ -259,7 +259,7 @@ Design and Development of a Speech-driven Control for a In-car Personal Navigation System YingSu TaoBai - Catherine I.Watson + Catherine I.Watson 224–232 U05-1031 su-etal-2005-design diff --git a/data/xml/U06.xml b/data/xml/U06.xml index 15b00f8d43..c014e0d218 100644 --- a/data/xml/U06.xml +++ b/data/xml/U06.xml @@ -17,7 +17,7 @@ Robust multimodal understanding for interactive systems - MichaelJohnston + MichaelJohnston 1 U06-1001 johnston-2006-robust @@ -32,7 +32,7 @@ Efficient <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar Parsing BojanDjordjevic - James R.Curran + James R.Curran 3-10 U06-1003 djordjevic-curran-2006-efficient @@ -65,15 +65,15 @@ Classifying Speech Acts using Verbal Response Modes AndrewLampert RobertDale - CécileParis + CécileParis 34-41 U06-1007 lampert-etal-2006-classifying Word Relatives in Context for Word Sense Disambiguation - DavidMartinez - EnekoAgirre + DavidMartinez + EnekoAgirre XinglongWang 42-50 U06-1008 @@ -81,8 +81,8 @@ Named Entity Recognition for Question Answering - DiegoMollá - Mennovan Zaanen + DiegoMollá + Mennovan Zaanen DanielSmith 51-58 U06-1009 @@ -92,7 +92,7 @@ Named Entity Recognition for Astronomy Literature TaraMurphy TaraMcIntosh - James R.Curran + James R.Curran 59-66 U06-1010 murphy-etal-2006-named @@ -100,15 +100,15 @@ Die Morphologie (f): Targeted Lexical Acquisition for Languages other than <fixed-case>E</fixed-case>nglish JeremyNicholson - TimothyBaldwin - PhilBlunsom + TimothyBaldwin + PhilBlunsom 67-74 U06-1011 nicholson-etal-2006-die Automatic Mapping Clinical Notes to Medical Terminologies - JonPatrick + JonPatrick YefengWang PeterBudd 75-82 @@ -117,9 +117,9 @@ Pseudo Relevance Feedback Using Named Entities for Question Answering - Luiz AugustoPizzato - DiegoMollá - CécileParis + Luiz AugustoPizzato + DiegoMollá + CécileParis 83-90 U06-1013 pizzato-etal-2006-pseudo @@ -133,7 +133,7 @@ Web Readability and Computer-Assisted Language Learning - Alexandra L.Uitdenbogerd + Alexandra L.Uitdenbogerd 99-106 U06-1015 uitdenbogerd-2006-web @@ -167,7 +167,7 @@ StephenWan MarkDras RobertDale - CécileParis + CécileParis 131-138 U06-1019 wan-etal-2006-using @@ -175,7 +175,7 @@ Verb Sense Disambiguation Using Selectional Preferences Extracted with a State-of-the-art Semantic Role Labeler PatrickYe - TimothyBaldwin + TimothyBaldwin 139-148 U06-1020 ye-baldwin-2006-verb @@ -191,7 +191,7 @@ Analysis and Prediction of User Behaviour in a Museum Environment KarlGrieser - TimothyBaldwin + TimothyBaldwin StevenBird 157-158 U06-1022 @@ -206,7 +206,7 @@ Probabilities improve stress-prediction in a <fixed-case>CFG</fixed-case> of <fixed-case>H</fixed-case>awaiian phonology - ‘ŌiwiParker Jones + ‘ŌiwiParker Jones 161-162 U06-1024 parker-jones-2006-probabilities @@ -215,7 +215,7 @@ Towards Cognitive Optimisation of a Search Engine Interface KennethTreharne DariusPfitzner - David M WPowers + David M WPowers 163-164 U06-1025 treharne-etal-2006-towards @@ -233,7 +233,7 @@ Extracting Patient Clinical Profiles from Case Reports YitaoZhang - JonPatrick + JonPatrick 167-168 U06-1027 zhang-patrick-2006-extracting diff --git a/data/xml/U07.xml b/data/xml/U07.xml index 010ff7d4ce..f1361cff5b 100644 --- a/data/xml/U07.xml +++ b/data/xml/U07.xml @@ -47,14 +47,14 @@ Measuring Correlation Between Linguist’s Judgments and <fixed-case>L</fixed-case>atent <fixed-case>D</fixed-case>irichlet <fixed-case>A</fixed-case>llocation Topics AriChanen - JonPatrick + JonPatrick 13–20 U07-1005 chanen-patrick-2007-measuring <fixed-case>TAT</fixed-case>: An Author Profiling Tool with Application to <fixed-case>A</fixed-case>rabic Emails - DominiqueEstival + DominiqueEstival TanjaGaustad Son BaoPham WillRadford @@ -75,7 +75,7 @@ Practical Queries of a Massive n-gram Database TobiasHawker MaryGardiner - AndrewBennetts + AndrewBennetts 40–48 U07-1008 hawker-etal-2007-practical @@ -84,15 +84,15 @@ Extending Sense Collocations in Interpreting Noun Compounds Su NamKim MeladelMistica - TimothyBaldwin + TimothyBaldwin 49–56 U07-1009 kim-etal-2007-extending Named Entity Recognition in Question Answering of Speech Data - DiegoMollá - Mennovan Zaanen + DiegoMollá + Mennovan Zaanen SteveCassidy 57–65 U07-1010 @@ -101,7 +101,7 @@ Experiments in Mutual Exclusion Bootstrapping TaraMurphy - JamesCurran + JamesCurran 66–74 U07-1011 murphy-curran-2007-experiments @@ -125,8 +125,8 @@ Question Prediction Language Model - Luiz AugustoPizzato - DiegoMollá + Luiz AugustoPizzato + DiegoMollá 92–99 U07-1014 pizzato-molla-2007-question @@ -144,7 +144,7 @@ Parsing Internal Noun Phrase Structure with Collins’ Models DavidVadas - James R.Curran + James R.Curran 109–116 U07-1016 vadas-curran-2007-parsing @@ -160,7 +160,7 @@ Dictionary Alignment for Context-sensitive Word Glossing WillyYap - TimothyBaldwin + TimothyBaldwin 125–133 U07-1018 yap-baldwin-2007-dictionary @@ -177,7 +177,7 @@ Exploring Extensions to Machine-learning based Gene Normalisation BenjaminGoudey NicolaStokes - DavidMartinez + DavidMartinez 143–145 U07-1020 goudey-etal-2007-exploring @@ -185,7 +185,7 @@ Distributional Similarity of Multi-Word Expressions LauraIngram - JamesCurran + JamesCurran 146–148 U07-1021 ingram-curran-2007-distributional @@ -193,7 +193,7 @@ Extending <fixed-case>CCG</fixed-case>bank with Quotes and Multi-modal <fixed-case>CCG</fixed-case> DanielTse - JamesCurran + JamesCurran 149–151 U07-1022 tse-curran-2007-extending diff --git a/data/xml/U08.xml b/data/xml/U08.xml index f2870628b7..6299d337f9 100644 --- a/data/xml/U08.xml +++ b/data/xml/U08.xml @@ -25,14 +25,14 @@ Answer Attenuation in Question Answering KatieBell - James R.Curran + James R.Curran 2–10 U08-1002 bell-curran-2008-answer Using Multiple Sources of Agreement Information for Sentiment Classification of Political Transcripts - ClintBurfoot + ClintBurfoot 11–18 U08-1003 burfoot-2008-using @@ -40,7 +40,7 @@ All-Topology, Semi-Abstract Syntactic Features for Text Categorization AriChanen - JonPatrick + JonPatrick 19–27 U08-1004 chanen-patrick-2008-topology @@ -57,7 +57,7 @@ Automatic Acquisition of Training Data for Statistical Parsers SusanHowlett - James R.Curran + James R.Curran 37–45 U08-1006 howlett-curran-2008-automatic @@ -73,7 +73,7 @@ Classification of Verb Particle Constructions with the <fixed-case>G</fixed-case>oogle <fixed-case>W</fixed-case>eb1<fixed-case>T</fixed-case> Corpus Jonathan K.Kummerfeld - James R.Curran + James R.Curran 55–63 U08-1008 kummerfeld-curran-2008-classification @@ -82,7 +82,7 @@ Requests and Commitments in Email are More Complex Than You Think: Eight Reasons to be Cautious AndrewLampert RobertDale - CécileParis + CécileParis 64–72 U08-1009 lampert-etal-2008-requests @@ -99,7 +99,7 @@ Automatic Event Reference Identification OliviaMarch - TimothyBaldwin + TimothyBaldwin 79–87 U08-1011 march-baldwin-2008-automatic @@ -107,7 +107,7 @@ Comparing the Value of Latent Semantic Analysis on two <fixed-case>E</fixed-case>nglish-to-<fixed-case>I</fixed-case>ndonesian lexical mapping tasks ElizaMargaretha - RuliManurung + RuliManurung 88–96 U08-1012 margaretha-manurung-2008-comparing @@ -115,7 +115,7 @@ Weighted Mutual Exclusion Bootstrapping for Domain Independent Lexicon and Template Acquisition TaraMcIntosh - James R.Curran + James R.Curran 97–105 U08-1013 mcintosh-curran-2008-weighted @@ -125,7 +125,7 @@ DominickNg David J.Kedziora Terry T. W.Miu - James R.Curran + James R.Curran 106–114 U08-1014 ng-etal-2008-investigating @@ -133,7 +133,7 @@ Learning Count Classifier Preferences of <fixed-case>M</fixed-case>alay Nouns JeremyNicholson - TimothyBaldwin + TimothyBaldwin 115–123 U08-1015 nicholson-baldwin-2008-learning @@ -141,7 +141,7 @@ Transforming <fixed-case>W</fixed-case>ikipedia into Named Entity Training Data JoelNothman - James R.Curran + James R.Curran TaraMurphy 124–132 U08-1016 @@ -149,10 +149,10 @@ Fit it in but say it well! - CécileParis + CécileParis NathalieColineau AndrewLampert - Joan GiraltDuran + Joan GiraltDuran 133–141 U08-1017 paris-etal-2008-fit @@ -161,7 +161,7 @@ A Two-Level Morphological Analyser for the <fixed-case>I</fixed-case>ndonesian Language FemphyPisceldo RahmadMahendra - RuliManurung + RuliManurung I WayanArka 142–150 U08-1018 @@ -170,7 +170,7 @@ Punctuation Normalisation for Cleaner Treebanks and Parsers DanielTse - JamesCurran + JamesCurran 151–159 U08-1019 tse-curran-2008-punctuation diff --git a/data/xml/U09.xml b/data/xml/U09.xml index 36e19553a8..55e9b3bd26 100644 --- a/data/xml/U09.xml +++ b/data/xml/U09.xml @@ -4,7 +4,7 @@ Proceedings of the Australasian Language Technology Association Workshop 2009 U09-1 - Luiz AugustoPizzato + Luiz AugustoPizzato RolfSchwitter
Sydney, Australia
December @@ -18,7 +18,7 @@ <fixed-case>HCSN</fixed-case>et Plenary Talk: Spoken Dialogue Models for Virtual Humans - DavidTraum + DavidTraum 1–1 U09-1001 traum-2009-hcsnet @@ -34,7 +34,7 @@ Tracking Information Flow in Financial Text WillRadford BenHachey - James R.Curran + James R.Curran MariaMilosavljevic 11–19 U09-1003 @@ -45,7 +45,7 @@ NickyRingland JoelNothman TaraMurphy - James R.Curran + James R.Curran 20–28 U09-1004 ringland-etal-2009-classifying @@ -60,7 +60,7 @@ Corpus-based Extraction of <fixed-case>J</fixed-case>apanese Compound Verbs JamesBreen - TimothyBaldwin + TimothyBaldwin 35–43 U09-1006 breen-baldwin-2009-corpus @@ -69,7 +69,7 @@ Double Double, Morphology and Trouble: Looking into Reduplication in <fixed-case>I</fixed-case>ndonesian MeladelMistica I WayanArka - TimothyBaldwin + TimothyBaldwin AveryAndrews 44–52 U09-1007 @@ -87,7 +87,7 @@ Faster parsing and supertagging model estimation Jonathan K.Kummerfeld JessikaRoesner - JamesCurran + JamesCurran 62–70 U09-1009 kummerfeld-etal-2009-faster @@ -95,7 +95,7 @@ <fixed-case>CCG</fixed-case> parsing with one syntactic structure per n-gram TimDawborn - James R.Curran + James R.Curran 71–79 U09-1010 dawborn-curran-2009-ccg @@ -112,7 +112,7 @@ A Sentiment Detection Engine for <fixed-case>I</fixed-case>nternet Stock Message Boards ChristopherChua MariaMilosavljevic - James R.Curran + James R.Curran 89–93 U09-1012 chua-etal-2009-sentiment @@ -120,7 +120,7 @@ Extracting Domain-Specific Words - A Statistical Approach Su NamKim - TimothyBaldwin + TimothyBaldwin Min-YenKan 94–98 U09-1013 @@ -128,7 +128,7 @@ A Cascade Approach to Extracting Medication Events - JonPatrick + JonPatrick MinLi 99–103 U09-1014 @@ -137,7 +137,7 @@ Improved Text Categorisation for <fixed-case>W</fixed-case>ikipedia Named Entities SamTardif - James R.Curran + James R.Curran TaraMurphy 104–108 U09-1015 @@ -146,7 +146,7 @@ Towards a flexible platform for voice accent and expression selection on a Healthcare Robot AleksandarIgic - CatherineWatson + CatherineWatson JonathanTeutenberg ElizabethBroadbent RieTamagawa @@ -158,7 +158,7 @@ Integrating Verb-Particle Constructions into <fixed-case>CCG</fixed-case> Parsing JamesConstable - JamesCurran + JamesCurran 114–118 U09-1017 constable-curran-2009-integrating diff --git a/data/xml/U10.xml b/data/xml/U10.xml index ab73240546..2d4b961c16 100644 --- a/data/xml/U10.xml +++ b/data/xml/U10.xml @@ -18,7 +18,7 @@ Opinion Mining, Subjectivity and Factuality - RodolfoDelmonte + RodolfoDelmonte 2–2 U10-1001 delmonte-2010-opinion @@ -32,7 +32,7 @@ Multilingual Language Identification: <fixed-case>ALTW</fixed-case> 2010 Shared Task Data - TimothyBaldwin + TimothyBaldwin MarcoLui 4–7 U10-1003 @@ -57,7 +57,7 @@ Thread-level Analysis over Technical User Forum Data LiWang Su NamKim - TimothyBaldwin + TimothyBaldwin 27–31 U10-1006 wang-etal-2010-thread @@ -73,8 +73,8 @@ Information Extraction of Multiple Categories from Pathology Reports - YueLi - DavidMartinez + YueLi + DavidMartinez 41–48 U10-1008 li-martinez-2010-information @@ -82,14 +82,14 @@ Classifying User Forum Participants: Separating the Gurus from the Hacks, and Other Tales of the <fixed-case>I</fixed-case>nternet MarcoLui - TimothyBaldwin + TimothyBaldwin 49–57 U10-1009 lui-baldwin-2010-classifying Fun with Filtering <fixed-case>F</fixed-case>rench - Alexandra L.Uitdenbogerd + Alexandra L.Uitdenbogerd 58–66 U10-1010 uitdenbogerd-2010-fun @@ -104,7 +104,7 @@ A Corpus for Evidence Based Medicine Summarisation - DiegoMolla + DiegoMolla 76–80 U10-1012 molla-2010-corpus @@ -121,7 +121,7 @@ Reranking a wide-coverage ccg parser DominickNg MatthewHonnibal - James R.Curran + James R.Curran 90–98 U10-1014 ng-etal-2010-reranking diff --git a/data/xml/U11.xml b/data/xml/U11.xml index 7b932c3761..e94ada9a9c 100644 --- a/data/xml/U11.xml +++ b/data/xml/U11.xml @@ -4,8 +4,8 @@ Proceedings of the Australasian Language Technology Association Workshop 2011 U11-1 - DiegoMolla - DavidMartinez + DiegoMolla + DavidMartinez
Canberra, Australia
December 2011 @@ -24,7 +24,7 @@
<fixed-case>O</fixed-case>z<fixed-case>CLO</fixed-case>: The <fixed-case>A</fixed-case>ustralian Computational Linguistic Olympiad - DominiqueEstival + DominiqueEstival 3–3 U11-1002 estival-2011-ozclo @@ -39,7 +39,7 @@ A Particle Filter algorithm for <fixed-case>B</fixed-case>ayesian Wordsegmentation - BenjaminBörschinger + BenjaminBörschinger MarkJohnson 10–18 U11-1004 @@ -47,9 +47,9 @@ Formalizing Semantic Parsing with Tree Transducers - BevanJones + BevanJones MarkJohnson - SharonGoldwater + SharonGoldwater 19–28 U11-1005 jones-etal-2011-formalizing @@ -89,7 +89,7 @@ Frontier Pruning for Shift-Reduce <fixed-case>CCG</fixed-case> Parsing StephenMerity - JamesCurran + JamesCurran 66–75 U11-1010 merity-curran-2011-frontier @@ -97,8 +97,8 @@ Predicting Thread Linking Structure by Lexical Chaining LiWang - DianaMcCarthy - TimothyBaldwin + DianaMcCarthy + TimothyBaldwin 76–85 U11-1011 wang-etal-2011-predicting-thread @@ -115,7 +115,7 @@ Collocations in Multilingual Natural Language Generation: Lexical Functions meet <fixed-case>L</fixed-case>exical <fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar FrançoisLareau MarkDras - BenjaminBörschinger + BenjaminBörschinger RobertDale 95–104 U11-1013 @@ -125,7 +125,7 @@ Outcome Polarity Identification of Medical Papers AbeedSarker DiegoMolla - CécileParis + CécileParis 105–114 U11-1014 sarker-etal-2011-outcome diff --git a/data/xml/U12.xml b/data/xml/U12.xml index e8285f8c53..7b0135678d 100644 --- a/data/xml/U12.xml +++ b/data/xml/U12.xml @@ -17,7 +17,7 @@ Using a large annotated historical corpus to study word-specific effects in sound change - JenniferHay + JenniferHay 2–2 U12-1001 hay-2012-using @@ -54,7 +54,7 @@ TeresaLynn JenniferFoster MarkDras - ElaineUí Dhonnchadha + ElaineUí Dhonnchadha 23–32 U12-1005 lynn-etal-2012-active @@ -62,8 +62,8 @@ Unsupervised Estimation of Word Usage Similarity MarcoLui - TimothyBaldwin - DianaMcCarthy + TimothyBaldwin + DianaMcCarthy 33–41 U12-1006 lui-etal-2012-unsupervised @@ -87,7 +87,7 @@ Segmentation and Translation of <fixed-case>J</fixed-case>apanese Multi-word Loanwords JamesBreen - TimothyBaldwin + TimothyBaldwin FrancisBond 61–69 U12-1009 @@ -98,7 +98,7 @@ Measurement of Progress in Machine Translation YvetteGraham - TimothyBaldwin + TimothyBaldwin AaronHarwood AlistairMoffat JustinZobel @@ -109,8 +109,8 @@ Towards Two-step Multi-document Summarisation for Evidence Based Medicine: A Quantitative Analysis AbeedSarker - DiegoMollá-Aliod - CécileParis + DiegoMollá-Aliod + CécileParis 79–87 U12-1011 sarker-etal-2012-towards @@ -119,7 +119,7 @@ In Your Eyes: Identifying Clichés in Song Lyrics Alex G.Smith Christopher X. S.Zee - Alexandra L.Uitdenbogerd + Alexandra L.Uitdenbogerd 88–96 U12-1012 smith-etal-2012-eyes @@ -144,7 +144,7 @@ <fixed-case>L</fixed-case>a<fixed-case>BB</fixed-case>-<fixed-case>CAT</fixed-case>: an Annotation Store RobertFromont - JenniferHay + JenniferHay 113–117 U12-1015 fromont-hay-2012-labb @@ -152,9 +152,9 @@ Classification of Study Region in Environmental Science Abstracts JaredWillett - TimothyBaldwin - DavidMartinez - AngusWebb + TimothyBaldwin + DavidMartinez + AngusWebb 118–122 U12-1016 willett-etal-2012-classification @@ -162,8 +162,8 @@ Overview of the <fixed-case>ALTA</fixed-case> 2012 Shared Task ImanAmini - DavidMartinez - DiegoMolla + DavidMartinez + DiegoMolla 124–129 U12-1017 amini-etal-2012-overview @@ -171,7 +171,7 @@ Automatic sentence classifier using sentence ordering features for Event Based Medicine: Shared task system description SpandanaGella - Duong ThanhLong + LongDuong Thanh 130–133 U12-1018 gella-duong-thanh-2012-automatic @@ -185,7 +185,7 @@ Experiments with Clustering-based Features for Sentence Classification in Medical Publications: <fixed-case>M</fixed-case>acquarie Test’s participation in the <fixed-case>ALTA</fixed-case> 2012 shared task. - DiegoMollá + DiegoMollá 139–142 U12-1020 molla-2012-experiments diff --git a/data/xml/U13.xml b/data/xml/U13.xml index f35125e49a..cb4995ac5a 100644 --- a/data/xml/U13.xml +++ b/data/xml/U13.xml @@ -5,7 +5,7 @@ Proceedings of the Australasian Language Technology Association Workshop 2013 (ALTA 2013) U13-1 SarvnazKarimi - KarinVerspoor + KarinVerspoor
Brisbane, Australia
December 2013 @@ -17,14 +17,14 @@ Robust Computational Semantics - MarkSteedman + MarkSteedman 2–2 U13-1001 steedman-2013-robust Concurrent Discourse Relations - BonnieWebber + BonnieWebber 3–3 U13-1002 webber-2013-concurrent @@ -40,7 +40,7 @@ Crowd-Sourcing of Human Judgments of Machine Translation Fluency YvetteGraham - TimothyBaldwin + TimothyBaldwin AlistairMoffat JustinZobel 16–24 @@ -64,9 +64,9 @@ Examining the Impact of Coreference Resolution on Quote Attribution - TimO’Keefe + TimO’Keefe KellieWebster - James R.Curran + James R.Curran IrenaKoprinska 43–52 U13-1007 @@ -76,8 +76,8 @@ Multi-Objective Optimization for Clustering of Medical Publications AsifEkbal SriparnaSaha - DiegoMollá - KRavikumar + DiegoMollá + KRavikumar 53–61 U13-1008 ekbal-etal-2013-multi @@ -85,7 +85,7 @@ A Study: From Electronic Laboratory Notebooks to Generated Queries for Literature Recommendation OldoozDianat - CécileParis + CécileParis StephenWan 62–70 U13-1009 @@ -111,7 +111,7 @@ Impact of Corpus Diversity and Complexity on <fixed-case>NER</fixed-case> Performance TatyanaShmanina IngridZukerman - AntonioJimeno Yepes + AntonioJimeno Yepes LawrenceCavedon KarinVerspoor 91–95 @@ -144,7 +144,7 @@ Rhythm, Metrics, and the Link to Phonology - JasonBrown + JasonBrown SamMandal 112–117 U13-1016 @@ -160,16 +160,16 @@ Automatic Climate Classification of Environmental Science Literature JaredWillett - DavidMartinez - J. AngusWebb - TimothyBaldwin + DavidMartinez + J. AngusWebb + TimothyBaldwin 123–130 U13-1018 willett-etal-2013-automatic Overview of the 2013 <fixed-case>ALTA</fixed-case> Shared Task - DiegoMolla + DiegoMolla 132–136 U13-1019 molla-2013-overview diff --git a/data/xml/U14.xml b/data/xml/U14.xml index d1b57a8d80..ada517ebbb 100644 --- a/data/xml/U14.xml +++ b/data/xml/U14.xml @@ -17,15 +17,15 @@ Deep <fixed-case>QA</fixed-case>: Moving beyond the hype to examine the challenges in creating a cognitive assistant for humans - JenniferLai + JenniferLai 2–2 U14-1001 lai-2014-deep The Effect of Dependency Representation Scheme on Syntactic Language Modelling - SunghwanKim - JohnPate + SunghwanKim + JohnPate MarkJohnson 4–13 U14-1002 @@ -42,7 +42,7 @@ Automated Generation of Test Suites for Error Analysis of Concept Recognition Systems TudorGroza - KarinVerspoor + KarinVerspoor 23–31 U14-1004 groza-verspoor-2014-automated @@ -51,7 +51,7 @@ Trading accuracy for faster named entity linking KristyHughes JoelNothman - James R.Curran + James R.Curran 32–40 U14-1005 hughes-etal-2014-trading @@ -60,7 +60,7 @@ Unsupervised Biographical Event Extraction Using <fixed-case>W</fixed-case>ikipedia Traffic AlexanderHogue JoelNothman - James R.Curran + James R.Curran 41–49 U14-1006 hogue-etal-2014-unsupervised @@ -94,8 +94,8 @@ Impact of Citing Papers for Summarisation of Clinical Documents - DiegoMollá - ChristopherJones + DiegoMollá + ChristopherJones AbeedSarker 79–87 U14-1010 @@ -123,7 +123,7 @@ Alveo, a Human Communication Science Virtual Laboratory - DominiqueEstival + DominiqueEstival SteveCassidy 104–107 U14-1013 @@ -140,7 +140,7 @@ Exploring Temporal Patterns in Emergency Department Triage Notes with Topic Models SimonKocbek - KarinVerspoor + KarinVerspoor WrayBuntine 113–117 U14-1015 @@ -157,8 +157,8 @@ Deep Belief Networks and Biomedical Text Categorisation - Antonio JimenoYepes - AndrewMacKinlay + Antonio JimenoYepes + AndrewMacKinlay JustinBedo RahilGarvani QiangChen @@ -178,7 +178,7 @@ Analysis of Coreference Relations in the Biomedical Literature MijiChoi - KarinVerspoor + KarinVerspoor JustinZobel 134–138 U14-1019 @@ -186,7 +186,7 @@ <fixed-case>F</fixed-case>innish Native Language Identification - ShervinMalmasi + ShervinMalmasi MarkDras 139–144 U14-1020 @@ -194,14 +194,14 @@ A Data-driven Approach to Studying Given Names and their Gender and Ethnicity Associations - ShervinMalmasi + ShervinMalmasi 145–149 U14-1021 malmasi-2014-data Overview of the 2014 <fixed-case>ALTA</fixed-case> Shared Task: Identifying Expressions of Locations in Tweets - DiegoMolla + DiegoMolla SarvnazKarimi 151–156 U14-1022 @@ -210,8 +210,8 @@ Identifying <fixed-case>T</fixed-case>witter Location Mentions BoHan - Antonio JimenoYepes - AndrewMacKinlay + Antonio JimenoYepes + AndrewMacKinlay QiangChen 157–162 U14-1023 @@ -234,7 +234,7 @@ BaharSalehi MijiChoi PingTan - LongDuong + LongDuong 171–176 U14-1025 liu-etal-2014-automatic diff --git a/data/xml/U15.xml b/data/xml/U15.xml index 8fe00eb946..a0bd4e5949 100644 --- a/data/xml/U15.xml +++ b/data/xml/U15.xml @@ -19,7 +19,7 @@ Query-Based Single Document Summarization Using an Ensemble Noisy Auto-Encoder Mahmood YousefiAzar KairitSirts - DiegoMollá Aliod + DiegoMollá Aliod LenHamey 2–10 U15-1001 @@ -64,7 +64,7 @@ Similarity Metrics for Clustering <fixed-case>P</fixed-case>ub<fixed-case>M</fixed-case>ed Abstracts for Evidence Based Medicine HamedHassanzadeh - DiegoMollá + DiegoMollá TudorGroza AnthonyNguyen JaneHunter @@ -74,7 +74,7 @@ Finding Names in Trove: Named Entity Recognition for <fixed-case>A</fixed-case>ustralian Historical Newspapers - Sunghwan MacKim + Sunghwan MacKim SteveCassidy 57–65 U15-1007 @@ -82,7 +82,7 @@ Clinical Information Extraction Using Word Representations - ShervinMalmasi + ShervinMalmasi HamedHassanzadeh MarkDras 66–74 @@ -101,8 +101,8 @@ Domain Adaption of Named Entity Recognition to Support Credit Risk Assessment Julio CesarSalinas Alvarado - KarinVerspoor - TimothyBaldwin + KarinVerspoor + TimothyBaldwin 84–90 U15-1010 salinas-alvarado-etal-2015-domain @@ -118,8 +118,8 @@ Structural Alignment as the Basis to Improve Significant Change Detection in Versioned Sentences Ping PingTan - KarinVerspoor - TimMiller + KarinVerspoor + TimMiller 101–109 U15-1012 tan-etal-2015-structural @@ -147,7 +147,7 @@ AtifAhmad ChristophBreidbach DavidMalet - TimothyBaldwin + TimothyBaldwin 122–127 U15-1015 nothman-etal-2015-understanding @@ -163,7 +163,7 @@ Overview of the 2015 <fixed-case>ALTA</fixed-case> Shared Task: Identifying <fixed-case>F</fixed-case>rench Cognates in <fixed-case>E</fixed-case>nglish Text LaurianneSitbon - DiegoMolla + DiegoMolla HaoxingWang 134–137 U15-1017 @@ -171,7 +171,7 @@ Cognate Identification using Machine Translation - ShervinMalmasi + ShervinMalmasi MarkDras 138–141 U15-1018 @@ -179,7 +179,7 @@ Word Transformation Heuristics Agains Lexicons for Cognate Detection - AlexandraUitdenbogerd + AlexandraUitdenbogerd 142–144 U15-1019 uitdenbogerd-2015-word diff --git a/data/xml/U16.xml b/data/xml/U16.xml index 38ce38d04e..722fc7697c 100644 --- a/data/xml/U16.xml +++ b/data/xml/U16.xml @@ -4,7 +4,7 @@ Proceedings of the Australasian Language Technology Association Workshop 2016 U16-1 - TrevorCohn + TrevorCohn
Melbourne, Australia
December 2016 @@ -17,7 +17,7 @@ Improving Neural Translation Models with Linguistic Factors Cong Duy VuHoang - GholamrezaHaffari + GholamrezaHaffari TrevorCohn 7–14 U16-1001 @@ -46,8 +46,8 @@ Syndromic Surveillance using Generic Medical Entities on <fixed-case>T</fixed-case>witter PinHuang - AndrewMacKinlay - Antonio JimenoYepes + AndrewMacKinlay + Antonio JimenoYepes 35–44 U16-1004 huang-etal-2016-syndromic @@ -56,7 +56,7 @@ Syndromic Surveillance through Measuring Lexical Shift in Emergency Department Chief Complaint Texts HafsahAamer BahadorrezaOfoghi - KarinVerspoor + KarinVerspoor 45–53 U16-1005 aamer-etal-2016-syndromic @@ -73,8 +73,8 @@ <fixed-case>ASM</fixed-case> Kernel: Graph Kernel using Approximate Subgraph Matching for Relation Extraction - Nagesh C.Panyam - KarinVerspoor + Nagesh C.Panyam + KarinVerspoor TrevorCohn RaoKotagiri 65–73 @@ -103,7 +103,7 @@ The Role of Features and Context on Suicide Ideation Detection YufeiWang StephenWan - CécileParis + CécileParis 94–102 U16-1010 wang-etal-2016-role @@ -111,7 +111,7 @@ Featureless Domain-Specific Term Extraction with Minimal Labelled Data RuiWang - WeiLiu + WeiLiu ChrisMcDonald 103–112 U16-1011 @@ -128,8 +128,8 @@ How Challenging is Sarcasm versus Irony Classification?: A Study With a Dataset from <fixed-case>E</fixed-case>nglish Literature AdityaJoshi VaibhavTripathi - PushpakBhattacharyya - MarkCarman + PushpakBhattacharyya + MarkCarman MeghnaSingh JayaSaraswati RajitaShukla @@ -140,7 +140,7 @@ Learning cascaded latent variable models for biomedical text classification MingLiu - GholamrezaHaffari + GholamrezaHaffari WrayBuntine 128–132 U16-1014 @@ -149,8 +149,8 @@ Temporal Modelling of Geospatial Words in <fixed-case>T</fixed-case>witter BoHan - Antonio JimenoYepes - AndrewMacKinlay + Antonio JimenoYepes + AndrewMacKinlay LianhuaChi 133–137 U16-1015 @@ -158,8 +158,8 @@ <fixed-case>NER</fixed-case> for Medical Entities in <fixed-case>T</fixed-case>witter using Sequence to Sequence Neural Networks - Antonio JimenoYepes - AndrewMacKinlay + Antonio JimenoYepes + AndrewMacKinlay 138–142 U16-1016 yepes-mackinlay-2016-ner @@ -196,7 +196,7 @@ Overview of the 2016 <fixed-case>ALTA</fixed-case> Shared Task: Cross-<fixed-case>KB</fixed-case> Coreference AndrewChisholm BenHachey - DiegoMollá + DiegoMollá 161–164 U16-1020 chisholm-etal-2016-overview @@ -212,7 +212,7 @@ Filter and Match Approach to Pair-wise Web <fixed-case>URI</fixed-case> Linking - S.Shivashankar + S.Shivashankar YitongLi AfshinRahimi 170–174 diff --git a/data/xml/U17.xml b/data/xml/U17.xml index c07f01eec1..2ac05bb512 100644 --- a/data/xml/U17.xml +++ b/data/xml/U17.xml @@ -5,7 +5,7 @@ Proceedings of the Australasian Language Technology Association Workshop 2017 U17-1 Jojo Sze-MengWong - GholamrezaHaffari + GholamrezaHaffari
Brisbane, Australia
December 2017 @@ -26,17 +26,17 @@ Improving End-to-End Memory Networks with Unified Weight Tying FeiLiu - TrevorCohn - TimothyBaldwin + TrevorCohn + TimothyBaldwin 16–24 U17-1002 liu-etal-2017-improving Joint Sentence-Document Model for Manifesto Text Analysis - ShivashankarSubramanian - TrevorCohn - TimothyBaldwin + ShivashankarSubramanian + TrevorCohn + TimothyBaldwin JulianBrooke 25–33 U17-1003 @@ -56,7 +56,7 @@ A Hybrid Model for Quality Assessment of <fixed-case>W</fixed-case>ikipedia Articles AiliShen JianzhongQi - TimothyBaldwin + TimothyBaldwin 43–52 U17-1005 shen-etal-2017-hybrid @@ -64,7 +64,7 @@ Phonemic Transcription of Low-Resource Tonal Languages OliverAdams - TrevorCohn + TrevorCohn GrahamNeubig AlexisMichaud 53–60 @@ -81,17 +81,17 @@ Automatic Negation and Speculation Detection in Veterinary Clinical Text KatherineCheng - TimothyBaldwin - KarinVerspoor + TimothyBaldwin + KarinVerspoor 70–78 U17-1008 cheng-etal-2017-automatic Medication and Adverse Event Extraction from Noisy Text - XiangDai + XiangDai SarvnazKarimi - CecileParis + CecileParis 79–87 U17-1009 dai-etal-2017-medication @@ -106,7 +106,7 @@ On Extending Neural Networks with Loss Ensembles for Text Classification HamidehHajiabadi - DiegoMolla-Aliod + DiegoMolla-Aliod RezaMonsefi 98–102 U17-1011 @@ -114,7 +114,7 @@ Towards the Use of Deep Reinforcement Learning with Global Policy for Query-based Extractive Summarisation - DiegoMollá-Aliod + DiegoMollá-Aliod 103–107 U17-1012 molla-aliod-2017-towards @@ -132,7 +132,7 @@ Overview of the 2017 <fixed-case>ALTA</fixed-case> Shared Task: Correcting <fixed-case>OCR</fixed-case> Errors - DiegoMollá-Aliod + DiegoMollá-Aliod SteveCassidy 115–118 U17-1014 diff --git a/data/xml/U18.xml b/data/xml/U18.xml index 5fb7aca9be..9d09c78348 100644 --- a/data/xml/U18.xml +++ b/data/xml/U18.xml @@ -4,8 +4,8 @@ Proceedings of the Australasian Language Technology Association Workshop 2018 U18-1 - Sunghwan MacKim - Xiuzhen (Jenny)Zhang + Sunghwan MacKim + Xiuzhen (Jenny)Zhang
Dunedin, New Zealand
December 2018 @@ -18,8 +18,8 @@ Improved Neural Machine Translation using Side Information Cong Duy VuHoang - GholamrezaHaffari - TrevorCohn + GholamrezaHaffari + TrevorCohn 6–16 U18-1001 In this work, we investigate whether side information is helpful in neural machine translation (NMT). We study various kinds of side information, including topical information, personal trait, then propose different ways of incorporating them into the existing NMT models. Our experimental results show the benefits of side information in improving the NMT models. @@ -75,11 +75,11 @@ Exploring Textual and Speech information in Dialogue Act Classification with Speaker Domain Adaptation XuanliHe - QuanTran + QuanTran WilliamHavard - LaurentBesacier + LaurentBesacier IngridZukerman - GholamrezaHaffari + GholamrezaHaffari 61–65 U18-1007 In spite of the recent success of Dialogue Act (DA) classification, the majority of prior works focus on text-based classification with oracle transcriptions, i.e. human transcriptions, instead of Automatic Speech Recognition (ASR)’s transcriptions. In spoken dialog systems, however, the agent would only have access to noisy ASR transcriptions, which may further suffer performance degradation due to domain shift. In this paper, we explore the effectiveness of using both acoustic and textual signals, either oracle or ASR transcriptions, and investigate speaker domain adaptation for DA classification. Our multimodal model proves to be superior to the unimodal models, particularly when the oracle transcriptions are not available. We also propose an effective method for speaker domain adaptation, which achieves competitive results. @@ -98,7 +98,7 @@ A Comparative Study of Embedding Models in Predicting the Compositionality of Multiword Expressions NavnitaNandakumar BaharSalehi - TimothyBaldwin + TimothyBaldwin 71–76 U18-1009 In this paper, we perform a comparative evaluation of off-the-shelf embedding models over the task of compositionality prediction of multiword expressions("MWEs"). Our experimental results suggest that character- and document-level models capture knowledge of MWE compositionality and are effective in modelling varying levels of compositionality, with the advantage over word-level models that they do not require token-level identification of MWEs in the training corpus. @@ -107,8 +107,8 @@ Towards Efficient Machine Translation Evaluation by Modelling Annotators NitikaMathur - TimothyBaldwin - TrevorCohn + TimothyBaldwin + TrevorCohn 77–82 U18-1010 Accurate evaluation of translation has long been a difficult, yet important problem. Current evaluations use direct assessment (DA), based on crowd sourcing judgements from a large pool of workers, along with quality control checks, and a robust method for combining redundant judgements. In this paper we show that the quality control mechanism is overly conservative, which increases the time and expense of the evaluation. We propose a model that does not rely on a pre-processing step to filter workers and takes into account varying annotator reliabilities. Our model effectively weights each worker's scores based on the inferred precision of the worker, and is much more reliable than the mean of either the raw scores or the standardised scores. We also show that DA does not deliver on the promise of longitudinal evaluation, and propose redesigning the structure of the annotation tasks that can solve this problem. @@ -116,7 +116,7 @@ Overview of the 2018 <fixed-case>ALTA</fixed-case> Shared Task: Classifying Patent Applications - DiegoMollá + DiegoMollá DileshaSeneviratne 84–88 U18-1011 @@ -126,7 +126,7 @@ Classifying Patent Applications with Ensemble Methods FernandoBenites - ShervinMalmasi + ShervinMalmasi MarcosZampieri 89–92 U18-1012 diff --git a/data/xml/U19.xml b/data/xml/U19.xml index 014fde1cce..0e1321592e 100644 --- a/data/xml/U19.xml +++ b/data/xml/U19.xml @@ -6,7 +6,7 @@ U19-1 MeladelMistica MassimoPiccardi - AndrewMacKinlay + AndrewMacKinlay Australasian Language Technology Association
Sydney, Australia
4--6 December @@ -30,7 +30,7 @@ From Shakespeare to <fixed-case>L</fixed-case>i-<fixed-case>B</fixed-case>ai: Adapting a Sonnet Model to <fixed-case>C</fixed-case>hinese Poetry ZhuohanXie Jey HanLau - TrevorCohn + TrevorCohn 10–18 In this paper, we adapt Deep-speare, a joint neural network model for English sonnets, to Chinese poetry. We illustrate characteristics of Chinese quatrain and explain our architecture as well as training and generation procedure, which differs from Shakespeare sonnets in several aspects. We analyse the generated poetry and find that model works well for Chinese poetry, as it can: (1) generate coherent 4-line quatrains of different topics; and (2) capture rhyme automatically (to a certain extent). U19-1002 @@ -39,7 +39,7 @@ Readability of <fixed-case>T</fixed-case>witter Tweets for Second Language Learners PatrickJacob - AlexandraUitdenbogerd + AlexandraUitdenbogerd 19–27 Optimal language acquisition via reading requires the learners to read slightly above their current language skill level. Identifying material at the right level is the essential role of automatic readability measurement. Short message platforms such as Twitter offer the opportunity for language practice while reading about current topics and engaging in conversation in small doses, and can be filtered according to linguistic criteria to suit the learner. In this research, we explore how readable tweets are for English language learners and which factors contribute to their readability. With participants from six language groups, we collected 14,659 data points, each representing a tweet from a pool of 4100 tweets, and a judgement of perceived readability. Traditional readability measures and features failed on the data-set, but demographic data showed that judgements were largely genuine and reflected reported language skill, which is consistent with other recent studies. We report on the properties of the data set and implications for future research. U19-1003 @@ -57,9 +57,9 @@ Modelling <fixed-case>T</fixed-case>ibetan Verbal Morphology QianjiDi EkaterinaVylomova - TimBaldwin + TimBaldwin 35–40 - + U19-1005 di-etal-2019-modelling @@ -77,9 +77,9 @@ AiliShen BaharSalehi JianzhongQi - TimothyBaldwin + TimothyBaldwin 47–51 - + U19-1007 shen-etal-2019-feature
@@ -87,7 +87,7 @@ Red-faced <fixed-case>ROUGE</fixed-case>: Examining the Suitability of <fixed-case>ROUGE</fixed-case> for Opinion Summary Evaluation WenyiTay AdityaJoshi - XiuzhenZhang + XiuzhenZhang SarvnazKarimi StephenWan 52–60 @@ -109,7 +109,7 @@ Improved Document Modelling with a Neural Discourse Parser FajriKoto Jey HanLau - TimothyBaldwin + TimothyBaldwin 67–76 Despite the success of attention-based neural models for natural language generation and classification tasks, they are unable to capture the discourse structure of larger documents. We hypothesize that explicit discourse representations have utility for NLP tasks over longer documents or document sequences, which sequence-to-sequence models are unable to capture. For abstractive summarization, for instance, conventional neural models simply match source documents and the summary in a latent space without explicit representation of text structure or relations. In this paper, we propose to use neural discourse representations obtained from a rhetorical structure theory (RST) parser to enhance document representations. Specifically, document representations are generated for discourse spans, known as the elementary discourse units (EDUs). We empirically investigate the benefit of the proposed approach on two different tasks: abstractive summarization and popularity prediction of online petitions. We find that the proposed approach leads to substantial improvements in all cases. U19-1010 @@ -119,7 +119,7 @@ Does an <fixed-case>LSTM</fixed-case> forget more than a <fixed-case>CNN</fixed-case>? An empirical study of catastrophic forgetting in <fixed-case>NLP</fixed-case> GauravArora AfshinRahimi - TimothyBaldwin + TimothyBaldwin 77–86 Catastrophic forgetting — whereby a model trained on one task is fine-tuned on a second, and in doing so, suffers a “catastrophic” drop in performance over the first task — is a hurdle in the development of better transfer learning techniques. Despite impressive progress in reducing catastrophic forgetting, we have limited understanding of how different architectures and hyper-parameters affect forgetting in a network. With this study, we aim to understand factors which cause forgetting during sequential training. Our primary finding is that CNNs forget less than LSTMs. We show that max-pooling is the underlying operation which helps CNNs alleviate forgetting compared to LSTMs. We also found that curriculum learning, placing a hard task towards the end of task sequence, reduces forgetting. We analysed the effect of fine-tuning contextual embeddings on catastrophic forgetting and found that using embeddings as feature extractor is preferable to fine-tuning in continual learning setup. U19-1011 @@ -138,10 +138,10 @@ A Pointer Network Architecture for Context-Dependent Semantic Parsing XuanliHe - QuanTran - GholamrezaHaffari + QuanTran + GholamrezaHaffari 94–99 - + U19-1013 he-etal-2019-pointer @@ -153,8 +153,8 @@ ChristianDruckenbrodt CamiloThorne Saber A.Akhondi - TimothyBaldwin - KarinVerspoor + TimothyBaldwin + KarinVerspoor 100–110 Extracting chemical reactions from patents is a crucial task for chemists working on chemical exploration. In this paper we introduce the novel task of detecting the textual spans that describe or refer to chemical reactions within patents. We formulate this task as a paragraph-level sequence tagging problem, where the system is required to return a sequence of paragraphs which contain a description of a reaction. To address this new task, we construct an annotated dataset from an existing proprietary database of chemical reactions manually extracted from patents. We introduce several baseline methods for the task and evaluate them over our dataset. Through error analysis, we discuss what makes the task complex and challenging, and suggest possible directions for future research. U19-1014 @@ -195,7 +195,7 @@ Measuring <fixed-case>E</fixed-case>nglish Readability for <fixed-case>V</fixed-case>ietnamese Speakers PhuocNguyen - AlexandraUitdenbogerd + AlexandraUitdenbogerd 136–145 Reading is important for any language learner, but the difficulty level of the text needs to match a reader’s level to enable efficient learning of new vocabulary. Many widely used traditional readability measures are not effective for those who speak English as a second or additional language. This study examines English readability for Vietnamese native speakers (VL1). A collection of text difficulty judgements of nearly 100 English text passages was obtained from 12 VL1 participants, using a 5-point Likert scale. Using the same basic features found in traditional English readability measures we found that SVMs and Dale-Chall features were slightly better than linear models using either Flesch or Dale-Chall. VL1 participants’ text judgements were strongly correlated with their past IELTS test scores. This study introduces a first approximation to readability of English text for VL1, with suggestions for further improvements. U19-1018 @@ -216,7 +216,7 @@ AdityaJoshi SarvnazKarimi RossSparks - CecileParis + CecileParis C RainaMacIntyre 151–158 Multi-Task Learning (MTL) has been an attractive approach to deal with limited labeled datasets or leverage related tasks, for a variety of NLP problems. We examine the benefit of MTL for three specific pairs of health informatics tasks that deal with: (a) overlapping symptoms for the same classification problem (personal health mention classification for influenza and for a set of symptoms); (b) overlapping medical concepts for related classification problems (vaccine usage and drug usage detection); and, (c) related classification problems (vaccination intent and vaccination relevance detection). We experiment with a simple neural architecture: a shared layer followed by task-specific dense layers. The novelty of this work is that it compares alternatives for shared layers for these pairs of tasks. While our observations agree with the promise of MTL as compared to single-task learning, for health informatics, we show that the benefit also comes with caveats in terms of the choice of shared layers and the relatedness between the participating tasks. @@ -226,10 +226,10 @@ Difficulty-aware Distractor Generation for Gap-Fill Items Chak YanYeung - JohnLee - BenjaminTsou + JohnLee + BenjaminTsou 159–164 - + U19-1021 yeung-etal-2019-difficulty @@ -247,9 +247,9 @@ Neural Versus Non-Neural Text Simplification: A Case Study IslamNassar MichelleAnanda-Rajah - GholamrezaHaffari + GholamrezaHaffari 172–177 - + U19-1023 nassar-etal-2019-neural @@ -276,7 +276,7 @@
Overview of the 2019 <fixed-case>ALTA</fixed-case> Shared Task: Sarcasm Target Identification - DiegoMolla + DiegoMolla AdityaJoshi 192–196 We present an overview of the 2019 ALTA shared task. This is the 10th of the series of shared tasks organised by ALTA since 2010. The task was to detect the target of sarcastic comments posted on social media. We intro- duce the task, describe the data and present the results of baselines and participants. This year’s shared task was particularly challenging and no participating systems improved the re- sults of our baseline. diff --git a/data/xml/W00.xml b/data/xml/W00.xml index 66d563f280..b88ca91098 100644 --- a/data/xml/W00.xml +++ b/data/xml/W00.xml @@ -37,7 +37,7 @@ Dependency of context-based Word Sense Disambiguation from representation and domain complexity - PaolaVelardi + PaolaVelardi RomaVelardi W00-0105 velardi-velardi-2000-dependency @@ -51,7 +51,7 @@ A Measure of Semantic Complexity for Natural Language Systems ShannonPollard - Alan W.Biermann + Alan W.Biermann W00-0107 pollard-biermann-2000-measure @@ -63,7 +63,7 @@ Partially Saturated Referents as a Source of Complexity in Semantic Interpretation - David D.McDonald + David D.McDonald W00-0109 mcdonald-2000-partially @@ -88,23 +88,23 @@ An Interlingual-based Approach to Reference Resolution - DavidFarwell + DavidFarwell W00-0201 farwell-2000-interlingual Representations of Actions as an Interlingua - Karin ChristineKipper - MarthaPalmer + Karin ChristineKipper + MarthaPalmer W00-0202 kipper-palmer-2000-representations Evaluation of a Practical Interlingua for Task-Oriented Dialogue - LoriLevin - DonnaGates - AlonLavie - FabioPianesi + LoriLevin + DonnaGates + AlonLavie + FabioPianesi DorcasWallace TaroWatanabe W00-0203 @@ -112,34 +112,34 @@ An interlingua aiming at communication on the Web: How language-independent can it be? - Ronaldo TeixeiraMartins + Ronaldo TeixeiraMartins Lucia Helena MachadoRino - Mariadas Gracas Volpe Nunes - GiseleMontilha + Mariadas Gracas Volpe Nunes + GiseleMontilha Osvaldo Novaisde Oliveira W00-0204 martins-etal-2000-interlingua Telicity as a Cue to Temporaland Discourse Structure in <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Machine Translation - MariOlsen - DavidTraum - CarolVan Ess-Dykema - AmyWeinberg + MariOlsen + DavidTraum + CarolVan Ess-Dykema + AmyWeinberg RonDolan W00-0205 olsen-etal-2000-telicity An Application of the Interlingua System <fixed-case>ISS</fixed-case> for <fixed-case>S</fixed-case>panish-<fixed-case>E</fixed-case>nglish Pronominal Anaphora Generation - JesusPeral - AntonioFerrandez + JesusPeral + AntonioFerrandez W00-0206 peral-ferrandez-2000-application Generation from Lexical Conceptual Structures - DavidTraum + DavidTraum NizarHabash W00-0207 traum-habash-2000-generation @@ -157,7 +157,7 @@ Lessons Learned in Building Spoken Language Collaborative Interface Agents - Candace L.Sidner + Candace L.Sidner CarolynBoettner CharlesRich W00-0301 @@ -166,7 +166,7 @@ <fixed-case>G</fixed-case>o<fixed-case>D</fixed-case>i<fixed-case>S</fixed-case>- An Accommodating Dialogue System StaffanLarsson - PeterLjunglof + PeterLjunglof RobinCooper ElisabetEngdahl StinaEricsson @@ -175,17 +175,17 @@ Dialogue Management in the Mercury Flight Reservation System - StephanieSeneff - JosephPolifroni + StephanieSeneff + JosephPolifroni W00-0303 seneff-polifroni-2000-dialogue <fixed-case>NJF</fixed-case>un- A Reinforcement Learning Spoken Dialogue System - DianeLitman + DianeLitman SatinderSingh - MichaelKearns - MarilynWalker + MichaelKearns + MarilynWalker W00-0304 litman-etal-2000-njfun @@ -197,57 +197,57 @@ Stochastic Language Generation for Spoken Dialogue Systems - Alice H.Oh - Alexander I.Rudnicky + Alice H.Oh + Alexander I.Rudnicky W00-0306 oh-rudnicky-2000-stochastic <fixed-case>TRIPS</fixed-case>- 911 System Demonstration - JamesAllen - DonnaByron + JamesAllen + DonnaByron DaveCostello - MyroslavaDzikovska + MyroslavaDzikovska GeorgeFerguson LucianGalescu - AmandaStent + AmandaStent W00-0307 allen-etal-2000-trips Epiphenomenal Grammar Acquisition with <fixed-case>GSG</fixed-case> - MarsalGavalda + MarsalGavalda W00-0308 gavalda-2000-epiphenomenal Task-based dialog management using an agenda WeiXu - Alexander I.Rudnicky + Alexander I.Rudnicky W00-0309 xu-rudnicky-2000-task Using Dialogue Representations for Concept-to-Speech Generation - Christine H.Nakatani - JenniferChu-Carroll + Christine H.Nakatani + JenniferChu-Carroll W00-0310 nakatani-chu-carroll-2000-using A Compact Architecture for Dialogue Management Based on Scripts and Meta-Outputs - MannyRayner - Beth AnnHockey + MannyRayner + Beth AnnHockey FrankieJames W00-0311 rayner-etal-2000-compact-architecture Building a Robust Dialogue System with Limited Data - Sharon J.Goldwater + Sharon J.Goldwater Elizabeth OwenBratt - Jean MarkGawron - JohnDowding + Jean MarkGawron + JohnDowding W00-0312 goldwater-etal-2000-building @@ -271,17 +271,17 @@ Mining Discourse Markers for <fixed-case>C</fixed-case>hinese Textual Summarization - Samuel W. K.Chan - Tom B. Y.Lai + Samuel W. K.Chan + Tom B. Y.Lai W. J.Gao - Benjamin K.T’sou + Benjamin K.T’sou W00-0402 chan-etal-2000-mining Centroid-based summarization of multiple documents: sentence extraction, utility-based evaluation, and user studies - Dragomir R.Radev - HongyanJing + Dragomir R.Radev + HongyanJing MalgorzataBudzikowska No description of the changes were recorded. @@ -297,9 +297,9 @@ Multi-Document Summarization By Sentence Extraction - JadeGoldstein - VibhuMittal - JaimeCarbonell + JadeGoldstein + VibhuMittal + JaimeCarbonell MarkKantrowitz W00-0405 goldstein-etal-2000-multi @@ -318,7 +318,7 @@ Evaluation of Phrase-Representation Summarization based on Information Retrieval Task MamikoOka - YoshihiroUeda + YoshihiroUeda W00-0407 oka-ueda-2000-evaluation @@ -332,17 +332,17 @@ Multi-document Summarization by Visualizing Topical Content - Rie KubotaAndo - Branimir K.Boguraev + Rie KubotaAndo + Branimir K.Boguraev Roy J.Byrd - Mary S.Neff + Mary S.Neff W00-0409 ando-etal-2000-multi Using Summarization for Automatic Briefing Generation InderjeetMani - KristianConcepcion + KristianConcepcion LindaVan Guilder W00-0410 mani-etal-2000-using @@ -360,43 +360,43 @@ When is an Embedded <fixed-case>MT</fixed-case> System “Good Enough” for Filtering? - Clare R.Voss - CarolVan Ess-Dykema + Clare R.Voss + CarolVan Ess-Dykema W00-0501 voss-van-ess-dykema-2000-embedded Task Tolerance of <fixed-case>MT</fixed-case> Output in Integrated Text Processes - John S.White - Jennifer B.Doyon - Susan W.Talbott + John S.White + Jennifer B.Doyon + Susan W.Talbott W00-0502 white-etal-2000-task At Your Service: Embedded <fixed-case>MT</fixed-case> As a Service - Florence M.Reeder + Florence M.Reeder W00-0503 reeder-2000-service <fixed-case>M</fixed-case>andarin-<fixed-case>E</fixed-case>nglish Information (<fixed-case>MEI</fixed-case>): Investigating Translingual Speech Retrieval - HelenMeng - SanjeevKhudanpur - GinaLevow - Douglas W.Oard - Hsin-MinWang + HelenMeng + SanjeevKhudanpur + GinaLevow + Douglas W.Oard + Hsin-MinWang W00-0504 meng-etal-2000-mandarin Towards Translingual Information Access using Portable Information Extraction - MichaelWhite - ClaireCardie - Chung-hyeHan + MichaelWhite + ClaireCardie + Chung-hyeHan NariKim - BenoitLavoie - MarthaPalmer + BenoitLavoie + MarthaPalmer OwenRainbow JuntaeYoon W00-0505 @@ -414,7 +414,7 @@ <fixed-case>T</fixed-case>rans<fixed-case>T</fixed-case>ype: a Computer-Aided Translation Typing System - PhilippeLanglais + PhilippeLanglais GeorgeFoster GuyLapalme W00-0507 @@ -422,7 +422,7 @@ Stochastic Finite-State models for Spoken Language Machine Translation - SrinivasBangalore + SrinivasBangalore GiuseppeRiccardi W00-0508 bangalore-riccardi-2000-stochastic @@ -458,13 +458,13 @@ Some Challenges of Developing Fully-Automated Systems for Taking Audio Comprehension Exams - David D.Palmer + David D.Palmer W00-0602 palmer-2000-challenges A Rule-based Question Answering System for Reading Comprehension Tests - EllenRiloff + EllenRiloff MichaelThelen W00-0603 riloff-thelen-2000-rule @@ -472,7 +472,7 @@ Answer Extraction Towards better Evaluations of <fixed-case>NLP</fixed-case> Systems RolfSchwitter - DiegoMolla + DiegoMolla RachelFournier MichaelHess W00-0604 @@ -523,14 +523,14 @@ The Role of Algorithm Bias vs Information Source in Learning Algorithms for Morphosyntactic Disambiguation GuyDe Pauw - WalterDaelemans + WalterDaelemans W00-0704 de-pauw-daelemans-2000-role Increasing our Ignorance’ of Language: Identifying Language Structure in an Unknown ‘Signal’ - JohnElliot - EricAtwell + JohnElliot + EricAtwell BillWhyte W00-0705 elliot-etal-2000-increasing @@ -538,8 +538,8 @@ A Comparison between Supervised Learning Algorithms for Word Sense Disambiguation GerardEscudero - LluísMàrquez - GermanRigau + LluísMàrquez + GermanRigau W00-0706 escudero-etal-2000-comparison @@ -579,13 +579,13 @@ Knowledge-Free Induction of Morphology Using Latent Semantic Analysis PatrickSchone - DanielJurafsky + DanielJurafsky W00-0712 schone-jurafsky-2000-knowledge Using Induced Rules as Complex Features in Memory-Based Language Learning - Antalvan den Bosch + Antalvan den Bosch W00-0713 van-den-bosch-2000-using @@ -617,14 +617,14 @@ <fixed-case>ALL</fixed-case>i<fixed-case>S</fixed-case>: a Symbolic Learning System for Natural Language Learning - HervéDéjean + HervéDéjean W00-0718 dejean-2000-allis Combining Text and Heuristics for Cost-Sensitive Spam Filtering - José M. GómezHidalgo - Manual MañaLópez + José M. GómezHidalgo + Manual MañaLópez Enrique PuertasSanz W00-0719 hidalgo-etal-2000-combining @@ -632,7 +632,7 @@ Genetic Algorithms for Feature Relevance Assignment in Memory-Based Language Processing AnneKool - WalterDaelemans + WalterDaelemans JakubZavrel W00-0720 kool-etal-2000-genetic @@ -648,7 +648,7 @@ Minimal Commitment and Full Lexical Disambiguation: Balancing Rules and Hidden <fixed-case>M</fixed-case>arkov Models PatrickRuch RobertBaud - PierretteBouillon + PierretteBouillon GilbertRobert W00-0722 ruch-etal-2000-minimal @@ -656,13 +656,13 @@ Learning <fixed-case>IE</fixed-case> Rules for a Set of Related Concepts J.Turmo - H.Rodriguez + H.Rodriguez W00-0723 turmo-rodriguez-2000-learning A Default First Order Family Weight Determination Procedure for <fixed-case>WPDV</fixed-case> Models - Hansvan Halteren + Hansvan Halteren W00-0724 van-halteren-2000-default @@ -676,14 +676,14 @@ Introduction to the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2000 Shared Task Chunking - Erik F.Tjong Kim Sang + Erik F.Tjong Kim Sang SabineBuchholz W00-0726 tjong-kim-sang-buchholz-2000-introduction Learning Syntactic Structures with <fixed-case>XML</fixed-case> - HervéDéjean + HervéDéjean W00-0727 dejean-2000-learning @@ -701,8 +701,8 @@ Use of Support Vector Learning for Chunk Identification - TakuKudoh - YujiMatsumoto + TakuKudoh + YujiMatsumoto W00-0730 kudoh-matsumoto-2000-use @@ -722,33 +722,33 @@ Text Chunking by System Combination - Erik F.Tjong Kim Sang + Erik F.Tjong Kim Sang W00-0733 tjong-kim-sang-2000-text Chunking with <fixed-case>WPDV</fixed-case> Models - Hansvan Halteren + Hansvan Halteren W00-0734 van-halteren-2000-chunking Single-Classifier Memory-Based Phrase Chunking JornVeenstra - Antalvan den Bosch + Antalvan den Bosch W00-0735 veenstra-van-den-bosch-2000-single Phrase Parsing with Rule Sequence Processors: an Application to the Shared <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> Task - MarcVilain - DavidDay + MarcVilain + DavidDay W00-0736 vilain-day-2000-phrase Hybrid Text Chunking - GuoDongZhou + GuoDongZhou JianSu TongGuanTey W00-0737 @@ -772,7 +772,7 @@ Incorporating Linguistics Constraints into Inductive Logic Programming JamesCussens - StephenPulman + StephenPulman W00-0740 cussens-pulman-2000-incorporating @@ -788,8 +788,8 @@ Inductive Logic Programming for Corpus-Based Acquisition of Semantic Lexicons PascaleSébillot - PierretteBouillon - CecileFabre + PierretteBouillon + CecileFabre W00-0742 sebillot-etal-2000-inductive @@ -802,7 +802,7 @@ Recognition and Tagging of Compound Verb Groups in <fixed-case>C</fixed-case>zech EvaZácková - LubošPopelínský + LubošPopelínský MilošNepil W00-0744 zackova-etal-2000-recognition @@ -823,7 +823,7 @@ An Unsupervised Method for Multilingual Word Sense Tagging Using Parallel Corpora - MonaDiab + MonaDiab 10.3115/1117724.1117725 1–9 W00-0801 @@ -841,8 +841,8 @@ <fixed-case>C</fixed-case>hinese-<fixed-case>J</fixed-case>apanese Cross Language Information Retrieval: A <fixed-case>H</fixed-case>an Character Based Approach - MarufHasan - YujiMatsumoto + MarufHasan + YujiMatsumoto 10.3115/1117724.1117727 19–26 W00-0803 @@ -850,7 +850,7 @@ Experiments in Word Domain Disambiguation for Parallel Texts - BernardoMagnini + BernardoMagnini CarloStrapparava 10.3115/1117724.1117728 27–33 @@ -901,7 +901,7 @@ Comparison between Tagged Corpora for the Named Entity Task ChikashiNobata NigelCollier - Jun’ichiTsujii + Jun’ichiTsujii 10.3115/1117729.1117733 20–27 W00-0904 @@ -910,7 +910,7 @@ Verb Subcategorization Frequency Differences between Business- News and Balanced Corpora: The Role of Verb Sense DouglasRoland - DanielJurafsky + DanielJurafsky LiseMenn SusanneGahl ElezabethElder @@ -923,11 +923,11 @@ Discriminating the registers and styles in the <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek language GeorgeTambouratzis - StellaMarkantonatou + StellaMarkantonatou NikolaosHairetakis MarinaVassiliou DimitriosTambouratzis - GeorgeCarayannis + GeorgeCarayannis 10.3115/1117729.1117735 35–42 W00-0906 @@ -954,7 +954,7 @@ <fixed-case>J</fixed-case>apanese Dialogue Corpus of Multi-Level Annotation - ShuNakazato + ShuNakazato 10.3115/1117736.1117737 1–8 W00-1001 @@ -972,8 +972,8 @@ The <fixed-case>MATE</fixed-case> Markup Framework - LailaDybkjaer - Niels OleBernsen + LailaDybkjaer + Niels OleBernsen 10.3115/1117736.1117739 19–28 W00-1003 @@ -981,7 +981,7 @@ Issues in the Transcription of <fixed-case>E</fixed-case>nglish Conversational Grunts - NigelWard + NigelWard 10.3115/1117736.1117740 29–35 W00-1004 @@ -1016,7 +1016,7 @@ Using decision trees to select the grammatical relation of a noun phrase - SimonCorston-Oliver + SimonCorston-Oliver 10.3115/1117736.1117744 66–73 W00-1008 @@ -1024,7 +1024,7 @@ A Common Theory of Information Fusion from Multiple Text Sources Step One: Cross-Document Structure - DragomirRadev + DragomirRadev 10.3115/1117736.1117745 74–83 W00-1009 @@ -1032,9 +1032,9 @@ Social Goals in Conversational Cooperation - GuidoBoella + GuidoBoella RossanaDamiano - LeonardoLesmo + LeonardoLesmo 10.3115/1117736.1117746 84–93 W00-1010 @@ -1042,8 +1042,8 @@ Dynamic User Level and Utility Measurement for Adaptive Dialog in a Help-Desk System - PreetamMaloor - JoyceChai + PreetamMaloor + JoyceChai 10.3115/1117736.1117747 94–101 W00-1011 @@ -1051,8 +1051,8 @@ Dialogue Management in the Agreement Negotiation Process: A Model that Involves Natural Reasoning - MareKoit - HaldurOim + MareKoit + HaldurOim 10.3115/1117736.1117748 102–111 W00-1012 @@ -1070,7 +1070,7 @@ Dialogue and Domain Knowledge Management in Dialogue Systems AnnikaFlycht-Eriksson - ArneJonsson + ArneJonsson 10.3115/1117736.1117750 121–130 W00-1014 @@ -1098,7 +1098,7 @@ <fixed-case>WIT</fixed-case>: A Toolkit for Building Robust and Real-Time Spoken Dialogu Systems MikioNakano NoboruMiyazaki - NorihitoYasuda + NorihitoYasuda AkiraSugiyama Jun-ichiHirasawa KohjiDohsaka @@ -1146,7 +1146,7 @@ Exploiting Lexical Expansions and <fixed-case>B</fixed-case>oolean Compositions for Web Querying - BernardoMagnini + BernardoMagnini RobertoPrevete 10.3115/1117755.1117758 13–21 @@ -1156,7 +1156,7 @@ Use of Dependency Tree Structures for the Microcontext Extraction MartinHolub - AlenaBohmova + AlenaBohmova 10.3115/1117755.1117759 23–33 W00-1103 @@ -1164,8 +1164,8 @@ Semantic Indexing using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Senses - RadaMihalcea - DanMoldovan + RadaMihalcea + DanMoldovan 10.3115/1117755.1117760 35–45 W00-1104 @@ -1183,8 +1183,8 @@ Corpus-Based Learning of Compound Noun Indexing Byung-KwanKwak Jee-HyubKim - GeunbaeLee - Jung YunSeo + GeunbaeLee + Jung YunSeo 10.3115/1117755.1117763 57–66 W00-1106 @@ -1201,8 +1201,8 @@ A Text Categorization Based on a Summarization Extraction - Sue J.Ker - Jen-NanChen + Sue J.Ker + Jen-NanChen 10.3115/1117755.1117766 79–83 W00-1108 @@ -1219,7 +1219,7 @@ Automatic summarization of search engine hit lists - Dragomir R.Radev + Dragomir R.Radev WeiguoFan 10.3115/1117755.1117768 99–109 @@ -1247,7 +1247,7 @@ Two Statistical Parsing Models Applied to the <fixed-case>C</fixed-case>hinese Treebank - Daniel M.Bikel + Daniel M.Bikel DavidChiang 10.3115/1117769.1117771 1–6 @@ -1265,7 +1265,7 @@ Knowledge Extraction for Identification of <fixed-case>C</fixed-case>hinese Organization Names - Keh-JiannChen + Keh-JiannChen Chao-janChert 10.3115/1117769.1117773 15–21 @@ -1274,7 +1274,7 @@ Using Co-occurrence Statistics as an Information Source for Partial Parsing of <fixed-case>C</fixed-case>hinese - Elliott FrancoDrabek + Elliott FrancoDrabek QjangZhou 10.3115/1117769.1117774 22–28 @@ -1284,9 +1284,9 @@ <fixed-case>S</fixed-case>inica <fixed-case>T</fixed-case>reebank: Design Criteria, Annotation Guidelines, and On-line Interface Chu-RenHuang - Feng-YiChen - Keh-JiannChen - Zhao-mingGao + Feng-YiChen + Keh-JiannChen + Zhao-mingGao Kuang-YuChen 10.3115/1117769.1117775 29–37 @@ -1295,9 +1295,9 @@ Enhancement of a <fixed-case>C</fixed-case>hinese Discourse Marker Tagger with <fixed-case>C</fixed-case>4.5 - Benjamin K.T’sou - Tom B.YLai - Samuel W.K.Chan + Benjamin K.T’sou + Tom B.YLai + Samuel W.K.Chan WeijunGao XuegangZhan 10.3115/1117769.1117776 @@ -1317,9 +1317,9 @@ Comparing Lexicalized Treebank Grammars Extracted from <fixed-case>C</fixed-case>hinese, <fixed-case>K</fixed-case>orean, and <fixed-case>E</fixed-case>nglish Corpora FeiXia - ChunghyeHan - MarthaPalmer - AravindJoshi + ChunghyeHan + MarthaPalmer + AravindJoshi 10.3115/1117769.1117778 52–59 W00-1208 @@ -1338,7 +1338,7 @@ A Trainable Method for Extracting <fixed-case>C</fixed-case>hinese Entity Names and Their Relations YiminZhang - Joe F.Zhou + Joe F.Zhou 10.3115/1117769.1117780 66–72 W00-1210 @@ -1346,10 +1346,10 @@ Statistics Based Hybrid Approach to <fixed-case>C</fixed-case>hinese Base Phrase Identification - Tie-junZhao - Mu-yunYang + Tie-junZhao + Mu-yunYang FangLiu - Jian-minYao + Jian-minYao HaoYu 10.3115/1117769.1117781 73–77 @@ -1366,8 +1366,8 @@ Annotating Information Structures in <fixed-case>C</fixed-case>hinese Texts Using <fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et - Kok WeeGan - Ping WaiWong + Kok WeeGan + Ping WaiWong 10.3115/1117769.1117784 85–92 W00-1213 @@ -1377,7 +1377,7 @@ Machine Learning Methods for <fixed-case>C</fixed-case>hinese Web page Categorization JiHe Ah-HweeTan - Chew-LimTan + Chew-LimTan 10.3115/1117769.1117785 93–100 W00-1214 @@ -1385,7 +1385,7 @@ Semantic Annotation of <fixed-case>C</fixed-case>hinese Phrases Using Recursive Graph - DonghongJi + DonghongJi 10.3115/1117769.1117786 101–108 W00-1215 @@ -1401,7 +1401,7 @@ How Should a Large Corpus Be Built?-A Comparative Study of Closure in Annotated Newspaper Corpora from Two <fixed-case>C</fixed-case>hinese Sources, Towards Building a Larger Representative Corpus Merged from Representative Sublanguage Collections - John J.Kovarik + John J.Kovarik 10.3115/1117769.1117788 116–123 W00-1217 @@ -1411,7 +1411,7 @@ A Clustering Algorithm for <fixed-case>C</fixed-case>hinese Adjectives and Nouns YangWen ChunfaYuan - ChangningHuang + ChangningHuang 10.3115/1117769.1117789 124–131 W00-1218 @@ -1480,8 +1480,8 @@ <fixed-case>J</fixed-case>apanese Dependency Structure Analysis Based on Support Vector Machines - TakuKudo - YujiMatsumoto + TakuKudo + YujiMatsumoto 10.3115/1117794.1117797 18–25 W00-1303 @@ -1489,8 +1489,8 @@ Coaxing Confidences from an Old Freind: Probabilistic Classifications from Transformation Rule Lists - RaduFlorian - John C.Henderson + RaduFlorian + John C.Henderson GraceNgai 10.3115/1117794.1117798 26–34 @@ -1517,8 +1517,8 @@ A Uniform Method of Grammar Extraction and Its Applications FeiXia - MarthaPalmer - AravindJoshi + MarthaPalmer + AravindJoshi 10.3115/1117794.1117801 53–62 W00-1307 @@ -1527,7 +1527,7 @@ Enriching the Knowledge Sources Used in a Maximum Entropy Part-of-Speech Tagger KristinaToutanvoa - Christopher D.Manning + Christopher D.Manning 10.3115/1117794.1117802 63–70 W00-1308 @@ -1535,7 +1535,7 @@ Error-driven <fixed-case>HMM</fixed-case>-based Chunk Tagger with Context-dependent Lexicon - GuoDongZhou + GuoDongZhou JianSu 10.3115/1117794.1117803 71–79 @@ -1566,7 +1566,7 @@ Cross-lingual Information Retrieval Using Hidden <fixed-case>M</fixed-case>arkov Models JinxiXu - RalphWeischedel + RalphWeischedel 10.3115/1117794.1117806 95–103 W00-1312 @@ -1577,7 +1577,7 @@ YiboZhang LeSun LinDu - YufangSun + YufangSun 10.3115/1117794.1117807 104–109 W00-1313 @@ -1588,7 +1588,7 @@ LeSun YoubingJin LinDu - YufangSun + YufangSun 10.3115/1117794.1117808 110–116 W00-1314 @@ -1597,7 +1597,7 @@ Empirical Term Weighting and Expansion Frequency KyojiUmemura - Kenneth W.Church + Kenneth W.Church 10.3115/1117794.1117809 117–123 W00-1315 @@ -1616,7 +1616,7 @@ Automated Construction of Database Interfaces: Intergrating Statistical and Relational Learning for Semantic Parsing Lappoon R.Tang - Raymond J.Mooney + Raymond J.Mooney 10.3115/1117794.1117811 133–141 W00-1317 @@ -1624,7 +1624,7 @@ Automatic <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Mapping Using Word Sense Disambiguation - Daniel M.Bikel + Daniel M.Bikel 10.3115/1117794.1117812 142–147 W00-1318 @@ -1632,7 +1632,7 @@ A Real-time Integration Of Concept-based Search and Summarization of <fixed-case>C</fixed-case>hinese Websites - Joe F.Zhou + Joe F.Zhou WeiquanLiu 10.3115/1117794.1117813 148–154 @@ -1641,7 +1641,7 @@ A Statistical Model for Parsing and Word-Sense Disambiguation - Daniel M.Bikel + Daniel M.Bikel 10.3115/1117794.1117814 155–163 W00-1320 @@ -1649,9 +1649,9 @@ Reducing Parsing Complexity by Intra-Sentence Segmentation based on Maximum Entropy Model - Sung DongKim + Sung DongKim Byoung-TakZhang - Yung TaekKim + Yung TaekKim 10.3115/1117794.1117815 164–171 W00-1321 @@ -1660,8 +1660,8 @@ An Empirical Study of the Domain Dependence of Supervised Word Disambiguation Systems GerardEscudero - LluisMarquez - GermanRigau + LluisMarquez + GermanRigau 10.3115/1117794.1117816 172–180 W00-1322 @@ -1687,8 +1687,8 @@ Statistical Filtering and Subcategorization Frame Acquisition AnnaKorhonen - GenevieveGorrell - DianaMcCarthy + GenevieveGorrell + DianaMcCarthy 10.3115/1117794.1117819 199–206 W00-1325 @@ -1696,8 +1696,8 @@ One Sense per Collocation and Genre/Topic Variations - DavidMartinez - EnekoAgirre + DavidMartinez + EnekoAgirre 10.3115/1117794.1117820 207–215 W00-1326 @@ -1733,9 +1733,9 @@ Evaluation Metrics for Generation - SrinivasBangalore - OwenRambow - SteveWhittaker + SrinivasBangalore + OwenRambow + SteveWhittaker 10.3115/1118253.1118255 1–8 W00-1401 @@ -1763,7 +1763,7 @@ Document structure and multilingual authoring CarolineBrun MarcDymetman - VeronikaLux + VeronikaLux 10.3115/1118253.1118258 24–31 W00-1404 @@ -1771,9 +1771,9 @@ <fixed-case>DTD</fixed-case>-driven bilingual document generation - ArantzaCasillas + ArantzaCasillas JosebaAbaitua - RaquelMartínez + RaquelMartínez 10.3115/1118253.1118259 32–38 W00-1405 @@ -1793,7 +1793,7 @@ A strategy for generating evaluative arguments GiuseppeCarenini - JohannaMoore + JohannaMoore 10.3115/1118253.1118261 47–54 W00-1407 @@ -1822,14 +1822,14 @@ Reinterpretation of an Existing <fixed-case>NLG</fixed-case> System in a Generic Generation Architecture - LynneCahill + LynneCahill ChristyDoran - RogerEvans - ChrisMellish - DanielPaiva - MikeReape - DoniaScott - NeilTipper + RogerEvans + ChrisMellish + DanielPaiva + MikeReape + DoniaScott + NeilTipper 10.3115/1118253.1118264 69–76 W00-1410 @@ -1837,7 +1837,7 @@ An integrated framework for text planning and pronominalisation - RodgerKibble + RodgerKibble RichardPower 10.3115/1118253.1118265 77–84 @@ -1847,7 +1847,7 @@ Incremental Event Conceptualization and Natural Language Generation in Monitoring Enviroments MarkusGuhe - ChristopherHabel + ChristopherHabel HeikeTappe 10.3115/1118253.1118266 85–92 @@ -1865,7 +1865,7 @@ Generating Referring Quantified Expressions JamesShaw - KathleenMcKeown + KathleenMcKeown 10.3115/1118253.1118268 100–107 W00-1414 @@ -1874,7 +1874,7 @@ An Empirical Analysis of Constructing Non-restrictive <fixed-case>NP</fixed-case> Modifiers to Express Semantic Relations HuaCheng - ChrisMellish + ChrisMellish 10.3115/1118253.1118269 108–115 W00-1415 @@ -1901,8 +1901,8 @@ Optimising text quality in generation from relational databases MichaelO’Donnell AlistairKnott - JonOberlander - ChrisMellish + JonOberlander + ChrisMellish 10.3115/1118253.1118272 133–140 W00-1418 @@ -1912,7 +1912,7 @@ Generating a controlled language LaurenceDanlos GuyLapalme - VeronikaLux + VeronikaLux 10.3115/1118253.1118273 141–147 W00-1419 @@ -1941,7 +1941,7 @@ Enriching partially-specified representations for text realization using an attribute grammar SongsakChannarukul - Susan W.McRoy + Susan W.McRoy Syed S.Ali 10.3115/1118253.1118276 163–170 @@ -1960,7 +1960,7 @@ Generating Vague Descriptions - Keesvan Deemter + Keesvan Deemter 10.3115/1118253.1118278 179–185 W00-1424 @@ -1969,7 +1969,7 @@ Capturing the Interaction between Aggregation and Text Planning in Two Generation Systems HuaCheng - ChrisMellish + ChrisMellish 10.3115/1118253.1118279 186–193 W00-1425 @@ -1979,7 +1979,7 @@ Can text structure be incompatible with rhetorical structure? NadjetBouayad-Agha RichardPower - DoniaScott + DoniaScott 10.3115/1118253.1118280 194–200 W00-1426 @@ -1988,7 +1988,7 @@ Robust, applied morphological generation GuidoMinnen - JohnCarroll + JohnCarroll DarrenPearce 10.3115/1118253.1118281 201–208 @@ -1997,10 +1997,10 @@ Integrating a Large-Scale, Reusable Lexicon with a Natural Language Generator - HongyanJing - YaelDahan + HongyanJing + YaelDahan MichaelElhadad - KathyMcKeown + KathyMcKeown 10.3115/1118253.1118282 209–216 W00-1428 @@ -2042,7 +2042,7 @@ Rhetorical Structure in Dialog - AmandaStent + AmandaStent 10.3115/1118253.1118288 247–252 W00-1433 @@ -2060,8 +2060,8 @@ Demonstration of <fixed-case>ILEX</fixed-case> 3.0 MichaelO’Donnell AlistairKnott - JonOberlander - ChrisMellish + JonOberlander + ChrisMellish 10.3115/1118253.1118291 257–259 W00-1435 @@ -2079,7 +2079,7 @@ <fixed-case>YAG</fixed-case>: A Template-Based Generator for Real-Time Systems - Susan W.McRoy + Susan W.McRoy SongsakChannarukul Syed S.Ali 10.3115/1118253.1118293 @@ -2090,7 +2090,7 @@ An Efficient Text Summarizer using Lexical Chains H. GregorySilber - Kathleen F.McCoy + Kathleen F.McCoy 10.3115/1118253.1118294 268–271 W00-1438 @@ -2118,7 +2118,7 @@ Proceedings of the COLING-2000 Workshop on Using Toolsets and Architectures To Build NLP Systems W00-15 - RémiZajac + RémiZajac International Committee on Computational Linguistics
Centre Universitaire, Luxembourg
August @@ -2131,11 +2131,11 @@ Experience using <fixed-case>GATE</fixed-case> for <fixed-case>NLP</fixed-case> <fixed-case>R</fixed-case>&<fixed-case>D</fixed-case> - HamishCunningham + HamishCunningham DianaMaynard - KalinaBontcheva + KalinaBontcheva ValentinTablan - YorickWilks + YorickWilks 1–8 W00-1501 cunningham-etal-2000-experience @@ -2143,18 +2143,18 @@ Composing a General-Purpose Toolbox for <fixed-case>S</fixed-case>wedish FredrikOlsson - BjörnGambäck + BjörnGambäck 9–18 W00-1502 olsson-gamback-2000-composing An Experiment in Unifying Audio-Visual and Textual Infrastructures for Language Processing Research and Development - KalinaBontcheva - HennieBrugman - HamishCunningham - AlbertRussel - PeterWittenburg + KalinaBontcheva + HennieBrugman + HamishCunningham + AlbertRussel + PeterWittenburg 19–25 W00-1503 bontcheva-etal-2000-experiment @@ -2176,7 +2176,7 @@ The <fixed-case>XML</fixed-case> Framework and Its Implications for the Development of Natural Language Processing Tools - NancyIde + NancyIde 38–43 W00-1506 ide-2000-xml @@ -2215,8 +2215,8 @@ Proceedings of the COLING-2000 Workshop on Efficiency In Large-Scale Parsing Systems W00-16 - JohnCarroll - Robert C.Moore + JohnCarroll + Robert C.Moore StephanOepen International Committee on Computational Linguistics
Centre Universitaire, Luxembourg
@@ -2263,14 +2263,14 @@ Some Experiments on Indicators of Parsing Complexity for Lexicalized Grammars AnoopSarkar FeiXia - AravindJoshi + AravindJoshi 37–42 W00-1605 sarkar-etal-2000-experiments
Large Scale Parsing of <fixed-case>C</fixed-case>zech - PavelSmrž + PavelSmrž AlešHorák 43–50 W00-1606 @@ -2298,7 +2298,7 @@ Proceedings of the COLING-2000 Workshop on Semantic Annotation and Intelligent Content W00-17 PaulBuitelaar - KôitiHasida + KôitiHasida International Committee on Computational Linguistics
Centre Universitaire, Luxembourg
August @@ -2319,8 +2319,8 @@
Exploring Automatic Word Sense Disambiguation with Decision Lists and the Web - EnekoAgirre - DavidMartinez + EnekoAgirre + DavidMartinez 11–19 W00-1702 agirre-martinez-2000-exploring @@ -2329,7 +2329,7 @@ Improving Natural Language Processing by Linguistic Document Annotation HideoWatanabe KatashiNagao - MichaelMcCord + MichaelMcCord ArendseBernth 20–27 W00-1703 @@ -2337,18 +2337,18 @@ Building an Annotated Corpus in the Molecular-Biology Domain - YukaTateisi + YukaTateisi TomokoOhta NigelCollier ChikashiNobata - Jun-ichiTsujii + Jun-ichiTsujii 28–34 W00-1704 tateisi-etal-2000-building Semantic Annotation for Generation: Issues in Annotating a Corpus to Develop and Evaluate Discourse Entity Realization Algorithms - MassimoPoesio + MassimoPoesio 37–43 W00-1705 poesio-2000-semantic @@ -2403,7 +2403,7 @@ Proceedings of the Fifth Workshop of the ACL Special Interest Group in Computational Phonology W00-18 - JasonEisner + JasonEisner LauriKarttunen AlainThèriault International Committee on Computational Linguistics @@ -2441,14 +2441,14 @@ Approximation and Exactness in Finite State <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory DaleGerdemann - Gertjanvan Noord + Gertjanvan Noord 34–45 W00-1804 gerdemann-van-noord-2000-approximation Multi-Syllable Phonotactic Modelling - AnjaBelz + AnjaBelz 46–56 W00-1805 belz-2000-multi @@ -2465,7 +2465,7 @@ Proceedings of the COLING-2000 Workshop on Linguistically Interpreted Corpora W00-19 - AnneAbeille + AnneAbeille ThorstenBrants HansUszkoreit International Committee on Computational Linguistics @@ -2480,8 +2480,8 @@ Comparing Linguistic Interpretation Schemes for <fixed-case>E</fixed-case>nglish Corpora - EricAtwell - GeorgeDemetriou + EricAtwell + GeorgeDemetriou JohnHughes AmandaSchiffrin CliveSouter @@ -2492,8 +2492,8 @@ Dependency-based Syntactic Annotation of a Chiense Corpus - Tom B. Y.Lai - HuangChangning + Tom B. Y.Lai + ChangningHuang 11–17 W00-1902 lai-huang-2000-dependency-based @@ -2544,7 +2544,7 @@ The Detection of Inconsistency in Manually Tagged Text - Hansvan Halteren + Hansvan Halteren 48–55 W00-1907 van-halteren-2000-detection @@ -2558,16 +2558,16 @@ Automatic Procedures in Tectogrammatical Tagging - AlenaBöhmová - PetrSgall + AlenaBöhmová + PetrSgall 65–70 W00-1909 bohmova-sgall-2000-automatic Considering Automatic Aids to Corpus Annotation - DavidDay - BenjaminWellner + DavidDay + BenjaminWellner 71–79 W00-1910 day-wellner-2000-considering @@ -2588,8 +2588,8 @@ The current status of <fixed-case>FTAG</fixed-case> - AnneAbeillé - Marie-HélèneCandito + AnneAbeillé + Marie-HélèneCandito AlexandraKinyon 11–18 W00-2001 @@ -2597,8 +2597,8 @@ A redefinition of Embedded Push-Down Automata - Miguel A.Alonso - ÉricVillemonte de la Clergerie + Miguel A.Alonso + ÉricVillemonte de la Clergerie ManuelVilares 19–26 W00-2002 @@ -2606,17 +2606,17 @@ Practical aspects in compiling tabular <fixed-case>TAG</fixed-case> parsers - Miguel A.Alonso - DjaméSeddah - ÉricVillemonte de la Clergerie + Miguel A.Alonso + DjaméSeddah + ÉricVillemonte de la Clergerie 27–32 W00-2003 alonso-etal-2000-practical Using <fixed-case>TAG</fixed-case>s, a Tree Model, and a Language Model for Generation - SrinivasBangalore - OwenRambow + SrinivasBangalore + OwenRambow 33–40 W00-2004 bangalore-rambow-2000-using @@ -2640,11 +2640,11 @@ Engineering a Wide-Coverage Lexicalized Grammar - JohnCarroll - NicolasNicolov + JohnCarroll + NicolasNicolov OlgaShaumyan MartineSmets - DavidWeir + DavidWeir 55–60 W00-2007 carroll-etal-2000-engineering @@ -2660,8 +2660,8 @@ Bidirectional parsing of <fixed-case>TAG</fixed-case> without heads - Víctor J.Díaz - Miguel A.Alonso + Víctor J.Díaz + Miguel A.Alonso VicenteCarrillo 67–72 W00-2009 @@ -2669,14 +2669,14 @@ Punctuation in a Lexicalized Grammar - ChristineDoran + ChristineDoran 73–78 W00-2010 doran-2000-punctuation A faster parsing algorithm for <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - JasonEisner + JasonEisner GiorgioSatta 79–84 W00-2011 @@ -2691,8 +2691,8 @@ The <fixed-case>S</fixed-case>ino-<fixed-case>K</fixed-case>orean light verb construction and lexical argument structure - Chung-hyeHan - OwenRambow + Chung-hyeHan + OwenRambow 93–100 W00-2013 han-rambow-2000-sino @@ -2707,7 +2707,7 @@ Relationship between strong and weak generative power of formal systems - Aravind K.Joshi + Aravind K.Joshi 107–114 W00-2015 joshi-2000-relationship @@ -2715,7 +2715,7 @@ An alternative description of extractions in <fixed-case>TAG</fixed-case> SylvainKahane - Marie-HélèneCandito + Marie-HélèneCandito Yannickde Kercadio 115–122 W00-2016 @@ -2751,10 +2751,10 @@ Building a class-based verb lexicon using <fixed-case>TAG</fixed-case>s - KarinKipper - Hoa TrangDang + KarinKipper + Hoa TrangDang WilliamSchuler - MarthaPalmer + MarthaPalmer 147–154 W00-2021 kipper-etal-2000-building @@ -2791,8 +2791,8 @@ A comparison of the <fixed-case>XTAG</fixed-case> and <fixed-case>CLE</fixed-case> Grammars for <fixed-case>E</fixed-case>nglish - MannyRayner - Beth AnnHockey + MannyRayner + Beth AnnHockey FrankieJames 185–192 W00-2026 @@ -2809,8 +2809,8 @@ Lexicalized grammar and the description of motion events MatthewStone ToniaBleam - ChristineDoran - MarthaPalmer + ChristineDoran + MarthaPalmer 199–206 W00-2028 stone-etal-2000-lexicalized @@ -2833,7 +2833,7 @@ Customizing the <fixed-case>XTAG</fixed-case> system for efficient grammar development for <fixed-case>K</fixed-case>orean JuntaeYoon - Chung-hyeHan + Chung-hyeHan NariKim MeesookKim 221–226 @@ -2842,7 +2842,7 @@ Deriving polarity effects - RaffaellaBernardi + RaffaellaBernardi 229–232 W00-2032 bernardi-2000-deriving @@ -2856,9 +2856,9 @@ Elementary trees for syntactic and statistical disambiguation - RodolfoDelmonte - LuminitaChiran - CiprianBacalu + RodolfoDelmonte + LuminitaChiran + CiprianBacalu 237–240 W00-2034 delmonte-etal-2000-elementary @@ -2912,7 +2912,7 @@ Comparing and integrating <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars FeiXia - MarthaPalmer + MarthaPalmer 265–268 W00-2041 xia-palmer-2000-comparing diff --git a/data/xml/W01.xml b/data/xml/W01.xml index dd6e343908..ecf29b50e5 100644 --- a/data/xml/W01.xml +++ b/data/xml/W01.xml @@ -51,8 +51,8 @@ Limitations of Co-Training for Natural Language Learning from Large Datasets - DavidPierce - ClaireCardie + DavidPierce + ClaireCardie W01-0501 pierce-cardie-2001-limitations @@ -67,7 +67,7 @@ Learning Within-Sentence Semantic Coherence ElenaEneva RoseHoberman - LucianLita + LucianLita W01-0503 eneva-etal-2001-learning @@ -92,7 +92,7 @@ IonAndroutsopoulos GeorgiosPaliouras VangelisKarkaletsis - Constantine D.Spyropoulos + Constantine D.Spyropoulos PanagiotisStamatopoulos W01-0506 sakkis-etal-2001-stacking @@ -100,14 +100,14 @@ Feature Space Restructuring for <fixed-case>SVM</fixed-case>s with Application to Text Categorization HiroyaTakamura - YujiMatsumoto + YujiMatsumoto W01-0507 takamura-matsumoto-2001-feature Using Bins to Empirically Estimate Term Weights for Text Categorization CarlSable - Kenneth W.Church + Kenneth W.Church W01-0508 sable-church-2001-using @@ -129,7 +129,7 @@ Classifying the Semantic Relations in Noun Compounds via a Domain-Specific Lexical Hierarchy BarbaraRosario - MartiHearst + MartiHearst W01-0511 rosario-hearst-2001-classifying @@ -144,7 +144,7 @@ Is Knowledge-Free Induction of Multiword Unit Dictionary Headwords a Solved Problem? PatrickSchone - DanielJurafsky + DanielJurafsky W01-0513 schone-jurafsky-2001-knowledge-free @@ -152,7 +152,7 @@ Latent Semantic Analysis for Text Segmentation Freddy Y. Y.Choi PeterWiemer-Hastings - JohannaMoore + JohannaMoore W01-0514 choi-etal-2001-latent @@ -173,9 +173,9 @@ Automatic Corpus-based Tone Prediction using K-<fixed-case>T</fixed-case>o<fixed-case>BI</fixed-case> Representation - Jin-SeokLee + Jin-SeokLee ByeongchangKim - Gary GeunbaeLee + Gary GeunbaeLee W01-0517 lee-etal-2001-automatic @@ -187,15 +187,15 @@ Comparing Data-Driven Learning Algorithms for <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> Tagging of <fixed-case>S</fixed-case>wedish - BeátaMegyesi + BeátaMegyesi W01-0519 megyesi-2001-comparing Impact of Quality and Quantity of Corpora on Stochastic Generation - SrinivasBangalore + SrinivasBangalore JohnChen - OwenRambow + OwenRambow W01-0520 bangalore-etal-2001-impact @@ -228,7 +228,7 @@ Multidimensional transformation-based learning - RaduFlorian + RaduFlorian GraceNgai W01-0701 florian-ngai-2001-multidimensional @@ -236,22 +236,22 @@ Combining a self-organising map with memory-based learning JamesHammerton - Erik F.Tjong Kim Sang + Erik F.Tjong Kim Sang W01-0702 hammerton-tjong-kim-sang-2001-combining Learning class-to-class selectional preferences - EnekoAgirre - DavidMartinez + EnekoAgirre + DavidMartinez W01-0703 agirre-martinez-2001-learning Semantic pattern learning through maximum entropy-based <fixed-case>WSD</fixed-case> technique - MaximilianoSaiz-Noeda - ArmandoSuárez - ManuelPalomar + MaximilianoSaiz-Noeda + ArmandoSuárez + ManuelPalomar W01-0704 saiz-noeda-etal-2001-semantic @@ -271,15 +271,15 @@ Probabilistic models for <fixed-case>PP</fixed-case>-attachment resolution and <fixed-case>NP</fixed-case> analysis - EricGaussier + EricGaussier NicolaCancedda W01-0707 gaussier-cancedda-2001-probabilistic Introduction to the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2001 shared task: clause identification - Erik F.Tjong Kim Sang - HervéDéjean + Erik F.Tjong Kim Sang + HervéDéjean W01-0708 tjong-kim-sang-dejean-2001-introduction @@ -298,22 +298,22 @@ <fixed-case>M</fixed-case>orpholog: Constrained and Supervised Learning of Morphology - RémiZajac + RémiZajac W01-0711 zajac-2001-morpholog Learning Computational Grammars JohnNerbonne - AnjaBelz + AnjaBelz NicolaCancedda - HervéDéjean + HervéDéjean JamesHammerton RobKoeling StasinosKonstantopoulos MilesOsborne FranckThollard - Erik F.Tjong Kim Sang + Erik F.Tjong Kim Sang W01-0712 nerbonne-etal-2001-learning @@ -326,7 +326,7 @@ Distributional phrase structure induction DanKlein - Christopher D.Manning + Christopher D.Manning W01-0714 klein-manning-2001-distributional @@ -339,8 +339,8 @@ Learning to identify animate references - ConstantinOrasan - RichardEvans + ConstantinOrasan + RichardEvans W01-0716 orasan-evans-2001-learning @@ -359,8 +359,8 @@ Combining linguistic and machine learning techniques for email summarization SmarandaMuresan - EvelyneTzoukermann - Judith L.Klavans + EvelyneTzoukermann + Judith L.Klavans W01-0719 muresan-etal-2001-combining @@ -373,7 +373,7 @@ Boosted decision graphs for <fixed-case>NLP</fixed-case> learning tasks - Jon D.Patrick + Jon D.Patrick IshaanGoyal W01-0721 patrick-goyal-2001-boosted @@ -386,13 +386,13 @@ Using <fixed-case>ALL</fixed-case>i<fixed-case>S</fixed-case> for clausing - HervéDéjean + HervéDéjean W01-0723 dejean-2001-using Memory-based clause identification - Erik F.Tjong Kim Sang + Erik F.Tjong Kim Sang W01-0724 tjong-kim-sang-2001-memory @@ -406,7 +406,7 @@ Boosting trees for clause splitting XavierCarreras - LluísMàrquez + LluísMàrquez W01-0726 carreras-marquez-2001-boosting @@ -417,7 +417,7 @@ Association for Computational Linguistics
Toulouse, France
HelmutHoracek - NicolasNicolov + NicolasNicolov LeoWanner 2001 enlg @@ -428,13 +428,13 @@ Corpus-Based Methods in Natural Language Generation: <fixed-case>F</fixed-case>riends or Foe? (invited talk) - OwenRambow + OwenRambow W01-0801 rambow-2001-corpus A Two-Staged Model For Content Determination - Somayajula G.Sripada + Somayajula G.Sripada EhudReiter JimHunter JinYu @@ -451,14 +451,14 @@ Logical Form Equivalence: the Case of Referring Expressions Generation - Keesvan Deemter + Keesvan Deemter Magnús M.Halldórsson W01-0804 van-deemter-halldorsson-2001-logical A Meta-Algorithm for the Generation of Referring Expressions - EmielKrahmer + EmielKrahmer Sebastiaanvan Erk AndréVerleg W01-0805 @@ -496,9 +496,9 @@ Linear Order as Higher-Level Decision: Information Structure in Strategic and Tactical Generation - Geert-Jan M.Kruijff - IvanaKruijff-Korbayovà - JohnBateman + Geert-Jan M.Kruijff + IvanaKruijff-Korbayovà + JohnBateman ElkeTeich W01-0810 kruijff-etal-2001-linear @@ -511,7 +511,7 @@ Reusing a Statistical Language Model for Generation - KevinHumphreys + KevinHumphreys MikeCalcagno DavidWeise W01-0812 @@ -520,8 +520,8 @@ Applying Natural Language Generation to Indicative Summarization Min-YenKan - Kathleen R.McKeown - Judith L.Klavans + Kathleen R.McKeown + Judith L.Klavans W01-0813 kan-etal-2001-applying @@ -534,8 +534,8 @@ Evaluating Text Quality: Judging Output Texts Without a Clear Source - AnthonyHartley - DoniaScott + AnthonyHartley + DoniaScott W01-0815 hartley-scott-2001-evaluating @@ -552,7 +552,7 @@ Introduction - PatrickParoubek + PatrickParoubek W01-0901 paroubek-2001-introduction @@ -564,8 +564,8 @@
Usability Evaluation in Spoken Language Dialogue Systems - LailaDybkjær - Niels O.Bernsen + LailaDybkjær + Niels O.Bernsen W01-0903 dybkjaer-bernsen-2001-usability @@ -587,14 +587,14 @@ Verification and validation of language processing systems: Is it evaluation? ValerieBarr - Judith L.Klavans + Judith L.Klavans W01-0906 barr-klavans-2001-verification The <fixed-case>ARC</fixed-case> A3 Project: Terminology Acquisition Tools: Evaluation Method and Task - Widad Mustafa ElHadi - IsmailTimimi + Widad Mustafa ElHadi + IsmailTimimi AnnetteBeguin Marciliode Brito W01-0907 @@ -608,7 +608,7 @@ A Cross-Comparison of Two Clustering Methods - MicheleJardino + MicheleJardino BrigitteGrau OlivierFerret W01-0909 @@ -633,7 +633,7 @@ Human Language Technologies for Knowledge Management - MarkMaybury + MarkMaybury W01-1002 maybury-2001-human @@ -645,21 +645,21 @@
Using <fixed-case>HLT</fixed-case> for Acquiring, Retrieving and Publishing Knowledge in <fixed-case>AKT</fixed-case> - KalinaBontcheva + KalinaBontcheva ChristopherBrewster - FabioCiravegna - HamishCunningham - LouiseGuthrie - RobertGaizauskas - YorickWilks + FabioCiravegna + HamishCunningham + LouiseGuthrie + RobertGaizauskas + YorickWilks W01-1004 bontcheva-etal-2001-using Identification of Relevant Terms to Support the Construction of Domain Ontologies - PaolaVelardi + PaolaVelardi MicheleMissikoff - RobertoBasili + RobertoBasili W01-1005 velardi-etal-2001-identification @@ -673,7 +673,7 @@ The Form is the Substance: Classification of Genres in Text NigelDewdney - CarolVanEss-Dykema + CarolVanEss-Dykema RichardMacMillan W01-1007 dewdney-etal-2001-form @@ -700,44 +700,44 @@ <fixed-case>GIST</fixed-case>-<fixed-case>IT</fixed-case>: Combining Linguistic and Machine Learning Techniques for Email Summarization - EvelyneTzoukermann + EvelyneTzoukermann SmarandaMuresan - Judith L.Klavans + Judith L.Klavans W01-1011 tzoukermann-etal-2001-gist What are the points? What are the stances? Decanting for Question-driven Retrieval and Executive Summarization - Jean-FrançoisDelannoy + Jean-FrançoisDelannoy W01-1012 delannoy-2001-points Multilingual Authoring: the <fixed-case>NAMIC</fixed-case> Approach - RobertoBasili - Maria TeresaPazienza - Fabio MassimoZanzotto - RobertaCatizone - AndreaSetzer - NickWebb - YorickWilks - LluísPadró - GermanRigau + RobertoBasili + Maria TeresaPazienza + Fabio MassimoZanzotto + RobertaCatizone + AndreaSetzer + NickWebb + YorickWilks + LluísPadró + GermanRigau W01-1013 basili-etal-2001-multilingual Automatic Augmentation of Translation Dictionary with Database Terminologies In Multilingual Query Interpretation HodongLee - Jong C.Park + Jong C.Park W01-1014 lee-park-2001-automatic Adapting and Extending Lexical Resources in a Dialogue System - AnaGarcía-Serrano + AnaGarcía-Serrano PalomaMartínez - LuisRodrigo + LuisRodrigo W01-1015 garcia-serrano-etal-2001-adapting @@ -750,14 +750,14 @@ The Automatic Generation of Formal Annotations in a Multimedia Indexing and Searching Environment ThierryDeclerck - PeterWittenburg - HamishCunningham + PeterWittenburg + HamishCunningham W01-1017 declerck-etal-2001-automatic Human Language Technology and Knowledge Management - Final Roadmap Session - Niels OleBernsen + Niels OleBernsen W01-1018 bernsen-2001-human @@ -784,12 +784,12 @@ Looking Under the Hood: Tools for Diagnosing Your Question Answering Engine - EricBreck + EricBreck MarcLight - GideonMann - EllenRiloff + GideonMann + EllenRiloff BrianneBrown - PranavAnand + PranavAnand W01-1201 breck-etal-2001-looking @@ -797,8 +797,8 @@ <fixed-case>MAYA</fixed-case>: A Fast Question-answering System Based on a Predictive Answer Indexer HarksooKim KyungsunKim - Gary GeunbaeLee - JungyunSeo + Gary GeunbaeLee + JungyunSeo W01-1202 kim-etal-2001-maya
@@ -810,20 +810,20 @@
A Statistical Method for Short Answer Extraction - GideonMann + GideonMann W01-1204 mann-2001-statistical Towards Ontological Question Answering - RemiZajac + RemiZajac W01-1205 zajac-2001-towards Answer Mining from On-Line Documents - MariusPasca - SandaHarabagiu + MariusPasca + SandaHarabagiu W01-1206 pasca-harabagiu-2001-answer @@ -872,9 +872,9 @@
Towards Invariant Meanings Of Spatial Prepositions and Preverbs - Jean-PierreDesclés + Jean-PierreDesclés EwaGwiazdecka - AzucenaMontes-Rendon + AzucenaMontes-Rendon W01-1303 descles-etal-2001-towards @@ -887,7 +887,7 @@ A Model For Processing Temporal References In <fixed-case>C</fixed-case>hinese WenjieLi - Kam-FaiWong + Kam-FaiWong ChunfaYuan W01-1305 li-etal-2001-model @@ -915,20 +915,20 @@ From Temporal Expressions To Temporal Information: Semantic Tagging Of News Messages FrankSchilder - ChristopherHabel + ChristopherHabel W01-1309 schilder-habel-2001-temporal Some Facts About Times, Events and Subjects - Invited Talk - FabioPianesi + FabioPianesi W01-1310 pianesi-2001-facts A Pilot Study On Annotating Temporal Relations In Text - AndreaSetzer - RobertGaizauskas + AndreaSetzer + RobertGaizauskas W01-1311 setzer-gaizauskas-2001-pilot @@ -936,7 +936,7 @@ A Multilingual Approach To Annotating And Extracting Temporal Information GeorgeWilson InderjeetMani - BethSundheim + BethSundheim LisaFerro W01-1312 wilson-etal-2001-multilingual @@ -944,7 +944,7 @@ Assigning Time-Stamps To Event-Clauses ElenaFilatova - EduardHovy + EduardHovy W01-1313 filatova-hovy-2001-assigning @@ -956,7 +956,7 @@ The Annotation Of Temporal Information In Natural Language Sentences - GrahamKatz + GrahamKatz FabrizioArosio W01-1315 katz-arosio-2001-annotation @@ -974,14 +974,14 @@ Example-based machine translation using <fixed-case>DP</fixed-case>-matching between work sequences - EiichiroSumita + EiichiroSumita W01-1401 sumita-2001-example Overcoming the customization bottleneck using example-based <fixed-case>MT</fixed-case> Stephen D.Richardson - William B.Dolan + William B.Dolan ArulMenezes MonicaCorston-Oliver W01-1402 @@ -989,8 +989,8 @@ Inducing Lexico-Structural Transfer Rules from Parsed Bi-texts - BenoitLavoie - MichaelWhite + BenoitLavoie + MichaelWhite TanyaKorelsky W01-1403 lavoie-etal-2001-inducing @@ -1003,7 +1003,7 @@ Stochastic Modelling: From Pattern Classification to Language Translation - HermannNey + HermannNey W01-1405 ney-2001-stochastic @@ -1016,16 +1016,16 @@ Toward hierarchical models for statistical machine translation of inflected languages - SonjaNiessen - HermannNey + SonjaNiessen + HermannNey W01-1407 niessen-ney-2001-toward An Efficient <fixed-case>A</fixed-case>* Search Algorithm for Statistical Machine Translation - Franz JosefOch + Franz JosefOch NicolaUeffing - HermannNey + HermannNey W01-1408 och-etal-2001-efficient @@ -1037,20 +1037,20 @@
Machine Translation with Grammar Association: Some Improvements and the <fixed-case>L</fixed-case>oco_<fixed-case>C</fixed-case> Model - FedericoPrat + FedericoPrat W01-1410 prat-2001-machine Towards a Simple and Accurate Statistical Approach to Learning Translation Relationships among Words - Robert C.Moore + Robert C.Moore W01-1411 moore-2001-towards A Comparative Study on Translation Units for Bilingual Lexicon Extraction KaoruYamamoto - YujiMatsumoto + YujiMatsumoto MihokoKitamura W01-1412 yamamoto-etal-2001-comparative @@ -1092,7 +1092,7 @@ Preface - MikeRosner + MikeRosner W01-1501 rosner-2001-preface @@ -1104,8 +1104,8 @@ The <fixed-case>TELRI</fixed-case> tool catalogue: structure and prospects - TomažErjavec - TamásVáradi + TomažErjavec + TamásVáradi W01-1503 erjavec-varadi-2001-telri @@ -1117,7 +1117,7 @@
<fixed-case>S</fixed-case>i<fixed-case>SSA</fixed-case> - An Infrastructure for <fixed-case>NLP</fixed-case> Application Development - AlbertoLavelli + AlbertoLavelli F.Pianesi E.Maci I.Prodanof @@ -1135,26 +1135,26 @@ International Standards for Multilingual Resource Sharing: The <fixed-case>ISLE</fixed-case> Computational Lexicon Working Group - NicolettaCalzolari + NicolettaCalzolari AlessandroLenci - AntonioZampolli + AntonioZampolli W01-1507 calzolari-etal-2001-international Multimedia Language Resources - DaanBroeder - PeterWittenburg + DaanBroeder + PeterWittenburg W01-1508 broeder-wittenburg-2001-multimedia Tools and resources for <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - FrançoisBarthélemy + FrançoisBarthélemy PierreBouiller PhilippeDeschamp LindaKaouane - ÉricVillemonte de la Clergerie + ÉricVillemonte de la Clergerie W01-1509 barthelemy-etal-2001-tools @@ -1163,14 +1163,14 @@ NaokiYoshinaga YusukeMiyao KentaroTorisawa - Jun’ichiTsujii + Jun’ichiTsujii W01-1510 yoshinaga-etal-2001-resource
Covering Treebanks with <fixed-case>GLARF</fixed-case> A.Meyers - RalphGrishman + RalphGrishman MichikoKosaka ShubinZhao W01-1511 @@ -1179,9 +1179,9 @@ Using an Open-Source Unification-Based System for <fixed-case>CL</fixed-case>/<fixed-case>NLP</fixed-case> Teaching AnneCopestake - JohnCarroll - DanFlickinger - RobertMalouf + JohnCarroll + DanFlickinger + RobertMalouf StephanOepen W01-1512 copestake-etal-2001-using @@ -1196,7 +1196,7 @@ Annotation Graphs and Servers and Multi-Modal Resources: Infrastructure for Interdisciplinary Education, Research and Development - ChristopherCieri + ChristopherCieri StevenBird W01-1514 cieri-bird-2001-annotation @@ -1223,22 +1223,22 @@ Annotations and Tools for an Activity Based Spoken Language Corpus JensAllwood - LeifGroenqvist - ElisabethAhlsen + LeifGroenqvist + ElisabethAhlsen MagnusGunnarsson W01-1601 allwood-etal-2001-annotations Variant Transduction: A Method for Rapid Development of Interactive Spoken Interfaces - HiyanAlshawi + HiyanAlshawi ShonaDouglas W01-1602 alshawi-douglas-2001-variant Development of a Machine Learnable Discourse Tagging Tool - MasahiroAraki + MasahiroAraki YukihikoKimura TakuyaNishimoto YasuhisaNiimi @@ -1267,10 +1267,10 @@ Comparing Several Aspects of Human-Computer and Human-Human Dialogues - ChristineDoran + ChristineDoran JohnAberdeen - LaurieDamianos - LynetteHirschman + LaurieDamianos + LynetteHirschman W01-1607 doran-etal-2001-comparing @@ -1279,7 +1279,7 @@ ChristianEbert ShalomLappin HowardGregory - NicolasNicolov + NicolasNicolov W01-1608 ebert-etal-2001-generating @@ -1295,22 +1295,22 @@ Labeling Corrections and Aware Sites in Spoken Dialogue Systems - JuliaHirschberg + JuliaHirschberg MarcSwerts - DianeLitman + DianeLitman W01-1610 hirschberg-etal-2001-labeling Confidence-Based Adaptivity in Response Generation for a Spoken Dialogue System - KristiinaJokinen - GrahamWilcock + KristiinaJokinen + GrahamWilcock W01-1611 jokinen-wilcock-2001-confidence Annotating Anaphoric and Bridging Relations with <fixed-case>MMAX</fixed-case> - ChristophMueller + ChristophMueller MichaelStrube W01-1612 mueller-strube-2001-annotating @@ -1329,8 +1329,8 @@ Integration of Referential Scope Limitations into <fixed-case>J</fixed-case>apanese Pronoun Resolution - MichaelPaul - EiichiroSumita + MichaelPaul + EiichiroSumita W01-1615 paul-sumita-2001-integration @@ -1338,43 +1338,43 @@ On the Means for Clarification in Dialogue MatthewPurver JonathanGinzburg - PatrickHealey + PatrickHealey W01-1616 purver-etal-2001-means
Plug and Play Speech Understanding - MannyRayner + MannyRayner IanLewin - GenevieveGorrell + GenevieveGorrell JohanBoye W01-1617 rayner-etal-2001-plug Designing Confirmation Mechanisms and Error Recover Techniques in a Railway Information System for <fixed-case>S</fixed-case>panish - RubenSan-Segundo - Juan ManuelMontero - Jose ManuelPardo + RubenSan-Segundo + Juan ManuelMontero + Jose ManuelPardo W01-1618 san-segundo-etal-2001-designing A Telephone-Based Railway Information System for <fixed-case>S</fixed-case>panish: Development of a Methodology for Spoken Dialogue Design - RubenSan-Segundo - Juan M.Montero + RubenSan-Segundo + Juan M.Montero Juana M.Guitierrez - AscensionGallardo + AscensionGallardo Jose D.Romeral - Jose M.Pardo + Jose M.Pardo W01-1619 san-segundo-etal-2001-telephone A Hybrid Approach to the Development of Dialogue Systems directed by Semantics - EmilioSanchis + EmilioSanchis IsabelGaliano - FernandoGarcia + FernandoGarcia AntonioCano W01-1620 sanchis-etal-2001-hybrid @@ -1382,13 +1382,13 @@ Reconciling Initiative and Discourse Structure Susan E.Strayer - Peter A.Heeman + Peter A.Heeman W01-1621 strayer-heeman-2001-reconciling Adding Extra Input/Output Modalities to a Spoken Dialogue System - JanienkeSturm + JanienkeSturm FusiWang BertCranen W01-1622 @@ -1415,17 +1415,17 @@ A Corpus Study of Evaluative and Speculative Language - JanyceWiebe - RebeccaBruce + JanyceWiebe + RebeccaBruce MatthewBell - MelanieMartin + MelanieMartin TheresaWilson W01-1626 wiebe-etal-2001-corpus Dialogue Tagsets in Oncology - Mary McGeeWood + Mary McGeeWood W01-1627 wood-2001-dialogue @@ -1437,7 +1437,7 @@ Spoken Dialogue Control Based on a Turn-minimization Criterion Depending on the Speech Recognition Accuracy - NorihiYasuda + NorihiYasuda KohjiDohsaka KiyoakiAikawa W01-1629 @@ -1447,7 +1447,7 @@ Proceedings of the 13th Nordic Conference of Computational Linguistics (NODALIDA 2001) - Anna SågvallHein + Anna SågvallHein Department of Linguistics, Uppsala University, Sweden
Uppsala, Sweden
May @@ -1466,7 +1466,7 @@
The <fixed-case>VISL</fixed-case> System: Research and applicative aspects of <fixed-case>IT</fixed-case>-based learning - EckhardBick + EckhardBick W01-1702 bick-2001-visl @@ -1481,14 +1481,14 @@
The interaction between local focusing structure and global intentions in spoken discourse - SofiaGustafson-Capková + SofiaGustafson-Capková W01-1704 gustafson-capkova-2001-interaction Some problems related to the development of a grammar checker KristinHagen - Janne BondiJohannessen + Janne BondiJohannessen PiaLane W01-1705 hagen-etal-2001-problems @@ -1501,9 +1501,9 @@ On Ambiguity in <fixed-case>I</fixed-case>nternet Searches - Gordana IlicHolen + Gordana IlicHolen Jannevon Koss Torkildsen - Janne BondiJohannessen + Janne BondiJohannessen W01-1707 holen-etal-2001-ambiguity @@ -1515,7 +1515,7 @@
En automatisk navnegjenkjenner for norsk, svensk og dansk - Janne BondiJohannessen + Janne BondiJohannessen W01-1709 johannessen-2001-en @@ -1528,7 +1528,7 @@ Clustering dialogue knowledge with self-organizing maps MauriKaipainen - KristiinaJokinen + KristiinaJokinen TimoKoskenniemi AnttiKerminen KariKanto @@ -1546,20 +1546,20 @@ Corpus-Based Extension of Semantic Lexicons in Large Scale DimitriosKokkinakis - MariaToporowska Gronostaj + MariaToporowska Gronostaj KarinWarmenius W01-1713 kokkinakis-etal-2001-corpus Transformation-Based Learning of Rules for Constraint Grammar Tagging - TorbjörnLager + TorbjörnLager W01-1714 lager-2001-transformation Data-Driven Methods for <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> Tagging and Chunking of <fixed-case>S</fixed-case>wedish - BeátaMegyesi + BeátaMegyesi W01-1715 megyesi-2001-data @@ -1572,7 +1572,7 @@ Towards multimodal public information systems MagnusMerkel - ArneJönsson + ArneJönsson W01-1717 merkel-jonsson-2001-towards @@ -1603,13 +1603,13 @@ Detecting Grammar Errors in Children’s Writing: A Finite State Approach - SylvanaSofkova Hashemi + SylvanaSofkova Hashemi W01-1722 sofkova-hashemi-2001-detecting <fixed-case>U</fixed-case>plug<fixed-case>W</fixed-case>eb–Corpus Tools on the Web - JörgTiedemann + JörgTiedemann W01-1723 tiedemann-2001-uplugweb @@ -1621,7 +1621,7 @@
Towards a Discourse-Oriented Representation of Information Structure in <fixed-case>HPSG</fixed-case> - GrahamWilcock + GrahamWilcock W01-1725 wilcock-2001-towards @@ -1641,21 +1641,21 @@ Issues in Extracting Information from the Web - William W.Cohen + William W.Cohen W01-1801 3 cohen-2001-issues Parameter Estimation for Statistical Parsing Models: Theory and Practice of - MichaelCollins + MichaelCollins W01-1802 4–15 collins-2001-parameter The <fixed-case>XTAG</fixed-case> Project at <fixed-case>P</fixed-case>enn - Aravind K.Joshi + Aravind K.Joshi W01-1803 16–27 joshi-2001-xtag @@ -1663,15 +1663,15 @@ Probabilistic Modelling of Island-Driven Parsing AliciaAgeno - HoracioRodríguez + HoracioRodríguez W01-1804 31–41 ageno-rodriguez-2001-probabilistic Bidirectional Automata for <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - Miguel A.Alonso - Víctor J.Díaz + Miguel A.Alonso + Víctor J.Díaz ManuelVilares W01-1805 42–53 @@ -1695,7 +1695,7 @@ High Precision Extraction of Grammatical Relations JohnCarrol - TedBriscoe + TedBriscoe W01-1808 78–89 carrol-briscoe-2001-high @@ -1719,7 +1719,7 @@ Grammar Induction by <fixed-case>MDL</fixed-case>-Based Distributional Classification YikunGuo FuliangWeng - LideWu + LideWu W01-1811 112–122 guo-etal-2001-grammar @@ -1727,7 +1727,7 @@ Parsing and Hypergraphs DanKlein - Christopher D.Manning + Christopher D.Manning W01-1812 123–134 klein-manning-2001-parsing @@ -1735,9 +1735,9 @@ Automatic Detection of Prosody Phrase Boundaries for Text-to-Speech System XinLv - Tie-junZhao + Tie-junZhao Zhan-yiLiu - Mu-yunYang + Mu-yunYang W01-1813 135–141 lv-etal-2001-automatic @@ -1752,7 +1752,7 @@ Unsupervised <fixed-case>POS</fixed-case>-Tagging Improves Parsing Accuracy and Parsing Efficiency RobbertPrins - Gertjanvan Noord + Gertjanvan Noord W01-1815 154–165 prins-van-noord-2001-unsupervised @@ -1760,7 +1760,7 @@ Parsing the <fixed-case>CHILDES</fixed-case> Database: Methodology and Lessons Learned KenjiSagae - AlonLavie + AlonLavie BrianMacWhinney W01-1816 166–176 @@ -1768,7 +1768,7 @@ Robust Data Oriented Parsing of Speech Utterances - KhalilSima’an + KhalilSima’an W01-1817 177–188 simaan-2001-robust @@ -1785,7 +1785,7 @@ A Multi-Input Dependency Parser - SalahAït-Mokhtar + SalahAït-Mokhtar Jean-PierreChanod ClaudeRoux W01-1819 @@ -1809,8 +1809,8 @@ An Approach to Parsing <fixed-case>V</fixed-case>ietnamese Noun Compounds - DinhDien - HoangKiem + DinhDien + HoangKiem W01-1822 213–216 dien-kiem-2001-approach @@ -1818,7 +1818,7 @@ The Implementation Process of a Statistical Parser for <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese AndréiaGentil Bonfante - Mariadas Graças Volpe Nunes + Mariadas Graças Volpe Nunes W01-1823 217–220 gentil-bonfante-das-gracas-volpe-nunes-2001-implementation @@ -1826,14 +1826,14 @@ Efficient Sentence Parsing with Language Specific Features: A Case Study of <fixed-case>C</fixed-case>zech AlešHorák - PavelSmrž + PavelSmrž W01-1824 221–224 horak-smrz-2001-efficient Efficient Incremental Dependency Parsing - YoshihideKato + YoshihideKato ShigekiMatsubara KatsuhikoToyama YasuyoshiInagaki @@ -1845,7 +1845,7 @@ Automatic Grammar Partitioning for Syntactic Parsing Po ChuiLuk FuliangWeng - HelenMeng + HelenMeng W01-1826 229–232 luk-etal-2001-automatic @@ -1862,10 +1862,10 @@ Word-Order Relaxations & Restrictions within a Dependency Grammar - MartinPlátek - TomášHolan - VladislavKuboň - KarelOliva + MartinPlátek + TomášHolan + VladislavKuboň + KarelOliva W01-1828 237–240 platek-etal-2001-word @@ -1894,7 +1894,7 @@ How Much Will a <fixed-case>RE</fixed-case>-Based Preprocessor Help a Statistical Parser? - DanielZeman + DanielZeman W01-1832 253–256 zeman-2001-much diff --git a/data/xml/W02.xml b/data/xml/W02.xml index 3510b4dd05..663a203f82 100644 --- a/data/xml/W02.xml +++ b/data/xml/W02.xml @@ -15,7 +15,7 @@ Teaching <fixed-case>NLP</fixed-case>/<fixed-case>CL</fixed-case> through Games: the Case of Parsing - Hansvan Halteren + Hansvan Halteren 10.3115/1118108.1118109 1–9 W02-0101 @@ -23,7 +23,7 @@ An Interactive Spreadsheet for Teaching the Forward-Backward Algorithm - JasonEisner + JasonEisner 10.3115/1118108.1118110 10–18 W02-0102 @@ -31,7 +31,7 @@ A Web-based Instructional Platform for Contraint-Based Grammar Formalisms and Parsing - W. DetmarMeurers + W. DetmarMeurers GeraldPenn FrankRichter 10.3115/1118108.1118111 @@ -42,7 +42,7 @@ Evangelising Language Technology: A Practically-Focussed Undergraduate Program RobertDale - DiegoMollá Aliod + DiegoMollá Aliod RolfSchwitter 10.3115/1118108.1118112 27–32 @@ -59,10 +59,10 @@ Design and Evolution of a Language Technologies Curriculum - RobertFrederking - Eric H.Nyberg + RobertFrederking + Eric H.Nyberg TerukoMitamura - Jaime G.Carbonell + Jaime G.Carbonell 10.3115/1118108.1118114 39–45 W02-0106 @@ -78,8 +78,8 @@ Using <fixed-case>GATE</fixed-case> as an Environment for Teaching <fixed-case>NLP</fixed-case> - KalinaBontcheva - HamishCunningham + KalinaBontcheva + HamishCunningham ValentinTablan DianaMaynard OanaHamza @@ -115,9 +115,9 @@ Teaching Computational Linguistics at the <fixed-case>U</fixed-case>niversity of <fixed-case>T</fixed-case>artu: Experience, Perspectives and Challenges - MareKoit - TiitRoosmaa - HaldurÕim + MareKoit + TiitRoosmaa + HaldurÕim 10.3115/1118108.1118120 85–90 W02-0112 @@ -139,8 +139,8 @@ Synchronization in an Asynchronous Agent-based architecture for Dialogue Systems - NateBlaylock - JamesAllen + NateBlaylock + JamesAllen GeorgeFerguson 10.3115/1118121.1118122 1–02 @@ -158,7 +158,7 @@ Non-Sentential Utterances in Dialogue: A: Corpus-Based Study - RaquelFernandez + RaquelFernandez JonathanGinzburg 10.3115/1118121.1118124 15–26 @@ -167,8 +167,8 @@ A Semantic Account of Adverbials as Discourse Connectives - KateForbes - BonnieWebber + KateForbes + BonnieWebber 10.3115/1118121.1118125 27–36 W02-0204 @@ -176,7 +176,7 @@ <fixed-case>MUP</fixed-case> - The <fixed-case>UIC</fixed-case> Standoff Markup Tool - MichaelGlass + MichaelGlass BarbaraDi Eugenio 10.3115/1118121.1118126 37–41 @@ -185,7 +185,7 @@ An Experiment to evaluate the effectiveness of cross-media cues in computer media - NancyGreen + NancyGreen 10.3115/1118121.1118127 42–45 W02-0206 @@ -203,7 +203,7 @@ <fixed-case>D</fixed-case>ialogue<fixed-case>V</fixed-case>iew - An Annotation Tool for Dialogue - Peter A.Heeman + Peter A.Heeman FanYang Susan E.Strayer 10.3115/1118121.1118129 @@ -214,8 +214,8 @@ A Flexible Framework for Developing Mixed-Initiative Dialog Systems JudithHochberg - NandaKambhatla - SalimRoukos + NandaKambhatla + SalimRoukos 10.3115/1118121.1118130 60–63 W02-0209 @@ -223,11 +223,11 @@ Adaptive Dialogue Systems - Interaction with Interact - KristiinaJokinen + KristiinaJokinen AnttiKerminen TommiLagus JukkaKuusisto - GrahamWilcock + GrahamWilcock MarkkuTurunen JaakkoHakulinen KristaJauhiainen @@ -238,7 +238,7 @@ Discourse Processing for Explanatory Essays in Tutorial Applications - Pamela W.Jordan + Pamela W.Jordan KurtVanLehn 10.3115/1118121.1118132 74–83 @@ -248,7 +248,7 @@ Conditional responses in information-seeking dialogues ElenaKaragjosova - IvanaKruijff-Korbayova + IvanaKruijff-Korbayova 10.3115/1118121.1118133 84–87 W02-0212 @@ -304,7 +304,7 @@ Dialogue Macrogame Theory - WilliamMann + WilliamMann 10.3115/1118121.1118139 129–141 W02-0218 @@ -330,7 +330,7 @@ Training a Dialogue Act Tagger for Human-human and Human-computer Travel dialogues RashmiPrasad - MarilynWalker + MarilynWalker 10.3115/1118121.1118142 162–173 W02-0221 @@ -346,7 +346,7 @@ A Dialog Architecture for Military Story Capture - RonnieSmith + RonnieSmith BrianManning JonRogers BrianAdams @@ -368,7 +368,7 @@ Rare Dialogue Acts in Oncology Consultations - Mary McGeeWood + Mary McGeeWood RichardCraggs IanFletcher PeterMaguire @@ -413,10 +413,10 @@ Tuning support vector machines for biomedical named entity recognition - Jun’ichiKazama + Jun’ichiKazama TakakiMakino YoshihiroOhta - Jun’ichiTsujii + Jun’ichiTsujii 10.3115/1118149.1118150 1–8 W02-0301 @@ -433,10 +433,10 @@ Contrast and variability in gene names - K. BretonnelCohen + K. BretonnelCohen AndrewDolbey GeorgeAcquaah-Mensah - LawrenceHunter + LawrenceHunter 10.3115/1118149.1118152 14–20 W02-0303 @@ -444,7 +444,7 @@ Accenting unknown words in a specialized language - PierreZweigenbaum + PierreZweigenbaum NataliaGrabar 10.3115/1118149.1118153 21–28 @@ -473,7 +473,7 @@ Enhanced natural language access to anatomically-indexed data GailSinclair - BonnieWebber + BonnieWebber DuncanDavidson 10.3115/1118149.1118156 45–52 @@ -483,7 +483,7 @@ Unsupervised,corpus-based method for extending a biomedical terminology OlivierBodenreider - ThomasRindflesch + ThomasRindflesch AnitaBurgun 10.3115/1118149.1118157 53–60 @@ -492,7 +492,7 @@ Biomedical text retrieval in languages with a complex morphology - StefanSchultz + StefanSchultz MartinHoneck UdoHahn 10.3115/1118149.1118158 @@ -502,7 +502,7 @@ Analyzing the Semantics of patient data to rank records of literature retrieval - EneidaMendonca + EneidaMendonca StephenJohnson Yoon-hoSeol JamesCimino @@ -513,8 +513,8 @@ Utilizing text mining results: The Pasta Web System - GeorgeDemetriou - RobertGaizauskas + GeorgeDemetriou + RobertGaizauskas 10.3115/1118149.1118160 77–84 W02-0311 @@ -522,9 +522,9 @@ <fixed-case>M</fixed-case>edstract: creating large-scale information servers from biomedical texts - JamesPustejovsky - JoséCastaño - RoserSaurí + JamesPustejovsky + JoséCastaño + RoserSaurí JasonZhang WeiLuo 10.3115/1118149.1118161 @@ -556,8 +556,8 @@ Selecting sentences for multidocument summaries using randomized local search - MichaelWhite - ClaireCardie + MichaelWhite + ClaireCardie 10.3115/1118162.1118164 9–18 W02-0402 @@ -566,9 +566,9 @@ Using a text engineering framework to build an extendable and portable <fixed-case>IE</fixed-case>-based summarisation system DianaMaynard - KalinaBontcheva + KalinaBontcheva HoracioSaggion - HamishCunningham + HamishCunningham OanaHamza 10.3115/1118162.1118165 19–26 @@ -577,8 +577,8 @@ Revisions that improve cohesion in multi-document summaries: a preliminary study - Jahna C.Otterbacher - Dragomir R.Radev + Jahna C.Otterbacher + Dragomir R.Radev AirongLuo 10.3115/1118162.1118166 27–44 @@ -595,8 +595,8 @@ Manual and automatic evaluation of summaries - Chin-YewLin - EduardHovy + Chin-YewLin + EduardHovy 10.3115/1118162.1118168 45–51 W02-0406 @@ -635,7 +635,7 @@ Acquisition System for <fixed-case>A</fixed-case>rabic Noun Morphology SaleemAbuleil KhalidAlsamara - MarthaEvens + MarthaEvens 10.3115/1118637.1118640 W02-0503 abuleil-etal-2002-acquisition @@ -649,7 +649,7 @@ Machine Transliteration of Names in <fixed-case>A</fixed-case>rabic Texts - YaserAl-Onaizan + YaserAl-Onaizan KevinKnight 10.3115/1118637.1118642 W02-0505 @@ -666,8 +666,8 @@ <fixed-case>QARAB</fixed-case>: A: Question Answering System to Support the <fixed-case>A</fixed-case>rabic Language BassamHammo HaniAbu-Salem - StevenLytinen - MarthaEvens + StevenLytinen + MarthaEvens 10.3115/1118637.1118644 W02-0507 hammo-etal-2002-qarab @@ -704,7 +704,7 @@ Unsupervised Learning of Morphology for Building Lexicon for a Highly Inflectional Language UtpalSharma - JugalKalita + JugalKalita RajibDas 10.3115/1118647.1118648 1–10 @@ -713,8 +713,8 @@ Unsupervised Learning of Morphology Using a Novel Directed Search Algorithm: Taking the First Step - Matthew G.Snover - Gaja E.Jarosz + Matthew G.Snover + Gaja E.Jarosz Michael R.Brent 10.3115/1118647.1118649 11–20 @@ -733,7 +733,7 @@ Unsupervised Learning of Morphology Without Morphemes SylvainNeuvel - Sean A.Fulop + Sean A.Fulop 10.3115/1118647.1118651 31–40 W02-0604 @@ -750,9 +750,9 @@ Unsupervised discovery of morphologically related words based on orthographic and semantic similarity - MarcoBaroni + MarcoBaroni JohannesMatiasek - HaraldTrost + HaraldTrost 10.3115/1118647.1118653 48–57 W02-0606 @@ -790,7 +790,7 @@ Corpus-Centered Computation - EiichiroSumita + EiichiroSumita 10.3115/1118656.1118657 1–8 W02-0701 @@ -800,7 +800,7 @@ Topic Detection Based on Dialogue History TakayukiNakata TakahiroIkeda - ShinichiAndo + ShinichiAndo AkitoshiOkumura 10.3115/1118656.1118658 9–14 @@ -810,10 +810,10 @@ Spoken Language Parsing Using Phrase-Level Grammars and Trainable Classifiers ChadLangley - AlonLavie - LoriLevin + AlonLavie + LoriLevin DorcasWallace - DonnaGates + DonnaGates KayPeterson 10.3115/1118656.1118659 15–22 @@ -823,7 +823,7 @@ Finding Translation Pairs from <fixed-case>E</fixed-case>nglish-<fixed-case>J</fixed-case>apanese Untokenized Aligned Corpora GenichiroKikui - HirofumiYamamoto + HirofumiYamamoto 10.3115/1118656.1118660 23–30 W02-0704 @@ -831,7 +831,7 @@ Speech Translation Performance of Statistical Dependency Transduction and Semantic Similarity Transduction - HiyanAlshawi + HiyanAlshawi ShonaDouglas 10.3115/1118656.1118661 31–38 @@ -840,9 +840,9 @@ Architectures for Speech-to-Speech Translation Using Finite-state Models - FranciscoCasacuberta - EnriqueVidal - Juan MiguelVilar + FranciscoCasacuberta + EnriqueVidal + Juan MiguelVilar 10.3115/1118656.1118662 39–44 W02-0706 @@ -852,7 +852,7 @@ Evaluation of Direct Speech Translation Method Using Inductive Learning for Conversations in the Travel Domain KojiMurakami MakotoHiroshige - KenjiAraki + KenjiAraki KojiTochinai 10.3115/1118656.1118663 45–52 @@ -861,14 +861,14 @@ Balancing Expressiveness and Simplicity in an Interlingua for Task Based Dialogue - LoriLevin - DonnaGates + LoriLevin + DonnaGates DorcasPianta RoldanoCattoni - NadiaMana + NadiaMana KayPeterson - AlonLavie - FabioPianesi + AlonLavie + FabioPianesi 10.3115/1118656.1118664 53–60 W02-0708 @@ -876,7 +876,7 @@ Interactive <fixed-case>C</fixed-case>hinese-to-<fixed-case>E</fixed-case>nglish Speech Translation Based on Dialogue Management - ChengqingZong + ChengqingZong BoXu TaiyiHuang 10.3115/1118656.1118665 @@ -886,8 +886,8 @@ A Flexible Speech to Speech Phrasebook Translator - MannyRayner - PierretteBouillon + MannyRayner + PierretteBouillon 10.3115/1118656.1118666 69–76 W02-0710 @@ -895,10 +895,10 @@ Speech Translation on a Tight Budget without Enough Data - Robert E.Frederking - Alan W.Black - Ralf D.Brown - AlexanderRudnicky + Robert E.Frederking + Alan W.Black + Ralf D.Brown + AlexanderRudnicky JohnMoody EricSteinbrecher 10.3115/1118656.1118667 @@ -909,7 +909,7 @@ Automatic Interpretation System Integrating Free-Style Sentence Translation and Parallel Text Based Translation TakahiroIkeda - ShinichiAndo + ShinichiAndo KenjiSatoh AkitoshiOkumura 10.3115/1118656.1118668 @@ -919,8 +919,8 @@ Sharing Problems and Solutions for Machine Translation of Spoken and Written Interaction - SherriCondon - KeithMiller + SherriCondon + KeithMiller 10.3115/1118656.1118669 93–070 W02-0713 @@ -932,7 +932,7 @@ QinJin KornelLaskowski AliciaTribble - AlexWaibel + AlexWaibel 10.3115/1118656.1118670 101–078 W02-0714 @@ -959,7 +959,7 @@ A Multi-Perspective Evaluation of the <fixed-case>NESPOLE</fixed-case>! Speech-to-Speech Translation System - AlonLavie + AlonLavie FlorianMetze RoldanoCattoni EricaCostantini @@ -991,9 +991,9 @@ A Multilingual Approach to Disambiguate Prepositions and Case Suffixes - EnekoAgirre - MikelLersundi - DavidMartinez + EnekoAgirre + MikelLersundi + DavidMartinez 10.3115/1118675.1118676 1–8 W02-0801 @@ -1018,7 +1018,7 @@ Defining and Representing Preposition Senses: a preliminary analysis EmmanuelleCannesson - PatrickSaint-Dizier + PatrickSaint-Dizier 10.3115/1118675.1118679 25–31 W02-0804 @@ -1052,9 +1052,9 @@ Sense Discrimination with Parallel Corpora - NancyIde - TomazErjavec - DanTufis + NancyIde + TomazErjavec + DanTufis 10.3115/1118675.1118683 61–66 W02-0808 @@ -1062,10 +1062,10 @@ <fixed-case>D</fixed-case>utch Word Sense Disambiguation: Optimizing the Localness of Context - Antalvan den Bosch + Antalvan den Bosch IrisHendrickx - VeroniqueHoste - WalterDaelemans + VeroniqueHoste + WalterDaelemans 10.3115/1118675.1118684 61–66 W02-0809 @@ -1073,7 +1073,7 @@ Unsupervised <fixed-case>I</fixed-case>talian Word Sense Disambiguation using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>ets and Unlabeled Corpora - RaduFlorian + RaduFlorian RichardWicentowski 10.3115/1118675.1118685 67–73 @@ -1086,7 +1086,7 @@ KristinaToutanova H. TolgaIlhan Sepandar D.Kamvar - Christopher D.Manning + Christopher D.Manning 10.3115/1118675.1118686 74–80 W02-0811 @@ -1102,8 +1102,8 @@ Combining Contextual Features for Word Sense Disambiguation - Hoa TrangDang - MarthaPalmer + Hoa TrangDang + MarthaPalmer 10.3115/1118675.1118688 88–94 W02-0813 @@ -1111,10 +1111,10 @@ Evaluating the results of a memory-based word-expert approach to unrestricted word sense disambiguation - VeroniqueHoste - WalterDaelemans + VeroniqueHoste + WalterDaelemans IrisHendrickx - Antalvan den Bosch + Antalvan den Bosch 10.3115/1118675.1118689 95–081 W02-0814 @@ -1131,7 +1131,7 @@ Lexical Substitution as a Task for <fixed-case>WSD</fixed-case> Evaluation - DianaMcCarthy + DianaMcCarthy 10.3115/1118675.1118691 089–115 W02-0816 @@ -1139,8 +1139,8 @@ Building a Sense Tagged Corpus with Open Mind Word Expert - TimothyChklovski - RadaMihalcea + TimothyChklovski + RadaMihalcea 10.3115/1118675.1118692 116–122 W02-0817 @@ -1197,7 +1197,7 @@ Using Co-Composition for Acquiring Syntactic and Semantic Subcategorisation PabloGamallo AlexandreAgustini - Gabriel P.Lopes + Gabriel P.Lopes 10.3115/1118627.1118632 34–41 W02-0905 @@ -1205,10 +1205,10 @@ Learning Argument/Adjunct Dictinction for <fixed-case>B</fixed-case>asque - IzaskunAldezabal + IzaskunAldezabal MaxuxAranzabe - KoldoGojenola - KepaSarasola + KoldoGojenola + KepaSarasola AitziberAtutxa 10.3115/1118627.1118633 42–50 @@ -1225,7 +1225,7 @@ Improvements in Automatic Thesaurus Extraction - James R.Curran + James R.Curran MarcMoens 10.3115/1118627.1118635 59–66 @@ -1234,7 +1234,7 @@ Acquiring Collocations for Lexical Choice between Near-Synonyms - Diana ZaiuInkpen + Diana ZaiuInkpen GraemeHirst 10.3115/1118627.1118636 67–76 @@ -1256,7 +1256,7 @@ Discriminative Training Methods for Hidden <fixed-case>M</fixed-case>arkov Models: Theory and Experiments with Perceptron Algorithms - MichaelCollins + MichaelCollins 10.3115/1118693.1118694 1–8 Best Paper @@ -1267,7 +1267,7 @@ Conditional Structure versus Conditional Estimation in <fixed-case>NLP</fixed-case> Models DanKlein - Christopher D.Manning + Christopher D.Manning 10.3115/1118693.1118695 9–16 W02-1002 @@ -1275,7 +1275,7 @@ An Incremental Decision List Learner - JoshuaGoodman + JoshuaGoodman 10.3115/1118693.1118696 17–24 W02-1003 @@ -1283,7 +1283,7 @@ Modeling Consensus: Classifier Combination for Word Sense Disambiguation - RaduFlorian + RaduFlorian DavidYarowsky 10.3115/1118693.1118697 25–32 @@ -1292,7 +1292,7 @@ Augmented Mixture Models for Lexical Disambiguation - SilviuCucerzan + SilviuCucerzan DavidYarowsky 10.3115/1118693.1118698 33–40 @@ -1301,7 +1301,7 @@ An Empirical Evaluation of Knowledge Sources and Learning Algorithms for Word Sense Disambiguation - Yoong KeokLee + Yoong KeokLee Hwee TouNg 10.3115/1118693.1118699 41–48 @@ -1321,7 +1321,7 @@ Combining Sample Selection and Error-Driven Pruning for Machine Learning of Coreference Rules VincentNg - ClaireCardie + ClaireCardie 10.3115/1118693.1118701 55–62 W02-1008 @@ -1329,7 +1329,7 @@ Transformational Priors Over Grammars - JasonEisner + JasonEisner 10.3115/1118693.1118702 63–70 W02-1009 @@ -1360,7 +1360,7 @@ Extensions to <fixed-case>HMM</fixed-case>-based Statistical Word Alignment Models KristinaToutanova H. TolgaIlhan - ChristopherManning + ChristopherManning 10.3115/1118693.1118705 87–94 W02-1012 @@ -1368,7 +1368,7 @@ From Words to Corpora: Recognizing Translation - Noah A.Smith + Noah A.Smith 10.3115/1118693.1118706 95–102 W02-1013 @@ -1376,7 +1376,7 @@ Fast <fixed-case>LR</fixed-case> parsing Using Rich (Tree Adjoining) Grammars - Carlos A.Prolo + Carlos A.Prolo 10.3115/1118693.1118707 103–110 W02-1014 @@ -1393,7 +1393,7 @@ Spectral Clustering for <fixed-case>G</fixed-case>erman Verbs ChrisBrew - SabineSchulte im Walde + SabineSchulte im Walde 10.3115/1118693.1118709 117–124 W02-1016 @@ -1402,7 +1402,7 @@ Exploiting Strong Syntactic Heuristics and Co-Training to Learn Semantic Lexicons WilliamPhillips - EllenRiloff + EllenRiloff 10.3115/1118693.1118710 125–132 W02-1017 @@ -1429,7 +1429,7 @@ User-Friendly Text Prediction For Translators GeorgeFoster - PhilippeLanglais + PhilippeLanglais GuyLapalme 10.3115/1118693.1118713 148–155 @@ -1439,8 +1439,8 @@ Generation of Word Graphs in Statistical Machine Translation NicolaUeffing - Franz JosefOch - HermannNey + Franz JosefOch + HermannNey 10.3115/1118693.1118714 156–163 W02-1021 @@ -1458,8 +1458,8 @@ <fixed-case>NLP</fixed-case> Found Helpful (at least for one Text Categorization Task) CarlSable - KathleenMcKeown - KennethChurch + KathleenMcKeown + KennethChurch 10.3115/1118693.1118716 172–179 W02-1023 @@ -1467,9 +1467,9 @@ A Hybrid Approach to Natural Language Web Search - JenniferChu-Carroll + JenniferChu-Carroll JohnPrager - YaelRavin + YaelRavin ChristianCesar 10.3115/1118693.1118717 180–187 @@ -1507,7 +1507,7 @@ A Bootstrapping Method for Learning Semantic Lexicons using Extraction Pattern Contexts MichaelThelen - EllenRiloff + EllenRiloff 10.3115/1118693.1118721 214–221 W02-1028 @@ -1515,7 +1515,7 @@ Ensemble Methods for Automatic Thesaurus Extraction - JamesCurran + JamesCurran 10.3115/1118693.1118722 222–229 W02-1029 @@ -1534,8 +1534,8 @@ The <fixed-case>S</fixed-case>uper<fixed-case>ARV</fixed-case> Language Model: Investigating the Effectiveness of Tightly Integrating Multiple Knowledge Sources - WenWang - Mary P.Harper + WenWang + Mary P.Harper 10.3115/1118693.1118724 238–247 W02-1031 @@ -1565,7 +1565,7 @@ A Machine-Learning Approach to Introspection in a Question Answering System KrzysztofCzuba JohnPrager - JenniferChu-Carroll + JenniferChu-Carroll 10.3115/1118693.1118727 265–272 W02-1034 @@ -1574,7 +1574,7 @@ Extracting Clauses for Spoken Language Understanding in Conversational Systems NarendraGupta - SrinivasBangalore + SrinivasBangalore 10.3115/1118693.1118728 273–280 W02-1035 @@ -1592,7 +1592,7 @@ Processing Comparable Corpora With Bilingual Suffix Trees - Dragos StefanMunteanu + Dragos StefanMunteanu DanielMarcu 10.3115/1118693.1118730 289–295 @@ -1611,7 +1611,7 @@ Phrasal Cohesion and Statistical Machine Translation - HeidiFox + HeidiFox 10.3115/1118693.1118732 304–3111 W02-1039 @@ -1621,7 +1621,7 @@ The Influence of Minimum Edit Distance on Reference Resolution MichaelStrube StefanRapp - ChristophMüller + ChristophMüller 10.3115/1118693.1118733 312–319 W02-1040 @@ -1630,7 +1630,7 @@ Information Extraction from Voicemail Transcripts MartinJansche - StevenAbney + StevenAbney 10.3115/1118693.1118734 320–327 W02-1041 @@ -1664,15 +1664,15 @@ Induction of Classification from Lexicon Expansion: Assigning Domain Tags to <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Entries EchaChang Chu-RenHuang - Sue-JinKer + Sue-JinKer Chang-HuaYang W02-1102 chang-etal-2002-induction Semiautomatic Creation of Taxonomies - JavierFarreres - HoracioRodríguez + JavierFarreres + HoracioRodríguez KarinaGibert W02-1103 farreres-etal-2002-semiautomatic @@ -1685,7 +1685,7 @@ Building Semantic/Ontological Knowledge by Text Mining - EduardHovy + EduardHovy W02-1105 hovy-2002-building @@ -1714,24 +1714,24 @@ A <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-Based Approach to Named Entites Recognition - BernardoMagnini - MatteoNegri + BernardoMagnini + MatteoNegri RobertoPrevete - HristoTanev + HristoTanev W02-1109 magnini-etal-2002-wordnet Frameworks, Implementation and Open Problems for the Collaborative Building of a Multilingual Lexical Database - MathieuMangeot-Lerebours - GillesSérasset + MathieuMangeot-Lerebours + GillesSérasset FrédéricAndrès W02-1110 mangeot-lerebours-etal-2002-frameworks Fine-Grained Proper Noun Ontologies for Question Answering - Gideon S.Mann + Gideon S.Mann W02-1111 mann-2002-fine @@ -1764,7 +1764,7 @@ A Maximum Entropy Approach to <fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et-Based <fixed-case>C</fixed-case>hinese Word Sense Disambiguation - Ping WaiWong + Ping WaiWong YongshengYang W02-1116 wong-yang-2002-maximum @@ -1802,11 +1802,11 @@ <fixed-case>A</fixed-case>nn<fixed-case>C</fixed-case>orra: Building Tree-banks in <fixed-case>I</fixed-case>ndian Languages - AksharBharati + AksharBharati RajeevSangal VineetChaitanya AmbaKulkarni - Dipti MisraSharma + Dipti MisraSharma K.V.Ramakrishnamacharyulu W02-1202 bharati-etal-2002-anncorra @@ -1814,16 +1814,16 @@ <fixed-case>U</fixed-case>rdu and the Parallel Grammar Project MiriamButt - Tracy HollowayKing + Tracy HollowayKing W02-1203 butt-king-2002-urdu Broadening the Scope of the <fixed-case>EAGLES</fixed-case>/<fixed-case>ISLE</fixed-case> Lexical Standardization Initiative - NicolettaCalzolari + NicolettaCalzolari AlessandroLenci FrancescaBertagna - AntonioZampolli + AntonioZampolli W02-1204 calzolari-etal-2002-broadening @@ -1853,14 +1853,14 @@ Automatic Word Spacing Using Hidden <fixed-case>M</fixed-case>arkov Model for Refining <fixed-case>K</fixed-case>orean Text Corpora Do-GilLee Sang-ZooLee - Hae-ChangRim - Heui-SeokLim + Hae-ChangRim + Heui-SeokLim W02-1208 lee-etal-2002-automatic Decomposition for <fixed-case>ISO</fixed-case>/<fixed-case>IEC</fixed-case> 10646 Ideographic Characters - QinLu + QinLu Shiu TongChan YinLi Ngai LingLi @@ -1870,7 +1870,7 @@ Efficient Deep Processing of <fixed-case>J</fixed-case>apanese MelanieSiegel - Emily M.Bender + Emily M.Bender W02-1210 siegel-bender-2002-efficient @@ -1880,7 +1880,7 @@ SongXue WeiminQu XiaofengWang - YufangSun + YufangSun W02-1211 sun-etal-2002-constructing @@ -1897,13 +1897,13 @@ Speech-Related Technologies - Where Will the Field Go in 10 Years? - Niels OleBernsen + Niels OleBernsen W02-1301 bernsen-2002-speech Towards a Road Map on Human Language Technology: Natural Language Processing - AndreasEisele + AndreasEisele DorotheaZiegler-Eisele W02-1302 eisele-ziegler-eisele-2002-towards @@ -1911,20 +1911,20 @@ Why <fixed-case>NLP</fixed-case> Should Move into <fixed-case>IAS</fixed-case> VictorRaskin - SergeiNirenburg + SergeiNirenburg Mikhail J.Atallah - Christian F.Hempelmann + Christian F.Hempelmann Katrina E.Triezenberg W02-1303 raskin-etal-2002-nlp <fixed-case>MEANING</fixed-case>: a Roadmap to Knowledge Technologies - GermanRigau - BernardoMagnini - EnekoAgirre + GermanRigau + BernardoMagnini + EnekoAgirre PiekVossen - JohnCarroll + JohnCarroll W02-1304 rigau-etal-2002-meaning @@ -1942,38 +1942,38 @@ Disambiguating Noun Compounds with Latent Semantic Indexing Alan M.Buckeridge - Richard F. E.Sutcliffe + Richard F. E.Sutcliffe W02-1401 buckeridge-sutcliffe-2002-disambiguating An Intelligent Terminology Database as a Pre-processor for Statistical Machine Translation MichaelCarl - PhilippeLanglais + PhilippeLanglais W02-1402 carl-langlais-2002-intelligent Lexically-Based Terminology Structuring: Some Inherent Limits NataliaGrabar - PierreZweigenbaum + PierreZweigenbaum W02-1403 grabar-zweigenbaum-2002-lexically Alignment and Extraction of Bilingual Legal Terminology from Context Profiles - Oi YeeKwong - Benjamin K.Tsou - Tom B.Y.Lai - Robert W.P.Luk - Lawrence Y.L.Cheung + Oi YeeKwong + Benjamin K.Tsou + Tom B.Y.Lai + Robert W.P.Luk + Lawrence Y.L.Cheung Francis C.Y.Chik W02-1404 kwong-etal-2002-alignment Improving a general-purpose Statistical Translation Engine by Terminological lexicons - PhilippeLanglais + PhilippeLanglais W02-1405 langlais-2002-improving @@ -1993,8 +1993,8 @@ Automatic Discovery of Term Similarities Using Pattern Mining - GoranNenadić - IrenaSpasić + GoranNenadić + IrenaSpasić SophiaAnaniadou W02-1408 nenadic-etal-2002-automatic-discovery @@ -2034,15 +2034,15 @@ Grammar and Lexicon in the Robust Parsing of <fixed-case>I</fixed-case>talian towards a Non-Naïve Interplay RobertoBartolini AlessandroLenci - SimonettaMontemagni + SimonettaMontemagni VitoPirrelli W02-1501 bartolini-etal-2002-grammar The Grammar Matrix: An Open-Source Starter-Kit for the Rapid Development of Cross-linguistically Consistent Broad-Coverage Precision Grammars - Emily M.Bender - DanFlickinger + Emily M.Bender + DanFlickinger StephanOepen W02-1502 bender-etal-2002-grammar @@ -2051,8 +2051,8 @@ The Parallel Grammar Project MiriamButt HelgeDyvik - Tracy HollowayKing - HiroshiMasuichi + Tracy HollowayKing + HiroshiMasuichi ChristianRohrer W02-1503 butt-etal-2002-parallel @@ -2069,38 +2069,38 @@ Encoding and Reusing Linguistic Information Expressed by Linguistic Properties CarolineHagège - Gabriel G.Bès + Gabriel G.Bès W02-1505 hagege-bes-2002-encoding Adapting Existing Grammars: The <fixed-case>XLE</fixed-case> Experience - Ronald M.Kaplan - Tracy HollowayKing - John T.Maxwell III + Ronald M.Kaplan + Tracy HollowayKing + John T.Maxwell III W02-1506 kaplan-etal-2002-adapting A Classification of Grammar Development Strategies AlexandraKinyon - Carlos A.Prolo + Carlos A.Prolo W02-1507 kinyon-prolo-2002-classification Parallel Distributed Grammar Engineering for Practical Applications StephanOepen - Emily M.Bender + Emily M.Bender UliCallmeier - DanFlickinger + DanFlickinger MelanieSiegel W02-1508 oepen-etal-2002-parallel Coping with Problems in Grammars Automatically Extracted from Treebanks - Carlos A.Prolo + Carlos A.Prolo W02-1509 prolo-2002-coping @@ -2123,15 +2123,15 @@ A Synchronization Structure of <fixed-case>SSTC</fixed-case> and Its Applications in Machine Translation - Mosleh H.Al-Adhaileh - TangEnya Kong - ZaharinYusoff + Mosleh H.Al-Adhaileh + Enya KongTang + ZaharinYusoff W02-1601 al-adhaileh-etal-2002-synchronization Coedition to Share Text Revision across Languages and Improve <fixed-case>MT</fixed-case> a Posteriori - ChristianBoitet + ChristianBoitet Wang-JuTsai W02-1602 boitet-tsai-2002-coedition @@ -2166,14 +2166,14 @@ Word Sense Disambiguation in a <fixed-case>K</fixed-case>orean-to-<fixed-case>J</fixed-case>apanese <fixed-case>MT</fixed-case> System Using Neural Networks You-JinChung Sin-JaeKang - Kyong-HiMoon + Kyong-HiMoon Jong-HyeokLee W02-1606 chung-etal-2002-word Building a Training Corpus for Word Sense Disambiguation in <fixed-case>E</fixed-case>nglish-to-<fixed-case>V</fixed-case>ietnamese Machine Translation - DienDinh + DienDinh W02-1607 dinh-2002-building @@ -2192,16 +2192,16 @@ Learning Domain-Specific Transfer Rules: An Experiment with <fixed-case>K</fixed-case>orean to <fixed-case>E</fixed-case>nglish Translation - BenoitLavoie - MichaelWhite + BenoitLavoie + MichaelWhite TanyaKorelsky W02-1610 lavoie-etal-2002-learning Identifying Synonymous Expressions from a Bilingual Corpus for Example-Based Machine Translation - MitsuoShimohata - EiichiroSumita + MitsuoShimohata + EiichiroSumita W02-1611 shimohata-sumita-2002-identifying @@ -2215,9 +2215,9 @@ Automatic Information Transfer between <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese - JianminYao + JianminYao HaoYu - TiejunZhao + TiejunZhao XiaohongLi W02-1613 yao-etal-2002-automatic-information @@ -2235,9 +2235,9 @@ <fixed-case>RDF</fixed-case>(<fixed-case>S</fixed-case>)/<fixed-case>XML</fixed-case> Linguistic Annotation of Semantic Web Pages - Guadalupe Aguadode Cea + Guadalupe Aguadode Cea InmaculadaÁlvarez-de-Mon - AntonioPareja-Lora + AntonioPareja-Lora RosarioPlaza-Arteche W02-1701 de-cea-etal-2002-rdf @@ -2252,8 +2252,8 @@ A Brief Introduction to the <fixed-case>G</fixed-case>e<fixed-case>M</fixed-case> Annotation Schema for Complex Document Layout - JohnBateman - RenateHenschel + JohnBateman + RenateHenschel JudyDelin W02-1703 bateman-etal-2002-brief @@ -2268,9 +2268,9 @@ The <fixed-case>PAPILLON</fixed-case> Project: Cooperatively Building a Multilingual Lexical Data-base to Derive Open Source Dictionaries & Lexicons - ChristianBoitet - MathieuMangeot - GillesSérasset + ChristianBoitet + MathieuMangeot + GillesSérasset W02-1705 boitet-etal-2002-papillon @@ -2318,7 +2318,7 @@ Cascaded Regular Grammars over <fixed-case>XML</fixed-case> Documents - KirilSimov + KirilSimov MilenKouylekov AlexanderSimov W02-1712 @@ -2334,7 +2334,7 @@ <fixed-case>X</fixed-case>i<fixed-case>STS</fixed-case> - <fixed-case>XML</fixed-case> in Speech Technology Systems MichaelWalsh StephenWilson - JulieCarson-Berndsen + JulieCarson-Berndsen W02-1714 walsh-etal-2002-xists @@ -2357,7 +2357,7 @@ Extraction of Translation Unit from <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Parallel Corpora - BaobaoChang + BaobaoChang PernillaDanielsson WolfgangTeubert W02-1801 @@ -2368,15 +2368,15 @@ LawrenceCheung TomLai RobertLuk - Oi YeeKwong - King KuiSin - Benjamin K.Tsou + Oi YeeKwong + King KuiSin + Benjamin K.Tsou W02-1802 cheung-etal-2002-considerations Developing Guidelines for the Annotation of Anaphors in the <fixed-case>C</fixed-case>hinese Treebank - SusanConverse + SusanConverse W02-1803 converse-2002-developing @@ -2385,7 +2385,7 @@ HongzhaoHe JianfengGao PilianHe - ChangningHuang + ChangningHuang W02-1804 he-etal-2002-finding @@ -2407,7 +2407,7 @@ A Knowledge Based Approach to Identification of Serial Verb Construction in <fixed-case>C</fixed-case>hinese-to-<fixed-case>K</fixed-case>orean Machine Translation System - Dong-ilKim + Dong-ilKim ZhengCui JinjiLi Jong-HyeokLee @@ -2416,7 +2416,7 @@ Learning Case-based Knowledge for Disambiguating <fixed-case>C</fixed-case>hinese Word Segmentation: A Preliminary Study - ChunyuKit + ChunyuKit HaihuaPan HongbiaoChen W02-1808 @@ -2424,8 +2424,8 @@ Corpus-Based <fixed-case>P</fixed-case>inyin Name Resolution - Kui-LamKwok - PeterDeng + Kui-LamKwok + PeterDeng W02-1809 kwok-deng-2002-corpus @@ -2440,15 +2440,15 @@ Design of <fixed-case>C</fixed-case>hinese Morphological Analyzer - HuihsinTseng - Keh-JiannChen + HuihsinTseng + Keh-JiannChen W02-1811 tseng-chen-2002-design A Word Segmentation Method with Dynamic Adapting to Text Using Inductive Learning ZhongjianWang - KenjiAraki + KenjiAraki KojiTochinai W02-1812 wang-etal-2002-word @@ -2466,14 +2466,14 @@ Extracting Pronunciation-translated Names from <fixed-case>C</fixed-case>hinese Texts using Bootstrapping Approach JingXiao JiminLiu - Tat-SengChua + Tat-SengChua W02-1814 xiao-etal-2002-extracting Combining Classifiers for <fixed-case>C</fixed-case>hinese Word Segmentation NianwenXue - Susan P.Converse + Susan P.Converse W02-1815 xue-converse-2002-combining @@ -2488,7 +2488,7 @@ KevinZhang QunLiu HaoZhang - Xue-QiCheng + Xue-QiCheng W02-1817 zhang-etal-2002-automatic @@ -2535,15 +2535,15 @@ Multilingual Question Answering with High Portability on Relational Databases - HanminJung - Gary GeunbaeLee + HanminJung + Gary GeunbaeLee W02-1902 jung-lee-2002-multilingual A Reliable Indexing Method for a Practical <fixed-case>QA</fixed-case> System HarksooKim - JungyunSeo + JungyunSeo W02-1903 kim-seo-2002-reliable @@ -2558,7 +2558,7 @@ Extracting Exact Answers to Questions Based on Structural Links WeiLi - Rohini K.Srihari + Rohini K.Srihari XiaogeLi M.Srikanth XiuhongZhang @@ -2568,9 +2568,9 @@ Passage Selection to Improve Question Answering - FernandoLLopis - José LuisVicedo - AntonioFerrández + FernandoLLopis + José LuisVicedo + AntonioFerrández W02-1906 llopis-etal-2002-passage @@ -2594,31 +2594,31 @@ Extracting the Unextractable: A Case Study on Verb-particles - TimothyBaldwin + TimothyBaldwin AlineVillavicencio W02-2001 baldwin-villavicencio-2002-extracting Language Independent Named Entity Classification by modified Transformation-based Learning and by Decision Tree Induction - William J.Black + William J.Black ArgyriosVasilakopoulos W02-2002 black-vasilakopoulos-2002-language Statistical Named Entity Recognizer Adaptation - John D.Burger - John C.Henderson - William T.Morgan + John D.Burger + John C.Henderson + William T.Morgan W02-2003 burger-etal-2002-statistical Named Entity Extraction using <fixed-case>A</fixed-case>da<fixed-case>B</fixed-case>oost XavierCarreras - LluísMàrquez - LluísPadró + LluísMàrquez + LluísPadró W02-2004 carreras-etal-2002-named @@ -2630,21 +2630,21 @@ Bootstrapping a Multilingual Part-of-speech Tagger in One Person-day - SilviuCucerzan + SilviuCucerzan DavidYarowsky W02-2006 cucerzan-yarowsky-2002-bootstrapping Language Independent <fixed-case>NER</fixed-case> using a Unified Model of Internal and Contextual Evidence - SilviuCucerzan + SilviuCucerzan DavidYarowsky W02-2007 cucerzan-yarowsky-2002-language A Very Very Large Corpus Doesn’t Always Yield Reliable Estimates - James R.Curran + James R.Curran MilesOsborne W02-2008 curran-osborne-2002-large @@ -2659,15 +2659,15 @@ Named Entity Recognition as a House of Cards: Classifier Stacking - RaduFlorian + RaduFlorian W02-2010 florian-2002-named Combining Labelled and Unlabelled Data: A Case Study on Fisher Kernels and Transductive Inference for Biological Entity Recognition - CyrilGoutte - HervéDéjean - EricGaussier + CyrilGoutte + HervéDéjean + EricGaussier NicolaCancedda Jean-MichelRenders W02-2011 @@ -2675,7 +2675,7 @@ <fixed-case>G</fixed-case>ra<fixed-case>S</fixed-case>p: Grammar Learning from Unlabelled Speech Corpora - Peter JuelHenrichsen + Peter JuelHenrichsen W02-2012 henrichsen-2002-grasp @@ -2700,8 +2700,8 @@ <fixed-case>J</fixed-case>apanese Dependency Analysis using Cascaded Chunking - TakuKudo - YujiMatsumoto + TakuKudo + YujiMatsumoto W02-2016 kudo-matsumoto-2002-japanese @@ -2714,13 +2714,13 @@ A Comparison of Algorithms for Maximum Entropy Parameter Estimation - RobertMalouf + RobertMalouf W02-2018 malouf-2002-comparison <fixed-case>M</fixed-case>arkov Models for Language-independent Named Entity Recognition - RobertMalouf + RobertMalouf W02-2019 malouf-2002-markov @@ -2733,14 +2733,14 @@ Letter Level Learning for Language Independent Diacritics Restoration - RadaMihalcea - ViviNastase + RadaMihalcea + ViviNastase W02-2021 mihalcea-nastase-2002-letter <fixed-case>SLINERC</fixed-case>: The <fixed-case>S</fixed-case>ydney Language-Independent Named Entity Recogniser and Classifier - JonPatrick + JonPatrick CaseyWhitelaw RobertMunro W02-2022 @@ -2749,20 +2749,20 @@ Named Entity Learning and Verification: Expectation Maximization in Large Corpora UweQuasthoff - ChristianBiemann + ChristianBiemann ChristianWolff W02-2023 quasthoff-etal-2002-named Introduction to the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2002 Shared Task: Language-Independent Named Entity Recognition - Erik F.Tjong Kim Sang + Erik F.Tjong Kim Sang W02-2024 tjong-kim-sang-2002-introduction Memory-Based Named Entity Recognition - Erik F.Tjong Kim Sang + Erik F.Tjong Kim Sang W02-2025 tjong-kim-sang-2002-memory @@ -2782,7 +2782,7 @@ Two-dimensional Clustering for Text Categorization HiroyaTakamura - YujiMatsumoto + YujiMatsumoto W02-2028 takamura-matsumoto-2002-two @@ -2796,7 +2796,7 @@ Feature Selection for a Rich <fixed-case>HPSG</fixed-case> Grammar Using Decision Trees KristinaToutanova - Christopher D.Manning + Christopher D.Manning W02-2030 toutanova-manning-2002-feature @@ -2811,7 +2811,7 @@ Topological Field Chunking for <fixed-case>G</fixed-case>erman JornVeenstra - Frank HenrikMüller + Frank HenrikMüller TylmanUle W02-2032 veenstra-etal-2002-topological @@ -2824,7 +2824,7 @@ Learning to Disambiguate Potentially Subjective Expressions - JanyceWiebe + JanyceWiebe TheresaWilson W02-2034 wiebe-wilson-2002-learning @@ -2843,7 +2843,7 @@ Proceedings of the International Natural Language Generation Conference - KathleenMcKeown + KathleenMcKeown Association for Computational Linguistics
Harriman, New York, USA
July @@ -2863,9 +2863,9 @@
The Importance of Lexicalized Syntax Models for Natural Language Generation Tasks - HalDaume III + HalDaume III KevinKnight - IreneLangkilde-Geary + IreneLangkilde-Geary DanielMarcu KenjiYamada 9–16 @@ -2874,7 +2874,7 @@ An Empirical Verification of Coverage and Correctness for a General-Purpose Sentence Generator - IreneLangkilde-Geary + IreneLangkilde-Geary 17–24 W02-2103 langkilde-geary-2002-empirical @@ -2890,17 +2890,17 @@ An Overview of Amalgam: A Machine-learned Generation Module - SimonCorston-Oliver + SimonCorston-Oliver MichaelGamon - EricRingger - RobertMoore + EricRingger + RobertMoore 33–40 W02-2105 corston-oliver-etal-2002-overview A Complete, Efficient Sentence-Realization Algorithm for Unification Grammar - RobertMoore + RobertMoore 41–48 W02-2106 moore-2002-complete @@ -2916,7 +2916,7 @@ Towards Emotional Variation in Speech-Based Natural Language Processing MichaelFleischman - EduardHovy + EduardHovy 57–64 W02-2108 fleischman-hovy-2002-towards @@ -2953,7 +2953,7 @@ Content Planner Construction via Evolutionary Algorithms and a Corpus-based Fitness Function - PabloDuboue + PabloDuboue KathleenMcKeown 89–96 W02-2112 @@ -2962,7 +2962,7 @@ Should Corpora Texts Be Gold Standards for <fixed-case>NLG</fixed-case>? EhudReiter - SomayajuluSripada + SomayajuluSripada 97–104 W02-2113 reiter-sripada-2002-corpora @@ -2976,8 +2976,8 @@ Generating Easy References: the Case of Document Deixis - IvandreParaboni - Keesvan Deemter + IvandreParaboni + Keesvan Deemter 113–119 W02-2115 W02-2115.Attachment.pdf @@ -2986,7 +2986,7 @@ The <fixed-case>DIAG</fixed-case> experiments: Natural Language Generation for Intelligent Tutoring Systems BarbaraDi Eugenio - MichaelGlass + MichaelGlass MichaelTrolio 120–127 W02-2116 @@ -2995,7 +2995,7 @@ An Evaluation of Procedural Instructional Text NathalieColineau - CecileParis + CecileParis KeithVander Linden 128–135 W02-2117 @@ -3004,16 +3004,16 @@ A Constraint-Based Approach for Cooperative Information-Seeking Dialogue YanQu - NancyGreen + NancyGreen 136–143 W02-2118 qu-green-2002-constraint What is <fixed-case>NLG</fixed-case>? - RogerEvans + RogerEvans PaulPiwek - LynneCahill + LynneCahill 144–151 W02-2119 evans-etal-2002-nlg @@ -3048,7 +3048,7 @@ Use of Description Logic and <fixed-case>SDRT</fixed-case> in an <fixed-case>NLG</fixed-case> System - AdilEl Ghali + AdilEl Ghali 179–184 W02-2124 el-ghali-2002-use @@ -3078,7 +3078,7 @@ Compositional Semantics for Relative Clauses in <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - Chung-hyeHan + Chung-hyeHan 1–10 W02-2201 han-2002-compositional @@ -3123,7 +3123,7 @@ Statistical Morphological Tagging and Parsing of <fixed-case>K</fixed-case>orean with an <fixed-case>LTAG</fixed-case> Grammar AnoopSarkar - Chung-HyeHan + Chung-HyeHan 48–56 W02-2207 sarkar-han-2002-statistical @@ -3140,7 +3140,7 @@ Gregory M.Kobele TravisCollier CharlesTaylor - Edward P.Stabler + Edward P.Stabler 66–73 W02-2209 kobele-etal-2002-learning @@ -3164,16 +3164,16 @@ Relative Clause Attachment and Anaphora: A Case for Short Binding - RodolfoDelmonte + RodolfoDelmonte 84–89 W02-2212 delmonte-2002-relative A Left Corner Parser for <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - Victor J.Díaz + Victor J.Díaz VicenteCarrillo - Miguel A.Alonso + Miguel A.Alonso 90–95 W02-2213 diaz-etal-2002-left @@ -3181,9 +3181,9 @@ Context-Free Parsing of a <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar Using Finite-State Machines AlexisNasr - OwenRambow + OwenRambow JohnChen - SrinivasBangalore + SrinivasBangalore 96–101 W02-2214 nasr-etal-2002-context @@ -3239,7 +3239,7 @@ A Note on the Complexity of Associative-Commutative <fixed-case>L</fixed-case>ambek Calculus - ChristopheCosta Florêncio + ChristopheCosta Florêncio 159–162 W02-2222 costa-florencio-2002-note @@ -3253,25 +3253,25 @@ On the Affinity of <fixed-case>TAG</fixed-case> with Projective, Bilexical Dependency Grammar - Tom B.Y.Lai - ChangningHuang - Robert W.P.Luk + Tom B.Y.Lai + ChangningHuang + Robert W.P.Luk 169–174 W02-2224 lai-etal-2002-affinity The Theory of Control Applied to the <fixed-case>P</fixed-case>rague Dependency Treebank (<fixed-case>PDT</fixed-case>) - JarmilaPanevová + JarmilaPanevová VeronikaŘezníčková - ZdeňkaUrešová + ZdeňkaUrešová 175–180 W02-2225 panevova-etal-2002-theory Systematic Grammar Development in the <fixed-case>XTAG</fixed-case> Project - CarlosProlo + CarlosProlo 181–186 W02-2226 prolo-2002-systematic @@ -3280,14 +3280,14 @@ A Formal Proof of Strong Equivalence for a Grammar Conversion from <fixed-case>LTAG</fixed-case> to <fixed-case>HPSG</fixed-case>-style NaokiYoshinaga YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 187–192 W02-2227 yoshinaga-etal-2002-formal Parsing <fixed-case>MCS</fixed-case> languages with Thread Automata - ÉricVillemonte de la Clergerie + ÉricVillemonte de la Clergerie 193–200 W02-2228 villemonte-de-la-clergerie-2002-parsing-mcs @@ -3295,8 +3295,8 @@ Evaluation of <fixed-case>LTAG</fixed-case> Parsing with Supertag Compaction OlgaShaumyan - JohnCarroll - DavidWeir + JohnCarroll + DavidWeir 201–205 W02-2229 shaumyan-etal-2002-evaluation @@ -3304,14 +3304,14 @@ <fixed-case>K</fixed-case>orean-<fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case> and <fixed-case>S</fixed-case>-<fixed-case>TAG</fixed-case> MarkDras - Chung-hyeHan + Chung-hyeHan 206–215 W02-2230 dras-han-2002-korean Tectogrammatical representation: towards a minimal transfer in machine translation - JanHajič + JanHajič 216–226 W02-2231 hajic-2002-tectogrammatical @@ -3320,7 +3320,7 @@ Clustering for obtaining syntactic classes of words from automatically extracted <fixed-case>LTAG</fixed-case> grammars TadayoshiHara YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 227–233 W02-2232 hara-etal-2002-clustering @@ -3328,7 +3328,7 @@ A new metagrammar compiler BertrandGaiffe - BenoitCrabbé + BenoitCrabbé AzimRoussanaly 234–241 W02-2233 @@ -3344,7 +3344,7 @@ Cross-serial dependencies in <fixed-case>T</fixed-case>agalog AnnaMaclachlan - OwenRambow + OwenRambow 252–258 W02-2235 maclachlan-rambow-2002-cross @@ -3352,9 +3352,9 @@ Reranking an n-gram supertagger JohnChen - SrinivasBangalore - MichaelCollins - OwenRambow + SrinivasBangalore + MichaelCollins + OwenRambow 259–268 W02-2236 chen-etal-2002-reranking diff --git a/data/xml/W03.xml b/data/xml/W03.xml index e1d50afcf5..d4d491cb04 100644 --- a/data/xml/W03.xml +++ b/data/xml/W03.xml @@ -13,11 +13,11 @@ Experiments with geographic knowledge for information extraction DimitarManov - AtanasKiryakov + AtanasKiryakov BorislavPopov - KalinaBontcheva + KalinaBontcheva DianaMaynard - HamishCunningham + HamishCunningham 1–9 W03-0101 manov-etal-2003-experiments @@ -40,7 +40,7 @@ <fixed-case>G</fixed-case>eo<fixed-case>N</fixed-case>ame: a system for back-transliterating pinyin place names - Kui LamKwok + Kui LamKwok QiangDeng 26–30 W03-0104 @@ -48,17 +48,17 @@ Grounding spatial named entities for information extraction and question answering - Jochen L.Leidner + Jochen L.Leidner GailSinclair - BonnieWebber + BonnieWebber 31–38 W03-0105 leidner-etal-2003-grounding <fixed-case>I</fixed-case>nfo<fixed-case>X</fixed-case>tract location normalization: a hybrid approach to geographic references in information extraction - HuifengLi - K. RohiniSrihari + HuifengLi + K. RohiniSrihari ChengNiu WeiLi 39–44 @@ -67,8 +67,8 @@ Bootstrapping toponym classifiers - David A.Smith - Gideon S.Mann + David A.Smith + Gideon S.Mann 45–49 W03-0107 smith-mann-2003-bootstrapping @@ -127,12 +127,12 @@ Utterance Classification in <fixed-case>A</fixed-case>uto<fixed-case>T</fixed-case>utor - AndrewOlney + AndrewOlney MaxLouwerse EricMatthews JohannaMarineau HeatherHite-Mitchell - ArthurGraesser + ArthurGraesser 1–8 W03-0201 olney-etal-2003-utterance @@ -147,7 +147,7 @@ Computer-Aided Generation of Multiple-Choice Tests - RuslanMitkov + RuslanMitkov Le AnHa 17–22 W03-0203 @@ -171,10 +171,10 @@ A Comparison of Tutor and Student Behavior in Speech Versus Text Based Tutoring - Carolyn P.Rosé - DianeLitman + Carolyn P.Rosé + DianeLitman DumisizweBhembe - KateForbes + KateForbes ScottSilliman RameshSrivastava KurtVanLehn @@ -186,14 +186,14 @@ Transforming Grammar Checking Technology into a Learning Environment for Second Language Writing OlaKnutsson TeresaCerrato Pargman - KerstinSeverinson Eklundh + KerstinSeverinson Eklundh 38–45 W03-0206 knutsson-etal-2003-transforming Pasteur’s Quadrant: Computational Linguistics, <fixed-case>LSA</fixed-case>, and Education - ThomasLandauer + ThomasLandauer 46–52 W03-0207 landauer-2003-pasteurs @@ -217,7 +217,7 @@ A Hybrid Text Classification Approach for Analysis of Student Essays - Carolyn P.Rosé + Carolyn P.Rosé AntonioRoque DumisizweBhembe KurtVanLehn @@ -238,7 +238,7 @@ An Evaluation Exercise for Word Alignment - RadaMihalcea + RadaMihalcea TedPedersen 1–10 W03-0301 @@ -255,7 +255,7 @@ Word Alignment Based on Bilingual Bracketing BingZhao - StephanVogel + StephanVogel 15–18 W03-0303 zhao-vogel-2003-word @@ -263,16 +263,16 @@ Statistical Translation Alignment with Compositionality Constraints MichelSimard - PhilippeLanglais + PhilippeLanglais 19–22 W03-0304 simard-langlais-2003-statistical Reducing Parameter Space for Word Alignment - HerveDejean - EricGaussier - CyrilGoutte + HerveDejean + EricGaussier + CyrilGoutte KenjiYamada 23–26 W03-0305 @@ -280,7 +280,7 @@ Word Alignment Baselines - John C.Henderson + John C.Henderson 27–30 W03-0306 henderson-2003-word @@ -288,14 +288,14 @@ Phrase-based Evaluation of Word-to-Word Alignments MichaelCarl - SisayFissaha + SisayFissaha 31–35 W03-0307 carl-fissaha-2003-phrase <fixed-case>TREQ</fixed-case>-<fixed-case>AL</fixed-case>: A word alignment system with limited language resources - DanTufiş + DanTufiş Ana-MariaBarbu RaduIon 36–39 @@ -304,7 +304,7 @@ The <fixed-case>D</fixed-case>uluth Word Alignment System - Bridget ThomsonMcInnes + Bridget ThomsonMcInnes TedPedersen 40–43 W03-0309 @@ -320,9 +320,9 @@ Retrieving Meaning-equivalent Sentences for Example-based Rough Translation - MitsuoShimohata - EiichiroSumita - YujiMatsumoto + MitsuoShimohata + EiichiroSumita + YujiMatsumoto 50–56 W03-0311 shimohata-etal-2003-retrieving @@ -331,7 +331,7 @@ Word Selection for <fixed-case>EBMT</fixed-case> based on Monolingual Similarity and Translation Confidence EijiAramaki SadaoKurohashi - HidekiKashioka + HidekiKashioka HidekiTanaka 57–64 W03-0312 @@ -347,9 +347,9 @@ Learning Sequence-to-Sequence Correspondences from Parallel Corpora via Sequential Pattern Mining KaoruYamamoto - TakuKudo + TakuKudo YutaTsuboi - YujiMatsumoto + YujiMatsumoto 73–80 W03-0314 yamamoto-etal-2003-learning @@ -358,24 +358,24 @@ Efficient Optimization for Bilingual Sentence Alignment Based on Linear Regression BingZhao KlausZechner - StephenVogel - AlexWaibel + StephenVogel + AlexWaibel 81–87 W03-0315 zhao-etal-2003-efficient <fixed-case>POS</fixed-case>-Tagger for <fixed-case>E</fixed-case>nglish-<fixed-case>V</fixed-case>ietnamese Bilingual Corpus - DinhDien - HoangKiem + DinhDien + HoangKiem 88–95 W03-0316 dien-kiem-2003-pos Acquisition of <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Transliterated Word Pairs from Parallel-Aligned Texts using a Statistical Machine Transliteration Model - Chun-JenLee - Jason S.Chang + Chun-JenLee + Jason S.Chang 96–103 W03-0317 lee-chang-2003-acquisition @@ -383,7 +383,7 @@ Input Sentence Splitting and Translating TakaoDoi - EiichiroSumita + EiichiroSumita 104–110 W03-0318 doi-sumita-2003-input @@ -427,7 +427,7 @@ A model of syntactic disambiguation based on lexicalized grammars YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 1–8 W03-0401 miyao-tsujii-2003-model @@ -435,7 +435,7 @@ An <fixed-case>SVM</fixed-case>-based voting algorithm with application to parse reranking LibinShen - Aravind K.Joshi + Aravind K.Joshi 9–16 W03-0402 shen-joshi-2003-svm @@ -450,8 +450,8 @@ Learning subjective nouns using extraction pattern bootstrapping - EllenRiloff - JanyceWiebe + EllenRiloff + JanyceWiebe TheresaWilson 25–32 W03-0404 @@ -459,7 +459,7 @@ Unsupervised Personal Name Disambiguation - GideonMann + GideonMann DavidYarowsky 33–40 W03-0405 @@ -476,7 +476,7 @@ Bootstrapping <fixed-case>POS</fixed-case>-taggers using unlabelled data StephenClark - JamesCurran + JamesCurran MilesOsborne 49–55 W03-0407 @@ -485,8 +485,8 @@ Updating an <fixed-case>NLP</fixed-case> system to fit new domains: an empirical study on the sentence segmentation problem TongZhang - FredDamerau - DavidJohnson + FredDamerau + DavidJohnson 56–62 W03-0408 zhang-etal-2003-updating @@ -494,7 +494,7 @@ Exceptionality and Natural Language Learning MihaiRotaru - Diane J.Litman + Diane J.Litman 63–70 W03-0409 rotaru-litman-2003-exceptionality @@ -510,7 +510,7 @@ Preposition Semantic Classification via Treebank and <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et TomO’Hara - JanyceWiebe + JanyceWiebe 79–86 W03-0411 ohara-wiebe-2003-preposition @@ -551,7 +551,7 @@ An efficient clustering algorithm for class-based language models TakuyaMatsuzaki YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 119–126 W03-0416 matsuzaki-etal-2003-efficient @@ -559,14 +559,14 @@ Training a Naive <fixed-case>B</fixed-case>ayes Classifier via the <fixed-case>EM</fixed-case> Algorithm with a Class Distribution Constraint YoshimasaTsuruoka - Jun’ichiTsujii + Jun’ichiTsujii 127–134 W03-0417 tsuruoka-tsujii-2003-training Identifying Events using Similarity and Context - Dominic R.Jones + Dominic R.Jones Cynthia A.Thompson 135–141 W03-0418 @@ -574,7 +574,7 @@ Introduction to the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2003 Shared Task: Language-Independent Named Entity Recognition - Erik F.Tjong Kim Sang + Erik F.Tjong Kim Sang FienDe Meulder 142–147 W03-0419 @@ -583,8 +583,8 @@ Maximum Entropy Models for Named Entity Recognition OliverBender - Franz JosefOch - HermannNey + Franz JosefOch + HermannNey 148–151 W03-0420 bender-etal-2003-maximum @@ -592,8 +592,8 @@ A Simple Named Entity Extractor using <fixed-case>A</fixed-case>da<fixed-case>B</fixed-case>oost XavierCarreras - LluísMàrquez - LluísPadró + LluísMàrquez + LluísPadró 152–155 W03-0421 carreras-etal-2003-simple @@ -601,8 +601,8 @@ Learning a Perceptron-Based Named Entity Chunker via Online Recognition Feedback XavierCarreras - LluísMàrquez - LluísPadró + LluísMàrquez + LluísPadró 156–159 W03-0422 carreras-etal-2003-learning @@ -617,7 +617,7 @@ Language Independent <fixed-case>NER</fixed-case> using a Maximum Entropy Tagger - JamesCurran + JamesCurran StephenClark 164–167 W03-0424 @@ -625,9 +625,9 @@ Named Entity Recognition through Classifier Combination - RaduFlorian - AbeIttycheriah - HongyanJing + RaduFlorian + AbeIttycheriah + HongyanJing TongZhang 168–171 W03-0425 @@ -643,7 +643,7 @@ Memory-based one-step named-entity recognition: Effects of seed list features, classifier stacking, and unannotated data IrisHendrickx - Antalvan den Bosch + Antalvan den Bosch 176–179 W03-0427 hendrickx-van-den-bosch-2003-memory @@ -653,7 +653,7 @@ DanKlein JosephSmarr HuyNguyen - Christopher D.Manning + Christopher D.Manning 180–183 W03-0428 klein-etal-2003-named @@ -662,7 +662,7 @@ Named Entity Recognition using Hundreds of Thousands of Features JamesMayfield PaulMcNamee - ChristinePiatko + ChristinePiatko 184–187 W03-0429 mayfield-etal-2003-named @@ -679,7 +679,7 @@ Meta-Learning Orthographic and Contextual Models for Language Independent Named Entity Recognition RobertMunro DarenLer - JonPatrick + JonPatrick 192–195 W03-0431 munro-etal-2003-meta @@ -687,7 +687,7 @@ Named Entity Recognition Using a Character-based Probabilistic Approach CaseyWhitelaw - JonPatrick + JonPatrick 196–199 W03-0432 whitelaw-patrick-2003-named @@ -704,7 +704,7 @@ A Robust Risk Minimization based Named Entity Recognition System TongZhang - DavidJohnson + DavidJohnson 204–207 W03-0434 zhang-johnson-2003-robust @@ -712,7 +712,7 @@ Memory-Based Named Entity Recognition using Unannotated Data FienDe Meulder - WalterDaelemans + WalterDaelemans 208–211 W03-0435 de-meulder-daelemans-2003-memory @@ -730,9 +730,9 @@ Hedge Trimmer: A Parse-and-Trim Approach to Headline Generation - BonnieDorr + BonnieDorr DavidZajic - RichardSchwartz + RichardSchwartz 1–8 W03-0501 dorr-etal-2003-hedge @@ -740,7 +740,7 @@ Sub-event based multi-document summarization NaomiDaniel - DragomirRadev + DragomirRadev TimothyAllison 9–16 W03-0502 @@ -751,14 +751,14 @@ AmardeepGrewal TimothyAllison StankoDimitrov - DragomirRadev + DragomirRadev 17–24 W03-0503 grewal-etal-2003-multi Summarization of Noisy Documents: A Pilot Study - HongyanJing + HongyanJing DanielLopresti ChilinShih 25–32 @@ -778,7 +778,7 @@ A Study for Document Summarization Based on Personal Annotation HaiqinZhang ZhengChen - Wei-yingMa + Wei-yingMa QingshengCai 41–48 W03-0506 @@ -786,7 +786,7 @@ Text Summarization Challenge 2 - Text summarization evaluation at <fixed-case>NTCIR</fixed-case> Workshop 3 - ManabuOkumura + ManabuOkumura TakahiroFukusima HidetsuguNanba 49–56 @@ -795,7 +795,7 @@ Examining the consensus between human summaries: initial experiments with factoid analysis - Hansvan Halteren + Hansvan Halteren SimoneTeufel 57–64 W03-0508 @@ -811,8 +811,8 @@ The Potential and Limitations of Automatic Sentence Extraction for Summarization - Chin-YewLin - EduardHovy + Chin-YewLin + EduardHovy 73–80 W03-0510 lin-hovy-2003-potential @@ -832,7 +832,7 @@ Word Sense Disambiguation with Pictures KobusBarnard MatthewJohnson - DavidForsyth + DavidForsyth 1–5 W03-0601 barnard-etal-2003-word @@ -840,8 +840,8 @@ Words and Pictures in the News JaetyEdwards - RyanWhite - DavidForsyth + RyanWhite + DavidForsyth 6–13 W03-0602 edwards-etal-2003-words @@ -849,7 +849,7 @@ Understanding Complex Visually Referring Utterances PeterGorniak - DebRoy + DebRoy 14–21 W03-0603 gorniak-roy-2003-understanding @@ -907,7 +907,7 @@ Conversational Robots: Building Blocks for Grounding Word Meaning - DebRoy + DebRoy Kai-YuhHsiao NikolaosMavridis 70–77 @@ -917,7 +917,7 @@ Learning the Meaning and Usage of Time Phrases from a Parallel Text-Data Corpus EhudReiter - SomayajuluSripada + SomayajuluSripada 78–85 W03-0611 reiter-sripada-2003-learning @@ -933,7 +933,7 @@ Learning Word Meanings and Descriptive Parameter Spaces from Music BrianWhitman - DebRoy + DebRoy BarryVercoe 92–99 W03-0613 @@ -952,7 +952,7 @@ Combining Semantic and Temporal Constraints for Multimodal Integration in Conversation Systems - Joyce Y.Chai + Joyce Y.Chai PengyuHong Michelle X.Zhou 1–3 @@ -961,8 +961,8 @@ Conceptual Language Models for Dialog Systems - RenatoDe Mori - FredericBéchet + RenatoDe Mori + FredericBéchet 4–6 W03-0702 de-mori-bechet-2003-conceptual @@ -970,7 +970,7 @@ Directions For Multi-Party Human-Computer Interaction Research KatrinKirchhoff - MariOstendorf + MariOstendorf 7–9 W03-0703 kirchhoff-ostendorf-2003-directions @@ -978,7 +978,7 @@ Dialogue Management for an Automated Multilingual Call Center HildaHardy - TomekStrzalkowski + TomekStrzalkowski MinWu 10–12 W03-0704 @@ -988,7 +988,7 @@ Dialogue complexity with portability? Research directions for the Information State approach CarlBurke ChristyDoran - AbigailGertner + AbigailGertner AndyGregorowicz LisaHarper JoelKorb @@ -1000,15 +1000,15 @@ The Pragmatics of Taking a Spoken Language System Out of the Laboratory Jody J.Daniels - Helen WrightHastie + Helen WrightHastie 16–18 W03-0706 daniels-hastie-2003-pragmatics Flexible and Personalizable Mixed-Initiative Dialogue Systems - JamesGlass - StephanieSeneff + JamesGlass + StephanieSeneff 19–21 W03-0707 glass-seneff-2003-flexible @@ -1026,16 +1026,16 @@ The Talent System: <fixed-case>TEXTRACT</fixed-case> Architecture and Data Model - Mary S.Neff + Mary S.Neff Roy J.Byrd - Branimir K.Boguraev + Branimir K.Boguraev 1–8 W03-0801 neff-etal-2003-talent <fixed-case>WHAT</fixed-case>: An <fixed-case>XSLT</fixed-case>-based Infrastructure for the Integration of Natural Language Processing Components - UlrichSchäfer + UlrichSchäfer 9–16 W03-0802 schafer-2003-xslt @@ -1043,18 +1043,18 @@ <fixed-case>OLLIE</fixed-case>: On-Line Learning for Information Extraction ValentinTablan - KalinaBontcheva + KalinaBontcheva DianaMaynard - HamishCunningham + HamishCunningham 17–24 W03-0803 tablan-etal-2003-ollie International Standard for a Linguistic Annotation Framework - NancyIde - LaurentRomary - Ericde la Clergerie + NancyIde + LaurentRomary + Ericde la Clergerie 25–30 W03-0804 ide-etal-2003-international @@ -1069,24 +1069,24 @@ Blueprint for a High Performance <fixed-case>NLP</fixed-case> Infrastructure - James R.Curran + James R.Curran 39–44 W03-0806 curran-2003-blueprint Current Issues in Software Engineering for Natural Language Processing - JochenLeidner + JochenLeidner 45–50 W03-0807 leidner-2003-current <fixed-case>I</fixed-case>nfo<fixed-case>X</fixed-case>tract: A Customizable Intermediate Level Information Extraction Engine - Rohini K.Srihari + Rohini K.Srihari WeiLi ChengNiu - ThomasCornell + ThomasCornell 51–58 W03-0808 srihari-etal-2003-infoxtract @@ -1121,7 +1121,7 @@ <fixed-case>SDL</fixed-case>—<fixed-case>A</fixed-case> Description Language for Building <fixed-case>NLP</fixed-case> Systems - Hans-UlrichKrieger + Hans-UlrichKrieger 83–90 W03-0812 krieger-2003-sdl @@ -1140,7 +1140,7 @@ A knowledge-driven approach to text meaning processing PeterClark - PhilHarrison + PhilHarrison JohnThompson 1–6 W03-0901 @@ -1148,7 +1148,7 @@ Extracting and evaluating general world knowledge from the Brown Corpus - LenhartSchubert + LenhartSchubert MatthewTong 7–13 W03-0902 @@ -1168,7 +1168,7 @@ Operative strategies in ontological semantics - SergeiNirenburg + SergeiNirenburg MarjorieMcShane StephenBeale 22–29 @@ -1178,8 +1178,8 @@ The genesis of a script for bankruptcy in ontological semantics VictorRaskin - SergeiNirenburg - Christian F.Hempelmann + SergeiNirenburg + Christian F.Hempelmann InnaNirenburg Katrina E.Triezenberg 30–37 @@ -1190,9 +1190,9 @@ Entailment, intensionality and text understanding CleoCondoravdi DickCrouch - Valeriade Paiva + Valeriade Paiva ReinhardStolle - Daniel G.Bobrow + Daniel G.Bobrow 38–45 W03-0906 condoravdi-etal-2003-entailment @@ -1208,8 +1208,8 @@ Towards light semantic processing for question answering BenjaminVan Durme YifenHuang - AnnaKupść - EricNyberg + AnnaKupść + EricNyberg 54–61 W03-0908 van-durme-etal-2003-towards @@ -1224,7 +1224,7 @@ Deriving verb-meaning clusters from syntactic structure PaulKingsbury - KarinKipper + KarinKipper 70–77 W03-0910 kingsbury-kipper-2003-deriving @@ -1242,7 +1242,7 @@ A Projection Extension Algorithm for Statistical Machine Translation - ChristophTillmann + ChristophTillmann 1–8 W03-1001 tillmann-2003-projection @@ -1258,7 +1258,7 @@ Cross-Lingual Lexical Triggers in Statistical Language Modeling WoosungKim - SanjeevKhudanpur + SanjeevKhudanpur 17–24 W03-1003 kim-khudanpur-2003-cross @@ -1266,7 +1266,7 @@ Sentence Alignment for Monolingual Comparable Corpora ReginaBarzilay - NoemieElhadad + NoemieElhadad 25–32 W03-1004 barzilay-elhadad-2003-sentence @@ -1282,7 +1282,7 @@ Use of Deep Linguistic Features for the Recognition and Labeling of Semantic Arguments JohnChen - OwenRambow + OwenRambow 41–48 W03-1006 chen-rambow-2003-use @@ -1291,7 +1291,7 @@ Maximum Entropy Models for <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Classification MichaelFleischman NamheeKwon - EduardHovy + EduardHovy 49–56 W03-1007 fleischman-etal-2003-maximum @@ -1314,7 +1314,7 @@ A Plethora of Methods for Learning <fixed-case>E</fixed-case>nglish Countability - TimothyBaldwin + TimothyBaldwin FrancisBond 73–80 W03-1010 @@ -1323,7 +1323,7 @@ A General Framework for Distributional Similarity JulieWeeds - DavidWeir + DavidWeir 81–88 W03-1011 weeds-weir-2003-general @@ -1332,7 +1332,7 @@ Using <fixed-case>LTAG</fixed-case> Based Features in Parse Reranking LibinShen AnoopSarkar - AravindJoshi + AravindJoshi 89–96 W03-1012 shen-etal-2003-using @@ -1340,15 +1340,15 @@ Log-Linear Models for Wide-Coverage <fixed-case>CCG</fixed-case> Parsing StephenClark - JamesCurran + JamesCurran 97–104 W03-1013 clark-curran-2003-log Learning Extraction Patterns for Subjective Expressions - EllenRiloff - JanyceWiebe + EllenRiloff + JanyceWiebe 105–112 W03-1014 riloff-wiebe-2003-learning @@ -1356,15 +1356,15 @@ Bootstrapping Coreference Classifiers with Multiple Machine Learning Algorithms VincentNg - ClaireCardie + ClaireCardie 113–120 W03-1015 ng-cardie-2003-bootstrapping Statistical Acquisition of Content Selection Rules for Natural Language Generation - Pablo ArielDuboue - Kathleen R.McKeown + Pablo ArielDuboue + Kathleen R.McKeown 121–128 W03-1016 duboue-mckeown-2003-statistical @@ -1379,8 +1379,8 @@ Evaluation and Extension of Maximum Entropy Models with Inequality Constraints - Jun’ichiKazama - Jun’ichiTsujii + Jun’ichiKazama + Jun’ichiTsujii 137–144 W03-1018 kazama-tsujii-2003-evaluation @@ -1398,7 +1398,7 @@ A Fast Algorithm for Feature Selection in Conditional Maximum Entropy Modeling YaqianZhou FuliangWeng - LideWu + LideWu HaukeSchmidt 153–159 W03-1020 @@ -1408,7 +1408,7 @@ Training Connectionist Models for the <fixed-case>S</fixed-case>tructured <fixed-case>L</fixed-case>anguage <fixed-case>M</fixed-case>odel PengXu AhmadEmami - FrederickJelinek + FrederickJelinek 160–167 W03-1021 xu-etal-2003-training @@ -1423,7 +1423,7 @@ Using the Web in Machine Learning for Other-Anaphora Resolution - Natalia N.Modjeska + Natalia N.Modjeska KatjaMarkert MalvinaNissim 176–183 @@ -1440,16 +1440,16 @@ A Maximum Entropy <fixed-case>C</fixed-case>hinese Character-Based Parser - XiaoqiangLuo + XiaoqiangLuo 192–199 W03-1025 luo-2003-maximum <fixed-case>H</fixed-case>owtogeta<fixed-case>C</fixed-case>hinese<fixed-case>N</fixed-case>ame(<fixed-case>E</fixed-case>ntity): Segmentation and Combination Issues - HongyanJing - RaduFlorian - XiaoqiangLuo + HongyanJing + RaduFlorian + XiaoqiangLuo TongZhang AbrahamIttycheriah 200–207 @@ -1486,7 +1486,7 @@ Improving Summarization Performance by Sentence Compression — A Pilot Study - Chin-YewLin + Chin-YewLin 10.3115/1118935.1118936 1–8 W03-1101 @@ -1524,7 +1524,7 @@ <fixed-case>P</fixed-case>oisson Naive <fixed-case>B</fixed-case>ayes for Text Classification with Feature Weighting Sang-BumKim Hee-CheolSeo - Hae-ChangRim + Hae-ChangRim 10.3115/1118935.1118940 33–40 W03-1105 @@ -1533,7 +1533,7 @@ Text Classification in <fixed-case>A</fixed-case>sian Languages without Word Segmentation FuchunPeng - XiangjiHuang + XiangjiHuang DaleSchuurmans ShaojunWang 10.3115/1118935.1118941 @@ -1545,7 +1545,7 @@ Feature Selection in Categorizing Procedural Expressions MinekiTakechi TakenobuTokunaga - YujiMatsumoto + YujiMatsumoto HozumiTanaka 10.3115/1118935.1118942 49–56 @@ -1556,7 +1556,7 @@ Learning Bilingual Translations from Comparable Corpora to Cross-Language Information Retrieval: Hybrid Statistics-based and Linguistics-based Approach FatihaSadat MasatoshiYoshikawa - ShunsukeUemura + ShunsukeUemura 10.3115/1118935.1118943 57–64 W03-1108 @@ -1576,7 +1576,7 @@ Issues in Pre- and Post-translation Document Expansion: Untranslatable Cognates and Missegmented Words - Gina-AnneLevow + Gina-AnneLevow 10.3115/1118935.1118945 77–83 W03-1110 @@ -1594,7 +1594,7 @@ <fixed-case>A</fixed-case>ny<fixed-case>Q</fixed-case>: Answer Set based Information Retrieval System Hyo-JungOh - Myung-GilJang + Myung-GilJang Moon-SooChang 10.3115/1118935.1118947 92–99 @@ -1615,7 +1615,7 @@ Improving Document Clustering by Utilizing Meta-Data - Kam-FaiWong + Kam-FaiWong Nam-KiuChan Kam-LaiWong 10.3115/1118935.1118949 @@ -1637,7 +1637,7 @@ Extraction of User Preferences from a Few Positive Documents Byeong ManKim QingLi - Jong WanKim + Jong WanKim 10.3115/1118935.1118951 124–131 W03-1116 @@ -1654,8 +1654,8 @@ Text Categorization Using Automatically Acquired Domain Ontology Shih-HungWu - Tzong-HanTsai - Wen-LianHsu + Tzong-HanTsai + Wen-LianHsu 10.3115/1118935.1118953 138–145 W03-1118 @@ -1663,7 +1663,7 @@ A Sentence Reduction using Syntax Control - Minh LeNguyen + Minh LeNguyen SusumuHoriguchi 10.3115/1118935.1118954 146–152 @@ -1675,7 +1675,7 @@ FuminoriKimura AkiraMaeda MasatoshiYoshikawa - ShunsukeUemura + ShunsukeUemura 10.3115/1118935.1118955 153–160 W03-1120 @@ -1685,7 +1685,7 @@ <fixed-case>K</fixed-case>orean Named Entity Recognition using <fixed-case>HMM</fixed-case> and <fixed-case>C</fixed-case>o<fixed-case>T</fixed-case>raining Model EuisokChung Yi-GyuHwang - Myung-GilJang + Myung-GilJang 10.3115/1118935.1118956 161–167 W03-1121 @@ -1693,7 +1693,7 @@ Question-Answering Based on Virtually Integrated Lexical Knowledge Base - Key-SunChoi + Key-SunChoi Jae-HoKim MasaruMiyazaki JunGoto @@ -1723,7 +1723,7 @@ ApurvaJadhav AshutoshJoshi SoumenChakrabarti - PushpakBhattacharyya + PushpakBhattacharyya 10.3115/1119312.1119313 1–10 W03-1201 @@ -1733,7 +1733,7 @@ Using Thematic Information in Statistical Headline Generation StephenWan MarkDras - CécileParis + CécileParis RobertDale 10.3115/1119312.1119314 11–20 @@ -1744,7 +1744,7 @@ Combining Optimal Clustering and Hidden <fixed-case>M</fixed-case>arkov Models for Extractive Summarization PascaleFung GraceNgai - Chi-ShunCheung + Chi-ShunCheung 10.3115/1119312.1119315 21–28 W03-1203 @@ -1762,7 +1762,7 @@ An Evolutionary Approach for Improving the Quality of Automatic Summaries - ConstantinOrasan + ConstantinOrasan 10.3115/1119312.1119317 37–45 W03-1205 @@ -1770,10 +1770,10 @@ <fixed-case>HITIQA</fixed-case>: An Interactive Question Answering System: A Preliminary Report - SharonSmall + SharonSmall TingLiu NobuyukiShimizu - TomekStrzalkowski + TomekStrzalkowski 10.3115/1119312.1119318 46–53 W03-1206 @@ -1781,9 +1781,9 @@ Discovery of Manner Relations and Their Applicability to Question Answering - RoxanaGirju + RoxanaGirju ManjuPutcha - DanMoldovan + DanMoldovan 10.3115/1119312.1119319 54–60 W03-1207 @@ -1803,8 +1803,8 @@ Statistical <fixed-case>QA</fixed-case> - Classifier vs. Re-ranker: What’s the difference? DeepakRavichandran - EduardHovy - Franz JosefOch + EduardHovy + Franz JosefOch 10.3115/1119312.1119321 69–75 W03-1209 @@ -1812,7 +1812,7 @@ Automatic Detection of Causal Relations for Question Answering - RoxanaGirju + RoxanaGirju 10.3115/1119312.1119322 76–83 W03-1210 @@ -1821,7 +1821,7 @@ Question Answering on a Case Insensitive Corpus WeiLi - RohiniSrihari + RohiniSrihari ChengNiu XiaogeLi 10.3115/1119312.1119323 @@ -1846,8 +1846,8 @@ Gene Name Extraction Using <fixed-case>F</fixed-case>ly<fixed-case>B</fixed-case>ase Resources AlexMorgan - LynetteHirschman - AlexanderYeh + LynetteHirschman + AlexanderYeh MarcColosimo 10.3115/1118958.1118959 1–8 @@ -1869,8 +1869,8 @@ Using Domain-Specific Verbs for Term Classification - IrenaSpasic - GoranNenadic + IrenaSpasic + GoranNenadic SophiaAnaniadou 10.3115/1118958.1118961 17–24 @@ -1879,7 +1879,7 @@ Enhancing Performance of Protein Name Recognizers Using Collocation - Wen-JuanHou + Wen-JuanHou Hsin-HsiChen 10.3115/1118958.1118962 25–32 @@ -1890,7 +1890,7 @@ Two-Phase Biomedical <fixed-case>NE</fixed-case> Recognition based on <fixed-case>SVM</fixed-case>s Ki-JoongLee Young-SookHwang - Hae-ChangRim + Hae-ChangRim 10.3115/1118958.1118963 33–40 W03-1305 @@ -1899,7 +1899,7 @@ Boosting Precision and Recall of Dictionary-Based Protein Name Recognition YoshimasaTsuruoka - Jun’ichiTsujii + Jun’ichiTsujii 10.3115/1118958.1118964 41–48 W03-1306 @@ -1909,9 +1909,9 @@ Effective Adaptation of Hidden <fixed-case>M</fixed-case>arkov Model-based Named Entity Recognizer for Biomedical Domain DanShen JieZhang - GuodongZhou + GuodongZhou JianSu - Chew-LimTan + Chew-LimTan 10.3115/1118958.1118965 49–56 W03-1307 @@ -1929,9 +1929,9 @@ Protein Name Tagging for Biomedical Annotation in Text KaoruYamamoto - TakuKudo + TakuKudo AkihikoKonagaya - YujiMatsumoto + YujiMatsumoto 10.3115/1118958.1118967 65–72 W03-1309 @@ -1950,11 +1950,11 @@ Extracting Information on Pneumonia in Infants Using Natural Language Processing of Radiology Reports - Eneida A.Mendonca + Eneida A.Mendonca JanetHaas LyudmilaShagina ElaineLarson - CarolFriedman + CarolFriedman 10.3115/1118958.1118969 81–88 W03-1311 @@ -1962,9 +1962,9 @@ Identification of Patients with Congestive Heart Failure using a Binary Classifier: A Case Study. - Serguei V.Pakhomov + Serguei V.Pakhomov JamesBuntrock - Christopher G.Chute + Christopher G.Chute 10.3115/1118958.1118970 89–96 W03-1312 @@ -1972,11 +1972,11 @@ Encoding Biomedical Resources in <fixed-case>TEI</fixed-case>: The Case of the <fixed-case>GENIA</fixed-case> Corpus - TomazErjavec + TomazErjavec Jin-DongKim TomokoOhta - YukaTateisi - Jun’ichiTsujii + YukaTateisi + Jun’ichiTsujii 10.3115/1118958.1118971 97–104 W03-1313 @@ -1985,7 +1985,7 @@ Exploring Adjectival Modification in Biomedical Discourse Across Two Genres OlivierBodenreider - Serguei V.Pakhomov + Serguei V.Pakhomov 10.3115/1118958.1118972 105–112 W03-1314 @@ -1995,7 +1995,7 @@ An Investigation of Various Information Sources for Classifying Biological names ManabuTorii SachinKamboj - K.Vijay-Shanker + K.Vijay-Shanker 10.3115/1118958.1118973 113–120 W03-1315 @@ -2003,9 +2003,9 @@ Selecting Text Features for Gene Name Classification: from Documents to Terms - GoranNenadic + GoranNenadic SimonRice - IrenaSpasic + IrenaSpasic SophiaAnaniadou BenjaminStapley 10.3115/1118958.1118974 @@ -2029,7 +2029,7 @@ Metonymy as a Cross-lingual Phenomenon - WimPeters + WimPeters 10.3115/1118975.1118976 1–9 W03-1401 @@ -2046,7 +2046,7 @@ Is There a Way to Represent Metaphors in <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>ets? Insights from the <fixed-case>H</fixed-case>amburg Metaphor Database - BirteLönneker + BirteLönneker 10.3115/1118975.1118978 18–27 W03-1403 @@ -2063,7 +2063,7 @@ Conceptual Metaphors: Ontology-based Representation and Corpora Driven Mapping Principles KathleenAhrens - Siaw FongChung + Siaw FongChung Chu-RenHuang 10.3115/1118975.1118980 36–42 @@ -2072,7 +2072,7 @@ Let’s Paint the Town Red for a Few Hours: Composition of Aspect in Idioms - Sheila R.Glasbey + Sheila R.Glasbey 10.3115/1118975.1118981 43–49 W03-1406 @@ -2121,8 +2121,8 @@ Automatic Extraction of Named Entity Translingual Equivalence Based on Multi-Feature Cost Minimization FeiHuang - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel 10.3115/1119384.1119386 9–16 W03-1502 @@ -2131,7 +2131,7 @@ Construction and Analysis of <fixed-case>J</fixed-case>apanese-<fixed-case>E</fixed-case>nglish Broadcast News Corpus with Named Entity Tags TadashiKumano - HidekiKashioka + HidekiKashioka HidekiTanaka TakahiroFukusima 10.3115/1119384.1119387 @@ -2141,10 +2141,10 @@ Low-cost Named Entity Classification for <fixed-case>C</fixed-case>atalan: Exploiting Multilingual Resources and Unlabeled Data - LluísMàrquez - Adriàde Gispert + LluísMàrquez + Adriàde Gispert XavierCarreras - LluísPadró + LluísPadró 10.3115/1119384.1119388 25–32 W03-1504 @@ -2154,7 +2154,7 @@ <fixed-case>NE</fixed-case> Recognition Without Training Data on a Language You Don’t Speak DianaMaynard ValentinTablan - HamishCunningham + HamishCunningham 10.3115/1119384.1119389 33–40 W03-1505 @@ -2171,7 +2171,7 @@ Multilingual Resources for Entity Extraction - StephanieStrassel + StephanieStrassel AlexisMitchell 10.3115/1119384.1119391 49–56 @@ -2181,7 +2181,7 @@ Transliteration of Proper Names in Cross-Lingual Information Retrieval PaolaVirga - SanjeevKhudanpur + SanjeevKhudanpur 10.3115/1119384.1119392 57–64 W03-1508 @@ -2214,8 +2214,8 @@ Generation of Single-sentence Paraphrases from Predicate/Argument Structure using Lexico-grammatical Resources RaymondKozlowski - Kathleen F.McCoy - K.Vijay-Shanker + Kathleen F.McCoy + K.Vijay-Shanker 10.3115/1118984.1118985 1–8 W03-1601 @@ -2249,7 +2249,7 @@ JamesDowdall KaarelKaljurand MichaelHess - DiegoMollá + DiegoMollá 10.3115/1118984.1118988 25–32 W03-1604 @@ -2356,7 +2356,7 @@ Unsupervised Training for Overlapping Ambiguity Resolution in <fixed-case>C</fixed-case>hinese Word Segmentation MuLi JianfengGao - Chang-NingHuang + Chang-NingHuang JianfengLi 10.3115/1119250.1119251 1–7 @@ -2366,7 +2366,7 @@ Class Based Sense Definition Model for Word Sense Tagging and Disambiguation TracyLin - Jason S.Chang + Jason S.Chang 10.3115/1119250.1119252 8–15 W03-1702 @@ -2375,7 +2375,7 @@ Utterance Segmentation Using Combined Approach Based on Bi-directional N-gram and Maximum Entropy DingLiu - ChengqingZong + ChengqingZong 10.3115/1119250.1119253 16–23 W03-1703 @@ -2392,8 +2392,8 @@ A Bottom-up Merging Algorithm for <fixed-case>C</fixed-case>hinese Unknown Word Extraction - Wei-YunMa - Keh-JiannChen + Wei-YunMa + Keh-JiannChen 10.3115/1119250.1119255 31–38 W03-1705 @@ -2402,7 +2402,7 @@ The Effect of Rhythm on Structural Disambiguation in <fixed-case>C</fixed-case>hinese HonglinSun - DanJurafsky + DanJurafsky 10.3115/1119250.1119256 39–46 W03-1706 @@ -2411,7 +2411,7 @@ Annotating the Propositions in the <fixed-case>P</fixed-case>enn <fixed-case>C</fixed-case>hinese Treebank NianwenXue - MarthaPalmer + MarthaPalmer 10.3115/1119250.1119257 47–54 W03-1707 @@ -2431,7 +2431,7 @@ <fixed-case>C</fixed-case>hinese Lexical Analysis Using Hierarchical Hidden <fixed-case>M</fixed-case>arkov Model Hua-PingZhang QunLiu - Xue-QiCheng + Xue-QiCheng HaoZhang Hong-KuiYu 10.3115/1119250.1119259 @@ -2441,7 +2441,7 @@ Modeling of Long Distance Context Dependency in <fixed-case>C</fixed-case>hinese - GuoDongZhou + GuoDongZhou 10.3115/1119250.1119260 71–77 W03-1710 @@ -2449,7 +2449,7 @@ A <fixed-case>C</fixed-case>hinese Efficient Analyser Integrating Word Segmentation, Part-Of-Speech Tagging, Partial Parsing and Full Parsing - GuoDongZhou + GuoDongZhou JianSu 10.3115/1119250.1119261 78–83 @@ -2520,7 +2520,7 @@ XiaodanZhu MuLi JianfengGao - Chang-NingHuang + Chang-NingHuang 10.3115/1119250.1119268 125–132 W03-1718 @@ -2528,7 +2528,7 @@ The First International <fixed-case>C</fixed-case>hinese Word Segmentation Bakeoff - RichardSproat + RichardSproat ThomasEmerson 10.3115/1119250.1119269 133–143 @@ -2538,9 +2538,9 @@ Combining Segmenter and Chunker for <fixed-case>C</fixed-case>hinese Word Segmentation MasayukiAsahara - Chooi LingGoh + Chooi LingGoh XiaojieWang - YujiMatsumoto + YujiMatsumoto 10.3115/1119250.1119270 144–147 W03-1720 @@ -2558,7 +2558,7 @@ <fixed-case>C</fixed-case>hinese Word Segmentation at Peking University DuanHuiming BaiXiaojing - ChangBaobao + BaobaoChang YuShiwen 10.3115/1119250.1119272 152–155 @@ -2576,9 +2576,9 @@ Integrating Ngram Model and Case-based Learning for <fixed-case>C</fixed-case>hinese Word Segmentation - ChunyuKit - ZhimingXu - Jonathan J.Webster + ChunyuKit + ZhimingXu + Jonathan J.Webster 10.3115/1119250.1119274 160–163 W03-1724 @@ -2599,8 +2599,8 @@ Introduction to <fixed-case>CKIP</fixed-case> <fixed-case>C</fixed-case>hinese Word Segmentation System for the First International <fixed-case>C</fixed-case>hinese Word Segmentation Bakeoff - Wei-YunMa - Keh-JiannChen + Wei-YunMa + Keh-JiannChen 10.3115/1119250.1119276 168–171 W03-1726 @@ -2627,7 +2627,7 @@ <fixed-case>SYSTRAN</fixed-case>’s <fixed-case>C</fixed-case>hinese Word Segmentation JinYang JeanSenellart - RemiZajac + RemiZajac 10.3115/1119250.1119279 180–183 W03-1729 @@ -2637,7 +2637,7 @@ <fixed-case>HHMM</fixed-case>-based <fixed-case>C</fixed-case>hinese Lexical Analyzer <fixed-case>ICTCLAS</fixed-case> Hua-PingZhang Hong-KuiYu - De-YiXiong + De-YiXiong QunLiu 10.3115/1119250.1119280 184–187 @@ -2646,7 +2646,7 @@ Chunking-based <fixed-case>C</fixed-case>hinese Word Tokenization - GuoDongZhou + GuoDongZhou 10.3115/1119250.1119281 188–191 W03-1731 @@ -2670,7 +2670,7 @@ Complex Structuring of Term Variants for Question Answering JamesDowdall FabioRinaldi - FideliaIbekwe-SanJuan + FideliaIbekwe-SanJuan EricSanJuan 10.3115/1119282.1119283 1–8 @@ -2679,7 +2679,7 @@ Conceptual Structuring through Term Variations - BéatriceDaille + BéatriceDaille 10.3115/1119282.1119284 9–16 W03-1802 @@ -2688,7 +2688,7 @@ Noun-Noun Compound Machine Translation A Feasibility Study on Shallow Processing TakaakiTanaka - TimothyBaldwin + TimothyBaldwin 10.3115/1119282.1119285 17–24 W03-1803 @@ -2697,7 +2697,7 @@ Using Masks, Suffix Array-based Data Structures and Multidimensional Arrays to Compute Positional Ngram Statistics from Corpora AlexandreGil - GaëlDias + GaëlDias 10.3115/1119282.1119286 25–32 W03-1804 @@ -2714,7 +2714,7 @@ Multiword Unit Hybrid Extraction - GaëlDias + GaëlDias 10.3115/1119282.1119288 41–48 W03-1806 @@ -2722,10 +2722,10 @@ Extracting Multiword Expressions with A Semantic Tagger - Scott S. L.Piao + Scott S. L.Piao PaulRayson DawnArcher - AndrewWilson + AndrewWilson TonyMcEnery 10.3115/1119282.1119289 49–56 @@ -2743,7 +2743,7 @@ A Statistical Approach to the Semantics of Verb-Particles ColinBannard - TimothyBaldwin + TimothyBaldwin AlexLascarides 10.3115/1119282.1119291 65–72 @@ -2752,9 +2752,9 @@ Detecting a Continuum of Compositionality in Phrasal Verbs - DianaMcCarthy + DianaMcCarthy BillKeller - JohnCarroll + JohnCarroll 10.3115/1119282.1119292 73–80 W03-1810 @@ -2771,7 +2771,7 @@ An Empirical Model of Multiword Expression Decomposability - TimothyBaldwin + TimothyBaldwin ColinBannard TakaakiTanaka DominicWiddows @@ -2782,7 +2782,7 @@ Licensing Complex Prepositions via Lexical Constraints - BeataTrawinski + BeataTrawinski 10.3115/1119282.1119295 97–104 W03-1813 @@ -2804,8 +2804,8 @@ Outline of the International Standard Linguistic Annotation Framework - NancyIde - LaurentRomary + NancyIde + LaurentRomary 10.3115/1119296.1119297 1–5 W03-1901 @@ -2813,7 +2813,7 @@ From Concrete to Virtual Annotation Mark-up Language: The Case of <fixed-case>COMMO</fixed-case>n-<fixed-case>REF</fixed-case>s - RenataVieira + RenataVieira CarolineGasperin RodrigoGoulart SusanneSalmon-Alt @@ -2824,7 +2824,7 @@ Ontology-based Linguistic Annotation - PhilippCimiano + PhilippCimiano SiegfriedHandschuh 10.3115/1119296.1119299 14–21 @@ -2833,10 +2833,10 @@ Putting <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Data into the <fixed-case>ISO</fixed-case> Linguistic Annotation Framework - SrinivasNarayanan - Miriam R. L.Petruck - Collin F.Baker - Charles J.Fillmore + SrinivasNarayanan + Miriam R. L.Petruck + Collin F.Baker + Charles J.Fillmore 10.3115/1119296.1119300 22–29 W03-1904 @@ -2844,9 +2844,9 @@ <fixed-case>RDF</fixed-case> Instantiation of <fixed-case>ISLE</fixed-case>/<fixed-case>MILE</fixed-case> Lexical Entries - NancyIde + NancyIde AlessandroLenci - NicolettaCalzolari + NicolettaCalzolari 10.3115/1119296.1119301 30–37 W03-1905 @@ -2858,7 +2858,7 @@ JamesDowdall MichaelHess KaarelKaljurand - AndreasPersidis + AndreasPersidis 10.3115/1119296.1119302 38–46 W03-1906 @@ -2893,7 +2893,7 @@ Jean-CharlesLamirel Shadi AlShehabi MartialHoffmann - ClaireFrancois + ClaireFrancois 10.3115/1119303.1119305 7–23 W03-2002 @@ -2940,7 +2940,7 @@ Patent Claim Processing for Readability - Structure Analysis and Term Explanation AkihiroShinmori - ManabuOkumura + ManabuOkumura YuzoMarukawa MakotoIwayama 10.3115/1119303.1119310 @@ -2969,10 +2969,10 @@ Understanding Information Graphics: A Discourse-Level Problem - SandraCarberry + SandraCarberry StephanieElzer - NancyGreen - KathleenMcCoy + NancyGreen + KathleenMcCoy DanielChester 1–12 W03-2101 @@ -2981,7 +2981,7 @@ Annotating Opinions in the World Press TheresaWilson - JanyceWiebe + JanyceWiebe 13–22 W03-2102 wilson-wiebe-2003-annotating @@ -2989,7 +2989,7 @@ Answering Clarification Questions MatthewPurver - Patrick G.T.Healey + Patrick G.T.Healey JamesKing JonathanGinzburg Greg J.Mills @@ -3026,7 +3026,7 @@ FumihiroAdachi ShinichiUeno TatsuyaKawahara - Hiroshi G.Okuno + Hiroshi G.Okuno 87–96 W03-2107 komatani-etal-2003-flexible @@ -3035,7 +3035,7 @@ Building a New <fixed-case>I</fixed-case>nternet Chat System for Sharing Timing Information KanayoOgura TakeshiMasuda - MasatoIshizaki + MasatoIshizaki 97–104 W03-2108 ogura-etal-2003-building @@ -3044,14 +3044,14 @@ Interpreter for Highly Portable Spoken Dialogue System MasamitsuUmeda SatoruKogure - SeiichiNakagawa + SeiichiNakagawa 105–114 W03-2109 umeda-etal-2003-interpreter Spoken Dialogue for Virtual Advisers in a semi-immersive Command and Control environment - DominiqueEstival + DominiqueEstival MichaelBroughton AndrewZschorn ElizabethPronger @@ -3061,8 +3061,8 @@ Using <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z simulations to bootstrap Reinforcement - Learning based dialog management systems - Jason D.Williams - SteveYoung + Jason D.Williams + SteveYoung 135–139 W03-2111 williams-young-2003-using @@ -3081,7 +3081,7 @@ Some empirical findings on dialogue management and domain ontologies in dialogue systems - Implications from an evaluation of <fixed-case>B</fixed-case>ird<fixed-case>Q</fixed-case>uest AnnikaFlycht-Eriksson - ArneJönsson + ArneJönsson 158–167 W03-2113 flycht-eriksson-jonsson-2003-empirical @@ -3099,7 +3099,7 @@ Ontology-based Contextual Coherence Scoring RobertPorzel IrynaGurevych - Christof E.Müller + Christof E.Müller 178–186 W03-2115 porzel-etal-2003-ontology @@ -3114,7 +3114,7 @@ Multi-Level Annotation in <fixed-case>MMAX</fixed-case> - ChristophMüller + ChristophMüller MichaelStrube 198–207 W03-2117 @@ -3122,10 +3122,10 @@ Domain Specific Speech Acts for Spoken Language Translation - LoriLevin + LoriLevin ChadLangley - AlonLavie - DonnaGates + AlonLavie + DonnaGates DorcasWallace KayPeterson 208–217 @@ -3143,7 +3143,7 @@ <fixed-case>PAL</fixed-case>ink<fixed-case>A</fixed-case>: A highly customisable tool for discourse annotation - ConstantinOrăsan + ConstantinOrăsan 39–43 W03-2120 orasan-2003-palinka @@ -3176,18 +3176,18 @@ Learning to Speak to a Spoken Language System: Vocabulary Convergence in Novice Users - Gina-AnneLevow + Gina-AnneLevow 149–153 W03-2124 levow-2003-learning A procedure assistant for astronauts in a functional programming architecture, with step previewing and spoken correction of dialogue moves - GregoryAist - MannyRayner - JohnDowding - Beth AnnHockey - SusanaEarly + GregoryAist + MannyRayner + JohnDowding + Beth AnnHockey + SusanaEarly JimHieronymus 154–157 W03-2125 @@ -3195,8 +3195,8 @@ Dialog Input Ranking in a Multi-Domain Environment Using Transferable Belief Model - Hong-INg - Kim-TengLua + Hong-INg + Kim-TengLua 187–191 W03-2126 ng-lua-2003-dialog @@ -3204,7 +3204,7 @@ Annotating emotion in dialogue RichardCraggs - Mary McGeeWood + Mary McGeeWood 218–225 W03-2127 craggs-wood-2003-annotating @@ -3212,7 +3212,7 @@ Developing a Typology of Dialogue Acts: Some Boundary Problems TiitHennoste - MareKoit + MareKoit AndrielaRääbis KristaStrandson MaretValdisoo @@ -3236,7 +3236,7 @@ Improving Machine Translation Quality with Automatic Named Entity Recognition BogdanBabych - AnthonyHartley + AnthonyHartley W03-2201 babych-hartley-2003-improving @@ -3245,13 +3245,13 @@ RobKoeling AdamKilgarriff DavidTugwell - RogerEvans + RogerEvans W03-2202 koeling-etal-2003-evaluation Two Approaches to Aspect Assignment in an <fixed-case>E</fixed-case>nglish-<fixed-case>P</fixed-case>olish Machine Translation System - AnnaKupsc + AnnaKupsc W03-2203 kupsc-2003-two @@ -3264,14 +3264,14 @@ Parallel Corpora Segmentation Using Anchor Words FranciscoNevado - FranciscoCasacuberta - EnriqueVidal + FranciscoCasacuberta + EnriqueVidal W03-2205 nevado-etal-2003-parallel Computer-based Support for Patients with Limited <fixed-case>E</fixed-case>nglish - HaroldSomers + HaroldSomers HermioneLovel W03-2206 somers-lovel-2003-computer @@ -3289,7 +3289,7 @@
Budapest, Hungary
EhudReiter HelmutHoracek - Keesvan Deemter + Keesvan Deemter April 2003 W03-23 @@ -3301,8 +3301,8 @@ Dynamic Generation of Cooperative Natural Language Responses in <fixed-case>WEBCOOP</fixed-case> - FarahBenamara - PatrickSaint Dizier + FarahBenamara + PatrickSaint Dizier W03-2301 benamara-saint-dizier-2003-dynamic @@ -3314,7 +3314,7 @@
Multilingual Revision - CharlesCallaway + CharlesCallaway W03-2303 callaway-2003-multilingual @@ -3339,7 +3339,7 @@
A New Model for Generating Multimodal Referring Expressions - EmielKrahmer + EmielKrahmer Ielkavan der Sluis W03-2307 krahmer-van-der-sluis-2003-new @@ -3354,14 +3354,14 @@ Phrasal Generator for Describing Relational Database Queries - Michael J.Minock + Michael J.Minock W03-2309 minock-2003-phrasal Porting to an <fixed-case>I</fixed-case>talian Surface Realizer: A Case Study AlessandraNovello - Charles B.Callaway + Charles B.Callaway W03-2310 novello-callaway-2003-porting @@ -3375,7 +3375,7 @@ Acquiring and Using Limited User Models in <fixed-case>NLG</fixed-case> EhudReiter - SomayajuluSripada + SomayajuluSripada SandraWilliams W03-2312 reiter-etal-2003-acquiring @@ -3403,7 +3403,7 @@ Adapting Chart Realization to <fixed-case>CCG</fixed-case> - MichaelWhite + MichaelWhite JasonBaldridge W03-2316 white-baldridge-2003-adapting @@ -3434,19 +3434,19 @@ The <fixed-case>PARC</fixed-case> 700 Dependency Bank - Tracy HollowayKing + Tracy HollowayKing RichardCrouch StefanRiezler MaryDalrymple - Ronald M.Kaplan + Ronald M.Kaplan W03-2401 king-etal-2003-parc Issues in the Syntactic Annotation of <fixed-case>C</fixed-case>ast3<fixed-case>LB</fixed-case> - MontserratCivit + MontserratCivit Ma. AntòniaMartí - BorjaNavarro + BorjaNavarro NúriaBufí BelénFernández RaquelMarcos @@ -3455,7 +3455,7 @@ Practical Annotation Scheme for an <fixed-case>HPSG</fixed-case> Treebank of <fixed-case>B</fixed-case>ulgarian - KirilSimov + KirilSimov PetyaOsenova W03-2403 simov-osenova-2003-practical @@ -3476,9 +3476,9 @@ Automatic Multi-Layer Corpus Annotation for Evaluation Question Answering Methods: <fixed-case>CBC</fixed-case>4<fixed-case>K</fixed-case>ids - Jochen L.Leidner + Jochen L.Leidner TiphaineDalmas - BonnieWebber + BonnieWebber JohanBos ClaireGrover W03-2406 @@ -3493,8 +3493,8 @@ Open Mind Word Expert: Creating Large Annotated Data Collections with Web Users’ Help - RadaMihalcea - TimothyChklovski + RadaMihalcea + TimothyChklovski W03-2408 mihalcea-chklovski-2003-open @@ -3529,7 +3529,7 @@ The Spoken <fixed-case>D</fixed-case>utch Corpus and its Exploitation Environment NellekeOostdijk - DaanBroeder + DaanBroeder W03-2413 oostdijk-broeder-2003-spoken @@ -3551,18 +3551,18 @@ Stretching <fixed-case>TEI</fixed-case>: Converting the <fixed-case>G</fixed-case>enia Corpus - TomazErjavec + TomazErjavec Jin-DongKim TomokoOhta - YukaTateisi - Jun-ichiTsujii + YukaTateisi + Jun-ichiTsujii W03-2416 erjavec-etal-2003-stretching The <fixed-case>M</fixed-case>eta<fixed-case>G</fixed-case>rammar: a cross-framework and cross-language test-suite generation tool AlexandraKinyon - OwenRambow + OwenRambow W03-2417 kinyon-rambow-2003-metagrammar @@ -3584,7 +3584,7 @@ Exploiting Long Distance Collocational Relations in Predictive Typing JohannesMatiasek - MarcoBaroni + MarcoBaroni W03-2501 matiasek-baroni-2003-exploiting @@ -3597,7 +3597,7 @@
Language-Models for Questions - EdSchofield + EdSchofield W03-2503 schofield-2003-language @@ -3605,7 +3605,7 @@ Automatic Acquisition of Word Interaction Patterns from Corpora VeskaNoncheva Joaqium Ferreirada Silva - GabrielLopes + GabrielLopes W03-2504 noncheva-etal-2003-automatic
@@ -3627,7 +3627,7 @@ Word N-Grams for Cluster Keyboards NilsKlarlund - MichaelRiley + MichaelRiley W03-2507 klarlund-riley-2003-word @@ -3644,7 +3644,7 @@ Domain-Specific Disambiguation for Typing with Ambiguous Keyboards KarinHarbusch - SasaHasan + SasaHasan HajoHoffmann MichaelKühn BernhardSchüler @@ -3670,7 +3670,7 @@ Intermediate Parsing for Anaphora Resolution? Implementing the Lappin and Leass non-coreference filters JuditaPreiss - TedBriscoe + TedBriscoe W03-2601 preiss-briscoe-2003-intermediate @@ -3691,13 +3691,13 @@ RyuIida KentaroInui HiroyaTakamura - YujiMatsumoto + YujiMatsumoto W03-2604 iida-etal-2003-incorporating Associative Descriptions and Salience: A Preliminary Investigation - MassimoPoesio + MassimoPoesio W03-2605 poesio-2003-associative @@ -3705,13 +3705,13 @@ Using the Web for Nominal Anaphora Resolution KatjaMarkert MalvinaNissim - NataliaModjeska + NataliaModjeska W03-2606 markert-etal-2003-using
Associative Anaphora Resolution: A Web-Based Approach - RazvanBunescu + RazvanBunescu W03-2607 bunescu-2003-associative @@ -3719,9 +3719,9 @@ Anaphoric arguments of discourse connectives: Semantic properties of antecedents versus non-antecedents EleniMiltsakaki CassandreCreswell - KatherineForbes - AravindJoshi - BonnieWebber + KatherineForbes + AravindJoshi + BonnieWebber W03-2608 miltsakaki-etal-2003-anaphoric
@@ -3751,17 +3751,17 @@ <fixed-case>I</fixed-case>ntroduction: Dialogue Systems: Interaction, Adaptation and Styles of Management - KristiinaJokinen - BjörnGämback - WilliamBlack - RobertaCatizone - YorickWilks + KristiinaJokinen + BjörnGämback + WilliamBlack + RobertaCatizone + YorickWilks W03-2701 jokinen-etal-2003-introduction Why a Static Interpretation Is Not Sufficient in Spatial Communication - John A.Bateman + John A.Bateman KerstinFischer ThoraTenbrink W03-2702 @@ -3769,7 +3769,7 @@ Learning to Classify Utterances in a Task-Oriented Dialogue - WilliamBlack + WilliamBlack PaulThompson AdamFunk AndrewConroy @@ -3778,7 +3778,7 @@ Flexibility and Efficiency through Personalisation? Experiments with a conversational Program Guide Information System - PéterBoda + PéterBoda SureshChande ElviiraHartikainen NidhiGupta @@ -3788,9 +3788,9 @@ Multimodal Dialogue Management in the <fixed-case>COMIC</fixed-case> Project - RobertaCatizone - AndreaSetzer - YorickWilks + RobertaCatizone + AndreaSetzer + YorickWilks W03-2705 catizone-etal-2003-multimodal @@ -3810,7 +3810,7 @@ Distributed Dialogue Management in a Blackboard Architecture AnttiKerminen - KristiinaJokinen + KristiinaJokinen W03-2708 kerminen-jokinen-2003-distributed @@ -3824,8 +3824,8 @@ Machine Learning for Shallow Interpretation of User Utterances in Spoken Dialogue Systems PiroskaLendvai - Antalvan den Bosch - EmielKrahmer + Antalvan den Bosch + EmielKrahmer W03-2710 lendvai-etal-2003-machine @@ -3886,17 +3886,17 @@ Reuse and Challenges in Evaluating Language Generation Systems: Position Paper - KalinaBontcheva + KalinaBontcheva 3-10 W03-2801 bontcheva-2003-reuse The <fixed-case>PEACE</fixed-case> <fixed-case>SLDS</fixed-case> understanding evaluation paradigm of the <fixed-case>F</fixed-case>rench <fixed-case>MEDIA</fixed-case> campaign - LaurenceDevillers - HélèneMaynard - PatrickParoubek - SophieRosset + LaurenceDevillers + HélèneMaynard + PatrickParoubek + SophieRosset 11-18 W03-2802 devillers-etal-2003-peace @@ -3913,7 +3913,7 @@ A Quantitative Method for Machine Translation Evaluation JesúsTomás Josep ÀngelMas - FranciscoCasacuberta + FranciscoCasacuberta 27-34 W03-2804 tomas-etal-2003-quantitative @@ -3928,7 +3928,7 @@ Intrinsic versus Extrinsic Evaluations of Parsing Systems - DiegoMollá + DiegoMollá BenHutchinson 43-50 W03-2806 @@ -3957,8 +3957,8 @@ Setting up an Evaluation Infrastructure for Human Language Technologies in <fixed-case>E</fixed-case>urope - KevinMcTait - KhalidChoukri + KevinMcTait + KhalidChoukri 7377 W03-2810 mctait-choukri-2003-setting @@ -3988,10 +3988,10 @@ A Large-scale Inheritance-based Morphological Lexicon for <fixed-case>R</fixed-case>ussian - RogerEvans + RogerEvans CaroleTiberius DunstanBrown - Greville C.Corbett + Greville C.Corbett W03-2902 9–16 evans-etal-2003-large @@ -3999,18 +3999,18 @@ Automatic Lexical Acquisition from Raw Corpora: An Application to <fixed-case>R</fixed-case>ussian AntoniOliver - IreneCastellón - LluísMàrquez + IreneCastellón + LluísMàrquez W03-2903 17–24 oliver-etal-2003-automatic The <fixed-case>MULTEXT</fixed-case>-East Morphosyntactic Specification for <fixed-case>S</fixed-case>lavic Languages - TomažErjavec + TomažErjavec CvetanaKrstev - VladimírPetkevič - KirilSimov + VladimírPetkevič + KirilSimov MarkoTadić DuškoVitas W03-2904 @@ -4064,8 +4064,8 @@ Morpho-syntactic Clues for Terminological Processing in <fixed-case>S</fixed-case>erbian - GoranNenadić - IrenaSpasić + GoranNenadić + IrenaSpasić SophiaAnaniadou W03-2911 79–86 @@ -4073,7 +4073,7 @@ <fixed-case>R</fixed-case>ussian Morphology: Ressources and <fixed-case>J</fixed-case>ava Software Application - SergeYablonsky + SergeYablonsky W03-2912 87–94 yablonsky-2003-russian @@ -4092,8 +4092,8 @@ Parsing <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars and Tree Insertion Grammars with Simultaneous Adjunctions - Miguel A.Alonso - Víctor J.Díaz + Miguel A.Alonso + Víctor J.Díaz A large part of wide coverage Tree Adjoining Grammars (TAG) is formed by trees that satisfy the restrictions imposed by Tree Insertion Grammars (TIG). This characteristic can be used to reduce the practical complexity of TAG parsing, applying the standard adjunction operation only in those cases in which the simpler cubic-time TIG adjunction cannot be applied. In this paper, we describe a parsing algorithm managing simultaneous adjunctions in TAG and TIG. 19–30 W03-3001 @@ -4102,7 +4102,7 @@ Implémentation du système <fixed-case>MASPAR</fixed-case> selon une approche multi-agent ChafikAloulou - LamiaHadrich Belguith + LamiaHadrich Belguith AhmedHadj Kacem SouhaHammami Mezghani Le traitement automatique du langage naturel est un axe de recherche qui connaît chaque jour de nouvelles théories et approches. Les systèmes d’analyse automatique qui sont fondés sur une approche séquentielle présentent plusieurs inconvénients. Afin de pallier ces limites, nous nous sommes intéressés à la réalisation d’un système d’analyse syntaxique de textes arabes basé sur l’approche multi-agent : MASPAR « Multi-Agent System for Parsing ARabic ». @@ -4158,7 +4158,7 @@ Subtree Parsing to Speed up Deep Analysis - KilianFoth + KilianFoth WolfgangMenzel Within a grammar formalism that treats syntax analysis as a global optimization problem, methods are investigated to improve parsing performance by recombining the solutions of smaller and easier subproblems. The robust nature of the formalism allows the application of this technique with little change to the original grammar. 91–102 @@ -4175,7 +4175,7 @@ Generative versus Discriminative Models for Statistical Left-Corner Parsing - JamesHenderson + JamesHenderson We propose two statistical left-corner parsers and investigate their accuracy at varying speeds. The parser based on a generative probability model achieves state-of-the-art accuracy when sufficient time is available, but when high speed is required the parser based on a discriminative probability model performs better. Neural network probability estimation is used to handle conditioning on both the unbounded parse histories and the unbounded lookahead strings. 115–126 W03-3011 @@ -4183,8 +4183,8 @@ <fixed-case>PACE</fixed-case> — Parser Comparison and Evaluation - VladimirKadlec - PavelSmrz + VladimirKadlec + PavelSmrz The paper introduces PACE — a parser comparison and evaluation system for the syntactic processing of natural languages. The analysis is based on context free grammar with contextual extensions (constraints). The system is able to manage very large and extremely ambiguous CF grammars. It is independent of the parsing algorithm used. The tool can solve the contextual constraints on the resulting CF structure, select the best parsing trees according to their probabilities, or combine them. We discuss the advantages and disadvantages of our modular design as well as how efficiently it processes the standard evaluation grammars. 211–212 W03-3012 @@ -4194,7 +4194,7 @@ <fixed-case>GLR</fixed-case> Parser with Conditional Action Model using Surface Phrasal Types for <fixed-case>K</fixed-case>orean Yong-JaeKwak So-YoungPark - Hae-ChangRim + Hae-ChangRim In this paper, we propose a new probabilistic GLR parsing method that can solve the problems of conventional methods. Our proposed Conditional Action Model uses Surface Phrasal Types (SPTs) encoding the functional word sequences of the sub-trees for describing structural characteristics of the partial parse. And, the proposed GLR model outperforms the previous methods by about 6~8%. 213–214 W03-3013 @@ -4203,7 +4203,7 @@ Parsing Domain Actions with Phrase-Level Grammars and Memory-Based Learners ChadLangley - AlonLavie + AlonLavie In this paper, we describe an approach to analysis for spoken language translation that combines phrase-level grammar-based parsing and automatic domain action classification. The job of the analyzer is to transform utterances into a shallow semantic task-oriented interlingua representation. The goal of our hybrid approach is to provide accurate real-time analyses and to improve robustness and portability to new domains and languages. 127–136 W03-3014 @@ -4236,7 +4236,7 @@ Dependency parsing using dependency graph for storing alternative structures - TomaszObrebski + TomaszObrebski In this paper an efficient algorithm for dependency parsing is described in which ambiguous dependency structure of a sentence is represented in the form of a graph. The idea of the algorithm is shortly outlined and some issues as to its time complexity are discussed. W03-3018 obrebski-2003-dependency @@ -4244,7 +4244,7 @@ Combining Rule-based and Data-driven Techniques for Grammatical Relation Extraction in Spoken Language KenjiSagae - AlonLavie + AlonLavie We investigate an aspect of the relationship between parsing and corpus-based methods in NLP that has received relatively little attention: coverage augmentation in rule-based parsers. In the specific task of determining grammatical relations (such as subjects and objects) in transcribed spoken language, we show that a combination of rule-based and corpus-based approaches, where a rule-based system is used as the teacher (or an automatic data annotator) to a corpus-based system, outperforms either system in isolation. W03-3019 sagae-lavie-2003-combining @@ -4253,7 +4253,7 @@ Partially Ordered Multiset Context-free Grammars and Free-word-order Parsing Mark-JanNederhof GiorgioSatta - StuartShieber + StuartShieber We present a new formalism, partially ordered multiset context-free grammars (poms-CFG), along with an Earley-style parsing algorithm. The formalism, which can be thought of as a generalization of context-free grammars with partially ordered right-hand sides, is of interest in its own right, and also as infrastructure for obtaining tighter complexity bounds for more expressive context-free formalisms intended to express free or multiple word-order, such as ID/LP grammars. We reduce ID/LP grammars to poms-grammars, thereby getting finer-grained bounds on the parsing complexity of ID/LP grammars. We argue that in practice, the width of attested ID/LP grammars is small, yielding effectively polynomial time complexity for ID/LP grammar parsing. 171–182 W03-3020 @@ -4261,7 +4261,7 @@ On maximizing metrics for syntactic disambiguation - KhalilSima’an + KhalilSima’an Given a probabilistic parsing model and an evaluation metric for scoring the match between parse-trees, e.g., PARSEVAL [Black et al., 1991], this paper addresses the problem of how to select the on average best scoring parse-tree for an input sentence. Common wisdom dictates that it is optimal to select the parse with the highest probability, regardless of the evaluation metric. In contrast, the Maximizing Metrics (MM) method [Goodman, 1998, Stolcke et al., 1997] proposes that an algorithm that optimizes the evaluation metric itself constitutes the optimal choice. We study the MM method within parsing. We observe that the MM does not always hold for tree-bank models, and that optimizing weak metrics is not interesting for semantic processing. Subsequently, we state an alternative proposition: the optimal algorithm must maximize the metric that scores parse-trees according to linguistically relevant features. We present new algorithms that optimize metrics that take into account increasingly more linguistic features, and exhibit experiments in support of our claim. 183–194 W03-3021 @@ -4273,13 +4273,13 @@ Yong-JaeKwak Hoo-JungChung Young-SookHwang - Hae-ChangRim + Hae-ChangRim park-etal-2003-automatic Statistical Dependency Analysis with Support Vector Machines HiroyasuYamada - YujiMatsumoto + YujiMatsumoto In this paper, we propose a method for analyzing word-word dependencies using deterministic bottom-up manner using Support Vector machines. We experimented with dependency trees converted from Penn treebank data, and achieved over 90% accuracy of word-word dependency. Though the result is little worse than the most up-to-date phrase structure based parsers, it looks satisfactorily accurate considering that our parser uses no information from phrase structures. 195–206 W03-3023 diff --git a/data/xml/W04.xml b/data/xml/W04.xml index a5ec08a978..fb2fb5b62e 100644 --- a/data/xml/W04.xml +++ b/data/xml/W04.xml @@ -41,7 +41,7 @@ Automatic Acquisition of Feature-Based Phonotactic Resources - JulieCarson-Berndsen + JulieCarson-Berndsen RobertKelly MoritzNeugebauer 27–34 @@ -50,7 +50,7 @@ Priors in <fixed-case>B</fixed-case>ayesian Learning of Phonological Rules - SharonGoldwater + SharonGoldwater MarkJohnson 35–42 W04-0105 @@ -67,9 +67,9 @@ Unsupervised Induction of Natural Language Morphology Inflection Classes ChristianMonson - AlonLavie - JaimeCarbonell - LoriLevin + AlonLavie + JaimeCarbonell + LoriLevin 52–61 W04-0107 monson-etal-2004-unsupervised @@ -78,8 +78,8 @@ A Comparison of Two Different Approaches to Morphological Analysis of <fixed-case>D</fixed-case>utch GuyDe Pauw TomLaureys - WalterDaelemans - HugoVan hamme + WalterDaelemans + HugoVan hamme 62–69 W04-0108 de-pauw-etal-2004-comparison @@ -122,7 +122,7 @@ A Framework for Feature based Description of Low level Discourse - LauraAlonso Alemany + LauraAlonso Alemany Ezequiel AndujarHinojosa Robert SolaSalvatierra 1–8 @@ -131,9 +131,9 @@ <fixed-case>COOPML</fixed-case>: Towards Annotating Cooperative Discourse - FarahBenamara - VeroniqueMoriceau - PatrickSaint-Dizier + FarahBenamara + VeroniqueMoriceau + PatrickSaint-Dizier 9–16 W04-0202 benamara-etal-2004-coopml @@ -163,8 +163,8 @@ Discourse-level Annotation for Investigating Information Structure - IvanaKruijff-Korbayova - Geert-Jan M.Kruijff + IvanaKruijff-Korbayova + Geert-Jan M.Kruijff 41–48 W04-0206 kruijff-korbayova-kruijff-2004-discourse @@ -172,7 +172,7 @@ Text Type Structure and Logical Document Structure HagenLanger - HaraldLungen + HaraldLungen Petra SaskiaBayerl 49–56 W04-0207 @@ -181,23 +181,23 @@ Temporal Discourse Models for Narrative Structure InderjeetMani - JamesPustejovsky + JamesPustejovsky 57–64 W04-0208 mani-pustejovsky-2004-temporal Exploiting Semantic Information for Manual Anaphoric Annotation in <fixed-case>C</fixed-case>ast3<fixed-case>LB</fixed-case> Corpus - BorjaNavarro - RubenIzquierdo - MaximilianoSaiz-Noeda + BorjaNavarro + RubenIzquierdo + MaximilianoSaiz-Noeda 65–71 W04-0209 navarro-etal-2004-exploiting Discourse Annotation and Semantic Annotation in the <fixed-case>GNOME</fixed-case> corpus - MassimoPoesio + MassimoPoesio 72–79 W04-0210 poesio-2004-discourse @@ -205,7 +205,7 @@ Sentential Structure and Discourse Parsing LiviaPolanyi - ChrisCuly + ChrisCuly Martinvan den Berg Gian LorenzoThione DavidAhn @@ -217,8 +217,8 @@ Annotation and Data Mining of the <fixed-case>P</fixed-case>enn <fixed-case>D</fixed-case>iscourse <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank RashmiPrasad EleniMiltsakaki - AravindJoshi - BonnieWebber + AravindJoshi + BonnieWebber 88–95 W04-0212 prasad-etal-2004-annotation @@ -232,11 +232,11 @@ Discourse Annotation in the Monroe Corpus - JoelTetreault - MarySwift + JoelTetreault + MarySwift PreethumPrithviraj - MyroslavaDzikovska - JamesAllen + MyroslavaDzikovska + JamesAllen 103–109 W04-0214 tetreault-etal-2004-discourse @@ -245,7 +245,7 @@ <fixed-case>L</fixed-case>ive<fixed-case>T</fixed-case>ree: An Integrated Workbench for Discourse Processing Gian LorenzoThione Martinvan den Berg - ChrisCuly + ChrisCuly LiviaPolanyi 110–117 W04-0215 @@ -260,7 +260,7 @@ AndrewKoontz-Garboden TatianaNikitina M. CatherineO’Connor - TomWasow + TomWasow 118–125 W04-0216 zaenen-etal-2004-animacy @@ -282,7 +282,7 @@ Competence and Performance Grammar in Incremental Processing VincenzoLombardo - AlessandroMazzei + AlessandroMazzei PatrickSturt 1–8 W04-0301 @@ -290,7 +290,7 @@ Stochastically Evaluating the Validity of Partial Parse Trees in Incremental Parsing - YoshihideKato + YoshihideKato ShigekiMatsubara YasuyoshiInagaki 9–15 @@ -306,16 +306,16 @@ Incremental Parsing with Reference Interaction - Scott C.Stoness - JoelTetreault - JamesAllen + Scott C.Stoness + JoelTetreault + JamesAllen 18–25 W04-0304 stoness-etal-2004-incremental Lookahead in Deterministic Left-Corner Parsing - JamesHenderson + JamesHenderson 26–33 W04-0305 henderson-2004-lookahead @@ -330,8 +330,8 @@ A Statistical Constraint Dependency Grammar (<fixed-case>CDG</fixed-case>) Parser - WenWang - Mary P.Harper + WenWang + Mary P.Harper 42–49 W04-0307 wang-harper-2004-statistical @@ -345,7 +345,7 @@ The Information-Processing Difficulty of Incremental Parsing - JohnHale + JohnHale 58–65 W04-0309 hale-2004-information @@ -375,7 +375,7 @@ Modeling Sentence Processing in <fixed-case>ACT</fixed-case>-<fixed-case>R</fixed-case> ShravanVasishth - Richard L.Lewis + Richard L.Lewis 82–87 W04-0313 vasishth-lewis-2004-modeling @@ -408,7 +408,7 @@ AtsushiFujita KentaroFurihata KentaroInui - YujiMatsumoto + YujiMatsumoto KoichiTakeuchi 9–16 W04-0402 @@ -423,7 +423,7 @@ Translation by Machine of Complex Nominals: Getting it Right - TimothyBaldwin + TimothyBaldwin TakaakiTanaka 24–31 W04-0404 @@ -449,12 +449,12 @@ Representation and Treatment of Multiword Expressions in <fixed-case>B</fixed-case>asque - IñakiAlegria - OlatzAnsa - XabierArtola - NereaEzeiza - KoldoGojenola - RubenUrizar + IñakiAlegria + OlatzAnsa + XabierArtola + NereaEzeiza + KoldoGojenola + RubenUrizar 48–55 W04-0407 alegria-etal-2004-representation @@ -469,7 +469,7 @@ Integrating Morphology with Multi-word Expression Processing in <fixed-case>T</fixed-case>urkish KemalOflazer - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu BilgeSay 64–71 W04-0409 @@ -504,9 +504,9 @@ <fixed-case>NP</fixed-case>-External Arguments: A Study of Argument Sharing in <fixed-case>E</fixed-case>nglish - AdamMeyers + AdamMeyers RuthReeves - CatherineMacleod + CatherineMacleod 96–103 W04-0413 meyers-etal-2004-np @@ -536,7 +536,7 @@ Evaluation of Restricted Domain Question-Answering Systems Anne R.Diekema OzgurYilmazel - Elizabeth D.Liddy + Elizabeth D.Liddy 2–7 W04-0502 diekema-etal-2004-evaluation @@ -552,7 +552,7 @@ A Qualitative Comparison of Scientific and Journalistic Texts from the Perspective of Extracting Definitions IgalGabbay - Richard F.E.Sutcliffe + Richard F.E.Sutcliffe 16–22 W04-0504 gabbay-sutcliffe-2004-qualitative @@ -560,27 +560,27 @@ <fixed-case>B</fixed-case>io<fixed-case>G</fixed-case>rapher: Biography Questions as a Restricted Domain Question Answering Task OrenTsur - Maartende Rijke - KhalilSima’an + Maartende Rijke + KhalilSima’an 23–30 W04-0505 tsur-etal-2004-biographer Cooperative Question Answering in Restricted Domains: the <fixed-case>WEBCOOP</fixed-case> Experiment - FarahBenamara + FarahBenamara 31–38 W04-0506 benamara-2004-cooperative A Practical <fixed-case>QA</fixed-case> System in Restricted Domains - HoojungChung + HoojungChung Young-InSong Kyoung-SooHan Do-SangYoon - Joo-YoungLee - Hae-ChangRim + Joo-YoungLee + Hae-ChangRim Soo-HongKim 39–45 W04-0507 @@ -591,7 +591,7 @@ FabioRinaldi JamesDowdall GeroldSchneider - AndreasPersidis + AndreasPersidis 46–53 W04-0508 rinaldi-etal-2004-answering @@ -620,16 +620,16 @@ Techniques for Text Planning with <fixed-case>XSLT</fixed-case> - Mary EllenFoster - MichaelWhite + Mary EllenFoster + MichaelWhite 1–8 W04-0601 foster-white-2004-techniques Towards Metadata Interoperability - PeterWittenburg - DaanBroeder + PeterWittenburg + DaanBroeder PaulBuitelaar 9–16 W04-0602 @@ -639,7 +639,7 @@ A Web Application using <fixed-case>RDF</fixed-case>/<fixed-case>RDFS</fixed-case> for Metadata Navigation XiGuo MarkChaudhary - ChristopherDozier + ChristopherDozier YogiArumainayagam VenkatesanSubramanian 17–24 @@ -649,9 +649,9 @@ The Semantics of Markup: Mapping Legacy Markup Schemas to a Common Semantics GarySimons - WilliamLewis + WilliamLewis ScottFarrar - TerenceLangendoen + TerenceLangendoen BrianFitzsimons HectorGonzalez 25–32 @@ -687,14 +687,14 @@ An Extensible Framework for Efficient Document Management using <fixed-case>RDF</fixed-case> and <fixed-case>OWL</fixed-case> EricaMeena AshwaniKumar - LaurentRomary + LaurentRomary 51–58 W04-0608 meena-etal-2004-extensible Towards Ontology-based Natural Language Processing - DominiqueEstival + DominiqueEstival ChrisNowak AndrewZschorn 59–66 @@ -718,14 +718,14 @@ Multi-Document Person Name Resolution MichaelFleischman - EduardHovy + EduardHovy 1–8 W04-0701 fleischman-hovy-2004-multi Cross Document Co-Reference Resolution Applications for People in the Legal Domain - ChristopherDozier + ChristopherDozier ThomasZielund 9–16 W04-0702 @@ -751,7 +751,7 @@ Applying Coreference to Improve Name Recognition HengJi - RalphGrishman + RalphGrishman 32–39 W04-0705 ji-grishman-2004-applying @@ -759,17 +759,17 @@ Using Word Similarity Lists for Resolving Indirect Anaphora CarolineGasperin - RenataVieira + RenataVieira 40–46 W04-0706 gasperin-vieira-2004-using Discourse-New Detectors for Definite Description Resolution: A Survey and a Preliminary Proposal - MassimoPoesio + MassimoPoesio OlgaUryupina - RenataVieira - MijailAlexandrov-Kabadjov + RenataVieira + MijailAlexandrov-Kabadjov RodrigoGoulart 47–54 W04-0707 @@ -791,7 +791,7 @@ Reference Resolution over a Restricted Domain: References to Documents - AndreiPopescu-Belis + AndreiPopescu-Belis DenisLalanne 71–78 W04-0710 @@ -799,8 +799,8 @@ <fixed-case>B</fixed-case>io<fixed-case>AR</fixed-case>: Anaphora Resolution for Relating Protein Names to Proteome Database Entries - Jung-JaeKim - Jong C.Park + Jung-JaeKim + Jong C.Park 79–86 W04-0711 kim-park-2004-bioar @@ -808,7 +808,7 @@ Ellipsis Resolution by Controlled Default Unification for Multi-modal and Speech Dialog Systems MichaelStreit - HansUlrichKrieger + HansUlrichKrieger 87–94 W04-0712 streit-krieger-2004-ellipsis @@ -823,7 +823,7 @@ Topic Identification in <fixed-case>C</fixed-case>hinese Based on Centering Model Ching-LongYeh - Yi-ChunChen + Yi-ChunChen 103–109 W04-0714 yeh-chen-2004-topic @@ -844,10 +844,10 @@ The <fixed-case>B</fixed-case>asque lexical-sample task - EnekoAgirre + EnekoAgirre ItziarAldabe - MikelLersundi - DavidMartínez + MikelLersundi + DavidMartínez EliPociello LarraitzUria 1–4 @@ -856,8 +856,8 @@ The Senseval-3 Multilingual <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi lexical sample task - TimothyChklovski - RadaMihalcea + TimothyChklovski + RadaMihalcea TedPedersen AmrutaPurandare 5–8 @@ -880,7 +880,7 @@ The <fixed-case>I</fixed-case>talian lexical sample task at Senseval-3 - BernardoMagnini + BernardoMagnini DaniloGiampiccolo AlessandroVallin 17–20 @@ -889,21 +889,21 @@ Senseval-3: The <fixed-case>S</fixed-case>panish lexical sample task - LluisMàrquez - MarionaTaulé - AntoniaMartí - NúriaArtigas - MarGarcía + LluisMàrquez + MarionaTaulé + AntoniaMartí + NúriaArtigas + MarGarcía FrancisReal - DaniFerrés + DaniFerrés 21–24 W04-0806 marquez-etal-2004-senseval The Senseval-3 <fixed-case>E</fixed-case>nglish lexical sample task - RadaMihalcea - TimothyChklovski + RadaMihalcea + TimothyChklovski AdamKilgarriff 25–28 W04-0807 @@ -911,11 +911,11 @@ An evaluation exercise for <fixed-case>R</fixed-case>omanian Word Sense Disambiguation - RadaMihalcea - ViviNăstase - TimothyChklovski - DoinaTătar - DanTufiş + RadaMihalcea + ViviNăstase + TimothyChklovski + DoinaTătar + DanTufiş FlorentinaHristea 29–32 W04-0808 @@ -939,7 +939,7 @@ The <fixed-case>E</fixed-case>nglish all-words task BenjaminSnyder - MarthaPalmer + MarthaPalmer 41–43 W04-0811 snyder-palmer-2004-english @@ -949,14 +949,14 @@ MarisaUlivieri ElisabettaGuazzini FrancescaBertagna - NicolettaCalzolari + NicolettaCalzolari W04-0812 ulivieri-etal-2004-senseval The <fixed-case>B</fixed-case>asque Country University system: <fixed-case>E</fixed-case>nglish and <fixed-case>B</fixed-case>asque tasks - EnekoAgirre - DavidMartínez + EnekoAgirre + DavidMartínez 44–48 W04-0813 agirre-martinez-2004-basque @@ -964,9 +964,9 @@ The <fixed-case>U</fixed-case>niversity of <fixed-case>A</fixed-case>msterdam at Senseval-3: Semantic roles and Logic forms DavidAhn - SisayFissaha + SisayFissaha ValentinJijkoun - MaartenDe Rijke + MaartenDe Rijke 49–53 W04-0814 ahn-etal-2004-university @@ -974,7 +974,7 @@ Dependency based logical form transformations StephenAnthony - JonPatrick + JonPatrick 54–57 W04-0815 anthony-patrick-2004-dependency @@ -982,7 +982,7 @@ Word Sense Disambiguation based on term to term similarity in a context space JavierArtiles - AnselmoPenas + AnselmoPenas FelisaVerdejo 58–63 W04-0816 @@ -992,7 +992,7 @@ Semantic role labelling with similarity-based generalization using <fixed-case>EM</fixed-case>-based clustering UlrikeBaldewein KatrinErk - SebastianPadó + SebastianPadó DetlefPrescher 64–68 W04-0817 @@ -1000,21 +1000,21 @@ The <fixed-case>MITRE</fixed-case> logical form generation system - SamuelBayer + SamuelBayer JohnBurger JohnGreiff - BenWellner + BenWellner 69–72 W04-0818 bayer-etal-2004-mitre Semantic parsing based on <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et - Cosmin AdrianBejan + Cosmin AdrianBejan AlessandroMoschitti - PaulMorărescu + PaulMorărescu GabrielNicolae - SandaHarabagiu + SandaHarabagiu 73–76 W04-0819 bejan-etal-2004-semantic @@ -1051,7 +1051,7 @@ MauroCastillo FrancisReal JordiAsterias - GermanRigau + GermanRigau 93–96 W04-0823 castillo-etal-2004-talp @@ -1073,7 +1073,7 @@ <fixed-case>UBBNBC</fixed-case> <fixed-case>WSD</fixed-case> system description - AndrásCsomai + AndrásCsomai 105–107 W04-0826 csomai-2004-ubbnbc @@ -1081,9 +1081,9 @@ <fixed-case>GAMBL</fixed-case>, genetic algorithm optimization of memory-based <fixed-case>WSD</fixed-case> BartDecadt - VéroniqueHoste - WalterDaelemans - Antalvan den Bosch + VéroniqueHoste + WalterDaelemans + Antalvan den Bosch 108–112 W04-0827 decadt-etal-2004-gambl @@ -1091,25 +1091,25 @@ <fixed-case>TALP</fixed-case> system for the <fixed-case>E</fixed-case>nglish lexical sample task GerardEscudero - LluisMàrquez - GermanRigau + LluisMàrquez + GermanRigau 113–116 W04-0828 escudero-etal-2004-talp <fixed-case>WSD</fixed-case> based on mutual information and syntactic patterns - DavidFérnandez-Amorós + DavidFérnandez-Amorós 117–120 W04-0829 fernandez-amoros-2004-wsd The <fixed-case>U</fixed-case>niversity of <fixed-case>J</fixed-case>aén Word Sense Disambiguation system - ManuelGarcía-Vega - MiguelGarcía-Cumbreras - M. TeresaMartín-Valdivia - L. AlfonsoUrena-López + ManuelGarcía-Vega + MiguelGarcía-Cumbreras + M. TeresaMartín-Valdivia + L. AlfonsoUrena-López 121–124 W04-0830 garcia-vega-etal-2004-university @@ -1125,7 +1125,7 @@ Senseval automatic labeling of semantic roles using Maximum Entropy models NamheeKwon MichaelFleischman - EduardHovy + EduardHovy 129–132 W04-0832 kwon-etal-2004-senseval @@ -1141,7 +1141,7 @@ Supervised Word Sense Disambiguation with Support Vector Machines and multiple knowledge sources - Yoong KeokLee + Yoong KeokLee Hwee TouNg Tee KiahChia 137–140 @@ -1157,29 +1157,29 @@ Senseval-3: The <fixed-case>C</fixed-case>atalan lexical sample task - LluisMàrquez - MarionaTaulé - AntoniaMartí - MarGarcía + LluisMàrquez + MarionaTaulé + AntoniaMartí + MarGarcía FrancisReal - DaniFerrés + DaniFerrés 147–150 W04-0836 marquez-etal-2004-senseval-3 Using automatically acquired predominant senses for Word Sense Disambiguation - DianaMcCarthy + DianaMcCarthy RobKoeling JulieWeeds - JohnCarroll + JohnCarroll 151–154 W04-0837 mccarthy-etal-2004-using <fixed-case>S</fixed-case>ense<fixed-case>L</fixed-case>earner: Minimally supervised Word Sense Disambiguation for all words in open text - RadaMihalcea + RadaMihalcea EhsanulFaruque 155–158 W04-0838 @@ -1187,7 +1187,7 @@ Complementarity of lexical and simple syntactic features: The <fixed-case>S</fixed-case>ynta<fixed-case>L</fixed-case>ex approach to Senseval-3 - SaifMohammad + SaifMohammad TedPedersen 159–162 W04-0839 @@ -1196,7 +1196,7 @@ Senseval-3 logic forms: A system and possible improvements AltafMohammed - DanMoldovan + DanMoldovan PaulParker 163–166 W04-0840 @@ -1204,8 +1204,8 @@ <fixed-case>SVM</fixed-case> classification of <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et semantic roles - DanMoldovan - RoxanaGîrju + DanMoldovan + RoxanaGîrju MarianOlteanu OvidiuFortu 167–170 @@ -1223,10 +1223,10 @@ Using a Word Sense Disambiguation system for translation disambiguation: the <fixed-case>LIA</fixed-case>-<fixed-case>LIDILEM</fixed-case> team experiment - GrégoireMoreau de Montcheuil - MarcEl-Bèze + GrégoireMoreau de Montcheuil + MarcEl-Bèze BoxingChen - OlivierKraif + OlivierKraif 175–178 W04-0843 moreau-de-montcheuil-etal-2004-using @@ -1234,7 +1234,7 @@ Structural semantic interconnection: a knowledge-based approach to Word Sense Disambiguation RobertoNavigli - PaolaVelardi + PaolaVelardi 179–182 W04-0844 navigli-velardi-2004-structural @@ -1244,7 +1244,7 @@ GraceNgai DekaiWu MarineCarpuat - Chi-ShingWang + Chi-ShingWang Chi-YungWang 183–186 W04-0845 @@ -1254,8 +1254,8 @@ Context clustering for Word Sense Disambiguation based on modeling pairwise context similarities ChengNiu WeiLi - Rohini K.Srihari - HuifengLi + Rohini K.Srihari + HuifengLi LaurieCrist 187–190 W04-0846 @@ -1263,9 +1263,9 @@ Optimizing feature set for <fixed-case>C</fixed-case>hinese Word Sense Disambiguation - Zheng-YuNiu - Dong-HongJi - Chew-LimTan + Zheng-YuNiu + Dong-HongJi + Chew-LimTan 191–194 W04-0847 niu-etal-2004-optimizing @@ -1273,9 +1273,9 @@ <fixed-case>LCC</fixed-case>’s <fixed-case>WSD</fixed-case> systems for Senseval-3 AdrianNovischi - DanMoldovan + DanMoldovan PaulParker - AdrianaBădulescu + AdrianaBădulescu BobHauser 195–198 W04-0848 @@ -1284,9 +1284,9 @@ Class-based collocations for Word Sense Disambiguation TomO’Hara - RebeccaBruce + RebeccaBruce JeffDonner - JanyceWiebe + JanyceWiebe 199–202 W04-0849 ohara-etal-2004-class @@ -1316,7 +1316,7 @@ A gloss-centered algorithm for disambiguation GaneshRamakrishnan B.Prithviraj - PushpakBhattacharyya + PushpakBhattacharyya 217–221 W04-0853 ramakrishnan-etal-2004-gloss @@ -1324,7 +1324,7 @@ <fixed-case>KUNLP</fixed-case> system in Senseval-3 Hee-CheolSeo - Hae-ChangRim + Hae-ChangRim Soo-HongKim 222–225 W04-0854 @@ -1332,8 +1332,8 @@ <fixed-case>UBB</fixed-case> system at Senseval-3 - GabrielaŞerban - DoinaTătar + GabrielaŞerban + DoinaTătar 226–228 W04-0855 serban-tatar-2004-ubb @@ -1341,8 +1341,8 @@ Pattern abstraction and term similarity for Word Sense Disambiguation: <fixed-case>IRST</fixed-case> at Senseval-3 CarloStrapparava - AlfioGliozzo - ClaudioGiuliano + AlfioGliozzo + ClaudioGiuliano 229–234 W04-0856 strapparava-etal-2004-pattern @@ -1358,48 +1358,48 @@ Word Sense Disambiguation by Web mining for word co-occurrence probabilities - PeterTurney + PeterTurney 239–242 W04-0858 turney-2004-word The <fixed-case>U</fixed-case>niversity of <fixed-case>A</fixed-case>licante systems at Senseval-3 - SoniaVázquez + SoniaVázquez RafaelRomero - ArmandoSuárez - AndrésMontoyo + ArmandoSuárez + AndrésMontoyo IuliaNica - AntoniaMartí + AntoniaMartí 243–247 W04-0859 vazquez-etal-2004-university The <fixed-case>R</fixed-case>2<fixed-case>D</fixed-case>2 team at Senseval-3 - SoniaVázquez + SoniaVázquez RafaelRomero - ArmandoSuárez - AndrésMontoyo - ManuelGarcía - M. TeresaMartín - M. ÁngelGarcía - L. AlfonsoUrena + ArmandoSuárez + AndrésMontoyo + ManuelGarcía + M. TeresaMartín + M. ÁngelGarcía + L. AlfonsoUrena 248–252 W04-0860 vazquez-etal-2004-r2d2 The “Meaning” system on the <fixed-case>E</fixed-case>nglish all-words task - LuísVillarejo - LluisMàrquez - EnekoAgirre - DavidMartínez - BernardoMagnini + LuísVillarejo + LluisMàrquez + EnekoAgirre + DavidMartínez + BernardoMagnini CarloStrapparava - DianaMcCarthy - AndrésMontoyo - ArmandoSuárez + DianaMcCarthy + AndrésMontoyo + ArmandoSuárez 253–256 W04-0861 villarejo-etal-2004-meaning @@ -1457,8 +1457,8 @@ Solving logic puzzles: From robust processing to precise semantics IddoLev BillMacCartney - ChristopherManning - RogerLevy + ChristopherManning + RogerLevy 9–16 W04-0902 lev-etal-2004-solving @@ -1474,7 +1474,7 @@ <fixed-case>O</fixed-case>nto<fixed-case>S</fixed-case>em and <fixed-case>SIMPLE</fixed-case>: Two multi-lingual world views MarjorieMcShane MargalitZabludowski - SergeiNirenburg + SergeiNirenburg StephenBeale 25–32 W04-0904 @@ -1482,7 +1482,7 @@ Evaluating the performance of the <fixed-case>O</fixed-case>nto<fixed-case>S</fixed-case>em semantic analyzer - SergeiNirenburg + SergeiNirenburg StephenBeale MarjorieMcShane 33–40 @@ -1492,9 +1492,9 @@ Question answering using ontological semantics StephenBeale - BenoitLavoie + BenoitLavoie MarjorieMcShane - SergeiNirenburg + SergeiNirenburg TanyaKorelsky 41–48 W04-0906 @@ -1502,7 +1502,7 @@ Making sense of <fixed-case>J</fixed-case>apanese relative clause constructions - TimothyBaldwin + TimothyBaldwin 49–56 W04-0907 baldwin-2004-making @@ -1520,7 +1520,7 @@ Inducing a semantic frame lexicon from <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et data RebeccaGreen - BonnieDorr + BonnieDorr 65–72 W04-0909 green-dorr-2004-inducing @@ -1537,7 +1537,7 @@ Lexical-semantic interpretation of language input in mathematical dialogs MagdalenaWolska - IvanaKruijff-Korbayová + IvanaKruijff-Korbayová HelmutHoracek 81–88 W04-0911 @@ -1552,7 +1552,7 @@ Text Understanding with <fixed-case>GETARUNS</fixed-case> for <fixed-case>Q</fixed-case>/A and Summarization - RodolfoDelmonte + RodolfoDelmonte 97–104 W04-0913 delmonte-2004-text @@ -1560,7 +1560,7 @@ Semantic forensics: An application of ontological semantics to information assurance VictorRaskin - Christian F.Hempelmann + Christian F.Hempelmann Katrina E.Triezenberg 105–112 W04-0914 @@ -1596,10 +1596,10 @@ Extending Document Summarization to Information Graphics - SandraCarberry + SandraCarberry StephanieElzer - NancyGreen - KathleenMcCoy + NancyGreen + KathleenMcCoy DanielChester 3–9 W04-1002 @@ -1624,7 +1624,7 @@ Vocabulary Usage in Newswire Summaries TerryCopeck - StanSzpakowicz + StanSzpakowicz 19–26 W04-1005 copeck-szpakowicz-2004-vocabulary @@ -1647,8 +1647,8 @@ Task-Focused Summarization of Email - SimonCorston-Oliver - EricRingger + SimonCorston-Oliver + EricRingger MichaelGamon RichardCampbell 43–50 @@ -1660,7 +1660,7 @@ Gian LorenzoThione Martinvan den Berg LiviaPolanyi - ChrisCuly + ChrisCuly 51–55 W04-1009 thione-etal-2004-hybrid @@ -1668,14 +1668,14 @@ Template-Filtered Headline Summarization LiangZhou - EduardHovy + EduardHovy 56–60 W04-1010 zhou-hovy-2004-template Handling Figures in Document Summarization - Robert P.Futrelle + Robert P.Futrelle 61–65 W04-1011 futrelle-2004-handling @@ -1691,7 +1691,7 @@ <fixed-case>ROUGE</fixed-case>: A Package for Automatic Evaluation of Summaries - Chin-YewLin + Chin-YewLin 74–81 W04-1013 lin-2004-rouge @@ -1715,7 +1715,7 @@ Generic Sentence Fusion is an Ill-Defined Summarization Task - HalDaume III + HalDaume III DanielMarcu 96–103 W04-1016 @@ -1733,7 +1733,7 @@ <fixed-case>C</fixed-case>hinese Text Summarization Based on Thematic Area Detection PoHu TingtingHe - DonghongJi + DonghongJi 112–119 W04-1018 hu-etal-2004-chinese @@ -1772,8 +1772,8 @@ Document Re-ranking based on Global and Local Terms - LingpengYang - DongHongJi + LingpengYang + DongHongJi LiTang 17–23 W04-1103 @@ -1781,7 +1781,7 @@ Adaptive Compression-based Approach for <fixed-case>C</fixed-case>hinese <fixed-case>P</fixed-case>inyin Input - JinHuHuang + JinHuHuang DavidPowers 24–27 W04-1104 @@ -1799,7 +1799,7 @@ Character-Sense Association and Compounding Template Similarity: Automatic Semantic Classification of <fixed-case>C</fixed-case>hinese Compounds - Chao-JanChen + Chao-JanChen 33–40 W04-1106 chen-2004-character @@ -1807,7 +1807,7 @@ <fixed-case>C</fixed-case>hinese Chunking with Another Type of Spec HongqiaoLi - ChangningHuang + ChangningHuang JianfengGao XiaozhongFan 41–48 @@ -1825,9 +1825,9 @@ <fixed-case>C</fixed-case>hinese Word Segmentation by Classification of Characters - Chooi-LingGoh + Chooi-LingGoh MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 57–64 W04-1109 goh-etal-2004-chinese @@ -1837,7 +1837,7 @@ Jui-FengYeh Chung-HsienWu Ming-JunChen - Liang-chihYu + Liang-chihYu 65–71 W04-1110 yeh-etal-2004-automated-alignment @@ -1846,7 +1846,7 @@ A Statistical Model for Hangeul-Hanja Conversion in Terminology Domain Jin-XiaHuang Sun-MeeBae - Key-sunChoi + Key-sunChoi 72–78 W04-1111 huang-etal-2004-statistical @@ -1863,7 +1863,7 @@ Using Synonym Relations in <fixed-case>C</fixed-case>hinese Collocation Extraction WanyinLi - QinLu + QinLu RuifengXu 86–93 W04-1113 @@ -1872,7 +1872,7 @@ The Construction of A <fixed-case>C</fixed-case>hinese Shallow Treebank RuifengXu - QinLu + QinLu YinLi WanyinLi 94–101 @@ -1881,7 +1881,7 @@ Combining Prosodic and Text Features for Segmentation of <fixed-case>M</fixed-case>andarin Broadcast News - Gina-AnneLevow + Gina-AnneLevow 102–108 W04-1115 levow-2004-combining @@ -1889,7 +1889,7 @@ Automatic Semantic Role Assignment for a Tree Structure Jia-MingYou - Keh-JiannChen + Keh-JiannChen 109–115 W04-1116 you-chen-2004-automatic @@ -1897,8 +1897,8 @@ A Large-Scale Semantic Structure for <fixed-case>C</fixed-case>hinese Sentences LiTang - DonghongJi - LingpengYang + DonghongJi + LingpengYang 116–121 W04-1117 tang-etal-2004-large @@ -1907,7 +1907,7 @@ Do We Need <fixed-case>C</fixed-case>hinese Word Segmentation for Statistical Machine Translation? JiaXu RichardZens - HermannNey + HermannNey 122–128 W04-1118 xu-etal-2004-need @@ -1932,7 +1932,7 @@ Aligning Bilingual Corpora Using Sentences Location Information - WeigangLi + WeigangLi TingLiu ZhenWang ShengLi @@ -1954,7 +1954,7 @@ Proceedings of the International Joint Workshop on Natural Language Processing in Biomedicine and its Applications (NLPBA/BioNLP) NigelCollier PatrickRuch - AdelineNazarenko + AdelineNazarenko COLING
Geneva, Switzerland
August 28th and 29th @@ -1967,7 +1967,7 @@ Recognizing Names in Biomedical Texts using Hidden <fixed-case>M</fixed-case>arkov Model and <fixed-case>SVM</fixed-case> plus Sigmoid - GuoDongZhou + GuoDongZhou 1–7 W04-1201 zhou-2004-recognizing @@ -2026,18 +2026,18 @@ Event-Based Information Extraction for the Biomedical Domain: the Caderige Project - ErickAlphonse + ErickAlphonse SophieAubin PhilippeBessières GillesBisson - ThierryHamon + ThierryHamon SandrineLagarrigue AdelineNazarenko Alain-PierreManine - ClaireNédellec + ClaireNédellec Mohamed Ould AbdelVetah ThierryPoibeau - DavyWeissenbacher + DavyWeissenbacher 43–49 W04-1207 alphonse-etal-2004-event @@ -2045,7 +2045,7 @@ Distributed Modules for Text Annotation and <fixed-case>IE</fixed-case> Applied to the Biomedical Domain HaraldKirsch - DietrichRebholz-Schuhmann + DietrichRebholz-Schuhmann 50–56 W04-1208 kirsch-rebholz-schuhmann-2004-distributed @@ -2053,7 +2053,7 @@ Support Vector Machine Approach to Extracting Gene References into Function from Biological Documents ChihLee - Wen-JuanHou + Wen-JuanHou Hsin-HsiChen 57–60 W04-1209 @@ -2070,9 +2070,9 @@ Creating a Test Corpus of Clinical Notes Manually Tagged for Part-of-Speech Information - SergueiPakhomov + SergueiPakhomov AnniCoden - ChristopherChute + ChristopherChute 65–68 W04-1211 pakhomov-etal-2004-creating @@ -2080,7 +2080,7 @@ Classification from Full Text: A Comparison of Canonical Sections of Scientific Papers GailSinclair - BonnieWebber + BonnieWebber 69–72 W04-1212 sinclair-webber-2004-classification @@ -2090,7 +2090,7 @@ NigelCollier TomokoOhta YoshimasaTsuruoka - YukaTateisi + YukaTateisi Jin-DongKim 73–78 W04-1213 @@ -2102,7 +2102,7 @@ Seon-HoKim Ki-JoongLee Do-GilLee - Hae-ChangRim + Hae-ChangRim 79–82 W04-1214 park-etal-2004-incorporating @@ -2110,7 +2110,7 @@ Annotating Multiple Types of Biomedical Entities: A Single Word Classification Approach ChihLee - Wen-JuanHou + Wen-JuanHou Hsin-HsiChen 83–86 W04-1215 @@ -2125,11 +2125,11 @@ Exploiting Context for Biomedical Entity Recognition: From Syntax to the Web - JennyFinkel + JennyFinkel ShipraDingare HuyNguyen MalvinaNissim - ChristopherManning + ChristopherManning GailSinclair 91–94 W04-1217 @@ -2144,7 +2144,7 @@ Exploring Deep Knowledge Resources in Biomedical Name Recognition - ZhouGuoDong + GuoDongZhou SuJian 99–102 W04-1219 @@ -2154,7 +2154,7 @@ <fixed-case>POSBIOTM</fixed-case>-<fixed-case>NER</fixed-case> in the Shared Task of <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case>/<fixed-case>NLPBA</fixed-case>2004 YuSong EunjuKim - Gary GeunbaeLee + Gary GeunbaeLee Byoung-keeYi 103–106 W04-1220 @@ -2191,7 +2191,7 @@ On Statistical Parameter Setting - DamirĆavar + DamirĆavar JoshuaHerring ToshikazuIkuta PaulRodrigues @@ -2232,7 +2232,7 @@ Statistics Learning and Universal Grammar: Modeling Word Segmentation TimothyGambell - CharlesYang + CharlesYang 51–54 W04-1307 gambell-yang-2004-statistics @@ -2249,8 +2249,8 @@ A Computational Model of Emergent Simple Syntax: Supporting the Natural Transition from the One-Word Stage to the Two-Word Stage KrisJack - ChrisReed - AnnaluWaller + ChrisReed + AnnaluWaller 63–70 W04-1309 jack-etal-2004-computational @@ -2265,7 +2265,7 @@ Some Tests of an Unsupervised Model of Language Acquisition - BoPedersen + BoPedersen ShimonEdelman ZachSolan DavidHorn @@ -2311,20 +2311,20 @@ <fixed-case>CESTA</fixed-case>: Machine Translation Evaluation Campaign [Work-in-Progress Project Report] - WidadMustafa El Hadi - MarianneDabbadie - IsmaïlTimimi - MartinRajman - PhilippeLanglais + WidadMustafa El Hadi + MarianneDabbadie + IsmaïlTimimi + MartinRajman + PhilippeLanglais AntonyHartley - AndreiPopescu Belis + AndreiPopescu Belis 8–17 W04-1402 mustafa-el-hadi-etal-2004-cesta Language Resources and Localisation - ReinhardSchäler + ReinhardSchäler 18–25 W04-1403 schaler-2004-language @@ -2401,7 +2401,7 @@ A Simple String-Rewriting Formalism for Dependency Grammar AlexisNasr - OwenRambow + OwenRambow 17–24 W04-1503 nasr-rambow-2004-simple @@ -2443,8 +2443,8 @@ Some Notes on Generative Capacity of Dependency Grammar - TomaszObrebski - FilipGralinski + TomaszObrebski + FilipGralinski 57–63 W04-1508 obrebski-gralinski-2004-notes @@ -2460,16 +2460,16 @@ Extensible Dependency Grammar: A New Methodology RalphDebusmann DenysDuchier - Geert-Jan M.Kruijff + Geert-Jan M.Kruijff 70–76 W04-1510 debusmann-etal-2004-extensible From a Surface Analysis to a Dependency Structure - LuisaCoheur - NunoMamede - Gabriel G.Bes + LuisaCoheur + NunoMamede + Gabriel G.Bes 77–81 W04-1511 coheur-etal-2004-surface @@ -2484,7 +2484,7 @@ Synchronous Dependency Insertion Grammars: A Grammar Formalism for Syntax Based Statistical <fixed-case>MT</fixed-case> YuanDing - MarthaPalmer + MarthaPalmer 90–97 W04-1513 ding-palmer-2004-synchronous @@ -2512,7 +2512,7 @@ Developing an <fixed-case>A</fixed-case>rabic Treebank: Methods, Guidelines, Procedures, and Tools - MohamedMaamouri + MohamedMaamouri AnnBies 2–9 W04-1602 @@ -2527,7 +2527,7 @@ The Architecture of a <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic Lexical Database. Some Figures, Ratios and Categories from the <fixed-case>DIINAR</fixed-case>.1 Source Program - RamziAbbès + RamziAbbès JosephDichy MohamedHassoun 15–22 @@ -2565,7 +2565,7 @@ An Unsupervised Approach for Bootstrapping <fixed-case>A</fixed-case>rabic Sense Tagging - Mona T.Diab + Mona T.Diab 43–50 W04-1609 diab-2004-unsupervised @@ -2582,8 +2582,8 @@ A Transcription Scheme for Languages Employing the <fixed-case>A</fixed-case>rabic Script Motivated by Speech Processing Applications ShadiGanjavi - Panayiotis G.Georgiou - ShrikanthNarayanan + Panayiotis G.Georgiou + ShrikanthNarayanan 59–65 W04-1611 ganjavi-etal-2004-transcription @@ -2628,7 +2628,7 @@ Language Weaver <fixed-case>A</fixed-case>rabic-><fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case> DanielMarcu - AlexFraser + AlexFraser WilliamWong KevinKnight 89–89 @@ -2651,14 +2651,14 @@ Integrating Natural Language Processing into <fixed-case>E</fixed-case>-Learning - A Case of <fixed-case>C</fixed-case>zech - PavelSmrž + PavelSmrž 1–10 W04-1701 smrz-2004-integrating <fixed-case>NLP</fixed-case> serving the cause of language learning - FrédériqueSegond + FrédériqueSegond ThibaultParmentier 11–17 W04-1702 @@ -2686,7 +2686,7 @@ Indexing Student Essays Paragraphs Using <fixed-case>LSA</fixed-case> Over an Integrated Ontological Space - Gaston G.Burek + Gaston G.Burek MariaVargas-Vera EmanuelaMoreale 32–37 @@ -2697,7 +2697,7 @@ <fixed-case>E</fixed-case>-Assessment using Latent Semantic Analysis in the Computer Science Domain: A Pilot Study PeteThomas DebraHaley - AnnedeRoeck + AnnedeRoeck MarianPetre 38–44 W04-1706 @@ -2711,7 +2711,7 @@ MilenaYankova SvetlaBoytcheva IrenaVitanova - PreslavNakov + PreslavNakov 45–52 W04-1707 angelova-etal-2004-towards @@ -2720,7 +2720,7 @@ Automatic Measuring of <fixed-case>E</fixed-case>nglish Language Proficiency using <fixed-case>MT</fixed-case> Evaluation Technology KeijiYasuda FumiakiSugaya - EiichiroSumita + EiichiroSumita ToshiyukiTakezawa GenichiroKikui SeiichiYamamoto @@ -2748,8 +2748,8 @@ KatrinaKeogh ThomasKoller MonicaWard - ElaineUí Dhonnchadha - Josefvan Genabith + ElaineUí Dhonnchadha + Josefvan Genabith 79–85 W04-1711 keogh-etal-2004-cl @@ -2789,14 +2789,14 @@ A Lexico-semantic Approach to the Structuring of Terminology - Marie-ClaudeL’Homme + Marie-ClaudeL’Homme 7–14 W04-1801 lhomme-2004-lexico Metalinguistic Information Extraction for Terminology - CarlosRodríguez Penagos + CarlosRodríguez Penagos 15–22 W04-1802 rodriguez-penagos-2004-metalinguistic @@ -2821,7 +2821,7 @@ Discovering Specific Semantic Relationships between Nouns and Verbs in a Specialized <fixed-case>F</fixed-case>rench Corpus VincentClaveau - Marie-ClaudeL’Homme + Marie-ClaudeL’Homme 39–46 W04-1805 claveau-lhomme-2004-discovering @@ -2829,8 +2829,8 @@ Automatically Inducing Ontologies from Corpora InderjeetMani - KenSamuel - KrisConcepcion + KenSamuel + KrisConcepcion DavidVogel 47–54 W04-1806 @@ -2839,7 +2839,7 @@ Detecting Semantic Relations between Terms in Definitions VéroniqueMalaisé - PierreZweigenbaum + PierreZweigenbaum BrunoBachimont 55–62 W04-1807 @@ -2847,7 +2847,7 @@ Discovering Synonyms and Other Related Words - KristerLindén + KristerLindén JussiPiitulainen 63–70 W04-1808 @@ -2889,7 +2889,7 @@ Determining the Specificity of Terms based on Information Theoretic Measures Pum-MoRyu - Key-SunChoi + Key-SunChoi 87–90 W04-1813 ryu-choi-2004-determining @@ -2898,8 +2898,8 @@ Construction of Grammar Based Term Extraction Model for <fixed-case>J</fixed-case>apanese KoichiTakeuchi KyoKageura - BéatriceDaille - LaurentRomary + BéatriceDaille + LaurentRomary 91–94 W04-1814 takeuchi-etal-2004-construction @@ -2908,7 +2908,7 @@ Proceedings of the 5th International Workshop on Linguistically Interpreted Corpora - SilviaHansen-Schirra + SilviaHansen-Schirra StephanOepen HansUszkoreit COLING @@ -2940,7 +2940,7 @@ Inflectional Syncretism and Corpora DunstanBrown CaroleTiberius - Greville G.Corbett + Greville G.Corbett 11–18 W04-1902 brown-etal-2004-inflectional @@ -2949,7 +2949,7 @@ The <fixed-case>S</fixed-case>zeged Corpus. A <fixed-case>POS</fixed-case> Tagged and Syntactically Annotated <fixed-case>H</fixed-case>ungarian Natural Language Corpus DóraCsendes JánosCsirik - TiborGyimóthy + TiborGyimóthy 19–22 W04-1903 csendes-etal-2004-szeged @@ -2966,7 +2966,7 @@ Towards a Dependency-Based Gold Standard for <fixed-case>G</fixed-case>erman Parsers. The <fixed-case>TIGER</fixed-case> Dependency Bank MartinForst - NúriaBertomeu + NúriaBertomeu BertholdCrysmann FrederikFouvry SilviaHansen-Schirra @@ -2978,7 +2978,7 @@ Corpus-based Induction of an <fixed-case>LFG</fixed-case> Syntax-Semantics Interface for Frame Semantic Processing AnetteFrank - JiríSemecky + JiríSemecky 39–46 W04-1906 frank-semecky-2004-corpus @@ -2994,7 +2994,7 @@ Automated Induction of Sense in Context - JamesPustejovsky + JamesPustejovsky PatrickHanks AnnaRumshisky 55–58 @@ -3004,7 +3004,7 @@ Mining Linguistically Interpreted Texts Cassiana Fagundesda Silva - RenataVieira + RenataVieira Fernando SantosOsório PauloQuaresma 59–62 @@ -3044,7 +3044,7 @@ Introduction to <fixed-case>ROMAND</fixed-case> 2004 VincenzoPallotta - AmaliaTodirascu + AmaliaTodirascu 7–10 W04-2001 pallotta-todirascu-2004-introduction @@ -3074,23 +3074,23 @@ Evaluating <fixed-case>GETARUNS</fixed-case> parser with <fixed-case>GREVAL</fixed-case> test suite - RodolfoDelmonte + RodolfoDelmonte 32–41 W04-2005 delmonte-2004-evaluating A step towards incremental generation of logical forms - LuísaCoheur - NunoMamede - GabrielBès + LuísaCoheur + NunoMamede + GabrielBès 42–50 W04-2006 coheur-etal-2004-step Using an incremental robust parser to automatically generate semantic <fixed-case>UNL</fixed-case> graphs - NuriaGala + NuriaGala 51–58 W04-2007 gala-2004-using @@ -3098,7 +3098,7 @@ An algorithm for open text semantic parsing LeiShi - RadaMihalcea + RadaMihalcea 59–67 W04-2008 shi-mihalcea-2004-algorithm @@ -3112,8 +3112,8 @@ Robust ending guessing rules with application to slavonic languages - PreslavNakov - ElenaPaskaleva + PreslavNakov + ElenaPaskaleva 76–85 W04-2010 nakov-paskaleva-2004-robust @@ -3132,7 +3132,7 @@ Answer validation by keyword association MasatsuguTonoike TakehitoUtsuro - SatoshiSato + SatoshiSato 95–103 W04-2012 tonoike-etal-2004-answer @@ -3182,7 +3182,7 @@ Standards going concrete : from <fixed-case>LMF</fixed-case> to Morphalou - LaurentRomary + LaurentRomary SusanneSalmon-Alt GilFrancopoulo 22–28 @@ -3240,8 +3240,8 @@ A Very Large Dictionary with Paradigmatic, Syntagmatic, and Paronymic - IgorBolshakov - AlexanderGelbukh + IgorBolshakov + AlexanderGelbukh 53–56 W04-2110 bolshakov-gelbukh-2004-large @@ -3255,7 +3255,7 @@ <fixed-case>R</fixed-case>{j}ecnik.com : <fixed-case>E</fixed-case>nglish - <fixed-case>S</fixed-case>erbo-<fixed-case>C</fixed-case>roatian Electronic Dictionary - VladoKešelj + VladoKešelj TanjaKešelj LarisaZlatić 61–64 @@ -3289,7 +3289,7 @@ Empirical Acquisition of Differentiating Relations from Definitions TomO’Hara - JanyceWiebe + JanyceWiebe 77–80 W04-2116 ohara-wiebe-2004-empirical @@ -3303,7 +3303,7 @@ Identification, Quantitative Description, and Preliminary Distributional Analysis of <fixed-case>G</fixed-case>erman Particle Verbs - SabineSchulte im Walde + SabineSchulte im Walde 85–88 W04-2118 schulte-im-walde-2004-identification @@ -3331,8 +3331,8 @@ Multilinguality in <fixed-case>ETAP</fixed-case>-3: Reuse of Lexical Resources - IgorBoguslavsky - LeonidIomdin + IgorBoguslavsky + LeonidIomdin VictorSizov 1–8 W04-2201 @@ -3356,7 +3356,7 @@ Automatic Construction of a Transfer Dictionary Considering Directionality KyongheePaik - SatoshiShirai + SatoshiShirai HiromiNakaiwa 25–32 W04-2204 @@ -3405,7 +3405,7 @@ A Generic Collaborative Platform for Multilingual Lexical Database Development - GillesSérasset + GillesSérasset 73–79 W04-2210 serasset-2004-generic @@ -3413,8 +3413,8 @@ Semi-Automatic Construction of <fixed-case>K</fixed-case>orean-<fixed-case>C</fixed-case>hinese Verb Patterns Based on Translation Equivalency MunpyoHong - Young-KilKim - Sang-KyuPark + Young-KilKim + Sang-KyuPark Young-JikLee 80–85 W04-2211 @@ -3445,8 +3445,8 @@ Revising the <fixed-case>W</fixed-case>ordnet Domains Hierarchy: semantics, coverage and balancing LuisaBentivogli PamelaForner - BernardoMagnini - EmanuelePianta + BernardoMagnini + EmanuelePianta 94–101 W04-2214 bentivogli-etal-2004-revising @@ -3454,7 +3454,7 @@ <fixed-case>P</fixed-case>olyphra<fixed-case>Z</fixed-case>: a Tool for the Management of Parallel Corpora NajehHajlaoui - ChristianBoitet + ChristianBoitet 102–109 W04-2215 hajlaoui-boitet-2004-polyphraz @@ -3482,7 +3482,7 @@ Usability and Acceptability Studies of Conversational Virtual Human Technology - CurryGuinn + CurryGuinn RobertHubal GeoffreyFrank HenrySchwetzke @@ -3502,8 +3502,8 @@ Stochastic Language Generation in a Dialogue System: Toward a Domain Independent Generator - NathanaelChambers - JamesAllen + NathanaelChambers + JamesAllen 9–18 W04-2302 chambers-allen-2004-stochastic @@ -3525,7 +3525,7 @@ LindaBell JohanBoye AndersLindström - MatsWirén + MatsWirén 23–26 W04-2304 gustafson-etal-2004-nice @@ -3542,9 +3542,9 @@ StefanHamerich VolkerSchubert VolkerSchless - Ricardode Córdoba - José M.Pardo - Luis F.d’Haro + Ricardode Córdoba + José M.Pardo + Luis F.d’Haro BasilisKladis OtiliaKocsis StefanIgel @@ -3564,7 +3564,7 @@ Other-Initiated Self-Repairs in <fixed-case>E</fixed-case>stonian Information Dialogues: Solving Communication Problems in Cooperation OlgaGerassimenko TiitHennoste - MareKoit + MareKoit AndrielaRääbis 39–42 W04-2308 @@ -3583,7 +3583,7 @@ Manav RatanMital SumitKumar AmitabhaMukerjee - Achla M.Raina + Achla M.Raina 47–50 W04-2310 jain-etal-2004-anaphora @@ -3607,7 +3607,7 @@ Towards Automatic Identification of Discourse Markers in Dialogs: The Case of Like SandrineZufferey - AndreiPopescu-Belis + AndreiPopescu-Belis 63–71 W04-2313 zufferey-popescu-belis-2004-towards @@ -3615,8 +3615,8 @@ Bootstrapping Spoken Dialog Systems with Data Reuse GuiseppeDi Fabbrizio - GokhanTur - DilekHakkani-Tür + GokhanTur + DilekHakkani-Tür 72–80 W04-2314 di-fabbrizio-etal-2004-bootstrapping @@ -3648,14 +3648,14 @@ Prosodic Cues to Discourse Segment Boundaries in Human-Computer Dialogue - Gina-AnneLevow + Gina-AnneLevow 93–96 W04-2318 levow-2004-prosodic The <fixed-case>ICSI</fixed-case> Meeting Recorder Dialog Act (<fixed-case>MRDA</fixed-case>) Corpus - ElizabethShriberg + ElizabethShriberg RajDhillon SonaliBhagat JeremyAng @@ -3674,9 +3674,9 @@ On the Use of Confidence for Statistical Decision in Dialogue Strategies ChristianRaymond - FrédéricBéchet - RenatoDe Mori - GéraldineDamnati + FrédéricBéchet + RenatoDe Mori + GéraldineDamnati 102–107 W04-2321 raymond-etal-2004-use @@ -3684,7 +3684,7 @@ A Rule Based Approach to Discourse Parsing LiviaPolanyi - ChrisCuly + ChrisCuly Martinvan den Berg Gian LorenzoThione DavidAhn @@ -3696,8 +3696,8 @@ Unifying Annotated Discourse Hierarchies to Create a Gold Standard MarcoCarbone Ya’akovGal - StuartShieber - BarbaraGrosz + StuartShieber + BarbaraGrosz 118–126 W04-2323 carbone-etal-2004-unifying @@ -3718,15 +3718,15 @@ Annotating Student Emotional States in Spoken Tutoring Dialogues - Diane J.Litman - KateForbes-Riley + Diane J.Litman + KateForbes-Riley 144–153 W04-2326 litman-forbes-riley-2004-annotating The <fixed-case>MATE</fixed-case>/<fixed-case>GNOME</fixed-case> Proposals for Anaphoric Annotation, Revisited - MassimoPoesio + MassimoPoesio 154–162 W04-2327 poesio-2004-mate @@ -3734,7 +3734,7 @@ Multi-level Dialogue Act Tags AlexanderClark - AndreiPopescu-Belis + AndreiPopescu-Belis 163–170 W04-2328 clark-popescu-belis-2004-multi @@ -3756,14 +3756,14 @@ A Linear Programming Formulation for Global Inference in Natural Language Tasks DanRoth - Wen-tauYih + Wen-tauYih 1–8 W04-2401 roth-yih-2004-linear Semantic Lexicon Construction: Learning from Unlabeled Data via Spectral Analysis - Rie KubotaAndo + Rie KubotaAndo 9–16 W04-2402 ando-2004-semantic @@ -3771,14 +3771,14 @@ A Semantic Kernel for Predicate Argument Classification AlessandroMoschitti - Cosmin AdrianBejan + Cosmin AdrianBejan 17–24 W04-2403 moschitti-bejan-2004-semantic Combining Lexical and Syntactic Features for Supervised Word Sense Disambiguation - SaifMohammad + SaifMohammad TedPedersen 25–32 W04-2404 @@ -3786,7 +3786,7 @@ Co-training and Self-training for Word Sense Disambiguation - RadaMihalcea + RadaMihalcea 33–40 W04-2405 mihalcea-2004-co @@ -3811,7 +3811,7 @@ Modeling Category Structures with a Kernel Function HiroyaTakamura - YujiMatsumoto + YujiMatsumoto HiroyasuYamada 57–64 W04-2408 @@ -3843,7 +3843,7 @@ Introduction to the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2004 Shared Task: Semantic Role Labeling XavierCarreras - LluísMàrquez + LluísMàrquez 89–97 W04-2412 carreras-marquez-2004-introduction @@ -3852,7 +3852,7 @@ Semantic Role Labelling With Chunk Sequences UlrikeBaldewein KatrinErk - SebastianPadó + SebastianPadó DetlefPrescher 98–101 W04-2413 @@ -3860,11 +3860,11 @@ Memory-based semantic role labeling: Optimizing features, algorithm, and output - Antalvan den Bosch + Antalvan den Bosch SanderCanisius - WalterDaelemans + WalterDaelemans IrisHendrickx - ErikTjong Kim Sang + ErikTjong Kim Sang 102–105 W04-2414 van-den-bosch-etal-2004-memory @@ -3872,19 +3872,19 @@ Hierarchical Recognition of Propositional Arguments with Perceptrons XavierCarreras - LluísMàrquez - GrzegorzChrupała + LluísMàrquez + GrzegorzChrupała 106–109 W04-2415 carreras-etal-2004-hierarchical Semantic Role Labeling by Tagging Syntactic Chunks - KadriHacioglu - SameerPradhan - WayneWard - James H.Martin - DanielJurafsky + KadriHacioglu + SameerPradhan + WayneWard + James H.Martin + DanielJurafsky 110–113 W04-2416 hacioglu-etal-2004-semantic @@ -3908,7 +3908,7 @@ Joon-HoLim Young-SookHwang So-YoungPark - Hae-ChangRim + Hae-ChangRim 122–125 W04-2419 lim-etal-2004-semantic @@ -3917,7 +3917,7 @@ Two-Phase Semantic Role Labeling based on Support Vector Machines Kyung-MiPark Young-SookHwang - Hae-ChangRim + Hae-ChangRim 126–129 W04-2420 park-etal-2004-two @@ -3926,7 +3926,7 @@ Semantic Role Labeling Via Generalized Inference Over Classifiers VasinPunyakanok DanRoth - Wen-tauYih + Wen-tauYih DavZimak YuanchengTu 130–133 @@ -3936,7 +3936,7 @@ Learning Transformation Rules for Semantic Role Labeling KenWilliams - ChristopherDozier + ChristopherDozier AndrewMcCulloh 134–137 W04-2422 @@ -3944,14 +3944,14 @@ Language Learning: Beyond Thunderdome - Christopher D.Manning + Christopher D.Manning 138–138 W04-2423 manning-2004-language Putting Meaning into Your Trees - MarthaPalmer + MarthaPalmer 139–139 W04-2424 palmer-2004-putting @@ -3972,16 +3972,16 @@ Strategies for Advanced Question Answering - SandaHarabagiu - FinleyLacatusu + SandaHarabagiu + FinleyLacatusu 1–9 W04-2501 harabagiu-lacatusu-2004-strategies Answering Questions Using Advanced Semantics and Probabilistic Inference - SriniNarayanan - SandaHarabagiu + SriniNarayanan + SandaHarabagiu 10–16 W04-2502 narayanan-harabagiu-2004-answering @@ -3997,7 +3997,7 @@ Discourse Structure for Context Question Answering - Joyce Y.Chai + Joyce Y.Chai RongJin 23–30 W04-2504 @@ -4005,10 +4005,10 @@ Intentions, Implicatures and Processing of Complex Questions - SandaHarabagiu - StevenMaiorano + SandaHarabagiu + StevenMaiorano AlessandroMoschitti - CosminBejan + CosminBejan 31–42 W04-2505 harabagiu-etal-2004-intentions @@ -4016,20 +4016,20 @@ A Novel Approach to Focus Identification in Question/Answering Systems AlessandroMoschitti - SandaHarabagiu + SandaHarabagiu 43–51 W04-2506 moschitti-harabagiu-2004-novel <fixed-case>HITIQA</fixed-case>: Scenario Based Question Answering - SharonSmall - TomekStrzalkowski + SharonSmall + TomekStrzalkowski TingLiu SeanRyan RobertSalkin NobuyukiShimizu - PaulKantor + PaulKantor DianeKelly RobertRittman NinaWacholder @@ -4043,7 +4043,7 @@ AndrewHickl JohnLehmann JohnWilliams - SandaHarabagiu + SandaHarabagiu 60–69 W04-2508 hickl-etal-2004-experiments @@ -4051,8 +4051,8 @@ Handling Information Access Dialogue through <fixed-case>QA</fixed-case> Technologies - A novel challenge for open-domain question answering TsuneakiKato - Jun’ichiFukumoto - FumitoMasui + Jun’ichiFukumoto + FumitoMasui NorikoKando 70–77 W04-2509 @@ -4060,11 +4060,11 @@ Ontological resources and question answering - RobertoBasili - Dorte H.Hansen + RobertoBasili + Dorte H.Hansen PatriziaPaggio - Maria TeresaPazienza - Fabio MassimoZanzotto + Maria TeresaPazienza + Fabio MassimoZanzotto 78–84 W04-2510 basili-etal-2004-ontological @@ -4087,7 +4087,7 @@ <fixed-case>O</fixed-case>nto<fixed-case>S</fixed-case>em Methods for Processing Semantic Ellipsis MarjorieMcShane StephenBeale - SergeiNirenburg + SergeiNirenburg 1–8 W04-2601 mcshane-etal-2004-ontosem-methods @@ -4095,7 +4095,7 @@ Towards Full Automation of Lexicon Construction RichardRohwer - DayneFreitag + DayneFreitag 9–16 W04-2602 rohwer-freitag-2004-towards @@ -4112,9 +4112,9 @@ Using prepositions to extend a verb lexicon - KarinKipper + KarinKipper BenjaminSnyder - MarthaPalmer + MarthaPalmer 23–29 W04-2604 kipper-etal-2004-using @@ -4130,7 +4130,7 @@ Extended Lexical-Semantic Classification of <fixed-case>E</fixed-case>nglish Verbs AnnaKorhonen - TedBriscoe + TedBriscoe 38–45 W04-2606 korhonen-briscoe-2004-extended @@ -4152,23 +4152,23 @@ Models for the Semantic Classification of Noun Phrases - DanMoldovan - AdrianaBadulescu + DanMoldovan + AdrianaBadulescu MartaTatu DanielAntohe - RoxanaGirju + RoxanaGirju 60–67 W04-2609 moldovan-etal-2004-models Support Vector Machines Applied to the Classification of Semantic Relations in Nominalized Noun Phrases - RoxanaGirju + RoxanaGirju Ana-MariaGiuglea MarianOlteanu OvidiuFortu OrestBolohan - DanMoldovan + DanMoldovan 68–75 W04-2610 girju-etal-2004-support @@ -4176,7 +4176,7 @@ Abstraction Summarization for Managing the Biomedical Research Literature MarceloFiszman - Thomas C.Rindflesch + Thomas C.Rindflesch HalilKilicoglu 76–83 W04-2611 @@ -4184,7 +4184,7 @@ Comparing, Integrating Lexical Definitional Knowledge From Multiple Sources - Lucja M.Iwanska + Lucja M.Iwanska 84–91 W04-2612 iwanska-2004-comparing @@ -4220,7 +4220,7 @@ Introduction to Frontiers in Corpus Annotation - AdamMeyers + AdamMeyers 1–2 W04-2701 meyers-2004-introduction @@ -4236,9 +4236,9 @@ Annotating Discourse Connectives and Their Arguments EleniMiltsakaki - AravindJoshi + AravindJoshi RashmiPrasad - BonnieWebber + BonnieWebber 9–16 W04-2703 miltsakaki-etal-2004-annotating @@ -4246,9 +4246,9 @@ <fixed-case>P</fixed-case>roposition <fixed-case>B</fixed-case>ank <fixed-case>II</fixed-case>: Delving Deeper OlgaBabko-Malaya - MarthaPalmer + MarthaPalmer NianwenXue - AravindJoshi + AravindJoshi SethKulick 17–23 W04-2704 @@ -4256,22 +4256,22 @@ The <fixed-case>N</fixed-case>om<fixed-case>B</fixed-case>ank Project: An Interim Report - AdamMeyers + AdamMeyers RuthReeves - CatherineMacleod + CatherineMacleod RachelSzekely VeronikaZielinska BrianYoung - RalphGrishman + RalphGrishman 24–31 W04-2705 meyers-etal-2004-nombank Deep Syntactic Annotation: Tectogrammatical Representation and Beyond - PetrSgall - JarmilaPanevová - EvaHajičová + PetrSgall + JarmilaPanevová + EvaHajičová 32–38 W04-2706 sgall-etal-2004-deep @@ -4280,9 +4280,9 @@ Multi-dimensional annotation of linguistic corpora for investigating information structure StefanBaumann CarenBrinckmann - SilviaHansen-Schirra - Geert-JanKruijff - IvanaKruijff-Korbayová + SilviaHansen-Schirra + Geert-JanKruijff + IvanaKruijff-Korbayová StellaNeumann ElkeTeich 39–46 @@ -4291,9 +4291,9 @@ <fixed-case>P</fixed-case>rague <fixed-case>C</fixed-case>zech-<fixed-case>E</fixed-case>nglish <fixed-case>D</fixed-case>ependency <fixed-case>T</fixed-case>reebank: Any Hopes for a Common Annotation Scheme? - MartinČmejrek - JanCuřín - JiříHavelka + MartinČmejrek + JanCuřín + JiříHavelka 47–54 W04-2708 cmejrek-etal-2004-prague-czech @@ -4301,15 +4301,15 @@ Interlingual Annotation of Multilingual Text Corpora StephenHelmreich - DavidFarwell - BonnieDorr + DavidFarwell + BonnieDorr NizarHabash - LoriLevin + LoriLevin TerukoMitamura - FlorenceReeder - KeithMiller - EduardHovy - OwenRambow + FlorenceReeder + KeithMiller + EduardHovy + OwenRambow AdvaithSiddharthan 55–62 W04-2709 @@ -4326,8 +4326,8 @@ Valency Frames of <fixed-case>C</fixed-case>zech Verbs in <fixed-case>VALLEX</fixed-case> 1.0 - ZdeněkŽabokrtský - MarkétaLopatková + ZdeněkŽabokrtský + MarkétaLopatková 70–77 W04-2711 zabokrtsky-lopatkova-2004-valency @@ -4348,8 +4348,8 @@ Robustness versus Fidelity in Natural Language Understanding - Mark G.Core - Johanna D.Moore + Mark G.Core + Johanna D.Moore 1–8 W04-2801 core-moore-2004-robustness @@ -4364,7 +4364,7 @@ A Little Goes a Long Way: Quick Authoring of Semantic Knowledge Sources for Interpretation - Carolyn PensteinRosé + Carolyn PensteinRosé Brian S.Hall 17–24 W04-2803 @@ -4402,9 +4402,9 @@ Different Sense Granularities for Different Applications - MarthaPalmer + MarthaPalmer OlgaBabko-Malaya - Hoa TrangDang + Hoa TrangDang 49–56 W04-2807 palmer-etal-2004-different @@ -4443,7 +4443,7 @@ A System for Searching and Browsing Spoken Communications LeeBegeja BernardRenger - MuratSaraclar + MuratSaraclar DavidGibbon ZhuLiu BehzadShahraray @@ -4453,8 +4453,8 @@ Analysis and Processing of Lecture Audio Data: Preliminary Investigations - JamesGlass - Timothy J.Hazen + JamesGlass + Timothy J.Hazen LeeHetherington ChaoWang 9–12 @@ -4467,17 +4467,17 @@ FredGoodman StanleyBoykin RandyFish - WarrenGreiff + WarrenGreiff 13–17 W04-2903 hu-etal-2004-audio Scoring Algorithms for Wordspotting Systems - Robert W.Morris + Robert W.Morris Jon A.Arrowood Peter S.Cardillo - Mark A.Clements + Mark A.Clements 18–21 W04-2904 morris-etal-2004-scoring @@ -4485,14 +4485,14 @@ Using Soundex Codes for Indexing Names in <fixed-case>ASR</fixed-case> Documents HemaRaghavan - JamesAllan + JamesAllan 22–27 W04-2905 raghavan-allan-2004-using Assessing Prosodic and Text Features for Segmentation of <fixed-case>M</fixed-case>andarin Broadcast News - Gina-AnneLevow + Gina-AnneLevow 28–32 W04-2906 levow-2004-assessing @@ -4501,7 +4501,7 @@ General Indexation of Weighted Automata - Application to Spoken Utterance Retrieval CyrilAllauzen MehryarMohri - MuratSaraclar + MuratSaraclar 33–40 W04-2907 allauzen-etal-2004-general @@ -4522,7 +4522,7 @@ Invited Talk: Sentence Interpretation using Stochastic Finite State Transducers - RenatoDe Mori + RenatoDe Mori 1–1 W04-3001 de-mori-2004-invited @@ -4530,7 +4530,7 @@ Hybrid Statistical and Structural Semantic Modeling for <fixed-case>T</fixed-case>hai Multi-Stage Spoken Language Understanding ChaiWutiwiwatchai - SadaokiFurui + SadaokiFurui 2–9 W04-3002 wutiwiwatchai-furui-2004-hybrid @@ -4548,7 +4548,7 @@ Virtual Modality: a Framework for Testing and Building Multimodal Applications - Péter PálBoda + Péter PálBoda EdwardFilisko 17–24 W04-3004 @@ -4565,7 +4565,7 @@ Error Detection and Recovery in Spoken Dialogue Systems EdwardFilisko - StephanieSeneff + StephanieSeneff 31–38 W04-3006 filisko-seneff-2004-error @@ -4573,14 +4573,14 @@ Robustness Issues in a Data-Driven Spoken Language Understanding System YulanHe - SteveYoung + SteveYoung 39–46 W04-3007 he-young-2004-robustness Invited Talk: Spoken Language Understanding: The Research/Industry Chasm - RobertoPieraccini + RobertoPieraccini 47–47 W04-3008 pieraccini-2004-invited @@ -4589,7 +4589,7 @@ Using Higher-level Linguistic Knowledge for Speech Recognition Error Correction in a Spoken <fixed-case>Q</fixed-case>/A Dialog MinwooJeong ByeongchangKim - Gary GeunbaeLee + Gary GeunbaeLee 48–55 W04-3009 jeong-etal-2004-using @@ -4613,7 +4613,7 @@ Modeling Prosodic Consistency for Automatic Speech Recognition: Preliminary Investigations Ernest>Pusateri - JamesGlass + JamesGlass 64–69 W04-3011 pusateri-glass-2004-modeling @@ -4650,10 +4650,10 @@ A Resource for Constructing Customized Test Suites for Molecular Biology Entity Identification Systems - K. BretonnelCohen + K. BretonnelCohen LorraineTanabe ShuheiKinoshita - LawrenceHunter + LawrenceHunter 1–8 W04-3101 cohen-etal-2004-resource @@ -4669,7 +4669,7 @@ The Language of Bioscience: Facts, Speculations, and Statements In Between MarcLight - Xin YingQiu + Xin YingQiu PadminiSrinivasan 17–24 W04-3103 @@ -4724,7 +4724,7 @@ A Large Scale Terminology Resource for Biomedical Text Processing HenkHarkema - RobertGaizauskas + RobertGaizauskas MarkHepple AngusRoberts IanRoberts @@ -4738,14 +4738,14 @@ Integrated Annotation for Biomedical Information Extraction SethKulick AnnBies - MarkLiberman + MarkLiberman MarkMandel RyanMcDonald - MarthaPalmer + MarthaPalmer AndrewSchein - LyleUngar + LyleUngar ScottWinters - PeteWhite + PeteWhite 61–68 W04-3111 kulick-etal-2004-integrated @@ -4754,17 +4754,17 @@ Using Natural Language Processing, <fixed-case>L</fixed-case>ocus<fixed-case>L</fixed-case>ink and the Gene Ontology to Compare <fixed-case>OMIM</fixed-case> to <fixed-case>MEDLINE</fixed-case> BisharahLibbus HalilKilicoglu - Thomas C.Rindflesch - James G.Mork - Alan R.Aronson + Thomas C.Rindflesch + James G.Mork + Alan R.Aronson 69–76 W04-3112 libbus-etal-2004-using A Design Methodology for a Biomedical Literature Indexing Tool Using the Rhetoric of Science - Robert E.Mercer - ChrysanneDi Marco + Robert E.Mercer + ChrysanneDi Marco 77–84 W04-3113 mercer-di-marco-2004-design @@ -4789,9 +4789,9 @@ Max-Margin Parsing BenTaskar DanKlein - MichaelCollins + MichaelCollins DaphneKoller - ChristopherManning + ChristopherManning 1–8 W04-3201 taskar-etal-2004-max @@ -4813,15 +4813,15 @@ Unsupervised <fixed-case>WSD</fixed-case> based on Automatically Retrieved Examples: The Importance of Bias - EnekoAgirre - DavidMartinez + EnekoAgirre + DavidMartinez 25–32 W04-3204 agirre-martinez-2004-unsupervised <fixed-case>V</fixed-case>erb<fixed-case>O</fixed-case>cean: Mining the Web for Fine-Grained Semantic Verb Relations - TimothyChklovski + TimothyChklovski PatrickPantel 33–40 W04-3205 @@ -4830,7 +4830,7 @@ Scaling Web-based Acquisition of Entailment Relations IdanSzpektor - HristoTanev + HristoTanev IdoDagan BonaventuraCoppola 41–48 @@ -4839,8 +4839,8 @@ Bilingual Parsing with Factored Estimation: Using <fixed-case>E</fixed-case>nglish to Parse <fixed-case>K</fixed-case>orean - David A.Smith - Noah A.Smith + David A.Smith + Noah A.Smith 49–56 W04-3207 smith-smith-2004-bilingual @@ -4856,9 +4856,9 @@ Comparing and Combining Generative and Posterior Probability Models: Some Advances in Sentence Boundary Detection in Speech YangLiu - AndreasStolcke - ElizabethShriberg - MaryHarper + AndreasStolcke + ElizabethShriberg + MaryHarper 64–71 W04-3209 liu-etal-2004-comparing @@ -4873,8 +4873,8 @@ Mixing Weak Learners in Semantic Parsin - Rodney D.Nielsen - SameerPradhan + Rodney D.Nielsen + SameerPradhan 80–87 W04-3211 nielsen-pradhan-2004-mixing @@ -4882,14 +4882,14 @@ Calibrating Features for Semantic Role Labeling NianwenXue - MarthaPalmer + MarthaPalmer 88–94 W04-3212 xue-palmer-2004-calibrating Unsupervised Semantic Role Labellin - Robert S.Swier + Robert S.Swier SuzanneStevenson 95–102 W04-3213 @@ -4897,8 +4897,8 @@ The Influence of Argument Structure on Semantic Role Assignment - SebastianPadó - GemmaBoleda + SebastianPadó + GemmaBoleda 103–110 W04-3214 pado-boleda-2004-influence @@ -4906,15 +4906,15 @@ Object-Extraction and Question-Parsing using <fixed-case>CCG</fixed-case> StephenClark - MarkSteedman - James R.Curran + MarkSteedman + James R.Curran 111–118 W04-3215 clark-etal-2004-object A Phrase-Based <fixed-case>HMM</fixed-case> Approach to Document/Abstract Alignment - HalDaumé III + HalDaumé III DanielMarcu 119–126 W04-3216 @@ -4923,7 +4923,7 @@ Automatic Analysis of Plot for Story Rewriting HarryHalpin - Johanna D.Moore + Johanna D.Moore JudyRobertson 127–133 W04-3217 @@ -4931,9 +4931,9 @@ Mining Spoken Dialogue Corpora for System Evaluation and Modelin - FredericBechet + FredericBechet GiuseppeRiccardi - DilekHakkani-Tur + DilekHakkani-Tur 134–141 W04-3218 bechet-etal-2004-mining @@ -4942,7 +4942,7 @@ Monolingual Machine Translation for Paraphrase Generation ChrisQuirk ChrisBrockett - WilliamDolan + WilliamDolan 142–149 W04-3219 quirk-etal-2004-monolingual @@ -4951,7 +4951,7 @@ Verb Sense and Subcategorization: Using Joint Inference to Improve Performance on Complementary Task GalenAndrew TrondGrenager - ChristopherManning + ChristopherManning 150–157 W04-3220 andrew-etal-2004-verb @@ -4959,7 +4959,7 @@ Attribute-Based and Value-Based Clustering: An Evaluation AbdulrahmanAlmuhareb - MassimoPoesio + MassimoPoesio 158–165 W04-3221 almuhareb-poesio-2004-attribute @@ -4968,7 +4968,7 @@ The Leaf Path Projection View of Parse Trees: Exploring String Kernels for <fixed-case>HPSG</fixed-case> Parse Selection KristinaToutanova PenkaMarkova - ChristopherManning + ChristopherManning 166–173 W04-3222 toutanova-etal-2004-leaf @@ -4976,14 +4976,14 @@ Incremental Feature Selection and l1 Regularization for Relaxed Maximum-Entropy Modeling StefanRiezler - AlexanderVasserman + AlexanderVasserman 174–181 W04-3223 riezler-vasserman-2004-incremental A Distributional Analysis of a Lexicalized Statistical Parsing Model - Daniel M.Bikel + Daniel M.Bikel 182–189 W04-3224 bikel-2004-distributional @@ -4992,7 +4992,7 @@ Adaptive Language and Translation Models for Interactive Machine Translation LaurentNepveu GuyLapalme - PhilippeLanglais + PhilippeLanglais GeorgeFoster 190–197 W04-3225 @@ -5009,9 +5009,9 @@ Phrase Pair Rescoring with Term Weighting for Statistical Machine Translation BingZhao - StephanVogel + StephanVogel MatthiasEck - AlexWaibel + AlexWaibel 206–213 W04-3227 zhao-etal-2004-phrase @@ -5034,9 +5034,9 @@ Applying Conditional Random Fields to <fixed-case>J</fixed-case>apanese Morphological Analysis - TakuKudo + TakuKudo KaoruYamamoto - YujiMatsumoto + YujiMatsumoto 230–237 W04-3230 kudo-etal-2004-applying @@ -5044,7 +5044,7 @@ A Hybrid Model for Morpho-Syntactic Annotation of <fixed-case>G</fixed-case>erman with a Large Tagset JuliaTrushkina - ErhardHinrichs + ErhardHinrichs 238–245 W04-3231 trushkina-hinrichs-2004-hybrid @@ -5052,8 +5052,8 @@ Identifying Broken Plurals in Unvowelised <fixed-case>A</fixed-case>rabic Tex AbduelbasetGoweder - MassimoPoesio - AnneDe Roeck + MassimoPoesio + AnneDe Roeck JeffReynolds 246–253 W04-3232 @@ -5061,7 +5061,7 @@ <fixed-case>NP</fixed-case> Bracketing by Maximum Entropy Tagging and <fixed-case>SVM</fixed-case> Reranking - HalDaumé III + HalDaumé III DanielMarcu 254–261 W04-3233 @@ -5069,15 +5069,15 @@ Trained Named Entity Recognition using Distributional Clusters - DayneFreitag + DayneFreitag 262–269 W04-3234 freitag-2004-trained Error Measures and <fixed-case>B</fixed-case>ayes Decision Rules Revisited with Applications to <fixed-case>POS</fixed-case> Tagging - HermannNey - MajaPopović + HermannNey + MajaPopović DavidSündermann 270–276 W04-3235 @@ -5101,7 +5101,7 @@ Spelling Correction as an Iterative Process that Exploits the Collective Knowledge of Web Users - SilviuCucerzan + SilviuCucerzan EricBrill 293–300 W04-3238 @@ -5109,17 +5109,17 @@ A Boosting Algorithm for Classification of Semi-Structured Text - TakuKudo - YujiMatsumoto + TakuKudo + YujiMatsumoto 301–308 W04-3239 kudo-matsumoto-2004-boosting Learning to Classify Email into “Speech Acts” - William W.Cohen - Vitor R.Carvalho - Tom M.Mitchell + William W.Cohen + Vitor R.Carvalho + Tom M.Mitchell 309–316 W04-3240 cohen-etal-2004-learning @@ -5134,14 +5134,14 @@ Random Forests in Language Modelin PengXu - FrederickJelinek + FrederickJelinek 325–332 W04-3242 xu-jelinek-2004-random On Log-Likelihood-Ratios and the Significance of Rare Events - Robert C.Moore + Robert C.Moore 333–340 W04-3243 moore-2004-log @@ -5159,13 +5159,13 @@ From Machine Translation to Computer Assisted Translation using Finite-State Models JorgeCivera ElsaCubel - Antonio L.Lagarda - DavidPicó + Antonio L.Lagarda + DavidPicó JorgeGonzález - EnriqueVidal - FranciscoCasacuberta - Juan M.Vilar - SergioBarrachina + EnriqueVidal + FranciscoCasacuberta + Juan M.Vilar + SergioBarrachina 349–356 W04-3245 civera-etal-2004-machine @@ -5181,8 +5181,8 @@ <fixed-case>L</fixed-case>ex<fixed-case>P</fixed-case>age<fixed-case>R</fixed-case>ank: Prestige in Multi-Document Text Summarization - GüneşErkan - Dragomir R.Radev + GüneşErkan + Dragomir R.Radev 365–371 W04-3247 erkan-radev-2004-lexpagerank @@ -5190,7 +5190,7 @@ A New Approach for <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Named Entity Alignment DonghuiFeng - YajuanLv + YajuanLv MingZhou 372–379 W04-3248 @@ -5198,8 +5198,8 @@ Unsupervised Domain Relevance Estimation for Word Sense Disambiguation - AlfioGliozzo - BernardoMagnini + AlfioGliozzo + BernardoMagnini CarloStrapparava 380–387 W04-3249 @@ -5214,15 +5214,15 @@ Instance-Based Question Answering: A Data-Driven Approach - Lucian VladLita - JaimeCarbonell + Lucian VladLita + JaimeCarbonell 396–403 W04-3251 lita-carbonell-2004-instance <fixed-case>T</fixed-case>ext<fixed-case>R</fixed-case>ank: Bringing Order into Text - RadaMihalcea + RadaMihalcea PaulTarau 404–411 W04-3252 @@ -5239,7 +5239,7 @@ Evaluating Information Content by Factoid Analysis: Human annotation and stability SimoneTeufel - Hansvan Halteren + Hansvan Halteren 419–426 W04-3254 teufel-van-halteren-2004-evaluating @@ -5256,7 +5256,7 @@ Multi-Document Biography Summarization LiangZhou MirunaTicrea - EduardHovy + EduardHovy 434–441 W04-3256 zhou-etal-2004-multi @@ -5271,7 +5271,7 @@ Proceedings of the 7th International Workshop on Tree Adjoining Grammar and Related Formalisms W04-33 - OwenRambow + OwenRambow MatthewStone Simon Fraser University
Vancouver, Canada
@@ -5306,7 +5306,7 @@
N-Best Hidden <fixed-case>M</fixed-case>arkov Model Supertagging to Improve Typing on an Ambiguous Keyboard - SašaHasan + SašaHasan KarinHarbusch 24–31 W04-3304 @@ -5369,7 +5369,7 @@ Synchronous Grammars as Tree Transducers - Stuart M.Shieber + Stuart M.Shieber 88–95 W04-3312 shieber-2004-synchronous @@ -5384,7 +5384,7 @@ Generalizing Subcategorization Frames Acquired from Corpora Using Lexicalized Grammars NaokiYoshinaga - Jun’ichiTsujii + Jun’ichiTsujii 104–110 W04-3314 yoshinaga-tsujii-2004-generalizing @@ -5456,7 +5456,7 @@ Context-free Approximation of <fixed-case>LTAG</fixed-case> towards <fixed-case>CFG</fixed-case> Filtering KentaOouchida NaokiYoshinaga - Jun’ichiTsujii + Jun’ichiTsujii 171–177 W04-3323 oouchida-etal-2004-context @@ -5480,7 +5480,7 @@ Assigning <fixed-case>XTAG</fixed-case> Trees to <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et NevilleRyant - KarinKipper + KarinKipper 194–198 W04-3326 ryant-kipper-2004-assigning diff --git a/data/xml/W05.xml b/data/xml/W05.xml index 107b35091c..62398c669c 100644 --- a/data/xml/W05.xml +++ b/data/xml/W05.xml @@ -5,7 +5,7 @@ Proceedings of the Second ACL Workshop on Effective Tools and Methodologies for Teaching NLP and CL W05-01 ChrisBrew - DragomirRadev + DragomirRadev Association for Computational Linguistics
Ann Arbor, Michigan
June @@ -18,7 +18,7 @@ Teaching Applied Natural Language Processing: Triumphs and Tribulations - MartiHearst + MartiHearst 1–8 W05-0101 hearst-2005-teaching @@ -35,7 +35,7 @@ “Language and <fixed-case>C</fixed-case>omputers”: Creating an Introduction for a General Undergraduate Audience ChrisBrew MarkusDickinson - W. DetmarMeurers + W. DetmarMeurers 15–22 W05-0103 brew-etal-2005-language @@ -75,8 +75,8 @@ Language Technology from a <fixed-case>E</fixed-case>uropean Perspective HansUszkoreit ValiaKordoni - VladislavKubon - MichaelRosner + VladislavKubon + MichaelRosner SabineKirchmeier-Andersen 43–48 W05-0108 @@ -84,7 +84,7 @@ Natural Language Processing at the <fixed-case>S</fixed-case>chool of <fixed-case>I</fixed-case>nformation <fixed-case>S</fixed-case>tudies for <fixed-case>A</fixed-case>frica - BjörnGambäck + BjörnGambäck GunnarEriksson AthanassiaFourla 49–56 @@ -94,7 +94,7 @@ Teaching Language Technology at the <fixed-case>N</fixed-case>orth-<fixed-case>W</fixed-case>est <fixed-case>U</fixed-case>niversity SulénePilon - Gerhard Bvan Huyssteen + Gerhard Bvan Huyssteen Bertusvan Rooy 57–61 W05-0110 @@ -102,8 +102,8 @@ Hands-On <fixed-case>NLP</fixed-case> for an Interdisciplinary Audience - ElizabethLiddy - NancyMcCracken + ElizabethLiddy + NancyMcCracken 62–68 W05-0111 liddy-mccracken-2005-hands @@ -129,7 +129,7 @@ Applications of Lexical Information for Algorithmically Composing Multiple-Choice Cloze Items Chao-LinLiu Chun-HungWang - Zhao-MingGao + Zhao-MingGao Shang-MingHuang 1–8 W05-0201 @@ -137,8 +137,8 @@ Automatic Short Answer Marking - Stephen G.Pulman - Jana Z.Sukkarieh + Stephen G.Pulman + Jana Z.Sukkarieh 9–16 W05-0202 pulman-sukkarieh-2005-automatic @@ -154,7 +154,7 @@ Predicting Learning in Tutoring with the Landscape Model of Memory ArthurWard - DianeLitman + DianeLitman 21–24 W05-0204 ward-litman-2005-predicting @@ -179,7 +179,7 @@ Using Syntactic Information to Identify Plagiarism - ÖzlemUzuner + ÖzlemUzuner BorisKatz ThadeNahnsen 37–44 @@ -193,7 +193,7 @@ CarolPai ReganCarey ZacharyZaiss - CarolynRosé + CarolynRosé 45–52 W05-0208 gweon-etal-2005-towards @@ -213,7 +213,7 @@ Measuring Non-native Speakers’ Proficiency of <fixed-case>E</fixed-case>nglish by Using a Test with Automatically-Generated Fill-in-the-Blank Questions - EiichiroSumita + EiichiroSumita FumiakiSugaya SeiichiYamamoto 61–68 @@ -222,10 +222,10 @@ Evaluating State-of-the-Art <fixed-case>T</fixed-case>reebank-style Parsers for <fixed-case>C</fixed-case>oh-<fixed-case>M</fixed-case>etrix and Other Learning Technology Environments - Christian F.Hempelmann + Christian F.Hempelmann VasileRus - Arthur C.Graesser - Danielle S.McNamara + Arthur C.Graesser + Danielle S.McNamara 69–76 W05-0211 hempelmann-etal-2005-evaluating @@ -240,7 +240,7 @@ Situational Language Training for Hotel Receptionists - FrédériqueSegond + FrédériqueSegond ThibaultParmentier RobertaStock RanRosner @@ -254,7 +254,7 @@ Proceedings of the Workshop on Frontiers in Corpus Annotations II: Pie in the Sky W05-03 - AdamMeyers + AdamMeyers Association for Computational Linguistics
Ann Arbor, Michigan
June @@ -274,18 +274,18 @@
Merging <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank, <fixed-case>N</fixed-case>om<fixed-case>B</fixed-case>ank, <fixed-case>T</fixed-case>ime<fixed-case>B</fixed-case>ank, <fixed-case>P</fixed-case>enn <fixed-case>D</fixed-case>iscourse <fixed-case>T</fixed-case>reebank and Coreference - JamesPustejovsky + JamesPustejovsky AdamMeyers - MarthaPalmer - MassimoPoesio + MarthaPalmer + MassimoPoesio 5–12 W05-0302 pustejovsky-etal-2005-merging A Unified Representation for Morphological, Syntactic, Semantic, and Referential Annotations - Erhard W.Hinrichs - SandraKübler + Erhard W.Hinrichs + SandraKübler KarinNaumann 13–20 W05-0303 @@ -306,8 +306,8 @@ AlanLee EleniMiltsakaki RashmiPrasad - AravindJoshi - BonnieWebber + AravindJoshi + BonnieWebber 29–36 W05-0305 dinesh-etal-2005-attribution @@ -315,7 +315,7 @@ Investigating the Characteristics of Causal Relations in <fixed-case>J</fixed-case>apanese Text TakashiInui - ManabuOkumura + ManabuOkumura 37–44 W05-0306 inui-okumura-2005-investigating @@ -324,8 +324,8 @@ A Framework for Annotating Information Structure in Discourse SashaCalhoun MalvinaNissim - MarkSteedman - JasonBrenier + MarkSteedman + JasonBrenier 45–52 W05-0307 calhoun-etal-2005-framework @@ -333,14 +333,14 @@ Annotating Attributions and Private States TheresaWilson - JanyceWiebe + JanyceWiebe 53–60 W05-0308 wilson-wiebe-2005-annotating A Parallel <fixed-case>P</fixed-case>roposition <fixed-case>B</fixed-case>ank <fixed-case>II</fixed-case> for <fixed-case>C</fixed-case>hinese and <fixed-case>E</fixed-case>nglish - MarthaPalmer + MarthaPalmer NianwenXue OlgaBabko-Malaya JinyingChen @@ -352,16 +352,16 @@ Semantically Rich Human-Aided Machine Annotation MarjorieMcShane - SergeiNirenburg + SergeiNirenburg StephenBeale - ThomasO’Hara + ThomasO’Hara 68–75 W05-0310 mcshane-etal-2005-semantically The Reliability of Anaphoric Annotation, Reconsidered: Taking Ambiguity into Account - MassimoPoesio + MassimoPoesio RonArtstein 76–83 W05-0311 @@ -379,7 +379,7 @@ Proceedings of the ACL Workshop on Feature Engineering for Machine Learning in Natural Language Processing W05-04 - EricRingger + EricRingger Association for Computational Linguistics
Ann Arbor, Michigan
June @@ -400,8 +400,8 @@
Feature Engineering and Post-Processing for Temporal Expression Recognition Using Conditional Random Fields - SisayFissaha Adafre - Maartende Rijke + SisayFissaha Adafre + Maartende Rijke 9–16 W05-0402 fissaha-adafre-de-rijke-2005-feature @@ -416,8 +416,8 @@ Using Semantic and Syntactic Graphs for Call Classification - DilekHakkani-Tür - GokhanTur + DilekHakkani-Tür + GokhanTur AnanladaChotimongkol 24–31 W05-0404 @@ -426,7 +426,7 @@ Feature-Based Segmentation of Narrative Documents DavidKauchak - FrancineChen + FrancineChen 32–39 W05-0405 kauchak-chen-2005-feature @@ -435,7 +435,7 @@ Identifying Non-Referential it: A Machine Learning Approach Incorporating Linguistically Motivated Patterns AdrianeBoyd WhitneyGegg-Harrison - DonnaByron + DonnaByron 40–47 W05-0406 boyd-etal-2005-identifying @@ -445,7 +445,7 @@ AlessandroMoschitti BonaventuraCoppola DanielePighin - RobertoBasili + RobertoBasili 48–56 W05-0407 moschitti-etal-2005-engineering @@ -461,7 +461,7 @@ Studying Feature Generation from Various Data Representations for Answer Extraction DanShen - Geert-Jan M.Kruijff + Geert-Jan M.Kruijff DietrichKlakow 65–72 W05-0409 @@ -504,7 +504,7 @@ Using Morphology and Syntax Together in Unsupervised Learning YuHu - IrinaMatveeva + IrinaMatveeva JohnGoldsmith ColinSprague 20–27 @@ -514,7 +514,7 @@ Refining the <fixed-case>SED</fixed-case> Heuristic for Morpheme Discovery: Another Look at <fixed-case>S</fixed-case>wahili YuHu - IrinaMatveeva + IrinaMatveeva JohnGoldsmith ColinSprague 28–35 @@ -524,7 +524,7 @@ A Connectionist Model of Language-Scene Interaction Marshall R.Mayberry - Matthew W.Crocker + Matthew W.Crocker PiaKnoeferle 36–44 W05-0505 @@ -547,7 +547,7 @@ Statistics vs. <fixed-case>UG</fixed-case> in Language Acquisition: Does a Bigram Analysis Predict Auxiliary Inversion? - Xuân-Nga CaoKam + Xuân-Nga CaoKam IglikaStoyneshka LidiyaTornyova William GregorySakas @@ -560,7 +560,7 @@ Climbing the Path to Grammar: A Maximum Entropy Model of Subject/Object Learning FeliceDell’Orletta AlessandroLenci - SimonettaMontemagni + SimonettaMontemagni VitoPirrelli 72–81 W05-0509 @@ -600,7 +600,7 @@ Effective use of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Semantics via Kernel-Based Learning - RobertoBasili + RobertoBasili MarcoCammisa AlessandroMoschitti 1–8 @@ -610,22 +610,22 @@ A Statistical Semantic Parser that Integrates Syntax and Semantics RuifangGe - RaymondMooney + RaymondMooney 9–16 W05-0602 ge-mooney-2005-statistical Search Engine Statistics Beyond the n-Gram: Application to Noun Compound Bracketing - PreslavNakov - MartiHearst + PreslavNakov + MartiHearst 17–24 W05-0603 nakov-hearst-2005-search New Experiments in Distributional Representations of Synonymy - DayneFreitag + DayneFreitag MatthiasBlume JohnByrnes EdmondChow @@ -640,8 +640,8 @@ Word Independent Context Pair Classification Model for Word Sense Disambiguation ChengNiu WeiLi - Rohini K.Srihari - HuifengLi + Rohini K.Srihari + HuifengLi 33–39 W05-0605 niu-etal-2005-word @@ -657,15 +657,15 @@ A <fixed-case>B</fixed-case>ayesian Mixture Model for Term Re-occurrence and Burstiness AvikSarkar - Paul HGarthwaite - AnneDe Roeck + Paul HGarthwaite + AnneDe Roeck 48–55 W05-0607 sarkar-etal-2005-bayesian Domain Kernels for Text Categorization - AlfioGliozzo + AlfioGliozzo CarloStrapparava 56–63 W05-0608 @@ -682,16 +682,16 @@ Using Uneven Margins <fixed-case>SVM</fixed-case> and Perceptron for Information Extraction YaoyongLi - KalinaBontcheva - HamishCunningham + KalinaBontcheva + HamishCunningham 72–79 W05-0610 li-etal-2005-using Improving Sequence Segmentation Learning by Predicting Trigrams - Antalvan den Bosch - WalterDaelemans + Antalvan den Bosch + WalterDaelemans 80–87 W05-0611 van-den-bosch-daelemans-2005-improving @@ -715,14 +715,14 @@ Intentional Context in Situated Natural Language Learning MichaelFleischman - DebRoy + DebRoy 104–111 W05-0614 fleischman-roy-2005-intentional Representational Bias in Unsupervised Learning of Syllable Structure - SharonGoldwater + SharonGoldwater MarkJohnson 112–119 W05-0615 @@ -738,7 +738,7 @@ Morphology Induction from Term Clusters - DayneFreitag + DayneFreitag 128–135 W05-0617 freitag-2005-morphology @@ -754,7 +754,7 @@ Investigating the Effects of Selective Sampling on the Annotation Task BenHachey - BeatriceAlex + BeatriceAlex MarkusBecker 144–151 W05-0619 @@ -763,24 +763,24 @@ Introduction to the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2005 Shared Task: Semantic Role Labeling XavierCarreras - LluísMàrquez + LluísMàrquez 152–164 W05-0620 carreras-marquez-2005-introduction Inferring Semantic Roles Using Sub-Categorization Frames and Maximum Entropy Model - AksharBharati + AksharBharati SriramVenkatapathy - PrashanthReddy + PrashanthReddy 165–168 W05-0621 bharati-etal-2005-inferring Semantic Role Labelling with Tree Conditional Random Fields - TrevorCohn - PhilipBlunsom + TrevorCohn + PhilipBlunsom 169–172 W05-0622 cohn-blunsom-2005-semantic @@ -789,7 +789,7 @@ A Joint Model for Semantic Role Labeling AriaHaghighi KristinaToutanova - ChristopherManning + ChristopherManning 173–176 W05-0623 haghighi-etal-2005-joint @@ -807,14 +807,14 @@ PeterKoomen VasinPunyakanok DanRoth - Wen-tauYih + Wen-tauYih 181–184 W05-0625 koomen-etal-2005-generalized Semantic Role Labeling via Consensus in Pattern-Matching - Chi-SanLin + Chi-SanLin Tony C.Smith 185–188 W05-0626 @@ -833,9 +833,9 @@ Semantic Role Labeling as Sequential Tagging - LluísMàrquez - PereComas - JesúsGiménez + LluísMàrquez + PereComas + JesúsGiménez NeusCatalà 193–196 W05-0628 @@ -857,7 +857,7 @@ AlessandroMoschitti Ana-MariaGiuglea BonaventuraCoppola - RobertoBasili + RobertoBasili 201–204 W05-0630 moschitti-etal-2005-hierarchical @@ -865,7 +865,7 @@ Semantic Role Labeling Using lib<fixed-case>SVM</fixed-case> Necati ErcanOzgencil - NancyMcCracken + NancyMcCracken 205–208 W05-0631 ozgencil-mccracken-2005-semantic @@ -873,14 +873,14 @@ Maximum Entropy Based Semantic Role Labeling Kyung-MiPark - Hae-ChangRim + Hae-ChangRim 209–212 W05-0632 park-rim-2005-maximum Semantic Role Labeling Using Lexical Statistical Information - Simone PaoloPonzetto + Simone PaoloPonzetto MichaelStrube 213–216 W05-0633 @@ -888,11 +888,11 @@ Semantic Role Chunking Combining Complementary Syntactic Views - SameerPradhan - KadriHacioglu - WayneWard - James H.Martin - DanielJurafsky + SameerPradhan + KadriHacioglu + WayneWard + James H.Martin + DanielJurafsky 217–220 W05-0634 pradhan-etal-2005-semantic @@ -900,7 +900,7 @@ Semantic Role Labeling Using Complete Syntactic Analysis MihaiSurdeanu - JordiTurmo + JordiTurmo 221–224 W05-0635 surdeanu-turmo-2005-semantic @@ -915,9 +915,9 @@ Applying Spelling Error Correction Techniques for Improving Semantic Role Labelling - ErikTjong Kim Sang + ErikTjong Kim Sang SanderCanisius - Antalvan den Bosch + Antalvan den Bosch ToineBogers 229–232 W05-0637 @@ -925,18 +925,18 @@ Exploiting Full Parsing Information to Label Semantic Roles Using an Ensemble of <fixed-case>ME</fixed-case> and <fixed-case>SVM</fixed-case> via Integer Linear Programming - Tzong-HanTsai + Tzong-HanTsai Chia-WeiWu Yu-ChunLin - Wen-LianHsu + Wen-LianHsu 233–236 W05-0638 tsai-etal-2005-exploiting The Integration of Syntactic Parsing and Semantic Role Labeling - Szu-tingYi - MarthaPalmer + Szu-tingYi + MarthaPalmer 237–240 W05-0639 yi-palmer-2005-integration @@ -947,7 +947,7 @@ Proceedings of the ACL Workshop on Computational Approaches to Semitic Languages W05-07 KareemDarwish - MonaDiab + MonaDiab NizarHabash Association for Computational Linguistics
Ann Arbor, Michigan
@@ -962,7 +962,7 @@ Memory-Based Morphological Analysis Generation and Part-of-Speech Tagging of <fixed-case>A</fixed-case>rabic ErwinMarsi - Antalvan den Bosch + Antalvan den Bosch AbdelhadiSoudi 1–8 W05-0701 @@ -979,8 +979,8 @@ Morphological Analysis and Generation for <fixed-case>A</fixed-case>rabic Dialects NizarHabash - OwenRambow - GeorgeKiraz + OwenRambow + GeorgeKiraz 17–24 W05-0703 habash-etal-2005-morphological @@ -988,7 +988,7 @@ Examining the Effect of Improved Context Sensitive Morphology on <fixed-case>A</fixed-case>rabic Information Retrieval KareemDarwish - HanyHassan + HanyHassan OssamaEmam 25–30 W05-0704 @@ -998,7 +998,7 @@ Modifying a Natural Language Processing System for <fixed-case>E</fixed-case>uropean Languages to Treat <fixed-case>A</fixed-case>rabic in Information Processing and Information Retrieval Applications GregoryGrefenstette NasredineSemmar - FaïzaElkateb-Gara + FaïzaElkateb-Gara 31–38 W05-0705 grefenstette-etal-2005-modifying @@ -1006,7 +1006,7 @@ Choosing an Optimal Architecture for Segmentation and <fixed-case>POS</fixed-case>-Tagging of <fixed-case>M</fixed-case>odern <fixed-case>H</fixed-case>ebrew RoyBar-Haim - KhalilSima’an + KhalilSima’an YoadWinter 39–46 W05-0706 @@ -1014,7 +1014,7 @@ Part of Speech Tagging for <fixed-case>A</fixed-case>mharic using Conditional Random Fields - SisayFissaha Adafre + SisayFissaha Adafre 47–54 W05-0707 fissaha-adafre-2005-part @@ -1030,9 +1030,9 @@ The Impact of Morphological Stemming on <fixed-case>A</fixed-case>rabic Mention Detection and Coreference Resolution ImedZitouni - JeffreySorensen - XiaoqiangLuo - RaduFlorian + JeffreySorensen + XiaoqiangLuo + RaduFlorian 63–70 W05-0709 zitouni-etal-2005-impact @@ -1040,7 +1040,7 @@ Classifying <fixed-case>A</fixed-case>mharic News Text Using Self-Organizing Maps SamuelEyassu - BjörnGambäck + BjörnGambäck 71–78 W05-0710 eyassu-gamback-2005-classifying @@ -1048,15 +1048,15 @@ <fixed-case>A</fixed-case>rabic Diacritization Using Weighted Finite-State Transducers RaniNelken - Stuart M.Shieber + Stuart M.Shieber 79–86 W05-0711 nelken-shieber-2005-arabic An Integrated Approach for <fixed-case>A</fixed-case>rabic-<fixed-case>E</fixed-case>nglish Named Entity Translation - HanyHassan - JeffreySorensen + HanyHassan + JeffreySorensen 87–93 W05-0712 hassan-sorensen-2005-integrated @@ -1068,7 +1068,7 @@ W05-08 PhilippKoehn JoelMartin - RadaMihalcea + RadaMihalcea ChristofMonz TedPedersen Association for Computational Linguistics @@ -1083,14 +1083,14 @@ Association-Based Bilingual Word Alignment - Robert C.Moore + Robert C.Moore 1–8 W05-0801 moore-2005-association Cross Language Text Categorization by Acquiring Multilingual Domain Models from Comparable Corpora - AlfioGliozzo + AlfioGliozzo CarloStrapparava 9–16 W05-0802 @@ -1106,8 +1106,8 @@ Bilingual Word Spectral Clustering for Statistical Machine Translation BingZhao - Eric P.Xing - AlexWaibel + Eric P.Xing + AlexWaibel 25–32 W05-0804 zhao-etal-2005-bilingual @@ -1121,9 +1121,9 @@ Augmenting a Small Parallel Text with Morpho-Syntactic Language - MajaPopović + MajaPopović DavidVilar - HermannNey + HermannNey SlobodanJovičić ZoranŠarić 41–48 @@ -1132,7 +1132,7 @@ Induction of Fine-Grained Part-of-Speech Taggers via Classifier Combination and Crosslingual Projection - ElliottDrábek + ElliottDrábek DavidYarowsky 49–56 W05-0807 @@ -1141,7 +1141,7 @@ A Hybrid Approach to Align Sentences and Words in <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Parallel Corpora NirajAswani - RobertGaizauskas + RobertGaizauskas 57–64 W05-0808 aswani-gaizauskas-2005-hybrid @@ -1157,7 +1157,7 @@ <fixed-case>NUKTI</fixed-case>: <fixed-case>E</fixed-case>nglish-<fixed-case>I</fixed-case>nuktitut Word Alignment System Description - PhilippeLanglais + PhilippeLanglais FabrizioGotti GuihongCao 75–78 @@ -1167,7 +1167,7 @@ Models for <fixed-case>I</fixed-case>nuktitut-<fixed-case>E</fixed-case>nglish Word Alignment CharlesSchafer - ElliottDrábek + ElliottDrábek 79–82 W05-0811 schafer-drabek-2005-models @@ -1182,17 +1182,17 @@ Symmetric Probabilistic Alignment - Ralf D.Brown + Ralf D.Brown Jae DongKim - Peter J.Jansen - Jaime G.Carbonell + Peter J.Jansen + Jaime G.Carbonell 87–90 W05-0813 brown-etal-2005-symmetric <fixed-case>ISI</fixed-case>‘s Participation in the <fixed-case>R</fixed-case>omanian-<fixed-case>E</fixed-case>nglish Alignment Task - AlexanderFraser + AlexanderFraser DanielMarcu 91–94 W05-0814 @@ -1200,14 +1200,14 @@ Experiments Using <fixed-case>MAR</fixed-case> for Aligning Corpora - Juan MiguelVilar + Juan MiguelVilar 95–98 W05-0815 vilar-2005-experiments Comparison, Selection and Use of Sentence Alignment Algorithms for New Language Pairs - Anil KumarSingh + Anil KumarSingh SamarHusain 99–106 W05-0816 @@ -1215,10 +1215,10 @@ Combined Word Alignments - DanTufiş + DanTufiş RaduIon - AlexandruCeauşu - DanŞtefănescu + AlexandruCeauşu + DanŞtefănescu 107–110 W05-0817 tufis-etal-2005-combined @@ -1227,7 +1227,7 @@ <fixed-case>LIHLA</fixed-case>: Shared Task System Description Helena M.Caseli Maria G. V.Nunes - Mikel L.Forcada + Mikel L.Forcada 111–114 W05-0818 caseli-etal-2005-lihla @@ -1235,7 +1235,7 @@ Aligning Words in <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Parallel Corpora NirajAswani - RobertGaizauskas + RobertGaizauskas 115–118 W05-0819 aswani-gaizauskas-2005-aligning @@ -1271,18 +1271,18 @@ Statistical Machine Translation of <fixed-case>E</fixed-case>uparl Data by using Bilingual N-grams - Rafael E.Banchs - Josep M.Crego - Adriàde Gispert + Rafael E.Banchs + Josep M.Crego + Adriàde Gispert PatrikLambert - José B.Mariño + José B.Mariño 133–136 W05-0823 banchs-etal-2005-statistical <fixed-case>RALI</fixed-case>: <fixed-case>SMT</fixed-case> Shared Task System Description - PhilippeLanglais + PhilippeLanglais GuihongCao FabrizioGotti 137–140 @@ -1292,38 +1292,38 @@ A Generalized Alignment-Free Phrase Extraction BingZhao - StephanVogel + StephanVogel 141–144 W05-0825 zhao-vogel-2005-generalized Combining Linguistic Data Views for Phrase-based <fixed-case>SMT</fixed-case> - JesúsGiménez - LluísMàrquez + JesúsGiménez + LluísMàrquez 145–148 W05-0826 gimenez-marquez-2005-combining Improving Phrase-Based Statistical Translation by Modifying Phrase Extraction and Including Several Features - MartaRuiz Costa-jussà - José A. R.Fonollosa + MartaRuiz Costa-jussà + José A. R.Fonollosa 149–154 W05-0827 ruiz-costa-jussa-fonollosa-2005-improving First Steps towards Multi-Engine Machine Translation - AndreasEisele + AndreasEisele 155–158 W05-0828 eisele-2005-first Competitive Grouping in Integrated Phrase Segmentation and Alignment Model - YingZhang - StephanVogel + YingZhang + StephanVogel 159–162 W05-0829 zhang-vogel-2005-competitive @@ -1342,15 +1342,15 @@ DavidVilar EvgenyMatusov RichardZens - HermannNey + HermannNey 167–174 W05-0831 kanthak-etal-2005-novel Gaming Fluency: Evaluating the Bounds and Expectations of Segment-based Translation Memory - JohnHenderson - WilliamMorgan + JohnHenderson + WilliamMorgan 175–182 W05-0832 henderson-morgan-2005-gaming @@ -1366,15 +1366,15 @@ Word Graphs for Statistical Machine Translation RichardZens - HermannNey + HermannNey 191–198 W05-0834 zens-ney-2005-word A Recursive Statistical Translation Model - Juan MiguelVilar - EnriqueVidal + Juan MiguelVilar + EnriqueVidal 199–207 W05-0835 vilar-vidal-2005-recursive @@ -1383,7 +1383,7 @@ Training and Evaluating Error Minimization Decision Rules for Statistical Machine Translation AshishVenugopal AndreasZollmann - AlexWaibel + AlexWaibel 208–215 W05-0836 venugopal-etal-2005-training @@ -1393,10 +1393,10 @@ Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and/or Summarization W05-09 - JadeGoldstein - AlonLavie - Chin-YewLin - ClareVoss + JadeGoldstein + AlonLavie + Chin-YewLin + ClareVoss Association for Computational Linguistics
Ann Arbor, Michigan
June @@ -1409,10 +1409,10 @@ A Methodology for Extrinsic Evaluation of Text Summarization: Does <fixed-case>ROUGE</fixed-case> Correlate? - BonnieDorr + BonnieDorr ChristofMonz StacyPresident - RichardSchwartz + RichardSchwartz DavidZajic 1–8 W05-0901 @@ -1431,7 +1431,7 @@ GregorLeusch NicolaUeffing DavidVilar - HermannNey + HermannNey 17–24 W05-0903 leusch-etal-2005-preprocessing @@ -1449,7 +1449,7 @@ GabrielMurray SteveRenals JeanCarletta - JohannaMoore + JohannaMoore 33–40 W05-0905 murray-etal-2005-evaluating @@ -1464,9 +1464,9 @@ Evaluating <fixed-case>DUC</fixed-case> 2004 Tasks with the <fixed-case>QARLA</fixed-case> Framework - EnriqueAmigó + EnriqueAmigó JulioGonzalo - AnselmoPeñas + AnselmoPeñas FelisaVerdejo 49–56 W05-0907 @@ -1475,7 +1475,7 @@ On Some Pitfalls in Automatic Evaluation and Significance Testing for <fixed-case>MT</fixed-case> StefanRiezler - John T.Maxwell + John T.Maxwell 57–64 W05-0908 riezler-maxwell-2005-pitfalls @@ -1493,7 +1493,7 @@ Proceedings of the ACL-SIGLEX Workshop on Deep Lexical Acquisition W05-10 - TimothyBaldwin + TimothyBaldwin AnnaKorhonen AlineVillavicencio Association for Computational Linguistics @@ -1508,8 +1508,8 @@ Data Homogeneity and Semantic Role Tagging in <fixed-case>C</fixed-case>hinese - Oi YeeKwong - Benjamin K.Tsou + Oi YeeKwong + Benjamin K.Tsou 1–9 W05-1001 kwong-tsou-2005-data @@ -1517,14 +1517,14 @@ Verb Subcategorization Kernels for Automatic Semantic Labeling AlessandroMoschitti - RobertoBasili + RobertoBasili 10–17 W05-1002 moschitti-basili-2005-verb Identifying Concept Attributes Using a Classifier - MassimoPoesio + MassimoPoesio AbdulrahmanAlmuhareb 18–27 W05-1003 @@ -1532,7 +1532,7 @@ Automatically Learning Qualia Structures from the Web - PhilippCimiano + PhilippCimiano JohannaWenderoth 28–37 W05-1004 @@ -1558,7 +1558,7 @@ Frame Semantic Enhancement of Lexical-Semantic Resources RebeccaGreen - BonnieDorr + BonnieDorr 57–66 W05-1007 green-dorr-2005-frame @@ -1572,17 +1572,17 @@ Morphology vs. Syntax in Adjective Class Acquisition - GemmaBoleda + GemmaBoleda ToniBadia - SabineSchulte im Walde + SabineSchulte im Walde 77–86 W05-1009 boleda-etal-2005-morphology Automatic Acquisition of Bilingual Rules for Extraction of Bilingual Word Pairs from Parallel Corpora - HiroshiEchizen-ya - KenjiAraki + HiroshiEchizen-ya + KenjiAraki YoshioMomouchi 87–96 W05-1010 @@ -1591,7 +1591,7 @@ Approximate Searching for Distributional Similarity JamesGorman - JamesCurran + JamesCurran 97–104 W05-1011 gorman-curran-2005-approximate @@ -1613,7 +1613,7 @@ <fixed-case>T</fixed-case>ext<fixed-case>T</fixed-case>ree Construction for Parser and Treebank Development - Paula S.Newman + Paula S.Newman 1–13 W05-1101 newman-2005-texttree @@ -1628,14 +1628,14 @@ Interleaved Preparation and Output in the <fixed-case>COMIC</fixed-case> Fission Module - Mary EllenFoster + Mary EllenFoster 34–46 W05-1103 foster-2005-interleaved Designing an Extensible <fixed-case>API</fixed-case> for Integrating Language Modeling and Realization - MichaelWhite + MichaelWhite 47-64 W05-1104 white-2005-designing @@ -1652,10 +1652,10 @@ <fixed-case>H</fixed-case>unmorph: Open Source Word Analysis ViktorTrón GyögyGyepesi - PéterHalácsky + PéterHalácsky AndrásKornai LászlóNémeth - DánielVarga + DánielVarga 77–85 W05-1106 tron-etal-2005-hunmorph @@ -1669,7 +1669,7 @@ <fixed-case>XFST</fixed-case>2<fixed-case>FSA</fixed-case>: Comparing Two Finite-State Toolboxes - YaelCohen-Sygal + YaelCohen-Sygal ShulyWintner 100–117 W05-1108 @@ -1685,7 +1685,7 @@ Proceedings of the ACL Workshop on Empirical Modeling of Semantic Equivalence and Entailment W05-12 - BillDolan + BillDolan IdoDagan Association for Computational Linguistics
Ann Arbor, Michigan
@@ -1700,7 +1700,7 @@ Classification of Semantic Relations by Humans and Machines ErwinMarsi - EmielKrahmer + EmielKrahmer 1–6 W05-1201 marsi-krahmer-2005-classification @@ -1708,7 +1708,7 @@ The Distributional Similarity of Sub-Parses JulieWeeds - DavidWeir + DavidWeir BillKeller 7–12 W05-1202 @@ -1716,15 +1716,15 @@ Measuring the Semantic Similarity of Texts - CourtneyCorley - RadaMihalcea + CourtneyCorley + RadaMihalcea 13–18 W05-1203 corley-mihalcea-2005-measuring Training Data Modification for <fixed-case>SMT</fixed-case> Considering Groups of Synonymous Sentences - HidekiKashioka + HidekiKashioka 19–24 W05-1204 kashioka-2005-training @@ -1747,8 +1747,8 @@ Discovering Entailment Relations Using “Textual Entailment Patterns” - Fabio MassimoZanzotto - Maria TeresaPazienza + Fabio MassimoZanzotto + Maria TeresaPazienza MarcoPennacchiotti 37–42 W05-1207 @@ -1784,10 +1784,10 @@ Proceedings of the ACL-ISMB Workshop on Linking Biological Literature, Ontologies and Databases: Mining Biological Semantics W05-13 - K. BretonnelCohen - LynetteHirschman + K. BretonnelCohen + LynetteHirschman HagitShatkay - ChristianBlaschke + ChristianBlaschke Association for Computational Linguistics
Detroit
June @@ -1800,16 +1800,16 @@ Weakly Supervised Learning Methods for Improving the Quality of Gene Name Normalization Data - BenWellner + BenWellner 1–8 W05-1301 wellner-2005-weakly Adaptive String Similarity Metrics for Biomedical Reference Resolution - BenWellner - JoséCastaño - JamesPustejovsky + BenWellner + JoséCastaño + JamesPustejovsky 9–16 W05-1302 wellner-etal-2005-adaptive @@ -1825,7 +1825,7 @@ A Machine Learning Approach to Acronym Generation YoshimasaTsuruoka SophiaAnaniadou - Jun’ichiTsujii + Jun’ichiTsujii 25–31 W05-1304 tsuruoka-etal-2005-machine @@ -1834,7 +1834,7 @@ <fixed-case>M</fixed-case>ed<fixed-case>T</fixed-case>ag: A Collection of Biomedical Annotations Lawrence H.Smith LorraineTanabe - ThomasRindflesch + ThomasRindflesch W. JohnWilbur 32–37 W05-1305 @@ -1844,8 +1844,8 @@ Corpus Design for Biomedical Natural Language Processing K. BretonnelCohen LynneFox - Philip V.Ogren - LawrenceHunter + Philip V.Ogren + LawrenceHunter 38–45 W05-1306 cohen-etal-2005-corpus @@ -1853,8 +1853,8 @@ Using Biomedical Literature Mining to Consolidate the Set of Known Human Protein-Protein Interactions ArunRamani - RazvanBunescu - RaymondMooney + RazvanBunescu + RaymondMooney EdwardMarcotte 46–53 W05-1307 @@ -1875,8 +1875,8 @@ Proceedings of the Ninth International Workshop on Parsing Technology W05-15 - HarryBunt - RobertMalouf + HarryBunt + RobertMalouf Association for Computational Linguistics
Vancouver, British Columbia
October @@ -1890,7 +1890,7 @@ Efficient and Robust <fixed-case>LFG</fixed-case> Parsing: <fixed-case>S</fixed-case>x<fixed-case>LFG</fixed-case> PierreBoullier - BenoîtSagot + BenoîtSagot 1–10 W05-1501 boullier-sagot-2005-efficient @@ -1898,7 +1898,7 @@ Parsing Linear Context-Free Rewriting Systems HåkanBurden - PeterLjunglöf + PeterLjunglöf 11–17 W05-1502 burden-ljunglof-2005-parsing @@ -1913,15 +1913,15 @@ Parsing with Soft and Hard Constraints on Dependency Length - JasonEisner - Noah A.Smith + JasonEisner + Noah A.Smith 30–41 W05-1504 eisner-smith-2005-parsing Corrective Modeling for Non-Projective Dependency Parsing - KeithHall + KeithHall VáclavNovák 42–52 W05-1505 @@ -1955,7 +1955,7 @@ Lexical and Structural Biases for Function Parsing - GabrieleMusillo + GabrieleMusillo PaolaMerlo 83–92 W05-1509 @@ -1965,7 +1965,7 @@ Probabilistic Models for Disambiguation of an <fixed-case>HPSG</fixed-case>-Based Chart Generator HirokoNakanishi YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 93–102 W05-1510 nakanishi-etal-2005-probabilistic @@ -1975,7 +1975,7 @@ TakashiNinomiya YoshimasaTsuruoka YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 103–114 W05-1511 ninomiya-etal-2005-efficacy @@ -1990,7 +1990,7 @@ A Classifier-Based Parser with Linear Run-Time Complexity KenjiSagae - AlonLavie + AlonLavie 125–132 W05-1513 sagae-lavie-2005-classifier @@ -1998,14 +1998,14 @@ Chunk Parsing Revisited YoshimasaTsuruoka - Jun’ichiTsujii + Jun’ichiTsujii 133–140 W05-1514 tsuruoka-tsujii-2005-chunk Constituent Parsing by Classification - JosephTurian + JosephTurian I. DanMelamed 141–151 W05-1515 @@ -2023,16 +2023,16 @@ Efficient Extraction of Grammatical Relations RebeccaWatson - JohnCarroll - TedBriscoe + JohnCarroll + TedBriscoe 160–170 W05-1517 watson-etal-2005-efficient Improving Parsing Accuracy by Combining Diverse Dependency Parsers - DanielZeman - ZdeněkŽabokrtský + DanielZeman + ZdeněkŽabokrtský 171–178 W05-1518 zeman-zabokrtsky-2005-improving @@ -2049,7 +2049,7 @@ Statistical Shallow Semantic Parsing despite Little Training Data RahulBhagat AntonLeuski - EduardHovy + EduardHovy 186–187 W05-1520 bhagat-etal-2005-statistical @@ -2063,7 +2063,7 @@ From metagrammars to factorized <fixed-case>TAG</fixed-case>/<fixed-case>TIG</fixed-case> parsers - ÉricVillemonte de la Clergerie + ÉricVillemonte de la Clergerie 190–191 W05-1522 villemonte-de-la-clergerie-2005-metagrammars @@ -2077,18 +2077,18 @@ <fixed-case>TFLEX</fixed-case>: Speeding Up Deep Parsing with Strategic Pruning - Myroslava O.Dzikovska - Carolyn P.Rose + Myroslava O.Dzikovska + Carolyn P.Rose 194–195 W05-1524 dzikovska-rose-2005-tflex Generic Parsing for Multi-Domain Semantic Interpretation - MyroslavaDzikovska - MarySwift - JamesAllen - Williamde Beaumont + MyroslavaDzikovska + MarySwift + JamesAllen + Williamde Beaumont 196–197 W05-1525 dzikovska-etal-2005-generic @@ -2096,8 +2096,8 @@ Online Statistics for a Unification-Based Dialogue Parser MichaElsner - MarySwift - JamesAllen + MarySwift + JamesAllen DanielGildea 198–199 W05-1526 @@ -2105,11 +2105,11 @@ <fixed-case>SUPPLE</fixed-case>: A Practical Parser for Natural Language Engineering Applications - RobertGaizauskas + RobertGaizauskas MarkHepple HoracioSaggion - Mark A.Greenwood - KevinHumphreys + Mark A.Greenwood + KevinHumphreys 200–201 W05-1527 gaizauskas-etal-2005-supple @@ -2125,7 +2125,7 @@ Robust Extraction of Subcategorization Data from Spoken Language JianguoLi ChrisBrew - EricFosler-Lussier + EricFosler-Lussier 204–205 W05-1529 li-etal-2005-robust @@ -2134,9 +2134,9 @@ Proceedings of the Tenth European Workshop on Natural Language Generation (ENLG-05) - GrahamWilcock - KristiinaJokinen - ChrisMellish + GrahamWilcock + KristiinaJokinen + ChrisMellish EhudReiter Association for Computational Linguistics
Aberdeen, Scotland
@@ -2150,7 +2150,7 @@ Statistical Generation: Three Methods Compared and Evaluated - AnjaBelz + AnjaBelz W05-1601 belz-2005-statistical @@ -2158,7 +2158,7 @@ Interactive Authoring of Logical Forms for Multilingual Generation OferBiller MichaelElhadad - YaelNetzer + YaelNetzer W05-1602 biller-etal-2005-interactive
@@ -2170,7 +2170,7 @@
Real-Time Stochastic Language Generation for Dialogue Systems - NathanaelChambers + NathanaelChambers W05-1604 chambers-2005-real @@ -2189,8 +2189,8 @@
A Context-dependent Algorithm for Generating Locative Expressions in Physically Situated Environments - JohnKelleher - Geert-JanKruijff + JohnKelleher + Geert-JanKruijff W05-1607 kelleher-kruijff-2005-context @@ -2203,13 +2203,13 @@
Context-sensitive Utterance Planning for <fixed-case>CCG</fixed-case> - Geert-JanKruijff + Geert-JanKruijff W05-1609 kruijff-2005-context Narratological Knowledge for Natural Language Generation - BirteLönneker + BirteLönneker W05-1610 lonneker-2005-narratological @@ -2223,7 +2223,7 @@ Explorations in Sentence Fusion ErwinMarsi - EmielKrahmer + EmielKrahmer W05-1612 marsi-krahmer-2005-explorations @@ -2236,13 +2236,13 @@
Computational Mechanisms for Pun Generation - GraemeRitchie + GraemeRitchie W05-1614 ritchie-2005-computational Evaluation of an <fixed-case>NLG</fixed-case> System using Post-Edit Data: Lessons Learnt - SomayajuluSripada + SomayajuluSripada EhudReiter LezanHawizy W05-1615 @@ -2266,20 +2266,20 @@ Towards Generating Procedural Texts: An Exploration of their Rhetorical and Argumentative Structure FaridaAouladomar - PatrickSaint-Dizier + PatrickSaint-Dizier W05-1618 aouladomar-saint-dizier-2005-towards The Types and Distributions of Errors in a Wide Coverage Surface Realizer Evaluation - CharlesCallaway + CharlesCallaway W05-1619 callaway-2005-types An Evolutionary Approach to Referring Expression Generation and Aggregation RaquelHervás - PabloGervás + PabloGervás W05-1620 hervas-gervas-2005-evolutionary @@ -2304,12 +2304,12 @@ An Experiment Setup for Collecting Data for Adaptive Output Planning in a Multimodal Dialogue System - IvanaKruijff-Korbayová - NateBlaylock + IvanaKruijff-Korbayová + NateBlaylock CiprianGerstenberger VerenaRieser TilmanBecker - MichaelKaisser + MichaelKaisser PeterPoller JanSchehl W05-1624 @@ -2317,7 +2317,7 @@ Answer Generation with Temporal Data Integration - VéroniqueMoriceau + VéroniqueMoriceau W05-1625 moriceau-2005-answer @@ -2360,8 +2360,8 @@ Robust stochastic parsing: Comparing and combining two approaches for processing extra-grammatical sentences MaritaAilomaa - VladimírKadlec - MartinRajman + VladimírKadlec + MartinRajman Jean-CédricChappelier 1–7 W05-1701 @@ -2386,7 +2386,7 @@ Dictionary acquisition using parallel text and co-occurrence statistics - ChrisBiemann + ChrisBiemann UweQuasthoff 22–29 W05-1704 @@ -2487,7 +2487,7 @@ Synthetic regional <fixed-case>D</fixed-case>anish BodilKyst - Peter JuelHenrichsen + Peter JuelHenrichsen 116–123 W05-1717 kyst-henrichsen-2006-synthetic @@ -2509,7 +2509,7 @@ <fixed-case>SU</fixed-case>i<fixed-case>S</fixed-case>–cross-language ontology-driven information retrieval in a restricted domain - KristinaNilsson + KristinaNilsson HansHjelm HenrikOxhammar 139–145 @@ -2518,7 +2518,7 @@ Towards automatic recognition of product names: an exploratory study of brand names in economic texts - KristinaNilsson + KristinaNilsson AishaMalmgren 146–155 W05-1721 @@ -2556,15 +2556,15 @@ <fixed-case>D</fixed-case>an<fixed-case>PO</fixed-case>–a transcription-based dictionary for <fixed-case>D</fixed-case>anish speech technology - PeterRossen Skadhauge - Peter JuelHenrichsen + PeterRossen Skadhauge + Peter JuelHenrichsen 186–192 W05-1726 rossen-skadhauge-henrichsen-2006-danpo Functionality in grammar design - AndersSøgaard + AndersSøgaard PetterHaugereid 193–202 W05-1727 @@ -2580,7 +2580,7 @@ Rigorous dimensionality reduction through linguistically motivated feature selection for text categorization Hans FriedrichWitschel - ChrisBiemann + ChrisBiemann 210–217 W05-1729 witschel-biemann-2006-rigorous diff --git a/data/xml/W06.xml b/data/xml/W06.xml index 9f44b300be..fab204d231 100644 --- a/data/xml/W06.xml +++ b/data/xml/W06.xml @@ -5,7 +5,7 @@ Proceedings of the Fifth SIGHAN Workshop on Chinese Language Processing W06-01 Hwee TouNg - Olivia O.Y.Kwong + Olivia O.Y.Kwong Association for Computational Linguistics
Sydney, Australia
July @@ -19,7 +19,7 @@ Improving Context Vector Models by Feature Clustering for Automatic Thesaurus Construction Jia-MingYou - Keh-JiannChen + Keh-JiannChen 1–8 W06-0101 you-chen-2006-improving @@ -27,7 +27,7 @@ Regional Variation of Domain-Specific Lexical Items: Toward a Pan-<fixed-case>C</fixed-case>hinese Lexical Resource Oi YeeKwong - Benjamin K.Tsou + Benjamin K.Tsou 9–16 W06-0102 kwong-tsou-2006-regional @@ -57,7 +57,7 @@ A Clustering Approach for Unsupervised <fixed-case>C</fixed-case>hinese Coreference Resolution - Chi-shingWang + Chi-shingWang GraceNgai 40–47 W06-0106 @@ -66,8 +66,8 @@ Latent Features in Automatic Tense Translation between <fixed-case>C</fixed-case>hinese and <fixed-case>E</fixed-case>nglish YangYe - Victoria LiFossum - StevenAbney + Victoria LiFossum + StevenAbney 48–55 W06-0107 ye-etal-2006-latent @@ -92,7 +92,7 @@ Hybrid Models for <fixed-case>C</fixed-case>hinese Named Entity Recognition LishuangLi TingtingMao - DegenHuang + DegenHuang YuanshengYang 72–78 W06-0110 @@ -110,7 +110,7 @@ A Hybrid Approach to <fixed-case>C</fixed-case>hinese Base Noun Phrase Chunking FangXu - ChengqingZong + ChengqingZong JunZhao 87–93 W06-0112 @@ -137,7 +137,7 @@ The Third International <fixed-case>C</fixed-case>hinese Language Processing Bakeoff: Word Segmentation and Named Entity Recognition - Gina-AnneLevow + Gina-AnneLevow 108–117 W06-0115 levow-2006-third @@ -180,11 +180,11 @@ On Closed Task of <fixed-case>C</fixed-case>hinese Word Segmentation: An Improved <fixed-case>CRF</fixed-case> Model Coupled with Character Clustering and Automatically Generated Template Matching - Richard Tzong-HanTsai + Richard Tzong-HanTsai Hsieh-ChuanHung Cheng-LungSung Hong-JieDai - Wen-LianHsu + Wen-LianHsu 134–137 W06-0120 tsai-etal-2006-closed @@ -192,7 +192,7 @@ <fixed-case>C</fixed-case>hinese Word Segmentation with Maximum Entropy and N-gram Language Model XinhaoWang - XiaojunLin + XiaojunLin DianhaiYu HaoTian XihongWu @@ -204,8 +204,8 @@ On Using Ensemble Methods for <fixed-case>C</fixed-case>hinese Named Entity Recognition Chia-WeiWu Shyh-YiJan - Richard Tzong-HanTsai - Wen-LianHsu + Richard Tzong-HanTsai + Wen-LianHsu 142–145 W06-0122 wu-etal-2006-using @@ -232,9 +232,9 @@ <fixed-case>C</fixed-case>hinese Word Segmentation and Named Entity Recognition Based on a Context-Dependent Mutual Information Independence Model MinZhang - GuoDongZhou - LingPengYang - DongHongJi + GuoDongZhou + LingPengYang + DongHongJi 154–157 W06-0125 zhang-etal-2006-chinese @@ -243,7 +243,7 @@ Word Segmentation and Named Entity Recognition for <fixed-case>SIGHAN</fixed-case> Bakeoff3 SuxiangZhang YingQin - JuanWen + JuanWen XiaojieWang 158–161 W06-0126 @@ -252,7 +252,7 @@ An Improved <fixed-case>C</fixed-case>hinese Word Segmentation System with Conditional Random Field HaiZhao - Chang-NingHuang + Chang-NingHuang MuLi 162–165 W06-0127 @@ -313,7 +313,7 @@ A Pragmatic <fixed-case>C</fixed-case>hinese Word Segmentation System WeiJiang YiGuan - Xiao-LongWang + Xiao-LongWang 189–192 W06-0134 jiang-etal-2006-pragmatic-chinese @@ -339,7 +339,7 @@ <fixed-case>C</fixed-case>hinese Word Segmentation Based on an Approach of Maximum Entropy Modeling YanSong JiaqingGuo - DongfengCai + DongfengCai 201–204 W06-0137 song-etal-2006-chinese @@ -365,8 +365,8 @@ <fixed-case>C</fixed-case>hinese Named Entity Recognition with a Multi-Phase Model JunshengZhou LiangHe - XinyuDai - JiajunChen + XinyuDai + JiajunChen 213–216 W06-0140 zhou-etal-2006-chinese @@ -387,8 +387,8 @@ Proceedings of the Workshop on Information Extraction Beyond The Document W06-02 - Mary ElaineCaliff - Mark A.Greenwood + Mary ElaineCaliff + Mark A.Greenwood MarkStevenson RomanYangarber Association for Computational Linguistics @@ -440,7 +440,7 @@ Automatic Knowledge Representation using a Graph-based Algorithm for Language-Independent Lexical Chaining - GaëlDias + GaëlDias CláudiaSantos GuillaumeCleuziou 36–47 @@ -450,7 +450,7 @@ Data Selection in Semi-supervised Learning for Name Tagging HengJi - RalphGrishman + RalphGrishman 48–55 W06-0206 ji-grishman-2006-data @@ -466,7 +466,7 @@ Learning Domain-Specific Information Extraction Patterns from the Web SiddharthPatwardhan - EllenRiloff + EllenRiloff 66–73 W06-0208 patwardhan-riloff-2006-learning @@ -491,7 +491,7 @@ Extracting Opinions, Opinion Holders, and Topics Expressed in Online News Media Text Soo-MinKim - EduardHovy + EduardHovy 1–8 W06-0301 kim-hovy-2006-extracting @@ -499,7 +499,7 @@ Toward Opinion Summarization: Linking the Sources VeselinStoyanov - ClaireCardie + ClaireCardie 9–14 W06-0302 stoyanov-cardie-2006-toward @@ -514,8 +514,8 @@ User-directed Sentiment Analysis: Visualizing the Affective Content of Documents - Michelle L.Gregory - NancyChinchor + Michelle L.Gregory + NancyChinchor PaulWhitney RichardCarter ElizabethHetzler @@ -529,8 +529,8 @@ RashmiPrasad NikhilDinesh AlanLee - AravindJoshi - BonnieWebber + AravindJoshi + BonnieWebber 31–38 W06-0305 prasad-etal-2006-annotating @@ -548,9 +548,9 @@ Exploitation in Affect Detection in Open-Ended Improvisational Text LiZhang - John A.Barnden - Robert J.Hendley - Alan M.Wallington + John A.Barnden + Robert J.Hendley + Alan M.Wallington 47–54 W06-0307 zhang-etal-2006-exploitation @@ -558,7 +558,7 @@ Towards a validated model for affective classification of texts MichelGénéreux - RogerEvans + RogerEvans 55–62 W06-0308 genereux-evans-2006-towards @@ -624,7 +624,7 @@ Capturing Disjunction in Lexicalization with Extensible Dependency Grammar JorgeMarques Pelizzoni - Maria das GraçasVolpe Nunes + Maria das GraçasVolpe Nunes 41–50 W06-0406 marques-pelizzoni-volpe-nunes-2006-capturing @@ -635,7 +635,7 @@ Proceedings of the 2nd Workshop on Ontology Learning and Population: Bridging the Gap between Text and Knowledge W06-05 PaulBuitelaar - PhilippCimiano + PhilippCimiano BerenikeLoos Association for Computational Linguistics
Sydney, Australia
@@ -650,7 +650,7 @@ Enriching a Formal Ontology with a Thesaurus: an Application in the Cultural Heritage Domain RobertoNavigli - PaolaVelardi + PaolaVelardi 1–9 W06-0501 navigli-velardi-2006-enriching @@ -661,14 +661,14 @@ FrancisBond TakaakiTanaka SanaeFujita - DanFlickinger + DanFlickinger 10–17 W06-0502 nichols-etal-2006-multilingual <fixed-case>LEILA</fixed-case>: Learning to Extract Information by Linguistic Analysis - Fabian M.Suchanek + Fabian M.Suchanek GeorgianaIfrim GerhardWeikum 18–25 @@ -677,10 +677,10 @@ Ontology Population from Textual Mentions: Task Definition and Benchmark - BernardoMagnini - EmanuelePianta + BernardoMagnini + EmanuelePianta OctavianPopescu - ManuelaSperanza + ManuelaSperanza 26–32 W06-0504 magnini-etal-2006-ontology @@ -688,7 +688,7 @@ Efficient Hierarchical Entity Classifier Using Conditional Random Fields KoenDeschacht - Marie-FrancineMoens + Marie-FrancineMoens 33–40 W06-0505 deschacht-moens-2006-efficient @@ -696,7 +696,7 @@ Taxonomy Learning using Term Specificity and Similarity Pum-MoRyu - Key-SunChoi + Key-SunChoi 41–48 W06-0506 ryu-choi-2006-taxonomy @@ -704,8 +704,8 @@ Towards Large-scale Non-taxonomic Relation Extraction: Estimating the Precision of Rote Extractors EnriqueAlfonseca - MariaRuiz-Casado - ManabuOkumura + MariaRuiz-Casado + ManabuOkumura PabloCastells 49–56 W06-0507 @@ -724,9 +724,9 @@ Proceedings of the Workshop on Frontiers in Linguistically Annotated Corpora 2006 W06-06 - TimothyBaldwin + TimothyBaldwin FrancisBond - AdamMeyers + AdamMeyers ShigekoNariyama Association for Computational Linguistics
Sydney, Australia
@@ -740,9 +740,9 @@ Challenges for Annotating Images for Sense Disambiguation - Cecilia OvesdotterAlm + Cecilia OvesdotterAlm NicolasLoeff - David A.Forsyth + David A.Forsyth 1–4 W06-0601 alm-etal-2006-challenges @@ -750,11 +750,11 @@ A Semi-Automatic Method for Annotating a Biomedical <fixed-case>P</fixed-case>roposition <fixed-case>B</fixed-case>ank Wen-ChiChou - Richard Tzong-HanTsai + Richard Tzong-HanTsai Ying-ShanSu WeiKu Ting-YiSung - Wen-LianHsu + Wen-LianHsu 5–12 W06-0602 chou-etal-2006-semi @@ -762,7 +762,7 @@ How and Where do People Fail with Time: Temporal Reference Mapping Annotation by <fixed-case>C</fixed-case>hinese and <fixed-case>E</fixed-case>nglish Bilinguals YangYe - StevenAbney + StevenAbney 13–20 W06-0603 ye-abney-2006-people @@ -771,7 +771,7 @@ Probing the Space of Grammatical Variation: Induction of Cross-Lingual Grammatical Constraints from Treebanks FeliceDell’Orletta AlessandroLenci - SimonettaMontemagni + SimonettaMontemagni VitoPirrelli 21–28 W06-0604 @@ -795,9 +795,9 @@ Manual Annotation of Opinion Categories in Meetings SwapnaSomasundaran - JanyceWiebe + JanyceWiebe PaulHoffmann - DianeLitman + DianeLitman 54–61 W06-0607 somasundaran-etal-2006-manual @@ -816,9 +816,9 @@ OlgaBabko-Malaya AnnBies AnnTaylor - SzutingYi - MarthaPalmer - MitchMarcus + SzutingYi + MarthaPalmer + MitchMarcus SethKulick LibinShen 70–77 @@ -835,7 +835,7 @@ Corpus Annotation by Generation ElkeTeich - John A.Bateman + John A.Bateman RichardEckart 86–93 W06-0611 @@ -843,7 +843,7 @@ Constructing an <fixed-case>E</fixed-case>nglish Valency Lexicon - JiříSemecký + JiříSemecký SilvieCinková 94–97 W06-0612 @@ -854,8 +854,8 @@ Proceedings of the Workshop on Task-Focused Summarization and Question Answering W06-07 - Tat-SengChua - JadeGoldstein + Tat-SengChua + JadeGoldstein SimoneTeufel LucyVanderwende Association for Computational Linguistics @@ -880,7 +880,7 @@ Challenges in Evaluating Summaries of Short Stories AnnaKazantseva - StanSzpakowicz + StanSzpakowicz 8–15 W06-0702 kazantseva-szpakowicz-2006-challenges @@ -903,7 +903,7 @@ Using Scenario Knowledge in Automatic Question Answering - SandaHarabagiu + SandaHarabagiu AndrewHickl 32–39 W06-0705 @@ -919,7 +919,7 @@ <fixed-case>DUC</fixed-case> 2005: Evaluation of Question-Focused Summarization Systems - Hoa TrangDang + Hoa TrangDang 48–55 W06-0707 dang-2006-duc @@ -929,7 +929,7 @@ Proceedings of the Workshop on How Can Computational Linguistics Improve Information Retrieval? W06-08 - JohnTait + JohnTait MichaelOakes Association for Computational Linguistics
Sydney, Australia
@@ -945,14 +945,14 @@ <fixed-case>I</fixed-case>ndonesian-<fixed-case>J</fixed-case>apanese <fixed-case>CLIR</fixed-case> Using Only Limited Resource AyuPurwarianti MasatoshiTsuchiya - SeiichiNakagawa + SeiichiNakagawa 1–8 W06-0801 purwarianti-etal-2006-indonesian
Hybrid Systems for Information Extraction and Question Answering - RodolfoDelmonte + RodolfoDelmonte 9–16 W06-0802 delmonte-2006-hybrid @@ -980,7 +980,7 @@ HuaCheng YanQu JesseMontgomery - David A.Evans + David A.Evans 33–40 W06-0805 cheng-etal-2006-exploring @@ -990,9 +990,9 @@ Proceedings of the Workshop on Annotating and Reasoning about Time and Events W06-09 - BranimirBoguraev - RafaelMuñoz - JamesPustejovsky + BranimirBoguraev + RafaelMuñoz + JamesPustejovsky Association for Computational Linguistics
Sydney, Australia
July @@ -1013,7 +1013,7 @@ Local Semantics in the Interpretation of Temporal Expressions RobertDale - PawełMazur + PawełMazur 9–16 W06-0902 dale-mazur-2006-local @@ -1021,7 +1021,7 @@ Automatic Dating of Documents and Temporal Text Classification AngeloDalli - YorickWilks + YorickWilks 17–22 W06-0903 dalli-wilks-2006-automatic @@ -1029,16 +1029,16 @@ A Pilot Study on Acquiring Metric Temporal Constraints for Events InderjeetMani - BenWellner + BenWellner 23–29 W06-0904 mani-wellner-2006-pilot Evaluating Knowledge-based Approaches to the Multilingual Extension of a Temporal Expression Normalizer - MatteoNegri - EstelaSaquete - PatricioMartínez-Barco + MatteoNegri + EstelaSaquete + PatricioMartínez-Barco RafaelMuñoz 30–37 W06-0905 @@ -1047,8 +1047,8 @@ Extending <fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> with Typical Durations of Events FengPan - RutuMulkar - Jerry R.Hobbs + RutuMulkar + Jerry R.Hobbs 38–45 W06-0906 pan-etal-2006-extending @@ -1056,7 +1056,7 @@ Marking Time in Developmental Biology: Annotating Developmental Events and their Links with Molecular Events GailSinclair - BonnieWebber + BonnieWebber DuncanDavidson 46–53 W06-0907 @@ -1068,8 +1068,8 @@ Proceedings of the Workshop on Multilingual Language Resources and Interoperability W06-10 AndreasWitt - GillesSérasset - SusanArmstrong + GillesSérasset + SusanArmstrong JimBreen UlrichHeid FelixSasaki @@ -1086,9 +1086,9 @@ Lexical Markup Framework (<fixed-case>LMF</fixed-case>) for <fixed-case>NLP</fixed-case> Multilingual Resources GilFrancopoulo - NuriaBel + NuriaBel MonteGeorge - NicolettaCalzolari + NicolettaCalzolari MonicaMonachini MandyPet ClaudiaSoria @@ -1106,12 +1106,12 @@ Towards Agent-based Cross-Lingual Interoperability of Distributed Lexical Resources ClaudiaSoria - MaurizioTesconi + MaurizioTesconi AndreaMarchetti FrancescaBertagna MonicaMonachini Chu-RenHuang - NicolettaCalzolari + NicolettaCalzolari 17–24 W06-1003 soria-etal-2006-towards @@ -1138,14 +1138,14 @@ Multilingual Collocation Extraction: Issues and Solutions VioletaSeretan - EricWehrli + EricWehrli 40–49 W06-1006 seretan-wehrli-2006-multilingual Structural Properties of Lexical Systems: Monolingual and Multilingual Perspectives - AlainPolguère + AlainPolguère 50–59 W06-1007 polguere-2006-structural @@ -1161,7 +1161,7 @@ Evaluation of the <fixed-case>B</fixed-case>ible as a Resource for Cross-Language Information Retrieval - Peter A.Chew + Peter A.Chew Steve J.Verzi Travis L.Bauer Jonathan T.McClain @@ -1175,7 +1175,7 @@ Proceedings of the Workshop on Linguistic Distances W06-11 JohnNerbonne - ErhardHinrichs + ErhardHinrichs Association for Computational Linguistics
Sydney, Australia
July @@ -1204,7 +1204,7 @@ Similarity Judgments: Philosophical, Psychological and Mathematical Investigations ClaudeSt-Jacques - CarolineBarrière + CarolineBarrière 8–15 W06-1103 st-jacques-barriere-2006-similarity @@ -1252,14 +1252,14 @@ Study of Some Distance Measures for Language and Encoding Identification - Anil KumarSingh + Anil KumarSingh 63–72 W06-1109 singh-2006-study Towards Case-Based Parsing: Are Chunks Reliable Indicators for Syntax Trees? - SandraKübler + SandraKübler 73–81 W06-1110 kubler-2006-towards @@ -1275,7 +1275,7 @@ A Structural Similarity Measure PetrHomola - VladislavKuboň + VladislavKuboň 91–99 W06-1112 homola-kubon-2006-structural @@ -1290,7 +1290,7 @@ Total Rank Distance and Scaled Total Rank Distance: Two Alternative Metrics in Computational Linguistics AncaDinu - Liviu P.Dinu + Liviu P.Dinu 109–116 W06-1114 dinu-dinu-2006-total @@ -1300,9 +1300,9 @@ Proceedings of the Workshop on Multiword Expressions: Identifying and Exploiting Underlying Properties W06-12 - Begoña VilladaMoirón + Begoña VilladaMoirón AlineVillavicencio - DianaMcCarthy + DianaMcCarthy StefanEvert SuzanneStevenson Association for Computational Linguistics @@ -1317,17 +1317,17 @@ Compositionality and Multiword Expressions: Six of One, Half a Dozen of the Other? - TimothyBaldwin + TimothyBaldwin 1 W06-1201 baldwin-2006-compositionality Measuring <fixed-case>MWE</fixed-case> Compositionality Using Semantic Annotation - Scott S.L.Piao + Scott S.L.Piao PaulRayson OlgaMudraya - AndrewWilson + AndrewWilson RogerGarside 2–11 W06-1202 @@ -1335,7 +1335,7 @@ Automatic Identification of Non-Compositional Multi-Word Expressions using Latent Semantic Analysis - GrahamKatz + GrahamKatz EugenieGiesbrecht 12–19 W06-1203 @@ -1344,7 +1344,7 @@ Using Information about Multi-word Expressions for the Word-Alignment Task SriramVenkatapathy - Aravind K.Joshi + Aravind K.Joshi 20–27 W06-1204 venkatapathy-joshi-2006-using @@ -1353,7 +1353,7 @@ Detecting Complex Predicates in <fixed-case>H</fixed-case>indi using <fixed-case>POS</fixed-case> Projection across Parallel Corpora AmitabhaMukerjee AnkitSoni - Achla MRaina + Achla MRaina 28–35 W06-1205 mukerjee-etal-2006-detecting @@ -1379,7 +1379,7 @@ Interpretation of Compound Nominalisations using Corpus and Web Statistics JeremyNicholson - TimothyBaldwin + TimothyBaldwin 54–61 W06-1208 nicholson-baldwin-2006-interpretation @@ -1418,7 +1418,7 @@ KazuhiroNakadai HiroshiTsujino TetsuyaOgata - Hiroshi G.Okuno + Hiroshi G.Okuno 9–17 W06-1302 komatani-etal-2006-multi @@ -1427,7 +1427,7 @@ Building Effective Question Answering Characters AntonLeuski RonakkumarPatel - DavidTraum + DavidTraum BrandonKennedy 18–27 W06-1303 @@ -1452,7 +1452,7 @@ Multidimensional Dialogue Management SimonKeizer - HarryBunt + HarryBunt 37–45 W06-1306 keizer-bunt-2006-multidimensional @@ -1466,9 +1466,9 @@ Resolution of Referents Groupings in Practical Dialogues - AlexandreDenis + AlexandreDenis GuillaumePitel - MatthieuQuignard + MatthieuQuignard 54–59 W06-1308 denis-etal-2006-resolution @@ -1512,17 +1512,17 @@ An Information State-Based Dialogue Manager for Call for Fire Dialogues AntonioRoque - DavidTraum + DavidTraum 88–95 W06-1313 roque-traum-2006-information Automatically Detecting Action Items in Audio Meeting Recordings - WilliamMorgan - Pi-ChuanChang + WilliamMorgan + Pi-ChuanChang SurabhiGupta - Jason M.Brenier + Jason M.Brenier 96–103 W06-1314 morgan-etal-2006-automatically @@ -1538,7 +1538,7 @@ Multimodal Dialog Description Language for Rapid System Development - MasahiroAraki + MasahiroAraki KenjiTachibana 109–116 W06-1316 @@ -1546,11 +1546,11 @@ Classification of Discourse Coherence Relations: An Exploratory Study using Multiple Knowledge Sources - BenWellner - JamesPustejovsky + BenWellner + JamesPustejovsky CatherineHavasi AnnaRumshisky - RoserSaurí + RoserSaurí 117–125 W06-1317 wellner-etal-2006-classification @@ -1558,7 +1558,7 @@ Measuring annotator agreement in a complex hierarchical dialogue act annotation scheme JeroenGeertzen - HarryBunt + HarryBunt 126–133 W06-1318 geertzen-bunt-2006-measuring @@ -1576,14 +1576,14 @@ An Analysis of Quantitative Aspects in the Evaluation of Thematic Segmentation Algorithms MariaGeorgescul AlexanderClark - SusanArmstrong + SusanArmstrong 144–151 W06-1320 georgescul-etal-2006-analysis Discourse and Dialogue Processing in Spoken Intelligent Tutoring Systems - Diane J.Litman + Diane J.Litman 152 W06-1321 litman-2006-discourse @@ -1611,7 +1611,7 @@ Proceedings of the Fourth International Natural Language Generation Conference W06-14 NathalieColineau - CécileParis + CécileParis StephenWan RobertDale Association for Computational Linguistics @@ -1626,7 +1626,7 @@ Lessons Learned from Large Scale Evaluation of Systems that Produce Text: Nightmares and Pleasant Surprises - Kathleen R.McKeown + Kathleen R.McKeown 3–5 W06-1401 mckeown-2006-lessons @@ -1643,7 +1643,7 @@ <fixed-case>CCG</fixed-case> Chart Realization from Disjunctive Inputs - MichaelWhite + MichaelWhite 12–19 W06-1403 white-2006-ccg @@ -1659,7 +1659,7 @@ Individuality and Alignment in Generated Dialogues AmyIsard CarstenBrockmann - JonOberlander + JonOberlander 25–32 W06-1405 isard-etal-2006-individuality @@ -1675,7 +1675,7 @@ Adjective-to-Verb Paraphrasing in <fixed-case>J</fixed-case>apanese Based on Lexical Constraints of Verbs AtsushiFujita NaruakiMasuno - SatoshiSato + SatoshiSato TakehitoUtsuro 41–43 W06-1407 @@ -1690,9 +1690,9 @@ Overspecified Reference in Hierarchical Domains: Measuring the Benefits for Readers - IvandréParaboni + IvandréParaboni JudithMasthoff - Keesvan Deemter + Keesvan Deemter 55–62 W06-1409 paraboni-etal-2006-overspecified @@ -1717,9 +1717,9 @@ Noun Phrase Generation for Situated Dialogs LauraStoia - Darla MagdaleneShockley - Donna K.Byron - EricFosler-Lussier + Darla MagdaleneShockley + Donna K.Byron + EricFosler-Lussier 81–88 W06-1412 stoia-etal-2006-noun @@ -1727,8 +1727,8 @@ The Clarity-Brevity Trade-off in Generating Referring Expressions Imtiaz HussainKhan - GraemeRitchie - Keesvan Deemter + GraemeRitchie + Keesvan Deemter 89–91 W06-1413 khan-etal-2006-clarity @@ -1742,7 +1742,7 @@ Generating Intelligent Numerical Answers in a Question-Answering System - VéroniqueMoriceau + VéroniqueMoriceau 103–110 W06-1415 moriceau-2006-generating @@ -1751,21 +1751,21 @@ Generating Multiple-Choice Test Items from Medical Text: A Pilot Study NikiforosKaramanis Le AnHa - RuslanMitkov + RuslanMitkov 111–113 W06-1416 karamanis-etal-2006-generating Generation of Biomedical Arguments for Lay Readers - NancyGreen + NancyGreen 114–121 W06-1417 green-2006-generation Introduction to the <fixed-case>INLG</fixed-case>’06 Special Session on Sharing Data and Comparative Evaluation - AnjaBelz + AnjaBelz RobertDale 125–126 W06-1418 @@ -1782,7 +1782,7 @@ Building a Semantically Transparent Corpus for the Generation of Referring Expressions. - Keesvan Deemter + Keesvan Deemter Ielkavan der Sluis AlbertGatt 130–132 @@ -1791,7 +1791,7 @@ Shared-Task Evaluations in <fixed-case>HLT</fixed-case>: Lessons for <fixed-case>NLG</fixed-case> - AnjaBelz + AnjaBelz AdamKilgarriff 133–135 W06-1421 @@ -1800,7 +1800,7 @@ <fixed-case>GENEVAL</fixed-case>: A Proposal for Shared-task Evaluation in <fixed-case>NLG</fixed-case> EhudReiter - AnjaBelz + AnjaBelz 136–138 W06-1422 reiter-belz-2006-geneval @@ -1825,15 +1825,15 @@ The Hidden <fixed-case>TAG</fixed-case> Model: Synchronous Grammars for Parsing Resource-Poor Languages DavidChiang - OwenRambow + OwenRambow 1–8 W06-1501 chiang-rambow-2006-hidden A Constraint Driven Metagrammar - JosephLe Roux - BenoîtCrabbé + JosephLe Roux + BenoîtCrabbé YannickParmentier 9–16 W06-1502 @@ -1842,10 +1842,10 @@ The Metagrammar Goes Multilingual: A Cross-Linguistic Look at the V2-Phenomenon AlexandraKinyon - OwenRambow + OwenRambow TatjanaScheffler SinWonYoon - Aravind K.Joshi + Aravind K.Joshi 17–24 W06-1503 kinyon-etal-2006-metagrammar @@ -1859,7 +1859,7 @@ A <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar Analysis of the Syntax and Semantics of <i>It</i>-Clefts - Chung-hyeHan + Chung-hyeHan NancyHedberg 33–40 W06-1505 @@ -1867,14 +1867,14 @@ Pied-Piping in Relative Clauses: Syntax and Compositional Semantics Based on <fixed-case>S</fixed-case>ynchronous <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar - Chung-hyeHan + Chung-hyeHan 41–48 W06-1506 han-2006-pied Negative Concord and Restructuring in Palestinian <fixed-case>A</fixed-case>rabic: A Comparison of <fixed-case>TAG</fixed-case> and <fixed-case>CCG</fixed-case> Analyses - Frederick M.Hoyt + Frederick M.Hoyt 49–56 W06-1507 hoyt-2006-negative @@ -1930,7 +1930,7 @@ Generating <fixed-case>XTAG</fixed-case> Parsers from Algebraic Specifications CarlosGómez-Rodríguez - Miguel A.Alonso + Miguel A.Alonso ManuelVilares 103–108 W06-1514 @@ -1978,7 +1978,7 @@ Handling Unlike Coordinated Phrases in <fixed-case>TAG</fixed-case> by Mixing Syntactic Category and Grammatical Function - Carlos A.Prolo + Carlos A.Prolo 137–140 W06-1520 prolo-2006-handling @@ -1992,8 +1992,8 @@ Modeling and Analysis of Elliptic Coordination by Dynamic Exploitation of Derivation Forests in <fixed-case>LTAG</fixed-case> Parsing - DjaméSeddah - BenoîtSagot + DjaméSeddah + BenoîtSagot 147–152 W06-1522 seddah-sagot-2006-modeling @@ -2007,7 +2007,7 @@ Reconsidering Raising and Experiencers in <fixed-case>E</fixed-case>nglish - Dennis RyanStoroshenko + Dennis RyanStoroshenko 159–164 W06-1524 storoshenko-2006-reconsidering @@ -2017,8 +2017,8 @@ Proceedings of the 2006 Conference on Empirical Methods in Natural Language Processing W06-16 - DanJurafsky - EricGaussier + DanJurafsky + EricGaussier Association for Computational Linguistics
Sydney, Australia
July @@ -2032,7 +2032,7 @@ Unsupervised Discovery of a Statistical Verb Lexicon TrondGrenager - Christopher D.Manning + Christopher D.Manning 1–8 W06-1601 grenager-manning-2006-unsupervised @@ -2049,7 +2049,7 @@ Paraphrase Recognition via Dissimilarity Significance Classification LongQiu Min-YenKan - Tat-SengChua + Tat-SengChua 18–26 W06-1603 qiu-etal-2006-paraphrase @@ -2065,7 +2065,7 @@ Distributional measures of concept-distance: A task-oriented evaluation - SaifMohammad + SaifMohammad GraemeHirst 35–43 W06-1605 @@ -2093,15 +2093,15 @@ The impact of parse quality on syntactically-informed statistical machine translation ChrisQuirk - SimonCorston-Oliver + SimonCorston-Oliver 62–69 W06-1608 quirk-corston-oliver-2006-impact Statistical Machine Reordering - Marta R.Costa-jussà - José A. R.Fonollosa + Marta R.Costa-jussà + José A. R.Fonollosa 70–76 W06-1609 costa-jussa-fonollosa-2006-statistical @@ -2109,8 +2109,8 @@ Re-evaluating Machine Translation Results with Paraphrase Support LiangZhou - Chin-YewLin - EduardHovy + Chin-YewLin + EduardHovy 77–84 W06-1610 zhou-etal-2006-evaluating @@ -2118,7 +2118,7 @@ Exploiting Discourse Structure for Spoken Dialogue Performance Analysis MihaiRotaru - Diane J.Litman + Diane J.Litman 85–93 W06-1611 rotaru-litman-2006-exploiting @@ -2141,8 +2141,8 @@ Is it Really that Difficult to Parse <fixed-case>G</fixed-case>erman? - SandraKübler - Erhard W.Hinrichs + SandraKübler + Erhard W.Hinrichs WolfgangMaier 111–119 W06-1614 @@ -2167,7 +2167,7 @@ Semantic Role Labeling of <fixed-case>N</fixed-case>om<fixed-case>B</fixed-case>ank: A Maximum Entropy Approach - Zheng PingJiang + Zheng PingJiang Hwee TouNg 138–145 W06-1617 @@ -2176,7 +2176,7 @@ Identification of Event Mentions and their Semantic Class StevenBethard - James H.Martin + James H.Martin 146–154 W06-1618 bethard-martin-2006-identification @@ -2187,15 +2187,15 @@ TakuyaMatsuzaki YoshimasaTsuruoka YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 155–163 W06-1619 ninomiya-etal-2006-extremely Multilingual Deep Lexical Acquisition for <fixed-case>HPSG</fixed-case>s via Supertagging - PhilBlunsom - TimothyBaldwin + PhilBlunsom + TimothyBaldwin 164–171 W06-1620 blunsom-baldwin-2006-multilingual @@ -2211,7 +2211,7 @@ Semantic Role Labeling via Instance-Based Learning - Chi-San AlthonLin + Chi-San AlthonLin Tony C.Smith 180–188 W06-1622 @@ -2221,7 +2221,7 @@ Inducing Temporal Graphs PhilipBramsen PawanDeshpande - Yoong KeokLee + Yoong KeokLee ReginaBarzilay 189–198 W06-1623 @@ -2231,7 +2231,7 @@ A Weakly Supervised Learning Approach for Spoken Language Understanding Wei-LinWu Ru-ZhanLu - Jian-YongDuan + Jian-YongDuan HuiLiu FengGao Yu-QuanChen @@ -2242,16 +2242,16 @@ <fixed-case>H</fixed-case>umor: Prosody Analysis and Automatic Recognition for <fixed-case>F</fixed-case>*<fixed-case>R</fixed-case>*<fixed-case>I</fixed-case>*<fixed-case>E</fixed-case>*<fixed-case>N</fixed-case>*<fixed-case>D</fixed-case>*<fixed-case>S</fixed-case>* AmrutaPurandare - DianeLitman + DianeLitman 208–215 W06-1625 purandare-litman-2006-humor Distributed Language Modeling for <tex-math>N</tex-math>-best List Re-ranking - YingZhang - Almut SiljaHildebrand - StephanVogel + YingZhang + Almut SiljaHildebrand + StephanVogel 216–223 W06-1626 zhang-etal-2006-distributed @@ -2267,8 +2267,8 @@ A Discriminative Model for Tree-to-Tree Translation BrookeCowan - IvonaKuc̆erová - MichaelCollins + IvonaKuc̆erová + MichaelCollins 232–241 W06-1628 cowan-etal-2006-discriminative @@ -2285,10 +2285,10 @@ Unsupervised Named Entity Transliteration Using Temporal and Phonetic Correlation TaoTao - Su-YounYoon + Su-YounYoon AndrewFister - RichardSproat - ChengXiangZhai + RichardSproat + ChengXiangZhai 250–257 W06-1630 tao-etal-2006-unsupervised @@ -2323,8 +2323,8 @@ AkaneYakushiji YusukeMiyao TomokoOhta - YukaTateisi - Jun’ichiTsujii + YukaTateisi + Jun’ichiTsujii 284–292 W06-1634 yakushiji-etal-2006-automatic @@ -2332,7 +2332,7 @@ Protein folding and chart parsing JuliaHockenmaier - Aravind K.Joshi + Aravind K.Joshi Ken A.Dill 293–300 W06-1635 @@ -2359,7 +2359,7 @@ Better Informed Training of Latent Syntactic Features MarkusDreyer - JasonEisner + JasonEisner 317–326 W06-1638 dreyer-eisner-2006-better @@ -2376,7 +2376,7 @@ Partially Supervised Coreference Resolution for Opinion Summarization through Structured Rule Learning VeselinStoyanov - ClaireCardie + ClaireCardie 336–344 W06-1640 stoyanov-cardie-2006-partially @@ -2406,8 +2406,8 @@ Style & Topic Language Model Adaptation Using <fixed-case>HMM</fixed-case>-<fixed-case>LDA</fixed-case> - Bo-June PaulHsu - JamesGlass + Bo-June PaulHsu + JamesGlass 373–381 W06-1644 hsu-glass-2006-style @@ -2415,8 +2415,8 @@ Text data acquisition for domain-specific language models AbhinavSethy - Panayiotis G.Georgiou - ShrikanthNarayanan + Panayiotis G.Georgiou + ShrikanthNarayanan 382–389 W06-1645 sethy-etal-2006-text @@ -2424,7 +2424,7 @@ Corrective Models for Speech Recognition of Inflected Languages IzhakShafran - KeithHall + KeithHall 390–398 W06-1646 shafran-hall-2006-corrective @@ -2447,9 +2447,9 @@ Partially Supervised Sense Disambiguation by Learning Sense Number from Tagged and Untagged Corpora - Zheng-YuNiu - Dong-HongJi - Chew LimTan + Zheng-YuNiu + Dong-HongJi + Chew LimTan 415–422 W06-1649 niu-etal-2006-partially @@ -2458,7 +2458,7 @@ Automatically Assessing Review Helpfulness Soo-MinKim PatrickPantel - TimChklovski + TimChklovski MarcoPennacchiotti 423–430 W06-1650 @@ -2467,17 +2467,17 @@ Joint Extraction of Entities and Relations for Opinion Recognition YejinChoi - EricBreck - ClaireCardie + EricBreck + ClaireCardie 431–439 W06-1651 choi-etal-2006-joint Feature Subsumption for Opinion Analysis - EllenRiloff + EllenRiloff SiddharthPatwardhan - JanyceWiebe + JanyceWiebe 440–448 W06-1652 riloff-etal-2006-feature @@ -2493,7 +2493,7 @@ Random Indexing using Statistical Weight Functions JamesGorman - James R.Curran + James R.Curran 457–464 W06-1654 gorman-curran-2006-random @@ -2525,15 +2525,15 @@ Entity Annotation based on Inverse Index Operations GaneshRamakrishnan SreeramBalakrishnan - SachindraJoshi + SachindraJoshi 492–500 W06-1658 ramakrishnan-etal-2006-entity Unsupervised Information Extraction Approach Using Graph Mutual Reinforcement - HanyHassan - AhmedHassan + HanyHassan + AhmedHassan OssamaEmam 501–508 W06-1659 @@ -2558,8 +2558,8 @@ Sentence ordering with manifold-based classification in multi-document summarization - Paul DJi - StephenPulman + Paul DJi + StephenPulman 526–533 W06-1662 ji-pulman-2006-sentence @@ -2567,7 +2567,7 @@ Quality Assessment of Large Scale Knowledge Resources MontseCuadros - GermanRigau + GermanRigau 534–541 W06-1663 cuadros-rigau-2006-quality @@ -2585,7 +2585,7 @@ Context-Dependent Term Relations for Information Retrieval JingBai - Jian-YunNie + Jian-YunNie GuihongCao 551–559 W06-1665 @@ -2594,7 +2594,7 @@ Loss Minimization in Parse Reranking IvanTitov - JamesHenderson + JamesHenderson 560–567 W06-1666 titov-henderson-2006-loss @@ -2602,9 +2602,9 @@ Unsupervised Relation Disambiguation with Order Identification Capabilities JinxiuChen - DonghongJi - Chew LimTan - ZhengyuNiu + DonghongJi + Chew LimTan + ZhengyuNiu 568–575 W06-1667 chen-etal-2006-unsupervised-relation @@ -2618,10 +2618,10 @@ Two graph-based algorithms for state-of-the-art <fixed-case>WSD</fixed-case> - EnekoAgirre - DavidMartínez - OierLópez de Lacalle - AitorSoroa + EnekoAgirre + DavidMartínez + OierLópez de Lacalle + AitorSoroa 585–593 W06-1669 agirre-etal-2006-two @@ -2653,9 +2653,9 @@ Solving the Problem of Cascading Errors: Approximate <fixed-case>B</fixed-case>ayesian Inference for Linguistic Annotation Pipelines - Jenny RoseFinkel - Christopher D.Manning - Andrew Y.Ng + Jenny RoseFinkel + Christopher D.Manning + Andrew Y.Ng 618–626 W06-1673 finkel-etal-2006-solving @@ -2674,11 +2674,11 @@ Web-based frequency dictionaries for medium density languages AndrásKornai - PéterHalácsy + PéterHalácsy ViktorNagy CsabaOravecz ViktorTrón - DánielVarga + DánielVarga W06-1701 kornai-etal-2006-web @@ -2702,7 +2702,7 @@ <fixed-case>CUCW</fixed-case>eb: A <fixed-case>C</fixed-case>atalan corpus built from the Web - GemmaBoleda + GemmaBoleda StefanBott RodrigoMeza CarlosCastillo @@ -2729,7 +2729,7 @@ <fixed-case>C</fixed-case>orporator: A tool for creating <fixed-case>RSS</fixed-case>-based specialized corpora - CédrickFairon + CédrickFairon W06-1707 fairon-2006-corporator @@ -2771,7 +2771,7 @@ Language and Reasoning for Question Answering: State of the Artand Future Directions - FarahBenamara + FarahBenamara W06-1801 benamara-2006-language @@ -2783,8 +2783,8 @@ Interpretation and Generation in a Knowledge-Based <fixed-case>T</fixed-case>utorial<fixed-case>S</fixed-case>ystem - Myroslava O.Dzikovska - Charles B.Callaway + Myroslava O.Dzikovska + Charles B.Callaway ElaineFarrow W06-1803 dzikovska-etal-2006-interpretation @@ -2820,7 +2820,7 @@ Numerical Data Integration for Cooperative Question-Answering - VéroniqueMoriceau + VéroniqueMoriceau W06-1808 moriceau-2006-numerical @@ -2860,7 +2860,7 @@ Cross-Cutting Aspects of Cross-Language Question Answering Systems BogdanSacaleanu - GünterNeumann + GünterNeumann W06-1903 sacaleanu-neumann-2006-cross @@ -2884,9 +2884,9 @@ <fixed-case>BRUJA</fixed-case>: Question Classification for <fixed-case>S</fixed-case>panish. Using Machine Translationand an <fixed-case>E</fixed-case>nglish Classifier - Miguel Á.García Cumbreras - L. AlfonsoUreña López - FernandoMartínez Santiago + Miguel Á.García Cumbreras + L. AlfonsoUreña López + FernandoMartínez Santiago W06-1906 a-garcia-cumbreras-etal-2006-bruja @@ -2904,7 +2904,7 @@ Dialogue based Question Answering System in <fixed-case>T</fixed-case>elugu RamiReddy NandiReddy - SivajiBandyopadhyay + SivajiBandyopadhyay W06-1908 reddy-etal-2006-dialogue @@ -2916,8 +2916,8 @@ Experiments Adapting an Open-Domain Question Answering System to the Geographical Domain Using Scope-Based Resources - DanielFerrés - HoracioRodríguez + DanielFerrés + HoracioRodríguez W06-1910 ferres-rodriguez-2006-experiments @@ -2986,7 +2986,7 @@ Word Sense Disambiguation Using Automatically Translated Sense Examples XinglongWang - DavidMartinez + DavidMartinez W06-2007 wang-martinez-2006-word @@ -3009,8 +3009,8 @@ Spatial Prepositions in Context: The Semantics of near in the Presence of Distractor Objects - Fintan J.Costello - John D.Kelleher + Fintan J.Costello + John D.Kelleher W06-2101 costello-kelleher-2006-spatial @@ -3022,7 +3022,7 @@ A Quantitative Approach to Preposition-Pronoun Contraction in <fixed-case>P</fixed-case>olish - BeataTrawiński + BeataTrawiński W06-2103 trawinski-2006-quantitative @@ -3057,34 +3057,34 @@ A Conceptual Analysis of the Notion of Instrumentality via a Multilingual Analysis AsaneeKawtrakul MukdaSuktarachan - BaliRanaivo-Malancon + BaliRanaivo-Malancon PekKuan - AchlaRaina + AchlaRaina SudeshnaSarkar AldaMari - SinaZarriess + SinaZarriess ElixabeteMurguia - PatrickSaint-Dizier + PatrickSaint-Dizier W06-2108 kawtrakul-etal-2006-conceptual <fixed-case>G</fixed-case>erman Particle Verbs and Pleonastic Prepositions InesRehbein - Josefvan Genabith + Josefvan Genabith W06-2109 rehbein-van-genabith-2006-german Automatic Identification of <fixed-case>E</fixed-case>nglish Verb Particle Constructions using Linguistic Features Su NamKim - TimothyBaldwin + TimothyBaldwin W06-2110 kim-baldwin-2006-automatic On the Prepositions which Introduce an Adjunct of Duration - FrankVan Eynde + FrankVan Eynde W06-2111 van-eynde-2006-prepositions @@ -3096,8 +3096,8 @@ Handling of Prepositions in <fixed-case>E</fixed-case>nglish to <fixed-case>B</fixed-case>engali Machine Translation - Sudip KumarNaskar - SivajiBandyopadhyay + Sudip KumarNaskar + SivajiBandyopadhyay W06-2113 naskar-bandyopadhyay-2006-handling @@ -3115,30 +3115,30 @@ Learning Effective Surface Text Patterns for Information Extraction GijsGeleijnse - JanKorst + JanKorst W06-2201 geleijnse-korst-2006-learning Simple Information Extraction (<fixed-case>SIE</fixed-case>): A Portable and Effective <fixed-case>IE</fixed-case> System - ClaudioGiuliano - AlbertoLavelli - LorenzaRomano + ClaudioGiuliano + AlbertoLavelli + LorenzaRomano W06-2202 giuliano-etal-2006-simple An Experimental Study on Boundary Classification Algorithms for Information Extraction using <fixed-case>SVM</fixed-case> - JoseIria + JoseIria NeilIreson - FabioCiravegna + FabioCiravegna W06-2203 iria-etal-2006-experimental Transductive Pattern Learning for Information Extraction BrianMcLernon - NicholasKushmerick + NicholasKushmerick W06-2204 mclernon-kushmerick-2006-transductive @@ -3153,14 +3153,14 @@ CarolineSporleder Mariekevan Erp TijnPorcelijn - Antalvan den Bosch + Antalvan den Bosch W06-2206 sporleder-etal-2006-spotting A Hybrid Approach for the Acquisition of Information Extraction Patterns MihaiSurdeanu - JordiTurmo + JordiTurmo AliciaAgeno W06-2207 surdeanu-etal-2006-hybrid @@ -3168,7 +3168,7 @@ Expanding the Recall of Relation Extraction by Bootstrapping JunjiTomita - StephenSoderland + StephenSoderland OrenEtzioni W06-2208 tomita-etal-2006-expanding @@ -3192,22 +3192,22 @@ Robust Parsing, Error Mining, Automated Lexical Acquisition, and Evaluation - Gertjanvan Noord + Gertjanvan Noord W06-2301 van-noord-2006-robust Another Evaluation of Anaphora Resolution Algorithms and a Comparison with <fixed-case>GETARUNS</fixed-case>’ Knowledge Rich Approach - RodolfoDelmonte + RodolfoDelmonte AntonellaBristot - Marco Aldo PiccolinoBoniforti + Marco Aldo PiccolinoBoniforti SaraTonelli W06-2302 delmonte-etal-2006-another Robust Parsing of the <fixed-case>P</fixed-case>roposition <fixed-case>B</fixed-case>ank - GabrieleMusillo + GabrieleMusillo PaolaMerlo W06-2303 musillo-merlo-2006-robust @@ -3220,7 +3220,7 @@ Robust Parsing: More with Less - KilianFoth + KilianFoth WolfgangMenzel W06-2305 foth-menzel-2006-robust @@ -3238,8 +3238,8 @@ Named Entities Translation Based on Comparable Corpora - IñakiAlegria - NereaEzeiza + IñakiAlegria + NereaEzeiza IzaskunFernandez W06-2401 alegria-etal-2006-named @@ -3247,13 +3247,13 @@ Grouping Multi-word Expressions According to Part-Of-Speech in Statistical Machine Translation PatrikLambert - RafaelBanchs + RafaelBanchs W06-2402 lambert-banchs-2006-grouping Automatic Extraction of <fixed-case>C</fixed-case>hinese Multiword Expressions with a Statistical Tool - Scott S.L.Piao + Scott S.L.Piao GuangfanSun PaulRayson QiYuan @@ -3268,15 +3268,15 @@ TakehitoUtsuro KiyotakaUchimoto SuguruMatsuyoshi - SatoshiSato - SeiichiNakagawa + SatoshiSato + SeiichiNakagawa W06-2404 tsuchiya-etal-2006-chunking Identifying idiomatic expressions using automatic word-alignment - Begoña VilladaMoirón - JörgTiedemann + Begoña VilladaMoirón + JörgTiedemann W06-2405 moiron-tiedemann-2006-identifying @@ -3296,7 +3296,7 @@ Multi-word verbs in a flective language: the case of <fixed-case>E</fixed-case>stonian - Heiki-JaanKaalep + Heiki-JaanKaalep KadriMuischnek W06-2408 kaalep-muischnek-2006-multi @@ -3310,7 +3310,7 @@ Multiword Units in an <fixed-case>MT</fixed-case> Lexicon - TamásVáradi + TamásVáradi W06-2410 varadi-2006-multiword @@ -3336,13 +3336,13 @@ Cluster Stopping Rules for Word Sense Discrimination GuerganaSavova TerryTherneau - ChristopherChute + ChristopherChute W06-2502 savova-etal-2006-cluster Relating <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Senses for Word Sense Disambiguation - DianaMcCarthy + DianaMcCarthy W06-2503 mccarthy-2006-relating @@ -3355,7 +3355,7 @@ Multilingual versus Monolingual <fixed-case>WSD</fixed-case> LuciaSpecia - Maria das GraçasVolpe Nunes + Maria das GraçasVolpe Nunes MarkStevenson Gabriela Castelo BrancoRibeiro W06-2505 @@ -3364,7 +3364,7 @@ Characterizing Response Types and Revealing Noun Ambiguity in <fixed-case>G</fixed-case>erman Association Norms AlissaMelinger - SabineSchulte im Walde + SabineSchulte im Walde AndreaWeber W06-2506 melinger-etal-2006-characterizing @@ -3391,8 +3391,8 @@ Constraint Satisfaction Inference: Non-probabilistic Global Inference for Sequence Labelling SanderCanisius - Antalvan den Bosch - WalterDaelemans + Antalvan den Bosch + WalterDaelemans W06-2602 canisius-etal-2006-constraint @@ -3409,7 +3409,7 @@ A Multiclassifier based Document Categorization System: profiting from the Singular Value Decomposition Dimensionality Reduction Technique AnaZelaia - IñakiAlegria + IñakiAlegria OlatzArregi BasilioSierra W06-2604 @@ -3425,9 +3425,9 @@ Reranking Translation Hypotheses Using Structural Properties - SašaHasan + SašaHasan OliverBender - HermannNey + HermannNey W06-2606 hasan-etal-2006-reranking @@ -3435,14 +3435,14 @@ Tree Kernel Engineering in Semantic Role Labeling Systems AlessandroMoschitti DanielePighin - RobertoBasili + RobertoBasili W06-2607 moschitti-etal-2006-tree Syntagmatic Kernels: a Word Sense Disambiguation Case Study - ClaudioGiuliano - AlfioGliozzo + ClaudioGiuliano + AlfioGliozzo CarloStrapparava W06-2608 giuliano-etal-2006-syntagmatic @@ -3490,7 +3490,7 @@ WouterAlink ValentinJijkoun DavidAhn - Maartende Rijke + Maartende Rijke PeterBoncz Arjende Vries W06-2701 @@ -3498,11 +3498,11 @@ Annotation and Disambiguation of Semantic Types in Biomedical Text: A Cascaded Approach to Named Entity Recognition - DietrichRebholz-Schuhmann + DietrichRebholz-Schuhmann HaraldKirsch SylvainGaudan MiguelArregui - GoranNenadic + GoranNenadic W06-2702 rebholz-schuhmann-etal-2006-annotation @@ -3524,7 +3524,7 @@ Multi-dimensional Annotation and Alignment in an <fixed-case>E</fixed-case>nglish-<fixed-case>G</fixed-case>erman Translation Corpus - SilviaHansen-Schirra + SilviaHansen-Schirra StellaNeumann MihaelaVela W06-2705 @@ -3547,8 +3547,8 @@ Tools for hierarchical annotation of typed dialogue - MyroslavaDzikovska - CharlesCallaway + MyroslavaDzikovska + CharlesCallaway ElaineFarrow W06-2708 dzikovska-etal-2006-tools @@ -3569,7 +3569,7 @@ The <fixed-case>SAMMIE</fixed-case> Multimodal Dialogue Corpus Meets the Nite <fixed-case>XML</fixed-case> Toolkit - IvanaKruijff-Korbayová + IvanaKruijff-Korbayová VerenaRieser CiprianGerstenberger JanSchehl @@ -3579,22 +3579,22 @@ Representing and Accessing Multi-Level Annotations in <fixed-case>MMAX</fixed-case>2 - ChristophMüller + ChristophMüller W06-2712 muller-2006-representing Representing and Accessing Multilevel Linguistic Annotation using the <fixed-case>MEANING</fixed-case> Format - EmanuelePianta + EmanuelePianta LuisaBentivogli - ChristianGirardi - BernardoMagnini + ChristianGirardi + BernardoMagnini W06-2713 pianta-etal-2006-representing Middleware for Creating and Combining Multi-dimensional <fixed-case>NLP</fixed-case> Markup - UlrichSchäfer + UlrichSchäfer W06-2714 schafer-2006-middleware @@ -3603,7 +3603,7 @@ MaikStührenberg AndreasWitt DanielaGoecke - DieterMetzing + DieterMetzing OliverSchonefeld W06-2715 stuhrenberg-etal-2006-multidimensional @@ -3611,14 +3611,14 @@ Layering and Merging Linguistic Annotations KeithSuderman - NancyIde + NancyIde W06-2716 suderman-ide-2006-layering <fixed-case>XML</fixed-case>-based Phrase Alignment in Parallel Treebanks MartinVolk - SofiaGustafson-Capková + SofiaGustafson-Capková JoakimLundborg TorstenMarek YvonneSamuelsson @@ -3671,7 +3671,7 @@ Learning to Recognize Blogs: A Preliminary Exploration ErikElgersma - Maartende Rijke + Maartende Rijke W06-2805 elgersma-de-rijke-2006-learning @@ -3692,27 +3692,27 @@ Anomaly Detecting within Dynamic <fixed-case>C</fixed-case>hinese Chat Text YunqingXia - Kam-FaiWong + Kam-FaiWong W06-2808 xia-wong-2006-anomaly A proposal to automatically build and maintain gazetteers for Named Entity Recognition by using <fixed-case>W</fixed-case>ikipedia AntonioToral - RafaelMuñoz + RafaelMuñoz W06-2809 toral-munoz-2006-proposal Finding Similar Sentences across Multiple Languages in <fixed-case>W</fixed-case>ikipedia - Sisay FissahaAdafre - Maartende Rijke + Sisay FissahaAdafre + Maartende Rijke W06-2810 adafre-de-rijke-2006-finding Multilingual interactive experiments with <fixed-case>F</fixed-case>lickr - Paul D.Clough + Paul D.Clough JulioGonzales JussiKarlgren W06-2811 @@ -3723,7 +3723,7 @@ Proceedings of the Tenth Conference on Computational Natural Language Learning (CoNLL-X) W06-29 - LluísMàrquez + LluísMàrquez DanKlein Association for Computational Linguistics
New York City
@@ -3737,7 +3737,7 @@ A Mission for Computational Natural Language Learning - WalterDaelemans + WalterDaelemans 1–5 W06-2901 daelemans-2006-mission @@ -3745,7 +3745,7 @@ Porting Statistical Parsers with Data-Defined Kernels IvanTitov - JamesHenderson + JamesHenderson 6–13 W06-2902 titov-henderson-2006-porting @@ -3788,7 +3788,7 @@ Investigating Lexical Substitution Scoring for Subtitle Generation OrenGlickman IdoDagan - WalterDaelemans + WalterDaelemans MikaelaKeller SamyBengio 45–52 @@ -3797,7 +3797,7 @@ Semantic Role Recognition Using Kernels on Weighted Marked Ordered Labeled Trees - Jun’ichiKazama + Jun’ichiKazama KentaroTorisawa 53–60 W06-2908 @@ -3807,21 +3807,21 @@ Semantic Role Labeling via Tree Kernel Joint Inference AlessandroMoschitti DanielePighin - RobertoBasili + RobertoBasili 61–68 W06-2909 moschitti-etal-2006-semantic Can Human Verb Associations Help Identify Salient Features for Semantic Verb Classification? - SabineSchulte im Walde + SabineSchulte im Walde 69–76 W06-2910 schulte-im-walde-2006-human-verb Applying Alternating Structure Optimization to Word Sense Disambiguation - Rie KubotaAndo + Rie KubotaAndo 77–84 W06-2911 ando-2006-applying @@ -3836,7 +3836,7 @@ A Lattice-Based Framework for Enhancing Statistical Parsers with Information from Unlabeled Corpora MichaelaAtterer - HinrichSchütze + HinrichSchütze 93–100 W06-2913 atterer-schutze-2006-lattice @@ -3845,7 +3845,7 @@ Word Distributions for Thematic Segmentation in a <fixed-case>S</fixed-case>upport <fixed-case>V</fixed-case>ector <fixed-case>M</fixed-case>achine Approach MariaGeorgescul AlexanderClark - SusanArmstrong + SusanArmstrong 101–108 W06-2914 georgescul-etal-2006-word @@ -3854,8 +3854,8 @@ Which Side are You on? Identifying Perspectives at the Document and Sentence Levels Wei-HaoLin TheresaWilson - JanyceWiebe - AlexanderHauptmann + JanyceWiebe + AlexanderHauptmann 109–116 W06-2915 lin-etal-2006-side @@ -3877,7 +3877,7 @@ Using Gazetteers in Discriminative Information Extraction - AndrewSmith + AndrewSmith MilesOsborne 133–140 W06-2918 @@ -3885,9 +3885,9 @@ A Context Pattern Induction Method for Named Entity Extraction - Partha PratimTalukdar + Partha PratimTalukdar ThorstenBrants - MarkLiberman + MarkLiberman FernandoPereira 141–148 W06-2919 @@ -3917,7 +3917,7 @@ <fixed-case>L</fixed-case>ing<fixed-case>P</fixed-case>ars, a Linguistically Inspired, Language-Independent Machine Learner for Dependency Treebanks - EckhardBick + EckhardBick 171–175 W06-2923 bick-2006-lingpars @@ -3926,9 +3926,9 @@ Dependency Parsing by Inference over High-recall Dependency Predictions SanderCanisius ToineBogers - Antalvan den Bosch + Antalvan den Bosch JeroenGeertzen - ErikTjong Kim Sang + ErikTjong Kim Sang 176–180 W06-2924 canisius-etal-2006-dependency @@ -3955,14 +3955,14 @@ Multi-lingual Dependency Parsing at <fixed-case>NAIST</fixed-case> YuchangCheng MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 191–195 W06-2927 cheng-etal-2006-multi Dependency Parsing with Reference to <fixed-case>S</fixed-case>lovene, <fixed-case>S</fixed-case>panish and <fixed-case>S</fixed-case>wedish - SimonCorston-Oliver + SimonCorston-Oliver AnthonyAue 196–200 W06-2928 @@ -3971,8 +3971,8 @@ Vine Parsing and Minimum Risk Reranking for Speed and Precision MarkusDreyer - David A.Smith - Noah A.Smith + David A.Smith + Noah A.Smith 201–205 W06-2929 dreyer-etal-2006-vine @@ -4009,7 +4009,7 @@ JoakimNivre JohanHall JensNilsson - GülşenEryiǧit + GülşenEryiǧit SvetoslavMarinov 221–225 W06-2933 @@ -4018,8 +4018,8 @@ Multi-lingual Dependency Parsing with Incremental Integer Linear Programming SebastianRiedel - RuketÇakıcı - IvanMeza-Ruiz + RuketÇakıcı + IvanMeza-Ruiz 226–230 W06-2934 riedel-etal-2006-multi @@ -4060,7 +4060,7 @@ Proceedings of the Interactive Question Answering Workshop at HLT-NAACL 2006 W06-30 - NickWebb + NickWebb Association for Computational Linguistics
New York, NY, USA
June @@ -4073,11 +4073,11 @@ Contextual phenomena and thematic relations in database <fixed-case>QA</fixed-case> dialogues: results from a <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z Experiment - NúriaBertomeu + NúriaBertomeu HansUszkoreit AnetteFrank - Hans-UlrichKrieger - BrigitteJörg + Hans-UlrichKrieger + BrigitteJörg 1–8 W06-3001 bertomeu-etal-2006-contextual @@ -4085,8 +4085,8 @@ <fixed-case>W</fixed-case>o<fixed-case>Z</fixed-case> Simulation of Interactive Question Answering TsuneakiKato - Jun’ichiFukumoto - FumitoMasui + Jun’ichiFukumoto + FumitoMasui NorikoKando 9–16 W06-3002 @@ -4094,13 +4094,13 @@ Modeling Reference Interviews as a Basis for Improving Automatic <fixed-case>QA</fixed-case> Systems - Nancy J.McCracken + Nancy J.McCracken Anne R.Diekema GrantIngersoll Sarah C.Harwell Eileen E.Allen OzgurYilmazel - Elizabeth D.Liddy + Elizabeth D.Liddy 17–24 W06-3003 mccracken-etal-2006-modeling @@ -4108,7 +4108,7 @@ Enhanced Interactive Question-Answering with Conditional Random Fields AndrewHickl - SandaHarabagiu + SandaHarabagiu 25–32 W06-3004 hickl-harabagiu-2006-enhanced @@ -4117,14 +4117,14 @@ A Data Driven Approach to Relevancy Recognition for Contextual Question Answering FanYang JunlanFeng - GiuseppeDi Fabbrizio + GiuseppeDi Fabbrizio 33–40 W06-3005 yang-etal-2006-data Answering questions of Information Access Dialogue (<fixed-case>IAD</fixed-case>) task using ellipsis handling of follow-up questions - JunichiFukumoto + JunichiFukumoto 41–48 W06-3006 fukumoto-2006-answering @@ -4132,7 +4132,7 @@ User-Centered Evaluation of Interactive Question Answering Systems DianeKelly - PaulKantor + PaulKantor EmileMorse JeanScholtz YingSun @@ -4159,21 +4159,21 @@ Morpho-syntactic Information for Automatic Error Analysis of Statistical Machine Translation Output - MajaPopović - Adriàde Gispert - DeepaGupta + MajaPopović + Adriàde Gispert + DeepaGupta PatrikLambert - HermannNey - José B.Mariño + HermannNey + José B.Mariño MarcelloFederico - RafaelBanchs + RafaelBanchs 1–6 W06-3101 popovic-etal-2006-morpho Initial Explorations in <fixed-case>E</fixed-case>nglish to <fixed-case>T</fixed-case>urkish Statistical Machine Translation - İlknurDurgar El-Kahlout + İlknurDurgar El-Kahlout KemalOflazer 7–14 W06-3102 @@ -4181,18 +4181,18 @@ Morpho-syntactic <fixed-case>A</fixed-case>rabic Preprocessing for <fixed-case>A</fixed-case>rabic to <fixed-case>E</fixed-case>nglish Statistical Machine Translation - AnasEl Isbihani + AnasEl Isbihani ShahramKhadivi OliverBender - HermannNey + HermannNey 15–22 W06-3103 el-isbihani-etal-2006-morpho Quasi-Synchronous Grammars: Alignment by Soft Projection of Syntactic Dependencies - DavidSmith - JasonEisner + DavidSmith + JasonEisner 23–30 W06-3104 smith-eisner-2006-quasi @@ -4200,7 +4200,7 @@ Why Generative Phrase Models Underperform Surface Heuristics JohnDeNero - DanGillick + DanGillick JamesZhang DanKlein 31–38 @@ -4209,7 +4209,7 @@ Phrase-Based <fixed-case>SMT</fixed-case> with Shallow Tree-Phrases - PhilippeLanglais + PhilippeLanglais FabrizioGotti 39–46 W06-3106 @@ -4218,7 +4218,7 @@ Searching for alignments in <fixed-case>SMT</fixed-case>. A novel approach based on an Estimation of Distribution Algorithm LuisRodríguez - IsmaelGarcía-Varea + IsmaelGarcía-Varea José A.Gámez 47–54 W06-3107 @@ -4227,16 +4227,16 @@ Discriminative Reordering Models for Statistical Machine Translation RichardZens - HermannNey + HermannNey 55–63 W06-3108 zens-ney-2006-discriminative Generalized Stack Decoding Algorithms for Statistical Machine Translation - DanielOrtiz Martínez - IsmaelGarcía Varea - FranciscoCasacuberta + DanielOrtiz Martínez + IsmaelGarcía Varea + FranciscoCasacuberta 64–71 W06-3109 ortiz-martinez-etal-2006-generalized @@ -4244,7 +4244,7 @@ N-Gram Posterior Probabilities for Statistical Machine Translation RichardZens - HermannNey + HermannNey 72–77 W06-3110 zens-ney-2006-n @@ -4253,7 +4253,7 @@ Partitioning Parallel Documents Using Binary Segmentation JiaXu RichardZens - HermannNey + HermannNey 78–85 W06-3111 xu-etal-2006-partitioning @@ -4262,7 +4262,7 @@ Contextual Bitext-Derived Paraphrases in Automatic <fixed-case>MT</fixed-case> Evaluation KarolinaOwczarzak DeclanGroves - JosefVan Genabith + JosefVan Genabith AndyWay 86–93 W06-3112 @@ -4297,15 +4297,15 @@ Mood at work: Ramses versus Pharaoh AlexandrePatry FabrizioGotti - PhilippeLanglais + PhilippeLanglais 126–129 W06-3116 patry-etal-2006-mood-work Stochastic Inversion Transduction Grammars for Obtaining Word Phrases for Phrase-based Statistical Machine Translation - Joan AndreuSánchez - José MiguelBenedí + Joan AndreuSánchez + José MiguelBenedí 130–133 W06-3117 sanchez-benedi-2006-stochastic @@ -4333,14 +4333,14 @@ <fixed-case>TALP</fixed-case> Phrase-based statistical translation system for <fixed-case>E</fixed-case>uropean language pairs - Marta R.Costa-jussà - Josep M.Crego - Adriàde Gispert + Marta R.Costa-jussà + Josep M.Crego + Adriàde Gispert PatrikLambert MaximKhalilov - José B.Mariño - José A. R.Fonollosa - RafaelBanchs + José B.Mariño + José A. R.Fonollosa + RafaelBanchs 142–145 W06-3120 costa-jussa-etal-2006-talp @@ -4348,9 +4348,9 @@ Phramer - An Open Source Statistical Phrase-Based Translator MarianOlteanu - ChrisDavis + ChrisDavis IonutVolosen - DanMoldovan + DanMoldovan 146–149 W06-3121 olteanu-etal-2006-phramer @@ -4359,7 +4359,7 @@ Language Models and Reranking for Machine Translation MarianOlteanu PasinSuriyentrakorn - DanMoldovan + DanMoldovan 150–153 W06-3122 olteanu-etal-2006-language @@ -4385,22 +4385,22 @@ N-gram-based <fixed-case>SMT</fixed-case> System Enhanced with Reordering Patterns - Josep M.Crego - Adriàde Gispert + Josep M.Crego + Adriàde Gispert PatrikLambert - Marta R.Costa-jussà + Marta R.Costa-jussà MaximKhalilov - RafaelBanchs - José B.Mariño - José A. R.Fonollosa + RafaelBanchs + José B.Mariño + José A. R.Fonollosa 162–165 W06-3125 crego-etal-2006-n The <fixed-case>LDV</fixed-case>-<fixed-case>COMBO</fixed-case> system for <fixed-case>SMT</fixed-case> - JesúsGiménez - LluísMàrquez + JesúsGiménez + LluísMàrquez 166–169 W06-3126 gimenez-marquez-2006-ldv @@ -4425,7 +4425,7 @@ A Combined Phonetic-Phonological Approach to Estimating Cross-Language Phoneme Similarity in an <fixed-case>ASR</fixed-case> Environment LynetteMelnar - ChenLiu + ChenLiu 1–10 W06-3201 melnar-liu-2006-combined @@ -4462,7 +4462,7 @@ Improved morpho-phonological sequence processing with constraint satisfaction inference - Antalvan den Bosch + Antalvan den Bosch SanderCanisius 41–49 W06-3206 @@ -4470,7 +4470,7 @@ Richness of the Base and Probabilistic Unsupervised Learning in <fixed-case>O</fixed-case>ptimality <fixed-case>T</fixed-case>heory - GajaJarosz + GajaJarosz 50–59 W06-3207 jarosz-2006-richness @@ -4479,7 +4479,7 @@ Morphology Induction from Limited Noisy Data Using Approximate String Matching BurcuKaragol-Ayan DavidDoermann - AmyWeinberg + AmyWeinberg 60–68 W06-3208 karagol-ayan-etal-2006-morphology @@ -4503,8 +4503,8 @@ Proceedings of the HLT-NAACL BioNLP Workshop on Linking Natural Language and Biology W06-33 - KarinVerspoor - Kevin BretonnelCohen + KarinVerspoor + Kevin BretonnelCohen BenGoertzel InderjeetMani Association for Computational Linguistics @@ -4549,7 +4549,7 @@ ChristianPosse BanuGopalan StephenTratz - MichelleGregory + MichelleGregory 25–32 W06-3304 sanfilippo-etal-2006-integrating @@ -4568,15 +4568,15 @@ KevinMurphy YangJin JessicaKim - PeterWhite + PeterWhite 41–48 W06-3306 fang-etal-2006-human Integrating Co-occurrence Statistics with Information Extraction for Robust Retrieval of Protein Interactions from <fixed-case>M</fixed-case>edline - RazvanBunescu - RaymondMooney + RazvanBunescu + RaymondMooney ArunRamani EdwardMarcotte 49–56 @@ -4585,14 +4585,14 @@ <fixed-case>BIOSMILE</fixed-case>: Adapting Semantic Role Labeling for Biomedical Verbs: - Richard Tzong-HanTsai + Richard Tzong-HanTsai Wen-ChiChou Yu-ChunLin Cheng-LungSung WeiKu Ying-ShanSu Ting-YiSung - Wen-LianHsu + Wen-LianHsu 57–64 W06-3308 tsai-etal-2006-biosmile @@ -4602,7 +4602,7 @@ JimmyLin DamianosKarakos DinaDemner-Fushman - SanjeevKhudanpur + SanjeevKhudanpur 65–72 W06-3309 lin-etal-2006-generative @@ -4611,7 +4611,7 @@ Exploring Text and Image Features to Classify Images in Bioscience Literature BarryRafkind MinsukLee - Shih-FuChang + Shih-FuChang HongYu 73–80 W06-3310 @@ -4644,7 +4644,7 @@ A Graph-Search Framework for <fixed-case>G</fixed-case>ene<fixed-case>I</fixed-case>d Ranking - WilliamCohen + WilliamCohen 93–95 W06-3315 cohen-2006-graph @@ -4686,21 +4686,21 @@ Refactoring Corpora - Helen L.Johnson - William A.Baumgartner Jr. - MartinKrallinger + Helen L.Johnson + William A.Baumgartner Jr. + MartinKrallinger K. BretonnelCohen - LawrenceHunter + LawrenceHunter 116–117 W06-3320 johnson-etal-2006-refactoring Rapid Adaptation of <fixed-case>POS</fixed-case> Tagging for Domain Specific Uses - John E.Miller + John E.Miller MichaelBloodgood ManabuTorii - K.Vijay-Shanker + K.Vijay-Shanker 118–119 W06-3321 miller-etal-2006-rapid @@ -4739,17 +4739,17 @@ Summarizing Key Concepts using Citation Sentences - Ariel S.Schwartz - MartiHearst + Ariel S.Schwartz + MartiHearst 134–135 W06-3326 schwartz-hearst-2006-summarizing Subdomain adaptation of a <fixed-case>POS</fixed-case> tagger with a small corpus - YukaTateisi + YukaTateisi YoshimasaTsuruoka - Jun’ichiTsujii + Jun’ichiTsujii 136–137 W06-3327 tateisi-etal-2006-subdomain @@ -4767,7 +4767,7 @@ Proceedings of the Analyzing Conversations in Text and Speech W06-34 - EduardHovy + EduardHovy KlausZechner LiangZhou Association for Computational Linguistics @@ -4783,7 +4783,7 @@ Prosodic Correlates of Rhetorical Relations GabrielMurray - MaiteTaboada + MaiteTaboada SteveRenals 1–7 W06-3401 @@ -4812,7 +4812,7 @@ You Are What You Say: Using Meeting Participants’ Speech to Detect their Roles and Expertise SatanjeevBanerjee - AlexanderRudnicky + AlexanderRudnicky 23–30 W06-3404 banerjee-rudnicky-2006-say @@ -4828,8 +4828,8 @@ Improving “Email Speech Acts” Analysis via N-gram Selection - VitorCarvalho - WilliamCohen + VitorCarvalho + WilliamCohen 35–41 W06-3406 carvalho-cohen-2006-improving @@ -4837,14 +4837,14 @@ Topic-Segmentation of Dialogue JaimeArguello - CarolynRosé + CarolynRosé 42–49 W06-3407 arguello-rose-2006-topic <fixed-case>C</fixed-case>h<fixed-case>AT</fixed-case>: A Time-Linked System for Conversational Analysis - Michelle L.Gregory + Michelle L.Gregory DouglasLove StuartRose AnneSchur @@ -4873,7 +4873,7 @@ Proceedings of the Third Workshop on Scalable Natural Language Understanding W06-35 - JamesAllen + JamesAllen JanAlexandersson JeromeFeldman RobertPorzel @@ -4896,15 +4896,15 @@ Backbone Extraction and Pruning for Speeding Up a Deep Parser for Dialogue Systems - Myroslava O.Dzikovska - Carolyn P.Rosé + Myroslava O.Dzikovska + Carolyn P.Rosé 9–16 W06-3502 dzikovska-rose-2006-backbone Understanding Complex Natural Language Explanations in Tutorial Applications - Pamela W.Jordan + Pamela W.Jordan MaximMakatchev UmaraniPappuswamy 17–24 @@ -4913,10 +4913,10 @@ Increasing the coverage of a domain independent dialogue lexicon with <fixed-case>VERBNET</fixed-case> - BenoitCrabbé - Myroslava O.Dzikovska - Williamde Beaumont - MarySwift + BenoitCrabbé + Myroslava O.Dzikovska + Williamde Beaumont + MarySwift 25–32 W06-3504 crabbe-etal-2006-increasing @@ -4932,7 +4932,7 @@ Catching Metaphors MattGedigian JohnBryant - SriniNarayanan + SriniNarayanan BranimirCiric 41–48 W06-3506 @@ -4954,7 +4954,7 @@ Embodied construction grammar as layered modal languages - AndersSogaard + AndersSogaard 65–72 W06-3509 sogaard-2006-embodied @@ -4974,10 +4974,10 @@ W06-36 RyanMcDonald CharlesSutton - HalDaumé III + HalDaumé III AndrewMcCallum FernandoPereira - JeffBilmes + JeffBilmes Association for Computational Linguistics
New York City, New York
June @@ -4992,21 +4992,21 @@ A Syntax-Directed Translator with Extended Domain of Locality LiangHuang KevinKnight - AravindJoshi + AravindJoshi 1–8 W06-3601 huang-etal-2006-syntax
Efficient Dynamic Programming Search Algorithms for Phrase-Based <fixed-case>SMT</fixed-case> - ChristophTillmann + ChristophTillmann 9–16 W06-3602 tillmann-2006-efficient Computational Challenges in Parsing by Classification - JosephTurian + JosephTurian I. DanMelamed 17–24 W06-3603 @@ -5014,7 +5014,7 @@ All-word Prediction as the Ultimate Confusible Disambiguation - Antalvan den Bosch + Antalvan den Bosch 25–32 W06-3604 van-den-bosch-2006-word @@ -5040,7 +5040,7 @@ Re-Ranking Algorithms for Name Tagging HengJi CynthiaRudin - RalphGrishman + RalphGrishman 49–56 W06-3607 ji-etal-2006-ranking @@ -5050,10 +5050,10 @@ Proceedings of the First International Workshop on Medical Speech Translation W06-37 - PierretteBouillon + PierretteBouillon FarzadEhsani - RobertFrederking - MannyRayner + RobertFrederking + MannyRayner Association for Computational Linguistics
New York, New York
June @@ -5079,7 +5079,7 @@ MannyRayner MarianneSantaholma MarianneStarlander - Beth AnnHockey + Beth AnnHockey 5–12 W06-3702 chatzichrisafis-etal-2006-evaluating @@ -5087,8 +5087,8 @@ Speech to Speech Translation for Medical Triage in <fixed-case>K</fixed-case>orean FarzadEhsani - JimKinzey - DemetriosMaster + JimKinzey + DemetriosMaster KarenLesea HunilPark 13–19 @@ -5106,7 +5106,7 @@ Language Engineering and the Pathway to Healthcare: A User-Oriented View - HaroldSomers + HaroldSomers 28–35 W06-3705 somers-2006-language @@ -5126,7 +5126,7 @@ NikosChatzichrisafis MarianneSantaholma MarianneStarlander - Beth AnnHockey + Beth AnnHockey YukieNakao HitoshiIsahara KyokoKanzaki @@ -5137,8 +5137,8 @@ <fixed-case>S</fixed-case>-<fixed-case>MINDS</fixed-case> 2-Way Speech-to-Speech Translation System FarzadEhsani - JimKinzey - DemetriosMaster + JimKinzey + DemetriosMaster KarenSudre DavidDomingo HunilPark @@ -5166,16 +5166,16 @@ <fixed-case>IBM</fixed-case> <fixed-case>MASTOR</fixed-case> SYSTEM: Multilingual Automatic Speech-to-Speech Translator - YuqingGao + YuqingGao BowenZhou - RuhiSarikaya + RuhiSarikaya MohamedAfify Hong-KwangKuo Wei-zhongZhu YonggangDeng CharlesProsser WeiZhang - LaurentBesacier + LaurentBesacier 53–56 W06-3711 gao-etal-2006-ibm @@ -5185,8 +5185,8 @@ Proceedings of TextGraphs: the First Workshop on Graph Based Methods for Natural Language Processing W06-38 - RadaMihalcea - DragomirRadev + RadaMihalcea + DragomirRadev Association for Computational Linguistics
New York City
June @@ -5200,16 +5200,16 @@ A Graphical Framework for Contextual Search and Name Disambiguation in Email EinatMinkov - WilliamCohen - AndrewNg + WilliamCohen + AndrewNg 1–8 W06-3801 minkov-etal-2006-graphical Graph Based Semi-Supervised Approach for Information Extraction - HanyHassan - AhmedHassan + HanyHassan + AhmedHassan SaraNoeman 9–16 W06-3802 @@ -5224,7 +5224,7 @@ Measuring Aboutness of an Entity in a Text - Marie-FrancineMoens + Marie-FrancineMoens PatrickJeuniaux RoxanaAngheluta RudradebMitra @@ -5234,15 +5234,15 @@ A Study of Two Graph Algorithms in Topic-driven Summarization - ViviNastase - StanSzpakowicz + ViviNastase + StanSzpakowicz 29–32 W06-3805 nastase-szpakowicz-2006-study Similarity between Pairs of Co-indexed Trees for Textual Entailment Recognition - Fabio MassimoZanzotto + Fabio MassimoZanzotto AlessandroMoschitti 33–36 W06-3806 @@ -5250,15 +5250,15 @@ Learning of Graph-based Question Answering Rules - DiegoMollá + DiegoMollá 37–44 W06-3807 molla-2006-learning Seeing stars when there aren’t many stars: Graph-based semi-supervised learning for sentiment categorization - AndrewGoldberg - XiaojinZhu + AndrewGoldberg + XiaojinZhu 45–52 W06-3808 goldberg-zhu-2006-seeing @@ -5273,8 +5273,8 @@ Graph-based Generalized Latent Semantic Analysis for Document Representation - IrinaMatveeva - Gina-AnneLevow + IrinaMatveeva + Gina-AnneLevow 61–64 W06-3810 matveeva-levow-2006-graph @@ -5290,25 +5290,25 @@ <fixed-case>C</fixed-case>hinese Whispers - an Efficient Graph Clustering Algorithm and its Application to Natural Language Processing Problems - ChrisBiemann + ChrisBiemann 73–80 W06-3812 biemann-2006-chinese Matching syntactic-semantic graphs for semantic relation assignment - ViviNastase - StanSzpakowicz + ViviNastase + StanSzpakowicz 81–88 W06-3813 nastase-szpakowicz-2006-matching Evaluating and optimizing the parameters of an unsupervised graph-based <fixed-case>WSD</fixed-case> algorithm - EnekoAgirre - DavidMartínez - OierLópez de Lacalle - AitorSoroa + EnekoAgirre + DavidMartínez + OierLópez de Lacalle + AitorSoroa 89–96 W06-3814 agirre-etal-2006-evaluating @@ -5344,9 +5344,9 @@ Extracting formal specifications from natural language regulatory documents NikhilDinesh - AravindJoshi + AravindJoshi InsupLee - BonnieWebber + BonnieWebber W06-3902 dinesh-etal-2006-extracting @@ -5366,7 +5366,7 @@ Towards a redundancy elimination algorithm for underspecified descriptions - LeonardoLesmo + LeonardoLesmo LivioRobaldo JelleGerbrandy W06-3905 @@ -5442,7 +5442,7 @@ Ingredients of a first-order account of bridging - PhilippCimiano + PhilippCimiano W06-3915 cimiano-2006-ingredients diff --git a/data/xml/W07.xml b/data/xml/W07.xml index 111528f2db..0e560d51b5 100644 --- a/data/xml/W07.xml +++ b/data/xml/W07.xml @@ -27,7 +27,7 @@ Corpus-driven Metaphor Harvesting AstridReining - BirteLönneker-Rodman + BirteLönneker-Rodman 5–12 W07-0102 reining-lonneker-rodman-2007-corpus @@ -35,7 +35,7 @@ Hunting Elusive Metaphors Using Lexical Resources. SaisureshKrishnakumaran - XiaojinZhu + XiaojinZhu 13–20 W07-0103 krishnakumaran-zhu-2007-hunting @@ -53,10 +53,10 @@ Proceedings of the Second Workshop on TextGraphs: Graph-Based Algorithms for Natural Language Processing W07-02 - ChrisBiemann - IrinaMatveeva - RadaMihalcea - DragomirRadev + ChrisBiemann + IrinaMatveeva + RadaMihalcea + DragomirRadev Association for Computational Linguistics
Rochester, NY, USA
2007 @@ -86,7 +86,7 @@ Daniel S.Leite Lucia H. M.Rino Thiago A. S.Pardo - Maria das Graças V.Nunes + Maria das Graças V.Nunes 17–24 W07-0203 leite-etal-2007-extractive @@ -117,7 +117,7 @@
Latent Semantic Grammar Induction: Context, Projectivity, and Prior Distributions - Andrew M.Olney + Andrew M.Olney 45–52 W07-0207 olney-2007-latent @@ -125,15 +125,15 @@ Learning to Transform Linguistic Graphs ValentinJijkoun - Maartende Rijke + Maartende Rijke 53–60 W07-0208 jijkoun-de-rijke-2007-learning Semi-supervised Algorithm for Human-Computer Dialogue Mining - Calkin S.Montero - KenjiAraki + Calkin S.Montero + KenjiAraki 61–64 W07-0209 montero-araki-2007-semi @@ -151,9 +151,9 @@ <fixed-case>DLSITE</fixed-case>-2: Semantic Similarity Based on Syntactic Dependency Trees Applied to Textual Entailment DanielMicol - ÓscarFerrández - RafaelMuñoz - ManuelPalomar + ÓscarFerrández + RafaelMuñoz + ManuelPalomar 73–80 W07-0211 micol-etal-2007-dlsite @@ -184,7 +184,7 @@ W07-03 FuliangWeng Ye-YiWang - GokhanTur + GokhanTur Association for Computational Linguistics
Rochester, NY
April @@ -197,7 +197,7 @@ Applying <fixed-case>POMDP</fixed-case>s to Dialog Systems in the Troubleshooting Domain - JasonWilliams + JasonWilliams 1–8 W07-0301 williams-2007-applying @@ -208,14 +208,14 @@ JostSchatzmann KarlWeilhammer HuiYe - SteveYoung + SteveYoung 9–16 W07-0302 thomson-etal-2007-training The Multimodal Presentation Dashboard - MichaelJohnston + MichaelJohnston PatrickEhlen DavidGibbon ZhuLiu @@ -231,18 +231,18 @@ PhillipHunter PeterKrogh EstherLevin - RobertoPieraccini + RobertoPieraccini 25–31 W07-0304 acomb-etal-2007-technical <fixed-case>O</fixed-case>lympus: an open-source framework for conversational spoken language interface research - DanBohus + DanBohus AntoineRaux ThomasHarris MaxineEskenazi - AlexanderRudnicky + AlexanderRudnicky 32–39 W07-0305 bohus-etal-2007-olympus @@ -256,16 +256,16 @@ Experiments on the <fixed-case>F</fixed-case>rance Telecom 3000 Voice Agency corpus: academic research on an industrial spoken dialog system - GéraldineDamnati - FrédéricBéchet - RenatoDe Mori + GéraldineDamnati + FrédéricBéchet + RenatoDe Mori 48–55 W07-0307 damnati-etal-2007-experiments Experiences of an In-Service <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z Data Collection for the Deployment of a Call-Routing Application - MatsWirén + MatsWirén RobertEklund 56–63 W07-0308 @@ -273,7 +273,7 @@ <fixed-case>A</fixed-case>da<fixed-case>RTE</fixed-case>: An Extensible and Adaptable Architecture for Dialog Systems - LinaRojas + LinaRojas ToniGiorgino 64–67 W07-0309 @@ -282,7 +282,7 @@ Multi-slot semantics for natural-language call routing systems JohanBoye - MatsWirén + MatsWirén 68–75 W07-0310 boye-wiren-2007-multi @@ -296,7 +296,7 @@ <fixed-case>WIRE</fixed-case>: A Wearable Spoken Language Understanding System for the Military - HelenHastie + HelenHastie PatrickCraven MichaelOrr 84–88 @@ -305,8 +305,8 @@ Different measurement metrics to evaluate a chatbot system - BayanAbu Shawar - EricAtwell + BayanAbu Shawar + EricAtwell 89–96 W07-0313 abu-shawar-atwell-2007-different @@ -332,7 +332,7 @@ Chunk-Level Reordering of Source Language Sentences with Automatically Learned Rules for Statistical Machine Translation YuqiZhang RichardZens - HermannNey + HermannNey 1–8 W07-0401 zhang-etal-2007-chunk @@ -340,7 +340,7 @@ Extraction Phenomena in Synchronous <fixed-case>TAG</fixed-case> Syntax and Semantics RebeccaNesson - Stuart M.Shieber + Stuart M.Shieber 9–16 W07-0402 nesson-shieber-2007-extraction @@ -379,14 +379,14 @@ Discriminative word alignment by learning the alignment structure and syntactic divergence between a language pair SriramVenkatapathy - AravindJoshi + AravindJoshi 49–56 W07-0407 venkatapathy-joshi-2007-discriminative Generation in Machine Translation from Deep Syntactic Trees - KeithHall + KeithHall PetrNěmec 57–64 W07-0408 @@ -394,9 +394,9 @@ Combining Morphosyntactic Enriched Representation with n-best Reranking in Statistical Translation - HélèneBonneau-Maynard + HélèneBonneau-Maynard AlexandreAllauzen - DanielDéchelotte + DanielDéchelotte HolgerSchwenk 65–71 W07-0409 @@ -404,8 +404,8 @@ A Walk on the Other Side: Using <fixed-case>SMT</fixed-case> Components in a Transfer-Based Translation System - AriadnaFont Llitjós - StephanVogel + AriadnaFont Llitjós + StephanVogel 72–79 W07-0410 font-llitjos-vogel-2007-walk @@ -413,7 +413,7 @@ Dependency-Based Automatic Evaluation for Machine Translation KarolinaOwczarzak - Josefvan Genabith + Josefvan Genabith AndyWay 80–87 W07-0411 @@ -421,7 +421,7 @@ Probabilistic Synchronous <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars for Machine Translation: The Argument from Bilingual Dictionaries - Stuart M.Shieber + Stuart M.Shieber 88–95 W07-0412 shieber-2007-probabilistic @@ -429,7 +429,7 @@ Three models for discriminative machine translation using Global Lexical Selection and Sentence Reconstruction SriramVenkatapathy - SrinivasBangalore + SrinivasBangalore 96–102 W07-0413 venkatapathy-bangalore-2007-three @@ -437,8 +437,8 @@ Comparing Reordering Constraints for <fixed-case>SMT</fixed-case> Using Efficient <fixed-case>BLEU</fixed-case> Oracle Computation MarkusDreyer - KeithHall - SanjeevKhudanpur + KeithHall + SanjeevKhudanpur 103–110 W07-0414 dreyer-etal-2007-comparing @@ -491,7 +491,7 @@ High-accuracy Annotation and Parsing of <fixed-case>CHILDES</fixed-case> Transcripts KenjiSagae EricDavis - AlonLavie + AlonLavie BrianMacWhinney ShulyWintner 25–32 @@ -516,7 +516,7 @@ <fixed-case>ISA</fixed-case> meets <fixed-case>L</fixed-case>ara: An incremental word space model for cognitively plausible simulations of semantic learning - MarcoBaroni + MarcoBaroni AlessandroLenci LucaOnnis 49–56 @@ -542,14 +542,14 @@ The Topology of Synonymy and Homonymy Networks JamesGorman - JamesCurran + JamesCurran 73–80 W07-0610 gorman-curran-2007-topology The Benefits of Errors: Learning an <fixed-case>OT</fixed-case> Grammar with a Structured Candidate Set - TamásBiró + TamásBiró 81–88 W07-0611 biro-2007-benefits @@ -557,7 +557,7 @@ Learning to interpret novel noun-noun compounds: evidence from a category learning experiment BarryDevereux - FintanCostello + FintanCostello 89–96 W07-0612 devereux-costello-2007-learning @@ -569,7 +569,7 @@ W07-07 ChrisCallison-Burch PhilippKoehn - Cameron ShawFordyce + Cameron ShawFordyce ChristofMonz Association for Computational Linguistics
Prague, Czech Republic
@@ -612,7 +612,7 @@ Exploring Different Representational Units in <fixed-case>E</fixed-case>nglish-to-<fixed-case>T</fixed-case>urkish Statistical Machine Translation KemalOflazer - İlknurDurgar El-Kahlout + İlknurDurgar El-Kahlout 25–32 W07-0704 oflazer-durgar-el-kahlout-2007-exploring @@ -621,14 +621,14 @@ Can We Translate Letters? DavidVilar Jan-ThorstenPeter - HermannNey + HermannNey 33–39 W07-0705 vilar-etal-2007-translate A Dependency Treelet String Correspondence Model for Statistical Machine Translation - DeyiXiong + DeyiXiong QunLiu ShouxunLin 40–47 @@ -637,8 +637,8 @@ Word Error Rates: Decomposition over <fixed-case>POS</fixed-case> classes and Applications for Error Analysis - MajaPopović - HermannNey + MajaPopović + HermannNey 48–55 W07-0707 popovic-ney-2007-word @@ -647,8 +647,8 @@ Speech-Input Multi-Target Machine Translation AliciaPérez M. TeresaGonzález - M. InésTorres - FranciscoCasacuberta + M. InésTorres + FranciscoCasacuberta 56–63 W07-0708 perez-etal-2007-speech @@ -656,7 +656,7 @@ Meta-Structure Transformation Model for Statistical Machine Translation JiadongSun - TiejunZhao + TiejunZhao HuashenLiang 64–71 W07-0709 @@ -690,8 +690,8 @@ Human Evaluation of Machine Translation Through Binary System Comparisons DavidVilar GregorLeusch - HermannNey - Rafael E.Banchs + HermannNey + Rafael E.Banchs 96–103 W07-0713 vilar-etal-2007-human @@ -699,7 +699,7 @@ Labelled Dependencies in Machine Translation Evaluation KarolinaOwczarzak - Josefvan Genabith + Josefvan Genabith AndyWay 104–111 W07-0714 @@ -707,7 +707,7 @@ An Iteratively-Trained Segmentation-Free Phrase Translation Model for Statistical Machine Translation - RobertMoore + RobertMoore ChrisQuirk 112–119 W07-0715 @@ -716,9 +716,9 @@ Using Paraphrases for Parameter Tuning in Statistical Machine Translation NitinMadnani - NecipFazil Ayan + NecipFazil Ayan PhilipResnik - BonnieDorr + BonnieDorr 120–127 W07-0716 madnani-etal-2007-using @@ -744,29 +744,29 @@ Context-aware Discriminative Phrase Selection for Statistical Machine Translation - JesúsGiménez - LluísMàrquez + JesúsGiménez + LluísMàrquez 159–166 W07-0719 gimenez-marquez-2007-context Ngram-Based Statistical Machine Translation Enhanced with Multiple Weighted Reordering Hypotheses - MartaR. Costa-jussà - Josep M.Crego + MartaR. Costa-jussà + Josep M.Crego PatrikLambert MaximKhalilov - José A.R. Fonollosa - José B.Mariño - Rafael E.Banchs + José A.R. Fonollosa + José B.Mariño + Rafael E.Banchs 167–170 W07-0720 r-costa-jussa-etal-2007-ngram Analysis of Statistical and Morphological Classes to Generate Weigthed Reordering Hypotheses on a Statistical Machine Translation System - MartaR. Costa-jussà - José A.R. Fonollosa + MartaR. Costa-jussà + José A.R. Fonollosa 171–176 W07-0721 r-costa-jussa-r-fonollosa-2007-analysis @@ -774,7 +774,7 @@ Domain Adaptation in Statistical Machine Translation with Mixture Modelling JorgeCivera - AlfonsJuan + AlfonsJuan 177–180 W07-0722 civera-juan-2007-domain @@ -808,7 +808,7 @@ Multi-Engine Machine Translation with an Open-Source <fixed-case>SMT</fixed-case> Decoder YuChen - AndreasEisele + AndreasEisele ChristianFedermann EvaHasler MichaelJellinghaus @@ -822,8 +822,8 @@ MatthiasPaulik KayRottmann JanNiehues - SiljaHildebrand - StephanVogel + SiljaHildebrand + StephanVogel 197–202 W07-0727 paulik-etal-2007-isl @@ -840,15 +840,15 @@ The “Noisier Channel”: Translation from Morphologically Complex Languages - Christopher J.Dyer + Christopher J.Dyer 207–211 W07-0729 dyer-2007-noisier <fixed-case>UCB</fixed-case> System Description for the <fixed-case>WMT</fixed-case> 2007 Shared Task - PreslavNakov - MartiHearst + PreslavNakov + MartiHearst 212–215 W07-0730 nakov-hearst-2007-ucb-system @@ -858,14 +858,14 @@ AndreasZollmann AshishVenugopal MatthiasPaulik - StephanVogel + StephanVogel 216–219 W07-0731 zollmann-etal-2007-syntax Statistical Post-Editing on <fixed-case>SYSTRAN</fixed-case>‘s Rule-Based Translation System - LoïcDugast + LoïcDugast JeanSenellart PhilippKoehn 220–223 @@ -882,7 +882,7 @@ <fixed-case>METEOR</fixed-case>: An Automatic Metric for <fixed-case>MT</fixed-case> Evaluation with High Levels of Correlation with Human Judgments - AlonLavie + AlonLavie AbhayaAgarwal 228–231 W07-0734 @@ -890,7 +890,7 @@ <fixed-case>E</fixed-case>nglish-to-<fixed-case>C</fixed-case>zech Factored Machine Translation - OndřejBojar + OndřejBojar 232–239 W07-0735 bojar-2007-english @@ -899,7 +899,7 @@ Sentence Level Machine Translation Evaluation as a Ranking YangYe MingZhou - Chin-YewLin + Chin-YewLin 240–247 W07-0736 ye-etal-2007-sentence @@ -914,8 +914,8 @@ Linguistic Features for Automatic Evaluation of Heterogenous <fixed-case>MT</fixed-case> Systems - JesúsGiménez - LluísMàrquez + JesúsGiménez + LluísMàrquez 256–264 W07-0738 gimenez-marquez-2007-linguistic @@ -939,7 +939,7 @@ <fixed-case>E</fixed-case>lixir<fixed-case>FM</fixed-case> – Implementation of Functional <fixed-case>A</fixed-case>rabic Morphology - OtakarSmrž + OtakarSmrž 1–8 W07-0801 smrz-2007-elixirfm @@ -964,14 +964,14 @@ WalidMagdy KareemDarwish OssamaEmam - HanyHassan + HanyHassan 25–32 W07-0804 magdy-etal-2007-arabic Syllable-Based Speech Recognition for <fixed-case>A</fixed-case>mharic - Solomon TeferraAbate + Solomon TeferraAbate WolfgangMenzel 33–40 W07-0805 @@ -979,10 +979,10 @@ Adapting a Medical speech to speech translation system (<fixed-case>M</fixed-case>ed<fixed-case>SLT</fixed-case>) to <fixed-case>A</fixed-case>rabic - PierretteBouillon + PierretteBouillon SoniaHalimi - MannyRayner - Beth AnnHockey + MannyRayner + Beth AnnHockey 41–48 W07-0806 bouillon-etal-2007-adapting @@ -998,7 +998,7 @@ Can You Tag the Modal? You Should. - YaelNetzer + YaelNetzer MeniAdler DavidGabay MichaelElhadad @@ -1016,14 +1016,14 @@ <fixed-case>A</fixed-case>rabic to <fixed-case>F</fixed-case>rench Sentence Alignment: Exploration of A Cross-language Information Retrieval Approach NasredineSemmar - ChristianFluhr + ChristianFluhr 73–80 W07-0810 semmar-fluhr-2007-arabic An <fixed-case>A</fixed-case>rabic Slot Grammar Parser - MichaelMcCord + MichaelMcCord ViolettaCavalli-Sforza 81–88 W07-0811 @@ -1031,7 +1031,7 @@ Improved <fixed-case>A</fixed-case>rabic Base Phrase Chunking with a new enriched <fixed-case>POS</fixed-case> tag set - MonaDiab + MonaDiab 89–96 W07-0812 diab-2007-improved @@ -1039,7 +1039,7 @@ Smoothing a Lexicon-based <fixed-case>POS</fixed-case> Tagger for <fixed-case>A</fixed-case>rabic and <fixed-case>H</fixed-case>ebrew SaibManour - KhalilSima’an + KhalilSima’an YoadWinter 97–103 W07-0813 @@ -1059,7 +1059,7 @@ Proceedings of the Workshop on Language Technology for Cultural Heritage Data (LaTeCH 2007). W07-09 CarolineSporleder - Antalvan den Bosch + Antalvan den Bosch ClaireGrover Association for Computational Linguistics
Prague, Czech Republic
@@ -1082,9 +1082,9 @@
<fixed-case>V</fixed-case>iterbi Based Alignment between Text Images and their Transcripts - Alejandro H.Toselli + Alejandro H.Toselli VerónicaRomero - EnriqueVidal + EnriqueVidal 9–16 W07-0902 toselli-etal-2007-viterbi @@ -1099,7 +1099,7 @@ Concept Disambiguation for Improved Subject Access Using Multiple Knowledge Sources TandeepSidhu - JudithKlavans + JudithKlavans JimmyLin 25–32 W07-0904 @@ -1108,7 +1108,7 @@ The <fixed-case>L</fixed-case>atin Dependency Treebank in a Cultural Heritage Digital Library DavidBamman - GregoryCrane + GregoryCrane 33–40 W07-0905 bamman-crane-2007-latin @@ -1123,7 +1123,7 @@ Dynamic Path Prediction and Recommendation in a Museum Environment KarlGrieser - TimothyBaldwin + TimothyBaldwin StevenBird 49–56 W07-0907 @@ -1134,7 +1134,7 @@ VéroniqueMalaisé AntoineIsaac LuitGazendam - HennieBrugman + HennieBrugman 57–64 W07-0908 malaise-etal-2007-anchoring @@ -1143,7 +1143,7 @@ Cross Lingual and Semantic Retrieval for Cultural Heritage Appreciation IdanSzpektor IdoDagan - AlonLavie + AlonLavie DannyShacham ShulyWintner 65–72 @@ -1162,8 +1162,8 @@ Multilingual Search for Cultural Heritage Archives via Combining Multiple Translation Resources - Gareth J. F.Jones - YingZhang + Gareth J. F.Jones + YingZhang EamonnNewman FabioFantino FrancaDebole @@ -1173,7 +1173,7 @@ Invited Talk: Lessons from the <fixed-case>MALACH</fixed-case> Project: Applying New Technologies to Improve Intellectual Access to Large Oral History Collections - Douglas W.Oard + Douglas W.Oard 89 W07-0912 oard-2007-invited @@ -1183,11 +1183,11 @@ Biological, translational, and clinical language processing W07-10 - K. BretonnelCohen + K. BretonnelCohen DinaDemner-Fushman - CarolFriedman - LynetteHirschman - JohnPestian + CarolFriedman + LynetteHirschman + JohnPestian Association for Computational Linguistics
Prague, Czech Republic
June @@ -1209,9 +1209,9 @@
Determining the Syntactic Structure of Medical Terms in Clinical Notes - BridgetMcInnes + BridgetMcInnes TedPedersen - SergueiPakhomov + SergueiPakhomov 9–16 W07-1002 mcinnes-etal-2007-determining @@ -1241,7 +1241,7 @@ An Unsupervised Method for Extracting Domain-specific Affixes in Biological Literature HaibinLiu ChristianBlouin - VladoKešelj + VladoKešelj 33–40 W07-1005 liu-etal-2007-unsupervised @@ -1257,7 +1257,7 @@ Mining a Lexicon of Technical Terms and Lay Equivalents - NoemieElhadad + NoemieElhadad KomalSutaria 49–56 W07-1007 @@ -1265,8 +1265,8 @@ Annotation of Chemical Named Entities - PeterCorbett - ColinBatchelor + PeterCorbett + ColinBatchelor SimoneTeufel 57–64 W07-1008 @@ -1274,7 +1274,7 @@ Recognising Nested Named Entities in Biomedical Text - BeatriceAlex + BeatriceAlex BarryHaddow ClaireGrover 65–72 @@ -1283,7 +1283,7 @@ Exploring the Efficacy of Caption Search for Bioscience Journal Search Interfaces - MartiHearst + MartiHearst AnnaDivoli YeJerry MichaelWooldridge @@ -1293,7 +1293,7 @@ <fixed-case>C</fixed-case>on<fixed-case>T</fixed-case>ext: An Algorithm for Identifying Contextual Features from Clinical Text - WendyChapman + WendyChapman JohnDowling DavidChu 81–88 @@ -1304,8 +1304,8 @@ <fixed-case>B</fixed-case>io<fixed-case>N</fixed-case>oculars: Extracting Protein-Protein Interactions from Biomedical Text AmgadMadkour KareemDarwish - HanyHassan - AhmedHassan + HanyHassan + AhmedHassan OssamaEmam 89–96 W07-1012 @@ -1326,13 +1326,13 @@ From indexing the biomedical literature to coding clinical text: experience with <fixed-case>MTI</fixed-case> and machine learning approaches - Alan R.Aronson + Alan R.Aronson OlivierBodenreider DinaDemner-Fushman Kin WahFung - Vivian K.Lee - James G.Mork - AurélieNévéol + Vivian K.Lee + James G.Mork + AurélieNévéol LeePeters Willie J.Rogers 105–112 @@ -1349,7 +1349,7 @@ A Study of Structured Clinical Abstracts and the Semantic Classification of Sentences - GraceChung + GraceChung EnricoCoiera 121–128 W07-1016 @@ -1360,7 +1360,7 @@ KobyCrammer MarkDredze KuzmanGanchev - ParthaPratim Talukdar + ParthaPratim Talukdar StevenCarroll 129–136 W07-1017 @@ -1370,9 +1370,9 @@ Interpreting comparative constructions in biomedical text MarceloFiszman DinaDemner-Fushman - Francois M.Lang + Francois M.Lang PhilipGoetz - Thomas C.Rindflesch + Thomas C.Rindflesch 137–144 W07-1018 fiszman-etal-2007-interpreting @@ -1410,16 +1410,16 @@ Challenges for extracting biomedical knowledge from full text TaraMcIntosh - James R.Curran + James R.Curran 171–178 W07-1023 mcintosh-curran-2007-challenges Adaptation of <fixed-case>POS</fixed-case> Tagging for Multiple <fixed-case>B</fixed-case>io<fixed-case>M</fixed-case>edical Domains - John E.Miller + John E.Miller ManabuTorii - K.Vijay-Shanker + K.Vijay-Shanker 179–180 W07-1024 miller-etal-2007-adaptation @@ -1427,23 +1427,23 @@ Information Extraction from Patients’ Free Form Documentation AgnieszkaMykowiecka - MalgorzataMarciniak + MalgorzataMarciniak 181–182 W07-1025 mykowiecka-marciniak-2007-information Automatic Indexing of Specialized Documents: Using Generic vs. Domain-Specific Document Representations - AurélieNévéol - James G.Mork - Alan R.Aronson + AurélieNévéol + James G.Mork + Alan R.Aronson 183–190 W07-1026 neveol-etal-2007-automatic Developing Feature Types for Classifying Clinical Notes - JonPatrick + JonPatrick YitaoZhang YefengWang 191–192 @@ -1460,8 +1460,8 @@ Discovering contradicting protein-protein interactions in text - OliviaSanchez - MassimoPoesio + OliviaSanchez + MassimoPoesio 195–196 W07-1029 sanchez-poesio-2007-discovering @@ -1469,7 +1469,7 @@ Marking time in developmental biology GailSinclair - BonnieWebber + BonnieWebber 197–198 W07-1030 sinclair-webber-2007-marking @@ -1491,7 +1491,7 @@ Reranking for Biomedical Named-Entity Recognition KazuhiroYoshida - Jun’ichiTsujii + Jun’ichiTsujii 209–216 W07-1033 yoshida-tsujii-2007-reranking @@ -1501,7 +1501,7 @@ Proceedings of the Workshop on A Broader Perspective on Multiword Expressions W07-11 - NicoleGregoire + NicoleGregoire StefanEvert Su NamKim Association for Computational Linguistics @@ -1538,15 +1538,15 @@ Semantics-based Multiword Expression Extraction - TimVan de Cruys - BegoñaVillada Moirón + TimVan de Cruys + BegoñaVillada Moirón 25–32 W07-1104 van-de-cruys-villada-moiron-2007-semantics <fixed-case>S</fixed-case>panish Adverbial Frozen Expressions - DolorsCatalà + DolorsCatalà JorgeBaptista 33–40 W07-1105 @@ -1583,7 +1583,7 @@ TakaoShime MasatoshiTsuchiya SuguruMatsuyoshi - SatoshiSato + SatoshiSato 65–72 W07-1109 utsuro-etal-2007-learning @@ -1602,11 +1602,11 @@ ACL 2007 Workshop on Deep Linguistic Processing W07-12 - TimothyBaldwin + TimothyBaldwin MarkDras JuliaHockenmaier - Tracy HollowayKing - Gertjanvan Noord + Tracy HollowayKing + Gertjanvan Noord Association for Computational Linguistics
Prague, Czech Republic
June @@ -1620,7 +1620,7 @@ Multi-Component <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars, Dependency Graph Models, and Linguistic Analyses JoanChen-Main - AravindJoshi + AravindJoshi 1–8 W07-1201 chen-main-joshi-2007-multi @@ -1628,7 +1628,7 @@ Perceptron Training for a Wide-Coverage Lexicalized-Grammar Parser StephenClark - JamesCurran + JamesCurran 9–16 W07-1202 clark-curran-2007-perceptron @@ -1660,25 +1660,25 @@ Question Answering based on Semantic Roles - MichaelKaisser - BonnieWebber + MichaelKaisser + BonnieWebber 41–48 W07-1206 kaisser-webber-2007-question Deep Linguistic Processing for Spoken Dialogue Systems - JamesAllen - MyroslavaDzikovska + JamesAllen + MyroslavaDzikovska MehdiManshadi - MarySwift + MarySwift 49–56 W07-1207 allen-etal-2007-deep Self- or Pre-Tuning? Deep Linguistic Processing of Language Variants - AntónioBranco + AntónioBranco FranciscoCosta 57–64 W07-1208 @@ -1688,7 +1688,7 @@ Pruning the Search Space of a Hand-Crafted Parsing System with a Probabilistic Parser AoifeCahill Tracy HollowayKing - John T.Maxwell III + John T.Maxwell III 65–72 W07-1209 cahill-etal-2007-pruning @@ -1702,7 +1702,7 @@ A Task-based Comparison of Information Extraction Pattern Models - MarkGreenwood + MarkGreenwood MarkStevenson 81–88 W07-1211 @@ -1711,7 +1711,7 @@ Creating a Systemic Functional Grammar Corpus from the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank MatthewHonnibal - James R.Curran + James R.Curran 89–96 W07-1212 honnibal-curran-2007-creating @@ -1728,8 +1728,8 @@ The <fixed-case>S</fixed-case>panish Resource Grammar: Pre-processing Strategy and Lexical Acquisition - MontserratMarimon - NúriaBel + MontserratMarimon + NúriaBel SergioEspeja NataliaSeghezzi 105–111 @@ -1739,14 +1739,14 @@ Extracting a Verb Lexicon for Deep Parsing from <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et MarkMcConville - Myroslava O.Dzikovska + Myroslava O.Dzikovska 112–119 W07-1215 mcconville-dzikovska-2007-extracting Fips, A “Deep” Linguistic Multilingual Parser - EricWehrli + EricWehrli 120–127 W07-1216 wehrli-2007-fips @@ -1762,7 +1762,7 @@ Validation and Regression Testing for a Cross-linguistic Grammar Resource - Emily M.Bender + Emily M.Bender LauriePoulson ScottDrellishak ChrisEvans @@ -1792,7 +1792,7 @@ Proceedings of Ninth Meeting of the ACL Special Interest Group in Computational Morphology and Phonology W07-13 JohnNerbonne - T. MarkEllison + T. MarkEllison GrzegorzKondrak Association for Computational Linguistics
Prague, Czech Republic
@@ -1846,7 +1846,7 @@
Can Corpus Based Measures be Used for Comparative Study of Languages? - Anil KumarSingh + Anil KumarSingh HarshitSurana 40–47 W07-1306 @@ -1922,9 +1922,9 @@ <fixed-case>P</fixed-case>ara<fixed-case>M</fixed-case>or: Minimally Supervised Induction of Paradigm Structure and Morphological Analysis ChristianMonson - JaimeCarbonell - AlonLavie - LoriLevin + JaimeCarbonell + AlonLavie + LoriLevin 117–125 W07-1315 monson-etal-2007-paramor @@ -1954,9 +1954,9 @@ SatoshiSekine KentaroInui IdoDagan - BillDolan + BillDolan DaniloGiampiccolo - BernardoMagnini + BernardoMagnini Association for Computational Linguistics
Prague
June @@ -1989,13 +1989,13 @@
Precision-focused Textual Inference - DanielBobrow + DanielBobrow DickCrouch - Tracy HollowayKing + Tracy HollowayKing CleoCondoravdi LauriKarttunen RowanNairn - Valeriade Paiva + Valeriade Paiva AnnieZaenen 16–21 W07-1403 @@ -2004,15 +2004,15 @@ <fixed-case>COGEX</fixed-case> at <fixed-case>RTE</fixed-case> 3 MartaTatu - DanMoldovan + DanMoldovan 22–27 W07-1404 tatu-moldovan-2007-cogex A Corpus of Fine-Grained Entailment Relations - Rodney D.Nielsen - WayneWard + Rodney D.Nielsen + WayneWard 28–35 W07-1405 nielsen-ward-2007-corpus @@ -2020,7 +2020,7 @@ Recognizing Textual Entailment Using Sentence Similarity based on Dependency Tree Skeletons RuiWang - GünterNeumann + GünterNeumann 36–41 W07-1406 wang-neumann-2007-recognizing @@ -2035,9 +2035,9 @@ Entailment and Anaphora Resolution in <fixed-case>RTE</fixed-case>3 - RodolfoDelmonte + RodolfoDelmonte AntonellaBristot - Marco AldoPiccolino Boniforti + Marco AldoPiccolino Boniforti SaraTonelli 48–53 W07-1408 @@ -2046,10 +2046,10 @@ On the Role of Lexical and World Knowledge in <fixed-case>RTE</fixed-case>3 PeterClark - PhilHarrison + PhilHarrison JohnThompson - WilliamMurray - JerryHobbs + WilliamMurray + JerryHobbs ChristianeFellbaum 54–59 W07-1409 @@ -2057,25 +2057,25 @@ Machine Learning with Semantic-Based Distances Between Sentences for Textual Entailment - DanielFerrés - HoracioRodríguez + DanielFerrés + HoracioRodríguez 60–65 W07-1410 ferres-rodriguez-2007-machine A Perspective-Based Approach for Solving Textual Entailment Recognition - ÓscarFerrández + ÓscarFerrández DanielMicol - RafaelMuñoz - ManuelPalomar + RafaelMuñoz + ManuelPalomar 66–71 W07-1411 ferrandez-etal-2007-perspective Shallow Semantic in Fast Textual Entailment Rule Learners - Fabio MassimoZanzotto + Fabio MassimoZanzotto MarcoPennacchiotti AlessandroMoschitti 72–77 @@ -2085,11 +2085,11 @@ Combining Lexical-Syntactic Information with Machine Learning for Recognizing Textual Entailment ArturoMontejo-Ráez - Jose ManuelPerea - FernandoMartínez-Santiago - Miguel ÁngelGarcía-Cumbreras - MaiteMartín-Valdivia - AlfonsoUreña-López + Jose ManuelPerea + FernandoMartínez-Santiago + Miguel ÁngelGarcía-Cumbreras + MaiteMartín-Valdivia + AlfonsoUreña-López 78–82 W07-1413 montejo-raez-etal-2007-combining @@ -2097,7 +2097,7 @@ Dependency-based paraphrasing for recognizing textual entailment ErwinMarsi - EmielKrahmer + EmielKrahmer WauterBosma 83–88 W07-1414 @@ -2105,8 +2105,8 @@ Experiments of <fixed-case>UNED</fixed-case> at the Third Recognising Textual Entailment Challenge - ÁlvaroRodrigo - AnselmoPeñas + ÁlvaroRodrigo + AnselmoPeñas JesúsHerrera FelisaVerdejo 89–94 @@ -2137,9 +2137,9 @@ <fixed-case>SVO</fixed-case> triple based Latent Semantic Analysis for recognising textual entailment - GastonBurek + GastonBurek ChristianPietsch - AnneDe Roeck + AnneDe Roeck 113–118 W07-1419 burek-etal-2007-svo @@ -2149,7 +2149,7 @@ RodAdams GabrielNicolae CristinaNicolae - SandaHarabagiu + SandaHarabagiu 119–124 W07-1420 adams-etal-2007-textual @@ -2157,7 +2157,7 @@ Hypothesis Transformation and Semantic Variability Rules Used in Recognizing Textual Entailment AdrianIftene - AlexandraBalahur-Dobrescu + AlexandraBalahur-Dobrescu 125–130 W07-1421 iftene-balahur-dobrescu-2007-hypothesis @@ -2182,7 +2182,7 @@ <fixed-case>M</fixed-case>utaphrase: Paraphrasing with <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et - MichaelEllsworth + MichaelEllsworth AdamJanin 143–150 W07-1424 @@ -2193,7 +2193,7 @@ AtsushiFujita ShuheiKato NaokiKato - SatoshiSato + SatoshiSato 151–158 W07-1425 fujita-etal-2007-compositional @@ -2210,16 +2210,16 @@ Learning Alignments and Leveraging Natural Logic - NathanaelChambers + NathanaelChambers DanielCer TrondGrenager DavidHall - ChloeKiddon + ChloeKiddon BillMacCartney - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe DanielRamage EricYeh - Christopher D.Manning + Christopher D.Manning 165–170 W07-1427 chambers-etal-2007-learning @@ -2234,8 +2234,8 @@ Biology Based Alignments of Paraphrases for Sentence Compression - JoãoCordeiro - GäelDias + JoãoCordeiro + GäelDias GuillaumeCleuziou 177–184 W07-1429 @@ -2252,7 +2252,7 @@ Natural Logic for Textual Inference BillMacCartney - Christopher D.Manning + Christopher D.Manning 193–200 W07-1431 maccartney-manning-2007-natural @@ -2262,13 +2262,13 @@ Proceedings of the Linguistic Annotation Workshop W07-15 - BranimirBoguraev - NancyIde - AdamMeyers + BranimirBoguraev + NancyIde + AdamMeyers ShigekoNariyama ManfredStede - JanyceWiebe - GrahamWilcock + JanyceWiebe + GrahamWilcock Association for Computational Linguistics
Prague, Czech Republic
June @@ -2306,7 +2306,7 @@
Assocating Facial Displays with Syntactic Constituents for Generation - Mary EllenFoster + Mary EllenFoster 25–32 W07-1504 foster-2007-assocating @@ -2316,8 +2316,8 @@ UdoHahn EkaterinaBuyko KatrinTomanek - ScottPiao - JohnMcNaught + ScottPiao + JohnMcNaught YoshimasaTsuruoka SophiaAnaniadou 33–40 @@ -2334,7 +2334,7 @@ Usage of <fixed-case>XSL</fixed-case> Stylesheets for the Annotation of the <fixed-case>S</fixed-case>ámi Language Corpora. SaaraHuhmarniemi - Sjur N.Moshagen + Sjur N.Moshagen TrondTrosterud 45–48 W07-1507 @@ -2344,11 +2344,11 @@ Criteria for the Manual Grouping of Verb Senses Cecily JillDuffield Jena D.Hwang - Susan WindischBrown + Susan WindischBrown DmitriyDligach - Sarah E.Vieweg + Sarah E.Vieweg JennyDavis - MarthaPalmer + MarthaPalmer 49–52 W07-1508 duffield-etal-2007-criteria @@ -2359,7 +2359,7 @@ FernandoPereira MarkMandel StevenCarroll - PeterWhite + PeterWhite 53–56 W07-1509 ganchev-etal-2007-semi @@ -2374,8 +2374,8 @@ Annotating <fixed-case>C</fixed-case>hinese Collocations with Multi Information RuifengXu - QinLu - Kam-FaiWong + QinLu + Kam-FaiWong WenjieLi 61–68 W07-1511 @@ -2410,14 +2410,14 @@ Annotating Expressions of Appraisal in <fixed-case>E</fixed-case>nglish JonathonRead DavidHope - JohnCarroll + JohnCarroll 93–100 W07-1515 read-etal-2007-annotating Active Learning for Part-of-Speech Tagging: Accelerating Corpus Annotation - EricRingger + EricRingger PeterMcClanahan RobbieHaertel GeorgeBusby @@ -2433,7 +2433,7 @@ Combining Independent Syntactic and Semantic Annotation Schemes MarcVerhagen AmberStubbs - JamesPustejovsky + JamesPustejovsky 109–112 W07-1517 verhagen-etal-2007-combining @@ -2447,7 +2447,7 @@ <fixed-case>ITU</fixed-case> Treebank Annotation Tool - GülşenEryiǧit + GülşenEryiǧit 117–120 W07-1519 eryigit-2007-itu @@ -2474,7 +2474,7 @@ RyuIida MamoruKomachi KentaroInui - YujiMatsumoto + YujiMatsumoto 132–139 W07-1522 iida-etal-2007-annotating @@ -2492,10 +2492,10 @@ Standoff Coordination for Multi-Tool Annotation in a Dialogue Corpus - Kepa JosebaRodríguez + Kepa JosebaRodríguez StefanieDipper MichaelGötze - MassimoPoesio + MassimoPoesio GiuseppeRiccardi ChristianRaymond JoannaRabiega-Wiśniewska @@ -2520,7 +2520,7 @@ Experiments with an Annotation Scheme for a Knowledge-rich Noun Phrase Interpretation System - RoxanaGirju + RoxanaGirju 168–175 W07-1527 girju-2007-experiments @@ -2547,10 +2547,10 @@ Discourse Annotation Working Group Report ManfredStede JanyceWiebe - EvaHajičová + EvaHajičová BrianReese SimoneTeufel - BonnieWebber + BonnieWebber TheresaWilson 191–196 W07-1530 @@ -2561,8 +2561,8 @@ Proceedings of the Fourth ACL-SIGSEM Workshop on Prepositions W07-16 - FintanCostello - JohnKelleher + FintanCostello + JohnKelleher MartinVolk Association for Computational Linguistics
Prague, Czech Republic
@@ -2584,7 +2584,7 @@ Landmark Classification for Route Directions AidanFurlan - TimothyBaldwin + TimothyBaldwin AlexKlippel 9–16 W07-1602 @@ -2600,8 +2600,8 @@ Detection of Grammatical Errors Involving Prepositions - MartinChodorow - JoelTetreault + MartinChodorow + JoelTetreault Na-RaeHan 25–30 W07-1604 @@ -2628,7 +2628,7 @@ Automatically Acquiring Models of Preposition Use RacheleDe Felice - StephenPulman + StephenPulman 45–50 W07-1607 de-felice-pulman-2007-automatically @@ -2636,7 +2636,7 @@ Simple Preposition Correspondence: A Problem in <fixed-case>E</fixed-case>nglish to <fixed-case>I</fixed-case>ndian Language Machine Translation SamarHusain - Dipti MisraSharma + Dipti MisraSharma ManoharReddy 51–58 W07-1608 @@ -2648,7 +2648,7 @@ Proceedings of the Workshop on Balto-Slavonic Natural Language Processing W07-17 JakubPiskorski - HristoTanev + HristoTanev Association for Computational Linguistics
Prague, Czech Republic
June @@ -2677,8 +2677,8 @@ A Language Independent Approach for Name Categorization and Discrimination ZornitsaKozareva - SoniaVázquez - AndrésMontoyo + SoniaVázquez + AndrésMontoyo 19–26 W07-1703 kozareva-etal-2007-language @@ -2687,14 +2687,14 @@ Lemmatization of <fixed-case>P</fixed-case>olish Person Names JakubPiskorski MarcinSydow - AnnaKupść + AnnaKupść 27–34 W07-1704 piskorski-etal-2007-lemmatization Automatic Processing of Diabetic Patients’ Hospital Documentation - MałgorzataMarciniak + MałgorzataMarciniak AgnieszkaMykowiecka 35–42 W07-1705 @@ -2705,10 +2705,10 @@ AdamPrzepiórkowski ŁukaszDegórski MiroslavSpousta - KirilSimov + KirilSimov PetyaOsenova LotharLemnitzer - VladislavKuboň + VladislavKuboň BeataWójtowicz 43–50 W07-1706 @@ -2732,11 +2732,11 @@ The Best of Two Worlds: Cooperation of Statistical and Rule-Based Taggers for <fixed-case>C</fixed-case>zech - Drahomíra “johanka”Spoustová - JanHajič + Drahomíra “johanka”Spoustová + JanHajič JanVotrubec PavelKrbec - PavelKvětoň + PavelKvětoň 67–74 W07-1709 spoustova-etal-2007-best @@ -2752,7 +2752,7 @@ Multilingual Word Sense Discrimination: A Comparative Cross-Linguistic Study AllaRozovskaya - RichardSproat + RichardSproat 82–87 W07-1711 rozovskaya-sproat-2007-multilingual @@ -2768,7 +2768,7 @@ Morphological Annotation of the <fixed-case>L</fixed-case>ithuanian Corpus ErikaRimkutė - VidasDaudaravičius + VidasDaudaravičius AndriusUtka 94–99 W07-1713 @@ -2779,8 +2779,8 @@ Proceedings of the Workshop on Grammar-Based Approaches to Spoken Language Processing W07-18 - PierretteBouillon - MannyRayner + PierretteBouillon + MannyRayner Association for Computational Linguistics
Prague, Czech Republic
June @@ -2800,7 +2800,7 @@
Converting Grammatical Framework to Regulus - PeterLjunglöf + PeterLjunglöf 9–16 W07-1802 ljunglof-2007-converting @@ -2825,7 +2825,7 @@ TimPaek SudeepGandhe MaxChickering - Yun ChengJu + Yun ChengJu 33–40 W07-1805 paek-etal-2007-handling @@ -2839,7 +2839,7 @@ MarianneSantaholma NikosTsourakis MannyRayner - Beth AnnHockey + Beth AnnHockey 41–48 W07-1806 bouillon-etal-2007-bidirectional @@ -2873,7 +2873,7 @@ Comparing Rule-Based and Data-Driven Selection of Facial Displays - Mary EllenFoster + Mary EllenFoster 1–8 W07-1901 foster-2007-comparing @@ -2896,7 +2896,7 @@ Which Way to Turn? Guide Orientation in Virtual Way Finding MarkEvers - MariëtTheune + MariëtTheune JoyceKarreman 25–32 W07-1904 @@ -2908,7 +2908,7 @@ BeatrizLópez DavidDíaz RubénFernández - LuisHernández + LuisHernández JavierCaminero 33–40 W07-1905 @@ -2936,7 +2936,7 @@ Dynamic Movement and Positioning of Embodied Agents in Multiparty Conversations DušanJan - DavidTraum + DavidTraum 59–66 W07-1908 jan-traum-2007-dynamic @@ -2947,7 +2947,7 @@ ÁlvaroHernández DavidDíaz RubénFernández - LuisHernández + LuisHernández DoroteoTorre 67–74 W07-1909 @@ -2958,7 +2958,7 @@ Proceedings of the Tenth International Conference on Parsing Technologies W07-22 - HarryBunt + HarryBunt PaolaMerlo Association for Computational Linguistics
Prague, Czech Republic
@@ -2972,7 +2972,7 @@ Using Self-Trained Bilexical Preferences to Improve Disambiguation Accuracy - Gertjanvan Noord + Gertjanvan Noord 1–10 W07-2201 van-noord-2007-using @@ -2981,7 +2981,7 @@ Evaluating Impact of Re-training a Lexical Disambiguation Model on Domain Adaptation of an <fixed-case>HPSG</fixed-case> Parser TadayoshiHara YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 11–22 W07-2202 hara-etal-2007-evaluating @@ -2989,8 +2989,8 @@ Semi-supervised Training of a Statistical Parser from Unlabeled Partially-bracketed Data RebeccaWatson - TedBriscoe - JohnCarroll + TedBriscoe + JohnCarroll 23–32 W07-2203 watson-etal-2007-semi @@ -2999,19 +2999,19 @@ Adapting <fixed-case>WSJ</fixed-case>-Trained Parsers to the <fixed-case>B</fixed-case>ritish <fixed-case>N</fixed-case>ational <fixed-case>C</fixed-case>orpus using In-Domain Self-Training JenniferFoster JoachimWagner - DjaméSeddah - Josefvan Genabith + DjaméSeddah + Josefvan Genabith 33–35 W07-2204 foster-etal-2007-adapting The Impact of Deep Linguistic Processing on Parsing Technology - TimothyBaldwin + TimothyBaldwin MarkDras JuliaHockenmaier - Tracy HollowayKing - Gertjanvan Noord + Tracy HollowayKing + Gertjanvan Noord 36–38 W07-2205 baldwin-etal-2007-impact @@ -3019,7 +3019,7 @@ Improving the Efficiency of a Wide-Coverage <fixed-case>CCG</fixed-case> Parser BojanDjordjevic - JamesCurran + JamesCurran StephenClark 39–47 W07-2206 @@ -3029,7 +3029,7 @@ Efficiency in Unification-Based N-Best Parsing YiZhang StephanOepen - JohnCarroll + JohnCarroll 48–59 W07-2207 zhang-etal-2007-efficiency @@ -3039,14 +3039,14 @@ TakashiNinomiya TakuyaMatsuzaki YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 60–68 W07-2208 ninomiya-etal-2007-log Ambiguity Resolution by Reordering Rules in Text Containing Errors - SylvanaSofkova Hashemi + SylvanaSofkova Hashemi 69–79 W07-2209 sofkova-hashemi-2007-ambiguity @@ -3060,14 +3060,14 @@ Symbolic Preference Using Simple Scoring - PaulaNewman + PaulaNewman 83–92 W07-2211 newman-2007-symbolic Synchronous Grammars and Transducers: Good News and Bad News - StuartShieber + StuartShieber 93 W07-2212 shieber-2007-synchronous @@ -3075,14 +3075,14 @@ Are Very Large Context-Free Grammars Tractable? PierreBoullier - BenoîtSagot + BenoîtSagot 94–105 W07-2213 boullier-sagot-2007-large Pomset mcfgs - MichaelPan + MichaelPan 106–108 W07-2214 pan-2007-pomset @@ -3115,7 +3115,7 @@ A Latent Variable Model for Generative Dependency Parsing IvanTitov - JamesHenderson + JamesHenderson 144–155 W07-2218 titov-henderson-2007-latent @@ -3123,7 +3123,7 @@ Three-Dimensional Parametrization for Parsing Morphologically Rich Languages ReutTsarfaty - KhalilSima’an + KhalilSima’an 156–167 W07-2219 tsarfaty-simaan-2007-three @@ -3153,14 +3153,14 @@ Quality of Service and Communicative Competence in <fixed-case>NLG</fixed-case> Evaluation - KristiinaJokinen + KristiinaJokinen 3–6 W07-2301 jokinen-2007-quality Generation of repeated references to discourse entities - AnjaBelz + AnjaBelz SebastianVarges 9–16 W07-2302 @@ -3177,19 +3177,19 @@ Modelling control in generation - RogerEvans - DavidWeir - JohnCarroll - DanielPaiva - AnjaBelz + RogerEvans + DavidWeir + JohnCarroll + DanielPaiva + AnjaBelz 25–32 W07-2304 evans-etal-2007-modelling Avoiding Repetition in Generated Text - Mary EllenFoster - MichaelWhite + Mary EllenFoster + MichaelWhite 33–40 W07-2305 foster-white-2007-avoiding @@ -3206,7 +3206,7 @@ Evaluating algorithms for the Generation of Referring Expressions using a balanced corpus AlbertGatt Ielkavan der Sluis - Keesvan Deemter + Keesvan Deemter 49–56 W07-2307 gatt-etal-2007-evaluating @@ -3214,8 +3214,8 @@ Generating Politeness in Task Based Interaction: An Evaluation of the Effect of Linguistic Form and Culture SwatiGupta - MarilynWalker - DanielaRomano + MarilynWalker + DanielaRomano 57–64 W07-2308 gupta-etal-2007-generating @@ -3233,7 +3233,7 @@ Using <fixed-case>WYSIWYM</fixed-case> to Create an Open-ended Interface for the Semantic Grid FeikjeHielkema - ChrisMellish + ChrisMellish PeterEdwards 69–72 W07-2310 @@ -3249,13 +3249,13 @@ Measuring Variability in Sentence Ordering for News Summarization NitinMadnani - RebeccaPassonneau - Necip FazilAyan - JohnConroy - BonnieDorr - JudithKlavans - DianneO’Leary - JudithSchlesinger + RebeccaPassonneau + Necip FazilAyan + JohnConroy + BonnieDorr + JudithKlavans + DianneO’Leary + JudithSchlesinger 81–88 W07-2312 madnani-etal-2007-measuring @@ -3264,7 +3264,7 @@ Visualising Discourse Structure in Interactive Documents ClaraMancini ChristianPietsch - DoniaScott + DoniaScott 89–92 W07-2313 mancini-etal-2007-visualising @@ -3286,14 +3286,14 @@ An Experiment on “Free Generation” from Single <fixed-case>RDF</fixed-case> Triples XiantangSun - ChrisMellish + ChrisMellish 105–108 W07-2316 sun-mellish-2007-experiment The Narrator: <fixed-case>NLG</fixed-case> for digital storytelling - MariëtTheune + MariëtTheune NandaSlabbers FeikjeHielkema 109–112 @@ -3310,8 +3310,8 @@ Determining tutorial remediation strategies from a corpus of human-human tutoring dialogues - CharlesCallaway - JohannaMoore + CharlesCallaway + JohannaMoore 123–130 W07-2319 callaway-moore-2007-determining @@ -3334,7 +3334,7 @@ Generating Multilingual Descriptions from Linguistically Annotated <fixed-case>OWL</fixed-case> Ontologies: the <fixed-case>N</fixed-case>atural<fixed-case>OWL</fixed-case> System - DimitriosGalanis + DimitriosGalanis IonAndroutsopoulos 143–146 W07-2322 @@ -3360,7 +3360,7 @@ A Comparison of Hedged and Non-hedged <fixed-case>NLG</fixed-case> Texts SaadMahamood EhudReiter - ChrisMellish + ChrisMellish 155–158 W07-2325 mahamood-etal-2007-comparison @@ -3368,7 +3368,7 @@ Cueing the Virtual Storyteller: Analysis of cue phrase usage in fairy tales ManonPenning - MariëtTheune + MariëtTheune 159–162 W07-2326 penning-theune-2007-cueing @@ -3376,7 +3376,7 @@ <fixed-case>A</fixed-case>tlas.txt: Linking Geo-referenced Data to Text for <fixed-case>NLG</fixed-case> KavitaThomas - SomayajuluSripada + SomayajuluSripada 163–166 W07-2327 thomas-sripada-2007-atlas @@ -3396,9 +3396,9 @@ Proceedings of the 16th Nordic Conference of Computational Linguistics (NODALIDA 2007) W07-24 JoakimNivre - Heiki-JaanKaalep + Heiki-JaanKaalep KadriMuischnek - MareKoit + MareKoit University of Tartu, Estonia
Tartu, Estonia
May @@ -3411,14 +3411,14 @@ Invited talk: Evaluating Automatic Approaches for Word Meaning Discovery and Disambiguation using Lexical Substitution - Diana F.McCarthy + Diana F.McCarthy 2–2 W07-2401 mccarthy-2007-invited Invited talk: Text Analysis and Machine Learning for Stylometrics and Stylogenetics - WalterDaelemans + WalterDaelemans 3–3 W07-2402 daelemans-2007-invited @@ -3432,17 +3432,17 @@ Dependency-Based Hybrid Model of Syntactic Analysis for the Languages with a Rather Free Word Order - GuntisBārzdiņš - NormundsGrūzītis - GuntaNešpore - BaibaSaulīte + GuntisBārzdiņš + NormundsGrūzītis + GuntaNešpore + BaibaSaulīte 13–20 W07-2404 barzdins-etal-2007-dependency Using <fixed-case>D</fixed-case>anish as a <fixed-case>CG</fixed-case> Interlingua: A Wide-Coverage <fixed-case>N</fixed-case>orwegian-<fixed-case>E</fixed-case>nglish Machine Translation System - EckhardBick + EckhardBick LarsNygaard 21–28 W07-2405 @@ -3450,9 +3450,9 @@ An Advanced Speech Corpus for <fixed-case>N</fixed-case>orwegian - Janne BondiJohannessen + Janne BondiJohannessen KristinHagen - Joel JamesPriestley + Joel JamesPriestley LarsNygaard 29–36 W07-2406 @@ -3493,7 +3493,7 @@ Development of Text-To-Speech system for <fixed-case>L</fixed-case>atvian KārlisGoba - AndrejsVasiļjevs + AndrejsVasiļjevs 67–72 W07-2411 goba-vasiljevs-2007-development @@ -3548,7 +3548,7 @@ The Extraction of Trajectories from Real Texts Based on Linear Classification HanjingLi - TiejunZhao + TiejunZhao ShengLi JiyuanZhao 121–127 @@ -3558,14 +3558,14 @@ <fixed-case>I</fixed-case>ce<fixed-case>P</fixed-case>arser: An Incremental Finite-State Parser for <fixed-case>I</fixed-case>celandic HrafnLoftsson - EiríkurRögnvaldsson + EiríkurRögnvaldsson 128–135 W07-2419 loftsson-rognvaldsson-2007-iceparser The <fixed-case>S</fixed-case>wedish-<fixed-case>T</fixed-case>urkish Parallel Corpus and Tools for its Creation - BeataMegyesi + BeataMegyesi BengtDahlqvist 136–143 W07-2420 @@ -3573,8 +3573,8 @@ Multivariate Cepstral Feature Compensation on Band-limited Data for Robust Speech Recognition - NicolasMorales - Doroteo T.Toledano + NicolasMorales + Doroteo T.Toledano John H. L.Hansen JavierGarrido 144–151 @@ -3584,7 +3584,7 @@ Theoretically Motivated Treebank Coverage VictoriaRosén - Koenraadde Smedt + Koenraadde Smedt 152–159 W07-2422 rosen-de-smedt-2007-theoretically @@ -3602,8 +3602,8 @@ Comprehension Assistant for Languages of <fixed-case>B</fixed-case>altic States - IngunaSkadiņa - AndrejsVasiļjevs + IngunaSkadiņa + AndrejsVasiļjevs DaigaDeksne RaivisSkadiņš LindaGoldberga @@ -3614,7 +3614,7 @@ Combining Contexts in Lexicon Learning for Semantic Parsing RichardSocher - ChrisBiemann + ChrisBiemann RainerOsswald 175–182 W07-2425 @@ -3622,7 +3622,7 @@ Polynomial Charts For Totally Unordered Languages - AndersSøgaard + AndersSøgaard 183–190 W07-2426 sogaard-2007-polynomial @@ -3639,7 +3639,7 @@ Interview and Delivery: Dialogue Strategies for Conversational Recommender Systems PontusWärnestål LarsDegerstedt - ArneJönsson + ArneJönsson 199–205 W07-2428 warnestal-etal-2007-interview @@ -3668,7 +3668,7 @@ Decomposing <fixed-case>S</fixed-case>wedish Compounds Using Memory-Based Learning - KarinFriberg Heppin + KarinFriberg Heppin 224–230 W07-2432 friberg-heppin-2007-decomposing @@ -3739,8 +3739,8 @@ Posterior Probability Based Confidence Measures Applied to a Children’s Speech Reading Tracking System - DanielBolanos - Wayne H.Ward + DanielBolanos + Wayne H.Ward 274–277 W07-2442 bolanos-ward-2007-posterior @@ -3767,7 +3767,7 @@ Íslenskur Orðasjóður – Building a Large <fixed-case>I</fixed-case>celandic Corpus ErlaHallsteinsdóttir ThomasEckart - ChrisBiemann + ChrisBiemann UweQuasthoff MatthiasRichter 288–291 @@ -3793,7 +3793,7 @@ A <fixed-case>N</fixed-case>orwegian Letter-to-Sound Engine with <fixed-case>D</fixed-case>anish as a Catalyst - Peter JuelHenrichsen + Peter JuelHenrichsen 305–309 W07-2448 henrichsen-2007-norwegian @@ -3832,7 +3832,7 @@ Lexical Parameters, Based on Corpus Analysis of <fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>wedish Cancer Data, of Relevance for <fixed-case>NLG</fixed-case> DimitriosKokkinakis - MariaToporowska Gronostaj + MariaToporowska Gronostaj CatalinaHallett DavidHardcastle 333–336 @@ -3889,7 +3889,7 @@ Evaluating Evaluation Measures InesRehbein - Josefvan Genabith + Josefvan Genabith 372–379 W07-2460 rehbein-van-genabith-2007-evaluating @@ -3915,7 +3915,7 @@ Recreating Humorous Split Compound Errors in <fixed-case>S</fixed-case>wedish by Using Grammaticality JonasSjöbergh - KenjiAraki + KenjiAraki 389–393 W07-2463 sjobergh-araki-2007-recreating @@ -3930,7 +3930,7 @@ Interpretation of Yes/No Questions as Metaphor Recognition TarmoTruu - HaldurÕim + HaldurÕim MareKoit 398–401 W07-2465 diff --git a/data/xml/W08.xml b/data/xml/W08.xml index bdb0037d16..b779be3d60 100644 --- a/data/xml/W08.xml +++ b/data/xml/W08.xml @@ -5,7 +5,7 @@ Proceedings of the 9th SIGdial Workshop on Discourse and Dialogue W08-01 DavidSchlangen - Beth AnnHockey + Beth AnnHockey Association for Computational Linguistics
Columbus, Ohio
June @@ -33,10 +33,10 @@
Learning N-Best Correction Models from Implicit User Feedback in a Multi-Modal Local Search Application - DanBohus + DanBohus XiaoLi PatrickNguyen - GeoffreyZweig + GeoffreyZweig 21–28 W08-0103 bohus-etal-2008-learning @@ -44,7 +44,7 @@ Agreement and Disputes in Dialogue AlexLascarides - NicholasAsher + NicholasAsher 29–36 W08-0104 lascarides-asher-2008-agreement @@ -61,7 +61,7 @@ Semantic negotiation in dialogue: the mechanisms of alignment GregoryMills - PatHealey + PatHealey 46–53 W08-0106 mills-healey-2008-semantic @@ -69,7 +69,7 @@ Degrees of Grounding Based on Evidence of Understanding AntonioRoque - DavidTraum + DavidTraum 54–63 W08-0107 roque-traum-2008-degrees @@ -143,7 +143,7 @@ Quantifying Ellipsis in Dialogue: an index of mutual understanding MarcusColman ArashEshghi - PatHealey + PatHealey 96–99 W08-0116 colman-etal-2008-quantifying @@ -154,7 +154,7 @@ YosukeMatsusaka YasuharuDen MikaEnomoto - MasatoIshizaki + MasatoIshizaki KatsuyaTakanashi 100–103 W08-0117 @@ -164,20 +164,20 @@ Optimal Dialog in Consumer-Rating Systems using <fixed-case>POMDP</fixed-case> Framework ZhifeiLi PatrickNguyen - GeoffreyZweig + GeoffreyZweig 104–111 W08-0118 li-etal-2008-optimal Training and Evaluation of the <fixed-case>HIS</fixed-case> <fixed-case>POMDP</fixed-case> Dialogue System in Noise - MilicaGašić + MilicaGašić SimonKeizer - FrancoisMairesse + FrancoisMairesse JostSchatzmann BlaiseThomson KaiYu - SteveYoung + SteveYoung 112–119 W08-0119 gasic-etal-2008-training @@ -186,15 +186,15 @@ A Frame-Based Probabilistic Framework for Spoken Dialog Management Using Dialog Examples KyungdukKim CheongjaeLee - SangkeunJung - Gary GeunbaeLee + SangkeunJung + Gary GeunbaeLee 120–127 W08-0120 kim-etal-2008-frame Speaking More Like You: Lexical, Acoustic/Prosodic, and Discourse Entrainment in Spoken Dialogue Systems - JuliaHirschberg + JuliaHirschberg 128 W08-0121 hirschberg-2008-speaking @@ -203,7 +203,7 @@ Discourse Level Opinion Relations: An Annotation Study SwapnaSomasundaran JosefRuppenhofer - JanyceWiebe + JanyceWiebe 129–137 W08-0122 somasundaran-etal-2008-discourse-level @@ -220,7 +220,7 @@ Modeling Vocal Interaction for Text-Independent Participant Characterization in Multi-Party Conversation KornelLaskowski - MariOstendorf + MariOstendorf TanjaSchultz 148–155 W08-0124 @@ -228,7 +228,7 @@ Modelling and Detecting Decisions in Multi-party Dialogue - RaquelFernández + RaquelFernández MatthewFrampton PatrickEhlen MatthewPurver @@ -248,7 +248,7 @@ Evaluation Understudy for Dialogue Coherence Models SudeepGandhe - DavidTraum + DavidTraum 172–181 W08-0127 gandhe-traum-2008-evaluation @@ -256,14 +256,14 @@ A Framework for Model-based Evaluation of Spoken Dialog Systems SebastianMöller - NigelWard + NigelWard 182–189 W08-0128 moller-ward-2008-framework The Effect of Dialogue System Output Style Variation on Users’ Evaluation Judgments and Input Style - IvanaKruijff-Korbayová + IvanaKruijff-Korbayová OlgaKukina 190–197 W08-0129 @@ -272,7 +272,7 @@ Making Grammar-Based Generation Easier to Deploy in Dialogue Systems DavidDeVault - DavidTraum + DavidTraum RonArtstein 198–207 W08-0130 @@ -283,7 +283,7 @@ Proceedings of the Third Workshop on Issues in Teaching Computational Linguistics W08-02 - MarthaPalmer + MarthaPalmer ChrisBrew FeiXia Association for Computational Linguistics @@ -306,7 +306,7 @@ Building a Flexible, Collaborative, Intensive Master’s Program in Computational Linguistics - Emily M.Bender + Emily M.Bender FeiXia ErikBansleben 10–18 @@ -329,7 +329,7 @@ Strategies for Teaching “Mixed” Computational Linguistics Classes - EricFosler-Lussier + EricFosler-Lussier 36–44 W08-0205 fosler-lussier-2008-strategies @@ -361,14 +361,14 @@ Combining Open-Source with Research to Re-engineer a Hands-on Introductory <fixed-case>NLP</fixed-case> Course NitinMadnani - Bonnie J.Dorr + Bonnie J.Dorr 71–79 W08-0209 madnani-dorr-2008-combining Zero to Spoken Dialogue System in One Quarter: Teaching Computational Linguistics to Linguists Using Regulus - Beth AnnHockey + Beth AnnHockey GwenChristian 80–86 W08-0210 @@ -376,8 +376,8 @@ The <fixed-case>N</fixed-case>orth <fixed-case>A</fixed-case>merican Computational Linguistics Olympiad (<fixed-case>NACLO</fixed-case>) - Dragomir R.Radev - LoriLevin + Dragomir R.Radev + LoriLevin Thomas E.Payne 87–96 W08-0211 @@ -385,15 +385,15 @@ Competitive Grammar Writing - JasonEisner - Noah A.Smith + JasonEisner + Noah A.Smith 97–105 W08-0212 eisner-smith-2008-competitive Studying Discourse and Dialogue with <fixed-case>SIDG</fixed-case>rid - Gina-AnneLevow + Gina-AnneLevow 106–113 W08-0213 levow-2008-studying @@ -428,7 +428,7 @@ PhilippKoehn ChristofMonz JoshSchroeder - Cameron ShawFordyce + Cameron ShawFordyce Association for Computational Linguistics
Columbus, Ohio
June @@ -453,7 +453,7 @@ Rich Source-Side Context for Statistical Machine Translation KevinGimpel - Noah A.Smith + Noah A.Smith 9–17 W08-0302 gimpel-smith-2008-rich @@ -461,7 +461,7 @@ Discriminative Word Alignment via Alignment Matrix Modeling JanNiehues - StephanVogel + StephanVogel 18–25 W08-0303 niehues-vogel-2008-discriminative @@ -469,8 +469,8 @@ Regularization and Search for Minimum Error Rate Training DanielCer - DanJurafsky - Christopher D.Manning + DanJurafsky + Christopher D.Manning 26–34 W08-0304 cer-etal-2008-regularization @@ -486,16 +486,16 @@ Using Syntax to Improve Word Alignment Precision for Syntax-Based Machine Translation - VictoriaFossum + VictoriaFossum KevinKnight - StevenAbney + StevenAbney 44–52 W08-0306 fossum-etal-2008-using Using Shallow Syntax Information to Improve Word Alignment and Reordering for <fixed-case>SMT</fixed-case> - Josep M.Crego + Josep M.Crego NizarHabash 53–61 W08-0307 @@ -522,13 +522,13 @@ Limsi’s Statistical Translation Systems for <fixed-case>WMT</fixed-case>‘08 - DanielDéchelotte - GillesAdda + DanielDéchelotte + GillesAdda AlexandreAllauzen - HélèneBonneau-Maynard + HélèneBonneau-Maynard OlivierGalibert Jean-LucGauvain - PhilippeLanglais + PhilippeLanglais FrançoisYvon 107–110 W08-0310 @@ -537,8 +537,8 @@ The <fixed-case>M</fixed-case>eta<fixed-case>M</fixed-case>orpho Translation System AttilaNovák - LászlóTihanyi - GáborPrószéky + LászlóTihanyi + GáborPrószéky 111–114 W08-0311 novak-etal-2008-metamorpho @@ -546,7 +546,7 @@ Meteor, <fixed-case>M</fixed-case>-<fixed-case>BLEU</fixed-case> and <fixed-case>M</fixed-case>-<fixed-case>TER</fixed-case>: Evaluation Metrics for High-Correlation with Human Rankings of Machine Translation Output AbhayaAgarwal - AlonLavie + AlonLavie 115–118 W08-0312 agarwal-lavie-2008-meteor @@ -573,14 +573,14 @@ The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> <fixed-case>N</fixed-case>gram-Based Statistical Machine Translation System for <fixed-case>ACL</fixed-case>-<fixed-case>WMT</fixed-case> 2008 MaximKhalilov - AdolfoHernández H. - Marta R.Costa-jussà - Josep M.Crego - Carlos A.Henríquez Q. + AdolfoHernández H. + Marta R.Costa-jussà + Josep M.Crego + Carlos A.Henríquez Q. PatrikLambert - José A. R.Fonollosa - José B.Mariño - Rafael E.Banchs + José A. R.Fonollosa + José B.Mariño + Rafael E.Banchs 127–130 W08-0315 khalilov-etal-2008-talp @@ -588,7 +588,7 @@ <fixed-case>E</fixed-case>uropean Language Translation with Weighted Finite State Transducers: The <fixed-case>CUED</fixed-case> <fixed-case>MT</fixed-case> System for the 2008 <fixed-case>ACL</fixed-case> Workshop on <fixed-case>SMT</fixed-case> GraemeBlackwood - Adriàde Gispert + Adriàde Gispert JamieBrunning WilliamByrne 131–134 @@ -615,15 +615,15 @@ Phrase-Based and Deep Syntactic <fixed-case>E</fixed-case>nglish-to-<fixed-case>C</fixed-case>zech Statistical Machine Translation - OndřejBojar - JanHajič + OndřejBojar + JanHajič 143–146 W08-0319 bojar-hajic-2008-phrase Improving <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>panish Statistical Machine Translation: Experiments in Domain Adaptation, Sentence Paraphrasing, Tokenization, and Recasing - PreslavNakov + PreslavNakov 147–150 W08-0320 nakov-2008-improving @@ -632,7 +632,7 @@ Improving Word Alignment with Language Model Based Confidence Scores NguyenBach QinGao - StephanVogel + StephanVogel 151–154 W08-0321 bach-etal-2008-improving @@ -661,15 +661,15 @@ VamshiAmbati AlokParlikar ErikPeterson - AlonLavie + AlonLavie 163–166 W08-0324 hanneman-etal-2008-statistical <fixed-case>T</fixed-case>ecto<fixed-case>MT</fixed-case>: Highly Modular <fixed-case>MT</fixed-case> System with Tectogrammatics Used as Transfer Layer - ZdeněkŽabokrtský - JanPtáček + ZdeněkŽabokrtský + JanPtáček PetrPajas 167–170 W08-0325 @@ -687,7 +687,7 @@ Can we Relearn an <fixed-case>RBMT</fixed-case> System? - LoïcDugast + LoïcDugast JeanSenellart PhilippKoehn 175–178 @@ -696,9 +696,9 @@ Using <fixed-case>M</fixed-case>oses to Integrate Multiple Rule-Based Machine Translation Engines into a Hybrid System - AndreasEisele + AndreasEisele ChristianFedermann - HervéSaint-Amand + HervéSaint-Amand MichaelJellinghaus TeresaHerrmann YuChen @@ -708,10 +708,10 @@ Incremental Hypothesis Alignment for Building Confusion Networks with Application to Machine Translation System Combination - Antti-VeikkoRosti + Antti-VeikkoRosti BingZhang SpyrosMatsoukas - RichardSchwartz + RichardSchwartz 183–186 W08-0329 rosti-etal-2008-incremental @@ -733,15 +733,15 @@ A Smorgasbord of Features for Automatic <fixed-case>MT</fixed-case> Evaluation - JesúsGiménez - LluísMàrquez + JesúsGiménez + LluísMàrquez 195–198 W08-0332 gimenez-marquez-2008-smorgasbord Fast, Easy, and Cheap: Construction of Statistical Machine Translation Models with <fixed-case>M</fixed-case>ap<fixed-case>R</fixed-case>educe - ChrisDyer + ChrisDyer AaronCordova AlexMont JimmyLin @@ -752,7 +752,7 @@ Dynamic Model Interpolation for Statistical Machine Translation AndrewFinch - EiichiroSumita + EiichiroSumita 208–215 W08-0334 finch-sumita-2008-dynamic @@ -761,16 +761,16 @@ Improved Statistical Machine Translation by Multiple <fixed-case>C</fixed-case>hinese Word Segmentation RuiqiangZhang KeijiYasuda - EiichiroSumita + EiichiroSumita 216–223 W08-0335 zhang-etal-2008-improved Optimizing <fixed-case>C</fixed-case>hinese Word Segmentation for Machine Translation Performance - Pi-ChuanChang + Pi-ChuanChang MichelGalley - Christopher D.Manning + Christopher D.Manning 224–232 W08-0336 chang-etal-2008-optimizing @@ -794,9 +794,9 @@ Imposing Constraints from the Source Tree on <fixed-case>ITG</fixed-case> Constraints for <fixed-case>SMT</fixed-case> - HirofumiYamamoto + HirofumiYamamoto HideoOkuma - EiichiroSumita + EiichiroSumita 1–9 W08-0401 yamamoto-etal-2008-imposing @@ -804,7 +804,7 @@ A Scalable Decoder for Parsing-Based Machine Translation with Equivalent Language Model State Maintenance ZhifeiLi - SanjeevKhudanpur + SanjeevKhudanpur 10–18 W08-0402 li-khudanpur-2008-scalable @@ -814,7 +814,7 @@ BowenZhou BingXiang XiaodanZhu - YuqingGao + YuqingGao 19–27 W08-0403 zhou-etal-2008-prior @@ -828,7 +828,7 @@ A Rule-Driven Dynamic Programming Decoder for Statistical <fixed-case>MT</fixed-case> - ChristophTillmann + ChristophTillmann 37–45 W08-0405 tillmann-2008-rule @@ -851,7 +851,7 @@ Multiple Reorderings in Phrase-Based Machine Translation NiyuGe - AbeIttycheriah + AbeIttycheriah KishorePapineni 61–68 W08-0408 @@ -869,16 +869,16 @@ Inductive Detection of Language Features via Clustering Minimal Pairs: Toward Feature-Rich Grammars in Machine Translation - Jonathan H.Clark - RobertFrederking - LoriLevin + Jonathan H.Clark + RobertFrederking + LoriLevin 78–86 W08-0410 clark-etal-2008-inductive Syntax-Driven Learning of Sub-Sentential Translation Equivalents and Translation Rules from Parsed Parallel Corpora - AlonLavie + AlonLavie AlokParlikar VamshiAmbati 87–95 @@ -890,7 +890,7 @@ Software Engineering, Testing, and Quality Assurance for Natural Language Processing W08-05 - K. BretonnelCohen + K. BretonnelCohen BobCarpenter Association for Computational Linguistics
Columbus, Ohio
@@ -913,14 +913,14 @@ Type-checking in Formally Non-typed Systems DickCrouch - Tracy HollowayKing + Tracy HollowayKing 3–4 W08-0502 crouch-king-2008-type zymake: A Computational Workflow System for Machine Learning and Natural Language Processing - EricBreck + EricBreck 5–13 W08-0503 breck-2008-zymake @@ -929,8 +929,8 @@ Evaluating the Effects of Treebank Size in a Practical Application for Parsing KenjiSagae YusukeMiyao - RuneSaetre - Jun’ichiTsujii + RuneSaetre + Jun’ichiTsujii 14–20 W08-0504 sagae-etal-2008-evaluating @@ -945,8 +945,8 @@ Software Testing and the Naturally Occurring Data Assumption in Natural Language Processing K. BretonnelCohen - William A.Baumgartner Jr. - LawrenceHunter + William A.Baumgartner Jr. + LawrenceHunter 23–30 W08-0506 cohen-etal-2008-software @@ -971,7 +971,7 @@ Parallel Implementations of Word Alignment Tool QinGao - StephanVogel + StephanVogel 49–57 W08-0509 gao-vogel-2008-parallel @@ -987,7 +987,7 @@ <fixed-case>B</fixed-case>uckwalter-based Lookup Tool as Language Resource for <fixed-case>A</fixed-case>rabic Language Learners JeffreyMicher - ClareVoss + ClareVoss 66–67 W08-0511 micher-voss-2008-buckwalter @@ -995,12 +995,12 @@ Reengineering a Domain-Independent Framework for Spoken Dialogue Systems Filipe M.Martins - AnaMendes + AnaMendes Mácio FreitasViveiros Joana PauloPardal PedroArez - Nuno J.Mamede - João PauloNeto + Nuno J.Mamede + João PauloNeto 68–76 W08-0512 martins-etal-2008-reengineering @@ -1012,10 +1012,10 @@ W08-06 DinaDemner-Fushman SophiaAnaniadou - Kevin BretonnelCohen - JohnPestian - Jun’ichiTsujii - BonnieWebber + Kevin BretonnelCohen + JohnPestian + Jun’ichiTsujii + BonnieWebber Association for Computational Linguistics
Columbus, Ohio
June @@ -1041,7 +1041,7 @@ Extracting Clinical Relationships from Patient Narratives AngusRoberts - RobertGaizauskas + RobertGaizauskas MarkHepple 10–18 W08-0602 @@ -1056,8 +1056,8 @@ Mining the Biomedical Literature for Genic Information - Catalina O.Tudor - K.Vijay-Shanker + Catalina O.Tudor + K.Vijay-Shanker Carl J.Schmidt 28–29 W08-0604 @@ -1076,7 +1076,7 @@ The <fixed-case>B</fixed-case>io<fixed-case>S</fixed-case>cope corpus: annotation for negation, uncertainty and their scope in biomedical texts GyörgySzarvas VeronikaVincze - RichárdFarkas + RichárdFarkas JánosCsirik 38–45 W08-0606 @@ -1092,7 +1092,7 @@ Cascaded Classifiers for Confidence-Based Chemical Named Entity Recognition - PeterCorbett + PeterCorbett AnnCopestake 54–62 W08-0608 @@ -1102,7 +1102,7 @@ How to Make the Most of <fixed-case>NE</fixed-case> Dictionaries in Statistical <fixed-case>NER</fixed-case> YutakaSasaki YoshimasaTsuruoka - JohnMcNaught + JohnMcNaught SophiaAnaniadou 63–70 W08-0609 @@ -1120,15 +1120,15 @@ Knowledge Sources for Word Sense Disambiguation of Biomedical Text MarkStevenson YinkunGuo - RobertGaizauskas - DavidMartinez + RobertGaizauskas + DavidMartinez 80–87 W08-0611 stevenson-etal-2008-knowledge Automatic inference of indexing rules for <fixed-case>MEDLINE</fixed-case> - AurélieNévéol + AurélieNévéol SonyaShooshan VincentClaveau 88–89 @@ -1155,10 +1155,10 @@ A Pilot Annotation to Investigate Discourse Connectivity in Biomedical Text HongYu NadyaFrid - SusanMcRoy + SusanMcRoy RashmiPrasad AlanLee - AravindJoshi + AravindJoshi 92–93 W08-0614 yu-etal-2008-pilot @@ -1167,7 +1167,7 @@ Conditional Random Fields and Support Vector Machines for Disorder Named Entity Recognition in Clinical Texts DingchengLi GuerganaSavova - KarinKipper-Schuler + KarinKipper-Schuler 94–95 W08-0615 li-etal-2008-conditional @@ -1194,7 +1194,7 @@ A preliminary approach to extract drugs by combining <fixed-case>UMLS</fixed-case> resources and <fixed-case>USAN</fixed-case> naming conventions - IsabelSegura-Bedmar + IsabelSegura-Bedmar PalomaMartínez DoaaSamy 100–101 @@ -1204,7 +1204,7 @@ Mapping Clinical Notes to Medical Terminologies at Point of Care YefengWang - JonPatrick + JonPatrick 102–103 W08-0619 wang-patrick-2008-mapping @@ -1212,16 +1212,16 @@ An Approach to Reducing Annotation Costs for <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> MichaelBloodgood - K.Vijay-Shanker + K.Vijay-Shanker 104–105 W08-0620 bloodgood-vijay-shanker-2008-approach Temporal Annotation of Clinical Text - DanielleMowery + DanielleMowery HenkHarkema - WendyChapman + WendyChapman 106–107 W08-0621 mowery-etal-2008-temporal @@ -1230,7 +1230,7 @@ <fixed-case>CBR</fixed-case>-Tagger: a case-based reasoning approach to the gene/protein mention problem MarianaNeves MonicaChagoyen - José MaríaCarazo + José MaríaCarazo AlbertoPascual-Montano 108–109 W08-0622 @@ -1239,7 +1239,7 @@ Textual Information for Predicting Functional Properties of the Genes OanaFrunza - DianaInkpen + DianaInkpen 110–111 W08-0623 frunza-inkpen-2008-textual @@ -1249,7 +1249,7 @@ Pietervan der Horn BartBakker GijsGeleijnse - JanKorst + JanKorst SergeiKurkin 112–113 W08-0624 @@ -1258,7 +1258,7 @@ Statistical Term Profiling for Query Pattern Mining PaulBuitelaar - PinarOezden Wennerberg + PinarOezden Wennerberg SonjaZillner 114–115 W08-0625 @@ -1277,7 +1277,7 @@ YueWang KazuhiroYoshida Jin-DongKim - RuneSaetre + RuneSaetre Jun’ichiTsujii 118–119 W08-0627 @@ -1287,7 +1287,7 @@ Adaptive Information Extraction for Complex Biomedical Tasks DonghuiFeng GullyBurns - EduardHovy + EduardHovy 120–121 W08-0628 feng-etal-2008-adaptive @@ -1297,7 +1297,7 @@ Proceedings of the Tenth Meeting of ACL Special Interest Group on Computational Morphology and Phonology W08-07 - JasonEisner + JasonEisner JeffreyHeinz Association for Computational Linguistics
Columbus, Ohio
@@ -1325,7 +1325,7 @@
A <fixed-case>B</fixed-case>ayesian Model of Natural Language Phonology: Generating Alternations from Underlying Forms - DavidEllis + DavidEllis 12–19 W08-0703 ellis-2008-bayesian @@ -1354,7 +1354,7 @@ Phonotactic Probability and the <fixed-case>M</fixed-case>aori Passive: A Computational Approach - ‘ŌiwiParker Jones + ‘ŌiwiParker Jones 39–48 W08-0707 parker-jones-2008-phonotactic @@ -1362,9 +1362,9 @@ Evaluating an Agglutinative Segmentation Model for <fixed-case>P</fixed-case>ara<fixed-case>M</fixed-case>or ChristianMonson - AlonLavie - JaimeCarbonell - LoriLevin + AlonLavie + JaimeCarbonell + LoriLevin 49–58 W08-0708 monson-etal-2008-evaluating @@ -1389,11 +1389,11 @@ A Multimodal Home Entertainment Interface via a Mobile Device AlexanderGruenstein - Bo-June PaulHsu - JamesGlass - StephanieSeneff + Bo-June PaulHsu + JamesGlass + StephanieSeneff LeeHetherington - ScottCyphers + ScottCyphers IbrahimBadr ChaoWang SeanLiu @@ -1406,9 +1406,9 @@ KristeKrstovski MichaelDecerbo RohitPrasad - DavidStallard + DavidStallard ShirinSaleem - PremkumarNatarajan + PremkumarNatarajan 10–12 W08-0802 krstovski-etal-2008-wearable @@ -1417,7 +1417,7 @@ Information extraction using finite state automata and syllable n-grams in a mobile environment Choong-NyoungSeon HarksooKim - JungyunSeo + JungyunSeo 13–18 W08-0803 seon-etal-2008-information @@ -1433,7 +1433,7 @@ Mixture Pruning and Roughening for Scalable Acoustic Models DavidHuggins-Daines - Alexander I.Rudnicky + Alexander I.Rudnicky 21–24 W08-0805 huggins-daines-rudnicky-2008-mixture @@ -1457,7 +1457,7 @@ Proceedings of the Third Workshop on Innovative Use of NLP for Building Educational Applications - JoelTetreault + JoelTetreault JillBurstein RacheleDe Felice Association for Computational Linguistics @@ -1480,16 +1480,16 @@ Classification Errors in a Domain-Independent Assessment System - Rodney D.Nielsen - WayneWard - James H.Martin + Rodney D.Nielsen + WayneWard + James H.Martin 10–18 W08-0902 nielsen-etal-2008-classification King Alfred: A Translation Environment for Learners of <fixed-case>A</fixed-case>nglo-<fixed-case>S</fixed-case>axon <fixed-case>E</fixed-case>nglish - Lisa N.Michaud + Lisa N.Michaud 19–26 W08-0903 michaud-2008-king @@ -1497,7 +1497,7 @@ Recognizing Noisy <fixed-case>R</fixed-case>omanized <fixed-case>J</fixed-case>apanese Words in Learner <fixed-case>E</fixed-case>nglish RyoNagata - Jun-ichiKakegawa + Jun-ichiKakegawa HiromiSugimoto YukikoYabuta 27–35 @@ -1506,7 +1506,7 @@ An Annotated Corpus Outside Its Original Context: A Corpus-Based Exercise Book - BarboraHladká + BarboraHladká OndřejKučera 36–43 W08-0905 @@ -1522,11 +1522,11 @@ Learner Characteristics and Feedback in Tutorial Dialogue - KristyBoyer - RobertPhillips + KristyBoyer + RobertPhillips MichaelWallis MladenVouk - JamesLester + JamesLester 53–61 W08-0907 boyer-etal-2008-learner @@ -1577,7 +1577,7 @@ Diagnosing Meaning Errors in Short Answers to Reading Comprehension Questions StaceyBailey - DetmarMeurers + DetmarMeurers 107–115 W08-0913 bailey-meurers-2008-diagnosing @@ -1587,7 +1587,7 @@ Proceedings of the Workshop on Parsing German W08-10 - SandraKübler + SandraKübler GeraldPenn Association for Computational Linguistics
Columbus, Ohio
@@ -1623,7 +1623,7 @@ Revisiting the Impact of Different Annotation Schemes on <fixed-case>PCFG</fixed-case> Parsing: A Grammatical Dependency Evaluation AdrianeBoyd - DetmarMeurers + DetmarMeurers 24–32 W08-1004 boyd-meurers-2008-revisiting @@ -1638,8 +1638,8 @@ Parsing Three <fixed-case>G</fixed-case>erman Treebanks: Lexicalized and Unlexicalized Baselines - AnnaRafferty - Christopher D.Manning + AnnaRafferty + Christopher D.Manning 40–46 W08-1006 rafferty-manning-2008-parsing @@ -1663,7 +1663,7 @@ Proceedings of the Fifth International Natural Language Generation Conference - MichaelWhite + MichaelWhite CrystalNakatsu DavidMcDonald Association for Computational Linguistics @@ -1694,8 +1694,8 @@ Generating Textual Summaries of Bar Charts SenizDemir - SandraCarberry - KathleenMcCoy + SandraCarberry + KathleenMcCoy 7–15 W08-1103 demir-etal-2008-generating @@ -1703,9 +1703,9 @@ Using Spatial Reference Frames to Generate Grounded Textual Summaries of Georeferenced Data RossTurner - SomayajuluSripada + SomayajuluSripada EhudReiter - IanDavy + IanDavy 16–24 W08-1104 turner-etal-2008-using @@ -1721,7 +1721,7 @@ Extractive vs. <fixed-case>NLG</fixed-case>-based Abstractive Summarization of Evaluative Text: The Effect of Corpus Controversiality GiuseppeCarenini - Jackie C. K.Cheung + Jackie C. K.Cheung 33–41 W08-1106 carenini-cheung-2008-extractive @@ -1738,7 +1738,7 @@ Attribute Selection for Referring Expression Generation: New Algorithms and Evaluation Methods AlbertGatt - AnjaBelz + AnjaBelz 50–58 W08-1108 gatt-belz-2008-attribute @@ -1754,7 +1754,7 @@ Using Tactical <fixed-case>NLG</fixed-case> to Induce Affective States: Empirical Investigations Ielkavan der Sluis - ChrisMellish + ChrisMellish 68–76 W08-1110 van-der-sluis-mellish-2008-using @@ -1762,7 +1762,7 @@ Practical Grammar-Based <fixed-case>NLG</fixed-case> from Examples DavidDeVault - DavidTraum + DavidTraum RonArtstein 77–85 W08-1111 @@ -1770,16 +1770,16 @@ Accurate and Robust <fixed-case>LFG</fixed-case>-Based Generation for <fixed-case>C</fixed-case>hinese - YuqingGuo + YuqingGuo HaifengWang - Josefvan Genabith + Josefvan Genabith 86–94 W08-1112 guo-etal-2008-accurate Automated Metrics That Agree With Human Judgements On Generated Output for an Embodied Conversational Agent - Mary EllenFoster + Mary EllenFoster 95–103 W08-1113 foster-2008-automated @@ -1811,7 +1811,7 @@ The Effect of Dialogue System Output Style Variation on Users’ Evaluation Judgments and Input Style - IvanaKruijff-Korbayová + IvanaKruijff-Korbayová CiprianGerstenberger OlgaKukina JanSchehl @@ -1822,7 +1822,7 @@ Evaluating an Ontology-Driven <fixed-case>WYSIWYM</fixed-case> Interface FeikjeHielkema - ChrisMellish + ChrisMellish PeterEdwards 138–146 W08-1118 @@ -1848,7 +1848,7 @@ Degree of Abstraction in Referring Expression Generation and its Relation with the Construction of the Contrast Set RaquelHervás - PabloGervás + PabloGervás 161–164 W08-1121 hervas-gervas-2008-degree @@ -1858,7 +1858,7 @@ DeirdreHogan JenniferFoster JoachimWagner - Josefvan Genabith + Josefvan Genabith 165–168 W08-1122 hogan-etal-2008-parser @@ -1867,14 +1867,14 @@ Creation of a New Domain and Evaluation of Comparison Generation in a Natural Language Generation System MatthewMarge AmyIsard - JohannaMoore + JohannaMoore 169–172 W08-1123 marge-etal-2008-creation Generating Baseball Summaries from Multiple Perspectives by Reordering Content - AliceOh + AliceOh HowardShrobe 173–176 W08-1124 @@ -1889,7 +1889,7 @@ <fixed-case>REG</fixed-case> Challenge Preface - AnjaBelz + AnjaBelz AlbertGatt 181–182 W08-1126 @@ -1897,7 +1897,7 @@ The <fixed-case>GREC</fixed-case> Challenge 2008: Overview and Evaluation Results - AnjaBelz + AnjaBelz EricKow JetteViethen AlbertGatt @@ -1915,7 +1915,7 @@ <fixed-case>CNTS</fixed-case>: Memory-Based Learning of Generating Repeated References IrisHendrickx - WalterDaelemans + WalterDaelemans KimLuyckx RoserMorante VincentVan Asch @@ -1926,7 +1926,7 @@ <fixed-case>OSU</fixed-case>-2: Generating Referring Expressions with a Maximum Entropy Classifier EmilyJamison - DennisMehay + DennisMehay 196–197 W08-1130 jamison-mehay-2008-osu @@ -1934,7 +1934,7 @@ The <fixed-case>TUNA</fixed-case> Challenge 2008: Overview and Evaluation Results AlbertGatt - AnjaBelz + AnjaBelz EricKow 198–206 W08-1131 @@ -1949,16 +1949,16 @@ Referring Expression Generation Using Speaker-based Attribute Selection and Trainable Realization (<fixed-case>ATTR</fixed-case>) - GiuseppeDi Fabbrizio - Amanda J.Stent - SrinivasBangalore + GiuseppeDi Fabbrizio + Amanda J.Stent + SrinivasBangalore 211–214 W08-1133 di-fabbrizio-etal-2008-referring <fixed-case>NIL</fixed-case>-<fixed-case>UCM</fixed-case>: Most-Frequent-Value-First Attribute Selection and Best-Scoring-Choice Realization - PabloGervás + PabloGervás RaquelHervás CarlosLeón 215–218 @@ -1968,14 +1968,14 @@ <fixed-case>USP</fixed-case>-<fixed-case>EACH</fixed-case> Frequency-based Greedy Attribute Selection for Referring Expressions Generation Diego Jesusde Lucena - IvandréParaboni + IvandréParaboni 219–220 W08-1135 de-lucena-paraboni-2008-usp Referring Expression Generation Challenge 2008 <fixed-case>DIT</fixed-case> System Descriptions (<fixed-case>DIT</fixed-case>-<fixed-case>FBI</fixed-case>, <fixed-case>DIT</fixed-case>-<fixed-case>TVAS</fixed-case>, <fixed-case>DIT</fixed-case>-<fixed-case>CBSR</fixed-case>, <fixed-case>DIT</fixed-case>-<fixed-case>RBR</fixed-case>, <fixed-case>DIT</fixed-case>-<fixed-case>FBI</fixed-case>-<fixed-case>CBSR</fixed-case>, <fixed-case>DIT</fixed-case>-<fixed-case>TVAS</fixed-case>-<fixed-case>RBR</fixed-case>) - John D.Kelleher + John D.Kelleher BrianMac Namee 221–224 W08-1136 @@ -1990,8 +1990,8 @@ <fixed-case>GRAPH</fixed-case>: The Costs of Redundancy in Referring Expressions - EmielKrahmer - MariëtTheune + EmielKrahmer + MariëtTheune JetteViethen IrisHendrickx 227–229 @@ -2001,15 +2001,15 @@ <fixed-case>JU</fixed-case>-<fixed-case>PTBSGRE</fixed-case>: <fixed-case>GRE</fixed-case> Using Prefix Tree Based Structure SibabrataPaladhi - SivajiBandyopadhyay + SivajiBandyopadhyay 230–231 W08-1139 paladhi-bandyopadhyay-2008-ju From <fixed-case>TUNA</fixed-case> Attribute Sets to <fixed-case>P</fixed-case>ortuguese Text: a First Report - Daniel BastosPereira - IvandréParaboni + Daniel BastosPereira + IvandréParaboni 232–233 W08-1140 pereira-paraboni-2008-tuna @@ -2026,9 +2026,9 @@ Coling 2008: Proceedings of the workshop on Human Judgements in Computational Linguistics W08-12 RonArtstein - GemmaBoleda + GemmaBoleda FrankKeller - SabineSchulte im Walde + SabineSchulte im Walde Coling 2008 Organizing Committee
Manchester, UK
August @@ -2041,8 +2041,8 @@ Invited Talk: The Relevance of a Cognitive Model of the Mental Lexicon to Automatic Word Sense Disambiguation - MarthaPalmer - SusanBrown + MarthaPalmer + SusanBrown 1 W08-1201 palmer-brown-2008-invited @@ -2069,15 +2069,15 @@ Jean-BaptisteBerthelin CyrilGrouin MartineHurault-Plantet - PatrickParoubek + PatrickParoubek 17–23 W08-1204 berthelin-etal-2008-human Native Judgments of Non-Native Usage: Experiments in Preposition Error Detection - JoelTetreault - MartinChodorow + JoelTetreault + MartinChodorow 24–32 W08-1205 tetreault-chodorow-2008-native @@ -2122,21 +2122,21 @@ Coling 2008: Proceedings of the workshop on Cross-Framework and Cross-Domain Parser Evaluation W08-13 JohanBos - EdwardBriscoe + EdwardBriscoe AoifeCahill - JohnCarroll + JohnCarroll StephenClark AnnCopestake - DanFlickinger - Josefvan Genabith + DanFlickinger + Josefvan Genabith JuliaHockenmaier - AravindJoshi - RonaldKaplan - Tracy HollowayKing - SandraKuebler + AravindJoshi + RonaldKaplan + Tracy HollowayKing + SandraKuebler DekangLin Jan ToreLønning - ChristopherManning + ChristopherManning YusukeMiyao JoakimNivre StephanOepen @@ -2155,7 +2155,7 @@ The <fixed-case>S</fixed-case>tanford Typed Dependencies Representation - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe Christopher D.Manning 1–8 W08-1301 @@ -2163,15 +2163,15 @@ Exploring an Auxiliary Distribution Based Approach to Domain Adaptation of a Syntactic Disambiguation Model - BarbaraPlank - Gertjanvan Noord + BarbaraPlank + Gertjanvan Noord 9–16 W08-1302 plank-van-noord-2008-exploring Toward an Underspecifiable Corpus Annotation Scheme - YukaTateisi + YukaTateisi 17–23 W08-1303 tateisi-2008-toward @@ -2185,10 +2185,10 @@ Parser Evaluation Across Frameworks without Format Conversion - Wai LokTam + Wai LokTam YoSato YusukeMiyao - JunichiTsujii + JunichiTsujii 29–35 W08-1305 tam-etal-2008-parser @@ -2197,10 +2197,10 @@ Large Scale Production of Syntactic Annotations to Move Forward AnneVilnat GilFrancopoulo - OlivierHamon + OlivierHamon SylvainLoiseau - PatrickParoubek - EricVillemonte de la Clergerie + PatrickParoubek + EricVillemonte de la Clergerie 36–43 W08-1306 vilnat-etal-2008-large @@ -2216,7 +2216,7 @@ ‘Deep’ Grammatical Relations for Semantic Interpretation MarkMcConville - Myroslava O.Dzikovska + Myroslava O.Dzikovska 51–58 W08-1308 mcconville-dzikovska-2008-deep @@ -2226,7 +2226,7 @@ Coling 2008: Proceedings of the workshop Multi-source Multilingual Information Extraction and Summarization W08-14 - SivajiBandyopadhyay + SivajiBandyopadhyay ThierryPoibeau HoracioSaggion RomanYangarber @@ -2242,7 +2242,7 @@ Generating Image Captions using Topic Focused Multi-document Summarization - RobertGaizauskas + RobertGaizauskas 1 W08-1401 gaizauskas-2008-generating @@ -2250,8 +2250,8 @@ Learning to Match Names Across Languages InderjeetMani - AlexYeh - SherriCondon + AlexYeh + SherriCondon 2–9 W08-1402 mani-etal-2008-learning @@ -2274,7 +2274,7 @@ <fixed-case>M</fixed-case>ulti<fixed-case>S</fixed-case>um: Query-Based Multi-Document Summarization - MikeRosner + MikeRosner CarlCamilleri 25–32 W08-1405 @@ -2282,8 +2282,8 @@ Mixed-Source Multi-Document Speech-to-Text Summarization - RicardoRibeiro - David Martinsde Matos + RicardoRibeiro + David Martinsde Matos 33–40 W08-1406 ribeiro-de-matos-2008-mixed @@ -2291,7 +2291,7 @@ Evaluating automatically generated user-focused multi-document summaries for geo-referenced images AhmetAker - RobertGaizauskas + RobertGaizauskas 41–48 W08-1407 aker-gaizauskas-2008-evaluating @@ -2317,11 +2317,11 @@ Coling 2008: Proceedings of the workshop on Speech Processing for Safety Critical Translation and Pervasive Applications W08-15 - PierretteBouillon + PierretteBouillon FarzadEhsani - RobertFrederking - MichaelMcTear - MannyRayner + RobertFrederking + MichaelMcTear + MannyRayner Coling 2008 Organizing Committee
Manchester, UK
August @@ -2335,8 +2335,8 @@ Mitigation of Data Sparsity in Classifier-Based Translation EmilEttelaie - Panayiotis G.Georgiou - Shrikanth S.Narayanan + Panayiotis G.Georgiou + Shrikanth S.Narayanan 1–4 W08-1501 ettelaie-etal-2008-mitigation @@ -2350,10 +2350,10 @@ An Integrated Dialog Simulation Technique for Evaluating Spoken Dialog Systems - SangkeunJung + SangkeunJung CheongjaeLee KyungdukKim - Gary GeunbaeLee + Gary GeunbaeLee 9–16 W08-1503 jung-etal-2008-integrated @@ -2381,7 +2381,7 @@ JaneBrotanek GlennFlores SoniaHalimi - Beth AnnHockey + Beth AnnHockey HitoshiIsahara KyokoKanzaki ElisabethKron @@ -2395,7 +2395,7 @@ Language Understanding in <fixed-case>M</fixed-case>aryland Virtual Patient - SergeiNirenburg + SergeiNirenburg StephenBeale MarjorieMcShane BruceJarrell @@ -2415,7 +2415,7 @@ Speech Translation for Triage of Emergency Phonecalls in Minority Languages UdhyakumarNallasamy - AlanBlack + AlanBlack TanjaSchultz RobertFrederking JerryWeltman @@ -2426,9 +2426,9 @@ Speech to Speech Translation for Nurse Patient Interaction FarzadEhsani - JimKimzey + JimKimzey ElaineZuber - DemitriosMaster + DemitriosMaster KarenSudre 54–59 W08-1510 @@ -2441,7 +2441,7 @@ GlennFlores FarzadEhsani MarianneStarlander - Beth AnnHockey + Beth AnnHockey JaneBrotanek LukasBiewald 60–63 @@ -2453,8 +2453,8 @@ Coling 2008: Proceedings of the workshop on Knowledge and Reasoning for Answering Questions W08-16 - Marie-FrancineMoens - PatrickSaint-Dizier + Marie-FrancineMoens + PatrickSaint-Dizier Coling 2008 Organizing Committee
Manchester, UK
August @@ -2467,7 +2467,7 @@ Semantic Chunk Annotation for complex questions using Conditional Random Field - ShixiFan + ShixiFan YaoyunZhang Wing W. Y.Ng XuanWang @@ -2479,8 +2479,8 @@ Context Inducing Nouns CharlottePrice - Valeriade Paiva - Tracy HollowayKing + Valeriade Paiva + Tracy HollowayKing 9–16 W08-1602 price-etal-2008-context @@ -2496,7 +2496,7 @@ Context Modelling for <fixed-case>IQA</fixed-case>: the Role of Tasks and Entities - RaffaellaBernardi + RaffaellaBernardi ManuelKirschner 25–32 W08-1604 @@ -2523,7 +2523,7 @@ Coling 2008: Proceedings of the workshop on Grammar Engineering Across Frameworks W08-17 StephenClark - Tracy HollowayKing + Tracy HollowayKing Coling 2008 Organizing Committee
Manchester, England
August @@ -2549,9 +2549,9 @@ Making Speech Look Like Text in the Regulus Development Environment ElisabethKron - MannyRayner + MannyRayner MarianneSantaholma - PierretteBouillon + PierretteBouillon AgnesLisowska 9–16 W08-1702 @@ -2559,8 +2559,8 @@ A More Precise Analysis of Punctuation for Broad-Coverage Surface Realization with <fixed-case>CCG</fixed-case> - MichaelWhite - RajakrishnanRajkumar + MichaelWhite + RajakrishnanRajkumar 17–24 W08-1703 white-rajkumar-2008-precise @@ -2575,7 +2575,7 @@ Speeding up <fixed-case>LFG</fixed-case> Parsing Using <fixed-case>C</fixed-case>-Structure Pruning AoifeCahill - John T.Maxwell III + John T.Maxwell III PaulMeurer ChristianRohrer VictoriaRosén @@ -2592,7 +2592,7 @@ Designing Testsuites for Grammar-based Systems in Applications - Valeriade Paiva + Valeriade Paiva Tracy HollowayKing 49–56 W08-1707 @@ -2612,7 +2612,7 @@ Coling 2008: Proceedings of the 2nd workshop on Information Retrieval for Question Answering W08-18 - Mark A.Greenwood + Mark A.Greenwood Coling 2008 Organizing Committee
Manchester, UK
August @@ -2625,8 +2625,8 @@ Improving Text Retrieval Precision and Answer Accuracy in Question Answering Systems - MatthewBilotti - EricNyberg + MatthewBilotti + EricNyberg 1–8 W08-1801 bilotti-nyberg-2008-improving @@ -2634,7 +2634,7 @@ Exact Phrases in Information Retrieval for Question Answering SvetlanaStoyanchev - Young CholSong + Young CholSong WilliamLahti 9–16 W08-1802 @@ -2642,7 +2642,7 @@ Simple is Best: Experiments with Different Document Segmentation Strategies for Passage Retrieval - JörgTiedemann + JörgTiedemann JoriMur 17–25 W08-1803 @@ -2658,9 +2658,9 @@ A Data Driven Approach to Query Expansion in Question Answering - LeonDerczynski + LeonDerczynski JunWang - RobertGaizauskas + RobertGaizauskas Mark A.Greenwood 34–41 W08-1805 @@ -2677,8 +2677,8 @@ Using Lexico-Semantic Information for Query Expansion in Passage Retrieval for Question Answering - Lonnekevan der Plas - JörgTiedemann + Lonnekevan der Plas + JörgTiedemann 50–57 W08-1807 van-der-plas-tiedemann-2008-using @@ -2687,7 +2687,7 @@ Evaluation of Automatically Reformulated Questions in Question Series RichardShaw BenSolway - RobertGaizauskas + RobertGaizauskas Mark A.Greenwood 58–65 W08-1808 @@ -2696,15 +2696,15 @@ Topic Indexing and Retrieval for Factoid <fixed-case>QA</fixed-case> KisuhAhn - BonnieWebber + BonnieWebber 66–73 W08-1809 ahn-webber-2008-topic Indexing on Semantic Roles for Question Answering - Luiz AugustoPizzato - DiegoMollá + Luiz AugustoPizzato + DiegoMollá 74–81 W08-1810 pizzato-molla-2008-indexing @@ -2754,14 +2754,14 @@ <fixed-case>P</fixed-case>ro<fixed-case>POSEL</fixed-case>: a human-oriented prosody and <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> <fixed-case>E</fixed-case>nglish lexicon for machine-learning and <fixed-case>NLP</fixed-case> ClaireBrierley - EricAtwell + EricAtwell 25–31 W08-1904 brierley-atwell-2008-proposel Natural Language Searching in Onomasiological Dictionaries - GerardoSierra + GerardoSierra 32–38 W08-1905 sierra-2008-natural @@ -2788,8 +2788,8 @@ Extracting Sense Trees from the <fixed-case>R</fixed-case>omanian Thesaurus by Sense Segmentation & Dependency Parsing NeculaiCurteanu - AlexMoruz - DianaTrandabăţ + AlexMoruz + DianaTrandabăţ 55–63 W08-1908 curteanu-etal-2008-extracting @@ -2822,7 +2822,7 @@ Toward a cognitive organization for electronic dictionaries, the case for semantic proxemy BrunoGaume KarineDuvignau - LaurentPrévot + LaurentPrévot YannDesalle 86–93 W08-1912 @@ -2832,7 +2832,7 @@ Cognitively Salient Relations for Multilingual Lexicography GerhardKremer AndreaAbel - MarcoBaroni + MarcoBaroni 94–101 W08-1913 kremer-etal-2008-cognitively @@ -2849,10 +2849,10 @@ Coling 2008: Proceedings of the 3rd Textgraphs workshop on Graph-based Algorithms for Natural Language Processing W08-20 - IrinaMatveeva - ChrisBiemann + IrinaMatveeva + ChrisBiemann MonojitChoudhury - MonaDiab + MonaDiab Coling 2008 Organizing Committee
Manchester, UK
August @@ -2892,7 +2892,7 @@ Encoding Tree Pair-Based Graphs in Learning Algorithms: The Textual Entailment Recognition Case AlessandroMoschitti - Fabio MassimoZanzotto + Fabio MassimoZanzotto 25–32 W08-2004 moschitti-zanzotto-2008-encoding @@ -2927,14 +2927,14 @@ Concept-Graph Based Biomedical Automatic Summarization Using Ontologies LauraPlaza AlbertoDíaz - PabloGervás + PabloGervás 53–56 W08-2008 plaza-etal-2008-concept Random Graph Model Simulations of Semantic Networks for Associative Concept Dictionaries - HiroyukiAkama + HiroyukiAkama JaeyoungJung TerryJoyce MakiMiyake @@ -2962,7 +2962,7 @@ Semantic Parsing for High-Precision Semantic Role Labelling PaolaMerlo - GabrieleMusillo + GabrieleMusillo 1–8 W08-2101 merlo-musillo-2008-semantic @@ -2970,7 +2970,7 @@ <fixed-case>TAG</fixed-case>, Dynamic Programming, and the Perceptron for Efficient, Feature-Rich Parsing XavierCarreras - MichaelCollins + MichaelCollins TerryKoo 9–16 W08-2102 @@ -2986,14 +2986,14 @@ Linguistic features in data-driven dependency parsing - LiljaOvrelid + LiljaOvrelid 25–32 W08-2104 ovrelid-2008-linguistic Transforming Meaning Representation Grammars to Improve Semantic Parsing - RohitKate + RohitKate 33–40 W08-2105 kate-2008-transforming @@ -3065,7 +3065,7 @@ Fully Unsupervised Graph-Based Discovery of General-Specific Noun Relationships from Web Corpora Frequency Counts - GaëlDias + GaëlDias RaychoMukelov GuillaumeCleuziou 97–104 @@ -3074,7 +3074,7 @@ Acquiring Knowledge from the Web to be used as Selectors for Noun Sense Disambiguation - Hansen A.Schwartz + Hansen A.Schwartz FernandoGomez 105–112 W08-2114 @@ -3083,7 +3083,7 @@ Automatic <fixed-case>C</fixed-case>hinese Catchword Extraction Based on Time Series Analysis HanRen - DonghongJi + DonghongJi JingWan LeiHan 113–118 @@ -3092,8 +3092,8 @@ Easy as <fixed-case>ABC</fixed-case>? Facilitating Pictorial Communication via Semantically Enhanced Layout - Andrew B.Goldberg - XiaojinZhu + Andrew B.Goldberg + XiaojinZhu Charles R.Dyer MohamedEldawy LijieHeng @@ -3103,15 +3103,15 @@ A Nearest-Neighbor Approach to the Automatic Analysis of <fixed-case>A</fixed-case>ncient <fixed-case>G</fixed-case>reek Morphology - JohnLee + JohnLee 127–134 W08-2117 lee-2008-nearest Context-based <fixed-case>A</fixed-case>rabic Morphological Analysis for Machine Translation - ThuyLinhNguyen - StephanVogel + ThuyLinhNguyen + StephanVogel 135–142 W08-2118 nguyen-vogel-2008-context @@ -3120,18 +3120,18 @@ A Tree-to-String Phrase-based Model for Statistical Machine Translation Thai PhuongNguyen AkiraShimazu - Tu-BaoHo - Minh LeNguyen - Vinh VanNguyen + Tu-BaoHo + Minh LeNguyen + Vinh VanNguyen 143–150 W08-2119 nguyen-etal-2008-tree Trainable Speaker-Based Referring Expression Generation - GiuseppeDi Fabbrizio - AmandaStent - SrinivasBangalore + GiuseppeDi Fabbrizio + AmandaStent + SrinivasBangalore 151–158 W08-2120 di-fabbrizio-etal-2008-trainable @@ -3140,8 +3140,8 @@ The <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2008 Shared Task on Joint Parsing of Syntactic and Semantic Dependencies MihaiSurdeanu RichardJohansson - AdamMeyers - LluísMàrquez + AdamMeyers + LluísMàrquez JoakimNivre 159–177 W08-2121 @@ -3149,9 +3149,9 @@ A Latent Variable Model of Synchronous Parsing for Syntactic and Semantic Dependencies - JamesHenderson + JamesHenderson PaolaMerlo - GabrieleMusillo + GabrieleMusillo IvanTitov 178–182 W08-2122 @@ -3168,7 +3168,7 @@ A Joint Model for Parsing Syntactic and Semantic Dependencies XavierLluís - LluísMàrquez + LluísMàrquez 188–192 W08-2124 lluis-marquez-2008-joint @@ -3176,7 +3176,7 @@ Collective Semantic Role Labelling with <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic SebastianRiedel - IvanMeza-Ruiz + IvanMeza-Ruiz 193–197 W08-2125 riedel-meza-ruiz-2008-collective @@ -3193,7 +3193,7 @@ Parsing Syntactic and Semantic Dependencies with Two Single-Stage Maximum Entropy Models HaiZhao - ChunyuKit + ChunyuKit 203–207 W08-2127 zhao-kit-2008-parsing @@ -3201,7 +3201,7 @@ A Combined Memory-Based Semantic Role Labeler of <fixed-case>E</fixed-case>nglish RoserMorante - WalterDaelemans + WalterDaelemans VincentVan Asch 208–212 W08-2128 @@ -3210,7 +3210,7 @@ A Puristic Approach for Joint Dependency Parsing and Semantic Role Labeling AlexanderVolokh - GünterNeumann + GünterNeumann 213–217 W08-2129 volokh-neumann-2008-puristic @@ -3218,7 +3218,7 @@ Discriminative Learning of Syntactic and Semantic Dependencies LuLi - ShixiFan + ShixiFan XuanWang XiaolongWang 218–222 @@ -3239,16 +3239,16 @@ YotaroWatanabe MasakazuIwatate MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 228–232 W08-2132 watanabe-etal-2008-pipeline Semantic Dependency Parsing using N-best Semantic Role Sequences and Roleset Information - Joo-YoungLee + Joo-YoungLee Han-CheolCho - Hae-ChangRim + Hae-ChangRim 233–237 W08-2133 lee-etal-2008-semantic @@ -3268,7 +3268,7 @@ The Integration of Dependency Relation Classification and Semantic Role Labeling Using Bilayer Maximum Entropy <fixed-case>M</fixed-case>arkov Models - WeiweiSun + WeiweiSun HongzhanLi ZhifangSui 243–247 @@ -3291,8 +3291,8 @@ Dependency Tree-based <fixed-case>SRL</fixed-case> with Proper Pruning and Extensive Feature Engineering HonglingWang HonglinWang - GuodongZhou - QiaomingZhu + GuodongZhou + QiaomingZhu 253–257 W08-2137 wang-etal-2008-dependency @@ -3329,7 +3329,7 @@ Semantics in Text Processing. STEP 2008 Conference Proceedings JohanBos - RodolfoDelmonte + RodolfoDelmonte College Publications 2008 step @@ -3340,7 +3340,7 @@ A New Life for Semantic Annotations? - HarryBunt + HarryBunt 1–2 W08-2201 bunt-2008-new @@ -3350,14 +3350,14 @@ PierpaoloBasile Marcode Gemmis PasqualeLops - GiovanniSemeraro + GiovanniSemeraro 5–16 W08-2202 basile-etal-2008-combining Semantic Representations of Syntactically Marked Discourse Status in Crosslinguistic Perspective - Emily M.Bender + Emily M.Bender DavidGoss-Grubbs 17–29 W08-2203 @@ -3365,7 +3365,7 @@ High Precision Analysis of <fixed-case>NP</fixed-case>s with a Deep Processing Grammar - AntónioBranco + AntónioBranco FranciscoCosta 31–43 W08-2204 @@ -3375,9 +3375,9 @@ Augmenting <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for Deep Understanding of Text PeterClark ChristianeFellbaum - Jerry R.Hobbs - PhilHarrison - William R.Murray + Jerry R.Hobbs + PhilHarrison + William R.Murray JohnThompson 45–57 W08-2205 @@ -3393,7 +3393,7 @@ <fixed-case>K</fixed-case>now<fixed-case>N</fixed-case>et: A Proposal for Building Highly Connected and Dense Knowledge Bases from the <fixed-case>W</fixed-case>eb MontseCuadros - GermanRigau + GermanRigau 71–84 W08-2207 cuadros-rigau-2008-knownet-proposal @@ -3403,7 +3403,7 @@ DiegoDe Cao DaniloCroce MarcoPennacchiotti - RobertoBasili + RobertoBasili 85–101 W08-2208 de-cao-etal-2008-combining @@ -3411,7 +3411,7 @@ Answering Why-Questions in Closed Domains from a Discourse Model RodolfoDelmonte - EmanuelePianta + EmanuelePianta 103–114 W08-2209 delmonte-pianta-2008-answering @@ -3419,7 +3419,7 @@ Analyzing the Explanation Structure of Procedural Texts: Dealing with Advice and Warnings LionelFontan - PatrickSaint-Dizier + PatrickSaint-Dizier 115–127 W08-2210 fontan-saint-dizier-2008-analyzing @@ -3427,7 +3427,7 @@ From Predicting Predominant Senses to Local Context for Word Sense Disambiguation RobKoeling - DianaMcCarthy + DianaMcCarthy 129–138 W08-2211 koeling-mccarthy-2008-predicting @@ -3435,7 +3435,7 @@ Automatic Fine-Grained Semantic Classification for Domain Adaptation MariaLiakata - StephenPulman + StephenPulman 139–153 W08-2212 liakata-pulman-2008-automatic @@ -3452,14 +3452,14 @@ The Idiom–Reference Connection MarjorieMcShane - SergeiNirenburg + SergeiNirenburg 165–177 W08-2214 mcshane-nirenburg-2008-idiom Resolving Paraphrases to Support Modeling Language Perception in an Intelligent Agent - SergeiNirenburg + SergeiNirenburg MarjorieMcShane StephenBeale 179–192 @@ -3478,7 +3478,7 @@ Refining the Meaning of Sense Labels in <fixed-case>PDTB</fixed-case>: “Concession” LivioRobaldo EleniMiltsakaki - Jerry R.Hobbs + Jerry R.Hobbs 207–219 W08-2217 robaldo-etal-2008-refining @@ -3493,7 +3493,7 @@ Open Knowledge Extraction through Compositional Language Processing BenjaminVan Durme - LenhartSchubert + LenhartSchubert 239–254 W08-2219 van-durme-schubert-2008-open @@ -3508,7 +3508,7 @@ <fixed-case>B</fixed-case>oeing’s <fixed-case>NLP</fixed-case> System and the Challenges of Semantic Representation PeterClark - PhilHarrison + PhilHarrison 263–276 W08-2221 clark-harrison-2008-boeings @@ -3529,7 +3529,7 @@ <fixed-case>LXG</fixed-case>ram in the Shared Task “<fixed-case>C</fixed-case>omparing <fixed-case>S</fixed-case>emantic <fixed-case>R</fixed-case>epresentations” of <fixed-case>STEP</fixed-case> 2008 - AntónioBranco + AntónioBranco FranciscoCosta 299–314 W08-2224 @@ -3537,7 +3537,7 @@ Baseline Evaluation of <fixed-case>WSD</fixed-case> and Semantic Dependency in <fixed-case>O</fixed-case>nto<fixed-case>S</fixed-case>em - SergeiNirenburg + SergeiNirenburg StephenBeale MarjorieMcShane 315–326 @@ -3546,26 +3546,26 @@ The <fixed-case>T</fixed-case>ext<fixed-case>C</fixed-case>ap Semantic Interpreter - Charles B.Callaway + Charles B.Callaway 327–342 W08-2226 callaway-2008-textcap Deep Semantic Analysis of Text - James F.Allen - MarySwift - Willde Beaumont + James F.Allen + MarySwift + Willde Beaumont 343–354 W08-2227 allen-etal-2008-deep Textual Entailment as an Evaluation Framework for Metaphor Resolution: A Proposal - RodrigoAgerri - JohnBarnden - MarkLee - AlanWallington + RodrigoAgerri + JohnBarnden + MarkLee + AlanWallington 357–363 W08-2228 agerri-etal-2008-textual @@ -3583,8 +3583,8 @@ Addressing the Resource Bottleneck to Create Large-Scale Annotated Texts JonChamberlain - MassimoPoesio - UdoKruschwitz + MassimoPoesio + UdoKruschwitz 375–380 W08-2230 chamberlain-etal-2008-addressing @@ -3592,7 +3592,7 @@ A Resource-Poor Approach for Linking Ontology Classes to <fixed-case>W</fixed-case>ikipedia Articles NilsReiter - MatthiasHartung + MatthiasHartung AnetteFrank 381–387 W08-2231 @@ -3600,9 +3600,9 @@ Top-Down Cohesion Segmentation in Summarization - DoinaTatar + DoinaTatar Andreea DianaMihis - GabrielaSerban + GabrielaSerban 389–397 W08-2232 tatar-etal-2008-top @@ -3634,7 +3634,7 @@ Flexible Composition, Multiple Adjoining and Word Order Variation JoanChen-Main - Aravind K.Joshi + Aravind K.Joshi 9–16 W08-2302 chen-main-joshi-2008-flexible @@ -3657,9 +3657,9 @@ Compositional Semantics of Coordination using Synchronous <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar - Chung-hyeHan + Chung-hyeHan DavidPotter - Dennis R.Storoshenko + Dennis R.Storoshenko 33–40 W08-2305 han-etal-2008-compositional @@ -3697,14 +3697,14 @@ Synchronous Vector <fixed-case>TAG</fixed-case> for Syntax and Semantics: Control Verbs, Relative Clauses, and Inverse Linking RebeccaNesson - StuartShieber + StuartShieber 73–80 W08-2310 nesson-shieber-2008-synchronous The use of <fixed-case>MCTAG</fixed-case> to Process Elliptic Coordination - DjaméSeddah + DjaméSeddah 81–88 W08-2311 seddah-2008-use @@ -3751,8 +3751,8 @@ A Metagrammar for <fixed-case>V</fixed-case>ietnamese <fixed-case>LTAG</fixed-case> - PhươngLê Hồng - Thị Minh HuyềnNguyễn + PhươngLê Hồng + Thị Minh HuyềnNguyễn AzimRoussanaly 129–132 W08-2317 @@ -3761,7 +3761,7 @@ Is Coordination Quantification? KevinLerman - OwenRambow + OwenRambow 133–140 W08-2318 lerman-rambow-2008-coordination @@ -3769,15 +3769,15 @@ Feature Unification in <fixed-case>TAG</fixed-case> Derivation Trees SylvainSchmitz - JosephLe Roux + JosephLe Roux 141–148 W08-2319 schmitz-le-roux-2008-feature Reflexivity in <fixed-case>E</fixed-case>nglish: an <fixed-case>STAG</fixed-case> analysis - Dennis R.Storoshenko - Chung-hyeHan + Dennis R.Storoshenko + Chung-hyeHan DavidPotter 149–156 W08-2320 diff --git a/data/xml/W09.xml b/data/xml/W09.xml index 10eb054316..5f0a991e0c 100644 --- a/data/xml/W09.xml +++ b/data/xml/W09.xml @@ -4,7 +4,7 @@ Proceedings of the EACL 2009 Workshop on the Interaction between Linguistics and Computational Linguistics: Virtuous, Vicious or Vacuous? W09-01 - TimothyBaldwin + TimothyBaldwin ValiaKordoni Association for Computational Linguistics
Athens, Greece
@@ -18,14 +18,14 @@ Machine Translation and its Philosophical Accounts - SteliosPiperidis + SteliosPiperidis 1 W09-0101 piperidis-2009-machine The Annotation Conundrum - MarkLiberman + MarkLiberman 2 W09-0102 liberman-2009-annotation @@ -39,7 +39,7 @@ Computational Linguistics and Generative Linguistics: The Triumph of Hope over Experience - GeoffreyPullum + GeoffreyPullum 12–21 W09-0104 pullum-2009-computational @@ -53,14 +53,14 @@ Linguistically Naïve != Language Independent: Why <fixed-case>NLP</fixed-case> Needs Linguistic Typology - Emily M.Bender + Emily M.Bender 26–32 W09-0106 bender-2009-linguistically Parsed Corpora for Linguistics - Gertjanvan Noord + Gertjanvan Noord GosseBouma 33–39 W09-0107 @@ -75,7 +75,7 @@ What Do Computational Linguists Need to Know about Linguistics? - Robert C.Moore + Robert C.Moore 41–42 W09-0109 moore-2009-computational @@ -92,7 +92,7 @@ Proceedings of the Workshop on Geometrical Models of Natural Language Semantics W09-02 - RobertoBasili + RobertoBasili MarcoPennacchiotti Association for Computational Linguistics
Athens, Greece
@@ -106,7 +106,7 @@ One Distributional Memory, Many Semantic Spaces - MarcoBaroni + MarcoBaroni AlessandroLenci 1–8 W09-0201 @@ -123,7 +123,7 @@ Unsupervised Classification with Dependency Based Word Spaces KlausRothenhäusler - HinrichSchütze + HinrichSchütze 17–24 W09-0203 rothenhausler-schutze-2009-unsupervised @@ -131,15 +131,15 @@ A Study of Convolution Tree Kernel with Local Alignment LidanZhang - Kwok-PingChan + Kwok-PingChan 25–32 W09-0204 zhang-chan-2009-study <fixed-case>B</fixed-case>ag<fixed-case>P</fixed-case>ack: A General Framework to Represent Semantic Relations - AmaçHerdağdelen - MarcoBaroni + AmaçHerdağdelen + MarcoBaroni 33–40 W09-0205 herdagdelen-baroni-2009-bagpack @@ -148,14 +148,14 @@ Positioning for Conceptual Development using Latent Semantic Analysis FridolinWild BernhardHoisl - GastonBurek + GastonBurek 41–48 W09-0206 wild-etal-2009-positioning Semantic Similarity of Distractors in Multiple-Choice Tests: Extrinsic Evaluation - RuslanMitkov + RuslanMitkov Le AnHa AndreaVarga LuzRello @@ -166,7 +166,7 @@ Paraphrase Assessment in Structured Vector Space: Exploring Parameters and Datasets KatrinErk - SebastianPadó + SebastianPadó 57–65 W09-0208 erk-pado-2009-paraphrase @@ -174,7 +174,7 @@ <fixed-case>SVD</fixed-case> Feature Selection for Probabilistic Taxonomy Learning FrancescaFallucchi - Fabio MassimoZanzotto + Fabio MassimoZanzotto 66–73 W09-0209 fallucchi-zanzotto-2009-svd @@ -190,7 +190,7 @@ A Non-negative Tensor Factorization Model for Selectional Preference Induction - TimVan de Cruys + TimVan de Cruys 83–90 W09-0211 van-de-cruys-2009-non @@ -208,7 +208,7 @@ Handling Sparsity for Verb Noun <fixed-case>MWE</fixed-case> Token Classification - MonaDiab + MonaDiab MadhavKrishna 96–103 W09-0213 @@ -249,7 +249,7 @@ Content Analysis of Museum Documentation in a Transdisciplinary Perspective - GuentherGoerz + GuentherGoerz MartinScholz 1–9 W09-0301 @@ -259,7 +259,7 @@ An Intelligent Authoring Environment for Abstract Semantic Representations of Cultural Object Descriptions StasinosKonstantopoulos VangelisKarkaletsis - DimitrisBilidas + DimitrisBilidas 10–17 W09-0302 konstantopoulos-etal-2009-intelligent @@ -284,9 +284,9 @@ A Web-Enabled and Speech-Enhanced Parallel Corpus of <fixed-case>G</fixed-case>reek-<fixed-case>B</fixed-case>ulgarian Cultural Texts - VoulaGiouli + VoulaGiouli NikosGlaros - KirilSimov + KirilSimov PetyaOsenova 35–42 W09-0305 @@ -303,7 +303,7 @@ Applying <fixed-case>NLP</fixed-case> Technologies to the Collection and Enrichment of Language Data on the Web to Aid Linguistic Research FeiXia - WilliamLewis + WilliamLewis 51–59 W09-0307 xia-lewis-2009-applying @@ -311,7 +311,7 @@ Instance-Driven Discovery of Ontological Relation Labels Mariekevan Erp - Antalvan den Bosch + Antalvan den Bosch SanderWubben SteveHunt 60–68 @@ -357,8 +357,8 @@ Syntax-Oriented Evaluation Measures for Machine Translation Output - MajaPopović - HermannNey + MajaPopović + HermannNey 29–32 W09-0402 popovic-ney-2009-syntax @@ -366,7 +366,7 @@ A Simple Automatic <fixed-case>MT</fixed-case> Evaluation Metric PetrHomola - VladislavKuboň + VladislavKuboň PavelPecina 33–36 W09-0403 @@ -374,10 +374,10 @@ Machine Translation Evaluation with Textual Entailment Features - SebastianPadó + SebastianPadó MichelGalley - DanielJurafsky - Christopher D.Manning + DanielJurafsky + Christopher D.Manning 37–41 W09-0404 pado-etal-2009-machine @@ -386,7 +386,7 @@ Combining Multi-Engine Translations with <fixed-case>M</fixed-case>oses YuChen MichaelJellinghaus - AndreasEisele + AndreasEisele YiZhang SabineHunsicker SilkeTheison @@ -398,8 +398,8 @@ <fixed-case>CMU</fixed-case> System Combination for <fixed-case>WMT</fixed-case>‘09 - Almut SiljaHildebrand - StephanVogel + Almut SiljaHildebrand + StephanVogel 47–50 W09-0406 hildebrand-vogel-2009-cmu @@ -408,7 +408,7 @@ The <fixed-case>RWTH</fixed-case> System Combination System for <fixed-case>WMT</fixed-case> 2009 GregorLeusch EvgenyMatusov - HermannNey + HermannNey 51–55 W09-0407 leusch-etal-2009-rwth @@ -417,28 +417,28 @@ Machine Translation System Combination with Flexible Word Ordering KennethHeafield GregHanneman - AlonLavie + AlonLavie 56–60 W09-0408 heafield-etal-2009-machine Incremental Hypothesis Alignment with Flexible Matching for Building Confusion Networks: <fixed-case>BBN</fixed-case> System Description for <fixed-case>WMT</fixed-case>09 System Combination Task - Antti-VeikkoRosti + Antti-VeikkoRosti BingZhang SpyrosMatsoukas - RichardSchwartz + RichardSchwartz 61–65 W09-0409 rosti-etal-2009-incremental The <fixed-case>RWTH</fixed-case> Machine Translation System for <fixed-case>WMT</fixed-case> 2009 - MajaPopović + MajaPopović DavidVilar DanielStein EvgenyMatusov - HermannNey + HermannNey 66–69 W09-0410 popovic-etal-2009-rwth @@ -447,7 +447,7 @@ Translation Combination using Factored Word Substitution ChristianFedermann SilkeTheison - AndreasEisele + AndreasEisele HansUszkoreit YuChen MichaelJellinghaus @@ -458,7 +458,7 @@ <fixed-case>NUS</fixed-case> at <fixed-case>WMT</fixed-case>09: Domain Adaptation Experiments for <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>panish Machine Translation of News Commentary Text - PreslavNakov + PreslavNakov Hwee TouNg 75–79 W09-0412 @@ -469,27 +469,27 @@ JanNiehues TeresaHerrmann MuntsinKolss - AlexWaibel + AlexWaibel 80–84 W09-0413 niehues-etal-2009-universitat The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> Phrase-Based Translation System for <fixed-case>EACL</fixed-case>-<fixed-case>WMT</fixed-case> 2009 - José A. R.Fonollosa + José A. R.Fonollosa MaximKhalilov - Marta R.Costa-jussà - José B.Mariño - Carlos A.Henríquez Q. - AdolfoHernández H. - Rafael E.Banchs + Marta R.Costa-jussà + José B.Mariño + Carlos A.Henríquez Q. + AdolfoHernández H. + Rafael E.Banchs 85–89 W09-0414 fonollosa-etal-2009-talp Deep Linguistic Multilingual Translation and Bilingual Dictionaries - EricWehrli + EricWehrli LukaNerima YvesScherrer 90–94 @@ -509,7 +509,7 @@ <fixed-case>LIMSI</fixed-case>‘s Statistical Translation Systems for <fixed-case>WMT</fixed-case>‘09 AlexandreAllauzen - JosepCrego + JosepCrego AurélienMax FrançoisYvon 100–104 @@ -518,16 +518,16 @@ <fixed-case>NICT</fixed-case>@<fixed-case>WMT</fixed-case>09: Model Adaptation and Transliteration for <fixed-case>S</fixed-case>panish-<fixed-case>E</fixed-case>nglish <fixed-case>SMT</fixed-case> - MichaelPaul + MichaelPaul AndrewFinch - EiichiroSumita + EiichiroSumita 105–109 W09-0418 paul-etal-2009-nict Statistical Post Editing and Dictionary Extraction: <fixed-case>S</fixed-case>ystran/<fixed-case>E</fixed-case>dinburgh Submissions for <fixed-case>ACL</fixed-case>-<fixed-case>WMT</fixed-case>2009 - LoicDugast + LoicDugast JeanSenellart PhilippKoehn 110–114 @@ -536,7 +536,7 @@ Experiments in Morphosyntactic Processing for Translating to and from <fixed-case>G</fixed-case>erman - AlexanderFraser + AlexanderFraser 115–119 W09-0420 fraser-2009-experiments @@ -553,13 +553,13 @@ <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>zech <fixed-case>MT</fixed-case> in 2008 - OndřejBojar + OndřejBojar DavidMareček VáclavNovák MartinPopel - JanPtáček + JanPtáček JanRouš - ZdeněkŽabokrtský + ZdeněkŽabokrtský 125–129 W09-0422 bojar-etal-2009-english @@ -578,12 +578,12 @@ <fixed-case>J</fixed-case>oshua: An Open Source Toolkit for Parsing-Based Machine Translation ZhifeiLi ChrisCallison-Burch - ChrisDyer - SanjeevKhudanpur + ChrisDyer + SanjeevKhudanpur LaneSchwartz WrenThornton JonathanWeese - OmarZaidan + OmarZaidan 135–139 W09-0424 li-etal-2009-joshua @@ -592,16 +592,16 @@ An Improved Statistical Transfer System for <fixed-case>F</fixed-case>rench-<fixed-case>E</fixed-case>nglish Machine Translation GregHanneman VamshiAmbati - Jonathan H.Clark + Jonathan H.Clark AlokParlikar - AlonLavie + AlonLavie 140–144 W09-0425 hanneman-etal-2009-improved The <fixed-case>U</fixed-case>niversity of <fixed-case>M</fixed-case>aryland Statistical Machine Translation System for the <fixed-case>F</fixed-case>ourth <fixed-case>W</fixed-case>orkshop on <fixed-case>M</fixed-case>achine <fixed-case>T</fixed-case>ranslation - ChrisDyer + ChrisDyer HendraSetiawan YuvalMarton PhilipResnik @@ -636,8 +636,8 @@ Thi-Ngoc-DiepDo Viet-BacLe BrigitteBigi - LaurentBesacier - EricCastelli + LaurentBesacier + EricCastelli 165–172 W09-0430 do-etal-2009-mining @@ -662,7 +662,7 @@ <fixed-case>C</fixed-case>hinese Syntactic Reordering for Adequate Generation of <fixed-case>K</fixed-case>orean Verbal Phrases in <fixed-case>C</fixed-case>hinese-to-<fixed-case>K</fixed-case>orean <fixed-case>SMT</fixed-case> Jin-JiLi JungiKim - Dong-IlKim + Dong-IlKim Jong-HyeokLee 190–196 W09-0433 @@ -671,7 +671,7 @@ A Quantitative Analysis of Reordering Phenomena AlexandraBirch - PhilBlunsom + PhilBlunsom MilesOsborne 197–205 W09-0434 @@ -687,9 +687,9 @@ Disambiguating “<fixed-case>DE</fixed-case>” for <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Machine Translation - Pi-ChuanChang - DanielJurafsky - Christopher D.Manning + Pi-ChuanChang + DanielJurafsky + Christopher D.Manning 215–223 W09-0436 chang-etal-2009-disambiguating @@ -707,9 +707,9 @@ A Deep Learning Approach to Machine Transliteration ThomasDeselaers - SašaHasan + SašaHasan OliverBender - HermannNey + HermannNey 233–241 W09-0438 deselaers-etal-2009-deep @@ -724,18 +724,18 @@ On the Robustness of Syntactic and Semantic Features for Automatic <fixed-case>MT</fixed-case> Evaluation - JesúsGiménez - LluísMàrquez + JesúsGiménez + LluísMàrquez 250–258 W09-0440 gimenez-marquez-2009-robustness Fluency, Adequacy, or <fixed-case>HTER</fixed-case>? <fixed-case>E</fixed-case>xploring Different Human Judgments with a Tunable <fixed-case>MT</fixed-case> Metric - MatthewSnover + MatthewSnover NitinMadnani - BonnieDorr - RichardSchwartz + BonnieDorr + RichardSchwartz 259–268 W09-0441 snover-etal-2009-fluency @@ -776,7 +776,7 @@ JohannesMatiasek JeremyJancsary AlexandraKlein - HaraldTrost + HaraldTrost 19–25 W09-0503 matiasek-etal-2009-identifying @@ -802,14 +802,14 @@ Predicting Concept Types in User Corrections in Dialog SvetlanaStoyanchev - AmandaStent + AmandaStent 42–49 W09-0506 stoyanchev-stent-2009-predicting Deeper Spoken Language Understanding for Man-Machine Dialogue on Broader Application Domains: A Logical Alternative to Concept Spotting - JeanneVillaneau + JeanneVillaneau Jean-YvesAntoine 50–57 W09-0507 @@ -818,7 +818,7 @@ An Integrated Approach to Robust Processing of Situated Spoken Dialogue PierreLison - Geert-Jan M.Kruijff + Geert-Jan M.Kruijff 58–65 W09-0508 lison-kruijff-2009-integrated @@ -845,8 +845,8 @@ Proceedings of the 12th European Workshop on Natural Language Generation (ENLG 2009) W09-06 - EmielKrahmer - MariëtTheune + EmielKrahmer + MariëtTheune Association for Computational Linguistics
Athens, Greece
March @@ -864,7 +864,7 @@ NormanAlm RolfBlack MartinDempster - AnnaluWaller + AnnaluWaller 1–8 W09-0601 reiter-etal-2009-using @@ -878,7 +878,7 @@
System Building Cost vs. Output Quality in Data-to-Text Generation - AnjaBelz + AnjaBelz EricKow 16–24 W09-0603 @@ -889,7 +889,7 @@ ErwinMarsi EmielKrahmer IrisHendrickx - WalterDaelemans + WalterDaelemans 25–32 W09-0604 marsi-etal-2009-sentence @@ -938,7 +938,7 @@ DanielDionne Salvadorde la Puente CarlosLeón - PabloGervás + PabloGervás RaquelHervás 66–73 W09-0610 @@ -946,7 +946,7 @@ Learning Lexical Alignment Policies for Generating Referring Expressions for Spoken Dialogue Systems - SrinivasanJanarthanam + SrinivasanJanarthanam OliverLemon 74–81 W09-0611 @@ -971,7 +971,7 @@ A <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z Environment to Study Referring Expression Generation in a Situated Spoken Dialogue Task - SrinivasanJanarthanam + SrinivasanJanarthanam OliverLemon 94–97 W09-0614 @@ -980,8 +980,8 @@ A Hearer-Oriented Evaluation of Referring Expression Generation Imtiaz HussainKhan - Keesvan Deemter - GraemeRitchie + Keesvan Deemter + GraemeRitchie AlbertGatt Alexandra A.Cleland 98–101 @@ -1016,7 +1016,7 @@ The Effect of Linguistic Devices in Information Presentation Messages on Recall and Comprehension Martin I.Tietze AndiWinterboer - JohannaMoore + JohannaMoore 114–117 W09-0619 tietze-etal-2009-effect @@ -1032,7 +1032,7 @@ Clustering and Matching Headlines for Automatic Paraphrase Acquisition SanderWubben - Antalvan den Bosch + Antalvan den Bosch EmielKrahmer ErwinMarsi 122–125 @@ -1042,8 +1042,8 @@ A Situated Context Model for Resolution and Generation of Referring Expressions HendrikZender - Geert-Jan M.Kruijff - IvanaKruijff-Korbayová + Geert-Jan M.Kruijff + IvanaKruijff-Korbayová 126–129 W09-0622 zender-etal-2009-situated @@ -1068,21 +1068,21 @@ Towards Empirical Evaluation of Affective Tactical <fixed-case>NLG</fixed-case> Ielkavan der Sluis - ChrisMellish + ChrisMellish 146–153 W09-0625 van-der-sluis-mellish-2009-towards What Game Theory Can Do for <fixed-case>NLG</fixed-case>: The Case of Vague Language (Invited Talk) - Keesvan Deemter + Keesvan Deemter 154–161 W09-0626 van-deemter-2009-game Generation <fixed-case>C</fixed-case>hallenges 2009: Preface - AnjaBelz + AnjaBelz AlbertGatt 162–164 W09-0627 @@ -1090,13 +1090,13 @@ Report on the <fixed-case>F</fixed-case>irst <fixed-case>NLG</fixed-case> <fixed-case>C</fixed-case>hallenge on <fixed-case>G</fixed-case>enerating <fixed-case>I</fixed-case>nstructions in <fixed-case>V</fixed-case>irtual <fixed-case>E</fixed-case>nvironments (<fixed-case>GIVE</fixed-case>) - DonnaByron + DonnaByron AlexanderKoller KristinaStriegnitz JustineCassell RobertDale - JohannaMoore - JonOberlander + JohannaMoore + JonOberlander 165–173 W09-0628 byron-etal-2009-report @@ -1104,7 +1104,7 @@ The <fixed-case>TUNA</fixed-case>-<fixed-case>REG</fixed-case> <fixed-case>C</fixed-case>hallenge 2009: Overview and Evaluation Results AlbertGatt - AnjaBelz + AnjaBelz EricKow 174–182 W09-0629 @@ -1130,7 +1130,7 @@ Evolutionary and Case-Based Approaches to <fixed-case>REG</fixed-case>: <fixed-case>NIL</fixed-case>-<fixed-case>UCM</fixed-case>-<fixed-case>E</fixed-case>vo<fixed-case>TAP</fixed-case>, <fixed-case>NIL</fixed-case>-<fixed-case>UCM</fixed-case>-<fixed-case>V</fixed-case>alues<fixed-case>CBR</fixed-case> and <fixed-case>NIL</fixed-case>-<fixed-case>UCM</fixed-case>-<fixed-case>E</fixed-case>vo<fixed-case>CBR</fixed-case> RaquelHervás - PabloGervás + PabloGervás 187–188 W09-0632 hervas-gervas-2009-evolutionary @@ -1138,7 +1138,7 @@ <fixed-case>USP</fixed-case>-<fixed-case>EACH</fixed-case>: Improved Frequency-based Greedy Attribute Selection Diego Jesusde Lucena - IvandréParaboni + IvandréParaboni 189–190 W09-0633 de-lucena-paraboni-2009-usp @@ -1158,12 +1158,12 @@ Proceedings of the First Workshop on Language Technologies for African Languages W09-07 - LoriLevin + LoriLevin JohnKiango - JudithKlavans + JudithKlavans GuyDe Pauw Gilles-Mauricede Schryver - Peter WaiganjoWagacha + Peter WaiganjoWagacha Association for Computational Linguistics
Athens, Greece
March @@ -1218,7 +1218,7 @@
Using Technology Transfer to Advance Automatic Lemmatisation for <fixed-case>S</fixed-case>etswana - Hendrik JohannesGroenewald + Hendrik JohannesGroenewald 32–37 W09-0705 groenewald-2009-using @@ -1228,7 +1228,7 @@ GertrudFaaß UlrichHeid ElsabéTaljard - DaniePrinsloo + DaniePrinsloo 38–45 W09-0706 faass-etal-2009-part @@ -1276,7 +1276,7 @@ Towards an Electronic Dictionary of <fixed-case>T</fixed-case>amajaq Language in <fixed-case>N</fixed-case>iger - ChantalEnguehard + ChantalEnguehard IssoufModi 81–88 W09-0712 @@ -1293,14 +1293,14 @@ Exploiting Cross-Linguistic Similarities in <fixed-case>Z</fixed-case>ulu and <fixed-case>X</fixed-case>hosa Computational Morphology LaurettePretorius - SonjaBosch + SonjaBosch 96–103 W09-0714 pretorius-bosch-2009-exploiting Methods for <fixed-case>A</fixed-case>mharic Part-of-Speech Tagging - BjörnGambäck + BjörnGambäck FredrikOlsson AtelachAlemu Argaw LarsAsker @@ -1310,7 +1310,7 @@ An Ontology for Accessing Transcription Systems (<fixed-case>OATS</fixed-case>) - StevenMoran + StevenMoran 112–120 W09-0716 moran-2009-ontology @@ -1320,7 +1320,7 @@ Proceedings of the EACL 2009 Workshop on Computational Approaches to Semitic Languages W09-08 - MikeRosner + MikeRosner ShulyWintner Association for Computational Linguistics
Athens, Greece
@@ -1341,7 +1341,7 @@
The <fixed-case>K</fixed-case>aramel System and <fixed-case>S</fixed-case>emitic Languages: Structured Multi-Tiered Morphology - FrançoisBarthélemy + FrançoisBarthélemy 10–18 W09-0802 barthelemy-2009-karamel @@ -1375,7 +1375,7 @@ Automatic Treebank-Based Acquisition of <fixed-case>A</fixed-case>rabic <fixed-case>LFG</fixed-case> Dependency Structures LamiaTounsi MohammedAttia - Josefvan Genabith + Josefvan Genabith 45–52 W09-0806 tounsi-etal-2009-automatic @@ -1383,7 +1383,7 @@ Spoken <fixed-case>A</fixed-case>rabic Dialect Identification Using Phonotactic Modeling FadiBiadsy - JuliaHirschberg + JuliaHirschberg NizarHabash 53–61 W09-0807 @@ -1486,7 +1486,7 @@ Proceedings of the EACL 2009 Workshop on Computational Linguistic Aspects of Grammatical Inference W09-10 - Mennovan Zaanen + Mennovan Zaanen Colinde la Higuera Association for Computational Linguistics
Athens, Greece
@@ -1508,7 +1508,7 @@
On Bootstrapping of Linguistic Features for Bootstrapping Grammars - DamirĆavar + DamirĆavar 5–6 W09-1002 cavar-2009-bootstrapping @@ -1531,7 +1531,7 @@ <fixed-case>GREAT</fixed-case>: A Finite-State Machine Translation Toolkit Implementing a Grammatical Inference Approach for Transducer Inference (<fixed-case>GIATI</fixed-case>) JorgeGonzález - FranciscoCasacuberta + FranciscoCasacuberta 24–32 W09-1005 gonzalez-casacuberta-2009-great @@ -1555,9 +1555,9 @@ On Statistical Parsing of <fixed-case>F</fixed-case>rench with Supervised and Semi-Supervised Strategies - MarieCandito - BenoitCrabbé - DjaméSeddah + MarieCandito + BenoitCrabbé + DjaméSeddah 49–57 candito-etal-2009-statistical @@ -1602,7 +1602,7 @@ Modeling Word Learning As Communicative Inference - Michael C.Frank + Michael C.Frank 2 W09-1102 frank-2009-modeling @@ -1626,7 +1626,7 @@ A Metalearning Approach to Processing the Scope of Negation RoserMorante - WalterDaelemans + WalterDaelemans 21–29 W09-1105 morante-daelemans-2009-metalearning @@ -1642,7 +1642,7 @@ A Method for Stopping Active Learning Based on Stabilizing Predictions and the Need for User-Adjustable Stopping MichaelBloodgood - K.Vijay-Shanker + K.Vijay-Shanker 39–47 W09-1107 bloodgood-vijay-shanker-2009-method @@ -1673,8 +1673,8 @@ Mining the Web for Reciprocal Relationships - MichaelPaul - RoxanaGirju + MichaelPaul + RoxanaGirju ChenLi 75–83 W09-1111 @@ -1701,9 +1701,9 @@ <fixed-case>M</fixed-case>onte <fixed-case>C</fixed-case>arlo inference and maximization for phrase-based translation AbhishekArun - ChrisDyer + ChrisDyer BarryHaddow - PhilBlunsom + PhilBlunsom AdamLopez PhilippKoehn 102–110 @@ -1713,7 +1713,7 @@ Investigating Automatic Alignment Methods for Slide Generation from Academic Papers BrandonBeamer - RoxanaGirju + RoxanaGirju 111–119 W09-1115 beamer-girju-2009-investigating @@ -1746,7 +1746,7 @@ Design Challenges and Misconceptions in Named Entity Recognition - LevRatinov + LevRatinov DanRoth 147–155 W09-1119 @@ -1770,7 +1770,7 @@ Lexical Patterns or Dependency Patterns: Which Is Better for Hypernym Extraction? - ErikTjong Kim Sang + ErikTjong Kim Sang KatjaHofmann 174–182 W09-1122 @@ -1780,23 +1780,23 @@ Improving Text Classification by a Sense Spectrum Approach to Term Expansion PeterWittek SándorDarányi - Chew LimTan + Chew LimTan 183–191 W09-1123 wittek-etal-2009-improving A simple feature-copying approach for long-distance dependencies - MarcVilain + MarcVilain JonathanHuggins - BenWellner + BenWellner 192–200 W09-1124 vilain-etal-2009-simple Fine-Grained Classification of Named Entities Exploiting Latent Semantic Kernels - ClaudioGiuliano + ClaudioGiuliano 201–209 W09-1125 giuliano-2009-fine @@ -1804,7 +1804,7 @@ Using Encyclopedic Knowledge for Automatic Topic Identification KinoCoursey - RadaMihalcea + RadaMihalcea WilliamMoen 210–218 W09-1126 @@ -1823,7 +1823,7 @@ Proceedings of the Thirteenth Conference on Computational Natural Language Learning (CoNLL 2009): Shared Task W09-12 - JanHajič + JanHajič Association for Computational Linguistics
Boulder, Colorado
June @@ -1840,11 +1840,11 @@ MassimilianoCiaramita RichardJohansson DaisukeKawahara - Maria AntòniaMartí - LluísMàrquez - AdamMeyers + Maria AntòniaMartí + LluísMàrquez + AdamMeyers JoakimNivre - SebastianPadó + SebastianPadó JanŠtěpánek PavelStraňák MihaiSurdeanu @@ -1867,7 +1867,7 @@ Joint Memory-Based Learning of Syntactic and Semantic Dependencies in Multiple Languages RoserMorante VincentVan Asch - Antalvan den Bosch + Antalvan den Bosch 25–30 W09-1203 morante-etal-2009-joint @@ -1884,7 +1884,7 @@ A Latent Variable Model of Synchronous Syntactic-Semantic Parsing for Multiple Languages AndreaGesmundo - JamesHenderson + JamesHenderson PaolaMerlo IvanTitov 37–42 @@ -1916,8 +1916,8 @@ Multilingual Dependency Learning: A Huge Feature Engineering Method to Semantic Dependency Parsing HaiZhao WenliangChen - ChunyuKit - GuodongZhou + ChunyuKit + GuodongZhou 55–60 W09-1208 zhao-etal-2009-multilingual @@ -1926,7 +1926,7 @@ Multilingual Dependency Learning: Exploiting Rich Features for Tagging Syntactic and Semantic Dependencies HaiZhao WenliangChen - Jun’ichiKazama + Jun’ichiKazama KiyotakaUchimoto KentaroTorisawa 61–66 @@ -1954,14 +1954,14 @@ A Second-Order Joint Eisner Model for Syntactic and Semantic Dependency Parsing XavierLluís StefanBott - LluísMàrquez + LluísMàrquez 79–84 W09-1212 lluis-etal-2009-second Multilingual Semantic Role Labelling with <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic - IvanMeza-Ruiz + IvanMeza-Ruiz SebastianRiedel 85–90 W09-1213 @@ -1978,7 +1978,7 @@ Parsing Syntactic and Semantic Dependencies for Multiple Languages with A Pipeline Approach HanRen - DonghongJi + DonghongJi JingWan MingyaoZhang 97–102 @@ -2007,14 +2007,14 @@ Multilingual Syntactic-Semantic Dependency Parsing with Three-Stage Approximate Max-Margin Linear Models YotaroWatanabe MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 114–119 W09-1218 watanabe-etal-2009-multilingual A Simple Generative Pipeline Approach to Dependency Parsing and Semantic Role Labeling - DanielZeman + DanielZeman 120–125 W09-1219 zeman-2009-simple @@ -2024,12 +2024,12 @@ Proceedings of the BioNLP 2009 Workshop W09-13 - K. BretonnelCohen + K. BretonnelCohen DinaDemner-Fushman SophiaAnaniadou - JohnPestian - Jun’ichiTsujii - BonnieWebber + JohnPestian + Jun’ichiTsujii + BonnieWebber Association for Computational Linguistics
Boulder, Colorado
June @@ -2052,11 +2052,11 @@
Distinguishing Historical from Current Problems in Clinical Reports – Which Textual Features Help? - DanielleMowery + DanielleMowery HenkHarkema JohnDowling JonathanLustgarten - WendyChapman + WendyChapman 10–18 W09-1302 mowery-etal-2009-distinguishing @@ -2067,7 +2067,7 @@ HenkHarkema PeterHaug JeannieIrwin - WendyChapman + WendyChapman 19–27 W09-1303 christensen-etal-2009-onyx @@ -2075,7 +2075,7 @@ Learning the Scope of Hedge Cues in Biomedical Texts RoserMorante - WalterDaelemans + WalterDaelemans 28–36 W09-1304 morante-daelemans-2009-learning @@ -2085,8 +2085,8 @@ UdoHahn KatrinTomanek EkaterinaBuyko - Jung-jaeKim - DietrichRebholz-Schuhmann + Jung-jaeKim + DietrichRebholz-Schuhmann 37–45 W09-1305 hahn-etal-2009-feasible @@ -2094,7 +2094,7 @@ Extraction of Named Entities from Tables in Gene Mutation Literature WernWong - DavidMartinez + DavidMartinez LawrenceCavedon 46–54 W09-1306 @@ -2123,7 +2123,7 @@ MarkStevenson YikunGuo AbdulazizAlamri - RobertGaizauskas + RobertGaizauskas 71–79 W09-1309 stevenson-etal-2009-disambiguation @@ -2139,7 +2139,7 @@ Exploring Graph Structure for Detection of Reliability Zones within Synonym Resources: Experiment with the Gene Ontology - ThierryHamon + ThierryHamon NataliaGrabar 89–96 W09-1311 @@ -2196,7 +2196,7 @@ Identifying Interaction Sentences from Biological Literature Using Automatically Extracted Patterns HaibinLiu ChristianBlouin - VladoKešelj + VladoKešelj 133–141 W09-1317 liu-etal-2009-identifying @@ -2212,7 +2212,7 @@ Exploring Two Biomedical Text Genres for Disease Recognition - AurélieNévéol + AurélieNévéol WonKim W. JohnWilbur ZhiyongLu @@ -2261,7 +2261,7 @@ YasuhideMiura MasatsuguTonoike TomokoOhkuma - HiroshiMashuichi + HiroshiMashuichi KazuhikoOhe 185–192 W09-1324 @@ -2281,7 +2281,7 @@ Proceedings of the BioNLP 2009 Workshop Companion Volume for Shared Task W09-14 - Jun’ichiTsujii + Jun’ichiTsujii Association for Computational Linguistics
Boulder, Colorado
June @@ -2339,7 +2339,7 @@ AndreasVlachos PaulaButtery DiarmuidÓ Séaghdha - TedBriscoe + TedBriscoe 37–40 W09-1405 vlachos-etal-2009-biomedical @@ -2356,14 +2356,14 @@
High-precision biological event extraction with a concept recognizer - K. BretonnelCohen - KarinVerspoor - HelenJohnson - ChrisRoeder - PhilipOgren - WilliamBaumgartner + K. BretonnelCohen + KarinVerspoor + HelenJohnson + ChrisRoeder + PhilipOgren + WilliamBaumgartner ElizabethWhite - LawrenceHunter + LawrenceHunter 50–58 W09-1407 cohen-etal-2009-high @@ -2372,7 +2372,7 @@ A memory-based learning approach to event extraction in biomedical texts RoserMorante VincentVan Asch - WalterDaelemans + WalterDaelemans 59–67 W09-1408 morante-etal-2009-memory @@ -2380,7 +2380,7 @@ Extraction of biomedical events using case-based reasoning MarianaNeves - José-MaríaCarazo + José-MaríaCarazo AlbertoPascual-Montano 68–76 W09-1409 @@ -2388,9 +2388,9 @@ Biomedical Event Annotation with <fixed-case>CRF</fixed-case>s and Precision Grammars - AndrewMacKinlay - DavidMartinez - TimothyBaldwin + AndrewMacKinlay + DavidMartinez + TimothyBaldwin 77–85 W09-1410 mackinlay-etal-2009-biomedical @@ -2403,7 +2403,7 @@ LuisTari AstridRheinländer NguyenQuang Long - GracielaGonzalez + GracielaGonzalez UlfLeser 86–94 W09-1411 @@ -2415,7 +2415,7 @@ KuzmanGanchev VassilMomchev DeyanPeychev - PreslavNakov + PreslavNakov AngusRoberts 95–98 W09-1412 @@ -2433,7 +2433,7 @@ From Protein-Protein Interaction to Molecular Event Extraction - RuneSætre + RuneSætre MakotoMiwa KazuhiroYoshida Jun’ichiTsujii @@ -2446,17 +2446,17 @@ Hyoung-GyuLee Han-CheolCho Min-JeongKim - Joo-YoungLee + Joo-YoungLee GumwonHong - Hae-ChangRim + Hae-ChangRim 107–110 W09-1415 lee-etal-2009-multi Supervised Classification for Extracting Biomedical Events - ArzucanÖzgür - DragomirRadev + ArzucanÖzgür + DragomirRadev 111–114 W09-1416 ozgur-radev-2009-supervised @@ -2468,7 +2468,7 @@ RezaMohammadi JonathanDickerson DavidRobertson - GoranNenadic + GoranNenadic 115–118 W09-1417 sarafraz-etal-2009-biomedical @@ -2494,7 +2494,7 @@ Exploring ways beyond the simple supervised learning approach for biological event extraction GyörgyMóra - RichárdFarkas + RichárdFarkas GyörgySzarvas ZsoltMolnár 137–140 @@ -2506,7 +2506,7 @@ Proceedings of the Workshop on Software Engineering, Testing, and Quality Assurance for Natural Language Processing (SETQA-NLP 2009) W09-15 - Kevin BretonnelCohen + Kevin BretonnelCohen MarcLight Association for Computational Linguistics
Boulder, Colorado
@@ -2520,7 +2520,7 @@ Building Test Suites for <fixed-case>UIMA</fixed-case> Components - PhilipOgren + PhilipOgren StevenBethard 1–4 W09-1501 @@ -2529,15 +2529,15 @@ Context-Dependent Regression Testing for Natural Language Processing ElaineFarrow - Myroslava O.Dzikovska + Myroslava O.Dzikovska 5–13 W09-1502 farrow-dzikovska-2009-context Using Paraphrases of Deep Semantic Representions to Support Regression Testing in Spoken Dialogue Systems - Beth AnnHockey - MannyRayner + Beth AnnHockey + MannyRayner 14–21 W09-1503 hockey-rayner-2009-using @@ -2547,7 +2547,7 @@ YoshinobuKano LukeMcCrohon SophiaAnaniadou - Jun’ichiTsujii + Jun’ichiTsujii 22–30 W09-1504 kano-etal-2009-integrated @@ -2563,7 +2563,7 @@ Scaling up a <fixed-case>NLU</fixed-case> system from text to dialogue understanding - RodolfoDelmonte + RodolfoDelmonte AntonellaBristot GloriaVoltolina VincenzoPallotta @@ -2573,7 +2573,7 @@ Towards Agile and Test-Driven Development in <fixed-case>NLP</fixed-case> Applications - JanaSukkarieh + JanaSukkarieh JyotiKamal 42–44 W09-1507 @@ -2582,21 +2582,21 @@ Grammar Engineering for <fixed-case>CCG</fixed-case> using Ant and <fixed-case>XSLT</fixed-case> ScottMartin - RajakrishnanRajkumar - MichaelWhite + RajakrishnanRajkumar + MichaelWhite 45–46 W09-1508 martin-etal-2009-grammar Web Service Integration for Next Generation Localisation - DavidLewis + DavidLewis StephenCurran KevinFeeney ZoharEtzioni JohnKeeney AndyWay - ReinhardSchäler + ReinhardSchäler 47–55 W09-1509 lewis-etal-2009-web @@ -2630,12 +2630,12 @@ Proceedings of the Third International Workshop on Cross Lingual Information Access: Addressing the Information Need of Multilingual Societies (CLIAWS3) W09-16 - SivajiBandyopadhyay - PushpakBhattacharyya + SivajiBandyopadhyay + PushpakBhattacharyya VasudevaVarma SudeshnaSarkar - AKumaran - RaghavendraUdupa + AKumaran + RaghavendraUdupa Association for Computational Linguistics
Boulder, Colorado
June @@ -2648,7 +2648,7 @@ Cross-Language Information Access: Looking Backward, Looking Forward - Douglas W.Oard + Douglas W.Oard 1–2 W09-1601 oard-2009-cross @@ -2656,7 +2656,7 @@ Speech Retrieval in Unknown Languages: a Pilot Study XiaodanZhuang - Jui TingHuang + Jui TingHuang MarkHasegawa-Johnson 3–11 W09-1602 @@ -2674,7 +2674,7 @@ Cross-lingual Alignment and Completion of <fixed-case>W</fixed-case>ikipedia Templates GosseBouma SergioDuarte - ZahurulIslam + ZahurulIslam 21–29 W09-1604 bouma-etal-2009-cross @@ -2705,8 +2705,8 @@ An Approach to Text Summarization. - SankarK - SobhaL + SankarK + SobhaL 53–60 W09-1608 k-l-2009-approach @@ -2714,7 +2714,7 @@ <fixed-case>NE</fixed-case> Tagging for <fixed-case>U</fixed-case>rdu based on Bootstrap <fixed-case>POS</fixed-case> Learning SmruthiMukund - Rohini K.Srihari + Rohini K.Srihari 61–69 W09-1609 mukund-srihari-2009-ne @@ -2725,7 +2725,7 @@ Proceedings of the Workshop on Unsupervised and Minimally Supervised Learning of Lexical Semantics W09-17 SureshManandhar - Ioannis P.Klapaftis + Ioannis P.Klapaftis Association for Computational Linguistics
Boulder, Colorado, USA
June @@ -2738,7 +2738,7 @@ Acquiring Applicable Common Sense Knowledge from the Web - Hansen A.Schwartz + Hansen A.Schwartz FernandoGomez 1–9 W09-1701 @@ -2755,7 +2755,7 @@ Corpus-based Semantic Lexicon Induction with Web-based Corroboration SeanIgo - EllenRiloff + EllenRiloff 18–26 W09-1703 igo-riloff-2009-corpus @@ -2778,15 +2778,15 @@ Combining Syntactic Co-occurrences and Nearest Neighbours in Distributional Methods to Remedy Data Sparseness. - Lonnekevan der Plas + Lonnekevan der Plas 45–53 W09-1706 van-der-plas-2009-combining Using <fixed-case>DEDICOM</fixed-case> for Completely Unsupervised Part-of-Speech Tagging - PeterChew - BrettBader + PeterChew + BrettBader AllaRozovskaya 54–62 W09-1707 @@ -2811,16 +2811,16 @@ Summarization with a Joint Model for Sentence Extraction and Compression - AndréMartins - Noah A.Smith + AndréMartins + Noah A.Smith 1–9 W09-1801 martins-smith-2009-summarization A Scalable Global Model for Summarization - DanGillick - BenoitFavre + DanGillick + BenoitFavre 10–18 W09-1802 gillick-favre-2009-scalable @@ -2844,7 +2844,7 @@ A Constraint Programming Approach to Probabilistic Syntactic Processing - IreneLangkilde-Geary + IreneLangkilde-Geary 36–37 W09-1805 langkilde-geary-2009-constraint @@ -2854,7 +2854,7 @@ Proceedings of the NAACL HLT 2009 Workshop on Active Learning for Natural Language Processing W09-19 - EricRingger + EricRingger RobbieHaertel KatrinTomanek Association for Computational Linguistics @@ -2879,7 +2879,7 @@ KatrinTomanek FlorianLaws UdoHahn - HinrichSchütze + HinrichSchütze 9–17 W09-1902 tomanek-etal-2009-proper @@ -2887,8 +2887,8 @@ Estimating Annotation Cost for Active Learning in a Multi-Annotator Environment ShilpaArora - EricNyberg - Carolyn P.Rosé + EricNyberg + Carolyn P.Rosé 18–26 W09-1903 arora-etal-2009-estimating @@ -2930,7 +2930,7 @@ Proactive Learning for Building Machine Translation Systems for Minority Languages VamshiAmbati - JaimeCarbonell + JaimeCarbonell 58–61 W09-1908 ambati-carbonell-2009-proactive @@ -2941,7 +2941,7 @@ Proceedings of the Workshop on Computational Approaches to Linguistic Creativity W09-20 AnnaFeldman - BirteLoenneker-Rodman + BirteLoenneker-Rodman Association for Computational Linguistics
Boulder, Colorado
June @@ -2965,7 +2965,7 @@ Topic Model Analysis of Metaphor Frequency for Psycholinguistic Stimuli StevenBethard Vicky TzuyinLai - James H.Martin + James H.Martin 9–16 W09-2002 bethard-etal-2009-topic @@ -2987,7 +2987,7 @@
<fixed-case>G</fixed-case>aiku : Generating Haiku with Word Associations Norms - YaelNetzer + YaelNetzer DavidGabay YoavGoldberg MichaelElhadad @@ -2997,9 +2997,9 @@ Automatic Generation of <fixed-case>T</fixed-case>amil Lyrics for Melodies - AnanthRamakrishnan A - SankarKuppan - SobhaLalitha Devi + AnanthRamakrishnan A + SankarKuppan + SobhaLalitha Devi 40–46 W09-2006 ramakrishnan-a-etal-2009-automatic @@ -3044,7 +3044,7 @@ How Creative is Your Writing? - XiaojinZhu + XiaojinZhu ZhitingXu TusharKhot 87–93 @@ -3064,7 +3064,7 @@ Proceedings of the Fourth Workshop on Innovative Use of NLP for Building Educational Applications W09-21 - JoelTetreault + JoelTetreault JillBurstein ClaudiaLeacock Association for Computational Linguistics @@ -3081,7 +3081,7 @@ Automated Assessment of Spoken <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic JianCheng JaredBernstein - UlrikePado + UlrikePado MasanoriSuzuki 1–9 W09-2101 @@ -3098,19 +3098,19 @@ Inferring Tutorial Dialogue Structure with Hidden <fixed-case>M</fixed-case>arkov Modeling - Kristy ElizabethBoyer - Eun YoungHa - RobertPhillips + Kristy ElizabethBoyer + Eun YoungHa + RobertPhillips MichaelWallis MladenVouk - JamesLester + JamesLester 19–26 W09-2103 boyer-etal-2009-inferring A New Yardstick and Tool for Personalized Vocabulary Building - ThomasLandauer + ThomasLandauer KirillKireyev CharlesPanaccione 27–33 @@ -3119,12 +3119,12 @@ Supporting the Adaptation of Texts for Poor Literacy Readers: a Text Simplification Editor for <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese - ArnaldoCandido - ErickMaziero + ArnaldoCandido + ErickMaziero LuciaSpecia CarolineGasperin ThiagoPardo - SandraAluisio + SandraAluisio 34–42 W09-2105 candido-etal-2009-supporting @@ -3158,7 +3158,7 @@ <fixed-case>KSC</fixed-case>-<fixed-case>P</fixed-case>a<fixed-case>L</fixed-case>: A Peer Learning Agent that Encourages Students to take the Initiative CynthiaKersey BarbaraDi Eugenio - PamelaJordan + PamelaJordan SandraKatz 55–63 W09-2109 @@ -3212,7 +3212,7 @@ AndrewCarlson JustinBetteridge Estevam RafaelHruschka Junior - Tom M.Mitchell + Tom M.Mitchell 1–9 W09-2201 carlson-etal-2009-coupling @@ -3220,15 +3220,15 @@ Surrogate Learning - From Feature Independence to Semi-Supervised Classification SriharshaVeeramachaneni - Ravi KumarKondadadi + Ravi KumarKondadadi 10–18 W09-2202 veeramachaneni-kondadadi-2009-surrogate Keepin’ It Real: Semi-Supervised Learning with Realistic Tuning - Andrew B.Goldberg - XiaojinZhu + Andrew B.Goldberg + XiaojinZhu 19–27 W09-2203 goldberg-zhu-2009-keepin @@ -3237,14 +3237,14 @@ Is Unlabeled Data Suitable for Multiclass <fixed-case>SVM</fixed-case>-based Web Page Classification? ArkaitzZubiaga VíctorFresno - RaquelMartínez + RaquelMartínez 28–36 W09-2204 zubiaga-etal-2009-unlabeled A Comparison of Structural Correspondence Learning and Self-training for Discriminative Parse Selection - BarbaraPlank + BarbaraPlank 37–42 W09-2205 plank-2009-comparison @@ -3252,7 +3252,7 @@ <fixed-case>L</fixed-case>atent <fixed-case>D</fixed-case>irichlet <fixed-case>A</fixed-case>llocation with Topic-in-Set Knowledge DavidAndrzejewski - XiaojinZhu + XiaojinZhu 43–48 W09-2206 andrzejewski-zhu-2009-latent @@ -3261,7 +3261,7 @@ An Analysis of Bootstrapping for the Recognition of Temporal Expressions JordiPoveda MihaiSurdeanu - JordiTurmo + JordiTurmo 49–57 W09-2207 poveda-etal-2009-analysis @@ -3284,7 +3284,7 @@ On Semi-Supervised Learning of <fixed-case>G</fixed-case>aussian Mixture Models for Phonetic Classification - Jui-TingHuang + Jui-TingHuang MarkHasegawa-Johnson 75–83 W09-2210 @@ -3318,7 +3318,7 @@ Decoding with Syntactic and Non-Syntactic Phrases in a Syntax-Based Machine Translation System GregHanneman - AlonLavie + AlonLavie 1–9 W09-2301 hanneman-lavie-2009-decoding @@ -3333,7 +3333,7 @@ Empirical Lower Bounds on Aligment Error Rates in Syntax-Based Machine Translation - AndersSøgaard + AndersSøgaard JonasKuhn 19–27 W09-2303 @@ -3350,8 +3350,8 @@ References Extension for the Automatic Evaluation of <fixed-case>MT</fixed-case> by Syntactic Hybridization BoWang - TiejunZhao - MuyunYang + TiejunZhao + MuyunYang ShengLi 37–44 W09-2305 @@ -3361,25 +3361,25 @@ A Study of Translation Rule Classification for Syntax-based Statistical Machine Translation HongfeiJiang ShengLi - MuyunYang - TiejunZhao + MuyunYang + TiejunZhao 45–50 W09-2306 jiang-etal-2009-study Discriminative Reordering with <fixed-case>C</fixed-case>hinese Grammatical Relations Features - Pi-ChuanChang - HuihsinTseng - DanJurafsky - Christopher D.Manning + Pi-ChuanChang + HuihsinTseng + DanJurafsky + Christopher D.Manning 51–59 W09-2307 chang-etal-2009-discriminative On the Complexity of Alignment Problems in Two Synchronous Grammar Formalisms - AndersSøgaard + AndersSøgaard 60–68 W09-2308 sogaard-2009-complexity @@ -3387,9 +3387,9 @@ Reordering Model Using Syntactic Information of a Source Tree for Statistical Machine Translation KeiHashimoto - HirohumiYamamoto + HirohumiYamamoto HideoOkuma - EiichiroSumita + EiichiroSumita KeiichiTokuda 69–77 W09-2309 @@ -3398,7 +3398,7 @@ Coupling Hierarchical Word Reordering and Decoding in Phrase-Based Statistical Machine Translation MaximKhalilov - José A. R.Fonollosa + José A. R.Fonollosa MarkDras 78–86 W09-2310 @@ -3409,8 +3409,8 @@ Proceedings of the Workshop on Semantic Evaluations: Recent Achievements and Future Directions (SEW-2009) W09-24 - EnekoAgirre - LluísMàrquez + EnekoAgirre + LluísMàrquez RichardWicentowski Association for Computational Linguistics
Boulder, Colorado
@@ -3424,16 +3424,16 @@ Invited Talk: Alternative Annotations of Word Usage - DianaMcCarthy + DianaMcCarthy 1 W09-2401 mccarthy-2009-invited Making Sense of Word Sense Variation - RebeccaPassonneau - AnsafSalleb-Aouissi - NancyIde + RebeccaPassonneau + AnsafSalleb-Aouissi + NancyIde 2–9 W09-2402 passonneau-etal-2009-making @@ -3443,7 +3443,7 @@ JuditaPreiss JonDehdari JoshKing - DennisMehay + DennisMehay 10–18 W09-2403 preiss-etal-2009-refining @@ -3457,7 +3457,7 @@ Using Web Selectors for the Disambiguation of All Words - Hansen A.Schwartz + Hansen A.Schwartz FernandoGomez 28–36 W09-2405 @@ -3467,7 +3467,7 @@ Large-scale Semantic Networks: Annotation and Evaluation VáclavNovák SvenHartrumpf - KeithHall + KeithHall 37–45 W09-2406 novak-etal-2009-large @@ -3475,7 +3475,7 @@ Making Semantic Topicality Robust Through Term Abstraction Paul M.Heider - Rohini K.Srihari + Rohini K.Srihari 46–51 W09-2407 heider-srihari-2009-making @@ -3483,7 +3483,7 @@ Meeting <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2: Shallow Approach for Temporal Tagger OleksandrKolomiyets - Marie-FrancineMoens + Marie-FrancineMoens 52–57 W09-2408 kolomiyets-moens-2009-meeting @@ -3491,7 +3491,7 @@ Using Lexical Patterns in the <fixed-case>G</fixed-case>oogle Web 1<fixed-case>T</fixed-case> Corpus to Deduce Semantic Relations Between Nouns PaulNulty - FintanCostello + FintanCostello 58–63 W09-2409 nulty-costello-2009-using @@ -3499,7 +3499,7 @@ Improvements To Monolingual <fixed-case>E</fixed-case>nglish Word Sense Disambiguation WeiweiGuo - MonaDiab + MonaDiab 64–69 W09-2410 guo-diab-2009-improvements @@ -3507,8 +3507,8 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 1: Coreference Resolution in Multiple Languages MartaRecasens - ToniMartí - MarionaTaulé + ToniMartí + MarionaTaulé LluísMàrquez EmiliSapena 70–75 @@ -3518,8 +3518,8 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 2: Cross-Lingual Lexical Substitution RaviSinha - DianaMcCarthy - RadaMihalcea + DianaMcCarthy + RadaMihalcea 76–81 W09-2412 sinha-etal-2009-semeval @@ -3527,14 +3527,14 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 3: Cross-lingual Word Sense Disambiguation ElsLefever - VeroniqueHoste + VeroniqueHoste 82–87 W09-2413 lefever-hoste-2009-semeval <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 7: Argument Selection and Coercion - JamesPustejovsky + JamesPustejovsky AnnaRumshisky 88–93 W09-2414 @@ -3545,12 +3545,12 @@ IrisHendrickx Su NamKim ZornitsaKozareva - PreslavNakov + PreslavNakov DiarmuidÓ Séaghdha - SebastianPadó + SebastianPadó MarcoPennacchiotti - LorenzaRomano - StanSzpakowicz + LorenzaRomano + StanSzpakowicz 94–99 W09-2415 hendrickx-etal-2009-semeval @@ -3559,9 +3559,9 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 9: The Interpretation of Noun Compounds Using Paraphrasing Verbs and Prepositions CristinaButnariu Su NamKim - PreslavNakov + PreslavNakov DiarmuidÓ Séaghdha - StanSzpakowicz + StanSzpakowicz TonyVeale 100–105 W09-2416 @@ -3572,15 +3572,15 @@ JosefRuppenhofer CarolineSporleder RoserMorante - CollinBaker - MarthaPalmer + CollinBaker + MarthaPalmer 106–111 W09-2417 ruppenhofer-etal-2009-semeval <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 13: Evaluating Events, Time Expressions, and Temporal Relations (<fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-2) - JamesPustejovsky + JamesPustejovsky MarcVerhagen 112–116 W09-2418 @@ -3589,7 +3589,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 14: Evaluation Setting for Word Sense Induction & Disambiguation Systems SureshManandhar - IoannisKlapaftis + IoannisKlapaftis 117–122 W09-2419 manandhar-klapaftis-2009-semeval @@ -3597,7 +3597,7 @@ <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2010 Task 17: All-words Word Sense Disambiguation on a Specific Domain EnekoAgirre - OierLopez de Lacalle + OierLopez de Lacalle ChristianeFellbaum AndreaMarchetti AntonioToral @@ -3608,10 +3608,10 @@ Relation detection between named entities: report of a shared task - CláudiaFreitas + CláudiaFreitas DianaSantos CristinaMota - HugoGonçalo Oliveira + HugoGonçalo Oliveira PaulaCarvalho 129–137 W09-2421 @@ -3619,15 +3619,15 @@ Error Analysis of the <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val Temporal Relation Identification Task - Chong MinLee - GrahamKatz + Chong MinLee + GrahamKatz 138–145 W09-2422 lee-katz-2009-error Automatic Recognition of Logical Relations for <fixed-case>E</fixed-case>nglish, <fixed-case>C</fixed-case>hinese and <fixed-case>J</fixed-case>apanese in the <fixed-case>GLARF</fixed-case> Framework - AdamMeyers + AdamMeyers MichikoKosaka NianwenXue HengJi @@ -3645,9 +3645,9 @@ W09-25 ChrisCallison-Burch IdoDagan - ChristopherManning + ChristopherManning MarcoPennacchiotti - Fabio MassimoZanzotto + Fabio MassimoZanzotto Association for Computational Linguistics
Suntec, Singapore
August @@ -3660,8 +3660,8 @@ Multi-word expressions in textual inference: Much ado about nothing? - Marie-Catherinede Marneffe - SebastianPadó + Marie-Catherinede Marneffe + SebastianPadó Christopher D.Manning 1–9 W09-2501 @@ -3692,7 +3692,7 @@ Optimizing Textual Entailment Recognition Using Particle Swarm Optimization YasharMehdad - BernardoMagnini + BernardoMagnini 36–43 W09-2505 mehdad-magnini-2009-optimizing @@ -3700,7 +3700,7 @@ Ranking Paraphrases in Context StefanThater - GeorgianaDinu + GeorgianaDinu ManfredPinkal 44–47 W09-2506 @@ -3723,7 +3723,7 @@ Automating Model Building in c-rater - JanaSukkarieh + JanaSukkarieh SvetlanaStoyanchev 61–69 W09-2509 @@ -3742,7 +3742,7 @@ Proceedings of the 2009 Workshop on Grammar Engineering Across Frameworks (GEAF 2009) W09-26 - Tracy HollowayKing + Tracy HollowayKing MarianneSantaholma Association for Computational Linguistics
Suntec, Singapore
@@ -3764,7 +3764,7 @@
Developing <fixed-case>G</fixed-case>erman Semantics on the basis of Parallel <fixed-case>LFG</fixed-case> Grammars - SinaZarrieß + SinaZarrieß 10–18 W09-2602 zarriess-2009-developing @@ -3801,10 +3801,10 @@ Using Artificially Generated Data to Evaluate Statistical Machine Translation - MannyRayner + MannyRayner PaulaEstrella - PierretteBouillon - Beth AnnHockey + PierretteBouillon + Beth AnnHockey YukieNakao 54–62 W09-2607 @@ -3822,7 +3822,7 @@ A generalized method for iterative error mining in parsing results Daniëlde Kok JianqiangMa - Gertjanvan Noord + Gertjanvan Noord 71–79 W09-2609 de-kok-etal-2009-generalized @@ -3832,8 +3832,8 @@ Proceedings of the 2009 Workshop on Knowledge and Reasoning for Answering Questions (KRAQ 2009) W09-27 - PatrickSaint-Dizier - Marie-FrancineMoens + PatrickSaint-Dizier + Marie-FrancineMoens Association for Computational Linguistics
Suntec, Singapore
August @@ -3846,7 +3846,7 @@ Knowledge and Reasoning for Medical Question-Answering - PierreZweigenbaum + PierreZweigenbaum 1–2 W09-2701 zweigenbaum-2009-knowledge @@ -3871,9 +3871,9 @@ Some Challenges in the Design of Comparative and Evaluative Question Answering Systems - NathalieLim + NathalieLim PatrickSaint-Dizier - RachelRoxas + RachelRoxas 15–18 W09-2704 lim-etal-2009-challenges @@ -3891,8 +3891,8 @@ Proceedings of the 2009 Workshop on Language Generation and Summarisation (UCNLG+Sum 2009) W09-28 - AnjaBelz - RogerEvans + AnjaBelz + RogerEvans SebastianVarges Association for Computational Linguistics
Suntec, Singapore
@@ -3906,24 +3906,24 @@ Query-focused Summarization Using Text-to-Text Generation: When Information Comes from Multilingual Sources - KathyMcKeown + KathyMcKeown 3 W09-2802 mckeown-2009-query Optimization-based Content Selection for Opinion Summarization - Jackie Chi KitCheung + Jackie Chi KitCheung GiuseppeCarenini - Raymond T.Ng + Raymond T.Ng 7–14 W09-2804 cheung-etal-2009-optimization Unsupervised Induction of Sentence Compression Rules - JoãoCordeiro - GaëlDias + JoãoCordeiro + GaëlDias PavelBrazdil 15–22 W09-2805 @@ -3932,7 +3932,7 @@ Evaluation of Automatic Summaries: Metrics under Varying Data Conditions KarolinaOwczarzak - Hoa TrangDang + Hoa TrangDang 23–30 W09-2806 owczarzak-dang-2009-evaluation @@ -3950,7 +3950,7 @@ AkinoriKinoshita TakeshiKobayakawa TadashiKumano - NaotoKatoh + NaotoKatoh 39–47 W09-2808 tanaka-etal-2009-syntax @@ -3958,7 +3958,7 @@ A Parse-and-Trim Approach with Information Significance for <fixed-case>C</fixed-case>hinese Sentence Compression WeiXu - RalphGrishman + RalphGrishman 48–55 W09-2809 xu-grishman-2009-parse @@ -3966,7 +3966,7 @@ Visual Development Process for Automatic Generation of Digital Games Narrative Content Maria FernandaCaropreso - DianaInkpen + DianaInkpen ShahzadKhan FazelKeshtkar 59–62 @@ -3976,9 +3976,9 @@ Reducing Redundancy in Multi-document Summarization Using Lexical Semantic Similarity IrisHendrickx - WalterDaelemans + WalterDaelemans ErwinMarsi - EmielKrahmer + EmielKrahmer 63–66 W09-2812 hendrickx-etal-2009-reducing @@ -3988,7 +3988,7 @@ MohitKumar DipanjanDas SachinAgarwal - AlexanderRudnicky + AlexanderRudnicky 67–71 W09-2813 kumar-etal-2009-non @@ -4024,7 +4024,7 @@ <fixed-case>ICSI</fixed-case>-<fixed-case>CRF</fixed-case>: The Generation of References to the Main Subject and Named Entities Using Conditional Random Fields - BenoitFavre + BenoitFavre BerndBohnet 99–100 W09-2818 @@ -4033,7 +4033,7 @@ <fixed-case>UD</fixed-case>el: Generating Referring Expressions Guided by Psycholinguistc Findings CharlesGreenbacker - KathleenMcCoy + KathleenMcCoy 101–102 W09-2819 greenbacker-mccoy-2009-udel @@ -4041,7 +4041,7 @@ <fixed-case>JUNLG</fixed-case>-<fixed-case>MSR</fixed-case>: A Machine Learning Approach of Main Subject Reference Selection with Rule Based Improvement SamirGupta - SivajiBandopadhyay + SivajiBandopadhyay 103–104 W09-2820 gupta-bandopadhyay-2009-junlg @@ -4049,14 +4049,14 @@ <fixed-case>UD</fixed-case>el: Extending Reference Generation to Multiple Entities CharlesGreenbacker - KathleenMcCoy + KathleenMcCoy 105–106 W09-2821 greenbacker-mccoy-2009-udel-extending <fixed-case>WLV</fixed-case>: A Confidence-based Machine Learning Method for the <fixed-case>GREC</fixed-case>-<fixed-case>NEG</fixed-case>’09 Task - ConstantinOrăsan + ConstantinOrăsan IustinDornescu 107–108 W09-2822 @@ -4069,7 +4069,7 @@ W09-29 DimitraAnastasiou ChikaraHashimoto - PreslavNakov + PreslavNakov Su NamKim Association for Computational Linguistics
Singapore
@@ -4086,7 +4086,7 @@ HelenaCaseli AlineVillavicencio AndréMachado - Maria JoséFinatto + Maria JoséFinatto 1–8 W09-2901 caseli-etal-2009-statistically @@ -4101,7 +4101,7 @@
Verb Noun Construction <fixed-case>MWE</fixed-case> Token Classification - MonaDiab + MonaDiab PravinBhutada 17–22 W09-2903 @@ -4109,7 +4109,7 @@ Exploiting Translational Correspondences for Pattern-Independent <fixed-case>MWE</fixed-case> Identification - SinaZarrieß + SinaZarrieß JonasKuhn 23–30 W09-2904 @@ -4126,7 +4126,7 @@ Mining Complex Predicates In <fixed-case>H</fixed-case>indi Using A Parallel <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Corpus - R. Mahesh K.Sinha + R. Mahesh K.Sinha 40–46 W09-2906 sinha-2009-mining @@ -4134,7 +4134,7 @@ Improving Statistical Machine Translation Using Domain Bilingual Multiword Expressions ZhixiangRen - Yajuan + Yajuan JieCao QunLiu YunHuang @@ -4169,8 +4169,8 @@ W09-30 ManfredStede Chu-RenHuang - NancyIde - AdamMeyers + NancyIde + AdamMeyers Association for Computational Linguistics
Suntec, Singapore
August @@ -4184,7 +4184,7 @@ A Cognitive-based Annotation System for Emotion Computing YingChen - Sophia Y. M.Lee + Sophia Y. M.Lee Chu-RenHuang 1–9 W09-3001 @@ -4192,7 +4192,7 @@ Complex Linguistic Annotation – No Easy Way Out! A Case from <fixed-case>B</fixed-case>angla and <fixed-case>H</fixed-case>indi <fixed-case>POS</fixed-case> Labeling Tasks - SandipanDandapat + SandipanDandapat PriyankaBiswas MonojitChoudhury KalikaBali @@ -4229,14 +4229,14 @@ Annotating Subordinators in the <fixed-case>T</fixed-case>urkish Discourse Bank DenizZeyrek - Umit DenizTuran - CemBozsahin - RuketCakici - Ayisigi B.Sevdik-Calli - IsinDemirsahin - BerfinAktas - İhsanYalcinkaya - HaleOgel + Umit DenizTuran + CemBozsahin + RuketCakici + Ayisigi B.Sevdik-Calli + IsinDemirsahin + BerfinAktas + İhsanYalcinkaya + HaleOgel 44–47 W09-3006 zeyrek-etal-2009-annotating @@ -4250,7 +4250,7 @@ Designing a Language Game for Collecting Coreference Annotation - BarboraHladká + BarboraHladká JiříMírovský PavelSchlesinger 52–55 @@ -4259,17 +4259,17 @@ Explorations in Automatic Image Annotation using Textual Features - Chee WeeLeong - RadaMihalcea + Chee WeeLeong + RadaMihalcea 56–59 W09-3009 leong-mihalcea-2009-explorations Human Evaluation of Article and Noun Number Usage: Influences of Context and Construction Variability - JohnLee - JoelTetreault - MartinChodorow + JohnLee + JoelTetreault + MartinChodorow 60–63 W09-3010 lee-etal-2009-human @@ -4284,10 +4284,10 @@ Committed Belief Annotation and Tagging - MonaDiab - LoriLevin + MonaDiab + LoriLevin TerukoMitamura - OwenRambow + OwenRambow VinodkumarPrabhakaran WeiweiGuo 68–73 @@ -4296,8 +4296,8 @@ Annotation of Sentence Structure; Capturing the Relationship among Clauses in <fixed-case>C</fixed-case>zech Sentences - MarkétaLopatková - NataliaKlyueva + MarkétaLopatková + NataliaKlyueva PetrHomola 74–81 W09-3013 @@ -4314,8 +4314,8 @@ Syntactic annotation of spoken utterances: A case study on the <fixed-case>C</fixed-case>zech Academic Corpus - BarboraHladká - ZdeňkaUrešová + BarboraHladká + ZdeňkaUrešová 90–98 W09-3015 hladka-uresova-2009-syntactic @@ -4323,7 +4323,7 @@ High-Performance High-Volume Layered Corpora Annotation TiagoLuís - DavidMartins de Matos + DavidMartins de Matos 99–107 W09-3016 luis-martins-de-matos-2009-high @@ -4351,7 +4351,7 @@ MichikoKosaka HengJi NianwenXue - MaryHarper + MaryHarper AngSun WeiXu ShashaLiao @@ -4361,8 +4361,8 @@ Using Parallel Propbanks to enhance Word-alignments - JinhoChoi - MarthaPalmer + JinhoChoi + MarthaPalmer NianwenXue 121–124 W09-3020 @@ -4370,7 +4370,7 @@ <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et as Complementary Resources for Annotation - Collin F.Baker + Collin F.Baker ChristianeFellbaum 125–129 W09-3021 @@ -4382,7 +4382,7 @@ LaurieBuscail MarieGarnier ArnaudRykner - PatrickSaint-Dizier + PatrickSaint-Dizier 130–133 W09-3022 albert-etal-2009-annotating @@ -4410,7 +4410,7 @@ Towards a Methodology for Named Entities Annotation KarënFort MaudEhrmann - AdelineNazarenko + AdelineNazarenko 142–145 W09-3025 fort-etal-2009-towards @@ -4430,7 +4430,7 @@ SuguruMatsuyoshi EricNichols KentaroInui - YujiMatsumoto + YujiMatsumoto 150–153 W09-3027 murakami-etal-2009-annotating @@ -4448,19 +4448,19 @@ UmangiOza RashmiPrasad SudheerKolachina - DiptiMisra Sharma - AravindJoshi + DiptiMisra Sharma + AravindJoshi 158–161 W09-3029 oza-etal-2009-hindi Simple Parser for <fixed-case>I</fixed-case>ndian Languages in a Dependency Framework - AksharBharati + AksharBharati MridulGupta VineetYadav KarthikGali - Dipti MisraSharma + Dipti MisraSharma 162–165 W09-3030 bharati-etal-2009-simple @@ -4491,8 +4491,8 @@ The <fixed-case>SILT</fixed-case> and <fixed-case>F</fixed-case>la<fixed-case>R</fixed-case>e<fixed-case>N</fixed-case>et International Collaboration for Interoperability NancyIde - JamesPustejovsky - NicolettaCalzolari + JamesPustejovsky + NicolettaCalzolari ClaudiaSoria 178–181 W09-3034 @@ -4500,22 +4500,22 @@ Building a Large Syntactically-Annotated Corpus of <fixed-case>V</fixed-case>ietnamese - Phuong-ThaiNguyen - Xuan-LuongVu - Thi-Minh-HuyenNguyen + Phuong-ThaiNguyen + Xuan-LuongVu + Thi-Minh-HuyenNguyen Van-HiepNguyen - Hong-PhuongLe + Hong-PhuongLe 182–185 W09-3035 nguyen-etal-2009-building A Multi-Representational and Multi-Layered Treebank for <fixed-case>H</fixed-case>indi/<fixed-case>U</fixed-case>rdu - RajeshBhatt + RajeshBhatt BhuvanaNarasimhan - MarthaPalmer - OwenRambow - DiptiSharma + MarthaPalmer + OwenRambow + DiptiSharma FeiXia 186–189 W09-3036 @@ -4527,7 +4527,7 @@ Proceedings of the 2nd Workshop on Building and Using Comparable Corpora: from Parallel to Non-parallel Corpora (BUCC) W09-31 PascaleFung - PierreZweigenbaum + PierreZweigenbaum ReinhardRapp Association for Computational Linguistics
Singapore
@@ -4541,7 +4541,7 @@ Repetition and Language Models and Comparable Corpora - KenChurch + KenChurch 1 W09-3101 church-2009-repetition @@ -4566,7 +4566,7 @@ An Analysis of the Calque Phenomena Based on Comparable Corpora MarieGarnier - PatrickSaint-Dizier + PatrickSaint-Dizier 19–22 W09-3104 garnier-saint-dizier-2009-analysis @@ -4583,7 +4583,7 @@ Train the Machine with What It Can <fixed-case>L</fixed-case>earn—<fixed-case>C</fixed-case>orpus Selection for <fixed-case>SMT</fixed-case> XiwuHan HanzhangLi - TiejunZhao + TiejunZhao 27–33 W09-3106 han-etal-2009-train @@ -4615,15 +4615,15 @@ Compilation of Specialized Comparable Corpora in <fixed-case>F</fixed-case>rench and <fixed-case>J</fixed-case>apanese LorraineGoeuriot EmmanuelMorin - BéatriceDaille + BéatriceDaille 55–63 W09-3110 goeuriot-etal-2009-compilation Toward Categorization of Sign Language Corpora - JérémieSegouat - AnneliesBraffort + JérémieSegouat + AnneliesBraffort 64–67 W09-3111 segouat-braffort-2009-toward @@ -4649,7 +4649,7 @@ Social (distributed) language modeling, clustering and dialectometry - DavidEllis + DavidEllis 1–4 W09-3201 ellis-2009-social @@ -4659,7 +4659,7 @@ SitabhraSinha Raj KumarPan NishaYadav - MayankVahia + MayankVahia IravathamMahadevan 5–13 W09-3202 @@ -4676,8 +4676,8 @@ Random Walks for Text Semantic Similarity DanielRamage - Anna N.Rafferty - Christopher D.Manning + Anna N.Rafferty + Christopher D.Manning 23–31 W09-3204 ramage-etal-2009-random @@ -4694,18 +4694,18 @@ <fixed-case>W</fixed-case>iki<fixed-case>W</fixed-case>alk: Random walks on <fixed-case>W</fixed-case>ikipedia for Semantic Relatedness EricYeh DanielRamage - Christopher D.Manning - EnekoAgirre - AitorSoroa + Christopher D.Manning + EnekoAgirre + AitorSoroa 41–49 W09-3206 yeh-etal-2009-wikiwalk Measuring semantic relatedness with vector space models and random walks - AmaçHerdağdelen + AmaçHerdağdelen KatrinErk - MarcoBaroni + MarcoBaroni 50–53 W09-3207 herdagdelen-etal-2009-measuring @@ -4731,7 +4731,7 @@ SwapnaSomasundaran GalileoNamata LiseGetoor - JanyceWiebe + JanyceWiebe 66–74 W09-3210 somasundaran-etal-2009-opinion @@ -4771,7 +4771,7 @@ A Novel Approach to Automatic Gazetteer Generation using <fixed-case>W</fixed-case>ikipedia ZiqiZhang - JoséIria + JoséIria 1–9 W09-3301 zhang-iria-2009-novel @@ -4782,7 +4782,7 @@ NickyRingland JoelNothman TaraMurphy - James R.Curran + James R.Curran 10–18 W09-3302 balasuriya-etal-2009-named @@ -4792,8 +4792,8 @@ EmmanuelNavarro FranckSajous BrunoGaume - LaurentPrévot - ShuKaiHsieh + LaurentPrévot + ShuKaiHsieh IvyKuo PierreMagistry Chu-RenHuang @@ -4805,7 +4805,7 @@ Using the <fixed-case>W</fixed-case>iktionary Graph Structure for Synonym Detection TimothyWeale ChrisBrew - EricFosler-Lussier + EricFosler-Lussier 28–31 W09-3304 weale-etal-2009-using @@ -4822,7 +4822,7 @@ Evaluating a Statistical <fixed-case>CCG</fixed-case> Parser on <fixed-case>W</fixed-case>ikipedia MatthewHonnibal JoelNothman - James R.Curran + James R.Curran 38–41 W09-3306 honnibal-etal-2009-evaluating @@ -4839,7 +4839,7 @@ Acquiring High Quality Non-Expert Knowledge from On-Demand Workforce DonghuiFeng SvevaBesana - RemiZajac + RemiZajac 51–56 W09-3308 feng-etal-2009-acquiring @@ -4847,8 +4847,8 @@ Constructing an Anaphorically Annotated Corpus with Non-Experts: Assessing the Quality of Collaborative Annotations JonChamberlain - UdoKruschwitz - MassimoPoesio + UdoKruschwitz + MassimoPoesio 57–62 W09-3309 chamberlain-etal-2009-constructing @@ -4858,7 +4858,7 @@ Proceedings of the 7th Workshop on Asian Language Resources (ALR7) W09-34 - HammamRiza + HammamRiza VirachSornlertlamvanich Association for Computational Linguistics
Suntec, Singapore
@@ -4884,10 +4884,10 @@
An Empirical Study of <fixed-case>V</fixed-case>ietnamese Noun Phrase Chunking with Discriminative Sequence Models - Le MinhNguyen + Le MinhNguyen Huong ThaoNguyen - Phuong ThaiNguyen - Tu BaoHo + Phuong ThaiNguyen + Tu BaoHo AkiraShimazu 9–16 W09-3402 @@ -4916,7 +4916,7 @@ KiyonoriOhtake TeruhisaMisu ChioriHori - HidekiKashioka + HidekiKashioka SatoshiNakamura 32–39 W09-3405 @@ -4933,7 +4933,7 @@ Automated Mining Of Names Using Parallel <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Corpus - R. Mahesh K.Sinha + R. Mahesh K.Sinha 48–54 W09-3407 sinha-2009-automated @@ -4942,15 +4942,15 @@ Basic Language Resources for Diverse <fixed-case>A</fixed-case>sian Languages: A Streamlined Approach for Resource Creation HeatherSimpson KazuakiMaeda - ChristopherCieri + ChristopherCieri 55–62 W09-3408 simpson-etal-2009-basic Finite-State Description of <fixed-case>V</fixed-case>ietnamese Reduplication - Le HongPhuong - NguyenThi Minh Huyen + PhuongLe Hong + Thi Minh HuyenNguyen RoussanalyAzim 63–69 W09-3409 @@ -4969,7 +4969,7 @@ <fixed-case>B</fixed-case>engali Verb Subcategorization Frame Acquisition - A Baseline Model SomnathBanerjee DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 76–83 W09-3411 banerjee-etal-2009-bengali @@ -4978,7 +4978,7 @@ Phonological and Logographic Influences on Errors in Written <fixed-case>C</fixed-case>hinese Words Chao-LinLiu Kan-WenTien - Min-HuaLai + Min-HuaLai Yi-HsuanChuang Shih-HungWu 84–91 @@ -4997,7 +4997,7 @@ A Syntactic Resource for <fixed-case>T</fixed-case>hai: <fixed-case>CG</fixed-case> Treebank TanethRuangrajitpakorn - KanokornTrakultaweekoon + KanokornTrakultaweekoon ThepchaiSupnithi 96–102 W09-3414 @@ -5006,7 +5006,7 @@ Part of Speech Tagging for <fixed-case>M</fixed-case>ongolian Corpus PurevJaimai - OdbayarChimeddorj + OdbayarChimeddorj 103–106 W09-3415 jaimai-chimeddorj-2009-part @@ -5033,7 +5033,7 @@ <fixed-case>CWN</fixed-case>-<fixed-case>LMF</fixed-case>: <fixed-case>C</fixed-case>hinese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et in the <fixed-case>L</fixed-case>exical <fixed-case>M</fixed-case>arkup <fixed-case>F</fixed-case>ramework Lung-HaoLee - Shu-KaiHsieh + Shu-KaiHsieh Chu-RenHuang 123–130 W09-3418 @@ -5041,9 +5041,9 @@ <fixed-case>P</fixed-case>hilippine Language Resources: Trends and Directions - Rachel EditaRoxas + Rachel EditaRoxas CharibethCheng - Nathalie RoseLim + Nathalie RoseLim 131–138 W09-3419 roxas-etal-2009-philippine @@ -5065,14 +5065,14 @@ Query Expansion using <fixed-case>LMF</fixed-case>-Compliant Lexical Resources TakenobuTokunaga DainKaplan - NicolettaCalzolari + NicolettaCalzolari MonicaMonachini ClaudiaSoria VirachSornlertlamvanich ThatsaneeCharoenporn - YingjuXia + YingjuXia Chu-RenHuang - Shu-KaiHsieh + Shu-KaiHsieh KiyoakiShirai 145–152 W09-3421 @@ -5089,7 +5089,7 @@ The <fixed-case>FL</fixed-case>a<fixed-case>R</fixed-case>e<fixed-case>N</fixed-case>et Thematic Network: A Global Forum for Cooperation - NicolettaCalzolari + NicolettaCalzolari ClaudiaSoria 161–164 W09-3423 @@ -5111,7 +5111,7 @@ Word Segmentation Standard in <fixed-case>C</fixed-case>hinese, <fixed-case>J</fixed-case>apanese and <fixed-case>K</fixed-case>orean - Key-SunChoi + Key-SunChoi HitoshiIsahara KyokoKanzaki HansaemKim @@ -5127,7 +5127,7 @@ Proceedings of the 2009 Named Entities Workshop: Shared Task on Transliteration (NEWS 2009) W09-35 HaizhouLi - AKumaran + AKumaran Association for Computational Linguistics
Suntec, Singapore
August @@ -5179,14 +5179,14 @@ Named Entity Transcription with Pair n-Gram Models MartinJansche - RichardSproat + RichardSproat 32–35 W09-3505 jansche-sproat-2009-named Machine Transliteration using Target-Language Grapheme and Phoneme: Multi-engine Transliteration Approach - Jong-HoonOh + Jong-HoonOh KiyotakaUchimoto KentaroTorisawa 36–39 @@ -5195,8 +5195,8 @@ A Language-Independent Transliteration Schema Using Character Aligned Models at <fixed-case>NEWS</fixed-case> 2009 - PraneethShishtla - Surya GaneshV + PraneethShishtla + Surya GaneshV SethuramalingamSubramaniam VasudevaVarma 40–43 @@ -5205,8 +5205,8 @@ Experiences with <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi, <fixed-case>E</fixed-case>nglish-<fixed-case>T</fixed-case>amil and <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>annada Transliteration Tasks at <fixed-case>NEWS</fixed-case> 2009 - Manoj KumarChinnakotla - Om P.Damani + Manoj KumarChinnakotla + Om P.Damani 44–47 W09-3508 chinnakotla-damani-2009-experiences @@ -5221,7 +5221,7 @@ Transliteration by Bidirectional Statistical Machine Translation AndrewFinch - EiichiroSumita + EiichiroSumita 52–56 W09-3510 finch-sumita-2009-transliteration @@ -5229,7 +5229,7 @@ Transliteration of Name Entity via Improved Statistical Translation on Character Sequences YanSong - ChunyuKit + ChunyuKit XiaoChen 57–60 W09-3511 @@ -5261,19 +5261,19 @@ Combining a Two-step Conditional Random Field Model and a Joint Source Channel Model for Machine Transliteration - DongYang - PaulDixon - Yi-ChengPan + DongYang + PaulDixon + Yi-ChengPan TasukuOonishi MasanobuNakamura - SadaokiFurui + SadaokiFurui 72–75 W09-3515 yang-etal-2009-combining Phonological Context Approximation and Homophone Treatment for <fixed-case>NEWS</fixed-case> 2009 <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Transliteration Shared Task - Oi YeeKwong + Oi YeeKwong 76–79 W09-3516 kwong-2009-phonological @@ -5283,15 +5283,15 @@ AmitavaDas AsifEkbal TapabrataMondal - SivajiBandyopadhyay + SivajiBandyopadhyay 80–83 W09-3517 das-etal-2009-english Improving Transliteration Accuracy Using Word-Origin Detection and Lexicon Lookup - MiteshKhapra - PushpakBhattacharyya + MiteshKhapra + PushpakBhattacharyya 84–87 W09-3518 khapra-bhattacharyya-2009-improving @@ -5332,9 +5332,9 @@ <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Transliteration Using Context-Informed <fixed-case>PB</fixed-case>-<fixed-case>SMT</fixed-case>: the <fixed-case>DCU</fixed-case> System for <fixed-case>NEWS</fixed-case> 2009 RejwanulHaque - SandipanDandapat - Ankit KumarSrivastava - Sudip KumarNaskar + SandipanDandapat + Ankit KumarSrivastava + Sudip KumarNaskar AndyWay 104–107 W09-3523 @@ -5345,7 +5345,7 @@ GumwonHong Min-JeongKim Do-GilLee - Hae-ChangRim + Hae-ChangRim 108–111 W09-3524 hong-etal-2009-hybrid @@ -5389,7 +5389,7 @@ Name Transliteration with Bidirectional Perceptron Edit Models - DayneFreitag + DayneFreitag ZhiqiangWang 132–135 W09-3530 @@ -5398,7 +5398,7 @@ Bridging Languages by <fixed-case>S</fixed-case>uper<fixed-case>S</fixed-case>ense Entity Tagging DavidePicca - Alfio MassimilianoGliozzo + Alfio MassimilianoGliozzo SimoneCampora 136–142 W09-3531 @@ -5416,10 +5416,10 @@ Name Matching between <fixed-case>R</fixed-case>oman and <fixed-case>C</fixed-case>hinese Scripts: Machine Complements Human - KenSamuel + KenSamuel AlanRubenstein - SherriCondon - AlexYeh + SherriCondon + AlexYeh 152–160 W09-3533 samuel-etal-2009-name @@ -5428,7 +5428,7 @@ Analysis and Robust Extraction of Changing Named Entities MasatoshiTsuchiya ShokoEndo - SeiichiNakagawa + SeiichiNakagawa 161–167 W09-3534 tsuchiya-etal-2009-analysis @@ -5444,24 +5444,24 @@ A Hybrid Model for <fixed-case>U</fixed-case>rdu <fixed-case>H</fixed-case>indi Transliteration AbbasMalik - LaurentBesacier - ChristianBoitet - PushpakBhattacharyya + LaurentBesacier + ChristianBoitet + PushpakBhattacharyya 177–185 W09-3536 malik-etal-2009-hybrid Graphemic Approximation of Phonological Context for <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Transliteration - Oi YeeKwong + Oi YeeKwong 186–193 W09-3537 kwong-2009-graphemic <fixed-case>C</fixed-case>zech Named Entity Corpus and <fixed-case>SVM</fixed-case>-based Recognizer - JanaKravalová - ZdeněkŽabokrtský + JanaKravalová + ZdeněkŽabokrtský 194–201 W09-3538 kravalova-zabokrtsky-2009-czech @@ -5469,7 +5469,7 @@ Voted <fixed-case>NER</fixed-case> System using Appropriate Unlabeled Data AsifEkbal - SivajiBandyopadhyay + SivajiBandyopadhyay 202–210 W09-3539 ekbal-bandyopadhyay-2009-voted @@ -5493,8 +5493,8 @@ Researcher affiliation extraction from homepages - IstvánNagy - RichárdFarkas + IstvánNagy + RichárdFarkas MárkJelasity 1–9 W09-3601 @@ -5515,7 +5515,7 @@ Accurate Argumentative Zoning with Maximum Entropy models StephenMerity TaraMurphy - James R.Curran + James R.Curran 19–26 W09-3603 merity-etal-2009-accurate @@ -5530,7 +5530,7 @@ Detecting key sentences for automatic assistance in peer reviewing research articles in educational sciences - ÁgnesSándor + ÁgnesSándor AngelaVorndran 36–44 W09-3605 @@ -5539,7 +5539,7 @@ Designing a Citation-Sensitive Research Tool: An Initial Study of Browsing-Specific Information Needs StephenWan - CécileParis + CécileParis MichaelMuthukrishna RobertDale 45–53 @@ -5548,8 +5548,8 @@ The <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology Network Corpus - Dragomir R.Radev - PradeepMuthukrishnan + Dragomir R.Radev + PradeepMuthukrishnan VahedQazvinian 54–61 W09-3607 @@ -5557,7 +5557,7 @@ <fixed-case>NLP</fixed-case> Support for Faceted Navigation in Scholarly Collection - Marti A.Hearst + Marti A.Hearst EmiliaStoica 62–70 W09-3608 @@ -5576,7 +5576,7 @@ Citations in the Digital Library of Classics: Extracting Canonical References by Using Conditional Random Fields MatteoRomanello FedericoBoschetti - GregoryCrane + GregoryCrane 80–87 W09-3610 romanello-etal-2009-citations @@ -5594,7 +5594,7 @@ Proceedings of the Eight International Conference on Computational Semantics - HarryBunt + HarryBunt Association for Computational Linguistics
Tilburg, The Netherlands
January @@ -5614,14 +5614,14 @@
Knowing a word (sense) by its company - MarthaPalmer + MarthaPalmer 2 W09-3702 palmer-2009-knowing Play your way to an annotated corpus: Games with a purpose and anaphoric annotation - MassimoPoesio + MassimoPoesio 3 W09-3703 poesio-2009-play @@ -5629,7 +5629,7 @@ A computational account of comparative implicatures for a spoken dialogue agent LucianaBenotti - DavidTraum + DavidTraum 4–17 W09-3704 benotti-traum-2009-computational @@ -5666,14 +5666,14 @@ A Formal Model for Procedural Texts and its Use in Textual Integration IsabelleDautriche - PatrickSaint-Dizier + PatrickSaint-Dizier 73–89 W09-3709 dautriche-saint-dizier-2009-formal Inference Rules for Recognizing Textual Entailment - GeorgianaDinu + GeorgianaDinu RuiWang 90–103 W09-3710 @@ -5708,7 +5708,7 @@ An extended model of natural logic BillMacCartney - Christopher D.Manning + Christopher D.Manning 140–156 W09-3714 maccartney-manning-2009-extended @@ -5723,8 +5723,8 @@ <fixed-case>GLML</fixed-case>: Annotating Argument Selection and Coercion - JamesPustejovsky - JessicaMoszkowicz + JamesPustejovsky + JessicaMoszkowicz OlgaBatiukova AnnaRumshisky 169–180 @@ -5768,7 +5768,7 @@ An Ordering of Terms Based on Semantic Relatedness PeterWittek SándorDarányi - Chew LimTan + Chew LimTan 235–247 W09-3721 wittek-etal-2009-ordering @@ -5810,15 +5810,15 @@ Flexible Semantic Composition with <fixed-case>DUDES</fixed-case> (short paper) - PhilippCimiano + PhilippCimiano 272–276 W09-3726 cimiano-2009-flexible Computing Implicit Entities and Events with Getaruns (short paper) - RodolfoDelmonte - EmanuelePianta + RodolfoDelmonte + EmanuelePianta 277–281 W09-3727 delmonte-pianta-2009-computing @@ -5827,7 +5827,7 @@ Comparing Alternative Data-Driven Ontological Vistas of Natural History (short paper) Mariekevan Erp PiroskaLendvai - Antalvan den Bosch + Antalvan den Bosch 282–285 W09-3728 van-erp-etal-2009-comparing @@ -5855,7 +5855,7 @@ A Study of a Segmentation Technique for Dialogue Act Assignation (short paper) - Carlos-D.Martínez-Hinarejos + Carlos-D.Martínez-Hinarejos 299–304 W09-3732 martinez-hinarejos-2009-study @@ -5863,7 +5863,7 @@ Application of Cognitive Strategies to <fixed-case>C</fixed-case>hinese Noun Classifier <fixed-case>E</fixed-case>-learning (short paper) WeiNi - Helena HongGao + Helena HongGao ShixiaoOuyang 305–309 W09-3733 @@ -5879,7 +5879,7 @@ Developing a Computer-facilitated Tool for Acquiring Near-synonyms in <fixed-case>C</fixed-case>hinese and <fixed-case>E</fixed-case>nglish (short paper) ShixiaoOuyang - Helena HongGao + Helena HongGao Soo NgeeKoh 316–319 W09-3735 @@ -5899,7 +5899,7 @@ JaimeSnyder Michael A.D’Eredita OzgurYilmazel - Elizabeth D.Liddy + Elizabeth D.Liddy 326–332 W09-3737 snyder-etal-2009-towards @@ -5914,7 +5914,7 @@ An Application of Lexical Semantics Annotation to Question-Answering in e-Farming MukdaSuktarachan - PatrickSaint-Dizier + PatrickSaint-Dizier 338–341 W09-3739 suktarachan-saint-dizier-2009-application @@ -5922,7 +5922,7 @@ A novel approach to mapping <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et lexical units to <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et synsets (short paper) SaraTonelli - EmanuelePianta + EmanuelePianta 342–345 W09-3740 tonelli-pianta-2009-novel @@ -5947,14 +5947,14 @@ A semantic relatedness metric based on free link structure (short paper) SanderWubben - Antalvan den Bosch + Antalvan den Bosch 355–358 W09-3743 wubben-van-den-bosch-2009-semantic Semantic Normalisation : a Framework and an Experiment - PaulBedaride + PaulBedaride ClaireGardent 359–370 W09-3744 @@ -5965,8 +5965,8 @@ Proceedings of the 11th International Conference on Parsing Technologies (IWPT’09) W09-38 - HarryBunt - ÉricVillemonte de la Clergerie + HarryBunt + ÉricVillemonte de la Clergerie Association for Computational Linguistics
Paris, France
October @@ -6011,7 +6011,7 @@
Empirical lower bounds on translation unit error rate for the full class of inversion transduction grammars - AndersSøgaard + AndersSøgaard DekaiWu 33–36 W09-3805 @@ -6046,7 +6046,7 @@ Deductive Parsing in Interaction Grammars - JosephLe Roux + JosephLe Roux 65–68 W09-3809 le-roux-2009-deductive @@ -6071,9 +6071,9 @@ Two stage constraint based hybrid approach to free word order language dependency parsing - AksharBharati + AksharBharati SamarHusain - DiptiMisra + DiptiMisra RajeevSangal 77–80 W09-3812 @@ -6089,7 +6089,7 @@ Evaluating Contribution of Deep Syntactic Information to Shallow Semantic Analysis SumireUematsu - Jun’ichiTsujii + Jun’ichiTsujii 85–88 W09-3814 uematsu-tsujii-2009-evaluating @@ -6105,7 +6105,7 @@ Co-Parsing with Competitive Models LidiaKhmylko - Kilian A.Foth + Kilian A.Foth WolfgangMenzel 99–107 W09-3816 @@ -6123,7 +6123,7 @@ Constructing parse forests that include exactly the n-best <fixed-case>PCFG</fixed-case> trees PierreBoullier AlexisNasr - BenoîtSagot + BenoîtSagot 117–128 W09-3818 boullier-etal-2009-constructing @@ -6146,8 +6146,8 @@ Improving generative statistical parsing with semi-supervised word clustering - MarieCandito - BenoîtCrabbé + MarieCandito + BenoîtCrabbé 138–141 W09-3821 candito-crabbe-2009-improving @@ -6155,7 +6155,7 @@ Application of feature propagation to dependency parsing KepaBengoetxea - KoldoGojenola + KoldoGojenola 142–145 W09-3822 bengoetxea-gojenola-2009-application @@ -6163,7 +6163,7 @@ Guessing the Grammatical Function of a Non-Root <fixed-case>F</fixed-case>-Structure in <fixed-case>LFG</fixed-case> AntonBryl - Josefvan Genabith + Josefvan Genabith YvetteGraham 146–149 W09-3823 @@ -6171,9 +6171,9 @@ Cross parser evaluation : a <fixed-case>F</fixed-case>rench Treebanks study - DjaméSeddah - MarieCandito - BenoîtCrabbé + DjaméSeddah + MarieCandito + BenoîtCrabbé 150–161 W09-3824 seddah-etal-2009-cross @@ -6205,7 +6205,7 @@ Effective Analysis of Causes and Inter-dependencies of Parsing Errors TadayoshiHara YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 180–191 W09-3828 hara-etal-2009-effective @@ -6213,7 +6213,7 @@ Clustering Words by Syntactic Similarity improves Dependency Parsing of Predicate-argument Structures KenjiSagae - Andrew S.Gordon + Andrew S.Gordon 192–201 W09-3829 sagae-gordon-2009-clustering @@ -6227,7 +6227,7 @@ Using a maximum entropy-based tagger to improve a very fast vine parser - AndersSøgaard + AndersSøgaard JonasKuhn 206–209 W09-3831 @@ -6235,9 +6235,9 @@ <fixed-case>HPSG</fixed-case> Supertagging: A Sequence Labeling View - Yao-zhongZhang + Yao-zhongZhang TakuyaMatsuzaki - Jun’ichiTsujii + Jun’ichiTsujii 210–213 W09-3832 zhang-etal-2009-hpsg @@ -6246,7 +6246,7 @@ Smoothing fine-grained <fixed-case>PCFG</fixed-case> lexicons TejaswiniDeoskar MatsRooth - KhalilSima’an + KhalilSima’an 214–217 W09-3833 deoskar-etal-2009-smoothing @@ -6261,8 +6261,8 @@ Interactive Predictive Parsing RicardoSánchez-Sáez - Joan-AndreuSánchez - José-MiguelBenedí + Joan-AndreuSánchez + José-MiguelBenedí 222–225 W09-3835 sanchez-saez-etal-2009-interactive @@ -6278,7 +6278,7 @@ Heuristic search in a cognitive model of human parsing - JohnHale + JohnHale 230–233 W09-3837 hale-2009-heuristic @@ -6286,7 +6286,7 @@ Dependency Parsing with Energy-based Reinforcement Learning LidanZhang - Kwok PingChan + Kwok PingChan 234–237 W09-3838 zhang-chan-2009-dependency @@ -6302,7 +6302,7 @@ Dependency Constraints for Lexical Disambiguation - GuillaumeBonfante + GuillaumeBonfante BrunoGuillaume MathieuMorey 242–253 @@ -6312,7 +6312,7 @@ Parsing Directed Acyclic Graphs with Range Concatenation Grammars PierreBoullier - BenoîtSagot + BenoîtSagot 254–265 W09-3841 boullier-sagot-2009-parsing @@ -6322,10 +6322,10 @@ Proceedings of the SIGDIAL 2009 Conference W09-39 - PatrickHealey - RobertoPieraccini - DonnaByron - SteveYoung + PatrickHealey + RobertoPieraccini + DonnaByron + SteveYoung MatthewPurver Association for Computational Linguistics
London, UK
@@ -6341,7 +6341,7 @@ Evaluating the Effectiveness of Information Presentation in a Full End-To-End Dialogue System TaghiPaksima KallirroiGeorgila - JohannaMoore + JohannaMoore 1–10 W09-3901 paksima-etal-2009-evaluating @@ -6350,7 +6350,7 @@ Can <fixed-case>I</fixed-case> Finish? Learning When to Respond to Incremental Interpretation Results in Interactive Dialogue DavidDeVault KenjiSagae - DavidTraum + DavidTraum 11–20 W09-3902 devault-etal-2009-finish @@ -6381,10 +6381,10 @@
Dealing with Interpretation Errors in Tutorial Dialogue - MyroslavaDzikovska - CharlesCallaway + MyroslavaDzikovska + CharlesCallaway ElaineFarrow - JohannaMoore + JohannaMoore NatalieSteinhauser GwendolynCampbell 38–45 @@ -6404,14 +6404,14 @@ Participant Subjectivity and Involvement as a Basis for Discourse Segmentation JohnNiekrasz - JohannaMoore + JohannaMoore 54–61 W09-3908 niekrasz-moore-2009-participant Genre-Based Paragraph Classification for Sentiment Analysis - MaiteTaboada + MaiteTaboada JulianBrooke ManfredStede 62–70 @@ -6421,7 +6421,7 @@ Detecting the Noteworthiness of Utterances in Human Meetings SatanjeevBanerjee - AlexanderRudnicky + AlexanderRudnicky 71–78 W09-3910 banerjee-rudnicky-2009-detecting @@ -6438,7 +6438,7 @@ Interactive Gesture in Dialogue: a <fixed-case>PTT</fixed-case> Model HannesRieser - MassimoPoesio + MassimoPoesio 87–96 W09-3912 rieser-poesio-2009-interactive @@ -6469,7 +6469,7 @@ A Two-Tier User Simulation Model for Reinforcement Learning of Adaptive Referring Expression Generation Policies - SrinivasanJanarthanam + SrinivasanJanarthanam OliverLemon 120–123 W09-3916 @@ -6497,7 +6497,7 @@ Estimating Probability of Correctness for <fixed-case>ASR</fixed-case> <fixed-case>N</fixed-case>-<fixed-case>B</fixed-case>est Lists - JasonWilliams + JasonWilliams SuhridBalakrishnan 132–135 W09-3919 @@ -6505,7 +6505,7 @@ Not a Simple Yes or No: Uncertainty in Indirect Answers - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe ScottGrimm ChristopherPotts 136–143 @@ -6515,14 +6515,14 @@ Concept Form Adaptation in Human-Computer Dialog SvetlanaStoyanchev - AmandaStent + AmandaStent 144–147 W09-3921 stoyanchev-stent-2009-concept Automatic Generation of Information State Update Dialogue Systems that Dynamically Create Voice <fixed-case>XML</fixed-case>, as Demonstrated on the i<fixed-case>P</fixed-case>hone - HelenHastie + HelenHastie XingkunLiu OliverLemon 148–151 @@ -6544,7 +6544,7 @@ SebastianVarges SilviaQuarteroni GiuseppeRiccardi - AlexeiIvanov + AlexeiIvanov PierluigiRoberti 156–159 W09-3924 @@ -6552,11 +6552,11 @@ Speeding Up the Design of Dialogue Applications by Using Database Contents and Structure Information - Luis FernandoD’Haro - Ricardode Cordoba - Juan ManuelLucas - RobertoBarra-Chicote - RubenSan-Segundo + Luis FernandoD’Haro + Ricardode Cordoba + Juan ManuelLucas + RobertoBarra-Chicote + RubenSan-Segundo 160–169 W09-3925 dharo-etal-2009-speeding @@ -6575,7 +6575,7 @@ Discourse Structure and Performance Analysis: Beyond the Correlation MihaiRotaru - DianeLitman + DianeLitman 178–187 W09-3927 rotaru-litman-2009-discourse @@ -6583,7 +6583,7 @@ The Role of Interactivity in Human-Machine Conversation for Automatic Word Acquisition ShaolinQu - JoyceChai + JoyceChai 188–195 W09-3928 qu-chai-2009-role @@ -6597,15 +6597,15 @@ What do We Know about Conversation Participants: Experiments on Conversation Entailment - ChenZhang - JoyceChai + ChenZhang + JoyceChai 206–215 W09-3930 zhang-chai-2009-know Artificial <fixed-case>C</fixed-case>ompanions as Dialogue Agents - YorickWilks + YorickWilks 216 W09-3931 wilks-2009-artificial @@ -6623,7 +6623,7 @@ Models for Multiparty Engagement in Open-World Dialog - DanBohus + DanBohus EricHorvitz 225–234 W09-3933 @@ -6631,9 +6631,9 @@ Extracting Decisions from Multi-Party Dialogue Using Directed Graphical Models and Semantic Similarity - TrungBui + TrungBui MatthewFrampton - JohnDowding + JohnDowding StanleyPeters 235–243 W09-3934 @@ -6641,7 +6641,7 @@ Learning to Predict Engagement with a Spoken Dialog System in Open-World Settings - DanBohus + DanBohus EricHorvitz 244–252 W09-3935 @@ -6649,8 +6649,8 @@ Turn-Yielding Cues in Task-Oriented Dialogue - AgustínGravano - JuliaHirschberg + AgustínGravano + JuliaHirschberg 253–261 W09-3936 gravano-hirschberg-2009-turn @@ -6667,11 +6667,11 @@ k-<fixed-case>N</fixed-case>earest Neighbor <fixed-case>M</fixed-case>onte-<fixed-case>C</fixed-case>arlo Control Algorithm for <fixed-case>POMDP</fixed-case>-Based Dialogue Systems - FabriceLefèvre - MilicaGašić - FilipJurčíček + FabriceLefèvre + MilicaGašić + FilipJurčíček SimonKeizer - FrançoisMairesse + FrançoisMairesse BlaiseThomson KaiYu SteveYoung @@ -6683,22 +6683,22 @@ Comparison of Classification and Ranking Approaches to Pronominal Anaphora Resolution in <fixed-case>C</fixed-case>zech Giang LinhNgụy VáclavNovák - ZdeněkŽabokrtský + ZdeněkŽabokrtský 276–285 W09-3939 nguy-etal-2009-comparison Spoken Tutorial Dialogue and the Feeling of Another’s Knowing - DianeLitman - KateForbes-Riley + DianeLitman + KateForbes-Riley 286–289 W09-3940 litman-forbes-riley-2009-spoken Evaluating Automatic Extraction of Rules for Sentence Plan Construction - AmandaStent + AmandaStent MartinMolina 290–297 W09-3941 @@ -6724,7 +6724,7 @@ Cascaded Lexicalised Classifiers for Second-Person Reference Resolution MatthewPurver - RaquelFernández + RaquelFernández MatthewFrampton StanleyPeters 306–309 @@ -6745,7 +6745,7 @@ SatoshiIkeda YuichiroFukubayashi TetsuyaOgata - HiroshiOkuno + HiroshiOkuno 314–321 W09-3946 komatani-etal-2009-ranking @@ -6761,23 +6761,23 @@ A Comparison between Dialog Corpora Acquired with Real and Simulated Users DavidGriol ZoraidaCallejas - RamónLópez-Cózar + RamónLópez-Cózar 326–332 W09-3948 griol-etal-2009-comparison Simultaneous Dialogue Act Segmentation and Labelling using Lexical and Syntactic Features - RamonGranell - StephenPulman - Carlos-D.Martínez-Hinarejos + RamonGranell + StephenPulman + Carlos-D.Martínez-Hinarejos 333–336 W09-3949 granell-etal-2009-simultaneous The Spoken Dialogue Challenge - AlanBlack + AlanBlack MaxineEskenazi 337–340 W09-3950 @@ -6786,15 +6786,15 @@ Unsupervised Classification of Dialogue Acts using a <fixed-case>D</fixed-case>irichlet Process Mixture Model NigelCrook - RamonGranell - StephenPulman + RamonGranell + StephenPulman 341–348 W09-3951 crook-etal-2009-unsupervised A Handsome Set of Metrics to Measure Utterance Classification Performance in Spoken Dialog Systems - DavidSuendermann + DavidSuendermann JacksonLiscombe KrishnaDayanidhi RobertoPieraccini @@ -6805,8 +6805,8 @@ Contrasting the Interaction Structure of an Email and a Telephone Corpus: A Machine Learning Approach to Annotation of Dialogue Function Units JunHu - RebeccaPassonneau - OwenRambow + RebeccaPassonneau + OwenRambow 357–366 W09-3953 hu-etal-2009-contrasting @@ -6816,8 +6816,8 @@ Proceedings of the Workshop Multilingual resources, technologies and evaluation for central and Eastern European languages W09-40 - ElenaPaskaleva - SteliosPiperidis + ElenaPaskaleva + SteliosPiperidis MilenaSlavcheva CristinaVertan Association for Computational Linguistics @@ -6843,7 +6843,7 @@ On the behavior of <fixed-case>R</fixed-case>omanian syllables related to minimum effort laws AncaDinu - Liviu P.Dinu + Liviu P.Dinu 9–13 W09-4002 dinu-dinu-2009-behavior @@ -6858,7 +6858,7 @@ <fixed-case>E</fixed-case>-Connecting <fixed-case>B</fixed-case>alkan Languages CvetanaKrstev - RankaStanković + RankaStanković DuškoVitas SvetlaKoeva 19–25 @@ -6868,7 +6868,7 @@ Converting <fixed-case>R</fixed-case>ussian Treebank <fixed-case>S</fixed-case>yn<fixed-case>T</fixed-case>ag<fixed-case>R</fixed-case>us into Praguian <fixed-case>PDT</fixed-case> Style DavidMareček - NataliaKljueva + NataliaKljueva 26–31 W09-4005 marecek-kljueva-2009-converting @@ -6893,10 +6893,10 @@ Proceedings of the Workshop on Adaptation of Language Resources and Technology to New Domains W09-41 - NúriaBel - ErhardHinrichs + NúriaBel + ErhardHinrichs PetyaOsenova - KirilSimov + KirilSimov Association for Computational Linguistics
Borovets, Bulgaria
September @@ -6917,7 +6917,7 @@
Maximal Phrases Based Analysis for Prototyping Online Discussion Forums Postings - GastonBurek + GastonBurek DaleGerdemann 12–18 W09-4102 @@ -6926,15 +6926,15 @@ <fixed-case>LEXIE</fixed-case> – an Experiment in Lexical Information Extraction John J.Camilleri - MichaelRosner + MichaelRosner 19–26 W09-4103 camilleri-rosner-2009-lexie Adapting <fixed-case>NLP</fixed-case> and Corpus Analysis Techniques to Structured Imagery Analysis in Classical <fixed-case>C</fixed-case>hinese Poetry - Alex ChengyuFang - FengjuLo + Alex ChengyuFang + FengjuLo Cheuk KitChinn 27–34 W09-4104 @@ -6943,7 +6943,7 @@ Cross-lingual Adaptation as a Baseline: Adapting Maximum Entropy Models to <fixed-case>B</fixed-case>ulgarian GeorgiGeorgiev - PreslavNakov + PreslavNakov PetyaOsenova KirilSimov 35–38 @@ -6967,7 +6967,7 @@ <fixed-case>QALL</fixed-case>-<fixed-case>ME</fixed-case> needs <fixed-case>AIR</fixed-case>: a portability study - ConstantinOrăsan + ConstantinOrăsan IustinDornescu NataliaPonomareva 50–57 @@ -7007,7 +7007,7 @@ Finding Domain Specific Collocations and Concordances on the Web - CarolineBarrière + CarolineBarrière 1–8 W09-4201 barriere-2009-finding @@ -7039,14 +7039,14 @@ Evidence-Based Word Alignment - JörgTiedemann + JörgTiedemann 28–32 W09-4205 tiedemann-2009-evidence A Discriminative Approach to Tree Alignment - JörgTiedemann + JörgTiedemann GideonKotzé 33–39 W09-4206 @@ -7057,9 +7057,9 @@ Proceedings of the Workshop on Events in Emerging Text Types W09-43 - ConstantinOrasan + ConstantinOrasan LauraHasler - CorinaForăscu + CorinaForăscu Association for Computational Linguistics
Borovets, Bulgaria
September @@ -7090,19 +7090,19 @@ A Pairwise Event Coreference Model, Feature Impact and Evaluation for Event Coreference Resolution ZhengChen HengJi - RobertHaralick + RobertHaralick 17–22 W09-4303 chen-etal-2009-pairwise
Summarizing Threads in Blogs Using Opinion Polarity - AlexandraBalahur + AlexandraBalahur ElenaLloret EsterBoldrini - AndrésMontoyo - ManuelPalomar - PatricioMartínez-Barco + AndrésMontoyo + ManuelPalomar + PatricioMartínez-Barco 23–31 W09-4304 balahur-etal-2009-summarizing @@ -7127,9 +7127,9 @@ Proceedings of the 1st Workshop on Definition Extraction W09-44 - GerardoSierra - MaraPozzi - Juan-ManuelTorres + GerardoSierra + MaraPozzi + Juan-ManuelTorres Association for Computational Linguistics
Borovets, Bulgaria
September @@ -7160,7 +7160,7 @@ Enriching a Lexicographic Tool with Domain Definitions: Problems and Solutions María A.Barrios - GuadalupeAguado de Cea + GuadalupeAguado de Cea José ÁngelRamos 14–20 W09-4403 @@ -7170,7 +7170,7 @@ Extraction of Author’s Definitions Using Indexed Reference Identification MarcBertin IanaAtanassova - Jean-PierreDescles + Jean-PierreDescles 21–25 W09-4404 bertin-etal-2009-extraction @@ -7178,16 +7178,16 @@ Evolutionary Algorithms for Definition Extraction ClaudiaBorg - MikeRosner - GordonPace + MikeRosner + GordonPace 26–32 W09-4405 borg-etal-2009-evolutionary Language Independent System for Definition Extraction: First Results Using Learning Algorithms - RosaDel Gaudio - AntónioBranco + RosaDel Gaudio + AntónioBranco 33–39 W09-4406 del-gaudio-branco-2009-language @@ -7244,7 +7244,7 @@ Extraction and Exploration of Correlations in Patient Status Data SvetlaBoytcheva IvelinaNikolova - ElenaPaskaleva + ElenaPaskaleva GaliaAngelova DimitarTcharaktchiev NadyaDimitrova @@ -7256,7 +7256,7 @@ Semantic Portals in Biomedicine: Case Study IrinaEfimenko SergeyMinor - AnatoliStarostin + AnatoliStarostin VladimirKhoroshevsky 8–13 W09-4502 @@ -7265,7 +7265,7 @@ A Joint Model for Normalizing Gene and Organism Mentions in Text GeorgiGeorgiev - PreslavNakov + PreslavNakov KuzmanGanchev DeyanPeychev VassilMomchev @@ -7294,12 +7294,12 @@ Natural Language Processing to Detect Risk Patterns Related to Hospital Acquired Infections DenysProux PierreMarchal - FrédériqueSegond + FrédériqueSegond IvanKergourlay - StéfanDarmoni + StéfanDarmoni SuzannePereira QuentinGicquel - Marie-HélèneMetzger + Marie-HélèneMetzger 35–41 W09-4506 proux-etal-2009-natural @@ -7307,14 +7307,14 @@ Cascading Classifiers for Named Entity Recognition in Clinical Notes YefengWang - JonPatrick + JonPatrick 42–49 W09-4507 wang-patrick-2009-cascading Deriving Clinical Query Patterns from Medical Corpora Using Domain Ontologies - Pinar OezdenWennerberg + Pinar OezdenWennerberg PaulBuitelaar SonjaZillner 50–56 @@ -7326,8 +7326,8 @@ Proceedings of the 17th Nordic Conference of Computational Linguistics (NODALIDA 2009) W09-46 - KristiinaJokinen - EckhardBick + KristiinaJokinen + EckhardBick Northern European Association for Language Technology (NEALT)
Odense, Denmark
May @@ -7354,7 +7354,7 @@
Text Annotation with <fixed-case>O</fixed-case>pen<fixed-case>NLP</fixed-case> and <fixed-case>UIMA</fixed-case> - GrahamWilcock + GrahamWilcock 7–8 W09-4603 wilcock-2009-text @@ -7398,7 +7398,7 @@ Pattern-based <fixed-case>E</fixed-case>nglish-<fixed-case>L</fixed-case>atvian Toponym Translation TatianaGornostay - IngunaSkadiņa + IngunaSkadiņa 41–47 W09-4608 gornostay-skadina-2009-pattern @@ -7408,7 +7408,7 @@ NathanGreen PaulBreimyer VinayKumar - Nagiza F.Samatova + Nagiza F.Samatova 48–56 W09-4609 green-etal-2009-webbanc @@ -7433,8 +7433,8 @@ The <fixed-case>N</fixed-case>ordic Dialect Corpus–an advanced research tool - Janne BondiJohannessen - Joel JamesPriestley + Janne BondiJohannessen + Joel JamesPriestley KristinHagen Tor AndersÅfarli Øystein AlexanderVangsnes @@ -7451,15 +7451,15 @@ Weighted Finite-State Morphological Analysis of <fixed-case>F</fixed-case>innish Compounding with <fixed-case>HFST</fixed-case>-<fixed-case>LEXC</fixed-case> - KristerLindén - TommiPirinen + KristerLindén + TommiPirinen 89–95 W09-4614 linden-pirinen-2009-weighted Corpus-based Paradigm Selection for Morphological Entries - KristerLindén + KristerLindén JussiTuovila 96–102 W09-4615 @@ -7470,14 +7470,14 @@ HrafnLoftsson IdaKramarczyk SigrúnHelgadóttir - EiríkurRögnvaldsson + EiríkurRögnvaldsson 103–110 W09-4616 loftsson-etal-2009-improving Disambiguation of Taxonomy Markers in Context: <fixed-case>R</fixed-case>ussian Nouns - OlgaLashevskaja + OlgaLashevskaja OlgaMitrofanova 111–117 W09-4617 @@ -7486,7 +7486,7 @@ Towards automatic acquisition of linguistic features YvesLepage - Chooi LingGoh + Chooi LingGoh 118–125 W09-4618 lepage-goh-2009-towards @@ -7494,7 +7494,7 @@ Building a morphological and syntactic lexicon by merging various linguistic resources Miguel A.Molinero - BenoîtSagot + BenoîtSagot LionelNicolas 126–133 W09-4619 @@ -7502,7 +7502,7 @@ Using Semantic Features Derived from Word-Space Models for <fixed-case>S</fixed-case>wedish Coreference Resolution - KristinaNilsson + KristinaNilsson HansHjelm 134–141 W09-4620 @@ -7526,7 +7526,7 @@ What do we need to know about humans? A view into the <fixed-case>D</fixed-case>an<fixed-case>N</fixed-case>et database - BoletteSandford Pedersen + BoletteSandford Pedersen AnnaBraasch 158–165 W09-4623 @@ -7535,29 +7535,29 @@ Dependency Parsing Resources for <fixed-case>F</fixed-case>rench: Converting Acquired <fixed-case>L</fixed-case>exical <fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar <fixed-case>F</fixed-case>-Structure Annotations and Parsing <fixed-case>F</fixed-case>-Structures Directly NatalieSchluter - Josefvan Genabith + Josefvan Genabith 166–173 W09-4624 schluter-van-genabith-2009-dependency Conflict Resolution Using Weighted Rules in <fixed-case>HFST</fixed-case>-<fixed-case>TWOLC</fixed-case> - MiikkaSilfverberg - KristerLindén + MiikkaSilfverberg + KristerLindén 174–181 W09-4625 silfverberg-linden-2009-conflict A linear time extension of deterministic pushdown automata - AndersSøgaard + AndersSøgaard 182–189 W09-4626 sogaard-2009-linear Verifying context-sensitive treebanks and heuristic parses in polynomial time - AndersSøgaard + AndersSøgaard 190–197 W09-4627 sogaard-2009-verifying @@ -7580,7 +7580,7 @@ Automatic Semantic Role Annotation for <fixed-case>S</fixed-case>panish EckhardBick - M. PilarValverde Ibáñez + M. PilarValverde Ibáñez 215–218 W09-4630 bick-valverde-ibanez-2009-automatic @@ -7595,7 +7595,7 @@ <fixed-case>M</fixed-case>ed<fixed-case>E</fixed-case>val–six test collections in one - KarinFriberg Heppin + KarinFriberg Heppin 223–226 W09-4632 friberg-heppin-2009-medeval @@ -7603,17 +7603,17 @@ Active Learning in Example-Based Machine Translation RashmiGangadharaiah - Ralf D.Brown - JaimeCarbonell + Ralf D.Brown + JaimeCarbonell 227–230 W09-4633 gangadharaiah-etal-2009-active Context-Sensitive Spelling Correction and Rich Morphology - Anton K.Ingason + Anton K.Ingason Skúli B.Jóhannsson - EiríkurRögnvaldsson + EiríkurRögnvaldsson HrafnLoftsson SigrúnHelgadóttir 231–234 @@ -7631,14 +7631,14 @@ The Open Source Tagger <fixed-case>H</fixed-case>un<fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> for <fixed-case>S</fixed-case>wedish - BeátaMegyesi + BeátaMegyesi 239–241 W09-4636 megyesi-2009-open <fixed-case>E</fixed-case>nglish-<fixed-case>L</fixed-case>atvian <fixed-case>SMT</fixed-case>: knowledge or data? - IngunaSkadiņa + IngunaSkadiņa EdgarsBrālītis 242–245 W09-4637 @@ -7646,7 +7646,7 @@ Cross-lingual porting of distributional semantic classification - LiljaØvrelid + LiljaØvrelid 246–249 W09-4638 ovrelid-2009-cross @@ -7683,7 +7683,7 @@ <fixed-case>S</fixed-case>ub<fixed-case>TTS</fixed-case>: Light-weight automatic reading of subtitles SandraDerbring - PeterLjunglöf + PeterLjunglöf MariaOlsson 272–274 W09-4643 @@ -7691,9 +7691,9 @@ <fixed-case>TRIK</fixed-case>: A Talking and Drawing Robot for Children with Communication Disabilities - PeterLjunglöf + PeterLjunglöf StaffanLarsson - KatarinaHeimann Mühlenbock + KatarinaHeimann Mühlenbock GunillaThunberg 275–278 W09-4644 @@ -7701,8 +7701,8 @@ <fixed-case>CAOS</fixed-case>–A Tool for the Construction of Terminological Ontologies - BodilNistrup Madsen - HanneErdman Thomsen + BodilNistrup Madsen + HanneErdman Thomsen 279–282 W09-4645 nistrup-madsen-erdman-thomsen-2009-caos @@ -7711,7 +7711,7 @@ The <fixed-case>N</fixed-case>ordic Dialect Database: Mapping Microsyntactic Variation in the <fixed-case>S</fixed-case>candinavian Languages Arne MartinusLindstad AndersNøklestad - Janne BondiJohannessen + Janne BondiJohannessen Øystein AlexanderVangsnes 283–286 W09-4646 diff --git a/data/xml/W10.xml b/data/xml/W10.xml index 55a4e3abfa..4ebed2ba59 100644 --- a/data/xml/W10.xml +++ b/data/xml/W10.xml @@ -27,8 +27,8 @@ Active Semi-Supervised Learning for Improving Word Alignment VamshiAmbati - StephanVogel - JaimeCarbonell + StephanVogel + JaimeCarbonell 10–17 W10-0102 ambati-etal-2010-active-semi @@ -36,7 +36,7 @@ <fixed-case>D</fixed-case>-Confidence: An Active Learning Strategy which Efficiently Identifies Small Classes NunoEscudeiro - AlípioJorge + AlípioJorge 18–26 W10-0103 escudeiro-jorge-2010-confidence @@ -45,7 +45,7 @@ Domain Adaptation meets Active Learning PiyushRai AvishekSaha - HalDaumé + HalDaumé SureshVenkatasubramanian 27–32 W10-0104 @@ -55,7 +55,7 @@ Parallel Active Learning: Eliminating Wait Time with Minimal Staleness RobbieHaertel PaulFelt - Eric K.Ringger + Eric K.Ringger KevinSeppi 33–41 W10-0105 @@ -66,7 +66,7 @@ Proceedings of the NAACL HLT 2010 Workshop on Computational Approaches to Analysis and Generation of Emotion in Text W10-02 - DianaInkpen + DianaInkpen CarloStrapparava Association for Computational Linguistics
Los Angeles, CA
@@ -89,7 +89,7 @@ Emotion Detection in Email Customer Care NarendraGupta MazinGilbert - GiuseppeDi Fabbrizio + GiuseppeDi Fabbrizio 10–16 W10-0202 gupta-etal-2010-emotion @@ -97,8 +97,8 @@ Toward Plot Units: Automatic Affect State Analysis AmitGoyal - EllenRiloff - HalDaume III + EllenRiloff + HalDaume III NathanGilbert 17–25 W10-0203 @@ -106,8 +106,8 @@ Emotions Evoked by Common Words and Phrases: Using <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk to Create an Emotion Lexicon - SaifMohammad - PeterTurney + SaifMohammad + PeterTurney 26–34 W10-0204 mohammad-turney-2010-emotions @@ -122,7 +122,7 @@ A Text-driven Rule-based System for Emotion Cause Detection - Sophia Yat MeiLee + Sophia Yat MeiLee YingChen Chu-RenHuang 45–53 @@ -140,7 +140,7 @@ Evaluation of Unsupervised Emotion Models to Textual Affect Recognition - Sunghwan MacKim + Sunghwan MacKim AlessandroValitutti Rafael A.Calvo 62–70 @@ -176,7 +176,7 @@ Emotional Perception of Fairy Tales: Achieving Agreement in Emotion Annotation of Text Ekaterina P.Volkova BettyMohler - DetmarMeurers + DetmarMeurers DaleGerdemann Heinrich H.Bülthoff 98–106 @@ -187,7 +187,7 @@ Experiments on Summary-based Opinion Classification ElenaLloret HoracioSaggion - ManuelPalomar + ManuelPalomar 107–115 W10-0213 lloret-etal-2010-experiments @@ -195,7 +195,7 @@ Recognizing Stances in Ideological On-Line Debates SwapnaSomasundaran - JanyceWiebe + JanyceWiebe 116–124 W10-0214 somasundaran-wiebe-2010-recognizing @@ -214,8 +214,8 @@ Sentiment Classification using Automatically Extracted Subgraph Features ShilpaArora ElijahMayfield - CarolynPenstein-Rosé - EricNyberg + CarolynPenstein-Rosé + EricNyberg 131–139 W10-0216 arora-etal-2010-sentiment @@ -224,7 +224,7 @@ Hierarchical versus Flat Classification of Emotions in Text DimanGhazi DianaInkpen - StanSzpakowicz + StanSzpakowicz 140–146 W10-0217 ghazi-etal-2010-hierarchical @@ -265,7 +265,7 @@ Comparing Semantic Role Labeling with Typed Dependency Parsing in Computational Metaphor Identification Eric P. S.Baumer - James P.White + James P.White BillTomlinson 14–22 W10-0303 @@ -273,15 +273,15 @@ Engineering Linguistic Creativity: Bird Flight and Jet Planes - PabloGervás + PabloGervás 23–30 W10-0304 gervas-2010-engineering An alternate approach towards meaningful lyric generation in <fixed-case>T</fixed-case>amil - AnanthRamakrishnan A - SobhaLalitha Devi + AnanthRamakrishnan A + SobhaLalitha Devi 31–39 W10-0305 ramakrishnan-a-lalitha-devi-2010-alternate @@ -289,7 +289,7 @@ Representing Story Plans in <fixed-case>SUMO</fixed-case> JeffreyCua - RuliManurung + RuliManurung EthelOng AdamPease 40–48 @@ -330,7 +330,7 @@ Scientific Authoring Support: A Tool to Navigate in Typed Citation Graphs - UlrichSchäfer + UlrichSchäfer UweKasterka 7–14 W10-0402 @@ -380,10 +380,10 @@ Exploring Individual Differences in Student Writing with a Narrative Composition Support Environment JuliusGoth AlokBaikadi - Eun YoungHa + Eun YoungHa JonathanRowe BradfordMott - JamesLester + JamesLester 56–64 W10-0408 goth-etal-2010-exploring @@ -430,7 +430,7 @@ Detecting Word Misuse in <fixed-case>C</fixed-case>hinese - WeiLiu + WeiLiu 5–6 W10-0503 liu-2010-detecting @@ -469,13 +469,13 @@ Intelligent Linux Information Access by Data Mining: the <fixed-case>ILIAD</fixed-case> Project - TimothyBaldwin - DavidMartinez + TimothyBaldwin + DavidMartinez RichardPenman Su NamKim MarcoLui LiWang - AndrewMacKinlay + AndrewMacKinlay 15–16 W10-0508 baldwin-etal-2010-intelligent @@ -484,7 +484,7 @@ Mining User Experiences from Online Forums: An Exploration ValentinJijkoun WouterWeerkamp - Maartende Rijke + Maartende Rijke PaulAckermans GijsGeleijnse 17–18 @@ -495,8 +495,8 @@ Social Links from Latent Topics in Microblogs KritiPuniyani JacobEisenstein - Shay B.Cohen - EricXing + Shay B.Cohen + EricXing 19–20 W10-0510 puniyani-etal-2010-social @@ -511,17 +511,17 @@ <fixed-case>T</fixed-case>witter in Mass Emergency: What <fixed-case>NLP</fixed-case> Can Contribute - William J.Corvey - SarahVieweg + William J.Corvey + SarahVieweg TravisRood - MarthaPalmer + MarthaPalmer 23–24 W10-0512 corvey-etal-2010-twitter The <fixed-case>E</fixed-case>dinburgh <fixed-case>T</fixed-case>witter Corpus - SašaPetrović + SašaPetrović MilesOsborne VictorLavrenko 25–26 @@ -530,7 +530,7 @@ Labelling and Spatio-Temporal Grounding of News Events - BeaAlex + BeaAlex ClaireGrover 27–28 W10-0514 @@ -540,7 +540,7 @@ Tracking Information Flow between Primary and Secondary News Sources WillRadford BenHachey - JamesCurran + JamesCurran MariaMilosavljevic 29–30 W10-0515 @@ -560,7 +560,7 @@ Proceedings of the NAACL HLT 2010 First Workshop on Computational Neurolinguistics W10-06 BrianMurphy - Kai-min KevinChang + Kai-min KevinChang AnnaKorhonen Association for Computational Linguistics
Los Angeles, USA
@@ -602,7 +602,7 @@ Network Analysis of <fixed-case>K</fixed-case>orean Word Associations JaeyoungJung NaLi - HiroyukiAkama + HiroyukiAkama 27–35 W10-0604 jung-etal-2010-network @@ -610,7 +610,7 @@ Detecting Semantic Category in Simultaneous <fixed-case>EEG</fixed-case>/<fixed-case>MEG</fixed-case> Recordings BrianMurphy - MassimoPoesio + MassimoPoesio 36–44 W10-0605 murphy-poesio-2010-detecting @@ -618,7 +618,7 @@ Hemispheric processing of <fixed-case>C</fixed-case>hinese polysemy in the disyllabic verb/ noun compounds: an event-related potential study Chih-yingHuang - Chia-yingLee + Chia-yingLee 45–51 W10-0606 huang-lee-2010-hemispheric @@ -681,7 +681,7 @@ JacobAndreas KapilThadani SaraRosenthal - KathleenMcKeown + KathleenMcKeown 13–20 W10-0702 jha-etal-2010-corpus @@ -697,7 +697,7 @@ Semi-supervised Word Alignment with <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk QinGao - StephanVogel + StephanVogel 30–34 W10-0704 gao-vogel-2010-semi @@ -705,7 +705,7 @@ Rating Computer-Generated Questions with <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk MichaelHeilman - Noah A.Smith + Noah A.Smith 35–40 W10-0705 heilman-smith-2010-rating @@ -720,10 +720,10 @@ Document Image Collection Using <fixed-case>A</fixed-case>mazon’s <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk - AudreyLe + AudreyLe JeromeAjot - MarkPrzybocki - StephanieStrassel + MarkPrzybocki + StephanieStrassel 45–52 le-etal-2010-document @@ -739,7 +739,7 @@ Exploring Normalization Techniques for Human Judgments of Machine Translation Adequacy Collected Using <fixed-case>A</fixed-case>mazon <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk MichaelDenkowski - AlonLavie + AlonLavie 57–61 W10-0709 denkowski-lavie-2010-exploring @@ -747,7 +747,7 @@ Can Crowds Build parallel corpora for Machine Translation Systems? VamshiAmbati - StephanVogel + StephanVogel 62–65 W10-0710 ambati-vogel-2010-crowds @@ -756,7 +756,7 @@ Turker-Assisted Paraphrasing for <fixed-case>E</fixed-case>nglish-<fixed-case>A</fixed-case>rabic Machine Translation MichaelDenkowski HassanAl-Haj - AlonLavie + AlonLavie 66–70 W10-0711 denkowski-etal-2010-turker @@ -766,14 +766,14 @@ NolanLawson KevinEustice MikePerkowitz - MelihaYetisgen-Yildiz + MelihaYetisgen-Yildiz 71–79 W10-0712 lawson-etal-2010-annotating Annotating Named Entities in <fixed-case>T</fixed-case>witter Data with Crowdsourcing - TimFinin + TimFinin WilliamMurnane AnandKarandikar NicholasKeller @@ -794,7 +794,7 @@ An Enriched <fixed-case>MT</fixed-case> Grammar for Under $100 - Omar F.Zaidan + Omar F.Zaidan JuriGanitkevitch 93–98 W10-0715 @@ -804,7 +804,7 @@ Using the <fixed-case>A</fixed-case>mazon <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk to Transcribe and Annotate Meeting Speech for Extractive Summarization MatthewMarge SatanjeevBanerjee - AlexanderRudnicky + AlexanderRudnicky 99–107 W10-0716 marge-etal-2010-using @@ -822,9 +822,9 @@ BartMellebeek FrancescBenavent JensGrivolla - JoanCodina - MartaR. Costa-jussà - RafaelBanchs + JoanCodina + MartaR. Costa-jussà + RafaelBanchs 114–121 W10-0718 mellebeek-etal-2010-opinion @@ -838,7 +838,7 @@ RobinMelnick ChristopherPotts TylerSchnoebelen - HarryTily + HarryTily 122–130 W10-0719 munro-etal-2010-crowdsourcing @@ -862,7 +862,7 @@ Non-Expert Evaluation of Summarization Systems is Risky - DanGillick + DanGillick YangLiu 148–151 W10-0722 @@ -872,7 +872,7 @@ Shedding (a Thousand Points of) Light on Biased Language TaeYano PhilipResnik - Noah A.Smith + Noah A.Smith 152–158 W10-0723 yano-etal-2010-shedding @@ -881,7 +881,7 @@ Evaluation of Commonsense Knowledge with <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk JonathanGordon BenjaminVan Durme - LenhartSchubert + LenhartSchubert 159–162 W10-0724 gordon-etal-2010-evaluation @@ -896,8 +896,8 @@ The Wisdom of the Crowd’s Ear: Speech Accent Rating and Annotation with <fixed-case>A</fixed-case>mazon <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk - StephenKunath - StevenWeinberger + StephenKunath + StevenWeinberger 168–171 W10-0726 kunath-weinberger-2010-wisdom @@ -912,7 +912,7 @@ Preliminary Experiments with <fixed-case>A</fixed-case>mazon’s <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk for Annotating Medical Named Entities - MelihaYetisgen-Yildiz + MelihaYetisgen-Yildiz ImreSolti FeiXia ScottHalgrim @@ -925,7 +925,7 @@ IanLane MatthiasEck KayRottmann - AlexWaibel + AlexWaibel 184–187 W10-0729 lane-etal-2010-tools @@ -943,17 +943,17 @@ <fixed-case>A</fixed-case>mazon <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk for Subjectivity Word Sense Disambiguation CemAkkaya AlexanderConrad - JanyceWiebe - RadaMihalcea + JanyceWiebe + RadaMihalcea 195–203 W10-0731 akkaya-etal-2010-amazon Non-Expert Correction of Automatically Generated Relation Annotations - Matthew R.Gormley + Matthew R.Gormley AdamGerber - MaryHarper + MaryHarper MarkDredze 204–207 W10-0732 @@ -969,7 +969,7 @@ Creating a Bi-lingual Entailment Corpus through Translations with <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk: $100 for a 10-day Rush - MatteoNegri + MatteoNegri YasharMehdad 212–216 W10-0734 @@ -1003,8 +1003,8 @@ Towards a Domain Independent Semantics: Enhancing Semantic Representation with Construction Grammar Jena D.Hwang - Rodney D.Nielsen - MarthaPalmer + Rodney D.Nielsen + MarthaPalmer 1–8 W10-0801 hwang-etal-2010-towards @@ -1045,7 +1045,7 @@ Automatic Extraction of Constructional Schemas - Gerhardvan Huyssteen + Gerhardvan Huyssteen MarelieDavel 39–46 W10-0806 @@ -1056,12 +1056,12 @@ Proceedings of the NAACL HLT 2010 First International Workshop on Formalisms and Methodology for Learning by Reading W10-09 - RutuMulkar-Mehta - JamesAllen - JerryHobbs - EduardHovy - BernardoMagnini - ChrisManning + RutuMulkar-Mehta + JamesAllen + JerryHobbs + EduardHovy + BernardoMagnini + ChrisManning Association for Computational Linguistics
Los Angeles, California
June @@ -1075,7 +1075,7 @@ Machine Reading as a Process of Partial Question-Answering PeterClark - PhilHarrison + PhilHarrison 1–9 W10-0901 clark-harrison-2010-machine @@ -1084,14 +1084,14 @@ Building an end-to-end text reading system based on a packed representation Doo SoonKim KenBarker - BrucePorter + BrucePorter 10–14 W10-0902 kim-etal-2010-building Semantic Enrichment of Text with Background Knowledge - AnselmoPeñas + AnselmoPeñas EduardHovy 15–23 W10-0903 @@ -1099,7 +1099,7 @@ Large Scale Relation Detection - ChrisWelty + ChrisWelty JamesFan DavidGondek AndrewSchlaikjer @@ -1117,8 +1117,8 @@ Open-domain Commonsense Reasoning Using Discourse Relations from a Corpus of Weblog Stories - MatthewGerber - AndrewGordon + MatthewGerber + AndrewGordon KenjiSagae 43–51 W10-0906 @@ -1128,7 +1128,7 @@ Semantic Role Labeling for Open Information Extraction JanaraChristensen Mausam - StephenSoderland + StephenSoderland OrenEtzioni 52–60 W10-0907 @@ -1139,7 +1139,7 @@ MarjorieFreedman EdwardLoper ElizabethBoschee - RalphWeischedel + RalphWeischedel 61–69 W10-0908 freedman-etal-2010-empirical @@ -1159,7 +1159,7 @@ Unsupervised techniques for discovering ontology elements from <fixed-case>W</fixed-case>ikipedia article links ZareenSyed - TimFinin + TimFinin 78–86 W10-0910 syed-finin-2010-unsupervised @@ -1170,15 +1170,15 @@ JanaraChristensen PedroDomingos OrenEtzioni - RaphaelHoffmann - ChloeKiddon + RaphaelHoffmann + ChloeKiddon ThomasLin XiaoLing - Mausam + Mausam AlanRitter - StefanSchoenmackers - StephenSoderland - DanWeld + SchoenmackersStefan + StephenSoderland + DanWeld FeiWu CongleZhang 87–95 @@ -1188,7 +1188,7 @@ Analogical Dialogue Acts: Supporting Learning by Reading Analogies DavidBarbella - KennethForbus + KennethForbus 96–104 W10-0912 barbella-forbus-2010-analogical @@ -1205,8 +1205,8 @@ Supporting rule-based representations with corpus-derived lexical information. AnnieZaenen CleoCondoravdi - DanielBobrow - RaphaelHoffmann + DanielBobrow + RaphaelHoffmann 114–121 W10-0914 zaenen-etal-2010-supporting @@ -1226,7 +1226,7 @@ Proceedings of the NAACL HLT 2010 Fifth Workshop on Innovative Use of NLP for Building Educational Applications W10-10 - JoelTetreault + JoelTetreault JillBurstein ClaudiaLeacock Association for Computational Linguistics @@ -1241,17 +1241,17 @@ Readability Assessment for Text Simplification - SandraAluisio + SandraAluisio LuciaSpecia CarolineGasperin - CarolinaScarton + CarolinaScarton 1–9 W10-1001 aluisio-etal-2010-readability Enhancing Authentic Web Pages for Language Learners - DetmarMeurers + DetmarMeurers RamonZiai LuizAmaral AdrianeBoyd @@ -1292,7 +1292,7 @@ Rethinking Grammatical Error Annotation and Evaluation with the <fixed-case>A</fixed-case>mazon <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk JoelTetreault ElenaFilatova - MartinChodorow + MartinChodorow 45–48 W10-1006 tetreault-etal-2010-rethinking @@ -1315,12 +1315,12 @@ Leveraging Hidden Dialogue State to Select Tutorial Moves - KristyBoyer - RobPhillips - Eun YoungHa + KristyBoyer + RobPhillips + Eun YoungHa MichaelWallis MladenVouk - JamesLester + JamesLester 66–73 W10-1009 boyer-etal-2010-leveraging @@ -1379,7 +1379,7 @@ <fixed-case>M</fixed-case>ed<fixed-case>E</fixed-case>val- A <fixed-case>S</fixed-case>wedish Medical Test Collection with Doctors and Patients User Groups - KarinFriberg Heppin + KarinFriberg Heppin 1–7 W10-1101 friberg-heppin-2010-medeval @@ -1408,7 +1408,7 @@ StephanieSchreitter AlexandraKlein JohannesMatiasek - HaraldTrost + HaraldTrost 22–28 W10-1104 schreitter-etal-2010-using @@ -1426,7 +1426,7 @@ Reliability and Type of Consumer Health Documents on the World Wide Web: an Annotation Study - MelanieMartin + MelanieMartin 38–45 W10-1106 martin-2010-reliability @@ -1434,9 +1434,9 @@ Automated Identification of Synonyms in Biomedical Acronym Sense Inventories Genevieve B.Melton - SungRimMoon - BridgetMcInnes - SergueiPakhomov + SungRimMoon + BridgetMcInnes + SergueiPakhomov 46–52 W10-1107 melton-etal-2010-automated @@ -1447,7 +1447,7 @@ ElinCarlsson HerculesDalianis RiittaDanielsson-Ojala - VidasDaudaravicius + VidasDaudaravicius MartinHassel DimitriosKokkinakis HeljäLundgren-Laine @@ -1467,7 +1467,7 @@ FeiXia ImreSolti EithonCadag - ÖzlemUzuner + ÖzlemUzuner 61–67 W10-1109 halgrim-etal-2010-extracting @@ -1475,14 +1475,14 @@ Linking <fixed-case>S</fixed-case>we<fixed-case>FN</fixed-case>++ with Medical Resources, towards a <fixed-case>M</fixed-case>ed<fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et for <fixed-case>S</fixed-case>wedish DimitriosKokkinakis - MariaToporowska Gronostaj + MariaToporowska Gronostaj 68–71 W10-1110 kokkinakis-toporowska-gronostaj-2010-linking Measuring Risk and Information Preservation: Toward New Metrics for De-identification of Clinical Texts - LynetteHirschman + LynetteHirschman JohnAberdeen 72–75 W10-1111 @@ -1500,7 +1500,7 @@ Machine learning and features selection for semi-automatic <fixed-case>ICD</fixed-case>-9-<fixed-case>CM</fixed-case> encoding JuliaMedori - CédrickFairon + CédrickFairon 84–89 W10-1113 medori-fairon-2010-machine @@ -1508,7 +1508,7 @@ Extracting Formulaic and Free Text Clinical Research Articles Metadata using Conditional Random Fields SeinLin - Jun-PingNg + Jun-PingNg ShreyaseePradhan JatinShah RicardoPietrobon @@ -1524,8 +1524,8 @@ W10-12 DonghuiFeng JamieCallan - EduardHovy - MariusPasca + EduardHovy + MariusPasca Association for Computational Linguistics
Los Angeles, California
June @@ -1539,8 +1539,8 @@ <fixed-case>LDA</fixed-case> Based Similarity Modeling for Question Answering AsliCelikyilmaz - DilekHakkani-Tur - GokhanTur + DilekHakkani-Tur + GokhanTur 1–9 W10-1201 celikyilmaz-etal-2010-lda @@ -1567,7 +1567,7 @@ A Graph-Based Semi-Supervised Learning for Question Semantic Labeling AsliCelikyilmaz - DilekHakkani-Tur + DilekHakkani-Tur 27–35 W10-1204 celikyilmaz-hakkani-tur-2010-graph @@ -1575,7 +1575,7 @@ Capturing the Stars: Predicting Ratings for Service and Product Reviews NarendraGupta - GiuseppeDi Fabbrizio + GiuseppeDi Fabbrizio PatrickHaffner 36–43 W10-1205 @@ -1584,7 +1584,7 @@ Object Search: Supporting Structured Queries in Web Search Engines KimPham - NicholasRizzolo + NicholasRizzolo KevinSmall Kevin Chen-ChuanChang DanRoth @@ -1598,7 +1598,7 @@ Proceedings of the NAACL HLT 2010 Workshop on Speech and Language Processing for Assistive Technologies W10-13 MelanieFried-Oken - Kathleen F.McCoy + Kathleen F.McCoy BrianRoark Association for Computational Linguistics
Los Angeles, California
@@ -1616,7 +1616,7 @@ JosephReddington EhudReiter NavaTintarev - AnnaluWaller + AnnaluWaller 1–9 W10-1301 black-etal-2010-using @@ -1670,7 +1670,7 @@ Using Reinforcement Learning to Create Communication Channel Management Strategies for Diverse Users RebeccaLunsford - Peter A.Heeman + Peter A.Heeman 53–61 W10-1307 lunsford-heeman-2010-using @@ -1693,7 +1693,7 @@ State-Transition Interpolation and <fixed-case>MAP</fixed-case> Adaptation for <fixed-case>HMM</fixed-case>-based Dysarthric Speech Recognition - Harsh VardhanSharma + Harsh VardhanSharma MarkHasegawa-Johnson 72–79 W10-1310 @@ -1727,7 +1727,7 @@ Proceedings of the NAACL HLT 2010 First Workshop on Statistical Parsing of Morphologically-Rich Languages W10-14 - DjameSeddah + DjameSeddah SandraKoebler ReutTsarfaty Association for Computational Linguistics @@ -1745,9 +1745,9 @@ ReutTsarfaty DjaméSeddah YoavGoldberg - SandraKuebler + SandraKuebler YannickVersley - MarieCandito + MarieCandito JenniferFoster InesRehbein LamiaTounsi @@ -1759,7 +1759,7 @@ Improving <fixed-case>A</fixed-case>rabic Dependency Parsing with Lexical and Inflectional Morphological Features YuvalMarton NizarHabash - OwenRambow + OwenRambow 13–21 W10-1402 marton-etal-2010-improving @@ -1769,7 +1769,7 @@ Bharat RamAmbati SamarHusain SambhavJain - Dipti MisraSharma + Dipti MisraSharma RajeevSangal 22–30 W10-1403 @@ -1778,7 +1778,7 @@ Application of Different Techniques to Dependency Parsing of <fixed-case>B</fixed-case>asque KepaBengoetxea - KoldoGojenola + KoldoGojenola 31–39 W10-1404 bengoetxea-gojenola-2010-application @@ -1786,7 +1786,7 @@ Modeling Morphosyntactic Agreement in Constituency-Based Parsing of <fixed-case>M</fixed-case>odern <fixed-case>H</fixed-case>ebrew ReutTsarfaty - KhalilSima’an + KhalilSima’an 40–48 W10-1405 tsarfaty-simaan-2010-modeling @@ -1812,16 +1812,16 @@ MohammedAttia JenniferFoster DeirdreHogan - JosephLe Roux + JosephLe Roux LamiaTounsi - Josefvan Genabith + Josefvan Genabith 67–75 W10-1408 attia-etal-2010-handling Parsing Word Clusters - MarieCandito + MarieCandito DjaméSeddah 76–84 W10-1409 @@ -1830,10 +1830,10 @@ Lemmatization and Lexicalized Statistical Parsing of Morphologically-Rich Languages: the Case of <fixed-case>F</fixed-case>rench DjaméSeddah - GrzegorzChrupała - ÖzlemÇetinoğlu - Josefvan Genabith - MarieCandito + GrzegorzChrupała + ÖzlemÇetinoğlu + Josefvan Genabith + MarieCandito 85–93 W10-1410 seddah-etal-2010-lemmatization @@ -1875,7 +1875,7 @@ <fixed-case>N</fixed-case>o<fixed-case>W</fixed-case>a<fixed-case>C</fixed-case>: a large web-based corpus for <fixed-case>N</fixed-case>orwegian - Emiliano RaulGuevara + Emiliano RaulGuevara 1–7 W10-1501 guevara-2010-nowac @@ -1893,7 +1893,7 @@ Sketching Techniques for Large Scale <fixed-case>NLP</fixed-case> AmitGoyal JagadeeshJagarlamudi - HalDaumé III + HalDaumé III SureshVenkatasubramanian 17–25 W10-1503 @@ -1934,15 +1934,15 @@ Computational Linguistics in <fixed-case>B</fixed-case>razil: An Overview ThiagoPardo CarolineGasperin - Helenade Medeiros Caseli - Maria das GraçasNunes + Helenade Medeiros Caseli + Maria das GraçasNunes 1–7 W10-1601 pardo-etal-2010-computational Data-driven computational linguistics at <fixed-case>F</fixed-case>a<fixed-case>MAF</fixed-case>-<fixed-case>UNC</fixed-case>, <fixed-case>A</fixed-case>rgentina - LauraAlonso Alemany + LauraAlonso Alemany GabrielInfante-Lopez 8–14 W10-1602 @@ -1950,7 +1950,7 @@ Variable-Length <fixed-case>M</fixed-case>arkov Models and Ambiguous Words in <fixed-case>P</fixed-case>ortuguese - Fabio NatanaelKepler + Fabio NatanaelKepler MarceloFinger 15–23 W10-1603 @@ -1958,7 +1958,7 @@ Using Common Sense to generate culturally contextualized Machine Translation - Helenade Medeiros Caseli + Helenade Medeiros Caseli Bruno AkioSugiyama Junia CoutinhoAnacleto 24–31 @@ -1984,7 +1984,7 @@ Fostering Digital Inclusion and Accessibility: The <fixed-case>P</fixed-case>or<fixed-case>S</fixed-case>imples project for Simplification of <fixed-case>P</fixed-case>ortuguese Texts - SandraAluísio + SandraAluísio CarolineGasperin 46–53 W10-1607 @@ -2001,7 +2001,7 @@ A Machine Learning Approach for Recognizing Textual Entailment in <fixed-case>S</fixed-case>panish - JulioCastillo + JulioCastillo 62–67 W10-1609 castillo-2010-machine @@ -2009,31 +2009,31 @@ The emergence of the modern concept of introspection: a quantitative linguistic analysis IvánRaskovsky - DiegoFernández Slezak + DiegoFernández Slezak CarlosDiuk - Guillermo A.Cecchi + Guillermo A.Cecchi 68–75 W10-1610 raskovsky-etal-2010-emergence Combining <fixed-case>CBIR</fixed-case> and <fixed-case>NLP</fixed-case> for Multilingual Terminology Alignment and Cross-Language Image Indexing - DiegoBurgos + DiegoBurgos 76–83 W10-1611 burgos-2010-combining <fixed-case>IRAS</fixed-case>ubcat, a highly parametrizable, language independent tool for the acquisition of verbal subcategorization information from corpus - Ivana RominaAltamirano - LauraAlonso Alemany + Ivana RominaAltamirano + LauraAlonso Alemany 84–91 W10-1612 altamirano-alonso-alemany-2010-irasubcat The <fixed-case>T</fixed-case>ermi<fixed-case>N</fixed-case>et Project: an Overview - ArianiDi Felippo + ArianiDi Felippo 92–99 W10-1613 di-felippo-2010-terminet @@ -2050,14 +2050,14 @@ Recognition and extraction of definitional contexts in <fixed-case>S</fixed-case>panish for sketching a lexical network CésarAguilar OlgaAcosta - GerardoSierra + GerardoSierra 109–116 W10-1615 aguilar-etal-2010-recognition Computational Linguistics for helping Requirements Elicitation: a dream about Automated Software Development - Carlos MarioZapata Jaramillo + Carlos MarioZapata Jaramillo 117–124 W10-1616 zapata-jaramillo-2010-computational @@ -2065,8 +2065,8 @@ Text Generation for <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese: the Surface Realization Task EderNovais - ThiagoTadeu - IvandréParaboni + ThiagoTadeu + IvandréParaboni 125–131 W10-1617 novais-etal-2010-text @@ -2089,7 +2089,7 @@ PhilippKoehn ChristofMonz KayPeterson - OmarZaidan + OmarZaidan Association for Computational Linguistics
Uppsala, Sweden
July @@ -2104,7 +2104,7 @@ A Semi-Supervised Word Alignment Algorithm with Partial Manual Alignments QinGao NguyenBach - StephanVogel + StephanVogel 1–10 W10-1701 gao-etal-2010-semi @@ -2124,7 +2124,7 @@ PhilippKoehn ChristofMonz KayPeterson - MarkPrzybocki + MarkPrzybocki OmarZaidan 17–53 W10-1703 @@ -2133,8 +2133,8 @@ <fixed-case>LIMSI</fixed-case>’s Statistical Translation Systems for <fixed-case>WMT</fixed-case>’10 AlexandreAllauzen - Josep M.Crego - İlknurDurgar El-Kahlout + Josep M.Crego + İlknurDurgar El-Kahlout FrançoisYvon 54–59 W10-1704 @@ -2142,7 +2142,7 @@ 2010 Failures in <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>zech Phrase-Based <fixed-case>MT</fixed-case> - OndřejBojar + OndřejBojar KamilKos 60–66 W10-1705 @@ -2152,7 +2152,7 @@ An Empirical Study on Development Set Selection Strategy for Machine Translation Learning CongHui HaiZhao - Bao-LiangLu + Bao-LiangLu YanSong 67–71 W10-1706 @@ -2161,7 +2161,7 @@ The <fixed-case>U</fixed-case>niversity of <fixed-case>M</fixed-case>aryland Statistical Machine Translation System for the Fifth Workshop on Machine Translation VladimirEidelman - ChrisDyer + ChrisDyer PhilipResnik 72–76 W10-1707 @@ -2170,7 +2170,7 @@ Further Experiments with Shallow Hybrid <fixed-case>MT</fixed-case> Systems ChristianFedermann - AndreasEisele + AndreasEisele YuChen SabineHunsicker JiaXu @@ -2182,8 +2182,8 @@ Improved Features and Grammar Selection for Syntax-Based <fixed-case>MT</fixed-case> GregHanneman - JonathanClark - AlonLavie + JonathanClark + AlonLavie 82–87 W10-1709 hanneman-etal-2010-improved @@ -2205,18 +2205,18 @@ GregorLeusch SaabMansour DanielStein - HermannNey + HermannNey 93–97 W10-1711 heger-etal-2010-rwth Using Collocation Segmentation to Augment the Phrase Table - Carlos A.Henríquez Q. - MartaRuiz Costa-jussà - VidasDaudaravicius - Rafael E.Banchs - José B.Mariño + Carlos A.Henríquez Q. + MartaRuiz Costa-jussà + VidasDaudaravicius + Rafael E.Banchs + José B.Mariño 98–102 W10-1712 henriquez-q-etal-2010-using @@ -2226,7 +2226,7 @@ StéphaneHuet JulienBourdaillet AlexandrePatry - PhilippeLanglais + PhilippeLanglais 103–109 W10-1713 huet-etal-2010-rali @@ -2235,7 +2235,7 @@ Exodus - Exploring <fixed-case>SMT</fixed-case> for <fixed-case>EU</fixed-case> Institutions MichaelJellinghaus AlexandrosPoulis - DavidKolovratník + DavidKolovratník 110–114 W10-1714 jellinghaus-etal-2010-exodus @@ -2276,10 +2276,10 @@ <fixed-case>J</fixed-case>oshua 2.0: A Toolkit for Parsing-Based Machine Translation with Syntax, Semirings, Discriminative Training and Other Goodies ZhifeiLi ChrisCallison-Burch - ChrisDyer + ChrisDyer JuriGanitkevitch AnnIrvine - SanjeevKhudanpur + SanjeevKhudanpur LaneSchwartz WrenThornton ZiyuanWang @@ -2294,7 +2294,7 @@ JanNiehues TeresaHerrmann MohammedMediani - AlexWaibel + AlexWaibel 138–142 W10-1719 niehues-etal-2010-karlsruhe @@ -2303,13 +2303,13 @@ <fixed-case>MATREX</fixed-case>: The <fixed-case>DCU</fixed-case> <fixed-case>MT</fixed-case> System for <fixed-case>WMT</fixed-case> 2010 SergioPenkale RejwanulHaque - SandipanDandapat + SandipanDandapat PratyushBanerjee - Ankit K.Srivastava + Ankit K.Srivastava JinhuaDu PavelPecina - Sudip KumarNaskar - Mikel L.Forcada + Sudip KumarNaskar + Mikel L.Forcada AndyWay 143–148 W10-1720 @@ -2326,7 +2326,7 @@ The <fixed-case>CUED</fixed-case> <fixed-case>H</fixed-case>i<fixed-case>FST</fixed-case> System for the <fixed-case>WMT</fixed-case>10 Translation Shared Task JuanPino GonzaloIglesias - Adriàde Gispert + Adriàde Gispert GraemeBlackwood JamieBrunning WilliamByrne @@ -2337,8 +2337,8 @@ The <fixed-case>LIG</fixed-case> Machine Translation System for <fixed-case>WMT</fixed-case> 2010 MarionPotet - LaurentBesacier - HervéBlanchon + LaurentBesacier + HervéBlanchon 161–166 W10-1723 potet-etal-2010-lig @@ -2354,14 +2354,14 @@ <fixed-case>UPV</fixed-case>-<fixed-case>PRHLT</fixed-case> <fixed-case>E</fixed-case>nglish–<fixed-case>S</fixed-case>panish System for <fixed-case>WMT</fixed-case>10 - GermánSanchis-Trilles + GermánSanchis-Trilles JesúsAndrés-Ferrer GuillemGascó - JesúsGonzález-Rubio + JesúsGonzález-Rubio PascualMartínez-Gómez - Martha-AliciaRocha - Joan-AndreuSánchez - FranciscoCasacuberta + Martha-AliciaRocha + Joan-AndreuSánchez + FranciscoCasacuberta 172–176 W10-1725 sanchis-trilles-etal-2010-upv @@ -2386,7 +2386,7 @@ To Cache or Not To Cache? Experiments with Adaptive Models in Statistical Machine Translation - JörgTiedemann + JörgTiedemann 189–194 W10-1728 tiedemann-2010-cache @@ -2394,8 +2394,8 @@ Applying Morphological Decompositions to Statistical Machine Translation SamiVirpioja - JaakkoVäyrynen - AndréMansikkaniemi + JaakkoVäyrynen + AndréMansikkaniemi MikkoKurimo 195–200 W10-1729 @@ -2403,7 +2403,7 @@ Maximum Entropy Translation Model in Dependency-Based <fixed-case>MT</fixed-case> Framework - ZdeněkŽabokrtský + ZdeněkŽabokrtský MartinPopel DavidMareček 201–206 @@ -2413,21 +2413,21 @@ <fixed-case>UCH</fixed-case>-<fixed-case>UPV</fixed-case> <fixed-case>E</fixed-case>nglish–<fixed-case>S</fixed-case>panish System for <fixed-case>WMT</fixed-case>10 FranciscoZamora-Martínez - GermánSanchis-Trilles + GermánSanchis-Trilles 207–211 W10-1731 zamora-martinez-sanchis-trilles-2010-uch Hierarchical Phrase-Based <fixed-case>MT</fixed-case> at the <fixed-case>C</fixed-case>harles <fixed-case>U</fixed-case>niversity for the <fixed-case>WMT</fixed-case> 2010 Shared Task - DanielZeman + DanielZeman 212–215 W10-1732 zeman-2010-hierarchical Incremental Decoding for Phrase-Based Statistical Machine Translation - BaskaranSankaran + BaskaranSankaran AjeetGrewal AnoopSarkar 216–223 @@ -2437,7 +2437,7 @@ How to Avoid Burning Ducks: Combining Linguistic Analysis and Corpus Statistics for <fixed-case>G</fixed-case>erman Compound Processing FabienneFritzinger - AlexanderFraser + AlexanderFraser 224–234 W10-1734 fritzinger-fraser-2010-avoid @@ -2473,7 +2473,7 @@ DavidVilar DanielStein MatthiasHuck - HermannNey + HermannNey 262–270 W10-1738 vilar-etal-2010-jane @@ -2487,7 +2487,7 @@ Adaptive Model Weighting and Transductive Regression for Predicting Best System Combinations - ErgunBiçici + ErgunBiçici S. SerdarKozat 276–281 W10-1740 @@ -2495,7 +2495,7 @@ <fixed-case>L</fixed-case>1 Regularized Regression for Reranking and System Combination in Machine Translation - ErgunBiçici + ErgunBiçici DenizYuret 282–289 W10-1741 @@ -2512,14 +2512,14 @@ The <fixed-case>UPV</fixed-case>-<fixed-case>PRHLT</fixed-case> Combination System for <fixed-case>WMT</fixed-case> 2010 - JesúsGonzález-Rubio - GermánSanchis-Trilles - Joan-AndreuSánchez + JesúsGonzález-Rubio + GermánSanchis-Trilles + Joan-AndreuSánchez JesúsAndrés-Ferrer GuillemGascó PascualMartínez-Gómez - Martha-AliciaRocha - FranciscoCasacuberta + Martha-AliciaRocha + FranciscoCasacuberta 296–300 W10-1743 gonzalez-rubio-etal-2010-upv @@ -2527,15 +2527,15 @@ <fixed-case>CMU</fixed-case> Multi-Engine Machine Translation for <fixed-case>WMT</fixed-case> 2010 KennethHeafield - AlonLavie + AlonLavie 301–306 W10-1744 heafield-lavie-2010-cmu <fixed-case>CMU</fixed-case> System Combination via Hypothesis Selection for <fixed-case>WMT</fixed-case>’10 - Almut SiljaHildebrand - StephanVogel + Almut SiljaHildebrand + StephanVogel 307–310 W10-1745 hildebrand-vogel-2010-cmu @@ -2550,17 +2550,17 @@ The <fixed-case>RWTH</fixed-case> System Combination System for <fixed-case>WMT</fixed-case> 2010 GregorLeusch - HermannNey + HermannNey 315–320 W10-1747 leusch-ney-2010-rwth <fixed-case>BBN</fixed-case> System Description for <fixed-case>WMT</fixed-case>10 System Combination Task - Antti-VeikkoRosti + Antti-VeikkoRosti BingZhang SpyrosMatsoukas - RichardSchwartz + RichardSchwartz 321–326 W10-1748 rosti-etal-2010-bbn @@ -2575,10 +2575,10 @@ Document-Level Automatic <fixed-case>MT</fixed-case> Evaluation based on Discourse Representations - ElisabetComelles - JesúsGiménez - LluísMàrquez - IreneCastellón + ElisabetComelles + JesúsGiménez + LluísMàrquez + IreneCastellón VictoriaArranz 333–338 W10-1750 @@ -2587,7 +2587,7 @@ <fixed-case>METEOR</fixed-case>-<fixed-case>NEXT</fixed-case> and the <fixed-case>METEOR</fixed-case> Paraphrase Tables: Improved Evaluation Support for Five Target Languages MichaelDenkowski - AlonLavie + AlonLavie 339–342 W10-1751 denkowski-lavie-2010-meteor @@ -2596,7 +2596,7 @@ Normalized Compression Distance Based Measures for <fixed-case>M</fixed-case>etrics<fixed-case>MATR</fixed-case> 2010 MarcusDobrinkat TeroTapiovaara - JaakkoVäyrynen + JaakkoVäyrynen KimmoKettunen 343–348 W10-1752 @@ -2607,7 +2607,7 @@ YifanHe JinhuaDu AndyWay - Josefvan Genabith + Josefvan Genabith 349–353 W10-1753 he-etal-2010-dcu @@ -2624,7 +2624,7 @@ The Parameter-Optimized <fixed-case>ATEC</fixed-case> Metric for <fixed-case>MT</fixed-case> Evaluation BillyWong - ChunyuKit + ChunyuKit 360–364 W10-1755 wong-kit-2010-parameter @@ -2651,7 +2651,7 @@ Taming Structured Perceptrons on Wild Feature Vectors - RalfBrown + RalfBrown 384–391 W10-1758 brown-2010-taming @@ -2667,9 +2667,9 @@ Integration of Multiple Bilingually-Learned Segmentation Schemes into Statistical Machine Translation - MichaelPaul + MichaelPaul AndrewFinch - EiichiroSumita + EiichiroSumita 400–408 W10-1760 paul-etal-2010-integration @@ -2697,8 +2697,8 @@ Decision Trees for Lexical Smoothing in Statistical Machine Translation RabihZbib SpyrosMatsoukas - RichardSchwartz - JohnMakhoul + RichardSchwartz + JohnMakhoul 428–437 W10-1763 zbib-etal-2010-decision @@ -2709,7 +2709,7 @@ Proceedings of the Fourth Linguistic Annotation Workshop W10-18 NianwenXue - MassimoPoesio + MassimoPoesio Association for Computational Linguistics
Uppsala, Sweden
July @@ -2723,9 +2723,9 @@ <fixed-case>E</fixed-case>moti<fixed-case>B</fixed-case>log: A Finer-Grained and More Precise Learning of Subjectivity Expression Models EsterBoldrini - AlexandraBalahur - PatricioMartínez-Barco - AndrésMontoyo + AlexandraBalahur + PatricioMartínez-Barco + AndrésMontoyo 1–10 W10-1801 boldrini-etal-2010-emotiblog @@ -2743,18 +2743,18 @@ Annotation Scheme for Social Network Extraction from Text ApoorvAgarwal - Owen C.Rambow - Rebecca J.Passonneau + Owen C.Rambow + Rebecca J.Passonneau 20–28 W10-1803 agarwal-etal-2010-annotation Agile Corpus Annotation in Practice: An Overview of Manual and Automatic Annotation of <fixed-case>CV</fixed-case>s - BeaAlex + BeaAlex ClaireGrover RongzhouShen - MijailKabadjov + MijailKabadjov 29–37 W10-1804 alex-etal-2010-agile @@ -2770,9 +2770,9 @@ <fixed-case>A</fixed-case>nveshan: A Framework for Analysis of Multiple Annotators’ Labeling Behavior VikasBhardwaj - RebeccaPassonneau - AnsafSalleb-Aouissi - NancyIde + RebeccaPassonneau + AnsafSalleb-Aouissi + NancyIde 47–55 W10-1806 bhardwaj-etal-2010-anveshan @@ -2780,7 +2780,7 @@ Influence of Pre-Annotation on <fixed-case>POS</fixed-case>-Tagged Corpus Development KarënFort - BenoîtSagot + BenoîtSagot 56–63 W10-1807 fort-sagot-2010-influence @@ -2788,15 +2788,15 @@ To Annotate More Accurately or to Annotate More DmitriyDligach - RodneyNielsen - MarthaPalmer + RodneyNielsen + MarthaPalmer 64–72 W10-1808 dligach-etal-2010-annotate Annotating Underquantification - AurelieHerbelot + AurelieHerbelot AnnCopestake 73–81 W10-1809 @@ -2806,19 +2806,19 @@ <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank Annotation of Multilingual Light Verb Constructions Jena D.Hwang ArchnaBhatia - ClaireBonial + ClaireBonial AousMansouri AshwiniVaidya NianwenXue - MarthaPalmer + MarthaPalmer 82–90 W10-1810 hwang-etal-2010-propbank Retrieving Correct Semantic Boundaries in Dependency Structure - JinhoChoi - MarthaPalmer + JinhoChoi + MarthaPalmer 91–99 W10-1811 choi-palmer-2010-retrieving @@ -2843,8 +2843,8 @@ Cross-Lingual Validity of <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank in the Manual Annotation of <fixed-case>F</fixed-case>rench - Lonnekevan der Plas - TanjaSamardžić + Lonnekevan der Plas + TanjaSamardžić PaolaMerlo 113–117 W10-1814 @@ -2852,7 +2852,7 @@ Characteristics of High Agreement Affect Annotation in Text - Cecilia OvesdotterAlm + Cecilia OvesdotterAlm 118–122 W10-1815 alm-2010-characteristics @@ -2863,7 +2863,7 @@ XiangliWang YusukeMiyao TakuyaMatsuzaki - JunichiTsujii + JunichiTsujii 123–126 W10-1816 yu-etal-2010-deep @@ -2903,9 +2903,9 @@ Chunking <fixed-case>G</fixed-case>erman: An Unsolved Problem - SandraKübler + SandraKübler KathrinBeck - ErhardHinrichs + ErhardHinrichs HeikeTelljohann 147–151 W10-1821 @@ -2922,8 +2922,8 @@ A Feature Type Classification for Therapeutic Purposes: A Preliminary Evaluation with Non-Expert Speakers - Gianluca E.Lebani - EmanuelePianta + Gianluca E.Lebani + EmanuelePianta 157–161 W10-1823 lebani-pianta-2010-feature @@ -2982,12 +2982,12 @@ EmmanuelBruno BrigitteBigi RobertEspesser - GaelleFerré + GaelleFerré MathildeGuardiola DanielHirst NingTan EdliraCela - Jean-ClaudeMartin + Jean-ClaudeMartin StéphaneRauzy Mary-AnnickMorel ElisabethMurisasco @@ -2999,7 +2999,7 @@ Combining Parallel Treebanks and Geo-Tagging MartinVolk - AnneGoehring + AnneGoehring TorstenMarek 192–196 W10-1830 @@ -3015,8 +3015,8 @@ Discourse Relation Configurations in <fixed-case>T</fixed-case>urkish and an Annotation Environment - BerfinAktaş - CemBozsahin + BerfinAktaş + CemBozsahin DenizZeyrek 202–206 W10-1832 @@ -3026,8 +3026,8 @@ An Overview of the <fixed-case>CRAFT</fixed-case> Concept Annotation Guidelines MichaelBada MiriamEckert - MarthaPalmer - LawrenceHunter + MarthaPalmer + LawrenceHunter 207–211 W10-1833 bada-etal-2010-overview @@ -3042,8 +3042,8 @@ An Integrated Tool for Annotating Historical Corpora Pablo Picasso Felicianode Faria - Fabio NatanaelKepler - Maria ClaraPaixão de Sousa + Fabio NatanaelKepler + Maria ClaraPaixão de Sousa 217–221 W10-1835 de-faria-etal-2010-integrated @@ -3051,10 +3051,10 @@ The Revised <fixed-case>A</fixed-case>rabic <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank WajdiZaghouani - MonaDiab + MonaDiab AousMansouri - SameerPradhan - MarthaPalmer + SameerPradhan + MarthaPalmer 222–226 W10-1836 zaghouani-etal-2010-revised @@ -3064,7 +3064,7 @@ NathanGreen PaulBreimyer VinayKumar - NagizaSamatova + NagizaSamatova 227–234 W10-1837 green-etal-2010-packplay @@ -3091,8 +3091,8 @@ Anatomy of Annotation Schemes: Mapping to <fixed-case>G</fixed-case>r<fixed-case>AF</fixed-case> - NancyIde - HarryBunt + NancyIde + HarryBunt 247–255 W10-1840 ide-bunt-2010-anatomy @@ -3100,14 +3100,14 @@ Annotating Participant Reference in <fixed-case>E</fixed-case>nglish Spoken Conversation JohnNiekrasz - Johanna D.Moore + Johanna D.Moore 256–264 W10-1841 niekrasz-moore-2010-annotating Design and Evaluation of Shared Prosodic Annotation for Spontaneous <fixed-case>F</fixed-case>rench Speech: From Expert Knowledge to Non-Expert Annotation - AnneLacheret-Dujour + AnneLacheret-Dujour NicolasObin MathieuAvanzi 265–273 @@ -3116,11 +3116,11 @@ Depends on What the <fixed-case>F</fixed-case>rench Say - Spoken Corpus Annotation with and beyond Syntactic Functions - JoséDeulofeu + JoséDeulofeu LucieDuffort KimGerdes SylvainKahane - PaolaPietrandrea + PaolaPietrandrea 274–281 W10-1843 deulofeu-etal-2010-depends @@ -3128,11 +3128,11 @@ The Annotation Scheme of the <fixed-case>T</fixed-case>urkish Discourse Bank and an Evaluation of Inconsistent Annotations DenizZeyrek - IşinDemirşahin - AyişiğiSevdik-Çalli - HaleÖgel Balaban - İhsanYalçinkaya - Ümit DenizTuran + IşinDemirşahin + AyişiğiSevdik-Çalli + HaleÖgel Balaban + İhsanYalçinkaya + Ümit DenizTuran 282–289 W10-1844 zeyrek-etal-2010-annotation @@ -3142,12 +3142,12 @@ Proceedings of the 2010 Workshop on Biomedical Natural Language Processing W10-19 - K. BretonnelCohen + K. BretonnelCohen DinaDemner-Fushman SophiaAnaniadou - JohnPestian - Jun’ichiTsujii - BonnieWebber + JohnPestian + Jun’ichiTsujii + BonnieWebber Association for Computational Linguistics
Uppsala, Sweden
July @@ -3212,7 +3212,7 @@ MarceloFiszman GracielaRosemblat SeanMarimpietri - ThomasRindflesch + ThomasRindflesch 46–54 W10-1906 kilicoglu-etal-2010-arguments @@ -3229,7 +3229,7 @@ Cancer Stage Prediction Based on Patient Online Discourse MukundJha - NoémieElhadad + NoémieElhadad 64–71 W10-1908 jha-elhadad-2010-cancer @@ -3237,7 +3237,7 @@ An Exploration of Mining Gene Expression Mentions and Their Anatomical Locations from Biomedical Text MartinGerner - GoranNenadic + GoranNenadic Casey M.Bergman 72–80 W10-1909 @@ -3254,7 +3254,7 @@ Disease Mention Recognition with Specific Features Md. Faisal MahbubChowdhury - AlbertoLavelli + AlbertoLavelli 83–90 W10-1911 chowdhury-lavelli-2010-disease @@ -3262,7 +3262,7 @@ Extraction of Disease-Treatment Semantic Relations from Biomedical Sentences OanaFrunza - DianaInkpen + DianaInkpen 91–98 W10-1912 frunza-inkpen-2010-extraction @@ -3295,7 +3295,7 @@ RyanSullivan AnnieSkariah JianYang - GracielaGonzalez + GracielaGonzalez 117–125 W10-1915 leaman-etal-2010-towards @@ -3360,7 +3360,7 @@ Proceedings of the 2010 Workshop on Cognitive Modeling and Computational Linguistics W10-20 - John T.Hale + John T.Hale Association for Computational Linguistics
Uppsala, Sweden
July @@ -3374,7 +3374,7 @@ Using Sentence Type Information for Syntactic Category Acquisition StellaFrank - SharonGoldwater + SharonGoldwater FrankKeller 1–8 W10-2001 @@ -3390,9 +3390,9 @@ Syntactic Adaptation in Language Comprehension - AlexFine + AlexFine TingQian - T. FlorianJaeger + T. FlorianJaeger RobertJacobs 18–26 W10-2003 @@ -3400,7 +3400,7 @@ <fixed-case>HHMM</fixed-case> Parsing with Limited Parallelism - TimMiller + TimMiller WilliamSchuler 27–35 W10-2004 @@ -3416,7 +3416,7 @@ Close = Relevant? The Role of Context in Efficient Language Production TingQian - T. FlorianJaeger + T. FlorianJaeger 45–53 W10-2006 qian-jaeger-2010-close @@ -3424,7 +3424,7 @@ Predicting Cognitively Salient Modifiers of the Constitutive Parts of Concepts GerhardKremer - MarcoBaroni + MarcoBaroni 54–62 W10-2007 kremer-baroni-2010-predicting @@ -3448,7 +3448,7 @@ Uncertainty Reduction as a Measure of Cognitive Processing Effort - StefanFrank + StefanFrank 81–89 W10-2010 frank-2010-uncertainty @@ -3459,8 +3459,8 @@ Proceedings of the 2010 Workshop on NLP and Linguistics: Finding the Common Ground W10-21 FeiXia - WilliamLewis - LoriLevin + WilliamLewis + LoriLevin Association for Computational Linguistics
Uppsala, Sweden
July @@ -3483,14 +3483,14 @@ Evidentiality for Text Trustworthiness Detection QiSu Chu-RenHuang - Kai-yunChen + Kai-yunChen 10–17 W10-2102 su-etal-2010-evidentiality
On the Role of <fixed-case>NLP</fixed-case> in Linguistics - Dipti MisraSharma + Dipti MisraSharma 18–21 W10-2103 sharma-2010-role @@ -3504,15 +3504,15 @@ Grammar-Driven versus Data-Driven: Which Parsing System Is More Affected by Domain Shifts? - BarbaraPlank - Gertjanvan Noord + BarbaraPlank + Gertjanvan Noord 25–33 W10-2105 plank-van-noord-2010-grammar A Cross-Lingual Induction Technique for <fixed-case>G</fixed-case>erman Adverbial Participles - SinaZarrieß + SinaZarrieß AoifeCahill JonasKuhn ChristianRohrer @@ -3530,7 +3530,7 @@ Cross-Lingual Variation of Light Verb Constructions: Using Parallel Corpora and Automatic Alignment for Linguistic Research - TanjaSamardžić + TanjaSamardžić PaolaMerlo 52–60 W10-2108 @@ -3551,14 +3551,14 @@ FransPlank PeterBak MiriamButt - Daniel A.Keim + Daniel A.Keim 70–78 W10-2110 mayer-etal-2010-consonant Injecting Linguistics into <fixed-case>NLP</fixed-case> through Annotation - EduardHovy + EduardHovy 79 W10-2111 hovy-2010-injecting @@ -3569,7 +3569,7 @@ Proceedings of the 11th Meeting of the ACL Special Interest Group on Computational Morphology and Phonology W10-22 JeffreyHeinz - LynneCahill + LynneCahill RichardWicentowski Association for Computational Linguistics
Uppsala, Sweden
@@ -3583,7 +3583,7 @@ Instance-Based Acquisition of Vowel Harmony - FrédéricMailhot + FrédéricMailhot 1–8 W10-2201 mailhot-2010-instance @@ -3613,7 +3613,7 @@ A Method for Compiling Two-Level Rules with Multiple Contexts KimmoKoskenniemi - MiikkaSilfverberg + MiikkaSilfverberg 38–45 W10-2205 koskenniemi-silfverberg-2010-method @@ -3621,7 +3621,7 @@ Exploring Dialect Phonetic Variation Using <fixed-case>PARAFAC</fixed-case> JelenaProkić - TimVan de Cruys + TimVan de Cruys 46–53 W10-2206 prokic-van-de-cruys-2010-exploring @@ -3675,7 +3675,7 @@ CarmenBanea AlessandroMoschitti SwapnaSomasundaran - Fabio MassimoZanzotto + Fabio MassimoZanzotto Association for Computational Linguistics
Uppsala, Sweden
July @@ -3696,7 +3696,7 @@
Towards the Automatic Creation of a <fixed-case>W</fixed-case>ordnet from a Term-Based Lexical Network - HugoGonçalo Oliveira + HugoGonçalo Oliveira PauloGomes 10–18 W10-2302 @@ -3714,7 +3714,7 @@ Robust and Efficient Page Rank for Word Sense Disambiguation DiegoDe Cao - RobertoBasili + RobertoBasili MatteoLuciani FrancescoMesiano RiccardoRossi @@ -3754,7 +3754,7 @@ Co-Occurrence Cluster Features for Lexical Substitutions in Context - ChrisBiemann + ChrisBiemann 55–59 W10-2309 biemann-2010-co @@ -3776,7 +3776,7 @@ Experiments with <fixed-case>CST</fixed-case>-Based Multidocument Summarization - Maria LucíaCastro Jorge + Maria LucíaCastro Jorge ThiagoPardo 74–82 W10-2312 @@ -3784,10 +3784,10 @@ Distinguishing between Positive and Negative Opinions with Complex Network Features - Diego RaphaelAmancio + Diego RaphaelAmancio RenatoFabbri - Osvaldo NovaisOliveira Jr. - Maria das Graças VolpeNunes + Osvaldo NovaisOliveira Jr. + Maria das Graças VolpeNunes Luciano da FontouraCosta 83–87 W10-2313 @@ -3837,7 +3837,7 @@ Proceedings of the 2010 Named Entities Workshop W10-24 - AKumaran + AKumaran HaizhouLi Association for Computational Linguistics
Uppsala, Sweden
@@ -3872,7 +3872,7 @@ Report of <fixed-case>NEWS</fixed-case> 2010 Transliteration Mining Shared Task AKumaran - MiteshM. Khapra + MiteshM. Khapra HaizhouLi 21–28 W10-2403 @@ -3881,7 +3881,7 @@ Whitepaper of <fixed-case>NEWS</fixed-case> 2010 Shared Task on Transliteration Mining AKumaran - MiteshM. Khapra + MiteshM. Khapra HaizhouLi 29–38 W10-2404 @@ -3903,7 +3903,7 @@ Transliteration Using a Phrase-Based Statistical Machine Translation System to Re-Score the Output of a Joint Multigram Model AndrewFinch - EiichiroSumita + EiichiroSumita 48–52 W10-2406 finch-sumita-2010-transliteration @@ -3926,7 +3926,7 @@ Reranking with Multiple Features for Better Transliteration YanSong - ChunyuKit + ChunyuKit HaiZhao 62–65 W10-2409 @@ -3946,7 +3946,7 @@ TanikSaikh TapabrataMondal AsifEkbal - SivajiBandyopadhyay + SivajiBandyopadhyay 71–75 W10-2411 das-etal-2010-english @@ -3961,7 +3961,7 @@ Phrase-Based Transliteration with Simple Heuristics AvineshPVS - AnkurParikh + AnkurParikh 81–84 W10-2413 pvs-parikh-2010-phrase @@ -3980,7 +3980,7 @@ AsifEkbal EvaSourjikova AnetteFrank - Simone PaoloPonzetto + Simone PaoloPonzetto 93–101 W10-2415 ekbal-etal-2010-assessing @@ -3991,7 +3991,7 @@ YouOuyang WenjieLi DequanZheng - TiejunZhao + TiejunZhao 102–109 W10-2416 chen-etal-2010-using @@ -4007,7 +4007,7 @@ Think Globally, Apply Locally: Using Distributional Characteristics for <fixed-case>H</fixed-case>indi Named Entity Identification ShaliniGupta - PushpakBhattacharyya + PushpakBhattacharyya 116–125 W10-2418 gupta-bhattacharyya-2010-think @@ -4023,7 +4023,7 @@ <fixed-case>CONE</fixed-case>: Metrics for Automatic Evaluation of Named Entity Co-Reference Resolution BoLin RushinShah - RobertFrederking + RobertFrederking AnatoleGershman 136–144 W10-2420 @@ -4102,11 +4102,11 @@ Proceedings of the 2010 Workshop on Domain Adaptation for Natural Language Processing W10-26 - HalDaumé III + HalDaumé III TejaswiniDeoskar DavidMcClosky - BarbaraPlank - JörgTiedemann + BarbaraPlank + JörgTiedemann Association for Computational Linguistics
Uppsala, Sweden
July @@ -4137,7 +4137,7 @@ OanaSandu GiuseppeCarenini GabrielMurray - RaymondNg + RaymondNg 16–22 W10-2603 sandu-etal-2010-domain @@ -4153,7 +4153,7 @@ Using Domain Similarity for Performance Estimation VincentVan Asch - WalterDaelemans + WalterDaelemans 31–36 W10-2605 van-asch-daelemans-2010-using @@ -4169,7 +4169,7 @@ Domain Adaptation with Unlabeled Data for Dialog Act Tagging AnnaMargolis KarenLivescu - MariOstendorf + MariOstendorf 45–52 W10-2607 margolis-etal-2010-domain @@ -4188,8 +4188,8 @@ Proceedings of the 2010 Workshop on Companionable Dialogue Systems W10-27 - YorickWilks - BjörnGambäck + YorickWilks + BjörnGambäck MorenaDanieli Association for Computational Linguistics
Uppsala, Sweden
@@ -4211,10 +4211,10 @@
<fixed-case>MANA</fixed-case> for the Ageing - David M WPowers + David M WPowers Martin HLuerssen Trent WLewis - Richard ELeibbrandt + Richard ELeibbrandt MarissaMilne JohnPashalis KennethTreharne @@ -4254,7 +4254,7 @@ How Was Your Day? - StephenPulman + StephenPulman JohanBoye MarcCavazza CameronSmith @@ -4266,9 +4266,9 @@ <fixed-case>VCA</fixed-case>: An Experiment with a Multiparty Virtual Chat Agent SamiraShaikh - TomekStrzalkowski - SarahTaylor - NickWebb + TomekStrzalkowski + SarahTaylor + NickWebb 43–48 W10-2708 shaikh-etal-2010-vca @@ -4278,7 +4278,7 @@ Proceedings of the 2010 Workshop on GEometrical Models of Natural Language Semantics W10-28 - RobertoBasili + RobertoBasili MarcoPennacchiotti Association for Computational Linguistics
Uppsala, Sweden
@@ -4315,15 +4315,15 @@
Relatedness Curves for Acquiring Paraphrases - GeorgianaDinu - GrzegorzChrupała + GeorgianaDinu + GrzegorzChrupała 27–32 W10-2804 dinu-chrupala-2010-relatedness A Regression Model of Adjective-Noun Compositionality in Distributional Semantics - EmilianoGuevara + EmilianoGuevara 33–37 W10-2805 guevara-2010-regression @@ -4332,7 +4332,7 @@ Semantic Composition with Quotient Algebras DaoudClarke RudiLutz - DavidWeir + DavidWeir 38–44 W10-2806 clarke-etal-2010-semantic @@ -4348,7 +4348,7 @@ Sketch Techniques for Scaling Distributional Similarity to the Web AmitGoyal JagadeeshJagarlamudi - HalDaumé III + HalDaumé III SureshVenkatasubramanian 51–56 W10-2808 @@ -4358,7 +4358,7 @@ Active Learning for Constrained <fixed-case>D</fixed-case>irichlet Process Mixture Models AndreasVlachos ZoubinGhahramani - TedBriscoe + TedBriscoe 57–61 W10-2809 vlachos-etal-2010-active @@ -4382,17 +4382,17 @@ Improvements in Unsupervised Co-Occurrence Based Parsing - ChristianHänig + ChristianHänig 1–8 W10-2901 hanig-2010-improvements <fixed-case>V</fixed-case>iterbi Training Improves Unsupervised Dependency Parsing - Valentin I.Spitkovsky - HiyanAlshawi - DanielJurafsky - Christopher D.Manning + Valentin I.Spitkovsky + HiyanAlshawi + DanielJurafsky + Christopher D.Manning 9–17 W10-2902 spitkovsky-etal-2010-viterbi @@ -4475,7 +4475,7 @@ Recession Segmentation: Simpler Online Word Segmentation Using Limited Resources ConstantineLignos - CharlesYang + CharlesYang 88–97 W10-2912 lignos-yang-2010-recession @@ -4499,7 +4499,7 @@ Learning Probabilistic Synchronous <fixed-case>CFG</fixed-case>s for Phrase-Based Translation MarkosMylonakis - KhalilSima’an + KhalilSima’an 117–125 W10-2915 mylonakis-simaan-2010-learning @@ -4508,8 +4508,8 @@ A Semi-Supervised Batch-Mode Active Learning Strategy for Improved Statistical Machine Translation SankaranarayananAnanthakrishnan RohitPrasad - DavidStallard - PremNatarajan + DavidStallard + PremNatarajan 126–134 W10-2916 ananthakrishnan-etal-2010-semi @@ -4518,8 +4518,8 @@ Improving Word Alignment by Semi-Supervised Ensemble ShujianHuang KangxiLi - XinyuDai - JiajunChen + XinyuDai + JiajunChen 135–143 W10-2917 huang-etal-2010-improving @@ -4535,9 +4535,9 @@ A Hybrid Approach to Emotional Sentence Polarity and Intensity Classification - JorgeCarrillo de Albornoz + JorgeCarrillo de Albornoz LauraPlaza - PabloGervás + PabloGervás 153–161 W10-2919 carrillo-de-albornoz-etal-2010-hybrid @@ -4563,7 +4563,7 @@ Online Entropy-Based Model of Lexical Category Acquisition - GrzegorzChrupała + GrzegorzChrupała AfraAlishahi 182–191 W10-2922 @@ -4573,15 +4573,15 @@ Tagging and Linking Web Forum Posts Su NamKim LiWang - TimothyBaldwin + TimothyBaldwin 192–202 W10-2923 kim-etal-2010-tagging Joint Entity and Relation Extraction Using Card-Pyramid Parsing - Rohit J.Kate - RaymondMooney + Rohit J.Kate + RaymondMooney 203–212 W10-2924 kate-mooney-2010-joint @@ -4590,7 +4590,7 @@ Distributed Asynchronous Online Learning for Natural Language Processing KevinGimpel DipanjanDas - Noah A.Smith + Noah A.Smith 213–222 W10-2925 gimpel-etal-2010-distributed @@ -4616,7 +4616,7 @@ Proceedings of the Fourteenth Conference on Computational Natural Language Learning – Shared Task W10-30 - RichárdFarkas + RichárdFarkas VeronikaVincze GyörgySzarvas GyörgyMóra @@ -4648,7 +4648,7 @@ XiaolongWang XuanWang BoYuan - ShixiFan + ShixiFan 13–17 W10-3002 tang-etal-2010-cascade @@ -4672,7 +4672,7 @@ Detecting Hedge Cues and their Scopes with Average Perceptron FengJi XipengQiu - XuanjingHuang + XuanjingHuang 32–39 W10-3005 ji-etal-2010-detecting @@ -4681,7 +4681,7 @@ Memory-Based Resolution of In-Sentence Scopes of Hedge Cues RoserMorante VincentVan Asch - WalterDaelemans + WalterDaelemans 40–47 W10-3006 morante-etal-2010-memory @@ -4689,7 +4689,7 @@ Resolving Speculation: <fixed-case>M</fixed-case>ax<fixed-case>E</fixed-case>nt Cue Classification and Dependency-Based Scope Rules ErikVelldal - LiljaØvrelid + LiljaØvrelid StephanOepen 48–55 W10-3007 @@ -4698,16 +4698,16 @@ Combining Manual Rules and Supervised Learning for Hedge Cue and Scope Detection MarekRei - TedBriscoe + TedBriscoe 56–63 W10-3008 rei-briscoe-2010-combining Hedge Detection Using the <fixed-case>R</fixed-case>el<fixed-case>H</fixed-case>unter Approach - EraldoFernandes + EraldoFernandes CarlosCrestana - RuyMilidiú + RuyMilidiú 64–69 W10-3009 fernandes-etal-2010-hedge @@ -4746,8 +4746,8 @@ Hedge Detection and Scope Finding by Sequence Labeling with Procedural Feature Selection ShaodianZhang HaiZhao - GuodongZhou - Bao-LiangLu + GuodongZhou + Bao-LiangLu 92–99 W10-3013 zhang-etal-2010-hedge @@ -4755,8 +4755,8 @@ Learning to Detect Hedges and their Scope Using <fixed-case>CRF</fixed-case> QiZhao - ChengjieSun - BingquanLiu + ChengjieSun + BingquanLiu YongCheng 100–105 W10-3014 @@ -4764,9 +4764,9 @@ Exploiting Multi-Features to Detect Hedges and their Scope in Biomedical Texts - HuiweiZhou + HuiweiZhou XiaoyanLi - DegenHuang + DegenHuang ZezhongLi YuanshengYang 106–113 @@ -4790,7 +4790,7 @@ Exploiting <fixed-case>CCG</fixed-case> Structures with Tree Kernels for Speculation Detection - LilianaMamani Sánchez + LilianaMamani Sánchez BaoliLi CarlVogel 126–131 @@ -4823,7 +4823,7 @@ A Baseline Approach for Detecting Sentences Containing Uncertainty - ErikTjong Kim Sang + ErikTjong Kim Sang 148–150 W10-3022 tjong-kim-sang-2010-baseline @@ -4880,7 +4880,7 @@ Does negation really matter? IraGoldstein - ÖzlemUzuner + ÖzlemUzuner 23–27 W10-3104 goldstein-uzuner-2010-negation @@ -4911,14 +4911,14 @@ Importance of negations and experimental qualifiers in biomedical literature - MartinKrallinger + MartinKrallinger 46–49 W10-3108 krallinger-2010-importance Negation and modality in distributional semantics - EdHovy + EdHovy 50 W10-3109 hovy-2010-negation @@ -4935,10 +4935,10 @@ A survey on the role of negation in sentiment analysis MichaelWiegand - AlexandraBalahur + AlexandraBalahur BenjaminRoth DietrichKlakow - AndrésMontoyo + AndrésMontoyo 60–68 W10-3111 wiegand-etal-2010-survey @@ -4955,14 +4955,14 @@ Using <fixed-case>SVM</fixed-case>s with the Command Relation features to identify negated events in biomedical literature FarzanehSarafraz - GoranNenadic + GoranNenadic 78–85 W10-3113 sarafraz-nenadic-2010-using Contradiction-focused qualitative evaluation of textual entailment - BernardoMagnini + BernardoMagnini ElenaCabrio 86–94 W10-3114 @@ -4975,7 +4975,7 @@ W10-32 SarmadHussain VirachSornlertlamvanich - HammamRiza + HammamRiza Coling 2010 Organizing Committee
Beijing, China
August @@ -5008,7 +5008,7 @@ Considerations on Automatic Mapping Large-Scale Heterogeneous Language Resources: <fixed-case>S</fixed-case>ejong Semantic Classes and <fixed-case>K</fixed-case>or<fixed-case>L</fixed-case>ex HeumPark - Ae sunYoon + Ae sunYoon Woo ChulPark Hyuk-ChulKwon 14–21 @@ -5039,7 +5039,7 @@ MasaakiYasuhara AsukaTerai DavidMorris - AnjaBelz + AnjaBelz 38–46 W10-3206 tokunaga-etal-2010-construction @@ -5047,7 +5047,7 @@ Labeling Emotion in <fixed-case>B</fixed-case>engali Blog Corpus – A Fine Grained Tagging at Sentence Level DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 47–55 W10-3207 das-bandyopadhyay-2010-labeling @@ -5055,7 +5055,7 @@ <fixed-case>S</fixed-case>enti<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for <fixed-case>I</fixed-case>ndian Languages AmitavaDas - SivajiBandyopadhyay + SivajiBandyopadhyay 56–63 W10-3208 das-bandyopadhyay-2010-sentiwordnet @@ -5127,7 +5127,7 @@ A Preliminary Work on <fixed-case>H</fixed-case>indi Causatives RafiyaBegum - Dipti MisraSharma + Dipti MisraSharma 120–128 W10-3216 begum-sharma-2010-preliminary @@ -5138,7 +5138,7 @@ ChanonOnman PeerachetPorkaew TanethRuangrajitpakorn - KanokornTrakultaweekool + KanokornTrakultaweekool AsaneeKawtrakul 129–136 W10-3217 @@ -5196,7 +5196,7 @@ W10-33 AlessandroOltramari PiekVossen - QinLu + QinLu Coling 2010 Organizing Committee
Beijing, China
August @@ -5210,9 +5210,9 @@ <fixed-case>KYOTO</fixed-case>: an open platform for mining facts PiekVossen - GermanRigau - EnekoAgirre - AitorSoroa + GermanRigau + EnekoAgirre + AitorSoroa MonicaMonachini RobertoBartolini 1–10 @@ -5232,17 +5232,17 @@ Multilingual Lexical Network from the Archives of the Digital Silk Road Hans-MohammadDaoud KyoKageura - ChristianBoitet + ChristianBoitet AsanobuKitamoto - MathieuMangeot + MathieuMangeot 19–27 W10-3303 daoud-etal-2010-multilingual Finding Medical Term Variations using Parallel Corpora and Distributional Similarity - Lonnekevan der Plas - JörgTiedemann + Lonnekevan der Plas + JörgTiedemann 28–37 W10-3304 van-der-plas-tiedemann-2010-finding @@ -5257,9 +5257,9 @@ Intrinsic Property-based Taxonomic Relation Extraction from Category Structure DongHyunChoi - Eun-KyungKim + Eun-KyungKim Sang-AhShim - Key-SunChoi + Key-SunChoi 48–57 W10-3306 choi-etal-2010-intrinsic @@ -5268,15 +5268,15 @@ Developing a Biosurveillance Application Ontology for Influenza-Like-Illness MikeConway JohnDowling - WendyChapman + WendyChapman 58–66 W10-3307 conway-etal-2010-developing Interfacing the Lexicon and the Ontology in a Semantic Analyzer - IgorBoguslavsky - LeonidIomdin + IgorBoguslavsky + LeonidIomdin VictorSizov SvetlanaTimoshenko 67–76 @@ -5285,8 +5285,8 @@ Ontolexical resources for feature-based opinion mining: a case-study - AnaïsCadilhac - FarahBenamara + AnaïsCadilhac + FarahBenamara NathalieAussenac-Gilles 77–86 W10-3309 @@ -5311,7 +5311,7 @@ Distributional Semantics and the Lexicon - EduardHovy + EduardHovy 1 W10-3401 hovy-2010-distributional @@ -5319,15 +5319,15 @@ <fixed-case>S</fixed-case>emantic<fixed-case>N</fixed-case>et-Perception of Human Pragmatics AmitavaDas - SivajiBandyopadhyay + SivajiBandyopadhyay 2–11 W10-3402 das-bandyopadhyay-2010-semanticnet Exploiting Lexical Resources for Therapeutic Purposes: the Case of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>ST</fixed-case>a<fixed-case>RS</fixed-case>.sys - Gianluca E.Lebani - EmanuelePianta + Gianluca E.Lebani + EmanuelePianta 12–17 W10-3403 lebani-pianta-2010-exploiting @@ -5344,7 +5344,7 @@ The Color of Emotions in Texts CarloStrapparava - GozdeOzbal + GozdeOzbal 28–32 W10-3405 strapparava-ozbal-2010-color @@ -5360,8 +5360,8 @@ An Optimal and Portable Parsing Method for <fixed-case>R</fixed-case>omanian, <fixed-case>F</fixed-case>rench, and <fixed-case>G</fixed-case>erman Large Dictionaries NeculaiCurteanu - AlexMoruz - DianaTrandabăţ + AlexMoruz + DianaTrandabăţ 38–47 W10-3407 curteanu-etal-2010-optimal @@ -5376,14 +5376,14 @@ Computational Lexicography: A Feature-based Approach in Designing an <fixed-case>E</fixed-case>-dictionary of <fixed-case>C</fixed-case>hinese Classifiers - HelenaGao + HelenaGao 56–65 W10-3409 gao-2010-computational In Search of the ’Right’ Word - StellaMarkantonatou + StellaMarkantonatou AggelikiFotopoulou MariaAlexopoulou MariannaMini @@ -5438,9 +5438,9 @@ Extending <fixed-case>E</fixed-case>nglish <fixed-case>ACE</fixed-case> 2005 Corpus Annotation with Ground-truth Links to <fixed-case>W</fixed-case>ikipedia LuisaBentivogli PamelaForner - ClaudioGiuliano + ClaudioGiuliano AlessandroMarchetti - EmanuelePianta + EmanuelePianta KaterynaTymoshenko 19–27 W10-3503 @@ -5448,7 +5448,7 @@ Expanding textual entailment corpora from<fixed-case>W</fixed-case>ikipedia using co-training - Fabio MassimoZanzotto + Fabio MassimoZanzotto MarcoPennacchiotti 28–36 W10-3504 @@ -5478,7 +5478,7 @@ ArjumandYounus MuhammadSaeed NasirTouheed - EmanuelePianta + EmanuelePianta KaterynaTymoshenko 55–62 W10-3507 @@ -5488,7 +5488,7 @@ Helping Volunteer Translators, Fostering Language Resources MasaoUtiyama TakeshiAbekawa - EiichiroSumita + EiichiroSumita KyoKageura 63–66 W10-3508 @@ -5512,8 +5512,8 @@ Boosting N-gram Coverage for Unsegmented Languages Using Multiple Text Segmentation Approach - Solomon TeferraAbate - LaurentBesacier + Solomon TeferraAbate + LaurentBesacier SopheapSeng 1–7 W10-3601 @@ -5522,7 +5522,7 @@ <fixed-case>T</fixed-case>hai Sentence-Breaking for Large-Scale <fixed-case>SMT</fixed-case> GlennSlayden - Mei-YuhHwang + Mei-YuhHwang LeeSchwartz 8–16 W10-3602 @@ -5532,7 +5532,7 @@ Clause Identification and Classification in <fixed-case>B</fixed-case>engali AniruddhaGhosh AmitavaDas - SivajiBandyopadhyay + SivajiBandyopadhyay 17–25 W10-3603 ghosh-etal-2010-clause @@ -5541,31 +5541,31 @@ A Paradigm-Based Finite State Morphological Analyzer for <fixed-case>M</fixed-case>arathi MugdhaBapat HarshadaGune - PushpakBhattacharyya + PushpakBhattacharyya 26–34 W10-3604 bapat-etal-2010-paradigm Web Based <fixed-case>M</fixed-case>anipuri Corpus for Multiword <fixed-case>NER</fixed-case> and Reduplicated <fixed-case>MWE</fixed-case>s Identification using <fixed-case>SVM</fixed-case> - Thoudam DorenSingh - SivajiBandyopadhyay + Thoudam DorenSingh + SivajiBandyopadhyay 35–42 W10-3605 singh-bandyopadhyay-2010-web A Word Segmentation System for Handling Space Omission Problem in <fixed-case>U</fixed-case>rdu Script - GurpreetLehal + GurpreetLehal 43–50 W10-3606 lehal-2010-word Hybrid Stemmer for <fixed-case>G</fixed-case>ujarati - PratikkumarPatel + PratikkumarPatel KashyapPopat - PushpakBhattacharyya + PushpakBhattacharyya 51–55 W10-3607 patel-etal-2010-hybrid @@ -5575,8 +5575,8 @@ Proceedings of the 2010 Workshop on Multiword Expressions: from Theory to Applications W10-37 - ÉricLaporte - PreslavNakov + ÉricLaporte + PreslavNakov CarlosRamisch AlineVillavicencio Coling 2010 Organizing Committee @@ -5598,7 +5598,7 @@ Computational Lexicography of Multi-Word Units. How Efficient Can It Be? - FilipGraliński + FilipGraliński AgataSavary MonikaCzerepowicka FilipMakowiecki @@ -5620,14 +5620,14 @@ AntonioToral LamiaTounsi PavelPecina - Josefvan Genabith + Josefvan Genabith 19–27 W10-3704 attia-etal-2010-automatic Sentence Analysis and Collocation Identification - EricWehrli + EricWehrli VioletaSeretan LukaNerima 28–36 @@ -5640,7 +5640,7 @@ SantanuPal TapabrataMondal TanmoyChakraborty - SivajiBandyopadhyay + SivajiBandyopadhyay 37–45 W10-3706 das-etal-2010-automatic @@ -5648,9 +5648,9 @@ Handling Named Entities and Compound Verbs in Phrase-Based Statistical Machine Translation SantanuPal - Sudip KumarNaskar + Sudip KumarNaskar PavelPecina - SivajiBandyopadhyay + SivajiBandyopadhyay AndyWay 46–54 W10-3707 @@ -5671,7 +5671,7 @@ TomokoIzumi KenjiImamura GenichiroKikui - SatoshiSato + SatoshiSato 64–72 W10-3709 izumi-etal-2010-standardizing @@ -5679,7 +5679,7 @@ Identification of Reduplication in <fixed-case>B</fixed-case>engali Corpus and their Semantic Analysis: A Rule Based Approach TanmoyChakraborty - SivajiBandyopadhyay + SivajiBandyopadhyay 73–76 W10-3710 chakraborty-bandyopadhyay-2010-identification @@ -5689,7 +5689,7 @@ FrancescaBonin FeliceDell’Orletta GiuliaVenturi - SimonettaMontemagni + SimonettaMontemagni 77–80 W10-3711 bonin-etal-2010-contrastive-filtering @@ -5712,7 +5712,7 @@ Multiword Expressions as Discourse Relation Markers (<fixed-case>DRM</fixed-case>s) - AravindJoshi + AravindJoshi 89 W10-3714 joshi-2010-multiword @@ -5763,7 +5763,7 @@ Syntactic Constraints on Phrase Extraction for Phrase-Based Machine Translation HailongCao AndrewFinch - EiichiroSumita + EiichiroSumita 28–33 W10-3804 cao-etal-2010-syntactic @@ -5772,7 +5772,7 @@ Phrase Based Decoding using a Discriminative Model PrasanthKolachina SriramVenkatapathy - SrinivasBangalore + SrinivasBangalore SudheerKolachina AvineshPVS 34–42 @@ -5782,7 +5782,7 @@ Seeding Statistical Machine Translation with Translation Memory Output through Tree-Based Structural Alignment VentsislavZhechev - Josefvan Genabith + Josefvan Genabith 43–51 W10-3806 zhechev-van-genabith-2010-seeding @@ -5799,7 +5799,7 @@ <fixed-case>A</fixed-case>rabic morpho-syntactic feature disambiguation in a translation context InesTurki Khemakhem SalmaJamoussi - AbdelmajidBen Hamadou + AbdelmajidBen Hamadou 61–65 W10-3808 turki-khemakhem-etal-2010-arabic @@ -5808,7 +5808,7 @@ A Discriminative Approach for Dependency Based Statistical Machine Translation SriramVenkatapathy RajeevSangal - AravindJoshi + AravindJoshi KarthikGali 66–74 W10-3809 @@ -5825,8 +5825,8 @@ <fixed-case>M</fixed-case>anipuri-<fixed-case>E</fixed-case>nglish Bidirectional Statistical Machine Translation Systems using Morphology and Dependency Relations - Thoudam DorenSingh - SivajiBandyopadhyay + Thoudam DorenSingh + SivajiBandyopadhyay 83–91 W10-3811 singh-bandyopadhyay-2010-manipuri @@ -5834,7 +5834,7 @@ A Discriminative Syntactic Model for Source Permutation via Tree Transduction MaximKhalilov - KhalilSima’an + KhalilSima’an 92–100 W10-3812 khalilov-simaan-2010-discriminative @@ -5850,7 +5850,7 @@ New Parameterizations and Features for <fixed-case>PSCFG</fixed-case>-Based Machine Translation AndreasZollmann - StephanVogel + StephanVogel 110–117 W10-3814 zollmann-vogel-2010-new @@ -5858,7 +5858,7 @@ Deep Syntax Language Models and Statistical Machine Translation YvetteGraham - Josefvan Genabith + Josefvan Genabith 118–126 W10-3815 graham-van-genabith-2010-deep @@ -5913,7 +5913,7 @@ MegumiOhki SuguruMatsuyoshi KentaroInui - YujiMatsumoto + YujiMatsumoto 21–30 W10-3904 murakami-etal-2010-automatic @@ -5936,7 +5936,7 @@ A Look inside the Distributionally Similar Terms KowKuroda - Jun’ichiKazama + Jun’ichiKazama KentaroTorisawa 40–49 W10-3907 @@ -5953,7 +5953,7 @@ Large Corpus-based Semantic Feature Extraction for Pronoun Coreference ShashaLiao - RalphGrishman + RalphGrishman 60–68 W10-3909 liao-grishman-2010-large @@ -5963,7 +5963,7 @@ MinhNghiem Quoc KeisukeYokoi YuichirohMatsubayashi - AkikoAizawa + AkikoAizawa 69–74 W10-3910 nghiem-quoc-etal-2010-mining @@ -5975,7 +5975,7 @@ TomokoOhkuma MasatsuguTonoike DaigoSugihara - HiroshiMasuichi + HiroshiMasuichi KazuhikoOhe 75–83 W10-3911 @@ -5989,7 +5989,7 @@ SudeshnaSarkar MinZhang AdamLopez - RaghavendraUdupa + RaghavendraUdupa Coling 2010 Organizing Committee
Beijing, China
August @@ -6002,7 +6002,7 @@ Word Sense Disambiguation and <fixed-case>IR</fixed-case> - PushpakBhattacharyya + PushpakBhattacharyya 1 W10-4001 bhattacharyya-2010-word @@ -6026,8 +6026,8 @@ How to Get the Same News from Different Language News Papers - T. PattabhiR. K Rao - SobhaLalitha Devi + T. PattabhiR. K Rao + SobhaLalitha Devi 11–15 W10-4004 r-k-rao-lalitha-devi-2010-get @@ -6045,7 +6045,7 @@ Multi-Word Expression-Sensitive Word Alignment TsuyoshiOkita - AlfredoMaldonado Guerra + AlfredoMaldonado Guerra YvetteGraham AndyWay 26–34 @@ -6076,8 +6076,8 @@ AchilleFalaise DavidRouquet DidierSchwab - HervéBlanchon - ChristianBoitet + HervéBlanchon + ChristianBoitet 52–60 W10-4009 falaise-etal-2010-ontology @@ -6087,7 +6087,7 @@ MarinaLitvak MarkLast SlavaKisilevich - DanielKeim + DanielKeim HagayLipman AssafBen Gur 61–69 @@ -6097,9 +6097,9 @@ More Languages, More <fixed-case>MAP</fixed-case>?: A Study of Multiple Assisting Languages in Multilingual <fixed-case>PRF</fixed-case> VishalVachhani - ManojChinnakotla - MiteshKhapra - PushpakBhattacharyya + ManojChinnakotla + MiteshKhapra + PushpakBhattacharyya 70–78 W10-4011 vachhani-etal-2010-languages @@ -6107,7 +6107,7 @@ Multilinguization and Personalization of <fixed-case>NL</fixed-case>-based Systems NajehHajlaoui - ChristianBoitet + ChristianBoitet 79–87 W10-4012 hajlaoui-boitet-2010-multilinguization @@ -6136,7 +6136,7 @@ Textual Emotion Processing From Event Analysis Chu-RenHuang YingChen - Sophia Yat MeiLee + Sophia Yat MeiLee W10-4102 huang-etal-2010-textual @@ -6175,7 +6175,7 @@ Reducing the False Alarm Rate of <fixed-case>C</fixed-case>hinese Character Error Detection and Correction Shih-HungWu Yong-ZhiChen - Ping-cheYang + Ping-cheYang TsunKu Chao-LinLiu W10-4107 @@ -6191,25 +6191,25 @@ Bigram <fixed-case>HMM</fixed-case> with Context Distribution Clustering for Unsupervised <fixed-case>C</fixed-case>hinese Part-of-Speech tagging LidanZhang - Kwok-PingChan - ChunyuKit - DongfengCai + Kwok-PingChan + ChunyuKit + DongfengCai W10-4109 zhang-etal-2010-bigram Mining Large-scale Parallel Corpora from Multilingual Patents: An <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese example and its application to <fixed-case>SMT</fixed-case> BinLu - Benjamin K.Tsou + Benjamin K.Tsou TaoJiang - Oi YeeKwong + Oi YeeKwong JingboZhu W10-4110 lu-etal-2010-mining Studies on Automatic Recognition of Common <fixed-case>C</fixed-case>hinese Adverb’s usages Based on Statistics Methods - HongyingZan + HongyingZan JunhuiZhang XuefengZhu ShiwenYu @@ -6220,8 +6220,8 @@ Automatic Identification of Predicate Heads in <fixed-case>C</fixed-case>hinese Sentences XiaonaRen QiaoliZhou - ChunyuKit - DongfengCai + ChunyuKit + DongfengCai W10-4112 ren-etal-2010-automatic @@ -6240,7 +6240,7 @@ ZhenHai KuiyuChang QinbaoSong - Jung-jaeKim + Jung-jaeKim W10-4114 hai-etal-2010-statistical @@ -6250,7 +6250,7 @@ WenjieLi YanLiu DequanZheng - TiejunZhao + TiejunZhao W10-4115 chen-etal-2010-exploring @@ -6265,7 +6265,7 @@ Exploiting Social <fixed-case>Q</fixed-case>&<fixed-case>A</fixed-case> Collection in Answering Complex Questions YouzhengWu - KawaiHisashi + KawaiHisashi W10-4117 wu-hisashi-2010-exploiting @@ -6293,8 +6293,8 @@ Active Learning Based Corpus Annotation HongyanSong TianfangYao - ChunyuKit - DongfengCai + ChunyuKit + DongfengCai W10-4121 song-etal-2010-active @@ -6310,7 +6310,7 @@ <fixed-case>CMDMC</fixed-case>: A Diachronic Digital Museum of <fixed-case>C</fixed-case>hinese <fixed-case>M</fixed-case>andarin MinHou YuZou - YonglinTeng + YonglinTeng WeiHe YanWang JunLiu @@ -6321,7 +6321,7 @@ <fixed-case>K</fixed-case>azakh Segmentation System of Inflectional Affixes GulilaAltenbek - WangXiao-long + Xiao-longWang W10-4124 altenbek-wang-2010-kazakh @@ -6343,13 +6343,13 @@ A Multi-layer <fixed-case>C</fixed-case>hinese Word Segmentation System Optimized for Out-of-domain Tasks QinGao - StephanVogel + StephanVogel W10-4127 gao-vogel-2010-multi <fixed-case>HMM</fixed-case> Revises Low Marginal Probability by <fixed-case>CRF</fixed-case> for <fixed-case>C</fixed-case>hinese Word Segmentation - DegenHuang + DegenHuang DeqinTong YanyanLuo W10-4128 @@ -6384,14 +6384,14 @@ Adaptive <fixed-case>C</fixed-case>hinese Word Segmentation with Online Passive-Aggressive Algorithm WenjunGao XipengQiu - XuanjingHuang + XuanjingHuang W10-4132 gao-etal-2010-adaptive A Character-Based Joint Model for <fixed-case>CIPS</fixed-case>-<fixed-case>SIGHAN</fixed-case> Word Segmentation Bakeoff 2010 KunWang - ChengqingZong + ChengqingZong Keh-YihSu W10-4133 wang-etal-2010-character @@ -6401,7 +6401,7 @@ Hua-PingZhang JianGao QianMo - He-YanHuang + He-YanHuang W10-4134 zhang-etal-2010-incorporating @@ -6416,8 +6416,8 @@
<fixed-case>C</fixed-case>hinese word segmentation model using bootstrapping - BaobaoChang - MansurMairgup + BaobaoChang + MansurMairgup W10-4136 chang-mairgup-2010-chinese @@ -6433,10 +6433,10 @@
Term Contributed Boundary Tagging by Conditional Random Fields for <fixed-case>SIGHAN</fixed-case> 2010 <fixed-case>C</fixed-case>hinese Word Segmentation Bakeoff - Tian-JianJiang + Tian-JianJiang Shih-HungLiu Cheng-LungSung - Wen-LianHsu + Wen-LianHsu W10-4138 jiang-etal-2010-term-contributed @@ -6481,7 +6481,7 @@
Discriminative Parse Reranking for <fixed-case>C</fixed-case>hinese with Homogeneous and Heterogeneous Annotations - WeiweiSun + WeiweiSun RuiWang YiZhang W10-4144 @@ -6493,7 +6493,7 @@ WenjingLang YingyingWang YanWang - DongfengCai + DongfengCai W10-4145 zhou-etal-2010-sau @@ -6502,7 +6502,7 @@ XuezheMa XiaotianZhang HaiZhao - Bao-LiangLu + Bao-LiangLu W10-4146 ma-etal-2010-dependency
@@ -6515,8 +6515,8 @@ <fixed-case>CRF</fixed-case> tagging for head recognition based on <fixed-case>S</fixed-case>tanford parser YongCheng - ChengjieSun - BingquanLiu + ChengjieSun + BingquanLiu LeiLin W10-4148 cheng-etal-2010-crf @@ -6524,7 +6524,7 @@ Treebank Conversion based Self-training Strategy for Parsing ZhiguoWang - ChengqingZong + ChengqingZong W10-4149 wang-zong-2010-treebank @@ -6570,8 +6570,8 @@ Combine Person Name and Person Identity Recognition and Document Clustering for <fixed-case>C</fixed-case>hinese Person Name Disambiguation RuifengXu JunXu - XiangyingDai - ChunyuKit + XiangyingDai + ChunyuKit W10-4154 xu-etal-2010-combine @@ -6589,7 +6589,7 @@ XiangZhu XiaodongShi NingfengLiu - YingMeiGuo + YingMeiGuo YidongChen W10-4156 zhu-etal-2010-chinese @@ -6599,7 +6599,7 @@ Hua-PingZhang Zhi-HuaLiu QianMo - He-YanHuang + He-YanHuang W10-4157 zhang-etal-2010-chinese-personal
@@ -6608,8 +6608,8 @@ YuHong FeiPei Yue-huiYang - Jian-minYao - Qiao-mingZhu + Jian-minYao + Qiao-mingZhu W10-4158 hong-etal-2010-jumping
@@ -6623,7 +6623,7 @@ <fixed-case>DLUT</fixed-case>: <fixed-case>C</fixed-case>hinese Personal Name Disambiguation with Rich Features DongliangWang - DegenHuang + DegenHuang W10-4160 wang-huang-2010-dlut @@ -6666,7 +6666,7 @@ Triplet-Based <fixed-case>C</fixed-case>hinese Word Sense Induction ZhaoLiu XipengQiu - XuanjingHuang + XuanjingHuang W10-4165 liu-etal-2010-triplet
@@ -6715,8 +6715,8 @@ Soochow University: Description and Analysis of the <fixed-case>C</fixed-case>hinese Word Sense Induction System for <fixed-case>CLP</fixed-case>2010 HuaXu BingLiu - LonghuaQian - GuodongZhou + LonghuaQian + GuodongZhou W10-4171 xu-etal-2010-soochow
@@ -6741,7 +6741,7 @@ Proceedings of the 6th International Natural Language Generation Conference - JohnKelleher + JohnKelleher Brian MacNamee Ielka van derSluis Association for Computational Linguistics @@ -6756,7 +6756,7 @@ Comparing Rating Scales and Preference Judgements in Language Evaluation - AnjaBelz + AnjaBelz EricKow W10-4201 belz-kow-2010-comparing @@ -6764,14 +6764,14 @@ A Discourse-Aware Graph-Based Content-Selection Framework SenizDemir - SandraCarberry - Kathleen F.McCoy + SandraCarberry + Kathleen F.McCoy W10-4202 demir-etal-2010-discourse Generating Referring Expressions with Reference Domain Theory - AlexandreDenis + AlexandreDenis W10-4203 denis-2010-generating @@ -6784,8 +6784,8 @@ Tense and Aspect Assignment in Narrative Discourse - DavidElson - KathleenMcKeown + DavidElson + KathleenMcKeown W10-4205 elson-mckeown-2010-tense @@ -6799,30 +6799,30 @@ Situated Reference in a Hybrid Human-Robot Interaction System ManuelGiuliani - Mary EllenFoster + Mary EllenFoster AmyIsard ColinMatheson - JonOberlander + JonOberlander AloisKnoll W10-4207 giuliani-etal-2010-situated Towards a Programmable Instrumented Generator - ChrisMellish + ChrisMellish W10-4208 mellish-2010-towards Using Semantic Web Technology to Support <fixed-case>NLG</fixed-case>. Case Study: <fixed-case>OWL</fixed-case> finds <fixed-case>RAGS</fixed-case> - ChrisMellish + ChrisMellish W10-4209 mellish-2010-using Natural Reference to Objects in a Visual Domain MargaretMitchell - Keesvan Deemter + Keesvan Deemter EhudReiter W10-4210 mitchell-etal-2010-natural @@ -6831,14 +6831,14 @@ Generating and Validating Abstracts of Meeting Conversations: a User Study GabrielMurray GiuseppeCarenini - RaymondNg + RaymondNg W10-4211 murray-etal-2010-generating Charting the Potential of Description Logic for the Generation of Referring Expressions YuanRen - Keesvan Deemter + Keesvan Deemter Jeff Z.Pan W10-4212 ren-etal-2010-charting @@ -6874,7 +6874,7 @@ Extracting Parallel Fragments from Comparable Corpora for Data-to-text Generation - AnjaBelz + AnjaBelz EricKow W10-4217 belz-kow-2010-extracting @@ -6894,16 +6894,16 @@ ‘If you’ve heard it, you can say it’ - Towards an Account of Expressibility - David D.McDonald + David D.McDonald CharlieGreenbacker W10-4220 mcdonald-greenbacker-2010-youve Cross-linguistic Attribute Selection for <fixed-case>REG</fixed-case>: Comparing <fixed-case>D</fixed-case>utch and <fixed-case>E</fixed-case>nglish - MariëtTheune + MariëtTheune RuudKoolen - EmielKrahmer + EmielKrahmer W10-4221 theune-etal-2010-cross @@ -6917,8 +6917,8 @@ Paraphrase Generation as Monolingual Translation: Data and Evaluation SanderWubben - Antalvan den Bosch - EmielKrahmer + Antalvan den Bosch + EmielKrahmer W10-4223 wubben-etal-2010-paraphrase @@ -6927,13 +6927,13 @@ HendrikZender ChristopherKoppermann FaiGreeve - Geert-JanKruijff + Geert-JanKruijff W10-4224 zender-etal-2010-anchor
Generation Challenges 2010 Preface - AnjaBelz + AnjaBelz AlbertGatt AlexanderKoller W10-4225 @@ -6941,7 +6941,7 @@ The <fixed-case>GREC</fixed-case> Challenges 2010: Overview and Evaluation Results - AnjaBelz + AnjaBelz EricKow W10-4226 belz-kow-2010-grec @@ -6954,9 +6954,9 @@ Poly-co: An Unsupervised Co-reference Detection System - ÉricCharton + ÉricCharton MichelGagnon - BenoitOzell + BenoitOzell W10-4228 charton-etal-2010-poly @@ -6965,13 +6965,13 @@ AmitavaDas TanikSaikh TapabrataMondal - SivajiBandyopadhyay + SivajiBandyopadhyay W10-4229 das-etal-2010-ju
The <fixed-case>UMUS</fixed-case> System for Named Entity Generation at <fixed-case>GREC</fixed-case> 2010 - BenoitFavre + BenoitFavre BerndBohnet W10-4230 favre-bohnet-2010-umus @@ -6980,7 +6980,7 @@ <fixed-case>UD</fixed-case>el: Refining a Method of Named Entity Generation CharlesGreenbacker NicoleSparks - KathleenMcCoy + KathleenMcCoy Che-YuKuo W10-4231 greenbacker-etal-2010-udel @@ -6989,7 +6989,7 @@ <fixed-case>UD</fixed-case>el: Named Entity Recognition and Reference Regeneration from Surface Text NicoleSparks CharlesGreenbacker - KathleenMcCoy + KathleenMcCoy Che-YuKuo W10-4232 sparks-etal-2010-udel @@ -6999,11 +6999,11 @@ AlexanderKoller KristinaStriegnitz AndrewGargett - DonnaByron + DonnaByron JustineCassell RobertDale - JohannaMoore - JonOberlander + JohannaMoore + JonOberlander W10-4233 koller-etal-2010-report @@ -7014,14 +7014,14 @@ PaulPiwek MihaiLintean SvetlanaStoyanchev - ChristianMoldovan + ChristianMoldovan W10-4234 rus-etal-2010-first
Generation Under Uncertainty OliverLemon - SriniJanarthanam + SriniJanarthanam VerenaRieser W10-4235 lemon-etal-2010-generation @@ -7035,11 +7035,11 @@ Finding Common Ground: Towards a Surface Realisation Shared Task - AnjaBelz - MikeWhite - Josefvan Genabith + AnjaBelz + MikeWhite + Josefvan Genabith DeirdreHogan - AmandaStent + AmandaStent W10-4237 belz-etal-2010-finding @@ -7084,7 +7084,7 @@
Dynamic Adaptation in Dialog Systems - MarilynWalker + MarilynWalker 17 W10-4303 walker-2010-dynamic @@ -7153,7 +7153,7 @@ Using entity features to classify implicit discourse relations AnnieLouis - AravindJoshi + AravindJoshi RashmiPrasad AniNenkova 59–62 @@ -7186,18 +7186,18 @@ Exploring the Effectiveness of Lexical Ontologies for Modeling Temporal Relations with <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic - Eun Y.Ha + Eun Y.Ha AlokBaikadi CarlyleLicata BradfordMott - JamesLester + JamesLester 75–78 W10-4314 ha-etal-2010-exploring Reference reversibility with Reference Domain Theory - AlexandreDenis + AlexandreDenis 79–82 W10-4315 denis-2010-reference @@ -7205,8 +7205,8 @@ Utilizing Review Summarization in a Spoken Recommendation System JingjingLiu - StephanieSeneff - VictorZue + StephanieSeneff + VictorZue 83–86 W10-4316 liu-etal-2010-utilizing @@ -7214,7 +7214,7 @@ Dialogue Management Based on Entities and Constraints YushiXu - StephanieSeneff + StephanieSeneff 87–90 W10-4317 xu-seneff-2010-dialogue @@ -7223,8 +7223,8 @@ Towards Improving the Naturalness of Social Conversations with Dialogue Systems MatthewMarge JoãoMiranda - AlanBlack - AlexanderRudnicky + AlanBlack + AlexanderRudnicky 91–94 W10-4318 marge-etal-2010-towards @@ -7251,7 +7251,7 @@ Learning Dialogue Strategies from Older and Younger Simulated Users KallirroiGeorgila MariaWolters - JohannaMoore + JohannaMoore 103–106 W10-4321 georgila-etal-2010-learning @@ -7268,19 +7268,19 @@ Parameter estimation for agenda-based user simulation SimonKeizer - MilicaGašić - FilipJurčíček - FrançoisMairesse + MilicaGašić + FilipJurčíček + FrançoisMairesse BlaiseThomson KaiYu - SteveYoung + SteveYoung 116–123 W10-4323 keizer-etal-2010-parameter Adaptive Referring Expression Generation in Spoken Dialogue Systems: Evaluation with Real Users - SrinivasanJanarthanam + SrinivasanJanarthanam OliverLemon 124–131 W10-4324 @@ -7295,9 +7295,9 @@ The Effects of Discourse Connectives Prediction on Implicit Discourse Relation Recognition - Zhi MinZhou - ManLan - Zheng YuNiu + Zhi MinZhou + ManLan + Zheng YuNiu YuXu JianSu 139–146 @@ -7307,7 +7307,7 @@ Discourse indicators for content selection in summarization AnnieLouis - AravindJoshi + AravindJoshi AniNenkova 147–156 W10-4327 @@ -7316,7 +7316,7 @@ Comparing Spoken Language Route Instructions for Robots across Environment Representations MatthewMarge - AlexanderRudnicky + AlexanderRudnicky 157–164 W10-4328 marge-rudnicky-2010-comparing @@ -7360,20 +7360,20 @@ Don’t tell anyone! Two Experiments on Gossip Conversations JennyBrusk RonArtstein - DavidTraum + DavidTraum 193–200 W10-4333 brusk-etal-2010-dont <fixed-case>G</fixed-case>aussian Processes for Fast Policy Optimisation of <fixed-case>POMDP</fixed-case>-based Dialogue Managers - MilicaGašić - FilipJurčíček + MilicaGašić + FilipJurčíček SimonKeizer - FrancoisMairesse + FrancoisMairesse BlaiseThomson KaiYu - SteveYoung + SteveYoung 201–204 W10-4334 gasic-etal-2010-gaussian @@ -7389,7 +7389,7 @@ Representing Uncertainty about Complex User Goals in Statistical Dialogue Systems - Paul A.Crook + Paul A.Crook OliverLemon 209–212 W10-4336 @@ -7400,14 +7400,14 @@ SebastianVarges SilviaQuarteroni GiuseppeRiccardi - AlexeiIvanov + AlexeiIvanov 213–216 W10-4337 varges-etal-2010-investigating Cooperative User Models in Statistical Dialog Simulators - MeritxellGonzález + MeritxellGonzález SilviaQuarteroni GiuseppeRiccardi SebastianVarges @@ -7421,8 +7421,8 @@ KomeiSugiura KiyonoriOhtake ChioriHori - HidekiKashioka - HisashiKawai + HidekiKashioka + HisashiKawai SatoshiNakamura 221–224 W10-4339 @@ -7473,17 +7473,17 @@ <fixed-case>I</fixed-case>’ve said it before, and <fixed-case>I</fixed-case>’ll say it again: An empirical investigation of the upper bound of the selection approach to dialogue SudeepGandhe - DavidTraum + DavidTraum 245–248 W10-4345 gandhe-traum-2010-ive Autism and Interactional Aspects of Dialogue - PeterHeeman + PeterHeeman RebeccaLunsford - EthanSelfridge - LoisBlack + EthanSelfridge + LoisBlack Janvan Santen 249–252 W10-4346 @@ -7501,9 +7501,9 @@ How to Drink from a Fire Hose: One Person Can Annoscribe One Million Utterances in One Month - DavidSuendermann + DavidSuendermann JacksonLiscombe - RobertoPieraccini + RobertoPieraccini 257–260 W10-4348 suendermann-etal-2010-drink @@ -7529,7 +7529,7 @@ Statistical Dialog Management Methodologies for Real Applications DavidGriol ZoraidaCallejas - RamónLópez-Cózar + RamónLópez-Cózar 269–272 W10-4351 griol-etal-2010-statistical @@ -7537,7 +7537,7 @@ <fixed-case>Y</fixed-case>ou<fixed-case>B</fixed-case>ot: A Simple Framework for Building Virtual Networking Agents SeijiTakegata - KumikoTanaka-Ishii + KumikoTanaka-Ishii 273–276 W10-4352 takegata-tanaka-ishii-2010-youbot @@ -7547,7 +7547,7 @@ MarcCavazza RaúlSantos de la Cámara MarkkuTurunen - JoséRelaño Gil + JoséRelaño Gil JaakkoHakulinen NigelCrook DeboraField @@ -7557,7 +7557,7 @@ <tex-math>F^2</tex-math> - New Technique for Recognition of User Emotional States in Spoken Dialogue Systems - RamónLópez-Cózar + RamónLópez-Cózar JanSilovsky DavidGriol 281–288 @@ -7567,19 +7567,19 @@ Online Error Detection of Barge-In Utterances by Using Individual Users’ Utterance Histories in Spoken Dialogue System KazunoriKomatani - Hiroshi G.Okuno + Hiroshi G.Okuno 289–296 W10-4355 komatani-okuno-2010-online Dialogue Act Modeling in a Complex Task-Oriented Domain - KristyBoyer - Eun Y.Ha - RobertPhillips + KristyBoyer + Eun Y.Ha + RobertPhillips MichaelWallis MladenVouk - JamesLester + JamesLester 297–305 W10-4356 boyer-etal-2010-dialogue @@ -7587,7 +7587,7 @@ Hand Gestures in Disambiguating Types of You Expressions in Multiparty Meetings TylerBaldwin - JoyceChai + JoyceChai KatrinKirchhoff 306–313 W10-4357 @@ -7607,7 +7607,7 @@ Towards an Empirically Motivated Typology of Follow-Up Questions: The Role of Dialogue Context ManuelKirschner - RaffaellaBernardi + RaffaellaBernardi 322–331 W10-4359 kirschner-bernardi-2010-towards @@ -7625,7 +7625,7 @@ Proceedings of the 10th International Workshop on Tree Adjoining Grammar and Related Frameworks (TAG+10) W10-44 - SrinivasBangalore + SrinivasBangalore RobertFrank MaribelRomero Linguistic Department, Yale University @@ -7647,9 +7647,9 @@ Non-local Right Node Raising: an Analysis using Delayed Tree-Local <fixed-case>MC</fixed-case>-<fixed-case>TAG</fixed-case> - Chung-hyeHan + Chung-hyeHan DavidPotter - Dennis RyanStoroshenko + Dennis RyanStoroshenko 9–16 W10-4402 han-etal-2010-non @@ -7686,7 +7686,7 @@ Unavoidable Ill-nestedness in Natural Language and the Adequacy of Tree Local-<fixed-case>MCTAG</fixed-case> Induced Dependency Structures JoanChen-Main - Aravind K.Joshi + Aravind K.Joshi 53–60 W10-4407 chen-main-joshi-2010-unavoidable @@ -7695,7 +7695,7 @@ Generating <fixed-case>LTAG</fixed-case> grammars from a lexicon/ontology interface ChristinaUnger FelixHieber - PhilippCimiano + PhilippCimiano 61–68 W10-4408 unger-etal-2010-generating @@ -7732,8 +7732,8 @@ Control Verb, Argument Cluster Coordination and Multi Component <fixed-case>TAG</fixed-case> - DjaméSeddah - BenoitSagot + DjaméSeddah + BenoitSagot LaurenceDanlos 101–110 W10-4413 @@ -7741,7 +7741,7 @@ Building factorized <fixed-case>TAG</fixed-case>s with meta-grammars - ÉricVillemonte de la Clergerie + ÉricVillemonte de la Clergerie 111–118 W10-4414 villemonte-de-la-clergerie-2010-building @@ -7771,8 +7771,8 @@ Binding Variables in <fixed-case>E</fixed-case>nglish: An Analysis Using Delayed Tree Locality - Dennis RyanStoroshenko - Chung-hyeHan + Dennis RyanStoroshenko + Chung-hyeHan 143–150 W10-4418 storoshenko-han-2010-binding @@ -7794,9 +7794,9 @@ Automated Extraction of <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars from a Treebank for <fixed-case>V</fixed-case>ietnamese - PhuongLe-Hong - Thi Minh HuyenNguyen - Phuong ThaiNguyen + PhuongLe-Hong + Thi Minh HuyenNguyen + Phuong ThaiNguyen AzimRoussanaly 165–174 W10-4421 diff --git a/data/xml/W11.xml b/data/xml/W11.xml index da033dfa1d..dc8175501a 100644 --- a/data/xml/W11.xml +++ b/data/xml/W11.xml @@ -4,7 +4,7 @@ Proceedings of the Ninth International Conference on Computational Semantics (IWCS 2011) JohanBos - StephenPulman + StephenPulman iwcs 2011 @@ -14,20 +14,20 @@ The Semantics of Dialogue Acts - HarryBunt + HarryBunt W11-0101 bunt-2011-semantics A New Semantics: Merging Propositional and Distributional Information - EduardHovy + EduardHovy W11-0102 hovy-2011-new Deterministic Statistical Mapping of Sentences to Underspecified Semantics - HiyanAlshawi - Pi-ChuanChang + HiyanAlshawi + Pi-ChuanChang MichaelRinggaard W11-0103 alshawi-etal-2011-deterministic @@ -35,7 +35,7 @@ Word Sense Disambiguation with Multilingual Features CarmenBanea - RadaMihalcea + RadaMihalcea W11-0104 banea-mihalcea-2011-word @@ -51,23 +51,23 @@ A Model for Composing Semantic Relations EduardoBlanco - DanMoldovan + DanMoldovan W11-0106 blanco-moldovan-2011-model Implementing Weighted Abduction in <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic JamesBlythe - JerryHobbs + JerryHobbs PedroDomingos - RohitKate - RaymondMooney + RohitKate + RaymondMooney W11-0107 blythe-etal-2011-implementing Modular Graph Rewriting to Compute Semantics - GuillaumeBonfante + GuillaumeBonfante BrunoGuillaume MathieuMorey GuyPerrier @@ -84,16 +84,16 @@ <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et Class Assignment as a <fixed-case>WSD</fixed-case> Task - Susan WindischBrown + Susan WindischBrown DmitriyDligach - MarthaPalmer + MarthaPalmer W11-0110 brown-etal-2011-verbnet Acquiring entailment pairs across languages and domains: A Data Analysis ManaalFaruqui - SebastianPadó + SebastianPadó W11-0111 faruqui-pado-2011-acquiring @@ -101,7 +101,7 @@ Integrating Logical Representations with Probabilistic Information using <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic DanGarrette KatrinErk - RaymondMooney + RaymondMooney W11-0112 garrette-etal-2011-integrating @@ -113,8 +113,8 @@ Concrete Sentence Spaces for Compositional Distributional Models of Meaning - EdwardGrefenstette - MehrnooshSadrzadeh + EdwardGrefenstette + MehrnooshSadrzadeh StephenClark BobCoecke StephenPulman @@ -123,52 +123,52 @@ Computing Semantic Compositionality in Distributional Semantics - Emiliano RaulGuevara + Emiliano RaulGuevara W11-0115 guevara-2011-computing Using Query Patterns to Learn the Duration of Events AndreyGusev - NathanaelChambers + NathanaelChambers Divye RajKhilnani PranavKhaitan StevenBethard - DanJurafsky + DanJurafsky W11-0116 gusev-etal-2011-using A Representation Framework for Cross-lingual/Interlingual Lexical Semantic Correspondences - YoshihikoHayashi + YoshihikoHayashi W11-0117 hayashi-2011-representation Formalising and specifying underquantification - AurelieHerbelot + AurelieHerbelot AnnCopestake W11-0118 herbelot-copestake-2011-formalising The Exploitation of Spatial Information in Narrative Discourse - Blake StephenHowald - E. GrahamKatz + Blake StephenHowald + E. GrahamKatz W11-0119 howald-katz-2011-exploitation Measuring the semantic relatedness between words and images - Chee WeeLeong - RadaMihalcea + Chee WeeLeong + RadaMihalcea W11-0120 leong-mihalcea-2011-measuring Elaborating a Knowledge Base for Deep Lexical Semantics NiloofarMontazeri - JerryHobbs + JerryHobbs W11-0121 montazeri-hobbs-2011-elaborating @@ -188,7 +188,7 @@ JuntaMizuno ShoukoMasuda KentaroInui - YujiMatsumoto + YujiMatsumoto W11-0123 ohki-etal-2011-recognizing @@ -197,16 +197,16 @@ EkaterinaOvchinnikova NiloofarMontazeri TheodoreAlexandrov - JerryHobbs - Michael C.McCord - RutuMulkar-Mehta + JerryHobbs + Michael C.McCord + RutuMulkar-Mehta W11-0124 ovchinnikova-etal-2011-abductive Incremental dialogue act understanding VolhaPetukhova - HarryBunt + HarryBunt W11-0125 petukhova-bunt-2011-incremental @@ -214,7 +214,7 @@ Extracting aspects of determiner meaning from dialogue in a virtual world environment HilkeReckman JeffOrkin - DebRoy + DebRoy W11-0126 reckman-etal-2011-extracting @@ -227,7 +227,7 @@ Ontology-based Distinction between Polysemy and Homonymy JasonUtt - SebastianPadó + SebastianPadó W11-0128 utt-pado-2011-ontology @@ -253,17 +253,17 @@ Discovering Semantic Classes for <fixed-case>U</fixed-case>rdu N-<fixed-case>V</fixed-case> Complex Predicates - TafseerAhmed + TafseerAhmed MiriamButt W11-0132 ahmed-butt-2011-discovering <fixed-case>DISCUSS</fixed-case>: A dialogue move taxonomy layered over semantic representations - LeeBecker - WayneWard + LeeBecker + WayneWard Sarelvan Vuuren - MarthaPalmer + MarthaPalmer W11-0133 becker-etal-2011-discuss @@ -277,14 +277,14 @@ Towards Component-Based Textual Entailment ElenaCabrio - BernardoMagnini + BernardoMagnini W11-0135 cabrio-magnini-2011-towards Algebraic Approaches to Compositional Distributional Semantics DaoudClarke - DavidWeir + DavidWeir RudiLutz W11-0136 clarke-etal-2011-algebraic @@ -297,8 +297,8 @@ Towards a More Natural Multilingual Controlled Language Interface to <fixed-case>OWL</fixed-case> - NormundsGruzitis - GuntisBarzdins + NormundsGruzitis + GuntisBarzdins W11-0138 gruzitis-barzdins-2011-towards @@ -312,8 +312,8 @@ An Ontology Based Architecture for Translation - LeonardoLesmo - AlessandroMazzei + LeonardoLesmo + AlessandroMazzei Daniele P.Radicioni W11-0140 lesmo-etal-2011-ontology @@ -322,7 +322,7 @@ Corpus-based approaches to processing the scope of negation cues: an evaluation of the state of the art RoserMorante SarahSchrauwen - WalterDaelemans + WalterDaelemans W11-0141 morante-etal-2011-corpus @@ -334,9 +334,9 @@ Granularity in Natural Language Discourse - RutuMulkar-Mehta - JerryHobbs - EduardHovy + RutuMulkar-Mehta + JerryHobbs + EduardHovy W11-0143 mulkar-mehta-etal-2011-granularity @@ -351,16 +351,16 @@ Extracting Contextual Evaluativity KevinReschke - PranavAnand + PranavAnand W11-0145 reschke-anand-2011-extracting Using <fixed-case>MMIL</fixed-case> for the High Level Semantic Annotation of the <fixed-case>F</fixed-case>rench <fixed-case>MEDIA</fixed-case> Dialogue Corpus - Lina MariaRojas-Barahona + Lina MariaRojas-Barahona ThierryBazillon - MatthieuQuignard - FabriceLefevre + MatthieuQuignard + FabriceLefevre W11-0146 rojas-barahona-etal-2011-using @@ -368,7 +368,7 @@ Collecting Semantic Data from <fixed-case>M</fixed-case>echanical <fixed-case>T</fixed-case>urk for a Lexical Knowledge Resource in a Text to Picture Generating System MasoudRouhizadeh MargitBowler - RichardSproat + RichardSproat BobCoyne W11-0147 rouhizadeh-etal-2011-collecting @@ -399,12 +399,12 @@ Proceedings of BioNLP 2011 Workshop W11-02 - Kevin BretonnelCohen + Kevin BretonnelCohen DinaDemner-Fushman SophiaAnaniadou - JohnPestian - Jun’ichiTsujii - BonnieWebber + JohnPestian + Jun’ichiTsujii + BonnieWebber Association for Computational Linguistics
Portland, Oregon, USA
June @@ -429,14 +429,14 @@ Unsupervised Entailment Detection between Dependency Graph Fragments MarekRei - TedBriscoe + TedBriscoe 10–18 W11-0202 rei-briscoe-2011-unsupervised Learning Phenotype Mapping for Integrating Large Genetic Data - Chun-NanHsu + Chun-NanHsu Cheng-JuKuo CongxingCai SarahPendergrass @@ -459,10 +459,10 @@ Fast and simple semantic class assignment for biomedical text K. BretonnelCohen - ThomasChristiansen - WilliamBaumgartner Jr. - KarinVerspoor - LawrenceHunter + ThomasChristiansen + WilliamBaumgartner Jr. + KarinVerspoor + LawrenceHunter 38–45 W11-0205 cohen-etal-2011-fast @@ -471,8 +471,8 @@ The Role of Information Extraction in the Design of a Document Triage Application for Biocuration SandeepPokkunuri CarticRamakrishnan - EllenRiloff - EduardHovy + EllenRiloff + EduardHovy GullyBurns 46–55 W11-0206 @@ -481,7 +481,7 @@ Medical Entity Recognition: A Comparaison of Semantic and Statistical Methods AsmaBen Abacha - PierreZweigenbaum + PierreZweigenbaum 56–64 W11-0207 ben-abacha-zweigenbaum-2011-medical @@ -490,7 +490,7 @@ Automatic Acquisition of Huge Training Data for Bio-Medical Named Entity Recognition YuUsami Han-CheolCho - NaoakiOkazaki + NaoakiOkazaki Jun’ichiTsujii 65–73 W11-0208 @@ -507,7 +507,7 @@ Building a Coreference-Annotated Corpus from the Domain of Biochemistry - Riza TheresaBatista-Navarro + Riza TheresaBatista-Navarro SophiaAnaniadou 83–91 W11-0210 @@ -515,7 +515,7 @@ Towards Morphologically Annotated Corpus of Hospital Discharge Reports in <fixed-case>P</fixed-case>olish - MalgorzataMarciniak + MalgorzataMarciniak AgnieszkaMykowiecka 92–100 W11-0211 @@ -531,7 +531,7 @@ Automatic extraction of data deposition statements: where do the research results go? - AurélieNévéol + AurélieNévéol W. JohnWilbur ZhiyongLu 103–104 @@ -560,7 +560,7 @@ A Study on Dependency Tree Kernels for Automatic Extraction of Protein-Protein Interaction Faisal Md.Chowdhury - AlbertoLavelli + AlbertoLavelli AlessandroMoschitti 124–133 W11-0216 @@ -587,11 +587,11 @@ Building Timelines from Narrative Clinical Records: Initial Results Based-on Deep Natural Language Understanding HyuckchulJung - JamesAllen - NateBlaylock - Williamde Beaumont + JamesAllen + NateBlaylock + Williamde Beaumont LucianGalescu - MarySwift + MarySwift 146–154 W11-0219 jung-etal-2011-building @@ -599,7 +599,7 @@ Text Mining Techniques for Leveraging Positively Labeled Data LanaYeganova - Donald C.Comeau + Donald C.Comeau WonKim W. JohnWilbur 155–163 @@ -609,7 +609,7 @@ Parsing Natural Language Queries for Life Science Knowledge TadayoshiHara - YukaTateisi + YukaTateisi Jin-DongKim YusukeMiyao 164–173 @@ -619,7 +619,7 @@ Unlocking Medical Ontologies for Non-Ontology Experts Shao FenLiang - DoniaScott + DoniaScott RobertStevens AlanRector 174–181 @@ -628,8 +628,8 @@ Self-training and co-training in biomedical word sense disambiguation - AntonioJimeno-Yepes - AlanAronson + AntonioJimeno-Yepes + AlanAronson 182–183 W11-0223 jimeno-yepes-aronson-2011-self @@ -637,7 +637,7 @@ Medstract - The Next Generation MarcVerhagen - JamesPustejovsky + JamesPustejovsky 184–185 W11-0224 verhagen-pustejovsky-2011-medstract @@ -657,8 +657,8 @@ Proceedings of the Fifteenth Conference on Computational Natural Language Learning W11-03 - SharonGoldwater - ChristopherManning + SharonGoldwater + ChristopherManning Association for Computational Linguistics
Portland, Oregon, USA
June @@ -671,7 +671,7 @@ Modeling Syntactic Context Improves Morphological Segmentation - Yoong KeokLee + Yoong KeokLee AriaHaghighi ReginaBarzilay 1–9 @@ -687,9 +687,9 @@ <fixed-case>P</fixed-case>unctuation: Making a Point in Unsupervised Dependency Parsing - Valentin I.Spitkovsky - HiyanAlshawi - DanielJurafsky + Valentin I.Spitkovsky + HiyanAlshawi + DanielJurafsky 19–28 W11-0303 spitkovsky-etal-2011-punctuation @@ -728,7 +728,7 @@ Using Sequence Kernels to identify Opinion Entities in <fixed-case>U</fixed-case>rdu SmruthiMukund DebanjanGhosh - RohiniSrihari + RohiniSrihari 58–67 W11-0308 mukund-etal-2011-using @@ -752,9 +752,9 @@ Improving the Impact of Subjectivity Word Sense Disambiguation on Contextual Opinion Analysis CemAkkaya - JanyceWiebe + JanyceWiebe AlexanderConrad - RadaMihalcea + RadaMihalcea 87–96 W11-0311 akkaya-etal-2011-improving @@ -770,7 +770,7 @@ Assessing Benefit from Feature Feedback in Active Learning for Text Classification ShilpaArora - EricNyberg + EricNyberg 106–114 W11-0313 arora-nyberg-2011-assessing @@ -779,7 +779,7 @@ <fixed-case>ULISSE</fixed-case>: an Unsupervised Algorithm for Detecting Reliable Dependency Parses FeliceDell’Orletta GiuliaVenturi - SimonettaMontemagni + SimonettaMontemagni 115–124 W11-0314 dellorletta-etal-2011-ulisse @@ -806,10 +806,10 @@ Using Second-order Vectors in a Knowledge-based Method for Acronym Disambiguation - Bridget T.McInnes + Bridget T.McInnes TedPedersen YingLiu - Serguei V.Pakhomov + Serguei V.Pakhomov Genevieve B.Melton 145–153 W11-0317 @@ -820,7 +820,7 @@ KoheiOzaki MasashiShimbo MamoruKomachi - YujiMatsumoto + YujiMatsumoto 154–162 W11-0318 ozaki-etal-2011-using @@ -860,7 +860,7 @@ Filling the Gap: Semi-Supervised Learning for Opinion Detection Across Domains NingYu - SandraKübler + SandraKübler 200–209 W11-0323 yu-kubler-2011-filling @@ -883,8 +883,8 @@ Composing Simple Image Descriptions using Web-scale N-grams SimingLi GirishKulkarni - Tamara LBerg - Alexander CBerg + Tamara LBerg + Alexander CBerg YejinChoi 220–228 W11-0326 @@ -902,17 +902,17 @@ Learning with Lookahead: Can History-Based Models Rival Globally Optimized Models? YoshimasaTsuruoka YusukeMiyao - Jun’ichiKazama + Jun’ichiKazama 238–246 W11-0328 tsuruoka-etal-2011-learning Learning Discriminative Projections for Text Similarity Measures - Wen-tauYih + Wen-tauYih KristinaToutanova - John C.Platt - ChristopherMeek + John C.Platt + ChristopherMeek 247–256 W11-0329 W11-0329.Presentation.pptx @@ -923,9 +923,9 @@ Proceedings of the 5th Linguistic Annotation Workshop W11-04 - NancyIde - AdamMeyers - SameerPradhan + NancyIde + AdamMeyers + SameerPradhan KatrinTomanek Association for Computational Linguistics
Portland, Oregon, USA
@@ -939,9 +939,9 @@ On the Development of the <fixed-case>RST</fixed-case> <fixed-case>S</fixed-case>panish Treebank - Iriada Cunha - Juan-ManuelTorres-Moreno - GerardoSierra + Iriada Cunha + Juan-ManuelTorres-Moreno + GerardoSierra 1–10 W11-0401 da-cunha-etal-2011-development @@ -949,7 +949,7 @@ <fixed-case>OWL</fixed-case>/<fixed-case>DL</fixed-case> formalization of the <fixed-case>MULTEXT</fixed-case>-East morphosyntactic specifications ChristianChiarcos - TomažErjavec + TomažErjavec 11–20 W11-0402 chiarcos-erjavec-2011-owl @@ -957,8 +957,8 @@ Analysis of the <fixed-case>H</fixed-case>indi <fixed-case>P</fixed-case>roposition <fixed-case>B</fixed-case>ank using Dependency Structure AshwiniVaidya - JinhoChoi - MarthaPalmer + JinhoChoi + MarthaPalmer BhuvanaNarasimhan 21–29 W11-0403 @@ -967,7 +967,7 @@ How Good is the Crowd at “real” <fixed-case>WSD</fixed-case>? JisupHong - Collin F.Baker + Collin F.Baker 30–37 W11-0404 hong-baker-2011-good @@ -991,11 +991,11 @@ A Collaborative Annotation between Human Annotators and a Statistical Parser - Shun’yaIwasawa + Shun’yaIwasawa HirokiHanaoka TakuyaMatsuzaki YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 56–64 W11-0407 iwasawa-etal-2011-collaborative @@ -1003,7 +1003,7 @@ Reducing the Need for Double Annotation DmitriyDligach - MarthaPalmer + MarthaPalmer 65–73 W11-0408 dligach-palmer-2011-reducing @@ -1017,9 +1017,9 @@ A scaleable automated quality assurance technique for semantic representations and proposition banks - K. BretonnelCohen - LawrenceHunter - MarthaPalmer + K. BretonnelCohen + LawrenceHunter + MarthaPalmer 82–91 W11-0410 cohen-etal-2011-scaleable @@ -1027,8 +1027,8 @@ Proposal for an Extension of Traditional Named Entities: From Guidelines to Evaluation, an Overview CyrilGrouin - SophieRosset - PierreZweigenbaum + SophieRosset + PierreZweigenbaum KarënFort OlivierGalibert LudovicQuintard @@ -1039,7 +1039,7 @@ Assessing the practical usability of an automatically annotated corpus Md. Faisal MahbubChowdhury - AlbertoLavelli + AlbertoLavelli 101–109 W11-0412 chowdhury-lavelli-2011-assessing @@ -1047,7 +1047,7 @@ Subjectivity and Sentiment Annotation of <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic Newswire MuhammadAbdul-Mageed - MonaDiab + MonaDiab 110–118 W11-0413 abdul-mageed-diab-2011-subjectivity @@ -1055,7 +1055,7 @@ Creating an Annotated <fixed-case>T</fixed-case>amil Corpus as a Discourse Resource Ravi TejaRachakonda - Dipti MisraSharma + Dipti MisraSharma 119–123 W11-0414 rachakonda-sharma-2011-creating @@ -1065,7 +1065,7 @@ SilkeScheible Richard J.Whitt MartinDurrell - PaulBennett + PaulBennett 124–128 W11-0415 scheible-etal-2011-gold @@ -1081,7 +1081,7 @@ Empty Categories in <fixed-case>H</fixed-case>indi Dependency Treebank: Analysis and Recovery ChaitanyaGSK SamarHusain - PrashanthMannem + PrashanthMannem 134–142 W11-0417 gsk-etal-2011-empty @@ -1089,17 +1089,17 @@ Annotating Events, Temporal Expressions and Relations in <fixed-case>I</fixed-case>talian: the It-Timeml Experience for the Ita-<fixed-case>T</fixed-case>ime<fixed-case>B</fixed-case>ank TommasoCaselli - ValentinaBartalesi Lenzi - RacheleSprugnoli - EmanuelePianta - IrinaProdanof + ValentinaBartalesi Lenzi + RacheleSprugnoli + EmanuelePianta + IrinaProdanof 143–151 W11-0418 caselli-etal-2011-annotating Increasing Informativeness in Temporal Annotation - JamesPustejovsky + JamesPustejovsky AmberStubbs 152–160 W11-0419 @@ -1119,7 +1119,7 @@ Proceedings of the Workshop on Automatic Summarization for Different Genres, Media, and Languages W11-05 AniNenkova - JuliaHirschberg + JuliaHirschberg YangLiu Association for Computational Linguistics
Portland, Oregon
@@ -1133,8 +1133,8 @@ Plans Toward Automated Chat Summarization - David C.Uthus - David W.Aha + David C.Uthus + David W.Aha 1–7 W11-0501 uthus-aha-2011-plans @@ -1143,8 +1143,8 @@ Towards Multi-Document Summarization of Scientific Articles:Making Interesting Comparisons with <fixed-case>S</fixed-case>ci<fixed-case>S</fixed-case>umm NitinAgarwal Ravi ShankarReddy - KiranGvr - Carolyn PensteinRosé + KiranGvr + Carolyn PensteinRosé 8–15 W11-0502 agarwal-etal-2011-towards @@ -1152,7 +1152,7 @@ Summarizing Decisions in Spoken Meetings LuWang - ClaireCardie + ClaireCardie 16–24 W11-0503 wang-cardie-2011-summarizing @@ -1160,14 +1160,14 @@ Who wrote What Where: Analyzing the content of human and automatic summaries KarolinaOwczarzak - HoaDang + HoaDang 25–32 W11-0504 owczarzak-dang-2011-wrote <fixed-case>W</fixed-case>iki<fixed-case>T</fixed-case>opics: What is Popular on <fixed-case>W</fixed-case>ikipedia and Why - Byung GyuAhn + Byung GyuAhn BenjaminVan Durme ChrisCallison-Burch 33–40 @@ -1178,8 +1178,8 @@ Abstractive Summarization of Line Graphs from Popular Media CharlesGreenbacker PengWu - SandraCarberry - KathleenMcCoy + SandraCarberry + KathleenMcCoy StephanieElzer 41–48 W11-0506 @@ -1214,7 +1214,7 @@ Testing the Robustness of Online Word Segmentation: Effects of Linguistic Diversity and Phonetic Variation LucBoruta SharonPeperkamp - BenoîtCrabbé + BenoîtCrabbé EmmanuelDupoux 1–9 W11-0601 @@ -1223,37 +1223,37 @@ A <fixed-case>B</fixed-case>ayesian Belief Updating Model of Phonetic Recalibration and Selective Adaptation DaveKleinschmidt - T. FlorianJaeger + T. FlorianJaeger 10–19 W11-0602 kleinschmidt-jaeger-2011-bayesian Unsupervised Syntactic Chunking with Acoustic Cues: Computational Models for Prosodic Bootstrapping - JohnPate - SharonGoldwater + JohnPate + SharonGoldwater 20–29 W11-0603 pate-goldwater-2011-unsupervised A Statistical Test for Grammar - CharlesYang + CharlesYang 30–38 W11-0604 yang-2011-statistical Top-Down Recognizers for <fixed-case>MCFG</fixed-case>s and <fixed-case>MG</fixed-case>s - EdwardStabler + EdwardStabler 39–48 W11-0605 stabler-2011-top Exploring the Relationship Between Learnability and Linguistic Universals - Anna N.Rafferty - Thomas L.Griffiths + Anna N.Rafferty + Thomas L.Griffiths MarcEttlinger 49–57 W11-0606 @@ -1285,9 +1285,9 @@ Classification of Atypical Language in Autism - Emily T.Prud’hommeaux + Emily T.Prud’hommeaux BrianRoark - Lois M.Black + Lois M.Black Janvan Santen 88–96 W11-0610 @@ -1295,7 +1295,7 @@ Colourful Language: Measuring Word-Colour Associations - SaifMohammad + SaifMohammad 97–106 W11-0611 mohammad-2011-colourful @@ -1313,7 +1313,7 @@ Proceedings of the Workshop on Language in Social Media (LSM 2011) W11-07 - MeenakshiNagarajan + MeenakshiNagarajan MichaelGamon Association for Computational Linguistics
Portland, Oregon
@@ -1335,9 +1335,9 @@ How can you say such things?!?: Recognizing Disagreement in Informal Political Argument RobAbbott - MarilynWalker - PranavAnand - Jean E.Fox Tree + MarilynWalker + PranavAnand + Jean E.Fox Tree RobesonBowmani JosephKing 2–11 @@ -1347,7 +1347,7 @@ What pushes their buttons? Predicting comment polarity from the content of political blog posts RamnathBalasubramanyan - William W.Cohen + William W.Cohen DougPierce David P.Redlawsk 12–19 @@ -1359,7 +1359,7 @@ StephanGouws DonaldMetzler CongxingCai - EduardHovy + EduardHovy 20–29 W11-0704 gouws-etal-2011-contextual @@ -1369,8 +1369,8 @@ ApoorvAgarwal BoyiXie IliaVovsha - OwenRambow - RebeccaPassonneau + OwenRambow + RebeccaPassonneau 30–38 W11-0705 agarwal-etal-2011-sentiment @@ -1379,21 +1379,21 @@ Detecting Forum Authority Claims in Online Discussions AlexMarin BinZhang - MariOstendorf + MariOstendorf 39–47 W11-0706 marin-etal-2011-detecting Annotating Social Acts: Authority Claims and Alignment Moves in <fixed-case>W</fixed-case>ikipedia Talk Pages - Emily M.Bender + Emily M.Bender Jonathan T.Morgan MeghanOxley MarkZachry BrianHutchinson AlexMarin BinZhang - MariOstendorf + MariOstendorf 48–57 W11-0707 bender-etal-2011-annotating @@ -1422,7 +1422,7 @@ Language use as a reflection of socialization in online communities DongNguyen - CarolynP. Rosé + CarolynP. Rosé 76–85 W11-0710 nguyen-p-rose-2011-language @@ -1456,15 +1456,15 @@ <fixed-case>MWE</fixed-case>s and Topic Modelling: Enhancing Machine Learning with Linguistics - TimothyBaldwin + TimothyBaldwin 1 W11-0801 baldwin-2011-mwes Automatic Extraction of <fixed-case>NV</fixed-case> Expressions in <fixed-case>B</fixed-case>asque: Basic Issues on Cooccurrence Techniques - AnttonGurrutxaga - IñakiAlegria + AnttonGurrutxaga + IñakiAlegria 2–7 W11-0802 gurrutxaga-alegria-2011-automatic @@ -1473,7 +1473,7 @@ Semantic Clustering: an Attempt to Identify Multiword Expressions in <fixed-case>B</fixed-case>engali TanmoyChakraborty DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 8–13 W11-0803 chakraborty-etal-2011-semantic @@ -1482,14 +1482,14 @@ Decreasing Lexical Data Sparsity in Statistical Syntactic Parsing - Experiments with Named Entities DeirdreHogan JenniferFoster - Josefvan Genabith + Josefvan Genabith 14–19 W11-0804 hogan-etal-2011-decreasing Detecting Multi-Word Expressions Improves Word Sense Disambiguation - MarkFinlayson + MarkFinlayson NidhiKulkarni 20–24 W11-0805 @@ -1498,7 +1498,7 @@ Tree-Rewriting Models of Multi-Word Expressions WilliamSchuler - AravindJoshi + AravindJoshi 25–30 W11-0806 schuler-joshi-2011-tree @@ -1520,7 +1520,7 @@ <fixed-case>MWU</fixed-case>-Aware Part-of-Speech Tagging with a <fixed-case>CRF</fixed-case> Model and Lexical Resources - MatthieuConstant + MatthieuConstant AnthonySigogne 49–56 W11-0809 @@ -1544,9 +1544,9 @@ Identifying and Analyzing <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese Complex Predicates - MagaliSanches Duran + MagaliSanches Duran CarlosRamisch - Sandra MariaAluísio + Sandra MariaAluísio AlineVillavicencio 74–82 W11-0812 @@ -1589,7 +1589,7 @@ Detecting Noun Compounds and Light Verb Constructions: a Contrastive Study VeronikaVincze - IstvánNagy T. + IstvánNagy T. GáborBerend 116–121 W11-0817 @@ -1598,7 +1598,7 @@ j<fixed-case>MWE</fixed-case>: A <fixed-case>J</fixed-case>ava Toolkit for Detecting Multi-Word Expressions NidhiKulkarni - MarkFinlayson + MarkFinlayson 122–124 W11-0818 kulkarni-finlayson-2011-jmwe @@ -1606,7 +1606,7 @@ <fixed-case>F</fixed-case>ips<fixed-case>C</fixed-case>o<fixed-case>V</fixed-case>iew: On-line Visualisation of Collocations Extracted from Multilingual Parallel Corpora VioletaSeretan - EricWehrli + EricWehrli 125–127 W11-0819 seretan-wehrli-2011-fipscoview @@ -1623,7 +1623,7 @@ The Ngram Statistics Package (Text::<fixed-case>NSP</fixed-case>) : A Flexible Tool for Identifying Ngrams, Collocations, and Word Associations TedPedersen SatanjeevBanerjee - BridgetMcInnes + BridgetMcInnes SaiyamKohli MaheshJoshi YingLiu @@ -1633,7 +1633,7 @@ Fast and Flexible <fixed-case>MWE</fixed-case> Candidate Generation with the mwetoolkit - VitorDe Araujo + VitorDe Araujo CarlosRamisch AlineVillavicencio 134–136 @@ -1642,7 +1642,7 @@ How Many Multiword Expressions do People Know? - KennethChurch + KennethChurch 137–144 W11-0823 church-2011-many @@ -1654,10 +1654,10 @@ W11-09 Su NamKim ZornitsaKozareva - PreslavNakov + PreslavNakov DiarmuidÓ Séaghdha - SebastianPadó - StanSzpakowicz + SebastianPadó + StanSzpakowicz Association for Computational Linguistics
Portland, Oregon, USA
June @@ -1670,7 +1670,7 @@ Going Beyond Shallow Semantics - MarthaPalmer + MarthaPalmer 1 W11-0901 palmer-2011-going @@ -1681,7 +1681,7 @@ DavidMcClosky MasonSmith AndreyGusev - ChristopherManning + ChristopherManning 2–10 W11-0902 surdeanu-etal-2011-customizing @@ -1689,7 +1689,7 @@ Extraction of Semantic Word Relations in <fixed-case>T</fixed-case>urkish from Dictionary Definitions ŞerbetçiAyşe - OrhanZeynep + OrhanZeynep Pehlivanİlknur 11–18 W11-0903 @@ -1697,10 +1697,10 @@ Identifying Event-Sentiment Association using Lexical Equivalence and Co-reference Approaches - AnupKolya + AnupKolya DipankarDas AsifEkbal - SivajiBandyopadhyay + SivajiBandyopadhyay 19–27 W11-0904 kolya-etal-2011-identifying @@ -1709,15 +1709,15 @@ <fixed-case>V</fixed-case>ig<fixed-case>N</fixed-case>et: Grounding Language in Graphics using Frame Semantics BobCoyne DanielBauer - OwenRambow + OwenRambow 28–36 W11-0905 coyne-etal-2011-vignet Transition-based Semantic Role Labeling Using Predicate Argument Clustering - Jinho D.Choi - MarthaPalmer + Jinho D.Choi + MarthaPalmer 37–45 W11-0906 choi-palmer-2011-transition @@ -1732,15 +1732,15 @@ Desperately Seeking Implicit Arguments in Text SaraTonelli - RodolfoDelmonte + RodolfoDelmonte 54–62 W11-0908 tonelli-delmonte-2011-desperately A Joint Model of Implicit Arguments for Nominal Predicates - MatthewGerber - JoyceChai + MatthewGerber + JoyceChai RobertBart 63–71 W11-0909 @@ -1748,11 +1748,11 @@ Incorporating Coercive Constructions into a Verb Lexicon - ClaireBonial - Susan WindischBrown + ClaireBonial + Susan WindischBrown Jena D.Hwang ChristopherParisien - MarthaPalmer + MarthaPalmer SuzanneStevenson 72–80 W11-0910 @@ -1780,7 +1780,7 @@ Automatic Projection of Semantic Structures: an Application to Pairwise Translation Ranking DanielePighin - LluísMàrquez + LluísMàrquez 1–9 W11-1001 pighin-marquez-2011-automatic @@ -1796,7 +1796,7 @@ Semantic Mapping Using Automatic Word Alignment and Semantic Role Labeling ShuminWu - MarthaPalmer + MarthaPalmer 21–30 W11-1003 wu-palmer-2011-semantic @@ -1821,7 +1821,7 @@ An Evaluation and Possible Improvement Path for Current <fixed-case>SMT</fixed-case> Behavior on Ambiguous Nouns ElsLefever - VéroniqueHoste + VéroniqueHoste 52–60 W11-1006 lefever-hoste-2011-evaluation @@ -1847,14 +1847,14 @@ A Dependency Based Statistical Translation Model GiuseppeAttardi AtanasChanev - Antonio ValerioMiceli Barone + Antonio ValerioMiceli Barone 79–87 W11-1009 attardi-etal-2011-dependency Improving <fixed-case>MT</fixed-case> Word Alignment Using Aligned Multi-Stage Parses - AdamMeyers + AdamMeyers MichikoKosaka ShashaLiao NianwenXue @@ -1865,7 +1865,7 @@ Automatic Category Label Coarsening for Syntax-Based Machine Translation GregHanneman - AlonLavie + AlonLavie 98–106 W11-1011 hanneman-lavie-2011-automatic @@ -1873,26 +1873,26 @@ Utilizing Target-Side Semantic Role Labels to Assist Hierarchical Phrase-based Machine Translation QinGao - StephanVogel + StephanVogel 107–115 W11-1012 gao-vogel-2011-utilizing Combining statistical and semantic approaches to the translation of ontologies and taxonomies - JohnMcCrae + JohnMcCrae MauricioEspinoza ElenaMontiel-Ponsoda - GuadalupeAguado-de-Cea - PhilippCimiano + GuadalupeAguado-de-Cea + PhilippCimiano 116–125 W11-1013 mccrae-etal-2011-combining A Semantic Feature for Statistical Machine Translation - Rafael E.Banchs - Marta R.Costa-jussà + Rafael E.Banchs + Marta R.Costa-jussà 126–134 W11-1014 banchs-costa-jussa-2011-semantic @@ -1901,7 +1901,7 @@ A General-Purpose Rule Extractor for <fixed-case>SCFG</fixed-case>-Based Machine Translation GregHanneman MichelleBurroughs - AlonLavie + AlonLavie 135–144 W11-1015 hanneman-etal-2011-general @@ -1911,10 +1911,10 @@ Proceedings of TextGraphs-6: Graph-based Methods for Natural Language Processing W11-11 - IrinaMatveeva + IrinaMatveeva AlessandroMoschitti - LluísMàrquez - FabioMassimo Zanzotto + LluísMàrquez + FabioMassimo Zanzotto Association for Computational Linguistics
Portland, Oregon
June @@ -1944,7 +1944,7 @@
Invariants and Variability of Synonymy Networks: Self Mediated Agreement by Confluence - BenoîtGaillard + BenoîtGaillard BrunoGaume EmmanuelNavarro 15–23 @@ -1961,7 +1961,7 @@ Using a <fixed-case>W</fixed-case>ikipedia-based Semantic Relatedness Measure for Document Clustering MajidYazdani - AndreiPopescu-Belis + AndreiPopescu-Belis 29–36 W11-1105 yazdani-popescu-belis-2011-using @@ -1970,15 +1970,15 @@ <fixed-case>G</fixed-case>rawl<fixed-case>TCQ</fixed-case>: Terminology and Corpora Building by Ranking Simultaneously Terms, Queries and Documents using Graph Random Walks XavierTannier JavierCouto - Clémentde Groc + Clémentde Groc 37–41 W11-1106 tannier-etal-2011-grawltcq Simultaneous Similarity Learning and Feature-Weight Learning for Document Clustering - PradeepMuthukrishnan - DragomirRadev + PradeepMuthukrishnan + DragomirRadev QiaozhuMei 42–50 W11-1107 @@ -1987,14 +1987,14 @@ Unrestricted Quantifier Scope Disambiguation MehdiManshadi - JamesAllen + JamesAllen 51–59 W11-1108 manshadi-allen-2011-unrestricted From ranked words to dependency trees: two-stage unsupervised non-projective dependency parsing - AndersSøgaard + AndersSøgaard 60–68 W11-1109 sogaard-2011-ranked @@ -2004,7 +2004,7 @@ Proceedings of the 4th Workshop on Building and Using Comparable Corpora: Comparable Corpora and the Web W11-12 - PierreZweigenbaum + PierreZweigenbaum ReinhardRapp SergeSharoff Association for Computational Linguistics @@ -2027,7 +2027,7 @@ The Copiale Cipher KevinKnight - BeátaMegyesi + BeátaMegyesi ChristianeSchaefer 2–9 W11-1202 @@ -2037,7 +2037,7 @@ Learning the Optimal Use of Dependency-parsing Information for Finding Translations with Comparable Corpora DanielAndrade TakuyaMatsuzaki - Jun’ichiTsujii + Jun’ichiTsujii 10–18 W11-1203 andrade-etal-2011-learning @@ -2046,7 +2046,7 @@ Building and Using Comparable Corpora for Domain-Specific Bilingual Lexicon Extraction DarjaFišer NikolaLjubešić - ŠpelaVintar + ŠpelaVintar SenjaPollak 19–26 W11-1204 @@ -2064,7 +2064,7 @@ Bilingual Lexicon Extraction from Comparable Corpora as Metasearch AmirHazem EmmanuelMorin - SebastianPeña Saldarriaga + SebastianPeña Saldarriaga 35–43 W11-1206 hazem-etal-2011-bilingual @@ -2072,7 +2072,7 @@ Two Ways to Use a Noisy Parallel News Corpus for Improving Statistical Machine Translation SouhirGahbiche-Braham - HélèneBonneau-Maynard + HélèneBonneau-Maynard FrançoisYvon 44–51 W11-1207 @@ -2089,7 +2089,7 @@ Extracting Parallel Phrases from Comparable Data SanjikaHewavitharana - StephanVogel + StephanVogel 61–68 W11-1209 hewavitharana-vogel-2011-extracting @@ -2098,8 +2098,8 @@ Active Learning with Multiple Annotations for Comparable Data Classification Task VamshiAmbati SanjikaHewavitharana - StephanVogel - JaimeCarbonell + StephanVogel + JaimeCarbonell 69–77 W11-1210 ambati-etal-2011-active @@ -2109,7 +2109,7 @@ BrunoCartoni SandrineZufferey ThomasMeyer - AndreiPopescu-Belis + AndreiPopescu-Belis 78–86 W11-1211 cartoni-etal-2011-comparable @@ -2117,14 +2117,14 @@ Identifying Parallel Documents from a Large Bilingual Collection of Texts: Application to Parallel Article Extraction in <fixed-case>W</fixed-case>ikipedia. AlexandrePatry - PhilippeLanglais + PhilippeLanglais 87–95 W11-1212 patry-langlais-2011-identifying Comparable Fora - JohankaSpoustová + JohankaSpoustová MiroslavSpousta 96–101 W11-1213 @@ -2132,7 +2132,7 @@ Unsupervised Alignment of Comparable Data and Text Resources - AnjaBelz + AnjaBelz EricKow 102–109 W11-1214 @@ -2140,14 +2140,14 @@ Cross-lingual Slot Filling from Comparable Corpora - MatthewSnover + MatthewSnover XiangLi Wen-PinLin ZhengChen SuzanneTamang MingminGe AdamLee - QiLi + QiLi HaoLi SamAnzaroot HengJi @@ -2157,7 +2157,7 @@ Towards a Data Model for the Universal Corpus - StevenAbney + StevenAbney StevenBird 120–127 W11-1216 @@ -2166,7 +2166,7 @@ An Expectation Maximization Algorithm for Textual Unit Alignment RaduIon - AlexandruCeauşu + AlexandruCeauşu ElenaIrimia 128–135 W11-1217 @@ -2193,7 +2193,7 @@ Proceedings of the Workshop on Distributional Semantics and Compositionality W11-13 - ChrisBiemann + ChrisBiemann EugenieGiesbrecht Association for Computational Linguistics
Portland, Oregon, USA
@@ -2207,8 +2207,8 @@ (Linear) Maps of the Impossible: Capturing Semantic Anomalies in Distributional Space - Eva MariaVecchi - MarcoBaroni + Eva MariaVecchi + MarcoBaroni RobertoZamparelli 1–9 W11-1301 @@ -2216,7 +2216,7 @@ Distributed Structures and Distributional Meaning - Fabio MassimoZanzotto + Fabio MassimoZanzotto LorenzoDell’Arciprete 10–15 W11-1302 @@ -2224,7 +2224,7 @@ Two Multivariate Generalizations of Pointwise Mutual Information - TimVan de Cruys + TimVan de Cruys 16–20 W11-1303 van-de-cruys-2011-two @@ -2239,10 +2239,10 @@ Shared Task System Description: Frustratingly Hard Compositionality Prediction - AndersJohannsen - HectorMartinez + AndersJohannsen + HectorMartinez ChristianRishøj - AndersSøgaard + AndersSøgaard 29–32 W11-1305 johannsen-etal-2011-shared @@ -2260,7 +2260,7 @@ SantanuPal TapabrataMondal TanikSaikh - SivajuBandyopadhyay + SivajuBandyopadhyay 38–42 W11-1307 chakraborty-etal-2011-shared @@ -2268,14 +2268,14 @@ Detecting Compositionality Using Semantic Vector Space Models Based on Syntactic Context. Shared Task System Description GuillermoGarrido - AnselmoPeñas + AnselmoPeñas 43–47 W11-1308 garrido-penas-2011-detecting Measuring the Compositionality of Collocations via Word Co-occurrence Vectors: Shared Task System Description - AlfredoMaldonado-Guerra + AlfredoMaldonado-Guerra MartinEmms 48–53 W11-1309 @@ -2284,7 +2284,7 @@ Exemplar-Based Word-Space Model for Compositionality Detection: Shared Task System Description SivaReddy - DianaMcCarthy + DianaMcCarthy SureshManandhar SpandanaGella 54–60 @@ -2296,7 +2296,7 @@ Proceedings of the Sixth Workshop on Innovative Use of NLP for Building Educational Applications W11-14 - JoelTetreault + JoelTetreault JillBurstein ClaudiaLeacock Association for Computational Linguistics @@ -2313,7 +2313,7 @@ Automatic Question Generation using Discourse Cues ManishAgarwal RakshitShah - PrashanthMannem + PrashanthMannem 1–9 W11-1401 agarwal-etal-2011-automatic @@ -2321,14 +2321,14 @@ Understanding Differences in Perceived Peer-Review Helpfulness using Natural Language Processing WentingXiong - DianeLitman + DianeLitman 10–19 W11-1402 xiong-litman-2011-understanding Generating Varied Narrative Probability Exercises - MariëtTheune + MariëtTheune RoanBoer Rookhuiszen Rieksop den Akker HannekeGeerlings @@ -2348,7 +2348,7 @@ Detecting Structural Events for Assessing Non-Native Speech LeiChen - Su-YounYoon + Su-YounYoon 38–45 W11-1405 chen-yoon-2011-detecting @@ -2366,7 +2366,7 @@ Automatic Gap-fill Question Generation from Text Books ManishAgarwal - PrashanthMannem + PrashanthMannem 56–64 W11-1407 agarwal-mannem-2011-automatic @@ -2409,11 +2409,11 @@ <fixed-case>GRASP</fixed-case>: Grammar- and Syntax-based Pattern-Finder in <fixed-case>CALL</fixed-case> - Chung-ChiHuang - Mei-HuaChen - Shih-TingHuang + Chung-ChiHuang + Mei-HuaChen + Shih-TingHuang Hsien-ChinLiou - Jason S.Chang + Jason S.Chang 96–104 W11-1412 huang-etal-2011-grasp @@ -2428,8 +2428,8 @@ Generating Concept Map Exercises from Textbooks - AndrewOlney - WhitneyCade + AndrewOlney + WhitneyCade ClaireWilliams 111–119 W11-1414 @@ -2438,7 +2438,7 @@ Readability Annotation: Replacing the Expert by the Crowd Philipvan Oosten - VéroniqueHoste + VéroniqueHoste 120–129 W11-1415 van-oosten-hoste-2011-readability @@ -2454,7 +2454,7 @@ Predicting Change in Student Motivation by Measuring Cohesion between Tutor and Student ArthurWard - DianeLitman + DianeLitman MaxineEskenazi 136–141 W11-1417 @@ -2470,7 +2470,7 @@ Non-scorable Response Detection for Automated Speaking Proficiency Assessment - Su-YounYoon + Su-YounYoon KeelanEvanini KlausZechner 152–160 @@ -2479,7 +2479,7 @@ Non-<fixed-case>E</fixed-case>nglish Response Detection Method for Automated Proficiency Scoring System - Su-YounYoon + Su-YounYoon DerrickHiggins 161–169 W11-1420 @@ -2488,8 +2488,8 @@ Bilingual Random Walk Models for Automated Grammar Correction of <fixed-case>ESL</fixed-case> Author-Produced Text RandyWest - Y. AlbertPark - RogerLevy + Y. AlbertPark + RogerLevy 170–179 W11-1421 west-etal-2011-bilingual @@ -2506,7 +2506,7 @@ Proceedings of the 5th ACL-HLT Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities W11-15 - KalliopiZervanou + KalliopiZervanou PiroskaLendvai Association for Computational Linguistics
Portland, OR, USA
@@ -2520,9 +2520,9 @@ Extending the tool, or how to annotate historical language varieties - CristinaSánchez-Marco - GemmaBoleda - LluísPadró + CristinaSánchez-Marco + GemmaBoleda + LluísPadró 1–9 W11-1501 sanchez-marco-etal-2011-extending @@ -2541,7 +2541,7 @@ SilkeScheible Richard J.Whitt MartinDurrell - PaulBennett + PaulBennett 19–23 W11-1503 scheible-etal-2011-evaluating @@ -2556,14 +2556,14 @@ Automatic linguistic annotation of historical language: <fixed-case>T</fixed-case>o<fixed-case>T</fixed-case>r<fixed-case>T</fixed-case>a<fixed-case>L</fixed-case>e and <fixed-case>XIX</fixed-case> century <fixed-case>S</fixed-case>lovene - TomažErjavec + TomažErjavec 33–38 W11-1505 erjavec-2011-automatic Historical Event Extraction from Text - Agata KatarzynaCybulska + Agata KatarzynaCybulska PiekVossen 39–43 W11-1506 @@ -2573,7 +2573,7 @@ Enrichment and Structuring of Archival Description Metadata KalliopiZervanou IoannisKorkontzelos - Antalvan den Bosch + Antalvan den Bosch SophiaAnaniadou 44–53 W11-1507 @@ -2581,10 +2581,10 @@ Structure-Preserving Pipelines for Digital Libraries - MassimoPoesio + MassimoPoesio EduardBarbu - EgonStemle - ChristianGirardi + EgonStemle + ChristianGirardi 54–62 W11-1508 poesio-etal-2011-structure @@ -2602,12 +2602,12 @@ Crowdsourcing syntactic relatedness judgements for opinion mining in the study of information technology adoption - Asad B.Sayeed + Asad B.Sayeed BryanRusk MartinPetrov Hieu C.Nguyen Timothy J.Meyer - AmyWeinberg + AmyWeinberg 69–77 W11-1510 sayeed-etal-2011-crowdsourcing @@ -2632,14 +2632,14 @@ Topic Modeling on Historical Newspapers Tze-IYang AndrewTorget - RadaMihalcea + RadaMihalcea 96–104 W11-1513 yang-etal-2011-topic From Once Upon a Time to Happily Ever After: Tracking Emotions in Novels and Fairy Tales - SaifMohammad + SaifMohammad 105–114 W11-1514 mohammad-2011-upon @@ -2647,8 +2647,8 @@ Author Age Prediction from Text using Linear Regression DongNguyen - Noah A.Smith - Carolyn P.Rosé + Noah A.Smith + Carolyn P.Rosé 115–123 W11-1515 nguyen-etal-2011-author @@ -2657,8 +2657,8 @@ A Study of Academic Collaborations in Computational Linguistics using a Latent Mixture of Authors Model NikhilJohri DanielRamage - DanielMcFarland - DanielJurafsky + DanielMcFarland + DanielJurafsky 124–132 W11-1516 johri-etal-2011-study @@ -2682,7 +2682,7 @@ Learning to Simplify Sentences Using <fixed-case>W</fixed-case>ikipedia - WillCoster + WillCoster DavidKauchak 1–9 W11-1601 @@ -2710,8 +2710,8 @@ Comparing Phrase-based and Syntax-based Paraphrase Generation SanderWubben ErwinMarsi - Antalvan den Bosch - EmielKrahmer + Antalvan den Bosch + EmielKrahmer 27–33 W11-1604 wubben-etal-2011-comparing @@ -2727,7 +2727,7 @@ Towards Strict Sentence Intersection: Decoding and Evaluation Strategies KapilThadani - KathleenMcKeown + KathleenMcKeown 43–53 W11-1606 thadani-mckeown-2011-towards @@ -2751,7 +2751,7 @@ Creating Disjunctive Logical Forms from Aligned Sentences for Grammar-Based Paraphrase Generation ScottMartin - MichaelWhite + MichaelWhite 74–83 W11-1609 martin-white-2011-creating @@ -2780,10 +2780,10 @@ Proceedings of the 2nd Workshop on Computational Approaches to Subjectivity and Sentiment Analysis (WASSA 2.011) W11-17 - AlexandraBalahur + AlexandraBalahur EsterBoldrini - AndresMontoyo - PatricioMartinez-Barco + AndresMontoyo + PatricioMartinez-Barco Association for Computational Linguistics
Portland, Oregon
June @@ -2796,10 +2796,10 @@ Cats Rule and Dogs Drool!: Classifying Stance in Online Debate - PranavAnand - MarilynWalker + PranavAnand + MarilynWalker RobAbbott - Jean E.Fox Tree + Jean E.Fox Tree RobesonBowmani MichaelMinor 1–9 @@ -2816,8 +2816,8 @@ Experiments with a Differential Semantics Annotation for <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et 3.0 - DanTufiş - DanŞtefănescu + DanTufiş + DanŞtefănescu 19–27 W11-1703 tufis-stefanescu-2011-experiments @@ -2828,12 +2828,12 @@ PolinaLenkova MohamedEbrahim MaudEhrmann - AliHurriyetoglu - MijailKabadjov + AliHurriyetoglu + MijailKabadjov RalfSteinberger - HristoTanev + HristoTanev VanniZavarella - SilviaVázquez + SilviaVázquez 28–36 W11-1704 steinberger-etal-2011-creating @@ -2841,7 +2841,7 @@ Generating Semantic Orientation Lexicon using Large Data and Thesaurus AmitGoyal - HalDaumé + HalDaumé 37–43 W11-1705 goyal-daume-2011-generating @@ -2867,14 +2867,14 @@ A Link to the Past: Constructing Historical Social Networks Matjevan de Camp - Antalvan den Bosch + Antalvan den Bosch 61–69 W11-1708 van-de-camp-van-den-bosch-2011-link Tracking Sentiment in Mail: How Genders Differ on Emotional Axes - SaifMohammad + SaifMohammad TonyYang 70–79 W11-1709 @@ -2884,8 +2884,8 @@ Developing <fixed-case>J</fixed-case>apanese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Affect for Analyzing Emotions YoshimitsuTorii DipankarDas - SivajiBandyopadhyay - ManabuOkumura + SivajiBandyopadhyay + ManabuOkumura 80–86 W11-1710 torii-etal-2011-developing @@ -2910,7 +2910,7 @@ Automatic Emotion Classification for Interpersonal Communication FrederikVaassen - WalterDaelemans + WalterDaelemans 104–110 W11-1713 vaassen-daelemans-2011-automatic @@ -2919,7 +2919,7 @@ Automatic Sentiment Classification of Product Reviews Using Maximal Phrases Based Analysis MariaTchalakova DaleGerdemann - DetmarMeurers + DetmarMeurers 111–117 W11-1714 tchalakova-etal-2011-automatic @@ -2944,17 +2944,17 @@ Robust Sense-based Sentiment Classification - BalamuraliAR + BalamuraliAR AdityaJoshi - PushpakBhattacharyya + PushpakBhattacharyya 132–138 W11-1717 ar-etal-2011-robust Sentiment Classification Using Semantic Features Extracted from <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-based Resources - YoanGutiérrez - SoniaVázquez + YoanGutiérrez + SoniaVázquez AndrésMontoyo 139–145 W11-1718 @@ -2963,7 +2963,7 @@ On the Difficulty of Clustering Microblog Texts for Online Reputation Management FernandoPerez-Tellez - DavidPinto + DavidPinto JohnCardiff PaoloRosso 146–152 @@ -2993,7 +2993,7 @@ Towards a Unified Approach for Opinion Question Answering and Summarization ElenaLloret AlexandraBalahur - ManuelPalomar + ManuelPalomar AndrésMontoyo 168–174 W11-1722 @@ -3018,13 +3018,13 @@ Sentimatrix – Multilingual Sentiment Analysis Service - Alexandru-LucianGînscă - EmanuelaBoroș + Alexandru-LucianGînscă + EmanuelaBoroș AdrianIftene - DianaTrandabăț + DianaTrandabăț MihaiToader MariusCorîci - Cenel-AugustoPerez + Cenel-AugustoPerez DanCristea 189–195 W11-1725 @@ -3035,7 +3035,7 @@ Proceedings of BioNLP Shared Task 2011 Workshop W11-18 - Jun’ichiTsujii + Jun’ichiTsujii Jin-DongKim SampoPyysalo Association for Computational Linguistics @@ -3106,7 +3106,7 @@ Event Extraction as Dependency Parsing for <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> 2011 DavidMcClosky MihaiSurdeanu - ChristopherManning + ChristopherManning 41–45 W11-1806 mcclosky-etal-2011-event-extraction @@ -3125,7 +3125,7 @@ DavidMcClosky MihaiSurdeanu AndrewMcCallum - Christopher D.Manning + Christopher D.Manning 51–55 W11-1808 riedel-etal-2011-model @@ -3136,7 +3136,7 @@ JulienJourde PhilippeBessières Maartenvan de Guchte - ClaireNédellec + ClaireNédellec 56–64 W11-1809 bossy-etal-2011-bionlp @@ -3148,7 +3148,7 @@ PhilippeVeber KarënFort RobertBossy - ErickAlphonse + ErickAlphonse PhilippeBessières 65–73 W11-1810 @@ -3175,7 +3175,7 @@ The Taming of Reconcile as a Biomedical Coreference Resolver YoungjunKim - EllenRiloff + EllenRiloff NathanGilbert 89–93 W11-1813 @@ -3195,7 +3195,7 @@ WiktoriaGolik PierreWarnier PhilippeVeber - ClaireNédellec + ClaireNédellec 102–111 W11-1815 ratkovic-etal-2011-bionlp @@ -3215,7 +3215,7 @@ Sentence Filtering for <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case>: Searching for Renaming Acts PierreWarnier - ClaireNédellec + ClaireNédellec 121–129 W11-1817 warnier-nedellec-2011-sentence @@ -3224,7 +3224,7 @@ Complex Biological Event Extraction from Full Text using Signatures of Linguistic and Semantic Features Liam R.McGrath KellyDomico - Courtney D.Corley + Courtney D.Corley Bobbie-JoWebb-Robertson 130–137 W11-1818 @@ -3232,11 +3232,11 @@ Using Kybots for Extracting Events in Biomedical Texts - ArantzaCasillas - ArantzaDíaz de Ilarraza - KoldoGojenola - MaiteOronoz - GermanRigau + ArantzaCasillas + ArantzaDíaz de Ilarraza + KoldoGojenola + MaiteOronoz + GermanRigau 138–142 W11-1819 casillas-etal-2011-using @@ -3261,9 +3261,9 @@ A Pattern Approach for Biomedical Event Annotation - QuangLe Minh + QuangLe Minh SonNguyen Truong - QuocHo Bao + QuocHo Bao 149–150 W11-1822 le-minh-etal-2011-pattern @@ -3283,7 +3283,7 @@ Double Layered Learning for Biological Event Extraction from Text EhsanEmadzadeh AzadehNikfarjam - GracielaGonzalez + GracielaGonzalez 153–154 W11-1824 emadzadeh-etal-2011-double @@ -3301,8 +3301,8 @@ From Graphs to Events: A Subgraph Matching Approach for Information Extraction from Biomedical Text HaibinLiu - RavikumarKomandur - KarinVerspoor + RavikumarKomandur + KarinVerspoor 164–172 W11-1826 liu-etal-2011-graphs @@ -3328,7 +3328,7 @@ Proceedings of the Fifteenth Conference on Computational Natural Language Learning: Shared Task W11-19 - SameerPradhan + SameerPradhan Association for Computational Linguistics
Portland, Oregon, USA
June @@ -3342,10 +3342,10 @@ <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2011 Shared Task: Modeling Unrestricted Coreference in <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes SameerPradhan - LanceRamshaw - MitchellMarcus - MarthaPalmer - RalphWeischedel + LanceRamshaw + MitchellMarcus + MarthaPalmer + RalphWeischedel NianwenXue 1–27 W11-1901 @@ -3355,10 +3355,10 @@ <fixed-case>S</fixed-case>tanford’s Multi-Pass Sieve Coreference Resolution System at the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2011 Shared Task HeeyoungLee YvesPeirsman - AngelChang - NathanaelChambers + AngelChang + NathanaelChambers MihaiSurdeanu - DanJurafsky + DanJurafsky 28–34 W11-1902 lee-etal-2011-stanfords @@ -3366,8 +3366,8 @@ <fixed-case>R</fixed-case>elax<fixed-case>C</fixed-case>or Participation in <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> Shared Task on Coreference Resolution EmiliSapena - LluísPadró - JordiTurmo + LluísPadró + JordiTurmo 35–39 W11-1903 sapena-etal-2011-relaxcor @@ -3377,7 +3377,7 @@ Kai-WeiChang RajhansSamdani AllaRozovskaya - NickRizzolo + NickRizzolo MarkSammons DanRoth 40–44 @@ -3394,7 +3394,7 @@ Rule and Tree Ensembles for Unrestricted Coreference Resolution - CiceroNogueira dos Santos + CiceroNogueira dos Santos DaviLopes Carvalho 51–55 W11-1906 @@ -3414,16 +3414,16 @@ OlgaUryupina SriparnaSaha AsifEkbal - MassimoPoesio + MassimoPoesio 61–65 W11-1908 uryupina-etal-2011-multi Combining Syntactic and Semantic Features by <fixed-case>SVM</fixed-case> for Unrestricted Coreference Resolution - HuiweiZhou + HuiweiZhou YaoLi - DegenHuang + DegenHuang YanZhang ChunlongWu YuanshengYang @@ -3434,7 +3434,7 @@ Supervised Coreference Resolution with <fixed-case>SUCRE</fixed-case> HamidrezaKobdani - HinrichSchuetze + HinrichSchuetze 71–75 W11-1910 kobdani-schuetze-2011-supervised @@ -3446,7 +3446,7 @@ FandongMeng YangLiu QunLiu - YajuanLv + YajuanLv 76–80 W11-1911 xiong-etal-2011-ets @@ -3463,25 +3463,25 @@ Narrative Schema as World Knowledge for Coreference Resolution JosephIrwin MamoruKomachi - YujiMatsumoto + YujiMatsumoto 86–92 W11-1913 irwin-etal-2011-narrative Hybrid Approach for Coreference Resolution - SobhaLalitha Devi - Pattabhi RKRao - R. Vijay SundarRam - CS.Malarkodi - A.Akilandeswari + Lalitha DeviSobha + Pattabhi RKRao + R. Vijay SundarRam + CS.Malarkodi + A.Akilandeswari 93–96 W11-1914 sobha-etal-2011-hybrid Poly-co: a multilayer perceptron approach for coreference detection - EricCharton + EricCharton MichelGagnon 97–101 W11-1915 @@ -3509,7 +3509,7 @@ <fixed-case>UBIU</fixed-case>: A Robust System for Resolving Unrestricted Coreference DesislavaZhekova - SandraKübler + SandraKübler 112–116 W11-1918 zhekova-kubler-2011-ubiu @@ -3518,7 +3518,7 @@ A Machine Learning-Based Coreference Detection System for <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes YaqinYang NianwenXue - PeterAnick + PeterAnick 117–121 W11-1919 yang-etal-2011-machine @@ -3528,7 +3528,7 @@ VeselinStoyanov UdayBabbar PracheerGupta - ClaireCardie + ClaireCardie 122–126 W11-1920 stoyanov-etal-2011-reconciling @@ -3556,10 +3556,10 @@ Proceedings of the SIGDIAL 2011 Conference W11-20 - Joyce Y.Chai - Johanna D.Moore - Rebecca J.Passonneau - David R.Traum + Joyce Y.Chai + Johanna D.Moore + Rebecca J.Passonneau + David R.Traum Association for Computational Linguistics
Portland, Oregon
June @@ -3579,19 +3579,19 @@
Spoken Dialog Challenge 2010: Comparison of Live and Control Test Results - Alan WBlack + Alan WBlack SusanneBurger AlistairConkie - HelenHastie + HelenHastie SimonKeizer OliverLemon NicolasMerigaud GabrielParent GabrielSchubiner BlaiseThomson - Jason D.Williams + Jason D.Williams KaiYu - SteveYoung + SteveYoung MaxineEskenazi 2–7 W11-2002 @@ -3612,7 +3612,7 @@ KazunoriKomatani KyokoMatsuyama KotaroFunakoshi - Hiroshi G.Okuno + Hiroshi G.Okuno 18–29 W11-2004 nakano-etal-2011-two @@ -3636,10 +3636,10 @@ The Impact of Task-Oriented Feature Sets on <fixed-case>HMM</fixed-case>s for Dialogue Modeling - KristyBoyer - Eun YoungHa - RobertPhillips - JamesLester + KristyBoyer + Eun YoungHa + RobertPhillips + JamesLester 49–58 W11-2007 boyer-etal-2011-impact @@ -3663,7 +3663,7 @@ Giving instructions in virtual environments by corpus based selection LucianaBenotti - AlexandreDenis + AlexandreDenis 68–77 W11-2010 benotti-denis-2011-giving @@ -3688,7 +3688,7 @@ Multiparty Turn Taking in Situated Dialog: Study, Lessons, and Directions - DanBohus + DanBohus EricHorvitz 98–109 W11-2013 @@ -3696,10 +3696,10 @@ Stability and Accuracy in Incremental Speech Recognition - EthanSelfridge + EthanSelfridge IkerArizmendi - PeterHeeman - JasonWilliams + PeterHeeman + JasonWilliams 110–119 W11-2014 selfridge-etal-2011-stability @@ -3714,15 +3714,15 @@ An Empirical Evaluation of a Statistical Dialog System in Public Use - JasonWilliams + JasonWilliams 130–141 W11-2016 williams-2011-empirical “The day after the day after tomorrow?” A machine learning approach to adaptive temporal expression generation: training and evaluation with real users - SrinivasanJanarthanam - HelenHastie + SrinivasanJanarthanam + HelenHastie OliverLemon XingkunLiu 142–151 @@ -3732,14 +3732,14 @@ Detecting Levels of Interest from Spoken Dialog with Multistream Prediction Feedback and Similarity Based Hierarchical Fusion Learning William YangWang - JuliaHirschberg + JuliaHirschberg 152–161 W11-2018 wang-hirschberg-2011-detecting Exploring User Satisfaction in a Tutorial Dialogue System - Myroslava O.Dzikovska + Myroslava O.Dzikovska Johanna D.Moore NatalieSteinhauser GwendolynCampbell @@ -3759,7 +3759,7 @@ Topics as Contextual Indicators for Word Choice in <fixed-case>SMS</fixed-case> Conversations UteWinter - RoniBen-Aharon + RoniBen-Aharon DanielChernobrov RonHecht 185–193 @@ -3769,7 +3769,7 @@ Multilingual Annotation and Disambiguation of Discourse Connectives for Machine Translation ThomasMeyer - AndreiPopescu-Belis + AndreiPopescu-Belis SandrineZufferey BrunoCartoni 194–203 @@ -3778,9 +3778,9 @@ Commitments to Preferences in Dialogue - AnaisCadilhac - NicholasAsher - FarahBenamara + AnaisCadilhac + NicholasAsher + FarahBenamara AlexLascarides 204–215 W11-2023 @@ -3789,8 +3789,8 @@ Using Performance Trajectories to Analyze the Immediate Impact of User State Misclassification in an Adaptive Spoken Dialogue System - KateForbes-Riley - DianeLitman + KateForbes-Riley + DianeLitman 216–226 W11-2024 forbes-riley-litman-2011-using @@ -3798,7 +3798,7 @@ Comparing Triggering Policies for Social Behaviors RohitKumar - CarolynRosé + CarolynRosé 227–238 W11-2025 kumar-rose-2011-comparing @@ -3816,7 +3816,7 @@ Rebecca J.Passonneau Susan L.Epstein TizianaLigorio - JoshuaGordon + JoshuaGordon 248–258 W11-2027 passonneau-etal-2011-embedded @@ -3827,7 +3827,7 @@ EtsuoMizukami YoshinoriShiga ShinichiKawamoto - HisashiKawai + HisashiKawai SatoshiNakamura 259–265 W11-2028 @@ -3835,7 +3835,7 @@ Learning to Balance Grounding Rationales for Dialogue Systems - JoshuaGordon + JoshuaGordon Rebecca J.Passonneau Susan L.Epstein 266–271 @@ -3901,7 +3901,7 @@ Examining the Impacts of Dialogue Content and System Automation on Affect Models in a Spoken Tutorial Dialogue System JoannaDrummond - DianeLitman + DianeLitman 312–318 W11-2036 drummond-litman-2011-examining @@ -3925,8 +3925,8 @@ An Incremental Architecture for the Semantic Annotation of Dialogue Corpora with High-Level Structures. A case of study for the <fixed-case>MEDIA</fixed-case> corpus. - Lina MariaRojas-Barahona - MatthieuQuignard + Lina MariaRojas-Barahona + MatthieuQuignard 332–334 W11-2039 rojas-barahona-quignard-2011-incremental @@ -3941,7 +3941,7 @@ Beetle <fixed-case>II</fixed-case>: an adaptable tutorial dialogue system - MyroslavaDzikovska + MyroslavaDzikovska AmyIsard PeterBell JohannaMoore @@ -3968,8 +3968,8 @@ <fixed-case>POMY</fixed-case>: A Conversational Virtual Environment for Language Learning in <fixed-case>POSTECH</fixed-case> HyungjongNoh KyusongLee - SungjinLee - Gary GeunbaeLee + SungjinLee + Gary GeunbaeLee 344–346 W11-2043 noh-etal-2011-pomy @@ -3986,7 +3986,7 @@ A Just-in-Time Document Retrieval System for Dialogues or Monologues - AndreiPopescu-Belis + AndreiPopescu-Belis MajidYazdani AlexandreNanchen Philip N.Garner @@ -4002,7 +4002,7 @@ ChrisCallison-Burch PhilippKoehn ChristofMonz - Omar F.Zaidan + Omar F.Zaidan Association for Computational Linguistics
Edinburgh, Scotland
July @@ -4015,7 +4015,7 @@ A Grain of Salt for the <fixed-case>WMT</fixed-case> Manual Evaluation - OndřejBojar + OndřejBojar MilošErcegovčević MartinPopel OmarZaidan @@ -4028,9 +4028,9 @@ DavidTalbot HidetoKazawa HiroshiIchikawa - JasonKatz-Brown + JasonKatz-Brown MasakazuSeno - FranzOch + FranzOch 12–21 W11-2102 talbot-etal-2011-lightweight @@ -4048,7 +4048,7 @@ Evaluate with Confidence Estimation: Machine ranking of translation outputs using grammatical features EleftheriosAvramidis - MajaPopovic + MajaPopovic DavidVilar AljoschaBurchardt 65–70 @@ -4075,7 +4075,7 @@ Meteor 1.3: Automatic Metric for Reliable Optimization and Evaluation of Machine Translation Systems MichaelDenkowski - AlonLavie + AlonLavie 85–91 W11-2107 denkowski-lavie-2011-meteor @@ -4083,14 +4083,14 @@ Approximating a Deep-Syntactic Metric for <fixed-case>MT</fixed-case> Evaluation and Tuning MatoušMacháček - OndřejBojar + OndřejBojar 92–98 W11-2108 machacek-bojar-2011-approximating Evaluation without references: <fixed-case>IBM</fixed-case>1 scores as evaluation metrics - MajaPopović + MajaPopović DavidVilar EleftheriosAvramidis AljoschaBurchardt @@ -4100,7 +4100,7 @@ Morphemes and <fixed-case>POS</fixed-case> tags for n-gram based evaluation metrics - MajaPopović + MajaPopović 104–107 W11-2110 popovic-2011-morphemes @@ -4108,9 +4108,9 @@ <fixed-case>E</fixed-case>-rating Machine Translation KristenParton - JoelTetreault + JoelTetreault NitinMadnani - MartinChodorow + MartinChodorow 108–115 W11-2111 parton-etal-2011-e @@ -4127,7 +4127,7 @@ Regression and Ranking based Optimisation for Sentence Level <fixed-case>MT</fixed-case> Evaluation XingyiSong - TrevorCohn + TrevorCohn 123–129 W11-2113 song-cohn-2011-regression @@ -4148,8 +4148,8 @@ The <fixed-case>UPV</fixed-case>-<fixed-case>PRHLT</fixed-case> combination system for <fixed-case>WMT</fixed-case> 2011 - JesúsGonzález-Rubio - FranciscoCasacuberta + JesúsGonzález-Rubio + FranciscoCasacuberta 140–144 W11-2116 gonzalez-rubio-casacuberta-2011-upv @@ -4157,7 +4157,7 @@ <fixed-case>CMU</fixed-case> System Combination in <fixed-case>WMT</fixed-case> 2011 KennethHeafield - AlonLavie + AlonLavie 145–151 W11-2117 heafield-lavie-2011-cmu @@ -4166,17 +4166,17 @@ The <fixed-case>RWTH</fixed-case> System Combination System for <fixed-case>WMT</fixed-case> 2011 GregorLeusch MarkusFreitag - HermannNey + HermannNey 152–158 W11-2118 leusch-etal-2011-rwth Expected <fixed-case>BLEU</fixed-case> Training for Graphs: <fixed-case>BBN</fixed-case> System Description for <fixed-case>WMT</fixed-case>11 System Combination Task - Antti-VeikkoRosti + Antti-VeikkoRosti BingZhang SpyrosMatsoukas - RichardSchwartz + RichardSchwartz 159–165 W11-2119 rosti-etal-2011-expected @@ -4217,8 +4217,8 @@ Wider Context by Using Bilingual Language Models in Machine Translation JanNiehues TeresaHerrmann - StephanVogel - AlexWaibel + StephanVogel + AlexWaibel 198–206 W11-2124 niehues-etal-2011-wider @@ -4226,7 +4226,7 @@ A Minimally Supervised Approach for Detecting and Ranking Document Translation Pairs KristeKrstovski - David A.Smith + David A.Smith 207–216 W11-2125 krstovski-smith-2011-minimally @@ -4243,7 +4243,7 @@ Fuzzy Syntactic Reordering for Phrase-based Statistical Machine Translation JacobAndreas NizarHabash - OwenRambow + OwenRambow 227–236 W11-2127 andreas-etal-2011-fuzzy @@ -4276,7 +4276,7 @@ Instance Selection for Machine Translation using Feature Decay Algorithms - ErgunBiçici + ErgunBiçici DenizYuret 272–283 W11-2131 @@ -4286,7 +4286,7 @@ Investigations on Translation Model Adaptation Using Monolingual Data PatrikLambert HolgerSchwenk - ChristopheServan + ChristopheServan SadafAbdul-Rauf 284–293 W11-2132 @@ -4311,16 +4311,16 @@ <fixed-case>LIMSI</fixed-case> @ <fixed-case>WMT</fixed-case>11 AlexandreAllauzen - HélèneBonneau-Maynard - Hai-SonLe + HélèneBonneau-Maynard + Hai-SonLe AurélienMax GuillaumeWisniewski FrançoisYvon - GillesAdda - Josep MariaCrego + GillesAdda + Josep MariaCrego AdrienLardilleux ThomasLavergne - ArtemSokolov + ArtemSokolov 309–315 W11-2135 allauzen-etal-2011-limsi @@ -4336,7 +4336,7 @@ <fixed-case>R</fixed-case>eg<fixed-case>MT</fixed-case> System for Machine Translation, System Combination, and Evaluation - ErgunBiçici + ErgunBiçici DenizYuret 323–329 W11-2137 @@ -4344,7 +4344,7 @@ Improving Translation Model by Monolingual Data - OndřejBojar + OndřejBojar AlešTamchyna 330–336 W11-2138 @@ -4352,10 +4352,10 @@ The <fixed-case>CMU</fixed-case>-<fixed-case>ARK</fixed-case> <fixed-case>G</fixed-case>erman-<fixed-case>E</fixed-case>nglish Translation System - ChrisDyer + ChrisDyer KevinGimpel - Jonathan H.Clark - Noah A.Smith + Jonathan H.Clark + Noah A.Smith 337–343 W11-2139 dyer-etal-2011-cmu @@ -4383,14 +4383,14 @@ GregorLeusch JoernWuebker StephanPeitz - HermannNey + HermannNey TeresaHerrmann JanNiehues - AlexWaibel + AlexWaibel AlexandreAllauzen - GillesAdda - Josep MariaCrego - BiankaBuschbeck + GillesAdda + Josep MariaCrego + BiankaBuschbeck TonioWandmacher JeanSenellart 358–364 @@ -4400,7 +4400,7 @@ <fixed-case>CMU</fixed-case> Syntax-Based Machine Translation at <fixed-case>WMT</fixed-case> 2011 GregHanneman - AlonLavie + AlonLavie 365–371 W11-2143 hanneman-lavie-2011-cmu @@ -4408,7 +4408,7 @@ The <fixed-case>U</fixed-case>ppsala-<fixed-case>FBK</fixed-case> systems at <fixed-case>WMT</fixed-case> 2011 ChristianHardmeier - JörgTiedemann + JörgTiedemann MarkusSaers MarcelloFederico MathurPrashant @@ -4421,7 +4421,7 @@ TeresaHerrmann MohammedMediani JanNiehues - AlexWaibel + AlexWaibel 379–385 W11-2145 herrmann-etal-2011-karlsruhe @@ -4432,7 +4432,7 @@ NguyenBach QinGao VamshiAmbati - StephanVogel + StephanVogel 386–392 W11-2146 hewavitharana-etal-2011-cmu @@ -4469,7 +4469,7 @@ ArnaudDagnelies SaabMansour GregorLeusch - HermannNey + HermannNey 405–412 W11-2149 huck-etal-2011-rwth @@ -4477,7 +4477,7 @@ <fixed-case>ILLC</fixed-case>-<fixed-case>U</fixed-case>v<fixed-case>A</fixed-case> translation system for <fixed-case>EMNLP</fixed-case>-<fixed-case>WMT</fixed-case> 2011 MaximKhalilov - KhalilSima’an + KhalilSima’an 413–419 W11-2150 khalilov-simaan-2011-illc @@ -4485,7 +4485,7 @@ <fixed-case>UPM</fixed-case> system for the translation task VerónicaLópez-Ludeña - RubénSan-Segundo + RubénSan-Segundo 420–425 W11-2151 lopez-ludena-san-segundo-2011-upm @@ -4495,7 +4495,7 @@ DavidMareček RudolfRosa PetraGaluščáková - OndřejBojar + OndřejBojar 426–432 W11-2152 marecek-etal-2011-two @@ -4505,7 +4505,7 @@ MartinPopel DavidMareček NathanGreen - ZdeněkŽabokrtský + ZdeněkŽabokrtský 433–439 W11-2153 popel-etal-2011-influence @@ -4513,12 +4513,12 @@ The <fixed-case>LIGA</fixed-case> (<fixed-case>LIG</fixed-case>/<fixed-case>LIA</fixed-case>) Machine Translation System for <fixed-case>WMT</fixed-case> 2011 MarionPotet - RaphaëlRubino + RaphaëlRubino BenjaminLecouteux StéphaneHuet - LaurentBesacier - HervéBlanchon - FabriceLefèvre + LaurentBesacier + HervéBlanchon + FabriceLefèvre 440–446 W11-2154 potet-etal-2011-liga @@ -4526,22 +4526,22 @@ Factored Translation with Unsupervised Word Clusters ChristianRishøj - AndersSøgaard + AndersSøgaard 447–451 W11-2155 rishoj-sogaard-2011-factored The <fixed-case>BM</fixed-case>-<fixed-case>I</fixed-case>2<fixed-case>R</fixed-case> <fixed-case>H</fixed-case>aitian-Créole-to-<fixed-case>E</fixed-case>nglish translation system description for the <fixed-case>WMT</fixed-case> 2011 evaluation campaign - MartaR. Costa-jussà - Rafael E.Banchs + MartaR. Costa-jussà + Rafael E.Banchs 452–456 W11-2156 r-costa-jussa-banchs-2011-bm The <fixed-case>U</fixed-case>niversitat d’Alacant hybrid machine translation system for <fixed-case>WMT</fixed-case> 2011 - Víctor M.Sánchez-Cartagena + Víctor M.Sánchez-Cartagena FelipeSánchez-Martínez Juan AntonioPérez-Ortiz 457–463 @@ -4553,7 +4553,7 @@ HolgerSchwenk PatrikLambert LoïcBarrault - ChristopheServan + ChristopheServan SadafAbdul-Rauf HaithemAfli KashifShah @@ -4583,7 +4583,7 @@ <fixed-case>DFKI</fixed-case> Hybrid Machine Translation System for <fixed-case>WMT</fixed-case> 2011 - On the Integration of <fixed-case>SMT</fixed-case> and <fixed-case>RBMT</fixed-case> JiaXu HansUszkoreit - CaseyKennington + CaseyKennington DavidVilar XiaojunZhang 485–489 @@ -4593,23 +4593,23 @@ <fixed-case>CEU</fixed-case>-<fixed-case>UPV</fixed-case> <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>panish system for <fixed-case>WMT</fixed-case>11 FranciscoZamora-Martínez - Maria JoseCastro-Bleda + Maria JoseCastro-Bleda 490–495 W11-2162 zamora-martinez-castro-bleda-2011-ceu Hierarchical Phrase-Based <fixed-case>MT</fixed-case> at the <fixed-case>C</fixed-case>harles <fixed-case>U</fixed-case>niversity for the <fixed-case>WMT</fixed-case> 2011 Shared Task - DanielZeman + DanielZeman 496–500 W11-2163 zeman-2011-hierarchical Crisis <fixed-case>MT</fixed-case>: Developing A Cookbook for <fixed-case>MT</fixed-case> in Crisis Situations - WilliamLewis + WilliamLewis RobertMunro - StephanVogel + StephanVogel 501–511 W11-2164 lewis-etal-2011-crisis @@ -4617,7 +4617,7 @@ Generative Models of Monolingual and Bilingual Gappy Patterns KevinGimpel - Noah A.Smith + Noah A.Smith 512–522 W11-2165 gimpel-smith-2011-generative @@ -4633,8 +4633,8 @@ <fixed-case>B</fixed-case>ayesian Extraction of Minimal <fixed-case>SCFG</fixed-case> Rules for Hierarchical Phrase-based Translation - BaskaranSankaran - GholamrezaHaffari + BaskaranSankaran + GholamrezaHaffari AnoopSarkar 533–541 W11-2167 @@ -4644,7 +4644,7 @@ From n-gram-based to <fixed-case>CRF</fixed-case>-based Translation Models ThomasLavergne AlexandreAllauzen - Josep MariaCrego + Josep MariaCrego FrançoisYvon 542–553 W11-2168 @@ -4671,7 +4671,7 @@ Unsupervised <fixed-case>NLP</fixed-case> and Human Language Acquisition: Making Connections to Make Progress - SharonGoldwater + SharonGoldwater 1 W11-2201 goldwater-2011-unsupervised @@ -4680,9 +4680,9 @@ Structured Databases of Named Entities from <fixed-case>B</fixed-case>ayesian Nonparametrics JacobEisenstein TaeYano - WilliamCohen - NoahSmith - EricXing + WilliamCohen + NoahSmith + EricXing 2–12 W11-2202 eisenstein-etal-2011-structured @@ -4711,7 +4711,7 @@ Unsupervised Language-Independent Name Translation Mining from <fixed-case>W</fixed-case>ikipedia Infoboxes Wen-PinLin - MatthewSnover + MatthewSnover HengJi 43–52 W11-2206 @@ -4730,9 +4730,9 @@ Unsupervised Bilingual <fixed-case>POS</fixed-case> Tagging with <fixed-case>M</fixed-case>arkov Random Fields DesaiChen - ChrisDyer - ShayCohen - NoahSmith + ChrisDyer + ShayCohen + NoahSmith 64–71 W11-2208 chen-etal-2011-unsupervised @@ -4743,7 +4743,7 @@ BorisDetienne StéphaneHuet DominiqueQuadri - FabriceLefèvre + FabriceLefèvre 72–81 W11-2209 camelin-etal-2011-unsupervised @@ -4762,7 +4762,7 @@ MatthiasHuck DavidVilar DanielStein - HermannNey + HermannNey 91–96 W11-2211 huck-etal-2011-lightly @@ -4770,7 +4770,7 @@ Unsupervised Alignment for Segmental-based Language Understanding StéphaneHuet - FabriceLefèvre + FabriceLefèvre 97–104 W11-2212 huet-lefevre-2011-unsupervised @@ -4831,7 +4831,7 @@ Towards technology-assisted co-construction with communication partners BrianRoark AndrewFowler - RichardSproat + RichardSproat ChristopherGibbons MelanieFried-Oken 22–31 @@ -4848,7 +4848,7 @@ Asynchronous fixed-grid scanning with dynamic codes - RussBeckley + RussBeckley BrianRoark 43–51 W11-2305 @@ -4858,10 +4858,10 @@ Improving the Accessibility of Line Graphs in Multimodal Documents Charles F.Greenbacker PengWu - SandraCarberry - Kathleen F.McCoy + SandraCarberry + Kathleen F.McCoy StephanieElzer - David D.McDonald + David D.McDonald DanielChester SenizDemir 52–62 @@ -4874,11 +4874,11 @@ BadriNarayan NagarajanMadasamy AshwinBellur - RaghavaKrishnan + RaghavaKrishnan KasthuriG. Vinodh M.Vishwanath KishorePrahallad - Hema A.Murthy + Hema A.Murthy 63–72 W11-2307 kurian-etal-2011-indian @@ -4886,7 +4886,7 @@ <fixed-case>READ</fixed-case>–<fixed-case>IT</fixed-case>: Assessing Readability of <fixed-case>I</fixed-case>talian Texts with a View to Text Simplification FeliceDell’Orletta - SimonettaMontemagni + SimonettaMontemagni GiuliaVenturi 73–83 W11-2308 @@ -4895,9 +4895,9 @@ Source Language Categorization for improving a Speech into Sign Language Translation System VerónicaLópez-Ludeña - RubénSan-Segundo + RubénSan-Segundo SyaheerahLufti - Juan ManuelLucas-Cuesta + Juan ManuelLucas-Cuesta Julián DavidEchevarry BeatrizMartínez-González 84–93 @@ -4923,7 +4923,7 @@ <fixed-case>L</fixed-case>ekbot: A talking and playing robot for children with disabilities - PeterLjunglöf + PeterLjunglöf BrittClaesson IngridMattsson Müller StinaEricsson @@ -4936,7 +4936,7 @@ Using lexical and corpus resources for augmenting the <fixed-case>AAC</fixed-case>-lexicon - KatarinaHeimann Mühlenbock + KatarinaHeimann Mühlenbock MatsLundälv 120–127 W11-2313 @@ -4946,7 +4946,7 @@ Experimental Identification of the Use of Hedges in the Simplification of Numerical Expressions SusanaBautista RaquelHervás - PabloGervás + PabloGervás RichardPower SandraWilliams 128–136 @@ -4955,17 +4955,17 @@ Towards an on-demand Simple <fixed-case>P</fixed-case>ortuguese <fixed-case>W</fixed-case>ikipedia - ArnaldoCandido Jr + ArnaldoCandido Jr AnnCopestake LuciaSpecia - Sandra MariaAluísio + Sandra MariaAluísio 137–147 W11-2315 candido-jr-etal-2011-towards <fixed-case>SLPAT</fixed-case> Demo Session - AnnaluWaller + AnnaluWaller 148–149 W11-2316 waller-2011-slpat @@ -4975,7 +4975,7 @@ Proceedings of the TextInfer 2011 Workshop on Textual Entailment W11-24 - SebastianPadó + SebastianPadó StefanThater Association for Computational Linguistics
Edinburgh, Scottland, UK
@@ -4989,7 +4989,7 @@ Evaluating Answers to Reading Comprehension Questions in Context: Results for <fixed-case>G</fixed-case>erman and the Role of Information Structure - DetmarMeurers + DetmarMeurers RamonZiai NielsOtt JaninaKopp @@ -5020,7 +5020,7 @@ Is it Worth Submitting this Run? Assess your <fixed-case>RTE</fixed-case> System with a Good Sparring Partner MilenKouylekov YasharMehdad - MatteoNegri + MatteoNegri 30–34 W11-2404 kouylekov-etal-2011-worth @@ -5036,7 +5036,7 @@ Representing and resolving ambiguities in ontology-based question answering ChristinaUnger - PhilippCimiano + PhilippCimiano 40–49 W11-2406 unger-cimiano-2011-representing @@ -5051,7 +5051,7 @@ Discovering Commonsense Entailment Rules Implicit in Sentences JonathanGordon - LenhartSchubert + LenhartSchubert 59–63 W11-2408 gordon-schubert-2011-discovering @@ -5061,7 +5061,7 @@ Proceedings of the GEMS 2011 Workshop on GEometrical Models of Natural Language Semantics W11-25 - SebastianPado + SebastianPado YvesPeirsman Association for Computational Linguistics
Edinburgh, UK
@@ -5075,7 +5075,7 @@ How we <fixed-case>BLESS</fixed-case>ed distributional semantic evaluation - MarcoBaroni + MarcoBaroni AlessandroLenci 1–10 W11-2501 @@ -5091,8 +5091,8 @@ Distributional semantics from text and images EliaBruni - Giang BinhTran - MarcoBaroni + Giang BinhTran + MarcoBaroni 22–32 W11-2503 bruni-etal-2011-distributional @@ -5110,14 +5110,14 @@ Encoding syntactic dependencies by vector permutation PierpaoloBasile AnnalinaCaputo - GiovanniSemeraro + GiovanniSemeraro 43–51 W11-2505 basile-etal-2011-encoding Assessing Interpretable, Attribute-related Meaning Representations for Adjective-Noun Phrases in a Similarity Prediction Task - MatthiasHartung + MatthiasHartung AnetteFrank 52–61 W11-2506 @@ -5125,8 +5125,8 @@ Experimenting with transitive verbs in a <fixed-case>D</fixed-case>is<fixed-case>C</fixed-case>o<fixed-case>C</fixed-case>at - EdwardGrefenstette - MehrnooshSadrzadeh + EdwardGrefenstette + MehrnooshSadrzadeh 62–66 W11-2507 grefenstette-sadrzadeh-2011-experimenting @@ -5134,7 +5134,7 @@ A distributional similarity approach to the detection of semantic change in the <fixed-case>G</fixed-case>oogle <fixed-case>B</fixed-case>ooks Ngram corpus. KristinaGulordava - MarcoBaroni + MarcoBaroni 67–71 W11-2508 gulordava-baroni-2011-distributional @@ -5146,7 +5146,7 @@ W11-26 JeremyJancsary FriedrichNeubarth - HaraldTrost + HaraldTrost Association for Computational Linguistics
Edinburgh, Scotland
July @@ -5159,10 +5159,10 @@ Dialect Translation: Integrating <fixed-case>B</fixed-case>ayesian Co-segmentation Models with Pivot-based <fixed-case>SMT</fixed-case> - MichaelPaul + MichaelPaul AndrewFinch - Paul R.Dixon - EiichiroSumita + Paul R.Dixon + EiichiroSumita 1–9 W11-2601 paul-etal-2011-dialect @@ -5178,7 +5178,7 @@ <fixed-case>P</fixed-case>addy<fixed-case>W</fixed-case>a<fixed-case>C</fixed-case>: A Minimally-Supervised Web-Corpus of Hiberno-<fixed-case>E</fixed-case>nglish BrianMurphy - Egon W.Stemle + Egon W.Stemle 22–29 W11-2603 murphy-stemle-2011-paddywac @@ -5193,9 +5193,9 @@ Learning word-level dialectal variation as phonological replacement rules using a limited parallel corpus MansHulden - IñakiAlegria + IñakiAlegria IzaskunEtxeberria - MontseMaritxalar + MontseMaritxalar 39–48 W11-2605 hulden-etal-2011-learning @@ -5204,18 +5204,18 @@ Modeling of Stylistic Variation in Social Media with Stretchy Patterns PhilipGianfortoni DavidAdamson - Carolyn P.Rosé + Carolyn P.Rosé 49–59 W11-2606 gianfortoni-etal-2011-modeling Adapting <fixed-case>S</fixed-case>lovak <fixed-case>ASR</fixed-case> for native Germans speaking <fixed-case>S</fixed-case>lovak - ŠtefanBeňuš + ŠtefanBeňuš MilošCerňak SakhiaDarjaa MilanRusko - MariánTrnka + MariánTrnka 60–64 W11-2607 benus-etal-2011-adapting @@ -5243,8 +5243,8 @@ Proceedings of the UCNLG+Eval: Language Generation and Evaluation Workshop W11-27 - AnjaBelz - RogerEvans + AnjaBelz + RogerEvans AlbertGatt KristinaStriegnitz Association for Computational Linguistics @@ -5259,7 +5259,7 @@ A New Sentence Compression Dataset and Its Use in an Abstractive Generate-and-Rank Sentence Compressor - DimitriosGalanis + DimitriosGalanis IonAndroutsopoulos 1–11 W11-2701 @@ -5276,8 +5276,8 @@ A Corpus of Human-written Summaries of Line Graphs CharlesGreenbacker - SandraCarberry - KathleenMcCoy + SandraCarberry + KathleenMcCoy 23–27 W11-2703 greenbacker-etal-2011-corpus @@ -5292,16 +5292,16 @@ Exploring linguistically-rich patterns for question generation SérgioCurto - Ana CristinaMendes - LuísaCoheur + Ana CristinaMendes + LuísaCoheur 33–38 W11-2705 curto-etal-2011-exploring Linguistically Motivated Complementizer Choice in Surface Realization - RajakrishnanRajkumar - MichaelWhite + RajakrishnanRajkumar + MichaelWhite 39–44 W11-2706 rajkumar-white-2011-linguistically @@ -5309,7 +5309,7 @@ Exciting and interesting: issues in the generation of binomials AnnCopestake - AurélieHerbelot + AurélieHerbelot 45–53 W11-2707 copestake-herbelot-2011-exciting @@ -5400,7 +5400,7 @@ Two Approaches for Generating Size Modifiers MargaretMitchell - Keesvan Deemter + Keesvan Deemter EhudReiter 63–70 W11-2808 @@ -5465,7 +5465,7 @@ Language Generation for Spoken Dialogue Systems [Invited Talk] - Johanna D.Moore + Johanna D.Moore 132 W11-2816 moore-2011-language @@ -5481,7 +5481,7 @@ <fixed-case>E</fixed-case>asy<fixed-case>T</fixed-case>ext: an Operational <fixed-case>NLG</fixed-case> System LaurenceDanlos - FrédéricMeunier + FrédéricMeunier VanessaCombet 139–144 W11-2818 @@ -5499,7 +5499,7 @@ A Policy-Based Approach to Context Dependent Natural Language Generation ThomasBouttaz EdoardoPignotti - ChrisMellish + ChrisMellish PeterEdwards 151–157 W11-2820 @@ -5516,14 +5516,14 @@ Using semantic roles to improve summaries - DianaTrandabăț + DianaTrandabăț 164–169 W11-2822 trandabat-2011-using Building a Generator for <fixed-case>I</fixed-case>talian <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage - AlessandroMazzei + AlessandroMazzei 170–175 W11-2823 mazzei-2011-building @@ -5548,14 +5548,14 @@ Generation of Formal and Informal Sentences FadiAbu Sheikha - DianaInkpen + DianaInkpen 187–193 W11-2826 abu-sheikha-inkpen-2011-generation Glue Rules for Robust Chart Realization - MichaelWhite + MichaelWhite 194–199 W11-2827 white-2011-glue @@ -5571,7 +5571,7 @@ Generation Challenges 2011 Preface - AnjaBelz + AnjaBelz AlbertGatt AlexanderKoller KristinaStriegnitz @@ -5581,7 +5581,7 @@ The <fixed-case>GRUVE</fixed-case> Challenge: Generating Routes under Uncertainty in Virtual Environments - SriniJanarthanam + SriniJanarthanam OliverLemon 208–211 W11-2830 @@ -5589,7 +5589,7 @@ A Proposal for a <fixed-case>S</fixed-case>panish Surface Realization Shared Task - PabloGervás + PabloGervás MiguelBallesteros 212–216 W11-2831 @@ -5597,28 +5597,28 @@ The First Surface Realisation Shared Task: Overview and Evaluation Results - AnjaBelz - MichaelWhite + AnjaBelz + MichaelWhite DominicEspinosa EricKow DeirdreHogan - AmandaStent + AmandaStent 217–226 W11-2832 belz-etal-2011-first <fixed-case>DCU</fixed-case> at Generation Challenges 2011 Surface Realisation Track - YuqingGuo + YuqingGuo DeirdreHogan - Josefvan Genabith + Josefvan Genabith 227–229 W11-2833 guo-etal-2011-dcu <fixed-case>ATT</fixed-case>-0: Submission to Generation Challenges 2011 Surface Realization Shared Task - AmandaStent + AmandaStent 230–231 W11-2834 stent-2011-att @@ -5627,7 +5627,7 @@ <<fixed-case>S</fixed-case>tu<fixed-case>M</fixed-case>a<fixed-case>B</fixed-case>a>: From Deep Representation to Surface BerndBohnet SimonMille - BenoîtFavre + BenoîtFavre LeoWanner 232–235 W11-2835 @@ -5635,16 +5635,16 @@ The <fixed-case>OSU</fixed-case> System for Surface Realization at Generation Challenges 2011 - RajakrishnanRajkumar + RajakrishnanRajkumar DominicEspinosa - MichaelWhite + MichaelWhite 236–238 W11-2836 rajkumar-etal-2011-osu <fixed-case>UCM</fixed-case> Submission to the Surface Realization Challenge - PabloGervás + PabloGervás 239–241 W11-2837 gervas-2011-ucm @@ -5662,7 +5662,7 @@ PinakiBhaskar AniruddhaGhosh SantanuPal - SivajiBandyopadhyay + SivajiBandyopadhyay 250–253 W11-2839 bhaskar-etal-2011-may @@ -5705,7 +5705,7 @@ Data-Driven Correction of <fixed-case>F</fixed-case>unction<fixed-case>W</fixed-case>ords in Non-Native <fixed-case>E</fixed-case>nglish AdrianeBoyd - DetmarMeurers + DetmarMeurers 267–269 W11-2844 boyd-meurers-2011-data @@ -5713,11 +5713,11 @@ Report on the Second Second Challenge on Generating Instructions in Virtual Environments (<fixed-case>GIVE</fixed-case>-2.5) KristinaStriegnitz - AlexandreDenis + AlexandreDenis AndrewGargett KonstantinaGaroufi AlexanderKoller - MariëtTheune + MariëtTheune 270–279 W11-2845 striegnitz-etal-2011-report @@ -5725,7 +5725,7 @@ Direction giving: an attempt to increase user engagement BobDuncan - Keesvan Deemter + Keesvan Deemter 280–283 W11-2846 duncan-van-deemter-2011-direction @@ -5739,9 +5739,9 @@ The <fixed-case>GIVE</fixed-case>-2.5 <fixed-case>C</fixed-case> Generation System - David NicolásRacca + David NicolásRacca LucianaBenotti - PabloDuboue + PabloDuboue 290–295 W11-2848 racca-etal-2011-give @@ -5749,14 +5749,14 @@ <fixed-case>CL</fixed-case> system: Giving instructions by corpus based selection LucianaBenotti - AlexandreDenis + AlexandreDenis 296–301 W11-2849 benotti-denis-2011-cl The Loria Instruction Generation System <fixed-case>L</fixed-case> in <fixed-case>GIVE</fixed-case> 2.5 - AlexandreDenis + AlexandreDenis 302–306 W11-2850 denis-2011-loria @@ -5774,7 +5774,7 @@ SaskiaAkkersdijk MarinLangenbach FriederLoch - MariëtTheune + MariëtTheune 312–317 W11-2852 akkersdijk-etal-2011-thumbs @@ -5786,7 +5786,7 @@ PaulPiwek MihaiLintean SvetlanaStoyanchev - CristianMoldovan + CristianMoldovan 318–320 W11-2853 rus-etal-2011-question @@ -5796,9 +5796,9 @@ Proceedings of the 12th International Conference on Parsing Technologies W11-29 - HarryBunt + HarryBunt JoakimNivre - ÖzlemÇetinoglu + ÖzlemÇetinoglu Association for Computational Linguistics
Dublin, Ireland
October @@ -5811,7 +5811,7 @@ Computing Scope in a <fixed-case>CCG</fixed-case> Parser - MarkSteedman + MarkSteedman 1 W11-2901 steedman-2011-computing @@ -5843,16 +5843,16 @@ A Word Clustering Approach to Domain Adaptation: Effective Parsing of Biomedical Texts - MarieCandito - EnriqueHenestroza Anguiano - DjaméSeddah + MarieCandito + EnriqueHenestroza Anguiano + DjaméSeddah 37–42 W11-2905 candito-etal-2011-word Sentence-Level Instance-Weighting for Graph-Based and Transition-Based Dependency Parsing - AndersSøgaard + AndersSøgaard MartinHaulrich 43–47 W11-2906 @@ -5864,7 +5864,7 @@ YusukeMiyao TakuyaMatsuzaki XiangliWang - JunichiTsujii + JunichiTsujii 48–57 W11-2907 yu-etal-2011-analysis @@ -5879,7 +5879,7 @@ <fixed-case>B</fixed-case>ayesian Network Automata for Modelling Unbounded Structures - JamesHenderson + JamesHenderson 63–74 W11-2909 henderson-2011-bayesian @@ -5897,7 +5897,7 @@ Learning Structural Dependencies of Words in the <fixed-case>Z</fixed-case>ipfian Tail TejaswiniDeoskar MarkosMylonakis - KhalilSima’an + KhalilSima’an 80–91 W11-2911 deoskar-etal-2011-learning @@ -5941,7 +5941,7 @@ Gregory F.Coppola AlexandraBirch TejaswiniDeoskar - MarkSteedman + MarkSteedman 129–139 W11-2916 coppola-etal-2011-simple @@ -5956,7 +5956,7 @@ <fixed-case>L</fixed-case>agrangian Relaxation for Inference in Natural Language Processing - MichaelCollins + MichaelCollins 150 W11-2918 collins-2011-lagrangian @@ -5998,14 +5998,14 @@ Large-Scale Corpus-Driven <fixed-case>PCFG</fixed-case> Approximation of an <fixed-case>HPSG</fixed-case> YiZhang - Hans-UlrichKrieger + Hans-UlrichKrieger 198–208 W11-2923 zhang-krieger-2011-large Features for Phrase-Structure Reranking from Dependency Parses - RichárdFarkas + RichárdFarkas BerndBohnet HelmutSchmid 209–214 @@ -6017,7 +6017,7 @@ JenniferFoster ÖzlemÇetinoğlu JoachimWagner - Josefvan Genabith + Josefvan Genabith 215–219 W11-2925 foster-etal-2011-comparing @@ -6061,7 +6061,7 @@ Proceedings of the 2nd Workshop on South Southeast Asian Natural Language Processing (WSSANLP) W11-30 RajeevSangal - M. G. AbbasMalik + M. G. AbbasMalik Asian Federation of Natural Language Processing
Chiang Mai, Thailand
November @@ -6076,7 +6076,7 @@ Hybrid Inflectional Stemmer and Rule-based Derivational Stemmer for <fixed-case>G</fixed-case>ujarati KartikSuba DiptiJiandani - PushpakBhattacharyya + PushpakBhattacharyya 1–8 W11-3001 suba-etal-2011-hybrid @@ -6113,7 +6113,7 @@ Towards a <fixed-case>M</fixed-case>alay Derivational Lexicon: Learning Affixes Using Expectation Maximization SurianiSulaiman MichaelGasser - SandraKuebler + SandraKuebler 30–34 W11-3005 sulaiman-etal-2011-towards @@ -6121,7 +6121,7 @@ <fixed-case>P</fixed-case>unjabi Language Stemmer for nouns and proper names VishalGupta - Gurpreet SinghLehal + Gurpreet SinghLehal 35–39 W11-3006 gupta-lehal-2011-punjabi @@ -6129,7 +6129,7 @@ Challenges in <fixed-case>U</fixed-case>rdu Text Tokenization and Sentence Boundary Disambiguation ZobiaRehman - WaqasAnwar + WaqasAnwar Usama IjazBajwa 40–45 W11-3007 @@ -6138,7 +6138,7 @@ Challenges in Developing a Rule based <fixed-case>U</fixed-case>rdu Stemmer Sajjad AhmadKhan - WaqasAnwar + WaqasAnwar Usama IjazBajwa 46–51 W11-3008 @@ -6147,7 +6147,7 @@ Developing a New System for <fixed-case>A</fixed-case>rabic Morphological Analysis and Generation MouradGridach - NoureddineChenfour + NoureddineChenfour 52–57 W11-3009 gridach-chenfour-2011-developing @@ -6157,7 +6157,7 @@ Proceedings of the KRAQ11 workshop W11-31 - PatrickSaint-Dizier + PatrickSaint-Dizier Asian Federation of Natural Language Processing
Chiang Mai
November @@ -6206,7 +6206,7 @@ A Rule Based Approach for Analysis of Comparative or Evaluative Questions in Tourism Domain Bidhan ChandraPal PinakiBhaskar - SivajiBandyopadhyay + SivajiBandyopadhyay 29–37 W11-3105 pal-etal-2011-rule @@ -6228,7 +6228,7 @@ W11-32 MinZhang HaizhouLi - AKumaran + AKumaran Asian Federation of Natural Language Processing
Chiang Mai, Thailand
November @@ -6261,8 +6261,8 @@ Integrating Models Derived from non-Parametric <fixed-case>B</fixed-case>ayesian Co-segmentation into a Statistical Machine Transliteration System AndrewFinch - PaulDixon - EiichiroSumita + PaulDixon + EiichiroSumita 23–27 W11-3203 finch-etal-2011-integrating @@ -6279,7 +6279,7 @@ <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Named Entity Transliteration Using Statistical Substring-based and Rule-based Approaches Yu-ChunWang - Richard Tzong-HanTsai + Richard Tzong-HanTsai 32–35 W11-3205 wang-tsai-2011-english @@ -6296,7 +6296,7 @@ Comparative Evaluation of <fixed-case>S</fixed-case>panish Segmentation Strategies for <fixed-case>S</fixed-case>panish-<fixed-case>C</fixed-case>hinese Transliteration - Rafael E.Banchs + Rafael E.Banchs 41–48 W11-3207 banchs-2011-comparative @@ -6306,7 +6306,7 @@ TakaakiFukunishi AndrewFinch SeiichiYamamoto - EiichiroSumita + EiichiroSumita 49–57 W11-3208 fukunishi-etal-2011-using @@ -6323,7 +6323,7 @@ Mining Multi-word Named Entity Equivalents from Comparable Corpora AbhijitBhole GouthamTholpadi - RaghavendraUdupa + RaghavendraUdupa 65–72 W11-3210 bhole-etal-2011-mining @@ -6346,9 +6346,9 @@ <fixed-case>E</fixed-case>nglish-to-<fixed-case>C</fixed-case>hinese Machine Transliteration using Accessor Variety Features of Source Graphemes - Mike Tian-JianJiang - Chan-HungKuo - Wen-LianHsu + Mike Tian-JianJiang + Chan-HungKuo + Wen-LianHsu 86–90 W11-3213 jiang-etal-2011-english @@ -6364,7 +6364,7 @@ <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Personal Name Transliteration by Syllable-Based Maximum Matching - Oi YeeKwong + Oi YeeKwong 96–100 W11-3215 kwong-2011-english-chinese @@ -6381,7 +6381,7 @@ Named Entity Transliteration Generation Leveraging Statistical Machine Translation Technology PradeepDasigi - MonaDiab + MonaDiab 106–111 W11-3217 dasigi-diab-2011-named @@ -6391,9 +6391,9 @@ Proceedings of the Workshop on Language Resources, Technology and Services in the Sharing Paradigm W11-33 - NicolettaCalzolari + NicolettaCalzolari ToruIshida - SteliosPiperidis + SteliosPiperidis VirachSornlertlamvanich Asian Federation of Natural Language Processing
Chiang Mai, Thailand
@@ -6407,16 +6407,16 @@ Prospects for an Ontology-Grounded Language Service Infrastructure - YoshihikoHayashi + YoshihikoHayashi 1–7 W11-3301 hayashi-2011-prospects A Method Towards the Fully Automatic Merging of Lexical Resources - NúriaBel - MuntsaPadró - SilviaNecsulescu + NúriaBel + MuntsaPadró + SilviaNecsulescu 8–15 W11-3302 bel-etal-2011-method @@ -6441,7 +6441,7 @@ Interoperability and Technology for a Language Resources Factory MarcPoch - NúriaBel + NúriaBel 32–40 W11-3305 poch-bel-2011-interoperability @@ -6459,10 +6459,10 @@ Promoting Interoperability of Resources in <fixed-case>META</fixed-case>-<fixed-case>SHARE</fixed-case> PaulThompson YoshinobuKano - JohnMcNaught + JohnMcNaught StevePettifer TeresaAttwood - JohnKeane + JohnKeane SophiaAnaniadou 50–58 W11-3307 @@ -6490,9 +6490,9 @@ Proposal for the International Standard Language Resource Number - KhalidChoukri + KhalidChoukri JungyeulPark - OlivierHamon + OlivierHamon VictoriaArranz 75–83 W11-3310 @@ -6500,14 +6500,14 @@ A Metadata Schema for the Description of Language Resources (<fixed-case>LR</fixed-case>s) - MariaGavrilidou - PennyLabropoulou + MariaGavrilidou + PennyLabropoulou SteliosPiperidis MonicaMonachini FrancescaFrontini GilFrancopoulo VictoriaArranz - ValérieMapelli + ValérieMapelli 84–92 W11-3311 gavrilidou-etal-2011-metadata @@ -6515,7 +6515,7 @@ The Language Library: Many Layers, More Knowledge NicolettaCalzolari - RiccardoDel Gratta + RiccardoDel Gratta FrancescaFrontini IreneRusso 93–97 @@ -6524,7 +6524,7 @@ Sharing Resources in <fixed-case>CLARIN</fixed-case>-<fixed-case>NL</fixed-case> - JanOdijk + JanOdijk Arjanvan Hessen 98–106 W11-3313 @@ -6532,12 +6532,12 @@ <fixed-case>META</fixed-case>-<fixed-case>NORD</fixed-case>: Towards Sharing of Language Resources in <fixed-case>N</fixed-case>ordic and <fixed-case>B</fixed-case>altic Countries - IngunaSkadiņa - AndrejsVasiļjevs + IngunaSkadiņa + AndrejsVasiļjevs LarsBorin - KoenraadDe Smedt - KristerLindén - EiríkurRögnvaldsson + KoenraadDe Smedt + KristerLindén + EiríkurRögnvaldsson 107–114 W11-3314 skadina-etal-2011-meta @@ -6547,9 +6547,9 @@ Proceedings of the 9th Workshop on Asian Language Resources W11-34 - Rachel Edita O.Roxas + Rachel Edita O.Roxas SarmadHussain - Key-SunChoi + Key-SunChoi Asian Federation of Natural Language Processing
Chiang Mai, Thailand
November @@ -6601,7 +6601,7 @@ RahulAgarwal MridulGupta SamarHusain - Dipti MisraSharma + Dipti MisraSharma 23–30 W11-3405 ambati-etal-2011-error @@ -6616,7 +6616,7 @@
Feasibility of Leveraging Crowd Sourcing for the Creation of a Large Scale Annotated Resource for <fixed-case>H</fixed-case>indi <fixed-case>E</fixed-case>nglish Code Switched Data: A Pilot Annotation - MonaDiab + MonaDiab AnkitKamboj 36–40 W11-3407 @@ -6640,7 +6640,7 @@ <fixed-case>P</fixed-case>hilippine Languages Online Corpora: Status, issues, and prospects - ShirleyDita + ShirleyDita Rachel EditaRoxas 59–62 W11-3410 @@ -6657,7 +6657,7 @@ Towards a Computational Semantic Analyzer for <fixed-case>U</fixed-case>rdu - AnnetteHautli + AnnetteHautli MiriamButt 71–78 W11-3412 @@ -6666,7 +6666,7 @@ Word Disambiguation in Shahmukhi to Gurmukhi Transliteration Tejinder SinghSaini - Gurpreet SinghLehal + Gurpreet SinghLehal 79–87 W11-3413 saini-lehal-2011-word @@ -6678,7 +6678,7 @@ W11-35 HidetoKazawa HisamiSuzuki - TakuKudo + TakuKudo Asian Federation of Natural Language Processing
Chiang Mai, Thailand
November @@ -6724,7 +6724,7 @@ AsadHabib MasakazuIwatate MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 26–30 W11-3504 habib-etal-2011-different @@ -6744,7 +6744,7 @@ SeijiKasahara MamoruKomachi MasaakiNagata - YujiMatsumoto + YujiMatsumoto 38–42 W11-3506 kasahara-etal-2011-error @@ -6767,11 +6767,11 @@
Robustness Analysis of Adaptive <fixed-case>C</fixed-case>hinese Input Methods - Mike Tian-JianJiang + Mike Tian-JianJiang Cheng-WeiLee ChadLiu Yung-ChunChang - Wen-LianHsu + Wen-LianHsu 53–61 W11-3509 jiang-etal-2011-robustness @@ -6782,7 +6782,7 @@ Proceedings of the Fifth International Workshop On Cross Lingual Information Access W11-36 AsifEkbal - DeyiXiong + DeyiXiong Asian Federation of Natural Language Processing
Chiang Mai, Thailand
November @@ -6803,7 +6803,7 @@ Using Explicit Semantic Analysis for Cross-Lingual Link Discovery PetrKnoth - LukasZilka + LukasZilka ZdenekZdrahal 2–10 W11-3602 @@ -6828,7 +6828,7 @@ Soundex-based Translation Correction in <fixed-case>U</fixed-case>rdu–<fixed-case>E</fixed-case>nglish Cross-Language Information Retrieval ManaalFaruqui PrasenjitMajumder - SebastianPadó + SebastianPadó 25–29 W11-3605 faruqui-etal-2011-soundex @@ -6839,7 +6839,7 @@ ErikPeterson JohnChen YanaPetrova - RohiniSrihari + RohiniSrihari 30–34 W11-3606 yang-etal-2011-unsupervised-russian @@ -6856,8 +6856,8 @@ Proceedings of the Workshop on Sentiment Analysis where AI meets Psychology (SAAIP 2011) W11-37 - SivajiBandyopadhyay - ManabuOkumura + SivajiBandyopadhyay + ManabuOkumura Asian Federation of Natural Language Processing
Chiang Mai, Thailand
November @@ -6870,7 +6870,7 @@ Invited Keynote: What are Subjectivity, Sentiment, and Affect? - EduardHovy + EduardHovy 1 W11-3701 hovy-2011-invited @@ -6895,7 +6895,7 @@ Towards automatic detection of antisocial behavior from texts MyriamMunezero TuomoKakkonen - CalkinMontero + CalkinMontero 20–27 W11-3704 munezero-etal-2011-towards @@ -6905,7 +6905,7 @@ LeilaAmgoud FlorenceBannay CharlotteCostedoat - PatrickSaint-Dizier + PatrickSaint-Dizier CamilleAlbert 28–34 W11-3705 @@ -6923,8 +6923,8 @@ Sense-level Subjectivity in a Multilingual Setting CarmenBanea - RadaMihalcea - JanyceWiebe + RadaMihalcea + JanyceWiebe 44–50 W11-3707 banea-etal-2011-sense @@ -7003,7 +7003,7 @@ Proceedings of the Second Workshop on Statistical Parsing of Morphologically Rich Languages W11-38 - DjaméSeddah + DjaméSeddah ReutTsarfaty JenniferFoster Association for Computational Linguistics @@ -7018,8 +7018,8 @@ Statistical Dependency Parsing in <fixed-case>K</fixed-case>orean: From Corpus Generation To Automatic Parsing - Jinho D.Choi - MarthaPalmer + Jinho D.Choi + MarthaPalmer 1–11 W11-3801 choi-palmer-2011-statistical @@ -7028,7 +7028,7 @@ Morphological Features for Parsing Morphologically-rich Languages: A Case of <fixed-case>A</fixed-case>rabic JonDehdari LamiaTounsi - Josefvan Genabith + Josefvan Genabith 12–21 W11-3802 dehdari-etal-2011-morphological @@ -7036,8 +7036,8 @@ <fixed-case>F</fixed-case>rench parsing enhanced with a word clustering method based on a syntactic lexicon AnthonySigogne - MatthieuConstant - ÉricLaporte + MatthieuConstant + ÉricLaporte 22–27 W11-3803 sigogne-etal-2011-french @@ -7045,8 +7045,8 @@ Testing the Effect of Morphological Disambiguation in Dependency Parsing of <fixed-case>B</fixed-case>asque KepaBengoetxea - ArantzaCasillas - KoldoGojenola + ArantzaCasillas + KoldoGojenola 28–33 W11-3804 bengoetxea-etal-2011-testing @@ -7054,7 +7054,7 @@ Discontinuous Data-Oriented Parsing: A mildly context-sensitive all-fragments grammar Andreasvan Cranenburgh - RemkoScha + RemkoScha FedericoSangati 34–44 W11-3805 @@ -7062,7 +7062,7 @@ Multiword Expressions in Statistical Dependency Parsing - GülşenEryiğit + GülşenEryiğit Tugayİlbay Ozan ArkanCan 45–55 @@ -7072,7 +7072,7 @@ Linguistically Rich Graph Based Data Driven Parsing For <fixed-case>H</fixed-case>indi SamarHusain - Raghu PujithaGade + Raghu PujithaGade RajeevSangal 56–61 W11-3807 @@ -7090,8 +7090,8 @@ Proceedings of Workshop on Robust Unsupervised and Semisupervised Methods in Natural Language Processing W11-39 - ChrisBiemann - AndersSøgaard + ChrisBiemann + AndersSøgaard Association for Computational Linguistics
Hissar, Bulgaria
September @@ -7105,7 +7105,7 @@ <fixed-case>G</fixed-case>ibbs Sampling with Treeness Constraint in Unsupervised Dependency Parsing DavidMareček - ZdeněkŽabokrtský + ZdeněkŽabokrtský 1–8 W11-3901 marecek-zabokrtsky-2011-gibbs @@ -7114,7 +7114,7 @@ Guided Self Training for Sentiment Classification BrettDrury LuísTorgo - Jose JoaoAlmeida + Jose JoaoAlmeida 9–16 W11-3902 drury-etal-2011-guided @@ -7152,7 +7152,7 @@ Investigation of Co-training Views and Variations for Semantic Role Labeling - RasoulSamad Zadeh Kaljahi + RasoulSamad Zadeh Kaljahi Mohd SapiyanBaba 41–49 W11-3906 @@ -7163,10 +7163,10 @@ Proceedings of the RANLP 2011 Workshop on Information Extraction and Knowledge Acquisition W11-40 - PreslavNakov + PreslavNakov ZornitsaKozareva KuzmanGanchev - JerryHobbs + JerryHobbs Association for Computational Linguistics
Hissar, Bulgaria
September @@ -7179,7 +7179,7 @@ INVITED TALK 1: The Knowledge Base Population Task: Challenges for Information Extraction - RalphGrishman + RalphGrishman 1 W11-4001 grishman-2011-invited @@ -7187,7 +7187,7 @@ Fine-grained Entity Set Refinement with User Feedback BonanMin - RalphGrishman + RalphGrishman 2–6 W11-4002 min-grishman-2011-fine @@ -7195,7 +7195,7 @@ Extraction of Domain-specific Opinion Words for Similar Domains IliaChetviorkin - NataliaLoukachevitch + NataliaLoukachevitch 7–12 W11-4003 chetviorkin-loukachevitch-2011-extraction @@ -7219,14 +7219,14 @@ How to Distinguish a Kidney Theft from a Death Car? Experiments in Clustering Urban-Legend Texts RomanGrundkiewicz - FilipGraliński + FilipGraliński 29–36 W11-4006 grundkiewicz-gralinski-2011-distinguish Machine Reading Between the Lines: A Simple Evaluation Framework for Extracted Knowledge Bases - AvirupSil + AvirupSil AlexanderYates 37–40 W11-4007 @@ -7235,7 +7235,7 @@ Temporal Expressions Extraction in <fixed-case>SMS</fixed-case> messages StéphanieWeiser - Louis-AmélieCougnon + Louis-AmélieCougnon PatrickWatrin 41–44 W11-4008 @@ -7256,7 +7256,7 @@ CristinaVertan MilenaSlavcheva PetyaOsenova - SteliosPiperidis + SteliosPiperidis Association for Computational Linguistics
Hissar, Bulgaria
September @@ -7269,7 +7269,7 @@ Endangered <fixed-case>U</fixed-case>ralic Languages and Language Technologies - GáborPrószéky + GáborPrószéky 1–2 W11-4101 proszeky-2011-endangered @@ -7287,7 +7287,7 @@ Query classification via Topic Models for an art image archive Dieu-ThuLe - RaffaellaBernardi + RaffaellaBernardi EdVald 11–18 W11-4103 @@ -7331,7 +7331,7 @@ Language Technology Support for Semantic Annotation of Icono-graphic Descriptions KamenkaStaykova GennadyAgre - KirilSimov + KirilSimov PetyaOsenova 51–56 W11-4108 @@ -7365,8 +7365,8 @@ Diachronic Stylistic Changes in <fixed-case>B</fixed-case>ritish and <fixed-case>A</fixed-case>merican Varieties of 20th Century Written <fixed-case>E</fixed-case>nglish Language - SanjaŠtajner - RuslanMitkov + SanjaŠtajner + RuslanMitkov 78–85 W11-4112 stajner-mitkov-2011-diachronic @@ -7378,7 +7378,7 @@ RolfBardeli OliverSchreer StefanoMasneri - PeterWittenburg + PeterWittenburg HanSloetjes PrzemekLenkiewicz EricAuer @@ -7389,10 +7389,10 @@ Handwritten Text Recognition for Historical Documents VerónicaRomero - NicolásSerrano - Alejandro H.Toselli - Joan AndreuSánchez - EnriqueVidal + NicolásSerrano + Alejandro H.Toselli + Joan AndreuSánchez + EnriqueVidal 90–96 W11-4114 romero-etal-2011-handwritten @@ -7411,7 +7411,7 @@ Proceedings of the Second Workshop on Biomedical Natural Language Processing W11-42 GuerganaSavova - Kevin BretonnelCohen + Kevin BretonnelCohen GaliaAngelova Association for Computational Linguistics
Hissar, Bulgaria
@@ -7457,7 +7457,7 @@ GeorgiGeorgiev ValentinZhikov BorislavPopov - PreslavNakov + PreslavNakov 27–34 W11-4205 georgiev-etal-2011-building @@ -7475,9 +7475,9 @@ CarolineHagège QuentinGicquel SuzannePereira - StefanDarmoni - FrédériqueSegond - Marie-HélèneMetzger + StefanDarmoni + FrédériqueSegond + Marie-HélèneMetzger 43–48 W11-4207 proux-etal-2011-architecture @@ -7495,10 +7495,10 @@ Proceedings of the Second Workshop on Annotation and Exploitation of Parallel Corpora W11-43 - KirilSimov + KirilSimov PetyaOsenova - JörgTiedemann - RadovanGarabik + JörgTiedemann + RadovanGarabik Association for Computational Linguistics
Hissar, Bulgaria
September @@ -7511,7 +7511,7 @@ Reusing Parallel Corpora between Related Languages - PreslavNakov + PreslavNakov 1 W11-4301 nakov-2011-reusing @@ -7519,7 +7519,7 @@ Discontinuous Constituents: a Problematic Case for Parallel Corpora Annotation and Querying MarilisaAmoia - KerstinKunz + KerstinKunz EkaterinaLapshinova-Koltunski 2–10 W11-4302 @@ -7582,7 +7582,7 @@ Proceedings of the 9th International Workshop on Finite State Methods and Natural Language Processing W11-44 AndreasMaletti - MatthieuConstant + MatthieuConstant Association for Computational Linguistics
Blois, France
July @@ -7616,8 +7616,8 @@
Supervised and Semi-Supervised Sequence Learning for Recognition of Requisite Part and Effectuation Part in Law Sentences - Le-MinhNguyen - Ngo XuanBach + Le-MinhNguyen + Ngo XuanBach AkiraShimazu 21–29 W11-4404 @@ -7641,7 +7641,7 @@ <fixed-case>E</fixed-case>-Dictionaries and Finite-State Automata for the Recognition of Named Entities CvetanaKrstev DuškoVitas - IvanObradović + IvanObradović MilošUtvić 48–56 W11-4407 @@ -7657,7 +7657,7 @@ Open Source <fixed-case>WFST</fixed-case> Tools for <fixed-case>LVCSR</fixed-case> Cascade Development Josef R.Novak - NobuakiMinematsu + NobuakiMinematsu KeikichiHirose 65–73 W11-4409 @@ -7665,7 +7665,7 @@ Intersection of Multitape Transducers vs. Cascade of Binary Transducers: The Example of <fixed-case>E</fixed-case>gyptian Hieroglyphs Transliteration - FrançoisBarthélemy + FrançoisBarthélemy SergeRosmorduc 74–82 W11-4410 @@ -7681,7 +7681,7 @@ <fixed-case>FT</fixed-case>race: A Tool for Finite-State Morphology JamesKilbury - KatinaBontcheva + KatinaBontcheva YounesSamih 88–92 W11-4412 @@ -7699,8 +7699,8 @@ Stochastic <fixed-case>K</fixed-case>-<fixed-case>TSS</fixed-case> Bi-Languages for Machine Translation - M. InésTorres - FranciscoCasacuberta + M. InésTorres + FranciscoCasacuberta 98–106 W11-4414 torres-casacuberta-2011-stochastic @@ -7709,16 +7709,16 @@ Measuring the Confusability of Pronunciations in Speech Recognition PanagiotaKaranasou FrançoisYvon - LoriLamel + LoriLamel 107–115 W11-4415 karanasou-etal-2011-measuring Fast Yet Rich Morphological Analysis - MohamedAltantawy + MohamedAltantawy NizarHabash - OwenRambow + OwenRambow 116–124 W11-4416 altantawy-etal-2011-fast @@ -7729,7 +7729,7 @@ PavelPecina AntonioToral LamiaTounsi - Josefvan Genabith + Josefvan Genabith 125–133 W11-4417 attia-etal-2011-open @@ -7738,7 +7738,7 @@ Recognition and Translation of <fixed-case>A</fixed-case>rabic Named Entities with <fixed-case>N</fixed-case>oo<fixed-case>J</fixed-case> Using a New Representation Model HélaFehri KaisHaddar - AbdelmajidBen Hamadou + AbdelmajidBen Hamadou 134–142 W11-4418 fehri-etal-2011-recognition @@ -7756,7 +7756,7 @@ Multi-Document Discourse Parsing Using Traditional and Hierarchical Machine Learning - Erick GalaniMaziero + Erick GalaniMaziero Thiago Alexandre SalgueiroPardo W11-4501 maziero-pardo-2011-multi @@ -7770,15 +7770,15 @@ <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et.<fixed-case>B</fixed-case>r: construção semiautomática de um léxico computacional de verbos para o português do Brasil (<fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et.<fixed-case>B</fixed-case>r: semiautomatic construction of a computational verb lexicon for <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese) [in <fixed-case>P</fixed-case>ortuguese] - Carolina EvaristoScarton + Carolina EvaristoScarton W11-4503 scarton-2011-verbnet Comparando Avaliações de Inteligibilidade Textual entre Originais e Traduções de Textos Literários (Comparing Textual Intelligibility Evaluations among Literary Source Texts and their Translations) [in <fixed-case>P</fixed-case>ortuguese] Bianca FrancoPasqualini - Carolina EvaristoScarton - Maria José B.Finatto + Carolina EvaristoScarton + Maria José B.Finatto W11-4504 pasqualini-etal-2011-comparando @@ -7793,17 +7793,17 @@ Características do jornalismo popular: avaliação da inteligibilidade e auxílio à descrição do gênero (Characteristics of Popular News: the Evaluation of Intelligibility and Support to the Genre Description) [in <fixed-case>P</fixed-case>ortuguese] - Maria José B.Finatto - Carolina EvaristoScarton + Maria José B.Finatto + Carolina EvaristoScarton AmandaRocha - SandraAluísio + SandraAluísio W11-4506 finatto-etal-2011-caracteristicas Construction of a <fixed-case>P</fixed-case>ortuguese Opinion Lexicon from multiple resources MarloSouza - RenataVieira + RenataVieira DéboraBusetti RoveChishman Isa MaraAlves @@ -7813,14 +7813,14 @@ Using machine learning methods to avoid the pitfall of cognates and false friends in <fixed-case>S</fixed-case>panish-<fixed-case>P</fixed-case>ortuguese word pairs LianetSepúlveda Torres - Sandra MariaAluísio + Sandra MariaAluísio W11-4508 sepulveda-torres-aluisio-2011-using Análise automática de aspectos relacionados a coerência semântica em resumos acadêmicos (Automatic Analysis of Semantic Coherence Aspects in Academic Abstracts) [in <fixed-case>P</fixed-case>ortuguese] - Vinícius Mourão Alvesde Souza - Valéria DelisandraFeltrim + Vinícius Mourão Alvesde Souza + Valéria DelisandraFeltrim W11-4509 de-souza-feltrim-2011-analise @@ -7852,7 +7852,7 @@ Extração de Contextos Definitórios a partir de Textos em Língua Portuguesa (Extraction of Defining Contexts from Texts in <fixed-case>P</fixed-case>ortuguese) [in <fixed-case>P</fixed-case>ortuguese] Igor S.Wendt - RenataVieira + RenataVieira W11-4513 wendt-vieira-2011-extracao @@ -7869,16 +7869,16 @@ Minimização do Impacto do Problema de Desvio de Conceito por Meio de Acoplamento em Ambiente de Aprendizado Sem Fim (Minimizing the Impact of the Concept Drift Problem by Using a Framework of Endless Learning) [in <fixed-case>P</fixed-case>ortuguese] Maisa CristinaDuarte - Estevam R.Hruschka Jr. + Estevam R.Hruschka Jr. Mariado Carmo Nicoletti W11-4515 duarte-etal-2011-minimizacao Generating a Pronunciation Dictionary for <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese Using a Joint-Sequence Model with Embedded Stress Assignment - ArlindoVeiga + ArlindoVeiga SaraCandeias - FernandoPerdigão + FernandoPerdigão W11-4516 veiga-etal-2011-generating @@ -7892,14 +7892,14 @@ Part-of-Speech Tagging of <fixed-case>P</fixed-case>ortuguese Using Hidden <fixed-case>M</fixed-case>arkov Models with Character Language Model Emissions Marcelo Rodriguesde Holanda Maia - Geraldo BonorinoXexéo + Geraldo BonorinoXexéo W11-4518 de-holanda-maia-xexeo-2011-part <fixed-case>P</fixed-case>ropbank-Br: a <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese corpus annotated with semantic role labels - Magali SanchesDuran - Sandra MariaAluísio + Magali SanchesDuran + Sandra MariaAluísio W11-4519 duran-aluisio-2011-propbank @@ -7921,21 +7921,21 @@ <fixed-case>P</fixed-case>or<fixed-case>TA</fixed-case>l: Recursos e Ferramentas de Tradução Automática para o Português do Brasil (<fixed-case>P</fixed-case>or<fixed-case>TA</fixed-case>l: Resources and Tools for Machine Translation of <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese) [in <fixed-case>P</fixed-case>ortuguese] Thiago LimaVieira - Helenade Medeiros Caseli + Helenade Medeiros Caseli W11-4522 vieira-de-medeiros-caseli-2011-portal The Use of Metrics for Measuring Informality Levels in Web 2.0 Texts AlejandroMosquera - PalomaMoreda + PalomaMoreda W11-4523 mosquera-moreda-2011-use Uma abordagem de classificação automática para Tipo de Pergunta e Tipo de Resposta (An Automatic Approach for Classification of Question Type and Answer Type) [in <fixed-case>P</fixed-case>ortuguese] - Patricia NunesGonçalves - António HortaBranco + Patricia NunesGonçalves + António HortaBranco W11-4524 goncalves-branco-2011-uma @@ -7958,7 +7958,7 @@ Quotation Extraction for <fixed-case>P</fixed-case>ortuguese William Paulo DuccaFernandes EduardoMotta - Ruy LuizMilidiú + Ruy LuizMilidiú W11-4527 fernandes-etal-2011-quotation @@ -7988,7 +7988,7 @@ A Generative Approach for Multi-Document Summarization using Semantic-Discursive information - Maria Lucía CastroJorge + Maria Lucía CastroJorge Thiago Alexandre SalgueiroPardo W11-4531 jorge-pardo-2011-generative @@ -8012,9 +8012,9 @@ Proceedings of the 18th Nordic Conference of Computational Linguistics (NODALIDA 2011) W11-46 - Bolette SandfordPedersen - GuntaNešpore - IngunaSkadiņa + Bolette SandfordPedersen + GuntaNešpore + IngunaSkadiņa Northern European Association for Language Technology (NEALT)
Riga, Latvia
May @@ -8027,7 +8027,7 @@ Invited Paper: When <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et meets a Controlled Natural Language - GuntisBārzdiņš + GuntisBārzdiņš 2–5 W11-4601 barzdins-2011-invited @@ -8041,15 +8041,15 @@ Invited Paper: Discourse Structures and Language Technologies - BonnieWebber + BonnieWebber 12–16 W11-4603 webber-2011-invited Identification of sense selection in regular polysemy using shallow features - Héctor MartínezAlonso - NúriaBel + Héctor MartínezAlonso + NúriaBel BoletteSandford Pedersen 18–25 W11-4604 @@ -8066,7 +8066,7 @@ A <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et for <fixed-case>D</fixed-case>anish - EckhardBick + EckhardBick 34–41 W11-4606 bick-2011-framenet @@ -8080,7 +8080,7 @@ The Formal Patterns of the <fixed-case>L</fixed-case>ithuanian Verb Forms - LoïcBoizou + LoïcBoizou 50–57 W11-4608 boizou-2011-formal @@ -8111,17 +8111,17 @@ Experiments on <fixed-case>L</fixed-case>ithuanian Term Extraction - GintarėGrigonytė + GintarėGrigonytė ErikaRimkutė AndriusUtka - LoicBoizou + LoicBoizou 82–89 W11-4612 grigonyte-etal-2011-experiments Fishing in a Speech Stream – Angling for a Lexicon - Peter JuelHenrichsen + Peter JuelHenrichsen 90–97 W11-4613 henrichsen-2011-fishing @@ -8155,15 +8155,15 @@ What kind of corpus is a web corpus? - Janne BondiJohannessen - Emiliano RaulGuevara + Janne BondiJohannessen + Emiliano RaulGuevara 122–129 W11-4617 johannessen-guevara-2011-kind Morphological analysis of a non-standard language variety - Heiki-JaanKaalep + Heiki-JaanKaalep KadriMuischnek 130–137 W11-4618 @@ -8171,7 +8171,7 @@ Editing Syntax Trees on the Surface - PeterLjunglöf + PeterLjunglöf 138–145 W11-4619 ljunglof-2011-editing @@ -8179,7 +8179,7 @@ Do wordnets also improve human performance on <fixed-case>NLP</fixed-case> tasks? KristiinaMuhonen - KristerLindén + KristerLindén 146–152 W11-4620 muhonen-linden-2011-wordnets @@ -8187,9 +8187,9 @@ Creating Comparable Multimodal Corpora for <fixed-case>N</fixed-case>ordic Languages CostanzaNavarretta - ElisabethAhlsén + ElisabethAhlsén JensAllwood - KristiinaJokinen + KristiinaJokinen PatriziaPaggio 153–160 W11-4621 @@ -8220,8 +8220,8 @@ Combining Statistical Models for <fixed-case>POS</fixed-case> Tagging using Finite-State Calculus - MiikkaSilfverberg - KristerLindén + MiikkaSilfverberg + KristerLindén 183–190 W11-4625 silfverberg-linden-2011-combining @@ -8238,21 +8238,21 @@ Automatic summarization as means of simplifying texts, an evaluation for <fixed-case>S</fixed-case>wedish ChristianSmith - ArneJönsson + ArneJönsson 198–205 W11-4627 smith-jonsson-2011-automatic Using graphical models for <fixed-case>PP</fixed-case> attachment - AndersSøgaard + AndersSøgaard 206–213 W11-4628 sogaard-2011-using Corrective re-synthesis of deviant speech using unit selection - SofiaStrömbergsson + SofiaStrömbergsson 214–217 W11-4629 strombergsson-2011-corrective @@ -8355,14 +8355,14 @@ Knowledge-free Verb Detection through Tag Sequence Alignment - ChristianHänig + ChristianHänig 291–294 W11-4642 hanig-2011-knowledge “Andre ord” – a wordnet browser for the <fixed-case>D</fixed-case>anish wordnet, <fixed-case>D</fixed-case>an<fixed-case>N</fixed-case>et - AndersJohannsen + AndersJohannsen BoletteSandford Pedersen 295–298 W11-4643 @@ -8370,17 +8370,17 @@ Modularisation of <fixed-case>F</fixed-case>innish Finite-State Language Description – Towards Wide Collaboration in Open Source Development of a Morphological Analyser - TommiPirinen + TommiPirinen 299–302 W11-4644 pirinen-2011-modularisation A <fixed-case>P</fixed-case>rague Markup Language profile for the <fixed-case>S</fixed-case>em<fixed-case>T</fixed-case>i-Kamols grammar model - LaumaPretkalniņa + LaumaPretkalniņa GuntaNešpore - KristīneLevāne-Petrova - BaibaSaulīte + KristīneLevāne-Petrova + BaibaSaulīte 303–306 W11-4645 pretkalnina-etal-2011-prague diff --git a/data/xml/W12.xml b/data/xml/W12.xml index 7e251073f8..0464f7e04c 100644 --- a/data/xml/W12.xml +++ b/data/xml/W12.xml @@ -4,9 +4,9 @@ Proceedings of the Joint Workshop on Exploiting Synergies between Information Retrieval and Machine Translation (ESIRMT) and Hybrid Approaches to Machine Translation (HyTra) W12-01 - Marta R.Costa-jussà + Marta R.Costa-jussà PatrikLambert - Rafael E.Banchs + Rafael E.Banchs ReinhardRapp BogdanBabych Association for Computational Linguistics @@ -38,7 +38,7 @@ Full Machine Translation for Factoid Question Answering CristinaEspaña-Bonet - Pere R.Comas + Pere R.Comas 20–29 W12-0103 espana-bonet-comas-2012-full @@ -63,10 +63,10 @@ Combining <fixed-case>EBMT</fixed-case>, <fixed-case>SMT</fixed-case>, <fixed-case>TM</fixed-case> and <fixed-case>IR</fixed-case> Technologies for Quality and Scale - SandipanDandapat + SandipanDandapat SaraMorrissey AndyWay - Josefvan Genabith + Josefvan Genabith 48–58 W12-0106 dandapat-etal-2012-combining @@ -90,7 +90,7 @@ <fixed-case>PLUTO</fixed-case>: Automated Solutions for Patent Translation JohnTinsley - AlexandruCeausu + AlexandruCeausu JianZhang 69–71 W12-0109 @@ -105,14 +105,14 @@ Tree-based Hybrid Machine Translation - Andreas SøeborgKirkedal + Andreas SøeborgKirkedal 77–86 W12-0111 kirkedal-2012-tree Were the clocks striking or surprising? Using <fixed-case>WSD</fixed-case> to improve <fixed-case>MT</fixed-case> performance - ŠpelaVintar + ŠpelaVintar DarjaFišer AljošaVrščaj 87–92 @@ -122,7 +122,7 @@ Bootstrapping Method for Chunk Alignment in Phrase Based <fixed-case>SMT</fixed-case> SantanuPal - SivajiBandyopadhyay + SivajiBandyopadhyay 93–100 W12-0113 pal-bandyopadhyay-2012-bootstrapping @@ -131,9 +131,9 @@ Design of a hybrid high quality machine translation system BogdanBabych KurtEberle - JohannaGeiß - MireiaGinestí-Rosell - AnthonyHartley + JohannaGeiß + MireiaGinestí-Rosell + AnthonyHartley ReinhardRapp SergeSharoff MartinThomas @@ -152,7 +152,7 @@ Linguistically-Augmented <fixed-case>B</fixed-case>ulgarian-to-<fixed-case>E</fixed-case>nglish Statistical Machine Translation Model RuiWang PetyaOsenova - KirilSimov + KirilSimov 119–128 W12-0116 wang-etal-2012-linguistically @@ -160,7 +160,7 @@ Using Sense-labeled Discourse Connectives for Statistical Machine Translation ThomasMeyer - AndreiPopescu-Belis + AndreiPopescu-Belis 129–138 W12-0117 meyer-popescu-belis-2012-using @@ -199,9 +199,9 @@ Lexical Semantics and Distribution of Suffixes - A Visual Analysis ChristianRohrdantz AndreasNiekler - AnnetteHautli + AnnetteHautli MiriamButt - Daniel A.Keim + Daniel A.Keim 7–15 W12-0202 rohrdantz-etal-2012-lexical @@ -237,7 +237,7 @@ Automating Second Language Acquisition Research: Integrating Information Visualisation and Machine Learning HelenYannakoudakis - TedBriscoe + TedBriscoe TheodoraAlexopoulou 35–43 W12-0206 @@ -249,7 +249,7 @@ EkaterinaLapshinova-Koltunski StefaniaDegaetano-Ortlieb HenrikDittmann - ChrisCuly + ChrisCuly 44–48 W12-0207 lyding-etal-2012-visualising @@ -346,11 +346,11 @@ From Character to Word Level: Enabling the Linguistic Analyses of Inputlog Process Data - MariëlleLeijten + MariëlleLeijten LieveMacken - VeroniqueHoste + VeroniqueHoste EricVan Horenbeeck - LuukVan Waes + LuukVan Waes 1–8 W12-0301 leijten-etal-2012-character @@ -383,7 +383,7 @@ CamilleAlbert FloreBarcellini CorinneGrosse - PatrickSaint-Dizier + PatrickSaint-Dizier 35–38 W12-0305 albert-etal-2012-lelie @@ -401,8 +401,8 @@ Proceedings of the Workshop on Computational Approaches to Deception Detection W12-04 - EileenFitzpatrick - JoanBachenko + EileenFitzpatrick + JoanBachenko TommasoFornaciari Association for Computational Linguistics
Avignon, France
@@ -447,7 +447,7 @@ JeffHancock PoornimaPrabhu MyleOtt - ClaireCardie + ClaireCardie 23–30 W12-0404 gokhman-etal-2012-search @@ -463,7 +463,7 @@ On the Use of Homogenous Sets of Subjects in Deceptive Language Analysis TommasoFornaciari - MassimoPoesio + MassimoPoesio 39–47 W12-0406 fornaciari-poesio-2012-use @@ -501,9 +501,9 @@ Pastiche Detection Based on Stopword Rankings. Exposing Impersonators of a <fixed-case>R</fixed-case>omanian Writer - Liviu P.Dinu + Liviu P.Dinu VladNiculae - Maria-OctaviaSulea + Maria-OctaviaSulea 72–77 W12-0411 dinu-etal-2012-pastiche @@ -533,7 +533,7 @@ Identification of Truth and Deception in Text: Application of Vector Space Model to <fixed-case>R</fixed-case>hetorical <fixed-case>S</fixed-case>tructure <fixed-case>T</fixed-case>heory - Victoria L.Rubin + Victoria L.Rubin TatianaVashchilko 97–106 W12-0415 @@ -547,7 +547,7 @@ NataliaGrabar MarieDupuch AmandinePérinet - ThierryHamon + ThierryHamon Association for Computational Linguistics
Avignon, France
April @@ -562,7 +562,7 @@ Experiments on Hybrid Corpus-Based Sentiment Lexicon Acquisition GoranGlavaš JanŠnajder - BojanaDalbelo Bašić + BojanaDalbelo Bašić 1–9 W12-0501 glavas-etal-2012-experiments @@ -578,7 +578,7 @@ Hybrid Combination of Constituency and Dependency Trees into an Ensemble Dependency Parser NathanGreen - ZdeněkŽabokrtský + ZdeněkŽabokrtský 19–26 W12-0503 green-zabokrtsky-2012-hybrid @@ -594,14 +594,14 @@ An Unsupervised and Data-Driven Approach for Spell Checking in <fixed-case>V</fixed-case>ietnamese <fixed-case>OCR</fixed-case>-scanned Texts Cong Duy VuHoang - Ai TiAw + Ai TiAw 36–44 W12-0505 hoang-aw-2012-unsupervised Multilingual Natural Language Processing - RadaMihalcea + RadaMihalcea 45 W12-0506 mihalcea-2012-multilingual @@ -617,16 +617,16 @@ A Joint Named Entity Recognition and Entity Linking System RosaStern - BenoîtSagot - FrédéricBéchet + BenoîtSagot + FrédéricBéchet 52–60 W12-0508 stern-etal-2012-joint Collaborative Annotation of Dialogue Acts: Application of a New <fixed-case>ISO</fixed-case> Standard to the Switchboard Corpus - Alex C.Fang - HarryBunt + Alex C.Fang + HarryBunt JingCao XiaoyueLiu 61–68 @@ -658,7 +658,7 @@ Methods Combination and <fixed-case>ML</fixed-case>-based Re-ranking of Multiple Hypothesis for Question-Answering Systems ArnaudGrappy BrigitteGrau - SophieRosset + SophieRosset 87–96 W12-0512 grappy-etal-2012-methods @@ -667,7 +667,7 @@ A Generalised Hybrid Architecture for <fixed-case>NLP</fixed-case> AlistairWillis HuiYang - AnneDe Roeck + AnneDe Roeck 97–105 W12-0513 willis-etal-2012-generalised @@ -696,7 +696,7 @@ Proceedings of the Workshop on Semantic Analysis in Social Media W12-06 AtefehFarzindar - DianaInkpen + DianaInkpen Association for Computational Linguistics
Avignon, France
April @@ -710,7 +710,7 @@ Unsupervised Part-of-Speech Tagging in Noisy and Esoteric Domains With a Syntactic-Semantic <fixed-case>B</fixed-case>ayesian <fixed-case>HMM</fixed-case> William M.Darling - Michael J.Paul + Michael J.Paul FeiSong 1–9 W12-0601 @@ -751,16 +751,16 @@ A Hybrid Framework for Scalable Opinion Mining in Social Media: Detecting Polarities and Attitude Targets - CarlosRodríguez-Penagos + CarlosRodríguez-Penagos JensGrivolla - JoanCodina-Filba + JoanCodina-Filba 46–52 W12-0606 rodriguez-penagos-etal-2012-hybrid Predicting the 2011 <fixed-case>D</fixed-case>utch Senate Election Results with <fixed-case>T</fixed-case>witter - ErikTjong Kim Sang + ErikTjong Kim Sang JohanBos 53–60 W12-0607 @@ -780,11 +780,11 @@ Proceedings of the Joint Workshop on Unsupervised and Semi-Supervised Learning in NLP W12-07 OmriAbend - ChrisBiemann + ChrisBiemann AnnaKorhonen AriRappoport RoiReichart - AndersSøgaard + AndersSøgaard Association for Computational Linguistics
Avignon, France
April @@ -829,17 +829,17 @@
Improving Distantly Supervised Extraction of Drug-Drug and Protein-Protein Interactions - TamaraBobić + TamaraBobić RomanKlinger PhilippeThomas - MartinHofmann-Apitius + MartinHofmann-Apitius 35–43 W12-0705 bobic-etal-2012-improving Robust Induction of Parts-of-Speech in Child-Directed Language by Co-Clustering of Words and Contexts - Richard E.Leibbrandt + Richard E.Leibbrandt David MWPowers 44–54 W12-0706 @@ -847,7 +847,7 @@ Dependency Parsing Domain Adaptation using Transductive <fixed-case>SVM</fixed-case> - Antonio ValerioMiceli-Barone + Antonio ValerioMiceli-Barone GiuseppeAttardi 55–59 W12-0707 @@ -891,7 +891,7 @@ <fixed-case>TTT</fixed-case>: A Tree Transduction Language for Syntactic and Semantic Processing AdamPurtee - LenhartSchubert + LenhartSchubert 21–30 W12-0803 purtee-schubert-2012-ttt @@ -908,7 +908,7 @@ Proceedings of the Workshop on Computational Models of Language Acquisition and Loss W12-09 - RobertBerwick + RobertBerwick AnnaKorhonen ThierryPoibeau AlineVillavicencio @@ -924,7 +924,7 @@ Distinguishing Contact-Induced Change from Language Drift in Genetically Related Languages - T. MarkEllison + T. MarkEllison LuisaMiceli 1–9 W12-0901 @@ -940,7 +940,7 @@ Probabilistic Models of Grammar Acquisition - MarkSteedman + MarkSteedman 19 W12-0903 steedman-2012-probabilistic @@ -975,8 +975,8 @@ Webservices for <fixed-case>B</fixed-case>ayesian Learning - MuntsaPadró - NúriaBel + MuntsaPadró + NúriaBel 29–31 W12-0907 padro-bel-2012-webservices @@ -984,7 +984,7 @@ Unseen features. Collecting semantic data from congenital blind subjects AlessandroLenci - MarcoBaroni + MarcoBaroni GiovannaMarotta 32 W12-0908 @@ -1011,7 +1011,7 @@ AlineVillavicencio MarcoIdiart CarlosRamisch - VítorAraújo + VítorAraújo BeracahYankama RobertBerwick 43–50 @@ -1030,8 +1030,8 @@ Proceedings of the 6th Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities W12-10 - KalliopiZervanou - Antalvan den Bosch + KalliopiZervanou + Antalvan den Bosch Association for Computational Linguistics
Avignon, France
April @@ -1045,7 +1045,7 @@ Lexicon Construction and Corpus Annotation of Historical Language with the <fixed-case>C</fixed-case>o<fixed-case>B</fixed-case>a<fixed-case>LT</fixed-case> Editor TomKenter - TomažErjavec + TomažErjavec MajaŽorga Dulmin DarjaFišer 1–6 @@ -1058,7 +1058,7 @@ JeremyHammond HermanStehouwer AarthySomasundaram - SebastianDrude + SebastianDrude 7–12 W12-1002 dingemanse-etal-2012-high @@ -1066,7 +1066,7 @@ <fixed-case>BAD</fixed-case>: An Assistant tool for making verses in <fixed-case>B</fixed-case>asque ManexAgirrezabal - IñakiAlegria + IñakiAlegria BertolArrieta MansHulden 13–17 @@ -1093,7 +1093,7 @@ Ontology-Based Incremental Annotation of Characters in Folktales ThierryDeclerck NikolinaKoleva - Hans-UlrichKrieger + Hans-UlrichKrieger 30–34 W12-1006 declerck-etal-2012-ontology @@ -1109,7 +1109,7 @@ Distributional techniques for philosophical enquiry - AurélieHerbelot + AurélieHerbelot Evavon Redecker JohannaMüller 45–54 @@ -1128,7 +1128,7 @@ Parsing the Past - Identification of Verb Constructions in Historical Text EvaPettersson - BeátaMegyesi + BeátaMegyesi JoakimNivre 65–74 W12-1010 @@ -1136,7 +1136,7 @@ A Classical <fixed-case>C</fixed-case>hinese Corpus with Nested Part-of-Speech Tags - JohnLee + JohnLee 75–84 W12-1011 lee-2012-classical @@ -1151,11 +1151,11 @@ Enabling the Discovery of Digital Cultural Heritage Objects through <fixed-case>W</fixed-case>ikipedia - Mark MichaelHall - OierLopez de Lacalle - AitorSoroa Etxabe - PaulClough - EnekoAgirre + Mark MichaelHall + OierLopez de Lacalle + AitorSoroa Etxabe + PaulClough + EnekoAgirre 94–100 W12-1013 hall-etal-2012-enabling @@ -1171,8 +1171,8 @@ Natural Language Inspired Approach for Handwritten Text Line Detection in Legacy Documents VicenteBosch - Alejandro HéctorToselli - EnriqueVidal + Alejandro HéctorToselli + EnriqueVidal 107–111 W12-1015 bosch-etal-2012-natural @@ -1192,7 +1192,7 @@ W12-11 CyrilGrouin DominicForest - GillesSérasset + GillesSérasset ATALA/AFCP
Grenoble, France
June @@ -1205,8 +1205,8 @@ Indexation libre et contrôlée d’articles scientifiques. Présentation et résultats du défi fouille de textes <fixed-case>DEFT</fixed-case>2012 (Controlled and free indexing of scientific papers. Presentation and results of the <fixed-case>DEFT</fixed-case>2012 text-mining challenge) [in <fixed-case>F</fixed-case>rench] - PatrickParoubek - PierreZweigenbaum + PatrickParoubek + PierreZweigenbaum DominicForest CyrilGrouin 1–13 @@ -1217,14 +1217,14 @@ Key-concept extraction from <fixed-case>F</fixed-case>rench articles with <fixed-case>KX</fixed-case> SaraTonelli ElenaCabrio - EmanuelePianta + EmanuelePianta 15–24 W12-1102 tonelli-etal-2012-key Acquisition terminologique pour identifier les mots-clés d’articles scientifiques (Terminological acquisition for identifying keywords of scientific articles) [in <fixed-case>F</fixed-case>rench] - ThierryHamon + ThierryHamon 25–31 W12-1103 hamon-2012-acquisition @@ -1233,7 +1233,7 @@ Indexation à base des syntagmes nominaux (Nominal-chunk based indexing) [in <fixed-case>F</fixed-case>rench] AmineAmri MarouaMbarek - ChediBechikh + ChediBechikh ChirazLatiri HatemHaddad 33–39 @@ -1246,7 +1246,7 @@ MathieuBoucher RomainBrixtel GaëlLejeune - GaëlDias + GaëlDias 41–48 W12-1105 doualan-etal-2012-detection @@ -1264,7 +1264,7 @@ FlorianBoudin AmirHazem NicolasHernandez - PrajolShrestha + PrajolShrestha 61–68 W12-1107 boudin-etal-2012-participation @@ -1282,8 +1282,8 @@ Enrichir et raisonner sur des espaces sémantiques pour l’attribution de mots-clés (Enriching and reasoning on semantic spaces for keyword extraction) [in <fixed-case>F</fixed-case>rench] - AdilEl Ghali - DanielHromada + AdilEl Ghali + DanielHromada KaoutarEl Ghali 77–90 W12-1109 @@ -1294,9 +1294,9 @@ JEP-TALN-RECITAL 2012, Workshop DEGELS 2012: Défi GEste Langue des Signes (DEGELS 2012: Gestures and Sign Language Challenge) W12-12 - AnneliesBraffort + AnneliesBraffort LeïlaBoutora - GillesSérasset + GillesSérasset ATALA/AFCP
Grenoble, France
June @@ -1317,7 +1317,7 @@
Critères de segmentation de la gestualité co-verbale (Segmentation criteria for the annotation of co-speech gestures) [in <fixed-case>F</fixed-case>rench] - GaëlleFerré + GaëlleFerré 9–21 W12-1202 ferre-2012-criteres @@ -1351,7 +1351,7 @@ Influence de la segmentation temporelle sur la caractérisation de signes (Influence of the temporal segmentation on the sign characterization) [in <fixed-case>F</fixed-case>rench] FrançoisLefebvre-Albaret - JérémieSegouat + JérémieSegouat 73–83 W12-1206 lefebvre-albaret-segouat-2012-influence @@ -1375,9 +1375,9 @@ JEP-TALN-RECITAL 2012, Workshop TALAf 2012: Traitement Automatique des Langues Africaines (TALAf 2012: African Language Processing) W12-13 - ChantalEnguehard - MathieuMangeot - GillesSérasset + ChantalEnguehard + MathieuMangeot + GillesSérasset ATALA/AFCP
Grenoble, France
June @@ -1392,8 +1392,8 @@ <fixed-case>M</fixed-case>bochi : corpus oral, traitement automatique et exploration phonologique (<fixed-case>M</fixed-case>boshi: oral corpus, automatic processing & phonological mining) [in <fixed-case>F</fixed-case>rench] AnnieRialland MartialEmbanga Aborobongui - MartineAdda-Decker - LoriLamel + MartineAdda-Decker + LoriLamel 1–12 W12-1301 rialland-etal-2012-mbochi @@ -1432,8 +1432,8 @@ Analyse des performances de modèles de langage sub-lexicale pour des langues peu-dotées à morphologie riche (Performance analysis of sub-word language modeling for under-resourced languages with rich morphology: case study on <fixed-case>S</fixed-case>wahili and <fixed-case>A</fixed-case>mharic) [in <fixed-case>F</fixed-case>rench] HadrienGelas - Solomon TeferraAbate - LaurentBesacier + Solomon TeferraAbate + LaurentBesacier FrançoisPellegrino 53–62 W12-1305 @@ -1466,7 +1466,7 @@ Décrire la morphologie des verbes en ikota au moyen d’une métagrammaire (Describing the Morphology of Verbs in Ikota using a Metagrammar) [in <fixed-case>F</fixed-case>rench] DenysDuchier - Brunelle MagnanaEkoukou + Brunelle MagnanaEkoukou YannickParmentier SimonPetitjean EmmanuelSchang @@ -1478,7 +1478,7 @@ Extraction de lexiques bilingues à partir de Wikipédia (Bilingual lexicon extraction from <fixed-case>W</fixed-case>ikipedia) [in <fixed-case>F</fixed-case>rench] RahmaSellami FatihaSadat - LamiaHadrich Belguith + LamiaHadrich Belguith 107–117 W12-1310 sellami-etal-2012-extraction @@ -1490,7 +1490,7 @@ W12-14 FrançoisPortet MichelVacher - GillesSérasset + GillesSérasset ATALA/AFCP
Grenoble, France
June @@ -1561,7 +1561,7 @@ INLG 2012 Proceedings of the Seventh International Natural Language Generation Conference W12-15 BarbaraDi Eugenio - SusanMcRoy + SusanMcRoy Association for Computational Linguistics
Utica, IL
May @@ -1574,14 +1574,14 @@ Natural Language Generation and Assistive Technologies - KathleenMcCoy + KathleenMcCoy 1 W12-1501 mccoy-2012-natural Expressive <fixed-case>NLG</fixed-case> for Next-Generation Learning Environments: Language, Affect, and Narrative - JamesLester + JamesLester 2 W12-1502 lester-2012-expressive @@ -1589,8 +1589,8 @@ Learning Preferences for Referring Expression Generation: Effects of Domain, Language and Algorithm KoolenRuud - KrahmerEmiel - TheuneMariët + EmielKrahmer + MariëtTheune 3–11 W12-1503 koolen-etal-2012-learning @@ -1608,10 +1608,10 @@ <fixed-case>M</fixed-case>ink<fixed-case>A</fixed-case>pp: Generating Spatio-temporal Summaries for Nature Conservation Volunteers NavaTintarev YolandaMelero - SomayajuluSripada + SomayajuluSripada ElizabethTait - ReneVan Der Wal - ChrisMellish + ReneVan Der Wal + ChrisMellish 17–21 W12-1505 tintarev-etal-2012-minkapp @@ -1628,7 +1628,7 @@ Generation for Grammar Engineering ClaireGardent - GermanKruszewski + GermanKruszewski 31–39 W12-1507 gardent-kruszewski-2012-generation @@ -1637,7 +1637,7 @@ Perceptions of Alignment and Personality in Generated Dialogue AlastairGill CarstenBrockmann - JonOberlander + JonOberlander 40–48 W12-1508 gill-etal-2012-perceptions @@ -1645,7 +1645,7 @@ Optimising Incremental Generation for Spoken Dialogue Systems: Reducing the Need for Fillers NinaDethlefs - HelenHastie + HelenHastie VerenaRieser OliverLemon 49–58 @@ -1677,7 +1677,7 @@ Extractive email thread summarization: Can we do better than He Said She Said? - PabloDuboue + PabloDuboue 85–89 W12-1513 duboue-2012-extractive @@ -1692,7 +1692,7 @@ Reformulating student contributions in tutorial dialogue - PamelaJordan + PamelaJordan SandraKatz PatriciaAlbacete MichaelFord @@ -1711,7 +1711,7 @@ Sign Language Generation with Expert Systems and <fixed-case>CCG</fixed-case> - AlessandroMazzei + AlessandroMazzei 105–109 W12-1517 mazzei-2012-sign @@ -1738,10 +1738,10 @@ Blogging birds: Generating narratives about reintroduced species to promote public engagement AdvaithSiddharthan - MatthewGreen - Keesvan Deemter - ChrisMellish - Renévan der Wal + MatthewGreen + Keesvan Deemter + ChrisMellish + Renévan der Wal 120–124 W12-1520 siddharthan-etal-2012-blogging @@ -1786,11 +1786,11 @@ The Surface Realisation Task: Recent Developments and Future Plans - AnjaBelz + AnjaBelz BerndBohnet SimonMille LeoWanner - MichaelWhite + MichaelWhite 136–140 W12-1525 belz-etal-2012-surface @@ -1799,7 +1799,7 @@ <fixed-case>KBG</fixed-case>en – Text Generation from Knowledge Bases as a New Shared Task EvaBanik ClaireGardent - DoniaScott + DoniaScott NikhilDinesh FennieLiang 141–145 @@ -1811,14 +1811,14 @@ NadjetBouayad-Agha GerardCasamayor LeoWanner - ChrisMellish + ChrisMellish 146–149 W12-1527 bouayad-agha-etal-2012-content Shared Task Proposal: Syntactic Paraphrase Ranking - MichaelWhite + MichaelWhite 150–153 W12-1528 white-2012-shared @@ -1828,10 +1828,10 @@ Proceedings of the 13th Annual Meeting of the Special Interest Group on Discourse and Dialogue W12-16 - Gary GeunbaeLee + Gary GeunbaeLee JonathanGinzburg ClaireGardent - AmandaStent + AmandaStent Association for Computational Linguistics
Seoul, South Korea
July @@ -1851,7 +1851,7 @@
An End-to-End Evaluation of Two Situated Dialog Systems - Lina M.Rojas-Barahona + Lina M.Rojas-Barahona AlejandraLorenzo ClaireGardent 10–19 @@ -1863,7 +1863,7 @@ William YangWang SamanthaFinkelstein AmyOgan - Alan WBlack + Alan WBlack JustineCassell 20–29 W12-1603 @@ -1874,7 +1874,7 @@ AlexanderKoller KonstantinaGaroufi MariaStaudte - MatthewCrocker + MatthewCrocker 30–39 W12-1604 koller-etal-2012-enhancing @@ -1882,14 +1882,14 @@ Unsupervised Topic Modeling Approaches to Decision Summarization in Spoken Meetings LuWang - ClaireCardie + ClaireCardie 40–49 W12-1605 wang-cardie-2012-unsupervised An Unsupervised Approach to User Simulation: Toward Self-Improving Dialog Systems - SungjinLee + SungjinLee MaxineEskenazi 50–59 W12-1606 @@ -1899,27 +1899,27 @@ Hierarchical Conversation Structure Prediction in Multi-Party Chat ElijahMayfield DavidAdamson - CarolynPenstein Rosé + CarolynPenstein Rosé 60–69 W12-1607 mayfield-etal-2012-hierarchical Rapid Development Process of Spoken Dialogue Systems using Collaboratively Constructed Semantic Resources - MasahiroAraki + MasahiroAraki 70–73 W12-1608 araki-2012-rapid The Effect of Cognitive Load on a Statistical Dialogue System - MilicaGašić - PirrosTsiakoulis + MilicaGašić + PirrosTsiakoulis MatthewHenderson BlaiseThomson KaiYu EliTzirkel - SteveYoung + SteveYoung 74–78 W12-1609 gasic-etal-2012-effect @@ -1929,7 +1929,7 @@ ChristineHowes MatthewPurver RoseMcCabe - Patrick G. T.Healey + Patrick G. T.Healey MaryLavelle 79–83 W12-1610 @@ -1940,16 +1940,16 @@ TeruhisaMisu KallirroiGeorgila AntonLeuski - DavidTraum + DavidTraum 84–93 W12-1611 misu-etal-2012-reinforcement From Strangers to Partners: Examining Convergence within a Longitudinal Study of Task-Oriented Dialogue - Christopher M.Mitchell - Kristy ElizabethBoyer - James C.Lester + Christopher M.Mitchell + Kristy ElizabethBoyer + James C.Lester 94–98 W12-1612 mitchell-etal-2012-strangers @@ -1957,7 +1957,7 @@ The Structure and Generality of Spoken Route Instructions AasishPappu - AlexanderRudnicky + AlexanderRudnicky 99–107 W12-1613 pappu-rudnicky-2012-structure @@ -1965,15 +1965,15 @@ Improving Implicit Discourse Relation Recognition Through Feature Set Optimization JoonsukPark - ClaireCardie + ClaireCardie 108–112 W12-1614 park-cardie-2012-improving A Temporal Simulator for Developing Turn-Taking Methods for Spoken Dialogue Systems - Ethan O.Selfridge - Peter A.Heeman + Ethan O.Selfridge + Peter A.Heeman 113–117 W12-1615 selfridge-heeman-2012-temporal @@ -1989,7 +1989,7 @@ Estimating Adaptation of Dialogue Partners with Different Verbal Intelligence KseniyaZablotskaya - FernandoFernández-Martínez + FernandoFernández-Martínez WolfgangMinker 126–130 W12-1617 @@ -1998,20 +1998,20 @@ A Demonstration of Incremental Speech Understanding and Confidence Estimation in a Virtual Human Dialogue System DavidDeVault - DavidTraum + DavidTraum 131–133 W12-1618 devault-traum-2012-demonstration Integrating Location, Visibility, and Question-Answering in a Spoken Dialogue System for Pedestrian City Exploration - SrinivasanJanarthanam + SrinivasanJanarthanam OliverLemon XingkunLiu PhilBartie WilliamMackaness TiphaineDalmas - JanaGoetze + JanaGoetze 134–136 W12-1619 janarthanam-etal-2012-integrating @@ -2022,8 +2022,8 @@ EricForbell DavidDeVault KenjiSagae - DavidTraum - AlbertRizzo + DavidTraum + AlbertRizzo 137–139 W12-1620 morbini-etal-2012-mixed @@ -2032,7 +2032,7 @@ Towards Mediating Shared Perceptual Basis in Situated Dialogue ChangsongLiu RuiFang - JoyceChai + JoyceChai 140–149 W12-1621 liu-etal-2012-towards @@ -2048,8 +2048,8 @@ A Reranking Model for Discourse Segmentation using Subtree Features - NgoXuan Bach - NguyenLe Minh + NgoXuan Bach + NguyenLe Minh AkiraShimazu 160–168 W12-1623 @@ -2074,7 +2074,7 @@ Exploiting Machine-Transcribed Dialog Corpus to Improve Multiple Dialog States Tracking Methods - SungjinLee + SungjinLee MaxineEskenazi 189–196 W12-1626 @@ -2082,14 +2082,14 @@ Cohesion, Entrainment and Task Success in Educational Dialog - DianeLitman + DianeLitman 197 W12-1627 litman-2012-cohesion A Bottom-Up Exploration of the Dimensions of Dialog State in Spoken Interaction - Nigel G.Ward + Nigel G.Ward AlejandroVega 198–206 W12-1628 @@ -2098,7 +2098,7 @@ Using Group History to Identify Character-Directed Utterances in Multi-Child Interactions HannanehHajishirzi - Jill F.Lehman + Jill F.Lehman Jessica K.Hodgins 207–216 W12-1629 @@ -2106,8 +2106,8 @@ Adapting to Multiple Affective States in Spoken Dialogue - KateForbes-Riley - DianeLitman + KateForbes-Riley + DianeLitman 217–226 W12-1630 forbes-riley-litman-2012-adapting @@ -2141,11 +2141,11 @@ Combining Verbal and Nonverbal Features to Overcome the “Information Gap” in Task-Oriented Dialogue - Eun YoungHa - Joseph F.Grafsgaard - ChristopherMitchell - Kristy ElizabethBoyer - James C.Lester + Eun YoungHa + Joseph F.Grafsgaard + ChristopherMitchell + Kristy ElizabethBoyer + James C.Lester 247–256 W12-1634 ha-etal-2012-combining @@ -2153,7 +2153,7 @@ Semantic Specificity in Spoken Dialogue Requests BenHixon - Rebecca J.Passonneau + Rebecca J.Passonneau Susan L.Epstein 257–260 W12-1635 @@ -2177,10 +2177,10 @@ Integrating Incremental Speech Recognition and <fixed-case>POMDP</fixed-case>-Based Dialogue Systems - Ethan O.Selfridge + Ethan O.Selfridge IkerArizmendi - Peter A.Heeman - Jason D.Williams + Peter A.Heeman + Jason D.Williams 275–279 W12-1638 selfridge-etal-2012-integrating @@ -2216,14 +2216,14 @@ Focused Meeting Summarization via Unsupervised Relation Extraction LuWang - ClaireCardie + ClaireCardie 304–313 W12-1642 wang-cardie-2012-focused <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic <fixed-case>N</fixed-case>etworks for Situated Incremental Natural Language Understanding - CaseyKennington + CaseyKennington DavidSchlangen 314–323 W12-1643 @@ -2235,7 +2235,7 @@ Proceedings of the 3rd Workshop on Cognitive Modeling and Computational Linguistics (CMCL 2012) W12-17 DavidReitter - RogerLevy + RogerLevy Association for Computational Linguistics
Montréal, Canada
June @@ -2283,7 +2283,7 @@
Connectionist-Inspired Incremental <fixed-case>PCFG</fixed-case> Parsing - Martenvan Schijndel + Martenvan Schijndel AndyExley WilliamSchuler 51–60 @@ -2292,7 +2292,7 @@ Sequential vs. Hierarchical Syntactic Models of Human Incremental Sentence Processing - VictoriaFossum + VictoriaFossum RogerLevy 61–69 W12-1706 @@ -2302,7 +2302,7 @@ Modeling covert event retrieval in logical metonymy: probabilistic and distributional accounts AlessandraZarcone JasonUtt - SebastianPadó + SebastianPadó 70–79 W12-1707 zarcone-etal-2012-modeling @@ -2322,8 +2322,8 @@ NAACL-HLT Workshop on Future directions and needs in the Spoken Dialog Community: Tools and Data (SDCTD 2012) W12-18 MaxineEskenazi - AlanBlack - DavidTraum + AlanBlack + DavidTraum Association for Computational Linguistics
Montréal, Canada
June @@ -2337,7 +2337,7 @@ Up from Limited Dialog Systems! GiuseppeRiccardi - PhilippCimiano + PhilippCimiano AlexandrosPotamianos ChristinaUnger 1–2 @@ -2346,7 +2346,7 @@ Directions for Research on Spoken Dialog Systems, Broadly Defined - Nigel G.Ward + Nigel G.Ward 3–4 W12-1802 ward-2012-directions @@ -2386,7 +2386,7 @@ Towards Situated Collaboration - DanBohus + DanBohus EceKamar EricHorvitz 13–14 @@ -2395,7 +2395,7 @@ Incremental Spoken Dialogue Systems: Tools and Data - HelenHastie + HelenHastie OliverLemon NinaDethlefs 15–16 @@ -2404,7 +2404,7 @@ After Dialog Went Pervasive: Separating Dialog Behavior Modeling and Task Modeling - AmandaStent + AmandaStent 17–18 W12-1809 stent-2012-dialog @@ -2419,21 +2419,21 @@ Bridging Gaps for Spoken Dialog System Frameworks in Instructional Settings - Gina-AnneLevow + Gina-AnneLevow 21–22 W12-1811 levow-2012-bridging A belief tracking challenge task for spoken dialog systems - JasonWilliams + JasonWilliams 23–24 W12-1812 williams-2012-belief Framework for the Development of Spoken Dialogue System based on Collaboratively Constructed Semantic Resources - MasahiroAraki + MasahiroAraki DaisukeTakegoshi 25–28 W12-1813 @@ -2457,8 +2457,8 @@ Mining Search Query Logs for Spoken Language Understanding - DilekHakkani-Tür - GokhanTür + DilekHakkani-Tür + GokhanTür AsliCelikyilmaz 37–40 W12-1816 @@ -2479,8 +2479,8 @@ One Year of Contender: What Have We Learned about Assessing and Tuning Industrial Spoken Dialog Systems? - DavidSuendermann - RobertoPieraccini + DavidSuendermann + RobertoPieraccini 45–48 W12-1818 suendermann-pieraccini-2012-one @@ -2499,9 +2499,9 @@ Proceedings of the NAACL-HLT Workshop on the Induction of Linguistic Structure W12-19 - TrevorCohn - PhilBlunsom - JoaoGraca + TrevorCohn + PhilBlunsom + JoaoGraca Association for Computational Linguistics
Montréal, Canada
June @@ -2535,9 +2535,9 @@
Capitalization Cues Improve Dependency Grammar Induction - Valentin I.Spitkovsky - HiyanAlshawi - DanielJurafsky + Valentin I.Spitkovsky + HiyanAlshawi + DanielJurafsky 16–22 W12-1903 spitkovsky-etal-2012-capitalization @@ -2554,7 +2554,7 @@ Exploiting Partial Annotations with <fixed-case>EM</fixed-case> Training DirkHovy - EduardHovy + EduardHovy 31–38 W12-1905 hovy-hovy-2012-exploiting @@ -2594,7 +2594,7 @@ Two baselines for unsupervised dependency parsing - AndersSøgaard + AndersSøgaard 81–83 W12-1910 sogaard-2012-two @@ -2602,7 +2602,7 @@ Unsupervised Dependency Parsing using Reducibility and Fertility features DavidMareček - ZdeněkŽabokrtský + ZdeněkŽabokrtský 84–89 W12-1911 marecek-zabokrtsky-2012-unsupervised @@ -2618,15 +2618,15 @@ Turning the pipeline into a loop: Iterated unsupervised dependency parsing and <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> induction ChristosChristodoulopoulos - SharonGoldwater - MarkSteedman + SharonGoldwater + MarkSteedman 96–99 W12-1913 christodoulopoulos-etal-2012-turning Hierarchical clustering of word class distributions - GrzegorzChrupała + GrzegorzChrupała 100–104 W12-1914 chrupala-2012-hierarchical @@ -2643,7 +2643,7 @@ Proceedings of the Seventh Workshop on Building Educational Applications Using NLP W12-20 - JoelTetreault + JoelTetreault JillBurstein ClaudiaLeacock Association for Computational Linguistics @@ -2658,10 +2658,10 @@ Question Ranking and Selection in Tutorial Dialogues - LeeBecker - MarthaPalmer + LeeBecker + MarthaPalmer Sarelvan Vuuren - WayneWard + WayneWard 1–11 W12-2001 becker-etal-2012-question @@ -2670,9 +2670,9 @@ Identifying science concepts and student misconceptions in an interactive essay writing tutor StevenBethard IfeyinwaOkoye - Md. ArafatSultan + Md. ArafatSultan HaojieHang - James H.Martin + James H.Martin TamaraSumner 12–21 W12-2002 @@ -2680,7 +2680,7 @@ Automatic Grading of Scientific Inquiry - AvirupSil + AvirupSil AngelaShelton Diane JassKetelhut AlexanderYates @@ -2691,7 +2691,7 @@ Modeling coherence in <fixed-case>ESOL</fixed-case> learner texts HelenYannakoudakis - TedBriscoe + TedBriscoe 33–43 W12-2004 yannakoudakis-briscoe-2012-modeling @@ -2700,7 +2700,7 @@ Exploring Grammatical Error Correction with Not-So-Crummy Machine Translation NitinMadnani JoelTetreault - MartinChodorow + MartinChodorow 44–53 W12-2005 madnani-etal-2012-exploring @@ -2731,11 +2731,11 @@ <fixed-case>PREFER</fixed-case>: Using a Graph-Based Approach to Generate Paraphrases for Language Learning - Mei-HuaChen - Shi-TingHuang - Chung-ChiHuang + Mei-HuaChen + Shi-TingHuang + Chung-ChiHuang Hsien-ChinLiou - Jason S.Chang + Jason S.Chang 80–85 W12-2009 chen-etal-2012-prefer @@ -2751,7 +2751,7 @@ Predicting Learner Levels for Online Exercises of <fixed-case>H</fixed-case>ebrew MarkusDickinson - SandraKübler + SandraKübler AnthonyMeyer 95–104 W12-2011 @@ -2778,14 +2778,14 @@ Scoring Spoken Responses Based on Content Accuracy FeiHuang LeiChen - JanaSukkarieh + JanaSukkarieh 122–126 W12-2014 huang-etal-2012-scoring Developing <fixed-case>ARET</fixed-case>: An <fixed-case>NLP</fixed-case>-based Educational Tool Set for <fixed-case>A</fixed-case>rabic Reading Enhancement - MohammedMaamouri + MohammedMaamouri WajdiZaghouani ViolettaCavalli-Sforza DaveGraff @@ -2806,7 +2806,7 @@ Generating Grammar Exercises LauraPerez-Beltrachini ClaireGardent - GermanKruszewski + GermanKruszewski 147–156 W12-2017 perez-beltrachini-etal-2012-generating @@ -2822,7 +2822,7 @@ On Improving the Accuracy of Readability Classification using Insights from Second Language Acquisition SowmyaVajjala - DetmarMeurers + DetmarMeurers 163–173 W12-2019 vajjala-meurers-2012-improving @@ -2830,7 +2830,7 @@ An Interactive Analytic Tool for Peer-Review Exploration WentingXiong - DianeLitman + DianeLitman JingtaoWang ChristianSchunn 174–179 @@ -2839,7 +2839,7 @@ Vocabulary Profile as a Measure of Vocabulary Sophistication - Su-YounYoon + Su-YounYoon SumaBhat KlausZechner 180–189 @@ -2850,7 +2850,7 @@ Short Answer Assessment: Establishing Links Between Research Strands RamonZiai NielsOtt - DetmarMeurers + DetmarMeurers 190–200 W12-2022 ziai-etal-2012-short @@ -2860,7 +2860,7 @@ PinakiBhaskar AniruddhaGhosh SantanuPal - SivajiBandyopadhyay + SivajiBandyopadhyay 201–207 W12-2023 bhaskar-etal-2012-detection @@ -2869,7 +2869,7 @@ Informing Determiner and Preposition Error Correction with Hierarchical Word Clustering AdrianeBoyd MarionZepf - DetmarMeurers + DetmarMeurers 208–215 W12-2024 boyd-etal-2012-informing @@ -2885,7 +2885,7 @@ <fixed-case>VTEX</fixed-case> Determiner and Preposition Correction System for the <fixed-case>HOO</fixed-case> 2012 Shared Task - VidasDaudaravičius + VidasDaudaravičius 225–232 W12-2026 daudaravicius-2012-vtex @@ -2902,8 +2902,8 @@ <fixed-case>HOO</fixed-case> 2012 Error Recognition and Correction Shared Task: <fixed-case>C</fixed-case>ambridge <fixed-case>U</fixed-case>niversity Submission Report EkaterinaKochmar - ØisteinAndersen - TedBriscoe + ØisteinAndersen + TedBriscoe 242–250 W12-2028 kochmar-etal-2012-hoo @@ -2912,7 +2912,7 @@ <fixed-case>K</fixed-case>orea <fixed-case>U</fixed-case>niversity System in the <fixed-case>HOO</fixed-case> 2012 Shared Task JieunLee Jung-TaeLee - Hae-ChangRim + Hae-ChangRim 251–256 W12-2029 lee-etal-2012-korea @@ -2930,7 +2930,7 @@ <fixed-case>KU</fixed-case> Leuven at <fixed-case>HOO</fixed-case>-2012: A Hybrid Approach to Detection and Correction of Determiner and Preposition Errors in Non-native <fixed-case>E</fixed-case>nglish Text LiQuan OleksandrKolomiyets - Marie-FrancineMoens + Marie-FrancineMoens 263–271 W12-2031 quan-etal-2012-ku @@ -2952,14 +2952,14 @@ LisKanashiro TomoyaMizumoto MamoruKomachi - YujiMatsumoto + YujiMatsumoto 281–288 W12-2033 sakaguchi-etal-2012-naist Memory-based text correction for preposition and determiner errors - Antalvan den Bosch + Antalvan den Bosch PeterBerck 289–294 W12-2034 @@ -2967,12 +2967,12 @@ Helping Our Own: <fixed-case>NTHU</fixed-case> <fixed-case>NLPLAB</fixed-case> System Description - Jian-ChengWu - JosephChang - Yi-ChunChen - Shih-TingHuang - Mei-HuaChen - Jason S.Chang + Jian-ChengWu + JosephChang + Yi-ChunChen + Shih-TingHuang + Mei-HuaChen + Jason S.Chang 295–301 W12-2035 wu-etal-2012-helping @@ -2987,8 +2987,8 @@ Crowdsourced Comprehension: Predicting Prerequisite Structure in <fixed-case>W</fixed-case>ikipedia - ParthaTalukdar - WilliamCohen + ParthaTalukdar + WilliamCohen 307–315 W12-2037 talukdar-cohen-2012-crowdsourced @@ -3005,7 +3005,7 @@ Evaluating the Meaning of Answers to Reading Comprehension Questions: A Semantics-Based Approach MichaelHahn - DetmarMeurers + DetmarMeurers 326–336 W12-2039 hahn-meurers-2012-evaluating @@ -3016,7 +3016,7 @@ Proceedings of the Second Workshop on Language in Social Media W12-21 Sara OwsleySood - MeenakshiNagarajan + MeenakshiNagarajan MichaelGamon Association for Computational Linguistics
Montréal, Canada
@@ -3031,7 +3031,7 @@ Analyzing <fixed-case>U</fixed-case>rdu Social Media for Sentiments using Transfer Learning with Controlled Translations SmruthiMukund - RohiniSrihari + RohiniSrihari 1–8 W12-2101 mukund-srihari-2012-analyzing @@ -3039,8 +3039,8 @@ Detecting Distressed and Non-distressed Affect States in Short Forum Texts MichaelThaul Lehrman - CeciliaOvesdotter Alm - Rubén A.Proaño + CeciliaOvesdotter Alm + Rubén A.Proaño 9–18 W12-2102 thaul-lehrman-etal-2012-detecting @@ -3048,7 +3048,7 @@ Detecting Hate Speech on the World Wide Web WilliamWarner - JuliaHirschberg + JuliaHirschberg 19–26 W12-2103 warner-hirschberg-2012-detecting @@ -3070,8 +3070,8 @@ OrBiran SaraRosenthal JacobAndreas - KathleenMcKeown - OwenRambow + KathleenMcKeown + OwenRambow 37–45 W12-2105 biran-etal-2012-detecting @@ -3088,9 +3088,9 @@ Robust kaomoji detection in <fixed-case>T</fixed-case>witter StevenBedrick - RussellBeckley + RussellBeckley BrianRoark - RichardSproat + RichardSproat 56–64 W12-2107 bedrick-etal-2012-robust @@ -3146,7 +3146,7 @@ Towards Automatic Lexical Simplification in <fixed-case>S</fixed-case>panish: An Empirical Study - BiljanaDrndarević + BiljanaDrndarević HoracioSaggion 8–16 W12-2202 @@ -3184,8 +3184,8 @@ Making Readability Indices Readable SaraTonelli - KeTran Manh - EmanuelePianta + KeTran Manh + EmanuelePianta 40–48 W12-2206 tonelli-etal-2012-making @@ -3202,7 +3202,7 @@ Comparing human versus automatic feature extraction for fine-grained elementary readability assessment YiMa RituSingh - EricFosler-Lussier + EricFosler-Lussier RobertLofthus 58–64 W12-2208 @@ -3213,7 +3213,7 @@ Proceedings of the Twelfth Meeting of the Special Interest Group on Computational Morphology and Phonology W12-23 - LynneCahill + LynneCahill AdamAlbright Association for Computational Linguistics
Montréal, Canada
@@ -3238,7 +3238,7 @@ <fixed-case>H</fixed-case>indi Derivational Morphological Analyzer NikhilKanuparthi AbhilashInumella - DiptiMisra Sharma + DiptiMisra Sharma 10–16 W12-2302 kanuparthi-etal-2012-hindi @@ -3287,7 +3287,7 @@ JoePater RobertStaubs KarenJesney - BrianSmith + BrianSmith 62–71 W12-2308 pater-etal-2012-learning @@ -3304,12 +3304,12 @@ BioNLP: Proceedings of the 2012 Workshop on Biomedical Natural Language Processing W12-24 - Kevin B.Cohen + Kevin B.Cohen DinaDemner-Fushman SophiaAnaniadou - BonnieWebber - Jun’ichiTsujii - JohnPestian + BonnieWebber + Jun’ichiTsujii + JohnPestian Association for Computational Linguistics
Montréal, Canada
June @@ -3322,7 +3322,7 @@ Graph-based alignment of narratives for automated neurological assessment - EmilyPrud’hommeaux + EmilyPrud’hommeaux BrianRoark 1–10 W12-2401 @@ -3331,7 +3331,7 @@ Bootstrapping Biomedical Ontologies for Scientific Text using <fixed-case>NELL</fixed-case> DanaMovshovitz-Attias - William W.Cohen + William W.Cohen 11–19 W12-2402 movshovitz-attias-cohen-2012-bootstrapping @@ -3340,7 +3340,7 @@ Semantic distance and terminology structuring methods for the detection of semantically close terms MarieDupuch LaëtitiaDupuch - ThierryHamon + ThierryHamon NataliaGrabar 20–28 W12-2403 @@ -3349,8 +3349,8 @@ Temporal Classification of Medical Events PreethiRaghavan - EricFosler-Lussier - AlbertLai + EricFosler-Lussier + AlbertLai 29–37 W12-2404 raghavan-etal-2012-temporal @@ -3368,33 +3368,33 @@ Alignment-<fixed-case>HMM</fixed-case>-based Extraction of Abbreviations from Biomedical Text DanaMovshovitz-Attias - William W.Cohen + William W.Cohen 47–55 W12-2406 movshovitz-attias-cohen-2012-alignment Medical diagnosis lost in translation – Analysis of uncertainty and negation expressions in <fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>wedish clinical texts - Danielle LMowery + Danielle LMowery SumithraVelupillai - Wendy WChapman + Wendy WChapman 56–64 W12-2407 mowery-etal-2012-medical A Hybrid Stepwise Approach for De-identifying Person Names in Clinical Documents - OscarFerrández + OscarFerrández BrettSouth ShuyingShen - StéphaneMeystre + StéphaneMeystre 65–72 W12-2408 ferrandez-etal-2012-hybrid Active Learning for Coreference Resolution - TimothyMiller + TimothyMiller DmitriyDligach GuerganaSavova 73–81 @@ -3417,7 +3417,7 @@ An improved corpus of disease mentions in <fixed-case>P</fixed-case>ub<fixed-case>M</fixed-case>ed citations - RezartaIslamaj Doğan + RezartaIslamaj Doğan ZhiyongLu 91–99 W12-2411 @@ -3436,7 +3436,7 @@ Combining Compositionality and Pagerank for the Identification of Semantic Relations between Biomedical Words - ThierryHamon + ThierryHamon ChristopherEngström MouniraManser ZinaBadji @@ -3462,7 +3462,7 @@ MichaelShafir MichaelCrivaro BensiinBorukhov - MarieMeteer + MarieMeteer 122–129 W12-2415 thamrongrattanarit-etal-2012-nlp @@ -3474,14 +3474,14 @@ JianweiLeng TylerForbush ScottDuVall - WendyChapman + WendyChapman 130–139 W12-2416 south-etal-2012-prototype <fixed-case>M</fixed-case>ed<fixed-case>L</fixed-case>ing<fixed-case>M</fixed-case>ap: A growing resource mapping the Bio-Medical <fixed-case>NLP</fixed-case> field - MarieMeteer + MarieMeteer BensiinBorukhov MikeCrivaro MichaelShafir @@ -3496,7 +3496,7 @@ LeslieLange Jose LuisAmbite YigalArens - Chun-NanHsu + Chun-NanHsu 146–154 W12-2418 sharma-etal-2012-exploring @@ -3505,7 +3505,7 @@ Evaluating Joint Modeling of Yeast Biology Literature and Protein-Protein Interaction Networks RamnathBalasubramanyan KathrynRivard - William W.Cohen + William W.Cohen JelenaJakovljevic John L.Woolford 155–162 @@ -3514,8 +3514,8 @@ <fixed-case>R</fixed-case>ank<fixed-case>P</fixed-case>ref: Ranking Sentences Describing Relations between Biomedical Entities with an Application - Catalina OanaTudor - KVijay-Shanker + Catalina OanaTudor + KVijay-Shanker 163–171 W12-2420 tudor-vijay-shanker-2012-rankpref @@ -3525,7 +3525,7 @@ YingYan Jee-HyubKim SamuelCroset - DietrichRebholz-Schuhmann + DietrichRebholz-Schuhmann 172–175 W12-2421 yan-etal-2012-finding @@ -3543,7 +3543,7 @@ Classifying Gene Sentences in Biomedical Literature by Combining High-Precision Gene Identifiers SunKim WonKim - DonComeau + DonComeau W. JohnWilbur 185–192 W12-2423 @@ -3567,7 +3567,7 @@ Using Natural Language Processing to Extract Drug-Drug Interaction Information from Package Inserts - RichardBoyce + RichardBoyce GregoryGardner HenkHarkema 206–213 @@ -3579,7 +3579,7 @@ NateSutton LauraWojtulewicz NeelMehta - GracielaGonzalez + GracielaGonzalez 214–222 W12-2427 sutton-etal-2012-automatic @@ -3617,10 +3617,10 @@ Proceedings of the NAACL-HLT 2012 Workshop on Computational Linguistics for Literature W12-25 - DavidElson + DavidElson AnnaKazantseva - RadaMihalcea - StanSzpakowicz + RadaMihalcea + StanSzpakowicz Association for Computational Linguistics
Montréal, Canada
June @@ -3643,7 +3643,7 @@ A Computational Analysis of Style, Affect, and Imagery in Contemporary Poetry JustineKao - DanJurafsky + DanJurafsky 8–17 W12-2502 kao-jurafsky-2012-computational @@ -3651,7 +3651,7 @@ Towards a Literary Machine Translation: The Role of Referential Cohesion RobVoigt - DanJurafsky + DanJurafsky 18–25 W12-2503 voigt-jurafsky-2012-towards @@ -3683,7 +3683,7 @@ Mining wisdom - AndersSøgaard + AndersSøgaard 54–58 W12-2507 sogaard-2012-mining @@ -3707,7 +3707,7 @@ A Dictionary of Wisdom and Wit: Learning to Extract Quotable Phrases MichaelBendersky - DavidSmith + DavidSmith 69–77 W12-2510 bendersky-smith-2012-dictionary @@ -3716,7 +3716,7 @@ A Pilot <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank Annotation for <fixed-case>Q</fixed-case>uranic <fixed-case>A</fixed-case>rabic WajdiZaghouani AbdelatiHawwari - MonaDiab + MonaDiab 78–83 W12-2511 zaghouani-etal-2012-pilot @@ -3733,7 +3733,7 @@ ApoorvAgarwal AugustoCorvalan JacobJensen - OwenRambow + OwenRambow 88–96 W12-2513 agarwal-etal-2012-social @@ -3750,8 +3750,8 @@ Proceedings of Workshop on Evaluation Metrics and System Comparison for Automatic Summarization W12-26 - John M.Conroy - Hoa TrangDang + John M.Conroy + Hoa TrangDang AniNenkova KarolinaOwczarzak Association for Computational Linguistics @@ -3778,7 +3778,7 @@ Using the Omega Index for Evaluating Abstractive Community Detection GabrielMurray GiuseppeCarenini - RaymondNg + RaymondNg 10–18 W12-2602 murray-etal-2012-using @@ -3803,7 +3803,7 @@ The Heterogeneity Principle in Evaluation Measures for Automatic Summarization - EnriqueAmigó + EnriqueAmigó JulioGonzalo FelisaVerdejo 36–43 @@ -3824,8 +3824,8 @@ Proceedings of the NAACL-HLT 2012 Workshop: Will We Ever Really Replace the N-gram Model? On the Future of Language Modeling for HLT W12-27 - BhuvanaRamabhadran - SanjeevKhudanpur + BhuvanaRamabhadran + SanjeevKhudanpur EbruArisoy Association for Computational Linguistics
Montréal, Canada
@@ -3839,7 +3839,7 @@ Measuring the Influence of Long Range Dependencies with Neural Network Language Models - Hai SonLe + Hai SonLe AlexandreAllauzen FrançoisYvon 1–10 @@ -3859,7 +3859,7 @@ Deep Neural Network Language Models EbruArisoy Tara N.Sainath - BrianKingsbury + BrianKingsbury BhuvanaRamabhadran 20–28 W12-2703 @@ -3867,15 +3867,15 @@ A Challenge Set for Advancing Language Modeling - GeoffreyZweig - Chris J.C.Burges + GeoffreyZweig + Chris J.C.Burges 29–36 W12-2704 zweig-burges-2012-challenge Unsupervised Vocabulary Adaptation for Morph-based Language Models - AndréMansikkaniemi + AndréMansikkaniemi MikkoKurimo 37–40 W12-2705 @@ -3948,10 +3948,10 @@ Proceedings of the Third Workshop on Speech and Language Processing for Assistive Technologies W12-29 JanAlexandersson - PeterLjunglöf - Kathleen F.McCoy + PeterLjunglöf + Kathleen F.McCoy BrianRoark - AnnaluWaller + AnnaluWaller Association for Computational Linguistics
Montréal, Canada
June @@ -3975,8 +3975,8 @@ <fixed-case>W</fixed-case>ink<fixed-case>T</fixed-case>alk: a demonstration of a multimodal speech synthesis platform linking facial expressions to expressive synthetic voices ÉvaSzékely ZeeshanAhmed - João P.Cabral - JulieCarson-Berndsen + João P.Cabral + JulieCarson-Berndsen 5–8 W12-2902 szekely-etal-2012-winktalk @@ -3984,7 +3984,7 @@ Discourse-Based Modeling for <fixed-case>AAC</fixed-case> MargaretMitchell - RichardSproat + RichardSproat 9–18 W12-2903 mitchell-sproat-2012-discourse @@ -4010,9 +4010,9 @@ Assisting Social Conversation between Persons with <fixed-case>A</fixed-case>lzheimer’s Disease and their Conversational Partners - NancyGreen - CurryGuinn - RonnieSmith + NancyGreen + CurryGuinn + RonnieSmith 37–46 W12-2906 green-etal-2012-assisting @@ -4060,11 +4060,11 @@ Proceedings of the Joint Workshop on Automatic Knowledge Base Construction and Web-scale Knowledge Extraction (AKBC-WEKEX) W12-30 JamesFan - RaphaelHoffman + RaphaelHoffman AdityaKalyanpur SebastianRiedel - FabianSuchanek - Partha PratimTalukdar + FabianSuchanek + Partha PratimTalukdar Association for Computational Linguistics
Montréal, Canada
June @@ -4086,8 +4086,8 @@
Collectively Representing Semi-Structured Data from the Web - BhavanaDalvi - WilliamCohen + BhavanaDalvi + WilliamCohen JamieCallan 7–12 W12-3002 @@ -4122,7 +4122,7 @@ Web Based Collection and Comparison of Cognitive Properties in <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese BinLi - JiajunChen + JiajunChen YingjieZhang 31–34 W12-3006 @@ -4131,14 +4131,14 @@ Population of a Knowledge Base for News Metadata from Unstructured Text and Web Data RosaStern - BenoîtSagot + BenoîtSagot 35–40 W12-3007 stern-sagot-2012-population Real-time Population of Knowledge Bases: Opportunities and Challenges - NdapandulaNakashole + NdapandulaNakashole GerhardWeikum 41–45 W12-3008 @@ -4161,7 +4161,7 @@ Structural Linguistics and Unsupervised Information Extraction - RalphGrishman + RalphGrishman 57–61 W12-3011 grishman-2012-structural @@ -4171,10 +4171,10 @@ VeselinStoyanov JamesMayfield TanXu - DouglasOard + DouglasOard DawnLawrie TimOates - TimFinin + TimFinin 62–67 W12-3012 stoyanov-etal-2012-context @@ -4182,7 +4182,7 @@ Evaluating the Quality of a Knowledge Base Populated from Text JamesMayfield - TimFinin + TimFinin 68–73 W12-3013 mayfield-finin-2012-evaluating @@ -4190,7 +4190,7 @@ Constructing a Textual <fixed-case>KB</fixed-case> from a Biology <fixed-case>T</fixed-case>ext<fixed-case>B</fixed-case>ook PeterClark - PhilHarrison + PhilHarrison NiranjanBalasubramanian OrenEtzioni 74–78 @@ -4199,7 +4199,7 @@ Knowledge Extraction and Joint Inference Using Tractable <fixed-case>M</fixed-case>arkov <fixed-case>L</fixed-case>ogic - ChloéKiddon + ChloéKiddon PedroDomingos 79–83 W12-3015 @@ -4226,7 +4226,7 @@ Annotated <fixed-case>G</fixed-case>igaword CourtneyNapoles - MatthewGormley + MatthewGormley BenjaminVan Durme 95–100 W12-3018 @@ -4235,7 +4235,7 @@ Rel-grams: A Probabilistic Model of Relations in Text NiranjanBalasubramanian - StephenSoderland + StephenSoderland Mausam OrenEtzioni 101–105 @@ -4244,7 +4244,7 @@ Automatic Knowledge Base Construction using Probabilistic Extraction, Deductive Reasoning, and Human Feedback - Daisy ZheWang + Daisy ZheWang YangChen SeanGoldberg ChristanGrant @@ -4274,7 +4274,7 @@ Using Textual Patterns to Learn Expected Event Frequencies JonathanGordon - LenhartSchubert + LenhartSchubert 122–127 W12-3023 gordon-schubert-2012-using @@ -4321,7 +4321,7 @@ Semantic Textual Similarity for <fixed-case>MT</fixed-case> evaluation - JulioCastillo + JulioCastillo PaulaEstrella 52–58 W12-3103 @@ -4340,15 +4340,15 @@ <fixed-case>T</fixed-case>error<fixed-case>C</fixed-case>at: a Translation Error Categorization-based <fixed-case>MT</fixed-case> Quality Metric MarkFishel RicoSennrich - MajaPopović - OndřejBojar + MajaPopović + OndřejBojar 64–70 W12-3105 fishel-etal-2012-terrorcat Class error rates for evaluation of machine translation output - MajaPopović + MajaPopović 71–75 W12-3106 popovic-2012-class @@ -4356,7 +4356,7 @@ <fixed-case>SPEDE</fixed-case>: Probabilistic Edit Distance Metrics for <fixed-case>MT</fixed-case> Evaluation MengqiuWang - ChristopherManning + ChristopherManning 76–83 W12-3107 wang-manning-2012-spede @@ -4385,9 +4385,9 @@ <fixed-case>PRHLT</fixed-case> Submission to the <fixed-case>WMT</fixed-case>12 Quality Estimation Task - JesúsGonzález Rubio + JesúsGonzález Rubio AlbertoSanchis - FranciscoCasacuberta + FranciscoCasacuberta 104–108 W12-3111 gonzalez-rubio-etal-2012-prhlt @@ -4396,7 +4396,7 @@ Tree Kernels for Machine Translation Quality Estimation ChristianHardmeier JoakimNivre - JörgTiedemann + JörgTiedemann 109–113 W12-3112 hardmeier-etal-2012-tree @@ -4405,7 +4405,7 @@ <fixed-case>LORIA</fixed-case> System for the <fixed-case>WMT</fixed-case>12 Quality Estimation Shared Task DavidLanglois SylvainRaybaud - KamelSmaïli + KamelSmaïli 114–119 W12-3113 langlois-etal-2012-loria @@ -4421,26 +4421,26 @@ The <fixed-case>UPC</fixed-case> Submission to the <fixed-case>WMT</fixed-case> 2012 Shared Task on Quality Estimation DanielePighin - MeritxellGonzález - LluísMàrquez + MeritxellGonzález + LluísMàrquez 127–132 W12-3115 pighin-etal-2012-upc Morpheme- and <fixed-case>POS</fixed-case>-based <fixed-case>IBM</fixed-case>1 and language model scores for translation quality estimation - MajaPopović + MajaPopović 133–137 W12-3116 popovic-2012-morpheme <fixed-case>DCU</fixed-case>-Symantec Submission for the <fixed-case>WMT</fixed-case> 2012 Quality Estimation Task - RaphaelRubino + RaphaelRubino JenniferFoster JoachimWagner JohannRoturier - RasulSamad Zadeh Kaljahi + RasulSamad Zadeh Kaljahi FredHollowood 138–144 W12-3117 @@ -4483,7 +4483,7 @@ Match without a Referee: Evaluating <fixed-case>MT</fixed-case> Adequacy without Reference Translations YasharMehdad - MatteoNegri + MatteoNegri MarcelloFederico 171–180 W12-3122 @@ -4498,15 +4498,15 @@ Review of Hypothesis Alignment Algorithms for <fixed-case>MT</fixed-case> System Combination via Confusion Network Decoding - Antti-VeikkoRosti + Antti-VeikkoRosti XiaodongHe DamianosKarakos GregorLeusch YuanCao MarkusFreitag SpyrosMatsoukas - HermannNey - JasonSmith + HermannNey + JasonSmith BingZhang 191–199 W12-3124 @@ -4515,7 +4515,7 @@ On Hierarchical Re-ordering and Permutation Parsing for Phrase-based Decoding ColinCherry - Robert C.Moore + Robert C.Moore ChrisQuirk 200–209 W12-3125 @@ -4523,7 +4523,7 @@ <fixed-case>CCG</fixed-case> Syntactic Reordering Models for Phrase-based Machine Translation - Dennis NolanMehay + Dennis NolanMehay Christopher HardieBrew 210–221 W12-3126 @@ -4540,10 +4540,10 @@ Using Syntactic Head Information in Hierarchical Phrase-Based Translation - JunhuiLi + JunhuiLi ZhaopengTu - GuodongZhou - Josefvan Genabith + GuodongZhou + Josefvan Genabith 232–242 W12-3128 li-etal-2012-using @@ -4559,7 +4559,7 @@ Probes in a Taxonomy of Factored Phrase-Based Models - OndřejBojar + OndřejBojar BushraJawaid AmirKamran 253–260 @@ -4570,7 +4570,7 @@ The <fixed-case>CMU</fixed-case>-Avenue <fixed-case>F</fixed-case>rench-<fixed-case>E</fixed-case>nglish Translation System MichaelDenkowski GregHanneman - AlonLavie + AlonLavie 261–266 W12-3131 denkowski-etal-2012-cmu @@ -4578,7 +4578,7 @@ Formemes in <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>zech Deep Syntactic <fixed-case>MT</fixed-case> OndřejDušek - ZdeněkŽabokrtský + ZdeněkŽabokrtský MartinPopel MartinMajliš MichalNovák @@ -4589,12 +4589,12 @@ The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> phrase-based translation systems for <fixed-case>WMT</fixed-case>12: Morphology simplification and domain adaptation - LluísFormiga - Carlos A.Henríquez Q. - AdolfoHernández - José B.Mariño + LluísFormiga + Carlos A.Henríquez Q. + AdolfoHernández + José B.Mariño EnricMonte - José A. R.Fonollosa + José A. R.Fonollosa 275–282 W12-3133 formiga-etal-2012-talp @@ -4619,10 +4619,10 @@ <fixed-case>QCRI</fixed-case> at <fixed-case>WMT</fixed-case>12: Experiments in <fixed-case>S</fixed-case>panish-<fixed-case>E</fixed-case>nglish and <fixed-case>G</fixed-case>erman-<fixed-case>E</fixed-case>nglish Machine Translation of News Text - FranciscoGuzmán - PreslavNakov + FranciscoGuzmán + PreslavNakov AhmedThabet - StephanVogel + StephanVogel 298–303 W12-3136 guzman-etal-2012-qcri @@ -4633,7 +4633,7 @@ StephanPeitz MarkusFreitag MalteNuhn - HermannNey + HermannNey 304–311 W12-3137 huck-etal-2012-rwth @@ -4660,15 +4660,15 @@ MarkusFreitag StephanPeitz MatthiasHuck - HermannNey + HermannNey JanNiehues TeresaHerrmann - AlexWaibel - LeHai-son + AlexWaibel + Hai-sonLe ThomasLavergne AlexandreAllauzen - BiankaBuschbeck - Josep MariaCrego + BiankaBuschbeck + Josep MariaCrego JeanSenellart 322–329 W12-3140 @@ -4676,13 +4676,13 @@ <fixed-case>LIMSI</fixed-case> @ <fixed-case>WMT</fixed-case>12 - Hai-SonLe + Hai-SonLe ThomasLavergne AlexandreAllauzen MariannaApidianaki LiGong AurélienMax - ArtemSokolov + ArtemSokolov GuillaumeWisniewski FrançoisYvon 330–337 @@ -4692,8 +4692,8 @@ <fixed-case>UPM</fixed-case> system for <fixed-case>WMT</fixed-case> 2012 VerónicaLópez-Ludeña - RubénSan-Segundo - Juan M.Montero + RubénSan-Segundo + Juan M.Montero 338–344 W12-3142 lopez-ludena-etal-2012-upm @@ -4712,7 +4712,7 @@ MohammedMediani TeresaHerrmann EunahCho - AlexWaibel + AlexWaibel 349–355 W12-3144 niehues-etal-2012-karlsruhe @@ -4720,7 +4720,7 @@ Kriya - The <fixed-case>SFU</fixed-case> System for Translation Task at <fixed-case>WMT</fixed-case>-12 MajidRazmara - BaskaranSankaran + BaskaranSankaran AnnClifton AnoopSarkar 356–361 @@ -4738,7 +4738,7 @@ <fixed-case>LIUM</fixed-case>’s <fixed-case>SMT</fixed-case> Machine Translation Systems for <fixed-case>WMT</fixed-case> 2012 - ChristopheServan + ChristopheServan PatrikLambert AnthonyRousseau HolgerSchwenk @@ -4753,7 +4753,7 @@ PetraGaluščáková AmirKamran MilošStanojević - OndřejBojar + OndřejBojar 374–381 W12-3148 tamchyna-etal-2012-selecting @@ -4775,7 +4775,7 @@ Data Issues of the Multilingual Translation Matrix - DanielZeman + DanielZeman 395–400 W12-3151 zeman-2012-data @@ -4827,7 +4827,7 @@ Phrase Model Training for Statistical Machine Translation with Word Lattices of Preprocessing Alternatives JoernWuebker - HermannNey + HermannNey 450–459 W12-3157 wuebker-ney-2012-phrase @@ -4835,7 +4835,7 @@ Leave-One-Out Phrase Model Training for Large-Scale Deployment JoernWuebker - Mei-YuhHwang + Mei-YuhHwang ChrisQuirk 460–467 W12-3158 @@ -4861,7 +4861,7 @@ Proceedings of the ACL-2012 Special Workshop on Rediscovering 50 Years of Discoveries W12-32 - Rafael E.Banchs + Rafael E.Banchs Association for Computational Linguistics
Jeju Island, Korea
July @@ -4874,7 +4874,7 @@ Rediscovering <fixed-case>ACL</fixed-case> Discoveries Through the Lens of <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology Network Citing Sentences - DragomirRadev + DragomirRadev AmjadAbu-Jbara 1–12 W12-3201 @@ -4883,8 +4883,8 @@ Towards a Computational History of the <fixed-case>ACL</fixed-case>: 1980-2008 AshtonAnderson - DanJurafsky - Daniel A.McFarland + DanJurafsky + Daniel A.McFarland 13–21 W12-3202 anderson-etal-2012-towards @@ -4892,8 +4892,8 @@ Discovering Factions in the Computational Linguistics Community YanchuanSim - Noah A.Smith - David A.Smith + Noah A.Smith + David A.Smith 22–32 W12-3203 sim-etal-2012-discovering @@ -4901,15 +4901,15 @@ He Said, She Said: Gender in the <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology AdamVogel - DanJurafsky + DanJurafsky 33–41 W12-3204 vogel-jurafsky-2012-said Discourse Structure and Computation: Past, Present and Future - BonnieWebber - AravindJoshi + BonnieWebber + AravindJoshi 42–54 W12-3205 webber-joshi-2012-discourse @@ -4917,7 +4917,7 @@ Extracting glossary sentences from scholarly articles: A comparative evaluation of pattern bootstrapping and deep analysis MelanieReiplinger - UlrichSchäfer + UlrichSchäfer MagdalenaWolska 55–65 W12-3206 @@ -4925,7 +4925,7 @@ Applying Collocation Segmentation to the <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology Reference Corpus - VidasDaudaravičius + VidasDaudaravičius 66–75 W12-3207 daudaravicius-2012-applying @@ -4948,7 +4948,7 @@ Towards an <fixed-case>ACL</fixed-case> <fixed-case>A</fixed-case>nthology Corpus with Logical Document Structure. An Overview of the <fixed-case>ACL</fixed-case> 2012 Contributed Task - UlrichSchäfer + UlrichSchäfer JonathonRead StephanOepen 88–97 @@ -4966,7 +4966,7 @@ Combining <fixed-case>OCR</fixed-case> Outputs for Logical Document Structure Markup. Technical Background to the <fixed-case>ACL</fixed-case> 2012 Contributed Task - UlrichSchäfer + UlrichSchäfer BenjaminWeitz 104–109 W12-3212 @@ -4985,9 +4985,9 @@ Proceedings of ACL 2012 Student Research Workshop W12-33 - Jackie C. K.Cheung + Jackie C. K.Cheung JunHatori - CarlosHenriquez + CarlosHenriquez AnnIrvine Association for Computational Linguistics
Jeju Island, Korea
@@ -5002,7 +5002,7 @@ A Broad Evaluation of Techniques for Automatic Acquisition of Multiword Expressions CarlosRamisch - VitorDe Araujo + VitorDe Araujo AlineVillavicencio 1–6 W12-3301 @@ -5019,8 +5019,8 @@ Active Learning with Transfer Learning ChunyongLuo YangshengJi - XinyuDai - JiajunChen + XinyuDai + JiajunChen 13–18 W12-3303 luo-etal-2012-active @@ -5028,7 +5028,7 @@ Query classification using topic models and support vector machine Dieu-ThuLe - RaffaellaBernardi + RaffaellaBernardi 19–24 W12-3304 le-bernardi-2012-query @@ -5050,7 +5050,7 @@ <fixed-case>T</fixed-case>opic<fixed-case>T</fixed-case>iling: A Text Segmentation Algorithm based on <fixed-case>LDA</fixed-case> MartinRiedl - ChrisBiemann + ChrisBiemann 37–42 W12-3307 riedl-biemann-2012-topictiling @@ -5073,7 +5073,7 @@ Discourse Structure in Simultaneous Spoken <fixed-case>T</fixed-case>urkish - IsinDemirşahin + IsinDemirşahin 55–60 W12-3310 demirsahin-2012-discourse @@ -5105,7 +5105,7 @@ IdoDagan JenniferFoster YuvalMarton - DjaméSeddah + DjaméSeddah ReutTsarfaty Association for Computational Linguistics
Jeju, Republic of Korea
@@ -5119,8 +5119,8 @@ Probabilistic Lexical Generalization for <fixed-case>F</fixed-case>rench Dependency Parsing - EnriqueHenestroza Anguiano - MarieCandito + EnriqueHenestroza Anguiano + MarieCandito 1–11 W12-3401 henestroza-anguiano-candito-2012-probabilistic @@ -5136,7 +5136,7 @@ Building an <fixed-case>A</fixed-case>rabic Multiword Expressions Repository AbdelatiHawwari KfirBar - MonaDiab + MonaDiab 24–29 W12-3403 hawwari-etal-2012-building @@ -5169,9 +5169,9 @@ Combining Rule-Based and Statistical Syntactic Analyzers IakesGoenaga - KoldobikaGojenola + KoldobikaGojenola María JesúsAranzabe - ArantzaDíaz de Ilarraza + ArantzaDíaz de Ilarraza KepaBengoetxea 48–54 W12-3407 @@ -5179,8 +5179,8 @@ Statistical Parsing of <fixed-case>S</fixed-case>panish and Data Driven Lemmatization - JosephLe Roux - BenoîtSagot + JosephLe Roux + BenoîtSagot DjaméSeddah 55–61 W12-3408 @@ -5188,8 +5188,8 @@ Assigning Deep Lexical Types Using Structured Classifier Features for Grammatical Dependencies - JoãoSilva - AntónioBranco + JoãoSilva + AntónioBranco 62–71 W12-3409 silva-branco-2012-assigning @@ -5198,7 +5198,7 @@ Using an <fixed-case>SVM</fixed-case> Ensemble System for Improved <fixed-case>T</fixed-case>amil Dependency Parsing NathanGreen LoganathanRamasamy - ZdeněkŽabokrtský + ZdeněkŽabokrtský 72–77 W12-3410 green-etal-2012-using @@ -5207,15 +5207,15 @@ <fixed-case>K</fixed-case>orean Treebank Transformation for Parser Training DongHyunChoi JungyeulPark - Key-SunChoi + Key-SunChoi 78–88 W12-3411 choi-etal-2012-korean Generative Constituent Parsing and Discriminative Dependency Reranking: Experiments on <fixed-case>E</fixed-case>nglish and <fixed-case>F</fixed-case>rench - JosephLe Roux - BenoîtFavre + JosephLe Roux + BenoîtFavre AlexisNasr Seyed AbolghasemMirroshandel 89–99 @@ -5256,7 +5256,7 @@ Integration of Multimodal Interaction as Assistance in Virtual Environments KiranPala RamNaresh - SachinJoshi + SachinJoshi Suryakanth VGanagshetty 8–12 W12-3502 @@ -5291,11 +5291,11 @@ Towards a Self-Learning Assistive Vocal Interface: Vocabulary and Grammar Learning Jannekevan de Loo - Jort F.Gemmeke + Jort F.Gemmeke GuyDe Pauw JorisDriesen - HugoVan hamme - WalterDaelemans + HugoVan hamme + WalterDaelemans 34–42 W12-3506 van-de-loo-etal-2012-towards @@ -5313,7 +5313,7 @@ Proceedings of the Sixth Linguistic Annotation Workshop W12-36 - NancyIde + NancyIde FeiXia Association for Computational Linguistics
Jeju, Republic of Korea
@@ -5327,7 +5327,7 @@ The Role of Linguistic Models and Language Annotation in Feature Selection for Machine Learning - JamesPustejovsky + JamesPustejovsky 1 W12-3601 pustejovsky-2012-role @@ -5336,8 +5336,8 @@ Who Did What to Whom? A Contrastive Study of Syntacto-Semantic Dependencies AngelinaIvanova StephanOepen - LiljaØvrelid - DanFlickinger + LiljaØvrelid + DanFlickinger 2–11 W12-3602 ivanova-etal-2012-contrastive @@ -5360,8 +5360,8 @@ Pair Annotation: Adaption of Pair Programming to Corpus Annotation - IsinDemirşahin - İhsanYalcinkaya + IsinDemirşahin + İhsanYalcinkaya DenizZeyrek 31–39 W12-3605 @@ -5369,12 +5369,12 @@ Structured Named Entities in two distinct press corpora: Contemporary Broadcast News and Old Newspapers - SophieRosset + SophieRosset CyrilGrouin KarënFort OlivierGalibert JulietteKahn - PierreZweigenbaum + PierreZweigenbaum 40–48 W12-3606 rosset-etal-2012-structured @@ -5384,7 +5384,7 @@ PrudhviKosaraju Bharat RamAmbati SamarHusain - Dipti MisraSharma + Dipti MisraSharma RajeevSangal 49–56 W12-3607 @@ -5400,15 +5400,15 @@ A <fixed-case>G</fixed-case>r<fixed-case>AF</fixed-case>-compliant <fixed-case>I</fixed-case>ndonesian Speech Recognition Web Service on the Language Grid for Transcription Crowdsourcing - BayuDistiawan - RuliManurung + BayuDistiawan + RuliManurung 67–74 W12-3609 distiawan-manurung-2012-graf Towards Adaptation of Linguistic Annotations to Scholarly Annotation Formalisms on the Semantic Web - KarinVerspoor + KarinVerspoor KevinLivingston 75–84 W12-3610 @@ -5418,8 +5418,8 @@ Intonosyntactic Data Structures: The Rhapsodie Treebank of Spoken <fixed-case>F</fixed-case>rench KimGerdes SylvainKahane - AnneLacheret - PaolaPietandrea + AnneLacheret + PaolaPietandrea ArthurTruong 85–94 W12-3611 @@ -5428,12 +5428,12 @@ Annotation Schemes to Encode Domain Knowledge in Medical Narratives WilsonMcCoy - Cecilia OvesdotterAlm + Cecilia OvesdotterAlm CaraCalvelli RuiLi Jeff B.Pelz PengchengShi - AnneHaake + AnneHaake 95–103 W12-3612 mccoy-etal-2012-annotation @@ -5449,7 +5449,7 @@ Search Result Diversification Methods to Assist Lexicographers LarsBorin MarkusForsberg - KarinFriberg Heppin + KarinFriberg Heppin RichardJohansson AnnikaKjellandsson 113–117 @@ -5486,24 +5486,24 @@ FrancescaBonin FabioCavulli AronneNoriller - MassimoPoesio - Egon W.Stemle + MassimoPoesio + Egon W.Stemle 134–138 W12-3618 bonin-etal-2012-annotating Annotating Preferences in Chats for Strategic Games - AnaïsCadilhac - NicholasAsher - FarahBenamara + AnaïsCadilhac + NicholasAsher + FarahBenamara 139–143 W12-3619 cadilhac-etal-2012-annotating-preferences Morpheme Segmentation in the <fixed-case>METU</fixed-case>-Sabancı <fixed-case>T</fixed-case>urkish Treebank - RuketCakici + RuketCakici 144–148 W12-3620 cakici-2012-morpheme @@ -5512,7 +5512,7 @@ <fixed-case>A</fixed-case>lvis<fixed-case>AE</fixed-case>: a collaborative Web text annotation editor for knowledge acquisition FrédéricPapazian RobertBossy - ClaireNédellec + ClaireNédellec 149–152 W12-3621 papazian-etal-2012-alvisae @@ -5527,8 +5527,8 @@ Dependency Treebank of <fixed-case>U</fixed-case>rdu and its Evaluation - Riyaz AhmadBhat - Dipti MisraSharma + Riyaz AhmadBhat + Dipti MisraSharma 157–165 W12-3623 bhat-sharma-2012-dependency @@ -5536,8 +5536,8 @@ Annotating Coordination in the <fixed-case>P</fixed-case>enn <fixed-case>T</fixed-case>reebank WolfgangMaier - SandraKübler - ErhardHinrichs + SandraKübler + ErhardHinrichs JuliaKrivanek 166–174 W12-3624 @@ -5568,9 +5568,9 @@ Proceedings of the 3rd Workshop in Computational Approaches to Subjectivity and Sentiment Analysis W12-37 - AlexandraBalahur - AndresMontoyo - Patricio MartinezBarco + AlexandraBalahur + AndresMontoyo + Patricio MartinezBarco EsterBoldrini Association for Computational Linguistics
Jeju, Korea
@@ -5584,14 +5584,14 @@ Multimodal Sentiment Analysis - RadaMihalcea + RadaMihalcea 1 W12-3701 mihalcea-2012-multimodal Subjectivity Word Sense Disambiguation - JanyceWiebe + JanyceWiebe 2 W12-3702 wiebe-2012-subjectivity @@ -5599,9 +5599,9 @@ Random Walk Weighting over <fixed-case>S</fixed-case>enti<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for Sentiment Polarity Detection on <fixed-case>T</fixed-case>witter ArturoMontejo-Ráez - EugenioMartínez-Cámara - M. TeresaMartín-Valdivia - L. AlfonsoUreña-López + EugenioMartínez-Cámara + M. TeresaMartín-Valdivia + L. AlfonsoUreña-López 3–10 W12-3703 montejo-raez-etal-2012-random @@ -5621,8 +5621,8 @@ <fixed-case>SAMAR</fixed-case>: A System for Subjectivity and Sentiment Analysis of <fixed-case>A</fixed-case>rabic Social Media MuhammadAbdul-Mageed - SandraKuebler - MonaDiab + SandraKuebler + MonaDiab 19–28 W12-3705 abdul-mageed-etal-2012-samar @@ -5630,8 +5630,8 @@ <fixed-case>O</fixed-case>pinum: statistical sentiment analysis for opinion classification BoyanBonev - GemaRamírez-Sánchez - SergioOrtiz Rojas + GemaRamírez-Sánchez + SergioOrtiz Rojas 29–37 W12-3706 bonev-etal-2012-opinum @@ -5639,7 +5639,7 @@ <fixed-case>S</fixed-case>entimantics: Conceptual Spaces for Lexical Sentiment Polarity Representation with Contextuality AmitavaDas - GambäckBjörn + BjörnGambäck 38–46 W12-3707 das-gamback-2012-sentimantics @@ -5665,7 +5665,7 @@ JieYin NalinNarang PaulThomas - CecileParis + CecileParis 61–69 W12-3710 yin-etal-2012-unifying @@ -5673,8 +5673,8 @@ Prior versus Contextual Emotion of a Word in a Sentence DimanGhazi - DianaInkpen - StanSzpakowicz + DianaInkpen + StanSzpakowicz 70–78 W12-3711 ghazi-etal-2012-prior @@ -5682,7 +5682,7 @@ Cross-discourse Development of Supervised Sentiment Analysis in the Clinical Domain PhillipSmith - MarkLee + MarkLee 79–83 W12-3712 smith-lee-2012-cross @@ -5690,7 +5690,7 @@ <fixed-case>POLITICAL</fixed-case>-<fixed-case>ADS</fixed-case>: An annotated corpus for modeling event-level evaluativity KevinReschke - PranavAnand + PranavAnand 84–88 W12-3713 reschke-anand-2012-political @@ -5699,7 +5699,7 @@ Automatically Annotating A Five-Billion-Word Corpus of <fixed-case>J</fixed-case>apanese Blogs for Affect and Sentiment Analysis MichalPtaszynski RafalRzepka - KenjiAraki + KenjiAraki YoshioMomouchi 89–98 W12-3714 @@ -5751,22 +5751,22 @@ Disfluencies as Extra-Propositional Indicators of Cognitive Processing KathrynWomack WilsonMcCoy - CeciliaOvesdotter Alm + CeciliaOvesdotter Alm CaraCalvelli Jeff B.Pelz PengchengShi - AnneHaake + AnneHaake 1–9 W12-3801 womack-etal-2012-disfluencies How do Negation and Modality Impact on Opinions? - FarahBenamara + FarahBenamara BaptisteChardon - YannickMathieu + YannickMathieu VladimirPopescu - NicholasAsher + NicholasAsher 10–18 W12-3802 benamara-etal-2012-negation @@ -5774,11 +5774,11 @@ Linking Uncertainty in Physicians’ Narratives to Diagnostic Correctness WilsonMcCoy - CeciliaOvesdotter Alm + CeciliaOvesdotter Alm CaraCalvelli Jeff B.Pelz PengchengShi - AnneHaake + AnneHaake 19–27 W12-3803 mccoy-etal-2012-linking @@ -5806,7 +5806,7 @@ SampoPyysalo TomokoOhta SophiaAnaniadou - Jun’ichiTsujii + Jun’ichiTsujii 47–56 W12-3806 stenetorp-etal-2012-bridging @@ -5815,11 +5815,11 @@ Statistical Modality Tagging from Rule-based Annotations and Crowdsourcing VinodkumarPrabhakaran MichaelBloodgood - MonaDiab - BonnieDorr - LoriLevin - Christine D.Piatko - OwenRambow + MonaDiab + BonnieDorr + LoriLevin + Christine D.Piatko + OwenRambow BenjaminVan Durme 57–64 W12-3807 @@ -5827,7 +5827,7 @@ Annotating the Focus of Negation in terms of Questions Under Discussion - PranavAnand + PranavAnand CraigMartell 65–69 W12-3808 @@ -5847,7 +5847,7 @@ Recognizing Arguing Subjectivity and Argument Tags AlexanderConrad - JanyceWiebe + JanyceWiebe RebeccaHwa 80–88 W12-3810 @@ -5861,7 +5861,7 @@ JagadeeshJagarlamudi SujithRavi XiaojunWan - HalDaume III + HalDaume III Association for Computational Linguistics
Jeju, Republic of Korea
July @@ -5893,7 +5893,7 @@ The Study of Effect of Length in Morphological Segmentation of Agglutinative Languages LoganathanRamasamy - ZdeněkŽabokrtský + ZdeněkŽabokrtský SowmyaVajjala 18–24 W12-3903 @@ -5904,8 +5904,8 @@ RogerGranada LuceleneLopes CarlosRamisch - CassiaTrojahn - RenataVieira + CassiaTrojahn + RenataVieira AlineVillavicencio 25–31 W12-3904 @@ -5917,7 +5917,7 @@ Proceedings of the 3rd Workshop on the People’s Web Meets NLP: Collaboratively Constructed Semantic Resources and their Applications to NLP W12-40 IrynaGurevych - Nicoletta CalzolariZamorani + Nicoletta CalzolariZamorani JungiKim Association for Computational Linguistics
Jeju, Republic of Korea
@@ -5931,7 +5931,7 @@ Sentiment Analysis Using a Novel Human Computation Game - Claudiu-CristianMusat + Claudiu-CristianMusat AlirezaGhasemi BoiFaltings 1–9 @@ -5942,7 +5942,7 @@ A Serious Game for Building a <fixed-case>P</fixed-case>ortuguese Lexical-Semantic Network - MathieuMangeot + MathieuMangeot CarlosRamisch 10–14 W12-4002 @@ -5951,7 +5951,7 @@ Collaboratively Building Language Resources while Localising the Web AsankaWasala - ReinhardSchäler + ReinhardSchäler RuvanWeerasinghe ChrisExton 15–19 @@ -5967,7 +5967,7 @@ <fixed-case>EAGER</fixed-case>: Extending Automatically Gazetteers for Entity Recognition - Omer FarukhanGunes + Omer FarukhanGunes TimFurche ChristianSchallhart JensLehmann @@ -5979,7 +5979,7 @@ Extracting Context-Rich Entailment Rules from <fixed-case>W</fixed-case>ikipedia Revision History ElenaCabrio - BernardoMagnini + BernardoMagnini AngelinaIvanova 34–43 W12-4006 @@ -5990,9 +5990,9 @@ Workshop Proceedings of TextGraphs-7: Graph-based Methods for Natural Language Processing W12-41 - IrinaMatveeva - AhmedHassan - GaelDias + IrinaMatveeva + AhmedHassan + GaelDias Association for Computational Linguistics
Jeju, Republic of Korea
July @@ -6015,7 +6015,7 @@ Extracting Signed Social Networks from Text AhmedHassan AmjadAbu-Jbara - DragomirRadev + DragomirRadev 6–14 W12-4102 hassan-etal-2012-extracting @@ -6024,7 +6024,7 @@ Using Link Analysis to Discover Interesting Messages Spread Across <fixed-case>T</fixed-case>witter Min-ChulYang Jung-TaeLee - Hae-ChangRim + Hae-ChangRim 15–19 W12-4103 yang-etal-2012-using @@ -6032,7 +6032,7 @@ Graph Based Similarity Measures for Synonym Extraction from Parsed Text EinatMinkov - WilliamCohen + WilliamCohen 20–24 W12-4104 minkov-cohen-2012-graph @@ -6040,7 +6040,7 @@ Semantic Relatedness for Biomedical Word Sense Disambiguation Kiem-HieuNguyen - Cheol-YoungOck + Cheol-YoungOck 25–29 W12-4105 nguyen-ock-2012-semantic @@ -6090,7 +6090,7 @@ <fixed-case>WSD</fixed-case> for n-best reranking and local language modeling in <fixed-case>SMT</fixed-case> MariannaApidianaki GuillaumeWisniewski - ArtemSokolov + ArtemSokolov AurélienMax FrançoisYvon 1–9 @@ -6101,7 +6101,7 @@ Linguistically-Enriched Models for <fixed-case>B</fixed-case>ulgarian-to-<fixed-case>E</fixed-case>nglish Machine Translation RuiWang PetyaOsenova - KirilSimov + KirilSimov 10–19 W12-4202 wang-etal-2012-linguistically-enriched @@ -6116,7 +6116,7 @@ Towards a Predicate-Argument Evaluation for <fixed-case>MT</fixed-case> - OndřejBojar + OndřejBojar DekaiWu 30–38 W12-4204 @@ -6170,7 +6170,7 @@ Using Domain-specific and Collaborative Resources for Term Translation - MihaelArcan + MihaelArcan ChristianFedermann PaulBuitelaar 86–94 @@ -6180,7 +6180,7 @@ Improving Statistical Machine Translation through co-joining parts of verbal constructs in <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi translation Karunesh KumarArora - R Mahesh KSinha + R Mahesh KSinha 95–101 W12-4211 arora-sinha-2012-improving @@ -6214,7 +6214,7 @@ Proceedings of the Workshop on Detecting Structure in Scholarly Discourse W12-43 - AntalVan Den Bosch + AntalVan Den Bosch HagitShatkay Association for Computational Linguistics
Jeju Island, Korea
@@ -6236,7 +6236,7 @@
Identifying Claimed Knowledge Updates in Biomedical Research Articles - ÁgnesSándor + ÁgnesSándor Anitade Waard 10–17 W12-4302 @@ -6254,7 +6254,7 @@ Open-domain Anatomical Entity Mention Detection TomokoOhta SampoPyysalo - Jun’ichiTsujii + Jun’ichiTsujii SophiaAnaniadou 27–36 W12-4304 @@ -6287,7 +6287,7 @@ W12-44 MinZhang HaizhouLi - AKumaran + AKumaran Association for Computational Linguistics
Jeju, Korea
July @@ -6321,7 +6321,7 @@ Accurate Unsupervised Joint Named-Entity Extraction from Unaligned Parallel Text RobertMunro - Christopher D.Manning + Christopher D.Manning 21–29 W12-4403 munro-manning-2012-accurate @@ -6337,7 +6337,7 @@ Automatically generated <fixed-case>NE</fixed-case> tagged corpora for <fixed-case>E</fixed-case>nglish and <fixed-case>H</fixed-case>ungarian EszterSimon - Dávid MárkNemeskey + Dávid MárkNemeskey 38–46 W12-4405 simon-nemeskey-2012-automatically @@ -6345,8 +6345,8 @@ Rescoring a Phrase-based Machine Transliteration System with Recurrent Neural Network Language Models AndrewFinch - PaulDixon - EiichiroSumita + PaulDixon + EiichiroSumita 47–51 W12-4406 finch-etal-2012-rescoring @@ -6355,16 +6355,16 @@ Syllable-based Machine Transliteration with Extra Phrase Features ChunyueZhang TingtingLi - TiejunZhao + TiejunZhao 52–56 W12-4407 zhang-etal-2012-syllable <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Named Entity Transliteration Using Substring Alignment and Re-ranking Methods - Chun-KaiWu + Chun-KaiWu Yu-ChunWang - Richard Tzong-HanTsai + Richard Tzong-HanTsai 57–60 W12-4408 wu-etal-2012-english @@ -6379,8 +6379,8 @@ Transliteration by Sequence Labeling with Lattice Encodings and Reranking WaleedAmmar - ChrisDyer - NoahSmith + ChrisDyer + NoahSmith 66–70 W12-4410 ammar-etal-2012-transliteration @@ -6396,11 +6396,11 @@ Cost-benefit Analysis of Two-Stage Conditional Random Fields based <fixed-case>E</fixed-case>nglish-to-<fixed-case>C</fixed-case>hinese Machine Transliteration - Chan-HungKuo + Chan-HungKuo Shih-HungLiu - Mike Tian-JianJiang + Mike Tian-JianJiang Cheng-WeiLee - Wen-LianHsu + Wen-LianHsu 76–80 W12-4412 kuo-etal-2012-cost @@ -6410,7 +6410,7 @@ Joint Conference on EMNLP and CoNLL - Shared Task W12-45 - SameerPradhan + SameerPradhan AlessandroMoschitti NianwenXue Association for Computational Linguistics @@ -6436,9 +6436,9 @@ Latent Structure Perceptron with Feature Induction for Unrestricted Coreference Resolution - EraldoFernandes - Cícerodos Santos - RuyMilidiú + EraldoFernandes + Cícerodos Santos + RuyMilidiú 41–48 W12-4502 fernandes-etal-2012-latent @@ -6446,7 +6446,7 @@ Data-driven Multilingual Coreference Resolution using Resolver Stacking AndersBjörkelund - RichárdFarkas + RichárdFarkas 49–55 W12-4503 bjorkelund-farkas-2012-data @@ -6503,7 +6503,7 @@ <fixed-case>UBIU</fixed-case> for Multilingual Coreference Resolution in <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes DesislavaZhekova - SandraKübler + SandraKübler JoshuaBonner MarwaRagheb Yu-YinHsu @@ -6568,7 +6568,7 @@ <fixed-case>BART</fixed-case> goes multilingual: The <fixed-case>U</fixed-case>ni<fixed-case>TN</fixed-case> / <fixed-case>E</fixed-case>ssex submission to the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2012 Shared Task OlgaUryupina AlessandroMoschitti - MassimoPoesio + MassimoPoesio 122–128 W12-4515 uryupina-etal-2012-bart @@ -6586,7 +6586,7 @@ Proceedings of the 11th International Workshop on Tree Adjoining Grammars and Related Formalisms (TAG+11) W12-46 GiorgioSatta - Chung-HyeHan + Chung-HyeHan
Paris, France
September 2012 @@ -6600,14 +6600,14 @@ Delayed Tree-Locality, Set-locality, and Clitic Climbing JoanChen-Main ToniaBleam - AravindJoshi + AravindJoshi 1–9 W12-4601 chen-main-etal-2012-delayed
Deriving syntax-semantics mappings: node linking, type shifting and scope ambiguity - Dennis RyanStoroshenko + Dennis RyanStoroshenko RobertFrank 10–18 W12-4602 @@ -6652,7 +6652,7 @@ Incremental Neo-<fixed-case>D</fixed-case>avidsonian semantic construction for <fixed-case>TAG</fixed-case> - AsadSayeed + AsadSayeed VeraDemberg 64–72 W12-4608 @@ -6669,7 +6669,7 @@ Describing São Tomense Using a Tree-Adjoining Meta-Grammar EmmanuelSchang DenysDuchier - BrunelleMagnana Ekoukou + BrunelleMagnana Ekoukou YannickParmentier SimonPetitjean 82–89 @@ -6725,7 +6725,7 @@ Practical Parsing of Parallel Multiple Context-Free Grammars - PeterLjunglöf + PeterLjunglöf 144–152 W12-4617 ljunglof-2012-practical @@ -6739,8 +6739,8 @@ Creating a <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar from a Multilayer Treebank - RajeshBhatt - OwenRambow + RajeshBhatt + OwenRambow FeiXia 162–170 W12-4619 @@ -6787,8 +6787,8 @@ A linguistically-motivated 2-stage Tree to Graph Transformation CorentinRibeyre - DjaméSeddah - EricVillemonte de la Clergerie + DjaméSeddah + EricVillemonte de la Clergerie 214–222 W12-4625 ribeyre-etal-2012-linguistically @@ -6803,7 +6803,7 @@ The Shape of Elementary Trees and Scope Possibilities in <fixed-case>STAG</fixed-case> RobertFrank - Dennis RyanStoroshenko + Dennis RyanStoroshenko 232–240 W12-4627 frank-storoshenko-2012-shape @@ -6813,7 +6813,7 @@ Proceedings of the Workshop on Advances in Discourse Analysis and its Computational Aspects W12-47 - EvaHajičová + EvaHajičová LuciePoláková JiříMírovský The COLING 2012 Organizing Committee @@ -6845,7 +6845,7 @@ Measuring the Strength of Linguistic Cues for Discourse Relations - Fatemeh TorabiAsr + Fatemeh TorabiAsr VeraDemberg 33–42 W12-4703 @@ -6906,7 +6906,7 @@ Multi-objective Optimization for Efficient <fixed-case>B</fixed-case>rahmic Keyboards AlbertBrouillette DevrajSarmah - JugalKalita + JugalKalita 29–44 W12-4803 brouillette-etal-2012-multi @@ -6931,7 +6931,7 @@ <fixed-case>B</fixed-case>angla Phonetic Input Method with Foreign Words Handling - Khan Md. AnwarusSalam + Khan Md. AnwarusSalam SetsuoYamada TetsuroNishino 73–78 @@ -6976,7 +6976,7 @@ Proceedings of the First Workshop on Eye-tracking and Natural Language Processing W12-49 MichaelCarl - PushpakBhattacharyya + PushpakBhattacharyya Kamal KumarChoudhary The COLING 2012 Organizing Committee
Mumbai, India
@@ -6990,7 +6990,7 @@ Grounding spoken interaction with real-time gaze in dynamic virtual environments - MatthewCrocker + MatthewCrocker 1–4 W12-4901 crocker-2012-grounding @@ -7026,7 +7026,7 @@ TadayoshiHara DaichiMochihashi YoshinobuKano - AkikoAizawa + AkikoAizawa 55–70 W12-4905 hara-etal-2012-predicting @@ -7059,7 +7059,7 @@ Computational evidence that <fixed-case>H</fixed-case>indi and <fixed-case>U</fixed-case>rdu share a grammar but not the lexicon - K.V.SPrasad + K.V.SPrasad Shafqat MumtazVirk 1–14 W12-5001 @@ -7078,7 +7078,7 @@ <fixed-case>B</fixed-case>engali Question Classification: Towards Developing <fixed-case>QA</fixed-case> System SomnathBanerjee - SivajiBandyopadhyay + SivajiBandyopadhyay 25–40 W12-5003 banerjee-bandyopadhyay-2012-bengali @@ -7088,14 +7088,14 @@ KhumbarDebbarma Braja GopalPatra DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 41–52 W12-5004 debbarma-etal-2012-morphological Comparing Different Criteria for <fixed-case>V</fixed-case>ietnamese Word Segmentation - Quy T.Nguyen + Quy T.Nguyen Ngan L.T.Nguyen YusukeMiyao 53–68 @@ -7105,7 +7105,7 @@ A Light Weight Stemmer for <fixed-case>U</fixed-case>rdu Language: A Scarce Resourced Language Sajjad AhmadKhan - WaqasAnwar + WaqasAnwar Usama IjazBajwa XuanWang 69–78 @@ -7124,7 +7124,7 @@ KishorjitNongmeikapam Vidya RajRK NirmalY - SivajiB + SivajiB 95–108 W12-5008 nongmeikapam-etal-2012-manipuri @@ -7149,7 +7149,7 @@ Tagger Voting for <fixed-case>U</fixed-case>rdu BushraJawaid - OndřejBojar + OndřejBojar 135–144 W12-5011 jawaid-bojar-2012-tagger @@ -7157,7 +7157,7 @@ <fixed-case>BIS</fixed-case> Annotation Standards With Reference to <fixed-case>K</fixed-case>onkani Language MadhaviSardesai - JyotiPawar + JyotiPawar ShantaramWalawalikar EdnaVaz 145–152 @@ -7177,7 +7177,7 @@ Influences of particles on <fixed-case>V</fixed-case>ietnamese tonal Co-articulation Thị LanNguyen - Do DatTran + Do DatTran 163–172 W12-5014 nguyen-tran-2012-influences @@ -7193,7 +7193,7 @@ Bidirectional <fixed-case>B</fixed-case>engali Script and Meetei Mayek Transliteration of Web Based <fixed-case>M</fixed-case>anipuri News Corpus - Thoudam DorenSingh + Thoudam DorenSingh 181–190 W12-5016 singh-2012-bidirectional @@ -7201,17 +7201,17 @@ Rule-based Machine Translation between <fixed-case>I</fixed-case>ndonesian and <fixed-case>M</fixed-case>alaysian Raymond HendySusanto - Septina DianLarasati - Francis M.Tyers + Septina DianLarasati + Francis M.Tyers 191–200 W12-5017 susanto-etal-2012-rule Building Multilingual Search Index using open source framework - ArjunAtreya + ArjunAtreya SwapnilChaudhari - PushpakBhattacharyya + PushpakBhattacharyya GaneshRamakrishnan 201–210 W12-5018 @@ -7227,8 +7227,8 @@ Error tracking in search engine development SwapnilChaudhari - Arjun AtreyaV - PushpakBhattacharyya + Arjun AtreyaV + PushpakBhattacharyya GaneshRamakrishnan 221–228 W12-5020 @@ -7274,7 +7274,7 @@ Like a Lexicographer Weaving Her Lexical Network - AlainPolguère + AlainPolguère 1–4 W12-5101 polguere-2012-like @@ -7289,10 +7289,10 @@ On discriminating f<fixed-case>MRI</fixed-case> representations of abstract <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et taxonomic categories - AndrewAnderson + AndrewAnderson TaoYuan BrianMurphy - MassimoPoesio + MassimoPoesio 21–32 W12-5103 anderson-etal-2012-discriminating @@ -7300,7 +7300,7 @@ Automatic index creation to support navigation in lexical graphs encoding part_of relations MichaelZock - DebelaTesfaye + DebelaTesfaye 33–52 W12-5104 zock-tesfaye-2012-automatic @@ -7317,7 +7317,7 @@ Verb interpretation for basic action types: annotation, ontology induction and creation of prototypical scenes FrancescaFrontini IreneDe Felice - FahadKhan + FahadKhan IreneRusso MonicaMonachini GloriaGagliardi @@ -7331,7 +7331,7 @@ EmmanuelEckard LucieBarque AlexisNasr - BenoîtSagot + BenoîtSagot 81–94 W12-5107 eckard-etal-2012-dictionary @@ -7340,7 +7340,7 @@ Automatic Construction of a <fixed-case>M</fixed-case>ulti<fixed-case>W</fixed-case>ord Expressions Bilingual Lexicon: A Statistical Machine Translation Evaluation Perspective DhouhaBouamor NasredineSemmar - PierreZweigenbaum + PierreZweigenbaum 95–108 W12-5108 bouamor-etal-2012-automatic @@ -7348,8 +7348,8 @@ Hand-Crafting a Lexical Network With a Knowledge-Based Graph Editor NabilGader - VeronikaLux-Pogodalla - AlainPolguère + VeronikaLux-Pogodalla + AlainPolguère 109–126 W12-5109 gader-etal-2012-hand @@ -7357,14 +7357,14 @@ A Procedural <fixed-case>DTD</fixed-case> Project for Dictionary Entry Parsing Described with Parameterized Grammars NeculaiCurteanu - Mihai AlexMoruz + Mihai AlexMoruz 127–136 W12-5110 curteanu-moruz-2012-procedural Automatic Generation of the Universal Word Explanation from <fixed-case>UNL</fixed-case> Ontology - Khan Md.Anwarus Salam + Khan Md.Anwarus Salam HiroshiUchida TetsuroNishino 137–146 @@ -7404,7 +7404,7 @@ Where’s the meeting that was cancelled? existential implications of transitive verbs PatriciaAmaral - Valeriade Paiva + Valeriade Paiva CleoCondoravdi AnnieZaenen 183–194 @@ -7439,7 +7439,7 @@ RuvanWeerasinghe SarmadHussain VirachSornlertlamvanich - Rachel Edita O.Roxas + Rachel Edita O.Roxas The COLING 2012 Organizing Committee
Mumbai, India
December @@ -7452,19 +7452,19 @@ <fixed-case>K</fixed-case>orean <fixed-case>NLP</fixed-case>2<fixed-case>RDF</fixed-case> Resources - YoungGyunHahm - KyungTaeLim + YoungGyunHahm + KyungTaeLim JungyeulPark YongunYoon - Key-SunChoi + Key-SunChoi 1–10 W12-5201 hahm-etal-2012-korean Building Large Scale Text Corpus for <fixed-case>T</fixed-case>ibetan Natural Language Processing by Extracting Text from Web Pages - HuidanLiu - MinghuaNuo + HuidanLiu + MinghuaNuo JianWu YepingHe 11–20 @@ -7477,7 +7477,7 @@ HimadriBharali AmbeswarGogoi RatulDeka - Anup Kr.Barman + Anup Kr.Barman 21–28 W12-5203 sarma-etal-2012-structured @@ -7485,7 +7485,7 @@ Corpus Building of Literary Lesser Rich Language-<fixed-case>B</fixed-case>odo: Insights and Challenges BiswajitBrahma - Anup Kr.Barman + Anup Kr.Barman Shikhar Kr.Sarma BhatimaBoro 29–34 @@ -7495,7 +7495,7 @@ Dependency Parsers for <fixed-case>P</fixed-case>ersian MojganSeraji - BeataMegyesi + BeataMegyesi JoakimNivre 35–44 W12-5205 @@ -7511,7 +7511,7 @@ A Hybrid Dependency Parser for <fixed-case>B</fixed-case>angla - ArnabDhar + ArnabDhar SanjayChatterji SudeshnaSarkar AnupamBasu @@ -7523,7 +7523,7 @@ Repairing <fixed-case>B</fixed-case>engali Verb Chunks for Improved <fixed-case>B</fixed-case>engali to <fixed-case>H</fixed-case>indi Machine Translation SanjayChatterji NabanitaDatta - ArnabDhar + ArnabDhar BiswanathBarik SudeshnaSarkar AnupamBasu @@ -7534,7 +7534,7 @@ Domain Specific Ontology Extractor For <fixed-case>I</fixed-case>ndian Languages BrijeshBhatt - PushpakBhattacharyya + PushpakBhattacharyya 75–84 W12-5209 bhatt-bhattacharyya-2012-domain @@ -7542,7 +7542,7 @@ Constrained Hidden <fixed-case>M</fixed-case>arkov Model for Bilingual Keyword Pairs Alignment DennyCahyadi - FabienCromieres + FabienCromieres SadaoKurohashi 85–94 W12-5210 @@ -7551,7 +7551,7 @@ N-gram and Gazetteer List Based Named Entity Recognition for <fixed-case>U</fixed-case>rdu: A Scarce Resourced Language FaryalJahangir - WaqasAnwar + WaqasAnwar Usama IjazBajwa XuanWang 95–104 @@ -7569,7 +7569,7 @@ Enhancing Lemmatization for <fixed-case>M</fixed-case>ongolian and its Application to Statistical Machine Translation - ChimeddorjOdbayar + ChimeddorjOdbayar AtsushiFujii 115–124 W12-5213 @@ -7589,8 +7589,8 @@ Proceedings of the 2nd Workshop on Sentiment Analysis where AI meets Psychology W12-53 - SivajiBandyopadhyay - ManabuOkumura + SivajiBandyopadhyay + ManabuOkumura The COLING 2012 Organizing Committee
Mumbai, India
December @@ -7611,7 +7611,7 @@ A <fixed-case>CCG</fixed-case>-based Approach to Fine-Grained Sentiment Analysis PhillipSmith - MarkLee + MarkLee 3–16 W12-5302 smith-lee-2012-ccg @@ -7669,7 +7669,7 @@ How Human Analyse Lexical Indicators of Sentiments- A Cognitive Analysis Using Reaction-Time MarimuthuK - Sobha LalithaDevi + Sobha LalithaDevi 81–90 W12-5309 k-devi-2012-human @@ -7692,7 +7692,7 @@ Rule-Based Sentiment Analysis in Narrow Domain: Detecting Sentiment in Daily Horoscopes Using Sentiscope - ZeljkoAgic + ZeljkoAgic DanijelaMerkler 115–124 W12-5312 @@ -7733,7 +7733,7 @@ An Experiment in Integrating Sentiment Features for Tech Stock Prediction in <fixed-case>T</fixed-case>witter Tien ThanhVu ShuChang - Quang ThuyHa + Quang ThuyHa NigelCollier 23–38 W12-5503 @@ -7752,10 +7752,10 @@ Proceedings of the Workshop on Machine Translation and Parsing in Indian Languages W12-56 - Dipti MisraSharma - PrashanthMannem + Dipti MisraSharma + PrashanthMannem JosephvanGenabith - Sobha LalithaDevi + Sobha LalithaDevi RadhikaMamidi RanjaniParthasarathi The COLING 2012 Organizing Committee @@ -7779,7 +7779,7 @@ Semantic Parsing of <fixed-case>T</fixed-case>amil Sentences BalajiJagan - GeethaT V + GeethaT V RanjaniParthasarathi 15–22 W12-5602 @@ -7787,8 +7787,8 @@ <fixed-case>T</fixed-case>amil <fixed-case>NER</fixed-case> - Coping with Real Time Challenges - MalarkodiC.S - PattabhiRK Rao + MalarkodiC.S + PattabhiRK Rao SobhaLalitha Devi 23–38 W12-5603 @@ -7796,7 +7796,7 @@ Sublexical Translations for Low-Resource Language - Khan Md.Anwarus Salam + Khan Md.Anwarus Salam SetsuoYamada TetsuroNishino 39–52 @@ -7813,9 +7813,9 @@ A Diagnostic Evaluation Approach Targeting <fixed-case>MT</fixed-case> Systems for <fixed-case>I</fixed-case>ndian Languages RenuBalyan - Sudip KumarNaskar + Sudip KumarNaskar AntonioToral - NiladriChatterjee + NiladriChatterjee 61–72 W12-5606 balyan-etal-2012-diagnostic @@ -7830,8 +7830,8 @@ Clause Boundary Identification for <fixed-case>M</fixed-case>alayalam Using <fixed-case>CRF</fixed-case> - LakshmiS. - Vijay Sundar RamR + LakshmiS. + Vijay Sundar RamR SobhaLalitha Devi 83–92 W12-5608 @@ -7841,14 +7841,14 @@ Disambiguation of pre/post positions in <fixed-case>E</fixed-case>nglish - <fixed-case>M</fixed-case>alayalam Text Translation JayanV SunilR - BhadranV K + BhadranV K 93–102 W12-5609 v-etal-2012-disambiguation Resolution for Pronouns in <fixed-case>T</fixed-case>amil Using <fixed-case>CRF</fixed-case> - AkilandeswariA + AkilandeswariA SobhaLalitha Devi 103–112 W12-5610 @@ -7857,8 +7857,8 @@ Morphological Processing for <fixed-case>E</fixed-case>nglish-<fixed-case>T</fixed-case>amil Statistical Machine Translation LoganathanRamasamy - OndřejBojar - ZdeněkŽabokrtský + OndřejBojar + ZdeněkŽabokrtský 113–122 W12-5611 ramasamy-etal-2012-morphological @@ -7883,7 +7883,7 @@ <fixed-case>CUNI</fixed-case>: Feature Selection and Error Analysis of a Transition-Based Parser - DanielZeman + DanielZeman 143–148 W12-5614 zeman-2012-cuni @@ -7891,7 +7891,7 @@ Parsing <fixed-case>H</fixed-case>indi with <fixed-case>MDP</fixed-case>arser AlexanderVolokh - GünterNeumann + GünterNeumann 149–154 W12-5615 volokh-neumann-2012-parsing @@ -7899,7 +7899,7 @@ A Three Stage Hybrid Parser for <fixed-case>H</fixed-case>indi SanjayChatterji - ArnadDhar + ArnadDhar SudeshnaSarkar AnupamBasu 155–162 @@ -7927,8 +7927,8 @@ Ensembling Various Dependency Parsers: Adopting Turbo Parser for <fixed-case>I</fixed-case>ndian Languages PuneethKukkadapu - DeepakMalladi - AswarthDara + DeepakMalladi + AswarthDara 179–184 W12-5619 kukkadapu-etal-2012-ensembling @@ -7947,11 +7947,11 @@ Proceedings of the Second Workshop on Applying Machine Learning Techniques to Optimise the Division of Labour in Hybrid MT W12-57 - Josefvan Genabith + Josefvan Genabith ToniBadia ChristianFedermann MaiteMelero - Marta R.Costa-jussà + Marta R.Costa-jussà TsuyoshiOkita The COLING 2012 Organizing Committee
Mumbai, India
@@ -7966,7 +7966,7 @@ Hybrid Adaptation of Named Entity Recognition for Statistical Machine Translation VassilinaNikoulina - AgnesSandor + AgnesSandor MarcDymetman 1–16 W12-5701 @@ -7975,7 +7975,7 @@ Confusion Network Based System Combination for <fixed-case>C</fixed-case>hinese Translation Output: Word-Level or Character-Level? MaoxiLi - MingWenWang + MingWenWang 17–24 W12-5702 li-wang-2012-confusion @@ -7985,7 +7985,7 @@ KartikAsooja JorgeGracia NitishAggarwal - Asunción GómezPérez + Asunción GómezPérez 25–36 W12-5703 asooja-etal-2012-using @@ -8012,7 +8012,7 @@ Sentence-Level Quality Estimation for <fixed-case>MT</fixed-case> System Combination TsuyoshiOkita - RaphaëlRubino + RaphaëlRubino Josefvan Genabith 55–64 W12-5706 @@ -8074,7 +8074,7 @@ Automatically Assessing Free Texts YlliasChali - Sadid A.Hasan + Sadid A.Hasan 9–16 W12-5802 chali-hasan-2012-automatically @@ -8107,7 +8107,7 @@ Textbook Construction from Lecture Transcripts AliabbasPetiwala KannanMoudgalya - PushpakBhattacharyya + PushpakBhattacharyya 43–50 W12-5806 petiwala-etal-2012-textbook @@ -8161,7 +8161,7 @@ Genre-oriented Readability Assessment: a Case Study FeliceDell’Orletta GiuliaVenturi - SimonettaMontemagni + SimonettaMontemagni 91–98 W12-5812 dellorletta-etal-2012-genre @@ -8173,7 +8173,7 @@ W12-59 KarthikVisweswariah AnanthakrishnanRamanathan - Mitesh M.Khapra + Mitesh M.Khapra The COLING 2012 Organizing Committee
Mumbai, India
December @@ -8205,7 +8205,7 @@ A Tagging-style Reordering Model for Phrase-based <fixed-case>SMT</fixed-case> MinweiFeng - HermannNey + HermannNey 17–26 W12-5903 feng-ney-2012-tagging @@ -8222,7 +8222,7 @@ Learning Improved Reordering Models for <fixed-case>U</fixed-case>rdu, <fixed-case>F</fixed-case>arsi and <fixed-case>I</fixed-case>talian using <fixed-case>SMT</fixed-case> RohitGupta Raj NathPatel - RiteshShah + RiteshShah 37–46 W12-5905 gupta-etal-2012-learning @@ -8230,7 +8230,7 @@ Partially modelling word reordering as a sequence labelling problem AnoopKunchukuttan - PushpakBhattacharyya + PushpakBhattacharyya 47–54 W12-5906 kunchukuttan-bhattacharyya-2012-partially @@ -8240,8 +8240,8 @@ Proceedings of the Workshop on Question Answering for Complex Domains W12-60 - NandaKambhatla - SachindraJoshi + NandaKambhatla + SachindraJoshi GaneshRamakrishnan KiranKate PriyankaAgrawal @@ -8258,7 +8258,7 @@ Simple or Complex? Classifying Questions by Answering Complexity YlliasChali - Sadid A.Hasan + Sadid A.Hasan 1–10 W12-6001 chali-hasan-2012-simple @@ -8266,7 +8266,7 @@ Question Classification and Answering from Procedural Text in <fixed-case>E</fixed-case>nglish SomnathBanerjee - SivajiBandyopadhyay + SivajiBandyopadhyay 11–26 W12-6002 banerjee-bandyopadhyay-2012-question @@ -8298,7 +8298,7 @@ <fixed-case>W</fixed-case>iki<fixed-case>T</fixed-case>alk: A Spoken <fixed-case>W</fixed-case>ikipedia-based Open-Domain Knowledge Access System - GrahamWilcock + GrahamWilcock 57–70 W12-6006 wilcock-2012-wikitalk @@ -8308,11 +8308,11 @@ Proceedings of the First International Workshop on Optimization Techniques for Human Language Technology W12-61 - PushpakBhattacharyya + PushpakBhattacharyya AsifEkbal SriparnaSaha MarkJohnson - DiegoMolla-Aliod + DiegoMolla-Aliod MarkDras The COLING 2012 Organizing Committee
Mumbai, India
@@ -8328,16 +8328,16 @@ <fixed-case>B</fixed-case>io<fixed-case>POS</fixed-case>: Biologically Inspired Algorithms for <fixed-case>POS</fixed-case> Tagging Ana PaulaSilva ArlindoSilva - IreneRodrigues + IreneRodrigues 1–16 W12-6101 silva-etal-2012-biopos
Optimization for Efficient Determination of Chunk in Automatic Evaluation for Machine Translation - HiroshiEchizen’ya - KenjiAraki - EduardHovy + HiroshiEchizen’ya + KenjiAraki + EduardHovy 17–30 W12-6102 echizenya-etal-2012-optimization @@ -8355,7 +8355,7 @@ Selection of Discriminative Features for Translation Texts Kuo-MingTang Chien-KangHuang - Chia-MingLee + Chia-MingLee 49–64 W12-6104 tang-etal-2012-selection @@ -8379,7 +8379,7 @@ Iterative <fixed-case>C</fixed-case>hinese Bi-gram Term Extraction Using Machine-learning Classification Approach - Chia-MingLee + Chia-MingLee Chien-KangHuang Kuo-MingTang 95–108 @@ -8389,9 +8389,9 @@ Parameter estimation under uncertainty with Simulated Annealing applied to an ant colony based probabilistic <fixed-case>WSD</fixed-case> algorithm AndonTchechmedjiev - JérômeGoulian + JérômeGoulian DidierSchwab - GillesSérasset + GillesSérasset 109–124 W12-6108 tchechmedjiev-etal-2012-parameter @@ -8401,7 +8401,7 @@ Proceedings of the 10th International Workshop on Finite State Methods and Natural Language Processing W12-62 - IñakiAlegria + IñakiAlegria MansHulden Association for Computational Linguistics
Donostia–San Sebastián
@@ -8415,7 +8415,7 @@ Effect of Language and Error Models on Efficiency of Finite-State Spell-Checking and Correction - Tommi APirinen + Tommi APirinen SamHardwick 1–9 W12-6201 @@ -8446,7 +8446,7 @@ Integrating Aspectually Relevant Properties of Verbs into a Morphological Analyzer for <fixed-case>E</fixed-case>nglish - KatinaBontcheva + KatinaBontcheva 30–34 W12-6205 bontcheva-2012-integrating @@ -8472,7 +8472,7 @@ <fixed-case>WFST</fixed-case>-Based Grapheme-to-Phoneme Conversion: Open Source tools for Alignment, Model-Building and Decoding Josef R.Novak - NobuakiMinematsu + NobuakiMinematsu KeikichiHirose 45–49 W12-6208 @@ -8488,7 +8488,7 @@ Implementation of Replace Rules Using Preference Operator SenkaDrobac - MiikkaSilfverberg + MiikkaSilfverberg AnssiYli-Jyrä 55–59 W12-6210 @@ -8507,9 +8507,9 @@ Developing an Open-Source <fixed-case>FST</fixed-case> Grammar for Verb Chain Transfer in a <fixed-case>S</fixed-case>panish-<fixed-case>B</fixed-case>asque <fixed-case>MT</fixed-case> System - AingeruMayor + AingeruMayor MansHulden - GorkaLabaka + GorkaLabaka 65–69 W12-6212 mayor-etal-2012-developing @@ -8526,8 +8526,8 @@ A Methodology for Obtaining Concept Graphs from Word Graphs MarcosCalvo Jon AnderGómez - Lluís-F.Hurtado - EmilioSanchis + Lluís-F.Hurtado + EmilioSanchis 75–79 W12-6214 calvo-etal-2012-methodology @@ -8549,8 +8549,8 @@ Finite-State Acoustic and Translation Model Composition in Statistical Speech Translation: Empirical Assessment AliciaPérez - M. InésTorres - FranciscoCasacuberta + M. InésTorres + FranciscoCasacuberta 99–107 W12-6217 perez-etal-2012-finite @@ -8597,14 +8597,14 @@ Linguistic foundation for <fixed-case>NLP</fixed-case> - GuodongZhou + GuodongZhou 2 W12-6302 zhou-2012-linguistic A Language Modeling Approach to Identifying Code-Switched Sentences and Words - Liang-ChihYu + Liang-ChihYu Wei-ChengHe Wei-NanChien 3–8 @@ -8614,7 +8614,7 @@ Semi-automatic Annotation of <fixed-case>C</fixed-case>hinese Word Structure JianqiangMa - ChunyuKit + ChunyuKit DaleGerdemann 9–17 W12-6304 @@ -8694,8 +8694,8 @@ ShujianHuang YinggongZhao HaoZhou - XinyuDai - JiajunChen + XinyuDai + JiajunChen 63–68 W12-6312 xi-etal-2012-adapting @@ -8713,7 +8713,7 @@ Rules-based <fixed-case>C</fixed-case>hinese Word Segmentation on <fixed-case>M</fixed-case>icro<fixed-case>B</fixed-case>log for <fixed-case>CIPS</fixed-case>-<fixed-case>SIGHAN</fixed-case> on <fixed-case>CLP</fixed-case>2012 JingZhang - DegenHuang + DegenHuang XiaHan WeiWang 74–78 @@ -8753,7 +8753,7 @@ A Comparison of <fixed-case>C</fixed-case>hinese Word Segmentation on News and Microblog Corpora with a Lexicon Based Method YuxiangJia - HongyingZan + HongyingZan MingFan ZhiminWang 95–98 @@ -8830,7 +8830,7 @@ Explore <fixed-case>C</fixed-case>hinese Encyclopedic Knowledge to Disambiguate Person Names JieLiu RuifengXu - QinLu + QinLu JianXu 138–145 W12-6326 @@ -8849,7 +8849,7 @@ <fixed-case>C</fixed-case>hinese Personal Name Disambiguation Based on Vector Space Model Qing-huFan - Hong-yingZan + Hong-yingZan Yu-meiChai Yu-xiangJia Gui-lingNiu @@ -8908,9 +8908,9 @@ Traditional <fixed-case>C</fixed-case>hinese Parsing Evaluation at <fixed-case>SIGHAN</fixed-case> Bake-offs 2012 - Yuen-HsienTseng + Yuen-HsienTseng Lung-HaoLee - Liang-ChihYu + Liang-ChihYu 199–205 W12-6335 tseng-etal-2012-traditional @@ -8939,8 +8939,8 @@ Improving <fixed-case>PCFG</fixed-case> <fixed-case>C</fixed-case>hinese Parsing with Context-Dependent Probability Re-estimation Yu-MingHsieh Ming-HongBai - Jason S.Chang - Keh-JiannChen + Jason S.Chang + Keh-JiannChen 216–221 W12-6338 hsieh-etal-2012-improving @@ -8956,7 +8956,7 @@ A Conditional Random Field-based Traditional <fixed-case>C</fixed-case>hinese Base Phrase Parser for <fixed-case>SIGHAN</fixed-case> Bake-off 2012 Evaluation - Yih-RuWang + Yih-RuWang Yuan-FuLiao 231–236 W12-6340 diff --git a/data/xml/W13.xml b/data/xml/W13.xml index b3a11bbd51..ccd8c0527f 100644 --- a/data/xml/W13.xml +++ b/data/xml/W13.xml @@ -33,11 +33,11 @@ Automatically Deriving Event Ontologies for a <fixed-case>C</fixed-case>ommon<fixed-case>S</fixed-case>ense Knowledge Base - JamesAllen - Willde Beaumont + JamesAllen + Willde Beaumont LucianGalescu JansenOrfan - MarySwift + MarySwift Choh ManTeng 23–34 W13-0103 @@ -45,8 +45,8 @@ Intensionality was only alleged: On adjective-noun composition in distributional semantics - GemmaBoleda - MarcoBaroni + GemmaBoleda + MarcoBaroni The NghiaPham LouiseMcNally 35–46 @@ -56,10 +56,10 @@ Sentiment Composition Using a Parabolic Model BaptisteChardon - FarahBenamara - YannickMathieu + FarahBenamara + YannickMathieu VladimirPopescu - NicholasAsher + NicholasAsher 47–58 W13-0105 chardon-etal-2013-sentiment @@ -67,15 +67,15 @@ Temporal Relation Classification Based on Temporal Reasoning FranciscoCosta - AntónioBranco + AntónioBranco 59–70 W13-0106 costa-branco-2013-temporal Empirical Validation of Reichenbach’s Tense Framework - LeonDerczynski - RobertGaizauskas + LeonDerczynski + RobertGaizauskas 71–82 W13-0107 derczynski-gaizauskas-2013-empirical @@ -106,7 +106,7 @@ Towards Weakly Supervised Resolution of Null Instantiations - PhilipGorinski + PhilipGorinski JosefRuppenhofer CarolineSporleder 119–130 @@ -126,8 +126,8 @@ Domain Adaptable Semantic Clustering in Statistical <fixed-case>NLG</fixed-case> - BlakeHowald - RavikumarKondadadi + BlakeHowald + RavikumarKondadadi FrankSchilder 143–154 W13-0113 @@ -136,14 +136,14 @@ Sources of Evidence for Implicit Argument Resolution EgoitzLaparra - GermanRigau + GermanRigau 155–166 W13-0114 laparra-rigau-2013-sources Recognising Sets and Their Elements: Tree Kernels for Entity Instantiation Identification - AndrewMcKinlay + AndrewMcKinlay KatjaMarkert 167–178 W13-0115 @@ -152,7 +152,7 @@ A corpus study of clause combination OlgaNikitina - SebastianPadó + SebastianPadó 179–190 W13-0116 nikitina-pado-2013-corpus @@ -167,7 +167,7 @@ The Impact of Selectional Preference Agreement on Semantic Relational Similarity BryanRink - SandaHarabagiu + SandaHarabagiu 204–215 W13-0118 rink-harabagiu-2013-impact @@ -175,8 +175,8 @@ Recognizing Spatial Containment Relations between Event Mentions KirkRoberts - Michael A.Skinner - Sanda M.Harabagiu + Michael A.Skinner + Sanda M.Harabagiu 216–227 W13-0119 roberts-etal-2013-recognizing @@ -185,7 +185,7 @@ Regular Meaning Shifts in <fixed-case>G</fixed-case>erman Particle Verbs: A Case Study SylviaSpringorum JasonUtt - SabineSchulte im Walde + SabineSchulte im Walde 228–239 W13-0120 springorum-etal-2013-regular @@ -204,7 +204,7 @@ Parsimonious Semantic Representations with Projection Pointers - Noortje J.Venhuizen + Noortje J.Venhuizen JohanBos HarmBrouwer 252–263 @@ -220,15 +220,15 @@ What excludes an Alternative in Coherence Relations? - BonnieWebber + BonnieWebber 276–287 W13-0124 webber-2013-excludes A Search Task Dataset for <fixed-case>G</fixed-case>erman Textual Entailment - Britta D.Zeller - SebastianPadó + Britta D.Zeller + SebastianPadó 288–299 W13-0125 zeller-pado-2013-search @@ -258,7 +258,7 @@ The semantic annotation of quantification - HarryBunt + HarryBunt 307–313 W13-0202 bunt-2013-semantic @@ -273,7 +273,7 @@ What is in a text, what isn’t, and what this has to do with lexical semantics - AurelieHerbelot + AurelieHerbelot 321–327 W13-0204 herbelot-2013-text @@ -283,7 +283,7 @@ EliasIosif AlexandrosPotamianos MariaGiannoudaki - KalliopiZervanou + KalliopiZervanou 328–334 W13-0205 iosif-etal-2013-semantic @@ -340,13 +340,13 @@ A Pilot Experiment in Knowledge Authoring as Dialogue - ArtemisParvizi + ArtemisParvizi CarolineJay ChristopherMellish Jeff Z.Pan YuanRen RobertStevens - Keesvan Deemter + Keesvan Deemter 376–382 W13-0212 parvizi-etal-2013-pilot @@ -370,7 +370,7 @@ Gamification for Word Sense Labeling - Noortje J.Venhuizen + Noortje J.Venhuizen ValerioBasile KilianEvang JohanBos @@ -382,7 +382,7 @@ Fitting, Not Clashing! A Distributional Semantic Model of Logical Metonymy AlessandraZarcone AlessandroLenci - SebastianPadó + SebastianPadó JasonUtt 404–410 W13-0216 @@ -395,7 +395,7 @@ W13-03 PaulPortner AynatRubinstein - GrahamKatz + GrahamKatz Association for Computational Linguistics
Potsdam, Germany
March @@ -407,7 +407,7 @@ Challenges in modality annotation in a <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese Spontaneous Speech Corpus - Luciana BeatrizAvila + Luciana BeatrizAvila HelianaMello 1–6 W13-0301 @@ -449,7 +449,7 @@ AynatRubinstein HillaryHarner ElizabethKrawczyk - DanielSimonson + DanielSimonson GrahamKatz PaulPortner 38–46 @@ -472,7 +472,7 @@ W13-04 StephenWu NigamShah - Kevin BretonnelCohen + Kevin BretonnelCohen Association for Computational Linguistics
Potsdam, Germany
March @@ -484,7 +484,7 @@ A Framework to Generate Sets of Terms from Large Scale Medical Vocabularies for Natural Language Processing - SalahAït-Mokhtar + SalahAït-Mokhtar CarolineHagège PajolmaRupi 1–6 @@ -510,17 +510,17 @@ Evaluating the Use of Empirically Constructed Lexical Resources for Named Entity Recognition SiddharthaJonnalagadda - TrevorCohen + TrevorCohen StephenWu HongfangLiu - GracielaGonzalez + GracielaGonzalez 23–33 W13-0404 jonnalagadda-etal-2013-evaluating Towards Converting Clinical Phrases into <fixed-case>SNOMED</fixed-case> <fixed-case>CT</fixed-case> Expressions - Rohit J.Kate + Rohit J.Kate 34–43 W13-0405 kate-2013-towards @@ -534,7 +534,7 @@ SiddharthaJonnalagadda KavishwarWagholikar StephenWu - ChristopherChute + ChristopherChute HongfangLiu 44–53 W13-0406 @@ -544,7 +544,7 @@ The <fixed-case>VERICLIG</fixed-case> Project: Extraction of Computer Interpretable Guidelines via Syntactic and Semantic Annotation CamiloThorne MarcoMontali - DiegoCalvanese + DiegoCalvanese ElenaCardillo ClaudioEccher 54–58 @@ -556,7 +556,7 @@ Proceedings of the 9th Joint ISO - ACL SIGSEM Workshop on Interoperable Semantic Annotation W13-05 - HarryBunt + HarryBunt Association for Computational Linguistics
Potsdam, Germany
March @@ -571,7 +571,7 @@ Cross-linguistic annotation of modality: a data-driven hierarchical model MalvinaNissim - PaolaPietrandrea + PaolaPietrandrea AndreaSansò CaterinaMauri 7–14 @@ -587,7 +587,7 @@ Capturing Motion in <fixed-case>ISO</fixed-case>-<fixed-case>S</fixed-case>pace<fixed-case>B</fixed-case>ank - JamesPustejovsky + JamesPustejovsky ZacharyYocum 25–34 W13-0503 @@ -611,7 +611,7 @@ More Than Only Noun-Noun Compounds: Towards an Annotation Scheme for the Semantic Modelling of Other Noun Compound Types BenVerhoeven - Gerhard B.van Huyssteen + Gerhard B.van Huyssteen 59–66 W13-0506 verhoeven-van-huyssteen-2013-noun @@ -619,7 +619,7 @@ Issues in the addition of <fixed-case>ISO</fixed-case> standard annotations to the Switchboard corpus HarryBunt - Alex C.Fang + Alex C.Fang XiaoyueLiu JingCao VolhaPetukhova @@ -638,7 +638,7 @@ Inference Patterns with Intensional Adjectives - JamesPustejovsky + JamesPustejovsky 85–89 W13-0509 pustejovsky-2013-inference @@ -648,9 +648,9 @@ Proceedings of the IWCS 2013 Workshop Towards a Formal Distributional Semantics W13-06 - AurelieHerbelot + AurelieHerbelot RobertoZamparelli - GemmaBoleda + GemmaBoleda Association for Computational Linguistics
Potsdam, Germany
March @@ -663,7 +663,7 @@ Semantic transparency: challenges for distributional semantics Melanie J.Bell - MartinSchäfer + MartinSchäfer 1–10 W13-0601 bell-schafer-2013-semantic @@ -677,10 +677,10 @@ Sentence paraphrase detection: When determiners and word order make the difference - NghiaPham - RaffaellaBernardi - Yao ZhongZhang - MarcoBaroni + NghiaPham + RaffaellaBernardi + Yao ZhongZhang + MarcoBaroni 21–29 W13-0603 pham-etal-2013-sentence @@ -689,7 +689,7 @@ The Curious Case of Metonymic Verbs: A Distributional Characterization JasonUtt AlessandroLenci - SebastianPadó + SebastianPadó AlessandraZarcone 30–39 W13-0604 @@ -700,7 +700,7 @@ Proceedings of the IWCS 2013 Workshop on Computational Models of Spatial Language Interpretation and Generation (CoSLI-3) W13-07 - JohnKelleher + JohnKelleher RobertRoss SimonDobnik Association for Computational Linguistics @@ -722,7 +722,7 @@ Deriving Salience Models from Human Route Directions - JanaGötze + JanaGötze JohanBoye 7–12 W13-0702 @@ -746,7 +746,7 @@ Where Things Happen: On the Semantics of Event Localization - JamesPustejovsky + JamesPustejovsky 29–39 W13-0705 pustejovsky-2013-things @@ -778,7 +778,7 @@ Taste of Two Different Flavours: Which <fixed-case>M</fixed-case>anipuri Script works better for <fixed-case>E</fixed-case>nglish-<fixed-case>M</fixed-case>anipuri Language pair <fixed-case>SMT</fixed-case> Systems? - Thoudam DorenSingh + Thoudam DorenSingh 11–18 W13-0802 singh-2013-taste @@ -786,7 +786,7 @@ Hierarchical Alignment Decomposition Labels for <fixed-case>H</fixed-case>iero Grammar Rules GideonMaillette de Buy Wenniger - KhalilSima’an + KhalilSima’an 19–28 W13-0803 maillette-de-buy-wenniger-simaan-2013-hierarchical @@ -796,7 +796,7 @@ MatthiasHuck DavidVilar MarkusFreitag - HermannNey + HermannNey 29–38 W13-0804 huck-etal-2013-performance @@ -805,7 +805,7 @@ Combining Word Reordering Methods on different Linguistic Abstraction Levels for Statistical Machine Translation TeresaHerrmann JanNiehues - AlexWaibel + AlexWaibel 39–47 W13-0805 herrmann-etal-2013-combining @@ -822,7 +822,7 @@ A Formal Characterization of Parsing Word Alignments by Synchronous Grammars with Empirical Evidence to the <fixed-case>ITG</fixed-case> Hypothesis. GideonMaillette de Buy Wenniger - KhalilSima’an + KhalilSima’an 58–67 W13-0807 maillette-de-buy-wenniger-simaan-2013-formal @@ -841,7 +841,7 @@ W13-09 EkaterinaShutova BeataBeigman Klebanov - JoelTetreault + JoelTetreault ZornitsaKozareva Association for Computational Linguistics
Atlanta, Georgia
@@ -890,9 +890,9 @@
Automatic Metaphor Detection using Large-Scale Lexical Resources and Conventional Metaphor Extraction - YorickWilks + YorickWilks AdamDalton - JamesAllen + JamesAllen LucianGalescu 36–44 W13-0905 @@ -916,7 +916,7 @@ KartikGoyal HuyingLi WhitneySanders - EduardHovy + EduardHovy 52–57 W13-0907 hovy-etal-2013-identifying @@ -929,17 +929,17 @@ DaveBarner DonaldBlack MajorieFriedman - RalphWeischedel + RalphWeischedel 58–66 W13-0908 heintz-etal-2013-automatic Robust Extraction of Metaphor from Novel Data - TomekStrzalkowski - George AaronBroadwell - SarahTaylor - LaurieFeldman + TomekStrzalkowski + George AaronBroadwell + SarahTaylor + LaurieFeldman SamiraShaikh TingLiu BorisYamrom @@ -981,8 +981,8 @@ Managing Multiword Expressions in a Lexicon-Based Sentiment Analysis System for <fixed-case>S</fixed-case>panish - AntonioMoreno-Ortiz - ChantalPérez-Hernández + AntonioMoreno-Ortiz + ChantalPérez-Hernández MariaDel-Olmo 1–10 W13-1001 @@ -999,7 +999,7 @@ Improving Word Translation Disambiguation by Capturing Multiword Expressions with Dictionaries LarsBungum - BjörnGambäck + BjörnGambäck AndréLynum ErwinMarsi 21–30 @@ -1008,7 +1008,7 @@ Complex Predicates are Multi-Word Expressions - MarthaPalmer + MarthaPalmer 31 W13-1004 palmer-2013-complex @@ -1016,7 +1016,7 @@ The (Un)expected Effects of Applying Standard Cleansing Models to Human Ratings on Compositionality StephenRoller - SabineSchulte im Walde + SabineSchulte im Walde SilkeScheible 32–41 W13-1005 @@ -1048,10 +1048,10 @@ An Analysis of Annotation of Verb-Noun Idiomatic Combinations in a Parallel Dependency Corpus - ZdenkaUresova - JanHajic - EvaFucikova - JanaSindlerova + ZdenkaUresova + JanHajic + EvaFucikova + JanaSindlerova 58–63 W13-1009 uresova-etal-2013-analysis @@ -1077,8 +1077,8 @@ Exploring <fixed-case>MWE</fixed-case>s for Knowledge Acquisition from Corporate Technical Documents BellManrique Losada - Carlos M.Zapata Jaramillo - Diego A.Burgos + Carlos M.Zapata Jaramillo + Diego A.Burgos 82–86 W13-1012 manrique-losada-etal-2013-exploring @@ -1093,9 +1093,9 @@ Identifying Pronominal Verbs: Towards Automatic Disambiguation of the Clitic ‘se’ in <fixed-case>P</fixed-case>ortuguese - MagaliSanches Duran - Carolina EvaristoScarton - Sandra MariaAluísio + MagaliSanches Duran + Carolina EvaristoScarton + Sandra MariaAluísio CarlosRamisch 93–100 W13-1014 @@ -1120,8 +1120,8 @@ Combining Different Features of Idiomaticity for the Automatic Classification of <fixed-case>N</fixed-case>oun+<fixed-case>V</fixed-case>erb Expressions in <fixed-case>B</fixed-case>asque - AnttonGurrutxaga - IñakiAlegria + AnttonGurrutxaga + IñakiAlegria 116–125 W13-1017 gurrutxaga-alegria-2013-combining @@ -1129,7 +1129,7 @@ Semantic Roles for Nominal Predicates: Building a Lexical Resource AshwiniVaidya - MarthaPalmer + MarthaPalmer BhuvanaNarasimhan 126–131 W13-1018 @@ -1159,7 +1159,7 @@ KeisukeSakaguchi AkifumiYoshimoto FrancesYung - YujiMatsumoto + YujiMatsumoto 139–144 W13-1021 shigeto-etal-2013-construction @@ -1172,8 +1172,8 @@ CristianDanescu-Niculescu-Mizil AtefehFarzindar MichaelGamon - DianaInkpen - MeenaNagarajan + DianaInkpen + MeenaNagarajan Association for Computational Linguistics
Atlanta, Georgia
June @@ -1188,7 +1188,7 @@ Does Size Matter? Text and Grammar Revision for Parsing Social Media Data MohammadKhan MarkusDickinson - SandraKuebler + SandraKuebler 1–10 W13-1101 khan-etal-2013-size @@ -1203,8 +1203,8 @@ A Preliminary Study of Tweet Summarization using Information Extraction WeiXu - RalphGrishman - AdamMeyers + RalphGrishman + AdamMeyers AlanRitter 20–29 W13-1103 @@ -1212,19 +1212,19 @@ Really? Well. Apparently Bootstrapping Improves the Performance of Sarcasm and Nastiness Classifiers for Online Dialogue - StephanieLukin - MarilynWalker + StephanieLukin + MarilynWalker 30–40 W13-1104 lukin-walker-2013-really Topical Positioning: A New Method for Predicting Opinion Changes in Conversation - Ching-ShengLin + Ching-ShengLin SamiraShaikh - JenniferStromer-Galley + JenniferStromer-Galley JenniferCrowley - TomekStrzalkowski + TomekStrzalkowski VeenaRavishankar 41–48 W13-1105 @@ -1262,7 +1262,7 @@ Translating Government Agencies’ Tweet Feeds: Specificities, Problems and (a few) Solutions FabrizioGotti - PhilippeLanglais + PhilippeLanglais AtefehFarzindar 80–89 W13-1109 @@ -1273,9 +1273,9 @@ Workshop on Events: Definition, Detection, Coreference, and Representation W13-12 - EduardHovy + EduardHovy TerukoMitamura - MarthaPalmer + MarthaPalmer Association for Computational Linguistics
Atlanta, Georgia
June @@ -1288,7 +1288,7 @@ Coping With Implicit Arguments And Events Coreference - RodolfoDelmonte + RodolfoDelmonte 1–10 W13-1201 delmonte-2013-coping @@ -1299,9 +1299,9 @@ Mariekevan Erp PiekVossen SaraTonelli - Willem Robertvan Hage - LucianoSerafini - RacheleSprugnoli + Willem Robertvan Hage + LucianoSerafini + RacheleSprugnoli JesperHoeksema 11–20 W13-1202 @@ -1338,7 +1338,7 @@ Annotating Change of State for Clinical Events LucyVanderwende FeiXia - MelihaYetisgen-Yildiz + MelihaYetisgen-Yildiz 47–51 W13-1206 vanderwende-etal-2013-annotating @@ -1349,7 +1349,7 @@ Proceedings of the Workshop on Vision and Natural Language Processing W13-13 JuliaHockenmaier - TamaraBerg + TamaraBerg Association for Computational Linguistics
Atlanta, Georgia
June @@ -1372,7 +1372,7 @@ Generating Natural-Language Video Descriptions Using Text-Mined Knowledge NivedaKrishnamoorthy GirishMalkarnenkar - RaymondMooney + RaymondMooney KateSaenko SergioGuadarrama 10–19 @@ -1394,9 +1394,9 @@ Proceedings of the Workshop on Computational Linguistics for Literature W13-14 - DavidElson + DavidElson AnnaKazantseva - StanSzpakowicz + StanSzpakowicz Association for Computational Linguistics
Atlanta, Georgia
June @@ -1429,7 +1429,7 @@ Tradition and Modernity in 20th Century <fixed-case>C</fixed-case>hinese Poetry RobVoigt - DanJurafsky + DanJurafsky 17–22 W13-1403 voigt-jurafsky-2013-tradition @@ -1478,7 +1478,7 @@ JannekeRauscher LeonardSwiezinski MartinRiedl - ChrisBiemann + ChrisBiemann 61–71 W13-1409 rauscher-etal-2013-exploring @@ -1525,8 +1525,8 @@ Open Book: a tool for helping <fixed-case>ASD</fixed-case> users’ semantic comprehension EduardBarbu - Maria TeresaMartín-Valdivia - Luis AlfonsoUreña-López + Maria TeresaMartín-Valdivia + Luis AlfonsoUreña-López 11–19 W13-1502 barbu-etal-2013-open @@ -1544,7 +1544,7 @@ Lexical Tightness and Text Complexity MichaelFlor BeataBeigman Klebanov - Kathleen M.Sheehan + Kathleen M.Sheehan 29–38 W13-1504 flor-etal-2013-lexical @@ -1553,7 +1553,7 @@ A System for the Simplification of Numerical Expressions at Different Levels of Understandability SusanaBautista RaquelHervás - PabloGervás + PabloGervás RichardPower SandraWilliams 39–48 @@ -1562,7 +1562,7 @@ A Two-Stage Approach for Generating Unbiased Estimates of Text Complexity - Kathleen M.Sheehan + Kathleen M.Sheehan MichaelFlor DianeNapolitano 49–58 @@ -1574,9 +1574,9 @@ Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis W13-16 - AlexandraBalahur - Erikvan der Goot - AndresMontoyo + AlexandraBalahur + Erikvan der Goot + AndresMontoyo Association for Computational Linguistics
Atlanta, Georgia
June @@ -1597,7 +1597,7 @@ Bootstrapped Learning of Emotion Hashtags #hashtags4you AshequlQadir - EllenRiloff + EllenRiloff 2–11 W13-1602 qadir-riloff-2013-bootstrapped @@ -1605,7 +1605,7 @@ Fine-Grained Emotion Recognition in Olympic Tweets Based on Human Computation ValentinaSintsova - ClaudiuMusat + ClaudiuMusat PearlPu 12–20 W13-1603 @@ -1614,7 +1614,7 @@ <fixed-case>S</fixed-case>panish <fixed-case>DAL</fixed-case>: A <fixed-case>S</fixed-case>panish Dictionary of Affect in Language MatíasDell’ Amerlina Ríos - AgustínGravano + AgustínGravano 21–28 W13-1604 dell-amerlina-rios-gravano-2013-spanish @@ -1623,7 +1623,7 @@ The perfect solution for detecting sarcasm in tweets #not ChristineLiebrecht FlorianKunneman - Antalvan den Bosch + Antalvan den Bosch 29–37 W13-1605 liebrecht-etal-2013-perfect @@ -1632,7 +1632,7 @@ Using <fixed-case>PU</fixed-case>-Learning to Detect Deceptive Opinion Spam DonatoHernández Fusilier RafaelGuzmán Cabrera - ManuelMontes-y-Gómez + ManuelMontes-y-Gómez PaoloRosso 38–45 W13-1606 @@ -1643,8 +1643,8 @@ Hugo JairEscalante EsaúVillatoro-Tello AntonioJuárez - ManuelMontes-y-Gómez - LuisVillaseñor + ManuelMontes-y-Gómez + LuisVillaseñor 46–54 W13-1607 escalante-etal-2013-sexual @@ -1686,21 +1686,21 @@ Bilingual Experiments on an Opinion Comparable Corpus - EugenioMartínez-Cámara - M. TeresaMartín-Valdivia - M. DoloresMolina-González - L. AlfonsoUreña-López + EugenioMartínez-Cámara + M. TeresaMartín-Valdivia + M. DoloresMolina-González + L. AlfonsoUreña-López 87–93 W13-1612 martinez-camara-etal-2013-bilingual <fixed-case>RA</fixed-case>-<fixed-case>SR</fixed-case>: Using a ranking algorithm to automatically building resources for subjectivity analysis over annotated corpora - YoanGutiérrez + YoanGutiérrez AndyGonzález - AntonioFernández + AntonioFernández AndrésMontoyo - RafaelMuñoz + RafaelMuñoz 94–99 W13-1613 gutierrez-etal-2013-ra @@ -1717,7 +1717,7 @@ Sentence-Level Subjectivity Detection Using Neuro-Fuzzy Models SamirRustamov ElshanMustafayev - MarkClements + MarkClements 108–114 W13-1615 rustamov-etal-2013-sentence @@ -1742,7 +1742,7 @@ Proceedings of the Eighth Workshop on Innovative Use of NLP for Building Educational Applications W13-17 - JoelTetreault + JoelTetreault JillBurstein ClaudiaLeacock Association for Computational Linguistics @@ -1783,7 +1783,7 @@ Developing and testing a self-assessment and tutoring system - Øistein E.Andersen + Øistein E.Andersen HelenYannakoudakis FionaBarker TimParish @@ -1838,21 +1838,21 @@ AmjadAbu-Jbara RahulJha EricMorley - DragomirRadev + DragomirRadev 82–88 W13-1710 abu-jbara-etal-2013-experimental <fixed-case>VTEX</fixed-case> System Description for the <fixed-case>NLI</fixed-case> 2013 Shared Task - VidasDaudaravičius + VidasDaudaravičius 89–95 W13-1711 daudaravicius-2013-vtex Feature Space Selection and Combination for Native Language Identification - CyrilGoutte + CyrilGoutte SergeLéger MarineCarpuat 96–100 @@ -1861,10 +1861,10 @@ Discriminating Non-Native <fixed-case>E</fixed-case>nglish with 350 Words - JohnHenderson + JohnHenderson GuidoZarrella CraigPfeifer - John D.Burger + John D.Burger 101–110 W13-1713 henderson-etal-2013-discriminating @@ -1887,7 +1887,7 @@ <fixed-case>NLI</fixed-case> Shared Task 2013: <fixed-case>MQ</fixed-case> Submission - ShervinMalmasi + ShervinMalmasi Sze-Meng JojoWong MarkDras 124–133 @@ -1900,7 +1900,7 @@ YutaHayashibe KeisukeSakaguchi MamoruKomachi - YujiMatsumoto + YujiMatsumoto 134–139 W13-1717 mizumoto-etal-2013-naist @@ -1918,7 +1918,7 @@ Exploring Syntactic Representations for Native Language Identification - BenSwanson + BenSwanson 146–151 W13-1719 @@ -1982,7 +1982,7 @@ SerhiyBykh SowmyaVajjala JuliaKrivanek - DetmarMeurers + DetmarMeurers 197–206 W13-1726 bykh-etal-2013-combining @@ -1992,16 +1992,16 @@ AndreaCimino FeliceDell’Orletta GiuliaVenturi - SimonettaMontemagni + SimonettaMontemagni 207–215 W13-1727 cimino-etal-2013-linguistic Improving Native Language Identification with <fixed-case>TF</fixed-case>-<fixed-case>IDF</fixed-case> Weighting - Binyam GebrekidanGebre + Binyam GebrekidanGebre MarcosZampieri - PeterWittenburg + PeterWittenburg TomHeskes 216–223 W13-1728 @@ -2018,7 +2018,7 @@ Feature Engineering in the <fixed-case>NLI</fixed-case> Shared Task 2013: <fixed-case>C</fixed-case>harles <fixed-case>U</fixed-case>niversity Submission Report - BarboraHladká + BarboraHladká MartinHolub VincentKríž 232–241 @@ -2032,7 +2032,7 @@ KristopherKyle ScottCrossley JianminDai - DanielleMcNamara + DanielleMcNamara 242–250 W13-1731 kyle-etal-2013-native @@ -2040,7 +2040,7 @@ Using N-gram and Word Network Features for Native Language Identification ShibamouliLahiri - RadaMihalcea + RadaMihalcea 251–259 W13-1732 lahiri-mihalcea-2013-using @@ -2079,7 +2079,7 @@ ManaalFaruqui VictorChahuneau ShulyWintner - ChrisDyer + ChrisDyer 279–287 W13-1736 tsvetkov-etal-2013-identifying @@ -2094,9 +2094,9 @@ Improving interpretation robustness in a tutorial dialogue system - MyroslavaDzikovska + MyroslavaDzikovska ElaineFarrow - JohannaMoore + JohannaMoore 293–299 W13-1738 dzikovska-etal-2013-improving @@ -2104,7 +2104,7 @@ Detecting Missing Hyphens in Learner Text AoifeCahill - MartinChodorow + MartinChodorow SusanneWolff NitinMadnani 300–305 @@ -2113,7 +2113,7 @@ Applying Machine Translation Metrics to Student-Written Translations - LisaMichaud + LisaMichaud Patricia AnnMcCoy 306–311 W13-1740 @@ -2145,7 +2145,7 @@ Stochastic Bi-Languages to model Dialogs - M. InésTorres + M. InésTorres 9–17 W13-1802 torres-2013-stochastic @@ -2212,8 +2212,8 @@ Modeling Graph Languages with Grammars Extracted via Tree Decompositions - Bevan KeeleyJones - SharonGoldwater + Bevan KeeleyJones + SharonGoldwater MarkJohnson 54–62 W13-1810 @@ -2251,16 +2251,16 @@ A Finite-State Approach to Translate <fixed-case>SNOMED</fixed-case> <fixed-case>CT</fixed-case> Terms into <fixed-case>B</fixed-case>asque Using Medical Prefixes and Suffixes OlatzPerez-de-Viñaspre - MaiteOronoz + MaiteOronoz ManexAgirrezabal - MikelLersundi + MikelLersundi 99–103 W13-1815 perez-de-vinaspre-etal-2013-finite Syncretism and How to Deal with it in a Morphological Analyzer: a <fixed-case>G</fixed-case>erman Example - KatinaBontcheva + KatinaBontcheva 104–107 W13-1816 bontcheva-2013-syncretism @@ -2278,11 +2278,11 @@ Proceedings of the 2013 Workshop on Biomedical Natural Language Processing W13-19 - Kevin BretonnelCohen + Kevin BretonnelCohen DinaDemner-Fushman SophiaAnaniadou - JohnPestian - Jun’ichiTsujii + JohnPestian + Jun’ichiTsujii Association for Computational Linguistics
Sofia, Bulgaria
August @@ -2300,7 +2300,7 @@ Katherine D.Holland Tracy A.Glauser Shannon M.Standridge - Karin M.Verspoor + Karin M.Verspoor JohnPestian 1–9 W13-1901 @@ -2308,8 +2308,8 @@
Identification of Patients with Acute Lung Injury from Free-Text Chest <fixed-case>X</fixed-case>-Ray Reports - MelihaYetisgen-Yildiz - CosminBejan + MelihaYetisgen-Yildiz + CosminBejan MarkWurfel 10–17 W13-1902 @@ -2317,10 +2317,10 @@ Discovering Temporal Narrative Containers in Clinical Text - TimothyMiller + TimothyMiller StevenBethard DmitriyDligach - SameerPradhan + SameerPradhan ChenLin GuerganaSavova 18–26 @@ -2351,7 +2351,7 @@ Unsupervised Linguistically-Driven Reliable Dependency Parses Detection and Self-Training for Adaptation to the Biomedical Domain FeliceDell’Orletta GiuliaVenturi - SimonettaMontemagni + SimonettaMontemagni 45–53 W13-1906 dellorletta-etal-2013-unsupervised @@ -2389,9 +2389,9 @@ AlexanderKlenner SumitMadan SamAnsari - TamaraBobic + TamaraBobic JuliaHoeng - MartinHofmann--Apitius + MartinHofmann--Apitius ManuelPeitsch 80–88 W13-1910 @@ -2401,7 +2401,7 @@ Exploring Word Class N-grams to Measure Language Development in Children GabrielaRamírez de la Rosa ThamarSolorio - ManuelMontes + ManuelMontes YangLiu LisaBedore ElizabethPeña @@ -2419,9 +2419,9 @@ Using the Argumentative Structure of Scientific Literature to Improve Information Access - AntonioJimeno Yepes - JamesMork - AlanAronson + AntonioJimeno Yepes + JamesMork + AlanAronson 102–110 W13-1913 jimeno-yepes-etal-2013-using @@ -2446,7 +2446,7 @@ Parallels between Linguistics and Biology SutanuChakraborti - AshishTendulkar + AshishTendulkar 120–123 W13-1916 chakraborti-tendulkar-2013-parallels @@ -2456,13 +2456,13 @@ Proceedings of the BioNLP Shared Task 2013 Workshop W13-20 - ClaireNédellec + ClaireNédellec RobertBossy Jin-DongKim - Jung-jaeKim + Jung-jaeKim TomokoOhta SampoPyysalo - PierreZweigenbaum + PierreZweigenbaum Association for Computational Linguistics
Sofia, Bulgaria
August @@ -2516,12 +2516,12 @@
Extracting Biomedical Events and Modifications Using Subgraph Matching with Noisy Training Data - AndrewMacKinlay - DavidMartinez - AntonioJimeno Yepes + AndrewMacKinlay + DavidMartinez + AntonioJimeno Yepes HaibinLiu W. JohnWilbur - KarinVerspoor + KarinVerspoor 35–44 W13-2005 mackinlay-etal-2013-extracting @@ -2539,8 +2539,8 @@ <fixed-case>GRO</fixed-case> Task: Populating the Gene Regulation Ontology with events and relations Jung-jaeKim XuHan - VivianLee - DietrichRebholz-Schuhmann + VivianLee + DietrichRebholz-Schuhmann 50–57 W13-2007 kim-etal-2013-gro @@ -2564,7 +2564,7 @@ Sung-JaeJung Sung-PilChoi SophiaAnaniadou - Jun’ichiTsujii + Jun’ichiTsujii 67–75 W13-2009 ohta-etal-2013-overview @@ -2572,9 +2572,9 @@ Generalizing an Approximate Subgraph Matching-based System to Extract Events in Molecular Biology and Cancer Genetics HaibinLiu - KarinVerspoor - Donald C.Comeau - AndrewMacKinlay + KarinVerspoor + Donald C.Comeau + AndrewMacKinlay W. JohnWilbur 76–85 W13-2010 @@ -2582,8 +2582,8 @@ Performance and limitations of the linguistically motivated Cocoa/Peaberry system in a broad biological domain. - SVRamanan - P. SenthilNathan + SVRamanan + P. SenthilNathan 86–93 W13-2011 ramanan-nathan-2013-performance @@ -2600,9 +2600,9 @@ <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> Shared Task 2013: Supporting Resources PontusStenetorp WiktoriaGolik - ThierryHamon - Donald C.Comeau - RezartaIslamaj Doğan + ThierryHamon + Donald C.Comeau + RezartaIslamaj Doğan HaibinLiu W. JohnWilbur 99–103 @@ -2613,8 +2613,8 @@ A fast rule-based approach for biomedical event extraction Quoc-ChinhBui DavidCampos - Erikvan Mulligen - JanKors + Erikvan Mulligen + JanKors 104–108 W13-2014 bui-etal-2013-fast @@ -2623,7 +2623,7 @@ Improving Feature-Based Biomedical Event Extraction System by Integrating Argument Information LishuangLi YiwenWang - DegenHuang + DegenHuang 109–115 W13-2015 li-etal-2013-improving @@ -2635,7 +2635,7 @@ TiliaEllendorff DonTuggener FabioRinaldi - GintarėGrigonytė + GintarėGrigonytė 116–120 W13-2016 schneider-etal-2013-uzh @@ -2643,8 +2643,8 @@ A Hybrid approach for biomedical event extraction Xuan QuangPham - Minh QuangLe - Bao QuocHo + Minh QuangLe + Bao QuocHo 121–124 W13-2017 pham-etal-2013-hybrid @@ -2659,9 +2659,9 @@ Exploring a Probabilistic <fixed-case>E</fixed-case>arley Parser for Event Composition in Biomedical Texts - Mai-VuTran + Mai-VuTran NigelCollier - Hoang-QuynhLe + Hoang-QuynhLe Van-ThuyPhi Thanh-BinhPham 130–134 @@ -2671,7 +2671,7 @@ Detecting Relations in the Gene Regulation Network ThomasProvoost - Marie-FrancineMoens + Marie-FrancineMoens 135–138 W13-2020 provoost-moens-2013-detecting @@ -2715,7 +2715,7 @@ Bacteria Biotope Detection, Ontology-based Normalization, and Relation Extraction using Syntactic Rules İlknurKaradeniz - ArzucanÖzgür + ArzucanÖzgür 170–177 W13-2025 karadeniz-ozgur-2013-bacteria @@ -2764,7 +2764,7 @@ Exploiting Ontology Lexica for Generating Natural Language Texts from <fixed-case>RDF</fixed-case> Data - PhilippCimiano + PhilippCimiano JannaLüker DavidNagel ChristinaUnger @@ -2783,9 +2783,9 @@ Enhancing the Expression of Contrast in the <fixed-case>SP</fixed-case>a<fixed-case>RK</fixed-case>y Restaurant Corpus - DavidHowcroft + DavidHowcroft CrystalNakatsu - MichaelWhite + MichaelWhite 30–39 W13-2104 howcroft-etal-2013-enhancing @@ -2818,7 +2818,7 @@ Graphs and Spatial Relations in the Generation of Referring Expressions JetteViethen MargaretMitchell - EmielKrahmer + EmielKrahmer 72–81 W13-2108 viethen-etal-2013-graphs @@ -2852,14 +2852,14 @@ NadjetBouayad-Agha GerardCasamayor LeoWanner - ChrisMellish + ChrisMellish 98–102 W13-2112 bouayad-agha-etal-2013-overview Narrative Composition: Achieving the Perceived Linearity of Narrative - PabloGervás + PabloGervás 103–104 W13-2113 gervas-2013-narrative @@ -2877,8 +2877,8 @@ Generating Student Feedback from Time-Series Data Using Reinforcement Learning DimitraGkatzia - HelenHastie - SrinivasanJanarthanam + HelenHastie + SrinivasanJanarthanam OliverLemon 115–124 W13-2115 @@ -2898,7 +2898,7 @@ YasharMehdad GiuseppeCarenini FrankTompa - Raymond T.Ng + Raymond T.Ng 136–146 W13-2117 mehdad-etal-2013-abstractive @@ -2915,7 +2915,7 @@ <fixed-case>MIME</fixed-case> - <fixed-case>NLG</fixed-case> in Pre-Hospital Care AnneSchneider AlasdairMort - ChrisMellish + ChrisMellish EhudReiter PhilWilson Pierre-LucVaudry @@ -2926,8 +2926,8 @@ Generation of Quantified Referring Expressions: Evidence from Experimental Data DaleBarr - Keesvan Deemter - RaquelFernández + Keesvan Deemter + RaquelFernández 157–161 W13-2120 barr-etal-2013-generation @@ -2952,7 +2952,7 @@ On the Feasibility of Automatically Describing n-dimensional Objects - PabloDuboue + PabloDuboue 172–177 W13-2123 duboue-2013-feasibility @@ -2960,8 +2960,8 @@ <fixed-case>G</fixed-case>en<fixed-case>N</fixed-case>ext: A Consolidated Domain Adaptable <fixed-case>NLG</fixed-case> System FrankSchilder - BlakeHowald - RaviKondadadi + BlakeHowald + RaviKondadadi 178–182 W13-2124 schilder-etal-2013-gennext @@ -2977,7 +2977,7 @@ A Case Study Towards <fixed-case>T</fixed-case>urkish Paraphrase Alignment SenizDemir - İlknurDurgar El-Kahlout + İlknurDurgar El-Kahlout ErdemUnal 188–192 W13-2126 @@ -2996,7 +2996,7 @@ <fixed-case>MIME</fixed-case>- <fixed-case>NLG</fixed-case> Support for Complex and Unstable Pre-hospital Emergencies AnneSchneider AlasdairMort - ChrisMellish + ChrisMellish EhudReiter PhilWilson Pierre-LucVaudry @@ -3006,14 +3006,14 @@ <fixed-case>T</fixed-case>houghtland: Natural Language Descriptions for Machine Learning n-dimensional Error Functions - PabloDuboue + PabloDuboue 200–201 W13-2129 duboue-2013-thoughtland An Automatic Method for Building a Data-to-Text Generator - SinaZarriess + SinaZarriess KyleRichardson 202–203 W13-2130 @@ -3032,7 +3032,7 @@ KeithButler PriscillaMoraes IanTabolt - Kathleen F.McCoy + Kathleen F.McCoy 206–207 W13-2132 butler-etal-2013-team @@ -3040,8 +3040,8 @@ Content Selection Challenge - <fixed-case>U</fixed-case>niversity of <fixed-case>A</fixed-case>berdeen Entry RomanKutlak - ChrisMellish - Keesvan Deemter + ChrisMellish + Keesvan Deemter 208–209 W13-2133 kutlak-etal-2013-content @@ -3059,14 +3059,14 @@ Proceedings of the Eighth Workshop on Statistical Machine Translation W13-22 - OndrejBojar + OndrejBojar ChristianBuck ChrisCallison-Burch BarryHaddow PhilippKoehn ChristofMonz MattPost - HerveSaint-Amand + HerveSaint-Amand RaduSoricut LuciaSpecia Association for Computational Linguistics @@ -3110,7 +3110,7 @@ AlexandraBirch BarryHaddow UlrichGermann - MariaNadejde + MariaNadejde ChristianBuck PhilippKoehn 52–61 @@ -3121,11 +3121,11 @@ <fixed-case>LIMSI</fixed-case> @ <fixed-case>WMT</fixed-case>13 AlexanderAllauzen NicolasPécheux - Quoc KhanhDo + Quoc KhanhDo MarcoDinarelli ThomasLavergne AurélienMax - Hai-SonLe + Hai-SonLe FrançoisYvon 62–69 W13-2204 @@ -3141,15 +3141,15 @@ AustinMatthews KentonMurray NicolaSegall - AlonLavie - ChrisDyer + AlonLavie + ChrisDyer 70–77 W13-2205 ammar-etal-2013-cmu Feature Decay Algorithms for Fast Deployment of Accurate Statistical Machine Translation Systems - ErgunBiçici + ErgunBiçici 78–84 W13-2206 bicici-2013-feature @@ -3157,7 +3157,7 @@ <fixed-case>CU</fixed-case>ni Multilingual Matrix in the <fixed-case>WMT</fixed-case> 2013 Shared Task KarelBílek - DanielZeman + DanielZeman 85–91 W13-2207 bilek-zeman-2013-cuni @@ -3188,14 +3188,14 @@ JanNiehues TeresaHerrmann IsabelSlawik - AlexWaibel + AlexWaibel 104–108 W13-2210 cho-etal-2013-karlsruhe <fixed-case>TÜBİTAK</fixed-case>-<fixed-case>BİLGEM</fixed-case> <fixed-case>G</fixed-case>erman-<fixed-case>E</fixed-case>nglish Machine Translation Systems for W13 - İlknurDurgar El-Kahlout + İlknurDurgar El-Kahlout CoşkunMermer 109–113 W13-2211 @@ -3214,10 +3214,10 @@ <fixed-case>M</fixed-case>unich-<fixed-case>E</fixed-case>dinburgh-<fixed-case>S</fixed-case>tuttgart Submissions of <fixed-case>OSM</fixed-case> Systems at <fixed-case>WMT</fixed-case>13 NadirDurrani - AlexanderFraser + AlexanderFraser HelmutSchmid HassanSajjad - RichárdFarkas + RichárdFarkas 122–127 W13-2213 durrani-etal-2013-munich @@ -3226,7 +3226,7 @@ Towards Efficient Large-Scale Feature-Rich Statistical Machine Translation VladimirEidelman KeWu - FerhanTure + FerhanTure PhilipResnik JimmyLin 128–133 @@ -3235,12 +3235,12 @@ The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> Phrase-Based Translation Systems for <fixed-case>WMT</fixed-case>13: System Combination with Morphology Generation, Domain Adaptation and Corpus Filtering - LluísFormiga - Marta R.Costa-jussà - José B.Mariño - José A. R.Fonollosa + LluísFormiga + Marta R.Costa-jussà + José B.Mariño + José A. R.Fonollosa AlbertoBarrón-Cedeño - LluísMàrquez + LluísMàrquez 134–140 W13-2215 formiga-etal-2013-talp @@ -3261,10 +3261,10 @@ KevinReschke RobVoigt JohnBauer - SidaWang + SidaWang NataliaSilveira JuliaNeidert - Christopher D.Manning + Christopher D.Manning 148–153 W13-2217 green-etal-2013-feature @@ -3273,7 +3273,7 @@ Factored Machine Translation Systems for <fixed-case>R</fixed-case>ussian-<fixed-case>E</fixed-case>nglish StéphaneHuet ElenaManishina - FabriceLefèvre + FabriceLefèvre 154–157 W13-2218 huet-etal-2013-factored @@ -3288,7 +3288,7 @@ Pre-Reordering for Machine Translation Using Transition-Based Walks on Dependency Parse Trees - Antonio ValerioMiceli-Barone + Antonio ValerioMiceli-Barone GiuseppeAttardi 164–169 W13-2220 @@ -3296,7 +3296,7 @@ <fixed-case>E</fixed-case>dinburgh’s Syntax-Based Machine Translation Systems - MariaNadejde + MariaNadejde PhilipWilliams PhilippKoehn 170–176 @@ -3307,7 +3307,7 @@ Shallow Semantically-Informed <fixed-case>PBSMT</fixed-case> and <fixed-case>HPBSMT</fixed-case> TsuyoshiOkita QunLiu - Josefvan Genabith + Josefvan Genabith 177–184 W13-2222 okita-etal-2013-shallow @@ -3318,15 +3318,15 @@ SaabMansour MatthiasHuck MarkusFreitag - HermannNey + HermannNey EunahCho TeresaHerrmann MohammedMediani JanNiehues - AlexWaibel + AlexWaibel AlexanderAllauzen - Quoc KhanhDo - BiankaBuschbeck + Quoc KhanhDo + BiankaBuschbeck TonioWandmacher 185–192 W13-2223 @@ -3341,7 +3341,7 @@ JoernWuebker MatthiasHuck MarkusFreitag - HermannNey + HermannNey 193–199 W13-2224 peitz-etal-2013-rwth @@ -3351,7 +3351,7 @@ JuanPino AurelienWaite TongXiao - Adriàde Gispert + Adriàde Gispert FedericoFlego WilliamByrne 200–205 @@ -3372,9 +3372,9 @@ The <fixed-case>CNGL</fixed-case>-<fixed-case>DCU</fixed-case>-<fixed-case>P</fixed-case>rompsit Translation Systems for <fixed-case>WMT</fixed-case>13 - RaphaelRubino + RaphaelRubino AntonioToral - SantiagoCortés Vaíllo + SantiagoCortés Vaíllo JunXie XiaofengWu StephenDoherty @@ -3388,7 +3388,7 @@ HassanSajjad SvetlanaSmekalova NadirDurrani - AlexanderFraser + AlexanderFraser HelmutSchmid 219–224 W13-2228 @@ -3398,7 +3398,7 @@ Tunable Distortion Limits and Corpus Cleaning for <fixed-case>SMT</fixed-case> SaraStymne ChristianHardmeier - JörgTiedemann + JörgTiedemann JoakimNivre 225–231 W13-2229 @@ -3409,11 +3409,11 @@ MarionWeller MaxKisselew SvetlanaSmekalova - AlexanderFraser + AlexanderFraser HelmutSchmid NadirDurrani HassanSajjad - RichárdFarkas + RichárdFarkas 232–239 W13-2230 weller-etal-2013-munich @@ -3421,7 +3421,7 @@ Coping with the Subjectivity of Human Judgements in <fixed-case>MT</fixed-case> Quality Estimation MarcoTurchi - MatteoNegri + MatteoNegri MarcelloFederico 240–251 W13-2231 @@ -3430,7 +3430,7 @@ Online Polylingual Topic Models for Fast Document Translation Detection KristeKrstovski - David A.Smith + David A.Smith 252–261 W13-2232 krstovski-smith-2013-online @@ -3446,8 +3446,8 @@ Generating <fixed-case>E</fixed-case>nglish Determiners in Phrase-Based Translation with Synthetic Translation Options YuliaTsvetkov - ChrisDyer - LoriLevin + ChrisDyer + LoriLevin ArchnaBhatia 271–280 W13-2234 @@ -3455,7 +3455,7 @@ Dramatically Reducing Training Data Size Through Vocabulary Saturation - WilliamLewis + WilliamLewis SaulehEetemadi 281–291 W13-2235 @@ -3481,7 +3481,7 @@ Length-Incremental Phrase Training for <fixed-case>SMT</fixed-case> JoernWuebker - HermannNey + HermannNey 309–319 W13-2238 wuebker-ney-2013-length @@ -3489,8 +3489,8 @@ Positive Diversity Tuning for Machine Translation System Combination DanielCer - Christopher D.Manning - DanJurafsky + Christopher D.Manning + DanJurafsky 320–328 W13-2239 cer-etal-2013-positive @@ -3498,16 +3498,16 @@ Selecting Feature Sets for Comparative and Time-Oriented Quality Estimation of Machine Translation Output EleftheriosAvramidis - MajaPopović + MajaPopović 329–336 W13-2240 avramidis-popovic-2013-selecting <fixed-case>SHEF</fixed-case>-<fixed-case>L</fixed-case>ite: When Less is More for Translation Quality Estimation - DanielBeck + DanielBeck KashifShah - TrevorCohn + TrevorCohn LuciaSpecia 337–342 W13-2241 @@ -3515,28 +3515,28 @@ Referential Translation Machines for Quality Estimation - ErgunBiçici + ErgunBiçici 343–351 W13-2242 bicici-2013-referential <fixed-case>FBK</fixed-case>-<fixed-case>UE</fixed-case>din Participation to the <fixed-case>WMT</fixed-case>13 Quality Estimation Shared Task - José GuilhermeCamargo de Souza + José GuilhermeCamargo de Souza ChristianBuck MarcoTurchi - MatteoNegri + MatteoNegri 352–358 W13-2243 camargo-de-souza-etal-2013-fbk The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> Approach to System Selection: <fixed-case>A</fixed-case>siya Features and Pairwise Classification Using Random Forests - LluísFormiga - MeritxellGonzàlez + LluísFormiga + MeritxellGonzàlez AlbertoBarrón-Cedeño - José A. R.Fonollosa - LluísMàrquez + José A. R.Fonollosa + LluísMàrquez 359–364 W13-2244 formiga-etal-2013-talp-upc @@ -3555,8 +3555,8 @@ <fixed-case>MT</fixed-case> Quality Estimation: The <fixed-case>CMU</fixed-case> System for <fixed-case>WMT</fixed-case>‘13 - SiljaHildebrand - StephanVogel + SiljaHildebrand + StephanVogel 373–379 W13-2246 hildebrand-vogel-2013-mt @@ -3564,27 +3564,27 @@ <fixed-case>LORIA</fixed-case> System for the <fixed-case>WMT</fixed-case>13 Quality Estimation Shared Task DavidLanglois - KamelSmaïli + KamelSmaïli 380–385 W13-2247 langlois-smaili-2013-loria <fixed-case>LIG</fixed-case> System for <fixed-case>WMT</fixed-case>13 <fixed-case>QE</fixed-case> Task: Investigating the Usefulness of Features in Word Confidence Estimation for <fixed-case>MT</fixed-case> - Ngoc-QuangLuong + Ngoc-QuangLuong BenjaminLecouteux - LaurentBesacier + LaurentBesacier 386–391 W13-2248 luong-etal-2013-lig <fixed-case>DCU</fixed-case>-<fixed-case>S</fixed-case>ymantec at the <fixed-case>WMT</fixed-case> 2013 Quality Estimation Shared Task - RaphaelRubino + RaphaelRubino JoachimWagner JenniferFoster JohannRoturier - RasoulSamad Zadeh Kaljahi + RasoulSamad Zadeh Kaljahi FredHollowood 392–397 W13-2249 @@ -3592,7 +3592,7 @@ <fixed-case>LIMSI</fixed-case> Submission for the <fixed-case>WMT</fixed-case>‘13 Quality Estimation Task: an Experiment with N-Gram Posteriors - Anil KumarSingh + Anil KumarSingh GuillaumeWisniewski FrançoisYvon 398–404 @@ -3637,7 +3637,7 @@ An Approach Using Style Classification Features for Quality Estimation ErwanMoreau - RaphaelRubino + RaphaelRubino 429–434 W13-2255 moreau-rubino-2013-approach @@ -3664,7 +3664,7 @@ MatthiasHuck JoernWuebker FelixRietig - HermannNey + HermannNey 452–463 W13-2258 huck-etal-2013-phrase @@ -3687,7 +3687,7 @@ Evaluating (and Improving) Sentence Alignment under Noisy Conditions - OmarZaidan + OmarZaidan VishalChowdhary 484–493 W13-2261 @@ -3706,7 +3706,7 @@ Hidden <fixed-case>M</fixed-case>arkov Tree Model for Word Alignment ShuheiKondo KevinDuh - YujiMatsumoto + YujiMatsumoto 503–511 W13-2263 kondo-etal-2013-hidden @@ -3714,7 +3714,7 @@ An <fixed-case>MT</fixed-case> Error-Driven Discriminative Word Lexicon using Sentence Structure Features JanNiehues - AlexWaibel + AlexWaibel 512–520 W13-2264 niehues-waibel-2013-mt @@ -3724,7 +3724,7 @@ Proceedings of the 7th Linguistic Annotation Workshop and Interoperability with Discourse W13-23 - AntonioPareja-Lora + AntonioPareja-Lora MariaLiakata StefanieDipper Association for Computational Linguistics @@ -3743,7 +3743,7 @@ NizarHabash AnnBies SethKulick - MohamedMaamouri + MohamedMaamouri 1–10 W13-2301 @@ -3759,7 +3759,7 @@ Utilizing State-of-the-art Parsers to Diagnose Problems in Treebank Annotation for a Less Resourced Language - QuyNguyen + QuyNguyen NganNguyen YusukeMiyao 19–27 @@ -3776,7 +3776,7 @@ Continuous Measurement Scales in Human Evaluation of Machine Translation YvetteGraham - TimothyBaldwin + TimothyBaldwin AlistairMoffat JustinZobel 33–41 @@ -3799,8 +3799,8 @@ NaomiSaphra DavidBamman ManaalFaruqui - Noah A.Smith - ChrisDyer + Noah A.Smith + ChrisDyer JasonBaldridge 51–60 W13-2307 @@ -3809,7 +3809,7 @@ Converting <fixed-case>I</fixed-case>talian Treebanks: Towards an <fixed-case>I</fixed-case>talian <fixed-case>S</fixed-case>tanford Dependency Treebank CristinaBosco - SimonettaMontemagni + SimonettaMontemagni MariaSimi 61–69 W13-2308 @@ -3820,7 +3820,7 @@ Hen-HsenHuang Chi-HsinYu Tai-WeiChang - Cong-KaiLin + Cong-KaiLin Hsin-HsiChen 70–78 W13-2309 @@ -3830,7 +3830,7 @@ Towards a Better Understanding of Discourse: Integrating Multiple Discourse Annotation Perspectives Using <fixed-case>UIMA</fixed-case> ClaudiuMihăilă GeorgiosKontonatsios - Riza TheresaBatista-Navarro + Riza TheresaBatista-Navarro PaulThompson IoannisKorkontzelos SophiaAnaniadou @@ -3849,7 +3849,7 @@ Importing <fixed-case>MASC</fixed-case> into the <fixed-case>ANNIS</fixed-case> linguistic database: A case study of mapping <fixed-case>G</fixed-case>r<fixed-case>AF</fixed-case> ArneNeumann - NancyIde + NancyIde ManfredStede 98–102 W13-2312 @@ -3873,9 +3873,9 @@ Applicative Structures and Immediate Discourse in the <fixed-case>T</fixed-case>urkish Discourse Bank - IsinDemirşahin + IsinDemirşahin AdnanÖztürel - CemBozşahin + CemBozşahin DenizZeyrek 122–130 W13-2315 @@ -3883,11 +3883,11 @@ <fixed-case>TURKSENT</fixed-case>: A Sentiment Annotation Tool for Social Media - GülşenEryiǧit + GülşenEryiǧit Fatih SametÇetin MeltemYanık TanelTemel - İlyasÇiçekli + İlyasÇiçekli 131–134 W13-2316 eryigit-etal-2013-turksent @@ -3897,17 +3897,17 @@ StephenTratz DouglasBriesch JamalLaoudi - ClareVoss + ClareVoss 135–139 W13-2317 tratz-etal-2013-tweet Relation Annotation for Understanding Research Papers - YukaTateisi + YukaTateisi YoShidahara YusukeMiyao - AkikoAizawa + AkikoAizawa 140–148 W13-2318 tateisi-etal-2013-relation @@ -3926,22 +3926,22 @@ <fixed-case>A</fixed-case>nimacy Annotation in the <fixed-case>H</fixed-case>indi Treebank ItisreeJena - Riyaz AhmadBhat + Riyaz AhmadBhat SambhavJain - Dipti MisraSharma + Dipti MisraSharma 159–167 W13-2320 jena-etal-2013-animacy Automatic Named Entity Pre-annotation for Out-of-domain Human Annotation - SophieRosset + SophieRosset CyrilGrouin ThomasLavergne MohamedBen Jannet - JérémyLeixa + JérémyLeixa OlivierGalibert - PierreZweigenbaum + PierreZweigenbaum 168–177 W13-2321 rosset-etal-2013-automatic @@ -3949,14 +3949,14 @@ <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation for Sembanking LauraBanarescu - ClaireBonial + ClaireBonial ShuCai MadalinaGeorgescu KiraGriffitt UlfHermjakob KevinKnight PhilippKoehn - MarthaPalmer + MarthaPalmer NathanSchneider 178–186 W13-2322 @@ -3964,7 +3964,7 @@ The Benefits of a Model of Annotation - Rebecca J.Passonneau + Rebecca J.Passonneau BobCarpenter 187–195 W13-2323 @@ -4008,7 +4008,7 @@ AmáliaMendes IrisHendrickx AgostinhoSalgueiro - LucianaÁvila + LucianaÁvila 228–237 W13-2328 mendes-etal-2013-annotating @@ -4020,7 +4020,7 @@ W13-24 JakubPiskorski LidiaPivovarova - HristoTanev + HristoTanev RomanYangarber Association for Computational Linguistics
Sofia, Bulgaria
@@ -4034,14 +4034,14 @@ Invited Talk: Ontologies and Linked Open Data for Acquisition and Exploitation of Language Resources - KirilSimov + KirilSimov 1 W13-2401 simov-2013-invited A Comparison of Approaches for Sentiment Classification on <fixed-case>L</fixed-case>ithuanian <fixed-case>I</fixed-case>nternet Comments - JurgitaKapočiūtė-Dzikienė + JurgitaKapočiūtė-Dzikienė AlgisKrupavičius TomasKrilavičius 2–11 @@ -4051,7 +4051,7 @@ Evaluating Sentiment Analysis Systems in <fixed-case>R</fixed-case>ussian IliaChetviorkin - NataliaLoukachevitch + NataliaLoukachevitch 12–17 W13-2403 chetviorkin-loukachevitch-2013-evaluating @@ -4067,7 +4067,7 @@ Frequently Asked Questions Retrieval for <fixed-case>C</fixed-case>roatian Based on Semantic Textual Similarity - Vanja MladenKaran + Vanja MladenKaran LovroŽmak JanŠnajder 24–33 @@ -4094,7 +4094,7 @@ Lemmatization and Morphosyntactic Tagging of <fixed-case>C</fixed-case>roatian and <fixed-case>S</fixed-case>erbian - ŽeljkoAgić + ŽeljkoAgić NikolaLjubešić DanijelaMerkler 48–57 @@ -4104,7 +4104,7 @@ Modernizing historical <fixed-case>S</fixed-case>lovene words with character-based <fixed-case>SMT</fixed-case> YvesScherrer - TomažErjavec + TomažErjavec 58–62 W13-2409 scherrer-erjavec-2013-modernizing @@ -4181,7 +4181,7 @@ Proceedings of the Sixth Workshop on Building and Using Comparable Corpora W13-25 SergeSharoff - PierreZweigenbaum + PierreZweigenbaum ReinhardRapp Association for Computational Linguistics
Sofia, Bulgaria
@@ -4269,7 +4269,7 @@ Improving <fixed-case>MT</fixed-case> System Using Extracted Parallel Fragments of Text from Comparable Corpora RajdeepGupta SantanuPal - SivajiBandyopadhyay + SivajiBandyopadhyay 69–76 W13-2509 gupta-etal-2013-improving @@ -4296,7 +4296,7 @@ GeorgiosKontonatsios IoannisKorkontzelos SophiaAnaniadou - Jun’ichiTsujii + Jun’ichiTsujii 95–104 W13-2512 kontonatsios-etal-2013-using @@ -4305,14 +4305,14 @@ Comparing Multilingual Comparable Articles Based On Opinions MotazSaad DavidLanglois - KamelSmaïli + KamelSmaïli 105–111 W13-2513 saad-etal-2013-comparing
Mining for Domain-specific Parallel Text from <fixed-case>W</fixed-case>ikipedia - MagdalenaPlamadă + MagdalenaPlamadă MartinVolk 112–120 W13-2514 @@ -4322,7 +4322,7 @@ Gathering and Generating Paraphrases from <fixed-case>T</fixed-case>witter with Application to Normalization WeiXu AlanRitter - RalphGrishman + RalphGrishman 121–128 W13-2515 xu-etal-2013-gathering @@ -4338,7 +4338,7 @@ Finding More Bilingual Webpages with High Credibility via Link Analysis ChengzhiZhang XuchenYao - ChunyuKit + ChunyuKit 138–143 W13-2517 zhang-etal-2013-finding @@ -4349,7 +4349,7 @@ Proceedings of the Fourth Annual Workshop on Cognitive Modeling and Computational Linguistics (CMCL) W13-26 VeraDemberg - RogerLevy + RogerLevy Association for Computational Linguistics
Sofia, Bulgaria
August @@ -4363,7 +4363,7 @@ Why is <fixed-case>E</fixed-case>nglish so easy to segment? AbdellahFourtassi - BenjaminBörschinger + BenjaminBörschinger MarkJohnson EmmanuelDupoux 1–10 @@ -4382,7 +4382,7 @@ Learning non-concatenative morphology MichelleFullwood - TimO’Donnell + TimO’Donnell 21–27 W13-2603 fullwood-odonnell-2013-learning @@ -4398,7 +4398,7 @@ An Analysis of Memory-based Processing Costs using Incremental Deep Syntactic Dependency Parsing - Martenvan Schijndel + Martenvan Schijndel LuanNguyen WilliamSchuler 37–46 @@ -4416,7 +4416,7 @@ The semantic augmentation of a psycholinguistically-motivated syntactic formalism - AsadSayeed + AsadSayeed VeraDemberg 57–65 W13-2607 @@ -4441,7 +4441,7 @@ On the Information Conveyed by Discourse Markers - FatemehTorabi Asr + FatemehTorabi Asr VeraDemberg 84–93 W13-2610 @@ -4462,7 +4462,7 @@ Proceedings of the 7th Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities W13-27 PiroskaLendvai - KalliopiZervanou + KalliopiZervanou Association for Computational Linguistics
Sofia, Bulgaria
August @@ -4477,10 +4477,10 @@ Generating Paths through Cultural Heritage Collections SamuelFernando PaulaGoodale - PaulClough + PaulClough MarkStevenson - MarkHall - EnekoAgirre + MarkHall + EnekoAgirre 1–10 W13-2701 fernando-etal-2013-generating @@ -4488,8 +4488,8 @@ Using character overlap to improve language transformation SanderWubben - EmielKrahmer - Antalvan den Bosch + EmielKrahmer + Antalvan den Bosch 11–19 W13-2702 wubben-etal-2013-using @@ -4514,7 +4514,7 @@ Language Technology for Agile Social Media Science SimonWibberley - DavidWeir + DavidWeir JeremyReffin 36–42 W13-2705 @@ -4541,7 +4541,7 @@ Towards a Tool for Interactive Concept Building for Large Scale Analysis in the Humanities - AndreBlessing + AndreBlessing JonathanSonntag FritzKliche UlrichHeid @@ -4555,15 +4555,15 @@ Learning to Extract Folktale Keywords DolfTrieschnigg DongNguyen - MariëtTheune + MariëtTheune 65–73 W13-2709 trieschnigg-etal-2013-learning Towards Creating Precision Grammars from Interlinear Glossed Text: Inferring Large-Scale Typological Properties - Emily M.Bender - Michael WayneGoodman + Emily M.Bender + Michael WayneGoodman JoshuaCrowgey FeiXia 74–83 @@ -4573,7 +4573,7 @@ Using Comparable Collections of Historical Texts for Building a Diachronic Dictionary for Spelling Normalization MarilisaAmoia - Jose ManuelMartinez + Jose ManuelMartinez 84–89 W13-2711 amoia-martinez-2013-using @@ -4595,11 +4595,11 @@ Temporal classification for historical <fixed-case>R</fixed-case>omanian texts - Alina MariaCiobanu + Alina MariaCiobanu AncaDinu - LiviuDinu + LiviuDinu VladNiculae - Octavia-MariaŞulea + Octavia-MariaŞulea 102–106 W13-2714 ciobanu-etal-2013-temporal-classification @@ -4620,11 +4620,11 @@ Proceedings of the Second Workshop on Hybrid Approaches to Translation W13-28 - Marta RuizCosta-jussà + Marta RuizCosta-jussà ReinhardRapp PatrikLambert KurtEberle - Rafael E.Banchs + Rafael E.Banchs BogdanBabych Association for Computational Linguistics
Sofia, Bulgaria
@@ -4650,7 +4650,7 @@
Statistical <fixed-case>MT</fixed-case> Systems Revisited: How much Hybridity do they have? - HermannNey + HermannNey 7 W13-2802 ney-2013-statistical @@ -4664,8 +4664,8 @@ Machine Learning Disambiguation of <fixed-case>Q</fixed-case>uechua Verb Morphology - AnnetteRios Gonzales - AnneGöhring + AnnetteRios Gonzales + AnneGöhring 13–18 W13-2804 rios-gonzales-gohring-2013-machine @@ -4673,7 +4673,7 @@ Improvements to Syntax-based Machine Translation using Ensemble Dependency Parsers NathanGreen - ZdeněkŽabokrtský + ZdeněkŽabokrtský 19–24 W13-2805 green-zabokrtsky-2013-improvements @@ -4694,7 +4694,7 @@ Raj NathPatel RohitGupta Prakash B.Pimpale - SasikumarM + SasikumarM 34–41 W13-2807 patel-etal-2013-reordering @@ -4710,7 +4710,7 @@ Controlled Ascent: Imbuing Statistical <fixed-case>MT</fixed-case> with Linguistic Knowledge - WilliamLewis + WilliamLewis ChrisQuirk 51–66 W13-2809 @@ -4729,7 +4729,7 @@ Integrating morpho-syntactic features in <fixed-case>E</fixed-case>nglish-<fixed-case>A</fixed-case>rabic statistical machine translation InesTurki Khemakhem SalmaJamoussi - AbdelmajidBen Hamadou + AbdelmajidBen Hamadou 74–81 W13-2811 turki-khemakhem-etal-2013-integrating @@ -4746,9 +4746,9 @@ Building bilingual lexicon to create Dialect <fixed-case>T</fixed-case>unisian corpora and adapt language model RahmaBoujelbane - MariemEllouze khemekhem + MariemEllouze khemekhem SiwarBenAyed - LamiaHadrich Belguith + LamiaHadrich Belguith 88–93 W13-2813 boujelbane-etal-2013-building @@ -4756,15 +4756,15 @@ A Hybrid Word Alignment Model for Phrase-Based Statistical Machine Translation SantanuPal - SudipNaskar - SivajiBandyopadhyay + SudipNaskar + SivajiBandyopadhyay 94–101 W13-2814 pal-etal-2013-hybrid Lexical Selection for Hybrid <fixed-case>MT</fixed-case> with Sequence Labeling - AlexRudnick + AlexRudnick MichaelGasser 102–108 W13-2815 @@ -4772,11 +4772,11 @@ Two Approaches to Correcting Homophone Confusions in a Hybrid Machine Translation System - PierretteBouillon + PierretteBouillon JohannaGerlach UlrichGermann BarryHaddow - MannyRayner + MannyRayner 109–116 W13-2816 bouillon-etal-2013-two @@ -4868,7 +4868,7 @@ TadayoshiHara ChenChen YoshinobuKano - AkikoAizawa + AkikoAizawa 49–58 W13-2906 hara-etal-2013-modeling @@ -4876,7 +4876,7 @@ On The Applicability of Readability Models to Web Texts SowmyaVajjala - DetmarMeurers + DetmarMeurers 59–68 W13-2907 vajjala-meurers-2013-applicability @@ -4917,7 +4917,7 @@ Distributions on <fixed-case>M</fixed-case>inimalist <fixed-case>G</fixed-case>rammar Derivations TimHunter - ChrisDyer + ChrisDyer 1–11 W13-3001 hunter-dyer-2013-distributions @@ -4949,7 +4949,7 @@ The <fixed-case>F</fixed-case>robenius Anatomy of Relative Pronouns StephenClark BobCoecke - MehrnooshSadrzadeh + MehrnooshSadrzadeh 41–51 W13-3005 clark-etal-2013-frobenius @@ -4990,7 +4990,7 @@ Investigating Connectivity and Consistency Criteria for Phrase Pair Extraction in Statistical Machine Translation SpyrosMartzoukos - ChristopheCosta Florêncio + ChristopheCosta Florêncio ChristofMonz 93–101 W13-3010 @@ -5021,7 +5021,7 @@ Multi-document multilingual summarization corpus preparation, Part 1: <fixed-case>A</fixed-case>rabic, <fixed-case>E</fixed-case>nglish, <fixed-case>G</fixed-case>reek, <fixed-case>C</fixed-case>hinese, <fixed-case>R</fixed-case>omanian LeiLi - CorinaForascu + CorinaForascu MahmoudEl-Haj GeorgeGiannakopoulos 1–12 @@ -5048,8 +5048,8 @@ <fixed-case>ACL</fixed-case> 2013 <fixed-case>M</fixed-case>ulti<fixed-case>L</fixed-case>ing Pilot Overview JeffKubina - JohnConroy - JudithSchlesinger + JohnConroy + JudithSchlesinger 29–38 W13-3104 kubina-etal-2013-acl @@ -5082,12 +5082,12 @@ Multilingual Summarization: Dimensionality Reduction and a Step Towards Optimal Term Coverage - JohnConroy - Sashka T.Davis + JohnConroy + Sashka T.Davis JeffKubina Yi-KaiLiu - Dianne P.O’Leary - Judith D.Schlesinger + Dianne P.O’Leary + Judith D.Schlesinger 55–63 W13-3108 conroy-etal-2013-multilingual @@ -5123,7 +5123,7 @@ W13-32 AlexandreAllauzen HugoLarochelle - ChristopherManning + ChristopherManning RichardSocher Association for Computational Linguistics
Sofia, Bulgaria
@@ -5138,7 +5138,7 @@ Vector Space Semantic Parsing: A Framework for Compositional Vector Space Models JayantKrishnamurthy - TomMitchell + TomMitchell 1–10 W13-3201 krishnamurthy-mitchell-2013-vector @@ -5147,7 +5147,7 @@ Learning from errors: Using vector-based compositional semantics for parse reranking PhongLe WillemZuidema - RemkoScha + RemkoScha 11–19 W13-3202 le-etal-2013-learning @@ -5159,7 +5159,7 @@ HuiyingLi MrinmayaSachan ShashankSrivastava - EduardHovy + EduardHovy 20–29 W13-3203 goyal-etal-2013-structured @@ -5168,14 +5168,14 @@ Letter N-Gram-based Input Encoding for Continuous Space Language Models HenningSperr JanNiehues - AlexWaibel + AlexWaibel 30–39 W13-3204 sperr-etal-2013-letter Transducing Sentences to Syntactic Feature Vectors: an Alternative Way to “Parse”? - Fabio MassimoZanzotto + Fabio MassimoZanzotto LorenzoDell’Arciprete 40–49 W13-3205 @@ -5183,9 +5183,9 @@ General estimation and evaluation of compositional distributional semantic models - GeorgianaDinu - Nghia ThePham - MarcoBaroni + GeorgianaDinu + Nghia ThePham + MarcoBaroni 50–58 W13-3206 dinu-etal-2013-general @@ -5193,7 +5193,7 @@ Applicative structure in vector space models MártonMakrai - David MarkNemeskey + David MarkNemeskey AndrásKornai 59–63 W13-3207 @@ -5211,8 +5211,8 @@ “Not not bad” is not “bad”: A distributional account of negation Karl MoritzHermann - EdwardGrefenstette - PhilBlunsom + EdwardGrefenstette + PhilBlunsom 74–82 W13-3209 hermann-etal-2013-bad @@ -5221,7 +5221,7 @@ Towards Dynamic Word Sense Discrimination with Random Indexing HansMoen ErwinMarsi - BjörnGambäck + BjörnGambäck 83–90 W13-3210 moen-etal-2013-towards @@ -5253,7 +5253,7 @@ Recurrent Convolutional Neural Networks for Discourse Compositionality NalKalchbrenner - PhilBlunsom + PhilBlunsom 119–126 W13-3214 kalchbrenner-blunsom-2013-recurrent-convolutional @@ -5263,10 +5263,10 @@ Proceedings of the Workshop on Discourse in Machine Translation W13-33 - BonnieWebber - AndreiPopescu-Belis + BonnieWebber + AndreiPopescu-Belis KatjaMarkert - JörgTiedemann + JörgTiedemann Association for Computational Linguistics
Sofia, Bulgaria
August @@ -5280,7 +5280,7 @@ Meaning Unit Segmentation in <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese: a New Approach to Discourse Phenomena JenniferWilliams - RafaelBanchs + RafaelBanchs HaizhouLi 1–9 W13-3301 @@ -5330,7 +5330,7 @@ Translation of “It” in a Deep Syntax Framework MichalNovák AnnaNedoluzhko - ZdeněkŽabokrtský + ZdeněkŽabokrtský 51–59 W13-3307 novak-etal-2013-translation @@ -5351,7 +5351,7 @@ Proceedings of the Fourth Workshop on Teaching NLP and CL W13-34 IvanDerzhanski - DragomirRadev + DragomirRadev Association for Computational Linguistics
Sofia, Bulgaria
August @@ -5382,8 +5382,8 @@ Introducing Computational Concepts in a Linguistics Olympiad PatrickLittell - LoriLevin - JasonEisner + LoriLevin + JasonEisner DragomirRadev 18–26 W13-3403 @@ -5398,10 +5398,10 @@ Learning from <fixed-case>O</fixed-case>z<fixed-case>CLO</fixed-case>, the <fixed-case>A</fixed-case>ustralian Computational and Linguistics Olympiad - DominiqueEstival - JohnHenderson + DominiqueEstival + JohnHenderson MaryLaughren - DiegoMollá + DiegoMollá CathyBow RachelNordlinger VernaRieschild @@ -5438,7 +5438,7 @@ Treebanking for Data-driven Research in the Classroom - JohnLee + JohnLee Ying CheukHui Yin HeiKong 56–60 @@ -5457,7 +5457,7 @@ A Virtual Manipulative for Learning Log-Linear Models FrancisFerraro - JasonEisner + JasonEisner 66–76 W13-3411 ferraro-eisner-2013-virtual @@ -5471,10 +5471,10 @@ Semantic Technologies in <fixed-case>IBM</fixed-case> <fixed-case>W</fixed-case>atson - AlfioGliozzo + AlfioGliozzo OrBiran SiddharthPatwardhan - KathleenMcKeown + KathleenMcKeown 85–92 W13-3413 gliozzo-etal-2013-semantic @@ -5514,7 +5514,7 @@ Improving Pointwise Mutual Information (<fixed-case>PMI</fixed-case>) by Incorporating Significant Co-occurrence - OmDamani + OmDamani 20–28 W13-3503 damani-2013-improving @@ -5548,9 +5548,9 @@ Spectral Learning of Refinement <fixed-case>HMM</fixed-case>s KarlStratos - AlexanderRush - Shay B.Cohen - MichaelCollins + AlexanderRush + Shay B.Cohen + MichaelCollins 56–64 W13-3507 stratos-etal-2013-spectral @@ -5558,7 +5558,7 @@ Sentence Compression with Joint Structural Inference KapilThadani - KathleenMcKeown + KathleenMcKeown 65–74 W13-3508 thadani-mckeown-2013-sentence @@ -5576,15 +5576,15 @@ Documents and Dependencies: an Exploration of Vector Space Models for Semantic Composition AlonaFyshe BrianMurphy - ParthaTalukdar - TomMitchell + ParthaTalukdar + TomMitchell 84–93 W13-3510 fyshe-etal-2013-documents Hidden <fixed-case>M</fixed-case>arkov tree models for semantic class induction - ÉdouardGrave + ÉdouardGrave GuillaumeObozinski FrancisBach 94–103 @@ -5593,9 +5593,9 @@ Better Word Representations with Recursive Neural Networks for Morphology - ThangLuong + ThangLuong RichardSocher - ChristopherManning + ChristopherManning 104–113 W13-3512 luong-etal-2013-better @@ -5603,8 +5603,8 @@ Separating Disambiguation from Composition in Distributional Semantics DimitriKartsaklis - MehrnooshSadrzadeh - StephenPulman + MehrnooshSadrzadeh + StephenPulman 114–123 W13-3513 kartsaklis-etal-2013-separating @@ -5620,14 +5620,14 @@ Philosophers are Mortal: Inferring the Truth of Unseen Facts GaborAngeli - ChristopherManning + ChristopherManning 133–142 W13-3515 angeli-manning-2013-philosophers Towards Robust Linguistic Analysis using <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes - SameerPradhan + SameerPradhan AlessandroMoschitti NianwenXue Hwee TouNg @@ -5644,7 +5644,7 @@ JiapingZheng LukeVilnis SameerSingh - Jinho D.Choi + Jinho D.Choi AndrewMcCallum 153–162 W13-3517 @@ -5662,7 +5662,7 @@ Collapsed Variational <fixed-case>B</fixed-case>ayesian Inference for <fixed-case>PCFG</fixed-case>s PengyuWang - PhilBlunsom + PhilBlunsom 173–182 W13-3519 wang-blunsom-2013-collapsed @@ -5671,7 +5671,7 @@ <fixed-case>P</fixed-case>olyglot: Distributed Word Representations for Multilingual <fixed-case>NLP</fixed-case> RamiAl-Rfou’ BryanPerozzi - StevenSkiena + StevenSkiena 183–192 W13-3520 al-rfou-etal-2013-polyglot @@ -5679,10 +5679,10 @@ Exploiting multiple hypotheses for Multilingual Spoken Language Understanding MarcosCalvo - FernandoGarcía - Lluís-F.Hurtado + FernandoGarcía + Lluís-F.Hurtado SantiagoJiménez - EmilioSanchis + EmilioSanchis 193–201 W13-3521 calvo-etal-2013-exploiting @@ -5699,7 +5699,7 @@ Topic Models + Word Alignment = A Flexible Framework for Extracting Bilingual Dictionary from Comparable Corpus XiaodongLiu KevinDuh - YujiMatsumoto + YujiMatsumoto 212–221 W13-3523 liu-etal-2013-topic @@ -5727,7 +5727,7 @@ Proceedings of the Seventeenth Conference on Computational Natural Language Learning: Shared Task W13-36 Hwee TouNg - JoelTetreault + JoelTetreault Siew MeiWu YuanbinWu ChristianHadiwinoto @@ -5764,13 +5764,13 @@ <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2013 Shared Task: Grammatical Error Correction <fixed-case>NTHU</fixed-case> System Description - Ting-HuiKao - Yu-WeiChang - Hsun-WenChiu + Ting-HuiKao + Yu-WeiChang + Hsun-WenChiu Tzu-HsiYen JoanneBoisson - Jian-ChengWu - Jason S.Chang + Jian-ChengWu + Jason S.Chang 20–25 W13-3603 kao-etal-2013-conll @@ -5784,7 +5784,7 @@ TomoyaMizumoto YutaHayashibe MamoruKomachi - YujiMatsumoto + YujiMatsumoto 26–33 W13-3604 yoshimoto-etal-2013-naist @@ -5820,15 +5820,15 @@ <fixed-case>LFG</fixed-case>-based Features for Noun Number and Article Grammatical Errors GáborBerend VeronikaVincze - SinaZarrieß - RichárdFarkas + SinaZarrieß + RichárdFarkas 62–67 W13-3608 berend-etal-2013-lfg Toward More Precision in Correction of Grammatical Errors - DanFlickinger + DanFlickinger JiyeYu 68–73 W13-3609 @@ -5846,8 +5846,8 @@ <fixed-case>IITB</fixed-case> System for <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case> 2013 Shared Task: A Hybrid Approach to Grammatical Error Correction AnoopKunchukuttan - RiteshShah - PushpakBhattacharyya + RiteshShah + PushpakBhattacharyya 82–87 W13-3611 kunchukuttan-etal-2013-iitb @@ -5865,7 +5865,7 @@ GrigoriSidorov AnubhavGupta MartinTozer - DolorsCatala + DolorsCatala AngelsCatena SandrineFuentes 96–101 @@ -5874,7 +5874,7 @@ Memory-based Grammatical Error Correction - Antalvan den Bosch + Antalvan den Bosch PeterBerck 102–108 W13-3614 @@ -5903,7 +5903,7 @@ <fixed-case>KUNLP</fixed-case> Grammatical Error Correction System For <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2013 Shared Task Bong-JunYi Ho-ChangLee - Hae-ChangRim + Hae-ChangRim 123–127 W13-3617 yi-etal-2013-kunlp @@ -5913,7 +5913,7 @@ Proceedings of the Second International Conference on Dependency Linguistics (DepLing 2013) W13-37 - EvaHajičová + EvaHajičová KimGerdes LeoWanner Charles University in Prague, Matfyzpress, Prague, Czech Republic @@ -5928,14 +5928,14 @@ Invited talk: Dependency Structure and Cognition - RichardHudson + RichardHudson 1–11 W13-3701 hudson-2013-invited Invited talk: Dependency Representations, Grammars, Folded Structures, among Other Things! - Aravind K.Joshi + Aravind K.Joshi 12 W13-3702 joshi-2013-invited @@ -5951,7 +5951,7 @@ Towards Joint Morphological Analysis and Dependency Parsing of <fixed-case>T</fixed-case>urkish - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu JonasKuhn 23–32 W13-3704 @@ -5959,9 +5959,9 @@ Divergences in <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Parallel Dependency Treebanks - HimaniChaudhry + HimaniChaudhry HimanshuSharma - Dipti MisraSharma + Dipti MisraSharma 33–40 W13-3705 chaudhry-etal-2013-divergences @@ -6022,7 +6022,7 @@ Towards a Psycholinguistically Motivated Dependency Grammar for <fixed-case>H</fixed-case>indi SamarHusain - RajeshBhatt + RajeshBhatt ShravanVasishth 108–117 W13-3713 @@ -6054,7 +6054,7 @@ The Representation of <fixed-case>C</fixed-case>zech Light Verb Constructions in a Valency Lexicon VáclavaKettnerová - MarkétaLopatková + MarkétaLopatková 147–156 W13-3717 kettnerova-lopatkova-2013-representation @@ -6084,19 +6084,19 @@ More Constructions, More Genres: Extending <fixed-case>S</fixed-case>tanford Dependencies - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe MiriamConnor NataliaSilveira - Samuel R.Bowman + Samuel R.Bowman TimothyDozat - Christopher D.Manning + Christopher D.Manning 187–196 W13-3721 de-marneffe-etal-2013-constructions Why So Many Nodes? - DanMaxwell + DanMaxwell 197–206 W13-3722 maxwell-2013-many @@ -6122,9 +6122,9 @@ DebankaNandi MaazNomani HimanshuSharma - HimaniChaudhary + HimaniChaudhary SambhavJain - Dipti MisraSharma + Dipti MisraSharma 227–235 W13-3725 nandi-etal-2013-towards @@ -6174,7 +6174,7 @@ Dependency and Constituency in Translation Shift Analysis ManuelaSanguinetti CristinaBosco - LeonardoLesmo + LeonardoLesmo 282–291 W13-3731 sanguinetti-etal-2013-dependency @@ -6182,7 +6182,7 @@ Managing a Multilingual Treebank Project MilanSouček - TimoJärvinen + TimoJärvinen AdamLaMontagne 292–297 W13-3732 @@ -6190,7 +6190,7 @@ An Empirical Study of Differences between Conversion Schemes and Annotation Guidelines - AndersSøgaard + AndersSøgaard 298–307 W13-3733 sogaard-2013-empirical @@ -6201,7 +6201,7 @@ Proceedings of the Joint Symposium on Semantic Processing. Textual Inference and Structures in Corpora W13-38 OctavianPopescu - AlbertoLavelli + AlbertoLavelli
Trento, Italy
November 2013 @@ -6213,7 +6213,7 @@ Text Understanding using Knowledge-Bases and Random Walks - EnekoAgirre + EnekoAgirre 1 W13-3801 agirre-2013-text @@ -6227,7 +6227,7 @@ Ontology Lexicalization as a core task in a language-enhanced Semantic Web - PhilippCimiano + PhilippCimiano 3 W13-3803 cimiano-2013-ontology @@ -6242,14 +6242,14 @@ Entailment graphs for text exploration IdoDagan - BernardoMagnini + BernardoMagnini 5 W13-3805 dagan-magnini-2013-entailment Semantic Textual Similarity: past present and future - MonaDiab + MonaDiab 6 W13-3806 diab-2013-semantic @@ -6277,8 +6277,8 @@ Design and Realization of the <fixed-case>EXCITEMENT</fixed-case> Open Platform for Textual Entailment - GünterNeumann - SebastianPadó + GünterNeumann + SebastianPadó 11 W13-3810 neumann-pado-2013-design @@ -6299,7 +6299,7 @@ Potential and limits of distributional approaches for semantic relatedness - SabineSchulte in Walde + SabineSchulte in Walde 14 W13-3813 schulte-in-walde-2013-potential @@ -6308,7 +6308,7 @@ Towards Compositional Tree Kernels PaoloAnnesi DaniloCroce - RobertoBasili + RobertoBasili 15-23 W13-3814 annesi-etal-2013-towards @@ -6334,8 +6334,8 @@ Abduction for Discourse Interpretation: A Probabilistic Framework EkaterinaOvchinnikova - AndrewGordon - JerryHobbs + AndrewGordon + JerryHobbs 42-50 W13-3817 ovchinnikova-etal-2013-abduction @@ -6361,7 +6361,7 @@ EmanueleBastianelli GiuseppeCastellucci DaniloCroce - RobertoBasili + RobertoBasili 65-69 W13-3820 @@ -6371,7 +6371,7 @@ An empirical classification of verbs based on Semantic Types: the case of the ‘poison’ verbs. JaneBradbury - IsmailEl Maarouf + IsmailEl Maarouf 70-74 W13-3821 bradbury-el-maarouf-2013-empirical @@ -6386,7 +6386,7 @@ Alternative measures of word relatedness in distributional semantics AncaDinu - AlinaCiobanu + AlinaCiobanu 80-84 W13-3823 dinu-ciobanu-2013-alternative @@ -6394,7 +6394,7 @@ Linear Compositional Distributional Semantics and Structural Kernels LorenzoFerrone - Fabio MassimoZanzotto + Fabio MassimoZanzotto 85-89 W13-3824 ferrone-zanzotto-2013-linear @@ -6408,7 +6408,7 @@ Automatic classification of semantic patterns from the Pattern Dictionary of <fixed-case>E</fixed-case>nglish Verbs - IsmaïlEl Maarouf + IsmaïlEl Maarouf VítBaisa 95-99 W13-3826 @@ -6451,8 +6451,8 @@ Proceedings of the Fourth Workshop on Speech and Language Processing for Assistive Technologies W13-39 JanAlexandersson - PeterLjunglöf - Kathleen F.McCoy + PeterLjunglöf + Kathleen F.McCoy FrançoisPortet BrianRoark FrankRudzicz @@ -6507,8 +6507,8 @@ Comparing and combining classifiers for self-taught vocal interfaces LizeBroekx KatrienDreesen - Jort FlorentGemmeke - HugoVan hamme + Jort FlorentGemmeke + HugoVan hamme 21–28 W13-3905 broekx-etal-2013-comparing @@ -6529,7 +6529,7 @@ HanneDeprez EmreYilmaz StefanLievens - HugoVan hamme + HugoVan hamme 35–40 W13-3907 deprez-etal-2013-automating @@ -6540,16 +6540,16 @@ OscarKoller ChristianOberdörfer YannickGweth - HermannNey + HermannNey 41–46 W13-3908 forster-etal-2013-improving Automatic speech recognition in the diagnosis of primary progressive aphasia - KathleenFraser + KathleenFraser FrankRudzicz - NaidaGraham + NaidaGraham ElizabethRochon 47–54 W13-3909 @@ -6590,10 +6590,10 @@ BartOns NetsanetTessema Jannekevan de Loo - JortGemmeke + JortGemmeke GuyDe Pauw - WalterDaelemans - HugoVan hamme + WalterDaelemans + HugoVan hamme 73–81 W13-3913 ons-etal-2013-self @@ -6601,7 +6601,7 @@ The dramatic piece reader for the blind and visually impaired MilanRusko - MarianTrnka + MarianTrnka SakhiaDarjaa JurajHamar 83–91 @@ -6649,7 +6649,7 @@ LodeVuegen BertVan Den Broeck PeterKarsmakers - HugoVan hamme + HugoVan hamme BartVanrumste 113–118 W13-3918 @@ -6659,8 +6659,8 @@ Word Recognition from Continuous Articulatory Movement Time-series Data using Symbolic Representations JunWang ArvindBalasubramanian - LuisMojica de la Vega - Jordan R.Green + LuisMojica de la Vega + Jordan R.Green AshokSamal BalakrishnanPrabhakaran 119–127 @@ -6684,7 +6684,7 @@ MaxineEskenazi MichaelStrube BarbaraDi Eugenio - Jason D.Williams + Jason D.Williams Association for Computational Linguistics
Metz, France
August @@ -6697,7 +6697,7 @@ Discourse Relations, Discourse Structure, Discourse Semantics - BonnieWebber + BonnieWebber 1 W13-4001 webber-2013-discourse @@ -6705,7 +6705,7 @@ Expressivity and comparison of models of discourse structure AntoineVenant - NicholasAsher + NicholasAsher PhilippeMuller PascalDenis StergosAfantenos @@ -6716,7 +6716,7 @@ Unsupervised structured semantic inference for spoken dialog reservation tasks AlejandraLorenzo - LinaRojas-Barahona + LinaRojas-Barahona ChristopheCerisara 12–20 W13-4003 @@ -6725,7 +6725,7 @@ Toward a Better Understanding of Causality between Verbal Events: Extraction and Analysis of the Causal Power of Verb-Verb Associations MehwishRiaz - RoxanaGirju + RoxanaGirju 21–30 W13-4004 riaz-girju-2013-toward @@ -6741,7 +6741,7 @@ Topic Independent Identification of Agreement and Disagreement in Social Media Dialogue AmitaMisra - MarilynWalker + MarilynWalker 41–50 W13-4006 misra-walker-2013-topic @@ -6751,7 +6751,7 @@ ZhouYu DavidGerritsen AmyOgan - AlanBlack + AlanBlack JustineCassell 51–60 W13-4007 @@ -6770,7 +6770,7 @@ Generating More Specific Questions for Acquiring Attributes of Unknown Concepts from Users TsugumiOtsuka KazunoriKomatani - SatoshiSato + SatoshiSato MikioNakano 70–77 W13-4009 @@ -6781,14 +6781,14 @@ ChangsongLiu RuiFang LanboShe - JoyceChai + JoyceChai 78–86 W13-4010 liu-etal-2013-modeling A quantitative view of feedback lexical markers in conversational <fixed-case>F</fixed-case>rench - LaurentPrévot + LaurentPrévot BrigitteBigi RoxaneBertrand 87–91 @@ -6797,8 +6797,8 @@ On the contribution of discourse structure to topic segmentation - PaulaCardoso - MaiteTaboada + PaulaCardoso + MaiteTaboada ThiagoPardo 92–96 W13-4012 @@ -6823,7 +6823,7 @@ Patterns of Importance Variation in Spoken Dialog - NigelWard + NigelWard KarenRichart-Ruiz 107–111 W13-4015 @@ -6840,9 +6840,9 @@ Dialogue Act Recognition in Synchronous and Asynchronous Conversations MaryamTavafi YasharMehdad - ShafiqJoty + ShafiqJoty GiuseppeCarenini - RaymondNg + RaymondNg 117–121 W13-4017 tavafi-etal-2013-dialogue @@ -6868,7 +6868,7 @@ Exploring Features For Localized Detection of Speech Recognition Errors EliPincus SvetlanaStoyanchev - JuliaHirschberg + JuliaHirschberg 132–136 W13-4020 pincus-etal-2013-exploring @@ -6876,8 +6876,8 @@ Modelling Human Clarification Strategies SvetlanaStoyanchev - AlexLiu - JuliaHirschberg + AlexLiu + JuliaHirschberg 137–141 W13-4021 stoyanchev-etal-2013-modelling @@ -6895,7 +6895,7 @@ <fixed-case>AIDA</fixed-case>: Artificial Intelligent Dialogue Agent - Rafael E.Banchs + Rafael E.Banchs RidongJiang SeokhwanKim ArthurNiswar @@ -6906,8 +6906,8 @@ Demonstration of an Always-On Companion for Isolated Older Adults - CandaceSidner - TimothyBickmore + CandaceSidner + TimothyBickmore CharlesRich BarbaraBarry LazloRing @@ -6919,7 +6919,7 @@ A Multithreaded Conversational Interface for Pedestrian Navigation and Question Answering - SrinivasanJanarthanam + SrinivasanJanarthanam OliverLemon XingkunLiu PhilBartie @@ -6931,20 +6931,20 @@ Demonstration of the <fixed-case>PARLANCE</fixed-case> system: a data-driven incremental, spoken dialogue system for interactive search - HelenHastie + HelenHastie Marie-AudeAufaure PanosAlexopoulos HeribertoCuayáhuitl NinaDethlefs - MilicaGasic - JamesHenderson + MilicaGasic + JamesHenderson OliverLemon XingkunLiu PeterMika NesrineBen Mustapha VerenaRieser BlaiseThomson - PirrosTsiakoulis + PirrosTsiakoulis YvesVanrompay 154–156 W13-4026 @@ -6980,7 +6980,7 @@ Interpreting Situated Dialogue Utterances: an Update Model that Uses Speech, Gaze, and Gesture Information - CaseyKennington + CaseyKennington SpyrosKousidis DavidSchlangen 173–182 @@ -7001,9 +7001,9 @@ KallirroiGeorgila RonArtstein FabrizioMorbini - DavidTraum + DavidTraum StefanScherer - Albert SkipRizzo + Albert SkipRizzo Louis-PhilippeMorency 193–202 W13-4032 @@ -7018,24 +7018,24 @@ Learning Dialogue Management Models for Task-Oriented Dialogue with Parallel Dialogue and Task Streams - EunHa - ChristopherMitchell - KristyBoyer - JamesLester + EunHa + ChristopherMitchell + KristyBoyer + JamesLester 204–213 W13-4034 ha-etal-2013-learning <fixed-case>POMDP</fixed-case>-based dialogue manager adaptation to extended domains - MilicaGašić + MilicaGašić CatherineBreslin MatthewHenderson DonghoKim - MartinSzummer + MartinSzummer BlaiseThomson - PirrosTsiakoulis - SteveYoung + PirrosTsiakoulis + SteveYoung 214–222 W13-4035 gasic-etal-2013-pomdp @@ -7043,7 +7043,7 @@ Training and evaluation of an <fixed-case>MDP</fixed-case> model for social multi-user human-robot interaction SimonKeizer - Mary EllenFoster + Mary EllenFoster OliverLemon AndreGaschler ManuelGiuliani @@ -7058,7 +7058,7 @@ AndréBerton AngelaMahr RafaelMath - ChristianMüller + ChristianMüller 233–241 W13-4037 hofmann-etal-2013-evaluation @@ -7066,7 +7066,7 @@ Predicting Tasks in Goal-Oriented Spoken Dialog Systems using Semantic Knowledge Bases AasishPappu - AlexanderRudnicky + AlexanderRudnicky 242–250 W13-4038 pappu-rudnicky-2013-predicting @@ -7074,7 +7074,7 @@ Surface Text based Dialogue Models for Virtual Humans SudeepGandhe - DavidTraum + DavidTraum 251–260 W13-4039 gandhe-traum-2013-surface @@ -7119,7 +7119,7 @@ Tacit Social Contracts for Wheelchairs - DanielCouto Vale + DanielCouto Vale VivienMast 294–303 W13-4044 @@ -7137,7 +7137,7 @@ <fixed-case>IMHO</fixed-case>: An Exploratory Study of Hedging in Web Forums - LilianaMamani Sanchez + LilianaMamani Sanchez CarlVogel 309–313 W13-4046 @@ -7147,7 +7147,7 @@ Impact of <fixed-case>ASR</fixed-case> N-Best Information on <fixed-case>B</fixed-case>ayesian Dialogue Act Recognition HeribertoCuayáhuitl NinaDethlefs - HelenHastie + HelenHastie OliverLemon 314–318 W13-4047 @@ -7156,7 +7156,7 @@ Investigating speaker gaze and pointing behaviour in human-computer interaction with the mint.tools collection SpyrosKousidis - CaseyKennington + CaseyKennington DavidSchlangen 319–323 W13-4048 @@ -7165,14 +7165,14 @@ In-Context Evaluation of Unsupervised Dialogue Act Models for Tutorial Dialogue AysuEzen-Can - KristyBoyer + KristyBoyer 324–328 W13-4049 ezen-can-boyer-2013-context Spoken Dialog Systems for Automated Survey Interviewing - MichaelJohnston + MichaelJohnston PatrickEhlen Frederick G.Conrad Michael F.Schober @@ -7198,9 +7198,9 @@ Evaluating State Representations for Reinforcement Learning of Turn-Taking Policies in Tutorial Dialogue - ChristopherMitchell - KristyBoyer - JamesLester + ChristopherMitchell + KristyBoyer + JamesLester 339–343 W13-4052 mitchell-etal-2013-evaluating @@ -7221,7 +7221,7 @@ SangdoHan KyusongLee DonghyeonLee - Gary GeunbaeLee + Gary GeunbaeLee 349–353 W13-4054 han-etal-2013-counseling @@ -7241,15 +7241,15 @@ ClaireGardent AlejandraLorenzo LauraPerez-Beltrachini - LinaRojas-Barahona + LinaRojas-Barahona 357–359 W13-4056 gardent-etal-2013-weakly Open-Domain Information Access with Talking Robots - KristiinaJokinen - GrahamWilcock + KristiinaJokinen + GrahamWilcock 360–362 W13-4057 jokinen-wilcock-2013-open @@ -7257,8 +7257,8 @@ Demonstration of the <fixed-case>E</fixed-case>mote<fixed-case>W</fixed-case>izard of <fixed-case>O</fixed-case>z Interface for Empathic Robotic Tutors ShwetaBhargava - SrinivasanJanarthanam - HelenHastie + SrinivasanJanarthanam + HelenHastie AmolDeshmukh RuthAylett LeeCorrigan @@ -7292,8 +7292,8 @@ FabrizioMorbini KellyChristoffersen KenjiSagae - DavidTraum - Albert A.Rizzo + DavidTraum + Albert A.Rizzo 372–374 W13-4061 forbell-etal-2013-roundtable @@ -7309,9 +7309,9 @@ Continuously Predicting and Processing Barge-in During a Live Spoken Dialogue Task - EthanSelfridge + EthanSelfridge IkerArizmendi - PeterHeeman + PeterHeeman JasonWilliams 384–393 W13-4063 @@ -7323,11 +7323,11 @@ KartikAudhkhasi KenjiSagae RonArtstein - DoğanCan - PanayiotisGeorgiou - ShriNarayanan + DoğanCan + PanayiotisGeorgiou + ShriNarayanan AntonLeuski - DavidTraum + DavidTraum 394–403 W13-4064 morbini-etal-2013-asr @@ -7337,14 +7337,14 @@ JasonWilliams AntoineRaux DeepakRamachandran - AlanBlack + AlanBlack 404–413 W13-4065 williams-etal-2013-dialog Recipe For Building Robust Spoken Dialog State Trackers: Dialog State Tracking Challenge System Description - SungjinLee + SungjinLee MaxineEskenazi 414–422 W13-4066 @@ -7367,17 +7367,17 @@ Structured Discriminative Model For Dialog State Tracking - SungjinLee + SungjinLee 442–451 W13-4069 lee-2013-structured Comparison of <fixed-case>B</fixed-case>ayesian Discriminative and Generative Models for Dialogue State Tracking - Lukᚎilka + Lukᚎilka DavidMarek MatějKorvas - FilipJurčíček + FilipJurčíček 452–456 W13-4070 zilka-etal-2013-comparison @@ -7407,7 +7407,7 @@ Deep Neural Network Approach for the Dialog State Tracking Challenge MatthewHenderson BlaiseThomson - SteveYoung + SteveYoung 467–471 W13-4073 henderson-etal-2013-deep @@ -7417,8 +7417,8 @@ Proceedings of the 3rd Workshop on Sentiment Analysis where AI meets Psychology W13-41 - SivajiBandyopadhyay - ManabuOkumura + SivajiBandyopadhyay + ManabuOkumura Asian Federation of Natural Language Processing
Nagoya, Japan
October @@ -7432,7 +7432,7 @@ Why Words Alone Are Not Enough: Error Analysis of Lexicon-based Polarity Classifier for <fixed-case>C</fixed-case>zech KateřinaVeselovská - JanHajič jr. + JanHajič jr. 1–5 W13-4101 veselovska-hajic-jr-2013-words @@ -7442,7 +7442,7 @@ YasuhideMiura KeigoHattori TomokoOhkuma - HiroshiMasuichi + HiroshiMasuichi 6–14 W13-4102 miura-etal-2013-topic @@ -7471,7 +7471,7 @@ Proceedings of the IJCNLP 2013 Workshop on Natural Language Processing for Social Media (SocialNLP) W13-42 - Shou-deLin + Shou-deLin Lun-WeiKu Tsung-TingKuo Asian Federation of Natural Language Processing @@ -7487,7 +7487,7 @@ Predicting <fixed-case>TV</fixed-case> Audience Rating with Social Media Wen-TaiHsieh - Seng-cho T.Chou + Seng-cho T.Chou Yu-HsuanCheng Chen-MingWu 1–5 @@ -7499,14 +7499,14 @@ ChoochartHaruechaiyasak AlisaKongthon PornpimonPalingoon - KanokornTrakultaweekoon + KanokornTrakultaweekoon 6–13 W13-4202 haruechaiyasak-etal-2013-sense Social Metaphor Detection via Topical Analysis - Ting-HaoHuang + Ting-HaoHuang 14–22 W13-4203 huang-2013-social @@ -7540,8 +7540,8 @@ Proceedings of the 11th Workshop on Asian Language Resources W13-43 - PushpakBhattacharyya - Key-SunChoi + PushpakBhattacharyya + Key-SunChoi Asian Federation of Natural Language Processing
Nagoya, Japan
October @@ -7587,10 +7587,10 @@
Event and Event Actor Alignment in Phrase Based Statistical Machine Translation - AnupKolya + AnupKolya SantanuPal AsifEkbal - SivajiBandyopadhyay + SivajiBandyopadhyay 36–44 W13-4305 kolya-etal-2013-event @@ -7608,15 +7608,15 @@ Annotating Legitimate Disagreement in Corpus Construction - Billy T.M.Wong - Sophia Y.M.Lee + Billy T.M.Wong + Sophia Y.M.Lee 51–57 W13-4307 wong-lee-2013-annotating A Hybrid Statistical Approach for Named Entity Recognition for <fixed-case>M</fixed-case>alayalam Language - JishaP Jayan + JishaP Jayan RajeevR R ElizabethSherly 58–63 @@ -7626,7 +7626,7 @@ Designing a Generic Scheme for Etymological Annotation: a New Type of Language Corpora Annotation Niladri SekharDash - Mazhar MehdiHussain + Mazhar MehdiHussain 64–71 W13-4309 dash-hussain-2013-designing @@ -7644,8 +7644,8 @@ Proceedings of the Seventh SIGHAN Workshop on Chinese Language Processing W13-44 - Liang-ChihYu - Yuen-HsienTseng + Liang-ChihYu + Yuen-HsienTseng JingboZhu FujiRen Asian Federation of Natural Language Processing @@ -7660,7 +7660,7 @@ Keynote Speech: Lexical Semantics of <fixed-case>C</fixed-case>hinese Language - Keh-JiannChen + Keh-JiannChen 1 W13-4401 chen-2013-keynote @@ -7668,7 +7668,7 @@ Can <fixed-case>MDL</fixed-case> Improve Unsupervised <fixed-case>C</fixed-case>hinese Word Segmentation? PierreMagistry - BenoîtSagot + BenoîtSagot 2–10 W13-4402 magistry-sagot-2013-mdl @@ -7679,7 +7679,7 @@ YiZhang YusukeMiyao TakuyaMatsuzaki - JunichiTsujii + JunichiTsujii 11–19 W13-4403 wang-etal-2013-deep @@ -7689,7 +7689,7 @@ Shu-LingHuang Yu-MingHsieh Su-ChuLin - Keh-JiannChen + Keh-JiannChen 20–28 W13-4404 huang-etal-2013-lexical @@ -7724,9 +7724,9 @@ <fixed-case>C</fixed-case>hinese Spelling Checker Based on Statistical Machine Translation - Hsun-wenChiu - Jian-chengWu - Jason S.Chang + Hsun-wenChiu + Jian-chengWu + Jason S.Chang 49–53 W13-4408 chiu-etal-2013-chinese @@ -7737,7 +7737,7 @@ FeiCheng YanyanLuo KevinDuh - YujiMatsumoto + YujiMatsumoto 54–58 W13-4409 liu-etal-2013-hybrid @@ -7746,7 +7746,7 @@ Introduction to <fixed-case>CKIP</fixed-case> <fixed-case>C</fixed-case>hinese Spelling Check System for <fixed-case>SIGHAN</fixed-case> Bakeoff 2013 Evaluation Yu-MingHsieh Ming-HongBai - Keh-JiannChen + Keh-JiannChen 59–63 W13-4410 hsieh-etal-2013-introduction @@ -7754,15 +7754,15 @@ Automatic <fixed-case>C</fixed-case>hinese Confusion Words Extraction Using Conditional Random Fields and the Web Chun-HungWang - Jason S.Chang - Jian-ChengWu + Jason S.Chang + Jian-ChengWu 64–68 W13-4411 wang-etal-2013-automatic-chinese Conditional Random Field-based Parser and Language Model for Tradi-tional <fixed-case>C</fixed-case>hinese Spelling Checker - Yih-RuWang + Yih-RuWang Yuan-FuLiao Yeh-KuangWu Liang-ChunChang @@ -7773,7 +7773,7 @@ A Maximum Entropy Approach to <fixed-case>C</fixed-case>hinese Spelling Check DongxuHan - BaobaoChang + BaobaoChang 74–78 W13-4413 han-chang-2013-maximum @@ -7783,7 +7783,7 @@ Kuan-YuChen Hung-ShinLee Chung-HanLee - Hsin-MinWang + Hsin-MinWang Hsin-HsiChen 79–83 W13-4414 @@ -7808,12 +7808,12 @@ Sinica-<fixed-case>IASL</fixed-case> <fixed-case>C</fixed-case>hinese spelling check system at Sighan-7 - Ting-HaoYang + Ting-HaoYang Yu-LunHsieh Yu-HsuanChen MichaelTsang Cheng-WeiShih - Wen-LianHsu + Wen-LianHsu 93–96 W13-4417 yang-etal-2013-sinica @@ -7906,7 +7906,7 @@ Extracting and Aggregating False Information from Microblogs - NaoakiOkazaki + NaoakiOkazaki KeitaNabeshima KentoWatanabe JuntaMizuno @@ -7954,7 +7954,7 @@ Incorporating Knowledge Resources to Enhance Medical Information Extraction YasuhideMiura TomokoOhkuma - HiroshiMasuichi + HiroshiMasuichi EmikoYamada Shinohara EijiAramaki KazuhikoOhe @@ -7966,7 +7966,7 @@ Clinical Vocabulary and Clinical Finding Concepts in Medical Literature TakashiOkumura EijiAramaki - YukaTateisi + YukaTateisi 7–13 W13-4602 okumura-etal-2013-clinical @@ -7984,9 +7984,9 @@ Towards High-Reliability Speech Translation in the Medical Domain GrahamNeubig SakrianiSakti - TomokiToda + TomokiToda SatoshiNakamura - YujiMatsumoto + YujiMatsumoto RyosukeIsotani YukichiIkeda 22–29 @@ -8004,8 +8004,8 @@ Proper and Efficient Treatment of Anaphora and Long-Distance Dependency in Context-Free Grammar: An Experiment with Medical Text - WailokTam - KoitiHasida + WailokTam + KoitiHasida YusukeMatsubara EijiAramaki MaiMiyabe @@ -8039,8 +8039,8 @@ Proceedings of the 4th Workshop on South and Southeast Asian Natural Language Processing W13-47 - PushpakBhattacharyya - M. G. AbbasMalik + PushpakBhattacharyya + M. G. AbbasMalik Asian Federation of Natural Language Processing
Nagoya Congress Center, Nagoya, Japan
October @@ -8054,7 +8054,7 @@ Fast Bootstrapping of Grapheme to Phoneme System for Under-resourced Languages - Application to the <fixed-case>I</fixed-case>ban Language SarahSamson Juan - LaurentBesacier + LaurentBesacier 1–8 W13-4701 samson-juan-besacier-2013-fast @@ -8086,7 +8086,7 @@ On Application of Conditional Random Field in Stemming of <fixed-case>B</fixed-case>engali Natural Language Text SandipanSarkar - SivajiBandyopadhyay + SivajiBandyopadhyay 34–42 W13-4705 sarkar-bandyopadhyay-2013-application @@ -8094,8 +8094,8 @@ <fixed-case>U</fixed-case>rdu <fixed-case>H</fixed-case>indi Machine Transliteration using <fixed-case>SMT</fixed-case> M. G. AbbasMalik - ChristianBoitet - LaurentBesacier + ChristianBoitet + LaurentBesacier PushpakBhattacharyya 43–57 W13-4706 @@ -8104,7 +8104,7 @@ <fixed-case>U</fixed-case>rdu Spell Checking: Reverse Edit Distance Approach SaadatIqbal - Muhammad WaqasAnwar + Muhammad WaqasAnwar Usama IjazBajwa ZobiaRehman 58–65 @@ -8122,7 +8122,7 @@ <fixed-case>E</fixed-case>nglish to <fixed-case>U</fixed-case>rdu Hierarchical Phrase-based Statistical Machine Translation NadeemKhan - Muhammad WaqasAnwar + Muhammad WaqasAnwar Usama IjazBajwa NadirDurrani 72–76 @@ -8138,8 +8138,8 @@ <fixed-case>M</fixed-case>alayalam Clause Boundary Identifier: Annotation and Evaluation - SobhaLalitha Devi - LakshmiS + SobhaLalitha Devi + LakshmiS 83–90 W13-4711 lalitha-devi-s-2013-malayalam @@ -8157,15 +8157,15 @@ <fixed-case>A</fixed-case>gree<fixed-case>C</fixed-case>alc: Uma Ferramenta para Análise da Concordância entre Múltiplos Anotadores (<fixed-case>A</fixed-case>gree<fixed-case>C</fixed-case>alc: A Tool for the Analysis of Agreement Between Multiple Annotators) [in <fixed-case>P</fixed-case>ortuguese] - Alexandre RossiAlvares - Norton TrevisanRoman + Alexandre RossiAlvares + Norton TrevisanRoman W13-4801 alvares-roman-2013-agreecalc <fixed-case>M</fixed-case>eta<fixed-case>A</fixed-case>nn: Um Gerador de Ferramentas para Anotação de Textos (<fixed-case>M</fixed-case>eta<fixed-case>A</fixed-case>nn: a Generator of Text Annotation Tools) [in <fixed-case>P</fixed-case>ortuguese] Tiago Emanuel InfanteMissão - Norton TrevisanRoman + Norton TrevisanRoman W13-4802 missao-roman-2013-metaann @@ -8179,7 +8179,7 @@ Uma Investigação sobre Algoritmos de Diferentes Abordagens de Aprendizado Supervisionado na Classificação de Papéis Retóricos em Resumos Científicos (Investigating Algorithms from Different Approaches of Supervised Learning for the Classification of Rhetorical Roles in Scientific Abstracts) [in <fixed-case>P</fixed-case>ortuguese] Vinícius M. A.de Souza - Valéria D.Feltrim + Valéria D.Feltrim W13-4804 de-souza-feltrim-2013-uma @@ -8193,43 +8193,43 @@ Subtopic Annotation in a Corpus of News Texts: Steps Towards Automatic Subtopic Segmentation - Paula C. F.Cardoso - MaiteTaboada + Paula C. F.Cardoso + MaiteTaboada Thiago A. S.Pardo W13-4806 cardoso-etal-2013-subtopic <fixed-case>O</fixed-case> Reconhecimento de Entidades Nomeadas por meio de Conditional Random Fields para a Língua Portuguesa (Named Entity Recognition with Conditional Random Fields for the <fixed-case>P</fixed-case>ortuguese Language) [in <fixed-case>P</fixed-case>ortuguese] - Daniela O. F.do Amaral - RenataVieira + Daniela O. F.do Amaral + RenataVieira W13-4807 do-amaral-vieira-2013-o Análise Automática de Coerência Usando o Modelo Grade de Entidades para o Português (Automatic Coherence Analysis Using the Entity-grid Model for <fixed-case>P</fixed-case>ortuguese) [in <fixed-case>P</fixed-case>ortuguese] Alison R. P.Freitas - Valéria D.Feltrim + Valéria D.Feltrim W13-4808 freitas-feltrim-2013-analise Aplicando Pontos de Corte para Listas de Termos Extraídos (Applying Cut-off Points to Lists of Extracted Terms) [in <fixed-case>P</fixed-case>ortuguese] LuceleneLopes - RenataVieira + RenataVieira W13-4809 lopes-vieira-2013-aplicando Geração de Expressões de Referência usando Relações Espaciais (Referring Expression Generation Using Spatial Relations) [in <fixed-case>P</fixed-case>ortuguese] Diegodos Santos Silva - IvandréParaboni + IvandréParaboni W13-4810 dos-santos-silva-paraboni-2013-geracao Mac-Morpho Revisited: Towards Robust Part-of-Speech Tagging - Erick RochaFonseca + Erick RochaFonseca João Luís G.Rosa W13-4811 fonseca-rosa-2013-mac @@ -8243,7 +8243,7 @@ Text Simplification as Tree Transduction - Gustavo H.Paetzold + Gustavo H.Paetzold LuciaSpecia W13-4813 paetzold-specia-2013-text @@ -8251,7 +8251,7 @@ Automatic Disambiguation of Homographic Heterophone Pairs Containing Open and Closed Mid Vowels ChristopherShulby - GustavoMendonça + GustavoMendonça VanessaMarquiafável W13-4814 shulby-etal-2013-automatic @@ -8267,22 +8267,22 @@ Realização Superficial baseada em Regras (Rule-based Surface Realisation) [in <fixed-case>P</fixed-case>ortuguese] Douglas F. P.da Silva Junior Eder M.de Novais - IvandréParaboni + IvandréParaboni W13-4816 da-silva-junior-etal-2013-realizacao <fixed-case>JWN</fixed-case>-Br - Uma <fixed-case>API</fixed-case> <fixed-case>J</fixed-case>ava para a <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et.<fixed-case>B</fixed-case>r (<fixed-case>JWN</fixed-case>-Br - an <fixed-case>J</fixed-case>ava <fixed-case>API</fixed-case> for <fixed-case>W</fixed-case>ordnet.<fixed-case>B</fixed-case>r) [in <fixed-case>P</fixed-case>ortuguese] Vitor MachadoOliveira - Norton TrevisanRoman + Norton TrevisanRoman W13-4817 oliveira-roman-2013-jwn Geração de features para resolução de correferência: Pessoa, Local e Organização (Feature Generation for Coreference Resolution: Person, Location and Organization) [in <fixed-case>P</fixed-case>ortuguese] - Evandro B.Fonseca - RenataVieira - Aline A.Vanin + Evandro B.Fonseca + RenataVieira + Aline A.Vanin W13-4818 fonseca-etal-2013-geracao @@ -8295,16 +8295,16 @@ Um repositório de verbos para a anotação de papéis semânticos disponível na web (A Verb Repository for Semantic Role Labeling Available in the Web) [in <fixed-case>P</fixed-case>ortuguese] - Magali SanchesDuran + Magali SanchesDuran Jhonata PereiraMartins - Sandra MariaAluísio + Sandra MariaAluísio W13-4820 duran-etal-2013-um Entity-centric Sentiment Analysis on <fixed-case>T</fixed-case>witter data for the Potuguese Language MarloSouza - RenataVieira + RenataVieira W13-4821 souza-vieira-2013-entity @@ -8312,7 +8312,7 @@ Approaches for Helping <fixed-case>B</fixed-case>razilian Students Improve their Scientific Writings EthelSchuster RickLizotte - Sandra M.Aluísio + Sandra M.Aluísio CarmenDayrell W13-4822 schuster-etal-2013-approaches @@ -8327,14 +8327,14 @@ Geração de instruções em mundos virtuais: primeiros passos (Generation of Instructions in Virtual Worlds: First Steps) [in <fixed-case>P</fixed-case>ortuguese] Diego CardozoSandrim FelipeFreire - IvandréParaboni + IvandréParaboni W13-4824 sandrim-etal-2013-geracao Extração de Vocabulário Multilíngue para Tradução em Domínios Especializados (Multilingual Vocabulary Extraction for Machine Translation in Specialized Domains) [in <fixed-case>P</fixed-case>ortuguese] - Lucas WelterHilgert - RenataVieira + Lucas WelterHilgert + RenataVieira W13-4825 hilgert-vieira-2013-extracao @@ -8347,9 +8347,9 @@ Acoustic, Phonetic and Prosodic Features of <fixed-case>P</fixed-case>arkinson’s disease Speech JorgeProença - ArlindoVeiga + ArlindoVeiga SaraCandeias - FernandoPerdigão + FernandoPerdigão W13-4827 proenca-etal-2013-acoustic @@ -8364,9 +8364,9 @@ An Evaluation of the <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese <fixed-case>LIWC</fixed-case> Dictionary for Sentiment Analysis - Pedro P. BalageFilho + Pedro P. BalageFilho Thiago Alexandre SalgueiroPardo - Sandra M.Aluísio + Sandra M.Aluísio W13-4829 filho-etal-2013-evaluation @@ -8416,7 +8416,7 @@ <fixed-case>L</fixed-case>ithuanian Dependency Parsing with Rich Morphological Features - JurgitaKapočiūtė-Dzikienė + JurgitaKapočiūtė-Dzikienė JoakimNivre AlgisKrupavičius 12–21 @@ -8425,7 +8425,7 @@ Parsing <fixed-case>C</fixed-case>roatian and <fixed-case>S</fixed-case>erbian by Using <fixed-case>C</fixed-case>roatian Dependency Treebanks - ŽeljkoAgić + ŽeljkoAgić DanijelaMerkler DašaBerović 22–33 @@ -8441,16 +8441,16 @@ The <fixed-case>LIGM</fixed-case>-<fixed-case>A</fixed-case>lpage architecture for the <fixed-case>SPMRL</fixed-case> 2013 Shared Task: Multiword Expression Analysis and Dependency Parsing - MatthieuConstant - MarieCandito - DjaméSeddah + MatthieuConstant + MarieCandito + DjaméSeddah 46–52 W13-4905 constant-etal-2013-ligm Exploring beam-based shift-reduce dependency parsing with <fixed-case>D</fixed-case>y<fixed-case>AL</fixed-case>og: Results from the <fixed-case>SPMRL</fixed-case> 2013 shared task - Éricde la Clergerie + Éricde la Clergerie 53–62 W13-4906 de-la-clergerie-2013-exploring @@ -8465,8 +8465,8 @@ Exploiting the Contribution of Morphological Information to Parsing: the <fixed-case>BASQUE</fixed-case> <fixed-case>TEAM</fixed-case> system in the <fixed-case>SPRML</fixed-case>‘2013 Shared Task IakesGoenaga - KoldoGojenola - NereaEzeiza + KoldoGojenola + NereaEzeiza 71–77 W13-4908 goenaga-etal-2013-exploiting @@ -8483,7 +8483,7 @@ <fixed-case>SPMRL</fixed-case>‘13 Shared Task System: The <fixed-case>CADIM</fixed-case> <fixed-case>A</fixed-case>rabic Dependency Parser YuvalMarton NizarHabash - OwenRambow + OwenRambow SarahAlkhulani 86–90 W13-4910 @@ -8492,7 +8492,7 @@ A Statistical Approach to Prediction of Empty Categories in <fixed-case>H</fixed-case>indi Dependency Treebank PuneethKukkadapu - PrashanthMannem + PrashanthMannem 91–96 W13-4911 kukkadapu-mannem-2013-statistical @@ -8500,8 +8500,8 @@ An Empirical Study on the Effect of Morphological and Lexical Features in <fixed-case>P</fixed-case>ersian Dependency Parsing MojtabaKhallash - AliHadian - BehrouzMinaei-Bidgoli + AliHadian + BehrouzMinaei-Bidgoli 97–107 W13-4912 khallash-etal-2013-empirical @@ -8516,8 +8516,8 @@ Context Based Statistical Morphological Analyzer and its Effect on <fixed-case>H</fixed-case>indi Dependency Parsing - Deepak KumarMalladi - PrashanthMannem + Deepak KumarMalladi + PrashanthMannem 119–128 W13-4914 malladi-mannem-2013-context @@ -8525,7 +8525,7 @@ Representation of Morphosyntactic Units and Coordination Structures in the <fixed-case>T</fixed-case>urkish Dependency Treebank UmutSulubacak - GülşenEryiğit + GülşenEryiğit 129–134 W13-4915 sulubacak-eryigit-2013-representation @@ -8533,9 +8533,9 @@ (Re)ranking Meets Morphosyntax: State-of-the-art Results from the <fixed-case>SPMRL</fixed-case> 2013 Shared Task AndersBjörkelund - ÖzlemÇetinoğlu - RichárdFarkas - ThomasMueller + ÖzlemÇetinoğlu + RichárdFarkas + ThomasMueller WolfgangSeeker 135–145 W13-4916 @@ -8543,15 +8543,15 @@ Overview of the <fixed-case>SPMRL</fixed-case> 2013 Shared Task: A Cross-Framework Evaluation of Parsing Morphologically Rich Languages - DjaméSeddah + DjaméSeddah ReutTsarfaty - SandraKübler - MarieCandito - Jinho D.Choi - RichárdFarkas + SandraKübler + MarieCandito + Jinho D.Choi + RichárdFarkas JenniferFoster IakesGoenaga - KoldoGojenola Galletebeitia + KoldoGojenola Galletebeitia YoavGoldberg SpenceGreen NizarHabash @@ -8559,13 +8559,13 @@ WolfgangMaier JoakimNivre AdamPrzepiórkowski - RyanRoth + RyanRoth WolfgangSeeker YannickVersley VeronikaVincze MarcinWoliński AlinaWróblewska - EricVillemonte de la Clergerie + EricVillemonte de la Clergerie 146–182 W13-4917 seddah-etal-2013-overview @@ -8576,9 +8576,9 @@ Proceedings of TextGraphs-8 Graph-based Methods for Natural Language Processing W13-50 ZornitsaKozareva - IrinaMatveeva + IrinaMatveeva GaborMelli - ViviNastase + ViviNastase Association for Computational Linguistics
Seattle, Washington, USA
October @@ -8599,10 +8599,10 @@
<fixed-case>J</fixed-case>o<fixed-case>B</fixed-case>im<fixed-case>T</fixed-case>ext Visualizer: A Graph-based Approach to Contextualizing Distributional Similarity - ChrisBiemann + ChrisBiemann BonaventuraCoppola - Michael R.Glass - AlfioGliozzo + Michael R.Glass + AlfioGliozzo MatthewHatem MartinRiedl 6–10 @@ -8622,7 +8622,7 @@ Reconstructing Big Semantic Similarity Networks AiHe ShefaliSharma - Chun-NanHsu + Chun-NanHsu 20–28 W13-5004 he-etal-2013-reconstructing @@ -8630,7 +8630,7 @@ Graph-Based Unsupervised Learning of Word Similarities Using Heterogeneous Feature Types AvneeshSaluja - JiříNavrátil + JiříNavrátil 29–38 W13-5005 saluja-navratil-2013-graph @@ -8638,7 +8638,7 @@ From Global to Local Similarities: A Graph-Based Contextualization Method using Distributional Thesauri MartinRiedl - ChrisBiemann + ChrisBiemann 39–43 W13-5006 riedl-biemann-2013-global @@ -8699,7 +8699,7 @@ Proceedings of the Workshop on NLP for Medicine and Biology associated with RANLP 2013 W13-51 GuerganaSavova - Kevin BretonnelCohen + Kevin BretonnelCohen GaliaAngelova INCOMA Ltd. Shoumen, BULGARIA
Hissar, Bulgaria
@@ -8714,7 +8714,7 @@ Active Learning for Phenotyping Tasks DmitriyDligach - TimothyMiller + TimothyMiller GuerganaSavova 1–8 W13-5101 @@ -8722,10 +8722,10 @@ Finding Negative Symptoms of Schizophrenia in Patient Records - GenevieveGorrell + GenevieveGorrell AngusRoberts RichardJackson - RobertStewart + RobertStewart 9–17 W13-5102 gorrell-etal-2013-finding @@ -8759,9 +8759,9 @@ Mariekevan Erp BrianDavis PetyaOsenova - KirilSimov + KirilSimov GeorgiGeorgiev - PreslavNakov + PreslavNakov INCOMA Ltd. Shoumen, BULGARIA
Hissar, Bulgaria
September @@ -8781,7 +8781,7 @@
Evaluation of <fixed-case>SPARQL</fixed-case> query generation from natural language questions - K. BretonnelCohen + K. BretonnelCohen Jin-DongKim 3–7 W13-5202 @@ -8789,8 +8789,8 @@ Mining translations from the web of open linked data - John PhilipMcCrae - PhilippCimiano + John PhilipMcCrae + PhilippCimiano 8–11 W13-5203 mccrae-cimiano-2013-mining @@ -8823,7 +8823,7 @@ HuiYang AlistairWillis DavidMorse - Annede Roeck + Annede Roeck 25–32 W13-5207 yang-etal-2013-literature @@ -8876,7 +8876,7 @@ Combining, Adapting and Reusing Bi-texts between Related Languages: Application to Statistical Machine Translation (invited talk) - PreslavNakov + PreslavNakov 1 W13-5301 nakov-2013-combining @@ -8884,7 +8884,7 @@ Language diversity and implications for Language technology in the Multilingual <fixed-case>E</fixed-case>urope CristinaVertan - Walthervon Hahn + Walthervon Hahn 2–6 W13-5302 vertan-von-hahn-2013-language @@ -8892,9 +8892,9 @@ Corpus development for machine translation between standard and dialectal varieties BarryHaddow - AdolfoHernández + AdolfoHernández FriedrichNeubarth - HaraldTrost + HaraldTrost 7–14 W13-5303 haddow-etal-2013-corpus @@ -8918,7 +8918,7 @@ Lexicon induction and part-of-speech tagging of non-resourced languages without any bilingual resources YvesScherrer - BenoîtSagot + BenoîtSagot 30–39 W13-5306 scherrer-sagot-2013-lexicon @@ -8926,7 +8926,7 @@ The Mysterious Letter <fixed-case>J</fixed-case> AndjelkaZečević - StašaVujičić Stanković + StašaVujičić Stanković 40–44 W13-5307 zecevic-vujicic-stankovic-2013-mysterious @@ -8936,7 +8936,7 @@ Proceedings of the 6th International Conference on Generative Approaches to the Lexicon (GL2013) W13-54 - JamesPustejovsky + JamesPustejovsky Association for Computational Linguistics
Pisa, Italy
September @@ -8966,7 +8966,7 @@ To Coerce or Not to Coerce: A Corpus-based Exploration of Some Complement Coercion Verbs in <fixed-case>C</fixed-case>hinese Chan-ChiaHsu - Shu-KaiHsieh + Shu-KaiHsieh 13–20 W13-5403 hsu-hsieh-2013-coerce @@ -8975,7 +8975,7 @@ Towards the automatic classification of complex-type nominals LaurenRomeo SaraMendes - NúriaBel + NúriaBel 21–28 W13-5404 romeo-etal-2013-towards @@ -8998,9 +8998,9 @@ Expanding <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et with <fixed-case>S</fixed-case>ketch <fixed-case>E</fixed-case>ngine - ClaireBonial + ClaireBonial OrinHargraves - MarthaPalmer + MarthaPalmer 44–53 W13-5407 bonial-etal-2013-expanding @@ -9015,9 +9015,9 @@ <fixed-case>G</fixed-case>enerative <fixed-case>L</fixed-case>exicon Theory and Linguistic Linked Open Data - FahadKhan + FahadKhan FrancescaFrontini - RiccardoDel Gratta + RiccardoDel Gratta MonicaMonachini ValeriaQuochi 62–69 @@ -9029,7 +9029,7 @@ IreneRusso FrancescaFrontini IreneDe Felice - FahadKhan + FahadKhan MonicaMonachini 70–75 W13-5410 @@ -9038,8 +9038,8 @@ Class-based Word Sense Induction for dot-type nominals LaurenRomeo - HéctorMartínez Alonso - NúriaBel + HéctorMartínez Alonso + NúriaBel 76–83 W13-5411 romeo-etal-2013-class @@ -9070,7 +9070,7 @@ Features of Verb Complements in Co-composition: A case study of <fixed-case>C</fixed-case>hinese baking verb using <fixed-case>W</fixed-case>eibo corpus Yu-YunChang - Shu-KaiHsieh + Shu-KaiHsieh 106–114 W13-5415 chang-hsieh-2013-features @@ -9078,7 +9078,7 @@ A Lexico-Semantic Analysis of <fixed-case>C</fixed-case>hinese Locality Phrases - A Topic Clustering Approach August F.Y.Chao - Siaw-FongChung + Siaw-FongChung 115–124 W13-5416 chao-chung-2013-lexico @@ -9095,9 +9095,9 @@ Proceedings of the 2nd Workshop on Linked Data in Linguistics (LDL-2013): Representing and linking lexicons, terminologies and other language data W13-55 ChristianChiarcos - PhilippCimiano + PhilippCimiano ThierryDeclerck - John P.McCrae + John P.McCrae Association for Computational Linguistics
Pisa, Italy
September @@ -9123,9 +9123,9 @@ Linguistic Linked Data for Sentiment Analysis PaulBuitelaar - MihaelArcan - CarlosIglesias - FernandoSánchez-Rada + MihaelArcan + CarlosIglesias + FernandoSánchez-Rada CarloStrapparava 1 - 8 W13-5502 @@ -9133,9 +9133,9 @@ Renewing and Revising <fixed-case>S</fixed-case>em<fixed-case>L</fixed-case>ink - ClaireBonial + ClaireBonial KevinStowe - MarthaPalmer + MarthaPalmer 9 - 17 W13-5503 bonial-etal-2013-renewing @@ -9143,7 +9143,7 @@ <fixed-case>LIME</fixed-case>: Towards a Metadata Module for Ontolex ManuelFiorelli - Maria TeresaPazienza + Maria TeresaPazienza ArmandoStellato 8 - 27 W13-5504 @@ -9151,7 +9151,7 @@ Lemon-aid: using Lemon to aid quantitative historical linguistic analysis - StevenMoran + StevenMoran MartinBrümmer 28 - 33 W13-5505 @@ -9159,7 +9159,7 @@ Transforming the Data Transcription and Analysis Tool Metadata and Labels into a Linguistic Linked Open Data Cloud Resource - AntonioPareja-Lora + AntonioPareja-Lora MaríaBlume BarbaraLust 34 - 43 @@ -9179,7 +9179,7 @@ Linguistic Resources Enhanced with Geospatial Information RichardLittauer BorisVillazon-Terrazas - StevenMoran + StevenMoran 53 - 58 W13-5508 littauer-etal-2013-linguistic @@ -9201,7 +9201,7 @@ Migrating Psycholinguistic Semantic Feature Norms into Linked Data in Linguistics - YoshihikoHayashi + YoshihikoHayashi 70 - 75 W13-5511 hayashi-2013-migrating @@ -9209,7 +9209,7 @@ Towards the establishment of a linguistic linked data network for <fixed-case>I</fixed-case>talian RobertoBartolini - RiccardoDel Gratta + RiccardoDel Gratta FrancescaFrontini 76 - 81 W13-5512 @@ -9222,7 +9222,7 @@ W13-56 StephanOepen KristinHagen - Janne BondiJohannessen + Janne BondiJohannessen Linköping University Electronic Press, Sweden
Oslo, Norway
May @@ -9235,7 +9235,7 @@ Invited Keynote: The Conversational User Interface - RonKaplan + RonKaplan 1–1 W13-5601 kaplan-2013-invited @@ -9249,7 +9249,7 @@ Invited Keynote: 6,909 Reasons to Mess Up Your Data - AndersSøgaard + AndersSøgaard 5–5 W13-5603 sogaard-2013-invited @@ -9270,14 +9270,14 @@ Experiences in Building the Let’s <fixed-case>MT</fixed-case>! Portal on <fixed-case>A</fixed-case>mazon <fixed-case>EC</fixed-case>2 - JörgTiedemann + JörgTiedemann 11–11 W13-5606 tiedemann-2013-experiences Using Constraint Grammar for Chunking - EckhardBick + EckhardBick 13–26 W13-5607 bick-2013-using @@ -9285,8 +9285,8 @@ Features Indicating Readability in <fixed-case>S</fixed-case>wedish Text JohanFalkenjack - KatarinaHeimann Mühlenbock - ArneJönsson + KatarinaHeimann Mühlenbock + ArneJönsson 27–40 W13-5608 falkenjack-etal-2013-features @@ -9317,7 +9317,7 @@ Exploring Features for Named Entity Recognition in <fixed-case>L</fixed-case>ithuanian Text Corpus - JurgitaKapočiūtė-Dzikienė + JurgitaKapočiūtė-Dzikienė AndersNøklestad Janne BondiJohannessen AlgisKrupavičius @@ -9359,16 +9359,16 @@ <fixed-case>N</fixed-case>ordic and <fixed-case>B</fixed-case>altic Wordnets Aligned and Compared through “<fixed-case>W</fixed-case>ord<fixed-case>T</fixed-case>ies” - BoletteSandford Pedersen + BoletteSandford Pedersen LarsBorin MarkusForsberg NeemeKahusk - KristerLindén + KristerLindén JyrkiNiemi NiklasNisbeth LarsNygaard HeiliOrav - EirikurRögnvaldsson + EirikurRögnvaldsson MitchellSeaton KadriVider KaarloVoionmaa @@ -9379,7 +9379,7 @@ Normalisation of Historical Text Using Context-Sensitive Weighted <fixed-case>L</fixed-case>evenshtein Distance and Compound Splitting EvaPettersson - BeátaMegyesi + BeátaMegyesi JoakimNivre 163–179 W13-5617 @@ -9388,22 +9388,22 @@ Modeling <fixed-case>OOV</fixed-case> Words With Letter N-Grams in Statistical Taggers: Preliminary Work in Biomedical Entity Recognition TeemuRuokolainen - MiikkaSilfverberg + MiikkaSilfverberg 181–193 W13-5618 ruokolainen-silfverberg-2013-modeling <fixed-case>B</fixed-case>altic and <fixed-case>N</fixed-case>ordic Parts of the <fixed-case>E</fixed-case>uropean Linguistic Infrastructure - IngunaSkadiņa - AndrejsVasiļjevs + IngunaSkadiņa + AndrejsVasiļjevs LarsBorin - KristerLindén - GyriLosnegaard + KristerLindén + GyriLosnegaard SussiOlsen - BoletteSandford Pedersen + BoletteSandford Pedersen RobertsRozis - KoenraadDe Smedt + KoenraadDe Smedt 195–211 W13-5619 skadina-etal-2013-baltic @@ -9420,7 +9420,7 @@ Using Factual Density to Measure Informativeness of Web Documents ChristopherHorn AlisaZhila - AlexanderGelbukh + AlexanderGelbukh RomanKern ElisabethLex 227–238 @@ -9438,23 +9438,23 @@ Bootstrapping an Unsupervised Approach for Classifying Agreement and Disagreement BerndOpitz - CäciliaZirn + CäciliaZirn 253–265 W13-5623 opitz-zirn-2013-bootstrapping Morphological Analysis with Limited Resources: <fixed-case>L</fixed-case>atvian Example - PēterisPaikens + PēterisPaikens LauraRituma - LaumaPretkalniņa + LaumaPretkalniņa 267–277 W13-5624 paikens-etal-2013-morphological Statistical Syntactic Parsing for <fixed-case>L</fixed-case>atvian - LaumaPretkalniņa + LaumaPretkalniņa LauraRituma 279–289 W13-5625 @@ -9490,7 +9490,7 @@ Analysis of Phonetic Transcription for <fixed-case>D</fixed-case>anish Automatic Speech Recognition - AndreasSøeborg Kirkedal + AndreasSøeborg Kirkedal 321–330 W13-5629 soeborg-kirkedal-2013-analysis @@ -9507,8 +9507,8 @@ Building an Open-Source Development Infrastructure for Language Technology Projects - Sjur N.Moshagen - TommiPirinen + Sjur N.Moshagen + TommiPirinen TrondTrosterud 343–352 W13-5631 @@ -9534,7 +9534,7 @@ Statistical Machine Translation with Readability Constraints SaraStymne - JörgTiedemann + JörgTiedemann ChristianHardmeier JoakimNivre 375–386 @@ -9555,7 +9555,7 @@ Tone Restoration in Transcribed Kammu: Decision-List Word Sense Disambiguation for an Unwritten Language - MarcusUneson + MarcusUneson 399–409 W13-5636 uneson-2013-tone @@ -9574,7 +9574,7 @@ LiesbethAugustinus VincentVandeghinste InekeSchuurman - FrankVan Eynde + FrankVan Eynde 423–428 W13-5638 augustinus-etal-2013-example @@ -9606,7 +9606,7 @@ Finite State Applications with Javascript MansHulden - MiikkaSilfverberg + MiikkaSilfverberg JeridFrancom 441–446 W13-5641 @@ -9627,9 +9627,9 @@ PaulMeurer HelgeDyvik VictoriaRosén - KoenraadDe Smedt - Gunn IngerLyse - GyriSmørdal Losnegaard + KoenraadDe Smedt + Gunn IngerLyse + GyriSmørdal Losnegaard MarthaThunes 453–458 W13-5643 @@ -9647,7 +9647,7 @@ Proceedings of the 13th International Conference on Parsing Technologies (IWPT 2013) W13-57 - HarryBunt + HarryBunt KhalilSima'an LiangHuang Assocation for Computational Linguistics @@ -9671,8 +9671,8 @@ An Efficient Typed Feature Structure Index: Theory and Implementation - BerndKiefer - Hans-UlrichKrieger + BerndKiefer + Hans-UlrichKrieger 17–25 W13-5702 kiefer-krieger-2013-efficient @@ -9687,7 +9687,7 @@ Comparative Evaluation of Argument Extraction Algorithms in Discourse Relation Parsing - EvgenyStepanov + EvgenyStepanov GiuseppeRiccardi 36–44 W13-5704 @@ -9704,7 +9704,7 @@ Improving a symbolic parser through partially supervised learning - Éricde la Clergerie + Éricde la Clergerie 54–72 W13-5706 de-la-clergerie-2013-improving @@ -9714,8 +9714,8 @@ AngelinaIvanova StephanOepen RebeccaDridan - DanFlickinger - LiljaØvrelid + DanFlickinger + LiljaØvrelid 63–72 W13-5707 ivanova-etal-2013-different @@ -9751,7 +9751,7 @@ Active Learning for Dependency Parsing by A Committee of Parsers SaeedMajidi - GregoryCrane + GregoryCrane 98–105 W13-5711 majidi-crane-2013-active @@ -9776,7 +9776,7 @@ JungyeulPark DaisukeKawahara SadaoKurohashi - Key-SunChoi + Key-SunChoi 120–126 W13-5714 park-etal-2013-towards diff --git a/data/xml/W14.xml b/data/xml/W14.xml index 9098a5109c..e12039b0b7 100644 --- a/data/xml/W14.xml +++ b/data/xml/W14.xml @@ -21,7 +21,7 @@ PuryaAliabadi Mohammad SinaAhmadi ShahinSalavati - Kyumars SheykhEsmaili + Kyumars SheykhEsmaili 1-6 W14-0101 aliabadi-etal-2014-towards @@ -35,7 +35,7 @@ <fixed-case>O</fixed-case>nto.<fixed-case>PT</fixed-case>: recent developments of a large public domain <fixed-case>P</fixed-case>ortuguese wordnet - Hugo GonçaloOliveira + Hugo GonçaloOliveira PauloGomes 16-22 W14-0103 @@ -52,7 +52,7 @@ <fixed-case>W</fixed-case>o<fixed-case>N</fixed-case>e<fixed-case>F</fixed-case>, an improved, expanded and evaluated automatic <fixed-case>F</fixed-case>rench translation of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et QuentinPradet Gaëlde Chalendar - JeanneBaguenier Desormeaux + JeanneBaguenier Desormeaux 32-39 W14-0105 pradet-etal-2014-wonef @@ -69,7 +69,7 @@ Modeling Prefix and Particle Verbs in <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et ChristinaHoppermann - ErhardHinrichs + ErhardHinrichs 49-54 W14-0107 hoppermann-hinrichs-2014-modeling @@ -86,7 +86,7 @@ Aligning Word Senses in <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et and the <fixed-case>DWDS</fixed-case> Dictionary of the <fixed-case>G</fixed-case>erman Language VerenaHenrich - ErhardHinrichs + ErhardHinrichs ReinhildBarkey 63-70 W14-0109 @@ -103,7 +103,7 @@ <fixed-case>J</fixed-case>ava Libraries for Accessing the <fixed-case>P</fixed-case>rinceton <fixed-case>W</fixed-case>ordnet: Comparison and Evaluation - MarkFinlayson + MarkFinlayson 78-85 W14-0111 finlayson-2014-java @@ -114,7 +114,7 @@ NehaPrabhugaonkar VenkateshPrabhu RamdasKarmali - JyotiPawar + JyotiPawar 86-94 W14-0112 nagvenkar-etal-2014-concept @@ -122,7 +122,7 @@ Use of Sense Marking for Improving <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Coverage NehaPrabhugaonkar - JyotiPawar + JyotiPawar 95-99 W14-0113 prabhugaonkar-pawar-2014-use @@ -160,7 +160,7 @@ Enriching <fixed-case>S</fixed-case>erbian<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and Electronic Dictionaries with Terms from the Culinary Domain - Staša VujičićStanković + Staša VujičićStanković CvetanaKrstev DuškoVitas 127-132 @@ -185,15 +185,15 @@ Taking stock of the <fixed-case>A</fixed-case>frican <fixed-case>W</fixed-case>ordnet project: 5 years of development MarissaGriesel - SonjaBosch + SonjaBosch 148-153 W14-0120 griesel-bosch-2014-taking <fixed-case>R</fixed-case>u<fixed-case>T</fixed-case>hes Linguistic Ontology vs. <fixed-case>R</fixed-case>ussian Wordnets - NataliaLoukachevitch - BorisDobrov + NataliaLoukachevitch + BorisDobrov 154-162 W14-0121 loukachevitch-dobrov-2014-ruthes @@ -202,7 +202,7 @@ One Lexicon, Two Structures: So What Gives? NabilGader SandrineOllinger - AlainPolguère + AlainPolguère 163-171 W14-0122 gader-etal-2014-one @@ -219,7 +219,7 @@ Graph Based Algorithm for Automatic Domain Segmentation of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et BrijeshBhatt SubhashKunnath - PushpakBhattacharyya + PushpakBhattacharyya 178-185 W14-0124 bhatt-etal-2014-graph @@ -227,7 +227,7 @@ Parse Ranking with Semantic Dependencies and <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et XiaochengYin - Jung-JaeKim + Jung-JaeKim ZinaidaPozen FrancisBond 186-193 @@ -237,10 +237,10 @@ Do not do processing, when you can look up: Towards a Discrimination Net for <fixed-case>WSD</fixed-case> DipteshKanojia - PushpakBhattacharyya + PushpakBhattacharyya RajDabre SiddharthaGunti - ManishShrivastava + ManishShrivastava 194-200 W14-0126 kanojia-etal-2014-processing @@ -271,7 +271,7 @@ Facilitating Multi-Lingual Sense Annotation: Human Mediated Lemmatizer - PushpakBhattacharyya + PushpakBhattacharyya AnkitBahuguna LavitaTalukdar BornaliPhukan @@ -299,7 +299,7 @@ Shikhar Kr.Sarma DibyajyotiSarmah RatulDeka - AnupBarman + AnupBarman JumiSarmah HimadriBharali MayashreeMahanta @@ -321,7 +321,7 @@ <fixed-case>A</fixed-case>ssamese <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et based Quality Enhancement of Bilingual Machine Translation System - AnupBarman + AnupBarman JumiSarmah Shikhar KumarSarma 256-261 @@ -338,9 +338,9 @@ News about the <fixed-case>R</fixed-case>omanian <fixed-case>W</fixed-case>ordnet - Verginica BarbuMititelu - Ștefan DanielDumitrescu - DanTufiș + Verginica BarbuMititelu + Ștefan DanielDumitrescu + DanTufiș 268-275 W14-0137 mititelu-etal-2014-news @@ -354,7 +354,7 @@ Leveraging Morpho-semantics for the Discovery of Relations in <fixed-case>C</fixed-case>hinese <fixed-case>W</fixed-case>ordnet - Shu-KaiHsieh + Shu-KaiHsieh Yu-YunChang 283-289 W14-0139 @@ -373,7 +373,7 @@ Terminology in <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and in pl<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et MartaDobrowolska - StanSzpakowicz + StanSzpakowicz 299-303 W14-0141 dobrowolska-szpakowicz-2014-terminology @@ -383,7 +383,7 @@ MarekMaziarz MaciejPiasecki EwaRudnicka - StanSzpakowicz + StanSzpakowicz 304-312 W14-0142 maziarz-etal-2014-plwordnet @@ -413,8 +413,8 @@ SudhaBhingardive TanujaAjotikar IrawatiKulkarni - MalharKulkarni - PushpakBhattacharyya + MalharKulkarni + PushpakBhattacharyya 324-329 W14-0145 bhingardive-etal-2014-semi @@ -424,7 +424,7 @@ MarekMaziarz MaciejPiasecki EwaRudnicka - StanSzpakowicz + StanSzpakowicz 330-337 W14-0146 maziarz-etal-2014-registers @@ -433,7 +433,7 @@ <fixed-case>I</fixed-case>ndo<fixed-case>W</fixed-case>ordnet Visualizer: A Graphical User Interface for Browsing and Exploring Wordnets of <fixed-case>I</fixed-case>ndian Languages Devendra SinghChaplot SudhaBhingardive - PushpakBhattacharyya + PushpakBhattacharyya 338-345 W14-0147 chaplot-etal-2014-indowordnet @@ -457,9 +457,9 @@ First steps towards a Predicate Matrix - MaddalenLópez de Lacalle + MaddalenLópez de Lacalle EgoitzLaparra - GermanRigau + GermanRigau 363-371 W14-0150 lopez-de-lacalle-etal-2014-first @@ -474,7 +474,7 @@ Embedding <fixed-case>N</fixed-case>om<fixed-case>L</fixed-case>ex-<fixed-case>BR</fixed-case> nominalizations into <fixed-case>O</fixed-case>pen<fixed-case>W</fixed-case>ordnet-<fixed-case>PT</fixed-case> AlexandreRademaker - Valeriade Paiva + Valeriade Paiva Gerardde Melo Livy Maria RealCoelho 378-382 @@ -484,10 +484,10 @@ <fixed-case>O</fixed-case>pen<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-<fixed-case>PT</fixed-case>: A Project Report AlexandreRademaker - Valeriade Paiva + Valeriade Paiva Gerardde Melo LivyReal - MairaGatti + MairaGatti 383-390 W14-0153 rademaker-etal-2014-openwordnet @@ -514,12 +514,12 @@ Proceedings of the EACL 2014 Workshop on Dialogue in Motion W14-02 TiphaineDalmas - JanaGötze + JanaGötze JoakimGustafson - SrinivasanJanarthanam + SrinivasanJanarthanam JanKleindienst - ChristianMueller - AmandaStent + ChristianMueller + AmandaStent AndreasVlachos 10.3115/v1/W14-02 Association for Computational Linguistics @@ -557,7 +557,7 @@ Click or Type: An Analysis of Wizard’s Interaction for Future Wizard Interface Design SrinivasanJanarthanam - RobinHill + RobinHill AnnaDickinson MorganFredriksson 19–27 @@ -567,11 +567,11 @@ Recipes for building voice search <fixed-case>UI</fixed-case>s for automotive - MartinLabsky + MartinLabsky LadislavKunc TomasMacek JanKleindienst - JanVystrcil + JanVystrcil 28–32 W14-0204 10.3115/v1/W14-0204 @@ -597,7 +597,7 @@ Collaborative Exploration in Human-Robot Teams: What’s in their Corpora of Dialog, Video, & <fixed-case>LIDAR</fixed-case> Messages? - ClareVoss + ClareVoss TaylorCassidy DouglasSummers-Stay 43–47 @@ -616,10 +616,10 @@ Mostly Passive Information Delivery – a Prototype - JanVystrčil + JanVystrčil TomasMacek DavidLuksch - MartinLabský + MartinLabský LadislavKunc JanKleindienst TerezaKašparová @@ -630,7 +630,7 @@ Navigation Dialog of Blind People: Recovery from Getting Lost - JanVystrcil + JanVystrcil IvoMaly JanBalata ZdenekMikovec @@ -644,7 +644,7 @@ AasishPappu MingSun SeshadriSridharan - AlexanderRudnicky + AlexanderRudnicky 63–67 W14-0211 10.3115/v1/W14-0211 @@ -653,7 +653,7 @@ Situationally Aware In-Car Information Presentation Using Incremental Speech Generation: Safer, and More Effective SpyrosKousidis - CaseyKennington + CaseyKennington TimoBaumann HendrikBuschmeier StefanKopp @@ -681,9 +681,9 @@ UlrichGermann MichaelCarl PhilippKoehn - GermánSanchis-Trilles - FranciscoCasacuberta - RobinHill + GermánSanchis-Trilles + FranciscoCasacuberta + RobinHill SharonO’Brien 10.3115/v1/W14-03 Association for Computational Linguistics @@ -698,8 +698,8 @@ Word Confidence Estimation for <fixed-case>SMT</fixed-case> N-best List Re-ranking - Ngoc-QuangLuong - LaurentBesacier + Ngoc-QuangLuong + LaurentBesacier BenjaminLecouteux 1–9 W14-0301 @@ -708,8 +708,8 @@ Proofreading Human Translations with an <fixed-case>E</fixed-case>-pen - VicentAlabau - Luis A.Leiva + VicentAlabau + Luis A.Leiva 10–15 W14-0302 10.3115/v1/W14-0302 @@ -743,7 +743,7 @@ Measuring the Cognitive Effort of Literal Translation Processes - MoritzSchaeffer + MoritzSchaeffer MichaelCarl 29–37 W14-0306 @@ -773,7 +773,7 @@ Black-box integration of heterogeneous bilingual resources into an interactive translation system Juan AntonioPérez-Ortiz DanielTorregrosa - MikelForcada + MikelForcada 57–65 W14-0309 10.3115/v1/W14-0309 @@ -784,7 +784,7 @@ VioletaSeretan JohannRoturier DavidSilva - PierretteBouillon + PierretteBouillon 66–71 W14-0310 10.3115/v1/W14-0310 @@ -793,9 +793,9 @@ Real Time Adaptive Machine Translation for Post-Editing with cdec and <fixed-case>T</fixed-case>rans<fixed-case>C</fixed-case>enter MichaelDenkowski - AlonLavie + AlonLavie IsabelLacruz - ChrisDyer + ChrisDyer 72–77 W14-0311 10.3115/v1/W14-0311 @@ -812,7 +812,7 @@ Online Word Alignment for Online Adaptive Machine Translation - M. AminFarajian + M. AminFarajian NicolaBertoldi MarcelloFederico 84–92 @@ -883,9 +883,9 @@ Some Issues on the Normalization of a Corpus of Products Reviews in <fixed-case>P</fixed-case>ortuguese - MagaliSanches Duran + MagaliSanches Duran LucasAvanço - SandraAluísio + SandraAluísio ThiagoPardo Maria da GraçaVolpe Nunes 22–28 @@ -905,7 +905,7 @@ The <fixed-case>PAISÀ</fixed-case> Corpus of <fixed-case>I</fixed-case>talian Web Texts VerenaLyding - EgonStemle + EgonStemle ClaudiaBorghetti MarcoBrunello SaraCastagnoli @@ -924,7 +924,7 @@ Proceedings of the 5th Workshop on Cognitive Aspects of Computational Language Learning (CogACLL) W14-05 AlessandroLenci - MuntsaPadró + MuntsaPadró ThierryPoibeau AlineVillavicencio 10.3115/v1/W14-05 @@ -995,7 +995,7 @@ JudithGaspers MaximilianPanzner AndreLemme - PhilippCimiano + PhilippCimiano Katharina J.Rohlfing SebastianWrede 30–37 @@ -1014,7 +1014,7 @@ How well can a corpus-derived co-occurrence network simulate human associative behavior? - GemmaBel Enguix + GemmaBel Enguix ReinhardRapp MichaelZock 43–48 @@ -1025,7 +1025,7 @@ Agent-based modeling of language evolution TorvaldLekvam - BjörnGambäck + BjörnGambäck LarsBungum 49–54 W14-0510 @@ -1046,9 +1046,9 @@ Proceedings of the 8th Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities (LaTeCH) W14-06 - KalliopiZervanou + KalliopiZervanou CristinaVertan - Antalvan den Bosch + Antalvan den Bosch CarolineSporleder 10.3115/v1/W14-06 Association for Computational Linguistics @@ -1067,7 +1067,7 @@ ChristophTeichmann GerhardHeyer MonicaBerti - GregoryCrane + GregoryCrane 1–8 W14-0601 10.3115/v1/W14-0601 @@ -1085,7 +1085,7 @@ Bootstrapping a historical commodities lexicon with <fixed-case>SKOS</fixed-case> and <fixed-case>DB</fixed-case>pedia EwanKlein - BeatriceAlex + BeatriceAlex JimClifford 13–21 W14-0603 @@ -1108,7 +1108,7 @@ A Multilingual Evaluation of Three Spelling Normalisation Methods for Historical Text EvaPettersson - BeátaMegyesi + BeátaMegyesi JoakimNivre 32–41 W14-0605 @@ -1137,7 +1137,7 @@ Automated Error Detection in Digitized Cultural Heritage Documents KataGábor - BenoîtSagot + BenoîtSagot 56–61 W14-0608 10.3115/v1/W14-0608 @@ -1233,7 +1233,7 @@ Automatic Wayang Ontology Construction using Relation Extraction from Free Text HadaiqSanabila - RuliManurung + RuliManurung 128–136 W14-0618 10.3115/v1/W14-0618 @@ -1245,9 +1245,9 @@ Proceedings of the EACL 2014 Workshop on Computational Approaches to Causality in Language (CAtoCL) W14-07 OleksandrKolomiyets - Marie-FrancineMoens - MarthaPalmer - JamesPustejovsky + Marie-FrancineMoens + MarthaPalmer + JamesPustejovsky StevenBethard 10.3115/v1/W14-07 Association for Computational Linguistics @@ -1272,9 +1272,9 @@ Annotating Causality in the <fixed-case>T</fixed-case>emp<fixed-case>E</fixed-case>val-3 Corpus ParamitaMirza - RacheleSprugnoli + RacheleSprugnoli SaraTonelli - ManuelaSperanza + ManuelaSperanza 10–19 W14-0702 10.3115/v1/W14-0702 @@ -1283,7 +1283,7 @@ Automatic Detection of Causal Relations in <fixed-case>G</fixed-case>erman Multilogs TinaBögel - AnnetteHautli-Janisz + AnnetteHautli-Janisz SebastianSulger MiriamButt 20–27 @@ -1311,7 +1311,7 @@ Likelihood of External Causation in the Structure of Events - TanjaSamardžić + TanjaSamardžić PaolaMerlo 40–47 W14-0706 @@ -1321,7 +1321,7 @@ Recognizing Causality in Verb-Noun Pairs via Noun and Verb Semantics MehwishRiaz - RoxanaGirju + RoxanaGirju 48–57 W14-0707 10.3115/v1/W14-0707 @@ -1335,7 +1335,7 @@ ValiaKordoni MarkusEgg AgataSavary - EricWehrli + EricWehrli StefanEvert 10.3115/v1/W14-08 Association for Computational Linguistics @@ -1359,7 +1359,7 @@ A Supervised Model for Extraction of Multiword Expressions, Based on Statistical Context Features MeghdadFarahmand - RonaldoMartins + RonaldoMartins 10–16 W14-0802 10.3115/v1/W14-0802 @@ -1367,7 +1367,7 @@ <fixed-case>VPCT</fixed-case>agger: Detecting Verb-Particle Constructions With Syntax-Based Methods - IstvánNagy T. + IstvánNagy T. VeronikaVincze 17–25 W14-0803 @@ -1385,7 +1385,7 @@ Parsing <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek verb <fixed-case>MWE</fixed-case>s with <fixed-case>LFG</fixed-case>/<fixed-case>XLE</fixed-case> grammars NikiSamaridi - StellaMarkantonatou + StellaMarkantonatou 33–37 W14-0805 10.3115/v1/W14-0805 @@ -1393,9 +1393,9 @@ Evaluation of a Substitution Method for Idiom Transformation in Statistical Machine Translation - GiancarloSalton + GiancarloSalton RobertRoss - JohnKelleher + JohnKelleher 38–42 W14-0806 10.3115/v1/W14-0806 @@ -1404,8 +1404,8 @@ Encoding <fixed-case>MWE</fixed-case>s in a conceptual lexicon AggelikiFotopoulou - StellaMarkantonatou - VoulaGiouli + StellaMarkantonatou + VoulaGiouli 43–47 W14-0807 10.3115/v1/W14-0807 @@ -1413,9 +1413,9 @@ <fixed-case>G</fixed-case>erman Compounds and Statistical Machine Translation. Can they get along? - CarlaParra Escartín + CarlaParra Escartín StephanPeitz - HermannNey + HermannNey 48–56 W14-0808 10.3115/v1/W14-0808 @@ -1435,7 +1435,7 @@ Mickey Mouse is not a Phrase: Improving Relevance in <fixed-case>E</fixed-case>-Commerce with Multiword Expressions PrathyushaSenthil Kumar VamsiSalaka - Tracy HollowayKing + Tracy HollowayKing BrianJohnson 62–66 W14-0810 @@ -1444,8 +1444,8 @@ Encoding of Compounds in <fixed-case>S</fixed-case>wedish <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et - KarinFriberg Heppin - Miriam R LPetruck + KarinFriberg Heppin + Miriam R LPetruck 67–71 W14-0811 10.3115/v1/W14-0811 @@ -1480,7 +1480,7 @@ Detecting change and emergence for multiword expressions MartinEmms - ArunJayapal + ArunJayapal 89–93 W14-0815 10.3115/v1/W14-0815 @@ -1488,10 +1488,10 @@ An Approach to Take Multi-Word Expressions - ClaireBonial + ClaireBonial MeredithGreen JenettePreciado - MarthaPalmer + MarthaPalmer 94–98 W14-0816 10.3115/v1/W14-0816 @@ -1509,7 +1509,7 @@ Feature Norms of <fixed-case>G</fixed-case>erman Noun Compounds StephenRoller - SabineSchulte im Walde + SabineSchulte im Walde 104–108 W14-0818 10.3115/v1/W14-0818 @@ -1520,7 +1520,7 @@ LisPereira ElgaStrafella KevinDuh - YujiMatsumoto + YujiMatsumoto 109–113 W14-0819 10.3115/v1/W14-0819 @@ -1529,7 +1529,7 @@ Unsupervised Construction of a Lexicon and a Repository of Variation Patterns for <fixed-case>A</fixed-case>rabic Modal Multiword Expressions RaniaAl-Sabbagh - RoxanaGirju + RoxanaGirju JanaDiesner 114–123 W14-0820 @@ -1552,7 +1552,7 @@ W14-09 AnnaFeldman AnnaKazantseva - StanSzpakowicz + StanSzpakowicz 10.3115/v1/W14-09 Association for Computational Linguistics
Gothenburg, Sweden
@@ -1567,7 +1567,7 @@ Generating Music from Literature HannahDavis - SaifMohammad + SaifMohammad 1–10 W14-0901 10.3115/v1/W14-0901 @@ -1576,7 +1576,7 @@ Computational analysis to explore authors’ depiction of characters JosephBullard - CeciliaOvesdotter Alm + CeciliaOvesdotter Alm 11–16 W14-0902 10.3115/v1/W14-0902 @@ -1602,7 +1602,7 @@ Structure-based Clustering of Novels - MarionaColl Ardanuy + MarionaColl Ardanuy CarolineSporleder 31–39 W14-0905 @@ -1642,8 +1642,8 @@ Proceedings of the 3rd Workshop on Hybrid Approaches to Machine Translation (HyTra) W14-10 - Rafael E.Banchs - Marta R.Costa-jussà + Rafael E.Banchs + Marta R.Costa-jussà ReinhardRapp PatrikLambert KurtEberle @@ -1703,7 +1703,7 @@ Building a <fixed-case>S</fixed-case>panish-<fixed-case>G</fixed-case>erman Dictionary for Hybrid <fixed-case>MT</fixed-case> - AnneGöhring + AnneGöhring 30–35 W14-1006 10.3115/v1/W14-1006 @@ -1711,9 +1711,9 @@ An Empirical Study of the Impact of Idioms on Phrase Based Statistical Machine Translation of <fixed-case>E</fixed-case>nglish to <fixed-case>B</fixed-case>razilian-<fixed-case>P</fixed-case>ortuguese - GiancarloSalton + GiancarloSalton RobertRoss - JohnKelleher + JohnKelleher 36–41 W14-1007 10.3115/v1/W14-1007 @@ -1722,7 +1722,7 @@ Resumptive Pronoun Detection for <fixed-case>M</fixed-case>odern <fixed-case>S</fixed-case>tandard <fixed-case>A</fixed-case>rabic to <fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case> StephenTratz - ClareVoss + ClareVoss JamalLaoudi 42–47 W14-1008 @@ -1733,7 +1733,7 @@ Automatic Building and Using Parallel Resources for <fixed-case>SMT</fixed-case> from Comparable Corpora SantanuPal ParthaPakray - Sudip KumarNaskar + Sudip KumarNaskar 48–57 W14-1009 10.3115/v1/W14-1009 @@ -1776,7 +1776,7 @@ Deriving de/het gender classification for <fixed-case>D</fixed-case>utch nouns for rule-based <fixed-case>MT</fixed-case> generation tasks BogdanBabych JonathanGeiger - MireiaGinestí Rosell + MireiaGinestí Rosell KurtEberle 75–81 W14-1014 @@ -1803,7 +1803,7 @@ How to overtake <fixed-case>G</fixed-case>oogle in <fixed-case>MT</fixed-case> quality - the <fixed-case>B</fixed-case>altic case - AndrejsVasiljevs + AndrejsVasiljevs 96 W14-1017 10.3115/v1/W14-1017 @@ -1862,7 +1862,7 @@ The impact of near domain transfer on biomedical named entity recognition NigelCollier - Mai-vuTran + Mai-vuTran FerdinandPaster 11–20 W14-1103 @@ -1880,7 +1880,7 @@ Towards Cross-Domain <fixed-case>PDTB</fixed-case>-Style Discourse Parsing - EvgenyStepanov + EvgenyStepanov GiuseppeRiccardi 30–37 W14-1105 @@ -1890,7 +1890,7 @@ Translating <fixed-case>SNOMED</fixed-case> <fixed-case>CT</fixed-case> Terminology into a Minor Language OlatzPerez-de-Viñaspre - MaiteOronoz + MaiteOronoz 38–45 W14-1106 10.3115/v1/W14-1106 @@ -1956,10 +1956,10 @@ Adverse Drug Event prediction combining shallow analysis and machine learning SaraSantiso - ArantzaCasillas + ArantzaCasillas AliciaPérez - MaiteOronoz - KoldoGojenola + MaiteOronoz + KoldoGojenola 85–89 W14-1113 10.3115/v1/W14-1113 @@ -1968,7 +1968,7 @@ Reducing <fixed-case>VSM</fixed-case> data sparseness by generalizing contexts: application to health text mining AmandinePérinet - ThierryHamon + ThierryHamon 90–95 W14-1114 10.3115/v1/W14-1114 @@ -1977,7 +1977,7 @@ Disambiguation of Period Characters in Clinical Narratives MarkusKreuzthaler - StefanSchulz + StefanSchulz 96–100 W14-1115 10.3115/v1/W14-1115 @@ -1985,7 +1985,7 @@ Tuning <fixed-case>H</fixed-case>eidel<fixed-case>T</fixed-case>ime for identifying time expressions in clinical texts in <fixed-case>E</fixed-case>nglish and <fixed-case>F</fixed-case>rench - ThierryHamon + ThierryHamon NataliaGrabar 101–105 W14-1116 @@ -1994,7 +1994,7 @@ Detecting drugs and adverse events from <fixed-case>S</fixed-case>panish social media streams - IsabelSegura-Bedmar + IsabelSegura-Bedmar RicardoRevert PalomaMartínez 106–115 @@ -2036,8 +2036,8 @@ One Step Closer to Automatic Evaluation of Text Simplification Systems - SanjaŠtajner - RuslanMitkov + SanjaŠtajner + RuslanMitkov HoracioSaggion 1–10 W14-1201 @@ -2047,7 +2047,7 @@ Automatic diagnosis of understanding of medical words NataliaGrabar - ThierryHamon + ThierryHamon DanyAmiot 11–20 W14-1202 @@ -2057,7 +2057,7 @@ Exploring Measures of “Readability” for Spoken Language: Analyzing linguistic features of subtitles to identify age-specific <fixed-case>TV</fixed-case> programs SowmyaVajjala - DetmarMeurers + DetmarMeurers 21–29 W14-1203 10.3115/v1/W14-1203 @@ -2075,7 +2075,7 @@ An eye-tracking evaluation of some parser complexity metrics - Matthew J.Green + Matthew J.Green 38–46 W14-1205 10.3115/v1/W14-1205 @@ -2115,10 +2115,10 @@ Improving Readability of <fixed-case>S</fixed-case>wedish Electronic Health Records through Lexical Simplification: First Results - GintarėGrigonyte + GintarėGrigonyte MariaKvist SumithraVelupillai - MatsWirén + MatsWirén 74–83 W14-1209 10.3115/v1/W14-1209 @@ -2136,7 +2136,7 @@ <fixed-case>EACL</fixed-case> - Expansion of Abbreviations in <fixed-case>CL</fixed-case>inical text LisaTengstrand - BeátaMegyesi + BeátaMegyesi AronHenriksson MartinDuneld MariaKvist @@ -2147,8 +2147,8 @@ A Quantitative Insight into the Impact of Translation on Readability - Alina MariaCiobanu - LiviuDinu + Alina MariaCiobanu + LiviuDinu 104–113 W14-1212 10.3115/v1/W14-1212 @@ -2157,7 +2157,7 @@ Classifying easy-to-read texts without parsing JohanFalkenjack - ArneJönsson + ArneJönsson 114–122 W14-1213 10.3115/v1/W14-1213 @@ -2165,7 +2165,7 @@ An Analysis of Crowdsourced Text Simplifications - MarceloAmancio + MarceloAmancio LuciaSpecia 123–130 W14-1214 @@ -2174,8 +2174,8 @@ An evaluation of syntactic simplification rules for people with autism - RichardEvans - ConstantinOrăsan + RichardEvans + ConstantinOrăsan IustinDornescu 131–140 W14-1215 @@ -2188,9 +2188,9 @@ Proceedings of the 5th Workshop on Language Analysis for Social Media (LASM) W14-13 AtefehFarzindar - DianaInkpen + DianaInkpen MichaelGamon - MeenaNagarajan + MeenaNagarajan 10.3115/v1/W14-13 Association for Computational Linguistics
Gothenburg, Sweden
@@ -2205,7 +2205,7 @@ Mining Lexical Variants from Microblogs: An Unsupervised Multilingual Approach AlejandroMosquera - PalomaMoreda Pozo + PalomaMoreda Pozo 1–7 W14-1301 10.3115/v1/W14-1301 @@ -2213,9 +2213,9 @@ Estimating Time to Event from Tweets Using Temporal Expressions - AliHürriyetoǧlu + AliHürriyetoǧlu NellekeOostdijk - Antalvan den Bosch + Antalvan den Bosch 8–16 W14-1302 10.3115/v1/W14-1302 @@ -2224,7 +2224,7 @@ Accurate Language Identification of <fixed-case>T</fixed-case>witter Messages MarcoLui - TimothyBaldwin + TimothyBaldwin 17–25 W14-1303 10.3115/v1/W14-1303 @@ -2234,7 +2234,7 @@ The (Un)Predictability of Emotional Hashtags in <fixed-case>T</fixed-case>witter FlorianKunneman ChristineLiebrecht - Antalvan den Bosch + Antalvan den Bosch 26–34 W14-1304 10.3115/v1/W14-1304 @@ -2243,7 +2243,7 @@ Finding Arguing Expressions of Divergent Viewpoints in Online Debates AmineTrabelsi - Osmar R.Zaïane + Osmar R.Zaïane 35–43 W14-1305 10.3115/v1/W14-1305 @@ -2261,7 +2261,7 @@ Vowel and Diacritic Restoration for Social Media Texts KübraAdali - GülşenEryiǧit + GülşenEryiǧit 53–61 W14-1307 10.3115/v1/W14-1307 @@ -2269,8 +2269,8 @@ A Cascaded Approach for Social Media Text Normalization of <fixed-case>T</fixed-case>urkish - DilaraTorunoǧlu - GülşenEryiǧit + DilaraTorunoǧlu + GülşenEryiǧit 62–70 W14-1308 10.3115/v1/W14-1308 @@ -2352,7 +2352,7 @@ A Type-Driven Tensor-Based Semantics for <fixed-case>CCG</fixed-case> JeanMaillard StephenClark - EdwardGrefenstette + EdwardGrefenstette 46–54 W14-1406 10.3115/v1/W14-1406 @@ -2411,11 +2411,11 @@ Proceedings of the 2nd Workshop on Continuous Vector Space Models and their Compositionality (CVSC) W14-15 AlexandreAllauzen - RaffaellaBernardi - EdwardGrefenstette + RaffaellaBernardi + EdwardGrefenstette HugoLarochelle - ChristopherManning - Scott Wen-tauYih + ChristopherManning + Scott Wen-tauYih 10.3115/v1/W14-15 Association for Computational Linguistics
Gothenburg, Sweden
@@ -2439,7 +2439,7 @@ Distributional Composition using Higher-Order Dependency Vectors JulieWeeds - DavidWeir + DavidWeir JeremyReffin 11–20 W14-1502 @@ -2481,7 +2481,7 @@ Proceedings of the Eighteenth Conference on Computational Natural Language Learning W14-16 RoserMorante - Scott Wen-tauYih + Scott Wen-tauYih 10.3115/v1/W14-16 Association for Computational Linguistics
Ann Arbor, Michigan
@@ -2495,11 +2495,11 @@ What’s in a p-value in <fixed-case>NLP</fixed-case>? - AndersSøgaard - AndersJohannsen - BarbaraPlank + AndersSøgaard + AndersJohannsen + BarbaraPlank DirkHovy - HectorMartínez Alonso + HectorMartínez Alonso 1–10 W14-1601 10.3115/v1/W14-1601 @@ -2529,7 +2529,7 @@ MohamedAl-Badrashiny RamyEskander NizarHabash - OwenRambow + OwenRambow 30–38 W14-1604 10.3115/v1/W14-1604 @@ -2566,7 +2566,7 @@ Looking for Hyponyms in Vector Space MarekRei - TedBriscoe + TedBriscoe 68–77 W14-1608 10.3115/v1/W14-1608 @@ -2595,7 +2595,7 @@ Improved Pattern Learning for Bootstrapped Entity Extraction SonalGupta - ChristopherManning + ChristopherManning 98–108 W14-1611 10.3115/v1/W14-1611 @@ -2603,8 +2603,8 @@ Towards Temporal Scoping of Relational Facts based on <fixed-case>W</fixed-case>ikipedia Data - AvirupSil - Silviu-PetruCucerzan + AvirupSil + Silviu-PetruCucerzan 109–118 W14-1612 10.3115/v1/W14-1612 @@ -2621,8 +2621,8 @@ Treebank Translation for Cross-Lingual Parser Induction - JörgTiedemann - ŽeljkoAgić + JörgTiedemann + ŽeljkoAgić JoakimNivre 130–140 W14-1614 @@ -2632,9 +2632,9 @@ Weakly-Supervised <fixed-case>B</fixed-case>ayesian Learning of a <fixed-case>CCG</fixed-case> Supertagger DanGarrette - ChrisDyer + ChrisDyer JasonBaldridge - Noah A.Smith + Noah A.Smith 141–150 W14-1615 10.3115/v1/W14-1615 @@ -2643,7 +2643,7 @@ Factored <fixed-case>M</fixed-case>arkov Translation with Robust Modeling YangFeng - TrevorCohn + TrevorCohn XinkaiDu 151–159 W14-1616 @@ -2696,7 +2696,7 @@ W14-17 Hwee TouNg Siew MeiWu - TedBriscoe + TedBriscoe ChristianHadiwinoto Raymond HendySusanto ChristopherBryant @@ -2728,7 +2728,7 @@ Grammatical error correction using hybrid systems and type filtering MarianoFelice ZhengYuan - Øistein E.Andersen + Øistein E.Andersen HelenYannakoudakis EkaterinaKochmar 15–24 @@ -2759,10 +2759,10 @@ <fixed-case>RACAI</fixed-case> <fixed-case>GEC</fixed-case> – A hybrid approach to Grammatical Error Correction - TiberiuBoroș - Stefan DanielDumitrescu + TiberiuBoroș + Stefan DanielDumitrescu AdrianZafiu - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu Ionut PaulVăduva 43–48 W14-1705 @@ -2790,7 +2790,7 @@ Tuning a Grammar Correction System for Increased Precision AnoopKunchukuttan SriramChaudhury - PushpakBhattacharyya + PushpakBhattacharyya 60–64 W14-1708 10.3115/v1/W14-1708 @@ -2799,7 +2799,7 @@ <fixed-case>POSTECH</fixed-case> Grammatical Error Correction System in the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2014 Shared Task KyusongLee - Gary GeunbaeLee + Gary GeunbaeLee 65–73 W14-1709 10.3115/v1/W14-1709 @@ -2830,14 +2830,14 @@ <fixed-case>NTHU</fixed-case> at the <fixed-case>C</fixed-case>o<fixed-case>NLL</fixed-case>-2014 Shared Task - Jian-ChengWu + Jian-ChengWu Tzu-HsiYen - JimChang + JimChang Guan-ChengHuang JimmyChang Hsiang-LingHsu - Yu-WeiChang - Jason S.Chang + Yu-WeiChang + Jason S.Chang 91–95 W14-1712 10.3115/v1/W14-1712 @@ -2857,7 +2857,7 @@ Proceedings of the Ninth Workshop on Innovative Use of NLP for Building Educational Applications W14-18 - JoelTetreault + JoelTetreault JillBurstein ClaudiaLeacock 10.3115/v1/W14-18 @@ -2874,7 +2874,7 @@ Automated Measures of Specific Vocabulary Knowledge from Constructed Responses (‘Use These Words to Write a Sentence Based on this Picture’) SwapnaSomasundaran - MartinChodorow + MartinChodorow 1–11 W14-1801 10.3115/v1/W14-1801 @@ -2905,7 +2905,7 @@ ArtiRamesh DanGoldwasser BertHuang - HalDaumé + HalDaumé LiseGetoor 28–33 W14-1804 @@ -2924,7 +2924,7 @@ The pragmatics of margin comments: An empirical study DeboraField - StephenPulman + StephenPulman DeniseWhitelock 43–53 W14-1806 @@ -2945,7 +2945,7 @@ SamuelLeeman-Munk AngelaShelton EricWiebe - JamesLester + JamesLester 61–67 W14-1808 10.3115/v1/W14-1808 @@ -2982,7 +2982,7 @@ Improving Peer Feedback Prediction: The Sentence Level is Right HuyNguyen - DianeLitman + DianeLitman 99–108 W14-1812 10.3115/v1/W14-1812 @@ -3004,7 +3004,7 @@ Similarity-Based Non-Scorable Response Detection for Automated Speech Scoring - Su-YounYoon + Su-YounYoon ShashaXie 116–123 W14-1814 @@ -3013,7 +3013,7 @@ Natural Language Generation with Vocabulary Constraints - BenSwanson + BenSwanson ElifYamangil EugeneCharniak 124–133 @@ -3025,12 +3025,12 @@ Automated scoring of speaking items in an assessment for teachers of <fixed-case>E</fixed-case>nglish as a Foreign Language KlausZechner KeelanEvanini - Su-YounYoon + Su-YounYoon LawrenceDavis XinhaoWang LeiChen - Chong MinLee - Chee WeeLeong + Chong MinLee + Chee WeeLeong 134–142 W14-1816 10.3115/v1/W14-1816 @@ -3048,7 +3048,7 @@ Sentence-level Rewriting Detection FanZhang - DianeLitman + DianeLitman 149–154 W14-1818 10.3115/v1/W14-1818 @@ -3069,7 +3069,7 @@ MartijnWieling GiuliaVenturi AndreaCimino - SimonettaMontemagni + SimonettaMontemagni 163–173 W14-1820 10.3115/v1/W14-1820 @@ -3096,7 +3096,7 @@ AniNenkova RupalPatel FrankRudzicz - AnnaluWaller + AnnaluWaller DesislavaZhekova 10.3115/v1/W14-19 Association for Computational Linguistics @@ -3159,7 +3159,7 @@ Preliminary Test of a Real-Time, Interactive Silent Speech Interface Based on Electromagnetic Articulograph JunWang AshokSamal - JordanGreen + JordanGreen 38–45 W14-1906 10.3115/v1/W14-1906 @@ -3171,7 +3171,7 @@ Proceedings of the Fifth Workshop on Cognitive Modeling and Computational Linguistics W14-20 VeraDemberg - TimothyO’Donnell + TimothyO’Donnell 10.3115/v1/W14-20 Association for Computational Linguistics
Baltimore, Maryland, USA
@@ -3186,7 +3186,7 @@ Computationally Rational Saccadic Control: An Explanation of Spillover Effects Based on Sampling from Noisy Perception and Memory MichaelShvartsman - RichardLewis + RichardLewis SatinderSingh 1–9 W14-2001 @@ -3196,7 +3196,7 @@ Investigating the role of entropy in sentence processing TalLinzen - FlorianJaeger + FlorianJaeger 10–18 W14-2002 10.3115/v1/W14-2002 @@ -3256,7 +3256,7 @@ Quantifying the role of discourse topicality in speakers’ choices of referring expressions NahoOrita - NaomiFeldman + NaomiFeldman JordanBoyd-Graber ElianaVornov 63–70 @@ -3269,11 +3269,11 @@ Proceedings of the First Workshop on Argumentation Mining W14-21 - NancyGreen - KevinAshley - DianeLitman - ChrisReed - VernWalker + NancyGreen + KevinAshley + DianeLitman + ChrisReed + VernWalker 10.3115/v1/W14-21 Association for Computational Linguistics
Baltimore, Maryland
@@ -3326,7 +3326,7 @@ Identifying Appropriate Support for Propositions in Online User Comments JoonsukPark - ClaireCardie + ClaireCardie 29–38 W14-2105 10.3115/v1/W14-2105 @@ -3429,8 +3429,8 @@ Survey in sentiment, polarity and function analysis of citation - MyriamHernández A - José M.Gómez + MyriamHernández A + José M.Gómez 102–103 W14-2115 10.3115/v1/W14-2115 @@ -3460,7 +3460,7 @@ Requirement Mining in Technical Documents JuyeonKang - PatrickSaint-Dizier + PatrickSaint-Dizier 108–109 W14-2118 10.3115/v1/W14-2118 @@ -3472,8 +3472,8 @@ Proceedings of the 2014 Workshop on the Use of Computational Methods in the Study of Endangered Languages W14-22 JeffGood - JuliaHirschberg - OwenRambow + JuliaHirschberg + OwenRambow 10.3115/v1/W14-22 Association for Computational Linguistics
Baltimore, Maryland, USA
@@ -3535,7 +3535,7 @@ KaidiLõo AnttiArppe JordanLachler - SjurMoshagen + SjurMoshagen TrondTrosterud 34–42 W14-2205 @@ -3544,9 +3544,9 @@
Learning Grammar Specifications from <fixed-case>IGT</fixed-case>: A Case Study of Chintang - Emily M.Bender + Emily M.Bender JoshuaCrowgey - Michael WayneGoodman + Michael WayneGoodman FeiXia 43–53 W14-2206 @@ -3557,7 +3557,7 @@ Creating Lexical Resources for Endangered Languages Khang NhutLam FerasAl Tarouti - JugalKalita + JugalKalita 54–62 W14-2207 10.3115/v1/W14-2207 @@ -3574,7 +3574,7 @@ <fixed-case>I</fixed-case>nterlingua<fixed-case>P</fixed-case>lus Machine Translation Approach for Local Languages: Ekegusii & <fixed-case>S</fixed-case>wahili EdwardOmbui - PeterWagacha + PeterWagacha WanjikuNg’ang’a 68–72 W14-2209 @@ -3650,7 +3650,7 @@ HyejuJang MarioPiergallini MiaomiaoWen - CarolynRosé + CarolynRosé 1–10 W14-2301 10.3115/v1/W14-2301 @@ -3670,7 +3670,7 @@ Metaphor Detection through Term Relevance MarcSchulder - EduardHovy + EduardHovy 18–26 W14-2303 10.3115/v1/W14-2303 @@ -3691,7 +3691,7 @@ SuzanneWertheim VladimirZaytsev NiloofarMontazeri - JerryHobbs + JerryHobbs 33–41 W14-2305 10.3115/v1/W14-2305 @@ -3699,12 +3699,12 @@ Computing Affect in Metaphors - TomekStrzalkowski + TomekStrzalkowski SamiraShaikh KitCho - George AaronBroadwell - LaurieFeldman - SarahTaylor + George AaronBroadwell + LaurieFeldman + SarahTaylor BorisYamrom TingLiu IgnacioCases @@ -3744,7 +3744,7 @@ Learning a Lexicon for Broad-coverage Semantic Parsing - JamesAllen + JamesAllen 1–6 W14-2401 10.3115/v1/W14-2401 @@ -3752,9 +3752,9 @@ Semantic Parsing using Distributional Semantics and Probabilistic Logic - IslamBeltagy + IslamBeltagy KatrinErk - RaymondMooney + RaymondMooney 7–11 W14-2402 10.3115/v1/W14-2402 @@ -3772,9 +3772,9 @@ Semantic Parsing for Text to 3<fixed-case>D</fixed-case> Scene Generation - AngelChang + AngelChang ManolisSavva - ChristopherManning + ChristopherManning 17–21 W14-2404 10.3115/v1/W14-2404 @@ -3782,8 +3782,8 @@ A Deep Architecture for Semantic Parsing - EdwardGrefenstette - PhilBlunsom + EdwardGrefenstette + PhilBlunsom Nandode Freitas Karl MoritzHermann 22–27 @@ -3794,7 +3794,7 @@ Combining Formal and Distributional Models of Temporal and Intensional Semantics MikeLewis - MarkSteedman + MarkSteedman 28–32 W14-2406 10.3115/v1/W14-2406 @@ -3813,8 +3813,8 @@ Representing Caused Motion in Embodied Construction Grammar - Ellen K.Dodge - Miriam R. L.Petruck + Ellen K.Dodge + Miriam R. L.Petruck 39–44 W14-2408 10.3115/v1/W14-2408 @@ -3822,8 +3822,8 @@ Low-Dimensional Embeddings of Logic - TimRocktäschel - MatkoBosnjak + TimRocktäschel + MatkoBosnjak SameerSingh SebastianRiedel 45–49 @@ -3844,7 +3844,7 @@ From Treebank Parses to Episodic Logic and Commonsense Inference - LenhartSchubert + LenhartSchubert 55–60 W14-2411 10.3115/v1/W14-2411 @@ -3881,7 +3881,7 @@ Towards <fixed-case>README</fixed-case>-<fixed-case>EVAL</fixed-case> : Interpreting <fixed-case>README</fixed-case> File Instructions - JamesWhite + JamesWhite 76–81 W14-2415 10.3115/v1/W14-2415 @@ -3904,8 +3904,8 @@ W14-25 CristianDanescu-Niculescu-Mizil JacobEisenstein - KathleenMcKeown - Noah A.Smith + KathleenMcKeown + Noah A.Smith 10.3115/v1/W14-25 Association for Computational Linguistics
Baltimore, MD, USA
@@ -3952,9 +3952,9 @@ Overview of the 2014 <fixed-case>NLP</fixed-case> Unshared Task in <fixed-case>P</fixed-case>oli<fixed-case>I</fixed-case>nformatics Noah A.Smith - ClaireCardie + ClaireCardie AnneWashington - JohnWilkerson + JohnWilkerson 5–7 W14-2505 10.3115/v1/W14-2505 @@ -3971,10 +3971,10 @@ Extracting Socioeconomic Patterns from the News: Modelling Text and Outlet Importance Jointly VasileiosLampos - DanielPreoţiuc-Pietro + DanielPreoţiuc-Pietro SinaSamangooei DouweGelling - TrevorCohn + TrevorCohn 13–17 W14-2507 10.3115/v1/W14-2507 @@ -4035,7 +4035,7 @@ Optimizing Features in Active Machine Learning for Complex Qualitative Content Analysis Jasy Suet YanLiew - NancyMcCracken + NancyMcCracken ShichunZhou KevinCrowston 44–48 @@ -4047,7 +4047,7 @@ Power of Confidence: How Poll Scores Impact Topic Dynamics in Political Debates VinodkumarPrabhakaran AshimaArora - OwenRambow + OwenRambow 49 W14-2514 10.3115/v1/W14-2514 @@ -4088,9 +4088,9 @@ Using Simple <fixed-case>NLP</fixed-case> Tools to Trace the Globalization of the Art World - MohamedAlTantawy + MohamedAlTantawy AlixRule - OwenRambow + OwenRambow ZhongyuWang RupayanBasu 66–70 @@ -4100,7 +4100,7 @@ Issue Framing as a Generalizable Phenomenon - AmberBoydstun + AmberBoydstun 71 W14-2519 10.3115/v1/W14-2519 @@ -4119,10 +4119,10 @@ Proceedings of the 5th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis W14-26 - AlexandraBalahur - Erikvan der Goot + AlexandraBalahur + Erikvan der Goot RalfSteinberger - AndresMontoyo + AndresMontoyo 10.3115/v1/W14-26 Association for Computational Linguistics
Baltimore, Maryland
@@ -4136,7 +4136,7 @@ <fixed-case>W</fixed-case>ords: Evaluative, Emotional, Colourful, Musical! - SaifMohammad + SaifMohammad 1 W14-2601 10.3115/v1/W14-2601 @@ -4145,7 +4145,7 @@ Robust Cross-Domain Sentiment Analysis for Low-Resource Languages JakobElming - BarbaraPlank + BarbaraPlank DirkHovy 2–7 W14-2602 @@ -4155,7 +4155,7 @@ An Investigation for Implicatures in <fixed-case>C</fixed-case>hinese : Implicatures in <fixed-case>C</fixed-case>hinese and in <fixed-case>E</fixed-case>nglish are similar ! LingjiaDeng - JanyceWiebe + JanyceWiebe 8–17 W14-2603 10.3115/v1/W14-2603 @@ -4191,7 +4191,7 @@ Semantic Role Labeling of Emotions in Tweets - SaifMohammad + SaifMohammad XiaodanZhu JoelMartin 32–41 @@ -4202,7 +4202,7 @@ An Impact Analysis of Features in a Classification Approach to Irony Detection in Product Reviews KonstantinBuschmeier - PhilippCimiano + PhilippCimiano RomanKlinger 42–49 W14-2608 @@ -4222,9 +4222,9 @@ Emotive or Non-emotive: That is The Question MichalPtaszynski - FumitoMasui + FumitoMasui RafalRzepka - KenjiAraki + KenjiAraki 59–65 W14-2610 10.3115/v1/W14-2610 @@ -4233,7 +4233,7 @@ Challenges in Creating a Multilingual Sentiment Analysis Application for Social Media Mining AlexandraBalahur - HristoTanev + HristoTanev Erikvan der Goot 66 W14-2611 @@ -4243,7 +4243,7 @@ Two-Step Model for Sentiment Lexicon Extraction from <fixed-case>T</fixed-case>witter Streams IliaChetviorkin - NataliaLoukachevitch + NataliaLoukachevitch 67–72 W14-2612 10.3115/v1/W14-2612 @@ -4283,7 +4283,7 @@ Sentiment classification of online political discussions: a comparison of a word-based and dependency-based method Hugo LewiHammer Per ErikSolberg - LiljaØvrelid + LiljaØvrelid 90–96 W14-2616 10.3115/v1/W14-2616 @@ -4292,7 +4292,7 @@ Improving Agreement and Disagreement Identification in Online Discussions with A Socially-Tuned Sentiment Lexicon LuWang - ClaireCardie + ClaireCardie 97–106 W14-2617 10.3115/v1/W14-2617 @@ -4302,7 +4302,7 @@ Lexical Acquisition for Opinion Inference: A Sense-Level Lexicon of Benefactive and Malefactive Events YoonjungChoi LingjiaDeng - JanyceWiebe + JanyceWiebe 107–112 W14-2618 10.3115/v1/W14-2618 @@ -4311,7 +4311,7 @@ Dive deeper: Deep Semantics for Sentiment Analysis NikhilkumarJadhav - PushpakBhattacharyya + PushpakBhattacharyya 113–118 W14-2619 10.3115/v1/W14-2619 @@ -4342,7 +4342,7 @@ Effect of Using Regression on Class Confidence Scores in Sentiment Analysis of <fixed-case>T</fixed-case>witter Data ItirOnal Ali MertErtugrul - RukenCakici + RukenCakici 136–141 W14-2622 10.3115/v1/W14-2622 @@ -4352,7 +4352,7 @@ A cognitive study of subjectivity extraction in sentiment annotation AbhijitMishra AdityaJoshi - PushpakBhattacharyya + PushpakBhattacharyya 142–146 W14-2623 10.3115/v1/W14-2623 @@ -4370,7 +4370,7 @@ A Conceptual Framework for Inferring Implicatures - JanyceWiebe + JanyceWiebe LingjiaDeng 154–159 W14-2625 @@ -4382,7 +4382,7 @@ Proceedings of the Joint Workshop on Social Dynamics and Personal Attributes in Social Media W14-27 - AliceOh + AliceOh BenjaminVan Durme DavidYarowsky OrenTsur @@ -4410,7 +4410,7 @@ Using County Demographics to Infer Attributes of <fixed-case>T</fixed-case>witter Users - EhsanMohammady + EhsanMohammady AronCulotta 7–16 W14-2702 @@ -4420,7 +4420,7 @@ The Enrollment Effect: A Study of <fixed-case>A</fixed-case>mazon’s Vine Program DineshPuranam - ClaireCardie + ClaireCardie 17–27 W14-2703 10.3115/v1/W14-2703 @@ -4432,7 +4432,7 @@ HeatherPon-Barry SubbaraoKambhampati EricHekler - David W.McDonald + David W.McDonald 28–32 W14-2704 10.3115/v1/W14-2704 @@ -4451,7 +4451,7 @@ Self-disclosure topic model for <fixed-case>T</fixed-case>witter conversations JinYeongBak - Chin-YewLin + Chin-YewLin AliceOh 42–49 W14-2706 @@ -4461,7 +4461,7 @@ Detecting and Evaluating Local Text Reuse in Social Networks ShaobinXu - DavidSmith + DavidSmith AbigailMullen RyanCordell 50–57 @@ -4472,7 +4472,7 @@ Generating Subjective Responses to Opinionated Articles in Social Media: An Agenda-Driven Architecture and a <fixed-case>T</fixed-case>uring-Like Test TomerCagan - Stefan L.Frank + Stefan L.Frank ReutTsarfaty 58–67 W14-2708 @@ -4491,7 +4491,7 @@ Power of Confidence: How Poll Scores Impact Topic Dynamics in Political Debates VinodkumarPrabhakaran AshimaArora - OwenRambow + OwenRambow 77–82 W14-2710 10.3115/v1/W14-2710 @@ -4529,7 +4529,7 @@ User Type Classification of Tweets with Implications for Event Recognition LalindraDe Silva - EllenRiloff + EllenRiloff 98–108 W14-2714 10.3115/v1/W14-2714 @@ -4539,7 +4539,7 @@ Collective Stance Classification of Posts in Online Debate Forums DhanyaSridhar LiseGetoor - MarilynWalker + MarilynWalker 109–117 W14-2715 10.3115/v1/W14-2715 @@ -4550,7 +4550,7 @@ Proceedings of the 2014 Joint Meeting of SIGMORPHON and SIGFSM W14-28 - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu JeffreyHeinz AndreasMaletti JasonRiggle @@ -4584,8 +4584,8 @@ Comparing Models of Phonotactics for Word Segmentation - NatalieSchrimpf - GajaJarosz + NatalieSchrimpf + GajaJarosz 19–28 W14-2803 10.3115/v1/W14-2803 @@ -4602,7 +4602,7 @@ Automatic Conversion of Dialectal <fixed-case>T</fixed-case>amil Text to Standard Written <fixed-case>T</fixed-case>amil Text using <fixed-case>FST</fixed-case>s MarimuthuK - SobhaLalitha Devi + SobhaLalitha Devi 37–45 W14-2805 10.3115/v1/W14-2805 @@ -4611,7 +4611,7 @@ Rule Based Morphological Analyzer of <fixed-case>K</fixed-case>azakh Language GulshatKessikbayeva - IlyasCicekli + IlyasCicekli 46–54 W14-2806 10.3115/v1/W14-2806 @@ -4621,8 +4621,8 @@ Rules, Analogy, and Social Factors Codetermine Past-tense Formation Patterns in <fixed-case>E</fixed-case>nglish PéterRácz ClaytonBeckner - Jennifer B.Hay - Janet B.Pierrehumbert + Jennifer B.Hay + Janet B.Pierrehumbert 55–63 W14-2807 10.3115/v1/W14-2807 @@ -4644,8 +4644,8 @@ Proceedings of the Second Workshop on EVENTS: Definition, Detection, Coreference, and Representation W14-29 TerukoMitamura - EduardHovy - MarthaPalmer + EduardHovy + MarthaPalmer 10.3115/v1/W14-29 Association for Computational Linguistics
Baltimore, Maryland, USA
@@ -4669,8 +4669,8 @@ Verbal Valency Frame Detection and Selection in <fixed-case>C</fixed-case>zech and <fixed-case>E</fixed-case>nglish OndřejDušek - JanHajič - ZdeňkaUrešová + JanHajič + ZdeňkaUrešová 6–11 W14-2902 10.3115/v1/W14-2902 @@ -4710,7 +4710,7 @@ Conceptual and Practical Steps in Event Coreference Analysis of Large-scale Data - FatemehTorabi Asr + FatemehTorabi Asr JonathanSonntag YuliaGrishina ManfredStede @@ -4725,7 +4725,7 @@ CharleyBeller PaulMcNamee BenjaminVan Durme - StephanieStrassel + StephanieStrassel ZhiyiSong JoeEllis 45–53 @@ -4766,7 +4766,7 @@ Proceedings of Frame Semantics in NLP: A Workshop in Honor of Chuck Fillmore (1929-2014) W14-30 - Miriam R. L.Petruck + Miriam R. L.Petruck Gerardde Melo 10.3115/v1/W14-30 Association for Computational Linguistics @@ -4781,7 +4781,7 @@ <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et: A Knowledge Base for Natural Language Processing - Collin F.Baker + Collin F.Baker 1–5 W14-3001 10.3115/v1/W14-3001 @@ -4789,7 +4789,7 @@ The Case for Empiricism (With and Without Statistics) - KennethChurch + KennethChurch 6–9 W14-3002 10.3115/v1/W14-3002 @@ -4797,7 +4797,7 @@ Case, Constructions, <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et, and the Deep Lexicon - JerryHobbs + JerryHobbs 10–12 W14-3003 10.3115/v1/W14-3003 @@ -4805,9 +4805,9 @@ <fixed-case>S</fixed-case>em<fixed-case>L</fixed-case>ink+: <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et, <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et and Event Ontologies - MarthaPalmer - ClaireBonial - DianaMcCarthy + MarthaPalmer + ClaireBonial + DianaMcCarthy 13–17 W14-3004 10.3115/v1/W14-3004 @@ -4815,7 +4815,7 @@ <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et and Linked Data - NancyIde + NancyIde 18–21 W14-3005 10.3115/v1/W14-3005 @@ -4823,7 +4823,7 @@ Bridging Text and Knowledge with Frames - SriniNarayanan + SriniNarayanan 22–25 W14-3006 10.3115/v1/W14-3006 @@ -4841,7 +4841,7 @@ Using Frame Semantics in Natural Language Processing ApoorvAgarwal DanielBauer - OwenRambow + OwenRambow 30–33 W14-3008 10.3115/v1/W14-3008 @@ -4871,7 +4871,7 @@ W14-31 JasonChuang SpenceGreen - MartiHearst + MartiHearst JeffreyHeer PhilippKoehn 10.3115/v1/W14-31 @@ -4895,9 +4895,9 @@ Interactive Learning of Spatial Knowledge for Text to 3<fixed-case>D</fixed-case> Scene Generation - AngelChang + AngelChang ManolisSavva - ChristopherManning + ChristopherManning 14–21 W14-3102 10.3115/v1/W14-3102 @@ -4925,10 +4925,10 @@ <fixed-case>GLANCE</fixed-case> Visualizes Lexical Phenomena for Language Learning - Mei-HuaChen - Shih-TingHuang - Ting-HuiKao - Hsun-wenChiu + Mei-HuaChen + Shih-TingHuang + Ting-HuiKao + Hsun-wenChiu Tzu-HsiYen 34–37 W14-3105 @@ -4938,7 +4938,7 @@ <fixed-case>SPIED</fixed-case>: <fixed-case>S</fixed-case>tanford Pattern based Information Extraction and Diagnostics SonalGupta - ChristopherManning + ChristopherManning 38–44 W14-3106 10.3115/v1/W14-3106 @@ -4948,7 +4948,7 @@ Interactive Exploration of Asynchronous Conversations: Applying a User-centered Approach to Design a Visual Text Analytic System EnamulHoque GiuseppeCarenini - ShafiqJoty + ShafiqJoty 45–52 W14-3107 10.3115/v1/W14-3107 @@ -4964,7 +4964,7 @@ Design of an Active Learning System with Human Correction for Content Analysis - NancyMcCracken + NancyMcCracken Jasy Suet YanLiew KevinCrowston 59–62 @@ -5044,9 +5044,9 @@ Comparison of different feature sets for identification of variants in progressive aphasia - Kathleen C.Fraser + Kathleen C.Fraser GraemeHirst - Naida L.Graham + Naida L.Graham Jed A.Meltzer Sandra E.Black ElizabethRochon @@ -5081,9 +5081,9 @@ Detecting linguistic idiosyncratic interests in autism using distributional semantic models MasoudRouhizadeh - EmilyPrud’hommeaux + EmilyPrud’hommeaux Janvan Santen - RichardSproat + RichardSproat 46–50 W14-3206 10.3115/v1/W14-3206 @@ -5102,7 +5102,7 @@ Applying prosodic speech features in mental health care: An exploratory study in a life-review intervention for depression Sanne M.A.Lamers - Khiet P.Truong + Khiet P.Truong BasSteunenberg Franciskade Jong Gerben J.Westerhof @@ -5136,7 +5136,7 @@ HirokiTanaka SakrianiSakti GrahamNeubig - TomokiToda + TomokiToda SatoshiNakamura 88–96 W14-3211 @@ -5163,7 +5163,7 @@ TongLiu MeganLytle VincentSilenzio - CeciliaOvesdotter Alm + CeciliaOvesdotter Alm 107–117 W14-3213 10.3115/v1/W14-3213 @@ -5171,14 +5171,14 @@ Towards Assessing Changes in Degree of Depression through <fixed-case>F</fixed-case>acebook - H. AndrewSchwartz + H. AndrewSchwartz JohannesEichstaedt - Margaret L.Kern + Margaret L.Kern GregoryPark MaartenSap DavidStillwell MichalKosinski - LyleUngar + LyleUngar 118–125 W14-3214 10.3115/v1/W14-3214 @@ -5189,7 +5189,7 @@ Proceedings of the Ninth Workshop on Statistical Machine Translation W14-33 - OndřejBojar + OndřejBojar ChristianBuck ChristianFedermann BarryHaddow @@ -5229,7 +5229,7 @@ ChristofMonz PavelPecina MattPost - HerveSaint-Amand + HerveSaint-Amand RaduSoricut LuciaSpecia AlešTamchyna @@ -5240,7 +5240,7 @@ Parallel <fixed-case>FDA</fixed-case>5 for Fast Deployment of Accurate Statistical Machine Translation Systems - ErgunBiçici + ErgunBiçici QunLiu AndyWay 59–65 @@ -5262,7 +5262,7 @@ FabienneCap MarionWeller AnitaRamm - AlexanderFraser + AlexanderFraser 71–78 W14-3305 10.3115/v1/W14-3305 @@ -5270,10 +5270,10 @@ <fixed-case>E</fixed-case>nglish-to-<fixed-case>H</fixed-case>indi system description for <fixed-case>WMT</fixed-case> 2014: Deep Source-Context Features for <fixed-case>M</fixed-case>oses - Marta R.Costa-jussà + Marta R.Costa-jussà ParthGupta PaoloRosso - Rafael E.Banchs + Rafael E.Banchs 79–83 W14-3306 10.3115/v1/W14-3306 @@ -5281,12 +5281,12 @@ The <fixed-case>KIT</fixed-case>-<fixed-case>LIMSI</fixed-case> Translation System for <fixed-case>WMT</fixed-case> 2014 - Quoc KhanhDo + Quoc KhanhDo TeresaHerrmann JanNiehues AlexanderAllauzen FrançoisYvon - AlexWaibel + AlexWaibel 84–89 W14-3307 10.3115/v1/W14-3307 @@ -5295,11 +5295,11 @@ The <fixed-case>IIT</fixed-case> <fixed-case>B</fixed-case>ombay <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Translation System at <fixed-case>WMT</fixed-case> 2014 PiyushDungarwal - RajenChatterjee + RajenChatterjee AbhijitMishra AnoopKunchukuttan - RiteshShah - PushpakBhattacharyya + RiteshShah + PushpakBhattacharyya 90–96 W14-3308 10.3115/v1/W14-3308 @@ -5321,16 +5321,16 @@ MarkusFreitag StephanPeitz JoernWuebker - HermannNey + HermannNey MatthiasHuck RicoSennrich NadirDurrani - MariaNadejde + MariaNadejde PhilipWilliams PhilippKoehn TeresaHerrmann EunahCho - AlexWaibel + AlexWaibel 105–113 W14-3310 10.3115/v1/W14-3310 @@ -5340,7 +5340,7 @@ <fixed-case>P</fixed-case>hrasal: A Toolkit for New Directions in Statistical Machine Translation SpenceGreen DanielCer - ChristopherManning + ChristopherManning 114–121 W14-3311 10.3115/v1/W14-3311 @@ -5350,7 +5350,7 @@ Anaphora Models and Reordering for Phrase-Based <fixed-case>SMT</fixed-case> ChristianHardmeier SaraStymne - JörgTiedemann + JörgTiedemann AaronSmith JoakimNivre 122–129 @@ -5367,7 +5367,7 @@ JanNiehues IsabelSlawik YuqiZhang - AlexWaibel + AlexWaibel 130–135 W14-3313 10.3115/v1/W14-3313 @@ -5377,7 +5377,7 @@ The <fixed-case>DCU</fixed-case>-<fixed-case>ICTCAS</fixed-case> <fixed-case>MT</fixed-case> system at <fixed-case>WMT</fixed-case> 2014 on <fixed-case>G</fixed-case>erman-<fixed-case>E</fixed-case>nglish Translation Task LiangyouLi XiaofengWu - Santiago CortésVaíllo + Santiago CortésVaíllo JunXie AndyWay QunLiu @@ -5396,8 +5396,8 @@ EvaSchlinger SwabhaSwayamdipta YuliaTsvetkov - AlonLavie - ChrisDyer + AlonLavie + ChrisDyer 142–149 W14-3315 10.3115/v1/W14-3315 @@ -5409,7 +5409,7 @@ SebastianSchuster SpenceGreen KennethHeafield - ChristopherManning + ChristopherManning 150–156 W14-3316 10.3115/v1/W14-3316 @@ -5420,7 +5420,7 @@ StephanPeitz JoernWuebker MarkusFreitag - HermannNey + HermannNey 157–162 W14-3317 10.3115/v1/W14-3317 @@ -5437,12 +5437,12 @@ <fixed-case>A</fixed-case>bu-<fixed-case>M</fixed-case>a<fixed-case>T</fixed-case>ran at <fixed-case>WMT</fixed-case> 2014 Translation Task: Two-step Data Selection and <fixed-case>RBMT</fixed-case>-Style Synthetic Rules - RaphaelRubino + RaphaelRubino AntonioToral - Victor M.Sánchez-Cartagena + Victor M.Sánchez-Cartagena JorgeFerrández-Tordera - SergioOrtiz-Rojas - GemaRamírez-Sánchez + SergioOrtiz-Rojas + GemaRamírez-Sánchez FelipeSánchez-Martínez AndyWay 171–177 @@ -5452,7 +5452,7 @@ The <fixed-case>UA</fixed-case>-Prompsit hybrid machine translation system for the 2014 Workshop on Statistical Machine Translation - Víctor M.Sánchez-Cartagena + Víctor M.Sánchez-Cartagena Juan AntonioPérez-Ortiz FelipeSánchez-Martínez 178–185 @@ -5463,7 +5463,7 @@ Machine Translation and Monolingual Postediting: The <fixed-case>AFRL</fixed-case> <fixed-case>WMT</fixed-case>-14 System LaneSchwartz - TimothyAnderson + TimothyAnderson JeremyGwinnup KatherineYoung 186–194 @@ -5497,7 +5497,7 @@ <fixed-case>E</fixed-case>dinburgh’s Syntax-Based Systems at <fixed-case>WMT</fixed-case> 2014 PhilipWilliams RicoSennrich - MariaNadejde + MariaNadejde MatthiasHuck EvaHasler PhilippKoehn @@ -5522,14 +5522,14 @@ Machine Translation of Medical Texts in the Khresmoi Project OndřejDušek - JanHajič + JanHajič JaroslavaHlaváčová MichalNovák PavelPecina RudolfRosa AlešTamchyna - ZdeňkaUrešová - DanielZeman + ZdeňkaUrešová + DanielZeman 221–228 W14-3326 10.3115/v1/W14-3326 @@ -5573,7 +5573,7 @@ <fixed-case>LIMSI</fixed-case> @ <fixed-case>WMT</fixed-case>’14 Medical Translation Task NicolasPécheux LiGong - Quoc KhanhDo + Quoc KhanhDo BenjaminMarie YuliaIvanishcheva AlexanderAllauzen @@ -5611,7 +5611,7 @@ Randomized Significance Tests in Machine Translation YvetteGraham NitikaMathur - TimothyBaldwin + TimothyBaldwin 266–274 W14-3333 10.3115/v1/W14-3333 @@ -5620,7 +5620,7 @@ Estimating Word Alignment Quality for <fixed-case>SMT</fixed-case> Reordering Tasks SaraStymne - JörgTiedemann + JörgTiedemann JoakimNivre 275–286 W14-3334 @@ -5656,7 +5656,7 @@ <fixed-case>SHEF</fixed-case>-Lite 2.0: Sparse Multi-task <fixed-case>G</fixed-case>aussian Processes for Translation Quality Estimation - DanielBeck + DanielBeck KashifShah LuciaSpecia 307–312 @@ -5666,7 +5666,7 @@ Referential Translation Machines for Predicting Translation Quality - ErgunBiçici + ErgunBiçici AndyWay 313–321 W14-3339 @@ -5675,11 +5675,11 @@ <fixed-case>FBK</fixed-case>-<fixed-case>UPV</fixed-case>-<fixed-case>UE</fixed-case>din participation in the <fixed-case>WMT</fixed-case>14 Quality Estimation shared-task - José GuilhermeCamargo de Souza - JesúsGonzález-Rubio + José GuilhermeCamargo de Souza + JesúsGonzález-Rubio ChristianBuck MarcoTurchi - MatteoNegri + MatteoNegri 322–328 W14-3340 10.3115/v1/W14-3340 @@ -5687,7 +5687,7 @@ Target-Centric Features for Translation Quality Estimation - ChrisHokamp + ChrisHokamp IacerCalixto JoachimWagner JianZhang @@ -5698,8 +5698,8 @@ <fixed-case>LIG</fixed-case> System for Word Level <fixed-case>QE</fixed-case> task at <fixed-case>WMT</fixed-case>14 - Ngoc-QuangLuong - LaurentBesacier + Ngoc-QuangLuong + LaurentBesacier BenjaminLecouteux 335–341 W14-3342 @@ -5708,7 +5708,7 @@ Exploring Consensus in Machine Translation for Quality Estimation - CarolinaScarton + CarolinaScarton LuciaSpecia 342–347 W14-3343 @@ -5728,7 +5728,7 @@ <fixed-case>P</fixed-case>armesan: Meteor without Paraphrases with Paraphrased References - PetraBarančíková + PetraBarančíková 355–361 W14-3345 10.3115/v1/W14-3345 @@ -5745,8 +5745,8 @@ <fixed-case>VERT</fixed-case>a participation in the <fixed-case>WMT</fixed-case>14 Metrics Task - ElisabetComelles - JordiAtserias + ElisabetComelles + JordiAtserias 368–375 W14-3347 10.3115/v1/W14-3347 @@ -5755,7 +5755,7 @@ Meteor Universal: Language Specific Translation Evaluation for Any Target Language MichaelDenkowski - AlonLavie + AlonLavie 376–380 W14-3348 10.3115/v1/W14-3348 @@ -5763,9 +5763,9 @@ Application of Prize based on Sentence Length in Chunk-based Automatic Evaluation of Machine Translation - HiroshiEchizen’ya - KenjiAraki - EduardHovy + HiroshiEchizen’ya + KenjiAraki + EduardHovy 381–386 W14-3349 10.3115/v1/W14-3349 @@ -5774,7 +5774,7 @@ <fixed-case>LAYERED</fixed-case>: Metric for Machine Translation Evaluation ShubhamGautam - PushpakBhattacharyya + PushpakBhattacharyya 387–393 W14-3350 10.3115/v1/W14-3350 @@ -5782,9 +5782,9 @@ <fixed-case>IPA</fixed-case> and <fixed-case>STOUT</fixed-case>: Leveraging Linguistic and Source-based Features for Machine Translation Evaluation - MeritxellGonzàlez + MeritxellGonzàlez AlbertoBarrón-Cedeño - LluísMàrquez + LluísMàrquez 394–401 W14-3351 10.3115/v1/W14-3351 @@ -5792,10 +5792,10 @@ <fixed-case>D</fixed-case>isco<fixed-case>TK</fixed-case>: Using Discourse Structure for Machine Translation Evaluation - ShafiqJoty - FranciscoGuzmán - LluísMàrquez - PreslavNakov + ShafiqJoty + FranciscoGuzmán + LluísMàrquez + PreslavNakov 402–408 W14-3352 10.3115/v1/W14-3352 @@ -5813,7 +5813,7 @@ <fixed-case>BEER</fixed-case>: <fixed-case>BE</fixed-case>tter Evaluation as Ranking MilošStanojević - KhalilSima’an + KhalilSima’an 414–419 W14-3354 10.3115/v1/W14-3354 @@ -5832,9 +5832,9 @@ Crowdsourcing High-Quality Parallel Data Extraction from <fixed-case>T</fixed-case>witter WangLing - LuísMarujo - ChrisDyer - Alan W.Black + LuísMarujo + ChrisDyer + Alan W.Black IsabelTrancoso 426–436 W14-3356 @@ -5863,7 +5863,7 @@ Unsupervised Adaptation for Statistical Machine Translation SaabMansour - HermannNey + HermannNey 457–465 W14-3359 10.3115/v1/W14-3359 @@ -5873,7 +5873,7 @@ An Empirical Comparison of Features and Tuning for Phrase-based Machine Translation SpenceGreen DanielCer - ChristopherManning + ChristopherManning 466–476 W14-3360 10.3115/v1/W14-3360 @@ -5901,7 +5901,7 @@ Linear Mixture Models for Robust Machine Translation MarineCarpuat - CyrilGoutte + CyrilGoutte GeorgeFoster 499–509 W14-3363 @@ -5913,10 +5913,10 @@ Proceedings of BioNLP 2014 W14-34 - KevinCohen + KevinCohen DinaDemner-Fushman SophiaAnaniadou - Jun-ichiTsujii + Jun-ichiTsujii 10.3115/v1/W14-34 Association for Computational Linguistics
Baltimore, Maryland
@@ -5934,10 +5934,10 @@ RobertRivera RachelBeard RobLauder - DavyWeissenbacher + DavyWeissenbacher MatthewScotch GarrickWallstrom - GracielaGonzalez + GracielaGonzalez 1–9 W14-3401 10.3115/v1/W14-3401 @@ -5945,7 +5945,7 @@
Temporal Expression Recognition for Cell Cycle Phase Concepts in Biomedical Literature - NegacyHailu + NegacyHailu NatalyaPanteleyeva KevinCohen 10–18 @@ -5984,7 +5984,7 @@ Detecting Health Related Discussions in Everyday Telephone Conversations for Studying Medical Events in the Lives of Older Adults - GolnarSheikhshab + GolnarSheikhshab IzhakShafran JeffreyKaye 38–44 @@ -6003,12 +6003,12 @@ Generating Patient Problem Lists from the <fixed-case>S</fixed-case>h<fixed-case>AR</fixed-case>e Corpus using <fixed-case>SNOMED</fixed-case> <fixed-case>CT</fixed-case>/<fixed-case>SNOMED</fixed-case> <fixed-case>CT</fixed-case> <fixed-case>CORE</fixed-case> Problem List - DanielleMowery + DanielleMowery MindyRoss SumithraVelupillai - StephaneMeystre - JanyceWiebe - WendyChapman + StephaneMeystre + JanyceWiebe + WendyChapman 54–58 W14-3408 10.3115/v1/W14-3408 @@ -6026,8 +6026,8 @@ Structuring Operative Notes using Active Learning KirkRoberts - SandaHarabagiu - MichaelSkinner + SandaHarabagiu + MichaelSkinner 68–76 W14-3410 10.3115/v1/W14-3410 @@ -6036,7 +6036,7 @@ Chunking Clinical Text Containing Non-Canonical Language AleksandarSavkov - JohnCarroll + JohnCarroll JackieCassell 77–82 W14-3411 @@ -6046,10 +6046,10 @@ Decision Style in a Clinical Reasoning Corpus LimorHochberg - CeciliaOvesdotter Alm + CeciliaOvesdotter Alm Esa M.Rantanen Caroline M.DeLong - AnneHaake + AnneHaake 83–87 W14-3412 10.3115/v1/W14-3412 @@ -6066,7 +6066,7 @@ A repository of semantic types in the <fixed-case>MIMIC</fixed-case> <fixed-case>II</fixed-case> database clinical notes RichardOsborne - AlanAronson + AlanAronson KevinCohen 93–97 W14-3414 @@ -6075,7 +6075,7 @@ Extracting drug indications and adverse drug reactions from <fixed-case>S</fixed-case>panish health social media - IsabelSegura-Bedmar + IsabelSegura-Bedmar Santiagode la Peña González PalomaMartínez 98–106 @@ -6103,10 +6103,10 @@ Towards Gene Recognition from Rare and Ambiguous Abbreviations using a Filtering Approach - MatthiasHartung + MatthiasHartung RomanKlinger MatthiasZwick - PhilippCimiano + PhilippCimiano 118–127 W14-3418 10.3115/v1/W14-3418 @@ -6123,7 +6123,7 @@ Using statistical parsing to detect agrammatic aphasia - Kathleen C.Fraser + Kathleen C.Fraser GraemeHirst Jed A.Meltzer Jennifer E.Mack @@ -6154,7 +6154,7 @@ Improving Collocation Correction by Ranking Suggestions Using Linguistic Knowledge RobertoCarlini - JoanCodina-Filba + JoanCodina-Filba LeoWanner 1–12 W14-3501 @@ -6213,7 +6213,7 @@ A <fixed-case>VIEW</fixed-case> of <fixed-case>R</fixed-case>ussian: Visual Input Enhancement and Adaptive Feedback RobertReynolds EduardSchaf - DetmarMeurers + DetmarMeurers 98-112 W14-3508 reynolds-etal-2014-view @@ -6242,7 +6242,7 @@ Proceedings of the EMNLP 2014 Workshop on Arabic Natural Language Processing (ANLP) W14-36 NizarHabash - StephanVogel + StephanVogel 10.3115/v1/W14-36 Association for Computational Linguistics
Doha, Qatar
@@ -6266,7 +6266,7 @@ The International Corpus of <fixed-case>A</fixed-case>rabic: Compilation, Analysis and Evaluation SamehAlansary - MagdyNagi + MagdyNagi 8–17 W14-3602 10.3115/v1/W14-3602 @@ -6307,7 +6307,7 @@ A Framework for the Classification and Annotation of Multiword Expressions in Dialectal <fixed-case>A</fixed-case>rabic AbdelatiHawwari MohammedAttia - MonaDiab + MonaDiab 48–56 W14-3606 10.3115/v1/W14-3606 @@ -6320,7 +6320,7 @@ RehamMohamed AlaaMohamed BassantFarouk - NagwaEl-Makky + NagwaEl-Makky MarwanTorki 57–64 W14-3607 @@ -6330,8 +6330,8 @@ Automatic <fixed-case>A</fixed-case>rabic diacritics restoration based on deep nets AhmadAl Sallab - MohsenRashwan - HazemM. Raafat + MohsenRashwan + HazemM. Raafat AhmedRafea 65–72 W14-3608 @@ -6350,7 +6350,7 @@ Named Entity Recognition System for Dialectal <fixed-case>A</fixed-case>rabic AyahZirikly - MonaDiab + MonaDiab 78–86 W14-3610 10.3115/v1/W14-3610 @@ -6359,8 +6359,8 @@ Semantic Query Expansion for <fixed-case>A</fixed-case>rabic Information Retrieval AshrafMahgoub - MohsenRashwan - HazemRaafat + MohsenRashwan + HazemRaafat MohamedZahran MagdaFayek 87–92 @@ -6372,14 +6372,14 @@ Transliteration of <fixed-case>A</fixed-case>rabizi into <fixed-case>A</fixed-case>rabic Orthography: Developing a Parallel Annotated <fixed-case>A</fixed-case>rabizi-<fixed-case>A</fixed-case>rabic Script <fixed-case>SMS</fixed-case>/Chat Corpus AnnBies ZhiyiSong - MohamedMaamouri + MohamedMaamouri StephenGrimes HaejoongLee JonathanWright - StephanieStrassel + StephanieStrassel NizarHabash RamyEskander - OwenRambow + OwenRambow 93–103 W14-3612 10.3115/v1/W14-3612 @@ -6400,7 +6400,7 @@ NadiTomeh NizarHabash RamyEskander - JosephLe Roux + JosephLe Roux 114–120 W14-3614 10.3115/v1/W14-3614 @@ -6459,7 +6459,7 @@ <fixed-case>GWU</fixed-case>-<fixed-case>HASP</fixed-case>: Hybrid <fixed-case>A</fixed-case>rabic Spelling and Punctuation Corrector MohammedAttia MohamedAl-Badrashiny - MonaDiab + MonaDiab 148–154 W14-3620 10.3115/v1/W14-3620 @@ -6467,9 +6467,9 @@ <fixed-case>TECHLIMED</fixed-case> system description for the Shared Task on Automatic <fixed-case>A</fixed-case>rabic Error Correction - DjamelMostefa + DjamelMostefa OmarAsbayou - RamziAbbes + RamziAbbes 155–159 W14-3621 10.3115/v1/W14-3621 @@ -6493,7 +6493,7 @@ RamyBaly HazemHajj NizarHabash - WassimEl-Hajj + WassimEl-Hajj 165–173 W14-3623 10.3115/v1/W14-3623 @@ -6510,7 +6510,7 @@ <fixed-case>A</fixed-case>rabic Native Language Identification - ShervinMalmasi + ShervinMalmasi MarkDras 180–186 W14-3625 @@ -6532,7 +6532,7 @@ SerenaJeblee WestonFeely HoudaBouamor - AlonLavie + AlonLavie NizarHabash KemalOflazer 196–206 @@ -6546,7 +6546,7 @@ HassanSajjad AlaaKhader FahadAl Obaidli - PreslavNakov + PreslavNakov StephanVogel 207–216 W14-3628 @@ -6567,9 +6567,9 @@ Proceedings of TextGraphs-9: the workshop on Graph-based Methods for Natural Language Processing W14-37 V.G.VinodVydiswaran - AmarnagSubramanya + AmarnagSubramanya GaborMelli - IrinaMatveeva + IrinaMatveeva 10.3115/v1/W14-37 Association for Computational Linguistics
Doha, Qatar
@@ -6611,7 +6611,7 @@
A Novel Two-stage Framework for Extracting Opinionated Sentences from News Articles - PujariRajkumar + PujariRajkumar SwaraDesai NiloyGanguly PawanGoyal @@ -6631,7 +6631,7 @@ Semi-supervised Graph-based Genre Classification for Web Pages - NoushinRezapour Asheghi + NoushinRezapour Asheghi KatjaMarkert SergeSharoff 39–47 @@ -6650,7 +6650,7 @@ From Visualisation to Hypothesis Construction for Second Language Acquisition - ShervinMalmasi + ShervinMalmasi MarkDras 56–64 W14-3708 @@ -6662,8 +6662,8 @@ Proceedings of the First Workshop on Computational Approaches to Code Switching W14-39 - MonaDiab - JuliaHirschberg + MonaDiab + JuliaHirschberg PascaleFung ThamarSolorio 10.3115/v1/W14-39 @@ -6682,7 +6682,7 @@ RamyEskander MohamedAl-Badrashiny NizarHabash - OwenRambow + OwenRambow 1–12 W14-3901 10.3115/v1/W14-3901 @@ -6770,8 +6770,8 @@ The <fixed-case>CMU</fixed-case> Submission for the Shared Task on Language Identification in Code-Switched Data Chu-ChengLin WaleedAmmar - LoriLevin - ChrisDyer + LoriLevin + ChrisDyer 80–86 W14-3909 10.3115/v1/W14-3909 @@ -6780,7 +6780,7 @@ Language Identification in Code-Switching Scenario NamanJain - Riyaz AhmadBhat + Riyaz AhmadBhat 87–93 W14-3910 10.3115/v1/W14-3910 @@ -6801,8 +6801,8 @@ LeviKing EricBaucom TimurGilmanov - SandraKübler - DanWhyatt + SandraKübler + DanWhyatt WolfgangMaier PaulRodrigues 102–106 @@ -6833,7 +6833,7 @@ <fixed-case>DCU</fixed-case>-<fixed-case>UVT</fixed-case>: Word-Level Language Classification with Code-Mixed Data UtsabBarman JoachimWagner - GrzegorzChrupała + GrzegorzChrupała JenniferFoster 127–132 W14-3915 @@ -6842,7 +6842,7 @@ Incremental N-gram Approach for Language Identification in Code-Switched Text - PrajwolShrestha + PrajwolShrestha 133–138 W14-3916 10.3115/v1/W14-3916 @@ -6865,7 +6865,7 @@ DekaiWu MarineCarpuat XavierCarreras - Eva MariaVecchi + Eva MariaVecchi 10.3115/v1/W14-40 Association for Computational Linguistics
Doha, Qatar
@@ -6881,7 +6881,7 @@ Vector Space Models for Phrase-based Machine Translation TamerAlkhouli AndreasGuta - HermannNey + HermannNey 1–10 W14-4001 10.3115/v1/W14-4001 @@ -6890,7 +6890,7 @@ Bilingual <fixed-case>M</fixed-case>arkov Reordering Labels for Hierarchical <fixed-case>SMT</fixed-case> GideonMaillette de Buy Wenniger - KhalilSima’an + KhalilSima’an 11–21 W14-4002 10.3115/v1/W14-4002 @@ -6912,7 +6912,7 @@ YutoHatakoshi GrahamNeubig SakrianiSakti - TomokiToda + TomokiToda SatoshiNakamura 34–42 W14-4004 @@ -6933,8 +6933,8 @@ Reducing the Impact of Data Sparsity in Statistical Machine Translation KaranSingla KunalSachdeva - SrinivasBangalore - Dipti MisraSharma + SrinivasBangalore + Dipti MisraSharma DikshaYadav 51–56 W14-4006 @@ -7023,10 +7023,10 @@ Word’s Vector Representations meet Machine Translation - EvaMartínez Garcia - JörgTiedemann + EvaMartínez Garcia + JörgTiedemann CristinaEspaña-Bonet - LluísMàrquez + LluísMàrquez 132–134 W14-4015 10.3115/v1/W14-4015 @@ -7034,8 +7034,8 @@ Context Sense Clustering for Translation - JoãoCasteleiro - GabrielLopes + JoãoCasteleiro + GabrielLopes JoaquimSilva 135–137 W14-4016 @@ -7045,7 +7045,7 @@ Evaluating Word Order Recursively over Permutation-Forests MilošStanojević - KhalilSima’an + KhalilSima’an 138–147 W14-4017 10.3115/v1/W14-4017 @@ -7064,7 +7064,7 @@ How Synchronous are Adjuncts in Translation Data? SophieArnoult - KhalilSima’an + KhalilSima’an 157–165 W14-4019 10.3115/v1/W14-4019 @@ -7075,7 +7075,7 @@ Proceedings of the EMNLP 2014 Workshop on Analysis of Large Scale Social Interaction in MOOCs W14-41 - CarolynRose + CarolynRose GeorgeSiemens 10.3115/v1/W14-41 Association for Computational Linguistics @@ -7198,7 +7198,7 @@ Proceedings of the EMNLP’2014 Workshop on Language Technology for Closely Related Languages and Language Variants W14-42 - PreslavNakov + PreslavNakov PetyaOsenova CristinaVertan 10.3115/v1/W14-42 @@ -7233,8 +7233,8 @@ Cross-lingual Dependency Parsing of Related Languages with Rich Morphosyntactic Tagsets - ŽeljkoAgić - JörgTiedemann + ŽeljkoAgić + JörgTiedemann DanijelaMerkler SimonKrek KajaDobrovoljc @@ -7263,12 +7263,12 @@ Adapting Predicate Frames for <fixed-case>U</fixed-case>rdu <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>anking - Riyaz AhmadBhat + Riyaz AhmadBhat NamanJain AshwiniVaidya - MarthaPalmer - TafseerAhmed Khan - Dipti MisraSharma + MarthaPalmer + TafseerAhmed Khan + Dipti MisraSharma JamesBabani 47–55 W14-4206 @@ -7295,7 +7295,7 @@ Proper Name Machine Translation from <fixed-case>J</fixed-case>apanese to <fixed-case>J</fixed-case>apanese <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage TaroMiyazaki - NaotoKato + NaotoKato SeikiInoue ShuichiUmeda MakikoAzuma @@ -7308,7 +7308,7 @@ Exploring cross-language statistical machine translation for closely related <fixed-case>S</fixed-case>outh <fixed-case>S</fixed-case>lavic languages - MajaPopović + MajaPopović NikolaLjubešić 76–84 W14-4210 @@ -7321,8 +7321,8 @@ AnupamSingh NishkarshShastri MeghaJhunjhunwala - SrinivasBangalore - Dipti MisraSharma + SrinivasBangalore + Dipti MisraSharma 85–91 W14-4211 10.3115/v1/W14-4211 @@ -7330,7 +7330,7 @@ A Comparison of <fixed-case>MT</fixed-case> Methods for Closely Related Languages: a Case Study on <fixed-case>C</fixed-case>zech - <fixed-case>S</fixed-case>lovak Language Pair - VladislavKuboň + VladislavKuboň JernejVičič 92–98 W14-4212 @@ -7341,7 +7341,7 @@ Handling <fixed-case>OOV</fixed-case> Words in Dialectal <fixed-case>A</fixed-case>rabic to <fixed-case>E</fixed-case>nglish Machine Translation MaryamAminian MahmoudGhoneim - MonaDiab + MonaDiab 99–108 W14-4213 10.3115/v1/W14-4213 @@ -7354,7 +7354,7 @@ W14-43 KallirroiGeorgila MatthewStone - HelenHastie + HelenHastie AniNenkova 10.3115/v1/W14-43 Association for Computational Linguistics @@ -7370,7 +7370,7 @@ <fixed-case>K</fixed-case>eynote: Statistical Approaches to Open-domain Spoken Dialogue Systems - SteveYoung + SteveYoung 1 W14-4301 10.3115/v1/W14-4301 @@ -7422,7 +7422,7 @@ Adapting to Personality Over Time: Examining the Effectiveness of Dialogue Policy Progressions in Task-Oriented Interaction AlexandriaVail - KristyBoyer + KristyBoyer 41–50 W14-4306 10.3115/v1/W14-4306 @@ -7472,8 +7472,8 @@ <fixed-case>A</fixed-case>lex: Bootstrapping a Spoken Dialogue System for a New Domain by Real Users OndřejDušek OndřejPlátek - Lukᚎilka - FilipJurčíček + Lukᚎilka + FilipJurčíček 79–83 W14-4311 10.3115/v1/W14-4311 @@ -7481,7 +7481,7 @@ <fixed-case>I</fixed-case>npro<fixed-case>TK</fixed-case>s: A Toolkit for Incremental Situated Processing - CaseyKennington + CaseyKennington SpyrosKousidis DavidSchlangen 84–88 @@ -7495,7 +7495,7 @@ ShaohuaYang YuCheng YunyiJia - JoyceChai + JoyceChai NingXi 89–97 W14-4313 @@ -7506,7 +7506,7 @@ An easy method to make dialogue systems incremental HatimKhouzaimi RomainLaroche - FabriceLefevre + FabriceLefevre 98–107 W14-4314 10.3115/v1/W14-4314 @@ -7515,7 +7515,7 @@ Free on-line speech recogniser based on <fixed-case>K</fixed-case>aldi <fixed-case>ASR</fixed-case> toolkit producing word posterior lattices OndřejPlátek - FilipJurčíček + FilipJurčíček 108–112 W14-4315 10.3115/v1/W14-4315 @@ -7524,7 +7524,7 @@ Combining Task and Dialogue Streams in Unsupervised Dialogue Act Models AysuEzen-Can - KristyBoyer + KristyBoyer 113–122 W14-4316 10.3115/v1/W14-4316 @@ -7532,7 +7532,7 @@ Dialogue Act Modeling for Non-Visual Web Access - VikasAshok + VikasAshok YevgenBorodin SvetlanaStoyanchev IVRamakrishnan @@ -7569,8 +7569,8 @@ The Role of Polarity in Inferring Acceptance and Rejection in Dialogue - JulianSchlöder - RaquelFernández + JulianSchlöder + RaquelFernández 151–160 W14-4321 10.3115/v1/W14-4321 @@ -7579,7 +7579,7 @@ In-depth Exploitation of Noun and Verb Semantics to Identify Causation in Verb-Noun Pairs MehwishRiaz - RoxanaGirju + RoxanaGirju 161–170 W14-4322 10.3115/v1/W14-4322 @@ -7590,7 +7590,7 @@ ReidSwanson ElaheRahimtoroghi ThomasCorcoran - MarilynWalker + MarilynWalker 171–180 W14-4323 10.3115/v1/W14-4323 @@ -7598,8 +7598,8 @@ Evaluating a Spoken Dialogue System that Detects and Adapts to User Affective States - DianeLitman - KatherineForbes-Riley + DianeLitman + KatherineForbes-Riley 181–185 W14-4324 10.3115/v1/W14-4324 @@ -7608,7 +7608,7 @@ Initiative Taking in Negotiation ElnazNouri - DavidTraum + DavidTraum 186–193 W14-4325 10.3115/v1/W14-4325 @@ -7617,7 +7617,7 @@ Knowledge Acquisition Strategies for Goal-Oriented Dialog Systems AasishPappu - AlexanderRudnicky + AlexanderRudnicky 194–198 W14-4326 10.3115/v1/W14-4326 @@ -7653,7 +7653,7 @@ Aspectual Properties of Conversational Activities - Rebecca J.Passonneau + Rebecca J.Passonneau BoxuanGuan Cho HoYeung YuanDu @@ -7665,12 +7665,12 @@ Detecting Inappropriate Clarification Requests in Spoken Dialogue Systems - AlexLiu + AlexLiu RoseSloan Mei-VernThen SvetlanaStoyanchev - JuliaHirschberg - ElizabethShriberg + JuliaHirschberg + ElizabethShriberg 238–242 W14-4331 10.3115/v1/W14-4331 @@ -7689,7 +7689,7 @@ <fixed-case>SAWDUST</fixed-case>: a Semi-Automated Wizard Dialogue Utterance Selection Tool for domain-independent large-domain dialogue SudeepGandhe - DavidTraum + DavidTraum 251–253 W14-4333 10.3115/v1/W14-4333 @@ -7701,7 +7701,7 @@ DavidDeVault KallirroiGeorgila RonArtstein - DavidTraum + DavidTraum Louis-PhilippeMorency 254–256 W14-4334 @@ -7710,16 +7710,16 @@ <fixed-case>MVA</fixed-case>: The Multimodal Virtual Assistant - MichaelJohnston + MichaelJohnston JohnChen PatrickEhlen HyuckchulJung JayLieske AarthiReddy - EthanSelfridge + EthanSelfridge SvetlanaStoyanchev BrantVasilieff - JayWilpon + JayWilpon 257–259 W14-4335 10.3115/v1/W14-4335 @@ -7734,8 +7734,8 @@ CatherineBreslin HeribertoCuayáhuitl NinaDethlefs - MilicaGašić - JamesHenderson + MilicaGašić + JamesHenderson OliverLemon XingkunLiu PeterMika @@ -7743,11 +7743,11 @@ TimPotter VerenaRieser BlaiseThomson - PirrosTsiakoulis + PirrosTsiakoulis YvesVanrompay BorisVillazon-Terrazas MajidYazdani - SteveYoung + SteveYoung YanchaoYu 260–262 W14-4336 @@ -7758,7 +7758,7 @@ The Second Dialog State Tracking Challenge MatthewHenderson BlaiseThomson - Jason D.Williams + Jason D.Williams 263–272 W14-4337 10.3115/v1/W14-4337 @@ -7777,7 +7777,7 @@ Web-style ranking and <fixed-case>SLU</fixed-case> combination for dialog state tracking - Jason D.Williams + Jason D.Williams 282–291 W14-4339 10.3115/v1/W14-4339 @@ -7787,7 +7787,7 @@ Word-Based Dialog State Tracking with Recurrent Neural Networks MatthewHenderson BlaiseThomson - SteveYoung + SteveYoung 292–299 W14-4340 10.3115/v1/W14-4340 @@ -7795,7 +7795,7 @@ Comparative Error Analysis of Dialog State Tracking - RonnieSmith + RonnieSmith 300–309 W14-4341 10.3115/v1/W14-4341 @@ -7803,7 +7803,7 @@ Extrinsic Evaluation of Dialog State Tracking and Predictive Metrics for Dialog Policy Optimization - SungjinLee + SungjinLee 310–317 W14-4342 10.3115/v1/W14-4342 @@ -7833,7 +7833,7 @@ Sequential Labeling for Tracking Dynamic Dialog States SeokhwanKim - Rafael E.Banchs + Rafael E.Banchs 332–336 W14-4345 10.3115/v1/W14-4345 @@ -7845,7 +7845,7 @@ Proceedings of the 8th International Natural Language Generation Conference (INLG) W14-44 MargaretMitchell - KathleenMcCoy + KathleenMcCoy DavidMcDonald AoifeCahill 10.3115/v1/W14-44 @@ -7862,7 +7862,7 @@ A Case Study: <fixed-case>NLG</fixed-case> meeting Weather Industry Demand for Quality and Quantity of Textual Weather Forecasts - SomayajuluSripada + SomayajuluSripada NeilBurnett RossTurner JohnMastin @@ -7875,10 +7875,10 @@ <fixed-case>P</fixed-case>atient<fixed-case>N</fixed-case>arr: Towards generating patient-centric summaries of hospital stays BarbaraDi Eugenio - AndrewBoyd + AndrewBoyd CamilloLugaresi AbhinayaBalasubramanian - GailKeenan + GailKeenan MikeBurton TamaraGoncalves Rezende Macieira JianrongLi @@ -7900,7 +7900,7 @@ Text simplification using synchronous dependency grammars: Generalising automatically harvested rules - MandyaAngrosh + MandyaAngrosh AdvaithSiddharthan 16–25 W14-4404 @@ -7931,7 +7931,7 @@ TatsuroOya YasharMehdad GiuseppeCarenini - RaymondNg + RaymondNg 45–53 W14-4407 10.3115/v1/W14-4407 @@ -7939,9 +7939,9 @@ A Hybrid Approach to Multi-document Summarization of Opinions in Reviews - GiuseppeDi Fabbrizio - AmandaStent - RobertGaizauskas + GiuseppeDi Fabbrizio + AmandaStent + RobertGaizauskas 54–63 W14-4408 10.3115/v1/W14-4408 @@ -7951,7 +7951,7 @@ Adapting Graph Summaries to the Users’ Reading Levels PriscillaMoraes KathleenMcCoy - SandraCarberry + SandraCarberry 64–73 W14-4409 10.3115/v1/W14-4409 @@ -7959,7 +7959,7 @@ Experimental Design to Improve Topic Analysis Based Summarization - JohnMiller + JohnMiller KathleenMcCoy 74–82 W14-4410 @@ -7969,7 +7969,7 @@ Towards a Description of Symbolic Maps RumiyaIzgalieva - DanielVale + DanielVale ElisaVales 83–92 W14-4411 @@ -7979,7 +7979,7 @@ Adapting <fixed-case>S</fixed-case>imple<fixed-case>NLG</fixed-case> for <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese realisation Rodrigode Oliveira - SomayajuluSripada + SomayajuluSripada 93–94 W14-4412 10.3115/v1/W14-4412 @@ -7990,7 +7990,7 @@ PriscillaMoraes GabrielSina KathleenMcCoy - SandraCarberry + SandraCarberry 95–98 W14-4413 10.3115/v1/W14-4413 @@ -7999,7 +7999,7 @@ Two-Stage Stochastic Email Synthesizer Yun-NungChen - AlexanderRudnicky + AlexanderRudnicky 99–102 W14-4414 10.3115/v1/W14-4414 @@ -8009,7 +8009,7 @@ A Framework for Health Behavior Change using Companionable Robots BanditaSarma AmitavaDas - RodneyNielsen + RodneyNielsen 103–107 W14-4415 10.3115/v1/W14-4415 @@ -8028,9 +8028,9 @@ Determining Content for Unknown Users: Lessons from the <fixed-case>M</fixed-case>ink<fixed-case>A</fixed-case>pp Case Study GemmaWebster - ChrisMellish - Somayajulu G.Sripada - ReneVan Der Wal + ChrisMellish + Somayajulu G.Sripada + ReneVan Der Wal KoenArts YolandaMelero XavierLambin @@ -8074,7 +8074,7 @@ Latent User Models for Online River Information Tailoring XiwuHan - SomayajuluSripada + SomayajuluSripada KitMacleod AntonioIoris 133–137 @@ -8085,7 +8085,7 @@ Multi-adaptive Natural Language Generation using Principal Component Regression DimitraGkatzia - HelenHastie + HelenHastie OliverLemon 138–142 W14-4422 @@ -8094,8 +8094,8 @@ <fixed-case>TBI</fixed-case>-Doc: Generating Patient & Clinician Reports from Brain Imaging Data - PamelaJordan - NancyGreen + PamelaJordan + NancyGreen ChistopherThomas SusanHolm 143–146 @@ -8105,7 +8105,7 @@ Towards Surface Realization with <fixed-case>CCG</fixed-case>s Induced from Dependencies - MichaelWhite + MichaelWhite 147–151 W14-4424 10.3115/v1/W14-4424 @@ -8114,7 +8114,7 @@ Two-Stage Stochastic Natural Language Generation for Email Synthesis by Modeling Sender Style and Topic Structure Yun-NungChen - AlexanderRudnicky + AlexanderRudnicky 152–156 W14-4425 10.3115/v1/W14-4425 @@ -8150,7 +8150,7 @@ Mining temporal footprints from <fixed-case>W</fixed-case>ikipedia MicheleFilannino - GoranNenadic + GoranNenadic 7–13 W14-4502 10.3115/v1/W14-4502 @@ -8327,7 +8327,7 @@ gdbank: The beginnings of a corpus of dependency structures and type-logical grammar in <fixed-case>S</fixed-case>cottish <fixed-case>G</fixed-case>aelic - ColinBatchelor + ColinBatchelor 60–65 W14-4609 10.3115/v1/W14-4609 @@ -8355,7 +8355,7 @@ Subsegmental language detection in <fixed-case>C</fixed-case>eltic language text AkshayMinocha - FrancisTyers + FrancisTyers 76–80 W14-4612 10.3115/v1/W14-4612 @@ -8401,7 +8401,7 @@ Deep Learning from Web-Scale Corpora for Better Dictionary Interfaces - PavelSmrz + PavelSmrz LubomirOtrusina 22–30 W14-4703 @@ -8411,8 +8411,8 @@ Exploring the use of word embeddings and random walks on <fixed-case>W</fixed-case>ikipedia for the <fixed-case>C</fixed-case>og<fixed-case>A</fixed-case>lex shared task JosuGoikoetxea - EnekoAgirre - AitorSoroa + EnekoAgirre + AitorSoroa 31–34 W14-4704 10.3115/v1/W14-4704 @@ -8452,7 +8452,7 @@ Retrieving Word Associations with a Simple Neighborhood Algorithm in a Graph-based Resource - GemmaBel-Enguix + GemmaBel-Enguix 60–63 W14-4708 10.3115/v1/W14-4708 @@ -8469,8 +8469,8 @@ <fixed-case>W</fixed-case>ord<fixed-case>F</fixed-case>inder - CatalinMititelu - VerginicaBarbu Mititelu + CatalinMititelu + VerginicaBarbu Mititelu 68–74 W14-4710 10.3115/v1/W14-4710 @@ -8495,10 +8495,10 @@ Jibiki-<fixed-case>LINKS</fixed-case>: a tool between traditional dictionaries and lexical networks for modelling lexical resources - YingZhang - MathieuMangeot - ValérieBellynck - ChristianBoitet + YingZhang + MathieuMangeot + ValérieBellynck + ChristianBoitet 87–98 W14-4713 10.3115/v1/W14-4713 @@ -8526,7 +8526,7 @@ A Computational Approach to Generate a Sensorial Lexicon Serra SinemTekiroğlu - GözdeÖzbal + GözdeÖzbal CarloStrapparava 114–125 W14-4716 @@ -8535,7 +8535,7 @@ Database Design of an Online <fixed-case>E</fixed-case>-Learning Tool of <fixed-case>C</fixed-case>hinese Classifiers - HelenaGao + HelenaGao 126–137 W14-4717 10.3115/v1/W14-4717 @@ -8545,7 +8545,7 @@ Default Physical Measurements in <fixed-case>SUMO</fixed-case> FrancescaQuattri AdamPease - John P.McCrae + John P.McCrae 138–143 W14-4718 10.3115/v1/W14-4718 @@ -8565,7 +8565,7 @@ A Lexical Network with a Morphological Model in It NabilGader AuroreKoehl - AlainPolguère + AlainPolguère 154–165 W14-4720 10.3115/v1/W14-4720 @@ -8575,7 +8575,7 @@ Dimensions of Metaphorical Meaning AndrewGargett JosefRuppenhofer - JohnBarnden + JohnBarnden 166–173 W14-4721 10.3115/v1/W14-4721 @@ -8592,7 +8592,7 @@ Frames and terminology: representing predicative terms in the field of the environment - Marie-ClaudeL’ Homme + Marie-ClaudeL’ Homme BenoîtRobichaud 186–197 W14-4723 @@ -8601,10 +8601,10 @@ Modelling the Semantics of Adjectives in the Ontology-Lexicon Interface - John P.McCrae + John P.McCrae FrancescaQuattri ChristinaUnger - PhilippCimiano + PhilippCimiano 198–209 W14-4724 10.3115/v1/W14-4724 @@ -8613,14 +8613,14 @@ Discovering Conceptual Metaphors using Source Domain Spaces SamiraShaikh - TomekStrzalkowski + TomekStrzalkowski KitCho TingLiu - George AaronBroadwell - LaurieFeldman - SarahTaylor + George AaronBroadwell + LaurieFeldman + SarahTaylor BorisYamrom - Ching-ShengLin + Ching-ShengLin NingSa IgnacioCases YuliyaPeshkova @@ -8645,7 +8645,7 @@ W14-48 PatrickDrouin NataliaGrabar - ThierryHamon + ThierryHamon KyoKageura 10.3115/v1/W14-48 Association for Computational Linguistics and Dublin City University @@ -8669,9 +8669,9 @@ Assigning Terms to Domains by Document Classification - RobertGaizauskas + RobertGaizauskas EmmaBarker - Monica LestariParamita + Monica LestariParamita AhmetAker 11–21 W14-4802 @@ -8680,8 +8680,8 @@ Identification of Bilingual Terms from Monolingual Documents for Statistical Machine Translation - MihaelArcan - ClaudioGiuliano + MihaelArcan + ClaudioGiuliano MarcoTurchi PaulBuitelaar 22–31 @@ -8691,7 +8691,7 @@ Terminology Questions in Texts Authored by Patients - NoemieElhadad + NoemieElhadad 32 W14-4804 10.3115/v1/W14-4804 @@ -8699,7 +8699,7 @@ <fixed-case>NPMI</fixed-case> Driven Recognition of Nested Terms - MalgorzataMarciniak + MalgorzataMarciniak AgnieszkaMykowiecka 33–41 W14-4805 @@ -8718,7 +8718,7 @@ The <fixed-case>ACL</fixed-case> <fixed-case>RD</fixed-case>-<fixed-case>TEC</fixed-case>: A Dataset for Benchmarking Terminology Extraction and Classification in Computational Linguistics - BehrangQ. Zadeh + BehrangQ. Zadeh SiegfriedHandschuh 52–63 W14-4807 @@ -8737,7 +8737,7 @@ A comparative User Evaluation of Terminology Management Tools for Interpreters HernaniCosta - GloriaCorpas Pastor + GloriaCorpas Pastor IsabelDurán Muñoz 68–76 W14-4809 @@ -8797,7 +8797,7 @@ Proceedings of LAW VIII - The 8th Linguistic Annotation Workshop W14-49 - LoriLevin + LoriLevin ManfredStede 10.3115/v1/W14-49 Association for Computational Linguistics and Dublin City University @@ -8847,7 +8847,7 @@ Sentence diagrams: their evaluation and combination JirkaHana - BarboraHladká + BarboraHladká IvanaLukšová 38–47 W14-4905 @@ -8858,7 +8858,7 @@ Finding your “Inner-Annotator”: An Experiment in Annotator Independence for Rating Discourse Coherence Quality in Essays JillBurstein SwapnaSomasundaran - MartinChodorow + MartinChodorow 48–53 W14-4906 10.3115/v1/W14-4906 @@ -8868,7 +8868,7 @@ Optimizing annotation efforts to build reliable annotated corpora for training statistical models CyrilGrouin ThomasLavergne - AurélieNévéol + AurélieNévéol 54–58 W14-4907 10.3115/v1/W14-4907 @@ -8876,7 +8876,7 @@ A Web-based Geo-resolution Annotation and Evaluation Tool - BeatriceAlex + BeatriceAlex KateByrne ClaireGrover RichardTobin @@ -8888,7 +8888,7 @@ Annotating Uncertainty in <fixed-case>H</fixed-case>ungarian Webtext VeronikaVincze - Katalin IlonaSimkó + Katalin IlonaSimkó ViktorVarga 64–69 W14-4909 @@ -8900,7 +8900,7 @@ PaulReisert JuntaMizuno MiwaKanno - NaoakiOkazaki + NaoakiOkazaki KentaroInui 70–74 W14-4910 @@ -8959,7 +8959,7 @@ Annotating Discourse Connectives in Spoken <fixed-case>T</fixed-case>urkish - IsinDemirşahin + IsinDemirşahin DenizZeyrek 105–109 W14-4916 @@ -8989,11 +8989,11 @@ Towards Automatic Annotation of Clinical Decision-Making Style LimorHochberg - CeciliaOvesdotter Alm + CeciliaOvesdotter Alm Esa M.Rantanen QiYu Caroline M.DeLong - AnneHaake + AnneHaake 129–138 W14-4919 10.3115/v1/W14-4919 @@ -9002,7 +9002,7 @@ Interactive Annotation for Event Modality in Modern Standard and <fixed-case>E</fixed-case>gyptian <fixed-case>A</fixed-case>rabic Tweets RaniaAl-Sabbagh - RoxanaGirju + RoxanaGirju JanaDiesner 139–148 W14-4920 @@ -9021,7 +9021,7 @@ Focus Annotation in Reading Comprehension Data RamonZiai - DetmarMeurers + DetmarMeurers 159–168 W14-4922 10.3115/v1/W14-4922 @@ -9033,7 +9033,7 @@ Proceedings of the INLG and SIGDIAL 2014 Joint Session W14-50 MargaretMitchell - KathleenMcCoy + KathleenMcCoy DavidMcDonald AoifeCahill 10.3115/v1/W14-50 @@ -9069,7 +9069,7 @@ Crowdsourcing Language Generation Templates for Dialogue Systems MargaretMitchell - DanBohus + DanBohus EceKamar 172–180 W14-5003 @@ -9081,9 +9081,9 @@ Proceedings of the 11th International Conference on Natural Language Processing W14-51 - Dipti MisraSharma + Dipti MisraSharma RajeevSangal - Jyoti D.Pawar + Jyoti D.Pawar NLP Association of India
Goa, India
December @@ -9096,7 +9096,7 @@ Keynote Lecture 1: Complexity of Dependency Representations for Natural Languages - Aravind K.Joshi + Aravind K.Joshi 1 W14-5101 joshi-2014-keynote @@ -9105,7 +9105,7 @@ <fixed-case>SMT</fixed-case> from Agglutinative Languages: Use of Suffix Separation and Word Splitting Prakash B.Pimpale Raj NathPatel - SasikumarM. + SasikumarM. 2–10 W14-5102 pimpale-etal-2014-smt @@ -9114,7 +9114,7 @@ Tackling Close Cousins: Experiences In Developing Statistical Machine Translation Systems For <fixed-case>M</fixed-case>arathi And <fixed-case>H</fixed-case>indi RajDabre JyoteshChoudhari - PushpakBhattacharyya + PushpakBhattacharyya 11–19 W14-5103 dabre-etal-2014-tackling @@ -9122,16 +9122,16 @@ Correlating decoding events with errors in Statistical Machine Translation EleftheriosAvramidis - MajaPopović + MajaPopović 20–29 W14-5104 avramidis-popovic-2014-correlating Supertag Based Pre-ordering in Machine Translation - RajenChatterjee + RajenChatterjee AnoopKunchukuttan - PushpakBhattacharyya + PushpakBhattacharyya 30–38 W14-5105 chatterjee-etal-2014-supertag @@ -9140,7 +9140,7 @@ Duration Modeling by Multi-Models based on Vowel Production characteristics V RamuReddy ParakrantSarkar - K SreenivasaRao + K SreenivasaRao 39–47 W14-5106 reddy-etal-2014-duration @@ -9174,7 +9174,7 @@ Keynote Lecture 2: Text Analysis for identifying Entities and their mentions in <fixed-case>I</fixed-case>ndian languages - SobhaL + SobhaL 68 W14-5110 l-2014-keynote @@ -9183,7 +9183,7 @@ <fixed-case>H</fixed-case>in<fixed-case>MA</fixed-case>: Distributed Morphology based <fixed-case>H</fixed-case>indi Morphological Analyzer AnkitBahuguna LavitaTalukdar - PushpakBhattacharyya + PushpakBhattacharyya SmritiSingh 69–75 W14-5111 @@ -9191,7 +9191,7 @@ Roles of Nominals in Construing Meaning at the Level of Discourse - Soumya SankarGhosh + Soumya SankarGhosh SamirKarmakar 76–81 W14-5112 @@ -9201,7 +9201,7 @@ Anou Tradir: Experiences In Building Statistical Machine Translation Systems For Mauritian Languages – Creole, <fixed-case>E</fixed-case>nglish, <fixed-case>F</fixed-case>rench RajDabre AneeravSukhoo - PushpakBhattacharyya + PushpakBhattacharyya 82–88 W14-5113 dabre-etal-2014-anou @@ -9211,9 +9211,9 @@ SantanuPal Braja GopalPatra DipankarDas - Sudip KumarNaskar - SivajiBandyopadhyay - Josefvan Genabith + Sudip KumarNaskar + SivajiBandyopadhyay + Josefvan Genabith 89–94 W14-5114 pal-etal-2014-sentiment @@ -9224,8 +9224,8 @@ JaiParanjape NileshJoshi IrawatiKulkarni - MalharKulkarni - PushpakBhattacharyya + MalharKulkarni + PushpakBhattacharyya 95–100 W14-5115 redkar-etal-2014-introduction @@ -9234,16 +9234,16 @@ <fixed-case>LMS</fixed-case>im : Computing Domain-specific Semantic Word Similarities Using a Language Modeling Approach SachinPawar SwapnilHingmire - Girish K.Palshikar + Girish K.Palshikar 101–106 W14-5116 pawar-etal-2014-lmsim Multiobjective Optimization and Unsupervised Lexical Acquisition for Named Entity Recognition and Classification - Govind + Govind AsifEkbal - ChrisBiemann + ChrisBiemann 107–112 W14-5117 govind-etal-2014-multiobjective @@ -9253,7 +9253,7 @@ Aswathy PV ArunGopi SajiniT - Bhadran VK + Bhadran VK 113–118 W14-5118 v-etal-2014-improving @@ -9305,15 +9305,15 @@ A Sentiment Analyzer for <fixed-case>H</fixed-case>indi Using <fixed-case>H</fixed-case>indi Senti Lexicon RakshaSharma - PushpakBhattacharyya + PushpakBhattacharyya 150–155 W14-5124 sharma-bhattacharyya-2014-sentiment A Sandhi Splitter for <fixed-case>M</fixed-case>alayalam - Devadath VV - Litton JKurisinkel + Devadath VV + Litton JKurisinkel Dipti MisraSharma VasudevaVarma 156–161 @@ -9323,9 +9323,9 @@ <fixed-case>P</fixed-case>a<fixed-case>CM</fixed-case>an : Parallel Corpus Management Workbench DipteshKanojia - ManishShrivastava + ManishShrivastava RajDabre - PushpakBhattacharyya + PushpakBhattacharyya 162–166 W14-5126 kanojia-etal-2014-pacman @@ -9342,15 +9342,15 @@ A Domain-Restricted, Rule Based, <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi Machine Translation System Based on Dependency Parsing PratikDesai AmitSangodkar - Om P.Damani + Om P.Damani 177–185 W14-5128 desai-etal-2014-domain Translation of <fixed-case>TO</fixed-case> infinitives in Anusaaraka Platform: an <fixed-case>E</fixed-case>nglish <fixed-case>H</fixed-case>indi <fixed-case>MT</fixed-case> system - AksharBharati - Sukhada + AksharBharati + Sukhada SomaPaul 186–195 W14-5129 @@ -9368,7 +9368,7 @@ Naturalistic Audio-Visual Emotion Database Sudarsana ReddyKadiri P.Gangamohan - V.K.Mittal + V.K.Mittal B.Yegnanarayana 206–213 W14-5131 @@ -9385,7 +9385,7 @@ Keynote Lecture 3: Modeling <fixed-case>N</fixed-case>on-<fixed-case>P</fixed-case>ropositional Semantics - LoriLevin + LoriLevin 222 W14-5133 levin-2014-keynote @@ -9401,7 +9401,7 @@ <fixed-case>S</fixed-case>angam: A <fixed-case>P</fixed-case>erso-<fixed-case>A</fixed-case>rabic to <fixed-case>I</fixed-case>ndic Script Machine Transliteration Model - Gurpreet SinghLehal + Gurpreet SinghLehal Tejinder SinghSaini 232–239 W14-5135 @@ -9412,7 +9412,7 @@ ShilpaDesai NeenadDesai JyotiPawar - PushpakBhattacharyya + PushpakBhattacharyya 240–248 W14-5136 desai-etal-2014-autoparse @@ -9436,7 +9436,7 @@ Accurate Identification of the Karta (Subject) Relation in <fixed-case>B</fixed-case>angla - ArnabDhar + ArnabDhar SudeshnaSarkar 267–276 W14-5139 @@ -9447,7 +9447,7 @@ KishorjitNongmeikapam Thiyam IbungomachaSingh Ngariyanbam MayekleimaChanu - SivajiBandyopadhyay + SivajiBandyopadhyay 277–286 W14-5140 nongmeikapam-etal-2014-manipuri @@ -9514,7 +9514,7 @@ SudhaBhingardive RatishPuduppully DhirendraSingh - PushpakBhattacharyya + PushpakBhattacharyya 344–352 W14-5148 bhingardive-etal-2014-merging @@ -9522,7 +9522,7 @@ Hierarchical Recursive Tagset for Annotating Cooking Recipes Sharath ReddyGunamgari - SandipanDandapat + SandipanDandapat MonojitChoudhury 353–361 W14-5149 @@ -9548,14 +9548,14 @@ Identifying Languages at the Word Level in Code-Mixed <fixed-case>I</fixed-case>ndian Social Media Text AmitavaDas - BjörnGambäck + BjörnGambäck 378–387 W14-5152 das-gamback-2014-identifying Unsupervised Detection and Promotion of Authoritative Domains for Medical Queries in Web Search - Manoj K.Chinnakotla + Manoj K.Chinnakotla Rupesh K.Mehta VipulAgrawal 388–394 @@ -9577,7 +9577,7 @@ Proceedings of the Workshop on Open Infrastructures and Analysis Frameworks for HLT W14-52 - NancyIde + NancyIde JensGrivolla 10.3115/v1/W14-52 Association for Computational Linguistics and Dublin City University @@ -9601,10 +9601,10 @@ Integrating <fixed-case>UIMA</fixed-case> with Alveo, a human communication science virtual laboratory - DominiqueEstival + DominiqueEstival SteveCassidy - KarinVerspoor - AndrewMacKinlay + KarinVerspoor + AndrewMacKinlay DenisBurnham 12–22 W14-5202 @@ -9624,7 +9624,7 @@ The Language Application Grid Web Service Exchange Vocabulary NancyIde - JamesPustejovsky + JamesPustejovsky KeithSuderman MarcVerhagen 34–43 @@ -9637,7 +9637,7 @@ TadayoshiHara GoranTopić YusukeMiyao - AkikoAizawa + AkikoAizawa 44–52 W14-5205 10.3115/v1/W14-5205 @@ -9646,7 +9646,7 @@ A Conceptual Framework of Online Natural Language Processing Pipeline Application ChunqiShi - JamesPustejovsky + JamesPustejovsky MarcVerhagen 53–59 W14-5206 @@ -9657,7 +9657,7 @@ Command-line utilities for managing and exploring annotated corpora JoelNothman TimDawborn - James R.Curran + James R.Curran 60–65 W14-5207 10.3115/v1/W14-5207 @@ -9665,10 +9665,10 @@ <fixed-case>SSF</fixed-case>: A Common Representation Scheme for Language Analysis for Language Technology Infrastructure Development - AksharBharati + AksharBharati RajeevSangal - DiptiMisra Sharma - AnilKumar Singh + DiptiMisra Sharma + AnilKumar Singh 66–76 W14-5208 10.3115/v1/W14-5208 @@ -9676,7 +9676,7 @@ Quo Vadis <fixed-case>UIMA</fixed-case>? - ThiloGötz + ThiloGötz JörnKottmann AlexanderLang 77–82 @@ -9698,8 +9698,8 @@ Intellectual Property Rights Management with Web Service Grids - ChristopherCieri - DeniseDiPersio + ChristopherCieri + DeniseDiPersio 93–100 W14-5211 10.3115/v1/W14-5211 @@ -9729,7 +9729,7 @@ MarcosZampieri LilingTan NikolaLjubešić - JörgTiedemann + JörgTiedemann 10.3115/v1/W14-53 Association for Computational Linguistics and Dublin City University
Dublin, Ireland
@@ -9780,7 +9780,7 @@
Morphological Disambiguation and Text Normalization for <fixed-case>S</fixed-case>outhern <fixed-case>Q</fixed-case>uechua Varieties - AnnetteRios Gonzales + AnnetteRios Gonzales Richard AlexanderCastro Mamani 39–47 W14-5305 @@ -9821,7 +9821,7 @@ Part-of-Speech Tag Disambiguation by Cross-Linguistic Majority Vote NoëmiAepli Ruprechtvon Waldenfels - TanjaSamardžić + TanjaSamardžić 76–84 W14-5309 10.3115/v1/W14-5309 @@ -9839,7 +9839,7 @@ Automatically building a <fixed-case>T</fixed-case>unisian Lexicon for Deverbal Nouns AhmedHamdi - NúriaGala + NúriaGala AlexisNasr 95–102 W14-5311 @@ -9858,9 +9858,9 @@ Improved Sentence-Level <fixed-case>A</fixed-case>rabic Dialect Classification - ChristophTillmann + ChristophTillmann SaabMansour - YaserAl-Onaizan + YaserAl-Onaizan 110–119 W14-5313 10.3115/v1/W14-5313 @@ -9868,7 +9868,7 @@ Using Maximum Entropy Models to Discriminate between Similar Languages and Varieties - JordiPorta + JordiPorta José-LuisSancho 120–128 W14-5314 @@ -9880,9 +9880,9 @@ MarcoLui NedLetcher OliverAdams - LongDuong + LongDuong PaulCook - TimothyBaldwin + TimothyBaldwin 129–138 W14-5315 10.3115/v1/W14-5315 @@ -9890,7 +9890,7 @@ The <fixed-case>NRC</fixed-case> System for Discriminating Similar Languages - CyrilGoutte + CyrilGoutte SergeLéger MarineCarpuat 139–145 @@ -9901,8 +9901,8 @@ Experiments in Sentence Language Identification with Groups of Similar Languages BenKing - DragomirRadev - StevenAbney + DragomirRadev + StevenAbney 146–154 W14-5317 10.3115/v1/W14-5317 @@ -9921,11 +9921,11 @@ Proceedings of the Third Workshop on Vision and Language W14-54 - AnjaBelz + AnjaBelz DarrenCosker FrankKeller WilliamSmith - KalinaBontcheva + KalinaBontcheva SienMoens AlanSmeaton 10.3115/v1/W14-54 @@ -9942,7 +9942,7 @@ The Effect of Sensor Errors in Situated Human-Computer Dialogue NielsSchütte - JohnKelleher + JohnKelleher BrianMac Namee 1–8 W14-5401 @@ -9953,7 +9953,7 @@ Joint Navigation in Commander/Robot Teams: Dialog & Task Performance When Vision is Bandwidth-Limited DouglasSummers-Stay TaylorCassidy - ClareVoss + ClareVoss 9–16 W14-5402 10.3115/v1/W14-5402 @@ -9963,7 +9963,7 @@ <fixed-case>TUHOI</fixed-case>: <fixed-case>T</fixed-case>rento Universal Human Object Interaction Dataset Dieu-ThuLe JasperUijlings - RaffaellaBernardi + RaffaellaBernardi 17–24 W14-5403 10.3115/v1/W14-5403 @@ -9983,7 +9983,7 @@ Exploration of functional semantics of prepositions from corpora of descriptions of visual scenes SimonDobnik - JohnKelleher + JohnKelleher 33–37 W14-5405 10.3115/v1/W14-5405 @@ -9994,7 +9994,7 @@ JosiahWang FeiYan AhmetAker - RobertGaizauskas + RobertGaizauskas 38–45 W14-5406 10.3115/v1/W14-5406 @@ -10003,8 +10003,8 @@ Key Event Detection in Video using <fixed-case>ASR</fixed-case> and Visual Data NirajShrestha - Aparna N.Venkitasubramanian - Marie-FrancineMoens + Aparna N.Venkitasubramanian + Marie-FrancineMoens 46–53 W14-5407 10.3115/v1/W14-5407 @@ -10024,7 +10024,7 @@ Semantic and geometric enrichment of 3<fixed-case>D</fixed-case> geo-spatial models with captioned photos and labelled illustrations - ChrisJones + ChrisJones PaulRosin JonathanSlade 62–67 @@ -10036,7 +10036,7 @@ Weakly supervised construction of a repository of iconic images LydiaWeiland WolfgangEffelsberg - Simone PaoloPonzetto + Simone PaoloPonzetto 68–73 W14-5410 10.3115/v1/W14-5410 @@ -10079,7 +10079,7 @@ FabriceMaurel Jean-MarcRoutoure PierreBeust - GaëlDias + GaëlDias 95–102 W14-5414 10.3115/v1/W14-5414 @@ -10117,7 +10117,7 @@ Coloring Objects: Adjective-Noun Visual Semantic Compositionality DatTien Nguyen AngelikiLazaridou - RaffaellaBernardi + RaffaellaBernardi 112–114 W14-5418 10.3115/v1/W14-5418 @@ -10144,7 +10144,7 @@ Keyphrase Extraction using Textual and Visual Features - YaakovHaCohen-Kerner + YaakovHaCohen-Kerner StefanosVrochidis DimitrisLiparas AnastasiaMoumtzidou @@ -10156,8 +10156,8 @@ Towards automatic annotation of communicative gesturing - KristiinaJokinen - GrahamWilcock + KristiinaJokinen + GrahamWilcock 124–125 W14-5422 10.3115/v1/W14-5422 @@ -10168,8 +10168,8 @@ Proceedings of the Fifth Workshop on South and Southeast Asian Natural Language Processing W14-55 - ChristianBoitet - M.G. AbbasMalik + ChristianBoitet + M.G. AbbasMalik 10.3115/v1/W14-55 Association for Computational Linguistics and Dublin City University
Dublin, Ireland
@@ -10202,7 +10202,7 @@ Integrating Dictionaries into an Unsupervised Model for <fixed-case>M</fixed-case>yanmar Word Segmentation YeKyaw Thu AndrewFinch - EiichiroSumita + EiichiroSumita YoshinoriSagisaka 20–27 W14-5503 @@ -10212,8 +10212,8 @@ A Framework for Learning Morphology using Suffix Association Matrix ShilpaDesai - JyotiPawar - PushpakBhattacharyya + JyotiPawar + PushpakBhattacharyya 28–36 W14-5504 10.3115/v1/W14-5504 @@ -10223,7 +10223,7 @@ <fixed-case>E</fixed-case>nglish to <fixed-case>U</fixed-case>rdu Statistical Machine Translation: Establishing a Baseline BushraJawaid AmirKamran - OndřejBojar + OndřejBojar 37–42 W14-5505 10.3115/v1/W14-5505 @@ -10251,7 +10251,7 @@ Developing an interlingual translation lexicon using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>ets and Grammatical Framework Shafqat MumtazVirk - K.V.S.Prasad + K.V.S.Prasad AarneRanta KrasimirAngelov 55–64 @@ -10281,7 +10281,7 @@ Building <fixed-case>E</fixed-case>nglish-<fixed-case>V</fixed-case>ietnamese Named Entity Corpus with Aligned Bilingual News Articles Quoc HungNgo - DinhDien + DinhDien WernerWiniwarter 85–93 W14-5512 @@ -10313,7 +10313,7 @@ Proceedings of the Workshop on Automatic Text Simplification - Methods and Applications in the Multilingual Society (ATS-MA 2014) W14-56 - ConstantinOrasan + ConstantinOrasan PetyaOsenova CristinaVertan 10.3115/v1/W14-56 @@ -10330,7 +10330,7 @@ Relative clause extraction for syntactic simplification IustinDornescu - RichardEvans + RichardEvans ConstantinOrăsan 1–10 W14-5601 @@ -10341,7 +10341,7 @@ Making Biographical Data in <fixed-case>W</fixed-case>ikipedia Readable: A Pattern-based Multilingual Approach ItziarGonzalez-Dios María JesúsAranzabe - ArantzaDíaz de Ilarraza + ArantzaDíaz de Ilarraza 11–20 W14-5602 10.3115/v1/W14-5602 @@ -10352,7 +10352,7 @@ KshitijMishra AnkushSoni RahulSharma - DiptiSharma + DiptiSharma 21–29 W14-5603 10.3115/v1/W14-5603 @@ -10360,8 +10360,8 @@ The Fewer, the Better? A Contrastive Study about Ways to Simplify - RuslanMitkov - SanjaŠtajner + RuslanMitkov + SanjaŠtajner 30–40 W14-5604 10.3115/v1/W14-5604 @@ -10377,8 +10377,8 @@ Assessing Conformance of Manually Simplified Corpora with User Requirements: the Case of Autistic Readers - SanjaŠtajner - RichardEvans + SanjaŠtajner + RichardEvans IustinDornescu 53–63 W14-5606 @@ -10388,7 +10388,7 @@ Making historical texts accessible to everybody CristinaVertan - Walthervon Hahn + Walthervon Hahn 64–68 W14-5607 10.3115/v1/W14-5607 @@ -10400,9 +10400,9 @@ Proceedings of the First Workshop on Computational Approaches to Compound Analysis (ComAComA 2014) W14-57 BenVerhoeven - WalterDaelemans - Mennovan Zaanen - Gerhardvan Huyssteen + WalterDaelemans + Mennovan Zaanen + Gerhardvan Huyssteen 10.3115/v1/W14-57 Association for Computational Linguistics and Dublin City University
Dublin, Ireland
@@ -10417,7 +10417,7 @@ Modelling Regular Subcategorization Changes in <fixed-case>G</fixed-case>erman Particle Verbs StefanBott - SabineSchulte im Walde + SabineSchulte im Walde 1–10 W14-5701 10.3115/v1/W14-5701 @@ -10425,8 +10425,8 @@ Splitting of Compound Terms in non-Prototypical Compounding Languages - ElizavetaClouet - BéatriceDaille + ElizavetaClouet + BéatriceDaille 11–19 W14-5702 10.3115/v1/W14-5702 @@ -10455,7 +10455,7 @@ Electrophysiological correlates of noun-noun compound processing by non-native speakers of <fixed-case>E</fixed-case>nglish CecileDe Cat - HaraldBaayen + HaraldBaayen EkateriniKlepousniotou 41–52 W14-5705 @@ -10475,8 +10475,8 @@ <fixed-case>W</fixed-case>ordsyoudontknow: Evaluation of lexicon-based decompounding with unknown handling KarolinaOwczarzak Ferdinandde Haan - GeorgeKrupka - DonHindle + GeorgeKrupka + DonHindle 63–71 W14-5707 10.3115/v1/W14-5707 @@ -10484,7 +10484,7 @@ Multiword noun compound bracketing using <fixed-case>W</fixed-case>ikipedia - CarolineBarrière + CarolineBarrière Pierre AndréMénard 72–80 W14-5708 @@ -10495,9 +10495,9 @@ Distinguishing Degrees of Compositionality in Compound Splitting for Statistical Machine Translation MarionWeller FabienneCap - StefanMüller - SabineSchulte im Walde - AlexanderFraser + StefanMüller + SabineSchulte im Walde + AlexanderFraser 81–90 W14-5709 10.3115/v1/W14-5709 @@ -10509,13 +10509,13 @@ Proceedings of Workshop on Lexical and Grammatical Resources for Language Processing W14-58 JorgeBaptista - PushpakBhattacharyya + PushpakBhattacharyya ChristianeFellbaum - MikelForcada + MikelForcada Chu-RenHuang SvetlaKoeva CvetanaKrstev - EricLaporte + EricLaporte 10.3115/v1/W14-58 Association for Computational Linguistics and Dublin City University
Dublin, Ireland
@@ -10538,7 +10538,7 @@ Using language technology resources and tools to construct <fixed-case>S</fixed-case>wedish <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et DanaDannélls - KarinFriberg Heppin + KarinFriberg Heppin AnnaEhrlemark 8–17 W14-5802 @@ -10574,7 +10574,7 @@ Linguistically motivated Language Resources for Sentiment Analysis - VoulaGiouli + VoulaGiouli AggelikiFotopoulou 39–45 W14-5806 @@ -10583,8 +10583,8 @@ Using Morphosemantic Information in Construction of a Pilot Lexical Semantic Resource for <fixed-case>T</fixed-case>urkish - Gözde Gülİşgüder - EşrefAdalı + Gözde Gülİşgüder + EşrefAdalı 46–54 W14-5807 10.3115/v1/W14-5807 @@ -10592,9 +10592,9 @@ Comparing <fixed-case>C</fixed-case>zech and <fixed-case>E</fixed-case>nglish <fixed-case>AMR</fixed-case>s - ZdeňkaUrešová - JanHajič - OndřejBojar + ZdeňkaUrešová + JanHajič + OndřejBojar 55–64 W14-5808 10.3115/v1/W14-5808 @@ -10639,8 +10639,8 @@ AmandaRassi CristinaSantos-Turati JorgeBaptista - NunoMamede - OtoVale + NunoMamede + OtoVale 92–101 W14-5812 10.3115/v1/W14-5812 @@ -10650,7 +10650,7 @@ Collaboratively Constructed Linguistic Resources for Language Variants and their Exploitation in <fixed-case>NLP</fixed-case> Application – the case of <fixed-case>T</fixed-case>unisian <fixed-case>A</fixed-case>rabic and the Social Media FatihaSadat FatmaMallek - MohamedBoudabous + MohamedBoudabous RahmaSellami AtefehFarzindar 102–110 @@ -10661,7 +10661,7 @@ A Database of Paradigmatic Semantic Relation Pairs for <fixed-case>G</fixed-case>erman Nouns, Verbs, and Adjectives SilkeScheible - SabineSchulte im Walde + SabineSchulte im Walde 111–119 W14-5814 10.3115/v1/W14-5814 @@ -10671,7 +10671,7 @@ Improving the Precision of Synset Links Between Cornetto and <fixed-case>P</fixed-case>rinceton <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et LeenSevens VincentVandeghinste - FrankVan Eynde + FrankVan Eynde 120–126 W14-5815 10.3115/v1/W14-5815 @@ -10680,8 +10680,8 @@ Light verb constructions with ‘do’ and ‘be’ in <fixed-case>H</fixed-case>indi: A <fixed-case>TAG</fixed-case> analysis AshwiniVaidya - OwenRambow - MarthaPalmer + OwenRambow + MarthaPalmer 127–136 W14-5816 10.3115/v1/W14-5816 @@ -10720,7 +10720,7 @@ Proceedings of the Second Workshop on Natural Language Processing for Social Media (SocialNLP) W14-59 - Shou-deLin + Shou-deLin Lun-WeiKu ErikCambria Tsung-TingKuo @@ -10746,7 +10746,7 @@ Feature Selection for Highly Skewed Sentiment Analysis Tasks CanLiu - SandraKübler + SandraKübler NingYu 2–11 W14-5902 @@ -10760,7 +10760,7 @@ DanielDakota SridharRajagopalan WenLi - SandraKübler + SandraKübler NingYu 12–21 W14-5903 @@ -10783,7 +10783,7 @@ ErikCambria Lun-WeiKu ChenGui - AlexanderGelbukh + AlexanderGelbukh 28–37 W14-5905 10.3115/v1/W14-5905 @@ -10835,9 +10835,9 @@ Proceedings of the COLING Workshop on Synchronic and Diachronic Approaches to Analyzing Technical Language W14-60 - AdamMeyers + AdamMeyers YifanHe - RalphGrishman + RalphGrishman 10.3115/v1/W14-60 Association for Computational Linguistics and Dublin City University
Dublin, Ireland
@@ -10851,7 +10851,7 @@ Investigating Context Parameters in Technology Term Recognition - Behrang Q.Zadeh + Behrang Q.Zadeh SiegfriedHandschuh 1–10 W14-6001 @@ -10873,7 +10873,7 @@ Ontology-based Technical Text Annotation - FrançoisLévy + FrançoisLévy NadiTomeh YueMa 21–30 @@ -10883,9 +10883,9 @@ Extracting Aspects and Polarity from Patents - PeterAnick + PeterAnick MarcVerhagen - JamesPustejovsky + JamesPustejovsky 31–39 W14-6004 10.3115/v1/W14-6004 @@ -10921,8 +10921,8 @@ YuvalMarton InesRehbein YannickVersley - ÖzlemÇetinoğlu - JoelTetreault + ÖzlemÇetinoğlu + JoelTetreault Dublin City University
Dublin, Ireland
August @@ -10936,9 +10936,9 @@ Parsing <fixed-case>G</fixed-case>erman: How Much Morphology Do We Need? WolfgangMaier - SandraKübler + SandraKübler DanielDakota - DanielWhyatt + DanielWhyatt 1–14 W14-6101 maier-etal-2014-parsing @@ -10948,7 +10948,7 @@ IlianaSimova DimitarVasilev AlexanderPopov - KirilSimov + KirilSimov PetyaOsenova 15–25 W14-6102 @@ -10972,7 +10972,7 @@ Exploring Options for Fast Domain Adaptation of Dependency Parsers ViktorPekar JuntaoYu - MohabEl-karef + MohabEl-karef BerndBohnet 54–65 W14-6105 @@ -10997,9 +10997,9 @@ Initial Explorations in Two-phase <fixed-case>T</fixed-case>urkish Dependency Parsing by Incorporating Constituents - İlknurDurgar El-Kahlout + İlknurDurgar El-Kahlout Ahmet AfşınAkın - ErtuǧrulYılmaz + ErtuǧrulYılmaz 82–89 W14-6108 durgar-el-kahlout-etal-2014-initial @@ -11007,7 +11007,7 @@ Experiments for Dependency Parsing of <fixed-case>G</fixed-case>reek ProkopisProkopidis - HarisPapageorgiou + HarisPapageorgiou 90–96 W14-6109 prokopidis-papageorgiou-2014-experiments @@ -11016,9 +11016,9 @@ Introducing the <fixed-case>IMS</fixed-case>-Wrocław-<fixed-case>S</fixed-case>zeged-<fixed-case>CIS</fixed-case> entry at the <fixed-case>SPMRL</fixed-case> 2014 Shared Task: Reranking and Morpho-syntax meet Unlabeled Data AndersBjörkelund ÖzlemÇetinoğlu - AgnieszkaFaleńska - RichárdFarkas - ThomasMueller + AgnieszkaFaleńska + RichárdFarkas + ThomasMueller WolfgangSeeker ZsoltSzántó 97–102 @@ -11027,8 +11027,8 @@ Introducing the <fixed-case>SPMRL</fixed-case> 2014 Shared Task on Parsing Morphologically-rich Languages - DjaméSeddah - SandraKübler + DjaméSeddah + SandraKübler ReutTsarfaty 103–109 W14-6111 @@ -11092,8 +11092,8 @@ TarekKirchhoffer Hans WernerMüller RomanKlinger - MatthiasHartung - PhilippCimiano + MatthiasHartung + PhilippCimiano 25–32 W14-6204 10.3115/v1/W14-6204 @@ -11137,10 +11137,10 @@ Automatic Analysis of Scientific and Literary Texts. Presentation and Results of the <fixed-case>DEFT</fixed-case>2014 Text Mining Challenge (Analyse automatique de textes littéraires et scientifiques : présentation et résultats du défi fouille de texte <fixed-case>DEFT</fixed-case>2014) [in <fixed-case>F</fixed-case>rench] - ThierryHamon + ThierryHamon QuentinPleplé - PatrickParoubek - PierreZweigenbaum + PatrickParoubek + PierreZweigenbaum CyrilGrouin 1-10 W14-6301 @@ -11156,7 +11156,7 @@ Combining semantic spaces along with structure and constraints (Combiner espaces sémantiques, structure et contraintes) [in <fixed-case>F</fixed-case>rench] - AdilEl Ghali + AdilEl Ghali KaoutarEl Ghali 20-30 W14-6303 @@ -11171,21 +11171,21 @@ Fine-grained semantic categorization of opinion expressions for consensus detection (Catégorisation sémantique fine des expressions d’opinion pour la détection de consensus) [in <fixed-case>F</fixed-case>rench] - FarahBenamara - VéroniqueMoriceau - Yvette YannickMathieu + FarahBenamara + VéroniqueMoriceau + Yvette YannickMathieu 36-44 W14-6305 benamara-etal-2014-fine Classification and Optimization Algorithms: the <fixed-case>LIA</fixed-case>/<fixed-case>ADOC</fixed-case> participation at <fixed-case>DEFT</fixed-case>’14 (Algorithmes de classification et d’optimisation : participation du <fixed-case>LIA</fixed-case>/<fixed-case>ADOC</fixed-case> à <fixed-case>DEFT</fixed-case>’14) [in <fixed-case>F</fixed-case>rench] - Luis AdriánCabrera-Diego + Luis AdriánCabrera-Diego StéphaneHuet BassamJabaian AlejandroMolina - Juan-ManuelTorres-Moreno - MarcEl-Bèze + Juan-ManuelTorres-Moreno + MarcEl-Bèze BarthélémyDurette 45-52 W14-6306 @@ -11202,7 +11202,7 @@ Introductory experiments with evolutionary optimization of reflective semantic - DanielDevatman Hromada + DanielDevatman Hromada 64-68 W14-6308 devatman-hromada-2014-introductory @@ -11212,7 +11212,7 @@ TALN-RECITAL 2014 Workshop FondamenTAL 2014 : Ressources lexicales et TAL - vue d’ensemble sur les dictionnaires électroniques de Jean Dubois et Françoise Dubois-Charlier (FondamenTAL 2014 : Lexical Resources and NLP) W14-64 - Marie-HélèneStéfanini + Marie-HélèneStéfanini DenisLe Pesant Association pour le Traitement Automatique des Langues
Marseille, France
@@ -11242,7 +11242,7 @@
The Electronic Dictionary <fixed-case>DEM</fixed-case> in <fixed-case>N</fixed-case>oo<fixed-case>J</fixed-case> (Le dictionnaire <fixed-case>DEM</fixed-case> dans <fixed-case>N</fixed-case>oo<fixed-case>J</fixed-case>) [in <fixed-case>F</fixed-case>rench] - MaxSilberztein + MaxSilberztein 80-84 W14-6403 silberztein-2014-electronic @@ -11284,7 +11284,7 @@ TALN-RECITAL 2014 Workshop TALAf 2014 : Traitement Automatique des Langues Africaines (TALAf 2014: African Language Processing) W14-65 - MathieuMangeot + MathieuMangeot FatihaSadat Association pour le Traitement Automatique des Langues
Marseille, France
@@ -11319,7 +11319,7 @@
<fixed-case>PFM</fixed-case>: Ikota’s morphology implementation in <fixed-case>XMG</fixed-case> (<fixed-case>PFM</fixed-case> : pour une implémentation de la morphologie de l’ikota dans <fixed-case>XMG</fixed-case>) [in <fixed-case>F</fixed-case>rench] - BrunelleMagnana Ekoukou + BrunelleMagnana Ekoukou 134-140 W14-6504 magnana-ekoukou-2014-pfm @@ -11375,14 +11375,14 @@ TALN-RECITAL 2014 Workshop SemDis 2014 : Enjeux actuels de la sémantique distributionnelle (SemDis 2014: Current Challenges in Distributional Semantics) W14-66 - CécileFabre + CécileFabre NabilHathout - Lydia-MaiHo-Dac + Lydia-MaiHo-Dac FrançoisMorlane-Hondère PhilippeMuller FranckSajous LudovicTanguy - TimVan de Cruys + TimVan de Cruys Association pour le Traitement Automatique des Langues
Marseille, France
July @@ -11463,7 +11463,7 @@ TALN-RECITAL 2014 Workshop RLTLN 2014 : Réseaux Lexicaux pour le TAL (RLTLN 2014 : Lexical Networks for NLP) W14-67 MichaelZock - GemmaBel-Enguix + GemmaBel-Enguix ReinhardRapp Association pour le Traitement Automatique des Langues
Marseille, France
@@ -11477,7 +11477,7 @@ Lexical Networks, Natural Language Processing and Linked Open Data (Réseaux Lexicaux, Traitement des Langues, et Données Liées Ouvertes) [in <fixed-case>F</fixed-case>rench] - GillesSérasset + GillesSérasset 280-280 W14-6701 serasset-2014-lexical @@ -11496,7 +11496,7 @@ BrunoGaume EmmanuelNavarro YannDesalle - BenoîtGaillard + BenoîtGaillard 291-301 W14-6703 gaume-etal-2014-measuring @@ -11529,9 +11529,9 @@ Proceedings of the Third CIPS-SIGHAN Joint Conference on Chinese Language Processing W14-68 LeSun - ChengqingZong + ChengqingZong MinZhang - Gina-AnneLevow + Gina-AnneLevow 10.3115/v1/W14-68 Association for Computational Linguistics
Wuhan, China
@@ -11545,7 +11545,7 @@ Research on <fixed-case>C</fixed-case>hinese discourse rhetorical structure representation scheme and corpus annotation - GuodongZhou + GuodongZhou 1 W14-6801 10.3115/v1/W14-6801 @@ -11595,7 +11595,7 @@ PeijieHuang XianmaoLin ZeqiLian - DeYang + DeYang XiaolingTang LiHuang QiangHuang @@ -11642,7 +11642,7 @@ Bilingual Product Name Dictionary Construction Using a Two Stage Method YatianShen - XuanjingHuang + XuanjingHuang 61–69 W14-6810 10.3115/v1/W14-6810 @@ -11752,9 +11752,9 @@ Overview of <fixed-case>SIGHAN</fixed-case> 2014 Bake-off for <fixed-case>C</fixed-case>hinese Spelling Check - Liang-ChihYu + Liang-ChihYu Lung-HaoLee - Yuen-HsienTseng + Yuen-HsienTseng Hsin-HsiChen 126–132 W14-6820 @@ -11768,7 +11768,7 @@ JianpengHou QianboWang YuanzhuoWang - XueqiCheng + XueqiCheng 133–138 W14-6821 10.3115/v1/W14-6821 @@ -11841,7 +11841,7 @@ Introduction to <fixed-case>BIT</fixed-case> <fixed-case>C</fixed-case>hinese Spelling Correction System at <fixed-case>CLP</fixed-case> 2014 Bake-off MinLiu PingJian - HeyanHuang + HeyanHuang 179–185 W14-6828 10.3115/v1/W14-6828 @@ -11868,7 +11868,7 @@ Nan-changCheng Cheng-qingZong MinHou - Yong-linTeng + Yong-linTeng 192–201 W14-6831 10.3115/v1/W14-6831 @@ -11876,9 +11876,9 @@ <fixed-case>C</fixed-case>hinese Spell Checking Based on Noisy Channel Model - Hsun-wenChiu - Jian-chengWu - Jason S.Chang + Hsun-wenChiu + Jian-chengWu + Jason S.Chang 202–209 W14-6832 10.3115/v1/W14-6832 @@ -11895,7 +11895,7 @@ <fixed-case>NCTU</fixed-case> and <fixed-case>NTUT</fixed-case>’s Entry to <fixed-case>CLP</fixed-case>-2014 <fixed-case>C</fixed-case>hinese Spelling Check Evaluation - Yih-RuWang + Yih-RuWang Yuan-FuLiao 216–219 W14-6834 @@ -11916,7 +11916,7 @@ Proceedings of the Workshop on Natural Language Processing in the 5th Information Systems Research Working Days (JISIC) W14-69 - MyriamHernandez + MyriamHernandez Josafáde Jesus Aguiar Pontes 10.3115/v1/W14-69 Association for Computational Linguistics @@ -11946,7 +11946,7 @@ Language Technologies for Suicide Prevention in Social Media - Jose M.Gomez + Jose M.Gomez 21-29 W14-6903 10.3115/v1/W14-6903 @@ -11955,8 +11955,8 @@ A Supervised Approach for Sentiment Analysis using Skipgrams JaviFernández - José M.Gómez - PatricioMartínez-Barco + José M.Gómez + PatricioMartínez-Barco 30-36 W14-6904 10.3115/v1/W14-6904 @@ -11965,7 +11965,7 @@ Emotion Detection from text: A Survey LeaCanales - PatricioMartínez-Barco + PatricioMartínez-Barco 37-43 W14-6905 10.3115/v1/W14-6905 @@ -11979,7 +11979,7 @@ HideyaMino IsaoGoto SadaoKurohashi - EiichiroSumita + EiichiroSumita Workshop on Asian Translation
Tokyo, Japan
October @@ -12037,7 +12037,7 @@ JingshengCai YujieZhang HuaShan - JinanXu + JinanXu W14-7005 39–43 cai-etal-2014-system @@ -12066,7 +12066,7 @@ ShoHoshino HubertSoyer YusukeMiyao - AkikoAizawa + AkikoAizawa W14-7008 W14-7008.Poster.pdf 55–63 @@ -12106,7 +12106,7 @@ <fixed-case>K</fixed-case>yoto<fixed-case>EBMT</fixed-case> System Description for the 1st Workshop on <fixed-case>A</fixed-case>sian Translation JohnRichardson - FabienCromières + FabienCromières ToshiakiNakazawa SadaoKurohashi W14-7012 diff --git a/data/xml/W15.xml b/data/xml/W15.xml index 0c66975ed9..0a2dfc2066 100644 --- a/data/xml/W15.xml +++ b/data/xml/W15.xml @@ -5,7 +5,7 @@ Proceedings of the 11th International Conference on Computational Semantics W15-01 MatthewPurver - MehrnooshSadrzadeh + MehrnooshSadrzadeh MatthewStone Association for Computational Linguistics
London, UK
@@ -37,7 +37,7 @@ From Adjective Glosses to Attribute Concepts: Learning Different Aspects That an Adjective Can Describe OmidBakhshandeh - JamesAllen + JamesAllen 23–33 W15-0103 bakhshandeh-allen-2015-adjective @@ -45,7 +45,7 @@ Exploiting Fine-grained Syntactic Transfer Features to Predict the Compositionality of <fixed-case>G</fixed-case>erman Particle Verbs StefanBott - SabineSchulte im Walde + SabineSchulte im Walde 34–39 W15-0104 bott-schulte-im-walde-2015-exploiting @@ -54,23 +54,23 @@ Multilingual Reliability and “Semantic” Structure of Continuous Word Spaces MaximilianKöper ChristianScheible - SabineSchulte im Walde + SabineSchulte im Walde 40–45 W15-0105 koper-etal-2015-multilingual Clarifying Intentions in Dialogue: A Corpus Study - Julian J.Schlöder - RaquelFernández + Julian J.Schlöder + RaquelFernández 46–51 W15-0106 schloder-fernandez-2015-clarifying From distributional semantics to feature norms: grounding semantic models in human perceptual data - LuanaFagarasan - Eva MariaVecchi + LuanaFagarasan + Eva MariaVecchi StephenClark 52–57 W15-0107 @@ -79,7 +79,7 @@ Obtaining a Better Understanding of Distributional Models of <fixed-case>G</fixed-case>erman Derivational Morphology MaxKisselew - SebastianPadó + SebastianPadó AlexisPalmer JanŠnajder 58–63 @@ -105,10 +105,10 @@ Alignment of Eye Movements and Spoken Language for Semantic Image Understanding PreethiVaidyanathan - EmilyPrud’hommeaux - CeciliaO. Alm + EmilyPrud’hommeaux + CeciliaO. Alm Jeff B.Pelz - Anne R.Haake + Anne R.Haake 76–81 W15-0111 vaidyanathan-etal-2015-alignment @@ -116,7 +116,7 @@ From a Distance: Using Cross-lingual Word Alignments for Noun Compound Bracketing PatrickZiering - Lonnekevan der Plas + Lonnekevan der Plas 82–87 W15-0112 ziering-van-der-plas-2015-distance @@ -124,15 +124,15 @@ Unsupervised Learning of Coherent and General Semantic Classes for Entity Aggregates HenryAnaya-Sánchez - AnselmoPeñas + AnselmoPeñas 88–93 W15-0113 anaya-sanchez-penas-2015-unsupervised Crowdsourced Word Sense Annotations and Difficult Words and Examples - OierLopez de Lacalle - EnekoAgirre + OierLopez de Lacalle + EnekoAgirre 94–100 W15-0114 lopez-de-lacalle-agirre-2015-crowdsourced @@ -156,7 +156,7 @@ Uniform Surprisal at the Level of Discourse Relations: Negation Markers and Discourse Connective Omission - FatemehTorabi Asr + FatemehTorabi Asr VeraDemberg 118–128 W15-0117 @@ -172,7 +172,7 @@ On the Proper Treatment of Quantifiers in Probabilistic Logic Semantics - IslamBeltagy + IslamBeltagy KatrinErk 140–150 W15-0119 @@ -180,7 +180,7 @@ Mr Darcy and Mr Toad, gentlemen: distributional names and their kinds - AurélieHerbelot + AurélieHerbelot 151–161 W15-0120 herbelot-2015-mr @@ -196,7 +196,7 @@ Automatic Noun Compound Interpretation using Deep Neural Networks and Word Embeddings CorinaDima - ErhardHinrichs + ErhardHinrichs 173–183 W15-0122 dima-hinrichs-2015-automatic @@ -204,7 +204,7 @@ Integrating Non-Linguistic Events into Discourse Structure JulieHunter - NicholasAsher + NicholasAsher AlexLascarides 184–194 W15-0123 @@ -212,7 +212,7 @@ A Discriminative Model for Perceptually-Grounded Incremental Reference Resolution - CaseyKennington + CaseyKennington LiviaDia DavidSchlangen 195–205 @@ -222,7 +222,7 @@ Incremental Semantics for Dialogue Processing: Requirements, and a Comparison of Two Approaches JulianHough - CaseyKennington + CaseyKennington DavidSchlangen JonathanGinzburg 206–216 @@ -231,7 +231,7 @@ Semantic Dependency Graph Parsing Using Tree Approximations - ŽeljkoAgić + ŽeljkoAgić AlexanderKoller StephanOepen 217–227 @@ -247,8 +247,8 @@ Layers of Interpretation: On Grammar and Compositionality - Emily M.Bender - DanFlickinger + Emily M.Bender + DanFlickinger StephanOepen WoodleyPackard AnnCopestake @@ -258,8 +258,8 @@ Pragmatic Rejection - Julian J.Schlöder - RaquelFernández + Julian J.Schlöder + RaquelFernández 250–260 W15-0129 schloder-fernandez-2015-pragmatic @@ -278,7 +278,7 @@ Dynamics of Public Commitments in Dialogue AntoineVenant - NicholasAsher + NicholasAsher 272–282 W15-0131 venant-asher-2015-dynamics @@ -292,8 +292,8 @@ How hard is this query? Measuring the Semantic Complexity of Schema-agnostic Queries - AndréFreitas - JulianoEfson Sales + AndréFreitas + JulianoEfson Sales SiegfriedHandschuh EdwardCurry 294–304 @@ -317,7 +317,7 @@ On the Principles of Semantic Annotation - HarryBunt + HarryBunt W15-0201 bunt-2015-principles @@ -339,7 +339,7 @@ The Semantics of Image Annotation JuliaBosque-Gil - JamesPustejovsky + JamesPustejovsky W15-0204 bosque-gil-pustejovsky-2015-semantics @@ -366,8 +366,8 @@ A Flexible Tool for Manual Word Sense Annotation StevenNeale - JoãoSilva - AntónioBranco + JoãoSilva + AntónioBranco W15-0208 neale-etal-2015-flexible @@ -380,7 +380,7 @@ Semantic Relations in Discourse: The Current State of <fixed-case>ISO</fixed-case> 24617-8 RashmiPrasad - HarryBunt + HarryBunt W15-0210 prasad-bunt-2015-semantic @@ -388,17 +388,17 @@ Analysis of Temporal Expressions Annotated in Clinical Notes HeglerTissot AngusRoberts - LeonDerczynski - GenevieveGorrell - Marcus DidonetDel Fabro + LeonDerczynski + GenevieveGorrell + Marcus DidonetDel Fabro W15-0211 tissot-etal-2015-analysis Rapid <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et annotation of spoken conversation transcripts - JeremyTrione - FredericBechet - BenoitFavre + JeremyTrione + FredericBechet + BenoitFavre AlexisNasr W15-0212 trione-etal-2015-rapid @@ -419,7 +419,7 @@ Towards a Unified Approach to Modality Annotation in <fixed-case>P</fixed-case>ortuguese - Luciana BeatrizÁvila + Luciana BeatrizÁvila AmáliaMendes IrisHendrickx W15-0301 @@ -427,7 +427,7 @@ A hedging annotation scheme focused on epistemic phrases for informal language - Liliana MamaniSanchez + Liliana MamaniSanchez CarlVogel W15-0302 sanchez-vogel-2015-hedging @@ -456,18 +456,18 @@ Extending <fixed-case>OWL</fixed-case> Ontologies by <fixed-case>C</fixed-case>artesian Types to Represent N-ary Relations in Natural Language - Hans-UlrichKrieger + Hans-UlrichKrieger ChristianWillms W15-0401 krieger-willms-2015-extending Ontology Authoring Inspired By Dialogue - ArtemisParvizi + ArtemisParvizi YuanRen MarkelVigo - Keesvan Deemter - ChrisMellish + Keesvan Deemter + ChrisMellish Jeff Z.Pan RobertStevens CarolineJay @@ -483,14 +483,14 @@ Using Ontologies to Model Polysemy in Lexical Resources - FahadKhan + FahadKhan FrancescaFrontini W15-0404 khan-frontini-2015-using Modelling time and space in <fixed-case>B</fixed-case>razilian culture - DanielCouto-Vale + DanielCouto-Vale Rodrigode Oliveira W15-0405 couto-vale-de-oliveira-2015-modelling @@ -506,7 +506,7 @@ When is Lying the Right Choice? FedericoCerutti - ArtemisParvizi + ArtemisParvizi AliceToniolo DaveBraines Geeth R.de Mel @@ -524,7 +524,7 @@ Proceedings of the 2nd Workshop on Argumentation Mining W15-05 - ClaireCardie + ClaireCardie 10.3115/v1/W15-05 Association for Computational Linguistics
Denver, CO
@@ -548,7 +548,7 @@
Identifying Argumentation Schemes in Genetics Research Articles - NancyGreen + NancyGreen 12–21 W15-0502 10.3115/v1/W15-0502 @@ -557,7 +557,7 @@ Extracting Argument and Domain Words for Identifying Argument Components in Texts HuyNguyen - DianeLitman + DianeLitman 22–28 W15-0503 10.3115/v1/W15-0503 @@ -575,7 +575,7 @@ A Shared Task on Argumentation Mining in Newspaper Editorials JohannesKiesel - KhalidAl-Khatib + KhalidAl-Khatib MatthiasHagen BennoStein 35–38 @@ -597,7 +597,7 @@ A Computational Approach for Generating Toulmin Model Argumentation PaulReisert NaoyaInoue - NaoakiOkazaki + NaoakiOkazaki KentaroInui 45–55 W15-0507 @@ -618,7 +618,7 @@ From Argumentation Mining to Stance Classification ParinazSobhani - DianaInkpen + DianaInkpen StanMatwin 67–77 W15-0509 @@ -628,7 +628,7 @@ Argument Discovery and Extraction with the Argument Workbench AdamWyner - WimPeters + WimPeters DavidPrice 78–83 W15-0510 @@ -683,9 +683,9 @@ ShereenOraby LenaReed RyanCompton - EllenRiloff - MarilynWalker - SteveWhittaker + EllenRiloff + MarilynWalker + SteveWhittaker 116–126 W15-0515 10.3115/v1/W15-0515 @@ -694,7 +694,7 @@ Combining Argument Mining Techniques JohnLawrence - ChrisReed + ChrisReed 127–136 W15-0516 10.3115/v1/W15-0516 @@ -705,7 +705,7 @@ Proceedings of the Tenth Workshop on Innovative Use of NLP for Building Educational Applications W15-06 - JoelTetreault + JoelTetreault JillBurstein ClaudiaLeacock 10.3115/v1/W15-06 @@ -743,7 +743,7 @@ Incorporating Coherence of Topics as a Criterion in Automatic Response-to-Text Assessment of the Organization of Writing ZahraRahimi - DianeLitman + DianeLitman ElaineWang RichardCorrenti 20–30 @@ -763,8 +763,8 @@ Automated Scoring of Picture-based Story Narration SwapnaSomasundaran - Chong MinLee - MartinChodorow + Chong MinLee + MartinChodorow XinhaoWang 42–48 W15-0605 @@ -773,7 +773,7 @@ Measuring Feature Diversity in Native Language Identification - ShervinMalmasi + ShervinMalmasi AoifeCahill 49–55 W15-0606 @@ -782,7 +782,7 @@ Automated Evaluation of Scientific Writing: <fixed-case>AESW</fixed-case> Shared Task Proposal - VidasDaudaravičius + VidasDaudaravičius 56–63 W15-0607 10.3115/v1/W15-0607 @@ -802,7 +802,7 @@ Towards Automatic Description of Knowledge Components - CyrilGoutte + CyrilGoutte GuillaumeDurand SergeLéger 75–80 @@ -822,9 +822,9 @@ Interpreting Questions with a Log-Linear Ranking Model in a Virtual Patient Dialogue System EvanJaffe - MichaelWhite + MichaelWhite WilliamSchuler - EricFosler-Lussier + EricFosler-Lussier AlexRosenfeld DouglasDanforth 86–96 @@ -836,7 +836,7 @@ Identifying Patterns For Short Answer Scoring Using Graph-based Lexico-Semantic Text Matching LakshmiRamachandran JianCheng - PeterFoltz + PeterFoltz 97–106 W15-0612 10.3115/v1/W15-0612 @@ -854,7 +854,7 @@ The Jinan <fixed-case>C</fixed-case>hinese Learner Corpus MaolinWang - ShervinMalmasi + ShervinMalmasi MingxuanHuang 118–123 W15-0614 @@ -874,7 +874,7 @@ Annotation and Classification of Argumentative Writing Revisions FanZhang - DianeLitman + DianeLitman 133–143 W15-0616 10.3115/v1/W15-0616 @@ -883,7 +883,7 @@ Embarrassed or Awkward? Ranking Emotion Synonyms for <fixed-case>ESL</fixed-case> Learners’ Appropriate Wording Wei-FanChen - Mei-HuaChen + Mei-HuaChen Lun-WeiKu 144–153 W15-0617 @@ -893,8 +893,8 @@ <fixed-case>R</fixed-case>ev<fixed-case>UP</fixed-case>: Automatic Gap-Fill Question Generation from Educational Texts GirishKumar - RafaelBanchs - Luis FernandoD’Haro + RafaelBanchs + Luis FernandoD’Haro 154–161 W15-0618 10.3115/v1/W15-0618 @@ -903,7 +903,7 @@ Preliminary Experiments on Crowdsourced Evaluation of Feedback Granularity NitinMadnani - MartinChodorow + MartinChodorow AoifeCahill MelissaLopez YokoFutagi @@ -915,7 +915,7 @@ Oracle and Human Baselines for Native Language Identification - ShervinMalmasi + ShervinMalmasi JoelTetreault MarkDras 172–178 @@ -944,7 +944,7 @@ Judging the Quality of Automatically Generated Gap-fill Question using Active Learning - Nobal BikramNiraula + Nobal BikramNiraula VasileRus 196–206 W15-0623 @@ -954,7 +954,7 @@ Generating Reference Texts for Short Answer Scoring Using Graph-based Summarization LakshmiRamachandran - PeterFoltz + PeterFoltz 207–212 W15-0624 10.3115/v1/W15-0624 @@ -982,7 +982,7 @@ Using Learner Data to Improve Error Correction in Adjective–Noun Combinations EkaterinaKochmar - TedBriscoe + TedBriscoe 233–242 W15-0627 10.3115/v1/W15-0627 @@ -1012,7 +1012,7 @@ W15-07 AnnaFeldman AnnaKazantseva - StanSzpakowicz + StanSzpakowicz CorinaKoolen 10.3115/v1/W15-07 Association for Computational Linguistics @@ -1028,7 +1028,7 @@ Tools for Digital Humanities: Enabling Access to the Old <fixed-case>O</fixed-case>ccitan <fixed-case>R</fixed-case>omance of Flamenca OlgaScrivner - SandraKübler + SandraKübler 1–11 W15-0701 10.3115/v1/W15-0701 @@ -1058,7 +1058,7 @@ PrashantJayannavar ApoorvAgarwal MelodyJu - OwenRambow + OwenRambow 32–41 W15-0704 10.3115/v1/W15-0704 @@ -1077,7 +1077,7 @@ A Pilot Experiment on Exploiting Translations for Literary Studies on Kafka’s “Verwandlung” FabienneCap - InaRösiger + InaRösiger JonasKuhn 48–57 W15-0706 @@ -1095,7 +1095,7 @@ Visualizing Poetry with <fixed-case>SPARSAR</fixed-case> – Visual Maps from Poetic Content - RodolfoDelmonte + RodolfoDelmonte 68–78 W15-0708 10.3115/v1/W15-0708 @@ -1139,7 +1139,7 @@ A computational linguistic approach to <fixed-case>S</fixed-case>panish Golden Age Sonnets: metrical and semantic aspects - BorjaNavarro + BorjaNavarro 105–113 W15-0712 10.3115/v1/W15-0712 @@ -1147,7 +1147,7 @@ Automated Translation of a Literary Work: A Pilot Study - LaurentBesacier + LaurentBesacier LaneSchwartz 114–122 W15-0713 @@ -1168,9 +1168,9 @@ Proceedings of the 3rd Workshop on EVENTS: Definition, Detection, Coreference, and Representation W15-08 - EduardHovy + EduardHovy TerukoMitamura - MarthaPalmer + MarthaPalmer 10.3115/v1/W15-08 Association for Computational Linguistics
Denver, Colorado
@@ -1184,7 +1184,7 @@ Translating Granularity of Event Slots into Features for Event Coreference Resolution. - AgataCybulska + AgataCybulska PiekVossen 1–10 W15-0801 @@ -1193,7 +1193,7 @@ Word Sense Disambiguation via <fixed-case>P</fixed-case>rop<fixed-case>S</fixed-case>tore and <fixed-case>O</fixed-case>nto<fixed-case>N</fixed-case>otes for Event Mention Detection - Nicolas R.Fauceglia + Nicolas R.Fauceglia Yiu-ChangLin XuezheMa EduardHovy @@ -1206,7 +1206,7 @@ Opposition Relations among Verb Frames AnnaFeltracco ElisabettaJezek - BernardoMagnini + BernardoMagnini 16–24 W15-0803 10.3115/v1/W15-0803 @@ -1214,8 +1214,8 @@ Encoding event structure in <fixed-case>U</fixed-case>rdu/<fixed-case>H</fixed-case>indi <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et - AnnetteHautli-Janisz - Tracy HollowayKing + AnnetteHautli-Janisz + Tracy HollowayKing GilianRamchand 25–33 W15-0804 @@ -1235,7 +1235,7 @@ Detecting Causally Embedded Structures Using an Evolutionary Algorithm ChenLi - RoxanaGirju + RoxanaGirju 43–52 W15-0806 10.3115/v1/W15-0806 @@ -1254,7 +1254,7 @@ Event analysis for information extraction from business-based technical documents BellManrique Losada - Carlos MarioZapata Jaramillo + Carlos MarioZapata Jaramillo 58–65 W15-0808 10.3115/v1/W15-0808 @@ -1268,7 +1268,7 @@ ZhiyiSong AnnBies SethKulick - StephanieStrassel + StephanieStrassel 66–76 W15-0809 10.3115/v1/W15-0809 @@ -1285,7 +1285,7 @@ Identifying Various Kinds of Event Mentions in K-Parser Output ArpitSharma - NguyenVo + NguyenVo SomakAditya ChittaBaral 82–88 @@ -1297,7 +1297,7 @@ From Light to Rich <fixed-case>ERE</fixed-case>: Annotation of Entities, Relations, and Events ZhiyiSong AnnBies - StephanieStrassel + StephanieStrassel TomRiese JustinMott JoeEllis @@ -1327,7 +1327,7 @@ Semantic Interoperability for Cross-lingual and cross-document Event Detection PiekVossen EgoitzLaparra - GermanRigau + GermanRigau ItziarAldabe 108–116 W15-0814 @@ -1363,7 +1363,7 @@ A Method of Accounting Bigrams in Topic Models MichaelNokel - NataliaLoukachevitch + NataliaLoukachevitch 1–9 W15-0901 10.3115/v1/W15-0901 @@ -1383,7 +1383,7 @@ FabienneCap ManjuNirmal MarionWeller - SabineSchulte im Walde + SabineSchulte im Walde 19–28 W15-0903 10.3115/v1/W15-0903 @@ -1440,7 +1440,7 @@ BaharSalehi NitikaMathur PaulCook - TimothyBaldwin + TimothyBaldwin 54–59 W15-0909 10.3115/v1/W15-0909 @@ -1448,8 +1448,8 @@ The Bare Necessities: Increasing Lexical Coverage for Multi-Word Domain Terms with Less Lexical Data - BranimirBoguraev - EsmeManandise + BranimirBoguraev + EsmeManandise BenjaminSegal 60–64 W15-0910 @@ -1460,7 +1460,7 @@ Phrase translation using a bilingual dictionary and n-gram data: A case study from <fixed-case>V</fixed-case>ietnamese to <fixed-case>E</fixed-case>nglish Khang NhutLam FerasAl Tarouti - JugalKalita + JugalKalita 65–69 W15-0911 10.3115/v1/W15-0911 @@ -1468,9 +1468,9 @@ Annotation and Extraction of Multiword Expressions in <fixed-case>T</fixed-case>urkish Treebanks - GülşenEryiǧit + GülşenEryiǧit KübraAdali - DilaraTorunoğlu-Selamet + DilaraTorunoğlu-Selamet UmutSulubacak TuğbaPamay 70–76 @@ -1516,7 +1516,7 @@ W15-10 DekaiWu MarineCarpuat - EnekoAgirre + EnekoAgirre NoraAranberri 10.3115/v1/W15-10 Association for Computational Linguistics @@ -1541,7 +1541,7 @@ Non-projective Dependency-based Pre-Reordering with Recurrent Neural Network for Machine Translation - Antonio ValerioMiceli-Barone + Antonio ValerioMiceli-Barone GiuseppeAttardi 10–20 W15-1002 @@ -1551,7 +1551,7 @@ Translating Negation: Induction, Search And Model Errors FedericoFancellu - BonnieWebber + BonnieWebber 21–29 W15-1003 10.3115/v1/W15-1003 @@ -1569,7 +1569,7 @@ Unsupervised False Friend Disambiguation Using Contextual Word Clusters and Parallel Word Alignments MaryamAminian MahmoudGhoneim - MonaDiab + MonaDiab 39–48 W15-1005 10.3115/v1/W15-1005 @@ -1588,8 +1588,8 @@ Analyzing <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>panish Named-Entity enhanced Machine Translation MikelArtetxe EnekoAgirre - InakiAlegria - GorkaLabaka + InakiAlegria + GorkaLabaka 52–54 W15-1007 10.3115/v1/W15-1007 @@ -1598,8 +1598,8 @@ Predicting Prepositions for <fixed-case>SMT</fixed-case> MarionWeller - AlexanderFraser - SabineSchulte im Walde + AlexanderFraser + SabineSchulte im Walde 55–56 W15-1008 10.3115/v1/W15-1008 @@ -1607,7 +1607,7 @@ Translation reranking using source phrase dependency features - Antonio ValerioMiceli-Barone + Antonio ValerioMiceli-Barone 57–60 W15-1009 10.3115/v1/W15-1009 @@ -1635,7 +1635,7 @@ Improving <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish <fixed-case>P</fixed-case>rop<fixed-case>B</fixed-case>ank Alignment ShuminWu - MarthaPalmer + MarthaPalmer 74–82 W15-1012 10.3115/v1/W15-1012 @@ -1646,8 +1646,8 @@ Proceedings of the 6th Workshop on Cognitive Modeling and Computational Linguistics W15-11 - TimO’Donnell - Martenvan Schijndel + TimO’Donnell + Martenvan Schijndel 10.3115/v1/W15-11 Association for Computational Linguistics
Denver, Colorado
@@ -1661,7 +1661,7 @@ Predictions for self-priming from incremental updating models unifying comprehension and production - Cassandra L.Jacobs + Cassandra L.Jacobs 1–8 W15-1101 10.3115/v1/W15-1101 @@ -1680,7 +1680,7 @@ Audience size and contextual effects on information density in <fixed-case>T</fixed-case>witter conversations GabrielDoyle - MichaelFrank + MichaelFrank 19–28 W15-1103 10.3115/v1/W15-1103 @@ -1689,7 +1689,7 @@ Centre Stage: How Social Network Position Shapes Linguistic Coordination BillNoble - RaquelFernández + RaquelFernández 29–38 W15-1104 10.3115/v1/W15-1104 @@ -1709,7 +1709,7 @@ Verb polysemy and frequency effects in thematic fit modeling ClaytonGreenberg VeraDemberg - AsadSayeed + AsadSayeed 48–57 W15-1106 10.3115/v1/W15-1106 @@ -1745,7 +1745,7 @@ Modeling f<fixed-case>MRI</fixed-case> time courses with linguistic structure at various grain sizes - JohnHale + JohnHale DavidLutz Wen-MingLuh JonathanBrennan @@ -1793,14 +1793,14 @@ The role of personality, age, and gender in tweeting about mental illness - DanielPreoţiuc-Pietro + DanielPreoţiuc-Pietro JohannesEichstaedt GregoryPark MaartenSap LauraSmith VictoriaTobolsky - H. AndrewSchwartz - LyleUngar + H. AndrewSchwartz + LyleUngar 21–30 W15-1203 10.3115/v1/W15-1203 @@ -1821,10 +1821,10 @@ Mental Illness Detection at the World Well-Being Project for the <fixed-case>CLP</fixed-case>sych 2015 Shared Task - DanielPreoţiuc-Pietro + DanielPreoţiuc-Pietro MaartenSap - H. AndrewSchwartz - LyleUngar + H. AndrewSchwartz + LyleUngar 40–45 W15-1205 10.3115/v1/W15-1205 @@ -1884,7 +1884,7 @@ Towards Developing an Annotation Scheme for Depressive Disorder Symptoms: A Preliminary Study using <fixed-case>T</fixed-case>witter Data - DanielleMowery + DanielleMowery CraigBryan MikeConway 89–98 @@ -1924,7 +1924,7 @@ Similarity Measures for Quantifying Restrictive and Repetitive Behavior in Conversations of Autistic Children MasoudRouhizadeh - RichardSproat + RichardSproat Janvan Santen 117–123 W15-1214 @@ -1934,7 +1934,7 @@ Practical issues in developing semantic frameworks for the analysis of verbal fluency data: A <fixed-case>N</fixed-case>orwegian data case study MarkRosenstein - PeterFoltz + PeterFoltz AnjaVaskinn BritaElvevåg 124–133 @@ -1983,7 +1983,7 @@ Translating Negation: A Manual Error Analysis FedericoFancellu - BonnieWebber + BonnieWebber 2–11 W15-1301 10.3115/v1/W15-1301 @@ -2002,7 +2002,7 @@ HalilKilicoglu GracielaRosemblat MichaelCairelli - ThomasRindflesch + ThomasRindflesch 22–31 W15-1303 10.3115/v1/W15-1303 @@ -2012,8 +2012,8 @@ Committed Belief Tagging on the Factbank and <fixed-case>LU</fixed-case> Corpora: A Comparative Study GregoryWerner VinodkumarPrabhakaran - MonaDiab - OwenRambow + MonaDiab + OwenRambow 32–40 W15-1304 10.3115/v1/W15-1304 @@ -2022,9 +2022,9 @@ Extending <fixed-case>N</fixed-case>eg<fixed-case>E</fixed-case>x with Kernel Methods for Negation Detection in Clinical Text ChaitanyaShivade - Marie-Catherinede Marneffe - EricFosler-Lussier - Albert M.Lai + Marie-Catherinede Marneffe + EricFosler-Lussier + Albert M.Lai 41–46 W15-1305 10.3115/v1/W15-1305 @@ -2053,7 +2053,7 @@ Effects of Situational Factors on Metaphor Detection in an Online Discussion Forum HyejuJang MiaomiaoWen - CarolynRosé + CarolynRosé 1–10 W15-1401 10.3115/v1/W15-1401 @@ -2062,7 +2062,7 @@ Supervised Word-Level Metaphor Detection: Experiments with Concreteness and Reweighting of Examples BeataBeigman Klebanov - Chee WeeLeong + Chee WeeLeong MichaelFlor 11–20 W15-1402 @@ -2072,7 +2072,7 @@ Modeling the interaction between sensory and affective meanings for detecting metaphor AndrewGargett - JohnBarnden + JohnBarnden 21–30 W15-1403 10.3115/v1/W15-1403 @@ -2081,7 +2081,7 @@ Exploring Sensorial Features for Metaphor Identification Serra SinemTekiroğlu - GözdeÖzbal + GözdeÖzbal CarloStrapparava 31–39 W15-1404 @@ -2090,7 +2090,7 @@ <fixed-case>M</fixed-case>eta<fixed-case>N</fixed-case>et: Deep semantic automatic metaphor analysis - EllenDodge + EllenDodge JisupHong EliseStickles 40–49 @@ -2101,7 +2101,7 @@ High-Precision Abductive Mapping of Multilingual Metaphors JonathanGordon - JerryHobbs + JerryHobbs JonathanMay FabrizioMorbini 50–55 @@ -2112,7 +2112,7 @@ A Corpus of Rich Metaphor Annotation JonathanGordon - JerryHobbs + JerryHobbs JonathanMay MichaelMohler FabrizioMorbini @@ -2127,12 +2127,12 @@ Understanding Cultural Conflicts using Metaphors and Sociolinguistic Measures of Influence SamiraShaikh - TomekStrzalkowski - SarahTaylor + TomekStrzalkowski + SarahTaylor JohnLien TingLiu - George AaronBroadwell - LaurieFeldman + George AaronBroadwell + LaurieFeldman BorisYamrom KitCho YuliyaPeshkova @@ -2165,9 +2165,9 @@ Proceedings of the 1st Workshop on Vector Space Modeling for Natural Language Processing W15-15 - PhilBlunsom - ShayCohen - ParamveerDhillon + PhilBlunsom + ShayCohen + ParamveerDhillon PercyLiang 10.3115/v1/W15-15 Association for Computational Linguistics @@ -2233,7 +2233,7 @@ Relation Extraction: Perspective from Convolutional Neural Networks Thien HuuNguyen - RalphGrishman + RalphGrishman 39–48 W15-1506 10.3115/v1/W15-1506 @@ -2251,8 +2251,8 @@ A Deep Architecture for Non-Projective Dependency Parsing - ErickFonseca - SandraAluísio + ErickFonseca + SandraAluísio 56–61 W15-1508 10.3115/v1/W15-1508 @@ -2275,7 +2275,7 @@ A Word-Embedding-based Sense Index for Regular Polysemy Representation MarcoDel Tredici - NúriaBel + NúriaBel 70–78 W15-1510 10.3115/v1/W15-1510 @@ -2284,7 +2284,7 @@ Simple Semi-Supervised <fixed-case>POS</fixed-case> Tagging KarlStratos - MichaelCollins + MichaelCollins 79–87 W15-1511 10.3115/v1/W15-1511 @@ -2293,8 +2293,8 @@ Learning Distributed Representations for Multilingual Text Sequences HieuPham - ThangLuong - ChristopherManning + ThangLuong + ChristopherManning 88–94 W15-1512 10.3115/v1/W15-1512 @@ -2331,7 +2331,7 @@ A Vector Space Approach for Aspect Based Sentiment Analysis AbdulazizAlghunaim MitraMohtarami - ScottCyphers + ScottCyphers JimGlass 116–122 W15-1516 @@ -2362,7 +2362,7 @@ Towards Combined Matrix and Tensor Factorization for Universal Schema Relation Extraction SameerSingh - TimRocktäschel + TimRocktäschel SebastianRiedel 135–142 W15-1519 @@ -2372,8 +2372,8 @@ Neural word embeddings with multiplicative feature interactions for tensor-based compositions Joo-KyungKim - Marie-Catherinede Marneffe - EricFosler-Lussier + Marie-Catherinede Marneffe + EricFosler-Lussier 143–150 W15-1520 10.3115/v1/W15-1520 @@ -2381,9 +2381,9 @@ Bilingual Word Representations with Monolingual Quality in Mind - ThangLuong + ThangLuong HieuPham - Christopher D.Manning + Christopher D.Manning 151–159 W15-1521 10.3115/v1/W15-1521 @@ -2394,7 +2394,7 @@ MaheshJoshi EthanHart MirkoVogel - Jean-DavidRuvini + Jean-DavidRuvini 160–167 W15-1522 10.3115/v1/W15-1522 @@ -2412,7 +2412,7 @@ Named Entity Recognition for <fixed-case>A</fixed-case>rabic Social Media AyahZirikly - MonaDiab + MonaDiab 176–185 W15-1524 10.3115/v1/W15-1524 @@ -2420,8 +2420,8 @@ Vector Space Models for Scientific Document Summarization - JohnConroy - SashkaDavis + JohnConroy + SashkaDavis 186–191 W15-1525 10.3115/v1/W15-1525 @@ -2438,7 +2438,7 @@ Estimating User Location in Social Media with Stacked Denoising Auto-encoders JiLiu - DianaInkpen + DianaInkpen 201–210 W15-1527 10.3115/v1/W15-1527 @@ -2449,7 +2449,7 @@ Proceedings of the 9th Linguistic Annotation Workshop W15-16 - AdamMeyers + AdamMeyers InesRehbein HeikeZinsmeister 10.3115/v1/W15-16 @@ -2466,9 +2466,9 @@ Scaling Semantic Frame Annotation NancyChang - PraveenParitosh + PraveenParitosh DavidHuynh - Collin F.Baker + Collin F.Baker 1–10 W15-1601 10.3115/v1/W15-1601 @@ -2477,7 +2477,7 @@ An Analytic and Empirical Evaluation of Return-on-Investment-Based Active Learning RobbieHaertel - EricRingger + EricRingger KevinSeppi PaulFelt 11–20 @@ -2501,7 +2501,7 @@ DominiqueBrunato FeliceDell’Orletta GiuliaVenturi - SimonettaMontemagni + SimonettaMontemagni 31–41 W15-1604 10.3115/v1/W15-1604 @@ -2531,19 +2531,19 @@ A Qualitative Analysis of a Corpus of Opinion Summaries based on Aspects - RoqueLópez + RoqueLópez ThiagoPardo LucasAvanço - PedroFilho + PedroFilho AlessandroBokan - PaulaCardoso + PaulaCardoso MárcioDias FernandoNóbrega MarcoCabezudo - JacksonSouza + JacksonSouza AndressaZacarias EloizeSeno - ArianiDi Felippo + ArianiDi Felippo 62–71 W15-1607 10.3115/v1/W15-1607 @@ -2564,7 +2564,7 @@ Annotating Geographical Entities on Microblog Text KojiMatsuda AkiraSasaki - NaoakiOkazaki + NaoakiOkazaki KentaroInui 85–94 W15-1609 @@ -2575,8 +2575,8 @@ The Annotation Process of the <fixed-case>ITU</fixed-case> Web Treebank TuğbaPamay UmutSulubacak - DilaraTorunoğlu-Selamet - GülşenEryiğit + DilaraTorunoğlu-Selamet + GülşenEryiğit 95–101 W15-1610 10.3115/v1/W15-1610 @@ -2599,7 +2599,7 @@ NathanSchneider VivekSrikumar Jena D.Hwang - MarthaPalmer + MarthaPalmer 112–123 W15-1612 10.3115/v1/W15-1612 @@ -2607,11 +2607,11 @@ Bilingual <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>zech Valency Lexicon Linked to a Parallel Corpus - ZdeňkaUrešová + ZdeňkaUrešová OndřejDušek - EvaFučíková - JanHajič - JanaŠindlerová + EvaFučíková + JanHajič + JanaŠindlerová 124–128 W15-1613 10.3115/v1/W15-1613 @@ -2649,9 +2649,9 @@ Non-canonical language is not harder to annotate than canonical language - BarbaraPlank - HéctorMartínez Alonso - AndersSøgaard + BarbaraPlank + HéctorMartínez Alonso + AndersSøgaard 148–151 W15-1617 10.3115/v1/W15-1617 @@ -2678,7 +2678,7 @@ Across Languages and Genres: Creating a Universal Annotation Scheme for Textual Relations EkaterinaLapshinova-Koltunski AnnaNedoluzhko - Kerstin AnnaKunz + Kerstin AnnaKunz 168–177 W15-1620 10.3115/v1/W15-1620 @@ -2686,7 +2686,7 @@ Annotating the Implicit Content of Sluices - PranavAnand + PranavAnand JimMcCloskey 178–187 W15-1621 @@ -2696,8 +2696,8 @@ Annotating Causal Language Using Corpus Lexicography of Constructions JesseDunietz - LoriLevin - JaimeCarbonell + LoriLevin + JaimeCarbonell 188–196 W15-1622 10.3115/v1/W15-1622 @@ -2708,7 +2708,7 @@ Proceedings of the third International Workshop on Natural Language Processing for Social Media W15-17 - Shou-deLin + Shou-deLin Lun-WeiKu Cheng-TeLi ErikCambria @@ -2778,7 +2778,7 @@ Proceedings of the 20th Nordic Conference of Computational Linguistics (NODALIDA 2015) W15-18 - BeátaMegyesi + BeátaMegyesi Linköping University Electronic Press, Sweden
Vilnius, Lithuania
May @@ -2831,21 +2831,21 @@
Supersense tagging for <fixed-case>D</fixed-case>anish - HéctorMartínez Alonso - AndersJohannsen + HéctorMartínez Alonso + AndersJohannsen SussiOlsen SanniNimb NicolaiHartvig Sørensen AnnaBraasch - AndersSøgaard - BoletteSandford Pedersen + AndersSøgaard + BoletteSandford Pedersen 21–29 W15-1806 martinez-alonso-etal-2015-supersense <fixed-case>CG</fixed-case>-3 — Beyond Classical Constraint Grammar - EckhardBick + EckhardBick TinoDidriksen 31–39 W15-1807 @@ -2853,7 +2853,7 @@ Automatic Lemmatisation of <fixed-case>L</fixed-case>ithuanian <fixed-case>MWE</fixed-case>s - LoïcBoizou + LoïcBoizou JolantaKovalevskaitė ErikaRimkutė 41–49 @@ -2871,7 +2871,7 @@ Resolving Spatial References using Crowdsourced Geographical Data - JanaGötze + JanaGötze JohanBoye 61–68 W15-1810 @@ -2887,14 +2887,14 @@ Talebob - an Interactive Speech Trainer for <fixed-case>D</fixed-case>anish - Peter JuelHenrichsen + Peter JuelHenrichsen 79–86 W15-1812 henrichsen-2015-talebob The Effect of Author Set Size in Authorship Attribution for <fixed-case>L</fixed-case>ithuanian - JurgitaKapočiūtė-Dzikienė + JurgitaKapočiūtė-Dzikienė LigitaŠarkutė AndriusUtka 87–96 @@ -2904,8 +2904,8 @@ Looking hard: Eye tracking for detecting grammaticality of automatically compressed sentences SigridKlerke - HéctorMartínez Alonso - AndersSøgaard + HéctorMartínez Alonso + AndersSøgaard 97–105 W15-1814 klerke-etal-2015-looking @@ -2926,7 +2926,7 @@ Improving cross-domain dependency parsing with dependency-derived clusters JosteinLien ErikVelldal - LiljaØvrelid + LiljaØvrelid 117–126 W15-1816 lien-etal-2015-improving @@ -2950,7 +2950,7 @@ Topic Models: Accounting Component Structure of Bigrams MichaelNokel - NataliaLoukachevitch + NataliaLoukachevitch 145–152 W15-1819 nokel-loukachevitch-2015-topic @@ -2977,7 +2977,7 @@ Automatic word stress annotation of <fixed-case>R</fixed-case>ussian unrestricted text RobertReynolds - FrancisTyers + FrancisTyers 173–180 W15-1822 reynolds-tyers-2015-automatic @@ -2988,7 +2988,7 @@ YukiAsano ChristianRohrdantz FelixHamborg - DanielKeim + DanielKeim BettinaBraun MiriamButt 181–189 @@ -2997,7 +2997,7 @@ Improving the Cross-Lingual Projection of Syntactic Dependencies - JörgTiedemann + JörgTiedemann 191–199 W15-1824 tiedemann-2015-improving @@ -3005,7 +3005,7 @@ Assessing the Performance of Automatic Speech Recognition Systems When Used by Native and Non-Native Speakers of Three Major Languages in Dictation Workflows JuliánZapata - AndreasSøeborg Kirkedal + AndreasSøeborg Kirkedal 201–210 W15-1825 zapata-soeborg-kirkedal-2015-assessing @@ -3023,7 +3023,7 @@ Automatic conversion of colloquial Finnishto standard <fixed-case>F</fixed-case>innish InariListenmaa - Francis M.Tyers + Francis M.Tyers 219–223 W15-1827 listenmaa-tyers-2015-automatic @@ -3041,7 +3041,7 @@ Sentiment analysis on conversational texts BirgittaOjamaa - Päivi KristiinaJokinen + Päivi KristiinaJokinen KadriMuischenk 233–237 W15-1829 @@ -3056,10 +3056,10 @@ Active learning for sense annotation - HéctorMartínez Alonso - BarbaraPlank - AndersJohannsen - AndersSøgaard + HéctorMartínez Alonso + BarbaraPlank + AndersJohannsen + AndersSøgaard 245–249 W15-1831 martinez-alonso-etal-2015-active @@ -3074,12 +3074,12 @@ A multivariate model for classifying texts’ readability - KatarinaHeimann Mühlenbock - SofieJohansson Kokkinakis + KatarinaHeimann Mühlenbock + SofieJohansson Kokkinakis CarolineLiberg Åsaaf Geijerstam JennyWiksten Folkeryd - ArneJönsson + ArneJönsson ErikKanebrant JohanFalkenjack 257–261 @@ -3097,7 +3097,7 @@ Using Positional Suffix Trees to Perform Agile Tree Kernel Calculation - GustavoHenrique Paetzold + GustavoHenrique Paetzold 269–273 W15-1835 henrique-paetzold-2015-using @@ -3121,7 +3121,7 @@ Analysing Inconsistencies and Errors in <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> Tagging in two <fixed-case>I</fixed-case>celandic Gold Standards SteinþórSteingrímsson SigrúnHelgadóttir - EiríkurRögnvaldsson + EiríkurRögnvaldsson 287–291 W15-1838 steingrimsson-etal-2015-analysing @@ -3138,7 +3138,7 @@ The Corpus of <fixed-case>A</fixed-case>merican <fixed-case>N</fixed-case>orwegian Speech (<fixed-case>CANS</fixed-case>) - Janne BondiJohannessen + Janne BondiJohannessen 297–300 W15-1840 johannessen-2015-corpus @@ -3153,8 +3153,8 @@ Extracting Semantic Frames using hfst-pmatch SamHardwick - MiikkaSilfverberg - KristerLindén + MiikkaSilfverberg + KristerLindén 305–308 W15-1842 hardwick-etal-2015-extracting @@ -3168,7 +3168,7 @@ <fixed-case>O</fixed-case>morfi — Free and open source morphological lexical database for <fixed-case>F</fixed-case>innish - Tommi APirinen + Tommi APirinen 313–315 W15-1844 pirinen-2015-omorfi @@ -3176,7 +3176,7 @@ A Tool for Automatic Simplification of <fixed-case>S</fixed-case>wedish Texts EvelinaRennes - ArneJönsson + ArneJönsson 317–320 W15-1845 rennes-jonsson-2015-tool @@ -3209,7 +3209,7 @@ Taking the <fixed-case>D</fixed-case>anish Speech Trainer from <fixed-case>CALL</fixed-case> to <fixed-case>ICALL</fixed-case> - Peter JuelHenrichsen + Peter JuelHenrichsen 11–20 W15-1902 henrichsen-2015-taking @@ -3233,7 +3233,7 @@ Short Answer Grading: When Sorting Helps and When it Doesn’t - UlrikePado + UlrikePado CorneliaKiefer 42–50 W15-1905 @@ -3254,7 +3254,7 @@ Proceedings of the workshop on Semantic resources and semantic annotation for Natural Language Processing and the Digital Humanities at NODALIDA 2015 W15-20 - Bolette SandfordPedersen + Bolette SandfordPedersen SussiOlsen LarsBorin Northern European Association for Language Technology @@ -3279,7 +3279,7 @@ Polysemy, underspecification, and aspects – Questions of lumping or splitting in the construction of <fixed-case>S</fixed-case>wedish <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et - KarinFriberg Heppin + KarinFriberg Heppin DanaDannélls 12–20 W15-2002 @@ -3287,7 +3287,7 @@ Determining the most frequent senses using <fixed-case>R</fixed-case>ussian linguistic ontology <fixed-case>R</fixed-case>u<fixed-case>T</fixed-case>hes - NataliaLoukachevitch + NataliaLoukachevitch IliaChetviorkin 21–27 W15-2003 @@ -3305,8 +3305,8 @@ Coarse-grained sense annotation of <fixed-case>D</fixed-case>anish across textual domains SussiOlsen Bolette S.Pedersen - HéctorMartínez Alonso - AndersJohannsen + HéctorMartínez Alonso + AndersJohannsen 36–43 W15-2005 olsen-etal-2015-coarse @@ -3317,7 +3317,7 @@ Proceedings of the Third International Conference on Dependency Linguistics (Depling 2015) W15-21 JoakimNivre - EvaHajičová + EvaHajičová Uppsala University, Uppsala, Sweden
Uppsala, Sweden
August @@ -3330,14 +3330,14 @@ Invited Talk: The Case for <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies - ChristopherManning + ChristopherManning 1 W15-2101 manning-2015-invited Invited Talk: Lexicon Embedded Syntax - AlainPolguère + AlainPolguère 2–9 W15-2102 polguere-2015-invited @@ -3351,7 +3351,7 @@ Targeted Paraphrasing on Deep Syntactic Layer for <fixed-case>MT</fixed-case> Evaluation - PetraBarančíková + PetraBarančíková RudolfRosa 20–27 W15-2104 @@ -3359,7 +3359,7 @@ Universal and Language-specific Dependency Relations for Analysing <fixed-case>R</fixed-case>omanian - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu CătălinaMărănduc ElenaIrimia 28–37 @@ -3369,7 +3369,7 @@ Emotion and Inner State Adverbials in <fixed-case>R</fixed-case>ussian OlgaBoguslavskaya - IgorBoguslavsky + IgorBoguslavsky 38–47 W15-2106 boguslavskaya-boguslavsky-2015-emotion @@ -3387,7 +3387,7 @@ A <fixed-case>B</fixed-case>ayesian Model for Generative Transition-based Dependency Parsing JanBuys - PhilBlunsom + PhilBlunsom 58–67 W15-2108 buys-blunsom-2015-bayesian @@ -3413,11 +3413,11 @@ Using Parallel Texts and Lexicons for Verbal Word Sense Disambiguation OndřejDušek - EvaFučíková - JanHajič + EvaFučíková + JanHajič MartinPopel - JanaŠindlerová - ZdeňkaUrešová + JanaŠindlerová + ZdeňkaUrešová 82–90 W15-2111 dusek-etal-2015-using @@ -3459,7 +3459,7 @@ Reconstructions of Deletions in a Dependency-based Description of <fixed-case>C</fixed-case>zech: Selected Issues EvaHajičová MarieMikulová - JarmilaPanevová + JarmilaPanevová 131–140 W15-2116 hajicova-etal-2015-reconstructions @@ -3489,9 +3489,9 @@ Towards Cross-language Application of Dependency Grammar - TimoJärvinen + TimoJärvinen ElisabethBertol - SeptinaLarasati + SeptinaLarasati Monica-MihaelaRizea MariaRuiz Santabalbina MilanSouček @@ -3510,7 +3510,7 @@ At the Lexicon-Grammar Interface: The Case of Complex Predicates in the Functional Generative Description VáclavaKettnerová - MarkétaLopatková + MarkétaLopatková 191–200 W15-2122 kettnerova-lopatkova-2015-lexicon @@ -3518,8 +3518,8 @@ Enhancing <fixed-case>F</fixed-case>ree<fixed-case>L</fixed-case>ing Rule-Based Dependency Grammars with Subcategorization Frames MarinaLloberes - IreneCastellón - LluísPadró + IreneCastellón + LluísPadró 201–210 W15-2123 lloberes-etal-2015-enhancing @@ -3552,7 +3552,7 @@ A Historical Overview of the Status of Function Words in Dependency Grammar TimothyOsborne - DanielMaxwell + DanielMaxwell 241–250 W15-2127 osborne-maxwell-2015-historical @@ -3607,14 +3607,14 @@ Does <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies need a parsing representation? An investigation of <fixed-case>E</fixed-case>nglish NataliaSilveira - ChristopherManning + ChristopherManning 310–319 W15-2134 silveira-manning-2015-universal Catena Operations for Unified Dependency Analysis - KirilSimov + KirilSimov PetyaOsenova 320–329 W15-2135 @@ -3624,16 +3624,16 @@ Zero Alignment of Verb Arguments in a Parallel Treebank - JanaŠindlerová - EvaFučíková - ZdeňkaUrešová + JanaŠindlerová + EvaFučíková + ZdeňkaUrešová 330–339 W15-2136 sindlerova-etal-2015-zero Cross-Lingual Dependency Parsing with <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies and Predicted <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> Labels - JörgTiedemann + JörgTiedemann 340–349 W15-2137 tiedemann-2015-cross @@ -3665,7 +3665,7 @@ Domain Adaptation for Dependency Parsing via Self-Training JuntaoYu - MohabElkaref + MohabElkaref BerndBohnet 1–10 W15-2201 @@ -3721,8 +3721,8 @@ Suitability of <fixed-case>P</fixed-case>ar<fixed-case>T</fixed-case>es Test Suite for Parsing Evaluation MarinaLloberes - IreneCastellón - LluísPadró + IreneCastellón + LluísPadró 61–65 W15-2207 10.18653/v1/W15-2207 @@ -3733,7 +3733,7 @@ AkifumiYoshimoto KazuoHara MasashiShimbo - YujiMatsumoto + YujiMatsumoto 66–70 W15-2208 10.18653/v1/W15-2208 @@ -3742,7 +3742,7 @@ <fixed-case>MSTP</fixed-case>arser Model Interpolation for Multi-Source Delexicalized Transfer RudolfRosa - ZdeněkŽabokrtský + ZdeněkŽabokrtský 71–75 W15-2209 10.18653/v1/W15-2209 @@ -3776,7 +3776,7 @@ <fixed-case>CKY</fixed-case> Parsing with Independence Constraints JosephIrwin - YujiMatsumoto + YujiMatsumoto 97–106 W15-2213 10.18653/v1/W15-2213 @@ -3792,9 +3792,9 @@ Stacking or Supertagging for Dependency Parsing – What’s the Difference? - AgnieszkaFaleńska + AgnieszkaFaleńska AndersBjörkelund - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu WolfgangSeeker 118–129 W15-2215 @@ -3827,7 +3827,7 @@ JamesMonette GianpaulRachiele AunikaWarren - ChongZhang + ChongZhang 1–14 W15-2301 10.3115/v1/W15-2301 @@ -3843,7 +3843,7 @@ Topology of Language Classes - Sean A.Fulop + Sean A.Fulop DavidKephart 26–38 W15-2303 @@ -3871,7 +3871,7 @@ A <fixed-case>F</fixed-case>robenius Model of Information Structure in Categorical Compositional Distributional Semantics DimitriKartsaklis - MehrnooshSadrzadeh + MehrnooshSadrzadeh 62–74 W15-2306 10.3115/v1/W15-2306 @@ -3943,7 +3943,7 @@ Proceedings of the Sixth Workshop on Cognitive Aspects of Computational Language Learning W15-24 - RobertBerwick + RobertBerwick AnnaKorhonen AlessandroLenci ThierryPoibeau @@ -3962,7 +3962,7 @@ Using reading behavior to predict grammatical functions MariaBarrett - AndersSøgaard + AndersSøgaard 1–5 W15-2401 10.18653/v1/W15-2401 @@ -3973,7 +3973,7 @@ SigridKlerke SheilaCastilho MariaBarrett - AndersSøgaard + AndersSøgaard 6–13 W15-2402 10.18653/v1/W15-2402 @@ -3981,9 +3981,9 @@ Evaluating Models of Computation and Storage in Human Sentence Processing - ThangLuong - TimothyO’Donnell - NoahGoodman + ThangLuong + TimothyO’Donnell + NoahGoodman 14–21 W15-2403 10.18653/v1/W15-2403 @@ -4003,7 +4003,7 @@ Towards a Model of Prediction-based Syntactic Category Acquisition: First Steps with Word Embeddings RobertGrimm GiovanniCassani - WalterDaelemans + WalterDaelemans StevenGillis 28–32 W15-2405 @@ -4014,7 +4014,7 @@ Which distributional cues help the most? Unsupervised contexts selection for lexical category acquisition GiovanniCassani RobertGrimm - WalterDaelemans + WalterDaelemans StevenGillis 33–39 W15-2406 @@ -4084,7 +4084,7 @@ Modeling dative alternations of individual children - Antalvan den Bosch + Antalvan den Bosch JoanBresnan 103–112 W15-2414 @@ -4096,9 +4096,9 @@ Proceedings of the Second Workshop on Discourse in Machine Translation W15-25 - BonnieWebber + BonnieWebber MarineCarpuat - AndreiPopescu-Belis + AndreiPopescu-Belis ChristianHardmeier 10.18653/v1/W15-25 Association for Computational Linguistics @@ -4114,9 +4114,9 @@ Pronoun-Focused <fixed-case>MT</fixed-case> and Cross-Lingual Pronoun Prediction: Findings of the 2015 <fixed-case>D</fixed-case>isco<fixed-case>MT</fixed-case> Shared Task on Pronoun Translation ChristianHardmeier - PreslavNakov + PreslavNakov SaraStymne - JörgTiedemann + JörgTiedemann YannickVersley MauroCettolo 1–16 @@ -4128,7 +4128,7 @@ Comparison of Coreference Resolvers for Deep Syntax Translation MichalNovák DiekeOele - Gertjanvan Noord + Gertjanvan Noord 17–23 W15-2502 10.18653/v1/W15-2502 @@ -4145,9 +4145,9 @@ Document-Level Machine Translation Evaluation with Gist Consistency and Text Cohesion - ZhengxianGong + ZhengxianGong MinZhang - GuodongZhou + GuodongZhou 33–40 W15-2504 10.18653/v1/W15-2504 @@ -4175,7 +4175,7 @@ A Proposal for a Coherence Corpus in Machine Translation - KarinSim Smith + KarinSim Smith WilkerAziz LuciaSpecia 52–58 @@ -4187,7 +4187,7 @@ Part-of-Speech Driven Cross-Lingual Pronoun Prediction with Feed-Forward Neural Networks JimmyCallin ChristianHardmeier - JörgTiedemann + JörgTiedemann 59–64 W15-2508 10.18653/v1/W15-2508 @@ -4220,7 +4220,7 @@ Rule-Based Pronominal Anaphora Treatment for Machine Translation SharidLoáiciga - ÉricWehrli + ÉricWehrli 86–93 W15-2512 10.18653/v1/W15-2512 @@ -4228,8 +4228,8 @@ Pronoun Translation and Prediction with or without Coreference Links - Ngoc QuangLuong - LeslyMiculicich Werlen + Ngoc QuangLuong + LeslyMiculicich Werlen AndreiPopescu-Belis 94–100 W15-2513 @@ -4238,8 +4238,8 @@ Predicting Pronouns across Languages with Continuous Word Spaces - Ngoc-QuanPham - Lonnekevan der Plas + Ngoc-QuanPham + Lonnekevan der Plas 101–107 W15-2514 10.18653/v1/W15-2514 @@ -4247,7 +4247,7 @@ Baseline Models for Pronoun Prediction and Pronoun-Aware Translation - JörgTiedemann + JörgTiedemann 108–114 W15-2515 10.18653/v1/W15-2515 @@ -4286,7 +4286,7 @@ Crosslingual Annotation and Analysis of Implicit Discourse Connectives for Machine Translation FrancesYung KevinDuh - YujiMatsumoto + YujiMatsumoto 142–152 W15-2519 10.18653/v1/W15-2519 @@ -4323,9 +4323,9 @@ Proceedings of the Sixth International Workshop on Health Text Mining and Information Analysis W15-26 CyrilGrouin - ThierryHamon - AurélieNévéol - PierreZweigenbaum + ThierryHamon + AurélieNévéol + PierreZweigenbaum 10.18653/v1/W15-26 Association for Computational Linguistics
Lisbon, Portugal
@@ -4341,7 +4341,7 @@ In-depth annotation for patient level liver cancer staging Wen-waiYim SharonKwan - MelihaYetisgen + MelihaYetisgen 1–11 W15-2601 10.18653/v1/W15-2601 @@ -4381,8 +4381,8 @@
An Analysis of Biomedical Tokenization: Problems and Strategies - Noa P.Cruz Díaz - ManuelMaña López + Noa P.Cruz Díaz + ManuelMaña López 40–49 W15-2605 10.18653/v1/W15-2605 @@ -4391,7 +4391,7 @@ Annotation of Clinically Important Follow-up Recommendations in Radiology Reports - MelihaYetisgen + MelihaYetisgen PrescottKlassen LucasMcCarthy ElenaPellicer @@ -4416,7 +4416,7 @@ Exploring Word Embedding for Drug Name Recognition - IsabelSegura-Bedmar + IsabelSegura-Bedmar VíctorSuárez-Paniagua PalomaMartínez 64–72 @@ -4436,7 +4436,7 @@ Parser Adaptation to the Biomedical Domain without Re-Training JeffMitchell - MarkSteedman + MarkSteedman 79–89 W15-2610 10.18653/v1/W15-2610 @@ -4477,7 +4477,7 @@ Effectively Crowdsourcing Radiology Report Annotations AnneCocos - AaronMasino + AaronMasino TingQian ElliePavlick ChrisCallison-Burch @@ -4498,7 +4498,7 @@ Information Extraction from Biomedical Texts: Learning Models with Limited Supervision - Marie-FrancineMoens + Marie-FrancineMoens 120 W15-2616 10.18653/v1/W15-2616 @@ -4518,7 +4518,7 @@ GiuliaVenturi TommasoBellandi FeliceDell’Orletta - SimonettaMontemagni + SimonettaMontemagni 131–141 W15-2618 10.18653/v1/W15-2618 @@ -4526,7 +4526,7 @@ Mining and Ranking Biomedical Synonym Candidates from <fixed-case>W</fixed-case>ikipedia - AbhyudayJagannatha + AbhyudayJagannatha JinyingChen HongYu 142–151 @@ -4549,8 +4549,8 @@ W15-27 MichaelRoth AnnieLouis - BonnieWebber - TimBaldwin + BonnieWebber + TimBaldwin 10.18653/v1/W15-27 Association for Computational Linguistics
Lisbon, Portugal
@@ -4588,7 +4588,7 @@ Recovering discourse relations: Varying influence of discourse adverbials HannahRohde AnnaDickinson - ChrisClark + ChrisClark AnnieLouis BonnieWebber 22–31 @@ -4598,7 +4598,7 @@
Semantics and Discourse Processing for Expressive <fixed-case>TTS</fixed-case> - RodolfoDelmonte + RodolfoDelmonte RoccoTripodi 32–43 W15-2704 @@ -4632,8 +4632,8 @@ RashmiPrasad BonnieWebber AlanLee - SameerPradhan - AravindJoshi + SameerPradhan + AravindJoshi 64–69 W15-2707 10.18653/v1/W15-2707 @@ -4643,7 +4643,7 @@ Lexical Level Distribution of Metadiscourse in Spoken Language RuiCorreia MaxineEskenazi - NunoMamede + NunoMamede 70–75 W15-2708 10.18653/v1/W15-2708 @@ -4653,7 +4653,7 @@ Idiom Paraphrases: Seventh Heaven vs Cloud Nine MariaPershina YifanHe - RalphGrishman + RalphGrishman 76–82 W15-2709 10.18653/v1/W15-2709 @@ -4672,10 +4672,10 @@ Predicting word sense annotation agreement - HéctorMartínez Alonso - AndersJohannsen - OierLopez de Lacalle - EnekoAgirre + HéctorMartínez Alonso + AndersJohannsen + OierLopez de Lacalle + EnekoAgirre 89–94 W15-2711 10.18653/v1/W15-2711 @@ -4683,9 +4683,9 @@ Distributional Semantics in Use - RaffaellaBernardi - GemmaBoleda - RaquelFernández + RaffaellaBernardi + GemmaBoleda + RaquelFernández DenisPaperno 95–101 W15-2712 @@ -4697,10 +4697,10 @@ Proceedings of the Fourth Workshop on Vision and Language W15-28 - AnjaBelz - LuisaCoheur + AnjaBelz + LuisaCoheur VittorioFerrari - Marie-FrancineMoens + Marie-FrancineMoens KaterinaPastra IvanVulić 10.18653/v1/W15-28 @@ -4725,10 +4725,10 @@ Computational Integration of Human Vision and Natural Language through Bitext Alignment PreethiVaidyanathan - EmilyPrud’hommeaux - CeciliaO. Alm + EmilyPrud’hommeaux + CeciliaO. Alm Jeff B.Pelz - Anne R.Haake + Anne R.Haake 4–5 W15-2802 10.18653/v1/W15-2802 @@ -4749,7 +4749,7 @@ Lingusitic Analysis of Multi-Modal Recurrent Neural Networks ÁkosKádár - GrzegorzChrupała + GrzegorzChrupała AfraAlishahi 8–9 W15-2804 @@ -4758,7 +4758,7 @@ Defining Visually Descriptive Language - RobertGaizauskas + RobertGaizauskas JosiahWang ArnauRamisa 10–17 @@ -4770,7 +4770,7 @@ Semantic Tuples for Evaluation of Image to Sentence Generation Lily D.Ellebracht ArnauRamisa - Pranava SwaroopMadhyastha + Pranava SwaroopMadhyastha JoseCordero-Rama FrancescMoreno-Noguer AriadnaQuattoni @@ -4793,7 +4793,7 @@ Image with a Message: Towards Detecting Non-Literal Image Usages by Visual Linking LydiaWeiland LauraDietz - Simone PaoloPonzetto + Simone PaoloPonzetto 40–47 W15-2808 10.18653/v1/W15-2808 @@ -4832,9 +4832,9 @@ Generating Semantically Precise Scene Graphs from Textual Descriptions for Improved Image Retrieval SebastianSchuster RanjayKrishna - AngelChang + AngelChang LiFei-Fei - Christopher D.Manning + Christopher D.Manning 70–80 W15-2812 10.18653/v1/W15-2812 @@ -4844,7 +4844,7 @@ Do Distributed Semantic Models Dream of Electric Sheep? Visualizing Word Representations through Image Synthesis AngelikiLazaridou DatTien Nguyen - MarcoBaroni + MarcoBaroni 81–86 W15-2813 10.18653/v1/W15-2813 @@ -4888,10 +4888,10 @@ Proceedings of the 6th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis W15-29 - AlexandraBalahur - Erikvan der Goot + AlexandraBalahur + Erikvan der Goot PiekVossen - AndresMontoyo + AndresMontoyo 10.18653/v1/W15-29 Association for Computational Linguistics
Lisboa, Portugal
@@ -4914,7 +4914,7 @@ Sentiment Analysis on Monolingual, Multilingual and Code-Switching <fixed-case>T</fixed-case>witter Corpora DavidVilares - Miguel A.Alonso + Miguel A.Alonso CarlosGómez-Rodríguez 2–8 W15-2902 @@ -4934,7 +4934,7 @@ Enhanced <fixed-case>T</fixed-case>witter Sentiment Classification Using Contextual Information SoroushVosoughi HelenZhou - DebRoy + DebRoy 16–24 W15-2904 10.18653/v1/W15-2904 @@ -4944,8 +4944,8 @@ Your Sentiment Precedes You: Using an author’s historical tweets to predict sarcasm AnupamKhattri AdityaJoshi - PushpakBhattacharyya - MarkCarman + PushpakBhattacharyya + MarkCarman 25–30 W15-2905 10.18653/v1/W15-2905 @@ -4954,7 +4954,7 @@ Optimising Agile Social Media Analysis ThomasKober - DavidWeir + DavidWeir 31–40 W15-2906 10.18653/v1/W15-2906 @@ -5002,7 +5002,7 @@ Analysing domain suitability of a sentiment lexicon by identifying distributionally bipolar words LucieFlekova - DanielPreoţiuc-Pietro + DanielPreoţiuc-Pietro EugenRuppert 77–84 W15-2911 @@ -5011,7 +5011,7 @@ <fixed-case>I</fixed-case>magisaurus: An Interactive Visualizer of Valence and Emotion in the <fixed-case>R</fixed-case>oget’s Thesaurus - SaifMohammad + SaifMohammad 85–91 W15-2912 10.18653/v1/W15-2912 @@ -5019,7 +5019,7 @@ Personality Traits on <fixed-case>T</fixed-case>witter—or—<fixed-case>H</fixed-case>ow to Get 1,500 Personality Tests in a Week - BarbaraPlank + BarbaraPlank DirkHovy 92–98 W15-2913 @@ -5030,7 +5030,7 @@ Negation Scope Detection for <fixed-case>T</fixed-case>witter Sentiment Analysis JohanReitan JørgenFaret - BjörnGambäck + BjörnGambäck LarsBungum 99–108 W15-2914 @@ -5041,7 +5041,7 @@ A Linguistically Informed Convolutional Neural Network SebastianEbert Ngoc ThangVu - HinrichSchütze + HinrichSchütze 109–114 W15-2915 10.18653/v1/W15-2915 @@ -5050,7 +5050,7 @@ How much does word sense disambiguation help in sentiment analysis of micropost data? ChiraagSumanth - DianaInkpen + DianaInkpen 115–121 W15-2916 10.18653/v1/W15-2916 @@ -5068,7 +5068,7 @@ Beyond Sentiment: Social Psychological Analysis of Political <fixed-case>F</fixed-case>acebook Comments in <fixed-case>H</fixed-case>ungary MártonMiháltz - TamásVáradi + TamásVáradi IstvánCsertő ÉvaFülöp TiborPólya @@ -5139,7 +5139,7 @@ Sentiment Classification via a Response Recalibration Framework PhillipSmith - MarkLee + MarkLee 175–180 W15-2925 10.18653/v1/W15-2925 @@ -5150,11 +5150,11 @@ Proceedings of the Tenth Workshop on Statistical Machine Translation W15-30 - OndřejBojar - RajanChatterjee + OndřejBojar + RajanChatterjee ChristianFedermann BarryHaddow - ChrisHokamp + ChrisHokamp MatthiasHuck VarvaraLogacheva PavelPecina @@ -5180,9 +5180,9 @@ PhilippKoehn VarvaraLogacheva ChristofMonz - MatteoNegri + MatteoNegri MattPost - CarolinaScarton + CarolinaScarton LuciaSpecia MarcoTurchi 1–46 @@ -5205,7 +5205,7 @@ AmittaiAxelrod PhilipResnik XiaodongHe - MariOstendorf + MariOstendorf 58–65 W15-3003 10.18653/v1/W15-3003 @@ -5214,7 +5214,7 @@ <fixed-case>DFKI</fixed-case>’s experimental hybrid <fixed-case>MT</fixed-case> system for <fixed-case>WMT</fixed-case> 2015 EleftheriosAvramidis - MajaPopović + MajaPopović AljoschaBurchardt 66–73 W15-3004 @@ -5223,7 +5223,7 @@ <fixed-case>P</fixed-case>ar<fixed-case>FDA</fixed-case> for Fast Deployment of Accurate Statistical Machine Translation Systems, Benchmarks, and Statistics - ErgunBiçici + ErgunBiçici QunLiu AndyWay 74–78 @@ -5245,7 +5245,7 @@ FabienneCap MarionWeller AnitaRamm - AlexanderFraser + AlexanderFraser 84–91 W15-3007 10.18653/v1/W15-3007 @@ -5259,7 +5259,7 @@ TeresaHerrmann MohammedMediani YuqiZhang - AlexWaibel + AlexWaibel 92–97 W15-3008 10.18653/v1/W15-3008 @@ -5290,7 +5290,7 @@ The <fixed-case>AFRL</fixed-case>-<fixed-case>MITLL</fixed-case> <fixed-case>WMT</fixed-case>15 System: There’s More than One Way to Decode It! JeremyGwinnup - TimAnderson + TimAnderson GrantErdmann KatherineYoung ChristinaMay @@ -5305,12 +5305,12 @@ The <fixed-case>KIT</fixed-case>-<fixed-case>LIMSI</fixed-case> Translation System for <fixed-case>WMT</fixed-case> 2015 Thanh-LeHa - Quoc-KhanhDo + Quoc-KhanhDo EunahCho JanNiehues AlexandreAllauzen FrançoisYvon - AlexWaibel + AlexWaibel 120–125 W15-3012 10.18653/v1/W15-3012 @@ -5354,7 +5354,7 @@ BenjaminMarie AlexandreAllauzen FranckBurlot - Quoc-KhanhDo + Quoc-KhanhDo JuliaIve ElenaKnyazeva MatthieuLabeau @@ -5370,8 +5370,8 @@ <fixed-case>U</fixed-case>d<fixed-case>S</fixed-case>-Sant: <fixed-case>E</fixed-case>nglish–<fixed-case>G</fixed-case>erman Hybrid Machine Translation System SantanuPal - SudipNaskar - Josefvan Genabith + SudipNaskar + Josefvan Genabith 152–157 W15-3017 10.18653/v1/W15-3017 @@ -5382,7 +5382,7 @@ Jan-ThorstenPeter FarzadToutounchi JoernWuebker - HermannNey + HermannNey 158–163 W15-3018 10.18653/v1/W15-3018 @@ -5399,7 +5399,7 @@ <fixed-case>S</fixed-case>heffield Systems for the <fixed-case>F</fixed-case>innish-<fixed-case>E</fixed-case>nglish <fixed-case>WMT</fixed-case> Translation Task DavidSteele - KarinSim Smith + KarinSim Smith LuciaSpecia 172–176 W15-3020 @@ -5408,7 +5408,7 @@ Morphological Segmentation and <fixed-case>OPUS</fixed-case> for <fixed-case>F</fixed-case>innish-<fixed-case>E</fixed-case>nglish Machine Translation - JörgTiedemann + JörgTiedemann FilipGinter JennaKanerva 177–183 @@ -5418,11 +5418,11 @@ <fixed-case>A</fixed-case>bu-<fixed-case>M</fixed-case>a<fixed-case>T</fixed-case>ran at <fixed-case>WMT</fixed-case> 2015 Translation Task: Morphological Segmentation and Web Crawling - RaphaelRubino - TommiPirinen - MiquelEsplà-Gomis + RaphaelRubino + TommiPirinen + MiquelEsplà-Gomis NikolaLjubešić - SergioOrtiz-Rojas + SergioOrtiz-Rojas VassilisPapavassiliou ProkopisProkopidis AntonioToral @@ -5454,7 +5454,7 @@ <fixed-case>E</fixed-case>dinburgh’s Syntax-Based Systems at <fixed-case>WMT</fixed-case> 2015 PhilipWilliams RicoSennrich - MariaNadejde + MariaNadejde MatthiasHuck PhilippKoehn 199–209 @@ -5466,7 +5466,7 @@ The <fixed-case>FBK</fixed-case> Participation in the <fixed-case>WMT</fixed-case>15 Automatic Post-editing Shared Task RajenChatterjee MarcoTurchi - MatteoNegri + MatteoNegri 210–215 W15-3025 10.18653/v1/W15-3025 @@ -5476,8 +5476,8 @@ <fixed-case>USAAR</fixed-case>-<fixed-case>SAPE</fixed-case>: An <fixed-case>E</fixed-case>nglish–<fixed-case>S</fixed-case>panish Statistical Automatic Post-Editing System SantanuPal MihaelaVela - Sudip KumarNaskar - Josefvan Genabith + Sudip KumarNaskar + Josefvan Genabith 216–221 W15-3026 10.18653/v1/W15-3026 @@ -5513,9 +5513,9 @@ <fixed-case>L</fixed-case>ist<fixed-case>N</fixed-case>et-based <fixed-case>MT</fixed-case> Rescoring JanNiehues - Quoc KhanhDo + Quoc KhanhDo AlexandreAllauzen - AlexWaibel + AlexWaibel 248–255 W15-3030 10.18653/v1/W15-3030 @@ -5548,7 +5548,7 @@ JoernWuebker MiguelGraça YunsuKim - HermannNey + HermannNey 282–293 W15-3033 10.18653/v1/W15-3033 @@ -5558,7 +5558,7 @@ Investigations on Phrase-based Decoding with Recurrent Neural Network Language and Translation Models TamerAlkhouli FelixRietig - HermannNey + HermannNey 294–303 W15-3034 10.18653/v1/W15-3034 @@ -5566,7 +5566,7 @@ Referential Translation Machines for Predicting Translation Quality and Related Statistics - ErgunBiçici + ErgunBiçici QunLiu AndyWay 304–308 @@ -5576,9 +5576,9 @@ <fixed-case>UA</fixed-case>lacant word-level machine translation quality estimation system at <fixed-case>WMT</fixed-case> 2015 - MiquelEsplà-Gomis + MiquelEsplà-Gomis FelipeSánchez-Martínez - MikelForcada + MikelForcada 309–315 W15-3036 10.18653/v1/W15-3036 @@ -5614,7 +5614,7 @@ <fixed-case>USHEF</fixed-case> and <fixed-case>USAAR</fixed-case>-<fixed-case>USHEF</fixed-case> participation in the <fixed-case>WMT</fixed-case>15 <fixed-case>QE</fixed-case> shared task - CarolinaScarton + CarolinaScarton LilingTan LuciaSpecia 336–341 @@ -5626,9 +5626,9 @@ <fixed-case>SHEF</fixed-case>-<fixed-case>NN</fixed-case>: Translation Quality Estimation with Neural Networks KashifShah VarvaraLogacheva - GustavoPaetzold - FredericBlain - DanielBeck + GustavoPaetzold + FredericBlain + DanielBeck FethiBougares LuciaSpecia 342–347 @@ -5639,7 +5639,7 @@ Strategy-Based Technology for Estimating <fixed-case>MT</fixed-case> Quality LiugangShang - DongfengCai + DongfengCai DuoJi 348–352 W15-3042 @@ -5649,7 +5649,7 @@ <fixed-case>UGENT</fixed-case>-<fixed-case>LT</fixed-case>3 <fixed-case>SCATE</fixed-case> System for Machine Translation Quality Estimation ArdaTezcan - VeroniqueHoste + VeroniqueHoste BartDesmet LieveMacken 353–360 @@ -5669,8 +5669,8 @@ <fixed-case>VERT</fixed-case>a: a Linguistically-motivated Metric at the <fixed-case>WMT</fixed-case>15 Metrics Task - ElisabetComelles - JordiAtserias + ElisabetComelles + JordiAtserias 366–372 W15-3045 10.18653/v1/W15-3045 @@ -5679,8 +5679,8 @@ <fixed-case>UPF</fixed-case>-Cobalt Submission to <fixed-case>WMT</fixed-case>15 Metrics Task MarinaFomicheva - NúriaBel - Iriada Cunha + NúriaBel + Iriada Cunha AntonMalinovskiy 373–379 W15-3046 @@ -5690,8 +5690,8 @@ Machine Translation Evaluation using Recurrent Neural Networks RohitGupta - ConstantinOrăsan - Josefvan Genabith + ConstantinOrăsan + Josefvan Genabith 380–384 W15-3047 10.18653/v1/W15-3047 @@ -5708,7 +5708,7 @@ chr<fixed-case>F</fixed-case>: character n-gram <fixed-case>F</fixed-case>-score for automatic <fixed-case>MT</fixed-case> evaluation - MajaPopović + MajaPopović 392–395 W15-3049 10.18653/v1/W15-3049 @@ -5717,7 +5717,7 @@ <fixed-case>BEER</fixed-case> 1.1: <fixed-case>ILLC</fixed-case> <fixed-case>U</fixed-case>v<fixed-case>A</fixed-case> submission to metrics and tuning task MilošStanojević - KhalilSima’an + KhalilSima’an 396–401 W15-3050 10.18653/v1/W15-3050 @@ -5788,7 +5788,7 @@ GrahamNeubig KoichiroYoshino SakrianiSakti - TomokiToda + TomokiToda SatoshiNakamura 442–449 W15-3057 @@ -5806,11 +5806,11 @@ How do Humans Evaluate Machine Translation - FranciscoGuzmán + FranciscoGuzmán AhmedAbdelali IrinaTemnikova HassanSajjad - StephanVogel + StephanVogel 457–466 W15-3059 10.18653/v1/W15-3059 @@ -5822,7 +5822,7 @@ Jan-ThorstenPeter StephanPeitz MinweiFeng - HermannNey + HermannNey 467–476 W15-3060 10.18653/v1/W15-3060 @@ -5833,7 +5833,7 @@ Proceedings of the Eighth SIGHAN Workshop on Chinese Language Processing W15-31 - Liang-ChihYu + Liang-ChihYu ZhifangSui YueZhang VincentNg @@ -5852,7 +5852,7 @@ Sequential Annotation and Chunking of <fixed-case>C</fixed-case>hinese Discourse Structure FrancesYung KevinDuh - YujiMatsumoto + YujiMatsumoto 1–6 W15-3101 10.18653/v1/W15-3101 @@ -5888,7 +5888,7 @@ WeiLai WeipingYe XinruZhao - MarkLiberman + MarkLiberman 21–25 W15-3104 10.18653/v1/W15-3104 @@ -5896,7 +5896,7 @@ <fixed-case>ACB</fixed-case>i<fixed-case>MA</fixed-case>: Advanced <fixed-case>C</fixed-case>hinese Bi-Character Word Morphological Analyzer - Ting-HaoHuang + Ting-HaoHuang Yun-NungChen LingpengKong 26–31 @@ -5907,9 +5907,9 @@ Introduction to <fixed-case>SIGHAN</fixed-case> 2015 Bake-off for <fixed-case>C</fixed-case>hinese Spelling Check - Yuen-HsienTseng + Yuen-HsienTseng Lung-HaoLee - Li-PingChang + Li-PingChang Hsin-HsiChen 32–37 W15-3106 @@ -5923,7 +5923,7 @@ JinhuaXiong JianpengHou QiaoZhang - XueqiCheng + XueqiCheng 38–45 W15-3107 10.18653/v1/W15-3107 @@ -5931,7 +5931,7 @@ Word Vector/Conditional Random Field-based <fixed-case>C</fixed-case>hinese Spelling Error Detection for <fixed-case>SIGHAN</fixed-case>-2015 Evaluation - Yih-RuWang + Yih-RuWang Yuan-FuLiao 46–49 W15-3108 @@ -6091,7 +6091,7 @@ Rule-Based <fixed-case>W</fixed-case>eibo Messages Sentiment Polarity Classification towards Given Topics HongzhaoZhou - YonglinTeng + YonglinTeng MinHou WeiHe HongtaoZhu @@ -6107,7 +6107,7 @@ ChunLiao ChongFeng SenYang - HeyanHuang + HeyanHuang 158–163 W15-3124 10.18653/v1/W15-3124 @@ -6167,7 +6167,7 @@ Proceedings of the Second Workshop on Arabic Natural Language Processing W15-32 NizarHabash - StephanVogel + StephanVogel KareemDarwish 10.18653/v1/W15-32 Association for Computational Linguistics @@ -6195,8 +6195,8 @@ HazemHajj GilbertBadaro RamyBaly - WassimEl Hajj - KhaledBashir Shaban + WassimEl Hajj + KhaledBashir Shaban 9–17 W15-3202 10.18653/v1/W15-3202 @@ -6211,8 +6211,8 @@ LindaFayad JeffreyKhairallah HazemHajj - KhaledShaban - WassimEl-Hajj + KhaledShaban + WassimEl-Hajj 18–25 W15-3203 10.18653/v1/W15-3203 @@ -6243,7 +6243,7 @@ <fixed-case>DIWAN</fixed-case>: A Dialectal Word Annotation Tool for <fixed-case>A</fixed-case>rabic FaisalAl-Shargi - OwenRambow + OwenRambow 49–58 W15-3206 10.18653/v1/W15-3206 @@ -6254,7 +6254,7 @@ AhmedHamdi AlexisNasr NizarHabash - NúriaGala + NúriaGala 59–68 W15-3207 10.18653/v1/W15-3207 @@ -6262,7 +6262,7 @@ A Conventional Orthography for <fixed-case>A</fixed-case>lgerian <fixed-case>A</fixed-case>rabic - HoudaSaadane + HoudaSaadane NizarHabash 69–79 W15-3208 @@ -6273,7 +6273,7 @@ A Pilot Study on <fixed-case>A</fixed-case>rabic Multi-Genre Corpus Diacritization HoudaBouamor WajdiZaghouani - MonaDiab + MonaDiab OssamaObeid KemalOflazer MahmoudGhoneim @@ -6286,7 +6286,7 @@ Annotating Targets of Opinions in <fixed-case>A</fixed-case>rabic using Crowdsourcing NouraFarra - KathyMcKeown + KathyMcKeown NizarHabash 89–98 W15-3210 @@ -6306,7 +6306,7 @@ Joint <fixed-case>A</fixed-case>rabic Segmentation and Part-Of-Speech Tagging ShabibAlGahtani - JohnMcNaught + JohnMcNaught 108–117 W15-3212 10.18653/v1/W15-3212 @@ -6345,7 +6345,7 @@ <fixed-case>GWU</fixed-case>-<fixed-case>HASP</fixed-case>-2015@<fixed-case>QALB</fixed-case>-2015 Shared Task: Priming Spelling Candidates with Probability MohammedAttia MohamedAl-Badrashiny - MonaDiab + MonaDiab 138–143 W15-3216 10.18653/v1/W15-3216 @@ -6384,11 +6384,11 @@ <fixed-case>TECHLIMED</fixed-case>@<fixed-case>QALB</fixed-case>-Shared Task 2015: a hybrid <fixed-case>A</fixed-case>rabic Error Correction System - DjamelMostefa + DjamelMostefa JaberAbualasal OmarAsbayou MahmoudGzawi - RamziAbbes + RamziAbbes 161–165 W15-3220 10.18653/v1/W15-3220 @@ -6406,7 +6406,7 @@ Robust Part-of-speech Tagging of <fixed-case>A</fixed-case>rabic Text HananAldarmaki - MonaDiab + MonaDiab 173–182 W15-3222 10.18653/v1/W15-3222 @@ -6437,9 +6437,9 @@ Proceedings of the Grammar Engineering Across Frameworks (GEAF) 2015 Workshop W15-33 - Emily M.Bender - LoriLevin - StefanMüller + Emily M.Bender + LoriLevin + StefanMüller YannickParmentier AarneRanta 10.18653/v1/W15-33 @@ -6512,7 +6512,7 @@ Formalising the <fixed-case>S</fixed-case>wedish Constructicon in Grammatical Framework - NormundsGruzitis + NormundsGruzitis DanaDannélls BenjaminLyngfelt AarneRanta @@ -6543,7 +6543,7 @@ Proceedings of the Eighth Workshop on Building and Using Comparable Corpora W15-34 - PierreZweigenbaum + PierreZweigenbaum SergeSharoff ReinhardRapp 10.18653/v1/W15-34 @@ -6559,7 +6559,7 @@ Augmented Comparative Corpora and Monitoring Corpus in <fixed-case>C</fixed-case>hinese: <fixed-case>LIVAC</fixed-case> and Sketch Search Engine Compared - Benjamin K.Tsou + Benjamin K.Tsou 1–2 W15-3401 10.18653/v1/W15-3401 @@ -6570,7 +6570,7 @@ AlbertoBarrón-Cedeño CristinaEspaña-Bonet JosuBoldoba - LluísMàrquez + LluísMàrquez 3–13 W15-3402 10.18653/v1/W15-3402 @@ -6589,7 +6589,7 @@ Projective methods for mining missing translations in <fixed-case>DB</fixed-case>pedia LaurentJakubina - PhillippeLanglais + PhillippeLanglais 23–31 W15-3404 10.18653/v1/W15-3404 @@ -6598,7 +6598,7 @@ Attempting to Bypass Alignment from Comparable Corpora via Pivot Language AlexisLinard - BéatriceDaille + BéatriceDaille EmmanuelMorin 32–37 W15-3405 @@ -6608,7 +6608,7 @@ Application of a Corpus to Identify Gaps between <fixed-case>E</fixed-case>nglish Learners and Native Speakers KatsunoriKotani - TakehikoYoshimi + TakehikoYoshimi 38–42 W15-3406 10.18653/v1/W15-3406 @@ -6684,7 +6684,7 @@ EmmanuelMorin AmirHazem FlorianBoudin - ElizavetaLoginova-Clouet + ElizavetaLoginova-Clouet 88–91 W15-3413 10.18653/v1/W15-3413 @@ -6696,7 +6696,7 @@ Proceedings of the 1st Workshop on Semantics-Driven Statistical Machine Translation (S2MT 2015) W15-35 - DeyiXiong + DeyiXiong KevinDuh ChristianHardmeier RobertoNavigli @@ -6732,7 +6732,7 @@ Integrating Case Frame into <fixed-case>J</fixed-case>apanese to <fixed-case>C</fixed-case>hinese Hierarchical Phrase-based Translation Model - JinanXu + JinanXu JiangmingLiu YufengChen YujieZhang @@ -6760,7 +6760,7 @@ Proceedings of the ACL 2015 Workshop on Novel Computational Approaches to Keyphrase Extraction W15-36 - Sujatha DasGollapalli + Sujatha DasGollapalli CorneliaCaragea XiaoliLi C. LeeGiles @@ -6787,7 +6787,7 @@ Technical Term Extraction Using Measures of Neology ChristopherNorman - AkikoAizawa + AkikoAizawa 2–9 W15-3602 10.18653/v1/W15-3602 @@ -6806,7 +6806,7 @@ The Web as an Implicit Training Set: Application to Noun Compounds Syntax and Semantics - PreslavNakov + PreslavNakov 18 W15-3604 10.18653/v1/W15-3604 @@ -6847,9 +6847,9 @@ Proceedings of the 9th SIGHUM Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities (LaTeCH) W15-37 - KalliopiZervanou + KalliopiZervanou Mariekevan Erp - BeatriceAlex + BeatriceAlex 10.18653/v1/W15-37 Association for Computational Linguistics
Beijing, China
@@ -6874,7 +6874,7 @@ Five Centuries of Monarchy in <fixed-case>K</fixed-case>orea: Mining the Text of the Annals of the <fixed-case>J</fixed-case>oseon Dynasty JinYeongBak - AliceOh + AliceOh 10–14 W15-3702 10.18653/v1/W15-3702 @@ -6893,7 +6893,7 @@ Measuring the Structural and Conceptual Similarity of Folktales using Plot Graphs Victoria AnugrahLestari - RuliManurung + RuliManurung 25–33 W15-3704 10.18653/v1/W15-3704 @@ -6910,7 +6910,7 @@ Ranking Relevant Verb Phrases Extracted from Historical Text EvaPettersson - BeátaMegyesi + BeátaMegyesi JoakimNivre 39–47 W15-3706 @@ -6920,7 +6920,7 @@ Ranking election issues through the lens of social media StephenWan - CécileParis + CécileParis 48–52 W15-3707 10.18653/v1/W15-3707 @@ -6939,7 +6939,7 @@ Enriching Interlinear Text using Automatically Constructed Annotators RyanGeorgi FeiXia - WilliamLewis + WilliamLewis 58–67 W15-3709 10.18653/v1/W15-3709 @@ -6947,7 +6947,7 @@ Automatic interlinear glossing as two-level sequence classification - TanjaSamardžić + TanjaSamardžić RobertSchikowski SabineStoll 68–72 @@ -6957,7 +6957,7 @@ Enriching Digitized Medieval Manuscripts: Linking Image, Text and Lexical Knowledge - AitorArronte Álvarez + AitorArronte Álvarez 73–77 W15-3711 10.18653/v1/W15-3711 @@ -6981,7 +6981,7 @@ AndreaBellandi DavideAlbanesi GiuliaBenotto - EmilianoGiovannetti + EmilianoGiovannetti GianfrancoDi Segni 84–88 W15-3713 @@ -7025,10 +7025,10 @@ Proceedings of BioNLP 15 W15-38 - Kevin BretonnelCohen + Kevin BretonnelCohen DinaDemner-Fushman SophiaAnaniadou - Jun-ichiTsujii + Jun-ichiTsujii 10.18653/v1/W15-38 Association for Computational Linguistics
Beijing, China
@@ -7042,8 +7042,8 @@ Complex Event Extraction using <fixed-case>DRUM</fixed-case> - JamesAllen - Willde Beaumont + JamesAllen + Willde Beaumont LucianGalescu Choh ManTeng 1–11 @@ -7062,10 +7062,10 @@ An extended dependency graph for relation extraction in biomedical texts - YifanPeng + YifanPeng SamirGupta CathyWu - VijayShanker + VijayShanker 21–30 W15-3803 10.18653/v1/W15-3803 @@ -7084,7 +7084,7 @@ Extracting Biological Pathway Models From <fixed-case>NLP</fixed-case> Event Representations MichaelSpranger SucheendraPalaniappan - SamikGhosh + SamikGhosh 42–51 W15-3805 10.18653/v1/W15-3805 @@ -7093,7 +7093,7 @@ Shallow Training is cheap but is it good enough? Experiments with Medical Fact Coding RameshNallapati - RaduFlorian + RaduFlorian 52–60 W15-3806 10.18653/v1/W15-3806 @@ -7102,7 +7102,7 @@ Stacked Generalization for Medical Concept Extraction from Clinical Notes YoungjunKim - EllenRiloff + EllenRiloff 61–70 W15-3807 10.18653/v1/W15-3807 @@ -7121,7 +7121,7 @@ Extracting Time Expressions from Clinical Text - TimothyMiller + TimothyMiller StevenBethard DmitriyDligach ChenLin @@ -7159,7 +7159,7 @@ Jin-WooChung Hee-JinLee MariaWolters - JongPark + JongPark 104–113 W15-3812 10.18653/v1/W15-3812 @@ -7181,7 +7181,7 @@ RunqingSong MariaLiakata AndreasVlachos - StephanieSeneff + StephanieSeneff XiangrongZhang 121–126 W15-3814 @@ -7247,8 +7247,8 @@ Investigating Public Health Surveillance using <fixed-case>T</fixed-case>witter - AntonioJimeno Yepes - AndrewMacKinlay + AntonioJimeno Yepes + AndrewMacKinlay BoHan 164–170 W15-3821 @@ -7272,7 +7272,7 @@ DingchengLi YueYu HongfangLiu - Christopher G.Chute + Christopher G.Chute GuoqianJiang 177–182 W15-3823 @@ -7286,10 +7286,10 @@ Proceedings of the Fifth Named Entity Workshop W15-39 XiangyuDuan - Rafael E.Banchs + Rafael E.Banchs MinZhang HaizhouLi - AKumaran + AKumaran 10.18653/v1/W15-39 Association for Computational Linguistics
Beijing, China
@@ -7334,7 +7334,7 @@
Boosting Named Entity Recognition with Neural Character Embeddings - Cícerodos Santos + Cícerodos Santos VictorGuimarães 25–33 W15-3904 @@ -7343,7 +7343,7 @@ Regularity and Flexibility in <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Name Transliteration - Oi YeeKwong + Oi YeeKwong 34–42 W15-3905 10.18653/v1/W15-3905 @@ -7361,9 +7361,9 @@ Semi-supervised Learning for <fixed-case>V</fixed-case>ietnamese Named Entity Recognition using Online Conditional Random Fields Quang HongPham - Minh-LeNguyen + Minh-LeNguyen Binh ThanhNguyen - Nguyen VietCuong + Nguyen VietCuong 50–55 W15-3907 10.18653/v1/W15-3907 @@ -7372,7 +7372,7 @@ Boosting <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Machine Transliteration via High Quality Alignment and Multilingual Resources YanShao - JörgTiedemann + JörgTiedemann JoakimNivre 56–60 W15-3908 @@ -7384,7 +7384,7 @@ AndrewFinch LemaoLiu XiaolinWang - EiichiroSumita + EiichiroSumita 61–66 W15-3909 10.18653/v1/W15-3909 @@ -7394,7 +7394,7 @@ A Hybrid Transliteration Model for <fixed-case>C</fixed-case>hinese/<fixed-case>E</fixed-case>nglish Named Entities —<fixed-case>BJTU</fixed-case>-<fixed-case>NLP</fixed-case> Report for the 5th Named Entities Workshop DandanWang XiaohuiYang - JinanXu + JinanXu YufengChen NanWang BojiaLiu @@ -7422,7 +7422,7 @@ Data representation methods and use of mined corpora for <fixed-case>I</fixed-case>ndian language transliteration AnoopKunchukuttan - PushpakBhattacharyya + PushpakBhattacharyya 78–82 W15-3912 10.18653/v1/W15-3912 @@ -7431,8 +7431,8 @@ <fixed-case>NCU</fixed-case> <fixed-case>IISR</fixed-case> <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean and <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Named Entity Transliteration Using Different Grapheme Segmentation Approaches Yu-ChunWang - Chun-KaiWu - Richard Tzong-HanTsai + Chun-KaiWu + Richard Tzong-HanTsai 83–87 W15-3913 10.18653/v1/W15-3913 @@ -7444,10 +7444,10 @@ Proceedings of the 3rd Workshop on Continuous Vector Space Models and their Compositionality W15-40 AlexandreAllauzen - EdwardGrefenstette + EdwardGrefenstette Karl MoritzHermann HugoLarochelle - Scott Wen-tauYih + Scott Wen-tauYih 10.18653/v1/W15-40 Association for Computational Linguistics
Beijing, China
@@ -7470,9 +7470,9 @@
Recursive Neural Networks Can Learn Logical Semantics - Samuel R.Bowman + Samuel R.Bowman ChristopherPotts - Christopher D.Manning + Christopher D.Manning 12–21 W15-4002 10.18653/v1/W15-4002 @@ -7481,7 +7481,7 @@ Concept Extensions as the Basis for Vector-Space Semantics: Combining Distributional and Ontological Information about Entities - Jackie Chi KitCheung + Jackie Chi KitCheung 22–31 W15-4003 10.18653/v1/W15-4003 @@ -7501,7 +7501,7 @@ Exploring the effect of semantic similarity for Phrase-based Machine Translation KunalSachdeva - DiptiSharma + DiptiSharma 41–47 W15-4005 10.18653/v1/W15-4005 @@ -7536,8 +7536,8 @@ KurtEberle PatrikLambert ReinhardRapp - Rafael E.Banchs - Marta R.Costa-jussà + Rafael E.Banchs + Marta R.Costa-jussà 10.18653/v1/W15-41 Association for Computational Linguistics
Beijing
@@ -7551,10 +7551,10 @@ Bootstrapping a hybrid deep <fixed-case>MT</fixed-case> system - JoãoSilva - JoãoRodrigues + JoãoSilva + JoãoRodrigues LuísGomes - AntónioBranco + AntónioBranco 1–5 W15-4101 10.18653/v1/W15-4101 @@ -7572,7 +7572,7 @@ What a Transfer-Based System Brings to the Combination with <fixed-case>PBMT</fixed-case> AlešTamchyna - OndřejBojar + OndřejBojar 11–20 W15-4103 10.18653/v1/W15-4103 @@ -7590,7 +7590,7 @@ Passive and Pervasive Use of Bilingual Dictionary in Statistical Machine Translation LilingTan - Josefvan Genabith + Josefvan Genabith FrancisBond 30–34 W15-4105 @@ -7607,7 +7607,7 @@ A fuzzier approach to machine translation evaluation: A pilot study on post-editing productivity and automated metrics in commercial settings - CarlaParra Escartín + CarlaParra Escartín ManuelArcedillo 40–45 W15-4107 @@ -7660,10 +7660,10 @@ Proceedings of the 4th Workshop on Linked Data in Linguistics: Resources and Applications W15-42 ChristianChiarcos - John PhilipMcCrae + John PhilipMcCrae PetyaOsenova - PhilippCimiano - NancyIde + PhilippCimiano + NancyIde 10.18653/v1/W15-42 Association for Computational Linguistics
Beijing, China
@@ -7687,8 +7687,8 @@
A Linked Data Model for Multimodal Sentiment and Emotion Analysis - J. FernandoSánchez-Rada - Carlos A.Iglesias + J. FernandoSánchez-Rada + Carlos A.Iglesias RonaldGil 11–19 W15-4202 @@ -7699,8 +7699,8 @@ Seeing is Correcting: curating lexical resources using social interfaces LivyReal FabricioChalub - Valeriade Paiva - ClaudiaFreitas + Valeriade Paiva + ClaudiaFreitas AlexandreRademaker 20–29 W15-4203 @@ -7723,7 +7723,7 @@ Reconciling Heterogeneous Descriptions of Language Resources John PhilipMcCrae PhilippCimiano - VictorRodríguez Doncel + VictorRodríguez Doncel DanielVila-Suero JorgeGracia LucaMatteis @@ -7738,8 +7738,8 @@ <fixed-case>RDF</fixed-case> Representation of Licenses for Language Resources - VictorRodriguez-Doncel - PennyLabropoulou + VictorRodriguez-Doncel + PennyLabropoulou 49–58 W15-4206 10.18653/v1/W15-4206 @@ -7768,8 +7768,8 @@ Linguistic Linked Data in <fixed-case>C</fixed-case>hinese: The Case of <fixed-case>C</fixed-case>hinese <fixed-case>W</fixed-case>ordnet - Chih-YaoLee - Shu-KaiHsieh + Chih-YaoLee + Shu-KaiHsieh 70–74 W15-4209 10.18653/v1/W15-4209 @@ -7808,7 +7808,7 @@ Challenges of studying and processing dialects in social media AnnaJørgensen DirkHovy - AndersSøgaard + AndersSøgaard 9–18 W15-4302 10.18653/v1/W15-4302 @@ -7820,7 +7820,7 @@ NorismaIdris LiyanaShuib RamGopal Raj - AiTiAw + AiTiAw 19–27 W15-4303 10.18653/v1/W15-4303 @@ -7838,8 +7838,8 @@ A Normalizer for <fixed-case>UGC</fixed-case> in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese - MagaliSanches Duran - Maria das GraçasVolpe Nunes + MagaliSanches Duran + Maria das GraçasVolpe Nunes LucasAvanço 38–47 W15-4305 @@ -7848,9 +7848,9 @@ <fixed-case>USFD</fixed-case>: <fixed-case>T</fixed-case>witter <fixed-case>NER</fixed-case> with Drift Compensation and Linked Data - LeonDerczynski + LeonDerczynski IsabelleAugenstein - KalinaBontcheva + KalinaBontcheva 48–53 W15-4306 10.18653/v1/W15-4306 @@ -7870,7 +7870,7 @@ <fixed-case>IITP</fixed-case>: Multiobjective Differential Evolution based <fixed-case>T</fixed-case>witter Named Entity Recognition Md ShadAkhtar - Utpal KumarSikdar + Utpal KumarSikdar AsifEkbal 61–67 W15-4308 @@ -7889,7 +7889,7 @@ <fixed-case>H</fixed-case>allym: Named Entity Recognition on <fixed-case>T</fixed-case>witter with Word Representation - Eun-SukYang + Eun-SukYang Yu-SeopKim 72–77 W15-4310 @@ -7907,7 +7907,7 @@ <fixed-case>B</fixed-case>ekli:A Simple Approach to <fixed-case>T</fixed-case>witter Text Normalization. - RussellBeckley + RussellBeckley 82–86 W15-4312 10.18653/v1/W15-4312 @@ -7933,7 +7933,7 @@ <fixed-case>LYSGROUP</fixed-case>: Adapting a <fixed-case>S</fixed-case>panish microtext normalization system to <fixed-case>E</fixed-case>nglish. - YeraiDoval Mosquera + YeraiDoval Mosquera JesúsVilares CarlosGómez-Rodríguez 99–105 @@ -7944,7 +7944,7 @@ <fixed-case>IITP</fixed-case>: Hybrid Approach for Text Normalization in <fixed-case>T</fixed-case>witter Md ShadAkhtar - Utpal KumarSikdar + Utpal KumarSikdar AsifEkbal 106–110 W15-4316 @@ -7971,8 +7971,8 @@ Shared Tasks of the 2015 Workshop on Noisy User-generated Text: <fixed-case>T</fixed-case>witter Lexical Normalization and Named Entity Recognition - TimothyBaldwin - Marie Catherinede Marneffe + TimothyBaldwin + Marie Catherinede Marneffe BoHan Young-BumKim AlanRitter @@ -8016,7 +8016,7 @@ <fixed-case>NCSU</fixed-case>_<fixed-case>SAS</fixed-case>_<fixed-case>SAM</fixed-case>: Deep Encoding and Reconstruction for Normalization of Noisy Text SamuelLeeman-Munk - JamesLester + JamesLester JamesCox 154–161 W15-4323 @@ -8026,7 +8026,7 @@ Learning finite state word representations for unsupervised <fixed-case>T</fixed-case>witter adaptation of <fixed-case>POS</fixed-case> taggers JulieWulff - AndersSøgaard + AndersSøgaard 162–166 W15-4324 wulff-sogaard-2015-learning @@ -8045,8 +8045,8 @@ Proceedings of the 2nd Workshop on Natural Language Processing Techniques for Educational Applications W15-44 Hsin-HsiChen - Yuen-HsienTseng - YujiMatsumoto + Yuen-HsienTseng + YujiMatsumoto Lung HsiangWong 10.18653/v1/W15-44 Association for Computational Linguistics @@ -8062,8 +8062,8 @@ Overview of the <fixed-case>NLP</fixed-case>-<fixed-case>TEA</fixed-case> 2015 Shared Task for <fixed-case>C</fixed-case>hinese Grammatical Error Diagnosis Lung-HaoLee - Liang-ChihYu - Li-PingChang + Liang-ChihYu + Li-PingChang 1–6 W15-4401 10.18653/v1/W15-4401 @@ -8075,7 +8075,7 @@ Po-LinChen Shih-HungWu Liang-PuChen - Ping-CheYang + Ping-CheYang Ren-DarYang 7–14 W15-4402 @@ -8104,7 +8104,7 @@ Semi-automatic Generation of Multiple-Choice Tests from Mentions of Semantic Relations RenlongAi SebastianKrause - WalterKasper + WalterKasper FeiyuXu HansUszkoreit 26–33 @@ -8117,7 +8117,7 @@ TaoChen NaijiaZheng YueZhao - Muthu KumarChandrasekaran + Muthu KumarChandrasekaran Min-YenKan 34–42 W15-4406 @@ -8127,9 +8127,9 @@ Bilingual Keyword Extraction and its Educational Application - Chung-ChiHuang - Mei-HuaChen - Ping-CheYang + Chung-ChiHuang + Mei-HuaChen + Ping-CheYang 43–48 W15-4407 10.18653/v1/W15-4407 @@ -8202,7 +8202,7 @@ Using Finite State Transducers for Helping Foreign Language Learning HasanKaya - GülşenEryiğit + GülşenEryiğit 94–98 W15-4414 10.18653/v1/W15-4414 @@ -8224,7 +8224,7 @@ Jui-FengYeh Chan-KunYeh Kai-HsiangYu - Ya-TingLi + Ya-TingLi Wan-LingTsai 105–110 W15-4416 @@ -8264,10 +8264,10 @@ TommasoCaselli Mariekevan Erp Anne-LyseMinard - MarkFinlayson + MarkFinlayson BenMiller - JordiAtserias - AlexandraBalahur + JordiAtserias + AlexandraBalahur PiekVossen 10.18653/v1/W15-45 Association for Computational Linguistics @@ -8282,7 +8282,7 @@ Interactions between Narrative Schemas and Document Categories - DanSimonson + DanSimonson AnthonyDavis 1–10 W15-4501 @@ -8294,7 +8294,7 @@ XiangLi Thien HuuNguyen KaiCao - RalphGrishman + RalphGrishman 11–15 W15-4502 10.18653/v1/W15-4502 @@ -8355,7 +8355,7 @@ From <fixed-case>T</fixed-case>ime<fixed-case>L</fixed-case>ines to <fixed-case>S</fixed-case>tory<fixed-case>L</fixed-case>ines: A preliminary proposal for evaluating narratives EgoitzLaparra ItziarAldabe - GermanRigau + GermanRigau 50–55 W15-4508 10.18653/v1/W15-4508 @@ -8379,9 +8379,9 @@ W15-46 AlexanderKoller GabrielSkantze - FilipJurcicek - MasahiroAraki - Carolyn PensteinRose + FilipJurcicek + MasahiroAraki + Carolyn PensteinRose 10.18653/v1/W15-46 Association for Computational Linguistics
Prague, Czech Republic
@@ -8427,7 +8427,7 @@ Miscommunication Recovery in Physically Situated Dialogue MatthewMarge - AlexanderRudnicky + AlexanderRudnicky 22–31 W15-4604 10.18653/v1/W15-4604 @@ -8438,7 +8438,7 @@ TakuyaHiraoka KallirroiGeorgila ElnazNouri - DavidTraum + DavidTraum SatoshiNakamura 32–41 W15-4605 @@ -8448,7 +8448,7 @@ An Incremental Turn-Taking Model with Active System Barge-in for Spoken Dialog Systems TianchengZhao - Alan WBlack + Alan WBlack MaxineEskenazi 42–50 W15-4606 @@ -8457,7 +8457,7 @@ Exploring the Effects of Redundancy within a Tutorial Dialogue System: Restating Students’ Responses - PamelaJordan + PamelaJordan PatriciaAlbacete SandraKatz 51–59 @@ -8477,7 +8477,7 @@ Belief Tracking with Stacked Relational Trees DeepakRamachandran - AdwaitRatnaparkhi + AdwaitRatnaparkhi 68–76 W15-4609 10.18653/v1/W15-4609 @@ -8486,7 +8486,7 @@ “So, which one is it?” The effect of alternative incremental architectures in a high-performance game-playing agent MaikePaetzel - RameshManuvinakurike + RameshManuvinakurike DavidDeVault 77–86 W15-4610 @@ -8510,7 +8510,7 @@ <fixed-case>PDTB</fixed-case> Discourse Parsing as a Tagging Task: The Two Taggers Approach OrBiran - KathleenMcKeown + KathleenMcKeown 96–104 W15-4612 10.18653/v1/W15-4612 @@ -8520,7 +8520,7 @@ Which Synthetic Voice Should <fixed-case>I</fixed-case> Choose for an Evocative Task? EliPincus KallirroiGeorgila - DavidTraum + DavidTraum 105–113 W15-4613 10.18653/v1/W15-4613 @@ -8538,7 +8538,7 @@ Towards Improving Dialogue Topic Tracking Performances with Wikification of Concept Mentions SeokhwanKim - Rafael E.Banchs + Rafael E.Banchs HaizhouLi 124–128 W15-4615 @@ -8550,7 +8550,7 @@ SangdoHan JeesooBang SeonghanRyu - Gary GeunbaeLee + Gary GeunbaeLee 129–133 W15-4616 10.18653/v1/W15-4616 @@ -8559,9 +8559,9 @@ Automated Speech Recognition Technology for Dialogue Interaction with Non-Native Interlocutors - Alexei V.Ivanov + Alexei V.Ivanov VikramRamanarayanan - DavidSuendermann-Oeft + DavidSuendermann-Oeft MelissaLopez KeelanEvanini JidongTao @@ -8573,10 +8573,10 @@ Conversational Knowledge Teaching Agent that uses a Knowledge Base KyusongLee - Paul HongsuckSeo + Paul HongsuckSeo JunhwiChoi SangjunKoo - Gary GeunbaeLee + Gary GeunbaeLee 139–143 W15-4618 10.18653/v1/W15-4618 @@ -8592,7 +8592,7 @@ A <fixed-case>SIP</fixed-case> of <fixed-case>C</fixed-case>o<fixed-case>F</fixed-case>ee : A Sample of Interesting Productions of Conversational Feedback - LaurentPrévot + LaurentPrévot JanGorisch RoxaneBertrand EmilienGorène @@ -8604,7 +8604,7 @@ Reinforcement Learning of Multi-Issue Negotiation Dialogue Policies - AlexandrosPapangelis + AlexandrosPapangelis KallirroiGeorgila 154–158 W15-4621 @@ -8613,12 +8613,12 @@ Fast and easy language understanding for dialog systems with <fixed-case>M</fixed-case>icrosoft Language Understanding Intelligent Service (<fixed-case>LUIS</fixed-case>) - Jason D.Williams + Jason D.Williams EslamKamal MokhtarAshour HaniAmr JessicaMiller - GeoffZweig + GeoffZweig 159–161 W15-4622 10.18653/v1/W15-4622 @@ -8626,8 +8626,8 @@ Multilingual <fixed-case>W</fixed-case>iki<fixed-case>T</fixed-case>alk: <fixed-case>W</fixed-case>ikipedia-based talking robots that switch languages. - GrahamWilcock - KristiinaJokinen + GrahamWilcock + KristiinaJokinen 162–164 W15-4623 10.18653/v1/W15-4623 @@ -8645,7 +8645,7 @@ <fixed-case>I</fixed-case> Couldn’t Agree More: The Role of Conversational Structure in Agreement and Disagreement Detection in Online Discussions SaraRosenthal - KathyMcKeown + KathyMcKeown 168–177 W15-4625 10.18653/v1/W15-4625 @@ -8662,9 +8662,9 @@ Generating Sentence Planning Variations for Story Telling - StephanieLukin + StephanieLukin LenaReed - MarilynWalker + MarilynWalker 188–197 W15-4627 10.18653/v1/W15-4627 @@ -8672,7 +8672,7 @@ <fixed-case>K</fixed-case>eynote: Graph-based Approaches for Spoken Language Understanding - DilekHakkani-Tur + DilekHakkani-Tur 198 W15-4628 10.18653/v1/W15-4628 @@ -8680,7 +8680,7 @@ Evaluating Spoken Dialogue Processing for Time-Offset Interaction - DavidTraum + DavidTraum KallirroiGeorgila RonArtstein AntonLeuski @@ -8692,9 +8692,9 @@ The Real Challenge 2014: Progress and Prospects MaxineEskenazi - Alan WBlack - SungjinLee - DavidTraum + Alan WBlack + SungjinLee + DavidTraum 209–216 W15-4630 10.18653/v1/W15-4630 @@ -8704,7 +8704,7 @@ Argument Mining: Extracting Arguments from Online Dialogue ReidSwanson BrianEcker - MarilynWalker + MarilynWalker 217–226 W15-4631 10.18653/v1/W15-4631 @@ -8721,10 +8721,10 @@ Call Centre Conversation Summarization: A Pilot Task at Multiling 2015 - BenoitFavre - EvgenyStepanov - JérémyTrione - FrédéricBéchet + BenoitFavre + EvgenyStepanov + JérémyTrione + FrédéricBéchet GiuseppeRiccardi 232–236 W15-4633 @@ -8745,10 +8745,10 @@ Comment-to-Article Linking in the Online News Domain AhmetAker - EminaKurtic + EminaKurtic MarkHepple - RobGaizauskas - GiuseppeDi Fabbrizio + RobGaizauskas + GiuseppeDi Fabbrizio 245–249 W15-4635 10.18653/v1/W15-4635 @@ -8780,12 +8780,12 @@ <fixed-case>M</fixed-case>ulti<fixed-case>L</fixed-case>ing 2015: Multilingual Summarization of Single and Multi-Documents, On-line Fora, and Call-center Conversations GeorgeGiannakopoulos JeffKubina - JohnConroy + JohnConroy JosefSteinberger - BenoitFavre - MijailKabadjov - UdoKruschwitz - MassimoPoesio + BenoitFavre + MijailKabadjov + UdoKruschwitz + MassimoPoesio 270–274 W15-4638 10.18653/v1/W15-4638 @@ -8794,12 +8794,12 @@ Stochastic Language Generation in Dialogue using Recurrent Neural Networks with Convolutional Sentence Reranking Tsung-HsienWen - MilicaGašić + MilicaGašić DonghoKim NikolaMrkšić Pei-HaoSu DavidVandyke - SteveYoung + SteveYoung 275–284 W15-4639 10.18653/v1/W15-4639 @@ -8809,7 +8809,7 @@ The <fixed-case>U</fixed-case>buntu Dialogue Corpus: A Large Dataset for Research in Unstructured Multi-Turn Dialogue Systems RyanLowe NissanPow - IulianSerban + IulianSerban JoellePineau 285–294 W15-4640 @@ -8841,7 +8841,7 @@ Optimising Turn-Taking Strategies With Reinforcement Learning HatimKhouzaimi RomainLaroche - FabriceLefèvre + FabriceLefèvre 315–324 W15-4643 10.18653/v1/W15-4643 @@ -8850,9 +8850,9 @@ Acoustic-prosodic entrainment in <fixed-case>S</fixed-case>lovak, <fixed-case>S</fixed-case>panish, <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese: A cross-linguistic comparison RivkaLevitan - ŠtefanBeňuš - AgustínGravano - JuliaHirschberg + ŠtefanBeňuš + AgustínGravano + JuliaHirschberg 325–334 W15-4644 10.18653/v1/W15-4644 @@ -8861,9 +8861,9 @@ A statistical approach for Non-Sentential Utterance Resolution for Interactive <fixed-case>QA</fixed-case> System DineshRaghu - SathishIndurthi + SathishIndurthi JitendraAjmera - SachindraJoshi + SachindraJoshi 335–343 W15-4645 10.18653/v1/W15-4645 @@ -8927,7 +8927,7 @@ User Adaptive Restoration for Incorrectly-Segmented Utterances in Spoken Dialogue Systems KazunoriKomatani NaokiHotta - SatoshiSato + SatoshiSato MikioNakano 393–401 W15-4651 @@ -8937,7 +8937,7 @@ Incremental Coordination: Attention-Centric Speech Production in a Physically Situated Conversational Agent ZhouYu - DanBohus + DanBohus EricHorvitz 402–406 W15-4652 @@ -8948,7 +8948,7 @@ Hyper-parameter Optimisation of <fixed-case>G</fixed-case>aussian Process Reinforcement Learning for Statistical Dialogue Management LuChen Pei-HaoSu - MilicaGašić + MilicaGašić 407–411 W15-4653 10.18653/v1/W15-4653 @@ -8969,10 +8969,10 @@ Reward Shaping with Recurrent Neural Networks for Speeding up On-Line Policy Learning in Spoken Dialogue Systems Pei-HaoSu DavidVandyke - MilicaGašić + MilicaGašić NikolaMrkšić Tsung-HsienWen - SteveYoung + SteveYoung 417–421 W15-4655 10.18653/v1/W15-4655 @@ -8994,8 +8994,8 @@ MariaSchmidt MarkusMüller MartinWagner - SebastianStüker - AlexWaibel + SebastianStüker + AlexWaibel HansjörgHofmann SteffenWerner 427–431 @@ -9006,8 +9006,8 @@ A distributed cloud-based dialog system for conversational application development VikramRamanarayanan - DavidSuendermann-Oeft - Alexei V.Ivanov + DavidSuendermann-Oeft + Alexei V.Ivanov KeelanEvanini 432–434 W15-4658 @@ -9021,7 +9021,7 @@ RonaldProvine PeterYeh WilliamJarrold - AdwaitRatnaparkhi + AdwaitRatnaparkhi BenjaminDouglas 435–437 W15-4659 @@ -9030,12 +9030,12 @@ Description of the <fixed-case>P</fixed-case>atient<fixed-case>G</fixed-case>enesys Dialogue System - LeonardoCampillos Llanos + LeonardoCampillos Llanos DhouhaBouamor - ÉricBilinski + ÉricBilinski Anne-LaureLigozat - PierreZweigenbaum - SophieRosset + PierreZweigenbaum + SophieRosset 438–440 W15-4660 10.18653/v1/W15-4660 @@ -9046,10 +9046,10 @@ TejaswiKasturi HaojianJin AasishPappu - SungjinLee + SungjinLee BeverleyHarrison RamanaMurthy - AmandaStent + AmandaStent 441–443 W15-4661 10.18653/v1/W15-4661 @@ -9060,7 +9060,7 @@ Proceedings of the 15th European Workshop on Natural Language Generation (ENLG) W15-47 - AnyaBelz + AnyaBelz AlbertGatt FrançoisPortet MatthewPurver @@ -9079,7 +9079,7 @@ A Simple Surface Realization Engine for <fixed-case>T</fixed-case>elugu Sasi Raja SekharDokkara Suresh VermaPenumathsa - Somayajulu GowriSripada + Somayajulu GowriSripada 1–8 W15-4701 10.18653/v1/W15-4701 @@ -9106,8 +9106,8 @@ Inducing Clause-Combining Rules: A Case Study with the <fixed-case>SP</fixed-case>a<fixed-case>RK</fixed-case>y Restaurant Corpus - MichaelWhite - David M.Howcroft + MichaelWhite + David M.Howcroft 28–37 W15-4704 10.18653/v1/W15-4704 @@ -9115,7 +9115,7 @@ Reading Times Predict the Quality of Generated Text Above and Beyond Human Ratings - SinaZarrieß + SinaZarrieß SebastianLoth DavidSchlangen 38–47 @@ -9126,7 +9126,7 @@ Moving Targets: Human References to Unstable Landmarks AdrianaBaltaretu - EmielKrahmer + EmielKrahmer AlfonsMaes 48–51 W15-4706 @@ -9136,8 +9136,8 @@ A Framework for the Generation of Computer System Diagnostics in Natural Language using Finite State Methods RachelFarrell - GordonPace - MichaelRosner + GordonPace + MichaelRosner 52–56 W15-4707 10.18653/v1/W15-4707 @@ -9156,7 +9156,7 @@ <fixed-case>J</fixed-case>apanese Word Reordering Executed Concurrently with Dependency Parsing and Its Evaluation TomohiroOhno KazushiYoshida - YoshihideKato + YoshihideKato ShigekiMatsubara 61–65 W15-4709 @@ -9178,7 +9178,7 @@ LeenSevens VincentVandeghinste InekeSchuurman - FrankVan Eynde + FrankVan Eynde 71–75 W15-4711 10.18653/v1/W15-4711 @@ -9186,7 +9186,7 @@ Translating <fixed-case>I</fixed-case>talian to <fixed-case>LIS</fixed-case> in the Rail Stations - AlessandroMazzei + AlessandroMazzei 76–80 W15-4712 10.18653/v1/W15-4712 @@ -9282,7 +9282,7 @@ Generating Image Descriptions with Gold Standard Visual Inputs: Motivation, Evaluation and Baselines JosiahWang - RobertGaizauskas + RobertGaizauskas 117–126 W15-4722 10.18653/v1/W15-4722 @@ -9385,8 +9385,8 @@ Automated Lossless Hyper-Minimization for Morphological Analyzers SenkaDrobac - MiikkaSilfverberg - KristerLindén + MiikkaSilfverberg + KristerLindén W15-4806 drobac-etal-2015-automated @@ -9434,10 +9434,10 @@ Proceedings of the 18th Annual Conference of the European Association for Machine Translation - İlknur DurgarEl-Kahlout + İlknur DurgarEl-Kahlout MehmedÖzkan FelipeSánchez-Martínez - GemaRamírez-Sánchez + GemaRamírez-Sánchez FredHollowood AndyWay
Antalya, Turkey
@@ -9453,9 +9453,9 @@ Exploiting portability to build an <fixed-case>RBMT</fixed-case> prototype for a new source language NoraAranberri - GorkaLabaka - ArantzaDíaz de Ilarraza - KepaSarasola + GorkaLabaka + ArantzaDíaz de Ilarraza + KepaSarasola W15-4901 3–10 aranberri-etal-2015-exploiting-portability @@ -9463,24 +9463,24 @@ Building hybrid machine translation systems by using an <fixed-case>EBMT</fixed-case> preprocessor to create partial translations MikelArtetxe - GorkaLabaka - KepaSarasola + GorkaLabaka + KepaSarasola W15-4902 11–18 artetxe-etal-2015-building-hybrid Using on-line available sources of bilingual information for word-level machine translation quality estimation - MiquelEsplà-Gomis + MiquelEsplà-Gomis FelipeSánchez-Martínez - Mikel L.Forcada + Mikel L.Forcada W15-4903 19–26 espla-gomis-etal-2015-using-line A general framework for minimizing translation effort: towards a principled combination of translation technologies in computer-aided translation - Mikel L.Forcada + Mikel L.Forcada FelipeSánchez-Martínez W15-4904 27–34 @@ -9489,10 +9489,10 @@ Can Translation Memories afford not to use paraphrasing? RohitGupta - ConstantinOrăsan + ConstantinOrăsan MarcosZampieri MihaelaVela - Josefvan Genabith + Josefvan Genabith W15-4905 35–42 gupta-etal-2015-translation-memories @@ -9518,9 +9518,9 @@ Document-Level Machine Translation with Word Vector Models - Eva MartínezGarcia + Eva MartínezGarcia CristinaEspaña-Bonet - LluísMàrquez + LluísMàrquez W15-4908 59–66 garcia-etal-2015-document-level @@ -9551,23 +9551,23 @@ Dynamic Terminology Integration Methods in Statistical Machine Translation - MārcisPinnis + MārcisPinnis W15-4912 89–96 pinnis-2015-dynamic-terminology Identifying main obstacles for statistical machine translation of morphologically rich <fixed-case>S</fixed-case>outh <fixed-case>S</fixed-case>lavic languages - MajaPopović - MihaelArčan + MajaPopović + MihaelArčan W15-4913 97–104 popovic-arcan-2015-identifying-main Poor man’s lemmatisation for automatic error classification - MajaPopović - MihaelArčan + MajaPopović + MihaelArčan EleftheriosAvramidis AljoschaBurchardt ArleLommel @@ -9585,10 +9585,10 @@ Searching for Context: a Study on Document-Level Labels for Translation Quality Estimation - CarolinaScarton + CarolinaScarton MarcosZampieri MihaelaVela - Josefvan Genabith + Josefvan Genabith LuciaSpecia W15-4916 121–128 @@ -9598,7 +9598,7 @@ Stripping Adjectives: Integration Techniques for Selective Stemming in <fixed-case>SMT</fixed-case> Systems IsabelSlawik JanNiehues - AlexWaibel + AlexWaibel W15-4917 129–136 slawik-etal-2015-stripping-adjectives @@ -9606,8 +9606,8 @@ Evaluating machine translation for assimilation via a gap-filling task EkaterinaAgeeva - Mikel L.Forcada - Francis M.Tyers + Mikel L.Forcada + Francis M.Tyers Juan AntonioPérez-Ortiz W15-4918 137–144 @@ -9615,9 +9615,9 @@ Unsupervised training of maximum-entropy models for lexical selection in rule-based machine translation - Francis M.Tyers + Francis M.Tyers FelipeSánchez-Martínez - Mikel L.Forcada + Mikel L.Forcada W15-4919 145–152 tyers-etal-2015-unsupervised-training @@ -9633,14 +9633,14 @@ Re-assessing the <fixed-case>WMT</fixed-case>2013 Human Evaluation with Professional Translators Trainees MihaelaVela - Josefvan Genabith + Josefvan Genabith W15-4921 161–168 vela-van-genabith-2015-assessing-wmt2013 Integrating a Large, Monolingual Corpus as Translation Memory into Statistical Machine Translation - KatharinaWäschle + KatharinaWäschle StefanRiezler W15-4922 169–176 @@ -9649,8 +9649,8 @@ Target-Side Generation of Prepositions for <fixed-case>SMT</fixed-case> MarionWeller - AlexanderFraser - SabineSchulte im Walde + AlexanderFraser + SabineSchulte im Walde W15-4923 177–184 weller-etal-2015-target @@ -9675,7 +9675,7 @@ Pre-reordering for Statistical Machine Translation of Non-fictional Subtitles - MagdalenaPlamadă + MagdalenaPlamadă GionLinder PhillipStröbel MartinVolk @@ -9702,7 +9702,7 @@ <fixed-case>M</fixed-case>ixed<fixed-case>E</fixed-case>motions: Social Semantic Emotion Analysis for Innovative Multilingual Big Data Analytics Markets - MihaelArcan + MihaelArcan PaulBuitelaar W15-4929 211 @@ -9710,7 +9710,7 @@ The <fixed-case>ACCEPT</fixed-case> Academic Portal: Bringing Together Pre-editing, <fixed-case>MT</fixed-case> and Post-editing into a Learning Environment - PierretteBouillon + PierretteBouillon JohannaGerlach AsheeshGulati VictoriaPorro @@ -9745,7 +9745,7 @@ <fixed-case>H</fixed-case>andy<fixed-case>CAT</fixed-case> - An Open-Source Platform for <fixed-case>CAT</fixed-case> Tool Research - ChristopherHokamp + ChristopherHokamp QunLiu W15-4934 216 @@ -9758,15 +9758,15 @@ MarkusEgg AndyWay LexiBirch - KatiaKermanidis + KatiaKermanidis VilelminiSosoni DimitriosTsoumakos - Antalvan den Bosch + Antalvan den Bosch IrisHendrickx MichaelPapadopoulos PanayotaGeorgakopoulou MariaGialama - Mennovan Zaanen + Mennovan Zaanen IoanaBuliga MitjaJermol DavorOrlic @@ -9791,7 +9791,7 @@ <fixed-case>FALCON</fixed-case>: Federated Active Linguistic data <fixed-case>C</fixed-case>urati<fixed-case>ON</fixed-case> - DavidLewis + DavidLewis W15-4938 220 lewis-2015-falcon-federated @@ -9807,7 +9807,7 @@ <fixed-case>O</fixed-case>kapi+<fixed-case>Q</fixed-case>u<fixed-case>E</fixed-case>st: Translation Quality Estimation within Okapi - Gustavo HenriquePaetzold + Gustavo HenriquePaetzold LuciaSpecia YvesSavourel W15-4940 @@ -9839,13 +9839,13 @@ <fixed-case>A</fixed-case>bu-<fixed-case>M</fixed-case>a<fixed-case>T</fixed-case>ran: Automatic building of Machine Translation AntonioToral - Tommi A.Pirinen + Tommi A.Pirinen AndyWay GemaRamírez-Sánchez - Sergio OrtizRojas - RaphaelRubino - MiquelEsplà - Mikel L.Forcada + Sergio OrtizRojas + RaphaelRubino + MiquelEsplà + Mikel L.Forcada VassilisPapavassiliou ProkopisProkopidis NikolaLjubešić @@ -9858,7 +9858,7 @@ MasaoUtiyama KyoKageura MartinThomas - AnthonyHartley + AnthonyHartley W15-4945 228 utiyama-etal-2015-mnh-tt @@ -9867,7 +9867,7 @@ Smart Computer Aided Translation Environment - <fixed-case>SCATE</fixed-case> VincentVandeghinste TomVanallemeersch - FrankVan Eynde + FrankVan Eynde GeertHeyman SienMoens JorisPelemans @@ -9875,7 +9875,7 @@ IuliannaVan der Lek - Ciudin ArdaTezcan LieveMacken - VéroniqueHoste + VéroniqueHoste EvaGeurts MiekeHaesen W15-4946 @@ -9891,7 +9891,7 @@ IsaoGoto GrahamNeubig SadaoKurohashi - EiichiroSumita + EiichiroSumita Workshop on Asian Translation
Kyoto, Japan
October @@ -9961,7 +9961,7 @@ JohnRichardson RajDabre ChenhuiChu - FabienCromières + FabienCromières ToshiakiNakazawa SadaoKurohashi W15-5006 @@ -9982,7 +9982,7 @@ <fixed-case>NAVER</fixed-case> Machine Translation System for <fixed-case>WAT</fixed-case> 2015 Hyoung-GyuLee - JaeSongLee + JaeSongLee Jun-SeokKim Chang-KiLee W15-5008 @@ -9995,7 +9995,7 @@ An Awkward Disparity between <fixed-case>BLEU</fixed-case> / <fixed-case>RIBES</fixed-case> Scores and Human Judgements in Machine Translation LilingTan JonDehdari - Josefvan Genabith + Josefvan Genabith W15-5009 W15-5009.Presentation.pdf W15-5009.Poster.pdf @@ -10055,7 +10055,7 @@ JanAlexandersson ErcanAltinsoy HeidiChristensen - PeterLjunglöf + PeterLjunglöf FrançoisPortet FrankRudzicz 10.18653/v1/W15-51 @@ -10218,8 +10218,8 @@ Ka HoWong Yu TingYeung Patrick C. M.Wong - Gina-AnneLevow - HelenMeng + Gina-AnneLevow + HelenMeng 86–90 W15-5115 10.18653/v1/W15-5115 @@ -10274,7 +10274,7 @@ LeenSevens VincentVandeghinste InekeSchuurman - FrankVan Eynde + FrankVan Eynde 110–117 W15-5119 10.18653/v1/W15-5119 @@ -10316,7 +10316,7 @@ Using linguistic features longitudinally to predict clinical scores for <fixed-case>A</fixed-case>lzheimer’s disease and related dementias MariaYancheva - KathleenFraser + KathleenFraser FrankRudzicz 134–139 W15-5123 @@ -10326,7 +10326,7 @@ From <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese to <fixed-case>P</fixed-case>ortuguese <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage InêsAlmeida - LuísaCoheur + LuísaCoheur SaraCandeias 140–143 W15-5124 @@ -10338,7 +10338,7 @@ Proceedings of the Workshop Natural Language Processing for Translation Memories W15-52 - ConstantinOrasan + ConstantinOrasan RohitGupta Association for Computational Linguistics
Hissar, Bulgaria
@@ -10352,7 +10352,7 @@ Creation of new <fixed-case>TM</fixed-case> segments: Fulfilling translators’ wishes - CarlaParra Escartín + CarlaParra Escartín 1–8 W15-5201 parra-escartin-2015-creation @@ -10367,7 +10367,7 @@ Improving Translation Memory Matching through Clause Splitting KaterinaRaisa Timonera - RuslanMitkov + RuslanMitkov 17–23 W15-5203 raisa-timonera-mitkov-2015-improving @@ -10390,12 +10390,12 @@ <fixed-case>CAT</fixed-case>a<fixed-case>L</fixed-case>og: New Approaches to <fixed-case>TM</fixed-case> and Post Editing Interfaces - TapasNayek - Sudip KumarNaskar + TapasNayek + Sudip KumarNaskar SantanuPal MarcosZampieri MihaelaVela - Josefvan Genabith + Josefvan Genabith 36–42 W15-5206 nayek-etal-2015-catalog @@ -10408,7 +10408,7 @@ JakubPiskorski LidiaPivovarova JanŠnajder - HristoTanev + HristoTanev RomanYangarber INCOMA Ltd. Shoumen, BULGARIA
Hissar, Bulgaria
@@ -10422,7 +10422,7 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>C</fixed-case>roatian (that work for <fixed-case>S</fixed-case>erbian, too) - ŽeljkoAgić + ŽeljkoAgić NikolaLjubešić 1–8 W15-5301 @@ -10430,9 +10430,9 @@ Analytic Morphology – Merging the Paradigmatic and Syntagmatic Perspective in a Treebank - VladimírPetkevič + VladimírPetkevič AlexandrRosen - HanaSkoumalová + HanaSkoumalová PřemyslVítovec 9–16 W15-5302 @@ -10463,7 +10463,7 @@ Regional Linguistic Data Initiative (<fixed-case>R</fixed-case>e<fixed-case>LDI</fixed-case>) - TanjaSamardžić + TanjaSamardžić NikolaLjubešić MajaMiličević 40–42 @@ -10484,7 +10484,7 @@ <fixed-case>E</fixed-case>-law Module Supporting Lawyers in the Process of Knowledge Discovery from Legal Documents - MarekKozłowski + MarekKozłowski MaciejKowalski MaciejKazula 46–48 @@ -10530,14 +10530,14 @@ Universalizing <fixed-case>B</fixed-case>ul<fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank: a Linguistic Tale about Glocalization PetyaOsenova - KirilSimov + KirilSimov 81–89 W15-5313 osenova-simov-2015-universalizing Types of Aspect Terms in Aspect-Oriented Sentiment Labeling - NataliaLoukachevitch + NataliaLoukachevitch EvgeniyKotelnikov PavelBlinov 90–95 @@ -10546,7 +10546,7 @@ Authorship Attribution and Author Profiling of <fixed-case>L</fixed-case>ithuanian Literary Texts - JurgitaKapočiūtė-Dzikienė + JurgitaKapočiūtė-Dzikienė AndriusUtka LigitaŠarkutė 96–105 @@ -10567,13 +10567,13 @@ Proceedings of the Joint Workshop on Language Technology for Closely Related Languages, Varieties and Dialects W15-54 - PreslavNakov + PreslavNakov MarcosZampieri PetyaOsenova LilingTan CristinaVertan NikolaLjubešić - JörgTiedemann + JörgTiedemann Association for Computational Linguistics
Hissar, Bulgaria
September @@ -10597,7 +10597,7 @@
Handling and Mining Linguistic Variation in <fixed-case>UGC</fixed-case> - LeonDerczynski + LeonDerczynski 10 W15-5402 derczynski-2015-handling @@ -10614,7 +10614,7 @@ Joint <fixed-case>B</fixed-case>ayesian Morphology Learning for <fixed-case>D</fixed-case>ravidian Languages ArunKumar - LluísPadró + LluísPadró AntoniOliver 17–23 W15-5404 @@ -10632,14 +10632,14 @@ <fixed-case>W</fixed-case>iki<fixed-case>T</fixed-case>rans: <fixed-case>S</fixed-case>wedish-<fixed-case>D</fixed-case>anish Machine Translation in a Constraint Grammar Framework - EckhardBick + EckhardBick 34 W15-5406 bick-2015-wikitrans Language Identification using Classifier Ensembles - ShervinMalmasi + ShervinMalmasi MarkDras 35–43 W15-5407 @@ -10649,7 +10649,7 @@ Discriminating Similar Languages with Token-Based Backoff TommiJauhiainen HeidiJauhiainen - KristerLindén + KristerLindén 44–51 W15-5408 jauhiainen-etal-2015-discriminating @@ -10673,9 +10673,9 @@ Comparing Approaches to the Identification of Similar Languages MarcosZampieri - Binyam GebrekidanGebre + Binyam GebrekidanGebre HernaniCosta - Josefvan Genabith + Josefvan Genabith 66–72 W15-5411 zampieri-etal-2015-comparing @@ -10691,7 +10691,7 @@ Experiments in Discriminating Similar Languages - CyrilGoutte + CyrilGoutte SergeLéger 78–84 W15-5413 @@ -10701,7 +10701,7 @@ Building Monolingual Word Alignment Corpus for the Greater <fixed-case>C</fixed-case>hina Region FanXu XiongfeiXu - MingwenWang + MingwenWang MaoxiLi 85–94 W15-5414 @@ -10713,9 +10713,9 @@ Proceedings of the Second Workshop on Natural Language Processing and Linked Open Data W15-55 PiekVossen - GermanRigau + GermanRigau PetyaOsenova - KirilSimov + KirilSimov INCOMA Ltd. Shoumen, BULGARIA
Hissar, Bulgaria
September @@ -10744,8 +10744,8 @@ Small in Size, Big in Precision: A Case for Using Language-Specific Lexical Resources for Word Sense Disambiguation StevenNeale - JoãoSilva - AntónioBranco + JoãoSilva + AntónioBranco 6–15 W15-5503 neale-etal-2015-small @@ -10769,14 +10769,14 @@ Accessing Linked Open Data via A Common Ontology KirilSimov - AtanasKiryakov + AtanasKiryakov 33–41 W15-5506 simov-kiryakov-2015-accessing The <fixed-case>G</fixed-case>uan<fixed-case>X</fixed-case>i network: a new multilingual <fixed-case>LLOD</fixed-case> for Language Learning applications - IsmailEl Maarouf + IsmailEl Maarouf HatemMousselly-Sergieh EugeneAlferov HaofenWang @@ -10790,7 +10790,7 @@ Proceedings of the 10th Brazilian Symposium in Information and Human Language Technology - ClaudiaFreitas + ClaudiaFreitas AlexandreRademaker Sociedade Brasileira de Computação
Natal, Brazil
@@ -10825,9 +10825,9 @@
Comparative Analysis between Notations to Classify Named Entities using Conditional Random Fields - Daniela Oliveira F.do Amaral + Daniela Oliveira F.do Amaral MaikiBuffet - RenataVieira + RenataVieira 27-31 W15-5603 do-amaral-etal-2015-comparative @@ -10852,7 +10852,7 @@ Análise Automática de Coerência Textual em Resumos Científicos: Avaliando Quebras de Linearidade (Automatic Analysis of Textual Coherence in Scientific Abstracts: Evaluating Linearity Breaks) Leandro Lagoda Silva - Valéria DelisandraFeltrim + Valéria DelisandraFeltrim 45-49 W15-5606 da-silva-feltrim-2015-analise @@ -10870,8 +10870,8 @@ Integrating support verb constructions into a parser AmandaRassi JorgeBaptista - NunoMamede - OtoVale + NunoMamede + OtoVale 57-61 W15-5608 rassi-etal-2015-integrating @@ -10879,7 +10879,7 @@ Extração de Alvos em Comentários de Notícias em Português baseada na Teoria da Centralização (Target Extraction in News Reviews in <fixed-case>P</fixed-case>ortuguese based on Centering Theory) Frank Willian Cardosode Oliveira - Valéria DelisandraFeltrim + Valéria DelisandraFeltrim 63-67 W15-5609 de-oliveira-feltrim-2015-extracao @@ -10891,7 +10891,7 @@ MarcelSerikawa Matheus Antonio RibeiroSilva RégisZangirolami - Sandra MariaAluísio + Sandra MariaAluísio 69-73 W15-5610 candido-junior-etal-2015-portal @@ -10899,14 +10899,14 @@ <fixed-case>P</fixed-case>rep<fixed-case>N</fixed-case>et.<fixed-case>B</fixed-case>r: a Semantic Network for Prepositions Débora D.Garcia - Bento Carlos Diasda Silva + Bento Carlos Diasda Silva 75-79 W15-5611 garcia-da-silva-2015-prepnet Joint semantic discourse models for automatic multi-document summarization - Paula C. FigueiraCardoso + Paula C. FigueiraCardoso Thiago A. S.Pardo 81-90 W15-5612 @@ -10915,7 +10915,7 @@ Building and Applying Profiles Through Term Extraction LuceleneLopes - RenataVieira + RenataVieira 91-100 W15-5613 lopes-vieira-2015-building @@ -10923,7 +10923,7 @@ An Annotated Corpus for Sentiment Analysis in Political News Gabriel Domingosde Arruda - Norton TrevisanRoman + Norton TrevisanRoman Ana MariaMonteiro 101-110 W15-5614 @@ -10932,7 +10932,7 @@ Campos Aleatórios Condicionais Aplicados à Detecção de Estrutura Retórica em Resumos de Textos Acadêmicos em Português (Conditional Random Fields Applied to Rhetorical Structure Detection in Academic Abstracts in <fixed-case>P</fixed-case>ortuguese) Alexandre C.Andreani - Valéria D.Feltrim + Valéria D.Feltrim 111-120 W15-5615 andreani-feltrim-2015-campos @@ -10941,7 +10941,7 @@ Anotando um Corpus de Notícias para a Análise de Sentimentos: um Relato de Experiência (Annotating a corpus of News for Sentiment Analysis: An Experience Report) Mariza MiolaDosciatti Lohann Paterno CoutinhoFerreira - Emerson CabreraParaiso + Emerson CabreraParaiso 121-130 W15-5616 dosciatti-etal-2015-anotando @@ -10960,7 +10960,7 @@ On Strategies of Human Multi-Document Summarization Renata Tironide Camargo - ArianiDi Felippo + ArianiDi Felippo Thiago A. S.Pardo 141-150 W15-5618 @@ -10977,7 +10977,7 @@ <fixed-case>V</fixed-case>erb<fixed-case>L</fixed-case>ex<fixed-case>P</fixed-case>or: um recurso léxico com anotação de papéis semânticos para o português (<fixed-case>V</fixed-case>erb<fixed-case>L</fixed-case>ex<fixed-case>P</fixed-case>or: a lexical resource annotated with semantic roles for <fixed-case>P</fixed-case>ortuguese) LeonardoZilio - Maria José BocornyFinatto + Maria José BocornyFinatto AlineVillavicencio 161-170 W15-5620 @@ -10985,7 +10985,7 @@ Novo dicionário de formas flexionadas do Unitex-<fixed-case>PB</fixed-case>: avaliação da flexão verbal (New Dictionary of Inflected forms of <fixed-case>UNITEX</fixed-case>-<fixed-case>PB</fixed-case>: Evaluation of Verbal Inflection) - Oto A.Vale + Oto A.Vale JorgeBaptista 171-180 W15-5621 @@ -11009,8 +11009,8 @@ Semi-Automatic Construction of a Textual Entailment Dataset: Selecting Candidates with Vector Space Models - Erick R.Fonseca - Sandra MariaAluísio + Erick R.Fonseca + Sandra MariaAluísio 201-210 W15-5624 fonseca-aluisio-2015-semi @@ -11029,8 +11029,8 @@ Proceedings of the 1st Deep Machine Translation Workshop W15-57 - JanHajič - AntónioBranco + JanHajič + AntónioBranco ÚFAL MFF UK
Praha, Czechia
2015 @@ -11043,7 +11043,7 @@ Modelling the Adjunct/Argument Distinction in Hierarchical Phrase-Based <fixed-case>SMT</fixed-case> SophieArnoult - KhalilSima’an + KhalilSima’an 2–11 W15-5701 arnoult-simaan-2015-modelling @@ -11052,7 +11052,7 @@ Towards Deeper <fixed-case>MT</fixed-case> - A Hybrid System for <fixed-case>G</fixed-case>erman EleftheriosAvramidis AljoschaBurchardt - MajaPopović + MajaPopović HansUszkoreit 12–19 W15-5702 @@ -11071,14 +11071,14 @@ Delimiting Morphosyntactic Search Space with Source-Side Reordering Models JoachimDaiber - KhalilSima’an + KhalilSima’an 29–38 W15-5704 daiber-simaan-2015-delimiting Evaluating a Machine Translation System in a Technical Support Scenario - RosaDel Gaudio + RosaDel Gaudio AljoschaBurchardt ArleLommel 39–47 @@ -11094,12 +11094,12 @@ Deep-syntax <fixed-case>T</fixed-case>ecto<fixed-case>MT</fixed-case> for <fixed-case>E</fixed-case>nglish-<fixed-case>S</fixed-case>panish <fixed-case>MT</fixed-case> - GorkaLabaka + GorkaLabaka OnekaJauregi - ArantzaDíaz de Ilarraza + ArantzaDíaz de Ilarraza MichaelUstaszewski NoraAranberri - EnekoAgirre + EnekoAgirre 55–63 W15-5707 labaka-etal-2015-deep @@ -11116,7 +11116,7 @@ Lexical choice in Abstract Dependency Trees DiekeOele - Gertjanvan Noord + Gertjanvan Noord 73–80 W15-5709 oele-van-noord-2015-lexical @@ -11141,7 +11141,7 @@ Factored models for Deep Machine Translation - KirilSimov + KirilSimov IlianaSimova VelislavaTodorova PetyaOsenova @@ -11151,8 +11151,8 @@ Machine Translation for Multilingual Troubleshooting in the <fixed-case>IT</fixed-case> Domain: A Comparison of Different Strategies - SanjaŠtajner - JoãoRodrigues + SanjaŠtajner + JoãoRodrigues LuísGomes AntónioBranco 106–115 @@ -11164,7 +11164,7 @@ Proceedings of the 12th International Conference on Natural Language Processing W15-59 - Dipti MisraSharma + Dipti MisraSharma RajeevSangal ElizabethSherly NLP Association of India @@ -11179,7 +11179,7 @@ Keynote Lecture 1: Scientific Paper Analysis - YujiMatsumoto + YujiMatsumoto 1 W15-5901 matsumoto-2015-keynote @@ -11187,7 +11187,7 @@ Addressing Class Imbalance in Grammatical Error Detection with Evaluation Metric Optimization AnoopKunchukuttan - PushpakBhattacharyya + PushpakBhattacharyya 2–10 W15-5902 kunchukuttan-bhattacharyya-2015-addressing @@ -11213,8 +11213,8 @@ Noun Phrase Chunking for <fixed-case>M</fixed-case>arathi using Distant Supervision SachinPawar NitinRamrakhiyani - Girish K.Palshikar - PushpakBhattacharyya + Girish K.Palshikar + PushpakBhattacharyya SwapnilHingmire 29–38 W15-5905 @@ -11223,7 +11223,7 @@ Self-Organizing Maps for Classification of a Multi-Labeled Corpus LarsBungum - BjörnGambäck + BjörnGambäck 39–48 W15-5906 bungum-gamback-2015-self @@ -11242,7 +11242,7 @@ SudhaBhingardive DhirendraSingh RudramurthyV - PushpakBhattacharyya + PushpakBhattacharyya 59–64 W15-5908 bhingardive-etal-2015-using @@ -11262,7 +11262,7 @@ SandhyaSingh NileshJoshi AnupamGhosh - PushpakBhattacharyya + PushpakBhattacharyya 71–78 W15-5910 redkar-etal-2015-indowordnet @@ -11270,17 +11270,17 @@ Let Sense Bags Do Talking: Cross Lingual Word Semantic Similarity for <fixed-case>E</fixed-case>nglish and <fixed-case>H</fixed-case>indi ApurvaNagvenkar - JyotiPawar - PushpakBhattacharyya + JyotiPawar + PushpakBhattacharyya 79–83 W15-5911 nagvenkar-etal-2015-sense A temporal expression recognition system for medical documents by - NamanGupta + NamanGupta AdityaJoshi - PushpakBhattacharyya + PushpakBhattacharyya 84–88 W15-5912 gupta-etal-2015-temporal @@ -11288,7 +11288,7 @@ An unsupervised <fixed-case>EM</fixed-case> method to infer time variation in sense probabilities MartinEmms - ArunJayapal + ArunJayapal 89–94 W15-5913 emms-jayapal-2015-unsupervised @@ -11297,7 +11297,7 @@ Solving Data Sparsity by Morphology Injection in Factored <fixed-case>SMT</fixed-case> SreelekhaS PiyushDungarwal - PushpakBhattacharyya + PushpakBhattacharyya MalathiD 95–99 W15-5914 @@ -11317,8 +11317,8 @@ DipteshKanojia ShehzaadDhuliawala AbhijitMishra - NamanGupta - PushpakBhattacharyya + NamanGupta + PushpakBhattacharyya 106–111 W15-5916 kanojia-etal-2015-transchat @@ -11327,7 +11327,7 @@ A Database of Infant Cry Sounds to Study the Likely Cause of Cry ShivamSharma ShubhamAsthana - V. K.Mittal + V. K.Mittal 112–117 W15-5917 sharma-etal-2015-database @@ -11341,7 +11341,7 @@ An Empirical Study of Diversity of Word Alignment and its Symmetrization Techniques for System Combination - Thoudam DorenSingh + Thoudam DorenSingh 124–129 W15-5919 singh-2015-empirical @@ -11349,7 +11349,7 @@ Domain Sentiment Matters: A Two Stage Sentiment Analyzer RakshaSharma - PushpakBhattacharyya + PushpakBhattacharyya 130–137 W15-5920 sharma-bhattacharyya-2015-domain @@ -11387,9 +11387,9 @@ Judge a Book by its Cover: Conservative Focused Crawling under Resource Constraints ShehzaadDhuliawala - Arjun AtreyaV + Arjun AtreyaV Ravi KumarYadav - PushpakBhattacharyya + PushpakBhattacharyya 166–171 W15-5925 dhuliawala-etal-2015-judge @@ -11423,7 +11423,7 @@ A Study on Divergence in <fixed-case>M</fixed-case>alayalam and <fixed-case>T</fixed-case>amil Language in Machine Translation Perceptive - Jisha PJayan + Jisha PJayan ElizabethSherly 189–196 W15-5929 @@ -11440,8 +11440,8 @@ Logistic Regression for Automatic Lexical Level Morphological Paradigm Selection for <fixed-case>K</fixed-case>onkani Nouns ShilpaDesai - JyotiPawar - PushpakBhattacharyya + JyotiPawar + PushpakBhattacharyya 203–208 W15-5931 desai-etal-2015-logistic @@ -11496,8 +11496,8 @@ Automated Analysis of <fixed-case>B</fixed-case>angla Poetry for Classification and Poet Identification GeetanjaliRakshit AnupamGhosh - PushpakBhattacharyya - GholamrezaHaffari + PushpakBhattacharyya + GholamrezaHaffari 247–253 W15-5937 rakshit-etal-2015-automated @@ -11508,7 +11508,7 @@ AnupamJamatia KunalChakma AmitavaDas - BjörnGambäck + BjörnGambäck 254–260 W15-5938 rudrapal-etal-2015-sentence @@ -11517,7 +11517,7 @@ Mood Classification of <fixed-case>H</fixed-case>indi Songs based on Lyrics Braja GopalPatra DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 261–267 W15-5939 patra-etal-2015-mood @@ -11540,9 +11540,9 @@ Simultaneous Feature Selection and Parameter Optimization Using Multi-objective Optimization for Sentiment Analysis - Mohammed ArifKhan + Mohammed ArifKhan AsifEkbal - Eneldo LozaMencía + Eneldo LozaMencía 285–294 W15-5942 khan-etal-2015-simultaneous @@ -11552,7 +11552,7 @@ DhirendraSingh SudhaBhingardive KevinPatel - PushpakBhattacharyya + PushpakBhattacharyya 295–302 W15-5943 singh-etal-2015-detection @@ -11561,7 +11561,7 @@ Augmenting Pivot based <fixed-case>SMT</fixed-case> with word segmentation RohitMore AnoopKunchukuttan - PushpakBhattacharyya + PushpakBhattacharyya RajDabre 303–307 W15-5944 @@ -11571,8 +11571,8 @@ Using Multilingual Topic Models for Improved Alignment in <fixed-case>E</fixed-case>nglish-<fixed-case>H</fixed-case>indi <fixed-case>MT</fixed-case> DipteshKanojia AdityaJoshi - PushpakBhattacharyya - Mark JamesCarman + PushpakBhattacharyya + Mark JamesCarman 308–315 W15-5945 kanojia-etal-2015-using @@ -11581,22 +11581,22 @@ Triangulation of Reordering Tables: An Advancement Over Phrase Table Triangulation in Pivot-Based <fixed-case>SMT</fixed-case> DeepakPatil HarshadChavan - PushpakBhattacharyya + PushpakBhattacharyya 316–324 W15-5946 patil-etal-2015-triangulation Post-editing a chapter of a specialized textbook into 7 languages: importance of terminological proximity with <fixed-case>E</fixed-case>nglish for productivity - RiteshShah - ChristianBoitet - PushpakBhattacharyya + RiteshShah + ChristianBoitet + PushpakBhattacharyya MithunPadmakumar LeonardoZilio RuslanKalitvianski MohammadNasiruddin MutsukoTomokiyo - Sandra CastellanosPáez + Sandra CastellanosPáez 325–332 W15-5947 shah-etal-2015-post @@ -11605,7 +11605,7 @@ Generating Translation Corpora in <fixed-case>I</fixed-case>ndic Languages: Cultivating Bilingual Texts for Cross Lingual Fertilization Niladri SekharDash ArulmoziSelvraj - MazharHussain + MazharHussain 333–342 W15-5948 dash-etal-2015-generating @@ -11613,7 +11613,7 @@ Translation Quality and Effort: Options versus Post-editing DonaldSturgeon - John S. Y.Lee + John S. Y.Lee 343–350 W15-5949 sturgeon-lee-2015-translation @@ -11622,15 +11622,15 @@ Investigating the potential of post-ordering <fixed-case>SMT</fixed-case> output to improve translation quality PratikMehta AnoopKunchukuttan - PushpakBhattacharyya + PushpakBhattacharyya 351–356 W15-5950 mehta-etal-2015-investigating Applying <fixed-case>S</fixed-case>anskrit Concepts for Reordering in <fixed-case>MT</fixed-case> - AksharBharati - Sukhada + AksharBharati + Sukhada PrajnaJha SomaPaul Dipti MSharma @@ -11667,7 +11667,7 @@ Natural Language Processing for Solving Simple Word Problems - Sowmya SSundaram + Sowmya SSundaram DeepakKhemani 394–402 W15-5955 diff --git a/data/xml/W16.xml b/data/xml/W16.xml index 41fd606e0d..48c25fe10b 100644 --- a/data/xml/W16.xml +++ b/data/xml/W16.xml @@ -6,7 +6,7 @@ MohitIyyer HeHe JordanBoyd-Graber - HalDaumé III + HalDaumé III 10.18653/v1/W16-01 Association for Computational Linguistics
San Diego, California
@@ -21,7 +21,7 @@ <fixed-case>W</fixed-case>atson Discovery Advisor: Question-answering in an industrial setting CharleyBeller - GrahamKatz + GrahamKatz AllenGinsberg ChrisPhipps SeanBethard @@ -47,7 +47,7 @@ Attention-Based Convolutional Neural Network for Machine Comprehension WenpengYin SebastianEbert - HinrichSchütze + HinrichSchütze 15–21 W16-0103 10.18653/v1/W16-0103 @@ -56,7 +56,7 @@ Open-domain Factoid Question Answering via Knowledge Graph Search AhmadAghaebrahimian - FilipJurčíček + FilipJurčíček 22–28 W16-0104 10.18653/v1/W16-0104 @@ -122,7 +122,7 @@ Proceedings of the Fifth Workshop on Computational Linguistics for Literature AnnaFeldman AnnaKazantseva - StanSzpakowicz + StanSzpakowicz 10.18653/v1/W16-02 Association for Computational Linguistics
San Diego, California, USA
@@ -158,7 +158,7 @@ AndreaGagliano EmilyPaul KyleBooten - Marti A.Hearst + Marti A.Hearst 20–31 W16-0203 10.18653/v1/W16-0203 @@ -197,7 +197,7 @@ Bilingual Chronological Classification of Hafez’s Poems AryaRahgozar - DianaInkpen + DianaInkpen 54–62 W16-0207 10.18653/v1/W16-0207 @@ -208,7 +208,7 @@ Proceedings of the Third Workshop on Computational Linguistics and Clinical Psychology KristyHollingshead - LyleUngar + LyleUngar 10.18653/v1/W16-03 Association for Computational Linguistics
San Diego, CA, USA
@@ -222,7 +222,7 @@ Detecting late-life depression in <fixed-case>A</fixed-case>lzheimer’s disease through analysis of speech and language - Kathleen C.Fraser + Kathleen C.Fraser FrankRudzicz GraemeHirst 1–11 @@ -233,10 +233,10 @@ Towards Early Dementia Detection: Fusing Linguistic and Non-Linguistic Clinical Data JosephBullard - CeciliaOvesdotter Alm + CeciliaOvesdotter Alm XuminLiu QiYu - RubénProaño + RubénProaño 12–22 W16-0302 10.18653/v1/W16-0302 @@ -271,7 +271,7 @@ Building a Motivational Interviewing Dataset VerónicaPérez-Rosas - RadaMihalcea + RadaMihalcea KennethResnicow SatinderSingh LawrenceAn @@ -306,12 +306,12 @@ Exploring Autism Spectrum Disorders Using <fixed-case>HLT</fixed-case> JuliaParish-Morris - MarkLiberman + MarkLiberman NevilleRyant - ChristopherCieri + ChristopherCieri LeilaBateman EmilyFerguson - RobertSchultz + RobertSchultz 74–84 W16-0308 10.18653/v1/W16-0308 @@ -322,10 +322,10 @@ MayureshOak AnilBehera TitusThomas - CeciliaOvesdotter Alm - EmilyPrud’hommeaux + CeciliaOvesdotter Alm + EmilyPrud’hommeaux ChristopherHoman - RaymondPtucha + RaymondPtucha 85–94 W16-0309 10.18653/v1/W16-0309 @@ -357,7 +357,7 @@ <fixed-case>CLP</fixed-case>sych 2016 Shared Task: Triaging content in online peer-support forums - David N.Milne + David N.Milne GlenPink BenHachey Rafael A.Calvo @@ -368,10 +368,10 @@ <fixed-case>D</fixed-case>ata61-<fixed-case>CSIRO</fixed-case> systems at the <fixed-case>CLP</fixed-case>sych 2016 Shared Task - Sunghwan MacKim + Sunghwan MacKim YufeiWang StephenWan - CécileParis + CécileParis 128–132 W16-0313 10.18653/v1/W16-0313 @@ -379,7 +379,7 @@ Predicting Post Severity in Mental Health Forums - ShervinMalmasi + ShervinMalmasi MarcosZampieri MarkDras 133–137 @@ -409,7 +409,7 @@ Mental Distress Detection and Triage in Forum Posts: The <fixed-case>LT</fixed-case>3 <fixed-case>CLP</fixed-case>sych 2016 Shared Task System BartDesmet GillesJacobs - VéroniqueHoste + VéroniqueHoste 148–152 W16-0317 10.18653/v1/W16-0317 @@ -429,7 +429,7 @@ The <fixed-case>UMD</fixed-case> <fixed-case>CLP</fixed-case>sych 2016 Shared Task System: Text Representation for Predicting Triage of Forum Posts about Mental Health MeirFriedenberg HadiAmiri - HalDaumé III + HalDaumé III PhilipResnik 158–161 W16-0319 @@ -511,7 +511,7 @@ Text-based experiments for Predicting mental health emergencies in online web forum posts Hector-HugoFranco-Penya - LilianaMamani Sanchez + LilianaMamani Sanchez 193–197 W16-0327 10.18653/v1/W16-0327 @@ -521,10 +521,10 @@ Proceedings of the 7th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis - AlexandraBalahur - Erikvan der Goot + AlexandraBalahur + Erikvan der Goot PiekVossen - AndresMontoyo + AndresMontoyo 10.18653/v1/W16-04 Association for Computational Linguistics
San Diego, California
@@ -555,7 +555,7 @@ Rumor Identification and Belief Investigation on <fixed-case>T</fixed-case>witter SardarHamidian - MonaDiab + MonaDiab 3–8 W16-0403 10.18653/v1/W16-0403 @@ -563,12 +563,12 @@ Modelling Valence and Arousal in <fixed-case>F</fixed-case>acebook posts - DanielPreoţiuc-Pietro - H. AndrewSchwartz + DanielPreoţiuc-Pietro + H. AndrewSchwartz GregoryPark JohannesEichstaedt - MargaretKern - LyleUngar + MargaretKern + LyleUngar ElisabethShulman 9–15 W16-0404 @@ -625,7 +625,7 @@ The Effect of Negators, Modals, and Degree Adverbs on Sentiment Composition SvetlanaKiritchenko - SaifMohammad + SaifMohammad 43–52 W16-0410 10.18653/v1/W16-0410 @@ -634,7 +634,7 @@ How can <fixed-case>NLP</fixed-case> Tasks Mutually Benefit Sentiment Analysis? A Holistic Approach to Sentiment Analysis LingjiaDeng - JanyceWiebe + JanyceWiebe 53–59 W16-0411 10.18653/v1/W16-0411 @@ -654,7 +654,7 @@ Threat detection in online discussions AkselWester - LiljaØvrelid + LiljaØvrelid ErikVelldal Hugo LewiHammer 66–71 @@ -665,7 +665,7 @@ Classification of comment helpfulness to improve knowledge sharing among medical practitioners. Pierre AndréMénard - CarolineBarrière + CarolineBarrière 72–81 W16-0414 10.18653/v1/W16-0414 @@ -674,8 +674,8 @@ Political Issue Extraction Model: A Novel Hierarchical Topic Model That Uses Tweets By Political And Non-Political Authors AdityaJoshi - PushpakBhattacharyya - MarkCarman + PushpakBhattacharyya + MarkCarman 82–90 W16-0415 10.18653/v1/W16-0415 @@ -684,8 +684,8 @@ Early text classification: a Naïve solution Hugo JairEscalante - ManuelMontes y Gomez - LuisVillasenor + ManuelMontes y Gomez + LuisVillasenor Marcelo LuisErrecalde 91–99 W16-0416 @@ -746,10 +746,10 @@ Domain Adaptation of Polarity Lexicon combining Term Frequency and Bootstrapping - Salud MaríaJiménez-Zafra - MaiteMartin - M. DoloresMolina-Gonzalez - L. AlfonsoUreña-López + Salud MaríaJiménez-Zafra + MaiteMartin + M. DoloresMolina-Gonzalez + L. AlfonsoUreña-López 137–146 W16-0422 10.18653/v1/W16-0422 @@ -757,10 +757,10 @@ Do Enterprises Have Emotions? - SvenBuechel + SvenBuechel UdoHahn JanGoldenstein - Sebastian G. M.Händschke + Sebastian G. M.Händschke PeterWalgenbach 147–153 W16-0423 @@ -797,7 +797,7 @@ Sentiment Analysis in <fixed-case>T</fixed-case>witter: A <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val Perspective - PreslavNakov + PreslavNakov 171–172 W16-0427 10.18653/v1/W16-0427 @@ -813,7 +813,7 @@ A Practical Guide to Sentiment Annotation: Challenges and Solutions - SaifMohammad + SaifMohammad 174–179 W16-0429 10.18653/v1/W16-0429 @@ -831,7 +831,7 @@ Proceedings of the 11th Workshop on Innovative Use of NLP for Building Educational Applications - JoelTetreault + JoelTetreault JillBurstein ClaudiaLeacock HelenYannakoudakis @@ -860,7 +860,7 @@ Text Readability Assessment for Second Language Learners MenglinXia EkaterinaKochmar - TedBriscoe + TedBriscoe 12–22 W16-0502 10.18653/v1/W16-0502 @@ -878,7 +878,7 @@ Automated classification of collaborative problem solving interactions in simulated science tasks MichaelFlor - Su-YounYoon + Su-YounYoon JiangangHao LeiLiu Alinavon Davier @@ -898,8 +898,8 @@ A Report on the Automatic Evaluation of Scientific Writing Shared Task - VidasDaudaravicius - Rafael E.Banchs + VidasDaudaravicius + Rafael E.Banchs ElenaVolodina CourtneyNapoles 53–62 @@ -930,7 +930,7 @@ Characterizing Text Difficulty with Word Frequencies XiaobinChen - DetmarMeurers + DetmarMeurers 84–94 W16-0509 10.18653/v1/W16-0509 @@ -940,7 +940,7 @@ Unsupervised Modeling of Topical Relevance in <fixed-case>L</fixed-case>2 Learner Text RonanCummins HelenYannakoudakis - TedBriscoe + TedBriscoe 95–104 W16-0510 10.18653/v1/W16-0510 @@ -948,8 +948,8 @@ <fixed-case>UW</fixed-case>-<fixed-case>S</fixed-case>tanford System Description for <fixed-case>AESW</fixed-case> 2016 Shared Task on Grammatical Error Detection - DanFlickinger - MichaelGoodman + DanFlickinger + MichaelGoodman WoodleyPackard 105–111 W16-0511 @@ -969,8 +969,8 @@ The <fixed-case>NTNU</fixed-case>-<fixed-case>YZU</fixed-case> System in the <fixed-case>AESW</fixed-case> Shared Task: Automated Evaluation of Scientific Writing Using a Convolutional Neural Network Lung-HaoLee Bo-LinLin - Liang-ChihYu - Yuen-HsienTseng + Liang-ChihYu + Yuen-HsienTseng 122–129 W16-0513 10.18653/v1/W16-0513 @@ -997,8 +997,8 @@ <fixed-case>P</fixed-case>ictogrammar: an <fixed-case>AAC</fixed-case> device based on a semantic grammar - FernandoMartínez-Santiago - Miguel ÁngelGarcía-Cumbreras + FernandoMartínez-Santiago + Miguel ÁngelGarcía-Cumbreras ArturoMontejo-Ráez Manuel CarlosDíaz-Galiano 142–150 @@ -1038,7 +1038,7 @@ Evaluation Dataset (<fixed-case>DT</fixed-case>-Grade) and Word Weighting Approach towards Constructed Short Answers Assessment in Tutorial Dialogue Context RajendraBanjade NabinMaharjan - Nobal BikramNiraula + Nobal BikramNiraula DipeshGautam BorhanSamei VasileRus @@ -1050,7 +1050,7 @@ Linguistically Aware Information Retrieval: Providing Input Enrichment for Second Language Learners MariaChinkina - DetmarMeurers + DetmarMeurers 188–198 W16-0521 10.18653/v1/W16-0521 @@ -1089,7 +1089,7 @@ Combined Tree Kernel-based classifiers for Assessing Quality of Scientific Text - LilianaMamani Sanchez + LilianaMamani Sanchez Hector-HugoFranco-Penya 223–228 W16-0525 @@ -1099,7 +1099,7 @@ Augmenting Course Material with Open Access Textbooks SmithaMilli - Marti A.Hearst + Marti A.Hearst 229–234 W16-0526 10.18653/v1/W16-0526 @@ -1107,7 +1107,7 @@ Exploring the Intersection of Short Answer Assessment, Authorship Attribution, and Plagiarism Detection - BjörnRudzewitz + BjörnRudzewitz 235–241 W16-0527 10.18653/v1/W16-0527 @@ -1117,8 +1117,8 @@ Sentence-Level Grammatical Error Identification as Sequence-to-Sequence Correction AllenSchmaltz YoonKim - Alexander M.Rush - StuartShieber + Alexander M.Rush + StuartShieber 242–251 W16-0528 10.18653/v1/W16-0528 @@ -1136,7 +1136,7 @@ Candidate re-ranking for <fixed-case>SMT</fixed-case>-based grammatical error correction ZhengYuan - TedBriscoe + TedBriscoe MarianoFelice 256–266 W16-0530 @@ -1145,7 +1145,7 @@ Spoken Text Difficulty Estimation Using Linguistic Features - Su-YounYoon + Su-YounYoon YeonsukCho DianeNapolitano 267–276 @@ -1156,7 +1156,7 @@ Automatically Extracting Topical Components for a Response-to-Text Writing Assessment ZahraRahimi - DianeLitman + DianeLitman 277–282 W16-0532 10.18653/v1/W16-0532 @@ -1192,9 +1192,9 @@ Proceedings of the 2nd Workshop on Semantics-Driven Machine Translation (SedMT 2016) - DeyiXiong + DeyiXiong KevinDuh - EnekoAgirre + EnekoAgirre NoraAranberri HoufengWang 10.18653/v1/W16-06 @@ -1237,7 +1237,7 @@ Towards Semantic-based Hybrid Machine Translation between <fixed-case>B</fixed-case>ulgarian and <fixed-case>E</fixed-case>nglish - KirilSimov + KirilSimov PetyaOsenova AlexanderPopov 22–26 @@ -1291,9 +1291,9 @@ Beyond Identity Coreference: Contrasting Indicators of Textual Coherence in <fixed-case>E</fixed-case>nglish and <fixed-case>G</fixed-case>erman - KerstinKunz + KerstinKunz EkaterinaLapshinova-Koltunski - José ManuelMartínez + José ManuelMartínez 23–31 W16-0704 10.18653/v1/W16-0704 @@ -1302,8 +1302,8 @@ Exploring the steps of Verb Phrase Ellipsis ZhengzhongLiu - EdgarGonzàlez Pellicer - DanielGillick + EdgarGonzàlez Pellicer + DanielGillick 32–40 W16-0705 10.18653/v1/W16-0705 @@ -1330,8 +1330,8 @@ Antecedent Prediction Without a Pipeline SamWiseman - Alexander M.Rush - StuartShieber + Alexander M.Rush + StuartShieber 53–58 W16-0708 10.18653/v1/W16-0708 @@ -1350,10 +1350,10 @@ Coreference Resolution for the <fixed-case>B</fixed-case>asque Language with <fixed-case>BART</fixed-case> AnderSoraluze OlatzArregi - XabierArregi - ArantzaDíaz de Ilarraza - MijailKabadjov - MassimoPoesio + XabierArregi + ArantzaDíaz de Ilarraza + MijailKabadjov + MassimoPoesio 67–73 W16-0710 10.18653/v1/W16-0710 @@ -1373,8 +1373,8 @@ How to Handle Split Antecedents in <fixed-case>T</fixed-case>amil? - VijaySundar Ram - SobhaLalitha Devi + VijaySundar Ram + SobhaLalitha Devi 84–91 W16-0712 10.18653/v1/W16-0712 @@ -1394,8 +1394,8 @@ Proceedings of the Second Workshop on Computational Approaches to Deception Detection TommasoFornaciari - EileenFitzpatrick - JoanBachenko + EileenFitzpatrick + JoanBachenko 10.18653/v1/W16-08 Association for Computational Linguistics
San Diego, California
@@ -1418,7 +1418,7 @@
Fake News or Truth? Using Satirical Cues to Detect Potentially Misleading News - VictoriaRubin + VictoriaRubin NiallConroy YiminChen SarahCornwell @@ -1448,7 +1448,7 @@ The Use of Second Life for Deception Detection Research - StephenKunath + StephenKunath KevinMcCabe 32–39 W16-0805 @@ -1463,7 +1463,7 @@ MichelleLevine RivkaLevitan AndrewRosenberg - JuliaHirschberg + JuliaHirschberg 40–44 W16-0806 10.18653/v1/W16-0806 @@ -1483,8 +1483,8 @@ Proceedings of the Workshop on Discontinuous Structures in Natural Language Processing WolfgangMaier - SandraKübler - ConstantinOrasan + SandraKübler + ConstantinOrasan 10.18653/v1/W16-09 Association for Computational Linguistics
San Diego, California
@@ -1506,9 +1506,9 @@
Non-projectivity and valency - ZdenkaUresova - EvaFucikova - JanHajic + ZdenkaUresova + EvaFucikova + JanHajic 12–21 W16-0902 10.18653/v1/W16-0902 @@ -1560,8 +1560,8 @@ Proceedings of the Fourth Workshop on Events - MarthaPalmer - EdHovy + MarthaPalmer + EdHovy TerukoMitamura TimO’Gorman 10.18653/v1/W16-10 @@ -1597,9 +1597,9 @@ Multimodal Use of an Upper-Level Event Ontology - ClaireBonial + ClaireBonial DavidTahmoush - SusanWindisch Brown + SusanWindisch Brown MarthaPalmer 18–26 W16-1003 @@ -1613,7 +1613,7 @@ JeremyGetman JoeEllis JustinMott - StephanieStrassel + StephanieStrassel MarthaPalmer TerukoMitamura MarjorieFreedman @@ -1628,10 +1628,10 @@ Event Nugget and Event Coreference Annotation ZhiyiSong AnnBies - StephanieStrassel + StephanieStrassel JoeEllis TerukoMitamura - Hoa TrangDang + Hoa TrangDang YukariYamakawa SueHolm 37–45 @@ -1652,8 +1652,8 @@ <fixed-case>C</fixed-case>a<fixed-case>T</fixed-case>e<fixed-case>RS</fixed-case>: Causal and Temporal Relation Scheme for Semantic Annotation of Event Structures NasrinMostafazadeh AlysonGrealish - NathanaelChambers - JamesAllen + NathanaelChambers + JamesAllen LucyVanderwende 51–61 W16-1007 @@ -1727,7 +1727,7 @@ Proceedings of the Workshop on Multilingual and Cross-lingual Methods in NLP DipanjanDas - ChrisDyer + ChrisDyer ManaalFaruqui YuliaTsvetkov 10.18653/v1/W16-12 @@ -1744,7 +1744,7 @@ Learning Cross-lingual Representations with Matrix Factorization HananAldarmaki - MonaDiab + MonaDiab 1–9 W16-1201 10.18653/v1/W16-1201 @@ -1773,7 +1773,7 @@ Enhancing Automatic <fixed-case>W</fixed-case>ordnet Construction Using Word Embeddings FerasAl Tarouti - JugalKalita + JugalKalita 30–34 W16-1204 10.18653/v1/W16-1204 @@ -1820,7 +1820,7 @@ Proceedings of the 5th Workshop on Automated Knowledge Base Construction JayPujara - TimRocktaschel + TimRocktaschel DanqiChen SameerSingh 10.18653/v1/W16-13 @@ -1837,9 +1837,9 @@ Using Graphs of Classifiers to Impose Constraints on Semi-supervised Relation Extraction LidongBing - WilliamCohen + WilliamCohen BhuwanDhingra - RichardWang + RichardWang 1–6 W16-1301 10.18653/v1/W16-1301 @@ -1857,9 +1857,9 @@ <fixed-case>IKE</fixed-case> - An Interactive Tool for Knowledge Extraction - BhavanaDalvi + BhavanaDalvi SumithraBhakthavatsalam - ChrisClark + ChrisClark PeterClark OrenEtzioni AnthonyFader @@ -1884,7 +1884,7 @@ Knowledge Base Population for Organization Mentions in Email NingGao MarkDredze - DouglasOard + DouglasOard 24–28 W16-1305 10.18653/v1/W16-1305 @@ -1911,7 +1911,7 @@ But What Do We Actually Know? SimonRazniewski - FabianSuchanek + FabianSuchanek WernerNutt 40–44 W16-1308 @@ -1984,7 +1984,7 @@ SujitPal DarinMcBeath BradAllen - RonDaniel + RonDaniel 81–85 W16-1315 10.18653/v1/W16-1315 @@ -2003,7 +2003,7 @@ Call for Discussion: Building a New Standard Dataset for Relation Extraction Tasks - TeresaMartin + TeresaMartin FieteBotschen AjayNagesh AndrewMcCallum @@ -2112,7 +2112,7 @@ Proceedings of the Joint Workshop on Bibliometric-enhanced Information Retrieval and Natural Language Processing for Digital Libraries (BIRNDL) GuillaumeCabanac - Muthu KumarChandrasekaran + Muthu KumarChandrasekaran IngoFrommholz KokilJaidka Min-YenKan @@ -2203,9 +2203,9 @@ A Study of Reuse and Plagiarism in Speech and Natural Language Processing papers - JosephMariani + JosephMariani GilFrancopoulo - PatrickParoubek + PatrickParoubek 72–83 W16-1509 W16-1509.Presentation.pdf @@ -2311,7 +2311,7 @@ Trainable Citation-enhanced Summarization of Scientific Articles HoracioSaggion - AhmedAbuRa’ed + AhmedAbuRa’ed FrancescoRonzano 175–186 W16-1520 @@ -2322,14 +2322,14 @@ Proceedings of the 1st Workshop on Representation Learning for NLP W16-16 - PhilBlunsom + PhilBlunsom KyunghyunCho - ShayCohen - EdwardGrefenstette + ShayCohen + EdwardGrefenstette Karl MoritzHermann LauraRimell JasonWeston - Scott Wen-tauYih + Scott Wen-tauYih 10.18653/v1/W16-16 Association for Computational Linguistics
Berlin, Germany
@@ -2402,8 +2402,8 @@ Adjusting Word Embeddings with Semantic Intensity Orders Joo-KyungKim - Marie-Catherinede Marneffe - EricFosler-Lussier + Marie-Catherinede Marneffe + EricFosler-Lussier 62–69 W16-1607 10.18653/v1/W16-1607 @@ -2422,7 +2422,7 @@ An Empirical Evaluation of doc2vec with Practical Insights into Document Embedding Generation Jey HanLau - TimothyBaldwin + TimothyBaldwin 78–86 W16-1609 10.18653/v1/W16-1609 @@ -2449,7 +2449,7 @@ Mapping Unseen Words to Task-Trained Embedding Spaces - Pranava SwaroopMadhyastha + Pranava SwaroopMadhyastha MohitBansal KevinGimpel KarenLivescu @@ -2469,7 +2469,7 @@ Towards cross-lingual distributed representations without parallel text trained with adversarial autoencoders - Antonio ValerioMiceli Barone + Antonio ValerioMiceli Barone 121–126 W16-1614 10.18653/v1/W16-1614 @@ -2487,7 +2487,7 @@ Learning Semantic Relatedness in Community Question Answering Using Neural Models HenryNassif MitraMohtarami - JamesGlass + JamesGlass 137–147 W16-1616 10.18653/v1/W16-1616 @@ -2508,7 +2508,7 @@ Thien HuuNguyen LishengFu KyunghyunCho - RalphGrishman + RalphGrishman 158–165 W16-1618 10.18653/v1/W16-1618 @@ -2527,8 +2527,8 @@ Making Sense of Word Embeddings MariaPelevina - NikolayArefiev - ChrisBiemann + NikolayArefiev + ChrisBiemann AlexanderPanchenko 174–183 W16-1620 @@ -2589,7 +2589,7 @@ ImranSheikh IrinaIllina DominiqueFohr - GeorgesLinarès + GeorgesLinarès 222–229 W16-1626 10.18653/v1/W16-1626 @@ -2606,7 +2606,7 @@ Towards Generalizable Sentence Embeddings EleniTriantafillou - Jamie RyanKiros + Jamie RyanKiros RaquelUrtasun RichardZemel 239–248 @@ -2657,7 +2657,7 @@ TimO’Gorman SharoneHorowit-Hendler HengJi - MarthaPalmer + MarthaPalmer 1–6 W16-1701 10.18653/v1/W16-1701 @@ -2687,10 +2687,10 @@ A Discourse-Annotated Corpus of Conjoined <fixed-case>VP</fixed-case>s - BonnieWebber + BonnieWebber RashmiPrasad AlanLee - AravindJoshi + AravindJoshi 22–31 W16-1704 10.18653/v1/W16-1704 @@ -2709,9 +2709,9 @@ Supersense tagging with inter-annotator disagreement - HéctorMartínez Alonso - AndersJohannsen - BarbaraPlank + HéctorMartínez Alonso + AndersJohannsen + BarbaraPlank 43–48 W16-1706 10.18653/v1/W16-1706 @@ -2724,7 +2724,7 @@ NathanSchneider Christopher N. L.Clark AnnieLouis - BonnieWebber + BonnieWebber 49–58 W16-1707 10.18653/v1/W16-1707 @@ -2745,7 +2745,7 @@ Different Flavors of <fixed-case>GUM</fixed-case>: Evaluating Genre and Sentence Type Effects on Multilayer Corpus Annotation Quality AmirZeldes - DanSimonson + DanSimonson 68–78 W16-1709 10.18653/v1/W16-1709 @@ -2754,7 +2754,7 @@ Addressing Annotation Complexity: The Case of Annotating Ideological Perspective in <fixed-case>E</fixed-case>gyptian Social Media HebaElfardy - MonaDiab + MonaDiab 79–88 W16-1710 10.18653/v1/W16-1710 @@ -2779,7 +2779,7 @@ AbhijitSuresh KathrynConger TimO’Gorman - MarthaPalmer + MarthaPalmer 99–109 W16-1712 10.18653/v1/W16-1712 @@ -2789,7 +2789,7 @@ Focus Annotation of Task-based Data: Establishing the Quality of Crowd Annotation KordulaDe Kuthy RamonZiai - DetmarMeurers + DetmarMeurers 110–119 W16-1713 10.18653/v1/W16-1713 @@ -2797,7 +2797,7 @@ Part of Speech Annotation of a <fixed-case>T</fixed-case>urkish-<fixed-case>G</fixed-case>erman Code-Switching Corpus - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu ÇağrıÇöltekin 120–130 W16-1714 @@ -2816,9 +2816,9 @@ Conversion from <fixed-case>P</fixed-case>aninian Karakas to <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>H</fixed-case>indi <fixed-case>D</fixed-case>ependency <fixed-case>T</fixed-case>reebank JuhiTandon - HimaniChaudhary - Riyaz AhmadBhat - Dipti MisraSharma + HimaniChaudhary + Riyaz AhmadBhat + Dipti MisraSharma 141–150 W16-1716 10.18653/v1/W16-1716 @@ -2826,7 +2826,7 @@ Phrase Generalization: a Corpus Study in Multi-Document Abstracts and Original News Alignments - ArianiDi-Felippo + ArianiDi-Felippo AniNenkova 151–159 W16-1717 @@ -2837,7 +2837,7 @@ Generating Disambiguating Paraphrases for Structurally Ambiguous Sentences ManjuanDuan EthanHill - MichaelWhite + MichaelWhite 160–170 W16-1718 10.18653/v1/W16-1718 @@ -2866,7 +2866,7 @@ Creating a Novel Geolocation Corpus from Historical Texts GrantDeLozier - BenWing + BenWing JasonBaldridge ScottNesbit 188–198 @@ -2882,8 +2882,8 @@ ValiaKordoni KostadinCholakov MarkusEgg - StellaMarkantonatou - PreslavNakov + StellaMarkantonatou + PreslavNakov 10.18653/v1/W16-18 Association for Computational Linguistics
Berlin, Germany
@@ -2898,9 +2898,9 @@ Learning Paraphrasing for Multiword Expressions Seid MuhieYimam - HéctorMartínez Alonso + HéctorMartínez Alonso MartinRiedl - ChrisBiemann + ChrisBiemann 1–10 W16-1801 10.18653/v1/W16-1801 @@ -2918,7 +2918,7 @@ Lexical Variability and Compositionality: Investigating Idiomaticity with Distributional Semantic Models Marco Silvio GiuseppeSenaldi - Gianluca E.Lebani + Gianluca E.Lebani AlessandroLenci 21–31 W16-1803 @@ -2928,7 +2928,7 @@ Filtering and Measuring the Intrinsic Quality of Human Compositionality Judgments CarlosRamisch - SilvioCordeiro + SilvioCordeiro AlineVillavicencio 32–37 W16-1804 @@ -2938,8 +2938,8 @@ Graph-based Clustering of Synonym Senses for <fixed-case>G</fixed-case>erman Particle Verbs MoritzWittmann - MarionWeller-Di Marco - SabineSchulte im Walde + MarionWeller-Di Marco + SabineSchulte im Walde 38–43 W16-1805 10.18653/v1/W16-1805 @@ -2948,7 +2948,7 @@ Accounting ngrams and multi-word terms can improve topic models MichaelNokel - NataliaLoukachevitch + NataliaLoukachevitch 44–49 W16-1806 10.18653/v1/W16-1806 @@ -2957,8 +2957,8 @@ Top a Splitter: Using Distributional Semantics for Improving Compound Splitting PatrickZiering - StefanMüller - Lonnekevan der Plas + StefanMüller + Lonnekevan der Plas 50–55 W16-1807 10.18653/v1/W16-1807 @@ -2976,7 +2976,7 @@ Modeling the Non-Substitutability of Multiword Expressions with Distributional Semantics and a Log-Linear Model MeghdadFarahmand - JamesHenderson + JamesHenderson 61–66 W16-1809 10.18653/v1/W16-1809 @@ -2993,8 +2993,8 @@ Representing Support Verbs in <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et - Miriam R. L.Petruck - MichaelEllsworth + Miriam R. L.Petruck + MichaelEllsworth 72–77 W16-1811 10.18653/v1/W16-1811 @@ -3002,9 +3002,9 @@ Inherently Pronominal Verbs in <fixed-case>C</fixed-case>zech: Description and Conversion Based on Treebank Annotation - ZdeňkaUrešová + ZdeňkaUrešová EduardBejček - JanHajič + JanHajič 78–83 W16-1812 10.18653/v1/W16-1812 @@ -3020,9 +3020,9 @@ A study on the production of collocations by <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese learners - ÂngelaCosta - LuísaCoheur - TeresaLino + ÂngelaCosta + LuísaCoheur + TeresaLino 91–95 W16-1814 10.18653/v1/W16-1814 @@ -3039,7 +3039,7 @@ Impact of <fixed-case>MWE</fixed-case> Resources on Multiword Recognition MartinRiedl - ChrisBiemann + ChrisBiemann 107–111 W16-1816 10.18653/v1/W16-1816 @@ -3048,7 +3048,7 @@ A Word Embedding Approach to Identifying Verb-Noun Idiomatic Combinations WaseemGharbieh - VirendraBhavsar + VirendraBhavsar PaulCook 112–118 W16-1817 @@ -3090,7 +3090,7 @@ LauraHernández-Domínguez EdgarGarcía-Cano SylvieRatté - GerardoSierra-Martínez + GerardoSierra-Martínez 10–15 W16-1902 10.18653/v1/W16-1902 @@ -3111,7 +3111,7 @@ Leveraging Annotators’ Gaze Behaviour for Coreference Resolution JoeCheri AbhijitMishra - PushpakBhattacharyya + PushpakBhattacharyya 22–26 W16-1904 10.18653/v1/W16-1904 @@ -3138,9 +3138,9 @@ Longitudinal Studies of Variation Sets in Child-directed Speech - MatsWirén - KristinaNilsson Björkenstam - GintarėGrigonytė + MatsWirén + KristinaNilsson Björkenstam + GintarėGrigonytė Elisabet EirCortes 44–52 W16-1907 @@ -3151,7 +3151,7 @@ Learning Phone Embeddings for Word Segmentation of Child-Directed Speech JianqiangMa ÇağrıÇöltekin - ErhardHinrichs + ErhardHinrichs 53–63 W16-1908 10.18653/v1/W16-1908 @@ -3178,8 +3178,8 @@ Modelling the informativeness and timing of non-verbal cues in parent-child interaction - KristinaNilsson Björkenstam - MatsWirén + KristinaNilsson Björkenstam + MatsWirén RobertÖstling 82–90 W16-1911 @@ -3192,7 +3192,7 @@ Proceedings of the 14th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology W16-20 MichaElsner - SandraKuebler + SandraKuebler 10.18653/v1/W16-20 Association for Computational Linguistics
Berlin, Germany
@@ -3218,7 +3218,7 @@ ChristoKirov JohnSylak-Glassman DavidYarowsky - JasonEisner + JasonEisner MansHulden 10–22 W16-2002 @@ -3235,7 +3235,7 @@
<fixed-case>EHU</fixed-case> at the <fixed-case>SIGMORPHON</fixed-case> 2016 Shared Task. A Simple Proposal: Grapheme-to-Phoneme for Inflection - IñakiAlegria + IñakiAlegria IzaskunEtxeberria 27–30 W16-2004 @@ -3256,7 +3256,7 @@ Morphological reinflection with conditional random fields and unsupervised features LingLiu - Lingshuang JackMao + Lingshuang JackMao 36–40 W16-2006 10.18653/v1/W16-2006 @@ -3274,7 +3274,7 @@ Evaluating Sequence Alignment for Learning Inflectional Morphology - DavidKing + DavidKing 49–53 W16-2008 10.18653/v1/W16-2008 @@ -3290,8 +3290,8 @@ <fixed-case>MED</fixed-case>: The <fixed-case>LMU</fixed-case> System for the <fixed-case>SIGMORPHON</fixed-case> 2016 Shared Task on Morphological Reinflection - KatharinaKann - HinrichSchütze + KatharinaKann + HinrichSchütze 62–70 W16-2010 10.18653/v1/W16-2010 @@ -3302,7 +3302,7 @@ DimaTaji RamyEskander NizarHabash - OwenRambow + OwenRambow 71–75 W16-2011 10.18653/v1/W16-2011 @@ -3312,7 +3312,7 @@ Letter Sequence Labeling for Compound Splitting JianqiangMa VerenaHenrich - ErhardHinrichs + ErhardHinrichs 76–81 W16-2012 10.18653/v1/W16-2012 @@ -3341,7 +3341,7 @@ MaxKisselew LauraRimell AlexisPalmer - SebastianPadó + SebastianPadó 93–98 W16-2015 10.18653/v1/W16-2015 @@ -3369,7 +3369,7 @@ Towards robust cross-linguistic comparisons of phonological networks PhilippaShoemark - SharonGoldwater + SharonGoldwater JamesKirby RikSarkar 110–120 @@ -3410,8 +3410,8 @@ Proceedings of the 10th SIGHUM Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities W16-21 NilsReiter - BeatriceAlex - Kalliopi A.Zervanou + BeatriceAlex + Kalliopi A.Zervanou 10.18653/v1/W16-21 Association for Computational Linguistics
Berlin, Germany
@@ -3433,7 +3433,7 @@
Analysis of Policy Agendas: Lessons Learned from Automatic Topic Classification of <fixed-case>C</fixed-case>roatian Political Texts - Vanja MladenKaran + Vanja MladenKaran JanŠnajder DanielaŠirinić GoranGlavaš @@ -3481,7 +3481,7 @@ You Shall Know People by the Company They Keep: Person Name Disambiguation for Social Network Construction - MarionaColl Ardanuy + MarionaColl Ardanuy Maartenvan den Bos CarolineSporleder 63–73 @@ -3518,8 +3518,8 @@ How Do Cultural Differences Impact the Quality of Sarcasm Annotation?: A Case Study of <fixed-case>I</fixed-case>ndian Annotators and <fixed-case>A</fixed-case>merican Text AdityaJoshi - PushpakBhattacharyya - MarkCarman + PushpakBhattacharyya + MarkCarman JayaSaraswati RajitaShukla 95–99 @@ -3530,7 +3530,7 @@ Combining Phonology and Morphology for the Normalization of Historical Texts IzaskunEtxeberria - IñakiAlegria + IñakiAlegria LarraitzUria MansHulden 100–105 @@ -3540,8 +3540,8 @@ Towards Building a Political Protest Database to Explain Changes in the Welfare State - ÇağılSönmez - ArzucanÖzgür + ÇağılSönmez + ArzucanÖzgür ErdemYörük 106–110 W16-2113 @@ -3579,7 +3579,7 @@ Towards a text analysis system for political debates Dieu-ThuLe Ngoc ThangVu - AndreBlessing + AndreBlessing 134–139 W16-2117 10.18653/v1/W16-2117 @@ -3624,25 +3624,25 @@ Proceedings of the First Conference on Machine Translation: Volume 1, Research Papers - OndřejBojar + OndřejBojar ChristianBuck - RajenChatterjee + RajenChatterjee ChristianFedermann LianeGuillou BarryHaddow MatthiasHuck - Antonio JimenoYepes - AurélieNévéol + Antonio JimenoYepes + AurélieNévéol MarianaNeves PavelPecina MartinPopel PhilippKoehn ChristofMonz - MatteoNegri + MatteoNegri MattPost LuciaSpecia - KarinVerspoor - JörgTiedemann + KarinVerspoor + JörgTiedemann MarcoTurchi Association for Computational Linguistics
Berlin, Germany
@@ -3659,7 +3659,7 @@ Cross-language Projection of Dependency Trees with Constrained Partial Parsing for Tree-to-Tree Machine Translation YuShen ChenhuiChu - FabienCromieres + FabienCromieres SadaoKurohashi 1–11 W16-2201 @@ -3668,8 +3668,8 @@
Improving Pronoun Translation by Modeling Coreference Uncertainty - Ngoc QuangLuong - AndreiPopescu-Belis + Ngoc QuangLuong + AndreiPopescu-Belis 12–20 W16-2202 10.18653/v1/W16-2202 @@ -3678,7 +3678,7 @@ Modeling verbal inflection for <fixed-case>E</fixed-case>nglish to <fixed-case>G</fixed-case>erman <fixed-case>SMT</fixed-case> AnitaRamm - AlexanderFraser + AlexanderFraser 21–31 W16-2203 10.18653/v1/W16-2203 @@ -3686,7 +3686,7 @@ Modeling Selectional Preferences of Verbs and Nouns in String-to-Tree Machine Translation - MariaNădejde + MariaNădejde AlexandraBirch PhilippKoehn 32–42 @@ -3696,9 +3696,9 @@ Modeling Complement Types in Phrase-Based <fixed-case>SMT</fixed-case> - MarionWeller-Di Marco - AlexanderFraser - SabineSchulte im Walde + MarionWeller-Di Marco + AlexanderFraser + SabineSchulte im Walde 43–53 W16-2205 10.18653/v1/W16-2205 @@ -3711,7 +3711,7 @@ Jan-ThorstenPeter MohammedHethnawi AndreasGuta - HermannNey + HermannNey 54–65 W16-2206 10.18653/v1/W16-2206 @@ -3732,7 +3732,7 @@ JanNiehues Thanh-LeHa EunahCho - AlexWaibel + AlexWaibel 74–82 W16-2208 10.18653/v1/W16-2208 @@ -3750,8 +3750,8 @@ A Framework for Discriminative Rule Selection in Hierarchical <fixed-case>M</fixed-case>oses FabienneBraune - AlexanderFraser - HalDaumé III + AlexanderFraser + HalDaumé III AlešTamchyna 92–101 W16-2210 @@ -3772,7 +3772,7 @@ YunsuKim AndreasGuta JoernWuebker - HermannNey + HermannNey 110–117 W16-2212 10.18653/v1/W16-2212 @@ -3783,7 +3783,7 @@ JoachimDaiber MilošStanojević WilkerAziz - KhalilSima’an + KhalilSima’an 118–130 W16-2213 10.18653/v1/W16-2213 @@ -3793,25 +3793,25 @@ Proceedings of the First Conference on Machine Translation: Volume 2, Shared Task Papers - OndřejBojar + OndřejBojar ChristianBuck - RajenChatterjee + RajenChatterjee ChristianFedermann LianeGuillou BarryHaddow MatthiasHuck - Antonio JimenoYepes - AurélieNévéol + Antonio JimenoYepes + AurélieNévéol MarianaNeves PavelPecina MartinPopel PhilippKoehn ChristofMonz - MatteoNegri + MatteoNegri MattPost LuciaSpecia - KarinVerspoor - JörgTiedemann + KarinVerspoor + JörgTiedemann MarcoTurchi Association for Computational Linguistics
Berlin, Germany
@@ -3840,8 +3840,8 @@ MarianaNeves MartinPopel MattPost - RaphaelRubino - CarolinaScarton + RaphaelRubino + CarolinaScarton LuciaSpecia MarcoTurchi KarinVerspoor @@ -3891,9 +3891,9 @@ <fixed-case>TÜBİTAK</fixed-case> <fixed-case>SMT</fixed-case> System Submission for <fixed-case>WMT</fixed-case>2016 EmreBektaş - ErtuğrulYilmaz + ErtuğrulYilmaz CoşkunMermer - İlknurDurgar El-Kahlout + İlknurDurgar El-Kahlout 246–251 W16-2305 10.18653/v1/W16-2305 @@ -3901,7 +3901,7 @@ <fixed-case>P</fixed-case>ar<fixed-case>FDA</fixed-case> for Instance Selection for Statistical Machine Translation - ErgunBiçici + ErgunBiçici 252–258 W16-2306 10.18653/v1/W16-2306 @@ -3911,7 +3911,7 @@ <fixed-case>S</fixed-case>heffield Systems for the <fixed-case>E</fixed-case>nglish-<fixed-case>R</fixed-case>omanian <fixed-case>WMT</fixed-case> Translation Task - FrédéricBlain + FrédéricBlain XingyiSong LuciaSpecia 259–263 @@ -3973,7 +3973,7 @@ The <fixed-case>AFRL</fixed-case>-<fixed-case>MITLL</fixed-case> <fixed-case>WMT</fixed-case>16 News-Translation Task Systems JeremyGwinnup - TimAnderson + TimAnderson GrantErdmann KatherineYoung MichaeelKazi @@ -3992,7 +3992,7 @@ MohammedMediani MatthiasSperber AlexandreAllauzen - AlexanderWaibel + AlexanderWaibel 303–310 W16-2314 10.18653/v1/W16-2314 @@ -4001,7 +4001,7 @@ The <fixed-case>E</fixed-case>dinburgh/<fixed-case>LMU</fixed-case> Hierarchical Machine Translation System for <fixed-case>WMT</fixed-case> 2016 MatthiasHuck - AlexanderFraser + AlexanderFraser BarryHaddow 311–318 W16-2315 @@ -4053,25 +4053,25 @@ The <fixed-case>QT</fixed-case>21/<fixed-case>H</fixed-case>im<fixed-case>L</fixed-case> Combined Machine Translation System Jan-ThorstenPeter TamerAlkhouli - HermannNey + HermannNey MatthiasHuck FabienneBraune - AlexanderFraser + AlexanderFraser AlešTamchyna OndřejBojar BarryHaddow RicoSennrich - FrédéricBlain + FrédéricBlain LuciaSpecia JanNiehues - AlexWaibel + AlexWaibel AlexandreAllauzen LaurianeAufrant FranckBurlot ElenaKnyazeva ThomasLavergne FrançoisYvon - MārcisPinnis + MārcisPinnis StellaFrank 344–355 W16-2320 @@ -4083,7 +4083,7 @@ Jan-ThorstenPeter TamerAlkhouli AndreasGuta - HermannNey + HermannNey 356–361 W16-2321 10.18653/v1/W16-2321 @@ -4091,7 +4091,7 @@ <fixed-case>A</fixed-case>bu-<fixed-case>M</fixed-case>a<fixed-case>T</fixed-case>ran at <fixed-case>WMT</fixed-case> 2016 Translation Task: Deep Learning, Morphological Segmentation and Tuning on Character Sequences - Víctor M.Sánchez-Cartagena + Víctor M.Sánchez-Cartagena AntonioToral 362–370 W16-2322 @@ -4123,7 +4123,7 @@ AlešTamchyna RomanSudarikov OndřejBojar - AlexanderFraser + AlexanderFraser 385–390 W16-2325 10.18653/v1/W16-2325 @@ -4137,7 +4137,7 @@ FilipGinter SaraStymne RobertÖstling - MarionWeller-Di Marco + MarionWeller-Di Marco 391–398 W16-2326 10.18653/v1/W16-2326 @@ -4147,7 +4147,7 @@ <fixed-case>E</fixed-case>dinburgh’s Statistical Machine Translation Systems for <fixed-case>WMT</fixed-case>16 PhilipWilliams RicoSennrich - MariaNădejde + MariaNădejde MatthiasHuck BarryHaddow OndřejBojar @@ -4170,7 +4170,7 @@ EleftheriosAvramidis AljoschaBurchardt VivienMacketanz - AnkitSrivastava + AnkitSrivastava 415–422 W16-2329 10.18653/v1/W16-2329 @@ -4180,7 +4180,7 @@ <fixed-case>ILLC</fixed-case>-<fixed-case>U</fixed-case>v<fixed-case>A</fixed-case> Adaptation System (Scorpio) at <fixed-case>WMT</fixed-case>’16 <fixed-case>IT</fixed-case>-<fixed-case>DOMAIN</fixed-case> Task HoangCuong StellaFrank - KhalilSima’an + KhalilSima’an 423–427 W16-2330 10.18653/v1/W16-2330 @@ -4197,21 +4197,21 @@ <fixed-case>SMT</fixed-case> and Hybrid systems of the <fixed-case>QTL</fixed-case>eap project in the <fixed-case>WMT</fixed-case>16 <fixed-case>IT</fixed-case>-task - RosaGaudio - GorkaLabaka - EnekoAgirre + RosaGaudio + GorkaLabaka + EnekoAgirre PetyaOsenova - KirilSimov + KirilSimov MartinPopel DiekeOele - Gertjanvan Noord + Gertjanvan Noord LuísGomes - JoãoAntónio Rodrigues + JoãoAntónio Rodrigues StevenNeale - JoãoSilva + JoãoSilva AndreiaQuerido NunoRendeiro - AntónioBranco + AntónioBranco 435–441 W16-2332 10.18653/v1/W16-2332 @@ -4222,9 +4222,9 @@ KoushikPahari AlapanKuila SantanuPal - Sudip KumarNaskar - SivajiBandyopadhyay - Josefvan Genabith + Sudip KumarNaskar + SivajiBandyopadhyay + Josefvan Genabith 442–448 W16-2333 10.18653/v1/W16-2333 @@ -4245,7 +4245,7 @@ <fixed-case>E</fixed-case>nglish-<fixed-case>P</fixed-case>ortuguese Biomedical Translation Task Using a Genuine Phrase-Based Statistical Machine Translation Approach JoséAires - GabrielLopes + GabrielLopes LuísGomes 456–462 W16-2335 @@ -4254,11 +4254,11 @@ The <fixed-case>TALP</fixed-case>–<fixed-case>UPC</fixed-case> <fixed-case>S</fixed-case>panish–<fixed-case>E</fixed-case>nglish <fixed-case>WMT</fixed-case> Biomedical Task: Bilingual Embeddings and Char-based Neural Language Model Rescoring in a Phrase-based System - Marta R.Costa-jussà + Marta R.Costa-jussà CristinaEspaña-Bonet - PranavaMadhyastha + PranavaMadhyastha CarlosEscolano - José A. R.Fonollosa + José A. R.Fonollosa 463–468 W16-2336 10.18653/v1/W16-2336 @@ -4277,7 +4277,7 @@ <fixed-case>IXA</fixed-case> Biomedical Translation System at <fixed-case>WMT</fixed-case>16 Biomedical Translation Task OlatzPerez-de-Viñaspre - GorkaLabaka + GorkaLabaka 477–482 W16-2338 10.18653/v1/W16-2338 @@ -4286,9 +4286,9 @@ <fixed-case>C</fixed-case>obalt<fixed-case>F</fixed-case>: A Fluent Metric for <fixed-case>MT</fixed-case> Evaluation MarinaFomicheva - NúriaBel + NúriaBel LuciaSpecia - Iriada Cunha + Iriada Cunha AntonMalinovskiy 483–490 W16-2339 @@ -4306,7 +4306,7 @@ chr<fixed-case>F</fixed-case> deconstructed: beta parameters and n-gram weights - MajaPopović + MajaPopović 499–504 W16-2341 10.18653/v1/W16-2341 @@ -4317,7 +4317,7 @@ WeiyueWang Jan-ThorstenPeter HendrikRosendahl - HermannNey + HermannNey 505–510 W16-2342 10.18653/v1/W16-2342 @@ -4332,7 +4332,7 @@ ZhimingChen YimingTan MaoxiLi - MingwenWang + MingwenWang 511–517 W16-2343 10.18653/v1/W16-2343 @@ -4351,13 +4351,13 @@ Findings of the 2016 <fixed-case>WMT</fixed-case> Shared Task on Cross-lingual Pronoun Prediction LianeGuillou ChristianHardmeier - PreslavNakov + PreslavNakov SaraStymne JörgTiedemann YannickVersley MauroCettolo - BonnieWebber - AndreiPopescu-Belis + BonnieWebber + AndreiPopescu-Belis 525–542 W16-2345 10.18653/v1/W16-2345 @@ -4367,7 +4367,7 @@ A Shared Task on Multimodal Machine Translation and Crosslingual Image Description LuciaSpecia StellaFrank - KhalilSima’an + KhalilSima’an DesmondElliott 543–553 W16-2346 @@ -4395,7 +4395,7 @@ The <fixed-case>K</fixed-case>yoto <fixed-case>U</fixed-case>niversity Cross-Lingual Pronoun Translation System RajDabre YevgeniyPuzikov - FabienCromieres + FabienCromieres SadaoKurohashi 571–575 W16-2349 @@ -4422,8 +4422,8 @@ Pronoun Language Model and Grammatical Heuristics for Aiding Pronoun Prediction - Ngoc QuangLuong - AndreiPopescu-Belis + Ngoc QuangLuong + AndreiPopescu-Belis 589–595 W16-2352 10.18653/v1/W16-2352 @@ -4477,7 +4477,7 @@ WalidAransa YaxingWang MarcMasana - MercedesGarcía-Martínez + MercedesGarcía-Martínez FethiBougares LoïcBarrault Joostvan de Weijer @@ -4502,7 +4502,7 @@ FrederickLiu Sz-RungShiang JeanOh - ChrisDyer + ChrisDyer 639–645 W16-2360 10.18653/v1/W16-2360 @@ -4511,7 +4511,7 @@ <fixed-case>CUNI</fixed-case> System for <fixed-case>WMT</fixed-case>16 Automatic Post-Editing and Multimodal Translation Tasks JindřichLibovický - JindřichHelcl + JindřichHelcl MarekTlustý OndřejBojar PavelPecina @@ -4523,7 +4523,7 @@ <fixed-case>WMT</fixed-case> 2016 Multimodal Translation System Description based on Bidirectional Recurrent Neural Networks with Double-Embeddings SergioRodríguez Guasch - Marta R.Costa-jussà + Marta R.Costa-jussà 655–659 W16-2362 10.18653/v1/W16-2362 @@ -4559,7 +4559,7 @@ <fixed-case>YODA</fixed-case> System for <fixed-case>WMT</fixed-case>16 Shared Task: Bilingual Document Alignment - Aswarth AbhilashDara + Aswarth AbhilashDara Yiu-ChangLin 679–684 W16-2366 @@ -4568,9 +4568,9 @@ Bitextor’s participation in <fixed-case>WMT</fixed-case>’16: shared task on document alignment - MiquelEsplà-Gomis - MikelForcada - SergioOrtiz-Rojas + MiquelEsplà-Gomis + MikelForcada + SergioOrtiz-Rojas JorgeFerrández-Tordera 685–691 W16-2367 @@ -4588,7 +4588,7 @@ First Steps Towards Coverage-Based Document Alignment LuísGomes - GabrielPereira Lopes + GabrielPereira Lopes 697–702 W16-2369 10.18653/v1/W16-2369 @@ -4597,7 +4597,7 @@ <fixed-case>BAD</fixed-case> <fixed-case>LUC</fixed-case>@<fixed-case>WMT</fixed-case> 2016: a Bilingual Document Alignment Platform Based on Lucene LaurentJakubina - PhillippeLanglais + PhillippeLanglais 703–709 W16-2370 10.18653/v1/W16-2370 @@ -4607,7 +4607,7 @@ Using Term Position Similarity and Language Modeling for Bilingual Document Alignment Thanh C.Le Hoa TrongVu - JonathanOberländer + JonathanOberländer OndřejBojar 710–716 W16-2371 @@ -4627,7 +4627,7 @@ <fixed-case>WMT</fixed-case>2016: A Hybrid Approach to Bilingual Document Alignment - SainikMahata + SainikMahata DipankarDas SantanuPal 724–727 @@ -4639,7 +4639,7 @@ <fixed-case>E</fixed-case>nglish-<fixed-case>F</fixed-case>rench Document Alignment Based on Keywords and Statistical Translation MarekMedveď MilošJakubíček - VojtechKovář + VojtechKovář 728–732 W16-2374 10.18653/v1/W16-2374 @@ -4649,7 +4649,7 @@ The <fixed-case>ILSP</fixed-case>/<fixed-case>ARC</fixed-case> submission to the <fixed-case>WMT</fixed-case> 2016 Bilingual Document Alignment Shared Task VassilisPapavassiliou ProkopisProkopidis - SteliosPiperidis + SteliosPiperidis 733–739 W16-2375 10.18653/v1/W16-2375 @@ -4668,7 +4668,7 @@ The <fixed-case>FBK</fixed-case> Participation in the <fixed-case>WMT</fixed-case> 2016 Automatic Post-editing Shared Task RajenChatterjee - José G.C. de Souza + José G.C. de Souza MatteoNegri MarcoTurchi 745–750 @@ -4689,7 +4689,7 @@ <fixed-case>USAAR</fixed-case>: An Operation Sequential Model for Automatic Statistical Post-Editing SantanuPal MarcosZampieri - Josefvan Genabith + Josefvan Genabith 759–763 W16-2379 10.18653/v1/W16-2379 @@ -4699,7 +4699,7 @@ Bilingual Embeddings and Word Alignments for Translation Quality Estimation AmalAbdelsalam OndřejBojar - SamhaaEl-Beltagy + SamhaaEl-Beltagy 764–771 W16-2380 10.18653/v1/W16-2380 @@ -4707,9 +4707,9 @@ <fixed-case>SHEF</fixed-case>-<fixed-case>MIME</fixed-case>: Word-level Quality Estimation Using Imitation Learning - DanielBeck + DanielBeck AndreasVlachos - GustavoPaetzold + GustavoPaetzold LuciaSpecia 772–776 W16-2381 @@ -4718,7 +4718,7 @@ Referential Translation Machines for Predicting Translation Performance - ErgunBiçici + ErgunBiçici 777–781 W16-2382 10.18653/v1/W16-2382 @@ -4726,9 +4726,9 @@ <fixed-case>UA</fixed-case>lacant word-level and phrase-level machine translation quality estimation systems at <fixed-case>WMT</fixed-case> 2016 - MiquelEsplà-Gomis + MiquelEsplà-Gomis FelipeSánchez-Martínez - MikelForcada + MikelForcada 782–786 W16-2383 10.18653/v1/W16-2383 @@ -4756,7 +4756,7 @@ <fixed-case>USFD</fixed-case>’s Phrase-level Quality Estimation Systems VarvaraLogacheva - FrédéricBlain + FrédéricBlain LuciaSpecia 800–805 W16-2386 @@ -4765,10 +4765,10 @@ Unbabel’s Participation in the <fixed-case>WMT</fixed-case>16 Word-Level Translation Quality Estimation Shared Task - André F. T.Martins - RamónAstudillo - ChrisHokamp - FabioKepler + André F. T.Martins + RamónAstudillo + ChrisHokamp + FabioKepler 806–811 W16-2387 10.18653/v1/W16-2387 @@ -4776,7 +4776,7 @@ <fixed-case>S</fixed-case>imple<fixed-case>N</fixed-case>ets: Quality Estimation with Resource-Light Neural Networks - GustavoPaetzold + GustavoPaetzold LuciaSpecia 812–818 W16-2388 @@ -4786,7 +4786,7 @@ Translation Quality Estimation using Recurrent Neural Network Raj NathPatel - SasikumarM + SasikumarM 819–824 W16-2389 10.18653/v1/W16-2389 @@ -4803,10 +4803,10 @@ Word embeddings and discourse information for Quality Estimation - CarolinaScarton - DanielBeck + CarolinaScarton + DanielBeck KashifShah - KarinSim Smith + KarinSim Smith LuciaSpecia 831–837 W16-2391 @@ -4827,7 +4827,7 @@ <fixed-case>UGENT</fixed-case>-<fixed-case>LT</fixed-case>3 <fixed-case>SCATE</fixed-case> Submission for <fixed-case>WMT</fixed-case>16 Shared Task on Quality Estimation ArdaTezcan - VéroniqueHoste + VéroniqueHoste LieveMacken 843–850 W16-2393 @@ -4883,7 +4883,7 @@ Distributed representation and estimation of <fixed-case>WFST</fixed-case>-based n-gram models CyrilAllauzen - MichaelRiley + MichaelRiley BrianRoark 32–41 W16-2404 @@ -4901,9 +4901,9 @@ Data-Driven Spelling Correction using Weighted Finite-State Methods - MiikkaSilfverberg + MiikkaSilfverberg PekkaKauppinen - KristerLindén + KristerLindén 51–59 W16-2406 10.18653/v1/W16-2406 @@ -4968,7 +4968,7 @@ ThomasKober JeremyReffin JulieWeeds - DavidWeir + DavidWeir 7–12 W16-2502 10.18653/v1/W16-2502 @@ -4986,7 +4986,7 @@ Evaluating Word Embeddings Using a Representative Suite of Practical Tasks Neha NayakKennard GaborAngeli - Christopher D.Manning + Christopher D.Manning 19–23 W16-2504 10.18653/v1/W16-2504 @@ -4996,9 +4996,9 @@ Story Cloze Evaluator: Vector Space Representation Evaluation by Predicting What Happens Next NasrinMostafazadeh LucyVanderwende - Wen-tauYih + Wen-tauYih PushmeetKohli - JamesAllen + JamesAllen 24–29 W16-2505 10.18653/v1/W16-2505 @@ -5009,7 +5009,7 @@ ManaalFaruqui YuliaTsvetkov PushpendreRastogi - ChrisDyer + ChrisDyer 30–35 W16-2506 10.18653/v1/W16-2506 @@ -5026,7 +5026,7 @@ Find the word that does not belong: A Framework for an Intrinsic Evaluation of Word Vector Representations - JoséCamacho-Collados + JoséCamacho-Collados RobertoNavigli 43–50 W16-2508 @@ -5056,7 +5056,7 @@ SaharGhannay YannickEstève NathalieCamelin - PaulDeleglise + PaulDeleglise 62–66 W16-2511 10.18653/v1/W16-2511 @@ -5092,7 +5092,7 @@ Evaluating multi-sense embeddings for semantic resolution monolingually and in word translation GáborBorbély MártonMakrai - Dávid MárkNemeskey + Dávid MárkNemeskey AndrásKornai 83–89 W16-2515 @@ -5118,7 +5118,7 @@ Thematic fit evaluation: an aspect of selectional preferences - AsadSayeed + AsadSayeed ClaytonGreenberg VeraDemberg 99–105 @@ -5139,7 +5139,7 @@ Correlation-based Intrinsic Evaluation of Word Vector Representations YuliaTsvetkov ManaalFaruqui - ChrisDyer + ChrisDyer 111–115 W16-2520 10.18653/v1/W16-2520 @@ -5147,7 +5147,7 @@ Evaluating word embeddings with f<fixed-case>MRI</fixed-case> and eye-tracking - AndersSøgaard + AndersSøgaard 116–121 W16-2521 10.18653/v1/W16-2521 @@ -5189,7 +5189,7 @@ <fixed-case>SLEDDED</fixed-case>: A Proposed Dataset of Event Descriptions for Evaluating Phrase Representations LauraRimell - Eva MariaVecchi + Eva MariaVecchi 140–144 W16-2525 10.18653/v1/W16-2525 @@ -5213,7 +5213,7 @@ PaulCook StefanEvert RolandSchäfer - EgonStemle + EgonStemle 10.18653/v1/W16-26 Association for Computational Linguistics
Berlin
@@ -5307,7 +5307,7 @@ Babler - Data Collection from the Web to Support Speech Recognition and Keyword Search GideonMendels EricaCooper - JuliaHirschberg + JuliaHirschberg 72–81 W16-2609 10.18653/v1/W16-2609 @@ -5343,7 +5343,7 @@ <fixed-case>E</fixed-case>mpiri<fixed-case>ST</fixed-case>: <fixed-case>AIPHES</fixed-case> - Robust Tokenization and <fixed-case>POS</fixed-case>-Tagging for Different Genres SteffenRemus GeroldHintz - ChrisBiemann + ChrisBiemann Christian M.Meyer DarinaBenikova JudithEckle-Kohler @@ -5377,10 +5377,10 @@ Proceedings of the Sixth Named Entity Workshop W16-27 XiangyuDuan - Rafael E.Banchs + Rafael E.Banchs MinZhang HaizhouLi - AKumaran + AKumaran 10.18653/v1/W16-27 Association for Computational Linguistics
Berlin, Germany
@@ -5502,7 +5502,7 @@ AndrewFinch LemaoLiu XiaolinWang - EiichiroSumita + EiichiroSumita 78–82 W16-2711 10.18653/v1/W16-2711 @@ -5512,7 +5512,7 @@ Regulating Orthography-Phonology Relationship for <fixed-case>E</fixed-case>nglish to <fixed-case>T</fixed-case>hai Transliteration Binh MinhNguyen Hoang GiaNgo - Nancy F.Chen + Nancy F.Chen 83–87 W16-2712 10.18653/v1/W16-2712 @@ -5520,7 +5520,7 @@
<fixed-case>M</fixed-case>oses-based official baseline for <fixed-case>NEWS</fixed-case> 2016 - Marta R.Costa-jussà + Marta R.Costa-jussà 88–90 W16-2713 10.18653/v1/W16-2713 @@ -5531,7 +5531,7 @@ Proceedings of the Third Workshop on Argument Mining (ArgMining2016) W16-28 - ChrisReed + ChrisReed 10.18653/v1/W16-28 Association for Computational Linguistics
Berlin, Germany
@@ -5555,7 +5555,7 @@ Summarizing Multi-Party Argumentative Conversations in Reader Comment on News EmmaBarker - RobertGaizauskas + RobertGaizauskas 12–20 W16-2802 10.18653/v1/W16-2802 @@ -5595,7 +5595,7 @@ Extracting Case Law Sentences for Argumentation about the Meaning of Statutory Terms JaromírŠavelka - Kevin D.Ashley + Kevin D.Ashley 50–59 W16-2806 10.18653/v1/W16-2806 @@ -5603,7 +5603,7 @@ Scrutable Feature Sets for Stance Classification - AngroshMandya + AngroshMandya AdvaithSiddharthan AdamWyner 60–69 @@ -5756,10 +5756,10 @@ Proceedings of the 15th Workshop on Biomedical Natural Language Processing W16-29 - Kevin BretonnelCohen + Kevin BretonnelCohen DinaDemner-Fushman SophiaAnaniadou - Jun-ichiTsujii + Jun-ichiTsujii 10.18653/v1/W16-29 Association for Computational Linguistics
Berlin, Germany
@@ -5773,7 +5773,7 @@ A Machine Learning Approach to Clinical Terms Normalization - JoséCastaño + JoséCastaño María LauraGambarte Hee JoonPark Mariadel Pilar Avila Williams @@ -5800,9 +5800,9 @@ Identification, characterization, and grounding of gradable terms in clinical text ChaitanyaShivade - Marie-Catherinede Marneffe - EricFosler-Lussier - Albert M.Lai + Marie-Catherinede Marneffe + EricFosler-Lussier + Albert M.Lai 17–26 W16-2903 10.18653/v1/W16-2903 @@ -5810,7 +5810,7 @@ Graph-based Semi-supervised Gene Mention Tagging - GolnarSheikhshab + GolnarSheikhshab ElizabethStarks AlyKarsan AnoopSarkar @@ -5822,7 +5822,7 @@ Feature Derivation for Exploitation of Distant Annotation via Pattern Induction against Dependency Parses - DayneFreitag + DayneFreitag JohnNiekrasz 36–45 W16-2905 @@ -5839,8 +5839,8 @@ <fixed-case>S</fixed-case>nap<fixed-case>T</fixed-case>o<fixed-case>G</fixed-case>rid: From Statistical to Interpretable Models for Biomedical Information Extraction - Marco A.Valenzuela-Escárcega - GusHahn-Powell + Marco A.Valenzuela-Escárcega + GusHahn-Powell DaneBell MihaiSurdeanu 56–65 @@ -5870,8 +5870,8 @@ Using Distributed Representations to Disambiguate Biomedical and Clinical Concepts StéphanTulkens - SimonSuster - WalterDaelemans + SimonSuster + WalterDaelemans 77–82 W16-2910 10.18653/v1/W16-2910 @@ -5879,7 +5879,7 @@ Unsupervised Document Classification with Informed Topic Models - TimothyMiller + TimothyMiller DmitriyDligach GuerganaSavova 83–91 @@ -5890,7 +5890,7 @@ Vocabulary Development To Support Information Extraction of Substance Abuse from Psychiatry Notes SumithraVelupillai - Danielle L.Mowery + Danielle L.Mowery MikeConway JohnHurdle BrentKious @@ -5913,7 +5913,7 @@ Improving Temporal Relation Extraction with Training Instance Augmentation ChenLin - TimothyMiller + TimothyMiller DmitriyDligach StevenBethard GuerganaSavova @@ -5936,7 +5936,7 @@ Measuring the State of the Art of Automated Pathway Curation Using Graph Algorithms - A Case Study of the m<fixed-case>TOR</fixed-case> Pathway MichaelSpranger SucheendraPalaniappan - SamikGosh + SamikGosh 119–127 W16-2916 10.18653/v1/W16-2916 @@ -5966,9 +5966,9 @@ LanaYeganova WonKim SunKim - RezartaIslamaj Doğan + RezartaIslamaj Doğan WanliLiu - Donald CComeau + Donald CComeau ZhiyongLu W JohnWilbur 141–145 @@ -5978,9 +5978,9 @@ This before That: Causal Precedence in the Biomedical Domain - GusHahn-Powell + GusHahn-Powell DaneBell - Marco A.Valenzuela-Escárcega + Marco A.Valenzuela-Escárcega MihaiSurdeanu 146–155 W16-2920 @@ -5992,7 +5992,7 @@ VivianaCotik VanesaStricker JorgeVivaldi - HoracioRodriguez + HoracioRodriguez 156–165 W16-2921 10.18653/v1/W16-2921 @@ -6013,7 +6013,7 @@ An Information Foraging Approach to Determining the Number of Relevant Features BrianConnolly BenjaminGlass - JohnPestian + JohnPestian 175–180 W16-2923 10.18653/v1/W16-2923 @@ -6022,10 +6022,10 @@ Assessing the Feasibility of an Automated Suggestion System for Communicating Critical Findings from Chest Radiology Reports to Referring Physicians Brian E.Chapman - Danielle L.Mowery + Danielle L.Mowery EvanNarasimhan NeelPatel - WendyChapman + WendyChapman MartaHeilbrun 181–185 W16-2924 @@ -6052,12 +6052,12 @@ Identifying First Episodes of Psychosis in Psychiatric Patient Records using Machine Learning - GenevieveGorrell + GenevieveGorrell SherifatOduola AngusRoberts TomCraig CraigMorgan - RobStewart + RobStewart 196–205 W16-2927 10.18653/v1/W16-2927 @@ -6079,7 +6079,7 @@ Proceedings of the 4th BioNLP Shared Task Workshop W16-30 - ClaireNėdellec + ClaireNėdellec RobertBossy Jin-DongKim 10.18653/v1/W16-30 @@ -6102,7 +6102,7 @@ RobertBossy MouhamadouBa LouiseDeléger - PierreZweigenbaum + PierreZweigenbaum PhilippeBessières LoicLepiniec ClaireNédellec @@ -6151,7 +6151,7 @@ <fixed-case>VERSE</fixed-case>: Event and Relation Extraction in the <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case> 2016 Shared Task JakeLever - Steven JMJones + Steven JMJones 42–49 W16-3005 10.18653/v1/W16-3005 @@ -6159,9 +6159,9 @@ A dictionary- and rule-based system for identification of bacteria and habitats in text - Helen VCook + Helen VCook EvangelosPafilis - Lars JuhlJensen + Lars JuhlJensen 50–55 W16-3006 10.18653/v1/W16-3006 @@ -6173,7 +6173,7 @@ HakanŞahin BerfuBüyüköz AlperYayıkçı - ArzucanÖzgür + ArzucanÖzgür 56–63 W16-3007 10.18653/v1/W16-3007 @@ -6201,10 +6201,10 @@ <fixed-case>S</fixed-case>ee<fixed-case>D</fixed-case>ev Binary Event Extraction using <fixed-case>SVM</fixed-case>s and a Rich Feature Set - NageshC. Panyam + NageshC. Panyam GitanshKhirbat - KarinVerspoor - TrevorCohn + KarinVerspoor + TrevorCohn KotagiriRamamohanarao 82–87 W16-3010 @@ -6213,10 +6213,10 @@ Extraction of Regulatory Events using Kernel-based Classifiers and Distant Supervision - AndreLamurias + AndreLamurias Miguel J.Rodrigues - Luka A.Clarke - Francisco M.Couto + Luka A.Clarke + Francisco M.Couto 88–92 W16-3011 10.18653/v1/W16-3011 @@ -6226,7 +6226,7 @@ <fixed-case>DUTIR</fixed-case> in <fixed-case>B</fixed-case>io<fixed-case>NLP</fixed-case>-<fixed-case>ST</fixed-case> 2016: Utilizing Convolutional Network and Distributed Representation to Extract Complicate Relations HongleiLi JianhaiZhang - JianWang + JianWang HongfeiLin ZhihaoYang 93–100 @@ -6250,7 +6250,7 @@ Proceedings of the Fourth BioASQ workshop W16-31 - Ioannis A.Kakadiaris + Ioannis A.Kakadiaris GeorgePaliouras AnastasiaKrithara 10.18653/v1/W16-31 @@ -6278,7 +6278,7 @@ Using Learning-To-Rank to Enhance <fixed-case>NLM</fixed-case> Medical Text Indexer Results IlyaZavorin - JamesMork + JamesMork DinaDemner-Fushman 8–15 W16-3102 @@ -6287,7 +6287,7 @@ <fixed-case>LABDA</fixed-case> at the 2016 <fixed-case>B</fixed-case>io<fixed-case>ASQ</fixed-case> challenge task 4a: Semantic Indexing by using <fixed-case>E</fixed-case>lastic<fixed-case>S</fixed-case>earch - IsabelSegura-Bedmar + IsabelSegura-Bedmar AdriánCarruana PalomaMartínez 16–22 @@ -6299,7 +6299,7 @@ Learning to Answer Biomedical Questions: <fixed-case>OAQA</fixed-case> at <fixed-case>B</fixed-case>io<fixed-case>ASQ</fixed-case> 4<fixed-case>B</fixed-case> ZiYang YueZhou - EricNyberg + EricNyberg 23–37 W16-3104 10.18653/v1/W16-3104 @@ -6322,12 +6322,12 @@ <fixed-case>KSA</fixed-case>nswer: Question-answering System of Kangwon National University and Sogang University in the 2016 <fixed-case>B</fixed-case>io<fixed-case>ASQ</fixed-case> Challenge - Hyeon-guLee + Hyeon-guLee MinkyoungKim HarksooKim JuaeKim SunjaeKwon - JungyunSeo + JungyunSeo Yi-reunKim Jung-KyuChoi 45–49 @@ -6355,7 +6355,7 @@ Proceedings of the 5th Workshop on Vision and Language W16-32 - AnyaBelz + AnyaBelz ErkutErdem KrystianMikolajczyk KaterinaPastra @@ -6384,7 +6384,7 @@ Combining Lexical and Spatial Knowledge to Predict Spatial Relations between Objects in Images - ManuelaHürlimann + ManuelaHürlimann JohanBos 10–18 W16-3202 @@ -6444,7 +6444,7 @@ Building a Bagpipe with a Bag and a Pipe: Exploring Conceptual Combination in Vision SandroPezzelle RaviShekhar - RaffaellaBernardi + RaffaellaBernardi 60–64 W16-3208 10.18653/v1/W16-3208 @@ -6464,7 +6464,7 @@ <fixed-case>M</fixed-case>ulti30<fixed-case>K</fixed-case>: Multilingual <fixed-case>E</fixed-case>nglish-<fixed-case>G</fixed-case>erman Image Descriptions DesmondElliott StellaFrank - KhalilSima’an + KhalilSima’an LuciaSpecia 70–74 W16-3210 @@ -6473,12 +6473,12 @@ “Look, some Green Circles!”: Learning to Quantify from Images - IonutSorodoc + IonutSorodoc AngelikiLazaridou - GemmaBoleda - AurélieHerbelot + GemmaBoleda + AurélieHerbelot SandroPezzelle - RaffaellaBernardi + RaffaellaBernardi 75–79 W16-3211 10.18653/v1/W16-3211 @@ -6523,7 +6523,7 @@ Coordination in <fixed-case>M</fixed-case>inimalist <fixed-case>G</fixed-case>rammars: Excorporation and Across the Board (Head) Movement JohnTorr - Edward P.Stabler + Edward P.Stabler 1–17 W16-3301 torr-stabler-2016-coordination @@ -6575,7 +6575,7 @@ Modelling the ziji Blocking Effect and Constraining Bound Variable Derivations in <fixed-case>MC</fixed-case>-<fixed-case>TAG</fixed-case> with Delayed Locality - Dennis RyanStoroshenko + Dennis RyanStoroshenko 67–76 W16-3307 storoshenko-2016-modelling @@ -6592,8 +6592,8 @@ WonchangChung Suhas SiddheshMhatre AlexisNasr - OwenRambow - SrinivasBangalore + OwenRambow + SrinivasBangalore 85–92 W16-3309 chung-etal-2016-revisiting @@ -6610,14 +6610,14 @@ Hyperedge Replacement and Nonprojective Dependency Structures DanielBauer - OwenRambow + OwenRambow 103–111 W16-3311 bauer-rambow-2016-hyperedge Parasitic Gaps and the Heterogeneity of Dependency Formation in <fixed-case>STAG</fixed-case> - Dennis RyanStoroshenko + Dennis RyanStoroshenko RobertFrank 112–120 W16-3312 @@ -6656,7 +6656,7 @@ Improving Phrase-Based <fixed-case>SMT</fixed-case> Using Cross-Granularity Embedding Similarity PeymanPassban - ChrisHokamp + ChrisHokamp AndyWay QunLiu Baltic Journal of Modern Computing @@ -6677,8 +6677,8 @@ Stand-off Annotation of Web Content as a Legally Safer Alternative to Crawling for Distribution - Mikel L.Forcada - MiquelEsplà-Gomis + Mikel L.Forcada + MiquelEsplà-Gomis Juan AntonioPérez-Ortiz Baltic Journal of Modern Computing 2 @@ -6689,7 +6689,7 @@ Combining Translation Memories and Syntax-Based <fixed-case>SMT</fixed-case>: Experiments with Real Industrial Data LiangyouLi - Carla ParraEscartin + Carla ParraEscartin QunLiu Baltic Journal of Modern Computing 2 @@ -6699,7 +6699,7 @@ The Trouble with Machine Translation Coherence - Karin SimSmith + Karin SimSmith WilkerAziz LuciaSpecia Baltic Journal of Modern Computing @@ -6711,7 +6711,7 @@ Pivoting Methods and Data for <fixed-case>C</fixed-case>zech-<fixed-case>V</fixed-case>ietnamese Translation via <fixed-case>E</fixed-case>nglish Duc TamHoang - OndrejBojar + OndrejBojar Baltic Journal of Modern Computing 2 190-202 @@ -6721,7 +6721,7 @@ Detecting Grammatical Errors in Machine Translation Output Using Dependency Parsing and Treebank Querying ArdaTezcan - VeroniqueHoste + VeroniqueHoste LieveMacken Baltic Journal of Modern Computing 2 @@ -6731,8 +6731,8 @@ Potential and Limits of Using Post-edits as Reference Translations for <fixed-case>MT</fixed-case> Evaluation - MajaPopovic - MihaelArčan + MajaPopovic + MihaelArčan ArleLommel Baltic Journal of Modern Computing 2 @@ -6742,8 +6742,8 @@ Can Text Simplification Help Machine Translation? - SanjaŠtajner - MajaPopovic + SanjaŠtajner + MajaPopovic Baltic Journal of Modern Computing 2 230-242 @@ -6762,9 +6762,9 @@ Semantic Textual Similarity in Quality Estimation - HannaBechara - Carla ParraEscartin - ConstantinOrasan + HannaBechara + Carla ParraEscartin + ConstantinOrasan LuciaSpecia Baltic Journal of Modern Computing 2 @@ -6776,7 +6776,7 @@ Climbing Mont <fixed-case>BLEU</fixed-case>: The Strange World of Reachable High-<fixed-case>BLEU</fixed-case> Translations AaronSmith ChristianHardmeier - JoergTiedemann + JoergTiedemann Baltic Journal of Modern Computing 2 269-281 @@ -6786,8 +6786,8 @@ Interactive-Predictive Translation Based on Multiple Word-Segments MiguelDomingo - AlvaroPeris - FranciscoCasacuberta + AlvaroPeris + FranciscoCasacuberta Baltic Journal of Modern Computing 2 282-291 @@ -6796,8 +6796,8 @@ A Contextual Language Model to Improve Machine Translation of Pronouns by Re-ranking Translation Hypotheses - Ngoc QuangLuong - AndreiPopescu-Belis + Ngoc QuangLuong + AndreiPopescu-Belis Baltic Journal of Modern Computing 2 292-304 @@ -6826,10 +6826,10 @@ Measuring Cognitive Translation Effort with Activity Units - Moritz JonasSchaeffer + Moritz JonasSchaeffer MichaelCarl IsabelLacruz - AkikoAizawa + AkikoAizawa Baltic Journal of Modern Computing 2 331-34195 @@ -6848,7 +6848,7 @@ Dealing with Data Sparseness in <fixed-case>SMT</fixed-case> with Factured Models and Morphological Expansion: a Case Study on <fixed-case>C</fixed-case>roatian - Victor M.Sánchez-Cartagena + Victor M.Sánchez-Cartagena NikolaLjubešić FilipKlubička Baltic Journal of Modern Computing @@ -6860,7 +6860,7 @@ Collaborative Development of a Rule-Based Machine Translator between <fixed-case>C</fixed-case>roatian and <fixed-case>S</fixed-case>erbian FilipKlubička - GemaRamírez-Sánchez + GemaRamírez-Sánchez NikolaLjubešić Baltic Journal of Modern Computing 2 @@ -6871,8 +6871,8 @@ Re-assessing the Impact of <fixed-case>SMT</fixed-case> Techniques with Human Evaluation: a Case Study on <fixed-case>E</fixed-case>nglish—<fixed-case>C</fixed-case>roatian AntonioToral - RaphaelRubino - GemaRamírez-Sánchez + RaphaelRubino + GemaRamírez-Sánchez Baltic Journal of Modern Computing 2 368-375 @@ -6915,7 +6915,7 @@ Processing Document Collections to Automatically Extract Linked Data: Semantic Storytelling Technologies for Smart Curation Workflows PeterBourgonje - JulianMoreno Schneider + JulianMoreno Schneider GeorgRehm FelixSasaki 13–16 @@ -6924,8 +6924,8 @@ On the Robustness of Standalone Referring Expression Generation Algorithms Using <fixed-case>RDF</fixed-case> Data - PabloDuboue - Martin ArielDomínguez + PabloDuboue + Martin ArielDomínguez PaulaEstrella 17–24 W16-3504 @@ -7019,7 +7019,7 @@ Proceedings of the 17th Annual Meeting of the Special Interest Group on Discourse and Dialogue W16-36 - RaquelFernandez + RaquelFernandez WolfgangMinker GiuseppeCarenini RyuichiroHigashinaka @@ -7047,8 +7047,8 @@ Task Lineages: Dialog State Tracking for Flexible Interaction - SungjinLee - AmandaStent + SungjinLee + AmandaStent 11–21 W16-3602 10.18653/v1/W16-3602 @@ -7069,8 +7069,8 @@ VrindavanHarrison LenaReed ErnestoHernandez - EllenRiloff - MarilynWalker + EllenRiloff + MarilynWalker 31–41 W16-3604 10.18653/v1/W16-3604 @@ -7079,11 +7079,11 @@ The <fixed-case>SENSEI</fixed-case> Annotated Corpus: Human Summaries of Reader Comment Conversations in On-line News EmmaBarker - Monica LestariParamita + Monica LestariParamita AhmetAker - EminaKurtic + EminaKurtic MarkHepple - RobertGaizauskas + RobertGaizauskas 42–52 W16-3605 10.18653/v1/W16-3605 @@ -7092,7 +7092,7 @@ Special Session - The Future Directions of Dialogue-Based Intelligent Personal Assistants YoichiMatsuyama - AlexandrosPapangelis + AlexandrosPapangelis 53 W16-3606 10.18653/v1/W16-3606 @@ -7100,7 +7100,7 @@ Keynote - More than meets the ear: Processes that shape dialogue - SusanBrennan + SusanBrennan 54 W16-3607 10.18653/v1/W16-3607 @@ -7110,8 +7110,8 @@ A <fixed-case>W</fixed-case>izard-of-<fixed-case>O</fixed-case>z Study on A Non-Task-Oriented Dialog Systems That Reacts to User Engagement ZhouYu LeahNicolich-Henkin - Alan WBlack - AlexanderRudnicky + Alan WBlack + AlexanderRudnicky 55–63 W16-3608 10.18653/v1/W16-3608 @@ -7158,7 +7158,7 @@ Character Identification on Multiparty Conversation: Identifying Mentions of Characters in <fixed-case>TV</fixed-case> Shows Yu-HsinChen - Jinho D.Choi + Jinho D.Choi 90–100 W16-3612 10.18653/v1/W16-3612 @@ -7187,9 +7187,9 @@ Extracting <fixed-case>PDTB</fixed-case> Discourse Relations from Student Essays - KateForbes-Riley + KateForbes-Riley FanZhang - DianeLitman + DianeLitman 117–127 W16-3615 10.18653/v1/W16-3615 @@ -7209,7 +7209,7 @@ The Role of Discourse Units in Near-Extractive Summarization Junyi JessyLi KapilThadani - AmandaStent + AmandaStent 137–147 W16-3617 10.18653/v1/W16-3617 @@ -7218,8 +7218,8 @@ Initiations and Interruptions in a Spoken Dialog System LeahNicolich-Henkin - CarolynRosé - Alan WBlack + CarolynRosé + Alan WBlack 148–156 W16-3618 10.18653/v1/W16-3618 @@ -7247,9 +7247,9 @@ Syntactic parsing of chat language in contact center conversation corpus AlexisNasr - GeraldineDamnati + GeraldineDamnati AleksandraGuerraz - FredericBechet + FredericBechet 175–184 W16-3621 10.18653/v1/W16-3621 @@ -7258,7 +7258,7 @@ A Context-aware Natural Language Generator for Dialogue Systems OndřejDušek - FilipJurčíček + FilipJurčíček 185–190 W16-3622 10.18653/v1/W16-3622 @@ -7268,7 +7268,7 @@ Identifying Teacher Questions Using Automatic Speech Recognition in Classrooms NathanielBlanchard PatrickDonnelly - Andrew M.Olney + Andrew M.Olney BorhanSamei BrookeWard XiaoyiSun @@ -7304,7 +7304,7 @@ Rapid Prototyping of Form-driven Dialogue Systems Using an Open-source Framework SvetlanaStoyanchev PierreLison - SrinivasBangalore + SrinivasBangalore 216–219 W16-3626 10.18653/v1/W16-3626 @@ -7312,9 +7312,9 @@ <fixed-case>LVCSR</fixed-case> System on a Hybrid <fixed-case>GPU</fixed-case>-<fixed-case>CPU</fixed-case> Embedded Platform for Real-Time Dialog Applications - Alexei V.Ivanov - Patrick L.Lange - DavidSuendermann-Oeft + Alexei V.Ivanov + Patrick L.Lange + DavidSuendermann-Oeft 220–223 W16-3627 10.18653/v1/W16-3627 @@ -7336,7 +7336,7 @@ Selection method of an appropriate response in chat-oriented dialogue systems HideakiMori - MasahiroAraki + MasahiroAraki 228–231 W16-3629 10.18653/v1/W16-3629 @@ -7344,8 +7344,8 @@ Real-Time Understanding of Complex Discriminative Scene Descriptions - RameshManuvinakurike - CaseyKennington + RameshManuvinakurike + CaseyKennington DavidDeVault DavidSchlangen 232–241 @@ -7355,7 +7355,7 @@ Supporting Spoken Assistant Systems with a Graphical User Interface that Signals Incremental Understanding and Prediction State - CaseyKennington + CaseyKennington DavidSchlangen 242–251 W16-3631 @@ -7364,7 +7364,7 @@ Toward incremental dialogue act segmentation in fast-paced interactive dialogue systems - RameshManuvinakurike + RameshManuvinakurike MaikePaetzel ChengQu DavidSchlangen @@ -7385,7 +7385,7 @@ On the Evaluation of Dialogue Systems with Next Utterance Classification RyanLowe - Iulian VladSerban + Iulian VladSerban MichaelNoseworthy LaurentCharlin JoellePineau @@ -7396,8 +7396,8 @@ Towards Using Conversations with Spoken Dialogue Systems in the Automated Assessment of Non-Native Speakers of <fixed-case>E</fixed-case>nglish - DianeLitman - SteveYoung + DianeLitman + SteveYoung MarkGales KateKnill KarenOttewell @@ -7412,7 +7412,7 @@ Measuring the Similarity of Sentential Arguments in Dialogue AmitaMisra BrianEcker - MarilynWalker + MarilynWalker 276–287 W16-3636 10.18653/v1/W16-3636 @@ -7430,7 +7430,7 @@ Do Characters Abuse More Than Words? YasharMehdad - JoelTetreault + JoelTetreault 299–303 W16-3638 10.18653/v1/W16-3638 @@ -7454,7 +7454,7 @@ MasahiroMizukami KoichiroYoshino GrahamNeubig - DavidTraum + DavidTraum SatoshiNakamura 310–318 W16-3640 @@ -7475,7 +7475,7 @@ Reference Resolution in Situated Dialogue with Learned Semantics XiaolongLi - KristyBoyer + KristyBoyer 329–338 W16-3642 10.18653/v1/W16-3642 @@ -7495,7 +7495,7 @@ Learning Fine-Grained Knowledge about Contingent Relations between Everyday Events ElaheRahimtoroghi ErnestoHernandez - MarilynWalker + MarilynWalker 350–359 W16-3644 10.18653/v1/W16-3644 @@ -7526,7 +7526,7 @@ Automatic Recognition of Conversational Strategies in the Service of a Socially-Aware Dialog System RanZhao TanmaySinha - AlanBlack + AlanBlack JustineCassell 381–392 W16-3647 @@ -7546,8 +7546,8 @@ Strategy and Policy Learning for Non-Task-Oriented Conversational Systems ZhouYu ZiyuXu - Alan WBlack - AlexanderRudnicky + Alan WBlack + AlexanderRudnicky 404–412 W16-3649 10.18653/v1/W16-3649 @@ -7559,7 +7559,7 @@ Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016) W16-37 DekaiWu - PushpakBhattacharyya + PushpakBhattacharyya The COLING 2016 Organizing Committee
Osaka, Japan
December @@ -7573,7 +7573,7 @@ Compound Type Identification in <fixed-case>S</fixed-case>anskrit: What Roles do the Corpus and Grammar Play? AmrithKrishna - PavankumarSatuluri + PavankumarSatuluri ShubhamSharma ApurvKumar PawanGoyal @@ -7640,7 +7640,7 @@ Enriching Source for <fixed-case>E</fixed-case>nglish-to-<fixed-case>U</fixed-case>rdu Machine Translation BushraJawaid AmirKamran - OndřejBojar + OndřejBojar 54–63 W16-3706 This paper focuses on the generation of case markers for free word order languages that use case markers as phrasal clitics for marking the relationship between the dependent-noun and its head. The generation of such clitics becomes essential task especially when translating from fixed word order languages where syntactic relations are identified by the positions of the dependent-nouns. To address the problem of missing markers on source-side, artificial markers are added in source to improve alignments with its target counterparts. Up to 1 BLEU point increase is observed over the baseline on different test sets for English-to-Urdu. @@ -7650,8 +7650,8 @@ The <fixed-case>IMAGACT</fixed-case>4<fixed-case>ALL</fixed-case> Ontology of Animated Images: Implications for Theoretical and Machine Translation of Action Verbs from <fixed-case>E</fixed-case>nglish-<fixed-case>I</fixed-case>ndian Languages PitambarBehera SharminMuzaffar - Atul Ku.Ojha - GirishJha + Atul Ku.Ojha + GirishJha 64–73 W16-3707 Action verbs are one of the frequently occurring linguistic elements in any given natural language as the speakers use them during every linguistic intercourse. However, each language expresses action verbs in its own inherently unique manner by categorization. One verb can refer to several interpretations of actions and one action can be expressed by more than one verb. The inter-language and intra-language variations create ambiguity for the translation of languages from the source language to target language with respect to action verbs. IMAGACT is a corpus-based ontological platform of action verbs translated from prototypic animated images explained in English and Italian as meta-languages. In this paper, we are presenting the issues and challenges in translating action verbs of Indian languages as target and English as source language by observing the animated images. Among the ten Indian languages which have been annotated so far on the platform are Sanskrit, Hindi, Urdu, Odia (Oriya), Bengali, Manipuri, Tamil, Assamese, Magahi and Marathi. Out of them, Manipuri belongs to the Sino-Tibetan, Tamil comes off the Dravidian and the rest owe their genesis to the Indo-Aryan language family. One of the issues is that the one-word morphological English verbs are translated into most of the Indian languages as verbs having more than one-word form; for instance as in the case of conjunct, compound, serial verbs and so on. We are further presenting a cross-lingual comparison of action verbs among Indian languages. In addition, we are also dealing with the issues in disambiguating animated images by the L1 native speakers using competence-based judgements and the theoretical and machine translation implications they bear. @@ -7660,7 +7660,7 @@ Crowdsourcing-based Annotation of Emotions in <fixed-case>F</fixed-case>ilipino and <fixed-case>E</fixed-case>nglish Tweets Fermin RobertoLapitan - Riza TheresaBatista-Navarro + Riza TheresaBatista-Navarro EliezerAlbacea 74–82 W16-3708 @@ -7715,7 +7715,7 @@ Clustering-based Phonetic Projection in Mismatched Crowdsourcing Channels for Low-resourced <fixed-case>ASR</fixed-case> WendaChen MarkHasegawa-Johnson - NancyChen + NancyChen PreethiJyothi LavVarshney 133–141 @@ -7766,7 +7766,7 @@ Align Me: A framework to generate Parallel Corpus Using <fixed-case>OCR</fixed-case>s and Bilingual Dictionaries PriyamBakliwal - DevadathV V + DevadathV V C VJawahar 183–187 W16-3719 @@ -7775,7 +7775,7 @@ Learning <fixed-case>I</fixed-case>ndonesian-<fixed-case>C</fixed-case>hinese Lexicon with Bilingual Word Embedding Models and Monolingual Signals - XinyingQiu + XinyingQiu GangqinZhu 188–193 W16-3720 @@ -7795,8 +7795,8 @@ Proceedings of the Workshop on Grammar and Lexicon: interactions and interfaces (GramLex) W16-38 - EvaHajičová - IgorBoguslavsky + EvaHajičová + IgorBoguslavsky The COLING 2016 Organizing Committee
Osaka, Japan
December @@ -7817,7 +7817,7 @@
Multiword Expressions at the Grammar-Lexicon Interface - TimothyBaldwin + TimothyBaldwin 7 W16-3802 In this talk, I will outline a range of challenges presented by multiword expressions in terms of (lexicalist) precision grammar engineering, and different strategies for accommodating those challenges, in an attempt to strike the right balance in terms of generalisation and over- and under-generation. @@ -7825,7 +7825,7 @@ Microsyntactic Phenomena as a Computational Linguistics Issue - LeonidIomdin + LeonidIomdin 8–17 W16-3803 Microsyntactic linguistic units, such as syntactic idioms and non-standard syntactic constructions, are poorly represented in linguistic resources, mostly because the former are elements occupying an intermediate position between the lexicon and the grammar and the latter are too specific to be routinely tackled by general grammars. Consequently, many such units produce substantial gaps in systems intended to solve sophisticated computational linguistics tasks, such as parsing, deep semantic analysis, question answering, machine translation, or text generation. They also present obstacles for applying advanced techniques to these tasks, such as machine learning. The paper discusses an approach aimed at bridging such gaps, focusing on the development of monolingual and multilingual corpora where microsyntactic units are to be tagged. @@ -7833,7 +7833,7 @@ <fixed-case>A</fixed-case>lternations: From Lexicon to Grammar And Back Again - MarkétaLopatková + MarkétaLopatková VáclavaKettnerová 18–27 W16-3804 @@ -7843,7 +7843,7 @@ Extra-Specific Multiword Expressions for Language-Endowed Intelligent Agents MarjorieMcShane - SergeiNirenburg + SergeiNirenburg 28–37 W16-3805 Language-endowed intelligent agents benefit from leveraging lexical knowledge falling at different points along a spectrum of compositionality. This means that robust computational lexicons should include not only the compositional expectations of argument-taking words, but also non-compositional collocations (idioms), semi-compositional collocations that might be difficult for an agent to interpret (e.g., standard metaphors), and even collocations that could be compositionally analyzed but are so frequently encountered that recording their meaning increases the efficiency of interpretation. In this paper we argue that yet another type of string-to-meaning mapping can also be useful to intelligent agents: remembered semantic analyses of actual text inputs. These can be viewed as super-specific multi-word expressions whose recorded interpretations mimic a person’s memories of knowledge previously learned from language input. These differ from typical annotated corpora in two ways. First, they provide a full, context-sensitive semantic interpretation rather than select features. Second, they are are formulated in the ontologically-grounded metalanguage used in a particular agent environment, meaning that the interpretations contribute to the dynamically evolving cognitive capabilites of agents configured in that environment. @@ -7859,7 +7859,7 @@ The Development of Multimodal Lexical Resources - JamesPustejovsky + JamesPustejovsky TuanDo GititKehat NikhilKrishnaswamy @@ -7879,7 +7879,7 @@ Improvement of <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et-like resources by frame typing LaurenceDanlos - MatthieuConstant + MatthieuConstant LucieBarque 61–70 W16-3809 @@ -7888,9 +7888,9 @@ Enriching a Valency Lexicon by Deverbative Nouns - EvaFučíková - JanHajič - ZdeňkaUrešová + EvaFučíková + JanHajič + ZdeňkaUrešová 71–80 W16-3810 We present an attempt to automatically identify Czech deverbative nouns using several methods that use large corpora as well as existing lexical resources. The motivation for the task is to extend a verbal valency (i.e., predicate-argument) lexicon by adding nouns that share the valency properties with the base verb, assuming their properties can be derived (even if not trivially) from the underlying verb by deterministic grammatical rules. At the same time, even in inflective languages, not all deverbatives are simply created from their underlying base verb by regular lexical derivation processes. We have thus developed hybrid techniques that use both large parallel corpora and several standard lexical resources. Thanks to the use of parallel corpora, the resulting sets contain also synonyms, which the lexical derivation rules cannot get. For evaluation, we have manually created a small, 100-verb gold data since no such dataset was initially available for Czech. @@ -7899,7 +7899,7 @@ The Grammar of <fixed-case>E</fixed-case>nglish Deverbal Compounds and their Meaning GianinaIordăchioaia - Lonnekevan der Plas + Lonnekevan der Plas GloriannaJagfeld 81–91 W16-3811 @@ -7921,7 +7921,7 @@ AkifumiYoshimoto AkihikoKato HiroyukiShindo - YujiMatsumoto + YujiMatsumoto 102–109 W16-3813 This paper presents our ongoing work on compilation of English multi-word expression (MWE) lexicon. We are especially interested in collecting flexible MWEs, in which some other components can intervene the expression such as “a number of” vs “a large number of” where a modifier of “number” can be placed in the expression and inherit the original meaning. We fiest collect possible candidates of flexible English MWEs from the web, and annotate all of their occurrences in the Wall Street Journal portion of Ontonotes corpus. We make use of word dependency strcuture information of the sentences converted from the phrase structure annotation. This process enables semi-automatic annotation of MWEs in the corpus and simultanaously produces the internal and external dependency representation of flexible MWEs. @@ -7950,9 +7950,9 @@ W16-39 BoHan AlanRitter - LeonDerczynski + LeonDerczynski WeiXu - TimBaldwin + TimBaldwin The COLING 2016 Organizing Committee
Osaka, Japan
December @@ -7965,7 +7965,7 @@ Processing non-canonical or noisy text: fortuitous data to the rescue - BarbaraPlank + BarbaraPlank 1 W16-3901 Real world data differs radically from the benchmark corpora we use in NLP, resulting in large performance drops. The reason for this problem is obvious: NLP models are trained on limited samples from canonical varieties considered standard. However, there are many dimensions, e.g., sociodemographic, language, genre, sentence type, etc. on which texts can differ from the standard. The solution is not obvious: we cannot control for all factors, and it is not clear how to best go beyond the current practice of training on homogeneous data from a single domain and language. In this talk, I review the notion of canonicity, and how it shapes our community’s approach to language. I argue for the use of fortuitous data. Fortuitous data is data out there that just waits to be harvested. It includes data which is in plain sight, but is often neglected, and more distant sources like behavioral data, which first need to be refined. They provide additional contexts and a myriad of opportunities to build more adaptive language technology, some of which I will explore in this talk. @@ -7998,9 +7998,9 @@ From Noisy Questions to <fixed-case>M</fixed-case>inecraft Texts: Annotation Challenges in Extreme Syntax Scenario - HéctorMartínez Alonso - DjaméSeddah - BenoîtSagot + HéctorMartínez Alonso + DjaméSeddah + BenoîtSagot 13–23 W16-3905 User-generated content presents many challenges for its automatic processing. While many of them do come from out-of-vocabulary effects, others spawn from different linguistic phenomena such as unusual syntax. In this work we present a French three-domain data set made up of question headlines from a cooking forum, game chat logs and associated forums from two popular online games (MINECRAFT & LEAGUE OF LEGENDS). We chose these domains because they encompass different degrees of lexical and syntactic compliance with canonical language. We conduct an automatic and manual evaluation of the difficulties of processing these domains for part-of-speech prediction, and introduce a pilot study to determine whether dependency analysis lends itself well to annotate these data. We also discuss the development cost of our data set. @@ -8018,7 +8018,7 @@ Veracity Computing from Lexical Cues and Perceived Certainty Trends - UweReichel + UweReichel PiroskaLendvai 33–42 W16-3907 @@ -8093,7 +8093,7 @@ JuliePain JessieLevacher AdamQuinquenel - AnjaBelz + AnjaBelz 94–101 W16-3914 Postmarketing surveillance (PMS) has the vital aim to monitor effects of drugs after release for use by the general population, but suffers from under-reporting and limited coverage. Automatic methods for detecting drug effect reports, especially for social media, could vastly increase the scope of PMS. Very few automatic PMS methods are currently available, in particular for the messy text types encountered on Twitter. In this paper we describe first results for developing PMS methods specifically for tweets. We describe the corpus of 125,669 tweets we have created and annotated to train and test the tools. We find that generic tools perform well for tweet-level language identification and tweet-level sentiment analysis (both 0.94 F1-Score). For detection of effect mentions we are able to achieve 0.87 F1-Score, while effect-level adverse-vs.-beneficial analysis proves harder with an F1-Score of 0.64. Among other things, our results indicate that MetaMap semantic types provide a very promising basis for identifying drug effect mentions in tweets. @@ -8112,7 +8112,7 @@ Exploring Word Embeddings for Unsupervised Textual User-Generated Content Normalization Thales FelipeCosta Bertaglia - Maria das GraçasVolpe Nunes + Maria das GraçasVolpe Nunes 112–120 W16-3916 Text normalization techniques based on rules, lexicons or supervised training requiring large corpora are not scalable nor domain interchangeable, and this makes them unsuitable for normalizing user-generated content (UGC). Current tools available for Brazilian Portuguese make use of such techniques. In this work we propose a technique based on distributed representation of words (or word embeddings). It generates continuous numeric vectors of high-dimensionality to represent words. The vectors explicitly encode many linguistic regularities and patterns, as well as syntactic and semantic word relationships. Words that share semantic similarity are represented by similar vectors. Based on these features, we present a totally unsupervised, expandable and language and domain independent method for learning normalization lexicons from word embeddings. Our approach obtains high correction rate of orthographic errors and internet slang in product reviews, outperforming the current available tools for Brazilian Portuguese. @@ -8132,7 +8132,7 @@ <fixed-case>J</fixed-case>apanese Text Normalization with Encoder-Decoder Model TaishiIkeda HiroyukiShindo - YujiMatsumoto + YujiMatsumoto 129–137 W16-3918 Text normalization is the task of transforming lexical variants to their canonical forms. We model the problem of text normalization as a character-level sequence to sequence learning problem and present a neural encoder-decoder model for solving it. To train the encoder-decoder model, many sentences pairs are generally required. However, Japanese non-standard canonical pairs are scarce in the form of parallel corpora. To address this issue, we propose a method of data augmentation to increase data size by converting existing resources into synthesized non-standard forms using handcrafted rules. We conducted an experiment to demonstrate that the synthesized corpus contributes to stably train an encoder-decoder model and improve the performance of Japanese text normalization. @@ -8143,7 +8143,7 @@ BenjaminStrauss BethanyToma AlanRitter - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe WeiXu 138–144 W16-3919 @@ -8162,7 +8162,7 @@ Learning to recognise named entities in tweets by exploiting weakly labelled data Kurt JunsheanEspinosa - Riza TheresaBatista-Navarro + Riza TheresaBatista-Navarro SophiaAnaniadou 153–163 W16-3921 @@ -8171,8 +8171,8 @@ Feature-Rich <fixed-case>T</fixed-case>witter Named Entity Recognition and Classification - Utpal KumarSikdar - BjörnGambäck + Utpal KumarSikdar + BjörnGambäck 164–170 W16-3922 Twitter named entity recognition is the process of identifying proper names and classifying them into some predefined labels/categories. The paper introduces a Twitter named entity system using a supervised machine learning approach, namely Conditional Random Fields. A large set of different features was developed and the system was trained using these. The Twitter named entity task can be divided into two parts: i) Named entity extraction from tweets and ii) Twitter name classification into ten different types. For Twitter named entity recognition on unseen test data, our system obtained the second highest F1 score in the shared task: 63.22%. The system performance on the classification task was worse, with an F1 measure of 40.06% on unseen test data, which was the fourth best of the ten systems participating in the shared task. @@ -8277,8 +8277,8 @@ Proceedings of the Workshop on Language Technology Resources and Tools for Digital Humanities (LT4DH) W16-40 - ErhardHinrichs - MarieHinrichs + ErhardHinrichs + MarieHinrichs ThorstenTrippel The COLING 2016 Organizing Committee
Osaka, Japan
@@ -8300,7 +8300,7 @@
Finding Rising and Falling Words - ErikTjong Kim Sang + ErikTjong Kim Sang 2–9 W16-4002 We examine two different methods for finding rising words (among which neologisms) and falling words (among which archaisms) in decades of magazine texts (millions of words) and in years of tweets (billions of words): one based on correlation coefficients of relative frequencies and time, and one based on comparing initial and final word frequencies of time intervals. We find that smoothing frequency scores improves the precision scores of both methods and that the correlation coefficients perform better on magazine text but worse on tweets. Since the two ranking methods find different words they can be used in side-by-side to study the behavior of words over time. @@ -8310,7 +8310,7 @@ A Dataset for Multimodal Question Answering in the Cultural Heritage Domain ShurongSheng LucVan Gool - Marie-FrancineMoens + Marie-FrancineMoens 10–17 W16-4003 Multimodal question answering in the cultural heritage domain allows visitors to ask questions in a more natural way and thus provides better user experiences with cultural objects while visiting a museum, landmark or any other historical site. In this paper, we introduce the construction of a golden standard dataset that will aid research of multimodal question answering in the cultural heritage domain. The dataset, which will be soon released to the public, contains multimodal content including images of typical artworks from the fascinating old-Egyptian Amarna period, related image-containing documents of the artworks and over 800 multimodal queries integrating visual and textual questions. The multimodal questions and related documents are all in English. The multimodal questions are linked to relevant paragraphs in the related documents that contain the answer to the multimodal query. @@ -8359,7 +8359,7 @@ Feelings from the <fixed-case>P</fixed-case>ast—<fixed-case>A</fixed-case>dapting Affective Lexicons for Historical Emotion Analysis - SvenBuechel + SvenBuechel JohannesHellrich UdoHahn 54–61 @@ -8393,7 +8393,7 @@ SilvanaHartmann IrynaGurevych AnetteFrank - ChrisBiemann + ChrisBiemann 76–84 W16-4011 We introduce the third major release of WebAnno, a generic web-based annotation tool for distributed teams. New features in this release focus on semantic annotation tasks (e.g. semantic role labelling or event annotation) and allow the tight integration of semantic annotations with syntactic annotations. In particular, we introduce the concept of slot features, a novel constraint mechanism that allows modelling the interaction between semantic and syntactic annotations, as well as a new annotation user interface. The new features were developed and used in an annotation project for semantic roles on German texts. The paper briefly introduces this project and reports on experiences performing annotations with the new tool. On a comparative evaluation, our tool reaches significant speedups over WebAnno 2 for a semantic annotation task. @@ -8407,7 +8407,7 @@ MarkJanse PetraAjaka MichaElsner - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe 85–93 W16-4012 Although spanning thousands of years and genres as diverse as liturgy, historiography, lyric and other forms of prose and poetry, the body of Latin texts is still relatively sparse compared to English. Data sparsity in Latin presents a number of challenges for traditional Named Entity Recognition techniques. Solving such challenges and enabling reliable Named Entity Recognition in Latin texts can facilitate many down-stream applications, from machine translation to digital historiography, enabling Classicists, historians, and archaeologists for instance, to track the relationships of historical persons, places, and groups on a large scale. This paper presents the first annotated corpus for evaluating Named Entity Recognition in Latin, as well as a fully supervised model that achieves over 90% F-score on a held-out test set, significantly outperforming a competitive baseline. We also present a novel active learning strategy that predicts how many and which sentences need to be annotated for named entities in order to attain a specified degree of accuracy when recognizing named entities automatically in a given text. This maximizes the productivity of annotators while simultaneously controlling quality. @@ -8441,7 +8441,7 @@ Language technology tools and resources for the analysis of multimodal communication LászlóHunyadi - TamásVáradi + TamásVáradi IstvánSzekrényes 117–124 W16-4016 @@ -8470,7 +8470,7 @@ Semantic Indexing of Multilingual Corpora and its Application on the History Domain AlessandroRaganato - JoseCamacho-Collados + JoseCamacho-Collados AntonioRaganato YunseoJoung 140–147 @@ -8480,7 +8480,7 @@ Tagging <fixed-case>I</fixed-case>ngush - Language Technology For Low-Resource Languages Using Resources From Linguistic Field Work - JörgTiedemann + JörgTiedemann JohannaNichols RonaldSprouse 148–155 @@ -8501,7 +8501,7 @@ Tools and Instruments for Building and Querying Diachronic Computational Lexica - FahadKhan + FahadKhan AndreaBellandi MonicaMonachini 164–171 @@ -8558,7 +8558,7 @@ Could Machine Learning Shed Light on Natural Language Complexity? - Maria DoloresJiménez-López + Maria DoloresJiménez-López LeonorBecerra-Bonache 1–11 W16-4101 @@ -8588,7 +8588,7 @@ Addressing surprisal deficiencies in reading time models - Martenvan Schijndel + Martenvan Schijndel WilliamSchuler 32–37 W16-4104 @@ -8598,7 +8598,7 @@ Towards grounding computational linguistic approaches to readability: Modeling reader-text interaction for easy and difficult texts SowmyaVajjala - DetmarMeurers + DetmarMeurers AlexanderEitel KatharinaScheiter 38–48 @@ -8609,7 +8609,7 @@ Memory access during incremental sentence processing causes reading time latency CoryShain - Martenvan Schijndel + Martenvan Schijndel RichardFutrell EdwardGibson WilliamSchuler @@ -8620,7 +8620,7 @@ Reducing lexical complexity as a tool to increase text accessibility for children with dyslexia - NúriaGala + NúriaGala JohannesZiegler 59–66 W16-4107 @@ -8629,7 +8629,7 @@ Syntactic and Lexical Complexity in <fixed-case>I</fixed-case>talian Noncanonical Structures - RodolfoDelmonte + RodolfoDelmonte 67–78 W16-4108 In this paper we will be dealing with different levels of complexity in the processing of Italian, a Romance language inheriting many properties from Latin which make it an almost free word order language . The paper is concerned with syntactic complexity as measurable on the basis of the cognitive parser that incrementally builds up a syntactic representation to be used by the semantic component. The theory behind will be LFG and parsing preferences will be used to justify one choice both from a principled and a processing point of view. LFG is a transformationless theory in which there is no deep structure separate from surface syntactic structure. This is partially in accordance with constructional theories in which noncanonical structures containing non-argument functions FOCUS/TOPIC are treated as multifunctional constituents. Complexity is computed on a processing basis following suggestions made by Blache and demonstrated by Kluender and Chesi @@ -8637,7 +8637,7 @@ Real Multi-Sense or Pseudo Multi-Sense: An Approach to Improve Word Representation - HaoyueShi + HaoyueShi CaihuaLi JunfengHu 79–88 @@ -8649,7 +8649,7 @@ A Preliminary Study of Statistically Predictive Syntactic Complexity Features and Manual Simplifications in <fixed-case>B</fixed-case>asque ItziarGonzalez-Dios María JesúsAranzabe - ArantzaDíaz de Ilarraza + ArantzaDíaz de Ilarraza 89–97 W16-4110 In this paper, we present a comparative analysis of statistically predictive syntactic features of complexity and the treatment of these features by humans when simplifying texts. To that end, we have used a list of the most five statistically predictive features obtained automatically and the Corpus of Basque Simplified Texts (CBST) to analyse how the syntactic phenomena in these features have been manually simplified. Our aim is to go beyond the descriptions of operations found in the corpus and relate the multidisciplinary findings to understand text complexity from different points of view. We also present some issues that can be important when analysing linguistic complexity. @@ -8667,7 +8667,7 @@ Implicit readability ranking using the latent variable of a <fixed-case>B</fixed-case>ayesian Probit model JohanFalkenjack - ArneJönsson + ArneJönsson 104–112 W16-4112 Data driven approaches to readability analysis for languages other than English has been plagued by a scarcity of suitable corpora. Often, relevant corpora consist only of easy-to-read texts with no rank information or empirical readability scores, making only binary approaches, such as classification, applicable. We propose a Bayesian, latent variable, approach to get the most out of these kinds of corpora. In this paper we present results on using such a model for readability ranking. The model is evaluated on a preliminary corpus of ranked student texts with encouraging results. We also assess the model by showing that it performs readability classification on par with a state of the art classifier while at the same being transparent enough to allow more sophisticated interpretations. @@ -8676,7 +8676,7 @@ <fixed-case>CTAP</fixed-case>: A Web-Based Tool Supporting Automatic Complexity Analysis XiaobinChen - DetmarMeurers + DetmarMeurers 113–119 W16-4113 Informed by research on readability and language acquisition, computational linguists have developed sophisticated tools for the analysis of linguistic complexity. While some tools are starting to become accessible on the web, there still is a disconnect between the features that can in principle be identified based on state-of-the-art computational linguistic analysis, and the analyses a second language acquisition researcher, teacher, or textbook writer can readily obtain and visualize for their own collection of texts. This short paper presents a web-based tool development that aims to meet this challenge. The Common Text Analysis Platform (CTAP) is designed to support fully configurable linguistic feature extraction for a wide range of complexity analyses. It features a user-friendly interface, modularized and reusable analysis component integration, and flexible corpus and feature management. Building on the Unstructured Information Management framework (UIMA), CTAP readily supports integration of state-of-the-art NLP and complexity feature extraction maintaining modularization and reusability. CTAP thereby aims at providing a common platform for complexity analysis, encouraging research collaboration and sharing of feature extraction components—to jointly advance the state-of-the-art in complexity analysis in a form that readily supports real-life use by ordinary users. @@ -8699,7 +8699,7 @@ SawsanAlqahtani HoudaBouamor MahmoudGhoneim - MonaDiab + MonaDiab KemalOflazer 127–136 W16-4115 @@ -8718,9 +8718,9 @@ A Comparison Between Morphological Complexity Measures: Typological Data vs. Language Corpora ChristianBentz - TatyanaRuzsics + TatyanaRuzsics AlexanderKoplenig - TanjaSamardžić + TanjaSamardžić 142–153 W16-4117 Language complexity is an intriguing phenomenon argued to play an important role in both language learning and processing. The need to compare languages with regard to their complexity resulted in a multitude of approaches and methods, ranging from accounts targeting specific structural features to global quantification of variation more generally. In this paper, we investigate the degree to which morphological complexity measures are mutually correlated in a sample of more than 500 languages of 101 language families. We use human expert judgements from the World Atlas of Language Structures (WALS), and compare them to four quantitative measures automatically calculated from language corpora. These consist of three previously defined corpus-derived measures, which are all monolingual, and one new measure based on automatic word-alignment across pairs of languages. We find strong correlations between all the measures, illustrating that both expert judgements and automated approaches converge to similar complexity ratings, and can be used interchangeably. @@ -8730,7 +8730,7 @@ Similarity-Based Alignment of Monolingual Corpora for Text Simplification Purposes SarahAlbertsson EvelinaRennes - ArneJönsson + ArneJönsson 154–163 W16-4118 Comparable or parallel corpora are beneficial for many NLP tasks. The automatic collection of corpora enables large-scale resources, even for less-resourced languages, which in turn can be useful for deducing rules and patterns for text rewriting algorithms, a subtask of automatic text simplification. We present two methods for the alignment of Swedish easy-to-read text segments to text segments from a reference corpus. The first method (M1) was originally developed for the task of text reuse detection, measuring sentence similarity by a modified version of a TF-IDF vector space model. A second method (M2), also accounting for part-of-speech tags, was developed, and the methods were compared. For evaluation, a crowdsourcing platform was built for human judgement data collection, and preliminary results showed that cosine similarity relates better to human ranks than the Dice coefficient. We also saw a tendency that including syntactic context to the TF-IDF vector space model is beneficial for this kind of paraphrase alignment task. @@ -8759,7 +8759,7 @@ JixingLi JonathanBrennan AdamMahar - JohnHale + JohnHale 186–191 W16-4121 The relative contributions of meaning and form to sentence processing remains an outstanding issue across the language sciences. We examine this issue by formalizing four incremental complexity metrics and comparing them against freely-available ROI timecourses. Syntax-related metrics based on top-down parsing and structural dependency-distance turn out to significantly improve a regression model, compared to a simpler model that formalizes only conceptual combination using a distributional vector-space model. This confirms the view of the anterior temporal lobes as combinatory engines that deal in both form (see e.g. Brennan et al., 2012; Mazoyer, 1993) and meaning (see e.g., Patterson et al., 2007). This same characterization applies to a posterior temporal region in roughly “Wernicke’s Area.” @@ -8780,7 +8780,7 @@ Abhinav DeepSingh PoojanMehta SamarHusain - RajkumarRajakrishnan + RajkumarRajakrishnan 202–212 W16-4123 Eye-tracking reading times have been attested to reflect cognitive processes underlying sentence comprehension. However, the use of reading times in NLP applications is an underexplored area of research. In this initial work we build an automatic system to assess sentence complexity using automatically predicted eye-tracking reading time measures and demonstrate the efficacy of these reading times for a well known NLP task, namely, readability assessment. We use a machine learning model and a set of features known to be significant predictors of reading times in order to learn per-word reading times from a corpus of English text having reading times of human readers. Subsequently, we use the model to predict reading times for novel text in the context of the aforementioned task. A model based only on reading times gave competitive results compared to the systems that use extensive syntactic features to compute linguistic complexity. Our work, to the best of our knowledge, is the first study to show that automatically predicted reading times can successfully model the difficulty of a text and can be deployed in practical text processing applications. @@ -8789,7 +8789,7 @@ Upper Bound of Entropy Rate Revisited —<fixed-case>A</fixed-case> New Extrapolation of Compressed Large-Scale Corpora— RyosukeTakahira - KumikoTanaka-Ishii + KumikoTanaka-Ishii ŁukaszDębowski 213–221 W16-4124 @@ -8827,11 +8827,11 @@ The impact of simple feature engineering in multilingual medical <fixed-case>NER</fixed-case> RebeckaWeegar - ArantzaCasillas - ArantzaDiaz de Ilarraza - MaiteOronoz + ArantzaCasillas + ArantzaDiaz de Ilarraza + MaiteOronoz AliciaPérez - KoldoGojenola + KoldoGojenola 1–6 W16-4201 The goal of this paper is to examine the impact of simple feature engineering mechanisms before applying more sophisticated techniques to the task of medical NER. Sometimes papers using scientifically sound techniques present raw baselines that could be improved adding simple and cheap features. This work focuses on entity recognition for the clinical domain for three languages: English, Swedish and Spanish. The task is tackled using simple features, starting from the window size, capitalization, prefixes, and moving to POS and semantic tags. This work demonstrates that a simple initial step of feature engineering can improve the baseline results significantly. Hence, the contributions of this paper are: first, a short list of guidelines well supported with experimental results on three languages and, second, a detailed description of the relevance of these features for medical NER. @@ -8862,7 +8862,7 @@ Feature-Augmented Neural Networks for Patient Note De-identification Ji YoungLee FranckDernoncourt - ÖzlemUzuner + ÖzlemUzuner PeterSzolovits 17–22 W16-4204 @@ -8874,7 +8874,7 @@ PrachetaSahoo AsifEkbal SriparnaSaha - DiegoMollá + DiegoMollá KaushikNandan 23–31 W16-4205 @@ -8886,7 +8886,7 @@ ShwetaYadav AsifEkbal SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya 32–41 W16-4206 Rapid growth in Electronic Medical Records (EMR) has emerged to an expansion of data in the clinical domain. The majority of the available health care information is sealed in the form of narrative documents which form the rich source of clinical information. Text mining of such clinical records has gained huge attention in various medical applications like treatment and decision making. However, medical records enclose patient Private Health Information (PHI) which can reveal the identities of the patients. In order to retain the privacy of patients, it is mandatory to remove all the PHI information prior to making it publicly available. The aim is to de-identify or encrypt the PHI from the patient medical records. In this paper, we propose an algorithm based on deep learning architecture to solve this problem. We perform de-identification of seven PHI terms from the clinical records. Experiments on benchmark datasets show that our proposed approach achieves encouraging performance, which is better than the baseline model developed with Conditional Random Field. @@ -8894,7 +8894,7 @@ Neural Clinical Paraphrase Generation with Attention - Sadid A.Hasan + Sadid A.Hasan BoLiu JoeyLiu AshequlQadir @@ -8968,7 +8968,7 @@ MarkusKreuzthaler MichelOleynik AlexanderAvian - StefanSchulz + StefanSchulz 91–98 W16-4213 Clinical narratives in electronic health record systems are a rich resource of patient-based information. They constitute an ongoing challenge for natural language processing, due to their high compactness and abundance of short forms. German medical texts exhibit numerous ad-hoc abbreviations that terminate with a period character. The disambiguation of period characters is therefore an important task for sentence and abbreviation detection. This task is addressed by a combination of co-occurrence information of word types with trailing period characters, a large domain dictionary, and a simple rule engine, thus merging statistical and dictionary-based disambiguation strategies. An F-measure of 0.95 could be reached by using the unsupervised approach presented in this paper. The results are promising for a domain-independent abbreviation detection strategy, because our approach avoids retraining of models or use case specific feature engineering efforts required for supervised machine learning approaches. @@ -8991,7 +8991,7 @@ W16-43 MalvinaNissim VivianaPatti - BarbaraPlank + BarbaraPlank The COLING 2016 Organizing Committee
Osaka, Japan
December @@ -9005,7 +9005,7 @@ Zooming in on Gender Differences in Social Media AparnaGarimella - RadaMihalcea + RadaMihalcea 1–10 W16-4301 Men are from Mars and women are from Venus - or so the genre of relationship literature would have us believe. But there is some truth in this idea, and researchers in fields as diverse as psychology, sociology, and linguistics have explored ways to better understand the differences between genders. In this paper, we take another look at the problem of gender discrimination and attempt to move beyond the typical surface-level text classification approach, by (1) identifying semantic and psycholinguistic word classes that reflect systematic differences between men and women and (2) finding differences between genders in the ways they use the same words. We describe several experiments and report results on a large collection of blogs authored by men and women. @@ -9092,7 +9092,7 @@ LeaCanales CarloStrapparava EsterBoldrini - PatricioMartínez-Barco + PatricioMartínez-Barco 91–100 W16-4310 Detecting depression or personality traits, tutoring and student behaviour systems, or identifying cases of cyber-bulling are a few of the wide range of the applications, in which the automatic detection of emotion is a crucial element. Emotion detection has the potential of high impact by contributing the benefit of business, society, politics or education. Given this context, the main objective of our research is to contribute to the resolution of one of the most important challenges in textual emotion detection task: the problems of emotional corpora annotation. This will be tackled by proposing of a new semi-automatic methodology. Our innovative methodology consists in two main phases: (1) an automatic process to pre-annotate the unlabelled sentences with a reduced number of emotional categories; and (2) a refinement manual process where human annotators will determine which is the predominant emotion between the emotional categories selected in the phase 1. Our proposal in this paper is to show and evaluate the pre-annotation process to analyse the feasibility and the benefits by the methodology proposed. The results obtained are promising and allow obtaining a substantial improvement of annotation time and cost and confirm the usefulness of our pre-annotation process to improve the annotation task. @@ -9111,8 +9111,8 @@ Predicting <fixed-case>B</fixed-case>rexit: Classifying Agreement is Better than Sentiment and Pollsters FabioCelli - EvgenyStepanov - MassimoPoesio + EvgenyStepanov + MassimoPoesio GiuseppeRiccardi 110–118 W16-4312 @@ -9131,7 +9131,7 @@ Social and linguistic behavior and its correlation to trait empathy MarinaLitvak - JahnaOtterbacher + JahnaOtterbacher Chee SiangAng DavidAtkins 128–137 @@ -9143,7 +9143,7 @@ The Challenges of Multi-dimensional Sentiment Analysis Across Languages EmilyÖhman TimoHonkela - JörgTiedemann + JörgTiedemann 138–142 W16-4315 This paper outlines a pilot study on multi-dimensional and multilingual sentiment analysis of social media content. We use parallel corpora of movie subtitles as a proxy for colloquial language in social media channels and a multilingual emotion lexicon for fine-grained sentiment analyses. Parallel data sets make it possible to study the preservation of sentiments and emotions in translation and our assessment reveals that the lexical approach shows great inter-language agreement. However, our manual evaluation also suggests that the use of purely lexical methods is limited and further studies are necessary to pinpoint the cross-lingual differences and to develop better sentiment classifiers. @@ -9153,7 +9153,7 @@ The Social Mood of News: Self-reported Annotations to Design Automatic Mood Detection Systems FirojAlam FabioCelli - Evgeny A.Stepanov + Evgeny A.Stepanov ArindamGhosh GiuseppeRiccardi 143–152 @@ -9183,8 +9183,8 @@ Can We Make Computers Laugh at Talks? - Chong MinLee - Su-YounYoon + Chong MinLee + Su-YounYoon LeiChen 173–181 W16-4319 @@ -9193,8 +9193,8 @@ Towards Automatically Classifying Depressive Symptoms from <fixed-case>T</fixed-case>witter Data for Population Health - Danielle L.Mowery - AlbertPark + Danielle L.Mowery + AlbertPark CraigBryan MikeConway 182–191 @@ -9207,7 +9207,7 @@ Proceedings of the Open Knowledge Base and Question Answering Workshop (OKBQA 2016) W16-44 - Key-SunChoi + Key-SunChoi ChristinaUnger PiekVossen Jin-DongKim @@ -9225,7 +9225,7 @@ Using <fixed-case>W</fixed-case>ikipedia and Semantic Resources to Find Answer Types and Appropriate Answer Candidate Sets in Question Answering - Po-ChunChen + Po-ChunChen Meng-JieZhuang Chuan-JieLin 1–10 @@ -9300,8 +9300,8 @@ Double Topic Shifts in Open Domain Conversations: Natural Language Interface for a <fixed-case>W</fixed-case>ikipedia-based Robot Application - KristiinaJokinen - GrahamWilcock + KristiinaJokinen + GrahamWilcock 59–66 W16-4408 The paper describes topic shifting in dialogues with a robot that provides information from Wiki-pedia. The work focuses on a double topical construction of dialogue coherence which refers to discourse coherence on two levels: the evolution of dialogue topics via the interaction between the user and the robot system, and the creation of discourse topics via the content of the Wiki-pedia article itself. The user selects topics that are of interest to her, and the system builds a list of potential topics, anticipated to be the next topic, by the links in the article and by the keywords extracted from the article. The described system deals with Wikipedia articles, but could easily be adapted to other digital information providing systems. @@ -9309,13 +9309,13 @@ Filling a Knowledge Graph with a Crowd - GyuHyeonChoi + GyuHyeonChoi SanghaNam DonghoChoi Key-SunChoi 67–71 W16-4409 - + choi-etal-2016-filling @@ -9330,8 +9330,8 @@ <fixed-case>SRDF</fixed-case>: Extracting Lexical Knowledge Graph for Preserving Sentence Meaning SanghaNam - GyuHyeonChoi - YounggyunHahm + GyuHyeonChoi + YounggyunHahm Key-SunChoi 77–81 W16-4411 @@ -9340,7 +9340,7 @@ <fixed-case>QAF</fixed-case>: Frame Semantics-based Question Interpretation - YounggyunHahm + YounggyunHahm SanghaNam Key-SunChoi 82–90 @@ -9359,7 +9359,7 @@ Dedicated Workflow Management for <fixed-case>OKBQA</fixed-case> Framework JiseongKim - GyuHyeonChoi + GyuHyeonChoi Key-SunChoi 97–101 W16-4414 @@ -9374,9 +9374,9 @@ PatrikLambert BogdanBabych KurtEberle - Rafael E.Banchs + Rafael E.Banchs ReinhardRapp - Marta R.Costa-jussà + Marta R.Costa-jussà The COLING 2016 Organizing Committee
Osaka, Japan
December @@ -9411,7 +9411,7 @@ MengyiLiu TongtaoZhang WenxuanZhou - JianminYao + JianminYao HengJi 16–25 W16-4503 @@ -9441,7 +9441,7 @@ RomanSudarikov OndřejDušek MartinHolub - OndřejBojar + OndřejBojar VincentKríž 42–50 W16-4506 @@ -9460,8 +9460,8 @@
Using Bilingual Segments in Generating Word-to-word Translations - KavithaMahesh - GabrielPereira Lopes + KavithaMahesh + GabrielPereira Lopes LuísGomes 61–71 W16-4508 @@ -9479,8 +9479,8 @@ IsaoGoto GrahamNeubig SadaoKurohashi - Ir. HammamRiza - PushpakBhattacharyya + Ir. HammamRiza + PushpakBhattacharyya The COLING 2016 Organizing Committee
Osaka, Japan
December @@ -9549,8 +9549,8 @@ Global Pre-ordering for Improving Sublanguage Translation MasaruFuji MasaoUtiyama - EiichiroSumita - YujiMatsumoto + EiichiroSumita + YujiMatsumoto 84–93 W16-4606 When translating formal documents, capturing the sentence structure specific to the sublanguage is extremely necessary to obtain high-quality translations. This paper proposes a novel global reordering method with particular focus on long-distance reordering for capturing the global sentence structure of a sublanguage. The proposed method learns global reordering models from a non-annotated parallel corpus and works in conjunction with conventional syntactic reordering. Experimental results on the patent abstract sublanguage show substantial gains of more than 25 points in the RIBES metric and comparable BLEU scores both for Japanese-to-English and English-to-Japanese translations. @@ -9569,7 +9569,7 @@ System Description of bjtu_nlp Neural Machine Translation System ShaotongLi - JinAnXu + JinAnXu YufengChen YujieZhang 104–110 @@ -9596,7 +9596,7 @@ <fixed-case>NICT</fixed-case>-2 Translation System for <fixed-case>WAT</fixed-case>2016: Applying Domain Adaptation to Phrase-based Statistical Machine Translation KenjiImamura - EiichiroSumita + EiichiroSumita 126–132 W16-4611 This paper describes the NICT-2 translation system for the 3rd Workshop on Asian Translation. The proposed system employs a domain adaptation method based on feature augmentation. We regarded the Japan Patent Office Corpus as a mixture of four domain corpora and improved the translation quality of each domain. In addition, we incorporated language models constructed from Google n-grams as external knowledge. Our domain adaptation method can naturally incorporate such external knowledge that contributes to translation quality. @@ -9618,7 +9618,7 @@ XiaolinWang AndrewFinch MasaoUtiyama - EiichiroSumita + EiichiroSumita 139–148 W16-4613 Simultaneous interpretation is a very challenging application of machine translation in which the input is a stream of words from a speech recognition engine. The key problem is how to segment the stream in an online manner into units suitable for translation. The segmentation process proceeds by calculating a confidence score for each word that indicates the soundness of placing a sentence boundary after it, and then heuristics are employed to determine the position of the boundaries. Multiple variants of the confidence scoring method and segmentation heuristics were studied. Experimental results show that the best performing strategy is not only efficient in terms of average latency per word, but also achieved end-to-end translation quality close to an offline baseline, and close to oracle segmentation. @@ -9628,7 +9628,7 @@ Similar <fixed-case>S</fixed-case>outheast <fixed-case>A</fixed-case>sian Languages: Corpus-Based Case Study on <fixed-case>T</fixed-case>hai-<fixed-case>L</fixed-case>aotian and <fixed-case>M</fixed-case>alay-<fixed-case>I</fixed-case>ndonesian ChenchenDing MasaoUtiyama - EiichiroSumita + EiichiroSumita 149–156 W16-4614 This paper illustrates the similarity between Thai and Laotian, and between Malay and Indonesian, based on an investigation on raw parallel data from Asian Language Treebank. The cross-lingual similarity is investigated and demonstrated on metrics of correspondence and order of tokens, based on several standard statistical machine translation techniques. The similarity shown in this study suggests a possibility on harmonious annotation and processing of the language pairs in future development. @@ -9646,7 +9646,7 @@ <fixed-case>K</fixed-case>yoto <fixed-case>U</fixed-case>niversity Participation to <fixed-case>WAT</fixed-case> 2016 - FabienCromieres + FabienCromieres ChenhuiChu ToshiakiNakazawa SadaoKurohashi @@ -9729,7 +9729,7 @@ W16-47 PatrickDrouin NataliaGrabar - ThierryHamon + ThierryHamon KyoKageura KoichiTakeuchi The COLING 2016 Organizing Committee @@ -9753,7 +9753,7 @@ Local-Global Vectors to Improve Unigram Terminology Extraction EhsanAmjadian - DianaInkpen + DianaInkpen TaherehParibakht FarahnazFaez 2–11 @@ -9764,7 +9764,7 @@ Recognition of non-domain phrases in automatically extracted lists of terms AgnieszkaMykowiecka - MalgorzataMarciniak + MalgorzataMarciniak PiotrRychlik 12–20 W16-4703 @@ -9773,7 +9773,7 @@ Contextual term equivalent search using domain-driven disambiguation - CarolineBarrière + CarolineBarrière Pierre AndréMénard DaphnéeAzoulay 21–29 @@ -9794,7 +9794,7 @@ Acquisition of semantic relations between terms: how far can we get with standard <fixed-case>NLP</fixed-case> tools? - InaRoesiger + InaRoesiger JuliaBettinger JohannesSchäfer MichaelDorna @@ -9815,7 +9815,7 @@ A Study on the Interplay Between the Corpus Size and Parameters of a Distributional Model for Term Classification - BehrangQasemiZadeh + BehrangQasemiZadeh 62–72 W16-4708 We propose and evaluate a method for identifying co-hyponym lexical units in a terminological resource. The principles of term recognition and distributional semantics are combined to extract terms from a similar category of concept. Given a set of candidate terms, random projections are employed to represent them as low-dimensional vectors. These vectors are derived automatically from the frequency of the co-occurrences of the candidate terms and words that appear within windows of text in their proximity (context-windows). In a k-nearest neighbours framework, these vectors are classified using a small set of manually annotated terms which exemplify concept categories. We then investigate the interplay between the size of the corpus that is used for collecting the co-occurrences and a number of factors that play roles in the performance of the proposed method: the configuration of context-windows for collecting co-occurrences, the selection of neighbourhood size (k), and the choice of similarity metric. @@ -9823,7 +9823,7 @@ Pattern-based Word Sketches for the Extraction of Semantic Relations - PilarLeón-Araúz + PilarLeón-Araúz AntonioSan Martín PamelaFaber 73–82 @@ -9843,8 +9843,8 @@ Providing and Analyzing <fixed-case>NLP</fixed-case> Terms for our Community GilFrancopoulo - JosephMariani - PatrickParoubek + JosephMariani + PatrickParoubek FrédéricVernier 94–103 W16-4711 @@ -9885,12 +9885,12 @@ Proceedings of the Third Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial3) W16-48 - PreslavNakov + PreslavNakov MarcosZampieri LilingTan NikolaLjubešić - JörgTiedemann - ShervinMalmasi + JörgTiedemann + ShervinMalmasi The COLING 2016 Organizing Committee
Osaka, Japan
December @@ -9936,7 +9936,7 @@ The <fixed-case>GW</fixed-case>/<fixed-case>LT</fixed-case>3 <fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial 2016 Shared Task System for Dialects and Similar Languages Detection AyahZirikly BartDesmet - MonaDiab + MonaDiab 33–41 W16-4804 This paper describes the GW/LT3 contribution to the 2016 VarDial shared task on the identification of similar languages (task 1) and Arabic dialects (task 2). For both tasks, we experimented with Logistic Regression and Neural Network classifiers in isolation. Additionally, we implemented a cascaded classifier that consists of coarse and fine-grained classifiers (task 1) and a classifier ensemble with majority voting for task 2. The submitted systems obtained state-of-the art performance and ranked first for the evaluation on social media data (test sets B1 and B2 for task 1), with a maximum weighted F1 score of 91.94%. @@ -9944,7 +9944,7 @@
Processing Dialectal <fixed-case>A</fixed-case>rabic: Exploiting Variability and Similarity to Overcome Challenges and Discover Opportunities - MonaDiab + MonaDiab 42 W16-4805 We recently witnessed an exponential growth in dialectal Arabic usage in both textual data and speech recordings especially in social media. Processing such media is of great utility for all kinds of applications ranging from information extraction to social media analytics for political and commercial purposes to building decision support systems. Compared to other languages, Arabic, especially the informal variety, poses a significant challenge to natural language processing algorithms since it comprises multiple dialects, linguistic code switching, and a lack of standardized orthographies, to top its relatively complex morphology. Inherently, the problem of processing Arabic in the context of social media is the problem of how to handle resource poor languages. In this talk I will go over some of our insights to some of these problems and show how there is a silver lining where we can generalize some of our solutions to other low resource language contexts. @@ -9952,8 +9952,8 @@ Language Related Issues for Machine Translation between Closely Related <fixed-case>S</fixed-case>outh <fixed-case>S</fixed-case>lavic Languages - MajaPopović - MihaelArčan + MajaPopović + MihaelArčan FilipKlubička 43–52 W16-4806 @@ -9993,7 +9993,7 @@ Automatic Verification and Augmentation of Multilingual Lexicons MaryamAminian MohamedAl-Badrashiny - MonaDiab + MonaDiab 73–81 W16-4810 We present an approach for automatic verification and augmentation of multilingual lexica. We exploit existing parallel and monolingual corpora to extract multilingual correspondents via tri-angulation. We demonstrate the efficacy of our approach on two publicly available resources: Tharwa, a three-way lexicon comprising Dialectal Arabic, Modern Standard Arabic and English lemmas among other information (Diab et al., 2014); and BabelNet, a multilingual thesaurus comprising over 276 languages including Arabic variant entries (Navigli and Ponzetto, 2012). Our automated approach yields an F1-score of 71.71% in generating correct multilingual correspondents against gold Tharwa, and 54.46% against gold BabelNet without any human intervention. @@ -10002,7 +10002,7 @@ Faster Decoding for Subword Level Phrase-based <fixed-case>SMT</fixed-case> between Related Languages AnoopKunchukuttan - PushpakBhattacharyya + PushpakBhattacharyya 82–88 W16-4811 A common and effective way to train translation systems between related languages is to consider sub-word level basic units. However, this increases the length of the sentences resulting in increased decoding time. The increase in length is also impacted by the specific choice of data format for representing the sentences as subwords. In a phrase-based SMT framework, we investigate different choices of decoder parameters as well as data format and their impact on decoding time and translation accuracy. We suggest best options for these settings that significantly improve decoding time with little impact on the translation accuracy. @@ -10018,7 +10018,7 @@ Enlarging Scarce In-domain <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>roatian Corpus for <fixed-case>SMT</fixed-case> of <fixed-case>MOOC</fixed-case>s Using <fixed-case>S</fixed-case>erbian - MajaPopović + MajaPopović KostadinCholakov ValiaKordoni NikolaLjubešić @@ -10039,9 +10039,9 @@ <fixed-case>DSL</fixed-case> Shared Task 2016: Perfect Is The Enemy of Good Language Discrimination Through Expectation–Maximization and Chunk-based Language Model OndřejHerman - VítSuchomel + VítSuchomel VítBaisa - PavelRychlý + PavelRychlý 114–118 W16-4815 In this paper we investigate two approaches to discrimination of similar languages: Expectation–maximization algorithm for estimating conditional probability P(word|language) and byte level language models similar to compression-based language modelling methods. The accuracy of these methods reached respectively 86.6% and 88.3% on set A of the DSL Shared task 2016 competition. @@ -10077,7 +10077,7 @@ A Character-level Convolutional Neural Network for Distinguishing Similar Languages and Dialects YonatanBelinkov - JamesGlass + JamesGlass 145–152 W16-4819 Discriminating between closely-related language varieties is considered a challenging and important task. This paper describes our submission to the DSL 2016 shared-task, which included two sub-tasks: one on discriminating similar languages and one on identifying Arabic dialects. We developed a character-level neural network for this task. Given a sequence of characters, our model embeds each character in vector space, runs the sequence through multiple convolutions with different filter widths, and pools the convolutional representations to obtain a hidden vector representation of the text that is used for predicting the language or dialect. We primarily focused on the Arabic dialect identification task and obtained an F1 score of 0.4834, ranking 6th out of 18 participants. We also analyze errors made by our system on the Arabic data in some detail, and point to challenges such an approach is faced with. @@ -10086,7 +10086,7 @@ <fixed-case>H</fixed-case>e<fixed-case>LI</fixed-case>, a Word-Based Backoff Method for Language Identification TommiJauhiainen - KristerLindén + KristerLindén HeidiJauhiainen 153–162 W16-4820 @@ -10106,8 +10106,8 @@ Comparing Two Basic Methods for Discriminating Between Similar Languages and Varieties PabloGamallo - IñakiAlegria - José RamomPichel + IñakiAlegria + José RamomPichel ManexAgirrezabal 170–177 W16-4822 @@ -10116,7 +10116,7 @@ Advances in Ngram-based Discrimination of Similar Languages - CyrilGoutte + CyrilGoutte SergeLéger 178–184 W16-4823 @@ -10142,7 +10142,7 @@ <fixed-case>A</fixed-case>rabic Language <fixed-case>WEKA</fixed-case>-Based Dialect Classifier for <fixed-case>A</fixed-case>rabic Automatic Speech Recognition Transcripts AreejAlshutayri - EricAtwell + EricAtwell AbdulrahmanAlosaimy JamesDickins MichaelIngleby @@ -10174,7 +10174,7 @@ Tuning <fixed-case>B</fixed-case>ayes Baseline for Dialect Detection Hector-HugoFranco-Penya - LilianaMamani Sanchez + LilianaMamani Sanchez 227–234 W16-4829 This paper describes an analysis of our submissions to the Dialect Detection Shared Task 2016. We proposed three different systems that involved simplistic features, to name: a Naive-bayes system, a Support Vector Machines-based system and a Tree Kernel-based system. These systems underperform when compared to other submissions in this shared task, since the best one achieved an accuracy of ~0.834. @@ -10183,8 +10183,8 @@ Vanilla Classifiers for Distinguishing between Similar Languages SergiuNisioi - Alina MariaCiobanu - Liviu P.Dinu + Alina MariaCiobanu + Liviu P.Dinu 235–242 W16-4830 In this paper we describe the submission of the UniBuc-NLP team for the Discriminating between Similar Languages Shared Task, DSL 2016. We present and analyze the results we obtained in the closed track of sub-task 1 (Similar languages and language varieties) and sub-task 2 (Arabic dialects). For sub-task 1 we used a logistic regression classifier with tf-idf feature weighting and for sub-task 2 a character-based string kernel with an SVM classifier. Our results show that good accuracy scores can be obtained with limited feature and model engineering. While certain limitations are to be acknowledged, our approach worked surprisingly well for out-of-domain, social media data, with 0.898 accuracy (3rd place) for dataset B1 and 0.838 accuracy (4th place) for dataset B2. @@ -10205,7 +10205,7 @@ Proceedings of the 3rd Workshop on Natural Language Processing Techniques for Educational Applications (NLPTEA2016) W16-49 Hsin-HsiChen - Yuen-HsienTseng + Yuen-HsienTseng VincentNg XiaofeiLu The COLING 2016 Organizing Committee @@ -10221,7 +10221,7 @@ Simplification of Example Sentences for Learners of <fixed-case>J</fixed-case>apanese Functional Expressions JunLiu - YujiMatsumoto + YujiMatsumoto 1–5 W16-4901 Learning functional expressions is one of the difficulties for language learners, since functional expressions tend to have multiple meanings and complicated usages in various situations. In this paper, we report an experiment of simplifying example sentences of Japanese functional expressions especially for Chinese-speaking learners. For this purpose, we developed “Japanese Functional Expressions List” and “Simple Japanese Replacement List”. To evaluate the method, we conduct a small-scale experiment with Chinese-speaking learners on the effectiveness of the simplified example sentences. The experimental results indicate that simplified sentences are helpful in learning Japanese functional expressions. @@ -10230,7 +10230,7 @@ Effectiveness of Linguistic and Learner Features to Listenability Measurement Using a Decision Tree Classifier KatsunoriKotani - TakehikoYoshimi + TakehikoYoshimi 6–10 W16-4902 In learning Asian languages, learners encounter the problem of character types that are different from those in their first language, for instance, between Chinese characters and the Latin alphabet. This problem also affects listening because learners reconstruct letters from speech sounds. Hence, special attention should be paid to listening practice for learners of Asian languages. However, to our knowledge, few studies have evaluated the ease of listening comprehension (listenability) in Asian languages. Therefore, as a pilot study of listenability in Asian languages, we developed a measurement method for learners of English in order to examine the discriminability of linguistic and learner features. The results showed that the accuracy of our method outperformed a simple majority vote, which suggests that a combination of linguistic and learner features should be used to measure listenability in Asian languages as well as in English. @@ -10240,7 +10240,7 @@ A Two-Phase Approach Towards Identifying Argument Structure in Natural Language ArkanathPathak PawanGoyal - PlabanBhowmick + PlabanBhowmick 11–19 W16-4903 We propose a new approach for extracting argument structure from natural language texts that contain an underlying argument. Our approach comprises of two phases: Score Assignment and Structure Prediction. The Score Assignment phase trains models to classify relations between argument units (Support, Attack or Neutral). To that end, different training strategies have been explored. We identify different linguistic and lexical features for training the classifiers. Through ablation study, we observe that our novel use of word-embedding features is most effective for this task. The Structure Prediction phase makes use of the scores from the Score Assignment phase to arrive at the optimal structure. We perform experiments on three argumentation datasets, namely, AraucariaDB, Debatepedia and Wikipedia. We also propose two baselines and observe that the proposed approach outperforms baseline systems for the final task of Structure Prediction. @@ -10260,7 +10260,7 @@ A Comparison of Word Embeddings for <fixed-case>E</fixed-case>nglish and Cross-Lingual <fixed-case>C</fixed-case>hinese Word Sense Disambiguation Hong JinKang TaoChen - Muthu KumarChandrasekaran + Muthu KumarChandrasekaran Min-YenKan 30–39 W16-4905 @@ -10271,10 +10271,10 @@ Overview of <fixed-case>NLP</fixed-case>-<fixed-case>TEA</fixed-case> 2016 Shared Task for <fixed-case>C</fixed-case>hinese Grammatical Error Diagnosis Lung-HaoLee GaoqiRao - Liang-ChihYu + Liang-ChihYu EndongXun BaolinZhang - Li-PingChang + Li-PingChang 40–48 W16-4906 This paper presents the NLP-TEA 2016 shared task for Chinese grammatical error diagnosis which seeks to identify grammatical error types and their range of occurrence within sentences written by learners of Chinese as foreign language. We describe the task definition, data preparation, performance metrics, and evaluation results. Of the 15 teams registered for this shared task, 9 teams developed the system and submitted a total of 36 runs. We expected this evaluation campaign could lead to the development of more advanced NLP techniques for educational applications, especially for Chinese error detection. All data sets with gold standards and scoring scripts are made publicly available to researchers. @@ -10296,7 +10296,7 @@ YajunLiu YingjieHan LiyanZhuo - HongyingZan + HongyingZan 57–62 W16-4908 In the process of learning and using Chinese, foreigners may have grammatical errors due to negative migration of their native languages. Currently, the computer-oriented automatic detection method of grammatical errors is not mature enough. Based on the evaluating task — CGED2016, we select and analyze the classification model and design feature extraction method to obtain grammatical errors including Mission(M), Disorder(W), Selection (S) and Redundant (R) automatically. The experiment results based on the dynamic corpus of HSK show that the Chinese grammatical error automatic detection method, which uses CRF as classification model and n-gram as feature extraction method. It is simple and efficient which play a positive effect on the research of Chinese grammatical error automatic detection and also a supporting and guiding role in the teaching of Chinese as a foreign language. @@ -10307,7 +10307,7 @@ Po-LinChen Shih-HungWu Liang-PuChen - Ping-CheYang + Ping-CheYang 63–72 W16-4909 This paper describe the CYUT-III system on grammar error detection in the 2016 NLP-TEA Chinese Grammar Error Detection shared task CGED. In this task a system has to detect four types of errors, in-cluding redundant word error, missing word error, word selection error and word ordering error. Based on the conditional random fields (CRF) model, our system is a linear tagger that can detect the errors in learners’ essays. Since the system performance depends on the features heavily, in this paper, we are going to report how to integrate the collocation feature into the CRF model. Our system presents the best detection accuracy and Identification accuracy on the TOCFL dataset, which is in traditional Chi-nese. The same system also works well on the simplified Chinese HSK dataset. @@ -10318,7 +10318,7 @@ Wei-ChiehChou Chin-KuiLin Yuan-FuLiao - Yih-RuWang + Yih-RuWang 73–81 W16-4910 This paper discusses how to adapt two new word embedding features to build a more efficient Chinese Grammatical Error Diagnosis (CGED) systems to assist Chinese foreign learners (CFLs) in improving their written essays. The major idea is to apply word order sensitive Word2Vec approaches including (1) structured skip-gram and (2) continuous window (CWindow) models, because they are more suitable for solving syntax-based problems. The proposed new features were evaluated on the Test of Chinese as a Foreign Language (TOCFL) learner database provided by NLP-TEA-3&CGED shared task. Experimental results showed that the new features did work better than the traditional word order insensitive Word2Vec approaches. Moreover, according to the official evaluation results, our system achieved the lowest (0.1362) false positive (FA) and the highest precision rates in all three measurements. @@ -10327,7 +10327,7 @@ A Fluctuation Smoothing Approach for Unsupervised Automatic Short Answer Grading ShouryaRoy - SandipanDandapat + SandipanDandapat Y.Narahari 82–91 W16-4911 @@ -10337,7 +10337,7 @@ <fixed-case>J</fixed-case>apanese Lexical Simplification for Non-Native Speakers MuhaiminHading - YujiMatsumoto + YujiMatsumoto MakiSakamoto 92–96 W16-4912 @@ -10347,8 +10347,8 @@ A Corpus-based Approach for <fixed-case>S</fixed-case>panish-<fixed-case>C</fixed-case>hinese Language Learning ShuyuanCao - Iriada Cunha - MikelIruskieta + Iriada Cunha + MikelIruskieta 97–106 W16-4913 Due to the huge population that speaks Spanish and Chinese, these languages occupy an important position in the language learning studies. Although there are some automatic translation systems that benefit the learning of both languages, there is enough space to create resources in order to help language learners. As a quick and effective resource that can give large amount language information, corpus-based learning is becoming more and more popular. In this paper we enrich a Spanish-Chinese parallel corpus automatically with part of-speech (POS) information and manually with discourse segmentation (following the Rhetorical Structure Theory (RST) (Mann and Thompson, 1988)). Two search tools allow the Spanish-Chinese language learners to carry out different queries based on tokens and lemmas. The parallel corpus and the research tools are available to the academic community. We propose some examples to illustrate how learners can use the corpus to learn Spanish and Chinese. @@ -10367,9 +10367,9 @@ An Aligned <fixed-case>F</fixed-case>rench-<fixed-case>C</fixed-case>hinese corpus of 10<fixed-case>K</fixed-case> segments from university educational material RuslanKalitvianski - LingxiaoWang - ValérieBellynck - ChristianBoitet + LingxiaoWang + ValérieBellynck + ChristianBoitet 117–121 W16-4915 This paper describes a corpus of nearly 10K French-Chinese aligned segments, produced by post-editing machine translated computer science courseware. This corpus was built from 2013 to 2016 within the PROJECT_NAME project, by native Chinese students. The quality, as judged by native speakers, is ad-equate for understanding (far better than by reading only the original French) and for getting better marks. This corpus is annotated at segment-level by a self-assessed quality score. It has been directly used as supplemental training data to build a statistical machine translation system dedicated to that sublanguage, and can be used to extract the specific bilingual terminology. To our knowledge, it is the first corpus of this kind to be released. @@ -10433,7 +10433,7 @@ W16-50 EduardoBlanco RoserMorante - RoserSaurí + RoserSaurí The COLING 2016 Organizing Committee
Osaka, Japan
December @@ -10448,8 +10448,8 @@ ‘Who would have thought of that!’: A Hierarchical Topic Model for Extraction of Sarcasm-prevalent Topics and Sarcasm Detection AdityaJoshi PrayasJain - PushpakBhattacharyya - MarkCarman + PushpakBhattacharyya + MarkCarman 1–10 W16-5001 Topic Models have been reported to be beneficial for aspect-based sentiment analysis. This paper reports the first topic model for sarcasm detection, to the best of our knowledge. Designed on the basis of the intuition that sarcastic tweets are likely to have a mixture of words of both sentiments as against tweets with literal sentiment (either positive or negative), our hierarchical topic model discovers sarcasm-prevalent topics and topic-level sentiment. Using a dataset of tweets labeled using hashtags, the model estimates topic-level, and sentiment-level distributions. Our evaluation shows that topics such as ‘work’, ‘gun laws’, ‘weather’ are sarcasm-prevalent topics. Our model is also able to discover the mixture of sentiment-bearing words that exist in a text of a given sentiment-related label. Finally, we apply our model to predict sarcasm in tweets. We outperform two prior work based on statistical classifiers with specific features, by around 25%. @@ -10467,7 +10467,7 @@ Detecting Level of Belief in <fixed-case>C</fixed-case>hinese and <fixed-case>S</fixed-case>panish Juan PabloColomer KeyuLai - OwenRambow + OwenRambow 22–30 W16-5003 There has been extensive work on detecting the level of committed belief (also known as “factuality”) that an author is expressing towards the propositions in his or her utterances. Previous work on English has revealed that this can be done as a sequence tagging task. In this paper, we investigate the same task for Chinese and Spanish, two very different languages from English and from each other. @@ -10476,7 +10476,7 @@ Contradiction Detection for Rumorous Claims PiroskaLendvai - UweReichel + UweReichel 31–40 W16-5004 The utilization of social media material in journalistic workflows is increasing, demanding automated methods for the identification of mis- and disinformation. Since textual contradiction across social media posts can be a signal of rumorousness, we seek to model how claims in Twitter posts are being textually contradicted. We identify two different contexts in which contradiction emerges: its broader form can be observed across independently posted tweets and its more specific form in threaded conversations. We define how the two scenarios differ in terms of central elements of argumentation: claims and conversation structure. We design and evaluate models for the two scenarios uniformly as 3-way Recognizing Textual Entailment tasks in order to represent claims and conversation structure implicitly in a generic inference model, while previous studies used explicit or no representation of these properties. To address noisy text, our classifiers use simple similarity features derived from the string and part-of-speech level. Corpus statistics reveal distribution differences for these features in contradictory as opposed to non-contradictory tweet relations, and the classifiers yield state of the art performance. @@ -10484,7 +10484,7 @@ Negation and Modality in Machine Translation - PreslavNakov + PreslavNakov 41 W16-5005 Negation and modality are two important grammatical phenomena that have attracted recent research attention as they can contribute to extra-propositional meaning aspects, among with factuality, attribution, irony and sarcasm. These aspects go beyond analysis such as semantic role labeling, and modeling them is important as a step towards a higher level of language understanding, which is needed for practical applications such as sentiment analysis. In this talk, I will go beyond English, and I will discuss how negation and modality are expressed in other languages. I will also go beyond sentiment analysis and I will present some challenges that the two phenomena pose for machine translation (MT). In particular, I will demonstrate how contemporary MT systems fail on them, and I will discuss some possible solutions. @@ -10492,11 +10492,11 @@ Problematic Cases in the Annotation of Negation in <fixed-case>S</fixed-case>panish - Salud MaríaJiménez-Zafra - MaiteMartin - L. AlfonsoUreña-López - ToniMartí - MarionaTaulé + Salud MaríaJiménez-Zafra + MaiteMartin + L. AlfonsoUreña-López + ToniMartí + MarionaTaulé 42–48 W16-5006 This paper presents the main sources of disagreement found during the annotation of the Spanish SFU Review Corpus with negation (SFU ReviewSP -NEG). Negation detection is a challenge in most of the task related to NLP, so the availability of corpora annotated with this phenomenon is essential in order to advance in tasks related to this area. A thorough analysis of the problems found during the annotation could help in the study of this phenomenon. @@ -10518,8 +10518,8 @@ Proceedings of the Fifth Workshop on Building and Evaluating Resources for Biomedical Text Mining (BioTxtM2016) W16-51 SophiaAnaniadou - RizaBatista-Navarro - Kevin BretonnelCohen + RizaBatista-Navarro + Kevin BretonnelCohen DinaDemner-Fushman PaulThompson The COLING 2016 Organizing Committee @@ -10553,7 +10553,7 @@ Building Content-driven Entity Networks for Scarce Scientific Literature using Content Information - Reinald KimAmplayo + Reinald KimAmplayo MinSong 20–29 W16-5103 @@ -10582,8 +10582,8 @@ Fully unsupervised low-dimensional representation of adverse drug reaction events through distributional semantics AliciaPérez - ArantzaCasillas - KoldoGojenola + ArantzaCasillas + KoldoGojenola 50–59 W16-5106 Electronic health records show great variability since the same concept is often expressed with different terms, either scientific latin forms, common or lay variants and even vernacular naming. Deep learning enables distributional representation of terms in a vector-space, and therefore, related terms tend to be close in the vector space. Accordingly, embedding words through these vectors opens the way towards accounting for semantic relatedness through classical algebraic operations. In this work we propose a simple though efficient unsupervised characterization of Adverse Drug Reactions (ADRs). This approach exploits the embedding representation of the terms involved in candidate ADR events, that is, drug-disease entity pairs. In brief, the ADRs are represented as vectors that link the drug with the disease in their context through a recursive additive model. We discovered that a low-dimensional representation that makes use of the modulus and argument of the embedded representation of the ADR event shows correlation with the manually annotated class. Thus, it can be derived that this characterization results in to be beneficial for further classification tasks as predictive features. @@ -10592,11 +10592,11 @@ A Dataset for <fixed-case>ICD</fixed-case>-10 Coding of Death Certificates: Creation and Usage ThomasLavergne - AurélieNévéol + AurélieNévéol AudeRobert CyrilGrouin GrégoireRey - PierreZweigenbaum + PierreZweigenbaum 60–69 W16-5107 Very few datasets have been released for the evaluation of diagnosis coding with the International Classification of Diseases, and only one so far in a language other than English. This paper describes a large-scale dataset prepared from French death certificates, and the problems which needed to be solved to turn it into a dataset suitable for the application of machine learning and natural language processing methods of ICD-10 coding. The dataset includes the free-text statements written by medical doctors, the associated meta-data, the human coder-assigned codes for each statement, as well as the statement segments which supported the coder’s decision for each code. The dataset comprises 93,694 death certificates totalling 276,103 statements and 377,677 ICD-10 code assignments (3,457 unique codes). It was made available for an international automated coding shared task, which attracted five participating teams. An extended version of the dataset will be used in a new edition of the shared task. @@ -10616,7 +10616,7 @@ Supervised classification of end-of-lines in clinical text with no manual annotation - PierreZweigenbaum + PierreZweigenbaum CyrilGrouin ThomasLavergne 80–88 @@ -10627,7 +10627,7 @@ <fixed-case>B</fixed-case>io<fixed-case>DCA</fixed-case> Identifier: A System for Automatic Identification of Discourse Connective and Arguments from Biomedical Text SindhujaGopalan - SobhaLalitha Devi + SobhaLalitha Devi 89–98 W16-5110 This paper describes a Natural language processing system developed for automatic identification of explicit connectives, its sense and arguments. Prior work has shown that the difference in usage of connectives across corpora affects the cross domain connective identification task negatively. Hence the development of domain specific discourse parser has become indispensable. Here, we present a corpus annotated with discourse relations on Medline abstracts. Kappa score is calculated to check the annotation quality of our corpus. The previous works on discourse analysis in bio-medical data have concentrated only on the identification of connectives and hence we have developed an end-end parser for connective and argument identification using Conditional Random Fields algorithm. The type and sub-type of the connective sense is also identified. The results obtained are encouraging. @@ -10636,7 +10636,7 @@ Data, tools and resources for mining social media drug chatter AbeedSarker - GracielaGonzalez + GracielaGonzalez 99–107 W16-5111 Social media has emerged into a crucial resource for obtaining population-based signals for various public health monitoring and surveillance tasks, such as pharmacovigilance. There is an abundance of knowledge hidden within social media data, and the volume is growing. Drug-related chatter on social media can include user-generated information that can provide insights into public health problems such as abuse, adverse reactions, long-term effects, and multi-drug interactions. Our objective in this paper is to present to the biomedical natural language processing, data science, and public health communities data sets (annotated and unannotated), tools and resources that we have collected and created from social media. The data we present was collected from Twitter using the generic and brand names of drugs as keywords, along with their common misspellings. Following the collection of the data, annotation guidelines were created over several iterations, which detail important aspects of social media data annotation and can be used by future researchers for developing similar data sets. The annotation guidelines were followed to prepare data sets for text classification, information extraction and normalization. In this paper, we discuss the preparation of these guidelines, outline the data sets prepared, and present an overview of our state-of-the-art systems for data collection, supervised classification, and information extraction. In addition to the development of supervised systems for classification and extraction, we developed and released unlabeled data and language models. We discuss the potential uses of these language models in data mining and the large volumes of unlabeled data from which they were generated. We believe that the summaries and repositories we present here of our data, annotation guidelines, models, and tools will be beneficial to the research community as a single-point entry for all these resources, and will promote further research in this area. @@ -10646,9 +10646,9 @@ Detection of Text Reuse in <fixed-case>F</fixed-case>rench Medical Corpora EvaD’hondt CyrilGrouin - AurélieNévéol - EfstathiosStamatatos - PierreZweigenbaum + AurélieNévéol + EfstathiosStamatatos + PierreZweigenbaum 108–114 W16-5112 Electronic Health Records (EHRs) are increasingly available in modern health care institutions either through the direct creation of electronic documents in hospitals’ health information systems, or through the digitization of historical paper records. Each EHR creation method yields the need for sophisticated text reuse detection tools in order to prepare the EHR collections for efficient secondary use relying on Natural Language Processing methods. Herein, we address the detection of two types of text reuse in French EHRs: 1) the detection of updated versions of the same document and 2) the detection of document duplicates that still bear surface differences due to OCR or de-identification processing. We present a robust text reuse detection method to automatically identify redundant document pairs in two French EHR corpora that achieves an overall macro F-measure of 0.68 and 0.60, respectively and correctly identifies all redundant document pairs of interest. @@ -10696,8 +10696,8 @@ W16-52 YoheiMurakami DonghuiLin - NancyIde - JamesPustejovsky + NancyIde + JamesPustejovsky The COLING 2016 Organizing Committee
Osaka, Japan
December @@ -10710,10 +10710,10 @@ <fixed-case>K</fixed-case>athaa : <fixed-case>NLP</fixed-case> Systems as Edge-Labeled Directed Acyclic <fixed-case>M</fixed-case>ulti<fixed-case>G</fixed-case>raphs - SharadaMohanty + SharadaMohanty Nehal JWani - ManishSrivastava - DiptiSharma + ManishSrivastava + DiptiSharma 1–10 W16-5201 We present Kathaa, an Open Source web-based Visual Programming Framework for Natural Language Processing (NLP) Systems. Kathaa supports the design, execution and analysis of complex NLP systems by visually connecting NLP components from an easily extensible Module Library. It models NLP systems an edge-labeled Directed Acyclic MultiGraph, and lets the user use publicly co-created modules in their own NLP applications irrespective of their technical proficiency in Natural Language Processing. Kathaa exposes an intuitive web based Interface for the users to interact with and modify complex NLP Systems; and a precise Module definition API to allow easy integration of new state of the art NLP components. Kathaa enables researchers to publish their services in a standardized format to enable the masses to use their services out of the box. The vision of this work is to pave the way for a system like Kathaa, to be the Lego blocks of NLP Research and Applications. As a practical use case we use Kathaa to visually implement the Sampark Hindi-Panjabi Machine Translation Pipeline and the Sampark Hindi-Urdu Machine Translation Pipeline, to demonstrate the fact that Kathaa can handle really complex NLP systems while still being intuitive for the end user. @@ -10723,7 +10723,7 @@ <fixed-case>LAPPS</fixed-case>/Galaxy: Current State and Next Steps NancyIde KeithSuderman - EricNyberg + EricNyberg JamesPustejovsky MarcVerhagen 11–18 @@ -10776,7 +10776,7 @@ A non-expert <fixed-case>K</fixed-case>aldi recipe for <fixed-case>V</fixed-case>ietnamese Speech Recognition System Hieu-ThiLuong - Hai-QuanVu + Hai-QuanVu 51–55 W16-5207 In this paper we describe a non-expert setup for Vietnamese speech recognition system using Kaldi toolkit. We collected a speech corpus over fifteen hours from about fifty Vietnamese native speakers and using it to test the feasibility of our setup. The essential linguistic components for the Automatic Speech Recognition (ASR) system was prepared basing on the written form of the language instead of expertise knowledge on linguistic and phonology as commonly seen in rich resource languages like English. The modeling of tones by integrating them into the phoneme and using the phonetic decision tree is also discussed. Experimental results showed this setup for ASR systems does yield competitive results while still have potentials for further improvements. @@ -10832,7 +10832,7 @@ Vectors or Graphs? On Differences of Representations for Distributional Semantic Models - ChrisBiemann + ChrisBiemann 1–7 W16-5301 Distributional Semantic Models (DSMs) have recently received increased attention, together with the rise of neural architectures for scalable training of dense vector embeddings. While some of the literature even includes terms like ‘vectors’ and ‘dimensionality’ in the definition of DSMs, there are some good reasons why we should consider alternative formulations of distributional models. As an instance, I present a scalable graph-based solution to distributional semantics. The model belongs to the family of ‘count-based’ DSMs, keeps its representation sparse and explicit, and thus fully interpretable. I will highlight some important differences between sparse graph-based and dense vector approaches to DSMs: while dense vector-based models are computationally easier to handle and provide a nice uniform representation that can be compared and combined in many ways, they lack interpretability, provenance and robustness. On the other hand, graph-based sparse models have a more straightforward interpretation, handle sense distinctions more naturally and can straightforwardly be linked to knowledge bases, while lacking the ability to compare arbitrary lexical units and a compositionality operation. Since both representations have their merits, I opt for exploring their combination in the outlook. @@ -10840,7 +10840,7 @@ “Beware the Jabberwock, dear reader!” Testing the distributional reality of construction semantics - GianlucaLebani + GianlucaLebani AlessandroLenci 8–18 W16-5302 @@ -10868,10 +10868,10 @@ Semantic Relation Classification: Task Formalisation and Refinement VivianSantos - ManuelaHuerliman + ManuelaHuerliman BrianDavis SiegfriedHandschuh - AndréFreitas + AndréFreitas 30–39 W16-5305 The identification of semantic relations between terms within texts is a fundamental task in Natural Language Processing which can support applications requiring a lightweight semantic interpretation model. Currently, semantic relation classification concentrates on relations which are evaluated over open-domain data. This work provides a critique on the set of abstract relations used for semantic relation classification with regard to their ability to express relationships between terms which are found in a domain-specific corpora. Based on this analysis, this work proposes an alternative semantic relation model based on reusing and extending the set of abstract relations present in the DOLCE ontology. The resulting set of relations is well grounded, allows to capture a wide range of relations and could thus be used as a foundation for automatic classification of semantic relations. @@ -10881,7 +10881,7 @@ The Power of Language Music: <fixed-case>A</fixed-case>rabic Lemmatization through Patterns MohammedAttia AyahZirikly - MonaDiab + MonaDiab 40–50 W16-5306 The interaction between roots and patterns in Arabic has intrigued lexicographers and morphologists for centuries. While roots provide the consonantal building blocks, patterns provide the syllabic vocalic moulds. While roots provide abstract semantic classes, patterns realize these classes in specific instances. In this way both roots and patterns are indispensable for understanding the derivational, morphological and, to some extent, the cognitive aspects of the Arabic language. In this paper we perform lemmatization (a high-level lexical processing) without relying on a lookup dictionary. We use a hybrid approach that consists of a machine learning classifier to predict the lemma pattern for a given stem, and mapping rules to convert stems to their respective lemmas with the vocalization defined by the pattern. @@ -10899,7 +10899,7 @@ Towards a resource based on users’ knowledge to overcome the Tip of the Tongue problem. MichaelZock - ChrisBiemann + ChrisBiemann 57–68 W16-5308 Language production is largely a matter of words which, in the case of access problems, can be searched for in an external resource (lexicon, thesaurus). In this kind of dialogue the user provides the momentarily available knowledge concerning the target and the system responds with the best guess(es) it can make given this input. As tip-of-the-tongue (ToT)-studies have shown, people always have some knowledge concerning the target (meaning fragments, number of syllables, ...) even if its complete form is eluding them. We will show here how to tap on this knowledge to build a resource likely to help authors (speakers/writers) to overcome the ToT-problem. Yet, before doing so we need a better understanding of the various kinds of knowledge people have when looking for a word. To this end, we asked crowdworkers to provide some cues to describe a given target and to specify then how each one of them relates to the target, in the hope that this could help others to find the elusive word. Next, we checked how well a given search strategy worked when being applied to differently built lexical networks. The results showed quite dramatic differences, which is not really surprising. After all, different networks are built for different purposes; hence each one of them is more or less suited for a given task. What was more surprising though is the fact that the relational information given by the users did not allow us to find the elusive word in WordNet better than without it. @@ -10967,7 +10967,7 @@ <fixed-case>C</fixed-case>og<fixed-case>AL</fixed-case>ex-<fixed-case>V</fixed-case> Shared Task: <fixed-case>LOPE</fixed-case> KananLuce JiaxingYu - Shu-KaiHsieh + Shu-KaiHsieh 110–113 W16-5315 Automatic discovery of semantically-related words is one of the most important NLP tasks, and has great impact on the theoretical psycholinguistic modeling of the mental lexicon. In this shared task, we employ the word embeddings model to testify two thoughts explicitly or implicitly assumed by the NLP community: (1). Word embedding models can reflect syntagmatic similarities in usage between words to distances in projected vector space. (2). Word embedding models can reflect paradigmatic relationships between words. @@ -10996,7 +10996,7 @@ StefanBott NanaKhvtisavrishvili MaxKisselew - SabineSchulte im Walde + SabineSchulte im Walde 125–133 W16-5318 German particle verbs represent a frequent type of multi-word-expression that forms a highly productive paradigm in the lexicon. Similarly to other multi-word expressions, particle verbs exhibit various levels of compositionality. One of the major obstacles for the study of compositionality is the lack of representative gold standards of human ratings. In order to address this bottleneck, this paper presents such a gold standard data set containing 400 randomly selected German particle verbs. It is balanced across several particle types and three frequency bands, and accomplished by human ratings on the degree of semantic compositionality. @@ -11023,8 +11023,8 @@ A Proposal for combining “general” and specialized frames - Marie-ClaudeL’ Homme - CarlosSubirats + Marie-ClaudeL’ Homme + CarlosSubirats BenoîtRobichaud 156–165 W16-5321 @@ -11044,7 +11044,7 @@ Categorization of Semantic Roles for Dictionary Definitions VivianSilva SiegfriedHandschuh - AndréFreitas + AndréFreitas 176–184 W16-5323 Understanding the semantic relationships between terms is a fundamental task in natural language processing applications. While structured resources that can express those relationships in a formal way, such as ontologies, are still scarce, a large number of linguistic resources gathering dictionary definitions is becoming available, but understanding the semantic structure of natural language definitions is fundamental to make them useful in semantic interpretation tasks. Based on an analysis of a subset of WordNet’s glosses, we propose a set of semantic roles that compose the semantic structure of a dictionary definition, and show how they are related to the definition’s syntactic configuration, identifying patterns that can be used in the development of information extraction frameworks and semantic models. @@ -11053,7 +11053,7 @@ Corpus and dictionary development for classifiers/quantifiers towards a <fixed-case>F</fixed-case>rench-<fixed-case>J</fixed-case>apanese machine translation MutsukoTomokiyo - ChristianBoitet + ChristianBoitet 185–192 W16-5324 Although quantifiers/classifiers expressions occur frequently in everyday communications or written documents, there is no description for them in classical bilingual paper dictionaries, nor in machine-readable dictionaries. The paper describes a corpus and dictionary development for quantifiers/classifiers, and their usage in the framework of French-Japanese machine translation (MT). They often cause problems of lexical ambiguity and of set phrase recognition during analysis, in particular for a long-distance language pair like French and Japanese. For the development of a dictionary aiming at ambiguity resolution for expressions including quantifiers and classifiers which may be ambiguous with common nouns, we have annotated our corpus with UWs (interlingual lexemes) of UNL (Universal Networking Language) found on the UNL-jp dictionary. The extraction of potential classifiers/quantifiers from corpus is made by UNLexplorer web service. Keywords : classifiers, quantifiers, phraseology study, corpus annotation, UNL (Universal Networking Language), UWs dictionary, Tori Bank, French-Japanese machine translation (MT). @@ -11064,10 +11064,10 @@ Proceedings of the 12th Workshop on Asian Language Resources (ALR12) W16-54 - KoitiHasida - Kam-FaiWong + KoitiHasida + Kam-FaiWong NicolettaCalzorari - Key-SunChoi + Key-SunChoi The COLING 2016 Organizing Committee
Osaka, Japan
December @@ -11106,7 +11106,7 @@ Tak-sumWong XinyingChen KimGerdes - JohnLee + JohnLee 20–29 W16-5403 This article proposes a Universal Dependency Annotation Scheme for Mandarin Chinese, including POS tags and dependency analysis. We identify cases of idiosyncrasy of Mandarin Chinese that are difficult to fit into the current schema which has mainly been based on the descriptions of various Indo-European languages. We discuss differences between our scheme and those of the Stanford Chinese Dependencies and the Chinese Dependency Treebank. @@ -11126,8 +11126,8 @@ Minh-TienNguyen Dac VietLai Phong-KhacDo - Duc-VuTran - Minh-LeNguyen + Duc-VuTran + Minh-LeNguyen 38–48 W16-5405 This paper presents VSoLSCSum, a Vietnamese linked sentence-comment dataset, which was manually created to treat the lack of standard corpora for social context summarization in Vietnamese. The dataset was collected through the keywords of 141 Web documents in 12 special events, which were mentioned on Vietnamese Web pages. Social users were asked to involve in creating standard summaries and the label of each sentence or comment. The inter-agreement calculated by Cohen’s Kappa among raters after validating is 0.685. To illustrate the potential use of our dataset, a learning to rank method was trained by using a set of local and social features. Experimental results indicate that the summary model trained on our dataset outperforms state-of-the-art baselines in both ROUGE-1 and ROUGE-2 in social context summarization. @@ -11136,7 +11136,7 @@ <fixed-case>BCCWJ</fixed-case>-<fixed-case>D</fixed-case>ep<fixed-case>P</fixed-case>ara: A Syntactic Annotation Treebank on the ‘<fixed-case>B</fixed-case>alanced <fixed-case>C</fixed-case>orpus of <fixed-case>C</fixed-case>ontemporary <fixed-case>W</fixed-case>ritten <fixed-case>J</fixed-case>apanese’ MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 49–58 W16-5406 Paratactic syntactic structures are difficult to represent in syntactic dependency tree structures. As such, we propose an annotation schema for syntactic dependency annotation of Japanese, in which coordinate structures are split from and overlaid on bunsetsu-based (base phrase unit) dependency. The schema represents nested coordinate structures, non-constituent conjuncts, and forward sharing as the set of regions. The annotation was performed on the core data of ‘Balanced Corpus of Contemporary Written Japanese’, which comprised about one million words and 1980 samples from six registers, such as newspapers, books, magazines, and web texts. @@ -11167,7 +11167,7 @@ An Overview of <fixed-case>BPPT</fixed-case>’s <fixed-case>I</fixed-case>ndonesian Language Resources GunarsoGunarso - HammamRiza + HammamRiza 73–77 W16-5409 This paper describes various Indonesian language resources that Agency for the Assessment and Application of Technology (BPPT) has developed and collected since mid 80’s when we joined MMTS (Multilingual Machine Translation System), an international project coordinated by CICC-Japan to develop a machine translation system for five Asian languages (Bahasa Indonesia, Malay, Thai, Japanese, and Chinese). Since then, we have been actively doing many types of research in the field of statistical machine translation, speech recognition, and speech synthesis which requires many text and speech corpus. Most recent cooperation within ASEAN-IVO is the development of Indonesian ALT (Asian Language Treebank) has added new NLP tools. @@ -11212,7 +11212,7 @@ Automatic Evaluation of Commonsense Knowledge for Refining <fixed-case>J</fixed-case>apanese <fixed-case>C</fixed-case>oncept<fixed-case>N</fixed-case>et SeiyaShudo RafalRzepka - KenjiAraki + KenjiAraki 105–112 W16-5413 In this paper we present two methods for automatic common sense knowledge evaluation for Japanese entries in ConceptNet ontology. Our proposed methods utilize text-mining approach: one with relation clue words and WordNet synonyms, and one without. Both methods were tested with a blog corpus. The system based on our proposed methods reached relatively high precision score for three relations (MadeOf, UsedFor, AtLocation), which is comparable with previous research using commercial search engines and simpler input. We analyze errors and discuss problems of common sense evaluation, both manual and automatic and propose ideas for further improvements. @@ -11223,7 +11223,7 @@ MohamedAl-Badrashiny AbdelatiHawwari MahmoudGhoneim - MonaDiab + MonaDiab 113–122 W16-5414 Although MWE are relatively morphologically and syntactically fixed expressions, several types of flexibility can be observed in MWE, verbal MWE in particular. Identifying the degree of morphological and syntactic flexibility of MWE is very important for many Lexicographic and NLP tasks. Adding MWE variants/tokens to a dictionary resource requires characterizing the flexibility among other morphosyntactic features. Carrying out the task manually faces several challenges since it is a very laborious task time and effort wise, as well as it will suffer from coverage limitation. The problem is exacerbated in rich morphological languages where the average word in Arabic could have 12 possible inflection forms. Accordingly, in this paper we introduce a semi-automatic Arabic multiwords expressions resource (SAMER). We propose an automated method that identifies the morphological and syntactic flexibility of Arabic Verbal Multiword Expressions (AVMWE). All observed morphological variants and syntactic pattern alternations of an AVMWE are automatically acquired using large scale corpora. We look for three morphosyntactic aspects of AVMWE types investigating derivational and inflectional variations and syntactic templates, namely: 1) inflectional variation (inflectional paradigm) and calculating degree of flexibility; 2) derivational productivity; and 3) identifying and classifying the different syntactic types. We build a comprehensive list of AVMWE. Every token in the AVMWE list is lemmatized and tagged with POS information. We then search Arabic Gigaword and All ATBs for all possible flexible matches. For each AVMWE type we generate: a) a statistically ranked list of MWE-lexeme inflections and syntactic pattern alternations; b) An abstract syntactic template; and c) The most frequent form. Our technique is validated using a Golden MWE annotated list. The results shows that the quality of the generated resource is 80.04%. @@ -11231,7 +11231,7 @@ Sentiment Analysis for Low Resource Languages: A Study on Informal <fixed-case>I</fixed-case>ndonesian Tweets - Tuan AnhLe + Tuan AnhLe DavidMoeljadi YasuhideMiura TomokoOhkuma @@ -11246,7 +11246,7 @@ Proceedings of the INLG 2016 Workshop on Computational Creativity in Natural Language Generation W16-55 MatthewPurver - PabloGervás + PabloGervás SaschaGriffiths 10.18653/v1/W16-55 Association for Computational Linguistics @@ -11300,7 +11300,7 @@ A Challenge to the Third Hoshi Shinichi Award - SatoshiSato + SatoshiSato 31–35 W16-5505 10.18653/v1/W16-5505 @@ -11339,7 +11339,7 @@ Combinatorics vs Grammar: Archeology of Computational Poetry in Tape Mark <fixed-case>I</fixed-case> - AlessandroMazzei + AlessandroMazzei AndreaValle 61–70 W16-5509 @@ -11357,7 +11357,7 @@ DirkHovy DavidJurgens BrendanO’Connor - AliceOh + AliceOh OrenTsur SvitlanaVolkova 10.18653/v1/W16-56 @@ -11374,7 +11374,7 @@ Relating semantic similarity and semantic association to how humans label other people KennethJoseph - Kathleen M.Carley + Kathleen M.Carley 1–10 W16-5601 10.18653/v1/W16-5601 @@ -11401,7 +11401,7 @@ Social Proof: The Impact of Author Traits on Influence Detection SaraRosenthal - KathyMcKeown + KathyMcKeown 27–36 W16-5604 10.18653/v1/W16-5604 @@ -11419,7 +11419,7 @@ User profiling with geo-located posts and demographic data AdamPoulston MarkStevenson - KalinaBontcheva + KalinaBontcheva 43–48 W16-5606 10.18653/v1/W16-5606 @@ -11448,7 +11448,7 @@ Identifying Stance by Analyzing Political Discourse on <fixed-case>T</fixed-case>witter - KristenJohnson + KristenJohnson DanGoldwasser 66–75 W16-5609 @@ -11467,9 +11467,9 @@ The Effects of Data Collection Methods in <fixed-case>T</fixed-case>witter - Sunghwan MacKim + Sunghwan MacKim StephenWan - CécileParis + CécileParis BrianJin BellaRobinson 86–91 @@ -11510,7 +11510,7 @@ Bag of What? Simple Noun Phrase Extraction for Text Analysis AbramHandler MatthewDenny - HannaWallach + HannaWallach BrendanO’Connor 114–124 W16-5615 @@ -11529,8 +11529,8 @@ The Clinical Panel: Leveraging Psychological Expertise During <fixed-case>NLP</fixed-case> Research GlenCoppersmith KristyHollingshead - H. AndrewSchwartz - MollyIreland + H. AndrewSchwartz + MollyIreland RebeccaResnik KateLoveys AprilForeman @@ -11542,7 +11542,7 @@ Are You a Racist or Am <fixed-case>I</fixed-case> Seeing Things? Annotator Influence on Hate Speech Detection on <fixed-case>T</fixed-case>witter - ZeerakWaseem + ZeerakWaseem 138–142 W16-5618 10.18653/v1/W16-5618 @@ -11551,8 +11551,8 @@ Disentangling Topic Models: A Cross-cultural Analysis of Personal Values through Words StevenWilson - RadaMihalcea - RyanBoyd + RadaMihalcea + RyanBoyd JamesPennebaker 143–152 W16-5619 @@ -11622,7 +11622,7 @@ Automatic Identification of Narrative Diegesis and Point of View JoshuaEisenberg - MarkFinlayson + MarkFinlayson 36–46 W16-5705 10.18653/v1/W16-5705 @@ -11632,7 +11632,7 @@ Richer Event Description: Integrating event coreference with temporal, causal and bridging annotation TimO’Gorman KristinWright-Bettner - MarthaPalmer + MarthaPalmer 47–56 W16-5706 10.18653/v1/W16-5706 @@ -11640,7 +11640,7 @@ <fixed-case>NASTEA</fixed-case>: Investigating Narrative Schemas through Annotated Entities - DanSimonson + DanSimonson AnthonyDavis 57–66 W16-5707 @@ -11661,10 +11661,10 @@ Proceedings of the Second Workshop on Computational Approaches to Code Switching W16-58 - MonaDiab + MonaDiab PascaleFung MahmoudGhoneim - JuliaHirschberg + JuliaHirschberg ThamarSolorio 10.18653/v1/W16-58 Association for Computational Linguistics @@ -11679,7 +11679,7 @@ Challenges of Computational Processing of Code-Switching - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu SarahSchulz Ngoc ThangVu 1–11 @@ -11689,9 +11689,9 @@ Simple Tools for Exploring Variation in Code-switching for Linguists - Gualberto A.Guzman + Gualberto A.Guzman JacquelineSerigos - Barbara E.Bullock + Barbara E.Bullock Almeida JacquelineToribio 12–20 W16-5802 @@ -11703,7 +11703,7 @@ Word-Level Language Identification and Predicting Codeswitching Points in <fixed-case>S</fixed-case>wahili-<fixed-case>E</fixed-case>nglish Language Data MarioPiergallini RouzbehShirvani - GauriS. Gautam + GauriS. Gautam MohamedChouikha 21–29 W16-5803 @@ -11752,8 +11752,8 @@ A Neural Model for Language Identification in Code-Switched Tweets AaronJaech GeorgeMulcaire - MariOstendorf - Noah A.Smith + MariOstendorf + Noah A.Smith 60–64 W16-5807 10.18653/v1/W16-5807 @@ -11773,7 +11773,7 @@ Accurate <fixed-case>P</fixed-case>inyin-<fixed-case>E</fixed-case>nglish Codeswitched Language Identification Meng XuanXia - Jackie Chi KitCheung + Jackie Chi KitCheung 71–79 W16-5809 10.18653/v1/W16-5809 @@ -11840,7 +11840,7 @@ The <fixed-case>H</fixed-case>oward <fixed-case>U</fixed-case>niversity System Submission for the Shared Task in Language Identification in <fixed-case>S</fixed-case>panish-<fixed-case>E</fixed-case>nglish Codeswitching RouzbehShirvani MarioPiergallini - Gauri ShankarGautam + Gauri ShankarGautam MohamedChouikha 116–120 W16-5815 @@ -11849,7 +11849,7 @@ Codeswitching Detection via Lexical Features in Conditional Random Fields - PrajwolShrestha + PrajwolShrestha 121–126 W16-5816 10.18653/v1/W16-5816 @@ -11857,8 +11857,8 @@ Language Identification in Code-Switched Text Using Conditional Random Fields and <fixed-case>B</fixed-case>abelnet - Utpal KumarSikdar - BjörnGambäck + Utpal KumarSikdar + BjörnGambäck 127–131 W16-5817 10.18653/v1/W16-5817 @@ -11879,7 +11879,7 @@ W16-59 Kai-WeiChang Ming-WeiChang - AlexanderRush + AlexanderRush VivekSrikumar 10.18653/v1/W16-59 Association for Computational Linguistics @@ -11894,7 +11894,7 @@ Inside-Outside and Forward-Backward Algorithms Are Just Backprop (tutorial paper) - JasonEisner + JasonEisner 1–17 W16-5901 10.18653/v1/W16-5901 @@ -11903,7 +11903,7 @@ Research on attention memory networks as a model for learning natural language inference ZhuangLiu - DegenHuang + DegenHuang JingZhang KaiyuHuang 18–24 @@ -11923,7 +11923,7 @@ Posterior regularization for Joint Modeling of Multiple Structured Prediction Tasks with Soft Constraints KartikGoyal - ChrisDyer + ChrisDyer 35–43 W16-5904 10.18653/v1/W16-5904 @@ -11942,7 +11942,7 @@ Introducing <fixed-case>DRAIL</fixed-case> – a Step Towards Declarative Deep Relational Learning XiaoZhang - Maria LeonorPacheco + Maria LeonorPacheco ChangLi DanGoldwasser 54–62 @@ -11952,7 +11952,7 @@ Unsupervised Neural Hidden <fixed-case>M</fixed-case>arkov Models - Ke M.Tran + Ke M.Tran YonatanBisk AshishVaswani DanielMarcu @@ -11969,9 +11969,9 @@ W16-60 AnnieLouis MichaelRoth - BonnieWebber - MichaelWhite - LukeZettlemoyer + BonnieWebber + MichaelWhite + LukeZettlemoyer 10.18653/v1/W16-60 Association for Computational Linguistics
Austin, TX
@@ -11986,7 +11986,7 @@ An Analysis of Prerequisite Skills for Reading Comprehension SakuSugawara - AkikoAizawa + AkikoAizawa 1–5 W16-6001 10.18653/v1/W16-6001 @@ -12005,7 +12005,7 @@ Statistical Script Learning with Recurrent Neural Networks KarlPichotta - RaymondMooney + RaymondMooney 11–16 W16-6003 10.18653/v1/W16-6003 @@ -12025,7 +12025,7 @@ Unsupervised Event Coreference for Abstract Words DheerajRajagopal - EduardHovy + EduardHovy TerukoMitamura 22–26 W16-6005 @@ -12035,7 +12035,7 @@ Towards Broad-coverage Meaning Representation: The Case of Comparison Structures OmidBakhshandeh - JamesAllen + JamesAllen 27–31 W16-6006 10.18653/v1/W16-6006 @@ -12053,7 +12053,7 @@ <fixed-case>C</fixed-case>2<fixed-case>D</fixed-case>2<fixed-case>E</fixed-case>2: Using Call Centers to Motivate the Use of Dialog and Diarization in Entity Extraction - KenChurch + KenChurch WeizhongZhu JasonPelecanos 35–38 @@ -12065,7 +12065,7 @@ Visualizing the Content of a Children’s Story in a Virtual World: Lessons Learned Quynh Ngoc ThiDo StevenBethard - Marie-FrancineMoens + Marie-FrancineMoens 39–42 W16-6009 10.18653/v1/W16-6009 @@ -12074,7 +12074,7 @@ Stylistic Transfer in Natural Language Generation Systems Using Recurrent Neural Networks JadKabbara - Jackie Chi KitCheung + Jackie Chi KitCheung 43–47 W16-6010 10.18653/v1/W16-6010 @@ -12107,9 +12107,9 @@ Proceedings of the Seventh International Workshop on Health Text Mining and Information Analysis W16-61 CyrilGrouin - ThierryHamon - AurélieNévéol - PierreZweigenbaum + ThierryHamon + AurélieNévéol + PierreZweigenbaum 10.18653/v1/W16-61 Association for Computational Linguistics
Auxtin, TX
@@ -12179,8 +12179,8 @@ Retrofitting Word Vectors of <fixed-case>M</fixed-case>e<fixed-case>SH</fixed-case> Terms to Improve Semantic Similarity Measures ZhiguoYu - TrevorCohen - ByronWallace + TrevorCohen + ByronWallace ElmerBernstam ToddJohnson 43–51 @@ -12210,7 +12210,7 @@ Citation Analysis with Neural Attention Models TsendsurenMunkhdalai - John P.Lalor + John P.Lalor HongYu 69–77 W16-6109 @@ -12220,7 +12220,7 @@ Replicability of Research in Biomedical Natural Language Processing: a pilot evaluation for a coding task AurélieNévéol - KevinCohen + KevinCohen CyrilGrouin AudeRobert 78–84 @@ -12239,8 +12239,8 @@ Leveraging coreference to identify arms in medical abstracts: An experimental study ElisaFerracane - IainMarshall - Byron C.Wallace + IainMarshall + Byron C.Wallace KatrinErk 86–95 W16-6112 @@ -12259,7 +12259,7 @@ Exploring Query Expansion for Entity Searches in <fixed-case>P</fixed-case>ub<fixed-case>M</fixed-case>ed - Chung-ChiHuang + Chung-ChiHuang ZhiyongLu 106–112 W16-6114 @@ -12288,10 +12288,10 @@ Identifying and Categorizing Disaster-Related Tweets KevinStowe - Michael J.Paul - MarthaPalmer + Michael J.Paul + MarthaPalmer LeysiaPalen - KennethAnderson + KennethAnderson 1–6 W16-6201 10.18653/v1/W16-6201 @@ -12344,9 +12344,9 @@ Detecting Social Roles in <fixed-case>T</fixed-case>witter - Sunghwan MacKim + Sunghwan MacKim StephenWan - CécileParis + CécileParis 34–40 W16-6206 10.18653/v1/W16-6206 @@ -12354,7 +12354,7 @@ Identifying Sensible Participants in Online Discussions - SiddharthJain + SiddharthJain 41–47 W16-6207 10.18653/v1/W16-6207 @@ -12363,9 +12363,9 @@ emoji2vec: Learning Emoji Representations from their Description BenEisner - TimRocktäschel + TimRocktäschel IsabelleAugenstein - MatkoBošnjak + MatkoBošnjak SebastianRiedel 48–54 W16-6208 @@ -12376,7 +12376,7 @@ Learning Latent Local Conversation Modes for Predicting Comment Endorsement in Online Discussions HaoFang HaoCheng - MariOstendorf + MariOstendorf 55–64 W16-6209 10.18653/v1/W16-6209 @@ -12399,7 +12399,7 @@ Yu-LunHsieh Yung-ChunChang Chun-HanChu - Wen-LianHsu + Wen-LianHsu 74–83 W16-6211 10.18653/v1/W16-6211 @@ -12410,8 +12410,8 @@ AaronJaech GeorgeMulcaire ShobhitHathi - MariOstendorf - Noah A.Smith + MariOstendorf + Noah A.Smith 84–93 W16-6212 10.18653/v1/W16-6212 @@ -12421,7 +12421,7 @@ Human versus Machine Attention in Document Classification: A Dataset with Crowdsourced Annotations NikolaosPappas - AndreiPopescu-Belis + AndreiPopescu-Belis 94–100 W16-6213 10.18653/v1/W16-6213 @@ -12432,9 +12432,9 @@ Proceedings of the 13th International Conference on Natural Language Processing W16-63 - Dipti MisraSharma + Dipti MisraSharma RajeevSangal - Anil KumarSingh + Anil KumarSingh NLP Association of India
Varanasi, India
December @@ -12455,7 +12455,7 @@ Integrating <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for Multiple Sense Embeddings in Vector Semantics DavidFoley - JugalKalita + JugalKalita 2–9 W16-6302 foley-kalita-2016-integrating @@ -12465,7 +12465,7 @@ DebajyotyBanik SukantaSen AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 10–19 W16-6303 banik-etal-2016-smt @@ -12473,7 +12473,7 @@ Composition of Compound Nouns Using Distributional Semantics KyraYee - JugalKalita + JugalKalita 20–29 W16-6304 yee-kalita-2016-composition @@ -12499,7 +12499,7 @@ Sentence Based Discourse Classification for <fixed-case>H</fixed-case>indi Story Text-to-Speech (<fixed-case>TTS</fixed-case>) System KumudTripathi ParakrantSarkar - K. SreenivasaRao + K. SreenivasaRao 46–54 W16-6307 tripathi-etal-2016-sentence @@ -12508,7 +12508,7 @@ Biomolecular Event Extraction using a Stacked Generalization based Classifier AmitMajumder AsifEkbal - Sudip KumarNaskar + Sudip KumarNaskar 55–64 W16-6308 majumder-etal-2016-biomolecular @@ -12516,14 +12516,14 @@ Syntax and Pragmatics of Conversation: A Case of <fixed-case>B</fixed-case>angla SamirKarmakar - Soumya SankarGhosh + Soumya SankarGhosh 65–70 W16-6309 karmakar-ghosh-2016-syntax Dependency grammars as Haskell programs - TomaszObrębski + TomaszObrębski 71–80 W16-6310 obrebski-2016-dependency @@ -12535,7 +12535,7 @@ SabyasachiKamila AsifEkbal SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya 81–89 W16-6311 kumar-etal-2016-improving @@ -12559,7 +12559,7 @@ Constraint Grammar-based conversion of Dependency Treebanks - EckhardBick + EckhardBick 109–114 W16-6314 bick-2016-constraint @@ -12568,7 +12568,7 @@ Meaning Matters: Senses of Words are More Informative than Words for Cross-domain Sentiment Analysis RakshaSharma SudhaBhingardive - PushpakBhattacharyya + PushpakBhattacharyya 115–119 W16-6315 sharma-etal-2016-meaning @@ -12576,7 +12576,7 @@ <fixed-case>POS</fixed-case> Tagging Experts via Topic Modeling AtreyeeMukherjee - SandraKübler + SandraKübler MatthiasScheutz 120–128 W16-6316 @@ -12586,7 +12586,7 @@ Graph theoretic interpretation of <fixed-case>B</fixed-case>angla traditional grammar SamirKarmakar SayantaniBanerjee - SoumyaGhosh + SoumyaGhosh 129–136 W16-6317 karmakar-etal-2016-graph @@ -12613,7 +12613,7 @@ ShreenivasBharadwaj MonikPamecha AmeyaPrabhu - ManishShrivastava + ManishShrivastava 154–160 W16-6320 athavale-etal-2016-towards @@ -12622,7 +12622,7 @@ <fixed-case>V</fixed-case>aidya: A Spoken Dialog System for Health Domain PrathyushaDanda Brij Mohan LalSrivastava - ManishShrivastava + ManishShrivastava 161–166 W16-6321 danda-etal-2016-vaidya @@ -12639,7 +12639,7 @@ Keynote Lecture 2: Neural (and other Machine Learning) Approaches to Text Normalization - RichardSproat + RichardSproat 177 W16-6323 sproat-2016-keynote @@ -12647,7 +12647,7 @@ Wisdom of Students: A Consistent Automatic Short Answer Grading Technique ShouryaRoy - SandipanDandapat + SandipanDandapat AjayNagesh Y.Narahari 178–187 @@ -12657,18 +12657,18 @@ A Recurrent Neural Network Architecture for De-identifying Clinical Records Shweta - AnkitKumar + AnkitKumar AsifEkbal SriparnaSaha - PushpakBhattacharyya + PushpakBhattacharyya 188–197 W16-6325 shweta-etal-2016-recurrent <fixed-case>T</fixed-case>witter Named Entity Extraction and Linking Using Differential Evolution - Utpal KumarSikdar - BjörnGambäck + Utpal KumarSikdar + BjörnGambäck 198–207 W16-6326 sikdar-gamback-2016-twitter @@ -12685,7 +12685,7 @@ A Computational Analysis of <fixed-case>M</fixed-case>ahabharata DebaratiDas BhaskarjyotiDas - KaviMahesh + KaviMahesh 219–228 W16-6328 das-etal-2016-computational @@ -12693,14 +12693,14 @@ Use of Features for Accentuation of ghañanta Words Samir JanardanSohoni - Malhar A.Kulkarni + Malhar A.Kulkarni 229–238 W16-6329 sohoni-kulkarni-2016-use Learning to Identify Subjective Sentences - Girish K.Palshikar + Girish K.Palshikar ManojApte DeepakPandita VikramSingh @@ -12710,10 +12710,10 @@ Opinion Mining in a Code-Mixed Environment: A Case Study with Government Portals - DeepakGupta + DeepakGupta AnkitLamba AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 249–258 W16-6331 gupta-etal-2016-opinion @@ -12721,7 +12721,7 @@ Use of Semantic Knowledge Base for Enhancement of Coherence of Code-mixed Topic-Based Aspect Clusters KavitaAsnani - Jyoti DPawar + Jyoti DPawar 259–266 W16-6332 asnani-pawar-2016-use @@ -12729,7 +12729,7 @@ Genetic Algorithm (<fixed-case>GA</fixed-case>) Implementation for Feature Selection in <fixed-case>M</fixed-case>anipuri <fixed-case>POS</fixed-case> Tagging KishorjitNongmeikapam - SivajiBandyopadhyay + SivajiBandyopadhyay 267–274 W16-6333 nongmeikapam-bandyopadhyay-2016-genetic @@ -12754,8 +12754,8 @@ On Why Coarse Class Classification is Bottleneck in Noun Compound Interpretation GirishkumarPonkiya - PushpakBhattacharyya - Girish K.Palshikar + PushpakBhattacharyya + Girish K.Palshikar 293–298 W16-6336 ponkiya-etal-2016-coarse @@ -12767,8 +12767,8 @@ NandiniGhag JaiParanjape NileshJoshi - MalharKulkarni - PushpakBhattacharyya + MalharKulkarni + PushpakBhattacharyya 299–304 W16-6337 redkar-etal-2016-verbframator @@ -12807,9 +12807,9 @@ Proceedings of the 2nd Deep Machine Translation Workshop W16-64 - JanHajič - Gertjanvan Noord - AntónioBranco + JanHajič + Gertjanvan Noord + AntónioBranco ÚFAL MFF UK
Lisbon, Portugal
October @@ -12824,7 +12824,7 @@ <fixed-case>M</fixed-case>oses & Treex Hybrid <fixed-case>MT</fixed-case> Systems Bestiary RudolfRosa MartinPopel - OndřejBojar + OndřejBojar DavidMareček OndřejDušek W16-6401 @@ -12834,14 +12834,14 @@ Factoring Adjunction in Hierarchical Phrase-Based <fixed-case>SMT</fixed-case> SophieArnoult - KhalilSima’an + KhalilSima’an W16-6402 11-20 arnoult-simaan-2016-factoring A Hybrid Approach for Deep Machine Translation - KirilSimov + KirilSimov PetyaOsenova W16-6403 21-28 @@ -12852,7 +12852,7 @@ EleftheriosAvramidis VivienMacketanz AljoschaBurchardt - JindrichHelcl + JindrichHelcl HansUszkoreit W16-6404 29-38 @@ -12861,20 +12861,20 @@ Adding syntactic structure to bilingual terminology for improved domain adaptation MikelArtetxe - GorkaLabaka + GorkaLabaka ChakavehSaedi - JoãoRodrigues - JoãoSilva + JoãoRodrigues + JoãoSilva AntónioBranco - EnekoAgirre + EnekoAgirre W16-6405 39-46 artetxe-etal-2016-adding Incorporation of a valency lexicon into a <fixed-case>T</fixed-case>ecto<fixed-case>MT</fixed-case> pipeline - NataliaKlyueva - VladislavKuboň + NataliaKlyueva + VladislavKuboň W16-6406 47-53 klyueva-kubon-2016-incorporation @@ -12884,9 +12884,9 @@ Proceedings of the joint workshop on NLP for Computer Assisted Language Learning and NLP for Language Acquisition ElenaVolodina - GintarėGrigonytė + GintarėGrigonytė IldikóPilán - Kristina NilssonBjörkenstam + Kristina NilssonBjörkenstam LarsBorin LiU Electronic Press
Umeå, Sweden
@@ -12911,7 +12911,7 @@
Towards error annotation in a learner corpus of <fixed-case>P</fixed-case>ortuguese - Iriadel Río + Iriadel Río SandraAntunes AmáliaMendes MaartenJanssen @@ -12970,7 +12970,7 @@ Building a learner corpus for <fixed-case>R</fixed-case>ussian - EkaterinaRakhilina + EkaterinaRakhilina AnastasiaVyrenkova ElmiraMustakimova AlinaLadygina @@ -13041,10 +13041,10 @@ Generating summaries of hospitalizations: A new metric to assess the complexity of medical terms and their definitions SabitaAcharya BarbaraDi Eugenio - Andrew D.Boyd + Andrew D.Boyd KarenDunn Lopez RichardCameron - Gail MKeenan + Gail MKeenan 26–30 W16-6604 10.18653/v1/W16-6604 @@ -13052,7 +13052,7 @@ Designing Algorithms for Referring with Proper Names - Keesvan Deemter + Keesvan Deemter 31–35 W16-6605 10.18653/v1/W16-6605 @@ -13064,7 +13064,7 @@ VassilisPlachouras FrankSchilder HirokoBretz - JochenLeidner + JochenLeidner DezhaoSong 36–39 W16-6606 @@ -13082,8 +13082,8 @@ Abstractive Compression of Captions with Attentive Recurrent Neural Networks SanderWubben - EmielKrahmer - Antalvan den Bosch + EmielKrahmer + Antalvan den Bosch SuzanVerberne 41–50 W16-6608 @@ -13102,12 +13102,12 @@ Automatic label generation for news comment clusters AhmetAker - MonicaParamita - EminaKurtic + MonicaParamita + EminaKurtic AdamFunk EmmaBarker MarkHepple - RobGaizauskas + RobGaizauskas 61–69 W16-6610 10.18653/v1/W16-6610 @@ -13117,7 +13117,7 @@ Improving Fluency in Narrative Text Generation With Grammatical Transformations and Probabilistic Parsing EmilyAhn FabrizioMorbini - AndrewGordon + AndrewGordon 70–73 W16-6611 10.18653/v1/W16-6611 @@ -13127,7 +13127,7 @@ The Multilingual Affective Soccer Corpus (<fixed-case>MASC</fixed-case>): Compiling a biased parallel corpus on soccer reportage in <fixed-case>E</fixed-case>nglish, <fixed-case>G</fixed-case>erman and <fixed-case>D</fixed-case>utch NadineBraun MartijnGoudbeek - EmielKrahmer + EmielKrahmer 74–78 W16-6612 10.18653/v1/W16-6612 @@ -13135,7 +13135,7 @@ Challenges of Argument Mining: Generating an Argument Synthesis based on the Qualia Structure - PatrickSaint-Dizier + PatrickSaint-Dizier 79–83 W16-6613 10.18653/v1/W16-6613 @@ -13154,7 +13154,7 @@ Task demands and individual variation in referring expressions AdrianaBaltaretu - ThiagoCastro Ferreira + ThiagoCastro Ferreira 89–93 W16-6615 10.18653/v1/W16-6615 @@ -13173,7 +13173,7 @@ Evaluative Pattern Extraction for Automated Text Generation Chia-ChenLee - Shu-KaiHsieh + Shu-KaiHsieh 99–103 W16-6617 10.18653/v1/W16-6617 @@ -13182,7 +13182,7 @@ Statistics-Based Lexical Choice for <fixed-case>NLG</fixed-case> from Quantitative Information XiaoLi - Keesvan Deemter + Keesvan Deemter ChenghuaLin 104–108 W16-6618 @@ -13211,8 +13211,8 @@ Enabling text readability awareness during the micro planning phase of <fixed-case>NLG</fixed-case> applications PriscillaMoraes - KathleenMcCoy - SandraCarberry + KathleenMcCoy + SandraCarberry 121–131 W16-6621 10.18653/v1/W16-6621 @@ -13238,8 +13238,8 @@ Statistical Natural Language Generation from Tabular Non-textual Data JoyMahapatra - Sudip KumarNaskar - SivajiBandyopadhyay + Sudip KumarNaskar + SivajiBandyopadhyay 143–152 W16-6624 10.18653/v1/W16-6624 @@ -13249,7 +13249,7 @@ Paraphrase Generation from Latent-Variable <fixed-case>PCFG</fixed-case>s for Semantic Parsing ShashiNarayan SivaReddy - Shay B.Cohen + Shay B.Cohen 153–162 W16-6625 10.18653/v1/W16-6625 @@ -13280,7 +13280,7 @@ A Challenge Proposal for Narrative Generation Using <fixed-case>CNL</fixed-case>s EugenioConcepción GonzaloMéndez - PabloGervás + PabloGervás CarlosLeón 171–173 W16-6628 @@ -13298,7 +13298,7 @@ <fixed-case>S</fixed-case>imple<fixed-case>NLG</fixed-case>-<fixed-case>IT</fixed-case>: adapting <fixed-case>S</fixed-case>imple<fixed-case>NLG</fixed-case> to <fixed-case>I</fixed-case>talian - AlessandroMazzei + AlessandroMazzei CristinaBattaglino CristinaBosco 184–192 @@ -13309,7 +13309,7 @@ Don’t Mention the Shoe! A Learning to Rank Approach to Content Selection for Image Description Generation JosiahWang - RobertGaizauskas + RobertGaizauskas 193–202 W16-6631 10.18653/v1/W16-6631 @@ -13354,9 +13354,9 @@ Towards proper name generation: a corpus analysis - ThiagoCastro Ferreira + ThiagoCastro Ferreira SanderWubben - EmielKrahmer + EmielKrahmer 222–226 W16-6636 10.18653/v1/W16-6636 @@ -13373,8 +13373,8 @@ Enhancing <fixed-case>PTB</fixed-case> <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for Grammar-Based Surface Realization - David L.King - MichaelWhite + David L.King + MichaelWhite 232–236 W16-6638 10.18653/v1/W16-6638 @@ -13382,7 +13382,7 @@ Effect of Data Annotation, Feature Selection and Model Choice on Spatial Description Generation in <fixed-case>F</fixed-case>rench - AnjaBelz + AnjaBelz AdrianMuscat BrandonBirmingham JessieLevacher @@ -13396,8 +13396,8 @@ <fixed-case>QGASP</fixed-case>: a Framework for Question Generation Based on Different Levels of Linguistic Information HugoPatinho Rodrigues - LuísaCoheur - EricNyberg + LuísaCoheur + EricNyberg 242–243 W16-6640 10.18653/v1/W16-6640 @@ -13405,7 +13405,7 @@ Automatic Reports from Spreadsheets: Data Analysis for the Rest of Us - PabloDuboue + PabloDuboue 244–245 W16-6641 10.18653/v1/W16-6641 @@ -13413,7 +13413,7 @@ Towards Generating Colour Terms for Referents in Photographs: Prefer the Expected or the Unexpected? - SinaZarrieß + SinaZarrieß DavidSchlangen 246–255 W16-6642 @@ -13423,7 +13423,7 @@ Absolute and Relative Properties in Geographic Referring Expressions Rodrigode Oliveira - SomayajuluSripada + SomayajuluSripada EhudReiter 256–264 W16-6643 diff --git a/data/xml/W17.xml b/data/xml/W17.xml index 7bccdc8291..129754f13d 100644 --- a/data/xml/W17.xml +++ b/data/xml/W17.xml @@ -26,7 +26,7 @@ DustinBowers AnttiArppe JordanLachler - SjurMoshagen + SjurMoshagen TrondTrosterud 1–9 W17-0101 @@ -72,8 +72,8 @@ <fixed-case>STREAMLI</fixed-case>n<fixed-case>ED</fixed-case> Challenges: Aligning Research Interests with Shared Tasks - Gina-AnneLevow - Emily M.Bender + Gina-AnneLevow + Emily M.Bender PatrickLittell KristenHowell ShobhanaChelliah @@ -122,7 +122,7 @@ Inferring Case Systems from <fixed-case>IGT</fixed-case>: Enriching the Enrichment KristenHowell - Emily M.Bender + Emily M.Bender MichelLockwood FeiXia OlgaZamaraeva @@ -136,8 +136,8 @@ JordanKodner SpencerCaplan HongzhiXu - Mitchell P.Marcus - CharlesYang + Mitchell P.Marcus + CharlesYang 76–84 W17-0111 10.18653/v1/W17-0111 @@ -198,7 +198,7 @@ Computational Support for Finding Word Classes: A Case Study of <fixed-case>A</fixed-case>bui OlgaZamaraeva FrantišekKratochvíl - Emily M.Bender + Emily M.Bender FeiXia KristenHowell 130–140 @@ -218,7 +218,7 @@ Connecting Documentation and Revitalization: A New Approach to Language Apps - Alexa N.Little + Alexa N.Little 151–155 W17-0120 10.18653/v1/W17-0120 @@ -255,7 +255,7 @@ Proceedings of the 21st Nordic Conference on Computational Linguistics W17-02 - JörgTiedemann + JörgTiedemann NinaTahmasebi Association for Computational Linguistics
Gothenburg, Sweden
@@ -270,7 +270,7 @@ Joint <fixed-case>UD</fixed-case> Parsing of <fixed-case>N</fixed-case>orwegian <fixed-case>B</fixed-case>okmål and <fixed-case>N</fixed-case>ynorsk ErikVelldal - LiljaØvrelid + LiljaØvrelid PetterHohle 1–10 W17-0201 @@ -280,7 +280,7 @@ Replacing <fixed-case>OOV</fixed-case> Words For Dependency Parsing With Distributional Semantics PrasanthKolachina MartinRiedl - ChrisBiemann + ChrisBiemann 11–19 W17-0202 kolachina-etal-2017-replacing @@ -336,7 +336,7 @@ <fixed-case>OCR</fixed-case> and post-correction of historical <fixed-case>F</fixed-case>innish texts SenkaDrobac PekkaKauppinen - KristerLindén + KristerLindén 70–76 W17-0209 drobac-etal-2017-ocr @@ -345,7 +345,7 @@ <fixed-case>T</fixed-case>witter Topic Modeling by Tweet Aggregation AsbjørnSteinskog JonasTherkelsen - BjörnGambäck + BjörnGambäck 77–86 W17-0210 steinskog-etal-2017-twitter @@ -369,8 +369,8 @@ Using Pseudowords for Algorithm Comparison: An Evaluation Framework for Graph-based Word Sense Induction - FlavioMassimiliano Cecchini - ChrisBiemann + FlavioMassimiliano Cecchini + ChrisBiemann MartinRiedl 105–114 W17-0213 @@ -378,8 +378,8 @@ <fixed-case>N</fixed-case>orth-<fixed-case>S</fixed-case>ámi to <fixed-case>F</fixed-case>innish rule-based machine translation system - TommiPirinen - Francis M.Tyers + TommiPirinen + Francis M.Tyers TrondTrosterud RyanJohnson KevinUnhammer @@ -396,7 +396,7 @@ SandraNystø Rahka Marja-LiisaOlthuis TrondTrosterud - Francis M.Tyers + Francis M.Tyers 123–131 W17-0215 antonsen-etal-2017-machine @@ -404,7 +404,7 @@ <fixed-case>SWEGRAM</fixed-case> – A Web-Based Tool for Automatic Annotation and Analysis of <fixed-case>S</fixed-case>wedish Texts JesperNäsman - BeátaMegyesi + BeátaMegyesi AnnePalmér 132–141 W17-0216 @@ -413,7 +413,7 @@ Optimizing a <fixed-case>P</fixed-case>o<fixed-case>S</fixed-case> Tagset for <fixed-case>N</fixed-case>orwegian Dependency Parsing PetterHohle - LiljaØvrelid + LiljaØvrelid ErikVelldal 142–151 W17-0217 @@ -451,7 +451,7 @@ Evaluation of language identification methods using 285 languages TommiJauhiainen - KristerLindén + KristerLindén HeidiJauhiainen 183–191 W17-0221 @@ -460,14 +460,14 @@ Can We Create a Tool for General Domain Event Analysis? SiimOrasmaa - Heiki-JaanKaalep + Heiki-JaanKaalep 192–201 W17-0222 orasmaa-kaalep-2017-create From Treebank to <fixed-case>P</fixed-case>ropbank: A Semantic-Role and <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et Corpus for <fixed-case>D</fixed-case>anish - EckhardBick + EckhardBick 202–210 W17-0223 bick-2017-treebank @@ -516,7 +516,7 @@ SteinþórSteingrímsson JónGuðnason SigrúnHelgadóttir - EiríkurRögnvaldsson + EiríkurRögnvaldsson 237–240 W17-0229 steingrimsson-etal-2017-malromur @@ -541,9 +541,9 @@ A modernised version of the Glossa corpus search system AndersNøklestad KristinHagen - JanneBondi Johannessen + JanneBondi Johannessen MichałKosek - JoelPriestley + JoelPriestley 251–254 W17-0232 noklestad-etal-2017-modernised @@ -628,7 +628,7 @@ <fixed-case>W</fixed-case>ordnet extension via word embeddings: Experiments on the <fixed-case>N</fixed-case>orwegian <fixed-case>W</fixed-case>ordnet HeidiSand ErikVelldal - LiljaØvrelid + LiljaØvrelid 298–302 W17-0242 sand-etal-2017-wordnet @@ -638,7 +638,7 @@ RobertÖstling CarlBörstell MoaGärdenfors - MatsWirén + MatsWirén 303–308 W17-0243 ostling-etal-2017-universal @@ -649,7 +649,7 @@ EvelinaRennes DanielFahlborg VidaJohansson - ArneJönsson + ArneJönsson 309–313 W17-0244 falkenjack-etal-2017-services @@ -664,7 +664,7 @@ <fixed-case>TALERUM</fixed-case> - Learning <fixed-case>D</fixed-case>anish by Doing <fixed-case>D</fixed-case>anish - PeterJuel Henrichsen + PeterJuel Henrichsen 318–321 W17-0246 juel-henrichsen-2017-talerum @@ -683,7 +683,7 @@ VictoriaRosén HelgeDyvik PaulMeurer - KoenraadDe Smedt + KoenraadDe Smedt 326–329 W17-0248 rosen-etal-2017-exploring @@ -704,9 +704,9 @@ Proceedings of the joint workshop on NLP for Computer Assisted Language Learning and NLP for Language Acquisition ElenaVolodina - GintarėGrigonytė + GintarėGrigonytė IldikóPilán - Kristina NilssonBjörkenstam + Kristina NilssonBjörkenstam LarsBorin LiU Electronic Press
Gothenburg, Sweden
@@ -729,7 +729,7 @@ Challenging learners in their individual zone of proximal development using pedagogic developmental benchmarks of syntactic complexity XiaobinChen - DetmarMeurers + DetmarMeurers 8-17 W17-0302 chen-meurers-2017-challenging @@ -753,10 +753,10 @@ Developing a web-based workbook for <fixed-case>E</fixed-case>nglish supporting the interaction of students and teachers - BjörnRudzewitz + BjörnRudzewitz RamonZiai KordulaDe Kuthy - DetmarMeurers + DetmarMeurers 36-46 W17-0305 rudzewitz-etal-2017-developing @@ -765,7 +765,7 @@ Annotating errors in student texts: First experiences and experiments SaraStymne EvaPettersson - BeátaMegyesi + BeátaMegyesi AnnePalmér 47-60 W17-0306 @@ -785,7 +785,7 @@ Proceedings of the NoDaLiDa 2017 Workshop on Universal Dependencies (UDW 2017) W17-04 - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe JoakimNivre SebastianSchuster Association for Computational Linguistics @@ -800,7 +800,7 @@ Cross-Lingual Parser Selection for Low-Resource Languages - ŽeljkoAgić + ŽeljkoAgić 1–10 W17-0401 agic-2017-cross @@ -815,7 +815,7 @@ Increasing Return on Annotation Investment: The Automatic Construction of a <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Treebank for <fixed-case>D</fixed-case>utch GosseBouma - Gertjanvan Noord + Gertjanvan Noord 19–26 W17-0403 bouma-van-noord-2017-increasing @@ -824,7 +824,7 @@ Converting the <fixed-case>T</fixed-case>ü<fixed-case>B</fixed-case>a-<fixed-case>D</fixed-case>/<fixed-case>Z</fixed-case> Treebank of <fixed-case>G</fixed-case>erman to <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies ÇağrıÇöltekin BenCampbell - ErhardHinrichs + ErhardHinrichs HeikeTelljohann 27–37 W17-0404 @@ -835,7 +835,7 @@ PeterDirix LiesbethAugustinus Danielvan Niekerk - FrankVan Eynde + FrankVan Eynde 38–47 W17-0405 dirix-etal-2017-universal @@ -843,7 +843,7 @@ Elliptic Constructions: Spotting Patterns in <fixed-case>UD</fixed-case> Treebanks KiraDroganova - DanielZeman + DanielZeman 48–57 W17-0406 droganova-zeman-2017-elliptic @@ -858,7 +858,7 @@ Towards <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for Learner <fixed-case>C</fixed-case>hinese - JohnLee + JohnLee HermanLeung KeyingLi 67–71 @@ -893,7 +893,7 @@ <fixed-case>U</fixed-case>dapi: Universal <fixed-case>API</fixed-case> for <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies MartinPopel - ZdeněkŽabokrtský + ZdeněkŽabokrtský MartinVojtek 96–101 W17-0412 @@ -902,7 +902,7 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>G</fixed-case>reek ProkopisProkopidis - HarisPapageorgiou + HarisPapageorgiou 102–106 W17-0413 prokopidis-papageorgiou-2017-universal @@ -918,7 +918,7 @@ Empirically Sampling <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies NatalieSchluter - ŽeljkoAgić + ŽeljkoAgić 117–122 W17-0415 schluter-agic-2017-empirically @@ -927,7 +927,7 @@ Gapping Constructions in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies v2 SebastianSchuster MatthewLamm - Christopher D.Manning + Christopher D.Manning 123–132 W17-0416 schuster-etal-2017-gapping @@ -935,14 +935,14 @@ Toward <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>A</fixed-case>inu HajimeSenuma - AkikoAizawa + AkikoAizawa 133–139 W17-0417 senuma-aizawa-2017-toward Automatic Morpheme Segmentation and Labeling in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Resources - MiikkaSilfverberg + MiikkaSilfverberg MansHulden 140–145 W17-0418 @@ -984,7 +984,7 @@ Improving <fixed-case>POS</fixed-case> Tagging in <fixed-case>O</fixed-case>ld <fixed-case>S</fixed-case>panish Using <fixed-case>TEITOK</fixed-case> MaartenJanssen JosepAusensi - JosepFontana + JosepFontana 2–6 W17-0502 janssen-etal-2017-improving @@ -1009,7 +1009,7 @@ Ambiguity in Semantically Related Word Substitutions: an investigation in historical <fixed-case>B</fixed-case>ible translations - MariaMoritz + MariaMoritz MarcoBüchler 18–23 W17-0505 @@ -1031,7 +1031,7 @@ MichaelHund FrederikDennig MiriamButt - DanielKeim + DanielKeim 32–39 W17-0507 schatzle-etal-2017-histobankvis @@ -1048,7 +1048,7 @@ Data-driven Morphology and Sociolinguistics for Early <fixed-case>M</fixed-case>odern <fixed-case>D</fixed-case>utch MarijnSchraagen - Marjovan Koppen + Marjovan Koppen FeikeDietz 47–53 W17-0509 @@ -1071,9 +1071,9 @@ Proceedings of the Third Workshop on Computational Linguistics for Uralic Languages W17-06 - Francis M.Tyers + Francis M.Tyers MichaelRießler - Tommi A.Pirinen + Tommi A.Pirinen TrondTrosterud 10.18653/v1/W17-06 Association for Computational Linguistics @@ -1177,8 +1177,8 @@ cmcl TedGibson TalLinzen - AsadSayeed - Martenvan Schijndel + AsadSayeed + Martenvan Schijndel WilliamSchuler @@ -1190,7 +1190,7 @@ MatthewNelson StanislasDehaene ChristophePallier - JohnHale + JohnHale 1–10 W17-0701 10.18653/v1/W17-0701 @@ -1200,7 +1200,7 @@ Learning an Input Filter for Argument Structure Acquisition LaurelPerkins - NaomiFeldman + NaomiFeldman JeffreyLidz 11–19 W17-0702 @@ -1211,7 +1211,7 @@ Grounding sound change in ideal observer models of perception ZacharyBurchill - T. FlorianJaeger + T. FlorianJaeger 20–28 W17-0703 10.18653/v1/W17-0703 @@ -1265,7 +1265,7 @@ Readers vs. Writers vs. Texts: Coping with Different Perspectives of Text Understanding in Emotion Annotation - SvenBuechel + SvenBuechel UdoHahn 1–12 W17-0801 @@ -1276,7 +1276,7 @@ Finding Good Conversations Online: The <fixed-case>Y</fixed-case>ahoo <fixed-case>N</fixed-case>ews Annotated Comments Corpus CourtneyNapoles - JoelTetreault + JoelTetreault AasishPappu EnricaRosato BrianProvenzale @@ -1298,7 +1298,7 @@ A Code-Switching Corpus of <fixed-case>T</fixed-case>urkish-<fixed-case>G</fixed-case>erman Conversations - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu 34–40 W17-0804 10.18653/v1/W17-0804 @@ -1307,9 +1307,9 @@ Annotating omission in statement pairs - HéctorMartínez Alonso + HéctorMartínez Alonso AmauryDelamaire - BenoîtSagot + BenoîtSagot 41–45 W17-0805 10.18653/v1/W17-0805 @@ -1336,7 +1336,7 @@ ChihoToyoshima MayukaYamamoto KyoKageura - AnthonyHartley + AnthonyHartley 57–66 W17-0807 10.18653/v1/W17-0807 @@ -1346,7 +1346,7 @@ Representation and Interchange of Linguistic Annotation. An In-Depth, Side-by-Side Comparison of Three Designs RichardEckart de Castilho - NancyIde + NancyIde EmanueleLapponi StephanOepen KeithSuderman @@ -1374,7 +1374,7 @@ MartinTutek JanŠnajder GoranGlavaš - BojanaDalbelo Bašić + BojanaDalbelo Bašić NatašaMilić-Frayling 82–90 W17-0810 @@ -1388,7 +1388,7 @@ ArihantGupta AvijitVajpayee ArjitSrivastava - ManishShrivastava + ManishShrivastava 91–94 W17-0811 10.18653/v1/W17-0811 @@ -1398,8 +1398,8 @@ The <fixed-case>BEC</fixed-case>au<fixed-case>SE</fixed-case> Corpus 2.0: Annotating Causality and Overlapping Relations JesseDunietz - LoriLevin - JaimeCarbonell + LoriLevin + JaimeCarbonell 95–104 W17-0812 10.18653/v1/W17-0812 @@ -1436,7 +1436,7 @@ W17-09 MichaelRoth NasrinMostafazadeh - NathanaelChambers + NathanaelChambers AnnieLouis 10.18653/v1/W17-09 Association for Computational Linguistics @@ -1470,7 +1470,7 @@ OriShapira ShyamUpadhyay DanRoth - EugenioMartinez Camara + EugenioMartinez Camara IrynaGurevych IdoDagan 12–24 @@ -1515,7 +1515,7 @@ MichaelRoth AnnieLouis NathanaelChambers - JamesAllen + JamesAllen 46–51 W17-0906 10.18653/v1/W17-0906 @@ -1529,7 +1529,7 @@ IoannisKonstas LeilaZilles YejinChoi - Noah A.Smith + Noah A.Smith 52–55 W17-0907 10.18653/v1/W17-0907 @@ -1542,8 +1542,8 @@ YevgeniyPuzikov AndreasRücklé JudithEckle-Kohler - TeresaMartin - EugenioMartínez-Cámara + TeresaMartin + EugenioMartínez-Cámara DaniilSorokin MaximePeyrard IrynaGurevych @@ -1578,7 +1578,7 @@ MelissaRoemmele SosukeKobayashi NaoyaInoue - AndrewGordon + AndrewGordon 74–80 W17-0911 10.18653/v1/W17-0911 @@ -1588,7 +1588,7 @@ <fixed-case>IIT</fixed-case> (<fixed-case>BHU</fixed-case>): System Description for <fixed-case>LSDS</fixed-case>em’17 Shared Task PranavGoel - Anil KumarSingh + Anil KumarSingh 81–86 W17-0912 10.18653/v1/W17-0912 @@ -1612,11 +1612,11 @@ W17-10 GeorgeGiannakopoulos ElenaLloret - John M.Conroy + John M.Conroy JosefSteinberger MarinaLitvak - PeterRankel - BenoitFavre + PeterRankel + BenoitFavre 10.18653/v1/W17-10 Association for Computational Linguistics
Valencia, Spain
@@ -1648,8 +1648,8 @@ Decoupling Encoder and Decoder Networks for Abstractive Document Summarization YingXu Jey HanLau - TimothyBaldwin - TrevorCohn + TimothyBaldwin + TrevorCohn 7–11 W17-1002 10.18653/v1/W17-1002 @@ -1660,7 +1660,7 @@ Centroid-based Text Summarization through Compositionality of Word Embeddings GaetanoRossiello PierpaoloBasile - GiovanniSemeraro + GiovanniSemeraro 12–21 W17-1003 10.18653/v1/W17-1003 @@ -1692,8 +1692,8 @@ Ultra-Concise Multi-genre Summarisation of Web2.0: towards Intelligent Content Generation ElenaLloret EsterBoldrini - PatricioMartínez-Barco - ManuelPalomar + PatricioMartínez-Barco + ManuelPalomar 37–46 W17-1006 10.18653/v1/W17-1006 @@ -1702,9 +1702,9 @@
Machine Learning Approach to Evaluate <fixed-case>M</fixed-case>ulti<fixed-case>L</fixed-case>ingual Summaries - SamiraEllouze + SamiraEllouze MaherJaoua - LamiaHadrich Belguith + LamiaHadrich Belguith 47–54 W17-1007 10.18653/v1/W17-1007 @@ -1754,9 +1754,9 @@ Potential and Limitations of Cross-Domain Sentiment Classification - Jan MilanDeriu + Jan MilanDeriu MartinWeilenmann - DirkVon Gruenigen + DirkVon Gruenigen MarkCieliebak 17–24 W17-1103 @@ -1769,7 +1769,7 @@ KevinMcKelvey PeterGoutzounis Stephenda Cruz - NathanaelChambers + NathanaelChambers 25–35 W17-1104 10.18653/v1/W17-1104 @@ -1790,7 +1790,7 @@ A <fixed-case>T</fixed-case>witter Corpus and Benchmark Resources for <fixed-case>G</fixed-case>erman Sentiment Analysis MarkCieliebak - Jan MilanDeriu + Jan MilanDeriu DominicEgger FatihUzdilli 45–51 @@ -1804,11 +1804,11 @@ Proceedings of the Fourth Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial) W17-12 - PreslavNakov + PreslavNakov MarcosZampieri NikolaLjubešić - JörgTiedemann - ShevinMalmasi + JörgTiedemann + ShevinMalmasi AhmedAli 10.18653/v1/W17-12 Association for Computational Linguistics @@ -1888,7 +1888,7 @@ Why <fixed-case>C</fixed-case>atalan-<fixed-case>S</fixed-case>panish Neural Machine Translation? Analysis, comparison and combination with standard Rule and Phrase-based technologies - Marta R.Costa-jussà + Marta R.Costa-jussà 55–62 W17-1207 10.18653/v1/W17-1207 @@ -1938,7 +1938,7 @@ Evaluating <fixed-case>H</fixed-case>e<fixed-case>LI</fixed-case> with Non-Linear Mappings TommiJauhiainen - KristerLindén + KristerLindén HeidiJauhiainen 102–108 W17-1212 @@ -1949,8 +1949,8 @@ A Perplexity-Based Method for Similar Languages Discrimination PabloGamallo - Jose RamomPichel - IñakiAlegria + Jose RamomPichel + IñakiAlegria 109–114 W17-1213 10.18653/v1/W17-1213 @@ -1969,7 +1969,7 @@ Discriminating between Similar Languages with Word-level Convolutional Neural Networks MarceloCriscuolo - Sandra MariaAluísio + Sandra MariaAluísio 124–130 W17-1215 10.18653/v1/W17-1215 @@ -1987,11 +1987,11 @@ Discriminating between Similar Languages Using a Combination of Typed and Untyped Character N-grams and Words - HelenaGomez + HelenaGomez IliaMarkov JorgeBaptista GrigoriSidorov - DavidPinto + DavidPinto 137–145 W17-1217 10.18653/v1/W17-1217 @@ -2012,7 +2012,7 @@ When Sparse Traditional Models Outperform Dense Neural Networks: the Curious Case of Discriminating between Similar Languages MariaMedvedeva MartinKroon - BarbaraPlank + BarbaraPlank 156–163 W17-1219 10.18653/v1/W17-1219 @@ -2061,7 +2061,7 @@ Exploring Lexical and Syntactic Features for Language Variety Identification Chrisvan der Lee - Antalvan den Bosch + Antalvan den Bosch 190–199 W17-1224 10.18653/v1/W17-1224 @@ -2071,7 +2071,7 @@ Learning to Identify <fixed-case>A</fixed-case>rabic and <fixed-case>G</fixed-case>erman Dialects using Multiple Kernels Radu TudorIonescu - AndreiButnaru + AndreiButnaru 200–209 W17-1225 10.18653/v1/W17-1225 @@ -2081,9 +2081,9 @@ <fixed-case>S</fixed-case>lavic Forest, <fixed-case>N</fixed-case>orwegian Wood RudolfRosa - DanielZeman + DanielZeman DavidMareček - ZdeněkŽabokrtský + ZdeněkŽabokrtský 210–219 W17-1226 10.18653/v1/W17-1226 @@ -2096,9 +2096,9 @@ Proceedings of the Third Arabic Natural Language Processing Workshop W17-13 NizarHabash - MonaDiab + MonaDiab KareemDarwish - WassimEl-Hajj + WassimEl-Hajj HendAl-Khalifa HoudaBouamor NadiTomeh @@ -2187,7 +2187,7 @@ SalimaMedhaffar FethiBougares YannickEstève - LamiaHadrich-Belguith + LamiaHadrich-Belguith 55–61 W17-1307 10.18653/v1/W17-1307 @@ -2201,7 +2201,7 @@ AhmadGhandour ShadyElbassuoni HazemHajj - KhaledShaban + KhaledShaban 62–71 W17-1308 10.18653/v1/W17-1308 @@ -2211,7 +2211,7 @@ A New Error Annotation for Dyslexic texts in <fixed-case>A</fixed-case>rabic MahaAlamri - William JTeahan + William JTeahan 72–78 W17-1309 10.18653/v1/W17-1309 @@ -2225,7 +2225,7 @@ AbdullahM. Mousa MostafaElhosiny SherifAbdou - MohsenRashwan + MohsenRashwan 79–83 W17-1310 10.18653/v1/W17-1310 @@ -2275,7 +2275,7 @@ HazemHajj WassimEl-Hajj NizarHabash - KhaledShaban + KhaledShaban 110–118 W17-1314 10.18653/v1/W17-1314 @@ -2333,7 +2333,7 @@ DominiqueFohr DenisJouvet DavidLanglois - KamelSmaili + KamelSmaili 157–165 W17-1319 10.18653/v1/W17-1319 @@ -2344,7 +2344,7 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>A</fixed-case>rabic DimaTaji NizarHabash - DanielZeman + DanielZeman 166–176 W17-1320 10.18653/v1/W17-1320 @@ -2377,7 +2377,7 @@ Proceedings of the 6th Workshop on Balto-Slavic Natural Language Processing W17-14 - TomažErjavec + TomažErjavec JakubPiskorski LidiaPivovarova JanŠnajder @@ -2427,7 +2427,7 @@ Projecting Multiword Expression Resources on a <fixed-case>P</fixed-case>olish Treebank AgataSavary - JakubWaszczuk + JakubWaszczuk 20–26 W17-1404 10.18653/v1/W17-1404 @@ -2457,9 +2457,9 @@ <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>S</fixed-case>erbian in Comparison with <fixed-case>C</fixed-case>roatian and Other <fixed-case>S</fixed-case>lavic Languages - TanjaSamardžić + TanjaSamardžić MirjanaStarović - ŽeljkoAgić + ŽeljkoAgić NikolaLjubešić 39–44 W17-1407 @@ -2606,7 +2606,7 @@ Use Generalized Representations, But Do Not Forget Surface Features - Nafise SadatMoosavi + Nafise SadatMoosavi MichaelStrube 1–7 W17-1501 @@ -2618,8 +2618,8 @@ Enriching <fixed-case>B</fixed-case>asque Coreference Resolution System using Semantic Knowledge sources AnderSoraluze OlatzArregi - XabierArregi - ArantzaDíaz de Ilarraza + XabierArregi + ArantzaDíaz de Ilarraza 8–16 W17-1502 10.18653/v1/W17-1502 @@ -2648,8 +2648,8 @@ Using Coreference Links to Improve <fixed-case>S</fixed-case>panish-to-<fixed-case>E</fixed-case>nglish Machine Translation - LeslyMiculicich Werlen - AndreiPopescu-Belis + LeslyMiculicich Werlen + AndreiPopescu-Belis 30–40 W17-1505 10.18653/v1/W17-1505 @@ -2679,7 +2679,7 @@ Projection-based Coreference Resolution Using Deep Syntax MichalNovák AnnaNedoluzhko - ZdeněkŽabokrtský + ZdeněkŽabokrtský 56–64 W17-1508 10.18653/v1/W17-1508 @@ -2692,11 +2692,11 @@ Proceedings of the First ACL Workshop on Ethics in Natural Language Processing W17-16 DirkHovy - ShannonSpruit + ShannonSpruit MargaretMitchell - Emily M.Bender + Emily M.Bender MichaelStrube - HannaWallach + HannaWallach 10.18653/v1/W17-16 Association for Computational Linguistics
Valencia, Spain
@@ -2738,7 +2738,7 @@
Ethical by Design: Ethics Best Practices for Natural Language Processing - Jochen L.Leidner + Jochen L.Leidner VassilisPlachouras 30–40 W17-1604 @@ -2781,7 +2781,7 @@ Ethical Considerations in <fixed-case>NLP</fixed-case> Shared Tasks - CarlaParra Escartín + CarlaParra Escartín WesselReijers TeresaLynn JossMoorkens @@ -2806,9 +2806,9 @@ A Short Review of Ethical Challenges in Clinical Natural Language Processing - SimonŠuster + SimonŠuster StéphanTulkens - WalterDaelemans + WalterDaelemans 80–87 W17-1610 10.18653/v1/W17-1610 @@ -2840,7 +2840,7 @@ ChareseSmiley FrankSchilder VassilisPlachouras - Jochen L.Leidner + Jochen L.Leidner 103–108 W17-1613 10.18653/v1/W17-1613 @@ -2852,7 +2852,7 @@ Proceedings of the 13th Workshop on Multiword Expressions (MWE 2017) W17-17 - StellaMarkantonatou + StellaMarkantonatou CarlosRamisch AgataSavary VeronikaVincze @@ -2869,7 +2869,7 @@ <fixed-case>P</fixed-case>ara<fixed-case>D</fixed-case>i: Dictionary of Paraphrases of <fixed-case>C</fixed-case>zech Complex Predicates with Light Verbs - PetraBarančíková + PetraBarančíková VáclavaKettnerová 1–10 W17-1701 @@ -2892,8 +2892,8 @@ Using bilingual word-embeddings for multilingual collocation extraction MarcosGarcia - MarcosGarcía-Salido - MargaritaAlonso-Ramos + MarcosGarcía-Salido + MargaritaAlonso-Ramos 21–30 W17-1703 10.18653/v1/W17-1703 @@ -2904,13 +2904,13 @@ The <fixed-case>PARSEME</fixed-case> Shared Task on Automatic Identification of Verbal Multiword Expressions AgataSavary CarlosRamisch - SilvioCordeiro + SilvioCordeiro FedericoSangati VeronikaVincze - BehrangQasemiZadeh - MarieCandito + BehrangQasemiZadeh + MarieCandito FabienneCap - VoulaGiouli + VoulaGiouli IvelinaStoyanova AntoineDoucet 31–47 @@ -2921,7 +2921,7 @@ <fixed-case>US</fixed-case>zeged: Identifying Verbal Multiword Expressions with <fixed-case>POS</fixed-case> Tagging and Parsing Techniques - Katalin IlonaSimkó + Katalin IlonaSimkó ViktóriaKovács VeronikaVincze 48–53 @@ -2934,7 +2934,7 @@ Parsing and <fixed-case>MWE</fixed-case> Detection: Fips at the <fixed-case>PARSEME</fixed-case> Shared Task VasilikiFoufi LukaNerima - ÉricWehrli + ÉricWehrli 54–59 W17-1706 10.18653/v1/W17-1706 @@ -2943,7 +2943,7 @@ Neural Networks for Multi-Word Expression Detection - NataliaKlyueva + NataliaKlyueva AntoineDoucet MilanStraka 60–65 @@ -2955,7 +2955,7 @@ Factoring Ambiguity out of the Prediction of Compositionality for <fixed-case>G</fixed-case>erman Multi-Word Expressions StefanBott - SabineSchulte im Walde + SabineSchulte im Walde 66–72 W17-1708 10.18653/v1/W17-1708 @@ -2974,7 +2974,7 @@ Understanding Idiomatic Variation KristinaGeeraert - R. HaraldBaayen + R. HaraldBaayen JohnNewman 80–90 W17-1710 @@ -3024,11 +3024,11 @@ Detection of Verbal Multi-Word Expressions via Conditional Random Fields with Syntactic Dependency Features and Semantic Re-Ranking - AlfredoMaldonado + AlfredoMaldonado LifengHan ErwanMoreau AshjanAlsulaimani - Koel DuttaChowdhury + Koel DuttaChowdhury CarlVogel QunLiu 114–120 @@ -3039,10 +3039,10 @@ A data-driven approach to verbal multiword expression detection. <fixed-case>PARSEME</fixed-case> Shared Task system description paper - TiberiuBoros + TiberiuBoros SoniaPipa - VerginicaBarbu Mititelu - DanTufis + VerginicaBarbu Mititelu + DanTufis 121–126 W17-1716 10.18653/v1/W17-1716 @@ -3052,8 +3052,8 @@ The <fixed-case>ATILF</fixed-case>-<fixed-case>LLF</fixed-case> System for Parseme Shared Task: a Transition-based Verbal Multiword Expression Tagger HazemAl Saied - MatthieuConstant - MarieCandito + MatthieuConstant + MarieCandito 127–132 W17-1717 10.18653/v1/W17-1717 @@ -3064,7 +3064,7 @@ Investigating the Opacity of Verb-Noun Multiword Expression Usages in Context ShivaTaslimipoor OmidRohanian - RuslanMitkov + RuslanMitkov AfsanehFazly 133–138 W17-1718 @@ -3076,7 +3076,7 @@ Compositionality in Verb-Particle Constructions ArchnaBhatia Choh ManTeng - JamesAllen + JamesAllen 139–148 W17-1719 10.18653/v1/W17-1719 @@ -3086,10 +3086,10 @@ Rule-Based Translation of <fixed-case>S</fixed-case>panish Verb-Noun Combinations into <fixed-case>B</fixed-case>asque UxoaIñurrieta - ItziarAduriz - ArantzaDíaz de Ilarraza - GorkaLabaka - KepaSarasola + ItziarAduriz + ArantzaDíaz de Ilarraza + GorkaLabaka + KepaSarasola 149–154 W17-1720 10.18653/v1/W17-1720 @@ -3107,7 +3107,7 @@ Simple Compound Splitting for <fixed-case>G</fixed-case>erman - MarionWeller-Di Marco + MarionWeller-Di Marco 161–166 W17-1722 10.18653/v1/W17-1722 @@ -3126,8 +3126,8 @@ Comparing Recurring Lexico-Syntactic Trees (<fixed-case>RLT</fixed-case>s) and Ngram Techniques for Extended Phraseology Extraction - AgnèsTutin - OlivierKraif + AgnèsTutin + OlivierKraif 176–180 W17-1724 10.18653/v1/W17-1724 @@ -3136,8 +3136,8 @@ Benchmarking Joint Lexical and Syntactic Analysis on Multiword-Rich Data - MatthieuConstant - HéctorMartinez Alonso + MatthieuConstant + HéctorMartinez Alonso 181–186 W17-1725 10.18653/v1/W17-1725 @@ -3148,7 +3148,7 @@ Semi-Automated Resolution of Inconsistency for a Harmonized Multiword Expression and Dependency Parse Annotation KingChan JulianBrooke - TimothyBaldwin + TimothyBaldwin 187–193 W17-1726 10.18653/v1/W17-1726 @@ -3168,7 +3168,7 @@ Complex Verbs are Different: Exploring the Visual Modality in Multi-Modal Models to Predict Compositionality MaximilianKöper - SabineSchulte im Walde + SabineSchulte im Walde 200–206 W17-1728 10.18653/v1/W17-1728 @@ -3182,7 +3182,7 @@ W17-18 EduardoBlanco RoserMorante - RoserSaurí + RoserSaurí 10.18653/v1/W17-18 Association for Computational Linguistics
Valencia, Spain
@@ -3198,9 +3198,9 @@ Understanding the Semantics of Narratives of Interpersonal Violence through Reader Annotations and Physiological Reactions AlexanderCalderwood Elizabeth A.Pruett - RaymondPtucha + RaymondPtucha ChristopherHoman - CeciliaOvesdotter Alm + CeciliaOvesdotter Alm 1–9 W17-1801 10.18653/v1/W17-1801 @@ -3210,7 +3210,7 @@ Intension, Attitude, and Tense Annotation in a High-Fidelity Semantic Representation GeneKim - LenhartSchubert + LenhartSchubert 10–15 W17-1802 10.18653/v1/W17-1802 @@ -3232,7 +3232,7 @@ FedericoFancellu SivaReddy AdamLopez - BonnieWebber + BonnieWebber 22–32 W17-1804 10.18653/v1/W17-1804 @@ -3255,7 +3255,7 @@ is able to handle phenomena related to scope by means of an higher-order type th The Scope and Focus of Negation: A Complete Annotation Framework for <fixed-case>I</fixed-case>talian BegoñaAltuna Anne-LyseMinard - ManuelaSperanza + ManuelaSperanza 34–42 W17-1806 10.18653/v1/W17-1806 @@ -3264,9 +3264,9 @@ is able to handle phenomena related to scope by means of an higher-order type th Annotation of negation in the <fixed-case>IULA</fixed-case> <fixed-case>S</fixed-case>panish Clinical Record Corpus - MontserratMarimon + MontserratMarimon JorgeVivaldi - NúriaBel + NúriaBel 43–52 W17-1807 10.18653/v1/W17-1807 @@ -3275,9 +3275,9 @@ is able to handle phenomena related to scope by means of an higher-order type th Annotating Negation in <fixed-case>S</fixed-case>panish Clinical Texts - NoaCruz + NoaCruz RoserMorante - Manuel J.Maña López + Manuel J.Maña López JacintoMata Vázquez Carlos L.Parra Calderón 53–58 @@ -3290,7 +3290,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Neural Networks for Negation Cue Detection in <fixed-case>C</fixed-case>hinese HangfengHe FedericoFancellu - BonnieWebber + BonnieWebber 59–63 W17-1809 10.18653/v1/W17-1809 @@ -3301,7 +3301,7 @@ is able to handle phenomena related to scope by means of an higher-order type th An open-source tool for negation detection: a maximum-margin approach MartineEnger ErikVelldal - LiljaØvrelid + LiljaØvrelid 64–69 W17-1810 10.18653/v1/W17-1810 @@ -3313,7 +3313,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the 1st Workshop on Sense, Concept and Entity Representations and their Applications W17-19 - JoseCamacho-Collados + JoseCamacho-Collados Mohammad TaherPilehvar 10.18653/v1/W17-19 Association for Computational Linguistics @@ -3329,7 +3329,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Compositional Semantics using Feature-Based Models from <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et PabloGamallo - MartínPereira-Fariña + MartínPereira-Fariña 1–11 W17-1901 10.18653/v1/W17-1901 @@ -3351,7 +3351,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Improving Verb Metaphor Detection by Propagating Abstractness to Words, Phrases and Individual Senses MaximilianKöper - SabineSchulte im Walde + SabineSchulte im Walde 24–30 W17-1903 10.18653/v1/W17-1903 @@ -3362,7 +3362,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Improving Clinical Diagnosis Inference through Integration of Structured and Unstructured Knowledge YuanLing YuanAn - SadidHasan + SadidHasan 31–36 W17-1904 10.18653/v1/W17-1904 @@ -3373,7 +3373,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Classifying Lexical-semantic Relationships by Exploiting Sense/Concept Representations KentaroKanada TetsunoriKobayashi - YoshihikoHayashi + YoshihikoHayashi 37–46 W17-1905 10.18653/v1/W17-1905 @@ -3405,7 +3405,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Creating and Validating Multilingual Semantic Representations for Six Languages: Expert versus Non-Expert Crowds MahmoudEl-Haj PaulRayson - ScottPiao + ScottPiao StephenWattam 61–71 W17-1908 @@ -3417,8 +3417,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Using Linked Disambiguated Distributional Networks for Word Sense Disambiguation AlexanderPanchenko StefanoFaralli - Simone PaoloPonzetto - ChrisBiemann + Simone PaoloPonzetto + ChrisBiemann 72–78 W17-1909 10.18653/v1/W17-1909 @@ -3431,7 +3431,7 @@ is able to handle phenomena related to scope by means of an higher-order type th JulieWeeds JohnWilkie JeremyReffin - DavidWeir + DavidWeir 79–90 W17-1910 10.18653/v1/W17-1910 @@ -3441,7 +3441,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Elucidating Conceptual Properties from Word Embeddings Kyoung-RokJang - Sung-HyonMyaeng + Sung-HyonMyaeng 91–95 W17-1911 10.18653/v1/W17-1911 @@ -3495,7 +3495,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the Sixth Workshop on Vision and Language W17-20 - AnyaBelz + AnyaBelz ErkutErdem KaterinaPastra KrystianMikolajczyk @@ -3534,9 +3534,9 @@ is able to handle phenomena related to scope by means of an higher-order type th Learning to Recognize Animals by Watching Documentaries: Using Subtitles as Weak Supervision - AparnaNurani Venkitasubramanian + AparnaNurani Venkitasubramanian TinneTuytelaars - Marie-FrancineMoens + Marie-FrancineMoens 21–30 W17-2003 10.18653/v1/W17-2003 @@ -3596,12 +3596,12 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature W17-22 - BeatriceAlex + BeatriceAlex StefaniaDegaetano-Ortlieb AnnaFeldman AnnaKazantseva NilsReiter - StanSzpakowicz + StanSzpakowicz 10.18653/v1/W17-22 Association for Computational Linguistics
Vancouver, Canada
@@ -3616,7 +3616,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Metaphor Detection in a Poetry Corpus VaibhavKesarwani - DianaInkpen + DianaInkpen StanSzpakowicz ChrisTanasescu 1–9 @@ -3640,7 +3640,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Investigating the Relationship between Literary Genres and Emotional Plot Development EvgenyKim - SebastianPadó + SebastianPadó RomanKlinger 17–26 W17-2203 @@ -3650,7 +3650,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Enjambment Detection in a Large Diachronic Corpus of <fixed-case>S</fixed-case>panish Sonnets - PabloRuiz Fabo + PabloRuiz Fabo ClaraMartínez Cantón ThierryPoibeau ElenaGonzález-Blanco @@ -3694,7 +3694,7 @@ is able to handle phenomena related to scope by means of an higher-order type th An End-to-end Environment for Research Question-Driven Entity Extraction and Network Analysis - AndreBlessing + AndreBlessing NoraEchelmeyer MarkusJohn NilsReiter @@ -3717,7 +3717,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Finding a Character’s Voice: Stylome Classification on Literary Characters - Liviu P.Dinu + Liviu P.Dinu Ana SabinaUban 78–82 W17-2210 @@ -3737,7 +3737,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Speeding up corpus development for linguistic research: language documentation and acquisition in <fixed-case>R</fixed-case>omansh Tuatschin GéraldineWalther - BenoîtSagot + BenoîtSagot 89–94 W17-2212 10.18653/v1/W17-2212 @@ -3761,7 +3761,7 @@ is able to handle phenomena related to scope by means of an higher-order type th A Dataset for <fixed-case>S</fixed-case>anskrit Word Segmentation AmrithKrishna - Pavan KumarSatuluri + Pavan KumarSatuluri PawanGoyal 105–114 W17-2214 @@ -3784,10 +3784,10 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the 16th BioNLP Workshop W17-23 - Kevin BretonnelCohen + Kevin BretonnelCohen DinaDemner-Fushman SophiaAnaniadou - JunichiTsujii + JunichiTsujii 10.18653/v1/W17-23 Association for Computational Linguistics
Vancouver, Canada,
@@ -3826,8 +3826,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Insights into Analogy Completion from the Biomedical Domain DenisNewman-Griffis - AlbertLai - EricFosler-Lussier + AlbertLai + EricFosler-Lussier 19–28 W17-2303 10.18653/v1/W17-2303 @@ -3836,7 +3836,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Deep learning for extracting protein-protein interactions from biomedical literature - YifanPeng + YifanPeng ZhiyongLu 29–38 W17-2304 @@ -3861,7 +3861,7 @@ is able to handle phenomena related to scope by means of an higher-order type th KonstantinosBougiatiotis AnastasiaKrithara GeorgiosPaliouras - IoannisKakadiaris + IoannisKakadiaris 48–57 W17-2306 10.18653/v1/W17-2306 @@ -3876,7 +3876,7 @@ is able to handle phenomena related to scope by means of an higher-order type th AdityaChandrasekar ZiYang NiloyGupta - EricNyberg + EricNyberg 58–66 W17-2307 10.18653/v1/W17-2307 @@ -3885,7 +3885,7 @@ is able to handle phenomena related to scope by means of an higher-order type th <fixed-case>M</fixed-case>acquarie <fixed-case>U</fixed-case>niversity at <fixed-case>B</fixed-case>io<fixed-case>ASQ</fixed-case> 5b – Query-based Summarisation Techniques for Selecting the Ideal Answers - DiegoMollá + DiegoMollá 67–75 W17-2308 10.18653/v1/W17-2308 @@ -3920,11 +3920,11 @@ is able to handle phenomena related to scope by means of an higher-order type th Creation and evaluation of a dictionary-based tagger for virus species and proteins - HelenCook + HelenCook RūdolfsBērziņš Cristina LealRodrıguez Juan MiguelCejuela - Lars JuhlJensen + Lars JuhlJensen 91–98 W17-2311 10.18653/v1/W17-2311 @@ -3934,8 +3934,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Representation of complex terms in a vector space structured by an ontology for a normalization task ArnaudFerré - PierreZweigenbaum - ClaireNédellec + PierreZweigenbaum + ClaireNédellec 99–106 W17-2312 10.18653/v1/W17-2312 @@ -3944,7 +3944,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Improving Correlation with Human Judgments by Integrating Semantic Similarity with Second–Order Vectors - BridgetMcInnes + BridgetMcInnes TedPedersen 107–116 W17-2313 @@ -3968,7 +3968,7 @@ is able to handle phenomena related to scope by means of an higher-order type th SudhaRao DanielMarcu KevinKnight - HalDaumé III + HalDaumé III 126–135 W17-2315 10.18653/v1/W17-2315 @@ -3981,7 +3981,7 @@ is able to handle phenomena related to scope by means of an higher-order type th AbeedSarker MasoudRouhizadeh KarenO’Connor - GracielaGonzalez + GracielaGonzalez 136–142 W17-2316 10.18653/v1/W17-2316 @@ -3991,8 +3991,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Unsupervised Context-Sensitive Spelling Correction of Clinical Free-Text with Word and Character N-Gram Embeddings PieterFivez - SimonŠuster - WalterDaelemans + SimonŠuster + WalterDaelemans 143–148 W17-2317 10.18653/v1/W17-2317 @@ -4002,7 +4002,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Characterization of Divergence in Impaired Speech of <fixed-case>ALS</fixed-case> Patients ArchnaBhatia - BonnieDorr + BonnieDorr KristyHollingshead Samuel L.Phillips BarbaraMcKenzie @@ -4015,10 +4015,10 @@ is able to handle phenomena related to scope by means of an higher-order type th Deep Learning for Punctuation Restoration in Medical Reports WaelSalloum - GregFinley + GregFinley ErikEdwards MarkMiller - DavidSuendermann-Oeft + DavidSuendermann-Oeft 159–164 W17-2319 10.18653/v1/W17-2319 @@ -4027,7 +4027,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Unsupervised Domain Adaptation for Clinical Negation Detection - TimothyMiller + TimothyMiller StevenBethard HadiAmiri GuerganaSavova @@ -4039,12 +4039,12 @@ is able to handle phenomena related to scope by means of an higher-order type th <fixed-case>B</fixed-case>io<fixed-case>C</fixed-case>reative <fixed-case>VI</fixed-case> Precision Medicine Track: creating a training corpus for mining protein-protein interactions affected by mutations - RezartaIslamaj Doğan + RezartaIslamaj Doğan AndrewChatr-aryamontri SunKim Chih-HsuanWei - YifanPeng - DonaldComeau + YifanPeng + DonaldComeau ZhiyongLu 171–175 W17-2321 @@ -4055,7 +4055,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Painless Relation Extraction with Kindred JakeLever - StevenJones + StevenJones 176–183 W17-2322 10.18653/v1/W17-2322 @@ -4066,7 +4066,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Noise Reduction Methods for Distantly Supervised Biomedical Relation Extraction GangLi CathyWu - K.Vijay-Shanker + K.Vijay-Shanker 184–193 W17-2323 10.18653/v1/W17-2323 @@ -4104,7 +4104,7 @@ is able to handle phenomena related to scope by means of an higher-order type th A.S.M. AshiqueMahmood KarenRoss CathyWu - K.Vijay-Shanker + K.Vijay-Shanker 206–215 W17-2326 10.18653/v1/W17-2326 @@ -4119,7 +4119,7 @@ is able to handle phenomena related to scope by means of an higher-order type th SophiaKatrenko PascalCoupet MariusDoornenbal - MichelleGregory + MichelleGregory 216–221 W17-2327 10.18653/v1/W17-2327 @@ -4153,7 +4153,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Protein Word Detection using Text Segmentation Techniques DeviGanesan - Ashish V.Tendulkar + Ashish V.Tendulkar SutanuChakraborti 238–246 W17-2330 @@ -4197,7 +4197,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Evaluating Feature Extraction Methods for Knowledge-based Biomedical Word Sense Disambiguation SamHenry ClintCuffy - BridgetMcInnes + BridgetMcInnes 272–281 W17-2334 10.18653/v1/W17-2334 @@ -4206,7 +4206,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Investigating the Documentation of Electronic Cigarette Use in the Veteran Affairs Electronic Health Record: A Pilot Study - DanielleMowery + DanielleMowery BrettSouth OlgaPatterson Shu-HongZhu @@ -4220,10 +4220,10 @@ is able to handle phenomena related to scope by means of an higher-order type th Automated Preamble Detection in Dictated Medical Reports WaelSalloum - GregFinley + GregFinley ErikEdwards MarkMiller - DavidSuendermann-Oeft + DavidSuendermann-Oeft 287–295 W17-2336 10.18653/v1/W17-2336 @@ -4233,7 +4233,7 @@ is able to handle phenomena related to scope by means of an higher-order type th A Biomedical Question Answering System in <fixed-case>B</fixed-case>io<fixed-case>ASQ</fixed-case> 2017 MouradSarrouti - SaidOuatik El Alaoui + SaidOuatik El Alaoui 296–301 W17-2337 10.18653/v1/W17-2337 @@ -4245,7 +4245,7 @@ is able to handle phenomena related to scope by means of an higher-order type th KevinPatel DivyaPatel MansiGolakiya - PushpakBhattacharyya + PushpakBhattacharyya NileshBirari 302–306 W17-2338 @@ -4277,7 +4277,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Representations of Time Expressions for Temporal Relation Extraction with Convolutional Neural Networks ChenLin - TimothyMiller + TimothyMiller DmitriyDligach StevenBethard GuerganaSavova @@ -4290,7 +4290,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Automatic Diagnosis Coding of Radiology Reports: A Comparison of Deep Learning and Conventional Classification Methods SarvnazKarimi - XiangDai + XiangDai HamedHassanzadeh AnthonyNguyen 328–332 @@ -4301,9 +4301,9 @@ is able to handle phenomena related to scope by means of an higher-order type th Automatic classification of doctor-patient questions for a virtual patient record query task - LeonardoCampillos Llanos - SophieRosset - PierreZweigenbaum + LeonardoCampillos Llanos + SophieRosset + PierreZweigenbaum 333–341 W17-2343 10.18653/v1/W17-2343 @@ -4326,7 +4326,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Clinical Event Detection with Hybrid Neural Architecture AdyashaMaharana - MelihaYetisgen + MelihaYetisgen 351–355 W17-2345 10.18653/v1/W17-2345 @@ -4336,8 +4336,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Extracting Personal Medical Events for User Timeline Construction using Minimal Supervision AakankshaNaik - ChrisBogart - CarolynRose + ChrisBogart + CarolynRose 356–364 W17-2346 10.18653/v1/W17-2346 @@ -4380,7 +4380,7 @@ is able to handle phenomena related to scope by means of an higher-order type th MartinRiedl SwapnaSomasundaran GoranGlavaš - EduardHovy + EduardHovy 10.18653/v1/W17-24 Association for Computational Linguistics
Vancouver, Canada
@@ -4398,7 +4398,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Henrique Ferrazde Arruda ThalesSinelli Luciano da FontouraCosta - Diego RaphaelAmancio + Diego RaphaelAmancio 1–10 W17-2401 10.18653/v1/W17-2401 @@ -4421,8 +4421,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Merging knowledge bases in different languages JerónimoHernández-González - Estevam R.Hruschka Jr. - Tom M.Mitchell + Estevam R.Hruschka Jr. + Tom M.Mitchell 21–29 W17-2403 10.18653/v1/W17-2403 @@ -4431,7 +4431,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Parameter Free Hierarchical Graph-Based Clustering for Analyzing Continuous Word Embeddings - Thomas AlexanderTrost + Thomas AlexanderTrost DietrichKlakow 30–38 W17-2404 @@ -4452,8 +4452,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Graph Methods for Multilingual <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>ets - Collin F.Baker - MichaelEllsworth + Collin F.Baker + MichaelEllsworth 45–50 W17-2406 10.18653/v1/W17-2406 @@ -4474,7 +4474,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Work Hard, Play Hard: Email Classification on the Avocado and <fixed-case>E</fixed-case>nron Corpora SakharAlkhereyf - OwenRambow + OwenRambow 57–65 W17-2408 10.18653/v1/W17-2408 @@ -4484,7 +4484,7 @@ is able to handle phenomena related to scope by means of an higher-order type th A Graph Based Semi-Supervised Approach for Analysis of Derivational Nouns in <fixed-case>S</fixed-case>anskrit AmrithKrishna - PavankumarSatuluri + PavankumarSatuluri HarshavardhanPonnada MuneebAhmed GulabArora @@ -4513,7 +4513,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the 10th Workshop on Building and Using Comparable Corpora W17-25 SergeSharoff - PierreZweigenbaum + PierreZweigenbaum ReinhardRapp 10.18653/v1/W17-25 Association for Computational Linguistics @@ -4528,7 +4528,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Users and Data: The Two Neglected Children of Bilingual Natural Language Processing Research - PhillippeLanglais + PhillippeLanglais 1–5 W17-2501 10.18653/v1/W17-2501 @@ -4538,7 +4538,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Deep Investigation of Cross-Language Plagiarism Detection Methods JérémyFerrero - LaurentBesacier + LaurentBesacier DidierSchwab FrédéricAgnès 6–15 @@ -4604,7 +4604,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Weighted Set-Theoretic Alignment of Comparable Sentences AndoniAzpeitia ThierryEtchegoyhen - EvaMartínez Garcia + EvaMartínez Garcia 41–45 W17-2508 10.18653/v1/W17-2508 @@ -4614,7 +4614,7 @@ is able to handle phenomena related to scope by means of an higher-order type th <fixed-case>BUCC</fixed-case> 2017 Shared Task: a First Attempt Toward a Deep Learning Framework for Identifying Parallel Sentences in Comparable Corpora FrancisGrégoire - PhilippeLanglais + PhilippeLanglais 46–50 W17-2509 10.18653/v1/W17-2509 @@ -4633,9 +4633,9 @@ is able to handle phenomena related to scope by means of an higher-order type th <fixed-case>BUCC</fixed-case>2017: A Hybrid Approach for Identifying Parallel Sentences in Comparable Corpora - SainikMahata + SainikMahata DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 56–59 W17-2511 10.18653/v1/W17-2511 @@ -4658,12 +4658,12 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the 2nd Workshop on Representation Learning for NLP W17-26 - PhilBlunsom + PhilBlunsom AntoineBordes KyunghyunCho - ShayCohen - ChrisDyer - EdwardGrefenstette + ShayCohen + ChrisDyer + EdwardGrefenstette Karl MoritzHermann LauraRimell JasonWeston @@ -4716,7 +4716,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Emergent Predication Structure in Hidden State Vectors of Neural Readers HaiWang - TakeshiOnishi + TakeshiOnishi KevinGimpel DavidMcAllester 26–36 @@ -4728,7 +4728,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Towards Harnessing Memory Networks for Coreference Resolution JoeCheri - PushpakBhattacharyya + PushpakBhattacharyya 37–42 W17-2605 10.18653/v1/W17-2605 @@ -4773,7 +4773,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Knowledge Base Completion: Baselines Strike Back RudolfKadlec - OndrejBajgar + OndrejBajgar JanKleindienst 69–74 W17-2609 @@ -4785,7 +4785,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Sequential Attention: A Context-Aware Alignment Function for Machine Reading SebastianBrarda PhilipYeres - SamuelBowman + SamuelBowman 75–80 W17-2610 10.18653/v1/W17-2610 @@ -4864,7 +4864,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Learning Bilingual Projections of Embeddings for Vocabulary Expansion in Machine Translation - Pranava SwaroopMadhyastha + Pranava SwaroopMadhyastha CristinaEspaña-Bonet 139–145 W17-2617 @@ -4975,7 +4975,7 @@ is able to handle phenomena related to scope by means of an higher-order type th HannesSchulz JeremieZumer LaylaEl Asri - ShikharSharma + ShikharSharma 219–227 W17-2626 10.18653/v1/W17-2626 @@ -5010,7 +5010,7 @@ is able to handle phenomena related to scope by means of an higher-order type th SandeepSubramanian SaiRajeswar FrancisDutil - ChrisPal + ChrisPal AaronCourville 241–251 W17-2629 @@ -5022,9 +5022,9 @@ is able to handle phenomena related to scope by means of an higher-order type th Deep Active Learning for Named Entity Recognition YanyaoShen HyokunYun - ZacharyLipton + ZacharyLipton YakovKronrod - AnimashreeAnandkumar + AnimashreeAnandkumar 252–256 W17-2630 10.18653/v1/W17-2630 @@ -5061,8 +5061,8 @@ is able to handle phenomena related to scope by means of an higher-order type th BenMiller Mariekevan Erp PiekVossen - MarthaPalmer - EduardHovy + MarthaPalmer + EduardHovy TerukoMitamura DavidCaswell 10.18653/v1/W17-27 @@ -5079,7 +5079,7 @@ is able to handle phenomena related to scope by means of an higher-order type th news<fixed-case>L</fixed-case>ens: building and visualizing long-ranging news stories PhilippeLaban - MartiHearst + MartiHearst 1–9 W17-2701 10.18653/v1/W17-2701 @@ -5089,7 +5089,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Detecting Changes in <fixed-case>T</fixed-case>witter Streams using Temporal Clusters of Hashtags YunliWang - CyrilGoutte + CyrilGoutte 10–14 W17-2702 10.18653/v1/W17-2702 @@ -5121,7 +5121,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Tracing armed conflicts with diachronic word embedding models AndreyKutuzov ErikVelldal - LiljaØvrelid + LiljaØvrelid 31–36 W17-2705 10.18653/v1/W17-2705 @@ -5131,7 +5131,7 @@ is able to handle phenomena related to scope by means of an higher-order type th The Circumstantial Event Ontology (<fixed-case>CEO</fixed-case>) - RoxaneSegers + RoxaneSegers TommasoCaselli PiekVossen 37–41 @@ -5143,9 +5143,9 @@ is able to handle phenomena related to scope by means of an higher-order type th Event Detection and Semantic Storytelling: Generating a Travelogue from a large Collection of Personal Letters GeorgRehm - JulianMoreno Schneider + JulianMoreno Schneider PeterBourgonje - AnkitSrivastava + AnkitSrivastava JanNehring ArminBerger LucaKönig @@ -5161,7 +5161,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Inference of Fine-Grained Event Causality from Blogs and Films ZhichaoHu ElaheRahimtoroghi - MarilynWalker + MarilynWalker 52–58 W17-2708 10.18653/v1/W17-2708 @@ -5172,7 +5172,7 @@ is able to handle phenomena related to scope by means of an higher-order type th On the Creation of a Security-Related Event Corpus MartinAtkinson JakubPiskorski - HristoTanev + HristoTanev VanniZavarella 59–65 W17-2709 @@ -5201,8 +5201,8 @@ is able to handle phenomena related to scope by means of an higher-order type th The Rich Event Ontology - SusanBrown - ClaireBonial + SusanBrown + ClaireBonial LeoObrst MarthaPalmer 87–97 @@ -5272,7 +5272,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Guiding Interaction Behaviors for Multi-modal Grounded Language Learning JesseThomason JivkoSinapov - RaymondMooney + RaymondMooney 20–24 W17-2803 10.18653/v1/W17-2803 @@ -5283,7 +5283,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Structured Learning for Context-aware Spoken Language Understanding of Robotic Commands AndreaVanzo DaniloCroce - RobertoBasili + RobertoBasili DanieleNardi 25–34 W17-2804 @@ -5329,14 +5329,14 @@ is able to handle phenomena related to scope by means of an higher-order type th Exploring Variation of Natural Human Commands to a Robot in a Collaborative Navigation Task MatthewMarge - ClaireBonial + ClaireBonial AshleyFoots - CoryHayes + CoryHayes CassidyHenry KimberlyPollard RonArtstein - ClareVoss - DavidTraum + ClareVoss + DavidTraum 58–66 W17-2808 10.18653/v1/W17-2808 @@ -5361,7 +5361,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Are Distributional Representations Ready for the Real World? Evaluating Word Vectors for Grounded Perceptual Meaning - LiLucy + LiLucy JonGauthier 76–85 W17-2810 @@ -5391,7 +5391,7 @@ is able to handle phenomena related to scope by means of an higher-order type th SriraamNatarajan Janardhan RaoDoppa JuliaHockenmaier - MarthaPalmer + MarthaPalmer DanRoth 95–103 W17-2812 @@ -5426,7 +5426,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Language-independent Gender Prediction on <fixed-case>T</fixed-case>witter NikolaLjubešić DarjaFišer - TomažErjavec + TomažErjavec 1–6 W17-2901 10.18653/v1/W17-2901 @@ -5445,9 +5445,9 @@ is able to handle phenomena related to scope by means of an higher-order type th Personality Driven Differences in Paraphrase Preference - DanielPreoţiuc-Pietro + DanielPreoţiuc-Pietro JordanCarpenter - LyleUngar + LyleUngar 17–26 W17-2903 10.18653/v1/W17-2903 @@ -5480,7 +5480,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Cross-Lingual Classification of Topics in Political Texts GoranGlavaš FedericoNanni - Simone PaoloPonzetto + Simone PaoloPonzetto 42–46 W17-2906 10.18653/v1/W17-2906 @@ -5502,9 +5502,9 @@ is able to handle phenomena related to scope by means of an higher-order type th ShrimaiPrabhumoye SamridhiChoudhary EvangeliaSpiliopoulou - ChristopherBogart - CarolynRose - Alan WBlack + ChristopherBogart + CarolynRose + Alan WBlack 53–62 W17-2908 10.18653/v1/W17-2908 @@ -5536,8 +5536,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Code-Switching as a Social Act: The Case of <fixed-case>A</fixed-case>rabic <fixed-case>W</fixed-case>ikipedia Talk Pages MichaelYoder ShrutiRijhwani - CarolynRosé - LoriLevin + CarolynRosé + LoriLevin 73–82 W17-2911 10.18653/v1/W17-2911 @@ -5558,7 +5558,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Ideological Phrase Indicators for Classification of Political Discourse Framing on <fixed-case>T</fixed-case>witter - KristenJohnson + KristenJohnson I-TaLee DanGoldwasser 90–99 @@ -5572,10 +5572,10 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the First Workshop on Abusive Language Online W17-30 - ZeerakWaseem + ZeerakWaseem Wendy Hui KyongChung DirkHovy - JoelTetreault + JoelTetreault 10.18653/v1/W17-30 Association for Computational Linguistics
Vancouver, BC, Canada
@@ -5600,7 +5600,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Constructive Language in News Comments VaradaKolhatkar - MaiteTaboada + MaiteTaboada 11–17 W17-3002 10.18653/v1/W17-3002 @@ -5657,7 +5657,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Legal Framework, Dataset and Annotation Schema for Socially Unacceptable Online Discourse Practices in <fixed-case>S</fixed-case>lovene DarjaFišer - TomažErjavec + TomažErjavec NikolaLjubešić 46–51 W17-3007 @@ -5731,8 +5731,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Using Convolutional Neural Networks to Classify Hate-Speech - BjörnGambäck - Utpal KumarSikdar + BjörnGambäck + Utpal KumarSikdar 85–90 W17-3013 10.18653/v1/W17-3013 @@ -5756,7 +5756,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the Fourth Workshop on Computational Linguistics and Clinical Psychology — From Linguistic Signal to Clinical Reality W17-31 KristyHollingshead - Molly E.Ireland + Molly E.Ireland KateLoveys 10.18653/v1/W17-31 Association for Computational Linguistics @@ -5796,7 +5796,7 @@ is able to handle phenomena related to scope by means of an higher-order type th A Corpus Analysis of Social Connections and Social Isolation in Adolescents Suffering from Depressive Disorders Jia-WenGuo - Danielle LMowery + Danielle LMowery DjinLai KatherineSward MikeConway @@ -5809,7 +5809,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Monitoring Tweets for Depression to Detect At-risk Users ZunairaJamil - DianaInkpen + DianaInkpen PrasadithBuddhitha KentonWhite 32–40 @@ -5833,7 +5833,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Natural-language Interactive Narratives in Imaginal Exposure Therapy for Obsessive-Compulsive Disorder MelissaRoemmele PaolaMardo - AndrewGordon + AndrewGordon 48–57 W17-3106 10.18653/v1/W17-3106 @@ -5853,8 +5853,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Detecting and Explaining Crisis RohanKshirsagar - RobertMorris - SamuelBowman + RobertMorris + SamuelBowman 66–73 W17-3108 10.18653/v1/W17-3108 @@ -5888,7 +5888,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the First Workshop on Neural Machine Translation W17-32 - ThangLuong + ThangLuong AlexandraBirch GrahamNeubig AndrewFinch @@ -5918,7 +5918,7 @@ is able to handle phenomena related to scope by means of an higher-order type th JanNiehues EunahCho Thanh-LeHa - AlexWaibel + AlexWaibel 11–17 W17-3202 10.18653/v1/W17-3202 @@ -5970,7 +5970,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Beam Search Strategies for Neural Machine Translation MarkusFreitag - YaserAl-Onaizan + YaserAl-Onaizan 56–60 W17-3207 10.18653/v1/W17-3207 @@ -6009,7 +6009,7 @@ is able to handle phenomena related to scope by means of an higher-order type th W17-34 MakotoKanazawa Philippede Groote - MehrnooshSadrzadeh + MehrnooshSadrzadeh 10.18653/v1/W17-34 Association for Computational Linguistics
London, UK
@@ -6058,7 +6058,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
Latent-Variable <fixed-case>PCFG</fixed-case>s: Background and Applications - ShayCohen + ShayCohen 47–58 W17-3405 10.18653/v1/W17-3405 @@ -6083,7 +6083,7 @@ is able to handle phenomena related to scope by means of an higher-order type th A Monotonicity Calculus and Its Completeness ThomasIcard - LawrenceMoss + LawrenceMoss WilliamTune 75–87 W17-3408 @@ -6149,8 +6149,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the 10th International Conference on Natural Language Generation W17-35 - Jose M.Alonso - AlbertoBugarín + Jose M.Alonso + AlbertoBugarín EhudReiter 10.18653/v1/W17-35 Association for Computational Linguistics @@ -6165,10 +6165,10 @@ is able to handle phenomena related to scope by means of an higher-order type th Linguistic realisation as machine translation: Comparing different <fixed-case>MT</fixed-case> models for <fixed-case>AMR</fixed-case>-to-text generation - ThiagoCastro Ferreira + ThiagoCastro Ferreira IacerCalixto SanderWubben - EmielKrahmer + EmielKrahmer 1–10 W17-3501 10.18653/v1/W17-3501 @@ -6177,7 +6177,7 @@ is able to handle phenomena related to scope by means of an higher-order type th A Survey on Intelligent Poetry Generation: Languages, Features, Techniques, Reutilisation and Evaluation - HugoGonçalo Oliveira + HugoGonçalo Oliveira 11–20 W17-3502 10.18653/v1/W17-3502 @@ -6197,7 +6197,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Content Selection for Real-time Sports News Construction from Commentary Texts - Jin-geYao + Jin-geYao JianminZhang XiaojunWan JianguoXiao @@ -6244,7 +6244,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Co-<fixed-case>P</fixed-case>oe<fixed-case>T</fixed-case>ry<fixed-case>M</fixed-case>e: a Co-Creative Interface for the Composition of Poetry - HugoGonçalo Oliveira + HugoGonçalo Oliveira TiagoMendes AnaBoavida 70–71 @@ -6255,8 +6255,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Refer-i<fixed-case>TTS</fixed-case>: A System for Referring in Spoken Installments to Objects in Real-World Images - SinaZarrieß - M. SoledadLópez Gambino + SinaZarrieß + M. SoledadLópez Gambino DavidSchlangen 72–73 W17-3509 @@ -6296,7 +6296,7 @@ is able to handle phenomena related to scope by means of an higher-order type th <fixed-case>PASS</fixed-case>: A <fixed-case>D</fixed-case>utch data-to-text system for soccer, targeted towards specific audiences Chrisvan der Lee - EmielKrahmer + EmielKrahmer SanderWubben 95–104 W17-3513 @@ -6317,7 +6317,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Talking about the world with a distributed model - GemmaBoleda + GemmaBoleda 114 W17-3515 10.18653/v1/W17-3515 @@ -6327,7 +6327,7 @@ is able to handle phenomena related to scope by means of an higher-order type th The <fixed-case>C</fixed-case>ode2<fixed-case>T</fixed-case>ext Challenge: Text Generation in Source Libraries KyleRichardson - SinaZarrieß + SinaZarrieß JonasKuhn 115–119 W17-3516 @@ -6340,7 +6340,7 @@ is able to handle phenomena related to scope by means of an higher-order type th SimonMille BerndBohnet LeoWanner - AnjaBelz + AnjaBelz 120–123 W17-3517 10.18653/v1/W17-3517 @@ -6391,7 +6391,7 @@ is able to handle phenomena related to scope by means of an higher-order type th <fixed-case>G</fixed-case>-<fixed-case>TUNA</fixed-case>: a corpus of referring expressions in <fixed-case>G</fixed-case>erman, including duration information - DavidHowcroft + DavidHowcroft JorrigVogels VeraDemberg 149–153 @@ -6489,7 +6489,7 @@ is able to handle phenomena related to scope by means of an higher-order type th EvaHasler FelixStahlberg MarcusTomalin - Adriàde Gispert + Adriàde Gispert BillByrne 208–212 W17-3531 @@ -6499,12 +6499,12 @@ is able to handle phenomena related to scope by means of an higher-order type th Investigating the content and form of referring expressions in <fixed-case>M</fixed-case>andarin: introducing the Mtuna corpus - Keesvan Deemter + Keesvan Deemter LeSun RintSybesma XiaoLi BoChen - MuyunYang + MuyunYang 213–217 W17-3532 10.18653/v1/W17-3532 @@ -6537,7 +6537,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Textually Summarising Incomplete Data StephanieInglis EhudReiter - SomayajuluSripada + SomayajuluSripada 228–232 W17-3535 10.18653/v1/W17-3535 @@ -6546,8 +6546,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Improving the generation of personalised descriptions - ThiagoCastro Ferreira - IvandréParaboni + ThiagoCastro Ferreira + IvandréParaboni 233–237 W17-3536 10.18653/v1/W17-3536 @@ -6587,9 +6587,9 @@ is able to handle phenomena related to scope by means of an higher-order type th Referential Success of Set Referring Expressions with Fuzzy Properties - NicolásMarín + NicolásMarín GustavoRivas-Gervilla - DanielSánchez + DanielSánchez 247–251 W17-3540 10.18653/v1/W17-3540 @@ -6686,7 +6686,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Applying the <fixed-case>R</fixed-case>hetorical <fixed-case>S</fixed-case>tructure <fixed-case>T</fixed-case>heory in <fixed-case>A</fixed-case>lzheimer patients’ speech AnayeliPaulino - GerardoSierra + GerardoSierra 34–38 W17-3605 10.18653/v1/W17-3605 @@ -6695,7 +6695,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Using lexical level information in discourse structures for <fixed-case>B</fixed-case>asque sentiment analysis JonAlkorta - KoldoGojenola + KoldoGojenola MikelIruskieta MaiteTaboada 39–47 @@ -6706,7 +6706,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Framework for the Analysis of Simplified Texts Taking Discourse into Account: the <fixed-case>B</fixed-case>asque Causal Relations as Case Study ItziarGonzalez-Dios - ArantzaDiaz de Ilarraza + ArantzaDiaz de Ilarraza MikelIruskieta 48–57 W17-3607 @@ -6771,9 +6771,9 @@ is able to handle phenomena related to scope by means of an higher-order type th A Simple Method for Clarifying Sentences with Coordination Ambiguities - MichaelWhite + MichaelWhite ManjuanDuan - David L.King + David L.King W17-3702 10.18653/v1/W17-3702 white-etal-2017-simple @@ -6787,7 +6787,7 @@ is able to handle phenomena related to scope by means of an higher-order type th An Essay on Self-explanatory Computational Intelligence: A Linguistic Model of Data Processing Systems - Jose M.Alonso + Jose M.Alonso GracianTrivino W17-3704 10.18653/v1/W17-3704 @@ -6799,8 +6799,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the Linguistic Resources for Automatic Natural Language Generation - LiRA@NLG W17-38 KristinaKocijan - PeterMachonis - MaxSilberztein + PeterMachonis + MaxSilberztein 10.18653/v1/W17-38 Association for Computational Linguistics
Santiago de Compostela, Spain
@@ -6826,7 +6826,7 @@ is able to handle phenomena related to scope by means of an higher-order type th MilagrosFernández-Gavilanes EnriqueCosta-Montenegro JonathanJuncal-Martínez - Francisco J.González-Castaño + Francisco J.González-Castaño 11-15 W17-3802 10.18653/v1/W17-3802 @@ -6891,7 +6891,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the Workshop on Computational Creativity in Natural Language Generation (CC-NLG 2017) W17-39 - HugoGonçalo Oliveira + HugoGonçalo Oliveira BenBurtenshaw MikeKestemont TomDe Smedt @@ -6928,7 +6928,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
Template-Free Construction of Rhyming Poems with Thematic Cohesion - PabloGervás + PabloGervás 21–28 W17-3903 10.18653/v1/W17-3903 @@ -6936,7 +6936,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Synthetic Literature: Writing Science Fiction in a Co-Creative Process - EnriqueManjavacas + EnriqueManjavacas FolgertKarsdorp BenBurtenshaw MikeKestemont @@ -6987,7 +6987,7 @@ is able to handle phenomena related to scope by means of an higher-order type th CyrilAllauzen FrançoiseBeaufays TomOuyang - MichaelRiley + MichaelRiley DavidRybach 10–19 W17-4002 @@ -7022,7 +7022,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Finite-State Morphological Analysis for <fixed-case>M</fixed-case>arathi VinitRavishankar - Francis M.Tyers + Francis M.Tyers 50–55 W17-4006 10.18653/v1/W17-4006 @@ -7031,7 +7031,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Word Transduction for Addressing the <fixed-case>OOV</fixed-case> Problem in Machine Translation for Similar Resource-Scarce Languages ShashikantSharma - Anil KumarSingh + Anil KumarSingh 56–63 W17-4007 10.18653/v1/W17-4009 @@ -7058,7 +7058,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Evaluating an Automata Approach to Query Containment - MichaelMinock + MichaelMinock 75–79 W17-4010 10.18653/v1/W17-4010 @@ -7070,7 +7070,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the First Workshop on Subword and Character Level Models in NLP W17-41 ManaalFaruqui - HinrichSchuetze + HinrichSchuetze IsabelTrancoso YadollahYaghoobzadeh 10.18653/v1/W17-41 @@ -7097,7 +7097,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Learning variable length units for <fixed-case>SMT</fixed-case> between related languages via Byte Pair Encoding AnoopKunchukuttan - PushpakBhattacharyya + PushpakBhattacharyya 14–24 W17-4102 10.18653/v1/W17-4102 @@ -7119,7 +7119,7 @@ is able to handle phenomena related to scope by means of an higher-order type th MariaPonomareva KirillMilintsevich EkaterinaChernyak - AnatolyStarostin + AnatolyStarostin 31–35 W17-4104 10.18653/v1/W17-4104 @@ -7150,7 +7150,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Weakly supervised learning of allomorphy - MiikkaSilfverberg + MiikkaSilfverberg MansHulden 46–56 W17-4107 @@ -7194,7 +7194,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Exploring Cross-Lingual Transfer of Morphological Knowledge In Sequence-to-Sequence Models HuimingJin - KatharinaKann + KatharinaKann 70–75 W17-4110 10.18653/v1/W17-4110 @@ -7203,7 +7203,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Unlabeled Data for Morphological Generation With Character-Based Sequence-to-Sequence Models - KatharinaKann + KatharinaKann HinrichSchütze 76–81 W17-4111 @@ -7250,9 +7250,9 @@ is able to handle phenomena related to scope by means of an higher-order type th Word Representation Models for Morphologically Rich Languages in Neural Machine Translation EkaterinaVylomova - TrevorCohn + TrevorCohn XuanliHe - GholamrezaHaffari + GholamrezaHaffari 103–108 W17-4115 10.18653/v1/W17-4115 @@ -7262,7 +7262,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Spell-Checking based on Syllabification and Character-level Graphs for a <fixed-case>P</fixed-case>eruvian Agglutinative Language CarloAlva - ArturoOncevay + ArturoOncevay 109–116 W17-4116 10.18653/v1/W17-4116 @@ -7283,7 +7283,7 @@ is able to handle phenomena related to scope by means of an higher-order type th A General-Purpose Tagger with Convolutional Neural Networks XiangYu - AgnieszkaFalenska + AgnieszkaFalenska Ngoc ThangVu 124–129 W17-4118 @@ -7313,7 +7313,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Neural Paraphrase Identification of Questions with Noisy Pretraining - Gaurav SinghTomar + Gaurav SinghTomar ThyagoDuque OscarTäckström JakobUszkoreit @@ -7328,7 +7328,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Sub-character Neural Language Modelling in <fixed-case>J</fixed-case>apanese VietNguyen JulianBrooke - TimothyBaldwin + TimothyBaldwin 148–153 W17-4122 10.18653/v1/W17-4122 @@ -7337,9 +7337,9 @@ is able to handle phenomena related to scope by means of an higher-order type th Byte-based Neural Machine Translation - Marta R.Costa-jussà + Marta R.Costa-jussà CarlosEscolano - José A. R.Fonollosa + José A. R.Fonollosa 154–158 W17-4123 10.18653/v1/W17-4123 @@ -7349,7 +7349,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Improving Opinion-Target Extraction with Character-Level Word Embeddings SoufianJebbara - PhilippCimiano + PhilippCimiano 159–167 W17-4124 10.18653/v1/W17-4124 @@ -7378,7 +7378,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Predicting News Values from Headline Text and Emotions Maria Piadi Buono JanŠnajder - BojanaDalbelo Bašić + BojanaDalbelo Bašić GoranGlavaš MartinTutek NatasaMilic-Frayling @@ -7391,7 +7391,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Predicting User Views in Online News DanielHardt - OwenRambow + OwenRambow 7–12 W17-4202 10.18653/v1/W17-4202 @@ -7414,9 +7414,9 @@ is able to handle phenomena related to scope by means of an higher-order type th What to Write? A topic recommender for journalists AlessandroCucchiarelli - ChristianMorbidoni + ChristianMorbidoni GiovanniStilo - PaolaVelardi + PaolaVelardi 19–24 W17-4204 10.18653/v1/W17-4204 @@ -7439,7 +7439,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Language-based Construction of Explorable News Graphs for Journalists RémiBois - GuillaumeGravier + GuillaumeGravier EricJamet EmmanuelMorin PascaleSébillot @@ -7456,7 +7456,7 @@ is able to handle phenomena related to scope by means of an higher-order type th PiekVossen Jannekevan der Zwaan AntskeFokkens - Willemvan Hage + Willemvan Hage IngerLeemans IsaMaks 37–45 @@ -7469,7 +7469,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Analyzing the Revision Logs of a <fixed-case>J</fixed-case>apanese Newspaper for Article Quality Assessment HideakiTamori YutaHitomi - NaoakiOkazaki + NaoakiOkazaki KentaroInui 46–50 W17-4208 @@ -7493,7 +7493,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Incongruent Headlines: Yet Another Way to Mislead Your Readers SophieChesney MariaLiakata - MassimoPoesio + MassimoPoesio MatthewPurver 56–61 W17-4210 @@ -7514,8 +7514,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Semantic Storytelling, Cross-lingual Event Detection and other Semantic Services for a Newsroom Content Curation Dashboard - JulianMoreno-Schneider - AnkitSrivastava + JulianMoreno-Schneider + AnkitSrivastava PeterBourgonje DavidWabnitz GeorgRehm @@ -7551,7 +7551,7 @@ is able to handle phenomena related to scope by means of an higher-order type th From Clickbait to Fake News Detection: An Approach based on Detecting the Stance of Headlines to Articles PeterBourgonje - JulianMoreno Schneider + JulianMoreno Schneider GeorgRehm 84–89 W17-4215 @@ -7583,7 +7583,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Using <fixed-case>N</fixed-case>ew <fixed-case>Y</fixed-case>ork <fixed-case>T</fixed-case>imes Picks to Identify Constructive Comments VaradaKolhatkar - MaiteTaboada + MaiteTaboada 100–105 W17-4218 10.18653/v1/W17-4218 @@ -7609,7 +7609,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Kai-WeiChang Ming-WeiChang VivekSrikumar - Alexander M.Rush + Alexander M.Rush 10.18653/v1/W17-43 Association for Computational Linguistics
Copenhagen, Denmark
@@ -7654,7 +7654,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Structured Prediction via Learning to Search under Bandit Feedback AmrSharaf - HalDaumé III + HalDaumé III 17–26 W17-4304 10.18653/v1/W17-4304 @@ -7665,7 +7665,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Syntax Aware <fixed-case>LSTM</fixed-case> model for Semantic Role Labeling FengQian LeiSha - BaobaoChang + BaobaoChang Lu-chenLiu MingZhang 27–32 @@ -7697,7 +7697,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Piecewise Latent Variables for Neural Variational Text Processing - Iulian VladSerban + Iulian VladSerban AlexanderOrorbia II JoellePineau AaronCourville @@ -7712,10 +7712,10 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the 3rd Workshop on Noisy User-generated Text W17-44 - LeonDerczynski + LeonDerczynski WeiXu AlanRitter - TimBaldwin + TimBaldwin 10.18653/v1/W17-44 Association for Computational Linguistics
Copenhagen, Denmark
@@ -7739,9 +7739,9 @@ is able to handle phenomena related to scope by means of an higher-order type th Towards the Understanding of Gaming Audiences by Modeling Twitch Emotes FrancescoBarbieri - LuisEspinosa-Anke + LuisEspinosa-Anke MiguelBallesteros - JuanSoler-Company + JuanSoler-Company HoracioSaggion 11–20 W17-4402 @@ -7764,7 +7764,7 @@ is able to handle phenomena related to scope by means of an higher-order type th To normalize, or not to normalize: The impact of normalization on Part-of-Speech tagging Robvan der Goot - BarbaraPlank + BarbaraPlank MalvinaNissim 31–39 W17-4404 @@ -7787,7 +7787,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Incorporating Metadata into Content-Based User Embeddings LinziXing - Michael J.Paul + Michael J.Paul 45–49 W17-4406 10.18653/v1/W17-4406 @@ -7797,8 +7797,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Simple Queries as Distant Labels for Predicting Gender on <fixed-case>T</fixed-case>witter ChrisEmmery - GrzegorzChrupała - WalterDaelemans + GrzegorzChrupała + WalterDaelemans 50–55 W17-4407 10.18653/v1/W17-4407 @@ -7819,7 +7819,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Evaluating hypotheses in geolocation on a very large sample of <fixed-case>T</fixed-case>witter BaharSalehi - AndersSøgaard + AndersSøgaard 62–67 W17-4409 10.18653/v1/W17-4409 @@ -7830,7 +7830,7 @@ is able to handle phenomena related to scope by means of an higher-order type th The Effect of Error Rate in Artificially Generated Data for Automatic Preposition and Determiner Correction FraserBowen JonDehdari - Josefvan Genabith + Josefvan Genabith 68–76 W17-4410 10.18653/v1/W17-4410 @@ -7852,7 +7852,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Noisy <fixed-case>U</fixed-case>yghur Text Normalization OsmanTursun - RuketCakici + RuketCakici 85–93 W17-4412 10.18653/v1/W17-4412 @@ -7862,7 +7862,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Crowdsourcing Multiple Choice Science Questions JohannesWelbl - Nelson F.Liu + Nelson F.Liu MattGardner 94–106 W17-4413 @@ -7887,8 +7887,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Huntsville, hospitals, and hockey teams: Names can reveal your location BaharSalehi DirkHovy - EduardHovy - AndersSøgaard + EduardHovy + AndersSøgaard 116–121 W17-4415 10.18653/v1/W17-4415 @@ -7898,8 +7898,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Improving Document Clustering by Removing Unnatural Language MyunghaJang - Jinho D.Choi - JamesAllan + Jinho D.Choi + JamesAllan 122–130 W17-4416 10.18653/v1/W17-4416 @@ -7953,10 +7953,10 @@ is able to handle phenomena related to scope by means of an higher-order type th Multi-channel <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case>-<fixed-case>CRF</fixed-case> Model for Emerging Named Entity Recognition in Social Media - Bill Y.Lin - FrankXu + Bill Y.Lin + FrankXu ZhiyiLuo - KennyZhu + KennyZhu 160–165 W17-4421 10.18653/v1/W17-4421 @@ -7985,8 +7985,8 @@ is able to handle phenomena related to scope by means of an higher-order type th A Feature-based Ensemble Approach to Recognition of Emerging and Rare Named Entities - Utpal KumarSikdar - BjörnGambäck + Utpal KumarSikdar + BjörnGambäck 177–181 W17-4424 10.18653/v1/W17-4424 @@ -7999,7 +7999,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the Workshop on New Frontiers in Summarization W17-45 LuWang - Jackie Chi KitCheung + Jackie Chi KitCheung GiuseppeCarenini FeiLiu 10.18653/v1/W17-45 @@ -8057,7 +8057,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Coarse-to-Fine Attention Models for Document Summarization JeffreyLing - AlexanderRush + AlexanderRush 33–42 W17-4505 10.18653/v1/W17-4505 @@ -8067,7 +8067,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Automatic Community Creation for Abstractive Spoken Conversations Summarization KaranSingla - EvgenyStepanov + EvgenyStepanov Ali OrkanBayer GiuseppeCarenini GiuseppeRiccardi @@ -8102,8 +8102,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Topic Model Stability for Hierarchical Summarization - JohnMiller - KathleenMcCoy + JohnMiller + KathleenMcCoy 64–73 W17-4509 10.18653/v1/W17-4509 @@ -8158,7 +8158,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the Workshop on Speech-Centric Natural Language Processing W17-46 NicholasRuiz - SrinivasBangalore + SrinivasBangalore 10.18653/v1/W17-46 Association for Computational Linguistics
Copenhagen, Denmark
@@ -8173,7 +8173,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Functions of Silences towards Information Flow in Spoken Conversation Shammur AbsarChowdhury - EvgenyStepanov + EvgenyStepanov MorenaDanieli GiuseppeRiccardi 1–9 @@ -8195,7 +8195,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Analyzing Human and Machine Performance In Resolving Ambiguous Spoken Sentences HusseinGhaly - MichaelMandel + MichaelMandel 18–26 W17-4603 10.18653/v1/W17-4603 @@ -8241,7 +8241,7 @@ is able to handle phenomena related to scope by means of an higher-order type th AntoniosAnastasopoulos SameerBansal DavidChiang - SharonGoldwater + SharonGoldwater AdamLopez 53–58 W17-4607 @@ -8252,7 +8252,7 @@ is able to handle phenomena related to scope by means of an higher-order type th <fixed-case>A</fixed-case>mharic-<fixed-case>E</fixed-case>nglish Speech Translation in Tourism Domain MichaelMelese - LaurentBesacier + LaurentBesacier MillionMeshesha 59–66 W17-4608 @@ -8273,7 +8273,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Improving coreference resolution with automatically predicted prosodic information - InaRoesiger + InaRoesiger SabrinaStehwien ArndtRiester Ngoc ThangVu @@ -8288,14 +8288,14 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the Second Conference on Machine Translation W17-47 - OndřejBojar + OndřejBojar ChristianBuck - RajenChatterjee + RajenChatterjee ChristianFedermann YvetteGraham BarryHaddow MatthiasHuck - Antonio JimenoYepes + Antonio JimenoYepes PhilippKoehn JuliaKreutzer 10.18653/v1/W17-47 @@ -8313,7 +8313,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Sense-Aware Statistical Machine Translation using Adaptive Context-Dependent Clustering XiaoPu NikolaosPappas - AndreiPopescu-Belis + AndreiPopescu-Belis 1–10 W17-4701 10.18653/v1/W17-4701 @@ -8321,7 +8321,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Improving Word Sense Disambiguation in Neural Machine Translation with Sense Embeddings - AnnetteRios Gonzales + AnnetteRios Gonzales LauraMascarell RicoSennrich 11–19 @@ -8332,7 +8332,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Word Representations in Factored Neural Machine Translation FranckBurlot - MercedesGarcía-Martínez + MercedesGarcía-Martínez LoïcBarrault FethiBougares FrançoisYvon @@ -8344,8 +8344,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Modeling Target-Side Inflection in Neural Machine Translation AlešTamchyna - MarionWeller-Di Marco - AlexanderFraser + MarionWeller-Di Marco + AlexanderFraser 32–42 W17-4704 10.18653/v1/W17-4704 @@ -8364,7 +8364,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Target-side Word Segmentation Strategies for Neural Machine Translation MatthiasHuck SimonRiess - AlexanderFraser + AlexanderFraser 56–67 W17-4706 10.18653/v1/W17-4706 @@ -8372,7 +8372,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Predicting Target Language <fixed-case>CCG</fixed-case> Supertags Improves Neural Machine Translation - MariaNădejde + MariaNădejde SivaReddy RicoSennrich TomaszDwojak @@ -8406,8 +8406,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Deep architectures for Neural Machine Translation - Antonio ValerioMiceli Barone - JindřichHelcl + Antonio ValerioMiceli Barone + JindřichHelcl RicoSennrich BarryHaddow AlexandraBirch @@ -8419,7 +8419,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Biasing Attention-Based Recurrent Neural Networks Using External Alignment Information TamerAlkhouli - HermannNey + HermannNey 108–117 W17-4711 10.18653/v1/W17-4711 @@ -8428,7 +8428,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Effective Domain Mixing for Neural Machine Translation DennyBritz - QuocLe + QuocLe ReidPryzant 118–126 W17-4712 @@ -8437,9 +8437,9 @@ is able to handle phenomena related to scope by means of an higher-order type th Multi-Domain Neural Machine Translation through Unsupervised Adaptation - M. AminFarajian + M. AminFarajian MarcoTurchi - MatteoNegri + MatteoNegri MarcelloFederico 127–137 W17-4713 @@ -8449,8 +8449,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Adapting Neural Machine Translation with Parallel Synthetic Data MaraChinea-Ríos - ÁlvaroPeris - FranciscoCasacuberta + ÁlvaroPeris + FranciscoCasacuberta 138–147 W17-4714 10.18653/v1/W17-4714 @@ -8459,7 +8459,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Copied Monolingual Data Improves Low-Resource Neural Machine Translation AnnaCurrey - Antonio ValerioMiceli Barone + Antonio ValerioMiceli Barone KennethHeafield 148–156 W17-4715 @@ -8469,11 +8469,11 @@ is able to handle phenomena related to scope by means of an higher-order type th Guiding Neural Machine Translation Decoding with External Knowledge RajenChatterjee - MatteoNegri + MatteoNegri MarcoTurchi MarcelloFederico LuciaSpecia - FrédéricBlain + FrédéricBlain 157–168 W17-4716 10.18653/v1/W17-4716 @@ -8492,9 +8492,9 @@ is able to handle phenomena related to scope by means of an higher-order type th QunLiu VarvaraLogacheva ChristofMonz - MatteoNegri + MatteoNegri MattPost - RaphaelRubino + RaphaelRubino LuciaSpecia MarcoTurchi 169–214 @@ -8517,9 +8517,9 @@ is able to handle phenomena related to scope by means of an higher-order type th Findings of the <fixed-case>WMT</fixed-case> 2017 Biomedical Translation Shared Task AntonioJimeno Yepes - AurélieNévéol + AurélieNévéol MarianaNeves - KarinVerspoor + KarinVerspoor OndřejBojar ArthurBoyer CristianGrozea @@ -8542,7 +8542,7 @@ is able to handle phenomena related to scope by means of an higher-order type th RomanSudarikov DavidMareček TomKocmi - DušanVariš + DušanVariš OndřejBojar 248–256 W17-4720 @@ -8568,10 +8568,10 @@ is able to handle phenomena related to scope by means of an higher-order type th GuillaumeKlein CatherineKobus NataliaSegal - ChristopheServan + ChristopheServan BoWang DakunZhang - JosepCrego + JosepCrego JeanSenellart 265–270 W17-4722 @@ -8580,7 +8580,7 @@ is able to handle phenomena related to scope by means of an higher-order type th <fixed-case>FBK</fixed-case>’s Participation to the <fixed-case>E</fixed-case>nglish-to-<fixed-case>G</fixed-case>erman News Translation Task of <fixed-case>WMT</fixed-case> 2017 - Mattia AntoninoDi Gangi + Mattia AntoninoDi Gangi NicolaBertoldi MarcelloFederico 271–275 @@ -8604,8 +8604,8 @@ is able to handle phenomena related to scope by means of an higher-order type th The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> Neural Machine Translation System for <fixed-case>G</fixed-case>erman/<fixed-case>F</fixed-case>innish-<fixed-case>E</fixed-case>nglish Using the Inverse Direction Model in Rescoring CarlosEscolano - Marta R.Costa-jussà - José A. R.Fonollosa + Marta R.Costa-jussà + José A. R.Fonollosa 283–287 W17-4725 10.18653/v1/W17-4725 @@ -8613,7 +8613,7 @@ is able to handle phenomena related to scope by means of an higher-order type th <fixed-case>LIUM</fixed-case> Machine Translation Systems for <fixed-case>WMT</fixed-case>17 News Translation Task - MercedesGarcía-Martínez + MercedesGarcía-Martínez OzanCaglayan WalidAransa AdrienBardet @@ -8637,7 +8637,7 @@ is able to handle phenomena related to scope by means of an higher-order type th The <fixed-case>AFRL-MITLL</fixed-case> <fixed-case>WMT17</fixed-case> Systems: Old, New, Borrowed, <fixed-case>BLEU</fixed-case> JeremyGwinnup - TimothyAnderson + TimothyAnderson GrantErdmann KatherineYoung MichaeelKazi @@ -8663,7 +8663,7 @@ is able to handle phenomena related to scope by means of an higher-order type th <fixed-case>LMU</fixed-case> <fixed-case>M</fixed-case>unich’s Neural Machine Translation Systems for News Articles and Health Information Texts MatthiasHuck FabienneBraune - AlexanderFraser + AlexanderFraser 315–322 W17-4730 10.18653/v1/W17-4730 @@ -8672,7 +8672,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Rule-based Machine translation from <fixed-case>E</fixed-case>nglish to <fixed-case>F</fixed-case>innish ArviHurskainen - JörgTiedemann + JörgTiedemann 323–329 W17-4731 10.18653/v1/W17-4731 @@ -8696,7 +8696,7 @@ is able to handle phenomena related to scope by means of an higher-order type th The <fixed-case>H</fixed-case>elsinki Neural Machine Translation System RobertÖstling YvesScherrer - JörgTiedemann + JörgTiedemann GongboTang TommiNieminen 338–347 @@ -8707,20 +8707,20 @@ is able to handle phenomena related to scope by means of an higher-order type th The <fixed-case>QT</fixed-case>21 Combined Machine Translation System for <fixed-case>E</fixed-case>nglish to <fixed-case>L</fixed-case>atvian Jan-ThorstenPeter - HermannNey + HermannNey OndřejBojar - Ngoc-QuanPham + Ngoc-QuanPham JanNiehues - AlexWaibel + AlexWaibel FranckBurlot FrançoisYvon - MārcisPinnis + MārcisPinnis ValtersŠics JasmijnBastings MiguelRios WilkerAziz PhilipWilliams - FrédéricBlain + FrédéricBlain LuciaSpecia 348–357 W17-4734 @@ -8736,7 +8736,7 @@ is able to handle phenomena related to scope by means of an higher-order type th JanRosendahl NickRossenbach MiguelGraça - HermannNey + HermannNey 358–365 W17-4735 10.18653/v1/W17-4735 @@ -8744,12 +8744,12 @@ is able to handle phenomena related to scope by means of an higher-order type th The Karlsruhe Institute of Technology Systems for the News Translation Task in <fixed-case>WMT</fixed-case> 2017 - Ngoc-QuanPham + Ngoc-QuanPham JanNiehues Thanh-LeHa EunahCho MatthiasSperber - AlexanderWaibel + AlexanderWaibel 366–373 W17-4736 10.18653/v1/W17-4736 @@ -8757,8 +8757,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Tilde’s Machine Translation Systems for <fixed-case>WMT</fixed-case> 2017 - MārcisPinnis - RihardsKrišlauks + MārcisPinnis + RihardsKrišlauks TomsMiks DaigaDeksne ValtersŠics @@ -8786,7 +8786,7 @@ is able to handle phenomena related to scope by means of an higher-order type th UlrichGermann BarryHaddow KennethHeafield - Antonio ValerioMiceli Barone + Antonio ValerioMiceli Barone PhilipWilliams 389–399 W17-4739 @@ -8810,7 +8810,7 @@ is able to handle phenomena related to scope by means of an higher-order type th The <fixed-case>JAIST</fixed-case> Machine Translation Systems for <fixed-case>WMT</fixed-case> 17 Hai-LongTrieu Trung-TinPham - Le-MinhNguyen + Le-MinhNguyen 405–409 W17-4741 10.18653/v1/W17-4741 @@ -8874,7 +8874,7 @@ is able to handle phenomena related to scope by means of an higher-order type th OzanCaglayan WalidAransa AdrienBardet - MercedesGarcía-Martínez + MercedesGarcía-Martínez FethiBougares LoïcBarrault MarcMasana @@ -8888,7 +8888,7 @@ is able to handle phenomena related to scope by means of an higher-order type th <fixed-case>DCU</fixed-case> System Report on the <fixed-case>WMT</fixed-case> 2017 Multi-modal Machine Translation Task IacerCalixto - KoelDutta Chowdhury + KoelDutta Chowdhury QunLiu 440–444 W17-4747 @@ -8900,7 +8900,7 @@ is able to handle phenomena related to scope by means of an higher-order type th JohnDuselis MichaelHutt JeremyGwinnup - JamesDavis + JamesDavis JoshuaSandvick 445–449 W17-4748 @@ -8909,7 +8909,7 @@ is able to handle phenomena related to scope by means of an higher-order type th <fixed-case>CUNI</fixed-case> System for the <fixed-case>WMT</fixed-case>17 Multimodal Translation Task - JindřichHelcl + JindřichHelcl JindřichLibovický 450–457 W17-4749 @@ -8937,7 +8937,7 @@ is able to handle phenomena related to scope by means of an higher-order type th <fixed-case>S</fixed-case>heffield <fixed-case>M</fixed-case>ulti<fixed-case>MT</fixed-case>: Using Object Posterior Predictions for Multimodal Machine Translation - Pranava SwaroopMadhyastha + Pranava SwaroopMadhyastha JosiahWang LuciaSpecia 470–476 @@ -8949,7 +8949,7 @@ is able to handle phenomena related to scope by means of an higher-order type th <fixed-case>NICT</fixed-case>-<fixed-case>NAIST</fixed-case> System for <fixed-case>WMT</fixed-case>17 Multimodal Translation Task JingyiZhang MasaoUtiyama - EiichroSumita + EiichroSumita GrahamNeubig SatoshiNakamura 477–482 @@ -8978,7 +8978,7 @@ is able to handle phenomena related to scope by means of an higher-order type th A Shared Task on Bandit Learning for Machine Translation - ArtemSokolov + ArtemSokolov JuliaKreutzer KellenSunderland PavelDanchenko @@ -8993,7 +8993,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Results of the <fixed-case>WMT</fixed-case>17 Neural <fixed-case>MT</fixed-case> Training Task OndřejBojar - JindřichHelcl + JindřichHelcl TomKocmi JindřichLibovický TomášMusil @@ -9012,7 +9012,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Predicting Translation Performance with Referential Translation Machines - ErgunBiçici + ErgunBiçici 540–544 W17-4759 10.18653/v1/W17-4759 @@ -9020,8 +9020,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Bilexical Embeddings for Quality Estimation - FrédéricBlain - CarolinaScarton + FrédéricBlain + CarolinaScarton LuciaSpecia 545–550 W17-4760 @@ -9036,7 +9036,7 @@ is able to handle phenomena related to scope by means of an higher-order type th QingyuXiang LilinZhang MaoxiLi - MingwenWang + MingwenWang 551–555 W17-4761 10.18653/v1/W17-4761 @@ -9063,8 +9063,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Unbabel’s Participation in the <fixed-case>WMT</fixed-case>17 Translation Quality Estimation Shared Task - André F. T.Martins - FabioKepler + André F. T.Martins + FabioKepler JoséMonteiro 569–574 W17-4764 @@ -9073,7 +9073,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Feature-Enriched Character-Level Convolutions for Text Regression - GustavoPaetzold + GustavoPaetzold LuciaSpecia 575–581 W17-4765 @@ -9114,7 +9114,7 @@ is able to handle phenomena related to scope by means of an higher-order type th OndřejBojar OndřejHübsch RudolfRosa - DušanVariš + DušanVariš 604–611 W17-4769 10.18653/v1/W17-4769 @@ -9122,7 +9122,7 @@ is able to handle phenomena related to scope by means of an higher-order type th chr<fixed-case>F</fixed-case>++: words helping character n-grams - MajaPopović + MajaPopović 612–618 W17-4770 10.18653/v1/W17-4770 @@ -9140,7 +9140,7 @@ is able to handle phenomena related to scope by means of an higher-order type th <fixed-case>LIG</fixed-case>-<fixed-case>CRIS</fixed-case>t<fixed-case>AL</fixed-case> Submission for the <fixed-case>WMT</fixed-case> 2017 Automatic Post-Editing Task AlexandreBérard - LaurentBesacier + LaurentBesacier OlivierPietquin 623–629 W17-4772 @@ -9150,10 +9150,10 @@ is able to handle phenomena related to scope by means of an higher-order type th Multi-source Neural Automatic Post-Editing: <fixed-case>FBK</fixed-case>’s participation in the <fixed-case>WMT</fixed-case> 2017 <fixed-case>APE</fixed-case> shared task RajenChatterjee - M. AminFarajian - MatteoNegri + M. AminFarajian + MatteoNegri MarcoTurchi - AnkitSrivastava + AnkitSrivastava SantanuPal 630–638 W17-4773 @@ -9171,7 +9171,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Ensembling Factored Neural Machine Translation Models for Automatic Post-Editing and Quality Estimation - ChrisHokamp + ChrisHokamp 647–654 W17-4775 10.18653/v1/W17-4775 @@ -9184,7 +9184,7 @@ is able to handle phenomena related to scope by means of an higher-order type th LiuHuang LilinZhang MaoxiLi - MingwenWang + MingwenWang 655–660 W17-4776 10.18653/v1/W17-4776 @@ -9192,7 +9192,7 @@ is able to handle phenomena related to scope by means of an higher-order type th <fixed-case>CUNI</fixed-case> System for <fixed-case>WMT</fixed-case>17 Automatic Post-Editing Task - DušanVariš + DušanVariš OndřejBojar 661–666 W17-4777 @@ -9205,7 +9205,7 @@ is able to handle phenomena related to scope by means of an higher-order type th ShiFeng KhanhNguyen KiantéBrantley - HalDaumé III + HalDaumé III 667–673 W17-4778 10.18653/v1/W17-4778 @@ -9244,9 +9244,9 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the Third Workshop on Discourse in Machine Translation W17-48 - BonnieWebber - AndreiPopescu-Belis - JörgTiedemann + BonnieWebber + AndreiPopescu-Belis + JörgTiedemann 10.18653/v1/W17-48 Association for Computational Linguistics
Copenhagen, Denmark
@@ -9262,7 +9262,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Findings of the 2017 <fixed-case>D</fixed-case>isco<fixed-case>MT</fixed-case> Shared Task on Cross-lingual Pronoun Prediction SharidLoáiciga SaraStymne - PreslavNakov + PreslavNakov ChristianHardmeier JörgTiedemann MauroCettolo @@ -9276,7 +9276,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
Validation of an Automatic Metric for the Accuracy of Pronoun Translation (<fixed-case>APT</fixed-case>) - LeslyMiculicich Werlen + LeslyMiculicich Werlen AndreiPopescu-Belis 17–25 W17-4802 @@ -9379,7 +9379,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Translating Implicit Discourse Connectives Based on Cross-lingual Annotation and Alignment HongzhengLi - PhilippeLanglais + PhilippeLanglais YaohongJin 93–98 W17-4812 @@ -9398,7 +9398,7 @@ is able to handle phenomena related to scope by means of an higher-order type th On Integrating Discourse in Machine Translation - KarinSim Smith + KarinSim Smith 110–121 W17-4814 10.18653/v1/W17-4814 @@ -9437,8 +9437,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Shakespearizing Modern Language Using Copy-Enriched Sequence to Sequence Models HarshJhamtani VarunGangal - EduardHovy - EricNyberg + EduardHovy + EricNyberg 10–19 W17-4902 10.18653/v1/W17-4902 @@ -9460,7 +9460,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Harvesting Creative Templates for Generating Stylistically Varied Restaurant Reviews ShereenOraby SheidehHomayon - MarilynWalker + MarilynWalker 28–36 W17-4904 10.18653/v1/W17-4904 @@ -9470,7 +9470,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Is writing style predictive of scientific fraud? ChloéBraud - AndersSøgaard + AndersSøgaard 37–42 W17-4905 10.18653/v1/W17-4905 @@ -9503,7 +9503,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Topic and audience effects on distinctively <fixed-case>S</fixed-case>cottish vocabulary usage in <fixed-case>T</fixed-case>witter data PhilippaShoemark JamesKirby - SharonGoldwater + SharonGoldwater 59–68 W17-4908 10.18653/v1/W17-4908 @@ -9535,7 +9535,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Stylistic Variation in Television Dialogue for Natural Language Generation GraceLin - MarilynWalker + MarilynWalker 85–93 W17-4911 10.18653/v1/W17-4911 @@ -9564,9 +9564,9 @@ is able to handle phenomena related to scope by means of an higher-order type th Assessing the Stylistic Properties of Neurally Generated Text in Authorship Attribution - EnriqueManjavacas + EnriqueManjavacas JeroenDe Gussem - WalterDaelemans + WalterDaelemans MikeKestemont 116–125 W17-4914 @@ -9579,7 +9579,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the 12th Workshop on Innovative Use of NLP for Building Educational Applications W17-50 - JoelTetreault + JoelTetreault JillBurstein ClaudiaLeacock HelenYannakoudakis @@ -9596,7 +9596,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Question Difficulty – How to Estimate Without Norming, How to Use for Automated Grading - UlrikePadó + UlrikePadó 1–10 W17-5001 10.18653/v1/W17-5001 @@ -9606,7 +9606,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Combining <fixed-case>CNN</fixed-case>s and Pattern Matching for Question Interpretation in a Virtual Patient Dialogue System LifengJin - MichaelWhite + MichaelWhite EvanJaffe LauraZimmerman DouglasDanforth @@ -9653,7 +9653,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Predicting Specificity in Classroom Discussion LucaLugini - DianeLitman + DianeLitman 52–61 W17-5006 10.18653/v1/W17-5006 @@ -9662,7 +9662,7 @@ is able to handle phenomena related to scope by means of an higher-order type th A Report on the 2017 Native Language Identification Shared Task - ShervinMalmasi + ShervinMalmasi KeelanEvanini AoifeCahill JoelTetreault @@ -9689,7 +9689,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Predicting Audience’s Laughter During Presentations Using Convolutional Neural Network LeiChen - Chong MinLee + Chong MinLee 86–90 W17-5009 10.18653/v1/W17-5009 @@ -9722,7 +9722,7 @@ is able to handle phenomena related to scope by means of an higher-order type th An Investigation into the Pedagogical Features of Documents EmilySheng - PremNatarajan + PremNatarajan JonathanGordon GullyBurns 109–120 @@ -9734,8 +9734,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Combining Multiple Corpora for Readability Assessment for People with Cognitive Disabilities VictoriaYaneva - ConstantinOrăsan - RichardEvans + ConstantinOrăsan + RichardEvans OmidRohanian 121–132 W17-5013 @@ -9757,7 +9757,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Distractor Generation for <fixed-case>C</fixed-case>hinese Fill-in-the-blank Items ShuJiang - JohnLee + JohnLee 143–148 W17-5015 10.18653/v1/W17-5015 @@ -9768,7 +9768,7 @@ is able to handle phenomena related to scope by means of an higher-order type th An Error-Oriented Approach to Word Embedding Pre-Training YoumnaFarag MarekRei - TedBriscoe + TedBriscoe 149–158 W17-5016 10.18653/v1/W17-5016 @@ -9781,7 +9781,7 @@ is able to handle phenomena related to scope by means of an higher-order type th AndreaHorbach AoifeCahill TorstenZesch - Chong MinLee + Chong MinLee 159–168 W17-5017 10.18653/v1/W17-5017 @@ -9793,7 +9793,7 @@ is able to handle phenomena related to scope by means of an higher-order type th AnaïsTack ThomasFrançois SophieRoekhaut - CédrickFairon + CédrickFairon 169–179 W17-5018 10.18653/v1/W17-5018 @@ -9825,7 +9825,7 @@ is able to handle phenomena related to scope by means of an higher-order type th PavelIrcing JanŠvec ZbyněkZajíc - BarboraHladká + BarboraHladká MartinHolub 198–209 W17-5021 @@ -9871,9 +9871,9 @@ is able to handle phenomena related to scope by means of an higher-order type th Neural Networks and Spelling Features for Native Language Identification JohannesBjerva - GintarėGrigonytė + GintarėGrigonytė RobertÖstling - BarbaraPlank + BarbaraPlank 235–239 W17-5025 10.18653/v1/W17-5025 @@ -9925,10 +9925,10 @@ is able to handle phenomena related to scope by means of an higher-order type th Effects of Lexical Properties on Viewing Time per Word in Autistic and Neurotypical Readers - SanjaŠtajner + SanjaŠtajner VictoriaYaneva - RuslanMitkov - Simone PaoloPonzetto + RuslanMitkov + Simone PaoloPonzetto 271–281 W17-5030 10.18653/v1/W17-5030 @@ -9938,7 +9938,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Transparent text quality assessment with convolutional neural networks RobertÖstling - GintareGrigonyte + GintareGrigonyte 282–286 W17-5031 10.18653/v1/W17-5031 @@ -9950,7 +9950,7 @@ is able to handle phenomena related to scope by means of an higher-order type th MarekRei MarianoFelice ZhengYuan - TedBriscoe + TedBriscoe 287–292 W17-5032 10.18653/v1/W17-5032 @@ -9970,7 +9970,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Multiple Choice Question Generation Utilizing An Ontology KatherineStasaski - Marti A.Hearst + Marti A.Hearst 303–312 W17-5034 10.18653/v1/W17-5034 @@ -9991,7 +9991,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Language Based Mapping of Science Assessment Items to Skills FarahNadeem - MariOstendorf + MariOstendorf 319–326 W17-5036 10.18653/v1/W17-5036 @@ -10012,7 +10012,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Question Generation for Language Learning: From ensuring texts are read to supporting learning MariaChinkina - DetmarMeurers + DetmarMeurers 334–344 W17-5038 10.18653/v1/W17-5038 @@ -10043,7 +10043,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Exploring Optimal Voting in Native Language Identification - CyrilGoutte + CyrilGoutte SergeLéger 367–373 W17-5041 @@ -10069,8 +10069,8 @@ is able to handle phenomena related to scope by means of an higher-order type th BoBlankers JohannesBjerva MalvinaNissim - Gertjanvan Noord - BarbaraPlank + Gertjanvan Noord + BarbaraPlank MartijnWieling 382–389 W17-5043 @@ -10091,8 +10091,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Native Language Identification on Text and Speech MarcosZampieri - Alina MariaCiobanu - Liviu P.Dinu + Alina MariaCiobanu + Liviu P.Dinu 398–404 W17-5045 10.18653/v1/W17-5045 @@ -10102,7 +10102,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Native Language Identification using Phonetic Algorithms ChareseSmiley - SandraKübler + SandraKübler 405–412 W17-5046 10.18653/v1/W17-5046 @@ -10112,7 +10112,7 @@ is able to handle phenomena related to scope by means of an higher-order type th A deep-learning based native-language classification by using a latent semantic analysis for the <fixed-case>NLI</fixed-case> Shared Task 2017 Yoo RheeOh - Hyung-BaeJeon + Hyung-BaeJeon Hwa JeonSong Yun-KyungLee Jeon-GuePark @@ -10125,8 +10125,8 @@ is able to handle phenomena related to scope by means of an higher-order type th Fusion of Simple Models for Native Language Identification - FabioKepler - RamonF. Astudillo + FabioKepler + RamonF. Astudillo AlbertoAbad 423–429 W17-5048 @@ -10147,7 +10147,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Using Gaze to Predict Text Readability Ana ValeriaGonzález-Garduño - AndersSøgaard + AndersSøgaard 438–443 W17-5050 10.18653/v1/W17-5050 @@ -10185,14 +10185,14 @@ is able to handle phenomena related to scope by means of an higher-order type th W17-51 IvanHabernal IrynaGurevych - KevinAshley - ClaireCardie - NancyGreen - DianeLitman + KevinAshley + ClaireCardie + NancyGreen + DianeLitman GeorgiosPetasis - ChrisReed + ChrisReed NoamSlonim - VernWalker + VernWalker 10.18653/v1/W17-51 Association for Computational Linguistics
Copenhagen, Denmark
@@ -10222,7 +10222,7 @@ is able to handle phenomena related to scope by means of an higher-order type th ElenaMusi AlyssaHwang SmarandaMuresan - KathyMcKeown + KathyMcKeown 11–21 W17-5102 10.18653/v1/W17-5102 @@ -10266,7 +10266,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Building an Argument Search Engine for the Web HenningWachsmuth MartinPotthast - KhalidAl-Khatib + KhalidAl-Khatib YamenAjjour JanaPuschmann JianiQu @@ -10327,7 +10327,7 @@ is able to handle phenomena related to scope by means of an higher-order type th
Using Question-Answering Techniques to Implement a Knowledge-Driven Argument Mining Approach - PatrickSaint-Dizier + PatrickSaint-Dizier 85–90 W17-5111 10.18653/v1/W17-5111 @@ -10388,9 +10388,9 @@ is able to handle phenomena related to scope by means of an higher-order type th Proceedings of the 8th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis W17-52 - AlexandraBalahur - Saif M.Mohammad - Erikvan der Goot + AlexandraBalahur + Saif M.Mohammad + Erikvan der Goot 10.18653/v1/W17-52 Association for Computational Linguistics
Copenhagen, Denmark
@@ -10415,7 +10415,7 @@ is able to handle phenomena related to scope by means of an higher-order type th Assessing State-of-the-Art Sentiment Models on State-of-the-Art Sentiment Datasets JeremyBarnes RomanKlinger - SabineSchulte im Walde + SabineSchulte im Walde 2–12 W17-5202 10.18653/v1/W17-5202 @@ -10433,7 +10433,7 @@ is able to handle phenomena related to scope by means of an higher-order type th HendrikSchuff JeremyBarnes JulianMohme - SebastianPadó + SebastianPadó RomanKlinger 13–23 W17-5203 @@ -10446,7 +10446,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
Ranking Right-Wing Extremist Social Media Profiles by Similarity to Democratic and Extremist Groups - MatthiasHartung + MatthiasHartung RomanKlinger FranziskaSchmidtke LarsVogel @@ -10502,7 +10502,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Towards Syntactic <fixed-case>I</fixed-case>berian Polarity Classification DavidVilares MarcosGarcia - Miguel A.Alonso + Miguel A.Alonso CarlosGómez-Rodríguez 67–73 W17-5209 @@ -10523,9 +10523,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Linguistic Reflexes of Well-Being and Happiness in Echo JiaqiWu - MarilynWalker - PranavAnand - SteveWhittaker + MarilynWalker + PranavAnand + SteveWhittaker 81–91 W17-5211 10.18653/v1/W17-5211 @@ -10595,11 +10595,11 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Towards an integrated pipeline for aspect-based sentiment analysis in various domains - OrphéeDe Clercq + OrphéeDe Clercq ElsLefever GillesJacobs TijlCarpels - VéroniqueHoste + VéroniqueHoste 136–142 W17-5218 10.18653/v1/W17-5218 @@ -10621,7 +10621,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Lexicon Integrated <fixed-case>CNN</fixed-case> Models with Attention for Sentiment Analysis BonggunShin TimothyLee - Jinho D.Choi + Jinho D.Choi 149–158 W17-5220 10.18653/v1/W17-5220 @@ -10665,7 +10665,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Unsupervised Aspect Term Extraction with <fixed-case>B</fixed-case>-<fixed-case>LSTM</fixed-case> & <fixed-case>CRF</fixed-case> using Automatically Labelled Datasets AthanasiosGiannakopoulos - ClaudiuMusat + ClaudiuMusat AndreeaHossmann MichaelBaeriswyl 180–188 @@ -10677,7 +10677,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>PLN</fixed-case>-<fixed-case>PUCRS</fixed-case> at <fixed-case>E</fixed-case>mo<fixed-case>I</fixed-case>nt-2017: Psycholinguistic features for emotion intensity prediction in tweets HenriqueSantos - RenataVieira + RenataVieira 189–192 W17-5225 10.18653/v1/W17-5225 @@ -10724,8 +10724,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Md ShadAkhtar PalaashSawant AsifEkbal - JyotiPawar - PushpakBhattacharyya + JyotiPawar + PushpakBhattacharyya 212–218 W17-5229 10.18653/v1/W17-5229 @@ -10735,7 +10735,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>NSE</fixed-case>mo at <fixed-case>E</fixed-case>mo<fixed-case>I</fixed-case>nt-2017: An Ensemble to Predict Emotion Intensity in Tweets SreekanthMadisetty - Maunendra SankarDesarkar + Maunendra SankarDesarkar 219–224 W17-5230 10.18653/v1/W17-5230 @@ -10744,7 +10744,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>T</fixed-case>ecnolengua <fixed-case>L</fixed-case>ingmotif at <fixed-case>E</fixed-case>mo<fixed-case>I</fixed-case>nt-2017: A lexicon-based approach - AntonioMoreno-Ortiz + AntonioMoreno-Ortiz 225–232 W17-5231 10.18653/v1/W17-5231 @@ -10764,9 +10764,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>YZU</fixed-case>-<fixed-case>NLP</fixed-case> at <fixed-case>E</fixed-case>mo<fixed-case>I</fixed-case>nt-2017: Determining Emotion Intensity Using a Bi-directional <fixed-case>LSTM</fixed-case>-<fixed-case>CNN</fixed-case> Model YuanyeHe - Liang-ChihYu + Liang-ChihYu K. RobertLai - WeiyiLiu + WeiyiLiu 238–242 W17-5233 10.18653/v1/W17-5233 @@ -10796,7 +10796,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>LIPN</fixed-case>-<fixed-case>UAM</fixed-case> at <fixed-case>E</fixed-case>mo<fixed-case>I</fixed-case>nt-2017:Combination of Lexicon-based features and Sentence-level Vector Representations for Emotion Intensity Determination DavideBuscaldi - BelemPriego + BelemPriego 255–258 W17-5236 10.18653/v1/W17-5236 @@ -10821,13 +10821,13 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Proceedings of the 2nd Workshop on Evaluating Vector Space Representations for NLP W17-53 - SamuelBowman + SamuelBowman YoavGoldberg FelixHill AngelikiLazaridou OmerLevy RoiReichart - AndersSøgaard + AndersSøgaard 10.18653/v1/W17-53 Association for Computational Linguistics
Copenhagen, Denmark
@@ -10877,7 +10877,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Evaluation of word embeddings against cognitive processes: primed reaction times in lexical decision and naming tasks JeremyAuguste ArnaudRey - BenoitFavre + BenoitFavre 21–26 W17-5304 10.18653/v1/W17-5304 @@ -10896,7 +10896,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
Recognizing Textual Entailment in <fixed-case>T</fixed-case>witter Using Word Embeddings - Octavia-MariaŞulea + Octavia-MariaŞulea 31–35 W17-5306 10.18653/v1/W17-5306 @@ -10910,7 +10910,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Zhen-HuaLing SiWei HuiJiang - DianaInkpen + DianaInkpen 36–40 W17-5307 10.18653/v1/W17-5307 @@ -10930,8 +10930,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Character-level Intra Attention Network for Natural Language Inference HanYang - Marta R.Costa-jussà - José A. R.Fonollosa + Marta R.Costa-jussà + José A. R.Fonollosa 46–50 W17-5309 10.18653/v1/W17-5309 @@ -10956,7 +10956,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Thuong-HaiPham XiaoyuBai MarcTanti - Lonnekevan der Plas + Lonnekevan der Plas AlbertGatt 56–60 W17-5311 @@ -10969,8 +10969,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Proceedings of the First Workshop on Building Linguistically Generalizable NLP Systems W17-54 - EmilyBender - HalDaumé III + EmilyBender + HalDaumé III AllysonEttinger SudhaRao 10.18653/v1/W17-54 @@ -11013,7 +11013,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Massively Multilingual Neural Grapheme-to-Phoneme Conversion BenPeters JonDehdari - Josefvan Genabith + Josefvan Genabith 19–26 W17-5403 10.18653/v1/W17-5403 @@ -11023,8 +11023,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>BIBI</fixed-case> System Description: Building with <fixed-case>CNN</fixed-case>s and Breaking with Deep Reinforcement Learning YitongLi - TrevorCohn - TimothyBaldwin + TrevorCohn + TimothyBaldwin 27–32 W17-5404 10.18653/v1/W17-5404 @@ -11036,11 +11036,11 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me TaylorMahler WillyCheung MichaElsner - DavidKing - Marie-Catherinede Marneffe + DavidKing + Marie-Catherinede Marneffe CoryShain SymonStevens-Guille - MichaelWhite + MichaelWhite 33–39 W17-5405 10.18653/v1/W17-5405 @@ -11051,9 +11051,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me An Adaptable Lexical Simplification Architecture for Major <fixed-case>I</fixed-case>bero-<fixed-case>R</fixed-case>omance Languages - DanielFerrés + DanielFerrés HoracioSaggion - XavierGómez Guinovart + XavierGómez Guinovart 40–47 W17-5406 10.18653/v1/W17-5406 @@ -11063,7 +11063,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Cross-genre Document Retrieval: Matching between Conversational and Formal Writings TomaszJurczyk - Jinho D.Choi + Jinho D.Choi 48–53 W17-5407 10.18653/v1/W17-5407 @@ -11104,7 +11104,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Proceedings of the 18th Annual SIGdial Meeting on Discourse and Dialogue W17-55 - KristiinaJokinen + KristiinaJokinen ManfredStede DavidDeVault AnnieLouis @@ -11132,7 +11132,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Towards Full Text Shallow Discourse Relation Annotation: Experiments with Cross-Paragraph Implicit Relations in the <fixed-case>PDTB</fixed-case> RashmiPrasad - KatherineForbes Riley + KatherineForbes Riley AlanLee 7–16 W17-5502 @@ -11179,7 +11179,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me MihailEric LakshmiKrishnan FrancoisCharette - Christopher D.Manning + Christopher D.Manning 37–49 W17-5506 10.18653/v1/W17-5506 @@ -11219,11 +11219,11 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me PawełBudzianowski IñigoCasanueva NikolaMrkšić - Lina M.Rojas-Barahona + Lina M.Rojas-Barahona Pei-HaoSu Tsung-HsienWen - MilicaGašić - SteveYoung + MilicaGašić + SteveYoung 65–70 W17-5509 10.18653/v1/W17-5509 @@ -11243,7 +11243,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Demonstration of interactive teaching for end-to-end dialog control with hybrid code networks - Jason D.Williams + Jason D.Williams LarsLiden 82–85 W17-5511 @@ -11259,8 +11259,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me NikolaMrkšić Tsung-HsienWen IñigoCasanueva - Lina M.Rojas-Barahona - MilicaGašić + Lina M.Rojas-Barahona + MilicaGašić 86–92 W17-5512 10.18653/v1/W17-5512 @@ -11281,8 +11281,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Sequential Dialogue Context Modeling for Spoken Language Understanding AnkurBapna - GokhanTür - DilekHakkani-Tür + GokhanTür + DilekHakkani-Tür LarryHeck 103–114 W17-5514 @@ -11331,8 +11331,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Pei-HaoSu PawełBudzianowski StefanUltes - MilicaGašić - SteveYoung + MilicaGašić + SteveYoung 147–157 W17-5518 10.18653/v1/W17-5518 @@ -11368,11 +11368,11 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me EdwardCai AllenLu EliPincus - DavidTraum + DavidTraum StefanUltes - Lina M.Rojas-Barahona - MilicaGasic - SteveYoung + Lina M.Rojas-Barahona + MilicaGasic + SteveYoung MaxineEskenazi 170–173 W17-5521 @@ -11395,7 +11395,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me The Role of Conversation Context for Sarcasm Detection in Online Interactions DebanjanGhosh - AlexanderRichard Fabbri + AlexanderRichard Fabbri SmarandaMuresan 186–196 W17-5523 @@ -11429,7 +11429,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>F</fixed-case>rames: a corpus for adding memory to goal-oriented dialogue systems LaylaEl Asri HannesSchulz - ShikharSharma + ShikharSharma JeremieZumer JustinHarris EmeryFine @@ -11455,7 +11455,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Neural-based Natural Language Generation in Dialogue using <fixed-case>RNN</fixed-case> Encoder-Decoder with Semantic Aggregation Van-KhanhTran - Le-MinhNguyen + Le-MinhNguyen SatoshiTojo 231–240 W17-5528 @@ -11465,8 +11465,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Beyond On-hold Messages: Conversational Time-buying in Task-oriented Dialogue - SoledadLópez Gambino - SinaZarrieß + SoledadLópez Gambino + SinaZarrieß DavidSchlangen 241–246 W17-5529 @@ -11487,7 +11487,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Predicting Success in Goal-Driven Human-Human Dialogues MichaelNoseworthy - Jackie Chi KitCheung + Jackie Chi KitCheung JoellePineau 253–262 W17-5531 @@ -11500,7 +11500,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me JordonJohnson VadenMasrani GiuseppeCarenini - RaymondNg + RaymondNg 263–272 W17-5532 10.18653/v1/W17-5532 @@ -11520,7 +11520,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Adversarial evaluation for open-domain dialogue generation EliaBruni - RaquelFernández + RaquelFernández 284–288 W17-5534 10.18653/v1/W17-5534 @@ -11531,7 +11531,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Exploring Joint Neural Model for Sentence Level Discourse Parsing and Sentiment Analysis BitaNejat GiuseppeCarenini - RaymondNg + RaymondNg 289–298 W17-5535 10.18653/v1/W17-5535 @@ -11554,8 +11554,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me ShereenOraby VrindavanHarrison AmitaMisra - EllenRiloff - MarilynWalker + EllenRiloff + MarilynWalker 310–319 W17-5537 10.18653/v1/W17-5537 @@ -11567,8 +11567,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Finding Structure in Figurative Language: Metaphor Detection with Topic-based Frames HyejuJang KeithMaki - EduardHovy - CarolynRosé + EduardHovy + CarolynRosé 320–330 W17-5538 10.18653/v1/W17-5538 @@ -11577,7 +11577,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Using Reinforcement Learning to Model Incrementality in a Fast-Paced Dialogue Game - RameshManuvinakurike + RameshManuvinakurike DavidDeVault KallirroiGeorgila 331–341 @@ -11589,7 +11589,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Inferring Narrative Causality between Event Pairs in Films ZhichaoHu - MarilynWalker + MarilynWalker 342–351 W17-5540 10.18653/v1/W17-5540 @@ -11623,8 +11623,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me ElaheRahimtoroghi JiaqiWu RuiminWang - PranavAnand - MarilynWalker + PranavAnand + MarilynWalker 360–369 W17-5543 10.18653/v1/W17-5543 @@ -11655,7 +11655,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me MatthiasGrabmair GrahamNeubig JonathanFrancis - EricNyberg + EricNyberg 374–383 W17-5545 10.18653/v1/W17-5545 @@ -11701,10 +11701,10 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Building a Better Bitext for Structurally Different Languages through Self-training JungyeulPark - LoïcDugast + LoïcDugast Jeen-PyoHong Chang-UkShin - Jeong-WonCha + Jeong-WonCha 1–10 W17-5601 We propose a novel method to bootstrap the construction of parallel corpora for new pairs of structurally different languages. We do so by combining the use of a pivot language and self-training. A pivot language enables the use of existing translation models to bootstrap the alignment and a self-training procedure enables to achieve better alignment, both at the document and sentence level. We also propose several evaluation methods for the resulting alignment. @@ -11807,7 +11807,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>J</fixed-case>apanese to <fixed-case>E</fixed-case>nglish/<fixed-case>C</fixed-case>hinese/<fixed-case>K</fixed-case>orean Datasets for Translation Quality Estimation and Automatic Post-Editing AtsushiFujita - EiichiroSumita + EiichiroSumita 79–88 W17-5705 Aiming at facilitating the research on quality estimation (QE) and automatic post-editing (APE) of machine translation (MT) outputs, especially for those among Asian languages, we have created new datasets for Japanese to English, Chinese, and Korean translations. As the source text, actual utterances in Japanese were extracted from the log data of our speech translation service. MT outputs were then given by phrase-based statistical MT systems. Finally, human evaluators were employed to grade the quality of MT outputs and to post-edit them. This paper describes the characteristics of the created datasets and reports on our benchmarking experiments on word-level QE, sentence-level QE, and APE conducted using the created datasets. @@ -11871,7 +11871,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Ensemble and Reranking: Using Multiple Models in the <fixed-case>NICT</fixed-case>-2 Neural Machine Translation System at <fixed-case>WAT</fixed-case>2017 KenjiImamura - EiichiroSumita + EiichiroSumita 127–134 W17-5711 In this paper, we describe the NICT-2 neural machine translation system evaluated at WAT2017. This system uses multiple models as an ensemble and combines models with opposite decoding directions by reranking (called bi-directional reranking). In our experimental results on small data sets, the translation quality improved when the number of models was increased to 32 in total and did not saturate. In the experiments on large data sets, improvements of 1.59-3.32 BLEU points were achieved when six-model ensembles were combined by the bi-directional reranking. @@ -11883,7 +11883,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me KatsuhitoSudoh SatoshiNakamura MasaoUtiyama - EiichiroSumita + EiichiroSumita 135–139 W17-5712 This paper describes the details about the NAIST-NICT machine translation system for WAT2017 English-Japanese Scientific Paper Translation Task. The system consists of a language-independent tokenizer and an attentional encoder-decoder style neural machine translation model. According to the official results, our system achieves higher translation accuracy than any systems submitted previous campaigns despite simple model architecture. @@ -11901,7 +11901,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>K</fixed-case>yoto <fixed-case>U</fixed-case>niversity Participation to <fixed-case>WAT</fixed-case> 2017 - FabienCromieres + FabienCromieres RajDabre ToshiakiNakazawa SadaoKurohashi @@ -11913,8 +11913,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>CUNI</fixed-case> <fixed-case>NMT</fixed-case> System for <fixed-case>WAT</fixed-case> 2017 Translation Tasks TomKocmi - DušanVariš - OndřejBojar + DušanVariš + OndřejBojar 154–159 W17-5715 The paper presents this year’s CUNI submissions to the WAT 2017 Translation Task focusing on the Japanese-English translation, namely Scientific papers subtask, Patents subtask and Newswire subtask. We compare two neural network architectures, the standard sequence-to-sequence with attention (Seq2Seq) and an architecture using convolutional sentence encoder (FBConv2Seq), both implemented in the NMT framework Neural Monkey that we currently participate in developing. We also compare various types of preprocessing of the source Japanese sentences and their impact on the overall results. Furthermore, we include the results of our experiments with out-of-domain data obtained by combining the corpora provided for each subtask. @@ -11934,7 +11934,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me SandhyaSingh RiteshPanjwani AnoopKunchukuttan - PushpakBhattacharyya + PushpakBhattacharyya 167–170 W17-5717 In this paper, we empirically compare the two encoder-decoder neural machine translation architectures: convolutional sequence to sequence model (ConvS2S) and recurrent sequence to sequence model (RNNS2S) for English-Hindi language pair as part of IIT Bombay’s submission to WAT2017 shared task. We report the results for both English-Hindi and Hindi-English direction of language pair. @@ -12001,7 +12001,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Hong-JieDai Yung-ChunChang JitendraJonnagaddala - Wen-LianHsu + Wen-LianHsu 26–32 W17-5804 The increasing popularity of social media lead users to share enormous information on the internet. This information has various application like, it can be used to develop models to understand or predict user behavior on social media platforms. For example, few online retailers have studied the shopping patterns to predict shopper’s pregnancy stage. Another interesting application is to use the social media platforms to analyze users’ health-related information. In this study, we developed a tree kernel-based model to classify tweets conveying pregnancy related information using this corpus. The developed pregnancy classification model achieved an accuracy of 0.847 and an F-score of 0.565. A new corpus from popular social media platform Twitter was developed for the purpose of this study. In future, we would like to improve this corpus by reducing noise such as retweets. @@ -12041,7 +12041,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me JuaeKim SunjaeKwon YoungjoongKo - JungyunSeo + JungyunSeo 47–51 W17-5807 Biomedical Named Entity (NE) recognition is a core technique for various works in the biomedical domain. In previous studies, using machine learning algorithm shows better performance than dictionary-based and rule-based approaches because there are too many terminological variations of biomedical NEs and new biomedical NEs are constantly generated. To achieve the high performance with a machine-learning algorithm, good-quality corpora are required. However, it is difficult to obtain the good-quality corpora because an-notating a biomedical corpus for ma-chine-learning is extremely time-consuming and costly. In addition, most previous corpora are insufficient for high-level tasks because they cannot cover various domains. Therefore, we propose a method for generating a large amount of machine-labeled data that covers various domains. To generate a large amount of machine-labeled data, firstly we generate an initial machine-labeled data by using a chunker and MetaMap. The chunker is developed to extract only biomedical NEs with manually annotated data. MetaMap is used to annotate the category of bio-medical NE. Then we apply the self-training approach to bootstrap the performance of initial machine-labeled data. In our experiments, the biomedical NE recognition system that is trained with our proposed machine-labeled data achieves much high performance. As a result, our system outperforms biomedical NE recognition system that using MetaMap only with 26.03%p improvements on F1-score. @@ -12051,7 +12051,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Enhancing Drug-Drug Interaction Classification with Corpus-level Feature and Classifier Ensemble Jing CyunTu Po-TingLai - Richard Tzong-HanTsai + Richard Tzong-HanTsai 52–56 W17-5808 The study of drug-drug interaction (DDI) is important in the drug discovering. Both PubMed and DrugBank are rich resources to retrieve DDI information which is usually represented in plain text. Automatically extracting DDI pairs from text improves the quality of drug discov-ering. In this paper, we presented a study that focuses on the DDI classification. We normalized the drug names, and developed both sentence-level and corpus-level features for DDI classification. A classifier ensemble approach is used for the unbalance DDI labels problem. Our approach achieved an F-score of 65.4% on SemEval 2013 DDI test set. The experimental results also show the effects of proposed corpus-level features in the DDI task. @@ -12061,7 +12061,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Chemical-Induced Disease Detection Using Invariance-based Pattern Learning Model NehaWarikoo Yung-ChunChang - Wen-LianHsu + Wen-LianHsu 57–64 W17-5809 In this work, we introduce a novel feature engineering approach named “algebraic invariance” to identify discriminative patterns for learning relation pair features for the chemical-disease relation (CDR) task of BioCreative V. Our method exploits the existing structural similarity of the key concepts of relation descriptions from the CDR corpus to generate robust linguistic patterns for SVM tree kernel-based learning. Preprocessing of the training data classifies the entity pairs as either related or unrelated to build instance types for both inter-sentential and intra-sentential scenarios. An invariant function is proposed to process and optimally cluster similar patterns for both positive and negative instances. The learning model for CDR pairs is based on the SVM tree kernel approach, which generates feature trees and vectors and is modeled on suitable invariance based patterns, bringing brevity, precision and context to the identifier features. Results demonstrate that our method outperformed other compared approaches, achieved a high recall rate of 85.08%, and averaged an F1-score of 54.34% without the use of any additional knowledge bases. @@ -12072,10 +12072,10 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Proceedings of the 4th Workshop on Natural Language Processing Techniques for Educational Applications (NLPTEA 2017) W17-59 - Yuen-HsienTseng + Yuen-HsienTseng Hsin-HsiChen Lung-HaoLee - Liang-ChihYu + Liang-ChihYu Asian Federation of Natural Language Processing
Taipei, Taiwan
December @@ -12104,7 +12104,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Understanding Non-Native Writings: Can a Parser Help? JirkaHana - BarboraHladká + BarboraHladká 12–16 W17-5902 We present a pilot study on parsing non-native texts written by learners of Czech. We performed experiments that have shown that at least high-level syntactic functions, like subject, predicate, and object, can be assigned based on a parser trained on standard native language. @@ -12113,7 +12113,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>C</fixed-case>arrier Sentence Selection for Fill-in-the-blank Items ShuJiang - JohnLee + JohnLee 17–22 W17-5903 Fill-in-the-blank items are a common form of exercise in computer-assisted language learning systems. To automatically generate an effective item, the system must be able to select a high-quality carrier sentence that illustrates the usage of the target word. Previous approaches for carrier sentence selection have considered sentence length, vocabulary difficulty, the position of the target word and the presence of finite verbs. This paper investigates the utility of word co-occurrence statistics and lexical similarity as selection criteria. In an evaluation on generating fill-in-the-blank items for learning Chinese as a foreign language, we show that these two criteria can improve carrier sentence quality. @@ -12125,8 +12125,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me SandhyaSingh MeenakshiSomasundaram DharaGorasia - MalharKulkarni - PushpakBhattacharyya + MalharKulkarni + PushpakBhattacharyya 23–28 W17-5904 In today’s technology driven digital era, education domain is undergoing a transformation from traditional approaches to more learner controlled and flexible methods of learning. This transformation has opened the new avenues for interdisciplinary research in the field of educational technology and natural language processing in developing quality digital aids for learning and teaching. The tool presented here - Hindi Shabhadamitra, developed using Hindi Wordnet for Hindi language learning, is one such e-learning tool. It has been developed as a teaching and learning aid suitable for formal school based curriculum and informal setup for self learning users. Besides vocabulary, it also provides word based grammar along with images and pronunciation for better learning and retention. This aid demonstrates that how a rich lexical resource like wordnet can be systematically remodeled for practical usage in the educational domain. @@ -12137,9 +12137,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me GabrielFung MaximeDebosschere DingminWang - BoLi + BoLi JiaZhu - Kam-FaiWong + Kam-FaiWong 29–34 W17-5905 This paper provides an overview along with our findings of the Chinese Spelling Check shared task at NLPTEA 2017. The goal of this task is to develop a computer-assisted system to automatically diagnose typing errors in traditional Chinese sentences written by students. We defined six types of errors which belong to two categories. Given a sentence, the system should detect where the errors are, and for each detected error determine its type and provide correction suggestions. We designed, constructed, and released a benchmark dataset for this task. @@ -12191,8 +12191,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Complex Word Identification: Challenges in Data Annotation and System Performance MarcosZampieri - ShervinMalmasi - GustavoPaetzold + ShervinMalmasi + GustavoPaetzold LuciaSpecia 59–63 W17-5910 @@ -12215,7 +12215,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me HarsimranBedi SangameshwarPatil SwapnilHingmire - GirishPalshikar + GirishPalshikar 69–77 W17-5912 Event timeline serves as the basic structure of history, and it is used as a disposition of key phenomena in studying history as a subject in secondary school. In order to enable a student to understand a historical phenomenon as a series of connected events, we present a system for automatic event timeline generation from history textbooks. Additionally, we propose Message Sequence Chart (MSC) and time-map based visualization techniques to visualize an event timeline. We also identify key computational challenges in developing natural language processing based applications for history textbooks. @@ -12289,7 +12289,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me GuopingHuang JiajunZhang YuZhou - ChengqingZong + ChengqingZong 37–45 W17-6005 Terms extensively exist in specific domains, and term translation plays a critical role in domain-specific machine translation (MT) tasks. However, it’s a challenging task to translate them correctly for the huge number of pre-existing terms and the endless new terms. To achieve better term translation quality, it is necessary to inject external term knowledge into the underlying MT system. Fortunately, there are plenty of term translation knowledge in parenthetical sentences on the Internet. In this paper, we propose a simple, straightforward and effective framework to improve term translation by learning from parenthetical sentences. This framework includes: (1) a focused web crawler; (2) a parenthetical sentence filter, acquiring parenthetical sentences including bilingual term pairs; (3) a term translation knowledge extractor, extracting bilingual term translation candidates; (4) a probability learner, generating the term translation table for MT decoders. The extensive experiments demonstrate that our proposed framework significantly improves the translation quality of terms and sentences. @@ -12339,14 +12339,14 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Reflexives and Reciprocals in Synchronous <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar CristinaAggazzotti - Stuart M.Shieber + Stuart M.Shieber 31–42 W17-6204 aggazzotti-shieber-2017-reflexives Coordination in <fixed-case>TAG</fixed-case> without the Conjoin Operation - Chung-hyeHan + Chung-hyeHan AnoopSarkar 43–52 W17-6205 @@ -12354,7 +12354,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Scope, Time, and Predicate Restriction in <fixed-case>B</fixed-case>lackfoot using <fixed-case>MC</fixed-case>-<fixed-case>STAG</fixed-case> - Dennis RyanStoroshenko + Dennis RyanStoroshenko 53–60 W17-6206 storoshenko-2017-scope @@ -12369,7 +12369,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Parsing with Dynamic Continuized <fixed-case>CCG</fixed-case> - MichaelWhite + MichaelWhite SimonCharlow JordanNeedle DylanBumford @@ -12379,7 +12379,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Multiword Expression-Aware <fixed-case>A</fixed-case>* <fixed-case>TAG</fixed-case> Parsing Revisited - JakubWaszczuk + JakubWaszczuk AgataSavary YannickParmentier 84–93 @@ -12406,7 +12406,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Transforming Dependency Structures to <fixed-case>LTAG</fixed-case> Derivation Trees CaioCorro - JosephLe Roux + JosephLe Roux 112–121 W17-6212 corro-le-roux-2017-transforming @@ -12415,10 +12415,10 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Linguistically Rich Vector Representations of Supertags for <fixed-case>TAG</fixed-case> Parsing DanFriedman JungoKasai - R. ThomasMcCoy + R. ThomasMcCoy RobertFrank ForrestDavis - OwenRambow + OwenRambow 122–131 W17-6213 friedman-etal-2017-linguistically @@ -12428,7 +12428,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me PauliXu RobertFrank JungoKasai - OwenRambow + OwenRambow 132–141 W17-6214 xu-etal-2017-tag @@ -12472,8 +12472,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Lexicalized vs. Delexicalized Parsing in Low-Resource Scenarios - AgnieszkaFalenska - ÖzlemÇetinoğlu + AgnieszkaFalenska + ÖzlemÇetinoğlu 18–24 W17-6303 We present a systematic analysis of lexicalized vs. delexicalized parsing in low-resource scenarios, and propose a methodology to choose one method over another under certain conditions. We create a set of simulation experiments on 41 languages and apply our findings to 9 low-resource languages. Experimental results show that our methodology chooses the best approach in 8 out of 9 cases. @@ -12481,8 +12481,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Improving neural tagging with lexical information - BenoîtSagot - HéctorMartínez Alonso + BenoîtSagot + HéctorMartínez Alonso 25–31 W17-6304 Neural part-of-speech tagging has achieved competitive results with the incorporation of character-based and pre-trained word embeddings. In this paper, we show that a state-of-the-art bi-LSTM tagger can benefit from using information from morphosyntactic lexicons as additional input. The tagger, trained on several dozen languages, shows a consistent, average improvement when using lexical information, even when also using character-based embeddings, thus showing the complementarity of the different sources of lexical information. The improvements are particularly important for the smaller datasets. @@ -12490,7 +12490,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Prepositional Phrase Attachment over Word Embedding Products - Pranava SwaroopMadhyastha + Pranava SwaroopMadhyastha XavierCarreras AriadnaQuattoni 32–43 @@ -12500,7 +12500,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>L</fixed-case>1-<fixed-case>L</fixed-case>2 Parallel Dependency Treebank as Learner Corpus - JohnLee + JohnLee KeyingLi HermanLeung 44–49 @@ -12510,7 +12510,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Splitting Complex <fixed-case>E</fixed-case>nglish Sentences - JohnLee + JohnLee J. Buddhika K. PathirageDon 50–55 W17-6307 @@ -12529,9 +12529,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Leveraging Newswire Treebanks for Parsing Conversational Data with Argument Scrambling - Riyaz A.Bhat - IrshadBhat - DiptiSharma + Riyaz A.Bhat + IrshadBhat + DiptiSharma 61–66 W17-6309 We investigate the problem of parsing conversational data of morphologically-rich languages such as Hindi where argument scrambling occurs frequently. We evaluate a state-of-the-art non-linear transition-based parsing system on a new dataset containing 506 dependency trees for sentences from Bollywood (Hindi) movie scripts and Twitter posts of Hindi monolingual speakers. We show that a dependency parser trained on a newswire treebank is strongly biased towards the canonical structures and degrades when applied to conversational data. Inspired by Transformational Generative Grammar (Chomsky, 1965), we mitigate the sampling bias by generating all theoretically possible alternative word orders of a clause from the existing (kernel) structures in the treebank. Training our parser on canonical and transformed structures improves performance on conversational data by around 9% LAS over the baseline newswire parser. @@ -12539,7 +12539,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Using hyperlinks to improve multilingual partial parsers - AndersSøgaard + AndersSøgaard 67–71 W17-6310 Syntactic annotation is costly and not available for the vast majority of the world’s languages. We show that sometimes we can do away with less labeled data by exploiting more readily available forms of mark-up. Specifically, we revisit an idea from Valentin Spitkovsky’s work (2010), namely that hyperlinks typically bracket syntactic constituents or chunks. We strengthen his results by showing that not only can hyperlinks help in low resource scenarios, exemplified here by Quechua, but learning from hyperlinks can also improve state-of-the-art NLP models for English newswire. We also present out-of-domain evaluation on English Ontonotes 4.0. @@ -12549,8 +12549,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Correcting prepositional phrase attachments using multimodal corpora SebastienDelecraz AlexisNasr - FredericBechet - BenoitFavre + FredericBechet + BenoitFavre 72–77 W17-6311 PP-attachments are an important source of errors in parsing natural language. We propose in this article to use data coming from a multimodal corpus, combining textual, visual and conceptual information, as well as a correction strategy, to propose alternative attachments in the output of a parser. @@ -12569,7 +12569,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Effective Online Reordering with Arc-Eager Transitions RyosukeKohita HiroshiNoji - YujiMatsumoto + YujiMatsumoto 88–98 W17-6313 We present a new transition system with word reordering for unrestricted non-projective dependency parsing. Our system is based on decomposed arc-eager rather than arc-standard, which allows more flexible ambiguity resolution between a local projective and non-local crossing attachment. In our experiment on Universal Dependencies 2.0, we find our parser outperforms the ordinary swap-based parser particularly on languages with a large amount of non-projectivity. @@ -12630,7 +12630,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Proceedings of the Fourth International Conference on Dependency Linguistics (Depling 2017) W17-65 - SimonettaMontemagni + SimonettaMontemagni JoakimNivre Linköping University Electronic Press
Pisa, Italy
@@ -12651,7 +12651,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
Syntax-Semantics Interface: A Plea for a Deep Dependency Sentence Structure - EvaHajičová + EvaHajičová 2–3 W17-6502 hajicova-2017-syntax @@ -12666,7 +12666,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me On the Predicate-Argument Structure: Internal and Absorbing Scope - IgorBoguslavsky + IgorBoguslavsky 15–24 W17-6504 boguslavsky-2017-predicate @@ -12690,10 +12690,10 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Enhanced <fixed-case>UD</fixed-case> Dependencies with Neutralized Diathesis Alternation - MarieCandito + MarieCandito BrunoGuillaume GuyPerrier - DjaméSeddah + DjaméSeddah 42–53 W17-6507 candito-etal-2017-enhanced @@ -12708,8 +12708,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me A Dependency Treebank for <fixed-case>K</fixed-case>urmanji <fixed-case>K</fixed-case>urdish - MemduhGökırmak - Francis M.Tyers + MemduhGökırmak + Francis M.Tyers 64–72 W17-6509 gokirmak-tyers-2017-dependency @@ -12750,7 +12750,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Assessing the Annotation Consistency of the <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Corpora - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe MatiasGrioni JennaKanerva FilipGinter @@ -12768,7 +12768,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Dependency Structure of Binary Conjunctions(of the <fixed-case>IF</fixed-case>…, <fixed-case>THEN</fixed-case>… Type) - IgorMel’čuk + IgorMel’čuk 127–134 W17-6516 melcuk-2017-dependency @@ -12823,9 +12823,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me AlexandreRademaker FabricioChalub LivyReal - CláudiaFreitas - EckhardBick - Valeriade Paiva + CláudiaFreitas + EckhardBick + Valeriade Paiva 197–206 W17-6523 rademaker-etal-2017-universal @@ -12853,16 +12853,16 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Annotating <fixed-case>I</fixed-case>talian Social Media Texts in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies ManuelaSanguinetti CristinaBosco - AlessandroMazzei - AlbertoLavelli - FabioTamburini + AlessandroMazzei + AlbertoLavelli + FabioTamburini 229–239 W17-6526 sanguinetti-etal-2017-annotating <fixed-case>H</fixed-case>ungarian Copula Constructions in Dependency Syntax and Parsing - Katalin IlonaSimkó + Katalin IlonaSimkó VeronikaVincze 240–247 W17-6527 @@ -12870,7 +12870,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Semgrex-Plus: a Tool for Automatic Dependency-Graph Rewriting - FabioTamburini + FabioTamburini 248–254 W17-6528 tamburini-2017-semgrex @@ -12878,7 +12878,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Unity in Diversity: A Unified Parsing Strategy for Major <fixed-case>I</fixed-case>ndian Languages JuhiTandon - Dipti MisraSharma + Dipti MisraSharma 255–265 W17-6529 tandon-sharma-2017-unity @@ -12888,7 +12888,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Tak-sumWong KimGerdes HermanLeung - JohnLee + JohnLee 266–275 W17-6530 wong-etal-2017-quantitative @@ -12904,7 +12904,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Core Arguments in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies - DanielZeman + DanielZeman 287–296 W17-6532 zeman-2017-core @@ -12914,7 +12914,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Proceedings of the 11th Brazilian Symposium in Information and Human Language Technology W17-66 - Gustavo HenriquePaetzold + Gustavo HenriquePaetzold VládiaPinheiro Sociedade Brasileira de Computação
Uberlândia, Brazil
@@ -12940,7 +12940,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Estudo exploratório de categorias gramaticais com potencial de indicadores para a Análise de Sentimentos (An Exploratory study of grammatical categories as potential indicators for Sentiment Analysis)[In <fixed-case>P</fixed-case>ortuguese] JúliaRodrigues AdrianaPagano - EmersonParaiso + EmersonParaiso 17-21 W17-6602 rodrigues-etal-2017-estudo @@ -12955,7 +12955,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me
A study on irony within the context of 7x1-<fixed-case>PT</fixed-case> corpus - SilviaMoraes + SilviaMoraes RackelMachado MatheusRedecker RafaelCadaval @@ -12977,7 +12977,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Wheel of Life: an initial investigation. Topic-Related Polarity Visualization in Personal Stories HenriqueSantos - RenataVieira + RenataVieira GreicePinho JacksonPinheiro 37-41 @@ -13006,9 +13006,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Processo de construção de um corpus anotado com Entidades Geológicas visando <fixed-case>REN</fixed-case> (Building an annotated corpus with geological entities for <fixed-case>NER</fixed-case>)[In <fixed-case>P</fixed-case>ortuguese] DanielaAmaral - SandraCollovini + SandraCollovini AnnyFigueira - RenataVieira + RenataVieira RenataVieira MarcoGonzalez 63-72 @@ -13064,11 +13064,11 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>P</fixed-case>ortuguese Word Embeddings: Evaluating on Word Analogies and Natural Language Tasks NathanHartmann - ErickFonseca + ErickFonseca ChristopherShulby MarcosTreviso JéssicaSilva - SandraAluísio + SandraAluísio 122-131 W17-6615 hartmann-etal-2017-portuguese @@ -13085,7 +13085,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Normalizador de Texto para Lingua Portuguesa baseado em Modelo de Linguagem (A Normalizer based on Language Model for Texts in <fixed-case>P</fixed-case>ortuguese)[In <fixed-case>P</fixed-case>ortuguese] PatrickBard Renan LopesLuis - SilviaMoraes + SilviaMoraes 142-150 W17-6617 bard-etal-2017-normalizador @@ -13094,7 +13094,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Evaluating Word Embeddings for Sentence Boundary Detection in Speech Transcripts MarcosTreviso ChristopherShulby - SandraAluísio + SandraAluísio 151-160 W17-6618 treviso-etal-2017-evaluating @@ -13128,7 +13128,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Constituição de Um Dicionário Eletrônico Trilíngue Fundado em Frames a partir da Extração Automática de Candidatos a Termos do Domínio do Turismo (The Constitution of a Trilingual Eletronic Dictionary Based on Frames from the Automatic Extraction of Candidate Terms of the Tourism Domain)[In <fixed-case>P</fixed-case>ortuguese] Simone RodriguesPeron-Corrêa - Tiago TimponiTorrent + Tiago TimponiTorrent 193-200 W17-6622 peron-correa-torrent-2017-constituicao @@ -13136,7 +13136,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me A Modelagem Computacional do Domínio dos Esportes na <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Brasil (The Computational Modeling of the Sports Domain in <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Brasil)[In <fixed-case>P</fixed-case>ortuguese] Alexandre DinizCosta - Tiago TimponiTorrent + Tiago TimponiTorrent 201-208 W17-6623 costa-torrent-2017-modelagem @@ -13144,8 +13144,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Descrição e modelagem de construções interrogativas <fixed-case>QU</fixed-case>- em Português Brasileiro para o desenvolvimento de um chatbot (Description and modeling of interrogative constructs <fixed-case>QU</fixed-case>- in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese for the development of a chatbot)[In <fixed-case>P</fixed-case>ortuguese] Natália DuarteMarção - Tiago TimponiTorrent - Ely Edison da SilvaMatos + Tiago TimponiTorrent + Ely Edison da SilvaMatos 209-216 W17-6624 marcao-etal-2017-descricao @@ -13153,7 +13153,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Construções de Estrutura Argumental no âmbito do Constructicon da <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Brasil: proposta de uma modelagem linguístico-computacional (Structural Constructs of Arguments in the Context of the Construction of <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Brasil: a proposal for a computational-linguistic modeling)[In <fixed-case>P</fixed-case>ortuguese] Vânia GomesAlmeida - Tiago TimponiTorrent + Tiago TimponiTorrent 217-223 W17-6625 almeida-torrent-2017-construcoes @@ -13168,7 +13168,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Uma Proposta Metodológica para a Categorização Automatizada de Atrações Turísticas a partir de Comentários de Usuários em Plataformas Online (A Methodological Proposition for the Automatic Categorization of Touristic Attractions from User Comments in Online Platforms)[In <fixed-case>P</fixed-case>ortuguese] Vanessa Maria Ramos LopesPaiva - Tiago TimponiTorrent + Tiago TimponiTorrent 232-239 W17-6627 paiva-torrent-2017-uma @@ -13177,7 +13177,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Sofrer uma ofensa, Receber uma advertência: Verbos-suporte Conversos de ‘Fazer’ no Português do Brasil (Suffering an offense, Receiving a citation: Supporting Vectors Converted from ‘To do’ in <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese)[In <fixed-case>P</fixed-case>ortuguese] Claúdia D.Barros Nathalia P.Calcia - Oto A.Vale + Oto A.Vale 240-246 W17-6628 barros-etal-2017-sofrer @@ -13202,8 +13202,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me As bases de dados verbais <fixed-case>ADESSE</fixed-case> e <fixed-case>V</fixed-case>i<fixed-case>PE</fixed-case>r: uma análise constrastiva das construções locativas em espanhol e em português (The verbal databases <fixed-case>ADESSE</fixed-case> and <fixed-case>V</fixed-case>i<fixed-case>PE</fixed-case>r: a contrastive analysis of locative constructs in <fixed-case>S</fixed-case>panish and <fixed-case>P</fixed-case>ortuguese)[In <fixed-case>P</fixed-case>ortuguese] RoanaRodrigues - OtoVale - LauraAlonso Alemany + OtoVale + LauraAlonso Alemany 266-273 W17-6631 rodrigues-etal-2017-bases @@ -13230,7 +13230,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Extracting word lists for domain-specific implicit opinions from corpora - Núria BertomeuCastelló + Núria BertomeuCastelló ManfredStede W17-6802 castello-stede-2017-extracting @@ -13247,7 +13247,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Semantic Variation in Online Communities of Practice MarcoDel Tredici - RaquelFernández + RaquelFernández W17-6804 del-tredici-fernandez-2017-semantic @@ -13284,7 +13284,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me If Sentences Could See: Investigating Visual Information for Semantic Textual Similarity GoranGlavaš IvanVulić - Simone PaoloPonzetto + Simone PaoloPonzetto W17-6809 glavas-etal-2017-sentences @@ -13306,10 +13306,10 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Extracting hypernym relations from <fixed-case>W</fixed-case>ikipedia disambiguation pages : comparing symbolic and machine learning approaches MounaKamel - CassiaTrojahn + CassiaTrojahn AdelGhamnia NathalieAussenac-Gilles - CécileFabre + CécileFabre W17-6812 kamel-etal-2017-extracting @@ -13328,7 +13328,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me AnnaDickinson NathanSchneider AnnieLouis - BonnieWebber + BonnieWebber W17-6814 rohde-etal-2017-exploring @@ -13372,7 +13372,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>P</fixed-case>ropbank Annotation of <fixed-case>D</fixed-case>anish Noun Frames - EckhardBick + EckhardBick W17-6902 bick-2017-propbank @@ -13385,10 +13385,10 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Living a discrete life in a continuous world: Reference in cross-modal entity tracking - GemmaBoleda - SebastianPadó - Nghia ThePham - MarcoBaroni + GemmaBoleda + SebastianPadó + Nghia ThePham + MarcoBaroni W17-6904 boleda-etal-2017-living @@ -13400,8 +13400,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me A Semantically-Based Computational Approach to Narrative Structure - RodolfoDelmonte - GiuliaMarchesini + RodolfoDelmonte + GiuliaMarchesini W17-6906 delmonte-marchesini-2017-semantically @@ -13417,7 +13417,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Bigger does not mean better! We prefer specificity EmmanuelleDusserre - MuntsaPadró + MuntsaPadró W17-6908 dusserre-padro-2017-bigger @@ -13449,7 +13449,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Incorporating visual features into word embeddings: A bimodal autoencoder-based approach MikaHasegawa TetsunoriKobayashi - YoshihikoHayashi + YoshihikoHayashi W17-6912 hasegawa-etal-2017-incorporating @@ -13471,13 +13471,13 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Textual Inference: getting logic from humans Aikaterini-LidaKalouli LivyReal - Valeriade Paiva + Valeriade Paiva W17-6915 kalouli-etal-2017-textual Situating Word Senses in their Historical Context with Linked Data - FahadKhan + FahadKhan JackBowers FrancescaFrontini W17-6916 @@ -13508,14 +13508,14 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me RossBeveridge JaimeRuiz BruceDraper - JamesPustejovsky + JamesPustejovsky W17-6919 krishnaswamy-etal-2017-communicating Ambiguss, a game for building a Sense Annotated Corpus for <fixed-case>F</fixed-case>rench MathieuLafourcade - Nathalie LeBrun + Nathalie LeBrun W17-6920 lafourcade-brun-2017-ambiguss @@ -13529,9 +13529,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Are doggies really nicer than dogs? The impact of morphological derivation on emotional valence in <fixed-case>G</fixed-case>erman GabriellaLapesa - SebastianPadó + SebastianPadó TillmannPross - AntjeRossdeutscher + AntjeRossdeutscher W17-6922 lapesa-etal-2017-doggies @@ -13567,7 +13567,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Neural Disambiguation of Causal Lexical Markers Based on Context - EugenioMartínez-Cámara + EugenioMartínez-Cámara VeredShwartz IrynaGurevych IdoDagan @@ -13579,7 +13579,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me MariiaMelymuka GabriellaLapesa MaxKisselew - SebastianPadó + SebastianPadó W17-6928 melymuka-etal-2017-modeling @@ -13599,7 +13599,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Distributional <fixed-case>L</fixed-case>esk: Effective Knowledge-Based Word Sense Disambiguation DiekeOele - Gertjanvan Noord + Gertjanvan Noord W17-6931 oele-van-noord-2017-distributional @@ -13613,7 +13613,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me There’s no ‘Count or Predict’ but task-based selection for distributional models MartinRiedl - ChrisBiemann + ChrisBiemann W17-6933 riedl-biemann-2017-theres @@ -13643,7 +13643,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me ShotaSasaki ShoTakase NaoyaInoue - NaoakiOkazaki + NaoakiOkazaki KentaroInui W17-6937 sasaki-etal-2017-handling @@ -13652,20 +13652,20 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Vision and Language Integration: Moving beyond Objects RaviShekhar SandroPezzelle - AurélieHerbelot + AurélieHerbelot MoinNabi EnverSangineto - RaffaellaBernardi + RaffaellaBernardi W17-6938 shekhar-etal-2017-vision Can You See the (Linguistic) Difference? Exploring Mass/Count Distinction in Vision - David AddisonSmith + David AddisonSmith SandroPezzelle FrancescaFranzon ChiaraZanini - RaffaellaBernardi + RaffaellaBernardi W17-6939 smith-etal-2017-see @@ -13681,7 +13681,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>L</fixed-case>ex<fixed-case>S</fixed-case>ub<fixed-case>NC</fixed-case>: A Dataset of Lexical Substitution for Nominal Compounds RodrigoWilkens LeonardoZilio - Silvio RicardoCordeiro + Silvio RicardoCordeiro FelipePaula CarlosRamisch MarcoIdiart @@ -13693,7 +13693,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Exploring Soft-Clustering for <fixed-case>G</fixed-case>erman (Particle) Verbs across Frequency Ranges MoritzWittmann MaximilianKöper - SabineSchulte im Walde + SabineSchulte im Walde W17-6942 wittmann-etal-2017-exploring @@ -13719,9 +13719,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Proceedings of Language, Ontology, Terminology and Knowledge Structures Workshop (LOTKS 2017) FrancescaFrontini LarisaGrčić Simeunović - ŠpelaVintar - Anas FahadKhan - ArtemisParvisi + ŠpelaVintar + Anas FahadKhan + ArtemisParvisi Association for Computational Linguistics
Montpellier, France
September @@ -13735,7 +13735,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Exploratory Analysis for Ontology Learning from Social Events on Social Media Streaming in <fixed-case>S</fixed-case>panish EnriqueValeriano - ArturoOncevay-Marcos + ArturoOncevay-Marcos W17-7001 valeriano-oncevay-marcos-2017-exploratory @@ -13765,7 +13765,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Fine-grained domain classification of text using <fixed-case>TERMIUM</fixed-case> Plus GabrielBernier-Colborne - CarolineBarrière + CarolineBarrière Pierre AndréMénard W17-7005 bernier-colborne-etal-2017-fine @@ -13773,14 +13773,14 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>TBX</fixed-case> in <fixed-case>ODD</fixed-case>: Schema-agnostic specification and documentation for <fixed-case>T</fixed-case>erm<fixed-case>B</fixed-case>ase e<fixed-case>X</fixed-case>change StefanPernes - LaurentRomary + LaurentRomary W17-7006 pernes-romary-2017-tbx Enrichment of <fixed-case>F</fixed-case>rench Biomedical Ontologies with <fixed-case>UMLS</fixed-case> Concepts and Semantic Types for Biomedical Named Entity Recognition Though Ontological Semantic Annotation AndonTchechmedjiev - ClémentJonquet + ClémentJonquet W17-7007 tchechmedjiev-jonquet-2017-enrichment @@ -13803,7 +13803,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Developing <fixed-case>L</fixed-case>ex<fixed-case>O</fixed-case>: a Collaborative Editor of Multilingual Lexica and Termino-Ontological Resources in the Humanities AndreaBellandi - EmilianoGiovannetti + EmilianoGiovannetti SilviaPiccini AnjaWeingart W17-7010 @@ -13822,7 +13822,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Proceedings of the IWCS workshop on Foundations of Situated and Multimodal Communication - NicholasAsher + NicholasAsher JulieHunter AlexLascarides ws @@ -13836,7 +13836,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Exploring Multi-Modal <fixed-case>T</fixed-case>ext+<fixed-case>I</fixed-case>mage Models to Distinguish between Abstract and Concrete Nouns Sai AbishekBhaskar MaximilianKöper - SabineSchulte Im Walde + SabineSchulte Im Walde DiegoFrassinelli W17-7101 bhaskar-etal-2017-exploring @@ -13849,7 +13849,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Creating Common Ground through Multimodal Simulations - JamesPustejovsky + JamesPustejovsky NikhilKrishnaswamy BruceDraper PradyumnaNarayana @@ -13909,7 +13909,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Correcting Contradictions Aikaterini-LidaKalouli - Valeriade Paiva + Valeriade Paiva LivyReal W17-7205 kalouli-etal-2017-correcting @@ -14019,8 +14019,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me A semantically-based approach to the annotation of narrative style - RodolfoDelmonte - GiuliaMarchesi + RodolfoDelmonte + GiuliaMarchesi W17-7402 delmonte-marchesi-2017-semantically @@ -14032,9 +14032,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Revisiting the <fixed-case>ISO</fixed-case> standard for dialogue act annotation - HarryBunt + HarryBunt VolhaPetukhova - Alex ChengyuFang + Alex ChengyuFang W17-7404 bunt-etal-2017-revisiting @@ -14047,7 +14047,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me KetongSu Benjamin R.Cowan KillianLevacher - Arturo CalvoDevesa + Arturo CalvoDevesa LodanaCerrato NickCampbell VincentWade @@ -14079,14 +14079,14 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Towards interoperable annotation of quantification - HarryBunt + HarryBunt W17-7409 bunt-2017-towards <fixed-case>PACTE</fixed-case>: A colloaborative platform for textual annotation Pierre AndréMénard - CarolineBarrière + CarolineBarrière W17-7410 menard-barriere-2017-pacte @@ -14107,8 +14107,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Temporal@<fixed-case>ODIL</fixed-case> project: Adapting <fixed-case>ISO</fixed-case>-<fixed-case>T</fixed-case>ime<fixed-case>ML</fixed-case> to syntactic treebanks for the temporal annotation of spoken speech Jean-YvesAntoine - JakubWasczuk - AnaïsLefeuvre-Haftermeyer + JakubWasczuk + AnaïsLefeuvre-Haftermeyer LotfiAbouda EmmanuelSchang AgataSavary @@ -14125,7 +14125,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Enriching the Notion of Path in <fixed-case>ISO</fixed-case>-Space - JamesPustejovsky + JamesPustejovsky KiyongLee W17-7415 pustejovsky-lee-2017-enriching @@ -14134,7 +14134,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Proceedings of the 14th International Conference on Natural Language Processing (ICON-2017) - SivajiBandyopadhyay + SivajiBandyopadhyay NLP Association of India
Kolkata, India
December @@ -14163,7 +14163,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Three-phase training to address data sparsity in Neural Machine Translation RuchitAgrawal MihirShekhar - DiptiSharma + DiptiSharma 13–22 W17-7503 agrawal-etal-2017-three @@ -14173,7 +14173,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me SauravJha AanchalChaurasia AkhileshSudhakar - Anil KumarSingh + Anil KumarSingh 23–32 W17-7504 jha-etal-2017-reference @@ -14182,7 +14182,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me A vis-à-vis evaluation of <fixed-case>MT</fixed-case> paradigms for linguistically distant languages RuchitAgrawal JahfarAli - Dipti MisraSharma + Dipti MisraSharma 33–42 W17-7505 agrawal-etal-2017-vis @@ -14200,7 +14200,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>POS</fixed-case> Tagging For Resource Poor Languages Through Feature Projection PruthwikMishra VandanMujadia - Dipti MisraSharma + Dipti MisraSharma 50–55 W17-7507 mishra-etal-2017-pos @@ -14208,7 +14208,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me An Exploration of Word Embedding Initialization in Deep-Learning Tasks TomKocmi - OndřejBojar + OndřejBojar 56–64 W17-7508 kocmi-bojar-2017-exploration @@ -14287,7 +14287,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me TanikSaikh TirthankarGhosal AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 131–140 W17-7517 saikh-etal-2017-document @@ -14296,7 +14296,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Is your Statement Purposeless? Predicting Computer Science Graduation Admission Acceptance based on Statement Of Purpose DipteshKanojia NikhilWani - PushpakBhattacharyya + PushpakBhattacharyya 141–145 W17-7518 kanojia-etal-2017-statement @@ -14304,14 +14304,14 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Natural Language Programing with Automatic Code Generation towards Solving Addition-Subtraction Word Problems SouravMandal - Sudip KumarNaskar + Sudip KumarNaskar 146–154 W17-7519 mandal-naskar-2017-natural Unsupervised Separation of Transliterable and Native Words for <fixed-case>M</fixed-case>alayalam - DeepakP + DeepakP 155–164 W17-7520 p-2017-unsupervised @@ -14337,14 +14337,14 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me KuntalDey RitvikShrivastava SarojKaushik - L VenkataSubramaniam + L VenkataSubramaniam 178–187 W17-7523 dey-etal-2017-semtagger Reasoning with Sets to Solve Simple Word Problems Automatically - Sowmya SSundaram + Sowmya SSundaram DeepakKhemani 188–196 W17-7524 @@ -14361,7 +14361,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Beyond <fixed-case>W</fixed-case>ord2<fixed-case>V</fixed-case>ec: Embedding Words and Phrases in Same Vector Space Vijay PrakashDwivedi - ManishShrivastava + ManishShrivastava 205–211 W17-7526 dwivedi-shrivastava-2017-beyond @@ -14394,7 +14394,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>M</fixed-case>alayalam <fixed-case>V</fixed-case>erb<fixed-case>F</fixed-case>rames - Jisha PJayan + Jisha PJayan Asha SNair GovindaruV 236–244 @@ -14407,8 +14407,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me SandhyaSingh DharaGorasia MeenakshiSomasundaram - MalharKulkarni - PushpakBhattacharyya + MalharKulkarni + PushpakBhattacharyya 245–254 W17-7531 redkar-etal-2017-hindi-shabdamitra @@ -14427,7 +14427,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me End to End Dialog System for <fixed-case>T</fixed-case>elugu PrathyushaDanda PrathyushaJwalapuram - ManishShrivastava + ManishShrivastava 265–272 W17-7533 danda-etal-2017-end @@ -14461,7 +14461,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Unsupervised Morpheme Segmentation Through Numerical Weighting and Thresholding JoyMahapatra - Sudip KumarNaskar + Sudip KumarNaskar 298–304 W17-7537 mahapatra-naskar-2017-unsupervised @@ -14470,7 +14470,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Experiments with Domain Dependent Dialogue Act Classification using Open-Domain Dialogue Corpora SwapnilHingmire ApoorvShrivastava - GirishPalshikar + GirishPalshikar SaurabhSrivastava 305–311 W17-7538 @@ -14479,7 +14479,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Normalization of Social Media Text using Deep Neural Networks Ajay ShankarTiwari - Sudip KumarNaskar + Sudip KumarNaskar 312–321 W17-7539 tiwari-naskar-2017-normalization @@ -14541,24 +14541,24 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Linguistic approach based Transfer Learning for Sentiment Classification in <fixed-case>H</fixed-case>indi VartikaRai SaksheeVijay - DiptiMisra + DiptiMisra 373–382 W17-7546 rai-etal-2017-linguistic Scalable Bio-Molecular Event Extraction System towards Knowledge Acquisition - Pattabhi RKRao + Pattabhi RKRao SindhujaGopalan - Sobha LalithaDevi + Sobha LalithaDevi 383–391 W17-7547 rao-etal-2017-scalable Co-reference Resolution in <fixed-case>T</fixed-case>amil Text - Vijay SundarRam - Sobha LalithaDevi + Vijay SundarRam + Sobha LalithaDevi 392–401 W17-7548 ram-devi-2017-co @@ -14566,8 +14566,8 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Cross Linguistic Variations in Discourse Relations among <fixed-case>I</fixed-case>ndian Languages SindhujaGopalan - LakshmiS - Sobha LalithaDevi + LakshmiS + Sobha LalithaDevi 402–407 W17-7549 gopalan-etal-2017-cross @@ -14625,7 +14625,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Neural Networks for Semantic Textual Similarity DerekPrijatelj - JugalKalita + JugalKalita JonathanVentura 456–465 W17-7556 @@ -14635,7 +14635,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Open Set Text Classification Using <fixed-case>CNN</fixed-case>s SridhamaPrakhya VinodiniVenkataram - JugalKalita + JugalKalita 466–475 W17-7557 prakhya-etal-2017-open @@ -14652,7 +14652,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Neural Morphological Disambiguation Using Surface and Contextual Morphological Awareness AkhileshSudhakar - Anil KumarSingh + Anil KumarSingh 485–494 W17-7559 sudhakar-singh-2017-neural @@ -14660,7 +14660,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Word Sense Disambiguation for <fixed-case>M</fixed-case>alayalam in a Conditional Random Field Framework Junaida MK - Jisha PJayan + Jisha PJayan ElizabethSherly 495–502 W17-7560 @@ -14670,7 +14670,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Semisupervied Data Driven Word Sense Disambiguation for Resource-poor Languages PratibhaRani VikramPudi - Dipti M.Sharma + Dipti M.Sharma 503–512 W17-7561 rani-etal-2017-semisupervied @@ -14685,7 +14685,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Semantic Enrichment Across Language: A Case Study of <fixed-case>C</fixed-case>zech Bibliographic Databases - PavelSmrz + PavelSmrz LubomirOtrusina 523–532 W17-7563 @@ -14696,7 +14696,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Proceedings of the 16th International Workshop on Treebanks and Linguistic Theories W17-76 - JanHajič + JanHajič
Prague, Czech Republic
2017 tlt @@ -14707,14 +14707,14 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Annotating and parsing to semantic frames: feedback from the <fixed-case>F</fixed-case>rench <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et project - MarieCandito + MarieCandito v W17-7601 candito-2017-annotating Downstream use of syntactic analysis: does representation matter? - LiljaØvrelid + LiljaØvrelid vi W17-7602 ovrelid-2017-downstream @@ -14724,16 +14724,16 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Daniëlde Kok PatriciaFischer CorinaDima - ErhardHinrichs + ErhardHinrichs 1–9 W17-7603 de-kok-etal-2017-distributional <fixed-case>UD</fixed-case> Annotatrix: An annotation tool for <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies - Francis M.Tyers + Francis M.Tyers MariyaSheyanova - Jonathan NorthWashington + Jonathan NorthWashington 10–17 W17-7604 tyers-etal-2017-ud @@ -14766,7 +14766,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Extensions to the <fixed-case>G</fixed-case>r<fixed-case>ETEL</fixed-case> Treebank Query Application - JanOdijk + JanOdijk Martijnvan der Klis SheeanSpoel 46–55 @@ -14776,9 +14776,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me The Relation of Form and Function in Linguistic Theory and in a Multilayer Treebank EduardBejček - EvaHajičová + EvaHajičová MarieMikulová - JarmilaPanevová + JarmilaPanevová 56–63 W17-7609 bejcek-etal-2017-relation @@ -14786,16 +14786,16 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Literal readings of multiword expressions: as scarce as hen’s teeth AgataSavary - Silvio RicardoCordeiro + Silvio RicardoCordeiro 64–72 W17-7610 savary-cordeiro-2017-literal Querying Multi-word Expressions Annotation with <fixed-case>CQL</fixed-case> - NataliaKlyueva + NataliaKlyueva AnnaVernerová - BehrangQasemizadeh + BehrangQasemizadeh 73–79 W17-7611 klyueva-etal-2017-querying @@ -14827,7 +14827,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Error Analysis of Cross-lingual Tagging and Parsing RudolfRosa - ZdeněkŽabokrtský + ZdeněkŽabokrtský 106–118 W17-7615 rosa-zabokrtsky-2017-error @@ -14843,7 +14843,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Recent Developments within <fixed-case>B</fixed-case>ul<fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank PetyaOsenova - KirilSimov + KirilSimov 129–137 W17-7617 osenova-simov-2017-recent @@ -14851,7 +14851,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Towards a dependency-annotated treebank for <fixed-case>B</fixed-case>ambara EkaterinaAplonova - Francis M.Tyers + Francis M.Tyers 138–145 W17-7618 aplonova-tyers-2017-towards @@ -14865,7 +14865,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me What <fixed-case>I</fixed-case> think when <fixed-case>I</fixed-case> think about treebanks - AndersSøgaard + AndersSøgaard 161–166 W17-7620 sogaard-2017-think @@ -14873,7 +14873,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Syntactic Semantic Correspondence in Dependency Grammar CătălinaMărănduc - CătălinMititelu + CătălinMititelu VictoriaBobicev 167–180 W17-7621 @@ -14899,7 +14899,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Dangerous Relations in Dependency Treebanks ChiaraAlzetta FeliceDell’Orletta - SimonettaMontemagni + SimonettaMontemagni GiuliaVenturi 201–210 W17-7624 @@ -14960,7 +14960,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Automatic Summarization of Online Debates NattapongSanchan AhmetAker - KalinaBontcheva + KalinaBontcheva 19–27 10.26615/978-954-452-038-0_003 https://doi.org/10.26615/978-954-452-038-0_003 @@ -14983,7 +14983,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Proceedings of the Workshop Knowledge Resources for the Socio-Economic Sciences and Humanities associated with RANLP 2017 - KalliopiZervanou + KalliopiZervanou PetyaOsenova EvelineWandl-Vogt DanCristea @@ -15011,7 +15011,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me A Multiform Balanced Dependency Treebank for <fixed-case>R</fixed-case>omanian MihaelaColhon CătălinaMărănduc - CătălinMititelu + CătălinMititelu 9–18 10.26615/978-954-452-040-3_002 https://doi.org/10.26615/978-954-452-040-3_002 @@ -15024,7 +15024,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me PiekVossen MarcoRospocher RinkeHoekstra - Willem Robertvan Hage + Willem Robertvan Hage 19–25 10.26615/978-954-452-040-3_003 https://doi.org/10.26615/978-954-452-040-3_003 @@ -15062,9 +15062,9 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Proceedings of the Workshop Human-Informed Translation and Interpreting Technology IrinaTemnikova - ConstantinOrasan - Gloria CorpasPastor - StephanVogel + ConstantinOrasan + Gloria CorpasPastor + StephanVogel Association for Computational Linguistics, Shoumen, Bulgaria
Varna, Bulgaria
September @@ -15091,7 +15091,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Experiments in Non-Coherent Post-editing CristinaToledo Báez - MoritzSchaeffer + MoritzSchaeffer MichaelCarl 11–20 10.26615/978-954-452-042-7_002 @@ -15134,7 +15134,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Translation Memory Systems Have a Long Way to Go AndreaSilvestre Baquero - RuslanMitkov + RuslanMitkov 44–51 10.26615/978-954-452-042-7_006 https://doi.org/10.26615/978-954-452-042-7_006 @@ -15166,7 +15166,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Proceedings of the Biomedical NLP Workshop associated with RANLP 2017 SvetlaBoytcheva - Kevin BretonnelCohen + Kevin BretonnelCohen GuerganaSavova GaliaAngelova INCOMA Ltd. @@ -15200,7 +15200,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Discourse-Wide Extraction of Assay Frames from the Biological Literature - DayneFreitag + DayneFreitag PaulKalmar EricYeh 15–23 @@ -15221,7 +15221,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Understanding of unknown medical words NataliaGrabar - ThierryHamon + ThierryHamon 32–41 10.26615/978-954-452-044-1_005 https://doi.org/10.26615/978-954-452-044-1_005 @@ -15234,7 +15234,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me SteffenRemus AlexanderPanchenko AndreasHolzinger - ChrisBiemann + ChrisBiemann 42–48 10.26615/978-954-452-044-1_006 https://doi.org/10.26615/978-954-452-044-1_006 @@ -15275,7 +15275,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me <fixed-case>POMELO</fixed-case>: <fixed-case>M</fixed-case>edline corpus with manually annotated food-drug interactions - ThierryHamon + ThierryHamon VincentTabanou FleurMougin NataliaGrabar @@ -15289,7 +15289,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Annotation of Clinical Narratives in <fixed-case>B</fixed-case>ulgarian language IvajloRadev - KirilSimov + KirilSimov GaliaAngelova SvetlaBoytcheva 81–87 @@ -15330,7 +15330,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me Tools for Building a Corpus to Study the Historical and Geographical Variation of the <fixed-case>R</fixed-case>omanian Language VictoriaBobicev CătălinaMărănduc - Cenel AugustoPerez + Cenel AugustoPerez 10–19 0.26615/978-954-452-046-5_002 http://doi.org/10.26615/978-954-452-046-5_002 @@ -15354,7 +15354,7 @@ with emotion annotation. We (a) analyse annotation reliability and annotation me On the annotation of vague expressions: a case study on <fixed-case>R</fixed-case>omanian historical texts AncaDinu - Walthervon Hahn + Walthervon Hahn CristinaVertan 24–31 0.26615/978-954-452-046-5_004 diff --git a/data/xml/W18.xml b/data/xml/W18.xml index e8567a8a4d..01507645b9 100644 --- a/data/xml/W18.xml +++ b/data/xml/W18.xml @@ -4,10 +4,10 @@ Proceedings of the 8th Workshop on Cognitive Modeling and Computational Linguistics (CMCL 2018) W18-01 - AsadSayeed - CassandraJacobs + AsadSayeed + CassandraJacobs TalLinzen - Martenvan Schijndel + Martenvan Schijndel 10.18653/v1/W18-01 Association for Computational Linguistics
Salt Lake City, Utah
@@ -42,7 +42,7 @@ Dynamic encoding of structural uncertainty in gradient symbols Pyeong WhanCho MatthewGoldrick - Richard L.Lewis + Richard L.Lewis PaulSmolensky 19–28 W18-0103 @@ -106,11 +106,11 @@ Proceedings of the Fourth International Workshop on Computational Linguistics of Uralic Languages W18-02 - Tommi A.Pirinen + Tommi A.Pirinen MichaelRießler JackRueter TrondTrosterud - Francis M.Tyers + Francis M.Tyers 10.18653/v1/W18-02 Association for Computational Linguistics
Helsinki, Finland
@@ -125,7 +125,7 @@ Dependency Parsing of Code-Switching Data with Cross-Lingual Feature Representations NikoPartanen - KyungtaeLim + KyungtaeLim MichaelRießler ThierryPoibeau 1–17 @@ -199,7 +199,7 @@ Initial Experiments in Data-Driven Morphological Analysis for <fixed-case>F</fixed-case>innish - MiikkaSilfverberg + MiikkaSilfverberg MansHulden 98–105 W18-0209 @@ -225,7 +225,7 @@ Parallel Forms in <fixed-case>E</fixed-case>stonian Finite State Morphology - Heiki-JaanKaalep + Heiki-JaanKaalep 139–153 W18-0212 10.18653/v1/W18-0212 @@ -252,7 +252,7 @@ Proceedings of the Society for Computation in Linguistics (SCiL) 2018 - GajaJarosz + GajaJarosz BrendanO’Connor JoePater scil @@ -298,7 +298,7 @@ Modeling the Decline in <fixed-case>E</fixed-case>nglish Passivization LiwenHou - DavidSmith + DavidSmith 34-43 10.7275/R5ZC812C W18-0304 @@ -314,7 +314,7 @@ A bidirectional mapping between <fixed-case>E</fixed-case>nglish and <fixed-case>CNF</fixed-case>-based reasoners - StevenAbney + StevenAbney 55-63 10.7275/R5PZ571N W18-0306 @@ -332,7 +332,7 @@ Differentiating Phrase Structure Parsing and Memory Retrieval in the Brain ShohiniBhattasali - JohnHale + JohnHale ChristophePallier JonathanBrennan Wen-MingLuh @@ -369,7 +369,7 @@ Conditions on abruptness in a gradient-ascent Maximum Entropy learner - ElliottMoreton + ElliottMoreton 113-124 10.7275/R5XG9PBX W18-0312 @@ -377,7 +377,7 @@ Using Rhetorical Topics for Automatic Summarization - Natalie M.Schrimpf + Natalie M.Schrimpf 125-135 10.7275/R5SQ8XM6 W18-0313 @@ -385,8 +385,8 @@ Sound Analogies with Phoneme Embeddings - Miikka P.Silfverberg - LingshuangMao + Miikka P.Silfverberg + LingshuangMao MansHulden 136-144 10.7275/R5NZ85VD @@ -424,7 +424,7 @@ Proceedings of the Thirteenth Workshop on Innovative Use of NLP for Building Educational Applications W18-05 - JoelTetreault + JoelTetreault JillBurstein EkaterinaKochmar ClaudiaLeacock @@ -455,9 +455,9 @@ Using Paraphrasing and Memory-Augmented Models to Combat Data Sparsity in Question Interpretation with a Virtual Patient Dialogue System LifengJin - DavidKing + DavidKing AmadHussein - MichaelWhite + MichaelWhite DouglasDanforth 13–23 When interpreting questions in a virtual patient dialogue system one must inevitably tackle the challenge of a long tail of relatively infrequently asked questions. To make progress on this challenge, we investigate the use of paraphrasing for data augmentation and neural memory-based classification, finding that the two methods work best in combination. In particular, we find that the neural memory-based approach not only outperforms a straight CNN classifier on low frequency questions, but also takes better advantage of the augmented data created by paraphrasing, together yielding a nearly 10% absolute improvement in accuracy on the least frequently asked questions. @@ -480,7 +480,7 @@ Automatic Input Enrichment for Selecting Reading Material: An Online Study with <fixed-case>E</fixed-case>nglish Teachers MariaChinkina AnkitaOswal - DetmarMeurers + DetmarMeurers 35–44 Input material at the appropriate level is crucial for language acquisition. Automating the search for such material can systematically and efficiently support teachers in their pedagogical practice. This is the goal of the computational linguistic task of automatic input enrichment (Chinkina & Meurers, 2016): It analyzes and re-ranks a collection of texts in order to prioritize those containing target linguistic forms. In the online study described in the paper, we collected 240 responses from English teachers in order to investigate whether they preferred automatic input enrichment over web search when selecting reading material for class. Participants demonstrated a general preference for the material provided by an automatic input enrichment system. It was also rated significantly higher than the texts retrieved by a standard web search engine with regard to the representation of linguistic forms and equivalent with regard to the relevance of the content to the topic. We discuss the implications of the results for language teaching and consider the potential strands of future research. W18-0504 @@ -490,7 +490,7 @@ Estimating Linguistic Complexity for Science Texts FarahNadeem - MariOstendorf + MariOstendorf 45–55 Evaluation of text difficulty is important both for downstream tasks like text simplification, and for supporting educators in classrooms. Existing work on automated text complexity analysis uses linear models with engineered knowledge-driven features as inputs. While this offers interpretability, these models have lower accuracy for shorter texts. Traditional readability metrics have the additional drawback of not generalizing to informational texts such as science. We propose a neural approach, training on science and other informational texts, to mitigate both problems. Our results show that neural methods outperform knowledge-based linear models for short texts, and have the capacity to generalize to genres not present in the training data. W18-0505 @@ -522,11 +522,11 @@ A Report on the Complex Word Identification Shared Task 2018 Seid MuhieYimam - ChrisBiemann - ShervinMalmasi - GustavoPaetzold + ChrisBiemann + ShervinMalmasi + GustavoPaetzold LuciaSpecia - SanjaŠtajner + SanjaŠtajner AnaïsTack MarcosZampieri 66–78 @@ -548,9 +548,9 @@ <fixed-case>COAST</fixed-case> - Customizable Online Syllable Enhancement in Texts. A flexible framework for automatically enhancing reading materials HeikoHolz - ZarahWeiss + ZarahWeiss OliverBrehm - DetmarMeurers + DetmarMeurers 89–100 This paper presents COAST, a web-based application to easily and automatically enhance syllable structure, word stress, and spacing in texts, that was designed in close collaboration with learning therapists to ensure its practical relevance. Such syllable-enhanced texts are commonly used in learning therapy or private tuition to promote the recognition of syllables in order to improve reading and writing skills. In a state of the art solutions for automatic syllable enhancement, we put special emphasis on syllable stress and support specific marking of the primary syllable stress in words. Core features of our tool are i) a highly customizable text enhancement and template functionality, and ii) a novel crowd-sourcing mechanism that we employ to address the issue of data sparsity in language resources. We successfully tested COAST with real-life practitioners in a series of user tests validating the concept of our framework. W18-0509 @@ -570,7 +570,7 @@ Annotating Student Talk in Text-based Classroom Discussions LucaLugini - DianeLitman + DianeLitman AmandaGodley ChristopherOlshefski 110–116 @@ -591,12 +591,12 @@ Generating Feedback for <fixed-case>E</fixed-case>nglish Foreign Language Exercises - BjörnRudzewitz + BjörnRudzewitz RamonZiai KordulaDe Kuthy VerenaMöller FlorianNuxoll - DetmarMeurers + DetmarMeurers 127–136 While immediate feedback on learner language is often discussed in the Second Language Acquisition literature (e.g., Mackey 2006), few systems used in real-life educational settings provide helpful, metalinguistic feedback to learners. In this paper, we present a novel approach leveraging task information to generate the expected range of well-formed and ill-formed variability in learner answers along with the required diagnosis and feedback. We combine this offline generation approach with an online component that matches the actual student answers against the pre-computed hypotheses. The results obtained for a set of 33 thousand answers of 7th grade German high school students learning English show that the approach successfully covers frequent answer patterns. At the same time, paraphrases and content errors require a more flexible alignment approach, for which we are planning to complement the method with the CoMiC approach successfully used for the analysis of reading comprehension answers (Meurers et al., 2011). W18-0513 @@ -608,7 +608,7 @@ AnaïsTack ThomasFrançois PietDesmet - CédrickFairon + CédrickFairon 137–146 In this paper, we introduce NT2Lex, a novel lexical resource for Dutch as a foreign language (NT2) which includes frequency distributions of 17,743 words and expressions attested in expert-written textbook texts and readers graded along the scale of the Common European Framework of Reference (CEFR). In essence, the lexicon informs us about what kind of vocabulary should be understood when reading Dutch as a non-native reader at a particular proficiency level. The main novelty of the resource with respect to the previously developed CEFR-graded lexicons concerns the introduction of corpus-based evidence for L2 word sense complexity through the linkage to Open Dutch WordNet (Postma et al., 2016). The resource thus contains, on top of the lemmatised and part-of-speech tagged lexical entries, a total of 11,999 unique word senses and 8,934 distinct synsets. W18-0514 @@ -640,7 +640,7 @@ <fixed-case>L</fixed-case>a<fixed-case>STUS</fixed-case>/<fixed-case>TALN</fixed-case> at Complex Word Identification (<fixed-case>CWI</fixed-case>) 2018 Shared Task - AhmedAbuRa’ed + AhmedAbuRa’ed HoracioSaggion 159–165 This paper presents the participation of the LaSTUS/TALN team in the Complex Word Identification (CWI) Shared Task 2018 in the English monolingual track . The purpose of the task was to determine if a word in a given sentence can be judged as complex or not by a certain target audience. For the English track, task organizers provided a training and a development datasets of 27,299 and 3,328 words respectively together with the sentence in which each word occurs. The words were judged as complex or not by 20 human evaluators; ten of whom are natives. We submitted two systems: one system modeled each word to evaluate as a numeric vector populated with a set of lexical, semantic and contextual features while the other system relies on a word embedding representation and a distance metric. We trained two separate classifiers to automatically decide if each word is complex or not. We submitted six runs, two for each of the three subsets of the English monolingual CWI track. @@ -660,7 +660,7 @@ <fixed-case>U</fixed-case>nibuc<fixed-case>K</fixed-case>ernel: A kernel-based learning method for complex word identification - AndreiButnaru + AndreiButnaru Radu TudorIonescu 175–183 In this paper, we present a kernel-based learning approach for the 2018 Complex Word Identification (CWI) Shared Task. Our approach is based on combining multiple low-level features, such as character n-grams, with high-level semantic features that are either automatically learned using word embeddings or extracted from a lexical knowledge base, namely WordNet. After feature extraction, we employ a kernel method for the learning phase. The feature matrix is first transformed into a normalized kernel matrix. For the binary classification task (simple versus complex), we employ Support Vector Machines. For the regression task, in which we have to predict the complexity level of a word (a word is more complex if it is labeled as complex by more annotators), we employ v-Support Vector Regression. We applied our approach only on the three English data sets containing documents from Wikipedia, WikiNews and News domains. Our best result during the competition was the third place on the English Wikipedia data set. However, in this paper, we also report better post-competition results. @@ -693,7 +693,7 @@ NikhilWani SandeepMathias Jayashree AanandGajjam - PushpakBhattacharyya + PushpakBhattacharyya 200–205 In this paper, we present an effective system using voting ensemble classifiers to detect contextually complex words for non-native English speakers. To make the final decision, we channel a set of eight calibrated classifiers based on lexical, size and vocabulary features and train our model with annotated datasets collected from a mixture of native and non-native speakers. Thereafter, we test our system on three datasets namely News, WikiNews, and Wikipedia and report competitive results with an F1-Score ranging between 0.777 to 0.855 for each of the datasets. Our system outperforms multiple other models and falls within 0.042 to 0.026 percent of the best-performing model’s score in the shared task. W18-0522 @@ -703,8 +703,8 @@ Grotoco@<fixed-case>SLAM</fixed-case>: Second Language Acquisition Modeling with Simple Features, Learners and Task-wise Models SigridKlerke - HéctorMartínez Alonso - BarbaraPlank + HéctorMartínez Alonso + BarbaraPlank 206–211 We present our submission to the 2018 Duolingo Shared Task on Second Language Acquisition Modeling (SLAM). We focus on evaluating a range of features for the task, including user-derived measures, while examining how far we can get with a simple linear classifier. Our analysis reveals that errors differ per exercise format, which motivates our final and best-performing system: a task-wise (per exercise-format) model. W18-0523 @@ -760,7 +760,7 @@ Annotation and Classification of Sentence-level Revision Improvement TazinAfrin - DianeLitman + DianeLitman 240–246 Studies of writing revisions rarely focus on revision quality. To address this issue, we introduce a corpus of between-draft revisions of student argumentative essays, annotated as to whether each revision improves essay quality. We demonstrate a potential usage of our annotations by developing a machine learning model to predict revision improvement. With the goal of expanding training data, we also extract revisions from a dataset edited by expert proofreaders. Our results indicate that blending expert and non-expert revisions increases model performance, with expert data particularly important for predicting low-quality revisions. W18-0528 @@ -770,7 +770,7 @@ Language Model Based Grammatical Error Correction without Annotated Training Data ChristopherBryant - TedBriscoe + TedBriscoe 247–253 Since the end of the CoNLL-2014 shared task on grammatical error correction (GEC), research into language model (LM) based approaches to GEC has largely stagnated. In this paper, we re-examine LMs in GEC and show that it is entirely possible to build a simple system that not only requires minimal annotated data (∼1000 sentences), but is also fairly competitive with several state-of-the-art systems. This approach should be of particular interest for languages where very little annotated training data exists, although we also hope to use it as a baseline to motivate future research. W18-0529 @@ -791,7 +791,7 @@ Automated Content Analysis: A Case Study of Computer Science Student Summaries YanjunGao Patricia M.Davies - Rebecca J.Passonneau + Rebecca J.Passonneau 264–272 Technology is transforming Higher Education learning and teaching. This paper reports on a project to examine how and why automated content analysis could be used to assess precis writing by university students. We examine the case of one hundred and twenty-two summaries written by computer science freshmen. The texts, which had been hand scored using a teacher-designed rubric, were autoscored using the Natural Language Processing software, PyrEval. Pearson’s correlation coefficient and Spearman rank correlation were used to analyze the relationship between the teacher score and the PyrEval score for each summary. Three content models automatically constructed by PyrEval from different sets of human reference summaries led to consistent correlations, showing that the approach is reliable. Also observed was that, in cases where the focus of student assessment centers on formative feedback, categorizing the PyrEval scores by examining the average and standard deviations could lead to novel interpretations of their relationships. It is suggested that this project has implications for the ways in which automated content analysis could be used to help university students improve their summarization skills. W18-0531 @@ -801,7 +801,7 @@ Toward Data-Driven Tutorial Question Answering with Deep Learning Conversational Models MayankKulkarni - KristyBoyer + KristyBoyer 273–283 There has been an increase in popularity of data-driven question answering systems given their recent success. This pa-per explores the possibility of building a tutorial question answering system for Java programming from data sampled from a community-based question answering forum. This paper reports on the creation of a dataset that could support building such a tutorial question answering system and discusses the methodology to create the 106,386 question strong dataset. We investigate how retrieval-based and generative models perform on the given dataset. The work also investigates the usefulness of using hybrid approaches such as combining retrieval-based and generative models. The results indicate that building data-driven tutorial systems using community-based question answering forums holds significant promise. W18-0532 @@ -824,9 +824,9 @@ A <fixed-case>P</fixed-case>ortuguese Native Language Identification Dataset - Iriadel Río Gayo + Iriadel Río Gayo MarcosZampieri - ShervinMalmasi + ShervinMalmasi 291–296 In this paper we present NLI-PT, the first Portuguese dataset compiled for Native Language Identification (NLI), the task of identifying an author’s first language based on their second language writing. The dataset includes 1,868 student essays written by learners of European Portuguese, native speakers of the following L1s: Chinese, English, Spanish, German, Russian, French, Japanese, Italian, Dutch, Tetum, Arabic, Polish, Korean, Romanian, and Swedish. NLI-PT includes the original student text and four different types of annotation: POS, fine-grained POS, constituency parses, and dependency parses. NLI-PT can be used not only in NLI but also in research on several topics in the field of Second Language Acquisition and educational NLP. We discuss possible applications of this dataset and present the results obtained for the first lexical baseline system for Portuguese NLI. W18-0534 @@ -848,8 +848,8 @@ MengZhang XieChen RonanCummins - Øistein E.Andersen - TedBriscoe + Øistein E.Andersen + TedBriscoe 305–314 Some language exams have multiple writing tasks. When a learner writes multiple texts in a language exam, it is not surprising that the quality of these texts tends to be similar, and the existing automated text scoring (ATS) systems do not explicitly model this similarity. In this paper, we suggest that it could be useful to include the other texts written by this learner in the same exam as extra references in an ATS system. We propose various approaches of fusing information from multiple tasks and pass this authorship knowledge into our ATS model on six different datasets. We show that this can positively affect the model performance at a global level. W18-0536 @@ -871,7 +871,7 @@ Segun TaofeekAroyehun JasonAngel Daniel AlejandroPérez Alvarez - AlexanderGelbukh + AlexanderGelbukh 322–327 We describe the systems of NLP-CIC team that participated in the Complex Word Identification (CWI) 2018 shared task. The shared task aimed to benchmark approaches for identifying complex words in English and other languages from the perspective of non-native speakers. Our goal is to compare two approaches: feature engineering and a deep neural network. Both approaches achieved comparable performance on the English test set. We demonstrated the flexibility of the deep-learning approach by using the same deep neural network setup in the Spanish track. Our systems achieved competitive results: all our systems were within 0.01 of the system with the best macro-F1 score on the test sets except on Wikipedia test set, on which our best system is 0.04 below the best macro-F1 score. W18-0538 @@ -900,7 +900,7 @@ Complex Word Identification Using Character n-grams - MajaPopović + MajaPopović 341–348 This paper investigates the use of character n-gram frequencies for identifying complex words in English, German and Spanish texts. The approach is based on the assumption that complex words are likely to contain different character sequences than simple words. The multinomial Naive Bayes classifier was used with n-grams of different lengths as features, and the best results were obtained for the combination of 2-grams and 4-grams. This variant was submitted to the Complex Word Identification Shared Task 2018 for all texts and achieved F-scores between 70% and 83%. The system was ranked in the middle range for all English texts, as third of fourteen submissions for German, and as tenth of seventeen submissions for Spanish. The method is not very convenient for the cross-language task, achieving only 59% on the French text. W18-0541 @@ -980,7 +980,7 @@ Co-Attention Based Neural Network for Source-Dependent Essay Scoring HaoranZhang - DianeLitman + DianeLitman 399–409 This paper presents an investigation of using a co-attention based neural network for source-dependent essay scoring. We use a co-attention mechanism to help the model learn the importance of each part of the essay more accurately. Also, this paper shows that the co-attention based neural network model provides reliable score prediction of source-dependent responses. We evaluate our model on two source-dependent response corpora. Results show that our model outperforms the baseline on both corpora. We also show that the attention of the model is similar to the expert opinions with examples. W18-0549 @@ -1007,7 +1007,7 @@ W18-06 KateLoveys KateNiederhoffer - EmilyPrud’hommeaux + EmilyPrud’hommeaux RebeccaResnik PhilipResnik 10.18653/v1/W18-06 @@ -1049,7 +1049,7 @@ SurajNair AyahZirikly MeirFriedenberg - HalDaumé III + HalDaumé III PhilipResnik 25–36 We report on the creation of a dataset for studying assessment of suicide risk via online postings in Reddit. Evaluation of risk-level annotations by experts yields what is, to our knowledge, the first demonstration of reliability in risk assessment by clinicians based on social media postings. We also introduce and demonstrate the value of a new, detailed rubric for assessing suicide risk, compare crowdsourced with expert performance, and present baseline predictive modeling experiments using the new dataset, which will be made available to researchers through the American Association of Suicidology. @@ -1064,7 +1064,7 @@ KateNiederhoffer KateLoveys PhilipResnik - H. AndrewSchwartz + H. AndrewSchwartz 37–46 We describe the shared task for the CLPsych 2018 workshop, which focused on predicting current and future psychological health from an essay authored in childhood. Language-based predictions of a person’s current health have the potential to supplement traditional psychological assessment such as questionnaires, improving intake risk measurement and monitoring. Predictions of future psychological health can aid with both early detection and the development of preventative care. Research into the mental health trajectory of people, beginning from their childhood, has thus far been an area of little work within the NLP community. This shared task represents one of the first attempts to evaluate the use of early language to predict future health; this has the potential to support a wide variety of clinical health care tasks, from early assessment of lifetime risk for mental health problems, to optimal timing for targeted interventions aimed at both prevention and treatment. W18-0604 @@ -1074,7 +1074,7 @@ An Approach to the <fixed-case>CLP</fixed-case>sych 2018 Shared Task Using Top-Down Text Representation and Simple Bottom-Up Model Selection MicahIserman - MollyIreland + MollyIreland AndrewLittlefield TylerDavis SageMaliepaard @@ -1088,9 +1088,9 @@ Using contextual information for automatic triage of posts in a peer-support forum EdgarAltszyler Ariel J.Berenstein - DavidMilne + DavidMilne Rafael A.Calvo - DiegoFernandez Slezak + DiegoFernandez Slezak 57–68 Mental health forums are online spaces where people can share their experiences anonymously and get peer support. These forums, require the supervision of moderators to provide support in delicate cases, such as posts expressing suicide ideation. The large increase in the number of forum users makes the task of the moderators unmanageable without the help of automatic triage systems. In the present paper, we present a Machine Learning approach for the triage of posts. Most approaches in the literature focus on the content of the posts, but only a few authors take advantage of features extracted from the context in which they appear. Our approach consists of the development and implementation of a large variety of new features from both, the content and the context of posts, such as previous messages, interaction with other users and author’s history. Our method has competed in the CLPsych 2017 Shared Task, obtaining the first place for several of the subtasks. Moreover, we also found that models that take advantage of post context improve significantly its performance in the detection of flagged posts (posts that require moderators attention), as well as those that focus on post content outperforms in the detection of most urgent events. W18-0606 @@ -1102,7 +1102,7 @@ JuliaIve GeorgeGkotsis RinaDutta - RobertStewart + RobertStewart SumithraVelupillai 69–77 Mental health problems represent a major public health challenge. Automated analysis of text related to mental health is aimed to help medical decision-making, public health policies and to improve health care. Such analysis may involve text classification. Traditionally, automated classification has been performed mainly using machine learning methods involving costly feature engineering. Recently, the performance of those methods has been dramatically improved by neural methods. However, mainly Convolutional neural networks (CNNs) have been explored. In this paper, we apply a hierarchical Recurrent neural network (RNN) architecture with an attention mechanism on social media data related to mental health. We show that this architecture improves overall classification results as compared to previously reported results on the same data. Benefitting from the attention mechanism, it can also efficiently select text elements crucial for classification decisions, which can also be used for in-depth analysis. @@ -1114,7 +1114,7 @@ Cross-cultural differences in language markers of depression online KateLoveys JonathanTorrez - AlexFine + AlexFine GlenMoriarty GlenCoppersmith 78–87 @@ -1128,7 +1128,7 @@ AhmedHusseini Orabi PrasadithBuddhitha MahmoudHusseini Orabi - DianaInkpen + DianaInkpen 88–97 Mental illness detection in social media can be considered a complex task, mainly due to the complicated nature of mental disorders. In recent years, this research area has started to evolve with the continuous increase in popularity of social media platforms that became an integral part of people’s life. This close relationship between social media platforms and their users has made these platforms to reflect the users’ personal life with different limitations. In such an environment, researchers are presented with a wealth of information regarding one’s life. In addition to the level of complexity in identifying mental illnesses through social media platforms, adopting supervised machine learning approaches such as deep neural networks have not been widely accepted due to the difficulties in obtaining sufficient amounts of annotated training data. Due to these reasons, we try to identify the most effective deep neural network architecture among a few of selected architectures that were successfully used in natural language processing tasks. The chosen architectures are used to detect users with signs of mental illnesses (depression in our case) given limited unstructured text data extracted from the Twitter social media platform. W18-0609 @@ -1137,9 +1137,9 @@ Current and Future Psychological Health Prediction using Language and Socio-Demographics of Children for the <fixed-case>CLP</fixed-case>ysch 2018 Shared Task - Sharath ChandraGuntuku + Sharath ChandraGuntuku SalvatoreGiorgi - LyleUngar + LyleUngar 98–106 This article is a system description and report on the submission of a team from the University of Pennsylvania in the ’CLPsych 2018’ shared task. The goal of the shared task was to use childhood language as a marker for both current and future psychological health over individual lifetimes. Our system employs multiple textual features derived from the essays written and individuals’ socio-demographic variables at the age of 11. We considered several word clustering approaches, and explore the use of linear regression based on different feature sets. Our approach showed best results for predicting distress at the age of 42 and for predicting current anxiety on Disattenuated Pearson Correlation, and ranked fourth in the future health prediction task. In addition to the subtasks presented, we attempted to provide insight into mental health aspects at different ages. Our findings indicate that misspellings, words with illegible letters and increased use of personal pronouns are correlated with poor mental health at age 11, while descriptions about future physical activity, family and friends are correlated with good mental health. W18-0610 @@ -1199,7 +1199,7 @@ Automatic Detection of Incoherent Speech for Diagnosing Schizophrenia DanIter JongYoon - DanJurafsky + DanJurafsky 136–146 Schizophrenia is a mental disorder which afflicts an estimated 0.7% of adults world wide. It affects many areas of mental function, often evident from incoherent speech. Diagnosing schizophrenia relies on subjective judgments resulting in disagreements even among trained clinicians. Recent studies have proposed the use of natural language processing for diagnosis by drawing on automatically-extracted linguistic features like discourse coherence and lexicon. Here, we present the first benchmark comparison of previously proposed coherence models for detecting symptoms of schizophrenia and evaluate their performance on a new dataset of recorded interviews between subjects and clinicians. We also present two alternative coherence metrics based on modern sentence embedding techniques that outperform the previous methods on our dataset. Lastly, we propose a novel computational model for reference incoherence based on ambiguous pronoun usage and show that it is a highly predictive feature on our data. While the number of subjects is limited in this pilot study, our results suggest new directions for diagnosing common symptoms of schizophrenia. W18-0615 @@ -1219,7 +1219,7 @@ MeredithCola JuhiPandey Edward S.Brodkin - Robert T.Schultz + Robert T.Schultz BirkanTunç 147–157 Autism spectrum disorder (ASD) is a neurodevelopmental condition characterized by impaired social communication and the presence of restricted, repetitive patterns of behaviors and interests. Prior research suggests that restricted patterns of behavior in ASD may be cross-domain phenomena that are evident in a variety of modalities. Computational studies of language in ASD provide support for the existence of an underlying dimension of restriction that emerges during a conversation. Similar evidence exists for restricted patterns of facial movement. Using tools from computational linguistics, computer vision, and information theory, this study tests whether cognitive-motor restriction can be detected across multiple behavioral domains in adults with ASD during a naturalistic conversation. Our methods identify restricted behavioral patterns, as measured by entropy in word use and mouth movement. Results suggest that adults with ASD produce significantly less diverse mouth movements and words than neurotypical adults, with an increased reliance on repeated patterns in both domains. The diversity values of the two domains are not significantly correlated, suggesting that they provide complementary information. @@ -1257,7 +1257,7 @@ Predicting Human Trustfulness from <fixed-case>F</fixed-case>acebook Language MohammadzamanZamani AnnekeBuffone - H. AndrewSchwartz + H. AndrewSchwartz 174–181 Trustfulness — one’s general tendency to have confidence in unknown people or situations — predicts many important real-world outcomes such as mental health and likelihood to cooperate with others such as clinicians. While data-driven measures of interpersonal trust have previously been introduced, here, we develop the first language-based assessment of the personality trait of trustfulness by fitting one’s language to an accepted questionnaire-based trust score. Further, using trustfulness as a type of case study, we explore the role of questionnaire size as well as word count in developing language-based predictive models of users’ psychological traits. We find that leveraging a longer questionnaire can yield greater test set accuracy, while, for training, we find it beneficial to include users who took smaller questionnaires which offers more observations for training. Similarly, after noting a decrease in individual prediction error as word count increased, we found a word count-weighted training scheme was helpful when there were very few users in the first place. W18-0619 @@ -1266,7 +1266,7 @@ Within and Between-Person Differences in Language Used Across Anxiety Support and Neutral <fixed-case>R</fixed-case>eddit Communities - MollyIreland + MollyIreland MicahIserman 182–193 Although many studies have distinguished between the social media language use of people who do and do not have a mental health condition, within-person context-sensitive comparisons (for example, analyzing individuals’ language use when seeking support or discussing neutral topics) are less common. Two dictionary-based analyses of Reddit communities compared (1) anxious individuals’ comments in anxiety support communities (e.g., /r/PanicParty) with the same users’ comments in neutral communities (e.g., /r/todayilearned), and, (2) within popular neutral communities, comments by members of anxiety subreddits with comments by other users. Each comparison yielded theory-consistent effects as well as unexpected results that suggest novel hypotheses to be tested in the future. Results have relevance for improving researchers’ and practitioners’ ability to unobtrusively assess anxiety symptoms in conversations that are not explicitly about mental health. @@ -1292,7 +1292,7 @@ Proceedings of the First Workshop on Computational Models of Reference, Anaphora and Coreference W18-07 - MassimoPoesio + MassimoPoesio VincentNg MaciejOgrodniczuk 10.18653/v1/W18-07 @@ -1308,7 +1308,7 @@ Anaphora Resolution for <fixed-case>T</fixed-case>witter Conversations: An Exploratory Study - BerfinAktaş + BerfinAktaş TatjanaScheffler ManfredStede 1–10 @@ -1322,8 +1322,8 @@ MassimoPoesio YuliaGrishina VaradaKolhatkar - NafiseMoosavi - InaRoesiger + NafiseMoosavi + InaRoesiger AdamRoussel FabianSimonjetz AlexandraUma @@ -1338,7 +1338,7 @@ Rule- and Learning-based Methods for Bridging Resolution in the <fixed-case>ARRAU</fixed-case> Corpus - InaRoesiger + InaRoesiger 23–33 We present two systems for bridging resolution, which we submitted to the CRAC shared task on bridging anaphora resolution in the ARRAU corpus (track 2): a rule-based approach following Hou et al. 2014 and a learning-based approach. The re-implementation of Hou et al. 2014 achieves very poor performance when being applied to ARRAU. We found that the reasons for this lie in the different bridging annotations: whereas the rule-based system suggests many referential bridging pairs, ARRAU contains mostly lexical bridging. We describe the differences between these two types of bridging and adapt the rule-based approach to be able to handle lexical bridging. The modified rule-based approach achieves reasonable performance on all (sub)-tasks and outperforms a simple learning-based approach. W18-0703 @@ -1356,10 +1356,10 @@ Integrating Predictions from Neural-Network Relation Classifiers into Coreference and Bridging Resolution - InaRoesiger + InaRoesiger MaximilianKöper Kim AnhNguyen - SabineSchulte im Walde + SabineSchulte im Walde 44–49 Cases of coreference and bridging resolution often require knowledge about semantic relations between anaphors and antecedents. We suggest state-of-the-art neural-network classifiers trained on relation benchmarks to predict and integrate likelihoods for relations. Two experiments with representations differing in noise and complexity improve our bridging but not our coreference resolver. W18-0705 @@ -1369,7 +1369,7 @@ Towards Bridging Resolution in <fixed-case>G</fixed-case>erman: Data Analysis and Rule-based Experiments JanisPagel - InaRoesiger + InaRoesiger 50–60 Bridging resolution is the task of recognising bridging anaphors and linking them to their antecedents. While there is some work on bridging resolution for English, there is only little work for German. We present two datasets which contain bridging annotations, namely DIRNDL and GRAIN, and compare the performance of a rule-based system with a simple baseline approach on these two corpora. The performance for full bridging resolution ranges between an F1 score of 13.6% for DIRNDL and 11.8% for GRAIN. An analysis using oracle lists suggests that the system could, to a certain extent, benefit from ranking and re-ranking antecedent candidates. Furthermore, we investigate the importance of single features and show that the features used in our work seem promising for future bridging resolution approaches. W18-0706 @@ -1410,7 +1410,7 @@ JixingLi MurielleFabre Wen-MingLuh - JohnHale + JohnHale 87–96 Typological differences between English and Chinese suggest stronger reliance on salience of the antecedent during pronoun resolution in Chinese. We examined this hypothesis by correlating a difficulty measure of pronoun resolution derived by the activation-based ACT-R model with the brain activity of English and Chinese participants listening to a same audiobook during fMRI recording. The ACT-R model predicts higher overall difficulty for English speakers, which is supported at the brain level in left Broca’s area. More generally, it confirms that computational modeling approach is able to dissociate different dimensions that are involved in the complex process of pronoun resolution in the brain. W18-0710 @@ -1523,7 +1523,7 @@ Leveraging Syntactic Constructions for Metaphor Identification KevinStowe - MarthaPalmer + MarthaPalmer 17–26 Identification of metaphoric language in text is critical for generating effective semantic representations for natural language understanding. Computational approaches to metaphor identification have largely relied on heuristic based models or feature-based machine learning, using hand-crafted lexical resources coupled with basic syntactic information. However, recent work has shown the predictive power of syntactic constructions in determining metaphoric source and target domains (Sullivan 2013). Our work intends to explore syntactic constructions and their relation to metaphoric language. We undertake a corpus-based analysis of predicate-argument constructions and their metaphoric properties, and attempt to effectively represent syntactic constructions as features for metaphor processing, both in identifying source and target domains and in distinguishing metaphoric words from non-metaphoric. W18-0903 @@ -1533,7 +1533,7 @@ Literal, Metphorical or Both? Detecting Metaphoricity in Isolated Adjective-Noun Phrases AgnieszkaMykowiecka - MalgorzataMarciniak + MalgorzataMarciniak AleksanderWawer 27–33 The paper addresses the classification of isolated Polish adjective-noun phrases according to their metaphoricity. We tested neural networks to predict if a phrase has a literal or metaphorical sense or can have both senses depending on usage. The input to the neural network consists of word embeddings, but we also tested the impact of information about the domain of the adjective and about the abstractness of the noun. We applied our solution to English data available on the Internet and compared it to results published in papers. We found that the solution based on word embeddings only can achieve results comparable with complex solutions requiring additional information. @@ -1563,7 +1563,7 @@ A Report on the 2018 <fixed-case>VUA</fixed-case> Metaphor Detection Shared Task - Chee Wee (Ben)Leong + Chee Wee (Ben)Leong BeataBeigman Klebanov EkaterinaShutova 56–66 @@ -1596,7 +1596,7 @@ Phrase-Level Metaphor Identification Using Distributed Representations of Word Meaning OmniaZayed - John PhilipMcCrae + John PhilipMcCrae PaulBuitelaar 81–90 Metaphor is an essential element of human cognition which is often used to express ideas and emotions that might be difficult to express using literal language. Processing metaphoric language is a challenging task for a wide range of applications ranging from text simplification to psychotherapy. Despite the variety of approaches that are trying to process metaphor, there is still a need for better models that mimic the human cognition while exploiting fewer resources. In this paper, we present an approach based on distributional semantics to identify metaphors on the phrase-level. We investigated the use of different word embeddings models to identify verb-noun pairs where the verb is used metaphorically. Several experiments are conducted to show the performance of the proposed approach on benchmark datasets. @@ -1640,7 +1640,7 @@ Di-<fixed-case>LSTM</fixed-case> Contrast : A Deep Neural Network for Metaphor Detection KrishnkantSwarnkar - Anil KumarSingh + Anil KumarSingh 115–120 The contrast between the contextual and general meaning of a word serves as an important clue for detecting its metaphoricity. In this paper, we present a deep neural architecture for metaphor detection which exploits this contrast. Additionally, we also use cost-sensitive learning by re-weighting examples, and baseline features like concreteness ratings, POS and WordNet-based features. The best performing system of ours achieves an overall F1 score of 0.570 on All POS category and 0.605 on the Verbs category at the Metaphor Shared Task 2018. W18-0914 @@ -1662,7 +1662,7 @@ Detecting Figurative Word Occurrences Using Recurrent Neural Networks AgnieszkaMykowiecka AleksanderWawer - MalgorzataMarciniak + MalgorzataMarciniak 124–127 The paper addresses detection of figurative usage of words in English text. The chosen method was to use neural nets fed by pretrained word embeddings. The obtained results show that simple solutions, based on words embeddings only, are comparable to complex solutions, using many sources of information which are not available for languages less-studied than English. W18-0916 @@ -1682,7 +1682,7 @@ Using Language Learner Data for Metaphor Detection - EgonStemle + EgonStemle AlexanderOnysko 133–138 This article describes the system that participated in the shared task on metaphor detection on the Vrije University Amsterdam Metaphor Corpus (VUA). The ST was part of the workshop on processing figurative language at the 16th annual conference of the North American Chapter of the Association for Computational Linguistics (NAACL2018). The system combines a small assertion of trending techniques, which implement matured methods from NLP and ML; in particular, the system uses word embeddings from standard corpora and from corpora representing different proficiency levels of language learners in a LSTM BiRNN architecture. The system is available under the APLv2 open-source license. @@ -1714,7 +1714,7 @@ SoumyaWadhwa VarshaEmbar MatthiasGrabmair - EricNyberg + EricNyberg 1–7 In this paper, we investigate the tendency of end-to-end neural Machine Reading Comprehension (MRC) models to match shallow patterns rather than perform inference-oriented reasoning on RC benchmarks. We aim to test the ability of these systems to answer questions which focus on referential inference. We propose ParallelQA, a strategy to formulate such questions using parallel passages. We also demonstrate that existing neural models fail to generalize well to this setting. W18-1001 @@ -1728,7 +1728,7 @@ SeyedarianHosseini MichaelNoukhovitch YoshuaBengio - JackieCheung + JackieCheung 8–16 Commonsense knowledge bases such as ConceptNet represent knowledge in the form of relational triples. Inspired by recent work by Li et al., we analyse if knowledge base completion models can be used to mine commonsense knowledge from raw text. We propose novelty of predicted triples with respect to the training set as an important factor in interpreting results. We critically analyse the difficulty of mining novel commonsense knowledge, and show that a simple baseline method that outperforms the previous state of the art on predicting more novel triples. W18-1002 @@ -1775,7 +1775,7 @@ W18-11 MalvinaNissim VivianaPatti - BarbaraPlank + BarbaraPlank ClaudiaWagner 10.18653/v1/W18-11 Association for Computational Linguistics @@ -1812,7 +1812,7 @@ Building an annotated dataset of app store reviews with Appraisal features in <fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>panish NataliaMora - JuliaLavid-López + JuliaLavid-López 16–24 This paper describes the creation and annotation of a dataset consisting of 250 English and Spanish app store reviews from Google’s Play Store with Appraisal features. This is one of the most influential linguistic frameworks for the analysis of evaluation and opinion in discourse due to its insightful descriptive features. However, it has not been extensively applied in NLP in spite of its potential for the classification of the subjective content of these reviews. We describe the dataset, the annotation scheme and guidelines, the agreement studies, the annotation results and their impact on the characterisation of this genre. W18-1103 @@ -1823,7 +1823,7 @@ Enabling Deep Learning of Emotion With First-Person Seed Expressions HassanAlhuzali MuhammadAbdul-Mageed - LyleUngar + LyleUngar 25–35 The computational treatment of emotion in natural language text remains relatively limited, and Arabic is no exception. This is partly due to lack of labeled data. In this work, we describe and manually validate a method for the automatic acquisition of emotion labeled data and introduce a newly developed data set for Modern Standard and Dialectal Arabic emotion detection focused at Robert Plutchik’s 8 basic emotion types. Using a hybrid supervision method that exploits first person emotion seeds, we show how we can acquire promising results with a deep gated recurrent neural network. Our best model reaches 70% F-score, significantly (i.e., 11%, p < 0.05) outperforming a competitive baseline. Applying our method and data on an external dataset of 4 emotions released around the same time we finalized our work, we acquire 7% absolute gain in F-score over a linear SVM classifier trained on gold data, thus validating our approach. W18-1104 @@ -1836,7 +1836,7 @@ DeepanshuVijay VinaySingh Syed SarfarazAkhtar - ManishShrivastava + ManishShrivastava 36–41 Hate speech detection in social media texts is an important Natural language Processing task, which has several crucial applications like sentiment analysis, investigating cyberbullying and examining socio-political controversies. While relevant research has been done independently on code-mixed social media texts and hate speech detection, our work is the first attempt in detecting hate speech in Hindi-English code-mixed social media text. In this paper, we analyze the problem of hate speech detection in code-mixed texts and present a Hindi-English code-mixed dataset consisting of tweets posted online on Twitter. The tweets are annotated with the language at word level and the class they belong to (Hate Speech or Normal Speech). We also propose a supervised classification system for detecting hate speech in the text using various character level, word level, and lexicon based features. W18-1105 @@ -1885,7 +1885,7 @@ Understanding the Effect of Gender and Stance in Opinion Expression in Debates on “Abortion” EsinDurmus - ClaireCardie + ClaireCardie 69–75 In this paper, we focus on understanding linguistic differences across groups with different self-identified gender and stance in expressing opinions about ABORTION. We provide a new dataset consisting of users’ gender, stance on ABORTION as well as the debates in ABORTION drawn from debate.org. We use the gender and stance information to identify significant linguistic differences across individuals with different gender and stance. We show the importance of considering the stance information along with the gender since we observe significant linguistic differences across individuals with different stance even within the same gender group. W18-1110 @@ -1962,7 +1962,7 @@ Proceedings of the Second Workshop on Subword/Character LEvel Models W18-12 ManaalFaruqui - HinrichSchütze + HinrichSchütze IsabelTrancoso YuliaTsvetkov YadollahYaghoobzadeh @@ -1990,7 +1990,7 @@ Entropy-Based Subword Mining with an Application to Word Embeddings AhmedEl-Kishky - FrankXu + FrankXu AstonZhang StephenMacke JiaweiHan @@ -2015,7 +2015,7 @@ Addressing Low-Resource Scenarios with Character-aware Embeddings SeanPapay - SebastianPadó + SebastianPadó Ngoc ThangVu 32–37 Most modern approaches to computing word embeddings assume the availability of text corpora with billions of words. In this paper, we explore a setup where only corpora with millions of words are available, and many words in any new text are out of vocabulary. This setup is both of practical interests – modeling the situation for specific domains and low-resource languages – and of psycholinguistic interest, since it corresponds much more closely to the actual experiences and challenges of human language learning and use. We compare standard skip-gram word embeddings with character-based embeddings on word relatedness prediction. Skip-grams excel on large corpora, while character-based embeddings do well on small corpora generally and rare and complex words specifically. The models can be combined easily. @@ -2037,9 +2037,9 @@ Discovering Phonesthemes with Sparse Regularization - Nelson F.Liu - Gina-AnneLevow - Noah A.Smith + Nelson F.Liu + Gina-AnneLevow + Noah A.Smith 49–54 We introduce a simple method for extracting non-arbitrary form-meaning representations from a collection of semantic vectors. We treat the problem as one of feature selection for a model trained to predict word vectors from subword features. We apply this model to the problem of automatically discovering phonesthemes, which are submorphemic sound clusters that appear in words with similar meaning. Many of our model-predicted phonesthemes overlap with those proposed in the linguistics literature, and we validate our approach with human judgments. W18-1206 @@ -2049,7 +2049,7 @@ Meaningless yet meaningful: Morphology grounded subword-level <fixed-case>NMT</fixed-case> TamaliBanerjee - PushpakBhattacharyya + PushpakBhattacharyya 55–60 We explore the use of two independent subsystems Byte Pair Encoding (BPE) and Morfessor as basic units for subword-level neural machine translation (NMT). We show that, for linguistically distant language-pairs Morfessor-based segmentation algorithm produces significantly better quality translation than BPE. However, for close language-pairs BPE-based subword-NMT may translate better than Morfessor-based subword-NMT. We propose a combined approach of these two segmentation algorithms Morfessor-BPE (M-BPE) which outperforms these two baseline systems in terms of BLEU score. Our results are supported by experiments on three language-pairs: English-Hindi, Bengali-Hindi and English-Bengali. W18-1207 @@ -2082,7 +2082,7 @@ ShiranDudy ShaobinXu StevenBedrick - DavidSmith + DavidSmith 72–77 Brain-computer interfaces and other augmentative and alternative communication devices introduce language-modeing challenges distinct from other character-entry methods. In particular, the acquired signal of the EEG (electroencephalogram) signal is noisier, which, in turn, makes the user intent harder to decipher. In order to adapt to this condition, we propose to maintain ambiguous history for every time step, and to employ, apart from the character language model, word information to produce a more robust prediction system. We present preliminary results that compare this proposed Online-Context Language Model (OCLM) to current algorithms that are used in this type of setting. Evaluation on both perplexity and predictive accuracy demonstrates promising results when dealing with ambiguous histories in order to provide to the front end a distribution of the next character the user might type. W18-1210 @@ -2111,7 +2111,7 @@ Using Hedge Detection to Improve Committed Belief Tagging MorganUlinski SethBenjamin - JuliaHirschberg + JuliaHirschberg 1–5 We describe a novel method for identifying hedge terms using a set of manually constructed rules. We present experiments adding hedge features to a committed belief system to improve classification. We compare performance of this system (a) without hedging features, (b) with dictionary-based features, and (c) with rule-based features. We find that using hedge features improves performance of the committed belief system, particularly in identifying instances of non-committed belief and reported belief. W18-1301 @@ -2131,7 +2131,7 @@ Detecting Sarcasm is Extremely Easy ;-) NatalieParde - RodneyNielsen + RodneyNielsen 21–26 Detecting sarcasm in text is a particularly challenging problem in computational semantics, and its solution may vary across different types of text. We analyze the performance of a domain-general sarcasm detection system on datasets from two very different domains: Twitter, and Amazon product reviews. We categorize the errors that we identify with each, and make recommendations for addressing these issues in NLP systems in the future. W18-1303 @@ -2164,8 +2164,8 @@ W18-14 ParisaKordjamshidi ArchnaBhatia - JamesPustejovsky - Marie-FrancineMoens + JamesPustejovsky + Marie-FrancineMoens 10.18653/v1/W18-14 Association for Computational Linguistics
New Orleans
@@ -2181,7 +2181,7 @@ Exploring the Functional and Geometric Bias of Spatial Relations Using Neural Language Models SimonDobnik MehdiGhanimifard - JohnKelleher + JohnKelleher 1–11 The challenge for computational models of spatial descriptions for situated dialogue systems is the integration of information from different modalities. The semantics of spatial descriptions are grounded in at least two sources of information: (i) a geometric representation of space and (ii) the functional interaction of related objects that. We train several neural language models on descriptions of scenes from a dataset of image captions and examine whether the functional or geometric bias of spatial descriptions reported in the literature is reflected in the estimated perplexity of these models. The results of these experiments have implications for the creation of models of spatial lexical semantics for human-robot dialogue systems. Furthermore, they also provide an insight into the kinds of the semantic knowledge captured by neural language models trained on spatial descriptions, which has implications for image captioning systems. W18-1401 @@ -2191,7 +2191,7 @@ Building and Learning Structures in a Situated Blocks World Through Deep Language Understanding IanPerera - JamesAllen + JamesAllen Choh ManTeng LucianGalescu 12–20 @@ -2203,7 +2203,7 @@ Computational Models for Spatial Prepositions GeorgiyPlatonov - LenhartSchubert + LenhartSchubert 21–30 Developing computational models of spatial prepositions (such as on, in, above, etc.) is crucial for such tasks as human-machine collaboration, story understanding, and 3D model generation from descriptions. However, these prepositions are notoriously vague and ambiguous, with meanings depending on the types, shapes and sizes of entities in the argument positions, the physical and task context, and other factors. As a result truth value judgments for prepositional relations are often uncertain and variable. In this paper we treat the modeling task as calling for assignment of probabilities to such relations as a function of multiple factors, where such probabilities can be viewed as estimates of whether humans would judge the relations to hold in given circumstances. We implemented our models in a 3D blocks world and a room world in a computer graphics setting, and found that true/false judgments based on these models do not differ much more from human judgments that the latter differ from one another. However, what really matters pragmatically is not the accuracy of truth value judgments but whether, for instance, the computer models suffice for identifying objects described in terms of prepositional relations, (e.g., “the box to the left of the table”, where there are multiple boxes). For such tasks, our models achieved accuracies above 90% for most relations. W18-1403 @@ -2212,8 +2212,8 @@ Lexical Conceptual Structure of Literal and Metaphorical Spatial Language: A Case Study of “Push” - BonnieDorr - MariOlsen + BonnieDorr + MariOlsen 31–40 Prior methodologies for understanding spatial language have treated literal expressions such as “Mary pushed the car over the edge” differently from metaphorical extensions such as “Mary’s job pushed her over the edge”. We demonstrate a methodology for standardizing literal and metaphorical meanings, by building on work in Lexical Conceptual Structure (LCS), a general-purpose representational component used in machine translation. We argue that spatial predicates naturally extend into other fields (e.g., circumstantial or temporal), and that LCS provides both a framework for distinguishing spatial from non-spatial, and a system for finding metaphorical meaning extensions. We start with MetaNet (MN), a large repository of conceptual metaphors, condensing 197 spatial entries into sixteen top-level categories of motion frames. Using naturally occurring instances of English push , and expansions of MN frames, we demonstrate that literal and metaphorical extensions exhibit patterns predicted and represented by the LCS model. W18-1404 @@ -2222,8 +2222,8 @@ Representing Spatial Relations in <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et - Miriam R. L.Petruck - Michael J.Ellsworth + Miriam R. L.Petruck + Michael J.Ellsworth 41–45 While humans use natural language to express spatial relations between and across entities in the world with great facility, natural language systems have a facility that depends on that human facility. This position paper presents approach to representing spatial relations in language, and advocates its adoption for representing the meaning of spatial language. This work shows the importance of axis-orientation systems for capturing the complexity of spatial relations, which FrameNet encodes with semantic types. W18-1405 @@ -2235,7 +2235,7 @@ JasonBaldridge TaniaBedrax-Weiss DaphneLuong - SriniNarayanan + SriniNarayanan BoPang FernandoPereira RaduSoricut @@ -2261,8 +2261,8 @@ The Case for Systematically Derived Spatial Language Usage - BonnieDorr - ClareVoss + BonnieDorr + ClareVoss 63–70 This position paper argues that, while prior work in spatial language understanding for tasks such as robot navigation focuses on mapping natural language into deep conceptual or non-linguistic representations, it is possible to systematically derive regular patterns of spatial language usage from existing lexical-semantic resources. Furthermore, even with access to such resources, effective solutions to many application areas such as robot navigation and narrative generation also require additional knowledge at the syntax-semantics interface to cover the wide range of spatial expressions observed and available to natural language speakers. We ground our insights in, and present our extensions to, an existing lexico-semantic resource, covering 500 semantic classes of verbs, of which 219 fall within a spatial subset. We demonstrate that these extensions enable systematic derivation of regular patterns of spatial language without requiring manual annotation. W18-1408 @@ -2275,7 +2275,7 @@ Proceedings of the First Workshop on Storytelling W18-15 MargaretMitchell - Ting-Hao ‘Kenneth’Huang + Ting-Hao ‘Kenneth’Huang FrancisFerraro IshanMisra 10.18653/v1/W18-15 @@ -2304,7 +2304,7 @@ Linguistic Features of Helpfulness in Automated Support for Creative Writing MelissaRoemmele - AndrewGordon + AndrewGordon 14–19 We examine an emerging NLP application that supports creative writing by automatically suggesting continuing sentences in a story. The application tracks users’ modifications to generated sentences, which can be used to quantify their “helpfulness” in advancing the story. We explore the task of predicting helpfulness based on automatically detected linguistic features of the suggestions. We illustrate this analysis on a set of user interactions with the application using an initial selection of features relevant to story generation. W18-1502 @@ -2313,9 +2313,9 @@ A Pipeline for Creative Visual Storytelling - StephanieLukin + StephanieLukin ReginaldHobbs - ClareVoss + ClareVoss 20–32 Computational visual storytelling produces a textual description of events and interpretations depicted in a sequence of images. These texts are made possible by advances and cross-disciplinary approaches in natural language processing, generation, and computer vision. We define a computational creative visual storytelling as one with the ability to alter the telling of a story along three aspects: to speak about different environments, to produce variations based on narrative goals, and to adapt the narrative to the audience. These aspects of creative storytelling and their effect on the narrative have yet to be explored in visual storytelling. This paper presents a pipeline of task-modules, Object Identification, Single-Image Inferencing, and Multi-Image Narration, that serve as a preliminary design for building a creative visual storyteller. We have piloted this design for a sequence of images in an annotation task. We present and analyze the collected corpus and describe plans towards automation. W18-1503 @@ -2347,7 +2347,7 @@ An Encoder-decoder Approach to Predicting Causal Relations in Stories MelissaRoemmele - AndrewGordon + AndrewGordon 50–59 We address the task of predicting causally related events in stories according to a standard evaluation framework, the Choice of Plausible Alternatives (COPA). We present a neural encoder-decoder model that learns to predict relations between adjacent sequences in stories as a means of modeling causality. We explore this approach using different methods for extracting and representing sequence pairs as well as different model architectures. We also compare the impact of different training datasets on our model. In particular, we demonstrate the usefulness of a corpus not previously applied to COPA, the ROCStories corpus. While not state-of-the-art, our results establish a new reference point for systems evaluated on COPA, and one that is particularly informative for future neural-based approaches. W18-1506 @@ -2358,7 +2358,7 @@ Neural Event Extraction from Movies Description AlexTozzo DejanJovanović - MohamedAmer + MohamedAmer 60–66 We present a novel approach for event extraction and abstraction from movie descriptions. Our event frame consists of “who”, “did what” “to whom”, “where”, and “when”. We formulate our problem using a recurrent neural network, enhanced with structural features extracted from syntactic parser, and trained using curriculum learning by progressively increasing the difficulty of the sentences. Our model serves as an intermediate step towards question answering systems, visual storytelling, and story completion tasks. We evaluate our approach on MovieQA dataset. W18-1507 @@ -2409,7 +2409,7 @@ Detecting Syntactic Features of Translated <fixed-case>C</fixed-case>hinese HaiHu WenLi - SandraKübler + SandraKübler 20–28 We present a machine learning approach to distinguish texts translated to Chinese (by humans) from texts originally written in Chinese, with a focus on a wide range of syntactic features. Using Support Vector Machines (SVMs) as classifier on a genre-balanced corpus in translation studies of Chinese, we find that constituent parse trees and dependency triples as features without lexical information perform very well on the task, with an F-measure above 90%, close to the results of lexical n-gram features, without the risk of learning topic information rather than translation features. Thus, we claim syntactic features alone can accurately distinguish translated from original Chinese. Translated Chinese exhibits an increased use of determiners, subject position pronouns, NP + “的” as NP modifiers, multiple NPs or VPs conjoined by "、", among other structures. We also interpret the syntactic features with reference to previous translation studies in Chinese, particularly the usage of pronouns. W18-1603 @@ -2432,7 +2432,7 @@ FranciscoRangel PaoloRosso JulianBrooke - AlexandraUitdenbogerd + AlexandraUitdenbogerd 39–43 In this paper, we approach the task of native language identification in a realistic cross-corpus scenario where a model is trained with available data and has to predict the native language from data of a different corpus. The motivation behind this study is to investigate native language identification in the Australian academic scenario where a majority of students come from China, Indonesia, and Arabic-speaking nations. We have proposed a statistical embedding representation reporting a significant improvement over common single-layer approaches of the state of the art, identifying Chinese, Arabic, and Indonesian in a cross-corpus scenario. The proposed approach was shown to be competitive even when the data is scarce and imbalanced. W18-1605 @@ -2447,7 +2447,7 @@ GoranGlavaš SwapnaSomasundaran MartinRiedl - EduardHovy + EduardHovy 10.18653/v1/W18-17 Association for Computational Linguistics
New Orleans, Louisiana, USA
@@ -2462,8 +2462,8 @@ Scientific Discovery as Link Prediction in Influence and Citation Graphs FanLuo - Marco A.Valenzuela-Escárcega - GusHahn-Powell + Marco A.Valenzuela-Escárcega + GusHahn-Powell MihaiSurdeanu 1–6 We introduce a machine learning approach for the identification of “white spaces” in scientific knowledge. Our approach addresses this task as link prediction over a graph that contains over 2M influence statements such as “CTCF activates FOXA1”, which were automatically extracted using open-domain machine reading. We model this prediction task using graph-based features extracted from the above influence graph, as well as from a citation graph that captures scientific communities. We evaluated the proposed approach through backtesting. Although the data is heavily unbalanced (50 times more negative examples than positives), our approach predicts which influence links will be discovered in the “near future” with a F1 score of 27 points, and a mean average precision of 68%. @@ -2474,7 +2474,7 @@ Efficient Generation and Processing of Word Co-occurrence Networks Using corpus2graph ZhengZhang - PierreZweigenbaum + PierreZweigenbaum RuiqingYin 7–11 Corpus2graph is an open-source NLP-application-oriented tool that generates a word co-occurrence network from a large corpus. It not only contains different built-in methods to preprocess words, analyze sentences, extract word pairs and define edge weights, but also supports user-customized functions. By using parallelization techniques, it can generate a large word co-occurrence network of the whole English Wikipedia data within hours. And thanks to its nodes-edges-weight three-level progressive calculation design, rebuilding networks with different configurations is even faster as it does not need to start all over again. This tool also works with other graph libraries such as igraph, NetworkX and graph-tool as a front end providing data to boost network generation speed. @@ -2484,7 +2484,7 @@ Multi-hop Inference for Sentence-level <fixed-case>T</fixed-case>ext<fixed-case>G</fixed-case>raphs: How Challenging is Meaningfully Combining Information for Science Question Answering? - PeterJansen + PeterJansen 12–17 Question Answering for complex questions is often modelled as a graph construction or traversal task, where a solver must build or traverse a graph of facts that answer and explain a given question. This “multi-hop” inference has been shown to be extremely challenging, with few models able to aggregate more than two facts before being overwhelmed by “semantic drift”, or the tendency for long chains of facts to quickly drift off topic. This is a major barrier to current inference models, as even elementary science questions require an average of 4 to 6 facts to answer and explain. In this work we empirically characterize the difficulty of building or traversing a graph of sentences connected by lexical overlap, by evaluating chance sentence aggregation quality through 9,784 manually-annotated judgements across knowledge graphs built from three free-text corpora (including study guides and Simple Wikipedia). We demonstrate semantic drift tends to be high and aggregation quality low, at between 0.04 and 3, and highlight scenarios that maximize the likelihood of meaningfully combining information. W18-1703 @@ -2497,7 +2497,7 @@ StéphaneHuet ThiagoGouveia da Silva Andréa CarneiroLinhares - Juan-ManuelTorres-Moreno + Juan-ManuelTorres-Moreno 18–27 Multi-Sentence Compression (MSC) aims to generate a short sentence with key information from a cluster of closely related sentences. MSC enables summarization and question-answering systems to generate outputs combining fully formed sentences from one or several documents. This paper describes a new Integer Linear Programming method for MSC using a vertex-labeled graph to select different keywords, and novel 3-gram scores to generate more informative sentences while maintaining their grammaticality. Our system is of good quality and outperforms the state-of-the-art for evaluations led on news dataset. We led both automatic and manual evaluations to determine the informativeness and the grammaticality of compressions for each dataset. Additional tests, which take advantage of the fact that the length of compressions can be modulated, still improve ROUGE scores with shorter output sentences. W18-1704 @@ -2586,7 +2586,7 @@ Fluency Over Adequacy: A Pilot Study in Measuring User Trust in Imperfect <fixed-case>MT</fixed-case> - MariannaMartindale + MariannaMartindale MarineCarpuat 13–25 W18-1803 @@ -2594,10 +2594,10 @@ Combining Quality Estimation and Automatic Post-editing to Enhance Machine Translation output - RajenChatterjee - MatteoNegri + RajenChatterjee + MatteoNegri MarcoTurchi - FrédéricBlain + FrédéricBlain LuciaSpecia 26–38 W18-1804 @@ -2607,7 +2607,7 @@ Neural Morphological Tagging of Lemma Sequences for Machine Translation CostanzaConforti MatthiasHuck - AlexanderFraser + AlexanderFraser 39–53 W18-1805 conforti-etal-2018-neural @@ -2624,8 +2624,8 @@ How Robust Are Character-Based Word Embeddings in Tagging and <fixed-case>MT</fixed-case> Against Wrod Scramlbing or Randdm Nouse? GeorgHeigold StalinVaranasi - GünterNeumann - Josefvan Genabith + GünterNeumann + Josefvan Genabith 68–80 W18-1807 heigold-etal-2018-robust @@ -2642,7 +2642,7 @@ Register-sensitive Translation: a Case Study of <fixed-case>M</fixed-case>andarin and <fixed-case>C</fixed-case>antonese (Non-archival Extended Abstract) Tak-sumWong - JohnLee + JohnLee 89–96 W18-1809 wong-lee-2018-register @@ -2693,7 +2693,7 @@ Simultaneous Translation using Optimized Segmentation MaryamSiahbani - HassanShavarani + HassanShavarani AshkanAlinejad AnoopSarkar 154–167 @@ -2702,13 +2702,13 @@ Neural Monkey: The Current State and Beyond - JindřichHelcl + JindřichHelcl JindřichLibovický TomKocmi TomášMusil OndřejCífka - DušanVariš - OndřejBojar + DušanVariš + OndřejBojar 168–176 W18-1816 helcl-etal-2018-neural @@ -2720,7 +2720,7 @@ YuntianDeng VincentNguyen JeanSenellart - AlexanderRush + AlexanderRush 177–184 W18-1817 klein-etal-2018-opennmt @@ -2753,7 +2753,7 @@ AidanGomez StephanGouws LlionJones - ŁukaszKaiser + ŁukaszKaiser NalKalchbrenner NikiParmar RyanSepassi @@ -2769,7 +2769,7 @@ TobiasDomhan MichaelDenkowski DavidVilar - ArtemSokolov + ArtemSokolov AnnClifton MattPost 200–207 @@ -2793,8 +2793,8 @@ W18-19 JaniceCampbell AlexYanishevsky - JenniferDoyon - DougJones + JenniferDoyon + DougJones Association for Machine Translation in the Americas
Boston, MA
March @@ -2853,7 +2853,7 @@
Same-language machine translation for local flavours/flavors - GemaRamírez-Sánchez + GemaRamírez-Sánchez JaniceCampbell 35–53 W18-1908 @@ -2868,8 +2868,8 @@ Developing a Neural Machine Translation Service for the 2017-2018 <fixed-case>E</fixed-case>uropean <fixed-case>U</fixed-case>nion Presidency - MārcisPinnis - RihardsKalnins + MārcisPinnis + RihardsKalnins 72–83 W18-1910 pinnis-kalnins-2018-developing @@ -2897,8 +2897,8 @@ Turning <fixed-case>NMT</fixed-case> Research into Commercial Products - DragosMunteanu - AdriàGispert + DragosMunteanu + AdriàGispert 166–193 W18-1914 munteanu-gispert-2018-turning @@ -2923,8 +2923,8 @@ AnnClifton GregHanneman PatrickPorter - DonnaGates - AlmutHildebrand + DonnaGates + AlmutHildebrand AnishKumar 223–233 W18-1917 @@ -2950,7 +2950,7 @@ CoreyMiller DanielleSilverman VanesaJurica - ElizabethRicherson + ElizabethRicherson RodneyMorris ElisabethMallard 275–282 @@ -2959,18 +2959,18 @@ Challenges in Speech Recognition and Translation of High-Value Low-Density Polysynthetic Languages - JudithKlavans + JudithKlavans JohnMorgan StephenLaRocca JeffreyMicher - ClareVoss + ClareVoss 283–293 W18-1921 klavans-etal-2018-challenges Evaluating Automatic Speech Recognition in Translation - EvelyneTzoukermann + EvelyneTzoukermann CoreyMiller 294–302 W18-1922 @@ -2987,7 +2987,7 @@ <fixed-case>T</fixed-case>utorial: De-mystifying Neural <fixed-case>MT</fixed-case> - DragosMunteanu + DragosMunteanu LingTsou W18-1924 munteanu-tsou-2018-tutorial @@ -2995,7 +2995,7 @@ <fixed-case>T</fixed-case>utorial: <fixed-case>MQM</fixed-case>-<fixed-case>DQF</fixed-case>: A Good Marriage (Translation Quality for the 21st Century) ArleLommel - AlanMelby + AlanMelby W18-1925 lommel-melby-2018-tutorial @@ -3032,7 +3032,7 @@ Termbase Exchange (<fixed-case>TBX</fixed-case>) - SueWright + SueWright 25–47 W18-2002 wright-2018-termbase @@ -3060,7 +3060,7 @@ Translation <fixed-case>API</fixed-case> Cases and Classes (<fixed-case>TAPICC</fixed-case>) - AlanMelby + AlanMelby 95–112 W18-2006 melby-2018-translation @@ -3070,9 +3070,9 @@ Proceedings of the AMTA 2018 Workshop on Translation Quality Estimation and Automatic Post-Editing W18-21 - RamónAstudillo - JoãoGraça - AndréMartins + RamónAstudillo + JoãoGraça + AndréMartins Association for Machine Translation in the Americas
Boston, MA
March @@ -3191,7 +3191,7 @@ A Survey of Machine Translation Work in the <fixed-case>P</fixed-case>hilippines: From 1998 to 2018 NathanielOco - RachelRoxas + RachelRoxas 30–36 W18-2204 oco-roxas-2018-survey @@ -3219,7 +3219,7 @@ Apertium’s Web Toolchain for Low-Resource Language Technology SushainCherivirala ShardulChiplunkar - JonathanWashington + JonathanWashington KevinUnhammer 53–62 W18-2207 @@ -3231,9 +3231,9 @@ Proceedings of the BioNLP 2018 workshop W18-23 DinaDemner-Fushman - Kevin BretonnelCohen + Kevin BretonnelCohen SophiaAnaniadou - JunichiTsujii + JunichiTsujii Association for Computational Linguistics
Melbourne, Australia
July @@ -3290,7 +3290,7 @@
Identifying Key Sentences for Precision Oncology Using Semi-Supervised Learning - JuricaŠeva + JuricaŠeva MartinWackerbauer UlfLeser 35–46 @@ -3301,7 +3301,7 @@ Ontology alignment in the biomedical domain using entity definitions and context - Lucy LuWang + Lucy LuWang ChandraBhagavatula MarkNeumann KyleLo @@ -3374,7 +3374,7 @@ NitishKulkarni SrividyaPranavi GabrielBayomi - EricNyberg + EricNyberg TerukoMitamura 109–117 In this paper, we present a novel Biomedical Question Answering system, BioAMA: “Biomedical Ask Me Anything” on task 5b of the annual BioASQ challenge. In this work, we focus on a wide variety of question types including factoid, list based, summary and yes/no type questions that generate both exact and well-formed ‘ideal’ answers. For summary-type questions, we combine effective IR-based techniques for retrieval and diversification of relevant snippets for a question to create an end-to-end system which achieves a ROUGE-2 score of 0.72 and a ROUGE-SU4 score of 0.71 on ideal answer questions (7% improvement over the previous best model). Additionally, we propose a novel NLI-based framework to answer the yes/no questions. To train the NLI model, we also devise a transfer-learning technique by cross-domain projection of word embeddings. Finally, we present a two-stage approach to address the factoid and list type questions by first generating a candidate set using NER taggers and ranking them using both supervised or unsupervised techniques. @@ -3385,7 +3385,7 @@ <fixed-case>P</fixed-case>hrase2<fixed-case>V</fixed-case>ec<fixed-case>GLM</fixed-case>: Neural generalized language model–based semantic tagging for complex query reformulation in medical <fixed-case>IR</fixed-case> ManirupaDas - EricFosler-Lussier + EricFosler-Lussier SimonLin SoheilMoosavinasab DavidChen @@ -3401,7 +3401,7 @@ Convolutional neural networks for chemical-disease relation extraction are improved with character-based word embeddings Dat QuocNguyen - KarinVerspoor + KarinVerspoor 129–136 We investigate the incorporation of character-based word representations into a standard CNN-based relation extraction model. We experiment with two common neural architectures, CNN and LSTM, to learn word vector representations from character embeddings. Through a task on the BioCreative-V CDR corpus, extracting relationships between chemicals and diseases, we show that models exploiting the character-based word representations improve on models that do not use this information, obtaining state-of-the-art result relative to previous neural approaches. W18-2314 @@ -3443,7 +3443,7 @@ <fixed-case>S</fixed-case>ingle<fixed-case>C</fixed-case>ite: Towards an improved Single Citation Search in <fixed-case>P</fixed-case>ub<fixed-case>M</fixed-case>ed LanaYeganova - Donald CComeau + Donald CComeau WonKim W JohnWilbur ZhiyongLu @@ -3456,7 +3456,7 @@ A Framework for Developing and Evaluating Word Embeddings of Drug-named Entity MengnanZhao - Aaron J.Masino + Aaron J.Masino Christopher C.Yang 156–160 We investigate the quality of task specific word embeddings created with relatively small, targeted corpora. We present a comprehensive evaluation framework including both intrinsic and extrinsic evaluation that can be expanded to named entities beyond drug name. Intrinsic evaluation results tell that drug name embeddings created with a domain specific document corpus outperformed the previously published versions that derived from a very large general text corpus. Extrinsic evaluation uses word embedding for the task of drug name recognition with Bi-LSTM model and the results demonstrate the advantage of using domain-specific word embeddings as the only input feature for drug name recognition with F1-score achieving 0.91. This work suggests that it may be advantageous to derive domain specific embeddings for certain tasks even when the domain specific corpus is of limited size. @@ -3468,7 +3468,7 @@ <fixed-case>M</fixed-case>e<fixed-case>SH</fixed-case>-based dataset for measuring the relevance of text retrieval Won GyuKim LanaYeganova - DonaldComeau + DonaldComeau W JohnWilbur ZhiyongLu 161–165 @@ -3548,8 +3548,8 @@ Proceedings of the Seventh Named Entities Workshop W18-24 - NancyChen - Rafael E.Banchs + NancyChen + Rafael E.Banchs XiangyuDuan MinZhang HaizhouLi @@ -3596,7 +3596,7 @@ Attention-based Semantic Priming for Slot-filling JiewenWu Rafael E.Banchs - Luis FernandoD’Haro + Luis FernandoD’Haro PavitraKrishnaswamy NancyChen 22–26 @@ -3610,7 +3610,7 @@ VinaySingh DeepanshuVijay Syed SarfarazAkhtar - ManishShrivastava + ManishShrivastava 27–35 Named Entity Recognition (NER) is a major task in the field of Natural Language Processing (NLP), and also is a sub-task of Information Extraction. The challenge of NER for tweets lie in the insufficient information available in a tweet. There has been a significant amount of work done related to entity extraction, but only for resource rich languages and domains such as newswire. Entity extraction is, in general, a challenging task for such an informal text, and code-mixed text further complicates the process with it’s unstructured and incomplete information. We propose experiments with different machine learning classification algorithms with word, character and lexical features. The algorithms we experimented with are Decision tree, Long Short-Term Memory (LSTM), and Conditional Random Field (CRF). In this paper, we present a corpus for NER in Hindi-English Code-Mixed along with extensive experiments on our machine learning models which achieved the best f1-score of 0.95 with both CRF and LSTM. W18-2405 @@ -3634,7 +3634,7 @@ Named-Entity Tagging and Domain adaptation for Better Customized Translation ZhongweiLi XuancongWang - Ai TiAw + Ai TiAw Eng SiongChng HaizhouLi 41–46 @@ -3751,10 +3751,10 @@ MarkNeumann OyvindTafjord PradeepDasigi - Nelson F.Liu - MatthewPeters + Nelson F.Liu + MatthewPeters MichaelSchmitz - LukeZettlemoyer + LukeZettlemoyer 1–6 Modern natural language processing (NLP) research requires writing code. Ideally this code would provide a precise definition of the approach, easy repeatability of results, and a basis for extending the research. However, many research codebases bury high-level parameters under implementation details, are challenging to run and debug, and are difficult enough to extend that they are more likely to be rewritten. This paper describes AllenNLP, a library for applying deep learning methods to NLP research that addresses these issues with easy-to-use command-line tools, declarative configuration-driven experiments, and modular NLP abstractions. AllenNLP has already increased the rate of research experimentation and the sharing of NLP components at the Allen Institute for Artificial Intelligence, and we are working to have the same impact across the field. W18-2501 @@ -3788,7 +3788,7 @@ Devendra SinghChaplot BowenTan XingjiangYu - EricXing + EricXing 13–22 We introduce Texar, an open-source toolkit aiming to support the broad set of text generation tasks. Different from many existing toolkits that are specialized for specific applications (e.g., neural machine translation), Texar is designed to be highly flexible and versatile. This is achieved by abstracting the common patterns underlying the diverse tasks and methodologies, creating a library of highly reusable modules and functionalities, and enabling arbitrary model architectures and various algorithmic paradigms. The features make Texar particularly suitable for technique sharing and generalization across different text generation applications. The toolkit emphasizes heavily on extensibility and modularized system design, so that components can be freely plugged in or swapped out. We conduct extensive experiments and case studies to demonstrate the use and advantage of the toolkit. W18-2503 @@ -3814,9 +3814,9 @@ The risk of sub-optimal use of Open Source <fixed-case>NLP</fixed-case> Software: <fixed-case>UKB</fixed-case> is inadvertently state-of-the-art in knowledge-based <fixed-case>WSD</fixed-case> - EnekoAgirre - OierLópez de Lacalle - AitorSoroa + EnekoAgirre + OierLópez de Lacalle + AitorSoroa 29–33 UKB is an open source collection of programs for performing, among other tasks, Knowledge-Based Word Sense Disambiguation (WSD). Since it was released in 2009 it has been often used out-of-the-box in sub-optimal settings. We show that nine years later it is the state-of-the-art on knowledge-based WSD. This case shows the pitfalls of releasing open source NLP software without optimal default settings and precise instructions for reproducibility. W18-2505 @@ -3854,8 +3854,8 @@ Integrating Multiple <fixed-case>NLP</fixed-case> Technologies into an Open-source Platform for Multilingual Media Monitoring UlrichGermann RenārsLiepins - DidzisGosko - GuntisBarzdins + DidzisGosko + GuntisBarzdins 47–51 The open-source SUMMA Platform is a highly scalable distributed architecture for monitoring a large number of media broadcasts in parallel, with a lag behind actual broadcast time of at most a few minutes. It assembles numerous state-of-the-art NLP technologies into a fully automated media ingestion pipeline that can record live broadcasts, detect and transcribe spoken content, translate from several languages (original text or transcribed speech) into English, recognize Named Entities, detect topics, cluster and summarize documents across language barriers, and extract and store factual claims in these news items. This paper describes the intended use cases and discusses the system design decisions that allowed us to integrate state-of-the-art NLP modules into an effective workflow with comparatively little effort. W18-2508 @@ -3864,7 +3864,7 @@ The Annotated Transformer - AlexanderRush + AlexanderRush 52–60 A major goal of open-source NLP is to quickly and accurately reproduce the results of new work, in a manner that the community can easily use and modify. While most papers publish enough detail for replication, it still may be difficult to achieve good results in practice. This paper presents a worked exercise of paper reproduction with the goal of implementing the results of the recent Transformer model. The replication exercise aims at simple code structure that follows closely with the original work, while achieving an efficient usable system. W18-2509 @@ -3894,7 +3894,7 @@ Ruminating Reader: Reasoning with Gated Multi-hop Attention YichenGong - SamuelBowman + SamuelBowman 1–11 To answer the question in machine comprehension (MC) task, the models need to establish the interaction between the question and the context. To tackle the problem that the single-pass model cannot reflect on and correct its answer, we present Ruminating Reader. Ruminating Reader adds a second pass of attention and a novel information fusion component to the Bi-Directional Attention Flow model (BiDAF). We propose novel layer structures that construct a query aware context vector representation and fuse encoding representation with intermediate representation on top of BiDAF model. We show that a multi-hop attention mechanism can be applied to a bi-directional attention structure. In experiments on SQuAD, we find that the Reader outperforms the BiDAF baseline by 2.1 F1 score and 2.7 EM score. Our analysis shows that different hops of the attention have different responsibilities in selecting answers. W18-2601 @@ -3904,7 +3904,7 @@ Systematic Error Analysis of the <fixed-case>S</fixed-case>tanford Question Answering Dataset Marc-AntoineRondeau - T. J.Hazen + T. J.Hazen 12–20 We analyzed the outputs of multiple question answering (QA) models applied to the Stanford Question Answering Dataset (SQuAD) to identify the core challenges for QA systems on this data set. Through an iterative process, challenging aspects were hypothesized through qualitative analysis of the common error cases. A classifier was then constructed to predict whether SQuAD test examples were likely to be difficult for systems to answer based on features associated with the hypothesized aspects. The classifier’s performance was used to accept or reject each aspect as an indicator of difficulty. With this approach, we ensured that our hypotheses were systematically tested and not simply accepted based on our pre-existing biases. Our explanations are not accepted based on human evaluation of individual examples. This process also enabled us to identify the primary QA strategy learned by the models, i.e., systems determined the acceptable answer type for a question and then selected the acceptable answer span of that type containing the highest density of words present in the question within its local vicinity in the passage. W18-2602 @@ -3915,7 +3915,7 @@ A Multi-Stage Memory Augmented Neural Network for Machine Reading Comprehension SeunghakYu - Sathish ReddyIndurthi + Sathish ReddyIndurthi SeohyunBack HaejunLee 21–30 @@ -3960,7 +3960,7 @@ Robust and Scalable Differentiable Neural Computer for Question Answering JörgFranke JanNiehues - AlexWaibel + AlexWaibel 47–59 Deep learning models are often not easily adaptable to new tasks and require task-specific adjustments. The differentiable neural computer (DNC), a memory-augmented neural network, is designed as a general problem solver which can be used in a wide range of tasks. But in reality, it is hard to apply this model to new tasks. We analyze the DNC and identify possible improvements within the application of question answering. This motivates a more robust and scalable DNC (rsDNC). The objective precondition is to keep the general character of this model intact while making its application more reliable and speeding up its required training time. The rsDNC is distinguished by a more robust training, a slim memory unit and a bidirectional architecture. We not only achieve new state-of-the-art performance on the bAbI task, but also minimize the performance variance between different initializations. Furthermore, we demonstrate the simplified applicability of the rsDNC to new tasks with passable results on the CNN RC task without adaptions. W18-2606 @@ -3976,12 +3976,12 @@ RajarshiDas AndrewMcCallum MariaChang - AchilleFokoue-Nkoutche + AchilleFokoue-Nkoutche PavanKapanipathi NicholasMattei RyanMusa KartikTalamadupula - MichaelWitbrock + MichaelWitbrock 60–70 The recent work of Clark et al. (2018) introduces the AI2 Reasoning Challenge (ARC) and the associated ARC dataset that partitions open domain, complex science questions into easy and challenge sets. That paper includes an analysis of 100 questions with respect to the types of knowledge and reasoning required to answer them; however, it does not include clear definitions of these types, nor does it offer information about the quality of the labels. We propose a comprehensive set of definitions of knowledge and reasoning types necessary for answering the questions in the ARC dataset. Using ten annotators and a sophisticated annotation interface, we analyze the distribution of labels across the challenge set and statistics related to them. Additionally, we demonstrate that although naive information retrieval methods return sentences that are irrelevant to answering the query, sufficient supporting text is often present in the (ARC) corpus. Evaluating with human-selected relevant sentences improves the performance of a neural machine comprehension model by 42 points. W18-2607 @@ -4018,7 +4018,7 @@ Comparative Analysis of Neural <fixed-case>QA</fixed-case> models on <fixed-case>SQ</fixed-case>u<fixed-case>AD</fixed-case> SoumyaWadhwa KhyathiChandu - EricNyberg + EricNyberg 89–97 The task of Question Answering has gained prominence in the past few decades for testing the ability of machines to understand natural language. Large datasets for Machine Reading have led to the development of neural models that cater to deeper language understanding compared to information retrieval tasks. Different components in these neural architectures are intended to tackle different challenges. As a first step towards achieving generalization across multiple domains, we attempt to understand and compare the peculiarities of existing end-to-end neural models on the Stanford Question Answering Dataset (SQuAD) by performing quantitative as well as qualitative analysis of the results attained by each of them. We observed that prediction errors reflect certain model-specific biases, which we further discuss in this paper. W18-2610 @@ -4045,7 +4045,7 @@ W18-27 AlexandraBirch AndrewFinch - ThangLuong + ThangLuong GrahamNeubig YusukeOda Association for Computational Linguistics @@ -4086,8 +4086,8 @@ Iterative Back-Translation for Neural Machine Translation Vu Cong DuyHoang PhilippKoehn - GholamrezaHaffari - TrevorCohn + GholamrezaHaffari + TrevorCohn 18–24 We present iterative back-translation, a method for generating increasingly better synthetic parallel data from monolingual data to train neural machine translation systems. Our proposed method is very simple yet effective and highly applicable in practice. We demonstrate improvements in neural machine translation quality in both high and low resourced scenarios, including the best reported BLEU scores for the WMT 2017 German↔English tasks. W18-2703 @@ -4097,7 +4097,7 @@ Inducing Grammars with and for Neural Machine Translation YonatanBisk - KeTran + KeTran 25–35 Machine translation systems require semantic knowledge and grammatical understanding. Neural machine translation (NMT) systems often assume this information is captured by an attention mechanism and a decoder that ensures fluency. Recent work has shown that incorporating explicit syntax alleviates the burden of modeling both types of knowledge. However, requiring parses is expensive and does not explore the question of what syntax a model needs during translation. To address both of these issues we introduce a model that simultaneously translates while inducing dependency trees. In this way, we leverage the benefits of structure while investigating what syntax NMT must induce to maximize performance. We show that our dependency trees are 1. language pair dependent and 2. improve translation quality. W18-2704 @@ -4131,7 +4131,7 @@ Enhancement of Encoder and Attention Using Target Monolingual Corpora in Neural Machine Translation KenjiImamura AtsushiFujita - EiichiroSumita + EiichiroSumita 55–63 A large-scale parallel corpus is required to train encoder-decoder neural machine translation. The method of using synthetic parallel texts, in which target monolingual corpora are automatically translated into source sentences, is effective in improving the decoder, but is unreliable for enhancing the encoder. In this paper, we propose a method that enhances the encoder and attention using target monolingual corpora by generating multiple source sentences via sampling. By using multiple source sentences, diversity close to that of humans is achieved. Our experimental results show that the translation quality is improved by increasing the number of synthetic source sentences for each given target sentence, and quality close to that using a manually created parallel corpus was achieved. W18-2707 @@ -4184,9 +4184,9 @@ Towards one-shot learning for rare-word translation with external experts - Ngoc-QuanPham + Ngoc-QuanPham JanNiehues - AlexanderWaibel + AlexanderWaibel 100–109 Neural machine translation (NMT) has significantly improved the quality of automatic translation models. One of the main challenges in current systems is the translation of rare words. We present a generic approach to address this weakness by having external models annotate the training data as Experts, and control the model-expert interaction with a pointer network and reinforcement learning. Our experiments using phrase-based models to simulate Experts to complement neural machine translation models show that the model can be trained to copy the annotations into the output consistently. We demonstrate the benefit of our proposed framework in outof domain translation scenarios with only lexical resources, improving more than 1.0 BLEU point in both translation directions English-Spanish and German-English. W18-2712 @@ -4196,7 +4196,7 @@ <fixed-case>NICT</fixed-case> Self-Training Approach to Neural Machine Translation at <fixed-case>NMT</fixed-case>-2018 KenjiImamura - EiichiroSumita + EiichiroSumita 110–115 This paper describes the NICT neural machine translation system submitted at the NMT-2018 shared task. A characteristic of our approach is the introduction of self-training. Since our self-training does not change the model structure, it does not influence the efficiency of translation, such as the translation speed. The experimental results showed that the translation quality improved not only in the sequence-to-sequence (seq-to-seq) models but also in the transformer models. W18-2713 @@ -4207,7 +4207,7 @@ Fast Neural Machine Translation Implementation HieuHoang TomaszDwojak - RihardsKrislauks + RihardsKrislauks DanielTorregrosa KennethHeafield 116–121 @@ -4223,8 +4223,8 @@ BoWang GuillaumeKlein Jean-PierreRamatchandirin - JosepCrego - AlexanderRush + JosepCrego + AlexanderRush 122–128 We present a system description of the OpenNMT Neural Machine Translation entry for the WNMT 2018 evaluation. In this work, we developed a heavily optimized NMT inference model targeting a high-performance CPU system. The final system uses a combination of four techniques, all of them lead to significant speed-ups in combination: (a) sequence distillation, (b) architecture modifications, (c) precomputation, particularly of vocabulary, and (d) CPU targeted quantization. This work achieves the fastest performance of the shared task, and led to the development of new features that have been integrated to OpenNMT and available to the community. W18-2715 @@ -4265,11 +4265,11 @@ Predicting Brain Activation with <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Embeddings - JoãoAntónio Rodrigues + JoãoAntónio Rodrigues RubenBranco - JoãoSilva + JoãoSilva ChakavehSaedi - AntónioBranco + AntónioBranco 1–5 The task of taking a semantic representation of a noun and predicting the brain activity triggered by it in terms of fMRI spatial patterns was pioneered by Mitchell et al. 2008. That seminal work used word co-occurrence features to represent the meaning of the nouns. Even though the task does not impose any specific type of semantic representation, the vast majority of subsequent approaches resort to feature-based models or to semantic spaces (aka word embeddings). We address this task, with competitive results, by using instead a semantic network to encode lexical semantics, thus providing further evidence for the cognitive plausibility of this approach to model lexical meaning. W18-2801 @@ -4290,7 +4290,7 @@ Language Production Dynamics with Recurrent Neural Networks JesúsCalvillo - MatthewCrocker + MatthewCrocker 17–26 We present an analysis of the internal mechanism of the recurrent neural model of sentence production presented by Calvillo et al. (2016). The results show clear patterns of computation related to each layer in the network allowing to infer an algorithmic account, where the semantics activates the semantically related words, then each word generated at each time step activates syntactic and semantic constraints on possible continuations, while the recurrence preserves information through time. We propose that such insights could generalize to other models with similar architecture, including some used in computational linguistics for language modeling, machine translation and image caption generation. W18-2803 @@ -4323,7 +4323,7 @@ Affordances in Grounded Language Learning StephenMcGregor - KyungTaeLim + KyungTaeLim 41–46 We present a novel methodology involving mappings between different modes of semantic representation. We propose distributional semantic models as a mechanism for representing the kind of world knowledge inherent in the system of abstract symbols characteristic of a sophisticated community of language users. Then, motivated by insight from ecological psychology, we describe a model approximating affordances, by which we mean a language learner’s direct perception of opportunities for action in an environment. We present a preliminary experiment involving mapping between these two representational modalities, and propose that our methodology can become the basis for a cognitively inspired model of grounded language learning. W18-2806 @@ -4345,7 +4345,7 @@ JixingLi MurielleFabre Wen-MingLuh - JohnHale + JohnHale 56–64 The current study examined the role of syntactic structure during pronoun resolution. We correlated complexity measures derived by the syntax-sensitive Hobbs algorithm and a neural network model for pronoun resolution with brain activity of participants listening to an audiobook during fMRI recording. Compared to the neural network model, the Hobbs algorithm is associated with larger clusters of brain activation in a network including the left Broca’s area. W18-2808 @@ -4355,7 +4355,7 @@ A Sound and Complete Left-Corner Parsing for <fixed-case>M</fixed-case>inimalist <fixed-case>G</fixed-case>rammars MilošStanojević - EdwardStabler + EdwardStabler 65–74 This paper presents a left-corner parser for minimalist grammars. The relation between the parser and the grammar is transparent in the sense that there is a very simple 1-1 correspondence between derivations and parses. Like left-corner context-free parsers, left-corner minimalist parsers can be non-terminating when the grammar has empty left corners, so an easily computed left-corner oracle is defined to restrict the search. W18-2809 @@ -4367,12 +4367,12 @@ Proceedings of the Workshop on the Relevance of Linguistic Structure in Neural Architectures for NLP W18-29 - GeorgianaDinu + GeorgianaDinu MiguelBallesteros - AvirupSil - SamBowman + AvirupSil + SamBowman WaelHamza - AndersSogaard + AndersSogaard TahiraNaseem YoavGoldberg Association for Computational Linguistics @@ -4451,7 +4451,7 @@ Syntactic Dependency Representations in Neural Relation Classification FarhadNooralahzadeh - LiljaØvrelid + LiljaØvrelid 47–53 We investigate the use of different syntactic dependency representations in a neural relation classification task and compare the CoNLL, Stanford Basic and Universal Dependencies schemes. We further compare with a syntax-agnostic approach and perform an error analysis in order to gain a better understanding of the results. W18-2907 @@ -4468,9 +4468,9 @@ HeHe FelixHill SpandanaGella - JamieKiros + JamieKiros HongyuanMei - DipendraMisra + DipendraMisra Association for Computational Linguistics
Melbourne, Australia
July @@ -4485,7 +4485,7 @@ Corpus Specificity in <fixed-case>LSA</fixed-case> and Word2vec: The Role of Out-of-Domain Documents EdgarAltszyler MarianoSigman - DiegoFernández Slezak + DiegoFernández Slezak 1–10 W18-3001 Despite the popularity of word embeddings, the precise way by which they acquire semantic relations between words remain unclear. In the present article, we investigate whether LSA and word2vec capacity to identify relevant semantic relations increases with corpus size. One intuitive hypothesis is that the capacity to identify relevant associations should increase as the amount of data increases. However, if corpus size grows in topics which are not specific to the domain of interest, signal to noise ratio may weaken. Here we investigate the effect of corpus specificity and size in word-embeddings, and for this, we study two ways for progressive elimination of documents: the elimination of random documents vs. the elimination of documents unrelated to a specific task. We show that word2vec can take advantage of all the documents, obtaining its best performance when it is trained with the whole corpus. On the contrary, the specialization (removal of out-of-domain documents) of the training corpus, accompanied by a decrease of dimensionality, can increase LSA word-representation quality while speeding up the processing time. From a cognitive-modeling point of view, we point out that LSA’s word-knowledge acquisitions may not be efficiently exploiting higher-order co-occurrences and global relations, whereas word2vec does. @@ -4614,8 +4614,8 @@ Evaluating Word Embeddings in Multi-label Classification Using Fine-Grained Name Typing YadollahYaghoobzadeh - KatharinaKann - HinrichSchütze + KatharinaKann + HinrichSchütze 101–106 W18-3013 Embedding models typically associate each word with a single real-valued vector, representing its different properties. Evaluation methods, therefore, need to analyze the accuracy and completeness of these properties in embeddings. This requires fine-grained analysis of embedding subspaces. Multi-label classification is an appropriate way to do so. We propose a new evaluation method for word embeddings based on multi-label classification given a word embedding. The task we use is fine-grained name typing: given a large corpus, find all types that a name can refer to based on the name embedding. Given the scale of entities in knowledge bases, we can build datasets for this task that are complementary to the current embedding evaluation datasets in: they are very large, contain fine-grained classes, and allow the direct evaluation of embeddings without confounding factors like sentence context. @@ -4638,9 +4638,9 @@ <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Embeddings ChakavehSaedi - AntónioBranco - JoãoAntónio Rodrigues - JoãoSilva + AntónioBranco + JoãoAntónio Rodrigues + JoãoSilva 122–131 W18-3016 Semantic networks and semantic spaces have been two prominent approaches to represent lexical semantics. While a unified account of the lexical meaning relies on one being able to convert between these representations, in both directions, the conversion direction from semantic networks into semantic spaces started to attract more attention recently. In this paper we present a methodology for this conversion and assess it with a case study. When it is applied over WordNet, the performance of the resulting embeddings in a mainstream semantic similarity task is very good, substantially superior to the performance of word embeddings based on very large collections of texts like word2vec. @@ -4693,7 +4693,7 @@ Limitations of Cross-Lingual Learning from Image Search MareikeHartmann - AndersSøgaard + AndersSøgaard 159–163 W18-3021 Cross-lingual representation learning is an important step in making NLP scale to all the world’s languages. Previous work on bilingual lexicon induction suggests that it is possible to learn cross-lingual representations of words based on similarities between images associated with these words. However, that work focused (almost exclusively) on the translation of nouns only. Here, we investigate whether the meaning of other parts-of-speech (POS), in particular adjectives and verbs, can be learned in the same way. Our experiments across five language pairs indicate that previous work does not scale to the problem of learning cross-lingual representations beyond simple nouns. @@ -4731,11 +4731,11 @@ <fixed-case>LSTM</fixed-case>s Exploit Linguistic Attributes of Data - Nelson F.Liu + Nelson F.Liu OmerLevy RoySchwartz ChenhaoTan - Noah A.Smith + Noah A.Smith 180–186 W18-3024 While recurrent neural networks have found success in a variety of natural language processing applications, they are general models of sequential data. We investigate how the properties of natural language data affect an LSTM’s ability to learn a nonlinguistic task: recalling elements from its input. We find that models trained on natural language data are able to recall tokens from much longer sequences than models trained on non-language sequential data. Furthermore, we show that the LSTM learns to solve the memorization task by explicitly using a subset of its neurons to count timesteps in the input. We hypothesize that the patterns and structure in natural language data enable LSTMs to learn by providing approximate ways of reducing loss, but understanding the effect of different training data on the learnability of LSTMs remains an open question. @@ -4754,8 +4754,8 @@ Jointly Embedding Entities and Text with Distant Supervision DenisNewman-Griffis - Albert MLai - EricFosler-Lussier + Albert MLai + EricFosler-Lussier 195–206 W18-3026 Learning representations for knowledge base entities and concepts is becoming increasingly important for NLP applications. However, recent entity embedding methods have relied on structured resources that are expensive to create for new domains and corpora. We present a distantly-supervised method for jointly learning embeddings of entities and text from an unnanotated corpus, using only a list of mappings between entities and surface forms. We learn embeddings from open-domain and biomedical corpora, and compare against prior methods that rely on human-annotated text or large knowledge graph structure. Our embeddings capture entity similarity and relatedness better than prior work, both in existing biomedical datasets and a new Wikipedia-based dataset that we release to the community. Results on analogy completion and entity sense disambiguation indicate that entities and words capture complementary information that can be effectively combined for downstream use. @@ -4789,8 +4789,8 @@ Proceedings of the First Workshop on Economics and Natural Language Processing W18-31 UdoHahn - VéroniqueHoste - Ming-FengTsai + VéroniqueHoste + Ming-FengTsai Association for Computational Linguistics
Melbourne, Australia
July @@ -4828,8 +4828,8 @@
A Corpus of Corporate Annual and Social Responsibility Reports: 280 Million Tokens of Balanced Organizational Writing - Sebastian G.M.Händschke - SvenBuechel + Sebastian G.M.Händschke + SvenBuechel JanGoldenstein PhilippPoschmann TinghuiDuan @@ -4844,7 +4844,7 @@ Word Embeddings-Based Uncertainty Detection in Financial Disclosures Christoph KilianTheil - SanjaŠtajner + SanjaŠtajner HeinerStuckenschmidt 32–37 In this paper, we use NLP techniques to detect linguistic uncertainty in financial disclosures. Leveraging general-domain and domain-specific word embedding models, we automatically expand an existing dictionary of uncertainty triggers. We furthermore examine how an expert filtering affects the quality of such an expansion. We show that the dictionary expansions significantly improve regressions on stock return volatility. Lastly, we prove that the expansions significantly boost the automatic detection of uncertain sentences. @@ -4854,8 +4854,8 @@ A Simple End-to-End Question Answering Model for Product Information - TuanLai - TrungBui + TuanLai + TrungBui ShengLi NedimLipka 38–43 @@ -4919,8 +4919,8 @@ FahadAlGhamdi VictorSoto ThamarSolorio - MonaDiab - JuliaHirschberg + MonaDiab + JuliaHirschberg Association for Computational Linguistics
Melbourne, Australia
July @@ -4970,11 +4970,11 @@ KhyathiChandu EkaterinaLoginova VishalGupta - Josefvan Genabith - GünterNeumann - ManojChinnakotla - EricNyberg - Alan W.Black + Josefvan Genabith + GünterNeumann + ManojChinnakotla + EricNyberg + Alan W.Black 29–38 Code-Mixing (CM) is the phenomenon of alternating between two or more languages which is prevalent in bi- and multi-lingual communities. Most NLP applications today are still designed with the assumption of a single interaction language and are most likely to break given a CM utterance with multiple languages mixed at a morphological, phrase or sentence level. For example, popular commercial search engines do not yet fully understand the intents expressed in CM queries. As a first step towards fostering research which supports CM in NLP applications, we systematically crowd-sourced and curated an evaluation dataset for factoid question answering in three CM languages - Hinglish (Hindi+English), Tenglish (Telugu+English) and Tamlish (Tamil+English) which belong to two language families (Indo-Aryan and Dravidian). We share the details of our data collection process, techniques which were used to avoid inducing lexical bias amongst the crowd workers and other CM specific linguistic properties of the dataset. Our final dataset, which is available freely for research purposes, has 1,694 Hinglish, 2,848 Tamlish and 1,391 Tenglish factoid questions and their answers. We discuss the techniques used by the participants for the first edition of this ongoing challenge. W18-3204 @@ -4984,8 +4984,8 @@ Transliteration Better than Translation? Answering Code-mixed Questions over a Knowledge Base VishalGupta - ManojChinnakotla - ManishShrivastava + ManojChinnakotla + ManishShrivastava 39–50 Humans can learn multiple languages. If they know a fact in one language, they can answer a question in another language they understand. They can also answer Code-mix (CM) questions: questions which contain both languages. This behavior is attributed to the unique learning ability of humans. Our task aims to study if machines can achieve this. We demonstrate how effectively a machine can answer CM questions. In this work, we adopt a two phase approach: candidate generation and candidate re-ranking to answer questions. We propose a Triplet-Siamese-Hybrid CNN (TSHCNN) to re-rank candidate answers. We show experiments on the SimpleQuestions dataset. Our network is trained only on English questions provided in this dataset and noisy Hindi translations of these questions and can answer English-Hindi CM questions effectively without the need of translation into English. Back-transliterated CM questions outperform their lexical and sentence level translated counterparts by 5% & 35% in accuracy respectively, highlighting the efficacy of our approach in a resource constrained setting. W18-3205 @@ -5005,7 +5005,7 @@ Code-Switching Language Modeling using Syntax-Aware Multi-Task Learning - Genta IndraWinata + Genta IndraWinata AndreaMadotto Chien-ShengWu PascaleFung @@ -5017,7 +5017,7 @@ Predicting the presence of a Matrix Language in code-switching - BarbaraBullock + BarbaraBullock WallyGuzmán JacquelineSerigos VivekSharath @@ -5032,7 +5032,7 @@ Automatic Detection of Code-switching Style from Acoustics SaiKrishnaRallabandi SunayanaSitaram - Alan WBlack + Alan WBlack 76–81 Multilingual speakers switch between languages in an non-trivial fashion displaying inter sentential, intra sentential, and congruent lexicalization based transitions. While monolingual ASR systems may be capable of recognizing a few words from a foreign language, they are usually not robust enough to handle these varied styles of code-switching. There is also a lack of large code-switched speech corpora capturing all these styles making it difficult to build code-switched speech recognition systems. We hypothesize that it may be useful for an ASR system to be able to first detect the switching style of a particular utterance from acoustics, and then use specialized language models or other adaptation techniques for decoding the speech. In this paper, we look at the first problem of detecting code-switching style from acoustics. We classify code-switched Spanish-English and Hindi-English corpora using two metrics and show that features extracted from acoustics alone can distinguish between different kinds of code-switching in these language pairs. W18-3209 @@ -5055,7 +5055,7 @@ KhyathiChandu ThomasManzini SumeetSingh - Alan W.Black + Alan W.Black 92–97 Code-switching (CS), the practice of alternating between two or more languages in conversations, is pervasive in most multi-lingual communities. CS texts have a complex interplay between languages and occur in informal contexts that make them harder to collect and construct NLP tools for. We approach this problem through Language Modeling (LM) on a new Hindi-English mixed corpus containing 59,189 unique sentences collected from blogging websites. We implement and discuss different Language Models derived from a multi-layered LSTM architecture. We hypothesize that encoding language information strengthens a language model by helping to learn code-switching points. We show that our highest performing model achieves a test perplexity of 19.52 on the CS corpus that we collected and processed. On this data we demonstrate that our performance is an improvement over AWD-LSTM LM (a recent state of the art on monolingual English). W18-3211 @@ -5088,7 +5088,7 @@ Bilingual Character Representation for Efficiently Addressing Out-of-Vocabulary Words in Code-Switching Named Entity Recognition - Genta IndraWinata + Genta IndraWinata Chien-ShengWu AndreaMadotto PascaleFung @@ -5100,9 +5100,9 @@ Named Entity Recognition on Code-Switched Data Using Conditional Random Fields - Utpal KumarSikdar + Utpal KumarSikdar BiswanathBarik - BjörnGambäck + BjörnGambäck 115–119 Named Entity Recognition is an important information extraction task that identifies proper names in unstructured texts and classifies them into some pre-defined categories. Identification of named entities in code-mixed social media texts is a more difficult and challenging task as the contexts are short, ambiguous and often noisy. This work proposes a Conditional Random Fields based named entity recognition system to identify proper names in code-switched data and classify them into nine categories. The system ranked fifth among nine participant systems and achieved a 59.25% F1-score. W18-3215 @@ -5114,8 +5114,8 @@ FlorianJanke TongruiLi EricRincón - GualbertoGuzmán - BarbaraBullock + GualbertoGuzmán + BarbaraBullock Almeida JacquelineToribio 120–125 This paper describes the system for the Named Entity Recognition Shared Task of the Third Workshop on Computational Approaches to Linguistic Code-Switching (CALCS) submitted by the Bilingual Annotations Tasks (BATs) research group of the University of Texas. Our system uses several features to train a Conditional Random Field (CRF) model for classifying input words as Named Entities (NEs) using the Inside-Outside-Beginning (IOB) tagging scheme. We participated in the Modern Standard Arabic-Egyptian Arabic (MSA-EGY) and English-Spanish (ENG-SPA) tasks, achieving weighted average F-scores of 65.62 and 54.16 respectively. We also describe the performance of a deep neural network (NN) trained on a subset of the CRF features, which did not surpass CRF performance. @@ -5127,7 +5127,7 @@ Tackling Code-Switched <fixed-case>NER</fixed-case>: Participation of <fixed-case>CMU</fixed-case> ParvathyGeetha KhyathiChandu - Alan WBlack + Alan WBlack 126–131 Named Entity Recognition plays a major role in several downstream applications in NLP. Though this task has been heavily studied in formal monolingual texts and also noisy texts like Twitter data, it is still an emerging task in code-switched (CS) content on social media. This paper describes our participation in the shared task of NER on code-switched data for Spanglish (Spanish + English) and Arabish (Arabic + English). In this paper we describe models that intuitively developed from the data for the shared task Named Entity Recognition on Code-switched Data. Owing to the sparse and non-linear relationships between words in Twitter data, we explored neural architectures that are capable of non-linearities fairly well. In specific, we trained character level models and word level models based on Bidirectional LSTMs (Bi-LSTMs) to perform sequential tagging. We train multiple models to identify nominal mentions and subsequently use this information to predict the labels of named entity in a sequence. Our best model is a character level model along with word level pre-trained multilingual embeddings that gave an F-score of 56.72 in Spanglish and a word level model that gave an F-score of 65.02 in Arabish on the test data. W18-3217 @@ -5163,7 +5163,7 @@ <fixed-case>IIT</fixed-case> (<fixed-case>BHU</fixed-case>) Submission for the <fixed-case>ACL</fixed-case> Shared Task on Named Entity Recognition on Code-switched Data ShashwatTrivedi HarshRangwani - AnilKumar Singh + AnilKumar Singh 148–153 This paper describes the best performing system for the shared task on Named Entity Recognition (NER) on code-switched data for the language pair Spanish-English (ENG-SPA). We introduce a gated neural architecture for the NER task. Our final model achieves an F1 score of 63.76%, outperforming the baseline by 10%. W18-3220 @@ -5229,7 +5229,7 @@ Multimodal Relational Tensor Network for Sentiment and Emotion Classification SauravSahay - Shachi HKumar + Shachi HKumar RuiXia JonathanHuang LamaNachman @@ -5266,7 +5266,7 @@ Polarity and Intensity: the Two Aspects of Sentiment Analysis LeiminTian CatherineLai - JohannaMoore + JohannaMoore 40–47 Current multimodal sentiment analysis frames sentiment score prediction as a general Machine Learning task. However, what the sentiment score actually represents has often been overlooked. As a measurement of opinions and affective states, a sentiment score generally consists of two aspects: polarity and intensity. We decompose sentiment scores into these two aspects and study how they are conveyed through individual modalities and combined multimodal models in a naturalistic monologue setting. In particular, we build unimodal and multimodal multi-task learning models with sentiment score prediction as the main task and polarity and/or intensity classification as the auxiliary tasks. Our experiments show that sentiment analysis benefits from multi-task learning, and individual modalities differ when conveying the polarity and intensity aspects of sentiment. W18-3306 @@ -5313,7 +5313,7 @@ Proceedings of the Workshop on Deep Learning Approaches for Low-Resource NLP W18-34 - RezaHaffari + RezaHaffari ColinCherry GeorgeFoster ShahramKhadivi @@ -5330,11 +5330,11 @@ Character-level Supervision for Low-resource <fixed-case>POS</fixed-case> Tagging - KatharinaKann + KatharinaKann JohannesBjerva IsabelleAugenstein - BarbaraPlank - AndersSøgaard + BarbaraPlank + AndersSøgaard 1–11 Neural part-of-speech (POS) taggers are known to not perform well with little training data. As a step towards overcoming this problem, we present an architecture for learning more robust neural POS taggers by jointly training a hierarchical, recurrent model and a recurrent character-based sequence-to-sequence network supervised using an auxiliary objective. This way, we introduce stronger character-level supervision into the model, which enables better generalization to unseen words and provides regularization, making our encoding less prone to overfitting. We experiment with three auxiliary tasks: lemmatization, character-based word autoencoding, and character-based random string autoencoding. Experiments with minimal amounts of labeled data on 34 languages show that our new architecture outperforms a single-task baseline and, surprisingly, that, on average, raw text autoencoding can be as beneficial for low-resource POS tagging as using lemma information. Our neural POS tagger closes the gap to a state-of-the-art POS tagger (MarMoT) for low-resource scenarios by 43%, even outperforming it on languages with templatic morphology, e.g., Arabic, Hebrew, and Turkish, by some margin. W18-3401 @@ -5354,7 +5354,7 @@ Multi-task learning for historical text normalization: Size matters MarcelBollmann - AndersSøgaard + AndersSøgaard JoachimBingel 19–24 Historical text normalization suffers from small datasets that exhibit high variance, and previous work has shown that multi-task learning can be used to leverage data from related problems in order to obtain more robust models. Previous work has been limited to datasets from a specific language and a specific historical period, and it is not clear whether results generalize. It therefore remains an open problem, when historical text normalization benefits from multi-task learning. We explore the benefits of multi-task learning across 10 different datasets, representing different languages and periods. Our main finding—contrary to what has been observed for other NLP tasks—is that multi-task learning mainly works when target task data is very scarce. @@ -5375,7 +5375,7 @@ Multimodal Neural Machine Translation for Low-resource Language Pairs using Synthetic Data - KoelDutta Chowdhury + KoelDutta Chowdhury MohammedHasanuzzaman QunLiu 33–42 @@ -5401,7 +5401,7 @@ Domain Adapted Word Embeddings for Improved Sentiment Classification - PrathushaKameswara Sarma + PrathushaKameswara Sarma YingyuLiang BillSethares 51–59 @@ -5425,7 +5425,7 @@ Semi-Supervised Learning with Auxiliary Evaluation Component for Large Scale e-Commerce Text Classification MingkuanLiu MusenWen - SelcukKopru + SelcukKopru XianjingLiu AlanLu 68–76 @@ -5437,7 +5437,7 @@ Low-rank passthrough neural networks - Antonio ValerioMiceli Barone + Antonio ValerioMiceli Barone 77–86 Various common deep learning architectures, such as LSTMs, GRUs, Resnets and Highway Networks, employ state passthrough connections that support training with high feed-forward depth or recurrence over many time steps. These “Passthrough Networks” architectures also enable the decoupling of the network state size from the number of parameters of the network, a possibility has been studied by Sak et al. (2014) with their low-rank parametrization of the LSTM. In this work we extend this line of research, proposing effective, low-rank and low-rank plus diagonal matrix parametrizations for Passthrough Networks which exploit this decoupling property, reducing the data complexity and memory requirements of the network while preserving its memory capacity. This is particularly beneficial in low-resource settings as it supports expressive models with a compact parametrization less susceptible to overfitting. We present competitive experimental results on several tasks, including language modeling and a near state of the art result on sequential randomly-permuted MNIST classification, a hard task on natural data. W18-3410 @@ -5464,9 +5464,9 @@ Sociolinguistic Corpus of <fixed-case>W</fixed-case>hats<fixed-case>A</fixed-case>pp Chats in <fixed-case>S</fixed-case>panish among College Students AlejandroDorantes - GerardoSierra + GerardoSierra Tlauhlia YamínDonohue Pérez - GemmaBel-Enguix + GemmaBel-Enguix MónicaJasso Rosales 1–6 This work presents the Sociolinguistic Corpus of WhatsApp Chats in Spanish among College Students, a corpus of raw data for general use. Its purpose is to offer data for the study of of language and interactions via Instant Messaging (IM) among bachelors. Our paper consists of an overview of both the corpus’s content and demographic metadata. Furthermore, it presents the current research being conducted with it —namely parenthetical expressions, orality traits, and code-switching. This work also includes a brief outline of similar corpora and recent studies in the field of IM. @@ -5502,7 +5502,7 @@ Detecting Offensive Tweets in <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Code-Switched Language PuneetMathur - RajivShah + RajivShah RamitSawhney DebanjanMahata 18–26 @@ -5587,9 +5587,9 @@ Improving Classification of <fixed-case>T</fixed-case>witter Behavior During Hurricane Events KevinStowe JenningsAnderson - MarthaPalmer + MarthaPalmer LeysiaPalen - KenAnderson + KenAnderson 67–75 A large amount of social media data is generated during natural disasters, and identifying the relevant portions of this data is critical for researchers attempting to understand human behavior, the effects of information sources, and preparatory actions undertaken during these events. In order to classify human behavior during hazard events, we employ machine learning for two tasks: identifying hurricane related tweets and classifying user evacuation behavior during hurricanes. We show that feature-based and deep learning methods provide different benefits for tweet classification, and ensemble-based methods using linguistic, temporal, and geospatial features can effectively classify user behavior. W18-3512 @@ -5612,7 +5612,7 @@ Proceedings of the First Workshop on Multilingual Surface Realisation W18-36 SimonMille - AnjaBelz + AnjaBelz BerndBohnet EmilyPitler LeoWanner @@ -5666,9 +5666,9 @@ Surface Realization Shared Task 2018 (<fixed-case>SR</fixed-case>18): The <fixed-case>T</fixed-case>ilburg <fixed-case>U</fixed-case>niversity Approach - ThiagoCastro Ferreira + ThiagoCastro Ferreira SanderWubben - EmielKrahmer + EmielKrahmer 35–38 This study describes the approach developed by the Tilburg University team to the shallow task of the Multilingual Surface Realization Shared Task 2018 (SR18). Based on (Castro Ferreira et al., 2017), the approach works by first preprocessing an input dependency tree into an ordered linearized string, which is then realized using a statistical machine translation model. Our approach shows promising results, with BLEU scores above 50 for 5 different languages (English, French, Italian, Portuguese and Spanish) and above 35 for the Dutch language. W18-3604 @@ -5677,8 +5677,8 @@ The <fixed-case>OSU</fixed-case> Realizer for <fixed-case>SRST</fixed-case> ‘18: Neural Sequence-to-Sequence Inflection and Incremental Locality-Based Linearization - DavidKing - MichaelWhite + DavidKing + MichaelWhite 39–48 Surface realization is a nontrivial task as it involves taking structured data and producing grammatically and semantically correct utterances. Many competing grammar-based and statistical models for realization still struggle with relatively simple sentences. For our submission to the 2018 Surface Realization Shared Task, we tackle the shallow task by first generating inflected wordforms with a neural sequence-to-sequence model before incrementally linearizing them. For linearization, we use a global linear model trained using early update that makes use of features that take into account the dependency structure and dependency locality. Using this pipeline sufficed to produce surprisingly strong results in the shared task. In future work, we intend to pursue joint approaches to linearization and morphological inflection and incorporating a neural language model into the linearization choices. W18-3605 @@ -5688,7 +5688,7 @@ Generating High-Quality Surface Realizations Using Data Augmentation and Factored Sequence Models HenryElder - ChrisHokamp + ChrisHokamp 49–53 This work presents state of the art results in reconstruction of surface realizations from obfuscated text. We identify the lack of sufficient training data as the major obstacle to training high-performing models, and solve this issue by generating large amounts of synthetic training data. We also propose preprocessing techniques which make the structure contained in the input features more accessible to sequence models. Our models were ranked first on all evaluation metrics in the English portion of the 2018 Surface Realization shared task. W18-3606 @@ -5711,7 +5711,7 @@ <fixed-case>NILC</fixed-case>-<fixed-case>SWORNEMO</fixed-case> at the Surface Realization Shared Task: Exploring Syntax-Based Word Ordering using Neural Models - Marco AntonioSobrevilla Cabezudo + Marco AntonioSobrevilla Cabezudo ThiagoPardo 58–64 This paper describes the submission by the NILC Computational Linguistics research group of the University of São Paulo/Brazil to the Track 1 of the Surface Realization Shared Task (SRST Track 1). We present a neural-based method that works at the syntactic level to order the words (which we refer by NILC-SWORNEMO, standing for “Syntax-based Word ORdering using NEural MOdels”). Additionally, we apply a bottom-up approach to build the sentence and, using language-specific lexicons, we produce the proper word form of each lemma in the sentence. The results obtained by our method outperformed the average of the results for English, Portuguese and Spanish in the track. @@ -5722,7 +5722,7 @@ The <fixed-case>D</fixed-case>ip<fixed-case>I</fixed-case>nfo-<fixed-case>U</fixed-case>ni<fixed-case>T</fixed-case>o system for <fixed-case>SRST</fixed-case> 2018 ValerioBasile - AlessandroMazzei + AlessandroMazzei 65–71 This paper describes the system developed by the DipInfo-UniTo team to participate to the shallow track of the Surface Realization Shared Task 2018. The system employs two separate neural networks with different architectures to predict the word ordering and the morphological inflection independently from each other. The UniTO realizer is language independent, and its simple architecture allowed it to be scored in the central part of the final ranking of the shared task. W18-3609 @@ -5734,7 +5734,7 @@ Proceedings of the 5th Workshop on Natural Language Processing Techniques for Educational Applications W18-37 - Yuen-HsienTseng + Yuen-HsienTseng Hsin-HsiChen VincentNg MamoruKomachi @@ -5752,7 +5752,7 @@ Generating Questions for Reading Comprehension using Coherence Relations TakshakDesai ParagDakle - DanMoldovan + DanMoldovan 1–10 In this paper, we have proposed a technique for generating complex reading comprehension questions from a discourse that are more useful than factual ones derived from assertions. Our system produces a set of general-level questions using coherence relations and a set of well-defined syntactic transformations on the input text. Generated questions evaluate comprehension abilities like a comprehensive analysis of the text and its structure, correct identification of the author’s intent, a thorough evaluation of stated arguments; and a deduction of the high-level semantic relations that hold between text spans. Experiments performed on the RST-DT corpus allow us to conclude that our system possesses a strong aptitude for generating intricate questions. These questions are capable of effectively assessing a student’s interpretation of the text. W18-3701 @@ -5793,7 +5793,7 @@ Thank “Goodness”! A Way to Measure Style in Student Essays SandeepMathias - PushpakBhattacharyya + PushpakBhattacharyya 35–41 Essays have two major components for scoring - content and style. In this paper, we describe a property of the essay, called goodness, and use it to predict the score given for the style of student essays. We compare our approach to solve this problem with baseline approaches, like language modeling and also a state-of-the-art deep learning system. We show that, despite being quite intuitive, our approach is very powerful in predicting the style of the essays. W18-3705 @@ -5899,7 +5899,7 @@ Joint learning of frequency and word embeddings for multilingual readability assessment Dieu-ThuLe - Cam-TuNguyen + Cam-TuNguyen XiaoliangWang 103–107 This paper describes two models that employ word frequency embeddings to deal with the problem of readability assessment in multiple languages. The task is to determine the difficulty level of a given document, i.e., how hard it is for a reader to fully comprehend the text. The proposed models show how frequency information can be integrated to improve the readability assessment. The experimental results testing on both English and Chinese datasets show that the proposed models improve the results notably when comparing to those using only traditional word embeddings. @@ -5910,7 +5910,7 @@ <fixed-case>MULLE</fixed-case>: A grammar-based <fixed-case>L</fixed-case>atin language learning tool to supplement the classroom setting HerbertLange - PeterLjunglöf + PeterLjunglöf 108–112 MULLE is a tool for language learning that focuses on teaching Latin as a foreign language. It is aimed for easy integration into the traditional classroom setting and syllabus, which makes it distinct from other language learning tools that provide standalone learning experience. It uses grammar-based lessons and embraces methods of gamification to improve the learner motivation. The main type of exercise provided by our application is to practice translation, but it is also possible to shift the focus to vocabulary or morphology training. W18-3715 @@ -5919,9 +5919,9 @@ Textual Features Indicative of Writing Proficiency in Elementary School <fixed-case>S</fixed-case>panish Documents - GemmaBel-Enguix + GemmaBel-Enguix DianaDueñas Chávez - ArturoCuriel Díaz + ArturoCuriel Díaz 113–118 Childhood acquisition of written language is not straightforward. Writing skills evolve differently depending on external factors, such as the conditions in which children practice their productions and the quality of their instructors’ guidance. This can be challenging in low-income areas, where schools may struggle to ensure ideal acquisition conditions. Developing computational tools to support the learning process may counterweight negative environmental influences; however, few work exists on the use of information technologies to improve childhood literacy. This work centers around the computational study of Spanish word and syllable structure in documents written by 2nd and 3rd year elementary school students. The studied texts were compared against a corpus of short stories aimed at the same age group, so as to observe whether the children tend to produce similar written patterns as the ones they are expected to interpret at their literacy level. The obtained results show some significant differences between the two kinds of texts, pointing towards possible strategies for the implementation of new education software in support of written language acquisition. W18-3716 @@ -5931,7 +5931,7 @@ Assessment of an Index for Measuring Pronunciation Difficulty KatsunoriKotani - TakehikoYoshimi + TakehikoYoshimi 119–124 This study assesses an index for measur-ing the pronunciation difficulty of sen-tences (henceforth, pronounceability) based on the normalized edit distance from a reference sentence to a transcrip-tion of learners’ pronunciation. Pro-nounceability should be examined when language teachers use a computer-assisted language learning system for pronunciation learning to maintain the motivation of learners. However, unlike the evaluation of learners’ pronunciation performance, previous research did not focus on pronounceability not only for English but also for Asian languages. This study found that the normalized edit distance was reliable but not valid. The lack of validity appeared to be because of an English test used for determining the proficiency of learners. W18-3717 @@ -5950,7 +5950,7 @@ From Fidelity to Fluency: Natural Language Processing for Translator Training - Oi YeeKwong + Oi YeeKwong 130–134 This study explores the use of natural language processing techniques to enhance bilingual lexical access beyond simple equivalents, to enable translators to navigate along a wider cross-lingual lexical space and more examples showing different translation strategies, which is essential for them to learn to produce not only faithful but also fluent translations. W18-3719 @@ -5959,7 +5959,7 @@ Countering Position Bias in Instructor Interventions in <fixed-case>MOOC</fixed-case> Discussion Forums - Muthu KumarChandrasekaran + Muthu KumarChandrasekaran Min-YenKan 135–142 We systematically confirm that instructors are strongly influenced by the user interface presentation of Massive Online Open Course (MOOC) discussion forums. In a large scale dataset, we conclusively show that instructor interventions exhibit strong position bias, as measured by the position where the thread appeared on the user interface at the time of intervention. We measure and remove this bias, enabling unbiased statistical modelling and evaluation. We show that our de-biased classifier improves predicting interventions over the state-of-the-art on courses with sufficient number of interventions by 8.2% in F1 and 24.4% in recall on average. @@ -6047,7 +6047,7 @@ Detecting Simultaneously <fixed-case>C</fixed-case>hinese Grammar Errors Based on a <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case>-<fixed-case>CRF</fixed-case> Model YajunLiu - HongyingZan + HongyingZan MengjieZhong HongchaoMa 188–193 @@ -6071,7 +6071,7 @@ Shih-HungWu Jun-WeiWang Liang-PuChen - Ping-CheYang + Ping-CheYang 199–202 This paper reports how we build a Chinese Grammatical Error Diagnosis system in the NLPTEA-2018 CGED shared task. In 2018, we sent three runs with three different approaches. The first one is a pattern-based approach by frequent error pattern matching. The second one is a sequential labelling approach by conditional random fields (CRF). The third one is a rewriting approach by sequence to sequence (seq2seq) model. The three approaches have different properties that aim to optimize different performance metrics and the formal run results show the differences as we expected. W18-3729 @@ -6093,10 +6093,10 @@ Proceedings of the First Workshop on Linguistic Resources for Natural Language Processing W18-38 - PeterMachonis + PeterMachonis AnabelaBarreiro KristinaKocijan - MaxSilberztein + MaxSilberztein Association for Computational Linguistics
Santa Fe, New Mexico, USA
August @@ -6109,7 +6109,7 @@ Corpus Phonetics: Past, Present, and Future - MarkLiberman + MarkLiberman 1 W18-3801 Invited talk @@ -6126,7 +6126,7 @@ Rule-based vs. Neural Net Approaches to Semantic Textual Similarity LinruiZhang - DanMoldovan + DanMoldovan 12–17 W18-3803 This paper presents a neural net approach to determine Semantic Textual Similarity (STS) using attention-based bidirectional Long Short-Term Memory Networks (Bi-LSTM). To this date, most of the traditional STS systems were rule-based that built on top of excessive use of linguistic features and resources. In this paper, we present an end-to-end attention-based Bi-LSTM neural network system that solely takes word-level features, without expensive feature engineering work or the usage of external resources. By comparing its performance with traditional rule-based systems against SemEval-2012 benchmark, we make an assessment on the limitations and strengths of neural net systems to rule-based systems on Semantic Textual Similarity. @@ -6171,8 +6171,8 @@ <fixed-case>STYLUS</fixed-case>: A Resource for Systematically Derived Language Usage - BonnieDorr - ClareVoss + BonnieDorr + ClareVoss 57–64 W18-3808 We describe a resource derived through extraction of a set of argument realizations from an existing lexical-conceptual structure (LCS) Verb Database of 500 verb classes (containing a total of 9525 verb entries) to include information about realization of arguments for a range of different verb classes. We demonstrate that our extended resource, called STYLUS (SysTematicallY Derived Language USe), enables systematic derivation of regular patterns of language usage without requiring manual annotation. We posit that both spatially oriented applications such as robot navigation and more general applications such as narrative generation require a layered representation scheme where a set of primitives (often grounded in space/motion such as GO) is coupled with a representation of constraints at the syntax-semantics interface. We demonstrate that the resulting resource covers three cases of lexico-semantic operations applicable to both language understanding and language generation. @@ -6181,7 +6181,7 @@ Contemporary <fixed-case>A</fixed-case>mharic Corpus: Automatically Morpho-Syntactically Tagged <fixed-case>A</fixed-case>mharic Corpus Andargachew MekonnenGezmu - Binyam EphremSeyoum + Binyam EphremSeyoum MichaelGasser AndreasNürnberger 65–70 @@ -6193,7 +6193,7 @@ Gold Corpus for Telegraphic Summarization ChanakyaMalireddy Srivenkata N MSomisetty - ManishShrivastava + ManishShrivastava 71–77 W18-3810 Most extractive summarization techniques operate by ranking all the source sentences and then select the top ranked sentences as the summary. Such methods are known to produce good summaries, especially when applied to news articles and scientific texts. However, they don’t fare so well when applied to texts such as fictional narratives, which don’t have a single central or recurrent theme. This is because usually the information or plot of the story is spread across several sentences. In this paper, we discuss a different summarization technique called Telegraphic Summarization. Here, we don’t select whole sentences, rather pick short segments of text spread across sentences, as the summary. We have tailored a set of guidelines to create such summaries and, using the same, annotate a gold corpus of 200 English short stories. @@ -6210,15 +6210,15 @@ Parallel Corpora for bi-Directional Statistical Machine Translation for Seven <fixed-case>E</fixed-case>thiopian Language Pairs - SolomonTeferra Abate + SolomonTeferra Abate MichaelMelese - MarthaYifiru Tachbelie + MarthaYifiru Tachbelie MillionMeshesha SolomonAtinafu WondwossenMulugeta YaregalAssabie HafteAbera - BinyamEphrem + BinyamEphrem TewodrosAbebe WondimagegnhueTsegaye AmanuelLemma @@ -6232,7 +6232,7 @@ Using Embeddings to Compare <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et Frames Across Languages JenniferSikos - SebastianPadó + SebastianPadó 91–101 W18-3813 Much interest in Frame Semantics is fueled by the substantial extent of its applicability across languages. At the same time, lexicographic studies have found that the applicability of individual frames can be diminished by cross-lingual divergences regarding polysemy, syntactic valency, and lexicalization. Due to the large effort involved in manual investigations, there are so far no broad-coverage resources with “problematic” frames for any language pair. Our study investigates to what extent multilingual vector representations of frames learned from manually annotated corpora can address this need by serving as a wide coverage source for such divergences. We present a case study for the language pair English — German using the FrameNet and SALSA corpora and find that inferences can be made about cross-lingual frame applicability using a vector space model. @@ -6250,8 +6250,8 @@ Towards an Automatic Classification of Illustrative Examples in a Large <fixed-case>J</fixed-case>apanese-<fixed-case>F</fixed-case>rench Dictionary Obtained by <fixed-case>OCR</fixed-case> - ChristianBoitet - MathieuMangeot + ChristianBoitet + MathieuMangeot MutsukoTomokiyo 112–121 W18-3815 @@ -6271,7 +6271,7 @@ Enabling Code-Mixed Translation: Parallel Corpus Creation and <fixed-case>MT</fixed-case> Augmentation Approach MrinalDhar VaibhavKumar - ManishShrivastava + ManishShrivastava 131–140 W18-3817 Code-mixing, use of two or more languages in a single sentence, is ubiquitous; generated by multi-lingual speakers across the world. The phenomenon presents itself prominently in social media discourse. Consequently, there is a growing need for translating code-mixed hybrid language into standard languages. However, due to the lack of gold parallel data, existing machine translation systems fail to properly translate code-mixed text. In an effort to initiate the task of machine translation of code-mixed content, we present a newly created parallel corpus of code-mixed English-Hindi and English. We selected previously available English-Hindi code-mixed data as a starting point for the creation of our parallel corpus. We then chose 4 human translators, fluent in both English and Hindi, for translating the 6088 code-mixed English-Hindi sentences to English. With the help of the created parallel corpus, we analyzed the structure of English-Hindi code-mixed data and present a technique to augment run-of-the-mill machine translation (MT) approaches that can help achieve superior translations without the need for specially designed translation systems. We present an augmentation pipeline for existing MT approaches, like Phrase Based MT (Moses) and Neural MT, to improve the translation of code-mixed text. The augmentation pipeline is presented as a pre-processing step and can be plugged with any existing MT system, which we demonstrate by improving translations done by systems like Moses, Google Neural Machine Translation System (NMTS) and Bing Translator for English-Hindi code-mixed content. @@ -6283,10 +6283,10 @@ Proceedings of the Fifth Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial 2018) W18-39 MarcosZampieri - PreslavNakov + PreslavNakov NikolaLjubešić - JörgTiedemann - ShervinMalmasi + JörgTiedemann + ShervinMalmasi AhmedAli Association for Computational Linguistics
Santa Fe, New Mexico, USA
@@ -6305,16 +6305,16 @@ PreslavNakov AhmedAli SuwonShon - JamesGlass + JamesGlass YvesScherrer - TanjaSamardžić + TanjaSamardžić NikolaLjubešić JörgTiedemann Chrisvan der Lee StefanGrondelaers NellekeOostdijk DirkSpeelman - Antalvan den Bosch + Antalvan den Bosch RiteshKumar BorniniLahiri MayankJain @@ -6326,9 +6326,9 @@ Encoder-Decoder Methods for Text Normalization MassimoLusetti - TatyanaRuzsics - AnneGöhring - TanjaSamardžić + TatyanaRuzsics + AnneGöhring + TanjaSamardžić ElisabethStark 18–28 W18-3902 @@ -6348,7 +6348,7 @@ Sub-label dependencies for Neural Morphological Tagging – The Joint Submission of <fixed-case>U</fixed-case>niversity of <fixed-case>C</fixed-case>olorado and <fixed-case>U</fixed-case>niversity of <fixed-case>H</fixed-case>elsinki for <fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial 2018 - MiikkaSilfverberg + MiikkaSilfverberg SenkaDrobac 37–45 W18-3904 @@ -6377,7 +6377,7 @@ Iterative Language Model Adaptation for <fixed-case>I</fixed-case>ndo-<fixed-case>A</fixed-case>ryan Language Identification TommiJauhiainen HeidiJauhiainen - KristerLindén + KristerLindén 66–75 W18-3907 This paper presents the experiments and results obtained by the SUKI team in the Indo-Aryan Language Identification shared task of the VarDial 2018 Evaluation Campaign. The shared task was an open one, but we did not use any corpora other than what was distributed by the organizers. A total of eight teams provided results for this shared task. Our submission using a HeLI-method based language identifier with iterative language model adaptation obtained the best results in the shared task with a macro F1-score of 0.958. @@ -6385,7 +6385,7 @@ Language and the Shifting Sands of Domain, Space and Time (Invited Talk) - TimothyBaldwin + TimothyBaldwin 76 W18-3908 In this talk, I will first present recent work on domain debiasing in the context of language identification, then discuss a new line of work on language variety analysis in the form of dialect map generation. Finally, I will reflect on the interplay between time and space on language variation, and speculate on how these can be captured in a single model. @@ -6393,7 +6393,7 @@ <fixed-case>U</fixed-case>nibuc<fixed-case>K</fixed-case>ernel Reloaded: First Place in <fixed-case>A</fixed-case>rabic Dialect Identification for the Second Year in a Row - AndreiButnaru + AndreiButnaru Radu TudorIonescu 77–87 W18-3909 @@ -6404,7 +6404,7 @@ Varying image description tasks: spoken versus written descriptions Emielvan Miltenburg RuudKoolen - EmielKrahmer + EmielKrahmer 88–100 W18-3910 Automatic image description systems are commonly trained and evaluated on written image descriptions. At the same time, these systems are often used to provide spoken descriptions (e.g. for visually impaired users) through apps like TapTapSee or Seeing AI. This is not a problem, as long as spoken and written descriptions are very similar. However, linguistic research suggests that spoken language often differs from written language. These differences are not regular, and vary from context to context. Therefore, this paper investigates whether there are differences between written and spoken image descriptions, even if they are elicited through similar tasks. We compare descriptions produced in two languages (English and Dutch), and in both languages observe substantial differences between spoken and written descriptions. Future research should see if users prefer the spoken over the written style and, if so, aim to emulate spoken descriptions. @@ -6413,7 +6413,7 @@ Transfer Learning for <fixed-case>B</fixed-case>ritish <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage Modelling BorisMocialov - HelenHastie + HelenHastie GrahamTurner 101–110 W18-3911 @@ -6440,8 +6440,8 @@ Neural Network Architectures for <fixed-case>A</fixed-case>rabic Dialect Identification EliseMichon - Minh QuangPham - JosepCrego + Minh QuangPham + JosepCrego JeanSenellart 128–136 W18-3914 @@ -6452,7 +6452,7 @@ <fixed-case>H</fixed-case>e<fixed-case>LI</fixed-case>-based Experiments in Discriminating Between <fixed-case>D</fixed-case>utch and <fixed-case>F</fixed-case>lemish Subtitles TommiJauhiainen HeidiJauhiainen - KristerLindén + KristerLindén 137–144 W18-3915 This paper presents the experiments and results obtained by the SUKI team in the Discriminating between Dutch and Flemish in Subtitles shared task of the VarDial 2018 Evaluation Campaign. Our best submission was ranked 8th, obtaining macro F1-score of 0.61. Our best results were produced by a language identifier implementing the HeLI method without any modifications. We describe, in addition to the best method we used, some of the experiments we did with unsupervised clustering. @@ -6460,9 +6460,9 @@ Measuring language distance among historical varieties using perplexity. Application to <fixed-case>E</fixed-case>uropean <fixed-case>P</fixed-case>ortuguese. - Jose RamomPichel Campos + Jose RamomPichel Campos PabloGamallo - IñakiAlegria + IñakiAlegria 145–155 W18-3916 The objective of this work is to quantify, with a simple and robust measure, the distance between historical varieties of a language. The measure will be inferred from text corpora corresponding to historical periods. Different approaches have been proposed for similar aims: Language Identification, Phylogenetics, Historical Linguistics or Dialectology. In our approach, we used a perplexity-based measure to calculate language distance between all the historical periods of a specific language: European Portuguese. Perplexity has also proven to be a robust metric to calculate distance between languages. However, this measure has not been tested yet to identify diachronic periods within the historical evolution of a specific language. For this purpose, a historical Portuguese corpus has been constructed from different open sources containing texts with close original spelling. The results of our experiments show that Portuguese keeps an important degree of homogeneity over time. We anticipate this metric to be a starting point to be applied to other languages. @@ -6494,11 +6494,11 @@ Discriminating between <fixed-case>I</fixed-case>ndo-<fixed-case>A</fixed-case>ryan Languages Using <fixed-case>SVM</fixed-case> Ensembles - Alina MariaCiobanu + Alina MariaCiobanu MarcosZampieri ShervinMalmasi SantanuPal - Liviu P.Dinu + Liviu P.Dinu 178–184 W18-3920 In this paper we present a system based on SVM ensembles trained on characters and words to discriminate between five similar languages of the Indo-Aryan family: Hindi, Braj Bhasha, Awadhi, Bhojpuri, and Magahi. The system competed in the Indo-Aryan Language Identification (ILI) shared task organized within the VarDial Evaluation Campaign 2018. Our best entry in the competition, named ILIdentification, scored 88.95% F1 score and it was ranked 3rd out of 8 teams. @@ -6509,7 +6509,7 @@ DivyanshuGupta GouravDhakad JayprakashGupta - Anil KumarSingh + Anil KumarSingh 185–190 W18-3921 Text language Identification is a Natural Language Processing task of identifying and recognizing a given language out of many different languages from a piece of text. This paper describes our submission to the ILI 2018 shared-task, which includes the identification of 5 closely related Indo-Aryan languages. We developed a word-level LSTM(Long Short-term Memory) model, a specific type of Recurrent Neural Network model, for this task. Given a sentence, our model embeds each word of the sentence and convert into its trainable word embedding, feeds them into our LSTM network and finally predict the language. We obtained an F1 macro score of 0.836, ranking 5th in the task. @@ -6518,7 +6518,7 @@ Exploring Classifier Combinations for Language Variety Identification TimKreutz - WalterDaelemans + WalterDaelemans 191–198 W18-3922 This paper describes CLiPS’s submissions for the Discriminating between Dutch and Flemish in Subtitles (DFS) shared task at VarDial 2018. We explore different ways to combine classifiers trained on different feature groups. Our best system uses two Linear SVM classifiers; one trained on lexical features (word n-grams) and one trained on syntactic features (PoS n-grams). The final prediction for a document to be in Flemish Dutch or Netherlandic Dutch is made by the classifier that outputs the highest probability for one of the two labels. This confidence vote approach outperforms a meta-classifier on the development data and on the test data. @@ -6526,7 +6526,7 @@ Identification of Differences between <fixed-case>D</fixed-case>utch Language Varieties with the <fixed-case>V</fixed-case>ar<fixed-case>D</fixed-case>ial2018 <fixed-case>D</fixed-case>utch-<fixed-case>F</fixed-case>lemish Subtitle Data - Hansvan Halteren + Hansvan Halteren NellekeOostdijk 199–209 W18-3923 @@ -6547,8 +6547,8 @@ FernandoBenites RalfGrubenmann Piusvon Däniken - Dirkvon Grünigen - JanDeriu + Dirkvon Grünigen + JanDeriu MarkCieliebak 218–227 W18-3925 @@ -6579,7 +6579,7 @@ When Simple n-gram Models Outperform Syntactic Approaches: Discriminating between <fixed-case>D</fixed-case>utch and <fixed-case>F</fixed-case>lemish MartinKroon MashaMedvedeva - BarbaraPlank + BarbaraPlank 244–253 W18-3928 In this paper we present the results of our participation in the Discriminating between Dutch and Flemish in Subtitles VarDial 2018 shared task. We try techniques proven to work well for discriminating between language varieties as well as explore the potential of using syntactic features, i.e. hierarchical syntactic subtrees. We experiment with different combinations of features. Discriminating between these two languages turned out to be a very hard task, not only for a machine: human performance is only around 0.51 F1 score; our best system is still a simple Naive Bayes model with word unigrams and bigrams. The system achieved an F1 score (macro) of 0.62, which ranked us 4th in the shared task. @@ -6589,7 +6589,7 @@ <fixed-case>H</fixed-case>e<fixed-case>LI</fixed-case>-based Experiments in <fixed-case>S</fixed-case>wiss <fixed-case>G</fixed-case>erman Dialect Identification TommiJauhiainen HeidiJauhiainen - KristerLindén + KristerLindén 254–262 W18-3929 In this paper we present the experiments and results by the SUKI team in the German Dialect Identification shared task of the VarDial 2018 Evaluation Campaign. Our submission using HeLI with adaptive language models obtained the best results in the shared task with a macro F1-score of 0.686, which is clearly higher than the other submitted results. Without some form of unsupervised adaptation on the test set, it might not be possible to reach as high an F1-score with the level of domain difference between the datasets of the shared task. We describe the methods used in detail, as well as some additional experiments carried out during the shared task. @@ -6606,7 +6606,7 @@ A Neural Approach to Language Variety Translation - Marta R.Costa-jussà + Marta R.Costa-jussà MarcosZampieri SantanuPal 275–282 @@ -6624,9 +6624,9 @@ <fixed-case>G</fixed-case>erman Dialect Identification Using Classifier Ensembles - Alina MariaCiobanu + Alina MariaCiobanu ShervinMalmasi - Liviu P.Dinu + Liviu P.Dinu 288–294 W18-3933 In this paper we present the GDI classification entry to the second German Dialect Identification (GDI) shared task organized within the scope of the VarDial Evaluation Campaign 2018. We present a system based on SVM classifier ensembles trained on characters and words. The system was trained on a collection of speech transcripts of five Swiss-German dialects provided by the organizers. The transcripts included in the dataset contained speakers from Basel, Bern, Lucerne, and Zurich. Our entry in the challenge reached 62.03% F1 score and was ranked third out of eight teams. @@ -6637,7 +6637,7 @@ Proceedings of the Third Workshop on Semantic Deep Learning W18-40 - Luis EspinosaAnke + Luis EspinosaAnke DagmarGromann ThierryDeclerck Association for Computational Linguistics @@ -6654,7 +6654,7 @@ Replicated <fixed-case>S</fixed-case>iamese <fixed-case>LSTM</fixed-case> in Ticketing System for Similarity Learning and Retrieval in Asymmetric Texts PankajGupta BerntAndrassy - HinrichSchütze + HinrichSchütze 1–11 W18-4001 The goal of our industrial ticketing system is to retrieve a relevant solution for an input query, by matching with historical tickets stored in knowledge base. A query is comprised of subject and description, while a historical ticket consists of subject, description and solution. To retrieve a relevant solution, we use textual similarity paradigm to learn similarity in the query and historical tickets. The task is challenging due to significant term mismatch in the query and ticket pairs of asymmetric lengths, where subject is a short text but description and solution are multi-sentence texts. We present a novel Replicated Siamese LSTM model to learn similarity in asymmetric text pairs, that gives 22% and 7% gain (Accuracy@10) for retrieval task, respectively over unsupervised and supervised baselines. We also show that the topic and distributed semantic features for short and long texts improved both similarity learning and retrieval. @@ -6662,9 +6662,9 @@ Word-Embedding based Content Features for Automated Oral Proficiency Scoring - Su-YounYoon + Su-YounYoon AnastassiaLoukina - Chong MinLee + Chong MinLee MatthewMulholland XinhaoWang IkkyuChoi @@ -6770,7 +6770,7 @@ Enhancing Cohesion and Coherence of Fake Text to Improve Believability for Deceiving Cyber Attackers PrakruthiKaruna HemantPurohit - ÖzlemUzuner + ÖzlemUzuner SushilJajodia RajeshGanesan 31–40 @@ -6820,7 +6820,7 @@ Diana I.Luna-Umanzor Alma E.Ríos-Ponce Balderas-PliegoMariana - GemmaBel-Enguix + GemmaBel-Enguix 85–93 W18-4109 Older adults tend to suffer a decline in some of their cognitive capabilities, being language one of least affected processes. Word association norms (WAN) also known as free word associations reflect word-word relations, the participant reads or hears a word and is asked to write or say the first word that comes to mind. Free word associations show how the organization of semantic memory remains almost unchanged with age. We have performed a WAN task with very small samples of older adults with Alzheimer’s disease (AD), vascular dementia (VaD) and mixed dementia (MxD), and also with a control group of typical aging adults, matched by age, sex and education. All of them are native speakers of Mexican Spanish. The results show, as expected, that Alzheimer disease has a very important impact in lexical retrieval, unlike vascular and mixed dementia. This suggests that linguistic tests elaborated from WAN can be also used for detecting AD at early stages. @@ -6892,12 +6892,12 @@ BenMiller Mariekevan Erp PiekVossen - MarthaPalmer - EduardHovy + MarthaPalmer + EduardHovy TerukoMitamura DavidCaswell - Susan W.Brown - ClaireBonial + Susan W.Brown + ClaireBonial Association for Computational Linguistics
Santa Fe, New Mexico, U.S.A
August @@ -6910,7 +6910,7 @@ Every Object Tells a Story - JamesPustejovsky + JamesPustejovsky NikhilKrishnaswamy 1–6 W18-4301 @@ -6941,7 +6941,7 @@ W. VictorYarlott CristinaCornelio TianGao - MarkFinlayson + MarkFinlayson 25–33 W18-4304 Discourse structure is a key aspect of all forms of text, providing valuable information both to humans and machines. We applied the hierarchical theory of news discourse developed by van Dijk to examine how paragraphs operate as units of discourse structure within news articles—what we refer to here as document-level discourse. This document-level discourse provides a characterization of the content of each paragraph that describes its relation to the events presented in the article (such as main events, backgrounds, and consequences) as well as to other components of the story (such as commentary and evaluation). The purpose of a news discourse section is of great utility to story understanding as it affects both the importance and temporal order of items introduced in the text—therefore, if we know the news discourse purpose for different sections, we should be able to better rank events for their importance and better construct timelines. We test two hypotheses: first, that people can reliably annotate news articles with van Dijk’s theory; second, that we can reliably predict these labels using machine learning. We show that people have a high degree of agreement with each other when annotating the theory (F1 > 0.8, Cohen’s kappa > 0.6), demonstrating that it can be both learned and reliably applied by human annotators. Additionally, we demonstrate first steps toward machine learning of the theory, achieving a performance of F1 = 0.54, which is 65% of human performance. Moreover, we have generated a gold-standard, adjudicated corpus of 50 documents for document-level discourse annotation based on the ACE Phase 2 corpus. @@ -6970,7 +6970,7 @@ ChristopherReale ClaireBonial HeesungKwon - ClareVoss + ClareVoss 55–60 W18-4307 We propose a method to improve human activity recognition in video by leveraging semantic information about the target activities from an expert-defined linguistic resource, VerbNet. Our hypothesis is that activities that share similar event semantics, as defined by the semantic predicates of VerbNet, will be more likely to share some visual components. We use a deep convolutional neural network approach as a baseline and incorporate linguistic information from VerbNet through multi-task learning. We present results of experiments showing the added information has negligible impact on recognition performance. We discuss how this may be because the lexical semantic information defined by VerbNet is generally not visually salient given the video processing approach used here, and how we may handle this in future approaches. @@ -7015,9 +7015,9 @@ Proceedings of the First Workshop on Trolling, Aggression and Cyberbullying (TRAC-2018) W18-44 RiteshKumar - Atul Kr.Ojha + Atul Kr.Ojha MarcosZampieri - ShervinMalmasi + ShervinMalmasi Association for Computational Linguistics
Santa Fe, New Mexico, USA
August @@ -7062,7 +7062,7 @@ Fully Connected Neural Network with Advance Preprocessor to Identify Aggression over <fixed-case>F</fixed-case>acebook and <fixed-case>T</fixed-case>witter KashyapRaiyani - TeresaGonçalves + TeresaGonçalves PauloQuaresma Vitor BeiresNogueira 28–41 @@ -7073,7 +7073,7 @@ Cyberbullying Intervention Based on Convolutional Neural Networks QianjiaHuang - DianaInkpen + DianaInkpen JianhongZhang DavidVan Bruwaene 42–51 @@ -7136,7 +7136,7 @@ Aggression Detection in Social Media: Using Deep Neural Networks, Data Augmentation, and Pseudo Labeling Segun TaofeekAroyehun - AlexanderGelbukh + AlexanderGelbukh 90–97 W18-4411 With the advent of the read-write web which facilitates social interactions in online spaces, the rise of anti-social behaviour in online spaces has attracted the attention of researchers. In this paper, we address the challenge of automatically identifying aggression in social media posts. Our team, saroyehun, participated in the English track of the Aggression Detection in Social Media Shared Task. On this task, we investigate the efficacy of deep neural network models of varying complexity. Our results reveal that deep neural network models require more data points to do better than an NBSVM linear baseline based on character n-grams. Our improved deep neural network models were trained on augmented data and pseudo labeled examples. Our LSTM classifier receives a weighted macro-F1 score of 0.6425 to rank first overall on the Facebook subtask of the shared task. On the social media sub-task, our CNN-LSTM model records a weighted macro-F1 score of 0.5920 to place third overall. @@ -7156,7 +7156,7 @@ Degree based Classification of Harmful Speech using <fixed-case>T</fixed-case>witter Data SanjanaSharma SakshamAgrawal - ManishShrivastava + ManishShrivastava 106–112 W18-4413 Harmful speech has various forms and it has been plaguing the social media in different ways. If we need to crackdown different degrees of hate speech and abusive behavior amongst it, the classification needs to be based on complex ramifications which needs to be defined and hold accountable for, other than racist, sexist or against some particular group and community. This paper primarily describes how we created an ontological classification of harmful speech based on degree of hateful intent and used it to annotate twitter data accordingly. The key contribution of this paper is the new dataset of tweets we created based on ontological classes and degrees of harmful speech found in the text. We also propose supervised classification system for recognizing these respective harmful speech classes in the texts hence. This serves as a preliminary work to lay down foundation on defining different classes of harmful speech and subsequent work will be done in making it’s automatic detection more robust and efficient. @@ -7164,7 +7164,7 @@ Aggressive Language Identification Using Word Embeddings and Sentiment Features - ConstantinOrăsan + ConstantinOrăsan 113–119 W18-4414 This paper describes our participation in the First Shared Task on Aggression Identification. The method proposed relies on machine learning to identify social media texts which contain aggression. The main features employed by our method are information extracted from word embeddings and the output of a sentiment analyser. Several machine learning methods and different combinations of features were tried. The official submissions used Support Vector Machines and Random Forests. The official evaluation showed that for texts similar to the ones in the training dataset Random Forests work best, whilst for texts which are different SVMs are a better choice. The evaluation also showed that despite its simplicity the method performs well when compared with more elaborated methods. @@ -7173,7 +7173,7 @@ Aggression Detection in Social Media using Deep Neural Networks SreekanthMadisetty - MaunendraSankar Desarkar + MaunendraSankar Desarkar 120–127 W18-4415 With the rise of user-generated content in social media coupled with almost non-existent moderation in many such systems, aggressive contents have been observed to rise in such forums. In this paper, we work on the problem of aggression detection in social media. Aggression can sometimes be expressed directly or overtly or it can be hidden or covert in the text. On the other hand, most of the content in social media is non-aggressive in nature. We propose an ensemble based system to classify an input post to into one of three classes, namely, Overtly Aggressive, Covertly Aggressive, and Non-aggressive. Our approach uses three deep learning methods, namely, Convolutional Neural Networks (CNN) with five layers (input, convolution, pooling, hidden, and output), Long Short Term Memory networks (LSTM), and Bi-directional Long Short Term Memory networks (Bi-LSTM). A majority voting based ensemble method is used to combine these classifiers (CNN, LSTM, and Bi-LSTM). We trained our method on Facebook comments dataset and tested on Facebook comments (in-domain) and other social media posts (cross-domain). Our system achieves the F1-score (weighted) of 0.604 for Facebook posts and 0.508 for social media posts. @@ -7195,7 +7195,7 @@ Cyberbullying Detection Task: the <fixed-case>EBSI</fixed-case>-<fixed-case>LIA</fixed-case>-<fixed-case>UNAM</fixed-case> System (<fixed-case>ELU</fixed-case>) at <fixed-case>COLING</fixed-case>’18 <fixed-case>TRAC</fixed-case>-1 IgnacioArroyo-Fernández DominicForest - Juan-ManuelTorres-Moreno + Juan-ManuelTorres-Moreno MauricioCarrasco-Ruiz ThomasLegeleux KarenJoannette @@ -7218,7 +7218,7 @@ AhmedHusseini Orabi MahmoudHusseini Orabi QianjiaHuang - DianaInkpen + DianaInkpen DavidVan Bruwaene 159–165 W18-4419 @@ -7247,7 +7247,7 @@ Combining Shallow and Deep Learning for Aggressive Text Detection ViktorGolem - Vanja MladenKaran + Vanja MladenKaran JanŠnajder 188–198 W18-4422 @@ -7269,12 +7269,12 @@ Proceedings of the Second Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature W18-45 - BeatriceAlex + BeatriceAlex StefaniaDegaetano-Ortlieb AnnaFeldman AnnaKazantseva NilsReiter - StanSzpakowicz + StanSzpakowicz Association for Computational Linguistics
Santa Fe, New Mexico
August @@ -7288,8 +7288,8 @@ Learning Diachronic Analogies to Analyze Concept Change MatthiasOrlikowski - MatthiasHartung - PhilippCimiano + MatthiasHartung + PhilippCimiano 1–11 W18-4501 We propose to study the evolution of concepts by learning to complete diachronic analogies between lists of terms which relate to the same concept at different points in time. We present a number of models based on operations on word embedddings that correspond to different assumptions about the characteristics of diachronic analogies and change in concept vocabularies. These are tested in a quantitative evaluation for nine different concepts on a corpus of Dutch newspapers from the 1950s and 1980s. We show that a model which treats the concept terms as analogous and learns weights to compensate for diachronic changes (weighted linear combination) is able to more accurately predict the missing term than a learned transformation and two baselines for most of the evaluated concepts. We also find that all models tend to be coherent in relation to the represented concept, but less discriminative in regard to other concepts. Additionally, we evaluate the effect of aligning the time-specific embedding spaces using orthogonal Procrustes, finding varying effects on performance, depending on the model, concept and evaluation metric. For the weighted linear combination, however, results improve with alignment in a majority of cases. All related code is released publicly. @@ -7375,7 +7375,7 @@ MikaHämäläinen TanjaSäily JackRueter - JörgTiedemann + JörgTiedemann EetuMäkelä 87–96 W18-4510 @@ -7402,9 +7402,9 @@ A Method for Human-Interpretable Paraphrasticality Prediction - MariaMoritz + MariaMoritz JohannesHellrich - SvenBüchel + SvenBüchel 113–118 W18-4513 The detection of reused text is important in a wide range of disciplines. However, even as research in the field of plagiarism detection is constantly improving, heavily modified or paraphrased text is still challenging for current methodologies. For historical texts, these problems are even more severe, since text sources were often subject to stronger and more frequent modifications. Despite the need for tools to automate text criticism, e.g., tracing modifications in historical text, algorithmic support is still limited. While current techniques can tell if and how frequently a text has been modified, very little work has been done on determining the degree and kind of paraphrastic modification—despite such information being of substantial interest to scholars. We present a human-interpretable, feature-based method to measure paraphrastic modification. Evaluating our technique on three data sets, we find that our approach performs competitive to text similarity scores borrowed from machine translation evaluation, being much harder to interpret. @@ -7421,7 +7421,7 @@ Towards Coreference for Literary Text: Analyzing Domain-Specific Phenomena - InaRoesiger + InaRoesiger SarahSchulz NilsReiter 129–138 @@ -7452,7 +7452,7 @@ Induction of a Large-Scale Knowledge Graph from the <fixed-case>R</fixed-case>egesta <fixed-case>I</fixed-case>mperii JuriOpitz LeoBorn - ViviNastase + ViviNastase 159–168 W18-4518 We induce and visualize a Knowledge Graph over the Regesta Imperii (RI), an important large-scale resource for medieval history research. The RI comprise more than 150,000 digitized abstracts of medieval charters issued by the Roman-German kings and popes distributed over many European locations and a time span of more than 700 years. Our goal is to provide a resource for historians to visualize and query the RI, possibly aiding medieval history research. The resulting medieval graph and visualization tools are shared publicly. @@ -7464,9 +7464,9 @@ Proceedings of the Workshop on Linguistic Complexity and Natural Language Processing W18-46 LeonorBecerra-Bonache - M. DoloresJiménez-López - CarlosMartín-Vide - AdriàTorrens-Urrutia + M. DoloresJiménez-López + CarlosMartín-Vide + AdriàTorrens-Urrutia Association for Computational Linguistics
Santa Fe, New-Mexico
August @@ -7489,7 +7489,7 @@
Computational Complexity of Natural Languages: A Reasoned Overview - AntónioBranco + AntónioBranco 10–19 W18-4602 There has been an upsurge of research interest in natural language complexity. As this interest will benefit from being informed by established contributions in this area, this paper presents a reasoned overview of central results concerning the computational complexity of natural language parsing. This overview also seeks to help to understand why, contrary to recent and widespread assumptions, it is by no means sufficient that an agent handles sequences of items under a pattern a^n b^n or under a pattern a^n b^m c^n d^m to ascertain ipso facto that this is the result of at least an underlying context-free grammar or an underlying context-sensitive grammar, respectively. In addition, it seeks to help to understand why it is also not sufficient that an agent handles sequences of items under a pattern a^n b^n for it to be deemed as having a cognitive capacity of higher computational complexity. @@ -7520,7 +7520,7 @@ AyushJain VishalSingh SidharthRanjan - RajakrishnanRajkumar + RajakrishnanRajkumar SumeetAgarwal 38–48 W18-4605 @@ -7549,7 +7549,7 @@ Proceedings of the 14th Joint ACL-ISO Workshop on Interoperable Semantic Annotation W18-47 - HarryBunt + HarryBunt Association for Computational Linguistics
Santa Fe, New Mexico, USA
August @@ -7562,9 +7562,9 @@ <fixed-case>D</fixed-case>ial<fixed-case>E</fixed-case>dit: Annotations for Spoken Conversational Image Editing - RameshManuvirakurike + RameshManuvirakurike JacquelineBrixey - TrungBui + TrungBui WalterChang RonArtstein KallirroiGeorgila @@ -7591,8 +7591,8 @@ SimonKeizer CatherinePelachaud VolhaPetukhova - LaurentPrévot - MariëtTheune + LaurentPrévot + MariëtTheune 21–34 W18-4703 bunt-etal-2018-downward @@ -7600,7 +7600,7 @@ The Revision of <fixed-case>ISO</fixed-case>-Space,Focused on the Movement Link KiyongLee - JamesPustejovsky + JamesPustejovsky HarryBunt 35–44 W18-4704 @@ -7623,7 +7623,7 @@ A Dialogue Annotation Scheme for Weight Management Chat using the Trans-Theoretical Model of Health Behavior Change - RameshManuvirakurike + RameshManuvirakurike SumanthBharawadj KallirroiGeorgila 60–68 @@ -7651,7 +7651,7 @@ Discourse Annotation in the <fixed-case>PDTB</fixed-case>: The Next Generation RashmiPrasad - BonnieWebber + BonnieWebber AlanLee 87–97 W18-4710 @@ -7660,7 +7660,7 @@ Towards Understanding End-of-trip Instructions in a Taxi Ride Scenario DeepthiKarkada - RameshManuvirakurike + RameshManuvirakurike KallirroiGeorgila 98–107 W18-4711 @@ -7671,7 +7671,7 @@ Proceedings of the Workshop on Computational Modeling of Polysynthetic Languages W18-48 - Judith L.Klavans + Judith L.Klavans Association for Computational Linguistics
Santa Fe, New Mexico, USA
August @@ -7692,7 +7692,7 @@
A Neural Morphological Analyzer for <fixed-case>A</fixed-case>rapaho Verbs Learned from a Finite State Transducer - SarahMoeller + SarahMoeller GhazalehKazeminejad AndrewCowell MansHulden @@ -7712,7 +7712,7 @@ A prototype finite-state morphological analyser for <fixed-case>C</fixed-case>hukchi VasilisaAndriyanets - FrancisTyers + FrancisTyers 31–40 W18-4804 In this article we describe the application of finite-state transducers to the morphological and phonological systems of Chukchi, a polysynthetic language spoken in the north of the Russian Federation. The language exhibits progressive and regressive vowel harmony, productive incorporation and extensive circumfixing. To implement the analyser we use the well-known Helsinki Finite-State Toolkit (HFST). The resulting model covers the majority of the morphological and phonological processes. A brief evaluation carried out on publically-available corpora shows that the coverage of the transducer is between and 53% and 76%. An error evaluation of 100 tokens randomly selected from the corpus, which were not covered by the analyser shows that most of the morphological processes are covered and that the majority of errors are caused by a limited stem lexicon. @@ -7750,10 +7750,10 @@ Lost in Translation: Analysis of Information Loss During Machine Translation Between Polysynthetic and Fusional Languages ManuelMager - ElisabethMager + ElisabethMager AlfonsoMedina-Urrea - Ivan VladimirMeza Ruiz - KatharinaKann + Ivan VladimirMeza Ruiz + KatharinaKann 73–83 W18-4808 Machine translation from polysynthetic to fusional languages is a challenging task, which gets further complicated by the limited amount of parallel text available. Thus, translation performance is far from the state of the art for high-resource and more intensively studied language pairs. To shed light on the phenomena which hamper automatic translation to and from polysynthetic languages, we study translations from three low-resource, polysynthetic languages (Nahuatl, Wixarika and Yorem Nokki) into Spanish and vice versa. Doing so, we find that in a morpheme-to-morpheme alignment an important amount of information contained in polysynthetic morphemes has no Spanish counterpart, and its translation is often omitted. We further conduct a qualitative analysis and, thus, identify morpheme types that are commonly hard to align or ignored in the translation process. @@ -7761,7 +7761,7 @@ Automatic Glossing in a Low-Resource Setting for Language Documentation - SarahMoeller + SarahMoeller MansHulden 84–93 W18-4809 @@ -7778,8 +7778,8 @@ Jena D.Hwang NathanSchneider MelanieAndresen - SameerPradhan - Miriam R. L.Petruck + SameerPradhan + Miriam R. L.Petruck Association for Computational Linguistics
Santa Fe, New Mexico, USA
August @@ -7793,7 +7793,7 @@ Annotation Schemes for Surface Construction Labeling - LoriLevin + LoriLevin 1 W18-4901 In this talk I will describe the interaction of linguistics and language technologies in Surface Construction Labeling (SCL) from the perspective of corpus annotation tasks such as definiteness, modality, and causality. Linguistically, following Construction Grammar, SCL recognizes that meaning may be carried by morphemes, words, or arbitrary constellations of morpho-lexical elements. SCL is like Shallow Semantic Parsing in that it does not attempt a full compositional analysis of meaning, but rather identifies only the main elements of a semantic frame, where the frames may be invoked by constructions as well as lexical items. Computationally, SCL is different from tasks such as information extraction in that it deals only with meanings that are expressed in a conventional, grammaticalized way and does not address inferred meanings. I review the work of Dunietz (2018) on the labeling of causal frames including causal connectives and cause and effect arguments. I will describe how to design an annotation scheme for SCL, including isolating basic units of form and meaning and building a “constructicon”. I will conclude with remarks about the nature of universal categories and universal meaning representations in language technologies. This talk describes joint work with Jaime Carbonell, Jesse Dunietz, Nathan Schneider, and Miriam Petruck. @@ -7820,7 +7820,7 @@ Processing <fixed-case>MWE</fixed-case>s: Neurocognitive Bases of Verbal <fixed-case>MWE</fixed-case>s and Lexical Cohesiveness within <fixed-case>MWE</fixed-case>s ShohiniBhattasali MurielleFabre - JohnHale + JohnHale 6–17 W18-4904 Multiword expressions have posed a challenge in the past for computational linguistics since they comprise a heterogeneous family of word clusters and are difficult to detect in natural language data. In this paper, we present a fMRI study based on language comprehension to provide neuroimaging evidence for processing MWEs. We investigate whether different MWEs have distinct neural bases, e.g. if verbal MWEs involve separate brain areas from non-verbal MWEs and if MWEs with varying levels of cohesiveness activate dissociable brain regions. Our study contributes neuroimaging evidence illustrating that different MWEs elicit spatially distinct patterns of activation. We also adapt an association measure, usually used to detect MWEs, as a cognitively plausible metric for language processing. @@ -7854,7 +7854,7 @@ Fixed Similes: Measuring aspects of the relation between <fixed-case>MWE</fixed-case> idiomatic semantics and syntactic flexibility - StellaMarkantonatou + StellaMarkantonatou PanagiotisKouris YanisMaistros 51–61 @@ -7865,7 +7865,7 @@ Fine-Grained Termhood Prediction for <fixed-case>G</fixed-case>erman Compound Terms Using Neural Networks AnnaHätty - SabineSchulte im Walde + SabineSchulte im Walde 62–73 W18-4909 Automatic term identification and investigating the understandability of terms in a specialized domain are often treated as two separate lines of research. We propose a combined approach for this matter, by defining fine-grained classes of termhood and framing a classification task. The classes reflect tiers of a term’s association to a domain. The new setup is applied to German closed compounds as term candidates in the domain of cooking. For the prediction of the classes, we compare several neural network architectures and also take salient information about the compounds’ components into account. We show that applying a similar class distinction to the compounds’ components and propagating this information within the network improves the compound class prediction results. @@ -7874,10 +7874,10 @@ Towards a Computational Lexicon for <fixed-case>M</fixed-case>oroccan <fixed-case>D</fixed-case>arija: Words, Idioms, and Constructions JamalLaoudi - ClaireBonial + ClaireBonial LuciaDonatelli StephenTratz - ClareVoss + ClareVoss 74–85 W18-4910 In this paper, we explore the challenges of building a computational lexicon for Moroccan Darija (MD), an Arabic dialect spoken by over 32 million people worldwide but which only recently has begun appearing frequently in written form in social media. We raise the question of what belongs in such a lexicon and start by describing our work building traditional word-level lexicon entries with their English translations. We then discuss challenges in translating idiomatic MD text that led to creating multi-word expression lexicon entries whose meanings could not be fully derived from the individual words. Finally, we provide a preliminary exploration of constructions to be considered for inclusion in an MD constructicon by translating examples of English constructions and examining their MD counterparts. @@ -7886,12 +7886,12 @@ Verbal Multiword Expressions in <fixed-case>B</fixed-case>asque Corpora UxoaIñurrieta - ItziarAduriz + ItziarAduriz AinaraEstarrona ItziarGonzalez-Dios - AnttonGurrutxaga - RubenUrizar - IñakiAlegria + AnttonGurrutxaga + RubenUrizar + IñakiAlegria 86–95 W18-4911 This paper presents a Basque corpus where Verbal Multiword Expressions (VMWEs) were annotated following universal guidelines. Information on the annotation is given, and some ideas for discussion upon the guidelines are also proposed. The corpus is useful not only for NLP-related research, but also to draw conclusions on Basque phraseology in comparison with other languages. @@ -7929,11 +7929,11 @@ Developing and Evaluating Annotation Procedures for <fixed-case>T</fixed-case>witter Data during Hazard Events KevinStowe - MarthaPalmer + MarthaPalmer JenningsAnderson MarinaKogan LeysiaPalen - Kenneth M.Anderson + Kenneth M.Anderson RebeccaMorss JulieDemuth HeatherLazrus @@ -7957,8 +7957,8 @@ The <fixed-case>RST</fixed-case> <fixed-case>S</fixed-case>panish-<fixed-case>C</fixed-case>hinese Treebank ShuyuanCao - Iriada Cunha - MikelIruskieta + Iriada Cunha + MikelIruskieta 156–166 W18-4917 Discourse analysis is necessary for different tasks of Natural Language Processing (NLP). As two of the most spoken languages in the world, discourse analysis between Spanish and Chinese is important for NLP research. This paper aims to present the first open Spanish-Chinese parallel corpus annotated with discourse information, whose theoretical framework is based on the Rhetorical Structure Theory (RST). We have evaluated and harmonized each annotation part to obtain a high annotated-quality corpus. The corpus is already available to the public. @@ -7995,9 +7995,9 @@ Constructing an Annotated Corpus of Verbal <fixed-case>MWE</fixed-case>s for <fixed-case>E</fixed-case>nglish AbigailWalsh - ClaireBonial + ClaireBonial KristinaGeeraert - John P.McCrae + John P.McCrae NathanSchneider ClarissaSomers 193–200 @@ -8007,7 +8007,7 @@ Cooperating Tools for <fixed-case>MWE</fixed-case> Lexicon Management and Corpus Annotation - YujiMatsumoto + YujiMatsumoto AkihikoKato HiroyukiShindo ToshioMorita @@ -8020,8 +8020,8 @@ “Fingers in the Nose”: Evaluating Speakers’ Identification of Multi-Word Expressions Using a Slightly Gamified Crowdsourcing Platform KarënFort BrunoGuillaume - MatthieuConstant - NicolasLefèbvre + MatthieuConstant + NicolasLefèbvre Yann-AlanPilatte 207–213 W18-4923 @@ -8042,15 +8042,15 @@ Edition 1.1 of the <fixed-case>PARSEME</fixed-case> Shared Task on Automatic Identification of Verbal Multiword Expressions CarlosRamisch - Silvio RicardoCordeiro + Silvio RicardoCordeiro AgataSavary VeronikaVincze - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu ArchnaBhatia MajaBuljan - MarieCandito + MarieCandito PolonaGantar - VoulaGiouli + VoulaGiouli TungaGüngör AbdelatiHawwari UxoaIñurrieta @@ -8059,8 +8059,8 @@ TimmLichte ChayaLiebeskind JohannaMonti - CarlaParra Escartín - BehrangQasemiZadeh + CarlaParra Escartín + BehrangQasemiZadeh RenataRamisch NathanSchneider IvelinaStoyanova @@ -8075,7 +8075,7 @@ <fixed-case>CRF</fixed-case>-Seq and <fixed-case>CRF</fixed-case>-<fixed-case>D</fixed-case>ep<fixed-case>T</fixed-case>ree at <fixed-case>PARSEME</fixed-case> Shared Task 2018: Detecting Verbal <fixed-case>MWE</fixed-case>s using Sequential and Dependency-Based Approaches ErwanMoreau AshjanAlsulaimani - AlfredoMaldonado + AlfredoMaldonado CarlVogel 241–247 W18-4926 @@ -8094,7 +8094,7 @@ <fixed-case>GBD</fixed-case>-<fixed-case>NER</fixed-case> at <fixed-case>PARSEME</fixed-case> Shared Task 2018: Multi-Word Expression Detection Using Bidirectional Long-Short-Term Memory Networks and Graph-Based Decoding - TiberiuBoros + TiberiuBoros RuxandraBurtica 254–260 W18-4928 @@ -8114,7 +8114,7 @@ <fixed-case>TRAPACC</fixed-case> and <fixed-case>TRAPACCS</fixed-case> at <fixed-case>PARSEME</fixed-case> Shared Task 2018: Neural Transition Tagging of Verbal Multiword Expressions ReginaStodden - BehrangQasemiZadeh + BehrangQasemiZadeh LauraKallmeyer 268–274 W18-4930 @@ -8123,7 +8123,7 @@ <fixed-case>TRAVERSAL</fixed-case> at <fixed-case>PARSEME</fixed-case> Shared Task 2018: Identification of Verbal Multiword Expressions Using a Discriminative Tree-Structured Model - JakubWaszczuk + JakubWaszczuk 275–282 W18-4931 This paper describes a system submitted to the closed track of the PARSEME shared task (edition 1.1) on automatic identification of verbal multiword expressions (VMWEs). The system represents VMWE identification as a labeling task where one of two labels (MWE or not-MWE) must be predicted for each node in the dependency tree based on local context, including adjacent nodes and their labels. The system relies on multiclass logistic regression to determine the globally optimal labeling of a tree. The system ranked 1st in the general cross-lingual ranking of the closed track systems, according to both official evaluation measures: MWE-based F1 and token-based F1. @@ -8145,7 +8145,7 @@ NicolasZampieri ManonScholivet CarlosRamisch - BenoitFavre + BenoitFavre 290–296 W18-4933 This paper describes the Veyn system, submitted to the closed track of the PARSEME Shared Task 2018 on automatic identification of verbal multiword expressions (VMWEs). Veyn is based on a sequence tagger using recurrent neural networks. We represent VMWEs using a variant of the begin-inside-outside encoding scheme combined with the VMWE category tag. In addition to the system description, we present development experiments to determine the best tagging scheme. Veyn is freely available, covers 19 languages, and was ranked ninth (MWE-based) and eight (Token-based) among 13 submissions, considering macro-averaged F1 across languages. @@ -8157,9 +8157,9 @@ Proceedings of the 19th Annual SIGdial Meeting on Discourse and Dialogue W18-50 KazunoriKomatani - DianeLitman + DianeLitman KaiYu - AlexPapangelis + AlexPapangelis LawrenceCavedon MikioNakano Association for Computational Linguistics @@ -8195,8 +8195,8 @@ Modeling Linguistic and Personality Adaptation for Natural Language Generation ZhichaoHu - JeanFox Tree - MarilynWalker + JeanFox Tree + MarilynWalker 20–31 W18-5003 Previous work has shown that conversants adapt to many aspects of their partners’ language. Other work has shown that while every person is unique, they often share general patterns of behavior. Theories of personality aim to explain these shared patterns, and studies have shown that many linguistic cues are correlated with personality traits. We propose an adaptation measure for adaptive natural language generation for dialogs that integrates the predictions of both personality theories and adaptation theories, that can be applied as a dialog unfolds, on a turn by turn basis. We show that our measure meets criteria for validity, and that adaptation varies according to corpora and task, speaker, and the set of features used to model it. We also produce fine-grained models according to the dialog segmentation or the speaker, and demonstrate the decaying trend of adaptation. @@ -8243,7 +8243,7 @@ FlorianKreyssig IñigoCasanueva PawełBudzianowski - MilicaGašić + MilicaGašić 60–69 W18-5007 User Simulators are one of the major tools that enable offline training of task-oriented dialogue systems. For this task the Agenda-Based User Simulator (ABUS) is often used. The ABUS is based on hand-crafted rules and its output is in semantic form. Issues arise from both properties such as limited diversity and the inability to interface a text-level belief tracker. This paper introduces the Neural User Simulator (NUS) whose behaviour is learned from a corpus and which generates natural language, hence needing a less labelled dataset than simulators generating a semantic output. In comparison to much of the past work on this topic, which evaluates user simulators on corpus-based metrics, we use the NUS to train the policy of a reinforcement learning based Spoken Dialogue System. The NUS is compared to the ABUS by evaluating the policies that were trained using the simulators. Cross-model evaluation is performed i.e. training on one simulator and testing on the other. Furthermore, the trained policies are tested on real users. In both evaluation tasks the NUS outperformed the ABUS. @@ -8276,7 +8276,7 @@ A Situated Dialogue System for Learning Structural Concepts in Blocks World IanPerera - JamesAllen + JamesAllen Choh ManTeng LucianGalescu 89–98 @@ -8298,14 +8298,14 @@ Consequences and Factors of Stylistic Differences in Human-Robot Dialogue - StephanieLukin + StephanieLukin KimberlyPollard - ClaireBonial + ClaireBonial MatthewMarge CassidyHenry RonArtstein - DavidTraum - ClareVoss + DavidTraum + ClareVoss 110–118 W18-5012 This paper identifies stylistic differences in instruction-giving observed in a corpus of human-robot dialogue. Differences in verbosity and structure (i.e., single-intent vs. multi-intent instructions) arose naturally without restrictions or prior guidance on how users should speak with the robot. Different styles were found to produce different rates of miscommunication, and correlations were found between style differences and individual user variation, trust, and interaction experience with the robot. Understanding potential consequences and factors that influence style can inform design of dialogue systems that are robust to natural variation from human users. @@ -8327,7 +8327,7 @@ SarahPlane ArielMarvasti TylerEgan - CaseyKennington + CaseyKennington 130–139 W18-5014 When interacting with robots in a situated spoken dialogue setting, human dialogue partners tend to assign anthropomorphic and social characteristics to those robots. In this paper, we explore the age and educational level that human dialogue partners assign to three different robotic systems, including an un-embodied spoken dialogue system. We found that how a robot speaks is as important to human perceptions as the way the robot looks. Using the data from our experiment, we derived prosodic, emotional, and linguistic features from the participants to train and evaluate a classifier that predicts perceived intelligence, age, and education level. @@ -8384,8 +8384,8 @@ LenaReed ShubhangiTandon SharathT.S. - StephanieLukin - MarilynWalker + StephanieLukin + MarilynWalker 180–190 W18-5019 Natural language generators for task-oriented dialogue must effectively realize system dialogue actions and their associated semantics. In many applications, it is also desirable for generators to control the style of an utterance. To date, work on task-oriented neural generation has primarily focused on semantic fidelity rather than achieving stylistic goals, while work on style has been done in contexts where it is difficult to measure content preservation. Here we present three different sequence-to-sequence models and carefully test how well they disentangle content and style. We use a statistical generator, Personage, to synthesize a new corpus of over 88,000 restaurant domain utterances whose style varies according to models of personality, giving us total control over both the semantic content and the stylistic variation in the training data. We then vary the amount of explicit stylistic supervision given to the three models. We show that our most explicit model can simultaneously achieve high fidelity to both semantic and stylistic goals: this model adds a context vector of 36 stylistic parameters as input to the hidden state of the encoder at each time step, showing the benefits of explicit stylistic supervision, even when the amount of training data is large. @@ -8430,7 +8430,7 @@ Discourse Coherence in the Wild: A Dataset, Evaluation and Methods AliceLai - JoelTetreault + JoelTetreault 214–223 W18-5023 W18-5023.Attachment.pdf @@ -8495,7 +8495,7 @@ <fixed-case>D</fixed-case>ial<fixed-case>C</fixed-case>rowd: A toolkit for easy dialog system assessment KyusongLee TianchengZhao - Alan W.Black + Alan W.Black MaxineEskenazi 245–248 W18-5028 @@ -8508,8 +8508,8 @@ DavidPautler VikramRamanarayanan KirbyCofino - PatrickLange - DavidSuendermann-Oeft + PatrickLange + DavidSuendermann-Oeft 249–252 W18-5029 We present a paradigm for interactive teacher training that leverages multimodal dialog technology to puppeteer custom-designed embodied conversational agents (ECAs) in student roles. We used the open-source multimodal dialog system HALEF to implement a small-group classroom math discussion involving Venn diagrams where a human teacher candidate has to interact with two student ECAs whose actions are controlled by the dialog system. Such an automated paradigm has the potential to be extended and scaled to a wide range of interactive simulation scenarios in education, medicine, and business where group interaction training is essential. @@ -8519,7 +8519,7 @@ An Empirical Study of Self-Disclosure in Spoken Dialogue Systems AbhilashaRavichander - Alan W.Black + Alan W.Black 253–263 W18-5030 Self-disclosure is a key social strategy employed in conversation to build relations and increase conversational depth. It has been heavily studied in psychology and linguistic literature, particularly for its ability to induce self-disclosure from the recipient, a phenomena known as reciprocity. However, we know little about how self-disclosure manifests in conversation with automated dialog systems, especially as any self-disclosure on the part of a dialog system is patently disingenuous. In this work, we run a large-scale quantitative analysis on the effect of self-disclosure by analyzing interactions between real-world users and a spoken dialog system in the context of social conversation. We find that indicators of reciprocity occur even in human-machine dialog, with far-reaching implications for chatbots in a variety of domains including education, negotiation and social dialog. @@ -8545,11 +8545,11 @@ StefanUltes PawełBudzianowski IñigoCasanueva - Lina M.Rojas-Barahona + Lina M.Rojas-Barahona Bo-HsiangTseng Yen-ChenWu - SteveYoung - MilicaGašić + SteveYoung + MilicaGašić 273–283 W18-5032 W18-5032.Attachment.pdf @@ -8559,8 +8559,8 @@ Conversational Image Editing: Incremental Intent Identification in a New Dialogue Task - RameshManuvinakurike - TrungBui + RameshManuvinakurike + TrungBui WalterChang KallirroiGeorgila 284–295 @@ -8595,7 +8595,7 @@ MarcoGuerini SimoneMagnolini VevakeBalaraman - BernardoMagnini + BernardoMagnini 317–326 W18-5036 We present a domain portable zero-shot learning approach for entity recognition in task-oriented conversational agents, which does not assume any annotated sentences at training time. Rather, we derive a neural model of the entity names based only on available gazetteers, and then apply the model to recognize new entities in the context of user utterances. In order to evaluate our working hypothesis we focus on nominal entities that are largely used in e-commerce to name products. Through a set of experiments in two languages (English and Italian) and three different domains (furniture, food, clothing), we show that the neural gazetteer-based approach outperforms several competitive baselines, with minimal requirements of linguistic features. @@ -8620,7 +8620,7 @@ FlorianKreyssig Bo-HsiangTseng Yen-chenWu - MilicaGašić + MilicaGašić 332–337 W18-5038 Reinforcement learning (RL) is a promising dialogue policy optimisation approach, but traditional RL algorithms fail to scale to large domains. Recently, Feudal Dialogue Management (FDM), has shown to increase the scalability to large domains by decomposing the dialogue management decision into two steps, making use of the domain ontology to abstract the dialogue state in each step. In order to abstract the state space, however, previous work on FDM relies on handcrafted feature functions. In this work, we show that these feature functions can be learned jointly with the policy model while obtaining similar performance, even outperforming the handcrafted features in several environments and domains. @@ -8635,7 +8635,7 @@ IñigoCasanueva Yen-ChenWu StefanUltes - MilicaGašić + MilicaGašić 338–343 W18-5039 Cross-domain natural language generation (NLG) is still a difficult task within spoken dialogue modelling. Given a semantic representation provided by the dialogue manager, the language generator should generate sentences that convey desired information. Traditional template-based generators can produce sentences with all necessary information, but these sentences are not sufficiently diverse. With RNN-based models, the diversity of the generated sentences can be high, however, in the process some information is lost. In this work, we improve an RNN-based generator by considering latent information at the sentence level during generation using conditional variational auto-encoder architecture. We demonstrate that our model outperforms the original RNN-based generator, while yielding highly diverse sentences. In addition, our model performs better when the training data is limited. @@ -8704,7 +8704,7 @@ Multi-task Learning for Joint Language Understanding and Dialogue State Tracking AbhinavRastogi RaghavGupta - DilekHakkani-Tur + DilekHakkani-Tur 376–384 W18-5045 This paper presents a novel approach for multi-task learning of language understanding (LU) and dialogue state tracking (DST) in task-oriented dialogue systems. Multi-task training enables the sharing of the neural network layers responsible for encoding the user utterance for both LU and DST and improves performance while reducing the number of network parameters. In our proposed framework, DST operates on a set of candidate values for each slot that has been mentioned so far. These candidate sets are generated using LU slot annotations for the current user utterance, dialogue acts corresponding to the preceding system utterance and the dialogue state estimated for the previous turn, enabling DST to handle slots with a large or unbounded set of possible values and deal with slot values not seen during training. Furthermore, to bridge the gap between training and inference, we investigate the use of scheduled sampling on LU output for the current user utterance as well as the DST output for the preceding turn. @@ -8735,7 +8735,7 @@ <fixed-case>C</fixed-case>ogent: A Generic Dialogue System Shell Based on a Collaborative Problem Solving Model LucianGalescu Choh ManTeng - JamesAllen + JamesAllen IanPerera 400–409 W18-5048 @@ -8763,7 +8763,7 @@ RuihongHuang VinodkumarPrabhakaran RobVoigt - ZeerakWaseem + ZeerakWaseem JacquelineWernimont Association for Computational Linguistics
Brussels, Belgium
@@ -8788,7 +8788,7 @@
Hate Speech Dataset from a White Supremacy Forum - Onade Gibert + Onade Gibert NaiaraPerez AitorGarcía-Pablos MontseCuadros @@ -8812,7 +8812,7 @@ Predictive Embeddings for Hate Speech Detection on <fixed-case>T</fixed-case>witter RohanKshirsagar TyrusCukuvac - KathyMcKeown + KathyMcKeown SusanMcGregor 26–32 W18-5104 @@ -8838,7 +8838,7 @@ AmanVarshney Syed SarfarazAkhtar DeepanshuVijay - ManishShrivastava + ManishShrivastava 43–50 W18-5106 In the past few years, bully and aggressive posts on social media have grown significantly, causing serious consequences for victims/users of all demographics. Majority of the work in this field has been done for English only. In this paper, we introduce a deep learning based classification system for Facebook posts and comments of Hindi-English Code-Mixed text to detect the aggressive behaviour of/towards users. Our work focuses on text from users majorly in the Indian Subcontinent. The dataset that we used for our models is provided by TRAC-1in their shared task. Our classification model assigns each Facebook post/comment to one of the three predefined categories: “Overtly Aggressive”, “Covertly Aggressive” and “Non-Aggressive”. We experimented with 6 classification models and our CNN model on a 10 K-fold cross-validation gave the best result with the prediction accuracy of 73.2%. @@ -8847,7 +8847,7 @@ Creating a <fixed-case>W</fixed-case>hats<fixed-case>A</fixed-case>pp Dataset to Study Pre-teen Cyberbullying - RacheleSprugnoli + RacheleSprugnoli StefanoMenini SaraTonelli FilippoOncini @@ -8885,7 +8885,7 @@ The Effects of User Features on <fixed-case>T</fixed-case>witter Hate Speech Detection EliseFehn Unsvåg - BjörnGambäck + BjörnGambäck 75–85 W18-5110 The paper investigates the potential effects user features have on hate speech classification. A quantitative analysis of Twitter data was conducted to better understand user characteristics, but no correlations were found between hateful text and the characteristics of the users who had posted it. However, experiments with a hate speech classifier based on datasets from three different languages showed that combining certain user features with textual features gave slight improvements of classification performance. While the incorporation of user features resulted in varying impact on performance for the different datasets used, user network-related features provided the most consistent improvements. @@ -8947,7 +8947,7 @@ Datasets of <fixed-case>S</fixed-case>lovene and <fixed-case>C</fixed-case>roatian Moderated News Comments NikolaLjubešić - TomažErjavec + TomažErjavec DarjaFišer 124–131 W18-5116 @@ -8957,7 +8957,7 @@ Cross-Domain Detection of Abusive Language Online - Vanja MladenKaran + Vanja MladenKaran JanŠnajder 132–137 W18-5117 @@ -8970,7 +8970,7 @@ PuneetMathur RamitSawhney MeghnaAyyar - RajivShah + RajivShah 138–148 W18-5118 The use of code-switched languages (e.g., Hinglish, which is derived by the blending of Hindi with the English language) is getting much popular on Twitter due to their ease of communication in native languages. However, spelling variations and absence of grammar rules introduce ambiguity and make it difficult to understand the text automatically. This paper presents the Multi-Input Multi-Channel Transfer Learning based model (MIMCT) to detect offensive (hate speech or abusive) Hinglish tweets from the proposed Hinglish Offensive Tweet (HOT) dataset using transfer learning coupled with multiple feature inputs. Specifically, it takes multiple primary word embedding along with secondary extracted features as inputs to train a multi-channel CNN-LSTM architecture that has been pre-trained on English tweets through transfer learning. The proposed MIMCT model outperforms the baseline supervised classification models, transfer learning based CNN and LSTM models to establish itself as the state of the art in the unexplored domain of Hinglish offensive text classification. @@ -9075,7 +9075,7 @@ An Argument-Annotated Corpus of Scientific Publications AnneLauscher GoranGlavaš - Simone PaoloPonzetto + Simone PaoloPonzetto 40–46 W18-5206 Argumentation is an essential feature of scientific language. We present an annotation study resulting in a corpus of scientific publications annotated with argumentative components and relations. The argumentative annotations have been added to the existing Dr. Inventor Corpus, already annotated for four other rhetorical aspects. We analyze the annotated argumentative structures and investigate the relations between argumentation and other rhetorical aspects of scientific writing, such as discourse roles and citation contexts. @@ -9095,7 +9095,7 @@ Argument Component Classification for Classroom Discussions LucaLugini - DianeLitman + DianeLitman 57–67 W18-5208 This paper focuses on argument component classification for transcribed spoken classroom discussions, with the goal of automatically classifying student utterances into claims, evidence, and warrants. We show that an existing method for argument component classification developed for another educationally-oriented domain performs poorly on our dataset. We then show that feature sets from prior work on argument mining for student essays and online dialogues can be used to improve performance considerably. We also provide a comparison between convolutional neural networks and recurrent neural networks when trained under different conditions to classify argument components in classroom discussions. While neural network models are not always able to outperform a logistic regression model, we were able to gain some useful insights: convolutional networks are more robust than recurrent networks both at the character and at the word level, and specificity information can help boost performance in multi-task training. @@ -9104,7 +9104,7 @@ Evidence Types, Credibility Factors, and Patterns or Soft Rules for Weighing Conflicting Evidence: Argument Mining in the Context of Legal Rules Governing Evidence Assessment - Vern R.Walker + Vern R.Walker DinaFoerster Julia MonicaPonce MatthewRosen @@ -9144,7 +9144,7 @@ MaoranXu HaoFu YangLiu - XuanjingHuang + XuanjingHuang 97–104 W18-5212 In this paper, we propose to incorporate topic aspects information for online comments convincingness evaluation. Our model makes use of graph convolutional network to utilize implicit topic information within a discussion thread to assist the evaluation of convincingness of each single comment. In order to test the effectiveness of our proposed model, we annotate topic information on top of a public dataset for argument convincingness evaluation. Experimental results show that topic information is able to improve the performance for convincingness evaluation. We also make a move to detect topic aspects automatically. @@ -9153,7 +9153,7 @@ Proposed Method for Annotation of Scientific Arguments in Terms of Semantic Relations and Argument Schemes - NancyGreen + NancyGreen 105–110 W18-5213 This paper presents a proposed method for annotation of scientific arguments in biological/biomedical journal articles. Semantic entities and relations are used to represent the propositional content of arguments in instances of argument schemes. We describe an experiment in which we encoded the arguments in a journal article to identify issues in this approach. Our catalogue of argument schemes and a copy of the annotated article are now publically available. @@ -9173,7 +9173,7 @@ Dave the debater: a retrieval-based and generative argumentative dialogue agent Dieu ThuLe - Cam-TuNguyen + Cam-TuNguyen Kim AnhNguyen 121–130 W18-5215 @@ -9220,7 +9220,7 @@ Proceedings of the 6th BioASQ Workshop A challenge on large-scale biomedical semantic indexing and question answering W18-53 - Ioannis A.Kakadiaris + Ioannis A.Kakadiaris GeorgePaliouras AnastasiaKrithara Association for Computational Linguistics @@ -9258,7 +9258,7 @@ <fixed-case>M</fixed-case>acquarie <fixed-case>U</fixed-case>niversity at <fixed-case>B</fixed-case>io<fixed-case>ASQ</fixed-case> 6b: Deep learning and deep reinforcement learning for query-based summarisation - DiegoMollá + DiegoMollá 22–29 W18-5303 This paper describes Macquarie University’s contribution to the BioASQ Challenge (BioASQ 6b, Phase B). We focused on the extraction of the ideal answers, and the task was approached as an instance of query-based multi-document summarisation. In particular, this paper focuses on the experiments related to the deep learning and reinforcement learning approaches used in the submitted runs. The best run used a deep learning model under a regression-based framework. The deep learning architecture used features derived from the output of LSTM chains on word embeddings, plus features based on similarity with the query, and sentence position. The reinforcement learning approach was a proof-of-concept prototype that trained a global policy using REINFORCE. The global policy was implemented as a neural network that used tf.idf features encoding the candidate sentence, question, and context. @@ -9281,8 +9281,8 @@ <fixed-case>M</fixed-case>ind<fixed-case>L</fixed-case>ab Neural Network Approach at <fixed-case>B</fixed-case>io<fixed-case>ASQ</fixed-case> 6<fixed-case>B</fixed-case> AndrésRosso-Mateus - Fabio A.González - ManuelMontes-y-Gómez + Fabio A.González + ManuelMontes-y-Gómez 40–46 W18-5305 Biomedical Question Answering is concerned with the development of methods and systems that automatically find answers to natural language posed questions. In this work, we describe the system used in the BioASQ Challenge task 6b for document retrieval and snippet retrieval (with particular emphasis in this subtask). The proposed model makes use of semantic similarity patterns that are evaluated and measured by a convolutional neural network architecture. Subsequently, the snippet ranking performance is improved with a pseudo-relevance feedback approach in a later step. Based on the preliminary results, we reached the second position in snippet retrieval sub-task. @@ -9293,7 +9293,7 @@ <fixed-case>A</fixed-case>ttention<fixed-case>M</fixed-case>e<fixed-case>SH</fixed-case>: Simple, Effective and Interpretable Automatic <fixed-case>M</fixed-case>e<fixed-case>SH</fixed-case> Indexer QiaoJin BhuwanDhingra - WilliamCohen + WilliamCohen XinghuaLu 47–56 W18-5306 @@ -9308,7 +9308,7 @@ QiuzeWu BoyueLi KhyathiChandu - EricNyberg + EricNyberg 57–65 W18-5307 The growing number of biomedical publications is a challenge for human researchers, who invest considerable effort to search for relevant documents and pinpointed answers. Biomedical Question Answering can automatically generate answers for a user’s topic or question, significantly reducing the effort required to locate the most relevant information in a large document corpus. Extractive summarization techniques, which concatenate the most relevant text units drawn from multiple documents, perform well on automatic evaluation metrics like ROUGE, but score poorly on human readability, due to the presence of redundant text and grammatical errors in the answer. This work moves toward abstractive summarization, which attempts to distill and present the meaning of the original text in a more coherent way. We incorporate a sentence fusion approach, based on Integer Linear Programming, along with three novel approaches for sentence ordering, in an attempt to improve the human readability of ideal answers. Using an open framework for configuration space exploration (BOOM), we tested over 2000 unique system configurations in order to identify the best-performing combinations for the sixth edition of Phase B of the BioASQ challenge. @@ -9344,7 +9344,7 @@ PramatiKalwad KhyathiChandu TerukoMitamura - EricNyberg + EricNyberg 79–89 W18-5310 The ever-increasing magnitude of biomedical information sources makes it difficult and time-consuming for a human researcher to find the most relevant documents and pinpointed answers for a specific question or topic when using only a traditional search engine. Biomedical Question Answering systems automatically identify the most relevant documents and pinpointed answers, given an information need expressed as a natural language question. Generating a non-redundant, human-readable summary that satisfies the information need of a given biomedical question is the focus of the Ideal Answer Generation task, part of the BioASQ challenge. This paper presents a system for ideal answer generation (using ontology-based retrieval and a neural learning-to-rank approach, combined with extractive and abstractive summarization techniques) which achieved the highest ROUGE score of 0.659 on the BioASQ 5b batch 2 test. @@ -9357,7 +9357,7 @@ Proceedings of the 2018 EMNLP Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP W18-54 TalLinzen - GrzegorzChrupała + GrzegorzChrupała AfraAlishahi Association for Computational Linguistics
Brussels, Belgium
@@ -9373,7 +9373,7 @@ When does deep multi-task learning work for loosely related document classification tasks? EmmaKerinec ChloéBraud - AndersSøgaard + AndersSøgaard 1–8 W18-5401 This work aims to contribute to our understanding of when multi-task learning through parameter sharing in deep neural networks leads to improvements over single-task learning. We focus on the setting of learning from loosely related tasks, for which no theoretical guarantees exist. We therefore approach the question empirically, studying which properties of datasets and single-task learning characteristics correlate with improvements from multi-task learning. We are the first to study this in a text classification setting and across more than 500 different task pairs. @@ -9383,7 +9383,7 @@ Analyzing Learned Representations of a Deep <fixed-case>ASR</fixed-case> Performance Prediction Model ZiedElloumi - LaurentBesacier + LaurentBesacier OlivierGalibert BenjaminLecouteux 9–15 @@ -9396,7 +9396,7 @@ Explaining non-linear Classifier Decisions within Kernel-based Deep Architectures DaniloCroce DanieleRossini - RobertoBasili + RobertoBasili 16–24 W18-5403 Nonlinear methods such as deep neural networks achieve state-of-the-art performances in several semantic NLP tasks. However epistemologically transparent decisions are not provided as for the limited interpretability of the underlying acquired neural models. In neural-based semantic inference tasks epistemological transparency corresponds to the ability of tracing back causal connections between the linguistic properties of a input instance and the produced classification output. In this paper, we propose the use of a methodology, called Layerwise Relevance Propagation, over linguistically motivated neural architectures, namely Kernel-based Deep Architectures (KDA), to guide argumentations and explanation inferences. In such a way, each decision provided by a KDA can be linked to real examples, linguistically related to the input instance: these can be used to motivate the network output. Quantitative analysis shows that richer explanations about the semantic and syntagmatic structures of the examples characterize more convincing arguments in two tasks, i.e. question classification and semantic role labeling. @@ -9405,7 +9405,7 @@ Nightmare at test time: How punctuation prevents parsers from generalizing - AndersSøgaard + AndersSøgaard Miryamde Lhoneux IsabelleAugenstein 25–29 @@ -9417,7 +9417,7 @@ Evaluating Textual Representations through Image Generation GrahamSpinks - Marie-FrancineMoens + Marie-FrancineMoens 30–39 W18-5405 We present a methodology for determining the quality of textual representations through the ability to generate images from them. Continuous representations of textual input are ubiquitous in modern Natural Language Processing techniques either at the core of machine learning algorithms or as the by-product at any given layer of a neural network. While current techniques to evaluate such representations focus on their performance on particular tasks, they don’t provide a clear understanding of the level of informational detail that is stored within them, especially their ability to represent spatial information. The central premise of this paper is that visual inspection or analysis is the most convenient method to quickly and accurately determine information content. Through the use of text-to-image neural networks, we propose a new technique to compare the quality of textual representations by visualizing their information content. The method is illustrated on a medical dataset where the correct representation of spatial information and shorthands are of particular importance. For four different well-known textual representations, we show with a quantitative analysis that some representations are consistently able to deliver higher quality visualizations of the information content. Additionally, we show that the quantitative analysis technique correlates with the judgment of a human expert evaluator in terms of alignment. @@ -9426,7 +9426,7 @@ On the Role of Text Preprocessing in Neural Network Architectures: An Evaluation Study on Text Categorization and Sentiment Analysis - JoseCamacho-Collados + JoseCamacho-Collados Mohammad TaherPilehvar 40–46 W18-5406 @@ -9437,7 +9437,7 @@ Jump to better conclusions: <fixed-case>SCAN</fixed-case> both left and right JasmijnBastings - MarcoBaroni + MarcoBaroni JasonWeston KyunghyunCho DouweKiela @@ -9462,7 +9462,7 @@ Linguistic representations in multi-task neural networks for ellipsis resolution OlaRønning DanielHardt - AndersSøgaard + AndersSøgaard 66–73 W18-5409 Sluicing resolution is the task of identifying the antecedent to a question ellipsis. Antecedents are often sentential constituents, and previous work has therefore relied on syntactic parsing, together with complex linguistic features. A recent model instead used partial parsing as an auxiliary task in sequential neural network architectures to inject syntactic information. We explore the linguistic information being brought to bear by such networks, both by defining subsets of the data exhibiting relevant linguistic characteristics, and by examining the internal representations of the network. Both perspectives provide evidence for substantial linguistic knowledge being deployed by the neural networks. @@ -9474,7 +9474,7 @@ ShunKiyono ShoTakase JunSuzuki - NaoakiOkazaki + NaoakiOkazaki KentaroInui MasaakiNagata 74–81 @@ -9486,8 +9486,8 @@ Rule induction for global explanation of trained models MadhumitaSushil - SimonŠuster - WalterDaelemans + SimonŠuster + WalterDaelemans 82–97 W18-5411 Understanding the behavior of a trained network and finding explanations for its outputs is important for improving the network’s performance and generalization ability, and for ensuring trust in automated systems. Several approaches have previously been proposed to identify and visualize the most important features by analyzing a trained network. However, the relations between different features and classes are lost in most cases. We propose a technique to induce sets of if-then-else rules that capture these relations to globally explain the predictions of a network. We first calculate the importance of the features in the trained network. We then weigh the original inputs with these feature importance scores, simplify the transformed input space, and finally fit a rule induction model to explain the model predictions. We find that the output rule-sets can explain the predictions of a neural network trained for 4-class text classification from the 20 newsgroups dataset to a macro-averaged F-score of 0.80. We make the code available at https://github.com/clips/interpret_with_rules. @@ -9498,7 +9498,7 @@ Can <fixed-case>LSTM</fixed-case> Learn to Capture Agreement? The Case of <fixed-case>B</fixed-case>asque ShauliRavfogel YoavGoldberg - FrancisTyers + FrancisTyers 98–107 W18-5412 Sequential neural networks models are powerful tools in a variety of Natural Language Processing (NLP) tasks. The sequential nature of these models raises the questions: to what extent can these models implicitly learn hierarchical structures typical to human language, and what kind of grammatical phenomena can they acquire? We focus on the task of agreement prediction in Basque, as a case study for a task that requires implicit understanding of sentence structure and the acquisition of a complex but consistent morphological system. Analyzing experimental results from two syntactic prediction tasks – verb number prediction and suffix recovery – we find that sequential models perform worse on agreement prediction in Basque than one might expect on the basis of a previous agreement prediction work in English. Tentative findings based on diagnostic classifiers suggest the network makes use of local heuristics as a proxy for the hierarchical structure of the sentence. We propose the Basque agreement prediction task as challenging benchmark for models that attempt to learn regularities in human language. @@ -9508,7 +9508,7 @@ Rearranging the Familiar: Testing Compositional Generalization in Recurrent Networks JoãoLoula - MarcoBaroni + MarcoBaroni BrendenLake 108–114 W18-5413 @@ -9519,7 +9519,7 @@ Evaluating the Ability of <fixed-case>LSTM</fixed-case>s to Learn Context-Free Grammars LuziSennhauser - RobertBerwick + RobertBerwick 115–124 W18-5414 While long short-term memory (LSTM) neural net architectures are designed to capture sequence information, human language is generally composed of hierarchical structures. This raises the question as to whether LSTMs can learn hierarchical structures. We explore this question with a well-formed bracket prediction task using two types of brackets modeled by an LSTM. Demonstrating that such a system is learnable by an LSTM is the first step in demonstrating that the entire class of CFLs is also learnable. We observe that the model requires exponential memory in terms of the number of characters and embedded depth, where a sub-linear memory should suffice. Still, the model does more than memorize the training input. It learns how to distinguish between relevant and irrelevant information. On the other hand, we also observe that the model does not generalize well. We conclude that LSTMs do not learn the relevant underlying context-free rules, suggesting the good overall performance is attained rather by an efficient way of evaluating nuisance variables. LSTMs are a way to quickly reach good results for many natural language tasks, but to understand and generate natural language one has to investigate other concepts that can make more direct use of natural language’s structural nature. @@ -9561,7 +9561,7 @@ <fixed-case>LISA</fixed-case>: Explaining Recurrent Neural Network Judgments via Layer-w<fixed-case>I</fixed-case>se Semantic Accumulation and Example to Pattern Transformation PankajGupta - HinrichSchütze + HinrichSchütze 154–164 W18-5418 Recurrent neural networks (RNNs) are temporal networks and cumulative in nature that have shown promising results in various natural language processing tasks. Despite their success, it still remains a challenge to understand their hidden behavior. In this work, we analyze and interpret the cumulative nature of RNN via a proposed technique named as Layer-wIse-Semantic-Accumulation (LISA) for explaining decisions and detecting the most likely (i.e., saliency) patterns that the network relies on while decision making. We demonstrate (1) LISA: “How an RNN accumulates or builds semantics during its sequential processing for a given text example and expected response” (2) Example2pattern: “How the saliency patterns look like for each category in the data according to the network in decision making”. We analyse the sensitiveness of RNNs about different inputs to check the increase or decrease in prediction scores and further extract the saliency patterns learned by the network. We employ two relation classification datasets: SemEval 10 Task 8 and TAC KBP Slot Filling to explain RNN predictions via the LISA and example2pattern. @@ -9572,7 +9572,7 @@ Analysing the potential of seq-to-seq models for incremental interpretation in task-oriented dialogue DieuwkeHupkes SanneBouwmeester - RaquelFernández + RaquelFernández 165–174 W18-5419 We investigate how encoder-decoder models trained on a synthetic dataset of task-oriented dialogues process disfluencies, such as hesitations and self-corrections. We find that, contrary to earlier results, disfluencies have very little impact on the task success of seq-to-seq models with attention. Using visualisations and diagnostic classifiers, we analyse the representations that are incrementally built by the model, and discover that models develop little to no awareness of the structure of disfluencies. However, adding disfluencies to the data appears to help the model create clearer representations overall, as evidenced by the attention patterns the different models exhibit. @@ -9615,7 +9615,7 @@ What do <fixed-case>RNN</fixed-case> Language Models Learn about Filler–Gap Dependencies? EthanWilcox - RogerLevy + RogerLevy TakashiMorita RichardFutrell 211–221 @@ -9637,7 +9637,7 @@ Closing Brackets with Recurrent Neural Networks NataliaSkachkova - ThomasTrost + ThomasTrost DietrichKlakow 232–239 W18-5425 @@ -9705,7 +9705,7 @@ An Analysis of Encoder Representations in Transformer-Based Machine Translation AlessandroRaganato - JörgTiedemann + JörgTiedemann 287–297 W18-5431 The attention mechanism is a successful technique in modern NLP, especially in tasks like machine translation. The recently proposed network architecture of the Transformer is based entirely on attention mechanisms and achieves new state of the art results in neural machine translation, outperforming other sequence-to-sequence models. However, so far not much is known about the internal properties of the model and the representations it learns to achieve that performance. To study this question, we investigate the information that is learned by the attention mechanism in Transformer models with different translation quality. We assess the representations of the encoder by extracting dependency relations based on self-attention weights, we perform four probing tasks to study the amount of syntactic and semantic captured information and we also test attention in a transfer learning scenario. Our analysis sheds light on the relative strengths and weaknesses of the various encoder representations. We observe that specific attention heads mark syntactic dependency relations and we can also confirm that lower layers tend to learn more about syntax while higher layers tend to encode more semantics. @@ -9717,7 +9717,7 @@ JohnnyWei KhiemPham BrendanO’Connor - BrianDillon + BrianDillon 298–305 W18-5432 Sequence to sequence (seq2seq) models are often employed in settings where the target output is natural language. However, the syntactic properties of the language generated from these models are not well understood. We explore whether such output belongs to a formal and realistic grammar, by employing the English Resource Grammar (ERG), a broad coverage, linguistically precise HPSG-based grammar of English. From a French to English parallel corpus, we analyze the parseability and grammatical constructions occurring in output from a seq2seq translation model. Over 93% of the model translations are parseable, suggesting that it learns to generate conforming to a grammar. The model has trouble learning the distribution of rarer syntactic rules, and we pinpoint several constructions that differentiate translations between the references and our model. @@ -9778,7 +9778,7 @@ Interpretable Textual Neuron Representations for <fixed-case>NLP</fixed-case> NinaPoerner BenjaminRoth - HinrichSchütze + HinrichSchütze 325–327 W18-5437 Input optimization methods, such as Google Deep Dream, create interpretable representations of neurons for computer vision DNNs. We propose and evaluate ways of transferring this technology to NLP. Our results suggest that gradient ascent with a gumbel softmax layer produces n-gram representations that outperform naive corpus search in terms of target neuron activation. The representations highlight differences in syntax awareness between the language and visual models of the Imaginet architecture. @@ -9835,7 +9835,7 @@ Interpretable Word Embedding Contextualization Kyoung-RokJang - Sung-HyonMyaeng + Sung-HyonMyaeng Sang-BumKim 341–343 W18-5442 @@ -9846,7 +9846,7 @@ State Gradients for <fixed-case>RNN</fixed-case> Memory Analysis LyanVerwimp - HugoVan hamme + HugoVan hamme VincentRenkens PatrickWambacq 344–346 @@ -9881,7 +9881,7 @@ JulianMichael FelixHill OmerLevy - SamuelBowman + SamuelBowman 353–355 W18-5446 Human ability to understand language is general, flexible, and robust. In contrast, most NLU models above the word level are designed for a specific task and struggle with out-of-domain data. If we aspire to develop models with understanding beyond the detection of superficial correspondences between inputs and outputs, then it is critical to develop a unified model that can execute a range of linguistic tasks across different domains. To facilitate research in this direction, we present the General Language Understanding Evaluation (GLUE, gluebenchmark.com): a benchmark of nine diverse NLU tasks, an auxiliary dataset for probing models for understanding of specific linguistic phenomena, and an online platform for evaluating and comparing models. For some benchmark tasks, training data is plentiful, but for others it is limited or does not match the genre of the test set. GLUE thus favors models that can represent linguistic knowledge in a way that facilitates sample-efficient learning and effective knowledge-transfer across tasks. While none of the datasets in GLUE were created from scratch for the benchmark, four of them feature privately-held test data, which is used to ensure that the benchmark is used fairly. We evaluate baselines that use ELMo (Peters et al., 2018), a powerful transfer learning technique, as well as state-of-the-art sentence representation models. The best models still achieve fairly low absolute scores. Analysis with our diagnostic dataset yields similarly weak performance over all phenomena tested, with some exceptions. @@ -9901,7 +9901,7 @@ Language Modeling Teaches You More than Translation Does: Lessons Learned Through Auxiliary Syntactic Task Analysis KellyZhang - SamuelBowman + SamuelBowman 359–361 W18-5448 Recently, researchers have found that deep LSTMs trained on tasks like machine translation learn substantial syntactic and semantic information about their input sentences, including part-of-speech. These findings begin to shed light on why pretrained representations, like ELMo and CoVe, are so beneficial for neural language understanding models. We still, though, do not yet have a clear understanding of how the choice of pretraining objective affects the type of linguistic information that models learn. With this in mind, we compare four objectives—language modeling, translation, skip-thought, and autoencoding—on their ability to induce syntactic and part-of-speech information, holding constant the quantity and genre of the training data, as well as the LSTM architecture. @@ -9924,7 +9924,7 @@ Interpretable Structure Induction via Sparse Attention BenPeters VladNiculae - André F. T.Martins + André F. T.Martins 365–367 W18-5450 Neural network methods are experiencing wide adoption in NLP, thanks to their empirical performance on many tasks. Modern neural architectures go way beyond simple feedforward and recurrent models: they are complex pipelines that perform soft, differentiable computation instead of discrete logic. The price of such soft computing is the introduction of dense dependencies, which make it hard to disentangle the patterns that trigger a prediction. Our recent work on sparse and structured latent computation presents a promising avenue for enhancing interpretability of such neural pipelines. Through this extended abstract, we aim to discuss and explore the potential and impact of our methods. @@ -9938,7 +9938,7 @@ MichaelBehrisch AdamPerer HanspeterPfister - AlexanderRush + AlexanderRush 368–370 W18-5451 Neural attention-based sequence-to-sequence models (seq2seq) (Sutskever et al., 2014; Bahdanau et al., 2014) have proven to be accurate and robust for many sequence prediction tasks. They have become the standard approach for automatic translation of text, at the cost of increased model complexity and uncertainty. End-to-end trained neural models act as a black box, which makes it difficult to examine model decisions and attribute errors to a specific part of a model. The highly connected and high-dimensional internal representations pose a challenge for analysis and visualization tools. The development of methods to understand seq2seq predictions is crucial for systems in production settings, as mistakes involving language are often very apparent to human readers. For instance, a widely publicized incident resulted from a translation system mistakenly translating “good morning” into “attack them” leading to a wrongful arrest (Hern, 2017). @@ -9949,7 +9949,7 @@ Grammar Induction with Neural Language Models: An Unusual Replication Phu MonHtut KyunghyunCho - SamuelBowman + SamuelBowman 371–373 W18-5452 Grammar induction is the task of learning syntactic structure without the expert-labeled treebanks (Charniak and Carroll, 1992; Klein and Manning, 2002). Recent work on latent tree learning offers a new family of approaches to this problem by inducing syntactic structure using the supervision from a downstream NLP task (Yogatama et al., 2017; Maillard et al., 2017; Choi et al., 2018). In a recent paper published at ICLR, Shen et al. (2018) introduce such a model and report near state-of-the-art results on the target task of language modeling, and the first strong latent tree learning result on constituency parsing. During the analysis of this model, we discover issues that make the original results hard to trust, including tuning and even training on what is effectively the test set. Here, we analyze the model under different configurations to understand what it learns and to identify the conditions under which it succeeds. We find that this model represents the first empirical success for neural network latent tree learning, and that neural language modeling warrants further study as a setting for grammar induction. @@ -9981,7 +9981,7 @@ End-to-end Image Captioning Exploits Distributional Similarity in Multimodal Space - Pranava SwaroopMadhyastha + Pranava SwaroopMadhyastha JosiahWang LuciaSpecia 381–383 @@ -10037,8 +10037,8 @@ The Data Challenge in Misinformation Detection: Source Reputation vs. Content Veracity - FatemehTorabi Asr - MaiteTaboada + FatemehTorabi Asr + MaiteTaboada 10–15 W18-5502 Misinformation detection at the level of full news articles is a text classification problem. Reliably labeled data in this domain is rare. Previous work relied on news articles collected from so-called “reputable” and “suspicious” websites and labeled accordingly. We leverage fact-checking websites to collect individually-labeled news articles with regard to the veracity of their content and use this data to test the cross-domain generalization of a classifier trained on bigger text collections but labeled according to source reputation. Our results suggest that reputation-based classification is not sufficient for predicting the veracity level of the majority of news articles, and that the system performance on different test datasets depends on topic distribution. Therefore collecting well-balanced and carefully-assessed training data is a priority for developing robust misinformation detection systems. @@ -10049,7 +10049,7 @@ Crowdsourcing Semantic Label Propagation in Relation Classification AncaDumitrache LoraAroyo - ChrisWelty + ChrisWelty 16–21 W18-5503 Distant supervision is a popular method for performing relation extraction from text that is known to produce noisy labels. Most progress in relation extraction and classification has been made with crowdsourced corrections to distant-supervised labels, and there is evidence that indicates still more would be better. In this paper, we explore the problem of propagating human annotation signals gathered for open-domain relation classification through the CrowdTruth methodology for crowdsourcing, that captures ambiguity in annotations by measuring inter-annotator disagreement. Our approach propagates annotations to sentences that are similar in a low dimensional embedding space, expanding the number of labels by two orders of magnitude. Our experiments show significant improvement in a sentence-level multi-class relation classifier. @@ -10059,8 +10059,8 @@ Retrieve and Re-rank: A Simple and Effective <fixed-case>IR</fixed-case> Approach to Simple Question Answering over Knowledge Graphs VishalGupta - ManojChinnakotla - ManishShrivastava + ManojChinnakotla + ManishShrivastava 22–27 W18-5504 SimpleQuestions is a commonly used benchmark for single-factoid question answering (QA) over Knowledge Graphs (KG). Existing QA systems rely on various components to solve different sub-tasks of the problem (such as entity detection, entity linking, relation prediction and evidence integration). In this work, we propose a different approach to the problem and present an information retrieval style solution for it. We adopt a two-phase approach: candidate generation and candidate re-ranking to answer questions. We propose a Triplet-Siamese-Hybrid CNN (TSHCNN) to re-rank candidate answers. Our approach achieves an accuracy of 80% which sets a new state-of-the-art on the SimpleQuestions dataset. @@ -10174,7 +10174,7 @@ Affordance Extraction and Inference based on Semantic Role Labeling DanielLoureiro - AlípioJorge + AlípioJorge 91–96 W18-5514 Common-sense reasoning is becoming increasingly important for the advancement of Natural Language Processing. While word embeddings have been very successful, they cannot explain which aspects of ‘coffee’ and ‘tea’ make them similar, or how they could be related to ‘shop’. In this paper, we propose an explicit word representation that builds upon the Distributional Hypothesis to represent meaning from semantic roles, and allow inference of relations from their meshing, as supported by the affordance-based Indexical Hypothesis. We find that our model improves the state-of-the-art on unsupervised word similarity tasks while allowing for direct inference of new relations from the same vector space. @@ -10231,7 +10231,7 @@ <fixed-case>SIRIUS</fixed-case>-<fixed-case>LTG</fixed-case>: An Entity Linking Approach to Fact Extraction and Verification FarhadNooralahzadeh - LiljaØvrelid + LiljaØvrelid 119–123 W18-5519 This article presents the SIRIUS-LTG system for the Fact Extraction and VERification (FEVER) Shared Task. It consists of three components: 1) Wikipedia Page Retrieval: First we extract the entities in the claim, then we find potential Wikipedia URI candidates for each of the entities using a SPARQL query over DBpedia 2) Sentence selection: We investigate various techniques i.e. Smooth Inverse Frequency (SIF), Word Mover’s Distance (WMD), Soft-Cosine Similarity, Cosine similarity with unigram Term Frequency Inverse Document Frequency (TF-IDF) to rank sentences by their similarity to the claim. 3) Textual Entailment: We compare three models for the task of claim classification. We apply a Decomposable Attention (DA) model (Parikh et al., 2016), a Decomposed Graph Entailment (DGE) model (Khot et al., 2018) and a Gradient-Boosted Decision Trees (TalosTree) model (Sean et al., 2017) for this task. The experiments show that the pipeline with simple Cosine Similarity using TFIDF in sentence selection along with DA model as labelling model achieves the best results on the development set (F1 evidence: 32.17, label accuracy: 59.61 and FEVER score: 0.3778). Furthermore, it obtains 30.19, 48.87 and 36.55 in terms of F1 evidence, label accuracy and FEVER score, respectively, on the test set. Our system ranks 15th among 23 participants in the shared task prior to any human-evaluation of the evidence. @@ -10297,7 +10297,7 @@ Team <fixed-case>SWEEP</fixed-case>er: Joint Sentence Extraction and Fact Checking with Pointer Networks ChristopherHidey - MonaDiab + MonaDiab 150–155 W18-5525 Many tasks such as question answering and reading comprehension rely on information extracted from unreliable sources. These systems would thus benefit from knowing whether a statement from an unreliable source is correct. We present experiments on the FEVER (Fact Extraction and VERification) task, a shared task that involves selecting sentences from Wikipedia and predicting whether a claim is supported by those sentences, refuted, or there is not enough information. Fact checking is a task that benefits from not only asserting or disputing the veracity of a claim but also finding evidence for that position. As these tasks are dependent on each other, an ideal model would consider the veracity of the claim when finding evidence and also find only the evidence that is relevant. We thus jointly model sentence extraction and verification on the FEVER shared task. Among all participants, we ranked 5th on the blind test set (prior to any additional human evaluation of the evidence). @@ -10308,7 +10308,7 @@ <fixed-case>QED</fixed-case>: A fact verification system for the <fixed-case>FEVER</fixed-case> shared task JacksonLuken NanjiangJiang - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe 156–160 W18-5526 This paper describes our system submission to the 2018 Fact Extraction and VERification (FEVER) shared task. The system uses a heuristics-based approach for evidence extraction and a modified version of the inference model by Parikh et al. (2016) for classification. Our process is broken down into three modules: potentially relevant documents are gathered based on key phrases in the claim, then any possible evidence sentences inside those documents are extracted, and finally our classifier discards any evidence deemed irrelevant and uses the remaining to classify the claim’s veracity. Our system beats the shared task baseline by 12% and is successful at finding correct evidence (evidence retrieval F1 of 62.5% on the development set). @@ -10319,7 +10319,7 @@ Team <fixed-case>UMBC</fixed-case>-<fixed-case>FEVER</fixed-case> : Claim verification using Semantic Lexical Resources AnkurPadia FrancisFerraro - TimFinin + TimFinin 161–165 W18-5527 We describe our system used in the 2018 FEVER shared task. The system employed a frame-based information retrieval approach to select Wikipedia sentences providing evidence and used a two-layer multilayer perceptron to classify a claim as correct or not. Our submission achieved a score of 0.3966 on the Evidence F1 metric with accuracy of 44.79%, and FEVER score of 0.2628 F1 points. @@ -10342,7 +10342,7 @@ Proceedings of the Ninth International Workshop on Health Text Mining and Information Analysis W18-56 - AlbertoLavelli + AlbertoLavelli Anne-LyseMinard FabioRinaldi Association for Computational Linguistics @@ -10383,9 +10383,9 @@ Revisiting neural relation classification in clinical notes with external information - SimonŠuster + SimonŠuster MadhumitaSushil - WalterDaelemans + WalterDaelemans 22–28 W18-5603 Recently, segment convolutional neural networks have been proposed for end-to-end relation extraction in the clinical domain, achieving results comparable to or outperforming the approaches with heavy manual feature engineering. In this paper, we analyze the errors made by the neural classifier based on confusion matrices, and then investigate three simple extensions to overcome its limitations. We find that including ontological association between drugs and problems, and data-induced association between medical concepts does not reliably improve the performance, but that large gains are obtained by the incorporation of semantic classes to capture relation triggers. @@ -10395,7 +10395,7 @@ Supervised Machine Learning for Extractive Query Based Summarisation of Biomedical Data MandeepKaur - DiegoMollá + DiegoMollá 29–37 W18-5604 The automation of text summarisation of biomedical publications is a pressing need due to the plethora of information available online. This paper explores the impact of several supervised machine learning approaches for extracting multi-document summaries for given queries. In particular, we compare classification and regression approaches for query-based extractive summarisation using data provided by the BioASQ Challenge. We tackled the problem of annotating sentences for training classification systems and show that a simple annotation approach outperforms regression-based summarisation. @@ -10406,7 +10406,7 @@ Comparing <fixed-case>CNN</fixed-case> and <fixed-case>LSTM</fixed-case> character-level embeddings in <fixed-case>B</fixed-case>i<fixed-case>LSTM</fixed-case>-<fixed-case>CRF</fixed-case> models for chemical and disease named entity recognition ZenanZhai Dat QuocNguyen - KarinVerspoor + KarinVerspoor 38–43 W18-5605 We compare the use of LSTM-based and CNN-based character-level word embeddings in BiLSTM-CRF models to approach chemical and disease named entity recognition (NER) tasks. Empirical results over the BioCreative V CDR corpus show that the use of either type of character-level word embeddings in conjunction with the BiLSTM-CRF models leads to comparable state-of-the-art performance. However, the models using CNN-based character-level word embeddings have a computational performance advantage, increasing training time over word-based models by 25% while the LSTM-based character-level word embeddings more than double the required training time. @@ -10415,14 +10415,14 @@ Deep learning for language understanding of mental health concepts derived from Cognitive Behavioural Therapy - Lina M.Rojas-Barahona + Lina M.Rojas-Barahona Bo-HsiangTseng YinpeiDai ClareMansfield OsmanRamadan StefanUltes MichaelCrawford - MilicaGašić + MilicaGašić 44–54 W18-5606 In recent years, we have seen deep learning and distributed representations of words and sentences make impact on a number of natural language processing tasks, such as similarity, entailment and sentiment analysis. Here we introduce a new task: understanding of mental health concepts derived from Cognitive Behavioural Therapy (CBT). We define a mental health ontology based on the CBT principles, annotate a large corpus where this phenomena is exhibited and perform understanding using deep learning and distributed representations. Our results show that the performance of deep learning models combined with word embeddings or sentence embeddings significantly outperform non-deep-learning models in this difficult task. This understanding module will be an essential component of a statistical dialogue system delivering therapy. @@ -10432,7 +10432,7 @@ Investigating the Challenges of Temporal Relation Extraction from Clinical Text DianaGalvan - NaoakiOkazaki + NaoakiOkazaki KojiMatsuda KentaroInui 55–64 @@ -10457,7 +10457,7 @@ Unsupervised Identification of Study Descriptors in Toxicology Research: An Experimental Study DrahomiraHerrmannova - StevenYoung + StevenYoung RobertPatton ChristopherStahl NicoleKleinstreuer @@ -10498,7 +10498,7 @@ Automatically Detecting the Position and Type of Psychiatric Evaluation Report Sections DeyaBanisakher NaphtaliRishe - Mark A.Finlayson + Mark A.Finlayson 101–110 W18-5612 Psychiatric evaluation reports represent a rich and still mostly-untapped source of information for developing systems for automatic diagnosis and treatment of mental health problems. These reports contain free-text structured within sections using a convention of headings. We present a model for automatically detecting the position and type of different psychiatric evaluation report sections. We developed this model using a corpus of 150 sample reports that we gathered from the Web, and used sentences as a processing unit while section headings were used as labels of section type. From these labels we generated a unified hierarchy of labels of section types, and then learned n-gram models of the language found in each section. To model conventions for section order, we integrated these n-gram models with a Hierarchical Hidden Markov Model (HHMM) representing the probabilities of observed section orders found in the corpus, and then used this HHMM n-gram model in a decoding framework to infer the most likely section boundaries and section types for documents with their section labels removed. We evaluated our model over two tasks, namely, identifying section boundaries and identifying section types and orders. Our model significantly outperformed baselines for each task with an F1 of 0.88 for identifying section types, and a 0.26 WindowDiff (Wd) and 0.20 and (Pk) scores, respectively, for identifying section boundaries. @@ -10510,7 +10510,7 @@ TarakaRama PålBrekke ØysteinNytrø - LiljaØvrelid + LiljaØvrelid 111–121 W18-5613 In this article, we describe the development of annotation guidelines for family history information in Norwegian clinical text. We make use of incrementally developed synthetic clinical text describing patients’ family history relating to cases of cardiac disease and present a general methodology which integrates the synthetically produced clinical statements and guideline development. We analyze inter-annotator agreement based on the developed guidelines and present results from experiments aimed at evaluating the validity and applicability of the annotated corpus using machine learning techniques. The resulting annotated corpus contains 477 sentences and 6030 tokens. Both the annotation guidelines and the annotated corpus are made freely available and as such constitutes the first publicly available resource of Norwegian clinical text. @@ -10534,8 +10534,8 @@ NicholasMiller KirstenBolton PhilipCawkwell - MarieMeteer - JamesPustejovsky + MarieMeteer + JamesPustejovsky MeiHua-Hall 129–138 W18-5615 @@ -10573,7 +10573,7 @@ In-domain Context-aware Token Embeddings Improve Biomedical Named Entity Recognition - GolnarSheikhshabbafghi + GolnarSheikhshabbafghi InancBirol AnoopSarkar 160–164 @@ -10585,7 +10585,7 @@ Self-training improves Recurrent Neural Networks performance for Temporal Relation Extraction ChenLin - TimothyMiller + TimothyMiller DmitriyDligach HadiAmiri StevenBethard @@ -10615,7 +10615,7 @@ AndréBittar RinaDutta RashmiPatel - RobertStewart + RobertStewart SumithraVelupillai 183–192 W18-5621 @@ -10628,7 +10628,7 @@ JulienTourille MatthieuDoutreligne OlivierFerret - AurélieNévéol + AurélieNévéol NicolasParis XavierTannier 193–203 @@ -10642,7 +10642,7 @@ YuhaoZhang Daisy YiDing TianpeiQian - Christopher D.Manning + Christopher D.Manning Curtis P.Langlotz 204–213 W18-5623 @@ -10708,7 +10708,7 @@ A Methodology for Evaluating Interaction Strategies of Task-Oriented Conversational Agents MarcoGuerini SaraFalcone - BernardoMagnini + BernardoMagnini 24–32 W18-5704 In task-oriented conversational agents, more attention has been usually devoted to assessing task effectiveness, rather than to how the task is achieved. However, conversational agents are moving towards more complex and human-like interaction capabilities (e.g. the ability to use a formal/informal register, to show an empathetic behavior), for which standard evaluation methodologies may not suffice. In this paper, we provide a novel methodology to assess - in a completely controlled way - the impact on the quality of experience of agent’s interaction strategies. The methodology is based on a within subject design, where two slightly different transcripts of the same interaction with a conversational agent are presented to the user. Through a series of pilot experiments we prove that this methodology allows fast and cheap experimentation/evaluation, focusing on aspects that are overlooked by current methods. @@ -10749,7 +10749,7 @@ Data Augmentation for Neural Online Chats Response Selection WenchaoDu - AlanBlack + AlanBlack 52–58 W18-5708 Data augmentation seeks to manipulate the available data for training to improve the generalization ability of models. We investigate two data augmentation proxies, permutation and flipping, for neural dialog response selection task on various models over multiple datasets, including both Chinese and English languages. Different from standard data augmentation techniques, our method combines the original and synthesized data for prediction. Empirical results show that our approach can gain 1 to 3 recall-at-1 points over baseline models in both full-scale and small-scale settings. @@ -10774,7 +10774,7 @@ IgnacioAguado AndreeaHossmann MichaelBaeriswyl - ClaudiuMusat + ClaudiuMusat 67–73 W18-5710 Most of the world’s data is stored in relational databases. Accessing these requires specialized knowledge of the Structured Query Language (SQL), putting them out of the reach of many people. A recent research thread in Natural Language Processing (NLP) aims to alleviate this problem by automatically translating natural language questions into SQL queries. While the proposed solutions are a great start, they lack robustness and do not easily generalize: the methods require high quality descriptions of the database table columns, and the most widely used training dataset, WikiSQL, is heavily biased towards using those descriptions as part of the questions. In this work, we propose solutions to both problems: we entirely eliminate the need for column descriptions, by relying solely on their contents, and we augment the WikiSQL dataset by paraphrasing column names to reduce bias. We show that the accuracy of existing methods drops when trained on our augmented, column-agnostic dataset, and that our own method reaches state of the art accuracy, while relying on column contents only. @@ -10784,7 +10784,7 @@ Exploring Named Entity Recognition As an Auxiliary Task for Slot Filling in Conversational Language Understanding SamuelLouvan - BernardoMagnini + BernardoMagnini 74–80 W18-5711 Slot filling is a crucial task in the Natural Language Understanding (NLU) component of a dialogue system. Most approaches for this task rely solely on the domain-specific datasets for training. We propose a joint model of slot filling and Named Entity Recognition (NER) in a multi-task learning (MTL) setup. Our experiments on three slot filling datasets show that using NER as an auxiliary task improves slot filling performance and achieve competitive performance compared with state-of-the-art. In particular, NER is effective when supervised at the lower layer of the model. For low-resource scenarios, we found that MTL is effective for one dataset. @@ -10794,7 +10794,7 @@ Why are Sequence-to-Sequence Models So Dull? Understanding the Low-Diversity Problem of Chatbots ShaojieJiang - Maartende Rijke + Maartende Rijke 81–86 W18-5712 Diversity is a long-studied topic in information retrieval that usually refers to the requirement that retrieved results should be non-repetitive and cover different aspects. In a conversational setting, an additional dimension of diversity matters: an engaging response generation system should be able to output responses that are diverse and interesting. Sequence-to-sequence (Seq2Seq) models have been shown to be very effective for response generation. However, dialogue responses generated by Seq2Seq models tend to have low diversity. In this paper, we review known sources and existing approaches to this low-diversity problem. We also identify a source of low diversity that has been little studied so far, namely model over-confidence. We sketch several directions for tackling model over-confidence and, hence, the low-diversity problem, including confidence penalties and label smoothing. @@ -10817,7 +10817,7 @@ Proceedings of the Fifteenth Workshop on Computational Research in Phonetics, Phonology, and Morphology W18-58 - SandraKuebler + SandraKuebler GarrettNicolai Association for Computational Linguistics
Brussels, Belgium
@@ -10851,7 +10851,7 @@
Acoustic Word Disambiguation with Phonogical Features in <fixed-case>D</fixed-case>anish <fixed-case>ASR</fixed-case> - Andreas SøeborgKirkedal + Andreas SøeborgKirkedal 21–31 W18-5803 Phonological features can indicate word class and we can use word class information to disambiguate both homophones and homographs in automatic speech recognition (ASR). We show Danish stød can be predicted from speech and used to improve ASR. We discover which acoustic features contain the signal of stød, how to use these features to predict stød and how we can make use of stød and stødpredictive acoustic features to improve overall ASR accuracy and decoding speed. In the process, we discover acoustic features that are novel to the phonetic characterisation of stød. @@ -10861,11 +10861,11 @@ <fixed-case>A</fixed-case>daptor <fixed-case>G</fixed-case>rammars for the Linguist: Word Segmentation Experiments for Very Low-Resource Languages PierreGodard - LaurentBesacier + LaurentBesacier FrançoisYvon - MartineAdda-Decker - GillesAdda - HélèneMaynard + MartineAdda-Decker + GillesAdda + HélèneMaynard AnnieRialland 32–42 W18-5804 @@ -10907,7 +10907,7 @@ Automatically Tailoring Unsupervised Morphological Segmentation to the Language RamyEskander - OwenRambow + OwenRambow SmarandaMuresan 78–83 W18-5808 @@ -10968,8 +10968,8 @@ On Hapax Legomena and Morphological Productivity - JanetPierrehumbert - RamonGranell + JanetPierrehumbert + RamonGranell 125–130 W18-5814 Quantifying and predicting morphological productivity is a long-standing challenge in corpus linguistics and psycholinguistics. The same challenge reappears in natural language processing in the context of handling words that were not seen in the training set (out-of-vocabulary, or OOV, words). Prior research showed that a good indicator of the productivity of a morpheme is the number of words involving it that occur exactly once (the hapax legomena). A technical connection was adduced between this result and Good-Turing smoothing, which assigns probability mass to unseen events on the basis of the simplifying assumption that word frequencies are stationary. In a large-scale study of 133 affixes in Wikipedia, we develop evidence that success in fact depends on tapping the frequency range in which the assumptions of Good-Turing are violated. @@ -10979,7 +10979,7 @@ A Morphological Analyzer for <fixed-case>S</fixed-case>hipibo-Konibo RonaldCardenas - DanielZeman + DanielZeman 131–139 W18-5815 We present a fairly complete morphological analyzer for Shipibo-Konibo, a low-resourced native language spoken in the Amazonian region of Peru. We resort to the robustness of finite-state systems in order to model the complex morphosyntax of the language. Evaluation over raw corpora shows promising coverage of grammatical phenomena, limited only by the scarce lexicon. We make this tool freely available so as to aid the production of annotated corpora and impulse further research in native languages of Peru. @@ -11012,7 +11012,7 @@ Phonological Features for Morphological Inflection AdamWiemerslage - MiikkaSilfverberg + MiikkaSilfverberg MansHulden 161–166 W18-5818 @@ -11034,10 +11034,10 @@ Proceedings of the 2018 EMNLP Workshop SMM4H: The 3rd Social Media Mining for Health Applications Workshop & Shared Task W18-59 - GracielaGonzalez-Hernandez - DavyWeissenbacher + GracielaGonzalez-Hernandez + DavyWeissenbacher AbeedSarker - MichaelPaul + MichaelPaul Association for Computational Linguistics
Brussels, Belgium
October @@ -11119,7 +11119,7 @@ SahilChopra SimraShahid LaibaMehnaz - RajivShah + RajivShah 27–31 W18-5907 Social media-based text mining in healthcare has received special attention in recent times due to the enhanced accessibility of social media sites like Twitter. The increasing trend of spreading important information in distress can help patients reach out to prospective blood donors in a time bound manner. However such manual efforts are mostly inefficient due to the limited network of a user. In a novel step to solve this problem, we present an annotated Emergency Blood Donation Request (EBDR) dataset to classify tweets referring to the necessity of urgent blood donation requirement. Additionally, we also present an automated feature-based SVM classification technique that can help selective EBDR tweets reach relevant personals as well as medical authorities. Our experiments also present a quantitative evidence that linguistic along with handcrafted heuristics can act as the most representative set of signals this task with an accuracy of 97.89%. @@ -11128,7 +11128,7 @@
Dealing with Medication Non-Adherence Expressions in <fixed-case>T</fixed-case>witter - TakeshiOnishi + TakeshiOnishi DavyWeissenbacher AriKlein KarenO’Connor @@ -11165,10 +11165,10 @@ Shot Or Not: Comparison of <fixed-case>NLP</fixed-case> Approaches for Vaccination Behaviour Detection AdityaJoshi - XiangDai + XiangDai SarvnazKarimi RossSparks - CécileParis + CécileParis C RainaMacIntyre 43–47 W18-5911 @@ -11210,7 +11210,7 @@ Automatic Identification of Drugs and Adverse Drug Reaction Related Tweets Segun TaofeekAroyehun - AlexanderGelbukh + AlexanderGelbukh 54–55 W18-5915 We describe our submissions to the Third Social Media Mining for Health Applications Shared Task. We participated in two tasks (tasks 1 and 3). For both tasks, we experimented with a traditional machine learning model (Naive Bayes Support Vector Machine (NBSVM)), deep learning models (Convolutional Neural Networks (CNN), Long Short-Term Memory (LSTM), and Bidirectional LSTM (BiLSTM)), and the combination of deep learning model with SVM. We observed that the NBSVM reaches superior performance on both tasks on our development split of the training data sets. Official result for task 1 based on the blind evaluation data shows that the predictions of the NBSVM achieved our team’s best F-score of 0.910 which is above the average score received by all submissions to the task. On task 3, the combination of of BiLSTM and SVM gives our best F-score for the positive class of 0.394. @@ -11281,7 +11281,7 @@ Proceedings of the Second Workshop on Universal Dependencies (UDW 2018) W18-60 - Marie-Catherinede Marneffe + Marie-Catherinede Marneffe TeresaLynn SebastianSchuster Association for Computational Linguistics @@ -11298,7 +11298,7 @@ Assessing the Impact of Incremental Error Detection and Correction. A Case Study on the <fixed-case>I</fixed-case>talian <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Treebank ChiaraAlzetta FeliceDell’Orletta - SimonettaMontemagni + SimonettaMontemagni MariaSimi GiuliaVenturi 1–7 @@ -11329,11 +11329,11 @@ Expletives in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Treebanks GosseBouma - JanHajic + JanHajic DagHaug JoakimNivre Per ErikSolberg - LiljaØvrelid + LiljaØvrelid 18–26 W18-6003 Although treebanks annotated according to the guidelines of Universal Dependencies (UD) now exist for many languages, the goal of annotating the same phenomena in a cross-linguistically consistent fashion is not always met. In this paper, we investigate one phenomenon where we believe such consistency is lacking, namely expletive elements. Such elements occupy a position that is structurally associated with a core argument (or sometimes an oblique dependent), yet are non-referential and semantically void. Many UD treebanks identify at least some elements as expletive, but the range of phenomena differs between treebanks, even for closely related languages, and sometimes even for different treebanks for the same language. In this paper, we present criteria for identifying expletives that are applicable across languages and compatible with the goals of UD, give an overview of expletives as found in current UD treebanks, and present recommendations for the annotation of expletives so that more consistent annotation can be achieved in future releases. @@ -11342,10 +11342,10 @@ Challenges in Converting the Index <fixed-case>T</fixed-case>homisticus Treebank into <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies - Flavio MassimilianoCecchini + Flavio MassimilianoCecchini MarcoPassarotti PaolaMarongiu - DanielZeman + DanielZeman 27–36 W18-6004 This paper describes the changes applied to the original process used to convert the Index Thomisticus Treebank, a corpus including texts in Medieval Latin by Thomas Aquinas, into the annotation style of Universal Dependencies. The changes are made both to harmonise the Universal Dependencies version of the Index Thomisticus Treebank with the two other available Latin treebanks and to fix errors and inconsistencies resulting from the original process. The paper details the treatment of different issues in PoS tagging, lemmatisation and assignment of dependency relations. Finally, it assesses the quality of the new conversion process by providing an evaluation against a gold standard. @@ -11367,7 +11367,7 @@ KiraDroganova FilipGinter JennaKanerva - DanielZeman + DanielZeman 47–54 W18-6006 In this paper, we focus on parsing rare and non-trivial constructions, in particular ellipsis. We report on several experiments in enrichment of training data for this specific construction, evaluated on five languages: Czech, English, Finnish, Russian and Slovak. These data enrichment methods draw upon self-training and tri-training, combined with a stratified sampling method mimicking the structural complexity of the original treebank. In addition, using these same methods, we also demonstrate small improvements over the CoNLL-17 parsing shared task winning system for four of the five languages, not only restricted to the elliptical constructions. @@ -11402,8 +11402,8 @@ MasayukiAsahara Jena D.Hwang YusukeMiyao - Jinho D.Choi - YujiMatsumoto + Jinho D.Choi + YujiMatsumoto 75–84 W18-6009 This paper discusses the representation of coordinate structures in the Universal Dependencies framework for two head-final languages, Japanese and Korean. UD applies a strict principle that makes the head of coordination the left-most conjunct. However, the guideline may produce syntactic trees which are difficult to accept in head-final languages. This paper describes the status in the current Japanese and Korean corpora and proposes alternative designs suitable for these languages. @@ -11421,8 +11421,8 @@ Marrying <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies and <fixed-case>U</fixed-case>niversal <fixed-case>M</fixed-case>orphology - Arya D.McCarthy - MiikkaSilfverberg + Arya D.McCarthy + MiikkaSilfverberg RyanCotterell MansHulden DavidYarowsky @@ -11438,7 +11438,7 @@ PaolaMarongiu FilipGinter JennaKanerva - SimonettaMontemagni + SimonettaMontemagni SebastianSchuster MariaSimi 102–107 @@ -11473,7 +11473,7 @@ The First <fixed-case>K</fixed-case>omi-<fixed-case>Z</fixed-case>yrian <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Treebanks NikoPartanen RogierBlokland - KyungTaeLim + KyungTaeLim ThierryPoibeau MichaelRießler 126–132 @@ -11495,7 +11495,7 @@ Multi-source synthetic treebank creation for improved cross-lingual dependency parsing - FrancisTyers + FrancisTyers MariyaSheyanova AleksandraMartynova PavelStepachev @@ -11511,11 +11511,11 @@ AlonsoVasquez RenzoEgo Aguirre CandyAngulo - JohnMiller + JohnMiller ClaudiaVillanueva - ŽeljkoAgić + ŽeljkoAgić RobertoZariquiey - ArturoOncevay + ArturoOncevay 151–161 W18-6018 We present an initial version of the Universal Dependencies (UD) treebank for Shipibo-Konibo, the first South American, Amazonian, Panoan and Peruvian language with a resource built under UD. We describe the linguistic aspects of how the tagset was defined and the treebank was annotated; in addition we present our specific treatment of linguistic units called clitics. Although the treebank is still under development, it allowed us to perform a typological comparison against Spanish, the predominant language in Peru, and dependency syntax parsing experiments in both monolingual and cross-lingual approaches. @@ -11569,7 +11569,7 @@ W18-61 WeiXu AlanRitter - TimBaldwin + TimBaldwin AfshinRahimi Association for Computational Linguistics
Brussels, Belgium
@@ -11585,7 +11585,7 @@ Inducing a lexicon of sociolinguistic variables from code-mixed text PhilippaShoemark JamesKirby - SharonGoldwater + SharonGoldwater 1–6 W18-6101 Sociolinguistics is often concerned with how variants of a linguistic item (e.g., nothing vs. nothin’) are used by different groups or in different situations. We introduce the task of inducing lexical variables from code-mixed text: that is, identifying equivalence pairs such as (football, fitba) along with their linguistic code (football→British, fitba→Scottish). We adapt a framework for identifying gender-biased word pairs to this new task, and present results on three different pairs of English dialects, using tweets as the code-mixed text. Our system achieves precision of over 70% for two of these three datasets, and produces useful results even without extensive parameter tuning. Our success in adapting this framework from gender to language variety suggests that it could be used to discover other types of analogous pairs as well. @@ -11596,7 +11596,7 @@ <fixed-case>T</fixed-case>witter Geolocation using Knowledge-Based Methods TaroMiyazaki AfshinRahimi - TrevorCohn + TrevorCohn TimothyBaldwin 7–16 W18-6102 @@ -11629,7 +11629,7 @@ How do you correct run-on sentences it’s not as easy as it seems JunchaoZheng CourtneyNapoles - JoelTetreault + JoelTetreault KostiantynOmelianchuk 33–38 W18-6105 @@ -11652,7 +11652,7 @@
Normalization of Transliterated Words in Code-Mixed Data Using <fixed-case>S</fixed-case>eq2<fixed-case>S</fixed-case>eq Model & <fixed-case>L</fixed-case>evenshtein Distance - SoumilMandal + SoumilMandal KarthickNanmaran 49–53 W18-6107 @@ -11735,7 +11735,7 @@ Detecting Code-Switching between <fixed-case>T</fixed-case>urkish-<fixed-case>E</fixed-case>nglish Language Pair ZeynepYirmibeşoğlu - GülşenEryiğit + GülşenEryiğit 110–115 W18-6115 Code-switching (usage of different languages within a single conversation context in an alternative manner) is a highly increasing phenomenon in social media and colloquial usage which poses different challenges for natural language processing. This paper introduces the first study for the detection of Turkish-English code-switching and also a small test data collected from social media in order to smooth the way for further studies. The proposed system using character level n-grams and conditional random fields (CRFs) obtains 95.6% micro-averaged F1-score on the introduced test data set. @@ -11744,8 +11744,8 @@ Language Identification in Code-Mixed Data using Multichannel Neural Networks and Context Capture - SoumilMandal - Anil KumarSingh + SoumilMandal + Anil KumarSingh 116–120 W18-6116 An accurate language identification tool is an absolute necessity for building complex NLP systems to be used on code-mixed data. Lot of work has been recently done on the same, but there’s still room for improvement. Inspired from the recent advancements in neural network architectures for computer vision tasks, we have implemented multichannel neural networks combining CNN and LSTM for word level language identification of code-mixed data. Combining this with a Bi-LSTM-CRF context capture module, accuracies of 93.28% and 93.32% is achieved on our two testing sets. @@ -11768,7 +11768,7 @@ Content Extraction and Lexical Analysis from Customer-Agent Interactions SergiuNisioi AncaBucur - Liviu P.Dinu + Liviu P.Dinu 132–136 W18-6118 In this paper, we provide a lexical comparative analysis of the vocabulary used by customers and agents in an Enterprise Resource Planning (ERP) environment and a potential solution to clean the data and extract relevant content for NLP. As a result, we demonstrate that the actual vocabulary for the language that prevails in the ERP conversations is highly divergent from the standardized dictionary and further different from general language usage as extracted from the Common Crawl corpus. Moreover, in specific business communication circumstances, where it is expected to observe a high usage of standardized language, code switching and non-standard expression are predominant, emphasizing once more the discrepancy between the day-to-day use of language and the standardized one. @@ -11778,7 +11778,7 @@ Preferred Answer Selection in <fixed-case>S</fixed-case>tack <fixed-case>O</fixed-case>verflow: Better Text Representations ... and Metadata, Metadata, Metadata StevenXu - AndrewBennett + AndrewBennett DorisHoogeveen Jey HanLau TimothyBaldwin @@ -11802,7 +11802,7 @@ Classification of Tweets about Reported Events using Neural Networks KiminobuMakino - YukaTakei + YukaTakei TaroMiyazaki JunGoto 153–163 @@ -11815,8 +11815,8 @@ Learning to Define Terms in the Software Domain VidhishaBalachandran DheerajRajagopal - Rose CatherineKanjirathinkal - WilliamCohen + Rose CatherineKanjirathinkal + WilliamCohen 164–172 W18-6122 One way to test a person’s knowledge of a domain is to ask them to define domain-specific terms. Here, we investigate the task of automatically generating definitions of technical terms by reading text from the technical domain. Specifically, we learn definitions of software entities from a large corpus built from the user forum Stack Overflow. To model definitions, we train a language model and incorporate additional domain-specific information like word co-occurrence, and ontological category information. Our approach improves previous baselines by 2 BLEU points for the definition generation task. Our experiments also show the additional challenges associated with the task and the short-comings of language-model based architectures for definition generation. @@ -11848,7 +11848,7 @@ Low-resource named entity recognition via multi-source projection: Not quite there yet? Jan ViumEnghoff SørenHarrison - ŽeljkoAgić + ŽeljkoAgić 195–201 W18-6125 Projecting linguistic annotations through word alignments is one of the most prevalent approaches to cross-lingual transfer learning. Conventional wisdom suggests that annotation projection “just works” regardless of the task at hand. We carefully consider multi-source projection for named entity recognition. Our experiment with 17 languages shows that to detect named entities in true low-resource languages, annotation projection may not be the right way to move forward. On a more positive note, we also uncover the conditions that do favor named entity projection from multiple sources. We argue these are infeasible under noisy low-resource constraints. @@ -11860,7 +11860,7 @@ LishengFu BonanMin Thien HuuNguyen - RalphGrishman + RalphGrishman 202–207 W18-6126 Typical relation extraction models are trained on a single corpus annotated with a pre-defined relation schema. An individual corpus is often small, and the models may often be biased or overfitted to the corpus. We hypothesize that we can learn a better representation by combining multiple relation datasets. We attempt to use a shared encoder to learn the unified feature representation and to augment it with regularization by adversarial training. The additional corpora feeding the encoder can help to learn a better feature representation layer even though the relation schemas are different. We use ACE05 and ERE datasets as our case study for experiments. The multi-task model obtains significant improvement on both datasets. @@ -11904,9 +11904,9 @@ Proceedings of the 9th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis W18-62 - AlexandraBalahur - Saif M.Mohammad - VeroniqueHoste + AlexandraBalahur + Saif M.Mohammad + VeroniqueHoste RomanKlinger Association for Computational Linguistics
Brussels, Belgium
@@ -11920,7 +11920,7 @@ Identifying Affective Events and the Reasons for their Polarity - EllenRiloff + EllenRiloff 1 W18-6201 Many events have a positive or negative impact on our lives (e.g., “I bought a house” is typically good news, but ”My house burned down” is bad news). Recognizing events that have affective polarity is essential for narrative text understanding, conversational dialogue, and applications such as summarization and sarcasm detection. We will discuss our recent work on identifying affective events and categorizing them based on the underlying reasons for their affective polarity. First, we will describe a weakly supervised learning method to induce a large set of affective events from a text corpus by optimizing for semantic consistency. Second, we will present models to classify affective events based on Human Need Categories, which often explain people’s motivations and desires. Our best results use a co-training model that consists of event expression and event context classifiers and exploits both labeled and unlabeled texts. We will conclude with a discussion of interesting directions for future work in this area. @@ -11929,7 +11929,7 @@ Deep contextualized word representations for detecting sarcasm and irony - SuzanaIlić + SuzanaIlić EdisonMarrese-Taylor JorgeBalazs YutakaMatsuo @@ -11942,7 +11942,7 @@ Implicit Subjective and Sentimental Usages in Multi-sense Word Embeddings YuqiSun - HaoyueShi + HaoyueShi JunfengHu 8–13 W18-6203 @@ -11965,7 +11965,7 @@ Creating a Dataset for Multilingual Fine-grained Emotion-detection Using Gamification-based Annotation EmilyÖhman KaislaKajava - JörgTiedemann + JörgTiedemann TimoHonkela 24–30 W18-6205 @@ -11976,7 +11976,7 @@ <fixed-case>IEST</fixed-case>: <fixed-case>WASSA</fixed-case>-2018 Implicit Emotions Shared Task RomanKlinger - OrphéeDe Clercq + OrphéeDe Clercq SaifMohammad AlexandraBalahur 31–42 @@ -12022,7 +12022,7 @@ Sentiment analysis under temporal shift JanLukes - AndersSøgaard + AndersSøgaard 65–71 W18-6210 Sentiment analysis models often rely on training data that is several years old. In this paper, we show that lexical features change polarity over time, leading to degrading performance. This effect is particularly strong in sparse models relying only on highly predictive features. Using predictive feature selection, we are able to significantly improve the accuracy of such models over time. @@ -12043,7 +12043,7 @@ Topic-Specific Sentiment Analysis Can Help Identify Political Ideology SumitBhatia - DeepakP + DeepakP 79–84 W18-6212 Ideological leanings of an individual can often be gauged by the sentiment one expresses about different issues. We propose a simple framework that represents a political ideology as a distribution of sentiment polarities towards a set of topics. This representation can then be used to detect ideological leanings of documents (speeches, news articles, etc.) based on the sentiments expressed towards different topics. Experiments performed using a widely used dataset show the promise of our proposed approach that achieves comparable performance to other methods despite being much simpler and more interpretable. @@ -12053,8 +12053,8 @@ Saying no but meaning yes: negation and sentiment analysis in <fixed-case>B</fixed-case>asque JonAlkorta - KoldoGojenola - MikelIruskieta + KoldoGojenola + MikelIruskieta 85–90 W18-6213 In this work, we have analyzed the effects of negation on the semantic orientation in Basque. The analysis shows that negation markers can strengthen, weaken or have no effect on sentiment orientation of a word or a group of words. Using the Constraint Grammar formalism, we have designed and evaluated a set of linguistic rules to formalize these three phenomena. The results show that two phenomena, strengthening and no change, have been identified accurately and the third one, weakening, with acceptable results. @@ -12065,7 +12065,7 @@ Leveraging Writing Systems Change for Deep Learning Based <fixed-case>C</fixed-case>hinese Emotion Analysis RongXiang YunfeiLong - QinLu + QinLu DanXiong I-HsuanChen 91–96 @@ -12078,7 +12078,7 @@ Ternary <fixed-case>T</fixed-case>witter Sentiment Classification with Distant Supervision and Sentiment-Specific Word Embeddings MatsByrkjeland FrederikGørvell de Lichtenberg - BjörnGambäck + BjörnGambäck 97–106 W18-6215 The paper proposes the Ternary Sentiment Embedding Model, a new model for creating sentiment embeddings based on the Hybrid Ranking Model of Tang et al. (2016), but trained on ternary-labeled data instead of binary-labeled, utilizing sentiment embeddings from datasets made with different distant supervision methods. The model is used as part of a complete Twitter Sentiment Analysis system and empirically compared to existing systems, showing that it outperforms Hybrid Ranking and that the quality of the distant-supervised dataset has a great impact on the quality of the produced sentiment embeddings. @@ -12108,7 +12108,7 @@ The Role of Emotions in Native Language Identification IliaMarkov - ViviNastase + ViviNastase CarloStrapparava GrigoriSidorov 123–129 @@ -12131,7 +12131,7 @@ Dual Memory Network Model for Biased Product Review Classification YunfeiLong MingyuMa - QinLu + QinLu RongXiang Chu-RenHuang 140–148 @@ -12165,7 +12165,7 @@ RamitSawhney PrachiManchanda PuneetMathur - RajivShah + RajivShah RajSingh 167–175 W18-6223 @@ -12175,7 +12175,7 @@ <fixed-case>UTFPR</fixed-case> at <fixed-case>IEST</fixed-case> 2018: Exploring Character-to-Word Composition for Emotion Analysis - GustavoPaetzold + GustavoPaetzold 176–181 W18-6224 We introduce the UTFPR system for the Implicit Emotions Shared Task of 2018: A compositional character-to-word recurrent neural network that does not exploit heavy and/or hard-to-obtain resources. We find that our approach can outperform multiple baselines, and offers an elegant and effective solution to the problem of orthographic variance in tweets. @@ -12206,9 +12206,9 @@ <fixed-case>SINAI</fixed-case> at <fixed-case>IEST</fixed-case> 2018: Neural Encoding of Emotional External Knowledge for Emotion Classification Flor MiriamPlaza-del-Arco - EugenioMartínez-Cámara - MaiteMartin - L. AlfonsoUreña- López + EugenioMartínez-Cámara + MaiteMartin + L. AlfonsoUreña- López 195–200 W18-6227 In this paper, we describe our participation in WASSA 2018 Implicit Emotion Shared Task (IEST 2018). We claim that the use of emotional external knowledge may enhance the performance and the capacity of generalization of an emotion classification system based on neural networks. Accordingly, we submitted four deep learning systems grounded in a sequence encoding layer. They mainly differ in the feature vector space and the recurrent neural network used in the sequence encoding layer. The official results show that the systems that used emotional external knowledge have a higher capacity of generalization, hence our claim holds. @@ -12342,7 +12342,7 @@ What Makes You Stressed? Finding Reasons From Tweets ReshmiGopalakrishna Pillai MikeThelwall - ConstantinOrasan + ConstantinOrasan 266–272 W18-6239 Detecting stress from social media gives a non-intrusive and inexpensive alternative to traditional tools such as questionnaires or physiological sensors for monitoring mental state of individuals. This paper introduces a novel framework for finding reasons for stress from tweets, analyzing multiple categories for the first time. Three word-vector based methods are evaluated on collections of tweets about politics or airlines and are found to be more accurate than standard machine learning algorithms. @@ -12363,7 +12363,7 @@ Identifying Opinion-Topics and Polarity of Parliamentary Debate Motions GavinAbercrombie - Riza TheresaBatista-Navarro + Riza TheresaBatista-Navarro 280–285 W18-6241 Analysis of the topics mentioned and opinions expressed in parliamentary debate motions–or proposals–is difficult for human readers, but necessary for understanding and automatic processing of the content of the subsequent speeches. We present a dataset of debate motions with pre-existing ‘policy’ labels, and investigate the utility of these labels for simultaneous topic and opinion polarity analysis. For topic detection, we apply one-versus-the-rest supervised topic classification, finding that good performance is achieved in predicting the policy topics, and that textual features derived from the debate titles associated with the motions are particularly indicative of motion topic. We then examine whether the output could also be used to determine the positions taken by proposers towards the different policies by investigating how well humans agree in interpreting the opinion polarities of the motions. Finding very high levels of agreement, we conclude that the policies used can be reliable labels for use in these tasks, and that successful topic detection can therefore provide opinion analysis of the motions ‘for free’. @@ -12442,7 +12442,7 @@ Predicting Adolescents’ Educational Track from Chat Messages on <fixed-case>D</fixed-case>utch Social Media LisaHilte - WalterDaelemans + WalterDaelemans ReinhildVandekerckhove 328–334 W18-6248 @@ -12480,23 +12480,23 @@ Proceedings of the Third Conference on Machine Translation: Research Papers W18-63 - OndřejBojar - RajenChatterjee + OndřejBojar + RajenChatterjee ChristianFedermann MarkFishel YvetteGraham BarryHaddow MatthiasHuck - Antonio JimenoYepes + Antonio JimenoYepes PhilippKoehn ChristofMonz - MatteoNegri - AurélieNévéol + MatteoNegri + AurélieNévéol MarianaNeves MattPost LuciaSpecia MarcoTurchi - KarinVerspoor + KarinVerspoor Association for Computational Linguistics
Brussels, Belgium
October @@ -12521,7 +12521,7 @@
Character-level <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Translation through <fixed-case>ASCII</fixed-case> Encoding - Nikola I.Nikolov + Nikola I.Nikolov YuhuangHu Mi XueTan Richard H.R.Hahnloser @@ -12566,7 +12566,7 @@ Coreference and Coherence in Neural Machine Translation: A Study Using Oracle Experiments DarioStojanovski - AlexanderFraser + AlexanderFraser 49–60 W18-6306 Cross-sentence context can provide valuable information in Machine Translation and is critical for translation of anaphoric pronouns and for providing consistent translations. In this paper, we devise simple oracle experiments targeting coreference and coherence. Oracles are an easy way to evaluate the effect of different discourse-level phenomena in NMT using BLEU and eliminate the necessity to manually define challenge sets for this purpose. We propose two context-aware NMT models and compare them against models working on a concatenation of consecutive sentences. Concatenation models perform better, but are computationally expensive. We show that NMT models taking advantage of context oracle signals can achieve considerable gains in BLEU, of up to 7.02 BLEU for coreference and 1.89 BLEU for coherence on subtitles translation. Access to strong signals allows us to make clear comparisons between context-aware models. @@ -12576,7 +12576,7 @@ A Large-Scale Test Set for the Evaluation of Context-Aware Pronoun Translation in Neural Machine Translation MathiasMüller - AnnetteRios + AnnetteRios ElenaVoita RicoSennrich 61–72 @@ -12588,8 +12588,8 @@ Beyond Weight Tying: Learning Joint Input-Output Embeddings for Neural Machine Translation NikolaosPappas - LeslyMiculicich - JamesHenderson + LeslyMiculicich + JamesHenderson 73–83 W18-6308 Tying the weights of the target word embeddings with the target word classifiers of neural machine translation models leads to faster training and often to better translation quality. Given the success of this parameter sharing, we investigate other forms of sharing in between no sharing and hard equality of parameters. In particular, we propose a structure-aware output layer which captures the semantic structure of the output space of words within a joint input-output embedding. The model is a generalized form of weight tying which shares parameters but allows learning a more flexible relationship with input word embeddings and allows the effective capacity of the output layer to be controlled. In addition, the model shares weights across output classifiers and translation contexts which allows it to better leverage prior knowledge about them. Our evaluation on English-to-Finnish and English-to-German datasets shows the effectiveness of the method against strong encoder-decoder baselines trained with or without weight tying. @@ -12614,7 +12614,7 @@ Improving Neural Language Models with Weight Norm Initialization and Regularization ChristianHerold YingboGao - HermannNey + HermannNey 93–100 W18-6310 Embedding and projection matrices are commonly used in neural language models (NLM) as well as in other sequence processing networks that operate on large vocabularies. We examine such matrices in fine-tuned language models and observe that a NLM learns word vectors whose norms are related to the word frequencies. We show that by initializing the weight norms with scaled log word counts, together with other techniques, lower perplexities can be obtained in early epochs of training. We also introduce a weight norm regularization loss term, whose hyperparameters are tuned via a grid search. With this method, we are able to significantly improve perplexities on two word-level language modeling tasks (without dynamic evaluation): from 54.44 to 53.16 on Penn Treebank (PTB) and from 61.45 to 60.13 on WikiText-2 (WT2). @@ -12624,8 +12624,8 @@ Contextual Neural Model for Translating Bilingual Multi-Speaker Conversations SameenMaruf - André F. T.Martins - GholamrezaHaffari + André F. T.Martins + GholamrezaHaffari 101–112 W18-6311 Recent works in neural machine translation have begun to explore document translation. However, translating online multi-speaker conversations is still an open problem. In this work, we propose the task of translating Bilingual Multi-Speaker Conversations, and explore neural architectures which exploit both source and target-side conversation histories for this task. To initiate an evaluation for this task, we introduce datasets extracted from Europarl v7 and OpenSubtitles2016. Our experiments on four language-pairs confirm the significance of leveraging conversation history, both in terms of BLEU and manual evaluation. @@ -12649,12 +12649,12 @@ BrianThompson HudaKhayrallah AntoniosAnastasopoulos - Arya D.McCarthy + Arya D.McCarthy KevinDuh RebeccaMarvin PaulMcNamee JeremyGwinnup - TimAnderson + TimAnderson PhilippKoehn 124–132 W18-6313 @@ -12719,7 +12719,7 @@ On The Alignment Problem In Multi-Head Attention-Based Neural Machine Translation TamerAlkhouli GabrielBretschner - HermannNey + HermannNey 177–185 W18-6318 This work investigates the alignment problem in state-of-the-art multi-head attention models based on the transformer architecture. We demonstrate that alignment extraction in transformer models can be improved by augmenting an additional alignment head to the multi-head source-to-target attention component. This is used to compute sharper attention weights. We describe how to use the alignment head to achieve competitive performance. To study the effect of adding the alignment head, we simulate a dictionary-guided translation task, where the user wants to guide translation using pre-defined dictionary entries. Using the proposed approach, we achieve up to 3.8% BLEU improvement when using the dictionary, in comparison to 2.4% BLEU in the baseline case. We also propose alignment pruning to speed up decoding in alignment-based neural machine translation (ANMT), which speeds up translation by a factor of 1.8 without loss in translation performance. We carry out experiments on the shared WMT 2016 English→Romanian news task and the BOLT Chinese→English discussion forum task. @@ -12738,8 +12738,8 @@ Exploring gap filling as a cheaper alternative to reading comprehension questionnaires when evaluating machine translation for gisting - Mikel L.Forcada - CarolinaScarton + Mikel L.Forcada + CarolinaScarton LuciaSpecia BarryHaddow AlexandraBirch @@ -12786,7 +12786,7 @@ Massively Parallel Cross-Lingual Learning in Low-Resource Target Language Translation ZhongZhou MatthiasSperber - AlexanderWaibel + AlexanderWaibel 232–243 W18-6324 We work on translation from rich-resource languages to low-resource languages. The main challenges we identify are the lack of low-resource language data, effective methods for cross-lingual transfer, and the variable-binding problem that is common in neural systems. We build a translation system that addresses these challenges using eight European language families as our test ground. Firstly, we add the source and the target family labels and study intra-family and inter-family influences for effective cross-lingual transfer. We achieve an improvement of +9.9 in BLEU score for English-Swedish translation using eight families compared to the single-family multi-source multi-target baseline. Moreover, we find that training on two neighboring families closest to the low-resource language is often enough. Secondly, we construct an ablation study and find that reasonably good results can be achieved even with considerably less target data. Thirdly, we address the variable-binding problem by building an order-preserving named entity translation model. We obtain 60.6% accuracy in qualitative evaluation where our translations are akin to human translations in a preliminary study. @@ -12806,7 +12806,7 @@ Input Combination Strategies for Multi-Source Transformer Decoder JindřichLibovický - JindřichHelcl + JindřichHelcl DavidMareček 253–260 W18-6326 @@ -12829,23 +12829,23 @@ Proceedings of the Third Conference on Machine Translation: Shared Task Papers W18-64 - OndřejBojar - RajenChatterjee + OndřejBojar + RajenChatterjee ChristianFedermann MarkFishel YvetteGraham BarryHaddow MatthiasHuck - Antonio JimenoYepes + Antonio JimenoYepes PhilippKoehn ChristofMonz - MatteoNegri - AurélieNévéol + MatteoNegri + AurélieNévéol MarianaNeves MattPost LuciaSpecia MarcoTurchi - KarinVerspoor + KarinVerspoor Association for Computational Linguistics
Belgium, Brussels
October @@ -12916,7 +12916,7 @@
Robust parfda Statistical Machine Translation Results - ErgunBiçici + ErgunBiçici 345–354 W18-6405 We build parallel feature decay algorithms (parfda) Moses statistical machine translation (SMT) models for language pairs in the translation task. parfda obtains results close to the top constrained phrase-based SMT with an average of 2.252 BLEU points difference on WMT 2017 datasets using significantly less computation for building SMT systems than that would be spent using all available corpora. We obtain BLEU upper bounds based on target coverage to identify which systems used additional data. We use PRO for tuning to decrease fluctuations in the results and postprocess translation outputs to decrease translation errors due to the casing of words. F1 scores on the key phrases of the English to Turkish testsuite that we prepared reveal that parfda achieves 2nd best results. Truecasing translations before scoring obtained the best results overall. @@ -12927,8 +12927,8 @@ The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> Machine Translation Systems for <fixed-case>WMT</fixed-case>18 News Shared Translation Task NoeCasas CarlosEscolano - Marta R.Costa-jussà - José A. R.Fonollosa + Marta R.Costa-jussà + José A. R.Fonollosa 355–360 W18-6406 In this article we describe the TALP-UPC research group participation in the WMT18 news shared translation task for Finnish-English and Estonian-English within the multi-lingual subtrack. All of our primary submissions implement an attention-based Neural Machine Translation architecture. Given that Finnish and Estonian belong to the same language family and are similar, we use as training data the combination of the datasets of both language pairs to paliate the data scarceness of each individual pair. We also report the translation quality of systems trained on individual language pair data to serve as baseline and comparison reference. @@ -12972,7 +12972,7 @@ YunsuKim JulianSchamper JiahuiGeng - HermannNey + HermannNey 377–385 W18-6409 10.18653/v1/W18-6409 @@ -12993,7 +12993,7 @@ The <fixed-case>AFRL</fixed-case> <fixed-case>WMT</fixed-case>18 Systems: Ensembling, Continuation and Combination JeremyGwinnup - TimAnderson + TimAnderson GrantErdmann KatherineYoung 394–398 @@ -13010,7 +13010,7 @@ UlrichGermann RomanGrundkiewicz KennethHeafield - Antonio ValerioMiceli Barone + Antonio ValerioMiceli Barone RicoSennrich 399–409 W18-6412 @@ -13033,10 +13033,10 @@ The <fixed-case>MLLP</fixed-case>-<fixed-case>UPV</fixed-case> <fixed-case>G</fixed-case>erman-<fixed-case>E</fixed-case>nglish Machine Translation System for <fixed-case>WMT</fixed-case>18 JavierIranzo-Sánchez PauBaquero-Arnal - Gonçal V.Garcés Díaz-Munío + Gonçal V.Garcés Díaz-Munío AdriàMartínez-Villaronga JorgeCivera - AlfonsJuan + AlfonsJuan 418–424 W18-6414 W18-6414.Poster.pdf @@ -13077,9 +13077,9 @@ <fixed-case>JUCBNMT</fixed-case> at <fixed-case>WMT</fixed-case>2018 News Translation Task: Character Based Neural Machine Translation of <fixed-case>F</fixed-case>innish to <fixed-case>E</fixed-case>nglish - Sainik KumarMahata + Sainik KumarMahata DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 445–448 W18-6418 In the current work, we present a description of the system submitted to WMT 2018 News Translation Shared task. The system was created to translate news text from Finnish to English. The system used a Character Based Neural Machine Translation model to accomplish the given task. The current paper documents the preprocessing steps, the description of the submitted system and the results produced using the same. Our system garnered a BLEU score of 12.9. @@ -13092,7 +13092,7 @@ RuiWang AtsushiFujita MasaoUtiyama - EiichiroSumita + EiichiroSumita 449–455 W18-6419 This paper presents the NICT’s participation to the WMT18 shared news translation task. We participated in the eight translation directions of four language pairs: Estonian-English, Finnish-English, Turkish-English and Chinese-English. For each translation direction, we prepared state-of-the-art statistical (SMT) and neural (NMT) machine translation systems. Our NMT systems were trained with the transformer architecture using the provided parallel data enlarged with a large quantity of back-translated monolingual data that we generated with a new incremental training framework. Our primary submissions to the task are the result of a simple combination of our SMT and NMT systems. Our systems are ranked first for the Estonian-English and Finnish-English language pairs (constraint) according to BLEU-cased. @@ -13121,9 +13121,9 @@ The Karlsruhe Institute of Technology Systems for the News Translation Task in <fixed-case>WMT</fixed-case> 2018 - Ngoc-QuanPham + Ngoc-QuanPham JanNiehues - AlexanderWaibel + AlexanderWaibel 467–472 W18-6422 We present our experiments in the scope of the news translation task in WMT 2018, in directions: English→German. The core of our systems is the encoder-decoder based neural machine translation models using the transformer architecture. We enhanced the model with a deeper architecture. By using techniques to limit the memory consumption, we were able to train models that are 4 times larger on one GPU and improve the performance by 1.2 BLEU points. Furthermore, we performed sentence selection for the newly available ParaCrawl corpus. Thereby, we could improve the effectiveness of the corpus by 0.5 BLEU points. @@ -13132,9 +13132,9 @@ Tilde’s Machine Translation Systems for <fixed-case>WMT</fixed-case> 2018 - MārcisPinnis + MārcisPinnis MatīssRikters - RihardsKrišlauks + RihardsKrišlauks 473–481 W18-6423 The paper describes the development process of the Tilde’s NMT systems that were submitted for the WMT 2018 shared task on news translation. We describe the data filtering and pre-processing workflows, the NMT system training architectures, and automatic evaluation results. For the WMT 2018 shared task, we submitted seven systems (both constrained and unconstrained) for English-Estonian and Estonian-English translation directions. The submitted systems were trained using Transformer models. @@ -13156,7 +13156,7 @@ YvesScherrer TommiNieminen ArviHurskainen - JörgTiedemann + JörgTiedemann 488–495 W18-6425 This paper describes the University of Helsinki’s submissions to the WMT18 shared news translation task for English-Finnish and English-Estonian, in both directions. This year, our main submissions employ a novel neural architecture, the Transformer, using the open-source OpenNMT framework. Our experiments couple domain labeling and fine tuned multilingual models with shared vocabularies between the source and target language, using the provided parallel data of the shared task and additional back-translations. Finally, we compare, for the English-to-Finnish case, the effectiveness of different machine translation architectures, starting from a rule-based approach to our best neural model, analyzing the output and highlighting future research. @@ -13170,7 +13170,7 @@ ParniaBahar YunsuKim ArneNix - HermannNey + HermannNey 496–503 W18-6426 This paper describes the statistical machine translation systems developed at RWTH Aachen University for the German→English, English→Turkish and Chinese→English translation tasks of the EMNLP 2018 Third Conference on Machine Translation (WMT 2018). We use ensembles of neural machine translation systems based on the Transformer architecture. Our main focus is on the German→English task where we to all automatic scored first with respect metrics provided by the organizers. We identify data selection, fine-tuning, batch size and model dimension as important hyperparameters. In total we improve by 6.8% BLEU over our last year’s submission and by 4.8% BLEU over the winning system of the 2017 German→English task. In English→Turkish task, we show 3.6% BLEU improvement over the last year’s winning system. We further report results on the Chinese→English task where we improve 2.2% BLEU on average over our baseline systems but stay behind the 2018 winning systems. @@ -13180,7 +13180,7 @@ The <fixed-case>U</fixed-case>niversity of <fixed-case>C</fixed-case>ambridge’s Machine Translation Systems for <fixed-case>WMT</fixed-case>18 FelixStahlberg - Adriàde Gispert + Adriàde Gispert BillByrne 504–512 W18-6427 @@ -13193,7 +13193,7 @@ DarioStojanovski ViktorHangya MatthiasHuck - AlexanderFraser + AlexanderFraser 513–521 W18-6428 We describe LMU Munich’s unsupervised machine translation systems for English↔German translation. These systems were used to participate in the WMT18 news translation shared task and more specifically, for the unsupervised learning sub-track. The systems are trained on English and German monolingual data only and exploit and combine previously proposed techniques such as using word-by-word translated data based on bilingual word embeddings, denoising and on-the-fly backtranslation. @@ -13304,7 +13304,7 @@ The Word Sense Disambiguation Test Suite at <fixed-case>WMT</fixed-case>18 - AnnetteRios + AnnetteRios MathiasMüller RicoSennrich 588–596 @@ -13335,12 +13335,12 @@ BenoitHuet MikkoKurimo JormaLaaksonen - BernardMerialdo + BernardMerialdo PhuPham MatsSjöberg UmutSulubacak - JörgTiedemann - RaphaelTroncy + JörgTiedemann + RaphaelTroncy RaúlVázquez 603–611 W18-6439 @@ -13355,7 +13355,7 @@ MichaelHutt GrantErdmann JohnDuselis - JamesDavis + JamesDavis 612–615 W18-6440 AFRL-Ohio State extends its usage of visual domain-driven machine translation for use as a peer with traditional machine translation systems. As a peer, it is enveloped into a system combination of neural and statistical MT systems to present a composite translation. @@ -13364,9 +13364,9 @@ <fixed-case>CUNI</fixed-case> System for the <fixed-case>WMT</fixed-case>18 Multimodal Translation Task - JindřichHelcl + JindřichHelcl JindřichLibovický - DušanVariš + DušanVariš 616–623 W18-6441 We present our submission to the WMT18 Multimodal Translation Task. The main feature of our submission is applying a self-attentive network instead of a recurrent neural network. We evaluate two methods of incorporating the visual features in the model: first, we include the image representation as another input to the network; second, we train the model to predict the visual features and use it as an auxiliary objective. For our submission, we acquired both textual and multimodal additional data. Both of the proposed methods yield significant improvements over recurrent networks and self-attentive textual baselines. @@ -13376,8 +13376,8 @@ <fixed-case>S</fixed-case>heffield Submissions for <fixed-case>WMT</fixed-case>18 Multimodal Translation Shared Task ChiraagLala - Pranava SwaroopMadhyastha - CarolinaScarton + Pranava SwaroopMadhyastha + CarolinaScarton LuciaSpecia 624–631 W18-6442 @@ -13421,7 +13421,7 @@ MatthiasHuck DarioStojanovski ViktorHangya - AlexanderFraser + AlexanderFraser 648–654 W18-6446 We present the LMU Munich machine translation systems for the English–German language pair. We have built neural machine translation systems for both translation directions (English→German and German→English) and for two different domains (the biomedical domain and the news domain). The systems were used for our participation in the WMT18 biomedical translation task and in the shared task on machine translation of news. The main focus of our recent system development efforts has been on achieving improvements in the biomedical domain over last year’s strong biomedical translation engine for English→German (Huck et al., 2017a). Considerable progress has been made in the latter task, which we report on in this paper. @@ -13453,7 +13453,7 @@ Neural Machine Translation with the Transformer and Multi-Source <fixed-case>R</fixed-case>omance Languages for the Biomedical <fixed-case>WMT</fixed-case> 2018 task BrianTubay - Marta R.Costa-jussà + Marta R.Costa-jussà 667–670 W18-6449 The Transformer architecture has become the state-of-the-art in Machine Translation. This model, which relies on attention-based mechanisms, has outperformed previous neural machine translation architectures in several tasks. In this system description paper, we report details of training neural machine translation with multi-source Romance languages with the Transformer model and in the evaluation frame of the biomedical WMT 2018 task. Using multi-source languages from the same family allows improvements of over 6 BLEU points. @@ -13474,10 +13474,10 @@ Findings of the <fixed-case>WMT</fixed-case> 2018 Shared Task on Quality Estimation LuciaSpecia - FrédéricBlain + FrédéricBlain VarvaraLogacheva - RamónF. Astudillo - André F. T.Martins + RamónF. Astudillo + André F. T.Martins 689–709 W18-6451 We report the results of the WMT18 shared task on Quality Estimation, i.e. the task of predicting the quality of the output of machine translation systems at various granularity levels: word, phrase, sentence and document. This year we include four language pairs, three text domains, and translations produced by both statistical and neural machine translation systems. Participating teams from ten institutions submitted a variety of systems to different task variants and language pairs. @@ -13488,7 +13488,7 @@ Findings of the <fixed-case>WMT</fixed-case> 2018 Shared Task on Automatic Post-Editing RajenChatterjee MatteoNegri - RaphaelRubino + RaphaelRubino MarcoTurchi 710–725 W18-6452 @@ -13501,7 +13501,7 @@ PhilippKoehn HudaKhayrallah KennethHeafield - Mikel L.Forcada + Mikel L.Forcada 726–739 W18-6453 We posed the shared task of assigning sentence-level quality scores for a very noisy corpus of sentence pairs crawled from the web, with the goal of sub-selecting 1% and 10% of high-quality data to be used to train machine translation systems. Seventeen participants from companies, national research labs, and universities participated in this task. @@ -13522,7 +13522,7 @@ <fixed-case>ITER</fixed-case>: Improving Translation Edit Rate through Optimizable Edit Costs JoybrataPanja - Sudip KumarNaskar + Sudip KumarNaskar 746–750 W18-6455 The paper presents our participation in the WMT 2018 Metrics Shared Task. We propose an improved version of Translation Edit/Error Rate (TER). In addition to including the basic edit operations in TER, namely - insertion, deletion, substitution and shift, our metric also allows stem matching, optimizable edit costs and better normalization so as to correlate better with human judgement scores. The proposed metric shows much higher correlation with human judgments than TER. @@ -13544,7 +13544,7 @@ Keep It or Not: Word Level Quality Estimation for Post-Editing PrasenjitBasu SantanuPal - Sudip KumarNaskar + Sudip KumarNaskar 759–764 W18-6457 The paper presents our participation in the WMT 2018 shared task on word level quality estimation (QE) of machine translated (MT) text, i.e., to predict whether a word in MT output for a given source context is correctly translated and hence should be retained in the post-edited translation (PE), or not. To perform the QE task, we measure the similarity of the source context of the target MT word with the context for which the word is retained in PE in the training data. This is achieved in two different ways, using Bag-of-Words (BoW) model and Document-to-Vector (Doc2Vec) model. In the BoW model, we compute the cosine similarity while in the Doc2Vec model we consider the Doc2Vec similarity. By applying the Kneedle algorithm on the F1mult vs. similarity score plot, we derive the threshold based on which OK/BAD decisions are taken for the MT words. Experimental results revealed that the Doc2Vec model performs better than the BoW model on the word level QE task. @@ -13553,7 +13553,7 @@ <fixed-case>RTM</fixed-case> results for Predicting Translation Performance - ErgunBiçici + ErgunBiçici 765–769 W18-6458 With improved prediction combination using weights based on their training performance and stacking and multilayer perceptrons to build deeper prediction models, RTMs become the 3rd system in general at the sentence-level prediction of translation scores and achieve the lowest RMSE in English to German NMT QET results. For the document-level task, we compare document-level RTM models with sentence-level RTM models obtained with the concatenation of document sentences and obtain similar results. @@ -13585,7 +13585,7 @@ Supervised and Unsupervised Minimalist Quality Estimators: Vicomtech’s Participation in the <fixed-case>WMT</fixed-case> 2018 Quality Estimation Task ThierryEtchegoyhen - EvaMartínez Garcia + EvaMartínez Garcia AndoniAzpeitia 782–787 W18-6461 @@ -13608,8 +13608,8 @@ <fixed-case>S</fixed-case>heffield Submissions for the <fixed-case>WMT</fixed-case>18 Quality Estimation Shared Task JuliaIve - CarolinaScarton - FrédéricBlain + CarolinaScarton + FrédéricBlain LuciaSpecia 794–800 W18-6463 @@ -13620,8 +13620,8 @@ <fixed-case>UA</fixed-case>lacant machine translation quality estimation at <fixed-case>WMT</fixed-case> 2018: a simple approach using phrase tables and feed-forward neural networks FelipeSánchez-Martínez - MiquelEsplà-Gomis - Mikel L.Forcada + MiquelEsplà-Gomis + Mikel L.Forcada 801–808 W18-6464 We describe the Universitat d’Alacant submissions to the word- and sentence-level machine translation (MT) quality estimation (QE) shared task at WMT 2018. Our approach to word-level MT QE builds on previous work to mark the words in the machine-translated sentence as OK or BAD, and is extended to determine if a word or sequence of words need to be inserted in the gap after each word. Our sentence-level submission simply uses the edit operations predicted by the word-level approach to approximate TER. The method presented ranked first in the sub-task of identifying insertions in gaps for three out of the six datasets, and second in the rest of them. @@ -13632,7 +13632,7 @@ <fixed-case>A</fixed-case>libaba Submission for <fixed-case>WMT</fixed-case>18 Quality Estimation Task JiayiWang KaiFan - BoLi + BoLi FengmingZhou BoxingChen YangbinShi @@ -13669,7 +13669,7 @@ SantanuPal NicoHerbig AntonioKrüger - Josefvan Genabith + Josefvan Genabith 827–835 W18-6468 This paper presents our English–German Automatic Post-Editing (APE) system submitted to the APE Task organized at WMT 2018 (Chatterjee et al., 2018). The proposed model is an extension of the transformer architecture: two separate self-attention-based encoders encode the machine translation output (mt) and the source (src), followed by a joint encoder that attends over a combination of these two encoded sequences (encsrc and encmt) for generating the post-edited sentence. We compare this multi-source architecture (i.e, {src, mt} → pe) to a monolingual transformer (i.e., mt → pe) model and an ensemble combining the multi-source {src, mt} → pe and single-source mt → pe models. For both the PBSMT and the NMT task, the ensemble yields the best results, followed by the multi-source model and last the single-source approach. Our best model, the ensemble, achieves a BLEU score of 66.16 and 74.22 for the PBSMT and NMT task, respectively. @@ -13679,7 +13679,7 @@ <fixed-case>DFKI</fixed-case>-<fixed-case>MLT</fixed-case> System Description for the <fixed-case>WMT</fixed-case>18 Automatic Post-editing Task DariaPylypenko - RaphaelRubino + RaphaelRubino 836–839 W18-6469 This paper presents the Automatic Post-editing (APE) systems submitted by the DFKI-MLT group to the WMT’18 APE shared task. Three monolingual neural sequence-to-sequence APE systems were trained using target-language data only: one using an attentional recurrent neural network architecture and two using the attention-only (transformer) architecture. The training data was composed of machine translated (MT) output used as source to the APE model aligned with their manually post-edited version or reference translation as target. We made use of the provided training sets only and trained APE models applicable to phrase-based and neural MT outputs. Results show better performances reached by the attention-only model over the recurrent one, significant improvement over the baseline when post-editing phrase-based MT output but degradation when applied to neural MT output. @@ -13723,7 +13723,7 @@ <fixed-case>STACC</fixed-case>, <fixed-case>OOV</fixed-case> Density and N-gram Saturation: Vicomtech’s Participation in the <fixed-case>WMT</fixed-case> 2018 Shared Task on Parallel Corpus Filtering AndoniAzpeitia ThierryEtchegoyhen - EvaMartínez Garcia + EvaMartínez Garcia 860–866 W18-6473 We describe Vicomtech’s participation in the WMT 2018 Shared Task on parallel corpus filtering. We aimed to evaluate a simple approach to the task, which can efficiently process large volumes of data and can be easily deployed for new datasets in different language pairs and domains. We based our approach on STACC, an efficient and portable method for parallel sentence identification in comparable corpora. To address the specifics of the corpus filtering task, which features significant volumes of noisy data, the core method was expanded with a penalty based on the amount of unknown words in sentence pairs. Additionally, we experimented with a complementary data saturation method based on source sentence n-grams, with the goal of demoting parallel sentence pairs that do not contribute significant amounts of yet unobserved n-grams. Our approach requires no prior training and is highly efficient on the type of large datasets featured in the corpus filtering task. We achieved competitive results with this simple and portable method, ranking in the top half among competing systems overall. @@ -13733,7 +13733,7 @@ A hybrid pipeline of rules and machine learning to filter web-crawled parallel corpora EduardBarbu - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu 867–871 W18-6474 A hybrid pipeline comprising rules and machine learning is used to filter a noisy web English-German parallel corpus for the Parallel Corpus Filtering task. The core of the pipeline is a module based on the logistic regression algorithm that returns the probability that a translation unit is accepted. The training set for the logistic regression is created by automatic annotation. The quality of the automatic annotation is estimated by manually labeling the training set. @@ -13753,7 +13753,7 @@ <fixed-case>MAJE</fixed-case> Submission to the <fixed-case>WMT</fixed-case>2018 Shared Task on Parallel Corpus Filtering MarinaFomicheva - JesúsGonzález-Rubio + JesúsGonzález-Rubio 877–881 W18-6476 This paper describes the participation of Webinterpret in the shared task on parallel corpus filtering at the Third Conference on Machine Translation (WMT 2018). The paper describes the main characteristics of our approach and discusses the results obtained on the data sets published for the shared task. @@ -13763,7 +13763,7 @@ An Unsupervised System for Parallel Corpus Filtering ViktorHangya - AlexanderFraser + AlexanderFraser 882–887 W18-6477 In this paper we describe LMU Munich’s submission for the WMT 2018 Parallel Corpus Filtering shared task which addresses the problem of cleaning noisy parallel corpora. The task of mining and cleaning parallel sentences is important for improving the quality of machine translation systems, especially for low-resource languages. We tackle this problem in a fully unsupervised fashion relying on bilingual word embeddings created without any bilingual signal. After pre-filtering noisy data we rank sentence pairs by calculating bilingual sentence-level similarities and then remove redundant data by employing monolingual similarity as well. Our unsupervised system achieved good performance during the official evaluation of the shared task, scoring only a few BLEU points behind the best systems, while not requiring any parallel training data. @@ -13798,7 +13798,7 @@ SamuelLarkin DarleneStewart MichelSimard - CyrilGoutte + CyrilGoutte Chi-kiuLo 900–907 W18-6480 @@ -13812,7 +13812,7 @@ MichelSimard DarleneStewart SamuelLarkin - CyrilGoutte + CyrilGoutte PatrickLittell 908–916 W18-6481 @@ -13834,7 +13834,7 @@ <fixed-case>UTFPR</fixed-case> at <fixed-case>WMT</fixed-case> 2018: Minimalistic Supervised Corpora Filtering for Machine Translation - GustavoPaetzold + GustavoPaetzold 923–927 W18-6483 We present the UTFPR systems at the WMT 2018 parallel corpus filtering task. Our supervised approach discerns between good and bad translations by training classic binary classification models over an artificially produced binary classification dataset derived from a high-quality translation set, and a minimalistic set of 6 semantic distance features that rely only on easy-to-gather resources. We rank translations by their probability for the “good” label. Our results show that logistic regression pairs best with our approach, yielding more consistent results throughout the different settings evaluated. @@ -13846,7 +13846,7 @@ VassilisPapavassiliou SokratisSofianopoulos ProkopisProkopidis - SteliosPiperidis + SteliosPiperidis 928–933 W18-6484 This paper describes the submission of the Institute for Language and Speech Processing/Athena Research and Innovation Center (ILSP/ARC) for the WMT 2018 Parallel Corpus Filtering shared task. We explore several properties of sentences and sentence pairs that our system explored in the context of the task with the purpose of clustering sentence pairs according to their appropriateness in training MT systems. We also discuss alternative methods for ranking the sentence pairs of the most appropriate clusters with the aim of generating the two datasets (of 10 and 100 million words as required in the task) that were evaluated. By summarizing the results of several experiments that were carried out by the organizers during the evaluation phase, our submission achieved an average BLEU score of 26.41, even though it does not make use of any language-specific resources like bilingual lexica, monolingual corpora, or MT output, while the average score of the best participant system was 27.91. @@ -13855,8 +13855,8 @@ <fixed-case>SYSTRAN</fixed-case> Participation to the <fixed-case>WMT</fixed-case>2018 Shared Task on Parallel Corpus Filtering - MinhQuangPham - JosepCrego + MinhQuangPham + JosepCrego JeanSenellart 934–938 W18-6485 @@ -13866,7 +13866,7 @@ Tilde’s Parallel Corpus Filtering Methods for <fixed-case>WMT</fixed-case> 2018 - MārcisPinnis + MārcisPinnis 939–945 W18-6486 The paper describes parallel corpus filtering methods that allow reducing noise of noisy “parallel” corpora from a level where the corpora are not usable for neural machine translation training (i.e., the resulting systems fail to achieve reasonable translation quality; well below 10 BLEU points) up to a level where the trained systems show decent (over 20 BLEU points on a 10 million word dataset and up to 30 BLEU points on a 100 million word dataset). The paper also documents Tilde’s submissions to the WMT 2018 shared task on parallel corpus filtering. @@ -13880,7 +13880,7 @@ YunsuKim MiguelGraça AmanGokrani - HermannNey + HermannNey 946–954 W18-6487 This paper describes the submission of RWTH Aachen University for the De→En parallel corpus filtering task of the EMNLP 2018 Third Conference on Machine Translation (WMT 2018). We use several rule-based, heuristic methods to preselect sentence pairs. These sentence pairs are scored with count-based and neural systems as language and translation models. In addition to single sentence-pair scoring, we further implement a simple redundancy removing heuristic. Our best performing corpus filtering system relies on recurrent neural language models and translation models based on the transformer architecture. A model trained on 10M randomly sampled tokens reaches a performance of 9.2% BLEU on newstest2018. Using our filtering and ranking techniques we achieve 34.8% BLEU. @@ -13889,10 +13889,10 @@ Prompsit’s submission to <fixed-case>WMT</fixed-case> 2018 Parallel Corpus Filtering shared task - Víctor M.Sánchez-Cartagena + Víctor M.Sánchez-Cartagena MartaBañón - SergioOrtiz-Rojas - GemaRamírez + SergioOrtiz-Rojas + GemaRamírez 955–962 W18-6488 This paper describes Prompsit Language Engineering’s submissions to the WMT 2018 parallel corpus filtering shared task. Our four submissions were based on an automatic classifier for identifying pairs of sentences that are mutual translations. A set of hand-crafted hard rules for discarding sentences with evident flaws were applied before the classifier. We explored different strategies for achieving a training corpus with diverse vocabulary and fluent sentences: language model scoring, an active-learning-inspired data selection algorithm and n-gram saturation. Our submissions were very competitive in comparison with other participants on the 100 million word training corpus. @@ -13904,7 +13904,7 @@ RuiWang BenjaminMarie MasaoUtiyama - EiichiroSumita + EiichiroSumita 963–967 W18-6489 This paper presents the NICT’s participation in the WMT18 shared parallel corpus filtering task. The organizers provided 1 billion words German-English corpus crawled from the web as part of the Paracrawl project. This corpus is too noisy to build an acceptable neural machine translation (NMT) system. Using the clean data of the WMT18 shared news translation task, we designed several features and trained a classifier to score each sentence pairs in the noisy data. Finally, we sampled 100 million and 10 million words and built corresponding NMT systems. Empirical results show that our NMT systems trained on sampled data achieve promising performance. @@ -13916,7 +13916,7 @@ Proceedings of the 11th International Conference on Natural Language Generation W18-65 - EmielKrahmer + EmielKrahmer AlbertGatt MartijnGoudbeek Association for Computational Linguistics @@ -13958,7 +13958,7 @@ Syntactic Manipulation for Generating more Diverse and Interesting Texts - Jan MilanDeriu + Jan MilanDeriu MarkCieliebak 22–34 W18-6503 @@ -13982,7 +13982,7 @@ SebastianGehrmann FalconDai HenryElder - AlexanderRush + AlexanderRush 46–56 W18-6505 Learning to generate fluent natural language from structured data with neural networks has become an common approach for NLG. This problem can be challenging when the form of the structured data varies between examples. This paper presents a survey of several extensions to sequence-to-sequence models to account for the latent content selection process, particularly variants of copy attention and coverage decoding. We further propose a training method based on diverse ensembling to encourage models to learn distinct sentence templates during training. An empirical evaluation of these techniques shows an increase in the quality of generated text across five automated metrics, as well as human evaluation. @@ -13992,7 +13992,7 @@ <fixed-case>S</fixed-case>imple<fixed-case>NLG</fixed-case>-<fixed-case>ZH</fixed-case>: a Linguistic Realisation Engine for <fixed-case>M</fixed-case>andarin GuanyiChen - Keesvan Deemter + Keesvan Deemter ChenghuaLin 57–66 W18-6506 @@ -14004,7 +14004,7 @@ Adapting <fixed-case>S</fixed-case>imple<fixed-case>NLG</fixed-case> to <fixed-case>G</fixed-case>alician language AndreaCascallar-Fuentes AlejandroRamos-Soto - AlbertoBugarín Diz + AlbertoBugarín Diz 67–72 W18-6507 In this paper, we describe SimpleNLG-GL, an adaptation of the linguistic realisation SimpleNLG library for the Galician language. This implementation is derived from SimpleNLG-ES, the English-Spanish version of this library. It has been tested using a battery of examples which covers the most common rules for Galician. @@ -14014,7 +14014,7 @@ Going <fixed-case>D</fixed-case>utch: Creating <fixed-case>S</fixed-case>imple<fixed-case>NLG</fixed-case>-<fixed-case>NL</fixed-case> Ruudde Jong - MariëtTheune + MariëtTheune 73–78 W18-6508 This paper presents SimpleNLG-NL, an adaptation of the SimpleNLG surface realisation engine for the Dutch language. It describes a novel method for determining and testing the grammatical constructions to be implemented, using target sentences sampled from a treebank. @@ -14025,7 +14025,7 @@ Learning to Flip the Bias of News Headlines Wei-FanChen HenningWachsmuth - KhalidAl-Khatib + KhalidAl-Khatib BennoStein 79–88 W18-6509 @@ -14037,7 +14037,7 @@ Stylistically User-Specific Generation AbdurrisyadFikri HiroyaTakamura - ManabuOkumura + ManabuOkumura 89–98 W18-6510 Recent neural models for response generation show good results in terms of general responses. In real conversations, however, depending on the speaker/responder, similar utterances should require different responses. In this study, we attempt to consider individual user’s information in adjusting the notable sequence-to-sequence (seq2seq) model for more diverse, user-specific responses. We assume that we need user-specific features to adjust the response and we argue that some selected representative words from the users are suitable for this task. Furthermore, we prove that even for unseen or unknown users, our model can provide more diverse and interesting responses, while maintaining correlation with input utterances. Experimental results with human evaluation show that our model can generate more interesting responses than the popular seq2seqmodel and achieve higher relevance with input utterances than our baseline. @@ -14051,7 +14051,7 @@ XingkunLiu AtanasLaskov PedroPatron - HelenHastie + HelenHastie 99–108 W18-6511 As unmanned vehicles become more autonomous, it is important to maintain a high level of transparency regarding their behaviour and how they operate. This is particularly important in remote locations where they cannot be directly observed. Here, we describe a method for generating explanations in natural language of autonomous system behaviour and reasoning. Our method involves deriving an interpretable model of autonomy through having an expert ‘speak aloud’ and providing various levels of detail based on this model. Through an online evaluation study with operators, we show it is best to generate explanations with multiple possible reasons but tersely worded. This work has implications for designing interfaces for autonomy as well as for explainable AI and operator training. @@ -14112,7 +14112,7 @@ <fixed-case>S</fixed-case>patial<fixed-case>VOC</fixed-case>2<fixed-case>K</fixed-case>: A Multilingual Dataset of Images with Annotations and Features for Spatial Relations between Objects - AnjaBelz + AnjaBelz AdrianMuscat PierreAnguill MouhamadouSow @@ -14128,7 +14128,7 @@ Adding the Third Dimension to Spatial Relation Detection in 2<fixed-case>D</fixed-case> Images BrandonBirmingham AdrianMuscat - AnjaBelz + AnjaBelz 146–151 W18-6517 Detection of spatial relations between objects in images is currently a popular subject in image description research. A range of different language and geometric object features have been used in this context, but methods have not so far used explicit information about the third dimension (depth), except when manually added to annotations. The lack of such information hampers detection of spatial relations that are inherently 3D. In this paper, we use a fully automatic method for creating a depth map of an image and derive several different object-level depth features from it which we add to an existing feature set to test the effect on spatial relation detection. We show that performance increases are obtained from adding depth features in all scenarios tested. @@ -14148,7 +14148,7 @@ Modelling Pro-drop with the Rational Speech Acts Model GuanyiChen - Keesvan Deemter + Keesvan Deemter ChenghuaLin 159–164 W18-6519 @@ -14172,7 +14172,7 @@ Enriching the <fixed-case>W</fixed-case>eb<fixed-case>NLG</fixed-case> corpus - ThiagoCastro Ferreira + ThiagoCastro Ferreira DiegoMoussallem EmielKrahmer SanderWubben @@ -14185,7 +14185,7 @@ Towards making <fixed-case>NLG</fixed-case> a voice for interpretable Machine Learning JamesForrest - SomayajuluSripada + SomayajuluSripada WeiPang GeorgeCoghill 177–182 @@ -14198,7 +14198,7 @@ Template-based multilingual football reports generation using <fixed-case>W</fixed-case>ikidata as a knowledge base LorenzoGatti Chrisvan der Lee - MariëtTheune + MariëtTheune 183–188 W18-6523 This paper presents a new version of a football reports generation system called PASS. The original version generated Dutch text and relied on a limited hand-crafted knowledge base. We describe how, in a short amount of time, we extended PASS to produce English texts, exploiting machine translation and Wikidata as a large-scale source of multilingual knowledge. @@ -14208,7 +14208,7 @@ Automatic Evaluation of Neural Personality-based Chatbots YujieXing - RaquelFernández + RaquelFernández 189–194 W18-6524 Stylistic variation is critical to render the utterances generated by conversational agents natural and engaging. In this paper, we focus on sequence-to-sequence models for open-domain dialogue response generation and propose a new method to evaluate the extent to which such models are able to generate responses that reflect different personality traits. @@ -14237,7 +14237,7 @@ Underspecified <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependency Structures as Inputs for Multilingual Surface Realisation SimonMille - AnjaBelz + AnjaBelz BerndBohnet LeoWanner 199–209 @@ -14249,7 +14249,7 @@ <fixed-case>LSTM</fixed-case> Hypertagging ReidFu - MichaelWhite + MichaelWhite 210–220 W18-6528 Hypertagging, or supertagging for surface realization, is the process of assigning lexical categories to nodes in an input semantic graph. Previous work has shown that hypertagging significantly increases realization speed and quality by reducing the search space of the realizer. Building on recent work using LSTMs to improve accuracy on supertagging for parsing, we develop an LSTM hypertagging method for OpenCCG, an open source NLP toolkit for CCG. Our results show significant improvements in both hypertagging accuracy and downstream realization performance. @@ -14269,12 +14269,12 @@ Generating <fixed-case>E</fixed-case>-Commerce Product Titles and Predicting their Quality - José G.Camargo de Souza + José G.Camargo de Souza MichaelKozielski PrashantMathur ErnieChang MarcoGuerini - MatteoNegri + MatteoNegri MarcoTurchi EvgenyMatusov 233–243 @@ -14286,7 +14286,7 @@ Designing and testing the messages produced by a virtual dietitian LucaAnselma - AlessandroMazzei + AlessandroMazzei 244–253 W18-6531 This paper presents a project about the automatic generation of persuasive messages in the context of the diet management. In the first part of the paper we introduce the basic mechanisms related to data interpretation and content selection for a numerical data-to-text generation architecture. In the second part of the paper we discuss a number of factors influencing the design of the messages. In particular, we consider the design of the aggregation procedure. Finally, we present the results of a human-based evaluation concerning this design factor. @@ -14308,7 +14308,7 @@ Automatically Generating Questions about Novel Metaphors in Literature NatalieParde - RodneyNielsen + RodneyNielsen 264–273 W18-6533 The automatic generation of stimulating questions is crucial to the development of intelligent cognitive exercise applications. We developed an approach that generates appropriate Questioning the Author queries based on novel metaphors in diverse syntactic relations in literature. We show that the generated questions are comparable to human-generated questions in terms of naturalness, sensibility, and depth, and score slightly higher than human-generated questions in terms of clarity. We also show that questions generated about novel metaphors are rated as cognitively deeper than questions generated about non- or conventional metaphors, providing evidence that metaphor novelty can be leveraged to promote cognitive exercise. @@ -14329,7 +14329,7 @@ Can Neural Generators for Dialogue Learn Sentence Planning and Discourse Structuring? LenaReed ShereenOraby - MarilynWalker + MarilynWalker 284–295 W18-6535 Responses in task-oriented dialogue systems often realize multiple propositions whose ultimate form depends on the use of sentence planning and discourse structuring operations. For example a recommendation may consist of an explicitly evaluative utterance e.g. Chanpen Thai is the best option, along with content related by the justification discourse relation, e.g. It has great food and service, that combines multiple propositions into a single phrase. While neural generation methods integrate sentence planning and surface realization in one end-to-end learning framework, previous work has not shown that neural generators can: (1) perform common sentence planning and discourse structuring operations; (2) make decisions as to whether to realize content in a single sentence or over multiple sentences; (3) generalize sentence planning and discourse relation operations beyond what was seen in training. We systematically create large training corpora that exhibit particular sentence planning operations and then test neural models to see what they learn. We compare models without explicit latent variables for sentence planning with ones that provide explicit supervision during training. We show that only the models with additional supervision can reproduce sentence planning and discourse operations and generalize to situations unseen in training. @@ -14339,7 +14339,7 @@ Neural Generation of Diverse Questions using Answer Focus, Contextual and Linguistic Features VrindavanHarrison - MarilynWalker + MarilynWalker 296–306 W18-6536 Question Generation is the task of automatically creating questions from textual input. In this work we present a new Attentional Encoder–Decoder Recurrent Neural Network model for automatic question generation. Our model incorporates linguistic features and an additional sentence embedding to capture meaning at both sentence and word levels. The linguistic features are designed to capture information related to named entity recognition, word case, and entity coreference resolution. In addition our model uses a copying mechanism and a special answer signal that enables generation of numerous diverse questions on a given sentence. Our model achieves state of the art results of 19.98 Bleu_4 on a benchmark Question Generation dataset, outperforming all previously published results by a significant margin. A human evaluation also shows that the added features improve the quality of the generated questions. @@ -14364,7 +14364,7 @@ MartinPotthast NedimLipka BennoStein - HinrichSchütze + HinrichSchütze 318–321 W18-6538 The TL;DR challenge fosters research in abstractive summarization of informal text, the largest and fastest-growing source of textual data on the web, which has been overlooked by summarization research so far. The challenge owes its name to the frequent practice of social media users to supplement long posts with a “TL;DR”—for “too long; didn’t read”—followed by a short summary as a courtesy to those who would otherwise reply with the exact same abbreviation to indicate they did not care to read a post for its apparent length. Posts featuring TL;DR summaries form an excellent ground truth for summarization, and by tapping into this resource for the first time, we have mined millions of training examples from social media, opening the door to all kinds of generative models. @@ -14386,7 +14386,7 @@ Adapting Descriptions of People to the Point of View of a Moving Observer GonzaloMéndez RaquelHervás - PabloGervás + PabloGervás Ricardode la Rosa DanielRuiz 329–338 @@ -14433,7 +14433,7 @@ Comprehension Driven Document Planning in Natural Language Generation Systems CraigThomson EhudReiter - SomayajuluSripada + SomayajuluSripada 371–380 W18-6544 This paper proposes an approach to NLG system design which focuses on generating output text which can be more easily processed by the reader. Ways in which cognitive theory might be combined with existing NLG techniques are discussed and two simple experiments in content ordering are presented. @@ -14453,7 +14453,7 @@ Toward <fixed-case>B</fixed-case>ayesian Synchronous Tree Substitution Grammars for Sentence Planning - David M.Howcroft + David M.Howcroft DietrichKlakow VeraDemberg 391–396 @@ -14465,7 +14465,7 @@ The Task Matters: Comparing Image Captioning and Task-Based Dialogical Image Description NikolaiIlinykh - SinaZarrieß + SinaZarrieß DavidSchlangen 397–402 W18-6547 @@ -14477,7 +14477,7 @@ Generating Summaries of Sets of Consumer Products: Learning from Experiments KittipitchKuptavanich EhudReiter - KeesVan Deemter + KeesVan Deemter AdvaithSiddharthan 403–407 W18-6548 @@ -14514,8 +14514,8 @@ Meteorologists and Students: A resource for language grounding of geographical descriptors AlejandroRamos-Soto EhudReiter - Keesvan Deemter - JoseAlonso + Keesvan Deemter + JoseAlonso AlbertGatt 421–425 W18-6551 @@ -14526,7 +14526,7 @@ <fixed-case>C</fixed-case>yclegen: Cyclic consistency based product review generator from attributes VasuSharma - HarshSharma + HarshSharma AnkitaBishnu LabheshPatel 426–430 @@ -14549,7 +14549,7 @@ Characterizing Variation in Crowd-Sourced Data for Training Neural Language Generators to Produce Stylistically Varied Outputs JurajJuraska - MarilynWalker + MarilynWalker 441–450 W18-6554 One of the biggest challenges of end-to-end language generation from meaning representations in dialogue systems is making the outputs more natural and varied. Here we take a large corpus of 50K crowd-sourced utterances in the restaurant domain and develop text analysis methods that systematically characterize types of sentences in the training data. We then automatically label the training data to allow us to conduct two kinds of experiments with a neural generator. First, we test the effect of training the system with different stylistic partitions and quantify the effect of smaller, but more stylistically controlled training data. Second, we propose a method of labeling the style variants during training, and show that we can modify the style of the generated utterances using our stylistic labels. We contrast and compare these methods that can be used with any existing large corpus, showing how they vary in terms of semantic quality and stylistic control. @@ -14560,7 +14560,7 @@ Char2char Generation with Reranking for the <fixed-case>E</fixed-case>2<fixed-case>E</fixed-case> <fixed-case>NLG</fixed-case> Challenge ShubhamAgarwal MarcDymetman - ÉricGaussier + ÉricGaussier 451–456 W18-6555 This paper describes our submission to the E2E NLG Challenge. Recently, neural seq2seq approaches have become mainstream in NLG, often resorting to pre- (respectively post-) processing delexicalization (relexicalization) steps at the word-level to handle rare words. By contrast, we train a simple character level seq2seq model, which requires no pre/post-processing (delexicalization, tokenization or even lowercasing), with surprisingly good results. For further improvement, we explore two re-ranking approaches for scoring candidates. We also introduce a synthetic dataset creation procedure, which opens up a new way of creating artificial datasets for Natural Language Generation. @@ -14629,7 +14629,7 @@ Statistical <fixed-case>NLG</fixed-case> for Generating the Content and Form of Referring Expressions XiaoLi - Keesvan Deemter + Keesvan Deemter ChenghuaLin 482–491 W18-6561 @@ -14640,9 +14640,9 @@ Specificity measures and reference AlbertGatt - NicolásMarín + NicolásMarín GustavoRivas-Gervilla - DanielSánchez + DanielSánchez 492–502 W18-6562 In this paper we study empirically the validity of measures of referential success for referring expressions involving gradual properties. More specifically, we study the ability of several measures of referential success to predict the success of a user in choosing the right object, given a referring expression. Experimental results indicate that certain fuzzy measures of success are able to predict human accuracy in reference resolution. Such measures are therefore suitable for the estimation of the success or otherwise of a referring expression produced by a generation algorithm, especially in case the properties in a domain cannot be assumed to have crisp denotations. @@ -14651,7 +14651,7 @@ Decoding Strategies for Neural Referring Expression Generation - SinaZarrieß + SinaZarrieß DavidSchlangen 503–512 W18-6563 @@ -14664,7 +14664,7 @@ Proceedings of the 3rd Workshop on Computational Creativity in Natural Language Generation (CC-NLG 2018) W18-66 - HugoGonçalo Oliveira + HugoGonçalo Oliveira BenBurtenshaw RaquelHervás Association for Computational Linguistics @@ -14716,7 +14716,7 @@ Content Determination for Chess as a Source for Suspenseful Narratives RichardDoust - PabloGervás + PabloGervás 26-33 W18-6605 10.18653/v1/W18-6605 @@ -14726,7 +14726,7 @@ Generating Stories Using Role-playing Games and Simulated Human-like Conversations AlanTapscott CarlosLeón - PabloGervás + PabloGervás 34-42 W18-6606 10.18653/v1/W18-6606 @@ -14737,9 +14737,9 @@ Proceedings of the Workshop on Intelligent Interactive Systems and Language Generation (2IS&NLG) W18-67 - Jose M.Alonso + Jose M.Alonso AlejandroCatala - MariëtTheune + MariëtTheune Association for Computational Linguistics
Tilburg, the Netherlands
November @@ -14777,7 +14777,7 @@ MatthieuRiou StéphaneHuet BassamJabaian - FabriceLefèvre + FabriceLefèvre 9-14 W18-6703 10.18653/v1/W18-6703 @@ -14798,7 +14798,7 @@ Trouble on the Road: Finding Reasons for Commuter Stress from Tweets ReshmiGopalakrishna Pillai MikeThelwall - ConstantinOrasan + ConstantinOrasan 20-25 W18-6705 10.18653/v1/W18-6705 @@ -14806,10 +14806,10 @@
Assisted Nominalization for Academic <fixed-case>E</fixed-case>nglish Writing - JohnLee + JohnLee DariushSaberi MarvinLam - JonathanWebster + JonathanWebster 26-30 W18-6706 10.18653/v1/W18-6706 @@ -14818,10 +14818,10 @@ Two-Step Training and Mixed Encoding-Decoding for Implementing a Generative Chatbot with a Small Dialogue Corpus JintaeKim - Hyeon-GuLee + Hyeon-GuLee HarksooKim YeonsooLee - Young-GilKim + Young-GilKim 31-35 W18-6707 10.18653/v1/W18-6707 @@ -14831,7 +14831,7 @@ Supporting Content Design with an Eye Tracker: The Case of Weather-based Recommendations AlejandroCatala Jose M.Alonso - AlbertoBugarin + AlbertoBugarin 36-41 W18-6708 10.18653/v1/W18-6708 @@ -14843,7 +14843,7 @@ DaphneIppolito ArunKirubarajan JaiThirani - LyleUngar + LyleUngar ChrisCallison-Burch 42-44 W18-6709 @@ -14854,7 +14854,7 @@ <fixed-case>C</fixed-case>heck<fixed-case>Y</fixed-case>our<fixed-case>M</fixed-case>eal!: diet management with <fixed-case>NLG</fixed-case> LucaAnselma SimoneDonetti - AlessandroMazzei + AlessandroMazzei AndreaPirone 45-47 W18-6710 @@ -14866,7 +14866,7 @@ Proceedings of the Workshop on NLG for Human–Robot Interaction W18-69 - Mary EllenFoster + Mary EllenFoster HendrikBuschmeier DimitraGkatzia Association for Computational Linguistics @@ -14885,7 +14885,7 @@ Context-sensitive Natural Language Generation for robot-assisted second language tutoring BramWillemsen Jande Wit - EmielKrahmer + EmielKrahmer Mirjamde Haas PaulVogt 1–7 @@ -14907,7 +14907,7 @@ Shaping a social robot’s humor with Natural Language Generation and socially-aware reinforcement learning HannesRitschel - ElisabethAndré + ElisabethAndré 12–16 W18-6903 Humor is an important aspect in human interaction to regulate conversations, increase interpersonal attraction and trust. For social robots, humor is one aspect to make interactions more natural, enjoyable, and to increase credibility and acceptance. In combination with appropriate non-verbal behavior, natural language generation offers the ability to create content on-the-fly. This work outlines the building-blocks for providing an individual, multimodal interaction experience by shaping the robot’s humor with the help of Natural Language Generation and Reinforcement Learning based on human social signals. @@ -14918,7 +14918,7 @@ From sensors to sense: Integrated heterogeneous ontologies for Natural Language Generation MihaiPomarlan RobertPorzel - JohnBateman + JohnBateman RainerMalaka 17–21 W18-6904 @@ -14930,7 +14930,7 @@ A farewell to arms: Non-verbal communication for non-humanoid robots Aaron G.Cass KristinaStriegnitz - NickWebb + NickWebb 22–26 W18-6905 Human-robot interactions situated in a dynamic environment create a unique mix of challenges for conversational systems. We argue that, on the one hand, NLG can contribute to addressing these challenges and that, on the other hand, they pose interesting research problems for NLG. To illustrate our position we describe our research on non-humanoid robots using non-verbal signals to support communication. @@ -14939,7 +14939,7 @@ Being data-driven is not enough: Revisiting interactive instruction giving as a challenge for <fixed-case>NLG</fixed-case> - SinaZarrieß + SinaZarrieß DavidSchlangen 27–31 W18-6906 @@ -14952,10 +14952,10 @@ Proceedings of the 1st Workshop on Automatic Text Adaptation (ATA) W18-70 - ArneJönsson + ArneJönsson EvelinaRennes HoracioSaggion - SanjaStajner + SanjaStajner VictoriaYaneva Association for Computational Linguistics
Tilburg, the Netherlands
@@ -14998,9 +14998,9 @@ Assisted Lexical Simplification for <fixed-case>F</fixed-case>rench Native Children with Reading Difficulties FirasHmida - Mokhtar B.Billami + Mokhtar B.Billami ThomasFrançois - NúriaGala + NúriaGala 21-28 W18-7004 10.18653/v1/W18-7004 @@ -15010,10 +15010,10 @@ Reference-less Quality Estimation of Text Simplification Systems LouisMartin SamuelHumeau - Pierre-EmmanuelMazaré - Éricde La Clergerie + Pierre-EmmanuelMazaré + Éricde La Clergerie AntoineBordes - BenoîtSagot + BenoîtSagot 29-38 W18-7005 10.18653/v1/W18-7005 @@ -15022,7 +15022,7 @@ Improving Machine Translation of <fixed-case>E</fixed-case>nglish Relative Clauses with Automatic Text Simplification SanjaŠtajner - MajaPopović + MajaPopović 39-48 W18-7006 10.18653/v1/W18-7006 @@ -15081,21 +15081,21 @@ Demonstrating the <fixed-case>MUSTE</fixed-case> Language Learning Environment HerbertLange - PeterLjunglöf + PeterLjunglöf 41-46 W18-7105 lange-ljunglof-2018-demonstrating Learner Corpus Anonymization in the Age of <fixed-case>GDPR</fixed-case>: Insights from the Creation of a Learner Corpus of <fixed-case>S</fixed-case>wedish - BeátaMegyesi + BeátaMegyesi LenaGranstedt SofiaJohansson JuliaPrentice DanRosén Carl-JohanSchenström GunlögSundberg - MatsWirén + MatsWirén ElenaVolodina 47-56 W18-7106 @@ -15104,7 +15104,7 @@ Work Smart - Reducing Effort in Short-Answer Grading MargotMieskes - UlrikePadó + UlrikePadó 57-68 W18-7107 mieskes-pado-2018-work @@ -15119,9 +15119,9 @@ A Linguistically-Informed Search Engine to Identifiy Reading Material for Functional Illiteracy Classes - ZarahWeiss + ZarahWeiss SabrinaDittrich - DetmarMeurers + DetmarMeurers 79-90 W18-7109 weiss-etal-2018-linguistically @@ -15129,10 +15129,10 @@ Feedback Strategies for Form and Meaning in a Real-life Language Tutoring System RamonZiai - BjoernRudzewitz + BjoernRudzewitz KordulaDe Kuthy FlorianNuxoll - DetmarMeurers + DetmarMeurers 91-98 W18-7110 ziai-etal-2018-feedback diff --git a/data/xml/W19.xml b/data/xml/W19.xml index b574d6183c..4315d520cc 100644 --- a/data/xml/W19.xml +++ b/data/xml/W19.xml @@ -3,7 +3,7 @@ Proceedings of the Society for Computation in Linguistics (SCiL) 2019 - GajaJarosz + GajaJarosz MaxNelson BrendanO’Connor JoePater @@ -18,7 +18,7 @@ Can Entropy Explain Successor Surprisal Effects in Reading? - Martenvan Schijndel + Martenvan Schijndel TalLinzen 1-7 10.7275/qtbb-9d05 @@ -56,7 +56,7 @@ Modeling Clausal Complementation for a Grammar Engineering Resource OlgaZamaraeva KristenHowell - Emily M.Bender + Emily M.Bender 39-49 10.7275/dygn-c796 W19-0105 @@ -65,7 +65,7 @@ Do <fixed-case>RNN</fixed-case>s learn human-like abstract word order preferences? RichardFutrell - Roger P.Levy + Roger P.Levy 50-59 10.7275/jb34-9986 W19-0106 @@ -81,7 +81,7 @@ Constraint breeding during on-line incremental learning - ElliotMoreton + ElliotMoreton 69-80 10.7275/6f9x-6411 W19-0108 @@ -90,7 +90,7 @@ An Incremental Iterated Response Model of Pragmatics ReubenCohn-Gordon - NoahGoodman + NoahGoodman ChristopherPotts 81-90 10.7275/cprc-8x17 @@ -152,7 +152,7 @@ Using Sentiment Induction to Understand Variation in Gendered Online Communities - LiLucy + LiLucy JuliaMendelsohn 156-166 10.7275/11wq-ep51 @@ -161,7 +161,7 @@ On the difficulty of a distributional semantics of spoken language - GrzegorzChrupała + GrzegorzChrupała LiekeGelderloos ÁkosKádár AfraAlishahi @@ -183,7 +183,7 @@ Guess Who’s Coming (and Who’s Going): Bringing Perspective to the Rational Speech Acts Framework Carolyn JaneAnderson - Brian W.Dillon + Brian W.Dillon 185-194 10.7275/9bn3-8x38 W19-0119 @@ -210,7 +210,7 @@ Modeling the Acquisition of Words with Multiple Meanings LibbyBarak SammyFloyd - AdeleGoldberg + AdeleGoldberg 216-225 10.7275/tr21-m273 W19-0122 @@ -218,7 +218,7 @@ Evaluation Order Effects in Dynamic Continuized <fixed-case>CCG</fixed-case>: From Negative Polarity Items to Balanced Punctuation - MichaelWhite + MichaelWhite 226-235 10.7275/kpch-rk05 W19-0123 @@ -226,10 +226,10 @@ <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation for Human-Robot Dialogue - Claire N.Bonial + Claire N.Bonial LuciaDonatelli JessicaErvin - Clare R.Voss + Clare R.Voss 236-246 10.7275/v3c5-yd35 W19-0124 @@ -267,7 +267,7 @@ On Evaluating the Generalization of <fixed-case>LSTM</fixed-case> Models in Formal Languages MiracSuzgun YonatanBelinkov - Stuart M.Shieber + Stuart M.Shieber 277-286 10.7275/s02b-4d91 W19-0128 @@ -275,10 +275,10 @@ Verb Argument Structure Alternations in Word and Sentence Embeddings - KatharinaKann + KatharinaKann AlexWarstadt AdinaWilliams - Samuel R.Bowman + Samuel R.Bowman 287-297 10.7275/q5js-4y86 W19-0129 @@ -289,9 +289,9 @@ Proceedings of the Fifth International Workshop on Computational Linguistics for Uralic Languages W19-03 - Tommi A.Pirinen - Heiki-JaanKaalep - Francis M.Tyers + Tommi A.Pirinen + Heiki-JaanKaalep + Francis M.Tyers Association for Computational Linguistics
Tartu, Estonia
January @@ -304,7 +304,7 @@ Data-Driven Morphological Analysis for <fixed-case>U</fixed-case>ralic Languages - MiikkaSilfverberg + MiikkaSilfverberg FrancisTyers 1–14 W19-0301 @@ -402,7 +402,7 @@ Is this the end? Two-step tokenization of sentence boundaries LindaWiechetek - Sjur NørstebøMoshagen + Sjur NørstebøMoshagen ThomasOmma 141–153 W19-0312 @@ -464,7 +464,7 @@ A Type-coherent, Expressive Representation as an Initial Step to Language Understanding Gene LouisKim - LenhartSchubert + LenhartSchubert 13–30 A growing interest in tasks involving language understanding by the NLP community has led to the need for effective semantic parsing and inference. Modern NLP systems use semantic representations that do not quite fulfill the nuanced needs for language understanding: adequately modeling language semantics, enabling general inferences, and being accurately recoverable. This document describes underspecified logical forms (ULF) for Episodic Logic (EL), which is an initial form for a semantic representation that balances these needs. ULFs fully resolve the semantic type structure while leaving issues such as quantifier scope, word sense, and anaphora unresolved; they provide a starting point for further resolution into EL, and enable certain structural inferences without further resolution. This document also presents preliminary results of creating a hand-annotated corpus of ULFs for the purpose of training a precise ULF parser, showing a three-person pairwise interannotator agreement of 0.88 on confident annotations. We hypothesize that a divide-and-conquer approach to semantic parsing starting with derivation of ULFs will lead to semantic analyses that do justice to subtle aspects of linguistic meaning, and will enable construction of more accurate semantic parsers. W19-0402 @@ -473,7 +473,7 @@ A Semantic Annotation Scheme for Quantification - HarryBunt + HarryBunt 31–42 This paper describes in brief the proposal called ‘QuantML’ which was accepted by the International Organisation for Standards (ISO) last February as a starting point for developing a standard for the interoperable annotation of quantification phenomena in natural language, as part of the ISO 24617 Semantic Annotation Framework. The proposal, firmly rooted in the theory of generalised quantifiers, neo-Davidsonian semantics, and DRT, covers a wide range of quantification phenomena. The QuantML scheme consists of (1) an abstract syntax which defines ‘annotation structures’ as triples and other set-theoretic constructs; (b) a compositional semantics of annotation structures; (3) an XML representation of annotation structures. W19-0403 @@ -503,7 +503,7 @@ A Semantic Ontology of <fixed-case>D</fixed-case>anish Adjectives - EckhardBick + EckhardBick 71–78 This paper presents a semantic annotation scheme for Danish adjectives, focusing both on prototypical semantic content and semantic collocational restrictions on an adjective’s head noun. The core type set comprises about 110 categories ordered in a shallow hierarchy with 14 primary and 25 secondary umbrella categories. In addition, domain information and binary sentiment tags are provided, as well as VerbNet-derived frames and semantic roles for those adjectives governing arguments. The scheme has been almost fully implemented on the lexicon of the Danish VISL parser, DanGram, containing 14,000 adjectives. We discuss the annotation scheme and its applicational perspectives, and present a statistical breakdown and coverage evaluation for three Danish reference corpora. W19-0406 @@ -537,7 +537,7 @@ Temporal and Aspectual Entailment ThomasKober SanderBijl de Vroe - MarkSteedman + MarkSteedman 103–119 Inferences regarding “Jane’s arrival in London” from predications such as “Jane is going to London” or “Jane has gone to London” depend on tense and aspect of the predications. Tense determines the temporal location of the predication in the past, present or future of the time of utterance. The aspectual auxiliaries on the other hand specify the internal constituency of the event, i.e. whether the event of “going to London” is completed and whether its consequences hold at that time or not. While tense and aspect are among the most important factors for determining natural language inference, there has been very little work to show whether modern embedding models capture these semantic concepts. In this paper we propose a novel entailment dataset and analyse the ability of contextualised word representations to perform inference on predications across aspectual types and tenses. We show that they encode a substantial amount of information relating to tense and aspect, but fail to consistently model inferences that require reasoning with these semantic properties. W19-0409 @@ -547,7 +547,7 @@ Don’t Blame Distributional Semantics if it can’t do Entailment MatthijsWestera - GemmaBoleda + GemmaBoleda 120–133 Distributional semantics has had enormous empirical success in Computational Linguistics and Cognitive Science in modeling various semantic phenomena, such as semantic similarity, and distributional models are widely used in state-of-the-art Natural Language Processing systems. However, the theoretical status of distributional semantics within a broader theory of language and cognition is still unclear: What does distributional semantics model? Can it be, on its own, a fully adequate model of the meanings of linguistic expressions? The standard answer is that distributional semantics is not fully adequate in this regard, because it falls short on some of the central aspects of formal semantic approaches: truth conditions, entailment, reference, and certain aspects of compositionality. We argue that this standard answer rests on a misconception: These aspects do not belong in a theory of expression meaning, they are instead aspects of speaker meaning, i.e., communicative intentions in a particular context. In a slogan: words do not refer, speakers do. Clearing this up enables us to argue that distributional semantics on its own is an adequate model of expression meaning. Our proposal sheds light on the role of distributional semantics in a broader theory of language and cognition, its relationship to formal semantics, and its place in computational models. W19-0410 @@ -556,7 +556,7 @@ Ambiguity in Explicit Discourse Connectives - BonnieWebber + BonnieWebber RashmiPrasad AlanLee 134–141 @@ -569,7 +569,7 @@ Aligning Open <fixed-case>IE</fixed-case> Relations and <fixed-case>KB</fixed-case> Relations using a <fixed-case>S</fixed-case>iamese Network Based on Word Embedding Rifki AfinaPutri GiwonHong - Sung-HyonMyaeng + Sung-HyonMyaeng 142–153 Open Information Extraction (Open IE) aims at generating entity-relation-entity triples from a large amount of text, aiming at capturing key semantics of the text. Given a triple, the relation expresses the type of semantic relation between the entities. Although relations from an Open IE system are more extensible than those used in a traditional Information Extraction system and a Knowledge Base (KB) such as Knowledge Graphs, the former lacks in semantics; an Open IE relation is simply a sequence of words, whereas a KB relation has a predefined meaning. As a way to provide a meaning to an Open IE relation, we attempt to align it with one of the predefined set of relations used in a KB. Our approach is to use a Siamese network that compares two sequences of word embeddings representing an Open IE relation and a predefined KB relation. In order to make the approach practical, we automatically generate a training dataset using a distant supervision approach instead of relying on a hand-labeled dataset. Our experiment shows that the proposed method can capture the relational semantics better than the recent approaches. W19-0412 @@ -581,8 +581,8 @@ Md ShadAkhtar AbhishekKumar AsifEkbal - ChrisBiemann - PushpakBhattacharyya + ChrisBiemann + PushpakBhattacharyya 154–164 In this paper, we propose a language-agnostic deep neural network architecture for aspect-based sentiment analysis. The proposed approach is based on Bidirectional Long Short-Term Memory (Bi-LSTM) network, which is further assisted with extra hand-crafted features. We define three different architectures for the successful combination of word embeddings and hand-crafted features. We evaluate the proposed approach for six languages (i.e. English, Spanish, French, Dutch, German and Hindi) and two problems (i.e. aspect term extraction and aspect sentiment classification). Experiments show that the proposed model attains state-of-the-art performance in most of the settings. W19-0413 @@ -623,7 +623,7 @@ Cross-Lingual Transfer of Semantic Roles: From Raw Text to Semantic Roles MaryamAminian Mohammad SadeghRasooli - MonaDiab + MonaDiab 200–210 We describe a transfer method based on annotation projection to develop a dependency-based semantic role labeling system for languages for which no supervised linguistic information other than parallel data is available. Unlike previous work that presumes the availability of supervised features such as lemmas, part-of-speech tags, and dependency parse trees, we only make use of word and character features. Our deep model considers using character-based representations as well as unsupervised stem embeddings to alleviate the need for supervised features. Our experiments outperform a state-of-the-art method that uses supervised lexico-syntactic features on 6 out of 7 languages in the Universal Proposition Bank. W19-0417 @@ -634,8 +634,8 @@ Evaluating the Representational Hub of Language and Vision Models RaviShekhar EceTakmaz - RaquelFernández - RaffaellaBernardi + RaquelFernández + RaffaellaBernardi 211–222 The multimodal models used in the emerging field at the intersection of computational linguistics and computer vision implement the bottom-up processing of the “Hub and Spoke” architecture proposed in cognitive science to represent how the brain processes and combines multi-sensory inputs. In particular, the Hub is implemented as a neural network encoder. We investigate the effect on this encoder of various vision-and-language tasks proposed in the literature: visual question answering, visual reference resolution, and visually grounded dialogue. To measure the quality of the representations learned by the encoder, we use two kinds of analyses. First, we evaluate the encoder pre-trained on the different vision-and-language tasks on an existing “diagnostic task” designed to assess multimodal semantic understanding. Second, we carry out a battery of analyses aimed at studying how the encoder merges and exploits the two modalities. W19-0418 @@ -647,7 +647,7 @@ RezkaLeonandya DieuwkeHupkes EliaBruni - GermánKruszewski + GermánKruszewski 223–234 Learning to follow human instructions is a long-pursued goal in artificial intelligence. The task becomes particularly challenging if no prior knowledge of the employed language is assumed while relying only on a handful of examples to learn from. Work in the past has relied on hand-coded components or manually engineered features to provide strong inductive biases that make learning in such situations possible. In contrast, here we seek to establish whether this knowledge can be acquired automatically by a neural network system through a two phase training procedure: A (slow) offline learning stage where the network learns about the general structure of the task and a (fast) online adaptation phase where the network learns the language of a new given speaker. Controlled experiments show that when the network is exposed to familiar instructions but containing novel words, the model adapts very efficiently to the new vocabulary. Moreover, even for human speakers whose language usage can depart significantly from our artificial training language, our network can still make use of its automatically acquired inductive bias to learn to follow instructions more effectively. W19-0419 @@ -680,8 +680,8 @@ Using <fixed-case>W</fixed-case>iktionary as a resource for <fixed-case>WSD</fixed-case> : the case of <fixed-case>F</fixed-case>rench verbs VincentSegonne - MarieCandito - BenoîtCrabbé + MarieCandito + BenoîtCrabbé 259–270 As opposed to word sense induction, word sense disambiguation (WSD) has the advantage of us-ing interpretable senses, but requires annotated data, which are quite rare for most languages except English (Miller et al. 1993; Fellbaum, 1998). In this paper, we investigate which strategy to adopt to achieve WSD for languages lacking data that was annotated specifically for the task, focusing on the particular case of verb disambiguation in French. We first study the usability of Eurosense (Bovi et al. 2017) , a multilingual corpus extracted from Europarl (Kohen, 2005) and automatically annotated with BabelNet (Navigli and Ponzetto, 2010) senses. Such a resource opened up the way to supervised and semi-supervised WSD for resourceless languages like French. While this perspective looked promising, our evaluation on French verbs was inconclusive and showed the annotated senses’ quality was not sufficient for supervised WSD on French verbs. Instead, we propose to use Wiktionary, a collaboratively edited, multilingual online dictionary, as a resource for WSD. Wiktionary provides both sense inventory and manually sense tagged examples which can be used to train supervised and semi-supervised WSD systems. Yet, because senses’ distribution differ in lexicographic examples found in Wiktionary with respect to natural text, we then focus on studying the impact on WSD of the training data size and senses’ distribution. Using state-of-the art semi-supervised systems, we report experiments of Wiktionary-based WSD for French verbs, evaluated on FrenchSemEval (FSE), a new dataset of French verbs manually annotated with wiktionary senses. W19-0422 @@ -690,7 +690,7 @@ A Comparison of Context-sensitive Models for Lexical Substitution - AinaGarí Soler + AinaGarí Soler AnneCocos MariannaApidianaki ChrisCallison-Burch @@ -712,7 +712,7 @@ Frame Identification as Categorization: Exemplars vs Prototypes in Embeddingland JenniferSikos - SebastianPadó + SebastianPadó 295–306 Categorization is a central capability of human cognition, and a number of theories have been developed to account for properties of categorization. Even though many tasks in semantics also involve categorization of some kind, theories of categorization do not play a major role in contemporary research in computational linguistics. This paper follows the idea that embedding-based models of semantics lend themselves well to being formulated in terms of classical categorization theories. The benefit is a space of model families that enables (a) the formulation of hypotheses about the impact of major design decisions, and (b) a transparent assessment of these decisions. We instantiate this idea on the task of frame-semantic frame identification. We define four models that cross two design variables: (a) the choice of prototype vs. exemplar categorization, corresponding to different degrees of generalization applied to the input; and (b) the presence vs. absence of a fine-tuning step, corresponding to generic vs. task-adaptive categorization. We find that for frame identification, generalization and task-adaptive categorization both yield substantial benefits. Our prototype-based, fine-tuned model, which combines the best choices for these variables, establishes a new state of the art in frame identification. W19-0425 @@ -761,7 +761,7 @@ Distributional Semantics in the Real World: Building Word Vector Representations from a Truth-Theoretic Model ElizavetaKuzmenko - AurélieHerbelot + AurélieHerbelot 16–23 Distributional semantics models (DSMs) are known to produce excellent representations of word meaning, which correlate with a range of behavioural data. As lexical representations, they have been said to be fundamentally different from truth-theoretic models of semantics, where meaning is defined as a correspondence relation to the world. There are two main aspects to this difference: a) DSMs are built over corpus data which may or may not reflect ‘what is in the world’; b) they are built from word co-occurrences, that is, from lexical types rather than entities and sets. In this paper, we inspect the properties of a distributional model built over a set-theoretic approximation of ‘the real world’. To achieve this, we take the annotation a large database of images marked with objects, attributes and relations, convert the data into a representation akin to first-order logic and build several distributional models using various combinations of features. We evaluate those models over both relatedness and similarity datasets, demonstrating their effectiveness in standard evaluations. This allows us to conclude that, despite prior claims, truth-theoretic models are good candidates for building graded lexical representations of meaning. W19-0503 @@ -793,7 +793,7 @@ Distributional Interaction of Concreteness and Abstractness in Verb–Noun Subcategorisation DiegoFrassinelli - SabineSchulte im Walde + SabineSchulte im Walde 38–43 In recent years, both cognitive and computational research has provided empirical analyses of contextual co-occurrence of concrete and abstract words, partially resulting in inconsistent pictures. In this work we provide a more fine-grained description of the distributional nature in the corpus-based interaction of verbs and nouns within subcategorisation, by investigating the concreteness of verbs and nouns that are in a specific syntactic relationship with each other, i.e., subject, direct object, and prepositional object. Overall, our experiments show consistent patterns in the distributional representation of subcategorising and subcategorised concrete and abstract words. At the same time, the studies reveal empirical evidence why contextual abstractness represents a valuable indicator for automatic non-literal language identification. W19-0506 @@ -803,7 +803,7 @@ Generating a Novel Dataset of Multimodal Referring Expressions NikhilKrishnaswamy - JamesPustejovsky + JamesPustejovsky 44–51 Referring expressions and definite descriptions of objects in space exploit information both about object characteristics and locations. To resolve potential ambiguity, referencing strategies in language can rely on increasingly abstract concepts to distinguish an object in a given location from similar ones elsewhere, yet the description of the intended location may still be imprecise or difficult to interpret. Meanwhile, modalities such as gesture may communicate spatial information such as locations in a more concise manner. In real peer-to-peer communication, humans use language and gesture together to reference entities, with a capacity for mixing and changing modalities where needed. While recent progress in AI and human-computer interaction has created systems where a human can interact with a computer multimodally, computers often lack the capacity to intelligently mix modalities when generating referring expressions. We present a novel dataset of referring expressions combining natural language and gesture, describe its creation and evaluation, and its uses to train computational models for generating and interpreting multimodal referring expressions. W19-0507 @@ -864,7 +864,7 @@ A Dynamic Semantics for Causal Counterfactuals KennethLai - JamesPustejovsky + JamesPustejovsky 1–8 Under the standard approach to counterfactuals, to determine the meaning of a counterfactual sentence, we consider the “closest” possible world(s) where the antecedent is true, and evaluate the consequent. Building on the standard approach, some researchers have found that the set of worlds to be considered is dependent on context; it evolves with the discourse. Others have focused on how to define the “distance” between possible worlds, using ideas from causal modeling. This paper integrates the two ideas. We present a semantics for counterfactuals that uses a distance measure based on causal laws, that can also change over time. We show how our semantics can be implemented in the Haskell programming language. W19-0601 @@ -914,7 +914,7 @@ Semantic Frame Embeddings for Detecting Relations between Software Requirements WaadAlhoshan - RizaBatista-Navarro + RizaBatista-Navarro LipingZhao 44–51 The early phases of requirements engineering (RE) deal with a vast amount of software requirements (i.e., requirements that define characteristics of software systems), which are typically expressed in natural language. Analysing such unstructured requirements, usually obtained from users’ inputs, is considered a challenging task due to the inherent ambiguity and inconsistency of natural language. To support such a task, methods based on natural language processing (NLP) can be employed. One of the more recent advances in NLP is the use of word embeddings for capturing contextual information, which can then be applied in word analogy tasks. In this paper, we describe a new resource, i.e., embedding-based representations of semantic frames in FrameNet, which was developed to support the detection of relations between software requirements. Our embeddings, which encapsulate contextual information at the semantic frame level, were trained on a large corpus of requirements (i.e., a collection of more than three million mobile application reviews). The similarity between these frame embeddings is then used as a basis for detecting semantic relatedness between software requirements. Compared with existing resources underpinned by word-level embeddings alone, and frame embeddings built upon pre-trained vectors, our proposed frame embeddings obtained better performance against judgements of an RE expert. These encouraging results demonstrate the strong potential of the resource in supporting RE analysis tasks (e.g., traceability), which we plan to investigate as part of our future work. @@ -955,7 +955,7 @@ Assessing the Difficulty of Classifying <fixed-case>C</fixed-case>oncept<fixed-case>N</fixed-case>et Relations in a Multi-Label Classification Setting MariaBecker MichaelStaniek - ViviNastase + ViviNastase AnetteFrank Commonsense knowledge relations are crucial for advanced NLU tasks. We examine the learnability of such relations as represented in ConceptNet, taking into account their specific properties, which can make relation classification difficult: a given concept pair can be linked by multiple relation types, and relations can have multi-word arguments of diverse semantic types. We explore a neural open world multi-label classification approach that focuses on the evaluation of classification accuracy for individual relations. Based on an in-depth study of the specific properties of the ConceptNet resource, we investigate the impact of different relation representations and model variations. Our analysis reveals that the complexity of argument types and relation ambiguity are the most important challenges to address. We design a customized evaluation method to address the incompleteness of the resource that can be expanded in future work. W19-0801 @@ -985,7 +985,7 @@ Semantic Matching of Documents from Heterogeneous Collections: A Simple and Transparent Method for Practical Applications - Mark-ChristophMueller + Mark-ChristophMueller We present a very simple, unsupervised method for the pairwise matching of documents from heterogeneous collections. We demonstrate our method with the Concept-Project matching task, which is a binary classification task involving pairs of documents from heterogeneous collections. Although our method only employs standard resources without any domain- or task-specific modifications, it clearly outperforms the more complex system of the original authors. In addition, our method is transparent, because it provides explicit information about how a similarity score was computed, and efficient, because it is based on the aggregation of (pre-computable) word-level similarities. W19-0804 10.18653/v1/W19-0804 @@ -996,12 +996,12 @@ Proceedings of the IWCS Workshop Vector Semantics for Discourse and Dialogue W19-09 - MehrnooshSadrzadeh + MehrnooshSadrzadeh MatthewPurver ArashEshghi JulianHough RuthKempson - Patrick G. T.Healey + Patrick G. T.Healey Association for Computational Linguistics
Gothenburg, Sweden
May @@ -1108,8 +1108,8 @@ Proceedings of the Sixth Workshop on Natural Language and Computer Science W19-11 RobinCooper - Valeriade Paiva - Lawrence S.Moss + Valeriade Paiva + Lawrence S.Moss Association for Computational Linguistics
Gothenburg, Sweden
May @@ -1136,7 +1136,7 @@ Towards Natural Language Story Understanding with Rich Logical Schemas LaneLawley Gene LouisKim - LenhartSchubert + LenhartSchubert 11–22 Generating “commonsense’’ knowledge for intelligent understanding and reasoning is a difficult, long-standing problem, whose scale challenges the capacity of any approach driven primarily by human input. Furthermore, approaches based on mining statistically repetitive patterns fail to produce the rich representations humans acquire, and fall far short of human efficiency in inducing knowledge from text. The idea of our approach to this problem is to provide a learning system with a “head start” consisting of a semantic parser, some basic ontological knowledge, and most importantly, a small set of very general schemas about the kinds of patterns of events (often purposive, causal, or socially conventional) that even a one- or two-year-old could reasonably be presumed to possess. We match these initial schemas to simple children’s stories, obtaining concrete instances, and combining and abstracting these into new candidate schemas. Both the initial and generated schemas are specified using a rich, expressive logical form. While modern approaches to schema reasoning often only use slot-and-filler structures, this logical form allows us to specify complex relations and constraints over the slots. Though formal, the representations are language-like, and as such readily relatable to NL text. The agents, objects, and other roles in the schemas are represented by typed variables, and the event variables can be related through partial temporal ordering and causal relations. To match natural language stories with existing schemas, we first parse the stories into an underspecified variant of the logical form used by the schemas, which is suitable for most concrete stories. We include a walkthrough of matching a children’s story to these schemas and generating inferences from these matches. W19-1102 @@ -1204,7 +1204,7 @@ Discourse Representation Structure Parsing with Recurrent Neural Networks and the Transformer Model JiangmingLiu - Shay B.Cohen + Shay B.Cohen MirellaLapata We describe the systems we developed for Discourse Representation Structure (DRS) parsing as part of the IWCS-2019 Shared Task of DRS Parsing.1 Our systems are based on sequence-to-sequence modeling. To implement our model, we use the open-source neural machine translation system implemented in PyTorch, OpenNMT-py. We experimented with a variety of encoder-decoder models based on recurrent neural networks and the Transformer model. We conduct experiments on the standard benchmark of the Parallel Meaning Bank (PMB 2.2). Our best system achieves a score of 84.8% F1 in the DRS parsing shared task. W19-1203 @@ -1224,11 +1224,11 @@ Proceedings of the Tenth Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis W19-13 - AlexandraBalahur + AlexandraBalahur RomanKlinger - VeroniqueHoste + VeroniqueHoste CarloStrapparava - OrpheeDe Clercq + OrpheeDe Clercq Association for Computational Linguistics
Minneapolis, USA
June @@ -1256,7 +1256,7 @@ DaYin XiaoLiu XiuyuWu - BaobaoChang + BaobaoChang 6–15 In this paper, we propose a soft label approach to target-level sentiment classification task, in which a history-based soft labeling model is proposed to measure the possibility of a context word as an opinion word. We also apply a convolution layer to extract local active features, and introduce positional weights to take relative distance information into consideration. In addition, we obtain more informative target representation by training with context tokens together to make deeper interaction between target and context tokens. We conduct experiments on SemEval 2014 datasets and the experimental results show that our approach significantly outperforms previous models and gives state-of-the-art results on these datasets. W19-1302 @@ -1276,7 +1276,7 @@
Exploring Fine-Tuned Embeddings that Model Intensifiers for Emotion Analysis - Laura Ana MariaBostan + Laura Ana MariaBostan RomanKlinger 25–34 Adjective phrases like “a little bit surprised”, “completely shocked”, or “not stunned at all” are not handled properly by current state-of-the-art emotion classification and intensity prediction systems. Based on this finding, we analyze differences between embeddings used by these systems in regard to their capability of handling such cases and argue that intensifiers in context of emotion words need special treatment, as is established for sentiment polarity classification, but not for more fine-grained emotion prediction. To resolve this issue, we analyze different aspects of a post-processing pipeline which enriches the word representations of such phrases. This includes expansion of semantic spaces at the phrase level and sub-word level followed by retrofitting to emotion lexicons. We evaluate the impact of these steps with ‘A La Carte and Bag-of-Substrings extensions based on pretrained GloVe,Word2vec, and fastText embeddings against a crowd-sourced corpus of intensity annotations for tweets containing our focus phrases. We show that the fastText-based models do not gain from handling these specific phrases under inspection. For Word2vec embeddings, we show that our post-processing pipeline improves the results by up to 8% on a novel dataset densly populated with intensifiers while it does not decrease the performance on the established EmoInt dataset. @@ -1321,10 +1321,10 @@ How do we feel when a robot dies? Emotions expressed on <fixed-case>T</fixed-case>witter before and after hitch<fixed-case>BOT</fixed-case>’s destruction - Kathleen C.Fraser + Kathleen C.Fraser FraukeZeller David HarrisSmith - SaifMohammad + SaifMohammad FrankRudzicz 62–71 In 2014, a chatty but immobile robot called hitchBOT set out to hitchhike across Canada. It similarly made its way across Germany and the Netherlands, and had begun a trip across the USA when it was destroyed by vandals. In this work, we analyze the emotions and sentiments associated with words in tweets posted before and after hitchBOT’s destruction to answer two questions: Were there any differences in the emotions expressed across the different countries visited by hitchBOT? And how did the public react to the demise of hitchBOT? Our analyses indicate that while there were few cross-cultural differences in sentiment towards hitchBOT, there was a significant negative emotional reaction to its destruction, suggesting that people had formed an emotional connection with hitchBOT and perceived its destruction as morally wrong. We discuss potential implications of anthropomorphism and emotional attachment to robots from the perspective of robot ethics. @@ -1338,7 +1338,7 @@ LakshyaKumar ArpanSomani AdityaJoshi - PushpakBhattacharyya + PushpakBhattacharyya 72–80 Research in sarcasm detection spans almost a decade. However a particular form of sarcasm remains unexplored: sarcasm expressed through numbers, which we estimate, forms about 11% of the sarcastic tweets in our dataset. The sentence ‘Love waking up at 3 am’ is sarcastic because of the number. In this paper, we focus on detecting sarcasm in tweets arising out of numbers. Initially, to get an insight into the problem, we implement a rule-based and a statistical machine learning-based (ML) classifier. The rule-based classifier conveys the crux of the numerical sarcasm problem, namely, incongruity arising out of numbers. The statistical ML classifier uncovers the indicators i.e., features of such sarcasm. The actual system in place, however, are two deep learning (DL) models, CNN and attention network that obtains an F-score of 0.93 and 0.91 on our dataset of tweets containing numbers. To the best of our knowledge, this is the first line of research investigating the phenomenon of sarcasm arising out of numbers, culminating in a detector thereof. W19-1309 @@ -1372,10 +1372,10 @@ Proceedings of the Sixth Workshop on NLP for Similar Languages, Varieties and Dialects W19-14 MarcosZampieri - PreslavNakov - ShervinMalmasi + PreslavNakov + ShervinMalmasi NikolaLjubešić - JörgTiedemann + JörgTiedemann AhmedAli Association for Computational Linguistics
Ann Arbor, Michigan
@@ -1392,14 +1392,14 @@ MarcosZampieri ShervinMalmasi YvesScherrer - TanjaSamardžić - FrancisTyers - MiikkaSilfverberg - NataliaKlyueva + TanjaSamardžić + FrancisTyers + MiikkaSilfverberg + NataliaKlyueva Tung-LePan Chu-RenHuang Radu TudorIonescu - Andrei M.Butnaru + Andrei M.Butnaru TommiJauhiainen 1–16 In this paper, we present the findings of the Third VarDial Evaluation Campaign organized as part of the sixth edition of the workshop on Natural Language Processing (NLP) for Similar Languages, Varieties and Dialects (VarDial), co-located with NAACL 2019. This year, the campaign included five shared tasks, including one task re-run – German Dialect Identification (GDI) – and four new tasks – Cross-lingual Morphological Analysis (CMA), Discriminating between Mainland and Taiwan variation of Mandarin Chinese (DMT), Moldavian vs. Romanian Cross-dialect Topic identification (MRC), and Cuneiform Language Identification (CLI). A total of 22 teams submitted runs across the five shared tasks. After the end of the competition, we received 14 system description papers, which are published in the VarDial workshop proceedings and referred to in this report. @@ -1410,7 +1410,7 @@ Improving Cuneiform Language Identification with <fixed-case>BERT</fixed-case> GabrielBernier-Colborne - CyrilGoutte + CyrilGoutte SergeLéger 17–25 We describe the systems developed by the National Research Council Canada for the Cuneiform Language Identification (CLI) shared task at the 2019 VarDial evaluation campaign. We compare a state-of-the-art baseline relying on character n-grams and a traditional statistical classifier, a voting ensemble of classifiers, and a deep learning approach using a Transformer network. We describe how these systems were trained, and analyze the impact of some preprocessing and model estimation decisions. The deep neural network achieved 77% accuracy on the test data, which turned out to be the best performance at the CLI evaluation, establishing a new state-of-the-art for cuneiform language identification. @@ -1488,7 +1488,7 @@ TommiJauhiainen HeidiJauhiainen TeroAlstola - KristerLindén + KristerLindén 89–98 This article introduces a corpus of cuneiform texts from which the dataset for the use of the Cuneiform Language Identification (CLI) 2019 shared task was derived as well as some preliminary language identification experiments conducted using that corpus. We also describe the CLI dataset and how it was derived from the corpus. In addition, we provide some baseline language identification results using the CLI dataset. To the best of our knowledge, the experiments detailed here represent the first time that automatic language identification methods have been used on cuneiform data. W19-1409 @@ -1498,7 +1498,7 @@ Leveraging Pretrained Word Embeddings for Part-of-Speech Tagging of Code Switching Data FahadAlGhamdi - MonaDiab + MonaDiab 99–109 Linguistic Code Switching (CS) is a phenomenon that occurs when multilingual speakers alternate between two or more languages/dialects within a single conversation. Processing CS data is especially challenging in intra-sentential data given state-of-the-art monolingual NLP technologies since such technologies are geared toward the processing of one language at a time. In this paper, we address the problem of Part-of-Speech tagging (POS) in the context of linguistic code switching (CS). We explore leveraging multiple neural network architectures to measure the impact of different pre-trained embeddings methods on POS tagging CS data. We investigate the landscape in four CS language pairs, Spanish-English, Hindi-English, Modern Standard Arabic- Egyptian Arabic dialect (MSA-EGY), and Modern Standard Arabic- Levantine Arabic dialect (MSA-LEV). Our results show that multilingual embedding (e.g., MSA-EGY and MSA-LEV) helps closely related languages (EGY/LEV) but adds noise to the languages that are distant (SPA/HIN). Finally, we show that our proposed models outperform state-of-the-art CS taggers for MSA-EGY language pair. W19-1410 @@ -1526,7 +1526,7 @@ <fixed-case>BAM</fixed-case>: A combination of deep and shallow models for <fixed-case>G</fixed-case>erman Dialect Identification. - Andrei M.Butnaru + Andrei M.Butnaru 128–137 *This is a submission for the Third VarDial Evaluation Campaign* In this paper, we present a machine learning approach for the German Dialect Identification (GDI) Closed Shared Task of the DSL 2019 Challenge. The proposed approach combines deep and shallow models, by applying a voting scheme on the outputs resulted from a Character-level Convolutional Neural Networks (Char-CNN), a Long Short-Term Memory (LSTM) network, and a model based on String Kernels. The first model used is the Char-CNN model that merges multiple convolutions computed with kernels of different sizes. The second model is the LSTM network which applies a global max pooling over the returned sequences over time. Both models pass the activation maps to two fully-connected layers. The final model is based on String Kernels, computed on character p-grams extracted from speech transcripts. The model combines two blended kernel functions, one is the presence bits kernel, and the other is the intersection kernel. The empirical results obtained in the shared task prove that the approach can achieve good results. The system proposed in this paper obtained the fourth place with a macro-F1 score of 62.55% W19-1413 @@ -1592,7 +1592,7 @@ Discriminating between <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese and <fixed-case>S</fixed-case>wiss-<fixed-case>G</fixed-case>erman varieties using adaptive language models TommiJauhiainen - KristerLindén + KristerLindén HeidiJauhiainen 178–187 This paper describes the language identification systems used by the SUKI team in the Discriminating between the Mainland and Taiwan variation of Mandarin Chinese (DMT) and the German Dialect Identification (GDI) shared tasks which were held as part of the third VarDial Evaluation Campaign. The DMT shared task included two separate tracks, one for the simplified Chinese script and one for the traditional Chinese script. We submitted three runs on both tracks of the DMT task as well as on the GDI task. We won the traditional Chinese track using Naive Bayes with language model adaptation, came second on GDI with an adaptive version of the HeLI 2.0 method, and third on the simplified Chinese track using again the adaptive Naive Bayes. @@ -1632,7 +1632,7 @@ Experiments in Cuneiform Language Identification - Gustavo HenriquePaetzold + Gustavo HenriquePaetzold MarcosZampieri 209–213 This paper presents methods to discriminate between languages and dialects written in Cuneiform script, one of the first writing systems in the world. We report the results obtained by the PZ team in the Cuneiform Language Identification (CLI) shared task organized within the scope of the VarDial Evaluation Campaign 2019. The task included two languages, Sumerian and Akkadian. The latter is divided into six dialects: Old Babylonian, Middle Babylonian peripheral, Standard Babylonian, Neo Babylonian, Late Babylonian, and Neo Assyrian. We approach the task using a meta-classifier trained on various SVM models and we show the effectiveness of the system for this task. Our submission achieved 0.738 F1 score in discriminating between the seven languages and dialects and it was ranked fourth in the competition among eight teams. @@ -1654,7 +1654,7 @@ Cross-lingual Annotation Projection Is Effective for Neural Part-of-Speech Tagging MatthiasHuck DianaDutka - AlexanderFraser + AlexanderFraser 223–233 We tackle the important task of part-of-speech tagging using a neural model in the zero-resource scenario, where we have no access to gold-standard POS training data. We compare this scenario with the low-resource scenario, where we have access to a small amount of gold-standard POS training data. Our experiments focus on Ukrainian as a representative of under-resourced languages. Russian is highly related to Ukrainian, so we exploit gold-standard Russian POS tags. We consider four techniques to perform Ukrainian POS tagging: zero-shot tagging and cross-lingual annotation projection (for the zero-resource scenario), and compare these with self-training and multilingual learning (for the low-resource scenario). We find that cross-lingual annotation projection works particularly well in the zero-resource scenario. W19-1425 @@ -1666,7 +1666,7 @@ Proceedings of the Third Workshop on Structured Prediction for NLP W19-15 - AndreMartins + AndreMartins AndreasVlachos ZornitsaKozareva SujithRavi @@ -1708,7 +1708,7 @@ <fixed-case>SPARSE</fixed-case>: Structured Prediction using Argument-Relative Structured Encoding RishiBommasani ArzooKatiyar - ClaireCardie + ClaireCardie 13–17 We propose structured encoding as a novel approach to learning representations for relations and events in neural structured prediction. Our approach explicitly leverages the structure of available relation and event metadata to generate these representations, which are parameterized by both the attribute structure of the metadata as well as the learned representation of the arguments of the relations and events. We consider affine, biaffine, and recurrent operators for building hierarchical representations and modelling underlying features. We apply our approach to the second-order structured prediction task studied in the 2016/2017 Belief and Sentiment analysis evaluations (BeSt): given a document and its entities, relations, and events (including metadata and mentions), determine the sentiment of each entity towards every relation and event in the document. Without task-specific knowledge sources or domain engineering, we significantly improve over systems and baselines that neglect the available metadata or its hierarchical structure. We observe across-the-board improvements on the BeSt 2016/2017 sentiment analysis task of at least 2.3 (absolute) and 10.6% (relative) F-measure over the previous state-of-the-art. W19-1503 @@ -1721,7 +1721,7 @@ Lightly-supervised Representation Learning with Global Interpretability AndrewZupon MariaAlexeeva - Marco A.Valenzuela-Escárcega + Marco A.Valenzuela-Escárcega AjayNagesh MihaiSurdeanu 18–28 @@ -1764,7 +1764,7 @@ Corpus of Multimodal Interaction for Collaborative Planning Miltiadis MariosKatsakioris - HelenHastie + HelenHastie IoannisKonstas AtanasLaskov 1–6 @@ -1837,7 +1837,7 @@ <fixed-case>S</fixed-case>patial<fixed-case>N</fixed-case>et: A Declarative Resource for Spatial Relations MorganUlinski BobCoyne - JuliaHirschberg + JuliaHirschberg 61–70 This paper introduces SpatialNet, a novel resource which links linguistic expressions to actual spatial configurations. SpatialNet is based on FrameNet (Ruppenhofer et al., 2016) and VigNet (Coyne et al., 2011), two resources which use frame semantics to encode lexical meaning. SpatialNet uses a deep semantic representation of spatial relations to provide a formal description of how a language expresses spatial information. This formal representation of the lexical semantics of spatial language also provides a consistent way to represent spatial meaning across multiple languages. In this paper, we describe the structure of SpatialNet, with examples from English and German. We also show how SpatialNet can be combined with other existing NLP tools to create a text-to-scene system for a language. W19-1607 @@ -1862,7 +1862,7 @@ W19-17 HeidiChristensenUniversity of Sheffield KristyHollingsheadFlorida Institute for Human and Machine Cognition - EmilyPrud’hommeauxBoston College + EmilyPrud’hommeauxBoston College FrankRudziczUniversity of Toronto KeithVertanenMichigan Technological University Association for Computational Linguistics @@ -1915,7 +1915,7 @@ Speech-based Estimation of Bulbar Regression in Amyotrophic Lateral Sclerosis AlanWisler KristinTeplansky - JordanGreen + JordanGreen YanaYunusova ThomasCampbell DaraghHeitzman @@ -1929,7 +1929,7 @@ A Blissymbolics Translation System UsmanSohail - DavidTraum + DavidTraum 32–36 Blissymbolics (Bliss) is a pictographic writing system that is used by people with communication disorders. Bliss attempts to create a writing system that makes words easier to distinguish by using pictographic symbols that encapsulate meaning rather than sound, as the English alphabet does for example. Users of Bliss rely on human interpreters to use Bliss. We created a translation system from Bliss to natural English with the hopes of decreasing the reliance on human interpreters by the Bliss community. We first discuss the basic rules of Blissymbolics. Then we point out some of the challenges associated with developing computer assisted tools for Blissymbolics. Next we talk about our ongoing work in developing a translation system, including current limitations, and future work. We conclude with a set of examples showing the current capabilities of our translation system. W19-1705 @@ -1952,7 +1952,7 @@ Noisy Neural Language Modeling for Typing Prediction in <fixed-case>BCI</fixed-case> Communication RuiDong - DavidSmith + DavidSmith ShiranDudy StevenBedrick 44–51 @@ -1966,8 +1966,8 @@ Proceedings of the Second Workshop on Shortcomings in Vision and Language W19-18 - RaffaellaBernardi - RaquelFernandez + RaffaellaBernardi + RaquelFernandez SpandanaGella KushalKafle ChristopherKanan @@ -2063,7 +2063,7 @@ Grounded Word Sense Translation ChiraagLala - PranavaMadhyastha + PranavaMadhyastha LuciaSpecia 78–85 Recent work on visually grounded language learning has focused on broader applications of grounded representations, such as visual question answering and multimodal machine translation. In this paper we consider grounded word sense translation, i.e. the task of correctly translating an ambiguous source word given the corresponding textual and visual context. Our main objective is to investigate the extent to which images help improve word-level (lexical) translation quality. We do so by first studying the dataset for this task to understand the scope and challenges of the task. We then explore different data settings, image features, and ways of grounding to investigate the gain from using images in each of the combinations. We find that grounding on the image is specially beneficial in weaker unidirectional recurrent translation models. We observe that adding structured image information leads to stronger gains in lexical translation accuracy. @@ -2107,7 +2107,7 @@ An Analysis of Attention over Clinical Notes for Predictive Tasks SarthakJain RaminMohammadi - Byron C.Wallace + Byron C.Wallace 15–21 The shift to electronic medical records (EMRs) has engendered research into machine learning and natural language technologies to analyze patient records, and to predict from these clinical outcomes of interest. Two observations motivate our aims here. First, unstructured notes contained within EMR often contain key information, and hence should be exploited by models. Second, while strong predictive performance is important, interpretability of models is perhaps equally so for applications in this domain. Together, these points suggest that neural models for EMR may benefit from incorporation of attention over notes, which one may hope will both yield performance gains and afford transparency in predictions. In this work we perform experiments to explore this question using two EMR corpora and four different predictive tasks, that: (i) inclusion of attention mechanisms is critical for neural encoder modules that operate over notes fields in order to yield competitive performance, but, (ii) unfortunately, while these boost predictive performance, it is decidedly less clear whether they provide meaningful support for predictions. W19-1902 @@ -2116,7 +2116,7 @@ Extracting Adverse Drug Event Information with Minimal Engineering - TimothyMiller + TimothyMiller AlonGeva DmitriyDligach 22–27 @@ -2128,7 +2128,7 @@ Hierarchical Nested Named Entity Recognition ZitaMarinho - AfonsoMendes + AfonsoMendes SebastiãoMiranda DavidNogueira 28–34 @@ -2164,7 +2164,7 @@ Study of lexical aspect in the <fixed-case>F</fixed-case>rench medical language. Development of a lexical resource AgathePierson - CédrickFairon + CédrickFairon 55–64 This paper details the development of a linguistic resource designed to improve temporal information extraction systems and to integrate aspectual values. After a brief review of recent works in temporal information extraction for the medical area, we discuss the linguistic notion of aspect and how it got a place in the NLP field. Then, we present our clinical data and describe the five-step approach adopted in this study. Finally, we represent the linguistic resource itself and explain how we elaborated it and which properties were selected for the creation of the tables. W19-1907 @@ -2174,7 +2174,7 @@ A <fixed-case>BERT</fixed-case>-based Universal Model for Both Within- and Cross-sentence Clinical Temporal Relation Extraction ChenLin - TimothyMiller + TimothyMiller DmitriyDligach StevenBethard GuerganaSavova @@ -2204,7 +2204,7 @@ AlejandroPiad-Morffis YoanGuitérrez SuilanEstevez-Velarde - RafaelMuñoz + RafaelMuñoz 79–88 Knowledge discovery from text in natural language is a task usually aided by the manual construction of annotated corpora. Specifically in the clinical domain, several annotation models are used depending on the characteristics of the task to solve (e.g., named entity recognition, relation extraction, etc.). However, few general-purpose annotation models exist, that can support a broad range of knowledge extraction tasks. This paper presents an annotation model designed to capture a large portion of the semantics of natural language text. The structure of the annotation model is presented, with examples of annotated sentences and a brief description of each semantic role and relation defined. This research focuses on an application to clinical texts in the Spanish language. Nevertheless, the presented annotation model is extensible to other domains and languages. An example of annotated sentences, guidelines, and suitable configuration files for an annotation tool are also provided for the research community. W19-1910 @@ -2264,7 +2264,7 @@ EbenHolderness PhilipCawkwell KirstenBolton - JamesPustejovsky + JamesPustejovsky Mei-HuaHall 117–123 Recently natural language processing (NLP) tools have been developed to identify and extract salient risk indicators in electronic health records (EHRs). Sentiment analysis, although widely used in non-medical areas for improving decision making, has been studied minimally in the clinical setting. In this study, we undertook, to our knowledge, the first domain adaptation of sentiment analysis to psychiatric EHRs by defining psychiatric clinical sentiment, performing an annotation project, and evaluating multiple sentence-level sentiment machine learning (ML) models. Results indicate that off-the-shelf sentiment analysis tools fail in identifying clinically positive or negative polarity, and that the definition of clinical sentiment that we provide is learnable with relatively small amounts of training data. This project is an initial step towards further refining sentiment analysis methods for clinical use. Our long-term objective is to incorporate the results of this project as part of a machine learning model that predicts inpatient readmission risk. We hope that this work will initiate a discussion concerning domain adaptation of sentiment analysis to the clinical setting. @@ -2276,8 +2276,8 @@ Medical Word Embeddings for <fixed-case>S</fixed-case>panish: Development and Evaluation FelipeSoares MartaVillegas - AitorGonzalez-Agirre - MartinKrallinger + AitorGonzalez-Agirre + MartinKrallinger JordiArmengol-Estapé 124–133 Word embeddings are representations of words in a dense vector space. Although they are not recent phenomena in Natural Language Processing (NLP), they have gained momentum after the recent developments of neural methods and Word2Vec. Regarding their applications in medical and clinical NLP, they are invaluable resources when training in-domain named entity recognition systems, classifiers or taggers, for instance. Thus, the development of tailored word embeddings for medical NLP is of great interest. However, we identified a gap in the literature which we aim to fill in this paper: the availability of embeddings for medical NLP in Spanish, as well as a standardized form of intrinsic evaluation. Since most work has been done for English, some established datasets for intrinsic evaluation are already available. In this paper, we show the steps we employed to adapt such datasets for the first time to Spanish, of particular relevance due to the considerable volume of EHRs in this language, as well as the creation of in-domain medical word embeddings for the Spanish using the state-of-the-art FastText model. We performed intrinsic evaluation with our adapted datasets, as well as extrinsic evaluation with a named entity recognition systems using a baseline embedding of general-domain. Both experiments proved that our embeddings are suitable for use in medical NLP in the Spanish language, and are more accurate than general-domain ones. @@ -2355,7 +2355,7 @@ DenisNewman-Griffis AparajitaHaldar HakanFerhatosmanoglu - EricFosler-Lussier + EricFosler-Lussier 8–17 Analysis of word embedding properties to inform their use in downstream NLP tasks has largely been studied by assessing nearest neighbors. However, geometric properties of the continuous feature space contribute directly to the use of embedding features in downstream models, and are largely unexplored. We consider four properties of word embedding geometry, namely: position relative to the origin, distribution of features in the vector space, global pairwise distances, and local pairwise distances. We define a sequence of transformations to generate new embeddings that expose subsets of these properties to downstream models and evaluate change in task performance to understand the contribution of each property to NLP models. We transform publicly available pretrained embeddings from three popular toolkits (word2vec, GloVe, and FastText) and evaluate on a variety of intrinsic tasks, which model linguistic information in the vector space, and extrinsic tasks, which use vectors as input to machine learning models. We find that intrinsic evaluations are highly sensitive to absolute position, while extrinsic tasks rely primarily on local similarity. Our findings suggest that future embedding models and post-processing techniques should focus primarily on similarity to nearby points in vector space. W19-2002 @@ -2378,7 +2378,7 @@ How Well Do Embedding Models Capture Non-compositionality? A View from Multiword Expressions NavnitaNandakumar - TimothyBaldwin + TimothyBaldwin BaharSalehi 27–34 In this paper, we apply various embedding methods on multiword expressions to study how well they capture the nuances of non-compositional data. Our results from a pool of word-, character-, and document-level embbedings suggest that Word2vec performs the best, followed by FastText and Infersent. Moreover, we find that recently-proposed contextualised embedding models such as Bert and ELMo are not adept at handling non-compositionality in multiword expressions. @@ -2388,7 +2388,7 @@ Measuring Semantic Abstraction of Multilingual <fixed-case>NMT</fixed-case> with Paraphrase Recognition and Generation Tasks - JörgTiedemann + JörgTiedemann YvesScherrer 35–42 In this paper, we investigate whether multilingual neural translation models learn stronger semantic abstractions of sentences than bilingual ones. We test this hypotheses by measuring the perplexity of such models when applied to paraphrases of the source language. The intuition is that an encoder produces better representations if a decoder is capable of recognizing synonymous sentences in the same language even though the model is never trained for that task. In our setup, we add 16 different auxiliary languages to a bidirectional bilingual baseline model (English-French) and test it with in-domain and out-of-domain paraphrases in English. The results show that the perplexity is significantly reduced in each of the cases, indicating that meaning can be grounded in translation. This is further supported by a study on paraphrase generation that we also include at the end of the paper. @@ -2460,7 +2460,7 @@ Probing Biomedical Embeddings from Language Models QiaoJin BhuwanDhingra - WilliamCohen + WilliamCohen XinghuaLu 82–89 Contextualized word embeddings derived from pre-trained language models (LMs) show significant improvements on downstream NLP tasks. Pre-training on domain-specific corpora, such as biomedical articles, further improves their performance. In this paper, we conduct probing experiments to determine what additional information is carried intrinsically by the in-domain trained contextualized embeddings. For this we use the pre-trained LMs as fixed feature extractors and restrict the downstream task models to not have additional sequence modeling layers. We compare BERT (Devlin et al. 2018), ELMo (Peters et al., 2018), BioBERT (Lee et al., 2019) and BioELMo, a biomedical version of ELMo trained on 10M PubMed abstracts. Surprisingly, while fine-tuned BioBERT is better than BioELMo in biomedical NER and NLI tasks, as a fixed feature extractor BioELMo outperforms BioBERT in our probing tasks. We use visualization and nearest neighbor analysis to show that better encoding of entity-type and relational information leads to this superiority. @@ -2538,7 +2538,7 @@ VeronicaLynn SalvatoreGiorgi NiranjanBalasubramanian - H. AndrewSchwartz + H. AndrewSchwartz 18–28 NLP naturally puts a primary focus on leveraging document language, occasionally considering user attributes as supplemental. However, as we tackle more social scientific tasks, it is possible user attributes might be of primary importance and the document supplemental. Here, we systematically investigate the predictive power of user-level features alone versus document-level features for document-level tasks. We first show user attributes can sometimes carry more task-related information than the document itself. For example, a tweet-level stance detection model using only 13 user-level attributes (i.e. features that did not depend on the specific tweet) was able to obtain a higher F1 than the top-performing SemEval participant. We then consider multiple tasks and a wider range of user attributes, showing the performance of strong document-only models can often be improved (as in stance, sentiment, and sarcasm) with user attributes, particularly benefiting tasks with stable “trait-like” outcomes (e.g. stance) most relative to frequently changing “state-like” outcomes (e.g. sentiment). These results not only support the growing work on integrating user factors into predictive systems, but that some of our NLP tasks might be better cast primarily as user-level (or human) tasks. W19-2103 @@ -2585,7 +2585,7 @@ Using time series and natural language processing to identify viral moments in the 2016 <fixed-case>U</fixed-case>.<fixed-case>S</fixed-case>. Presidential Debate JosephineLukito - PrathushaK Sarma + PrathushaK Sarma JordanFoley AmanAbhishek 54–64 @@ -2597,7 +2597,7 @@ Stance Classification, Outcome Prediction, and Impact Assessment: <fixed-case>NLP</fixed-case> Tasks for Studying Group Decision-Making ElijahMayfield - AlanBlack + AlanBlack 65–77 In group decision-making, the nuanced process of conflict and resolution that leads to consensus formation is closely tied to the quality of decisions made. Behavioral scientists rarely have rich access to process variables, though, as unstructured discussion transcripts are difficult to analyze. Here, we define ways for NLP researchers to contribute to the study of groups and teams. We introduce three tasks alongside a large new corpus of over 400,000 group debates on Wikipedia. We describe the tasks and their importance, then provide baselines showing that BERT contextualized word embeddings consistently outperform other language representations. W19-2108 @@ -2631,7 +2631,7 @@ Simple dynamic word embeddings for mapping perceptions in the public sphere NabeelGillani - RogerLevy + RogerLevy 94–99 Word embeddings trained on large-scale historical corpora can illuminate human biases and stereotypes that perpetuate social inequalities. These embeddings are often trained in separate vector space models defined according to different attributes of interest. In this paper, we introduce a single, unified dynamic embedding model that learns attribute-specific word embeddings and apply it to a novel dataset—talk radio shows from around the US—to analyze perceptions about refugees. We validate our model on a benchmark dataset and apply it to two corpora of talk radio shows averaging 117 million words produced over one month across 83 stations and 64 cities. Our findings suggest that dynamic word embeddings are capable of identifying nuanced differences in public discourse about contentious topics, suggesting their usefulness as a tool for better understanding how the public perceives and engages with different issues across time, geography, and other dimensions. W19-2111 @@ -2642,7 +2642,7 @@ Modeling Behavioral Aspects of Social Media Discourse for Moral Classification - KristenJohnson + KristenJohnson DanGoldwasser 100–109 Political discourse on social media microblogs, specifically Twitter, has become an undeniable part of mainstream U.S. politics. Given the length constraint of tweets, politicians must carefully word their statements to ensure their message is understood by their intended audience. This constraint often eliminates the context of the tweet, making automatic analysis of social media political discourse a difficult task. To overcome this challenge, we propose simultaneous modeling of high-level abstractions of political language, such as political slogans and framing strategies, with abstractions of how politicians behave on Twitter. These behavioral abstractions can be further leveraged as forms of supervision in order to increase prediction accuracy, while reducing the burden of annotation. In this work, we use Probabilistic Soft Logic (PSL) to build relational models to capture the similarities in language and behavior that obfuscate political messages on Twitter. When combined, these descriptors reveal the moral foundations underlying the discourse of U.S. politicians online, across differing governing administrations, showing how party talking points remain cohesive or change over time. @@ -2659,10 +2659,10 @@ ElliottAsh LeslieBarrett DanielChen - AdamMeyers - DanielPreotiuc-Pietro + AdamMeyers + DanielPreotiuc-Pietro DavidRosenberg - AmandaStent + AmandaStent Association for Computational Linguistics
Minneapolis, Minnesota
June @@ -2690,7 +2690,7 @@ JohnAberdeen KarlBranting CraigPfeifer - AlexanderYeh + AlexanderYeh AmartyaChakraborty 12–20 Recent research has demonstrated that judicial and administrative decisions can be predicted by machine-learning models trained on prior decisions. However, to have any practical application, these predictions must be explainable, which in turn requires modeling a rich set of features. Such approaches face a roadblock if the knowledge engineering required to create these features is not scalable. We present an approach to developing a feature-rich corpus of administrative rulings about domain name disputes, an approach which leverages a small amount of manual annotation and prototypical patterns present in the case documents to automatically extend feature labels to the entire corpus. To demonstrate the feasibility of this approach, we report results from systems trained on this dataset. @@ -2700,7 +2700,7 @@
The Extent of Repetition in Contract Language - DanSimonson + DanSimonson DanielBroderick JonathanHerr 21–30 @@ -2745,14 +2745,14 @@ Developing and Orchestrating a Portfolio of Natural Legal Language Processing and Document Curation Services GeorgRehm - JuliánMoreno-Schneider + JuliánMoreno-Schneider JorgeGracia ArtemRevenko VictorMireles MariaKhvalchik IlanKernerman AndisLagzdins - MarcisPinnis + MarcisPinnis ArtusVasilevskis ElenaLeitner JanMilde @@ -2862,8 +2862,8 @@ AlessioPalmero Aprosio SaraTonelli MarcoTurchi - MatteoNegri - Mattia A.Di Gangi + MatteoNegri + Mattia A.Di Gangi 37–44 Neural text simplification has gained increasing attention in the NLP community thanks to recent advancements in deep sequence-to-sequence learning. Most recent efforts with such a data-demanding paradigm have dealt with the English language, for which sizeable training datasets are currently available to deploy competitive models. Similar improvements on less resource-rich languages are conditioned either to intensive manual work to create training data, or to the design of effective automatic generation techniques to bypass the data acquisition bottleneck. Inspired by the machine translation field, in which synthetic parallel pairs generated from monolingual data yield significant improvements to neural models, in this paper we exploit large amounts of heterogeneous data to automatically select simple sentences, which are then used to create synthetic simplification pairs. We also evaluate other solutions, such as oversampling and the use of external word embeddings to be fed to the neural simplification system. Our approach is evaluated on Italian and Spanish, for which few thousand gold sentence pairs are available. The results show that these techniques yield performance improvements over a baseline sequence-to-sequence configuration. W19-2305 @@ -2912,8 +2912,8 @@ ZiangXie CindyWang MaxDrach - DanJurafsky - AndrewNg + DanJurafsky + AndrewNg 74–81 We introduce a simple method for text style transfer that frames style transfer as denoising: we synthesize a noisy corpus and treat the source style as a noisy version of the target style. To control for aspects such as preserving meaning while modifying style, we propose a reranking approach in the data synthesis phase. We evaluate our method on three novel style transfer tasks: transferring between British and American varieties, text genres (formal vs. casual), and lyrics from different musical genres. By measuring style transfer quality, meaning preservation, and the fluency of generated outputs, we demonstrate that our method is able both to produce high-quality output while maintaining the flexibility to suggest syntactically rich stylistic edits. W19-2309 @@ -2983,7 +2983,7 @@ Character Identification Refined: A Proposal LabibaJahan - MarkFinlayson + MarkFinlayson 12–18 Characters are a key element of narrative and so character identification plays an important role in automatic narrative understanding. Unfortunately, most prior work that incorporates character identification is not built upon a clear, theoretically grounded concept of character. They either take character identification for granted (e.g., using simple heuristics on referring expressions), or rely on simplified definitions that do not capture important distinctions between characters and other referents in the story. Prior approaches have also been rather complicated, relying, for example, on predefined case bases or ontologies. In this paper we propose a narratologically grounded definition of character for discussion at the workshop, and also demonstrate a preliminary yet straightforward supervised machine learning model with a small set of features that performs well on two corpora. The most important of the two corpora is a set of 46 Russian folktales, on which the model achieves an F1 of 0.81. Error analysis suggests that features relevant to the plot will be necessary for further improvements in performance. W19-2402 @@ -2994,8 +2994,8 @@ Deep Natural Language Understanding of News Text JayaShree EmilyLiu - AndrewGordon - JerryHobbs + AndrewGordon + JerryHobbs 19–27 Early proposals for the deep understanding of natural language text advocated an approach of “interpretation as abduction,” where the meaning of a text was derived as an explanation that logically entailed the input words, given a knowledge base of lexical and commonsense axioms. While most subsequent NLP research has instead pursued statistical and data-driven methods, the approach of interpretation as abduction has seen steady advancements in both theory and software implementations. In this paper, we summarize advances in deriving the logical form of the text, encoding commonsense knowledge, and technologies for scalable abductive reasoning. We then explore the application of these advancements to the deep understanding of a paragraph of news text, where the subtle meaning of words and phrases are resolved by backward chaining on a knowledge base of 80 hand-authored axioms. W19-2403 @@ -3004,13 +3004,13 @@ Extraction of Message Sequence Charts from Narrative History Text - GirishPalshikar + GirishPalshikar SachinPawar SangameshwarPatil SwapnilHingmire NitinRamrakhiyani HarsimranBedi - PushpakBhattacharyya + PushpakBhattacharyya VasudevaVarma 28–36 In this paper, we advocate the use of Message Sequence Chart (MSC) as a knowledge representation to capture and visualize multi-actor interactions and their temporal ordering. We propose algorithms to automatically extract an MSC from a history narrative. For a given narrative, we first identify verbs which indicate interactions and then use dependency parsing and Semantic Role Labelling based approaches to identify senders (initiating actors) and receivers (other actors involved) for these interaction verbs. As a final step in MSC extraction, we employ a state-of-the art algorithm to temporally re-order these interactions. Our evaluation on multiple publicly available narratives shows improvements over four baselines. @@ -3044,11 +3044,11 @@ Proceedings of the 3rd Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature W19-25 - BeatriceAlex + BeatriceAlex StefaniaDegaetano-Ortlieb AnnaKazantseva NilsReiter - StanSzpakowicz + StanSzpakowicz Association for Computational Linguistics
Minneapolis, USA
June @@ -3062,7 +3062,7 @@ Modeling Word Emotion in Historical Language: Quantity Beats Supposed Stability in Seed Word Selection JohannesHellrich - SvenBuechel + SvenBuechel UdoHahn 1–11 To understand historical texts, we must be aware that language—including the emotional connotation attached to words—changes over time. In this paper, we aim at estimating the emotion which is associated with a given word in former language stages of English and German. Emotion is represented following the popular Valence-Arousal-Dominance (VAD) annotation scheme. While being more expressive than polarity alone, existing word emotion induction methods are typically not suited for addressing it. To overcome this limitation, we present adaptations of two popular algorithms to VAD. To measure their effectiveness in diachronic settings, we present the first gold standard for historical word emotions, which was created by scholars with proficiency in the respective language stages and covers both English and German. In contrast to claims in previous work, our findings indicate that hand-selecting small sets of seed words with supposedly stable emotional meaning is actually harm- rather than helpful. @@ -3074,7 +3074,7 @@ Clustering-Based Article Identification in Historical Newspapers MartinRiedl DanielaBetz - SebastianPadó + SebastianPadó 12–17 This article focuses on the problem of identifying articles and recovering their text from within and across newspaper pages when OCR just delivers one text file per page. We frame the task as a segmentation plus clustering step. Our results on a sample of 1912 New York Tribune magazine shows that performing the clustering based on similarities computed with word embeddings outperforms a similarity measure based on character n-grams and words. Furthermore, the automatic segmentation based on the text results in low scores, due to the low quality of some OCRed documents. W19-2502 @@ -3157,7 +3157,7 @@ MikaHämäläinen TanjaSäily JackRueter - JörgTiedemann + JörgTiedemann EetuMäkelä 71–75 This paper studies the use of NMT (neural machine translation) as a normalization method for an early English letter corpus. The corpus has previously been normalized so that only less frequent deviant forms are left out without normalization. This paper discusses different methods for improving the normalization of these deviant forms by using different approaches. Adding features to the training data is found to be unhelpful, but using a lexicographical resource to filter the top candidates produced by the NMT model together with lemmatization improves results. @@ -3177,7 +3177,7 @@ Semantics and Homothetic Clustering of Hafez Poetry AryaRahgozar - DianaInkpen + DianaInkpen 82–90 We have created two sets of labels for Hafez (1315-1390) poems, using unsupervised learning. Our labels are the only semantic clustering alternative to the previously existing, hand-labeled, gold-standard classification of Hafez poems, to be used for literary research. We have cross-referenced, measured and analyzed the agreements of our clustering labels with Houman’s chronological classes. Our features are based on topic modeling and word embeddings. We also introduced a similarity of similarities’ features, we called homothetic clustering approach that proved effective, in case of Hafez’s small corpus of ghazals2. Although all our experiments showed different clusters when compared with Houman’s classes, we think they were valid in their own right to have provided further insights, and have proved useful as a contrasting alternative to Houman’s classes. Our homothetic clusterer and its feature design and engineering framework can be used for further semantic analysis of Hafez’s poetry and other similar literary research. W19-2511 @@ -3188,7 +3188,7 @@ Computational Linguistics Applications for Multimedia Services KyeongminRim KelleyLynch - JamesPustejovsky + JamesPustejovsky 91–97 We present Computational Linguistics Applications for Multimedia Services (CLAMS), a platform that provides access to computational content analysis tools for archival multimedia material that appear in different media, such as text, audio, image, and video. The primary goal of CLAMS is: (1) to develop an interchange format between multimodal metadata generation tools to ensure interoperability between tools; (2) to provide users with a portable, user-friendly workflow engine to chain selected tools to extract meaningful analyses; and (3) to create a public software development kit (SDK) for developers that eases deployment of analysis tools within the CLAMS platform. CLAMS is designed to help archives and libraries enrich the metadata associated with their mass-digitized multimedia collections, that would otherwise be largely unsearchable. W19-2512 @@ -3208,7 +3208,7 @@ On the Feasibility of Automated Detection of Allusive Text Reuse - EnriqueManjavacas + EnriqueManjavacas BrianLong MikeKestemont 104–114 @@ -3219,7 +3219,7 @@ The limits of <fixed-case>S</fixed-case>panglish? - BarbaraBullock + BarbaraBullock WallyGuzmán Almeida JacquelineToribio 115–121 @@ -3246,7 +3246,7 @@ Proceedings of the Workshop on Extracting Structured Knowledge from Scientific Publications W19-26 - ViviNastase + ViviNastase BenjaminRoth LauraDietz AndrewMcCallum @@ -3325,8 +3325,8 @@ SohamParikh ElizabethConrad OshinAgarwal - IainMarshall - ByronWallace + IainMarshall + ByronWallace AniNenkova 43–47 Standard paradigms for search do not work well in the medical context. Typical information needs, such as retrieving a full list of medical interventions for a given condition, or finding the reported efficacy of a particular treatment with respect to a specific outcome of interest cannot be straightforwardly posed in typical text-box search. Instead, we propose faceted-search in which a user specifies a condition and then can browse treatments and outcomes that have been evaluated. Choosing from these, they can access randomized control trials (RCTs) describing individual studies. Realizing such a view of the medical evidence requires information extraction techniques to identify the population, interventions, and outcome measures in an RCT. Patients, health practitioners, and biomedical librarians all stand to benefit from such innovation in search of medical evidence. We present an initial prototype of such an interface applied to pre-registered clinical studies. We also discuss pilot studies into the applicability of information extraction methods to allow for similar access to all published trial results. @@ -3363,7 +3363,7 @@ RonenTamari HiroyukiShindo DafnaShahaf - YujiMatsumoto + YujiMatsumoto 62–71 Understanding procedural text requires tracking entities, actions and effects as the narrative unfolds. We focus on the challenging real-world problem of action-graph extraction from materials science papers, where language is highly specialized and data annotation is expensive and scarce. We propose a novel approach, Text2Quest, where procedural text is interpreted as instructions for an interactive game. A learning agent completes the game by executing the procedure correctly in a text-based simulated lab environment. The framework can complement existing approaches and enables richer forms of learning compared to static texts. We discuss potential limitations and advantages of the approach, and release a prototype proof-of-concept, hoping to encourage research in this direction. W19-2609 @@ -3372,7 +3372,7 @@ Textual and Visual Characteristics of Mathematical Expressions in Scholar Documents - VidasDaudaravicius + VidasDaudaravicius 72–81 Mathematical expressions (ME) are widely used in scholar documents. In this paper we analyze characteristics of textual and visual MEs characteristics for the image-to-LaTeX translation task. While there are open data-sets of LaTeX files with MEs included it is very complicated to extract these MEs from a document and to compile the list of MEs. Therefore we release a corpus of open-access scholar documents with PDF and JATS-XML parallel files. The MEs in these documents are LaTeX encoded and are document independent. The data contains more than 1.2 million distinct annotated formulae and more than 80 million raw tokens of LaTeX MEs in more than 8 thousand documents. While the variety of textual lengths and visual sizes of MEs are not well defined we found that the task of analyzing MEs in scholar documents can be reduced to the subtask of a particular text length, image width and height bounds, and display MEs can be processed as arrays of partial MEs. W19-2610 @@ -3388,7 +3388,7 @@ DebopamDas Erick MazieroGalani Juliano DesideratoAntonio - MikelIruskieta + MikelIruskieta Association for Computational Linguistics
Minneapolis, MN
June @@ -3403,7 +3403,7 @@ Introduction to Discourse Relation Parsing and Treebanking (<fixed-case>DISRPT</fixed-case>): 7th Workshop on <fixed-case>R</fixed-case>hetorical <fixed-case>S</fixed-case>tructure <fixed-case>T</fixed-case>heory and Related Formalisms AmirZeldes DebopamDas - Erick GalaniMaziero + Erick GalaniMaziero JulianoAntonio MikelIruskieta 1–6 @@ -3468,7 +3468,7 @@ Annotating Shallow Discourse Relations in <fixed-case>T</fixed-case>witter Conversations TatjanaScheffler - BerfinAktaş + BerfinAktaş DebopamDas ManfredStede 50–55 @@ -3538,7 +3538,7 @@ The <fixed-case>DISRPT</fixed-case> 2019 Shared Task on Elementary Discourse Unit Segmentation and Connective Detection AmirZeldes DebopamDas - Erick GalaniMaziero + Erick GalaniMaziero JulianoAntonio MikelIruskieta 97–104 @@ -3574,7 +3574,7 @@ MikelIruskieta KepaBengoetxea AitziberAtutxa Salazar - ArantzaDiaz de Ilarraza + ArantzaDiaz de Ilarraza 125–132 The DISPRT 2019 workshop has organized a shared task aiming to identify cross-formalism and multilingual discourse segments. Elementary Discourse Units (EDUs) are quite similar across different theories. Segmentation is the very first stage on the way of rhetorical annotation. Still, each annotation project adopted several decisions with consequences not only on the annotation of the relational discourse structure but also at the segmentation stage. In this shared task, we have employed pre-trained word embeddings, neural networks (BiLSTM+CRF) to perform the segmentation. We report F1 results for 6 languages: Basque (0.853), English (0.919), French (0.907), German (0.913), Portuguese (0.926) and Spanish (0.868 and 0.769). Finally, we also pursued an error analysis based on clause typology for Basque and Spanish, in order to understand the performance of the segmenter. W19-2716 @@ -3600,7 +3600,7 @@ Towards discourse annotation and sentiment analysis of the <fixed-case>B</fixed-case>asque Opinion Corpus JonAlkorta - KoldoGojenola + KoldoGojenola MikelIruskieta 144–152 Discourse information is crucial for a better understanding of the text structure and it is also necessary to describe which part of an opinionated text is more relevant or to decide how a text span can change the polarity (strengthen or weaken) of other span by means of coherence relations. This work presents the first results on the annotation of the Basque Opinion Corpus using Rhetorical Structure Theory (RST). Our evaluation results and analysis show us the main avenues to improve on a future annotation process. We have also extracted the subjectivity of several rhetorical relations and the results show the effect of sentiment words in relations and the influence of each relation in the semantic orientation value. @@ -3631,7 +3631,7 @@ ChandrakumariSuvarna PoojaCasula MingtongZhang - CarolynRosé + CarolynRosé 163–168 We present a package of annotation resources, including annotation guideline, flowchart, and an Intelligent Tutoring System for training human annotators. These resources can be used to apply Rhetorical Structure Theory (RST) to essays written by students in K-12 schools. Furthermore, we highlight the great potential of using RST to provide automated feedback for improving writing quality across genres. W19-2720 @@ -3645,7 +3645,7 @@ Proceedings of the Second Workshop on Computational Models of Reference, Anaphora and Coreference W19-28 MaciejOgrodniczuk - SameerPradhan + SameerPradhan YuliaGrishina VincentNg Association for Computational Linguistics @@ -3730,10 +3730,10 @@ Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics W19-29 EmmanueleChersoni - CassandraJacobs + CassandraJacobs AlessandroLenci TalLinzen - LaurentPrévot + LaurentPrévot EnricoSantus Association for Computational Linguistics
Minneapolis, Minnesota
@@ -3749,7 +3749,7 @@ The Active-Filler Strategy in a Move-Eager Left-Corner <fixed-case>M</fixed-case>inimalist <fixed-case>G</fixed-case>rammar Parser TimHunter MilošStanojević - EdwardStabler + EdwardStabler 1–10 Recent psycholinguistic evidence suggests that human parsing of moved elements is ‘active’, and perhaps even ‘hyper-active’: it seems that a leftward-moved object is related to a verbal position rapidly, perhaps even before the transitivity information associated with the verb is available to the listener. This paper presents a formal, sound and complete parser for Minimalist Grammars whose search space contains branching points that we can identify as the locus of the decision to perform this kind of active gap-finding. This brings formal models of parsing into closer contact with recent psycholinguistic theorizing than was previously possible. W19-2901 @@ -3759,7 +3759,7 @@ Priming vs. Inhibition of Optional Infinitival “to” RobinMelnick - ThomasWasow + ThomasWasow 11–19 The word “to” that precedes verbs in English infinitives is optional in at least two environments: in what Wasow et al. (2015) previously called the “do-be” construction, and in the complement of “help”, which we explore in the present work. In the “do-be” construction, Wasow et al. found that a preceding infinitival “to” increases the use of following optional “to”, but the use of “to” in the complement of help is reduced following “to help”. We examine two hypotheses regarding why the same function word is primed by prior use in one construction and inhibited in another. We then test predictions made by the two hypotheses, finding support for one of them. W19-2902 @@ -3769,8 +3769,8 @@ Simulating <fixed-case>S</fixed-case>panish-<fixed-case>E</fixed-case>nglish Code-Switching: El Modelo Está Generating Code-Switches CharaTsoukala - Stefan L.Frank - Antalvan den Bosch + Stefan L.Frank + Antalvan den Bosch JorgeValdés Kroff MirjamBroersma 20–29 @@ -3783,7 +3783,7 @@ Surprisal and Interference Effects of Case Markers in <fixed-case>H</fixed-case>indi Word Order SidharthRanjan SumeetAgarwal - RajakrishnanRajkumar + RajakrishnanRajkumar 30–42 Based on the Production-Distribution-Comprehension (PDC) account of language processing, we formulate two distinct hypotheses about case marking, word order choices and processing in Hindi. Our first hypothesis is that Hindi tends to optimize for processing efficiency at both lexical and syntactic levels. We quantify the role of case markers in this process. For the task of predicting the reference sentence occurring in a corpus (amidst meaning-equivalent grammatical variants) using a machine learning model, surprisal estimates from an artificial version of the language (i.e., Hindi without any case markers) result in lower prediction accuracy compared to natural Hindi. Our second hypothesis is that Hindi tends to minimize interference due to case markers while ordering preverbal constituents. We show that Hindi tends to avoid placing next to each other constituents whose heads are marked by identical case inflections. Our findings adhere to PDC assumptions and we discuss their implications for language production, learning and universals. W19-2904 @@ -3793,7 +3793,7 @@ Modeling Hierarchical Syntactic Structures in Morphological Processing YoheiOseki - CharlesYang + CharlesYang AlecMarantz 43–52 Sentences are represented as hierarchical syntactic structures, which have been successfully modeled in sentence processing. In contrast, despite the theoretical agreement on hierarchical syntactic structures within words, words have been argued to be computationally less complex than sentences and implemented by finite-state models as linear strings of morphemes, and even the psychological reality of morphemes has been denied. In this paper, extending the computational models employed in sentence processing to morphological processing, we performed a computational simulation experiment where, given incremental surprisal as a linking hypothesis, five computational models with different representational assumptions were evaluated against human reaction times in visual lexical decision experiments available from the English Lexicon Project (ELP), a “shared task” in the morphological processing literature. The simulation experiment demonstrated that (i) “amorphous” models without morpheme units underperformed relative to “morphous” models, (ii) a computational model with hierarchical syntactic structures, Probabilistic Context-Free Grammar (PCFG), most accurately explained human reaction times, and (iii) this performance was achieved on top of surface frequency effects. These results strongly suggest that morphological processing tracks morphemes incrementally from left to right and parses them into hierarchical syntactic structures, contrary to “amorphous” and finite-state models of morphological processing. @@ -3804,7 +3804,7 @@ A Modeling Study of the Effects of Surprisal and Entropy in Perceptual Decision Making of an Adaptive Agent Pyeong WhanCho - RichardLewis + RichardLewis 53–61 Processing difficulty in online language comprehension has been explained in terms of surprisal and entropy reduction. Although both hypotheses have been supported by experimental data, we do not fully understand their relative contributions on processing difficulty. To develop a better understanding, we propose a mechanistic model of perceptual decision making that interacts with a simulated task environment with temporal dynamics. The proposed model collects noisy bottom-up evidence over multiple timesteps, integrates it with its top-down expectation, and makes perceptual decisions, producing processing time data directly without relying on any linking hypothesis. Temporal dynamics in the task environment was determined by a simple finite-state grammar, which was designed to create the situations where the surprisal and entropy reduction hypotheses predict different patterns. After the model was trained to maximize rewards, the model developed an adaptive policy and both surprisal and entropy effects were observed especially in a measure reflecting earlier processing. W19-2906 @@ -3814,7 +3814,7 @@ Modeling Long-Distance Cue Integration in Spoken Word Recognition WednesdayBushong - T. FlorianJaeger + T. FlorianJaeger 62–70 Cues to linguistic categories are distributed across the speech signal. Optimal categorization thus requires that listeners maintain gradient representations of incoming input in order to integrate that information with later cues. There is now evidence that listeners can and do integrate cues that occur far apart in time. Computational models of this integration have however been lacking. We take a first step at addressing this gap by mathematically formalizing four models of how listeners may maintain and use cue information during spoken language understanding and test them on two perception experiments. In one experiment, we find support for rational integration of cues at long distances. In a second, more memory and attention-taxing experiment, we find evidence in favor of a switching model that avoids maintaining detailed representations of cues in memory. These results are a first step in understanding what kinds of mechanisms listeners use for cue integration under different memory and attentional constraints. W19-2907 @@ -3826,7 +3826,7 @@ BrunoGaume LydiaMai Ho-Dac LudovicTanguy - CécileFabre + CécileFabre BénédictePierrejean NabilHathout JérômeFarinas @@ -3844,8 +3844,8 @@ Dependency Parsing with your Eyes: Dependency Structure Predicts Eye Regressions During Reading AlessandroLopopolo - Stefan L.Frank - Antalvan den Bosch + Stefan L.Frank + Antalvan den Bosch RoelWillems 77–85 Backward saccades during reading have been hypothesized to be involved in structural reanalysis, or to be related to the level of text difficulty. We test the hypothesis that backward saccades are involved in online syntactic analysis. If this is the case we expect that saccades will coincide, at least partially, with the edges of the relations computed by a dependency parser. In order to test this, we analyzed a large eye-tracking dataset collected while 102 participants read three short narrative texts. Our results show a relation between backward saccades and the syntactic structure of sentences. @@ -3876,7 +3876,7 @@ Quantifiers in a Multimodal World: Hallucinating Vision with Language and Sound AlbertoTestoni SandroPezzelle - RaffaellaBernardi + RaffaellaBernardi 105–116 Inspired by the literature on multisensory integration, we develop a computational model to ground quantifiers in perception. The model learns to pick, out of nine quantifiers (‘few’, ‘many’, ‘all’, etc.), the one that is more likely to describe the percent of animals in a visual-auditory input containing both animals and artifacts. We show that relying on concurrent sensory inputs increases model performance on the quantification task. Moreover, we evaluate the model in a situation in which only the auditory modality is given, while the visual one is ‘hallucinanted’ either from the auditory input itself or from a linguistic caption describing the quantity of entities in the auditory input. This way, the model exploits prior associations between modalities. We show that the model profits from the prior knowledge and outperforms the auditory-only setting. W19-2912 @@ -3897,7 +3897,7 @@ The Development of Abstract Concepts in Children’s Early Lexical Networks AbdellahFourtassi IsaacScheinfeld - MichaelFrank + MichaelFrank 129–133 How do children learn abstract concepts such as animal vs. artifact? Previous research has suggested that such concepts can partly be derived using cues from the language children hear around them. Following this suggestion, we propose a model where we represent the children’ developing lexicon as an evolving network. The nodes of this network are based on vocabulary knowledge as reported by parents, and the edges between pairs of nodes are based on the probability of their co-occurrence in a corpus of child-directed speech. We found that several abstract categories can be identified as the dense regions in such networks. In addition, our simulations suggest that these categories develop simultaneously, rather than sequentially, thanks to the children’s word learning trajectory which favors the exploration of the global conceptual space. W19-2914 @@ -3906,7 +3906,7 @@ Verb-Second Effect on Quantifier Scope Interpretation - AsadSayeed + AsadSayeed MatthiasLindemann VeraDemberg 134–139 @@ -3967,7 +3967,7 @@ Towards augmenting crisis counselor training by improving message retrieval OriannaDemasi - Marti A.Hearst + Marti A.Hearst BenjaminRecht 1–11 A fundamental challenge when training counselors is presenting novices with the opportunity to practice counseling distressed individuals without exacerbating a situation. Rather than replacing human empathy with an automated counselor, we propose simulating an individual in crisis so that human counselors in training can practice crisis counseling in a low-risk environment. Towards this end, we collect a dataset of suicide prevention counselor role-play transcripts and make initial steps towards constructing a CRISISbot for humans to counsel while in training. In this data-constrained setting, we evaluate the potential for message retrieval to construct a coherent chat agent in light of recent advances with text embedding methods. Our results show that embeddings can considerably improve retrieval approaches to make them competitive with generative models. By coherently retrieving messages, we can help counselors practice chatting in a low-risk environment. @@ -3981,7 +3981,7 @@ DerrickHull JacobLevine BonnieRay - KathyMcKeown + KathyMcKeown 12–23 While conversation in therapy sessions can vary widely in both topic and style, an understanding of the underlying techniques used by therapists can provide valuable insights into how therapists best help clients of different types. Dialogue act classification aims to identify the conversational “action” each speaker takes at each utterance, such as sympathizing, problem-solving or assumption checking. We propose to apply dialogue act classification to therapy transcripts, using a therapy-specific labeling scheme, in order to gain a high-level understanding of the flow of conversation in therapy sessions. We present a novel annotation scheme that spans multiple psychotherapeutic approaches, apply it to a large and diverse corpus of psychotherapy transcripts, and present and discuss classification results obtained using both SVM and neural network-based models. The results indicate that identifying the structure and flow of therapeutic actions is an obtainable goal, opening up the opportunity in the future to provide therapeutic recommendations tailored to specific client situations. W19-3002 @@ -3992,7 +3992,7 @@ <fixed-case>CLP</fixed-case>sych 2019 Shared Task: Predicting the Degree of Suicide Risk in <fixed-case>R</fixed-case>eddit Posts AyahZirikly PhilipResnik - ÖzlemUzuner + ÖzlemUzuner KristyHollingshead 24–33 The shared task for the 2019 Workshop on Computational Linguistics and Clinical Psychology (CLPsych’19) introduced an assessment of suicide risk based on social media postings, using data from Reddit to identify users at no, low, moderate, or severe risk. Two variations of the task focused on users whose posts to the r/SuicideWatch subreddit indicated they might be at risk; a third task looked at screening users based only on their more everyday (non-SuicideWatch) posts. We received submissions from 15 different teams, and the results provide progress and insight into the value of language signal in helping to predict risk level. @@ -4020,8 +4020,8 @@ HuyVu MohammadZamani ParthLimbachiya - Sharath ChandraGuntuku - H. AndrewSchwartz + Sharath ChandraGuntuku + H. AndrewSchwartz 39–44 Mental health predictive systems typically model language as if from a single context (e.g. Twitter posts, status updates, or forum posts) and often limited to a single level of analysis (e.g. either the message-level or user-level). Here, we bring these pieces together to explore the use of open-vocabulary (BERT embeddings, topics) and theoretical features (emotional expression lexica, personality) for the task of suicide risk assessment on support forums (the CLPsych-2019 Shared Task). We used dual context based approaches (modeling content from suicide forums separate from other content), built over both traditional ML models as well as a novel dual RNN architecture with user-factor adaptation. We find that while affect from the suicide context distinguishes with no-risk from those with “any-risk”, personality factors from the non-suicide contexts provide distinction of the levels of risk: low, medium, and high risk. Within the shared task, our dual-context approach (listed as SBU-HLAB in the official results) achieved state-of-the-art performance predicting suicide risk using a combination of suicide-context and non-suicide posts (Task B), achieving an F1 score of 0.50 over hidden test set labels. W19-3005 @@ -4035,7 +4035,7 @@ BirkanTunc CaseyZampella EdwardBrodkin - RobertSchultz + RobertSchultz JuliaParish-Morris 45–54 Spoken language ability is highly heterogeneous in Autism Spectrum Disorder (ASD), which complicates efforts to identify linguistic markers for use in diagnostic classification, clinical characterization, and for research and clinical outcome measurement. Machine learning techniques that harness the power of multivariate statistics and non-linear data analysis hold promise for modeling this heterogeneity, but many models require enormous datasets, which are unavailable for most psychiatric conditions (including ASD). In lieu of such datasets, good models can still be built by leveraging domain knowledge. In this study, we compare two machine learning approaches: the first approach incorporates prior knowledge about language variation across middle childhood, adolescence, and adulthood to classify 6-minute naturalistic conversation samples from 140 age- and IQ-matched participants (81 with ASD), while the other approach treats all ages the same. We found that individual age-informed models were significantly more accurate than a single model tasked with building a common algorithm across age groups. Furthermore, predictive linguistic features differed significantly by age group, confirming the importance of considering age-related changes in language use when classifying ASD. Our results suggest that limitations imposed by heterogeneity inherent to ASD and from developmental change with age can be (at least partially) overcome using domain knowledge, such as understanding spoken language development from childhood through adulthood. @@ -4045,7 +4045,7 @@ The importance of sharing patient-generated clinical speech and language data - Kathleen C.Fraser + Kathleen C.Fraser NicklasLinz HaliLindsay AlexandraKönig @@ -4058,7 +4058,7 @@ Depressed Individuals Use Negative Self-Focused Language When Recalling Recent Interactions with Close Romantic Partners but Not Family or <fixed-case>F</fixed-case>riends TaleenNalabandian - MollyIreland + MollyIreland 62–73 Depression is characterized by a self-focused negative attentional bias, which is often reflected in everyday language use. In a prospective writing study, we explored whether the association between depressive symptoms and negative, self-focused language varies across social contexts. College students (N = 243) wrote about a recent interaction with a person they care deeply about. Depression symptoms positively correlated with negative emotion words and first-person singular pronouns (or negative self-focus) when writing about a recent interaction with romantic partners or, to a lesser extent, friends, but not family members. The pattern of results was more pronounced when participants perceived greater self-other overlap (i.e., interpersonal closeness) with their romantic partner. Findings regarding how the linguistic profile of depression differs by type of relationship may inform more effective methods of clinical diagnosis and treatment. W19-3008 @@ -4128,7 +4128,7 @@ Reviving a psychometric measure: Classification and prediction of the Operant Motive Test DirkJohannßen - ChrisBiemann + ChrisBiemann DavidScheffer 121–125 Implicit motives allow for the characterization of behavior, subsequent success and long-term development. While this has been operationalized in the operant motive test, research on motives has declined mainly due to labor-intensive and costly human annotation. In this study, we analyze over 200,000 labeled data items from 40,000 participants and utilize them for engineering features for training a logistic model tree machine learning model. It captures manually assigned motives well with an F-score of 80%, coming close to the pairwise annotator intraclass correlation coefficient of r = .85. In addition, we found a significant correlation of r = .2 between subsequent academic success and data automatically labeled with our model in an extrinsic evaluation. @@ -4155,7 +4155,7 @@ Overcoming the bottleneck in traditional assessments of verbal memory: Modeling human ratings and classifying clinical group membership ChelseaChandler - Peter W.Foltz + Peter W.Foltz JianCheng Jared C.Bernstein Elizabeth P.Rosenfeld @@ -4274,7 +4274,7 @@ Dictionaries and Decision Trees for the 2019 <fixed-case>CLP</fixed-case>sych Shared Task MicahIserman TaleenNalabandian - MollyIreland + MollyIreland 188–194 In this summary, we discuss our approach to the CLPsych Shared Task and its initial results. For our predictions in each task, we used a recursive partitioning algorithm (decision trees) to select from our set of features, which were primarily dictionary scores and counts of individual words. We focused primarily on Task A, which aimed to predict suicide risk, as rated by a team of expert clinicians (Shing et al., 2018), based on language used in SuicideWatch posts on Reddit. Category-level findings highlight the potential importance of social and moral language categories. Word-level correlates of risk levels underline the value of fine-grained data-driven approaches, revealing both theory-consistent and potentially novel correlates of suicide risk that may motivate future research. W19-3025 @@ -4326,7 +4326,7 @@ Bottom-Up Unranked Tree-to-Graph Transducers for Translation into Semantic Graphs JohannaBjörklund - Shay B.Cohen + Shay B.Cohen FrankDrewes GiorgioSatta 7–17 @@ -4339,7 +4339,7 @@ On the Compression of Lexicon Transducers MarcoCognetta CyrilAllauzen - MichaelRiley + MichaelRiley 18–26 W19-3105 In finite-state language processing pipelines, a lexicon is often a key component. It needs to be comprehensive to ensure accuracy, reducing out-of-vocabulary misses. However, in memory-constrained environments (e.g., mobile phones), the size of the component automata must be kept small. Indeed, a delicate balance between comprehensiveness, speed, and memory must be struck to conform to device requirements while providing a good user experience. @@ -4393,7 +4393,7 @@ In this paper, we describe a compression scheme for lexicons when represented as A Syntactically Expressive Morphological Analyzer for <fixed-case>T</fixed-case>urkish AdnanOzturel TolgaKayadelen - IsinDemirsahin + IsinDemirsahin 65–75 W19-3110 We present a broad coverage model of Turkish morphology and an open-source morphological analyzer that implements it. The model captures intricacies of Turkish morphology-syntax interface, thus could be used as a baseline that guides language model development. It introduces a novel fine part-of-speech tagset, a fine-grained affix inventory and represents morphotactics without zero-derivations. The morphological analyzer is freely available. It consists of modular reusable components of human-annotated gold standard lexicons, implements Turkish morphotactics as finite-state transducers using OpenFst and morphophonemic processes as Thrax grammars. @@ -4415,7 +4415,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Distilling weighted finite automata from arbitrary probabilistic models Ananda TheerthaSuresh BrianRoark - MichaelRiley + MichaelRiley VladSchogol 87–97 W19-3112 @@ -4437,7 +4437,7 @@ In this paper, we describe a compression scheme for lexicons when represented as LawrenceWolf-Sonkin VladSchogol BrianRoark - MichaelRiley + MichaelRiley 108–117 W19-3114 The use of the Latin script for text entry of South Asian languages is common, even though there is no standard orthography for these languages in the script. We explore several compact finite-state architectures that permit variable spellings of words during mobile text entry. We find that approaches making use of transliteration transducers provide large accuracy improvements over baselines, but that simpler approaches involving a compact representation of many attested alternatives yields much of the accuracy gain. This is particularly important when operating under constraints on model size (e.g., on inexpensive mobile devices with limited storage and memory for keyboard models), and on speed of inference, since people typing on mobile keyboards expect no perceptual delay in keyboard responsiveness. @@ -4458,8 +4458,8 @@ In this paper, we describe a compression scheme for lexicons when represented as Proceedings of the Fourth Social Media Mining for Health Applications (#SMM4H) Workshop & Shared Task W19-32 - DavyWeissenbacher - GracielaGonzalez-Hernandez + DavyWeissenbacher + GracielaGonzalez-Hernandez Association for Computational Linguistics
Florence, Italy
August @@ -4475,7 +4475,7 @@ In this paper, we describe a compression scheme for lexicons when represented as KaiHe JialunWu XiaoyongMa - ChongZhang + ChongZhang MingHuang ChenLi LixiaYao @@ -4503,7 +4503,7 @@ In this paper, we describe a compression scheme for lexicons when represented as ArjunMagge AshlynnDaughton KarenO’Connor - Michael J.Paul + Michael J.Paul GracielaGonzalez-Hernandez 21–30 The number of users of social media continues to grow, with nearly half of adults worldwide and two-thirds of all American adults using social networking. Advances in automated data processing, machine learning and NLP present the possibility of utilizing this massive data source for biomedical and public health applications, if researchers address the methodological challenges unique to this media. We present the Social Media Mining for Health Shared Tasks collocated with the ACL at Florence in 2019, which address these challenges for health monitoring and surveillance, utilizing state of the art techniques for processing noisy, real-world, and substantially creative language expressions from social media users. For the fourth execution of this challenge, we proposed four different tasks. Task 1 asked participants to distinguish tweets reporting an adverse drug reaction (ADR) from those that do not. Task 2, a follow-up to Task 1, asked participants to identify the span of text in tweets reporting ADRs. Task 3 is an end-to-end task where the goal was to first detect tweets mentioning an ADR and then map the extracted colloquial mentions of ADRs in the tweets to their corresponding standard concept IDs in the MedDRA vocabulary. Finally, Task 4 asked participants to classify whether a tweet contains a personal mention of one’s health, a more general discussion of the health issue, or is an unrelated mention. A total of 34 teams from around the world registered and 19 teams from 12 countries submitted a system run. We summarize here the corpora for this challenge which are freely available at https://competitions.codalab.org/competitions/22521, and present an overview of the methods and the results of the competing systems. @@ -4515,7 +4515,7 @@ In this paper, we describe a compression scheme for lexicons when represented as <fixed-case>M</fixed-case>ed<fixed-case>N</fixed-case>orm: A Corpus and Embeddings for Cross-terminology Medical Concept Normalisation MaksimBelousov William G.Dixon - GoranNenadic + GoranNenadic 31–39 The medical concept normalisation task aims to map textual descriptions to standard terminologies such as SNOMED-CT or MedDRA. Existing publicly available datasets annotated using different terminologies cannot be simply merged and utilised, and therefore become less valuable when developing machine learning-based concept normalisation systems. To address that, we designed a data harmonisation pipeline and engineered a corpus of 27,979 textual descriptions simultaneously mapped to both MedDRA and SNOMED-CT, sourced from five publicly available datasets across biomedical and social media domains. The pipeline can be used in the future to integrate new datasets into the corpus and also could be applied in relevant data curation tasks. We also described a method to merge different terminologies into a single concept graph preserving their relations and demonstrated that representation learning approach based on random walks on a graph can efficiently encode both hierarchical and equivalent relations and capture semantic similarities not only between concepts inside a given terminology but also between concepts from different terminologies. We believe that making a corpus and embeddings for cross-terminology medical concept normalisation available to the research community would contribute to a better understanding of the task. W19-3204 @@ -4526,7 +4526,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Passive Diagnosis Incorporating the <fixed-case>PHQ</fixed-case>-4 for Depression and Anxiety FionnDelahunty RobertJohansson - MihaelArcan + MihaelArcan 40–46 Depression and anxiety are the two most prevalent mental health disorders worldwide, impacting the lives of millions of people each year. In this work, we develop and evaluate a multilabel, multidimensional deep neural network designed to predict PHQ-4 scores based on individuals written text. Our system outperforms random baseline metrics and provides a novel approach to how we can predict psychometric scores from written text. Additionally, we explore how this architecture can be applied to analyse social media data. W19-3205 @@ -4598,7 +4598,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Affective Behaviour Analysis of On-line User Interactions: Are On-line Support Groups More Therapeutic than <fixed-case>T</fixed-case>witter? GiulianoTortoreto - EvgenyStepanov + EvgenyStepanov AlessandraCervone MateuszDubiel GiuseppeRiccardi @@ -4644,7 +4644,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Deep Learning for Identification of Adverse Effect Mentions In <fixed-case>T</fixed-case>witter Data PaulBarry - OzlemUzuner + OzlemUzuner 99–101 Social Media Mining for Health Applications (SMM4H) Adverse Effect Mentions Shared Task challenges participants to accurately identify spans of text within a tweet that correspond to Adverse Effects (AEs) resulting from medication usage (Weissenbacher et al., 2019). This task features a training data set of 2,367 tweets, in addition to a 1,000 tweet evaluation data set. The solution presented here features a bidirectional Long Short-term Memory Network (bi-LSTM) for the generation of character-level embeddings. It uses a second bi-LSTM trained on both character and token level embeddings to feed a Conditional Random Field (CRF) which provides the final classification. This paper further discusses the deep learning algorithms used in our solution. W19-3215 @@ -4655,8 +4655,8 @@ In this paper, we describe a compression scheme for lexicons when represented as Using Machine Learning and Deep Learning Methods to Find Mentions of Adverse Drug Reactions in Social Media PilarLópez Úbeda Manuel CarlosDíaz Galiano - MaiteMartin - L. AlfonsoUrena Lopez + MaiteMartin + L. AlfonsoUrena Lopez 102–106 Over time the use of social networks is becoming very popular platforms for sharing health related information. Social Media Mining for Health Applications (SMM4H) provides tasks such as those described in this document to help manage information in the health domain. This document shows the first participation of the SINAI group. We study approaches based on machine learning and deep learning to extract adverse drug reaction mentions from Twitter. The results obtained in the tasks are encouraging, we are close to the average of all participants and even above in some cases. W19-3216 @@ -4749,7 +4749,7 @@ In this paper, we describe a compression scheme for lexicons when represented as SimraShahid LaibaMehnaz YamanKumar - Rajiv RatnShah + Rajiv RatnShah 127–132 In this paper, we present our approach and the system description for the Social Media Mining for Health Applications (SMM4H) Shared Task 1,2 and 4 (2019). Our main contribution is to show the effectiveness of Transfer Learning approaches like BERT and ULMFiT, and how they generalize for the classification tasks like identification of adverse drug reaction mentions and reporting of personal health problems in tweets. We show the use of stacked embeddings combined with BLSTM+CRF tagger for identifying spans mentioning adverse drug reactions in tweets. We also show that these approaches perform well even with imbalanced dataset in comparison to undersampling and oversampling. W19-3223 @@ -4759,7 +4759,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Detection of Adverse Drug Reaction in Tweets Using a Combination of Heterogeneous Word Embeddings Segun TaofeekAroyehun - AlexanderGelbukh + AlexanderGelbukh 133–135 This paper details our approach to the task of detecting reportage of adverse drug reaction in tweets as part of the 2019 social media mining for healthcare applications shared task. We employed a combination of three types of word representations as input to a LSTM model. With this approach, we achieved an F1 score of 0.5209. W19-3224 @@ -4785,10 +4785,10 @@ In this paper, we describe a compression scheme for lexicons when represented as W19-33 NianwenXue WilliamCroft - JanHajic + JanHajic Chu-RenHuang StephanOepen - MarthaPalmer + MarthaPalmer JamesPustejovksy Association for Computational Linguistics
Florence, Italy
@@ -4826,7 +4826,7 @@ In this paper, we describe a compression scheme for lexicons when represented as
Modeling Quantification and Scope in <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentations - JamesPustejovsky + JamesPustejovsky KenLai NianwenXue 28–33 @@ -4849,7 +4849,7 @@ In this paper, we describe a compression scheme for lexicons when represented as <fixed-case>GKR</fixed-case>: Bridging the Gap between Symbolic/structural and Distributional Meaning Representations Aikaterini-LidaKalouli RichardCrouch - Valeriade Paiva + Valeriade Paiva 44–55 Three broad approaches have been attempted to combine distributional and structural/symbolic aspects to construct meaning representations: a) injecting linguistic features into distributional representations, b) injecting distributional features into symbolic representations or c) combining structural and distributional features in the final representation. This work focuses on an example of the third and less studied approach: it extends the Graphical Knowledge Representation (GKR) to include distributional features and proposes a division of semantic labour between the distributional and structural/symbolic features. We propose two extensions of GKR that clearly show this division and empirically test one of the proposals on an NLI dataset with hard compositional pairs. W19-3305 @@ -4865,7 +4865,7 @@ In this paper, we describe a compression scheme for lexicons when represented as GraemeMcGuire SophieSackstein GeorgiyPlatonov - LenhartSchubert + LenhartSchubert 56–65 Unscoped episodic logical form (ULF) is a semantic representation capturing the predicate-argument structure of English within the episodic logic formalism in relation to the syntactic structure, while leaving scope, word sense, and anaphora unresolved. We describe how ULF can be used to generate natural language inferences that are grounded in the semantic and syntactic structure through a small set of rules defined over interpretable predicates and transformations on ULFs. The semantic restrictions placed by ULF semantic types enables us to ensure that the inferred structures are semantically coherent while the nearness to syntax enables accurate mapping to English. We demonstrate these inferences on four classes of conversationally-oriented inferences in a mixed genre dataset with 68.5% precision from human judgments. W19-3306 @@ -4874,7 +4874,7 @@ In this paper, we describe a compression scheme for lexicons when represented as A Plea for Information Structure as a Part of Meaning Representation - EvaHajicova + EvaHajicova 66–72 The view that the representation of information structure (IS) should be a part of (any type of) representation of meaning is based on the fact that IS is a semantically relevant phenomenon. In the contribution, three arguments supporting this view are briefly summarized, namely, the relation of IS to the interpretation of negation and presupposition, the relevance of IS to the understanding of discourse connectivity and for the establishment and interpretation of coreference relations. Afterwards, possible integration of the description of the main ingredient of IS into a meaning representation is illustrated. W19-3307 @@ -4893,7 +4893,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Meta-Semantic Representation for Early Detection of <fixed-case>A</fixed-case>lzheimer’s Disease - Jinho D.Choi + Jinho D.Choi MengmeiLi FeliciaGoldstein IhabHajjar @@ -4942,7 +4942,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Meaning Representation of Null Instantiated Semantic Roles in <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et - Miriam R LPetruck + Miriam R LPetruck 121–127 Humans have the unique ability to infer information about participants in a scene, even if they are not mentioned in a text about that scene. Computer systems cannot do so without explicit information about those participants. This paper addresses the linguistic phenomenon of null-instantiated frame elements, i.e., implicit semantic roles, and their representation in FrameNet (FN). It motivates FN’s annotation practice, and illustrates three types of null-instantiated arguments that FrameNet tracks, noting that other lexical resources do not record such semantic-pragmatic information, despite its need in natural language understanding (NLU), and the elaborate efforts to create new datasets. It challenges the community to appeal to FN data to develop more sophisticated techniques for recognizing implicit semantic roles, and creating needed datasets. Although the annotation of null-instantiated roles was lexicographically motivated, FN provides useful information for text processing, and therefore must be considered in the design of any meaning representation for natural language understanding. W19-3313 @@ -4973,7 +4973,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Preparing <fixed-case>SNACS</fixed-case> for Subjects and Objects - AdiShalev + AdiShalev Jena D.Hwang NathanSchneider VivekSrikumar @@ -4998,11 +4998,11 @@ In this paper, we describe a compression scheme for lexicons when represented as <fixed-case>V</fixed-case>erb<fixed-case>N</fixed-case>et Representations: Subevent Semantics for Transfer Verbs - Susan WindischBrown + Susan WindischBrown JuliaBonn JamesGung AnnieZaenen - JamesPustejovsky + JamesPustejovsky MarthaPalmer 154–163 This paper announces the release of a new version of the English lexical resource VerbNet with substantially revised semantic representations designed to facilitate computer planning and reasoning based on human language. We use the transfer of possession and transfer of information event representations to illustrate both the general framework of the representations and the types of nuances the new representations can capture. These representations use a Generative Lexicon-inspired subevent structure to track attributes of event participants across time, highlighting oppositions and temporal and causal relations among the subevents. @@ -5045,13 +5045,13 @@ In this paper, we describe a compression scheme for lexicons when represented as Augmenting <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation for Human-Robot Dialogue - ClaireBonial + ClaireBonial LuciaDonatelli - Stephanie M.Lukin + Stephanie M.Lukin StephenTratz RonArtstein - DavidTraum - ClareVoss + DavidTraum + ClareVoss 199–210 We detail refinements made to Abstract Meaning Representation (AMR) that make the representation more suitable for supporting a situated dialogue system, where a human remotely controls a robot for purposes of search and rescue and reconnaissance. We propose 36 augmented AMRs that capture speech acts, tense and aspect, and spatial information. This linguistic information is vital for representing important distinctions, for example whether the robot has moved, is moving, or will move. We evaluate two existing AMR parsers for their performance on dialogue data. We also outline a model for graph-to-graph conversion, in which output from AMR parsers is converted into our refined AMRs. The design scheme presented here, though task-specific, is extendable for broad coverage of speech acts using AMR in future task-independent work. W19-3322 @@ -5064,8 +5064,8 @@ In this paper, we describe a compression scheme for lexicons when represented as Proceedings of the Second Workshop on Storytelling W19-34 FrancisFerraro - Ting-Hao ‘Kenneth’Huang - Stephanie M.Lukin + Ting-Hao ‘Kenneth’Huang + Stephanie M.Lukin MargaretMitchell Association for Computational Linguistics
Florence, Italy
@@ -5095,7 +5095,7 @@ In this paper, we describe a compression scheme for lexicons when represented as KhyathiChandu ShrimaiPrabhumoye RuslanSalakhutdinov - Alan WBlack + Alan WBlack 11–21 Visual storytelling is the task of generating stories based on a sequence of images. Inspired by the recent works in neural generation focusing on controlling the form of text, this paper explores the idea of generating these stories in different personas. However, one of the main challenges of performing this task is the lack of a dataset of visual stories in different personas. Having said that, there are independent datasets for both visual storytelling and annotated sentences for various persona. In this paper we describe an approach to overcome this by getting labelled persona data from a different task and leveraging those annotations to perform persona based story generation. We inspect various ways of incorporating personality in both the encoder and the decoder representations to steer the generation in the target direction. To this end, we propose five models which are incremental extensions to the baseline model to perform the task at hand. In our experiments we use five different personas to guide the generation process. We find that the models based on our hypotheses perform better at capturing words while generating stories in the target persona. W19-3402 @@ -5107,7 +5107,7 @@ In this paper, we describe a compression scheme for lexicons when represented as XinruYan AakankshaNaik YohanJo - CarolynRose + CarolynRose 22–33 We propose a novel take on understanding narratives in social media, focusing on learning ”functional story schemas”, which consist of sets of stereotypical functional structures. We develop an unsupervised pipeline to extract schemas and apply our method to Reddit posts to detect schematic structures that are characteristic of different subreddits. We validate our schemas through human interpretation and evaluate their utility via a text classification task. Our experiments show that extracted schemas capture distinctive structural patterns in different subreddits, improving classification performance of several models by 2.4% on average. We also observe that these schemas serve as lenses that reveal community norms. W19-3403 @@ -5120,7 +5120,7 @@ In this paper, we describe a compression scheme for lexicons when represented as VeraDemberg PavelShkadzko WeiShi - AsadSayeed + AsadSayeed 34–45 Automatically generating globally coherent stories is a challenging problem. Neural text generation models have been shown to perform well at generating fluent sentences from data, but they usually fail to keep track of the overall coherence of the story after a couple of sentences. Existing work that incorporates a text planning module succeeded in generating recipes and dialogues, but appears quite data-demanding. We propose a novel story generation approach that generates globally coherent stories from a fairly small corpus. The model exploits a symbolic text planning module to produce text plans, thus reducing the demand of data; a neural surface realization module then generates fluent text conditioned on the text plan. Human evaluation showed that our model outperforms various baselines by a wide margin and generates stories which are fluent as well as globally coherent. W19-3404 @@ -5155,7 +5155,7 @@ In this paper, we describe a compression scheme for lexicons when represented as <fixed-case>N</fixed-case>arrative <fixed-case>G</fixed-case>eneration in the <fixed-case>W</fixed-case>ild: <fixed-case>M</fixed-case>ethods from <fixed-case>N</fixed-case>a<fixed-case>N</fixed-case>o<fixed-case>G</fixed-case>en<fixed-case>M</fixed-case>o Judithvan Stegeren - MariëtTheune + MariëtTheune 65–74 In text generation, generating long stories is still a challenge. Coherence tends to decrease rapidly as the output length increases. Especially for generated stories, coherence of the narrative is an important quality aspect of the output text. In this paper we examine how narrative coherence is attained in the submissions of NaNoGenMo 2018, an online text generation event where participants are challenged to generate a 50,000 word novel. We list the main approaches that were used to generate coherent narratives and link them to scientific literature. Finally, we give recommendations on when to use which approach. W19-3407 @@ -5176,7 +5176,7 @@ In this paper, we describe a compression scheme for lexicons when represented as A Simple Approach to Classify Fictional and Non-Fictional Genres Mohammed RameezQureshi SidharthRanjan - RajakrishnanRajkumar + RajakrishnanRajkumar KushalShah 81–89 In this work, we deploy a logistic regression classifier to ascertain whether a given document belongs to the fiction or non-fiction genre. For genre identification, previous work had proposed three classes of features, viz., low-level (character-level and token counts), high-level (lexical and syntactic information) and derived features (type-token ratio, average word length or average sentence length). Using the Recursive feature elimination with cross-validation (RFECV) algorithm, we perform feature selection experiments on an exhaustive set of nineteen features (belonging to all the classes mentioned above) extracted from Brown corpus text. As a result, two simple features viz., the ratio of the number of adverbs to adjectives and the number of adjectives to pronouns turn out to be the most significant. Subsequently, our classification experiments aimed towards genre identification of documents from the Brown and Baby BNC corpora demonstrate that the performance of a classifier containing just the two aforementioned features is at par with that of a classifier containing the exhaustive feature set. @@ -5208,7 +5208,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Winter is here: Summarizing <fixed-case>T</fixed-case>witter Streams related to Pre-Scheduled Events AnietieAndy - Derry TantiWijaya + Derry TantiWijaya ChrisCallison-Burch 112–116 Pre-scheduled events, such as TV shows and sports games, usually garner considerable attention from the public. Twitter captures large volumes of discussions and messages related to these events, in real-time. Twitter streams related to pre-scheduled events are characterized by the following: (1) spikes in the volume of published tweets reflect the highlights of the event and (2) some of the published tweets make reference to the characters involved in the event, in the context in which they are currently portrayed in a subevent. In this paper, we take advantage of these characteristics to identify the highlights of pre-scheduled events from tweet streams and we demonstrate a method to summarize these highlights. We evaluate our algorithm on tweets collected around 2 episodes of a popular TV show, Game of Thrones, Season 7. @@ -5223,7 +5223,7 @@ In this paper, we describe a compression scheme for lexicons when represented as PrakharGupta VinayshekharBannihatti Kumar MukulBhutani - Alan WBlack + Alan WBlack 117–126 We study the problem of generating interesting endings for stories. Neural generative models have shown promising results for various text generation problems. Sequence to Sequence (Seq2Seq) models are typically trained to generate a single output sequence for a given input sequence. However, in the context of a story, multiple endings are possible. Seq2Seq models tend to ignore the context and generate generic and dull responses. Very few works have studied generating diverse and interesting story endings for the same story context. In this paper, we propose models which generate more diverse and interesting outputs by 1) training models to focus attention on important keyphrases of the story, and 2) promoting generating nongeneric words. We show that the combination of the two leads to more interesting endings. W19-3413 @@ -5247,9 +5247,9 @@ In this paper, we describe a compression scheme for lexicons when represented as Proceedings of the Third Workshop on Abusive Language Online W19-35 Sarah T.Roberts - JoelTetreault + JoelTetreault VinodkumarPrabhakaran - ZeerakWaseem + ZeerakWaseem Association for Computational Linguistics
Florence, Italy
August @@ -5289,7 +5289,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Detecting harassment in real-time as conversations develop WesselStoop FlorianKunneman - Antalvan den Bosch + Antalvan den Bosch BenMiller 19–24 We developed a machine-learning-based method to detect video game players that harass teammates or opponents in chat earlier in the conversation. This real-time technology would allow gaming companies to intervene during games, such as issue warnings or muting or banning a player. In a proof-of-concept experiment on League of Legends data we compute and visualize evaluation metrics for a machine learning classifier as conversations unfold, and observe that the optimal precision and recall of detecting toxic players at each moment in the conversation depends on the confidence threshold of the classifier: the threshold should start low, and increase as the conversation unfolds. How fast this sliding threshold should increase depends on the training set size. @@ -5336,7 +5336,7 @@ In this paper, we describe a compression scheme for lexicons when represented as The Discourse of Online Content Moderation: Investigating Polarized User Responses to Changes in <fixed-case>R</fixed-case>eddit’s Quarantine Policy QinlanShen - CarolynRose + CarolynRose 58–69 Recent concerns over abusive behavior on their platforms have pressured social media companies to strengthen their content moderation policies. However, user opinions on these policies have been relatively understudied. In this paper, we present an analysis of user responses to a September 27, 2018 announcement about the quarantine policy on Reddit as a case study of to what extent the discourse on content moderation is polarized by users’ ideological viewpoint. We introduce a novel partitioning approach for characterizing user polarization based on their distribution of participation across interest subreddits. We then use automated techniques for capturing framing to examine how users with different viewpoints discuss moderation issues, finding that right-leaning users invoked censorship while left-leaning users highlighted inconsistencies on how content policies are applied. Overall, we argue for a more nuanced approach to moderation by highlighting the intersection of behavior and ideology in considering how abusive language is defined and regulated. W19-3507 @@ -5360,7 +5360,7 @@ In this paper, we describe a compression scheme for lexicons when represented as AlexHarris DongNguyen RebekahTromble - ScottHale + ScottHale HelenMargetts 80–93 Online abusive content detection is an inherently difficult task. It has received considerable attention from academia, particularly within the computational linguistics community, and performance appears to have improved as the field has matured. However, considerable challenges and unaddressed frontiers remain, spanning technical, social and ethical dimensions. These issues constrain the performance, efficiency and generalizability of abusive content detection systems. In this article we delineate and clarify the main challenges and frontiers in the field, critically evaluate their implications and discuss potential solutions. We also highlight ways in which social scientific insights can advance research. We discuss the lack of support given to researchers working with abusive content and provide guidelines for ethical research. @@ -5372,7 +5372,7 @@ In this paper, we describe a compression scheme for lexicons when represented as A Hierarchically-Labeled <fixed-case>P</fixed-case>ortuguese Hate Speech Dataset PaulaFortuna JoãoRocha da Silva - JuanSoler-Company + JuanSoler-Company LeoWanner SérgioNunes 94–104 @@ -5399,7 +5399,7 @@ In this paper, we describe a compression scheme for lexicons when represented as <fixed-case>L</fixed-case>-<fixed-case>HSAB</fixed-case>: A <fixed-case>L</fixed-case>evantine <fixed-case>T</fixed-case>witter Dataset for Hate Speech and Abusive Language HalaMulki HatemHaddad - ChediBechikh Ali + ChediBechikh Ali HalimaAlshabani 111–118 Hate speech and abusive language have become a common phenomenon on Arabic social media. Automatic hate speech and abusive detection systems can facilitate the prohibition of toxic textual contents. The complexity, informality and ambiguity of the Arabic dialects hindered the provision of the needed resources for Arabic abusive/hate speech detection research. In this paper, we introduce the first publicly-available Levantine Hate Speech and Abusive (L-HSAB) Twitter dataset with the objective to be a benchmark dataset for automatic detection of online Levantine toxic contents. We, further, provide a detailed review of the data collection steps and how we design the annotation guidelines such that a reliable dataset annotation is guaranteed. This has been later emphasized through the comprehensive evaluation of the annotations as the annotation agreement metrics of Cohen’s Kappa (k) and Krippendorff’s alpha (α) indicated the consistency of the annotations. @@ -5420,7 +5420,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Preemptive Toxic Language Detection in <fixed-case>W</fixed-case>ikipedia Comments Using Thread-Level Context - Vanja MladenKaran + Vanja MladenKaran JanŠnajder 129–134 We address the task of automatically detecting toxic content in user generated texts. We fo cus on exploring the potential for preemptive moderation, i.e., predicting whether a particular conversation thread will, in the future, incite a toxic comment. Moreover, we perform preliminary investigation of whether a model that jointly considers all comments in a conversation thread outperforms a model that considers only individual comments. Using an existing dataset of conversations among Wikipedia contributors as a starting point, we compile a new large-scale dataset for this task consisting of labeled comments and comments from their conversation threads. @@ -5433,7 +5433,7 @@ In this paper, we describe a compression scheme for lexicons when represented as SravanBodapati SpandanaGella KasturiBhattacharjee - YaserAl-Onaizan + YaserAl-Onaizan 135–145 The text we see in social media suffers from lots of undesired characterstics like hatespeech, abusive language, insults etc. The nature of this text is also very different compared to the traditional text we see in news with lots of obfuscated words, intended typos. This poses several robustness challenges to many natural language processing (NLP) techniques developed for traditional text. Many techniques proposed in the recent times such as charecter encoding models, subword models, byte pair encoding to extract subwords can aid in dealing with few of these nuances. In our work, we analyze the effectiveness of each of the above techniques, compare and contrast various word decomposition techniques when used in combination with others. We experiment with recent advances of finetuning pretrained language models, and demonstrate their robustness to domain shift. We also show our approaches achieve state of the art performance on Wikipedia attack, toxicity datasets, and Twitter hatespeech dataset. W19-3515 @@ -5445,7 +5445,7 @@ In this paper, we describe a compression scheme for lexicons when represented as A Platform Agnostic Dual-Strand Hate Speech Detector Johannes SkjeggestadMeyer - BjörnGambäck + BjörnGambäck 146–156 Hate speech detectors must be applicable across a multitude of services and platforms, and there is hence a need for detection approaches that do not depend on any information specific to a given platform. For instance, the information stored about the text’s author may differ between services, and so using such data would reduce a system’s general applicability. The paper thus focuses on using exclusively text-based input in the detection, in an optimised architecture combining Convolutional Neural Networks and Long Short-Term Memory-networks. The hate speech detector merges two strands with character n-grams and word embeddings to produce the final classification, and is shown to outperform comparable previous approaches. W19-3516 @@ -5499,7 +5499,7 @@ In this paper, we describe a compression scheme for lexicons when represented as DiyiYang RossanaCunha SamiraShaikh - ZeerakWaseem + ZeerakWaseem Association for Computational Linguistics
Florence, Italy
August @@ -5521,7 +5521,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Towards a Resource Grammar for <fixed-case>R</fixed-case>unyankore and Rukiga DavidBamutura - PeterLjunglöf + PeterLjunglöf 2–6 Currently, there is a lack of computational grammar resources for many under-resourced languages which limits the ability to develop Natural Language Processing (NLP) tools and applications such as Multilingual Document Authoring, Computer-Assisted Language Learning (CALL) and Low-Coverage Machine Translation (MT) for these languages. In this paper, we present our attempt to formalise the grammar of two such languages: Runyankore and Rukiga. For this formalisation we use the Grammatical Framework (GF) and its Resource Grammar Library (GF-RGL). bamutura-ljunglof-2019-towards @@ -5596,9 +5596,9 @@ In this paper, we describe a compression scheme for lexicons when represented as <fixed-case>E</fixed-case>nglish-<fixed-case>E</fixed-case>thiopian Languages Statistical Machine Translation - Solomon TeferraAbate + Solomon TeferraAbate MichaelMelese - Martha YifiruTachbelie + Martha YifiruTachbelie MillionMeshesha SolomonAtinafu WondwossenMulugeta @@ -5633,7 +5633,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Learning Trilingual Dictionaries for <fixed-case>U</fixed-case>rdu – <fixed-case>R</fixed-case>oman <fixed-case>U</fixed-case>rdu – <fixed-case>E</fixed-case>nglish MoizRauf - SebastianPadó + SebastianPadó 38–42 In this paper, we present an effort to generate a joint Urdu, Roman Urdu and English trilingual lexicon using automated methods. We make a case for using statistical machine translation approaches and parallel corpora for dictionary creation. To this purpose, we use word alignment tools on the corpus and evaluate translations using human evaluators. Despite different writing script and considerable noise in the corpus our results show promise with over 85% accuracy of Roman Urdu–Urdu and 45% English–Urdu pairs. rauf-pado-2019-learning @@ -5673,7 +5673,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Controlling the Specificity of Clarification Question Generation Yang TristaCao SudhaRao - HalDaumé III + HalDaumé III 53–56 Unlike comprehension-style questions, clarification questions look for some missing information in a given context. However, without guidance, neural models for question generation, similar to dialog generation models, lead to generic and bland questions that cannot elicit useful information. We argue that controlling the level of specificity of the generated questions can have useful applications and propose a neural clarification question generation model for the same. We first train a classifier that annotates a clarification question with its level of specificity (generic or specific) to the given context. Our results on the Amazon questions dataset demonstrate that training a clarification question generation model on specificity annotated data can generate questions with varied levels of specificity to the given context. cao-etal-2019-controlling @@ -5682,7 +5682,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Non-Monotonic Sequential Text Generation KianteBrantley KyunghyunCho - HalDaumé + HalDaumé SeanWelleck 57–59 Standard sequential generation methods assume a pre-specified generation order, such as text generation methods which generate words from left to right. In this work, we propose a framework for training models of text generation that operate in non-monotonic orders; the model directly learns good orders, without any additional annotation. Our framework operates by generating a word at an arbitrary position, and then recursively generating words to its left and then words to its right, yielding a binary tree. Learning is framed as imitation learning, including a coaching method which moves from imitating an oracle to reinforcing the policy’s own preferences. Experimental results demonstrate that using the proposed method, it is possible to learn policies which generate text without pre-specifying a generation order while achieving competitive performance with conventional left-to-right generation. @@ -5724,7 +5724,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Construction and Alignment of Multilingual Entailment Graphs for Semantic Inference SabineWeber - MarkSteedman + MarkSteedman 77–79 This paper presents ongoing work on the construction and alignment of predicate entailment graphs in English and German. We extract predicate-argument pairs from large corpora of monolingual English and German news text and construct monolingual paraphrase clusters and entailment graphs. We use an aligned subset of entities to derive the bilingual alignment of entities and relations, and achieve better than baseline results on a translated subset of a predicate entailment data set (Levy and Dagan, 2016) and the German portion of XNLI (Conneau et al., 2018). weber-steedman-2019-construction @@ -5742,7 +5742,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Acoustic Characterization of Singaporean Children’s <fixed-case>E</fixed-case>nglish: Comparisons to <fixed-case>A</fixed-case>merican and <fixed-case>B</fixed-case>ritish Counterparts YulingGu - NancyChen + NancyChen 83–87 We investigate English pronunciation patterns in Singaporean children in relation to their American and British counterparts by conducting archetypal analysis on selected vowel pairs. Given that Singapore adopts British English as the institutional standard, one might expect Singaporean children to follow British pronunciation patterns, but we observe that Singaporean children also present similar patterns to Americans for TRAP-BATH spilt vowels: (1) British and Singaporean children both produce these vowels with a relatively lowered tongue height. (2) These vowels are more fronted for American and Singaporean children (p < 0.001). In addition, when comparing /æ/ and /ε/ productions, British speakers show the clearest distinction between the two vowels; Singaporean and American speakers exhibit a higher and more fronted tongue position for /æ/ (p < 0.001), causing /æ/ to be acoustically more similar to /ε/. gu-chen-2019-acoustic @@ -5775,7 +5775,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Isolating the Effects of Modeling Recursive Structures: A Case Study in Pronunciation Prediction of <fixed-case>C</fixed-case>hinese Characters MinhNguyen Gia HNgo - NancyChen + NancyChen 95–97 Finding that explicitly modeling structures leads to better generalization, we consider the task of predicting Cantonese pronunciations of logographs (Chinese characters) using logographs’ recursive structures. This task is a suitable case study for two reasons. First, logographs’ pronunciations depend on structures (i.e. the hierarchies of sub-units in logographs) Second, the quality of logographic structures is consistent since the structures are constructed automatically using a set of rules. Thus, this task is less affected by confounds such as varying quality between annotators. Empirical results show that modeling structures explicitly using treeLSTM outperforms LSTM baseline, reducing prediction error by 6.0% relative. nguyen-etal-2019-isolating @@ -5817,7 +5817,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Polysemous Language in Child Directed Speech SammyFloyd LibbyBarak - AdeleGoldberg + AdeleGoldberg CaseyLew-Williams 114–117 Polysemous Language in Child Directed Speech Learning the meaning of words is one of the fundamental building blocks of verbal communication. Models of child language acquisition have generally made the simplifying assumption that each word appears in child-directed speech with a single meaning. To understand naturalistic word learning during childhood, it is essential to know whether children hear input that is in fact constrained to single meaning per word, or whether the environment naturally contains multiple senses. In this study, we use a topic modeling approach to automatically induce word senses from child-directed speech. Our results confirm the plausibility of our automated analysis approach and reveal an increasing rate of using multiple senses in child-directed speech, starting with corpora from children as early as the first year of life. @@ -5827,7 +5827,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Principled Frameworks for Evaluating Ethics in <fixed-case>NLP</fixed-case> Systems ShrimaiPrabhumoye ElijahMayfield - Alan WBlack + Alan WBlack 118–121 We critique recent work on ethics in natural language processing. Those discussions have focused on data collection, experimental design, and interventions in modeling. But we argue that we ought to first understand the frameworks of ethics that are being used to evaluate the fairness and justice of algorithmic systems. Here, we begin that discussion by outlining deontological and consequentialist ethics, and make predictions on the research agenda prioritized by each. prabhumoye-etal-2019-principled @@ -5845,7 +5845,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Evaluating Ways of Adapting Word Similarity LibbyBarak - AdeleGoldberg + AdeleGoldberg 126–128 People judge pairwise similarity by deciding which aspects of the words’ meanings are relevant for the comparison of the given pair. However, computational representations of meaning rely on dimensions of the vector representation for similarity comparisons, without considering the specific pairing at hand. Prior work has adapted computational similarity judgments by using the softmax function in order to address this limitation by capturing asymmetry in human judgments. We extend this analysis by showing that a simple modification of cosine similarity offers a better correlation with human judgments over a comprehensive dataset. The modification performs best when the similarity between two words is calculated with reference to other words that are most similar and dissimilar to the pair. barak-goldberg-2019-evaluating @@ -5870,7 +5870,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Context Effects on Human Judgments of Similarity LibbyBarak NoeKong-Johnson - AdeleGoldberg + AdeleGoldberg 135–137 The semantic similarity of words forms the basis of many natural language processing methods. These computational similarity measures are often based on a mathematical comparison of vector representations of word meanings, while human judgments of similarity differ in lacking geometrical properties, e.g., symmetric similarity and triangular similarity. In this study, we propose a novel task design to further explore human behavior by asking whether a pair of words is deemed more similar depending on an immediately preceding judgment. Results from a crowdsourcing experiment show that people consistently judge words as more similar when primed by a judgment that evokes a relevant relationship. Our analysis further shows that word2vec similarity correlated significantly better with the out-of-context judgments, thus confirming the methodological differences in human-computer judgments, and offering a new testbed for probing the differences. barak-etal-2019-context @@ -5895,7 +5895,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Pardon the Interruption: Automatic Analysis of Gender and Competitive Turn-Taking in <fixed-case>U</fixed-case>nited <fixed-case>S</fixed-case>tates <fixed-case>S</fixed-case>upreme <fixed-case>C</fixed-case>ourt Hearings - HaleyLepp + HaleyLepp 143–145 The United States Supreme Court plays a key role in defining the legal basis for gender discrimination throughout the country, yet there are few checks on gender bias within the court itself. In conversational turn-taking, interruptions have been documented as a marker of bias between speakers of different genders. The goal of this study is to automatically differentiate between respectful and disrespectful conversational turns taken during official hearings, which could help in detecting bias and finding remediation techniques for discourse in the courtroom. In this paper, I present a corpus of turns annotated by legal professionals, and describe the design of a semi-supervised classifier that will use acoustic and lexical features to analyze turn-taking at scale. On completion of annotations, this classifier will be trained to extract the likelihood that turns are respectful or disrespectful for use in studies of speech trends. lepp-2019-pardon @@ -5905,7 +5905,7 @@ In this paper, we describe a compression scheme for lexicons when represented as NouhaDziri EhsanKamalloo KoryMathewson - OsmarZaiane + OsmarZaiane 146–148 Evaluating open-domain dialogue systems is difficult due to the diversity of possible correct answers. Automatic metrics such as BLEU correlate weakly with human annotations, resulting in a significant bias across different models and datasets. Some researchers resort to human judgment experimentation for assessing response quality, which is expensive, time consuming, and not scalable. Moreover, judges tend to evaluate a small number of dialogues, meaning that minor differences in evaluation configuration may lead to dissimilar results. In this paper, we present interpretable metrics for evaluating topic coherence by making use of distributed sentence representations. Furthermore, we introduce calculable approximations of human judgment based on conversational coherence by adopting state-of-the-art entailment techniques. Results show that our metrics can be used as a surrogate for human judgment, making it easy to evaluate dialogue systems on large-scale datasets and allowing an unbiased estimate for the quality of the responses. This paper has been accepted in NAACL 2019. dziri-etal-2019-evaluating-coherence @@ -5937,8 +5937,8 @@ In this paper, we describe a compression scheme for lexicons when represented as A Parallel Corpus <fixed-case>M</fixed-case>ixtec-<fixed-case>S</fixed-case>panish CynthiaMontaño GerardoSierra Martínez - GemmaBel-Enguix - HelenaGomez + GemmaBel-Enguix + HelenaGomez 157–159 This work is about the compilation process of parallel documents Spanish-Mixtec. There are not many Spanish-Mixec parallel texts and most of the sources are non-digital books. Due to this, we need to face the errors when digitizing the sources and difficulties in sentence alignment, as well as the fact that does not exist a standard orthography. Our parallel corpus consists of sixty texts coming from books and digital repositories. These documents belong to different domains: history, traditional stories, didactic material, recipes, ethnographical descriptions of each town and instruction manuals for disease prevention. We have classified this material in five major categories: didactic (6 texts), educative (6 texts), interpretative (7 texts), narrative (39 texts), and poetic (2 texts). The final total of tokens is 49,814 Spanish words and 47,774 Mixtec words. The texts belong to the states of Oaxaca (48 texts), Guerrero (9 texts) and Puebla (3 texts). According to this data, we see that the corpus is unbalanced in what refers to the representation of the different territories. While 55% of speakers are in Oaxaca, 80% of texts come from this region. Guerrero has the 30% of speakers and the 15% of texts and Puebla, with the 15% of the speakers has a representation of the 5% in the corpus. montano-etal-2019-parallel @@ -5973,7 +5973,7 @@ In this paper, we describe a compression scheme for lexicons when represented as <fixed-case>CSI</fixed-case> <fixed-case>P</fixed-case>eru News: finding the culprit, victim and location in news articles GinaBustamante - ArturoOncevay + ArturoOncevay 174–176 We introduce a shift on the DS method over the domain of crime-related news from Peru, attempting to find the culprit, victim and location of a crime description from a RE perspective. Obtained results are highly promising and show that proposed modifications are effective in non-traditional domains. bustamante-oncevay-2019-csi @@ -5990,7 +5990,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Cross-Sentence Transformations in Text Simplification FernandoAlva-Manchego - CarolinaScarton + CarolinaScarton LuciaSpecia 181–184 Current approaches to Text Simplification focus on simplifying sentences individually. However, certain simplification transformations span beyond single sentences (e.g. joining and re-ordering sentences). In this paper, we motivate the need for modelling the simplification task at the document level, and assess the performance of sequence-to-sequence neural models in this setup. We analyse parallel original-simplified documents created by professional editors and show that there are frequent rewriting transformations that are not restricted to sentence boundaries. We also propose strategies to automatically evaluate the performance of a simplification model on these cross-sentence transformations. Our experiments show the inability of standard sequence-to-sequence neural models to learn these transformations, and suggest directions towards document-level simplification. @@ -6001,9 +6001,9 @@ In this paper, we describe a compression scheme for lexicons when represented as Proceedings of the 7th Workshop on Balto-Slavic Natural Language Processing W19-37 - TomažErjavec + TomažErjavec MichałMarcińczuk - PreslavNakov + PreslavNakov JakubPiskorski LidiaPivovarova JanŠnajder @@ -6084,7 +6084,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Data Set for Stance and Sentiment Analysis from User Comments on <fixed-case>C</fixed-case>roatian News MihaelaBošnjak - Vanja MladenKaran + Vanja MladenKaran 50–55 Nowadays it is becoming more important than ever to find new ways of extracting useful information from the evergrowing amount of user-generated data available online. In this paper, we describe the creation of a data set that contains news articles and corresponding comments from Croatian news outlet 24 sata. Our annotation scheme is specifically tailored for the task of detecting stances and sentiment from user comments as well as assessing if commentator claims are verifiable. Through this data, we hope to get a better understanding of the publics viewpoint on various events. In addition, we also explore the potential of applying supervised machine learning models toautomate annotation of more data. W19-3707 @@ -6131,7 +6131,7 @@ In this paper, we describe a compression scheme for lexicons when represented as <fixed-case>TLR</fixed-case> at <fixed-case>BSNLP</fixed-case>2019: A Multilingual Named Entity Recognition System - Jose G.Moreno + Jose G.Moreno ElvysLinhares Pontes MickaelCoustaty AntoineDoucet @@ -6145,7 +6145,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Tuning Multilingual Transformers for Language-Specific Named Entity Recognition MikhailArkhipov MariaTrofimova - YuriKuratov + YuriKuratov AlexeySorokin 89–93 Our paper addresses the problem of multilingual named entity recognition on the material of 4 languages: Russian, Bulgarian, Czech and Polish. We solve this task using the BERT model. We use a hundred languages multilingual model as base for transfer to the mentioned Slavic languages. Unsupervised pre-training of the BERT model on these 4 languages allows to significantly outperform baseline neural approaches and multilingual BERT. Additional improvement is achieved by extending BERT with a word-level CRF layer. Our system was submitted to BSNLP 2019 Shared Task on Multilingual Named Entity Recognition and demonstrated top performance in multilingual setting for two competition metrics. We open-sourced NER models and BERT model pre-trained on the four Slavic languages. @@ -6167,7 +6167,7 @@ In this paper, we describe a compression scheme for lexicons when represented as <fixed-case>JRC</fixed-case> <fixed-case>TMA</fixed-case>-<fixed-case>CC</fixed-case>: <fixed-case>S</fixed-case>lavic Named Entity Recognition and Linking. Participation in the <fixed-case>BSNLP</fixed-case>-2019 shared task GuillaumeJacquet JakubPiskorski - HristoTanev + HristoTanev RalfSteinberger 100–104 We report on the participation of the JRC Text Mining and Analysis Competence Centre (TMA-CC) in the BSNLP-2019 Shared Task, which focuses on named-entity recognition, lemmatisation and cross-lingual linking. We propose a hybrid system combining a rule-based approach and light ML techniques. We use multilingual lexical resources such as JRC-NAMES and BABELNET together with a named entity guesser to recognise names. In a second step, we combine known names with wild cards to increase recognition recall by also capturing inflection variants. In a third step, we increase precision by filtering these name candidates with automatically learnt inflection patterns derived from name occurrences in large news article collections. Our major requirement is to achieve high precision. We achieved an average of 65% F-measure with 93% precision on the four languages. @@ -6178,7 +6178,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Building <fixed-case>E</fixed-case>nglish-to-<fixed-case>S</fixed-case>erbian Machine Translation System for <fixed-case>IMD</fixed-case>b Movie Reviews PintuLohar - MajaPopović + MajaPopović AndyWay 105–113 This paper reports the results of the first experiment dealing with the challenges of building a machine translation system for user-generated content involving a complex South Slavic language. We focus on translation of English IMDb user movie reviews into Serbian, in a low-resource scenario. We explore potentials and limits of (i) phrase-based and neural machine translation systems trained on out-of-domain clean parallel data from news articles (ii) creating additional synthetic in-domain parallel corpus by machine-translating the English IMDb corpus into Serbian. Our main findings are that morphology and syntax are better handled by the neural approach than by the phrase-based approach even in this low-resource mismatched domain scenario, however the situation is different for the lexical aspect, especially for person names. This finding also indicates that in general, machine translation of person names into Slavic languages (especially those which require/allow transcription) should be investigated more systematically. @@ -6213,7 +6213,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Proceedings of the First Workshop on Gender Bias in Natural Language Processing W19-38 - Marta R.Costa-jussà + Marta R.Costa-jussà ChristianHardmeier WillRadford KellieWebster @@ -6266,7 +6266,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Measuring Gender Bias in Word Embeddings across Domains and Discovering New Gender Bias Word Categories KaytlinChaloner - AlfredoMaldonado + AlfredoMaldonado 25–32 Prior work has shown that word embeddings capture human stereotypes, including gender bias. However, there is a lack of studies testing the presence of specific gender bias categories in word embeddings across diverse domains. This paper aims to fill this gap by applying the WEAT bias detection method to four sets of word embeddings trained on corpora from four different domains: news, social networking, biomedical and a gender-balanced corpus extracted from Wikipedia (GAP). We find that some domains are definitely more prone to gender bias than others, and that the categories of gender bias present also vary for each set of word embeddings. We detect some gender bias in GAP. We also propose a simple but novel method for discovering new bias categories by clustering word embeddings. We validate this method through WEAT’s hypothesis testing mechanism and find it useful for expanding the relatively small set of well-known gender bias word categories commonly used in the literature. W19-3804 @@ -6287,7 +6287,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Conceptor Debiasing of Word Representations Evaluated on <fixed-case>WEAT</fixed-case> SaketKarve - LyleUngar + LyleUngar JoãoSedoc 40–48 Bias in word representations, such as Word2Vec, has been widely reported and investigated, and efforts made to debias them. We apply the debiasing conceptor for post-processing both traditional and contextualized word embeddings. Our method can simultaneously remove racial and gender biases from word representations. Unlike standard debiasing methods, the debiasing conceptor can utilize heterogeneous lists of biased words without loss in performance. Finally, our empirical experiments show that the debiasing conceptor diminishes racial and gender bias of word representations as measured using the Word Embedding Association Test (WEAT) of Caliskan et al. (2017). @@ -6309,7 +6309,7 @@ In this paper, we describe a compression scheme for lexicons when represented as The Role of Protected Class Word Lists in Bias Identification of Contextualized Word Representations JoãoSedoc - LyleUngar + LyleUngar 55–61 Systemic bias in word embeddings has been widely reported and studied, and efforts made to debias them; however, new contextualized embeddings such as ELMo and BERT are only now being similarly studied. Standard debiasing methods require heterogeneous lists of target words to identify the “bias subspace”. We show show that using new contextualized word embeddings in conceptor debiasing allows us to more accurately debias word embeddings by breaking target word lists into more homogeneous subsets and then combining (”Or’ing”) the debiasing conceptors of the different subsets. W19-3808 @@ -6341,7 +6341,7 @@ In this paper, we describe a compression scheme for lexicons when represented as <fixed-case>BERT</fixed-case> Masked Language Modeling for Co-reference Resolution FelipeAlfaro Marta R.Costa-jussà - José A. R.Fonollosa + José A. R.Fonollosa 76–81 This paper explains the TALP-UPC participation for the Gendered Pronoun Resolution shared-task of the 1st ACL Workshop on Gender Bias for Natural Language Processing. We have implemented two models for mask language modeling using pre-trained BERT adjusted to work for a classification problem. The proposed solutions are based on the word probabilities of the original BERT model, but using common English names to replace the original test names. W19-3811 @@ -6467,7 +6467,7 @@ In this paper, we describe a compression scheme for lexicons when represented as KeitaKurita NidhiVyas AyushPareek - Alan WBlack + Alan WBlack YuliaTsvetkov 166–172 Contextual word embeddings such as BERT have achieved state of the art performance in numerous NLP tasks. Since they are optimized to capture the statistical properties of training data, they tend to pick up on and amplify social stereotypes present in the data as well. In this study, we (1) propose a template-based method to quantify bias in BERT; (2) show that this method obtains more consistent results in capturing social biases than the traditional cosine based method; and (3) conduct a case study, evaluating gender bias in a downstream task of Gender Pronoun Resolution. Although our case study focuses on gender bias, the proposed technique is generalizable to unveiling other biases, including in multiclass settings, such as racial and religious biases. @@ -6492,7 +6492,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Proceedings of the Workshop on Deep Learning and Formal Languages: Building Bridges W19-39 - JasonEisner + JasonEisner MatthiasGallé JeffreyHeinz AriadnaQuattoni @@ -6540,7 +6540,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Multi-Element Long Distance Dependencies: Using <fixed-case>SP</fixed-case>k Languages to Explore the Characteristics of Long-Distance Dependencies AbhijitMahalunkar - JohnKelleher + JohnKelleher 34–43 In order to successfully model Long Distance Dependencies (LDDs) it is necessary to under-stand the full-range of the characteristics of the LDDs exhibited in a target dataset. In this paper, we use Strictly k-Piecewise languages to generate datasets with various properties. We then compute the characteristics of the LDDs in these datasets using mutual information and analyze the impact of factors such as (i) k, (ii) length of LDDs, (iii) vocabulary size, (iv) forbidden strings, and (v) dataset size. This analysis reveal that the number of interacting elements in a dependency is an important characteristic of LDDs. This leads us to the challenge of modelling multi-element long-distance dependencies. Our results suggest that attention mechanisms in neural networks may aide in modeling datasets with multi-element long-distance dependencies. However, we conclude that there is a need to develop more efficient attention mechanisms to address this issue. W19-3904 @@ -6551,7 +6551,7 @@ In this paper, we describe a compression scheme for lexicons when represented as <fixed-case>LSTM</fixed-case> Networks Can Perform Dynamic Counting MiracSuzgun YonatanBelinkov - StuartShieber + StuartShieber SebastianGehrmann 44–54 In this paper, we systematically assess the ability of standard recurrent networks to perform dynamic counting and to encode hierarchical representations. All the neural models in our experiments are designed to be small-sized networks both to prevent them from memorizing the training sets and to visualize and interpret their behaviour at test time. Our results demonstrate that the Long Short-Term Memory (LSTM) networks can learn to recognize the well-balanced parenthesis language (Dyck-1) and the shuffles of multiple Dyck-1 languages, each defined over different parenthesis-pairs, by emulating simple real-time k-counter machines. To the best of our knowledge, this work is the first study to introduce the shuffle languages to analyze the computational power of neural networks. We also show that a single-layer LSTM with only one hidden unit is practically sufficient for recognizing the Dyck-1 language. However, none of our recurrent networks was able to yield a good performance on the Dyck-2 language learning task, which requires a model to have a stack-like mechanism for recognition. @@ -6580,7 +6580,7 @@ In this paper, we describe a compression scheme for lexicons when represented as Crowdsourced Hedge Term Disambiguation MorganUlinski - JuliaHirschberg + JuliaHirschberg 1–5 We address the issue of acquiring quality annotations of hedging words and phrases, linguistic phenomenona in which words, sounds, or other constructions are used to express ambiguity or uncertainty. Due to the limited availability of existing corpora annotated for hedging, linguists and other language scientists have been constrained as to the extent they can study this phenomenon. In this paper, we introduce a new method of acquiring hedging annotations via crowdsourcing, based on reformulating the task of labeling hedges as a simple word sense disambiguation task. We also introduce a new hedging corpus we have constructed by applying this method, a collection of forum posts annotated using Amazon Mechanical Turk. We found that the crowdsourced judgments we obtained had an inter-annotator agreement of 92.89% (Fleiss’ Kappa=0.751) and, when comparing a subset of these annotations to an expert-annotated gold standard, an accuracy of 96.65%. W19-4001 @@ -6591,7 +6591,7 @@ In this paper, we describe a compression scheme for lexicons when represented as <fixed-case>W</fixed-case>i<fixed-case>R</fixed-case>e57 : A Fine-Grained Benchmark for Open Information Extraction WilliamLechelle FabrizioGotti - PhillippeLanglais + PhillippeLanglais 6–15 We build a reference for the task of Open Information Extraction, on five documents. We tentatively resolve a number of issues that arise, including coreference and granularity, and we take steps toward addressing inference, a significant problem. We seek to better pinpoint the requirements for the task. We produce our annotation guidelines specifying what is correct to extract and what is not. In turn, we use this reference to score existing Open IE systems. We address the non-trivial problem of evaluating the extractions produced by systems against the reference tuples, and share our evaluation script. Among seven compared extractors, we find the MinIE system to perform best. W19-4002 @@ -6689,8 +6689,8 @@ In the last paragraph of Section 3, the word "additional" was changed to "supple Assessing Back-Translation as a Corpus Generation Strategy for non-<fixed-case>E</fixed-case>nglish Tasks: A Study in Reading Comprehension and Word Sense Disambiguation FabricioMonsalve KervyRivas Rojas - Marco AntonioSobrevilla Cabezudo - ArturoOncevay + Marco AntonioSobrevilla Cabezudo + ArturoOncevay 81–89 Corpora curated by experts have sustained Natural Language Processing mainly in English, but the expensiveness of corpora creation is a barrier for the development in further languages. Thus, we propose a corpus generation strategy that only requires a machine translation system between English and the target language in both directions, where we filter the best translations by computing automatic translation metrics and the task performance score. By studying Reading Comprehension in Spanish and Word Sense Disambiguation in Portuguese, we identified that a more quality-oriented metric has high potential in the corpora selection without degrading the task performance. We conclude that it is possible to systematise the building of quality corpora using machine translation and automatic metrics, besides some prior effort to clean and process the data. W19-4010 @@ -6700,7 +6700,7 @@ In the last paragraph of Section 3, the word "additional" was changed to "supple A Framework for Annotating ‘Related Works’ to Support Feedback to Novice Writers ArleneCasey - BonnieWebber + BonnieWebber DorotaGlowacka 90–99 Understanding what is expected of academic writing can be difficult for novice writers to assimilate, and recent years have seen several automated tools become available to support academic writing. Our work presents a framework for annotating features of the Related Work section of academic writing, that supports writer feedback. @@ -6712,7 +6712,7 @@ In the last paragraph of Section 3, the word "additional" was changed to "supple An Online Annotation Assistant for Argument Schemes JohnLawrence JackyVisser - ChrisReed + ChrisReed 100–107 Understanding the inferential principles underpinning an argument is essential to the proper interpretation and evaluation of persuasive discourse. Argument schemes capture the conventional patterns of reasoning appealed to in persuasion. The empirical study of these patterns relies on the availability of data about the actual use of argumentation in communicative practice. Annotated corpora of argument schemes, however, are scarce, small, and unrepresentative. Aiming to address this issue, we present one step in the development of improved datasets by integrating the Argument Scheme Key – a novel annotation method based on one of the most popular typologies of argument schemes – into the widely used OVA software for argument analysis. W19-4012 @@ -6757,8 +6757,8 @@ In the last paragraph of Section 3, the word "additional" was changed to "supple Aikaterini-LidaKalouli AnnebethBuis LivyReal - MarthaPalmer - Valeriade Paiva + MarthaPalmer + Valeriade Paiva 132–143 The vast amount of research introducing new corpora and techniques for semi-automatically annotating corpora shows the important role that datasets play in today’s research, especially in the machine learning community. This rapid development raises concerns about the quality of the datasets created and consequently of the models trained, as recently discussed with respect to the Natural Language Inference (NLI) task. In this work we conduct an annotation experiment based on a small subset of the SICK corpus. The experiment reveals several problems in the annotation guidelines, and various challenges of the NLI task itself. Our quantitative evaluation of the experiment allows us to assign our empirical observations to specific linguistic phenomena and leads us to recommendations for future annotation tasks, for NLI and possibly for other tasks. W19-4016 @@ -6796,7 +6796,7 @@ In the last paragraph of Section 3, the word "additional" was changed to "supple AbdullatifKöksal BalkizOzturk Basaran TungaGungor - ArzucanÖzgür + ArzucanÖzgür 166–177 In this paper, we present the current version of two different treebanks, the re-annotation of the Turkish PUD Treebank and the first annotation of the Turkish National Corpus Universal Dependency (henceforth TNC-UD). The annotation of both treebanks, the Turkish PUD Treebank and TNC-UD, was carried out based on the decisions concerning linguistic adequacy of re-annotation of the Turkish IMST-UD Treebank (Türk et. al., forthcoming). Both of the treebanks were annotated with the same annotation process and morphological and syntactic analyses. The TNC-UD is planned to have 10,000 sentences. In this paper, we will present the first 500 sentences along with the annotation PUD Treebank. Moreover, this paper also offers the parsing results of a graph-based neural parser on the previous and re-annotated PUD, as well as the TNC-UD. In light of the comparisons, even though we observe a slight decrease in the attachment scores of the Turkish PUD treebank, we demonstrate that the annotation of the TNC-UD improves the parsing accuracy of Turkish. In addition to the treebanks, we have also constructed a custom annotation software with advanced filtering and morphological editing options. Both the treebanks, including a full edit-history and the annotation guidelines, and the custom software are publicly available under an open license online. W19-4019 @@ -6806,7 +6806,7 @@ In the last paragraph of Section 3, the word "additional" was changed to "supple A Dataset for Semantic Role Labelling of <fixed-case>H</fixed-case>indi-<fixed-case>E</fixed-case>nglish Code-Mixed Tweets RiyaPal - DiptiSharma + DiptiSharma 178–188 We present a data set of 1460 Hindi-English code-mixed tweets consisting of 20,949 tokens labelled with Proposition Bank labels marking their semantic roles. We created verb frames for complex predicates present in the corpus and formulated mappings from Paninian dependency labels to Proposition Bank labels. With the help of these mappings and the dependency tree, we propose a baseline rule based system for Semantic Role Labelling of Hindi-English code-mixed data. We obtain an accuracy of 96.74% for Argument Identification and are able to further classify 73.93% of the labels correctly. While there is relevant ongoing research on Semantic Role Labelling and on building tools for code-mixed social media data, this is the first attempt at labelling semantic roles in code-mixed data, to the best of our knowledge. W19-4020 @@ -6816,7 +6816,7 @@ In the last paragraph of Section 3, the word "additional" was changed to "supple A Multi-Platform Annotation Ecosystem for Domain Adaptation RichardEckart de Castilho - NancyIde + NancyIde Jin-DongKim Jan-ChristophKlie KeithSuderman @@ -6829,7 +6829,7 @@ In the last paragraph of Section 3, the word "additional" was changed to "supple A New Annotation Scheme for the <fixed-case>S</fixed-case>ejong Part-of-speech Tagged Corpus JungyeulPark - FrancisTyers + FrancisTyers 195–202 In this paper we present a new annotation scheme for the Sejong part-of-speech tagged corpus based on Universal Dependencies style annotation. By using a new annotation scheme, we can produce Sejong-style morphological analysis and part-of-speech tagging results which have been the de facto standard for Korean language processing. We also explore the possibility of doing named-entity recognition and semantic-role labelling for Korean using the new annotation scheme. W19-4022 @@ -6907,7 +6907,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Towards a General <fixed-case>A</fixed-case>bstract <fixed-case>M</fixed-case>eaning <fixed-case>R</fixed-case>epresentation Corpus for <fixed-case>B</fixed-case>razilian <fixed-case>P</fixed-case>ortuguese - Marco AntonioSobrevilla Cabezudo + Marco AntonioSobrevilla Cabezudo ThiagoPardo 236–244 Abstract Meaning Representation (AMR) is a recent and prominent semantic representation with good acceptance and several applications in the Natural Language Processing area. For English, there is a large annotated corpus (with approximately 39K sentences) that supports the research with the representation. However, to the best of our knowledge, there is only one restricted corpus for Portuguese, which contains 1,527 sentences. In this context, this paper presents an effort to build a general purpose AMR-annotated corpus for Brazilian Portuguese by translating and adapting AMR English guidelines. Our results show that such approach is feasible, but there are some challenging phenomena to solve. More than this, efforts are necessary to increase the coverage of the corresponding lexical resource that supports the annotation. @@ -6922,7 +6922,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat W19-41 Yun-NungChen TaniaBedrax-Weiss - DilekHakkani-Tur + DilekHakkani-Tur AnujKumar MikeLewis Thang-MinhLuong @@ -6975,7 +6975,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat NouhaDziri EhsanKamalloo KoryMathewson - OsmarZaiane + OsmarZaiane 18–31 Sequence-to-Sequence (Seq2Seq) models have witnessed a notable success in generating natural conversational exchanges. Notwithstanding the syntactically well-formed responses generated by these neural network models, they are prone to be acontextual, short and generic. In this work, we introduce a Topical Hierarchical Recurrent Encoder Decoder (THRED), a novel, fully data-driven, multi-turn response generation system intended to produce contextual and topic-aware responses. Our model is built upon the basic Seq2Seq model by augmenting it with a hierarchical joint attention mechanism that incorporates topical concepts and previous interactions into the response generation. To train our model, we provide a clean and high-quality conversational dataset mined from Reddit comments. We evaluate THRED on two novel automated metrics, dubbed Semantic Similarity and Response Echo Index, as well as with human evaluation. Our experiments demonstrate that the proposed model is able to generate more diverse and contextually relevant responses compared to the strong baselines. W19-4103 @@ -7024,7 +7024,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat ChulakaGunasekara Jonathan K.Kummerfeld LazarosPolymenakos - WalterLasecki + WalterLasecki 60–67 Goal-oriented dialogue in complex domains is an extremely challenging problem and there are relatively few datasets. This task provided two new resources that presented different challenges: one was focused but small, while the other was large but diverse. We also considered several new variations on the next utterance selection problem: (1) increasing the number of candidates, (2) including paraphrases, and (3) not including a correct option in the candidate set. Twenty teams participated, developing a range of neural network models, including some that successfully incorporated external data to boost performance. Both datasets have been publicly released, enabling future work to build on these results, working towards robust goal-oriented dialogue systems. W19-4107 @@ -7050,7 +7050,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Energy-Based Modelling for Dialogue State Tracking Anh DuongTrinh RobertRoss - JohnKelleher + JohnKelleher 77–86 The uncertainties of language and the complexity of dialogue contexts make accurate dialogue state tracking one of the more challenging aspects of dialogue processing. To improve state tracking quality, we argue that relationships between different aspects of dialogue state must be taken into account as they can often guide a more accurate interpretation process. To this end, we present an energy-based approach to dialogue state tracking as a structured classification task. The novelty of our approach lies in the use of an energy network on top of a deep learning architecture to explore more signal correlations between network variables including input features and output labels. We demonstrate that the energy-based approach improves the performance of a deep learning dialogue state tracker towards state-of-the-art results without the need for many of the other steps required by current state-of-the-art methods. W19-4109 @@ -7098,7 +7098,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Learning to Explain: Answering Why-Questions via Rephrasing AllenNie ErinBennett - NoahGoodman + NoahGoodman 113–120 Providing plausible responses to why questions is a challenging but critical goal for language based human-machine interaction. Explanations are challenging in that they require many different forms of abstract knowledge and reasoning. Previous work has either relied on human-curated structured knowledge bases or detailed domain representation to generate satisfactory explanations. They are also often limited to ranking pre-existing explanation choices. In our work, we contribute to the under-explored area of generating natural language explanations for general phenomena. We automatically collect large datasets of explanation-phenomenon pairs which allow us to train sequence-to-sequence models to generate natural language explanations. We compare different training strategies and evaluate their performance using both automatic scores and human ratings. We demonstrate that our strategy is sufficient to generate highly plausible explanations for general open-domain phenomena compared to other models trained on different datasets. W19-4113 @@ -7212,7 +7212,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat AhmetÜstün Robvan der Goot GosseBouma - Gertjanvan Noord + Gertjanvan Noord 35–49 This paper describes our submission to SIGMORPHON 2019 Task 2: Morphological analysis and lemmatization in context. Our model is a multi-task sequence to sequence neural network, which jointly learns morphological tagging and lemmatization. On the encoding side, we exploit character-level as well as contextual information. We introduce a multi-attention decoder to selectively focus on different parts of character and word sequences. To further improve the model, we train on multiple datasets simultaneously and use external embeddings for initialization. Our final model reaches an average morphological tagging F1 score of 94.54 and a lemma accuracy of 93.91 on the test data, ranking respectively 3rd and 6th out of 13 teams in the SIGMORPHON 2019 shared task. W19-4206 @@ -7222,7 +7222,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>IT</fixed-case>–<fixed-case>IST</fixed-case> at the <fixed-case>SIGMORPHON</fixed-case> 2019 Shared Task: Sparse Two-headed Models for Inflection BenPeters - André F. T.Martins + André F. T.Martins 50–56 This paper presents the Instituto de Telecomunicações–Instituto Superior Técnico submission to Task 1 of the SIGMORPHON 2019 Shared Task. Our models combine sparse sequence-to-sequence models with a two-headed attention mechanism that learns separate attention distributions for the lemma and inflectional tags. Among submissions to Task 1, our models rank second and third. Despite the low data setting of the task (only 100 in-language training examples), they learn plausible inflection patterns and often concentrate all probability mass into a small set of hypotheses, making beam search exact. W19-4207 @@ -7234,8 +7234,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat AditiChaudhary ElizabethSalesky GayatriBhat - David R.Mortensen - JaimeCarbonell + David R.Mortensen + JaimeCarbonell YuliaTsvetkov 57–70 This paper presents the submission by the CMU-01 team to the SIGMORPHON 2019 task 2 of Morphological Analysis and Lemmatization in Context. This task requires us to produce the lemma and morpho-syntactic description of each token in a sequence, for 107 treebanks. We approach this task with a hierarchical neural conditional random field (CRF) model which predicts each coarse-grained feature (eg. POS, Case, etc.) independently. However, most treebanks are under-resourced, thus making it challenging to train deep neural models for them. Hence, we propose a multi-lingual transfer training regime where we transfer from multiple related languages that share similar typology. @@ -7270,7 +7270,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat AlexisPalmer Suleyman OlcayPolat TarakaRama - RodneyNielsen + RodneyNielsen 87–94 This paper presents the UNT HiLT+Ling system for the Sigmorphon 2019 shared Task 2: Morphological Analysis and Lemmatization in Context. Our core approach focuses on the morphological tagging task; part-of-speech tagging and lemmatization are treated as secondary tasks. Given the highly multilingual nature of the task, we propose an approach which makes minimal use of the supplied training data, in order to be extensible to languages without labeled training data for the morphological inflection task. Specifically, we use a parallel Bible corpus to align contextual embeddings at the verse level. The aligned verses are used to build cross-language translation matrices, which in turn are used to map between embedding spaces for the various languages. Finally, we use sets of inflected forms, primarily from a high-resource language, to induce vector representations for individual UniMorph tags. Morphological analysis is performed by matching vector representations to embeddings for individual tokens. While our system results are dramatically below the average system submitted for the shared task evaluation campaign, our method is (we suspect) unique in its minimal reliance on labeled training data. W19-4211 @@ -7281,7 +7281,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>UDP</fixed-case>ipe at <fixed-case>SIGMORPHON</fixed-case> 2019: Contextualized Embeddings, Regularization with Morphological Categories, Corpora Merging MilanStraka JanaStraková - JanHajic + JanHajic 95–103 We present our contribution to the SIGMORPHON 2019 Shared Task: Crosslinguality and Context in Morphology, Task 2: contextual morphological analysis and lemmatization. We submitted a modification of the UDPipe 2.0, one of best-performing systems of the CoNLL 2018 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies and an overall winner of the 2018 Shared Task on Extrinsic Parser Evaluation. As our first improvement, we use the pretrained contextualized embeddings (BERT) as additional inputs to the network; secondly, we use individual morphological features as regularization; and finally, we merge the selected corpora of the same language. In the lemmatization task, our system exceeds all the submitted systems by a wide margin with lemmatization accuracy 95.78 (second best was 95.00, third 94.46). In the morphological analysis, our system placed tightly second: our morphological analysis accuracy was 93.19, the winning system’s 93.23. W19-4212 @@ -7292,7 +7292,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>CUNI</fixed-case>–<fixed-case>M</fixed-case>alta system at <fixed-case>SIGMORPHON</fixed-case> 2019 Shared Task on Morphological Analysis and Lemmatization in context: Operation-based word formation RonaldCardenas ClaudiaBorg - DanielZeman + DanielZeman 104–112 This paper presents the submission by the Charles University-University of Malta team to the SIGMORPHON 2019 Shared Task on Morphological Analysis and Lemmatization in context. We present a lemmatization model based on previous work on neural transducers (Makarov and Clematide, 2018b; Aharoni and Goldberg, 2016). The key difference is that our model transforms the whole word form in every step, instead of consuming it character by character. We propose a merging strategy inspired by Byte-Pair-Encoding that reduces the space of valid operations by merging frequent adjacent operations. The resulting operations not only encode the actions to be performed but the relative position in the word token and how characters need to be transformed. Our morphological tagger is a vanilla biLSTM tagger that operates over operation representations, encoding operations and words in a hierarchical manner. Even though relative performance according to metrics is below the baseline, experiments show that our models capture important associations between interpretable operation labels and fine-grained morpho-syntax labels. W19-4213 @@ -7385,7 +7385,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Unsupervised Morphological Segmentation for Low-Resource Polysynthetic Languages RamyEskander - JudithKlavans + JudithKlavans SmarandaMuresan 189–195 Polysynthetic languages pose a challenge for morphological analysis due to the root-morpheme complexity and to the word class “squish”. In addition, many of these polysynthetic languages are low-resource. We propose unsupervised approaches for morphological segmentation of low-resource polysynthetic languages based on Adaptor Grammars (AG) (Eskander et al., 2016). We experiment with four languages from the Uto-Aztecan family. Our AG-based approaches outperform other unsupervised approaches and show promise when compared to supervised methods, outperforming them on two of the four languages. @@ -7406,8 +7406,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat Encoder-decoder models for latent phonological representations of words - Cassandra L.Jacobs - FrédéricMailhot + Cassandra L.Jacobs + FrédéricMailhot 206–217 We use sequence-to-sequence networks trained on sequential phonetic encoding tasks to construct compositional phonological representations of words. We show that the output of an encoder network can predict the phonetic durations of American English words better than a number of alternative forms. We also show that the model’s learned representations map onto existing measures of words’ phonological structure (phonological neighborhood density and phonotactic probability). W19-4224 @@ -7426,15 +7426,15 @@ One of the references was wrong therefore it is corrected to cite the appropriat The <fixed-case>SIGMORPHON</fixed-case> 2019 Shared Task: Morphological Analysis in Context and Cross-Lingual Transfer for Inflection - Arya D.McCarthy + Arya D.McCarthy EkaterinaVylomova ShijieWu ChaitanyaMalaviya LawrenceWolf-Sonkin GarrettNicolai ChristoKirov - MiikkaSilfverberg - Sabrina J.Mielke + MiikkaSilfverberg + Sabrina J.Mielke JeffreyHeinz RyanCotterell MansHulden @@ -7455,7 +7455,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat IsabelleAugenstein SpandanaGella SebastianRuder - KatharinaKann + KatharinaKann BurcuCan JohannesWelbl AlexisConneau @@ -7487,9 +7487,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat To Tune or Not to Tune? Adapting Pretrained Representations to Diverse Tasks - Matthew E.Peters + Matthew E.Peters SebastianRuder - Noah A.Smith + Noah A.Smith 7–14 While most previous work has focused on different pretraining objectives and architectures for transfer learning, we ask how to best adapt the pretrained model to a given target task. We focus on the two most common forms of adaptation, feature extraction (where the pretrained weights are frozen), and directly fine-tuning the pretrained model. Our empirical results across diverse NLP tasks with two state-of-the-art models show that the relative performance of fine-tuning vs. feature extraction depends on the similarity of the pretraining and target tasks. We explore possible explanations for this finding and provide a set of adaptation guidelines for the NLP practitioner. W19-4302 @@ -7513,7 +7513,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat AlessandroRaganato RaúlVázquez MathiasCreutz - JörgTiedemann + JörgTiedemann 27–32 In this paper, we explore a multilingual translation model with a cross-lingually shared layer that can be used as fixed-size sentence representation in different downstream tasks. We systematically study the impact of the size of the shared layer and the effect of including additional languages in the model. In contrast to related previous work, we demonstrate that the performance in translation does correlate with trainable downstream tasks. In particular, we show that larger intermediate layers not only improve translation quality, especially for long sentences, but also push the accuracy of trainable classification tasks. On the other hand, shorter representations lead to increased compression that is beneficial in non-trainable similarity tasks. We hypothesize that the training procedure on the downstream task enables the model to identify the encoded information that is useful for the specific task whereas non-trainable benchmarks can be confused by other types of information also encoded in the representation of a sentence. W19-4304 @@ -7524,7 +7524,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Multilingual <fixed-case>NMT</fixed-case> with a Language-Independent Attention Bridge RaúlVázquez AlessandroRaganato - JörgTiedemann + JörgTiedemann MathiasCreutz 33–39 In this paper, we propose an architecture for machine translation (MT) capable of obtaining multilingual sentence representations by incorporating an intermediate attention bridge that is shared across all languages. We train the model with language-specific encoders and decoders that are connected through an inner-attention layer on the encoder side. The attention bridge exploits the semantics from each language for translation and develops into a language-agnostic meaning representation that can efficiently be used for transfer learning. We present a new framework for the efficient development of multilingual neural machine translation (NMT) using this model and scheduled training. We have tested the approach in a systematic way with a multi-parallel data set. The model achieves substantial improvements over strong bilingual models and performs well for zero-shot translation, which demonstrates its ability of abstraction and transfer learning. @@ -7547,7 +7547,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>M</fixed-case>o<fixed-case>RT</fixed-case>y: Unsupervised Learning of Task-specialized Word Embeddings by Autoencoding NilsRethmeier - BarbaraPlank + BarbaraPlank 49–54 Word embeddings have undoubtedly revolutionized NLP. However, pretrained embeddings do not always work for a specific task (or set of tasks), particularly in limited resource setups. We introduce a simple yet effective, self-supervised post-processing method that constructs task-specialized word representations by picking from a menu of reconstructing transformations to yield improved end-task performance (MORTY). The method is complementary to recent state-of-the-art approaches to inductive transfer via fine-tuning, and forgoes costly model architectures and annotation. We evaluate MORTY on a broad range of setups, including different word embedding methods, corpus sizes and end-task semantics. Finally, we provide a surprisingly simple recipe to obtain specialized embeddings that better fit end-tasks. W19-4307 @@ -7572,7 +7572,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat NickRossenbach JanRosendahl ShahramKhadivi - HermannNey + HermannNey 61–71 We propose a novel model architecture and training algorithm to learn bilingual sentence embeddings from a combination of parallel and monolingual data. Our method connects autoencoding and neural machine translation to force the source and target sentence embeddings to share the same space without the help of a pivot language or an additional transformation. We train a multilayer perceptron on top of the sentence embeddings to extract good bilingual sentence pairs from nonparallel or noisy parallel data. Our approach shows promising performance on sentence alignment recovery and the WMT 2018 parallel corpus filtering tasks with only a single model. W19-4309 @@ -7595,7 +7595,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Composing Noun Phrase Vector Representations Aikaterini-LidaKalouli - Valeriade Paiva + Valeriade Paiva RichardCrouch 84–95 Vector representations of words have seen an increasing success over the past years in a variety of NLP tasks. While there seems to be a consensus about the usefulness of word embeddings and how to learn them, it is still unclear which representations can capture the meaning of phrases or even whole sentences. Recent work has shown that simple operations outperform more complex deep architectures. In this work, we propose two novel constraints for computing noun phrase vector representations. First, we propose that the semantic and not the syntactic contribution of each component of a noun phrase should be considered, so that the resulting composed vectors express more of the phrase meaning. Second, the composition process of the two phrase vectors should apply suitable dimensions’ selection in a way that specific semantic features captured by the phrase’s meaning become more salient. Our proposed methods are compared to 11 other approaches, including popular baselines and a neural net architecture, and are evaluated across 6 tasks and 2 datasets. Our results show that these constraints lead to more expressive phrase representations and can be applied to other state-of-the-art methods to improve their performance. @@ -7629,7 +7629,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Constructive Type-Logical Supertagging With Self-Attention Networks KonstantinosKogkalidis - MichaelMoortgat + MichaelMoortgat TejaswiniDeoskar 113–123 We propose a novel application of self-attention networks towards grammar induction. We present an attention-based supertagger for a refined type-logical grammar, trained on constructing types inductively. In addition to achieving a high overall type accuracy, our model is able to learn the syntax of the grammar’s type system along with its denotational semantics. This lifts the closed world assumption commonly made by lexicalized grammar supertaggers, greatly enhancing its generalization potential. This is evidenced both by its adequate accuracy over sparse word types and its ability to correctly construct complex types never seen during training, which, to the best of our knowledge, was as of yet unaccomplished. @@ -7663,7 +7663,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat An Empirical Study on Pre-trained Embeddings and Language Models for Bot Detection AndresGarcia-Silva CristianBerrio - José ManuelGómez-Pérez + José ManuelGómez-Pérez 148–155 Fine-tuning pre-trained language models has significantly advanced the state of art in a wide range of NLP downstream tasks. Usually, such language models are learned from large and well-formed text corpora from e.g. encyclopedic resources, books or news. However, a significant amount of the text to be analyzed nowadays is Web data, often from social media. In this paper we consider the research question: How do standard pre-trained language models generalize and capture the peculiarities of rather short, informal and frequently automatically generated text found in social media? To answer this question, we focus on bot detection in Twitter as our evaluation task and test the performance of fine-tuning approaches based on language models against popular neural architectures such as LSTM and CNN combined with pre-trained and contextualized embeddings. Our results also show strong performance variations among the different language model approaches, which suggest further research. W19-4317 @@ -7673,7 +7673,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Probing Multilingual Sentence Representations With <fixed-case>X</fixed-case>-Probe VinitRavishankar - LiljaØvrelid + LiljaØvrelid ErikVelldal 156–168 This paper extends the task of probing sentence representations for linguistic insight in a multilingual domain. In doing so, we make two contributions: first, we provide datasets for multilingual probing, derived from Wikipedia, in five languages, viz. English, French, German, Spanish and Russian. Second, we evaluate six sentence encoders for each language, each trained by mapping sentence representations to English sentence representations, using sentences in a parallel corpus. We discover that cross-lingually mapped representations are often better at retaining certain linguistic information than representations derived from English encoders trained on natural language inference (NLI) as a downstream task. @@ -7694,7 +7694,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Learning Multilingual Meta-Embeddings for Code-Switching Named Entity Recognition - Genta IndraWinata + Genta IndraWinata ZhaojiangLin PascaleFung 181–186 @@ -7753,7 +7753,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat On Committee Representations of Adversarial Learning Models for Question-Answer Ranking SparshGupta - VitorCarvalho + VitorCarvalho 218–223 Adversarial training is a process in Machine Learning that explicitly trains models on adversarial inputs (inputs designed to deceive or trick the learning process) in order to make it more robust or accurate. In this paper we investigate how representing adversarial training models as committees can be used to effectively improve the performance of Question-Answer (QA) Ranking. We start by empirically probing the effects of adversarial training over multiple QA ranking algorithms, including the state-of-the-art Multihop Attention Network model. We evaluate these algorithms on several benchmark datasets and observe that, while adversarial training is beneficial to most baseline algorithms, there are cases where it may lead to overfitting and performance degradation. We investigate the causes of such degradation, and then propose a new representation procedure for this adversarial learning problem, based on committee learning, that not only is capable of consistently improving all baseline algorithms, but also outperforms the previous state-of-the-art algorithm by as much as 6% in NDCG. W19-4325 @@ -7831,7 +7831,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Leveraging Pre-Trained Embeddings for <fixed-case>W</fixed-case>elsh Taggers IgnatiusEzeani - ScottPiao + ScottPiao StevenNeale PaulRayson DawnKnight @@ -7899,10 +7899,10 @@ One of the references was wrong therefore it is corrected to cite the appropriat Computationally Modeling the Impact of Task-Appropriate Language Complexity and Accuracy on Human Grading of <fixed-case>G</fixed-case>erman Essays - ZarahWeiss + ZarahWeiss AnjaRiemenschneider PaulineSchröter - DetmarMeurers + DetmarMeurers 30–45 Computational linguistic research on the language complexity of student writing typically involves human ratings as a gold standard. However, educational science shows that teachers find it difficult to identify and cleanly separate accuracy, different aspects of complexity, contents, and structure. In this paper, we therefore explore the use of computational linguistic methods to investigate how task-appropriate complexity and accuracy relate to the grading of overall performance, content performance, and language performance as assigned by teachers. Based on texts written by students for the official school-leaving state examination (Abitur), we show that teachers successfully assign higher language performance grades to essays with higher task-appropriate language complexity and properly separate this from content scores. Yet, accuracy impacts teacher assessment for all grading rubrics, also the content score, overemphasizing the role of accuracy. Our analysis is based on broad computational linguistic modeling of German language complexity and an innovative theory- and data-driven feature aggregation method inferring task-appropriate language complexity. W19-4404 @@ -7924,8 +7924,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat The <fixed-case>BEA</fixed-case>-2019 Shared Task on Grammatical Error Correction ChristopherBryant MarianoFelice - Øistein E.Andersen - TedBriscoe + Øistein E.Andersen + TedBriscoe 52–75 This paper reports on the BEA-2019 Shared Task on Grammatical Error Correction (GEC). As with the CoNLL-2014 shared task, participants are required to correct all types of errors in test data. One of the main contributions of the BEA-2019 shared task is the introduction of a new dataset, the Write&Improve+LOCNESS corpus, which represents a wider range of native and learner English levels and abilities. Another contribution is the introduction of tracks, which control the amount of annotated data available to participants. Systems are evaluated in terms of ERRANT F_0.5, which allows us to report a much wider range of performance statistics. The competition was hosted on Codalab and remains open for further submissions on the blind test set. W19-4406 @@ -7957,7 +7957,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Regression or classification? Automated Essay Scoring for <fixed-case>N</fixed-case>orwegian StigJohan Berggren TarakaRama - LiljaØvrelid + LiljaØvrelid 92–102 In this paper we present first results for the task of Automated Essay Scoring for Norwegian learner language. We analyze a number of properties of this task experimentally and assess (i) the formulation of the task as either regression or classification, (ii) the use of various non-neural and neural machine learning architectures with various types of input representations, and (iii) applying multi-task learning for joint prediction of essay scoring and native language identification. We find that a GRU-based attention model trained in a single-task setting performs best at the AES task. W19-4409 @@ -8086,7 +8086,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Noisy Channel for Low Resource Grammatical Error Correction SimonFlachs OphélieLacroix - AndersSøgaard + AndersSøgaard 191–196 This paper describes our contribution to the low-resource track of the BEA 2019 shared task on Grammatical Error Correction (GEC). Our approach to GEC builds on the theory of the noisy channel by combining a channel model and language model. We generate confusion sets from the Wikipedia edit history and use the frequencies of edits to estimate the channel model. Additionally, we use two pre-trained language models: 1) Google’s BERT model, which we fine-tune for specific error types and 2) OpenAI’s GPT-2 model, utilizing that it can operate with previous sentences as context. Furthermore, we search for the optimal combinations of corrections using beam search. W19-4420 @@ -8188,7 +8188,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Anglicized Words and Misspelled Cognates in Native Language Identification IliaMarkov - ViviNastase + ViviNastase CarloStrapparava 275–284 In this paper, we present experiments that estimate the impact of specific lexical choices of people writing in a second language (L2). In particular, we look at misspelled words that indicate lexical uncertainty on the part of the author, and separate them into three categories: misspelled cognates, “L2-ed” (in our case, anglicized) words, and all other spelling errors. We test the assumption that such errors contain clues about the native language of an essay’s author through the task of native language identification. The results of the experiments show that the information brought by each of these categories is complementary. We also note that while the distribution of such features changes with the proficiency level of the writer, their contribution towards native language identification remains significant at all levels. @@ -8221,7 +8221,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Toward Automated Content Feedback Generation for Non-native Spontaneous Speech - Su-YounYoon + Su-YounYoon Ching-NiHsieh KlausZechner MatthewMulholland @@ -8315,7 +8315,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Simple Construction of Mixed-Language Texts for Vocabulary Learning AdithyaRenduchintala PhilippKoehn - JasonEisner + JasonEisner 369–379 We present a machine foreign-language teacher that takes documents written in a student’s native language and detects situations where it can replace words with their foreign glosses such that new foreign vocabulary can be learned simply through reading the resulting mixed-language text. We show that it is possible to design such a machine teacher without any supervised data from (human) students. We accomplish this by modifying a cloze language model to incrementally learn new vocabulary items, and use this language model as a proxy for the word guessing and learning ability of real students. Our machine foreign-language teacher decides which subset of words to replace by consulting this language model. We evaluate three variants of our student proxy language models through a study on Amazon Mechanical Turk (MTurk). We find that MTurk “students” were able to guess the meanings of foreign words introduced by the machine teacher with high accuracy for both function words as well as content words in two out of the three models. In addition, we show that students are able to retain their knowledge about the foreign words after they finish reading the document. W19-4439 @@ -8324,8 +8324,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat Analyzing Linguistic Complexity and Accuracy in Academic Language Development of <fixed-case>G</fixed-case>erman across Elementary and Secondary School - ZarahWeiss - DetmarMeurers + ZarahWeiss + DetmarMeurers 380–393 We track the development of writing complexity and accuracy in German students’ early academic language development from first to eighth grade. Combining an empirically broad approach to linguistic complexity with the high-quality error annotation included in the Karlsruhe Children’s Text corpus (Lavalley et al. 2015) used, we construct models of German academic language development that successfully identify the student’s grade level. We show that classifiers for the early years rely more on accuracy development, whereas development in secondary school is better characterized by increasingly complex language in all domains: linguistic system, language use, and human sentence processing characteristics. We demonstrate the generalizability and robustness of models using such a broad complexity feature set across writing topics. W19-4440 @@ -8334,8 +8334,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat Content Modeling for Automated Oral Proficiency Scoring System - Su-YounYoon - Chong MinLee + Su-YounYoon + Chong MinLee 394–401 We developed an automated oral proficiency scoring system for non-native English speakers’ spontaneous speech. Automated systems that score holistic proficiency are expected to assess a wide range of performance categories, and the content is one of the core performance categories. In order to assess the quality of the content, we trained a Siamese convolutional neural network (CNN) to model the semantic relationship between key points generated by experts and a test response. The correlation between human scores and Siamese CNN scores was comparable to human-human agreement (r=0.63), and it was higher than the baseline content features. The inclusion of Siamese CNN-based feature to the existing state-of-the-art automated scoring model achieved a small but statistically significant improvement. However, the new model suffered from score inflation for long atypical responses with serious content issues. We investigated the reasons of this score inflation by analyzing the associations with linguistic features and identifying areas strongly associated with the score errors. W19-4441 @@ -8372,7 +8372,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Metaphors in Text Simplification: To change or not to change, that is the question YuliaClausen - ViviNastase + ViviNastase 423–434 We present an analysis of metaphors in news text simplification. Using features that capture general and metaphor specific characteristics, we test whether we can automatically identify which metaphors will be changed or preserved, and whether there are features that have different predictive power for metaphors or literal words. The experiments show that the Age of Acquisition is the most distinctive feature for both metaphors and literal words. Features that capture Imageability and Concreteness are useful when used alone, but within the full set of features they lose their impact. Frequency of use seems to be the best feature to differentiate metaphors that should be changed and those to be preserved. W19-4444 @@ -8400,7 +8400,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat DavidGerritsen BrittanyMcLaughlin EzekielDixon-Román - Alan WBlack + Alan WBlack 444–460 There is a long record of research on equity in schools. As machine learning researchers begin to study fairness and bias in earnest, language technologies in education have an unusually strong theoretical and applied foundation to build on. Here, we introduce concepts from culturally relevant pedagogy and other frameworks for teaching and learning, identifying future work on equity in NLP. We present case studies in a range of topics like intelligent tutoring systems, computer-assisted language learning, automated essay scoring, and sentiment analysis in classrooms, and provide an actionable agenda for research. W19-4446 @@ -8436,7 +8436,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat The Unbearable Weight of Generating Artificial Errors for Grammatical Error Correction Phu MonHtut - JoelTetreault + JoelTetreault 478–483 In this paper, we investigate the impact of using 4 recent neural models for generating artificial errors to help train the neural grammatical error correction models. We conduct a battery of experiments on the effect of data size, models, and comparison with a rule-based approach. W19-4449 @@ -8448,7 +8448,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat FarahNadeem HuyNguyen YangLiu - MariOstendorf + MariOstendorf 484–493 Automated essay scoring systems typically rely on hand-crafted features to predict essay quality, but such systems are limited by the cost of feature engineering. Neural networks offer an alternative to feature engineering, but they typically require more annotated data. This paper explores network structures, contextualized embeddings and pre-training strategies aimed at capturing discourse characteristics of essays. Experiments on three essay scoring tasks show benefits from all three strategies in different combinations, with simpler architectures being more effective when less training data is available. W19-4450 @@ -8476,7 +8476,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat ElenaMusi PatriciaDavies SmarandaMuresan - Rebecca J.Passonneau + Rebecca J.Passonneau 507–518 We present a unique dataset of student source-based argument essays to facilitate research on the relations between content, argumentation skills, and assessment. Two classroom writing assignments were given to college students in a STEM major, accompanied by a carefully designed rubric. The paper presents a reliability study of the rubric, showing it to be highly reliable, and initial annotation on content and argumentation annotation of the essays. W19-4452 @@ -8513,8 +8513,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat A Cascade Model for Proposition Extraction in Argumentation YohanJo JackyVisser - ChrisReed - EduardHovy + ChrisReed + EduardHovy 11–24 We present a model to tackle a fundamental but understudied problem in computational argumentation: proposition extraction. Propositions are the basic units of an argument and the primary building blocks of most argument mining systems. However, they are usually substituted by argumentative discourse units obtained via surface-level text segmentation, which may yield text segments that lack semantic information necessary for subsequent argument mining processes. In contrast, our cascade model aims to extract complete propositions by handling anaphora resolution, text segmentation, reported speech, questions, imperatives, missing subjects, and revision. We formulate each task as a computational problem and test various models using a corpus of the 2016 U.S. presidential debates. We show promising performance for some tasks and discuss main challenges in proposition extraction. W19-4502 @@ -8648,7 +8648,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Annotation of Rhetorical Moves in Biochemistry Articles MohammedAlliheedi - Robert E.Mercer + Robert E.Mercer RobinCohen 113–123 This paper focuses on the real world application of scientific writing and on determining rhetorical moves, an important step in establishing the argument structure of biomedical articles. Using the observation that the structure of scholarly writing in laboratory-based experimental sciences closely follows laboratory procedures, we examine most closely the Methods section of the texts and adopt an approach of identifying rhetorical moves that are procedure-oriented. We also propose a verb-centric frame semantics with an effective set of semantic roles in order to support the analysis. These components are designed to support a computational model that extends a promising proposal of appropriate rhetorical moves for this domain, but one which is merely descriptive. Our work also contributes to the understanding of argument-related annotation schemes. In particular, we conduct a detailed study with human annotators to confirm that our selection of semantic roles is effective in determining the underlying rhetorical structure of existing biomedical articles in an extensive dataset. The annotated dataset that we produce provides the important knowledge needed for our ultimate goal of analyzing biochemistry articles. @@ -8673,7 +8673,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat AlexanderBondarenko MircoFranzek MatthiasHagen - ChrisBiemann + ChrisBiemann 136–145 We tackle the tasks of automatically identifying comparative sentences and categorizing the intended preference (e.g., “Python has better NLP libraries than MATLAB” → Python, better, MATLAB). To this end, we manually annotate 7,199 sentences for 217 distinct target item pairs from several domains (27% of the sentences contain an oriented comparison in the sense of “better” or “worse”). A gradient boosting model based on pre-trained sentence embeddings reaches an F1 score of 85% in our experimental evaluation. The model can be used to extract comparative sentences for pro/con argumentation in comparative / argument search engines or debating technologies. W19-4516 @@ -8684,7 +8684,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Ranking Passages for Argument Convincingness PeterPotash AdamFerguson - Timothy J.Hazen + Timothy J.Hazen 146–155 In data ranking applications, pairwise annotation is often more consistent than cardinal annotation for learning ranking models. We examine this in a case study on ranking text passages for argument convincingness. Our task is to choose text passages that provide the highest-quality, most-convincing arguments for opposing sides of a topic. Using data from a deployed system within the Bing search engine, we construct a pairwise-labeled dataset for argument convincingness that is substantially more comprehensive in topical coverage compared to existing public resources. We detail the process of extracting topical passages for queries submitted to a search engine, creating annotated sets of passages aligned to different stances on a topic, and assessing argument convincingness of passages using pairwise annotation. Using a state-of-the-art convincingness model, we evaluate several methods for using pairwise-annotated data examples to train models for ranking passages. Our results show pairwise training outperforms training that regresses to a target score for each passage. Our results also show a simple ‘win-rate’ score is a better regression target than the previously proposed page-rank target. Lastly, addressing the need to filter noisy crowd-sourced annotations when constructing a dataset, we show that filtering for transitivity within pairwise annotations is more effective than filtering based on annotation confidence measures for individual examples. W19-4517 @@ -8705,7 +8705,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Persuasion of the Undecided: Language vs. the Listener LianeLongpre EsinDurmus - ClaireCardie + ClaireCardie 167–176 This paper examines the factors that govern persuasion for a priori UNDECIDED versus DECIDED audience members in the context of on-line debates. We separately study two types of influences: linguistic factors — features of the language of the debate itself; and audience factors — features of an audience member encoding demographic information, prior beliefs, and debate platform behavior. In a study of users of a popular debate platform, we find first that different combinations of linguistic features are critical for predicting persuasion outcomes for UNDECIDED versus DECIDED members of the audience. We additionally find that audience factors have more influence on predicting the side (PRO/CON) that persuaded UNDECIDED users than for DECIDED users that flip their stance to the opposing side. Our results emphasize the importance of considering the undecided and decided audiences separately when studying linguistic factors of persuasion. W19-4519 @@ -8728,8 +8728,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of the Fourth Arabic Natural Language Processing Workshop W19-46 - WassimEl-Hajj - Lamia HadrichBelguith + WassimEl-Hajj + Lamia HadrichBelguith FethiBougares WalidMagdy ImedZitouni @@ -8763,7 +8763,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat MahitabEmam KhaledEssam RobertNabil - HanyHassan + HanyHassan 11–17 Parallel corpora available for building machine translation (MT) models for dialectal Arabic (DA) are rather limited. The scarcity of resources has prompted the use of Modern Standard Arabic (MSA) abundant resources to complement the limited dialectal resource. However, dialectal clitics often differ between MSA and DA. This paper compares morphology-aware DA word segmentation to other word segmentation approaches like Byte Pair Encoding (BPE) and Sub-word Regularization (SR). A set of experiments conducted on Egyptian Arabic (EA), Levantine Arabic (LA), and Gulf Arabic (GA) show that a sufficiently accurate morphology-aware segmentation used in conjunction with BPE outperforms the other word segmentation approaches. W19-4602 @@ -8789,7 +8789,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat HalaMulki HatemHaddad MouradGridach - IsmailBabaoğlu + IsmailBabaoğlu 30–39 Arabic sentiment analysis models have employed compositional embedding features to represent the Arabic dialectal content. These embeddings are usually composed via ordered, syntax-aware composition functions and learned within deep neural frameworks. With the free word order and the varying syntax nature across the different Arabic dialects, a sentiment analysis system developed for one dialect might not be efficient for the others. Here we present syntax-ignorant n-gram embeddings to be used in sentiment analysis of several Arabic dialects. The proposed embeddings were composed and learned using an unordered composition function and a shallow neural model. Five datasets of different dialects were used to evaluate the produced embeddings in the sentiment analysis task. The obtained results revealed that, our syntax-ignorant embeddings could outperform word2vec model and doc2vec both variant models in addition to hand-crafted system baselines, while a competent performance was noticed towards baseline systems that adopted more complicated neural architectures. W19-4604 @@ -8813,7 +8813,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Homograph Disambiguation through Selective Diacritic Restoration SawsanAlqahtani HananAldarmaki - MonaDiab + MonaDiab 49–59 Lexical ambiguity, a challenging phenomenon in all natural languages, is particularly prevalent for languages with diacritics that tend to be omitted in writing, such as Arabic. Omitting diacritics leads to an increase in the number of homographs: different words with the same spelling. Diacritic restoration could theoretically help disambiguate these words, but in practice, the increase in overall sparsity leads to performance degradation in NLP applications. In this paper, we propose approaches for automatically marking a subset of words for diacritic restoration, which leads to selective homograph disambiguation. Compared to full or no diacritic restoration, these approaches yield selectively-diacritized datasets that balance sparsity and lexical disambiguation. We evaluate the various selection strategies extrinsically on several downstream applications: neural machine translation, part-of-speech tagging, and semantic textual similarity. Our experiments on Arabic show promising results, where our devised strategies on selective diacritization lead to a more balanced and consistent performance in downstream applications. W19-4606 @@ -8838,7 +8838,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat NourEl Droubi HazemHajj WassimEl-Hajj - KhaledShaban + KhaledShaban 68–77 Arabic is a complex language with limited resources which makes it challenging to produce accurate text classification tasks such as sentiment analysis. The utilization of transfer learning (TL) has recently shown promising results for advancing accuracy of text classification in English. TL models are pre-trained on large corpora, and then fine-tuned on task-specific datasets. In particular, universal language models (ULMs), such as recently developed BERT, have achieved state-of-the-art results in various NLP tasks in English. In this paper, we hypothesize that similar success can be achieved for Arabic. The work aims at supporting the hypothesis by developing the first Universal Language Model in Arabic (hULMonA - حلمنا meaning our dream), demonstrating its use for Arabic classifications tasks, and demonstrating how a pre-trained multi-lingual BERT can also be used for Arabic. We then conduct a benchmark study to evaluate both ULM successes with Arabic sentiment analysis. Experiment results show that the developed hULMonA and multi-lingual ULM are able to generalize well to multiple Arabic data sets and achieve new state of the art results in Arabic Sentiment Analysis for some of the tested sets. W19-4608 @@ -8862,7 +8862,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat XingyuFu AseelAddawood NahilSobh - ClareVoss + ClareVoss JiaweiHan 88–96 In this paper, we tackle the problem of “root extraction” from words in the Semitic language family. A challenge in applying natural language processing techniques to these languages is the data sparsity problem that arises from their rich internal morphology, where the substructure is inherently non-concatenative and morphemes are interdigitated in word formation. While previous automated methods have relied on human-curated rules or multiclass classification, they have not fully leveraged the various combinations of regular, sequential concatenative morphology within the words and the internal interleaving within templatic stems of roots and patterns. To address this, we propose a constrained sequence-to-sequence root extraction method. Experimental results show our constrained model outperforms a variety of methods at root extraction. Furthermore, by enriching word embeddings with resulting decompositions, we show improved results on word analogy, word similarity, and language modeling tasks. @@ -8924,7 +8924,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat SaneYagi OuafaaKacha NizarHabash - OwenRambow + OwenRambow 137–147 We present a collection of morphologically annotated corpora for seven Arabic dialects: Taizi Yemeni, Sanaani Yemeni, Najdi, Jordanian, Syrian, Iraqi and Moroccan Arabic. The corpora collectively cover over 200,000 words, and are all manually annotated in a common set of standards for orthography, diacritized lemmas, tokenization, morphological units and English glosses. These corpora will be publicly available to serve as benchmarks for training and evaluating systems for Arabic dialect morphological analysis and disambiguation. W19-4615 @@ -8946,7 +8946,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Translating Between Morphologically Rich Languages: An <fixed-case>A</fixed-case>rabic-to-<fixed-case>T</fixed-case>urkish Machine Translation System - İlknurDurgar El-Kahlout + İlknurDurgar El-Kahlout EmreBektaş Naime ŞeymaErdem HamzaKaya @@ -9114,7 +9114,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat VictorGuichard PraveenJoshi HaithemAfli - AbdessalamBouchekif + AbdessalamBouchekif 249–253 In this paper, we present two approaches for Arabic Fine-Grained Dialect Identification. The first approach is based on Recurrent Neural Networks (BLSTM, BGRU) using hierarchical classification. The main idea is to separate the classification process for a sentence from a given text in two stages. We start with a higher level of classification (8 classes) and then the finer-grained classification (26 classes). The second approach is given by a voting system based on Naive Bayes and Random Forest. Our system achieves an F1 score of 63.02 % on the subtask evaluation dataset. W19-4631 @@ -9136,7 +9136,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat KarimaMeftouh KarimaAbidi SalimaHarrat - KamelSmaili + KamelSmaili 259–263 This paper describes the approach adopted by the SMarT research group to build a dialect identification system in the framework of the Madar shared task on Arabic fine-grained dialect identification. We experimented several approaches, but we finally decided to use a Multinomial Naive Bayes classifier based on word and character ngrams in addition to the language model probabilities. We achieved a score of 67.73% in terms of Macro accuracy and a macro-averaged F1-score of 67.31% W19-4633 @@ -9236,7 +9236,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat From Insanely Jealous to Insanely Delicious: Computational Models for the Semantic Bleaching of <fixed-case>E</fixed-case>nglish Intensifiers YiweiLuo - DanJurafsky + DanJurafsky BethLevin 1–13 We introduce novel computational models for modeling semantic bleaching, a widespread category of change in which words become more abstract or lose elements of meaning, like the development of “arrive” from its earlier meaning ‘become at shore.’ We validate our methods on a widespread case of bleaching in English: de-adjectival adverbs that originate as manner adverbs (as in “awfully behaved”) and later become intensifying adverbs (as in “awfully nice”). Our methods formally quantify three reflexes of bleaching: decreasing similarity to the source meaning (e.g., “awful”), increasing similarity to a fully bleached prototype (e.g., “very”), and increasing productivity (e.g., the breadth of adjectives that an adverb modifies). We also test a new causal model and find evidence that bleaching is initially triggered in contexts such as “conspicuously evident” and “insanely jealous”, where an adverb premodifies a semantically similar adjective. These contexts provide a form of “bridging context” (Evans and Wilkins, 2000) that allow a manner adverb to be reinterpreted as an intensifying adverb similar to “very”. @@ -9277,7 +9277,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Contextualized Diachronic Word Representations GaneshJawahar - DjaméSeddah + DjaméSeddah 35–47 Diachronic word embeddings play a key role in capturing interesting patterns about how language evolves over time. Most of the existing work focuses on studying corpora spanning across several decades, which is understandably still not a possibility when working on social media-based user-generated content. In this work, we address the problem of studying semantic changes in a large Twitter corpus collected over five years, a much shorter period than what is usually the norm in diachronic studies. We devise a novel attentional model, based on Bernoulli word embeddings, that are conditioned on contextual extra-linguistic (social) features such as network, spatial and socio-economic variables, which are associated with Twitter users, as well as topic-based features. We posit that these social features provide an inductive bias that helps our model to overcome the narrow time-span regime problem. Our extensive experiments reveal that our proposed model is able to capture subtle semantic shifts without being biased towards frequency cues and also works well when certain contextual features are absent. Our model fits the data better than current state-of-the-art dynamic word embedding models and therefore is a promising tool to study diachronic semantic changes over small time periods. W19-4705 @@ -9320,7 +9320,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat A Method to Automatically Identify Diachronic Variation in Collocations. MarcosGarcia - MarcosGarcía Salido + MarcosGarcía Salido 71–80 This paper introduces a novel method to track collocational variations in diachronic corpora that can identify several changes undergone by these phraseological combinations and to propose alternative solutions found in later periods. The strategy consists of extracting syntactically-related candidates of collocations and ranking them using statistical association measures. Then, starting from the first period of the corpus, the system tracks each combination over time, verifying different types of historical variation such as the loss of one or both lemmas, the disappearance of the collocation, or its diachronic frequency trend. Using a distributional semantics strategy, it also proposes other linguistic structures which convey similar meanings to those extinct collocations. A case study on historical corpora of Portuguese and Spanish shows that the system speeds up and facilitates the finding of some diachronic changes and phraseological shifts that are harder to identify without using automated methods. W19-4709 @@ -9435,8 +9435,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat Studying Laws of Semantic Divergence across Languages using Cognate Sets AnaUban - Alina MariaCiobanu - Liviu P.Dinu + Alina MariaCiobanu + Liviu P.Dinu 161–166 Semantic divergence in related languages is a key concern of historical linguistics. Intra-lingual semantic shift has been previously studied in computational linguistics, but this can only provide a limited picture of the evolution of word meanings, which often develop in a multilingual environment. In this paper we investigate semantic change across languages by measuring the semantic distance of cognate words in multiple languages. By comparing current meanings of cognates in different languages, we hope to uncover information about their previous meanings, and about how they diverged in their respective languages from their common original etymon. We further study the properties of their semantic divergence, by analyzing how the features of words such as frequency and polysemy are related to the divergence in their meaning, and thus make the first steps towards formulating laws of cross-lingual semantic change. W19-4720 @@ -9483,7 +9483,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat One-to-<fixed-case>X</fixed-case> Analogical Reasoning on Word Embeddings: a Case for Diachronic Armed Conflict Prediction from News Texts AndreyKutuzov ErikVelldal - LiljaØvrelid + LiljaØvrelid 196–201 We extend the well-known word analogy task to a one-to-X formulation, including one-to-none cases, when no correct answer exists. The task is cast as a relation discovery problem and applied to historical armed conflicts datasets, attempting to predict new relations of type ‘location:armed-group’ based on data about past events. As the source of semantic information, we use diachronic word embedding models trained on English news texts. A simple technique to improve diachronic performance in such task is demonstrated, using a threshold based on a function of cosine distance to decrease the number of false positives; this approach is shown to be beneficial on two different corpora. Finally, we publish a ready-to-use test set for one-to-X analogy evaluation on historical armed conflicts data. W19-4724 @@ -9507,7 +9507,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Semantic Change in the Language of <fixed-case>UK</fixed-case> Parliamentary Debates GavinAbercrombie - RizaBatista-Navarro + RizaBatista-Navarro 210–215 We investigate changes in the meanings of words used in the UK Parliament across two different epochs. We use word embeddings to explore changes in the distribution of words of interest and uncover words that appear to have undergone semantic transformation in the intervening period, and explore different ways of obtaining target words for this purpose. We find that semantic changes are generally in line with those found in other corpora, and little evidence that parliamentary language is more static than general English. It also seems that words with senses that have been recorded in the dictionary as having fallen into disuse do not undergo semantic changes in this domain. W19-4726 @@ -9538,7 +9538,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Measuring the Compositionality of Noun-Noun Compounds over Time PrajitDhar JanisPagel - Lonnekevan der Plas + Lonnekevan der Plas 234–239 We present work in progress on the temporal progression of compositionality in noun-noun compounds. Previous work has proposed computational methods for determining the compositionality of compounds. These methods try to automatically determine how transparent the meaning of the compound as a whole is with respect to the meaning of its parts. We hypothesize that such a property might change over time. We use the time-stamped Google Books corpus for our diachronic investigations, and first examine whether the vector-based semantic spaces extracted from this corpus are able to predict compositionality ratings, despite their inherent limitations. We find that using temporal information helps predicting the ratings, although correlation with the ratings is lower than reported for other corpora. Finally, we show changes in compositionality over time for a selection of compounds. W19-4729 @@ -9548,7 +9548,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Towards Automatic Variant Analysis of Ancient Devotional Texts AmirHazem - BéatriceDaille + BéatriceDaille DominiqueStutzmann JacobCurrie ChristineJacquin @@ -9595,7 +9595,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat ChristinSchätzle Frederik L.Dennig MichaelBlumenschein - Daniel A.Keim + Daniel A.Keim MiriamButt 272–278 Historical change typically is the result of complex interactions between several linguistic factors. Identifying the relevant factors and understanding how they interact across the temporal dimension is the core remit of historical linguistics. With respect to corpus work, this entails a separate annotation, extraction and painstaking pair-wise comparison of the relevant bits of information. This paper presents a significant extension of HistoBankVis, a multilayer visualization system which allows a fast and interactive exploration of complex linguistic data. Linguistic factors can be understood as data dimensions which show complex interrelationships. We model these relationships with the Parallel Sets technique. We demonstrate the powerful potential of this technique by applying the system to understanding the interaction of case, grammatical relations and word order in the history of Icelandic. @@ -9609,7 +9609,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of the 2019 ACL Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP W19-48 TalLinzen - GrzegorzChrupała + GrzegorzChrupała YonatanBelinkov DieuwkeHupkes Association for Computational Linguistics @@ -9637,7 +9637,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Sentiment Analysis Is Not Solved! Assessing and Probing Sentiment Classification JeremyBarnes - LiljaØvrelid + LiljaØvrelid ErikVelldal 12–23 Neural methods for sentiment analysis have led to quantitative improvements over previous approaches, but these advances are not always accompanied with a thorough analysis of the qualitative differences. Therefore, it is not clear what outstanding conceptual challenges for sentiment analysis remain. In this work, we attempt to discover what challenges still prove a problem for sentiment classifiers for English and to provide a challenging dataset. We collect the subset of sentences that an (oracle) ensemble of state-of-the-art sentiment classifiers misclassify and then annotate them for 18 linguistic and paralinguistic phenomena, such as negation, sarcasm, modality, etc. Finally, we provide a case study that demonstrates the usefulness of the dataset to probe the performance of a given sentiment classifier with respect to linguistic phenomena. @@ -9649,7 +9649,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Second-order Co-occurrence Sensitivity of Skip-Gram with Negative Sampling DominikSchlechtweg CennetOguz - SabineSchulte im Walde + SabineSchulte im Walde 24–30 We simulate first- and second-order context overlap and show that Skip-Gram with Negative Sampling is similar to Singular Value Decomposition in capturing second-order co-occurrence information, while Pointwise Mutual Information is agnostic to it. We support the results with an empirical study finding that the models react differently when provided with additional second-order information. Our findings reveal a basic property of Skip-Gram with Negative Sampling and point towards an explanation of its success on a variety of tasks. W19-4803 @@ -9703,7 +9703,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Do Human Rationales Improve Machine Explanations? JuliaStrout YeZhang - RaymondMooney + RaymondMooney 56–62 Work on “learning with rationales” shows that humans providing explanations to a machine learning system can improve the system’s predictive accuracy. However, this work has not been connected to work in “explainable AI” which concerns machines explaining their reasoning to humans. In this work, we show that learning with rationales can also improve the quality of the machine’s explanations as evaluated by human judges. Specifically, we present experiments showing that, for CNN-based text classification, explanations generated using “supervised attention” are judged superior to explanations generated using normal unsupervised attention. W19-4807 @@ -9756,7 +9756,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Faithful Multimodal Explanation for Visual Question Answering JialinWu - RaymondMooney + RaymondMooney 103–112 AI systems’ ability to explain their reasoning is critical to their utility and trustworthiness. Deep neural networks have enabled significant progress on many challenging problems such as visual question answering (VQA). However, most of them are opaque black boxes with limited explanatory capability. This paper presents a novel approach to developing a high-performing VQA system that can elucidate its answers with integrated textual and visual explanations that faithfully reflect important aspects of its underlying reasoning while capturing the style of comprehensible human explanations. Extensive experimental evaluation demonstrates the advantages of this approach compared to competing methods using both automated metrics and human evaluation. W19-4812 @@ -9805,7 +9805,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Modeling Paths for Explainable Knowledge Base Completion JosuaStadelmaier - SebastianPadó + SebastianPadó 147–157 A common approach in knowledge base completion (KBC) is to learn representations for entities and relations in order to infer missing facts by generalizing existing ones. A shortcoming of standard models is that they do not explain their predictions to make them verifiable easily to human inspection. In this paper, we propose the Context Path Model (CPM) which generates explanations for new facts in KBC by providing sets of context paths as supporting evidence for these triples. For example, a new triple (Theresa May, nationality, Britain) may be explained by the path (Theresa May, born in, Eastbourne, contained in, Britain). The CPM is formulated as a wrapper that can be applied on top of various existing KBC models. We evaluate it for the well-established TransE model. We observe that its performance remains very close despite the added complexity, and that most of the paths proposed as explanations provide meaningful evidence to assess the correctness. W19-4816 @@ -9835,7 +9835,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Hierarchical Representation in Neural Language Models: Suppression and Recovery of Expectations EthanWilcox - RogerLevy + RogerLevy RichardFutrell 181–190 Work using artificial languages as training input has shown that LSTMs are capable of inducing the stack-like data structures required to represent context-free and certain mildly context-sensitive languages — formal language classes which correspond in theory to the hierarchical structures of natural language. Here we present a suite of experiments probing whether neural language models trained on linguistic data induce these stack-like data structures and deploy them while incrementally predicting words. We study two natural language phenomena: center embedding sentences and syntactic island constraints on the filler–gap dependency. In order to properly predict words in these structures, a model must be able to temporarily suppress certain expectations and then recover those expectations later, essentially pushing and popping these expectations on a stack. Our results provide evidence that models can successfully suppress and recover expectations in many cases, but do not fully recover their previous grammatical state. @@ -9912,7 +9912,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>GE</fixed-case>val: Tool for Debugging <fixed-case>NLP</fixed-case> Datasets and Models - FilipGraliński + FilipGraliński AnnaWróblewska TomaszStanisławek KamilGrabowski @@ -9938,7 +9938,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat KevinClark UrvashiKhandelwal OmerLevy - Christopher D.Manning + Christopher D.Manning 276–286 Large pre-trained neural networks such as BERT have had great recent success in NLP, motivating a growing body of research investigating what aspects of language they are able to learn from unlabeled data. Most recent analysis has focused on model outputs (e.g., language model surprisal) or internal vector representations (e.g., probing classifiers). Complementary to these works, we propose methods for analyzing the attention mechanisms of pre-trained models and apply them to BERT. BERT’s attention heads exhibit patterns such as attending to delimiter tokens, specific positional offsets, or broadly attending over the whole sentence, with heads in the same layer often exhibiting similar behaviors. We further show that certain attention heads correspond well to linguistic notions of syntax and coreference. For example, we find heads that attend to the direct objects of verbs, determiners of nouns, objects of prepositions, and coreferent mentions with remarkably high accuracy. Lastly, we propose an attention-based probing classifier and use it to further demonstrate that substantial syntactic information is captured in BERT’s attention. W19-4828 @@ -9951,7 +9951,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of TyP-NLP: The First Workshop on Typology for Polyglot NLP W19-49 HaimDubossarsky - Arya D.McCarthy + Arya D.McCarthy Edoardo MariaPonti IvanVulić EkaterinaVylomova @@ -9976,9 +9976,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of the 18th BioNLP Workshop and Shared Task W19-50 DinaDemner-Fushman - Kevin BretonnelCohen + Kevin BretonnelCohen SophiaAnaniadou - JunichiTsujii + JunichiTsujii Association for Computational Linguistics
Florence, Italy
August @@ -10048,7 +10048,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
Transfer Learning in Biomedical Natural Language Processing: An Evaluation of <fixed-case>BERT</fixed-case> and <fixed-case>ELM</fixed-case>o on Ten Benchmarking Datasets - YifanPeng + YifanPeng ShankaiYan ZhiyongLu 58–65 @@ -10073,7 +10073,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>M</fixed-case>o<fixed-case>NER</fixed-case>o: a Biomedical Gold Standard Corpus for the <fixed-case>R</fixed-case>omanian Language MariaMitrofan - VerginicaBarbu Mititelu + VerginicaBarbu Mititelu GrigorinaMitrofan 71–79 In an era when large amounts of data are generated daily in various fields, the biomedical field among others, linguistic resources can be exploited for various tasks of Natural Language Processing. Moreover, increasing number of biomedical documents are available in languages other than English. To be able to extract information from natural language free text resources, methods and tools are needed for a variety of languages. This paper presents the creation of the MoNERo corpus, a gold standard biomedical corpus for Romanian, annotated with both part of speech tags and named entities. MoNERo comprises 154,825 morphologically annotated tokens and 23,188 entity annotations belonging to four entity semantic groups corresponding to UMLS Semantic Groups. @@ -10088,7 +10088,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat AdityaSiddhant AnirudhaRayasam NiketTandon - EduardHovy + EduardHovy 80–87 Domain adaptation remains one of the most challenging aspects in the wide-spread use of Semantic Role Labeling (SRL) systems. Current state-of-the-art methods are typically trained on large-scale datasets, but their performances do not directly transfer to low-resource domain-specific settings. In this paper, we propose two approaches for domain adaptation in the biological domain that involves pre-training LSTM-CRF based on existing large-scale datasets and adapting it for a low-resource corpus of biological processes. Our first approach defines a mapping between the source labels and the target labels, and the other approach modifies the final CRF layer in sequence-labeling neural network architecture. We perform our experiments on ProcessBank dataset which contains less than 200 paragraphs on biological processes. We improve over the previous state-of-the-art system on this dataset by 21 F1 points. We also show that, by incorporating event-event relationship in ProcessBank, we are able to achieve an additional 2.6 F1 gain, giving us possible insights into how to improve SRL systems for biological process using richer annotations. W19-5009 @@ -10111,7 +10111,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat HannaPylieva ArtemChernodub NataliaGrabar - ThierryHamon + ThierryHamon 97–104 Patients and their families often require a better understanding of medical information provided by doctors. We currently address this issue by improving the identification of difficult to understand medical words. We introduce novel embeddings received from RNN - FrnnMUTE (French RNN Medical Understandability Text Embeddings) which allow to reach up to 87.0 F1 score in identification of difficult words. We also note that adding pre-trained FastText word embeddings to the feature set substantially improves the performance of the model which classifies words according to their difficulty. We study the generalizability of different models through three cross-validation scenarios which allow testing classifiers in real-world conditions: understanding of medical words by new users, and classification of new unseen words by the automatic models. The RNN - FrnnMUTE embeddings and the categorization code are being made available for the research. W19-5011 @@ -10124,7 +10124,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat MariskaLeeflang RenéSpijker EvangelosKanoulas - AurélieNévéol + AurélieNévéol 105–114 Systematic reviews are important in evidence based medicine, but are expensive to produce. Automating or semi-automating the data extraction of index test, target condition, and reference standard from articles has the potential to decrease the cost of conducting systematic reviews of diagnostic test accuracy, but relevant training data is not available. We create a distantly supervised dataset of approximately 90,000 sentences, and let two experts manually annotate a small subset of around 1,000 sentences for evaluation. We evaluate the performance of BioBERT and logistic regression for ranking the sentences, and compare the performance for distant and direct supervision. Our results suggest that distant supervision can work as well as, or better than direct supervision on this problem, and that distantly trained models can perform as well as, or better than human annotators. W19-5012 @@ -10137,7 +10137,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat TsantaRandriatsitohaina FleurMougin NataliaGrabar - ThierryHamon + ThierryHamon 115–124 In this paper, we address the problem of automatically constructing a relevant corpus of scientific articles about food-drug interactions. There is a growing number of scientific publications that describe food-drug interactions but currently building a high-coverage corpus that can be used for information extraction purposes is not trivial. We investigate several methods for automating the query selection process using an expert-curated corpus of food-drug interactions. Our experiments show that index term features along with a decision tree classifier are the best approach for this task and that feature selection approaches and in particular gain ratio outperform frequency-based methods for query selection. W19-5013 @@ -10148,7 +10148,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Enhancing biomedical word embeddings by retrofitting to verb clusters BillyChiu SimonBaker - MarthaPalmer + MarthaPalmer AnnaKorhonen 125–134 Verbs play a fundamental role in many biomed-ical tasks and applications such as relation and event extraction. We hypothesize that performance on many downstream tasks can be improved by aligning the input pretrained embeddings according to semantic verb classes. In this work, we show that by using semantic clusters for verbs, a large lexicon of verbclasses derived from biomedical literature, weare able to improve the performance of common pretrained embeddings in downstream tasks by retrofitting them to verb classes. We present a simple and computationally efficient approach using a widely-available “off-the-shelf” retrofitting algorithm to align pretrained embeddings according to semantic verb clusters. We achieve state-of-the-art results on text classification and relation extraction tasks. @@ -10161,7 +10161,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat AdityaJoshi SarvnazKarimi RossSparks - CecileParis + CecileParis C RainaMacIntyre 135–141 Distributed representations of text can be used as features when training a statistical classifier. These representations may be created as a composition of word vectors or as context-based sentence vectors. We compare the two kinds of representations (word versus context) for three classification problems: influenza infection classification, drug usage classification and personal health mention classification. For statistical classifiers trained for each of these problems, context-based representations based on ELMo, Universal Sentence Encoder, Neural-Net Language Model and FLAIR are better than Word2Vec, GloVe and the two adapted using the MESH ontology. There is an improvement of 2-4% in the accuracy when these context-based representations are used instead of word-based representations. @@ -10192,7 +10192,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Incorporating Figure Captions and Descriptive Text in <fixed-case>M</fixed-case>e<fixed-case>SH</fixed-case> Term Indexing XindiWang - Robert E.Mercer + Robert E.Mercer 165–175 The goal of text classification is to automatically assign categories to documents. Deep learning automatically learns effective features from data instead of adopting human-designed features. In this paper, we focus specifically on biomedical document classification using a deep learning approach. We present a novel multichannel TextCNN model for MeSH term indexing. Beyond the normal use of the text from the abstract and title for model training, we also consider figure and table captions, as well as paragraphs associated with the figures and tables. We demonstrate that these latter text sources are important feature sources for our method. A new dataset consisting of these text segments curated from 257,590 full text articles together with the articles’ MEDLINE/PubMed MeSH terms is publicly available. W19-5018 @@ -10265,7 +10265,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Contributions to Clinical Named Entity Recognition in <fixed-case>P</fixed-case>ortuguese FábioLopes CésarTeixeira - HugoGonçalo Oliveira + HugoGonçalo Oliveira 223–233 Having in mind that different languages might present different challenges, this paper presents the following contributions to the area of Information Extraction from clinical text, targeting the Portuguese language: a collection of 281 clinical texts in this language, with manually-annotated named entities; word embeddings trained in a larger collection of similar texts; results of using BiLSTM-CRF neural networks for named entity recognition on the annotated collection, including a comparison of using in-domain or out-of-domain word embeddings in this task. Although learned with much less data, performance is higher when using in-domain embeddings. When tested in 20 independent clinical texts, this model achieved better results than a model using larger out-of-domain embeddings. W19-5024 @@ -10325,7 +10325,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat CyrilGrouin NataliaGrabar VincentClaveau - ThierryHamon + ThierryHamon 273–282 Textual data are useful for accessing expert information. Yet, since the texts are representative of distinct language uses, it is necessary to build specific corpora in order to be able to design suitable NLP tools. In some domains, such as medical domain, it may be complicated to access the representative textual data and their semantic annotations, while there exists a real need for providing efficient tools and methods. Our paper presents a corpus of clinical cases written in French, and their semantic annotations. Thus, we manually annotated a set of 717 files into four general categories (age, gender, outcome, and origin) for a total number of 2,835 annotations. The values of age, gender, and outcome are normalized. A subset with 70 files has been additionally manually annotated into 27 categories for a total number of 5,198 annotations. W19-5029 @@ -10336,7 +10336,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Two-stage Federated Phenotyping and Patient Representation Learning DianboLiu DmitriyDligach - TimothyMiller + TimothyMiller 283–291 A large percentage of medical information is in unstructured text format in electronic medical record systems. Manual extraction of information from clinical notes is extremely time consuming. Natural language processing has been widely used in recent years for automatic information extraction from medical texts. However, algorithms trained on data from a single healthcare provider are not generalizable and error-prone due to the heterogeneity and uniqueness of medical documents. We develop a two-stage federated natural language processing method that enables utilization of clinical notes from different hospitals or clinics without moving the data, and demonstrate its performance using obesity and comorbities phenotyping as medical task. This approach not only improves the quality of a specific clinical task but also facilitates knowledge progression in the whole healthcare system, which is an essential part of learning health system. To the best of our knowledge, this is the first application of federated machine learning in clinical NLP. W19-5030 @@ -10400,9 +10400,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat SaberAkhondi CamiloThorne ChristianDruckenbrodt - TrevorCohn - MichelleGregory - KarinVerspoor + TrevorCohn + MichelleGregory + KarinVerspoor 328–338 Chemical patents are an important resource for chemical information. However, few chemical Named Entity Recognition (NER) systems have been evaluated on patent documents, due in part to their structural and linguistic complexity. In this paper, we explore the NER performance of a BiLSTM-CRF model utilising pre-trained word embeddings, character-level word representations and contextualized ELMo word representations for chemical patents. We compare word embeddings pre-trained on biomedical and chemical patent corpora. The effect of tokenizers optimized for the chemical domain on NER performance in chemical patents is also explored. The results on two patent corpora show that contextualized word representations generated from ELMo substantially improve chemical NER performance w.r.t. the current state-of-the-art. We also show that domain-specific resources such as word embeddings trained on chemical patents and chemical-specific tokenizers, have a positive impact on NER performance. W19-5035 @@ -10434,7 +10434,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Extracting relations between outcomes and significance levels in Randomized Controlled Trials (<fixed-case>RCT</fixed-case>s) publications AnnaKoroleva - PatrickParoubek + PatrickParoubek 359–369 Randomized controlled trials assess the effects of an experimental intervention by comparing it to a control intervention with regard to some variables - trial outcomes. Statistical hypothesis testing is used to test if the experimental intervention is superior to the control. Statistical significance is typically reported for the measured outcomes and is an important characteristic of the results. We propose a machine learning approach to automatically extract reported outcomes, significance levels and the relation between them. We annotated a corpus of 663 sentences with 2,552 outcome - significance level relations (1,372 positive and 1,180 negative relations). We compared several classifiers, using a manually crafted feature set, and a number of deep learning models. The best performance (F-measure of 94%) was shown by the BioBERT fine-tuned model. W19-5038 @@ -10474,7 +10474,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat ShefaliGarg SheetalShalini PrashantGupta - EricNyberg + EricNyberg TerukoMitamura 389–398 Parallel deep learning architectures like fine-tuned BERT and MT-DNN, have quickly become the state of the art, bypassing previous deep and shallow learning methods by a large margin. More recently, pre-trained models from large related datasets have been able to perform well on many downstream tasks by just fine-tuning on domain-specific datasets (similar to transfer learning). However, using powerful models on non-trivial tasks, such as ranking and large document classification, still remains a challenge due to input size limitations of parallel architecture and extremely small datasets (insufficient for fine-tuning). In this work, we introduce an end-to-end system, trained in a multi-task setting, to filter and re-rank answers in the medical domain. We use task-specific pre-trained models as deep feature extractors. Our model achieves the highest Spearman’s Rho and Mean Reciprocal Rank of 0.338 and 0.9622 respectively, on the ACL-BioNLP workshop MediQA Question Answering shared-task. @@ -10532,7 +10532,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>DUT</fixed-case>-<fixed-case>NLP</fixed-case> at <fixed-case>MEDIQA</fixed-case> 2019: An Adversarial Multi-Task Network to Jointly Model Recognizing Question Entailment and Question Answering - HuiweiZhou + HuiweiZhou XuefeiLi WeihongYao ChengkunLang @@ -10545,7 +10545,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>DUT</fixed-case>-<fixed-case>BIM</fixed-case> at <fixed-case>MEDIQA</fixed-case> 2019: Utilizing Transformer Network and Medical Domain-Specific Contextualized Representations for Question Answering - HuiweiZhou + HuiweiZhou BizunLei ZheLiu ZhuangLiu @@ -10562,7 +10562,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat AditiChaudhary JamesRoute TerukoMitamura - EricNyberg + EricNyberg 453–461 This paper presents the submissions by TeamDr.Quad to the ACL-BioNLP 2019 shared task on Textual Inference and Question Entailment in the Medical Domain. Our system is based on the prior work Liu et al. (2019) which uses a multi-task objective function for textual entailment. In this work, we explore different strategies for generalizing state-of-the-art language understanding models to the specialized medical domain. Our results on the shared task demonstrate that incorporating domain knowledge through data augmentation is a powerful strategy for addressing challenges posed specialized domains such as medicine. W19-5048 @@ -10574,7 +10574,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Sai AbishekBhaskar RashiRungta JamesRoute - EricNyberg + EricNyberg TerukoMitamura 462–470 This paper presents a multi-task learning approach to natural language inference (NLI) and question entailment (RQE) in the biomedical domain. Recognizing textual inference relations and question similarity can address the issue of answering new consumer health questions by mapping them to Frequently Asked Questions on reputed websites like the NIH. We show that leveraging information from parallel tasks across domains along with medical knowledge integration allows our model to learn better biomedical feature representations. Our final models for the NLI and RQE tasks achieve the 4th and 2nd rank on the shared-task leaderboard respectively. @@ -10662,7 +10662,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>L</fixed-case>asige<fixed-case>B</fixed-case>io<fixed-case>TM</fixed-case> at <fixed-case>MEDIQA</fixed-case> 2019: Biomedical Question Answering using Bidirectional Transformers and Named Entity Recognition - AndreLamurias + AndreLamurias Francisco MCouto 523–527 Biomedical Question Answering (QA) aims at providing automated answers to user questions, regarding a variety of biomedical topics. For example, these questions may ask for related to diseases, drugs, symptoms, or medical procedures. Automated biomedical QA systems could improve the retrieval of information necessary to answer these questions. The MEDIQA challenge consisted of three tasks concerning various aspects of biomedical QA. This challenge aimed at advancing approaches to Natural Language Inference (NLI) and Recognizing Question Entailment (RQE), which would then result in enhanced approaches to biomedical QA. Our approach explored a common Transformer-based architecture that could be applied to each task. This approach shared the same pre-trained weights, but which were then fine-tuned for each task using the provided training data. Furthermore, we augmented the training data with external datasets and enriched the question and answer texts using MER, a named entity recognition tool. Our approach obtained high levels of accuracy, in particular on the NLI task, which classified pairs of text according to their relation. For the QA task, we obtained higher Spearman’s rank correlation values using the entities recognized by MER. @@ -10702,10 +10702,10 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of the Joint Workshop on Multiword Expressions and WordNet (MWE-WN 2019) W19-51 AgataSavary - Carla ParraEscartín + Carla ParraEscartín FrancisBond JelenaMitrović - Verginica BarbuMititelu + Verginica BarbuMititelu Association for Computational Linguistics
Florence, Italy
August @@ -10743,7 +10743,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat The <fixed-case>R</fixed-case>omanian Corpus Annotated with Verbal Multiword Expressions VerginicaBarbu Mititelu MihaelaCristescu - MihaelaOnofrei + MihaelaOnofrei 13–21 This paper reports on the Romanian journalistic corpus annotated with verbal multiword expressions following the PARSEME guidelines. The corpus is sentence split, tokenized, part-of-speech tagged, lemmatized, syntactically annotated and verbal multiword expressions are identified and classified. It offers insights into the frequency of such Romanian word combinations and allows for their characterization. We offer data about the types of verbal multiword expressions in the corpus and some of their characteristics, such as internal structure, diversity in the corpus, average length, productivity of the verbs. This is a language resource that is important per se, as well as for the task of automatic multiword expressions identification, which can be further used in other systems. It was already used as training and test material in the shared tasks for the automatic identification of verbal multiword expressions organized by PARSEME. W19-5103 @@ -10764,7 +10764,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Learning to Predict Novel Noun-Noun Compounds PrajitDhar - Lonnekevan der Plas + Lonnekevan der Plas 30–39 We introduce temporally and contextually-aware models for the novel task of predicting unseen but plausible concepts, as conveyed by noun-noun compounds in a time-stamped corpus. We train compositional models on observed compounds, more specifically the composed distributed representations of their constituents across a time-stamped corpus, while giving it corrupted instances (where head or modifier are replaced by a random constituent) as negative evidence. The model captures generalisations over this data and learns what combinations give rise to plausible compounds and which ones do not. After training, we query the model for the plausibility of automatically generated novel combinations and verify whether the classifications are accurate. For our best model, we find that in around 85% of the cases, the novel compounds generated are attested in previously unseen data. An additional estimated 5% are plausible despite not being attested in the recent corpus, based on judgments from independent human raters. W19-5105 @@ -10784,8 +10784,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat A comparison of statistical association measures for identifying dependency-based collocations in various languages. MarcosGarcia - MarcosGarcía Salido - MargaritaAlonso-Ramos + MarcosGarcía Salido + MargaritaAlonso-Ramos 49–59 This paper presents an exploration of different statistical association measures to automatically identify collocations from corpora in English, Portuguese, and Spanish. To evaluate the impact of the association metrics we manually annotated corpora with three different syntactic patterns of collocations (adjective-noun, verb-object and nominal compounds). We took advantage of the PARSEME 1.1 Shared Task corpora by selecting a subset of 155k tokens in the three referred languages, in which we annotated 1,526 collocations with the corresponding Lexical Functions according to the Meaning-Text Theory. Using the resulting gold-standard, we have carried out a comparison between frequency data and several well-known association measures, both symmetric and asymmetric. The results show that the combination of dependency triples with raw frequency information is as powerful as the best association measures in most syntactic patterns and languages. Furthermore, and despite the asymmetric behaviour of collocations, directional approaches perform worse than the symmetric ones in the extraction of these phraseological combinations. W19-5107 @@ -10807,7 +10807,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Modeling <fixed-case>MWE</fixed-case>s in <fixed-case>BTB</fixed-case>-<fixed-case>WN</fixed-case> LaskaLaskova PetyaOsenova - KirilSimov + KirilSimov IvajloRadev ZaraKancheva 70–78 @@ -10819,7 +10819,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Without lexicons, multiword expression identification will never fly: A position statement AgataSavary - SilvioCordeiro + SilvioCordeiro CarlosRamisch 79–91 Because most multiword expressions (MWEs), especially verbal ones, are semantically non-compositional, their automatic identification in running text is a prerequisite for semantically-oriented downstream applications. However, recent developments, driven notably by the PARSEME shared task on automatic identification of verbal MWEs, show that this task is harder than related tasks, despite recent contributions both in multilingual corpus annotation and in computational models. In this paper, we analyse possible reasons for this state of affairs. They lie in the nature of the MWE phenomenon, as well as in its distributional properties. We also offer a comparative analysis of the state-of-the-art systems, which exhibit particularly strong sensitivity to unseen data. On this basis, we claim that, in order to make strong headway in MWE identification, the community should bend its mind into coupling identification of MWEs with their discovery, via syntactic MWE lexicons. Such lexicons need not necessarily achieve a linguistically complete modelling of MWEs’ behavior, but they should provide minimal morphosyntactic information to cover some potential uses, so as to complement existing MWE-annotated corpora. We define requirements for such minimal NLP-oriented lexicon, and we propose a roadmap for the MWE community driven by these requirements. @@ -10840,7 +10840,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Semantic Modelling of Adjective-Noun Collocations Using <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et YanaStrakatova - ErhardHinrichs + ErhardHinrichs 104–113 In this paper we argue that Frame Semantics (Fillmore, 1982) provides a good framework for semantic modelling of adjective-noun collocations. More specifically, the notion of a frame is rich enough to account for nouns from different semantic classes and to model semantic relations that hold between an adjective and a noun in terms of Frame Elements. We have substantiated these findings by considering a sample of adjective-noun collocations from German such as “enger Freund” ‘close friend’ and “starker Regen” ‘heavy rain’. The data sample is taken from different semantic fields identified in the German wordnet GermaNet (Hamp and Feldweg, 1997; Henrich and Hinrichs, 2010). The study is based on the electronic dictionary DWDS (Klein and Geyken, 2010) and uses the collocation extraction tool Wortprofil (Geyken et al., 2009). The FrameNet modelling is based on the online resource available at http://framenet.icsi.berkeley.edu. Since FrameNets are available for a range of typologically different languages, it is feasible to extend the current case study to other languages. W19-5112 @@ -10849,7 +10849,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat A Neural Graph-based Approach to Verbal <fixed-case>MWE</fixed-case> Identification - JakubWaszczuk + JakubWaszczuk RafaelEhren ReginaStodden LauraKallmeyer @@ -10871,7 +10871,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>IDION</fixed-case>: A database for <fixed-case>M</fixed-case>odern <fixed-case>G</fixed-case>reek multiword expressions - StellaMarkantonatou + StellaMarkantonatou PanagiotisMinos GeorgeZakis VassilikiMoutzouri @@ -10884,7 +10884,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Identification of Adjective-Noun Neologisms using Pretrained Language Models - John PhilipMcCrae + John PhilipMcCrae 135–141 Neologism detection is a key task in the constructing of lexical resources and has wider implications for NLP, however the identification of multiword neologisms has received little attention. In this paper, we show that we can effectively identify the distinction between compositional and non-compositional adjective-noun pairs by using pretrained language models and comparing this with individual word embeddings. Our results show that the use of these models significantly improves over baseline linguistic features, however the combination with linguistic features still further improves the results, suggesting the strength of a hybrid approach. W19-5116 @@ -10894,7 +10894,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Neural Lemmatization of Multiword Expressions MarineSchmitt - MathieuConstant + MathieuConstant 142–148 This article focuses on the lemmatization of multiword expressions (MWEs). We propose a deep encoder-decoder architecture generating for every MWE word its corresponding part in the lemma, based on the internal context of the MWE. The encoder relies on recurrent networks based on (1) the character sequence of the individual words to capture their morphological properties, and (2) the word sequence of the MWE to capture lexical and syntactic properties. The decoder in charge of generating the corresponding part of the lemma for each word of the MWE is based on a classical character-level attention-based recurrent model. Our model is evaluated for Italian, French, Polish and Portuguese and shows good performances except for Polish. W19-5117 @@ -10906,7 +10906,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat AntonioŠajatović MajaBuljan JanŠnajder - BojanaDalbelo Bašić + BojanaDalbelo Bašić 149–154 Automatic Term Extraction (ATE) extracts terminology from domain-specific corpora. ATE is used in many NLP tasks, including Computer Assisted Translation, where it is typically applied to individual documents rather than the entire corpus. While corpus-level ATE has been extensively evaluated, it is not obvious how the results transfer to document-level ATE. To fill this gap, we evaluate 16 state-of-the-art ATE methods on full-length documents from three different domains, on both corpus and document levels. Unlike existing studies, our evaluation is more realistic as we take into account all gold terms. We show that no single method is best in corpus-level ATE, but C-Value and KeyConceptRelatendess surpass others in document-level ATE. W19-5118 @@ -10939,7 +10939,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat The Impact of Word Representations on Sequential Neural <fixed-case>MWE</fixed-case> Identification NicolasZampieri CarlosRamisch - GeraldineDamnati + GeraldineDamnati 169–175 Recent initiatives such as the PARSEME shared task allowed the rapid development of MWE identification systems. Many of those are based on recent NLP advances, using neural sequence models that take continuous word representations as input. We study two related questions in neural MWE identification: (a) the use of lemmas and/or surface forms as input features, and (b) the use of word-based or character-based embeddings to represent them. Our experiments on Basque, French, and Polish show that character-based representations yield systematically better results than word-based ones. In some cases, character-based representations of surface forms can be used as a proxy for lemmas, depending on the morphological complexity of the language. W19-5121 @@ -10951,23 +10951,23 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of the Fourth Conference on Machine Translation (Volume 1: Research Papers) W19-52 - OndřejBojar - RajenChatterjee + OndřejBojar + RajenChatterjee ChristianFedermann MarkFishel YvetteGraham BarryHaddow MatthiasHuck - Antonio JimenoYepes + Antonio JimenoYepes PhilippKoehn - AndréMartins + AndréMartins ChristofMonz - MatteoNegri - AurélieNévéol + MatteoNegri + AurélieNévéol MarianaNeves MattPost MarcoTurchi - KarinVerspoor + KarinVerspoor Association for Computational Linguistics
Florence, Italy
August @@ -10992,10 +10992,10 @@ One of the references was wrong therefore it is corrected to cite the appropriat
Improving Zero-shot Translation with Language-Independent Constraints - Ngoc-QuanPham + Ngoc-QuanPham JanNiehues Thanh-LeHa - AlexanderWaibel + AlexanderWaibel 13–23 An important concern in training multilingual neural machine translation (NMT) is to translate between language pairs unseen during training, i.e zero-shot translation. Improving this ability kills two birds with one stone by providing an alternative to pivot translation which also allows us to better understand how the model captures information between languages. In this work, we carried out an investigation on this capability of the multilingual NMT models. First, we intentionally create an encoder architecture which is independent with respect to the source language. Such experiments shed light on the ability of NMT encoders to learn multilingual representations, in general. Based on such proof of concept, we were able to design regularization methods into the standard Transformer model, so that the whole architecture becomes more robust in zero-shot conditions. We investigated the behaviour of such models on the standard IWSLT 2017 multilingual dataset. We achieved an average improvement of 2.23 BLEU points across 12 language pairs compared to the zero-shot performance of a state-of-the-art multilingual system. Additionally, we carry out further experiments in which the effect is confirmed even for language pairs with multiple intermediate pivots. W19-5202 @@ -11029,7 +11029,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat YunsuKim JulianSchamper ShahramKhadivi - HermannNey + HermannNey 45–52 Back-translation — data augmentation by translating target monolingual data — is a crucial component in modern neural machine translation (NMT). In this work, we reformulate back-translation in the scope of cross-entropy optimization of an NMT model, clarifying its underlying mathematical assumptions and approximations beyond its heuristic usage. Our formulation covers broader synthetic data generation schemes, including sampling from a target-to-source NMT model. With this formulation, we point out fundamental problems of the sampling-based approaches and propose to remedy them by (i) disabling label smoothing for the target-to-source model and (ii) sampling from a restricted search space. Our statements are investigated on the WMT 2018 German <-> English news translation task. W19-5205 @@ -11127,23 +11127,23 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of the Fourth Conference on Machine Translation (Volume 2: Shared Task Papers, Day 1) W19-53 - OndřejBojar - RajenChatterjee + OndřejBojar + RajenChatterjee ChristianFedermann MarkFishel YvetteGraham BarryHaddow MatthiasHuck - Antonio JimenoYepes + Antonio JimenoYepes PhilippKoehn - AndréMartins + AndréMartins ChristofMonz - MatteoNegri - AurélieNévéol + MatteoNegri + AurélieNévéol MarianaNeves MattPost MarcoTurchi - KarinVerspoor + KarinVerspoor Association for Computational Linguistics
Florence, Italy
August @@ -11158,14 +11158,14 @@ One of the references was wrong therefore it is corrected to cite the appropriat Findings of the 2019 Conference on Machine Translation (<fixed-case>WMT</fixed-case>19) LoïcBarrault OndřejBojar - Marta R.Costa-jussà + Marta R.Costa-jussà ChristianFedermann MarkFishel YvetteGraham BarryHaddow MatthiasHuck PhilippKoehn - ShervinMalmasi + ShervinMalmasi ChristofMonz MathiasMüller SantanuPal @@ -11215,7 +11215,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat UlrichGermann RomanGrundkiewicz FaheemKirefu - Antonio ValerioMiceli Barone + Antonio ValerioMiceli Barone AlexandraBirch 103–115 The University of Edinburgh participated in the WMT19 Shared Task on News Translation in six language directions: English↔Gujarati, English↔Chinese, German→English, and English→Czech. For all translation directions, we created or used back-translations of monolingual data in the target language as additional synthetic training data. For English↔Gujarati, we also explored semi-supervised MT with cross-lingual language model pre-training, and translation pivoting through Hindi. For translation to and from Chinese, we investigated character-based tokenisation vs. sub-word segmentation of Chinese text. For German→English, we studied the impact of vast amounts of back-translated training data on translation quality, gaining a few additional insights over Edunov et al. (2018). For English→Czech, we compared different preprocessing and tokenisation regimes. @@ -11238,14 +11238,14 @@ One of the references was wrong therefore it is corrected to cite the appropriat
Machine Translation with parfda, <fixed-case>M</fixed-case>oses, kenlm, nplm, and <fixed-case>PRO</fixed-case> - ErgunBiçici + ErgunBiçici 122–128 We build parfda Moses statistical machine translation (SMT) models for most language pairs in the news translation task. We experiment with a hybrid approach using neural language models integrated into Moses. We obtain the constrained data statistics on the machine translation task, the coverage of the test sets, and the upper bounds on the translation results. We also contribute a new testsuite for the German-English language pair and a new automated key phrase extraction technique for the evaluation of the testsuite translations. W19-5306 Clarifies notation in Table 7, Figure 2 caption, and Table 4. - 10.18653/v1/W19-5306 Clarified notation in Table 7. + 10.18653/v1/W19-5306 bicici-2019-machine @@ -11295,10 +11295,10 @@ One of the references was wrong therefore it is corrected to cite the appropriat The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> Machine Translation Systems for <fixed-case>WMT</fixed-case>19 News Translation Task: Pivoting Techniques for Low Resource <fixed-case>MT</fixed-case> NoeCasas - José A. R.Fonollosa + José A. R.Fonollosa CarlosEscolano ChristineBasta - Marta R.Costa-jussà + Marta R.Costa-jussà 155–162 In this article, we describe the TALP-UPC research group participation in the WMT19 news translation shared task for Kazakh-English. Given the low amount of parallel training data, we resort to using Russian as pivot language, training subword-based statistical translation systems for Russian-Kazakh and Russian-English that were then used to create two synthetic pseudo-parallel corpora for Kazakh-English and English-Kazakh respectively. Finally, a self-attention model based on the decoder part of the Transformer architecture was trained on the two pseudo-parallel corpora. W19-5311 @@ -11308,7 +11308,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>K</fixed-case>yoto <fixed-case>U</fixed-case>niversity Participation to the <fixed-case>WMT</fixed-case> 2019 News Shared Task - FabienCromieres + FabienCromieres SadaoKurohashi 163–167 We describe here the experiments we did for the the news translation shared task of WMT 2019. We focused on the new German-to-French language direction, and mostly used current standard approaches to develop a Neural Machine Translation system. We make use of the Tensor2Tensor implementation of the Transformer model. After carefully cleaning the data and noting the importance of the good use of recent monolingual data for the task, we obtain our final result by combining the output of a diverse set of trained models through the use of their “checkpoint agreement”. @@ -11324,7 +11324,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat RuiWang AtsushiFujita MasaoUtiyama - EiichiroSumita + EiichiroSumita 168–174 In this paper, we describe our supervised neural machine translation (NMT) systems that we developed for the news translation task for Kazakh↔English, Gujarati↔English, Chinese↔English, and English→Finnish translation directions. We focused on leveraging multilingual transfer learning and back-translation for the extremely low-resource language pairs: Kazakh↔English and Gujarati↔English translation. For the Chinese↔English translation, we used the provided parallel data augmented with a large quantity of back-translated monolingual data to train state-of-the-art NMT systems. We then employed techniques that have been proven to be most effective, such as back-translation, fine-tuning, and model ensembling, to generate the primary submissions of Chinese↔English. For English→Finnish, our submission from WMT18 remains a strong baseline despite the increase in parallel corpora for this year’s task. W19-5313 @@ -11354,7 +11354,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat The <fixed-case>IIIT</fixed-case>-<fixed-case>H</fixed-case> <fixed-case>G</fixed-case>ujarati-<fixed-case>E</fixed-case>nglish Machine Translation System for <fixed-case>WMT</fixed-case>19 VikrantGoyal - Dipti MisraSharma + Dipti MisraSharma 191–195 This paper describes the Neural Machine Translation system of IIIT-Hyderabad for the Gujarati→English news translation shared task of WMT19. Our system is basedon encoder-decoder framework with attention mechanism. We experimented with Multilingual Neural MT models. Our experiments show that Multilingual Neural Machine Translation leveraging parallel data from related language pairs helps in significant BLEU improvements upto 11.5, for low resource language pairs like Gujarati-English W19-5316 @@ -11383,7 +11383,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat The <fixed-case>AFRL</fixed-case> <fixed-case>WMT</fixed-case>19 Systems: Old Favorites and New Tricks JeremyGwinnup GrantErdmann - TimAnderson + TimAnderson 203–208 This paper describes the Air Force Research Laboratory (AFRL) machine translation systems and the improvements that were developed during the WMT19 evaluation campaign. This year, we refine our approach to training popular neural machine translation toolkits, experiment with a new domain adaptation technique and again measure improvements in performance on the Russian–English language pair. W19-5318 @@ -11392,7 +11392,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Evaluating the Supervised and Zero-shot Performance of Multi-lingual Translation Models - ChrisHokamp + ChrisHokamp JohnGlover DemianGholipour Ghalandari 209–217 @@ -11404,9 +11404,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat The <fixed-case>MLLP</fixed-case>-<fixed-case>UPV</fixed-case> Supervised Machine Translation Systems for <fixed-case>WMT</fixed-case>19 News Translation Task JavierIranzo-Sánchez - Gonçal V.Garcés Díaz-Munío + Gonçal V.Garcés Díaz-Munío JorgeCivera - AlfonsJuan + AlfonsJuan 218–224 This paper describes the participation of the MLLP research group of the Universitat Politècnica de València in the WMT 2019 News Translation Shared Task. In this edition, we have submitted systems for the German ↔ English and German ↔ French language pairs, participating in both directions of each pair. Our submitted systems, based on the Transformer architecture, make ample use of data filtering, synthetic data and domain adaptation through fine-tuning. W19-5320 @@ -11498,7 +11498,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Incorporating Word and Subword Units in Unsupervised Machine Translation Using Language Model Rescoring ZihanLiu YanXu - Genta IndraWinata + Genta IndraWinata PascaleFung 275–282 This paper describes CAiRE’s submission to the unsupervised machine translation track of the WMT’19 news shared task from German to Czech. We leverage a phrase-based statistical machine translation (PBSMT) model and a pre-trained language model to combine word-level neural machine translation (NMT) and subword-level NMT models without using any parallel data. We propose to solve the morphological richness problem of languages by training byte-pair encoding (BPE) embeddings for German and Czech separately, and they are aligned using MUSE (Conneau et al., 2018). To ensure the fluency and consistency of translations, a rescoring mechanism is proposed that reuses the pre-trained language model to select the translation candidates generated through beam search. Moreover, a series of pre-processing and post-processing approaches are applied to improve the quality of final translations. @@ -11508,11 +11508,11 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>JUMT</fixed-case> at <fixed-case>WMT</fixed-case>2019 News Translation Task: A Hybrid Approach to Machine Translation for <fixed-case>L</fixed-case>ithuanian to <fixed-case>E</fixed-case>nglish - Sainik KumarMahata + Sainik KumarMahata AvishekGarain AdityarRayala DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 283–286 In the current work, we present a description of the system submitted to WMT 2019 News Translation Shared task. The system was created to translate news text from Lithuanian to English. To accomplish the given task, our system used a Word Embedding based Neural Machine Translation model to post edit the outputs generated by a Statistical Machine Translation model. The current paper documents the architecture of our model, descriptions of the various modules and the results produced using the same. Our system garnered a BLEU score of 17.6. W19-5328 @@ -11538,7 +11538,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat KehaiChen AtsushiFujita MasaoUtiyama - EiichiroSumita + EiichiroSumita 294–301 This paper presents the NICT’s participation in the WMT19 unsupervised news translation task. We participated in the unsupervised translation direction: German-Czech. Our primary submission to the task is the result of a simple combination of our unsupervised neural and statistical machine translation systems. Our system is ranked first for the German-to-Czech translation task, using only the data provided by the organizers (“constraint’”), according to both BLEU-cased and human evaluation. We also performed contrastive experiments with other language pairs, namely, English-Gujarati and English-Kazakh, to better assess the effectiveness of unsupervised machine translation in for distant language pairs and in truly low-resource conditions. W19-5330 @@ -11560,8 +11560,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat Shankha RajNayek AdityaChowdhury SantanuPal - Sudip KumarNaskar - Josefvan Genabith + Sudip KumarNaskar + Josefvan Genabith 308–313 In this paper we describe our joint submission (JU-Saarland) from Jadavpur University and Saarland University in the WMT 2019 news translation shared task for English–Gujarati language pair within the translation task sub-track. Our baseline and primary submissions are built using Recurrent neural network (RNN) based neural machine translation (NMT) system which follows attention mechanism. Given the fact that the two languages belong to different language families and there is not enough parallel data for this language pair, building a high quality NMT system for this language pair is a difficult task. We produced synthetic data through back-translation from available monolingual data. We report the translation quality of our English–Gujarati and Gujarati–English NMT systems trained at word, byte-pair and character encoding levels where RNN at word level is considered as the baseline and used for comparison purpose. Our English–Gujarati system ranked in the second position in the shared task. W19-5332 @@ -11585,10 +11585,10 @@ One of the references was wrong therefore it is corrected to cite the appropriat e<fixed-case>T</fixed-case>ranslation’s Submissions to the <fixed-case>WMT</fixed-case> 2019 News Translation Task CsabaOravecz - KatinaBontcheva + KatinaBontcheva AdrienLardilleux - LászlóTihanyi - AndreasEisele + LászlóTihanyi + AndreasEisele 320–326 This paper describes the submissions of the eTranslation team to the WMT 2019 news translation shared task. The systems have been developed with the aim of identifying and following rather than establishing best practices, under the constraints imposed by a low resource training and decoding environment normally used for our production systems. Thus most of the findings and results are transferable to systems used in the eTranslation service. Evaluations suggest that this approach is able to produce decent models with good performance and speed without the overhead of using prohibitively deep and complex architectures. W19-5334 @@ -11598,8 +11598,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat Tilde’s Machine Translation Systems for <fixed-case>WMT</fixed-case> 2019 - MarcisPinnis - RihardsKrišlauks + MarcisPinnis + RihardsKrišlauks MatīssRikters 327–334 The paper describes the development process of Tilde’s NMT systems for the WMT 2019 shared task on news translation. We trained systems for the English-Lithuanian and Lithuanian-English translation directions in constrained and unconstrained tracks. We build upon the best methods of the previous year’s competition and combine them with recent advancements in the field. We also present a new method to ensure source domain adherence in back-translated data. Our systems achieved a shared first place in human evaluation. @@ -11610,7 +11610,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Apertium-fin-eng–Rule-based Shallow Machine Translation for <fixed-case>WMT</fixed-case> 2019 Shared Task - TommiPirinen + TommiPirinen 335–341 In this paper we describe a rule-based, bi-directional machine translation system for the Finnish—English language pair. The baseline system was based on the existing data of FinnWordNet, omorfi and apertium-eng. We have built the disambiguation, lexical selection and translation rules by hand. The dictionaries and rules have been developed based on the shared task data. We describe in this article the use of the shared task data as a kind of a test-driven development workflow in RBMT development and show that it suits perfectly to a modern software engineering continuous integration workflow of RBMT and yields big increases to BLEU scores with minimal effort. W19-5336 @@ -11640,7 +11640,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat WeiyueWang ParniaBahar YingboGao - HermannNey + HermannNey 349–355 This paper describes the neural machine translation systems developed at the RWTH Aachen University for the German-English, Chinese-English and Kazakh-English news translation tasks of the Fourth Conference on Machine Translation (WMT19). For all tasks, the final submitted system is based on the Transformer architecture. We focus on improving data filtering and fine-tuning as well as systematically evaluating interesting approaches like unigram language model segmentation and transfer learning. For the De-En task, none of the tested methods gave a significant improvement over last years winning system and we end up with the same performance, resulting in 39.6% BLEU on newstest2019. In the Zh-En task, we show 1.3% BLEU improvement over our last year’s submission, which we mostly attribute to the splitting of long sentences during translation. We further report results on the Kazakh-English task where we gain improvements of 11.1% BLEU over our baseline system. On the same task we present a recent transfer learning approach, which uses half of the free parameters of our submission system and performs on par with it. W19-5338 @@ -11649,7 +11649,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat The <fixed-case>U</fixed-case>niversitat d’Alacant Submissions to the <fixed-case>E</fixed-case>nglish-to-<fixed-case>K</fixed-case>azakh News Translation Task at <fixed-case>WMT</fixed-case> 2019 - Víctor M.Sánchez-Cartagena + Víctor M.Sánchez-Cartagena Juan AntonioPérez-Ortiz FelipeSánchez-Martínez 356–363 @@ -11662,7 +11662,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>CUED</fixed-case>@<fixed-case>WMT</fixed-case>19:<fixed-case>EWC</fixed-case>&<fixed-case>LM</fixed-case>s FelixStahlberg DanielleSaunders - Adriàde Gispert + Adriàde Gispert BillByrne 364–373 Two techniques provide the fabric of the Cambridge University Engineering Department’s (CUED) entry to the WMT19 evaluation campaign: elastic weight consolidation (EWC) and different forms of language modelling (LMs). We report substantial gains by fine-tuning very strong baselines on former WMT test sets using a combination of checkpoint averaging and EWC. A sentence-level Transformer LM and a document-level LM based on a modified Transformer architecture yield further gains. As in previous years, we also extract n-gram probabilities from SMT lattices which can be seen as a source-conditioned n-gram LM. @@ -11712,7 +11712,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat DarioStojanovski ViktorHangya MatthiasHuck - AlexanderFraser + AlexanderFraser 393–399 We describe LMU Munich’s machine translation system for German→Czech translation which was used to participate in the WMT19 shared task on unsupervised news translation. We train our model using monolingual data only from both languages. The final model is an unsupervised neural model using established techniques for unsupervised translation such as denoising autoencoding and online back-translation. We bootstrap the model with masked language model pretraining and enhance it with back-translations from an unsupervised phrase-based system which is itself bootstrapped using unsupervised bilingual word embeddings. W19-5344 @@ -11722,7 +11722,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Combining Local and Document-Level Context: The <fixed-case>LMU</fixed-case> <fixed-case>M</fixed-case>unich Neural Machine Translation System at <fixed-case>WMT</fixed-case>19 DarioStojanovski - AlexanderFraser + AlexanderFraser 400–406 We describe LMU Munich’s machine translation system for English→German translation which was used to participate in the WMT19 shared task on supervised news translation. We specifically participated in the document-level MT track. The system used as a primary submission is a context-aware Transformer capable of both rich modeling of limited contextual information and integration of large-scale document-level context with a less rich representation. We train this model by fine-tuning a big Transformer baseline. Our experimental results show that document-level context provides for large improvements in translation quality, and adding a rich representation of the previous sentence provides a small additional gain. W19-5345 @@ -11734,7 +11734,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat SukantaSen Kamal KumarGupta AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 407–411 We describe our submission to WMT 2019 News translation shared task for Gujarati-English language pair. We submit constrained systems, i.e, we rely on the data provided for this language pair and do not use any external data. We train Transformer based subword-level neural machine translation (NMT) system using original parallel corpus along with synthetic parallel corpus obtained through back-translation of monolingual data. Our primary systems achieve BLEU scores of 10.4 and 8.1 for Gujarati→English and English→Gujarati, respectively. We observe that incorporating monolingual data through back-translation improves the BLEU score significantly over baseline NMT and SMT systems for this language pair. W19-5346 @@ -11750,7 +11750,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat SamiVirpioja AlessandroRaganato ArviHurskainen - JörgTiedemann + JörgTiedemann 412–423 In this paper we present the University of Helsinki submissions to the WMT 2019 shared news translation task in three language pairs: English-German, English-Finnish and Finnish-English. This year we focused first on cleaning and filtering the training data using multiple data-filtering approaches, resulting in much smaller and cleaner training sets. For English-German we trained both sentence-level transformer models as well as compared different document-level translation approaches. For Finnish-English and English-Finnish we focused on different segmentation approaches and we also included a rule-based system for English-Finnish. W19-5347 @@ -11792,7 +11792,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>DFKI</fixed-case>-<fixed-case>NMT</fixed-case> Submission to the <fixed-case>WMT</fixed-case>19 News Translation Task JingyiZhang - Josefvan Genabith + Josefvan Genabith 440–444 This paper describes the DFKI-NMT submission to the WMT19 News translation task. We participated in both English-to-German and German-to-English directions. We trained Transformer models and adopted various techniques for effectively training our models, including data selection, back-translation and in-domain fine-tuning. We give a detailed analysis of the performance of our system. W19-5350 @@ -11828,7 +11828,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Evaluating Conjunction Disambiguation on <fixed-case>E</fixed-case>nglish-to-<fixed-case>G</fixed-case>erman and <fixed-case>F</fixed-case>rench-to-<fixed-case>G</fixed-case>erman <fixed-case>WMT</fixed-case> 2019 Translation Hypotheses - MajaPopović + MajaPopović 464–469 We present a test set for evaluating an MT system’s capability to translate ambiguous conjunctions depending on the sentence structure. We concentrate on the English conjunction “but” and its French equivalent “mais” which can be translated into two different German conjunctions. We evaluate all English-to-German and French-to-German submissions to the WMT 2019 shared translation task. The evaluation is done mainly automatically, with additional fast manual inspection of unclear cases. All systems almost perfectly recognise the target conjunction “aber”, whereas accuracies for the other target conjunction “sondern” range from 78% to 97%, and the errors are mostly caused by replacing it with the alternative conjunction “aber”. The best performing system for both language pairs is a multilingual Transformer “TartuNLP” system trained on all WMT 2019 language pairs which use the Latin script, indicating that the multilingual approach is beneficial for conjunction disambiguation. As for other system features, such as using synthetic back-translated data, context-aware, hybrid, etc., no particular (dis)advantages can be observed. Qualitative manual inspection of translation hypotheses shown that highly ranked systems generally produce translations with high adequacy and fluency, meaning that these systems are not only capable of capturing the right conjunction whereas the rest of the translation hypothesis is poor. On the other hand, the low ranked systems generally exhibit lower fluency and poor adequacy. W19-5353 @@ -11839,7 +11839,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat The <fixed-case>M</fixed-case>u<fixed-case>C</fixed-case>o<fixed-case>W</fixed-case> Test Suite at <fixed-case>WMT</fixed-case> 2019: Automatically Harvested Multilingual Contrastive Word Sense Disambiguation Test Sets for Machine Translation AlessandroRaganato YvesScherrer - JörgTiedemann + JörgTiedemann 470–480 Supervised Neural Machine Translation (NMT) systems currently achieve impressive translation quality for many language pairs. One of the key features of a correct translation is the ability to perform word sense disambiguation (WSD), i.e., to translate an ambiguous word with its correct sense. Existing evaluation benchmarks on WSD capabilities of translation systems rely heavily on manual work and cover only few language pairs and few word types. We present MuCoW, a multilingual contrastive test suite that covers 16 language pairs with more than 200 thousand contrastive sentence pairs, automatically built from word-aligned parallel corpora and the wide-coverage multilingual sense inventory of BabelNet. We evaluate the quality of the ambiguity lexicons and of the resulting test suite on all submissions from 9 language pairs presented in the WMT19 news shared translation task, plus on other 5 language pairs using NMT pretrained models. The MuCoW test suite is available at http://github.com/Helsinki-NLP/MuCoW. W19-5354 @@ -11863,7 +11863,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>WMDO</fixed-case>: Fluency-based Word Mover’s Distance for Machine Translation Evaluation JulianChow LuciaSpecia - PranavaMadhyastha + PranavaMadhyastha 494–500 We propose WMDO, a metric based on distance between distributions in the semantic vector space. Matching in the semantic space has been investigated for translation evaluation, but the constraints of a translation’s word order have not been fully explored. Building on the Word Mover’s Distance metric and various word embeddings, we introduce a fragmentation penalty to account for fluency of a translation. This word order extension is shown to perform better than standard WMD, with promising results against other types of metrics. W19-5356 @@ -11893,7 +11893,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>EED</fixed-case>: Extended Edit Distance Measure for Machine Translation PeterStanchev WeiyueWang - HermannNey + HermannNey 514–520 Over the years a number of machine translation metrics have been developed in order to evaluate the accuracy and quality of machine-generated translations. Metrics such as BLEU and TER have been used for decades. However, with the rapid progress of machine translation systems, the need for better metrics is growing. This paper proposes an extension of the edit distance, which achieves better human correlation, whilst remaining fast, flexible and easy to understand. W19-5359 @@ -11927,7 +11927,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>NICT</fixed-case>’s Supervised Neural Machine Translation Systems for the <fixed-case>WMT</fixed-case>19 Translation Robustness Task RajDabre - EiichiroSumita + EiichiroSumita 533–536 In this paper we describe our neural machine translation (NMT) systems for Japanese↔English translation which we submitted to the translation robustness task. We focused on leveraging transfer learning via fine tuning to improve translation quality. We used a fairly well established domain adaptation technique called Mixed Fine Tuning (MFT) (Chu et. al., 2017) to improve translation quality for Japanese↔English. We also trained bi-directional NMT models instead of uni-directional ones as the former are known to be quite robust, especially in low-resource scenarios. However, given the noisy nature of the in-domain training data, the improvements we obtained are rather modest. W19-5362 @@ -11945,7 +11945,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>CUNI</fixed-case> System for the <fixed-case>WMT</fixed-case>19 Robustness Task - JindřichHelcl + JindřichHelcl JindřichLibovický MartinPopel 539–543 @@ -12007,23 +12007,23 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2) W19-54 - OndřejBojar - RajenChatterjee + OndřejBojar + RajenChatterjee ChristianFedermann MarkFishel YvetteGraham BarryHaddow MatthiasHuck - Antonio JimenoYepes + Antonio JimenoYepes PhilippKoehn - AndréMartins + AndréMartins ChristofMonz - MatteoNegri - AurélieNévéol + MatteoNegri + AurélieNévéol MarianaNeves MattPost MarcoTurchi - KarinVerspoor + KarinVerspoor Association for Computational Linguistics
Florence, Italy
August @@ -12036,7 +12036,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Findings of the <fixed-case>WMT</fixed-case> 2019 Shared Tasks on Quality Estimation - ErickFonseca + ErickFonseca LisaYankovskaya André F. T.Martins MarkFishel @@ -12068,7 +12068,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat CristianGrozea AntonioJimeno Yepes MadeleineKittner - MartinKrallinger + MartinKrallinger NancyMah AurelieNeveol MarianaNeves @@ -12085,7 +12085,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Findings of the <fixed-case>WMT</fixed-case> 2019 Shared Task on Parallel Corpus Filtering for Low-Resource Conditions PhilippKoehn - FranciscoGuzmán + FranciscoGuzmán VishravChaudhary JuanPino 54–72 @@ -12096,7 +12096,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>RTM</fixed-case> Stacking Results for Machine Translation Performance Prediction - ErgunBiçici + ErgunBiçici 73–77 We obtain new results using referential translation machines with increased number of learning models in the set of results that are stacked to obtain a better mixture of experts prediction. We combine features extracted from the word-level predictions with the sentence- or document-level features, which significantly improve the results on the training sets but decrease the test set results. W19-5405 @@ -12105,12 +12105,12 @@ One of the references was wrong therefore it is corrected to cite the appropriat Unbabel’s Participation in the <fixed-case>WMT</fixed-case>19 Translation Quality Estimation Shared Task - FabioKepler + FabioKepler JonayTrénous MarcosTreviso MiguelVera AntónioGóis - M. AminFarajian + M. AminFarajian António V.Lopes André F. T.Martins 78–84 @@ -12186,7 +12186,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Unbabel’s Submission to the <fixed-case>WMT</fixed-case>2019 <fixed-case>APE</fixed-case> Shared Task: <fixed-case>BERT</fixed-case>-Based Encoder-Decoder for Automatic Post-Editing António V.Lopes - M. AminFarajian + M. AminFarajian Gonçalo M.Correia JonayTrénous André F. T.Martins @@ -12202,7 +12202,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat HongfeiXu NicoHerbig AntonioKrüger - Josefvan Genabith + Josefvan Genabith 124–131 In this paper we present an English–German Automatic Post-Editing (APE) system called transference, submitted to the APE Task organized at WMT 2019. Our transference model is based on a multi-encoder transformer architecture. Unlike previous approaches, it (i) uses a transformer encoder block for src, (ii) followed by a transformer decoder block, but without masking, for self-attention on mt, which effectively acts as second encoder combining src –> mt, and (iii) feeds this representation into a final decoder block generating pe. Our model improves over the raw black-box neural machine translation system by 0.9 and 1.0 absolute BLEU points on the WMT 2019 APE development and test set. Our submission ranked 3rd, however compared to the two top systems, performance differences are not statistically significant. W19-5414 @@ -12235,7 +12235,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>U</fixed-case>d<fixed-case>S</fixed-case> Submission for the <fixed-case>WMT</fixed-case> 19 Automatic Post-Editing Task HongfeiXu QiuhuiLiu - Josefvan Genabith + Josefvan Genabith 145–150 In this paper, we describe our submission to the English-German APE shared task at WMT 2019. We utilize and adapt an NMT architecture originally developed for exploiting context information to APE, implement this in our own transformer model and explore joint training of the APE task with a de-noising encoder. W19-5417 @@ -12247,8 +12247,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat Terminology-Aware Segmentation and Domain Feature for the <fixed-case>WMT</fixed-case>19 Biomedical Translation Task Casimiro PioCarrino BardiaRafieian - Marta R.Costa-jussà - José A. R.Fonollosa + Marta R.Costa-jussà + José A. R.Fonollosa 151–155 In this work, we give a description of the TALP-UPC systems submitted for the WMT19 Biomedical Translation Task. Our proposed strategy is NMT model-independent and relies only on one ingredient, a biomedical terminology list. We first extracted such a terminology list by labelling biomedical words in our training dataset using the BabelNet API. Then, we designed a data preparation strategy to insert the terms information at a token level. Finally, we trained the Transformer model with this terms-informed data. Our best-submitted system ranked 2nd and 3rd for Spanish-English and English-Spanish translation directions, respectively. W19-5418 @@ -12294,7 +12294,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>BSC</fixed-case> Participation in the <fixed-case>WMT</fixed-case> Translation of Biomedical Abstracts FelipeSoares - MartinKrallinger + MartinKrallinger 175–178 This paper describes the machine translation systems developed by the Barcelona Supercomputing (BSC) team for the biomedical translation shared task of WMT19. Our system is based on Neural Machine Translation unsing the OpenNMT-py toolkit and Transformer architecture. We participated in four translation directions for the English/Spanish and English/Portuguese language pairs. To create our training data, we concatenated several parallel corpora, both from in-domain and out-of-domain sources, as well as terminological resources from UMLS. W19-5422 @@ -12306,7 +12306,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat PauBaquero-Arnal JavierIranzo-Sánchez JorgeCivera - AlfonsJuan + AlfonsJuan 179–184 This paper describes the participation of the MLLP research group of the Universitat Politècnica de València in the WMT 2019 Similar Language Translation Shared Task. We have submitted systems for the Portuguese ↔ Spanish language pair, in both directions. We have submitted systems based on the Transformer architecture as well as an in development novel architecture which we have called 2D alternating RNN. We have carried out domain adaptation through fine-tuning. W19-5423 @@ -12317,7 +12317,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat The <fixed-case>TALP</fixed-case>-<fixed-case>UPC</fixed-case> System for the <fixed-case>WMT</fixed-case> Similar Language Task: Statistical vs Neural Machine Translation MagdalenaBiesialska LluisGuardia - Marta R.Costa-jussà + Marta R.Costa-jussà 185–191 Although the problem of similar language translation has been an area of research interest for many years, yet it is still far from being solved. In this paper, we study the performance of two popular approaches: statistical and neural. We conclude that both methods yield similar results; however, the performance varies depending on the language pair. While the statistical approach outperforms the neural one by a difference of 6 BLEU points for the Spanish-Portuguese language pair, the proposed neural model surpasses the statistical one by a difference of 2 BLEU points for Czech-Polish. In the former case, the language similarity (based on perplexity) is much higher than in the latter case. Additionally, we report negative results for the system combination with back-translation. Our TALP-UPC system submission won 1st place for Czech->Polish and 2nd place for Spanish->Portuguese in the official evaluation of the 1st WMT Similar Language Translation task. W19-5424 @@ -12337,7 +12337,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Utilizing Monolingual Data in <fixed-case>NMT</fixed-case> for Similar Languages: Submission to Similar Language Translation Task JyotsanaKhatri - PushpakBhattacharyya + PushpakBhattacharyya 197–201 This paper describes our submission to Shared Task on Similar Language Translation in Fourth Conference on Machine Translation (WMT 2019). We submitted three systems for Hindi -> Nepali direction in which we have examined the performance of a RNN based NMT system, a semi-supervised NMT system where monolingual data of both languages is utilized using the architecture by and a system trained with extra synthetic sentences generated using copy of source and target sentences without using any additional monolingual data. W19-5426 @@ -12348,7 +12348,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Neural Machine Translation: <fixed-case>H</fixed-case>indi-<fixed-case>N</fixed-case>epali Sahinur RahmanLaskar ParthaPakray - SivajiBandyopadhyay + SivajiBandyopadhyay 202–207 With the extensive use of Machine Translation (MT) technology, there is progressively interest in directly translating between pairs of similar languages. Because the main challenge is to overcome the limitation of available parallel data to produce a precise MT output. Current work relies on the Neural Machine Translation (NMT) with attention mechanism for the similar language translation of WMT19 shared task in the context of Hindi-Nepali pair. The NMT systems trained the Hindi-Nepali parallel corpus and tested, analyzed in Hindi ⇔ Nepali translation. The official result declared at WMT19 shared task, which shows that our NMT system obtained Bilingual Evaluation Understudy (BLEU) score 24.6 for primary configuration in Nepali to Hindi translation. Also, we have achieved BLEU score 53.7 (Hindi to Nepali) and 49.1 (Nepali to Hindi) in contrastive system type. W19-5427 @@ -12368,7 +12368,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Panlingua-<fixed-case>KMI</fixed-case> <fixed-case>MT</fixed-case> System for Similar Language Translation Task at <fixed-case>WMT</fixed-case> 2019 - Atul Kr.Ojha + Atul Kr.Ojha RiteshKumar AkankshaBansal PriyaRani @@ -12382,7 +12382,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>UDS</fixed-case>–<fixed-case>DFKI</fixed-case> Submission to the <fixed-case>WMT</fixed-case>2019 <fixed-case>C</fixed-case>zech–<fixed-case>P</fixed-case>olish Similar Language Translation Shared Task SantanuPal MarcosZampieri - Josefvan Genabith + Josefvan Genabith 219–223 In this paper we present the UDS-DFKI system submitted to the Similar Language Translation shared task at WMT 2019. The first edition of this shared task featured data from three pairs of similar languages: Czech and Polish, Hindi and Nepali, and Portuguese and Spanish. Participants could choose to participate in any of these three tracks and submit system outputs in any translation direction. We report the results obtained by our system in translating from Czech to Polish and comment on the impact of out-of-domain test data in the performance of our system. UDS-DFKI achieved competitive performance ranking second among ten teams in Czech to Polish translation. W19-5430 @@ -12436,7 +12436,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Low-Resource Corpus Filtering Using Multilingual Sentence Embeddings VishravChaudhary YuqingTang - FranciscoGuzmán + FranciscoGuzmán HolgerSchwenk PhilippKoehn 261–266 @@ -12457,7 +12457,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Webinterpret Submission to the <fixed-case>WMT</fixed-case>2019 Shared Task on Parallel Corpus Filtering - JesúsGonzález-Rubio + JesúsGonzález-Rubio 271–276 This document describes the participation of Webinterpret in the shared task on parallel corpus filtering at the Fourth Conference on Machine Translation (WMT 2019). Here, we describe the main characteristics of our approach and discuss the results obtained on the data sets published for the shared task. W19-5437 @@ -12477,8 +12477,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat Filtering of Noisy Parallel Corpora Based on Hypothesis Generation ZuzannaParcheta - GermánSanchis-Trilles - FranciscoCasacuberta + GermánSanchis-Trilles + FranciscoCasacuberta 282–288 The filtering task of noisy parallel corpora in WMT2019 aims to challenge participants to create filtering methods to be useful for training machine translation systems. In this work, we introduce a noisy parallel corpora filtering system based on generating hypotheses by means of a translation model. We train translation models in both language pairs: Nepali–English and Sinhala–English using provided parallel corpora. We select the training subset for three language pairs (Nepali, Sinhala and Hindi to English) jointly using bilingual cross-entropy selection to create the best possible translation model for both language pairs. Once the translation models are trained, we translate the noisy corpora and generate a hypothesis for each sentence pair. We compute the smoothed BLEU score between the target sentence and generated hypothesis. In addition, we apply several rules to discard very noisy or inadequate sentences which can lower the translation score. These heuristics are based on sentence length, source and target similarity and source language detection. We compare our results with the baseline published on the shared task website, which uses the Zipporah model, over which we achieve significant improvements in one of the conditions in the shared task. The designed filtering system is domain independent and all experiments are conducted using neural machine translation. W19-5439 @@ -12489,7 +12489,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Parallel Corpus Filtering Based on Fuzzy String Matching SukantaSen AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya 289–293 In this paper, we describe the IIT Patna’s submission to WMT 2019 shared task on parallel corpus filtering. This shared task asks the participants to develop methods for scoring each parallel sentence from a given noisy parallel corpus. Quality of the scoring method is judged based on the quality of SMT and NMT systems trained on smaller set of high-quality parallel sentences sub-sampled from the original noisy corpus. This task has two language pairs. We submit for both the Nepali-English and Sinhala-English language pairs. We define fuzzy string matching score between English and the translated (into English) source based on Levenshtein distance. Based on the scores, we sub-sample two sets (having 1 million and 5 millions English tokens) of parallel sentences from each parallel corpus, and train SMT systems for development purpose only. The organizers publish the official evaluation using both SMT and NMT on the final official test set. Total 10 teams participated in the shared task and according the official evaluation, our scoring method obtains 2nd position in the team ranking for 1-million NepaliEnglish NMT and 5-million Sinhala-English NMT categories. W19-5440 @@ -12500,7 +12500,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat The <fixed-case>U</fixed-case>niversity of <fixed-case>H</fixed-case>elsinki Submission to the <fixed-case>WMT</fixed-case>19 Parallel Corpus Filtering Task RaúlVázquez UmutSulubacak - JörgTiedemann + JörgTiedemann 294–300 This paper describes the University of Helsinki Language Technology group’s participation in the WMT 2019 parallel corpus filtering task. Our scores were produced using a two-step strategy. First, we individually applied a series of filters to remove the ‘bad’ quality sentences. Then, we produced scores for each sentence by weighting these features with a classification model. This methodology allowed us to build a simple and reliable system that is easily adaptable to other language pairs. W19-5441 @@ -12541,7 +12541,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat AbhishekSainani NitinRamrakhiyani SachinPawar - Girish KPalshikar + Girish KPalshikar SmitaGhaisas 8–13 W19-5502 @@ -12550,7 +12550,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Rationale Classification for Educational Trading Platforms AnnieYing - PabloDuboue + PabloDuboue 14–20 W19-5503 ying-duboue-2019-rationale @@ -12598,7 +12598,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Learning to Learn Sales Prediction with Social Media Sentiment ZhaojiangLin AndreaMadotto - Genta IndraWinata + Genta IndraWinata ZihanLiu YanXu CongGao @@ -12711,7 +12711,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>HITS</fixed-case>-<fixed-case>SBD</fixed-case> at the <fixed-case>F</fixed-case>in<fixed-case>SBD</fixed-case> Task: Machine Learning vs. Rule-based Sentence Boundary Detection MehwishFatima - Mark-ChristophMueller + Mark-ChristophMueller 115–121 W19-5520 fatima-mueller-2019-hits @@ -12721,7 +12721,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat MingyuWan RongXiang EmmanueleChersoni - NataliaKlyueva + NataliaKlyueva KathleenAhrens BinMiao DavidBroadstock @@ -12747,7 +12747,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat W19-56 MahmoudEl-Haj PaulRayson - EricAtwell + EricAtwell LamaAlsudias Association for Computational Linguistics
Cardiff, United Kingdom
@@ -12829,7 +12829,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Crisis Detection from <fixed-case>A</fixed-case>rabic Tweets AlaaAlharbi - MarkLee + MarkLee 72–79 W19-5609 alharbi-lee-2019-crisis @@ -12837,7 +12837,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat The Design of the <fixed-case>S</fixed-case>au<fixed-case>LTC</fixed-case> application for the <fixed-case>E</fixed-case>nglish-<fixed-case>A</fixed-case>rabic Learner Translation Corpus MahaAl-Harthi - AmalAlsaif + AmalAlsaif 80–88 W19-5610 al-harthi-alsaif-2019-design @@ -12964,10 +12964,10 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of the 5th Workshop on Semantic Deep Learning (SemDeep-5) W19-58 - LuisEspinosa-Anke + LuisEspinosa-Anke ThierryDeclerck DagmarGromann - JoseCamacho-Collados + JoseCamacho-Collados Mohammad TaherPilehvar Association for Computational Linguistics
Macau, China
@@ -12982,14 +12982,14 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>LIAAD</fixed-case> at <fixed-case>S</fixed-case>em<fixed-case>D</fixed-case>eep-5 Challenge: Word-in-Context (<fixed-case>W</fixed-case>i<fixed-case>C</fixed-case>) DanielLoureiro - AlípioJorge + AlípioJorge 1–5 W19-5801 loureiro-jorge-2019-liaad <fixed-case>LIMSI</fixed-case>-<fixed-case>MULTISEM</fixed-case> at the <fixed-case>IJCAI</fixed-case> <fixed-case>S</fixed-case>em<fixed-case>D</fixed-case>eep-5 <fixed-case>W</fixed-case>i<fixed-case>C</fixed-case> Challenge: Context Representations for Word Usage Similarity Estimation - AinaGarí Soler + AinaGarí Soler MariannaApidianaki AlexandreAllauzen 6–11 @@ -13027,9 +13027,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat Extending Neural Question Answering with Linguistic Input Features FabianHommel - PhilippCimiano + PhilippCimiano MatthiasOrlikowski - MatthiasHartung + MatthiasHartung 31–39 W19-5806 hommel-etal-2019-extending @@ -13040,7 +13040,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat ValerioPiccioni VevakeBalaraman MarcoGuerini - BernardoMagnini + BernardoMagnini 40–49 W19-5807 magnolini-etal-2019-use @@ -13069,11 +13069,11 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of the 20th Annual SIGdial Meeting on Discourse and Dialogue W19-59 SatoshiNakamura - MilicaGasic + MilicaGasic IngridZukerman GabrielSkantze MikioNakano - AlexandrosPapangelis + AlexandrosPapangelis StefanUltes KoichiroYoshino Association for Computational Linguistics @@ -13120,7 +13120,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Few-Shot Dialogue Generation Without Annotated Data: A Transfer Learning Approach IgorShalyminov - SungjinLee + SungjinLee ArashEshghi OliverLemon 32–39 @@ -13133,7 +13133,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>SIM</fixed-case>: A Slot-Independent Neural Model for Dialogue State Tracking ChenguangZhu MichaelZeng - XuedongHuang + XuedongHuang 40–45 Dialogue state tracking is an important component in task-oriented dialogue systems to identify users’ goals and requests as a dialogue proceeds. However, as most previous models are dependent on dialogue slots, the model complexity soars when the number of slots increases. In this paper, we put forward a slot-independent neural model (SIM) to track dialogue states while keeping the model complexity invariant to the number of dialogue slots. The model utilizes attention mechanisms between user utterance and system actions. SIM achieves state-of-the-art results on WoZ and DSTC2 tasks, with only 20% of the model size of previous models. W19-5905 @@ -13185,7 +13185,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Capturing Dialogue State Variable Dependencies with an Energy-based Neural Dialogue State Tracker Anh DuongTrinh Robert J.Ross - John D.Kelleher + John D.Kelleher 75–84 Dialogue state tracking requires the population and maintenance of a multi-slot frame representation of the dialogue state. Frequently, dialogue state tracking systems assume independence between slot values within a frame. In this paper we argue that treating the prediction of each slot value as an independent prediction task may ignore important associations between the slot values, and, consequently, we argue that treating dialogue state tracking as a structured prediction problem can help to improve dialogue state tracking performance. To support this argument, the research presented in this paper is structured into three stages: (i) analyzing variable dependencies in dialogue data; (ii) applying an energy-based methodology to model dialogue state tracking as a structured prediction task; and (iii) evaluating the impact of inter-slot relationships on model performance. Overall we demonstrate that modelling the associations between target slots with an energy-based formalism improves dialogue state tracking performance in a number of ways. W19-5910 @@ -13195,7 +13195,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Leveraging Non-Conversational Tasks for Low Resource Slot Filling: Does it help? SamuelLouvan - BernardoMagnini + BernardoMagnini 85–91 Slot filling is a core operation for utterance understanding in task-oriented dialogue systems. Slots are typically domain-specific, and adding new domains to a dialogue system involves data and time-intensive processes. A popular technique to address the problem is transfer learning, where it is assumed the availability of a large slot filling dataset for the source domain, to be used to help slot filling on the target domain, with fewer data. In this work, instead, we propose to leverage source tasks based on semantically related non-conversational resources (e.g., semantic sequence tagging datasets), as they are both cheaper to obtain and reusable to several slot filling domains. We show that using auxiliary non-conversational tasks in a multi-task learning setup consistently improves low resource slot filling performance. W19-5911 @@ -13207,7 +13207,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat AlexandrosPapangelis Yi-ChiaWang PieroMolino - GokhanTur + GokhanTur 92–102 Some of the major challenges in training conversational agents include the lack of large-scale data of real-world complexity, defining appropriate evaluation measures, and managing meaningful conversations across many topics over long periods of time. Moreover, most works tend to assume that the conversational agent’s environment is stationary, a somewhat strong assumption. To remove this assumption and overcome the lack of data, we take a step away from the traditional training pipeline and model the conversation as a stochastic collaborative game. Each agent (player) has a role (“assistant”, “tourist”, “eater”, etc.) and their own objectives, and can only interact via language they generate. Each agent, therefore, needs to learn to operate optimally in an environment with multiple sources of uncertainty (its own LU and LG, the other agent’s LU, Policy, and LG). In this work, we present the first complete attempt at concurrently training conversational agents that communicate only via self-generated language and show that they outperform supervised and deep learning baselines. W19-5912 @@ -13227,7 +13227,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Spoken Conversational Search for General Knowledge - Lina M.Rojas Barahona + Lina M.Rojas Barahona PascalBellec BenoitBesset MartinhoDossantos @@ -13235,7 +13235,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat MunshiAsadullah OlivierLeblouch Jeanyves.Lancien - GeraldineDamnati + GeraldineDamnati EmmanuelMory FredericHerledan 110–113 @@ -13249,7 +13249,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Jean-LeonBouraoui SoniaLe Meitour RomainCarbou - Lina M.Rojas Barahona + Lina M.Rojas Barahona VincentLemaire 114–117 We present Graph2Bots, a tool for assisting conversational agent designers. It extracts a graph representation from human-human conversations by using unsupervised learning. The generated graph contains the main stages of the dialogue and their inner transitions. The graphical user interface (GUI) then allows graph editing. @@ -13273,7 +13273,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat SemihYavuz AbhinavRastogi Guan-LinChao - DilekHakkani-Tur + DilekHakkani-Tur 122–132 Recent advances in neural sequence-to-sequence models have led to promising results for several language generation-based tasks, including dialogue response generation, summarization, and machine translation. However, these models are known to have several problems, especially in the context of chit-chat based dialogue systems: they tend to generate short and dull responses that are often too generic. Furthermore, these models do not ground conversational responses on knowledge and facts, resulting in turns that are not accurate, informative and engaging for the users. In this paper, we propose and experiment with a series of response generation models that aim to serve in the general scenario where in addition to the dialogue context, relevant unstructured external knowledge in the form of text is also assumed to be available for models to harness. Our proposed approach extends pointer-generator networks (See et al., 2017) by allowing the decoder to hierarchically attend and copy from external knowledge in addition to the dialogue context. We empirically show the effectiveness of the proposed model compared to several baselines including (Ghazvininejadet al., 2018; Zhang et al., 2018) through both automatic evaluation metrics and human evaluation on ConvAI2 dataset. W19-5917 @@ -13298,7 +13298,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat XinnuoXu YizheZhang LarsLiden - SungjinLee + SungjinLee 143–154 Although the data-driven approaches of some recent bot building platforms make it possible for a wide range of users to easily create dialogue systems, those platforms don’t offer tools for quickly identifying which log dialogues contain problems. This is important since corrections to log dialogues provide a means to improve performance after deployment. A log dialogue ranker, which ranks problematic dialogues higher, is an essential tool due to the sheer volume of log dialogues that could be generated. However, training a ranker typically requires labelling a substantial amount of data, which is not feasible for most users. In this paper, we present a novel unsupervised approach for dialogue ranking using GANs and release a corpus of labelled dialogues for evaluation and comparison with supervised methods. The evaluation result shows that our method compares favorably to supervised methods without any labelled data. W19-5919 @@ -13336,7 +13336,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat HuXu BingLiu HuaixiuZheng - GokhanTur + GokhanTur 178–187 This paper proposes a novel end-to-end architecture for task-oriented dialogue systems. It is based on a simple and practical yet very effective sequence-to-sequence approach, where language understanding and state tracking tasks are modeled jointly with a structured copy-augmented sequential decoder and a multi-label decoder for each slot. The policy engine and language generation tasks are modeled jointly following that. The copy-augmented sequential decoder deals with new or unknown values in the conversation, while the multi-label decoder combined with the sequential decoder ensures the explicit assignment of values to slots. On the generation part, slot binary classifiers are used to improve performance. This architecture is scalable to real-world scenarios and is shown through an empirical evaluation to achieve state-of-the-art performance on both the Cambridge Restaurant dataset and the Stanford in-car assistant dataset. W19-5922 @@ -13346,7 +13346,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>F</fixed-case>riends<fixed-case>QA</fixed-case>: Open-Domain Question Answering on <fixed-case>TV</fixed-case> Show Transcripts ZhengzheYang - Jinho D.Choi + Jinho D.Choi 188–197 This paper presents FriendsQA, a challenging question answering dataset that contains 1,222 dialogues and 10,610 open-domain questions, to tackle machine comprehension on everyday conversations. Each dialogue, involving multiple speakers, is annotated with several types of questions regarding the dialogue contexts, and the answers are annotated with certain spans in the dialogue. A series of crowdsourcing tasks are conducted to ensure good annotation quality, resulting a high inter-annotator agreement of 81.82%. A comprehensive annotation analytics is provided for a deeper understanding in this dataset. Three state-of-the-art QA systems are experimented, R-Net, QANet, and BERT, and evaluated on this dataset. BERT in particular depicts promising results, an accuracy of 74.2% for answer utterance selection and an F1-score of 64.2% for answer span selection, suggesting that the FriendsQA task is hard yet has a great potential of elevating QA research on multiparty dialogue to another level. W19-5923 @@ -13355,7 +13355,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Foundations of Collaborative Task-Oriented Dialogue: What’s in a Slot? - PhilipCohen + PhilipCohen 198–209 In this paper, we examine the foundations of task-oriented dialogues, in which systems are requested to perform tasks for humans. We argue that the way this dialogue task has been framed has limited its applicability to processing simple requests with atomic “slot-fillers”. However, real task-oriented dialogues can contain more complex utterances that provide non-atomic constraints on slot values. For example, in response to the system’s question “What time do you want me to reserve the restaurant?”, a user should be able to say “the earliest time available,” which cannot be handled by classic “intent + slots” approaches that do not incorporate expressive logical form meaning representations. Furthermore, situations for which it would be desirable to build task-oriented dialogue systems, e.g., to engage in mixed-initiative, collaborative or multiparty dialogues, will require a more general approach. In order to overcome these limitations and to provide such an approach, we give a logical analysis of the “intent+slot” dialogue setting using a modal logic of intention and including a more expansive notion of “dialogue state”. Finally, we briefly discuss our program of research to build a next generation of plan-based dialogue systems that goes beyond “intent + slots”. W19-5924 @@ -13379,7 +13379,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Guan-LinChao AbhinavRastogi SemihYavuz - DilekHakkani-Tur + DilekHakkani-Tur JindongChen IanLane 215–225 @@ -13402,7 +13402,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat A Quantitative Analysis of Patients’ Narratives of Heart Failure SabitaAcharya BarbaraDi Eugenio - AndrewBoyd + AndrewBoyd RichardCameron KarenDunn Lopez PamelaMartyn-Nemeth @@ -13421,7 +13421,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>TDD</fixed-case>iscourse: A Dataset for Discourse-Level Temporal Ordering of Events AakankshaNaik LukeBreitfeller - CarolynRose + CarolynRose 239–249 Prior work on temporal relation classification has focused extensively on event pairs in the same or adjacent sentences (local), paying scant attention to discourse-level (global) pairs. This restricts the ability of systems to learn temporal links between global pairs, since reliance on local syntactic features suffices to achieve reasonable performance on existing datasets. However, systems should be capable of incorporating cues from document-level structure to assign temporal relations. In this work, we take a first step towards discourse-level temporal ordering by creating TDDiscourse, the first dataset focusing specifically on temporal links between event pairs which are more than one sentence apart. We create TDDiscourse by augmenting TimeBank-Dense, a corpus of English news articles, manually annotating global pairs that cannot be inferred automatically from existing annotations. Our annotations double the number of temporal links in TimeBank-Dense, while possessing several desirable properties such as focusing on long-distance pairs and not being automatically inferable. We adapt and benchmark the performance of three state-of-the-art models on TDDiscourse and observe that existing systems indeed find discourse-level temporal ordering harder. W19-5929 @@ -13431,7 +13431,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Real Life Application of a Question Answering System Using <fixed-case>BERT</fixed-case> Language Model FrancescaAlloatti - LuigiDi Caro + LuigiDi Caro GianpieroSportelli 250–253 It is often hard to apply the newest advances in research to real life scenarios. They usually require the resolution of some specific task applied to a restricted domain, all the while providing small amounts of data to begin with. In this study we apply one of the newest innovations in Deep Learning to a task of text classification. We created a question answering system in Italian that provides information about a specific subject, e-invoicing and digital billing. Italy recently introduced a new legislation about e-invoicing and people have some legit doubts, therefore a large share of professionals could benefit from this tool. @@ -13456,7 +13456,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat AbhishekSethi SanchitAgarwal TagyoungChung - DilekHakkani-Tur + DilekHakkani-Tur 264–273 Dialog state tracking is used to estimate the current belief state of a dialog given all the preceding conversation. Machine reading comprehension, on the other hand, focuses on building systems that read passages of text and answer questions that require some understanding of passages. We formulate dialog state tracking as a reading comprehension task to answer the question what is the state of the current dialog? after reading conversational context. In contrast to traditional state tracking methods where the dialog state is often predicted as a distribution over a closed set of all the possible slot values within an ontology, our method uses a simple attention-based neural network to point to the slot values within the conversation. Experiments on MultiWOZ-2.0 cross-domain dialog dataset show that our simple system can obtain similar accuracies compared to the previous more complex methods. By exploiting recent advances in contextual word embeddings, adding a model that explicitly tracks whether a slot value should be carried over to the next turn, and combining our method with a traditional joint state tracking method that relies on closed set vocabulary, we can obtain a joint-goal accuracy of 47.33% on the standard test split, exceeding current state-of-the-art by 11.75%**. W19-5932 @@ -13577,7 +13577,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat A Dynamic Strategy Coach for Effective Negotiation YihengZhou HeHe - Alan WBlack + Alan WBlack YuliaTsvetkov 367–378 Negotiation is a complex activity involving strategic reasoning, persuasion, and psychology. An average person is often far from an expert in negotiation. Our goal is to assist humans to become better negotiators through a machine-in-the-loop approach that combines machine’s advantage at data-driven decision-making and human’s language generation ability. We consider a bargaining scenario where a seller and a buyer negotiate the price of an item for sale through a text-based dialogue. Our negotiation coach monitors messages between them and recommends strategies in real time to the seller to get a better deal (e.g., “reject the proposal and propose a price”, “talk about your personal experience with the product”). The best strategy largely depends on the context (e.g., the current price, the buyer’s attitude). Therefore, we first identify a set of negotiation strategies, then learn to predict the best strategy in a given dialogue context from a set of human-human bargaining dialogues. Evaluation on human-human dialogues shows that our coach increases the profits of the seller by almost 60%. @@ -13592,7 +13592,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat TianchengZhao AmyPavel MaxineEskenazi - JeffreyBigham + JeffreyBigham 379–391 The aim of this paper is to mitigate the shortcomings of automatic evaluation of open-domain dialog systems through multi-reference evaluation. Existing metrics have been shown to correlate poorly with human judgement, particularly in open-domain dialog. One alternative is to collect human annotations for evaluation, which can be expensive and time consuming. To demonstrate the effectiveness of multi-reference evaluation, we augment the test set of DailyDialog with multiple references. A series of experiments show that the use of multiple references results in improved correlation between several automatic metrics and human judgement for both the quality and the diversity of system output. W19-5944 @@ -13614,7 +13614,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Dialogue Act Classification in Team Communication for Robot Assisted Disaster Response TatianaAnikina - IvanaKruijff-Korbayova + IvanaKruijff-Korbayova 399–410 We present the results we obtained on the classification of dialogue acts in a corpus of human-human team communication in the domain of robot-assisted disaster response. We annotated dialogue acts according to the ISO 24617-2 standard scheme and carried out experiments using the FastText linear classifier as well as several neural architectures, including feed-forward, recurrent and convolutional neural models with different types of embeddings, context and attention mechanism. The best performance was achieved with a ”Divide & Merge” architecture presented in the paper, using trainable GloVe embeddings and a structured dialogue history. This model learns from the current utterance and the preceding context separately and then combines the two generated representations. Average accuracy of 10-fold cross-validation is 79.8%, F-score 71.8%. W19-5946 @@ -13624,8 +13624,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat Multi-Task Learning of System Dialogue Act Selection for Supervised Pretraining of Goal-Oriented Dialogue Policies SarahMcLeod - IvanaKruijff-Korbayova - BerndKiefer + IvanaKruijff-Korbayova + BerndKiefer 411–417 This paper describes the use of Multi-Task Neural Networks (NNs) for system dialogue act selection. These models leverage the representations learned by the Natural Language Understanding (NLU) unit to enable robust initialization/bootstrapping of dialogue policies from medium sized initial data sets. We evaluate the models on two goal-oriented dialogue corpora in the travel booking domain. Results show the proposed models improve over models trained without knowledge of NLU tasks. W19-5947 @@ -13686,7 +13686,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat JordanLachler AlexisPalmer LaneSchwartz - MiikkaSilfverberg + MiikkaSilfverberg Association for Computational Linguistics
Honolulu
February @@ -13724,7 +13724,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat
<fixed-case>OCR</fixed-case> evaluation tools for the 21st century - Eddie AntonioSantos + Eddie AntonioSantos 23–27 W19-6004 santos-2019-ocr @@ -13733,7 +13733,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Handling cross-cutting properties in automatic inference of lexical classes: A case study of Chintang OlgaZamaraeva KristenHowell - Emily M.Bender + Emily M.Bender 28–38 W19-6005 zamaraeva-etal-2019-handling @@ -13749,7 +13749,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Seeing more than whitespace — Tokenisation and disambiguation in a <fixed-case>N</fixed-case>orth <fixed-case>S</fixed-case>ámi grammar checker LindaWiechetek - Sjur NørstebøMoshagen + Sjur NørstebøMoshagen Kevin BrubeckUnhammer 46–55 W19-6007 @@ -13773,10 +13773,10 @@ One of the references was wrong therefore it is corrected to cite the appropriat A biscriptual morphological transducer for <fixed-case>C</fixed-case>rimean <fixed-case>T</fixed-case>atar - Francis M.Tyers - JonathanWashington + Francis M.Tyers + JonathanWashington DaryaKavitskaya - MemduhGökırmak + MemduhGökırmak NickHowell RemziyeBerberova 74–80 @@ -13785,7 +13785,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Improving Low-Resource Morphological Learning with Intermediate Forms from Finite State Transducers - SarahMoeller + SarahMoeller GhazalehKazeminejad AndrewCowell MansHulden @@ -13809,7 +13809,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of the 22nd Nordic Conference on Computational Linguistics W19-61 MareikeHartmann - BarbaraPlank + BarbaraPlank Linköping University Electronic Press
Turku, Finland
September–October @@ -13823,7 +13823,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Comparison between <fixed-case>NMT</fixed-case> and <fixed-case>PBSMT</fixed-case> Performance for Translating Noisy User-Generated Content José CarlosRosales Núñez - DjaméSeddah + DjaméSeddah GuillaumeWisniewski 2–14 This work compares the performances achieved by Phrase-Based Statistical Machine Translation systems (PB-SMT) and attention-based Neuronal Machine Translation systems (NMT) when translating User Generated Content (UGC), as encountered in social medias, from French to English. We show that, contrary to what could be expected, PBSMT outperforms NMT when translating non-canonical inputs. Our error analysis uncovers the specificities of UGC that are problematic for sequential NMT architectures and suggests new avenue for improving NMT models. @@ -13900,8 +13900,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat Comparing linear and neural models for competitive <fixed-case>MWE</fixed-case> identification Hazem AlSaied - MarieCandito - MathieuConstant + MarieCandito + MathieuConstant 86–96 In this paper, we compare the use of linear versus neural classifiers in a greedy transition system for MWE identification. Both our linear and neural models achieve a new state-of-the-art on the PARSEME 1.1 shared task data sets, comprising 20 languages. Surprisingly, our best model is a simple feed-forward network with one hidden layer, although more sophisticated (recurrent) architectures were tested. The feedback from this study is that tuning a SVM is rather straightforward, whereas tuning our neural system revealed more challenging. Given the number of languages and the variety of linguistic phenomena to handle for the MWE identification task, we have designed an accurate tuning procedure, and we show that hyperparameters are better selected by using a majority-vote within random search configurations rather than a simple best configuration selection. Although the performance is rather good (better than both the best shared task system and the average of the best per-language results), further work is needed to improve the generalization power, especially on unseen MWEs. W19-6109 @@ -13909,8 +13909,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat Syntax-based identification of light-verb constructions - Silvio RicardoCordeiro - MarieCandito + Silvio RicardoCordeiro + MarieCandito 97–104 This paper analyzes results on light-verb construction identification from the PARSEME shared-task, distinguishing between simple cases that could be directly learned from training data from more complex cases that require an extra level of semantic processing. We propose a simple baseline that beats the state of the art for the simple cases, and couple it with another simple baseline to handle the complex cases. We additionally present two other classifiers based on a richer set of features, with results surpassing the state of the art by 8 percentage points. W19-6110 @@ -13920,7 +13920,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Comparing the Performance of Feature Representations for the Categorization of the Easy-to-Read Variety vs Standard Language MarinaSantini BenjaminDanielsson - ArneJönsson + ArneJönsson 105–114 We explore the effectiveness of four feature representations – bag-of-words, word embeddings, principal components and autoencoders – for the binary categorization of the easy-to-read variety vs standard language. Standard language refers to the ordinary language variety used by a population as a whole or by a community, while the “easy-to-read” variety is a simpler (or a simplified) version of the standard language. We test the efficiency of these feature representations on three corpora, which differ in size, class balance, unit of analysis, language and topic. We rely on supervised and unsupervised machine learning algorithms. Results show that bag-of-words is a robust and straightforward feature representation for this task and performs well in many experimental settings. Its performance is equivalent or equal to the performance achieved with principal components and autoencorders, whose preprocessing is however more time-consuming. Word embeddings are less accurate than the other feature representations for this classification task. W19-6111 @@ -13939,7 +13939,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Annotating evaluative sentences for sentiment analysis: a dataset for <fixed-case>N</fixed-case>orwegian PetterMæhlum JeremyBarnes - LiljaØvrelid + LiljaØvrelid ErikVelldal 121–130 This paper documents the creation of a large-scale dataset of evaluative sentences – i.e. both subjective and objective sentences that are found to be sentiment-bearing – based on mixed-domain professional reviews from various news-sources. We present both the annotation scheme and first results for classification experiments. The effort represents a step toward creating a Norwegian dataset for fine-grained sentiment analysis. @@ -14005,7 +14005,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Lexicon information in neural sentiment analysis: a multi-task learning approach JeremyBarnes SamiaTouileb - LiljaØvrelid + LiljaØvrelid ErikVelldal 175–186 This paper explores the use of multi-task learning (MTL) for incorporating external knowledge in neural models. Specifically, we show how MTL can enable a BiLSTM sentiment classifier to incorporate information from sentiment lexicons. Our MTL set-up is shown to improve model performance (compared to a single-task set-up) on both English and Norwegian sentence-level sentiment datasets. The paper also introduces a new sentiment lexicon for Norwegian. @@ -14025,7 +14025,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Political Stance in <fixed-case>D</fixed-case>anish RasmusLehmann - LeonDerczynski + LeonDerczynski 197–207 The task of stance detection consists of classifying the opinion within a text towards some target. This paper seeks to generate a dataset of quotes from Danish politicians, label this dataset to allow the task of stance detection to be performed, and present annotation guidelines to allow further expansion of the generated dataset. Furthermore, three models based on an LSTM architecture are designed, implemented and optimized to perform the task of stance detection for the generated dataset. Experiments are performed using conditionality and bi-directionality for these models, and using either singular word embeddings or averaged word embeddings for an entire quote, to determine the optimal model design. The simplest model design, applying neither conditionality or bi-directionality, and averaged word embeddings across quotes, yields the strongest results. Furthermore, it was found that inclusion of the quotes politician, and the party affiliation of the quoted politician, greatly improved performance of the strongest model. W19-6121 @@ -14035,7 +14035,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Joint Rumour Stance and Veracity Prediction Anders EdelboLillie Emil RefsgaardMiddelboe - LeonDerczynski + LeonDerczynski 208–221 The net is rife with rumours that spread through microblogs and social media. Not all the claims in these can be verified. However, recent work has shown that the stances alone that commenters take toward claims can be sufficiently good indicators of claim veracity, using e.g. an HMM that takes conversational stance sequences as the only input. Existing results are monolingual (English) and mono-platform (Twitter). This paper introduces a stance-annotated Reddit dataset for the Danish language, and describes various implementations of stance classification models. Of these, a Linear SVM provides predicts stance best, with 0.76 accuracy / 0.42 macro F1. Stance labels are then used to predict veracity across platforms and also across languages, training on conversations held in one language and using the model on conversations held in another. In our experiments, monolinugal scores reach stance-based veracity accuracy of 0.83 (F1 0.68); applying the model across languages predicts veracity of claims with an accuracy of 0.82 (F1 0.67). This demonstrates the surprising and powerful viability of transferring stance-based veracity prediction across languages. W19-6122 @@ -14075,7 +14075,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Matching Keys and Encrypted Manuscripts EvaPettersson - BeataMegyesi + BeataMegyesi 253–261 Historical cryptology is the study of historical encrypted messages aiming at their decryption by analyzing the mathematical, linguistic and other coding patterns and their historical context. In libraries and archives we can find quite a lot of ciphers, as well as keys describing the method used to transform the plaintext message into a ciphertext. In this paper, we present work on automatically mapping keys to ciphers to reconstruct the original plaintext message, and use language models generated from historical texts to guess the underlying plaintext language. W19-6126 @@ -14104,7 +14104,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat AnttiSuni HandeCelikkanat SofoklisKakouros - JörgTiedemann + JörgTiedemann MarttiVainio 281–290 In this paper we introduce a new natural language processing dataset and benchmark for predicting prosodic prominence from written text. To our knowledge this will be the largest publicly available dataset with prosodic labels. We describe the dataset construction and the resulting benchmark dataset in detail and train a number of different models ranging from feature-based classifiers to neural network systems for the prediction of discretized prosodic prominence. We show that pre-trained contextualized word representations from BERT outperform the other models even with less than 10% of the training data. Finally we discuss the dataset in light of the results and point to future research and plans for further improving both the dataset and methods of predicting prosodic prominence from text. The dataset and the code for the models will be made publicly available. @@ -14135,7 +14135,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Ensembles of Neural Morphological Inflection Models IlmariKylliäinen - MiikkaSilfverberg + MiikkaSilfverberg 304–309 We investigate different ensemble learning techniques for neural morphological inflection using bidirectional LSTM encoder-decoder models with attention. We experiment with weighted and unweighted majority voting and bagging. We find that all investigated ensemble methods lead to improved accuracy over a baseline of a single model. However, contrary to expectation based on earlier work by Najafi et al. (2018) and Silfverberg et al. (2017), weighting does not deliver clear benefits. Bagging was found to underperform plain voting ensembles in general. W19-6132 @@ -14194,7 +14194,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Bornholmsk Natural Language Processing: Resources and Tools - LeonDerczynski + LeonDerczynski Alex SpeedKjeldsen 338–344 This paper introduces language processing resources and tools for Bornholmsk, a language spoken on the island of Bornholm, with roots in Danish and closely related to Scanian. This presents an overview of the language and available data, and the first NLP models for this living, minority Nordic language. Sammenfattnijng på borrijnholmst: Dæjnna artikkelijn introduserer natursprågsresurser å varktoi for borrijnholmst, ed språg a dær snakkes på ön Borrijnholm me rødder i danst å i nær familia me skånst. Artikkelijn gjer ed âuersyn âuer språged å di datan som fijnnes, å di fosste NLP modællarna for dætta læwenes nordiska minnretâlsspråged. @@ -14217,7 +14217,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat AndreKåsen AndersNøklestad KristinHagen - JoelPriestley + JoelPriestley 350–355 This paper describes an evaluation of five data-driven part-of-speech (PoS) taggers for spoken Norwegian. The taggers all rely on different machine learning mechanisms: decision trees, hidden Markov models (HMMs), conditional random fields (CRFs), long-short term memory networks (LSTMs), and convolutional neural networks (CNNs). We go into some of the challenges posed by the task of tagging spoken, as opposed to written, language, and in particular a wide range of dialects as is found in the recordings of the LIA (Language Infrastructure made Accessible) project. The results show that the taggers based on either conditional random fields or neural networks perform much better than the rest, with the LSTM tagger getting the highest score. W19-6140 @@ -14227,7 +14227,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat The Lacunae of <fixed-case>D</fixed-case>anish Natural Language Processing AndreasKirkedal BarbaraPlank - LeonDerczynski + LeonDerczynski NatalieSchluter 356–362 Danish is a North Germanic language spoken principally in Denmark, a country with a long tradition of technological and scientific innovation. However, the language has received relatively little attention from a technological perspective. In this paper, we review Natural Language Processing (NLP) research, digital resources and tools which have been developed for Danish. We find that availability of models and tools is limited, which calls for work that lifts Danish NLP a step closer to the privileged languages. Dansk abstrakt: Dansk er et nordgermansk sprog, talt primært i kongeriget Danmark, et land med stærk tradition for teknologisk og videnskabelig innovation. Det danske sprog har imidlertid været genstand for relativt begrænset opmærksomhed, teknologisk set. I denne artikel gennemgår vi sprogteknologi-forskning, -ressourcer og -værktøjer udviklet for dansk. Vi konkluderer at der eksisterer et fåtal af modeller og værktøjer, hvilket indbyder til forskning som løfter dansk sprogteknologi i niveau med mere priviligerede sprog. @@ -14274,7 +14274,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat The <fixed-case>OPUS</fixed-case> Resource Repository: An Open Package for Creating Parallel Corpora and Machine Translation Services MikkoAulamo - JörgTiedemann + JörgTiedemann 389–394 This paper presents a flexible and powerful system for creating parallel corpora and for running neural machine translation services. Our package provides a scalable data repository backend that offers transparent data pre-processing pipelines and automatic alignment procedures that facilitate the compilation of extensive parallel data sets from a variety of sources. Moreover, we develop a web-based interface that constitutes an intuitive frontend for end-users of the platform. The whole system can easily be distributed over virtual machines and implements a sophisticated permission system with secure connections and a flexible database for storing arbitrary metadata. Furthermore, we also provide an interface for neural machine translation that can run as a service on virtual machines, which also incorporates a connection to the data repository software. W19-6146 @@ -14284,7 +14284,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Garnishing a phonetic dictionary for <fixed-case>ASR</fixed-case> intake Iben NyholmDebess Sandra SaxovLamhauge - Peter JuelHenrichsen + Peter JuelHenrichsen 395–399 We present a new method for preparing a lexical-phonetic database as a resource for acoustic model training. The research is an offshoot of the ongoing Project Ravnur (Speech Recognition for Faroese), but the method is language-independent. At NODALIDA 2019 we demonstrate the method (called SHARP) online, showing how a traditional lexical-phonetic dictionary (with a very rich phone inventory) is transformed into an ASR-friendly database (with reduced phonetics, preventing data sparseness). The mapping procedure is informed by a corpus of speech transcripts. We conclude with a discussion on the benefits of a well-thought-out BLARK design (Basic Language Resource Kit), making tools like SHARP possible. W19-6147 @@ -14314,11 +14314,11 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of the First NLPL Workshop on Deep Learning for Natural Language Processing W19-62 JoakimNivre - LeonDerczynski + LeonDerczynski FilipGinter BjørnLindi StephanOepen - AndersSøgaard + AndersSøgaard JörgTidemann Linköping University Electronic Press
Turku, Finland
@@ -14334,7 +14334,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Mark my Word: A Sequence-to-Sequence Approach to Definition Modeling TimotheeMickus DenisPaperno - MatthieuConstant + MatthieuConstant 1–11 Defining words in a textual context is a useful task both for practical purposes and for gaining insight into distributed word representations. Building on the distributional hypothesis, we argue here that the most natural formalization of definition modeling is to treat it as a sequence-to-sequence task, rather than a word-to-sequence task: given an input sequence with a highlighted word, generate a contextually appropriate definition for it. We implement this approach in a Transformer-based sequence-to-sequence model. Our proposal allows to train contextualization and definition generation in an end-to-end fashion, which is a conceptual improvement over earlier works. We achieve state-of-the-art results both in contextual and non-contextual definition modeling. W19-6201 @@ -14373,8 +14373,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat Multilingual Probing of Deep Pre-Trained Contextual Encoders VinitRavishankar - MemduhGökırmak - LiljaØvrelid + MemduhGökırmak + LiljaØvrelid ErikVelldal 37–47 Encoders that generate representations based on context have, in recent years, benefited from adaptations that allow for pre-training on large text corpora. Earlier work on evaluating fixed-length sentence representations has included the use of ‘probing’ tasks, that use diagnostic classifiers to attempt to quantify the extent to which these encoders capture specific linguistic phenomena. The principle of probing has also resulted in extended evaluations that include relatively newer word-level pre-trained encoders. We build on probing tasks established in the literature and comprehensively evaluate and analyse – from a typological perspective amongst others – multilingual variants of existing encoders on probing datasets constructed for 6 non-English languages. Specifically, we probe each layer of a multiple monolingual RNN-based ELMo models, the transformer-based BERT’s cased and uncased multilingual variants, and a variant of BERT that uses a cross-lingual modelling scheme (XLM). @@ -14433,7 +14433,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Automatic Generation and Semantic Grading of <fixed-case>E</fixed-case>speranto Sentences in a Teaching Context - EckhardBick + EckhardBick 10–19 W19-6302 bick-2019-automatic @@ -14448,7 +14448,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Linguistic features and proficiency classification in <fixed-case>L</fixed-case>2 <fixed-case>S</fixed-case>panish and <fixed-case>L</fixed-case>2<fixed-case>P</fixed-case>ortuguese. - Iriadel Río + Iriadel Río 31–40 W19-6304 del-rio-2019-linguistic @@ -14456,9 +14456,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat Integrating large-scale web data and curated corpus data in a search engine supporting <fixed-case>G</fixed-case>erman literacy education SabrinaDittrich - ZarahWeiss + ZarahWeiss HannesSchröter - DetmarMeurers + DetmarMeurers 41–56 W19-6305 dittrich-etal-2019-integrating @@ -14488,7 +14488,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Summarization Evaluation meets Short-Answer Grading MargotMieskes - UlrikePadó + UlrikePadó 79–85 W19-6308 mieskes-pado-2019-summarization @@ -14507,8 +14507,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat RamonZiai FlorianNuxoll KordulaDe Kuthy - BjörnRudzewitz - DetmarMeurers + BjörnRudzewitz + DetmarMeurers 93–99 W19-6310 ziai-etal-2019-impact @@ -14520,7 +14520,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat W19-64 MahmoudEl-Haj PaulRayson - StevenYoung + StevenYoung HoudaBouamor SiraFerradans Linköping University Electronic Press @@ -14560,21 +14560,21 @@ One of the references was wrong therefore it is corrected to cite the appropriat Active Learning for Financial Investment Reports SianGooding - TedBriscoe + TedBriscoe 25–32 W19-6404 gooding-briscoe-2019-active Towards Unlocking the Narrative of the <fixed-case>U</fixed-case>nited <fixed-case>S</fixed-case>tates Income Tax Forms - EsmeManandise + EsmeManandise 33–41 W19-6405 manandise-2019-towards Tone Analysis in <fixed-case>S</fixed-case>panish Financial Reporting Narratives - AntonioMoreno-Sandoval + AntonioMoreno-Sandoval Pablo Alfonso HayaAna Gisbert MartaGuerrero HelenaMontoro @@ -14630,7 +14630,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of the Workshop on NLP and Pseudonymisation W19-65 LarsAhrenberg - BeataMegyesi + BeataMegyesi Linköping Electronic Press
Turku, Finland
September @@ -14676,7 +14676,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of Machine Translation Summit XVII: Research Track W19-66 - MikelForcada + MikelForcada AndyWay BarryHaddow RicoSennrich @@ -14694,7 +14694,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Online Sentence Segmentation for Simultaneous Interpretation using Multi-Shifted Recurrent Neural Network XiaolinWang MasaoUtiyama - EiichiroSumita + EiichiroSumita 1–11 W19-6601 wang-etal-2019-online @@ -14707,7 +14707,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat MuhammadRahman YimingWang HainanXu - DanielPovey + DanielPovey PhilippKoehn KevinDuh 12–20 @@ -14716,8 +14716,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat
Enhancing Transformer for End-to-end Speech-to-Text Translation - Mattia AntoninoDi Gangi - MatteoNegri + Mattia AntoninoDi Gangi + MatteoNegri RoldanoCattoni RobertoDessi MarcoTurchi @@ -14736,7 +14736,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>T</fixed-case>ranslator2<fixed-case>V</fixed-case>ec: Understanding and Representing Human Post-Editors AntónioGóis - André F. T.Martins + André F. T.Martins 43–54 W19-6605 gois-martins-2019-translator2vec @@ -14769,7 +14769,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Automatic error classification with multiple error labels - MajaPopovic + MajaPopovic DavidVilar 87–95 W19-6609 @@ -14813,7 +14813,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Improving Anaphora Resolution in Neural Machine Translation Using Curriculum Learning DarioStojanovski - AlexanderFraser + AlexanderFraser 140–150 W19-6614 stojanovski-fraser-2019-improving @@ -14850,7 +14850,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat An Exploration of Placeholding in Neural Machine Translation MattPost ShuoyangDing - MariannaMartindale + MariannaMartindale WinstonWu 182–192 W19-6618 @@ -14896,7 +14896,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Identifying Fluently Inadequate Output in Neural and Statistical Machine Translation - MariannaMartindale + MariannaMartindale MarineCarpuat KevinDuh PaulMcNamee @@ -14919,7 +14919,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat JohnOrtega FelipeSánchez-Martínez MarcoTurchi - MatteoNegri + MatteoNegri 256–266 W19-6625 ortega-etal-2019-improving @@ -14948,7 +14948,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of Machine Translation Summit XVII: Translator, Project and User Tracks W19-67 - MikelForcada + MikelForcada AndyWay JohnTinsley DimitarShterionov @@ -14966,8 +14966,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat Competitiveness Analysis of the <fixed-case>E</fixed-case>uropean Machine Translation Market - AndrejsVasiļjevs - IngunaSkadiņa + AndrejsVasiļjevs + IngunaSkadiņa IndraSāmīte KasparsKauliņš ĒriksAjausks @@ -14982,8 +14982,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat MihaelaVela SantanuPal MarcosZampieri - SudipNaskar - Josefvan Genabith + SudipNaskar + Josefvan Genabith 8–15 W19-6702 vela-etal-2019-improving @@ -15066,7 +15066,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat On reducing translation shifts in translations intended for <fixed-case>MT</fixed-case> evaluation - MajaPopovic + MajaPopovic 80–87 W19-6712 popovic-2019-reducing @@ -15133,7 +15133,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat i<fixed-case>ADAATPA</fixed-case> Project: Pangeanic use cases - MercedesGarcía-Martínez + MercedesGarcía-Martínez AmandoEstela LaurentBié AlexandreHelle @@ -15164,9 +15164,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>P</fixed-case>ara<fixed-case>C</fixed-case>rawl: Web-scale parallel corpora for the languages of the <fixed-case>EU</fixed-case> - MiquelEsplà + MiquelEsplà MikelForcada - GemaRamírez-Sánchez + GemaRamírez-Sánchez HieuHoang 118–119 W19-6721 @@ -15177,7 +15177,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Chao-HongLiu AndyWay CatarinaSilva - AndréMartins + AndréMartins 120–121 W19-6722 liu-etal-2019-pivot @@ -15191,7 +15191,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat RachelBawden FelipeSánchez-Martínez Mikel L.Forcada - MiquelEsplà-Gomis + MiquelEsplà-Gomis VíctorSánchez-Cartagena Juan AntonioPérez-Ortiz WilkerAziz @@ -15217,7 +15217,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Bharathi RajaChakravarthi JuanAlonso NoeCasas - MihaelArcan + MihaelArcan 125–133 W19-6725 torregrosa-etal-2019-leveraging @@ -15244,7 +15244,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Surveying the potential of using speech technologies for post-editing purposes in the context of international organizations: What do professional translators think? JeevanthiLiyanapathirana - PierretteBouillon + PierretteBouillon BartoloméMesa-Lao 149–158 W19-6728 @@ -15283,9 +15283,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat MarekMazur ManuelHerranz AlexHelle - GemaRamírez-Sánchez + GemaRamírez-Sánchez VíctorSánchez-Cartagena - MārcisPinnis + MārcisPinnis ValtersŠics 179–185 W19-6732 @@ -15308,7 +15308,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Monolingual backtranslation in a medical speech translation system for diagnostic interviews - a <fixed-case>NMT</fixed-case> approach JonathanMutal - PierretteBouillon + PierretteBouillon JohannaGerlach PaulaEstrella HervéSpechbach @@ -15333,12 +15333,12 @@ One of the references was wrong therefore it is corrected to cite the appropriat Incremental Adaptation of <fixed-case>NMT</fixed-case> for Professional Post-editors: A User Study MiguelDomingo - MercedesGarcía-Martínez - ÁlvaroPeris + MercedesGarcía-Martínez + ÁlvaroPeris AlexandreHelle AmandoEstela LaurentBié - FranciscoCasacuberta + FranciscoCasacuberta ManuelHerranz 219–227 W19-6737 @@ -15363,9 +15363,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of the 2nd Workshop on Technologies for MT of Low Resource Languages W19-68 AlinaKarakanta - Atul Kr.Ojha + Atul Kr.Ojha Chao-HongLiu - JonathanWashington + JonathanWashington NathanielOco Surafel MelakuLakew ValentinMalykh @@ -15397,7 +15397,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Workflows for kickstarting <fixed-case>RBMT</fixed-case> in virtually No-Resource Situation - Tommi APirinen + Tommi APirinen 11–16 W19-6803 pirinen-2019-workflows @@ -15406,15 +15406,15 @@ One of the references was wrong therefore it is corrected to cite the appropriat A Continuous Improvement Framework of Machine Translation for <fixed-case>S</fixed-case>hipibo-Konibo Héctor Erasmo GómezMontoya Kervy Dante RivasRojas - ArturoOncevay + ArturoOncevay 17–23 W19-6804 montoya-etal-2019-continuous A free/open-source rule-based machine translation system for <fixed-case>C</fixed-case>rimean <fixed-case>T</fixed-case>atar to <fixed-case>T</fixed-case>urkish - MemduhGökırmak - FrancisTyers + MemduhGökırmak + FrancisTyers JonathanWashington 24–31 W19-6805 @@ -15446,7 +15446,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Corpus Building for Low Resource Languages in the <fixed-case>DARPA</fixed-case> <fixed-case>LORELEI</fixed-case> Program JenniferTracey - StephanieStrassel + StephanieStrassel AnnBies ZhiyiSong MichaelArrigo @@ -15465,9 +15465,9 @@ One of the references was wrong therefore it is corrected to cite the appropriat Bharathi RajaChakravarthi RubaPriyadharshini BernardoStearns - ArunJayapal + ArunJayapal SridevyS - MihaelArcan + MihaelArcan ManelZarrouk John PMcCrae 56–63 @@ -15478,7 +15478,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat A3-108 Machine Translation System for <fixed-case>L</fixed-case>o<fixed-case>R</fixed-case>es<fixed-case>MT</fixed-case> 2019 SaumitraYadav VandanMujadia - ManishShrivastava + ManishShrivastava 64–67 W19-6810 yadav-etal-2019-a3 @@ -15504,8 +15504,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat W19-69 TeresaLynn DelythPrys - ColinBatchelor - FrancisTyers + ColinBatchelor + FrancisTyers European Association for Machine Translation
Dublin, Ireland
August @@ -15518,7 +15518,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Unsupervised multi-word term recognition in <fixed-case>W</fixed-case>elsh - IrenaSpasić + IrenaSpasić DavidOwen DawnKnight AndreasArtemiou @@ -15566,7 +15566,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Adapting Term Recognition to an Under-Resourced Language: the Case of <fixed-case>I</fixed-case>rish - John P.McCrae + John P.McCrae AdrianDoyle 48–57 W19-6907 @@ -15593,7 +15593,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat A Character-Level <fixed-case>LSTM</fixed-case> Network Model for Tokenizing the <fixed-case>O</fixed-case>ld <fixed-case>I</fixed-case>rish text of the <fixed-case>W</fixed-case>ürzburg Glosses on the <fixed-case>P</fixed-case>auline Epistles AdrianDoyle - John P.McCrae + John P.McCrae ClodaghDowney 70–79 W19-6910 @@ -15614,7 +15614,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of the Second MEMENTO workshop on Modelling Parameters of Cognitive Effort in Translation Production W19-70 MichaelCarl - SilviaHansen-Schirra + SilviaHansen-Schirra European Association for Machine Translation
Dublin, Ireland
August @@ -15635,7 +15635,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Modelling word translation entropy and syntactic equivalence with machine learning BramVanroy - OrphéeDe Clercq + OrphéeDe Clercq LieveMacken 3–4 W19-7002 @@ -15652,7 +15652,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Translation Quality and Effort Prediction in Professional Machine Translation Post-Editing JenniferVardaro - MoritzSchaeffer + MoritzSchaeffer SilviaHansen-Schirra 7–8 W19-7004 @@ -15702,7 +15702,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat AnkeTardel SilviaHansen-Schirra SilkeGutermuth - MoritzSchaeffer + MoritzSchaeffer 19–20 W19-7010 tardel-etal-2019-automatization @@ -15728,7 +15728,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of the Second Workshop on Multilingualism at the Intersection of Knowledge Bases and Machine Translation W19-71 - MihaelArcan + MihaelArcan MarcoTurchi JinhuaDu DimitarShterionov @@ -15747,7 +15747,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Gloss Translation for Under-resourced Languages using Multilingual Neural Machine Translation Bharathi RajaChakravarthi MihaelArcan - John P.McCrae + John P.McCrae 1–7 W19-7101 chakravarthi-etal-2019-wordnet @@ -15756,8 +15756,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat Leveraging <fixed-case>SNOMED</fixed-case> <fixed-case>CT</fixed-case> terms and relations for machine translation of clinical texts from <fixed-case>B</fixed-case>asque to <fixed-case>S</fixed-case>panish XabierSoto OlatzPerez-De-Viñaspre - MaiteOronoz - GorkaLabaka + MaiteOronoz + GorkaLabaka 8–18 W19-7102 soto-etal-2019-leveraging @@ -15785,7 +15785,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Hybrid Data-Model Parallel Training for Sequence-to-Sequence Recurrent Neural Network Machine Translation JunyaOno MasaoUtiyama - EiichiroSumita + EiichiroSumita 4–12 W19-7201 ono-etal-2019-hybrid @@ -15824,7 +15824,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat Proceedings of the Qualities of Literary Machine Translation W19-73 JamesHadley - MajaPopović + MajaPopović HaithemAfli AndyWay European Association for Machine Translation @@ -15840,8 +15840,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat Neural Machine Translation of Literary Texts from <fixed-case>E</fixed-case>nglish to <fixed-case>S</fixed-case>lovene TajaKuzman - ŠpelaVintar - MihaelArčan + ŠpelaVintar + MihaelArčan 1–9 W19-7301 kuzman-etal-2019-neural @@ -15904,7 +15904,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat <fixed-case>T</fixed-case>witter Bot Detection using Diversity Measures DijanaKosmajac - VladoKeselj + VladoKeselj 1–8 W19-7401 kosmajac-keselj-2019-twitter @@ -15943,7 +15943,7 @@ One of the references was wrong therefore it is corrected to cite the appropriat ShobhitJain Sravan BabuBodapati RameshNallapati - AnimaAnandkumar + AnimaAnandkumar 34–41 W19-7405 jain-etal-2019-multi @@ -16164,11 +16164,11 @@ One of the references was wrong therefore it is corrected to cite the appropriat Introduction to <fixed-case>S</fixed-case>anskrit Shabdamitra: An Educational Application of <fixed-case>S</fixed-case>anskrit <fixed-case>W</fixed-case>ordnet - MalharKulkarni + MalharKulkarni NileshJoshi SayaliKhare HanumantRedkar - PushpakBhattacharyya + PushpakBhattacharyya 117–133 W19-7509 kulkarni-etal-2019-introduction @@ -16187,8 +16187,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat Utilizing Word Embeddings based Features for Phylogenetic Tree Generation of <fixed-case>S</fixed-case>anskrit Texts DipteshKanojia AbhijeetDubey - MalharKulkarni - PushpakBhattacharyya + MalharKulkarni + PushpakBhattacharyya GholemrezaHaffari 152–165 W19-7511 @@ -16197,8 +16197,8 @@ One of the references was wrong therefore it is corrected to cite the appropriat An Introduction to the Textual History Tool DipteshKanojia - MalharKulkarni - PushpakBhattacharyya + MalharKulkarni + PushpakBhattacharyya EivindKahrs 166–180 W19-7512 @@ -16243,7 +16243,7 @@ Participants of the tutorial will get a clear understanding of Neural Model type Challenge Test Sets for <fixed-case>MT</fixed-case> Evaluation - MajaPopović + MajaPopović SheilaCastilho Most of the test sets used for the evaluation of MT systems reflect the frequency distribution of different phenomena found in naturally occurring data (”standard” or ”natural” test sets). However, to better understand particular strengths and weaknesses of MT systems, especially those based on neural networks, it is necessary to apply more focused evaluation procedures. Therefore, another type of test sets (”challenge” test sets, also called ”test suites”) is being increasingly employed in order to highlight points of difficulty which are relevant to model development, training, or using of the given system. This tutorial will be useful for anyone (researchers, developers, users, translators) interested in detailed evaluation and getting a better understanding of machine translation (MT) systems and models. The attendees will learn about the motivation and linguistic background of challenge test sets and a range of testing possibilities applied to the state-of-the-art MT systems, as well as a number of practical aspects and challenges. W19-7602.Presentation.pdf @@ -16292,7 +16292,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la <fixed-case>S</fixed-case>yntax<fixed-case>F</fixed-case>est 2019 Invited talk - Transferring <fixed-case>NLP</fixed-case> models across languages and domains - BarbaraPlank + BarbaraPlank 2–2 W19-7702 10.18653/v1/W19-7702 @@ -16313,7 +16313,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Reflexives in <fixed-case>C</fixed-case>zech from a Dependency Perspective VaclavaKettnerova - MarketaLopatkova + MarketaLopatkova 14–25 W19-7704 10.18653/v1/W19-7704 @@ -16381,7 +16381,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la EleniMetheniti PomiPark KristinaKolesova - GünterNeumann + GünterNeumann 100–111 W19-7712 10.18653/v1/W19-7712 @@ -16428,7 +16428,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Towards Deep <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies KiraDroganova - DanielZeman + DanielZeman 144–152 W19-7717 10.18653/v1/W19-7717 @@ -16438,8 +16438,8 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Delimiting Adverbial Meanings. A corpus-based comparative study on <fixed-case>C</fixed-case>zech spatial prepositions and their <fixed-case>E</fixed-case>nglish equivalents MarieMikulová VeronikaKolářová - JarmilaPanevová - EvaHajičová + JarmilaPanevová + EvaHajičová 153–159 W19-7718 10.18653/v1/W19-7718 @@ -16448,7 +16448,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la A <fixed-case>S</fixed-case>panish <fixed-case>E</fixed-case>-dictionary of Collocations Maria AuxiliadoraBarrios Rodriguez - IgorBoguslavsky + IgorBoguslavsky 160–167 W19-7719 10.18653/v1/W19-7719 @@ -16496,7 +16496,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la <fixed-case>P</fixed-case>āṇinian Syntactico-Semantic Relation Labels AmbaKulkarni - DiptiSharma + DiptiSharma 198–208 W19-7724 10.18653/v1/W19-7724 @@ -16505,7 +16505,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Experiments on human incremental parsing LeonidMityushin - LeonidIomdin + LeonidIomdin 209–215 W19-7725 10.18653/v1/W19-7725 @@ -16526,10 +16526,10 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Proceedings of the 18th International Workshop on Treebanks and Linguistic Theories (TLT, SyntaxFest 2019) W19-78 - MarieCandito + MarieCandito KilianEvang StephanOepen - DjaméSeddah + DjaméSeddah Association for Computational Linguistics
Paris, France
August @@ -16583,10 +16583,10 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
Parallel Dependency Treebank Annotated with Interlinked Verbal Synonym Classes and Roles - ZdeňkaUrešová - EvaFučíková - EvaHajičová - JanHajič + ZdeňkaUrešová + EvaFučíková + EvaHajičová + JanHajič 38–50 W19-7805 10.18653/v1/W19-7805 @@ -16594,7 +16594,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Ordering of Adverbials of Time and Place in Grammars and in an Annotated <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>zech Parallel Corpus - EvaHajičová + EvaHajičová JiříMírovský KateřinaRysová 51–60 @@ -16621,7 +16621,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Challenges of Annotating a Code-Switching Treebank - ÖzlemÇetinoğlu + ÖzlemÇetinoğlu ÇağrıÇöltekin 82–90 W19-7809 @@ -16631,7 +16631,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Dependency Parser for <fixed-case>B</fixed-case>engali-<fixed-case>E</fixed-case>nglish Code-Mixed Data enhanced with a Synthetic Treebank UrmiGhosh - DiptiSharma + DiptiSharma SimranKhanuja 91–99 W19-7810 @@ -16692,7 +16692,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Challenges of language change and variation: towards an extended treebank of Medieval <fixed-case>F</fixed-case>rench MathildeRegnault SophiePrévost - EricVillemonte de la Clergerie + EricVillemonte de la Clergerie 144–150 W19-7816 10.18653/v1/W19-7816 @@ -16777,7 +16777,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Extracting out of the subject in <fixed-case>F</fixed-case>rench: experimental evidence - AnneAbeillé + AnneAbeillé ElodieWinckel 68–74 W19-7908 @@ -16807,7 +16807,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Dependency Length Minimization vs. Word Order Constraints: An Empirical Study On 55 Treebanks XiangYu - AgnieszkaFalenska + AgnieszkaFalenska JonasKuhn 89–97 W19-7911 @@ -16857,7 +16857,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Proceedings of the Third Workshop on Universal Dependencies (UDW, SyntaxFest 2019) W19-80 AlexandreRademaker - FrancisTyers + FrancisTyers Association for Computational Linguistics
Paris, France
August @@ -16891,7 +16891,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
Developing <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>W</fixed-case>olof - Cheikh BambaDione + Cheikh BambaDione 12–23 W19-8003 10.18653/v1/W19-8003 @@ -16900,7 +16900,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Improving <fixed-case>UD</fixed-case> processing via satellite resources for morphology KajaDobrovoljc - TomažErjavec + TomažErjavec NikolaLjubešić 24–34 W19-8004 @@ -16970,7 +16970,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Recursive <fixed-case>LSTM</fixed-case> Tree Representation for Arc-Standard Transition-Based Dependency Parsing - MohabElkaref + MohabElkaref BerndBohnet 101–107 W19-8012 @@ -16984,7 +16984,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la ŞaziyeBetül Özateş BalkızÖztürk Başaran TungaGüngör - ArzucanÖzgür + ArzucanÖzgür 108–115 W19-8013 10.18653/v1/W19-8013 @@ -16993,7 +16993,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Towards transferring <fixed-case>B</fixed-case>ulgarian Sentences with Elliptical Elements to <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies: issues and strategies PetyaOsenova - KirilSimov + KirilSimov 116–123 W19-8014 10.18653/v1/W19-8014 @@ -17011,7 +17011,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Building minority dependency treebanks, dictionaries and computational grammars at the same time—an experiment in <fixed-case>K</fixed-case>arelian treebanking - Tommi APirinen + Tommi APirinen 132–136 W19-8016 10.18653/v1/W19-8016 @@ -17026,9 +17026,9 @@ In this tutorial on MT and post-editing we would like to continue sharing the la VeraDemberg ChandraKhatri AbhinavRastogi - DoniaScott - MarilynWalker - MichaelWhite + DoniaScott + MarilynWalker + MichaelWhite Association for Computational Linguistics
Tokyo, Japan
November @@ -17172,7 +17172,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Proceedings of the 1st Workshop on Interactive Natural Language Technology for Explainable Artificial Intelligence (NL4XAI 2019) W19-84 - Jose M.Alonso + Jose M.Alonso AlejandroCatala Association for Computational Linguistics 2019 @@ -17202,8 +17202,8 @@ In this tutorial on MT and post-editing we would like to continue sharing the la
A Survey of Explainable <fixed-case>AI</fixed-case> Terminology - Miruna-AdrianaClinciu - HelenHastie + Miruna-AdrianaClinciu + HelenHastie 8–13 W19-8403 10.18653/v1/W19-8403 @@ -17255,7 +17255,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Proceedings of the Second International Workshop on Resources and Tools for Derivational Morphology W19-85 MagdaŠevčíková - ZdeněkŽabokrtský + ZdeněkŽabokrtský EleonoraLitta MarcoPassarotti Charles University, Faculty of Mathematics and Physics, Institute of Formal and Applied Linguistics @@ -17277,7 +17277,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la <fixed-case>P</fixed-case>ara<fixed-case>D</fixed-case>is and Démonette: From Theory to Resources for Derivational Paradigms - FiammettaNamer + FiammettaNamer NabilHathout 5--14 W19-8502 @@ -17287,7 +17287,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Semantic descriptions of <fixed-case>F</fixed-case>rench derivational relations in a families-and-paradigms framework DanieleSanacore NabilHathout - FiammettaNamer + FiammettaNamer 15--24 W19-8503 sanacore-etal-2019-semantic @@ -17387,7 +17387,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Proceedings of the 12th International Conference on Natural Language Generation - Keesvan Deemter + Keesvan Deemter ChenghuaLin HiroyaTakamura Association for Computational Linguistics @@ -17474,7 +17474,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Computational Argumentation Synthesis as a Language Modeling Task RoxanneEl Baff HenningWachsmuth - KhalidAl Khatib + KhalidAl Khatib ManfredStede BennoStein 54–64 @@ -17493,7 +17493,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la BehnamHedayatnia AnuVenkatesh RaeferGabriel - DilekHakkani-Tur + DilekHakkani-Tur 65–75 Encoder-decoder based neural architectures serve as the basis of state-of-the-art approaches in end-to-end open domain dialog systems. Since most of such systems are trained with a maximum likelihood (MLE) objective they suffer from issues such as lack of generalizability and the generic response problem, i.e., a system response that can be an answer to a large number of user utterances, e.g., “Maybe, I don’t know.” Having explicit feedback on the relevance and interestingness of a system response at each turn can be a useful signal for mitigating such issues and improving system quality by selecting responses from different approaches. Towards this goal, we present a system that evaluates chatbot responses at each dialog turn for coherence and engagement. Our system provides explicit turn-level dialog quality feedback, which we show to be highly correlated with human evaluation. To show that incorporating this feedback in the neural response generation models improves dialog quality, we present two different and complementary mechanisms to incorporate explicit feedback into a neural response generation model: reranking and direct modification of the loss function during training. Our studies show that a response generation model that incorporates these combined feedback mechanisms produce more engaging and coherent responses in an open-domain spoken dialog setting, significantly improving the response quality using both automatic and human evaluation. W19-8608 @@ -17527,7 +17527,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la JinfengRao KartikeyaUpasani AnushaBalakrishnan - MichaelWhite + MichaelWhite AnujKumar RajenSubba 95–100 @@ -17550,7 +17550,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Neural Question Generation using Interrogative Phrases YuichiSasazawa ShoTakase - NaoakiOkazaki + NaoakiOkazaki 106–111 Question Generation (QG) is the task of generating questions from a given passage. One of the key requirements of QG is to generate a question such that it results in a target answer. Previous works used a target answer to obtain a desired question. However, we also want to specify how to ask questions and improve the quality of generated questions. In this study, we explore the use of interrogative phrases as additional sources to control QG. By providing interrogative phrases, we expect that QG can generate a more reliable sequence of words subsequent to an interrogative phrase. We present a baseline sequence-to-sequence model with the attention, copy, and coverage mechanisms, and show that the simple baseline achieves state-of-the-art performance. The experiments demonstrate that interrogative phrases contribute to improving the performance of QG. In addition, we report the superiority of using interrogative phrases in human evaluation. Finally, we show that a question answering system can provide target answers more correctly when the questions are generated with interrogative phrases. W19-8613 @@ -17571,7 +17571,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la <fixed-case>M</fixed-case>in<fixed-case>W</fixed-case>iki<fixed-case>S</fixed-case>plit: A Sentence Splitting Corpus with Minimal Propositions ChristinaNiklaus - AndréFreitas + AndréFreitas SiegfriedHandschuh 118–123 We compiled a new sentence splitting corpus that is composed of 203K pairs of aligned complex source and simplified target sentences. Contrary to previously proposed text simplification corpora, which contain only a small number of split examples, we present a dataset where each input sentence is broken down into a set of minimal propositions, i.e. a sequence of sound, self-contained utterances with each of them presenting a minimal semantic unit that cannot be further decomposed into meaningful propositions. This corpus is useful for developing sentence splitting approaches that learn how to transform sentences with a complex linguistic structure into a fine-grained representation of short sentences that present a simple and more regular structure which is easier to process for downstream applications and thus facilitates and improves their performance. @@ -17597,7 +17597,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la <fixed-case>KPT</fixed-case>imes: A Large-Scale Dataset for Keyphrase Generation on News Documents YgorGallina FlorianBoudin - BeatriceDaille + BeatriceDaille 130–135 Keyphrase generation is the task of predicting a set of lexical units that conveys the main content of a source text. Existing datasets for keyphrase generation are only readily available for the scholarly domain and include non-expert annotations. In this paper we present KPTimes, a large-scale dataset of news texts paired with editor-curated keyphrases. Exploring the dataset, we show how editors tag documents, and how their annotations differ from those found in existing datasets. We also train and evaluate state-of-the-art neural keyphrase generation models on KPTimes to gain insights on how well they perform on the news domain. The dataset is available online at https://github.com/ygorg/KPTimes. W19-8617 @@ -17607,7 +17607,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Sketch Me if You Can: Towards Generating Detailed Descriptions of Object Shape by Grounding in Images and Drawings TingHan - SinaZarrieß + SinaZarrieß 136–140 A lot of recent work in Language & Vision has looked at generating descriptions or referring expressions for objects in scenes of real-world images, though focusing mostly on relatively simple language like object names, color and location attributes (e.g., brown chair on the left). This paper presents work on Draw-and-Tell, a dataset of detailed descriptions for common objects in images where annotators have produced fine-grained attribute-centric expressions distinguishing a target object from a range of similar objects. Additionally, the dataset comes with hand-drawn sketches for each object. As Draw-and-Tell is medium-sized and contains a rich vocabulary, it constitutes an interesting challenge for CNN-LSTM architectures used in state-of-the-art image captioning models. We explore whether the additional modality given through sketches can help such a model to learn to accurately ground detailed language referring expressions to object shapes. Our results are encouraging. W19-8618 @@ -17619,7 +17619,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la FengNie JinpengWang RongPan - Chin-YewLin + Chin-YewLin 141–146 Data-to-text generation aims to generate descriptions given a structured input data (i.e., a table with multiple records). Existing neural methods for encoding input data can be divided into two categories: a) pooling based encoders which ignore dependencies between input records or b) recurrent encoders which model only sequential dependencies between input records. In our investigation, although the recurrent encoder generally outperforms the pooling based encoder by learning the sequential dependencies, it is sensitive to the order of the input records (i.e., performance decreases when injecting the random shuffling noise over input data). To overcome this problem, we propose to adopt the self-attention mechanism to learn dependencies between arbitrary input records. Experimental results show the proposed method achieves comparable results and remains stable under random shuffling over input data. W19-8619 @@ -17642,7 +17642,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Tell Me More: A Dataset of Visual Scene Description Sequences NikolaiIlinykh - SinaZarrieß + SinaZarrieß DavidSchlangen 152–157 We present a dataset consisting of what we call image description sequences, which are multi-sentence descriptions of the contents of an image. These descriptions were collected in a pseudo-interactive setting, where the describer was told to describe the given image to a listener who needs to identify the image within a set of images, and who successively asks for more information. As we show, this setup produced nicely structured data that, we think, will be useful for learning models capable of planning and realising such description discourses. @@ -17653,7 +17653,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la A Closer Look at Recent Results of Verb Selection for Data-to-Text <fixed-case>NLG</fixed-case> GuanyiChen - Jin-GeYao + Jin-GeYao 158–163 Automatic natural language generation systems need to use the contextually-appropriate verbs when describing different kinds of facts or events, which has triggered research interest on verb selection for data-to-text generation. In this paper, we discuss a few limitations of the current task settings and the evaluation metrics. We also provide two simple, efficient, interpretable baseline approaches for statistical selection of trend verbs, which give a strong performance on both previously used evaluation metrics and our new evaluation. W19-8622 @@ -17665,7 +17665,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la <fixed-case>V</fixed-case>i<fixed-case>GGO</fixed-case>: A Video Game Corpus for Data-To-Text Generation in Open-Domain Conversation JurajJuraska KevinBowden - MarilynWalker + MarilynWalker 164–172 The uptake of deep learning in natural language generation (NLG) led to the release of both small and relatively large parallel corpora for training neural models. The existing data-to-text datasets are, however, aimed at task-oriented dialogue systems, and often thus limited in diversity and versatility. They are typically crowdsourced, with much of the noise left in them. Moreover, current neural NLG models do not take full advantage of large training data, and due to their strong generalizing properties produce sentences that look template-like regardless. We therefore present a new corpus of 7K samples, which (1) is clean despite being crowdsourced, (2) has utterances of 9 generalizable and conversational dialogue act types, making it more suitable for open-domain dialogue systems, and (3) explores the domain of video games, which is new to dialogue systems despite having excellent potential for supporting rich conversations. W19-8623 @@ -17684,7 +17684,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Visually grounded generation of entailments from premises - SomayehJafaritazehjani + SomayehJafaritazehjani AlbertGatt MarcTanti 178–188 @@ -17741,7 +17741,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Efficiency Metrics for Data-Driven Models: A Text Summarization Case Study ErionÇano - OndřejBojar + OndřejBojar 229–239 Using data-driven models for solving text summarization or similar tasks has become very common in the last years. Yet most of the studies report basic accuracy scores only, and nothing is known about the ability of the proposed models to improve when trained on more data. In this paper, we define and propose three data efficiency metrics: data score efficiency, data time deficiency and overall data efficiency. We also propose a simple scheme that uses those metrics and apply it for a more comprehensive evaluation of popular methods on text summarization and title generation tasks. For the latter task, we process and release a huge collection of 35 million abstract-title pairs from scientific articles. Our results reveal that among the tested models, the Transformer is the most efficient on both tasks. W19-8630 @@ -17786,7 +17786,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Personalized Substitution Ranking for Lexical Simplification - JohnLee + JohnLee Chak YanYeung 258–267 A lexical simplification (LS) system substitutes difficult words in a text with simpler ones to make it easier for the user to understand. In the typical LS pipeline, the Substitution Ranking step determines the best substitution out of a set of candidates. Most current systems do not consider the user’s vocabulary proficiency, and always aim for the simplest candidate. This approach may overlook less-simple candidates that the user can understand, and that are semantically closer to the original word. We propose a personalized approach for Substitution Ranking to identify the candidate that is the closest synonym and is non-complex for the user. In experiments on learners of English at different proficiency levels, we show that this approach enhances the semantic faithfulness of the output, at the cost of a relatively small increase in the number of complex words. @@ -17809,7 +17809,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Head-First Linearization with Tree-Structured Representation XiangYu - AgnieszkaFalenska + AgnieszkaFalenska Ngoc ThangVu JonasKuhn 279–289 @@ -17830,7 +17830,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Generation of Hip-Hop Lyrics with Hierarchical Modeling and Conditional Templates - EnriqueManjavacas + EnriqueManjavacas MikeKestemont FolgertKarsdorp 301–310 @@ -17872,9 +17872,9 @@ In this tutorial on MT and post-editing we would like to continue sharing the la HideakiTamori KoKikuta JiroNishitoba - NaoakiOkazaki + NaoakiOkazaki KentaroInui - ManabuOkumura + ManabuOkumura 333–343 Browsing news articles on multiple devices is now possible. The lengths of news article headlines have precise upper bounds, dictated by the size of the display of the relevant device or interface. Therefore, controlling the length of headlines is essential when applying the task of headline generation to news production. However, because there is no corpus of headlines of multiple lengths for a given article, previous research on controlling output length in headline generation has not discussed whether the system outputs could be adequately evaluated without multiple references of different lengths. In this paper, we introduce two corpora, which are Japanese News Corpus (JNC) and JApanese MUlti-Length Headline Corpus (JAMUL), to confirm the validity of previous evaluation settings. The JNC provides common supervision data for headline generation. The JAMUL is a large-scale evaluation dataset for headlines of three different lengths composed by professional editors. We report new findings on these corpora; for example, although the longest length reference summary can appropriately evaluate the existing methods controlling output length, this evaluation setting has several problems. W19-8641 @@ -17898,7 +17898,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la AlbertGatt Emielvan Miltenburg SanderWubben - EmielKrahmer + EmielKrahmer 355–368 Currently, there is little agreement as to how Natural Language Generation (NLG) systems should be evaluated. While there is some agreement regarding automatic metrics, there is a high degree of variation in the way that human evaluation is carried out. This paper provides an overview of how human evaluation is currently conducted, and presents a set of best practices, grounded in the literature. With this paper, we hope to contribute to the quality and consistency of human evaluations in NLG. W19-8643 @@ -17973,7 +17973,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Merelvan de Kerkhof RuudKoolen MartijnGoudbeek - EmielKrahmer + EmielKrahmer 403–408 Task effects in NLG corpus elicitation recently started to receive more attention, but are usually not modeled statistically. We present a controlled replication of the study by Van Miltenburg et al. (2018b), contrasting spoken with written descriptions. We collected additional written Dutch descriptions to supplement the spoken data from the DIDEC corpus, and analyzed the descriptions using mixed effects modeling to account for variation between participants and items. Our results show that the effects of modality largely disappear in a controlled setting. W19-8649 @@ -18007,7 +18007,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Semantic Noise Matters for Neural Natural Language Generation OndřejDušek - David M.Howcroft + David M.Howcroft VerenaRieser 421–426 Neural natural language generation (NNLG) systems are known for their pathological outputs, i.e. generating text which is unrelated to the input specification. In this paper, we show the impact of semantic noise on state-of-the-art NNLG models which implement different semantic control mechanisms. We find that cleaned data can improve semantic correctness by up to 97%, while maintaining fluency. We also find that the most common error is omitting information, rather than hallucination. @@ -18030,7 +18030,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Towards a Metric for Automated Conversational Dialogue System Evaluation and Improvement - Jan MilanDeriu + Jan MilanDeriu MarkCieliebak 432–437 We present “AutoJudge”, an automated evaluation method for conversational dialogue systems. The method works by first generating dialogues based on self-talk, i.e. dialogue systems talking to itself. Then, it uses human ratings on these dialogues to train an automated judgement model. Our experiments show that AutoJudge correlates well with the human ratings and can be used to automatically evaluate dialogue systems, even in deployed systems. In a second part, we attempt to apply AutoJudge to improve existing systems. This works well for re-ranking a set of candidate utterances. However, our experiments show that AutoJudge cannot be applied as reward for reinforcement learning, although the metric can distinguish good from bad dialogues. We discuss potential reasons, but state here already that this is still an open question for further research. @@ -18055,7 +18055,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la FelixClouth JeroenVermunt XanderVerbeek - EmielKrahmer + EmielKrahmer 443–452 In this paper, we present a novel data-to-text system for cancer patients, providing information on quality of life implications after treatment, which can be embedded in the context of shared decision making. Currently, information on quality of life implications is often not discussed, partly because (until recently) data has been lacking. In our work, we rely on a newly developed prediction model, which assigns patients to scenarios. Furthermore, we use data-to-text techniques to explain these scenario-based predictions in personalized and understandable language. We highlight the possibilities of NLG for personalization, discuss ethical implications and also present the outcomes of a first evaluation with clinicians. W19-8656 @@ -18069,7 +18069,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la RahulGoel BehnamHedayatnia AnuVenkatesh - DilekHakkani-Tur + DilekHakkani-Tur RaeferGabriel 453–462 Current approaches to Natural Language Generation (NLG) for dialog mainly focus on domain-specific, task-oriented applications (e.g. restaurant booking) using limited ontologies (up to 20 slot types), usually without considering the previous conversation context. Furthermore, these approaches require large amounts of data for each domain, and do not benefit from examples that may be available for other domains. This work explores the feasibility of applying statistical NLG to scenarios requiring larger ontologies, such as multi-domain dialog applications or open-domain question answering (QA) based on knowledge graphs. We model NLG through an Encoder-Decoder framework using a large dataset of interactions between real-world users and a conversational agent for open-domain QA. First, we investigate the impact of increasing the number of slot types on the generation quality and experiment with different partitions of the QA data with progressively larger ontologies (up to 369 slot types). Second, we perform multi-task learning experiments between open-domain QA and task-oriented dialog, and benchmark our model on a popular NLG dataset. Moreover, we experiment with using the conversational context as an additional input to improve response generation quality. Our experiments show the feasibility of learning statistical NLG models for open-domain QA with larger ontologies. @@ -18079,7 +18079,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Using <fixed-case>NLG</fixed-case> for speech synthesis of mathematical sentences - AlessandroMazzei + AlessandroMazzei MicheleMonticone CristianBernareggi 463–472 @@ -18128,7 +18128,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la <fixed-case>D</fixed-case>is<fixed-case>S</fixed-case>im: A Discourse-Aware Syntactic Text Simplification Framework for <fixed-case>E</fixed-case>nglish and <fixed-case>G</fixed-case>erman ChristinaNiklaus MatthiasCetto - AndréFreitas + AndréFreitas SiegfriedHandschuh 504–507 We introduce DisSim, a discourse-aware sentence splitting framework for English and German whose goal is to transform syntactically complex sentences into an intermediate representation that presents a simple and more regular structure which is easier to process for downstream semantic applications. For this purpose, we turn input sentences into a two-layered semantic hierarchy in the form of core facts and accompanying contexts, while identifying the rhetorical relations that hold between them. In that way, we preserve the coherence structure of the input and, hence, its interpretability for downstream tasks. @@ -18167,7 +18167,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Generating Abstractive Summaries with Finetuned Language Models SebastianGehrmann ZacharyZiegler - AlexanderRush + AlexanderRush 516–522 Neural abstractive document summarization is commonly approached by models that exhibit a mostly extractive behavior. This behavior is facilitated by a copy-attention which allows models to copy words from a source document. While models in the mostly extractive news summarization domain benefit from this inductive bias, they commonly fail to paraphrase or compress information from the source document. Recent advances in transfer-learning from large pretrained language models give rise to alternative approaches that do not rely on copy-attention and instead learn to generate concise and abstractive summaries. In this paper, as part of the TL;DR challenge, we compare the abstractiveness of summaries from different summarization approaches and show that transfer-learning can be efficiently utilized without any changes to the model architecture. We demonstrate that the approach leads to a higher level of abstraction for a similar performance on the TL;DR challenge tasks, enabling true natural language compression. W19-8665 @@ -18180,7 +18180,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la MichaelVölske NedimLipka BennoStein - HinrichSchütze + HinrichSchütze MartinPotthast 523–528 In this paper, we report on the results of the TL;DR challenge, discussing an extensive manual evaluation of the expected properties of a good summary based on analyzing the comments provided by human annotators. @@ -18225,7 +18225,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Neural Generation for <fixed-case>C</fixed-case>zech: Data and Baselines OndřejDušek - FilipJurčíček + FilipJurčíček 563–574 We present the first dataset targeted at end-to-end NLG in Czech in the restaurant domain, along with several strong baseline models using the sequence-to-sequence approach. While non-English NLG is under-explored in general, Czech, as a morphologically rich language, makes the task even harder: Since Czech requires inflecting named entities, delexicalization or copy mechanisms do not work out-of-the-box and lexicalizing the generated outputs is non-trivial. In our experiments, we present two different approaches to this this problem: (1) using a neural language model to select the correct inflected form while lexicalizing, (2) a two-step generation setup: our sequence-to-sequence model generates an interleaved sequence of lemmas and morphological tags, which are then inflected by a morphological generator. W19-8670 @@ -18235,7 +18235,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Modeling Confidence in Sequence-to-Sequence Models JanNiehues - Ngoc-QuanPham + Ngoc-QuanPham 575–583 Recently, significant improvements have been achieved in various natural language processing tasks using neural sequence-to-sequence models. While aiming for the best generation quality is important, ultimately it is also necessary to develop models that can assess the quality of their output. In this work, we propose to use the similarity between training and test conditions as a measure for models’ confidence. We investigate methods solely using the similarity as well as methods combining it with the posterior probability. While traditionally only target tokens are annotated with confidence measures, we also investigate methods to annotate source tokens with confidence. By learning an internal alignment model, we can significantly improve confidence projection over using state-of-the-art external alignment tools. We evaluate the proposed methods on downstream confidence estimation for machine translation (MT). We show improvements on segment-level confidence estimation as well as on confidence estimation for source tokens. In addition, we show that the same methods can also be applied to other tasks using sequence-to-sequence models. On the automatic speech recognition (ASR) task, we are able to find 60% of the errors by looking at 20% of the data. W19-8671 @@ -18245,7 +18245,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la A Good Sample is Hard to Find: Noise Injection Sampling and Self-Training for Neural Language Generation Models ChrisKedzie - KathleenMcKeown + KathleenMcKeown 584–593 Deep neural networks (DNN) are quickly becoming the de facto standard modeling method for many natural language generation (NLG) tasks. In order for such models to truly be useful, they must be capable of correctly generating utterances for novel meaning representations (MRs) at test time. In practice, even sophisticated DNNs with various forms of semantic control frequently fail to generate utterances faithful to the input MR. In this paper, we propose an architecture agnostic self-training method to sample novel MR/text utterance pairs to augment the original training data. Remarkably, after training on the augmented data, even simple encoder-decoder models with greedy decoding are capable of generating semantically correct utterances that are as good as state-of-the-art outputs in both automatic and human evaluations of quality. W19-8672 @@ -18304,7 +18304,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la What Influences the Features of Post-editese? A Preliminary Study SheilaCastilho NatáliaResende - RuslanMitkov + RuslanMitkov 19–27 While a number of studies have shown evidence of translationese phenomena, that is, statistical differences between original texts and translated texts (Gellerstam, 1986), results of studies searching for translationese features in postedited texts (what has been called ”posteditese” (Daems et al., 2017)) have presented mixed results. This paper reports a preliminary study aimed at identifying the presence of post-editese features in machine-translated post-edited texts and at understanding how they differ from translationese features. We test the influence of factors such as post-editing (PE) levels (full vs. light), translation proficiency (professionals vs. students) and text domain (news vs. literary). Results show evidence of post-editese features, especially in light PE texts and in certain domains. W19-8703 @@ -18314,8 +18314,8 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Designing a Frame-Semantic Machine Translation Evaluation Metric OliverCzulo - Tiago TimponiTorrent - Ely Edison da SilvaMatos + Tiago TimponiTorrent + Ely Edison da SilvaMatos AlexandreDiniz da Costa DebanjanaKar 28–35 @@ -18355,7 +18355,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Comparing a Hand-crafted to an Automatically Generated Feature Set for Deep Learning: Pairwise Translation Evaluation DespoinaMouratidis - Katia LidaKermanidis + Katia LidaKermanidis 66–74 The automatic evaluation of machine translation (MT) has proven to be a very significant research topic. Most automatic evaluation methods focus on the evaluation of the output of MT as they compute similarity scores that represent translation quality. This work targets on the performance of MT evaluation. We present a general scheme for learning to classify parallel translations, using linguistic information, of two MT model outputs and one human (reference) translation. We present three experiments to this scheme using neural networks (NN). One using string based hand-crafted features (Exp1), the second using automatically trained embeddings from the reference and the two MT outputs (one from a statistical machine translation (SMT) model and the other from a neural ma-chine translation (NMT) model), which are learned using NN (Exp2), and the third experiment (Exp3) that combines information from the other two experiments. The languages involved are English (EN), Greek (GR) and Italian (IT) segments are educational in domain. The proposed language-independent learning scheme which combines information from the two experiments (experiment 3) achieves higher classification accuracy compared with models using BLEU score information as well as other classification approaches, such as Random Forest (RF) and Support Vector Machine (SVM). W19-8708 @@ -18366,7 +18366,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Differences between <fixed-case>SMT</fixed-case> and <fixed-case>NMT</fixed-case> Output - a Translators’ Point of View JonathanMutal LiseVolkart - PierretteBouillon + PierretteBouillon SabrinaGirletti PaulaEstrella 75–81 @@ -18416,7 +18416,7 @@ In this tutorial on MT and post-editing we would like to continue sharing the la Towards a Proactive <fixed-case>MWE</fixed-case> Terminological Platform for Cross-Lingual Mediation in the Age of Big Data - Benjamin K.Tsou + Benjamin K.Tsou KapoChow JunruNie YuanYuan @@ -18482,8 +18482,8 @@ In this tutorial on MT and post-editing we would like to continue sharing the la <fixed-case>RANLP</fixed-case> 2019 Multilingual Headline Generation Task Overview MarinaLitvak - John M.Conroy - Peter A.Rankel + John M.Conroy + Peter A.Rankel 1–5 The objective of the 2019 RANLP Multilingual Headline Generation (HG) Task is to explore some of the challenges highlighted by current state of the art approaches on creating informative headlines to news articles: non-descriptive headlines, out-of-domain training data, generating headlines from long documents which are not well represented by the head heuristic, and dealing with multilingual domain. This tasks makes available a large set of training data for headline generation and provides an evaluation methods for the task. Our data sets are drawn from Wikinews as well as Wikipedia. Participants were required to generate headlines for at least 3 languages, which were evaluated via automatic methods. A key aspect of the task is multilinguality. The task measures the performance of multilingual headline generation systems using the Wikipedia and Wikinews articles in multiple languages. The objective is to assess the performance of automatic headline generation techniques on text documents covering a diverse range of languages and topics outside the news domain. W19-8901 diff --git a/data/xml/W77.xml b/data/xml/W77.xml index 9f43ed37ed..71e803e644 100644 --- a/data/xml/W77.xml +++ b/data/xml/W77.xml @@ -77,7 +77,7 @@ <fixed-case>DAN</fixed-case>w<fixed-case>ORD</fixed-case> – Hyppighedsundersøgelser i moderne dansk (<fixed-case>DAN</fixed-case>w<fixed-case>ORD</fixed-case> – Frequency surveys in modern <fixed-case>D</fixed-case>anish) [In <fixed-case>D</fixed-case>anish] - BenteMaegaard + BenteMaegaard HanneRuus 65–74 W77-0109 @@ -107,7 +107,7 @@ Chartanalys och morfologi (Chart analysis and morphology) [In <fixed-case>S</fixed-case>wedish] - AnnaSågvall Hein + AnnaSågvall Hein 87–93 W77-0113 sagvall-hein-1977-chartanalys diff --git a/data/xml/W79.xml b/data/xml/W79.xml index 966bdb5860..c223bf5f6a 100644 --- a/data/xml/W79.xml +++ b/data/xml/W79.xml @@ -4,7 +4,7 @@ Proceedings of the 2nd Nordic Conference of Computational Linguistics (NODALIDA 1979) W79-01 - BenteMaegaard + BenteMaegaard Institut for Anvendt og Matematisk Lingvistik, University of Copenhagen, Denmark
Copenhagen, Denmark
October diff --git a/data/xml/W81.xml b/data/xml/W81.xml index f0c3965785..a8f569db22 100644 --- a/data/xml/W81.xml +++ b/data/xml/W81.xml @@ -59,7 +59,7 @@
Experience with <fixed-case>COMMENTATOR</fixed-case>, a computer system simulating verbal behaviour - MilanBílý + MilanBílý 39–46 W81-0107 bily-1981-experience @@ -94,7 +94,7 @@ <fixed-case>U</fixed-case>ppsala Chart Parser, Version 2 (<fixed-case>UCP</fixed-case>-2) – En översikt (<fixed-case>U</fixed-case>ppsala Chart Parser, Version 2 (<fixed-case>UCP</fixed-case>-2) – An overview) [In <fixed-case>S</fixed-case>wedish] - AnnaSågvall Hein + AnnaSågvall Hein 95–116 W81-0112 sagvall-hein-1981-uppsala diff --git a/data/xml/W83.xml b/data/xml/W83.xml index 948ed90998..449e3f6a56 100644 --- a/data/xml/W83.xml +++ b/data/xml/W83.xml @@ -4,7 +4,7 @@ Proceedings of the 4th Nordic Conference of Computational Linguistics (NODALIDA 1983) W83-01 - Anna SågvallHein + Anna SågvallHein Centrum för datorlingvistik, Uppsala University, Sweden
Uppsala, Sweden
May @@ -89,10 +89,10 @@
Knowledge Engineering Applied to Morphological Analysis - HarriJäppinen - AarnoLehtola - EsaNelimarkka - MattiYlilammi + HarriJäppinen + AarnoLehtola + EsaNelimarkka + MattiYlilammi 111–120 W83-0111 jappinen-etal-1984-knowledge @@ -120,23 +120,23 @@ <fixed-case>HP</fixed-case> – A Heuristic Finite State Parser Based on Morphology - GunnelKällgren + GunnelKällgren 155–161 W83-0115 kallgren-1984-hp Regelformalismer til brug ved datamatisk lingvistik (Rule formalisms for use in computational linguistics) [In <fixed-case>D</fixed-case>anish] - BenteMaegaard + BenteMaegaard 162–168 W83-0116 maegaard-1984-regelformalismer A Computational Model of <fixed-case>F</fixed-case>innish Sentence Structure - EsaNelimarkka - HarriJäppinen - AarnoLehtola + EsaNelimarkka + HarriJäppinen + AarnoLehtola 169–177 W83-0117 nelimarkka-etal-1984-computational diff --git a/data/xml/W85.xml b/data/xml/W85.xml index ede1febbe4..94f5290756 100644 --- a/data/xml/W85.xml +++ b/data/xml/W85.xml @@ -51,7 +51,7 @@ A self-extending lexicon: description of a word learning program - EvaEjerhed + EvaEjerhed HankBromley 59–72 W85-0106 @@ -88,7 +88,7 @@ <fixed-case>REFTEX</fixed-case> – et datamatstøttet oversættelsessystem (<fixed-case>REFTEX</fixed-case> – A computer-assisted translation system) [In <fixed-case>D</fixed-case>anish] - Poul SørenKjærsgaard + Poul SørenKjærsgaard 121–130 W85-0111 kjaersgaard-1986-reftex @@ -109,7 +109,7 @@ <fixed-case>DPL</fixed-case> – a computational method for describing grammars and modelling parsers - AarnoLehtola + AarnoLehtola 151–159 W85-0114 lehtola-1986-dpl @@ -137,7 +137,7 @@ A two-level description of written <fixed-case>F</fixed-case>rench - AnnetteÖstling Andersson + AnnetteÖstling Andersson 195–202 W85-0118 ostling-andersson-1986-two diff --git a/data/xml/W87.xml b/data/xml/W87.xml index f8a4dbc816..b89c9b0849 100644 --- a/data/xml/W87.xml +++ b/data/xml/W87.xml @@ -16,7 +16,7 @@ What good is Syntactic Information in the Lexicon of a Syntactic Parser? - GunnelKällgren + GunnelKällgren 5–16 W87-0101 kallgren-1988-good @@ -37,7 +37,7 @@ <fixed-case>AWARE</fixed-case> – <fixed-case>DAG</fixed-case>-transformations for Semantic Analysis - AarnoLehtola + AarnoLehtola TimoHonkela 58–68 W87-0104 @@ -46,7 +46,7 @@ Predication Graphs as Canonical Representation of Query Sentences TimoHonkela - AarnoLehtola + AarnoLehtola K.Valkonen 69–77 W87-0105 @@ -82,7 +82,7 @@ Processing Sentences Clause by Clause - EvaEjerhed + EvaEjerhed 155–169 W87-0110 ejerhed-1988-processing @@ -148,7 +148,7 @@ Simulering af relationel database (Simulation of relational databases) [In <fixed-case>D</fixed-case>anish] - BodilNistrup Madsen + BodilNistrup Madsen 286–300 W87-0119 nistrup-madsen-1988-simulering diff --git a/data/xml/W89.xml b/data/xml/W89.xml index 68fa0bd16e..f88ad46934 100644 --- a/data/xml/W89.xml +++ b/data/xml/W89.xml @@ -5,7 +5,7 @@ Proceedings of the 7th Nordic Conference of Computational Linguistics (NODALIDA 1989) W89-01 JörgenPind - EiríkurRögnvaldsson + EiríkurRögnvaldsson Institute of Lexicography, Institute of Linguistics, University of Iceland, Iceland
Reykjavík, Iceland
1990 @@ -24,7 +24,7 @@
A <fixed-case>S</fixed-case>wedish Clause Grammar And Its Implementation - EvaEjerhed + EvaEjerhed 14–29 W89-0102 ejerhed-1990-swedish @@ -45,14 +45,14 @@ Is Two-level Morphology a Morphological Model? - Janne BondiJohannessen + Janne BondiJohannessen 51–59 W89-0105 johannessen-1990-two Automatic Indexing and Generating of Content Graphs from Unrestricted Text - GunnelKällgren + GunnelKällgren 60–76 W89-0106 kallgren-1990-automatic @@ -108,14 +108,14 @@ Representational Issues within <fixed-case>E</fixed-case>urotra - HanneFersøe + HanneFersøe 157–169 W89-0114 fersoe-1990-representational Identifiering av diskursrefenter vid maskinöversättning från ryska till svenska (Identification of dicourse references in machine translation from <fixed-case>R</fixed-case>ussian to <fixed-case>S</fixed-case>wedish) [In <fixed-case>S</fixed-case>wedish] - BarbaraGawrońska-Werngren + BarbaraGawrońska-Werngren 170–182 W89-0115 gawronska-werngren-1990-identifiering @@ -136,7 +136,7 @@ Collocations in Knowledge Based Machine Translation - GuðrúnMagnúsdóttir + GuðrúnMagnúsdóttir 204–207 W89-0118 magnusdottir-1990-collocations @@ -192,7 +192,7 @@ Application-Dependent Discourse Management for Natural Language Interfaces: An Empirical Investigation - ArneJönsson + ArneJönsson 297–307 W89-0126 jonsson-1990-application @@ -214,7 +214,7 @@ Lemmatising the Definitions of Svensk Ordbok by Morphological and Syntactic Analysis. A Pilot Study - AnnaSågvall Hein + AnnaSågvall Hein 342–357 W89-0129 sagvall-hein-1990-lemmatising @@ -230,7 +230,7 @@ Proceedings of the First International Workshop on Parsing Technologies - MasaruTomita + MasaruTomita Carnegy Mellon University
Pittsburgh, Pennsylvania, USA
August @@ -245,7 +245,7 @@ Unification and Classification: An Experiment in Information-Based Parsing - Robert T.Kasper + Robert T.Kasper 1–7 When dealing with a phenomenon as vast and com plex as natural language, an experimental approach is often the best way to discover new computational methods and determine their usefulness. The experimental process includes designing and selecting new experiments, carrying out the experiments, and evaluating the experiments. Most conference presentations are about finished experiments, completed theoretical results, or the evaluation of systems already in use. In this workshop setting, I would like to depart from this tendency to discuss some experiments that we are beginning to perform, and the reasons for investigating a particular approach to parsing. This approach builds on recent work in unification-based parsing and classification-based knowledge representation, developing an architecture that brings together the capabilities of these related frameworks. W89-0201 @@ -261,10 +261,10 @@ An Overview of Disjunctive Constraint Satisfaction - John T.Maxwell III - Ronald M.Kaplan + John T.Maxwell III + Ronald M.Kaplan 18–27 - + W89-0203 maxwell-iii-kaplan-1989-overview @@ -304,7 +304,7 @@ The Computational Implementation of Principle-Based Parsers SandiwayFong - Robert C.Berwick + Robert C.Berwick 75–84 This paper addresses the issue of how to organize linguistic principles for efficient processing. Based on the general characterization of principles in terms of purely computational properties, the effects of principle-ordering on parser performance are investigated. A novel parser that exploits the possible variation in principle-ordering to dynamically re-order principles is described. Heuristics for minimizing the amount of unnecessary work performed during the parsing process are also discussed. W89-0208 @@ -387,18 +387,18 @@ Parsing Spoken Language Using Combinatory Grammars - MarkSteedman + MarkSteedman 162–171 - + W89-0217 steedman-1989-parsing Recognition of <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammars and Linear Indexed Grammars - K.Vijay-Shanker - David J.Weir + K.Vijay-Shanker + David J.Weir 172–181 - + W89-0218 vijay-shanker-weir-1989-recognition @@ -428,7 +428,7 @@
Probabilistic Parsing for Spoken Language Applications - StephanieSeneff + StephanieSeneff 209–218 A new natural language system, TINA, has been developed for applications involving spoken language tasks, which integrate key ideas from context free grammars, Augmented Transition Networks (ATN’s) [6], and Lexical Functional Grammars (LFG’s) [1]. The parser uses a best-first strategy, with probability assignments on all arcs obtained automatically from a set of example sentences. An initial context-free grammar, derived from the example sentences, is first converted to a probabilistic network structure. Control includes both top-down and bottom-up cycles, and key parameters are passed among nodes to deal with long-distance movement, agreement, and semantic constraints. The probabilities provide a natural mechanism for exploring more common grammatical constructions first. One novel feature of TINA is that it provides an atuomatic sentence generation capability, which has been very effective for identifying overgeneration problems. A fully integrated spoken language system using this parser is under development. W89-0222 @@ -445,7 +445,7 @@ A Connectionist Parser Aimed at Spoken Language AjayJain - AlexWaibel + AlexWaibel 221–229 We describe a connectionist model which learns to parse single sentences from sequential word input. A parse in the connectionist network contains information about role assignment, prepositional attachment, relative clause structure, and subordinate clause structure. The trained network displays several interesting types of behavior. These include predictive ability, tolerance to certain corruptions of input word sequences, and some generalization capability. We report on experiments in which a small number of sentence types have been successfully learned by a network. Work is in progress on a larger database. Application of this type of connectionist model to the area of spoken language processing is discussed. W89-0224 @@ -471,9 +471,9 @@ Complexity and Decidability in Left-Associative Grammar - RolandHausser + RolandHausser 254–263 - + W89-0227 hausser-1989-complexity @@ -487,7 +487,7 @@ Finite State Machines from Feature Grammars - Alan WBlack + Alan WBlack 277–285 This paper describes the conversion of a set of feature grammar rules into a deterministic finite state machine that accepts the same language (or at least a well-defined related language). First the reasoning behind why this is an interesting thing to do within the Edinburgh speech recogniser project, is discussed. Then details about the compilation algorithm are given. Finally, there is some discussion of the advantages and disadvantages of this method of implementing feature based grammar formalisms. W89-0229 @@ -522,9 +522,9 @@ Chart Parsing for Loosely Coupled Parallel Systems - Henry S.Thompson + Henry S.Thompson 320–328 - + W89-0233 thompson-1989-chart @@ -540,7 +540,7 @@ The Relevance of Lexicalization to Parsing YvesSchabes - Aravind K.Joshi + Aravind K.Joshi 339–349 In this paper, we investigate the processing of the so-called ‘lexicalized’ grammar. In ‘lexicalized’ grammars (Schabes, Abeille and Joshi, 1988), each elementary structure is systema tically associated with a lexical ‘head’. These structures specify extended domains of locality (as compared to CFGs) over which constraints can be stated. The ‘grammar’ consists of a lexicon where each lexical item is associated with a finite number of structures for which that item is the ‘head’ . There are no separate grammar rules. There are, of course, ‘rules’ which tell us how these structures are combined. A general two-pass parsing strategy for ‘lexicalized’ grammars follows naturally. In the first stage, the parser selects a set of elementary structures associated with the lexical items in the input sentence, and in the second stage the sentence is parsed with respect to this set. We evaluate this strategy with respect to two characteristics. First, the amount of filtering on the entire grammar is evaluated: once the first pass is performed, the parser uses only a subset of the grammar. Second, we evaluate the use of non-local information: the structures selected during the first pass encode the morphological value (and therefore the position in the string) of their ‘head’; this enables the parser to use non-local in form ation to guide its search. We take Lexicalized Tree Adjoining Grammars as an in stance of lexicalized grammar. We illustrate the organization of the grammar. Then we show how a general Earley-type TAG parser (Schabes and Joshi, 1988) can take advantage of lexicalization. Empirical data show that the filtering of the grammar and the non-local in formation provided by the two-pass strategy improve the performance of the parser. We explain how constraints over the elementary structures expressed by unification equations can be parsed by a simple extension of the Earley-type TAG parser. Lexicalization guarantees termination of the algorithm without special devices such as restrictors. W89-0235 @@ -566,7 +566,7 @@ Analysis Techniques for <fixed-case>K</fixed-case>orean Sentences Based on <fixed-case>L</fixed-case>exical <fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar Deok HoYoon - Yung TaekKim + Yung TaekKim 369–378 The Unification-based Grammars seem to be adequate for the analysis of agglutinative languages such as Korean, etc. In this paper, the merits of Lexical Functional Grammar is analyzed and the structure of Korean Syntactic Analyzer is described. Verbal complex category is used for the analysis of several linguistic phenomena and a new attribute of UNKNOWN is defined for the analysis of grammatical relations. W89-0238 @@ -584,10 +584,10 @@ Parsing, Word Associations and Typical Predicate-Argument Relations - KennethChurch - WilliamGale + KennethChurch + WilliamGale PatrickHanks - DonaldHindle + DonaldHindle 389–398 There are a number of collocational constraints in natural languages that ought to play a more important role in natural language parsers. Thus, for example, it is hard for most parsers to take advantage of the fact that wine is typically drunk, produced, and sold, but (probably) not pruned. So too, it is hard for a parser to know which verbs go with which prepositions (e.g., set up) and which nouns fit together to form compound noun phrases (e.g., computer programmer). This paper will attempt to show that many of these types of concerns can be addressed with syntactic methods (symbol pushing), and need not require explicit semantic interpretation. We have found that it is possible to identify many of these interesting co-occurrence relations by computing simple summary statistics over millions of words of text. This paper will summarize a number of experiments carried out by various subsets of the authors over the last few years. The term collocation will be used quite broadly to include constraints on SVO (subject verb object) triples, phrasal verbs, compound noun phrases, and psychoiinguistic notions of word association (e.g., doctor/nurse). W89-0240 @@ -605,7 +605,7 @@ <fixed-case>PREMO</fixed-case>: Parsing by Conspicuous Lexical Consumption Brian M.Slator - YorickWilks + YorickWilks 401–413 PREMO is a knowledge-based Preference Semantics parser with access to a large, lexical semantic knowledge base and organized along the lines of an operating system. The state of every partial parse is captured in a structure called a language object, and the control structure of the preference machine is a priority queue of these language objects. The language object at the front of the queue has the highest score as computed by a preference metric that weighs grammatical predictions, semantic type matching, and pragmatic coherence. The highest priority language object is the intermediate reading that is currently most preferred (the others are still “alive,” but not actively pursued); in this way the preference machine avoids combinatorial explosion by following a “best-first” strategy for parsing. The system has clear extensions into parallel processing. W89-0242 @@ -621,9 +621,9 @@ A Broad-Coverage Natural Language Analysis System - KarenJensen + KarenJensen 425–441 - + W89-0244 jensen-1989-broad @@ -637,9 +637,9 @@ A Dependency-Based Parser for Topic and Focus - EvaHajičová + EvaHajičová 448–457 - + W89-0246 hajicova-1989-dependency diff --git a/data/xml/W90.xml b/data/xml/W90.xml index dec565a8e0..9eee102094 100644 --- a/data/xml/W90.xml +++ b/data/xml/W90.xml @@ -3,9 +3,9 @@ Proceedings of the Fifth International Workshop on Natural Language Generation - Kathleen R.McKeown - Johanna D.Moore - SergeiNirenburg + Kathleen R.McKeown + Johanna D.Moore + SergeiNirenburg Association for Computational Linguistics
Linden Hall Conference Center, Dawson, Pennsylvania
June @@ -18,22 +18,22 @@ Using <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars Systemic Framework in the - Kathleen F.McCoy - K.Vijay-Shanker + Kathleen F.McCoy + K.Vijay-Shanker GijooYang W90-0101 mccoy-etal-1990-using Generation and Synchronous <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - Stuart M.Shieber + Stuart M.Shieber YvesSchabes W90-0102 shieber-schabes-1990-generation A Connectionist Treatment of Grammar for Generation: Relying on Emergents - NigelWard + NigelWard W90-0103 ward-1990-connectionist @@ -46,7 +46,7 @@ A collocational based approach to salience-sensitive lexical selection LeoWanner - John A.Bateman + John A.Bateman W90-0105 wanner-bateman-1990-collocational @@ -65,19 +65,19 @@
Upper Modeling: organizing knowledge for natural language processing - John A.Bateman + John A.Bateman W90-0108 bateman-1990-upper Abstract Linguistic Resources for Text Planning - Marie W.Meteer + Marie W.Meteer W90-0109 meteer-1990-abstract Using Discourse Focus, Temporal Focus, and Spatial Focus to Generate Multisentential Text - Mark T.Maybury + Mark T.Maybury W90-0110 maybury-1990-using @@ -90,7 +90,7 @@
Domain Communication Knowledge - OwenRambow + OwenRambow W90-0112 rambow-1990-domain @@ -102,15 +102,15 @@
The Role of Underlying Structure in Text Generation - Robert AlanGranville + Robert AlanGranville W90-0114 granville-1990-role The Basic Block Model of Extended Explanations David J.Mooney - SandraCarberry - Kathleen F.McCoy + SandraCarberry + Kathleen F.McCoy W90-0115 mooney-etal-1990-basic @@ -122,7 +122,7 @@
Parsimonious and Profligate Approaches to the Question of Discourse Structure Relations - Eduard H.Hovy + Eduard H.Hovy W90-0117 hovy-1990-parsimonious @@ -160,14 +160,14 @@
Relational-Grammar-Based Generation in the <fixed-case>JETS</fixed-case> <fixed-case>J</fixed-case>apanese-<fixed-case>E</fixed-case>nglish Machine Translation System - David E.Johnson + David E.Johnson HideoWatanabe W90-0123 johnson-watanabe-1990-relational Real-Time Generation from Systemic Grammars - TerryPatten + TerryPatten Daniel S.Stoops W90-0124 patten-stoops-1990-real @@ -175,8 +175,8 @@ Narrated Animation: A Case for Generation NormanBadler - MarkSteedman - Bonnie LynnWebber + MarkSteedman + Bonnie LynnWebber W90-0125 badler-etal-1990-narrated @@ -186,7 +186,7 @@ Proceedings of the First International Workshop on Tree Adjoining Grammar and Related Frameworks (TAG+1) W90-02 KarinHarbusch - WolfgangWahlster + WolfgangWahlster Internationales Begegnungs- und Forschungszentrum für Informatik (IBFI)
Schloß Dagstuhl
August @@ -199,7 +199,7 @@ Formal properties of Synchronous <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - StuartShieber + StuartShieber 6–7 W90-0201 shieber-1990-formal @@ -223,14 +223,14 @@ Multicomponent <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - DavidWeir + DavidWeir 9 W90-0204 weir-1990-multicomponent Embedded Pushdown Automata - K.Vijay-Shanker + K.Vijay-Shanker 10 W90-0205 vijay-shanker-1990-embedded @@ -260,7 +260,7 @@ Parallel <fixed-case>TAG</fixed-case> Parsing on the Connection Machine MichaelPalis - DavidWei + DavidWei 12–13 W90-0209 palis-wei-1990-parallel @@ -268,7 +268,7 @@ <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar, Segment Grammar and Incremental Sentence Generation GerardKempen - KoenraadDeSmedt + KoenraadDeSmedt 13–14 W90-0210 kempen-desmedt-1990-tree @@ -297,16 +297,16 @@ A <fixed-case>TAG</fixed-case> analysis of the Third construction in <fixed-case>G</fixed-case>erman - AnthonyKroch - BeatriceSantorini - AravindJoshi + AnthonyKroch + BeatriceSantorini + AravindJoshi 16–17 W90-0214 kroch-etal-1990-tag <fixed-case>F</fixed-case>rench and <fixed-case>E</fixed-case>nglish determiners: Interaction of morphology, syntax and semantics in <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree <fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars - AnneAbeillé + AnneAbeillé 17–20 W90-0215 abeille-1990-french @@ -320,7 +320,7 @@ Coordination in <fixed-case>TAG</fixed-case> in the manner of <fixed-case>CCG</fixed-case> (Combinatory Category Grammars): Fixed vs. Flexible Phrase Structure - AravindJoshi + AravindJoshi 21 W90-0217 joshi-1990-coordination diff --git a/data/xml/W91.xml b/data/xml/W91.xml index 384493850e..ab7de0395c 100644 --- a/data/xml/W91.xml +++ b/data/xml/W91.xml @@ -19,13 +19,13 @@ Reversibility in a Constraint and Type based Logic Grammar: Application to Secondary Predication PalmiraMarrafa - PatrickSaint-Dizier + PatrickSaint-Dizier W91-0102 marrafa-saint-dizier-1991-reversibility Towards Uniform Processing of Constraint-based Categorial Grammars - Gertjanvan Noord + Gertjanvan Noord W91-0103 van-noord-1991-towards @@ -37,13 +37,13 @@ Reversibility and Modularity in Natural Language Generation - GunterNeumann + GunterNeumann W91-0105 neumann-1991-reversibility Reversible <fixed-case>NLP</fixed-case> by Deriving the Grammars From the Knowledge Base - David D.McDonald + David D.McDonald W91-0106 mcdonald-1991-reversible @@ -55,7 +55,7 @@ Generation and Translation Towards a Formalism-Independent Characterisation - Henry S.Thompson + Henry S.Thompson W91-0108 thompson-1991-generation @@ -64,31 +64,31 @@ JamesBarnett InderjeetMani PaulMartin - ElaineRich + ElaineRich W91-0109 barnett-etal-1991-reversible A Uniform Architecture for Parsing, Generation and Transfer - RemiZajac + RemiZajac W91-0110 zajac-1991-uniform Common Heuristics for Parsing, Generation, and Whatever... - HasidaKoiti + KoitiHasida W91-0111 hasida-1991-common A General Computational Method for Grammar Inversion - TomekStrzalkowski + TomekStrzalkowski W91-0112 strzalkowski-1991-general Compiling Trace & Unification Grammar for Parsing and Generation - Hans UlrichBlock + Hans UlrichBlock W91-0113 block-1991-compiling @@ -101,7 +101,7 @@
Handling Pragmatic Information With A Reversible Architecture - MasatoIshizaki + MasatoIshizaki W91-0115 ishizaki-1991-handling @@ -129,14 +129,14 @@ Knowledge representation and knowledge of words - Richmond H.Thomason + Richmond H.Thomason W91-0201 thomason-1991-knowledge Syntax-Driven and Ontology-Driven Lexical Semantics - SergeiNirenburg - LoriLevin + SergeiNirenburg + LoriLevin W91-0202 nirenburg-levin-1991-syntax @@ -160,7 +160,7 @@
Conventional Metaphor and the Lexicon - James H.Martin + James H.Martin W91-0206 martin-1991-conventional @@ -181,13 +181,13 @@ Lexical Operations in a Unification-based Framework AnnCopestake - TedBriscoe + TedBriscoe W91-0209 copestake-briscoe-1991-lexical Lexical Structures for Linguistic Inference - PeterAnick + PeterAnick SabineBergler W91-0210 anick-bergler-1991-lexical @@ -207,7 +207,7 @@ Lexical and World Knowledge: Theoretical and Applied Viewpoints - John S.White + John S.White W91-0213 white-1991-lexical @@ -225,7 +225,7 @@
For the Lexicon That Has Everything - MarthaEvens + MarthaEvens JoanneDardaine Yu-FenHuang Sun M.Li @@ -238,13 +238,13 @@ Acquiring and representing semantic information in a Lexical Knowledge Base - NicolettaCalzolari + NicolettaCalzolari W91-0217 calzolari-1991-acquiring General Lexical Representation for an Effect Predicate - MarthaPalmer + MarthaPalmer W91-0218 palmer-1991-general @@ -257,25 +257,25 @@
Presuppositions and Default Reasoning: A Study in Lexical Pragmatics - Robert E.Mercer + Robert E.Mercer W91-0220 mercer-1991-presuppositions Lexicon, Ontology and Text Meaning - Boyan A.Onyshkevych + Boyan A.Onyshkevych W91-0221 onyshkevych-1991-lexicon A Two-Level Knowledge Representation for Machine Translation: Lexical Semantics and Tense/Aspect - Bonnie J.Dorr + Bonnie J.Dorr W91-0222 dorr-1991-two The Autonomy of Shallow Lexical Knowledge - KathleenDahlgren + KathleenDahlgren W91-0223 dahlgren-1991-autonomy @@ -327,7 +327,7 @@
A Unification-based Grammar of Serial Verb Constructions - Adams B.Bodomo + Adams B.Bodomo 41–56 W91-0304 bodomo-1992-unification @@ -372,7 +372,7 @@ On the Coverage of a Morphological Analyser based on “Svensk Ordbok” [A Dictionary of <fixed-case>S</fixed-case>wedish] - AnnaSågvall Hein + AnnaSågvall Hein 119–131 W91-0310 sagvall-hein-1992-coverage @@ -401,7 +401,7 @@ Anaphora and Intensionality in Classical Logic - JørgenVilladsen + JørgenVilladsen 165–176 W91-0314 villadsen-1992-anaphora @@ -416,7 +416,7 @@ A <fixed-case>S</fixed-case>wedish Core Vocabulary for Machine Translation - AnnetteÖstling + AnnetteÖstling 187–198 W91-0316 ostling-1992-swedish diff --git a/data/xml/W93.xml b/data/xml/W93.xml index d016ef3390..89d4c59350 100644 --- a/data/xml/W93.xml +++ b/data/xml/W93.xml @@ -12,7 +12,7 @@ Word Sense Disambiguation by Human Subjects: Computational and Psycholinguistic Applications - ThomasAhlswede + ThomasAhlswede DavidLorand W93-0101 ahlswede-lorand-1993-word @@ -21,7 +21,7 @@ Towards Building Contextual Representations of Word Senses Using Statistical Models ClaudiaLeacock GeoffreyTowell - EllenVoorhees + EllenVoorhees W93-0102 leacock-etal-1993-towards @@ -29,7 +29,7 @@ Lexical Concept Acquisition from Collocation Map Young S.Han Young KyoonHan - Key-SunChoi + Key-SunChoi W93-0103 han-etal-1993-lexical @@ -43,7 +43,7 @@ Identifying Unknown Proper Names in Newswire Text InderjeetMani T. RichardMacmillan - SusannLuperfoy + SusannLuperfoy ElaineLusher SharonLaskowski W93-0105 @@ -51,16 +51,16 @@
Customizing a Lexicon to Better Suit a Computational Task - MartiHearst - HinrichSchuetze + MartiHearst + HinrichSchuetze W93-0106 hearst-schuetze-1993-customizing Hierarchical Clustering of Verbs - RobertoBasili - MariaPazienza - PaolaVelardi + RobertoBasili + MariaPazienza + PaolaVelardi W93-0107 basili-etal-1993-hierarchical @@ -74,22 +74,22 @@ The Automatic Acquisition of Frequencies of Verb Subcategorization Frames from Tagged Corpora AkiraUshioda - David A.Evans + David A.Evans TedGibson - AlexWaibel + AlexWaibel W93-0109 ushioda-etal-1993-automatic Acquiring Predicate-Argument Mapping Information from Multilingual Texts ChinatsuAone - DouglasMcKee + DouglasMcKee W93-0110 aone-mckee-1993-acquiring Experiments in Syntactic and Semantic Classification and Disambiguation using Bootstrapping - RobertFutrelle + RobertFutrelle SusanGauch W93-0111 futrelle-gauch-1993-experiments @@ -109,16 +109,16 @@ Categorization and Standardizing Proper Nouns for Efficient Information Retrieval WoojinPaik - ElizabethLiddy - EdmundYu + ElizabethLiddy + EdmundYu MaryMcKenna W93-0114 paik-etal-1993-categorization The Long Journey from the Core to the Real Size of Large <fixed-case>LDB</fixed-case>s - ElenaPaskaleva - KirilSimov + ElenaPaskaleva + KirilSimov MarianaDamova MilenaSlavcheva W93-0115 @@ -137,9 +137,9 @@ Rhetorical Relations: Necessary But Not Sufficient - SandraCarberry - JenniferChu - NancyGreen + SandraCarberry + JenniferChu + NancyGreen LynnLambert W93-0201 carberry-etal-1993-rhetorical @@ -153,7 +153,7 @@ Knowledge, Intention, Rhetoric: Levels of Variation in Multilingual Instructions JudyDelin - DoniaScott + DoniaScott TonyHartley W93-0203 delin-etal-1993-knowledge @@ -166,7 +166,7 @@ A Goal-Based Grammar of Rhetoric - ChrysanneDiMarco + ChrysanneDiMarco GraemeHirst MarzenaMakuta-Giluk W93-0205 @@ -174,20 +174,20 @@ An Algorithm for High-Level Organization of Multi-Paragraph Texts - RobertGranville + RobertGranville W93-0206 granville-1993-algorithm Planning for Intentions with Rhetorical Relations - SusanHaller + SusanHaller W93-0207 haller-1993-planning Structuring Two-Medium Dialog for Learning Language and Other Things HenryHamburger - DanTufis + DanTufis RazaHashim W93-0208 hamburger-etal-1993-structuring @@ -200,14 +200,14 @@ In Defense of Syntax: Informational, Intentional, and Rhetorical Structures in Discourse - EduardHovy + EduardHovy W93-0210 hovy-1993-defense Observations and Directions in Text Structure JohnHughes - KathleenMcCoy + KathleenMcCoy W93-0211 hughes-mccoy-1993-observations @@ -226,7 +226,7 @@ Towards Stratification of <fixed-case>RST</fixed-case> TanyaKorelsky - RichardKittredge + RichardKittredge W93-0214 korelsky-kittredge-1993-towards @@ -238,82 +238,82 @@ Empirical Evidence for Intention-Based Discourse Segmentation - Diane J.Litman - Rebecca J.Passonneau + Diane J.Litman + Rebecca J.Passonneau W93-0216 litman-passonneau-1993-empirical The Need for Intentionally-Based Approaches to Language - KarenLochbaum + KarenLochbaum W93-0217 lochbaum-1993-need Intentions in Bilingual Dialogue Processing - SusannLuperFoy + SusannLuperFoy W93-0218 luperfoy-1993-intentions The Representation of Interdependencies between Communicative Goals and Rhetorical Relations in the Framework of Multimedia Document Generation - ElisabethMaier + ElisabethMaier W93-0219 maier-1993-representation On Structure and Intention - MarkMaybury + MarkMaybury W93-0220 maybury-1993-structure Textual Constraints, Rhetorical <fixed-case>RE</fixed-case>lations and Communicative Goals and Rhetorical Relations in the Framework of Multimedia Document Generation - ElisabethMaier + ElisabethMaier W93-0221 maier-1993-textual Assumption Underlying Discourse Relations: Which Ones are Really There and Where are They? - MarieMeteer + MarieMeteer W93-0222 meteer-1993-assumption How Could Rhetorical Relations Be Used in Machine Translation? - RuslanMitkov + RuslanMitkov W93-0223 mitkov-1993-rhetorical On the Necessity of Intentions and the Usefulness of Rhetorical Relations: A Position Paper - VibhuMittal - CecileParis + VibhuMittal + CecileParis W93-0224 mittal-paris-1993-necessity Investigating Discourse Relations MeganMoser - JohannaMoore + JohannaMoore W93-0225 moser-moore-1993-investigating Intentions, Information, and Inference: Two Rhetorical Questions - JonOberlander + JonOberlander W93-0226 oberlander-1993-intentions Rhetoric as Knowledge - OwenRambow + OwenRambow W93-0227 rambow-1993-rhetoric Intentions, Rhetoric, or Discourse Relations ? A Case from Multilingual Document Generation - DietmarRosner + DietmarRosner W93-0228 rosner-1993-intentions @@ -339,13 +339,13 @@ On Discourse Relations, Rhetorical Relations and Rhetoric - CandaceSidner + CandaceSidner W93-0232 sidner-1993-discourse Summarising as a Lever for Studying Large-Scale Discourse Structure - KarenSparck Jones + KarenSparck Jones W93-0233 sparck-jones-1993-summarising @@ -357,7 +357,7 @@ Rhetorical Relations, Action and Intentionality in Conversation - DavidTraum + DavidTraum W93-0235 traum-1993-rhetorical @@ -375,19 +375,19 @@
Information and Deliberation in Discourse - MarilynWalker + MarilynWalker W93-0238 walker-1993-information Issues in Linguistic Segmentation - JanyceWiebe + JanyceWiebe W93-0239 wiebe-1993-issues Closing the Gap Between Discourse Structure and Communicative Intention - Horng Jyh PaulWu + Horng Jyh PaulWu W93-0240 wu-1993-closing @@ -405,20 +405,20 @@ Robust Bilingual Word Alignment for Machine Aided Translation IdoDagan - KennethChurch + KennethChurch WillianGale W93-0301 dagan-etal-1993-robust Robust Text Processing in Automated Information Retrieval - TomekStrzalkowski + TomekStrzalkowski W93-0302 strzalkowski-1993-robust-text Document Filtering using Semantic Information from a Machine Readable Dictionary - Elizabeth D.Liddy + Elizabeth D.Liddy WoojinPaik W93-0303 liddy-paik-1993-document @@ -432,7 +432,7 @@ <fixed-case>HMM</fixed-case>-Based Part-of-Speech Tagging for <fixed-case>C</fixed-case>hinese Corpora Chao-HuangChang - Cheng-derChen + Cheng-derChen W93-0305 chang-chen-1993-hmm @@ -445,7 +445,7 @@ Structural Ambiguity and Conceptual Relations PhilipResnik - Marti A.Hearst + Marti A.Hearst W93-0307 resnik-hearst-1993-structural @@ -478,8 +478,8 @@ Example-Based Sense Tagging of Running <fixed-case>C</fixed-case>hinese Text XiangTong - Chang-ningHuang - Cheng-mingGuo + Chang-ningHuang + Cheng-mingGuo W93-0312 tong-etal-1993-example @@ -537,7 +537,7 @@ Porting a Stochastic Part-of-Speech Tagger to <fixed-case>S</fixed-case>wedish - DouglassCutting + DouglassCutting 65–70 W93-0405 cutting-1994-porting @@ -545,7 +545,7 @@ Tagging Experiments Using Neural Networks MartinEineborg - BjörnGambäck + BjörnGambäck 71–81 W93-0406 eineborg-gamback-1994-tagging @@ -559,7 +559,7 @@ On Implementing <fixed-case>S</fixed-case>wedish Tense and Aspect - BjörnGambäck + BjörnGambäck 97–109 W93-0408 gamback-1994-implementing @@ -581,7 +581,7 @@ From Semantic Representations to <fixed-case>SQL</fixed-case> Queries PerAnker Jensen - BodilNistrup Madsen + BodilNistrup Madsen AnnieStahél CarlVikner 133–142 @@ -591,7 +591,7 @@ Clustering Sentences – Making Sense of Synonymous Sentences JussiKarlgren - BjörnGambäck + BjörnGambäck ChristerSamuelsson 143–154 W93-0412 @@ -671,7 +671,7 @@ Preferences and Linguistic Choices in the Multra Machine Translation System - AnnaSågvall Hein + AnnaSågvall Hein 267–276 W93-0423 sagvall-hein-1994-preferences diff --git a/data/xml/W94.xml b/data/xml/W94.xml index 17f2f6cbb4..8e9f66ca6d 100644 --- a/data/xml/W94.xml +++ b/data/xml/W94.xml @@ -16,21 +16,21 @@ INVITED TALK: Qualitative and Quantitative Designs for Speech Translation - HiyanAlshawi + HiyanAlshawi W94-0101 alshawi-1994-invited The Noisy Channel and the Braying Donkey - RobertoBasili - Maria TeresaPazienza - PaolaVelardi + RobertoBasili + Maria TeresaPazienza + PaolaVelardi W94-0102 basili-etal-1994-noisy <fixed-case>AMALGAM</fixed-case>: Automatic Mapping Among Lexico-Grammatical Annotation Models - EricAtwell + EricAtwell JohnHughes CliveSouter W94-0103 @@ -38,14 +38,14 @@ Study and Implementation of Combined Techniques for Automatic Extraction of Terminology - BeatriceDaille + BeatriceDaille W94-0104 daille-1994-study Parsing with Principles and Probabilities AndrewFordham - MatthewCrocker + MatthewCrocker W94-0105 fordham-crocker-1994-parsing @@ -57,8 +57,8 @@ Complexity of Description of Primitives: Relevance to Local Statistical Computations - Aravind K.Joshi - B.Srinivas + Aravind K.Joshi + B.Srinivas W94-0107 joshi-srinivas-1994-complexity @@ -71,21 +71,21 @@ Integrating Symbolic and Statistical Approches in Speech and Natural Language Applications - MarieMeteer - HerbertGish + MarieMeteer + HerbertGish W94-0109 meteer-gish-1994-integrating Combining Linguistic with Statistical Methods in Automatic Speech Understanding - PattiPrice + PattiPrice W94-0110 price-1994-combining Exploring the Statistical Derivation of Transformational Rule Sequences for Part-of-Speech Tagging - Lance A.Ramshaw - Mitchell P.Marcus + Lance A.Ramshaw + Mitchell P.Marcus W94-0111 ramshaw-marcus-1994-exploring @@ -97,8 +97,8 @@ Recovering From Parser Failures: A Hybrid Statistical/Symbolic Approach - Carolyn PensteinRose - AlexWaibel + Carolyn PensteinRose + AlexWaibel W94-0113 rose-waibel-1994-recovering @@ -111,7 +111,7 @@ Learning a Radically Lexical Grammar DannySoloman - Mary McGeeWood + Mary McGeeWood W94-0115 soloman-wood-1994-learning @@ -141,14 +141,14 @@ Constraints, Exceptions and Representations - T. MarkEllison + T. MarkEllison W94-0203 ellison-1994-constraints Default Finite State Machines and Finite State Phonology GeraldPenn - RichmondThomason + RichmondThomason W94-0204 penn-thomason-1994-default @@ -198,13 +198,13 @@ <fixed-case>DPOCL</fixed-case>: A Principled Approach To Discourse Planning R. MichaelYoung - Johanna D.Moore + Johanna D.Moore W94-0302 young-moore-1994-dpocl Building Underlying Structures for Multiparagraph Texts - RobertGranville + RobertGranville W94-0303 granville-1994-building @@ -223,8 +223,8 @@
Intentions, Structure and Expression in Multi-Lingual Instructions - Cecile L.Paris - Donia R.Scott + Cecile L.Paris + Donia R.Scott W94-0306 paris-scott-1994-intentions @@ -238,9 +238,9 @@ Expressing Procedural Relationships in Multilingual Instructions JudyDelin - AnthonyHartley - CecileParis - DoniaScott + AnthonyHartley + CecileParis + DoniaScott KeithVander Linden W94-0308 delin-etal-1994-expressing @@ -253,7 +253,7 @@ On the Creative Use of Language: The Form of Lexical Resources - David D.McDonald + David D.McDonald FedericaBusa W94-0310 mcdonald-busa-1994-creative @@ -261,13 +261,13 @@ Semantic Lexicons: The Cornerstone for Lexical Choice in Natural Language Generation EvelyneViegas - PierretteBouillon + PierretteBouillon W94-0311 viegas-bouillon-1994-semantic Generating Event Descriptions with Sage: A Simulation and Generation Environment - MarieMeteer + MarieMeteer W94-0312 meteer-1994-generating @@ -304,7 +304,7 @@ Towards the Application of Text Generation in an Integrated Publication System ElkeTeich - JohnBateman + JohnBateman W94-0318 teich-bateman-1994-towards @@ -316,35 +316,35 @@ The Role of Cognitive Modeling in Communicative Intentions - OwenRambow - MarilynWalker + OwenRambow + MarilynWalker W94-0320 rambow-walker-1994-role Recognizing Digressive Questions Using a Model for Interactive Generation for Interactive Generation - Susan M.Haller + Susan M.Haller W94-0321 haller-1994-recognizing Generating Indirect Answers to Yes-No Questions - NancyGreen - SandraCarberry + NancyGreen + SandraCarberry W94-0322 green-carberry-1994-generating Real-Time Natural Language Generation in <fixed-case>NL</fixed-case>-<fixed-case>SOAR</fixed-case> RobertRubinoff - Jill FainLehman + Jill FainLehman W94-0323 rubinoff-lehman-1994-real Generating Cooperative System Responses in Information Retrieval Dialogues MarkusFischer - ElisabethMaier + ElisabethMaier AdelheitStein W94-0324 fischer-etal-1994-generating @@ -352,7 +352,7 @@ Situation Viewpoints for Generation HenryHamburger - DanTufis + DanTufis W94-0325 hamburger-tufis-1994-situation @@ -364,22 +364,22 @@ Bidirectional Incremental Generation and Analysis with Categorial Grammar and Indexed Quasi-Logical Form - TorbjoernLager - William J.Black + TorbjoernLager + William J.Black W94-0327 lager-black-1994-bidirectional Toward a Multidimensional Framework to Guide the Automated Generation of Text Types - JuliaLavid - EduardHovy + JuliaLavid + EduardHovy W94-0328 lavid-hovy-1994-toward <fixed-case>CORECT</fixed-case>: Combining <fixed-case>CSCW</fixed-case> with Natural Language Generation for Collaborative Requirement Capture JohnLevine - ChrisMellish + ChrisMellish W94-0329 levine-mellish-1994-corect @@ -392,9 +392,9 @@
Generation in the <fixed-case>LOLITA</fixed-case> System: An Engineering Approach - Mark H.Smith - RobertoGarigliano - Richard G.Morgan + Mark H.Smith + RobertoGarigliano + Richard G.Morgan W94-0331 smith-etal-1994-generation diff --git a/data/xml/W95.xml b/data/xml/W95.xml index 96b1e34b5b..5e90d2ae28 100644 --- a/data/xml/W95.xml +++ b/data/xml/W95.xml @@ -19,13 +19,13 @@
Lexical Heads, Phrase Structure and the Induction of Grammar - Carlde Marcken + Carlde Marcken W95-0102 de-marcken-1995-lexical Prepositional Phrase Attachment through a Backed-off Model - MichaelCollins + MichaelCollins JamesBrooks W95-0103 collins-brooks-1995-prepositional @@ -50,8 +50,8 @@ Text Chunking using Transformation-Based Learning - LanceRamshaw - MitchMarcus + LanceRamshaw + MitchMarcus W95-0107 ramshaw-marcus-1995-text @@ -73,21 +73,21 @@
Inverse Document Frequency (<fixed-case>IDF</fixed-case>): A Measure of Deviations from <fixed-case>P</fixed-case>oisson - KennethChurch - WilliamGale + KennethChurch + WilliamGale W95-0110 church-gale-1995-inverse Automatic Suggestion of Significant Terms for a Predefined Topic - JoeZhou + JoeZhou PeteDapkus W95-0111 zhou-dapkus-1995-automatic Automatically Acquiring Conceptual Patterns without an Annotated Corpus - EllenRiloff + EllenRiloff JayShoen W95-0112 riloff-shoen-1995-automatically @@ -145,7 +145,7 @@ <fixed-case>S</fixed-case>wedish Language Processing in the Spoken Language Translator - BjörnGambäck + BjörnGambäck 37–49 W95-0203 gamback-1995-swedish @@ -159,7 +159,7 @@ Sense Extension Functions in Lexical Semantics - PeterRossen Skadhauge + PeterRossen Skadhauge 59–68 W95-0205 rossen-skadhauge-1995-sense diff --git a/data/xml/W96.xml b/data/xml/W96.xml index bdec515112..6d13cc09d7 100644 --- a/data/xml/W96.xml +++ b/data/xml/W96.xml @@ -3,7 +3,7 @@ Fourth Workshop on Very Large Corpora - DoniaScott + DoniaScott Association for Computational Linguistics
Herstmonceux Castle, Sussex, UK
June @@ -16,14 +16,14 @@ Using Word Class for Part-of-speech Disambiguation - EvelyneTzoukermann - Dragomir R.Radev + EvelyneTzoukermann + Dragomir R.Radev W96-0101 tzoukermann-radev-1996-using <fixed-case>MBT</fixed-case>: A Memory-Based Part of Speech Tagger-Generator - WalterDaelemans + WalterDaelemans JakubZavrel PeterBerck StevenGillis @@ -61,27 +61,27 @@ Automatic Extraction of Word Sequence Correspondences in Parallel Corpora MihokoKitamura - YujiMatsumoto + YujiMatsumoto W96-0107 kitamura-matsumoto-1996-automatic A Statistical Approach to Automatic <fixed-case>OCR</fixed-case> Error Correction in Context XiangTong - David A.Evans + David A.Evans W96-0108 tong-evans-1996-statistical Exploiting Text Structure for Topic Identification TadashiNomoto - YujiMatsumoto + YujiMatsumoto W96-0109 nomoto-matsumoto-1996-exploiting Statistical Models for Deep-structure Disambiguation - TungHuiChiang + TungHuiChiang Keh-YihSu W96-0110 chiang-su-1996-statistical @@ -107,7 +107,7 @@ Towards Automatic Grammar Acquisition from a Bracketed Corpus ThanarukTheeramunkong - ManabuOkumara + ManabuOkumara W96-0114 theeramunkong-okumara-1996-towards @@ -146,7 +146,7 @@ Modeling Conversational Speech for Speech Recognition - MarieMeteer + MarieMeteer RukminiIyer W96-0204 meteer-iyer-1996-modeling @@ -166,34 +166,34 @@ Combining Hand-crafted Rules and Unsupervised Learning in Constraint-based Morphological Disambiguation KemalOflazer - GokhanTur + GokhanTur W96-0207 oflazer-tur-1996-combining Comparative Experiments on Disambiguating Word Senses: An Illustration of the Role of Bias in Machine Learning - Raymond J.Mooney + Raymond J.Mooney W96-0208 mooney-1996-comparative Apportioning Development Effort in a Probabilistic <fixed-case>LR</fixed-case> Parsing System Through Evaluation - JohnCarroll - TedBriscoe + JohnCarroll + TedBriscoe W96-0209 carroll-briscoe-1996-apportioning The Measure of a Model - RebeccaBruce - JanyceWiebe + RebeccaBruce + JanyceWiebe TedPedersen W96-0210 bruce-etal-1996-measure Automating Feature Set Selection for Case-Based Learning of Linguistic Knowledge - ClaireCardie + ClaireCardie W96-0211 cardie-1996-automating @@ -206,13 +206,13 @@ A Maximum Entropy Model for Part-Of-Speech Tagging - AdwaitRatnaparkhi + AdwaitRatnaparkhi W96-0213 ratnaparkhi-1996-maximum Efficient Algorithms for Parsing the <fixed-case>DOP</fixed-case> Model - JoshuaGoodman + JoshuaGoodman W96-0214 goodman-1996-efficient @@ -241,7 +241,7 @@ Controlling the Application of Lexical Rules - TedBriscoe + TedBriscoe AnnCopestake W96-0303 briscoe-copestake-1996-controlling @@ -249,7 +249,7 @@ Using Lexical Semantic Techniques to Classify Free-Responses JillBurstein - RandyKaplan + RandyKaplan SusanneWolff ChiLu W96-0304 @@ -257,15 +257,15 @@ Acquisition of Computational-Semantic Lexicons from Machine Readable Lexical Resources - Jason J.S.Chang + Jason J.S.Chang J.N.Chen W96-0305 chang-chen-1996-acquisition Acquisition of Semantic Lexicons: Using Word Sense Disambiguation to Improve Precision - Bonnie J.Dorr - DougJones + Bonnie J.Dorr + DougJones W96-0306 dorr-jones-1996-acquisition @@ -278,13 +278,13 @@ <i>Lexical Rules</i> is Italicized StephenHelmreich - DavidFarwell + DavidFarwell W96-0308 helmreich-farwell-1996-lexical Qualia Structure and the Compositional Interpretation of Compounds - MichaelJohnston + MichaelJohnston FedericaBusa W96-0309 johnston-busa-1996-qualia @@ -292,14 +292,14 @@ Lexical Rules for Deverbal Adjectives VictorRaskin - SergeiNirenburg + SergeiNirenburg W96-0310 raskin-nirenburg-1996-lexical Morphological Productivity in the Lexicon Onur T.Sehitoglu - H. CemBozsahin + H. CemBozsahin W96-0311 sehitoglu-bozsahin-1996-morphological @@ -309,7 +309,7 @@ Eighth International Natural Language Generation Workshop inlg 1996 - DoniaScott + DoniaScott Association for Computational Linguistics
Stroudsburg, PA, USA
10.18653/v1/W96-04 @@ -322,7 +322,7 @@ The <fixed-case>H</fixed-case>ealth<fixed-case>D</fixed-case>oc Sentence Planner LeoWanner - EduardHovy + EduardHovy W96-0401 wanner-hovy-1996-healthdoc 10.18653/v1/W96-0401 @@ -345,9 +345,9 @@ Approximate Generation from Non-Hierarchical Representations - NicolasNicolov - ChrisMellish - GraemeRitchie + NicolasNicolov + ChrisMellish + GraemeRitchie W96-0404 nicolov-etal-1996-approximate 10.18653/v1/W96-0404 @@ -370,15 +370,15 @@ Generating Patent Claims from Interactive Input SvetlanaSheremetyeva - SergeiNirenburg - IreneNirenburg + SergeiNirenburg + IreneNirenburg W96-0407 sheremetyeva-etal-1996-generating 10.18653/v1/W96-0407 Considering the Effects of Second Language Learning on Generation - Kathleen F.McCoy + Kathleen F.McCoy Christopher A.Pennington Linda Z.Suri W96-0408 @@ -387,9 +387,9 @@ Tactical Generation in a Free Constituent Order Language - Dilek ZeynepHakkani + Dilek ZeynepHakkani KemalOflazer - IlyasCicekli + IlyasCicekli W96-0409 hakkani-etal-1996-tactical 10.18653/v1/W96-0409 @@ -397,7 +397,7 @@ Paying Heed to Collocations MatthewStone - ChristineDoran + ChristineDoran W96-0410 stone-doran-1996-paying 10.18653/v1/W96-0410 @@ -440,8 +440,8 @@ Sources of Flexibility in Dynamic Hypertext Generation AlistairKnott - ChrisMellish - JonOberlander + ChrisMellish + JonOberlander MickO’Donnell W96-0416 knott-etal-1996-sources @@ -473,7 +473,7 @@ Eighth International Natural Language Generation Workshop (Posters and Demonstrations) inlg 1996 - DoniaScott + DoniaScott Association for Computational Linguistics
Stroudsburg, PA, USA
10.18653/v1/W96-05 @@ -494,15 +494,15 @@ <fixed-case>SPLAT</fixed-case>: A sentence-plan authoring tool BruceJakeway - ChrysanneDiMarco + ChrysanneDiMarco W96-0502 jakeway-dimarco-1996-splat 10.18653/v1/W96-0502 The <fixed-case>M</fixed-case>odel<fixed-case>E</fixed-case>xplainer - BenoitLavoie - OwenRambow + BenoitLavoie + OwenRambow EhudReiter W96-0503 lavoie-etal-1996-modelexplainer @@ -510,7 +510,7 @@ <fixed-case>DRAFTER</fixed-case> - CécileParis + CécileParis KeithVander Linden W96-0504 paris-vander-linden-1996-drafter @@ -527,14 +527,14 @@ <fixed-case>PICARD</fixed-case>: The Next Generator StephenBeale - SergeiNirenburg + SergeiNirenburg W96-0506 beale-nirenburg-1996-picard 10.18653/v1/W96-0506 Overview of <fixed-case>A</fixed-case>leth<fixed-case>G</fixed-case>en - JoséCoch + JoséCoch W96-0507 coch-1996-overview 10.18653/v1/W96-0507 @@ -542,7 +542,7 @@ On Lexical Aggregation and Ordering HerculesDalianis - EduardHovy + EduardHovy W96-0508 dalianis-hovy-1996-lexical 10.18653/v1/W96-0508 @@ -550,7 +550,7 @@ Generating ‘Distributed’ Referring Expressions: an Initial Report BarbaraDi Eugenio - Johanna D.Moore + Johanna D.Moore W96-0509 di-eugenio-moore-1996-generating 10.18653/v1/W96-0509 @@ -572,7 +572,7 @@ An Architecture For Distributed Natural Language Summarization - Dragomir R.Radev + Dragomir R.Radev W96-0512 radev-1996-architecture 10.18653/v1/W96-0512 diff --git a/data/xml/W97.xml b/data/xml/W97.xml index d972e8c442..0793469ec1 100644 --- a/data/xml/W97.xml +++ b/data/xml/W97.xml @@ -12,7 +12,7 @@ Summary of Invited Speech - MitchMarcus + MitchMarcus W97-0101 marcus-1997-summary @@ -24,7 +24,7 @@ Commercial Impact of <fixed-case>VLC</fixed-case> Research - HowardTurtle + HowardTurtle W97-0103 turtle-1997-commercial @@ -36,24 +36,24 @@ Probabilistic Parsing of Unrestricted <fixed-case>E</fixed-case>nglish Text, With a Highly-Detailed Grammar - EzraBlack + EzraBlack StephenEubank - HidekiKashioka - DavidMagerman + HidekiKashioka + DavidMagerman W97-0105 black-etal-1997-probabilistic Grammar Acquisition Based on Clustering Analysis and Its Application to Statistical Parsing ThanarukTheeramunkong - ManabuOkumura + ManabuOkumura W97-0106 theeramunkong-okumura-1997-grammar Reestimation and Best-First Parsing Algorithm for Probabilistic Dependency Grammars SeungmiLee - Key-SunChoi + Key-SunChoi W97-0107 lee-choi-1997-reestimation @@ -66,20 +66,20 @@ Corpus Based <fixed-case>PP</fixed-case> Attachment Ambiguity Resolution with a Semantic Dictionary JiriStetina - MakotoNagao + MakotoNagao W97-0109 stetina-nagao-1997-corpus Corpus Based Statistical Generalization Tree in Rule Optimization - Joyce YueChai - Alan W.Biermann + Joyce YueChai + Alan W.Biermann W97-0110 chai-biermann-1997-corpus Clustering Co-occurrence Graph based on Transitivity - KumikoTanaka-Ishii + KumikoTanaka-Ishii W97-0111 tanaka-ishii-1997-clustering @@ -92,7 +92,7 @@ Data Reliability and Its Effects on Automatic Abstracting TadashiNomoto - YujiMatsumoto + YujiMatsumoto W97-0113 nomoto-matsumoto-1997-data @@ -104,8 +104,8 @@ Statistical Acquisition of Terminology Dictionary - HuangXuan-jing - WuLi-de + Xuan-jingHuang + Li-deWu WangWen-xin W97-0115 huang-etal-1997-statistical @@ -118,14 +118,14 @@ A Natural Language Correction Model for Continuous Speech Recognition - TomekStrzalkowski + TomekStrzalkowski RonaldBrandow W97-0117 strzalkowski-brandow-1997-natural The Effects of Corpus Size and Homogeneity on Language Model Quality - Tony G.Rose + Tony G.Rose W97-0118 rose-1997-effects @@ -163,28 +163,28 @@ Analysis of Unknown Lexical Items using Morphological and Syntactic Information with the <fixed-case>TIMIT</fixed-case> Corpus Scott M.Thede - MaryHarper + MaryHarper W97-0124 thede-harper-1997-analysis A Local Grammar-based Approach to Recognizing of Proper Names in <fixed-case>K</fixed-case>orean Texts - Jee-SunNam - Key-SunChoi + Jee-SunNam + Key-SunChoi W97-0125 nam-choi-1997-local A Statistical Approach to <fixed-case>T</fixed-case>hai Morphological Analyzer KawtrakulAsanee - ThumkanonChalathip + ChalathipThumkanon W97-0126 kawtrakul-thumkanon-1997-statistical Probabilistic Word Classification Based on Context-Sensitive Binary Tree Method JunGao - XiXianChen + XiXianChen W97-0127 gao-chen-1997-probabilistic @@ -207,10 +207,10 @@ Experience in <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et Sense Tagging in the <fixed-case>W</fixed-case>all <fixed-case>S</fixed-case>treet <fixed-case>J</fixed-case>ournal - JanyceWiebe + JanyceWiebe JulieMaples LeiDuan - RebeccaBruce + RebeccaBruce W97-0202 wiebe-etal-1997-experience @@ -222,7 +222,7 @@
A Frame-Semantic Approach to Semantic Annotation - John B.Lowe + John B.Lowe W97-0204 lowe-1997-frame @@ -236,7 +236,7 @@ Analysis of a Hand-Tagging Task ChristianeFellbaum JoachimGrabowski - ShariLand + ShariLand W97-0206 fellbaum-etal-1997-analysis
@@ -248,7 +248,7 @@
Sense Tagging: Semantic Tagging with a Lexicon - YorickWilks + YorickWilks MarkStevenson W97-0208 wilks-stevenson-1997-sense @@ -261,18 +261,18 @@ Investigating Complementary Methods for Verb Sense Pruning - HongyanJing + HongyanJing VasileiosHatzivassiloglou - RebeccaPassonneau - KathleenMcKeown + RebeccaPassonneau + KathleenMcKeown W97-0210 jing-etal-1997-investigating Towards a Bootstrapping Framework for Corpus Semantic Tagging - RobertoBasili + RobertoBasili MichelangeloDella Rocca - Maria TeresaPazienza + Maria TeresaPazienza W97-0211 basili-etal-1997-towards @@ -292,14 +292,14 @@
Writing Annotation Instructions - JanyceWiebe + JanyceWiebe W97-0214 wiebe-1997-writing Combining Knowledge Sources for Automatic Semantic Tagging - DouglasJones - BoyanOnyshkevych + DouglasJones + BoyanOnyshkevych W97-0215 jones-onyshkevych-1997-combining @@ -318,14 +318,14 @@
Constructing Semantic Tagsets - AlainPolguere + AlainPolguere W97-0218 polguere-1997-constructing Structured Lexicons and Semantic Tagging - Bonnie J.Dorr - Mari BromanOlsen + Bonnie J.Dorr + Mari BromanOlsen W97-0219 dorr-olsen-1997-structured @@ -342,35 +342,35 @@ A Linear Observed Time Statistical Parser Based on Maximum Entropy Models - AdwaitRatnaparkhi + AdwaitRatnaparkhi W97-0301 ratnaparkhi-1997-linear Global Thresholding and Multiple-Pass Parsing - JoshuaGoodman + JoshuaGoodman W97-0302 goodman-1997-global An Efficient Distribution of Labor in a Two Stage Robust Interpretation Process - Carolyn PensteinRose - AlonLavie + Carolyn PensteinRose + AlonLavie W97-0303 rose-lavie-1997-efficient Text Segmentation Using Exponential Models DougBeeferman - AdamBerger - JohnLafferty + AdamBerger + JohnLafferty W97-0304 beeferman-etal-1997-text Detecting Subject Boundaries Within Text: A Language Independent Statistical Approach KorinRichmond - AndrewSmith + AndrewSmith EinatAmitay W97-0305 richmond-etal-1997-detecting @@ -393,7 +393,7 @@ On aligning trees - JoCalder + JoCalder W97-0308 calder-1997-aligning @@ -425,23 +425,23 @@
A Corpus-Based Approach for Building Semantic Lexicons - EllenRiloff + EllenRiloff JessicaShepherd W97-0313 riloff-shepherd-1997-corpus Inducing Terminology for Lexical Acquisition - RobertoBasili - GianlucaDe Rossi - Maria TeresaPazienza + RobertoBasili + GianlucaDe Rossi + Maria TeresaPazienza W97-0314 basili-etal-1997-inducing Name Searching and Information Retrieval PaulThompson - Christopher C.Dozier + Christopher C.Dozier W97-0315 thompson-dozier-1997-name @@ -465,30 +465,30 @@
Probabilistic Coreference in Information Extraction - AndrewKehler + AndrewKehler W97-0319 kehler-1997-probabilistic An Empirical Approach to Temporal Reference Resolution - JanyceWiebe + JanyceWiebe TomO’Hara - KennethMcKeever + KennethMcKeever ThorstenOhrstrom-Sandgren W97-0320 wiebe-etal-1997-empirical Word Sense Disambiguation Based on Structured Semantic Space - JiDonghong - HuangChangning + DonghongJi + ChangningHuang W97-0321 ji-huang-1997-word Distinguishing Word Senses in Untagged Text TedPedersen - RebeccaBruce + RebeccaBruce W97-0322 pedersen-bruce-1997-distinguishing @@ -518,8 +518,8 @@
A Dialogue Analysis Model with Statistical Speech Act Processing for Dialogue Machine Translation - Jae-wonLee - Gil ChangKim + Jae-wonLee + Gil ChangKim W97-0402 lee-kim-1997-dialogue @@ -569,40 +569,40 @@
<fixed-case>E</fixed-case>nglish-to-<fixed-case>M</fixed-case>andarin Speech Translation with Head Transducers - HiyanAlshawi + HiyanAlshawi W97-0408 alshawi-1997-english Interactive Speech Translation in the <fixed-case>DIPLOMAT</fixed-case> Project - RobertFrederking - AlexanderRudnicky + RobertFrederking + AlexanderRudnicky ChristopherHogan W97-0409 frederking-etal-1997-interactive Expanding the Domain of a Multi-lingual Speech-to-Speech Translation System - AlonLavie - LoriLevin + AlonLavie + LoriLevin PumingZhan - MaiteTaboada - DonnaGates + MaiteTaboada + DonnaGates MirellaLapata CortisClark MatthewBroadhead - AlexWaibel + AlexWaibel W97-0410 lavie-etal-1997-expanding Translation Methodology in the Spoken Language Translator: An Evaluation - DavidCarter + DavidCarter RalphBecket - MannyRayner + MannyRayner RobertEklund CatrionaMacDermid - MatsWirén + MatsWirén SabineKirchmeier-Andersen ChristinaPhilp W97-0411 @@ -628,7 +628,7 @@ Spoken Language Translation with the <fixed-case>ITSV</fixed-case>ox System - EricWehrli + EricWehrli Jean-LucCochard W97-0415 wehrli-cochard-1997-spoken @@ -660,7 +660,7 @@ Simple <fixed-case>NLP</fixed-case> Techniques for Expanding Telegraphic Sentences - Kathleen F.McCoy + Kathleen F.McCoy W97-0503 mccoy-1997-simple @@ -697,18 +697,18 @@
A Tutor for Teaching <fixed-case>E</fixed-case>nglish as a Second Language for Deaf Users of <fixed-case>A</fixed-case>merican <fixed-case>S</fixed-case>ign <fixed-case>L</fixed-case>anguage - Kathleen F.McCoy + Kathleen F.McCoy Lisa N.Masterman W97-0508 mccoy-masterman-1997-tutor Application of <fixed-case>NLP</fixed-case> technologyto production of closed-caption <fixed-case>TV</fixed-case>. programs in <fixed-case>J</fixed-case>apanese for the hearing impaired - TakahiroWakao + TakahiroWakao TerumasaEhara EijiSawamura YoshiharuAbe - KatsuhikoShirai + KatsuhikoShirai W97-0509 wakao-etal-1997-application @@ -734,9 +734,9 @@ Evaluating Interactive Dialogue Systems: Extending Component Evaluation to Integrated System Evaluation - Marilyn A.Walker - Diane J.Litman - Candace A.Kamm + Marilyn A.Walker + Diane J.Litman + Candace A.Kamm AliciaAbella W97-0601 walker-etal-1997-evaluating @@ -744,37 +744,37 @@ A Generic Template to evaluate integrated components in spoken dialogue systems Gavin E.Churcher - Eric S.Atwell + Eric S.Atwell CliveSouter W97-0602 churcher-etal-1997-generic <fixed-case>GENERALITY</fixed-case> <fixed-case>AND</fixed-case> <fixed-case>OBJECTIVITY</fixed-case> Central Issues in Putting a Dialogue Evaluation Tool into Practical Use - LailaDybkjaer - Niels OleBernsen - HansDybkjaer + LailaDybkjaer + Niels OleBernsen + HansDybkjaer W97-0603 dybkjaer-etal-1997-generality An Object-Oriented Model for the Design of Cross-Domain Dialogue Systems - Ian M.O’Neill - Michael F.McTear + Ian M.O’Neill + Michael F.McTear W97-0604 oneill-mctear-1997-object Automatic Lexicon Enhancement by Means of Corpus Tagging - FredericBechet + FredericBechet ThierrySpriet - MarcEl-Beze + MarcEl-Beze W97-0605 bechet-etal-1997-automatic Clarification Dialogues as Measure to Increase Robustness in a Spoken Dialogue System - ElisabethMaier + ElisabethMaier NorbertReithinger JanAlexandersson W97-0606 @@ -782,7 +782,7 @@ Performance Measures for the Next Generation of Spoken Natural Language Dialog Systems - Ronnie W.Smith + Ronnie W.Smith W97-0607 smith-1997-performance @@ -803,7 +803,7 @@ Context Modeling for Language and Speech Generation - Keesvan Deemter + Keesvan Deemter W97-0610 van-deemter-1997-context @@ -818,7 +818,7 @@ ToshihikoItoh AkihiroDenda SatoruKogure - SeiichiNakagawa + SeiichiNakagawa W97-0612 itoh-etal-1997-robust
@@ -833,7 +833,7 @@ Mark-JanNederhof GosseBouma RobKoeling - Gertjanvan Noord + Gertjanvan Noord W97-0614 nederhof-etal-1997-grammatical
@@ -846,8 +846,8 @@
How to obey the 7 commandments for spoken dialogue? - EmielKrahmer - JanLandsbergen + EmielKrahmer + JanLandsbergen XavierPouteau W97-0616 krahmer-etal-1997-obey @@ -882,7 +882,7 @@ Speech-Graphics Dialogue Systems - Alan W.Biermann + Alan W.Biermann Michael S.Fulkerson Greg A.Keim W97-0621 @@ -901,13 +901,13 @@ <fixed-case>S</fixed-case>ummarising: Where are we now? Where should we go? - KarenSparck Jones + KarenSparck Jones W97-0701 sparck-jones-1997-summarising Salience-based Content Characterisafion of Text Documents - BranimirBoguraev + BranimirBoguraev W97-0702 boguraev-1997-salience @@ -920,8 +920,8 @@ Automated Text Summarization in <fixed-case>SUMMARIST</fixed-case> - EduardHovy - ChinYewLin + EduardHovy + ChinYewLin W97-0704 hovy-lin-1997-automated @@ -935,7 +935,7 @@
A Proposal for Task-based Evaluation of Text Summarization Systems - Therese FirminHand + Therese FirminHand W97-0706 hand-1997-proposal @@ -943,7 +943,7 @@ Automatic Text Summarization by Paragraph Extraction MandarMitra AmitSinghal - ChrisBuckley + ChrisBuckley W97-0707 mitra-etal-1997-automatic
@@ -958,7 +958,7 @@ Statistical methods for retrieving most significant paragraphs in newspaper articles JoseAbracos - Gabriel PereiraLopes + Gabriel PereiraLopes W97-0709 abracos-lopes-1997-statistical @@ -1017,7 +1017,7 @@ Multilingual design of <fixed-case>E</fixed-case>uro<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et PiekVossen PedroDiez-Orzas - WimPeters + WimPeters W97-0801 vossen-etal-1997-multilingual
@@ -1046,14 +1046,14 @@ Lexical Discrimination with the <fixed-case>I</fixed-case>talian Version of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et AlessandroArtale - BernardoMagnini + BernardoMagnini CarloStrapparava W97-0805 artale-etal-1997-lexical Integrating a Lexical Database and a Training Collection for Text Categorization - Jose MariaGomez-Hidalgo + Jose MariaGomez-Hidalgo Manuelde Buenaga Rodriguez W97-0806 gomez-hidalgo-de-buenaga-rodriguez-1997-integrating @@ -1068,27 +1068,27 @@ Word Sense Disambiguation for Acquisition of Selectional Preferences - DianaMcCarthy + DianaMcCarthy W97-0808 mccarthy-1997-word The Use of Lexical Semantics in Information Extraction - Joyce YueChai - Alan W.Biermann + Joyce YueChai + Alan W.Biermann W97-0809 chai-biermann-1997-use Subject and Object Dependency Extraction Using Finite-State Transducers - SalahAit-Mokhtar + SalahAit-Mokhtar Jean-PierreChanod W97-0810 ait-mokhtar-chanod-1997-subject An Experiment in Semantic Tagging using Hidden <fixed-case>M</fixed-case>arkov Model Tagging - FrederiqueSegond + FrederiqueSegond AnneSchiller GregoryGrefenstette Jean-PierreChanod @@ -1104,7 +1104,7 @@ Inferring Semantic Similarity from Distributional Evidence: an Analogy-based Approach to Word Sense Disambiguation StefanoFederici - SimonettaMontemagni + SimonettaMontemagni VitoPirrelli W97-0813 federici-etal-1997-inferring @@ -1134,7 +1134,7 @@ EtienneCornu FrancoisGrosjean LysianeGrosjean - NatalieKubler + NatalieKubler NicolasLewy CorinneTschumi W97-0902 @@ -1143,11 +1143,11 @@ Software Re-Use and Evolution in Text Generation Applications KarenKukich - RebeccaPassonneau - KathleenMcKeown - DragomirRadev + RebeccaPassonneau + KathleenMcKeown + DragomirRadev VasileiosHatzivassiloglou - HongyanJing + HongyanJing W97-0903 kukich-etal-1997-software @@ -1165,7 +1165,7 @@ Practical Considerations in Building a Multi-Lingual Authoring System for Business Letters - JohnTait + JohnTait W97-0906 tait-1997-practical @@ -1194,16 +1194,16 @@ Recycling Lingware in a Multilingual <fixed-case>MT</fixed-case> System - MannyRayner - DavidCarter + MannyRayner + DavidCarter IvanBretan RobertEklund - MatsWiren + MatsWiren Steffen LeoHansen SabineKirchmeier-Andersen ChristinaPhilp FinnSorensen - Hanne ErdmanThomsen + Hanne ErdmanThomsen W97-0910 rayner-etal-1997-recycling @@ -1221,29 +1221,29 @@ A Trainable Message Understanding System AmitBagga - Joyce YueChai + Joyce YueChai W97-1001 bagga-chai-1997-trainable Relational Learning of Pattern-Match Rules for Information Extraction - Mary ElaineCaliff - Raymond J.Mooney + Mary ElaineCaliff + Raymond J.Mooney W97-1002 califf-mooney-1997-relational A Preliminary Study of Word Clustering Based on Syntactic Behavior Wide R.Hogenhout - YujiMatsumoto + YujiMatsumoto W97-1003 hogenhout-matsumoto-1997-preliminary Learning New Compositions from Given Ones - JiDonghong + DonghongJi HeJun - HuangChangning + ChangningHuang W97-1004 ji-etal-1997-learning @@ -1251,7 +1251,7 @@ A Statistical Decision Making Method: A Case Study on Prepositional Phrase Attachment MehmetKayaalp TedPedersen - RebeccaBruce + RebeccaBruce W97-1005 kayaalp-etal-1997-statistical
@@ -1264,15 +1264,15 @@
From Psycholinguistic Modelling of Interlanguage in Second Language Acquisition to a Computational Model - MontseMaritxalar - ArantzaDiaz de Ilarraza - MaiteOronoz + MontseMaritxalar + ArantzaDiaz de Ilarraza + MaiteOronoz W97-1007 maritxalar-etal-1997-psycholinguistic What makes a word: Learning base units in <fixed-case>J</fixed-case>apanese for speech recognition - Laura MayfieldTomokiyo + Laura MayfieldTomokiyo KlausRies W97-1008 tomokiyo-ries-1997-makes @@ -1287,13 +1287,13 @@ Learning Stochastic Categorial Grammars MilesOsborne - TedBriscoe + TedBriscoe W97-1010 osborne-briscoe-1997-learning Learning and Application of Differential Grammars - David M. W.Powers + David M. W.Powers W97-1011 powers-1997-learning @@ -1311,8 +1311,8 @@ Word Triggers and the <fixed-case>EM</fixed-case> Algorithm - ChristophTillmann - HermannNey + ChristophTillmann + HermannNey W97-1014 tillmann-ney-1997-word @@ -1326,7 +1326,7 @@ Resolving <fixed-case>PP</fixed-case> attachment Ambiguities with Memory-Based Learning JakubZavrel - WalterDaelemans + WalterDaelemans JornVeenstra W97-1016 zavrel-etal-1997-resolving @@ -1346,7 +1346,7 @@ A Complexity Measure for Diachronic <fixed-case>C</fixed-case>hinese Phonology AnandRaman JohnNewman - JonPatrick + JonPatrick W97-1101 raman-etal-1997-complexity @@ -1378,20 +1378,20 @@
A <fixed-case>C</fixed-case>zech Morphological Lexicon - HanaSkoumalova + HanaSkoumalova W97-1106 skoumalova-1997-czech Stochastic phonological grammars and acceptability JohnColeman - JanetPierrehumbert + JanetPierrehumbert W97-1107 coleman-pierrehumbert-1997-stochastic Linearization of Nonlinear Lexical Representations - George AntonKiraz + George AntonKiraz W97-1108 kiraz-1997-linearization @@ -1408,7 +1408,7 @@ Probabilistic Model of Acoustic/Prosody/Concept Relationships for Speech Synthesis - Nanette M.Veilleux + Nanette M.Veilleux W97-1201 veilleux-1997-probabilistic @@ -1430,7 +1430,7 @@ Integrating Language Generation with Speech Synthesis in a Concept to Speech System ShimeiPan - Kathleen R.McKeown + Kathleen R.McKeown W97-1204 pan-mckeown-1997-integrating @@ -1478,8 +1478,8 @@ Resolving bridging references in unrestricted text - MassimoPoesio - RenataVieira + MassimoPoesio + RenataVieira SimoneTeufel W97-1301 poesio-etal-1997-resolving @@ -1494,7 +1494,7 @@ Factors in anaphora resolution: they are not the only things that matter. A case study based on two different approaches - RuslanMitkov + RuslanMitkov W97-1303 mitkov-1997-factors @@ -1544,15 +1544,15 @@
Event coreference for information extraction - KevinHumphreys - RobertGaizauskas - SalihaAzzam + KevinHumphreys + RobertGaizauskas + SalihaAzzam W97-1311 humphreys-etal-1997-event How far are we from (semi-)automatic of anaphoric links in corpora? - RuslanMitkov + RuslanMitkov W97-1312 mitkov-1997-far @@ -1565,7 +1565,7 @@
Cooperation between pronoun and reference resolution for unrestricted texts - AndreiPopescu-Belis + AndreiPopescu-Belis IsabelleRobba W97-1314 popescu-belis-robba-1997-cooperation @@ -1583,8 +1583,8 @@ Integration and Synchronization of Input Modes during Multimodal Human-Computer Interaction - SharonOviatt - AntonellaDeAngeli + SharonOviatt + AntonellaDeAngeli KarenKuhn W97-1401 oviatt-etal-1997-integration @@ -1592,7 +1592,7 @@ Referring in Multimodal Systems: The Importance of User Expertise and System Features DanielaPetrelli - AntonellaDe Angeli + AntonellaDe Angeli WalterGerbino GiuliaCassano W97-1402 @@ -1601,7 +1601,7 @@ Towards Generation of Fluent Referring Action in Multimodal Situations TsuneakiKato - Yukiko I.Nakano + Yukiko I.Nakano W97-1403 kato-nakano-1997-towards @@ -1642,7 +1642,7 @@ Planning Referential Acts for Animated Presentation Agents - ElisabethAndre + ElisabethAndre ThomasRist W97-1409 andre-rist-1997-planning @@ -1657,8 +1657,8 @@ Referring to Displays in Multimodal Interfaces DaqingHe - GraemeRitchie - JohnLee + GraemeRitchie + JohnLee W97-1411 he-etal-1997-referring @@ -1672,14 +1672,14 @@ Constraints on the Use of Language, Gesture and Speech for Multimodal Dialogues BertrandGaiffe - LaurentRomary + LaurentRomary W97-1413 gaiffe-romary-1997-constraints A Model for Multimodal Reference Resolution - Luis. A.Pineda - E. GabrielaGarza + Luis. A.Pineda + E. GabrielaGarza W97-1414 pineda-garza-1997-model @@ -1716,60 +1716,60 @@ Some apparently disjoint aims and requirements for grammar development environments” the case of natural language generation - JohnBateman + JohnBateman W97-1501 bateman-1997-apparently The <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>anker: a Tool for Supervised Training of Parsed Corpora - DavidCarter + DavidCarter W97-1502 carter-1997-treebanker Participatory Design for Linguistic Engineering: the Case of the <fixed-case>GEPPETTO</fixed-case> Development Environment - FabioCiravegna - AlbertoLavelli + FabioCiravegna + AlbertoLavelli DanielaPetrelli - FabioPianesi + FabioPianesi W97-1503 ciravegna-etal-1997-participatory Hypertextual Grammar Development - LucaDini - GiampaoloMazzini + LucaDini + GiampaoloMazzini W97-1504 dini-mazzini-1997-hypertextual Maintaining the Forest and Burning out the Underbrush in <fixed-case>XTAG</fixed-case> - ChristineDoran - BethHockey + ChristineDoran + BethHockey PhilipHopely JosephRosenzweig AnoopSarkar - B.Srinivas + B.Srinivas FeiXia W97-1505 doran-etal-1997-maintaining The <fixed-case>C</fixed-case>on<fixed-case>T</fixed-case>roll System as Large Grammar Development Platform - ThiloGotz - Walt DetmarMeurers + ThiloGotz + Walt DetmarMeurers W97-1506 gotz-meurers-1997-controll Application-driven automatic subgrammar extraction - RenateHenschel + RenateHenschel W97-1507 henschel-1997-application Lexical Resource Reconciliation in the Xerox Linguistic Environment - Ronald M.Kaplan + Ronald M.Kaplan W97-1508 kaplan-1997-lexical @@ -1781,7 +1781,7 @@ <fixed-case>EFLUF</fixed-case> - an Implementation of a <fixed-case>FL</fixed-case>exible Unification Formalism - LenaStromback + LenaStromback W97-1510 stromback-1997-efluf @@ -1789,7 +1789,7 @@ Exploiting Contextual Information in Hypothesis Selection for Grammar Refinement ThanarukTheeramunkong YasunobuKawaguchi - ManabuOkumura + ManabuOkumura W97-1511 theeramunkong-etal-1997-exploiting @@ -1801,15 +1801,15 @@ Hdrug. A Flexible and Extendible Development Environment for Natural Language Processing. - Gertjanvan Noord + Gertjanvan Noord GosseBouma W97-1513 van-noord-bouma-1997-hdrug An Object-Oriented Linguistic Engineering Environment using <fixed-case>LFG</fixed-case> (Lexical Functionnal Grammar) and <fixed-case>CG</fixed-case> (Conceptual Graphs) - JeromeVapillon - XavierBriffault + JeromeVapillon + XavierBriffault GerardSabah KarimChibout W97-1514 diff --git a/data/xml/W98.xml b/data/xml/W98.xml index 2cf5dcaef8..4ba73670bb 100644 --- a/data/xml/W98.xml +++ b/data/xml/W98.xml @@ -4,10 +4,10 @@ Proceedings of the Fourth International Workshop on Tree Adjoining Grammars and Related Frameworks (TAG+4) W98-01 - AnneAbeillé + AnneAbeillé TilmanBecker GiorgioSatta - K.Vijay-Shanker + K.Vijay-Shanker Institute for Research in Cognitive Science
University of Pennsylvania
August @@ -21,14 +21,14 @@ An experiment on synchronous <fixed-case>TAG</fixed-case>s for the construction of a transfer module AlexandreAgustini - Vera Lúcia Strubede Lima + Vera Lúcia Strubede Lima 1–4 W98-0101 agustini-de-lima-1998-experiment Transplanting supertags from <fixed-case>E</fixed-case>nglish to <fixed-case>S</fixed-case>panish - SrinivasBangalore + SrinivasBangalore 5–8 W98-0102 bangalore-1998-transplanting @@ -44,7 +44,7 @@ Motion verbs and semantic features in <fixed-case>TAG</fixed-case> ToniaBleam - MarthaPalmer + MarthaPalmer K.Vijay-Shanker 13–16 W98-0104 @@ -59,7 +59,7 @@ Can the <fixed-case>TAG</fixed-case> derivation tree represent a semantic graph? An answer in the light of Meaning-Text Theory - Marie-HélèneCandito + Marie-HélèneCandito SylvainKahane 21–24 W98-0106 @@ -67,7 +67,7 @@ Defining <fixed-case>DTG</fixed-case> derivations to get semantic graphs - Marie-HélèneCandito + Marie-HélèneCandito SylvainKahane 25–28 W98-0107 @@ -75,11 +75,11 @@ The <fixed-case>L</fixed-case>ex<fixed-case>S</fixed-case>ys project - JohnCarroll - NicolasNicolov + JohnCarroll + NicolasNicolov OlgaShaumyan MartineSmets - DavidWeir + DavidWeir 29–33 W98-0108 carroll-etal-1998-lexsys @@ -100,9 +100,9 @@ A tabular interpretation of bottom-up automata for <fixed-case>TAG</fixed-case> - Ericde la Clergerie - Miguel A.Alonso Pardo - David CabreroSouto + Ericde la Clergerie + Miguel A.Alonso Pardo + David CabreroSouto 42–45 W98-0111 de-la-clergerie-etal-1998-tabular @@ -118,7 +118,7 @@ Describing discourse semantics ClaireGardent - BonnieWebber + BonnieWebber 50–53 W98-0113 gardent-webber-1998-describing @@ -163,7 +163,7 @@ Partial proof trees and structural modalities - Aravind K.Joshi + Aravind K.Joshi SethKulick NatashaKurtonina 74–75 @@ -216,7 +216,7 @@ ‘Category families’ for Categorial Grammars - MaryMcGee Wood + MaryMcGee Wood 100–103 W98-0126 mcgee-wood-1998-category @@ -225,8 +225,8 @@ Packing of feature structures for optimizing the <fixed-case>HPSG</fixed-case>-style grammar translated from <fixed-case>TAG</fixed-case> YusukeMiyao KentaroTorisawa - YukaTateisi - Jun’ichiTsujii + YukaTateisi + Jun’ichiTsujii 104–107 W98-0127 miyao-etal-1998-packing @@ -241,7 +241,7 @@ Description theory, <fixed-case>LTAG</fixed-case>s and underspecified semantics ReinhardMuskens - EmielKrahmer + EmielKrahmer 112–115 W98-0129 muskens-krahmer-1998-description @@ -257,14 +257,14 @@ Automatic extraction of stochastic lexicalized tree grammars from treebanks - GünterNeumann + GünterNeumann 120–123 W98-0131 neumann-1998-automatic Memoisation in sentence generation with lexicalised grammars - NicolasNicolov + NicolasNicolov 124–127 W98-0132 nicolov-1998-memoisation @@ -286,7 +286,7 @@ Wh-islands in <fixed-case>TAG</fixed-case> and related formalisms - OwenRambow + OwenRambow K.Vijay-Shanker 147–150 W98-0135 @@ -316,7 +316,7 @@ A compact encoding of a <fixed-case>DTG</fixed-case> grammar MartineSmets - RogerEvans + RogerEvans 164–167 W98-0139 smets-evans-1998-compact @@ -324,17 +324,17 @@ Formal analyses of the <fixed-case>H</fixed-case>ungarian verbal complex TemeseSzalai - EdwardStabler + EdwardStabler 168–171 W98-0140 szalai-stabler-1998-formal Translating the <fixed-case>XTAG</fixed-case> <fixed-case>E</fixed-case>nglish grammar to <fixed-case>HPSG</fixed-case> - YukaTateisi + YukaTateisi KentaroTorisawa YusukeMiyao - Jun’ichiTsujii + Jun’ichiTsujii 172–175 W98-0141 tateisi-etal-1998-translating @@ -349,7 +349,7 @@ Consistent grammar development using partial-tree descriptions for <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammars FeiXia - MarthaPalmer + MarthaPalmer K.Vijay-Shanker JosephRosenzweig 180–183 @@ -402,10 +402,10 @@ “<fixed-case>I</fixed-case> just played that a minute ago!:” Designing User Interfaces for Audio Navigation - JuliaHirschberg + JuliaHirschberg JohnChoi - ChristineNakatani - SteveWhittaker + ChristineNakatani + SteveWhittaker W98-0206 hirschberg-etal-1998-just @@ -424,7 +424,7 @@ RodHolland RobHyland InderjeetMani - MarkMaybury + MarkMaybury AndyMerlino JimRayson W98-0208 @@ -439,17 +439,17 @@ A Media-Independent Content Language for Integrated Text and Graphics Generation - NancyGreen + NancyGreen GiuseppeCarenini - StephanKerpedjiev - StevenRoth - JohannaMoore + StephanKerpedjiev + StevenRoth + JohannaMoore W98-0210 green-etal-1998-media How to build a (quite general) linguistic diagram editor - JoCalder + JoCalder W98-0211 calder-1998-build @@ -461,7 +461,7 @@ Multimodal Visualization of Geometrical Constructions - ValerieBellynck + ValerieBellynck W98-0213 bellynck-1998-multimodal @@ -480,7 +480,7 @@ Integration of Speech and Vision in a small mobile robot - DominiqueEstival + DominiqueEstival W98-0216 estival-1998-integration @@ -503,7 +503,7 @@ Identifying the Linguistic Correlates of Rhetorical Relations - Simon H.Corston-Oliver + Simon H.Corston-Oliver W98-0302 corston-oliver-1998-identifying @@ -513,7 +513,7 @@ KarenKukich SusanneWolff ChiLu - MartinChodorow + MartinChodorow W98-0303 burstein-etal-1998-enriching @@ -562,13 +562,13 @@ Some Exotic Discourse Markers of Spoken Dialog - NigelWard + NigelWard W98-0311 ward-1998-exotic Lexical Marking and the Recovery of Discourse Structure - KathleenDahlgren + KathleenDahlgren W98-0312 dahlgren-1998-lexical @@ -581,14 +581,14 @@
Signalling in written text: a corpus-based approach - Marie-PaulePery-Woodley + Marie-PaulePery-Woodley W98-0314 pery-woodley-1998-signalling Anchoring a <fixed-case>L</fixed-case>exicalized <fixed-case>T</fixed-case>ree-<fixed-case>A</fixed-case>djoining <fixed-case>G</fixed-case>rammar for Discourse - Bonnie LynnWebber - Aravind K.Joshi + Bonnie LynnWebber + Aravind K.Joshi W98-0315 webber-joshi-1998-anchoring @@ -602,7 +602,7 @@
Cue Phrase Selection in Instruction Dialogue Using Machine Learning - Yukiko I.Nakano + Yukiko I.Nakano TsuneakiKato W98-0317 nakano-kato-1998-cue @@ -616,8 +616,8 @@ Lexical, Prosodic, and Syntactic Cues for Dialog Acts - DanielJurafsky - ElizabethShriberg + DanielJurafsky + ElizabethShriberg BarbaraFox TraciCurl W98-0319 @@ -636,7 +636,7 @@ Towards an implementable dependency grammar - TimoJärvinen + TimoJärvinen PasiTapanainen W98-0501 jarvinen-tapanainen-1998-towards @@ -649,10 +649,10 @@ Two Useful Measures of Word Order Complexity - TomasHolan - VladislavKubon - KarelOliva - MartinPlatek + TomasHolan + VladislavKubon + KarelOliva + MartinPlatek W98-0503 holan-etal-1998-two @@ -670,14 +670,14 @@ Movement rules revisited - EvaHajicova + EvaHajicova W98-0506 hajicova-1998-movement Integration of syntactic and lexical information in a hierarchical dependency grammar CristinaBarbero - LeonardoLesmo + LeonardoLesmo VincenzoLombardo W98-0507 barbero-etal-1998-integration @@ -715,15 +715,15 @@ Complements and Adjuncts in Dependency Grammar Parsing Emulated by a Constrained Context-Free Grammar - Tom B.Y.Lai - ChangningHuang + Tom B.Y.Lai + ChangningHuang W98-0512 lai-huang-1998-complements An Annotated Corpus in <fixed-case>J</fixed-case>apanese Using <fixed-case>T</fixed-case>esniere’s Structural Syntax YvesLepage - AndoShin-Ichi + Shin-IchiAndo AkamineSusumu IidaHitoshi W98-0513 @@ -763,10 +763,10 @@ Using <fixed-case>NOMLEX</fixed-case> to Produce Nominalization Patterns for Information Extraction - AdamMeyers - CatherineMacleod + AdamMeyers + CatherineMacleod RomanYangarber - RalphGrishman + RalphGrishman LeslieBarrett RuthReeves W98-0604 @@ -778,7 +778,7 @@ MasakiMurata YasunoriYata MitsunobuShimada - MakotoNagao + MakotoNagao W98-0605 kurohashi-etal-1998-construction @@ -786,7 +786,7 @@ The treatment of noun phrase queries in a natural language database access system AlexandraKlein JohannesMatiasek - HaraldTrost + HaraldTrost W98-0606 klein-etal-1998-treatment
@@ -794,13 +794,13 @@ Integrating Referring and Informing in <fixed-case>NP</fixed-case> Planning MichaelO’Donnell HuaCheng - JanetHitzeman + JanetHitzeman W98-0607 odonnell-etal-1998-integrating
Coreference in Knowledge Editing - Keesvan Deemter + Keesvan Deemter RichardPower W98-0608 van-deemter-power-1998-coreference @@ -833,7 +833,7 @@ Nominal Metonymy Processing - BoyanOnyshkevych + BoyanOnyshkevych W98-0613 onyshkevych-1998-nominal @@ -858,7 +858,7 @@ General Word Sense Disambiguation Method Based on a Full Sentential Context JiriStetina SadaoKurohashi - MakotoNagao + MakotoNagao W98-0701 stetina-etal-1998-general
@@ -870,14 +870,14 @@
Word Sense Disambiguation based on Semantic Density - RadaMihalcea - Dan I.Moldovan + RadaMihalcea + Dan I.Moldovan W98-0703 mihalcea-moldovan-1998-word The Use of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et in Information Retrieval - RilaMandala + MandalaRila TokunagaTakenobu TanakaHozumi W98-0704 @@ -913,31 +913,31 @@ Using <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et for Building <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>ets - XavierFarreres - GermanRigau + XavierFarreres + GermanRigau HoracioRodffguez W98-0709 farreres-etal-1998-using Aligning <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et with Additional Lexical Resources - Oi YeeKwong + Oi YeeKwong W98-0710 kwong-1998-aligning Automatic Adaptation of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et to Sublanguages and to Computational Tasks - RobertoBasili + RobertoBasili AlessandroCucchiarelli CarloConsoli - Maria TeresaPazienza - PaolaVelardi + Maria TeresaPazienza + PaolaVelardi W98-0711 basili-etal-1998-automatic Augmenting <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et-like lexical resources with distributional evidence. An application-oriented perspective - SimonettaMontemagni + SimonettaMontemagni VitoPirrelli W98-0712 montemagni-pirrelli-1998-augmenting @@ -945,15 +945,15 @@ Lexical Acquisition with <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and the Mikrokosmos Ontology TomO’Hara - KaviMahesh - SergeiNirenburg + KaviMahesh + SergeiNirenburg W98-0713 ohara-etal-1998-lexical Algorithms for Ontological Mediation Alistair E.Campbell - Stuart C.Shapiro + Stuart C.Shapiro W98-0714 campbell-shapiro-1998-algorithms @@ -965,7 +965,7 @@ A Comparison of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et and <fixed-case>R</fixed-case>oget’s Taxonomy for Measuring Semantic Similarity - Michael L.McHale + Michael L.McHale W98-0716 mchale-1998-comparison @@ -978,7 +978,7 @@
Usage of <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et in Natural Language Generation - HongyanJing + HongyanJing W98-0718 jing-1998-usage @@ -990,7 +990,7 @@
Deriving Metonymic Coercions from <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et - Sanda M.Harabagiu + Sanda M.Harabagiu W98-0720 harabagiu-1998-deriving @@ -1030,7 +1030,7 @@ Grapheme-to-phoneme transcription rules for <fixed-case>S</fixed-case>panish, with application to automatic speech recognition and synthesis PatriziaBonaventura FabioGiuliani - Juan MariaGarrido + Juan MariaGarrido IsabelOrtin W98-0804 bonaventura-etal-1998-grapheme @@ -1079,7 +1079,7 @@
An Approach to the Automatic Acquisition of Phonotactic Constraints - AnjaBelz + AnjaBelz W98-0905 belz-1998-approach @@ -1104,7 +1104,7 @@ Discovering Lexical Information by Tagging <fixed-case>A</fixed-case>rabic Newspaper Text SaleemAbuleil - MarthaEvens + MarthaEvens W98-1001 abuleil-evens-1998-discovering @@ -1130,7 +1130,7 @@
Translating Names and Technical Terms in <fixed-case>A</fixed-case>rabic Text - BonnieGlover + BonnieGlover KevinKnight W98-1005 glover-knight-1998-translating @@ -1157,13 +1157,13 @@ A Computational Morphology System for <fixed-case>A</fixed-case>rabic RiyadAl-Shalabi - MarthaEvens + MarthaEvens W98-1009 al-shalabi-evens-1998-computational A Morphological Analyzer for <fixed-case>A</fixed-case>kkadian Verbal Forms with a Model of Phonetic Transformations - FrancoisBarthelemy + FrancoisBarthelemy W98-1010 barthelemy-1998-morphological @@ -1175,7 +1175,7 @@ Generating Determiners and Quantifiers in <fixed-case>H</fixed-case>ebrew - Yael DahanNetzer + Yael DahanNetzer MichaelElhadad W98-1012 netzer-elhadad-1998-generating @@ -1222,13 +1222,13 @@ Encoding Linguistic Corpora - NancyIde + NancyIde W98-1102 ide-1998-encoding Using a Probabilistic Translation Model for Cross-Language Information Retrieval - Jian-YunNie + Jian-YunNie PierreIsabelle GeorgeFoster W98-1103 @@ -1237,20 +1237,20 @@ Using Suffix Arrays to Compute Term Frequency and Document Frequency for All Substrings in a Corpus MikioYamamoto - Kenneth W.Church + Kenneth W.Church W98-1104 yamamoto-church-1998-using Semantic Tagging using a Probabilistic Context Free Grammar - MichaelCollins + MichaelCollins ScottMiller W98-1105 collins-miller-1998-semantic An Empirical Approach to Conceptual Case Frame Acquisition - EllenRiloff + EllenRiloff MarkSchmelzenbach W98-1106 riloff-schmelzenbach-1998-empirical @@ -1258,7 +1258,7 @@ Semantic Lexicon Acquisition for Learning Natural Language Interfaces Cynthia A.Thompson - Raymond J.Mooney + Raymond J.Mooney W98-1107 thompson-mooney-1998-semantic @@ -1281,8 +1281,8 @@ Generalized unknown morpheme guessing for hybrid <fixed-case>POS</fixed-case> tagging of <fixed-case>K</fixed-case>orean - JeongwonCha - GeunbaeLee + JeongwonCha + GeunbaeLee Jong-HyeokLee W98-1110 cha-etal-1998-generalized @@ -1295,9 +1295,9 @@ Aligning tagged bitexts - RaquelMartinez + RaquelMartinez JosebaAbaitua - ArantzaCasillas + ArantzaCasillas W98-1112 martinez-etal-1998-aligning @@ -1305,22 +1305,22 @@ Towards Unsupervised Extraction of Verb Paradigms from Large Corpora Cornelia H.Parkes Alexander M.Malek - Mitchell P.Marcus + Mitchell P.Marcus W98-1113 parkes-etal-1998-towards Can Subcategorisation Probabilities Help a Statistical Parser - JohnCarroll + JohnCarroll GuidoMinnen - TedBriscoe + TedBriscoe W98-1114 carroll-etal-1998-subcategorisation Edge-Based Best-First Chart Parsing EugeneCharniak - SharonGoldwater + SharonGoldwater MarkJohnson W98-1115 charniak-etal-1998-edge @@ -1344,14 +1344,14 @@ AndrewBorthwick JohnSterling EugeneAgichtein - RalphGrishman + RalphGrishman W98-1118 borthwick-etal-1998-exploiting A Statistical Approach to Anaphora Resolution NiyuGe - JohnHale + JohnHale EugeneCharniak W98-1119 ge-etal-1998-statistical @@ -1359,29 +1359,29 @@ A Decision Tree Method for Finding and Classifying Names in <fixed-case>J</fixed-case>apanese Texts SatoshiSekine - RalphGrishman + RalphGrishman HiroyukiShinnou W98-1120 sekine-etal-1998-decision <fixed-case>POS</fixed-case> Tagging versus Classes in Language Modeling - Peter A.Heeman + Peter A.Heeman W98-1121 heeman-1998-pos Automatic Acquisition of Phrase Grammars for Stochastic Language Modeling GiuseppeRiccardi - SrinivasBangalore + SrinivasBangalore W98-1122 riccardi-bangalore-1998-automatic Linear Segmentation and Segment Significance Min-YenKan - Judith L.Klavans - Kathleen R.McKeown + Judith L.Klavans + Kathleen R.McKeown W98-1123 kan-etal-1998-linear @@ -1394,15 +1394,15 @@ Discourse Parsing: A Decision Tree Approach TadashiNomoto - YujiMatsumoto + YujiMatsumoto W98-1125 nomoto-matsumoto-1998-discourse Mapping Collocational Properties into Machine Learning Features - Janyce M.Wiebe - Kenneth J.McKeever - Rebecca F.Bruce + Janyce M.Wiebe + Kenneth J.McKeever + Rebecca F.Bruce W98-1126 wiebe-etal-1998-mapping @@ -1419,7 +1419,7 @@ Abstraction Is Harmful in Language Learning - WalterDaelemans + WalterDaelemans W98-1201 daelemans-1998-abstraction @@ -1435,19 +1435,19 @@ Learning a Lexicalized Grammar for <fixed-case>G</fixed-case>erman - SandraKubler + SandraKubler W98-1203 kubler-1998-learning A Lexically-Intensive Algorithm for Domain-Specific Knowlegde Acquisition - ReneSchneider + ReneSchneider W98-1204 schneider-1998-lexically Look-Back and Look-Ahead in the Conversion of Hidden <fixed-case>M</fixed-case>arkov Models into Finite State Transducers - AndréKempe + AndréKempe W98-1205 kempe-1998-look @@ -1466,9 +1466,9 @@
Implementing a Sense Tagger in a General Architecture for Text Engineering - HamishCunningham + HamishCunningham MarkStevenson - YorickWilks + YorickWilks W98-1208 cunningham-etal-1998-implementing @@ -1504,7 +1504,7 @@
Automatically generating hypertext in newspaper articles by computing semantic relatedness - GreenStephen J + Stephen JGreen W98-1213 green-1998-automatically @@ -1517,13 +1517,13 @@
Sense Variation and Lexical Semantics Generative Operations - PatrickSaint-Dizier + PatrickSaint-Dizier W98-1215 saint-dizier-1998-sense An Attempt to Use Weighted Cusums to Identify Sublanguages - HaroldSomers + HaroldSomers W98-1216 somers-1998-attempt @@ -1535,7 +1535,7 @@
Applications and Explanations of <fixed-case>Z</fixed-case>ipf’s Law - David M. W.Powers + David M. W.Powers W98-1218 powers-1998-applications @@ -1555,7 +1555,7 @@
Generatlon of Simple <fixed-case>T</fixed-case>urkish Sentences with Systemic-Functional Grammar - IlyasCicekli + IlyasCicekli TurgayKorkrmaz W98-1221 cicekli-korkrmaz-1998-generatlon @@ -1570,16 +1570,16 @@ Modularity in Inductively-Learned Word Pronunciation Systems - Antalvan den Bosch + Antalvan den Bosch TonWeijters - WalterDaelemans + WalterDaelemans W98-1223 van-den-bosch-etal-1998-modularity Do Not Forget: Full Memory in Memory-Based Learning of Word Pronunciation - Antalvan den Bosch - WalterDaelemans + Antalvan den Bosch + WalterDaelemans W98-1224 van-den-bosch-daelemans-1998-forget @@ -1654,7 +1654,7 @@
The Total <fixed-case>T</fixed-case>uring Test and the Loebner Prize - David M. W.Powers + David M. W.Powers W98-1235 powers-1998-total @@ -1680,19 +1680,19 @@
Morphemes as Necessary Concept for Structures Discovery from Untagged Corpora - HerveDejean + HerveDejean W98-1239 dejean-1998-morphemes The segmentation problem in morphology learning - Christopher D.Manning + Christopher D.Manning W98-1240 manning-1998-segmentation Reconciliation of Unsupervised Clustering, Segmentation and Cohesion - David M. W.Powers + David M. W.Powers W98-1241 powers-1998-reconciliation @@ -1734,7 +1734,7 @@ Robust Parsing Using a Hidden <fixed-case>M</fixed-case>arkov Model Wide R.Hogenhout - YujiMatsumoto + YujiMatsumoto W98-1304 hogenhout-matsumoto-1998-robust @@ -1748,14 +1748,14 @@
Treatment of e-Moves in Subset Construction - Gertjanvan Noord + Gertjanvan Noord W98-1306 van-noord-1998-treatment Learning Finite-State Models for Language Understanding - DavidPico - EnriqueVidal + DavidPico + EnriqueVidal W98-1307 pico-vidal-1998-learning @@ -1768,13 +1768,13 @@ Implementing Voting Constraints with Finite State Transducers KemalOflazer - GokhanTur + GokhanTur W98-1309 oflazer-tur-1998-implementing Feature Structures, Unification and Finite-State Transducers - RemiZajac + RemiZajac W98-1310 zajac-1998-feature @@ -1800,7 +1800,7 @@ Natural Language Generation - EduardHovy + EduardHovy Association for Computational Linguistics
Niagara-on-the-Lake, Ontario, Canada
August @@ -1813,49 +1813,49 @@ Natural Language Generation Journeys to Interactive 3<fixed-case>D</fixed-case> Worlds Invited Talk Extended Abstract - James C.Lester + James C.Lester William H.Bares - Charles B.Callaway + Charles B.Callaway Stuart G.Towns W98-1401 lester-etal-1998-natural Communicative Goal-Driven <fixed-case>NL</fixed-case> Generation and Data-Driven Graphics Generation: An Architectural Synthesis for Multimedia Page Generation - JohnBateman + JohnBateman ThomasKamps - JorgKleinz + JorgKleinz KlausReichenberger W98-1402 bateman-etal-1998-communicative A Principled Representation of Attributive Descriptions for Generating Integrated Text and Information Graphics Presentations - NancyGreen + NancyGreen GiuseppeCarenini - JohannaMoore + JohannaMoore W98-1403 green-etal-1998-principled An Architecture for Opportunistic Text Generation - ChrisMellish + ChrisMellish MickO’Donnell - JonOberlander + JonOberlander AlistairKnott W98-1404 mellish-etal-1998-architecture Controlled Realization of Complex Objects - David D.McDonald + David D.McDonald W98-1405 mcdonald-1998-controlled De-Constraining Text Generation StephenBeale - SergeiNirenburg + SergeiNirenburg EvelyneViegas LeoWanner W98-1406 @@ -1879,10 +1879,10 @@ A New Approach to Expert System Explanations ReginaBarzilay DarylMcCullough - OwenRambow - JonathanDeCristofaro + OwenRambow + JonathanDeCristofaro TanyaKorelsky - BenoitLavoie + BenoitLavoie W98-1409 barzilay-etal-1998-new @@ -1894,9 +1894,9 @@
Experiments Using Stochastic Search for Text Planning - ChrisMellish + ChrisMellish AlistairKnott - JonOberlander + JonOberlander MickO’Donnell W98-1411 mellish-etal-1998-experiments @@ -1938,7 +1938,7 @@ Planning Dialogue Contributions With New Information - KristiinaJokinen + KristiinaJokinen HidekiTanaka AkioYokoo W98-1417 @@ -1946,7 +1946,7 @@ Generation of Noun Compounds in <fixed-case>H</fixed-case>ebrew: Can Syntactic Knowledge Be Fully Encapsulated? - Yael DahanNetzer + Yael DahanNetzer MichaelElhadad W98-1418 netzer-elhadad-1998-generation @@ -1954,14 +1954,14 @@ Textual Economy Through Close Coupling of Syntax and Semantics MatthewStone - BonnieWebber + BonnieWebber W98-1419 stone-webber-1998-textual A Language-Independent System for Generating Feature Structures from Interlingua Representations MuratTemizsoy - IlyasCicekli + IlyasCicekli W98-1420 temizsoy-cicekli-1998-language @@ -1980,7 +1980,7 @@ Approaches to Surface Realization With <fixed-case>HPSG</fixed-case> - GrahamWilcock + GrahamWilcock W98-1423 wilcock-1998-approaches @@ -2004,22 +2004,22 @@
The Practical Value of N-Grams Is in Generation - IreneLangkilde + IreneLangkilde KevinKnight W98-1426 langkilde-knight-1998-practical Generation as a Solution to Its Own Problem - DoniaScott + DoniaScott RichardPower - RogerEvans + RogerEvans W98-1427 scott-etal-1998-generation <fixed-case>EXEMPLARS</fixed-case>: A Practical, Extensible Framework For Dynamic Text Generation - MichaelWhite + MichaelWhite TedCaldwell W98-1428 white-caldwell-1998-exemplars @@ -2036,16 +2036,16 @@ System Demonstration Content Planning as the Basis for an Intelligent Tutoring System RevaFreedman StefanBrandle - MichaelGlass + MichaelGlass Jung HeeKim YujianZhou - Martha W.Evens + Martha W.Evens W98-1430 freedman-etal-1998-system System Demonstration <fixed-case>FLAUBERT</fixed-case>: An User Friendly System for Multilingual Text Generation - FredericMeunier + FredericMeunier LaurenceDanlos W98-1431 meunier-danlos-1998-system @@ -2059,8 +2059,8 @@ System Demonstration <fixed-case>G</fixed-case>oal<fixed-case>G</fixed-case>etter: Generation of Spoken Soccer Reports - MarietTheune - EstherKlabbers + MarietTheune + EstherKlabbers W98-1433 theune-klabbers-1998-system @@ -2074,7 +2074,7 @@
System Demonstration Interactive Generation and Knowledge Administration in <fixed-case>M</fixed-case>ulti<fixed-case>M</fixed-case>eteo - JoseCoch + JoseCoch W98-1435 coch-1998-system @@ -2084,15 +2084,15 @@ TeodoraRatiu MariaFerencz Tonde-CsillaKovacs - IstvanNagy - DianaZaiu + IstvanNagy + DianaZaiu W98-1436 ferencz-etal-1998-romvox
<fixed-case>WYSIWYM</fixed-case>: knowledge editing with natural language feedback RichardPower - DoniaScott + DoniaScott W98-1437 power-scott-1998-wysiwym @@ -2101,7 +2101,7 @@ Proceedings of the Third Conference on Empirical Methods for Natural Language Processing W98-15 - NancyIde + NancyIde AtroVoutilainen Association for Computational Linguistics
Palacio de Exposiciones y Congresos, Granada, Spain
@@ -2116,15 +2116,15 @@ Dynamic Coreference-Based Summarization BreckBaldwin - Thomas S.Morton + Thomas S.Morton 1–6 W98-1501 baldwin-morton-1998-dynamic Multilingual Robust Anaphora Resolution - RuslanMitkov - LamiaBelguith + RuslanMitkov + LamiaBelguith MalgorzataStys 7–16 W98-1502 @@ -2133,7 +2133,7 @@ Aligning Clattses in Parallel Texts SotirisBoutsis - SteliosPiperidis + SteliosPiperidis 17–26 W98-1503 boutsis-piperidis-1998-aligning @@ -2156,15 +2156,15 @@ Measures for Corpus Similarity and Homogeneity AdamKilgarriff - TonyRose + TonyRose 46–52 W98-1506 kilgarriff-rose-1998-measures Word-Sense Distinguishability and Inter-Coder Agreement - RebeccaBruce - JanyceWiebe + RebeccaBruce + JanyceWiebe 53–60 W98-1507 bruce-wiebe-1998-word @@ -2200,14 +2200,14 @@ <fixed-case>J</fixed-case>apanese Dependency Structure Analysis based on Lexicalized Statistics FujioMasakazu - YujiMatsumoto + YujiMatsumoto 87–95 W98-1511 fujio-matsumoto-1998-japanese A Comparison of Criteria for Maximum Entropy/ Minimum Divergence Feature Selection - AdamBerger + AdamBerger HarryPrintz 96–106 W98-1512 @@ -2223,7 +2223,7 @@ Proceedings of the 11th Nordic Conference of Computational Linguistics (NODALIDA 1998) W98-16 - BenteMaegaard + BenteMaegaard Center for Sprogteknologi, University of Copenhagen, Denmark
Copenhagen, Denmark
March @@ -2237,7 +2237,7 @@ <fixed-case>LMT</fixed-case> at Tivoli Gardens ArendseBernth - MichaelMcCord + MichaelMcCord 4–12 W98-1601 bernth-mccord-1998-lmt @@ -2265,7 +2265,7 @@ Structural Lexical Heuristics in the Automatic Analysis of <fixed-case>P</fixed-case>ortuguese - EckhardBick + EckhardBick 44–56 W98-1605 bick-1998-structural @@ -2280,7 +2280,7 @@ A Chart-Based Framework for Grammar Checking. Initial Studies - AnnaSågvall Hein + AnnaSågvall Hein 68–80 W98-1607 sagvall-hein-1998-chart @@ -2288,7 +2288,7 @@ <fixed-case>CP</fixed-case>-<fixed-case>UDOG</fixed-case>: An Algorithm for the Disambiguation of Compound Participles in <fixed-case>D</fixed-case>anish JensAhlmann Hansen - Poul SørenKjærsgaard + Poul SørenKjærsgaard 81–86 W98-1608 ahlmann-hansen-kjaersgaard-1998-cp @@ -2302,7 +2302,7 @@ <fixed-case>CATCH</fixed-case>: A Program for Developing World Wide Web <fixed-case>CALL</fixed-case> Material - Erik F.Tjong Kim Sang + Erik F.Tjong Kim Sang 94–99 W98-1610 tjong-kim-sang-1998-catch @@ -2318,14 +2318,14 @@ Peeking Into the <fixed-case>D</fixed-case>anish Living Room. <fixed-case>I</fixed-case>nternet access to a large speech corpus - Peter JuelHenrichsen + Peter JuelHenrichsen 109–119 W98-1612 henrichsen-1998-peeking Extraction of Translation Equivalents from Parallel Corpora - JörgTiedemann + JörgTiedemann 120–128 W98-1613 tiedemann-1998-extraction @@ -2347,7 +2347,7 @@ Logic for Part-of-Speech Tagging and Shallow Parsing - TorbjörnLager + TorbjörnLager 152–159 W98-1616 lager-1998-logic @@ -2375,7 +2375,7 @@ Teaching and learning computational linguistics in an international setting - Koenraadde Smedt + Koenraadde Smedt 186–189 W98-1620 de-smedt-1998-teaching diff --git a/data/xml/W99.xml b/data/xml/W99.xml index 43cd9feb9f..0d209a027d 100644 --- a/data/xml/W99.xml +++ b/data/xml/W99.xml @@ -12,7 +12,7 @@ An Integrated Approach to Reference and Presupposition Resolution - Robert T.Kasper + Robert T.Kasper Paul C.Davis CraigeRoberts W99-0101 @@ -28,14 +28,14 @@ Anaphora Resolution using Extended Centen’ng Algorithm in a Multi-modal Dialogue System HarksooKim Jeong-MiCho - JungyunSeo + JungyunSeo W99-0103 kim-etal-1999-anaphora Knowledge-Lean Coreference Resolution and its Relation to Textual Cohesion and Coherence - Sanda M.Harabagiu - Steven J.Maiorano + Sanda M.Harabagiu + Steven J.Maiorano W99-0104 harabagiu-maiorano-1999-knowledge @@ -50,7 +50,7 @@ Discourse Structure and Co-Reference: An Empirical Study DanCristea - NancyIde + NancyIde DanielMarcu ValentinTablan W99-0106 @@ -58,22 +58,22 @@ Building a Tool for Annotating Reference in Discourse - JonathanDeCristofaro + JonathanDeCristofaro MichaelStrube - Kathleen E.McCoy + Kathleen E.McCoy W99-0107 decristofaro-etal-1999-building Generating Anaphoric Expressions: Pronoun or Definite Description? - Kathleen E.McCoy + Kathleen E.McCoy MichaelStrube W99-0108 mccoy-strube-1999-generating Cb or not Cb? Centering theory applied to <fixed-case>NLG</fixed-case> - RodgerKibble + RodgerKibble W99-0109 kibble-1999-cb @@ -124,7 +124,7 @@ Is Hillary Rodham <fixed-case>C</fixed-case>linton the President? Disambiguating Names across Documents - YaelRavin + YaelRavin ZunaidKazi W99-0202 ravin-kazi-1999-hillary @@ -141,7 +141,7 @@ Automatic Slide Presentation from Semantically Annotated Documents MasaoUtiyama - KoitiHasida + KoitiHasida W99-0204 utiyama-hasida-1999-automatic @@ -149,7 +149,7 @@ Resolution of Indirect Anaphora in <fixed-case>J</fixed-case>apanese Sentences Using Examples: “<fixed-case>X</fixed-case> no <fixed-case>Y</fixed-case> (<fixed-case>Y</fixed-case> of <fixed-case>X</fixed-case>)” MasakiMurata HitoshiIsahara - MakotoNagao + MakotoNagao W99-0205 murata-etal-1999-resolution @@ -157,15 +157,15 @@ Pronoun Resolution in <fixed-case>J</fixed-case>apanese Sentences Using Surface Expressions and Examples MasakiMurata HitoshiIsahara - MakotoNagao + MakotoNagao W99-0206 murata-etal-1999-pronoun Corpus-Based Anaphora Resolution Towards Antecedent Preference - MichaelPaul + MichaelPaul KazuhideYamamoto - EiichiroSumita + EiichiroSumita W99-0207 paul-etal-1999-corpus @@ -186,30 +186,30 @@
Coreference-oriented Interlingual Slot Structure & Machine Translation - JesusPeral - ManuelPalomar - AntonioFerrandez + JesusPeral + ManuelPalomar + AntonioFerrandez W99-0210 peral-etal-1999-coreference Using Coreference Chains for Text Summarization - SalihaAzzam - KevinHumphreys - RobertGaizauskas + SalihaAzzam + KevinHumphreys + RobertGaizauskas W99-0211 azzam-etal-1999-using Using Coreference for Question Answering - Thomas S.Morton + Thomas S.Morton W99-0212 morton-1999-using What is coreference, and what should coreference annotation be? - Keesvan Deemter - RodgerKibble + Keesvan Deemter + RodgerKibble W99-0213 van-deemter-kibble-1999-coreference @@ -227,7 +227,7 @@ Annotation Graphs as a Framework for Multidimensional Linguistic Data Analysis StevenBird - MarkLiberman + MarkLiberman W99-0301 bird-liberman-1999-annotation @@ -240,7 +240,7 @@
Argumentation Mark-Up: A Proposal - Jean-FrancoisDelannoy + Jean-FrancoisDelannoy W99-0303 delannoy-1999-argumentation @@ -275,10 +275,10 @@
Tagging of Speech Acts and Dialogue Games in <fixed-case>S</fixed-case>panish Call Home - LoriLevin + LoriLevin KlausRies AnnThyme-Gobbel - AlonLavie + AlonLavie W99-0306 levin-etal-1999-tagging @@ -292,7 +292,7 @@
Tagging Psychotherapeutic Interviews for Linguistic Analysis - Jon DavidPatrick + Jon DavidPatrick W99-0308 patrick-1999-tagging @@ -326,15 +326,15 @@
A Two-level Approach to Coding Dialogue for Discourse Structure: Activities of the 1998 <fixed-case>DRI</fixed-case> Working Group on Higher-level Structures - David R.Traum - Christine H.Nakatani + David R.Traum + Christine H.Nakatani W99-0313 traum-nakatani-1999-two Automatically Extracting Grounding Tags from <fixed-case>BF</fixed-case> Tags TeresaZollo - MarkCore + MarkCore W99-0314 zollo-core-1999-automatically @@ -356,7 +356,7 @@
Eliciting Natural Speech From Non-Native Users: Collecting Speech Data for <fixed-case>LVCSR</fixed-case> - Laura MayfieldTomokiyo + Laura MayfieldTomokiyo SusanneBurger W99-0402 tomokiyo-burger-1999-eliciting @@ -376,22 +376,22 @@ Modeling the language assessment process and result: Proposed architecture for automatic oral proficiency assessment - Gina-AnneLevow - Mari BromanOlsen + Gina-AnneLevow + Mari BromanOlsen W99-0405 levow-olsen-1999-modeling Dual Use of Linguistic Resources: Evaluation of <fixed-case>MT</fixed-case> Systems and Language Learners LisaDecrozant - Clare R.Voss + Clare R.Voss W99-0406 decrozant-voss-1999-dual <fixed-case>FAME</fixed-case>: a Functional Annotation Meta-scheme for multi-modal and multi-lingual Parsing Evaluation AlessandroLenci - SimonettaMontemagni + SimonettaMontemagni VitoPirrelli ClaudiaSoria W99-0407 @@ -399,8 +399,8 @@ Modeling User Language Proficiency in a Writing Tutor for Deaf Learners of <fixed-case>E</fixed-case>nglish - Lisa N.Michaud - Kathleen F.McCoy + Lisa N.Michaud + Kathleen F.McCoy W99-0408 michaud-mccoy-1999-modeling @@ -413,14 +413,14 @@
A Web-Based System for Automatic Language Skill Assessment: <fixed-case>EVALING</fixed-case> - CedrickFairon + CedrickFairon W99-0410 fairon-1999-web Automated Essay Scoring for Nonnative <fixed-case>E</fixed-case>nglish Speakers JillBurstein - MartinChodorow + MartinChodorow W99-0411 burstein-chodorow-1999-automated @@ -437,16 +437,16 @@ <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et 2 - A Morphologically and Semantically Enhanced Resource - Sanda M.Harabagiu + Sanda M.Harabagiu George A.Miller - Dan I.Moldovan + Dan I.Moldovan W99-0501 harabagiu-etal-1999-wordnet A Case Study on Inter-Annotator Agreement for Word Sense Disambiguation Hwee TouNg - Chung YongLim + Chung YongLim Shou KingFoo W99-0502 ng-etal-1999-case @@ -455,16 +455,16 @@ Supervised Learning of Lexical Semantic Verb Classes Using Frequency Distributions SuzanneStevenson PaolaMerlo - Natalia KariaevaRutgers + Natalia KariaevaRutgers W99-0503 stevenson-etal-1999-supervised On the concept of diathesis alternations as semantic oppositions - AnaFernandez - M. AntoniaMarti - GloriaVazquez - IreneCastellon + AnaFernandez + M. AntoniaMarti + GloriaVazquez + IreneCastellon W99-0504 fernandez-etal-1999-concept @@ -476,20 +476,20 @@
On Some Aspects of Lexical Standardization - RemiZajac + RemiZajac W99-0506 zajac-1999-aspects <fixed-case>SIMPLE</fixed-case>- Semantic Information for Multifunctional Plurilingual Lexica: Some Examples of <fixed-case>D</fixed-case>anish” Concrete Nouns - Bolette SandfordPedersen + Bolette SandfordPedersen BrittKeson W99-0507 pedersen-keson-1999-simple Parallel Translations as Sense Discriminators - NancyIde + NancyIde W99-0508 ide-1999-parallel @@ -518,7 +518,7 @@ Towards a Universal Index of Meaning PiekVossen - WimPeters + WimPeters JulioGonzalo W99-0512 vossen-etal-1999-towards @@ -544,7 +544,7 @@ What’s Happened Since the First <fixed-case>SIGDAT</fixed-case> Meeting? - Kenneth WardChurch + Kenneth WardChurch W99-0601 church-1999-whats @@ -564,9 +564,9 @@ Improved Alignment Models for Statistical Machine Translation - Franz JosefOch - ChristophTillmann - HermannNey + Franz JosefOch + ChristophTillmann + HermannNey W99-0604 och-etal-1999-improved @@ -579,7 +579,7 @@
Boosting Applied to Tagging and <fixed-case>PP</fixed-case> Attachment - StevenAbney + StevenAbney Robert E.Schapire YoramSinger W99-0606 @@ -587,7 +587,7 @@ Applying Extrasentential Context To Maximum Entropy Based Tagging With A Large Semantic And Syntactic Tagset - EzraBlack + EzraBlack AndrewFinch RuiqiangZhang W99-0607 @@ -595,8 +595,8 @@ Improving <fixed-case>POS</fixed-case> Tagging Using Machine-Learning Techniques - LluisMarquez - HoracioRodriguez + LluisMarquez + HoracioRodriguez JosepCarmona JosepMontolio W99-0608 @@ -620,21 +620,21 @@ Noun Phrase Coreference as Clustering - ClaireCardie + ClaireCardie KiriWagstaff W99-0611 cardie-wagstaff-1999-noun Language Independent Named Entity Recognition Combining Morphological and Contextual Evidence - SilviuCucerzan + SilviuCucerzan DavidYarowsky W99-0612 cucerzan-yarowsky-1999-language Unsupervised Models for Named Entity Classification - MichaelCollins + MichaelCollins YoramSinger W99-0613 collins-singer-1999-unsupervised @@ -649,19 +649,19 @@ <fixed-case>HMM</fixed-case> Specialization with Selective Lexicalization Jin-DongKim Sang-ZooLee - Hae-ChangRim + Hae-ChangRim W99-0615 kim-etal-1999-hmm Why Doesn’t Natural Language Come Naturally? - RichardSchwartz + RichardSchwartz W99-0616 schwartz-1999-doesnt <fixed-case>POS</fixed-case> Tags and Decision Trees for Language Modeling - Peter A.Heeman + Peter A.Heeman W99-0617 heeman-1999-pos @@ -676,14 +676,14 @@ Word Informativeness and Automatic Pitch Accent Modeling ShimeiPan - Kathleen R.McKeown + Kathleen R.McKeown W99-0619 pan-mckeown-1999-word Learning Discourse Relations with Active Data Selection TadashiNomoto - YujiMatsumoto + YujiMatsumoto W99-0620 nomoto-matsumoto-1999-learning @@ -699,13 +699,13 @@ Guiding a Well-Founded Parser with Corpus Statistics AmonSeagull - LenhartSchubert + LenhartSchubert W99-0622 seagull-schubert-1999-guiding Exploiting Diversity in Natural Language Processing: Combining Parsers - John C.Henderson + John C.Henderson EricBrill W99-0623 henderson-brill-1999-exploiting @@ -713,7 +713,7 @@ Lexical ambiguity and Information Retrieval revisited JulioGonzalo - AnselmoPenas + AnselmoPenas FelisaVerdejo W99-0624 gonzalo-etal-1999-lexical @@ -721,21 +721,21 @@ Detecting Text Similarity over Short Passages: Exploring Linguistic Feature Combinations via Machine Learning VasileiosHatzivassiloglou - Judith L.Klavans + Judith L.Klavans EleazarEskin W99-0625 hatzivassiloglou-etal-1999-detecting Automatic Construction of Weighted String Similarity Measures - JorgTiedemann + JorgTiedemann W99-0626 tiedemann-1999-automatic Taking the load off the conference chairs-towards a digital paper-routing assistant DavidYarowsky - RaduFlorian + RaduFlorian W99-0627 yarowsky-florian-1999-taking @@ -743,7 +743,7 @@ <fixed-case>PP</fixed-case>-Attachment: A Committee Machine Approach Martha A.Alegre Josep M.Sopena - AgustiLloberas + AgustiLloberas W99-0628 alegre-etal-1999-pp @@ -751,7 +751,7 @@ Cascaded Grammatical Relation Assignment SabineBuchholz JornVeenstra - WalterDaelemans + WalterDaelemans W99-0629 buchholz-etal-1999-cascaded @@ -765,7 +765,7 @@ An Iterative Approach to Estimating Frequencies over a Semantic Hierarchy StephenClark - DavidWeir + DavidWeir W99-0631 clark-weir-1999-iterative @@ -778,7 +778,7 @@
Improving <fixed-case>B</fixed-case>rill’s <fixed-case>POS</fixed-case> Tagger for an Agglutinative Language - BeataMegyesi + BeataMegyesi W99-0633 megyesi-1999-improving @@ -786,14 +786,14 @@ Corpus-Based Learning for Noun Phrase Coreference Resolution Wee MengSoon Hwee TouNg - Chung YongLim + Chung YongLim W99-0634 soon-etal-1999-corpus
Corpus-Based Approach for Nominal Compound Analysis for <fixed-case>K</fixed-case>orean Based on Linguistic and Statistical Information JuntaeYoon - Key-SunChoi + Key-SunChoi MansukSong W99-0635 yoon-etal-1999-corpus @@ -811,21 +811,21 @@ Unsupervised Learning of Word Boundary with Description Length Gain - ChunyuKit - YorickWilks + ChunyuKit + YorickWilks W99-0701 kit-wilks-1999-unsupervised Experiments in Unsupervised Entropy-Based Corpus Segmentation - AndréKempe + AndréKempe W99-0702 kempe-1999-experiments Practical Bootstrapping of Morphological Analyzers KemalOflazer - SergeiNirenburg + SergeiNirenburg W99-0703 oflazer-nirenburg-1999-practical @@ -837,21 +837,21 @@ The u-<fixed-case>TBL</fixed-case> System: Logic Programming Tools for Transformation-Based Learning - TorbjornLager + TorbjornLager W99-0705 lager-1999-u-tbl Learning Transformation Rules to Find Grammatical Relations LisaFerro - MarcVilain - AlexanderYeh + MarcVilain + AlexanderYeh W99-0706 ferro-etal-1999-learning Memory-Based Shallow Parsing - WalterDaelemans + WalterDaelemans SabineBuchholz JornVeenstra W99-0707 @@ -876,7 +876,7 @@ Keynote Talk: Diamonds on my Windshield: the Use of Computer-based Instruction in Computational Linguistics - JoCalder + JoCalder W99-0801 calder-1999-keynote @@ -889,13 +889,13 @@ Web tools for introductory computational linguistics DafyddGibbon - JulieCarson-Berndsen + JulieCarson-Berndsen W99-0803 gibbon-carson-berndsen-1999-web Intranet learning tools for <fixed-case>NLP</fixed-case> - William J.Black + William J.Black SimonHill MahmoudKassaei W99-0804 @@ -924,7 +924,7 @@ An Open Distance Learning Web-Course for <fixed-case>NLP</fixed-case> in <fixed-case>IR</fixed-case> FelisaVerdejo JulioGonzalo - AnselmoPenas + AnselmoPenas W99-0808 verdejo-etal-1999-open @@ -941,14 +941,14 @@ Hiding a Semantic Hierarchy in a <fixed-case>M</fixed-case>arkov Model - StevenAbney + StevenAbney MarcLight W99-0901 abney-light-1999-hiding The applications of unsupervised learning to <fixed-case>J</fixed-case>apanese grapheme-phoneme alignment - TimothyBaldwin + TimothyBaldwin HozumiTanaka W99-0902 baldwin-tanaka-1999-applications @@ -956,14 +956,14 @@ Dual Distributional Verb Sense Disambiguation with Small Corpora and Machine Readable Dictionaries Jeong-MiCho - JungyunSeo - Gil ChangKim + JungyunSeo + Gil ChangKim W99-0903 cho-etal-1999-dual Unsupervised learning of derivational morphology from inflectional lexicons - EricGaussier + EricGaussier W99-0904 gaussier-1999-unsupervised @@ -1066,7 +1066,7 @@ The shortcomings of a tagger KristinHagen - Janne BondiJohannessen + Janne BondiJohannessen AndersNøklestad 66–75 W99-1007 @@ -1082,15 +1082,15 @@ Extracting Keywords from Digital Document Collections - AnnaJonsson + AnnaJonsson 83–90 W99-1009 jonsson-2000-extracting Ontologically Supported Semantic Matching - Atanas K.Kiryakov - Kiril Iv.Simov + Atanas K.Kiryakov + Kiril Iv.Simov 91–102 W99-1010 kiryakov-simov-2000-ontologically @@ -1104,7 +1104,7 @@ Towards a Finite-State Parser for <fixed-case>S</fixed-case>wedish - BeátaMegyesi + BeátaMegyesi SaraRydin 115–123 W99-1012 @@ -1144,7 +1144,7 @@ Designing a System for <fixed-case>S</fixed-case>wedish Spoken Document Retrieval BotondPakucs - BjörnGambäck + BjörnGambäck 162–173 W99-1017 pakucs-gamback-2000-designing @@ -1159,7 +1159,7 @@ An Information Retrieval System with Cooperative Behaviour PauloQuaresma - IrenePimenta Rodrigues + IrenePimenta Rodrigues 182–190 W99-1019 quaresma-pimenta-rodrigues-2000-information @@ -1174,7 +1174,7 @@ Automatic proofreading for <fixed-case>N</fixed-case>orwegian: The challenges of lexical and grammatical variation - Koenraadde Smedt + Koenraadde Smedt VictoriaRosén 206–215 W99-1021 @@ -1182,7 +1182,7 @@ Word Alignment Step by Step - JörgTiedemann + JörgTiedemann 216–227 W99-1022 tiedemann-2000-word diff --git a/data/xml/X93.xml b/data/xml/X93.xml index 087822e2d2..9517b08183 100644 --- a/data/xml/X93.xml +++ b/data/xml/X93.xml @@ -15,7 +15,7 @@ <fixed-case>TIPSTER</fixed-case> Program Overview - Roberta H.Merchant + Roberta H.Merchant 10.3115/1119149.1119151 1–2 X93-1001 @@ -31,7 +31,7 @@ The <fixed-case>M</fixed-case>essage <fixed-case>U</fixed-case>nderstanding <fixed-case>C</fixed-case>onferences - Beth M.Sundheim + Beth M.Sundheim 10.3115/1119149.1119153 5–5 X93-1003 @@ -73,7 +73,7 @@ <fixed-case>INQUERY</fixed-case> System Overview JohnBroglio James P.Callan - W. BruceCroft + W. BruceCroft 10.3115/1119149.1119159 47–67 X93-1008 @@ -82,7 +82,7 @@ <fixed-case>TIPSTER</fixed-case> Phase <fixed-case>I</fixed-case> Final Report BillCaid - StephenGallant + StephenGallant JoelCarleton DavidSudbeck 10.3115/1119149.1119160 @@ -92,8 +92,8 @@ <fixed-case>DR</fixed-case>-<fixed-case>LINK</fixed-case> System: Phase <fixed-case>I</fixed-case> Summary - Elizabeth D.Liddy - Sung H.Myaeng + Elizabeth D.Liddy + Sung H.Myaeng 10.3115/1119149.1119161 93–112 X93-1010 @@ -117,7 +117,7 @@ Tasks, Domains, and Languages for Information Extraction - BoyanOnyshkevych + BoyanOnyshkevych Mary EllenOkurowski LynnCarlson 10.3115/1119149.1119165 @@ -128,7 +128,7 @@ Corpora and Data Preparation for Information Extraction LynnCarlson - BoyanOnyshkevych + BoyanOnyshkevych Mary EllenOkurowski 10.3115/1119149.1119166 135–139 @@ -137,7 +137,7 @@ Template Design for Information Extraction - BoyanOnyshkevych + BoyanOnyshkevych 10.3115/1119149.1119167 141–145 X93-1015 @@ -145,7 +145,7 @@ <fixed-case>TIPSTER/MUC</fixed-case>-5 Information Extraction System Evaluation - Beth M.Sundheim + Beth M.Sundheim 10.3115/1119149.1119168 147–163 X93-1016 @@ -153,7 +153,7 @@ An Analysis of the Joint Venture <fixed-case>J</fixed-case>apanese Text Prototype and Its Effect on System Performance - SteveMoiorano + SteveMoiorano 10.3115/1119149.1119169 165–178 X93-1017 @@ -169,11 +169,11 @@ <fixed-case>BBN</fixed-case>’s <fixed-case>PLUM</fixed-case> Probabilistic Language Understanding System - RalphWeischedel - DamarisAyuso - HeidiFox + RalphWeischedel + DamarisAyuso + HeidiFox TomoyoshiMatsukawa - ConstantinePapageorgiou + ConstantinePapageorgiou DawnMacLaughlin MasaichiroKitagawa TsutomuSakai @@ -188,14 +188,14 @@ The <fixed-case>TIPSTER/SHOGUN</fixed-case> Project - Paul S.Jacobs - GeorgeKrupka - LisaRau - Michael L.Mauldin + Paul S.Jacobs + GeorgeKrupka + LisaRau + Michael L.Mauldin TerukoMitamura TsuyoshiKitani IraSider - LoisChilds + LoisChilds 10.3115/1119149.1119172 209–221 X93-1020 @@ -203,15 +203,15 @@ <fixed-case>CRL</fixed-case>/<fixed-case>B</fixed-case>randeis: The <fixed-case>D</fixed-case>iderot System - JimCowie - LouiseGuthrie - WangJin - WilliamOgden - JamesPustejovsky + JimCowie + LouiseGuthrie + JinWang + WilliamOgden + JamesPustejovsky RongWang - TakahiroWakao - ScottWaterman - YorickWilks + TakahiroWakao + WatermanScott + YorickWilks 10.3115/1119149.1119173 223–239 X93-1021 @@ -224,7 +224,7 @@ S.Soderland E.Riloff C.Cardie - J.Peterson + J.Peterson F.Feng 10.3115/1119149.1119174 241–256 @@ -233,8 +233,8 @@ Dictionary Construction by Domain Experts - EllenRiloff - Wendy G.Lehnert + EllenRiloff + Wendy G.Lehnert 10.3115/1119149.1119175 257–259 X93-1023 diff --git a/data/xml/X96.xml b/data/xml/X96.xml index 2c1f5f4b3d..d2421b3862 100644 --- a/data/xml/X96.xml +++ b/data/xml/X96.xml @@ -39,7 +39,7 @@ Technology Transfer: Observations from the <fixed-case>TIPSTER</fixed-case> Text Program - Sarah M.Taylor + Sarah M.Taylor 10.3115/1119018.1119023 23–32 X96-1004 @@ -55,7 +55,7 @@ The <fixed-case>M</fixed-case>essage <fixed-case>U</fixed-case>nderstanding <fixed-case>C</fixed-case>onferences - Beth M.Sundheim + Beth M.Sundheim 10.3115/1119018.1119025 35–37 X96-1006 @@ -88,7 +88,7 @@ The <fixed-case>L</fixed-case>ockheed <fixed-case>M</fixed-case>artin <fixed-case>TIPSTER</fixed-case> <fixed-case>II</fixed-case> Project - StevenMaiorano + StevenMaiorano 10.3115/1119018.1119030 47–48 X96-1010 @@ -96,7 +96,7 @@ <fixed-case>C</fixed-case>ervantes - A System Supporting Text Analysis - JimCowie + JimCowie 10.3115/1119018.1119031 49–49 X96-1011 @@ -104,7 +104,7 @@ The <fixed-case>NYU</fixed-case> <fixed-case>TIPSTER</fixed-case> <fixed-case>II</fixed-case> Project - Sarah M.Taylor + Sarah M.Taylor 10.3115/1119018.1119032 51–51 X96-1012 @@ -112,7 +112,7 @@ <fixed-case>SRA</fixed-case> Participation in <fixed-case>TIPSTER</fixed-case> Phase <fixed-case>II</fixed-case> - LisaRau + LisaRau 10.3115/1119018.1119033 53–53 X96-1013 @@ -120,7 +120,7 @@ The <fixed-case>SRI</fixed-case> <fixed-case>TIPSTER</fixed-case> <fixed-case>II</fixed-case> Project - StevenMaiorano + StevenMaiorano 10.3115/1119018.1119034 55–56 X96-1014 @@ -147,7 +147,7 @@ <fixed-case>C</fixed-case>able <fixed-case>A</fixed-case>bstracting and <fixed-case>IN</fixed-case>dexing <fixed-case>S</fixed-case>ystem (<fixed-case>CANIS</fixed-case>) Prototype IraSider JeffreyBaker - DeborahBrady + DeborahBrady LynneHigbie TomHoward 10.3115/1119018.1119038 @@ -167,9 +167,9 @@ The <fixed-case>HOOKAH</fixed-case> Information Extraction System ChrisBarclay - SeanBoisen + SeanBoisen ClintonHyde - RalphWeischedel + RalphWeischedel 10.3115/1119018.1119040 79–82 X96-1019 @@ -185,7 +185,7 @@ <fixed-case>O</fixed-case>leada: User-Centered <fixed-case>TIPSTER</fixed-case> Technology for Language Instruction - William C.Ogden + William C.Ogden PhilipBernick 10.3115/1119018.1119042 85–90 @@ -211,7 +211,7 @@ The <fixed-case>T</fixed-case>emple <fixed-case>T</fixed-case>ranslator’s <fixed-case>W</fixed-case>orkstation Project MichelleVanni - RemiZajac + RemiZajac 10.3115/1119018.1119045 101–106 X96-1024 @@ -227,15 +227,15 @@ <fixed-case>C</fixed-case>hinese Information Extraction and Retrieval - SeanBoisen - MichaelCrystal + SeanBoisen + MichaelCrystal ErikPeterson - RalphWeischedel + RalphWeischedel JohnBroglio JamieCallan - BruceCroft + BruceCroft TheresaHand - ThomasKeenan + ThomasKeenan Mary EllenOkurowski 10.3115/1119018.1119047 109–119 @@ -244,10 +244,10 @@ <fixed-case>TIPSTER</fixed-case>-Compatible Projects at <fixed-case>S</fixed-case>heffield - HamishCunningham - KevinHumphreys - RobertGaizauskas - YorickWilks + HamishCunningham + KevinHumphreys + RobertGaizauskas + YorickWilks 10.3115/1119018.1119048 121–123 X96-1027 @@ -255,11 +255,11 @@ Progress in Information Extraction - RalphWeischedel - SeanBoisen - DanielBikel - RobertBobrow - MichaelCrystal + RalphWeischedel + SeanBoisen + DanielBikel + RobertBobrow + MichaelCrystal WilliamFerguson AllanWechsler The PLUM Research Group @@ -270,7 +270,7 @@ The Role of Syntax in Information Extraction - RalphGrishman + RalphGrishman 10.3115/1119018.1119051 139–142 X96-1029 @@ -278,7 +278,7 @@ Natural Language Information Retrieval: <fixed-case>TIPSTER</fixed-case>-2 Final Report - TomekStrzalkowski + TomekStrzalkowski 10.3115/1119018.1119052 143–148 X96-1030 @@ -304,7 +304,7 @@ A Simple Probabilistic Approach to Classification and Routing - LouiseGuthrie + LouiseGuthrie JamesLeistensnider 10.3115/1119018.1119055 167–177 @@ -313,7 +313,7 @@ An Evaluation of Coreference Resolution Strategies for Acquiring Associated Information - Lois C.Childs + Lois C.Childs 10.3115/1119018.1119056 179–184 X96-1034 @@ -321,7 +321,7 @@ Advances in Multilingual Text Retrieval - MarkDavis + MarkDavis 10.3115/1119018.1119057 185–194 X96-1035 @@ -329,8 +329,8 @@ Integration of Document Detection and Information Extraction - LouiseGuthrie - TomekStrzalkowski + LouiseGuthrie + TomekStrzalkowski WangJin FangLin 10.3115/1119018.1119058 @@ -340,12 +340,12 @@ <fixed-case>SRI</fixed-case>’s Tipster <fixed-case>II</fixed-case> Project - Jerry R.Hobbs - DouglasAppelt - JohnBear - DavidIsrael + Jerry R.Hobbs + DouglasAppelt + JohnBear + DavidIsrael MegumiKameyama - AndrewKehler + AndrewKehler MarkStickel MabryTyson 10.3115/1119018.1119059 @@ -363,7 +363,7 @@ Building an Architecture: A <fixed-case>CAWG</fixed-case> Saga - RalphGrishman + RalphGrishman 10.3115/1119018.1119062 213–215 X96-1039 @@ -379,7 +379,7 @@ <fixed-case>TUIT</fixed-case>: A Toolkit for Constructing Multilingual <fixed-case>TIPSTER</fixed-case> User Interfaces - William C.Ogden + William C.Ogden 10.3115/1119018.1119064 219–220 X96-1041 @@ -395,7 +395,7 @@ <fixed-case>TIPSTER</fixed-case> Text Phase <fixed-case>II</fixed-case> Architecture Design Version 2.1p 19 <fixed-case>J</fixed-case>une 1996 - RalphGrishman + RalphGrishman 10.3115/1119018.1119066 249–305 X96-1043 @@ -427,8 +427,8 @@ Design of the <fixed-case>MUC</fixed-case>-6 Evaluation - RalphGrishman - BethSundheim + RalphGrishman + BethSundheim 10.3115/1119018.1119072 413–422 X96-1047 @@ -436,7 +436,7 @@ Overview of Results of the <fixed-case>MUC</fixed-case>-6 Evaluation - Beth M.Sundheim + Beth M.Sundheim 10.3115/1119018.1119073 423–442 X96-1048 @@ -444,9 +444,9 @@ The Multilingual Entity Task (<fixed-case>MET</fixed-case>) Overview - RobertaMerchant + RobertaMerchant Mary EllenOkurowski - NancyChinchor + NancyChinchor 10.3115/1119018.1119075 445–447 X96-1049 @@ -454,7 +454,7 @@ Multilingual Entity Task (<fixed-case>MET</fixed-case>): <fixed-case>J</fixed-case>apanese Results - StevenMaiorano + StevenMaiorano TerryWilson 10.3115/1119018.1119076 449–451 @@ -463,7 +463,7 @@ An Interpretative Data Analysis of <fixed-case>C</fixed-case>hinese Named Entity Subtypes - Thomas A.Keenan + Thomas A.Keenan 10.3115/1119018.1119077 453–455 X96-1051 @@ -481,11 +481,11 @@ <fixed-case>MITRE</fixed-case>: Description of the <fixed-case>A</fixed-case>lembic System as Used in <fixed-case>MET</fixed-case> JohnAberdeen JohnBurger - DavidDay - LynetteHirschman - DavidPalmer - PatriciaRobinson - MarcVilain + DavidDay + LynetteHirschman + DavidPalmer + PatriciaRobinson + MarcVilain 10.3115/1119018.1119079 461–462 X96-1053 @@ -501,11 +501,11 @@ Approaches in <fixed-case>MET</fixed-case> (Multi-Lingual Entity Task) - DamarisAyuso - DanielBikel + DamarisAyuso + DanielBikel TashaHall ErikPeterson - RalphWeischedel + RalphWeischedel PatrickJost 10.3115/1119018.1119081 465–466 @@ -514,7 +514,7 @@ <fixed-case>CRL</fixed-case>’s Approach to <fixed-case>MET</fixed-case> - JimCowie + JimCowie 10.3115/1119018.1119082 467–468 X96-1056 @@ -540,10 +540,10 @@ <fixed-case>NEC</fixed-case> Corporation and <fixed-case>U</fixed-case>niversity of <fixed-case>S</fixed-case>heffield: “Description of <fixed-case>NEC</fixed-case>/<fixed-case>S</fixed-case>heffleld System Used For <fixed-case>MET</fixed-case> <fixed-case>J</fixed-case>apanese” YoshikazuTakemoto - TakahiroWakao + TakahiroWakao HiroshiYamada - RobertGaizauskas - YorickWilks + RobertGaizauskas + YorickWilks 10.3115/1119018.1119085 475–476 X96-1059 diff --git a/data/xml/X98.xml b/data/xml/X98.xml index 2bdd5256d7..373fab96db 100644 --- a/data/xml/X98.xml +++ b/data/xml/X98.xml @@ -40,8 +40,8 @@ The Common Pattern Specification Language - Douglas E.Appelt - BoyanOnyshkevych + Douglas E.Appelt + BoyanOnyshkevych 10.3115/1119089.1119095 23–30 X98-1004 @@ -73,7 +73,7 @@ The <fixed-case>SRI</fixed-case> <fixed-case>TIPSTER</fixed-case> <fixed-case>III</fixed-case> Project - StevenMaiorano + StevenMaiorano 10.3115/1119089.1119100 39–40 X98-1008 @@ -89,7 +89,7 @@ Coreference Resolution Strategies From an Application Perspective - Lois C.Childs + Lois C.Childs DavidDadd NorrisHeintzelman 10.3115/1119089.1119103 @@ -99,7 +99,7 @@ Extracting and Normalizing Temporal Expressions - Lois C.Childs + Lois C.Childs DavidCassel 10.3115/1119089.1119104 51–56 @@ -108,7 +108,7 @@ Research in Information Extraction: 1996-98 - RalphGrishman + RalphGrishman 10.3115/1119089.1119105 57–60 X98-1012 @@ -116,12 +116,12 @@ Information Extraction Research and Applications: Current Progress and Future Directions - AndrewKehler - Jerry R.Hobbs - DouglasAppelt - JohnBear + AndrewKehler + Jerry R.Hobbs + DouglasAppelt + JohnBear MatthewCaywood - DavidIsrael + DavidIsrael MegumiKameyama DavidMartin ClaireMonteleoni @@ -133,12 +133,12 @@ Algorithms That Learn to Extract Information <fixed-case>BBN</fixed-case>: <fixed-case>TIPSTER</fixed-case> Phase <fixed-case>III</fixed-case> ScottMiller - MichaelCrystal - HeidiFox - LanceRamshaw - RichardSchwartz + MichaelCrystal + HeidiFox + LanceRamshaw + RichardSchwartz RebeccaStone - RalphWeischedel + RalphWeischedel 10.3115/1119089.1119107 75–89 X98-1014 @@ -157,7 +157,7 @@ Transforming Examples into Patterns for Information Extraction RomanYangarber - RalphGrishman + RalphGrishman 10.3115/1119089.1119109 97–103 X98-1016 @@ -165,12 +165,12 @@ The Smart/Empire <fixed-case>TIPSTER</fixed-case> <fixed-case>IR</fixed-case> System - ChrisBuckley + ChrisBuckley JanetWalz - ClaireCardie + ClaireCardie ScottMardis MandarMitra - DavidPierce + DavidPierce KiriWagstaff 10.3115/1119089.1119111 107–121 @@ -180,7 +180,7 @@ Dynamic Data Fusion TedDiamond - Elizabeth D.Liddy + Elizabeth D.Liddy 10.3115/1119089.1119112 123–128 X98-1018 @@ -188,7 +188,7 @@ Improving <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese Ad-Hoc Retrieval: <fixed-case>TIPSTER</fixed-case> Text Phase 3 Final Report - Kui-LamKwok + Kui-LamKwok 10.3115/1119089.1119113 129–137 X98-1019 @@ -196,9 +196,9 @@ Enhancing Detection through Linguistic Indexing and Topic Expansion - TomekStrzalkowski + TomekStrzalkowski Gees C.Stein - G. BowdenWise + G. BowdenWise 10.3115/1119089.1119114 139–148 X98-1020 @@ -207,7 +207,7 @@ Overview of the <fixed-case>U</fixed-case>niversity of <fixed-case>P</fixed-case>ennsylvania’s <fixed-case>TIPSTER</fixed-case> Project BreckBaldwin - Thomas S.Morton + Thomas S.Morton AmitBagga 10.3115/1119089.1119116 151–161 @@ -216,7 +216,7 @@ An <fixed-case>NTU</fixed-case>-Approach to Automatic Sentence Extraction for Summary Generation - Kuang-huaChen + Kuang-huaChen Sheng-JieHuang Wen-ChengLin Hsin-HsiChen @@ -228,7 +228,7 @@ Improving Robust Domain Independent Summarization - JimCowie + JimCowie EugeneLudovik HugoMolina-Salgado 10.3115/1119089.1119118 @@ -238,7 +238,7 @@ Automatic Text Summarization in <fixed-case>TIPSTER</fixed-case> - ThereseFirmin + ThereseFirmin InderjeetMani 10.3115/1119089.1119119 179–180 @@ -247,8 +247,8 @@ Summarization: (1) Using <fixed-case>MMR</fixed-case> for Diversity- Based Reranking and (2) Evaluating Summaries - JadeGoldstein - JaimeCarbonell + JadeGoldstein + JaimeCarbonell 10.3115/1119089.1119120 181–195 X98-1025 @@ -256,8 +256,8 @@ Automated Text Summarization and the <fixed-case>S</fixed-case>ummarist System - EduardHovy - Chin-YewLin + EduardHovy + Chin-YewLin 10.3115/1119089.1119121 197–214 X98-1026 @@ -266,7 +266,7 @@ Multiple & Single Document Summarization Using <fixed-case>DR-LINK</fixed-case> MaryMcKenna - ElizabethLiddy + ElizabethLiddy 10.3115/1119089.1119122 215–221 X98-1027 @@ -274,9 +274,9 @@ A Text-Extraction Based Summarizer - TomekStrzalkowski + TomekStrzalkowski Gees C.Stein - G. BowdenWise + G. BowdenWise 10.3115/1119089.1119123 223–230 X98-1028 @@ -292,7 +292,7 @@ <fixed-case>MUC</fixed-case>/<fixed-case>MET</fixed-case> Evaluation Trends - Nancy A.Chinchor + Nancy A.Chinchor 10.3115/1119089.1119126 235–239 X98-1030 @@ -300,7 +300,7 @@ The <fixed-case>T</fixed-case>ext <fixed-case>RE</fixed-case>trieval <fixed-case>C</fixed-case>onferences (<fixed-case>TREC</fixed-case>s) - Ellen M.Voorhees + Ellen M.Voorhees DonnaHarman 10.3115/1119089.1119127 241–273 diff --git a/data/xml/Y00.xml b/data/xml/Y00.xml index 7384acb378..e3709177d5 100644 --- a/data/xml/Y00.xml +++ b/data/xml/Y00.xml @@ -25,7 +25,7 @@ Verb Alternations and <fixed-case>J</fixed-case>apanese : How, What and Where - TimothyBaldwin + TimothyBaldwin HozumiTanaka 3–14 Y00-1002 @@ -34,7 +34,7 @@ Detection and Correction of Phonetic Errors with a New Orthographic Dictionary - SivajiBandyopadhyay + SivajiBandyopadhyay 15–22 Y00-1003 http://hdl.handle.net/2065/12147 @@ -61,8 +61,8 @@ Textual Information Segmentation by Cohesive Ties - Samuel W.K.Chan - Benjamin K.T’sou + Samuel W.K.Chan + Benjamin K.T’sou C.F.Choy 47–56 Y00-1006 @@ -122,7 +122,7 @@ Using Bilingual Semantic Information in <fixed-case>C</fixed-case>hinese-<fixed-case>K</fixed-case>orean Word Alignment Jin-XiaHuang - Key-SunChoi + Key-SunChoi 121–130 Y00-1013 http://hdl.handle.net/2065/12144 @@ -146,7 +146,7 @@ Qualia Structure and the Accessibility of Arguments : <fixed-case>J</fixed-case>apanese Internally-Headed Relative Clauses with Implicit Target - Chiharu UdaKikuta + Chiharu UdaKikuta 153–164 Y00-1016 http://hdl.handle.net/2065/12148 @@ -186,7 +186,7 @@ On the Discourse Analysis in <fixed-case>K</fixed-case>orean Dialogues - Ik-hwanLee + Ik-hwanLee MinhaengLee 207–218 Y00-1021 @@ -243,7 +243,7 @@ Collocation Deficiency in a Learner Corpus of <fixed-case>E</fixed-case>nglish : From an Overuse Perspective - Rebecca Hsue-HuehShih + Rebecca Hsue-HuehShih 281–288 Y00-1028 http://hdl.handle.net/2065/12161 @@ -270,14 +270,14 @@ Automatic Conversion from Phonetic to Textual Representation of <fixed-case>C</fixed-case>antonese : The Case of <fixed-case>H</fixed-case>ong <fixed-case>K</fixed-case>ong Court Proceedings - Benjamin K.Tsou + Benjamin K.Tsou K.K.Sin - Samuel W. K.Chan - Tom B. Y.Lai - CaesarLun + Samuel W. K.Chan + Tom B. Y.Lai + CaesarLun K. T.Ko - Gary K. K.Chan - Lawrence Y. L.Cheung + Gary K. K.Chan + Lawrence Y. L.Cheung 313–324 Y00-1031 http://hdl.handle.net/2065/12165 @@ -336,7 +336,7 @@ A Unified Approach to Tense in <fixed-case>J</fixed-case>apanese - KeiYoshimoto + KeiYoshimoto ChidoriNakamura YoshikiMori 389–400 @@ -346,7 +346,7 @@ Exclusion phrases and criticisms of semantic compositionality - RichardZuber + RichardZuber 401–412 Y00-1039 http://hdl.handle.net/2065/12174 @@ -354,7 +354,7 @@ The Semantics of amwu-N-to/-irato/-ina in <fixed-case>K</fixed-case>orean : Arbitrary Choice and Concession - ChungminLee + ChungminLee DaehoChung SeunghoNam 413–424 diff --git a/data/xml/Y01.xml b/data/xml/Y01.xml index 55069e0fa5..d77a2efb61 100644 --- a/data/xml/Y01.xml +++ b/data/xml/Y01.xml @@ -3,9 +3,9 @@ Proceedings of the 15th Pacific Asia Conference on Language, Information and Computation - Benjamin K.T’sou - Olivia O.Y.Kwong - Tom B.Y.Lai + Benjamin K.T’sou + Olivia O.Y.Kwong + Tom B.Y.Lai City University of Hong Kong
Hong Kong, China
February @@ -18,9 +18,9 @@ Building a Large Lexical Databank Which Provides Deep Semantics - Charles J.Fillmore + Charles J.Fillmore CharlesWooters - Collin F.Baker + Collin F.Baker 3–26 Y01-1001 http://hdl.handle.net/2065/12202 @@ -46,7 +46,7 @@ A Parallel Interpretation of Floated Quantifiers and Adverbials MasahiroKobayashi - KeiYoshimoto + KeiYoshimoto 45–52 Y01-1004 http://hdl.handle.net/2065/12209 @@ -88,8 +88,8 @@ Towards a Conceptual Representation of Lexical Meaning in <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et - Jen-nanChen - Sue J.Ker + Jen-nanChen + Sue J.Ker 97–108 Y01-1009 http://hdl.handle.net/2065/12214 @@ -132,7 +132,7 @@ Temporal Structure on Discourse Level within the Controlled Information Packaging Theory - Ik-HwanLee + Ik-HwanLee MinhaengLee 151–162 Y01-1014 @@ -157,9 +157,9 @@ Building domain-independent text generation system - XinYuDeng + XinYuDeng SadaoKurohashi - Jun’ichiNakamura + Jun’ichiNakamura 187–194 Y01-1017 http://hdl.handle.net/2065/12190 @@ -252,7 +252,7 @@ An <fixed-case>HPSG</fixed-case> Account of the Hierarchical Clause Formation in <fixed-case>J</fixed-case>apanese : <fixed-case>HPSG</fixed-case>-Based <fixed-case>J</fixed-case>apanese Grammar for Practical Parsing TakashiMiyata AkiraOtani - YujiMatsumoto + YujiMatsumoto 305–316 Y01-1028 http://hdl.handle.net/2065/12203 @@ -260,7 +260,7 @@ The <fixed-case>J</fixed-case>apanese Internally-Headed Relative Clause as a Marked Head-Complement Structure - Chiharu UdaKikuta + Chiharu UdaKikuta 317–324 Y01-1029 http://hdl.handle.net/2065/12204 diff --git a/data/xml/Y02.xml b/data/xml/Y02.xml index a8fa6cab82..b53ad68cc9 100644 --- a/data/xml/Y02.xml +++ b/data/xml/Y02.xml @@ -3,9 +3,9 @@ Proceedings of the 16th Pacific Asia Conference on Language, Information and Computation - Ik-HwanLee + Ik-HwanLee Yong-BeomKim - Key-SunChoi + Key-SunChoi MinhaengLee The Korean Society for Language and Information
Jeju, Korea
@@ -19,7 +19,7 @@ Robust Syntactic Annotation of Corpora and Memory-based Parsing - Erhard W.Hinrichs + Erhard W.Hinrichs 1–1 Y02-1001 http://hdl.handle.net/2065/12215 @@ -36,7 +36,7 @@ Identification of <fixed-case>C</fixed-case>hinese Personal Names in Unrestricted Texts LawrenceCheung - Benjamin K.Tsou + Benjamin K.Tsou MaosongSun 28–35 Y02-1003 @@ -64,8 +64,8 @@ On Negative Imperatives in <fixed-case>K</fixed-case>orean - Chung-hyeHan - Chung-minLee + Chung-hyeHan + Chung-minLee 59–68 Y02-1006 http://hdl.handle.net/2065/12255 @@ -73,10 +73,10 @@ <fixed-case>P</fixed-case>enn <fixed-case>K</fixed-case>orean Treebank : Development and Evaluation - Chung-hyeHan + Chung-hyeHan Na-RaeHan Eon-SukKo - MarthaPalmer + MarthaPalmer HeejongYi 69–78 Y02-1007 @@ -105,7 +105,7 @@ Type Construction of Nouns with the Verb ha- ‘do’ SeohyunIm - ChungminLee + ChungminLee 103–112 Y02-1010 http://hdl.handle.net/2065/12216 @@ -137,7 +137,7 @@ An Alignment Based Technique for Text Translation between Traditional <fixed-case>C</fixed-case>hinese and Simplified <fixed-case>C</fixed-case>hinese - Sue J.Ker + Sue J.Ker Chun-HsienLin 147–156 Y02-1014 @@ -146,8 +146,8 @@ Verb Pattern Based <fixed-case>K</fixed-case>orean-<fixed-case>C</fixed-case>hinese Machine Translation System - ChanghyunKim - Young KilKim + ChanghyunKim + Young KilKim MunpyoHong Young AeSeo Sung IlYang @@ -162,7 +162,7 @@ Jun-SuKim Wang-WooLee Chang-HwanKim - Cheol-youngOck + Cheol-youngOck 166–176 Y02-1016 http://hdl.handle.net/2065/12222 @@ -227,7 +227,7 @@ Toward a Bilingual Legal Term Glossary from Context Profiles - Oi YeeKwong + Oi YeeKwong 249–258 Y02-1024 http://hdl.handle.net/2065/12231 @@ -243,8 +243,8 @@ An Operator Assisted Call Routing System - Chun-JenLee - Jason S.Chang + Chun-JenLee + Jason S.Chang 271–280 Y02-1026 http://hdl.handle.net/2065/12233 @@ -262,7 +262,7 @@ A <fixed-case>K</fixed-case>orean Noun Semantic Hierarchy (<fixed-case>W</fixed-case>ordnet) Construction JuhoLee KoaunghiUn - Hee-SookBae + Hee-SookBae Key-SunChoi 290–295 Y02-1028 @@ -271,7 +271,7 @@ Implementation of Long-distance Reflexives in <fixed-case>K</fixed-case>orean : A Categorial Grammar Approach - Yong-hunLee + Yong-hunLee 296–307 Y02-1029 http://hdl.handle.net/2065/12237 @@ -299,7 +299,7 @@ If a Quantifier is not floated, but moored or even incorporated : Complexity of Presuppositions in Local Domain YoshikiMori - KeiYoshimoto + KeiYoshimoto 330–347 Y02-1032 http://hdl.handle.net/2065/12240 @@ -401,7 +401,7 @@ Building a Domain-Specific <fixed-case>F</fixed-case>rench-<fixed-case>K</fixed-case>orean Lexicon - AesunYoon + AesunYoon 465–474 Y02-1044 http://hdl.handle.net/2065/12253 diff --git a/data/xml/Y03.xml b/data/xml/Y03.xml index 1429cf534d..2dd93d4827 100644 --- a/data/xml/Y03.xml +++ b/data/xml/Y03.xml @@ -3,8 +3,8 @@ Proceedings of the 17th Pacific Asia Conference on Language, Information and Computation - Dong HongJi - Kim TengLua + Dong HongJi + Kim TengLua COLIPS PUBLICATIONS
Sentosa, Singapore
October @@ -17,7 +17,7 @@ Virtual Linked Lexical Knowledge Base for Causality Reasoning - Key-SunChoi + Key-SunChoi 1–1 Y03-1001 http://hdl.handle.net/2065/12259 @@ -105,8 +105,8 @@ Porting Grammars between Typologically Similar Languages : <fixed-case>J</fixed-case>apanese to <fixed-case>K</fixed-case>orean RogerKim MaryDalrymple - Ronald M.Kaplan - Tracy HollowayKing + Ronald M.Kaplan + Tracy HollowayKing 98–105 Y03-1011 http://hdl.handle.net/2065/12309 @@ -131,7 +131,7 @@ Stock Markets as Ocean Water : A Corpus-based, Comparative Study of <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese, <fixed-case>E</fixed-case>nglish and <fixed-case>S</fixed-case>panish - Siaw-FongChung + Siaw-FongChung KathleenAhrens Ya-huiSung 124–133 @@ -150,7 +150,7 @@ Context-rule Model for Pos Tagging Yu-FangTsai - Keh-JiannChen + Keh-JiannChen 146–151 Y03-1016 http://hdl.handle.net/2065/12264 @@ -158,7 +158,7 @@ <fixed-case>C</fixed-case>hinese Word Segmentation Based on Contextual Entropy - Jin HuHuang + Jin HuHuang DavidPowers 152–158 Y03-1017 @@ -178,7 +178,7 @@ Cross-Lingual Text Filtering Based on Text Concepts and k<fixed-case>NN</fixed-case> ShaoziLi WeifengSu - TangqiuLi + TangqiuLi HuowangChen 166–173 Y03-1019 @@ -204,8 +204,8 @@ A Synchronous Corpus-Based Study of Verb-Noun Fluidity in <fixed-case>C</fixed-case>hinese - Oi YeeKwong - Benjamin K.Tsou + Oi YeeKwong + Benjamin K.Tsou 194–203 Y03-1022 http://hdl.handle.net/2065/12271 @@ -266,8 +266,8 @@ Efficient Methods for Multigram Compound Discovery - WuHorng Jyh Paul - NgHong I + Horng Jyh PaulWu + Hong INg GongRuibin 257–268 Y03-1029 @@ -276,7 +276,7 @@ Translation Template Learning Based on Hidden <fixed-case>M</fixed-case>arkov Modeling - Minh LeNguyen + Minh LeNguyen AkariShimazu SusumuHoriguchi 269–276 @@ -307,7 +307,7 @@ A New Sentence Reduction based on Decision Tree Model - Minh LeNguyen + Minh LeNguyen SusumuHoriguchi 290–297 Y03-1033 @@ -316,7 +316,7 @@ <fixed-case>J</fixed-case>apanese Parser on the basis of the <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar Formalism and its Evaluation - HiroshiMasuichi + HiroshiMasuichi TomokoOkuma HirokiYoshimura YasunariHarada @@ -327,9 +327,9 @@ A Statistical Approach to <fixed-case>C</fixed-case>hinese-to-<fixed-case>E</fixed-case>nglish Back-Transliteration - Chun-JenLee - Jason S.Chang - Jyh-Shing RogerJang + Chun-JenLee + Jason S.Chang + Jyh-Shing RogerJang 310–318 Y03-1035 http://hdl.handle.net/2065/12286 @@ -346,7 +346,7 @@ Modeling Verb Order in Complex Multi-Verbal Predicate Constructions Olivia S.-C.Lam - Adams B.Bodomo + Adams B.Bodomo 328–338 Y03-1037 http://hdl.handle.net/2065/12288 @@ -425,12 +425,12 @@ Towards a Multi-Objective Corpus for <fixed-case>V</fixed-case>ietnamese Language - VuHai Quan + Hai QuanVu PhamNam Trung NguyenDuc Hoang Ha HuynhBao Toan LeHoai Bac - HoangKiem + KiemHoang 416–422 Y03-1046 http://hdl.handle.net/2065/12298 @@ -439,7 +439,7 @@ Using Zero Anaphora Resolution to Improve Text Categorization Ching-LongYeh - Yi-ChunChen + Yi-ChunChen 423–430 Y03-1047 http://hdl.handle.net/2065/12299 @@ -464,7 +464,7 @@ The Treatment of <fixed-case>J</fixed-case>apanese Focus Particles Based on <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar TomokoOhkuma - HiroshiMasuichi + HiroshiMasuichi HirokiYoshimura YasunariHarada 448–455 diff --git a/data/xml/Y04.xml b/data/xml/Y04.xml index 5a2e4a8b3d..b84fb1aa65 100644 --- a/data/xml/Y04.xml +++ b/data/xml/Y04.xml @@ -3,11 +3,11 @@ Proceedings of the 18th Pacific Asia Conference on Language, Information and Computation - HiroshiMasuichi + HiroshiMasuichi TomokoOhkuma KiyoshiIshikawa YasunariHarada - KeiYoshimoto + KeiYoshimoto Logico-Linguistic Society of Japan
Waseda University, Tokyo, Japan
December @@ -28,7 +28,7 @@
Machine Learning based <fixed-case>NLP</fixed-case> : Experiences and Supporting Tools - YujiMatsumoto + YujiMatsumoto 15–16 Y04-1002 http://hdl.handle.net/2065/555 @@ -97,7 +97,7 @@ <fixed-case>J</fixed-case>apanese Subjects and Information Structure : A Constraint-based Approach AkiraOhtani - YujiMatsumoto + YujiMatsumoto 93–104 Y04-1010 http://hdl.handle.net/2065/563 @@ -115,7 +115,7 @@ Automatic Discovery of Telic and Agentive Roles from Corpus Data IchiroYamada - TimothyBaldwin + TimothyBaldwin 115–126 Y04-1012 http://hdl.handle.net/2065/565 @@ -133,9 +133,9 @@ Pruning False Unknown Words to Improve <fixed-case>C</fixed-case>hinese Word Segmentation - Chooi-LingGoh + Chooi-LingGoh MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 139–150 Y04-1014 http://hdl.handle.net/2065/567 @@ -143,7 +143,7 @@ Ontology-based Prediction of Compound Relations : A Study Based on <fixed-case>SUMO</fixed-case> - Jia-FeiHong + Jia-FeiHong Xiang-BingLi Chu-RenHuang 151–160 @@ -158,8 +158,8 @@ AoifeCahill RowenaChan RuthO’Donovan - AdamsBodomo - Josefvan Genabith + AdamsBodomo + Josefvan Genabith AndyWay 161–172 Y04-1016 @@ -188,7 +188,7 @@ Integrated Use of Internal and External Evidence in the Alignment of Multi-Word Named Entities TakeshiKutsumi - TakehikoYoshimi + TakehikoYoshimi KatsunoriKotani IchikoSata HitoshiIsahara @@ -216,7 +216,7 @@ Scalar Meanings of the Concessive (-to), the Contrastive Topic Marker (-nun) and -man ‘only’ in <fixed-case>K</fixed-case>orean (and <fixed-case>J</fixed-case>apanese) - ChungminLee + ChungminLee 217–226 Y04-1022 http://hdl.handle.net/2065/575 @@ -267,8 +267,8 @@ Adaptive Word Sense Tagging on <fixed-case>C</fixed-case>hinese Corpus - Sue-jinKer - Jen-NanChen + Sue-jinKer + Jen-NanChen 267–274 Y04-1028 http://hdl.handle.net/2065/581 @@ -285,7 +285,7 @@ <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Parallel Corpus Construction and its Application - BaobaoChang + BaobaoChang 283–290 Y04-1030 http://hdl.handle.net/2065/583 diff --git a/data/xml/Y05.xml b/data/xml/Y05.xml index fae7b8a6a3..02062c0c0c 100644 --- a/data/xml/Y05.xml +++ b/data/xml/Y05.xml @@ -50,7 +50,7 @@ A Framework for Data Management for the Online Volunteer Translators’ Aid System <fixed-case>QRL</fixed-case>ex YoucefBey KyoKageura - ChristianBoitet + ChristianBoitet 51–60 Y05-1005 http://hdl.handle.net/2065/28988 @@ -60,7 +60,7 @@ From Text to Sign Language: Exploiting the Spatial and Motioning Dimension Ji-WonChoi Hee-JinLee - Jong C.Park + Jong C.Park 61–69 Y05-1006 http://hdl.handle.net/2065/28986 @@ -68,7 +68,7 @@ <fixed-case>MARKET</fixed-case> Metaphors: <fixed-case>C</fixed-case>hinese, <fixed-case>E</fixed-case>nglish and <fixed-case>M</fixed-case>alay - Siaw-FongChung + Siaw-FongChung 71–81 Y05-1007 http://hdl.handle.net/2065/29011 @@ -103,7 +103,7 @@ MakotoKondo HidekiAsoh AkiraTakagi - YukihiroIto + YukihiroIto 107–118 Y05-1010 http://hdl.handle.net/2065/28993 @@ -111,10 +111,10 @@ A Study on Implementation of <fixed-case>S</fixed-case>outhern-<fixed-case>M</fixed-case>in <fixed-case>T</fixed-case>aiwanese Tone Sandhi System - IunUn-gian - LauKiat-gak + Un-gianIun + Kiat-gakLau LiSheng-an - KaoCheng-yan + Cheng-yanKao 119–130 Y05-1011 http://hdl.handle.net/2065/29009 @@ -123,7 +123,7 @@ Vowel Sound Disambiguation for Intelligible <fixed-case>K</fixed-case>orean Speech Synthesis Ho-JoonLee - Jong C.Park + Jong C.Park 131–142 Y05-1012 http://hdl.handle.net/2065/28995 @@ -153,7 +153,7 @@ A Structured <fixed-case>SVM</fixed-case> Semantic Parser Augmented by Semantic Tagging with Conditional Random Field - Minh LeNguyen + Minh LeNguyen AkiraShimazu Hieu XuanPhan 167–177 @@ -164,7 +164,7 @@ Multiply Quantified Internally Headed Relative Clause in <fixed-case>J</fixed-case>apanese: A Skolem Term Based Approach RuiOtake - KeiYoshimoto + KeiYoshimoto 179–189 Y05-1016 http://hdl.handle.net/2065/29015 @@ -174,7 +174,7 @@ A study on multiple interpretations of frequency adverbs in <fixed-case>J</fixed-case>apanese TomoakiOzawa HiroyukiNishina - KeiYoshimoto + KeiYoshimoto ShigeruSato 191–198 Y05-1017 @@ -187,7 +187,7 @@ KazunoriKomatani TakashiMiyata KoichiHashida - HiroshiOkuno + HiroshiOkuno 199–210 Y05-1018 http://hdl.handle.net/2065/28996 @@ -233,8 +233,8 @@ An Approach to Improve the Smoothing Process Based on Non-uniform Redistribution - Feng-LongHuang - Ming-ShingYu + Feng-LongHuang + Ming-ShingYu 257–264 Y05-1023 http://hdl.handle.net/2065/29016 diff --git a/data/xml/Y06.xml b/data/xml/Y06.xml index 8d5cfaeb92..2d7160b70e 100644 --- a/data/xml/Y06.xml +++ b/data/xml/Y06.xml @@ -14,7 +14,7 @@ Which Is Essential for <fixed-case>C</fixed-case>hinese Word Segmentation: Character versus Word - Chang-NingHuang + Chang-NingHuang HaiZhao 1–12 Y06-1001 @@ -31,7 +31,7 @@ Towards a Neuro-Cognitive Model of Human Sentence Processing - KeiYoshimoto + KeiYoshimoto ShigeruSato 21–27 Y06-1003 @@ -79,7 +79,7 @@ An Information Retrieval Model Based On Word Concept ChenWu QuanZhang - XiangfengWei + XiangfengWei 56–63 Y06-1008 http://hdl.handle.net/2065/29040 @@ -87,7 +87,7 @@ Discriminative Reranking for Spelling Correction - YangZhang + YangZhang PilianHe WeiXiang MuLi @@ -120,9 +120,9 @@ Effective Tag Set Selection in <fixed-case>C</fixed-case>hinese Word Segmentation via Conditional Random Field Modeling HaiZhao - Chang-NingHuang + Chang-NingHuang MuLi - Bao-LiangLu + Bao-LiangLu 87–94 Y06-1012 http://hdl.handle.net/2065/29030 @@ -141,9 +141,9 @@ A Comparative Study of the Effect of Word Segmentation On <fixed-case>C</fixed-case>hinese Terminology Extraction LuningJi - QinLu + QinLu WenjieLi - YiRongChen + YiRongChen 101–108 Y06-1014 http://hdl.handle.net/2065/29021 @@ -152,7 +152,7 @@ <fixed-case>TC</fixed-case>tract-A Collocation Extraction Approach for Noun Phrases Using Shallow Parsing Rules and Statistic Models Wan YinLi - QinLu + QinLu JamesLiu 109–116 Y06-1015 @@ -225,7 +225,7 @@ The Analysis of <fixed-case>C</fixed-case>hinese Sentence Semantic Chunk Share Based on <fixed-case>HNC</fixed-case> Theory QuanZhang ChenWu - XiangfengWei + XiangfengWei 175–182 Y06-1023 http://hdl.handle.net/2065/29068 @@ -233,7 +233,7 @@ Using <fixed-case>C</fixed-case>hinese <fixed-case>G</fixed-case>igaword Corpus and <fixed-case>C</fixed-case>hinese Word Sketch in linguistic Research - Jia-FeiHong + Jia-FeiHong Chu-RenHuang 183–190 Y06-1024 @@ -259,9 +259,9 @@ Knowledge-Rich Approach to Automatic Grammatical Information Acquisition: Enriching <fixed-case>C</fixed-case>hinese <fixed-case>S</fixed-case>ketch <fixed-case>E</fixed-case>ngine with a Lexical Grammar Chu-RenHuang - Wei-YunMa + Wei-YunMa Yi-ChingWu - Chih-MingChiu + Chih-MingChiu 206–214 Y06-1027 http://hdl.handle.net/2065/29027 @@ -269,11 +269,11 @@ <fixed-case>V</fixed-case>ietnamese Word Segmentation with <fixed-case>CRF</fixed-case>s and <fixed-case>SVM</fixed-case>s: An Investigation - Cam-TuNguyen + Cam-TuNguyen Trung-KienNguyen Xuan-HieuPhan - Le-MinhNguyen - Quang-ThuyHa + Le-MinhNguyen + Quang-ThuyHa 215–222 Y06-1028 http://hdl.handle.net/2065/29084 @@ -281,7 +281,7 @@ A language-independent method for the alignement of parallel corpora - Thi Minh HuyềnNguyễn + Thi Minh HuyềnNguyễn MathiasRossignol 223–230 Y06-1029 @@ -318,7 +318,7 @@ Research on Hypothesizing and Sorting the Eg Candidates in <fixed-case>C</fixed-case>hinese Semantic Parsing - XiangFengWei + XiangFengWei QuanZhang 250–256 Y06-1033 @@ -328,8 +328,8 @@ Mining the Relation between Sentiment Expression and Target Using Dependency of Words ZhongchaoFei - XuanjingHuang - LideWu + XuanjingHuang + LideWu 257–264 Y06-1034 http://hdl.handle.net/2065/29079 @@ -348,7 +348,7 @@ A Constraint-based Morphological Analyzer for Concatenative and Non-concatenative Morphology Farrah CherryFortes-Galvan - Rachel EditaRoxas + Rachel EditaRoxas 273–279 Y06-1036 http://hdl.handle.net/2065/29081 @@ -386,7 +386,7 @@ An Activation-based Sentence Processing Model of <fixed-case>E</fixed-case>nglish KeiTakahashi KiyoshiIshikawa - KeiYoshimoto + KeiYoshimoto 303–310 Y06-1040 http://hdl.handle.net/2065/29018 @@ -395,7 +395,7 @@ Platform for Full-Syntax Grammar Development Using Meta-grammar Constructs AlešHorák - VladimírKadlec + VladimírKadlec 311–318 Y06-1041 http://hdl.handle.net/2065/29045 @@ -413,7 +413,7 @@ Using the <fixed-case>S</fixed-case>wadesh list for creating a simple common taxonomy - LaurentPrévot + LaurentPrévot Chu-RenHuang I-LiSu 324–331 @@ -423,11 +423,11 @@ The Construction of a Dictionary for a Two-layer <fixed-case>C</fixed-case>hinese Morphological Analyzer - Chooi-LingGoh - Jia + Chooi-LingGoh + Jia YuchangCheng MasayukiAsahara - YujiMatsumoto + YujiMatsumoto 332–340 Y06-1044 http://hdl.handle.net/2065/29024 @@ -435,7 +435,7 @@ A Natural Language Model of Computing with Words in Web Pages - Ze-yuZheng + Ze-yuZheng PingZhang 341–346 Y06-1045 @@ -472,9 +472,9 @@ <fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et Based <fixed-case>C</fixed-case>hinese Question Classification - DongfengCai - JingguangSun - GuipingZhang + DongfengCai + JingguangSun + GuipingZhang DexinLv YanjuDong YanSong @@ -497,7 +497,7 @@ Automatic Target Word Disambiguation Using Syntactic Relationships EbonyDomingo - Rachel EditaRoxas + Rachel EditaRoxas 374–377 Y06-1051 http://hdl.handle.net/2065/29057 @@ -507,7 +507,7 @@ Semantic Representation and Composition for Unknown Compounds in <fixed-case>E</fixed-case>-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et Yueh-YinShih Shu-LingHuang - Keh-JiannChen + Keh-JiannChen 378–381 Y06-1052 http://hdl.handle.net/2065/29039 @@ -526,7 +526,7 @@ Learning Translation Rules for a Bidirectional <fixed-case>E</fixed-case>nglish-<fixed-case>F</fixed-case>ilipino Machine Translator Michelle WendyTan Bryan AnthonyHong - Danniel LiwanagAlcantara + Danniel LiwanagAlcantara AmielPerez LawrenceTan 386–389 @@ -536,9 +536,9 @@ A Visualization method for machine translation evaluation results - Jian-MinYao - Yun-QianQu - Qiao-MingZhu + Jian-MinYao + Yun-QianQu + Qiao-MingZhu JingZhang 390–393 Y06-1055 @@ -555,11 +555,11 @@ Research on concept-sememe tree and semantic relevance computation - GuiPingZhang + GuiPingZhang ChaoYu - DongFengCai + DongFengCai YanSong - JingGuangSun + JingGuangSun 398–402 Y06-1057 http://hdl.handle.net/2065/29052 @@ -687,7 +687,7 @@ Translation & Transform Algorithm of Query Sentence in Cross-Language Information Retrieval Xiao-feiZhang Ke-liangZhang - He-yanHuang + He-yanHuang 467–470 Y06-1071 http://hdl.handle.net/2065/29062 diff --git a/data/xml/Y07.xml b/data/xml/Y07.xml index c94c4570cf..02c84a25ce 100644 --- a/data/xml/Y07.xml +++ b/data/xml/Y07.xml @@ -14,7 +14,7 @@ Scalable Deep Linguistic Processing: Mind the Lexical Gap - TimothyBaldwin + TimothyBaldwin 3–12 Y07-1001 http://hdl.handle.net/2065/29115 @@ -22,7 +22,7 @@ The Semantics of Semantic Annotation - HarryBunt + HarryBunt 13–28 Y07-1002 http://hdl.handle.net/2065/29125 @@ -30,7 +30,7 @@ Deep Lexical Semantics: The Ontological Ascent - Jerry R.Hobbs + Jerry R.Hobbs 29–41 Y07-1003 http://hdl.handle.net/2065/29124 @@ -74,7 +74,7 @@ <fixed-case>BEYT</fixed-case>rans: A Free Online Collaborative <fixed-case>W</fixed-case>iki-Based <fixed-case>CAT</fixed-case> Environment Designed for Online Translation Communities YoucefBey KyoKageura - ChristianBoitet + ChristianBoitet 87–94 Y07-1008 http://hdl.handle.net/2065/29117 @@ -91,7 +91,7 @@ Customizing an <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Machine Translation System for Patent Translation Sung-KwonChoi - Young-GilKim + Young-GilKim 105–114 Y07-1010 http://hdl.handle.net/2065/29090 @@ -107,7 +107,7 @@ Computing Thresholds of Linguistic Saliency - Siaw-FongChung + Siaw-FongChung KathleenAhrens Chung-PingCheng Chu-RenHuang @@ -119,9 +119,9 @@ Modality and Modal Sense Representation in <fixed-case>E</fixed-case>-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et - You-ShanChung + You-ShanChung Shu-LingHuang - Keh-JiannChen + Keh-JiannChen 136–145 Y07-1013 http://hdl.handle.net/2065/29096 @@ -129,8 +129,8 @@ <fixed-case>A</fixed-case>uto<fixed-case>C</fixed-case>or: A Query Based Automatic Acquisition of Corpora of Closely-related Languages - Davis Muhajereen D.Dimalen - Rachel Edita O.Roxas + Davis Muhajereen D.Dimalen + Rachel Edita O.Roxas 146–154 Y07-1014 http://hdl.handle.net/2065/29141 @@ -138,7 +138,7 @@ The Polysemy of Da3: An ontology-based lexical semantic study - Jia-FeiHong + Jia-FeiHong Chu-RenHuang KathleenAhrens 155–162 @@ -157,7 +157,7 @@ Time-moving Metaphors and Ego-moving Metaphors: Which Is Better Comprehended by <fixed-case>T</fixed-case>aiwanese? Hsin-mei MayHuang - Ching-yu ShelleyHsieh + Ching-yu ShelleyHsieh 173–181 Y07-1017 http://hdl.handle.net/2065/29099 @@ -224,7 +224,7 @@ Transition and Parsing State and Incrementality in Dynamic Syntax MasahiroKobayashi - KeiYoshimoto + KeiYoshimoto 249–258 Y07-1025 http://hdl.handle.net/2065/29142 @@ -233,7 +233,7 @@ A Focus Account for Contrastive Reduplication: Prototypicality and Contrastivity BinnaLee - ChungminLee + ChungminLee 259–267 Y07-1026 http://hdl.handle.net/2065/29089 @@ -249,7 +249,7 @@ Implementation of Presence and Absence of Blocking Effects: A Categorial Grammar Approach to <fixed-case>C</fixed-case>hinese and <fixed-case>K</fixed-case>orean - Yong-hunLee + Yong-hunLee 275–284 Y07-1028 http://hdl.handle.net/2065/29111 @@ -257,7 +257,7 @@ Mining Parallel Text from the Web based on Sentence Alignment - BoLi + BoLi JuanLiu HuiliZhu 285–292 @@ -275,7 +275,7 @@ Using Non-Local Features to Improve Named Entity Recognition Recall - XinnianMao + XinnianMao WeiXu YuanDong SaikeHe @@ -288,7 +288,7 @@ Analysis of Indirect Uses of Interrogative Sentences Carrying Anger Hye-JinMin - Jong C.Park + Jong C.Park 311–320 Y07-1032 http://hdl.handle.net/2065/29094 @@ -299,7 +299,7 @@ MakiMiyake TerryJoyce JaeyoungJung - HiroyukiAkama + HiroyukiAkama 321–329 Y07-1033 http://hdl.handle.net/2065/29088 @@ -327,7 +327,7 @@ Acquisition of Named-Entity-Related Relations for Searching - Tri-ThanhNguyen + Tri-ThanhNguyen AkiraShimazu 349–357 Y07-1036 @@ -345,7 +345,7 @@ Case, Coordination, and Information Structure in <fixed-case>J</fixed-case>apanese AkiraOtani - MarkSteedman + MarkSteedman 365–374 Y07-1038 http://hdl.handle.net/2065/29104 @@ -354,7 +354,7 @@ Automatic Acquisition of <fixed-case>L</fixed-case>exical-<fixed-case>F</fixed-case>unctional <fixed-case>G</fixed-case>rammar Resources from a <fixed-case>J</fixed-case>apanese Dependency Corpus MasanoriOya - Josefvan Genabith + Josefvan Genabith 375–384 Y07-1039 http://hdl.handle.net/2065/29140 @@ -364,8 +364,8 @@ Semi-Automatic Annotation Tool to Build Large Dependency Tree-Tagged Corpus Eun-JinPark Jae-HoonKim - Chang-HyunKim - Young-KillKim + Chang-HyunKim + Young-KillKim 385–393 Y07-1040 http://hdl.handle.net/2065/29100 @@ -373,7 +373,7 @@ Multiple Sluicing in <fixed-case>E</fixed-case>nglish - Myung-KwanPark + Myung-KwanPark Jung-MinKang 394–404 Y07-1041 @@ -390,8 +390,8 @@ Relation Extraction Using Convolution Tree Kernel Expanded with Entity Features - LonghuaQian - GuodongZhou + LonghuaQian + GuodongZhou QiaominZhu PeideQian 415–421 @@ -469,8 +469,8 @@ Yu-ChunWang Yi-HsunLee Chu-ChengLin - Tzong-Han RichardTsai - Wen-LianHsu + Tzong-Han RichardTsai + Wen-LianHsu 489–497 Y07-1051 http://hdl.handle.net/2065/29139 @@ -478,7 +478,7 @@ Research on a Model of Extracting Persons’ Information Based on Statistic Method and Conceptual Knowledge - XiangFengWei + XiangFengWei NingJia QuanZhang HanFenZang @@ -499,7 +499,7 @@ Distal Demonstrative Hitlo in <fixed-case>T</fixed-case>aiwanese <fixed-case>S</fixed-case>outhern <fixed-case>M</fixed-case>in - Yi-jingZhao + Yi-jingZhao 522–530 Y07-1054 http://hdl.handle.net/2065/29110 @@ -507,7 +507,7 @@ Children’s Acquisition of Demonstrative Pronouns in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese - Yi-jingZhao + Yi-jingZhao 532–541 Y07-1055 http://hdl.handle.net/2065/29095 @@ -515,7 +515,7 @@ Ambiguity of Reflexives and Case Extension - RichardZuber + RichardZuber 542–547 Y07-1056 http://hdl.handle.net/2065/29123 diff --git a/data/xml/Y08.xml b/data/xml/Y08.xml index e9dbbb7050..c6de955635 100644 --- a/data/xml/Y08.xml +++ b/data/xml/Y08.xml @@ -3,7 +3,7 @@ Proceedings of the 22nd Pacific Asia Conference on Language, Information and Computation - Rachel Edita O.Roxas + Rachel Edita O.Roxas De La Salle University, Manila, Philippines
The University of the Philippines Visayas Cebu College, Cebu City, Philippines
November @@ -30,7 +30,7 @@
Scalar Implicatures: Pragmatic Inferences or Grammar? - ChungminLee + ChungminLee 30–45 Y08-1003 lee-2008-scalar @@ -40,7 +40,7 @@ HaizhouLi BinMa Kong-AikLee - Khe-ChaiSim + Khe-ChaiSim HanwuSun RongTong DonglaiZhu @@ -58,7 +58,7 @@ Some Challenges of Advanced Question-Answering: an Experiment with How-to Questions - PatrickSaint-Dizier + PatrickSaint-Dizier 65–73 Y08-1006 saint-dizier-2008-challenges @@ -87,7 +87,7 @@ Constituent Structure for <fixed-case>F</fixed-case>ilipino: Induction through Probabilistic Approaches - DannielAlcantara + DannielAlcantara AllanBorra 113–122 Y08-1010 @@ -114,7 +114,7 @@ Mary GraceLim Patricia BeaPerez Joanna PatriciaReyes - Nathalie RoseLim + Nathalie RoseLim 141–150 Y08-1013 chen-etal-2008-natural @@ -134,7 +134,7 @@ Ki-YoungLee Yoon-HyungRoh Oh-WoogKwon - Young-GilKim + Young-GilKim 161–168 Y08-1015 choi-etal-2008-overcome @@ -142,7 +142,7 @@ Multi-Engine Approach for Named Entity Recognition in <fixed-case>B</fixed-case>engali AsifEkbal - SivajiBandyopadhyay + SivajiBandyopadhyay 169–178 Y08-1016 ekbal-bandyopadhyay-2008-multi @@ -182,7 +182,7 @@ <fixed-case>K</fixed-case>orean Parsing Based on the Applicative <fixed-case>C</fixed-case>ombinatory <fixed-case>C</fixed-case>ategorial <fixed-case>G</fixed-case>rammar JuyeonKang - Jean-PierreDesclés + Jean-PierreDesclés 215–224 Y08-1021 kang-descles-2008-korean @@ -198,7 +198,7 @@ A Preliminary Study on the Impact of Lexical Concreteness on Word Sense Disambiguation - Oi YeeKwong + Oi YeeKwong 235–244 Y08-1023 kwong-2008-preliminary @@ -214,7 +214,7 @@ An Improved Corpus Comparison Approach to Domain Specific Term Recognition XiaoyueLiu - ChunyuKit + ChunyuKit 253–261 Y08-1025 liu-kit-2008-improved @@ -222,17 +222,17 @@ Extending an <fixed-case>I</fixed-case>ndonesian Semantic Analysis-based Question Answering System with Linguistic and World Knowledge Axioms RahmadMahendra - Septina DianLarasati - RuliManurung + Septina DianLarasati + RuliManurung 262–271 Y08-1026 mahendra-etal-2008-extending An Implementation of a Flexible Author-Reviewer Model of Generation using Genetic Algorithms - RuliManurung - GraemeRitchie - HenryThompson + RuliManurung + GraemeRitchie + HenryThompson 272–281 Y08-1027 manurung-etal-2008-implementation @@ -247,7 +247,7 @@ On <fixed-case>J</fixed-case>apanese Desiderative Constructions AkiraOhtani - MarkSteedman + MarkSteedman 290–301 Y08-1029 ohtani-steedman-2008-japanese @@ -279,7 +279,7 @@ Trend-based Document Clustering for Sensitive and Stable Topic Detection - YoshihideSato + YoshihideSato HarumiKawashima HidenoriOkuda MasahiroOku @@ -324,7 +324,7 @@ Using ‘Low-cost’ Learning Features for Pronoun Resolution RamonCuevas - IvandréParaboni + IvandréParaboni 377–383 Y08-1038 cuevas-paraboni-2008-using @@ -343,7 +343,7 @@ Controlled <fixed-case>K</fixed-case>orean for <fixed-case>K</fixed-case>orean-<fixed-case>E</fixed-case>nglish <fixed-case>MT</fixed-case> MunpyoHong - Chang-HyunKim + Chang-HyunKim 391–396 Y08-1040 hong-kim-2008-controlled @@ -353,7 +353,7 @@ RileHu YuezhongTang ChenLi - XiaWang + XiaWang 397–403 Y08-1041 hu-etal-2008-statistical @@ -376,9 +376,9 @@ What is Needed the Most in <fixed-case>MT</fixed-case>-Supported Paper Writing - Chang HyunKim + Chang HyunKim Oh-WoogKwon - Young KilKim + Young KilKim 418–427 Y08-1044 kim-etal-2008-needed @@ -420,7 +420,7 @@ Ki-YoungLee Sung-KwonChoi Oh-WoogKwon - Young-GilKim + Young-GilKim 460–466 Y08-1049 roh-etal-2008-recognizing diff --git a/data/xml/Y09.xml b/data/xml/Y09.xml index 23942388a9..2fa1bbb582 100644 --- a/data/xml/Y09.xml +++ b/data/xml/Y09.xml @@ -16,7 +16,7 @@ A Step toward Compositional Semantics: <fixed-case>E</fixed-case>-<fixed-case>H</fixed-case>ow<fixed-case>N</fixed-case>et a Lexical Semantic Representation System - Keh-JiannChen + Keh-JiannChen Shu-LingHuang 1–8 Y09-1001 @@ -38,14 +38,14 @@ Resultatives as Causal Relations between Events - Ik-HwanLee + Ik-HwanLee 29–39 Y09-1004 lee-2009-resultatives Developing Speech Recognition and Synthesis Technologies to Support Computer-Aided Pronunciation Training for <fixed-case>C</fixed-case>hinese Learners of <fixed-case>E</fixed-case>nglish - HelenMeng + HelenMeng 40–42 Y09-1005 meng-2009-developing @@ -70,7 +70,7 @@ Dependency Grammar Based <fixed-case>E</fixed-case>nglish Subject-Verb Agreement Evaluation - DongfengCai + DongfengCai YonghuaHu XueleiMiao YanSong @@ -80,9 +80,9 @@ <fixed-case>W</fixed-case>iki<fixed-case>S</fixed-case>ense: Supersense Tagging of <fixed-case>W</fixed-case>ikipedia Named Entities Based <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et - JosephChang - Richard Tzong-HanTsai - Jason S.Chang + JosephChang + Richard Tzong-HanTsai + Jason S.Chang 72–81 Y09-1009 chang-etal-2009-wikisense @@ -90,7 +90,7 @@ An Integrated Approach to Heterogeneous Data for Information Extraction YingChen - Sophia Y. M.Lee + Sophia Y. M.Lee Chu-RenHuang 82–91 Y09-1010 @@ -99,7 +99,7 @@ Are Emotions Enumerable or Decomposable? And its Implications for Emotion Processing YingChen - Sophia Y. M.Lee + Sophia Y. M.Lee Chu-RenHuang 92–100 Y09-1011 @@ -116,7 +116,7 @@ Coupling an Annotated Corpus and a Morphosyntactic Lexicon for State-of-the-Art <fixed-case>POS</fixed-case> Tagging with Less Human Effort PascalDenis - BenoîtSagot + BenoîtSagot 110–119 Y09-1013 denis-sagot-2009-coupling @@ -125,14 +125,14 @@ Voted Approach for Part of Speech Tagging in <fixed-case>B</fixed-case>engali AsifEkbal Md.Hasanuzzaman - SivajiBandyopadhyay + SivajiBandyopadhyay 120–129 Y09-1014 ekbal-etal-2009-voted Adjective Density as a Text Formality Characteristic for Automatic Text Classification: A Study Based on the <fixed-case>B</fixed-case>ritish <fixed-case>N</fixed-case>ational <fixed-case>C</fixed-case>orpus - Alex ChengyuFang + Alex ChengyuFang JingCao 130–139 Y09-1015 @@ -142,7 +142,7 @@ Correcting Errors Using the Framework of Argumentation: Towards Generating Argumentative Correction Propositions from Error Annotation Schemas MarieGarnier ArnaudRykner - PatrickSaint-Dizier + PatrickSaint-Dizier 140–149 Y09-1016 garnier-etal-2009-correcting @@ -159,7 +159,7 @@ <fixed-case>L</fixed-case>ogistic<fixed-case>LDA</fixed-case>: Regularizing <fixed-case>L</fixed-case>atent <fixed-case>D</fixed-case>irichlet <fixed-case>A</fixed-case>llocation by Logistic Regression Jia-ChengGuo - Bao-LiangLu + Bao-LiangLu ZhiweiLi LeiZhang 160–169 @@ -169,8 +169,8 @@ Dependency Relations as Source Context in Phrase-Based <fixed-case>SMT</fixed-case> RejwanulHaque - Sudip KumarNaskar - Antalvan den Bosch + Sudip KumarNaskar + Antalvan den Bosch AndyWay 170–179 Y09-1019 @@ -190,7 +190,7 @@ Query-Focused Multi-Document Summarization Using Co-Training Based Semi-Supervised Learning PoHu - DonghongJi + DonghongJi HaiWang ChongTeng 190–199 @@ -199,10 +199,10 @@ Review Classification Using Semantic Features and Run-Time Weighting - Chung-chiHuang + Chung-chiHuang Meng-chiechLee Zhe-nanLin - Jason S.Chang + Jason S.Chang 200–209 Y09-1022 huang-etal-2009-review @@ -226,7 +226,7 @@ Layer-Based Dependency Parsing PingJian - ChengqingZong + ChengqingZong 230–239 Y09-1025 jian-zong-2009-layer @@ -246,7 +246,7 @@ An Experimental Syntactic Study of Binding: A Case Study of <fixed-case>K</fixed-case>orean Long-Distance Anaphor caki Ji-HyeKim - James H.Yoon + James H.Yoon 250–259 Y09-1027 kim-yoon-2009-experimental @@ -254,7 +254,7 @@ Method of Extracting Is-A and Part-Of Relations Using Pattern Pairs in Mass Corpus Se-JongKim - Yong-HunLee + Yong-HunLee Jong-HyeokLee 260–268 Y09-1028 @@ -286,7 +286,7 @@ Cause Event Representations for Happiness and Surprise - Sophia Yat MeiLee + Sophia Yat MeiLee YingChen Chu-RenHuang 297–306 @@ -313,7 +313,7 @@ Approach to Selecting Best Development Set for Phrase-Based Statistical Machine Translation PengLiu YuZhou - ChengqingZong + ChengqingZong 325–334 Y09-1035 liu-etal-2009-approach @@ -321,7 +321,7 @@ Using Extra-Linguistic Material for <fixed-case>M</fixed-case>andarin-<fixed-case>F</fixed-case>rench Verbal Constructions Comparison PierreMagistry - LaurentPrévot + LaurentPrévot HintatCheung Chien-yunShiao YannDesalle @@ -332,9 +332,9 @@ Improving Unsegmented Dialogue Turns Annotation with N-gram Transducers - Carlos-D.Martínez-Hinarejos + Carlos-D.Martínez-Hinarejos VicentTamarit - José-MiguelBenedí + José-MiguelBenedí 345–354 Y09-1037 martinez-hinarejos-etal-2009-improving @@ -342,7 +342,7 @@ Using Tree Kernels for Classifying Temporal Relations between Events Seyed AbolghasemMirroshandel - GholamrezaGhassem-Sani + GholamrezaGhassem-Sani MahdyKhayyamian 355–364 Y09-1038 @@ -359,9 +359,9 @@ Extending Bilingual <fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et via Hierarchical Word Translation Classification Tzu-yiNien TsunKu - Chung-chiHuang - Mei-huaChen - Jason S.Chang + Chung-chiHuang + Mei-huaChen + Jason S.Chang 375–384 Y09-1040 nien-etal-2009-extending @@ -376,14 +376,14 @@ Note on <fixed-case>J</fixed-case>apanese Epistemic Verb Constructions: A Surface-Compositional Analysis AkiraOhtani - MarkSteedman + MarkSteedman 395–404 Y09-1042 ohtani-steedman-2009-note On the So-Called Thematic Use of Wa: Reconsideration and Reconciliation - David Y.Oshima + David Y.Oshima 405–414 Y09-1043 oshima-2009-called @@ -462,7 +462,7 @@ Finding Answers to Definition Questions Using Web Knowledge Bases HanRen - DonghongJi + DonghongJi JingWan ChongTeng 484–492 @@ -473,7 +473,7 @@ Incorporating Statistical Information of Lexical Dependency into a Rule-Based Parser Yoon-HyungRoh Ki-YoungLee - Young-GilKim + Young-GilKim 493–500 Y09-2007 roh-etal-2009-incorporating @@ -484,7 +484,7 @@ Bianca PamelaAlcera Ed OswaldGo Czarina MegGonzales - Nathalie RoseLim + Nathalie RoseLim 501–510 Y09-2008 samson-etal-2009-automated @@ -507,7 +507,7 @@ <fixed-case>C</fixed-case>hinese Function Tag Labeling - WeiweiSun + WeiweiSun ZhifangSui 530–539 Y09-2011 @@ -530,7 +530,7 @@ Passage Retrieval Using Answer Type Profiles in Question Answering - Surya GaneshVeeravalli + Surya GaneshVeeravalli VasudevaVarma 559–568 Y09-2014 @@ -548,7 +548,7 @@ A Framework for Effectively Integrating Hard and Soft Syntactic Rules into Phrase Based Translation JiajunZhang - ChengqingZong + ChengqingZong 579–588 Y09-2016 zhang-zong-2009-framework @@ -556,7 +556,7 @@ A Bootstrapping Method for Finer-Grained Opinion Mining Using Graph Model ShuZhang - YingjuXia + YingjuXia YaoMeng HaoYu 589–595 @@ -575,22 +575,22 @@ Summarizing Opinions in Blog Threads - AlexandraBalahur - MijailKabadjov + AlexandraBalahur + MijailKabadjov JosefSteinberger RalfSteinberger - AndrésMontoyo + AndrésMontoyo 606–613 Y09-2019 balahur-etal-2009-summarizing-opinions Constraint Based Hybrid Approach to Parsing <fixed-case>I</fixed-case>ndian Languages - AksharBharati + AksharBharati SamarHusain MeherVijay KalyanDeepak - Dipti MisraSharma + Dipti MisraSharma RajeevSangal 614–621 Y09-2020 @@ -600,7 +600,7 @@ Interpolated <fixed-case>PLSI</fixed-case> for Learning Plausible Verb Arguments HiramCalvo KentaroInui - YujiMatsumoto + YujiMatsumoto 622–629 Y09-2021 calvo-etal-2009-interpolated @@ -615,8 +615,8 @@ <fixed-case>V</fixed-case>ocab<fixed-case>A</fixed-case>nalyzer: A Referred Word List Analyzing Tool with Keyword, Concordancing and N-gram Functions - Siaw-FongChung - F.Y. AugustChao + Siaw-FongChung + F.Y. AugustChao Yi-ChenHsieh 638–645 Y09-2023 @@ -624,8 +624,8 @@ Building Online Corpora of <fixed-case>P</fixed-case>hilippine Languages - Shirley N.Dita - Rachel Edita O.Roxas + Shirley N.Dita + Rachel Edita O.Roxas PaulInventado 646–653 Y09-2024 @@ -641,9 +641,9 @@ <fixed-case>L</fixed-case>atin Etymologies as Features on <fixed-case>BNC</fixed-case> Text Categorization - Alex ChengyuFang + Alex ChengyuFang WanyinLi - NancyIde + NancyIde 662–669 Y09-2026 fang-etal-2009-latin @@ -651,8 +651,8 @@ Experiments on Domain Adaptation for <fixed-case>E</fixed-case>nglish–<fixed-case>H</fixed-case>indi <fixed-case>SMT</fixed-case> RejwanulHaque - Sudip KumarNaskar - Josefvan Genabith + Sudip KumarNaskar + Josefvan Genabith AndyWay 670–677 Y09-2027 @@ -668,7 +668,7 @@ Bridging the Gap between Graph Modeling and Developmental Psycholinguistics: An Experiment on Measuring Lexical Proximity in <fixed-case>C</fixed-case>hinese Semantic Space - Shu-KaiHsieh + Shu-KaiHsieh Chun-HanChang IvyKuo HintatCheung @@ -683,7 +683,7 @@ YunJin QingLi YingshunWu - Young-GilKim + Young-GilKim 694–701 Y09-2030 jin-etal-2009-effective @@ -701,7 +701,7 @@ Syntactic Category Prediction for Improving Translation Quality in <fixed-case>E</fixed-case>nglish-<fixed-case>K</fixed-case>orean Machine Translation - Sung-DongKim + Sung-DongKim 710–717 Y09-2032 kim-2009-syntactic @@ -712,7 +712,7 @@ Sung-KwonChoi Ki-YoungLee Yoon-HyungRoh - Young-GilKim + Young-GilKim 718–725 Y09-2033 kwon-etal-2009-customizing @@ -728,9 +728,9 @@ Extracting Keyphrases from <fixed-case>C</fixed-case>hinese News Articles Using <fixed-case>T</fixed-case>ext<fixed-case>R</fixed-case>ank and Query Log Knowledge WeimingLiang - Chang-NingHuang + Chang-NingHuang MuLi - Bao-LiangLu + Bao-LiangLu 733–740 Y09-2035 liang-etal-2009-extracting @@ -739,7 +739,7 @@ Modeling the Relationship among Linguistic Typological Features with Hierarchical <fixed-case>D</fixed-case>irichlet Process Chu-ChengLin Yu-ChunWang - Richard Tzong-HanTsai + Richard Tzong-HanTsai 741–747 Y09-2036 lin-etal-2009-modeling @@ -748,7 +748,7 @@ Document Re-ranking via <fixed-case>W</fixed-case>ikipedia Articles for Definition/Biography Type Questions MaofuLiu FangFang - DonghongJi + DonghongJi 748–754 Y09-2037 liu-etal-2009-document @@ -756,7 +756,7 @@ Towards Bilingual Term Extraction in Comparable Patents BinLu - Benjamin K.Tsou + Benjamin K.Tsou 755–762 Y09-2038 lu-tsou-2009-towards @@ -764,7 +764,7 @@ Factors Affecting Part-of-Speech Tagging for <fixed-case>T</fixed-case>agalog ErlynManguilimotan - YujiMatsumoto + YujiMatsumoto 763–770 Y09-2039 manguilimotan-matsumoto-2009-factors @@ -774,7 +774,7 @@ KentaOouchida Jin-DongKim ToshihisaTakagi - Jun’ichiTsujii + Jun’ichiTsujii 771–778 Y09-2040 oouchida-etal-2009-guidelink @@ -817,10 +817,10 @@ Named Entity Recognition for <fixed-case>M</fixed-case>anipuri Using Support Vector Machine - Thoudam DorenSingh + Thoudam DorenSingh KishorjitNongmeikapam AsifEkbal - SivajiBandyopadhyay + SivajiBandyopadhyay 811–818 Y09-2045 singh-etal-2009-named @@ -837,7 +837,7 @@ Which is More Suitable for <fixed-case>C</fixed-case>hinese Word Segmentation, the Generative Model or the Discriminative One? KunWang - ChengqingZong + ChengqingZong Keh-YihSu 827–834 Y09-2047 @@ -846,11 +846,11 @@ Design of <fixed-case>C</fixed-case>hinese <fixed-case>HPSG</fixed-case> Framework for Data-Driven Parsing XiangliWang - ShunyaIwasawa + ShunyaIwasawa YusukeMiyao TakuyaMatsuzaki KunYu - Jun’ichiTsujii + Jun’ichiTsujii 835–842 Y09-2048 wang-etal-2009-design @@ -858,7 +858,7 @@ Rule-based <fixed-case>K</fixed-case>orean Grapheme to Phoneme Conversion Using Sound Patterns Yu-ChunWang - Richard Tzong-HanTsai + Richard Tzong-HanTsai 843–850 Y09-2049 wang-tsai-2009-rule @@ -889,7 +889,7 @@ Towards Establishing a Hierarchy in the <fixed-case>J</fixed-case>apanese Sentence Structure - KeiYoshimoto + KeiYoshimoto ChidoriNakamura AlastairButler 875–882 diff --git a/data/xml/Y10.xml b/data/xml/Y10.xml index 4c4992e8db..20c49b060b 100644 --- a/data/xml/Y10.xml +++ b/data/xml/Y10.xml @@ -6,7 +6,7 @@ RyoOtoguro KiyoshiIshikawa HiroshiUmemoto - KeiYoshimoto + KeiYoshimoto YasunariHarada Institute of Digital Enhancement of Cognitive Processing, Waseda University
Tohoku University, Sendai, Japan
@@ -70,7 +70,7 @@
A Morphosyntactic Analysis of the Pronominal System of <fixed-case>P</fixed-case>hilippine Languages - Shirley N.Dita + Shirley N.Dita 45–59 Y10-1008 dita-2010-morphosyntactic @@ -101,7 +101,7 @@ Enhanced Genre Classification through Linguistically Fine-Grained <fixed-case>POS</fixed-case> Tags - Alex ChengyuFang + Alex ChengyuFang JingCao 85–94 Y10-1012 @@ -110,7 +110,7 @@ Identifying Emotional Expressions, Intensities and Sentence Level Emotion Tags Using a Supervised Framework DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 95–104 Y10-1013 das-bandyopadhyay-2010-identifying @@ -190,7 +190,7 @@ Evidentials and Epistemic Modal in <fixed-case>K</fixed-case>orean: Evidence from Their Intractions - ChungminLee + ChungminLee 193–202 Y10-1023 lee-2010-evidentials @@ -199,7 +199,7 @@ Implementation of <fixed-case>K</fixed-case>orean Syllable Structures in the Typed Feature Structure Formalism Gyu-hyungLee Ye-seulPark - Yong-hunLee + Yong-hunLee 203–212 Y10-1024 lee-etal-2010-implementation @@ -215,8 +215,8 @@ e<fixed-case>S</fixed-case>patial<fixed-case>ML</fixed-case>: An Event-Driven Spatial Annotation Framework KiyongLee - JonathanWebster - Alex ChengyuFang + JonathanWebster + Alex ChengyuFang 223–232 Y10-1026 lee-etal-2010-espatialml @@ -233,8 +233,8 @@ Developing an Online <fixed-case>I</fixed-case>ndonesian Corpora Repository - RuliManurung - BayuDistiawan + RuliManurung + BayuDistiawan Desmond DarmaPutra 243–249 Y10-1028 @@ -250,8 +250,8 @@ Unsupervised Classification of Biomedical Abstracts Using Lexical Association JonathonRead - JonathanWebster - Alex ChengyuFang + JonathanWebster + Alex ChengyuFang 261–270 Y10-1030 read-etal-2010-unsupervised @@ -303,14 +303,14 @@ HaoYu YaoMeng YingliangLu - YingjuXia + YingjuXia 321–330 Y10-1036 yang-etal-2010-fault Syntactically Complex Demonstratives and Sortal Inherency - RichardZuber + RichardZuber 331–338 Y10-1037 zuber-2010-syntactically @@ -332,20 +332,20 @@ <fixed-case>GRASP</fixed-case>: Grammar- and Syntax-based Pattern-Finder for Collocation and Phrase Learning - Mei-huaChen - Chung-chiHuang - Shih-tingHuang - Jason S.Chang + Mei-huaChen + Chung-chiHuang + Shih-tingHuang + Jason S.Chang 357–364 Y10-1040 chen-etal-2010-grasp Mitigating Problems in Analogy-based <fixed-case>EBMT</fixed-case> with <fixed-case>SMT</fixed-case> and vice versa: A Case Study with Named Entity Transliteration - SandipanDandapat + SandipanDandapat SaraMorrissey - Sudip KumarNaskar - HaroldSomers + Sudip KumarNaskar + HaroldSomers 365–372 Y10-1041 dandapat-etal-2010-mitigating @@ -373,8 +373,8 @@ Using Corpus-based Linguistic Approaches in Sense Prediction Study - Jia-FeiHong - Sue-JinKer + Jia-FeiHong + Sue-JinKer Chu-RenHuang KathleenAhrens 399–407 @@ -390,8 +390,8 @@ The Specialized Vocabulary of Modern Patent Language: Semantic Associations in Patent Lexis - Darren Hsin-hungLin - Shelley Ching-yuHsieh + Darren Hsin-hungLin + Shelley Ching-yuHsieh 417–424 Y10-1047 lin-hsieh-2010-specialized @@ -408,7 +408,7 @@ An Approach toward Register Classification of Book Samples in the <fixed-case>B</fixed-case>alanced <fixed-case>C</fixed-case>orpus of <fixed-case>C</fixed-case>ontemporary <fixed-case>W</fixed-case>ritten <fixed-case>J</fixed-case>apanese WakakoKashino - ManabuOkumura + ManabuOkumura 433–438 Y10-1049 kashino-okumura-2010-approach @@ -424,9 +424,9 @@ A Supervised Machine Learning Approach for Event-Event Relation Identification - Anup KumarKolya + Anup KumarKolya AsifEkbal - SivajiBandyopadhyay + SivajiBandyopadhyay 447–454 Y10-1051 kolya-etal-2010-supervised @@ -458,7 +458,7 @@ YusukeMiyao AlastairButler KeiYoshimoto - Jun’ichiTsujii + Jun’ichiTsujii 481–488 Y10-1055 miyao-etal-2010-modular @@ -480,7 +480,7 @@ A Multi-Dimensional Analysis of <fixed-case>J</fixed-case>apanese Benefactives: The Case of the Yaru-Construction AkiraOtani - MarkSteedman + MarkSteedman 503–510 Y10-1058 otani-steedman-2010-multi @@ -509,7 +509,7 @@ Incorporate Credibility into Context for the Best Social Media Answers QiSu - Helen Kai-yunChen + Helen Kai-yunChen Chu-RenHuang 535–541 Y10-1062 @@ -518,7 +518,7 @@ A Query Focused Multi Document Automatic Summarization PinakiBhaskar - SivajiBandyopadhyay + SivajiBandyopadhyay 545–554 Y10-1063 bhaskar-bandyopadhyay-2010-query @@ -527,7 +527,7 @@ Through Low-Cost Annotation to Reliable Parsing Evaluation MarekGrác MilošJakubíček - VojtěchKovář + VojtěchKovář 555–562 Y10-1064 grac-etal-2010-low @@ -550,7 +550,7 @@ How Well Conditional Random Fields Can be Used in Novel Term Recognition XingZhang YanSong - Alex ChengyuFang + Alex ChengyuFang 583–592 Y10-1067 zhang-etal-2010-well @@ -569,8 +569,8 @@ Evan Liz CantojaBuhay Marie Joy PadillaEvardone Hansel BaguioNocon - Davis MuhajereenDimalen - Rachel EditaRoxas + Davis MuhajereenDimalen + Rachel EditaRoxas 603–611 Y10-1069 buhay-etal-2010-autolex @@ -585,7 +585,7 @@ Finding Emotion Holder from <fixed-case>B</fixed-case>engali Blog <fixed-case>T</fixed-case>exts—<fixed-case>A</fixed-case>n Unsupervised Syntactic Approach DipankarDas - SivajiBandyopadhyay + SivajiBandyopadhyay 621–628 Y10-1071 das-bandyopadhyay-2010-finding @@ -620,7 +620,7 @@ Using Various Features in Machine Learning to Obtain High Levels of Performance for Recognition of <fixed-case>J</fixed-case>apanese Notational Variants MasahiroKojima MasakiMurata - Jun’ichiKazama + Jun’ichiKazama KowKuroda AtsushiFujita EijiAramaki @@ -648,7 +648,7 @@ The Acquisition of Imperfective Aspect Marking in <fixed-case>K</fixed-case>orean as a Second Language by <fixed-case>J</fixed-case>apanese Learners - Ju-yeonRyu + Ju-yeonRyu KaoruHorie YasuhiroShirai 677–684 @@ -694,14 +694,14 @@ Workshop on Advanced Corpus Solutions - Janne BondiJohannessen + Janne BondiJohannessen 717–719 Y10-1083 johannessen-2010-workshop Degrees of Orality in Speech-like Corpora: Comparative Annotation of Chat and <fixed-case>E</fixed-case>-mail Corpora - EckhardBick + EckhardBick 721–729 Y10-1084 bick-2010-degrees @@ -717,16 +717,16 @@ Fast Syntactic Searching in Very Large Corpora for Many Languages MilošJakubíček AdamKilgarriff - DianaMcCarthy - PavelRychlý + DianaMcCarthy + PavelRychlý 741–747 Y10-1086 jakubicek-etal-2010-fast A Multilingual Speech Resource: The <fixed-case>N</fixed-case>ordic Dialect Corpus - Janne BondiJohannessen - JoelPriestley + Janne BondiJohannessen + JoelPriestley AndersNøklestad 749–758 Y10-1087 @@ -743,7 +743,7 @@ Advanced Corpus Solutions for Humanities Researchers JamesWilson - AnthonyHartley + AnthonyHartley SergeSharoff PaulStephenson 769–778 @@ -774,7 +774,7 @@ Towards the Global <fixed-case>S</fixed-case>enti<fixed-case>W</fixed-case>ord<fixed-case>N</fixed-case>et AmitavaDas - SivajiBandyopadhyay + SivajiBandyopadhyay 799–808 Y10-1092 das-bandyopadhyay-2010-towards @@ -782,7 +782,7 @@ Towards an Automatic Measurement of Verbal Lexicon Acquisition: The Case for a Young Children-versus-Adults Classification in <fixed-case>F</fixed-case>rench and <fixed-case>M</fixed-case>andarin YannDesalle - Shu-KaiHsieh + Shu-KaiHsieh BrunoGaume HintatCheung 809–818 @@ -791,10 +791,10 @@ Graph Representation of Synonymy and Translation Resources for Crosslinguistic Modelisation of Meaning - BenoîtGaillard + BenoîtGaillard YannickChudy PierreMagistry - Shu-KaiHsieh + Shu-KaiHsieh EmmanuelNavarro 819–830 Y10-1094 @@ -809,7 +809,7 @@ Computational Modeling of Verb Acquisition, from a Monolingual to a Bilingual Study - LaurentPrévot + LaurentPrévot Chun-HanChang YannDesalle 841–851 @@ -833,14 +833,14 @@ Natural Language Production in Database Semantics - RolandHausser + RolandHausser 875–884 Y10-1099 hausser-2010-natural Change of Location and Change of State: How Telicity is Attained - ChungminLee + ChungminLee 885–894 Y10-1100 lee-2010-change @@ -854,7 +854,7 @@ A Note on Pseudo-comparatives like “John is rich like <fixed-case>X</fixed-case>!” and “Like <fixed-case>X</fixed-case>, John is rich!” - BenjaminTsou + BenjaminTsou 907–915 Y10-1102 tsou-2010-note diff --git a/data/xml/Y11.xml b/data/xml/Y11.xml index 330b152529..d53a028994 100644 --- a/data/xml/Y11.xml +++ b/data/xml/Y11.xml @@ -3,7 +3,7 @@ Proceedings of the 25th Pacific Asia Conference on Language, Information and Computation - Helena HongGao + Helena HongGao MinghuiDong Institute of Digital Enhancement of Cognitive Processing, Waseda University
Singapore
@@ -24,7 +24,7 @@
<fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Name Transliteration with Bi-Directional Syllable-Based Maximum Matching - Oi YeeKwong + Oi YeeKwong 11–19 Y11-1002 kwong-2011-english @@ -35,7 +35,7 @@ YangshengJi NingXi ShujianHuang - JiajunChen + JiajunChen 20–30 Y11-1003 zhao-etal-2011-language @@ -67,7 +67,7 @@ Measuring Concept Concreteness from the Lexicographic Perspective - Oi YeeKwong + Oi YeeKwong 60–69 Y11-1007 kwong-2011-measuring @@ -89,7 +89,7 @@ Automatic Wrapper Generation and Maintenance - YingjuXia + YingjuXia YuhangYang ShuZhang HaoYu @@ -99,10 +99,10 @@ Evaluation via Negativa of <fixed-case>C</fixed-case>hinese Word Segmentation for Information Retrieval - Mike Tian-JianJiang + Mike Tian-JianJiang Cheng-WeiShih - Richard Tzong-HanTsai - Wen-LianHsu + Richard Tzong-HanTsai + Wen-LianHsu 100–109 Y11-1011 jiang-etal-2011-evaluation @@ -118,7 +118,7 @@ A Graph-based Bilingual Corpus Selection Approach for <fixed-case>SMT</fixed-case> - WenhanChao + WenhanChao ZhoujunLi 120–129 Y11-1013 @@ -135,7 +135,7 @@ Context Resolution of Verb Particle Constructions for <fixed-case>E</fixed-case>nglish to <fixed-case>H</fixed-case>indi Translation - NiladriChatterjee + NiladriChatterjee RenuBalyan 140–149 Y11-1015 @@ -160,9 +160,9 @@ <fixed-case>T</fixed-case>ibetan Word Segmentation as Syllable Tagging Using Conditional Random Field - HuidanLiu - MinghuaNuo - LonglongMa + HuidanLiu + MinghuaNuo + LonglongMa JianWu YepingHe 168–177 @@ -178,7 +178,7 @@ The <fixed-case>L</fixed-case>1 Acquisition of the Imperfective Aspect markers in <fixed-case>K</fixed-case>orean: a Comparison with <fixed-case>J</fixed-case>apanese - Ju-YeonRyu + Ju-YeonRyu 186–195 Y11-1020 ryu-2011-l1 @@ -214,7 +214,7 @@ A Bare-bones Constraint Grammar - EckhardBick + EckhardBick 226–235 Y11-1024 bick-2011-bare @@ -223,7 +223,7 @@ Spring Cleaning and Grammar Compression: Two Techniques for Detection of Redundancy in <fixed-case>HPSG</fixed-case> Grammars AntskeFokkens YiZhang - Emily M.Bender + Emily M.Bender 236–244 Y11-1025 fokkens-etal-2011-spring @@ -256,7 +256,7 @@ Annotating the Structure and Semantics of Fables - Oi YeeKwong + Oi YeeKwong 275–282 Y11-1029 kwong-2011-annotating @@ -264,7 +264,7 @@ Verbal Inflection in <fixed-case>H</fixed-case>indi: A Distributed Morphology Approach SmritiSingh - Vaijayanthi M.Sarma + Vaijayanthi M.Sarma 283–292 Y11-1030 singh-sarma-2011-verbal @@ -272,7 +272,7 @@ Word classes in <fixed-case>I</fixed-case>ndonesian: A linguistic reality or a convenient fallacy in natural language processing? MeladelMistica - TimothyBaldwin + TimothyBaldwin I WayanArka 293–302 Y11-1031 @@ -280,7 +280,7 @@ Automated Proof Reading of Clinical Notes - JonPatrick + JonPatrick DungNguyen 303–312 Y11-1032 @@ -306,7 +306,7 @@ Unsupervised Word Sense Disambiguation Using Neighborhood Knowledge - HeyanHuang + HeyanHuang ZhizhuoYang PingJian 333–342 @@ -316,7 +316,7 @@ Dependency-based Analysis for <fixed-case>T</fixed-case>agalog Sentences ErlynManguilimotan - YujiMatsumoto + YujiMatsumoto 343–352 Y11-1036 manguilimotan-matsumoto-2011-dependency @@ -338,7 +338,7 @@ In Situ Text Summarisation for Museum Visitors - TimothyBaldwin + TimothyBaldwin PatrickYe FabianBohnert IngridZukerman @@ -350,7 +350,7 @@ Iteratively Estimating Pattern Reliability and Seed Quality With Extraction Consistency Yi-HsunLee Chung-YaoChuang - Wen-LianHsu + Wen-LianHsu 382–391 Y11-1040 lee-etal-2011-iteratively @@ -367,7 +367,7 @@ A Listwise Approach to Coreference Resolution in Multiple Languages Oanh ThiTran Bach XuanNgo - Minh LeNguyen + Minh LeNguyen AkiraShimazu 400–409 Y11-1042 @@ -376,7 +376,7 @@ Combining Dependency and Constituent-based Syntactic Information for Anaphoricity Determination in Coreference Resolution FangKong - GuodongZhou + GuodongZhou 410–419 Y11-1043 kong-zhou-2011-combining @@ -395,7 +395,7 @@ A Hybrid Extraction Model for <fixed-case>C</fixed-case>hinese Noun/Verb Synonymous bi-gram Collocations WanyinLi - QinLu + QinLu 430–439 Y11-1045 li-lu-2011-hybrid @@ -425,7 +425,7 @@ An <fixed-case>E</fixed-case>nglish-<fixed-case>C</fixed-case>hinese Cross-lingual Word Semantic Similarity Measure Exploring Attributes and Relations LinDai - HeyanHuang + HeyanHuang 467–476 Y11-1049 dai-huang-2011-english @@ -433,8 +433,8 @@ Learning-to-Translate Based on the <fixed-case>S</fixed-case>-<fixed-case>SSTC</fixed-case> Annotation Schema Enya KongTang - ZaharinYusoff - ChristianBoitet + ZaharinYusoff + ChristianBoitet 477–484 Y11-1050 tang-etal-2011-learning @@ -507,7 +507,7 @@ <fixed-case>NERSIL</fixed-case> - the Named-Entity Recognition System for <fixed-case>I</fixed-case>ban Language Yong SooFong - Bali RanaivoMalanҫon + Bali RanaivoMalanҫon Alvin YeoWee 549–558 Y11-1059 @@ -517,7 +517,7 @@ Improving <fixed-case>PP</fixed-case> Attachment Disambiguation in a Rule-based Parser Yoon-HyungRoh Ki-YoungLee - Young-GilKim + Young-GilKim 559–566 Y11-1060 roh-etal-2011-improving @@ -544,7 +544,7 @@ TakafumiSuzuki KiyokoUchiyama RyotaTomisaka - AkikoAizawa + AkikoAizawa 587–596 Y11-1063 suzuki-etal-2011-analyzing @@ -563,7 +563,7 @@ A Construction Grammar Approach to Prepositional Phrase Attachment: Semantic Feature Analysis of <fixed-case>V</fixed-case> <fixed-case>NP</fixed-case>1 into <fixed-case>NP</fixed-case>2 Construction LiyinChen - Siaw-FongChung + Siaw-FongChung Chao-LinLiu 607–614 Y11-1065 diff --git a/data/xml/Y12.xml b/data/xml/Y12.xml index 2801bad424..17c1149517 100644 --- a/data/xml/Y12.xml +++ b/data/xml/Y12.xml @@ -4,7 +4,7 @@ Proceedings of the 26th Pacific Asia Conference on Language, Information, and Computation Y12-1 - RuliManurung + RuliManurung FrancisBond Faculty of Computer Science, Universitas Indonesia
Bali, Indonesia
@@ -32,28 +32,28 @@
Idiomaticity and Classical Traditions in Some <fixed-case>E</fixed-case>ast <fixed-case>A</fixed-case>sian Languages - Benjamin KTsou + Benjamin KTsou 39–55 Y12-1003 tsou-2012-idiomaticity Things between Lexicon and Grammar - YujiMatsumoto + YujiMatsumoto 56–57 Y12-1004 matsumoto-2012-things Social Media: Friend or Foe of Natural Language Processing? - TimothyBaldwin + TimothyBaldwin 58–59 Y12-1005 baldwin-2012-social Towards a Semantic Annotation of <fixed-case>E</fixed-case>nglish Television News - Building and Evaluating a Constraint Grammar <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et - EckhardBick + EckhardBick 60–69 Y12-1006 bick-2012-towards @@ -70,7 +70,7 @@ Automatic Domain Adaptation for Word Sense Disambiguation Based on Comparison of Multiple Classifiers KanakoKomiya - ManabuOkumura + ManabuOkumura 80–88 Y12-1008 komiya-okumura-2012-automatic @@ -126,15 +126,15 @@ <fixed-case>I</fixed-case>ndonesian Dependency Treebank: Annotation and Parsing NathanGreen - Septina DianLarasati - ZdenekZabokrtsky + Septina DianLarasati + ZdenekZabokrtsky 137–145 Y12-1014 green-etal-2012-indonesian Handling <fixed-case>I</fixed-case>ndonesian Clitics: A Dataset Comparison for an <fixed-case>I</fixed-case>ndonesian-<fixed-case>E</fixed-case>nglish Statistical Machine Translation System - Septina DianLarasati + Septina DianLarasati 146–152 Y12-1015 larasati-2012-handling @@ -182,14 +182,14 @@ Extracting Keywords from Multi-party Live Chats Su NamKim - TimothyBaldwin + TimothyBaldwin 199–208 Y12-1021 kim-baldwin-2012-extracting Extracting Networks of People and Places from Literary Texts - JohnLee + JohnLee Chak YanYeung 209–218 Y12-1022 @@ -205,7 +205,7 @@ Pattern Matching Refinements to Dictionary-Based Code-Switching Point Detection NathanielOco - Rachel EditaRoxas + Rachel EditaRoxas 229–236 Y12-1024 oco-roxas-2012-pattern @@ -257,7 +257,7 @@ Emotional Tendency Identification for Micro-blog Topics Based on Multiple Characteristics QuanchaoLiu ChongFeng - HeyanHuang + HeyanHuang 280–288 Y12-1030 liu-etal-2012-emotional @@ -265,7 +265,7 @@ Product Name Classification for Product Instance Distinction Hye-JinMin - Jong C.Park + Jong C.Park 289–298 Y12-1031 min-park-2012-product @@ -290,14 +290,14 @@ Applying Statistical Post-Editing to <fixed-case>E</fixed-case>nglish-to-<fixed-case>K</fixed-case>orean Rule-based Machine Translation System Ki-YoungLee - Young-GilKim + Young-GilKim 318–324 Y12-1034 lee-kim-2012-applying A Model of <fixed-case>V</fixed-case>ietnamese Person Named Entity Question Answering System - Mai-VuTran + Mai-VuTran Duc-TrongLe Xuan TuTran Tien-TungNguyen @@ -309,7 +309,7 @@ Towards a Semantic Annotation of <fixed-case>E</fixed-case>nglish Television News - Building and Evaluating a Constraint Grammar <fixed-case>F</fixed-case>rame<fixed-case>N</fixed-case>et ShaohuaYang HaiZhao - Bao-liangLu + Bao-liangLu 333–342 Y12-1036 yang-etal-2012-towards @@ -333,7 +333,7 @@ Introduction of a Probabilistic Language Model to Non-Factoid Question Answering Using Example <fixed-case>Q</fixed-case>&<fixed-case>A</fixed-case> Pairs - KyosukeYoshida + KyosukeYoshida TaroUeda MadokaIshioroshi HideyukiShibuki @@ -362,7 +362,7 @@ Anaphora Annotation in <fixed-case>H</fixed-case>indi Dependency <fixed-case>T</fixed-case>ree<fixed-case>B</fixed-case>ank PraveenDakwale HimanshuSharma - Dipti MSharma + Dipti MSharma 391–400 Y12-1042 dakwale-etal-2012-anaphora @@ -370,8 +370,8 @@ Improving Statistical Machine Translation with Processing Shallow Parsing Hoai-ThuVuong - Vinh VanNguyen - Viet HongTran + Vinh VanNguyen + Viet HongTran AkiraShimazu 401–407 Y12-1043 @@ -379,7 +379,7 @@ Psycholinguistics, Lexicography, and Word Sense Disambiguation - Oi YeeKwong + Oi YeeKwong 408–417 Y12-1044 kwong-2012-psycholinguistics @@ -396,7 +396,7 @@ The Headedness of <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Serial Verb Constructions: A Corpus-Based Study JingxiaLin Chu-RenHuang - HuaruiZhang + HuaruiZhang HongzhiXu 428–435 Y12-1046 @@ -435,7 +435,7 @@ Classifying Dialogue Acts in Multi-party Live Chats Su NamKim LawrenceCavedon - TimothyBaldwin + TimothyBaldwin 463–472 Y12-1050 kim-etal-2012-classifying-dialogue @@ -451,14 +451,14 @@ Deep Lexical Acquisition of Type Properties in Low-resource Languages: A Case Study in <fixed-case>W</fixed-case>ambaya JeremyNicholson RachelNordlinger - TimothyBaldwin + TimothyBaldwin 481–490 Y12-1052 nicholson-etal-2012-deep <fixed-case>C</fixed-case>hinese Sentiments on the Clouds: A Preliminary Experiment on Corpus Processing and Exploration on Cloud Service - Shu-KaiHsieh + Shu-KaiHsieh Yu-YunChang Meng-XianShih 491–497 @@ -491,9 +491,9 @@ Analysis of Social and Expressive Factors of Requests by Methods of Text Mining - DašaMunková + DašaMunková MichalMunk - ZuzanaFráterová + ZuzanaFráterová BeátaĎuračková 515–524 Y12-1056 @@ -502,7 +502,7 @@ Set Expansion using Sibling Relations between Semantic Categories ShoTakase - NaoakiOkazaki + NaoakiOkazaki KentaroInui 525–534 Y12-1057 @@ -519,7 +519,7 @@ Text Readability Classification of Textbooks of a Low-Resource Language - ZahurulIslam + ZahurulIslam AlexanderMehler RashedurRahman 545–553 @@ -538,7 +538,7 @@ Improved Constituent Context Model with Features YunHuang MinZhang - Chew LimTan + Chew LimTan 564–573 Y12-1061 huang-etal-2012-improved-constituent @@ -591,7 +591,7 @@ Psych-Predicates: How They Are Different - ChungminLee + ChungminLee 626–631 Y12-1068 lee-2012-psych @@ -607,7 +607,7 @@ Gap in “Gapless” Relative Clauses in <fixed-case>K</fixed-case>orean and Other <fixed-case>A</fixed-case>sian Languages Jeong-ShikLee - ChungminLee + ChungminLee 640–645 Y12-1070 lee-lee-2012-gap diff --git a/data/xml/Y13.xml b/data/xml/Y13.xml index 69a4a8bd6e..9eb0e69341 100644 --- a/data/xml/Y13.xml +++ b/data/xml/Y13.xml @@ -62,9 +62,9 @@ A Quantitative Comparative Study of Prosodic and Discourse Units, the Case of <fixed-case>F</fixed-case>rench and <fixed-case>T</fixed-case>aiwan <fixed-case>M</fixed-case>andarin - LaurentPrévot + LaurentPrévot Shu-ChuanTseng - Alvin Cheng-HsienChen + Alvin Cheng-HsienChen KlimPeshkov 92–101 Y13-1007 @@ -73,7 +73,7 @@ Corpus-Based Research on Tense Analysis and Rhetorical Structure in Journal Article Abstracts Pin-ningTu - Shih-PingWang + Shih-PingWang 102–107 Y13-1008 tu-wang-2013-corpus @@ -90,7 +90,7 @@ A Study of the Effectiveness of Suffixes for <fixed-case>C</fixed-case>hinese Word Segmentation XiaoqingLi - ChengqingZong + ChengqingZong Keh-YihSu 118–125 Y13-1010 @@ -107,14 +107,14 @@ Difficulties in Perception and Pronunciation of <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Disyllabic Word Tone Acquisition: A Study of Some <fixed-case>J</fixed-case>apanese <fixed-case>U</fixed-case>niversity Students YutingDong YasushiTsubota - MasatakeDantsuji + MasatakeDantsuji 143–152 Y13-1012 dong-etal-2013-difficulties Exploring the <fixed-case>C</fixed-case>hinese Mental Lexicon with Word Association Norms - Oi YeeKwong + Oi YeeKwong 153–162 Y13-1013 kwong-2013-exploring @@ -123,7 +123,7 @@ Towards Automatic Error Type Classification of <fixed-case>J</fixed-case>apanese Language Learners’ Writings HiromiOyama MamoruKomachi - YujiMatsumoto + YujiMatsumoto 163–172 Y13-1014 oyama-etal-2013-towards @@ -137,7 +137,7 @@ Clausal-Packaging of Path of Motion in Second Language Acquisition of <fixed-case>R</fixed-case>ussian and <fixed-case>S</fixed-case>panish - KawaiChui + KawaiChui Hsiang-linYeh Wen-ChunLan Yu-HanCheng @@ -166,7 +166,7 @@ SachiYasuda HikariKonishi MizuhoImada - KikuoMaekawa + KikuoMaekawa 206–214 Y13-1019 asahara-etal-2013-bccwj @@ -174,15 +174,15 @@ A Corpus-based Approach to Linguistic Function HengbinYan - JonathanWebster + JonathanWebster 215–221 Y13-1020 yan-webster-2013-corpus A Case Study of a Free Word Order - VladislavKuboň - MarkétaLopatková + VladislavKuboň + MarkétaLopatková JiříMírovský 222–231 Y13-1021 @@ -197,7 +197,7 @@ <fixed-case>C</fixed-case>hin<fixed-case>G</fixed-case>ram: A <fixed-case>TRALE</fixed-case> Implementation of an <fixed-case>HPSG</fixed-case> Fragment for <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese - StefanMüller + StefanMüller JannaLipenkova 240–249 Y13-1023 @@ -215,7 +215,7 @@ Transliteration Extraction from Classical <fixed-case>C</fixed-case>hinese Buddhist Literature Using Conditional Random Fields Yu-ChunWang - Richard Tzong-HanTsai + Richard Tzong-HanTsai 260–266 Y13-1025 wang-tsai-2013-transliteration @@ -256,7 +256,7 @@ An Application of Comparative Corpora of Interactional Data – Toward the Sound Profiles of Sites of Initiation in <fixed-case>F</fixed-case>rench and <fixed-case>M</fixed-case>andarin Recycling Repair - Helen Kai-yunChen + Helen Kai-yunChen 302–311 Y13-1030 chen-2013-application @@ -264,14 +264,14 @@ Of-Constructions in the Predicate of Demonstrate and Show in Academic Discourse LiyinChen - Siaw-FongChung + Siaw-FongChung 312–321 Y13-1031 chen-chung-2013-constructions Spatial Particles in <fixed-case>E</fixed-case>nglish: A Quantitative Corpus-Based Approach to the Conceptualization of Symmetry in Bodily Orientation - Alvin Cheng-HsienChen + Alvin Cheng-HsienChen 322–328 Y13-1032 chen-2013-spatial @@ -292,7 +292,7 @@ HidekiAsoh AkiraTakagi TatsuhiroKonishi - YukihiroItoh + YukihiroItoh 339–348 Y13-1034 noguchi-etal-2013-event @@ -343,7 +343,7 @@ Transliteration Systems across <fixed-case>I</fixed-case>ndian Languages Using Parallel Corpora RishabhSrivastava - Riyaz AhmadBhat + Riyaz AhmadBhat 390–398 Y13-1040 srivastava-bhat-2013-transliteration @@ -360,7 +360,7 @@ Classifying Questions in Question Answering System Using Finite State Machines with a Simple Learning Approach Mohammad MoinulHoque - TeresaGoncalves + TeresaGoncalves PauloQuaresma 409–414 Y13-1042 @@ -377,22 +377,22 @@ <fixed-case>V</fixed-case>ietnamese Text Accent Restoration with Statistical Machine Translation Luan-NghiaPham - Viet-HongTran - Vinh-VanNguyen + Viet-HongTran + Vinh-VanNguyen 423–429 Y13-1044 pham-etal-2013-vietnamese A Compact <fixed-case>FP</fixed-case>-Tree for Fast Frequent Pattern Retrieval - Tri ThanhNguyen + Tri ThanhNguyen 430–439 Y13-1045 nguyen-2013-compact <fixed-case>ML</fixed-case>-Tuned Constraint Grammars - EckhardBick + EckhardBick 440–449 Y13-1046 bick-2013-ml @@ -451,7 +451,7 @@ Automatic Clause Boundary Annotation in the <fixed-case>H</fixed-case>indi Treebank RahulSharma SomaPaul - Riyaz AhmadBhat + Riyaz AhmadBhat SambhavJain 499–504 Y13-1053 @@ -520,14 +520,14 @@ Automatic Identification of <fixed-case>E</fixed-case>nglish Collocation Errors Based on Dependency Relations - Zhao-MingGao + Zhao-MingGao 550–555 Y13-2006 gao-2013-automatic A <fixed-case>J</fixed-case>apanese Learning Support System Matching Individual Abilities - TakahiroOhno + TakahiroOhno ZyunitiroEdani AyatoInoue DongliHan diff --git a/data/xml/Y14.xml b/data/xml/Y14.xml index 1c46d4f77b..c3e071f9e7 100644 --- a/data/xml/Y14.xml +++ b/data/xml/Y14.xml @@ -16,7 +16,7 @@ Robust Semantics for Semantic Parsing - MarkSteedman + MarkSteedman 1–1 Y14-1001 steedman-2014-robust @@ -30,14 +30,14 @@ Registerial Cartography: Context-based Mapping of Text Types and their Rhetorical-relational Organization - Christian M.I.M.Matthiessen + Christian M.I.M.Matthiessen 5–26 Y14-1003 matthiessen-2014-registerial Discourse for Machine Translation. - BonnieWebber + BonnieWebber 27–27 Y14-1004 webber-2014-discourse @@ -72,7 +72,7 @@ Phonological Suppression of Anaphoric Wh-expressions in <fixed-case>E</fixed-case>nglish and <fixed-case>K</fixed-case>orean - Myung-KwanPark + Myung-KwanPark 57–64 Y14-1009 park-2014-phonological @@ -83,7 +83,7 @@ SonseShimaoka KazetoYamamoto YotaroWatanabe - NaoakiOkazaki + NaoakiOkazaki KentaroInui 65–74 Y14-1010 @@ -96,7 +96,7 @@ Cen-ChiehChen ChadLiu Chun-HungLu - Wen-LianHsu + Wen-LianHsu 75–84 Y14-1011 chang-etal-2014-semantic-frame @@ -119,7 +119,7 @@ A Corpus-Based Quantitative Study of Nominalizations across <fixed-case>C</fixed-case>hinese and <fixed-case>B</fixed-case>ritish Media <fixed-case>E</fixed-case>nglish YingLiu - Alex ChengyuFang + Alex ChengyuFang NaixingWei 101–110 Y14-1014 @@ -151,7 +151,7 @@ Taking Antonymy Mask off in Vector Space EnricoSantus - QinLu + QinLu AlessandroLenci Chu-RenHuang 135–144 @@ -207,7 +207,7 @@ Automatically Building a Corpus for Sentiment Analysis on <fixed-case>I</fixed-case>ndonesian Tweets Alfan FarizkiWicaksono ClaraVania - BayuDistiawan + BayuDistiawan MirnaAdriani 185–194 Y14-1024 @@ -233,7 +233,7 @@ How Mutual Knowledge Constrains the Choice of Anaphoric Demonstratives in <fixed-case>J</fixed-case>apanese and <fixed-case>E</fixed-case>nglish - David YoshikazuOshima + David YoshikazuOshima EricMcCready 214–223 Y14-1027 @@ -248,7 +248,7 @@ Annotating Article Errors in <fixed-case>S</fixed-case>panish Learner Texts: Design and Evaluation of an Annotation Scheme - M. PilarValverde Ibañez + M. PilarValverde Ibañez AkiraOhtani 234–243 Y14-1029 @@ -288,9 +288,9 @@ <fixed-case>T</fixed-case>ake<fixed-case>T</fixed-case>wo: A Word Aligner based on Self Learning - JimChang - Jian-ChengWu - JasonChang + JimChang + Jian-ChengWu + JasonChang 282–291 Y14-1034 chang-etal-2014-taketwo @@ -311,7 +311,7 @@ Readability of <fixed-case>B</fixed-case>angla News Articles for Children - ZahrulIslam + ZahrulIslam RashedurRahman 309–317 Y14-1037 @@ -321,7 +321,7 @@ Focusing on a Subset of Scripts Enhances the Learning Efficiency of Second Language Writing System Ching-PongAu Yuk-ManCheung - CharlesChen Jr. + CharlesChen Jr. 318–327 Y14-1038 au-etal-2014-focusing @@ -331,7 +331,7 @@ MiaoFan QiangZhou EmilyChang - Thomas FangZheng + Thomas FangZheng 328–337 Y14-1039 fan-etal-2014-transition @@ -374,7 +374,7 @@ A Quantitative View of Short Utterances in Daily Conversation: A Case Study of Thats right, Thats true and Thats correct YanjiaoLi - Alex ChengyuFang + Alex ChengyuFang JingCao 378–386 Y14-1044 @@ -384,7 +384,7 @@ A Listenability Measuring Method for an Adaptive Computer-assisted Language Learningand Teaching System KatsunoriKotani ShotaUeda - TakehikoYoshimi + TakehikoYoshimi HiroakiNanjo 387–394 Y14-1045 @@ -437,7 +437,7 @@ On the Functional Differences between the Discourse Particles Ne and Yone in <fixed-case>J</fixed-case>apanese. - David YoshikazuOshima + David YoshikazuOshima 442–451 Y14-1051 oshima-2014-functional @@ -477,7 +477,7 @@ A Hierarchical Word Sequence Language Model XiaoyiWu - YujiMatsumoto + YujiMatsumoto 489–494 Y14-1056 wu-matsumoto-2014-hierarchical @@ -525,16 +525,16 @@ A Keyword-based Monolingual Sentence Aligner in Text Simplification - Chung-ChiHuang + Chung-ChiHuang 542–550 Y14-1062 huang-2014-keyword Automatic Detection of Comma Splices - JohnLee + JohnLee Chak YanYeung - MartinChodorow + MartinChodorow 551–560 Y14-1063 lee-etal-2014-automatic @@ -633,7 +633,7 @@ K-repeating Substrings: a String-Algorithmic Approach to Privacy-Preserving Publishing of Textual Data YusukeMatsubara - KoitiHasida + KoitiHasida 658–667 Y14-1075 matsubara-hasida-2014-k diff --git a/data/xml/Y15.xml b/data/xml/Y15.xml index 5e97900097..a8e7480c88 100644 --- a/data/xml/Y15.xml +++ b/data/xml/Y15.xml @@ -17,7 +17,7 @@ Two-level Word Class Categorization Model in Analytic Languages and Its Implications for <fixed-case>POS</fixed-case> Tagging in <fixed-case>M</fixed-case>odern <fixed-case>C</fixed-case>hinese Corpora RenqiangWang - ChangningHuang + ChangningHuang 1–10 Y15-1001 wang-huang-2015-two @@ -44,7 +44,7 @@ SalimaHarrat SalmaJamoussi MouradAbbas - KamelSmaili + KamelSmaili 26–34 Y15-1004 meftouh-etal-2015-machine @@ -63,8 +63,8 @@ Computing Semantic Text Similarity Using Rich Features - YangLiu - ChengjieSun + YangLiu + ChengjieSun LeiLin XiaolongWang YumingZhao @@ -109,7 +109,7 @@ Distant Supervision for Entity Linking MiaoFan QiangZhou - Thomas FangZheng + Thomas FangZheng 79–86 Y15-1010 fan-etal-2015-distant @@ -127,7 +127,7 @@ Fast and Large-scale Unsupervised Relation Extraction ShoTakase - NaoakiOkazaki + NaoakiOkazaki KentaroInui 96–105 Y15-1012 @@ -137,7 +137,7 @@ Reducing Lexical Features in Parsing by Word Embeddings HiroyaKomatsu RanTian - NaoakiOkazaki + NaoakiOkazaki KentaroInui 106–113 Y15-1013 @@ -162,7 +162,7 @@ Unsupervised and Lightly Supervised Part-of-Speech Tagging Using Recurrent Neural Networks OthmanZennaki NasredineSemmar - LaurentBesacier + LaurentBesacier 133–142 Y15-1016 zennaki-etal-2015-unsupervised @@ -195,7 +195,7 @@ A Comprehensive Filter Feature Selection for Improving Document Classification Nguyen Hoai NamLe - Bao QuocHo + Bao QuocHo 169–177 Y15-1020 le-ho-2015-comprehensive @@ -223,7 +223,7 @@ Sentiment Classification of <fixed-case>A</fixed-case>rabic Documents: Experiments with multi-type features and ensemble algorithms AmineBayoudhi HatemGhorbel - Lamia HadrichBelguith + Lamia HadrichBelguith 196–205 Y15-1023 bayoudhi-etal-2015-sentiment @@ -254,14 +254,14 @@ A Comparative Study on <fixed-case>M</fixed-case>andarin and <fixed-case>C</fixed-case>antonese Resultative Verb Compounds Helena Yan PingLau - Sophia Yat MeiLee + Sophia Yat MeiLee 231–239 Y15-1027 lau-lee-2015-comparative Complex-<fixed-case>NP</fixed-case> Islands in <fixed-case>K</fixed-case>orean: An Experimental Approach - Yong-hunLee + Yong-hunLee YeonkyungPark 240–249 Y15-1028 @@ -271,7 +271,7 @@ Two Types of Multiple Subject Constructions (<fixed-case>MSC</fixed-case>s) in <fixed-case>K</fixed-case>orean Ji-HyeKim EunahKim - JamesYoon + JamesYoon 250–258 Y15-1029 kim-etal-2015-two @@ -282,7 +282,7 @@ VichetChea AndrewFinch MasaoUtiyama - EiichiroSumita + EiichiroSumita 259–269 Y15-1030 kyaw-thu-etal-2015-large @@ -291,7 +291,7 @@ <fixed-case>E</fixed-case>nglish to <fixed-case>C</fixed-case>hinese Translation: How <fixed-case>C</fixed-case>hinese Character Matters RuiWang HaiZhao - Bao-LiangLu + Bao-LiangLu 270–280 Y15-1031 wang-etal-2015-english @@ -310,7 +310,7 @@ Large-scale Dictionary Construction via Pivot-based Statistical Machine Translation with Significance Pruning and Neural Network Features RajDabre ChenhuiChu - FabienCromieres + FabienCromieres ToshiakiNakazawa SadaoKurohashi 289–297 @@ -319,7 +319,7 @@ Annotation and Classification of <fixed-case>F</fixed-case>rench Feedback Communicative Functions - LaurentPrévot + LaurentPrévot JanGorisch SankarMukherjee 298–306 @@ -357,7 +357,7 @@ The Cross-modal Representation of Metaphors YutungChang - KawaiChui + KawaiChui 332–340 Y15-1038 chang-chui-2015-cross @@ -365,7 +365,7 @@ Writing to Read: the Case of <fixed-case>C</fixed-case>hinese QiZhang - RonanReilly + RonanReilly 341–350 Y15-1039 zhang-reilly-2015-writing @@ -373,14 +373,14 @@ Design of a Learner Corpus for Listening and Speaking Performance KatsunoriKotani - TakehikoYoshimi + TakehikoYoshimi 351–358 Y15-1040 kotani-yoshimi-2015-design Understanding Infants’ Language Development in Relation to Levels of Consciousness: An Approach in Building up an Agent-based Model - Helena HongGao + Helena HongGao CanGuo 359–368 Y15-1041 @@ -406,7 +406,7 @@ Self Syntactico-Semantic Enrichment of <fixed-case>LMF</fixed-case> Normalized Dictionaries ImenElleuch BilelGargouri - AbdelmajidBen Hamadou + AbdelmajidBen Hamadou 387–395 Y15-1044 elleuch-etal-2015-self @@ -428,7 +428,7 @@ Not Voice but Case Identity in <fixed-case>VP</fixed-case> Ellipsis of <fixed-case>E</fixed-case>nglish - MyungkwanPark + MyungkwanPark SunjooChoi 413–421 Y15-1047 @@ -437,7 +437,7 @@ A Statistical Modeling of the Correlation between Island Effects and Working-memory Capacity for <fixed-case>L</fixed-case>2 Learners EuheeKim - MyungkwanPark + MyungkwanPark 422–430 Y15-1048 kim-park-2015-statistical @@ -462,7 +462,7 @@ An Improved Hierarchical Word Sequence Language Model Using Directional Information XiaoyiWu - YujiMatsumoto + YujiMatsumoto 449–454 Y15-1051 wu-matsumoto-2015-improved @@ -508,7 +508,7 @@ YoshinariFujinuma HikaruYokono PascualMartínez-Gómez - AkikoAizawa + AkikoAizawa 488–495 Y15-1056 fujinuma-etal-2015-distant @@ -563,7 +563,7 @@ Corpus annotation with a linguistic analysis of the associations between event mentions and spatial expressions Jin-WooChung JinseonYou - Jong C.Park + Jong C.Park 535–543 Y15-1062 chung-etal-2015-corpus @@ -629,7 +629,7 @@ Toward a Corpus of <fixed-case>C</fixed-case>antonese Verbal Comments and their Classification by Multi-dimensional Analysis - Oi YeeKwong + Oi YeeKwong 10–18 Y15-2002 kwong-2015-toward @@ -639,7 +639,7 @@ SoyunJeong YoungminPark SangwooKang - JungyunSeo + JungyunSeo 19–26 Y15-2003 jeong-etal-2015-improved @@ -647,19 +647,19 @@ An Arguing Lexicon for Stance Classification on Short Text Comments in <fixed-case>C</fixed-case>hinese Ju-hanChuang - Shu-KaiHsieh + Shu-KaiHsieh 27–36 Y15-2004 chuang-hsieh-2015-arguing Learning Sentential Patterns of Various Rhetoric Moves for Assisted Academic Writing - JimChang + JimChang Hsiang-LingHsu JoanneBoisson Hao-ChunPeng Yu-HsuanWu - Jason S.Chang + Jason S.Chang 37–45 Y15-2005 chang-etal-2015-learning @@ -691,7 +691,7 @@ Semi-automatic Filtering of Translation Errors in Triangle Corpus Sung-KwonChoi Jong-HunShin - Young-GilKim + Young-GilKim 72–79 Y15-2009 choi-etal-2015-semi @@ -700,7 +700,7 @@ Cross-language Projection of Dependency Trees for Tree-to-tree Machine Translation YuShen ChenhuiChu - FabienCromieres + FabienCromieres SadaoKurohashi 80–88 Y15-2010 @@ -717,7 +717,7 @@ Finding the Origin of a Translated Historical Document - ZahrulIslam + ZahrulIslam NatiaDundua 96–105 Y15-2012 @@ -727,7 +727,7 @@ Improving the Performance of an Example-Based Machine Translation System Using a Domain-specific Bilingual Lexicon NasredineSemmar OthmanZennaki - MeriamaLaib + MeriamaLaib 106–115 Y15-2013 semmar-etal-2015-improving @@ -736,7 +736,7 @@ A Multifactorial Analysis of <fixed-case>E</fixed-case>nglish Particle Movement in <fixed-case>K</fixed-case>orean <fixed-case>EFL</fixed-case> Learners’ Writings Gyu-HyeongLee Ha-EungKim - Yong-hunLee + Yong-hunLee 116–124 Y15-2014 lee-etal-2015-multifactorial @@ -745,7 +745,7 @@ An Efficient Annotation for Phrasal Verbs using Dependency Information MasayukiKomai HiroyukiShindo - YujiMatsumoto + YujiMatsumoto 125–131 Y15-2015 komai-etal-2015-efficient @@ -754,7 +754,7 @@ Color Aesthetics and Social Networks in Complete Tang Poems: Explorations and Discoveries Chao-LinLiu HongsuWang - Wen-HueiCheng + Wen-HueiCheng Chu-TingHsu Wei-YunChiu 132–141 @@ -796,7 +796,7 @@ Distinguishing between True and False Stories using various Linguistic Features - YaakovHacohen-Kerner + YaakovHacohen-Kerner RakefetDilmon ShimonFriedlich Daniel NisimCohen @@ -806,7 +806,7 @@ Bilingually motivated segmentation and generation of word translations using relatively small translation data sets - Kavitha KarimbiMahesh + Kavitha KarimbiMahesh LuísGomes JoséLopes 187–196 @@ -868,7 +868,7 @@ A Corpus-based Comparatively Study on the Semantic Features and Syntactic patterns of Yòu/Hái in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese YuncuiZhang - PengyuanLiu + PengyuanLiu 249–257 Y15-2029 zhang-liu-2015-corpus @@ -894,7 +894,7 @@ Automatic Classification of Spoken Languages using Diverse Acoustic Features - YaakovHacohen-Kerner + YaakovHacohen-Kerner RubenHagege 275–285 Y15-2032 @@ -929,7 +929,7 @@ Feature Reduction Using Ensemble Approach - YingjuXia + YingjuXia CuiqinHou ZhuoranXu JunSun @@ -960,7 +960,7 @@ Dependency parsing for <fixed-case>C</fixed-case>hinese long sentence: A second-stage main structure parsing method - BoLi + BoLi YunfeiLong WeiguangQu 337–344 diff --git a/data/xml/Y16.xml b/data/xml/Y16.xml index a6ca9a2d81..4abfd29ef4 100644 --- a/data/xml/Y16.xml +++ b/data/xml/Y16.xml @@ -15,14 +15,14 @@ The <fixed-case>C</fixed-case>ore<fixed-case>G</fixed-case>ram Project: Theoretical Linguistics, Theory Development and Verification - StefanMüller + StefanMüller Y16-1001 3–3 muller-2016-coregram Inferring Methodological Meta-knowledge from Large Biomedical Corpora - GoranNenadic + GoranNenadic Y16-1002 5–5 nenadic-2016-inferring @@ -64,7 +64,7 @@ The grammar and semantics of disjuncts in World Englishes - ShirleyDita + ShirleyDita Y16-1008 35–35 dita-2016-grammar @@ -92,7 +92,7 @@ <fixed-case>K</fixed-case>orean Language Resources for Everyone JungyeulPark Jeen-PyoHong - Jeong-WonCha + Jeong-WonCha Y16-2002 49–58 park-etal-2016-korean @@ -108,7 +108,7 @@ A Generalized Framework for Hierarchical Word Sequence Language Model XiaoyiWu KevinDuh - YujiMatsumoto + YujiMatsumoto Y16-2004 69–75 wu-etal-2016-generalized @@ -116,7 +116,7 @@ Processing <fixed-case>E</fixed-case>nglish <fixed-case>I</fixed-case>sland Sentences by <fixed-case>K</fixed-case>orean <fixed-case>EFL</fixed-case> Learners YeonkyungPark - Yong-hunLee + Yong-hunLee Y16-2005 77–84 park-lee-2016-processing @@ -125,7 +125,7 @@ Multiple Emotions Detection in Conversation Transcripts Duc-AnhPhan HiroyukiShindo - YujiMatsumoto + YujiMatsumoto Y16-2006 85–94 phan-etal-2016-multiple @@ -183,7 +183,7 @@ Event Based Emotion Classification for News Articles MingleiLi DaWang - QinLu + QinLu YunfeiLong Y16-2013 153–162 @@ -201,7 +201,7 @@ Integrating Word Embedding Offsets into the Espresso System for Part-Whole Relation Extraction Van-ThuyPhi - YujiMatsumoto + YujiMatsumoto Y16-2015 173–181 phi-matsumoto-2016-integrating @@ -210,7 +210,7 @@ An Experimental Study of Subject Properties in <fixed-case>K</fixed-case>orean Multiple Subject Constructions (<fixed-case>MSC</fixed-case>s) Ji-HyeKim EunahKim - JamesYoon + JamesYoon Y16-2016 183–190 kim-etal-2016-experimental @@ -224,10 +224,10 @@ Planting Trees in the Desert: Delexicalized Tagging and Parsing Combined - DanielZeman + DanielZeman DavidMareček ZhiweiYu - ZdeněkŽabokrtský + ZdeněkŽabokrtský Y16-2018 199–207 zeman-etal-2016-planting @@ -268,7 +268,7 @@ SumitMaharjan MasakiSaito KotaYamaguchi - NaoakiOkazaki + NaoakiOkazaki TakayukiOkatani KentaroInui Y16-2022 @@ -277,7 +277,7 @@ Strong Associations Can Be Weak: Some Thoughts on Cross-lingual Word Webs for Translation - Oi YeeKwong + Oi YeeKwong Y16-2023 249–257 kwong-2016-strong @@ -285,8 +285,8 @@ Dealing with Out-Of-Vocabulary Problem in Sentence Alignment Using Word Similarity Hai-LongTrieu - Le-MinhNguyen - Phuong-ThaiNguyen + Le-MinhNguyen + Phuong-ThaiNguyen Y16-2024 259–266 trieu-etal-2016-dealing @@ -302,7 +302,7 @@ Toward the automatic extraction of knowledge of usable goods MeiUemura NahoOrita - NaoakiOkazaki + NaoakiOkazaki KentaroInui Y16-2026 277–285 @@ -361,7 +361,7 @@ The Inner Circle vs. the Outer Circle or <fixed-case>B</fixed-case>ritish <fixed-case>E</fixed-case>nglish vs. <fixed-case>A</fixed-case>merican <fixed-case>E</fixed-case>nglish - Yong-hunLee + Yong-hunLee Ki-sukJun Y16-3004 339–346 @@ -371,7 +371,7 @@ A Correlation Analysis of <fixed-case>E</fixed-case>nglish Particle Placement of Three <fixed-case>E</fixed-case>ast <fixed-case>A</fixed-case>sian <fixed-case>EFL</fixed-case> Learners Writings Ha-EungKim Gyu-HyeongLee - Yong-hunLee + Yong-hunLee Y16-3005 347–354 kim-etal-2016-correlation @@ -456,7 +456,7 @@ NathanielOco Leif RomeritchSyliongka TodAllman - Rachel EditaRoxas + Rachel EditaRoxas Y16-3015 433–438 oco-etal-2016-philippine @@ -542,7 +542,7 @@ The Cloud of Knowing: Non-factive al-ta ‘know’ (as a Neg-raiser) in <fixed-case>K</fixed-case>orean - ChungminLee + ChungminLee SeungjinHong Y16-3026 527–533 @@ -553,7 +553,7 @@ MasatoshiSuzuki KojiMatsuda SatoshiSekine - NaoakiOkazaki + NaoakiOkazaki KentaroInui Y16-3027 535–544 diff --git a/data/xml/Y17.xml b/data/xml/Y17.xml index 123f287aad..ed1ee94353 100644 --- a/data/xml/Y17.xml +++ b/data/xml/Y17.xml @@ -4,7 +4,7 @@ Proceedings of the 31st Pacific Asia Conference on Language, Information and Computation Y17-1 - Rachel EditaRoxas + Rachel EditaRoxas The National University (Phillippines) November 2017 @@ -66,7 +66,7 @@ The Phrasal-Prepositional Verbs in <fixed-case>P</fixed-case>hilippine <fixed-case>E</fixed-case>nglish: A Corpus-based Analysis JennibelleElla - ShirleyDita + ShirleyDita 34–41 Y17-1008 ella-dita-2017-phrasal @@ -131,7 +131,7 @@ The Importance of Automatic Syntactic Features in <fixed-case>V</fixed-case>ietnamese Named Entity Recognition Thai-HoangPham - PhuongLe-Hong + PhuongLe-Hong 97–103 Y17-1016 pham-le-hong-2017-importance @@ -171,7 +171,7 @@ A Parallel Recurrent Neural Network for Language Modeling with <fixed-case>POS</fixed-case> Tags ChaoSu - HeyanHuang + HeyanHuang ShuminShi YuhangGuo HaoWu @@ -182,7 +182,7 @@ Identifying Deception in <fixed-case>I</fixed-case>ndonesian Transcribed Interviews through Lexical-based Approach TifaniWarnita - Dessi PujiLestari + Dessi PujiLestari 148–154 Y17-1022 warnita-lestari-2017-identifying @@ -205,7 +205,7 @@ Remarks on epistemically biased questions - David YoshikazuOshima + David YoshikazuOshima 169–177 Y17-1025 oshima-2017-remarks @@ -235,7 +235,7 @@ Subjecthood and Grammatical Relations in <fixed-case>K</fixed-case>orean: An Experimental Study with Honorific Agreement and Plural Copying Ji-HyeKim - Yong-HunLee + Yong-HunLee James Hye-SukYoon 206–213 Y17-1029 @@ -251,7 +251,7 @@ A Corpus-based Analysis of Near-Synonymous Sentence-final Particles in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese: “bale” and “eryi” XuefengGao - Yat-MeiLee + Yat-MeiLee 222–230 Y17-1031 gao-lee-2017-corpus @@ -269,7 +269,7 @@ Word Learning by Young Bilinguals: Understanding the Denotation and Connotation Differences of “Cut” Verbs in <fixed-case>E</fixed-case>nglish and <fixed-case>C</fixed-case>hinese Keng HweeNeo - HelenaGao + HelenaGao 241–248 Y17-1033 neo-gao-2017-word @@ -317,7 +317,7 @@ Rule-based Reordering and Post-Processing for <fixed-case>I</fixed-case>ndonesian-<fixed-case>K</fixed-case>orean Statistical Machine Translation Candy OliviaMawalim - Dessi PujiLestari + Dessi PujiLestari AyuPurwarianti 287–295 Y17-1039 @@ -326,7 +326,7 @@ Sentence Complexity Estimation for <fixed-case>C</fixed-case>hinese-speaking Learners of <fixed-case>J</fixed-case>apanese JunLiu - YujiMatsumoto + YujiMatsumoto 296–302 Y17-1040 liu-matsumoto-2017-sentence @@ -366,7 +366,7 @@ A Crowdsourcing Approach for Annotating Causal Relation Instances in <fixed-case>W</fixed-case>ikipedia KazuakiHanawa AkiraSasaki - NaoakiOkazaki + NaoakiOkazaki KentaroInui 336–345 Y17-1045 @@ -393,7 +393,7 @@ Extracting Important Tweets for News Writers using Recurrent Neural Network with Attention Mechanism and Multi-task Learning TaroMiyazaki ShinToriumi - YukaTakei + YukaTakei IchiroYamada JunGoto 363–369 @@ -402,7 +402,7 @@ Tweet Extraction for News Production Considering Unreality - YukaTakei + YukaTakei TaroMiyazaki IchiroYamada JunGoto @@ -422,8 +422,8 @@ Investigating Phrase-Based and Neural-Based Machine Translation on Low-Resource Settings Hai LongTrieu - Duc-VuTran - Le MinhNguyen + Duc-VuTran + Le MinhNguyen 384–391 Y17-1051 trieu-etal-2017-investigating diff --git a/data/xml/Y18.xml b/data/xml/Y18.xml index a7266d1ff4..907ce2929b 100644 --- a/data/xml/Y18.xml +++ b/data/xml/Y18.xml @@ -21,7 +21,7 @@ Multi-dialect Neural Machine Translation and Dialectometry KaoriAbe YuichirohMatsubayashi - NaoakiOkazaki + NaoakiOkazaki KentaroInui Y18-1001 abe-etal-2018-multi @@ -30,7 +30,7 @@ Automated Error Correction and Validation for <fixed-case>POS</fixed-case> Tagging of <fixed-case>H</fixed-case>indi SachiAngle PruthwikMishra - Dipti MisraSharma + Dipti MisraSharma Y18-1002 angle-etal-2018-automated @@ -60,7 +60,7 @@ Towards an Automatic Text Comprehension for the <fixed-case>A</fixed-case>rabic Question-Answering: Semantic and Logical Representation of Texts WidedBakari - PatriceBellot + PatriceBellot MahmoudNeji Y18-1006 bakari-etal-2018-towards @@ -88,14 +88,14 @@ Investigating the <fixed-case>E</fixed-case>nglish <fixed-case>ADJECTIVE</fixed-case> <fixed-case>OF</fixed-case> Construction in Academic Writing LiyinChen - Siaw-FongChung + Siaw-FongChung Y18-1009 chen-chung-2018-investigating Detecting Free Translation in Parallel Corpora from Attention Scores QiChen - Oi YeeKwong + Oi YeeKwong JingboZhu Y18-1010 chen-etal-2018-detecting @@ -127,7 +127,7 @@ A Corpus Study of Linguistic-Cultural Conceptualization of <fixed-case>FEAR</fixed-case> in <fixed-case>C</fixed-case>hinese and <fixed-case>R</fixed-case>ussian - KawaiChui + KawaiChui Hsiang-LinYeh Jie-LiTsai Y18-1014 @@ -146,14 +146,14 @@ Too Many Questions? What Can We Do? : Multiple Question Span Detection PrathyushaDanda Brij Mohan LalSrivastava - ManishShrivastava + ManishShrivastava Y18-1016 danda-etal-2018-many <fixed-case>B</fixed-case>o<fixed-case>WL</fixed-case>er: A neural approach to extractive text summarization PranavDhakras - ManishShrivastava + ManishShrivastava Y18-1017 dhakras-shrivastava-2018-bowler @@ -161,7 +161,7 @@ Effectiveness of Character Language Model for <fixed-case>V</fixed-case>ietnamese Named Entity Recognition Xuan-DungDoan Trung-ThanhDang - Le-MinhNguyen + Le-MinhNguyen Y18-1018 doan-etal-2018-effectiveness @@ -175,7 +175,7 @@ Exclamative Sentences in Emotion Expressions in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese: A Corpus-based Approach XuefengGao - Sophia Yat MeiLee + Sophia Yat MeiLee Y18-1020 gao-lee-2018-exclamative @@ -196,7 +196,7 @@ Are They Arguing or not: A Corpus-based Study Min-ChunHsiao - Siaw-FongChung + Siaw-FongChung Y18-1023 hsiao-chung-2018-arguing @@ -248,7 +248,7 @@ Model-Theoretic Incremental Interpretation Based on <fixed-case>D</fixed-case>iscourse <fixed-case>R</fixed-case>epresentation <fixed-case>T</fixed-case>heory - YoshihideKato + YoshihideKato ShigekiMatsubara Y18-1030 kato-matsubara-2018-model @@ -279,7 +279,7 @@ ShunKiyono ShoTakase JunSuzuki - NaoakiOkazaki + NaoakiOkazaki KentaroInui MasaakiNagata Y18-1034 @@ -294,14 +294,14 @@ The Non-deictic Use of Demonstratives in Conversations and Interpreted Speeches in Contemporary <fixed-case>H</fixed-case>ong <fixed-case>K</fixed-case>ong <fixed-case>C</fixed-case>antonese - Oi YeeKwong + Oi YeeKwong Y18-1036 kwong-2018-non Questions as a Pre-event, Pivot Event and Post-event of Emotions Helena Yan PingLau - Sophia Yat MeiLee + Sophia Yat MeiLee ZhongqingWang Y18-1037 lau-etal-2018-questions @@ -323,7 +323,7 @@ A New Angle on <fixed-case>L</fixed-case>2 Texts: A Statistical Approach to Translation Universals Younghee CheriLee - Yong-HunLee + Yong-HunLee Y18-1040 lee-lee-2018-new @@ -339,14 +339,14 @@ <fixed-case>C</fixed-case>hinese Spelling Check based on Neural Machine Translation Chiao-WenLi Jhih-JieChen - JasonChang + JasonChang Y18-1042 li-etal-2018-chinese Research on Entity Relation Extraction for Military Field ChenLiang - HongyingZan + HongyingZan YajunLiu YunfangWu Y18-1043 @@ -374,7 +374,7 @@ FeiCheng YiranWang HiroyukiShindo - YujiMatsumoto + YujiMatsumoto Y18-1046 liu-etal-2018-automatic-error @@ -423,15 +423,15 @@ KatsuhikoHayashi TakahiroIshihara HitoshiManabe - YujiMatsumoto + YujiMatsumoto Y18-1052 matsuno-etal-2018-reduction <fixed-case>E</fixed-case>qu<fixed-case>G</fixed-case>ener: A Reasoning Network for Word Problem Solving by Generating Arithmetic Equations PruthwikMishra - Litton JKurisinkel - Dipti MisraSharma + Litton JKurisinkel + Dipti MisraSharma VasudevaVarma Y18-1053 mishra-etal-2018-equgener @@ -510,7 +510,7 @@ Cheng-CyuanPeng Ching-YuYang Jhih-JieChen - JasonChang + JasonChang Y18-1062 peng-etal-2018-smartwrite @@ -575,7 +575,7 @@ HoyunSong JinseonYou Jin-WooChung - Jong C.Park + Jong C.Park Y18-1070 song-etal-2018-feature @@ -671,7 +671,7 @@ A Re-examination of Syntactic Complexity by Investigating the Internal Structure Variations of Adverbial Clauses across Speech and Writing MingyuWan - Alex ChengyuFang + Alex ChengyuFang Y18-1082 wan-fang-2018-examination @@ -711,7 +711,7 @@ Attention-based <fixed-case>BLSTM</fixed-case>-<fixed-case>CRF</fixed-case> Architecture for <fixed-case>M</fixed-case>ongolian Named Entity Recognition YuzhuXiong - MinghuaNuo + MinghuaNuo Y18-1088 xiong-nuo-2018-attention @@ -727,7 +727,7 @@ Development of Perceptual Training Software for Realizing High Variability Training Paradigm and Self Adaptive Training Paradigm RuiningYang HiroakiNanjo - MasatakeDantsuji + MasatakeDantsuji Y18-1090 yang-etal-2018-development @@ -830,10 +830,10 @@ Food-Related Sentiment Analysis for <fixed-case>C</fixed-case>antonese - NataliaKlyueva + NataliaKlyueva YunfeiLong Chu-RenHuang - QinLu + QinLu Y18-2004 klyueva-etal-2018-food @@ -896,7 +896,7 @@ RajDabre AnoopKunchukuttan AtsushiFujita - EiichiroSumita + EiichiroSumita Y18-3003 dabre-etal-2018-nicts @@ -921,7 +921,7 @@ RuiWang ChenchenDing MasaoUtiyama - EiichiroSumita + EiichiroSumita Y18-3006 wang-etal-2018-english @@ -929,7 +929,7 @@ Combination of Statistical and Neural Machine Translation for <fixed-case>M</fixed-case>yanmar-<fixed-case>E</fixed-case>nglish BenjaminMarie AtsushiFujita - EiichiroSumita + EiichiroSumita Y18-3007 marie-etal-2018-combination @@ -959,8 +959,8 @@ The <fixed-case>RGNLP</fixed-case> Machine Translation Systems for <fixed-case>WAT</fixed-case> 2018 - Atul Kr.Ojha - Koel DuttaChowdhury + Atul Kr.Ojha + Koel DuttaChowdhury Chao-HongLiu KaranSaxena Y18-3011 @@ -971,7 +971,7 @@ SukantaSen Kamal KumarGupta AsifEkbal - PushpakBhattacharyya + PushpakBhattacharyya Y18-3012 sen-etal-2018-iitp @@ -979,7 +979,7 @@ Multilingual <fixed-case>I</fixed-case>ndian Language Translation System at <fixed-case>WAT</fixed-case> 2018: Many-to-one Phrase-based <fixed-case>SMT</fixed-case> TamaliBanerjee AnoopKunchukuttan - PushpakBhattacharya + PushpakBhattacharya Y18-3013 banerjee-etal-2018-multilingual diff --git a/data/xml/Y95.xml b/data/xml/Y95.xml index 94e34da0cc..0993d82971 100644 --- a/data/xml/Y95.xml +++ b/data/xml/Y95.xml @@ -3,8 +3,8 @@ Proceedings of the 10th Pacific Asia Conference on Language, Information and Computation - Benjamin K.T’sou - Tom B. Y.Lai + Benjamin K.T’sou + Tom B. Y.Lai City University of Hong Kong
City University of Hong Kong, Hong Kong
December @@ -17,7 +17,7 @@ Distances and Trees in Linguistics - William S-Y.Wang + William S-Y.Wang 1–6 Y95-1001 http://hdl.handle.net/2065/11866 @@ -33,7 +33,7 @@ An Analysis of Generic Expressions in Situation Semantics - Ik-HwanLee + Ik-HwanLee 19–28 Y95-1003 http://hdl.handle.net/2065/11879 @@ -49,7 +49,7 @@ Comprehending Text : Achieving Coherence through a Connectionist Architecture - Samuel W. K.Chan + Samuel W. K.Chan 39–48 Y95-1005 http://hdl.handle.net/2065/11894 @@ -57,7 +57,7 @@ Predication of Meaning of Bisyllabic <fixed-case>C</fixed-case>hinese Compound Words Using Back Propagation Neural Network - LuaKim Teng + Kim TengLua 49–56 Y95-1006 http://hdl.handle.net/2065/11895 @@ -82,7 +82,7 @@ Scrambling in <fixed-case>G</fixed-case>erman : Extraction into the Mittelfeld - StefanMüller + StefanMüller 79–84 Y95-1009 http://hdl.handle.net/2065/11899 @@ -116,7 +116,7 @@ A Cognitive Account of the Lexical Polysemy of <fixed-case>C</fixed-case>hinese Kai - Flora Yu-FangWang + Flora Yu-FangWang 103–108 Y95-1013 http://hdl.handle.net/2065/11867 @@ -125,8 +125,8 @@ Automatic Sense Disambiguation for Target Word Selection Kwon YangKim - Se YoungPark - Sang JoLee + Se YoungPark + Sang JoLee 109–114 Y95-1014 http://hdl.handle.net/2065/11868 @@ -134,8 +134,8 @@ Structural Ambiguity and Conceptual Information Retrieval - Mathis Huey-chyunChen - Jason J.S.Chang + Mathis Huey-chyunChen + Jason J.S.Chang 115–120 Y95-1015 http://hdl.handle.net/2065/11869 @@ -152,8 +152,8 @@ The Postprocessing of Optical Character Recognition Based on Statistical Noisy Channel and Language Model - Jason J. S.Chang - Shun-DerChen + Jason J. S.Chang + Shun-DerChen 127–132 Y95-1017 http://hdl.handle.net/2065/11871 @@ -161,7 +161,7 @@ A Quantitative Analysis of Word-Definition in a Machine-Readable Dictionary - Robert W.P.Luk + Robert W.P.Luk Venus M.K.Chan 133–138 Y95-1018 @@ -195,7 +195,7 @@ <fixed-case>HMM</fixed-case> Parameter Learning for <fixed-case>J</fixed-case>apanese Morphological Analyzer KoichiTakeuchi - YujiMatsumoto + YujiMatsumoto 163–172 Y95-1022 http://hdl.handle.net/2065/11876 @@ -203,8 +203,8 @@ Automatic Acquisition of Class-based Rules for Word Alignment - Sur-JinKer - Jason J.S.Chang + Sur-JinKer + Jason J.S.Chang 173–184 Y95-1023 http://hdl.handle.net/2065/11877 @@ -213,7 +213,7 @@ Automated Alignment in Multilingual Corpora J.A.Campbell - Alex ChengyuFang + Alex ChengyuFang 185–194 Y95-1024 http://hdl.handle.net/2065/11878 @@ -266,7 +266,7 @@ A Corpus-Based Study of Adverbial Clauses in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese Conversations : A Preliminary Analysis - Yu-FangWang + Yu-FangWang 237–242 Y95-1030 http://hdl.handle.net/2065/11885 @@ -274,7 +274,7 @@ A Network-Based Writing System for <fixed-case>F</fixed-case>rench - AesunYoon + AesunYoon Hyuk-ChulKwon 243–248 Y95-1031 @@ -283,7 +283,7 @@ Web Access to a Lexical Database Using <fixed-case>VB</fixed-case>/Access <fixed-case>CGI</fixed-case> Programming - Jonathan J.Webster + Jonathan J.Webster 249–254 Y95-1032 http://hdl.handle.net/2065/11887 @@ -292,7 +292,7 @@ Document Ranking Method for High Precision Rate Mee-SunJeon - Se-YoungPark + Se-YoungPark 255–260 Y95-1033 http://hdl.handle.net/2065/11888 @@ -301,7 +301,7 @@ Natural Languages Analysis in Machine Translation (<fixed-case>MT</fixed-case>) Based on the <fixed-case>STCG</fixed-case> (String-Tree Correspondence Grammar) TangEnya Kong - ZaharinYusoff + ZaharinYusoff 261–266 Y95-1034 http://hdl.handle.net/2065/11889 @@ -326,7 +326,7 @@ A Unified Account of Polarity Phenomena - ChungminLee + ChungminLee 281–291 Y95-1037 http://hdl.handle.net/2065/11892 diff --git a/data/xml/Y96.xml b/data/xml/Y96.xml index 9ac59fab3c..cadc3111ce 100644 --- a/data/xml/Y96.xml +++ b/data/xml/Y96.xml @@ -34,7 +34,7 @@ Subject-oriented and non Subject-oriented Long-distance Anaphora : an Integrated Approach - AntonioBranco + AntonioBranco PalmiraMarrafa 21–30 Y96-1003 @@ -60,7 +60,7 @@ Underspecified <fixed-case>J</fixed-case>apanese Semantics in a Machine Translation System - BjörnGambäck + BjörnGambäck ChristianLieske YoshikiMori 53–62 @@ -73,7 +73,7 @@ J.A.Campbell N.Chatterjee M.Manela - Alex ChengyuFang + Alex ChengyuFang 63–72 Y96-1007 http://hdl.handle.net/2065/12063 @@ -97,7 +97,7 @@ A Discourse Approach to Causal Sentences in <fixed-case>M</fixed-case>andarin <fixed-case>C</fixed-case>hinese - Mei-chihTsai + Mei-chihTsai 93–98 Y96-1010 http://hdl.handle.net/2065/12066 @@ -121,8 +121,8 @@ Neural Networks in <fixed-case>C</fixed-case>hinese Lexical Classification - Md MarufHasan - Kim-TengLua + Md MarufHasan + Kim-TengLua 119–128 Y96-1013 http://hdl.handle.net/2065/12020 @@ -131,9 +131,9 @@ A Logical Structure for the Construction of Machine Readable Dictionaries Byung-JinChoi - Jae-SungLee + Jae-SungLee Woon-JaeLee - Key-SunChoi + Key-SunChoi 129–136 Y96-1014 http://hdl.handle.net/2065/12021 @@ -141,8 +141,8 @@ Extraction of Thematic Roles from Dictionary Definitions - Michael L.Mc Hale - Sung H.Myaeng + Michael L.Mc Hale + Sung H.Myaeng 137–146 Y96-1015 http://hdl.handle.net/2065/12022 @@ -150,7 +150,7 @@ Beyond Telicity and Affected-Theme : Semantic Factors Contributing to the Resultative Interpretation of Predicates in <fixed-case>J</fixed-case>apanese - ChiharuUda + ChiharuUda 147–156 Y96-1016 http://hdl.handle.net/2065/12023 @@ -166,9 +166,9 @@ <fixed-case>S</fixed-case>INICA <fixed-case>C</fixed-case>ORPUS : Design Methodology for Balanced Corpora - Keh-JiannChen + Keh-JiannChen Chu-RenHuang - Li-PingChang + Li-PingChang Hui-LiHsu 167–176 Y96-1018 @@ -219,7 +219,7 @@ A Proposal of <fixed-case>K</fixed-case>orean Conjugation System and its Application to Morphological Analysis YoshitakaHirano - YujiMatsumoto + YujiMatsumoto 229–236 Y96-1024 http://hdl.handle.net/2065/12032 @@ -229,7 +229,7 @@ Rule-based Approach to <fixed-case>K</fixed-case>orean Morphological Disambiguation Supported by Statistical Method Min-JungKim Hyuk-ChulKwon - Ae-SunYoon + Ae-SunYoon 237–246 Y96-1025 http://hdl.handle.net/2065/12033 @@ -341,8 +341,8 @@ Principle-based Parsing for <fixed-case>C</fixed-case>hinese - Charles D.Yang - Robert C.Berwick + Charles D.Yang + Robert C.Berwick 363–371 Y96-1038 http://hdl.handle.net/2065/12047 @@ -360,7 +360,7 @@ Fast Statistical Grammar Induction Wide R.Hogenhout - YujiMatsumoto + YujiMatsumoto 383–392 Y96-1040 http://hdl.handle.net/2065/12049 diff --git a/data/xml/Y98.xml b/data/xml/Y98.xml index 6a24033660..ab40139fbc 100644 --- a/data/xml/Y98.xml +++ b/data/xml/Y98.xml @@ -4,7 +4,7 @@ Proceedings of the 12th Pacific Asia Conference on Language, Information and Computation JinGuo - Kim TengLua + Kim TengLua JieXu Chinese and Oriental Languages Information Processing Society
Singapore
@@ -76,7 +76,7 @@
A Multiple Inheritance Analysis of the Internally-Headed Relative Clause in <fixed-case>J</fixed-case>apanese - Chiharu UdaKikuta + Chiharu UdaKikuta 82–93 Y98-1008 http://hdl.handle.net/2065/12104 @@ -109,7 +109,7 @@ Common Grounds as Multiple Information States Jae-IlYeom - Ik-HwanLee + Ik-HwanLee 127–138 Y98-1012 http://hdl.handle.net/2065/12072 @@ -125,7 +125,7 @@ Predictivity vs. Stipulativity in the Lexicon - Cornelia MariaVerspoor + Cornelia MariaVerspoor 152–162 Y98-1014 http://hdl.handle.net/2065/12074 @@ -134,7 +134,7 @@ Using Case Prototypicality as a Semantic Primitive Dan-HeeYang - Ik-HwanLee + Ik-HwanLee MansukSong 163–171 Y98-1015 @@ -160,7 +160,7 @@ <fixed-case>MI</fixed-case>-trigger-based Language Modelling - GuodongZhou + GuodongZhou Kim-TengLua 195–205 Y98-1018 @@ -169,8 +169,8 @@ Extracting Recurrent Phrases and Terms from Texts Using a Purely Statistical Method - Zhao-MingGao - HaroldSomers + Zhao-MingGao + HaroldSomers 206–211 Y98-1019 http://hdl.handle.net/2065/12080 @@ -206,7 +206,7 @@ Word-Sense Classification by Hierarchical Clustering Ken Y.K.Lau - Robert W.P.Luk + Robert W.P.Luk 236–247 Y98-1023 http://hdl.handle.net/2065/12084 @@ -214,7 +214,7 @@ Automatic Acquisition of a High-Precision Translation Lexicon from Parallel <fixed-case>C</fixed-case>hinese-<fixed-case>E</fixed-case>nglish Corpora - Zhao-MingGao + Zhao-MingGao 248–254 Y98-1024 http://hdl.handle.net/2065/12085 @@ -222,7 +222,7 @@ <fixed-case>S</fixed-case>urrogater : A Simple Yet Efficient Document Condensation System - JoeZhou + JoeZhou 255–262 Y98-1025 http://hdl.handle.net/2065/12086 @@ -240,7 +240,7 @@ On Removing Ambiguity in Text Understanding SiminLi - YukihiroIto + YukihiroIto 271–282 Y98-1027 http://hdl.handle.net/2065/12088 @@ -267,7 +267,7 @@ Syntactic Verifier as a Filter to Compound Unit Recognizer - HanminJung + HanminJung SanghwaYuh TaewanKim Dong-InPark @@ -287,7 +287,7 @@ The Advantages of 3<fixed-case>D</fixed-case>-Trees in Modeling Human Sentence Processing - Charles C.Lee + Charles C.Lee 316–327 Y98-1032 http://hdl.handle.net/2065/12093 @@ -312,7 +312,7 @@ Using A Semantic Classification in Parsing <fixed-case>C</fixed-case>hinese : Some Preliminary Results - Kok WeeGan + Kok WeeGan 340–347 Y98-1035 http://hdl.handle.net/2065/12097 diff --git a/data/xml/Y99.xml b/data/xml/Y99.xml index 0ee459e58f..8df65099dd 100644 --- a/data/xml/Y99.xml +++ b/data/xml/Y99.xml @@ -17,7 +17,7 @@ Linguistics in an Age of Engineering - ChristopherManning + ChristopherManning 1–1 Y99-1001 http://hdl.handle.net/2065/12106 @@ -41,7 +41,7 @@ Lexical Information and Beyond : Constructional Inferences in Semantic Representation - Mei-ChunLiu + Mei-ChunLiu Chu-RenHuang Ching-YiLee 27–37 @@ -51,8 +51,8 @@ Alternation Across Semantic Fields : A Study of <fixed-case>M</fixed-case>andarin Verbs of Emotion - Li-liChang - Keh-jiannChen + Li-liChang + Keh-jiannChen Chu-RenHuang 39–50 Y99-1005 @@ -78,7 +78,7 @@ Lexical Information and Pragmatic Information : Reflexivity of an Event and Resultative Constructions in <fixed-case>J</fixed-case>apanese - Chiharu UdaKikuta + Chiharu UdaKikuta 75–86 Y99-1008 http://hdl.handle.net/2065/12139 @@ -153,8 +153,8 @@ Free Word Order in a Constraint-based Implementation of Dependency Grammar - Tom B.Y.Lai - HuangChangning + Tom B.Y.Lai + ChangningHuang 161–168 Y99-1017 http://hdl.handle.net/2065/12114 @@ -207,8 +207,8 @@ The Lexicon in <fixed-case>FCIDB</fixed-case> : A Friendly <fixed-case>C</fixed-case>hinese Interface for <fixed-case>DBMS</fixed-case> Da-JinnWang - Tsong-YiChen - Martha W.Evens + Tsong-YiChen + Martha W.Evens 215–222 Y99-1023 http://hdl.handle.net/2065/12120 @@ -263,7 +263,7 @@ Extraction of Simple Sentences from Mixed Sentences for Building <fixed-case>K</fixed-case>orean Case Frames Dan-HeeYang - Ik-HwanLee + Ik-HwanLee MansukSong 269–276 Y99-1029 @@ -273,7 +273,7 @@ Sub-Sentential Alignment Method by Analogy TantelyAndriamanankasina - KenjiAraki + KenjiAraki KojiTochinai 277–284 Y99-1030 @@ -282,8 +282,8 @@ A Study of Performance Evaluation for <fixed-case>GA</fixed-case>-<fixed-case>ILMT</fixed-case> Using Travel <fixed-case>E</fixed-case>nglish - HiroshiEchizen-ya - KenjiAraki + HiroshiEchizen-ya + KenjiAraki YoshioMomouchi KojiTochinai 285–292 @@ -293,8 +293,8 @@ Anaphora Resolution as Lexical Cohesion Identification - Samuel W.K.Chan - Benjamin K.T’sou + Samuel W.K.Chan + Benjamin K.T’sou 293–304 Y99-1032 http://hdl.handle.net/2065/12130 diff --git a/data/yaml/name_variants.yaml b/data/yaml/name_variants.yaml deleted file mode 100644 index ab68e1d861..0000000000 --- a/data/yaml/name_variants.yaml +++ /dev/null @@ -1,11263 +0,0 @@ -- canonical: {first: Anthony, last: Hughes} - id: anthony-hughes - orcid: 0009-0003-4065-1094 - variants: - - {first: Anthony James, last: Hughes} -- canonical: {first: Benjamin Matthias, last: Ruppik} - id: benjamin-matthias-ruppik - orcid: 0000-0001-9035-9217 - variants: - - {first: Benjamin, last: Ruppik} -- canonical: {first: Kranti, last: Chalamalasetti} - id: kranti-chalamalasetti - variants: - - {first: Chalamalasetti, last: Kranti} -- canonical: {first: Felicia, last: Körner} - id: felicia-koerner - variants: - - {first: Felicia, last: Koerner} -- canonical: {first: Pranav, last: A} - comment: UC Santa Cruz - id: pranav-a - similar: [pranav-anand] -- canonical: {first: Balamurali, last: AR} - variants: - - {first: Balamurali, last: A.R.} - - {first: Balamurali, last: A.R} -- canonical: {first: Solomon Teferra, last: Abate} - variants: - - {first: Solomon, last: Teferra Abate} - - {first: Solomon, last: Teferra} -- canonical: {first: Ramzi, last: Abbès} - variants: - - {first: Ramzi, last: Abbes} -- canonical: {first: Samir, last: AbdelRahman} - variants: - - {first: Samir, last: Abdelrahman} -- canonical: {first: Anne, last: Abeillé} - variants: - - {first: Anne, last: Abeille} -- canonical: {first: Steven, last: Abney} - id: steven-abney - variants: - - {first: Steve, last: Abney} - - {first: Steven P., last: Abney} -- canonical: {first: Victor, last: Abrash} - id: victor-abrash -- canonical: {first: José I., last: Abreu} - variants: - - {first: Jose I., last: Abreu} - - {first: José, last: Abreu} -- canonical: {first: Sarkis, last: Abrilian} - id: sarkis-abrilian -- canonical: {first: Ahmed, last: AbuRa’ed} - variants: - - {first: Ahmed, last: Abura’ed} -- canonical: {first: Esref, last: Adali} - variants: - - {first: Eşref, last: Adalı} - - {first: Eşref, last: Adali} -- canonical: {first: Gilles, last: Adda} - id: gilles-adda -- canonical: {first: Martine, last: Adda-Decker} - id: martine-adda-decker - variants: - - {first: Martine, last: Adda-decker} -- canonical: {first: David Ifeoluwa, last: Adelani} - variants: - - {first: David, last: Adelani} - - {first: David I., last: Adelani} -- canonical: {first: Wei, last: Ai} - id: wei-ai-umich - orcid: 0000-0001-6271-9430 - institution: University of Michigan -- canonical: {first: Wei, last: Ai} - id: wei-ai - comment: May refer to several people -- canonical: {first: Giovanni, last: Adorni} - id: giovanni-adorni -- canonical: {first: Geert, last: Adriaens} - id: geert-adriaens -- canonical: {first: Itziar, last: Aduriz} - id: itziar-aduriz -- canonical: {first: Rodrigo, last: Agerri} - id: rodrigo-agerri -- canonical: {first: Eneko, last: Agirre} - id: eneko-agirre -- canonical: {first: Željko, last: Agić} - variants: - - {first: Zeljko, last: Agic} -- canonical: {first: Shyam Sundar, last: Agrawal} - variants: - - {first: Shyam, last: Agrawal} -- canonical: {first: David W., last: Aha} - variants: - - {first: David, last: Aha} -- canonical: {first: Thomas, last: Ahlswede} - variants: - - {first: Thomas E., last: Ahlswede} -- canonical: {first: Elisabeth, last: Ahlsén} - variants: - - {first: Elisabeth, last: Ahlsen} -- canonical: {first: Faisal, last: Ahmad} - variants: - - {first: Faisal, last: Ahmed} -- canonical: {first: Wasi, last: Ahmad} - variants: - - {first: Wasi Uddin, last: Ahmad} -- canonical: {first: Tafseer, last: Ahmed} - variants: - - {first: Tafseer, last: Ahmed Khan} -- canonical: {first: Byung-Gyu, last: Ahn} - variants: - - {first: Byung Gyu, last: Ahn} -- canonical: {first: Gregory, last: Aist} - variants: - - {first: Greg, last: Aist} -- canonical: {first: Salah, last: Ait-Mokhtar} - variants: - - {first: Salah, last: Aït-Mokhtar} -- canonical: {first: Akiko, last: Aizawa} - variants: - - {first: Akiko N., last: Aizawa} -- canonical: {first: Gianmaria, last: Ajani} - id: gianmaria-ajani -- canonical: {first: Hiroyuki, last: Akama} - variants: - - {first: Hiroyuki, last: Akam} -- canonical: {first: Zheng, last: Yuan} - comment: Cambridge - id: zheng-yuan-cambridge - orcid: 0000-0003-2406-1708 - institution: University of Cambridge -- canonical: {first: Zheng, last: Yuan} - id: zheng-yuan - comment: May refer to several people -- canonical: {first: Mohammad, last: Akbar} - id: mohammad-akbar -- canonical: {first: A., last: Akilandeswari} - variants: - - {first: Akilandeswari, last: A} -- canonical: {first: Berfin, last: Aktaş} - variants: - - {first: Berfin, last: Aktas} -- canonical: {first: Khalid, last: Al Khatib} - variants: - - {first: Khalid, last: Al-Khatib} -- canonical: {first: Mosleh Hmoud, last: Al-Adhaileh} - variants: - - {first: Mosleh H., last: Al-Adhaileh} -- canonical: {first: Chau Minh, last: Pham} - orcid: 0009-0004-0435-7450 - institution: University of Maryland - variants: - - {first: Chau, last: Pham} -- canonical: {first: Adil, last: Al-Kufaishi} - id: adil-al-kufaishi -- canonical: {first: Yaser, last: Al-Onaizan} - variants: - - {first: Yaser, last: Al-onaizan} -- canonical: {first: Amal, last: Al-Saif} - variants: - - {first: Amal, last: Alsaif} -- canonical: {first: Vicent, last: Alabau} - variants: - - {first: Vicente, last: Alabau} -- canonical: {first: Jesujoba, last: Alabi} - variants: - - {first: Jesujoba O., last: Alabi} - - {first: Jesujoba Oluwadara, last: Alabi} -- canonical: {first: Danniel Liwanag, last: Alcantara} - variants: - - {first: Danniel, last: Alcantara} -- canonical: {first: Izaskun, last: Aldezabal} - id: izaskun-aldezabal -- canonical: {first: Iñaki, last: Alegría} - id: inaki-alegria - variants: - - {first: Iñaki, last: Alegria} - - {first: Inaki, last: Alegria} -- canonical: {first: Beatrice, last: Alex} - variants: - - {first: Bea, last: Alex} -- canonical: {first: Zoltán, last: Alexin} - id: zoltan-alexin -- canonical: {first: James, last: Allan} - comment: UMass Amherst - id: james-allan - similar: [james-allen] -- canonical: {first: James, last: Allen} - comment: Rochester - id: james-allen - similar: [james-allan] - variants: - - {first: James F., last: Allen} -- canonical: {first: Jonathan, last: Allen} - variants: - - {first: Jonathan, last: All} -- canonical: {first: Hector, last: Allende-Cid} - variants: - - {first: Héctor, last: Allende} - - {first: Héctor, last: Allende-Cid} -- canonical: {first: Fil, last: Alleva} - id: fil-alleva - variants: - - {first: Fileno, last: Alleva} -- canonical: {first: José João, last: Almeida} - variants: - - {first: Jose Joao, last: Almeida} -- canonical: {first: Miguel B., last: Almeida} - variants: - - {first: Miguel, last: Almeida} -- canonical: {first: Huda, last: Almuzaini} - id: huda-almuzaini -- canonical: {first: Jose M., last: Alonso} - variants: - - {first: Jose, last: Alonso} -- canonical: {first: Miguel A., last: Alonso} - variants: - - {first: Miguel, last: Alonso Pardo} - - {first: Miguel A., last: Alonso Pardo} -- canonical: {first: Laura, last: Alonso Alemany} - variants: - - {first: Laura, last: Alonso i Alemany} - - {first: Laura, last: Alonso} -- canonical: {first: Erick, last: Alphonse} - id: erick-alphonse -- canonical: {first: Hiyan, last: Alshawi} - variants: - - {first: Hiyan, last: Alsawi} -- canonical: {first: Romina, last: Altamirano} - variants: - - {first: Ivana Romina, last: Altamirano} -- canonical: {first: Mohamed, last: Altantawy} - variants: - - {first: Mohamed, last: AlTantawy} -- canonical: {first: Sandra, last: Aluísio} - variants: - - {first: Sandra Maria, last: Aluísio} - - {first: Sandra, last: Aluisio} - - {first: Sandra M., last: Aluísio} -- canonical: {first: Carlos, last: Alzate} - variants: - - {first: Carlos, last: Alzate Perez} -- canonical: {first: Diego Raphael, last: Amancio} - variants: - - {first: Diego, last: Amancio} -- canonical: {first: Shin-ya, last: Amano} - variants: - - {first: Sin-ya, last: Amano} -- canonical: {first: Fredy A., last: Amaya} - id: fredy-a-amaya -- canonical: {first: Juan Carlos, last: Amengual} - id: juan-carlos-amengual -- canonical: {first: Mohamed R., last: Amer} - variants: - - {first: Mohamed, last: Amer} -- canonical: {first: Enrique, last: Amigó} - variants: - - {first: Enrique, last: Amigo} -- canonical: {first: Massih R., last: Amini} - variants: - - {first: Massih-Reza, last: Amini} -- canonical: {first: Reinald Kim, last: Amplayo} - variants: - - {first: Reinald, last: Kim Amplayo} -- canonical: {first: Marcelo Adriano, last: Amâncio} - variants: - - {first: Marcelo, last: Amancio} -- canonical: {first: Pranav, last: Anand} - comment: Dayta AI - id: pranav-anand - similar: [pranav-a] -- canonical: {first: Animashree, last: Anandkumar} - variants: - - {first: Anima, last: Anandkumar} -- canonical: {first: Øistein E., last: Andersen} - variants: - - {first: Øistein, last: Andersen} -- canonical: {first: Andrew J., last: Anderson} - variants: - - {first: Andrew, last: Anderson} -- canonical: {first: Anne H., last: Anderson} - variants: - - {first: Anne, last: Anderson} -- canonical: {first: Kenneth M., last: Anderson} - variants: - - {first: Kenneth, last: Anderson} - - {first: Ken, last: Anderson} -- canonical: {first: Tim, last: Anderson} - variants: - - {first: Timothy, last: Anderson} -- canonical: {first: Winston N, last: Anderson} - variants: - - {first: Winston, last: Anderson} -- canonical: {first: Shinichi, last: Ando} - variants: - - {first: Sinichi, last: Ando} - - {first: Shin-ichi, last: Ando} - - {first: Shin-Ichi, last: Ando} -- canonical: {first: Elisabeth, last: Andre} - variants: - - {first: Elisabeth, last: André} -- canonical: {first: Alexandre, last: Andreewsky} - comment: LIMSI - id: alexandre-andreewsky - similar: [alexander-andreyewsky] -- canonical: {first: Alexander, last: Andreyewsky} - comment: IBM - id: alexander-andreyewsky - similar: [alexandre-andreewsky] -- canonical: {first: Peter, last: Anick} - variants: - - {first: Peter G., last: Anick} -- canonical: {first: Olatz, last: Ansa} - id: olatz-ansa -- canonical: {first: Georges, last: Antoniadis} - id: georges-antoniadis -- canonical: {first: Juliano D., last: Antonio} - id: juliano-d-antonio -- canonical: {first: Waqas, last: Anwar} - variants: - - {first: Muhammad Waqas, last: Anwar} -- canonical: {first: Douglas, last: Appelt} - variants: - - {first: Douglas E., last: Appelt} - - {first: Doug, last: Appelt} -- canonical: {first: Noriko H., last: Arai} - variants: - - {first: Noriko, last: Arai} -- canonical: {first: Kenji, last: Araki} - id: kenji-araki -- canonical: {first: Masahiro, last: Araki} - id: masahiro-araki -- canonical: {first: Mihael, last: Arcan} - variants: - - {first: Mihael, last: Arčan} -- canonical: {first: Nikolay, last: Arefyev} - variants: - - {first: Nikolay, last: Arefiev} -- canonical: {first: Nerea, last: Areta} - id: nerea-areta -- canonical: {first: Susan, last: Armstrong} - id: susan-armstrong - variants: - - {first: Susan, last: Warwick-Armstrong} - - {first: Susan, last: Warwick} -- canonical: {first: Alan R., last: Aronson} - variants: - - {first: Alan, last: Aronson} -- canonical: {first: Xabier, last: Arregi} - id: xabier-arregi -- canonical: {first: Jose Mari, last: Arriola} - id: jose-mari-arriola -- canonical: {first: Núria, last: Artigas} - id: nuria-artigas -- canonical: {first: Xabier, last: Artola} - id: xabier-artola -- canonical: {first: Kavosh, last: Asadi Atui} - variants: - - {first: Kavosh, last: Asadi} -- canonical: {first: Noushin Rezapour, last: Asheghi} - variants: - - {first: Noushin, last: Rezapour Asheghi} -- canonical: {first: Nicholas, last: Asher} - variants: - - {first: Nicolas, last: Asher} -- canonical: {first: Kevin D., last: Ashley} - variants: - - {first: Kevin, last: Ashley} -- canonical: {first: Àlex R., last: Atrio} - variants: - - {first: Àlex, last: Atrio} -- canonical: {first: Jordi, last: Atserias} - id: jordi-atserias - variants: - - {first: Jordi, last: Atserias Batalla} -- canonical: {first: Mohamed, last: Attia} - id: mohamed-attia -- canonical: {first: Eric, last: Atwell} - variants: - - {first: Eric Steven, last: Atwell} - - {first: Eric S., last: Atwell} -- canonical: {first: Steve, last: Austin} - id: steve-austin -- canonical: {first: Luciana Beatriz, last: Avila} - variants: - - {first: Luciana Beatriz, last: Ávila} - - {first: Luciana, last: Ávila} -- canonical: {first: Aiti, last: Aw} - variants: - - {first: AiTi, last: Aw} - - {first: Ai Ti, last: Aw} -- canonical: {first: Christelle, last: Ayache} - id: christelle-ayache -- canonical: {first: Necip Fazil, last: Ayan} - variants: - - {first: Necip, last: Fazil Ayan} -- canonical: {first: Damaris, last: Ayuso} - id: damaris-ayuso - variants: - - {first: Damaris M., last: Ayuso} -- canonical: {first: Saliha, last: Azzam} - id: saliha-azzam -- canonical: {first: Harald, last: Baayen} - variants: - - {first: R. Harald, last: Baayen} -- canonical: {first: Ismail, last: Babaoğlu} - variants: - - {first: Ismail, last: Babaoglu} -- canonical: {first: Ciprian, last: Bacalu} - id: ciprian-bacalu -- canonical: {first: Ngo Xuan, last: Bach} - variants: - - {first: Ngo, last: Xuan Bach} -- canonical: {first: Joan, last: Bachenko} - id: joan-bachenko -- canonical: {first: Daniel, last: Bachut} - id: daniel-bachut -- canonical: {first: Brett W., last: Bader} - variants: - - {first: Brett, last: Bader} -- canonical: {first: Adriana, last: Badulescu} - variants: - - {first: Adriana, last: Bădulescu} -- canonical: {first: Hee-Sook, last: Bae} - variants: - - {first: Hee Sook, last: Bae} -- canonical: {first: Erik, last: Baert} - id: erik-baert -- canonical: {first: Mirko, last: Baglioni} - id: mirko-baglioni -- canonical: {first: Jeanne, last: Baguenier Desormeaux} - variants: - - {first: Jeanne, last: Baguenier-Desormeaux} -- canonical: {first: Lalit R., last: Bahl} - id: lalit-r-bahl -- canonical: {first: Mohammad, last: Bahrani} - id: mohammad-bahrani -- canonical: {first: Ruzena, last: Bajcsy} - id: ruzena-bajcsy -- canonical: {first: Ondřej, last: Bajgar} - variants: - - {first: Ondrej, last: Bajgar} -- canonical: {first: Stylianos, last: Bakamidis} - id: stylianos-bakamidis -- canonical: {first: Collin F., last: Baker} - variants: - - {first: Collin, last: Baker} -- canonical: {first: George, last: Baker} - id: george-baker - variants: - - {first: George Arthur, last: Baker} -- canonical: {first: James, last: Baker} - variants: - - {first: James K., last: Baker} -- canonical: {first: Janet, last: Baker} - variants: - - {first: Janet M., last: Baker} -- canonical: {first: Kathryn, last: Baker} - variants: - - {first: Kathryn L., last: Baker} -- canonical: {first: Pedro, last: Balage Filho} - variants: - - {first: Pedro, last: Balage} - - {first: Pedro Paulo, last: Balage Filho} - - {first: Pedro P. Balage, last: Filho} - - {first: Pedro, last: Filho} -- canonical: {first: Alexandra, last: Balahur} - variants: - - {first: Alexandra, last: Balahur-Dobrescu} -- canonical: {first: Timothy, last: Baldwin} - variants: - - {first: Tim, last: Baldwin} -- canonical: {first: Catherine N., last: Ball} - variants: - - {first: Catherine, last: Ball} -- canonical: {first: Bruce W., last: Ballard} - id: bruce-w-ballard - variants: - - {first: Bruce, last: Ballard} -- canonical: {first: Rafael E., last: Banchs} - variants: - - {first: Rafael, last: Banchs} -- canonical: {first: Sivaji, last: Bandyopadhyay} - variants: - - {first: Sivaji, last: Bandopadhyay} - - {first: Sivaju, last: Bandyopadhyay} - - {first: Sivaji, last: B} -- canonical: {first: Eduardo R., last: Banga} - id: eduardo-r-banga - variants: - - {first: Eduardo, last: R. Banga} - - {first: Eduardo Rodríguez, last: Banga} -- canonical: {first: Srinivas, last: Bangalore} - variants: - - {first: B., last: Srinivas} - - {first: '', last: Srinivas} -- canonical: {first: Forrest, last: Bao} - variants: - - {first: Forrest Sheng, last: Bao} -- canonical: {first: Petra, last: Barancikova} - variants: - - {first: Petra, last: Barančíková} -- canonical: {first: Cătălina, last: Barbu} - id: catalina-barbu - variants: - - {first: Catalina, last: Barbu} -- canonical: {first: Verginica, last: Barbu Mititelu} - variants: - - {first: Verginica Barbu, last: Mititelu} -- canonical: {first: Anup, last: Barman} - variants: - - {first: Anup Kr., last: Barman} -- canonical: {first: John, last: Barnden} - id: john-barnden - variants: - - {first: John A., last: Barnden} -- canonical: {first: Marco, last: Baroni} - id: marco-baroni -- canonical: {first: Roberto, last: Barra-Chicote} - variants: - - {first: Roberto Barra, last: Chicote} -- canonical: {first: Sergio, last: Barrachina} - id: sergio-barrachina -- canonical: {first: Claude, last: Barras} - id: claude-barras -- canonical: {first: Caroline, last: Barriere} - variants: - - {first: Caroline, last: Barrière} -- canonical: {first: Chris, last: Barry} - id: chris-barry -- canonical: {first: Valentina, last: Bartalesi Lenzi} - id: valentina-bartalesi-lenzi -- canonical: {first: François, last: Barthélemy} - variants: - - {first: Francois, last: Barthelemy} -- canonical: {first: G. Edward, last: Barton} - variants: - - {first: G. Edward, last: 'Barton, Jr.'} -- canonical: {first: Guntis, last: Barzdins} - variants: - - {first: Guntis, last: Bārzdiņš} -- canonical: {first: Karine, last: Baschung} - id: karine-baschung -- canonical: {first: Roberto, last: Basili} - id: roberto-basili -- canonical: {first: Colin, last: Batchelor} - variants: - - {first: Colin R., last: Batchelor} -- canonical: {first: John, last: Bateman} - variants: - - {first: John A., last: Bateman} -- canonical: {first: Madeleine, last: Bates} - id: madeleine-bates - variants: - - {first: Madeline, last: Bates} -- canonical: {first: Riza Theresa, last: Batista-Navarro} - variants: - - {first: Riza, last: Batista-Navarro} -- canonical: {first: Anton, last: Batliner} - id: anton-batliner -- canonical: {first: Istvan, last: Batori} - id: istvan-batori -- canonical: {first: Marco, last: Battista} - id: marco-battista -- canonical: {first: William A., last: 'Baumgartner, Jr.'} - variants: - - {first: William A., last: Baumgartner Jr.} - - {first: William A., last: Baumgartner} - - {first: William, last: Baumgartner} - - {first: William, last: Baumgartner Jr.} -- canonical: {first: Samuel, last: Bayer} - variants: - - {first: Sam, last: Bayer} -- canonical: {first: Andrew David, last: Beale} - variants: - - {first: Andrew, last: David} -- canonical: {first: David L., last: Bean} - variants: - - {first: David, last: Bean} -- canonical: {first: John, last: Bear} - id: john-bear -- canonical: {first: Hannah, last: Bechara} - variants: - - {first: Hanna, last: Béchara} - - {first: Hanna, last: Bechara} - - {first: Hannah, last: Béchara} -- canonical: {first: Frederic, last: Bechet} - id: frederic-bechet - variants: - - {first: Frédéric, last: Bechét} - - {first: Frédéric, last: Béchet} - - {first: Frederic, last: Béchet} -- canonical: {first: Chedi, last: Bechikh Ali} - variants: - - {first: Chedi, last: Bechikh} -- canonical: {first: Daniel, last: Beck} - variants: - - {first: Daniel Emilio, last: Beck} -- canonical: {first: Lee, last: Becker} - variants: - - {first: Lee A., last: Becker} -- canonical: {first: Russell, last: Beckley} - variants: - - {first: Russ, last: Beckley} -- canonical: {first: Paul, last: Bedaride} - variants: - - {first: Paul, last: Bédaride} -- canonical: {first: Cosmin Adrian, last: Bejan} - variants: - - {first: Cosmin, last: Adrian Bejan} - - {first: Cosmin, last: Bejan} -- canonical: {first: Núria, last: Bel} - variants: - - {first: Nuria, last: Bel} -- canonical: {first: Gemma, last: Bel-Enguix} - variants: - - {first: Gemma Bel, last: Enguix} - - {first: Gemma, last: Bel Enguix} -- canonical: {first: Julie, last: Belião} - variants: - - {first: Julie, last: Beliao} -- canonical: {first: Narjès, last: Bellamine Ben Saoud} - variants: - - {first: Narjès Bellamine Ben, last: Saoud} -- canonical: {first: Patrice, last: Bellot} - id: patrice-bellot -- canonical: {first: Valérie, last: Bellynck} - variants: - - {first: Valerie, last: Bellynck} -- canonical: {first: Islam, last: Beltagy} - id: islam-beltagy -- canonical: {first: Robert S., last: Belvin} - variants: - - {first: Robert, last: Belvin} - - {first: Robert S., last: Melvin} -- canonical: {first: Anja, last: Belz} - variants: - - {first: Anya, last: Belz} -- canonical: {first: Roni, last: Ben Aharon} - variants: - - {first: Roni, last: Ben-Aharon} -- canonical: {first: Abdelmajid, last: Ben Hamadou} - variants: - - {first: Abdelmajid, last: Ben hamadou} - - {first: Abdelmajid, last: Benhamadou} - - {first: Abdelmajid-Lin, last: Ben Hamadou} -- canonical: {first: Abderrahim, last: Benabbou} - id: abderrahim-benabbou -- canonical: {first: Farah, last: Benamara} - variants: - - {first: Farah, last: Beanamara} - - {first: Farah, last: Benamara Zitoune} -- canonical: {first: Chomicha, last: Bendahman} - id: chomicha-bendahman -- canonical: {first: Emily M., last: Bender} - variants: - - {first: Emily, last: Bender} -- canonical: {first: José-Miguel, last: Benedí} - id: jose-miguel-benedi - variants: - - {first: Jose-Miguel, last: Benedi} - - {first: José Miguel, last: Benedí} - - {first: José Miguel, last: Benedí Ruíz} - - {first: José-M., last: Benedí} - - {first: José Miguel, last: Benedi Ruiz} - - {first: José-Miguel, last: Benedí Ruíz} -- canonical: {first: Simon, last: Benigeri} - id: simon-benigeri - variants: - - {first: Simon, last: Ben Igeri} -- canonical: {first: Andrew, last: Bennett} - id: andrew-bennett - similar: [andrew-bennetts] -- canonical: {first: Paul, last: Bennett} - variants: - - {first: Paul N., last: Bennett} -- canonical: {first: Andrew, last: Bennetts} - id: andrew-bennetts - similar: [andrew-bennett] -- canonical: {first: Alexander, last: Berg} - variants: - - {first: Alex, last: Berg} - - {first: Alexander C, last: Berg} -- canonical: {first: Tamara, last: Berg} - variants: - - {first: Tamara L., last: Berg} - - {first: Tamara L, last: Berg} -- canonical: {first: Carole, last: Bergamini} - id: carole-bergamini -- canonical: {first: Adam, last: Berger} - variants: - - {first: Adam L., last: Berger} -- canonical: {first: Maria, last: Berger} - variants: - - {first: Maria, last: Moritz} -- canonical: {first: Raffaella, last: Bernardi} - id: raffaella-bernardi -- canonical: {first: Niels Ole, last: Bernsen} - variants: - - {first: Niels Ole, last: Bernse} - - {first: Niels O., last: Bernsen} -- canonical: {first: Elisa, last: Bertino} - id: elisa-bertino -- canonical: {first: Núria, last: Bertomeu} - variants: - - {first: Nuria, last: Bertomeu} - - {first: Núria, last: Bertomeu Castelló} - - {first: Núria Bertomeu, last: Castelló} -- canonical: {first: Robert C., last: Berwick} - variants: - - {first: Robert, last: Berwick} - - {first: Robert Cregar, last: Berwick} -- canonical: {first: Gabriel G., last: Bes} - id: gabriel-g-bes - variants: - - {first: Gabriel G., last: Bès} - - {first: Gabriel, last: Bès} -- canonical: {first: Laurent, last: Besacier} - id: laurent-besacier -- canonical: {first: Štefan, last: Beňuš} - variants: - - {first: Stefan, last: Benus} - - {first: S̆tefan, last: Ben̆us̆} -- canonical: {first: Akshar, last: Bharati} - variants: - - {first: Akshar, last: Bharathi} -- canonical: {first: Irshad, last: Bhat} - variants: - - {first: Irshad A., last: Bhat} -- canonical: {first: Rajesh, last: Bhat} - id: rajesh-bhat - similar: [rajesh-bhatt] -- canonical: {first: Riyaz Ahmad, last: Bhat} - variants: - - {first: Riyaz A., last: Bhat} -- canonical: {first: Rajesh, last: Bhatt} - comment: UMass Amherst - id: rajesh-bhatt - similar: [rajesh-bhat] -- canonical: {first: Pushpak, last: Bhattacharyya} - variants: - - {first: Pushpak, last: Bhattacharya} -- canonical: {first: Virendrakumar, last: Bhavsar} - variants: - - {first: Virendra, last: Bhavsar} -- canonical: {first: Plaban Kr., last: Bhowmick} - variants: - - {first: Plaban, last: Bhowmick} -- canonical: {first: Ergun, last: Bicici} - variants: - - {first: Ergun, last: Biçici} -- canonical: {first: Eckhard, last: Bick} - id: eckhard-bick -- canonical: {first: Timothy W., last: Bickmore} - variants: - - {first: Timothy, last: Bickmore} -- canonical: {first: Chris, last: Biemann} - variants: - - {first: Christian, last: Biemann} -- canonical: {first: Janusz Stanisław, last: Bien} - variants: - - {first: Janusz Stanislaw, last: Bien} - - {first: Janusz S., last: Bień} - - {first: Janusz S., last: Bien} -- canonical: {first: Marie A., last: Bienkowski} - variants: - - {first: Marie, last: Bienkowski} -- canonical: {first: Alan W., last: Biermann} - id: alan-w-biermann - variants: - - {first: Alan, last: Biermann} -- canonical: {first: Jeffrey P., last: Bigham} - variants: - - {first: Jeffrey, last: Bigham} -- canonical: {first: Daniel M., last: Bikel} - variants: - - {first: Daniel, last: Bikel} - - {first: Dan, last: Bikel} -- canonical: {first: Dimitrios, last: Bilidas} - variants: - - {first: Dimitris, last: Bilidas} -- canonical: {first: Eric, last: Bilinski} - variants: - - {first: Éric, last: Bilinski} -- canonical: {first: Mokhtar B., last: Billami} - variants: - - {first: Mokhtar-Boumedyen, last: Billami} -- canonical: {first: Jeff, last: Bilmes} - variants: - - {first: Jeff A., last: Bilmes} -- canonical: {first: Matthew W., last: Bilotti} - variants: - - {first: Matthew, last: Bilotti} -- canonical: {first: Milan, last: Bily} - variants: - - {first: Milan, last: Bílý} -- canonical: {first: Diana, last: Binnenpoorte} - id: diana-binnenpoorte -- canonical: {first: Elizabeth, last: Bishop} - id: elizabeth-bishop -- canonical: {first: Alan W., last: Black} - id: alan-w-black - variants: - - {first: Alan, last: Black} - - {first: Alan W, last: Black} -- canonical: {first: Ezra, last: Black} - id: ezra-black - variants: - - {first: Ezra W., last: Black} -- canonical: {first: Lois M., last: Black} - variants: - - {first: Lois, last: Black} -- canonical: {first: William J., last: Black} - id: william-j-black - variants: - - {first: William J, last: Black} - - {first: William, last: Black} -- canonical: {first: Frédéric, last: Blain} - variants: - - {first: Frederic, last: Blain} -- canonical: {first: Hervé, last: Blanchon} - variants: - - {first: Herve, last: Blanchon} -- canonical: {first: Christian, last: Blaschke} - id: christian-blaschke -- canonical: {first: Nate, last: Blaylock} - id: nate-blaylock -- canonical: {first: David, last: Blei} - variants: - - {first: David M., last: Blei} -- canonical: {first: Hatte, last: Blejer} - variants: - - {first: Hatte R., last: Blejer} -- canonical: {first: André, last: Blessing} - variants: - - {first: Andre, last: Blessing} -- canonical: {first: Hans Ulrich, last: Block} - variants: - - {first: Hans-Ulrich, last: Block} -- canonical: {first: Marsden S., last: Blois} - id: marsden-s-blois -- canonical: {first: Phil, last: Blunsom} - variants: - - {first: Philip, last: Blunsom} -- canonical: {first: Tamara, last: Bobić} - variants: - - {first: Tamara, last: Bobic} -- canonical: {first: Daniel, last: Bobrow} - variants: - - {first: Daniel G., last: Bobrow} -- canonical: {first: Robert, last: Bobrow} - id: robert-bobrow - variants: - - {first: Robert J., last: Bobrow} - - {first: Rusty, last: Bobrow} -- canonical: {first: Péter Pál, last: Boda} - variants: - - {first: Péter, last: Boda} -- canonical: {first: Adams B., last: Bodomo} - variants: - - {first: Adams, last: Bodomo} -- canonical: {first: Guido, last: Boella} - id: guido-boella -- canonical: {first: Katharina, last: Boesefeldt} - id: katharina-boesefeldt -- canonical: {first: Christopher, last: Bogart} - variants: - - {first: Chris, last: Bogart} -- canonical: {first: Branimir, last: Boguraev} - id: branimir-boguraev - variants: - - {first: Branimir K., last: Boguraev} - - {first: Bran, last: Boguraev} -- canonical: {first: Igor, last: Boguslavsky} - variants: - - {first: Igor M., last: Boguslavsky} -- canonical: {first: Dan, last: Bohus} - id: dan-bohus - variants: - - {first: Dan, last: Bohuş} -- canonical: {first: Sean, last: Boisen} - id: sean-boisen -- canonical: {first: Christian, last: Boitet} - id: christian-boitet -- canonical: {first: Loic, last: Boizou} - variants: - - {first: Loïc, last: Boizou} -- canonical: {first: Ondřej, last: Bojar} - variants: - - {first: Ondrej, last: Bojar} -- canonical: {first: Julie E., last: Boland} - variants: - - {first: Julie, last: Boland} -- canonical: {first: Daniel, last: Bolaños} - variants: - - {first: Daniel, last: Bolanos} -- canonical: {first: Gemma, last: Boleda} - variants: - - {first: Gemma, last: Boleda Torrent} -- canonical: {first: Andrea, last: Bolognesi} - id: andrea-bolognesi -- canonical: {first: Igor A., last: Bolshakov} - variants: - - {first: Igor, last: Bolshakov} -- canonical: {first: Antonio, last: Bonafonte} - id: antonio-bonafonte -- canonical: {first: Jean-François, last: Bonastre} - id: jean-francois-bonastre - variants: - - {first: Jean-Francois, last: Bonastre} -- canonical: {first: Guillaume, last: Bonfante} - variants: - - {first: Guillame, last: Bonfante} -- canonical: {first: Claire, last: Bonial} - variants: - - {first: Claire N., last: Bonial} -- canonical: {first: Marco Aldo Piccolino, last: Boniforti} - variants: - - {first: Marco Aldo, last: Piccolino Boniforti} -- canonical: {first: Hélène, last: Bonneau-Maynard} - id: helene-bonneau-maynard - variants: - - {first: Hélène, last: Maynard} -- canonical: {first: Kalina, last: Bontcheva} - id: kalina-bontcheva - similar: [katina-bontcheva] -- canonical: {first: Katina, last: Bontcheva} - id: katina-bontcheva - similar: [kalina-bontcheva] -- canonical: {first: German, last: Bordel} - id: german-bordel - variants: - - {first: Germán, last: Bordel} -- canonical: {first: Emanuela, last: Boroş} - variants: - - {first: Emanuela, last: Boroș} - - {first: Emanuela, last: Boros} -- canonical: {first: Tiberiu, last: Boroş} - variants: - - {first: Tiberiu, last: Boroș} - - {first: Tiberiu, last: Boros} -- canonical: {first: Sonja, last: Bosch} - variants: - - {first: Sonja E., last: Bosch} -- canonical: {first: Matko, last: Bosnjak} - variants: - - {first: Matko, last: Bošnjak} -- canonical: {first: Elizabeth C., last: Botha} - id: elizabeth-c-botha -- canonical: {first: Alexandre, last: Bouchard-Côté} - variants: - - {first: Alexandre, last: Bouchard} -- canonical: {first: Abdessalam, last: Bouchekif} - variants: - - {first: Abdesselam, last: Bouchekif} -- canonical: {first: Mohamed Mahdi, last: Boudabous} - variants: - - {first: Mohamed, last: Boudabous} -- canonical: {first: Aicha, last: Bouhjar} - variants: - - {first: Aïcha, last: Bouhjar} -- canonical: {first: Pierrette, last: Bouillon} - id: pierrette-bouillon -- canonical: {first: Philippe, last: Boula de Mareüil} - id: philippe-boula-de-mareuil - variants: - - {first: Philippe Boula, last: de Mareüil} -- canonical: {first: Gilles, last: Boulianne} - id: gilles-boulianne -- canonical: {first: Paolo, last: Bouquet} - id: paolo-bouquet -- canonical: {first: Laurent, last: Bourbeau} - id: laurent-bourbeau -- canonical: {first: Caroline, last: Bousquet-Vernhettes} - variants: - - {first: Caroline, last: Bousquet} -- canonical: {first: Lou, last: Boves} - variants: - - {first: Louis, last: Boves} -- canonical: {first: Samuel, last: Bowman} - variants: - - {first: Samuel R., last: Bowman} - - {first: Sam, last: Bowman} -- canonical: {first: Stephen, last: Boxwell} - variants: - - {first: Stephen A., last: Boxwell} -- canonical: {first: Richard D., last: Boyce} - variants: - - {first: Richard, last: Boyce} -- canonical: {first: Andrew, last: Boyd} - variants: - - {first: Andrew D., last: Boyd} -- canonical: {first: Amber, last: Boydstun} - variants: - - {first: Amber E., last: Boydstun} -- canonical: {first: Kristy, last: Boyer} - variants: - - {first: Kristy Elizabeth, last: Boyer} -- canonical: {first: Cem, last: Bozsahin} - variants: - - {first: Cem, last: Bozşahin} - - {first: H. Cem, last: Bozsahin} -- canonical: {first: Olivier, last: Boëffard} - variants: - - {first: Olivier, last: Boeffard} -- canonical: {first: Lisa, last: Braden-Harder} - variants: - - {first: Lisa C., last: Braden-Harder} -- canonical: {first: Deborah, last: Brady} - variants: - - {first: Deb, last: Brady} -- canonical: {first: Annelies, last: Braffort} - id: annelies-braffort -- canonical: {first: S.R.K., last: Branavan} - variants: - - {first: S. R. K., last: Branavan} -- canonical: {first: António, last: Branco} - variants: - - {first: Antonio, last: Branco} - - {first: Antonio H., last: Branco} - - {first: António Horta, last: Branco} -- canonical: {first: Andrew, last: Brasher} - id: andrew-brasher -- canonical: {first: Harry, last: Bratt} - id: harry-bratt -- canonical: {first: Adrian, last: Braşoveanu} - variants: - - {first: Adrian, last: Brasoveanu} -- canonical: {first: Eric, last: Breck} - variants: - - {first: Eric J., last: Breck} -- canonical: {first: D. S., last: Bree} - variants: - - {first: D.S., last: Bree} -- canonical: {first: Jason, last: Brenier} - variants: - - {first: Jason M., last: Brenier} -- canonical: {first: Susan E., last: Brennan} - variants: - - {first: Susan, last: Brennan} -- canonical: {first: Xavier, last: Briffault} - id: xavier-briffault -- canonical: {first: Ted, last: Briscoe} - id: ted-briscoe - variants: - - {first: Edward, last: Briscoe} -- canonical: {first: George Aaron, last: Broadwell} - variants: - - {first: Aaron, last: Broadwell} - - {first: G. Aaron, last: Broadwell} -- canonical: {first: Daan, last: Broeder} - id: daan-broeder -- canonical: {first: Michael K., last: Brown} - variants: - - {first: Michael, last: Brown} -- canonical: {first: Peter F., last: Brown} - id: peter-f-brown -- canonical: {first: Ralf D., last: Brown} - variants: - - {first: Ralf, last: Brown} -- canonical: {first: Susan Windisch, last: Brown} - variants: - - {first: Susan, last: Windisch Brown} - - {first: Susan, last: Brown} - - {first: Susan W., last: Brown} -- canonical: {first: Rebecca, last: Bruce} - variants: - - {first: Rebecca F., last: Bruce} -- canonical: {first: Hennie, last: Brugman} - id: hennie-brugman -- canonical: {first: Ernst, last: Buchberger} - id: ernst-buchberger -- canonical: {first: Chris, last: Buckley} - id: chris-buckley -- canonical: {first: Sven, last: Buechel} - variants: - - {first: Sven, last: Büchel} -- canonical: {first: Alberto, last: Bugarín Diz} - variants: - - {first: Alberto, last: Bugarín} - - {first: Alberto, last: Bugarin} -- canonical: {first: Trung, last: Bui} - variants: - - {first: Trung H., last: Bui} -- canonical: {first: Florin, last: Bulgarov} - variants: - - {first: Florin Adrian, last: Bulgarov} -- canonical: {first: Barbara, last: Bullock} - variants: - - {first: Barbara E., last: Bullock} -- canonical: {first: Razvan, last: Bunescu} - variants: - - {first: Razvan C., last: Bunescu} -- canonical: {first: Harry, last: Bunt} - id: harry-bunt -- canonical: {first: Laura, last: Burdick} - variants: - - {first: Laura, last: Wendlandt} -- canonical: {first: Gaston, last: Burek} - variants: - - {first: Gaston G., last: Burek} -- canonical: {first: Clint, last: Burfoot} - variants: - - {first: Clinton, last: Burfoot} -- canonical: {first: John D., last: Burger} - comment: MITRE - id: john-d-burger - similar: [john-f-burger] -- canonical: {first: John F., last: Burger} - comment: System Development Corporation - id: john-f-burger - similar: [john-d-burger] -- canonical: {first: Christopher J.C., last: Burges} - variants: - - {first: Chris J.C., last: Burges} -- canonical: {first: Diego A., last: Burgos} - variants: - - {first: Diego, last: Burgos} -- canonical: {first: Bianka, last: Buschbeck} - id: bianka-buschbeck - variants: - - {first: Bianka, last: Buschbeck-Wolf} -- canonical: {first: Andrei, last: Butnaru} - variants: - - {first: Andrei M., last: Butnaru} -- canonical: {first: Bill, last: Byrne} - comment: University of Cambridge - id: bill-byrne -- canonical: {first: Bill, last: Byrne} - comment: UCSD Ph.d; https://www.linkedin.com/in/billb/ - id: bill-byrne-ucsd -- canonical: {first: Donna, last: Byron} - id: donna-byron - variants: - - {first: Donna K., last: Byron} -- canonical: {first: Tamás, last: Bíró} - variants: - - {first: Tamás, last: Biró} -- canonical: {first: Benjamin, last: Börschinger} - variants: - - {first: Benjamin, last: Boerschinger} -- canonical: {first: Kenneth S., last: Bøgh} - variants: - - {first: Kenneth, last: Bøgh} -- canonical: {first: Alena, last: Bŏhmová} - variants: - - {first: Alena, last: Bohmova} - - {first: Alena, last: Böhmová} -- canonical: {first: Sheila, last: C. M. de Sousa} - variants: - - {first: Sheila C.M., last: de Sousa} -- canonical: {first: José G., last: C. de Souza} - variants: - - {first: José G.C., last: de Souza} - - {first: Jose G.C., last: de Souza} - - {first: José Guilherme, last: Camargo de Souza} - - {first: José G., last: Camargo de Souza} - - {first: José Guilherme, last: C. de Souza} -- canonical: {first: Malarkodi, last: C.S.} - variants: - - {first: Malarkodi, last: C.S} - - {first: CS., last: Malarkodi} -- canonical: {first: Joao Paulo, last: Cabral} - variants: - - {first: João P., last: Cabral} -- canonical: {first: Luís Miguel, last: Cabral} - variants: - - {first: Luís, last: Cabral} -- canonical: {first: Luis-Adrián, last: Cabrera-Diego} - variants: - - {first: Luis Adrián, last: Cabrera-Diego} -- canonical: {first: Maria Teresa, last: Cabré} - id: maria-teresa-cabre - variants: - - {first: M. Teresa, last: Cabré} - - {first: Teresa, last: Cabré} -- canonical: {first: Whitney L., last: Cade} - variants: - - {first: Whitney, last: Cade} -- canonical: {first: Anais, last: Cadilhac} - variants: - - {first: Anaïs, last: Cadilhac} -- canonical: {first: Michael J., last: Cafarella} - variants: - - {first: Michael, last: Cafarella} -- canonical: {first: Lynne, last: Cahill} - id: lynne-cahill - variants: - - {first: Lynne J., last: Cahill} -- canonical: {first: Dongfeng, last: Cai} - variants: - - {first: DongFeng, last: Cai} -- canonical: {first: Jun Fu, last: Cai} - variants: - - {first: Junfu, last: Cai} -- canonical: {first: Qingqing, last: Cai} - variants: - - {first: Qing-qing, last: Cai} -- canonical: {first: Jo, last: Calder} - variants: - - {first: Jonathan, last: Calder} -- canonical: {first: Mary Elaine, last: Califf} - id: mary-elaine-califf -- canonical: {first: Charles B., last: Callaway} - variants: - - {first: Charles, last: Callaway} -- canonical: {first: Diego, last: Calvanese} - id: diego-calvanese -- canonical: {first: Nicoletta, last: Calzolari} - id: nicoletta-calzolari - variants: - - {first: Nicoletta Calzolari, last: Zamorani} -- canonical: {first: Jose, last: Camacho-Collados} - variants: - - {first: José, last: Camacho-Collados} -- canonical: {first: Ellen, last: Campana} - id: ellen-campana -- canonical: {first: Joseph P., last: Campbell} - variants: - - {first: Joseph, last: Campbell} -- canonical: {first: Francisco, last: Campillo} - variants: - - {first: Francisco Campillo, last: Díaz} -- canonical: {first: Doğan, last: Can} - variants: - - {first: Dogan, last: Can} -- canonical: {first: Arnaldo, last: 'Candido, Jr.'} - variants: - - {first: Arnaldo, last: Candido Jr.} - - {first: Arnaldo, last: Candido Jr} - - {first: Arnaldo, last: Candido} -- canonical: {first: Marie, last: Candito} - variants: - - {first: Marie-Helene, last: Candito} - - {first: Marie-Hélène, last: Candito} -- canonical: {first: Amparo Elizabeth, last: Cano Basave} - variants: - - {first: Amparo Elizabeth, last: Cano-Basave} -- canonical: {first: Xuan-Nga, last: Cao} - variants: - - {first: Xuân-Nga, last: Cao} - - {first: Xuân-Nga Cao, last: Kam} -- canonical: {first: Amedeo, last: Cappelli} - id: amedeo-cappelli -- canonical: {first: George, last: Carayannis} - id: george-carayannis -- canonical: {first: José María, last: Carazo} - variants: - - {first: José-María, last: Carazo} -- canonical: {first: Sandra, last: Carberry} - variants: - - {first: M. Sandra, last: Carberry} -- canonical: {first: Jaime G., last: Carbonell} - comment: CMU - id: jaime-g-carbonell - similar: [jaime-r-carbonell] - variants: - - {first: Jaime, last: Carbonell} - - {first: Jaime G., last: Carbonell Jr} -- canonical: {first: Jaime R., last: Carbonell} - comment: BBN; d. 1973 - id: jaime-r-carbonell - similar: [jaime-g-carbonell] -- canonical: {first: Antonio, last: Cardenal} - variants: - - {first: Antonio, last: Cardenal-Lopez} -- canonical: {first: Claire, last: Cardie} - id: claire-cardie -- canonical: {first: Patrick, last: Cardinal} - id: patrick-cardinal -- canonical: {first: Paula, last: Cardoso} - id: paula-cardoso - variants: - - {first: Paula C. Figueira, last: Cardoso} - - {first: Paula C. F., last: Cardoso} -- canonical: {first: George, last: Caridakis} - id: george-caridakis -- canonical: {first: Kathleen M., last: Carley} - variants: - - {first: Kathleen, last: Carley} -- canonical: {first: Mark, last: Carman} - variants: - - {first: Mark J., last: Carman} - - {first: Mark James, last: Carman} -- canonical: {first: Jorge, last: Carrillo de Albornoz} - variants: - - {first: Jorge Carrillo, last: de Albornoz} -- canonical: {first: Jeremy J., last: Carroll} - variants: - - {first: Jeremy, last: Carroll} -- canonical: {first: John A., last: Carroll} - comment: Cambridge, Sussex - id: john-a-carroll - similar: [john-b-carroll] - variants: - - {first: John, last: Carroll} -- canonical: {first: John B., last: Carroll} - comment: UNC - id: john-b-carroll - similar: [john-a-carroll] -- canonical: {first: Julie, last: Carson-Berndsen} - variants: - - {first: Julie, last: Carson} - - {first: Julle, last: Carson-Berndsen} -- canonical: {first: David, last: Carter} - variants: - - {first: David M., last: Carter} -- canonical: {first: Christopher, last: Caruso} - variants: - - {first: Chris, last: Caruso} -- canonical: {first: Vitor, last: Carvalho} - variants: - - {first: Vitor R., last: Carvalho} -- canonical: {first: Francisco, last: Casacuberta} - id: francisco-casacuberta -- canonical: {first: Bernardino, last: Casas} - id: bernardino-casas -- canonical: {first: Helena de Medeiros, last: Caseli} - variants: - - {first: Helena, last: de Medeiros Caseli} -- canonical: {first: Arantza, last: Casillas} - id: arantza-casillas -- canonical: {first: Asunción, last: Castaño} - id: asuncion-castano -- canonical: {first: José, last: Castaño} - variants: - - {first: José M., last: Castaño} -- canonical: {first: João Miguel, last: Casteleiro} - variants: - - {first: João, last: Casteleiro} -- canonical: {first: Núria, last: Castell} - variants: - - {first: Nuria, last: Castell} -- canonical: {first: Antonio, last: Castellanos} - id: antonio-castellanos -- canonical: {first: Eric, last: Castelli’} - variants: - - {first: Eric, last: Castelli} -- canonical: {first: Irene, last: Castellón} - variants: - - {first: Irene, last: Castellon} -- canonical: {first: Julio, last: Castillo} - variants: - - {first: Julio Javier, last: Castillo} -- canonical: {first: Thiago, last: Castro Ferreira} - variants: - - {first: Thiago, last: Ferreira} -- canonical: {first: Maria Lucia, last: Castro Jorge} - variants: - - {first: Maria Lucía Castro, last: Jorge} - - {first: Maria Lucía, last: Castro Jorge} -- canonical: {first: Maria Jose, last: Castro-Bleda} - variants: - - {first: María José, last: Castro} - - {first: María-José, last: Castro} -- canonical: {first: Dolors, last: Català} - variants: - - {first: Dolors, last: Catala} -- canonical: {first: Maria Novella, last: Catarsi} - id: maria-novella-catarsi -- canonical: {first: Roberta, last: Catizone} - id: roberta-catizone -- canonical: {first: Gabriela, last: Cavaglià} - variants: - - {first: Gabriela, last: Cavaglia} -- canonical: {first: Alexandru, last: Ceauşu} - variants: - - {first: Alexandru, last: Ceausu} -- canonical: {first: Guillermo A., last: Cecchi} - variants: - - {first: Guillermo, last: Cecchi} -- canonical: {first: Ali Hadian, last: Cefidekhanie} - variants: - - {first: Ali, last: Hadian} -- canonical: {first: Pedro Concejero, last: Cerezo} - variants: - - {first: Pedro, last: Concejero} -- canonical: {first: Scott A., last: Hale} - variants: - - {first: Scott, last: Hale} -- canonical: {first: Jeong-Won, last: Cha} - variants: - - {first: Jeongwon, last: Cha} -- canonical: {first: Seungho, last: Cha} - id: seungho-cha -- canonical: {first: Joyce, last: Chai} - variants: - - {first: Joyce Yue, last: Chai} - - {first: Joyce Y., last: Chai} -- canonical: {first: Kian Ming A., last: Chai} - variants: - - {first: Kian Ming Adam, last: Chai} -- canonical: {first: Aimilios, last: Chalamandaris} - id: aimilios-chalamandaris - variants: - - {first: Chalamandaris, last: Aimilios} -- canonical: {first: Nathanael, last: Chambers} - variants: - - {first: Nathan, last: Chambers} -- canonical: {first: Gary K. K., last: Chan} - id: gary-k-k-chan -- canonical: {first: Kwok-Ping, last: Chan} - variants: - - {first: Kwok Ping, last: Chan} -- canonical: {first: Samuel W. K., last: Chan} - id: samuel-w-k-chan - variants: - - {first: Samuel W.K., last: Chan} -- canonical: {first: Brian J., last: Chandler} - id: brian-j-chandler - variants: - - {first: Brian, last: Chandler} -- canonical: {first: Sharath, last: Chandra Guntuku} - variants: - - {first: Sharath Chandra, last: Guntuku} -- canonical: {first: Raman, last: Chandrasekar} - id: raman-chandrasekar -- canonical: {first: Muthu Kumar, last: Chandrasekaran} - variants: - - {first: Muthu, last: Kumar Chandrasekaran} -- canonical: {first: Angel, last: Chang} - variants: - - {first: Angel X., last: Chang} -- canonical: {first: Baobao, last: Chang} - variants: - - {first: Bao-Bao, last: Chang} -- canonical: {first: Ching Yun, last: Chang} - variants: - - {first: Ching-Yun, last: Chang} -- canonical: {first: Edward Y., last: Chang} - variants: - - {first: Edward, last: Chang} -- canonical: {first: Jason S., last: Chang} - variants: - - {first: Jason, last: Chang} - - {first: Jason J. S., last: Chang} - - {first: Jason J.S., last: Chang} - - {first: Jason J., last: Chang} -- canonical: {first: Jim, last: Chang} - variants: - - {first: Jimmy, last: Chang} -- canonical: {first: Joseph Z., last: Chang} - variants: - - {first: Joseph, last: Chang} - - {first: Joseph Z, last: Chang} -- canonical: {first: Jyun-Sheng, last: Chang} - variants: - - {first: Jyun-sheng, last: Chang} -- canonical: {first: Kai-min Kevin, last: Chang} - variants: - - {first: Kai-Min, last: Chang} - - {first: Kai-min K., last: Chang} -- canonical: {first: Li-Li, last: Chang} - variants: - - {first: Li-li, last: Chang} -- canonical: {first: Li-Ping, last: Chang} - variants: - - {first: Li-ping, last: Chang} -- canonical: {first: Pi-Chuan, last: Chang} - variants: - - {first: Pichuan, last: Chang} -- canonical: {first: Shih-Fu, last: Chang} - variants: - - {first: Shih-fu, last: Chang} -- canonical: {first: Yu-wei, last: Chang} - variants: - - {first: Yu-Wei, last: Chang} -- canonical: {first: F. Y. August, last: Chao} - variants: - - {first: F.Y. August, last: Chao} -- canonical: {first: Wenhan, last: Chao} - variants: - - {first: WenHan, last: Chao} - - {first: Wen-Han, last: Chao} -- canonical: {first: Wendy, last: Chapman} - variants: - - {first: Wendy W, last: Chapman} -- canonical: {first: Marcela, last: Charfuelan} - variants: - - {first: Marcela, last: Charfuelán} -- canonical: {first: Eric, last: Charton} - variants: - - {first: Éric, last: Charton} -- canonical: {first: Noël, last: Chateau} - id: noel-chateau -- canonical: {first: Niladri, last: Chatterjee} - id: niladri-chatterjee -- canonical: {first: Rajen, last: Chatterjee} - variants: - - {first: Rajan, last: Chatterjee} -- canonical: {first: Jacques, last: Chauché} - id: jacques-chauche -- canonical: {first: Himani, last: Chaudhry} - variants: - - {first: Himani, last: Chaudhary} -- canonical: {first: Bidyut Baran, last: Chaudhuri} - id: bidyut-baran-chaudhuri - variants: - - {first: Bidyut B., last: Chaudhuri} -- canonical: {first: Chiwei, last: Che} - id: chiwei-che -- canonical: {first: Alvin Cheng-Hsien, last: Chen} - variants: - - {first: Cheng-Hsien, last: Chen} -- canonical: {first: Catherine, last: Chen} - comment: UC Berkley - id: catherine-chen-ucberkley -- canonical: {first: Catherine, last: Chen} - comment: Brown - id: catherine-chen-bu -- canonical: {first: Chao-Jan, last: Chen} - variants: - - {first: Chao-jan, last: Chen} -- canonical: {first: Cheng-Der, last: Chen} - variants: - - {first: Cheng-der, last: Chen} -- canonical: {first: Feng-Yi, last: Chen} - variants: - - {first: Feng-yi, last: Chen} -- canonical: {first: Francine, last: Chen} - variants: - - {first: Francine R., last: Chen} -- canonical: {first: Helen Kaiyun, last: Chen} - variants: - - {first: Kai-Yun, last: Chen} - - {first: Kai-yun, last: Chen} - - {first: Helen Kai-yun, last: Chen} -- canonical: {first: Huey-Chyun, last: Chen} - variants: - - {first: Mathis Huey-chyun, last: Chen} -- canonical: {first: Jen Nan, last: Chen} - variants: - - {first: Jen-Nan, last: Chen} - - {first: Jen-nan, last: Chen} -- canonical: {first: Jiajun, last: Chen} - variants: - - {first: Jia-jun, last: Chen} - - {first: Jia-Jun, last: Chen} -- canonical: {first: Keh-Jiann, last: Chen} - variants: - - {first: Keh-jiann, last: Chen} - - {first: Ke-Jiann, last: Chen} - - {first: K. J., last: Chen} -- canonical: {first: Qian, last: Cao} - id: qian-cao-renmin - orcid: 0000-0003-3288-1714 - institution: Renmin University of China - comment: Renmin -- canonical: {first: Qian, last: Cao} - id: qian-cao - comment: May refer to several people -- canonical: {first: Kuang-hua, last: Chen} - variants: - - {first: Kuang-Hua, last: Chen} -- canonical: {first: Li-mei, last: Chen} - variants: - - {first: Li-Mei, last: Chen} -- canonical: {first: Liang-Yu, last: Chen} - variants: - - {first: Liangyu, last: Chen} -- canonical: {first: Mei-hua, last: Chen} - variants: - - {first: Mei-Hua, last: Chen} -- canonical: {first: Mia Xu, last: Chen} - variants: - - {first: Mia, last: Chen} -- canonical: {first: Nancy, last: Chen} - variants: - - {first: Nancy F., last: Chen} -- canonical: {first: Po Chun, last: Chen} - variants: - - {first: Po-Chun, last: Chen} -- canonical: {first: Po Hsuan, last: Chen} - variants: - - {first: Po-Hsuan, last: Chen} -- canonical: {first: Shun-Der, last: Chen} - variants: - - {first: Shun-Der, last: Cheng} -- canonical: {first: Ssu-Cheng, last: Chen} - variants: - - {first: Su-Cheng, last: Chen} -- canonical: {first: Stanley F., last: Chen} - variants: - - {first: Stanley, last: Chen} -- canonical: {first: Tsong-yi, last: Chen} - variants: - - {first: Tsong-Yi, last: Chen} -- canonical: {first: Xixian, last: Chen} - variants: - - {first: XiXian, last: Chen} -- canonical: {first: Yi-Rong, last: Chen} - variants: - - {first: YiRong, last: Chen} - - {first: Yi-Rung, last: Chen} -- canonical: {first: YiChun, last: Chen} - variants: - - {first: Yi-Chun, last: Chen} -- canonical: {first: Yuanzhu Peter, last: Chen} - variants: - - {first: Peter, last: Chen} -- canonical: {first: Zhiyu, last: Chen} - id: zhiyu-chen-lehigh - orcid: 0000-0002-3096-7912 - comment: Lehigh University -- canonical: {first: Zhiyu, last: Chen} - id: zhiyu-chen - comment: May refer to several people -- canonical: {first: Charles, last: 'Chen, Jr.'} - variants: - - {first: Charles, last: Chen} - - {first: Charles, last: Chen Jr.} -- canonical: {first: Noureddine, last: Chenfour} - id: noureddine-chenfour -- canonical: {first: Wen-Huei, last: Cheng} - variants: - - {first: Wen-Hui, last: Cheng} -- canonical: {first: Xueqi, last: Cheng} - variants: - - {first: Xue-Qi, last: Cheng} -- canonical: {first: Chi-Shun, last: Cheung} - variants: - - {first: Chi Shun, last: Cheung} -- canonical: {first: Jackie Chi Kit, last: Cheung} - variants: - - {first: Jackie C. K., last: Cheung} - - {first: Jackie C.K., last: Cheung} - - {first: Jackie, last: Cheung} -- canonical: {first: Lawrence Y. L., last: Cheung} - id: lawrence-y-l-cheung - variants: - - {first: Lawrence Y.L., last: Cheung} -- canonical: {first: Peter A., last: Chew} - variants: - - {first: Peter, last: Chew} -- canonical: {first: Adam, last: Cheyer} - id: adam-cheyer -- canonical: {first: Tung-Hui, last: Chiang} - variants: - - {first: TungHui, last: Chiang} -- canonical: {first: Yuang-Chin, last: Chiang} - variants: - - {first: Yuang-chin, last: Chiang} -- canonical: {first: Jen-Tzung, last: Chien} - variants: - - {first: Jen-Tzong, last: Chien} -- canonical: {first: Lois C., last: Childs} - variants: - - {first: Lois, last: Childs} -- canonical: {first: Phil Sidney, last: Ostheimer} - id: phil-sidney-ostheimer - orcid: 0009-0009-6186-3233 - institution: RPTU Kaiserslautern-Landau - variants: - - {first: Phil, last: Ostheimer} -- canonical: {first: Odbayar, last: Chimeddorj} - variants: - - {first: Chimeddorj, last: Odbayar} -- canonical: {first: Nancy, last: Chinchor} - id: nancy-chinchor - variants: - - {first: Nancy A., last: Chinchor} -- canonical: {first: P. C., last: Ching} - variants: - - {first: P.C., last: Ching} -- canonical: {first: Manoj, last: Chinnakotla} - variants: - - {first: Manoj K., last: Chinnakotla} - - {first: Manoj Kumar, last: Chinnakotla} -- canonical: {first: Luminita, last: Chiran} - id: luminita-chiran -- canonical: {first: Mahesh V., last: Chitrao} - variants: - - {first: Mahesh, last: Chitrao} -- canonical: {first: Chih-Ming, last: Chiu} - variants: - - {first: Chih-ming, last: Chiu} -- canonical: {first: Hsun-Wen, last: Chiu} - variants: - - {first: Hsun-wen, last: Chiu} -- canonical: {first: Timothy, last: Chklovski} - variants: - - {first: Tim, last: Chklovski} -- canonical: {first: Hakaze, last: Cho} - id: hakaze-cho - variants: - - {first: Yufeng, last: Zhao} -- canonical: {first: Martin, last: Chodorow} - variants: - - {first: Martin S., last: Chodorow} -- canonical: {first: GyuHyeon, last: Choi} - variants: - - {first: Gyu-Hyeon, last: Choi} -- canonical: {first: Jinho D., last: Choi} - variants: - - {first: Jinho, last: Choi} -- canonical: {first: Key-Sun, last: Choi} - variants: - - {first: Key-sun, last: Choi} -- canonical: {first: Annick, last: Choisier} - id: annick-choisier -- canonical: {first: Mickey W. C., last: Chong} - variants: - - {first: Mickey W.C., last: Chong} -- canonical: {first: George, last: Chou} - id: george-chou -- canonical: {first: Seng-Cho T., last: Chou} - variants: - - {first: Seng-cho T., last: Chou} -- canonical: {first: Prafulla Kumar, last: Choubey} - variants: - - {first: Prafulla, last: Choubey} -- canonical: {first: Khalid, last: Choukri} - id: khalid-choukri - variants: - - {first: Kalid, last: Choukri} -- canonical: {first: Yen-Lu, last: Chow} - variants: - - {first: Yen-lu, last: Chow} -- canonical: {first: Thomas Ulrich, last: Christiansen} - variants: - - {first: Thomas, last: Christiansen} -- canonical: {first: Dimitris, last: Christodoulakis} - variants: - - {first: Dimitris N., last: Christodoulakis} -- canonical: {first: C. Mario, last: Christoudias} - variants: - - {first: Mario, last: Christoudias} -- canonical: {first: Grzegorz, last: Chrupała} - variants: - - {first: Grzegorz, last: Chrupala} -- canonical: {first: Jennifer, last: Chu-Carroll} - variants: - - {first: Jennifer, last: Chu} -- canonical: {first: Tat-Seng, last: Chua} - variants: - - {first: Tat Seng, last: Chua} -- canonical: {first: Ka-Wai, last: Chui} - variants: - - {first: Kawai, last: Chui} -- canonical: {first: Grace, last: Chung} - variants: - - {first: Grace Y, last: Chung} -- canonical: {first: Hee Sung, last: Chung} - variants: - - {first: Hee-Sung, last: Chung} -- canonical: {first: HooJung, last: Chung} - variants: - - {first: Hoojung, last: Chung} -- canonical: {first: Siaw-Fong, last: Chung} - variants: - - {first: Siaw Fong, last: Chung} -- canonical: {first: You-Shan, last: Chung} - variants: - - {first: You-shan, last: Chung} -- canonical: {first: Kenneth, last: Church} - variants: - - {first: Kenneth Ward, last: Church} - - {first: Ken, last: Church} - - {first: Kenneth W., last: Church} -- canonical: {first: Christopher, last: Chute} - variants: - - {first: Christopher G., last: Chute} -- canonical: {first: Ilyas, last: Cicekli} - variants: - - {first: İlyas, last: Çiçekli} -- canonical: {first: Christopher, last: Cieri} - variants: - - {first: Chris, last: Cieri} -- canonical: {first: Philipp, last: Cimiano} - id: philipp-cimiano -- canonical: {first: Alina Maria, last: Ciobanu} - variants: - - {first: Alina, last: Ciobanu} -- canonical: {first: Manuel R., last: Ciosici} - variants: - - {first: Manuel, last: Ciosici} -- canonical: {first: Fabio, last: Ciravegna} - id: fabio-ciravegna -- canonical: {first: Montserrat, last: Civit} - id: montserrat-civit -- canonical: {first: Chris, last: Clark} - variants: - - {first: Christine, last: Clark} -- canonical: {first: Jonathan H., last: Clark} - variants: - - {first: Jonathan, last: Clark} -- canonical: {first: Charles L. A., last: Clarke} - id: charles-l-a-clarke -- canonical: {first: Luka A., last: Clarke} - variants: - - {first: Luka, last: Clarke} -- canonical: {first: Mark A., last: Clements} - variants: - - {first: Mark, last: Clements} -- canonical: {first: Miruna, last: Clinciu} - variants: - - {first: Miruna-Adriana, last: Clinciu} -- canonical: {first: John H., last: 'Clippinger, Jr.'} - variants: - - {first: John Henry, last: 'Clippinger, Jr.'} -- canonical: {first: Paul, last: Clough} - variants: - - {first: Paul D., last: Clough} -- canonical: {first: Martin, last: Cmejrek} - variants: - - {first: Martin, last: Čmejrek} -- canonical: {first: Noah, last: Coccaro} - id: noah-coccaro -- canonical: {first: Jose, last: Coch} - variants: - - {first: José, last: Coch} -- canonical: {first: John, last: Cocke} - id: john-cocke -- canonical: {first: Joan, last: Codina-Filba} - variants: - - {first: Joan, last: Codina-Filbà} - - {first: Joan, last: Codina} -- canonical: {first: Jordan, last: Cohen} - variants: - - {first: Jordan R., last: Cohen} -- canonical: {first: K. Bretonnel, last: Cohen} - variants: - - {first: Kevin Bretonnel, last: Cohen} - - {first: Kevin B., last: Cohen} - - {first: Kevin, last: Cohen} -- canonical: {first: Philip R., last: Cohen} - variants: - - {first: Philip, last: Cohen} - - {first: Phil R., last: Cohen} -- canonical: {first: Shay B., last: Cohen} - variants: - - {first: Shay, last: Cohen} -- canonical: {first: Trevor, last: Cohen} - comment: University of Washington - id: trevor-cohen - similar: [trevor-cohn] -- canonical: {first: William, last: Cohen} - variants: - - {first: William W., last: Cohen} -- canonical: {first: Yael, last: Cohen-Sygal} - variants: - - {first: Yael, last: Sygal} -- canonical: {first: Luísa, last: Coheur} - variants: - - {first: Luisa, last: Coheur} -- canonical: {first: Trevor, last: Cohn} - comment: University of Melbourne - id: trevor-cohn - similar: [trevor-cohen] -- canonical: {first: Andrew W., last: Cole} - variants: - - {first: Andrew, last: Cole} -- canonical: {first: Ronald, last: Cole} - id: ronald-cole - variants: - - {first: Ron, last: Cole} - - {first: Ronald A., last: Cole} -- canonical: {first: Mariona, last: Coll Ardanuy} - variants: - - {first: Mariona Coll, last: Ardanuy} -- canonical: {first: Christophe, last: Collet} - id: christophe-collet -- canonical: {first: Jean-Marc, last: Colletta} - id: jean-marc-colletta -- canonical: {first: Edward, last: Collins} - variants: - - {first: Ed, last: Collins} -- canonical: {first: Michael, last: Collins} - variants: - - {first: Michael John, last: Collins} - - {first: Mike, last: Collins} -- canonical: {first: Sandra, last: Collovini} - id: sandra-collovini -- canonical: {first: Pere, last: Comas} - variants: - - {first: Pere R., last: Comas} -- canonical: {first: Donald C., last: Comeau} - variants: - - {first: Don, last: Comeau} - - {first: Donald, last: Comeau} - - {first: Donald C, last: Comeau} -- canonical: {first: Elisabet, last: Comelles} - id: elisabet-comelles -- canonical: {first: Kristian, last: Concepcion} - variants: - - {first: Kris, last: Concepcion} -- canonical: {first: Jae, last: Hee Lee} - id: jae-hee-lee-bremen - orcid: 0000-0001-9840-780X - institution: University of Bremen - comment: Bremen -- canonical: {first: Jae, last: Hee Lee} - id: jae-hee-lee - comment: May refer to several people -- canonical: {first: Sherri, last: Condon} - variants: - - {first: Sherri L., last: Condon} -- canonical: {first: John, last: Conroy} - variants: - - {first: John M., last: Conroy} -- canonical: {first: Matthieu, last: Constant} - variants: - - {first: Mathieu, last: Constant} -- canonical: {first: Susan P., last: Converse} - variants: - - {first: Susan, last: Converse} -- canonical: {first: Helen V., last: Cook} - variants: - - {first: Helen, last: Cook} - - {first: Helen V, last: Cook} -- canonical: {first: Peter-Arno, last: Coppen} - id: peter-arno-coppen -- canonical: {first: Ornella, last: Corazzari} - id: ornella-corazzari -- canonical: {first: Greville C., last: Corbett} - variants: - - {first: Greville, last: Corbett} - - {first: Greville G., last: Corbett} -- canonical: {first: Peter, last: Corbett} - variants: - - {first: Peter T., last: Corbett} -- canonical: {first: João Paulo, last: Cordeiro} - variants: - - {first: João, last: Cordeiro} -- canonical: {first: Silvio, last: Cordeiro} - variants: - - {first: Silvio Ricardo, last: Cordeiro} -- canonical: {first: Mark G., last: Core} - variants: - - {first: Mark, last: Core} -- canonical: {first: Courtney D., last: Corley} - variants: - - {first: Courtney, last: Corley} -- canonical: {first: Annick, last: Corluy} - id: annick-corluy -- canonical: {first: Thomas L., last: Cornell} - variants: - - {first: Thomas, last: Cornell} -- canonical: {first: Gloria, last: Corpas Pastor} - variants: - - {first: Gloria, last: Corpas} - - {first: Gloria Corpas, last: Pastor} -- canonical: {first: Simon, last: Corston-Oliver} - variants: - - {first: Simon H., last: Corston-Oliver} -- canonical: {first: Louise, last: Corti} - id: louise-corti -- canonical: {first: Santiago, last: Cortés Vaíllo} - variants: - - {first: Santiago, last: Cortes} - - {first: Santiago Cortés, last: Vaíllo} -- canonical: {first: William J., last: Corvey} - variants: - - {first: William, last: Corvey} -- canonical: {first: Angela, last: Costa} - variants: - - {first: Ângela, last: Costa} -- canonical: {first: Luís Fernando, last: Costa} - variants: - - {first: Luís, last: Costa} -- canonical: {first: Christophe, last: Costa Florêncio} - variants: - - {first: Christophe Costa, last: Florencio} -- canonical: {first: Marta R., last: Costa-jussà} - variants: - - {first: Marta, last: R. Costa-jussà} - - {first: Marta R., last: Costa-Jussà} - - {first: Marta R., last: Costa-Jussa} - - {first: Marta, last: Ruiz Costa-jussà} - - {first: Marta Ruiz, last: Costa-jussà} -- canonical: {first: Fintan J., last: Costello} - variants: - - {first: Fintan, last: Costello} -- canonical: {first: William, last: Coster} - variants: - - {first: Will, last: Coster} -- canonical: {first: Louise-Amélie, last: Cougnon} - variants: - - {first: Louis-Amélie, last: Cougnon} -- canonical: {first: Francisco M., last: Couto} - variants: - - {first: Francisco, last: Couto} -- canonical: {first: Daniel, last: Couto Vale} - variants: - - {first: Daniel, last: Couto-Vale} - - {first: Daniel, last: Vale} -- canonical: {first: Jim, last: Cowie} - id: jim-cowie -- canonical: {first: Roddy, last: Cowie} - id: roddy-cowie -- canonical: {first: Benoit, last: Crabbé} - variants: - - {first: Benoît, last: Crabbé} -- canonical: {first: Gregory, last: Crane} - variants: - - {first: Gregory R., last: Crane} -- canonical: {first: Lambros, last: Cranias} - variants: - - {first: Lambros, last: Kranias} -- canonical: {first: Josep M., last: Crego} - variants: - - {first: Josep Maria, last: Crego} - - {first: Josep, last: Crego} -- canonical: {first: Luca, last: Cristoforetti} - id: luca-cristoforetti -- canonical: {first: Matthew, last: Crocker} - variants: - - {first: Matthew W., last: Crocker} -- canonical: {first: W. Bruce, last: Croft} - variants: - - {first: Bruce, last: Croft} -- canonical: {first: Fabien, last: Cromieres} - variants: - - {first: Fabien, last: Cromières} -- canonical: {first: Paul A., last: Crook} - variants: - - {first: Paul, last: Crook} -- canonical: {first: Noa P., last: Cruz Diaz} - variants: - - {first: Noa P., last: Cruz} - - {first: Noa, last: Cruz} - - {first: Noa P., last: Cruz Díaz} -- canonical: {first: Michael, last: Crystal} - variants: - - {first: Michael R., last: Crystal} -- canonical: {first: Andras, last: Csomai} - variants: - - {first: András, last: Csomai} -- canonical: {first: Catia, last: Cucchiarini} - id: catia-cucchiarini -- canonical: {first: Silviu, last: Cucerzan} - variants: - - {first: Silviu-Petru, last: Cucerzan} -- canonical: {first: Chris, last: Culy} - variants: - - {first: Christopher, last: Culy} -- canonical: {first: Hamish, last: Cunningham} - id: hamish-cunningham -- canonical: {first: Arturo, last: Curiel} - variants: - - {first: Arturo, last: Curiel Díaz} -- canonical: {first: Wei, last: Liu} - id: wei-liu-kcl - orcid: 0000-0003-0011-7797 - institution: Kings College London - comment: KCL -- canonical: {first: Wei, last: Liu} - id: wei-liu - comment: May refer to several people -- canonical: {first: James R., last: Curran} - variants: - - {first: James, last: Curran} -- canonical: {first: Douglass, last: Cutting} - variants: - - {first: Doug, last: Cutting} -- canonical: {first: Jan, last: Cuřín} - id: jan-curin -- canonical: {first: Agata, last: Cybulska} - variants: - - {first: Agata Katarzyna, last: Cybulska} -- canonical: {first: Scott, last: Cyphers} - variants: - - {first: D. Scott, last: Cyphers} -- canonical: {first: Marianne, last: Dabbadie} - id: marianne-dabbadie -- canonical: {first: Walter, last: Daelemans} - id: walter-daelemans -- canonical: {first: Deborah A., last: Dahl} - id: deborah-a-dahl - variants: - - {first: Deborah, last: Dahl} -- canonical: {first: Kathleen, last: Dahlgren} - id: kathleen-dahlgren -- canonical: {first: Li-Rong, last: Dai} - variants: - - {first: LiRong, last: Dai} -- canonical: {first: Xiang, last: Dai} - variants: - - {first: Xiangying, last: Dai} -- canonical: {first: Xinyu, last: Dai} - variants: - - {first: Xin-yu, last: Dai} - - {first: Xin-Yu, last: Dai} -- canonical: {first: Béatrice, last: Daille} - variants: - - {first: Beatrice, last: Daille} -- canonical: {first: Bojana, last: Dalbelo Bašić} - variants: - - {first: Bojana Dalbelo, last: Bašić} -- canonical: {first: Patrice, last: Dalle} - id: patrice-dalle -- canonical: {first: Bhavana, last: Dalvi} - variants: - - {first: Bhavana, last: Dalvi Mishra} -- canonical: {first: Om P., last: Damani} - variants: - - {first: Om, last: Damani} -- canonical: {first: Fred, last: Damerau} - variants: - - {first: Fred J., last: Damerau} -- canonical: {first: Laurie, last: Damianos} - variants: - - {first: Laurie E., last: Damianos} -- canonical: {first: Danica, last: Damljanović} - variants: - - {first: Danica, last: Damljanovic} -- canonical: {first: Géraldine, last: Damnati} - variants: - - {first: Geraldine, last: Damnati} -- canonical: {first: Robert I., last: Damper} - id: robert-i-damper -- canonical: {first: Sandipan, last: Dandapat} - variants: - - {first: Sandipan, last: Dandpat} -- canonical: {first: Hoa Trang, last: Dang} - variants: - - {first: Hoa, last: Dang} -- canonical: {first: Ron, last: 'Daniel, Jr.'} - variants: - - {first: Ron, last: Daniel} - - {first: Ron, last: Daniel Jr.} -- canonical: {first: Masatake, last: Dantsuji} - id: masatake-dantsuji -- canonical: {first: Aswarth Abhilash, last: Dara} - variants: - - {first: Aswarth, last: Dara} -- canonical: {first: Stéfan, last: Darmoni} - variants: - - {first: Stefan, last: Darmoni} -- canonical: {first: Vidas, last: Daudaravicius} - variants: - - {first: Vidas, last: Daudaravičius} -- canonical: {first: Jordi, last: Daudé} - id: jordi-daude -- canonical: {first: Hal, last: Daumé III} - variants: - - {first: Hal, last: Daume III} - - {first: Hal, last: Daume} - - {first: Hal, last: Daumé} -- canonical: {first: Chris Irwin, last: Davis} - variants: - - {first: Chris, last: Davis} -- canonical: {first: James, last: Davis} - variants: - - {first: James Raymond, last: Davis} -- canonical: {first: Mark W., last: Davis} - variants: - - {first: Mark, last: Davis} -- canonical: {first: Sashka T., last: Davis} - variants: - - {first: Sashka, last: Davis} -- canonical: {first: Ian P., last: Davy} - variants: - - {first: Ian, last: Davy} - - {first: Ian P, last: Davy} -- canonical: {first: David, last: Day} - variants: - - {first: David S., last: Day} -- canonical: {first: Antonella, last: De Angeli} - variants: - - {first: Antonella, last: DeAngeli} -- canonical: {first: Vitor, last: De Araujo} - variants: - - {first: Vítor, last: Araújo} -- canonical: {first: Orphee, last: De Clercq} - variants: - - {first: Orphée, last: De Clercq} -- canonical: {first: Georges, last: De Moor} - id: georges-de-moor -- canonical: {first: Renato, last: De Mori} - variants: - - {first: Renato, last: de Mori} -- canonical: {first: Anne, last: De Roeck} - variants: - - {first: Anne, last: DeRoeck} - - {first: Anne, last: de Roeck} - - {first: Anne, last: deRoeck} - - {first: A.N., last: De Roeck} - - {first: Anne N., last: De Roeck} -- canonical: {first: Gianluca, last: De Rossi} - variants: - - {first: Gianluca, last: Rossi} -- canonical: {first: Koenraad, last: De Smedt} - variants: - - {first: Koenraad, last: de Smedt} - - {first: Koenraad, last: DeSmedt} -- canonical: {first: Ángel, last: De la Torre} - id: angel-de-la-torre -- canonical: {first: Jonathan, last: DeCristofaro} - variants: - - {first: Jonathan D., last: DeCristofaro} -- canonical: {first: Rosa, last: Del Gaudio} - variants: - - {first: Rosa, last: Gaudio} -- canonical: {first: Riccardo, last: Del Gratta} - variants: - - {first: Riccardo, last: del Gratta} -- canonical: {first: Iria, last: Del Río Gayo} - variants: - - {first: Iria, last: del Río Gayo} - - {first: Iria, last: del Río} - - {first: Iria, last: del Rio} -- canonical: {first: Elisabeth, last: Delais-Roussarie} - variants: - - {first: Élisabeth, last: Delais-Roussarie} -- canonical: {first: Jean-François, last: Delannoy} - variants: - - {first: Jean-Francois, last: Delannoy} -- canonical: {first: Stephen A., last: Della Pietra} - id: stephen-a-della-pietra - variants: - - {first: Stephen, last: Della Pietra} - - {first: Stephen, last: DellaPietra} -- canonical: {first: Vincent J., last: Della Pietra} - id: vincent-j-della-pietra - variants: - - {first: Vincent, last: DellaPietra} -- canonical: {first: Rodolfo, last: Delmonte} - id: rodolfo-delmonte -- canonical: {first: Paul, last: Deléglise} - variants: - - {first: Paul, last: Deleglise} -- canonical: {first: George, last: Demetriou} - variants: - - {first: George C., last: Demetriou} -- canonical: {first: Isin, last: Demirsahin} - variants: - - {first: Işin, last: Demirşahin} - - {first: Isin, last: Demirşahin} -- canonical: {first: Peter, last: Deng} - id: peter-deng -- canonical: {first: Xinyu, last: Deng} - variants: - - {first: XinYu, last: Deng} -- canonical: {first: Zhi-Hong, last: Deng} - variants: - - {first: Zhihong, last: Deng} -- canonical: {first: Alexandre, last: Denis} - id: alexandre-denis -- canonical: {first: Leon, last: Derczynski} - variants: - - {first: Leon, last: Strømberg-Derczynski} -- canonical: {first: Jan Milan, last: Deriu} - variants: - - {first: Jan, last: Deriu} -- canonical: {first: Julien, last: Derivière} - id: julien-deriviere -- canonical: {first: Maunendra Sankar, last: Desarkar} - variants: - - {first: Maunendra, last: Sankar Desarkar} -- canonical: {first: Théo, last: Desbordes} - variants: - - {first: Theo, last: Desbordes} -- canonical: {first: Jean-Pierre, last: Descles} - variants: - - {first: Jean-Pierre, last: Desclés} - - {first: Jean Pierre, last: Descles} -- canonical: {first: Elina, last: Desipri} - id: elina-desipri - variants: - - {first: Elina, last: Desypri} -- canonical: {first: José, last: Deulofeu} - variants: - - {first: Jose, last: Deulofeu} -- canonical: {first: Arturo Calvo, last: Devesa} - variants: - - {first: Arturo, last: Calvo} -- canonical: {first: Laurence, last: Devillers} - id: laurence-devillers -- canonical: {first: Pradip, last: Dey} - variants: - - {first: Paradip, last: Dey} -- canonical: {first: Arnab, last: Dhar} - variants: - - {first: Arnad, last: Dhar} -- canonical: {first: Paramveer S., last: Dhillon} - variants: - - {first: Paramveer, last: Dhillon} -- canonical: {first: Luigi, last: Di Caro} - variants: - - {first: Luigi, last: di Caro} -- canonical: {first: Giuseppe, last: Di Fabbrizio} - variants: - - {first: Giuseppe, last: Fabbrizio} -- canonical: {first: Mattia A., last: Di Gangi} - variants: - - {first: Mattia Antonino, last: Di Gangi} - - {first: Mattia, last: Di Gangi} -- canonical: {first: Mauro, last: Di Manzo} - id: mauro-di-manzo -- canonical: {first: Giorgio Maria, last: Di Nunzio} - variants: - - {first: Giorgio, last: Di Nunzio} -- canonical: {first: Vittorio, last: Di Tomaso} - id: vittorio-di-tomaso -- canonical: {first: Zihao, last: Li} - id: zihao-li-helsinki - orcid: 0009-0008-9329-5341 - institution: University of Helsinki - comment: Helsinki -- canonical: {first: Zihao, last: Li} - id: zihao-li - comment: May refer to several people -- canonical: {first: Chrysanne, last: DiMarco} - variants: - - {first: Chrysanne, last: Di Marco} -- canonical: {first: Denise, last: DiPersio} - variants: - - {first: Denise, last: Dipersio} -- canonical: {first: Mona, last: Diab} - variants: - - {first: Mona T., last: Diab} -- canonical: {first: Gaël, last: Dias} - variants: - - {first: Gael, last: Dias} - - {first: Gäel, last: Dias} -- canonical: {first: Miguel Sales, last: Dias} - variants: - - {first: Miguel, last: Dias} -- canonical: {first: Bento Carlos, last: Dias-da-Silva} - variants: - - {first: Bento Carlos Dias, last: da Silva} -- canonical: {first: Javier, last: Dieguez-Tirado} - variants: - - {first: Javier, last: Dieguez} -- canonical: {first: Dinh, last: Dien} - variants: - - {first: Dien, last: Dinh} -- canonical: {first: Mireia, last: Diez} - variants: - - {first: Mireia, last: Díez} -- canonical: {first: Vassilios, last: Digalakis} - id: vassilios-digalakis -- canonical: {first: Brian W., last: Dillon} - variants: - - {first: Brian, last: Dillon} -- canonical: {first: Davis Muhajereen D., last: Dimalen} - variants: - - {first: Davis Muhajereen, last: Dimalen} -- canonical: {first: Vania, last: Dimitrova} - variants: - - {first: Vanya, last: Dimitrova} -- canonical: {first: Luca, last: Dini} - id: luca-dini -- canonical: {first: Norbert, last: Dinstl} - id: norbert-dinstl -- canonical: {first: Georgiana, last: Dinu} - id: georgiana-dinu -- canonical: {first: Liviu P., last: Dinu} - variants: - - {first: Liviu, last: Dinu} - - {first: Liviu Petrisor, last: Dinu} -- canonical: {first: Cheikh M. Bamba, last: Dione} - variants: - - {first: Cheikh Bamba, last: Dione} -- canonical: {first: Bayu, last: Distiawan} - variants: - - {first: Bayu Distiawan, last: Trisedya} -- canonical: {first: Shirley, last: Dita} - variants: - - {first: Shirley N., last: Dita} -- canonical: {first: Paul, last: Dixon} - variants: - - {first: Paul R., last: Dixon} -- canonical: {first: Quoc Khanh, last: Do} - variants: - - {first: Quoc-Khanh, last: Do} -- canonical: {first: Simon, last: Dobrisek} - variants: - - {first: Simon, last: Dobrišek} -- canonical: {first: Boris V., last: Dobrov} - id: boris-v-dobrov - variants: - - {first: Boris, last: Dobrov} -- canonical: {first: Laura, last: Docio-Fernandez} - variants: - - {first: Laura, last: Docío-Fernández} -- canonical: {first: George R., last: Doddington} - variants: - - {first: George, last: Doddington} -- canonical: {first: Ellen K., last: Dodge} - variants: - - {first: Ellen, last: Dodge} -- canonical: {first: Shinichi, last: Doi} - variants: - - {first: Shin’ichi, last: Doi} -- canonical: {first: Charles P., last: Dolan} - variants: - - {first: Charles, last: Dolan} -- canonical: {first: William B., last: Dolan} - variants: - - {first: William, last: Dolan} - - {first: Bill, last: Dolan} -- canonical: {first: Ioannis, last: Dologlou} - id: ioannis-dologlou -- canonical: {first: Martin Ariel, last: Dominguez} - variants: - - {first: Martín, last: Domínguez} - - {first: Martin Ariel, last: Domínguez} -- canonical: {first: Ming Chui, last: Dong} - variants: - - {first: Ming-Chui, last: Dong} -- canonical: {first: Xin Luna, last: Dong} - variants: - - {first: Xin, last: Dong} -- canonical: {first: Christine, last: Doran} - id: christine-doran -- canonical: {first: Bonnie, last: Dorr} - variants: - - {first: Bonnie J., last: Dorr} -- canonical: {first: Jochen, last: Dorre} - variants: - - {first: Jochen, last: Dörre} -- canonical: {first: Léon, last: Dostert} - variants: - - {first: Leon, last: Dostert} - - {first: L. E., last: Dostert} -- canonical: {first: Ellen, last: Douglas-Cowie} - id: ellen-douglas-cowie -- canonical: {first: Yerai, last: Doval} - variants: - - {first: Yerai, last: Doval Mosquera} -- canonical: {first: John, last: Dowding} - id: john-dowding -- canonical: {first: Jennifer, last: Doyon} - variants: - - {first: Jennifer B., last: Doyon} -- canonical: {first: Christopher, last: Dozier} - variants: - - {first: Christopher C., last: Dozier} -- canonical: {first: Elliott Franco, last: Drabek} - variants: - - {first: Elliott, last: Drabek} - - {first: Elliott, last: Drábek} -- canonical: {first: Felix, last: Dreizin} - id: felix-dreizin -- canonical: {first: Biljana, last: Drndarević} - variants: - - {first: Biljana, last: Drndarevic} -- canonical: {first: Witold, last: Drożdżyński} - variants: - - {first: Witold, last: Drozdzynski} -- canonical: {first: Sebastian, last: Drude} - id: sebastian-drude -- canonical: {first: Jianyong, last: Duan} - variants: - - {first: Jian-Yong, last: Duan} -- canonical: {first: Yuguang, last: Duan} - variants: - - {first: Yu, last: Duan} -- canonical: {first: Pablo, last: Duboue} - variants: - - {first: Pablo A., last: Duboue} - - {first: Pablo Ariel, last: Duboue} -- canonical: {first: Loic, last: Dugast} - variants: - - {first: Loïc, last: Dugast} -- canonical: {first: Stefan Daniel, last: Dumitrescu} - variants: - - {first: Ștefan Daniel, last: Dumitrescu} - - {first: Ștefan, last: Dumitrescu} -- canonical: {first: Pierre, last: Dumouchel} - id: pierre-dumouchel -- canonical: {first: Ted E., last: Dunning} - variants: - - {first: Ted, last: Dunning} -- canonical: {first: Long, last: Duong} - variants: - - {first: Long, last: Duong Thanh} -- canonical: {first: Magali Sanches, last: Duran} - variants: - - {first: Magali, last: Sanches Duran} - - {first: Magali, last: Duran} -- canonical: {first: Ilknur, last: Durgar El-Kahlout} - variants: - - {first: Ilknur Durgar, last: El-Kahlout} - - {first: İlknur, last: Durgar El-Kahlout} - - {first: İlknur Durgar, last: El-Kahlout} -- canonical: {first: Koel, last: Dutta Chowdhury} - variants: - - {first: Koel Dutta, last: Chowdhury} -- canonical: {first: Arienne, last: Dwyer} - id: arienne-dwyer -- canonical: {first: Hans, last: Dybkjaer} - variants: - - {first: Hans, last: Dybkjær} -- canonical: {first: Laila, last: Dybkjaer} - variants: - - {first: Laila, last: Dybkjær} -- canonical: {first: Chris, last: Dyer} - variants: - - {first: Christopher, last: Dyer} - - {first: Christopher J., last: Dyer} -- canonical: {first: Michael G., last: Dyer} - variants: - - {first: Michael, last: Dyer} -- canonical: {first: Myroslava O., last: Dzikovska} - variants: - - {first: Myroslava, last: Dzikovska} -- canonical: {first: Daniel, last: Déchelotte} - variants: - - {first: Daniel, last: Dechelotte} -- canonical: {first: Hervé, last: Déjean} - id: herve-dejean - variants: - - {first: Herve, last: Dejean} -- canonical: {first: Víctor J., last: Díaz} - variants: - - {first: Victor J., last: Díaz} -- canonical: {first: Jesús E., last: Díaz Verdejo} - id: jesus-e-diaz-verdejo -- canonical: {first: Arantza, last: Díaz de Ilarraza} - id: arantza-diaz-de-ilarraza - variants: - - {first: Arantza, last: Diaz de Ilarraza} -- canonical: {first: Elisabeth, last: D’Halleweyn} - variants: - - {first: Elizabeth, last: D’Halleweyn} -- canonical: {first: Luis Fernando, last: D’Haro} - variants: - - {first: Luis F., last: d’Haro} -- canonical: {first: Susana, last: Early} - id: susana-early -- canonical: {first: Hiroshi, last: Echizen-ya} - variants: - - {first: Hiroshi, last: Echizen’ya} -- canonical: {first: Philip, last: Edmonds} - variants: - - {first: Philip G., last: Edmonds} -- canonical: {first: Angels, last: Egea} - variants: - - {first: Àngels, last: Egea} -- canonical: {first: Liat, last: Ein Dor} - variants: - - {first: Liat, last: Ein-Dor} -- canonical: {first: Andreas, last: Eisele} - id: andreas-eisele -- canonical: {first: Jason, last: Eisner} - variants: - - {first: Jason M., last: Eisner} -- canonical: {first: Eva, last: Ejerhed} - variants: - - {first: Eva I., last: Ejerhed} -- canonical: {first: Kerstin Severinson, last: Eklundh} - variants: - - {first: Kerstin, last: Severinson Eklundh} - - {first: Kerstin, last: Severinson} -- canonical: {first: Said Ouatik, last: El Alaoui} - variants: - - {first: Said, last: Ouatik El Alaoui} -- canonical: {first: Adil, last: El Ghali} - variants: - - {first: Adil, last: El-Ghali} -- canonical: {first: Ismail, last: El Maarouf} - variants: - - {first: Ismaïl, last: El Maarouf} -- canonical: {first: Samhaa R., last: El-Beltagy} - variants: - - {first: Samhaa, last: El-Beltagy} -- canonical: {first: Marc, last: El-Bèze} - id: marc-el-beze - variants: - - {first: Marc, last: El-Beze} -- canonical: {first: Wassim, last: El-Hajj} - variants: - - {first: Wassim, last: El Hajj} -- canonical: {first: Mohab, last: El-karef} - variants: - - {first: Mohab, last: Elkaref} -- canonical: {first: Noémie, last: Elhadad} - variants: - - {first: Noemie, last: Elhadad} -- canonical: {first: Frédéric, last: Eliséi} - variants: - - {first: Frederic, last: Elisei} -- canonical: {first: Faiza, last: Elkateb-Gara} - variants: - - {first: Faiza, last: Gara} - - {first: Faïza, last: Elkateb-Gara} -- canonical: {first: John, last: Elliott} - variants: - - {first: John, last: Elliot} -- canonical: {first: David, last: Ellis} - variants: - - {first: David Ellis, last: Rogers} -- canonical: {first: T. Mark, last: Ellison} - id: t-mark-ellison -- canonical: {first: Samira, last: Ellouze} - variants: - - {first: Samira Walha, last: Ellouze} -- canonical: {first: Mariem, last: Ellouze Khemekhem} - variants: - - {first: Mariem, last: Ellouze Khemakhem} - - {first: Mariem, last: Ellouze} - - {first: Mariem Ellouze, last: Khmekhem} - - {first: Mariem, last: Ellouze khemekhem} -- canonical: {first: Michael, last: Ellsworth} - variants: - - {first: Michael J., last: Ellsworth} -- canonical: {first: Mohan, last: Zhang} - id: mohan-zhang-unc - orcid: 0009-0000-8866-7878 - institution: University of North Carolina - comment: UNC -- canonical: {first: Mohan, last: Zhang} - id: mohan-zhang - comment: May refer to several people -- canonical: {first: David, last: Elson} - variants: - - {first: David K., last: Elson} -- canonical: {first: Martin C., last: Emele} - variants: - - {first: Martin, last: Emele} -- canonical: {first: Louisette, last: Emirkanian} - id: louisette-emirkanian -- canonical: {first: Chantal, last: Enguehard} - id: chantal-enguehard -- canonical: {first: Mark, last: Epstein} - id: mark-epstein -- canonical: {first: Adoram, last: Erell} - id: adoram-erell -- canonical: {first: Tomaž, last: Erjavec} - variants: - - {first: Tomaz, last: Erjavec} -- canonical: {first: Gunes, last: Erkan} - variants: - - {first: Güneş, last: Erkan} -- canonical: {first: Gülşen, last: Eryiğit} - variants: - - {first: Gülşen, last: Eryiǧit} -- canonical: {first: Mahbaneh, last: Eshaghzadeh Torbati} - variants: - - {first: Mahbaneh, last: Eshaghzadeh} -- canonical: {first: Iris, last: Eshkol} - variants: - - {first: Iris, last: Eshkol-Taravella} -- canonical: {first: Salvador, last: España} - id: salvador-espana -- canonical: {first: Luis, last: Espinosa Anke} - variants: - - {first: Luis, last: Espinosa-Anke} - - {first: Luis Espinosa, last: Anke} -- canonical: {first: Miquel, last: Esplà-Gomis} - variants: - - {first: Miquel, last: Esplà} -- canonical: {first: Dominique, last: Estival} - id: dominique-estival -- canonical: {first: David A., last: Evans} - variants: - - {first: David Andreoff, last: Evans} -- canonical: {first: David K., last: Evans} - variants: - - {first: David, last: Evans} - - {first: David Kirk, last: Evans} -- canonical: {first: Edmund Grimley, last: Evans} - variants: - - {first: Edmund, last: Grimley-Evans} -- canonical: {first: Richard, last: Evans} - id: richard-evans -- canonical: {first: Roger, last: Evans} - id: roger-evans -- canonical: {first: Martha, last: Evens} - variants: - - {first: Martha W., last: Evens} - - {first: Martha W, last: Evens} -- canonical: {first: Stephanie S., last: Everett} - variants: - - {first: Stephanie, last: Everett} -- canonical: {first: Lindsay J., last: Evett} - id: lindsay-j-evett -- canonical: {first: Chandra Kiran Reddy, last: Evuru} - variants: - - {first: Chandra Kiran, last: Evuru} -- canonical: {first: Frank Van, last: Eynde} - variants: - - {first: Frank, last: van Eynde} - - {first: Frank, last: Van Eynde} -- canonical: {first: Nerea, last: Ezeiza} - id: nerea-ezeiza -- canonical: {first: Cécile, last: Fabre} - variants: - - {first: Cecile, last: Fabre} -- canonical: {first: Karoly, last: Fabricz} - id: karoly-fabricz -- canonical: {first: Marcos Didonet Del, last: Fabro} - variants: - - {first: Marcus Didonet, last: Del Fabro} -- canonical: {first: Hakimeh, last: Fadaee} - variants: - - {first: Hakimeh, last: Fadaei} -- canonical: {first: Cédrick, last: Fairon} - variants: - - {first: Cedrick, last: Fairon} -- canonical: {first: Nikos, last: Fakotakis} - id: nikos-fakotakis - variants: - - {first: Nikos D., last: Fakotakis} -- canonical: {first: Agnieszka, last: Falenska} - variants: - - {first: Agnieszka, last: Faleńska} -- canonical: {first: Shixi, last: Fan} - variants: - - {first: ShiXi, last: Fan} -- canonical: {first: Alex Chengyu, last: Fang} - variants: - - {first: Alex C., last: Fang} -- canonical: {first: M. Amin, last: Farajian} - variants: - - {first: Mohammad Amin, last: Farajian} -- canonical: {first: Richárd, last: Farkas} - variants: - - {first: Richard, last: Farkas} -- canonical: {first: Javier, last: Farreres} - variants: - - {first: Xavier, last: Farreres} -- canonical: {first: Tanveer A., last: Faruquie} - variants: - - {first: Tanveer, last: Faruquie} - - {first: Tanveer A, last: Faruquie} -- canonical: {first: David, last: Farwell} - id: david-farwell -- canonical: {first: Nicolas R., last: Fauceglia} - variants: - - {first: Nicolas, last: Fauceglia} -- canonical: {first: Benoit, last: Favre} - variants: - - {first: Benoît, last: Favre} -- canonical: {first: Steven, last: Feiner} - variants: - - {first: Steven K., last: Feiner} -- canonical: {first: Laurie, last: Feldman} - variants: - - {first: Laurie Beth, last: Feldman} -- canonical: {first: Naomi, last: Feldman} - variants: - - {first: Naomi H., last: Feldman} -- canonical: {first: Laszlo, last: Felfoldi} - variants: - - {first: László, last: Felföldi} -- canonical: {first: Ariani Di, last: Felippo} - variants: - - {first: Ariani, last: Di-Felippo} - - {first: Ariani, last: Di Felippo} -- canonical: {first: Valéria Delisandra, last: Feltrim} - variants: - - {first: Valéria, last: Feltrim} - - {first: Valéria D., last: Feltrim} -- canonical: {first: Fangfang, last: Feng} - id: fangfang-feng -- canonical: {first: Jens Erik, last: Fenstad} - variants: - - {first: Jens-Erik, last: Fenstad} -- canonical: {first: Eraldo, last: Fernandes} - variants: - - {first: Eraldo Rezende, last: Fernandes} -- canonical: {first: Ana, last: Fernandez} - variants: - - {first: Ana Fernández, last: Montraveta} - - {first: Ana, last: Fernández-Montraveta} -- canonical: {first: Ramón, last: Fernandez Astudillo} - variants: - - {first: Ramón, last: Astudillo} - - {first: Ramón, last: F. Astudillo} - - {first: Ramon, last: F. Astudillo} -- canonical: {first: Diego, last: Fernandez Slezak} - variants: - - {first: Diego, last: Fernández Slezak} -- canonical: {first: Raquel, last: Fernández} - variants: - - {first: Raquel, last: Fernandez} -- canonical: {first: Antonio, last: Fernández Orquín} - variants: - - {first: Antonio, last: Fernandez Orquín} - - {first: Antonio, last: Fernández-Orquín} - - {first: Antonio, last: Fernández} -- canonical: {first: David, last: Fernández-Amorós} - variants: - - {first: David, last: Fernández} - - {first: David, last: Férnandez-Amorós} -- canonical: {first: Fernando, last: Fernández-Martínez} - variants: - - {first: Fernando Fernández, last: Martínez} -- canonical: {first: Stéphane, last: Ferrari} - variants: - - {first: Stephane, last: Ferrari} -- canonical: {first: Kathleen, last: Ferraro} - variants: - - {first: Kathleen, last: Ferrara} -- canonical: {first: Antonio, last: Ferrández} - id: antonio-ferrandez - variants: - - {first: Antonio, last: Ferrandez} -- canonical: {first: Óscar, last: Ferrández} - variants: - - {first: Oscar, last: Ferrandez} - - {first: Oscar, last: Ferrández} -- canonical: {first: Gaëlle, last: Ferré} - variants: - - {first: Gaelle, last: Ferré} -- canonical: {first: Daniel, last: Ferrés} - variants: - - {first: Dani, last: Ferrés} -- canonical: {first: Hanne, last: Fersøe} - id: hanne-fersoe - variants: - - {first: Hanne, last: Fersoe} -- canonical: {first: Charles J., last: Fillmore} - variants: - - {first: Charles, last: Fillmore} -- canonical: {first: Maria José B., last: Finatto} - variants: - - {first: Maria José, last: Finatto} - - {first: Maria José Bocorny, last: Finatto} -- canonical: {first: Alex, last: Fine} - variants: - - {first: Alex B., last: Fine} -- canonical: {first: Linda, last: Fineman} - id: linda-fineman -- canonical: {first: Tim, last: Finin} - variants: - - {first: Timothy W., last: Finin} -- canonical: {first: Pamela E., last: Fink} - id: pamela-e-fink -- canonical: {first: Jenny Rose, last: Finkel} - variants: - - {first: Jenny, last: Finkel} -- canonical: {first: Mark, last: Finlayson} - variants: - - {first: Mark A., last: Finlayson} -- canonical: {first: Gregory, last: Finley} - variants: - - {first: Greg, last: Finley} -- canonical: {first: Therese, last: Firmin} - variants: - - {first: Therese Firmin, last: Hand} -- canonical: {first: Jonathan G., last: Fiscus} - id: jonathan-g-fiscus - variants: - - {first: Jonathan C., last: Fiscus} - - {first: Jonathan, last: Fiscus} -- canonical: {first: David, last: Fisher} - id: david-fisher -- canonical: {first: William M., last: Fisher} - id: william-m-fisher - variants: - - {first: William, last: Fisher} -- canonical: {first: Xin, last: Xu} - id: xin-xu-ucsd - orcid: 0000-0001-5238-0955 - comment: UCSD - institution: UC San Diego -- canonical: {first: Xin, last: Xu} - id: xin-xu - comment: May refer to multiple people -- canonical: {first: Sisay, last: Fissaha Adafre} - variants: - - {first: Sisay, last: Fissaha} - - {first: Sisay Fissaha, last: Adafre} -- canonical: {first: Eileen, last: Fitzpatrick} - id: eileen-fitzpatrick -- canonical: {first: James L., last: Flanagan} - id: james-l-flanagan -- canonical: {first: Sébastien, last: Flavier} - variants: - - {first: Sebastien, last: Flavier} -- canonical: {first: Iuliana Alexandra, last: Fleşcan-Lovin-Arseni} - variants: - - {first: Iuliana Alexandra, last: Fleșcan-Lovin-Arseni} - - {first: Iuliana-Alexandra, last: Flescan-Lovin-Arseni} -- canonical: {first: Dan, last: Flickinger} - id: dan-flickinger - variants: - - {first: Daniel, last: Flickinger} - - {first: Daniel P., last: Flickinger} -- canonical: {first: Radu, last: Florian} - id: radu-florian -- canonical: {first: Christian, last: Fluhr} - id: christian-fluhr -- canonical: {first: Achille, last: Fokoue-Nkoutche} - variants: - - {first: Achille, last: Fokoue} -- canonical: {first: Helka, last: Folch} - id: helka-folch -- canonical: {first: Peter, last: Foltz} - variants: - - {first: Peter W., last: Foltz} -- canonical: {first: José A. R., last: Fonollosa} - variants: - - {first: Jose A., last: R. Fonollosa} - - {first: José A.R., last: Fonollosa} - - {first: José A., last: R. Fonollosa} - - {first: Jose A. R., last: Fonollosa} -- canonical: {first: Erick, last: Fonseca} - variants: - - {first: Erick Rocha, last: Fonseca} - - {first: Erick R., last: Fonseca} -- canonical: {first: Evandro B., last: Fonseca} - variants: - - {first: Evandro, last: Fonseca} -- canonical: {first: Ariadna, last: Font Llitjós} - variants: - - {first: Ariadna, last: Font-Llitjos} - - {first: Ariadna, last: Font Llitjos} -- canonical: {first: Josep Maria, last: Fontana} - variants: - - {first: Josep, last: Fontana} -- canonical: {first: Kate, last: Forbes-Riley} - variants: - - {first: Kate, last: Forbes} - - {first: Katherine, last: Forbes-Riley} - - {first: Katherine, last: Forbes} - - {first: Katherine, last: Forbes Riley} -- canonical: {first: Kenneth, last: Forbus} - variants: - - {first: Kenneth D., last: Forbus} -- canonical: {first: Mikel L., last: Forcada} - variants: - - {first: Mikel, last: Forcada} -- canonical: {first: Cameron Shaw, last: Fordyce} - variants: - - {first: Cameron, last: Fordyce} -- canonical: {first: Lluis, last: Formiga} - variants: - - {first: Lluís, last: Formiga} -- canonical: {first: David, last: Forsyth} - variants: - - {first: David A., last: Forsyth} -- canonical: {first: Corina, last: Forăscu} - variants: - - {first: Corina, last: Forascu} -- canonical: {first: Eric, last: Fosler-Lussier} - variants: - - {first: J. Eric, last: Fosler} - - {first: Eric, last: Fosler} -- canonical: {first: Victoria, last: Fossum} - variants: - - {first: Victoria Li, last: Fossum} -- canonical: {first: Dean, last: Foster} - variants: - - {first: Dean P., last: Foster} -- canonical: {first: Mary Ellen, last: Foster} - variants: - - {first: Mary E., last: Foster} -- canonical: {first: Kilian A., last: Foth} - variants: - - {first: Kilian, last: Foth} -- canonical: {first: Stavroula-Evita, last: Fotinea} - id: stavroula-evita-fotinea -- canonical: {first: Christophe, last: Fouqueré} - id: christophe-fouquere -- canonical: {first: Sébastien, last: Fournier} - variants: - - {first: Sebastien, last: Fournier} -- canonical: {first: Heidi, last: Fox} - variants: - - {first: Heidi J., last: Fox} -- canonical: {first: Jean E., last: Fox Tree} - variants: - - {first: Jean Fox, last: Tree} - - {first: Jean, last: Fox Tree} -- canonical: {first: Michael C., last: Frank} - variants: - - {first: Michael, last: Frank} -- canonical: {first: Stefan L., last: Frank} - variants: - - {first: Stefan, last: Frank} -- canonical: {first: Alexander, last: Franz} - variants: - - {first: Alexander M., last: Franz} -- canonical: {first: Claire, last: François} - variants: - - {first: Claire, last: Francois} -- canonical: {first: Chaoqun, last: Liu} - id: chaoqun-liu-ntu - orcid: 0000-0001-8014-2516 - institution: Nanyang Technological University - comment: NTU -- canonical: {first: Chaoqun, last: Liu} - id: chaoqun-liu - comment: May refer to several people -- canonical: {first: Alexander, last: Fraser} - variants: - - {first: Alex, last: Fraser} -- canonical: {first: Kathleen C., last: Fraser} - variants: - - {first: Kathleen, last: Fraser} -- canonical: {first: Norman M., last: Fraser} - variants: - - {first: Norman, last: Fraser} -- canonical: {first: Elisabeth, last: Frasnelli} - id: elisabeth-frasnelli -- canonical: {first: Zuzana, last: Fraterova} - variants: - - {first: Zuzana, last: Fráterová} -- canonical: {first: Robert, last: Frederking} - variants: - - {first: Robert E., last: Frederking} -- canonical: {first: Dayne, last: Freitag} - id: dayne-freitag -- canonical: {first: André, last: Freitas} - variants: - - {first: Andre, last: Freitas} -- canonical: {first: Cláudia, last: Freitas} - variants: - - {first: Claudia, last: Freitas} -- canonical: {first: Karin, last: Friberg Heppin} - variants: - - {first: Karin Friberg, last: Heppin} - - {first: Karin, last: Friberg} -- canonical: {first: Carol, last: Friedman} - id: carol-friedman -- canonical: {first: Richard, last: Fritzson} - variants: - - {first: Rich, last: Fritzson} -- canonical: {first: Sónia, last: Frota} - id: sonia-frota -- canonical: {first: Eva, last: Fucikova} - variants: - - {first: Eva, last: Fučíková} -- canonical: {first: Maria, last: Fuentes} - variants: - - {first: Maria, last: Fuentes Fort} -- canonical: {first: Jun’ichi, last: Fukumoto} - variants: - - {first: Junichi, last: Fukumoto} -- canonical: {first: Shun-ya, last: Fukunaga} - variants: - - {first: Shunya, last: Fukunaga} -- canonical: {first: Sean A., last: Fulop} - variants: - - {first: Sean, last: Fulop} -- canonical: {first: Sadaoki, last: Furui} - id: sadaoki-furui -- canonical: {first: Robert P., last: Futrelle} - variants: - - {first: Robert, last: Futrelle} -- canonical: {first: Luana, last: Fǎgǎrǎşan} - variants: - - {first: Luana, last: Fagarasan} -- canonical: {first: Kiran, last: GVR} - variants: - - {first: Kiran, last: Gvr} -- canonical: {first: Raghu Pujitha, last: Gade} - variants: - - {first: Pujitha, last: Gade} -- canonical: {first: Benoit, last: Gaillard} - variants: - - {first: Benoît, last: Gaillard} -- canonical: {first: Robert, last: Gaizauskas} - id: robert-gaizauskas - variants: - - {first: Robert J., last: Gaizauskas} - - {first: Rob, last: Gaizauskas} -- canonical: {first: Nuria, last: Gala} - variants: - - {first: Núria, last: Gala} - - {first: Nùria, last: Gala} -- canonical: {first: Dimitrios, last: Galanis} - variants: - - {first: Dimitris, last: Galanis} -- canonical: {first: William A., last: Gale} - variants: - - {first: William, last: Gale} -- canonical: {first: Stephen L., last: Gallant} - variants: - - {first: Stephen, last: Gallant} -- canonical: {first: Ascension, last: Gallardo-Antolin} - variants: - - {first: Ascension, last: Gallardo} -- canonical: {first: Sylvain, last: Galliano} - id: sylvain-galliano -- canonical: {first: Björn, last: Gambäck} - variants: - - {first: Bjorn, last: Gamback} - - {first: Björn, last: Gämback} -- canonical: {first: Iñaki, last: Gaminde} - id: inaki-gaminde -- canonical: {first: Kok Wee, last: Gan} - variants: - - {first: Kok-Wee, last: Gan} -- canonical: {first: Surya, last: Ganesh} - variants: - - {first: Surya Ganesh, last: V} - - {first: Surya Ganesh, last: Veeravalli} -- canonical: {first: Barathi, last: Ganesh H. B.} - variants: - - {first: Barathi, last: Ganesh HB} -- canonical: {first: Vikas, last: Ganjigunte Ashok} - variants: - - {first: Vikas, last: Ashok} -- canonical: {first: Helena Hong, last: Gao} - variants: - - {first: Helena, last: Gao} -- canonical: {first: Zhao Ming, last: Gao} - variants: - - {first: Zhao-Ming, last: Gao} - - {first: Zhao-ming, last: Gao} -- canonical: {first: Radovan, last: Garabík} - variants: - - {first: Radovan, last: Garabik} -- canonical: {first: Fernando, last: Garcia} - variants: - - {first: Fernando, last: García-Granada} - - {first: Fernando, last: García} -- canonical: {first: Marie-Neige, last: Garcia} - id: marie-neige-garcia -- canonical: {first: Jorge, last: Garcia Flores} - variants: - - {first: Jorge, last: García Flores} - - {first: Jorge J., last: García Flores} -- canonical: {first: Alberto, last: Garcia-Duran} - variants: - - {first: Alberto, last: García-Durán} -- canonical: {first: Carmen, last: Garcia-Mateo} - variants: - - {first: Carmen, last: García-Mateo} -- canonical: {first: Gonçal V., last: Garcés Díaz-Munío} - orcid: 0000-0002-2594-5858 -- canonical: {first: Mar, last: García} - id: mar-garcia -- canonical: {first: José M., last: García Miguel} - variants: - - {first: José M., last: García-Miguel} -- canonical: {first: Marcos, last: García Salido} - variants: - - {first: Marcos, last: García-Salido} -- canonical: {first: Miguel Ángel, last: García-Cumbreras} - variants: - - {first: M. Ángel, last: García} - - {first: Miguel, last: García-Cumbreras} - - {first: Miguel Á., last: García Cumbreras} -- canonical: {first: Mercedes, last: García-Martínez} - variants: - - {first: Mercedes García, last: Martínez} -- canonical: {first: Ana, last: García-Serrano} - variants: - - {first: Ana M., last: García-Serrano} -- canonical: {first: Diogo, last: Glória-Silva} - orcid: 0000-0002-4420-7455 - institution: NOVA University of Lisbon - School of Science and Technology - variants: - - {first: Diogo F. C., last: Silva} -- canonical: {first: Ismael, last: García-Varea} - variants: - - {first: Ismael García, last: Varea} - - {first: Ismael, last: García Varea} -- canonical: {first: Manuel, last: García-Vega} - variants: - - {first: Manuel, last: García} -- canonical: {first: Roberto, last: Garigliano} - id: roberto-garigliano -- canonical: {first: John S., last: Garofolo} - id: john-s-garofolo - variants: - - {first: John, last: Garofolo} -- canonical: {first: Juan María, last: Garrido} - variants: - - {first: Juan Maria, last: Garrido} -- canonical: {first: Marta, last: Garrote-Salazar} - variants: - - {first: Marta, last: Garrote} -- canonical: {first: Paul H., last: Garthwaite} - variants: - - {first: Paul, last: Garthwaite} - - {first: Paul H, last: Garthwaite} -- canonical: {first: E. Gabriela, last: Garza} - variants: - - {first: Gabriela, last: Garza} -- canonical: {first: Aina, last: Garí Soler} - variants: - - {first: Aina Garí, last: Soler} -- canonical: {first: Milica, last: Gasic} - variants: - - {first: Milica, last: Gašić} -- canonical: {first: Donna, last: Gates} - variants: - - {first: Donna M., last: Gates} -- canonical: {first: Maíra, last: Gatti} - variants: - - {first: Maira, last: Gatti} -- canonical: {first: Eric, last: Gaussier} - variants: - - {first: Éric, last: Gaussier} -- canonical: {first: Akash Kumar, last: Gautam} - variants: - - {first: Akash, last: Gautam} -- canonical: {first: Gauri Shankar, last: Gautam} - variants: - - {first: Gauri, last: S. Gautam} -- canonical: {first: Marsal, last: Gavalda} - variants: - - {first: Marsal, last: Gavaldà} -- canonical: {first: Maria, last: Gavrilidou} - id: maria-gavrilidou -- canonical: {first: Jean Mark, last: Gawron} - id: jean-mark-gawron - variants: - - {first: Mark, last: Gawron} - - {first: J. Mark, last: Gawron} -- canonical: {first: Barbara, last: Gawronska} - variants: - - {first: Barbara, last: Gawronska-Werngren} - - {first: Barbara, last: Gawrońska-Werngren} -- canonical: {first: Claudia, last: Gdaniec} - id: claudia-gdaniec -- canonical: {first: Binyam Gebrekidan, last: Gebre} - variants: - - {first: Binyam, last: Gebre} -- canonical: {first: T. V., last: Geetha} - variants: - - {first: Geetha, last: T V} - - {first: T V, last: Geetha} -- canonical: {first: Maayan, last: Geffet} - variants: - - {first: Maayan, last: Zhitomirsky-Geffet} -- canonical: {first: Johanna, last: Geiß} - variants: - - {first: Johanna, last: Geiss} -- canonical: {first: Alexander, last: Gelbukh} - variants: - - {first: Alexander F., last: Gelbukh} -- canonical: {first: Debela Tesfaye, last: Gemechu} - variants: - - {first: Debela, last: Tesfaye} -- canonical: {first: Jort Florent, last: Gemmeke} - variants: - - {first: Jort F., last: Gemmeke} - - {first: Jort, last: Gemmeke} -- canonical: {first: Cédric, last: Gendrot} - variants: - - {first: Cedric, last: Gendrot} -- canonical: {first: Edouard, last: Geoffrois} - id: edouard-geoffrois -- canonical: {first: Lucas, last: Georges Gabriel Charpentier} - variants: - - {first: Lucas, last: Charpentier} -- canonical: {first: Panayiotis, last: Georgiou} - variants: - - {first: Panayiotis G., last: Georgiou} -- canonical: {first: Matthew, last: Gerber} - variants: - - {first: Matt, last: Gerber} - - {first: Matthew S., last: Gerber} - - {first: Matthew, last: Garber} -- canonical: {first: Abigail S., last: Gertner} - variants: - - {first: Abigail, last: Gertner} -- canonical: {first: Pablo, last: Gervás} - id: pablo-gervas -- canonical: {first: Gholamreza, last: Ghassem-Sani} - variants: - - {first: Gholamreza, last: Ghassem-sani} - - {first: Gholamreza, last: Ghasem-Sani} -- canonical: {first: Samik, last: Ghosh} - variants: - - {first: Samik, last: Gosh} -- canonical: {first: Soumya Sankar, last: Ghosh} - variants: - - {first: Soumya, last: Ghosh} -- canonical: {first: Egidio, last: Giachin} - id: egidio-giachin -- canonical: {first: Daniela, last: Gifu} - variants: - - {first: Daniela, last: Gîfu} -- canonical: {first: Helen M., last: Gigley} - variants: - - {first: Helen, last: Gigley} -- canonical: {first: Luca, last: Gilardoni} - id: luca-gilardoni -- canonical: {first: Laurent, last: Gillard} - id: laurent-gillard -- canonical: {first: Dan, last: Gillick} - variants: - - {first: Daniel, last: Gillick} -- canonical: {first: Laurence, last: Gillick} - variants: - - {first: Laurence S., last: Gillick} -- canonical: {first: Jesús, last: Giménez} - variants: - - {first: Jesus, last: Gimenez} -- canonical: {first: Mireia, last: Ginestí-Rosell} - variants: - - {first: Mireia, last: Ginestí Rosell} -- canonical: {first: Alexandru-Lucian, last: Ginsca} - variants: - - {first: Alexandru, last: Ginsca} - - {first: Alexandru-Lucian, last: Gînscă} -- canonical: {first: Voula, last: Giouli} - id: voula-giouli -- canonical: {first: Emiliano, last: Giovannetti} - variants: - - {first: Emiliano, last: Giovanetti} -- canonical: {first: Joan, last: Giralt Duran} - variants: - - {first: Joan Giralt, last: Duran} -- canonical: {first: Christian, last: Girardi} - id: christian-girardi -- canonical: {first: Roxana, last: Girju} - variants: - - {first: Roxana, last: Gîrju} -- canonical: {first: Herbert, last: Gish} - variants: - - {first: Herb, last: Gish} -- canonical: {first: Claudio, last: Giuliano} - id: claudio-giuliano -- canonical: {first: Sheila R., last: Glasbey} - id: sheila-r-glasbey - variants: - - {first: Sheila, last: Glasbey} -- canonical: {first: James, last: Glass} - variants: - - {first: James R., last: Glass} -- canonical: {first: Michael, last: Glass} - variants: - - {first: Michael R., last: Glass} -- canonical: {first: Meghan, last: Glenn} - variants: - - {first: Meghan Lammie, last: Glenn} -- canonical: {first: Alfio, last: Gliozzo} - variants: - - {first: Alfio, last: Massimiliano Gliozzo} - - {first: Alfio Massimiliano, last: Gliozzo} - - {first: Alfio M., last: Gliozzo} -- canonical: {first: Daniele, last: Godard} - variants: - - {first: Danièle, last: Godard} -- canonical: {first: Guenther, last: Goerz} - id: guenther-goerz -- canonical: {first: Sebastian, last: Goeser} - id: sebastian-goeser -- canonical: {first: Chooi-Ling, last: Goh} - variants: - - {first: Chooi Ling, last: Goh} -- canonical: {first: Koldo, last: Gojenola} - id: koldo-gojenola - variants: - - {first: Koldobika, last: Gojenola} - - {first: Koldo, last: Gojenola Galletebeitia} -- canonical: {first: Adele, last: Goldberg} - variants: - - {first: Adele E., last: Goldberg} -- canonical: {first: Andrew B., last: Goldberg} - variants: - - {first: Andrew, last: Goldberg} -- canonical: {first: Eli, last: Goldberg} - id: eli-goldberg -- canonical: {first: Jade, last: Goldstein} - variants: - - {first: Jade, last: Goldstein-Stewart} -- canonical: {first: Sharon, last: Goldwater} - variants: - - {first: Sharon J., last: Goldwater} -- canonical: {first: Sujatha Das, last: Gollapalli} - variants: - - {first: Sujatha, last: Das Gollapalli} - - {first: Sujatha, last: Das} -- canonical: {first: Helena, last: Gomez} - variants: - - {first: Helena, last: Gómez} -- canonical: {first: Jose Maria, last: Gomez-Hidalgo} - variants: - - {first: Jose Maria Gomez, last: Hidalgo} - - {first: José M. Gómez, last: Hidalgo} -- canonical: {first: Junping, last: Gong} - variants: - - {first: Jun-ping, last: Gong} -- canonical: {first: Zhengxian, last: Gong} - variants: - - {first: ZhengXian, last: Gong} -- canonical: {first: Graciela, last: Gonzalez} - variants: - - {first: Graciela, last: Gonzalez-Hernandez} -- canonical: {first: Meritxell, last: Gonzàlez} - id: meritxell-gonzalez - variants: - - {first: Meritxell, last: González} -- canonical: {first: Edgar, last: Gonzàlez Pellicer} - variants: - - {first: Edgar, last: Gonzàlez} -- canonical: {first: Fabio A., last: González} - variants: - - {first: Fabio, last: González} -- canonical: {first: Aitor, last: González-Agirre} - variants: - - {first: Aitor, last: Gonzalez-Agirre} -- canonical: {first: Francisco Javier, last: González-Castaño} - variants: - - {first: Francisco J., last: González-Castaño} -- canonical: {first: Ana, last: González-Ledesma} - variants: - - {first: Ana, last: Gonzalez} -- canonical: {first: Joaquín, last: González-Rodríguez} - variants: - - {first: Joaquin, last: Gonzalez-Rodriguez} -- canonical: {first: Jesús, last: González-Rubio} - variants: - - {first: Jesús, last: González Rubio} -- canonical: {first: Hugo, last: Gonçalo Oliveira} - variants: - - {first: Hugo Gonçalo, last: Oliveira} -- canonical: {first: Patricia, last: Gonçalves} - variants: - - {first: Patricia Nunes, last: Gonçalves} - - {first: Patrícia, last: Gonçalves} -- canonical: {first: Teresa, last: Gonçalves} - variants: - - {first: Teresa, last: Goncalves} -- canonical: {first: David, last: Goodine} - id: david-goodine -- canonical: {first: Joshua, last: Goodman} - variants: - - {first: Joshua T., last: Goodman} -- canonical: {first: Michael Wayne, last: Goodman} - variants: - - {first: Michael, last: Goodman} -- canonical: {first: Noah, last: Goodman} - variants: - - {first: Noah D., last: Goodman} -- canonical: {first: Andrew, last: Gordon} - variants: - - {first: Andrew S., last: Gordon} -- canonical: {first: Joshua B., last: Gordon} - variants: - - {first: Joshua, last: Gordon} -- canonical: {first: Yonael, last: Gorfu} - id: yonael-gorfu -- canonical: {first: Allen L., last: Gorin} - variants: - - {first: Allen, last: Gorin} -- canonical: {first: Philip, last: Gorinski} - variants: - - {first: Philip John, last: Gorinski} -- canonical: {first: Matthew R., last: Gormley} - variants: - - {first: Matthew, last: Gormley} -- canonical: {first: Genevieve, last: Gorrell} - id: genevieve-gorrell -- canonical: {first: Didzis, last: Gosko} - variants: - - {first: Didzis, last: Goško} -- canonical: {first: Thilo, last: Gotz} - variants: - - {first: Thilo, last: Götz} -- canonical: {first: Jérôme, last: Goulian} - id: jerome-goulian -- canonical: {first: Cyril, last: Goutte} - id: cyril-goutte -- canonical: {first: Arthur C., last: Graesser} - variants: - - {first: Art, last: Graesser} - - {first: Arthur, last: Graesser} -- canonical: {first: Joseph F., last: Grafsgaard} - variants: - - {first: Joseph, last: Grafsgaard} -- canonical: {first: Naida, last: Graham} - variants: - - {first: Naida L., last: Graham} -- canonical: {first: Filip, last: Gralinski} - variants: - - {first: Filip, last: Graliński} -- canonical: {first: Ramon, last: Granell} - variants: - - {first: Ramón, last: Granell} -- canonical: {first: Robert, last: Granville} - variants: - - {first: Robert Alan, last: Granville} -- canonical: {first: Agustin, last: Gravano} - variants: - - {first: Agustín, last: Gravano} -- canonical: {first: Édouard, last: Grave} - variants: - - {first: Edouard, last: Grave} -- canonical: {first: Guillaume, last: Gravier} - id: guillaume-gravier -- canonical: {first: João, last: Graça} - variants: - - {first: Joao, last: Graca} - - {first: João V., last: Graça} -- canonical: {first: Jordan R., last: Green} - variants: - - {first: Jordan, last: Green} -- canonical: {first: Matthew J., last: Green} - variants: - - {first: Matthew, last: Green} -- canonical: {first: Nancy, last: Green} - variants: - - {first: Nancy L., last: Green} -- canonical: {first: Stephen J., last: Green} - variants: - - {first: Stephen, last: Green} - - {first: Stephen J, last: Green} -- canonical: {first: Mark A., last: Greenwood} - variants: - - {first: Mark, last: Greenwood} -- canonical: {first: Edward, last: Grefenstette} - id: edward-grefenstette -- canonical: {first: Michelle, last: Gregory} - id: michelle-gregory - variants: - - {first: Michelle L., last: Gregory} -- canonical: {first: Warren, last: Greiff} - variants: - - {first: Warren R., last: Greiff} -- canonical: {first: Thomas L., last: Griffiths} - variants: - - {first: Thomas, last: Griffiths} -- canonical: {first: Gintarė, last: Grigonytė} - variants: - - {first: Gintare, last: Grigonyte} - - {first: Gintarė, last: Grigonyte} -- canonical: {first: Ralph, last: Grishman} - id: ralph-grishman -- canonical: {first: Hendrik Johannes, last: Groenewald} - variants: - - {first: Hendrik J., last: Groenewald} -- canonical: {first: Leif, last: Groenqvist} - variants: - - {first: Leif, last: Gronqvist} -- canonical: {first: Maria Toporowska, last: Gronostaj} - variants: - - {first: Maria, last: Toporowska Gronostaj} -- canonical: {first: Jerneja, last: Gros} - variants: - - {first: Jerneja Žganec, last: Gros} -- canonical: {first: Justin H., last: Gross} - variants: - - {first: Justin, last: Gross} -- canonical: {first: Barbara J., last: Grosz} - variants: - - {first: Barbara, last: Grosz} -- canonical: {first: Laszlo, last: Grunfeld} - id: laszlo-grunfeld -- canonical: {first: Normunds, last: Gruzitis} - variants: - - {first: Normunds, last: Grūzītis} -- canonical: {first: Nicole, last: Grégoire} - variants: - - {first: Nicole, last: Gregoire} -- canonical: {first: Hung-Yan, last: Gu} - variants: - - {first: Hung-yan, last: Gu} -- canonical: {first: Franz, last: Guenthner} - id: franz-guenthner -- canonical: {first: Emiliano Raul, last: Guevara} - variants: - - {first: Emiliano, last: Guevara} -- canonical: {first: Pierre, last: Guillaume} - id: pierre-guillaume -- canonical: {first: Thierry, last: Guillotin} - id: thierry-guillotin -- canonical: {first: Curry I., last: Guinn} - variants: - - {first: Curry, last: Guinn} -- canonical: {first: José M., last: Guirao} - variants: - - {first: José María, last: Guirao} -- canonical: {first: Greg, last: Gul-rajani} - variants: - - {first: Greg, last: Gulrajani} -- canonical: {first: Omer Farukhan, last: Gunes} - variants: - - {first: Omer, last: Gunes} -- canonical: {first: Cheng-ming, last: Guo} - variants: - - {first: Cheng Ming, last: Guo} -- canonical: {first: Ying-Mei, last: Guo} - variants: - - {first: YingMei, last: Guo} -- canonical: {first: Yuqing, last: Guo} - variants: - - {first: Yuqing, last: Gao} -- canonical: {first: Zhicheng, last: Guo} - comment: Tsinghua - id: zhicheng-guo-tsinghua -- canonical: {first: Zhicheng, last: Guo} - comment: xidian - id: zhicheng-guo-xidian -- canonical: {first: Deepak, last: Gupta} - variants: - - {first: Deepak Kumar, last: Gupta} - - {first: Deepa, last: Gupta} -- canonical: {first: Naman K., last: Gupta} - variants: - - {first: Naman, last: Gupta} -- canonical: {first: Vineet, last: Gupta} - id: vineet-gupta -- canonical: {first: Antton, last: Gurrutxaga} - id: antton-gurrutxaga -- canonical: {first: Sofia, last: Gustafson-Capková} - variants: - - {first: Sofia, last: Gustafson Capková} -- canonical: {first: Louise, last: Guthrie} - id: louise-guthrie -- canonical: {first: E. Dario, last: Gutierrez} - variants: - - {first: Elkin, last: Darío Gutiérrez} - - {first: E. Darío, last: Gutiérrez} -- canonical: {first: Yoan, last: Gutiérrez} - variants: - - {first: Yoan, last: Gutiérrez Vázquez} -- canonical: {first: Gualberto A., last: Guzman} - variants: - - {first: Gualberto, last: Guzmán} -- canonical: {first: Francisco, last: Guzmán} - variants: - - {first: Francisco, last: Guzman} -- canonical: {first: Tibor, last: Gyimóthy} - id: tibor-gyimothy -- canonical: {first: José M., last: Gómez} - variants: - - {first: José Manuel, last: Gómez} - - {first: Jose Manuel, last: Gómez} - - {first: Jose M., last: Gomez} -- canonical: {first: Xavier, last: Gómez Guinovart} - variants: - - {first: Xavier, last: Gómez-Guinovart} -- canonical: {first: Naiara, last: Pérez} - orcid: 0000-0001-8648-0428 - institution: University of the Basque Country (UPV/EHU) - variants: - - {first: Naiara, last: Perez-Miguel} - - {first: Naiara, last: Miguel} -- canonical: {first: Asunción, last: Gómez-Pérez} - variants: - - {first: Asunción Gómez, last: Pérez} -- canonical: {first: José Manuel, last: Gómez-Pérez} - variants: - - {first: Jose Manuel, last: Gomez-Perez} -- canonical: {first: Anne, last: Göhring} - variants: - - {first: Anne, last: Goehring} -- canonical: {first: Memduh, last: Gökırmak} - variants: - - {first: Memduh, last: Gokirmak} -- canonical: {first: Jana, last: Götze} - variants: - - {first: Jana, last: Goetze} -- canonical: {first: Shachi, last: H. Kumar} - variants: - - {first: Shachi H, last: Kumar} -- canonical: {first: Eun Young, last: Ha} - variants: - - {first: Eun, last: Ha} - - {first: Eun Y., last: Ha} -- canonical: {first: Le Quan, last: Ha} - variants: - - {first: Le Q, last: Ha} -- canonical: {first: Quang Thuy, last: Ha} - variants: - - {first: Quang-Thuy, last: Ha} -- canonical: {first: Yaakov, last: HaCohen-Kerner} - variants: - - {first: Yaakov, last: Hacohen-Kerner} -- canonical: {first: Anne, last: Haake} - variants: - - {first: Anne R., last: Haake} -- canonical: {first: Salah, last: Haamid} - id: salah-haamid -- canonical: {first: Andrew, last: Haas} - variants: - - {first: Andrew R., last: Haas} -- canonical: {first: Christopher, last: Habel} - variants: - - {first: Christopher U., last: Habel} -- canonical: {first: Benoit, last: Habert} - id: benoit-habert - variants: - - {first: Benoît, last: Habert} -- canonical: {first: Kadri, last: Hacioglu} - id: kadri-hacioglu -- canonical: {first: Bassam, last: Haddad} - id: bassam-haddad -- canonical: {first: Nicholas J., last: Haddock} - variants: - - {first: Nicholas, last: Haddock} -- canonical: {first: Widad Mustafa El, last: Hadi} - id: widad-mustafa-el-hadi - variants: - - {first: Widad Mustafa, last: El Hadi} - - {first: Widad, last: Mustafa El Hadi} -- canonical: {first: Mohamed Nassime, last: Hadjadj} - variants: - - {first: Mohamed, last: Hadjadj} -- canonical: {first: Lamia, last: Hadrich Belguith} - variants: - - {first: Lamia Hadrich, last: Belguith} - - {first: Lamia, last: Hadrich-Belguith} - - {first: Lamia, last: Belguith} - - {first: Lamia, last: Belguith Hadrich} -- canonical: {first: Walter, last: Haeseryn} - id: walter-haeseryn -- canonical: {first: Nazila, last: Hafezi} - id: nazila-hafezi -- canonical: {first: Gholamreza, last: Haffari} - variants: - - {first: Reza, last: Haffari} -- canonical: {first: Younggyun, last: Hahm} - variants: - - {first: YoungGyun, last: Hahm} -- canonical: {first: Gus, last: Hahn-Powell} - variants: - - {first: Gustave, last: Hahn-Powell} -- canonical: {first: Negacy, last: Hailu} - variants: - - {first: Negacy D., last: Hailu} -- canonical: {first: Horst-Udo, last: Hain} - id: horst-udo-hain -- canonical: {first: Jan, last: Hajic} - id: jan-hajic - similar: [jan-hajic-jr] - variants: - - {first: Jan, last: Hajič} -- canonical: {first: Eva, last: Hajicova} - id: eva-hajicova - variants: - - {first: Eva, last: Hajicová} - - {first: Eva, last: Hajičová} -- canonical: {first: Jan, last: Hajič jr.} - id: jan-hajic-jr - similar: [jan-hajic] -- canonical: {first: Dilek, last: Hakkani-Tur} - id: dilek-hakkani-tur - variants: - - {first: Dilek, last: Hakkani-Tür} - - {first: Dilek Zeynep, last: Hakkani} -- canonical: {first: John, last: Hale} - variants: - - {first: John T., last: Hale} -- canonical: {first: Keith, last: Hall} - variants: - - {first: Keith B., last: Hall} -- canonical: {first: Mark, last: Hall} - variants: - - {first: Mark Michael, last: Hall} -- canonical: {first: Patrick, last: Haller} - id: patrick-haller-zurich - note: University of Zurich - orcid: 0000-0002-8968-7587 -- canonical: {first: Susan, last: Haller} - id: susan-haller - variants: - - {first: Susan M., last: Haller} -- canonical: {first: Péter, last: Halácsy} - variants: - - {first: Péter, last: Halácsky} -- canonical: {first: Olivier, last: Hamon} - id: olivier-hamon -- canonical: {first: Thierry, last: Hamon} - id: thierry-hamon -- canonical: {first: Julien, last: Hamonic} - id: julien-hamonic -- canonical: {first: Chung-hye, last: Han} - variants: - - {first: Chung-Hye, last: Han} - - {first: Chunghye, last: Han} -- canonical: {first: HyoJung, last: Han} - variants: - - {first: Hou Jeung, last: Han} -- canonical: {first: Jingguang, last: Han} - variants: - - {first: Jing Guang, last: Han} -- canonical: {first: Kenji, last: Hanakata} - id: kenji-hanakata -- canonical: {first: Philip, last: Hanna} - id: philip-hanna -- canonical: {first: Dorte Haltrup, last: Hansen} - variants: - - {first: Dorte H., last: Hansen} -- canonical: {first: Silvia, last: Hansen-Schirra} - variants: - - {first: Silvia, last: Hansen} -- canonical: {first: Sanda, last: Harabagiu} - variants: - - {first: Sanda M., last: Harabagiu} -- canonical: {first: Robert M., last: Haralick} - variants: - - {first: Robert, last: Haralick} -- canonical: {first: Mary, last: Harper} - id: mary-harper - variants: - - {first: Mary P., last: Harper} -- canonical: {first: Phil, last: Harrison} - id: phil-harrison - variants: - - {first: Philip, last: Harrison} -- canonical: {first: Anthony, last: Hartley} - id: anthony-hartley - variants: - - {first: Anthony F., last: Hartley} -- canonical: {first: Matthias, last: Hartung} - id: matthias-hartung -- canonical: {first: Md. Maruf, last: Hasan} - variants: - - {first: Md Maruf, last: Hasan} - - {first: Maruf, last: Hasan} -- canonical: {first: Sadid A., last: Hasan} - variants: - - {first: Sadid, last: Hasan} -- canonical: {first: Saša, last: Hasan} - variants: - - {first: Sasa, last: Hasan} -- canonical: {first: Tatsunori B., last: Hashimoto} - variants: - - {first: Tatsunori, last: Hashimoto} -- canonical: {first: Koiti, last: Hasida} - variants: - - {first: Kôiti, last: Hasida} -- canonical: {first: Ahmed, last: Hassan} - variants: - - {first: Ahmed Hassan, last: Awadallah} -- canonical: {first: Hany, last: Hassan Awadalla} - variants: - - {first: Hany, last: Hassan} -- canonical: {first: Helen, last: Hastie} - variants: - - {first: Helen Wright, last: Hastie} -- canonical: {first: Alexander G., last: Hauptmann} - variants: - - {first: Alex, last: Hauptmann} - - {first: Alexander, last: Hauptmann} -- canonical: {first: Roland R., last: Hausser} - variants: - - {first: Roland, last: Hausser} -- canonical: {first: Annette, last: Hautli} - variants: - - {first: Annette, last: Hautli-Janisz} -- canonical: {first: Jiří, last: Havelka} - variants: - - {first: Jiri, last: Havelka} -- canonical: {first: Jennifer, last: Hay} - variants: - - {first: Jennifer B., last: Hay} -- canonical: {first: Yoshihiko, last: Hayashi} - id: yoshihiko-hayashi -- canonical: {first: Cory, last: Hayes} - variants: - - {first: Cory J., last: Hayes} -- canonical: {first: Jer, last: Hayes} - variants: - - {first: Jeremiah, last: Hayes} -- canonical: {first: Timothy J., last: Hazen} - variants: - - {first: T. J., last: Hazen} -- canonical: {first: Patrick, last: Healey} - variants: - - {first: Pat, last: Healey} - - {first: Patrick G. T., last: Healey} - - {first: Patrick G.T., last: Healey} -- canonical: {first: Marti A., last: Hearst} - variants: - - {first: Marti, last: Hearst} -- canonical: {first: Peter A., last: Heeman} - variants: - - {first: Peter, last: Heeman} -- canonical: {first: George E., last: Heidorn} - id: george-e-heidorn -- canonical: {first: Katarina, last: Heimann Mühlenbock} - variants: - - {first: Katarina, last: Mühlenbock} -- canonical: {first: Pascale, last: Feldkamp} - institution: Aarhus University - orcid: 0000-0002-2434-4268 - variants: - - {first: Pascale, last: Moreira} - - {first: Pascale Feldkamp, last: Moreira} -- canonical: {first: Jindřich, last: Helcl} - variants: - - {first: Jindrich, last: Helcl} -- canonical: {first: Randall A., last: Helzerman} - id: randall-a-helzerman -- canonical: {first: Christian F., last: Hempelmann} - variants: - - {first: Christian, last: Hempelmann} -- canonical: {first: Charles T., last: Hemphill} - variants: - - {first: Charles, last: Hemphill} -- canonical: {first: James, last: Henderson} - variants: - - {first: James B., last: Henderson} -- canonical: {first: John, last: Henderson} - variants: - - {first: John C., last: Henderson} -- canonical: {first: James, last: Hendler} - variants: - - {first: James A., last: Hendler} -- canonical: {first: Robert J., last: Hendley} - variants: - - {first: Robert, last: Hendley} -- canonical: {first: Gary G., last: Hendrix} - variants: - - {first: Gary, last: Hendrix} -- canonical: {first: Enrique, last: Henestroza Anguiano} - variants: - - {first: Enrique Henestroza, last: Anguiano} -- canonical: {first: Peter Juel, last: Henrichsen} - variants: - - {first: Peter, last: Juel Henrichsen} -- canonical: {first: Carlos, last: Henríquez} - variants: - - {first: Carlos, last: Henriquez} - - {first: Carlos A., last: Henríquez Q.} -- canonical: {first: Renate, last: Henschel} - id: renate-henschel -- canonical: {first: Aurélie, last: Herbelot} - variants: - - {first: Aurelie, last: Herbelot} -- canonical: {first: Amaç, last: Herdaǧdelen} - variants: - - {first: Amaç, last: Herdağdelen} -- canonical: {first: Myriam, last: Hernandez} - variants: - - {first: Myriam, last: Hernández A} - - {first: Myriam, last: Hernández} -- canonical: {first: Daniel, last: Hernandez-Lopez} - variants: - - {first: Daniel Hernández, last: López} -- canonical: {first: Inmaculada, last: Hernáez} - id: inmaculada-hernaez - variants: - - {first: Inmaculada, last: Hernaez} - - {first: Inma, last: Hernaez} - - {first: Inma, last: Hernáez} -- canonical: {first: Gregorio, last: Hernández} - id: gregorio-hernandez - variants: - - {first: Gregorio, last: Hernandez} -- canonical: {first: Luis, last: Hernández} - variants: - - {first: Luis Hernández, last: Gomez} - - {first: Luis Hernández, last: Gómez} - - {first: Luis A., last: Hernandez} - - {first: Luis A., last: Hernández} - - {first: Luis A. Hernández, last: Gómez} -- canonical: {first: Adolfo, last: Hernández H.} - variants: - - {first: Adolfo, last: Hernández} -- canonical: {first: John R., last: Hershey} - variants: - - {first: John, last: Hershey} -- canonical: {first: James, last: Hieronymus} - id: james-hieronymus -- canonical: {first: Almut Silja, last: Hildebrand} - variants: - - {first: Silja, last: Hildebrand} - - {first: Almut, last: Hildebrand} -- canonical: {first: Lucas Welter, last: Hilgert} - variants: - - {first: Lucas, last: Hilgert} -- canonical: {first: Robin L., last: Hill} - variants: - - {first: Robin, last: Hill} -- canonical: {first: Dustin, last: Hillard} - id: dustin-hillard -- canonical: {first: Donald, last: Hindle} - id: donald-hindle - variants: - - {first: Don, last: Hindle} -- canonical: {first: Elizabeth A., last: Hinkelman} - variants: - - {first: Elizabeth, last: Hinkelman} -- canonical: {first: Erhard, last: Hinrichs} - variants: - - {first: Erhard W., last: Hinrichs} -- canonical: {first: Marie, last: Hinrichs} - variants: - - {first: Marie, last: Boyle-Hinrichs} -- canonical: {first: Hideki, last: Hirakawa} - id: hideki-hirakawa -- canonical: {first: Julia, last: Hirschberg} - variants: - - {first: Julia B., last: Hirschberg} -- canonical: {first: Lynette, last: Hirschman} - id: lynette-hirschman - variants: - - {first: Lynette, last: Hirshman} -- canonical: {first: Toru, last: Hitaka} - variants: - - {first: Tooru, last: Hitaka} -- canonical: {first: Janet, last: Hitzeman} - id: janet-hitzeman -- canonical: {first: Barbora, last: Hladká} - id: barbora-hladka - variants: - - {first: Barbora, last: Hladka} -- canonical: {first: Bao Quoc, last: Ho} - variants: - - {first: Quoc, last: Ho} - - {first: Quoc, last: Ho Bao} -- canonical: {first: Hing-cheung, last: Ho} - variants: - - {first: Hing-Cheung, last: Ho} -- canonical: {first: Heng, last: Wang} - comment: University of Sydney - institution: University of Sydney - orcid: 0009-0009-5473-5751 - id: heng-wang-sydney -- canonical: {first: Heng, last: Wang} - comment: May refer to several people - id: heng-wang -- canonical: {first: Tu-Bao, last: Ho} - variants: - - {first: Tu Bao, last: Ho} -- canonical: {first: Lydia-Mai, last: Ho-Dac} - variants: - - {first: Mai, last: Ho-dac} -- canonical: {first: Jerry R., last: Hobbs} - id: jerry-r-hobbs - variants: - - {first: Jerry, last: Hobbs} -- canonical: {first: Beth Ann, last: Hockey} - id: beth-ann-hockey - variants: - - {first: Beth A., last: Hockey} - - {first: Beth, last: Hockey} -- canonical: {first: Edward, last: Hoenkamp} - variants: - - {first: Eduard, last: Hoenkamp} -- canonical: {first: Wolfgang, last: Hoeppner} - id: wolfgang-hoeppner -- canonical: {first: Anja, last: Hoethker} - variants: - - {first: Anja, last: Höthker} -- canonical: {first: Holger, last: Hoffmann} - variants: - - {first: Holger, last: Hoffman} -- canonical: {first: Raphael, last: Hoffmann} - variants: - - {first: Raphael, last: Hoffman} -- canonical: {first: Th. R., last: Hofmann} - variants: - - {first: T. R., last: Hofmann} -- canonical: {first: Martin, last: Hofmann--Apitius} - variants: - - {first: Martin, last: Hofmann-Apitius} -- canonical: {first: Chris, last: Hokamp} - variants: - - {first: Christopher, last: Hokamp} -- canonical: {first: Tomáš, last: Holan} - variants: - - {first: Tomas, last: Holan} -- canonical: {first: Natsuko, last: Holden} - id: natsuko-holden -- canonical: {first: Gordana Ilić, last: Holen} - variants: - - {first: Gordana Ilic, last: Holen} -- canonical: {first: Hsiao-Wuen, last: Hon} - id: hsiao-wuen-hon -- canonical: {first: Jia-Fei, last: Hong} - variants: - - {first: Jia-Fei, last: Hung} -- canonical: {first: Philip, last: Hoole} - variants: - - {first: Phil, last: Hoole} -- canonical: {first: Heather, last: Horsfall} - id: heather-horsfall -- canonical: {first: Tamás, last: Horváth} - id: tamas-horvath -- canonical: {first: Iris, last: Hoser} - id: iris-hoser -- canonical: {first: Veronique, last: Hoste} - variants: - - {first: Véronique, last: Hoste} -- canonical: {first: Wen-Juan, last: Hou} - variants: - - {first: Wen, last: Juan Hou} - - {first: Juan, last: Wen} -- canonical: {first: Eduard, last: Hovy} - variants: - - {first: Eduard H., last: Hovy} - - {first: Ed, last: Hovy} -- canonical: {first: Blake, last: Howald} - variants: - - {first: Blake Stephen, last: Howald} -- canonical: {first: David M., last: Howcroft} - variants: - - {first: David, last: Howcroft} -- canonical: {first: Frederick M., last: Hoyt} - variants: - - {first: Frederick, last: Hoyt} -- canonical: {first: Daniel, last: Hromada} - variants: - - {first: Daniel Devatman, last: Hromada} - - {first: Daniel, last: Devatman Hromada} -- canonical: {first: Estevam R., last: 'Hruschka, Jr.'} - variants: - - {first: Estevam R., last: Hruschka Jr.} -- canonical: {first: Hung-ting, last: Hsieh} - variants: - - {first: Hung-Ting, last: Hsieh} -- canonical: {first: Shelley Ching-Yu, last: Hsieh} - variants: - - {first: Ching-yu, last: Hsieh} - - {first: Shelley Ching-yu, last: Hsieh} - - {first: Ching-yu Shelley, last: Hsieh} -- canonical: {first: Shu-Kai, last: Hsieh} - variants: - - {first: Shu-kai, last: Hsieh} - - {first: ShuKai, last: Hsieh} -- canonical: {first: Wen-Chi, last: Hsien} - variants: - - {first: Wen-Chi, last: Hsie} -- canonical: {first: Bo-June (Paul), last: Hsu} - variants: - - {first: Bo-june Paul, last: Hsu} - - {first: Bo-June Paul, last: Hsu} -- canonical: {first: Chun-nan, last: Hsu} - variants: - - {first: Chun-Nan, last: Hsu} -- canonical: {first: Wen-Lian, last: Hsu} - variants: - - {first: Wen-lian, last: Hsu} -- canonical: {first: Yu-Ling Una, last: Hsu} - variants: - - {first: Yu-Ling, last: Hsu} -- canonical: {first: Dong Cheng, last: Hu} - variants: - - {first: Dong-Cheng, last: Hu} -- canonical: {first: An-Ta, last: Huang} - variants: - - {first: Anta, last: Huang} -- canonical: {first: Changning, last: Huang} - variants: - - {first: Chang-Ning, last: Huang} - - {first: Chang-ning, last: Huang} -- canonical: {first: Chung-Chi, last: Huang} - variants: - - {first: Chung-chi, last: Huang} -- canonical: {first: Degen, last: Huang} - variants: - - {first: De-Gen, last: Huang} -- canonical: {first: Eric H., last: Huang} - variants: - - {first: Eric, last: Huang} -- canonical: {first: Feng-Long, last: Huang} - variants: - - {first: Feng-Long, last: Hwang} -- canonical: {first: He-Yan, last: Huang} - variants: - - {first: He-yan, last: Huang} - - {first: Heyan, last: Huang} -- canonical: {first: Jin Hu, last: Huang} - variants: - - {first: JinHu, last: Huang} -- canonical: {first: Jui Ting, last: Huang} - variants: - - {first: Jui-Ting, last: Huang} -- canonical: {first: Lian′en, last: Huang} - variants: - - {first: Lian’en, last: Huang} -- canonical: {first: Qi-quan, last: Huang} - variants: - - {first: Qi-Quan, last: Huang} -- canonical: {first: Shih-Ting, last: Huang} - variants: - - {first: Shih-ting, last: Huang} - - {first: Shi-Ting, last: Huang} -- canonical: {first: Shuan-fan, last: Huang} - variants: - - {first: Shuan-Fan, last: Huang} -- canonical: {first: Ting-Hao, last: Huang} - variants: - - {first: Ting-Hao ‘Kenneth’, last: Huang} - - {first: Ting-Hao Kenneth, last: Huang} -- canonical: {first: Xiangji, last: Huang} - variants: - - {first: Jimmy Xiangji, last: Huang} -- canonical: {first: Xuan-Jing, last: Huang} - variants: - - {first: Xuan-jing, last: Huang} - - {first: Xuanjing, last: Huang} -- canonical: {first: Xuedong, last: Huang} - id: xuedong-huang -- canonical: {first: Jing, last: Huang} - id: jing-huang-stanford - orcid: 0000-0001-9301-9410 - comment: May refer to many people -- canonical: {first: Jing, last: Huang} - id: jing-huang - comment: May refer to many people -- canonical: {first: Richard A., last: Hudson} - variants: - - {first: Richard, last: Hudson} -- canonical: {first: Manuela, last: Huerlimann} - variants: - - {first: Manuela, last: Hürlimann} - - {first: Manuela, last: Huerliman} -- canonical: {first: Mathew, last: Huerta-Enochian} - id: mathew-huerta-enochian -- canonical: {first: Kevin, last: Humphreys} - id: kevin-humphreys -- canonical: {first: Jeih-weih, last: Hung} - variants: - - {first: Jeih-Weih, last: Hung} -- canonical: {first: Kate, last: Hunicke-Smith} - id: kate-hunicke-smith -- canonical: {first: Dan, last: Hunter} - id: dan-hunter -- canonical: {first: Lawrence, last: Hunter} - variants: - - {first: Lawrence E., last: Hunter} -- canonical: {first: Lluís-F., last: Hurtado} - variants: - - {first: Lluís F., last: Hurtado} - - {first: LLuís-F., last: Hurtado} -- canonical: {first: Mazhar Mehdi, last: Hussain} - variants: - - {first: Mazhar, last: Hussain} -- canonical: {first: W. John, last: Hutchins} - variants: - - {first: John, last: Hutchins} -- canonical: {first: Christian, last: Huyck} - id: christian-huyck -- canonical: {first: Mei-Yuh, last: Hwang} - id: mei-yuh-hwang -- canonical: {first: Sebastian G. M., last: Händschke} - variants: - - {first: Sebastian G.M., last: Händschke} -- canonical: {first: Christian, last: Hänig} - variants: - - {first: Christian, last: Haenig} -- canonical: {first: Harald, last: Höge} - id: harald-hoge - variants: - - {first: Harald, last: Hoege} -- canonical: {first: Ali, last: Hürriyetoğlu} - variants: - - {first: Ali, last: Hurriyetoglu} - - {first: Ali, last: Hürriyetoǧlu} -- canonical: {first: Fidelia, last: Ibekwe-SanJuan} - variants: - - {first: Fidelia, last: Ibekwe-Sanjuan} -- canonical: {first: Nancy, last: Ide} - variants: - - {first: Nancy M., last: Ide} -- canonical: {first: Carlos A., last: Iglesias} - variants: - - {first: Carlos, last: Iglesias} -- canonical: {first: Suzana, last: Ilic} - variants: - - {first: Suzana, last: Ilić} -- canonical: {first: Sathish Reddy, last: Indurthi} - variants: - - {first: Sathish, last: Reddy} - - {first: Sathish, last: Indurthi} -- canonical: {first: Anton Karl, last: Ingason} - variants: - - {first: Anton K., last: Ingason} -- canonical: {first: Robert, last: Ingria} - id: robert-ingria -- canonical: {first: Diana, last: Inkpen} - variants: - - {first: Diana Zaiu, last: Inkpen} - - {first: Diana, last: Zaiu} -- canonical: {first: Leonid, last: Iomdin} - variants: - - {first: Leonid L., last: Iomdin} -- canonical: {first: Molly, last: Ireland} - variants: - - {first: Molly E., last: Ireland} -- canonical: {first: José, last: Iria} - variants: - - {first: Jose, last: Iria} -- canonical: {first: Mikel, last: Iruskieta} - id: mikel-iruskieta -- canonical: {first: Anas El, last: Isbihani} - variants: - - {first: Anas, last: El Isbihani} -- canonical: {first: Masato, last: Ishizaki} - id: masato-ishizaki -- canonical: {first: Aminul, last: Islam} - variants: - - {first: Md. Aminul, last: Islam} -- canonical: {first: Zahurul, last: Islam} - variants: - - {first: Zahrul, last: Islam} -- canonical: {first: Rezarta, last: Islamaj Dogan} - variants: - - {first: Rezarta, last: Islamaj Doğan} -- canonical: {first: David, last: Israel} - variants: - - {first: David J., last: Israel} -- canonical: {first: Shuichi, last: Itahashi} - variants: - - {first: Shuich, last: Itahashi} -- canonical: {first: Yukihiro, last: Itoh} - variants: - - {first: Yukihiro, last: Ito} -- canonical: {first: Abe, last: Ittycheriah} - id: abe-ittycheriah -- canonical: {first: Un-Gian, last: Iunn} - variants: - - {first: Un-gian, last: Iun} - - {first: Ún-giân, last: Iû} -- canonical: {first: Alexei V., last: Ivanov} - variants: - - {first: Alexei, last: Ivanov} -- canonical: {first: Krasimira, last: Ivanova} - variants: - - {first: Krassimira, last: Ivanova} -- canonical: {first: Lucja, last: Iwanska} - variants: - - {first: Lucja M., last: Iwanska} -- canonical: {first: Shun’ya, last: Iwasawa} - variants: - - {first: Shunya, last: Iwasawa} -- canonical: {first: Rubén, last: Izquierdo} - variants: - - {first: Ruben, last: Izquierdo Bevia} - - {first: Ruben, last: Izquierdo} -- canonical: {first: Litton, last: J Kurisinkel} - variants: - - {first: Litton J, last: Kurisinkel} -- canonical: {first: Eric, last: Jackson} - id: eric-jackson -- canonical: {first: Cassandra L., last: Jacobs} - variants: - - {first: Cassandra, last: Jacobs} -- canonical: {first: Paul S., last: Jacobs} - id: paul-s-jacobs - variants: - - {first: Paul, last: Jacobs} -- canonical: {first: T. Florian, last: Jaeger} - variants: - - {first: Florian, last: Jaeger} -- canonical: {first: Somayeh, last: Jafaritazehjani} - variants: - - {first: Somayeh, last: Jafaritazehjan} -- canonical: {first: Abhyuday, last: Jagannatha} - variants: - - {first: Abhyuday N, last: Jagannatha} -- canonical: {first: Michael E., last: Jahr} - variants: - - {first: Michael, last: Jahr} -- canonical: {first: Brage Ekroll, last: Jahren} - variants: - - {first: Brage, last: Jahren} -- canonical: {first: Siddharth, last: Jain} - variants: - - {first: Siddhanth, last: Jain} -- canonical: {first: Primož, last: Jakopin} - variants: - - {first: Primoz, last: Jakopin} -- canonical: {first: Anthony, last: Jameson} - id: anthony-jameson -- canonical: {first: Srinivasan, last: Janarthanam} - variants: - - {first: Srini, last: Janarthanam} -- canonical: {first: Jyh-Shing Roger, last: Jang} - variants: - - {first: Jyh-Shing, last: Jang} - - {first: Jyh-Shing, last: Roger Jang} - - {first: Roger Jyh-Shing, last: Jang} -- canonical: {first: Myung-Gil, last: Jang} - variants: - - {first: Myoung-Gil, last: Jang} -- canonical: {first: Seok Bae, last: Jang} - variants: - - {first: Seok B., last: Jang} -- canonical: {first: Peter, last: Jansen} - variants: - - {first: Peter J., last: Jansen} -- canonical: {first: Michèle, last: Jardino} - id: michele-jardino - variants: - - {first: Michele, last: Jardino} -- canonical: {first: Gaja, last: Jarosz} - variants: - - {first: Gaja E., last: Jarosz} -- canonical: {first: Timo, last: Jarvinen} - variants: - - {first: Timo, last: Järvinen} -- canonical: {first: Jisha P., last: Jayan} - variants: - - {first: Jisha, last: P Jayan} - - {first: Jisha P, last: Jayan} -- canonical: {first: Arun Kumar, last: Jayapal} - variants: - - {first: Arun, last: Jayapal} -- canonical: {first: Frederick, last: Jelinek} - id: frederick-jelinek - variants: - - {first: Fred, last: Jelinek} - - {first: Fredrick, last: Jelinek} -- canonical: {first: Karen, last: Jensen} - id: karen-jensen -- canonical: {first: Lars Juhl, last: Jensen} - variants: - - {first: Lars J., last: Jensen} -- canonical: {first: Hyung-Bae, last: Jeon} - variants: - - {first: Hyungbae, last: Jeon} -- canonical: {first: Girish Nath, last: Jha} - variants: - - {first: Girish, last: Jha} -- canonical: {first: Donghong, last: Ji} - variants: - - {first: DongHong, last: Ji} - - {first: Dong-Hong, last: Ji} - - {first: Dong Hong, last: Ji} -- canonical: {first: Paul D, last: Ji} - variants: - - {first: Paul D., last: Ji} -- canonical: {first: Jia-Yan, last: Jian} - variants: - - {first: Jia Yan, last: Jian} -- canonical: {first: Mike Tian-Jian, last: Jiang} - variants: - - {first: Tian-Jian, last: Jiang} -- canonical: {first: Zheng Ping, last: Jiang} - variants: - - {first: Zhengping, last: Jiang} -- canonical: {first: Antonio, last: Jimeno Yepes} - variants: - - {first: Antonio Jimeno, last: Yepes} - - {first: Antonio José, last: Jimeno Yepes} - - {first: Antonio, last: Jimeno-Yepes} -- canonical: {first: M. Dolores, last: Jiménez-López} - variants: - - {first: Maria Dolores, last: Jiménez-López} -- canonical: {first: Salud María, last: Jiménez-Zafra} - variants: - - {first: Salud M., last: Jiménez-Zafra} - - {first: Salud M., last: Jiménez Zafra} -- canonical: {first: Hongyan, last: Jing} - id: hongyan-jing -- canonical: {first: Petr, last: Jirku} - id: petr-jirku -- canonical: {first: Amanda C., last: Jobbins} - id: amanda-c-jobbins -- canonical: {first: Janne Bondi, last: Johannessen} - variants: - - {first: Janne, last: Bondi Johannessen} -- canonical: {first: Anders, last: Johannsen} - variants: - - {first: Anders, last: Johanssen} -- canonical: {first: David E., last: Johnson} - variants: - - {first: David, last: Johnson} -- canonical: {first: Helen L., last: Johnson} - variants: - - {first: Helen, last: Johnson} -- canonical: {first: Kristen, last: Johnson} - variants: - - {first: Kristen Marie, last: Johnson} -- canonical: {first: Michael T., last: Johnson} - id: michael-t-johnson -- canonical: {first: Rie, last: Johnson} - variants: - - {first: Rie, last: Ando} - - {first: Rie Kubota, last: Ando} -- canonical: {first: Roderick L., last: Johnson} - id: roderick-l-johnson -- canonical: {first: Michael, last: Johnston} - id: michael-johnston -- canonical: {first: Kristiina, last: Jokinen} - variants: - - {first: Päivi Kristiina, last: Jokinen} -- canonical: {first: Bevan, last: Jones} - variants: - - {first: Bevan K., last: Jones} - - {first: Bevan Keeley, last: Jones} -- canonical: {first: Christopher, last: Jones} - variants: - - {first: Chris, last: Jones} -- canonical: {first: Dominic R., last: Jones} - variants: - - {first: Dominic, last: Jones} -- canonical: {first: Douglas, last: Jones} - variants: - - {first: Douglas A., last: Jones} - - {first: Doug, last: Jones} -- canonical: {first: Gareth J. F., last: Jones} - variants: - - {first: Gareth J.F., last: Jones} -- canonical: {first: Mark, last: Jones} - variants: - - {first: Mark A., last: Jones} - - {first: Mark Alan, last: Jones} -- canonical: {first: Michael, last: Jones} - variants: - - {first: Michael P., last: Jones} -- canonical: {first: Steven JM, last: Jones} - variants: - - {first: Steven, last: Jones} -- canonical: {first: Clement, last: Jonquet} - variants: - - {first: Clément, last: Jonquet} -- canonical: {first: Michael I., last: Jordan} - variants: - - {first: Michael, last: Jordan} -- canonical: {first: Pamela, last: Jordan} - variants: - - {first: Pamela W., last: Jordan} -- canonical: {first: Alipio, last: Jorge} - variants: - - {first: Alípio, last: Jorge} -- canonical: {first: Aravind, last: Joshi} - id: aravind-joshi - variants: - - {first: Aravind K., last: Joshi} -- canonical: {first: Sachindra, last: Joshi} - variants: - - {first: Sachin, last: Joshi} -- canonical: {first: Shafiq, last: Joty} - variants: - - {first: Shafiq R., last: Joty} -- canonical: {first: Yun-Cheng, last: Ju} - variants: - - {first: Yun Cheng, last: Ju} -- canonical: {first: Alfons, last: Juan} - variants: - - {first: Alfons, last: Juan-Císcar} -- canonical: {first: Yau-Tarng, last: Juang} - variants: - - {first: Yau-Tang, last: Juang} -- canonical: {first: Jozef, last: Juhár} - variants: - - {first: Jozef, last: Juhar} -- canonical: {first: Cléo, last: Jullien} - variants: - - {first: Cleo, last: Jullien} -- canonical: {first: Han-Min, last: Jung} - variants: - - {first: Hanmin, last: Jung} -- canonical: {first: Sangkeun, last: Jung} - variants: - - {first: SangKeun, last: Jung} -- canonical: {first: Sung Young, last: Jung} - variants: - - {first: Sung-Young, last: Jung} -- canonical: {first: Simeon, last: Junker} - variants: - - {first: Simeon, last: Schüz} -- canonical: {first: Dan, last: Jurafsky} - variants: - - {first: Daniel, last: Jurafsky} -- canonical: {first: Filip, last: Jurcicek} - variants: - - {first: Filip, last: Jurčíček} -- canonical: {first: Marcel Adam, last: Just} - variants: - - {first: Marcel, last: Just} -- canonical: {first: Harri, last: Jäppinen} - id: harri-jappinen - variants: - - {first: Harri, last: Jappinen} -- canonical: {first: Arne, last: Jönsson} - variants: - - {first: Arne, last: Jonsson} -- canonical: {first: Brigitte, last: Jörg} - variants: - - {first: Brigitte, last: Jorg} -- canonical: {first: Bhadran V., last: K} - variants: - - {first: Bhadran, last: V K} - - {first: Bhadran V, last: K} -- canonical: {first: Heiki-Jaan, last: Kaalep} - variants: - - {first: Heiki Jaan, last: Kaalep} -- canonical: {first: Mijail, last: Kabadjov} - id: mijail-kabadjov - variants: - - {first: Mijail A., last: Kabadjov} - - {first: Mijail, last: Alexandrov-Kabadjov} -- canonical: {first: Michael B., last: Kac} - variants: - - {first: Michael, last: Kac} -- canonical: {first: Vladimír, last: Kadlec} - variants: - - {first: Vladimir, last: Kadlec} -- canonical: {first: Jeremy G., last: Kahn} - variants: - - {first: Jeremy, last: Kahn} -- canonical: {first: Łukasz, last: Kaiser} - variants: - - {first: Lukasz, last: Kaiser} -- canonical: {first: Michael, last: Kaisser} - variants: - - {first: Michael, last: Kaißer} -- canonical: {first: Ioannis, last: Kakadiaris} - variants: - - {first: Ioannis A., last: Kakadiaris} -- canonical: {first: Jun’ichi, last: Kakegawa} - variants: - - {first: Jun-ichi, last: Kakegawa} -- canonical: {first: Jugal, last: Kalita} - id: jugal-kalita - variants: - - {first: Jugal K., last: Kalita} -- canonical: {first: Rihards, last: Kalniņš} - variants: - - {first: Rihards, last: Kalnins} -- canonical: {first: Nanda, last: Kambhatla} - id: nanda-kambhatla - variants: - - {first: Nandakishore, last: Kambhatla} -- canonical: {first: Shin-ichiro, last: Kamei} - variants: - - {first: Shinichiro, last: Kamei} -- canonical: {first: Prathusha, last: Kameswara Sarma} - variants: - - {first: Prathusha, last: K Sarma} -- canonical: {first: Candace A., last: Kamm} - variants: - - {first: Candace, last: Kamm} -- canonical: {first: Bo-Yeong, last: Kang} - variants: - - {first: Bo-yeong, last: Kang} -- canonical: {first: Rose Catherine, last: Kanjirathinkal} - variants: - - {first: Rose, last: Catherine} -- canonical: {first: Ashvin, last: Kannan} - id: ashvin-kannan -- canonical: {first: Paul, last: Kantor} - variants: - - {first: Paul B., last: Kantor} -- canonical: {first: Cheng-yan, last: Kao} - variants: - - {first: Cheng-Yan, last: Kao} - - {first: Cheng Yan, last: Kao} -- canonical: {first: Ting-hui, last: Kao} - variants: - - {first: Ting-Hui, last: Kao} -- canonical: {first: Randy M., last: Kaplan} - variants: - - {first: Randy, last: Kaplan} -- canonical: {first: Ronald M., last: Kaplan} - variants: - - {first: Ronald, last: Kaplan} - - {first: Ron, last: Kaplan} -- canonical: {first: Jurgita, last: Kapočiūtė-Dzikienė} - variants: - - {first: Jurgita, last: Kapociute-Dzikiene} -- canonical: {first: Diman, last: Karagyozov} - variants: - - {first: Diman, last: Karagiozov} -- canonical: {first: Rafael - Michael, last: Karampatsis} - variants: - - {first: Rafael Michael, last: Karampatsis} -- canonical: {first: Vanja M., last: Karan} - variants: - - {first: Vanja Mladen, last: Karan} -- canonical: {first: David R., last: Karger} - variants: - - {first: David, last: Karger} -- canonical: {first: Kostas, last: Karpouzis} - id: kostas-karpouzis -- canonical: {first: Hideki, last: Kashioka} - id: hideki-kashioka -- canonical: {first: Robert T., last: Kasper} - variants: - - {first: Robert, last: Kasper} -- canonical: {first: Walter, last: Kasper} - id: walter-kasper -- canonical: {first: Rohit, last: Kate} - variants: - - {first: Rohit J., last: Kate} -- canonical: {first: Naoto, last: Kato} - variants: - - {first: Naoto, last: Katoh} -- canonical: {first: Yoshihide, last: Kato} - variants: - - {first: Yoshihide, last: Sato} -- canonical: {first: Graham, last: Katz} - variants: - - {first: E. Graham, last: Katz} -- canonical: {first: Jason, last: Katz-Brown} - variants: - - {first: Jason, last: Brown} -- canonical: {first: Ergina, last: Kavallieratou} - id: ergina-kavallieratou -- canonical: {first: Hisashi, last: Kawai} - variants: - - {first: Kawai, last: Hisashi} -- canonical: {first: Jun′ichi, last: Kazama} - variants: - - {first: Jun’ichi, last: Kazama} -- canonical: {first: Zdravko, last: Kačič} - variants: - - {first: Zdravko, last: Kacic} -- canonical: {first: John, last: Keane} - variants: - - {first: John, last: Kane} -- canonical: {first: Michael S., last: Kearns} - variants: - - {first: Michael, last: Kearns} -- canonical: {first: Gail M., last: Keenan} - variants: - - {first: Gail, last: Keenan} - - {first: Gail M, last: Keenan} -- canonical: {first: Thomas A., last: Keenan} - variants: - - {first: Thomas, last: Keenan} -- canonical: {first: Judy Anne, last: Kegl} - variants: - - {first: Judy, last: Kegl} -- canonical: {first: Andrew, last: Kehler} - variants: - - {first: Andy, last: Kehler} -- canonical: {first: Daniel, last: Keim} - variants: - - {first: Daniel A., last: Keim} -- canonical: {first: John, last: Kelleher} - variants: - - {first: John D., last: Kelleher} -- canonical: {first: Andre, last: Kempe} - variants: - - {first: André, last: Kempe} -- canonical: {first: Casey, last: Kennington} - variants: - - {first: Casey Redd, last: Kennington} -- canonical: {first: Fabio, last: Kepler} - id: fabio-kepler - variants: - - {first: Fabio N., last: Kepler} - - {first: Fabio Natanael, last: Kepler} -- canonical: {first: Sue J., last: Ker} - variants: - - {first: Sur-Jin, last: Ker} - - {first: Su-Jin, last: Ker} - - {first: Sue-Jin, last: Ker} - - {first: Sue-jin, last: Ker} -- canonical: {first: Katia Lida, last: Kermanidis} - variants: - - {first: Katia, last: Kermanidis} -- canonical: {first: Margaret, last: Kern} - variants: - - {first: Margaret L., last: Kern} -- canonical: {first: Stephan M., last: Kerpedjiev} - variants: - - {first: Stephan, last: Kerpedjiev} -- canonical: {first: Vlado, last: Keselj} - variants: - - {first: Vlado, last: Kešelj} -- canonical: {first: Fahad, last: Khan} - variants: - - {first: Anas Fahad, last: Khan} -- canonical: {first: Md. Anwarus Salam, last: Khan} - variants: - - {first: Khan Md. Anwarus, last: Salam} - - {first: Khan Md., last: Anwarus Salam} -- canonical: {first: Mohammed Arif, last: Khan} - variants: - - {first: Arif, last: Khan} - - {first: Arif Md., last: Khan} -- canonical: {first: Vikash, last: Khandelwal} - variants: - - {first: Vikas, last: Khandelwal} -- canonical: {first: Mitesh M., last: Khapra} - variants: - - {first: Mitesh, last: Khapra} - - {first: Mitesh, last: M. Khapra} - - {first: Mitesh M, last: Khapra} - - {first: Mitesh Shantadevi, last: Khapra} -- canonical: {first: Sanjeev, last: Khudanpur} - id: sanjeev-khudanpur -- canonical: {first: Rodger, last: Kibble} - id: rodger-kibble -- canonical: {first: Chloé, last: Kiddon} - variants: - - {first: Chloe, last: Kiddon} -- canonical: {first: Zhihao, last: Zhang} - id: zhihao-zhang-soochow - orcid: 0000-0001-9283-101X - institution: Soochow University - comment: Soochow -- canonical: {first: Zhihao, last: Zhang} - id: zhihao-zhang - comment: May refer to several people -- canonical: {first: Bernd, last: Kiefer} - id: bernd-kiefer -- canonical: {first: Hoang, last: Kiem} - variants: - - {first: Kiem, last: Hoang} -- canonical: {first: Scott F., last: Kiesling} - variants: - - {first: Scott, last: Kiesling} -- canonical: {first: Hideaki, last: Kikuchi} - id: hideaki-kikuchi -- canonical: {first: Gen-ichiro, last: Kikui} - variants: - - {first: Gen’ichiro, last: Kikui} -- canonical: {first: Chiharu Uda, last: Kikuta} - variants: - - {first: Chiharu, last: Uda} -- canonical: {first: Bong-Wan, last: Kim} - variants: - - {first: Jong Wan, last: Kim} -- canonical: {first: Chang-Hyun, last: Kim} - variants: - - {first: Changhyun, last: Kim} - - {first: Chang Hyun, last: Kim} -- canonical: {first: Deok-bong, last: Kim} - variants: - - {first: Deok-Bong, last: Kim} -- canonical: {first: Dong-Il, last: Kim} - variants: - - {first: Dong-il, last: Kim} -- canonical: {first: Eun-kyung, last: Kim} - variants: - - {first: Eun-Kyung, last: Kim} -- canonical: {first: Gil Chang, last: Kim} - variants: - - {first: GilChang, last: Kim} - - {first: Gil-Chang, last: Kim} - - {first: Gilchang, last: Kim} -- canonical: {first: Hyuhng Joon, last: Kim} - variants: - - {first: Hyuhng, last: Kim} -- canonical: {first: Jung-jae, last: Kim} - variants: - - {first: Jung-Jae, last: Kim} -- canonical: {first: Kyoung-young, last: Kim} - variants: - - {first: Kyoung-Young, last: Kim} -- canonical: {first: Sung Dong, last: Kim} - variants: - - {first: Sung-Dong, last: Kim} -- canonical: {first: Sunghwan Mac, last: Kim} - variants: - - {first: Sunghwan, last: Kim} -- canonical: {first: Young-Gil, last: Kim} - variants: - - {first: Young-Kil, last: Kim} - - {first: Young Kil, last: Kim} - - {first: Young-Kill, last: Kim} - - {first: YoungKil, last: Kim} -- canonical: {first: Yung Taek, last: Kim} - variants: - - {first: Yung-Taek, last: Kim} -- canonical: {first: Owen, last: Kimball} - id: owen-kimball -- canonical: {first: David, last: King} - variants: - - {first: David L., last: King} -- canonical: {first: Tracy Holloway, last: King} - variants: - - {first: Tracy H., last: King} -- canonical: {first: Brian, last: Kingsbury} - id: brian-kingsbury -- canonical: {first: Jim, last: Kinzey} - variants: - - {first: Jim, last: Kimzey} -- canonical: {first: Karin, last: Kipper} - variants: - - {first: Karin Christine, last: Kipper} - - {first: Karin, last: Schuler} - - {first: Karin, last: Kipper Schuler} - - {first: Karin, last: Kipper-Schuler} -- canonical: {first: George Anton, last: Kiraz} - variants: - - {first: George, last: Kiraz} -- canonical: {first: Andreas Søeborg, last: Kirkedal} - variants: - - {first: Andreas, last: Søeborg Kirkedal} -- canonical: {first: Jamie, last: Kiros} - variants: - - {first: Jamie Ryan, last: Kiros} -- canonical: {first: Atanas, last: Kiryakov} - variants: - - {first: Atanas K., last: Kiryakov} -- canonical: {first: Balázs, last: Kis} - variants: - - {first: Balazs, last: Kis} -- canonical: {first: Imre, last: Kiss} - id: imre-kiss -- canonical: {first: Chunyu, last: Kit} - variants: - - {first: Chun-yu, last: Kit} -- canonical: {first: Sotaro, last: Kita} - id: sotaro-kita -- canonical: {first: Richard, last: Kittredge} - id: richard-kittredge -- canonical: {first: Poul Søren, last: Kjærsgaard} - variants: - - {first: Poul Soren, last: Kjaersgaard} -- canonical: {first: Esther, last: Klabbers} - id: esther-klabbers -- canonical: {first: Ioannis, last: Klapaftis} - variants: - - {first: Ioannis P., last: Klapaftis} -- canonical: {first: Alex, last: Klassmann} - variants: - - {first: Alexander, last: Klassmann} -- canonical: {first: Judith L., last: Klavans} - id: judith-l-klavans - variants: - - {first: Judith, last: Klavans} -- canonical: {first: Wolfgang, last: Klein} - id: wolfgang-klein -- canonical: {first: Jörg, last: Kleinz} - variants: - - {first: Jorg, last: Kleinz} -- canonical: {first: Gerda, last: Klimonow} - id: gerda-klimonow -- canonical: {first: Tor, last: Klingberg} - id: tor-klingberg -- canonical: {first: Natalia, last: Klyueva} - variants: - - {first: Natalia, last: Kljueva} -- canonical: {first: Tina, last: Klüwer} - variants: - - {first: Tina, last: Kluewer} -- canonical: {first: Krzysztof, last: Kochut} - id: krzysztof-kochut -- canonical: {first: Andras, last: Kocsor} - variants: - - {first: András, last: Kocsor} -- canonical: {first: Hanae, last: Koiso} - id: hanae-koiso -- canonical: {first: Mare, last: Koit} - id: mare-koit -- canonical: {first: Atsuko, last: Koizumi} - id: atsuko-koizumi -- canonical: {first: George, last: Kokkinakis} - id: george-kokkinakis - variants: - - {first: George K., last: Kokkinakis} -- canonical: {first: Sofie Johansson, last: Kokkinakis} - variants: - - {first: Sofie, last: Johansson Kokkinakis} -- canonical: {first: Sia, last: Kolkovska} - variants: - - {first: Siya, last: Kolkovska} -- canonical: {first: David, last: Kolovratnik} - variants: - - {first: David, last: Kolovratník} -- canonical: {first: Anup Kumar, last: Kolya} - variants: - - {first: Anup, last: Kumar Kolya} - - {first: Anup, last: Kolya} -- canonical: {first: Ravikumar, last: Komandur} - variants: - - {first: K, last: Ravikumar} -- canonical: {first: Rik, last: Koncel-Kedziorski} - id: rik-koncel-kedziorski -- canonical: {first: Ravikumar, last: Kondadadi} - variants: - - {first: Ravi, last: Kondadadi} - - {first: Ravi Kumar, last: Kondadadi} -- canonical: {first: Alexis, last: Konstantinidis} - variants: - - {first: Alexis, last: Konstandinidis} -- canonical: {first: Selcuk, last: Kopru} - variants: - - {first: Selçuk, last: Köprü} -- canonical: {first: Jan, last: Kors} - variants: - - {first: Jan, last: Korst} -- canonical: {first: Govind, last: Kothari} - variants: - - {first: '', last: Govind} -- canonical: {first: Guy-Noel, last: Kouarata} - variants: - - {first: Guy-Noël, last: Kouarata} -- canonical: {first: Eleni, last: Koutsogeorgos} - id: eleni-koutsogeorgos -- canonical: {first: John J., last: Kovarik} - variants: - - {first: John, last: Kovarik} -- canonical: {first: Vojtěch, last: Kovář} - variants: - - {first: Vojtech, last: Kovář} -- canonical: {first: Marek, last: Kozlowski} - variants: - - {first: Marek, last: Kozłowski} -- canonical: {first: Emiel, last: Krahmer} - variants: - - {first: Emiel J., last: Krahmer} -- canonical: {first: Olivier, last: Kraif} - id: olivier-kraif -- canonical: {first: Martin, last: Krallinger} - id: martin-krallinger -- canonical: {first: Steven, last: Krauwer} - id: steven-krauwer -- canonical: {first: Jana, last: Kravalová} - variants: - - {first: Jana, last: Kravalova} -- canonical: {first: Hans-Ulrich, last: Krieger} - variants: - - {first: HansUlrich, last: Krieger} -- canonical: {first: Raghava, last: Krishnan} - id: raghava-krishnan -- canonical: {first: Rihards, last: Krišlauks} - variants: - - {first: Rihards, last: Krislauks} -- canonical: {first: Anthony, last: Kroch} - variants: - - {first: Anthony S., last: Kroch} -- canonical: {first: Geert-Jan M., last: Kruijff} - variants: - - {first: Geert-Jan, last: Kruijff} -- canonical: {first: Ivana, last: Kruijff-Korbayová} - variants: - - {first: Ivana, last: Kruijff-Korbayova} - - {first: Ivana, last: Kruijff-Korbayovà} -- canonical: {first: George, last: Krupka} - variants: - - {first: George R., last: Krupka} -- canonical: {first: Udo, last: Kruschwitz} - id: udo-kruschwitz -- canonical: {first: Germán, last: Kruszewski} - variants: - - {first: German, last: Kruszewski} -- canonical: {first: Francis, last: Kubala} - id: francis-kubala -- canonical: {first: Vladislav, last: Kubon} - variants: - - {first: Vladislav, last: Kuboň} - - {first: Vladlslav, last: Kubon} -- canonical: {first: Taku, last: Kudo} - variants: - - {first: Taku, last: Kudoh} -- canonical: {first: Ulrike, last: Kugler} - id: ulrike-kugler -- canonical: {first: Anne, last: Kuhn} - id: anne-kuhn -- canonical: {first: Robert J., last: Kuhns} - variants: - - {first: Robert, last: Kuhns} -- canonical: {first: Malhar, last: Kulkarni} - variants: - - {first: Malhar A., last: Kulkarni} -- canonical: {first: Ayush, last: Kumar} - variants: - - {first: Kumar, last: Ayush} -- canonical: {first: Harshit, last: Kumar} - id: harshit-kumar -- canonical: {first: Harshit, last: Kumar} - id: harshit-kumar-iit -- canonical: {first: Anand, last: Kumar M} - variants: - - {first: Anand Kumar, last: Madasamy} - - {first: Anand Kumar, last: M} -- canonical: {first: A, last: Kumaran} - variants: - - {first: A., last: Kumaran} -- canonical: {first: Masako, last: Kume} - id: masako-kume -- canonical: {first: Andrew L., last: Kun} - variants: - - {first: Andrew, last: Kun} -- canonical: {first: Stephen, last: Kunath} - variants: - - {first: Stephen A., last: Kunath} -- canonical: {first: Kerstin, last: Kunz} - variants: - - {first: Kerstin Anna, last: Kunz} -- canonical: {first: Chan-hung, last: Kuo} - variants: - - {first: Chan-Hung, last: Kuo} -- canonical: {first: Sankar, last: Kuppan} - variants: - - {first: Sankar, last: K} -- canonical: {first: Anna, last: Kupść} - variants: - - {first: Anna, last: Kupsc} -- canonical: {first: Yurii, last: Kuratov} - variants: - - {first: Yuri, last: Kuratov} -- canonical: {first: Mohamed Zakaria, last: Kurdi} - variants: - - {first: Mohamed-Zakaria, last: Kurdi} -- canonical: {first: Emina, last: Kurtić} - variants: - - {first: Emina, last: Kurtic} -- canonical: {first: Nicholas, last: Kushmerick} - id: nicholas-kushmerick -- canonical: {first: Andreas, last: Kustner} - id: andreas-kustner -- canonical: {first: Sergey O., last: Kuznetsov} - variants: - - {first: Sergei O., last: Kuznetsov} -- canonical: {first: Ivona, last: Kučerová} - variants: - - {first: Ivona, last: Kuc̆erová} -- canonical: {first: Pavel, last: Kvĕtoň} - variants: - - {first: Pavel, last: Kveton} - - {first: Pavel, last: Květoň} -- canonical: {first: Stan C., last: Kwasny} - variants: - - {first: Stan, last: Kwasny} -- canonical: {first: Cheol Jung, last: Kweon} - variants: - - {first: Cheoljung, last: Kweon} -- canonical: {first: Kui-Lam, last: Kwok} - id: kui-lam-kwok - variants: - - {first: Kui Lam, last: Kwok} -- canonical: {first: Olivia O.Y., last: Kwong} - variants: - - {first: O.Y., last: Kwong} - - {first: Oi Yee, last: Kwong} -- canonical: {first: Gunnel, last: Källgren} - variants: - - {first: Gunnel, last: Kallgren} -- canonical: {first: Joachim, last: Köhler} - variants: - - {first: Joachim, last: Koehler} -- canonical: {first: Natalie, last: Kübler} - variants: - - {first: Natalie, last: Kubler} -- canonical: {first: Sandra, last: Kübler} - variants: - - {first: Sandra, last: Kubler} - - {first: Sandra, last: Kuebler} -- canonical: {first: Sobha, last: L} - variants: - - {first: L., last: Sobha} -- canonical: {first: Abhay, last: L. Kashyap} - variants: - - {first: Abhay, last: Kashyap} -- canonical: {first: Gorka, last: Labaka} - id: gorka-labaka -- canonical: {first: Penny, last: Labropoulou} - id: penny-labropoulou -- canonical: {first: Martin, last: Labský} - variants: - - {first: Martin, last: Labsky} -- canonical: {first: Finley, last: Lacatusu} - variants: - - {first: V. Finley, last: Lacatusu} -- canonical: {first: Anne, last: Lacheret} - variants: - - {first: Anne, last: Lacheret-Dujour} -- canonical: {first: John, last: Lafferty} - id: john-lafferty - variants: - - {first: John D., last: Lafferty} - - {first: John, last: Lafrerty} -- canonical: {first: Frederique, last: Laforest} - variants: - - {first: Frédérique, last: Laforest} -- canonical: {first: Antonio-L., last: Lagarda} - variants: - - {first: Antonio, last: Lagarda} - - {first: Antonio L., last: Lagarda} -- canonical: {first: Torbjörn, last: Lager} - variants: - - {first: Torbjorn, last: Lager} - - {first: Torbjoern, last: Lager} -- canonical: {first: Albert M., last: Lai} - variants: - - {first: Albert, last: Lai} - - {first: Albert M, last: Lai} -- canonical: {first: Jennifer C., last: Lai} - variants: - - {first: Jenifer C., last: Lai} - - {first: Jennifer, last: Lai} -- canonical: {first: Min-Hua, last: Lai} - variants: - - {first: Min Hua, last: Lai} -- canonical: {first: Tom B.Y., last: Lai} - id: tom-b-y-lai - variants: - - {first: Tom B. Y., last: Lai} - - {first: Tom B.Y, last: Lai} -- canonical: {first: Tom Bong-yeung, last: Lai} - variants: - - {first: Bong-Yeung, last: Lai} -- canonical: {first: Tuan, last: Lai} - variants: - - {first: Tuan Manh, last: Lai} -- canonical: {first: Yu-da, last: Lai} - variants: - - {first: Yu-Da, last: Lai} -- canonical: {first: Meriama, last: Laib} - variants: - - {first: Meriama, last: Laïb} - - {first: Mariama, last: Laib} -- canonical: {first: Sobha, last: Lalitha Devi} - variants: - - {first: Lalitha Devi, last: Sobha} - - {first: Sobha Lalitha, last: Devi} -- canonical: {first: John P., last: Lalor} - variants: - - {first: John, last: Lalor} -- canonical: {first: Lori, last: Lamel} - id: lori-lamel - variants: - - {first: Lori F., last: Lamel} -- canonical: {first: André, last: Lamúrias} - variants: - - {first: Andre, last: Lamurias} -- canonical: {first: Man, last: Lan} - variants: - - {first: Lan, last: Man} -- canonical: {first: Thomas, last: Landauer} - variants: - - {first: Thomas K, last: Landauer} -- canonical: {first: Shari, last: Landes} - variants: - - {first: Shari, last: Land} -- canonical: {first: Jan, last: Landsbergen} - variants: - - {first: S. P. J., last: Landsbergen} - - {first: S.P.J., last: Landsbergen} -- canonical: {first: Francois-Michel, last: Lang} - variants: - - {first: Francois M., last: Lang} -- canonical: {first: Patrick L., last: Lange} - variants: - - {first: Patrick, last: Lange} -- canonical: {first: D. Terence, last: Langendoen} - variants: - - {first: Terence, last: Langendoen} -- canonical: {first: Irene, last: Langkilde} - variants: - - {first: Irene, last: Langkilde-Geary} -- canonical: {first: Philippe, last: Langlais} - variants: - - {first: Phillippe, last: Langlais} -- canonical: {first: Eric, last: Laporte} - variants: - - {first: Éric, last: Laporte} -- canonical: {first: Christophe, last: Laprun} - variants: - - {first: Christophe D., last: Laprun} -- canonical: {first: Septina Dian, last: Larasati} - variants: - - {first: Septina, last: Larasati} -- canonical: {first: Walter, last: Lasecki} - variants: - - {first: Walter S., last: Lasecki} -- canonical: {first: Olga N., last: Lashevskaja} - variants: - - {first: Olga, last: Lashevskaja} -- canonical: {first: Naveen Kumar, last: Laskari} - variants: - - {first: Naveen, last: Kumar} -- canonical: {first: Kiat-gak, last: Lau} - variants: - - {first: Kiat-Gak, last: Lau} - - {first: Kiãt-gãk, last: Lâu} -- canonical: {first: Alberto, last: Lavelli} - id: alberto-lavelli -- canonical: {first: Julia, last: Lavid-López} - variants: - - {first: Julia, last: Lavid} -- canonical: {first: Alon, last: Lavie} - id: alon-lavie -- canonical: {first: Benoit, last: Lavoie} - id: benoit-lavoie -- canonical: {first: Seamus, last: Lawless} - variants: - - {first: Séamus, last: Lawless} -- canonical: {first: Audrey, last: Le} - variants: - - {first: Audrey N., last: Le} -- canonical: {first: Hai-Son, last: Le} - variants: - - {first: Hai Son, last: Le} - - {first: Hai-son, last: Le} -- canonical: {first: Hoang Quynh, last: Le} - variants: - - {first: Hoang-Quynh, last: Le} -- canonical: {first: Quoc, last: Le} - variants: - - {first: Quoc V., last: Le} -- canonical: {first: Nathalie, last: Le Brun} - variants: - - {first: Nathalie Le, last: Brun} -- canonical: {first: Phuong, last: Le Hong} - id: phuong-le-hong - variants: - - {first: Phuong, last: Le-Hong} - - {first: Hồng Phương, last: Lê} - - {first: Phương, last: Lê Hồng} - - {first: Hong-Phuong, last: Le} -- canonical: {first: Sébastien, last: Le Maguer} - variants: - - {first: Sébastien Le, last: Maguer} -- canonical: {first: Quang, last: Le Minh} - variants: - - {first: Minh Quang, last: Le} -- canonical: {first: Joseph, last: Le Roux} - variants: - - {first: Joseph Le, last: Roux} -- canonical: {first: Jean-Luc, last: LeBrun} - variants: - - {first: Jean-Luc, last: Lebrun} -- canonical: {first: Gianluca E., last: Lebani} - variants: - - {first: Gianluca, last: Lebani} -- canonical: {first: Gilles, last: Lechenadec} - id: gilles-lechenadec -- canonical: {first: C. H., last: Lee} - variants: - - {first: C.-H., last: Lee} -- canonical: {first: Charles C., last: Lee} - variants: - - {first: Charles, last: Lee} -- canonical: {first: Chi-Chun, last: Lee} - variants: - - {first: Chi-Chun (Jeremy), last: Lee} - - {first: Chi-Chun Jeremy, last: Lee} -- canonical: {first: Chi-Yao, last: Lee} - variants: - - {first: Chih-yao, last: Lee} - - {first: Chih-Yao, last: Lee} -- canonical: {first: Chia-Ying, last: Lee} - variants: - - {first: Chia-ying, last: Lee} -- canonical: {first: Chia-ming, last: Lee} - variants: - - {first: Chia-Ming, last: Lee} -- canonical: {first: Chun-Jen, last: Lee} - variants: - - {first: Chun-Jun, last: Lee} -- canonical: {first: Chungmin, last: Lee} - variants: - - {first: Chong Min, last: Lee} - - {first: Chung-min, last: Lee} -- canonical: {first: Donghun, last: Lee} - comment: Kakao Brain - id: donghun-lee-kb -- canonical: {first: Donghun, last: Lee} - comment: Korea University - id: donghun-lee-ku -- canonical: {first: Gary Geunbae, last: Lee} - variants: - - {first: Geunbae, last: Lee} -- canonical: {first: Hsiang-Pin, last: Lee} - variants: - - {first: Hsiang-Ping, last: Lee} -- canonical: {first: Hyeon-gu, last: Lee} - variants: - - {first: Hyeon-Gu, last: Lee} -- canonical: {first: Ik-Hwan, last: Lee} - variants: - - {first: Ik-hwan, last: Lee} -- canonical: {first: Jae-Won, last: Lee} - variants: - - {first: Jae-won, last: Lee} -- canonical: {first: JaeSung, last: Lee} - variants: - - {first: Jae-Sung, last: Lee} -- canonical: {first: Jaesong, last: Lee} - variants: - - {first: JaeSong, last: Lee} -- canonical: {first: Jin-seok, last: Lee} - variants: - - {first: Jin-Seok, last: Lee} -- canonical: {first: John S. Y., last: Lee} - variants: - - {first: John, last: Lee} -- canonical: {first: Joo-Young, last: Lee} - variants: - - {first: JooYoung, last: Lee} -- canonical: {first: Kai-Fu, last: Lee} - id: kai-fu-lee -- canonical: {first: Kyung-Soon, last: Lee} - variants: - - {first: KyungSoon, last: Lee} -- canonical: {first: Lianhau, last: Lee} - variants: - - {first: Lian Hau, last: Lee} -- canonical: {first: Lin-Shan, last: Lee} - variants: - - {first: Lin-shan, last: Lee} -- canonical: {first: Mark, last: Lee} - id: mark-lee - variants: - - {first: Mark G., last: Lee} -- canonical: {first: Sang-Jo, last: Lee} - variants: - - {first: Sang Jo, last: Lee} -- canonical: {first: Sophia Y. M., last: Lee} - variants: - - {first: Sophia Y.M., last: Lee} -- canonical: {first: Sophia Yat Mei, last: Lee} - variants: - - {first: Yat-Mei, last: Lee} -- canonical: {first: Sungjin, last: Lee} - variants: - - {first: Sung-Jin, last: Lee} -- canonical: {first: Vivian K., last: Lee} - variants: - - {first: Vivian, last: Lee} -- canonical: {first: Woong Ki, last: Lee} - variants: - - {first: Woong-Ki, last: Lee} -- canonical: {first: Yeon Su, last: Lee} - variants: - - {first: Yeon-Su, last: Lee} -- canonical: {first: Yong-Hun, last: Lee} - variants: - - {first: Yong-hun, last: Lee} -- canonical: {first: Yoong Keok, last: Lee} - variants: - - {first: Yoong, last: Keok Lee} -- canonical: {first: Nicolas, last: Lefebvre} - variants: - - {first: Nicolas, last: Lefèbvre} -- canonical: {first: Anaïs, last: Lefeuvre} - variants: - - {first: Anaïs, last: Lefeuvre-Haftermeyer} -- canonical: {first: Fabrice, last: Lefèvre} - id: fabrice-lefevre - variants: - - {first: Fabrice, last: Lefevre} -- canonical: {first: Gurpreet Singh, last: Lehal} - variants: - - {first: Gurpreet, last: Singh Lehal} - - {first: Gurpreet, last: Lehal} -- canonical: {first: Jill Fain, last: Lehman} - variants: - - {first: Jill F., last: Lehman} -- canonical: {first: Wendy, last: Lehnert} - id: wendy-lehnert - variants: - - {first: Wendy G., last: Lehnert} -- canonical: {first: Aarno, last: Lehtola} - id: aarno-lehtola -- canonical: {first: Richard E, last: Leibbrandt} - variants: - - {first: Richard E., last: Leibbrandt} -- canonical: {first: Jochen L., last: Leidner} - variants: - - {first: Jochen, last: Leidner} -- canonical: {first: Marielle, last: Leijten} - variants: - - {first: Mariëlle, last: Leijten} -- canonical: {first: Luis A., last: Leiva} - variants: - - {first: Luis, last: Leiva} -- canonical: {first: Jeremy, last: Leixa} - variants: - - {first: Jérémy, last: Leixa} -- canonical: {first: Pietro, last: Leo} - id: pietro-leo -- canonical: {first: Jacqueline, last: Leon} - variants: - - {first: Jacqueline, last: Léon} -- canonical: {first: Chee Wee, last: Leong} - variants: - - {first: Chee Wee (Ben), last: Leong} -- canonical: {first: Haley, last: Lepp} - variants: - - {first: Haley M., last: Lepp} -- canonical: {first: Mikel, last: Lersundi} - id: mikel-lersundi -- canonical: {first: Leonardo, last: Lesmo} - id: leonardo-lesmo -- canonical: {first: Dessi Puji, last: Lestari} - variants: - - {first: Dessi, last: Lestari} -- canonical: {first: James, last: Lester} - variants: - - {first: James C., last: Lester} -- canonical: {first: Igor, last: Leturia} - id: igor-leturia -- canonical: {first: Hong, last: Leung} - variants: - - {first: Hong C., last: Leung} -- canonical: {first: Lori, last: Levin} - variants: - - {first: Lori S., last: Levin} -- canonical: {first: Lauren, last: Levine} - variants: - - {first: Lauren Elizabeth, last: Levine} -- canonical: {first: Stephen C., last: Levinson} - comment: Max-Planck-Institute for Psycholinguistics - id: stephen-c-levinson - similar: [stephen-e-levinson] -- canonical: {first: Stephen E., last: Levinson} - comment: Bell Labs - id: stephen-e-levinson - similar: [stephen-c-levinson] -- canonical: {first: Gina-Anne, last: Levow} - variants: - - {first: Gina, last: Levow} -- canonical: {first: Roger, last: Levy} - variants: - - {first: Roger P., last: Levy} -- canonical: {first: Kristīne, last: Levāne-Petrova} - variants: - - {first: Kristīne, last: Levāne} -- canonical: {first: Barbara, last: Lewandowska-Tomaszyk} - variants: - - {first: Barbara, last: Lewandowska} -- canonical: {first: David D., last: Lewis} - variants: - - {first: David, last: Lewis} -- canonical: {first: Richard L., last: Lewis} - variants: - - {first: Richard, last: Lewis} -- canonical: {first: William, last: Lewis} - variants: - - {first: William D., last: Lewis} -- canonical: {first: Fernando Sánchez, last: León} - variants: - - {first: Fernando, last: Sánchez} -- canonical: {first: Saul, last: León} - variants: - - {first: Saul, last: León Silverio} - - {first: Saúl, last: León} -- canonical: {first: Pilar, last: León-Araúz} - variants: - - {first: Pilar León, last: Araúz} -- canonical: {first: Belinda Z., last: Li} - variants: - - {first: Belinda, last: Li} -- canonical: {first: Bo, last: Li} - comment: May refer to several people - id: bo-li -- canonical: {first: Bo, last: Li} - comment: BeiHang - id: bo-li-bh -- canonical: {first: Bo, last: Li} - comment: Vanderbilt, UIUC - id: bo-li-vanderbilt -- canonical: {first: Bo, last: Li} - comment: NUS, Google - id: bo-li-nus - variant: {first: Troy, last: Lee} -- canonical: {first: Bo, last: Li} - comment: Chinese Academy of Sciences - id: bo-li-cas -- canonical: {first: Huifeng, last: Li} - variants: - - {first: Hui-Feng, last: Li} -- canonical: {first: Jiatong, last: Li} - comment: Hong Kong Polytechnic - id: jiatong-li-hk -- canonical: {first: Jiatong, last: Li} - comment: Rutgers - id: jiatong-li-ru -- canonical: {first: Junhui, last: Li} - variants: - - {first: JunHui, last: Li} -- canonical: {first: Shih-Min, last: Li} - variants: - - {first: Shi-Min, last: Li} -- canonical: {first: Shuanglong, last: Li} - variants: - - {first: ShuangLong, last: Li} -- canonical: {first: Tangqiu, last: Li} - variants: - - {first: Tanqiu, last: Li} -- canonical: {first: Victor O.K., last: Li} - variants: - - {first: Victor O. K., last: Li} -- canonical: {first: Weigang, last: Li} - variants: - - {first: Weikang, last: Li} -- canonical: {first: Yongqi, last: Li} - comment: Wuhan University - id: yongqi-li-wuhan -- canonical: {first: Jonghyun, last: Choi} - id: jonghyun-choi-umd - orcid: 0000-0002-7934-8434 - institution: University of Maryland - comment: University of Maryland -- canonical: {first: Jonghyun, last: Choi} - id: jonghyun-choi - comment: May refer to several people -- canonical: {first: Yongqi, last: Li} - comment: The Hong Kong Polytechnic University - id: yongqi-li-hk -- canonical: {first: Huizhi, last: Liang} - variants: - - {first: HuiZhi, last: Liang} -- canonical: {first: Po-Yu, last: Liang} - variants: - - {first: Po-yu, last: Liang} -- canonical: {first: Mark, last: Liberman} - id: mark-liberman - variants: - - {first: Mark Y., last: Liberman} -- canonical: {first: Elizabeth D., last: Liddy} - variants: - - {first: Elizabeth, last: Liddy} -- canonical: {first: Chung Yong, last: Lim} - variants: - - {first: Daniel Chung Yong, last: Lim} -- canonical: {first: Heui-Seok, last: Lim} - variants: - - {first: Heuiseok, last: Lim} -- canonical: {first: KyungTae, last: Lim} - variants: - - {first: Kyungtae, last: Lim} -- canonical: {first: Nathalie Rose, last: Lim} - variants: - - {first: Nathalie, last: Lim} -- canonical: {first: Bill Yuchen, last: Lin} - variants: - - {first: Bill Y., last: Lin} -- canonical: {first: Bor-Shen, last: Lin} - variants: - - {first: Bor-shen, last: Lin} -- canonical: {first: Cheng-Yuan, last: Lin} - variants: - - {first: Cheng Yuan, last: Lin} -- canonical: {first: Chi-san Althon, last: Lin} - variants: - - {first: Chi-San, last: Lin} - - {first: Chi-San Althon, last: Lin} -- canonical: {first: Dongsheng, last: Li} - id: dongsheng-li-fudan - orcid: 0000-0003-3103-8442 - institution: Fudan University - comment: Fudan -- canonical: {first: Dongsheng, last: Li} - id: dongsheng-li - comment: May refer to several people -- canonical: {first: Chih-Lung, last: Lin} - variants: - - {first: Chih-Long, last: Lin} -- canonical: {first: Chin-Yew, last: Lin} - variants: - - {first: ChinYew, last: Lin} -- canonical: {first: Ching-sheng, last: Lin} - variants: - - {first: Ching-Sheng, last: Lin} -- canonical: {first: Cong-kai, last: Lin} - variants: - - {first: Cong-Kai, last: Lin} -- canonical: {first: Darren Hsin-Hung, last: Lin} - variants: - - {first: Darren Hsin-hung, last: Lin} - - {first: Hsin-Hung, last: Lin} -- canonical: {first: Hing-Lung, last: Lin} - variants: - - {first: Hing-lung, last: Lin} -- canonical: {first: Qiguang, last: Lin} - id: qiguang-lin -- canonical: {first: Shou-De, last: Lin} - variants: - - {first: Shou-de, last: Lin} -- canonical: {first: Shu-Yen, last: Lin} - variants: - - {first: Shu-yen, last: Lin} -- canonical: {first: Victoria, last: Lin} - comment: CMU - id: victoria-lin-cmu -- canonical: {first: Xi Victoria, last: Lin} - comment: U of Washington, Meta -- canonical: {first: Xiaojun, last: Lin} - variants: - - {first: Xiaojun, last: Li} -- canonical: {first: Ya-Ting, last: Lin} - variants: - - {first: Ya-Ting, last: Li} -- canonical: {first: Georges, last: Linarès} - variants: - - {first: Georges, last: Linares} -- canonical: {first: Krister, last: Lindén} - variants: - - {first: Krister, last: Linden} -- canonical: {first: Marcia C., last: Linebarger} - variants: - - {first: Marcia, last: Linebarger} -- canonical: {first: Maria Teresa, last: Lino} - variants: - - {first: Teresa, last: Lino} -- canonical: {first: Nikos, last: Liolios} - id: nikos-liolios -- canonical: {first: Zachary C., last: Lipton} - variants: - - {first: Zachary, last: Lipton} -- canonical: {first: Adam, last: Liska} - variants: - - {first: Adam, last: Liška} -- canonical: {first: Lucian Vlad, last: Lita} - variants: - - {first: Lucian, last: Lita} -- canonical: {first: Diane, last: Litman} - variants: - - {first: Diane J., last: Litman} -- canonical: {first: Alexa N., last: Little} - variants: - - {first: Alexa, last: Little} -- canonical: {first: Alex, last: Liu} - variants: - - {first: Alexander, last: Liu} -- canonical: {first: Bingquan, last: Liu} - variants: - - {first: BingQuan, last: Liu} -- canonical: {first: Chin-Ting, last: Liu} - variants: - - {first: Chin-Ting Jimbo, last: Liu} -- canonical: {first: Fei, last: Liu} - comment: May refer to several people - id: fei-liu -- canonical: {first: Fei, last: Liu} - comment: UT Dallas, Bosch, CMU, University of Central Florida, Emory University - id: fei-liu-utdallas -- canonical: {first: Fei, last: Liu} - comment: Google Assistant - id: fei-liu-gga -- canonical: {first: Fei, last: Liu} - comment: University of Melbourne - id: fei-liu-unimelb -- canonical: {first: Huidan, last: Liu} - variants: - - {first: Hui Dan, last: Liu} -- canonical: {first: Mei-Chun, last: Liu} - variants: - - {first: Mei-chun, last: Liu} -- canonical: {first: Nelson F., last: Liu} - variants: - - {first: Nelson, last: Liu} -- canonical: {first: Pengyuan, last: Liu} - variants: - - {first: PengYuan, last: Liu} - - {first: Peng-Yuan, last: Liu} -- canonical: {first: Peter J., last: Liu} - variants: - - {first: Peter, last: Liu} -- canonical: {first: Weiyi, last: Liu} - variants: - - {first: Weiyi, last: Lu} -- canonical: {first: Yang, last: Liu} - comment: Edinburgh Ph.D., Microsoft - id: yang-liu-edinburgh -- canonical: {first: Yang, last: Liu} - comment: Beijing Language and Culture University - id: yang-liu-blcu -- canonical: {first: Yang, last: Liu} - comment: The Chinese University of Hong Kong (Shenzhen) - id: yang-liu-hk -- canonical: {first: Yang, last: Liu} - comment: 刘扬; Ph.D Purdue; ICSI, Dallas, Facebook, Liulishuo, Amazon - id: yang-liu-icsi -- canonical: {first: Yang, last: Liu} - comment: 刘洋; ICT, Tsinghua, Beijing Academy of Artificial Intelligence - id: yang-liu-ict -- canonical: {first: Yang, last: Liu} - comment: Peking University - id: yang-liu-pk -- canonical: {first: Yang, last: Liu} - comment: Wilfrid Laurier University - id: yang-liu-wl -- canonical: {first: Yang, last: Liu} - comment: Samsung Research Center Beijing - id: yang-liu-ss -- canonical: {first: Yang, last: Liu} - comment: National University of Defense Technology - id: yang-liu-dt -- canonical: {first: Yang, last: Liu} - comment: Microsoft Cognitive Services Research - id: yang-liu-microsoft -- canonical: {first: Yang, last: Liu} - comment: May refer to several people - id: yang-liu - similar: [yang-janet-liu] -- canonical: {first: Yang, last: Liu} - comment: Univ. of Michigan, UC Santa Cruz - id: yang-liu-umich -- canonical: {first: Yang, last: Liu} - comment: University of Helsinki - id: yang-liu-Helsinki -- canonical: {first: Yang, last: Liu} - comment: 3M Health Information Systems - id: yang-liu-3m -- canonical: {first: Yang, last: Liu} - comment: Tianjin University, China - id: yang-liu-tianjin -- canonical: {first: Yang Janet, last: Liu} - comment: Georgetown University; 刘洋 - id: yang-janet-liu -- canonical: {first: Andrej, last: Ljolje} - id: andrej-ljolje -- canonical: {first: Peter, last: Ljunglöf} - variants: - - {first: Peter, last: Ljunglof} -- canonical: {first: Leonardo Campillos, last: Llanos} - variants: - - {first: Leonardo, last: Campillos Llanos} -- canonical: {first: Eduardo, last: Lleida} - variants: - - {first: Eduardo, last: LLeida} -- canonical: {first: Agusti, last: Lloberas} - variants: - - {first: Agusti, last: LLoberas} -- canonical: {first: Fernando, last: Llopis} - variants: - - {first: Fernando, last: LLopis} -- canonical: {first: David, last: Llorens} - id: david-llorens -- canonical: {first: Héctor, last: Llorens} - variants: - - {first: Hector, last: Llorens} -- canonical: {first: Feng-Ju, last: Lo} - variants: - - {first: Fengju, last: Lo} -- canonical: {first: Wai-Kit, last: Lo} - variants: - - {first: Wai Kit, last: Lo} -- canonical: {first: Karen E., last: Lochbaum} - variants: - - {first: Karen, last: Lochbaum} -- canonical: {first: Elizaveta, last: Loginova-Clouet} - variants: - - {first: Elizaveta, last: Clouet} -- canonical: {first: Derek, last: Long} - variants: - - {first: Derek P., last: Long} -- canonical: {first: Marketa, last: Lopatkova} - variants: - - {first: Markéta, last: Straňáková-Lopatková} - - {first: Markéta, last: Lopatková} -- canonical: {first: Gabriel, last: Lopes} - variants: - - {first: Jose Gabriel P., last: Lopes} - - {first: Jose Gabriel, last: Lopes} - - {first: Gabriel P., last: Lopes} - - {first: José Gabriel Pereira, last: Lopes} - - {first: Gabriel, last: Pereira Lopes} - - {first: Gabriel Pereira, last: Lopes} -- canonical: {first: Roque, last: Lopez Condori} - variants: - - {first: Roque, last: López} -- canonical: {first: Oier, last: Lopez de Lacalle} - variants: - - {first: Oier López, last: de Lacalle} - - {first: Oier Lopez, last: de Lacalle} - - {first: Oier, last: López de Lacalle} -- canonical: {first: Alina Beatrice, last: Lorent} - variants: - - {first: Alina Beatrice, last: Lorenţ} - - {first: Alina, last: Lorenț} -- canonical: {first: Natalia, last: Loukachevitch} - id: natalia-loukachevitch - variants: - - {first: Natalia V., last: Loukachevitch} -- canonical: {first: John B., last: Lowe} - variants: - - {first: John, last: Lowe} -- canonical: {first: Eneldo, last: Loza Mencía} - variants: - - {first: Eneldo Loza, last: Mencía} -- canonical: {first: Bao-Liang, last: Lu} - variants: - - {first: Bao-liang, last: Lu} -- canonical: {first: Qin, last: Lu} - id: qin-lu -- canonical: {first: Wei-lun, last: Lu} - variants: - - {first: Wei-Lwun, last: Lu} - - {first: Louis Wei-lun, last: Lu} -- canonical: {first: Kim-Teng, last: Lua} - variants: - - {first: KimTeng, last: Lua} - - {first: Kim Teng, last: Lua} -- canonical: {first: Juan Manuel, last: Lucas-Cuesta} - variants: - - {first: Juan Manuel, last: Lucas} -- canonical: {first: Li, last: Lucy} - variants: - - {first: Lucy, last: Li} -- canonical: {first: Peter J., last: Ludlow} - variants: - - {first: Peter, last: Ludlow} -- canonical: {first: Robert W.P., last: Luk} - id: robert-w-p-luk -- canonical: {first: Robert Wing Pong, last: Luk} - variants: - - {first: Wing-Pong, last: Luk} -- canonical: {first: Stephanie, last: Lukin} - variants: - - {first: Stephanie M., last: Lukin} -- canonical: {first: Suen Caesar, last: Lun} - id: suen-caesar-lun - variants: - - {first: Caesar Suen, last: Lun} - - {first: Caesar, last: Lun} - - {first: S. Caesar, last: Lun} -- canonical: {first: Xiaoqiang, last: Luo} - id: xiaoqiang-luo -- canonical: {first: Minh-Thang, last: Luong} - variants: - - {first: Thang, last: Luong} -- canonical: {first: Ngoc Quang, last: Luong} - variants: - - {first: Ngoc-Quang, last: Luong} -- canonical: {first: Susann, last: LuperFoy} - variants: - - {first: Susann, last: Luperfoy} -- canonical: {first: Veronika, last: Lux} - variants: - - {first: Veronika, last: Lux-Pogodalla} - - {first: Véronika, last: Lux-Pogodalla} -- canonical: {first: Gunn Inger, last: Lyse} - variants: - - {first: Gunn, last: Lyse} -- canonical: {first: Steven L., last: Lytinen} - variants: - - {first: Steven, last: Lytinen} -- canonical: {first: Eldon G., last: Lytle} - variants: - - {first: Eldon G., last: Lytel} -- canonical: {first: Dau-cheng, last: Lyu} - variants: - - {first: Dau-Cheng, last: Lyu} -- canonical: {first: Ren-Yuan, last: Lyu} - variants: - - {first: Ren-yuan, last: Lyu} -- canonical: {first: François, last: Lévy} - variants: - - {first: François, last: Levy} -- canonical: {first: Tuan Anh, last: Lê} - variants: - - {first: Tuan Anh, last: Le} - - {first: Tuấn Anh, last: Lê} -- canonical: {first: M. Soledad, last: López Gambino} - variants: - - {first: Soledad, last: López Gambino} -- canonical: {first: Karmele, last: López de Ipiña} - id: karmele-lopez-de-ipina -- canonical: {first: Maddalen, last: López de Lacalle} - variants: - - {first: Maddalen, last: Lopez de Lacalle} -- canonical: {first: Ramón, last: López-Cózar} - id: ramon-lopez-cozar -- canonical: {first: Birte, last: Lönneker} - variants: - - {first: Birte, last: Lönneker-Rodman} - - {first: Birte, last: Loenneker-Rodman} -- canonical: {first: Jia, last: Lü} - variants: - - {first: Jia, last: Lu} -- canonical: {first: Yajuan, last: Lü} - variants: - - {first: Yajuan, last: Lu} - - {first: Yajuan, last: Lv} -- canonical: {first: Harald, last: Lüngen} - variants: - - {first: Harald, last: Lungen} -- canonical: {first: Marie-Claude, last: L’Homme} - variants: - - {first: Marie-Claude, last: L’ Homme} -- canonical: {first: Sasikumar, last: M} - variants: - - {first: Sasikumar, last: M.} -- canonical: {first: Ariadne, last: M. B. Rizzoni Carvalho} - variants: - - {first: Ariadne M. B. R., last: Carvalho} -- canonical: {first: Nagwa, last: M. El-Makky} - variants: - - {first: Nagwa, last: El-Makky} -- canonical: {first: Longlong, last: Ma} - variants: - - {first: Long Long, last: Ma} -- canonical: {first: Wei-Ying, last: Ma} - variants: - - {first: Wei-ying, last: Ma} -- canonical: {first: Wei-Yun, last: Ma} - variants: - - {first: Wei Yun, last: Ma} -- canonical: {first: Mohamed, last: Maamouri} - variants: - - {first: Mohammed, last: Maamouri} -- canonical: {first: Andrew, last: Maas} - variants: - - {first: Andrew L., last: Maas} -- canonical: {first: Rónan, last: Mac an tSaoir} - variants: - - {first: Ronan, last: Mac an tSaoir} -- canonical: {first: Andrew, last: MacKinlay} - variants: - - {first: Andrew, last: McKinlay} -- canonical: {first: Peter, last: Machonis} - variants: - - {first: Peter A., last: Machonis} -- canonical: {first: Catherine, last: Macleod} - variants: - - {first: Catherine, last: MacLeod} -- canonical: {first: Imanol, last: Madariaga} - id: imanol-madariaga -- canonical: {first: Pranava Swaroop, last: Madhyastha} - variants: - - {first: Pranava, last: Madhyastha} -- canonical: {first: Bente, last: Maegaard} - id: bente-maegaard -- canonical: {first: Kikuo, last: Maekawa} - id: kikuo-maekawa -- canonical: {first: Valérie, last: Maffiolo} - id: valerie-maffiolo -- canonical: {first: David M., last: Magerman} - id: david-m-magerman - variants: - - {first: David, last: Magerman} -- canonical: {first: Brunelle, last: Magnana Ekoukou} - variants: - - {first: Brunelle Magnana, last: Ekoukou} -- canonical: {first: Bernardo, last: Magnini} - id: bernardo-magnini -- canonical: {first: Guðrun, last: Magnúsdóttir} - variants: - - {first: Guðrún, last: Magnúsdóttir} -- canonical: {first: Sainik, last: Mahata} - variants: - - {first: Sainik Kumar, last: Mahata} -- canonical: {first: Kavi, last: Mahesh} - variants: - - {first: Kavitha, last: Mahesh} - - {first: Kavitha Karimbi, last: Mahesh} -- canonical: {first: Trang, last: Mai Xuan} - variants: - - {first: Trang Mai, last: Xuan} -- canonical: {first: Elisabeth, last: Maier} - variants: - - {first: Elisabeth, last: Mager} -- canonical: {first: Frederic, last: Mailhot} - variants: - - {first: Fred, last: Mailhot} - - {first: Frédéric, last: Mailhot} -- canonical: {first: Steven J., last: Maiorano} - variants: - - {first: Steve, last: Maiorano} - - {first: Steven, last: Maiorano} - - {first: Steve, last: Moiorano} -- canonical: {first: François, last: Mairesse} - variants: - - {first: Francois, last: Mairesse} -- canonical: {first: John, last: Makhoul} - id: john-makhoul -- canonical: {first: Shozo, last: Makino} - id: shozo-makino -- canonical: {first: Alfredo, last: Maldonado} - variants: - - {first: Alfredo, last: Maldonado Guerra} - - {first: Alfredo, last: Maldonado-Guerra} -- canonical: {first: Nishtha, last: Malhotra} - variants: - - {first: Nishta, last: Malhotra} -- canonical: {first: M. G. Abbas, last: Malik} - variants: - - {first: M.G. Abbas, last: Malik} - - {first: M G Abbas, last: Malik} -- canonical: {first: Deepak Kumar, last: Malladi} - variants: - - {first: Deepak, last: Malladi} -- canonical: {first: Shervin, last: Malmasi} - variants: - - {first: Shevin, last: Malmasi} -- canonical: {first: Preetam, last: Maloor} - id: preetam-maloor -- canonical: {first: Robert, last: Malouf} - variants: - - {first: Rob, last: Malouf} -- canonical: {first: Liliana, last: Mamani Sanchez} - variants: - - {first: Liliana, last: Mamani Sánchez} - - {first: Liliana Mamani, last: Sanchez} -- canonical: {first: Nuno, last: Mamede} - variants: - - {first: Nuno J., last: Mamede} -- canonical: {first: Nadia, last: Mana} - id: nadia-mana -- canonical: {first: Esmeralda, last: Manandise} - variants: - - {first: Esme, last: Manandise} -- canonical: {first: Alexis, last: Manaster-Ramer} - variants: - - {first: Alexis, last: Manaster Ramer} -- canonical: {first: Soumil, last: Mandal} - variants: - - {first: Soumik, last: Mandal} -- canonical: {first: Rila, last: Mandala} - variants: - - {first: Mandala, last: Rila} -- canonical: {first: Michael, last: Mandel} - variants: - - {first: Michael, last: Mandl} -- canonical: {first: Angrosh, last: Mandya} - variants: - - {first: Mandya, last: Angrosh} -- canonical: {first: Mathieu, last: Mangeot} - variants: - - {first: Mathieu, last: Mangeot-Lerebours} -- canonical: {first: Lidia, last: Mangu} - id: lidia-mangu -- canonical: {first: Enrique, last: Manjavacas} - variants: - - {first: Enrique, last: Manjavacas Arevalo} -- canonical: {first: Varun, last: Manjunatha} - variants: - - {first: Varun, last: Manjunath} -- canonical: {first: Gideon, last: Mann} - variants: - - {first: Gideon S., last: Mann} -- canonical: {first: William C., last: Mann} - variants: - - {first: William, last: Mann} -- canonical: {first: Prashanth, last: Mannem} - variants: - - {first: Prashanth Reddy, last: Mannem} - - {first: Prashanth, last: Reddy} -- canonical: {first: Christopher D., last: Manning} - variants: - - {first: Christopher, last: Manning} - - {first: Chris, last: Manning} -- canonical: {first: Andre, last: Mansikkaniemi} - variants: - - {first: André, last: Mansikkaniemi} -- canonical: {first: Mairgup, last: Mansur} - variants: - - {first: Mansur, last: Mairgup} -- canonical: {first: Ruli, last: Manurung} - id: ruli-manurung -- canonical: {first: Ramesh, last: Manuvinakurike} - variants: - - {first: Ramesh, last: Manuvirakurike} -- canonical: {first: Lingshuang Jack, last: Mao} - variants: - - {first: Lingshuang, last: Mao} -- canonical: {first: Xinnian, last: Mao} - variants: - - {first: Xin, last: Mao} -- canonical: {first: Yu Hang, last: Mao} - variants: - - {first: Yu-Hang, last: Mao} - - {first: Yuhang, last: Mao} -- canonical: {first: Valérie, last: Mapelli} - variants: - - {first: Valerie, last: Mapelli} -- canonical: {first: Yannick, last: Marchand} - id: yannick-marchand -- canonical: {first: Giulia, last: Marchesini} - variants: - - {first: Giulia, last: Marchesi} -- canonical: {first: Malgorzata, last: Marciniak} - variants: - - {first: Małgorzata, last: Marciniak} -- canonical: {first: Mitch, last: Marcus} - id: mitch-marcus - variants: - - {first: Mitchell, last: Marcus} - - {first: Mitchell P., last: Marcus} -- canonical: {first: Joseph, last: Mariani} - id: joseph-mariani -- canonical: {first: Montserrat, last: Marimon} - variants: - - {first: Montserrat, last: Marimón} - - {first: Montserrat Marimon, last: Felipe} -- canonical: {first: Nicolas, last: Marin} - variants: - - {first: Nicolás, last: Marín} -- canonical: {first: Andre, last: Mariotti} - variants: - - {first: André, last: Mariotti} -- canonical: {first: Alberto, last: Maritxalar} - id: alberto-maritxalar -- canonical: {first: Montse, last: Maritxalar} - id: montse-maritxalar -- canonical: {first: José B., last: Mariño} - variants: - - {first: José, last: Mariño} -- canonical: {first: Stella, last: Markantonatou} - id: stella-markantonatou -- canonical: {first: Aleksandra Zögling, last: Markuš} - variants: - - {first: Aleksandra, last: Zögling} -- canonical: {first: Kornél, last: Markó} - variants: - - {first: Kornel, last: Markó} -- canonical: {first: Iain, last: Marshall} - variants: - - {first: Iain J., last: Marshall} -- canonical: {first: Pierre-Francois, last: Marteau} - variants: - - {first: Pierre-François, last: Marteau} -- canonical: {first: Alvin, last: Martin} - variants: - - {first: Alvin F., last: Martin} -- canonical: {first: James H., last: Martin} - variants: - - {first: James, last: Martin} -- canonical: {first: Jean-Claude, last: Martin} - id: jean-claude-martin -- canonical: {first: M. Patrick, last: Martin} - variants: - - {first: Pierre M., last: Martin} - - {first: Patrick, last: Martin} -- canonical: {first: Marco, last: Martin} - id: marco-martin -- canonical: {first: Melanie, last: Martin} - variants: - - {first: Melanie J., last: Martin} -- canonical: {first: William A., last: Martin} - id: william-a-martin -- canonical: {first: Marianna, last: Martindale} - variants: - - {first: Marianna J., last: Martindale} -- canonical: {first: David, last: Martinez} - variants: - - {first: David, last: Martínez} -- canonical: {first: Miroslav, last: Martinović} - variants: - - {first: Miroslav, last: Martinovic} -- canonical: {first: André F. T., last: Martins} - variants: - - {first: Andre, last: Martins} - - {first: André, last: Martins} -- canonical: {first: Fernando, last: Martins} - id: fernando-martins -- canonical: {first: Ronaldo Teixeira, last: Martins} - variants: - - {first: Ronaldo, last: Martins} -- canonical: {first: David, last: Martins de Matos} - variants: - - {first: David Martins, last: de Matos} - - {first: David M., last: de Matos} -- canonical: {first: M. Antònia, last: Martí} - id: m-antonia-marti - variants: - - {first: M. Antonia, last: Martí} - - {first: M. Antonia, last: Marti} - - {first: Antonia, last: Martí} - - {first: Mª Antònia, last: Martí} - - {first: Maria Antònia, last: Martí} - - {first: Toni, last: Martí} -- canonical: {first: M. Teresa, last: Martín-Valdivia} - variants: - - {first: Maite, last: Martin} - - {first: María Teresa, last: Martín-Valdivia} - - {first: Maria Teresa, last: Martín-Valdivia} - - {first: Teresa, last: Martin} - - {first: M. Teresa, last: Martín} - - {first: Maite, last: Martín-Valdivia} -- canonical: {first: Carlos, last: Martín-Vide} - variants: - - {first: Carlos Martin, last: Vide} -- canonical: {first: José Manuel, last: Martínez} - variants: - - {first: Jose M.M., last: Martinez} - - {first: José Manuel, last: Martínez Martínez} - - {first: Jose Manuel, last: Martinez} -- canonical: {first: Raquel, last: Martínez} - variants: - - {first: Raquel, last: Martinez} -- canonical: {first: Héctor, last: Martínez Alonso} - variants: - - {first: Hector, last: Martinez} - - {first: Héctor, last: Martínez} - - {first: Héctor Martínez, last: Alonso} - - {first: Hector, last: Martinez Alonso} - - {first: Héctor, last: Martinez Alonso} - - {first: Hector, last: Martínez Alonso} -- canonical: {first: Eva, last: Martínez Garcia} - variants: - - {first: Eva Martínez, last: Garcia} -- canonical: {first: Patricio, last: Martínez-Barco} - id: patricio-martinez-barco - variants: - - {first: Patricio, last: Martinez-Barco} - - {first: Patricio Martinez, last: Barco} -- canonical: {first: Eugenio, last: Martínez-Cámara} - variants: - - {first: Eugenio, last: Martinez Camara} -- canonical: {first: Carlos-D., last: Martínez-Hinarejos} - variants: - - {first: Carlos D., last: Martínez-Hinarejos} - - {first: Carlos D., last: Martínez Hinarejos} - - {first: Carlos D., last: Martínez} -- canonical: {first: Fernando, last: Martínez-Santiago} - variants: - - {first: Fernando, last: Martínez Santiago} -- canonical: {first: Luis, last: Marujo} - variants: - - {first: Luís, last: Marujo} -- canonical: {first: Andrés, last: Marzal} - id: andres-marzal -- canonical: {first: Aaron J., last: Masino} - variants: - - {first: Aaron, last: Masino} -- canonical: {first: Flavio, last: Massimiliano Cecchini} - variants: - - {first: Flavio Massimiliano, last: Cecchini} -- canonical: {first: Demetrios, last: Master} - variants: - - {first: Demitrios, last: Master} -- canonical: {first: Fumito, last: Masui} - id: fumito-masui -- canonical: {first: Hiroshi, last: Masuichi} - variants: - - {first: Hiroshi, last: Mashuichi} -- canonical: {first: Marco, last: Matassoni} - id: marco-matassoni -- canonical: {first: Yannick, last: Mathieu} - variants: - - {first: Yvette Yannick, last: Mathieu} - - {first: Yvette, last: Mathieu} -- canonical: {first: Ely Edison da Silva, last: Matos} - variants: - - {first: Ely, last: Matos} - - {first: Ely E. S., last: Matos} -- canonical: {first: Yuji, last: Matsumoto} - variants: - - {first: Yūji, last: Matsumoto} -- canonical: {first: Shoichi, last: Matsunaga} - variants: - - {first: Sho-ichi, last: Matsunaga} -- canonical: {first: Christian M.I.M., last: Matthiessen} - variants: - - {first: Christian M. I. M., last: Matthiessen} -- canonical: {first: Irina, last: Matveeva} - id: irina-matveeva -- canonical: {first: Mirjam Sepesy, last: Maucec} - variants: - - {first: Mirjam Sepesy, last: Maučec} -- canonical: {first: Michael L., last: Mauldin} - variants: - - {first: Michael, last: Mauldin} -- canonical: {first: Daniel, last: Maxwell} - variants: - - {first: Dan, last: Maxwell} -- canonical: {first: K. Tamsin, last: Maxwell} - variants: - - {first: Tamsin, last: Maxwell} -- canonical: {first: John T., last: Maxwell III} - variants: - - {first: John, last: Maxwell} - - {first: John T., last: Maxwell} -- canonical: {first: Mark T., last: Maybury} - variants: - - {first: Mark, last: Maybury} -- canonical: {first: Aingeru, last: Mayor} - id: aingeru-mayor -- canonical: {first: Pierre-Emmanuel, last: Mazare} - variants: - - {first: Pierre-Emmanuel, last: Mazaré} -- canonical: {first: Erick Galani, last: Maziero} - variants: - - {first: Erick, last: Maziero} -- canonical: {first: Pawel, last: Mazur} - variants: - - {first: Paweł, last: Mazur} -- canonical: {first: Alessandro, last: Mazzei} - id: alessandro-mazzei -- canonical: {first: Giampaolo, last: Mazzini} - id: giampaolo-mazzini -- canonical: {first: Manuel J., last: Maña López} - variants: - - {first: Manuel J., last: Maña} - - {first: Manual Maña, last: López} - - {first: Manuel, last: Maña López} -- canonical: {first: Michael L., last: Mc Hale} - variants: - - {first: Michael L., last: McHale} -- canonical: {first: Gordon I., last: McCalla} - id: gordon-i-mccalla - variants: - - {first: Gordon, last: McCalla} -- canonical: {first: J. Scott, last: McCarley} - variants: - - {first: Scott, last: McCarley} -- canonical: {first: Arya D., last: McCarthy} - variants: - - {first: Arya, last: McCarthy} -- canonical: {first: Diana, last: McCarthy} - variants: - - {first: Diana F., last: McCarthy} -- canonical: {first: Joe, last: McCarthy} - id: joe-mccarthy -- canonical: {first: Michael C., last: McCord} - variants: - - {first: Michael, last: McCord} -- canonical: {first: Kathleen F., last: McCoy} - variants: - - {first: Kathleen, last: McCoy} - - {first: Kathleen E., last: McCoy} -- canonical: {first: Nancy, last: McCracken} - variants: - - {first: Nancy J., last: McCracken} -- canonical: {first: John Philip, last: McCrae} - variants: - - {first: John, last: McCrae} - - {first: John P., last: McCrae} -- canonical: {first: David D., last: McDonald} - comment: MIT, BBN, SIFT - id: david-d-mcdonald - similar: [david-w-mcdonald] -- canonical: {first: David W., last: McDonald} - comment: Univ. of Washington - id: david-w-mcdonald - similar: [david-d-mcdonald] -- canonical: {first: Joyce, last: McDowell} - id: joyce-mcdowell -- canonical: {first: Dan, last: McFarland} - variants: - - {first: Daniel, last: McFarland} - - {first: Daniel A., last: McFarland} -- canonical: {first: David, last: McGee} - variants: - - {first: David R., last: McGee} -- canonical: {first: Bridget, last: McInnes} - variants: - - {first: Bridget Thomson, last: McInnes} - - {first: Bridget T., last: McInnes} -- canonical: {first: Douglas, last: McKee} - variants: - - {first: Doug, last: McKee} -- canonical: {first: Kenneth J., last: McKeever} - variants: - - {first: Kenneth, last: McKeever} -- canonical: {first: Kathleen, last: McKeown} - variants: - - {first: Kathy, last: McKeown} - - {first: Kathleen R., last: McKeown} -- canonical: {first: Danielle S., last: McNamara} - variants: - - {first: Danielle, last: McNamara} -- canonical: {first: John, last: McNaught} - id: john-mcnaught -- canonical: {first: Margaret, last: McRorie} - id: margaret-mcrorie -- canonical: {first: Susan W., last: McRoy} - variants: - - {first: Susan, last: McRoy} -- canonical: {first: Kevin, last: McTait} - id: kevin-mctait -- canonical: {first: Michael F., last: McTear} - variants: - - {first: Michael, last: McTear} -- canonical: {first: Boubaker, last: Meddeb-Hamrouni} - variants: - - {first: Boubaker, last: Meddeb Hamrouni} -- canonical: {first: Christopher, last: Meek} - variants: - - {first: Chris, last: Meek} -- canonical: {first: Beáta, last: Megyesi} - variants: - - {first: Beata, last: Megyesi} - - {first: Beáta Bandmann, last: Megyesi} - - {first: Beáta B., last: Megyesi} -- canonical: {first: Dennis, last: Mehay} - variants: - - {first: Dennis Nolan, last: Mehay} -- canonical: {first: Sanket Vaibhav, last: Mehta} - variants: - - {first: Vaibhav, last: Mehta} -- canonical: {first: Baye Yimam, last: Mekonnen} - variants: - - {first: Baye, last: Yimam} -- canonical: {first: Alan K., last: Melby} - variants: - - {first: Alan, last: Melby} -- canonical: {first: Chris, last: Mellish} - id: chris-mellish - variants: - - {first: Chris S., last: Mellish} -- canonical: {first: Igor, last: Mel’čuk} - id: igor-melcuk -- canonical: {first: Alfonso, last: Mendes} - variants: - - {first: Afonso, last: Mendes} -- canonical: {first: Ana Cristina, last: Mendes} - variants: - - {first: Ana C., last: Mendes} - - {first: Ana, last: Mendes} -- canonical: {first: Eneida A., last: Mendonca} - variants: - - {first: Eneida, last: Mendonca} -- canonical: {first: Gustavo, last: Mendonca} - variants: - - {first: Gustavo, last: Mendonça} -- canonical: {first: Helen, last: Meng} - variants: - - {first: Helen M., last: Meng} -- canonical: {first: Rakesh R, last: Menon} - variants: - - {first: Rakesh, last: Menon} -- canonical: {first: Robert E., last: Mercer} - comment: Univ. of Western Ontario - id: robert-e-mercer - similar: [robert-l-mercer] -- canonical: {first: Robert L., last: Mercer} - comment: IBM - id: robert-l-mercer - similar: [robert-e-mercer] -- canonical: {first: Roberta H., last: Merchant} - variants: - - {first: Roberta, last: Merchant} -- canonical: {first: Bernard, last: Merialdo} - id: bernard-merialdo -- canonical: {first: Elizabeth, last: Merkhofer} - variants: - - {first: Elizabeth M., last: Merkhofer} -- canonical: {first: Marie, last: Meteer} - variants: - - {first: Marie W., last: Meteer} -- canonical: {first: Marie Hélène, last: Metzger} - variants: - - {first: Marie-Hélène, last: Metzger} -- canonical: {first: Dieter, last: Metzing} - id: dieter-metzing -- canonical: {first: Frédéric, last: Meunier} - variants: - - {first: Frederic, last: Meunier} -- canonical: {first: Detmar, last: Meurers} - variants: - - {first: W. Detmar, last: Meurers} - - {first: Walt Detmar, last: Meurers} -- canonical: {first: Montserrat, last: Meya} - id: montserrat-meya -- canonical: {first: Adam, last: Meyers} - id: adam-meyers -- canonical: {first: Benjamin S., last: Meyers} - variants: - - {first: Benjamin, last: Meyers} -- canonical: {first: Stephane, last: Meystre} - variants: - - {first: Stéphane, last: Meystre} -- canonical: {first: Ivan, last: Meza-Ruiz} - variants: - - {first: Ivan Vladimir, last: Meza Ruiz} - - {first: Ivan V., last: Meza} - - {first: Ivan, last: Meza} - - {first: Ivan Vladimir, last: Meza-Ruiz} -- canonical: {first: Antonio Valerio, last: Miceli-Barone} - variants: - - {first: Antonio Valerio, last: Miceli Barone} -- canonical: {first: Lisa N., last: Michaud} - variants: - - {first: Lisa, last: Michaud} -- canonical: {first: Patrizia, last: Michelassi} - id: patrizia-michelassi -- canonical: {first: Archibald, last: Michiels} - id: archibald-michiels -- canonical: {first: Lesly, last: Miculicich Werlen} - variants: - - {first: Lesly, last: Miculicich} -- canonical: {first: Sabrina J., last: Mielke} - variants: - - {first: Sabrina, last: Mielke} -- canonical: {first: Rada, last: Mihalcea} - variants: - - {first: Rada F., last: Mihalcea} -- canonical: {first: France, last: Mihelic} - variants: - - {first: France, last: Mihelič} -- canonical: {first: Tomáš, last: Mikolov} - variants: - - {first: Tomas, last: Mikolov} -- canonical: {first: Sandra, last: Milena Castellanos Páez} - variants: - - {first: Sandra Castellanos, last: Páez} -- canonical: {first: Ruy Luiz, last: Milidiú} - variants: - - {first: Ruy, last: Milidiú} -- canonical: {first: Dale A., last: Miller} - variants: - - {first: Dale, last: Miller} -- canonical: {first: John, last: Miller} - variants: - - {first: John E., last: Miller} -- canonical: {first: Keith J., last: Miller} - variants: - - {first: Keith, last: Miller} -- canonical: {first: Lance A., last: Miller} - id: lance-a-miller -- canonical: {first: Laura G., last: Miller} - id: laura-g-miller -- canonical: {first: Timothy, last: Miller} - variants: - - {first: Tim, last: Miller} -- canonical: {first: Daniel P., last: Mills} - variants: - - {first: Daniel, last: Mills} -- canonical: {first: David N., last: Milne} - variants: - - {first: David, last: Milne} -- canonical: {first: Robert, last: Milne} - variants: - - {first: Rob, last: Milne} -- canonical: {first: Behrouz, last: Minaei-Bidgoli} - variants: - - {first: Behrouz, last: Minaei-bidgoli} - - {first: Behrouz, last: Minaei} -- canonical: {first: Nobuaki, last: Minematsu} - id: nobuaki-minematsu -- canonical: {first: Zhaoyan, last: Ming} - variants: - - {first: Zhao-Yan, last: Ming} -- canonical: {first: Michael, last: Minock} - variants: - - {first: Michael J., last: Minock} -- canonical: {first: T. T., last: Mirnalinee} - variants: - - {first: Mirnalinee, last: T T} - - {first: T T, last: Mirnalinee} -- canonical: {first: Dipendra, last: Misra} - variants: - - {first: Dipendra Kumar, last: Misra} -- canonical: {first: Kei, last: Mitamura} - id: kei-mitamura -- canonical: {first: Brian, last: Mitchell} - id: brian-mitchell -- canonical: {first: Christopher, last: Mitchell} - variants: - - {first: Christopher M., last: Mitchell} -- canonical: {first: Tom, last: Mitchell} - variants: - - {first: Tom M., last: Mitchell} -- canonical: {first: Catalin, last: Mititelu} - variants: - - {first: Cătălin, last: Mititelu} -- canonical: {first: Ruslan, last: Mitkov} - id: ruslan-mitkov -- canonical: {first: V. K., last: Mittal} - variants: - - {first: V.K., last: Mittal} -- canonical: {first: Vibhu O., last: Mittal} - variants: - - {first: Vibhu, last: Mittal} -- canonical: {first: Natalia N., last: Modjeska} - variants: - - {first: Natalia, last: Modjeska} -- canonical: {first: Sarah, last: Moeller} - variants: - - {first: Sarah R., last: Moeller} -- canonical: {first: Marie Francine, last: Moens} - variants: - - {first: Marie-Francine, last: Moens} -- canonical: {first: Saif, last: Mohammad} - variants: - - {first: Saif M., last: Mohammad} -- canonical: {first: Ehsan, last: Mohammady Ardehaly} - variants: - - {first: Ehsan, last: Mohammady} -- canonical: {first: Sharada Prasanna, last: Mohanty} - variants: - - {first: Sharada, last: Mohanty} -- canonical: {first: Muhammad Tasnim, last: Mohiuddin} - variants: - - {first: Tasnim, last: Mohiuddin} -- canonical: {first: Begoña Villada, last: Moirón} - variants: - - {first: Begoña, last: Villada Moirón} - - {first: Begoña, last: Villada} - - {first: M. Begoña Villada, last: Moirón} -- canonical: {first: Luis Gerardo, last: Mojica de la Vega} - variants: - - {first: Luis, last: Mojica de la Vega} -- canonical: {first: Christian, last: Moldovan} - variants: - - {first: Cristian, last: Moldovan} -- canonical: {first: Dan, last: Moldovan} - id: dan-moldovan - variants: - - {first: Dan I., last: Moldovan} -- canonical: {first: M. Dolores, last: Molina-González} - variants: - - {first: M. Dolores, last: Molina-Gonzalez} -- canonical: {first: Diego, last: Molla} - variants: - - {first: Diego, last: Mollá-Aliod} - - {first: Diego, last: Mollá Aliod} - - {first: Diego, last: Molla-Aliod} - - {first: Diego, last: Mollá} -- canonical: {first: Simonetta, last: Montemagni} - id: simonetta-montemagni -- canonical: {first: Calkin S., last: Montero} - variants: - - {first: Calkin, last: Montero} -- canonical: {first: Juan M., last: Montero} - variants: - - {first: Juan Manuel, last: Montero} -- canonical: {first: Manuel, last: Montes} - variants: - - {first: Manuel, last: Montes-y-Gómez} - - {first: Manuel, last: Montes y Gomez} - - {first: Manuel, last: Montes y Gómez} -- canonical: {first: Azucena, last: Montes-Rendon} - variants: - - {first: Azucena, last: Montes} -- canonical: {first: Christine A., last: Montgomery} - variants: - - {first: Christine, last: Montgomery} -- canonical: {first: Andrés, last: Montoyo} - variants: - - {first: Andres, last: Montoyo} - - {first: Andrés, last: Montoyo Guijarro} -- canonical: {first: Kyong-Hi, last: Moon} - variants: - - {first: Kyonghi, last: Moon} -- canonical: {first: Sungrim, last: Moon} - variants: - - {first: SungRim, last: Moon} -- canonical: {first: Raymond, last: Mooney} - variants: - - {first: Raymond J., last: Mooney} -- canonical: {first: Johanna D., last: Moore} - id: johanna-d-moore - variants: - - {first: Johanna, last: Moore} -- canonical: {first: Robert C., last: Moore} - id: robert-c-moore - variants: - - {first: Robert, last: Moore} -- canonical: {first: Roger K., last: Moore} - variants: - - {first: Roger, last: Moore} -- canonical: {first: Michael, last: Moortgat} - id: michael-moortgat -- canonical: {first: Nafise Sadat, last: Moosavi} - variants: - - {first: Nafise, last: Moosavi} -- canonical: {first: Sílvia, last: Moraes} - variants: - - {first: Silvia, last: Moraes} -- canonical: {first: Nicolás, last: Morales} - variants: - - {first: Nicolas, last: Morales} -- canonical: {first: Douglas B., last: Moran} - variants: - - {first: Douglas, last: Moran} - - {first: Doug, last: Moran} -- canonical: {first: Steven, last: Moran} - variants: - - {first: Steve, last: Moran} -- canonical: {first: Paul, last: Morarescu} - variants: - - {first: Paul, last: Morărescu} - - {first: Paul C., last: Morărescu} -- canonical: {first: Christian, last: Morbidoni} - id: christian-morbidoni -- canonical: {first: Grégoire, last: Moreau de Montcheuil} - variants: - - {first: Grégoire, last: de Montcheuil} -- canonical: {first: Paloma, last: Moreda Pozo} - variants: - - {first: Paloma, last: Moreda} -- canonical: {first: Jihai, last: Zhang} - id: jihai-zhang-cuhk - orcid: 0000-0002-1400-9116 - institution: The Chinese University of Hong Kong - comment: CUHK -- canonical: {first: Jihai, last: Zhang} - id: jihai-zhang - comment: May refer to several people -- canonical: {first: Asunción, last: Moreno} - id: asuncion-moreno - variants: - - {first: Asuncion, last: Moreno} - - {first: Asuncíon, last: Moreno} -- canonical: {first: José G., last: Moreno} - variants: - - {first: Jose G., last: Moreno} - - {first: Jose, last: Moreno} -- canonical: {first: Lidia, last: Moreno} - id: lidia-moreno -- canonical: {first: Antonio, last: Moreno Ribas} - comment: Univ. Rovira i Virgili - id: antonio-moreno-ribas - similar: [antonio-moreno-ortiz, antonio-moreno-sandoval] -- canonical: {first: Julian, last: Moreno Schneider} - variants: - - {first: Julian, last: Moreno-Schneider} - - {first: Julián, last: Moreno-Schneider} - - {first: Julián, last: Moreno Schneider} -- canonical: {first: Antonio, last: Moreno-Ortiz} - comment: Univ. of Málaga - id: antonio-moreno-ortiz - similar: [antonio-moreno-ribas, antonio-moreno-sandoval] - variants: - - {first: Antonio, last: Moreno Ortiz} -- canonical: {first: Antonio, last: Moreno-Sandoval} - comment: NYU, Univ. Autónoma de Madrid - id: antonio-moreno-sandoval - similar: [antonio-moreno-ortiz, antonio-moreno-ribas] - variants: - - {first: Antonio Moreno, last: Sandoval} -- canonical: {first: Elliott, last: Moreton} - variants: - - {first: Elliot, last: Moreton} -- canonical: {first: Lorenzo, last: Moretti} - id: lorenzo-moretti -- canonical: {first: Richard G., last: Morgan} - variants: - - {first: Richard, last: Morgan} -- canonical: {first: William, last: Morgan} - variants: - - {first: William T., last: Morgan} -- canonical: {first: Véronique, last: Moriceau} - variants: - - {first: Veronique, last: Moriceau} -- canonical: {first: Tsuyoshi, last: Morimoto} - variants: - - {first: Tsuyosi, last: Morimoto} -- canonical: {first: James G., last: Mork} - variants: - - {first: James, last: Mork} -- canonical: {first: Robert W., last: Morris} - variants: - - {first: Robert, last: Morris} -- canonical: {first: David R., last: Mortensen} - variants: - - {first: David, last: Mortensen} -- canonical: {first: Thomas S., last: Morton} - variants: - - {first: Thomas, last: Morton} -- canonical: {first: Alex, last: Moruz} - variants: - - {first: Mihai Alex, last: Moruz} -- canonical: {first: Ulrike, last: Mosel} - id: ulrike-mosel -- canonical: {first: Sjur, last: Moshagen} - variants: - - {first: Sjur Nørstebø, last: Moshagen} - - {first: Sjur N., last: Moshagen} -- canonical: {first: Lawrence S., last: Moss} - variants: - - {first: Lawrence, last: Moss} -- canonical: {first: Anna, last: Jonsson} - id: anna-jonsson-umea - orcid: 0000-0002-9873-4170 - institution: Umeå University - comment: Umeå University -- canonical: {first: Anna, last: Jonsson} - id: anna-jonsson - comment: May refer to several people -- canonical: {first: Djamel, last: Mostefa} - id: djamel-mostefa -- canonical: {first: Jessica, last: Moszkowicz} - variants: - - {first: Jessica L., last: Moszkowicz} -- canonical: {first: Abdelhak, last: Mouradi} - id: abdelhak-mouradi -- canonical: {first: Hamed, last: Movasagh} - id: hamed-movasagh -- canonical: {first: Danielle L., last: Mowery} - variants: - - {first: Danielle, last: Mowery} - - {first: Danielle L, last: Mowery} -- canonical: {first: Joanna, last: Mrozinski} - id: joanna-mrozinski -- canonical: {first: Christian, last: Mueller} - variants: - - {first: Christian, last: Müller} -- canonical: {first: Thomas, last: Mueller} - variants: - - {first: Thomas, last: Müller} -- canonical: {first: Chafic, last: Mukbel} - id: chafic-mukbel -- canonical: {first: Rutu, last: Mulkar-Mehta} - variants: - - {first: Rutu, last: Mulkar} -- canonical: {first: Dasa, last: Munkova} - variants: - - {first: Daša, last: Munková} -- canonical: {first: Juan Pablo, last: Munoz} - id: juan-pablo-munoz - variants: - - {first: J. Pablo, last: Muñoz} -- canonical: {first: Dragos Stefan, last: Munteanu} - variants: - - {first: Dragos, last: Munteanu} -- canonical: {first: William R., last: Murray} - variants: - - {first: William, last: Murray} -- canonical: {first: Hema A., last: Murthy} - variants: - - {first: Hema, last: Murthy} -- canonical: {first: Hy, last: Murveit} - id: hy-murveit -- canonical: {first: Claudiu, last: Musat} - variants: - - {first: Claudiu-Cristian, last: Musat} -- canonical: {first: Gabriele, last: Musillo} - variants: - - {first: Gabriele Antonio, last: Musillo} -- canonical: {first: Pradeep, last: Muthukrishnan} - variants: - - {first: Pradeep, last: Muthukrishan} -- canonical: {first: Rafael, last: Muñoz} - id: rafael-munoz - variants: - - {first: Rafael, last: Muñoz Guillena} - - {first: Rafael, last: Muñoz-Guillena} -- canonical: {first: Sung-Hyon, last: Myaeng} - variants: - - {first: Sung Hyon, last: Myaeng} - - {first: Sung H., last: Myaeng} - - {first: Sung-hyon, last: Myaeng} -- canonical: {first: Kanthashree, last: Mysore Sathyendra} - variants: - - {first: Kanthashree Mysore, last: Sathyendra} -- canonical: {first: Lluís, last: Màrquez} - id: lluis-marquez - variants: - - {first: Lluis, last: Marquez} - - {first: Lluis, last: Màrquez} - - {first: Lluis, last: Márquez} -- canonical: {first: Gildas, last: Ménier} - variants: - - {first: Gildas, last: Menier} -- canonical: {first: Bernd, last: Möbius} - variants: - - {first: Bernd, last: Mobius} -- canonical: {first: Christof, last: Müller} - variants: - - {first: Christof E., last: Müller} -- canonical: {first: Christoph, last: Müller} - variants: - - {first: Christoph, last: Mueller} -- canonical: {first: Frank Henrik, last: Müller} - variants: - - {first: Frank H., last: Müller} - - {first: Frank, last: Müller} -- canonical: {first: Mark-Christoph, last: Müller} - variants: - - {first: Mark-Christoph, last: Mueller} -- canonical: {first: Bang, last: Nguyen} - institution: Notre Dame - orcid: 0009-0002-8365-4562 -- canonical: {first: Stefan, last: Müller} - variants: - - {first: Stefan, last: Muller} - - {first: Stefan, last: Mueller} -- canonical: {first: Maria, last: Nadejde} - variants: - - {first: Maria, last: Nădejde} -- canonical: {first: Makoto, last: Nagao} - id: makoto-nagao -- canonical: {first: Meenakshi, last: Nagarajan} - variants: - - {first: Meena, last: Nagarajan} -- canonical: {first: Magdi, last: Nagi} - variants: - - {first: Magdy, last: Nagi} -- canonical: {first: István, last: Nagy T.} - variants: - - {first: István T., last: Nagy} - - {first: István, last: Nagy} - - {first: Istvan, last: Nagy} -- canonical: {first: Seiichi, last: Nakagawa} - id: seiichi-nakagawa -- canonical: {first: Jun-ichi, last: Nakamura} - variants: - - {first: Jun’ichi, last: Nakamura} -- canonical: {first: Yukiko I., last: Nakano} - variants: - - {first: Yukiko, last: Nakano} -- canonical: {first: Ndapandula, last: Nakashole} - variants: - - {first: Ndapa, last: Nakashole} -- canonical: {first: Christine H., last: Nakatani} - variants: - - {first: Christine, last: Nakatani} -- canonical: {first: Shu, last: Nakazato} - id: shu-nakazato -- canonical: {first: Preslav, last: Nakov} - variants: - - {first: Preslav I., last: Nakov} -- canonical: {first: Yuhao, last: Wang} - id: yuhao-wang-renmin - orcid: 0009-0001-5760-9285 - institution: Renmin University of China - comment: Renmin -- canonical: {first: Yuhao, last: Wang} - id: yuhao-wang - comment: May refer to several people -- canonical: {first: Jee-sun, last: Nam} - variants: - - {first: Jee-Sun, last: Nam} -- canonical: {first: Fiammetta, last: Namer} - variants: - - {first: Fiametta, last: Namer} -- canonical: {first: Shrikanth, last: Narayanan} - variants: - - {first: Shri, last: Narayanan} - - {first: Shrikanth S., last: Narayanan} -- canonical: {first: Srini, last: Narayanan} - variants: - - {first: Srinivas, last: Narayanan} -- canonical: {first: Maria Fernanda Bacelar do, last: Nascimento} - variants: - - {first: Maria Fernanda Bacelar, last: do Nascimento} - - {first: Fernanda Bacelar, last: do Nascimento} -- canonical: {first: Jamal A., last: Nasir} - variants: - - {first: Jamal, last: Nasir} -- canonical: {first: Sudip Kumar, last: Naskar} - variants: - - {first: Sudip, last: Kumar Naskar} - - {first: Sudip, last: Naskar} -- canonical: {first: Vivi, last: Nastase} - variants: - - {first: Vivi, last: Năstase} -- canonical: {first: Prem, last: Natarajan} - variants: - - {first: Premkumar, last: Natarajan} -- canonical: {first: P. Senthil, last: Nathan} - variants: - - {first: Senthil, last: Nathan} -- canonical: {first: Borja, last: Navarro} - id: borja-navarro - variants: - - {first: Borja, last: Navarro-Colorado} -- canonical: {first: Eva, last: Navas} - id: eva-navas -- canonical: {first: Jiří, last: Navrátil} - variants: - - {first: Jiri, last: Navratil} -- canonical: {first: Tapas, last: Nayak} - variants: - - {first: Tapas, last: Nayek} -- canonical: {first: Adeline, last: Nazarenko} - id: adeline-nazarenko - variants: - - {first: Adeline, last: Nazarenko-Perrin} -- canonical: {first: Jeannette G., last: Neal} - id: jeannette-g-neal -- canonical: {first: Silvia, last: Necşulescu} - variants: - - {first: Silvia, last: Necsulescu} -- canonical: {first: Nicolas, last: Nedobejkine} - id: nicolas-nedobejkine -- canonical: {first: Mary S., last: Neff} - variants: - - {first: Mary, last: Neff} -- canonical: {first: Matteo, last: Negri} - id: matteo-negri -- canonical: {first: Anil Kumar, last: Nelakanti} - variants: - - {first: Anil, last: Kumar} -- canonical: {first: Esa, last: Nelimarkka} - id: esa-nelimarkka -- canonical: {first: Dávid Márk, last: Nemeskey} - variants: - - {first: David Mark, last: Nemeskey} -- canonical: {first: Goran, last: Nenadic} - variants: - - {first: Goran, last: Nenadić} -- canonical: {first: Jian, last: Wang} - id: jian-wang-hongkongpoly - orcid: 0000-0002-8992-8336 - institution: The Hong Kong Polytechnic University - comment: Hong Kong Polytechnic -- canonical: {first: Jian, last: Wang} - id: jian-wang - comment: May refer to several people -- canonical: {first: João P., last: Neto} - variants: - - {first: Joao P., last: Neto} - - {first: Joao, last: Neto} - - {first: João, last: Neto} - - {first: João Paulo, last: Neto} -- canonical: {first: Yael, last: Netzer} - variants: - - {first: Yael Dahan, last: Netzer} - - {first: Yael, last: Dahan} -- canonical: {first: Günter, last: Neumann} - variants: - - {first: Gunter, last: Neumann} - - {first: Guenter, last: Neumann} -- canonical: {first: Aurelie, last: Neveol} - variants: - - {first: Aurélie, last: Névéol} -- canonical: {first: Bruce E., last: Nevin} - variants: - - {first: Bruce, last: Nevin} -- canonical: {first: Paula, last: Newman} - id: paula-newman - variants: - - {first: Paula S., last: Newman} -- canonical: {first: Hermann, last: Ney} - id: hermann-ney -- canonical: {first: Gunta, last: Nešpore} - variants: - - {first: Gunta, last: Nespore-Berzkalne} -- canonical: {first: Andrew Y., last: Ng} - variants: - - {first: Andrew, last: Ng} -- canonical: {first: Hong-I, last: Ng} - variants: - - {first: Hong I, last: Ng} -- canonical: {first: Jun Ping, last: Ng} - variants: - - {first: Jun-Ping, last: Ng} -- canonical: {first: Raymond, last: Ng} - variants: - - {first: Raymond T., last: Ng} -- canonical: {first: See Kiong, last: Ng} - variants: - - {first: See-Kiong, last: Ng} -- canonical: {first: Cam-Tu, last: Nguyen} - variants: - - {first: Cẩm Tú, last: Nguyễn} -- canonical: {first: Huy, last: Nguyen} - comment: Stanford - id: huy-nguyen-stanford -- canonical: {first: Huy, last: Nguyen} - comment: UPitt, Amazon - id: huy-nguyen-pgh -- canonical: {first: Huy, last: Nguyen} - comment: BCL Technologies Inc. - id: huy-nguyen-bcl -- canonical: {first: Huy, last: Nguyen} - comment: ex-liulishuo - id: huy-nguyen-lls -- canonical: {first: Huy Tien, last: Nguyen} - variants: - - {first: Huy-Tien, last: Nguyen} -- canonical: {first: Kasu Sai Kartheek, last: Reddy} - orcid: 0009-0007-6679-3313 - institution: Indian Institute of Information Technology Dharwad, India - variants: - - {first: Sai Kartheek, last: Reddy Kasu} -- canonical: {first: Long, last: Nguyen} - id: long-nguyen -- canonical: {first: Minh Le, last: Nguyen} - id: minh-le-nguyen - variants: - - {first: Minh-Le, last: Nguyen} - - {first: Le-Minh, last: Nguyen} - - {first: Nguyen Le, last: Minh} - - {first: Le Minh, last: Nguyen} - - {first: Nguyen, last: Le Minh} -- canonical: {first: Phuong-Thai, last: Nguyen} - variants: - - {first: Phuong Thai, last: Nguyen} -- canonical: {first: Quy, last: Nguyen} - variants: - - {first: Quy T., last: Nguyen} -- canonical: {first: Thi Minh Huyen, last: Nguyen} - id: thi-minh-huyen-nguyen - variants: - - {first: Thi Minh Huyền, last: Nguyễn} - - {first: Thị Minh Huyền, last: Nguyễn} - - {first: Thi-Minh-Huyen, last: Nguyen} -- canonical: {first: ThuyLinh, last: Nguyen} - variants: - - {first: Thuy Linh, last: Nguyen} -- canonical: {first: Toan Q., last: Nguyen} - variants: - - {first: Toan, last: Nguyen} -- canonical: {first: Tri-Thanh, last: Nguyen} - variants: - - {first: Tri Thanh, last: Nguyen} -- canonical: {first: Van minh, last: Nguyen} - variants: - - {first: Van Minh, last: Nguyen} -- canonical: {first: Viet Cuong, last: Nguyen} - variants: - - {first: Nguyen Viet, last: Cuong} -- canonical: {first: Vinh Van, last: Nguyen} - variants: - - {first: Vinh-Van, last: Nguyen} -- canonical: {first: Ngô Thanh, last: Nhàn} - id: ngo-thanh-nhan - variants: - - {first: Ngo Thanh, last: Nhan} -- canonical: {first: Nicolas, last: Nicolov} - id: nicolas-nicolov - similar: [nikola-i-nikolov] -- canonical: {first: Jian-Yun, last: Nie} - variants: - - {first: Jian-yun, last: Nie} -- canonical: {first: Rodney, last: Nielsen} - variants: - - {first: Rodney D., last: Nielsen} -- canonical: {first: Sonja, last: Nießen} - id: sonja-niessen - variants: - - {first: Sonja, last: Niessen} -- canonical: {first: Nikola I., last: Nikolov} - id: nikola-i-nikolov - similar: [nicolas-nicolov] -- canonical: {first: Kristina, last: Nilsson Björkenstam} - variants: - - {first: Kristina, last: Nilsson} - - {first: Kristina Nilsson, last: Björkenstam} - - {first: Kristina, last: N. Björkenstam} -- canonical: {first: Nobal Bikram, last: Niraula} - variants: - - {first: Nobal, last: Niraula} -- canonical: {first: Irene, last: Nirenburg} - variants: - - {first: Irene B., last: Nirenburg} -- canonical: {first: Sergei, last: Nirenburg} - id: sergei-nirenburg - variants: - - {first: Sergei, last: Nirenberg} -- canonical: {first: Toyoaki, last: Nishida} - variants: - - {first: Toyo-aki, last: Nishida} -- canonical: {first: Bodil, last: Nistrup Madsen} - variants: - - {first: Bodil Nistrup, last: Madsen} -- canonical: {first: Zheng-Yu, last: Niu} - variants: - - {first: Zheng Yu, last: Niu} - - {first: Zhengyu, last: Niu} -- canonical: {first: Pascal, last: Nocéra} - variants: - - {first: Pascal, last: Nocera} -- canonical: {first: Albino, last: Nogueiras} - variants: - - {first: Albino Nogueiras, last: Rodriguez} -- canonical: {first: Lewis M., last: Norton} - variants: - - {first: Lewis, last: Norton} -- canonical: {first: R., last: Nozohoor-Farshi} - variants: - - {first: R, last: Nozohoor-Farshi} -- canonical: {first: Rita, last: Nuebel} - variants: - - {first: Rita, last: Nüebel} -- canonical: {first: Minghua, last: Nuo} - variants: - - {first: Ming Hua, last: Nuo} -- canonical: {first: Aparna, last: Nurani Venkitasubramanian} - variants: - - {first: Aparna N., last: Venkitasubramanian} -- canonical: {first: Eric, last: Nyberg} - variants: - - {first: Eric H., last: Nyberg III} - - {first: Eric H., last: Nyberg} - - {first: Eric H., last: 'Nyberg, 3rd'} -- canonical: {first: Claire, last: Nédellec} - variants: - - {first: Claire, last: Nėdellec} -- canonical: {first: Elmar, last: Nöth} - id: elmar-noth - variants: - - {first: Elmar, last: Noth} -- canonical: {first: Douglas W., last: Oard} - variants: - - {first: Douglas, last: Oard} - - {first: Doug, last: Oard} -- canonical: {first: Jon, last: Oberlander} - variants: - - {first: Jonathan, last: Oberländer} -- canonical: {first: Laura Ana Maria, last: Oberländer} - variants: - - {first: Laura Ana Maria, last: Bostan} - - {first: Laura-Ana-Maria, last: Bostan} -- canonical: {first: Ivan, last: Obradović} - variants: - - {first: Ivan, last: Obradoviæ} -- canonical: {first: Tomasz, last: Obrębski} - variants: - - {first: Tomasz, last: Obrebski} -- canonical: {first: Franz Josef, last: Och} - id: franz-josef-och - variants: - - {first: Franz J., last: Och} - - {first: Franz, last: Och} -- canonical: {first: Cheol-Young, last: Ock} - variants: - - {first: Cheolyoung, last: Ock} - - {first: Cheol-young, last: Ock} -- canonical: {first: Wei, last: Fan} - id: wei-fan-hkust - orcid: 0009-0008-1900-7081 - institution: Hong Kong University of Science and Technology - comment: HKUST -- canonical: {first: Wei, last: Fan} - id: wei-fan - comment: May refer to several people -- canonical: {first: Julian J., last: Odell} - id: julian-j-odell -- canonical: {first: Jan, last: Odijk} - id: jan-odijk -- canonical: {first: Pinar, last: Oezden Wennerberg} - variants: - - {first: Pinar, last: Wennerberg} - - {first: Pinar Oezden, last: Wennerberg} -- canonical: {first: William C., last: Ogden} - variants: - - {first: William, last: Ogden} -- canonical: {first: Philip, last: Ogren} - variants: - - {first: Philip V., last: Ogren} -- canonical: {first: Alice, last: Oh} - variants: - - {first: Alice H., last: Oh} -- canonical: {first: Jong-Hoon, last: Oh} - variants: - - {first: Jong Hoon, last: Oh} -- canonical: {first: Takahiro, last: Ohno} - variants: - - {first: Takahiro, last: Ono} -- canonical: {first: Atul Kr., last: Ojha} - variants: - - {first: Atul Ku., last: Ojha} -- canonical: {first: Naoaki, last: Okazaki} - variants: - - {first: Naoki, last: Okazaki} -- canonical: {first: Manabu, last: Okumura} - variants: - - {first: Manabu, last: Okumara} -- canonical: {first: Hiroshi G., last: Okuno} - variants: - - {first: Hiroshi, last: Okuno} -- canonical: {first: Duane E., last: Olawsky} - variants: - - {first: Duane, last: Olawsky} -- canonical: {first: Karel, last: Oliva} - variants: - - {first: Karel, last: Oli̊va} -- canonical: {first: José Luís, last: Oliveira} - variants: - - {first: Luís, last: Oliveira} -- canonical: {first: Osvaldo Novais, last: Oliveira Jr.} - variants: - - {first: Osvaldo, last: Oliveira Jr} -- canonical: {first: Solange, last: Oliveira Rezende} - variants: - - {first: Solange, last: Rezende} -- canonical: {first: Andrew, last: Olney} - variants: - - {first: Andrew M., last: Olney} -- canonical: {first: Mari Broman, last: Olsen} - variants: - - {first: Mari, last: Olsen} -- canonical: {first: Maurizio, last: Omologo} - id: maurizio-omologo -- canonical: {first: Arturo, last: Oncevay} - variants: - - {first: Arturo, last: Oncevay-Marcos} -- canonical: {first: Corinna, last: Onelli} - id: corinna-onelli -- canonical: {first: Takashi, last: Onishi} - variants: - - {first: Takeshi, last: Onishi} - - {first: Takashi, last: Oonishi} -- canonical: {first: Boyan, last: Onyshkevych} - variants: - - {first: Boyan A., last: Onyshkevych} -- canonical: {first: Constantin, last: Orasan} - id: constantin-orasan - variants: - - {first: Constantin, last: Orăsan} -- canonical: {first: Zeynep, last: Orhan} - variants: - - {first: Orhan, last: Zeynep} -- canonical: {first: Maite, last: Oronoz} - id: maite-oronoz -- canonical: {first: J. Walker, last: Orr} - variants: - - {first: Walker, last: Orr} -- canonical: {first: Javier, last: Ortega-García} - variants: - - {first: Javier, last: Ortega-Garcia} -- canonical: {first: Sergio, last: Ortiz Rojas} - variants: - - {first: Sergio, last: Ortiz-Rojas} - - {first: Sergio Ortiz, last: Rojas} -- canonical: {first: Pedro, last: Ortiz Suarez} - variants: - - {first: Pedro Javier, last: Ortiz Suárez} -- canonical: {first: Daniel, last: Ortiz-Martínez} - variants: - - {first: Daniel, last: Ortíz-Martínez} - - {first: Daniel, last: Ortiz Martínez} -- canonical: {first: Beatrice, last: Oshika} - variants: - - {first: Beatrice T., last: Oshika} -- canonical: {first: David Yoshikazu, last: Oshima} - variants: - - {first: David Y., last: Oshima} -- canonical: {first: Mari, last: Ostendorf} - id: mari-ostendorf -- canonical: {first: Julia, last: Otmakhova} - variants: - - {first: Yulia, last: Otmakhova} -- canonical: {first: Jahna, last: Otterbacher} - variants: - - {first: Jahna C., last: Otterbacher} -- canonical: {first: Cecilia, last: Ovesdotter Alm} - variants: - - {first: Cecilia Ovesdotter, last: Alm} - - {first: Cecilia O., last: Alm} - - {first: Cecilia, last: O. Alm} -- canonical: {first: Sharon, last: Oviatt} - variants: - - {first: Sharon L., last: Oviatt} -- canonical: {first: Hiromi Itoh, last: Ozaku} - variants: - - {first: Hiromi itoh, last: Ozaku} -- canonical: {first: Canberk, last: Ozdemir} - variants: - - {first: Canberk, last: Özdemir} -- canonical: {first: Benoît, last: Ozell} - variants: - - {first: Benoit, last: Ozell} -- canonical: {first: Timothy, last: O’Donnell} - variants: - - {first: Timothy J., last: O’Donnell} - - {first: Tim, last: O’Donnell} -- canonical: {first: Thomas P., last: O’Hara} - variants: - - {first: Thomas, last: O’Hara} -- canonical: {first: Tim, last: O’Keefe} - variants: - - {first: Timothy, last: O’Keefe} -- canonical: {first: Dianne P., last: O’Leary} - variants: - - {first: Dianne, last: O’Leary} -- canonical: {first: Ian M., last: O’Neill} - variants: - - {first: Ian, last: O’Neill} -- canonical: {first: Douglas, last: O’Shaughnessy} - id: douglas-oshaughnessy - variants: - - {first: Douglas D., last: O’Shaughnessy} -- canonical: {first: Dave, last: O’mara} - id: dave-omara -- canonical: {first: Elaine, last: O′Mahony} - variants: - - {first: Elaine, last: O’Mahony} -- canonical: {first: Sarah Masud, last: Preum} - orcid: 0000-0002-7771-8323 - variants: - - {first: Sarah, last: Preum} - - {first: Sarah M., last: Preum} -- canonical: {first: Deepak, last: P} - variants: - - {first: Deepak, last: Padmanabhan} -- canonical: {first: Gerhard, last: Paaß} - variants: - - {first: Gerhard, last: Paass} -- canonical: {first: Gordon, last: Pace} - variants: - - {first: Gordon J., last: Pace} -- canonical: {first: María Leonor, last: Pacheco} - variants: - - {first: Maria Leonor, last: Pacheco} -- canonical: {first: Ulrike, last: Pado} - variants: - - {first: Ulrike, last: Padó} -- canonical: {first: Lluís, last: Padró} - id: lluis-padro - variants: - - {first: Lluis, last: Padro} - - {first: Lluis, last: Padró} -- canonical: {first: Muntsa, last: Padró} - id: muntsa-padro -- canonical: {first: Sebastian, last: Padó} - variants: - - {first: Sebastian, last: Pado} -- canonical: {first: Gustavo, last: Paetzold} - variants: - - {first: Gustavo H., last: Paetzold} - - {first: Gustavo, last: Henrique Paetzold} - - {first: Gustavo Henrique, last: Paetzold} -- canonical: {first: Peteris, last: Paikens} - variants: - - {first: Pēteris, last: Paikens} -- canonical: {first: Jean-Pierre, last: Paillet} - variants: - - {first: Jean Pierre, last: Paillet} -- canonical: {first: Helen, last: Pain} - id: helen-pain -- canonical: {first: Daniel, last: Paiva} - id: daniel-paiva - variants: - - {first: Daniel S., last: Paiva} -- canonical: {first: Sergey V., last: Pakhomov} - variants: - - {first: Sergey, last: Pakhomov} -- canonical: {first: Serguei, last: Pakhomov} - variants: - - {first: Serguei V., last: Pakhomov} -- canonical: {first: Christopher, last: Pal} - variants: - - {first: Chris, last: Pal} -- canonical: {first: David S., last: Pallett} - id: david-s-pallett - variants: - - {first: David, last: Pallett} -- canonical: {first: David D., last: Palmer} - variants: - - {first: David, last: Palmer} -- canonical: {first: Martha, last: Palmer} - variants: - - {first: Martha Stone, last: Palmer} - - {first: Martha S., last: Palmer} -- canonical: {first: Manuel, last: Palomar} - id: manuel-palomar -- canonical: {first: Girish, last: Palshikar} - variants: - - {first: Girish K., last: Palshikar} - - {first: Girish K, last: Palshikar} -- canonical: {first: Michael J., last: Pan} - variants: - - {first: Michael, last: Pan} -- canonical: {first: Yi-Cheng, last: Pan} - variants: - - {first: Yi-cheng, last: Pan} -- canonical: {first: Onkar Arun, last: Pandit} - variants: - - {first: Onkar, last: Pandit} -- canonical: {first: Jarmila, last: Panevová} - variants: - - {first: Jarmila, last: Panevova} -- canonical: {first: Nagesh C., last: Panyam} - variants: - - {first: Nagesh, last: C. Panyam} -- canonical: {first: Constantine, last: Papageorgiou} - variants: - - {first: Constantine P., last: Papageorgiou} -- canonical: {first: Harris, last: Papageorgiou} - variants: - - {first: Haris, last: Papageorgiou} -- canonical: {first: Alexandros, last: Papangelis} - variants: - - {first: Alex, last: Papangelis} -- canonical: {first: Ivandré, last: Paraboni} - variants: - - {first: Ivandre, last: Paraboni} -- canonical: {first: Emerson Cabrera, last: Paraiso} - variants: - - {first: Emerson, last: Paraiso} -- canonical: {first: Monica Lestari, last: Paramita} - variants: - - {first: Monica, last: Paramita} -- canonical: {first: Jose Manuel, last: Pardo} - variants: - - {first: Jose M., last: Pardo} - - {first: José M., last: Pardo} -- canonical: {first: Mi, last: Zhang} - id: mi-zhang-ucd - orcid: 0000-0003-3567-3478 - institution: University College Dublin - comment: Dublin -- canonical: {first: Mi, last: Zhang} - id: mi-zhang - comment: May refer to multiple people -- canonical: {first: Antonio, last: Pareja Lora} - variants: - - {first: Antonio, last: Pareja-Lora} -- canonical: {first: Ankur, last: Parikh} - variants: - - {first: Ankur P., last: Parikh} -- canonical: {first: Cecile, last: Paris} - variants: - - {first: Cécile, last: Paris} - - {first: Cecile L., last: Paris} -- canonical: {first: Praveen, last: Paritosh} - variants: - - {first: Praveen, last: P} -- canonical: {first: HyukRo, last: Park} - variants: - - {first: Hyukro, last: Park} -- canonical: {first: Hyun Seok, last: Park} - variants: - - {first: Hyun S., last: Park} -- canonical: {first: Jong C., last: Park} - variants: - - {first: Jong, last: Park} -- canonical: {first: Myung-Kwan, last: Park} - variants: - - {first: Myungkwan, last: Park} -- canonical: {first: Sang-Kyu, last: Park} - variants: - - {first: Sangkyu, last: Park} -- canonical: {first: Se-Young, last: Park} - variants: - - {first: Se Young, last: Park} -- canonical: {first: Y. Albert, last: Park} - variants: - - {first: Albert, last: Park} -- canonical: {first: ‘Ōiwi, last: Parker Jones} - variants: - - {first: Oiwi, last: Parker Jones} -- canonical: {first: Patrick, last: Paroubek} - id: patrick-paroubek -- canonical: {first: Carla, last: Parra Escartín} - variants: - - {first: Carla, last: Parra} - - {first: Carla Parra, last: Escartín} - - {first: Carla Parra, last: Escartin} -- canonical: {first: Barbara H., last: Partee} - variants: - - {first: Barbara, last: Partee} -- canonical: {first: Md. Rizwan, last: Parvez} - variants: - - {first: Md Rizwan, last: Parvez} -- canonical: {first: Artemis, last: Parvizi} - variants: - - {first: Artemis, last: Parvisi} -- canonical: {first: Marius, last: Pasca} - variants: - - {first: Marius A., last: Pasca} - - {first: Marius, last: Paşca} -- canonical: {first: Elena, last: Paskaleva} - variants: - - {first: Elena, last: Pascaleva} -- canonical: {first: Rebecca J., last: Passonneau} - variants: - - {first: Rebecca, last: Passonneau} -- canonical: {first: John K., last: Pate} - variants: - - {first: John, last: Pate} - - {first: John K, last: Pate} -- canonical: {first: Pu, last: Zhao} - id: pu-zhao-northeastern - orcid: 0000-0001-5018-2859 - institution: Northeastern University - comment: Northeastern -- canonical: {first: Pu, last: Zhao} - id: pu-zhao - comment: May refer to several people -- canonical: {first: Pratikkumar, last: Patel} - variants: - - {first: Pratik, last: Patel} -- canonical: {first: Jon, last: Patrick} - variants: - - {first: Jon D., last: Patrick} - - {first: Jon David, last: Patrick} -- canonical: {first: Terry, last: Patten} - id: terry-patten -- canonical: {first: Michael, last: Paul} - variants: - - {first: Michael J., last: Paul} -- canonical: {first: Niklas, last: Paulsson} - id: niklas-paulsson -- canonical: {first: Jyoti, last: Pawar} - variants: - - {first: Jyoti D., last: Pawar} - - {first: Jyoti D, last: Pawar} -- canonical: {first: Maria Teresa, last: Pazienza} - id: maria-teresa-pazienza - variants: - - {first: Maria Teresa, last: Pazienze} - - {first: Maria, last: Pazienza} -- canonical: {first: Bolette Sandford, last: Pedersen} - variants: - - {first: Bolette, last: Sandford Pedersen} - - {first: Bolette S., last: Pedersen} - - {first: Bolette, last: Pedersen} - - {first: Bo, last: Pedersen} -- canonical: {first: Víctor, last: Peinado} - variants: - - {first: Victor, last: Peinado} -- canonical: {first: Bryan, last: Pellom} - id: bryan-pellom -- canonical: {first: Mikel, last: Penagarikano} - id: mikel-penagarikano -- canonical: {first: Yifan, last: Peng} - comment: cmu - id: yifan-peng-cmu -- canonical: {first: Christopher, last: Pennington} - variants: - - {first: Chris, last: Pennington} -- canonical: {first: Joseph J., last: Peper} - variants: - - {first: Joseph, last: Peper} -- canonical: {first: Jesús, last: Peral} - id: jesus-peral - variants: - - {first: Jesus, last: Peral} -- canonical: {first: Fernando, last: Perdigão} - variants: - - {first: Fernando S., last: Perdigão} -- canonical: {first: Jose Manuel, last: Perea-Ortega} - variants: - - {first: José M., last: Perea-Ortega} - - {first: Jose Manuel, last: Perea} - - {first: Jose-Manuel, last: Perea-Ortega} -- canonical: {first: Daniel Bastos, last: Pereira} - variants: - - {first: Daniel B., last: Pereira} -- canonical: {first: Fernando C. N., last: Pereira} - variants: - - {first: Fernando C.N., last: Pereira} -- canonical: {first: Luísa, last: Pereira} - variants: - - {first: Luisa, last: Pereira} -- canonical: {first: Martín, last: Pereira-Fariña} - id: martin-pereira-farina -- canonical: {first: Cenel-Augusto, last: Perez} - variants: - - {first: Cenel Augusto, last: Perez} -- canonical: {first: Álvaro, last: Peris} - variants: - - {first: Alvaro, last: Peris} -- canonical: {first: C. Raymond, last: Perrault} - variants: - - {first: Raymond, last: Perrault} -- canonical: {first: Andreas, last: Persidis} - id: andreas-persidis -- canonical: {first: Marie-Paule, last: Pery-Woodley} - variants: - - {first: Marie-Paule, last: Péry-Woodley} -- canonical: {first: John, last: Pestian} - variants: - - {first: John P., last: Pestian} -- canonical: {first: Matthew E., last: Peters} - variants: - - {first: Matthew, last: Peters} -- canonical: {first: Wim, last: Peters} - id: wim-peters -- canonical: {first: Daniel, last: Peterson} - variants: - - {first: Daniel W., last: Peterson} -- canonical: {first: J., last: Peterson} - variants: - - {first: Jill, last: Peterson} -- canonical: {first: Vladimir, last: Petkevic} - variants: - - {first: Vladimír, last: Petkevič} -- canonical: {first: Saša, last: Petrović} - variants: - - {first: Sasa, last: Petrovic} -- canonical: {first: Miriam R. L., last: Petruck} - variants: - - {first: Miriam R.L., last: Petruck} - - {first: Miriam R L, last: Petruck} -- canonical: {first: Anselmo, last: Peñas} - variants: - - {first: Anselmo, last: Penas} -- canonical: {first: Minh Quang, last: Pham} - comment: SYSTRAN - id: minh-quang-pham - similar: [minh-quang-nhat-pham] - variants: - - {first: MinhQuang, last: Pham} -- canonical: {first: Minh Quang Nhat, last: Pham} - comment: JAIST, Alt Vietnam - id: minh-quang-nhat-pham - similar: [minh-quang-pham] -- canonical: {first: Nghia The, last: Pham} - variants: - - {first: Nghia, last: Pham} -- canonical: {first: Ngoc-Quan, last: Pham} - variants: - - {first: Ngoc Quan, last: Pham} -- canonical: {first: Tuoi Thi, last: Phan} - variants: - - {first: Tuoi, last: T. Phan} -- canonical: {first: John, last: Phillips} - comment: Univ. of Manchester - id: john-phillips - similar: [jon-phillips] -- canonical: {first: Jon, last: Phillips} - comment: Georgetown, MITRE - id: jon-phillips - similar: [john-phillips] -- canonical: {first: Michael, last: Phillips} - id: michael-phillips -- canonical: {first: Robert, last: Phillips} - variants: - - {first: Rob, last: Phillips} -- canonical: {first: Fabio, last: Pianesi} - id: fabio-pianesi -- canonical: {first: Emanuele, last: Pianta} - id: emanuele-pianta -- canonical: {first: Scott S.L., last: Piao} - id: scott-s-l-piao - variants: - - {first: Scott, last: Piao} - - {first: Scott S. L., last: Piao} -- canonical: {first: Christine, last: Piatko} - variants: - - {first: Christine D., last: Piatko} -- canonical: {first: Francesco, last: Piazza} - id: francesco-piazza -- canonical: {first: José Ramom, last: Pichel Campos} - variants: - - {first: José Ramom, last: Pichel} - - {first: Jose Ramom, last: Pichel} - - {first: Jose Ramom, last: Pichel Campos} -- canonical: {first: M. A., last: Picheny} - variants: - - {first: M.A., last: Picheny} -- canonical: {first: David, last: Picó} - variants: - - {first: David, last: Pico} -- canonical: {first: Roberto, last: Pieraccini} - id: roberto-pieraccini -- canonical: {first: David, last: Pierce} - variants: - - {first: David R., last: Pierce} -- canonical: {first: Janet, last: Pierrehumbert} - variants: - - {first: Janet B., last: Pierrehumbert} -- canonical: {first: Paola, last: Pietrandrea} - variants: - - {first: Paola, last: Pietandrea} -- canonical: {first: Luis. A., last: Pineda} - variants: - - {first: Luis, last: Pineda} -- canonical: {first: Gisele Montilha, last: Pinheiro} - variants: - - {first: Gisele, last: Montilha} -- canonical: {first: Mārcis, last: Pinnis} - variants: - - {first: Marcis, last: Pinnis} -- canonical: {first: David, last: Pinto} - variants: - - {first: David Eduardo, last: Pinto Avendaño} - - {first: David, last: Pinto Avendaño} -- canonical: {first: R., last: Piotrowski} - variants: - - {first: R. G., last: Piotrowski} -- canonical: {first: Stelios, last: Piperidis} - id: stelios-piperidis - variants: - - {first: Stelios, last: Piperdis} -- canonical: {first: Tommi A., last: Pirinen} - variants: - - {first: Tommi, last: Pirinen} - - {first: Tommi A, last: Pirinen} -- canonical: {first: John F., last: Pitrelli} - variants: - - {first: John, last: Pitrelli} -- canonical: {first: Luiz Augusto, last: Pizzato} - variants: - - {first: Luiz Augusto Sangoi, last: Pizzato} -- canonical: {first: Paul, last: Placeway} - id: paul-placeway -- canonical: {first: Mihaela, last: Plamada-Onofrei} - variants: - - {first: Mihaela, last: Onofrei} - - {first: Mihaela, last: Plămadă-Onofrei} -- canonical: {first: Magdalena, last: Plamadă} - variants: - - {first: Magdalena, last: Plamada} -- canonical: {first: Barbara, last: Plank} - id: barbara-plank -- canonical: {first: Martin, last: Platek} - variants: - - {first: Martin, last: Plátek} -- canonical: {first: John C., last: Platt} - variants: - - {first: John, last: Platt} -- canonical: {first: Matúš, last: Pleva} - variants: - - {first: Matus, last: Pleva} -- canonical: {first: Massimo, last: Poesio} - id: massimo-poesio -- canonical: {first: Alain, last: Polguère} - id: alain-polguere - variants: - - {first: Alain, last: Polguere} -- canonical: {first: Joseph, last: Polifroni} - variants: - - {first: Joseph H., last: Polifroni} -- canonical: {first: Ziortza, last: Polin} - id: ziortza-polin -- canonical: {first: Carl, last: Pollard} - variants: - - {first: Carl J., last: Pollard} -- canonical: {first: Petr, last: Pollák} - variants: - - {first: Petr, last: Pollak} -- canonical: {first: Simone Paolo, last: Ponzetto} - variants: - - {first: Simone P., last: Ponzetto} - - {first: Simone, last: Ponzetto} -- canonical: {first: Diana Nicoleta, last: Popa} - variants: - - {first: Diana, last: Popa} -- canonical: {first: Ashok, last: Popat} - variants: - - {first: Ashok C., last: Popat} -- canonical: {first: Lubos, last: Popelínsky} - variants: - - {first: Luboš, last: Popelínský} -- canonical: {first: Andrei, last: Popescu-Belis} - id: andrei-popescu-belis - variants: - - {first: Andrei, last: Popescu Belis} -- canonical: {first: Maja, last: Popović} - variants: - - {first: Maja, last: Popovic} -- canonical: {first: Bruce, last: Porter} - variants: - - {first: Bruce W., last: Porter} -- canonical: {first: Oana, last: Postolache} - variants: - - {first: Oana-Diana, last: Postolache} -- canonical: {first: Petra, last: Poukarová} - variants: - - {first: Petra, last: Klimešová} -- canonical: {first: Daniel, last: Povey} - id: daniel-povey -- canonical: {first: David M. W., last: Powers} - id: david-m-w-powers - variants: - - {first: David M W, last: Powers} -- canonical: {first: Maria, last: Pozzi} - variants: - - {first: María, last: Pozzi} - - {first: Mara, last: Pozzi} -- canonical: {first: Sameer, last: Pradhan} - id: sameer-pradhan - variants: - - {first: Sameer S., last: Pradhan} -- canonical: {first: K.V.S., last: Prasad} - variants: - - {first: K.V.S, last: Prasad} -- canonical: {first: Federico, last: Prat} - id: federico-prat -- canonical: {first: Daniel, last: Preoţiuc-Pietro} - variants: - - {first: Daniel, last: Preotiuc-Pietro} -- canonical: {first: Nives Mikelić, last: Preradović} - variants: - - {first: Nives, last: Mikelić Preradović} -- canonical: {first: Lauma, last: Pretkalniņa} - variants: - - {first: Lauma, last: Pretkalnina} - - {first: Lauma, last: Pretkalnin̨a} -- canonical: {first: Sergio José, last: Rodríguez Méndez} - orcid: 0000-0001-7203-8399 - institution: National Yang Ming Chiao Tung University - comment: NYCU - variants: - - {first: Sergio J., last: Rodriguez Mendez} -- canonical: {first: Patti, last: Price} - id: patti-price - variants: - - {first: Patti J., last: Price} -- canonical: {first: Belém, last: Priego Sanchez} - variants: - - {first: Belem, last: Priego} -- canonical: {first: Joel, last: Priestley} - variants: - - {first: Joel James, last: Priestley} -- canonical: {first: Danie J., last: Prinsloo} - variants: - - {first: Danie, last: Prinsloo} -- canonical: {first: Ruben A., last: Proano} - variants: - - {first: Rubén, last: Proaño} - - {first: Rubén A., last: Proaño} -- canonical: {first: Irina, last: Prodanof} - id: irina-prodanof -- canonical: {first: Domenico, last: Proietti} - id: domenico-proietti -- canonical: {first: Carlos A., last: Prolo} - variants: - - {first: Carlos, last: Prolo} -- canonical: {first: Gabor, last: Proszeky} - variants: - - {first: Gábor, last: Prószéky} - - {first: Gabor, last: Prbszeky} -- canonical: {first: Emily, last: Prud’hommeaux} - variants: - - {first: Emily T., last: Prud’hommeaux} - - {first: Emily, last: Prud'hommeaux} -- canonical: {first: Mark, last: Przybocki} - variants: - - {first: Mark A., last: Przybocki} -- canonical: {first: Laurent, last: Prévot} - variants: - - {first: Laurent, last: Prevot} -- canonical: {first: Josef, last: Psutka} - id: josef-psutka - variants: - - {first: Josef V., last: Psutka} -- canonical: {first: Jan, last: Ptacek} - variants: - - {first: Jan, last: Ptáček} -- canonical: {first: Raymond, last: Ptucha} - variants: - - {first: Ray, last: Ptucha} -- canonical: {first: Rajkumar, last: Pujari} - variants: - - {first: Pujari, last: Rajkumar} -- canonical: {first: Paolo, last: Puliti} - id: paolo-puliti -- canonical: {first: Geoffrey K., last: Pullum} - variants: - - {first: Geoffrey, last: Pullum} -- canonical: {first: Stephen, last: Pulman} - id: stephen-pulman - variants: - - {first: Stephen G., last: Pulman} -- canonical: {first: Ryosuke, last: Takahashi} - id: ryosuke-takahashi-tohoku - orcid: 0009-0002-9887-2781 - comment: Tohoku - institution: Tohoku University -- canonical: {first: Ryosuke, last: Takahashi} - id: ryosuke-takahashi - comment: May refer to several people -- canonical: {first: James, last: Pustejovsky} - id: james-pustejovsky - variants: - - {first: James D., last: Pustejovsky} -- canonical: {first: Guy, last: Pérennou} - id: guy-perennou -- canonical: {first: Chantal, last: Pérez-Hernández} - variants: - - {first: Chantal, last: Pérez} -- canonical: {first: Behrang, last: QasemiZadeh} - variants: - - {first: Behrang, last: Q. Zadeh} - - {first: Behrang Q., last: Zadeh} - - {first: Behrang, last: Zadeh} - - {first: Behrang, last: Qasemizadeh} -- canonical: {first: Haoliang, last: Qi} - variants: - - {first: HaoLiang, last: Qi} -- canonical: {first: Longhua, last: Qian} - variants: - - {first: LongHua, last: Qian} -- canonical: {first: Xin Ying, last: Qiu} - variants: - - {first: Xinying, last: Qiu} -- canonical: {first: Yun-Qian, last: Qu} - variants: - - {first: Yunqian, last: Qu} -- canonical: {first: Maurice, last: Quezel-Ambrunaz} - id: maurice-quezel-ambrunaz -- canonical: {first: Matthieu, last: Quignard} - id: matthieu-quignard -- canonical: {first: Kevin M., last: Quinn} - variants: - - {first: Kevin, last: Quinn} -- canonical: {first: Pattabhi, last: RK Rao} - variants: - - {first: T. Pattabhi, last: R. K Rao} - - {first: Pattabhi RK, last: Rao} -- canonical: {first: Hazem, last: Raafat} - variants: - - {first: Hazem, last: M. Raafat} -- canonical: {first: Lawrence R., last: Rabiner} - id: lawrence-r-rabiner -- canonical: {first: David Nicolas, last: Racca} - variants: - - {first: David Nicolás, last: Racca} -- canonical: {first: Dragomir, last: Radev} - variants: - - {first: Dragomir R., last: Radev} -- canonical: {first: Remo, last: Raffaelli} - id: remo-raffaelli -- canonical: {first: Anna N., last: Rafferty} - variants: - - {first: Anna, last: Rafferty} -- canonical: {first: Ahmed, last: Ragheb} - id: ahmed-ragheb -- canonical: {first: Achla M., last: Raina} - variants: - - {first: Achla, last: Raina} - - {first: Achla M, last: Raina} -- canonical: {first: S., last: Rajendran} - variants: - - {first: Rajendran, last: S} -- canonical: {first: Rajakrishnan, last: Rajkumar} - variants: - - {first: Rajkumar, last: Rajakrishnan} -- canonical: {first: Martin, last: Rajman} - id: martin-rajman -- canonical: {first: Ekaterina V., last: Rakhilina} - variants: - - {first: Ekaterina, last: Rakhilina} -- canonical: {first: Bhuvana, last: Ramabhadran} - id: bhuvana-ramabhadran -- canonical: {first: Ananth, last: Ramakrishnan A.} - variants: - - {first: Ananth, last: Ramakrishnan A} -- canonical: {first: Sv, last: Ramanan} - variants: - - {first: SV, last: Ramanan} -- canonical: {first: Owen, last: Rambow} - variants: - - {first: Owen C., last: Rambow} -- canonical: {first: Radoslaw, last: Ramocki} - variants: - - {first: Radosław, last: Ramocki} -- canonical: {first: Margarita Alonso, last: Ramos} - variants: - - {first: Margarita, last: Alonso-Ramos} -- canonical: {first: Lance, last: Ramshaw} - variants: - - {first: Lance A., last: Ramshaw} -- canonical: {first: Wenyu, last: Zhang} - id: wenyu-zhang-cornell - orcid: 0000-0002-3849-4320 - comment: Cornell - institution: Cornell University -- canonical: {first: Wenyu, last: Zhang} - id: wenyu-zhang - comment: May refer to several people -- canonical: {first: Gema, last: Ramírez-Sánchez} - variants: - - {first: Gema, last: Ramírez} -- canonical: {first: Bali, last: Ranaivo-Malançon} - variants: - - {first: Bali, last: Ranaivo-Malancon} - - {first: Bali Ranaivo, last: Malanҫon} -- canonical: {first: Peter A., last: Rankel} - variants: - - {first: Peter, last: Rankel} -- canonical: {first: K Sreenivasa, last: Rao} - variants: - - {first: K. Sreenivasa, last: Rao} -- canonical: {first: Spyros, last: Raptis} - id: spyros-raptis -- canonical: {first: Mohsen, last: Rashwan} - id: mohsen-rashwan -- canonical: {first: Lev, last: Ratinov} - variants: - - {first: Lev-Arie, last: Ratinov} -- canonical: {first: Adwait, last: Ratnaparkhi} - id: adwait-ratnaparkhi -- canonical: {first: Esther, last: Ratsch} - id: esther-ratsch -- canonical: {first: Lisa, last: Rau} - variants: - - {first: Lisa F., last: Rau} -- canonical: {first: Yael, last: Ravin} - id: yael-ravin -- canonical: {first: Balaraman, last: Ravindran} - id: balaraman-ravindran -- canonical: {first: Manny, last: Rayner} - id: manny-rayner -- canonical: {first: Agha Ali, last: Raza} - variants: - - {first: Agha, last: Raza} -- canonical: {first: Mike, last: Reape} - id: mike-reape -- canonical: {first: Dietrich, last: Rebholz Schuhmann} - variants: - - {first: Dietrich, last: Rebholz-Schuhmann} -- canonical: {first: Chris, last: Reed} - id: chris-reed -- canonical: {first: Florence, last: Reeder} - variants: - - {first: Florence M., last: Reeder} -- canonical: {first: Larry H., last: Reeker} - id: larry-h-reeker -- canonical: {first: Uwe, last: Reichel} - variants: - - {first: Uwe D., last: Reichel} -- canonical: {first: Ronan G., last: Reilly} - variants: - - {first: Ronan, last: Reilly} -- canonical: {first: Frederick, last: Reiss} - variants: - - {first: Frederick R., last: Reiss} -- canonical: {first: Jose, last: Relaño-Gil} - variants: - - {first: Jose, last: Relano Gil} - - {first: José, last: Relaño Gil} - - {first: José, last: Relaño} -- canonical: {first: Francesc, last: Ribas} - variants: - - {first: Francesc Ribas, last: Framis} -- canonical: {first: Marco Tulio, last: Ribeiro} - variants: - - {first: Marco, last: Ribeiro} -- canonical: {first: Ricardo, last: Ribeiro} - variants: - - {first: Ricardo Daniel, last: Ribeiro} -- canonical: {first: Elaine, last: Rich} - variants: - - {first: Elaine A., last: Rich} -- canonical: {first: Alexander, last: Richard Fabbri} - variants: - - {first: Alexander R., last: Fabbri} - - {first: Alexander, last: Fabbri} -- canonical: {first: German, last: Rigau} - id: german-rigau -- canonical: {first: Michael, last: Riley} - variants: - - {first: Michael D., last: Riley} -- canonical: {first: Ellen, last: Riloff} - id: ellen-riloff -- canonical: {first: Hae Chang, last: Rim} - variants: - - {first: Hae-Chang, last: Rim} -- canonical: {first: Antonio, last: Rincón} - variants: - - {first: Antonio, last: Rincon} -- canonical: {first: Thomas C., last: Rindflesch} - variants: - - {first: Thomas, last: Rindflesch} -- canonical: {first: Eric, last: Ringger} - variants: - - {first: Eric K., last: Ringger} -- canonical: {first: Annette, last: Rios Gonzales} - variants: - - {first: Annette, last: Rios} -- canonical: {first: Eric Sven, last: Ristad} - variants: - - {first: Eric, last: Ristad} -- canonical: {first: Graeme, last: Ritchie} - id: graeme-ritchie - variants: - - {first: Graeme D., last: Ritchie} -- canonical: {first: Hammam, last: Riza} - variants: - - {first: Ir. Hammam, last: Riza} -- canonical: {first: Albert A., last: Rizzo} - variants: - - {first: Albert, last: Rizzo} - - {first: Skip, last: Rizzo} - - {first: Albert Skip, last: Rizzo} -- canonical: {first: Nick, last: Rizzolo} - variants: - - {first: Nicholas, last: Rizzolo} -- canonical: {first: Jane J., last: Robinson} - variants: - - {first: Jane, last: Robinson} -- canonical: {first: Patricia, last: Robinson} - id: patricia-robinson -- canonical: {first: Leonida Della, last: Rocca} - variants: - - {first: Leonida, last: Della-Rocca} - - {first: Leonida, last: Della Rocca} -- canonical: {first: Martha-Alicia, last: Rocha} - variants: - - {first: Martha Alicia, last: Rocha} -- canonical: {first: Tim, last: Rocktäschel} - variants: - - {first: Tim, last: Rocktaschel} -- canonical: {first: Álvaro, last: Rodrigo} - variants: - - {first: Alvaro, last: Rodrigo} -- canonical: {first: Luis, last: Rodrigo-Aguado} - variants: - - {first: Luis, last: Rodrigo} -- canonical: {first: Irene, last: Rodrigues} - variants: - - {first: Irene Pimenta, last: Rodrigues} - - {first: Irene, last: Pimenta Rodrigues} -- canonical: {first: João, last: Rodrigues} - variants: - - {first: João, last: António Rodrigues} -- canonical: {first: Kepa Joseba, last: Rodriguez} - variants: - - {first: Kepa J., last: Rodríguez} - - {first: Kepa Joseba, last: Rodríguez} -- canonical: {first: H., last: Rodriguez Hontoria} - variants: - - {first: H., last: Rodriguez} -- canonical: {first: Victor, last: Rodriguez-Doncel} - variants: - - {first: Víctor, last: Rodríguez} - - {first: Victor, last: Rodríguez Doncel} -- canonical: {first: Luis Javier, last: Rodriguez-Fuentes} - variants: - - {first: Luis Javier, last: Rodríguez-Fuentes} -- canonical: {first: Mari Carmen, last: Rodriguez-Gancedo} - id: mari-carmen-rodriguez-gancedo - variants: - - {first: M. Carmen Rodríguez, last: Gancedo} - - {first: M. Carmen, last: Rodríguez} - - {first: Mari Carmen, last: Rodríguez} -- canonical: {first: Carlos, last: Rodriguez-Penagos} - variants: - - {first: Carlos, last: Rodríguez} - - {first: Carlos Rodriguez, last: Penagos} - - {first: Carlos, last: Rodríguez Penagos} - - {first: Carlos, last: Rodríguez-Penagos} -- canonical: {first: Horacio, last: Rodríguez} - variants: - - {first: Horacio, last: Rodriguez} -- canonical: {first: Miguel, last: Rodríguez Hernández} - variants: - - {first: Miguel Ángel, last: Rodríguez} - - {first: Miguel, last: Rodríguez} -- canonical: {first: Christophe, last: Roeder} - variants: - - {first: Chris, last: Roeder} -- canonical: {first: Ina, last: Roesiger} - variants: - - {first: Ina, last: Rösiger} -- canonical: {first: U., last: Rohini} - variants: - - {first: Rohini, last: U} -- canonical: {first: J. Robin, last: Rohlicek} - id: j-robin-rohlicek - variants: - - {first: Robin, last: Rohlicek} -- canonical: {first: David M., last: Rojas} - variants: - - {first: David, last: Rojas} -- canonical: {first: Lina M., last: Rojas Barahona} - variants: - - {first: Lina M., last: Rojas-Barahona} - - {first: Lina, last: Rojas-Barahona} - - {first: Lina, last: Rojas} - - {first: Lina Maria, last: Rojas-Barahona} -- canonical: {first: Norton Trevisan, last: Roman} - variants: - - {first: Norton T., last: Roman} - - {first: Norton, last: Trevisan Roman} -- canonical: {first: Daniela M., last: Romano} - variants: - - {first: Daniela, last: Romano} -- canonical: {first: Lorenza, last: Romano} - id: lorenza-romano -- canonical: {first: Laurent, last: Romary} - id: laurent-romary -- canonical: {first: Tiit, last: Roosmaa} - id: tiit-roosmaa -- canonical: {first: Paul, last: Roossin} - id: paul-roossin -- canonical: {first: Carolyn, last: Rose} - id: carolyn-rose - variants: - - {first: Carolyn P., last: Rose} - - {first: Carolyn P., last: Rosé} - - {first: Carolyn, last: P. Rosé} - - {first: Carolyn Penstein, last: Rose} - - {first: Carolyn, last: Penstein Rosé} - - {first: Carolyn Penstein, last: Rosé} - - {first: Carolyn, last: Penstein-Rosé} - - {first: Carolyn, last: Rosé} -- canonical: {first: Tony, last: Rose} - id: tony-rose - variants: - - {first: Tony G., last: Rose} -- canonical: {first: Ronald, last: Rosenfeld} - id: ronald-rosenfeld -- canonical: {first: Stanley J., last: Rosenschein} - variants: - - {first: Stanley, last: Rosenschein} - - {first: Stan, last: Rosenschein} -- canonical: {first: Michael, last: Rosner} - id: michael-rosner - variants: - - {first: Mike, last: Rosner} -- canonical: {first: Peter, last: Rossen Skadhauge} - variants: - - {first: Peter Rossen, last: Skadhauge} -- canonical: {first: Sophie, last: Rosset} - id: sophie-rosset -- canonical: {first: Piercarlo, last: Rossi} - id: piercarlo-rossi -- canonical: {first: Stefano Dei, last: Rossi} - variants: - - {first: Stefano, last: Dei Rossi} -- canonical: {first: Alexandre, last: Rossi Alvares} - variants: - - {first: Alexandre Rossi, last: Alvares} -- canonical: {first: Antti-Veikko, last: Rosti} - variants: - - {first: Antti-Veikko I., last: Rosti} -- canonical: {first: Ryan, last: Roth} - variants: - - {first: Ryan, last: M. Roth} -- canonical: {first: Steven, last: Roth} - variants: - - {first: Steven F., last: Roth} -- canonical: {first: Jacques, last: Rouault} - id: jacques-rouault -- canonical: {first: Brigitte, last: Roudaud} - id: brigitte-roudaud -- canonical: {first: Salim, last: Roukos} - id: salim-roukos -- canonical: {first: Grégory, last: Roulet--Guiot} - variants: - - {first: Grégory, last: Roulet-Guiot} -- canonical: {first: François, last: Rousselot} - id: francois-rousselot - variants: - - {first: Francois, last: Rousselot} -- canonical: {first: Bryan R., last: Routledge} - variants: - - {first: Bryan, last: Routledge} -- canonical: {first: Justus C., last: Roux} - id: justus-c-roux -- canonical: {first: Rachel Edita, last: Roxas} - variants: - - {first: Rachel Edita O., last: Roxas} - - {first: Rachel, last: Roxas} -- canonical: {first: Deb, last: Roy} - variants: - - {first: Suman, last: Deb Roy} -- canonical: {first: Antje, last: Roßdeutscher} - variants: - - {first: Antje, last: Rossdeutscher} -- canonical: {first: Victoria L., last: Rubin} - variants: - - {first: Victoria, last: Rubin} -- canonical: {first: Raphael, last: Rubino} - variants: - - {first: Raphaël, last: Rubino} -- canonical: {first: Antonio J., last: Rubio} - id: antonio-j-rubio -- canonical: {first: Alex, last: Rudnick} - id: alex-rudnick - similar: [alexander-rudnicky] -- canonical: {first: Alexander, last: Rudnicky} - id: alexander-rudnicky - similar: [alex-rudnick] - variants: - - {first: Alexander I., last: Rudnicky} - - {first: Alex, last: Rudnicky} -- canonical: {first: Björn, last: Rudzewitz} - variants: - - {first: Bjoern, last: Rudzewitz} -- canonical: {first: Stefan, last: Rued} - variants: - - {first: Stefan, last: Rüd} -- canonical: {first: Pablo, last: Ruiz Fabo} - variants: - - {first: Pablo, last: Ruiz} -- canonical: {first: María, last: Ruiz-Casado} - variants: - - {first: Maria, last: Ruiz-Casado} -- canonical: {first: Juana María, last: Ruiz-Martínez} - variants: - - {first: Juana Maria, last: Ruiz-Martínez} - - {first: Juana Maria, last: Ruiz Martinez} -- canonical: {first: C.J., last: Rupp} - variants: - - {first: C. J., last: Rupp} -- canonical: {first: Alexander M., last: Rush} - variants: - - {first: Alexander, last: Rush} -- canonical: {first: Albert, last: Russel} - id: albert-russel -- canonical: {first: Graham, last: Russell} - id: graham-russell - variants: - - {first: Graham J., last: Russell} -- canonical: {first: Martin, last: Russell} - id: martin-russell -- canonical: {first: Natalia Kariaeva, last: Rutgers} - variants: - - {first: Natalia, last: Kariaeva} -- canonical: {first: Jean David, last: Ruvini} - variants: - - {first: Jean-David, last: Ruvini} -- canonical: {first: Tatyana, last: Ruzsics} - variants: - - {first: Tatiana, last: Ruzsics} -- canonical: {first: Karen L., last: Ryan} - variants: - - {first: Karen, last: Ryan} -- canonical: {first: Pavel, last: Rychlý} - variants: - - {first: Pavel, last: Rychly} -- canonical: {first: Ju-yeon, last: Ryu} - variants: - - {first: Ju-Yeon, last: Ryu} -- canonical: {first: Won Ho, last: Ryu} - variants: - - {first: Won-Ho, last: Ryu} -- canonical: {first: Eirikur, last: Rögnvaldsson} - variants: - - {first: Eiríkur, last: Rögnvaldsson} -- canonical: {first: Dietmar, last: Rösner} - id: dietmar-rosner - variants: - - {first: Dietmar, last: Rosner} - - {first: Dietmar F., last: Roesner} - - {first: Dietmar, last: Roesner} -- canonical: {first: Carlos Subirats, last: Rüggeberg} - variants: - - {first: Carlos, last: Subirats} -- canonical: {first: Lakshmi, last: S} - variants: - - {first: Lakshmi, last: Saheer} - - {first: Lakshmi, last: S.} -- canonical: {first: Houda, last: Saadane} - variants: - - {first: Houda, last: Saâdane} -- canonical: {first: Sari, last: Saba-Sadiya} - variants: - - {first: Sari, last: Sadiya} -- canonical: {first: Victor, last: Sadler} - id: victor-sadler -- canonical: {first: Mehrnoosh, last: Sadrzadeh} - id: mehrnoosh-sadrzadeh -- canonical: {first: Naomi, last: Sager} - id: naomi-sager -- canonical: {first: Benoît, last: Sagot} - variants: - - {first: Benoit, last: Sagot} -- canonical: {first: Herve, last: Saint-Amand} - variants: - - {first: Hervé, last: Saint-Amand} -- canonical: {first: Patrick, last: Saint-Dizier} - variants: - - {first: Patrick, last: Saint Dizier} -- canonical: {first: Suguru, last: Saitô} - variants: - - {first: Suguru, last: Saito} -- canonical: {first: Rafa, last: Saiz} - id: rafa-saiz -- canonical: {first: Maximiliano, last: Saiz-Noeda} - id: maximiliano-saiz-noeda -- canonical: {first: Satoshi, last: Sakai} - id: satoshi-sakai -- canonical: {first: Sebastián Peña, last: Saldarriaga} - variants: - - {first: Peña, last: Saldarriaga} - - {first: Sebastian, last: Peña Saldarriaga} -- canonical: {first: Juliano Efson, last: Sales} - variants: - - {first: Juliano, last: Efson Sales} - - {first: Juliano, last: Sales} -- canonical: {first: Morris, last: Salkoff} - id: morris-salkoff -- canonical: {first: Ansaf, last: Salleb-Aouissi} - variants: - - {first: Ansaf, last: Salleb-Aoussi} -- canonical: {first: Gerard, last: Salton} - id: gerard-salton -- canonical: {first: Giancarlo, last: Salton} - variants: - - {first: Giancarlo D., last: Salton} -- canonical: {first: Madis, last: Saluveer} - id: madis-saluveer -- canonical: {first: Sethserey, last: Sam*’} - variants: - - {first: Sethserey, last: Sam} -- canonical: {first: Rasoul, last: Samad Zadeh Kaljahi} - variants: - - {first: Rasul, last: Samad Zadeh Kaljahi} -- canonical: {first: Tanja, last: Samardzic} - variants: - - {first: Tanja, last: Samardžić} -- canonical: {first: Nagiza, last: Samatova} - variants: - - {first: Nagiza F., last: Samatova} -- canonical: {first: Hossein, last: Sameti} - id: hossein-sameti -- canonical: {first: Ken, last: Samuel} - variants: - - {first: Kenneth, last: Samuel} -- canonical: {first: Ruben, last: San-Segundo} - variants: - - {first: Rubén, last: San-Segundo} -- canonical: {first: Daniel, last: Sanchez-Cisneros} - variants: - - {first: Daniel, last: Sánchez} -- canonical: {first: Olivia, last: Sanchez-Graillet} - variants: - - {first: Olivia, last: Sanchez} -- canonical: {first: Emilio, last: Sanchis} - variants: - - {first: Emilio, last: Sanchís} -- canonical: {first: Germán, last: Sanchis-Trilles} - variants: - - {first: Germán, last: Sanchis Trilles} - - {first: Germán, last: Sanchis} -- canonical: {first: Gregory, last: Sanders} - variants: - - {first: Gregory A., last: Sanders} - - {first: Greg, last: Sanders} -- canonical: {first: Baskaran, last: Sankaran} - variants: - - {first: Sankaran, last: Baskaran} -- canonical: {first: Beatrice, last: Santorini} - id: beatrice-santorini -- canonical: {first: Eddie Antonio, last: Santos} - orcid: 0000-0001-5337-715X - variants: - - {first: Eddie A., last: Santos} - - {first: Eddie, last: Antonio Santos} - - {first: Eddie, last: Santos} -- canonical: {first: Estela, last: Saquete} - id: estela-saquete - variants: - - {first: Estela, last: Saquete Boro} -- canonical: {first: Murat, last: Saraclar} - variants: - - {first: Murat, last: Saraçlar} -- canonical: {first: Xabier, last: Saralegi} - id: xabier-saralegi -- canonical: {first: Kepa, last: Sarasola} - id: kepa-sarasola -- canonical: {first: K, last: Saravanan} - id: k-saravanan - variants: - - {first: Saravanan, last: K} - - {first: K., last: Saravanan} -- canonical: {first: Ruhi, last: Sarikaya} - variants: - - {first: Ruhi, last: Srikaya} -- canonical: {first: Efsun, last: Sarioglu Kayi} - variants: - - {first: Efsun, last: Sarioglu} -- canonical: {first: Anish Das, last: Sarma} - variants: - - {first: Atish Das, last: Sarma} -- canonical: {first: Shikhar Kr., last: Sarma} - degree: Gauhati University - orcid: 0000-0002-9495-1901 - id: shikhar-kumar-sarma-gu - variants: - - {first: Shikhar, last: Sarma} - - {first: Shikhar, last: Sharma} -- canonical: {first: Shikhar, last: Sharma} - comment: May refer to multiple people - id: shikhar-sharma -- canonical: {first: Vaijayanthi M., last: Sarma} - variants: - - {first: Vaijayanthi, last: Sarma} -- canonical: {first: Satoshi, last: Sato} - id: satoshi-sato -- canonical: {first: Pavankumar, last: Satuluri} - variants: - - {first: Pavan Kumar, last: Satuluri} -- canonical: {first: Baiba, last: Saulīte} - variants: - - {first: Baiba, last: Saulite} -- canonical: {first: Roser, last: Saurí} - variants: - - {first: Roser, last: Sauri} -- canonical: {first: Asad, last: Sayeed} - variants: - - {first: Asad B., last: Sayeed} -- canonical: {first: Yucel, last: Saygin} - variants: - - {first: Yücel, last: Saygın} -- canonical: {first: Carolina, last: Scarton} - variants: - - {first: Carolina Evaristo, last: Scarton} -- canonical: {first: Remko, last: Scha} - id: remko-scha - variants: - - {first: Remko J. H., last: Scha} -- canonical: {first: Moritz, last: Schaeffer} - variants: - - {first: Moritz Jonas, last: Schaeffer} -- canonical: {first: Roger C., last: Schank} - variants: - - {first: Roger, last: Schank} -- canonical: {first: Peter, last: Schauble} - variants: - - {first: Peter, last: Schäuble} -- canonical: {first: Judith D., last: Schlesinger} - variants: - - {first: Judith, last: Schlesinger} -- canonical: {first: Michael, last: Schlichtkrull} - variants: - - {first: Michael Sejr, last: Schlichtkrull} -- canonical: {first: Ralf, last: Schlueter} - variants: - - {first: Ralf, last: Schlüter} -- canonical: {first: Julian J., last: Schlöder} - variants: - - {first: Julian, last: Schlöder} -- canonical: {first: Laurent, last: Schmitt} - id: laurent-schmitt -- canonical: {first: René, last: Schneider} - variants: - - {first: Rene, last: Schneider} -- canonical: {first: Edward, last: Schofield} - variants: - - {first: Ed, last: Schofield} -- canonical: {first: Natalie M., last: Schrimpf} - variants: - - {first: Natalie, last: Schrimpf} -- canonical: {first: Elizabeth, last: Schroeder} - variants: - - {first: Elizabeth Schroeder, last: Richerson} - - {first: Elizabeth, last: Richerson} -- canonical: {first: Lenhart, last: Schubert} - variants: - - {first: Lenhart K., last: Schubert} - - {first: Len, last: Schubert} -- canonical: {first: Björn, last: Schuller} - variants: - - {first: Bjoern, last: Schuller} -- canonical: {first: Sabine, last: Schulte im Walde} - variants: - - {first: Sabine, last: Schulte Im Walde} - - {first: Sabine, last: Schulte in Walde} -- canonical: {first: Robert T., last: Schultz} - variants: - - {first: Robert, last: Schultz} -- canonical: {first: Julia Maria, last: Schulz} - variants: - - {first: Julia, last: Schulz} -- canonical: {first: Stefan, last: Schulz} - variants: - - {first: Stefan, last: Schultz} -- canonical: {first: Sarah E., last: Schwarm} - variants: - - {first: Sarah, last: Schwarm} -- canonical: {first: Ariel, last: Schwartz} - variants: - - {first: Ariel S., last: Schwartz} -- canonical: {first: H. Andrew, last: Schwartz} - variants: - - {first: Hansen Andrew, last: Schwartz} - - {first: Hansen A., last: Schwartz} - - {first: H Andrew, last: Schwartz} -- canonical: {first: Richard, last: Schwartz} - id: richard-schwartz - variants: - - {first: Rich, last: Schwartz} -- canonical: {first: Ulrich, last: Schäfer} - variants: - - {first: Ulrich, last: Schafer} - - {first: Ulrich, last: Schaefer} -- canonical: {first: Martin, last: Schäler} - variants: - - {first: Martin, last: Schäfer} -- canonical: {first: Reinhard, last: Schäler} - variants: - - {first: Reinhard, last: Schaler} -- canonical: {first: Hinrich, last: Schütze} - variants: - - {first: Hinrich, last: Schutze} - - {first: Hinrich, last: Schuetze} -- canonical: {first: Donia, last: Scott} - id: donia-scott - variants: - - {first: Donia R., last: Scott} -- canonical: {first: Djamé, last: Seddah} - variants: - - {first: Djame, last: Seddah} -- canonical: {first: Roxane, last: Segers} - variants: - - {first: Roxanne, last: Segers} -- canonical: {first: Frédérique, last: Segond} - variants: - - {first: Frederique, last: Segond} -- canonical: {first: Jérémie, last: Segouat} - id: jeremie-segouat -- canonical: {first: Isabel, last: Segura-Bedmar} - variants: - - {first: Isabel, last: Segura Bedmar} -- canonical: {first: Corrado, last: Seidenari} - id: corrado-seidenari -- canonical: {first: Bernard, last: Seite} - id: bernard-seite -- canonical: {first: Ethan, last: Selfridge} - variants: - - {first: Ethan O., last: Selfridge} -- canonical: {first: Sathiya Keerthi, last: Selvaraj} - variants: - - {first: Sathiya, last: Keerthi} -- canonical: {first: Jiří, last: Semecký} - variants: - - {first: Jirí, last: Semecky} -- canonical: {first: Giovanni, last: Semeraro} - id: giovanni-semeraro -- canonical: {first: Stephanie, last: Seneff} - id: stephanie-seneff -- canonical: {first: Hongsuck, last: Seo} - variants: - - {first: Paul Hongsuck, last: Seo} -- canonical: {first: Jungyun, last: Seo} - variants: - - {first: Jung Yun, last: Seo} -- canonical: {first: Luciano, last: Serafini} - id: luciano-serafini -- canonical: {first: Iulian Vlad, last: Serban} - variants: - - {first: Iulian, last: Serban} -- canonical: {first: Jean-François, last: Serignat} - id: jean-francois-serignat -- canonical: {first: Nicolás, last: Serrano} - variants: - - {first: Nicolas, last: Serrano} -- canonical: {first: Christophe, last: Servan} - id: christophe-servan -- canonical: {first: Andrea, last: Setzer} - id: andrea-setzer -- canonical: {first: Jurica, last: Seva} - variants: - - {first: Jurica, last: Ševa} -- canonical: {first: Ayisigi B., last: Sevdik-Calli} - variants: - - {first: Ayişiği, last: Sevdik-Çalli} -- canonical: {first: Binyam Ephrem, last: Seyoum} - variants: - - {first: Binyam, last: Ephrem} -- canonical: {first: Petr, last: Sgall} - id: petr-sgall -- canonical: {first: Khaled, last: Shaban} - variants: - - {first: Khaled, last: Bashir Shaban} -- canonical: {first: Rajiv, last: Shah} - variants: - - {first: Rajiv Ratn, last: Shah} -- canonical: {first: Ritesh, last: Shah} - variants: - - {first: Ritesh M., last: Shah} -- canonical: {first: Mostafa, last: Shahin} - id: mostafa-shahin -- canonical: {first: Adi, last: Shalev} - variants: - - {first: Adi, last: Bitan} -- canonical: {first: Zoya M., last: Shalyapina} - id: zoya-m-shalyapina - variants: - - {first: Zoyn M., last: Shalyapina} -- canonical: {first: Stuart C., last: Shapiro} - id: stuart-c-shapiro -- canonical: {first: Abdul-Baquee, last: Sharaf} - variants: - - {first: Abdul-Baquee M., last: Sharaf} -- canonical: {first: Dipti Misra, last: Sharma} - variants: - - {first: Dipti, last: Misra Sharma} - - {first: Dipti, last: Sharma} - - {first: Dipti M., last: Sharma} - - {first: Dipti, last: Misra} - - {first: Dipti M, last: Sharma} -- canonical: {first: Harsh Vardhan, last: Sharma} - variants: - - {first: Harsh, last: Sharma} -- canonical: {first: Vishnu Dutt, last: Sharma} - variants: - - {first: Vishnu, last: Sharma} -- canonical: {first: Richard A., last: Sharman} - id: richard-a-sharman -- canonical: {first: Stefanie, last: Shattuck-Hufnagel} - variants: - - {first: S. Shattuck, last: Hufnagel} -- canonical: {first: Hassan S., last: Shavarani} - variants: - - {first: Hassan, last: Shavarani} -- canonical: {first: Bayan Abu, last: Shawar} - variants: - - {first: Bayan, last: Abu Shawar} -- canonical: {first: Kathleen M., last: Sheehan} - variants: - - {first: Kathleen, last: Sheehan} -- canonical: {first: Golnar, last: Sheikhshab} - variants: - - {first: Golnar, last: Sheikhshabbafghi} -- canonical: {first: Jia-Lin, last: Shen} - variants: - - {first: Jia-lin, last: Shen} -- canonical: {first: David D., last: Sherertz} - id: david-d-sherertz -- canonical: {first: Mohamed Ahmed, last: Sherif} - variants: - - {first: Mohamed, last: Sherif} -- canonical: {first: Kyumars, last: Sheykh Esmaili} - variants: - - {first: Kyumars Sheykh, last: Esmaili} -- canonical: {first: Freda, last: Shi} - id: freda-shi - orcid: 0009-0009-5697-449X - variants: - - {first: Haoyue, last: Shi} -- canonical: {first: Stuart M., last: Shieber} - variants: - - {first: Stuart, last: Shieber} -- canonical: {first: Hsue-Hueh, last: Shih} - variants: - - {first: Rebecca Hsue-Hueh, last: Shih} -- canonical: {first: Katsumasa, last: Shimizu} - id: katsumasa-shimizu -- canonical: {first: Tohru, last: Shimizu} - variants: - - {first: Toru, last: Shimizu} -- canonical: {first: Mitsuo, last: Shimohata} - id: mitsuo-shimohata -- canonical: {first: Saim, last: Shin} - variants: - - {first: Sa-Im, last: Shin} -- canonical: {first: Katsuhiko, last: Shirai} - id: katsuhiko-shirai -- canonical: {first: Satoshi, last: Shirai} - variants: - - {first: Satosi, last: Shirai} -- canonical: {first: Praneeth M., last: Shishtla} - variants: - - {first: Praneeth, last: Shishtla} - - {first: Praneeth M, last: Shishtla} -- canonical: {first: Darla Magdalene, last: Shockley} - variants: - - {first: Darla, last: Shockley} -- canonical: {first: Prajwol, last: Shrestha} - variants: - - {first: Prajol, last: Shrestha} -- canonical: {first: Elizabeth, last: Shriberg} - id: elizabeth-shriberg -- canonical: {first: Manish, last: Shrivastava} - variants: - - {first: Manish, last: Srivastava} -- canonical: {first: Heung Yeung, last: Shum} - variants: - - {first: Heung-Yeung, last: Shum} -- canonical: {first: Elvira I., last: Sicilia-Garcia} - id: elvira-i-sicilia-garcia -- canonical: {first: Candace L., last: Sidner} - variants: - - {first: Candace, last: Sidner} -- canonical: {first: Gerardo, last: Sierra} - variants: - - {first: Gerardo, last: Sierra-Martínez} -- canonical: {first: Utpal Kumar, last: Sikdar} - variants: - - {first: Utpal, last: Sikdar} -- canonical: {first: Avirup, last: Sil} - variants: - - {first: Avi, last: Sil} -- canonical: {first: Max, last: Silberztein} - variants: - - {first: Max D., last: Silberztein} -- canonical: {first: Miikka, last: Silfverberg} - variants: - - {first: Miikka P., last: Silfverberg} -- canonical: {first: João, last: Silva} - variants: - - {first: João Ricardo, last: Silva} -- canonical: {first: Mario J., last: Silva} - variants: - - {first: Mário J., last: Silva} - - {first: Mário, last: Silva} -- canonical: {first: Kim E. A., last: Silverman} - variants: - - {first: Kim E.A., last: Silverman} -- canonical: {first: Khe Chai, last: Sim} - variants: - - {first: Khe-Chai, last: Sim} -- canonical: {first: Karin, last: Sim Smith} - variants: - - {first: Karin Sim, last: Smith} -- canonical: {first: Khalil, last: Sima’an} - id: khalil-simaan -- canonical: {first: Katalin Ilona, last: Simkó} - variants: - - {first: Katalin, last: Simkó} -- canonical: {first: Anca-Roxana, last: Simon} - variants: - - {first: Anca, last: Simon} - - {first: Anca-Roxana, last: Şimon} -- canonical: {first: Nathalie, last: Simonin} - id: nathalie-simonin -- canonical: {first: Dan, last: Simonson} - variants: - - {first: Daniel, last: Simonson} -- canonical: {first: Kiril, last: Simov} - variants: - - {first: Kiril Iv., last: Simov} -- canonical: {first: King Kui, last: Sin} - id: king-kui-sin - variants: - - {first: KingKui, last: Sin} -- canonical: {first: Anil Kumar, last: Singh} - variants: - - {first: Anil, last: Kumar Singh} -- canonical: {first: Munindar P., last: Singh} - variants: - - {first: Munindar, last: Singh} -- canonical: {first: Thoudam Doren, last: Singh} - variants: - - {first: Thoudam, last: Doren Singh} -- canonical: {first: R Mahesh K, last: Sinha} - variants: - - {first: R. Mahesh K., last: Sinha} -- canonical: {first: Inguna, last: Skadiņa} - variants: - - {first: Inguna, last: Skadina} - - {first: Inguna, last: Skadin̨a} -- canonical: {first: Wojciech, last: Skalmowski} - id: wojciech-skalmowski -- canonical: {first: Romuald, last: Skiba} - id: romuald-skiba -- canonical: {first: Steven, last: Skiena} - variants: - - {first: Steve, last: Skiena} -- canonical: {first: Michael, last: Skinner} - variants: - - {first: Michael A., last: Skinner} -- canonical: {first: Hana, last: Skoumalova} - variants: - - {first: Hana, last: Skoumalová} -- canonical: {first: Frank, last: Smadja} - variants: - - {first: Frank A., last: Smadja} -- canonical: {first: Kamel, last: Smaili} - variants: - - {first: Kamel, last: Smaïli} -- canonical: {first: Nasser, last: Smaili} - id: nasser-smaili -- canonical: {first: Sharon, last: Small} - variants: - - {first: Sharon, last: Gower Small} -- canonical: {first: R. A., last: Smit} - variants: - - {first: R.A., last: Smit} -- canonical: {first: Andrew, last: Smith} - variants: - - {first: Andrew E., last: Smith} -- canonical: {first: Brian Cantwell, last: Smith} - variants: - - {first: Brian, last: Smith} -- canonical: {first: David A., last: Smith} - variants: - - {first: David, last: Smith} - - {first: David Addison, last: Smith} -- canonical: {first: Francis J., last: Smith} - id: francis-j-smith -- canonical: {first: Jason, last: Smith} - variants: - - {first: Jason R., last: Smith} -- canonical: {first: Mark H., last: Smith} - variants: - - {first: Mark, last: Smith} -- canonical: {first: Noah A., last: Smith} - variants: - - {first: Noah, last: Smith} -- canonical: {first: Raoul N., last: Smith} - variants: - - {first: Raoul N, last: Smith} -- canonical: {first: Ronnie W., last: Smith} - variants: - - {first: Ronnie, last: Smith} -- canonical: {first: Otakar, last: Smrz} - variants: - - {first: Otakar, last: Smrž} -- canonical: {first: Pavel, last: Smrz} - variants: - - {first: Pavel, last: Smrž} -- canonical: {first: Gyri, last: Smørdal Losnegaard} - variants: - - {first: Gyri S., last: Losnegaard} - - {first: Gyri, last: Losnegaard} -- canonical: {first: Matthew, last: Snover} - variants: - - {first: Matthew G., last: Snover} -- canonical: {first: Marco Antonio, last: Sobrevilla Cabezudo} - variants: - - {first: Marco A., last: Sobrevilla Cabezudo} - - {first: Marco, last: Sobrevilla} -- canonical: {first: Stephen, last: Soderland} - id: stephen-soderland -- canonical: {first: Sylvana, last: Sofkova Hashemi} - variants: - - {first: Sylvana, last: Sofkova} -- canonical: {first: Artem, last: Sokolov} - variants: - - {first: Artem, last: Sokokov} -- canonical: {first: Juan José Rodríguez, last: Soler} - variants: - - {first: Juan José, last: Rodríguez} -- canonical: {first: Joan, last: Soler i Bou} - variants: - - {first: Joan, last: Soler} -- canonical: {first: Juan, last: Soler-Company} - variants: - - {first: Juan, last: Soler Company} -- canonical: {first: Aitor, last: Sologaistoa} - id: aitor-sologaistoa -- canonical: {first: Harold, last: Somers} - id: harold-somers - variants: - - {first: Harold L., last: Somers} -- canonical: {first: Norman K., last: Sondheimer} - variants: - - {first: Norman, last: Sondheimer} -- canonical: {first: Young Chol, last: Song} - variants: - - {first: Young C., last: Song} -- canonical: {first: Cagil, last: Sonmez} - variants: - - {first: Çağıl, last: Sönmez} - - {first: Cagil, last: Sönmez} -- canonical: {first: Von-Wun, last: Soo} - variants: - - {first: Von-wun, last: Soo} -- canonical: {first: Frank K., last: Soong} - variants: - - {first: Frank, last: Soong} -- canonical: {first: Jeffrey, last: Sorensen} - variants: - - {first: Jeffrey S., last: Sorensen} -- canonical: {first: Aitor, last: Soroa} - id: aitor-soroa - variants: - - {first: Aitor, last: Soroa Etxabe} -- canonical: {first: Ionut, last: Sorodoc} - variants: - - {first: Ionut-Teodor, last: Sorodoc} -- canonical: {first: William, last: Soto Martinez} - variants: - - {first: William, last: Soto} -- canonical: {first: Susana, last: Sotelo} - orcid: 0000-0002-0067-7957 - variants: - - {first: Susana Sotelo, last: Docio} -- canonical: {first: Maria Clara Paixão de, last: Sousa} - variants: - - {first: Maria Clara, last: Paixão de Sousa} -- canonical: {first: David Cabrero, last: Souto} - variants: - - {first: David, last: Cabrero} -- canonical: {first: Jackson, last: Souza} - id: jackson-souza -- canonical: {first: Vinícius Mourão Alves de, last: Souza} - variants: - - {first: Vinícius Mourão Alves, last: de Souza} -- canonical: {first: Irena, last: Spasić} - variants: - - {first: Irena, last: Spasic} -- canonical: {first: Manuela, last: Speranza} - id: manuela-speranza -- canonical: {first: Valentin I., last: Spitkovsky} - variants: - - {first: Valentin, last: Spitkovsky} -- canonical: {first: Drahomíra “johanka”, last: Spoustová} - variants: - - {first: Johanka, last: Spoustová} - - {first: Drahomíra „johanka“, last: Spoustová} -- canonical: {first: Richard, last: Sproat} - variants: - - {first: Richard W., last: Sproat} -- canonical: {first: Rachele, last: Sprugnoli} - id: rachele-sprugnoli -- canonical: {first: Shannon L., last: Spruit} - variants: - - {first: Shannon, last: Spruit} -- canonical: {first: Peter, last: Spyns} - id: peter-spyns -- canonical: {first: Constantine D., last: Spyropoulos} - variants: - - {first: Constantine, last: Spyropoulos} -- canonical: {first: Karen, last: Spärck Jones} - id: karen-sparck-jones - variants: - - {first: Karen, last: Sparck Jones} - - {first: Karen, last: Jones} -- canonical: {first: Rohini K., last: Srihari} - variants: - - {first: Rohini, last: Srihari} - - {first: K. Rohini, last: Srihari} -- canonical: {first: Munirathnam, last: Srikanth} - id: munirathnam-srikanth - variants: - - {first: Muirathnam, last: Srikanth} -- canonical: {first: Somayajulu, last: Sripada} - variants: - - {first: Somayajulu G., last: Sripada} - - {first: Somayajula G., last: Sripada} - - {first: Somayajulu Gowri, last: Sripada} -- canonical: {first: Ankit, last: Srivastava} - variants: - - {first: Ankit Kumar, last: Srivastava} - - {first: Ankit K., last: Srivastava} - - {first: Ankit, last: Kumar} -- canonical: {first: Edward, last: Stabler} - variants: - - {first: Edward P., last: 'Stabler, Jr.'} - - {first: Edward P., last: Stabler} -- canonical: {first: Gregory, last: Stainhauer} - id: gregory-stainhauer -- canonical: {first: David, last: Stallard} - id: david-stallard - variants: - - {first: David G., last: Stallard} -- canonical: {first: Bonnie Glover, last: Stalls} - variants: - - {first: Bonnie, last: Glover} -- canonical: {first: Efstathios, last: Stamatatos} - id: efstathios-stamatatos -- canonical: {first: Ranka, last: Stanković} - variants: - - {first: Ranka, last: Stankoviæ} -- canonical: {first: Ingrid, last: Starke} - id: ingrid-starke -- canonical: {first: Anatoli, last: Starostin} - variants: - - {first: Anatoly, last: Starostin} -- canonical: {first: Mark, last: Steedman} - id: mark-steedman -- canonical: {first: Dan, last: Stefanescu} - variants: - - {first: Dan, last: Ştefănescu} - - {first: Dan, last: Ştefanescu} - - {first: Dan, last: Ștefănescu} -- canonical: {first: Stefan, last: Steidl} - id: stefan-steidl -- canonical: {first: Erich H., last: Steiner} - variants: - - {first: Erich, last: Steiner} -- canonical: {first: Egon, last: Stemle} - variants: - - {first: Egon W., last: Stemle} -- canonical: {first: Amanda, last: Stent} - id: amanda-stent - variants: - - {first: Amanda J., last: Stent} -- canonical: {first: Evgeny, last: Stepanov} - variants: - - {first: Evgeny A., last: Stepanov} -- canonical: {first: Richard M., last: Stern} - variants: - - {first: Richard, last: Stern} -- canonical: {first: Rosemary, last: Stevenson} - id: rosemary-stevenson -- canonical: {first: Brandon M., last: Stewart} - variants: - - {first: Brandon, last: Stewart} -- canonical: {first: Robert, last: Stewart} - variants: - - {first: Rob, last: Stewart} -- canonical: {first: Andreas, last: Stolcke} - id: andreas-stolcke -- canonical: {first: Scott C., last: Stoness} - variants: - - {first: Scott, last: Stoness} -- canonical: {first: Dennis Ryan, last: Storoshenko} - variants: - - {first: Dennis R., last: Storoshenko} -- canonical: {first: Marco Antonio, last: Stranisci} - variants: - - {first: Marco, last: Stranisci} -- canonical: {first: Stephanie, last: Strassel} - variants: - - {first: Stephanie M., last: Strassel} -- canonical: {first: Helmer, last: Strik} - id: helmer-strik -- canonical: {first: Lena, last: Stromback} - variants: - - {first: Lena, last: Strömbäck} -- canonical: {first: Jennifer, last: Stromer-Galley} - variants: - - {first: Jennifer, last: Strommer-Galley} -- canonical: {first: Tomek, last: Strzalkowski} - id: tomek-strzalkowski - variants: - - {first: Tomek, last: Strzalkowskl} -- canonical: {first: Sofia, last: Strönbergsson} - variants: - - {first: Sofia, last: Strömbergsson} -- canonical: {first: Janienke, last: Sturm} - id: janienke-sturm -- canonical: {first: Dean, last: Sturtevant} - variants: - - {first: Dean G., last: Sturtevant} -- canonical: {first: Margo, last: Stys-Budzikowska} - variants: - - {first: Margo, last: Budzikowska} - - {first: Margo, last: Stys} -- canonical: {first: Marie-Hélène, last: Stéfanini} - variants: - - {first: Marie-Helene, last: Stefanini} -- canonical: {first: Yang, last: Zhang} - comment: USTC - id: yang-zhang-ustc - orcid: 0000-0002-7863-5183 - institution: University of Science and Technology of China -- canonical: {first: Yang, last: Zhang} - id: yang-zhang - comment: May refer to several people -- canonical: {first: Sebastian, last: Stüker} - variants: - - {first: Sebastian, last: Stueker} -- canonical: {first: Cheng-chao, last: Su} - variants: - - {first: Cheng-Chao, last: Su} -- canonical: {first: L. Venkata, last: Subramaniam} - id: l-venkata-subramaniam - variants: - - {first: L Venkata, last: Subramaniam} -- canonical: {first: Shivashankar, last: Subramanian} - variants: - - {first: S., last: Shivashankar} -- canonical: {first: Amarnag, last: Subramanya} - variants: - - {first: Amar, last: Subramanya} -- canonical: {first: Fabian, last: Suchanek} - variants: - - {first: Fabian M., last: Suchanek} -- canonical: {first: Vit, last: Suchomel} - variants: - - {first: Vít, last: Suchomel} -- canonical: {first: David, last: Suendermann-Oeft} - variants: - - {first: David, last: Suendermann} -- canonical: {first: Masakatsu, last: Sugimoto} - id: masakatsu-sugimoto -- canonical: {first: Ryochi, last: Sugimura} - id: ryochi-sugimura -- canonical: {first: Yoshi, last: Suhara} - variants: - - {first: Yoshihiko, last: Suhara} -- canonical: {first: '', last: Sukhada} - variants: - - {first: Sukhada, last: Palkar} -- canonical: {first: Jana, last: Sukkarieh} - variants: - - {first: Jana Z., last: Sukkarieh} -- canonical: {first: Md Arafat, last: Sultan} - variants: - - {first: Md. Arafat, last: Sultan} - - {first: Md., last: Sultan} -- canonical: {first: Eiichiro, last: Sumita} - variants: - - {first: Eiichro, last: Sumita} -- canonical: {first: Cheng-Jie, last: Sun} - variants: - - {first: Chengjie, last: Sun} -- canonical: {first: Jingguang, last: Sun} - variants: - - {first: JingGuang, last: Sun} -- canonical: {first: Sheng-he, last: Sun} - variants: - - {first: Sheng-He, last: Sun} -- canonical: {first: Weiwei, last: Sun} - comment: Shandong University - id: weiwei-sun-sd -- canonical: {first: Yufang, last: Sun} - variants: - - {first: Yu-fang, last: Sun} -- canonical: {first: Vijay, last: Sundar Ram} - variants: - - {first: Vijay Sundar, last: Ram} - - {first: R. Vijay Sundar, last: Ram} - - {first: Vijay Sundar Ram, last: R} -- canonical: {first: Sowmya S., last: Sundaram} - variants: - - {first: Sowmya S, last: Sundaram} -- canonical: {first: Beth M., last: Sundheim} - variants: - - {first: Beth, last: Sundheim} -- canonical: {first: Yao-Ting, last: Sung} - variants: - - {first: Yao-Ting, last: Hung} -- canonical: {first: Simon, last: Suster} - variants: - - {first: Simon, last: Šuster} -- canonical: {first: Richard F. E., last: Sutcliffe} - variants: - - {first: Richard F.E., last: Sutcliffe} -- canonical: {first: Armando, last: Suárez} - id: armando-suarez -- canonical: {first: Mari Carmen, last: Suárez-Figueroa} - variants: - - {first: M. Carmen, last: Suárez-Figueroa} -- canonical: {first: Piergiorgio, last: Svaizer} - id: piergiorgio-svaizer -- canonical: {first: Ben, last: Swanson} - variants: - - {first: Benjamin, last: Swanson} -- canonical: {first: Daniel G., last: Swanson} - variants: - - {first: Daniel, last: Swanson} -- canonical: {first: Robert S., last: Swier} - variants: - - {first: Robert, last: Swier} -- canonical: {first: Mary, last: Swift} - variants: - - {first: Mary D., last: Swift} -- canonical: {first: A.J.M., last: Szanser} - variants: - - {first: A.J., last: Szanser} -- canonical: {first: Stan, last: Szpakowicz} - variants: - - {first: Stanislaw, last: Szpakowicz} - - {first: Stanisław, last: Szpakowicz} -- canonical: {first: Marcin, last: Szummer} - variants: - - {first: Martin, last: Szummer} -- canonical: {first: Joan-Andreu, last: Sánchez} - variants: - - {first: Joan-Andreu, last: Sanchez} - - {first: Joan Andreu, last: Sánchez} -- canonical: {first: Jon, last: Sánchez} - id: jon-sanchez - variants: - - {first: Jon, last: Sanchez} -- canonical: {first: Víctor M., last: Sánchez-Cartagena} - variants: - - {first: Victor M., last: Sánchez-Cartagena} -- canonical: {first: Cristina, last: Sánchez-Marco} - variants: - - {first: Cristina, last: Marco} - - {first: Cristina Sánchez, last: Marco} -- canonical: {first: J. Fernando, last: Sánchez-Rada} - variants: - - {first: Fernando, last: Sánchez-Rada} -- canonical: {first: Ágnes, last: Sándor} - variants: - - {first: Agnes, last: Sandor} -- canonical: {first: Anna, last: Sågvall Hein} - variants: - - {first: Anna Sagvall, last: Hein} - - {first: Anna Sågvall, last: Hein} -- canonical: {first: Rune, last: Sætre} - variants: - - {first: Rune, last: Saetre} -- canonical: {first: Gilles, last: Sérasset} - variants: - - {first: Gilles, last: Serasset} -- canonical: {first: Anders, last: Søgaard} - variants: - - {first: Anders, last: Sogaard} -- canonical: {first: Chris, last: Thomas} - id: chris-thomas - orcid: 0000-0002-3226-396X - variants: - - {first: Christopher, last: Thomas} -- canonical: {first: Christopher, last: Thomas} - comment: May refer to several people - id: christopher-thomas -- canonical: {first: Maite, last: Taboada} - id: maite-taboada -- canonical: {first: Martha Yifiru, last: Tachbelie} - variants: - - {first: Martha, last: Yifiru Tachbelie} -- canonical: {first: Thiago D., last: Tadeu} - variants: - - {first: Thiago, last: Tadeu} -- canonical: {first: Chia-Hung, last: Tai} - variants: - - {first: Chia-hung, last: Tai} -- canonical: {first: John, last: Tait} - variants: - - {first: John Irving, last: Tait} -- canonical: {first: Kazuya, last: Takeda} - variants: - - {first: Kasuya, last: Takeda} -- canonical: {first: Yuka, last: Takei} - variants: - - {first: Yuya, last: Takei} -- canonical: {first: Zeerak, last: Talat} - variants: - - {first: Zeerak, last: Waseem} -- canonical: {first: Susan W., last: Talbott} - variants: - - {first: Susan, last: Talbott} -- canonical: {first: Partha, last: Talukdar} - variants: - - {first: Partha Pratim, last: Talukdar} - - {first: Partha, last: Pratim Talukdar} - - {first: Partha P., last: Talukdar} -- canonical: {first: Wai Lok, last: Tam} - variants: - - {first: Wailok, last: Tam} -- canonical: {first: Fabio, last: Tamburini} - id: fabio-tamburini -- canonical: {first: Noriyuki, last: Tamura} - id: noriyuki-tamura -- canonical: {first: Chew Lim, last: Tan} - variants: - - {first: Chew-Lim, last: Tan} - - {first: ChewLim, last: Tan} -- canonical: {first: Kumiko, last: Tanaka-Ishii} - variants: - - {first: Kumiko, last: Tanaka} -- canonical: {first: Hristo, last: Tanev} - variants: - - {first: Hristo, last: Tannev} -- canonical: {first: Ahmet Cüneyd, last: Tantuğ} - variants: - - {first: A. Cüneyd, last: Tantuǧ} -- canonical: {first: Daniel, last: Tapias} - variants: - - {first: Daniel Tapias, last: Merino} -- canonical: {first: Doina, last: Tatar} - variants: - - {first: Doina, last: Tătar} -- canonical: {first: Yuka, last: Tateisi} - variants: - - {first: Yuka, last: Tateishi} -- canonical: {first: Mariona, last: Taulé} - id: mariona-taule - variants: - - {first: Mariona, last: Taule} -- canonical: {first: Miriam, last: Tavoni} - id: miriam-tavoni -- canonical: {first: Sarah, last: Taylor} - variants: - - {first: Sarah M., last: Taylor} -- canonical: {first: Suzanne Liebowitz, last: Taylor} - variants: - - {first: Suzanne, last: Liebowitz} -- canonical: {first: William J., last: Teahan} - id: william-j-teahan - variants: - - {first: William J, last: Teahan} -- canonical: {first: João Paulo, last: Teixeira} - id: joao-paulo-teixeira - variants: - - {first: João P., last: Teixeira} -- canonical: {first: Eric Sadit, last: Tellez} - variants: - - {first: Eric S., last: Tellez} -- canonical: {first: Ashish V., last: Tendulkar} - variants: - - {first: Ashish, last: Tendulkar} -- canonical: {first: Yonglin, last: Teng} - variants: - - {first: Yong-lin, last: Teng} -- canonical: {first: Harry, last: Tennant} - variants: - - {first: Harry R., last: Tennant} -- canonical: {first: Alexandre, last: Termier} - id: alexandre-termier -- canonical: {first: Egidio L., last: Terra} - variants: - - {first: Egidio, last: Terra} -- canonical: {first: Maurizio, last: Tesconi} - variants: - - {first: Maurizio, last: Tescon} -- canonical: {first: Joel, last: Tetreault} - variants: - - {first: Joel R., last: Tetreault} -- canonical: {first: Lisanne, last: Teunissen} - variants: - - {first: Lisa, last: Teunissen} -- canonical: {first: Mariët, last: Theune} - id: mariet-theune - variants: - - {first: Mariet, last: Theune} -- canonical: {first: John C., last: Thomas} - variants: - - {first: John, last: Thomas} -- canonical: {first: Richmond H., last: Thomason} - variants: - - {first: Richmond, last: Thomason} -- canonical: {first: Henry S., last: Thompson} - variants: - - {first: Henry, last: Thompson} -- canonical: {first: Hanne Erdman, last: Thomsen} - variants: - - {first: Hanne, last: Erdman Thomsen} -- canonical: {first: Chalathip, last: Thumkanon} - variants: - - {first: Chalatip, last: Thumkanon} -- canonical: {first: Junfeng, last: Tian} - variants: - - {first: Jun Feng, last: Tian} -- canonical: {first: Jörg, last: Tiedemann} - variants: - - {first: Jorg, last: Tiedemann} - - {first: Joerg, last: Tiedemann} -- canonical: {first: Laszlo, last: Tihanyi} - variants: - - {first: László, last: Tihanyi} -- canonical: {first: Christoph, last: Tillmann} - id: christoph-tillmann -- canonical: {first: Harry J., last: Tily} - variants: - - {first: Harry, last: Tily} -- canonical: {first: Ismail, last: Timimi} - id: ismail-timimi - variants: - - {first: Ismaïl, last: Timimi} -- canonical: {first: Neil, last: Tipper} - id: neil-tipper -- canonical: {first: Erik, last: Tjong Kim Sang} - variants: - - {first: Erik F., last: Tjong Kim Sang} -- canonical: {first: Tomoki, last: Toda} - variants: - - {first: Tomiki, last: Toda} -- canonical: {first: Amalia, last: Todirascu} - variants: - - {first: Amalia, last: Todiraşcu} -- canonical: {first: Doroteo T., last: Toledano} - variants: - - {first: Doroteo Torre, last: Toledano} - - {first: Doroteo, last: Toledano} -- canonical: {first: Gaurav Singh, last: Tomar} - variants: - - {first: Gaurav, last: Singh} -- canonical: {first: David, last: Tomas} - variants: - - {first: David, last: Tomás} -- canonical: {first: Masaru, last: Tomita} - id: masaru-tomita -- canonical: {first: Yoshihiro, last: Tomiyama} - id: yoshihiro-tomiyama -- canonical: {first: Laura Mayfield, last: Tomokiyo} - variants: - - {first: Laura, last: Mayfield} -- canonical: {first: Loong-Cheong, last: Tong} - variants: - - {first: Loong Cheong, last: Tong} -- canonical: {first: Fatemeh, last: Torabi Asr} - variants: - - {first: Fatemeh Torabi, last: Asr} -- canonical: {first: Adrià, last: Torrens Urrutia} - variants: - - {first: Adrià, last: Torrens-Urrutia} -- canonical: {first: Tiago Timponi, last: Torrent} - variants: - - {first: Tiago, last: Torrent} - - {first: Tiago T., last: Torrent} -- canonical: {first: Yixuan, last: Tang} - comment: HKUST - id: yixuan-tang-hkust - orcid: 0009-0006-2405-2026 - institution: Hong Kong University of Science and Technology -- canonical: {first: Yixuan, last: Tang} - comment: May refer to several people - id: yixuan-tang -- canonical: {first: M. Inés, last: Torres} - variants: - - {first: María Inés, last: Torres} -- canonical: {first: Juan-Manuel, last: Torres-Moreno} - variants: - - {first: Juan-Manuel Torres, last: Moreno} - - {first: Juan-Manuel, last: Torres} -- canonical: {first: Dilara, last: Torunoğlu-Selamet} - variants: - - {first: Dilara, last: Torunoǧlu} -- canonical: {first: Alejandro H., last: Toselli} - variants: - - {first: Alejandro Héctor, last: Toselli} -- canonical: {first: Kanokorn, last: Trakultaweekoon} - variants: - - {first: Kanokorn, last: Trakultaweekool} -- canonical: {first: Do-Dat, last: Tran} - variants: - - {first: Do Dat, last: Tran} -- canonical: {first: Duc-Vu, last: Tran} - variants: - - {first: Vu Duc, last: Tran} -- canonical: {first: Giang Binh, last: Tran} - variants: - - {first: Giang, last: Tran} -- canonical: {first: Ke M., last: Tran} - variants: - - {first: Ke, last: Tran} - - {first: Ke, last: Tran Manh} -- canonical: {first: Mai-Vu, last: Tran} - variants: - - {first: Mai-vu, last: Tran} -- canonical: {first: Nam-Khanh, last: Tran} - variants: - - {first: Nam Khanh, last: Tran} -- canonical: {first: Quan Hung, last: Tran} - variants: - - {first: Quan, last: Tran} -- canonical: {first: Tuan, last: Tran} - variants: - - {first: Tuan Dung, last: Tran} -- canonical: {first: Viet Hong, last: Tran} - variants: - - {first: Viet-Hong, last: Tran} -- canonical: {first: Diana, last: Trandabat} - variants: - - {first: Diana, last: Trandabăț} - - {first: Diana, last: Trandabăţ} - - {first: Diana Marie, last: Trandabăţ} -- canonical: {first: David, last: Traum} - variants: - - {first: David R., last: Traum} -- canonical: {first: Beata, last: Trawiński} - variants: - - {first: Beata, last: Trawinski} -- canonical: {first: Jérémy, last: Trione} - variants: - - {first: Jeremy, last: Trione} -- canonical: {first: Marian, last: Trnka} - variants: - - {first: Marián, last: Trnka} -- canonical: {first: Cassia, last: Trojahn} - variants: - - {first: Cássia, last: Trojahn} -- canonical: {first: Roy, last: Tromble} - variants: - - {first: Roy W., last: Tromble} -- canonical: {first: Raphael, last: Troncy} - variants: - - {first: Raphaël, last: Troncy} -- canonical: {first: Harald, last: Trost} - id: harald-trost -- canonical: {first: Thomas Alexander, last: Trost} - variants: - - {first: Thomas, last: Trost} -- canonical: {first: Khiet P., last: Truong} - variants: - - {first: Khiet, last: Truong} -- canonical: {first: Mei-Chih, last: Tsai} - variants: - - {first: Mei-chih, last: Tsai} -- canonical: {first: Ming-Feng, last: Tsai} - variants: - - {first: Meng-Feng, last: Tsai} -- canonical: {first: Richard Tzong-Han, last: Tsai} - variants: - - {first: Tzong-Han, last: Tsai} - - {first: Tzong-Han Richard, last: Tsai} - - {first: Richard Tzong-han, last: Tsai} -- canonical: {first: Sung-Fung, last: Tsai} - variants: - - {first: Sung-Feng, last: Tsai} -- canonical: {first: Chiu-yu, last: Tseng} - variants: - - {first: Chiu-Yu, last: Tseng} -- canonical: {first: Chiung-hui, last: Tseng} - variants: - - {first: Chiung-Hui, last: Tseng} -- canonical: {first: Huihsin, last: Tseng} - variants: - - {first: Hui-hsin, last: Tseng} - - {first: Hui-Hsin, last: Tseng} -- canonical: {first: Yuen-Hsien, last: Tseng} - variants: - - {first: Yuan-Hsien, last: Tseng} -- canonical: {first: Pirros, last: Tsiakoulis} - id: pirros-tsiakoulis -- canonical: {first: Benjamin K., last: Tsou} - id: benjamin-k-tsou - variants: - - {first: Benjamin K.Y., last: Tsou} - - {first: Benjamin K., last: T’sou} - - {first: Benjamin, last: Tsou} - - {first: Benjamin K, last: Tsou} -- canonical: {first: Jun’ichi, last: Tsujii} - id: junichi-tsujii - variants: - - {first: Jun-ichi, last: Tsujii} - - {first: Jun-Ichi, last: Tsujii} - - {first: Junichi, last: Tsujii} - - {first: Jun-ich, last: Tsujii} -- canonical: {first: Junya, last: Tsutsumi} - id: junya-tsutsumi -- canonical: {first: Wen-Hsiang, last: Tu} - variants: - - {first: Wen-hsiang, last: Tu} -- canonical: {first: Ying-Chieh, last: Tu} - variants: - - {first: Ying-chieh, last: Tu} -- canonical: {first: Luu Anh, last: Tuan} - variants: - - {first: Anh, last: Luu} - - {first: Anh Tuan, last: Luu} -- canonical: {first: Allen B., last: Tucker} - variants: - - {first: Allen, last: Tucker} -- canonical: {first: Catalina Oana, last: Tudor} - variants: - - {first: Catalina O., last: Tudor} -- canonical: {first: Dan, last: Tufiş} - variants: - - {first: Dan, last: Tufis} - - {first: Dan, last: Tufiș} -- canonical: {first: Giovanni, last: Tummarello} - id: giovanni-tummarello -- canonical: {first: Gokhan, last: Tur} - id: gokhan-tur - variants: - - {first: Gokhan, last: Tür} -- canonical: {first: Umit Deniz, last: Turan} - variants: - - {first: Ümit Deniz, last: Turan} -- canonical: {first: Ramona Andreea, last: Turcu} - variants: - - {first: Ramona-Andreea, last: Turcu} -- canonical: {first: Joseph, last: Turian} - variants: - - {first: Joseph P., last: Turian} -- canonical: {first: Franco, last: Turini} - id: franco-turini -- canonical: {first: Jordi, last: Turmo} - id: jordi-turmo -- canonical: {first: Peter, last: Turney} - variants: - - {first: Peter D., last: Turney} -- canonical: {first: Howard R., last: Turtle} - variants: - - {first: Howard, last: Turtle} -- canonical: {first: Agnès, last: Tutin} - variants: - - {first: Agnes, last: Tutin} -- canonical: {first: Mark S., last: Tuttle} - id: mark-s-tuttle -- canonical: {first: Francis, last: Tyers} - variants: - - {first: Francis M., last: Tyers} -- canonical: {first: Evelyne, last: Tzoukermann} - id: evelyne-tzoukermann -- canonical: {first: Ferhan, last: Türe} - variants: - - {first: Ferhan, last: Ture} -- canonical: {first: Raghavendra, last: Udupa} - variants: - - {first: Raghavendra Udupa, last: U.} -- canonical: {first: Yoshihiro, last: Ueda} - id: yoshihiro-ueda -- canonical: {first: Shunsuke, last: Uemura} - variants: - - {first: Syunsuke, last: Uemura} -- canonical: {first: Chunyang, last: Jiang} - comment: HKUST - id: chunyang-jiang-hkust - orcid: 0009-0005-3401-4093 - institution: Hong Kong University of Science and Technology -- canonical: {first: Chunyang, last: Jiang} - id: chunyang-jiang - comment: May refer to several people -- canonical: {first: Alexandra L., last: Uitdenbogerd} - variants: - - {first: Alexandra, last: Uitdenbogerd} -- canonical: {first: Nancy, last: Underwood} - variants: - - {first: Nancy L., last: Underwood} -- canonical: {first: Marcus, last: Uneson} - variants: - - {first: Markus, last: Uneson} -- canonical: {first: Lyle, last: Ungar} - variants: - - {first: Lyle H., last: Ungar} -- canonical: {first: L. Alfonso, last: Urena Lopez} - variants: - - {first: L. Alfonso, last: Ureña-López} - - {first: L. Alfonso, last: Ureña López} - - {first: L. Alfonso, last: Urena-López} - - {first: L. Alfonso, last: Urena} - - {first: Alfonso, last: Ureña-López} - - {first: Luis Alfonso, last: Ureña-López} - - {first: L. Alfonso, last: Ureña- López} -- canonical: {first: Zdenka, last: Uresova} - variants: - - {first: Zdeňka, last: Urešová} -- canonical: {first: Ruben, last: Urizar} - id: ruben-urizar - variants: - - {first: Rubén, last: Urizar} -- canonical: {first: Miriam, last: Urkia} - id: miriam-urkia -- canonical: {first: Cristian, last: Ursu} - variants: - - {first: Christian, last: Ursu} -- canonical: {first: Suzan, last: Uskudarli} - variants: - - {first: Suzan, last: Üsküdarlı} -- canonical: {first: David C., last: Uthus} - variants: - - {first: David, last: Uthus} -- canonical: {first: Ozlem, last: Uzuner} - variants: - - {first: Özlem, last: Uzuner} -- canonical: {first: Elaine, last: Uí Dhonnchadha} - id: elaine-ui-dhonnchadha -- canonical: {first: Arjun Atreya, last: V} - variants: - - {first: Arjun, last: Atreya V} - - {first: Arjun, last: Atreya} -- canonical: {first: Subbarao K., last: V} - variants: - - {first: K.V., last: Subbarao} - - {first: Subbarao K, last: V.} -- canonical: {first: Devadath, last: V V} - variants: - - {first: Devadath V, last: V} -- canonical: {first: Mayank N., last: Vahia} - variants: - - {first: Mayank, last: Vahia} -- canonical: {first: Jacqueline, last: Vaissiere} - variants: - - {first: Jacqueline, last: Vaissière} -- canonical: {first: Antonio S., last: Valderrábanos} - variants: - - {first: Antonio S., last: Valderrabanos} -- canonical: {first: Oto, last: Vale} - variants: - - {first: Oto A., last: Vale} -- canonical: {first: Marco A., last: Valenzuela-Escárcega} - variants: - - {first: Marco Antonio, last: Valenzuela-Escárcega} -- canonical: {first: Andre, last: Valli} - variants: - - {first: André, last: Valli} -- canonical: {first: Valtcho, last: Valtchev} - id: valtcho-valtchev -- canonical: {first: Andoni, last: Valverde} - id: andoni-valverde -- canonical: {first: M. Pilar, last: Valverde Ibáñez} - variants: - - {first: M. Pilar, last: Valverde Ibañez} -- canonical: {first: Carol, last: Van Ess-Dykema} - variants: - - {first: Carol J., last: Van Ess-Dykema} - - {first: Carol, last: VanEss-Dykema} -- canonical: {first: Marjo, last: Van Koppen} - variants: - - {first: Marjo, last: van Koppen} -- canonical: {first: Tim, last: Van de Cruys} - variants: - - {first: Tim, last: Van De Cruys} -- canonical: {first: Aline A., last: Vanin} - variants: - - {first: Aline, last: Vanin} -- canonical: {first: Tristan, last: Vanrullen} - variants: - - {first: Tristan, last: van Rullen} - - {first: Tristan, last: Van Rullen} -- canonical: {first: Jerome, last: Vapillon} - id: jerome-vapillon -- canonical: {first: Dániel, last: Varga} - id: daniel-varga - variants: - - {first: Daniel, last: Varga} -- canonical: {first: István, last: Varga} - variants: - - {first: Istvan, last: Varga} -- canonical: {first: Giovanni Battista, last: Varile} - id: giovanni-battista-varile - variants: - - {first: Giovanni B., last: Varile} -- canonical: {first: Dusan, last: Varis} - variants: - - {first: Dušan, last: Variš} -- canonical: {first: Ioana, last: Vasilescu} - id: ioana-vasilescu -- canonical: {first: Gunaranjan, last: Vasireddy} - id: gunaranjan-vasireddy -- canonical: {first: Andrejs, last: Vasiļjevs} - variants: - - {first: Andrejs, last: Vasiljevs} -- canonical: {first: Alexander, last: Vasserman} - variants: - - {first: Alex, last: Vasserman} -- canonical: {first: Dominique, last: Vaufreydaz} - id: dominique-vaufreydaz -- canonical: {first: Bernard, last: Vauquois} - id: bernard-vauquois -- canonical: {first: Guillaume, last: Vauvert} - id: guillaume-vauvert -- canonical: {first: Eva Maria, last: Vecchi} - variants: - - {first: Eva, last: Vecchi} -- canonical: {first: Arlindo, last: Veiga} - variants: - - {first: Arlindo O., last: Veiga} -- canonical: {first: Nanette M., last: Veilleux} - id: nanette-veilleux -- canonical: {first: Gerard, last: Veillon} - id: gerard-veillon -- canonical: {first: Paola, last: Velardi} - id: paola-velardi -- canonical: {first: Patricia, last: Velazquez-Morales} - variants: - - {first: Patricia, last: Velázquez-Morales} -- canonical: {first: Noortje, last: Venhuizen} - variants: - - {first: Noortje J., last: Venhuizen} -- canonical: {first: Pranav Narayanan, last: Venkit} - variants: - - {first: Pranav, last: Venkit} -- canonical: {first: Mateja, last: Verlič} - variants: - - {first: Mateja, last: Verlic} -- canonical: {first: Yiyang, last: Du} - id: yiyang-du-cmu - comment: CMU - orcid: 0009-0007-1949-9736 - institution: Carnegie Mellon University -- canonical: {first: Yiyang, last: Du} - id: yiyang-du - comment: May refer to several people -- canonical: {first: Jean, last: Veronis} - variants: - - {first: Jean, last: Véronis} -- canonical: {first: Karin, last: Verspoor} - variants: - - {first: Karin M., last: Verspoor} - - {first: Cornelia Maria, last: Verspoor} -- canonical: {first: Anita Lilla, last: Verő} - variants: - - {first: Anita Lilla, last: Vero} -- canonical: {first: Katerina, last: Veselá} - variants: - - {first: Kateřina, last: Veselá} -- canonical: {first: Grażyna, last: Vetulani} - variants: - - {first: Grazyna, last: Vetulani} -- canonical: {first: José Luis, last: Vicedo} - id: jose-luis-vicedo - variants: - - {first: Jose-Luis, last: Vicedo} - - {first: Jose Luis, last: Vicedo} - - {first: José L., last: Vicedo} -- canonical: {first: Enrique, last: Vidal} - id: enrique-vidal -- canonical: {first: Renata, last: Vieira} - id: renata-vieira -- canonical: {first: Sarah, last: Vieweg} - variants: - - {first: Sarah E., last: Vieweg} -- canonical: {first: Jacob Hoover, last: Vigly} - id: jacob-hoover-vigly - variants: - - {first: Jacob Louis, last: Hoover} - - {first: Jacob, last: Hoover} -- canonical: {first: Marina, last: Vigário} - id: marina-vigario -- canonical: {first: K., last: Vijay-Shanker} - id: k-vijay-shanker - variants: - - {first: K, last: Vijay-Shanker} - - {first: K., last: Vijay-Shankar} - - {first: Vijay, last: Shanker} -- canonical: {first: Marc, last: Vilain} - variants: - - {first: Marc B., last: Vilain} -- canonical: {first: Juan Miguel, last: Vilar} - id: juan-miguel-vilar - variants: - - {first: Juan-Miguel, last: Vilar} - - {first: Juan M., last: Vilar} -- canonical: {first: Darnes, last: Vilariño} - variants: - - {first: Darnes, last: Vilariño Ayala} -- canonical: {first: Jorgen, last: Villadsen} - variants: - - {first: Jørgen, last: Villadsen} -- canonical: {first: Jeanne, last: Villaneau} - id: jeanne-villaneau -- canonical: {first: Luís, last: Villarejo} - variants: - - {first: Luis, last: Villarejo} -- canonical: {first: Luis, last: Villaseñor-Pineda} - variants: - - {first: Luis, last: Villaseñor} - - {first: Luis, last: Villasenor} -- canonical: {first: Éric, last: Villemonte de la Clergerie} - variants: - - {first: Eric, last: Villemonte de la Clergerie} - - {first: Eric, last: de la Clergerie} - - {first: Eric, last: de La Clergerie} - - {first: Éric, last: de La Clergerie} - - {first: Éric, last: de la Clergerie} - - {first: Éric, last: Villemonte de La Clergerie} -- canonical: {first: Špela, last: Vintar} - variants: - - {first: Spela, last: Vintar} -- canonical: {first: S. V. N., last: Vishwanathan} - variants: - - {first: S.V.N., last: Vishwanathan} -- canonical: {first: George, last: Vladutz} - id: george-vladutz -- canonical: {first: Nguyen, last: Vo} - variants: - - {first: Nguyen, last: Ha Vo} -- canonical: {first: Stephan, last: Vogel} - id: stephan-vogel - variants: - - {first: Stephen, last: Vogel} -- canonical: {first: Maria das Graças, last: Volpe Nunes} - variants: - - {first: Maria, last: das Graças Volpe Nunes} - - {first: Maria, last: das Gracas Volpe Nunes} - - {first: Maria das Graças Volpe, last: Nunes} - - {first: Maria, last: das Graças} - - {first: Maria das Graças V., last: Nunes} - - {first: Maria das Graças, last: Nunes} - - {first: Maria das Gracas, last: Volpe} -- canonical: {first: Dirk, last: Von Gruenigen} - variants: - - {first: Dirk, last: von Grünigen} -- canonical: {first: Ellen M., last: Voorhees} - variants: - - {first: Ellen, last: Voorhees} -- canonical: {first: Clare, last: Voss} - variants: - - {first: Clare R., last: Voss} -- canonical: {first: Hai-Quan, last: Vu} - variants: - - {first: Hai Quan, last: Vu} -- canonical: {first: Pranav, last: Goel} - comment: UMD - id: pranav-goel-umd - orcid: 0000-0003-1037-2687 - institution: University of Maryland -- canonical: {first: Pranav, last: Goel} - id: pranav-goel - comment: May refer to several people -- canonical: {first: Thuy, last: Vu} - variants: - - {first: Thuy-Trang, last: Vu} -- canonical: {first: Tu, last: Vu} - variants: - - {first: Tu Thanh, last: Vu} -- canonical: {first: Xuan Luong, last: Vu} - variants: - - {first: Xuân Lương, last: Vũ} - - {first: Xuan-Luong, last: Vu} -- canonical: {first: Kristina, last: Vuckovic} - variants: - - {first: Kristina, last: Vučković} -- canonical: {first: Stasa, last: Vujicic-Stankovic} - variants: - - {first: Staša Vujičić, last: Stanković} - - {first: Staša, last: Vujičić Stanković} -- canonical: {first: Jan, last: Vystrčil} - variants: - - {first: Jan, last: Vystrcil} -- canonical: {first: Tamás, last: Váradi} - variants: - - {first: Tamas, last: Váradi} -- canonical: {first: Glòria, last: Vázquez} - variants: - - {first: Gloria, last: Vázquez} - - {first: Gloria, last: Vazquez} -- canonical: {first: Silvia, last: Vázquez} - variants: - - {first: Silvia Rodríguez, last: Vázquez} -- canonical: {first: Sonia, last: Vázquez} - variants: - - {first: Sonia, last: Vazquez} - - {first: Sonia, last: Vázquez Pérez} -- canonical: {first: Jaakko, last: Väyrynen} - variants: - - {first: Jaakko J., last: Väyrynen} -- canonical: {first: Luuk Van, last: Waes} - variants: - - {first: Luuk, last: Van Waes} -- canonical: {first: Peter Waiganjo, last: Wagacha} - variants: - - {first: Peter W., last: Wagacha} - - {first: Peter, last: Wagacha} -- canonical: {first: Stefan, last: Wagner} - variants: - - {first: Stefan, last: Wager} -- canonical: {first: Wolfgang, last: Wahlster} - id: wolfgang-wahlster -- canonical: {first: Alex, last: Waibel} - id: alex-waibel - variants: - - {first: Alexander, last: Waibel} -- canonical: {first: Takahiro, last: Wakao} - id: takahiro-wakao -- canonical: {first: Christopher R., last: Walker} - variants: - - {first: Christopher, last: Walker} - - {first: Christopher R, last: Walker} -- canonical: {first: Marilyn, last: Walker} - id: marilyn-walker - variants: - - {first: Marilyn A., last: Walker} -- canonical: {first: Vern, last: Walker} - variants: - - {first: Vern R., last: Walker} -- canonical: {first: Byron C., last: Wallace} - variants: - - {first: Byron, last: Wallace} -- canonical: {first: Hanna, last: Wallach} - variants: - - {first: Hanna M., last: Wallach} -- canonical: {first: Joel, last: Wallenberg} - variants: - - {first: Joel C., last: Wallenberg} -- canonical: {first: Annalu, last: Waller} - id: annalu-waller -- canonical: {first: Alan, last: Wallington} - id: alan-wallington - variants: - - {first: Alan M., last: Wallington} -- canonical: {first: David L., last: Waltz} - id: david-l-waltz -- canonical: {first: Chi-Shing, last: Wang} - variants: - - {first: Chi-shing, last: Wang} -- canonical: {first: Daisy Zhe, last: Wang} - variants: - - {first: Zhe, last: Wang} -- canonical: {first: Flora Yu-Fang, last: Wang} - variants: - - {first: Yu-Fang, last: Wang} -- canonical: {first: Hsin-Min, last: Wang} - variants: - - {first: Hsin-min, last: Wang} -- canonical: {first: JianXiang, last: Wang} - variants: - - {first: Jianxiang, last: Wang} -- canonical: {first: Kexin, last: Wang} - comment: Bytedance - id: kexin-wang-bd -- canonical: {first: Kexin, last: Wang} - comment: TU Darmstadt - id: kexin-wang-tudarmstadt - orcid: 0000-0003-1175-7829 - institution: TU Darmstadt -- canonical: {first: Kexin, last: Wang} - id: kexin-wang - comment: May refer to several people -- canonical: {first: Kun-Ching, last: Wang} - variants: - - {first: Kun-ching, last: Wang} -- canonical: {first: Ling Xiao, last: Wang} - variants: - - {first: Lingxiao, last: Wang} -- canonical: {first: Lucy Lu, last: Wang} - id: lucy-lu-wang - variants: - - {first: Lucy, last: Wang} -- canonical: {first: Michelle Q., last: Wang} - variants: - - {first: Michelle, last: Wang} -- canonical: {first: Mingwen, last: Wang} - variants: - - {first: MingWen, last: Wang} - - {first: Ming-Wei, last: Wang} -- canonical: {first: Richard C., last: Wang} - variants: - - {first: Richard, last: Wang} -- canonical: {first: Shih-ping, last: Wang} - variants: - - {first: Shih-Ping, last: Wang} -- canonical: {first: Sida I., last: Wang} - variants: - - {first: Sida, last: Wang} -- canonical: {first: Wen, last: Wang} - id: wen-wang -- canonical: {first: Wen Ting, last: Wang} - variants: - - {first: WenTing, last: Wang} -- canonical: {first: William S-Y., last: Wang} - variants: - - {first: William S.-Y., last: Wang} -- canonical: {first: Xia, last: Wang} - id: xia-wang -- canonical: {first: Xiao-Long, last: Wang} - variants: - - {first: XiaoLong, last: Wang} - - {first: Xiao-long, last: Wang} -- canonical: {first: Xiaolei, last: Wang} - comment: Fudan - id: xiaolei-wang-fudan -- canonical: {first: Xiaolei, last: Wang} - comment: Renmin - id: xiaolei-wang-renmin -- canonical: {first: Yih-Ru, last: Wang} - variants: - - {first: Yih-ru, last: Wang} -- canonical: {first: YongCheng, last: Wang} - variants: - - {first: Yong-Cheng, last: Wang} - - {first: Yong Cheng, last: Wang} -- canonical: {first: Nigel, last: Ward} - variants: - - {first: Nigel G., last: Ward} -- canonical: {first: Wayne, last: Ward} - id: wayne-ward - variants: - - {first: Wayne H., last: Ward} -- canonical: {first: David H. D., last: Warren} - variants: - - {first: David H.D., last: Warren} -- canonical: {first: Jonathan, last: Washington} - variants: - - {first: Jonathan North, last: Washington} - - {first: Jonathan N., last: Washington} -- canonical: {first: Thomas, last: Wasow} - variants: - - {first: Tom, last: Wasow} -- canonical: {first: Jakub, last: Waszczuk} - variants: - - {first: Jakub, last: Wasczuk} -- canonical: {first: Catherine I., last: Watson} - variants: - - {first: Catherine, last: Watson} -- canonical: {first: J. Angus, last: Webb} - variants: - - {first: Angus, last: Webb} -- canonical: {first: Nick, last: Webb} - id: nick-webb -- canonical: {first: Bonnie, last: Webber} - id: bonnie-webber - variants: - - {first: Bonnie L., last: Webber} - - {first: Bonnie Lynn, last: Webber} -- canonical: {first: Heinz J., last: Weber} - variants: - - {first: H-J., last: Weber} -- canonical: {first: Jonathan J., last: Webster} - variants: - - {first: Jonathan, last: Webster} -- canonical: {first: Jurgen, last: Wedekind} - variants: - - {first: Jürgen, last: Wedekind} -- canonical: {first: Eric, last: Wehrli} - variants: - - {first: Éric, last: Wehrli} -- canonical: {first: Xiangfeng, last: Wei} - variants: - - {first: XiangFeng, last: Wei} -- canonical: {first: Robert, last: Weide} - id: robert-weide -- canonical: {first: Amy, last: Weinberg} - variants: - - {first: Amy S., last: Weinberg} -- canonical: {first: Steven H., last: Weinberger} - variants: - - {first: Steven, last: Weinberger} -- canonical: {first: Clifford J., last: Weinstein} - variants: - - {first: Clifford, last: Weinstein} -- canonical: {first: Mitch, last: Weintraub} - id: mitch-weintraub - variants: - - {first: Mitchel, last: Weintraub} -- canonical: {first: Maxwell, last: Weinzierl} - variants: - - {first: Maxwell A., last: Weinzierl} -- canonical: {first: David, last: Weir} - id: david-weir - variants: - - {first: David J., last: Weir} - - {first: David, last: Wei} -- canonical: {first: Ralph, last: Weischedel} - variants: - - {first: Ralph M., last: Weischedel} -- canonical: {first: Zarah, last: Weiss} - variants: - - {first: Zarah, last: Weiß} -- canonical: {first: Davy, last: Weissenbacher} - id: davy-weissenbacher -- canonical: {first: Daniel S., last: Weld} - variants: - - {first: Daniel, last: Weld} - - {first: Dan, last: Weld} -- canonical: {first: Marion, last: Weller-Di Marco} - variants: - - {first: Marion, last: Di Marco} -- canonical: {first: Ben, last: Wellner} - variants: - - {first: Benjamin, last: Wellner} -- canonical: {first: Chris, last: Welty} - variants: - - {first: Christopher, last: Welty} -- canonical: {first: Christopher M., last: White} - id: christopher-m-white -- canonical: {first: James Paul, last: White} - variants: - - {first: James P., last: White} - - {first: James, last: White} -- canonical: {first: John S., last: White} - variants: - - {first: John, last: White} -- canonical: {first: Michael, last: White} - id: michael-white - variants: - - {first: Mike, last: White} -- canonical: {first: Peter, last: White} - variants: - - {first: Pete, last: White} -- canonical: {first: Ryen, last: White} - variants: - - {first: Ryan, last: White} -- canonical: {first: Pete, last: Whitelock} - id: pete-whitelock -- canonical: {first: Edward W. D., last: Whittaker} - id: edward-w-d-whittaker -- canonical: {first: Steve, last: Whittaker} - id: steve-whittaker -- canonical: {first: Daniel, last: Whyatt} - variants: - - {first: Dan, last: Whyatt} -- canonical: {first: Janyce, last: Wiebe} - variants: - - {first: Janyce M., last: Wiebe} - - {first: Jan, last: Wiebe} -- canonical: {first: Colin W., last: Wightman} - id: colin-w-wightman -- canonical: {first: Derry Tanti, last: Wijaya} - variants: - - {first: Derry, last: Wijaya} -- canonical: {first: Graham, last: Wilcock} - id: graham-wilcock -- canonical: {first: John, last: Wilkerson} - variants: - - {first: John D., last: Wilkerson} -- canonical: {first: Yorick, last: Wilks} - id: yorick-wilks -- canonical: {first: Jason D., last: Williams} - variants: - - {first: Jason, last: Williams} -- canonical: {first: Jay, last: Wilpon} - variants: - - {first: Jay G., last: Wilpon} -- canonical: {first: Andrew, last: Wilson} - variants: - - {first: Andrew T., last: Wilson} -- canonical: {first: Amy, last: Winarske} - id: amy-winarske -- canonical: {first: Genta Indra, last: Winata} - variants: - - {first: Genta, last: Winata} -- canonical: {first: Benjamin, last: Wing} - variants: - - {first: Ben, last: Wing} -- canonical: {first: Mats, last: Wirén} - variants: - - {first: Mats, last: Wiren} -- canonical: {first: G. Bowden, last: Wise} - variants: - - {first: Bowden, last: Wise} -- canonical: {first: Michael J., last: Witbrock} - variants: - - {first: Michael, last: Witbrock} -- canonical: {first: Peter, last: Wittenburg} - id: peter-wittenburg -- canonical: {first: Billy T.M., last: Wong} - variants: - - {first: Billy T. M., last: Wong} -- canonical: {first: Kam-Fai, last: Wong} - id: kam-fai-wong - variants: - - {first: Kam-fai, last: Wong} -- canonical: {first: Ping Wai, last: Wong} - variants: - - {first: Percy Ping-Wai, last: Wong} -- canonical: {first: Raymond, last: Wong} - variants: - - {first: Raymond K., last: Wong} -- canonical: {first: Mary McGee, last: Wood} - id: mary-mcgee-wood - variants: - - {first: Mary, last: McGee Wood} -- canonical: {first: Phil C., last: Woodland} - id: phil-c-woodland -- canonical: {first: William A., last: Woods} - id: william-a-woods -- canonical: {first: Karsten L., last: Worm} - id: karsten-l-worm - variants: - - {first: Karsten, last: Worm} -- canonical: {first: Monika, last: Woszczyna} - id: monika-woszczyna -- canonical: {first: Klaus, last: Wothke} - id: klaus-wothke -- canonical: {first: Sue Ellen, last: Wright} - variants: - - {first: Sue, last: Wright} -- canonical: {first: Chia-Lung, last: Wu} - variants: - - {first: Chia-Long, last: Wu} -- canonical: {first: Chun-Kai, last: Wu} - variants: - - {first: Kevin Chun-Kai, last: Wu} -- canonical: {first: Horng Jyh Paul, last: Wu} - variants: - - {first: Horng-Jyh P., last: Wu} -- canonical: {first: Jian-Chen, last: Wu} - variants: - - {first: Jien-Chen, last: Wu} -- canonical: {first: Jian-Cheng, last: Wu} - variants: - - {first: Jian-cheng, last: Wu} - - {first: Jiancheng, last: Wu} -- canonical: {first: Lide, last: Wu} - variants: - - {first: Li-de, last: Wu} -- canonical: {first: Ming-Jer, last: Wu} - variants: - - {first: Min-Jer, last: Wu} -- canonical: {first: Katharina, last: Wäschle} - variants: - - {first: Katharina, last: Waeschle} -- canonical: {first: Amelie, last: Wührl} - variants: - - {first: Amelie, last: Wuehrl} -- canonical: {first: Geraldo Bonorino, last: Xexéo} - variants: - - {first: Geraldo, last: Xexéo} -- canonical: {first: Yingju, last: Xia} - variants: - - {first: Ying-Ju, last: Xia} - - {first: YingJu, last: Xia} -- canonical: {first: Jinghui, last: Xiao} - variants: - - {first: JingHui, last: Xiao} -- canonical: {first: Eric, last: Xing} - variants: - - {first: Eric P., last: Xing} -- canonical: {first: Deyi, last: Xiong} - variants: - - {first: De-Yi, last: Xiong} -- canonical: {first: Frank F., last: Xu} - variants: - - {first: Frank, last: Xu} -- canonical: {first: Jian-ming, last: Xu} - variants: - - {first: Jian-Ming, last: Xu} -- canonical: {first: Jinan, last: Xu} - variants: - - {first: JinAn, last: Xu} -- canonical: {first: Mingbin, last: Xu} - variants: - - {first: MingBin, last: Xu} -- canonical: {first: Zhiming, last: Xu} - variants: - - {first: Zhi-Ming, last: Xu} -- canonical: {first: Serge A., last: Yablonsky} - variants: - - {first: Serge, last: Yablonsky} -- canonical: {first: Ihsan, last: Yalcinkaya} - variants: - - {first: İhsan, last: Yalçinkaya} - - {first: İhsan, last: Yalcinkaya} -- canonical: {first: Hirofumi, last: Yamamoto} - variants: - - {first: Hirohumi, last: Yamamoto} -- canonical: {first: Yoichi, last: Yamashita} - id: yoichi-yamashita -- canonical: {first: Chao-Han Huck, last: Yang} - variants: - - {first: Huck Chao-Han, last: Yang} -- canonical: {first: Charles, last: Yang} - variants: - - {first: Charles D., last: Yang} -- canonical: {first: Dechuan, last: Yang} - variants: - - {first: De, last: Yang} -- canonical: {first: Dong, last: Yang} - id: dong-yang -- canonical: {first: Eun-Suk, last: Yang} - variants: - - {first: Eunsuk, last: Yang} -- canonical: {first: Li-chin, last: Yang} - variants: - - {first: Li-Chin, last: Yang} -- canonical: {first: Lingpeng, last: Yang} - variants: - - {first: LingPeng, last: Yang} -- canonical: {first: Muyun, last: Yang} - variants: - - {first: MuYun, last: Yang} - - {first: Mu-yun, last: Yang} -- canonical: {first: Ping-Che, last: Yang} - variants: - - {first: Ping-che, last: Yang} -- canonical: {first: Ting-hao, last: Yang} - variants: - - {first: Ting-Hao, last: Yang} -- canonical: {first: Yaosheng, last: Yang} - variants: - - {first: YaoSheng, last: Yang} -- canonical: {first: Jianmin, last: Yao} - variants: - - {first: Jian-min, last: Yao} - - {first: Jian-Min, last: Yao} -- canonical: {first: Jin-ge, last: Yao} - variants: - - {first: Jin-Ge, last: Yao} -- canonical: {first: Yao, last: Yao} - id: yao-yao-uwisc -- canonical: {first: Yao, last: Yao} - id: yao-yao -- canonical: {first: Mustafa, last: Yaseen} - id: mustafa-yaseen -- canonical: {first: Norihito, last: Yasuda} - variants: - - {first: Norihi, last: Yasuda} -- canonical: {first: Alexander, last: Yeh} - variants: - - {first: Alexander S., last: Yeh} - - {first: Alex, last: Yeh} -- canonical: {first: Kevin C., last: Yeh} - variants: - - {first: Kevin, last: Yeh} -- canonical: {first: Ming-chin, last: Yen} - variants: - - {first: Ming-Chin, last: Yen} -- canonical: {first: Meliha, last: Yetisgen-Yildiz} - variants: - - {first: Meliha, last: Yetisgen} - - {first: Meliha, last: Yetişgen} -- canonical: {first: Szu-ting, last: Yi} - variants: - - {first: Szuting, last: Yi} -- canonical: {first: Wen-tau, last: Yih} - variants: - - {first: Scott Wen-tau, last: Yih} -- canonical: {first: Matti, last: Ylilammi} - id: matti-ylilammi -- canonical: {first: Shoichi, last: Yokoyama} - id: shoichi-yokoyama -- canonical: {first: Aesun, last: Yoon} - variants: - - {first: Ae sun, last: Yoon} - - {first: Ae-Sun, last: Yoon} -- canonical: {first: James, last: Yoon} - variants: - - {first: James H., last: Yoon} -- canonical: {first: Su-Youn, last: Yoon} - variants: - - {first: Su-youn, last: Yoon} -- canonical: {first: Kyosuke, last: Yoshida} - variants: - - {first: Kyôsuke, last: Yoshida} -- canonical: {first: Takehiko, last: Yoshimi} - id: takehiko-yoshimi -- canonical: {first: Kei, last: Yoshimoto} - id: kei-yoshimoto -- canonical: {first: Nick J., last: Youd} - variants: - - {first: Nick, last: Youd} -- canonical: {first: Sheryl, last: Young} - variants: - - {first: Sheryl R., last: Young} -- canonical: {first: Steve, last: Young} - variants: - - {first: Steven, last: Young} -- canonical: {first: Steve J., last: Young} - id: steve-j-young -- canonical: {first: Clement T., last: Yu} - variants: - - {first: Clement, last: Yu} -- canonical: {first: Edmund, last: Yu} - variants: - - {first: Edmund S., last: Yu} -- canonical: {first: Liang-Chih, last: Yu} - variants: - - {first: Liang-chih, last: Yu} -- canonical: {first: Ming-Shing, last: Yu} - variants: - - {first: Ming-shing, last: Yu} -- canonical: {first: Philip S., last: Yu} - variants: - - {first: Philip, last: Yu} -- canonical: {first: Zaharin, last: Yusoff} - id: zaharin-yusoff -- canonical: {first: Ertugrul, last: Yılmaz} - variants: - - {first: Ertuğrul, last: Yilmaz} - - {first: Ertuǧrul, last: Yılmaz} -- canonical: {first: Osmar R., last: Zaiane} - variants: - - {first: Osmar, last: Zaïane} - - {first: Osmar, last: Zaiane} - - {first: Osmar R., last: Zaïane} -- canonical: {first: Omar, last: Zaidan} - variants: - - {first: Omar F., last: Zaidan} -- canonical: {first: Remi, last: Zajac} - variants: - - {first: Rémi, last: Zajac} -- canonical: {first: Xabier, last: Zalbide} - id: xabier-zalbide -- canonical: {first: Jordi Porta, last: Zamorano} - variants: - - {first: Jordi, last: Porta} -- canonical: {first: Antonio, last: Zampolli} - id: antonio-zampolli -- canonical: {first: Hongying, last: Zan} - variants: - - {first: Hong-ying, last: Zan} -- canonical: {first: Stefano, last: Zanobini} - id: stefano-zanobini -- canonical: {first: Fabio Massimo, last: Zanzotto} - id: fabio-massimo-zanzotto - variants: - - {first: Fabio, last: Massimo Zanzotto} - - {first: Fabio, last: Zanzotto} -- canonical: {first: Carlos Mario, last: Zapata Jaramillo} - variants: - - {first: Carlos M., last: Zapata Jaramillo} -- canonical: {first: Gian Piero, last: Zarri} - id: gian-piero-zarri -- canonical: {first: Sina, last: Zarrieß} - variants: - - {first: Sina, last: Zarriess} -- canonical: {first: George, last: Zavaliagkos} - id: george-zavaliagkos -- canonical: {first: Britta, last: Zeller} - variants: - - {first: Britta D., last: Zeller} -- canonical: {first: Daniel, last: Zeman} - variants: - - {first: Dan, last: Zeman} -- canonical: {first: Kalliopi, last: Zervanou} - variants: - - {first: Kalliopi A., last: Zervanou} -- canonical: {first: Luke, last: Zettlemoyer} - variants: - - {first: Luke S., last: Zettlemoyer} -- canonical: {first: ChengXiang, last: Zhai} - variants: - - {first: Chengxiang, last: Zhai} -- canonical: {first: Chao, last: Zhang} - comment: Tsinghua University - id: chao-zhang-tu -- canonical: {first: Dan, last: Zhang} - comment: Tsinghua University - id: dan-zhang-tsinghua -- canonical: {first: Dan, last: Zhang} - comment: May refer to several people - id: dan-zhang -- canonical: {first: Fang-Fang, last: Zhang} - variants: - - {first: Fangfang, last: Zhang} -- canonical: {first: Guiping, last: Zhang} - variants: - - {first: GuiPing, last: Zhang} -- canonical: {first: Huarui, last: Zhang} - variants: - - {first: HuaRui, last: Zhang} -- canonical: {first: Ke-Jia, last: Zhang} - variants: - - {first: Ke-Jia, last: Chang} -- canonical: {first: Li, last: Zhang} - comment: University of Pennsylvania - id: li-zhang-upenn -- canonical: {first: Li, last: Zhang} - comment: UC San Diego - id: li-zhang-ucsandiego -- canonical: {first: Li, last: Zhang} - comment: UK - id: li-zhang-uk -- canonical: {first: Li, last: Zhang} - comment: Google - id: li-zhang-gg -- canonical: {first: Li, last: Zhang} - comment: AWS - id: li-zhang-aws -- canonical: {first: Li, last: Zhang} - comment: IBM-china - id: li-zhang-ibmc -- canonical: {first: Li, last: Zhang} - comment: Newcastle, UK - id: li-zhang-newcastle -- canonical: {first: Li, last: Zhang} - comment: Teesside University - id: li-zhang-teesside -- canonical: {first: Li, last: Zhang} - comment: Birmingham - id: li-zhang-birmingham -- canonical: {first: Li, last: Zhang} - comment: Google - id: li-zhang-google -- canonical: {first: Li, last: Zhang} - comment: Nankai - id: li-zhang-nankai -- canonical: {first: Li, last: Zhang} - comment: Wuhan - id: li-zhang-wuhan -- canonical: {first: Ranran Haoran, last: Zhang} - comment: Penn State University - id: ranran-haoran-zhang -- canonical: {first: Weinan, last: Zhang} - variants: - - {first: Wei-Nan, last: Zhang} -- canonical: {first: Xiuzhen (Jenny), last: Zhang} - variants: - - {first: Xiuzhen, last: Zhang} -- canonical: {first: Yao-Zhong, last: Zhang} - id: yao-zhong-zhang - variants: - - {first: Yao Zhong, last: Zhang} - - {first: Yao-zhong, last: Zhang} -- canonical: {first: Ying, last: Zhang} - variants: - - {first: Joy Ying, last: Zhang} -- canonical: {first: Tiejun, last: Zhao} - variants: - - {first: TieJun, last: Zhao} - - {first: Tie-Jun, last: Zhao} - - {first: Tie-jun, last: Zhao} -- canonical: {first: Wayne Xin, last: Zhao} - variants: - - {first: Xin, last: Zhao} -- canonical: {first: Weina, last: Zhao} - variants: - - {first: Wei Na, last: Zhao} -- canonical: {first: Yi-jing, last: Zhao} - variants: - - {first: Yi-Jing, last: Hao} -- canonical: {first: Fang, last: Zheng} - variants: - - {first: Thomas Fang, last: Zheng} -- canonical: {first: Jiaheng, last: Zheng} - variants: - - {first: Jia-heng, last: Zheng} -- canonical: {first: Ze-yu, last: Zheng} - variants: - - {first: Zeyu, last: Zheng} -- canonical: {first: Guodong, last: Zhou} - variants: - - {first: GuoDong, last: Zhou} -- canonical: {first: Huiwei, last: Zhou} - variants: - - {first: HuiWei, last: Zhou} -- canonical: {first: Joe, last: Zhou} - variants: - - {first: Joe F., last: Zhou} -- canonical: {first: Yan-Zuo, last: Zhou} - variants: - - {first: Yen-zuo, last: Zhou} -- canonical: {first: Zhi Min, last: Zhou} - variants: - - {first: Zhi-Min, last: Zhou} -- canonical: {first: Kenny, last: Zhu} - variants: - - {first: Kenny Q., last: Zhu} -- canonical: {first: Qiaoming, last: Zhu} - variants: - - {first: Qiao-ming, last: Zhu} - - {first: Qiao-Ming, last: Zhu} - - {first: QiaoMing, last: Zhu} -- canonical: {first: Song-chun, last: Zhu} - variants: - - {first: Song-Chun, last: Zhu} -- canonical: {first: Xiaojin, last: Zhu} - variants: - - {first: Xiaojin Jerry, last: Zhu} -- canonical: {first: Janez, last: Zibert} - variants: - - {first: Janez, last: Žibert} -- canonical: {first: Ute, last: Ziegenhain} - id: ute-ziegenhain -- canonical: {first: Harald H., last: Zimmermann} - id: harald-h-zimmermann -- canonical: {first: Cäcilia, last: Zirn} - variants: - - {first: Caecilia, last: Zirn} -- canonical: {first: Arturs, last: Znotins} - variants: - - {first: Artūrs, last: Znotiņš} -- canonical: {first: Chengqing, last: Zong} - variants: - - {first: Cheng-qing, last: Zong} -- canonical: {first: Enrico, last: Zovato} - id: enrico-zovato -- canonical: {first: Richard, last: Zuber} - id: richard-zuber -- canonical: {first: Victor, last: Zue} - id: victor-zue - variants: - - {first: Victor W., last: Zue} -- canonical: {first: Geoffrey, last: Zweig} - id: geoffrey-zweig - variants: - - {first: Geoff, last: Zweig} -- canonical: {first: Pierre, last: Zweigenbaum} - id: pierre-zweigenbaum -- canonical: {first: Iria, last: da Cunha} - id: iria-da-cunha -- canonical: {first: William, last: de Beaumont} - variants: - - {first: Will, last: de Beaumont} -- canonical: {first: Martine, last: de Calmès} - id: martine-de-calmes -- canonical: {first: Guadalupe Aguado, last: de Cea} - variants: - - {first: Guadalupe, last: Aguado de Cea} - - {first: Guadalupe, last: Aguado-de-Cea} -- canonical: {first: Ricardo, last: de Córdoba} - variants: - - {first: Ricardo, last: de Cordoba} -- canonical: {first: Adrià, last: de Gispert} - variants: - - {first: Adrià, last: Gispert} - - {first: Adrià, last: De Gispert} -- canonical: {first: Clément, last: de Groc} - variants: - - {first: Clément, last: De Groc} -- canonical: {first: Vera Lucia Strube, last: de Lima} - variants: - - {first: Vera Lúcia Strube, last: de Lima} -- canonical: {first: Céline, last: de Looze} - variants: - - {first: Céline, last: Delooze} - - {first: Céline, last: De Looze} - - {first: Celine, last: De Looze} -- canonical: {first: Claude, last: de Loupy} - variants: - - {first: Claude, last: De Loupy} -- canonical: {first: Carl, last: de Marcken} - variants: - - {first: Carl G., last: de Marcken} -- canonical: {first: Marie-Catherine, last: de Marneffe} - variants: - - {first: Marie Catherine, last: de Marneffe} -- canonical: {first: Paulo C F, last: de Oliveira} - variants: - - {first: Paulo C. F., last: de Oliveira} -- canonical: {first: Valeria, last: de Paiva} - id: valeria-de-paiva -- canonical: {first: Maarten, last: de Rijke} - variants: - - {first: Maarten, last: De Rijke} -- canonical: {first: Folkert, last: de Vriend} - id: folkert-de-vriend -- canonical: {first: Peter V., last: deSouza} - id: peter-v-desouza -- canonical: {first: Louis, last: des Tombe} - id: louis-des-tombe -- canonical: {first: Daniela Oliveira F., last: do Amaral} - variants: - - {first: Daniela O. F., last: do Amaral} -- canonical: {first: Cicero, last: dos Santos} - variants: - - {first: Cícero, last: dos Santos} - - {first: Cícero Nogueira, last: dos Santos} - - {first: Cicero, last: Nogueira dos Santos} - - {first: Cícero, last: Nogueira dos Santos} -- canonical: {first: Johan Adam, last: du Preez} - id: johan-adam-du-preez -- canonical: {first: Hugo Van, last: hamme} - variants: - - {first: Hugo, last: Van hamme} -- canonical: {first: Kees, last: van Deemter} - variants: - - {first: Kees, last: Van Deemter} -- canonical: {first: Josef, last: van Genabith} - id: josef-van-genabith - variants: - - {first: Josef, last: Van Genabith} -- canonical: {first: Willem Robert, last: van Hage} - variants: - - {first: Willem, last: Van Hage} - - {first: Willem, last: van Hage} -- canonical: {first: Hans, last: van Halteren} - variants: - - {first: Hans, last: Van Halteren} -- canonical: {first: Gerhard B., last: van Huyssteen} - variants: - - {first: Gerhard, last: Van Huyssteen} - - {first: Gerhard, last: van Huyssteen} - - {first: Gerhard B, last: van Huyssteen} -- canonical: {first: Marcel P., last: van Lohuizen} - variants: - - {first: Marcel P., last: Van Lohuizen} -- canonical: {first: Erik, last: van Mulligen} - variants: - - {first: Erik M., last: van Mulligen} -- canonical: {first: Gertjan, last: van Noord} - variants: - - {first: Gertjan, last: Van Noord} -- canonical: {first: Marten, last: van Schijndel} - variants: - - {first: Marten, last: Van Schijndel} - - {first: Martin, last: van Schijndel} -- canonical: {first: Dieter, last: van Uytvanck} - variants: - - {first: Dieter, last: Van Uytvanck} -- canonical: {first: Menno, last: van Zaanen} - variants: - - {first: Menno, last: van Zannen} -- canonical: {first: Antal, last: van den Bosch} - variants: - - {first: Antal, last: Van den Bosch} - - {first: Antal, last: Van Den Bosch} -- canonical: {first: Henk, last: van den Heuvel} - id: henk-van-den-heuvel -- canonical: {first: Erik, last: van der Goot} - variants: - - {first: Erik, last: Van der Goot} -- canonical: {first: P. H. J., last: van der Kamp} - variants: - - {first: P.H.J., last: van der Kamp} -- canonical: {first: Lonneke, last: van der Plas} - variants: - - {first: Lonneke, last: Van Der Plas} -- canonical: {first: Hennie, last: van der Vliet} - variants: - - {first: Hennie, last: VanderVliet} -- canonical: {first: Rene, last: van der Wal} - variants: - - {first: René, last: van der Wal} - - {first: Rene, last: Van Der Wal} -- canonical: {first: Walther, last: von Hahn} - variants: - - {first: Walther, last: v. Hahn} -- canonical: {first: Katharina, last: von der Wense} - variants: - - {first: Katharina, last: Kann} -- canonical: {first: Aitor, last: Álvarez} - variants: - - {first: Aitor, last: Arronte Álvarez} -- canonical: {first: Ruket, last: Çakıcı} - variants: - - {first: Ruket, last: Cakici} - - {first: Ruken, last: Cakici} - - {first: Ruken, last: Çakıcı} -- canonical: {first: Özlem, last: Çetinoğlu} - variants: - - {first: Ozlem, last: Cetinoglu} - - {first: Özlem, last: Çetinoglu} -- canonical: {first: Haldur, last: Õim} - id: haldur-oim - variants: - - {first: Haldur, last: Oim} -- canonical: {first: Hale, last: Ögel Balaban} - variants: - - {first: Hale, last: Ogel} -- canonical: {first: Berkay Furkan, last: Önder} - variants: - - {first: Berkay, last: Önder} -- canonical: {first: Annette, last: Östling Andersson} - variants: - - {first: Annette, last: Östling} -- canonical: {first: Gözde, last: Özbal} - variants: - - {first: Gozde, last: Ozbal} -- canonical: {first: Arzucan, last: Özgür} - variants: - - {first: Arzucan, last: Ozgur} -- canonical: {first: Lilja, last: Øvrelid} - variants: - - {first: Lilja, last: Ovrelid} -- canonical: {first: Damir, last: Ćavar} - variants: - - {first: Damir, last: Cavar} -- canonical: {first: Matej, last: Ďurčo} - variants: - - {first: Matej, last: Durco} -- canonical: {first: Ozan, last: İrsoy} - variants: - - {first: Ozan, last: Irsoy} -- canonical: {first: Gözde Gül, last: Şahin} - variants: - - {first: Gözde, last: Şahin} - - {first: Gözde Gül, last: İşgüder} -- canonical: {first: Gabriela, last: Şerban} - variants: - - {first: Gabriela, last: Serban} -- canonical: {first: Octavia-Maria, last: Şulea} - variants: - - {first: Maria, last: Sulea} - - {first: Octavia-Maria, last: Sulea} - - {first: Maria-Octavia, last: Sulea} -- canonical: {first: Jana, last: Šindlerová} - variants: - - {first: Jana, last: Sindlerova} -- canonical: {first: Sanja, last: Štajner} - variants: - - {first: Sanja, last: Stajner} -- canonical: {first: Zdeněk, last: Žabokrtský} - variants: - - {first: Zdenek, last: Zabokrtsky} - - {first: Zdenĕk, last: Žabokrtský} - - {first: Zdenek, last: Žabokrtsky} -- canonical: {first: Lukáš, last: Žilka} - variants: - - {first: Lukas, last: Zilka} -- canonical: {first: Anirudh, last: Sundar} - variants: - - {first: Anirudh S., last: Sundar} - - {first: Anirudh S, last: Sundar} -- canonical: {first: Cong, last: Liu} - comment: Florida Atlantic University - id: cong-liu-fau -- canonical: {first: Cong, last: Liu} - comment: May refer to several people - id: cong-liu -- canonical: {first: Cong, last: Liu} - comment: University of California, Riverside - id: cong-liu-ucr -- canonical: {first: Cong, last: Liu} - comment: iFLYTEK Research - id: cong-liu-iflytek -- canonical: {first: Kyuyoung, last: Kim} - variants: - - {first: Kyu-Young, last: Kim} -- canonical: {first: Jann Railey, last: Montalan} - id: jann-railey-montalan - variants: - - {first: Jann, last: Montalan} - - {first: Railey, last: Montalan} - - {first: Jann Railey E., last: Montalan} -- canonical: {first: R. Thomas, last: McCoy} - id: r-thomas-mccoy - variants: - - {first: Tom, last: McCoy} -- canonical: {first: Kun, last: Zhang} - comment: University of Science and Technology of China - id: kun-zhang-ustc -- canonical: {first: Kun, last: Zhang} - comment: Inria Saclay-Île-de-France - id: kun-zhang-inria -- canonical: {first: Kun, last: Zhang} - comment: University of Chinese Academy of Sciences - id: kun-zhang-ucas -- canonical: {first: Kun, last: Zhang} - comment: May refer to multiple people - id: kun-zhang -- canonical: {first: Xuan Long, last: Do} - variants: - - {first: Do Xuan, last: Long} -- canonical: {first: Jian, last: Chen} - comment: May refer to several people - id: jian-chen -- canonical: {first: Jian, last: Chen} - comment: University at Buffalo - id: jian-chen-ub -- canonical: {first: Hannah, last: Cyberey} - id: hannah-cyberey - variants: - - {first: Hannah, last: Chen} -- canonical: {first: Lester James Validad, last: Miranda} - id: lester-james-validad-miranda - variants: - - {first: Lester James, last: Miranda} -- canonical: {first: Marten, last: During} - comment: University of Luxembourg - id: marten-during-ul -- canonical: {first: Marten, last: During} - comment: May refer to several people - id: marten-during -- canonical: {first: Börje F., last: Karlsson} - variants: - - {first: Börje, last: Karlsson} - comment: https://github.com/acl-org/acl-anthology/issues/4041 - orcid: 0000-0001-8925-360X - degree: PUC-Rio -- canonical: {first: Saptarshi, last: Ghosh} - id: saptarshi-ghosh-cincinnati - degree: University of Cincinnati - orcid: 0009-0006-9472-7121 -- canonical: {first: Saptarshi, last: Ghosh} - comment: May refer to several people - id: saptarshi-ghosh -- canonical: {first: Mayank, last: Singh} - comment: University of Arizona - id: mayank-singh-az -- canonical: {first: Mayank, last: Singh} - comment: May refer to several people - id: mayank-singh -- canonical: {first: Takumi, last: Goto} - variants: - - {first: Takumi, last: Gotou} - id: 0009-0006-8124-899X - degree: Nara Institute of Science and Technology -- canonical: {first: Muhammad N., last: ElNokrashy} - id: muhammad-elnokrashy - variants: - - {first: Muhammad, last: ElNokrashy} - - {first: Muhammad Nael, last: ElNokrashy} -- canonical: {first: Nishat, last: Raihan} - orcid: 0000-0001-6242-398X - variants: - - {first: Md Nishat, last: Raihan} -- canonical: {first: Ona, last: de Gibert} - id: ona-de-gibert - variants: - - {first: Ona, last: de Gibert Bonet} - orcid: 0000-0002-7163-4807 - degree: University of Helsinki, Finland -- canonical: {first: Wenzheng, last: Zhang} - comment: Rutgers University - orcid: 0009-0009-2578-9224 - id: wenzheng-zhang-ru -- canonical: {first: Wenzheng, last: Zhang} - comment: May refer to several people - id: wenzheng-zhang -- canonical: {first: Zhengyan, last: Shi} - orcid: 0000-0003-3074-3035 - degree: University College London - variants: - - {first: Zhengxiang, last: Shi} -- canonical: {first: Shu, last: Yang} - comment: University of British Columbia - orcid: 0000-0002-8507-7191 - id: shu-yang-ubc -- canonical: {first: Shu, last: Yang} - comment: May refer to several people - id: shu-yang -- canonical: {first: Chen, last: Cecilia Liu} - id: chen-cecilia-liu - orcid: 0009-0004-2382-8609 - comment: Technische Universität Darmstadt -- canonical: {first: Chen, last: Liu} - comment: May refer to several people - id: chen-liu -- canonical: {first: Li, last: Lin} - degree: Peking University - orcid: 0009-0008-5072-5022 - id: li-lin-pku -- canonical: {first: Li, last: Lin} - comment: May refer to multiple people - id: li-lin -- canonical: {first: Junyu, last: Luo} - degree: Peking University - orcid: 0009-0001-6894-1144 - id: junyu-luo-pu -- canonical: {first: Junyu, last: Luo} - comment: May refer to multiple people - id: junyu-luo -- canonical: {first: Zhihao, last: Wang} - degree: Xiamen University - orcid: 0009-0008-7497-6467 - id: zhihao-wang-xu -- canonical: {first: Zhihao, last: Wang} - comment: May refer to multiple people - id: zhihao-wang -- canonical: {first: Ryan, last: Boyd} - orcid: 0000-0002-1876-6050 - degree: University of Texas at Austin - variants: - - {first: Ryan L., last: Boyd} -- canonical: {first: Qi, last: Li} - degree: University at Buffalo - orcid: 0000-0002-3136-2157 - id: qi-li-ub -- canonical: {first: Qi, last: Li} - comment: May refer to multiple people - id: qi-li -- canonical: {first: Zhihan, last: Zhang} - degree: Singapore Management University - orcid: 0009-0009-5813-9172 - id: zhihan-zhang-smu -- canonical: {first: Zhihan, last: Zhang} - comment: May refer to multiple people - id: zhihan-zhang -- canonical: {first: Ning, last: Liu} - degree: Tsinghua University - orcid: 0000-0001-7475-9739 - id: ning-liu-tsinghua -- canonical: {first: Ning, last: Liu} - comment: May refer to multiple people - id: ning-liu -- canonical: {first: Changye, last: Li} - degree: University of Minnesota - orcid: 0000-0002-9743-7406 - id: changye-li-umn -- canonical: {first: Changye, last: Li} - comment: May refer to multiple people - id: changye-li -- canonical: {first: Ya, last: Li} - degree: Chinese Academy of Sciences - orcid: 0000-0002-6284-5039 - id: ya-li-cas -- canonical: {first: Ya, last: Li} - comment: May refer to multiple people - id: ya-li -- canonical: {first: Yue, last: Li} - degree: East China Normal University - orcid: 0009-0005-5509-2103 - id: yue-li-ecnu -- canonical: {first: Yue, last: Li} - comment: May refer to multiple people - id: yue-li -- canonical: {first: Lu, last: Xu} - degree: Sapienza University of Rome - orcid: 0000-0002-5660-3631 - id: lu-xu-uniroma1 -- canonical: {first: Lu, last: Xu} - comment: May refer to multiple people - id: lu-xu -- canonical: {first: Jiahao, last: Yuan} - degree: East China Normal University - orcid: 0009-0002-6194-450X - id: jiahao-yuan-ecnu -- canonical: {first: Jiahao, last: Yuan} - comment: May refer to multiple people - id: jiahao-yuan -- canonical: {first: Chong, last: Zhang} - degree: Xi'an Jiaotong-Liverpool University - orcid: 0009-0003-2020-6989 - id: chong-zhang-xjtlu -- canonical: {first: Chong, last: Zhang} - comment: May refer to multiple people - id: chong-zhang -- canonical: {first: Xinpeng, last: Wang} - degree: Ludwig Maximilian University of Munich (LMU) - orcid: 0009-0006-5213-1119 - id: xinpeng-wang-lmu -- canonical: {first: Xinpeng, last: Wang} - comment: May refer to multiple people - id: xinpeng-wang -- canonical: {first: Shengjie, last: Li} - comment: University of Texas at Dallas - id: shengjie-li - orcid: 0000-0002-5442-5464 -- canonical: {first: Shengjie, last: Li} - id: shengjie-li-peking - comment: Peking University - orcid: 0000-0003-3489-9125 -- canonical: {first: Shashank, last: Gupta} - id: shashank-gupta-uiuc - orcid: 0000-0002-3683-3739 - institution: University of Illinois at Urbana-Champaign -- canonical: {first: Shashank, last: Gupta} - id: shashank-gupta - comment: "May refer to several people" -- canonical: {first: Chen, last: Zhang} - id: chen-zhang-peking - orcid: 0000-0001-5842-0516 - institution: Peking University -- canonical: {first: Chen, last: Zhang} - id: chen-zhang - comment: May refer to several people diff --git a/data/yaml/people.yaml b/data/yaml/people.yaml new file mode 100644 index 0000000000..91d7f9480b --- /dev/null +++ b/data/yaml/people.yaml @@ -0,0 +1,15340 @@ +a-akilandeswari: + names: + - {first: A., last: Akilandeswari} + - {first: Akilandeswari, last: A} +a-j-m-szanser: + names: + - {first: A.J.M., last: Szanser} + - {first: A.J., last: Szanser} +a-kumaran: + names: + - {first: A, last: Kumaran} + - {first: A., last: Kumaran} +aarno-lehtola: + names: + - {first: Aarno, last: Lehtola} + - {first: A., last: Lehtola} +aaron-j-masino: + names: + - {first: Aaron J., last: Masino} + - {first: Aaron, last: Masino} +abdelhak-mouradi: + names: + - {first: Abdelhak, last: Mouradi} + - {first: A., last: Mouradi} +abdelmajid-ben-hamadou: + names: + - {first: Abdelmajid, last: Ben Hamadou} + - {first: Abdelmajid, last: Ben hamadou} + - {first: Abdelmajid, last: Benhamadou} + - {first: Abdelmajid-Lin, last: Ben Hamadou} +abderrahim-benabbou: + names: + - {first: Abderrahim, last: Benabbou} + - {first: A., last: Benabbou} +abdessalam-bouchekif: + names: + - {first: Abdessalam, last: Bouchekif} + - {first: Abdesselam, last: Bouchekif} +abdul-baquee-sharaf: + names: + - {first: Abdul-Baquee, last: Sharaf} + - {first: Abdul-Baquee M., last: Sharaf} +abe-ittycheriah: + names: + - {first: Abe, last: Ittycheriah} + - {first: A., last: Ittycheriah} +abhay-l-kashyap: + names: + - {first: Abhay, last: L. Kashyap} + - {first: Abhay, last: Kashyap} +abhyuday-jagannatha: + names: + - {first: Abhyuday, last: Jagannatha} + - {first: Abhyuday N, last: Jagannatha} +abigail-s-gertner: + names: + - {first: Abigail S., last: Gertner} + - {first: Abigail, last: Gertner} +achille-fokoue-nkoutche: + names: + - {first: Achille, last: Fokoue-Nkoutche} + - {first: Achille, last: Fokoue} +achla-m-raina: + names: + - {first: Achla M., last: Raina} + - {first: Achla, last: Raina} + - {first: Achla M, last: Raina} +adam-berger: + names: + - {first: Adam, last: Berger} + - {first: Adam L., last: Berger} +adam-cheyer: + names: + - {first: Adam, last: Cheyer} + - {first: A., last: Cheyer} +adam-liska: + names: + - {first: Adam, last: Liska} + - {first: Adam, last: Liška} +adam-meyers: + names: + - {first: Adam, last: Meyers} + - {first: A., last: Meyers} +adams-b-bodomo: + names: + - {first: Adams B., last: Bodomo} + - {first: Adams, last: Bodomo} +adele-goldberg: + names: + - {first: Adele, last: Goldberg} + - {first: Adele E., last: Goldberg} +adeline-nazarenko: + names: + - {first: Adeline, last: Nazarenko} + - {first: Adeline, last: Nazarenko-Perrin} + - {first: A., last: Nazarenko} +adi-shalev: + names: + - {first: Adi, last: Shalev} + - {first: Adi, last: Bitan} +adil-al-kufaishi: + names: + - {first: Adil, last: Al-Kufaishi} + - {first: A., last: Al-Kufaishi} +adil-el-ghali: + names: + - {first: Adil, last: El Ghali} + - {first: Adil, last: El-Ghali} +adolfo-hernandez-h: + names: + - {first: Adolfo, last: Hernández H.} + - {first: Adolfo, last: Hernández} +adoram-erell: + names: + - {first: Adoram, last: Erell} + - {first: A., last: Erell} +adria-de-gispert: + names: + - {first: Adrià, last: de Gispert} + - {first: Adrià, last: Gispert} + - {first: Adrià, last: De Gispert} +adria-torrens-urrutia: + names: + - {first: Adrià, last: Torrens Urrutia} + - {first: Adrià, last: Torrens-Urrutia} +adrian-brasoveanu: + names: + - {first: Adrian, last: Braşoveanu} + - {first: Adrian, last: Brasoveanu} +adriana-badulescu: + names: + - {first: Adriana, last: Badulescu} + - {first: Adriana, last: Bădulescu} +adwait-ratnaparkhi: + names: + - {first: Adwait, last: Ratnaparkhi} + - {first: A., last: Ratnaparkhi} +aesun-yoon: + names: + - {first: Aesun, last: Yoon} + - {first: Ae sun, last: Yoon} + - {first: Ae-Sun, last: Yoon} +agata-cybulska: + names: + - {first: Agata, last: Cybulska} + - {first: Agata Katarzyna, last: Cybulska} +agha-ali-raza: + names: + - {first: Agha Ali, last: Raza} + - {first: Agha, last: Raza} +agnes-sandor: + names: + - {first: Ágnes, last: Sándor} + - {first: Agnes, last: Sandor} +agnes-tutin: + names: + - {first: Agnès, last: Tutin} + - {first: Agnes, last: Tutin} +agnieszka-falenska: + names: + - {first: Agnieszka, last: Falenska} + - {first: Agnieszka, last: Faleńska} +agusti-lloberas: + names: + - {first: Agusti, last: Lloberas} + - {first: Agusti, last: LLoberas} +agustin-gravano: + names: + - {first: Agustin, last: Gravano} + - {first: Agustín, last: Gravano} +ahmed-aburaed: + names: + - {first: Ahmed, last: AbuRa’ed} + - {first: Ahmed, last: Abura’ed} +ahmed-hassan: + names: + - {first: Ahmed, last: Hassan} + - {first: Ahmed Hassan, last: Awadallah} +ahmed-ragheb: + names: + - {first: Ahmed, last: Ragheb} + - {first: A., last: Ragheb} +ahmet-cuneyd-tantug: + names: + - {first: Ahmet Cüneyd, last: Tantuğ} + - {first: A. Cüneyd, last: Tantuǧ} +aicha-bouhjar: + names: + - {first: Aicha, last: Bouhjar} + - {first: Aïcha, last: Bouhjar} +aimilios-chalamandaris: + names: + - {first: Aimilios, last: Chalamandaris} + - {first: Chalamandaris, last: Aimilios} + - {first: A., last: Chalamandaris} +aina-gari-soler: + names: + - {first: Aina, last: Garí Soler} + - {first: Aina Garí, last: Soler} +aingeru-mayor: + names: + - {first: Aingeru, last: Mayor} + - {first: A., last: Mayor} +aiti-aw: + names: + - {first: Aiti, last: Aw} + - {first: AiTi, last: Aw} + - {first: Ai Ti, last: Aw} +aitor-alvarez: + names: + - {first: Aitor, last: Álvarez} + - {first: Aitor, last: Arronte Álvarez} +aitor-gonzalez-agirre: + names: + - {first: Aitor, last: González-Agirre} + - {first: Aitor, last: Gonzalez-Agirre} +aitor-sologaistoa: + names: + - {first: Aitor, last: Sologaistoa} + - {first: A., last: Sologaistoa} +aitor-soroa: + names: + - {first: Aitor, last: Soroa} + - {first: Aitor, last: Soroa Etxabe} + - {first: A., last: Soroa} +akash-kumar-gautam: + names: + - {first: Akash Kumar, last: Gautam} + - {first: Akash, last: Gautam} +akiko-aizawa: + names: + - {first: Akiko, last: Aizawa} + - {first: Akiko N., last: Aizawa} +akshar-bharati: + names: + - {first: Akshar, last: Bharati} + - {first: Akshar, last: Bharathi} +alain-polguere: + names: + - {first: Alain, last: Polguère} + - {first: Alain, last: Polguere} + - {first: A., last: Polguere} +alan-k-melby: + names: + - {first: Alan K., last: Melby} + - {first: Alan, last: Melby} +alan-r-aronson: + names: + - {first: Alan R., last: Aronson} + - {first: Alan, last: Aronson} +alan-w-biermann: + names: + - {first: Alan W., last: Biermann} + - {first: Alan, last: Biermann} + - {first: A., last: Biermann} +alan-w-black: + names: + - {first: Alan W., last: Black} + - {first: Alan, last: Black} + - {first: Alan W, last: Black} + - {first: A.W., last: Black} +alan-wallington: + names: + - {first: Alan, last: Wallington} + - {first: Alan M., last: Wallington} + - {first: A.M., last: Wallington} +albert-a-rizzo: + names: + - {first: Albert A., last: Rizzo} + - {first: Albert, last: Rizzo} + - {first: Skip, last: Rizzo} + - {first: Albert Skip, last: Rizzo} +albert-m-lai: + names: + - {first: Albert M., last: Lai} + - {first: Albert, last: Lai} + - {first: Albert M, last: Lai} +albert-russel: + names: + - {first: Albert, last: Russel} + - {first: A., last: Russel} +alberto-bugarin-diz: + names: + - {first: Alberto, last: Bugarín Diz} + - {first: Alberto, last: Bugarín} + - {first: Alberto, last: Bugarin} +alberto-garcia-duran: + names: + - {first: Alberto, last: Garcia-Duran} + - {first: Alberto, last: García-Durán} +alberto-lavelli: + names: + - {first: Alberto, last: Lavelli} + - {first: A., last: Lavelli} +alberto-maritxalar: + names: + - {first: Alberto, last: Maritxalar} + - {first: A., last: Maritxalar} +albino-nogueiras: + names: + - {first: Albino, last: Nogueiras} + - {first: Albino Nogueiras, last: Rodriguez} +alejandro-h-toselli: + names: + - {first: Alejandro H., last: Toselli} + - {first: Alejandro Héctor, last: Toselli} +aleksandra-zogling-markus: + names: + - {first: Aleksandra Zögling, last: Markuš} + - {first: Aleksandra, last: Zögling} +alena-bohmova: + names: + - {first: Alena, last: Bŏhmová} + - {first: Alena, last: Bohmova} + - {first: Alena, last: Böhmová} +alessandro-mazzei: + names: + - {first: Alessandro, last: Mazzei} + - {first: A, last: Mazzei} +alex-chengyu-fang: + names: + - {first: Alex Chengyu, last: Fang} + - {first: Alex C., last: Fang} +alex-fine: + names: + - {first: Alex, last: Fine} + - {first: Alex B., last: Fine} +alex-klassmann: + names: + - {first: Alex, last: Klassmann} + - {first: Alexander, last: Klassmann} +alex-liu: + names: + - {first: Alex, last: Liu} + - {first: Alexander, last: Liu} +alex-moruz: + names: + - {first: Alex, last: Moruz} + - {first: Mihai Alex, last: Moruz} +alex-r-atrio: + names: + - {first: Àlex R., last: Atrio} + - {first: Àlex, last: Atrio} +alex-rudnick: + names: + - {first: Alex, last: Rudnick} + similar: + - alexander-rudnicky +alex-waibel: + names: + - {first: Alex, last: Waibel} + - {first: Alexander, last: Waibel} + - {first: A., last: Waibel} +alexa-n-little: + names: + - {first: Alexa N., last: Little} + - {first: Alexa, last: Little} +alexander-andreyewsky: + comment: IBM + names: + - {first: Alexander, last: Andreyewsky} + - {first: A., last: Andreyewsky} + similar: + - alexandre-andreewsky +alexander-berg: + names: + - {first: Alexander, last: Berg} + - {first: Alex, last: Berg} + - {first: Alexander C, last: Berg} +alexander-franz: + names: + - {first: Alexander, last: Franz} + - {first: Alexander M., last: Franz} +alexander-fraser: + names: + - {first: Alexander, last: Fraser} + - {first: Alex, last: Fraser} +alexander-g-hauptmann: + names: + - {first: Alexander G., last: Hauptmann} + - {first: Alex, last: Hauptmann} + - {first: Alexander, last: Hauptmann} +alexander-gelbukh: + names: + - {first: Alexander, last: Gelbukh} + - {first: Alexander F., last: Gelbukh} +alexander-m-rush: + names: + - {first: Alexander M., last: Rush} + - {first: Alexander, last: Rush} +alexander-richard-fabbri: + names: + - {first: Alexander, last: Richard Fabbri} + - {first: Alexander R., last: Fabbri} + - {first: Alexander, last: Fabbri} +alexander-rudnicky: + names: + - {first: Alexander, last: Rudnicky} + - {first: Alexander I., last: Rudnicky} + - {first: Alex, last: Rudnicky} + - {first: A., last: Rudnicky} + similar: + - alex-rudnick +alexander-vasserman: + names: + - {first: Alexander, last: Vasserman} + - {first: Alex, last: Vasserman} +alexander-yeh: + names: + - {first: Alexander, last: Yeh} + - {first: Alexander S., last: Yeh} + - {first: Alex, last: Yeh} +alexandra-balahur: + names: + - {first: Alexandra, last: Balahur} + - {first: Alexandra, last: Balahur-Dobrescu} +alexandra-l-uitdenbogerd: + names: + - {first: Alexandra L., last: Uitdenbogerd} + - {first: Alexandra, last: Uitdenbogerd} +alexandre-andreewsky: + comment: LIMSI + names: + - {first: Alexandre, last: Andreewsky} + - {first: A., last: Andreewsky} + similar: + - alexander-andreyewsky +alexandre-bouchard-cote: + names: + - {first: Alexandre, last: Bouchard-Côté} + - {first: Alexandre, last: Bouchard} +alexandre-denis: + names: + - {first: Alexandre, last: Denis} + - {first: A., last: Denis} +alexandre-rossi-alvares: + names: + - {first: Alexandre, last: Rossi Alvares} + - {first: Alexandre Rossi, last: Alvares} +alexandre-termier: + names: + - {first: Alexandre, last: Termier} + - {first: A., last: Termier} +alexandros-papangelis: + names: + - {first: Alexandros, last: Papangelis} + - {first: Alex, last: Papangelis} +alexandru-ceausu: + names: + - {first: Alexandru, last: Ceauşu} + - {first: Alexandru, last: Ceausu} +alexandru-lucian-ginsca: + names: + - {first: Alexandru-Lucian, last: Ginsca} + - {first: Alexandru, last: Ginsca} + - {first: Alexandru-Lucian, last: Gînscă} +alexei-v-ivanov: + names: + - {first: Alexei V., last: Ivanov} + - {first: Alexei, last: Ivanov} +alexis-konstantinidis: + names: + - {first: Alexis, last: Konstantinidis} + - {first: Alexis, last: Konstandinidis} +alexis-manaster-ramer: + names: + - {first: Alexis, last: Manaster-Ramer} + - {first: Alexis, last: Manaster Ramer} +alfio-gliozzo: + names: + - {first: Alfio, last: Gliozzo} + - {first: Alfio, last: Massimiliano Gliozzo} + - {first: Alfio Massimiliano, last: Gliozzo} + - {first: Alfio M., last: Gliozzo} +alfons-juan: + names: + - {first: Alfons, last: Juan} + - {first: Alfons, last: Juan-Císcar} +alfonso-mendes: + names: + - {first: Alfonso, last: Mendes} + - {first: Afonso, last: Mendes} +alfredo-maldonado: + names: + - {first: Alfredo, last: Maldonado} + - {first: Alfredo, last: Maldonado Guerra} + - {first: Alfredo, last: Maldonado-Guerra} +ali-hadian-cefidekhanie: + names: + - {first: Ali Hadian, last: Cefidekhanie} + - {first: Ali, last: Hadian} +ali-hurriyetoglu: + names: + - {first: Ali, last: Hürriyetoğlu} + - {first: Ali, last: Hurriyetoglu} + - {first: Ali, last: Hürriyetoǧlu} +alice-oh: + names: + - {first: Alice, last: Oh} + - {first: Alice H., last: Oh} +alina-beatrice-lorent: + names: + - {first: Alina Beatrice, last: Lorent} + - {first: Alina Beatrice, last: Lorenţ} + - {first: Alina, last: Lorenț} +alina-maria-ciobanu: + names: + - {first: Alina Maria, last: Ciobanu} + - {first: Alina, last: Ciobanu} +aline-a-vanin: + names: + - {first: Aline A., last: Vanin} + - {first: Aline, last: Vanin} +alipio-jorge: + names: + - {first: Alipio, last: Jorge} + - {first: Alípio, last: Jorge} +allen-b-tucker: + names: + - {first: Allen B., last: Tucker} + - {first: Allen, last: Tucker} +allen-l-gorin: + names: + - {first: Allen L., last: Gorin} + - {first: Allen, last: Gorin} +almut-silja-hildebrand: + names: + - {first: Almut Silja, last: Hildebrand} + - {first: Silja, last: Hildebrand} + - {first: Almut, last: Hildebrand} +alon-lavie: + names: + - {first: Alon, last: Lavie} + - {first: A., last: Lavie} +alvaro-peris: + names: + - {first: Álvaro, last: Peris} + - {first: Alvaro, last: Peris} +alvaro-rodrigo: + names: + - {first: Álvaro, last: Rodrigo} + - {first: Alvaro, last: Rodrigo} +alvin-cheng-hsien-chen: + names: + - {first: Alvin Cheng-Hsien, last: Chen} + - {first: Cheng-Hsien, last: Chen} +alvin-martin: + names: + - {first: Alvin, last: Martin} + - {first: Alvin F., last: Martin} +amac-herdagdelen: + names: + - {first: Amaç, last: Herdaǧdelen} + - {first: Amaç, last: Herdağdelen} +amal-al-saif: + names: + - {first: Amal, last: Al-Saif} + - {first: Amal, last: Alsaif} +amalia-todirascu: + names: + - {first: Amalia, last: Todirascu} + - {first: Amalia, last: Todiraşcu} +amanda-c-jobbins: + names: + - {first: Amanda C., last: Jobbins} + - {first: A.C., last: Jobbins} +amanda-stent: + names: + - {first: Amanda, last: Stent} + - {first: Amanda J., last: Stent} + - {first: A., last: Stent} +amarnag-subramanya: + names: + - {first: Amarnag, last: Subramanya} + - {first: Amar, last: Subramanya} +amber-boydstun: + names: + - {first: Amber, last: Boydstun} + - {first: Amber E., last: Boydstun} +amedeo-cappelli: + names: + - {first: Amedeo, last: Cappelli} + - {first: A., last: Cappelli} +amelie-wuhrl: + names: + - {first: Amelie, last: Wührl} + - {first: Amelie, last: Wuehrl} +aminul-islam: + names: + - {first: Aminul, last: Islam} + - {first: Md. Aminul, last: Islam} +amparo-elizabeth-cano-basave: + names: + - {first: Amparo Elizabeth, last: Cano Basave} + - {first: Amparo Elizabeth, last: Cano-Basave} +amy-weinberg: + names: + - {first: Amy, last: Weinberg} + - {first: Amy S., last: Weinberg} +amy-winarske: + names: + - {first: Amy, last: Winarske} + - {first: A., last: Winarske} +an-ta-huang: + names: + - {first: An-Ta, last: Huang} + - {first: Anta, last: Huang} +ana-cristina-mendes: + names: + - {first: Ana Cristina, last: Mendes} + - {first: Ana C., last: Mendes} + - {first: Ana, last: Mendes} +ana-fernandez: + names: + - {first: Ana, last: Fernandez} + - {first: Ana Fernández, last: Montraveta} + - {first: Ana, last: Fernández-Montraveta} +ana-garcia-serrano: + names: + - {first: Ana, last: García-Serrano} + - {first: Ana M., last: García-Serrano} +ana-gonzalez-ledesma: + names: + - {first: Ana, last: González-Ledesma} + - {first: Ana, last: Gonzalez} +anais-cadilhac: + names: + - {first: Anais, last: Cadilhac} + - {first: Anaïs, last: Cadilhac} +anais-lefeuvre: + names: + - {first: Anaïs, last: Lefeuvre} + - {first: Anaïs, last: Lefeuvre-Haftermeyer} +anand-kumar-m: + names: + - {first: Anand, last: Kumar M} + - {first: Anand Kumar, last: Madasamy} + - {first: Anand Kumar, last: M} +ananth-ramakrishnan-a: + names: + - {first: Ananth, last: Ramakrishnan A.} + - {first: Ananth, last: Ramakrishnan A} +anas-el-isbihani: + names: + - {first: Anas El, last: Isbihani} + - {first: Anas, last: El Isbihani} +anatoli-starostin: + names: + - {first: Anatoli, last: Starostin} + - {first: Anatoly, last: Starostin} +anca-roxana-simon: + names: + - {first: Anca-Roxana, last: Simon} + - {first: Anca, last: Simon} + - {first: Anca-Roxana, last: Şimon} +anders-johannsen: + names: + - {first: Anders, last: Johannsen} + - {first: Anders, last: Johanssen} +anders-sogaard: + names: + - {first: Anders, last: Søgaard} + - {first: Anders, last: Sogaard} +andoni-valverde: + names: + - {first: Andoni, last: Valverde} + - {first: A., last: Valverde} +andras-csomai: + names: + - {first: Andras, last: Csomai} + - {first: András, last: Csomai} +andras-kocsor: + names: + - {first: Andras, last: Kocsor} + - {first: András, last: Kocsor} +andre-blessing: + names: + - {first: André, last: Blessing} + - {first: Andre, last: Blessing} +andre-f-t-martins: + names: + - {first: André F. T., last: Martins} + - {first: Andre, last: Martins} + - {first: André, last: Martins} +andre-freitas: + names: + - {first: André, last: Freitas} + - {first: Andre, last: Freitas} +andre-kempe: + names: + - {first: Andre, last: Kempe} + - {first: André, last: Kempe} +andre-lamurias: + names: + - {first: André, last: Lamúrias} + - {first: Andre, last: Lamurias} +andre-mansikkaniemi: + names: + - {first: Andre, last: Mansikkaniemi} + - {first: André, last: Mansikkaniemi} +andre-mariotti: + names: + - {first: Andre, last: Mariotti} + - {first: André, last: Mariotti} +andre-valli: + names: + - {first: Andre, last: Valli} + - {first: André, last: Valli} +andrea-bolognesi: + names: + - {first: Andrea, last: Bolognesi} + - {first: A., last: Bolognesi} +andrea-setzer: + names: + - {first: Andrea, last: Setzer} + - {first: A., last: Setzer} +andreas-eisele: + names: + - {first: Andreas, last: Eisele} + - {first: A., last: Eisele} +andreas-kustner: + names: + - {first: Andreas, last: Kustner} + - {first: A., last: Kustner} +andreas-persidis: + names: + - {first: Andreas, last: Persidis} + - {first: A., last: Persidis} +andreas-soeborg-kirkedal: + names: + - {first: Andreas Søeborg, last: Kirkedal} + - {first: Andreas, last: Søeborg Kirkedal} +andreas-stolcke: + names: + - {first: Andreas, last: Stolcke} + - {first: A., last: Stolcke} +andrei-butnaru: + names: + - {first: Andrei, last: Butnaru} + - {first: Andrei M., last: Butnaru} +andrei-popescu-belis: + names: + - {first: Andrei, last: Popescu-Belis} + - {first: Andrei, last: Popescu Belis} + - {first: A., last: Popescu-Belis} +andrej-ljolje: + names: + - {first: Andrej, last: Ljolje} + - {first: A., last: Ljolje} + - {first: A, last: Ljolje} +andrejs-vasiljevs: + names: + - {first: Andrejs, last: Vasiļjevs} + - {first: Andrejs, last: Vasiljevs} +andres-marzal: + names: + - {first: Andrés, last: Marzal} + - {first: A., last: Marzal} +andres-montoyo: + names: + - {first: Andrés, last: Montoyo} + - {first: Andres, last: Montoyo} + - {first: Andrés, last: Montoyo Guijarro} +andrew-b-goldberg: + names: + - {first: Andrew B., last: Goldberg} + - {first: Andrew, last: Goldberg} +andrew-bennett: + names: + - {first: Andrew, last: Bennett} + similar: + - andrew-bennetts +andrew-bennetts: + names: + - {first: Andrew, last: Bennetts} + similar: + - andrew-bennett +andrew-boyd: + names: + - {first: Andrew, last: Boyd} + - {first: Andrew D., last: Boyd} +andrew-brasher: + names: + - {first: Andrew, last: Brasher} + - {first: A., last: Brasher} +andrew-david-beale: + names: + - {first: Andrew David, last: Beale} + - {first: Andrew, last: David} +andrew-gordon: + names: + - {first: Andrew, last: Gordon} + - {first: Andrew S., last: Gordon} +andrew-haas: + names: + - {first: Andrew, last: Haas} + - {first: Andrew R., last: Haas} +andrew-j-anderson: + names: + - {first: Andrew J., last: Anderson} + - {first: Andrew, last: Anderson} +andrew-kehler: + names: + - {first: Andrew, last: Kehler} + - {first: Andy, last: Kehler} +andrew-l-kun: + names: + - {first: Andrew L., last: Kun} + - {first: Andrew, last: Kun} +andrew-maas: + names: + - {first: Andrew, last: Maas} + - {first: Andrew L., last: Maas} +andrew-mackinlay: + names: + - {first: Andrew, last: MacKinlay} + - {first: Andrew, last: McKinlay} +andrew-olney: + names: + - {first: Andrew, last: Olney} + - {first: Andrew M., last: Olney} +andrew-smith: + names: + - {first: Andrew, last: Smith} + - {first: Andrew E., last: Smith} +andrew-w-cole: + names: + - {first: Andrew W., last: Cole} + - {first: Andrew, last: Cole} +andrew-wilson: + names: + - {first: Andrew, last: Wilson} + - {first: Andrew T., last: Wilson} +andrew-y-ng: + names: + - {first: Andrew Y., last: Ng} + - {first: Andrew, last: Ng} +angel-chang: + names: + - {first: Angel, last: Chang} + - {first: Angel X., last: Chang} +angel-de-la-torre: + names: + - {first: Ángel, last: De la Torre} + - {first: A., last: De la Torre} +angela-costa: + names: + - {first: Angela, last: Costa} + - {first: Ângela, last: Costa} +angels-egea: + names: + - {first: Angels, last: Egea} + - {first: Àngels, last: Egea} +angrosh-mandya: + names: + - {first: Angrosh, last: Mandya} + - {first: Mandya, last: Angrosh} +anil-kumar-nelakanti: + names: + - {first: Anil Kumar, last: Nelakanti} + - {first: Anil, last: Kumar} +anil-kumar-singh: + names: + - {first: Anil Kumar, last: Singh} + - {first: Anil, last: Kumar Singh} +animashree-anandkumar: + names: + - {first: Animashree, last: Anandkumar} + - {first: Anima, last: Anandkumar} +anirudh-sundar: + names: + - {first: Anirudh, last: Sundar} + - {first: Anirudh S., last: Sundar} + - {first: Anirudh S, last: Sundar} +anish-das-sarma: + names: + - {first: Anish Das, last: Sarma} + - {first: Atish Das, last: Sarma} +anita-lilla-vero: + names: + - {first: Anita Lilla, last: Verő} + - {first: Anita Lilla, last: Vero} +anja-belz: + names: + - {first: Anja, last: Belz} + - {first: Anya, last: Belz} +anja-hoethker: + names: + - {first: Anja, last: Hoethker} + - {first: Anja, last: Höthker} +ankit-srivastava: + names: + - {first: Ankit, last: Srivastava} + - {first: Ankit Kumar, last: Srivastava} + - {first: Ankit K., last: Srivastava} + - {first: Ankit, last: Kumar} +ankur-parikh: + names: + - {first: Ankur, last: Parikh} + - {first: Ankur P., last: Parikh} +anna-jonsson-umea: + comment: Umeå University + disable_name_matching: true + names: + - {first: Anna, last: Jonsson} + orcid: 0000-0002-9873-4170 +anna-kupsc: + names: + - {first: Anna, last: Kupść} + - {first: Anna, last: Kupsc} +anna-n-rafferty: + names: + - {first: Anna N., last: Rafferty} + - {first: Anna, last: Rafferty} +anna-sagvall-hein: + names: + - {first: Anna, last: Sågvall Hein} + - {first: Anna Sagvall, last: Hein} + - {first: Anna Sågvall, last: Hein} +annalu-waller: + names: + - {first: Annalu, last: Waller} + - {first: A., last: Waller} +anne-abeille: + names: + - {first: Anne, last: Abeillé} + - {first: Anne, last: Abeille} +anne-de-roeck: + names: + - {first: Anne, last: De Roeck} + - {first: Anne, last: DeRoeck} + - {first: Anne, last: de Roeck} + - {first: Anne, last: deRoeck} + - {first: A.N., last: De Roeck} + - {first: Anne N., last: De Roeck} +anne-gohring: + names: + - {first: Anne, last: Göhring} + - {first: Anne, last: Goehring} +anne-h-anderson: + names: + - {first: Anne H., last: Anderson} + - {first: Anne, last: Anderson} +anne-haake: + names: + - {first: Anne, last: Haake} + - {first: Anne R., last: Haake} +anne-kuhn: + names: + - {first: Anne, last: Kuhn} + - {first: A., last: Kuhn} +anne-lacheret: + names: + - {first: Anne, last: Lacheret} + - {first: Anne, last: Lacheret-Dujour} +annelies-braffort: + names: + - {first: Annelies, last: Braffort} + - {first: A., last: Braffort} +annette-hautli: + names: + - {first: Annette, last: Hautli} + - {first: Annette, last: Hautli-Janisz} +annette-ostling-andersson: + names: + - {first: Annette, last: Östling Andersson} + - {first: Annette, last: Östling} +annette-rios-gonzales: + names: + - {first: Annette, last: Rios Gonzales} + - {first: Annette, last: Rios} +annick-choisier: + names: + - {first: Annick, last: Choisier} + - {first: A., last: Choisier} +annick-corluy: + names: + - {first: Annick, last: Corluy} + - {first: A., last: Corluy} +ansaf-salleb-aouissi: + names: + - {first: Ansaf, last: Salleb-Aouissi} + - {first: Ansaf, last: Salleb-Aoussi} +anselmo-penas: + names: + - {first: Anselmo, last: Peñas} + - {first: Anselmo, last: Penas} +antal-van-den-bosch: + names: + - {first: Antal, last: van den Bosch} + - {first: Antal, last: Van den Bosch} + - {first: Antal, last: Van Den Bosch} +anthony-hartley: + names: + - {first: Anthony, last: Hartley} + - {first: Anthony F., last: Hartley} + - {first: A., last: Hartley} +anthony-hughes: + names: + - {first: Anthony, last: Hughes} + - {first: Anthony James, last: Hughes} + orcid: 0009-0003-4065-1094 +anthony-jameson: + names: + - {first: Anthony, last: Jameson} + - {first: A., last: Jameson} +anthony-kroch: + names: + - {first: Anthony, last: Kroch} + - {first: Anthony S., last: Kroch} +antje-rossdeutscher: + names: + - {first: Antje, last: Roßdeutscher} + - {first: Antje, last: Rossdeutscher} +anton-batliner: + names: + - {first: Anton, last: Batliner} + - {first: A., last: Batliner} +anton-karl-ingason: + names: + - {first: Anton Karl, last: Ingason} + - {first: Anton K., last: Ingason} +antonella-de-angeli: + names: + - {first: Antonella, last: De Angeli} + - {first: Antonella, last: DeAngeli} +antonio-bonafonte: + names: + - {first: Antonio, last: Bonafonte} + - {first: A., last: Bonafonte} +antonio-branco: + names: + - {first: António, last: Branco} + - {first: Antonio, last: Branco} + - {first: Antonio H., last: Branco} + - {first: António Horta, last: Branco} +antonio-cardenal: + names: + - {first: Antonio, last: Cardenal} + - {first: Antonio, last: Cardenal-Lopez} +antonio-castellanos: + names: + - {first: Antonio, last: Castellanos} + - {first: A., last: Castellanos} +antonio-fernandez-orquin: + names: + - {first: Antonio, last: Fernández Orquín} + - {first: Antonio, last: Fernandez Orquín} + - {first: Antonio, last: Fernández-Orquín} + - {first: Antonio, last: Fernández} +antonio-ferrandez: + names: + - {first: Antonio, last: Ferrández} + - {first: Antonio, last: Ferrandez} + - {first: A., last: Ferrandez} + - {first: A., last: Ferrández} +antonio-j-rubio: + names: + - {first: Antonio J., last: Rubio} + - {first: A.J., last: Rubio} +antonio-jimeno-yepes: + names: + - {first: Antonio, last: Jimeno Yepes} + - {first: Antonio Jimeno, last: Yepes} + - {first: Antonio José, last: Jimeno Yepes} + - {first: Antonio, last: Jimeno-Yepes} +antonio-l-lagarda: + names: + - {first: Antonio-L., last: Lagarda} + - {first: Antonio, last: Lagarda} + - {first: Antonio L., last: Lagarda} +antonio-moreno-ortiz: + comment: Univ. of Málaga + names: + - {first: Antonio, last: Moreno-Ortiz} + - {first: Antonio, last: Moreno Ortiz} + - {first: Antonio, last: Moreno} + similar: + - antonio-moreno-ribas + - antonio-moreno-sandoval +antonio-moreno-ribas: + comment: Univ. Rovira i Virgili + names: + - {first: Antonio, last: Moreno Ribas} + - {first: Antonio, last: Moreno} + similar: + - antonio-moreno-ortiz + - antonio-moreno-sandoval +antonio-moreno-sandoval: + comment: NYU, Univ. Autónoma de Madrid + names: + - {first: Antonio, last: Moreno-Sandoval} + - {first: Antonio Moreno, last: Sandoval} + - {first: Antonio, last: Moreno} + similar: + - antonio-moreno-ortiz + - antonio-moreno-ribas +antonio-pareja-lora: + names: + - {first: Antonio, last: Pareja Lora} + - {first: Antonio, last: Pareja-Lora} +antonio-rincon: + names: + - {first: Antonio, last: Rincón} + - {first: Antonio, last: Rincon} +antonio-s-valderrabanos: + names: + - {first: Antonio S., last: Valderrábanos} + - {first: Antonio S., last: Valderrabanos} +antonio-valerio-miceli-barone: + names: + - {first: Antonio Valerio, last: Miceli-Barone} + - {first: Antonio Valerio, last: Miceli Barone} +antonio-zampolli: + names: + - {first: Antonio, last: Zampolli} + - {first: A., last: Zampolli} +antti-veikko-rosti: + names: + - {first: Antti-Veikko, last: Rosti} + - {first: Antti-Veikko I., last: Rosti} +antton-gurrutxaga: + names: + - {first: Antton, last: Gurrutxaga} + - {first: A., last: Gurrutxaga} +anup-barman: + names: + - {first: Anup, last: Barman} + - {first: Anup Kr., last: Barman} +anup-kumar-kolya: + names: + - {first: Anup Kumar, last: Kolya} + - {first: Anup, last: Kumar Kolya} + - {first: Anup, last: Kolya} +aparna-nurani-venkitasubramanian: + names: + - {first: Aparna, last: Nurani Venkitasubramanian} + - {first: Aparna N., last: Venkitasubramanian} +arantza-casillas: + names: + - {first: Arantza, last: Casillas} + - {first: A., last: Casillas} +arantza-diaz-de-ilarraza: + names: + - {first: Arantza, last: Díaz de Ilarraza} + - {first: Arantza, last: Diaz de Ilarraza} + - {first: A, last: Diaz de Ilarraza} + - {first: A., last: Diaz de Ilarraza Sanchez} + - {first: A., last: Diaz de Ilarraza} + - {first: A., last: Díaz de Ilarraza} +aravind-joshi: + names: + - {first: Aravind, last: Joshi} + - {first: Aravind K., last: Joshi} + - {first: A., last: Joshi} + - {first: A.K., last: Joshi} + - {first: A. K., last: Joshi} + - {first: Aravin K., last: Joshi} +archibald-michiels: + names: + - {first: Archibald, last: Michiels} + - {first: A., last: Michiels} +ariadna-font-llitjos: + names: + - {first: Ariadna, last: Font Llitjós} + - {first: Ariadna, last: Font-Llitjos} + - {first: Ariadna, last: Font Llitjos} +ariadne-m-b-rizzoni-carvalho: + names: + - {first: Ariadne, last: M. B. Rizzoni Carvalho} + - {first: Ariadne M. B. R., last: Carvalho} +ariani-di-felippo: + names: + - {first: Ariani Di, last: Felippo} + - {first: Ariani, last: Di-Felippo} + - {first: Ariani, last: Di Felippo} +ariel-schwartz: + names: + - {first: Ariel, last: Schwartz} + - {first: Ariel S., last: Schwartz} +arienne-dwyer: + names: + - {first: Arienne, last: Dwyer} + - {first: A., last: Dwyer} +arjun-atreya-v: + names: + - {first: Arjun Atreya, last: V} + - {first: Arjun, last: Atreya V} + - {first: Arjun, last: Atreya} +arlindo-veiga: + names: + - {first: Arlindo, last: Veiga} + - {first: Arlindo O., last: Veiga} +armando-suarez: + names: + - {first: Armando, last: Suárez} + - {first: A., last: Suárez} +arnab-dhar: + names: + - {first: Arnab, last: Dhar} + - {first: Arnad, last: Dhar} +arnaldo-candido-jr: + names: + - {first: Arnaldo, last: 'Candido, Jr.'} + - {first: Arnaldo, last: Candido Jr.} + - {first: Arnaldo, last: Candido Jr} + - {first: Arnaldo, last: Candido} +arne-jonsson: + names: + - {first: Arne, last: Jönsson} + - {first: Arne, last: Jonsson} +artem-sokolov: + names: + - {first: Artem, last: Sokolov} + - {first: Artem, last: Sokokov} +artemis-parvizi: + names: + - {first: Artemis, last: Parvizi} + - {first: Artemis, last: Parvisi} +arthur-c-graesser: + names: + - {first: Arthur C., last: Graesser} + - {first: Art, last: Graesser} + - {first: Arthur, last: Graesser} +arturo-calvo-devesa: + names: + - {first: Arturo Calvo, last: Devesa} + - {first: Arturo, last: Calvo} +arturo-curiel: + names: + - {first: Arturo, last: Curiel} + - {first: Arturo, last: Curiel Díaz} +arturo-oncevay: + names: + - {first: Arturo, last: Oncevay} + - {first: Arturo, last: Oncevay-Marcos} +arturs-znotins: + names: + - {first: Arturs, last: Znotins} + - {first: Artūrs, last: Znotiņš} +arun-kumar-jayapal: + names: + - {first: Arun Kumar, last: Jayapal} + - {first: Arun, last: Jayapal} +arya-d-mccarthy: + names: + - {first: Arya D., last: McCarthy} + - {first: Arya, last: McCarthy} +arzucan-ozgur: + names: + - {first: Arzucan, last: Özgür} + - {first: Arzucan, last: Ozgur} +asad-sayeed: + names: + - {first: Asad, last: Sayeed} + - {first: Asad B., last: Sayeed} +ascension-gallardo-antolin: + names: + - {first: Ascension, last: Gallardo-Antolin} + - {first: Ascension, last: Gallardo} +ashish-v-tendulkar: + names: + - {first: Ashish V., last: Tendulkar} + - {first: Ashish, last: Tendulkar} +ashok-popat: + names: + - {first: Ashok, last: Popat} + - {first: Ashok C., last: Popat} +ashvin-kannan: + names: + - {first: Ashvin, last: Kannan} + - {first: A., last: Kannan} +asuncion-castano: + names: + - {first: Asunción, last: Castaño} + - {first: A., last: Castaño} +asuncion-gomez-perez: + names: + - {first: Asunción, last: Gómez-Pérez} + - {first: Asunción Gómez, last: Pérez} +asuncion-moreno: + names: + - {first: Asunción, last: Moreno} + - {first: Asuncion, last: Moreno} + - {first: Asuncíon, last: Moreno} + - {first: A., last: Moreno} +aswarth-abhilash-dara: + names: + - {first: Aswarth Abhilash, last: Dara} + - {first: Aswarth, last: Dara} +atanas-kiryakov: + names: + - {first: Atanas, last: Kiryakov} + - {first: Atanas K., last: Kiryakov} +atsuko-koizumi: + names: + - {first: Atsuko, last: Koizumi} + - {first: A., last: Koizumi} +atul-kr-ojha: + names: + - {first: Atul Kr., last: Ojha} + - {first: Atul Ku., last: Ojha} +audrey-le: + names: + - {first: Audrey, last: Le} + - {first: Audrey N., last: Le} +aurelie-herbelot: + names: + - {first: Aurélie, last: Herbelot} + - {first: Aurelie, last: Herbelot} +aurelie-neveol: + names: + - {first: Aurelie, last: Neveol} + - {first: Aurélie, last: Névéol} +avirup-sil: + names: + - {first: Avirup, last: Sil} + - {first: Avi, last: Sil} +ayisigi-b-sevdik-calli: + names: + - {first: Ayisigi B., last: Sevdik-Calli} + - {first: Ayişiği, last: Sevdik-Çalli} +ayush-kumar: + names: + - {first: Ayush, last: Kumar} + - {first: Kumar, last: Ayush} +azucena-montes-rendon: + names: + - {first: Azucena, last: Montes-Rendon} + - {first: Azucena, last: Montes} +baiba-saulite: + names: + - {first: Baiba, last: Saulīte} + - {first: Baiba, last: Saulite} +balamurali-ar: + names: + - {first: Balamurali, last: AR} + - {first: Balamurali, last: A.R.} + - {first: Balamurali, last: A.R} +balaraman-ravindran: + names: + - {first: Balaraman, last: Ravindran} + - {first: B., last: Ravindran} +balazs-kis: + names: + - {first: Balázs, last: Kis} + - {first: Balazs, last: Kis} +bali-ranaivo-malancon: + names: + - {first: Bali, last: Ranaivo-Malançon} + - {first: Bali, last: Ranaivo-Malancon} + - {first: Bali Ranaivo, last: Malanҫon} +bang-nguyen: + names: + - {first: Bang, last: Nguyen} + orcid: 0009-0002-8365-4562 +bao-liang-lu: + names: + - {first: Bao-Liang, last: Lu} + - {first: Bao-liang, last: Lu} +bao-quoc-ho: + names: + - {first: Bao Quoc, last: Ho} + - {first: Quoc, last: Ho} + - {first: Quoc, last: Ho Bao} +baobao-chang: + names: + - {first: Baobao, last: Chang} + - {first: Bao-Bao, last: Chang} +barathi-ganesh-h-b: + names: + - {first: Barathi, last: Ganesh H. B.} + - {first: Barathi, last: Ganesh HB} +barbara-bullock: + names: + - {first: Barbara, last: Bullock} + - {first: Barbara E., last: Bullock} +barbara-gawronska: + names: + - {first: Barbara, last: Gawronska} + - {first: Barbara, last: Gawronska-Werngren} + - {first: Barbara, last: Gawrońska-Werngren} +barbara-h-partee: + names: + - {first: Barbara H., last: Partee} + - {first: Barbara, last: Partee} +barbara-j-grosz: + names: + - {first: Barbara J., last: Grosz} + - {first: Barbara, last: Grosz} +barbara-lewandowska-tomaszyk: + names: + - {first: Barbara, last: Lewandowska-Tomaszyk} + - {first: Barbara, last: Lewandowska} +barbara-plank: + names: + - {first: Barbara, last: Plank} + - {first: B., last: Plank} +barbora-hladka: + names: + - {first: Barbora, last: Hladká} + - {first: Barbora, last: Hladka} + - {first: B., last: Hladká} +baskaran-sankaran: + names: + - {first: Baskaran, last: Sankaran} + - {first: Sankaran, last: Baskaran} +bassam-haddad: + names: + - {first: Bassam, last: Haddad} + - {first: B., last: Haddad} +bayan-abu-shawar: + names: + - {first: Bayan Abu, last: Shawar} + - {first: Bayan, last: Abu Shawar} +baye-yimam-mekonnen: + names: + - {first: Baye Yimam, last: Mekonnen} + - {first: Baye, last: Yimam} +bayu-distiawan: + names: + - {first: Bayu, last: Distiawan} + - {first: Bayu Distiawan, last: Trisedya} +beata-megyesi: + names: + - {first: Beáta, last: Megyesi} + - {first: Beata, last: Megyesi} + - {first: Beáta Bandmann, last: Megyesi} + - {first: Beáta B., last: Megyesi} +beata-trawinski: + names: + - {first: Beata, last: Trawiński} + - {first: Beata, last: Trawinski} +beatrice-alex: + names: + - {first: Beatrice, last: Alex} + - {first: Bea, last: Alex} +beatrice-daille: + names: + - {first: Béatrice, last: Daille} + - {first: Beatrice, last: Daille} +beatrice-oshika: + names: + - {first: Beatrice, last: Oshika} + - {first: Beatrice T., last: Oshika} +beatrice-santorini: + names: + - {first: Beatrice, last: Santorini} + - {first: B., last: Santorini} +begona-villada-moiron: + names: + - {first: Begoña Villada, last: Moirón} + - {first: Begoña, last: Villada Moirón} + - {first: Begoña, last: Villada} + - {first: M. Begoña Villada, last: Moirón} +behrang-qasemizadeh: + names: + - {first: Behrang, last: QasemiZadeh} + - {first: Behrang, last: Q. Zadeh} + - {first: Behrang Q., last: Zadeh} + - {first: Behrang, last: Zadeh} + - {first: Behrang, last: Qasemizadeh} +behrouz-minaei-bidgoli: + names: + - {first: Behrouz, last: Minaei-Bidgoli} + - {first: Behrouz, last: Minaei-bidgoli} + - {first: Behrouz, last: Minaei} +belem-priego-sanchez: + names: + - {first: Belém, last: Priego Sanchez} + - {first: Belem, last: Priego} +belinda-z-li: + names: + - {first: Belinda Z., last: Li} + - {first: Belinda, last: Li} +ben-swanson: + names: + - {first: Ben, last: Swanson} + - {first: Benjamin, last: Swanson} +ben-wellner: + names: + - {first: Ben, last: Wellner} + - {first: Benjamin, last: Wellner} +benjamin-borschinger: + names: + - {first: Benjamin, last: Börschinger} + - {first: Benjamin, last: Boerschinger} +benjamin-k-tsou: + names: + - {first: Benjamin K., last: Tsou} + - {first: Benjamin K.Y., last: Tsou} + - {first: Benjamin K., last: T’sou} + - {first: Benjamin, last: Tsou} + - {first: Benjamin K, last: Tsou} + - {first: B. K., last: T’sou} +benjamin-matthias-ruppik: + names: + - {first: Benjamin Matthias, last: Ruppik} + - {first: Benjamin, last: Ruppik} + orcid: 0000-0001-9035-9217 +benjamin-s-meyers: + names: + - {first: Benjamin S., last: Meyers} + - {first: Benjamin, last: Meyers} +benjamin-wing: + names: + - {first: Benjamin, last: Wing} + - {first: Ben, last: Wing} +benoit-crabbe: + names: + - {first: Benoit, last: Crabbé} + - {first: Benoît, last: Crabbé} +benoit-favre: + names: + - {first: Benoit, last: Favre} + - {first: Benoît, last: Favre} +benoit-gaillard: + names: + - {first: Benoit, last: Gaillard} + - {first: Benoît, last: Gaillard} +benoit-habert: + names: + - {first: Benoit, last: Habert} + - {first: Benoît, last: Habert} + - {first: B., last: Habert} +benoit-lavoie: + names: + - {first: Benoit, last: Lavoie} + - {first: B., last: Lavoie} +benoit-ozell: + names: + - {first: Benoît, last: Ozell} + - {first: Benoit, last: Ozell} +benoit-sagot: + names: + - {first: Benoît, last: Sagot} + - {first: Benoit, last: Sagot} +bente-maegaard: + names: + - {first: Bente, last: Maegaard} + - {first: B., last: Maegaard} +bento-carlos-dias-da-silva: + names: + - {first: Bento Carlos, last: Dias-da-Silva} + - {first: Bento Carlos Dias, last: da Silva} +berfin-aktas: + names: + - {first: Berfin, last: Aktaş} + - {first: Berfin, last: Aktas} +berkay-furkan-onder: + names: + - {first: Berkay Furkan, last: Önder} + - {first: Berkay, last: Önder} +bernard-merialdo: + names: + - {first: Bernard, last: Merialdo} + - {first: B., last: Merialdo} +bernard-seite: + names: + - {first: Bernard, last: Seite} + - {first: B., last: Seite} +bernard-vauquois: + names: + - {first: Bernard, last: Vauquois} + - {first: B., last: Vauquois} +bernardino-casas: + names: + - {first: Bernardino, last: Casas} + - {first: B., last: Casas} +bernardo-magnini: + names: + - {first: Bernardo, last: Magnini} + - {first: B., last: Magnini} +bernd-kiefer: + names: + - {first: Bernd, last: Kiefer} + - {first: B., last: Kiefer} +bernd-mobius: + names: + - {first: Bernd, last: Möbius} + - {first: Bernd, last: Mobius} +beth-ann-hockey: + names: + - {first: Beth Ann, last: Hockey} + - {first: Beth A., last: Hockey} + - {first: Beth, last: Hockey} + - {first: B. A., last: Hockey} +beth-m-sundheim: + names: + - {first: Beth M., last: Sundheim} + - {first: Beth, last: Sundheim} +bevan-jones: + names: + - {first: Bevan, last: Jones} + - {first: Bevan K., last: Jones} + - {first: Bevan Keeley, last: Jones} +bhadran-v-k: + names: + - {first: Bhadran V., last: K} + - {first: Bhadran, last: V K} + - {first: Bhadran V, last: K} +bhavana-dalvi: + names: + - {first: Bhavana, last: Dalvi} + - {first: Bhavana, last: Dalvi Mishra} +bhuvana-ramabhadran: + names: + - {first: Bhuvana, last: Ramabhadran} + - {first: B., last: Ramabhadran} +bianka-buschbeck: + names: + - {first: Bianka, last: Buschbeck} + - {first: Bianka, last: Buschbeck-Wolf} + - {first: B., last: Buschbeck} +bidyut-baran-chaudhuri: + names: + - {first: Bidyut Baran, last: Chaudhuri} + - {first: Bidyut B., last: Chaudhuri} + - {first: B. B., last: Chaudhuri} +biljana-drndarevic: + names: + - {first: Biljana, last: Drndarević} + - {first: Biljana, last: Drndarevic} +bill-byrne: + comment: University of Cambridge + names: + - {first: Bill, last: Byrne} + - {first: W., last: Byrne} + - {first: William, last: Byrne} + - {first: William J., last: Byrne} +bill-byrne-ucsd: + comment: UCSD Ph.d; https://www.linkedin.com/in/billb/ + names: + - {first: Bill, last: Byrne} +bill-yuchen-lin: + names: + - {first: Bill Yuchen, last: Lin} + - {first: Bill Y., last: Lin} +billy-t-m-wong: + names: + - {first: Billy T.M., last: Wong} + - {first: Billy T. M., last: Wong} +bingquan-liu: + names: + - {first: Bingquan, last: Liu} + - {first: BingQuan, last: Liu} +binyam-ephrem-seyoum: + names: + - {first: Binyam Ephrem, last: Seyoum} + - {first: Binyam, last: Ephrem} +binyam-gebrekidan-gebre: + names: + - {first: Binyam Gebrekidan, last: Gebre} + - {first: Binyam, last: Gebre} +birte-lonneker: + names: + - {first: Birte, last: Lönneker} + - {first: Birte, last: Lönneker-Rodman} + - {first: Birte, last: Loenneker-Rodman} +bjorn-gamback: + names: + - {first: Björn, last: Gambäck} + - {first: Bjorn, last: Gamback} + - {first: Björn, last: Gämback} +bjorn-rudzewitz: + names: + - {first: Björn, last: Rudzewitz} + - {first: Bjoern, last: Rudzewitz} +bjorn-schuller: + names: + - {first: Björn, last: Schuller} + - {first: Bjoern, last: Schuller} +blake-howald: + names: + - {first: Blake, last: Howald} + - {first: Blake Stephen, last: Howald} +bo-june-paul-hsu: + names: + - {first: Bo-June (Paul), last: Hsu} + - {first: Bo-june Paul, last: Hsu} + - {first: Bo-June Paul, last: Hsu} +bo-li-bh: + comment: BeiHang + names: + - {first: Bo, last: Li} +bo-li-cas: + comment: Chinese Academy of Sciences + names: + - {first: Bo, last: Li} +bo-li-nus: + comment: NUS, Google + names: + - {first: Bo, last: Li} + - {first: Troy, last: Lee} +bo-li-vanderbilt: + comment: Vanderbilt, UIUC + names: + - {first: Bo, last: Li} +bo-yeong-kang: + names: + - {first: Bo-Yeong, last: Kang} + - {first: Bo-yeong, last: Kang} +bodil-nistrup-madsen: + names: + - {first: Bodil, last: Nistrup Madsen} + - {first: Bodil Nistrup, last: Madsen} +bojana-dalbelo-basic: + names: + - {first: Bojana, last: Dalbelo Bašić} + - {first: Bojana Dalbelo, last: Bašić} +bolette-sandford-pedersen: + names: + - {first: Bolette Sandford, last: Pedersen} + - {first: Bolette, last: Sandford Pedersen} + - {first: Bolette S., last: Pedersen} + - {first: Bolette, last: Pedersen} + - {first: Bo, last: Pedersen} +bong-wan-kim: + names: + - {first: Bong-Wan, last: Kim} + - {first: Jong Wan, last: Kim} +bonnie-dorr: + names: + - {first: Bonnie, last: Dorr} + - {first: Bonnie J., last: Dorr} +bonnie-glover-stalls: + names: + - {first: Bonnie Glover, last: Stalls} + - {first: Bonnie, last: Glover} +bonnie-webber: + names: + - {first: Bonnie, last: Webber} + - {first: Bonnie L., last: Webber} + - {first: Bonnie Lynn, last: Webber} + - {first: B., last: Webber} + - {first: B.L., last: Nash-Webber} +bor-shen-lin: + names: + - {first: Bor-Shen, last: Lin} + - {first: Bor-shen, last: Lin} +boris-v-dobrov: + names: + - {first: Boris V., last: Dobrov} + - {first: Boris, last: Dobrov} + - {first: B., last: Dobrov} +borja-navarro: + names: + - {first: Borja, last: Navarro} + - {first: Borja, last: Navarro-Colorado} + - {first: B., last: Navarro} +borje-f-karlsson: + comment: https://github.com/acl-org/acl-anthology/issues/4041 + degree: PUC-Rio + names: + - {first: Börje F., last: Karlsson} + - {first: Börje, last: Karlsson} + orcid: 0000-0001-8925-360X +boubaker-meddeb-hamrouni: + names: + - {first: Boubaker, last: Meddeb-Hamrouni} + - {first: Boubaker, last: Meddeb Hamrouni} +boyan-onyshkevych: + names: + - {first: Boyan, last: Onyshkevych} + - {first: Boyan A., last: Onyshkevych} +brage-ekroll-jahren: + names: + - {first: Brage Ekroll, last: Jahren} + - {first: Brage, last: Jahren} +brandon-m-stewart: + names: + - {first: Brandon M., last: Stewart} + - {first: Brandon, last: Stewart} +branimir-boguraev: + names: + - {first: Branimir, last: Boguraev} + - {first: Branimir K., last: Boguraev} + - {first: Bran, last: Boguraev} + - {first: B.K., last: Boguraev} +brett-w-bader: + names: + - {first: Brett W., last: Bader} + - {first: Brett, last: Bader} +brian-cantwell-smith: + names: + - {first: Brian Cantwell, last: Smith} + - {first: Brian, last: Smith} +brian-j-chandler: + names: + - {first: Brian J., last: Chandler} + - {first: Brian, last: Chandler} + - {first: B. J., last: Chandler} +brian-kingsbury: + names: + - {first: Brian, last: Kingsbury} + - {first: B., last: Kingsbury} +brian-mitchell: + names: + - {first: Brian, last: Mitchell} + - {first: B., last: Mitchell} +brian-w-dillon: + names: + - {first: Brian W., last: Dillon} + - {first: Brian, last: Dillon} +bridget-mcinnes: + names: + - {first: Bridget, last: McInnes} + - {first: Bridget Thomson, last: McInnes} + - {first: Bridget T., last: McInnes} +brigitte-jorg: + names: + - {first: Brigitte, last: Jörg} + - {first: Brigitte, last: Jorg} +brigitte-roudaud: + names: + - {first: Brigitte, last: Roudaud} + - {first: B., last: Roudaud} +britta-zeller: + names: + - {first: Britta, last: Zeller} + - {first: Britta D., last: Zeller} +bruce-e-nevin: + names: + - {first: Bruce E., last: Nevin} + - {first: Bruce, last: Nevin} +bruce-porter: + names: + - {first: Bruce, last: Porter} + - {first: Bruce W., last: Porter} +bruce-w-ballard: + names: + - {first: Bruce W., last: Ballard} + - {first: Bruce, last: Ballard} + - {first: B., last: Ballard} +brunelle-magnana-ekoukou: + names: + - {first: Brunelle, last: Magnana Ekoukou} + - {first: Brunelle Magnana, last: Ekoukou} +bryan-pellom: + names: + - {first: Bryan, last: Pellom} + - {first: B., last: Pellom} +bryan-r-routledge: + names: + - {first: Bryan R., last: Routledge} + - {first: Bryan, last: Routledge} +byron-c-wallace: + names: + - {first: Byron C., last: Wallace} + - {first: Byron, last: Wallace} +byung-gyu-ahn: + names: + - {first: Byung-Gyu, last: Ahn} + - {first: Byung Gyu, last: Ahn} +c-h-lee: + names: + - {first: C. H., last: Lee} + - {first: C.-H., last: Lee} +c-j-rupp: + names: + - {first: C.J., last: Rupp} + - {first: C. J., last: Rupp} +c-mario-christoudias: + names: + - {first: C. Mario, last: Christoudias} + - {first: Mario, last: Christoudias} +c-raymond-perrault: + names: + - {first: C. Raymond, last: Perrault} + - {first: Raymond, last: Perrault} +cacilia-zirn: + names: + - {first: Cäcilia, last: Zirn} + - {first: Caecilia, last: Zirn} +cagil-sonmez: + names: + - {first: Cagil, last: Sonmez} + - {first: Çağıl, last: Sönmez} + - {first: Cagil, last: Sönmez} +calkin-s-montero: + names: + - {first: Calkin S., last: Montero} + - {first: Calkin, last: Montero} +cam-tu-nguyen: + names: + - {first: Cam-Tu, last: Nguyen} + - {first: Cẩm Tú, last: Nguyễn} +cameron-shaw-fordyce: + names: + - {first: Cameron Shaw, last: Fordyce} + - {first: Cameron, last: Fordyce} +canberk-ozdemir: + names: + - {first: Canberk, last: Ozdemir} + - {first: Canberk, last: Özdemir} +candace-a-kamm: + names: + - {first: Candace A., last: Kamm} + - {first: Candace, last: Kamm} +candace-l-sidner: + names: + - {first: Candace L., last: Sidner} + - {first: Candace, last: Sidner} +carl-de-marcken: + names: + - {first: Carl, last: de Marcken} + - {first: Carl G., last: de Marcken} +carl-pollard: + names: + - {first: Carl, last: Pollard} + - {first: Carl J., last: Pollard} +carla-parra-escartin: + names: + - {first: Carla, last: Parra Escartín} + - {first: Carla, last: Parra} + - {first: Carla Parra, last: Escartín} + - {first: Carla Parra, last: Escartin} +carlos-a-iglesias: + names: + - {first: Carlos A., last: Iglesias} + - {first: Carlos, last: Iglesias} +carlos-a-prolo: + names: + - {first: Carlos A., last: Prolo} + - {first: Carlos, last: Prolo} +carlos-alzate: + names: + - {first: Carlos, last: Alzate} + - {first: Carlos, last: Alzate Perez} +carlos-d-martinez-hinarejos: + names: + - {first: Carlos-D., last: Martínez-Hinarejos} + - {first: Carlos D., last: Martínez-Hinarejos} + - {first: Carlos D., last: Martínez Hinarejos} + - {first: Carlos D., last: Martínez} +carlos-henriquez: + names: + - {first: Carlos, last: Henríquez} + - {first: Carlos, last: Henriquez} + - {first: Carlos A., last: Henríquez Q.} +carlos-mario-zapata-jaramillo: + names: + - {first: Carlos Mario, last: Zapata Jaramillo} + - {first: Carlos M., last: Zapata Jaramillo} +carlos-martin-vide: + names: + - {first: Carlos, last: Martín-Vide} + - {first: Carlos Martin, last: Vide} +carlos-rodriguez-penagos: + names: + - {first: Carlos, last: Rodriguez-Penagos} + - {first: Carlos, last: Rodríguez} + - {first: Carlos Rodriguez, last: Penagos} + - {first: Carlos, last: Rodríguez Penagos} + - {first: Carlos, last: Rodríguez-Penagos} +carlos-subirats-ruggeberg: + names: + - {first: Carlos Subirats, last: Rüggeberg} + - {first: Carlos, last: Subirats} +carmen-garcia-mateo: + names: + - {first: Carmen, last: Garcia-Mateo} + - {first: Carmen, last: García-Mateo} +carol-friedman: + names: + - {first: Carol, last: Friedman} + - {first: C., last: Friedman} +carol-van-ess-dykema: + names: + - {first: Carol, last: Van Ess-Dykema} + - {first: Carol J., last: Van Ess-Dykema} + - {first: Carol, last: VanEss-Dykema} +carole-bergamini: + names: + - {first: Carole, last: Bergamini} + - {first: C., last: Bergamini} +carolina-scarton: + names: + - {first: Carolina, last: Scarton} + - {first: Carolina Evaristo, last: Scarton} +caroline-barriere: + names: + - {first: Caroline, last: Barriere} + - {first: Caroline, last: Barrière} +caroline-bousquet-vernhettes: + names: + - {first: Caroline, last: Bousquet-Vernhettes} + - {first: Caroline, last: Bousquet} +carolyn-rose: + names: + - {first: Carolyn, last: Rose} + - {first: Carolyn P., last: Rose} + - {first: Carolyn P., last: Rosé} + - {first: Carolyn, last: P. Rosé} + - {first: Carolyn Penstein, last: Rose} + - {first: Carolyn, last: Penstein Rosé} + - {first: Carolyn Penstein, last: Rosé} + - {first: Carolyn, last: Penstein-Rosé} + - {first: Carolyn, last: Rosé} + - {first: C. P., last: Rose} +casey-kennington: + names: + - {first: Casey, last: Kennington} + - {first: Casey Redd, last: Kennington} +cassandra-l-jacobs: + names: + - {first: Cassandra L., last: Jacobs} + - {first: Cassandra, last: Jacobs} +cassia-trojahn: + names: + - {first: Cassia, last: Trojahn} + - {first: Cássia, last: Trojahn} +catalin-mititelu: + names: + - {first: Catalin, last: Mititelu} + - {first: Cătălin, last: Mititelu} +catalina-barbu: + names: + - {first: Cătălina, last: Barbu} + - {first: Catalina, last: Barbu} + - {first: C., last: Barbu} +catalina-oana-tudor: + names: + - {first: Catalina Oana, last: Tudor} + - {first: Catalina O., last: Tudor} +catherine-chen-bu: + comment: Brown + names: + - {first: Catherine, last: Chen} +catherine-chen-ucberkley: + comment: UC Berkley + names: + - {first: Catherine, last: Chen} +catherine-i-watson: + names: + - {first: Catherine I., last: Watson} + - {first: Catherine, last: Watson} +catherine-macleod: + names: + - {first: Catherine, last: Macleod} + - {first: Catherine, last: MacLeod} +catherine-n-ball: + names: + - {first: Catherine N., last: Ball} + - {first: Catherine, last: Ball} +catia-cucchiarini: + names: + - {first: Catia, last: Cucchiarini} + - {first: C., last: Cucchiarini} +cecile-fabre: + names: + - {first: Cécile, last: Fabre} + - {first: Cecile, last: Fabre} +cecile-paris: + names: + - {first: Cecile, last: Paris} + - {first: Cécile, last: Paris} + - {first: Cecile L., last: Paris} +cecilia-ovesdotter-alm: + names: + - {first: Cecilia, last: Ovesdotter Alm} + - {first: Cecilia Ovesdotter, last: Alm} + - {first: Cecilia O., last: Alm} + - {first: Cecilia, last: O. Alm} +cedric-gendrot: + names: + - {first: Cédric, last: Gendrot} + - {first: Cedric, last: Gendrot} +cedrick-fairon: + names: + - {first: Cédrick, last: Fairon} + - {first: Cedrick, last: Fairon} +celine-de-looze: + names: + - {first: Céline, last: de Looze} + - {first: Céline, last: Delooze} + - {first: Céline, last: De Looze} + - {first: Celine, last: De Looze} +cem-bozsahin: + names: + - {first: Cem, last: Bozsahin} + - {first: Cem, last: Bozşahin} + - {first: H. Cem, last: Bozsahin} +cenel-augusto-perez: + names: + - {first: Cenel-Augusto, last: Perez} + - {first: Cenel Augusto, last: Perez} +chafic-mukbel: + names: + - {first: Chafic, last: Mukbel} + - {first: C., last: Mukbel} +chalathip-thumkanon: + names: + - {first: Chalathip, last: Thumkanon} + - {first: Chalatip, last: Thumkanon} +chan-hung-kuo: + names: + - {first: Chan-hung, last: Kuo} + - {first: Chan-Hung, last: Kuo} +chandra-kiran-reddy-evuru: + names: + - {first: Chandra Kiran Reddy, last: Evuru} + - {first: Chandra Kiran, last: Evuru} +chang-hyun-kim: + names: + - {first: Chang-Hyun, last: Kim} + - {first: Changhyun, last: Kim} + - {first: Chang Hyun, last: Kim} +changning-huang: + names: + - {first: Changning, last: Huang} + - {first: Chang-Ning, last: Huang} + - {first: Chang-ning, last: Huang} +changye-li-umn: + degree: University of Minnesota + disable_name_matching: true + names: + - {first: Changye, last: Li} + orcid: 0000-0002-9743-7406 +chantal-enguehard: + names: + - {first: Chantal, last: Enguehard} + - {first: C., last: Enguehard} +chantal-perez-hernandez: + names: + - {first: Chantal, last: Pérez-Hernández} + - {first: Chantal, last: Pérez} +chao-han-huck-yang: + names: + - {first: Chao-Han Huck, last: Yang} + - {first: Huck Chao-Han, last: Yang} +chao-jan-chen: + names: + - {first: Chao-Jan, last: Chen} + - {first: Chao-jan, last: Chen} +chao-zhang-tu: + comment: Tsinghua University + names: + - {first: Chao, last: Zhang} + - {first: Zhang, last: Chao} +chaoqun-liu-ntu: + comment: NTU + disable_name_matching: true + names: + - {first: Chaoqun, last: Liu} + orcid: 0000-0001-8014-2516 +charles-b-callaway: + names: + - {first: Charles B., last: Callaway} + - {first: Charles, last: Callaway} +charles-c-lee: + names: + - {first: Charles C., last: Lee} + - {first: Charles, last: Lee} +charles-chen-jr: + names: + - {first: Charles, last: 'Chen, Jr.'} + - {first: Charles, last: Chen} + - {first: Charles, last: Chen Jr.} +charles-j-fillmore: + names: + - {first: Charles J., last: Fillmore} + - {first: Charles, last: Fillmore} +charles-l-a-clarke: + names: + - {first: Charles L. A., last: Clarke} + - {first: C. L. A., last: Clarke} +charles-p-dolan: + names: + - {first: Charles P., last: Dolan} + - {first: Charles, last: Dolan} +charles-t-hemphill: + names: + - {first: Charles T., last: Hemphill} + - {first: Charles, last: Hemphill} +charles-yang: + names: + - {first: Charles, last: Yang} + - {first: Charles D., last: Yang} +chau-minh-pham: + names: + - {first: Chau Minh, last: Pham} + - {first: Chau, last: Pham} + orcid: 0009-0004-0435-7450 +chedi-bechikh-ali: + names: + - {first: Chedi, last: Bechikh Ali} + - {first: Chedi, last: Bechikh} +chee-wee-leong: + names: + - {first: Chee Wee, last: Leong} + - {first: Chee Wee (Ben), last: Leong} +cheikh-m-bamba-dione: + names: + - {first: Cheikh M. Bamba, last: Dione} + - {first: Cheikh Bamba, last: Dione} +chen-cecilia-liu: + comment: Technische Universität Darmstadt + disable_name_matching: true + names: + - {first: Chen, last: Cecilia Liu} + - {first: Chen, last: Liu} + orcid: 0009-0004-2382-8609 +chen-zhang-peking: + disable_name_matching: true + names: + - {first: Chen, last: Zhang} + orcid: 0000-0001-5842-0516 +cheng-chao-su: + names: + - {first: Cheng-chao, last: Su} + - {first: Cheng-Chao, last: Su} +cheng-der-chen: + names: + - {first: Cheng-Der, last: Chen} + - {first: Cheng-der, last: Chen} +cheng-jie-sun: + names: + - {first: Cheng-Jie, last: Sun} + - {first: Chengjie, last: Sun} +cheng-ming-guo: + names: + - {first: Cheng-ming, last: Guo} + - {first: Cheng Ming, last: Guo} +cheng-yan-kao: + names: + - {first: Cheng-yan, last: Kao} + - {first: Cheng-Yan, last: Kao} + - {first: Cheng Yan, last: Kao} +cheng-yuan-lin: + names: + - {first: Cheng-Yuan, last: Lin} + - {first: Cheng Yuan, last: Lin} +chengqing-zong: + names: + - {first: Chengqing, last: Zong} + - {first: Cheng-qing, last: Zong} +chengxiang-zhai: + names: + - {first: ChengXiang, last: Zhai} + - {first: Chengxiang, last: Zhai} +cheol-jung-kweon: + names: + - {first: Cheol Jung, last: Kweon} + - {first: Cheoljung, last: Kweon} +cheol-young-ock: + names: + - {first: Cheol-Young, last: Ock} + - {first: Cheolyoung, last: Ock} + - {first: Cheol-young, last: Ock} +chew-lim-tan: + names: + - {first: Chew Lim, last: Tan} + - {first: Chew-Lim, last: Tan} + - {first: ChewLim, last: Tan} +chi-chun-lee: + names: + - {first: Chi-Chun, last: Lee} + - {first: Chi-Chun (Jeremy), last: Lee} + - {first: Chi-Chun Jeremy, last: Lee} +chi-san-althon-lin: + names: + - {first: Chi-san Althon, last: Lin} + - {first: Chi-San, last: Lin} + - {first: Chi-San Althon, last: Lin} +chi-shing-wang: + names: + - {first: Chi-Shing, last: Wang} + - {first: Chi-shing, last: Wang} +chi-shun-cheung: + names: + - {first: Chi-Shun, last: Cheung} + - {first: Chi Shun, last: Cheung} +chi-yao-lee: + names: + - {first: Chi-Yao, last: Lee} + - {first: Chih-yao, last: Lee} + - {first: Chih-Yao, last: Lee} +chia-hung-tai: + names: + - {first: Chia-Hung, last: Tai} + - {first: Chia-hung, last: Tai} +chia-lung-wu: + names: + - {first: Chia-Lung, last: Wu} + - {first: Chia-Long, last: Wu} +chia-ming-lee: + names: + - {first: Chia-ming, last: Lee} + - {first: Chia-Ming, last: Lee} +chia-ying-lee: + names: + - {first: Chia-Ying, last: Lee} + - {first: Chia-ying, last: Lee} +chih-lung-lin: + names: + - {first: Chih-Lung, last: Lin} + - {first: Chih-Long, last: Lin} +chih-ming-chiu: + names: + - {first: Chih-Ming, last: Chiu} + - {first: Chih-ming, last: Chiu} +chiharu-uda-kikuta: + names: + - {first: Chiharu Uda, last: Kikuta} + - {first: Chiharu, last: Uda} +chin-ting-liu: + names: + - {first: Chin-Ting, last: Liu} + - {first: Chin-Ting Jimbo, last: Liu} +chin-yew-lin: + names: + - {first: Chin-Yew, last: Lin} + - {first: ChinYew, last: Lin} +ching-sheng-lin: + names: + - {first: Ching-sheng, last: Lin} + - {first: Ching-Sheng, last: Lin} +ching-yun-chang: + names: + - {first: Ching Yun, last: Chang} + - {first: Ching-Yun, last: Chang} +chiu-yu-tseng: + names: + - {first: Chiu-yu, last: Tseng} + - {first: Chiu-Yu, last: Tseng} +chiung-hui-tseng: + names: + - {first: Chiung-hui, last: Tseng} + - {first: Chiung-Hui, last: Tseng} +chiwei-che: + names: + - {first: Chiwei, last: Che} + - {first: C., last: Che} +chloe-kiddon: + names: + - {first: Chloé, last: Kiddon} + - {first: Chloe, last: Kiddon} +chomicha-bendahman: + names: + - {first: Chomicha, last: Bendahman} + - {first: C., last: Bendahman} +chong-zhang-xjtlu: + degree: Xi'an Jiaotong-Liverpool University + disable_name_matching: true + names: + - {first: Chong, last: Zhang} + orcid: 0009-0003-2020-6989 +chooi-ling-goh: + names: + - {first: Chooi-Ling, last: Goh} + - {first: Chooi Ling, last: Goh} +chris-barry: + names: + - {first: Chris, last: Barry} + - {first: C., last: Barry} +chris-biemann: + names: + - {first: Chris, last: Biemann} + - {first: Christian, last: Biemann} +chris-buckley: + names: + - {first: Chris, last: Buckley} + - {first: C., last: Buckley} +chris-clark: + names: + - {first: Chris, last: Clark} + - {first: Christine, last: Clark} +chris-culy: + names: + - {first: Chris, last: Culy} + - {first: Christopher, last: Culy} +chris-dyer: + names: + - {first: Chris, last: Dyer} + - {first: Christopher, last: Dyer} + - {first: Christopher J., last: Dyer} +chris-hokamp: + names: + - {first: Chris, last: Hokamp} + - {first: Christopher, last: Hokamp} +chris-irwin-davis: + names: + - {first: Chris Irwin, last: Davis} + - {first: Chris, last: Davis} +chris-mellish: + names: + - {first: Chris, last: Mellish} + - {first: Chris S., last: Mellish} + - {first: C, last: Mellish} + - {first: C. S., last: Mellish} +chris-reed: + names: + - {first: Chris, last: Reed} + - {first: C., last: Reed} +chris-thomas: + disable_name_matching: true + names: + - {first: Chris, last: Thomas} + - {first: Christopher, last: Thomas} + orcid: 0000-0002-3226-396X +chris-welty: + names: + - {first: Chris, last: Welty} + - {first: Christopher, last: Welty} +christelle-ayache: + names: + - {first: Christelle, last: Ayache} + - {first: C., last: Ayache} +christian-blaschke: + names: + - {first: Christian, last: Blaschke} + - {first: C., last: Blaschke} +christian-boitet: + names: + - {first: Christian, last: Boitet} + - {first: Ch., last: Boitet} +christian-f-hempelmann: + names: + - {first: Christian F., last: Hempelmann} + - {first: Christian, last: Hempelmann} +christian-fluhr: + names: + - {first: Christian, last: Fluhr} + - {first: C., last: Fluhr} +christian-girardi: + names: + - {first: Christian, last: Girardi} + - {first: C., last: Girardi} +christian-hanig: + names: + - {first: Christian, last: Hänig} + - {first: Christian, last: Haenig} +christian-huyck: + names: + - {first: Christian, last: Huyck} + - {first: C., last: Huyck} +christian-m-i-m-matthiessen: + names: + - {first: Christian M.I.M., last: Matthiessen} + - {first: Christian M. I. M., last: Matthiessen} +christian-moldovan: + names: + - {first: Christian, last: Moldovan} + - {first: Cristian, last: Moldovan} +christian-morbidoni: + names: + - {first: Christian, last: Morbidoni} + - {first: C., last: Morbidoni} +christian-mueller: + names: + - {first: Christian, last: Mueller} + - {first: Christian, last: Müller} +christine-a-montgomery: + names: + - {first: Christine A., last: Montgomery} + - {first: Christine, last: Montgomery} +christine-doran: + names: + - {first: Christine, last: Doran} + - {first: C, last: Doran} +christine-h-nakatani: + names: + - {first: Christine H., last: Nakatani} + - {first: Christine, last: Nakatani} +christine-piatko: + names: + - {first: Christine, last: Piatko} + - {first: Christine D., last: Piatko} +christof-muller: + names: + - {first: Christof, last: Müller} + - {first: Christof E., last: Müller} +christoph-muller: + names: + - {first: Christoph, last: Müller} + - {first: Christoph, last: Mueller} +christoph-tillmann: + names: + - {first: Christoph, last: Tillmann} + - {first: C., last: Tillmann} +christophe-collet: + names: + - {first: Christophe, last: Collet} + - {first: C., last: Collet} +christophe-costa-florencio: + names: + - {first: Christophe, last: Costa Florêncio} + - {first: Christophe Costa, last: Florencio} +christophe-fouquere: + names: + - {first: Christophe, last: Fouqueré} + - {first: C., last: Fouquere} +christophe-laprun: + names: + - {first: Christophe, last: Laprun} + - {first: Christophe D., last: Laprun} +christophe-roeder: + names: + - {first: Christophe, last: Roeder} + - {first: Chris, last: Roeder} +christophe-servan: + names: + - {first: Christophe, last: Servan} + - {first: C., last: Servan} +christopher-bogart: + names: + - {first: Christopher, last: Bogart} + - {first: Chris, last: Bogart} +christopher-caruso: + names: + - {first: Christopher, last: Caruso} + - {first: Chris, last: Caruso} +christopher-chute: + names: + - {first: Christopher, last: Chute} + - {first: Christopher G., last: Chute} +christopher-cieri: + names: + - {first: Christopher, last: Cieri} + - {first: Chris, last: Cieri} +christopher-d-manning: + names: + - {first: Christopher D., last: Manning} + - {first: Christopher, last: Manning} + - {first: Chris, last: Manning} +christopher-dozier: + names: + - {first: Christopher, last: Dozier} + - {first: Christopher C., last: Dozier} +christopher-habel: + names: + - {first: Christopher, last: Habel} + - {first: Christopher U., last: Habel} +christopher-j-c-burges: + names: + - {first: Christopher J.C., last: Burges} + - {first: Chris J.C., last: Burges} +christopher-jones: + names: + - {first: Christopher, last: Jones} + - {first: Chris, last: Jones} +christopher-m-white: + names: + - {first: Christopher M., last: White} + - {first: C. M., last: White} +christopher-meek: + names: + - {first: Christopher, last: Meek} + - {first: Chris, last: Meek} +christopher-mitchell: + names: + - {first: Christopher, last: Mitchell} + - {first: Christopher M., last: Mitchell} +christopher-pal: + names: + - {first: Christopher, last: Pal} + - {first: Chris, last: Pal} +christopher-pennington: + names: + - {first: Christopher, last: Pennington} + - {first: Chris, last: Pennington} +christopher-r-walker: + names: + - {first: Christopher R., last: Walker} + - {first: Christopher, last: Walker} + - {first: Christopher R, last: Walker} +chrysanne-dimarco: + names: + - {first: Chrysanne, last: DiMarco} + - {first: Chrysanne, last: Di Marco} +chun-jen-lee: + names: + - {first: Chun-Jen, last: Lee} + - {first: Chun-Jun, last: Lee} +chun-kai-wu: + names: + - {first: Chun-Kai, last: Wu} + - {first: Kevin Chun-Kai, last: Wu} +chun-nan-hsu: + names: + - {first: Chun-nan, last: Hsu} + - {first: Chun-Nan, last: Hsu} +chung-chi-huang: + names: + - {first: Chung-Chi, last: Huang} + - {first: Chung-chi, last: Huang} +chung-hye-han: + names: + - {first: Chung-hye, last: Han} + - {first: Chung-Hye, last: Han} + - {first: Chunghye, last: Han} +chung-yong-lim: + names: + - {first: Chung Yong, last: Lim} + - {first: Daniel Chung Yong, last: Lim} +chungmin-lee: + names: + - {first: Chungmin, last: Lee} + - {first: Chong Min, last: Lee} + - {first: Chung-min, last: Lee} +chunyang-jiang-hkust: + comment: HKUST + disable_name_matching: true + names: + - {first: Chunyang, last: Jiang} + orcid: 0009-0005-3401-4093 +chunyu-kit: + names: + - {first: Chunyu, last: Kit} + - {first: Chun-yu, last: Kit} +cicero-dos-santos: + names: + - {first: Cicero, last: dos Santos} + - {first: Cícero, last: dos Santos} + - {first: Cícero Nogueira, last: dos Santos} + - {first: Cicero, last: Nogueira dos Santos} + - {first: Cícero, last: Nogueira dos Santos} +ciprian-bacalu: + names: + - {first: Ciprian, last: Bacalu} + - {first: C., last: Bacalu} +claire-bonial: + names: + - {first: Claire, last: Bonial} + - {first: Claire N., last: Bonial} +claire-cardie: + names: + - {first: Claire, last: Cardie} + - {first: C., last: Cardie} +claire-francois: + names: + - {first: Claire, last: François} + - {first: Claire, last: Francois} +claire-nedellec: + names: + - {first: Claire, last: Nédellec} + - {first: Claire, last: Nėdellec} +clare-voss: + names: + - {first: Clare, last: Voss} + - {first: Clare R., last: Voss} +claude-barras: + names: + - {first: Claude, last: Barras} + - {first: C., last: Barras} +claude-de-loupy: + names: + - {first: Claude, last: de Loupy} + - {first: Claude, last: De Loupy} +claudia-freitas: + names: + - {first: Cláudia, last: Freitas} + - {first: Claudia, last: Freitas} +claudia-gdaniec: + names: + - {first: Claudia, last: Gdaniec} + - {first: C., last: Gdaniec} +claudio-giuliano: + names: + - {first: Claudio, last: Giuliano} + - {first: C., last: Giuliano} +claudiu-musat: + names: + - {first: Claudiu, last: Musat} + - {first: Claudiu-Cristian, last: Musat} +clement-de-groc: + names: + - {first: Clément, last: de Groc} + - {first: Clément, last: De Groc} +clement-jonquet: + names: + - {first: Clement, last: Jonquet} + - {first: Clément, last: Jonquet} +clement-t-yu: + names: + - {first: Clement T., last: Yu} + - {first: Clement, last: Yu} +cleo-jullien: + names: + - {first: Cléo, last: Jullien} + - {first: Cleo, last: Jullien} +clifford-j-weinstein: + names: + - {first: Clifford J., last: Weinstein} + - {first: Clifford, last: Weinstein} +clint-burfoot: + names: + - {first: Clint, last: Burfoot} + - {first: Clinton, last: Burfoot} +colin-batchelor: + names: + - {first: Colin, last: Batchelor} + - {first: Colin R., last: Batchelor} +colin-w-wightman: + names: + - {first: Colin W., last: Wightman} + - {first: C.W., last: Wightman} + - {first: C. W., last: Wightman} +collin-f-baker: + names: + - {first: Collin F., last: Baker} + - {first: Collin, last: Baker} +cong-kai-lin: + names: + - {first: Cong-kai, last: Lin} + - {first: Cong-Kai, last: Lin} +cong-liu-fau: + comment: Florida Atlantic University + names: + - {first: Cong, last: Liu} +cong-liu-iflytek: + comment: iFLYTEK Research + names: + - {first: Cong, last: Liu} +cong-liu-ucr: + comment: University of California, Riverside + names: + - {first: Cong, last: Liu} +constantin-orasan: + names: + - {first: Constantin, last: Orasan} + - {first: Constantin, last: Orăsan} + - {first: C., last: Orasan} +constantine-d-spyropoulos: + names: + - {first: Constantine D., last: Spyropoulos} + - {first: Constantine, last: Spyropoulos} +constantine-papageorgiou: + names: + - {first: Constantine, last: Papageorgiou} + - {first: Constantine P., last: Papageorgiou} +corina-forascu: + names: + - {first: Corina, last: Forăscu} + - {first: Corina, last: Forascu} +corinna-onelli: + names: + - {first: Corinna, last: Onelli} + - {first: C., last: Onelli} +corrado-seidenari: + names: + - {first: Corrado, last: Seidenari} + - {first: C., last: Seidenari} +cory-hayes: + names: + - {first: Cory, last: Hayes} + - {first: Cory J., last: Hayes} +cosmin-adrian-bejan: + names: + - {first: Cosmin Adrian, last: Bejan} + - {first: Cosmin, last: Adrian Bejan} + - {first: Cosmin, last: Bejan} +courtney-d-corley: + names: + - {first: Courtney D., last: Corley} + - {first: Courtney, last: Corley} +cristian-ursu: + names: + - {first: Cristian, last: Ursu} + - {first: Christian, last: Ursu} +cristina-sanchez-marco: + names: + - {first: Cristina, last: Sánchez-Marco} + - {first: Cristina, last: Marco} + - {first: Cristina Sánchez, last: Marco} +curry-i-guinn: + names: + - {first: Curry I., last: Guinn} + - {first: Curry, last: Guinn} +cyril-goutte: + names: + - {first: Cyril, last: Goutte} + - {first: C., last: Goutte} +d-s-bree: + names: + - {first: D. S., last: Bree} + - {first: D.S., last: Bree} +d-terence-langendoen: + names: + - {first: D. Terence, last: Langendoen} + - {first: Terence, last: Langendoen} +daan-broeder: + names: + - {first: Daan, last: Broeder} + - {first: D., last: Broeder} +daisy-zhe-wang: + names: + - {first: Daisy Zhe, last: Wang} + - {first: Zhe, last: Wang} +dale-a-miller: + names: + - {first: Dale A., last: Miller} + - {first: Dale, last: Miller} +damaris-ayuso: + names: + - {first: Damaris, last: Ayuso} + - {first: Damaris M., last: Ayuso} + - {first: D., last: Ayuso} +damir-cavar: + names: + - {first: Damir, last: Ćavar} + - {first: Damir, last: Cavar} +dan-bohus: + names: + - {first: Dan, last: Bohus} + - {first: Dan, last: Bohuş} + - {first: D., last: Bohus} +dan-flickinger: + names: + - {first: Dan, last: Flickinger} + - {first: Daniel, last: Flickinger} + - {first: Daniel P., last: Flickinger} + - {first: D., last: Flickenger} +dan-gillick: + names: + - {first: Dan, last: Gillick} + - {first: Daniel, last: Gillick} +dan-hunter: + names: + - {first: Dan, last: Hunter} + - {first: D., last: Hunter} +dan-jurafsky: + names: + - {first: Dan, last: Jurafsky} + - {first: Daniel, last: Jurafsky} +dan-mcfarland: + names: + - {first: Dan, last: McFarland} + - {first: Daniel, last: McFarland} + - {first: Daniel A., last: McFarland} +dan-moldovan: + names: + - {first: Dan, last: Moldovan} + - {first: Dan I., last: Moldovan} + - {first: D., last: Moldovan} +dan-simonson: + names: + - {first: Dan, last: Simonson} + - {first: Daniel, last: Simonson} +dan-stefanescu: + names: + - {first: Dan, last: Stefanescu} + - {first: Dan, last: Ştefănescu} + - {first: Dan, last: Ştefanescu} + - {first: Dan, last: Ștefănescu} +dan-tufis: + names: + - {first: Dan, last: Tufiş} + - {first: Dan, last: Tufis} + - {first: Dan, last: Tufiș} +dan-zhang-tsinghua: + comment: Tsinghua University + disable_name_matching: true + names: + - {first: Dan, last: Zhang} +danica-damljanovic: + names: + - {first: Danica, last: Damljanović} + - {first: Danica, last: Damljanovic} +danie-j-prinsloo: + names: + - {first: Danie J., last: Prinsloo} + - {first: Danie, last: Prinsloo} +daniel-bachut: + names: + - {first: Daniel, last: Bachut} + - {first: D., last: Bachut} +daniel-bastos-pereira: + names: + - {first: Daniel Bastos, last: Pereira} + - {first: Daniel B., last: Pereira} +daniel-beck: + names: + - {first: Daniel, last: Beck} + - {first: Daniel Emilio, last: Beck} +daniel-bobrow: + names: + - {first: Daniel, last: Bobrow} + - {first: Daniel G., last: Bobrow} +daniel-bolanos: + names: + - {first: Daniel, last: Bolaños} + - {first: Daniel, last: Bolanos} +daniel-couto-vale: + names: + - {first: Daniel, last: Couto Vale} + - {first: Daniel, last: Couto-Vale} + - {first: Daniel, last: Vale} +daniel-dechelotte: + names: + - {first: Daniel, last: Déchelotte} + - {first: Daniel, last: Dechelotte} +daniel-ferres: + names: + - {first: Daniel, last: Ferrés} + - {first: Dani, last: Ferrés} +daniel-g-swanson: + names: + - {first: Daniel G., last: Swanson} + - {first: Daniel, last: Swanson} +daniel-hernandez-lopez: + names: + - {first: Daniel, last: Hernandez-Lopez} + - {first: Daniel Hernández, last: López} +daniel-hromada: + names: + - {first: Daniel, last: Hromada} + - {first: Daniel Devatman, last: Hromada} + - {first: Daniel, last: Devatman Hromada} +daniel-keim: + names: + - {first: Daniel, last: Keim} + - {first: Daniel A., last: Keim} +daniel-m-bikel: + names: + - {first: Daniel M., last: Bikel} + - {first: Daniel, last: Bikel} + - {first: Dan, last: Bikel} +daniel-maxwell: + names: + - {first: Daniel, last: Maxwell} + - {first: Dan, last: Maxwell} +daniel-ortiz-martinez: + names: + - {first: Daniel, last: Ortiz-Martínez} + - {first: Daniel, last: Ortíz-Martínez} + - {first: Daniel, last: Ortiz Martínez} +daniel-p-mills: + names: + - {first: Daniel P., last: Mills} + - {first: Daniel, last: Mills} +daniel-paiva: + names: + - {first: Daniel, last: Paiva} + - {first: Daniel S., last: Paiva} + - {first: D, last: Paiva} + - {first: D., last: Paiva} +daniel-peterson: + names: + - {first: Daniel, last: Peterson} + - {first: Daniel W., last: Peterson} +daniel-povey: + names: + - {first: Daniel, last: Povey} + - {first: D., last: Povey} +daniel-preotiuc-pietro: + names: + - {first: Daniel, last: Preoţiuc-Pietro} + - {first: Daniel, last: Preotiuc-Pietro} +daniel-s-weld: + names: + - {first: Daniel S., last: Weld} + - {first: Daniel, last: Weld} + - {first: Dan, last: Weld} +daniel-sanchez-cisneros: + names: + - {first: Daniel, last: Sanchez-Cisneros} + - {first: Daniel, last: Sánchez} +daniel-tapias: + names: + - {first: Daniel, last: Tapias} + - {first: Daniel Tapias, last: Merino} +daniel-varga: + names: + - {first: Dániel, last: Varga} + - {first: Daniel, last: Varga} + - {first: D., last: Varga} +daniel-whyatt: + names: + - {first: Daniel, last: Whyatt} + - {first: Dan, last: Whyatt} +daniel-zeman: + names: + - {first: Daniel, last: Zeman} + - {first: Dan, last: Zeman} +daniela-gifu: + names: + - {first: Daniela, last: Gifu} + - {first: Daniela, last: Gîfu} +daniela-m-romano: + names: + - {first: Daniela M., last: Romano} + - {first: Daniela, last: Romano} +daniela-oliveira-f-do-amaral: + names: + - {first: Daniela Oliveira F., last: do Amaral} + - {first: Daniela O. F., last: do Amaral} +daniele-godard: + names: + - {first: Daniele, last: Godard} + - {first: Danièle, last: Godard} +danielle-l-mowery: + names: + - {first: Danielle L., last: Mowery} + - {first: Danielle, last: Mowery} + - {first: Danielle L, last: Mowery} +danielle-s-mcnamara: + names: + - {first: Danielle S., last: McNamara} + - {first: Danielle, last: McNamara} +danniel-liwanag-alcantara: + names: + - {first: Danniel Liwanag, last: Alcantara} + - {first: Danniel, last: Alcantara} +darla-magdalene-shockley: + names: + - {first: Darla Magdalene, last: Shockley} + - {first: Darla, last: Shockley} +darnes-vilarino: + names: + - {first: Darnes, last: Vilariño} + - {first: Darnes, last: Vilariño Ayala} +darren-hsin-hung-lin: + names: + - {first: Darren Hsin-Hung, last: Lin} + - {first: Darren Hsin-hung, last: Lin} + - {first: Hsin-Hung, last: Lin} +dasa-munkova: + names: + - {first: Dasa, last: Munkova} + - {first: Daša, last: Munková} +dau-cheng-lyu: + names: + - {first: Dau-cheng, last: Lyu} + - {first: Dau-Cheng, last: Lyu} +dave-omara: + names: + - {first: Dave, last: O’mara} + - {first: D., last: O’Mara} +david-a-evans: + names: + - {first: David A., last: Evans} + - {first: David Andreoff, last: Evans} +david-a-smith: + names: + - {first: David A., last: Smith} + - {first: David, last: Smith} + - {first: David Addison, last: Smith} +david-blei: + names: + - {first: David, last: Blei} + - {first: David M., last: Blei} +david-c-uthus: + names: + - {first: David C., last: Uthus} + - {first: David, last: Uthus} +david-cabrero-souto: + names: + - {first: David Cabrero, last: Souto} + - {first: David, last: Cabrero} +david-carter: + names: + - {first: David, last: Carter} + - {first: David M., last: Carter} +david-d-lewis: + names: + - {first: David D., last: Lewis} + - {first: David, last: Lewis} +david-d-mcdonald: + comment: MIT, BBN, SIFT + names: + - {first: David D., last: McDonald} + - {first: David, last: McDonald} + similar: + - david-w-mcdonald +david-d-palmer: + names: + - {first: David D., last: Palmer} + - {first: David, last: Palmer} +david-d-sherertz: + names: + - {first: David D., last: Sherertz} + - {first: D. D., last: Sherertz} +david-day: + names: + - {first: David, last: Day} + - {first: David S., last: Day} +david-e-johnson: + names: + - {first: David E., last: Johnson} + - {first: David, last: Johnson} +david-ellis: + names: + - {first: David, last: Ellis} + - {first: David Ellis, last: Rogers} +david-elson: + names: + - {first: David, last: Elson} + - {first: David K., last: Elson} +david-farwell: + names: + - {first: David, last: Farwell} + - {first: D., last: Farwell} +david-fernandez-amoros: + names: + - {first: David, last: Fernández-Amorós} + - {first: David, last: Fernández} + - {first: David, last: Férnandez-Amorós} +david-fisher: + names: + - {first: David, last: Fisher} + - {first: D., last: Fisher} +david-forsyth: + names: + - {first: David, last: Forsyth} + - {first: David A., last: Forsyth} +david-goodine: + names: + - {first: David, last: Goodine} + - {first: D., last: Goodine} +david-h-d-warren: + names: + - {first: David H. D., last: Warren} + - {first: David H.D., last: Warren} +david-ifeoluwa-adelani: + names: + - {first: David Ifeoluwa, last: Adelani} + - {first: David, last: Adelani} + - {first: David I., last: Adelani} +david-israel: + names: + - {first: David, last: Israel} + - {first: David J., last: Israel} +david-k-evans: + names: + - {first: David K., last: Evans} + - {first: David, last: Evans} + - {first: David Kirk, last: Evans} +david-king: + names: + - {first: David, last: King} + - {first: David L., last: King} +david-kolovratnik: + names: + - {first: David, last: Kolovratnik} + - {first: David, last: Kolovratník} +david-l-bean: + names: + - {first: David L., last: Bean} + - {first: David, last: Bean} +david-l-waltz: + names: + - {first: David L., last: Waltz} + - {first: D. L., last: Waltz} +david-llorens: + names: + - {first: David, last: Llorens} + - {first: D., last: Llorens} +david-m-howcroft: + names: + - {first: David M., last: Howcroft} + - {first: David, last: Howcroft} +david-m-magerman: + names: + - {first: David M., last: Magerman} + - {first: David, last: Magerman} + - {first: D., last: Magerman} +david-m-rojas: + names: + - {first: David M., last: Rojas} + - {first: David, last: Rojas} +david-m-w-powers: + names: + - {first: David M. W., last: Powers} + - {first: David M W, last: Powers} + - {first: D. M. W., last: Powers} +david-mark-nemeskey: + names: + - {first: Dávid Márk, last: Nemeskey} + - {first: David Mark, last: Nemeskey} +david-martinez: + names: + - {first: David, last: Martinez} + - {first: David, last: Martínez} +david-martins-de-matos: + names: + - {first: David, last: Martins de Matos} + - {first: David Martins, last: de Matos} + - {first: David M., last: de Matos} +david-mcgee: + names: + - {first: David, last: McGee} + - {first: David R., last: McGee} +david-n-milne: + names: + - {first: David N., last: Milne} + - {first: David, last: Milne} +david-nicolas-racca: + names: + - {first: David Nicolas, last: Racca} + - {first: David Nicolás, last: Racca} +david-pico: + names: + - {first: David, last: Picó} + - {first: David, last: Pico} +david-pierce: + names: + - {first: David, last: Pierce} + - {first: David R., last: Pierce} +david-pinto: + names: + - {first: David, last: Pinto} + - {first: David Eduardo, last: Pinto Avendaño} + - {first: David, last: Pinto Avendaño} +david-r-karger: + names: + - {first: David R., last: Karger} + - {first: David, last: Karger} +david-r-mortensen: + names: + - {first: David R., last: Mortensen} + - {first: David, last: Mortensen} +david-s-pallett: + names: + - {first: David S., last: Pallett} + - {first: David, last: Pallett} + - {first: D. S., last: Pallett} + - {first: D., last: Pallett} +david-stallard: + names: + - {first: David, last: Stallard} + - {first: David G., last: Stallard} + - {first: D., last: Stallard} +david-suendermann-oeft: + names: + - {first: David, last: Suendermann-Oeft} + - {first: David, last: Suendermann} +david-tomas: + names: + - {first: David, last: Tomas} + - {first: David, last: Tomás} +david-traum: + names: + - {first: David, last: Traum} + - {first: David R., last: Traum} +david-w-aha: + names: + - {first: David W., last: Aha} + - {first: David, last: Aha} +david-w-mcdonald: + comment: Univ. of Washington + names: + - {first: David W., last: McDonald} + similar: + - david-d-mcdonald +david-weir: + names: + - {first: David, last: Weir} + - {first: David J., last: Weir} + - {first: David, last: Wei} + - {first: D. J., last: Weir} +david-yoshikazu-oshima: + names: + - {first: David Yoshikazu, last: Oshima} + - {first: David Y., last: Oshima} +davis-muhajereen-d-dimalen: + names: + - {first: Davis Muhajereen D., last: Dimalen} + - {first: Davis Muhajereen, last: Dimalen} +davy-weissenbacher: + names: + - {first: Davy, last: Weissenbacher} + - {first: D., last: Weissenbacher} +dayne-freitag: + names: + - {first: Dayne, last: Freitag} + - {first: D., last: Freitag} +dean-foster: + names: + - {first: Dean, last: Foster} + - {first: Dean P., last: Foster} +dean-sturtevant: + names: + - {first: Dean, last: Sturtevant} + - {first: Dean G., last: Sturtevant} +deb-roy: + names: + - {first: Deb, last: Roy} + - {first: Suman, last: Deb Roy} +debela-tesfaye-gemechu: + names: + - {first: Debela Tesfaye, last: Gemechu} + - {first: Debela, last: Tesfaye} +deborah-a-dahl: + names: + - {first: Deborah A., last: Dahl} + - {first: Deborah, last: Dahl} + - {first: D., last: Dahl} +deborah-brady: + names: + - {first: Deborah, last: Brady} + - {first: Deb, last: Brady} +dechuan-yang: + names: + - {first: Dechuan, last: Yang} + - {first: De, last: Yang} +deepak-gupta: + names: + - {first: Deepak, last: Gupta} + - {first: Deepak Kumar, last: Gupta} + - {first: Deepa, last: Gupta} +deepak-kumar-malladi: + names: + - {first: Deepak Kumar, last: Malladi} + - {first: Deepak, last: Malladi} +deepak-p: + names: + - {first: Deepak, last: P} + - {first: Deepak, last: Padmanabhan} +degen-huang: + names: + - {first: Degen, last: Huang} + - {first: De-Gen, last: Huang} +demetrios-master: + names: + - {first: Demetrios, last: Master} + - {first: Demitrios, last: Master} +denise-dipersio: + names: + - {first: Denise, last: DiPersio} + - {first: Denise, last: Dipersio} +dennis-mehay: + names: + - {first: Dennis, last: Mehay} + - {first: Dennis Nolan, last: Mehay} +dennis-ryan-storoshenko: + names: + - {first: Dennis Ryan, last: Storoshenko} + - {first: Dennis R., last: Storoshenko} +deok-bong-kim: + names: + - {first: Deok-bong, last: Kim} + - {first: Deok-Bong, last: Kim} +derek-long: + names: + - {first: Derek, last: Long} + - {first: Derek P., last: Long} +derry-tanti-wijaya: + names: + - {first: Derry Tanti, last: Wijaya} + - {first: Derry, last: Wijaya} +dessi-puji-lestari: + names: + - {first: Dessi Puji, last: Lestari} + - {first: Dessi, last: Lestari} +detmar-meurers: + names: + - {first: Detmar, last: Meurers} + - {first: W. Detmar, last: Meurers} + - {first: Walt Detmar, last: Meurers} +devadath-v-v: + names: + - {first: Devadath, last: V V} + - {first: Devadath V, last: V} +deyi-xiong: + names: + - {first: Deyi, last: Xiong} + - {first: De-Yi, last: Xiong} +diana-binnenpoorte: + names: + - {first: Diana, last: Binnenpoorte} + - {first: D., last: Binnenpoorte} +diana-inkpen: + names: + - {first: Diana, last: Inkpen} + - {first: Diana Zaiu, last: Inkpen} + - {first: Diana, last: Zaiu} +diana-mccarthy: + names: + - {first: Diana, last: McCarthy} + - {first: Diana F., last: McCarthy} +diana-nicoleta-popa: + names: + - {first: Diana Nicoleta, last: Popa} + - {first: Diana, last: Popa} +diana-trandabat: + names: + - {first: Diana, last: Trandabat} + - {first: Diana, last: Trandabăț} + - {first: Diana, last: Trandabăţ} + - {first: Diana Marie, last: Trandabăţ} +diane-litman: + names: + - {first: Diane, last: Litman} + - {first: Diane J., last: Litman} +dianne-p-oleary: + names: + - {first: Dianne P., last: O’Leary} + - {first: Dianne, last: O’Leary} +didzis-gosko: + names: + - {first: Didzis, last: Gosko} + - {first: Didzis, last: Goško} +diego-a-burgos: + names: + - {first: Diego A., last: Burgos} + - {first: Diego, last: Burgos} +diego-calvanese: + names: + - {first: Diego, last: Calvanese} + - {first: D., last: Calvanese} +diego-fernandez-slezak: + names: + - {first: Diego, last: Fernandez Slezak} + - {first: Diego, last: Fernández Slezak} +diego-molla: + names: + - {first: Diego, last: Molla} + - {first: Diego, last: Mollá-Aliod} + - {first: Diego, last: Mollá Aliod} + - {first: Diego, last: Molla-Aliod} + - {first: Diego, last: Mollá} +diego-raphael-amancio: + names: + - {first: Diego Raphael, last: Amancio} + - {first: Diego, last: Amancio} +dieter-metzing: + names: + - {first: Dieter, last: Metzing} + - {first: D., last: Metzing} +dieter-van-uytvanck: + names: + - {first: Dieter, last: van Uytvanck} + - {first: Dieter, last: Van Uytvanck} +dietmar-rosner: + names: + - {first: Dietmar, last: Rösner} + - {first: Dietmar, last: Rosner} + - {first: Dietmar F., last: Roesner} + - {first: Dietmar, last: Roesner} + - {first: D., last: Roesner} +dietrich-rebholz-schuhmann: + names: + - {first: Dietrich, last: Rebholz Schuhmann} + - {first: Dietrich, last: Rebholz-Schuhmann} +dilara-torunoglu-selamet: + names: + - {first: Dilara, last: Torunoğlu-Selamet} + - {first: Dilara, last: Torunoǧlu} +dilek-hakkani-tur: + names: + - {first: Dilek, last: Hakkani-Tur} + - {first: Dilek, last: Hakkani-Tür} + - {first: Dilek Zeynep, last: Hakkani} + - {first: D., last: Hakkani-Tur} +diman-karagyozov: + names: + - {first: Diman, last: Karagyozov} + - {first: Diman, last: Karagiozov} +dimitrios-bilidas: + names: + - {first: Dimitrios, last: Bilidas} + - {first: Dimitris, last: Bilidas} +dimitrios-galanis: + names: + - {first: Dimitrios, last: Galanis} + - {first: Dimitris, last: Galanis} +dimitris-christodoulakis: + names: + - {first: Dimitris, last: Christodoulakis} + - {first: Dimitris N., last: Christodoulakis} +dinh-dien: + names: + - {first: Dinh, last: Dien} + - {first: Dien, last: Dinh} +diogo-gloria-silva: + names: + - {first: Diogo, last: Glória-Silva} + - {first: Diogo F. C., last: Silva} + orcid: 0000-0002-4420-7455 +dipendra-misra: + names: + - {first: Dipendra, last: Misra} + - {first: Dipendra Kumar, last: Misra} +dipti-misra-sharma: + names: + - {first: Dipti Misra, last: Sharma} + - {first: Dipti, last: Misra Sharma} + - {first: Dipti, last: Sharma} + - {first: Dipti M., last: Sharma} + - {first: Dipti, last: Misra} + - {first: Dipti M, last: Sharma} +dirk-von-gruenigen: + names: + - {first: Dirk, last: Von Gruenigen} + - {first: Dirk, last: von Grünigen} +djame-seddah: + names: + - {first: Djamé, last: Seddah} + - {first: Djame, last: Seddah} +djamel-mostefa: + names: + - {first: Djamel, last: Mostefa} + - {first: D., last: Mostefa} +do-dat-tran: + names: + - {first: Do-Dat, last: Tran} + - {first: Do Dat, last: Tran} +dogan-can: + names: + - {first: Doğan, last: Can} + - {first: Dogan, last: Can} +doina-tatar: + names: + - {first: Doina, last: Tatar} + - {first: Doina, last: Tătar} +dolors-catala: + names: + - {first: Dolors, last: Català} + - {first: Dolors, last: Catala} +domenico-proietti: + names: + - {first: Domenico, last: Proietti} + - {first: D., last: Proietti} +dominic-r-jones: + names: + - {first: Dominic R., last: Jones} + - {first: Dominic, last: Jones} +dominique-estival: + names: + - {first: Dominique, last: Estival} + - {first: D, last: Estival} + - {first: D., last: Estival} +dominique-vaufreydaz: + names: + - {first: Dominique, last: Vaufreydaz} + - {first: D., last: Vaufreydaz} +donald-c-comeau: + names: + - {first: Donald C., last: Comeau} + - {first: Don, last: Comeau} + - {first: Donald, last: Comeau} + - {first: Donald C, last: Comeau} +donald-hindle: + names: + - {first: Donald, last: Hindle} + - {first: Don, last: Hindle} + - {first: D., last: Hindle} +dong-cheng-hu: + names: + - {first: Dong Cheng, last: Hu} + - {first: Dong-Cheng, last: Hu} +dong-il-kim: + names: + - {first: Dong-Il, last: Kim} + - {first: Dong-il, last: Kim} +dong-yang: + names: + - {first: Dong, last: Yang} + - {first: D., last: Yang} +dongfeng-cai: + names: + - {first: Dongfeng, last: Cai} + - {first: DongFeng, last: Cai} +donghong-ji: + names: + - {first: Donghong, last: Ji} + - {first: DongHong, last: Ji} + - {first: Dong-Hong, last: Ji} + - {first: Dong Hong, last: Ji} +donghun-lee-kb: + comment: Kakao Brain + names: + - {first: Donghun, last: Lee} +donghun-lee-ku: + comment: Korea University + names: + - {first: Donghun, last: Lee} +dongsheng-li-fudan: + comment: Fudan + disable_name_matching: true + names: + - {first: Dongsheng, last: Li} + orcid: 0000-0003-3103-8442 +donia-scott: + names: + - {first: Donia, last: Scott} + - {first: Donia R., last: Scott} + - {first: D, last: Scott} +donna-byron: + names: + - {first: Donna, last: Byron} + - {first: Donna K., last: Byron} + - {first: D., last: Byron} +donna-gates: + names: + - {first: Donna, last: Gates} + - {first: Donna M., last: Gates} +doroteo-t-toledano: + names: + - {first: Doroteo T., last: Toledano} + - {first: Doroteo Torre, last: Toledano} + - {first: Doroteo, last: Toledano} +dorte-haltrup-hansen: + names: + - {first: Dorte Haltrup, last: Hansen} + - {first: Dorte H., last: Hansen} +douglas-appelt: + names: + - {first: Douglas, last: Appelt} + - {first: Douglas E., last: Appelt} + - {first: Doug, last: Appelt} +douglas-b-moran: + names: + - {first: Douglas B., last: Moran} + - {first: Douglas, last: Moran} + - {first: Doug, last: Moran} +douglas-jones: + names: + - {first: Douglas, last: Jones} + - {first: Douglas A., last: Jones} + - {first: Doug, last: Jones} +douglas-mckee: + names: + - {first: Douglas, last: McKee} + - {first: Doug, last: McKee} +douglas-oshaughnessy: + names: + - {first: Douglas, last: O’Shaughnessy} + - {first: Douglas D., last: O’Shaughnessy} + - {first: D., last: O’Shaughnessy} +douglas-w-oard: + names: + - {first: Douglas W., last: Oard} + - {first: Douglas, last: Oard} + - {first: Doug, last: Oard} +douglass-cutting: + names: + - {first: Douglass, last: Cutting} + - {first: Doug, last: Cutting} +dragomir-radev: + names: + - {first: Dragomir, last: Radev} + - {first: Dragomir R., last: Radev} +dragos-stefan-munteanu: + names: + - {first: Dragos Stefan, last: Munteanu} + - {first: Dragos, last: Munteanu} +drahomira-johanka-spoustova: + names: + - {first: Drahomíra “johanka”, last: Spoustová} + - {first: Johanka, last: Spoustová} + - {first: Drahomíra „johanka“, last: Spoustová} +duane-e-olawsky: + names: + - {first: Duane E., last: Olawsky} + - {first: Duane, last: Olawsky} +duc-vu-tran: + names: + - {first: Duc-Vu, last: Tran} + - {first: Vu Duc, last: Tran} +dusan-varis: + names: + - {first: Dusan, last: Varis} + - {first: Dušan, last: Variš} +dustin-hillard: + names: + - {first: Dustin, last: Hillard} + - {first: D., last: Hillard} +e-dario-gutierrez: + names: + - {first: E. Dario, last: Gutierrez} + - {first: Elkin, last: Darío Gutiérrez} + - {first: E. Darío, last: Gutiérrez} +e-gabriela-garza: + names: + - {first: E. Gabriela, last: Garza} + - {first: Gabriela, last: Garza} +eckhard-bick: + names: + - {first: Eckhard, last: Bick} + - {first: E., last: Bick} +eddie-antonio-santos: + names: + - {first: Eddie Antonio, last: Santos} + - {first: Eddie A., last: Santos} + - {first: Eddie, last: Antonio Santos} + - {first: Eddie, last: Santos} + orcid: 0000-0001-5337-715X +edgar-gonzalez-pellicer: + names: + - {first: Edgar, last: Gonzàlez Pellicer} + - {first: Edgar, last: Gonzàlez} +edmund-grimley-evans: + names: + - {first: Edmund Grimley, last: Evans} + - {first: Edmund, last: Grimley-Evans} +edmund-yu: + names: + - {first: Edmund, last: Yu} + - {first: Edmund S., last: Yu} +edouard-geoffrois: + names: + - {first: Edouard, last: Geoffrois} + - {first: E., last: Geoffrois} +edouard-grave: + names: + - {first: Édouard, last: Grave} + - {first: Edouard, last: Grave} +eduard-hovy: + names: + - {first: Eduard, last: Hovy} + - {first: Eduard H., last: Hovy} + - {first: Ed, last: Hovy} +eduardo-lleida: + names: + - {first: Eduardo, last: Lleida} + - {first: Eduardo, last: LLeida} +eduardo-r-banga: + names: + - {first: Eduardo R., last: Banga} + - {first: Eduardo, last: R. Banga} + - {first: Eduardo Rodríguez, last: Banga} +edward-collins: + names: + - {first: Edward, last: Collins} + - {first: Ed, last: Collins} +edward-grefenstette: + names: + - {first: Edward, last: Grefenstette} + - {first: E., last: Grefenstette} +edward-hoenkamp: + names: + - {first: Edward, last: Hoenkamp} + - {first: Eduard, last: Hoenkamp} +edward-schofield: + names: + - {first: Edward, last: Schofield} + - {first: Ed, last: Schofield} +edward-stabler: + names: + - {first: Edward, last: Stabler} + - {first: Edward P., last: 'Stabler, Jr.'} + - {first: Edward P., last: Stabler} +edward-w-d-whittaker: + names: + - {first: Edward W. D., last: Whittaker} + - {first: E.W.D., last: Whittaker} +edward-y-chang: + names: + - {first: Edward Y., last: Chang} + - {first: Edward, last: Chang} +efstathios-stamatatos: + names: + - {first: Efstathios, last: Stamatatos} + - {first: E., last: Stamatatos} +efsun-sarioglu-kayi: + names: + - {first: Efsun, last: Sarioglu Kayi} + - {first: Efsun, last: Sarioglu} +egidio-giachin: + names: + - {first: Egidio, last: Giachin} + - {first: E., last: Giachin} +egidio-l-terra: + names: + - {first: Egidio L., last: Terra} + - {first: Egidio, last: Terra} +egon-stemle: + names: + - {first: Egon, last: Stemle} + - {first: Egon W., last: Stemle} +ehsan-mohammady-ardehaly: + names: + - {first: Ehsan, last: Mohammady Ardehaly} + - {first: Ehsan, last: Mohammady} +eiichiro-sumita: + names: + - {first: Eiichiro, last: Sumita} + - {first: Eiichro, last: Sumita} +eileen-fitzpatrick: + names: + - {first: Eileen, last: Fitzpatrick} + - {first: E., last: Fitzpatrick} +eirikur-rognvaldsson: + names: + - {first: Eirikur, last: Rögnvaldsson} + - {first: Eiríkur, last: Rögnvaldsson} +ekaterina-v-rakhilina: + names: + - {first: Ekaterina V., last: Rakhilina} + - {first: Ekaterina, last: Rakhilina} +elaine-omahony: + names: + - {first: Elaine, last: O′Mahony} + - {first: Elaine, last: O’Mahony} +elaine-rich: + names: + - {first: Elaine, last: Rich} + - {first: Elaine A., last: Rich} +elaine-ui-dhonnchadha: + names: + - {first: Elaine, last: Uí Dhonnchadha} + - {first: E., last: Uí Dhonnchadha} +eldon-g-lytle: + names: + - {first: Eldon G., last: Lytle} + - {first: Eldon G., last: Lytel} +elena-paskaleva: + names: + - {first: Elena, last: Paskaleva} + - {first: Elena, last: Pascaleva} +eleni-koutsogeorgos: + names: + - {first: Eleni, last: Koutsogeorgos} + - {first: E., last: Koutsogeorgos} +eli-goldberg: + names: + - {first: Eli, last: Goldberg} + - {first: E., last: Goldberg} +elina-desipri: + names: + - {first: Elina, last: Desipri} + - {first: Elina, last: Desypri} + - {first: E., last: Desipri} +elisa-bertino: + names: + - {first: Elisa, last: Bertino} + - {first: E., last: Bertino} +elisabet-comelles: + names: + - {first: Elisabet, last: Comelles} + - {first: E., last: Comelles} +elisabeth-ahlsen: + names: + - {first: Elisabeth, last: Ahlsén} + - {first: Elisabeth, last: Ahlsen} +elisabeth-andre: + names: + - {first: Elisabeth, last: Andre} + - {first: Elisabeth, last: André} +elisabeth-delais-roussarie: + names: + - {first: Elisabeth, last: Delais-Roussarie} + - {first: Élisabeth, last: Delais-Roussarie} +elisabeth-dhalleweyn: + names: + - {first: Elisabeth, last: D’Halleweyn} + - {first: Elizabeth, last: D’Halleweyn} +elisabeth-frasnelli: + names: + - {first: Elisabeth, last: Frasnelli} + - {first: E., last: Frasnelli} +elisabeth-maier: + names: + - {first: Elisabeth, last: Maier} + - {first: Elisabeth, last: Mager} +elizabeth-a-hinkelman: + names: + - {first: Elizabeth A., last: Hinkelman} + - {first: Elizabeth, last: Hinkelman} +elizabeth-bishop: + names: + - {first: Elizabeth, last: Bishop} + - {first: E., last: Bishop} +elizabeth-c-botha: + names: + - {first: Elizabeth C., last: Botha} + - {first: E.C., last: Botha} +elizabeth-d-liddy: + names: + - {first: Elizabeth D., last: Liddy} + - {first: Elizabeth, last: Liddy} +elizabeth-merkhofer: + names: + - {first: Elizabeth, last: Merkhofer} + - {first: Elizabeth M., last: Merkhofer} +elizabeth-schroeder: + names: + - {first: Elizabeth, last: Schroeder} + - {first: Elizabeth Schroeder, last: Richerson} + - {first: Elizabeth, last: Richerson} +elizabeth-shriberg: + names: + - {first: Elizabeth, last: Shriberg} + - {first: E., last: Shriberg} +elizaveta-loginova-clouet: + names: + - {first: Elizaveta, last: Loginova-Clouet} + - {first: Elizaveta, last: Clouet} +ellen-campana: + names: + - {first: Ellen, last: Campana} + - {first: E., last: Campana} +ellen-douglas-cowie: + names: + - {first: Ellen, last: Douglas-Cowie} + - {first: E., last: Douglas-Cowie} +ellen-k-dodge: + names: + - {first: Ellen K., last: Dodge} + - {first: Ellen, last: Dodge} +ellen-m-voorhees: + names: + - {first: Ellen M., last: Voorhees} + - {first: Ellen, last: Voorhees} +ellen-riloff: + names: + - {first: Ellen, last: Riloff} + - {first: E., last: Riloff} +elliott-franco-drabek: + names: + - {first: Elliott Franco, last: Drabek} + - {first: Elliott, last: Drabek} + - {first: Elliott, last: Drábek} +elliott-moreton: + names: + - {first: Elliott, last: Moreton} + - {first: Elliot, last: Moreton} +elmar-noth: + names: + - {first: Elmar, last: Nöth} + - {first: Elmar, last: Noth} + - {first: E., last: Nöth} +elvira-i-sicilia-garcia: + names: + - {first: Elvira I., last: Sicilia-Garcia} + - {first: E.I., last: Sicilia-Garcia} + - {first: E. I., last: Sicilia-Garcia} +ely-edison-da-silva-matos: + names: + - {first: Ely Edison da Silva, last: Matos} + - {first: Ely, last: Matos} + - {first: Ely E. S., last: Matos} +emanuela-boros: + names: + - {first: Emanuela, last: Boroş} + - {first: Emanuela, last: Boroș} + - {first: Emanuela, last: Boros} +emanuele-pianta: + names: + - {first: Emanuele, last: Pianta} + - {first: E., last: Pianta} +emerson-cabrera-paraiso: + names: + - {first: Emerson Cabrera, last: Paraiso} + - {first: Emerson, last: Paraiso} +emiel-krahmer: + names: + - {first: Emiel, last: Krahmer} + - {first: Emiel J., last: Krahmer} +emiliano-giovannetti: + names: + - {first: Emiliano, last: Giovannetti} + - {first: Emiliano, last: Giovanetti} +emiliano-raul-guevara: + names: + - {first: Emiliano Raul, last: Guevara} + - {first: Emiliano, last: Guevara} +emilio-sanchis: + names: + - {first: Emilio, last: Sanchis} + - {first: Emilio, last: Sanchís} +emily-m-bender: + names: + - {first: Emily M., last: Bender} + - {first: Emily, last: Bender} +emily-prudhommeaux: + names: + - {first: Emily, last: Prud’hommeaux} + - {first: Emily T., last: Prud’hommeaux} + - {first: Emily, last: Prud'hommeaux} +emina-kurtic: + names: + - {first: Emina, last: Kurtić} + - {first: Emina, last: Kurtic} +eneida-a-mendonca: + names: + - {first: Eneida A., last: Mendonca} + - {first: Eneida, last: Mendonca} +eneko-agirre: + names: + - {first: Eneko, last: Agirre} + - {first: E., last: Agirre} +eneldo-loza-mencia: + names: + - {first: Eneldo, last: Loza Mencía} + - {first: Eneldo Loza, last: Mencía} +enrico-zovato: + names: + - {first: Enrico, last: Zovato} + - {first: E., last: Zovato} +enrique-amigo: + names: + - {first: Enrique, last: Amigó} + - {first: Enrique, last: Amigo} +enrique-henestroza-anguiano: + names: + - {first: Enrique, last: Henestroza Anguiano} + - {first: Enrique Henestroza, last: Anguiano} +enrique-manjavacas: + names: + - {first: Enrique, last: Manjavacas} + - {first: Enrique, last: Manjavacas Arevalo} +enrique-vidal: + names: + - {first: Enrique, last: Vidal} + - {first: E., last: Vidal} +eraldo-fernandes: + names: + - {first: Eraldo, last: Fernandes} + - {first: Eraldo Rezende, last: Fernandes} +ergina-kavallieratou: + names: + - {first: Ergina, last: Kavallieratou} + - {first: E., last: Kavallieratou} +ergun-bicici: + names: + - {first: Ergun, last: Bicici} + - {first: Ergun, last: Biçici} +erhard-hinrichs: + names: + - {first: Erhard, last: Hinrichs} + - {first: Erhard W., last: Hinrichs} +eric-atwell: + names: + - {first: Eric, last: Atwell} + - {first: Eric Steven, last: Atwell} + - {first: Eric S., last: Atwell} +eric-bilinski: + names: + - {first: Eric, last: Bilinski} + - {first: Éric, last: Bilinski} +eric-breck: + names: + - {first: Eric, last: Breck} + - {first: Eric J., last: Breck} +eric-castelli: + names: + - {first: Eric, last: Castelli’} + - {first: Eric, last: Castelli} +eric-charton: + names: + - {first: Eric, last: Charton} + - {first: Éric, last: Charton} +eric-fosler-lussier: + names: + - {first: Eric, last: Fosler-Lussier} + - {first: J. Eric, last: Fosler} + - {first: Eric, last: Fosler} +eric-gaussier: + names: + - {first: Eric, last: Gaussier} + - {first: Éric, last: Gaussier} +eric-h-huang: + names: + - {first: Eric H., last: Huang} + - {first: Eric, last: Huang} +eric-jackson: + names: + - {first: Eric, last: Jackson} + - {first: E., last: Jackson} +eric-laporte: + names: + - {first: Eric, last: Laporte} + - {first: Éric, last: Laporte} +eric-nyberg: + names: + - {first: Eric, last: Nyberg} + - {first: Eric H., last: Nyberg III} + - {first: Eric H., last: Nyberg} + - {first: Eric H., last: 'Nyberg, 3rd'} +eric-ringger: + names: + - {first: Eric, last: Ringger} + - {first: Eric K., last: Ringger} +eric-sadit-tellez: + names: + - {first: Eric Sadit, last: Tellez} + - {first: Eric S., last: Tellez} +eric-sven-ristad: + names: + - {first: Eric Sven, last: Ristad} + - {first: Eric, last: Ristad} +eric-villemonte-de-la-clergerie: + names: + - {first: Éric, last: Villemonte de la Clergerie} + - {first: Eric, last: Villemonte de la Clergerie} + - {first: Eric, last: de la Clergerie} + - {first: Eric, last: de La Clergerie} + - {first: Éric, last: de La Clergerie} + - {first: Éric, last: de la Clergerie} + - {first: Éric, last: Villemonte de La Clergerie} +eric-wehrli: + names: + - {first: Eric, last: Wehrli} + - {first: Éric, last: Wehrli} +eric-xing: + names: + - {first: Eric, last: Xing} + - {first: Eric P., last: Xing} +erich-h-steiner: + names: + - {first: Erich H., last: Steiner} + - {first: Erich, last: Steiner} +erick-alphonse: + names: + - {first: Erick, last: Alphonse} + - {first: E., last: Alphonse} +erick-fonseca: + names: + - {first: Erick, last: Fonseca} + - {first: Erick Rocha, last: Fonseca} + - {first: Erick R., last: Fonseca} +erick-galani-maziero: + names: + - {first: Erick Galani, last: Maziero} + - {first: Erick, last: Maziero} +erik-baert: + names: + - {first: Erik, last: Baert} + - {first: E., last: Baert} +erik-tjong-kim-sang: + names: + - {first: Erik, last: Tjong Kim Sang} + - {first: Erik F., last: Tjong Kim Sang} +erik-van-der-goot: + names: + - {first: Erik, last: van der Goot} + - {first: Erik, last: Van der Goot} +erik-van-mulligen: + names: + - {first: Erik, last: van Mulligen} + - {first: Erik M., last: van Mulligen} +ernst-buchberger: + names: + - {first: Ernst, last: Buchberger} + - {first: E., last: Buchberger} +ertugrul-yilmaz: + names: + - {first: Ertugrul, last: Yılmaz} + - {first: Ertuğrul, last: Yilmaz} + - {first: Ertuǧrul, last: Yılmaz} +esa-nelimarkka: + names: + - {first: Esa, last: Nelimarkka} + - {first: E., last: Nelimarkka} +esmeralda-manandise: + names: + - {first: Esmeralda, last: Manandise} + - {first: Esme, last: Manandise} +esref-adali: + names: + - {first: Esref, last: Adali} + - {first: Eşref, last: Adalı} + - {first: Eşref, last: Adali} +estela-saquete: + names: + - {first: Estela, last: Saquete} + - {first: Estela, last: Saquete Boro} + - {first: E., last: Saquete} +estevam-r-hruschka-jr: + names: + - {first: Estevam R., last: 'Hruschka, Jr.'} + - {first: Estevam R., last: Hruschka Jr.} +esther-klabbers: + names: + - {first: Esther, last: Klabbers} + - {first: E., last: Klabbers} +esther-ratsch: + names: + - {first: Esther, last: Ratsch} + - {first: E., last: Ratsch} +ethan-selfridge: + names: + - {first: Ethan, last: Selfridge} + - {first: Ethan O., last: Selfridge} +eugenio-martinez-camara: + names: + - {first: Eugenio, last: Martínez-Cámara} + - {first: Eugenio, last: Martinez Camara} +eun-kyung-kim: + names: + - {first: Eun-kyung, last: Kim} + - {first: Eun-Kyung, last: Kim} +eun-suk-yang: + names: + - {first: Eun-Suk, last: Yang} + - {first: Eunsuk, last: Yang} +eun-young-ha: + names: + - {first: Eun Young, last: Ha} + - {first: Eun, last: Ha} + - {first: Eun Y., last: Ha} +eva-ejerhed: + names: + - {first: Eva, last: Ejerhed} + - {first: Eva I., last: Ejerhed} +eva-fucikova: + names: + - {first: Eva, last: Fucikova} + - {first: Eva, last: Fučíková} +eva-hajicova: + names: + - {first: Eva, last: Hajicova} + - {first: Eva, last: Hajicová} + - {first: Eva, last: Hajičová} + - {first: E., last: Hajicova} +eva-maria-vecchi: + names: + - {first: Eva Maria, last: Vecchi} + - {first: Eva, last: Vecchi} +eva-martinez-garcia: + names: + - {first: Eva, last: Martínez Garcia} + - {first: Eva Martínez, last: Garcia} +eva-navas: + names: + - {first: Eva, last: Navas} + - {first: E., last: Navas} +evandro-b-fonseca: + names: + - {first: Evandro B., last: Fonseca} + - {first: Evandro, last: Fonseca} +evelyne-tzoukermann: + names: + - {first: Evelyne, last: Tzoukermann} + - {first: E., last: Tzoukermann} +evgeny-stepanov: + names: + - {first: Evgeny, last: Stepanov} + - {first: Evgeny A., last: Stepanov} +ezra-black: + names: + - {first: Ezra, last: Black} + - {first: Ezra W., last: Black} + - {first: E., last: Black} +f-y-august-chao: + names: + - {first: F. Y. August, last: Chao} + - {first: F.Y. August, last: Chao} +fabian-suchanek: + names: + - {first: Fabian, last: Suchanek} + - {first: Fabian M., last: Suchanek} +fabien-cromieres: + names: + - {first: Fabien, last: Cromieres} + - {first: Fabien, last: Cromières} +fabio-a-gonzalez: + names: + - {first: Fabio A., last: González} + - {first: Fabio, last: González} +fabio-ciravegna: + names: + - {first: Fabio, last: Ciravegna} + - {first: F., last: Ciravegna} +fabio-kepler: + names: + - {first: Fabio, last: Kepler} + - {first: Fabio N., last: Kepler} + - {first: Fabio Natanael, last: Kepler} + - {first: F., last: Kepler} +fabio-massimo-zanzotto: + names: + - {first: Fabio Massimo, last: Zanzotto} + - {first: Fabio, last: Massimo Zanzotto} + - {first: Fabio, last: Zanzotto} + - {first: F., last: Zanzotto} +fabio-pianesi: + names: + - {first: Fabio, last: Pianesi} + - {first: F., last: Pianesi} +fabio-tamburini: + names: + - {first: Fabio, last: Tamburini} + - {first: F., last: Tamburini} +fabrice-lefevre: + names: + - {first: Fabrice, last: Lefèvre} + - {first: Fabrice, last: Lefevre} + - {first: F., last: Lefevre} +fahad-khan: + names: + - {first: Fahad, last: Khan} + - {first: Anas Fahad, last: Khan} +faisal-ahmad: + names: + - {first: Faisal, last: Ahmad} + - {first: Faisal, last: Ahmed} +faiza-elkateb-gara: + names: + - {first: Faiza, last: Elkateb-Gara} + - {first: Faiza, last: Gara} + - {first: Faïza, last: Elkateb-Gara} +fang-fang-zhang: + names: + - {first: Fang-Fang, last: Zhang} + - {first: Fangfang, last: Zhang} +fang-zheng: + names: + - {first: Fang, last: Zheng} + - {first: Thomas Fang, last: Zheng} +fangfang-feng: + names: + - {first: Fangfang, last: Feng} + - {first: F., last: Feng} +farah-benamara: + names: + - {first: Farah, last: Benamara} + - {first: Farah, last: Beanamara} + - {first: Farah, last: Benamara Zitoune} +fatemeh-torabi-asr: + names: + - {first: Fatemeh, last: Torabi Asr} + - {first: Fatemeh Torabi, last: Asr} +federico-prat: + names: + - {first: Federico, last: Prat} + - {first: F., last: Prat} +fei-liu-gga: + comment: Google Assistant + names: + - {first: Fei, last: Liu} +fei-liu-unimelb: + comment: University of Melbourne + names: + - {first: Fei, last: Liu} +fei-liu-utdallas: + comment: UT Dallas, Bosch, CMU, University of Central Florida, Emory University + names: + - {first: Fei, last: Liu} +felicia-koerner: + names: + - {first: Felicia, last: Körner} + - {first: Felicia, last: Koerner} +felix-dreizin: + names: + - {first: Felix, last: Dreizin} + - {first: F., last: Dreizin} +feng-ju-lo: + names: + - {first: Feng-Ju, last: Lo} + - {first: Fengju, last: Lo} +feng-long-huang: + names: + - {first: Feng-Long, last: Huang} + - {first: Feng-Long, last: Hwang} +feng-yi-chen: + names: + - {first: Feng-Yi, last: Chen} + - {first: Feng-yi, last: Chen} +ferhan-ture: + names: + - {first: Ferhan, last: Türe} + - {first: Ferhan, last: Ture} +fernando-c-n-pereira: + names: + - {first: Fernando C. N., last: Pereira} + - {first: Fernando C.N., last: Pereira} +fernando-fernandez-martinez: + names: + - {first: Fernando, last: Fernández-Martínez} + - {first: Fernando Fernández, last: Martínez} +fernando-garcia: + names: + - {first: Fernando, last: Garcia} + - {first: Fernando, last: García-Granada} + - {first: Fernando, last: García} +fernando-llopis: + names: + - {first: Fernando, last: Llopis} + - {first: Fernando, last: LLopis} +fernando-martinez-santiago: + names: + - {first: Fernando, last: Martínez-Santiago} + - {first: Fernando, last: Martínez Santiago} +fernando-martins: + names: + - {first: Fernando, last: Martins} + - {first: F., last: Martins} +fernando-perdigao: + names: + - {first: Fernando, last: Perdigão} + - {first: Fernando S., last: Perdigão} +fernando-sanchez-leon: + names: + - {first: Fernando Sánchez, last: León} + - {first: Fernando, last: Sánchez} +fiammetta-namer: + names: + - {first: Fiammetta, last: Namer} + - {first: Fiametta, last: Namer} +fidelia-ibekwe-sanjuan: + names: + - {first: Fidelia, last: Ibekwe-SanJuan} + - {first: Fidelia, last: Ibekwe-Sanjuan} +fil-alleva: + names: + - {first: Fil, last: Alleva} + - {first: Fileno, last: Alleva} + - {first: F., last: Alleva} +filip-gralinski: + names: + - {first: Filip, last: Gralinski} + - {first: Filip, last: Graliński} +filip-jurcicek: + names: + - {first: Filip, last: Jurcicek} + - {first: Filip, last: Jurčíček} +finley-lacatusu: + names: + - {first: Finley, last: Lacatusu} + - {first: V. Finley, last: Lacatusu} +fintan-j-costello: + names: + - {first: Fintan J., last: Costello} + - {first: Fintan, last: Costello} +flavio-massimiliano-cecchini: + names: + - {first: Flavio, last: Massimiliano Cecchini} + - {first: Flavio Massimiliano, last: Cecchini} +flora-yu-fang-wang: + names: + - {first: Flora Yu-Fang, last: Wang} + - {first: Yu-Fang, last: Wang} +florence-reeder: + names: + - {first: Florence, last: Reeder} + - {first: Florence M., last: Reeder} +florin-bulgarov: + names: + - {first: Florin, last: Bulgarov} + - {first: Florin Adrian, last: Bulgarov} +folkert-de-vriend: + names: + - {first: Folkert, last: de Vriend} + - {first: F., last: De Vriend} + - {first: F., last: de Vriend} +forrest-bao: + names: + - {first: Forrest, last: Bao} + - {first: Forrest Sheng, last: Bao} +france-mihelic: + names: + - {first: France, last: Mihelic} + - {first: France, last: Mihelič} +francesc-ribas: + names: + - {first: Francesc, last: Ribas} + - {first: Francesc Ribas, last: Framis} +francesco-piazza: + names: + - {first: Francesco, last: Piazza} + - {first: F., last: Piazza} +francine-chen: + names: + - {first: Francine, last: Chen} + - {first: Francine R., last: Chen} +francis-j-smith: + names: + - {first: Francis J., last: Smith} + - {first: F. J., last: Smith} + - {first: F J, last: Smith} +francis-kubala: + names: + - {first: Francis, last: Kubala} + - {first: F., last: Kubala} +francis-tyers: + names: + - {first: Francis, last: Tyers} + - {first: Francis M., last: Tyers} +francisco-campillo: + names: + - {first: Francisco, last: Campillo} + - {first: Francisco Campillo, last: Díaz} +francisco-casacuberta: + names: + - {first: Francisco, last: Casacuberta} + - {first: F., last: Casacuberta} +francisco-guzman: + names: + - {first: Francisco, last: Guzmán} + - {first: Francisco, last: Guzman} +francisco-javier-gonzalez-castano: + names: + - {first: Francisco Javier, last: González-Castaño} + - {first: Francisco J., last: González-Castaño} +francisco-m-couto: + names: + - {first: Francisco M., last: Couto} + - {first: Francisco, last: Couto} +franco-turini: + names: + - {first: Franco, last: Turini} + - {first: F., last: Turini} +francois-barthelemy: + names: + - {first: François, last: Barthélemy} + - {first: Francois, last: Barthelemy} +francois-levy: + names: + - {first: François, last: Lévy} + - {first: François, last: Levy} +francois-mairesse: + names: + - {first: François, last: Mairesse} + - {first: Francois, last: Mairesse} +francois-michel-lang: + names: + - {first: Francois-Michel, last: Lang} + - {first: Francois M., last: Lang} +francois-rousselot: + names: + - {first: François, last: Rousselot} + - {first: Francois, last: Rousselot} + - {first: F., last: Rousselot} +frank-f-xu: + names: + - {first: Frank F., last: Xu} + - {first: Frank, last: Xu} +frank-henrik-muller: + names: + - {first: Frank Henrik, last: Müller} + - {first: Frank H., last: Müller} + - {first: Frank, last: Müller} +frank-k-soong: + names: + - {first: Frank K., last: Soong} + - {first: Frank, last: Soong} +frank-smadja: + names: + - {first: Frank, last: Smadja} + - {first: Frank A., last: Smadja} +frank-van-eynde: + names: + - {first: Frank Van, last: Eynde} + - {first: Frank, last: van Eynde} + - {first: Frank, last: Van Eynde} +franz-guenthner: + names: + - {first: Franz, last: Guenthner} + - {first: F., last: Guenthner} +franz-josef-och: + names: + - {first: Franz Josef, last: Och} + - {first: Franz J., last: Och} + - {first: Franz, last: Och} + - {first: F. J., last: Och} +fred-damerau: + names: + - {first: Fred, last: Damerau} + - {first: Fred J., last: Damerau} +freda-shi: + names: + - {first: Freda, last: Shi} + - {first: Haoyue, last: Shi} + orcid: 0009-0009-5697-449X +frederic-bechet: + names: + - {first: Frederic, last: Bechet} + - {first: Frédéric, last: Bechét} + - {first: Frédéric, last: Béchet} + - {first: Frederic, last: Béchet} + - {first: F., last: Bechet} +frederic-blain: + names: + - {first: Frédéric, last: Blain} + - {first: Frederic, last: Blain} +frederic-elisei: + names: + - {first: Frédéric, last: Eliséi} + - {first: Frederic, last: Elisei} +frederic-mailhot: + names: + - {first: Frederic, last: Mailhot} + - {first: Fred, last: Mailhot} + - {first: Frédéric, last: Mailhot} +frederic-meunier: + names: + - {first: Frédéric, last: Meunier} + - {first: Frederic, last: Meunier} +frederick-jelinek: + names: + - {first: Frederick, last: Jelinek} + - {first: Fred, last: Jelinek} + - {first: Fredrick, last: Jelinek} + - {first: F., last: Jelinek} +frederick-m-hoyt: + names: + - {first: Frederick M., last: Hoyt} + - {first: Frederick, last: Hoyt} +frederick-reiss: + names: + - {first: Frederick, last: Reiss} + - {first: Frederick R., last: Reiss} +frederique-laforest: + names: + - {first: Frederique, last: Laforest} + - {first: Frédérique, last: Laforest} +frederique-segond: + names: + - {first: Frédérique, last: Segond} + - {first: Frederique, last: Segond} +fredy-a-amaya: + names: + - {first: Fredy A., last: Amaya} + - {first: F., last: Amaya} +fumito-masui: + names: + - {first: Fumito, last: Masui} + - {first: F., last: Masui} +g-bowden-wise: + names: + - {first: G. Bowden, last: Wise} + - {first: Bowden, last: Wise} +g-edward-barton: + names: + - {first: G. Edward, last: Barton} + - {first: G. Edward, last: 'Barton, Jr.'} +gabor-proszeky: + names: + - {first: Gabor, last: Proszeky} + - {first: Gábor, last: Prószéky} + - {first: Gabor, last: Prbszeky} +gabriel-g-bes: + names: + - {first: Gabriel G., last: Bes} + - {first: Gabriel G., last: Bès} + - {first: Gabriel, last: Bès} + - {first: G.G., last: Bes} +gabriel-lopes: + names: + - {first: Gabriel, last: Lopes} + - {first: Jose Gabriel P., last: Lopes} + - {first: Jose Gabriel, last: Lopes} + - {first: Gabriel P., last: Lopes} + - {first: José Gabriel Pereira, last: Lopes} + - {first: Gabriel, last: Pereira Lopes} + - {first: Gabriel Pereira, last: Lopes} +gabriela-cavaglia: + names: + - {first: Gabriela, last: Cavaglià} + - {first: Gabriela, last: Cavaglia} +gabriela-serban: + names: + - {first: Gabriela, last: Şerban} + - {first: Gabriela, last: Serban} +gabriele-musillo: + names: + - {first: Gabriele, last: Musillo} + - {first: Gabriele Antonio, last: Musillo} +gael-dias: + names: + - {first: Gaël, last: Dias} + - {first: Gael, last: Dias} + - {first: Gäel, last: Dias} +gaelle-ferre: + names: + - {first: Gaëlle, last: Ferré} + - {first: Gaelle, last: Ferré} +gail-m-keenan: + names: + - {first: Gail M., last: Keenan} + - {first: Gail, last: Keenan} + - {first: Gail M, last: Keenan} +gaja-jarosz: + names: + - {first: Gaja, last: Jarosz} + - {first: Gaja E., last: Jarosz} +gareth-j-f-jones: + names: + - {first: Gareth J. F., last: Jones} + - {first: Gareth J.F., last: Jones} +gary-g-hendrix: + names: + - {first: Gary G., last: Hendrix} + - {first: Gary, last: Hendrix} +gary-geunbae-lee: + names: + - {first: Gary Geunbae, last: Lee} + - {first: Geunbae, last: Lee} +gary-k-k-chan: + names: + - {first: Gary K. K., last: Chan} + - {first: G. K. K., last: Chan} +gaston-burek: + names: + - {first: Gaston, last: Burek} + - {first: Gaston G., last: Burek} +gaurav-singh-tomar: + names: + - {first: Gaurav Singh, last: Tomar} + - {first: Gaurav, last: Singh} +gauri-shankar-gautam: + names: + - {first: Gauri Shankar, last: Gautam} + - {first: Gauri, last: S. Gautam} +geert-adriaens: + names: + - {first: Geert, last: Adriaens} + - {first: G., last: Adriaens} +geert-jan-m-kruijff: + names: + - {first: Geert-Jan M., last: Kruijff} + - {first: Geert-Jan, last: Kruijff} +gema-ramirez-sanchez: + names: + - {first: Gema, last: Ramírez-Sánchez} + - {first: Gema, last: Ramírez} +gemma-bel-enguix: + names: + - {first: Gemma, last: Bel-Enguix} + - {first: Gemma Bel, last: Enguix} + - {first: Gemma, last: Bel Enguix} +gemma-boleda: + names: + - {first: Gemma, last: Boleda} + - {first: Gemma, last: Boleda Torrent} +gen-ichiro-kikui: + names: + - {first: Gen-ichiro, last: Kikui} + - {first: Gen’ichiro, last: Kikui} +genevieve-gorrell: + names: + - {first: Genevieve, last: Gorrell} + - {first: G., last: Gorrell} +genta-indra-winata: + names: + - {first: Genta Indra, last: Winata} + - {first: Genta, last: Winata} +geoffrey-k-pullum: + names: + - {first: Geoffrey K., last: Pullum} + - {first: Geoffrey, last: Pullum} +geoffrey-zweig: + names: + - {first: Geoffrey, last: Zweig} + - {first: Geoff, last: Zweig} + - {first: G., last: Zweig} +george-aaron-broadwell: + names: + - {first: George Aaron, last: Broadwell} + - {first: Aaron, last: Broadwell} + - {first: G. Aaron, last: Broadwell} +george-anton-kiraz: + names: + - {first: George Anton, last: Kiraz} + - {first: George, last: Kiraz} +george-baker: + names: + - {first: George, last: Baker} + - {first: George Arthur, last: Baker} +george-carayannis: + names: + - {first: George, last: Carayannis} + - {first: G., last: Carayannis} +george-caridakis: + names: + - {first: George, last: Caridakis} + - {first: G., last: Caridakis} +george-chou: + names: + - {first: George, last: Chou} + - {first: G., last: Chou} +george-demetriou: + names: + - {first: George, last: Demetriou} + - {first: George C., last: Demetriou} +george-e-heidorn: + names: + - {first: George E., last: Heidorn} + - {first: G. E., last: Heidorn} +george-kokkinakis: + names: + - {first: George, last: Kokkinakis} + - {first: George K., last: Kokkinakis} + - {first: G., last: Kokkinakis} +george-krupka: + names: + - {first: George, last: Krupka} + - {first: George R., last: Krupka} +george-r-doddington: + names: + - {first: George R., last: Doddington} + - {first: George, last: Doddington} +george-vladutz: + names: + - {first: George, last: Vladutz} + - {first: G., last: Vladutz} +george-zavaliagkos: + names: + - {first: George, last: Zavaliagkos} + - {first: G., last: Zavaliagkos} +georges-antoniadis: + names: + - {first: Georges, last: Antoniadis} + - {first: G., last: Antoniadis} +georges-de-moor: + names: + - {first: Georges, last: De Moor} + - {first: G., last: De Moor} +georges-linares: + names: + - {first: Georges, last: Linarès} + - {first: Georges, last: Linares} +georgiana-dinu: + names: + - {first: Georgiana, last: Dinu} + - {first: G., last: Dinu} +geraldine-damnati: + names: + - {first: Géraldine, last: Damnati} + - {first: Geraldine, last: Damnati} +geraldo-bonorino-xexeo: + names: + - {first: Geraldo Bonorino, last: Xexéo} + - {first: Geraldo, last: Xexéo} +gerard-salton: + names: + - {first: Gerard, last: Salton} + - {first: G., last: Salton} + - {first: G, last: Salton} +gerard-veillon: + names: + - {first: Gerard, last: Veillon} + - {first: G., last: Veillon} +gerardo-sierra: + names: + - {first: Gerardo, last: Sierra} + - {first: Gerardo, last: Sierra-Martínez} +gerda-klimonow: + names: + - {first: Gerda, last: Klimonow} + - {first: G., last: Klimonow} +gerhard-b-van-huyssteen: + names: + - {first: Gerhard B., last: van Huyssteen} + - {first: Gerhard, last: Van Huyssteen} + - {first: Gerhard, last: van Huyssteen} + - {first: Gerhard B, last: van Huyssteen} +gerhard-paass: + names: + - {first: Gerhard, last: Paaß} + - {first: Gerhard, last: Paass} +german-bordel: + names: + - {first: German, last: Bordel} + - {first: Germán, last: Bordel} + - {first: G., last: Bordel} +german-kruszewski: + names: + - {first: Germán, last: Kruszewski} + - {first: German, last: Kruszewski} +german-rigau: + names: + - {first: German, last: Rigau} + - {first: G., last: Rigau} +german-sanchis-trilles: + names: + - {first: Germán, last: Sanchis-Trilles} + - {first: Germán, last: Sanchis Trilles} + - {first: Germán, last: Sanchis} +gertjan-van-noord: + names: + - {first: Gertjan, last: van Noord} + - {first: Gertjan, last: Van Noord} +gholamreza-ghassem-sani: + names: + - {first: Gholamreza, last: Ghassem-Sani} + - {first: Gholamreza, last: Ghassem-sani} + - {first: Gholamreza, last: Ghasem-Sani} +gholamreza-haffari: + names: + - {first: Gholamreza, last: Haffari} + - {first: Reza, last: Haffari} +giampaolo-mazzini: + names: + - {first: Giampaolo, last: Mazzini} + - {first: G., last: Mazzini} +gian-piero-zarri: + names: + - {first: Gian Piero, last: Zarri} + - {first: G.P., last: Zarri} +giancarlo-salton: + names: + - {first: Giancarlo, last: Salton} + - {first: Giancarlo D., last: Salton} +giang-binh-tran: + names: + - {first: Giang Binh, last: Tran} + - {first: Giang, last: Tran} +gianluca-de-rossi: + names: + - {first: Gianluca, last: De Rossi} + - {first: Gianluca, last: Rossi} +gianluca-e-lebani: + names: + - {first: Gianluca E., last: Lebani} + - {first: Gianluca, last: Lebani} +gianmaria-ajani: + names: + - {first: Gianmaria, last: Ajani} + - {first: G., last: Ajani} +gideon-mann: + names: + - {first: Gideon, last: Mann} + - {first: Gideon S., last: Mann} +gil-chang-kim: + names: + - {first: Gil Chang, last: Kim} + - {first: GilChang, last: Kim} + - {first: Gil-Chang, last: Kim} + - {first: Gilchang, last: Kim} +gildas-menier: + names: + - {first: Gildas, last: Ménier} + - {first: Gildas, last: Menier} +gilles-adda: + names: + - {first: Gilles, last: Adda} + - {first: G., last: Adda} +gilles-boulianne: + names: + - {first: Gilles, last: Boulianne} + - {first: G., last: Boulianne} +gilles-lechenadec: + names: + - {first: Gilles, last: Lechenadec} + - {first: G., last: Lechenadec} +gilles-serasset: + names: + - {first: Gilles, last: Sérasset} + - {first: Gilles, last: Serasset} +gina-anne-levow: + names: + - {first: Gina-Anne, last: Levow} + - {first: Gina, last: Levow} +gintare-grigonyte: + names: + - {first: Gintarė, last: Grigonytė} + - {first: Gintare, last: Grigonyte} + - {first: Gintarė, last: Grigonyte} +giorgio-maria-di-nunzio: + names: + - {first: Giorgio Maria, last: Di Nunzio} + - {first: Giorgio, last: Di Nunzio} +giovanni-adorni: + names: + - {first: Giovanni, last: Adorni} + - {first: G., last: Adorni} +giovanni-battista-varile: + names: + - {first: Giovanni Battista, last: Varile} + - {first: Giovanni B., last: Varile} + - {first: G.B., last: Varile} +giovanni-semeraro: + names: + - {first: Giovanni, last: Semeraro} + - {first: G., last: Semeraro} +giovanni-tummarello: + names: + - {first: Giovanni, last: Tummarello} + - {first: G., last: Tummarello} +girish-nath-jha: + names: + - {first: Girish Nath, last: Jha} + - {first: Girish, last: Jha} +girish-palshikar: + names: + - {first: Girish, last: Palshikar} + - {first: Girish K., last: Palshikar} + - {first: Girish K, last: Palshikar} +gisele-montilha-pinheiro: + names: + - {first: Gisele Montilha, last: Pinheiro} + - {first: Gisele, last: Montilha} +giulia-marchesini: + names: + - {first: Giulia, last: Marchesini} + - {first: Giulia, last: Marchesi} +giuseppe-di-fabbrizio: + names: + - {first: Giuseppe, last: Di Fabbrizio} + - {first: Giuseppe, last: Fabbrizio} +gloria-corpas-pastor: + names: + - {first: Gloria, last: Corpas Pastor} + - {first: Gloria, last: Corpas} + - {first: Gloria Corpas, last: Pastor} +gloria-vazquez: + names: + - {first: Glòria, last: Vázquez} + - {first: Gloria, last: Vázquez} + - {first: Gloria, last: Vazquez} +gokhan-tur: + names: + - {first: Gokhan, last: Tur} + - {first: Gokhan, last: Tür} + - {first: G., last: Tur} +golnar-sheikhshab: + names: + - {first: Golnar, last: Sheikhshab} + - {first: Golnar, last: Sheikhshabbafghi} +goncal-v-garces-diaz-munio: + names: + - {first: Gonçal V., last: Garcés Díaz-Munío} + orcid: 0000-0002-2594-5858 +goran-nenadic: + names: + - {first: Goran, last: Nenadic} + - {first: Goran, last: Nenadić} +gordana-ilic-holen: + names: + - {first: Gordana Ilić, last: Holen} + - {first: Gordana Ilic, last: Holen} +gordon-i-mccalla: + names: + - {first: Gordon I., last: McCalla} + - {first: Gordon, last: McCalla} + - {first: G.I., last: McCalla} +gordon-pace: + names: + - {first: Gordon, last: Pace} + - {first: Gordon J., last: Pace} +gorka-labaka: + names: + - {first: Gorka, last: Labaka} + - {first: G., last: Labaka} +govind-kothari: + names: + - {first: Govind, last: Kothari} + - {first: '', last: Govind} +gozde-gul-sahin: + names: + - {first: Gözde Gül, last: Şahin} + - {first: Gözde, last: Şahin} + - {first: Gözde Gül, last: İşgüder} +gozde-ozbal: + names: + - {first: Gözde, last: Özbal} + - {first: Gozde, last: Ozbal} +grace-chung: + names: + - {first: Grace, last: Chung} + - {first: Grace Y, last: Chung} +graciela-gonzalez: + names: + - {first: Graciela, last: Gonzalez} + - {first: Graciela, last: Gonzalez-Hernandez} +graeme-ritchie: + names: + - {first: Graeme, last: Ritchie} + - {first: Graeme D., last: Ritchie} + - {first: G.D., last: Ritchie} + - {first: G., last: Ritchie} +graham-katz: + names: + - {first: Graham, last: Katz} + - {first: E. Graham, last: Katz} +graham-russell: + names: + - {first: Graham, last: Russell} + - {first: Graham J., last: Russell} + - {first: G.J., last: Russell} +graham-wilcock: + names: + - {first: Graham, last: Wilcock} + - {first: G., last: Wilcock} +grazyna-vetulani: + names: + - {first: Grażyna, last: Vetulani} + - {first: Grazyna, last: Vetulani} +greg-gul-rajani: + names: + - {first: Greg, last: Gul-rajani} + - {first: Greg, last: Gulrajani} +gregoire-moreau-de-montcheuil: + names: + - {first: Grégoire, last: Moreau de Montcheuil} + - {first: Grégoire, last: de Montcheuil} +gregorio-hernandez: + names: + - {first: Gregorio, last: Hernández} + - {first: Gregorio, last: Hernandez} + - {first: G., last: Hernández} +gregory-aist: + names: + - {first: Gregory, last: Aist} + - {first: Greg, last: Aist} +gregory-crane: + names: + - {first: Gregory, last: Crane} + - {first: Gregory R., last: Crane} +gregory-finley: + names: + - {first: Gregory, last: Finley} + - {first: Greg, last: Finley} +gregory-roulet-guiot: + names: + - {first: Grégory, last: Roulet--Guiot} + - {first: Grégory, last: Roulet-Guiot} +gregory-sanders: + names: + - {first: Gregory, last: Sanders} + - {first: Gregory A., last: Sanders} + - {first: Greg, last: Sanders} +gregory-stainhauer: + names: + - {first: Gregory, last: Stainhauer} + - {first: G., last: Stainhauer} + - {first: Gregory, last: Stainhaouer} +greville-c-corbett: + names: + - {first: Greville C., last: Corbett} + - {first: Greville, last: Corbett} + - {first: Greville G., last: Corbett} +grzegorz-chrupala: + names: + - {first: Grzegorz, last: Chrupała} + - {first: Grzegorz, last: Chrupala} +guadalupe-aguado-de-cea: + names: + - {first: Guadalupe Aguado, last: de Cea} + - {first: Guadalupe, last: Aguado de Cea} + - {first: Guadalupe, last: Aguado-de-Cea} +gualberto-a-guzman: + names: + - {first: Gualberto A., last: Guzman} + - {first: Gualberto, last: Guzmán} +gudrun-magnusdottir: + names: + - {first: Guðrun, last: Magnúsdóttir} + - {first: Guðrún, last: Magnúsdóttir} +guenther-goerz: + names: + - {first: Guenther, last: Goerz} + - {first: G., last: Goerz} +guido-boella: + names: + - {first: Guido, last: Boella} + - {first: G., last: Boella} +guillaume-bonfante: + names: + - {first: Guillaume, last: Bonfante} + - {first: Guillame, last: Bonfante} +guillaume-gravier: + names: + - {first: Guillaume, last: Gravier} + - {first: G., last: Gravier} +guillaume-vauvert: + names: + - {first: Guillaume, last: Vauvert} + - {first: G., last: Vauvert} +guillermo-a-cecchi: + names: + - {first: Guillermo A., last: Cecchi} + - {first: Guillermo, last: Cecchi} +guiping-zhang: + names: + - {first: Guiping, last: Zhang} + - {first: GuiPing, last: Zhang} +gulsen-eryigit: + names: + - {first: Gülşen, last: Eryiğit} + - {first: Gülşen, last: Eryiǧit} +gunaranjan-vasireddy: + names: + - {first: Gunaranjan, last: Vasireddy} + - {first: G., last: Vasireddy} +gunes-erkan: + names: + - {first: Gunes, last: Erkan} + - {first: Güneş, last: Erkan} +gunn-inger-lyse: + names: + - {first: Gunn Inger, last: Lyse} + - {first: Gunn, last: Lyse} +gunnel-kallgren: + names: + - {first: Gunnel, last: Källgren} + - {first: Gunnel, last: Kallgren} +gunta-nespore: + names: + - {first: Gunta, last: Nešpore} + - {first: Gunta, last: Nespore-Berzkalne} +gunter-neumann: + names: + - {first: Günter, last: Neumann} + - {first: Gunter, last: Neumann} + - {first: Guenter, last: Neumann} +guntis-barzdins: + names: + - {first: Guntis, last: Barzdins} + - {first: Guntis, last: Bārzdiņš} +guodong-zhou: + names: + - {first: Guodong, last: Zhou} + - {first: GuoDong, last: Zhou} +gurpreet-singh-lehal: + names: + - {first: Gurpreet Singh, last: Lehal} + - {first: Gurpreet, last: Singh Lehal} + - {first: Gurpreet, last: Lehal} +gus-hahn-powell: + names: + - {first: Gus, last: Hahn-Powell} + - {first: Gustave, last: Hahn-Powell} +gustavo-mendonca: + names: + - {first: Gustavo, last: Mendonca} + - {first: Gustavo, last: Mendonça} +gustavo-paetzold: + names: + - {first: Gustavo, last: Paetzold} + - {first: Gustavo H., last: Paetzold} + - {first: Gustavo, last: Henrique Paetzold} + - {first: Gustavo Henrique, last: Paetzold} +guy-noel-kouarata: + names: + - {first: Guy-Noel, last: Kouarata} + - {first: Guy-Noël, last: Kouarata} +guy-perennou: + names: + - {first: Guy, last: Pérennou} + - {first: G., last: Perennou} +gyri-smordal-losnegaard: + names: + - {first: Gyri, last: Smørdal Losnegaard} + - {first: Gyri S., last: Losnegaard} + - {first: Gyri, last: Losnegaard} +gyuhyeon-choi: + names: + - {first: GyuHyeon, last: Choi} + - {first: Gyu-Hyeon, last: Choi} +h-andrew-schwartz: + names: + - {first: H. Andrew, last: Schwartz} + - {first: Hansen Andrew, last: Schwartz} + - {first: Hansen A., last: Schwartz} + - {first: H Andrew, last: Schwartz} +h-rodriguez-hontoria: + names: + - {first: H., last: Rodriguez Hontoria} + - {first: H., last: Rodriguez} +hae-chang-rim: + names: + - {first: Hae Chang, last: Rim} + - {first: Hae-Chang, last: Rim} +hai-quan-vu: + names: + - {first: Hai-Quan, last: Vu} + - {first: Hai Quan, last: Vu} +hai-son-le: + names: + - {first: Hai-Son, last: Le} + - {first: Hai Son, last: Le} + - {first: Hai-son, last: Le} +hakaze-cho: + names: + - {first: Hakaze, last: Cho} + - {first: Yufeng, last: Zhao} +hakimeh-fadaee: + names: + - {first: Hakimeh, last: Fadaee} + - {first: Hakimeh, last: Fadaei} +hal-daume-iii: + names: + - {first: Hal, last: Daumé III} + - {first: Hal, last: Daume III} + - {first: Hal, last: Daume} + - {first: Hal, last: Daumé} +haldur-oim: + names: + - {first: Haldur, last: Õim} + - {first: Haldur, last: Oim} + - {first: H., last: Oim} +hale-ogel-balaban: + names: + - {first: Hale, last: Ögel Balaban} + - {first: Hale, last: Ogel} +haley-lepp: + names: + - {first: Haley, last: Lepp} + - {first: Haley M., last: Lepp} +hamed-movasagh: + names: + - {first: Hamed, last: Movasagh} + - {first: H., last: Movasagh} +hamish-cunningham: + names: + - {first: Hamish, last: Cunningham} + - {first: H., last: Cunningham} +hammam-riza: + names: + - {first: Hammam, last: Riza} + - {first: Ir. Hammam, last: Riza} +han-min-jung: + names: + - {first: Han-Min, last: Jung} + - {first: Hanmin, last: Jung} +hana-skoumalova: + names: + - {first: Hana, last: Skoumalova} + - {first: Hana, last: Skoumalová} +hanae-koiso: + names: + - {first: Hanae, last: Koiso} + - {first: H., last: Koiso} +hanna-wallach: + names: + - {first: Hanna, last: Wallach} + - {first: Hanna M., last: Wallach} +hannah-bechara: + names: + - {first: Hannah, last: Bechara} + - {first: Hanna, last: Béchara} + - {first: Hanna, last: Bechara} + - {first: Hannah, last: Béchara} +hannah-cyberey: + names: + - {first: Hannah, last: Cyberey} + - {first: Hannah, last: Chen} +hanne-erdman-thomsen: + names: + - {first: Hanne Erdman, last: Thomsen} + - {first: Hanne, last: Erdman Thomsen} +hanne-fersoe: + names: + - {first: Hanne, last: Fersøe} + - {first: Hanne, last: Fersoe} + - {first: H., last: Fersøe} +hans-dybkjaer: + names: + - {first: Hans, last: Dybkjaer} + - {first: Hans, last: Dybkjær} +hans-ulrich-block: + names: + - {first: Hans Ulrich, last: Block} + - {first: Hans-Ulrich, last: Block} +hans-ulrich-krieger: + names: + - {first: Hans-Ulrich, last: Krieger} + - {first: HansUlrich, last: Krieger} +hans-van-halteren: + names: + - {first: Hans, last: van Halteren} + - {first: Hans, last: Van Halteren} +hany-hassan-awadalla: + names: + - {first: Hany, last: Hassan Awadalla} + - {first: Hany, last: Hassan} +haoliang-qi: + names: + - {first: Haoliang, last: Qi} + - {first: HaoLiang, last: Qi} +harald-baayen: + names: + - {first: Harald, last: Baayen} + - {first: R. Harald, last: Baayen} +harald-h-zimmermann: + names: + - {first: Harald H., last: Zimmermann} + - {first: H., last: Zimmermann} +harald-hoge: + names: + - {first: Harald, last: Höge} + - {first: Harald, last: Hoege} + - {first: H., last: Höge} +harald-lungen: + names: + - {first: Harald, last: Lüngen} + - {first: Harald, last: Lungen} +harald-trost: + names: + - {first: Harald, last: Trost} + - {first: H., last: Trost} +harold-somers: + names: + - {first: Harold, last: Somers} + - {first: Harold L., last: Somers} + - {first: H.L., last: Somers} +harri-jappinen: + names: + - {first: Harri, last: Jäppinen} + - {first: Harri, last: Jappinen} + - {first: H., last: Jäppinen} +harris-papageorgiou: + names: + - {first: Harris, last: Papageorgiou} + - {first: Haris, last: Papageorgiou} +harry-bratt: + names: + - {first: Harry, last: Bratt} + - {first: H., last: Bratt} +harry-bunt: + names: + - {first: Harry, last: Bunt} + - {first: H. C., last: Bunt} +harry-j-tily: + names: + - {first: Harry J., last: Tily} + - {first: Harry, last: Tily} +harry-tennant: + names: + - {first: Harry, last: Tennant} + - {first: Harry R., last: Tennant} +harsh-vardhan-sharma: + names: + - {first: Harsh Vardhan, last: Sharma} + - {first: Harsh, last: Sharma} +harshit-kumar: + names: + - {first: Harshit, last: Kumar} +harshit-kumar-iit: + names: + - {first: Harshit, last: Kumar} +hassan-s-shavarani: + names: + - {first: Hassan S., last: Shavarani} + - {first: Hassan, last: Shavarani} +hatte-blejer: + names: + - {first: Hatte, last: Blejer} + - {first: Hatte R., last: Blejer} +hazem-raafat: + names: + - {first: Hazem, last: Raafat} + - {first: Hazem, last: M. Raafat} +he-yan-huang: + names: + - {first: He-Yan, last: Huang} + - {first: He-yan, last: Huang} + - {first: Heyan, last: Huang} +heather-horsfall: + names: + - {first: Heather, last: Horsfall} + - {first: H. J., last: Horsfall} +hector-allende-cid: + names: + - {first: Hector, last: Allende-Cid} + - {first: Héctor, last: Allende} + - {first: Héctor, last: Allende-Cid} +hector-llorens: + names: + - {first: Héctor, last: Llorens} + - {first: Hector, last: Llorens} +hector-martinez-alonso: + names: + - {first: Héctor, last: Martínez Alonso} + - {first: Hector, last: Martinez} + - {first: Héctor, last: Martínez} + - {first: Héctor Martínez, last: Alonso} + - {first: Hector, last: Martinez Alonso} + - {first: Héctor, last: Martinez Alonso} + - {first: Hector, last: Martínez Alonso} +hee-sook-bae: + names: + - {first: Hee-Sook, last: Bae} + - {first: Hee Sook, last: Bae} +hee-sung-chung: + names: + - {first: Hee Sung, last: Chung} + - {first: Hee-Sung, last: Chung} +heidi-fox: + names: + - {first: Heidi, last: Fox} + - {first: Heidi J., last: Fox} +heiki-jaan-kaalep: + names: + - {first: Heiki-Jaan, last: Kaalep} + - {first: Heiki Jaan, last: Kaalep} +heinz-j-weber: + names: + - {first: Heinz J., last: Weber} + - {first: H-J., last: Weber} +helen-hastie: + names: + - {first: Helen, last: Hastie} + - {first: Helen Wright, last: Hastie} +helen-kaiyun-chen: + names: + - {first: Helen Kaiyun, last: Chen} + - {first: Kai-Yun, last: Chen} + - {first: Kai-yun, last: Chen} + - {first: Helen Kai-yun, last: Chen} +helen-l-johnson: + names: + - {first: Helen L., last: Johnson} + - {first: Helen, last: Johnson} +helen-m-gigley: + names: + - {first: Helen M., last: Gigley} + - {first: Helen, last: Gigley} +helen-meng: + names: + - {first: Helen, last: Meng} + - {first: Helen M., last: Meng} +helen-pain: + names: + - {first: Helen, last: Pain} + - {first: H., last: Pain} +helen-v-cook: + names: + - {first: Helen V., last: Cook} + - {first: Helen, last: Cook} + - {first: Helen V, last: Cook} +helena-de-medeiros-caseli: + names: + - {first: Helena de Medeiros, last: Caseli} + - {first: Helena, last: de Medeiros Caseli} +helena-gomez: + names: + - {first: Helena, last: Gomez} + - {first: Helena, last: Gómez} +helena-hong-gao: + names: + - {first: Helena Hong, last: Gao} + - {first: Helena, last: Gao} +helene-bonneau-maynard: + names: + - {first: Hélène, last: Bonneau-Maynard} + - {first: Hélène, last: Maynard} + - {first: H., last: Bonneau-Maynard} +helka-folch: + names: + - {first: Helka, last: Folch} + - {first: H., last: Folch} +helmer-strik: + names: + - {first: Helmer, last: Strik} + - {first: H., last: Strik} +hema-a-murthy: + names: + - {first: Hema A., last: Murthy} + - {first: Hema, last: Murthy} +hendrik-johannes-groenewald: + names: + - {first: Hendrik Johannes, last: Groenewald} + - {first: Hendrik J., last: Groenewald} +heng-wang-sydney: + comment: University of Sydney + disable_name_matching: true + names: + - {first: Heng, last: Wang} + orcid: 0009-0009-5473-5751 +henk-van-den-heuvel: + names: + - {first: Henk, last: van den Heuvel} + - {first: H., last: van den Heuvel} +hennie-brugman: + names: + - {first: Hennie, last: Brugman} + - {first: H., last: Brugman} +hennie-van-der-vliet: + names: + - {first: Hennie, last: van der Vliet} + - {first: Hennie, last: VanderVliet} +henry-s-thompson: + names: + - {first: Henry S., last: Thompson} + - {first: Henry, last: Thompson} +herbert-gish: + names: + - {first: Herbert, last: Gish} + - {first: Herb, last: Gish} +hermann-ney: + names: + - {first: Hermann, last: Ney} + - {first: H., last: Ney} +herve-blanchon: + names: + - {first: Hervé, last: Blanchon} + - {first: Herve, last: Blanchon} +herve-dejean: + names: + - {first: Hervé, last: Déjean} + - {first: Herve, last: Dejean} + - {first: H., last: Dejean} +herve-saint-amand: + names: + - {first: Herve, last: Saint-Amand} + - {first: Hervé, last: Saint-Amand} +heui-seok-lim: + names: + - {first: Heui-Seok, last: Lim} + - {first: Heuiseok, last: Lim} +heung-yeung-shum: + names: + - {first: Heung Yeung, last: Shum} + - {first: Heung-Yeung, last: Shum} +hideaki-kikuchi: + names: + - {first: Hideaki, last: Kikuchi} + - {first: H., last: Kikuchi} +hideki-hirakawa: + names: + - {first: Hideki, last: Hirakawa} + - {first: H., last: Hirakawa} +hideki-kashioka: + names: + - {first: Hideki, last: Kashioka} + - {first: H, last: Kashioka} +himani-chaudhry: + names: + - {first: Himani, last: Chaudhry} + - {first: Himani, last: Chaudhary} +hing-cheung-ho: + names: + - {first: Hing-cheung, last: Ho} + - {first: Hing-Cheung, last: Ho} +hing-lung-lin: + names: + - {first: Hing-Lung, last: Lin} + - {first: Hing-lung, last: Lin} +hinrich-schutze: + names: + - {first: Hinrich, last: Schütze} + - {first: Hinrich, last: Schutze} + - {first: Hinrich, last: Schuetze} +hirofumi-yamamoto: + names: + - {first: Hirofumi, last: Yamamoto} + - {first: Hirohumi, last: Yamamoto} +hiromi-itoh-ozaku: + names: + - {first: Hiromi Itoh, last: Ozaku} + - {first: Hiromi itoh, last: Ozaku} +hiroshi-echizen-ya: + names: + - {first: Hiroshi, last: Echizen-ya} + - {first: Hiroshi, last: Echizen’ya} +hiroshi-g-okuno: + names: + - {first: Hiroshi G., last: Okuno} + - {first: Hiroshi, last: Okuno} +hiroshi-masuichi: + names: + - {first: Hiroshi, last: Masuichi} + - {first: Hiroshi, last: Mashuichi} +hiroyuki-akama: + names: + - {first: Hiroyuki, last: Akama} + - {first: Hiroyuki, last: Akam} +hisashi-kawai: + names: + - {first: Hisashi, last: Kawai} + - {first: Kawai, last: Hisashi} +hiyan-alshawi: + names: + - {first: Hiyan, last: Alshawi} + - {first: Hiyan, last: Alsawi} +hoa-trang-dang: + names: + - {first: Hoa Trang, last: Dang} + - {first: Hoa, last: Dang} +hoang-kiem: + names: + - {first: Hoang, last: Kiem} + - {first: Kiem, last: Hoang} +hoang-quynh-le: + names: + - {first: Hoang Quynh, last: Le} + - {first: Hoang-Quynh, last: Le} +holger-hoffmann: + names: + - {first: Holger, last: Hoffmann} + - {first: Holger, last: Hoffman} +hong-i-ng: + names: + - {first: Hong-I, last: Ng} + - {first: Hong I, last: Ng} +hong-leung: + names: + - {first: Hong, last: Leung} + - {first: Hong C., last: Leung} +hongsuck-seo: + names: + - {first: Hongsuck, last: Seo} + - {first: Paul Hongsuck, last: Seo} +hongyan-jing: + names: + - {first: Hongyan, last: Jing} + - {first: H., last: Jing} +hongying-zan: + names: + - {first: Hongying, last: Zan} + - {first: Hong-ying, last: Zan} +hoojung-chung: + names: + - {first: HooJung, last: Chung} + - {first: Hoojung, last: Chung} +horacio-rodriguez: + names: + - {first: Horacio, last: Rodríguez} + - {first: Horacio, last: Rodriguez} +horng-jyh-paul-wu: + names: + - {first: Horng Jyh Paul, last: Wu} + - {first: Horng-Jyh P., last: Wu} +horst-udo-hain: + names: + - {first: Horst-Udo, last: Hain} + - {first: H.-U., last: Hain} +hossein-sameti: + names: + - {first: Hossein, last: Sameti} + - {first: H., last: Sameti} +houda-saadane: + names: + - {first: Houda, last: Saadane} + - {first: Houda, last: Saâdane} +howard-r-turtle: + names: + - {first: Howard R., last: Turtle} + - {first: Howard, last: Turtle} +hristo-tanev: + names: + - {first: Hristo, last: Tanev} + - {first: Hristo, last: Tannev} +hsiang-pin-lee: + names: + - {first: Hsiang-Pin, last: Lee} + - {first: Hsiang-Ping, last: Lee} +hsiao-wuen-hon: + names: + - {first: Hsiao-Wuen, last: Hon} + - {first: H.W., last: Hon} + - {first: H., last: Hon} +hsin-min-wang: + names: + - {first: Hsin-Min, last: Wang} + - {first: Hsin-min, last: Wang} +hsue-hueh-shih: + names: + - {first: Hsue-Hueh, last: Shih} + - {first: Rebecca Hsue-Hueh, last: Shih} +hsun-wen-chiu: + names: + - {first: Hsun-Wen, last: Chiu} + - {first: Hsun-wen, last: Chiu} +huarui-zhang: + names: + - {first: Huarui, last: Zhang} + - {first: HuaRui, last: Zhang} +huda-almuzaini: + names: + - {first: Huda, last: Almuzaini} +huey-chyun-chen: + names: + - {first: Huey-Chyun, last: Chen} + - {first: Mathis Huey-chyun, last: Chen} +hugo-goncalo-oliveira: + names: + - {first: Hugo, last: Gonçalo Oliveira} + - {first: Hugo Gonçalo, last: Oliveira} +hugo-van-hamme: + names: + - {first: Hugo Van, last: hamme} + - {first: Hugo, last: Van hamme} +huidan-liu: + names: + - {first: Huidan, last: Liu} + - {first: Hui Dan, last: Liu} +huifeng-li: + names: + - {first: Huifeng, last: Li} + - {first: Hui-Feng, last: Li} +huihsin-tseng: + names: + - {first: Huihsin, last: Tseng} + - {first: Hui-hsin, last: Tseng} + - {first: Hui-Hsin, last: Tseng} +huiwei-zhou: + names: + - {first: Huiwei, last: Zhou} + - {first: HuiWei, last: Zhou} +huizhi-liang: + names: + - {first: Huizhi, last: Liang} + - {first: HuiZhi, last: Liang} +hung-ting-hsieh: + names: + - {first: Hung-ting, last: Hsieh} + - {first: Hung-Ting, last: Hsieh} +hung-yan-gu: + names: + - {first: Hung-Yan, last: Gu} + - {first: Hung-yan, last: Gu} +huy-nguyen-bcl: + comment: BCL Technologies Inc. + names: + - {first: Huy, last: Nguyen} +huy-nguyen-lls: + comment: ex-liulishuo + names: + - {first: Huy, last: Nguyen} +huy-nguyen-pgh: + comment: UPitt, Amazon + names: + - {first: Huy, last: Nguyen} +huy-nguyen-stanford: + comment: Stanford + names: + - {first: Huy, last: Nguyen} +huy-tien-nguyen: + names: + - {first: Huy Tien, last: Nguyen} + - {first: Huy-Tien, last: Nguyen} +hy-murveit: + names: + - {first: Hy, last: Murveit} + - {first: H., last: Murveit} +hyeon-gu-lee: + names: + - {first: Hyeon-gu, last: Lee} + - {first: Hyeon-Gu, last: Lee} +hyojung-han: + names: + - {first: HyoJung, last: Han} + - {first: Hou Jeung, last: Han} +hyuhng-joon-kim: + names: + - {first: Hyuhng Joon, last: Kim} + - {first: Hyuhng, last: Kim} +hyukro-park: + names: + - {first: HyukRo, last: Park} + - {first: Hyukro, last: Park} +hyun-seok-park: + names: + - {first: Hyun Seok, last: Park} + - {first: Hyun S., last: Park} +hyung-bae-jeon: + names: + - {first: Hyung-Bae, last: Jeon} + - {first: Hyungbae, last: Jeon} +iain-marshall: + names: + - {first: Iain, last: Marshall} + - {first: Iain J., last: Marshall} +ian-m-oneill: + names: + - {first: Ian M., last: O’Neill} + - {first: Ian, last: O’Neill} +ian-p-davy: + names: + - {first: Ian P., last: Davy} + - {first: Ian, last: Davy} + - {first: Ian P, last: Davy} +igor-a-bolshakov: + names: + - {first: Igor A., last: Bolshakov} + - {first: Igor, last: Bolshakov} +igor-boguslavsky: + names: + - {first: Igor, last: Boguslavsky} + - {first: Igor M., last: Boguslavsky} +igor-leturia: + names: + - {first: Igor, last: Leturia} + - {first: I., last: Leturia} +igor-melcuk: + names: + - {first: Igor, last: Mel’čuk} + - {first: I., last: Mel’cuk} + - {first: I. A., last: Mel’čuk} +ihsan-yalcinkaya: + names: + - {first: Ihsan, last: Yalcinkaya} + - {first: İhsan, last: Yalçinkaya} + - {first: İhsan, last: Yalcinkaya} +ik-hwan-lee: + names: + - {first: Ik-Hwan, last: Lee} + - {first: Ik-hwan, last: Lee} +ilknur-durgar-el-kahlout: + names: + - {first: Ilknur, last: Durgar El-Kahlout} + - {first: Ilknur Durgar, last: El-Kahlout} + - {first: İlknur, last: Durgar El-Kahlout} + - {first: İlknur Durgar, last: El-Kahlout} +ilyas-cicekli: + names: + - {first: Ilyas, last: Cicekli} + - {first: İlyas, last: Çiçekli} +imanol-madariaga: + names: + - {first: Imanol, last: Madariaga} + - {first: I., last: Madariaga} +imre-kiss: + names: + - {first: Imre, last: Kiss} + - {first: I., last: Kiss} +ina-roesiger: + names: + - {first: Ina, last: Roesiger} + - {first: Ina, last: Rösiger} +inaki-alegria: + names: + - {first: Iñaki, last: Alegría} + - {first: Iñaki, last: Alegria} + - {first: Inaki, last: Alegria} + - {first: I, last: Alegria} + - {first: I., last: Alegria} +inaki-gaminde: + names: + - {first: Iñaki, last: Gaminde} + - {first: I., last: Gaminde} +ingrid-starke: + names: + - {first: Ingrid, last: Starke} + - {first: I., last: Starke} +inguna-skadina: + names: + - {first: Inguna, last: Skadiņa} + - {first: Inguna, last: Skadina} + - {first: Inguna, last: Skadin̨a} +inmaculada-hernaez: + names: + - {first: Inmaculada, last: Hernáez} + - {first: Inmaculada, last: Hernaez} + - {first: Inma, last: Hernaez} + - {first: Inma, last: Hernáez} + - {first: I., last: Hernáez} +ioana-vasilescu: + names: + - {first: Ioana, last: Vasilescu} + - {first: I., last: Vasilescu} +ioannis-dologlou: + names: + - {first: Ioannis, last: Dologlou} + - {first: I., last: Dologlou} +ioannis-kakadiaris: + names: + - {first: Ioannis, last: Kakadiaris} + - {first: Ioannis A., last: Kakadiaris} +ioannis-klapaftis: + names: + - {first: Ioannis, last: Klapaftis} + - {first: Ioannis P., last: Klapaftis} +ionut-sorodoc: + names: + - {first: Ionut, last: Sorodoc} + - {first: Ionut-Teodor, last: Sorodoc} +irena-spasic: + names: + - {first: Irena, last: Spasić} + - {first: Irena, last: Spasic} +irene-castellon: + names: + - {first: Irene, last: Castellón} + - {first: Irene, last: Castellon} +irene-langkilde: + names: + - {first: Irene, last: Langkilde} + - {first: Irene, last: Langkilde-Geary} +irene-nirenburg: + names: + - {first: Irene, last: Nirenburg} + - {first: Irene B., last: Nirenburg} +irene-rodrigues: + names: + - {first: Irene, last: Rodrigues} + - {first: Irene Pimenta, last: Rodrigues} + - {first: Irene, last: Pimenta Rodrigues} +iria-da-cunha: + names: + - {first: Iria, last: da Cunha} + - {first: I., last: da Cunha} +iria-del-rio-gayo: + names: + - {first: Iria, last: Del Río Gayo} + - {first: Iria, last: del Río Gayo} + - {first: Iria, last: del Río} + - {first: Iria, last: del Rio} +irina-matveeva: + names: + - {first: Irina, last: Matveeva} + - {first: I., last: Matveeva} +irina-prodanof: + names: + - {first: Irina, last: Prodanof} + - {first: I., last: Prodanof} +iris-eshkol: + names: + - {first: Iris, last: Eshkol} + - {first: Iris, last: Eshkol-Taravella} +iris-hoser: + names: + - {first: Iris, last: Hoser} + - {first: I., last: Hoser} +irshad-bhat: + names: + - {first: Irshad, last: Bhat} + - {first: Irshad A., last: Bhat} +isabel-segura-bedmar: + names: + - {first: Isabel, last: Segura-Bedmar} + - {first: Isabel, last: Segura Bedmar} +isin-demirsahin: + names: + - {first: Isin, last: Demirsahin} + - {first: Işin, last: Demirşahin} + - {first: Isin, last: Demirşahin} +islam-beltagy: + names: + - {first: Islam, last: Beltagy} + - {first: I., last: Beltagy} +ismael-garcia-varea: + names: + - {first: Ismael, last: García-Varea} + - {first: Ismael García, last: Varea} + - {first: Ismael, last: García Varea} +ismail-babaoglu: + names: + - {first: Ismail, last: Babaoğlu} + - {first: Ismail, last: Babaoglu} +ismail-el-maarouf: + names: + - {first: Ismail, last: El Maarouf} + - {first: Ismaïl, last: El Maarouf} +ismail-timimi: + names: + - {first: Ismail, last: Timimi} + - {first: Ismaïl, last: Timimi} + - {first: I., last: Timimi} +istvan-batori: + names: + - {first: Istvan, last: Batori} + - {first: I., last: Batori} +istvan-nagy-t: + names: + - {first: István, last: Nagy T.} + - {first: István T., last: Nagy} + - {first: István, last: Nagy} + - {first: Istvan, last: Nagy} +istvan-varga: + names: + - {first: István, last: Varga} + - {first: Istvan, last: Varga} +itziar-aduriz: + names: + - {first: Itziar, last: Aduriz} + - {first: I., last: Aduriz} +iulian-vlad-serban: + names: + - {first: Iulian Vlad, last: Serban} + - {first: Iulian, last: Serban} +iuliana-alexandra-flescan-lovin-arseni: + names: + - {first: Iuliana Alexandra, last: Fleşcan-Lovin-Arseni} + - {first: Iuliana Alexandra, last: Fleșcan-Lovin-Arseni} + - {first: Iuliana-Alexandra, last: Flescan-Lovin-Arseni} +ivan-meza-ruiz: + names: + - {first: Ivan, last: Meza-Ruiz} + - {first: Ivan Vladimir, last: Meza Ruiz} + - {first: Ivan V., last: Meza} + - {first: Ivan, last: Meza} + - {first: Ivan Vladimir, last: Meza-Ruiz} +ivan-obradovic: + names: + - {first: Ivan, last: Obradović} + - {first: Ivan, last: Obradoviæ} +ivana-kruijff-korbayova: + names: + - {first: Ivana, last: Kruijff-Korbayová} + - {first: Ivana, last: Kruijff-Korbayova} + - {first: Ivana, last: Kruijff-Korbayovà} +ivandre-paraboni: + names: + - {first: Ivandré, last: Paraboni} + - {first: Ivandre, last: Paraboni} +ivona-kucerova: + names: + - {first: Ivona, last: Kučerová} + - {first: Ivona, last: Kuc̆erová} +izaskun-aldezabal: + names: + - {first: Izaskun, last: Aldezabal} + - {first: I., last: Aldezabal} +j-angus-webb: + names: + - {first: J. Angus, last: Webb} + - {first: Angus, last: Webb} +j-fernando-sanchez-rada: + names: + - {first: J. Fernando, last: Sánchez-Rada} + - {first: Fernando, last: Sánchez-Rada} +j-peterson: + names: + - {first: J., last: Peterson} + - {first: Jill, last: Peterson} +j-robin-rohlicek: + names: + - {first: J. Robin, last: Rohlicek} + - {first: Robin, last: Rohlicek} + - {first: J.R., last: Rohlicek} + - {first: J. R., last: Rohlicek} +j-scott-mccarley: + names: + - {first: J. Scott, last: McCarley} + - {first: Scott, last: McCarley} +j-walker-orr: + names: + - {first: J. Walker, last: Orr} + - {first: Walker, last: Orr} +jaakko-vayrynen: + names: + - {first: Jaakko, last: Väyrynen} + - {first: Jaakko J., last: Väyrynen} +jackie-chi-kit-cheung: + names: + - {first: Jackie Chi Kit, last: Cheung} + - {first: Jackie C. K., last: Cheung} + - {first: Jackie C.K., last: Cheung} + - {first: Jackie, last: Cheung} +jackson-souza: + names: + - {first: Jackson, last: Souza} + - {first: J., last: Souza} +jacob-hoover-vigly: + names: + - {first: Jacob Hoover, last: Vigly} + - {first: Jacob Louis, last: Hoover} + - {first: Jacob, last: Hoover} +jacqueline-leon: + names: + - {first: Jacqueline, last: Leon} + - {first: Jacqueline, last: Léon} +jacqueline-vaissiere: + names: + - {first: Jacqueline, last: Vaissiere} + - {first: Jacqueline, last: Vaissière} +jacques-chauche: + names: + - {first: Jacques, last: Chauché} + - {first: J., last: Chauche} + - {first: J., last: Chauché} +jacques-rouault: + names: + - {first: Jacques, last: Rouault} + - {first: J., last: Rouault} +jade-goldstein: + names: + - {first: Jade, last: Goldstein} + - {first: Jade, last: Goldstein-Stewart} +jae-hee-lee-bremen: + comment: Bremen + disable_name_matching: true + names: + - {first: Jae, last: Hee Lee} + - {first: Jae Hee, last: Lee} + orcid: 0000-0001-9840-780X +jae-won-lee: + names: + - {first: Jae-Won, last: Lee} + - {first: Jae-won, last: Lee} +jaesong-lee: + names: + - {first: Jaesong, last: Lee} + - {first: JaeSong, last: Lee} +jaesung-lee: + names: + - {first: JaeSung, last: Lee} + - {first: Jae-Sung, last: Lee} +jahna-otterbacher: + names: + - {first: Jahna, last: Otterbacher} + - {first: Jahna C., last: Otterbacher} +jaime-g-carbonell: + comment: CMU + names: + - {first: Jaime G., last: Carbonell} + - {first: Jaime, last: Carbonell} + - {first: Jaime G., last: Carbonell Jr} + similar: + - jaime-r-carbonell +jaime-r-carbonell: + comment: BBN; d. 1973 + names: + - {first: Jaime R., last: Carbonell} + similar: + - jaime-g-carbonell +jakub-waszczuk: + names: + - {first: Jakub, last: Waszczuk} + - {first: Jakub, last: Wasczuk} +jamal-a-nasir: + names: + - {first: Jamal A., last: Nasir} + - {first: Jamal, last: Nasir} +james-allan: + comment: UMass Amherst + names: + - {first: James, last: Allan} + - {first: J., last: Allan} + similar: + - james-allen +james-allen: + comment: Rochester + names: + - {first: James, last: Allen} + - {first: James F., last: Allen} + similar: + - james-allan +james-baker: + names: + - {first: James, last: Baker} + - {first: James K., last: Baker} +james-davis: + names: + - {first: James, last: Davis} + - {first: James Raymond, last: Davis} +james-g-mork: + names: + - {first: James G., last: Mork} + - {first: James, last: Mork} +james-glass: + names: + - {first: James, last: Glass} + - {first: James R., last: Glass} +james-h-martin: + names: + - {first: James H., last: Martin} + - {first: James, last: Martin} +james-henderson: + names: + - {first: James, last: Henderson} + - {first: James B., last: Henderson} +james-hendler: + names: + - {first: James, last: Hendler} + - {first: James A., last: Hendler} +james-hieronymus: + names: + - {first: James, last: Hieronymus} + - {first: J., last: Hieronymus} +james-l-flanagan: + names: + - {first: James L., last: Flanagan} + - {first: J. L., last: Flanagan} + - {first: J., last: Flanagan} +james-lester: + names: + - {first: James, last: Lester} + - {first: James C., last: Lester} +james-paul-white: + names: + - {first: James Paul, last: White} + - {first: James P., last: White} + - {first: James, last: White} +james-pustejovsky: + names: + - {first: James, last: Pustejovsky} + - {first: James D., last: Pustejovsky} + - {first: J., last: Pustejovsky} +james-r-curran: + names: + - {first: James R., last: Curran} + - {first: James, last: Curran} +james-yoon: + names: + - {first: James, last: Yoon} + - {first: James H., last: Yoon} +jamie-kiros: + names: + - {first: Jamie, last: Kiros} + - {first: Jamie Ryan, last: Kiros} +jan-curin: + names: + - {first: Jan, last: Cuřín} + - {first: J., last: Cuřín} +jan-hajic: + names: + - {first: Jan, last: Hajic} + - {first: Jan, last: Hajič} + - {first: J., last: Hajič} + similar: + - jan-hajic-jr +jan-hajic-jr: + names: + - {first: Jan, last: Hajič jr.} + similar: + - jan-hajic +jan-kors: + names: + - {first: Jan, last: Kors} + - {first: Jan, last: Korst} +jan-landsbergen: + names: + - {first: Jan, last: Landsbergen} + - {first: S. P. J., last: Landsbergen} + - {first: S.P.J., last: Landsbergen} +jan-milan-deriu: + names: + - {first: Jan Milan, last: Deriu} + - {first: Jan, last: Deriu} +jan-odijk: + names: + - {first: Jan, last: Odijk} + - {first: J., last: Odijk} +jan-ptacek: + names: + - {first: Jan, last: Ptacek} + - {first: Jan, last: Ptáček} +jan-vystrcil: + names: + - {first: Jan, last: Vystrčil} + - {first: Jan, last: Vystrcil} +jana-gotze: + names: + - {first: Jana, last: Götze} + - {first: Jana, last: Goetze} +jana-kravalova: + names: + - {first: Jana, last: Kravalová} + - {first: Jana, last: Kravalova} +jana-sindlerova: + names: + - {first: Jana, last: Šindlerová} + - {first: Jana, last: Sindlerova} +jana-sukkarieh: + names: + - {first: Jana, last: Sukkarieh} + - {first: Jana Z., last: Sukkarieh} +jane-j-robinson: + names: + - {first: Jane J., last: Robinson} + - {first: Jane, last: Robinson} +janet-baker: + names: + - {first: Janet, last: Baker} + - {first: Janet M., last: Baker} +janet-hitzeman: + names: + - {first: Janet, last: Hitzeman} + - {first: J., last: Hitzeman} +janet-pierrehumbert: + names: + - {first: Janet, last: Pierrehumbert} + - {first: Janet B., last: Pierrehumbert} +janez-zibert: + names: + - {first: Janez, last: Zibert} + - {first: Janez, last: Žibert} +janienke-sturm: + names: + - {first: Janienke, last: Sturm} + - {first: J., last: Sturm} +jann-railey-montalan: + names: + - {first: Jann Railey, last: Montalan} + - {first: Jann, last: Montalan} + - {first: Railey, last: Montalan} + - {first: Jann Railey E., last: Montalan} +janne-bondi-johannessen: + names: + - {first: Janne Bondi, last: Johannessen} + - {first: Janne, last: Bondi Johannessen} +janusz-stanislaw-bien: + names: + - {first: Janusz Stanisław, last: Bien} + - {first: Janusz Stanislaw, last: Bien} + - {first: Janusz S., last: Bień} + - {first: Janusz S., last: Bien} +janyce-wiebe: + names: + - {first: Janyce, last: Wiebe} + - {first: Janyce M., last: Wiebe} + - {first: Jan, last: Wiebe} +jarmila-panevova: + names: + - {first: Jarmila, last: Panevová} + - {first: Jarmila, last: Panevova} +jason-brenier: + names: + - {first: Jason, last: Brenier} + - {first: Jason M., last: Brenier} +jason-d-williams: + names: + - {first: Jason D., last: Williams} + - {first: Jason, last: Williams} +jason-eisner: + names: + - {first: Jason, last: Eisner} + - {first: Jason M., last: Eisner} +jason-katz-brown: + names: + - {first: Jason, last: Katz-Brown} + - {first: Jason, last: Brown} +jason-s-chang: + names: + - {first: Jason S., last: Chang} + - {first: Jason, last: Chang} + - {first: Jason J. S., last: Chang} + - {first: Jason J.S., last: Chang} + - {first: Jason J., last: Chang} +jason-smith: + names: + - {first: Jason, last: Smith} + - {first: Jason R., last: Smith} +javier-dieguez-tirado: + names: + - {first: Javier, last: Dieguez-Tirado} + - {first: Javier, last: Dieguez} +javier-farreres: + names: + - {first: Javier, last: Farreres} + - {first: Xavier, last: Farreres} +javier-ortega-garcia: + names: + - {first: Javier, last: Ortega-García} + - {first: Javier, last: Ortega-Garcia} +jay-wilpon: + names: + - {first: Jay, last: Wilpon} + - {first: Jay G., last: Wilpon} +jean-claude-martin: + names: + - {first: Jean-Claude, last: Martin} + - {first: J-C., last: Martin} + - {first: J.-C., last: Martin} + - {first: J.C., last: Martin} +jean-david-ruvini: + names: + - {first: Jean David, last: Ruvini} + - {first: Jean-David, last: Ruvini} +jean-e-fox-tree: + names: + - {first: Jean E., last: Fox Tree} + - {first: Jean Fox, last: Tree} + - {first: Jean, last: Fox Tree} +jean-francois-bonastre: + names: + - {first: Jean-François, last: Bonastre} + - {first: Jean-Francois, last: Bonastre} + - {first: J-F., last: Bonastre} + - {first: J.-F., last: Bonastre} +jean-francois-delannoy: + names: + - {first: Jean-François, last: Delannoy} + - {first: Jean-Francois, last: Delannoy} +jean-francois-serignat: + names: + - {first: Jean-François, last: Serignat} + - {first: J.F., last: Serignat} +jean-luc-lebrun: + names: + - {first: Jean-Luc, last: LeBrun} + - {first: Jean-Luc, last: Lebrun} +jean-marc-colletta: + names: + - {first: Jean-Marc, last: Colletta} + - {first: J.M., last: Colletta} +jean-mark-gawron: + names: + - {first: Jean Mark, last: Gawron} + - {first: Mark, last: Gawron} + - {first: J. Mark, last: Gawron} + - {first: J. M., last: Gawron} +jean-pierre-descles: + names: + - {first: Jean-Pierre, last: Descles} + - {first: Jean-Pierre, last: Desclés} + - {first: Jean Pierre, last: Descles} +jean-pierre-paillet: + names: + - {first: Jean-Pierre, last: Paillet} + - {first: Jean Pierre, last: Paillet} +jean-veronis: + names: + - {first: Jean, last: Veronis} + - {first: Jean, last: Véronis} +jeanne-baguenier-desormeaux: + names: + - {first: Jeanne, last: Baguenier Desormeaux} + - {first: Jeanne, last: Baguenier-Desormeaux} +jeanne-villaneau: + names: + - {first: Jeanne, last: Villaneau} + - {first: J., last: Villaneau} +jeannette-g-neal: + names: + - {first: Jeannette G., last: Neal} + - {first: J.G., last: Neal} +jee-sun-nam: + names: + - {first: Jee-sun, last: Nam} + - {first: Jee-Sun, last: Nam} +jeff-bilmes: + names: + - {first: Jeff, last: Bilmes} + - {first: Jeff A., last: Bilmes} +jeffrey-p-bigham: + names: + - {first: Jeffrey P., last: Bigham} + - {first: Jeffrey, last: Bigham} +jeffrey-sorensen: + names: + - {first: Jeffrey, last: Sorensen} + - {first: Jeffrey S., last: Sorensen} +jeih-weih-hung: + names: + - {first: Jeih-weih, last: Hung} + - {first: Jeih-Weih, last: Hung} +jen-nan-chen: + names: + - {first: Jen Nan, last: Chen} + - {first: Jen-Nan, last: Chen} + - {first: Jen-nan, last: Chen} +jen-tzung-chien: + names: + - {first: Jen-Tzung, last: Chien} + - {first: Jen-Tzong, last: Chien} +jennifer-c-lai: + names: + - {first: Jennifer C., last: Lai} + - {first: Jenifer C., last: Lai} + - {first: Jennifer, last: Lai} +jennifer-chu-carroll: + names: + - {first: Jennifer, last: Chu-Carroll} + - {first: Jennifer, last: Chu} +jennifer-doyon: + names: + - {first: Jennifer, last: Doyon} + - {first: Jennifer B., last: Doyon} +jennifer-hay: + names: + - {first: Jennifer, last: Hay} + - {first: Jennifer B., last: Hay} +jennifer-stromer-galley: + names: + - {first: Jennifer, last: Stromer-Galley} + - {first: Jennifer, last: Strommer-Galley} +jenny-rose-finkel: + names: + - {first: Jenny Rose, last: Finkel} + - {first: Jenny, last: Finkel} +jens-erik-fenstad: + names: + - {first: Jens Erik, last: Fenstad} + - {first: Jens-Erik, last: Fenstad} +jeong-won-cha: + names: + - {first: Jeong-Won, last: Cha} + - {first: Jeongwon, last: Cha} +jer-hayes: + names: + - {first: Jer, last: Hayes} + - {first: Jeremiah, last: Hayes} +jeremie-segouat: + names: + - {first: Jérémie, last: Segouat} + - {first: J., last: Segouat} +jeremy-g-kahn: + names: + - {first: Jeremy G., last: Kahn} + - {first: Jeremy, last: Kahn} +jeremy-j-carroll: + names: + - {first: Jeremy J., last: Carroll} + - {first: Jeremy, last: Carroll} +jeremy-leixa: + names: + - {first: Jeremy, last: Leixa} + - {first: Jérémy, last: Leixa} +jeremy-trione: + names: + - {first: Jérémy, last: Trione} + - {first: Jeremy, last: Trione} +jerneja-gros: + names: + - {first: Jerneja, last: Gros} + - {first: Jerneja Žganec, last: Gros} +jerome-goulian: + names: + - {first: Jérôme, last: Goulian} + - {first: J., last: Goulian} +jerome-vapillon: + names: + - {first: Jerome, last: Vapillon} + - {first: J., last: Vapillon} +jerry-r-hobbs: + names: + - {first: Jerry R., last: Hobbs} + - {first: Jerry, last: Hobbs} + - {first: J.R., last: Hobbs} +jessica-moszkowicz: + names: + - {first: Jessica, last: Moszkowicz} + - {first: Jessica L., last: Moszkowicz} +jesujoba-alabi: + names: + - {first: Jesujoba, last: Alabi} + - {first: Jesujoba O., last: Alabi} + - {first: Jesujoba Oluwadara, last: Alabi} +jesus-e-diaz-verdejo: + names: + - {first: Jesús E., last: Díaz Verdejo} + - {first: J.E., last: Díaz Verdejo} +jesus-gimenez: + names: + - {first: Jesús, last: Giménez} + - {first: Jesus, last: Gimenez} +jesus-gonzalez-rubio: + names: + - {first: Jesús, last: González-Rubio} + - {first: Jesús, last: González Rubio} +jesus-peral: + names: + - {first: Jesús, last: Peral} + - {first: Jesus, last: Peral} + - {first: J., last: Peral} +jia-fei-hong: + names: + - {first: Jia-Fei, last: Hong} + - {first: Jia-Fei, last: Hung} +jia-lin-shen: + names: + - {first: Jia-Lin, last: Shen} + - {first: Jia-lin, last: Shen} +jia-lu: + names: + - {first: Jia, last: Lü} + - {first: Jia, last: Lu} +jia-yan-jian: + names: + - {first: Jia-Yan, last: Jian} + - {first: Jia Yan, last: Jian} +jiahao-yuan-ecnu: + degree: East China Normal University + disable_name_matching: true + names: + - {first: Jiahao, last: Yuan} + orcid: 0009-0002-6194-450X +jiaheng-zheng: + names: + - {first: Jiaheng, last: Zheng} + - {first: Jia-heng, last: Zheng} +jiajun-chen: + names: + - {first: Jiajun, last: Chen} + - {first: Jia-jun, last: Chen} + - {first: Jia-Jun, last: Chen} +jian-chen-ub: + comment: University at Buffalo + disable_name_matching: true + names: + - {first: Jian, last: Chen} +jian-chen-wu: + names: + - {first: Jian-Chen, last: Wu} + - {first: Jien-Chen, last: Wu} +jian-cheng-wu: + names: + - {first: Jian-Cheng, last: Wu} + - {first: Jian-cheng, last: Wu} + - {first: Jiancheng, last: Wu} +jian-ming-xu: + names: + - {first: Jian-ming, last: Xu} + - {first: Jian-Ming, last: Xu} +jian-wang-hongkongpoly: + comment: Hong Kong Polytechnic + disable_name_matching: true + names: + - {first: Jian, last: Wang} + orcid: 0000-0002-8992-8336 +jian-yun-nie: + names: + - {first: Jian-Yun, last: Nie} + - {first: Jian-yun, last: Nie} +jianmin-yao: + names: + - {first: Jianmin, last: Yao} + - {first: Jian-min, last: Yao} + - {first: Jian-Min, last: Yao} +jianxiang-wang: + names: + - {first: JianXiang, last: Wang} + - {first: Jianxiang, last: Wang} +jianyong-duan: + names: + - {first: Jianyong, last: Duan} + - {first: Jian-Yong, last: Duan} +jiatong-li-hk: + comment: Hong Kong Polytechnic + names: + - {first: Jiatong, last: Li} +jiatong-li-ru: + comment: Rutgers + names: + - {first: Jiatong, last: Li} +jihai-zhang-cuhk: + comment: CUHK + disable_name_matching: true + names: + - {first: Jihai, last: Zhang} + orcid: 0000-0002-1400-9116 +jill-fain-lehman: + names: + - {first: Jill Fain, last: Lehman} + - {first: Jill F., last: Lehman} +jim-chang: + names: + - {first: Jim, last: Chang} + - {first: Jimmy, last: Chang} +jim-cowie: + names: + - {first: Jim, last: Cowie} + - {first: J., last: Cowie} +jim-kinzey: + names: + - {first: Jim, last: Kinzey} + - {first: Jim, last: Kimzey} +jin-ge-yao: + names: + - {first: Jin-ge, last: Yao} + - {first: Jin-Ge, last: Yao} +jin-hu-huang: + names: + - {first: Jin Hu, last: Huang} + - {first: JinHu, last: Huang} +jin-seok-lee: + names: + - {first: Jin-seok, last: Lee} + - {first: Jin-Seok, last: Lee} +jinan-xu: + names: + - {first: Jinan, last: Xu} + - {first: JinAn, last: Xu} +jindrich-helcl: + names: + - {first: Jindřich, last: Helcl} + - {first: Jindrich, last: Helcl} +jingguang-han: + names: + - {first: Jingguang, last: Han} + - {first: Jing Guang, last: Han} +jingguang-sun: + names: + - {first: Jingguang, last: Sun} + - {first: JingGuang, last: Sun} +jinghui-xiao: + names: + - {first: Jinghui, last: Xiao} + - {first: JingHui, last: Xiao} +jinho-d-choi: + names: + - {first: Jinho D., last: Choi} + - {first: Jinho, last: Choi} +jiri-havelka: + names: + - {first: Jiří, last: Havelka} + - {first: Jiri, last: Havelka} +jiri-navratil: + names: + - {first: Jiří, last: Navrátil} + - {first: Jiri, last: Navratil} +jiri-semecky: + names: + - {first: Jiří, last: Semecký} + - {first: Jirí, last: Semecky} +jisha-p-jayan: + names: + - {first: Jisha P., last: Jayan} + - {first: Jisha, last: P Jayan} + - {first: Jisha P, last: Jayan} +jo-calder: + names: + - {first: Jo, last: Calder} + - {first: Jonathan, last: Calder} +joachim-kohler: + names: + - {first: Joachim, last: Köhler} + - {first: Joachim, last: Koehler} +joan-andreu-sanchez: + names: + - {first: Joan-Andreu, last: Sánchez} + - {first: Joan-Andreu, last: Sanchez} + - {first: Joan Andreu, last: Sánchez} +joan-bachenko: + names: + - {first: Joan, last: Bachenko} + - {first: J., last: Bachenko} +joan-codina-filba: + names: + - {first: Joan, last: Codina-Filba} + - {first: Joan, last: Codina-Filbà} + - {first: Joan, last: Codina} +joan-giralt-duran: + names: + - {first: Joan, last: Giralt Duran} + - {first: Joan Giralt, last: Duran} +joan-soler-i-bou: + names: + - {first: Joan, last: Soler i Bou} + - {first: Joan, last: Soler} +joanna-mrozinski: + names: + - {first: Joanna, last: Mrozinski} + - {first: J., last: Mrozinski} +joao-graca: + names: + - {first: João, last: Graça} + - {first: Joao, last: Graca} + - {first: João V., last: Graça} +joao-miguel-casteleiro: + names: + - {first: João Miguel, last: Casteleiro} + - {first: João, last: Casteleiro} +joao-p-neto: + names: + - {first: João P., last: Neto} + - {first: Joao P., last: Neto} + - {first: Joao, last: Neto} + - {first: João, last: Neto} + - {first: João Paulo, last: Neto} +joao-paulo-cabral: + names: + - {first: Joao Paulo, last: Cabral} + - {first: João P., last: Cabral} +joao-paulo-cordeiro: + names: + - {first: João Paulo, last: Cordeiro} + - {first: João, last: Cordeiro} +joao-paulo-teixeira: + names: + - {first: João Paulo, last: Teixeira} + - {first: João P., last: Teixeira} +joao-rodrigues: + names: + - {first: João, last: Rodrigues} + - {first: João, last: António Rodrigues} +joao-silva: + names: + - {first: João, last: Silva} + - {first: João Ricardo, last: Silva} +joaquin-gonzalez-rodriguez: + names: + - {first: Joaquín, last: González-Rodríguez} + - {first: Joaquin, last: Gonzalez-Rodriguez} +jochen-dorre: + names: + - {first: Jochen, last: Dorre} + - {first: Jochen, last: Dörre} +jochen-l-leidner: + names: + - {first: Jochen L., last: Leidner} + - {first: Jochen, last: Leidner} +joe-mccarthy: + names: + - {first: Joe, last: McCarthy} + - {first: J., last: McCarthy} +joe-zhou: + names: + - {first: Joe, last: Zhou} + - {first: Joe F., last: Zhou} +joel-priestley: + names: + - {first: Joel, last: Priestley} + - {first: Joel James, last: Priestley} +joel-tetreault: + names: + - {first: Joel, last: Tetreault} + - {first: Joel R., last: Tetreault} +joel-wallenberg: + names: + - {first: Joel, last: Wallenberg} + - {first: Joel C., last: Wallenberg} +johan-adam-du-preez: + names: + - {first: Johan Adam, last: du Preez} + - {first: J.A., last: du Preez} +johanna-d-moore: + names: + - {first: Johanna D., last: Moore} + - {first: Johanna, last: Moore} + - {first: J. D., last: Moore} +johanna-geiss: + names: + - {first: Johanna, last: Geiß} + - {first: Johanna, last: Geiss} +john-a-carroll: + comment: Cambridge, Sussex + names: + - {first: John A., last: Carroll} + - {first: John, last: Carroll} + similar: + - john-b-carroll +john-b-carroll: + comment: UNC + names: + - {first: John B., last: Carroll} + similar: + - john-a-carroll +john-b-lowe: + names: + - {first: John B., last: Lowe} + - {first: John, last: Lowe} +john-barnden: + names: + - {first: John, last: Barnden} + - {first: John A., last: Barnden} + - {first: J.A., last: Barnden} +john-bateman: + names: + - {first: John, last: Bateman} + - {first: John A., last: Bateman} +john-bear: + names: + - {first: John, last: Bear} + - {first: J., last: Bear} +john-c-platt: + names: + - {first: John C., last: Platt} + - {first: John, last: Platt} +john-c-thomas: + names: + - {first: John C., last: Thomas} + - {first: John, last: Thomas} +john-cocke: + names: + - {first: John, last: Cocke} + - {first: J., last: Cocke} +john-conroy: + names: + - {first: John, last: Conroy} + - {first: John M., last: Conroy} +john-d-burger: + comment: MITRE + names: + - {first: John D., last: Burger} + - {first: John, last: Burger} + similar: + - john-f-burger +john-dowding: + names: + - {first: John, last: Dowding} + - {first: J., last: Dowding} +john-elliott: + names: + - {first: John, last: Elliott} + - {first: John, last: Elliot} +john-f-burger: + comment: System Development Corporation + names: + - {first: John F., last: Burger} + - {first: John, last: Burger} + similar: + - john-d-burger +john-f-pitrelli: + names: + - {first: John F., last: Pitrelli} + - {first: John, last: Pitrelli} +john-h-clippinger-jr: + names: + - {first: John H., last: 'Clippinger, Jr.'} + - {first: John Henry, last: 'Clippinger, Jr.'} +john-hale: + names: + - {first: John, last: Hale} + - {first: John T., last: Hale} +john-henderson: + names: + - {first: John, last: Henderson} + - {first: John C., last: Henderson} +john-j-kovarik: + names: + - {first: John J., last: Kovarik} + - {first: John, last: Kovarik} +john-k-pate: + names: + - {first: John K., last: Pate} + - {first: John, last: Pate} + - {first: John K, last: Pate} +john-keane: + names: + - {first: John, last: Keane} + - {first: John, last: Kane} +john-kelleher: + names: + - {first: John, last: Kelleher} + - {first: John D., last: Kelleher} +john-lafferty: + names: + - {first: John, last: Lafferty} + - {first: John D., last: Lafferty} + - {first: John, last: Lafrerty} + - {first: J., last: Lafferty} +john-makhoul: + names: + - {first: John, last: Makhoul} + - {first: J., last: Makhoul} +john-mcnaught: + names: + - {first: John, last: McNaught} + - {first: J., last: McNaught} +john-miller: + names: + - {first: John, last: Miller} + - {first: John E., last: Miller} +john-p-lalor: + names: + - {first: John P., last: Lalor} + - {first: John, last: Lalor} +john-pestian: + names: + - {first: John, last: Pestian} + - {first: John P., last: Pestian} +john-philip-mccrae: + names: + - {first: John Philip, last: McCrae} + - {first: John, last: McCrae} + - {first: John P., last: McCrae} +john-phillips: + comment: Univ. of Manchester + names: + - {first: John, last: Phillips} + similar: + - jon-phillips +john-r-hershey: + names: + - {first: John R., last: Hershey} + - {first: John, last: Hershey} +john-s-garofolo: + names: + - {first: John S., last: Garofolo} + - {first: John, last: Garofolo} + - {first: J. S., last: Garofolo} + - {first: J., last: Garofolo} +john-s-white: + names: + - {first: John S., last: White} + - {first: John, last: White} +john-s-y-lee: + names: + - {first: John S. Y., last: Lee} + - {first: John, last: Lee} +john-t-maxwell-iii: + names: + - {first: John T., last: Maxwell III} + - {first: John, last: Maxwell} + - {first: John T., last: Maxwell} +john-tait: + names: + - {first: John, last: Tait} + - {first: John Irving, last: Tait} +john-wilkerson: + names: + - {first: John, last: Wilkerson} + - {first: John D., last: Wilkerson} +jon-oberlander: + names: + - {first: Jon, last: Oberlander} + - {first: Jonathan, last: Oberländer} +jon-patrick: + names: + - {first: Jon, last: Patrick} + - {first: Jon D., last: Patrick} + - {first: Jon David, last: Patrick} +jon-phillips: + comment: Georgetown, MITRE + names: + - {first: Jon, last: Phillips} + - {first: John, last: Phillips} + similar: + - john-phillips +jon-sanchez: + names: + - {first: Jon, last: Sánchez} + - {first: Jon, last: Sanchez} + - {first: J., last: Sánchez} +jonathan-allen: + names: + - {first: Jonathan, last: Allen} + - {first: Jonathan, last: All} +jonathan-decristofaro: + names: + - {first: Jonathan, last: DeCristofaro} + - {first: Jonathan D., last: DeCristofaro} +jonathan-g-fiscus: + names: + - {first: Jonathan G., last: Fiscus} + - {first: Jonathan C., last: Fiscus} + - {first: Jonathan, last: Fiscus} + - {first: J. G., last: Fiscus} + - {first: Johathan G., last: Fiscus} +jonathan-h-clark: + names: + - {first: Jonathan H., last: Clark} + - {first: Jonathan, last: Clark} +jonathan-j-webster: + names: + - {first: Jonathan J., last: Webster} + - {first: Jonathan, last: Webster} +jonathan-washington: + names: + - {first: Jonathan, last: Washington} + - {first: Jonathan North, last: Washington} + - {first: Jonathan N., last: Washington} +jong-c-park: + names: + - {first: Jong C., last: Park} + - {first: Jong, last: Park} +jong-hoon-oh: + names: + - {first: Jong-Hoon, last: Oh} + - {first: Jong Hoon, last: Oh} +jonghyun-choi-umd: + comment: University of Maryland + disable_name_matching: true + names: + - {first: Jonghyun, last: Choi} + orcid: 0000-0002-7934-8434 +joo-young-lee: + names: + - {first: Joo-Young, last: Lee} + - {first: JooYoung, last: Lee} +jordan-cohen: + names: + - {first: Jordan, last: Cohen} + - {first: Jordan R., last: Cohen} +jordan-r-green: + names: + - {first: Jordan R., last: Green} + - {first: Jordan, last: Green} +jordi-atserias: + names: + - {first: Jordi, last: Atserias} + - {first: Jordi, last: Atserias Batalla} + - {first: J., last: Atserias} +jordi-daude: + names: + - {first: Jordi, last: Daudé} + - {first: J., last: Daudé} +jordi-porta-zamorano: + names: + - {first: Jordi Porta, last: Zamorano} + - {first: Jordi, last: Porta} +jordi-turmo: + names: + - {first: Jordi, last: Turmo} + - {first: J., last: Turmo} +jorg-kleinz: + names: + - {first: Jörg, last: Kleinz} + - {first: Jorg, last: Kleinz} +jorg-tiedemann: + names: + - {first: Jörg, last: Tiedemann} + - {first: Jorg, last: Tiedemann} + - {first: Joerg, last: Tiedemann} +jorge-carrillo-de-albornoz: + names: + - {first: Jorge, last: Carrillo de Albornoz} + - {first: Jorge Carrillo, last: de Albornoz} +jorge-garcia-flores: + names: + - {first: Jorge, last: Garcia Flores} + - {first: Jorge, last: García Flores} + - {first: Jorge J., last: García Flores} +jorgen-villadsen: + names: + - {first: Jorgen, last: Villadsen} + - {first: Jørgen, last: Villadsen} +jort-florent-gemmeke: + names: + - {first: Jort Florent, last: Gemmeke} + - {first: Jort F., last: Gemmeke} + - {first: Jort, last: Gemmeke} +jose-a-r-fonollosa: + names: + - {first: José A. R., last: Fonollosa} + - {first: Jose A., last: R. Fonollosa} + - {first: José A.R., last: Fonollosa} + - {first: José A., last: R. Fonollosa} + - {first: Jose A. R., last: Fonollosa} +jose-b-marino: + names: + - {first: José B., last: Mariño} + - {first: José, last: Mariño} +jose-camacho-collados: + names: + - {first: Jose, last: Camacho-Collados} + - {first: José, last: Camacho-Collados} +jose-castano: + names: + - {first: José, last: Castaño} + - {first: José M., last: Castaño} +jose-coch: + names: + - {first: Jose, last: Coch} + - {first: José, last: Coch} +jose-deulofeu: + names: + - {first: José, last: Deulofeu} + - {first: Jose, last: Deulofeu} +jose-g-c-de-souza: + names: + - {first: José G., last: C. de Souza} + - {first: José G.C., last: de Souza} + - {first: Jose G.C., last: de Souza} + - {first: José Guilherme, last: Camargo de Souza} + - {first: José G., last: Camargo de Souza} + - {first: José Guilherme, last: C. de Souza} +jose-g-moreno: + names: + - {first: José G., last: Moreno} + - {first: Jose G., last: Moreno} + - {first: Jose, last: Moreno} +jose-i-abreu: + names: + - {first: José I., last: Abreu} + - {first: Jose I., last: Abreu} + - {first: José, last: Abreu} +jose-iria: + names: + - {first: José, last: Iria} + - {first: Jose, last: Iria} +jose-joao-almeida: + names: + - {first: José João, last: Almeida} + - {first: Jose Joao, last: Almeida} +jose-luis-oliveira: + names: + - {first: José Luís, last: Oliveira} + - {first: Luís, last: Oliveira} +jose-luis-vicedo: + names: + - {first: José Luis, last: Vicedo} + - {first: Jose-Luis, last: Vicedo} + - {first: Jose Luis, last: Vicedo} + - {first: José L., last: Vicedo} + - {first: J.L., last: Vicedo} +jose-m-alonso: + names: + - {first: Jose M., last: Alonso} + - {first: Jose, last: Alonso} +jose-m-garcia-miguel: + names: + - {first: José M., last: García Miguel} + - {first: José M., last: García-Miguel} +jose-m-gomez: + names: + - {first: José M., last: Gómez} + - {first: José Manuel, last: Gómez} + - {first: Jose Manuel, last: Gómez} + - {first: Jose M., last: Gomez} +jose-m-guirao: + names: + - {first: José M., last: Guirao} + - {first: José María, last: Guirao} +jose-manuel-gomez-perez: + names: + - {first: José Manuel, last: Gómez-Pérez} + - {first: Jose Manuel, last: Gomez-Perez} +jose-manuel-martinez: + names: + - {first: José Manuel, last: Martínez} + - {first: Jose M.M., last: Martinez} + - {first: José Manuel, last: Martínez Martínez} + - {first: Jose Manuel, last: Martinez} +jose-manuel-pardo: + names: + - {first: Jose Manuel, last: Pardo} + - {first: Jose M., last: Pardo} + - {first: José M., last: Pardo} +jose-manuel-perea-ortega: + names: + - {first: Jose Manuel, last: Perea-Ortega} + - {first: José M., last: Perea-Ortega} + - {first: Jose Manuel, last: Perea} + - {first: Jose-Manuel, last: Perea-Ortega} +jose-mari-arriola: + names: + - {first: Jose Mari, last: Arriola} + - {first: J. M., last: Arriola} + - {first: J.M., last: Arriola} + - {first: J.M, last: Arriola} +jose-maria-carazo: + names: + - {first: José María, last: Carazo} + - {first: José-María, last: Carazo} +jose-maria-gomez-hidalgo: + names: + - {first: Jose Maria, last: Gomez-Hidalgo} + - {first: Jose Maria Gomez, last: Hidalgo} + - {first: José M. Gómez, last: Hidalgo} +jose-miguel-benedi: + names: + - {first: José-Miguel, last: Benedí} + - {first: Jose-Miguel, last: Benedi} + - {first: José Miguel, last: Benedí} + - {first: José Miguel, last: Benedí Ruíz} + - {first: José-M., last: Benedí} + - {first: José Miguel, last: Benedi Ruiz} + - {first: José-Miguel, last: Benedí Ruíz} + - {first: J. M., last: Benedí} +jose-ramom-pichel-campos: + names: + - {first: José Ramom, last: Pichel Campos} + - {first: José Ramom, last: Pichel} + - {first: Jose Ramom, last: Pichel} + - {first: Jose Ramom, last: Pichel Campos} +jose-relano-gil: + names: + - {first: Jose, last: Relaño-Gil} + - {first: Jose, last: Relano Gil} + - {first: José, last: Relaño Gil} + - {first: José, last: Relaño} +josef-psutka: + names: + - {first: Josef, last: Psutka} + - {first: Josef V., last: Psutka} + - {first: J., last: Psutka} + - {first: J.V., last: Psutka} +josef-van-genabith: + names: + - {first: Josef, last: van Genabith} + - {first: Josef, last: Van Genabith} + - {first: J., last: Van Genabith} +josep-m-crego: + names: + - {first: Josep M., last: Crego} + - {first: Josep Maria, last: Crego} + - {first: Josep, last: Crego} +josep-maria-fontana: + names: + - {first: Josep Maria, last: Fontana} + - {first: Josep, last: Fontana} +joseph-f-grafsgaard: + names: + - {first: Joseph F., last: Grafsgaard} + - {first: Joseph, last: Grafsgaard} +joseph-j-peper: + names: + - {first: Joseph J., last: Peper} + - {first: Joseph, last: Peper} +joseph-le-roux: + names: + - {first: Joseph, last: Le Roux} + - {first: Joseph Le, last: Roux} +joseph-mariani: + names: + - {first: Joseph, last: Mariani} + - {first: J., last: Mariani} +joseph-p-campbell: + names: + - {first: Joseph P., last: Campbell} + - {first: Joseph, last: Campbell} +joseph-polifroni: + names: + - {first: Joseph, last: Polifroni} + - {first: Joseph H., last: Polifroni} +joseph-turian: + names: + - {first: Joseph, last: Turian} + - {first: Joseph P., last: Turian} +joseph-z-chang: + names: + - {first: Joseph Z., last: Chang} + - {first: Joseph, last: Chang} + - {first: Joseph Z, last: Chang} +joshua-b-gordon: + names: + - {first: Joshua B., last: Gordon} + - {first: Joshua, last: Gordon} +joshua-goodman: + names: + - {first: Joshua, last: Goodman} + - {first: Joshua T., last: Goodman} +joyce-chai: + names: + - {first: Joyce, last: Chai} + - {first: Joyce Yue, last: Chai} + - {first: Joyce Y., last: Chai} +joyce-mcdowell: + names: + - {first: Joyce, last: McDowell} + - {first: J., last: McDowell} +jozef-juhar: + names: + - {first: Jozef, last: Juhár} + - {first: Jozef, last: Juhar} +ju-yeon-ryu: + names: + - {first: Ju-yeon, last: Ryu} + - {first: Ju-Yeon, last: Ryu} +juan-carlos-amengual: + names: + - {first: Juan Carlos, last: Amengual} + - {first: J. C., last: Amengual} +juan-jose-rodriguez-soler: + names: + - {first: Juan José Rodríguez, last: Soler} + - {first: Juan José, last: Rodríguez} +juan-m-montero: + names: + - {first: Juan M., last: Montero} + - {first: Juan Manuel, last: Montero} +juan-manuel-lucas-cuesta: + names: + - {first: Juan Manuel, last: Lucas-Cuesta} + - {first: Juan Manuel, last: Lucas} +juan-manuel-torres-moreno: + names: + - {first: Juan-Manuel, last: Torres-Moreno} + - {first: Juan-Manuel Torres, last: Moreno} + - {first: Juan-Manuel, last: Torres} +juan-maria-garrido: + names: + - {first: Juan María, last: Garrido} + - {first: Juan Maria, last: Garrido} +juan-miguel-vilar: + names: + - {first: Juan Miguel, last: Vilar} + - {first: Juan-Miguel, last: Vilar} + - {first: Juan M., last: Vilar} + - {first: J. M., last: Vilar} +juan-pablo-munoz: + names: + - {first: Juan Pablo, last: Munoz} + - {first: J. Pablo, last: Muñoz} +juan-soler-company: + names: + - {first: Juan, last: Soler-Company} + - {first: Juan, last: Soler Company} +juana-maria-ruiz-martinez: + names: + - {first: Juana María, last: Ruiz-Martínez} + - {first: Juana Maria, last: Ruiz-Martínez} + - {first: Juana Maria, last: Ruiz Martinez} +judith-d-schlesinger: + names: + - {first: Judith D., last: Schlesinger} + - {first: Judith, last: Schlesinger} +judith-l-klavans: + names: + - {first: Judith L., last: Klavans} + - {first: Judith, last: Klavans} + - {first: J., last: Klavans} +judy-anne-kegl: + names: + - {first: Judy Anne, last: Kegl} + - {first: Judy, last: Kegl} +jugal-kalita: + names: + - {first: Jugal, last: Kalita} + - {first: Jugal K., last: Kalita} + - {first: J.K., last: Kalita} +jui-ting-huang: + names: + - {first: Jui Ting, last: Huang} + - {first: Jui-Ting, last: Huang} +julia-hirschberg: + names: + - {first: Julia, last: Hirschberg} + - {first: Julia B., last: Hirschberg} +julia-lavid-lopez: + names: + - {first: Julia, last: Lavid-López} + - {first: Julia, last: Lavid} +julia-maria-schulz: + names: + - {first: Julia Maria, last: Schulz} + - {first: Julia, last: Schulz} +julia-otmakhova: + names: + - {first: Julia, last: Otmakhova} + - {first: Yulia, last: Otmakhova} +julian-j-odell: + names: + - {first: Julian J., last: Odell} + - {first: J.J., last: Odell} +julian-j-schloder: + names: + - {first: Julian J., last: Schlöder} + - {first: Julian, last: Schlöder} +julian-moreno-schneider: + names: + - {first: Julian, last: Moreno Schneider} + - {first: Julian, last: Moreno-Schneider} + - {first: Julián, last: Moreno-Schneider} + - {first: Julián, last: Moreno Schneider} +juliano-d-antonio: + names: + - {first: Juliano D., last: Antonio} + - {first: J.D., last: Antonio} +juliano-efson-sales: + names: + - {first: Juliano Efson, last: Sales} + - {first: Juliano, last: Efson Sales} + - {first: Juliano, last: Sales} +julie-beliao: + names: + - {first: Julie, last: Belião} + - {first: Julie, last: Beliao} +julie-carson-berndsen: + names: + - {first: Julie, last: Carson-Berndsen} + - {first: Julie, last: Carson} + - {first: Julle, last: Carson-Berndsen} +julie-e-boland: + names: + - {first: Julie E., last: Boland} + - {first: Julie, last: Boland} +julien-deriviere: + names: + - {first: Julien, last: Derivière} + - {first: J., last: Derivière} +julien-hamonic: + names: + - {first: Julien, last: Hamonic} + - {first: J., last: Hamonic} +julio-castillo: + names: + - {first: Julio, last: Castillo} + - {first: Julio Javier, last: Castillo} +jun-fu-cai: + names: + - {first: Jun Fu, last: Cai} + - {first: Junfu, last: Cai} +jun-ichi-nakamura: + names: + - {first: Jun-ichi, last: Nakamura} + - {first: Jun’ichi, last: Nakamura} +jun-ping-ng: + names: + - {first: Jun Ping, last: Ng} + - {first: Jun-Ping, last: Ng} +junfeng-tian: + names: + - {first: Junfeng, last: Tian} + - {first: Jun Feng, last: Tian} +jung-jae-kim: + names: + - {first: Jung-jae, last: Kim} + - {first: Jung-Jae, last: Kim} +jungyun-seo: + names: + - {first: Jungyun, last: Seo} + - {first: Jung Yun, last: Seo} +junhui-li: + names: + - {first: Junhui, last: Li} + - {first: JunHui, last: Li} +junichi-fukumoto: + names: + - {first: Jun’ichi, last: Fukumoto} + - {first: Junichi, last: Fukumoto} +junichi-kakegawa: + names: + - {first: Jun’ichi, last: Kakegawa} + - {first: Jun-ichi, last: Kakegawa} +junichi-kazama: + names: + - {first: Jun′ichi, last: Kazama} + - {first: Jun’ichi, last: Kazama} +junichi-tsujii: + names: + - {first: Jun’ichi, last: Tsujii} + - {first: Jun-ichi, last: Tsujii} + - {first: Jun-Ichi, last: Tsujii} + - {first: Junichi, last: Tsujii} + - {first: Jun-ich, last: Tsujii} + - {first: J., last: Tsujii} +junping-gong: + names: + - {first: Junping, last: Gong} + - {first: Jun-ping, last: Gong} +junya-tsutsumi: + names: + - {first: Junya, last: Tsutsumi} + - {first: J., last: Tsutsumi} +junyu-luo-pu: + degree: Peking University + disable_name_matching: true + names: + - {first: Junyu, last: Luo} + orcid: 0009-0001-6894-1144 +jurgen-wedekind: + names: + - {first: Jurgen, last: Wedekind} + - {first: Jürgen, last: Wedekind} +jurgita-kapociute-dzikiene: + names: + - {first: Jurgita, last: Kapočiūtė-Dzikienė} + - {first: Jurgita, last: Kapociute-Dzikiene} +jurica-seva: + names: + - {first: Jurica, last: Seva} + - {first: Jurica, last: Ševa} +justin-h-gross: + names: + - {first: Justin H., last: Gross} + - {first: Justin, last: Gross} +justus-c-roux: + names: + - {first: Justus C., last: Roux} + - {first: J.C., last: Roux} + - {first: J. C., last: Roux} +jyh-shing-roger-jang: + names: + - {first: Jyh-Shing Roger, last: Jang} + - {first: Jyh-Shing, last: Jang} + - {first: Jyh-Shing, last: Roger Jang} + - {first: Roger Jyh-Shing, last: Jang} +jyoti-pawar: + names: + - {first: Jyoti, last: Pawar} + - {first: Jyoti D., last: Pawar} + - {first: Jyoti D, last: Pawar} +jyun-sheng-chang: + names: + - {first: Jyun-Sheng, last: Chang} + - {first: Jyun-sheng, last: Chang} +k-bretonnel-cohen: + names: + - {first: K. Bretonnel, last: Cohen} + - {first: Kevin Bretonnel, last: Cohen} + - {first: Kevin B., last: Cohen} + - {first: Kevin, last: Cohen} +k-saravanan: + names: + - {first: K, last: Saravanan} + - {first: Saravanan, last: K} + - {first: K., last: Saravanan} +k-sreenivasa-rao: + names: + - {first: K Sreenivasa, last: Rao} + - {first: K. Sreenivasa, last: Rao} +k-tamsin-maxwell: + names: + - {first: K. Tamsin, last: Maxwell} + - {first: Tamsin, last: Maxwell} +k-v-s-prasad: + names: + - {first: K.V.S., last: Prasad} + - {first: K.V.S, last: Prasad} +k-vijay-shanker: + names: + - {first: K., last: Vijay-Shanker} + - {first: K, last: Vijay-Shanker} + - {first: K., last: Vijay-Shankar} + - {first: Vijay, last: Shanker} +ka-wai-chui: + names: + - {first: Ka-Wai, last: Chui} + - {first: Kawai, last: Chui} +kadri-hacioglu: + names: + - {first: Kadri, last: Hacioglu} + - {first: K., last: Hacioglu} +kai-fu-lee: + names: + - {first: Kai-Fu, last: Lee} + - {first: K.F., last: Lee} +kai-min-kevin-chang: + names: + - {first: Kai-min Kevin, last: Chang} + - {first: Kai-Min, last: Chang} + - {first: Kai-min K., last: Chang} +kalina-bontcheva: + names: + - {first: Kalina, last: Bontcheva} + similar: + - katina-bontcheva +kalliopi-zervanou: + names: + - {first: Kalliopi, last: Zervanou} + - {first: Kalliopi A., last: Zervanou} +kam-fai-wong: + names: + - {first: Kam-Fai, last: Wong} + - {first: Kam-fai, last: Wong} + - {first: K.F., last: Wong} +kamel-smaili: + names: + - {first: Kamel, last: Smaili} + - {first: Kamel, last: Smaïli} +kanokorn-trakultaweekoon: + names: + - {first: Kanokorn, last: Trakultaweekoon} + - {first: Kanokorn, last: Trakultaweekool} +kanthashree-mysore-sathyendra: + names: + - {first: Kanthashree, last: Mysore Sathyendra} + - {first: Kanthashree Mysore, last: Sathyendra} +karel-oliva: + names: + - {first: Karel, last: Oliva} + - {first: Karel, last: Oli̊va} +karen-e-lochbaum: + names: + - {first: Karen E., last: Lochbaum} + - {first: Karen, last: Lochbaum} +karen-jensen: + names: + - {first: Karen, last: Jensen} + - {first: K., last: Jensen} +karen-l-ryan: + names: + - {first: Karen L., last: Ryan} + - {first: Karen, last: Ryan} +karen-sparck-jones: + names: + - {first: Karen, last: Spärck Jones} + - {first: Karen, last: Sparck Jones} + - {first: Karen, last: Jones} + - {first: K., last: Sparck Jones} +karin-friberg-heppin: + names: + - {first: Karin, last: Friberg Heppin} + - {first: Karin Friberg, last: Heppin} + - {first: Karin, last: Friberg} +karin-kipper: + names: + - {first: Karin, last: Kipper} + - {first: Karin Christine, last: Kipper} + - {first: Karin, last: Schuler} + - {first: Karin, last: Kipper Schuler} + - {first: Karin, last: Kipper-Schuler} +karin-sim-smith: + names: + - {first: Karin, last: Sim Smith} + - {first: Karin Sim, last: Smith} +karin-verspoor: + names: + - {first: Karin, last: Verspoor} + - {first: Karin M., last: Verspoor} + - {first: Cornelia Maria, last: Verspoor} +karine-baschung: + names: + - {first: Karine, last: Baschung} + - {first: K., last: Baschung} +karmele-lopez-de-ipina: + names: + - {first: Karmele, last: López de Ipiña} + - {first: K., last: López de Ipiña} + - {first: K., last: Lopez de Ipina} +karoly-fabricz: + names: + - {first: Karoly, last: Fabricz} + - {first: K., last: Fabricz} +karsten-l-worm: + names: + - {first: Karsten L., last: Worm} + - {first: Karsten, last: Worm} + - {first: K. L., last: Worm} +kasu-sai-kartheek-reddy: + names: + - {first: Kasu Sai Kartheek, last: Reddy} + - {first: Sai Kartheek, last: Reddy Kasu} + orcid: 0009-0007-6679-3313 +katalin-ilona-simko: + names: + - {first: Katalin Ilona, last: Simkó} + - {first: Katalin, last: Simkó} +katarina-heimann-muhlenbock: + names: + - {first: Katarina, last: Heimann Mühlenbock} + - {first: Katarina, last: Mühlenbock} +kate-forbes-riley: + names: + - {first: Kate, last: Forbes-Riley} + - {first: Kate, last: Forbes} + - {first: Katherine, last: Forbes-Riley} + - {first: Katherine, last: Forbes} + - {first: Katherine, last: Forbes Riley} +kate-hunicke-smith: + names: + - {first: Kate, last: Hunicke-Smith} + - {first: K., last: Hunicke-Smith} +katerina-vesela: + names: + - {first: Katerina, last: Veselá} + - {first: Kateřina, last: Veselá} +katharina-boesefeldt: + names: + - {first: Katharina, last: Boesefeldt} + - {first: K., last: Boesefeldt} +katharina-von-der-wense: + names: + - {first: Katharina, last: von der Wense} + - {first: Katharina, last: Kann} +katharina-waschle: + names: + - {first: Katharina, last: Wäschle} + - {first: Katharina, last: Waeschle} +kathleen-c-fraser: + names: + - {first: Kathleen C., last: Fraser} + - {first: Kathleen, last: Fraser} +kathleen-dahlgren: + names: + - {first: Kathleen, last: Dahlgren} + - {first: K., last: Dahlgren} +kathleen-f-mccoy: + names: + - {first: Kathleen F., last: McCoy} + - {first: Kathleen, last: McCoy} + - {first: Kathleen E., last: McCoy} +kathleen-ferraro: + names: + - {first: Kathleen, last: Ferraro} + - {first: Kathleen, last: Ferrara} +kathleen-m-carley: + names: + - {first: Kathleen M., last: Carley} + - {first: Kathleen, last: Carley} +kathleen-m-sheehan: + names: + - {first: Kathleen M., last: Sheehan} + - {first: Kathleen, last: Sheehan} +kathleen-mckeown: + names: + - {first: Kathleen, last: McKeown} + - {first: Kathy, last: McKeown} + - {first: Kathleen R., last: McKeown} +kathryn-baker: + names: + - {first: Kathryn, last: Baker} + - {first: Kathryn L., last: Baker} +katia-lida-kermanidis: + names: + - {first: Katia Lida, last: Kermanidis} + - {first: Katia, last: Kermanidis} +katina-bontcheva: + names: + - {first: Katina, last: Bontcheva} + similar: + - kalina-bontcheva +katsuhiko-shirai: + names: + - {first: Katsuhiko, last: Shirai} + - {first: K., last: Shirai} +katsumasa-shimizu: + names: + - {first: Katsumasa, last: Shimizu} + - {first: K., last: Shimizu} +kavi-mahesh: + names: + - {first: Kavi, last: Mahesh} + - {first: Kavitha, last: Mahesh} + - {first: Kavitha Karimbi, last: Mahesh} +kavosh-asadi-atui: + names: + - {first: Kavosh, last: Asadi Atui} + - {first: Kavosh, last: Asadi} +kazuya-takeda: + names: + - {first: Kazuya, last: Takeda} + - {first: Kasuya, last: Takeda} +ke-jia-zhang: + names: + - {first: Ke-Jia, last: Zhang} + - {first: Ke-Jia, last: Chang} +ke-m-tran: + names: + - {first: Ke M., last: Tran} + - {first: Ke, last: Tran} + - {first: Ke, last: Tran Manh} +kees-van-deemter: + names: + - {first: Kees, last: van Deemter} + - {first: Kees, last: Van Deemter} +keh-jiann-chen: + names: + - {first: Keh-Jiann, last: Chen} + - {first: Keh-jiann, last: Chen} + - {first: Ke-Jiann, last: Chen} + - {first: K. J., last: Chen} +kei-mitamura: + names: + - {first: Kei, last: Mitamura} + - {first: K., last: Mitamura} +kei-yoshimoto: + names: + - {first: Kei, last: Yoshimoto} + - {first: K., last: Yoshimoto} +keith-hall: + names: + - {first: Keith, last: Hall} + - {first: Keith B., last: Hall} +keith-j-miller: + names: + - {first: Keith J., last: Miller} + - {first: Keith, last: Miller} +ken-samuel: + names: + - {first: Ken, last: Samuel} + - {first: Kenneth, last: Samuel} +kenji-araki: + names: + - {first: Kenji, last: Araki} + - {first: K., last: Araki} +kenji-hanakata: + names: + - {first: Kenji, last: Hanakata} + - {first: K., last: Hanakata} +kenneth-church: + names: + - {first: Kenneth, last: Church} + - {first: Kenneth Ward, last: Church} + - {first: Ken, last: Church} + - {first: Kenneth W., last: Church} +kenneth-forbus: + names: + - {first: Kenneth, last: Forbus} + - {first: Kenneth D., last: Forbus} +kenneth-j-mckeever: + names: + - {first: Kenneth J., last: McKeever} + - {first: Kenneth, last: McKeever} +kenneth-m-anderson: + names: + - {first: Kenneth M., last: Anderson} + - {first: Kenneth, last: Anderson} + - {first: Ken, last: Anderson} +kenneth-s-bogh: + names: + - {first: Kenneth S., last: Bøgh} + - {first: Kenneth, last: Bøgh} +kenny-zhu: + names: + - {first: Kenny, last: Zhu} + - {first: Kenny Q., last: Zhu} +kepa-joseba-rodriguez: + names: + - {first: Kepa Joseba, last: Rodriguez} + - {first: Kepa J., last: Rodríguez} + - {first: Kepa Joseba, last: Rodríguez} +kepa-sarasola: + names: + - {first: Kepa, last: Sarasola} + - {first: K, last: Sarasola} + - {first: K., last: Sarasola} +kerstin-kunz: + names: + - {first: Kerstin, last: Kunz} + - {first: Kerstin Anna, last: Kunz} +kerstin-severinson-eklundh: + names: + - {first: Kerstin Severinson, last: Eklundh} + - {first: Kerstin, last: Severinson Eklundh} + - {first: Kerstin, last: Severinson} +kevin-c-yeh: + names: + - {first: Kevin C., last: Yeh} + - {first: Kevin, last: Yeh} +kevin-d-ashley: + names: + - {first: Kevin D., last: Ashley} + - {first: Kevin, last: Ashley} +kevin-humphreys: + names: + - {first: Kevin, last: Humphreys} + - {first: K., last: Humphreys} +kevin-m-quinn: + names: + - {first: Kevin M., last: Quinn} + - {first: Kevin, last: Quinn} +kevin-mctait: + names: + - {first: Kevin, last: McTait} + - {first: K., last: McTait} +kexin-wang-bd: + comment: Bytedance + names: + - {first: Kexin, last: Wang} +kexin-wang-tudarmstadt: + comment: TU Darmstadt + names: + - {first: Kexin, last: Wang} + orcid: 0000-0003-1175-7829 +key-sun-choi: + names: + - {first: Key-Sun, last: Choi} + - {first: Key-sun, last: Choi} +khaled-shaban: + names: + - {first: Khaled, last: Shaban} + - {first: Khaled, last: Bashir Shaban} +khalid-al-khatib: + names: + - {first: Khalid, last: Al Khatib} + - {first: Khalid, last: Al-Khatib} +khalid-choukri: + names: + - {first: Khalid, last: Choukri} + - {first: Kalid, last: Choukri} + - {first: K., last: Choukri} +khalil-simaan: + names: + - {first: Khalil, last: Sima’an} + - {first: K., last: Sima’an} +khe-chai-sim: + names: + - {first: Khe Chai, last: Sim} + - {first: Khe-Chai, last: Sim} +khiet-p-truong: + names: + - {first: Khiet P., last: Truong} + - {first: Khiet, last: Truong} +kian-ming-a-chai: + names: + - {first: Kian Ming A., last: Chai} + - {first: Kian Ming Adam, last: Chai} +kiat-gak-lau: + names: + - {first: Kiat-gak, last: Lau} + - {first: Kiat-Gak, last: Lau} + - {first: Kiãt-gãk, last: Lâu} +kikuo-maekawa: + names: + - {first: Kikuo, last: Maekawa} + - {first: K., last: Maekawa} +kilian-a-foth: + names: + - {first: Kilian A., last: Foth} + - {first: Kilian, last: Foth} +kim-e-a-silverman: + names: + - {first: Kim E. A., last: Silverman} + - {first: Kim E.A., last: Silverman} +kim-teng-lua: + names: + - {first: Kim-Teng, last: Lua} + - {first: KimTeng, last: Lua} + - {first: Kim Teng, last: Lua} +king-kui-sin: + names: + - {first: King Kui, last: Sin} + - {first: KingKui, last: Sin} + - {first: K. K., last: Sin} + - {first: K.K., last: Sin} +kiran-gvr: + names: + - {first: Kiran, last: GVR} + - {first: Kiran, last: Gvr} +kiril-simov: + names: + - {first: Kiril, last: Simov} + - {first: Kiril Iv., last: Simov} +klaus-wothke: + names: + - {first: Klaus, last: Wothke} + - {first: K., last: Wothke} +koel-dutta-chowdhury: + names: + - {first: Koel, last: Dutta Chowdhury} + - {first: Koel Dutta, last: Chowdhury} +koenraad-de-smedt: + names: + - {first: Koenraad, last: De Smedt} + - {first: Koenraad, last: de Smedt} + - {first: Koenraad, last: DeSmedt} +koiti-hasida: + names: + - {first: Koiti, last: Hasida} + - {first: Kôiti, last: Hasida} +kok-wee-gan: + names: + - {first: Kok Wee, last: Gan} + - {first: Kok-Wee, last: Gan} +koldo-gojenola: + names: + - {first: Koldo, last: Gojenola} + - {first: Koldobika, last: Gojenola} + - {first: Koldo, last: Gojenola Galletebeitia} + - {first: K., last: Gojenola} +kornel-marko: + names: + - {first: Kornél, last: Markó} + - {first: Kornel, last: Markó} +kostas-karpouzis: + names: + - {first: Kostas, last: Karpouzis} + - {first: K., last: Karpouzis} +kranti-chalamalasetti: + names: + - {first: Kranti, last: Chalamalasetti} + - {first: Chalamalasetti, last: Kranti} +krasimira-ivanova: + names: + - {first: Krasimira, last: Ivanova} + - {first: Krassimira, last: Ivanova} +kristen-johnson: + names: + - {first: Kristen, last: Johnson} + - {first: Kristen Marie, last: Johnson} +krister-linden: + names: + - {first: Krister, last: Lindén} + - {first: Krister, last: Linden} +kristian-concepcion: + names: + - {first: Kristian, last: Concepcion} + - {first: Kris, last: Concepcion} +kristiina-jokinen: + names: + - {first: Kristiina, last: Jokinen} + - {first: Päivi Kristiina, last: Jokinen} +kristina-nilsson-bjorkenstam: + names: + - {first: Kristina, last: Nilsson Björkenstam} + - {first: Kristina, last: Nilsson} + - {first: Kristina Nilsson, last: Björkenstam} + - {first: Kristina, last: N. Björkenstam} +kristina-vuckovic: + names: + - {first: Kristina, last: Vuckovic} + - {first: Kristina, last: Vučković} +kristine-levane-petrova: + names: + - {first: Kristīne, last: Levāne-Petrova} + - {first: Kristīne, last: Levāne} +kristy-boyer: + names: + - {first: Kristy, last: Boyer} + - {first: Kristy Elizabeth, last: Boyer} +krzysztof-kochut: + names: + - {first: Krzysztof, last: Kochut} + - {first: K., last: Kochut} +kuang-hua-chen: + names: + - {first: Kuang-hua, last: Chen} + - {first: Kuang-Hua, last: Chen} +kui-lam-kwok: + names: + - {first: Kui-Lam, last: Kwok} + - {first: Kui Lam, last: Kwok} + - {first: K.L., last: Kwok} +kumiko-tanaka-ishii: + names: + - {first: Kumiko, last: Tanaka-Ishii} + - {first: Kumiko, last: Tanaka} +kun-ching-wang: + names: + - {first: Kun-Ching, last: Wang} + - {first: Kun-ching, last: Wang} +kun-zhang-inria: + comment: Inria Saclay-Île-de-France + names: + - {first: Kun, last: Zhang} +kun-zhang-ucas: + comment: University of Chinese Academy of Sciences + names: + - {first: Kun, last: Zhang} +kun-zhang-ustc: + comment: University of Science and Technology of China + names: + - {first: Kun, last: Zhang} +kwok-ping-chan: + names: + - {first: Kwok-Ping, last: Chan} + - {first: Kwok Ping, last: Chan} +kyong-hi-moon: + names: + - {first: Kyong-Hi, last: Moon} + - {first: Kyonghi, last: Moon} +kyosuke-yoshida: + names: + - {first: Kyosuke, last: Yoshida} + - {first: Kyôsuke, last: Yoshida} +kyoung-young-kim: + names: + - {first: Kyoung-young, last: Kim} + - {first: Kyoung-Young, last: Kim} +kyumars-sheykh-esmaili: + names: + - {first: Kyumars, last: Sheykh Esmaili} + - {first: Kyumars Sheykh, last: Esmaili} +kyung-soon-lee: + names: + - {first: Kyung-Soon, last: Lee} + - {first: KyungSoon, last: Lee} +kyungtae-lim: + names: + - {first: KyungTae, last: Lim} + - {first: Kyungtae, last: Lim} +kyuyoung-kim: + names: + - {first: Kyuyoung, last: Kim} + - {first: Kyu-Young, last: Kim} +l-alfonso-urena-lopez: + names: + - {first: L. Alfonso, last: Urena Lopez} + - {first: L. Alfonso, last: Ureña-López} + - {first: L. Alfonso, last: Ureña López} + - {first: L. Alfonso, last: Urena-López} + - {first: L. Alfonso, last: Urena} + - {first: Alfonso, last: Ureña-López} + - {first: Luis Alfonso, last: Ureña-López} + - {first: L. Alfonso, last: Ureña- López} +l-venkata-subramaniam: + names: + - {first: L. Venkata, last: Subramaniam} + - {first: L Venkata, last: Subramaniam} + - {first: L. V., last: Subramaniam} + - {first: L V, last: Subramaniam} +laila-dybkjaer: + names: + - {first: Laila, last: Dybkjaer} + - {first: Laila, last: Dybkjær} +lakshmi-s: + names: + - {first: Lakshmi, last: S} + - {first: Lakshmi, last: Saheer} + - {first: Lakshmi, last: S.} +lalit-r-bahl: + names: + - {first: Lalit R., last: Bahl} + - {first: L. R., last: Bahl} + - {first: L.R., last: Bahl} +lambros-cranias: + names: + - {first: Lambros, last: Cranias} + - {first: Lambros, last: Kranias} +lamia-hadrich-belguith: + names: + - {first: Lamia, last: Hadrich Belguith} + - {first: Lamia Hadrich, last: Belguith} + - {first: Lamia, last: Hadrich-Belguith} + - {first: Lamia, last: Belguith} + - {first: Lamia, last: Belguith Hadrich} +lance-a-miller: + names: + - {first: Lance A., last: Miller} + - {first: L. A., last: Miller} +lance-ramshaw: + names: + - {first: Lance, last: Ramshaw} + - {first: Lance A., last: Ramshaw} +larry-h-reeker: + names: + - {first: Larry H., last: Reeker} + - {first: L.H., last: Reeker} +lars-juhl-jensen: + names: + - {first: Lars Juhl, last: Jensen} + - {first: Lars J., last: Jensen} +laszlo-felfoldi: + names: + - {first: Laszlo, last: Felfoldi} + - {first: László, last: Felföldi} +laszlo-grunfeld: + names: + - {first: Laszlo, last: Grunfeld} + - {first: L., last: Grunfeld} +laszlo-tihanyi: + names: + - {first: Laszlo, last: Tihanyi} + - {first: László, last: Tihanyi} +lauma-pretkalnina: + names: + - {first: Lauma, last: Pretkalniņa} + - {first: Lauma, last: Pretkalnina} + - {first: Lauma, last: Pretkalnin̨a} +laura-alonso-alemany: + names: + - {first: Laura, last: Alonso Alemany} + - {first: Laura, last: Alonso i Alemany} + - {first: Laura, last: Alonso} +laura-ana-maria-oberlander: + names: + - {first: Laura Ana Maria, last: Oberländer} + - {first: Laura Ana Maria, last: Bostan} + - {first: Laura-Ana-Maria, last: Bostan} +laura-burdick: + names: + - {first: Laura, last: Burdick} + - {first: Laura, last: Wendlandt} +laura-docio-fernandez: + names: + - {first: Laura, last: Docio-Fernandez} + - {first: Laura, last: Docío-Fernández} +laura-g-miller: + names: + - {first: Laura G., last: Miller} + - {first: L. G., last: Miller} +laura-mayfield-tomokiyo: + names: + - {first: Laura Mayfield, last: Tomokiyo} + - {first: Laura, last: Mayfield} +lauren-levine: + names: + - {first: Lauren, last: Levine} + - {first: Lauren Elizabeth, last: Levine} +laurence-devillers: + names: + - {first: Laurence, last: Devillers} + - {first: L., last: Devillers} +laurence-gillick: + names: + - {first: Laurence, last: Gillick} + - {first: Laurence S., last: Gillick} +laurent-besacier: + names: + - {first: Laurent, last: Besacier} + - {first: L., last: Besacier} +laurent-bourbeau: + names: + - {first: Laurent, last: Bourbeau} + - {first: L., last: Bourbeau} +laurent-gillard: + names: + - {first: Laurent, last: Gillard} + - {first: L., last: Gillard} +laurent-prevot: + names: + - {first: Laurent, last: Prévot} + - {first: Laurent, last: Prevot} +laurent-romary: + names: + - {first: Laurent, last: Romary} + - {first: L., last: Romary} +laurent-schmitt: + names: + - {first: Laurent, last: Schmitt} + - {first: L., last: Schmitt} +laurie-damianos: + names: + - {first: Laurie, last: Damianos} + - {first: Laurie E., last: Damianos} +laurie-feldman: + names: + - {first: Laurie, last: Feldman} + - {first: Laurie Beth, last: Feldman} +lawrence-hunter: + names: + - {first: Lawrence, last: Hunter} + - {first: Lawrence E., last: Hunter} +lawrence-r-rabiner: + names: + - {first: Lawrence R., last: Rabiner} + - {first: L. R., last: Rabiner} +lawrence-s-moss: + names: + - {first: Lawrence S., last: Moss} + - {first: Lawrence, last: Moss} +lawrence-y-l-cheung: + names: + - {first: Lawrence Y. L., last: Cheung} + - {first: Lawrence Y.L., last: Cheung} + - {first: L. Y. L., last: Cheung} +le-quan-ha: + names: + - {first: Le Quan, last: Ha} + - {first: Le Q, last: Ha} +lee-becker: + names: + - {first: Lee, last: Becker} + - {first: Lee A., last: Becker} +leif-groenqvist: + names: + - {first: Leif, last: Groenqvist} + - {first: Leif, last: Gronqvist} +lena-stromback: + names: + - {first: Lena, last: Stromback} + - {first: Lena, last: Strömbäck} +lenhart-schubert: + names: + - {first: Lenhart, last: Schubert} + - {first: Lenhart K., last: Schubert} + - {first: Len, last: Schubert} +leon-derczynski: + names: + - {first: Leon, last: Derczynski} + - {first: Leon, last: Strømberg-Derczynski} +leon-dostert: + names: + - {first: Léon, last: Dostert} + - {first: Leon, last: Dostert} + - {first: L. E., last: Dostert} +leonardo-campillos-llanos: + names: + - {first: Leonardo Campillos, last: Llanos} + - {first: Leonardo, last: Campillos Llanos} +leonardo-lesmo: + names: + - {first: Leonardo, last: Lesmo} + - {first: L., last: Lesmo} +leonid-iomdin: + names: + - {first: Leonid, last: Iomdin} + - {first: Leonid L., last: Iomdin} +leonida-della-rocca: + names: + - {first: Leonida Della, last: Rocca} + - {first: Leonida, last: Della-Rocca} + - {first: Leonida, last: Della Rocca} +lesly-miculicich-werlen: + names: + - {first: Lesly, last: Miculicich Werlen} + - {first: Lesly, last: Miculicich} +lev-ratinov: + names: + - {first: Lev, last: Ratinov} + - {first: Lev-Arie, last: Ratinov} +lewis-m-norton: + names: + - {first: Lewis M., last: Norton} + - {first: Lewis, last: Norton} +li-chin-yang: + names: + - {first: Li-chin, last: Yang} + - {first: Li-Chin, last: Yang} +li-li-chang: + names: + - {first: Li-Li, last: Chang} + - {first: Li-li, last: Chang} +li-lin-pku: + degree: Peking University + disable_name_matching: true + names: + - {first: Li, last: Lin} + orcid: 0009-0008-5072-5022 +li-lucy: + names: + - {first: Li, last: Lucy} + - {first: Lucy, last: Li} +li-mei-chen: + names: + - {first: Li-mei, last: Chen} + - {first: Li-Mei, last: Chen} +li-ping-chang: + names: + - {first: Li-Ping, last: Chang} + - {first: Li-ping, last: Chang} +li-rong-dai: + names: + - {first: Li-Rong, last: Dai} + - {first: LiRong, last: Dai} +li-zhang-aws: + comment: AWS + names: + - {first: Li, last: Zhang} +li-zhang-birmingham: + comment: Birmingham + names: + - {first: Li, last: Zhang} +li-zhang-gg: + comment: Google + names: + - {first: Li, last: Zhang} +li-zhang-google: + comment: Google + names: + - {first: Li, last: Zhang} +li-zhang-ibmc: + comment: IBM-china + names: + - {first: Li, last: Zhang} +li-zhang-nankai: + comment: Nankai + names: + - {first: Li, last: Zhang} +li-zhang-newcastle: + comment: Newcastle, UK + names: + - {first: Li, last: Zhang} +li-zhang-teesside: + comment: Teesside University + names: + - {first: Li, last: Zhang} +li-zhang-ucsandiego: + comment: UC San Diego + names: + - {first: Li, last: Zhang} +li-zhang-uk: + comment: UK + names: + - {first: Li, last: Zhang} +li-zhang-upenn: + comment: University of Pennsylvania + names: + - {first: Li, last: Zhang} +li-zhang-wuhan: + comment: Wuhan + names: + - {first: Li, last: Zhang} +lianen-huang: + names: + - {first: Lian′en, last: Huang} + - {first: Lian’en, last: Huang} +liang-chih-yu: + names: + - {first: Liang-Chih, last: Yu} + - {first: Liang-chih, last: Yu} +liang-yu-chen: + names: + - {first: Liang-Yu, last: Chen} + - {first: Liangyu, last: Chen} +lianhau-lee: + names: + - {first: Lianhau, last: Lee} + - {first: Lian Hau, last: Lee} +liat-ein-dor: + names: + - {first: Liat, last: Ein Dor} + - {first: Liat, last: Ein-Dor} +lide-wu: + names: + - {first: Lide, last: Wu} + - {first: Li-de, last: Wu} +lidia-mangu: + names: + - {first: Lidia, last: Mangu} + - {first: L., last: Mangu} +lidia-moreno: + names: + - {first: Lidia, last: Moreno} + - {first: L., last: Moreno} +liliana-mamani-sanchez: + names: + - {first: Liliana, last: Mamani Sanchez} + - {first: Liliana, last: Mamani Sánchez} + - {first: Liliana Mamani, last: Sanchez} +lilja-ovrelid: + names: + - {first: Lilja, last: Øvrelid} + - {first: Lilja, last: Ovrelid} +lin-shan-lee: + names: + - {first: Lin-Shan, last: Lee} + - {first: Lin-shan, last: Lee} +lina-m-rojas-barahona: + names: + - {first: Lina M., last: Rojas Barahona} + - {first: Lina M., last: Rojas-Barahona} + - {first: Lina, last: Rojas-Barahona} + - {first: Lina, last: Rojas} + - {first: Lina Maria, last: Rojas-Barahona} +linda-fineman: + names: + - {first: Linda, last: Fineman} + - {first: L., last: Fineman} +lindsay-j-evett: + names: + - {first: Lindsay J., last: Evett} + - {first: L.J., last: Evett} +ling-xiao-wang: + names: + - {first: Ling Xiao, last: Wang} + - {first: Lingxiao, last: Wang} +lingpeng-yang: + names: + - {first: Lingpeng, last: Yang} + - {first: LingPeng, last: Yang} +lingshuang-jack-mao: + names: + - {first: Lingshuang Jack, last: Mao} + - {first: Lingshuang, last: Mao} +lisa-braden-harder: + names: + - {first: Lisa, last: Braden-Harder} + - {first: Lisa C., last: Braden-Harder} +lisa-n-michaud: + names: + - {first: Lisa N., last: Michaud} + - {first: Lisa, last: Michaud} +lisa-rau: + names: + - {first: Lisa, last: Rau} + - {first: Lisa F., last: Rau} +lisanne-teunissen: + names: + - {first: Lisanne, last: Teunissen} + - {first: Lisa, last: Teunissen} +litton-j-kurisinkel: + names: + - {first: Litton, last: J Kurisinkel} + - {first: Litton J, last: Kurisinkel} +liviu-p-dinu: + names: + - {first: Liviu P., last: Dinu} + - {first: Liviu, last: Dinu} + - {first: Liviu Petrisor, last: Dinu} +lluis-f-hurtado: + names: + - {first: Lluís-F., last: Hurtado} + - {first: Lluís F., last: Hurtado} + - {first: LLuís-F., last: Hurtado} +lluis-formiga: + names: + - {first: Lluis, last: Formiga} + - {first: Lluís, last: Formiga} +lluis-marquez: + names: + - {first: Lluís, last: Màrquez} + - {first: Lluis, last: Marquez} + - {first: Lluis, last: Màrquez} + - {first: Lluis, last: Márquez} + - {first: L., last: Màrquez} +lluis-padro: + names: + - {first: Lluís, last: Padró} + - {first: Lluis, last: Padro} + - {first: Lluis, last: Padró} + - {first: L., last: Padró} + - {first: L., last: Padro} +loic-boizou: + names: + - {first: Loic, last: Boizou} + - {first: Loïc, last: Boizou} +loic-dugast: + names: + - {first: Loic, last: Dugast} + - {first: Loïc, last: Dugast} +lois-c-childs: + names: + - {first: Lois C., last: Childs} + - {first: Lois, last: Childs} +lois-m-black: + names: + - {first: Lois M., last: Black} + - {first: Lois, last: Black} +long-duong: + names: + - {first: Long, last: Duong} + - {first: Long, last: Duong Thanh} +long-nguyen: + names: + - {first: Long, last: Nguyen} + - {first: L., last: Nguyen} +longhua-qian: + names: + - {first: Longhua, last: Qian} + - {first: LongHua, last: Qian} +longlong-ma: + names: + - {first: Longlong, last: Ma} + - {first: Long Long, last: Ma} +lonneke-van-der-plas: + names: + - {first: Lonneke, last: van der Plas} + - {first: Lonneke, last: Van Der Plas} +loong-cheong-tong: + names: + - {first: Loong-Cheong, last: Tong} + - {first: Loong Cheong, last: Tong} +lorenza-romano: + names: + - {first: Lorenza, last: Romano} + - {first: L., last: Romano} +lorenzo-moretti: + names: + - {first: Lorenzo, last: Moretti} + - {first: L., last: Moretti} +lori-lamel: + names: + - {first: Lori, last: Lamel} + - {first: Lori F., last: Lamel} + - {first: L.F., last: Lamel} +lori-levin: + names: + - {first: Lori, last: Levin} + - {first: Lori S., last: Levin} +lou-boves: + names: + - {first: Lou, last: Boves} + - {first: Louis, last: Boves} +louis-des-tombe: + names: + - {first: Louis, last: des Tombe} + - {first: L., last: des Tombe} +louise-amelie-cougnon: + names: + - {first: Louise-Amélie, last: Cougnon} + - {first: Louis-Amélie, last: Cougnon} +louise-corti: + names: + - {first: Louise, last: Corti} + - {first: L., last: Corti} +louise-guthrie: + names: + - {first: Louise, last: Guthrie} + - {first: L., last: Guthrie} +louisette-emirkanian: + names: + - {first: Louisette, last: Emirkanian} + - {first: L., last: Emirkanian} +lu-xu-uniroma1: + degree: Sapienza University of Rome + disable_name_matching: true + names: + - {first: Lu, last: Xu} + orcid: 0000-0002-5660-3631 +luana-fagarasan: + names: + - {first: Luana, last: Fǎgǎrǎşan} + - {first: Luana, last: Fagarasan} +lubos-popelinsky: + names: + - {first: Lubos, last: Popelínsky} + - {first: Luboš, last: Popelínský} +luca-cristoforetti: + names: + - {first: Luca, last: Cristoforetti} + - {first: L., last: Cristoforetti} +luca-dini: + names: + - {first: Luca, last: Dini} + - {first: L., last: Dini} +luca-gilardoni: + names: + - {first: Luca, last: Gilardoni} + - {first: L., last: Gilardoni} +lucas-georges-gabriel-charpentier: + names: + - {first: Lucas, last: Georges Gabriel Charpentier} + - {first: Lucas, last: Charpentier} +lucas-welter-hilgert: + names: + - {first: Lucas Welter, last: Hilgert} + - {first: Lucas, last: Hilgert} +lucian-vlad-lita: + names: + - {first: Lucian Vlad, last: Lita} + - {first: Lucian, last: Lita} +luciana-beatriz-avila: + names: + - {first: Luciana Beatriz, last: Avila} + - {first: Luciana Beatriz, last: Ávila} + - {first: Luciana, last: Ávila} +luciano-serafini: + names: + - {first: Luciano, last: Serafini} + - {first: L., last: Serafini} +lucja-iwanska: + names: + - {first: Lucja, last: Iwanska} + - {first: Lucja M., last: Iwanska} +lucy-lu-wang: + names: + - {first: Lucy Lu, last: Wang} + - {first: Lucy, last: Wang} +luigi-di-caro: + names: + - {first: Luigi, last: Di Caro} + - {first: Luigi, last: di Caro} +luis-a-leiva: + names: + - {first: Luis A., last: Leiva} + - {first: Luis, last: Leiva} +luis-a-pineda: + names: + - {first: Luis. A., last: Pineda} + - {first: Luis, last: Pineda} +luis-adrian-cabrera-diego: + names: + - {first: Luis-Adrián, last: Cabrera-Diego} + - {first: Luis Adrián, last: Cabrera-Diego} +luis-espinosa-anke: + names: + - {first: Luis, last: Espinosa Anke} + - {first: Luis, last: Espinosa-Anke} + - {first: Luis Espinosa, last: Anke} +luis-fernando-costa: + names: + - {first: Luís Fernando, last: Costa} + - {first: Luís, last: Costa} +luis-fernando-dharo: + names: + - {first: Luis Fernando, last: D’Haro} + - {first: Luis F., last: d’Haro} +luis-gerardo-mojica-de-la-vega: + names: + - {first: Luis Gerardo, last: Mojica de la Vega} + - {first: Luis, last: Mojica de la Vega} +luis-hernandez: + names: + - {first: Luis, last: Hernández} + - {first: Luis Hernández, last: Gomez} + - {first: Luis Hernández, last: Gómez} + - {first: Luis A., last: Hernandez} + - {first: Luis A., last: Hernández} + - {first: Luis A. Hernández, last: Gómez} +luis-javier-rodriguez-fuentes: + names: + - {first: Luis Javier, last: Rodriguez-Fuentes} + - {first: Luis Javier, last: Rodríguez-Fuentes} +luis-marujo: + names: + - {first: Luis, last: Marujo} + - {first: Luís, last: Marujo} +luis-miguel-cabral: + names: + - {first: Luís Miguel, last: Cabral} + - {first: Luís, last: Cabral} +luis-rodrigo-aguado: + names: + - {first: Luis, last: Rodrigo-Aguado} + - {first: Luis, last: Rodrigo} +luis-villarejo: + names: + - {first: Luís, last: Villarejo} + - {first: Luis, last: Villarejo} +luis-villasenor-pineda: + names: + - {first: Luis, last: Villaseñor-Pineda} + - {first: Luis, last: Villaseñor} + - {first: Luis, last: Villasenor} +luisa-coheur: + names: + - {first: Luísa, last: Coheur} + - {first: Luisa, last: Coheur} +luisa-pereira: + names: + - {first: Luísa, last: Pereira} + - {first: Luisa, last: Pereira} +luiz-augusto-pizzato: + names: + - {first: Luiz Augusto, last: Pizzato} + - {first: Luiz Augusto Sangoi, last: Pizzato} +luka-a-clarke: + names: + - {first: Luka A., last: Clarke} + - {first: Luka, last: Clarke} +lukas-zilka: + names: + - {first: Lukáš, last: Žilka} + - {first: Lukas, last: Zilka} +lukasz-kaiser: + names: + - {first: Łukasz, last: Kaiser} + - {first: Lukasz, last: Kaiser} +luke-zettlemoyer: + names: + - {first: Luke, last: Zettlemoyer} + - {first: Luke S., last: Zettlemoyer} +luminita-chiran: + names: + - {first: Luminita, last: Chiran} + - {first: L., last: Chiran} +luu-anh-tuan: + names: + - {first: Luu Anh, last: Tuan} + - {first: Anh, last: Luu} + - {first: Anh Tuan, last: Luu} +luuk-van-waes: + names: + - {first: Luuk Van, last: Waes} + - {first: Luuk, last: Van Waes} +lydia-mai-ho-dac: + names: + - {first: Lydia-Mai, last: Ho-Dac} + - {first: Mai, last: Ho-dac} +lyle-ungar: + names: + - {first: Lyle, last: Ungar} + - {first: Lyle H., last: Ungar} +lynette-hirschman: + names: + - {first: Lynette, last: Hirschman} + - {first: Lynette, last: Hirshman} + - {first: L., last: Hirschman} +lynne-cahill: + names: + - {first: Lynne, last: Cahill} + - {first: Lynne J., last: Cahill} + - {first: L, last: Cahill} +m-a-picheny: + names: + - {first: M. A., last: Picheny} + - {first: M.A., last: Picheny} +m-amin-farajian: + names: + - {first: M. Amin, last: Farajian} + - {first: Mohammad Amin, last: Farajian} +m-antonia-marti: + names: + - {first: M. Antònia, last: Martí} + - {first: M. Antonia, last: Martí} + - {first: M. Antonia, last: Marti} + - {first: Antonia, last: Martí} + - {first: Mª Antònia, last: Martí} + - {first: Maria Antònia, last: Martí} + - {first: Toni, last: Martí} + - {first: M. A., last: Marti} + - {first: M.A., last: Martí} + - {first: M. A., last: Martí} +m-dolores-jimenez-lopez: + names: + - {first: M. Dolores, last: Jiménez-López} + - {first: Maria Dolores, last: Jiménez-López} +m-dolores-molina-gonzalez: + names: + - {first: M. Dolores, last: Molina-González} + - {first: M. Dolores, last: Molina-Gonzalez} +m-g-abbas-malik: + names: + - {first: M. G. Abbas, last: Malik} + - {first: M.G. Abbas, last: Malik} + - {first: M G Abbas, last: Malik} +m-ines-torres: + names: + - {first: M. Inés, last: Torres} + - {first: María Inés, last: Torres} +m-patrick-martin: + names: + - {first: M. Patrick, last: Martin} + - {first: Pierre M., last: Martin} + - {first: Patrick, last: Martin} +m-pilar-valverde-ibanez: + names: + - {first: M. Pilar, last: Valverde Ibáñez} + - {first: M. Pilar, last: Valverde Ibañez} +m-soledad-lopez-gambino: + names: + - {first: M. Soledad, last: López Gambino} + - {first: Soledad, last: López Gambino} +m-teresa-martin-valdivia: + names: + - {first: M. Teresa, last: Martín-Valdivia} + - {first: Maite, last: Martin} + - {first: María Teresa, last: Martín-Valdivia} + - {first: Maria Teresa, last: Martín-Valdivia} + - {first: Teresa, last: Martin} + - {first: M. Teresa, last: Martín} + - {first: Maite, last: Martín-Valdivia} +maarten-de-rijke: + names: + - {first: Maarten, last: de Rijke} + - {first: Maarten, last: De Rijke} +maayan-geffet: + names: + - {first: Maayan, last: Geffet} + - {first: Maayan, last: Zhitomirsky-Geffet} +maddalen-lopez-de-lacalle: + names: + - {first: Maddalen, last: López de Lacalle} + - {first: Maddalen, last: Lopez de Lacalle} +madeleine-bates: + names: + - {first: Madeleine, last: Bates} + - {first: Madeline, last: Bates} + - {first: M., last: Bates} +madis-saluveer: + names: + - {first: Madis, last: Saluveer} + - {first: M., last: Saluveer} +magali-sanches-duran: + names: + - {first: Magali Sanches, last: Duran} + - {first: Magali, last: Sanches Duran} + - {first: Magali, last: Duran} +magdalena-plamada: + names: + - {first: Magdalena, last: Plamadă} + - {first: Magdalena, last: Plamada} +magdi-nagi: + names: + - {first: Magdi, last: Nagi} + - {first: Magdy, last: Nagi} +mahbaneh-eshaghzadeh-torbati: + names: + - {first: Mahbaneh, last: Eshaghzadeh Torbati} + - {first: Mahbaneh, last: Eshaghzadeh} +mahesh-v-chitrao: + names: + - {first: Mahesh V., last: Chitrao} + - {first: Mahesh, last: Chitrao} +mai-vu-tran: + names: + - {first: Mai-Vu, last: Tran} + - {first: Mai-vu, last: Tran} +maira-gatti: + names: + - {first: Maíra, last: Gatti} + - {first: Maira, last: Gatti} +mairgup-mansur: + names: + - {first: Mairgup, last: Mansur} + - {first: Mansur, last: Mairgup} +maite-oronoz: + names: + - {first: Maite, last: Oronoz} + - {first: M., last: Oronoz} +maite-taboada: + names: + - {first: Maite, last: Taboada} + - {first: M., last: Taboada} +maja-popovic: + names: + - {first: Maja, last: Popović} + - {first: Maja, last: Popovic} +makoto-nagao: + names: + - {first: Makoto, last: Nagao} + - {first: M., last: Nagao} +malarkodi-c-s: + names: + - {first: Malarkodi, last: C.S.} + - {first: Malarkodi, last: C.S} + - {first: CS., last: Malarkodi} +malgorzata-marciniak: + names: + - {first: Malgorzata, last: Marciniak} + - {first: Małgorzata, last: Marciniak} +malhar-kulkarni: + names: + - {first: Malhar, last: Kulkarni} + - {first: Malhar A., last: Kulkarni} +man-lan: + names: + - {first: Man, last: Lan} + - {first: Lan, last: Man} +manabu-okumura: + names: + - {first: Manabu, last: Okumura} + - {first: Manabu, last: Okumara} +manish-shrivastava: + names: + - {first: Manish, last: Shrivastava} + - {first: Manish, last: Srivastava} +manny-rayner: + names: + - {first: Manny, last: Rayner} + - {first: M., last: Rayner} +manoj-chinnakotla: + names: + - {first: Manoj, last: Chinnakotla} + - {first: Manoj K., last: Chinnakotla} + - {first: Manoj Kumar, last: Chinnakotla} +manuel-garcia-vega: + names: + - {first: Manuel, last: García-Vega} + - {first: Manuel, last: García} +manuel-j-mana-lopez: + names: + - {first: Manuel J., last: Maña López} + - {first: Manuel J., last: Maña} + - {first: Manual Maña, last: López} + - {first: Manuel, last: Maña López} +manuel-montes: + names: + - {first: Manuel, last: Montes} + - {first: Manuel, last: Montes-y-Gómez} + - {first: Manuel, last: Montes y Gomez} + - {first: Manuel, last: Montes y Gómez} +manuel-palomar: + names: + - {first: Manuel, last: Palomar} + - {first: M., last: Palomar} +manuel-r-ciosici: + names: + - {first: Manuel R., last: Ciosici} + - {first: Manuel, last: Ciosici} +manuela-huerlimann: + names: + - {first: Manuela, last: Huerlimann} + - {first: Manuela, last: Hürlimann} + - {first: Manuela, last: Huerliman} +manuela-speranza: + names: + - {first: Manuela, last: Speranza} + - {first: M., last: Speranza} +mar-garcia: + names: + - {first: Mar, last: García} + - {first: M., last: García} +marc-el-beze: + names: + - {first: Marc, last: El-Bèze} + - {first: Marc, last: El-Beze} + - {first: M., last: El-Bèze} +marc-vilain: + names: + - {first: Marc, last: Vilain} + - {first: Marc B., last: Vilain} +marcel-adam-just: + names: + - {first: Marcel Adam, last: Just} + - {first: Marcel, last: Just} +marcel-p-van-lohuizen: + names: + - {first: Marcel P., last: van Lohuizen} + - {first: Marcel P., last: Van Lohuizen} +marcela-charfuelan: + names: + - {first: Marcela, last: Charfuelan} + - {first: Marcela, last: Charfuelán} +marcelo-adriano-amancio: + names: + - {first: Marcelo Adriano, last: Amâncio} + - {first: Marcelo, last: Amancio} +marcia-c-linebarger: + names: + - {first: Marcia C., last: Linebarger} + - {first: Marcia, last: Linebarger} +marcin-szummer: + names: + - {first: Marcin, last: Szummer} + - {first: Martin, last: Szummer} +marcis-pinnis: + names: + - {first: Mārcis, last: Pinnis} + - {first: Marcis, last: Pinnis} +marco-a-valenzuela-escarcega: + names: + - {first: Marco A., last: Valenzuela-Escárcega} + - {first: Marco Antonio, last: Valenzuela-Escárcega} +marco-aldo-piccolino-boniforti: + names: + - {first: Marco Aldo Piccolino, last: Boniforti} + - {first: Marco Aldo, last: Piccolino Boniforti} +marco-antonio-sobrevilla-cabezudo: + names: + - {first: Marco Antonio, last: Sobrevilla Cabezudo} + - {first: Marco A., last: Sobrevilla Cabezudo} + - {first: Marco, last: Sobrevilla} +marco-antonio-stranisci: + names: + - {first: Marco Antonio, last: Stranisci} + - {first: Marco, last: Stranisci} +marco-baroni: + names: + - {first: Marco, last: Baroni} + - {first: M., last: Baroni} +marco-battista: + names: + - {first: Marco, last: Battista} + - {first: M., last: Battista} +marco-martin: + names: + - {first: Marco, last: Martin} + - {first: M., last: Martin} +marco-matassoni: + names: + - {first: Marco, last: Matassoni} + - {first: M., last: Matassoni} +marco-tulio-ribeiro: + names: + - {first: Marco Tulio, last: Ribeiro} + - {first: Marco, last: Ribeiro} +marcos-didonet-del-fabro: + names: + - {first: Marcos Didonet Del, last: Fabro} + - {first: Marcus Didonet, last: Del Fabro} +marcos-garcia-salido: + names: + - {first: Marcos, last: García Salido} + - {first: Marcos, last: García-Salido} +marcus-uneson: + names: + - {first: Marcus, last: Uneson} + - {first: Markus, last: Uneson} +mare-koit: + names: + - {first: Mare, last: Koit} + - {first: M., last: Koit} +marek-kozlowski: + names: + - {first: Marek, last: Kozlowski} + - {first: Marek, last: Kozłowski} +margaret-kern: + names: + - {first: Margaret, last: Kern} + - {first: Margaret L., last: Kern} +margaret-mcrorie: + names: + - {first: Margaret, last: McRorie} + - {first: M., last: McRorie} +margarita-alonso-ramos: + names: + - {first: Margarita Alonso, last: Ramos} + - {first: Margarita, last: Alonso-Ramos} +margo-stys-budzikowska: + names: + - {first: Margo, last: Stys-Budzikowska} + - {first: Margo, last: Budzikowska} + - {first: Margo, last: Stys} +mari-broman-olsen: + names: + - {first: Mari Broman, last: Olsen} + - {first: Mari, last: Olsen} +mari-carmen-rodriguez-gancedo: + names: + - {first: Mari Carmen, last: Rodriguez-Gancedo} + - {first: M. Carmen Rodríguez, last: Gancedo} + - {first: M. Carmen, last: Rodríguez} + - {first: Mari Carmen, last: Rodríguez} +mari-carmen-suarez-figueroa: + names: + - {first: Mari Carmen, last: Suárez-Figueroa} + - {first: M. Carmen, last: Suárez-Figueroa} +mari-ostendorf: + names: + - {first: Mari, last: Ostendorf} + - {first: M., last: Ostendorf} + - {first: M, last: Ostendorf} +maria-berger: + names: + - {first: Maria, last: Berger} + - {first: Maria, last: Moritz} +maria-clara-paixao-de-sousa: + names: + - {first: Maria Clara Paixão de, last: Sousa} + - {first: Maria Clara, last: Paixão de Sousa} +maria-das-gracas-volpe-nunes: + names: + - {first: Maria das Graças, last: Volpe Nunes} + - {first: Maria, last: das Graças Volpe Nunes} + - {first: Maria, last: das Gracas Volpe Nunes} + - {first: Maria das Graças Volpe, last: Nunes} + - {first: Maria, last: das Graças} + - {first: Maria das Graças V., last: Nunes} + - {first: Maria das Graças, last: Nunes} + - {first: Maria das Gracas, last: Volpe} +maria-fernanda-bacelar-do-nascimento: + names: + - {first: Maria Fernanda Bacelar do, last: Nascimento} + - {first: Maria Fernanda Bacelar, last: do Nascimento} + - {first: Fernanda Bacelar, last: do Nascimento} +maria-fuentes: + names: + - {first: Maria, last: Fuentes} + - {first: Maria, last: Fuentes Fort} +maria-gavrilidou: + names: + - {first: Maria, last: Gavrilidou} + - {first: M., last: Gavrilidou} +maria-jose-b-finatto: + names: + - {first: Maria José B., last: Finatto} + - {first: Maria José, last: Finatto} + - {first: Maria José Bocorny, last: Finatto} +maria-jose-castro-bleda: + names: + - {first: Maria Jose, last: Castro-Bleda} + - {first: María José, last: Castro} + - {first: María-José, last: Castro} +maria-leonor-pacheco: + names: + - {first: María Leonor, last: Pacheco} + - {first: Maria Leonor, last: Pacheco} +maria-lucia-castro-jorge: + names: + - {first: Maria Lucia, last: Castro Jorge} + - {first: Maria Lucía Castro, last: Jorge} + - {first: Maria Lucía, last: Castro Jorge} +maria-nadejde: + names: + - {first: Maria, last: Nadejde} + - {first: Maria, last: Nădejde} +maria-novella-catarsi: + names: + - {first: Maria Novella, last: Catarsi} + - {first: M. N., last: Catarsi} +maria-pozzi: + names: + - {first: Maria, last: Pozzi} + - {first: María, last: Pozzi} + - {first: Mara, last: Pozzi} +maria-ruiz-casado: + names: + - {first: María, last: Ruiz-Casado} + - {first: Maria, last: Ruiz-Casado} +maria-teresa-cabre: + names: + - {first: Maria Teresa, last: Cabré} + - {first: M. Teresa, last: Cabré} + - {first: Teresa, last: Cabré} +maria-teresa-lino: + names: + - {first: Maria Teresa, last: Lino} + - {first: Teresa, last: Lino} +maria-teresa-pazienza: + names: + - {first: Maria Teresa, last: Pazienza} + - {first: Maria Teresa, last: Pazienze} + - {first: Maria, last: Pazienza} + - {first: M. T., last: Pazienza} + - {first: M.T., last: Pazienza} +maria-toporowska-gronostaj: + names: + - {first: Maria Toporowska, last: Gronostaj} + - {first: Maria, last: Toporowska Gronostaj} +marian-trnka: + names: + - {first: Marian, last: Trnka} + - {first: Marián, last: Trnka} +marianna-martindale: + names: + - {first: Marianna, last: Martindale} + - {first: Marianna J., last: Martindale} +marianne-dabbadie: + names: + - {first: Marianne, last: Dabbadie} + - {first: M., last: Dabbadie} +marie-a-bienkowski: + names: + - {first: Marie A., last: Bienkowski} + - {first: Marie, last: Bienkowski} +marie-candito: + names: + - {first: Marie, last: Candito} + - {first: Marie-Helene, last: Candito} + - {first: Marie-Hélène, last: Candito} +marie-catherine-de-marneffe: + names: + - {first: Marie-Catherine, last: de Marneffe} + - {first: Marie Catherine, last: de Marneffe} +marie-claude-lhomme: + names: + - {first: Marie-Claude, last: L’Homme} + - {first: Marie-Claude, last: L’ Homme} +marie-francine-moens: + names: + - {first: Marie Francine, last: Moens} + - {first: Marie-Francine, last: Moens} +marie-helene-metzger: + names: + - {first: Marie Hélène, last: Metzger} + - {first: Marie-Hélène, last: Metzger} +marie-helene-stefanini: + names: + - {first: Marie-Hélène, last: Stéfanini} + - {first: Marie-Helene, last: Stefanini} +marie-hinrichs: + names: + - {first: Marie, last: Hinrichs} + - {first: Marie, last: Boyle-Hinrichs} +marie-meteer: + names: + - {first: Marie, last: Meteer} + - {first: Marie W., last: Meteer} +marie-neige-garcia: + names: + - {first: Marie-Neige, last: Garcia} + - {first: M. N., last: Garcia} +marie-paule-pery-woodley: + names: + - {first: Marie-Paule, last: Pery-Woodley} + - {first: Marie-Paule, last: Péry-Woodley} +marielle-leijten: + names: + - {first: Marielle, last: Leijten} + - {first: Mariëlle, last: Leijten} +mariem-ellouze-khemekhem: + names: + - {first: Mariem, last: Ellouze Khemekhem} + - {first: Mariem, last: Ellouze Khemakhem} + - {first: Mariem, last: Ellouze} + - {first: Mariem Ellouze, last: Khmekhem} + - {first: Mariem, last: Ellouze khemekhem} +mariet-theune: + names: + - {first: Mariët, last: Theune} + - {first: Mariet, last: Theune} + - {first: M., last: Theune} +marilyn-walker: + names: + - {first: Marilyn, last: Walker} + - {first: Marilyn A., last: Walker} + - {first: M. A., last: Walker} +marina-vigario: + names: + - {first: Marina, last: Vigário} + - {first: M., last: Vigário} +mario-j-silva: + names: + - {first: Mario J., last: Silva} + - {first: Mário J., last: Silva} + - {first: Mário, last: Silva} +marion-weller-di-marco: + names: + - {first: Marion, last: Weller-Di Marco} + - {first: Marion, last: Di Marco} +mariona-coll-ardanuy: + names: + - {first: Mariona, last: Coll Ardanuy} + - {first: Mariona Coll, last: Ardanuy} +mariona-taule: + names: + - {first: Mariona, last: Taulé} + - {first: Mariona, last: Taule} + - {first: M., last: Taulé} +marius-pasca: + names: + - {first: Marius, last: Pasca} + - {first: Marius A., last: Pasca} + - {first: Marius, last: Paşca} +marjo-van-koppen: + names: + - {first: Marjo, last: Van Koppen} + - {first: Marjo, last: van Koppen} +mark-a-clements: + names: + - {first: Mark A., last: Clements} + - {first: Mark, last: Clements} +mark-a-greenwood: + names: + - {first: Mark A., last: Greenwood} + - {first: Mark, last: Greenwood} +mark-carman: + names: + - {first: Mark, last: Carman} + - {first: Mark J., last: Carman} + - {first: Mark James, last: Carman} +mark-christoph-muller: + names: + - {first: Mark-Christoph, last: Müller} + - {first: Mark-Christoph, last: Mueller} +mark-epstein: + names: + - {first: Mark, last: Epstein} + - {first: M., last: Epstein} +mark-finlayson: + names: + - {first: Mark, last: Finlayson} + - {first: Mark A., last: Finlayson} +mark-g-core: + names: + - {first: Mark G., last: Core} + - {first: Mark, last: Core} +mark-h-smith: + names: + - {first: Mark H., last: Smith} + - {first: Mark, last: Smith} +mark-hall: + names: + - {first: Mark, last: Hall} + - {first: Mark Michael, last: Hall} +mark-jones: + names: + - {first: Mark, last: Jones} + - {first: Mark A., last: Jones} + - {first: Mark Alan, last: Jones} +mark-lee: + names: + - {first: Mark, last: Lee} + - {first: Mark G., last: Lee} + - {first: M.G., last: Lee} +mark-liberman: + names: + - {first: Mark, last: Liberman} + - {first: Mark Y., last: Liberman} + - {first: M. Y., last: Liberman} + - {first: M., last: Liberman} +mark-przybocki: + names: + - {first: Mark, last: Przybocki} + - {first: Mark A., last: Przybocki} +mark-s-tuttle: + names: + - {first: Mark S., last: Tuttle} + - {first: M. S., last: Tuttle} +mark-steedman: + names: + - {first: Mark, last: Steedman} + - {first: M., last: Steedman} +mark-t-maybury: + names: + - {first: Mark T., last: Maybury} + - {first: Mark, last: Maybury} +mark-w-davis: + names: + - {first: Mark W., last: Davis} + - {first: Mark, last: Davis} +marketa-lopatkova: + names: + - {first: Marketa, last: Lopatkova} + - {first: Markéta, last: Straňáková-Lopatková} + - {first: Markéta, last: Lopatková} +marsal-gavalda: + names: + - {first: Marsal, last: Gavalda} + - {first: Marsal, last: Gavaldà} +marsden-s-blois: + names: + - {first: Marsden S., last: Blois} + - {first: M. S., last: Blois} +marta-garrote-salazar: + names: + - {first: Marta, last: Garrote-Salazar} + - {first: Marta, last: Garrote} +marta-r-costa-jussa: + names: + - {first: Marta R., last: Costa-jussà} + - {first: Marta, last: R. Costa-jussà} + - {first: Marta R., last: Costa-Jussà} + - {first: Marta R., last: Costa-Jussa} + - {first: Marta, last: Ruiz Costa-jussà} + - {first: Marta Ruiz, last: Costa-jussà} +marten-during-ul: + comment: University of Luxembourg + names: + - {first: Marten, last: During} +marten-van-schijndel: + names: + - {first: Marten, last: van Schijndel} + - {first: Marten, last: Van Schijndel} + - {first: Martin, last: van Schijndel} +martha-alicia-rocha: + names: + - {first: Martha-Alicia, last: Rocha} + - {first: Martha Alicia, last: Rocha} +martha-evens: + names: + - {first: Martha, last: Evens} + - {first: Martha W., last: Evens} + - {first: Martha W, last: Evens} +martha-palmer: + names: + - {first: Martha, last: Palmer} + - {first: Martha Stone, last: Palmer} + - {first: Martha S., last: Palmer} +martha-yifiru-tachbelie: + names: + - {first: Martha Yifiru, last: Tachbelie} + - {first: Martha, last: Yifiru Tachbelie} +marti-a-hearst: + names: + - {first: Marti A., last: Hearst} + - {first: Marti, last: Hearst} +martin-ariel-dominguez: + names: + - {first: Martin Ariel, last: Dominguez} + - {first: Martín, last: Domínguez} + - {first: Martin Ariel, last: Domínguez} +martin-c-emele: + names: + - {first: Martin C., last: Emele} + - {first: Martin, last: Emele} +martin-chodorow: + names: + - {first: Martin, last: Chodorow} + - {first: Martin S., last: Chodorow} +martin-cmejrek: + names: + - {first: Martin, last: Cmejrek} + - {first: Martin, last: Čmejrek} +martin-hofmann-apitius: + names: + - {first: Martin, last: Hofmann--Apitius} + - {first: Martin, last: Hofmann-Apitius} +martin-krallinger: + names: + - {first: Martin, last: Krallinger} + - {first: M., last: Krallinger} +martin-labsky: + names: + - {first: Martin, last: Labský} + - {first: Martin, last: Labsky} +martin-pereira-farina: + names: + - {first: Martín, last: Pereira-Fariña} + - {first: M., last: Pereira-Fariña} +martin-platek: + names: + - {first: Martin, last: Platek} + - {first: Martin, last: Plátek} +martin-rajman: + names: + - {first: Martin, last: Rajman} + - {first: M., last: Rajman} +martin-russell: + names: + - {first: Martin, last: Russell} + - {first: M., last: Russell} +martin-schaler: + names: + - {first: Martin, last: Schäler} + - {first: Martin, last: Schäfer} +martine-adda-decker: + names: + - {first: Martine, last: Adda-Decker} + - {first: Martine, last: Adda-decker} + - {first: M., last: Adda-Decker} +martine-de-calmes: + names: + - {first: Martine, last: de Calmès} + - {first: M., last: de Calmes} +mary-elaine-califf: + names: + - {first: Mary Elaine, last: Califf} + - {first: M. E., last: Califf} +mary-ellen-foster: + names: + - {first: Mary Ellen, last: Foster} + - {first: Mary E., last: Foster} +mary-harper: + names: + - {first: Mary, last: Harper} + - {first: Mary P., last: Harper} + - {first: M. P., last: Harper} +mary-mcgee-wood: + names: + - {first: Mary McGee, last: Wood} + - {first: Mary, last: McGee Wood} + - {first: M., last: McGee Wood} +mary-s-neff: + names: + - {first: Mary S., last: Neff} + - {first: Mary, last: Neff} +mary-swift: + names: + - {first: Mary, last: Swift} + - {first: Mary D., last: Swift} +masahiro-araki: + names: + - {first: Masahiro, last: Araki} + - {first: M., last: Araki} +masakatsu-sugimoto: + names: + - {first: Masakatsu, last: Sugimoto} + - {first: M., last: Sugimoto} +masako-kume: + names: + - {first: Masako, last: Kume} + - {first: M., last: Kume} +masaru-tomita: + names: + - {first: Masaru, last: Tomita} + - {first: M., last: Tomita} +masatake-dantsuji: + names: + - {first: Masatake, last: Dantsuji} + - {first: M., last: Dantsuji} +masato-ishizaki: + names: + - {first: Masato, last: Ishizaki} + - {first: M., last: Ishizaki} +massih-r-amini: + names: + - {first: Massih R., last: Amini} + - {first: Massih-Reza, last: Amini} +massimo-poesio: + names: + - {first: Massimo, last: Poesio} + - {first: M., last: Poesio} +matej-durco: + names: + - {first: Matej, last: Ďurčo} + - {first: Matej, last: Durco} +mateja-verlic: + names: + - {first: Mateja, last: Verlič} + - {first: Mateja, last: Verlic} +mathew-huerta-enochian: + names: + - {first: Mathew, last: Huerta-Enochian} +mathieu-mangeot: + names: + - {first: Mathieu, last: Mangeot} + - {first: Mathieu, last: Mangeot-Lerebours} +matko-bosnjak: + names: + - {first: Matko, last: Bosnjak} + - {first: Matko, last: Bošnjak} +mats-wiren: + names: + - {first: Mats, last: Wirén} + - {first: Mats, last: Wiren} +matteo-negri: + names: + - {first: Matteo, last: Negri} + - {first: M., last: Negri} +matthew-crocker: + names: + - {first: Matthew, last: Crocker} + - {first: Matthew W., last: Crocker} +matthew-e-peters: + names: + - {first: Matthew E., last: Peters} + - {first: Matthew, last: Peters} +matthew-gerber: + names: + - {first: Matthew, last: Gerber} + - {first: Matt, last: Gerber} + - {first: Matthew S., last: Gerber} + - {first: Matthew, last: Garber} +matthew-j-green: + names: + - {first: Matthew J., last: Green} + - {first: Matthew, last: Green} +matthew-r-gormley: + names: + - {first: Matthew R., last: Gormley} + - {first: Matthew, last: Gormley} +matthew-snover: + names: + - {first: Matthew, last: Snover} + - {first: Matthew G., last: Snover} +matthew-w-bilotti: + names: + - {first: Matthew W., last: Bilotti} + - {first: Matthew, last: Bilotti} +matthias-hartung: + names: + - {first: Matthias, last: Hartung} + - {first: M., last: Hartung} +matthieu-constant: + names: + - {first: Matthieu, last: Constant} + - {first: Mathieu, last: Constant} +matthieu-quignard: + names: + - {first: Matthieu, last: Quignard} + - {first: M., last: Quignard} +matti-ylilammi: + names: + - {first: Matti, last: Ylilammi} + - {first: M., last: Ylilammi} +mattia-a-di-gangi: + names: + - {first: Mattia A., last: Di Gangi} + - {first: Mattia Antonino, last: Di Gangi} + - {first: Mattia, last: Di Gangi} +matus-pleva: + names: + - {first: Matúš, last: Pleva} + - {first: Matus, last: Pleva} +maunendra-sankar-desarkar: + names: + - {first: Maunendra Sankar, last: Desarkar} + - {first: Maunendra, last: Sankar Desarkar} +maurice-quezel-ambrunaz: + names: + - {first: Maurice, last: Quezel-Ambrunaz} + - {first: M., last: Quezel-Ambrunaz} +maurizio-omologo: + names: + - {first: Maurizio, last: Omologo} + - {first: M., last: Omologo} +maurizio-tesconi: + names: + - {first: Maurizio, last: Tesconi} + - {first: Maurizio, last: Tescon} +mauro-di-manzo: + names: + - {first: Mauro, last: Di Manzo} + - {first: M., last: Di Manzo} +max-silberztein: + names: + - {first: Max, last: Silberztein} + - {first: Max D., last: Silberztein} +maximiliano-saiz-noeda: + names: + - {first: Maximiliano, last: Saiz-Noeda} + - {first: M., last: Saiz-Noeda} +maxwell-weinzierl: + names: + - {first: Maxwell, last: Weinzierl} + - {first: Maxwell A., last: Weinzierl} +mayank-n-vahia: + names: + - {first: Mayank N., last: Vahia} + - {first: Mayank, last: Vahia} +mayank-singh-az: + comment: University of Arizona + disable_name_matching: true + names: + - {first: Mayank, last: Singh} +mazhar-mehdi-hussain: + names: + - {first: Mazhar Mehdi, last: Hussain} + - {first: Mazhar, last: Hussain} +md-anwarus-salam-khan: + names: + - {first: Md. Anwarus Salam, last: Khan} + - {first: Khan Md. Anwarus, last: Salam} + - {first: Khan Md., last: Anwarus Salam} +md-arafat-sultan: + names: + - {first: Md Arafat, last: Sultan} + - {first: Md. Arafat, last: Sultan} + - {first: Md., last: Sultan} +md-maruf-hasan: + names: + - {first: Md. Maruf, last: Hasan} + - {first: Md Maruf, last: Hasan} + - {first: Maruf, last: Hasan} +md-rizwan-parvez: + names: + - {first: Md. Rizwan, last: Parvez} + - {first: Md Rizwan, last: Parvez} +meenakshi-nagarajan: + names: + - {first: Meenakshi, last: Nagarajan} + - {first: Meena, last: Nagarajan} +meghan-glenn: + names: + - {first: Meghan, last: Glenn} + - {first: Meghan Lammie, last: Glenn} +mehrnoosh-sadrzadeh: + names: + - {first: Mehrnoosh, last: Sadrzadeh} + - {first: M., last: Sadrzadeh} +mei-chih-tsai: + names: + - {first: Mei-Chih, last: Tsai} + - {first: Mei-chih, last: Tsai} +mei-chun-liu: + names: + - {first: Mei-Chun, last: Liu} + - {first: Mei-chun, last: Liu} +mei-hua-chen: + names: + - {first: Mei-hua, last: Chen} + - {first: Mei-Hua, last: Chen} +mei-yuh-hwang: + names: + - {first: Mei-Yuh, last: Hwang} + - {first: M., last: Hwang} +melanie-martin: + names: + - {first: Melanie, last: Martin} + - {first: Melanie J., last: Martin} +meliha-yetisgen-yildiz: + names: + - {first: Meliha, last: Yetisgen-Yildiz} + - {first: Meliha, last: Yetisgen} + - {first: Meliha, last: Yetişgen} +memduh-gokirmak: + names: + - {first: Memduh, last: Gökırmak} + - {first: Memduh, last: Gokirmak} +menno-van-zaanen: + names: + - {first: Menno, last: van Zaanen} + - {first: Menno, last: van Zannen} +mercedes-garcia-martinez: + names: + - {first: Mercedes, last: García-Martínez} + - {first: Mercedes García, last: Martínez} +meriama-laib: + names: + - {first: Meriama, last: Laib} + - {first: Meriama, last: Laïb} + - {first: Mariama, last: Laib} +meritxell-gonzalez: + names: + - {first: Meritxell, last: Gonzàlez} + - {first: Meritxell, last: González} + - {first: M., last: González} +mi-zhang-ucd: + comment: Dublin + disable_name_matching: true + names: + - {first: Mi, last: Zhang} + orcid: 0000-0003-3567-3478 +mia-xu-chen: + names: + - {first: Mia Xu, last: Chen} + - {first: Mia, last: Chen} +michael-b-kac: + names: + - {first: Michael B., last: Kac} + - {first: Michael, last: Kac} +michael-c-frank: + names: + - {first: Michael C., last: Frank} + - {first: Michael, last: Frank} +michael-c-mccord: + names: + - {first: Michael C., last: McCord} + - {first: Michael, last: McCord} +michael-collins: + names: + - {first: Michael, last: Collins} + - {first: Michael John, last: Collins} + - {first: Mike, last: Collins} +michael-crystal: + names: + - {first: Michael, last: Crystal} + - {first: Michael R., last: Crystal} +michael-e-jahr: + names: + - {first: Michael E., last: Jahr} + - {first: Michael, last: Jahr} +michael-ellsworth: + names: + - {first: Michael, last: Ellsworth} + - {first: Michael J., last: Ellsworth} +michael-f-mctear: + names: + - {first: Michael F., last: McTear} + - {first: Michael, last: McTear} +michael-g-dyer: + names: + - {first: Michael G., last: Dyer} + - {first: Michael, last: Dyer} +michael-glass: + names: + - {first: Michael, last: Glass} + - {first: Michael R., last: Glass} +michael-i-jordan: + names: + - {first: Michael I., last: Jordan} + - {first: Michael, last: Jordan} +michael-j-cafarella: + names: + - {first: Michael J., last: Cafarella} + - {first: Michael, last: Cafarella} +michael-j-pan: + names: + - {first: Michael J., last: Pan} + - {first: Michael, last: Pan} +michael-j-witbrock: + names: + - {first: Michael J., last: Witbrock} + - {first: Michael, last: Witbrock} +michael-johnston: + names: + - {first: Michael, last: Johnston} + - {first: M., last: Johnston} +michael-jones: + names: + - {first: Michael, last: Jones} + - {first: Michael P., last: Jones} +michael-k-brown: + names: + - {first: Michael K., last: Brown} + - {first: Michael, last: Brown} +michael-kaisser: + names: + - {first: Michael, last: Kaisser} + - {first: Michael, last: Kaißer} +michael-l-mauldin: + names: + - {first: Michael L., last: Mauldin} + - {first: Michael, last: Mauldin} +michael-l-mc-hale: + names: + - {first: Michael L., last: Mc Hale} + - {first: Michael L., last: McHale} +michael-mandel: + names: + - {first: Michael, last: Mandel} + - {first: Michael, last: Mandl} +michael-minock: + names: + - {first: Michael, last: Minock} + - {first: Michael J., last: Minock} +michael-moortgat: + names: + - {first: Michael, last: Moortgat} + - {first: M., last: Moortgat} +michael-paul: + names: + - {first: Michael, last: Paul} + - {first: Michael J., last: Paul} +michael-phillips: + names: + - {first: Michael, last: Phillips} + - {first: M., last: Phillips} +michael-riley: + names: + - {first: Michael, last: Riley} + - {first: Michael D., last: Riley} +michael-rosner: + names: + - {first: Michael, last: Rosner} + - {first: Mike, last: Rosner} + - {first: M., last: Rosner} + - {first: M.A., last: Rosner} +michael-s-kearns: + names: + - {first: Michael S., last: Kearns} + - {first: Michael, last: Kearns} +michael-schlichtkrull: + names: + - {first: Michael, last: Schlichtkrull} + - {first: Michael Sejr, last: Schlichtkrull} +michael-skinner: + names: + - {first: Michael, last: Skinner} + - {first: Michael A., last: Skinner} +michael-t-johnson: + names: + - {first: Michael T., last: Johnson} + - {first: M. T., last: Johnson} +michael-wayne-goodman: + names: + - {first: Michael Wayne, last: Goodman} + - {first: Michael, last: Goodman} +michael-white: + names: + - {first: Michael, last: White} + - {first: Mike, last: White} +michele-jardino: + names: + - {first: Michèle, last: Jardino} + - {first: Michele, last: Jardino} + - {first: M., last: Jardino} +michelle-gregory: + names: + - {first: Michelle, last: Gregory} + - {first: Michelle L., last: Gregory} + - {first: M. L., last: Gregory} +michelle-q-wang: + names: + - {first: Michelle Q., last: Wang} + - {first: Michelle, last: Wang} +mickey-w-c-chong: + names: + - {first: Mickey W. C., last: Chong} + - {first: Mickey W.C., last: Chong} +miguel-a-alonso: + names: + - {first: Miguel A., last: Alonso} + - {first: Miguel, last: Alonso Pardo} + - {first: Miguel A., last: Alonso Pardo} +miguel-angel-garcia-cumbreras: + names: + - {first: Miguel Ángel, last: García-Cumbreras} + - {first: M. Ángel, last: García} + - {first: Miguel, last: García-Cumbreras} + - {first: Miguel Á., last: García Cumbreras} +miguel-b-almeida: + names: + - {first: Miguel B., last: Almeida} + - {first: Miguel, last: Almeida} +miguel-rodriguez-hernandez: + names: + - {first: Miguel, last: Rodríguez Hernández} + - {first: Miguel Ángel, last: Rodríguez} + - {first: Miguel, last: Rodríguez} +miguel-sales-dias: + names: + - {first: Miguel Sales, last: Dias} + - {first: Miguel, last: Dias} +mihael-arcan: + names: + - {first: Mihael, last: Arcan} + - {first: Mihael, last: Arčan} +mihaela-plamada-onofrei: + names: + - {first: Mihaela, last: Plamada-Onofrei} + - {first: Mihaela, last: Onofrei} + - {first: Mihaela, last: Plămadă-Onofrei} +miikka-silfverberg: + names: + - {first: Miikka, last: Silfverberg} + - {first: Miikka P., last: Silfverberg} +mijail-kabadjov: + names: + - {first: Mijail, last: Kabadjov} + - {first: Mijail A., last: Kabadjov} + - {first: Mijail, last: Alexandrov-Kabadjov} + - {first: M. A., last: Kabadjov} +mike-reape: + names: + - {first: Mike, last: Reape} + - {first: M, last: Reape} +mike-tian-jian-jiang: + names: + - {first: Mike Tian-Jian, last: Jiang} + - {first: Tian-Jian, last: Jiang} +mikel-iruskieta: + names: + - {first: Mikel, last: Iruskieta} + - {first: M., last: Iruskieta} +mikel-l-forcada: + names: + - {first: Mikel L., last: Forcada} + - {first: Mikel, last: Forcada} +mikel-lersundi: + names: + - {first: Mikel, last: Lersundi} + - {first: M., last: Lersundi} +mikel-penagarikano: + names: + - {first: Mikel, last: Penagarikano} + - {first: M., last: Peñagarikano} +milan-bily: + names: + - {first: Milan, last: Bily} + - {first: Milan, last: Bílý} +milica-gasic: + names: + - {first: Milica, last: Gasic} + - {first: Milica, last: Gašić} +min-hua-lai: + names: + - {first: Min-Hua, last: Lai} + - {first: Min Hua, last: Lai} +ming-chin-yen: + names: + - {first: Ming-chin, last: Yen} + - {first: Ming-Chin, last: Yen} +ming-chui-dong: + names: + - {first: Ming Chui, last: Dong} + - {first: Ming-Chui, last: Dong} +ming-feng-tsai: + names: + - {first: Ming-Feng, last: Tsai} + - {first: Meng-Feng, last: Tsai} +ming-jer-wu: + names: + - {first: Ming-Jer, last: Wu} + - {first: Min-Jer, last: Wu} +ming-shing-yu: + names: + - {first: Ming-Shing, last: Yu} + - {first: Ming-shing, last: Yu} +mingbin-xu: + names: + - {first: Mingbin, last: Xu} + - {first: MingBin, last: Xu} +minghua-nuo: + names: + - {first: Minghua, last: Nuo} + - {first: Ming Hua, last: Nuo} +mingwen-wang: + names: + - {first: Mingwen, last: Wang} + - {first: MingWen, last: Wang} + - {first: Ming-Wei, last: Wang} +minh-le-nguyen: + names: + - {first: Minh Le, last: Nguyen} + - {first: Minh-Le, last: Nguyen} + - {first: Le-Minh, last: Nguyen} + - {first: Nguyen Le, last: Minh} + - {first: Le Minh, last: Nguyen} + - {first: Nguyen, last: Le Minh} + - {first: M.L, last: Nguyen} +minh-quang-nhat-pham: + comment: JAIST, Alt Vietnam + names: + - {first: Minh Quang Nhat, last: Pham} + similar: + - minh-quang-pham +minh-quang-pham: + comment: SYSTRAN + names: + - {first: Minh Quang, last: Pham} + - {first: MinhQuang, last: Pham} + similar: + - minh-quang-nhat-pham +minh-thang-luong: + names: + - {first: Minh-Thang, last: Luong} + - {first: Thang, last: Luong} +miquel-espla-gomis: + names: + - {first: Miquel, last: Esplà-Gomis} + - {first: Miquel, last: Esplà} +mireia-diez: + names: + - {first: Mireia, last: Diez} + - {first: Mireia, last: Díez} +mireia-ginesti-rosell: + names: + - {first: Mireia, last: Ginestí-Rosell} + - {first: Mireia, last: Ginestí Rosell} +miriam-r-l-petruck: + names: + - {first: Miriam R. L., last: Petruck} + - {first: Miriam R.L., last: Petruck} + - {first: Miriam R L, last: Petruck} +miriam-tavoni: + names: + - {first: Miriam, last: Tavoni} + - {first: M., last: Tavoni} +miriam-urkia: + names: + - {first: Miriam, last: Urkia} + - {first: M, last: Urkia} + - {first: M., last: Urkia} +mirjam-sepesy-maucec: + names: + - {first: Mirjam Sepesy, last: Maucec} + - {first: Mirjam Sepesy, last: Maučec} +mirko-baglioni: + names: + - {first: Mirko, last: Baglioni} + - {first: M., last: Baglioni} +miroslav-martinovic: + names: + - {first: Miroslav, last: Martinović} + - {first: Miroslav, last: Martinovic} +miruna-clinciu: + names: + - {first: Miruna, last: Clinciu} + - {first: Miruna-Adriana, last: Clinciu} +mitch-marcus: + names: + - {first: Mitch, last: Marcus} + - {first: Mitchell, last: Marcus} + - {first: Mitchell P., last: Marcus} + - {first: M., last: Marcus} +mitch-weintraub: + names: + - {first: Mitch, last: Weintraub} + - {first: Mitchel, last: Weintraub} + - {first: M., last: Weintraub} +mitesh-m-khapra: + names: + - {first: Mitesh M., last: Khapra} + - {first: Mitesh, last: Khapra} + - {first: Mitesh, last: M. Khapra} + - {first: Mitesh M, last: Khapra} + - {first: Mitesh Shantadevi, last: Khapra} +mitsuo-shimohata: + names: + - {first: Mitsuo, last: Shimohata} + - {first: M., last: Shimohata} +mohab-el-karef: + names: + - {first: Mohab, last: El-karef} + - {first: Mohab, last: Elkaref} +mohamed-ahmed-sherif: + names: + - {first: Mohamed Ahmed, last: Sherif} + - {first: Mohamed, last: Sherif} +mohamed-altantawy: + names: + - {first: Mohamed, last: Altantawy} + - {first: Mohamed, last: AlTantawy} +mohamed-attia: + names: + - {first: Mohamed, last: Attia} + - {first: M., last: Attia} +mohamed-maamouri: + names: + - {first: Mohamed, last: Maamouri} + - {first: Mohammed, last: Maamouri} +mohamed-mahdi-boudabous: + names: + - {first: Mohamed Mahdi, last: Boudabous} + - {first: Mohamed, last: Boudabous} +mohamed-nassime-hadjadj: + names: + - {first: Mohamed Nassime, last: Hadjadj} + - {first: Mohamed, last: Hadjadj} +mohamed-r-amer: + names: + - {first: Mohamed R., last: Amer} + - {first: Mohamed, last: Amer} +mohamed-zakaria-kurdi: + names: + - {first: Mohamed Zakaria, last: Kurdi} + - {first: Mohamed-Zakaria, last: Kurdi} +mohammad-akbar: + names: + - {first: Mohammad, last: Akbar} + - {first: M., last: Akbar} +mohammad-bahrani: + names: + - {first: Mohammad, last: Bahrani} + - {first: M., last: Bahrani} +mohammed-arif-khan: + names: + - {first: Mohammed Arif, last: Khan} + - {first: Arif, last: Khan} + - {first: Arif Md., last: Khan} +mohan-zhang-unc: + comment: UNC + disable_name_matching: true + names: + - {first: Mohan, last: Zhang} + orcid: 0009-0000-8866-7878 +mohsen-rashwan: + names: + - {first: Mohsen, last: Rashwan} + - {first: M., last: Rashwan} +mokhtar-b-billami: + names: + - {first: Mokhtar B., last: Billami} + - {first: Mokhtar-Boumedyen, last: Billami} +molly-ireland: + names: + - {first: Molly, last: Ireland} + - {first: Molly E., last: Ireland} +mona-diab: + names: + - {first: Mona, last: Diab} + - {first: Mona T., last: Diab} +monica-lestari-paramita: + names: + - {first: Monica Lestari, last: Paramita} + - {first: Monica, last: Paramita} +monika-woszczyna: + names: + - {first: Monika, last: Woszczyna} + - {first: M., last: Woszczyna} +montse-maritxalar: + names: + - {first: Montse, last: Maritxalar} + - {first: M, last: Maritxalar} + - {first: M., last: Maritxalar} +montserrat-civit: + names: + - {first: Montserrat, last: Civit} + - {first: M., last: Civit} +montserrat-marimon: + names: + - {first: Montserrat, last: Marimon} + - {first: Montserrat, last: Marimón} + - {first: Montserrat Marimon, last: Felipe} +montserrat-meya: + names: + - {first: Montserrat, last: Meya} + - {first: M., last: Meya} +moritz-schaeffer: + names: + - {first: Moritz, last: Schaeffer} + - {first: Moritz Jonas, last: Schaeffer} +morris-salkoff: + names: + - {first: Morris, last: Salkoff} + - {first: M., last: Salkoff} +mosleh-hmoud-al-adhaileh: + names: + - {first: Mosleh Hmoud, last: Al-Adhaileh} + - {first: Mosleh H., last: Al-Adhaileh} +mostafa-shahin: + names: + - {first: Mostafa, last: Shahin} + - {first: M., last: Shahin} +muhammad-elnokrashy: + names: + - {first: Muhammad N., last: ElNokrashy} + - {first: Muhammad, last: ElNokrashy} + - {first: Muhammad Nael, last: ElNokrashy} +muhammad-tasnim-mohiuddin: + names: + - {first: Muhammad Tasnim, last: Mohiuddin} + - {first: Tasnim, last: Mohiuddin} +munindar-p-singh: + names: + - {first: Munindar P., last: Singh} + - {first: Munindar, last: Singh} +munirathnam-srikanth: + names: + - {first: Munirathnam, last: Srikanth} + - {first: Muirathnam, last: Srikanth} + - {first: M., last: Srikanth} +muntsa-padro: + names: + - {first: Muntsa, last: Padró} + - {first: M., last: Padró} +murat-saraclar: + names: + - {first: Murat, last: Saraclar} + - {first: Murat, last: Saraçlar} +mustafa-yaseen: + names: + - {first: Mustafa, last: Yaseen} + - {first: M., last: Yaseen} +muthu-kumar-chandrasekaran: + names: + - {first: Muthu Kumar, last: Chandrasekaran} + - {first: Muthu, last: Kumar Chandrasekaran} +muyun-yang: + names: + - {first: Muyun, last: Yang} + - {first: MuYun, last: Yang} + - {first: Mu-yun, last: Yang} +myriam-hernandez: + names: + - {first: Myriam, last: Hernandez} + - {first: Myriam, last: Hernández A} + - {first: Myriam, last: Hernández} +myroslava-o-dzikovska: + names: + - {first: Myroslava O., last: Dzikovska} + - {first: Myroslava, last: Dzikovska} +myung-gil-jang: + names: + - {first: Myung-Gil, last: Jang} + - {first: Myoung-Gil, last: Jang} +myung-kwan-park: + names: + - {first: Myung-Kwan, last: Park} + - {first: Myungkwan, last: Park} +nadia-mana: + names: + - {first: Nadia, last: Mana} + - {first: N., last: Mana} +nafise-sadat-moosavi: + names: + - {first: Nafise Sadat, last: Moosavi} + - {first: Nafise, last: Moosavi} +nagesh-c-panyam: + names: + - {first: Nagesh C., last: Panyam} + - {first: Nagesh, last: C. Panyam} +nagiza-samatova: + names: + - {first: Nagiza, last: Samatova} + - {first: Nagiza F., last: Samatova} +nagwa-m-el-makky: + names: + - {first: Nagwa, last: M. El-Makky} + - {first: Nagwa, last: El-Makky} +naiara-perez: + names: + - {first: Naiara, last: Pérez} + - {first: Naiara, last: Perez-Miguel} + - {first: Naiara, last: Miguel} + orcid: 0000-0001-8648-0428 +naida-graham: + names: + - {first: Naida, last: Graham} + - {first: Naida L., last: Graham} +nam-khanh-tran: + names: + - {first: Nam-Khanh, last: Tran} + - {first: Nam Khanh, last: Tran} +naman-k-gupta: + names: + - {first: Naman K., last: Gupta} + - {first: Naman, last: Gupta} +nancy-chen: + names: + - {first: Nancy, last: Chen} + - {first: Nancy F., last: Chen} +nancy-chinchor: + names: + - {first: Nancy, last: Chinchor} + - {first: Nancy A., last: Chinchor} + - {first: N., last: Chinchor} +nancy-green: + names: + - {first: Nancy, last: Green} + - {first: Nancy L., last: Green} +nancy-ide: + names: + - {first: Nancy, last: Ide} + - {first: Nancy M., last: Ide} +nancy-mccracken: + names: + - {first: Nancy, last: McCracken} + - {first: Nancy J., last: McCracken} +nancy-underwood: + names: + - {first: Nancy, last: Underwood} + - {first: Nancy L., last: Underwood} +nanda-kambhatla: + names: + - {first: Nanda, last: Kambhatla} + - {first: Nandakishore, last: Kambhatla} + - {first: N., last: Kambhatla} +nanette-veilleux: + names: + - {first: Nanette M., last: Veilleux} + - {first: N. M., last: Veilleux} + - {first: N, last: Veilleux} +naoaki-okazaki: + names: + - {first: Naoaki, last: Okazaki} + - {first: Naoki, last: Okazaki} +naomi-feldman: + names: + - {first: Naomi, last: Feldman} + - {first: Naomi H., last: Feldman} +naomi-sager: + names: + - {first: Naomi, last: Sager} + - {first: N., last: Sager} +naoto-kato: + names: + - {first: Naoto, last: Kato} + - {first: Naoto, last: Katoh} +narjes-bellamine-ben-saoud: + names: + - {first: Narjès, last: Bellamine Ben Saoud} + - {first: Narjès Bellamine Ben, last: Saoud} +nasser-smaili: + names: + - {first: Nasser, last: Smaili} + - {first: N., last: Smaili} +natalia-kariaeva-rutgers: + names: + - {first: Natalia Kariaeva, last: Rutgers} + - {first: Natalia, last: Kariaeva} +natalia-klyueva: + names: + - {first: Natalia, last: Klyueva} + - {first: Natalia, last: Kljueva} +natalia-loukachevitch: + names: + - {first: Natalia, last: Loukachevitch} + - {first: Natalia V., last: Loukachevitch} + - {first: N., last: Loukachevitch} +natalia-n-modjeska: + names: + - {first: Natalia N., last: Modjeska} + - {first: Natalia, last: Modjeska} +natalie-kubler: + names: + - {first: Natalie, last: Kübler} + - {first: Natalie, last: Kubler} +natalie-m-schrimpf: + names: + - {first: Natalie M., last: Schrimpf} + - {first: Natalie, last: Schrimpf} +nate-blaylock: + names: + - {first: Nate, last: Blaylock} + - {first: N., last: Blaylock} +nathalie-le-brun: + names: + - {first: Nathalie, last: Le Brun} + - {first: Nathalie Le, last: Brun} +nathalie-rose-lim: + names: + - {first: Nathalie Rose, last: Lim} + - {first: Nathalie, last: Lim} +nathalie-simonin: + names: + - {first: Nathalie, last: Simonin} + - {first: N., last: Simonin} +nathanael-chambers: + names: + - {first: Nathanael, last: Chambers} + - {first: Nathan, last: Chambers} +natsuko-holden: + names: + - {first: Natsuko, last: Holden} + - {first: N., last: Holden} +naveen-kumar-laskari: + names: + - {first: Naveen Kumar, last: Laskari} + - {first: Naveen, last: Kumar} +nazila-hafezi: + names: + - {first: Nazila, last: Hafezi} + - {first: N., last: Hafezi} +ndapandula-nakashole: + names: + - {first: Ndapandula, last: Nakashole} + - {first: Ndapa, last: Nakashole} +necip-fazil-ayan: + names: + - {first: Necip Fazil, last: Ayan} + - {first: Necip, last: Fazil Ayan} +negacy-hailu: + names: + - {first: Negacy, last: Hailu} + - {first: Negacy D., last: Hailu} +neil-tipper: + names: + - {first: Neil, last: Tipper} + - {first: N, last: Tipper} +nelson-f-liu: + names: + - {first: Nelson F., last: Liu} + - {first: Nelson, last: Liu} +nerea-areta: + names: + - {first: Nerea, last: Areta} + - {first: N., last: Areta} +nerea-ezeiza: + names: + - {first: Nerea, last: Ezeiza} + - {first: N., last: Ezeiza} +nghia-the-pham: + names: + - {first: Nghia The, last: Pham} + - {first: Nghia, last: Pham} +ngo-thanh-nhan: + names: + - {first: Ngô Thanh, last: Nhàn} + - {first: Ngo Thanh, last: Nhan} + - {first: NT., last: Nhàn} +ngo-xuan-bach: + names: + - {first: Ngo Xuan, last: Bach} + - {first: Ngo, last: Xuan Bach} +ngoc-quan-pham: + names: + - {first: Ngoc-Quan, last: Pham} + - {first: Ngoc Quan, last: Pham} +ngoc-quang-luong: + names: + - {first: Ngoc Quang, last: Luong} + - {first: Ngoc-Quang, last: Luong} +nguyen-vo: + names: + - {first: Nguyen, last: Vo} + - {first: Nguyen, last: Ha Vo} +nicholas-asher: + names: + - {first: Nicholas, last: Asher} + - {first: Nicolas, last: Asher} +nicholas-j-haddock: + names: + - {first: Nicholas J., last: Haddock} + - {first: Nicholas, last: Haddock} +nicholas-kushmerick: + names: + - {first: Nicholas, last: Kushmerick} + - {first: N., last: Kushmerick} +nick-j-youd: + names: + - {first: Nick J., last: Youd} + - {first: Nick, last: Youd} +nick-rizzolo: + names: + - {first: Nick, last: Rizzolo} + - {first: Nicholas, last: Rizzolo} +nick-webb: + names: + - {first: Nick, last: Webb} + - {first: N., last: Webb} +nicolas-lefebvre: + names: + - {first: Nicolas, last: Lefebvre} + - {first: Nicolas, last: Lefèbvre} +nicolas-marin: + names: + - {first: Nicolas, last: Marin} + - {first: Nicolás, last: Marín} +nicolas-morales: + names: + - {first: Nicolás, last: Morales} + - {first: Nicolas, last: Morales} +nicolas-nedobejkine: + names: + - {first: Nicolas, last: Nedobejkine} + - {first: N., last: Nedobejkine} +nicolas-nicolov: + names: + - {first: Nicolas, last: Nicolov} + - {first: N., last: Nicolov} + similar: + - nikola-i-nikolov +nicolas-r-fauceglia: + names: + - {first: Nicolas R., last: Fauceglia} + - {first: Nicolas, last: Fauceglia} +nicolas-serrano: + names: + - {first: Nicolás, last: Serrano} + - {first: Nicolas, last: Serrano} +nicole-gregoire: + names: + - {first: Nicole, last: Grégoire} + - {first: Nicole, last: Gregoire} +nicoletta-calzolari: + names: + - {first: Nicoletta, last: Calzolari} + - {first: Nicoletta Calzolari, last: Zamorani} + - {first: N., last: Calzolari} +niels-ole-bernsen: + names: + - {first: Niels Ole, last: Bernsen} + - {first: Niels Ole, last: Bernse} + - {first: Niels O., last: Bernsen} +nigel-ward: + names: + - {first: Nigel, last: Ward} + - {first: Nigel G., last: Ward} +niklas-paulsson: + names: + - {first: Niklas, last: Paulsson} + - {first: N., last: Paulsson} +nikola-i-nikolov: + names: + - {first: Nikola I., last: Nikolov} + similar: + - nicolas-nicolov +nikolay-arefyev: + names: + - {first: Nikolay, last: Arefyev} + - {first: Nikolay, last: Arefiev} +nikos-fakotakis: + names: + - {first: Nikos, last: Fakotakis} + - {first: Nikos D., last: Fakotakis} + - {first: N., last: Fakotakis} +nikos-liolios: + names: + - {first: Nikos, last: Liolios} + - {first: N., last: Liolios} +niladri-chatterjee: + names: + - {first: Niladri, last: Chatterjee} + - {first: N., last: Chatterjee} +ning-liu-tsinghua: + degree: Tsinghua University + disable_name_matching: true + names: + - {first: Ning, last: Liu} + orcid: 0000-0001-7475-9739 +nishat-raihan: + names: + - {first: Nishat, last: Raihan} + - {first: Md Nishat, last: Raihan} + orcid: 0000-0001-6242-398X +nishtha-malhotra: + names: + - {first: Nishtha, last: Malhotra} + - {first: Nishta, last: Malhotra} +nives-mikelic-preradovic: + names: + - {first: Nives Mikelić, last: Preradović} + - {first: Nives, last: Mikelić Preradović} +noa-p-cruz-diaz: + names: + - {first: Noa P., last: Cruz Diaz} + - {first: Noa P., last: Cruz} + - {first: Noa, last: Cruz} + - {first: Noa P., last: Cruz Díaz} +noah-a-smith: + names: + - {first: Noah A., last: Smith} + - {first: Noah, last: Smith} +noah-coccaro: + names: + - {first: Noah, last: Coccaro} + - {first: N., last: Coccaro} +noah-goodman: + names: + - {first: Noah, last: Goodman} + - {first: Noah D., last: Goodman} +nobal-bikram-niraula: + names: + - {first: Nobal Bikram, last: Niraula} + - {first: Nobal, last: Niraula} +nobuaki-minematsu: + names: + - {first: Nobuaki, last: Minematsu} + - {first: N., last: Minematsu} +noel-chateau: + names: + - {first: Noël, last: Chateau} + - {first: N., last: Chateau} +noemie-elhadad: + names: + - {first: Noémie, last: Elhadad} + - {first: Noemie, last: Elhadad} +noortje-venhuizen: + names: + - {first: Noortje, last: Venhuizen} + - {first: Noortje J., last: Venhuizen} +norbert-dinstl: + names: + - {first: Norbert, last: Dinstl} + - {first: N., last: Dinstl} +norihito-yasuda: + names: + - {first: Norihito, last: Yasuda} + - {first: Norihi, last: Yasuda} +noriko-h-arai: + names: + - {first: Noriko H., last: Arai} + - {first: Noriko, last: Arai} +noriyuki-tamura: + names: + - {first: Noriyuki, last: Tamura} + - {first: N., last: Tamura} +norman-k-sondheimer: + names: + - {first: Norman K., last: Sondheimer} + - {first: Norman, last: Sondheimer} +norman-m-fraser: + names: + - {first: Norman M., last: Fraser} + - {first: Norman, last: Fraser} +normunds-gruzitis: + names: + - {first: Normunds, last: Gruzitis} + - {first: Normunds, last: Grūzītis} +norton-trevisan-roman: + names: + - {first: Norton Trevisan, last: Roman} + - {first: Norton T., last: Roman} + - {first: Norton, last: Trevisan Roman} +noureddine-chenfour: + names: + - {first: Noureddine, last: Chenfour} + - {first: N., last: Chenfour} +noushin-rezapour-asheghi: + names: + - {first: Noushin Rezapour, last: Asheghi} + - {first: Noushin, last: Rezapour Asheghi} +nuno-mamede: + names: + - {first: Nuno, last: Mamede} + - {first: Nuno J., last: Mamede} +nuria-artigas: + names: + - {first: Núria, last: Artigas} + - {first: N., last: Artigas} +nuria-bel: + names: + - {first: Núria, last: Bel} + - {first: Nuria, last: Bel} +nuria-bertomeu: + names: + - {first: Núria, last: Bertomeu} + - {first: Nuria, last: Bertomeu} + - {first: Núria, last: Bertomeu Castelló} + - {first: Núria Bertomeu, last: Castelló} +nuria-castell: + names: + - {first: Núria, last: Castell} + - {first: Nuria, last: Castell} +nuria-gala: + names: + - {first: Nuria, last: Gala} + - {first: Núria, last: Gala} + - {first: Nùria, last: Gala} +oana-postolache: + names: + - {first: Oana, last: Postolache} + - {first: Oana-Diana, last: Postolache} +octavia-maria-sulea: + names: + - {first: Octavia-Maria, last: Şulea} + - {first: Maria, last: Sulea} + - {first: Octavia-Maria, last: Sulea} + - {first: Maria-Octavia, last: Sulea} +odbayar-chimeddorj: + names: + - {first: Odbayar, last: Chimeddorj} + - {first: Chimeddorj, last: Odbayar} +oier-lopez-de-lacalle: + names: + - {first: Oier, last: Lopez de Lacalle} + - {first: Oier López, last: de Lacalle} + - {first: Oier Lopez, last: de Lacalle} + - {first: Oier, last: López de Lacalle} +oistein-e-andersen: + names: + - {first: Øistein E., last: Andersen} + - {first: Øistein, last: Andersen} +oiwi-parker-jones: + names: + - {first: ‘Ōiwi, last: Parker Jones} + - {first: Oiwi, last: Parker Jones} +olatz-ansa: + names: + - {first: Olatz, last: Ansa} + - {first: O., last: Ansa} +olga-n-lashevskaja: + names: + - {first: Olga N., last: Lashevskaja} + - {first: Olga, last: Lashevskaja} +olivia-o-y-kwong: + names: + - {first: Olivia O.Y., last: Kwong} + - {first: O.Y., last: Kwong} + - {first: Oi Yee, last: Kwong} +olivia-sanchez-graillet: + names: + - {first: Olivia, last: Sanchez-Graillet} + - {first: Olivia, last: Sanchez} +olivier-boeffard: + names: + - {first: Olivier, last: Boëffard} + - {first: Olivier, last: Boeffard} +olivier-hamon: + names: + - {first: Olivier, last: Hamon} + - {first: O., last: Hamon} +olivier-kraif: + names: + - {first: Olivier, last: Kraif} + - {first: O., last: Kraif} +om-p-damani: + names: + - {first: Om P., last: Damani} + - {first: Om, last: Damani} +omar-zaidan: + names: + - {first: Omar, last: Zaidan} + - {first: Omar F., last: Zaidan} +omer-farukhan-gunes: + names: + - {first: Omer Farukhan, last: Gunes} + - {first: Omer, last: Gunes} +ona-de-gibert: + degree: University of Helsinki, Finland + names: + - {first: Ona, last: de Gibert} + - {first: Ona, last: de Gibert Bonet} + orcid: 0000-0002-7163-4807 +ondrej-bajgar: + names: + - {first: Ondřej, last: Bajgar} + - {first: Ondrej, last: Bajgar} +ondrej-bojar: + names: + - {first: Ondřej, last: Bojar} + - {first: Ondrej, last: Bojar} +onkar-arun-pandit: + names: + - {first: Onkar Arun, last: Pandit} + - {first: Onkar, last: Pandit} +ornella-corazzari: + names: + - {first: Ornella, last: Corazzari} + - {first: O., last: Corazzari} +orphee-de-clercq: + names: + - {first: Orphee, last: De Clercq} + - {first: Orphée, last: De Clercq} +oscar-ferrandez: + names: + - {first: Óscar, last: Ferrández} + - {first: Oscar, last: Ferrandez} + - {first: Oscar, last: Ferrández} +osmar-r-zaiane: + names: + - {first: Osmar R., last: Zaiane} + - {first: Osmar, last: Zaïane} + - {first: Osmar, last: Zaiane} + - {first: Osmar R., last: Zaïane} +osvaldo-novais-oliveira-jr: + names: + - {first: Osvaldo Novais, last: Oliveira Jr.} + - {first: Osvaldo, last: Oliveira Jr} +otakar-smrz: + names: + - {first: Otakar, last: Smrz} + - {first: Otakar, last: Smrž} +oto-vale: + names: + - {first: Oto, last: Vale} + - {first: Oto A., last: Vale} +owen-kimball: + names: + - {first: Owen, last: Kimball} + - {first: O., last: Kimball} +owen-rambow: + names: + - {first: Owen, last: Rambow} + - {first: Owen C., last: Rambow} +ozan-irsoy: + names: + - {first: Ozan, last: İrsoy} + - {first: Ozan, last: Irsoy} +ozlem-cetinoglu: + names: + - {first: Özlem, last: Çetinoğlu} + - {first: Ozlem, last: Cetinoglu} + - {first: Özlem, last: Çetinoglu} +ozlem-uzuner: + names: + - {first: Ozlem, last: Uzuner} + - {first: Özlem, last: Uzuner} +p-c-ching: + names: + - {first: P. C., last: Ching} + - {first: P.C., last: Ching} +p-h-j-van-der-kamp: + names: + - {first: P. H. J., last: van der Kamp} + - {first: P.H.J., last: van der Kamp} +p-senthil-nathan: + names: + - {first: P. Senthil, last: Nathan} + - {first: Senthil, last: Nathan} +pablo-duboue: + names: + - {first: Pablo, last: Duboue} + - {first: Pablo A., last: Duboue} + - {first: Pablo Ariel, last: Duboue} +pablo-gervas: + names: + - {first: Pablo, last: Gervás} + - {first: P., last: Gervás} +pablo-ruiz-fabo: + names: + - {first: Pablo, last: Ruiz Fabo} + - {first: Pablo, last: Ruiz} +paloma-moreda-pozo: + names: + - {first: Paloma, last: Moreda Pozo} + - {first: Paloma, last: Moreda} +pamela-e-fink: + names: + - {first: Pamela E., last: Fink} + - {first: P., last: Fink} +pamela-jordan: + names: + - {first: Pamela, last: Jordan} + - {first: Pamela W., last: Jordan} +panayiotis-georgiou: + names: + - {first: Panayiotis, last: Georgiou} + - {first: Panayiotis G., last: Georgiou} +paola-pietrandrea: + names: + - {first: Paola, last: Pietrandrea} + - {first: Paola, last: Pietandrea} +paola-velardi: + names: + - {first: Paola, last: Velardi} + - {first: P., last: Velardi} +paolo-bouquet: + names: + - {first: Paolo, last: Bouquet} + - {first: P., last: Bouquet} +paolo-puliti: + names: + - {first: Paolo, last: Puliti} + - {first: P., last: Puliti} +paramveer-s-dhillon: + names: + - {first: Paramveer S., last: Dhillon} + - {first: Paramveer, last: Dhillon} +partha-talukdar: + names: + - {first: Partha, last: Talukdar} + - {first: Partha Pratim, last: Talukdar} + - {first: Partha, last: Pratim Talukdar} + - {first: Partha P., last: Talukdar} +pascal-nocera: + names: + - {first: Pascal, last: Nocéra} + - {first: Pascal, last: Nocera} +pascale-feldkamp: + names: + - {first: Pascale, last: Feldkamp} + - {first: Pascale, last: Moreira} + - {first: Pascale Feldkamp, last: Moreira} + orcid: 0000-0002-2434-4268 +patrice-bellot: + names: + - {first: Patrice, last: Bellot} + - {first: P., last: Bellot} +patrice-dalle: + names: + - {first: Patrice, last: Dalle} + - {first: P., last: Dalle} +patricia-goncalves: + names: + - {first: Patricia, last: Gonçalves} + - {first: Patricia Nunes, last: Gonçalves} + - {first: Patrícia, last: Gonçalves} +patricia-robinson: + names: + - {first: Patricia, last: Robinson} + - {first: P., last: Robinson} +patricia-velazquez-morales: + names: + - {first: Patricia, last: Velazquez-Morales} + - {first: Patricia, last: Velázquez-Morales} +patricio-martinez-barco: + names: + - {first: Patricio, last: Martínez-Barco} + - {first: Patricio, last: Martinez-Barco} + - {first: Patricio Martinez, last: Barco} + - {first: P., last: Martínez-Barco} +patrick-cardinal: + names: + - {first: Patrick, last: Cardinal} + - {first: P., last: Cardinal} +patrick-haller-zurich: + comment: University of Zurich + names: + - {first: Patrick, last: Haller} + orcid: 0000-0002-8968-7587 +patrick-healey: + names: + - {first: Patrick, last: Healey} + - {first: Pat, last: Healey} + - {first: Patrick G. T., last: Healey} + - {first: Patrick G.T., last: Healey} +patrick-l-lange: + names: + - {first: Patrick L., last: Lange} + - {first: Patrick, last: Lange} +patrick-paroubek: + names: + - {first: Patrick, last: Paroubek} + - {first: P., last: Paroubek} +patrick-saint-dizier: + names: + - {first: Patrick, last: Saint-Dizier} + - {first: Patrick, last: Saint Dizier} +patrizia-michelassi: + names: + - {first: Patrizia, last: Michelassi} + - {first: P., last: Michelassi} +pattabhi-rk-rao: + names: + - {first: Pattabhi, last: RK Rao} + - {first: T. Pattabhi, last: R. K Rao} + - {first: Pattabhi RK, last: Rao} +patti-price: + names: + - {first: Patti, last: Price} + - {first: Patti J., last: Price} + - {first: P. J., last: Price} + - {first: P., last: Price} +paul-a-crook: + names: + - {first: Paul A., last: Crook} + - {first: Paul, last: Crook} +paul-bedaride: + names: + - {first: Paul, last: Bedaride} + - {first: Paul, last: Bédaride} +paul-bennett: + names: + - {first: Paul, last: Bennett} + - {first: Paul N., last: Bennett} +paul-clough: + names: + - {first: Paul, last: Clough} + - {first: Paul D., last: Clough} +paul-d-ji: + names: + - {first: Paul D, last: Ji} + - {first: Paul D., last: Ji} +paul-deleglise: + names: + - {first: Paul, last: Deléglise} + - {first: Paul, last: Deleglise} +paul-dixon: + names: + - {first: Paul, last: Dixon} + - {first: Paul R., last: Dixon} +paul-h-garthwaite: + names: + - {first: Paul H., last: Garthwaite} + - {first: Paul, last: Garthwaite} + - {first: Paul H, last: Garthwaite} +paul-kantor: + names: + - {first: Paul, last: Kantor} + - {first: Paul B., last: Kantor} +paul-morarescu: + names: + - {first: Paul, last: Morarescu} + - {first: Paul, last: Morărescu} + - {first: Paul C., last: Morărescu} +paul-placeway: + names: + - {first: Paul, last: Placeway} + - {first: P., last: Placeway} +paul-roossin: + names: + - {first: Paul, last: Roossin} + - {first: P., last: Roossin} +paul-s-jacobs: + names: + - {first: Paul S., last: Jacobs} + - {first: Paul, last: Jacobs} + - {first: P., last: Jacobs} +paula-cardoso: + names: + - {first: Paula, last: Cardoso} + - {first: Paula C. Figueira, last: Cardoso} + - {first: Paula C. F., last: Cardoso} + - {first: P., last: Cardoso} +paula-newman: + names: + - {first: Paula, last: Newman} + - {first: Paula S., last: Newman} + - {first: P. S., last: Newman} + - {first: P., last: Newman} +paulo-c-f-de-oliveira: + names: + - {first: Paulo C F, last: de Oliveira} + - {first: Paulo C. F., last: de Oliveira} +pavankumar-satuluri: + names: + - {first: Pavankumar, last: Satuluri} + - {first: Pavan Kumar, last: Satuluri} +pavel-kveton: + names: + - {first: Pavel, last: Kvĕtoň} + - {first: Pavel, last: Kveton} + - {first: Pavel, last: Květoň} +pavel-rychly: + names: + - {first: Pavel, last: Rychlý} + - {first: Pavel, last: Rychly} +pavel-smrz: + names: + - {first: Pavel, last: Smrz} + - {first: Pavel, last: Smrž} +pawel-mazur: + names: + - {first: Pawel, last: Mazur} + - {first: Paweł, last: Mazur} +pedro-balage-filho: + names: + - {first: Pedro, last: Balage Filho} + - {first: Pedro, last: Balage} + - {first: Pedro Paulo, last: Balage Filho} + - {first: Pedro P. Balage, last: Filho} + - {first: Pedro, last: Filho} +pedro-concejero-cerezo: + names: + - {first: Pedro Concejero, last: Cerezo} + - {first: Pedro, last: Concejero} +pedro-ortiz-suarez: + names: + - {first: Pedro, last: Ortiz Suarez} + - {first: Pedro Javier, last: Ortiz Suárez} +pengyuan-liu: + names: + - {first: Pengyuan, last: Liu} + - {first: PengYuan, last: Liu} + - {first: Peng-Yuan, last: Liu} +penny-labropoulou: + names: + - {first: Penny, last: Labropoulou} + - {first: P., last: Labropoulou} +pere-comas: + names: + - {first: Pere, last: Comas} + - {first: Pere R., last: Comas} +pete-whitelock: + names: + - {first: Pete, last: Whitelock} + - {first: P. J., last: Whitelock} + - {first: P., last: Whitelock} +peter-a-chew: + names: + - {first: Peter A., last: Chew} + - {first: Peter, last: Chew} +peter-a-heeman: + names: + - {first: Peter A., last: Heeman} + - {first: Peter, last: Heeman} +peter-a-rankel: + names: + - {first: Peter A., last: Rankel} + - {first: Peter, last: Rankel} +peter-anick: + names: + - {first: Peter, last: Anick} + - {first: Peter G., last: Anick} +peter-arno-coppen: + names: + - {first: Peter-Arno, last: Coppen} + - {first: P.A., last: Coppen} +peter-corbett: + names: + - {first: Peter, last: Corbett} + - {first: Peter T., last: Corbett} +peter-deng: + names: + - {first: Peter, last: Deng} + - {first: P., last: Deng} +peter-f-brown: + names: + - {first: Peter F., last: Brown} + - {first: P., last: Brown} +peter-foltz: + names: + - {first: Peter, last: Foltz} + - {first: Peter W., last: Foltz} +peter-halacsy: + names: + - {first: Péter, last: Halácsy} + - {first: Péter, last: Halácsky} +peter-j-liu: + names: + - {first: Peter J., last: Liu} + - {first: Peter, last: Liu} +peter-j-ludlow: + names: + - {first: Peter J., last: Ludlow} + - {first: Peter, last: Ludlow} +peter-jansen: + names: + - {first: Peter, last: Jansen} + - {first: Peter J., last: Jansen} +peter-juel-henrichsen: + names: + - {first: Peter Juel, last: Henrichsen} + - {first: Peter, last: Juel Henrichsen} +peter-ljunglof: + names: + - {first: Peter, last: Ljunglöf} + - {first: Peter, last: Ljunglof} +peter-machonis: + names: + - {first: Peter, last: Machonis} + - {first: Peter A., last: Machonis} +peter-pal-boda: + names: + - {first: Péter Pál, last: Boda} + - {first: Péter, last: Boda} +peter-rossen-skadhauge: + names: + - {first: Peter, last: Rossen Skadhauge} + - {first: Peter Rossen, last: Skadhauge} +peter-schauble: + names: + - {first: Peter, last: Schauble} + - {first: Peter, last: Schäuble} +peter-spyns: + names: + - {first: Peter, last: Spyns} + - {first: P., last: Spyns} +peter-turney: + names: + - {first: Peter, last: Turney} + - {first: Peter D., last: Turney} +peter-v-desouza: + names: + - {first: Peter V., last: deSouza} + - {first: P. V., last: deSouza} + - {first: P.V., last: de Souza} +peter-waiganjo-wagacha: + names: + - {first: Peter Waiganjo, last: Wagacha} + - {first: Peter W., last: Wagacha} + - {first: Peter, last: Wagacha} +peter-white: + names: + - {first: Peter, last: White} + - {first: Pete, last: White} +peter-wittenburg: + names: + - {first: Peter, last: Wittenburg} + - {first: P., last: Wittenburg} +peteris-paikens: + names: + - {first: Peteris, last: Paikens} + - {first: Pēteris, last: Paikens} +petr-jirku: + names: + - {first: Petr, last: Jirku} + - {first: P., last: Jirku} +petr-pollak: + names: + - {first: Petr, last: Pollák} + - {first: Petr, last: Pollak} +petr-sgall: + names: + - {first: Petr, last: Sgall} + - {first: P., last: Sgall} +petra-barancikova: + names: + - {first: Petra, last: Barancikova} + - {first: Petra, last: Barančíková} +petra-poukarova: + names: + - {first: Petra, last: Poukarová} + - {first: Petra, last: Klimešová} +phil-blunsom: + names: + - {first: Phil, last: Blunsom} + - {first: Philip, last: Blunsom} +phil-c-woodland: + names: + - {first: Phil C., last: Woodland} + - {first: P.C., last: Woodland} +phil-harrison: + names: + - {first: Phil, last: Harrison} + - {first: Philip, last: Harrison} + - {first: P., last: Harrison} +phil-sidney-ostheimer: + names: + - {first: Phil Sidney, last: Ostheimer} + - {first: Phil, last: Ostheimer} + orcid: 0009-0009-6186-3233 +philip-edmonds: + names: + - {first: Philip, last: Edmonds} + - {first: Philip G., last: Edmonds} +philip-gorinski: + names: + - {first: Philip, last: Gorinski} + - {first: Philip John, last: Gorinski} +philip-hanna: + names: + - {first: Philip, last: Hanna} + - {first: P., last: Hanna} + - {first: P, last: Hanna} +philip-hoole: + names: + - {first: Philip, last: Hoole} + - {first: Phil, last: Hoole} +philip-ogren: + names: + - {first: Philip, last: Ogren} + - {first: Philip V., last: Ogren} +philip-r-cohen: + names: + - {first: Philip R., last: Cohen} + - {first: Philip, last: Cohen} + - {first: Phil R., last: Cohen} +philip-s-yu: + names: + - {first: Philip S., last: Yu} + - {first: Philip, last: Yu} +philipp-cimiano: + names: + - {first: Philipp, last: Cimiano} + - {first: P., last: Cimiano} +philippe-boula-de-mareuil: + names: + - {first: Philippe, last: Boula de Mareüil} + - {first: Philippe Boula, last: de Mareüil} + - {first: P. Boula, last: de Mareüil} +philippe-langlais: + names: + - {first: Philippe, last: Langlais} + - {first: Phillippe, last: Langlais} +phuong-le-hong: + names: + - {first: Phuong, last: Le Hong} + - {first: Phuong, last: Le-Hong} + - {first: Hồng Phương, last: Lê} + - {first: Phương, last: Lê Hồng} + - {first: Hong-Phuong, last: Le} + - {first: H. Phuong, last: Le} +phuong-thai-nguyen: + names: + - {first: Phuong-Thai, last: Nguyen} + - {first: Phuong Thai, last: Nguyen} +pi-chuan-chang: + names: + - {first: Pi-Chuan, last: Chang} + - {first: Pichuan, last: Chang} +piercarlo-rossi: + names: + - {first: Piercarlo, last: Rossi} + - {first: P., last: Rossi} +piergiorgio-svaizer: + names: + - {first: Piergiorgio, last: Svaizer} + - {first: P., last: Svaizer} +pierre-dumouchel: + names: + - {first: Pierre, last: Dumouchel} + - {first: P., last: Dumouchel} +pierre-emmanuel-mazare: + names: + - {first: Pierre-Emmanuel, last: Mazare} + - {first: Pierre-Emmanuel, last: Mazaré} +pierre-francois-marteau: + names: + - {first: Pierre-Francois, last: Marteau} + - {first: Pierre-François, last: Marteau} +pierre-guillaume: + names: + - {first: Pierre, last: Guillaume} + - {first: P., last: Guillaume} +pierre-zweigenbaum: + names: + - {first: Pierre, last: Zweigenbaum} + - {first: P., last: Zweigenbaum} +pierrette-bouillon: + names: + - {first: Pierrette, last: Bouillon} + - {first: P., last: Bouillon} +pietro-leo: + names: + - {first: Pietro, last: Leo} + - {first: P., last: Leo} +pilar-leon-arauz: + names: + - {first: Pilar, last: León-Araúz} + - {first: Pilar León, last: Araúz} +pinar-oezden-wennerberg: + names: + - {first: Pinar, last: Oezden Wennerberg} + - {first: Pinar, last: Wennerberg} + - {first: Pinar Oezden, last: Wennerberg} +ping-che-yang: + names: + - {first: Ping-Che, last: Yang} + - {first: Ping-che, last: Yang} +ping-wai-wong: + names: + - {first: Ping Wai, last: Wong} + - {first: Percy Ping-Wai, last: Wong} +pirros-tsiakoulis: + names: + - {first: Pirros, last: Tsiakoulis} + - {first: P., last: Tsiakoulis} +plaban-kr-bhowmick: + names: + - {first: Plaban Kr., last: Bhowmick} + - {first: Plaban, last: Bhowmick} +po-chun-chen: + names: + - {first: Po Chun, last: Chen} + - {first: Po-Chun, last: Chen} +po-hsuan-chen: + names: + - {first: Po Hsuan, last: Chen} + - {first: Po-Hsuan, last: Chen} +po-yu-liang: + names: + - {first: Po-Yu, last: Liang} + - {first: Po-yu, last: Liang} +poul-soren-kjaersgaard: + names: + - {first: Poul Søren, last: Kjærsgaard} + - {first: Poul Soren, last: Kjaersgaard} +pradeep-muthukrishnan: + names: + - {first: Pradeep, last: Muthukrishnan} + - {first: Pradeep, last: Muthukrishan} +pradip-dey: + names: + - {first: Pradip, last: Dey} + - {first: Paradip, last: Dey} +prafulla-kumar-choubey: + names: + - {first: Prafulla Kumar, last: Choubey} + - {first: Prafulla, last: Choubey} +prajwol-shrestha: + names: + - {first: Prajwol, last: Shrestha} + - {first: Prajol, last: Shrestha} +pranav-a: + comment: UC Santa Cruz + names: + - {first: Pranav, last: A} + similar: + - pranav-anand +pranav-anand: + comment: Dayta AI + names: + - {first: Pranav, last: Anand} + similar: + - pranav-a +pranav-goel-umd: + comment: UMD + disable_name_matching: true + names: + - {first: Pranav, last: Goel} + orcid: 0000-0003-1037-2687 +pranav-narayanan-venkit: + names: + - {first: Pranav Narayanan, last: Venkit} + - {first: Pranav, last: Venkit} +pranava-swaroop-madhyastha: + names: + - {first: Pranava Swaroop, last: Madhyastha} + - {first: Pranava, last: Madhyastha} +praneeth-m-shishtla: + names: + - {first: Praneeth M., last: Shishtla} + - {first: Praneeth, last: Shishtla} + - {first: Praneeth M, last: Shishtla} +prashanth-mannem: + names: + - {first: Prashanth, last: Mannem} + - {first: Prashanth Reddy, last: Mannem} + - {first: Prashanth, last: Reddy} +prathusha-kameswara-sarma: + names: + - {first: Prathusha, last: Kameswara Sarma} + - {first: Prathusha, last: K Sarma} +pratikkumar-patel: + names: + - {first: Pratikkumar, last: Patel} + - {first: Pratik, last: Patel} +praveen-paritosh: + names: + - {first: Praveen, last: Paritosh} + - {first: Praveen, last: P} +preetam-maloor: + names: + - {first: Preetam, last: Maloor} + - {first: P., last: Maloor} +prem-natarajan: + names: + - {first: Prem, last: Natarajan} + - {first: Premkumar, last: Natarajan} +preslav-nakov: + names: + - {first: Preslav, last: Nakov} + - {first: Preslav I., last: Nakov} +primoz-jakopin: + names: + - {first: Primož, last: Jakopin} + - {first: Primoz, last: Jakopin} +pu-zhao-northeastern: + comment: Northeastern + disable_name_matching: true + names: + - {first: Pu, last: Zhao} + orcid: 0000-0001-5018-2859 +pushpak-bhattacharyya: + names: + - {first: Pushpak, last: Bhattacharyya} + - {first: Pushpak, last: Bhattacharya} +qi-li-ub: + degree: University at Buffalo + disable_name_matching: true + names: + - {first: Qi, last: Li} + orcid: 0000-0002-3136-2157 +qi-quan-huang: + names: + - {first: Qi-quan, last: Huang} + - {first: Qi-Quan, last: Huang} +qian-cao-renmin: + comment: Renmin + disable_name_matching: true + names: + - {first: Qian, last: Cao} + orcid: 0000-0003-3288-1714 +qiaoming-zhu: + names: + - {first: Qiaoming, last: Zhu} + - {first: Qiao-ming, last: Zhu} + - {first: Qiao-Ming, last: Zhu} + - {first: QiaoMing, last: Zhu} +qiguang-lin: + names: + - {first: Qiguang, last: Lin} + - {first: Q., last: Lin} +qin-lu: + names: + - {first: Qin, last: Lu} + - {first: Q., last: Lu} +qingqing-cai: + names: + - {first: Qingqing, last: Cai} + - {first: Qing-qing, last: Cai} +quan-hung-tran: + names: + - {first: Quan Hung, last: Tran} + - {first: Quan, last: Tran} +quang-le-minh: + names: + - {first: Quang, last: Le Minh} + - {first: Minh Quang, last: Le} +quang-thuy-ha: + names: + - {first: Quang Thuy, last: Ha} + - {first: Quang-Thuy, last: Ha} +quoc-khanh-do: + names: + - {first: Quoc Khanh, last: Do} + - {first: Quoc-Khanh, last: Do} +quoc-le: + names: + - {first: Quoc, last: Le} + - {first: Quoc V., last: Le} +quy-nguyen: + names: + - {first: Quy, last: Nguyen} + - {first: Quy T., last: Nguyen} +r-a-smit: + names: + - {first: R. A., last: Smit} + - {first: R.A., last: Smit} +r-mahesh-k-sinha: + names: + - {first: R Mahesh K, last: Sinha} + - {first: R. Mahesh K., last: Sinha} +r-nozohoor-farshi: + names: + - {first: R., last: Nozohoor-Farshi} + - {first: R, last: Nozohoor-Farshi} +r-piotrowski: + names: + - {first: R., last: Piotrowski} + - {first: R. G., last: Piotrowski} +r-thomas-mccoy: + names: + - {first: R. Thomas, last: McCoy} + - {first: Tom, last: McCoy} +rachel-edita-roxas: + names: + - {first: Rachel Edita, last: Roxas} + - {first: Rachel Edita O., last: Roxas} + - {first: Rachel, last: Roxas} +rachele-sprugnoli: + names: + - {first: Rachele, last: Sprugnoli} + - {first: R., last: Sprugnoli} +rada-mihalcea: + names: + - {first: Rada, last: Mihalcea} + - {first: Rada F., last: Mihalcea} +radoslaw-ramocki: + names: + - {first: Radoslaw, last: Ramocki} + - {first: Radosław, last: Ramocki} +radovan-garabik: + names: + - {first: Radovan, last: Garabík} + - {first: Radovan, last: Garabik} +radu-florian: + names: + - {first: Radu, last: Florian} + - {first: R., last: Florian} +rafa-saiz: + names: + - {first: Rafa, last: Saiz} + - {first: R., last: Saiz} +rafael-e-banchs: + names: + - {first: Rafael E., last: Banchs} + - {first: Rafael, last: Banchs} +rafael-michael-karampatsis: + names: + - {first: Rafael - Michael, last: Karampatsis} + - {first: Rafael Michael, last: Karampatsis} +rafael-munoz: + names: + - {first: Rafael, last: Muñoz} + - {first: Rafael, last: Muñoz Guillena} + - {first: Rafael, last: Muñoz-Guillena} + - {first: R., last: Muñoz} +raffaella-bernardi: + names: + - {first: Raffaella, last: Bernardi} + - {first: R., last: Bernardi} +raghava-krishnan: + names: + - {first: Raghava, last: Krishnan} + - {first: R, last: Krishnan} +raghavendra-udupa: + names: + - {first: Raghavendra, last: Udupa} + - {first: Raghavendra Udupa, last: U.} +raghu-pujitha-gade: + names: + - {first: Raghu Pujitha, last: Gade} + - {first: Pujitha, last: Gade} +rajakrishnan-rajkumar: + names: + - {first: Rajakrishnan, last: Rajkumar} + - {first: Rajkumar, last: Rajakrishnan} +rajen-chatterjee: + names: + - {first: Rajen, last: Chatterjee} + - {first: Rajan, last: Chatterjee} +rajesh-bhat: + names: + - {first: Rajesh, last: Bhat} + similar: + - rajesh-bhatt +rajesh-bhatt: + comment: UMass Amherst + names: + - {first: Rajesh, last: Bhatt} + similar: + - rajesh-bhat +rajiv-shah: + names: + - {first: Rajiv, last: Shah} + - {first: Rajiv Ratn, last: Shah} +rajkumar-pujari: + names: + - {first: Rajkumar, last: Pujari} + - {first: Pujari, last: Rajkumar} +rakesh-r-menon: + names: + - {first: Rakesh R, last: Menon} + - {first: Rakesh, last: Menon} +ralf-d-brown: + names: + - {first: Ralf D., last: Brown} + - {first: Ralf, last: Brown} +ralf-schlueter: + names: + - {first: Ralf, last: Schlueter} + - {first: Ralf, last: Schlüter} +ralph-grishman: + names: + - {first: Ralph, last: Grishman} + - {first: R., last: Grishman} +ralph-weischedel: + names: + - {first: Ralph, last: Weischedel} + - {first: Ralph M., last: Weischedel} +raman-chandrasekar: + names: + - {first: Raman, last: Chandrasekar} + - {first: R., last: Chandrasekar} + - {first: Raman, last: Chandraseker} +ramesh-manuvinakurike: + names: + - {first: Ramesh, last: Manuvinakurike} + - {first: Ramesh, last: Manuvirakurike} +ramon-fernandez-astudillo: + names: + - {first: Ramón, last: Fernandez Astudillo} + - {first: Ramón, last: Astudillo} + - {first: Ramón, last: F. Astudillo} + - {first: Ramon, last: F. Astudillo} +ramon-granell: + names: + - {first: Ramon, last: Granell} + - {first: Ramón, last: Granell} +ramon-lopez-cozar: + names: + - {first: Ramón, last: López-Cózar} + - {first: R., last: López-Cózar} +ramona-andreea-turcu: + names: + - {first: Ramona Andreea, last: Turcu} + - {first: Ramona-Andreea, last: Turcu} +ramzi-abbes: + names: + - {first: Ramzi, last: Abbès} + - {first: Ramzi, last: Abbes} +randall-a-helzerman: + names: + - {first: Randall A., last: Helzerman} + - {first: R. A., last: Helzerman} +randy-m-kaplan: + names: + - {first: Randy M., last: Kaplan} + - {first: Randy, last: Kaplan} +ranka-stankovic: + names: + - {first: Ranka, last: Stanković} + - {first: Ranka, last: Stankoviæ} +ranran-haoran-zhang: + comment: Penn State University + names: + - {first: Ranran Haoran, last: Zhang} +raoul-n-smith: + names: + - {first: Raoul N., last: Smith} + - {first: Raoul N, last: Smith} +raphael-hoffmann: + names: + - {first: Raphael, last: Hoffmann} + - {first: Raphael, last: Hoffman} +raphael-rubino: + names: + - {first: Raphael, last: Rubino} + - {first: Raphaël, last: Rubino} +raphael-troncy: + names: + - {first: Raphael, last: Troncy} + - {first: Raphaël, last: Troncy} +raquel-fernandez: + names: + - {first: Raquel, last: Fernández} + - {first: Raquel, last: Fernandez} +raquel-martinez: + names: + - {first: Raquel, last: Martínez} + - {first: Raquel, last: Martinez} +rasoul-samad-zadeh-kaljahi: + names: + - {first: Rasoul, last: Samad Zadeh Kaljahi} + - {first: Rasul, last: Samad Zadeh Kaljahi} +ravikumar-komandur: + names: + - {first: Ravikumar, last: Komandur} + - {first: K, last: Ravikumar} +ravikumar-kondadadi: + names: + - {first: Ravikumar, last: Kondadadi} + - {first: Ravi, last: Kondadadi} + - {first: Ravi Kumar, last: Kondadadi} +raymond-mooney: + names: + - {first: Raymond, last: Mooney} + - {first: Raymond J., last: Mooney} +raymond-ng: + names: + - {first: Raymond, last: Ng} + - {first: Raymond T., last: Ng} +raymond-ptucha: + names: + - {first: Raymond, last: Ptucha} + - {first: Ray, last: Ptucha} +raymond-wong: + names: + - {first: Raymond, last: Wong} + - {first: Raymond K., last: Wong} +razvan-bunescu: + names: + - {first: Razvan, last: Bunescu} + - {first: Razvan C., last: Bunescu} +rebecca-bruce: + names: + - {first: Rebecca, last: Bruce} + - {first: Rebecca F., last: Bruce} +rebecca-j-passonneau: + names: + - {first: Rebecca J., last: Passonneau} + - {first: Rebecca, last: Passonneau} +reinald-kim-amplayo: + names: + - {first: Reinald Kim, last: Amplayo} + - {first: Reinald, last: Kim Amplayo} +reinhard-schaler: + names: + - {first: Reinhard, last: Schäler} + - {first: Reinhard, last: Schaler} +remi-zajac: + names: + - {first: Remi, last: Zajac} + - {first: Rémi, last: Zajac} +remko-scha: + names: + - {first: Remko, last: Scha} + - {first: Remko J. H., last: Scha} + - {first: R. J. H., last: Scha} +remo-raffaelli: + names: + - {first: Remo, last: Raffaelli} + - {first: R., last: Raffaelli} +ren-yuan-lyu: + names: + - {first: Ren-Yuan, last: Lyu} + - {first: Ren-yuan, last: Lyu} +renata-vieira: + names: + - {first: Renata, last: Vieira} + - {first: R., last: Vieira} +renate-henschel: + names: + - {first: Renate, last: Henschel} + - {first: R., last: Henschel} +renato-de-mori: + names: + - {first: Renato, last: De Mori} + - {first: Renato, last: de Mori} +rene-schneider: + names: + - {first: René, last: Schneider} + - {first: Rene, last: Schneider} +rene-van-der-wal: + names: + - {first: Rene, last: van der Wal} + - {first: René, last: van der Wal} + - {first: Rene, last: Van Der Wal} +rezarta-islamaj-dogan: + names: + - {first: Rezarta, last: Islamaj Dogan} + - {first: Rezarta, last: Islamaj Doğan} +ricardo-de-cordoba: + names: + - {first: Ricardo, last: de Córdoba} + - {first: Ricardo, last: de Cordoba} +ricardo-ribeiro: + names: + - {first: Ricardo, last: Ribeiro} + - {first: Ricardo Daniel, last: Ribeiro} +riccardo-del-gratta: + names: + - {first: Riccardo, last: Del Gratta} + - {first: Riccardo, last: del Gratta} +richard-a-hudson: + names: + - {first: Richard A., last: Hudson} + - {first: Richard, last: Hudson} +richard-a-sharman: + names: + - {first: Richard A., last: Sharman} + - {first: R.A., last: Sharman} + - {first: R. A., last: Sharman} +richard-c-wang: + names: + - {first: Richard C., last: Wang} + - {first: Richard, last: Wang} +richard-d-boyce: + names: + - {first: Richard D., last: Boyce} + - {first: Richard, last: Boyce} +richard-e-leibbrandt: + names: + - {first: Richard E, last: Leibbrandt} + - {first: Richard E., last: Leibbrandt} +richard-evans: + names: + - {first: Richard, last: Evans} + - {first: R., last: Evans} +richard-f-e-sutcliffe: + names: + - {first: Richard F. E., last: Sutcliffe} + - {first: Richard F.E., last: Sutcliffe} +richard-farkas: + names: + - {first: Richárd, last: Farkas} + - {first: Richard, last: Farkas} +richard-fritzson: + names: + - {first: Richard, last: Fritzson} + - {first: Rich, last: Fritzson} +richard-g-morgan: + names: + - {first: Richard G., last: Morgan} + - {first: Richard, last: Morgan} +richard-kittredge: + names: + - {first: Richard, last: Kittredge} + - {first: R., last: Kittredge} +richard-l-lewis: + names: + - {first: Richard L., last: Lewis} + - {first: Richard, last: Lewis} +richard-m-stern: + names: + - {first: Richard M., last: Stern} + - {first: Richard, last: Stern} +richard-schwartz: + names: + - {first: Richard, last: Schwartz} + - {first: Rich, last: Schwartz} + - {first: R., last: Schwartz} +richard-sproat: + names: + - {first: Richard, last: Sproat} + - {first: Richard W., last: Sproat} +richard-tzong-han-tsai: + names: + - {first: Richard Tzong-Han, last: Tsai} + - {first: Tzong-Han, last: Tsai} + - {first: Tzong-Han Richard, last: Tsai} + - {first: Richard Tzong-han, last: Tsai} +richard-zuber: + names: + - {first: Richard, last: Zuber} + - {first: R., last: Zuber} +richmond-h-thomason: + names: + - {first: Richmond H., last: Thomason} + - {first: Richmond, last: Thomason} +rie-johnson: + names: + - {first: Rie, last: Johnson} + - {first: Rie, last: Ando} + - {first: Rie Kubota, last: Ando} +rihards-kalnins: + names: + - {first: Rihards, last: Kalniņš} + - {first: Rihards, last: Kalnins} +rihards-krislauks: + names: + - {first: Rihards, last: Krišlauks} + - {first: Rihards, last: Krislauks} +rik-koncel-kedziorski: + names: + - {first: Rik, last: Koncel-Kedziorski} + - {first: R., last: Koncel-Kedziorski} +rila-mandala: + names: + - {first: Rila, last: Mandala} + - {first: Mandala, last: Rila} +rita-nuebel: + names: + - {first: Rita, last: Nuebel} + - {first: Rita, last: Nüebel} +ritesh-shah: + names: + - {first: Ritesh, last: Shah} + - {first: Ritesh M., last: Shah} +riyaz-ahmad-bhat: + names: + - {first: Riyaz Ahmad, last: Bhat} + - {first: Riyaz A., last: Bhat} +riza-theresa-batista-navarro: + names: + - {first: Riza Theresa, last: Batista-Navarro} + - {first: Riza, last: Batista-Navarro} +robert-bobrow: + names: + - {first: Robert, last: Bobrow} + - {first: Robert J., last: Bobrow} + - {first: Rusty, last: Bobrow} + - {first: R., last: Bobrow} +robert-c-berwick: + names: + - {first: Robert C., last: Berwick} + - {first: Robert, last: Berwick} + - {first: Robert Cregar, last: Berwick} +robert-c-moore: + names: + - {first: Robert C., last: Moore} + - {first: Robert, last: Moore} + - {first: R. C., last: Moore} +robert-e-mercer: + comment: Univ. of Western Ontario + names: + - {first: Robert E., last: Mercer} + - {first: Robert, last: Mercer} + similar: + - robert-l-mercer +robert-frederking: + names: + - {first: Robert, last: Frederking} + - {first: Robert E., last: Frederking} +robert-gaizauskas: + names: + - {first: Robert, last: Gaizauskas} + - {first: Robert J., last: Gaizauskas} + - {first: Rob, last: Gaizauskas} + - {first: R., last: Gaizauskas} +robert-granville: + names: + - {first: Robert, last: Granville} + - {first: Robert Alan, last: Granville} +robert-i-damper: + names: + - {first: Robert I., last: Damper} + - {first: R.I., last: Damper} +robert-ingria: + names: + - {first: Robert, last: Ingria} + - {first: R., last: Ingria} +robert-j-hendley: + names: + - {first: Robert J., last: Hendley} + - {first: Robert, last: Hendley} +robert-j-kuhns: + names: + - {first: Robert J., last: Kuhns} + - {first: Robert, last: Kuhns} +robert-l-mercer: + comment: IBM + names: + - {first: Robert L., last: Mercer} + - {first: R., last: Mercer} + - {first: R. L., last: Mercer} + - {first: Robert, last: Mercer} + similar: + - robert-e-mercer +robert-m-haralick: + names: + - {first: Robert M., last: Haralick} + - {first: Robert, last: Haralick} +robert-malouf: + names: + - {first: Robert, last: Malouf} + - {first: Rob, last: Malouf} +robert-milne: + names: + - {first: Robert, last: Milne} + - {first: Rob, last: Milne} +robert-p-futrelle: + names: + - {first: Robert P., last: Futrelle} + - {first: Robert, last: Futrelle} +robert-phillips: + names: + - {first: Robert, last: Phillips} + - {first: Rob, last: Phillips} +robert-s-belvin: + names: + - {first: Robert S., last: Belvin} + - {first: Robert, last: Belvin} + - {first: Robert S., last: Melvin} +robert-s-swier: + names: + - {first: Robert S., last: Swier} + - {first: Robert, last: Swier} +robert-stewart: + names: + - {first: Robert, last: Stewart} + - {first: Rob, last: Stewart} +robert-t-kasper: + names: + - {first: Robert T., last: Kasper} + - {first: Robert, last: Kasper} +robert-t-schultz: + names: + - {first: Robert T., last: Schultz} + - {first: Robert, last: Schultz} +robert-w-morris: + names: + - {first: Robert W., last: Morris} + - {first: Robert, last: Morris} +robert-w-p-luk: + names: + - {first: Robert W.P., last: Luk} + - {first: R.W.P., last: Luk} +robert-weide: + names: + - {first: Robert, last: Weide} + - {first: R., last: Weide} +robert-wing-pong-luk: + names: + - {first: Robert Wing Pong, last: Luk} + - {first: Wing-Pong, last: Luk} +roberta-catizone: + names: + - {first: Roberta, last: Catizone} + - {first: R., last: Catizone} +roberta-h-merchant: + names: + - {first: Roberta H., last: Merchant} + - {first: Roberta, last: Merchant} +roberto-barra-chicote: + names: + - {first: Roberto, last: Barra-Chicote} + - {first: Roberto Barra, last: Chicote} +roberto-basili: + names: + - {first: Roberto, last: Basili} + - {first: R., last: Basili} +roberto-garigliano: + names: + - {first: Roberto, last: Garigliano} + - {first: R., last: Garigliano} +roberto-pieraccini: + names: + - {first: Roberto, last: Pieraccini} + - {first: R., last: Pieraccini} +robin-l-hill: + names: + - {first: Robin L., last: Hill} + - {first: Robin, last: Hill} +roddy-cowie: + names: + - {first: Roddy, last: Cowie} + - {first: R., last: Cowie} +roderick-l-johnson: + names: + - {first: Roderick L., last: Johnson} + - {first: R.L., last: Johnson} + - {first: R., last: Johnson} +rodger-kibble: + names: + - {first: Rodger, last: Kibble} + - {first: R., last: Kibble} +rodney-nielsen: + names: + - {first: Rodney, last: Nielsen} + - {first: Rodney D., last: Nielsen} +rodolfo-delmonte: + names: + - {first: Rodolfo, last: Delmonte} + - {first: R., last: Delmonte} +rodrigo-agerri: + names: + - {first: Rodrigo, last: Agerri} + - {first: R., last: Agerri} +roger-c-schank: + names: + - {first: Roger C., last: Schank} + - {first: Roger, last: Schank} +roger-evans: + names: + - {first: Roger, last: Evans} + - {first: R, last: Evans} +roger-k-moore: + names: + - {first: Roger K., last: Moore} + - {first: Roger, last: Moore} +roger-levy: + names: + - {first: Roger, last: Levy} + - {first: Roger P., last: Levy} +rohini-k-srihari: + names: + - {first: Rohini K., last: Srihari} + - {first: Rohini, last: Srihari} + - {first: K. Rohini, last: Srihari} +rohit-kate: + names: + - {first: Rohit, last: Kate} + - {first: Rohit J., last: Kate} +roland-r-hausser: + names: + - {first: Roland R., last: Hausser} + - {first: Roland, last: Hausser} +romina-altamirano: + names: + - {first: Romina, last: Altamirano} + - {first: Ivana Romina, last: Altamirano} +romuald-skiba: + names: + - {first: Romuald, last: Skiba} + - {first: R., last: Skiba} +ron-daniel-jr: + names: + - {first: Ron, last: 'Daniel, Jr.'} + - {first: Ron, last: Daniel} + - {first: Ron, last: Daniel Jr.} +ronald-cole: + names: + - {first: Ronald, last: Cole} + - {first: Ron, last: Cole} + - {first: Ronald A., last: Cole} + - {first: R., last: Cole} +ronald-m-kaplan: + names: + - {first: Ronald M., last: Kaplan} + - {first: Ronald, last: Kaplan} + - {first: Ron, last: Kaplan} +ronald-rosenfeld: + names: + - {first: Ronald, last: Rosenfeld} + - {first: R., last: Rosenfeld} +ronaldo-teixeira-martins: + names: + - {first: Ronaldo Teixeira, last: Martins} + - {first: Ronaldo, last: Martins} +ronan-g-reilly: + names: + - {first: Ronan G., last: Reilly} + - {first: Ronan, last: Reilly} +ronan-mac-an-tsaoir: + names: + - {first: Rónan, last: Mac an tSaoir} + - {first: Ronan, last: Mac an tSaoir} +roni-ben-aharon: + names: + - {first: Roni, last: Ben Aharon} + - {first: Roni, last: Ben-Aharon} +ronnie-w-smith: + names: + - {first: Ronnie W., last: Smith} + - {first: Ronnie, last: Smith} +roque-lopez-condori: + names: + - {first: Roque, last: Lopez Condori} + - {first: Roque, last: López} +rosa-del-gaudio: + names: + - {first: Rosa, last: Del Gaudio} + - {first: Rosa, last: Gaudio} +rose-catherine-kanjirathinkal: + names: + - {first: Rose Catherine, last: Kanjirathinkal} + - {first: Rose, last: Catherine} +rosemary-stevenson: + names: + - {first: Rosemary, last: Stevenson} + - {first: R., last: Stevenson} +roser-sauri: + names: + - {first: Roser, last: Saurí} + - {first: Roser, last: Sauri} +roxana-girju: + names: + - {first: Roxana, last: Girju} + - {first: Roxana, last: Gîrju} +roxane-segers: + names: + - {first: Roxane, last: Segers} + - {first: Roxanne, last: Segers} +roy-tromble: + names: + - {first: Roy, last: Tromble} + - {first: Roy W., last: Tromble} +ruben-a-proano: + names: + - {first: Ruben A., last: Proano} + - {first: Rubén, last: Proaño} + - {first: Rubén A., last: Proaño} +ruben-izquierdo: + names: + - {first: Rubén, last: Izquierdo} + - {first: Ruben, last: Izquierdo Bevia} + - {first: Ruben, last: Izquierdo} +ruben-san-segundo: + names: + - {first: Ruben, last: San-Segundo} + - {first: Rubén, last: San-Segundo} +ruben-urizar: + names: + - {first: Ruben, last: Urizar} + - {first: Rubén, last: Urizar} + - {first: R., last: Urizar} +ruhi-sarikaya: + names: + - {first: Ruhi, last: Sarikaya} + - {first: Ruhi, last: Srikaya} +ruket-cakici: + names: + - {first: Ruket, last: Çakıcı} + - {first: Ruket, last: Cakici} + - {first: Ruken, last: Cakici} + - {first: Ruken, last: Çakıcı} +ruli-manurung: + names: + - {first: Ruli, last: Manurung} + - {first: R., last: Manurung} +rune-saetre: + names: + - {first: Rune, last: Sætre} + - {first: Rune, last: Saetre} +ruslan-mitkov: + names: + - {first: Ruslan, last: Mitkov} + - {first: R., last: Mitkov} +russell-beckley: + names: + - {first: Russell, last: Beckley} + - {first: Russ, last: Beckley} +rutu-mulkar-mehta: + names: + - {first: Rutu, last: Mulkar-Mehta} + - {first: Rutu, last: Mulkar} +ruy-luiz-milidiu: + names: + - {first: Ruy Luiz, last: Milidiú} + - {first: Ruy, last: Milidiú} +ruzena-bajcsy: + names: + - {first: Ruzena, last: Bajcsy} + - {first: R., last: Bajcsy} +ryan-boyd: + degree: University of Texas at Austin + names: + - {first: Ryan, last: Boyd} + - {first: Ryan L., last: Boyd} + orcid: 0000-0002-1876-6050 +ryan-roth: + names: + - {first: Ryan, last: Roth} + - {first: Ryan, last: M. Roth} +ryen-white: + names: + - {first: Ryen, last: White} + - {first: Ryan, last: White} +ryochi-sugimura: + names: + - {first: Ryochi, last: Sugimura} + - {first: R., last: Sugimura} +ryosuke-takahashi-tohoku: + comment: Tohoku + disable_name_matching: true + names: + - {first: Ryosuke, last: Takahashi} + orcid: 0009-0002-9887-2781 +s-r-k-branavan: + names: + - {first: S.R.K., last: Branavan} + - {first: S. R. K., last: Branavan} +s-rajendran: + names: + - {first: S., last: Rajendran} + - {first: Rajendran, last: S} +s-v-n-vishwanathan: + names: + - {first: S. V. N., last: Vishwanathan} + - {first: S.V.N., last: Vishwanathan} +sabine-schulte-im-walde: + names: + - {first: Sabine, last: Schulte im Walde} + - {first: Sabine, last: Schulte Im Walde} + - {first: Sabine, last: Schulte in Walde} +sabrina-j-mielke: + names: + - {first: Sabrina J., last: Mielke} + - {first: Sabrina, last: Mielke} +sachindra-joshi: + names: + - {first: Sachindra, last: Joshi} + - {first: Sachin, last: Joshi} +sadaoki-furui: + names: + - {first: Sadaoki, last: Furui} + - {first: S., last: Furui} +sadid-a-hasan: + names: + - {first: Sadid A., last: Hasan} + - {first: Sadid, last: Hasan} +said-ouatik-el-alaoui: + names: + - {first: Said Ouatik, last: El Alaoui} + - {first: Said, last: Ouatik El Alaoui} +saif-mohammad: + names: + - {first: Saif, last: Mohammad} + - {first: Saif M., last: Mohammad} +saim-shin: + names: + - {first: Saim, last: Shin} + - {first: Sa-Im, last: Shin} +sainik-mahata: + names: + - {first: Sainik, last: Mahata} + - {first: Sainik Kumar, last: Mahata} +salah-ait-mokhtar: + names: + - {first: Salah, last: Ait-Mokhtar} + - {first: Salah, last: Aït-Mokhtar} +salah-haamid: + names: + - {first: Salah, last: Haamid} + - {first: S., last: Haamid} +saliha-azzam: + names: + - {first: Saliha, last: Azzam} + - {first: S., last: Azzam} +salim-roukos: + names: + - {first: Salim, last: Roukos} + - {first: S., last: Roukos} +salud-maria-jimenez-zafra: + names: + - {first: Salud María, last: Jiménez-Zafra} + - {first: Salud M., last: Jiménez-Zafra} + - {first: Salud M., last: Jiménez Zafra} +salvador-espana: + names: + - {first: Salvador, last: España} + - {first: S., last: España} +sameer-pradhan: + names: + - {first: Sameer, last: Pradhan} + - {first: Sameer S., last: Pradhan} + - {first: S., last: Pradhan} +samhaa-r-el-beltagy: + names: + - {first: Samhaa R., last: El-Beltagy} + - {first: Samhaa, last: El-Beltagy} +samik-ghosh: + names: + - {first: Samik, last: Ghosh} + - {first: Samik, last: Gosh} +samir-abdelrahman: + names: + - {first: Samir, last: AbdelRahman} + - {first: Samir, last: Abdelrahman} +samira-ellouze: + names: + - {first: Samira, last: Ellouze} + - {first: Samira Walha, last: Ellouze} +samuel-bayer: + names: + - {first: Samuel, last: Bayer} + - {first: Sam, last: Bayer} +samuel-bowman: + names: + - {first: Samuel, last: Bowman} + - {first: Samuel R., last: Bowman} + - {first: Sam, last: Bowman} +samuel-w-k-chan: + names: + - {first: Samuel W. K., last: Chan} + - {first: Samuel W.K., last: Chan} + - {first: S. W. K., last: Chan} +sanda-harabagiu: + names: + - {first: Sanda, last: Harabagiu} + - {first: Sanda M., last: Harabagiu} +sandipan-dandapat: + names: + - {first: Sandipan, last: Dandapat} + - {first: Sandipan, last: Dandpat} +sandra-aluisio: + names: + - {first: Sandra, last: Aluísio} + - {first: Sandra Maria, last: Aluísio} + - {first: Sandra, last: Aluisio} + - {first: Sandra M., last: Aluísio} +sandra-carberry: + names: + - {first: Sandra, last: Carberry} + - {first: M. Sandra, last: Carberry} +sandra-collovini: + names: + - {first: Sandra, last: Collovini} + - {first: S., last: Collovini} +sandra-kubler: + names: + - {first: Sandra, last: Kübler} + - {first: Sandra, last: Kubler} + - {first: Sandra, last: Kuebler} +sandra-milena-castellanos-paez: + names: + - {first: Sandra, last: Milena Castellanos Páez} + - {first: Sandra Castellanos, last: Páez} +sang-jo-lee: + names: + - {first: Sang-Jo, last: Lee} + - {first: Sang Jo, last: Lee} +sang-kyu-park: + names: + - {first: Sang-Kyu, last: Park} + - {first: Sangkyu, last: Park} +sangkeun-jung: + names: + - {first: Sangkeun, last: Jung} + - {first: SangKeun, last: Jung} +sanja-stajner: + names: + - {first: Sanja, last: Štajner} + - {first: Sanja, last: Stajner} +sanjeev-khudanpur: + names: + - {first: Sanjeev, last: Khudanpur} + - {first: S., last: Khudanpur} +sankar-kuppan: + names: + - {first: Sankar, last: Kuppan} + - {first: Sankar, last: K} +sanket-vaibhav-mehta: + names: + - {first: Sanket Vaibhav, last: Mehta} + - {first: Vaibhav, last: Mehta} +santiago-cortes-vaillo: + names: + - {first: Santiago, last: Cortés Vaíllo} + - {first: Santiago, last: Cortes} + - {first: Santiago Cortés, last: Vaíllo} +saptarshi-ghosh-cincinnati: + degree: University of Cincinnati + disable_name_matching: true + names: + - {first: Saptarshi, last: Ghosh} + orcid: 0009-0006-9472-7121 +sarah-e-schwarm: + names: + - {first: Sarah E., last: Schwarm} + - {first: Sarah, last: Schwarm} +sarah-masud-preum: + names: + - {first: Sarah Masud, last: Preum} + - {first: Sarah, last: Preum} + - {first: Sarah M., last: Preum} + orcid: 0000-0002-7771-8323 +sarah-moeller: + names: + - {first: Sarah, last: Moeller} + - {first: Sarah R., last: Moeller} +sarah-taylor: + names: + - {first: Sarah, last: Taylor} + - {first: Sarah M., last: Taylor} +sarah-vieweg: + names: + - {first: Sarah, last: Vieweg} + - {first: Sarah E., last: Vieweg} +sari-saba-sadiya: + names: + - {first: Sari, last: Saba-Sadiya} + - {first: Sari, last: Sadiya} +sarkis-abrilian: + names: + - {first: Sarkis, last: Abrilian} + - {first: S., last: Abrilian} +sasa-hasan: + names: + - {first: Saša, last: Hasan} + - {first: Sasa, last: Hasan} +sasa-petrovic: + names: + - {first: Saša, last: Petrović} + - {first: Sasa, last: Petrovic} +sashka-t-davis: + names: + - {first: Sashka T., last: Davis} + - {first: Sashka, last: Davis} +sasikumar-m: + names: + - {first: Sasikumar, last: M} + - {first: Sasikumar, last: M.} +sathish-reddy-indurthi: + names: + - {first: Sathish Reddy, last: Indurthi} + - {first: Sathish, last: Reddy} + - {first: Sathish, last: Indurthi} +sathiya-keerthi-selvaraj: + names: + - {first: Sathiya Keerthi, last: Selvaraj} + - {first: Sathiya, last: Keerthi} +satoshi-sakai: + names: + - {first: Satoshi, last: Sakai} + - {first: S., last: Sakai} +satoshi-sato: + names: + - {first: Satoshi, last: Sato} + - {first: S., last: Sato} +satoshi-shirai: + names: + - {first: Satoshi, last: Shirai} + - {first: Satosi, last: Shirai} +saul-leon: + names: + - {first: Saul, last: León} + - {first: Saul, last: León Silverio} + - {first: Saúl, last: León} +scott-a-hale: + names: + - {first: Scott A., last: Hale} + - {first: Scott, last: Hale} +scott-c-stoness: + names: + - {first: Scott C., last: Stoness} + - {first: Scott, last: Stoness} +scott-cyphers: + names: + - {first: Scott, last: Cyphers} + - {first: D. Scott, last: Cyphers} +scott-f-kiesling: + names: + - {first: Scott F., last: Kiesling} + - {first: Scott, last: Kiesling} +scott-s-l-piao: + names: + - {first: Scott S.L., last: Piao} + - {first: Scott, last: Piao} + - {first: Scott S. L., last: Piao} + - {first: S. L., last: Piao} +se-young-park: + names: + - {first: Se-Young, last: Park} + - {first: Se Young, last: Park} +seamus-lawless: + names: + - {first: Seamus, last: Lawless} + - {first: Séamus, last: Lawless} +sean-a-fulop: + names: + - {first: Sean A., last: Fulop} + - {first: Sean, last: Fulop} +sean-boisen: + names: + - {first: Sean, last: Boisen} + - {first: S., last: Boisen} +sebastian-drude: + names: + - {first: Sebastian, last: Drude} + - {first: S., last: Drude} +sebastian-g-m-handschke: + names: + - {first: Sebastian G. M., last: Händschke} + - {first: Sebastian G.M., last: Händschke} +sebastian-goeser: + names: + - {first: Sebastian, last: Goeser} + - {first: S., last: Goeser} +sebastian-pado: + names: + - {first: Sebastian, last: Padó} + - {first: Sebastian, last: Pado} +sebastian-pena-saldarriaga: + names: + - {first: Sebastián Peña, last: Saldarriaga} + - {first: Peña, last: Saldarriaga} + - {first: Sebastian, last: Peña Saldarriaga} +sebastian-stuker: + names: + - {first: Sebastian, last: Stüker} + - {first: Sebastian, last: Stueker} +sebastien-flavier: + names: + - {first: Sébastien, last: Flavier} + - {first: Sebastien, last: Flavier} +sebastien-fournier: + names: + - {first: Sébastien, last: Fournier} + - {first: Sebastien, last: Fournier} +sebastien-le-maguer: + names: + - {first: Sébastien, last: Le Maguer} + - {first: Sébastien Le, last: Maguer} +see-kiong-ng: + names: + - {first: See Kiong, last: Ng} + - {first: See-Kiong, last: Ng} +seiichi-nakagawa: + names: + - {first: Seiichi, last: Nakagawa} + - {first: S., last: Nakagawa} +selcuk-kopru: + names: + - {first: Selcuk, last: Kopru} + - {first: Selçuk, last: Köprü} +seng-cho-t-chou: + names: + - {first: Seng-Cho T., last: Chou} + - {first: Seng-cho T., last: Chou} +seok-bae-jang: + names: + - {first: Seok Bae, last: Jang} + - {first: Seok B., last: Jang} +septina-dian-larasati: + names: + - {first: Septina Dian, last: Larasati} + - {first: Septina, last: Larasati} +serge-a-yablonsky: + names: + - {first: Serge A., last: Yablonsky} + - {first: Serge, last: Yablonsky} +sergei-nirenburg: + names: + - {first: Sergei, last: Nirenburg} + - {first: Sergei, last: Nirenberg} + - {first: S., last: Nirenburg} +sergey-o-kuznetsov: + names: + - {first: Sergey O., last: Kuznetsov} + - {first: Sergei O., last: Kuznetsov} +sergey-v-pakhomov: + names: + - {first: Sergey V., last: Pakhomov} + - {first: Sergey, last: Pakhomov} +sergio-barrachina: + names: + - {first: Sergio, last: Barrachina} + - {first: S., last: Barrachina} +sergio-jose-rodriguez-mendez: + comment: NYCU + names: + - {first: Sergio José, last: Rodríguez Méndez} + - {first: Sergio J., last: Rodriguez Mendez} + orcid: 0000-0001-7203-8399 +sergio-ortiz-rojas: + names: + - {first: Sergio, last: Ortiz Rojas} + - {first: Sergio, last: Ortiz-Rojas} + - {first: Sergio Ortiz, last: Rojas} +serguei-pakhomov: + names: + - {first: Serguei, last: Pakhomov} + - {first: Serguei V., last: Pakhomov} +sethserey-sam: + names: + - {first: Sethserey, last: Sam*’} + - {first: Sethserey, last: Sam} +seungho-cha: + names: + - {first: Seungho, last: Cha} + - {first: S., last: Cha} +shachi-h-kumar: + names: + - {first: Shachi, last: H. Kumar} + - {first: Shachi H, last: Kumar} +shafiq-joty: + names: + - {first: Shafiq, last: Joty} + - {first: Shafiq R., last: Joty} +shannon-l-spruit: + names: + - {first: Shannon L., last: Spruit} + - {first: Shannon, last: Spruit} +sharada-prasanna-mohanty: + names: + - {first: Sharada Prasanna, last: Mohanty} + - {first: Sharada, last: Mohanty} +sharath-chandra-guntuku: + names: + - {first: Sharath, last: Chandra Guntuku} + - {first: Sharath Chandra, last: Guntuku} +shari-landes: + names: + - {first: Shari, last: Landes} + - {first: Shari, last: Land} +sharon-goldwater: + names: + - {first: Sharon, last: Goldwater} + - {first: Sharon J., last: Goldwater} +sharon-oviatt: + names: + - {first: Sharon, last: Oviatt} + - {first: Sharon L., last: Oviatt} +sharon-small: + names: + - {first: Sharon, last: Small} + - {first: Sharon, last: Gower Small} +shashank-gupta-uiuc: + disable_name_matching: true + names: + - {first: Shashank, last: Gupta} + orcid: 0000-0002-3683-3739 +shay-b-cohen: + names: + - {first: Shay B., last: Cohen} + - {first: Shay, last: Cohen} +sheila-c-m-de-sousa: + names: + - {first: Sheila, last: C. M. de Sousa} + - {first: Sheila C.M., last: de Sousa} +sheila-r-glasbey: + names: + - {first: Sheila R., last: Glasbey} + - {first: Sheila, last: Glasbey} + - {first: S.R., last: Glasbey} +shelley-ching-yu-hsieh: + names: + - {first: Shelley Ching-Yu, last: Hsieh} + - {first: Ching-yu, last: Hsieh} + - {first: Shelley Ching-yu, last: Hsieh} + - {first: Ching-yu Shelley, last: Hsieh} +sheng-he-sun: + names: + - {first: Sheng-he, last: Sun} + - {first: Sheng-He, last: Sun} +shengjie-li: + comment: University of Texas at Dallas + names: + - {first: Shengjie, last: Li} + orcid: 0000-0002-5442-5464 +shengjie-li-peking: + comment: Peking University + names: + - {first: Shengjie, last: Li} + orcid: 0000-0003-3489-9125 +sherri-condon: + names: + - {first: Sherri, last: Condon} + - {first: Sherri L., last: Condon} +shervin-malmasi: + names: + - {first: Shervin, last: Malmasi} + - {first: Shevin, last: Malmasi} +sheryl-young: + names: + - {first: Sheryl, last: Young} + - {first: Sheryl R., last: Young} +shih-fu-chang: + names: + - {first: Shih-Fu, last: Chang} + - {first: Shih-fu, last: Chang} +shih-min-li: + names: + - {first: Shih-Min, last: Li} + - {first: Shi-Min, last: Li} +shih-ping-wang: + names: + - {first: Shih-ping, last: Wang} + - {first: Shih-Ping, last: Wang} +shih-ting-huang: + names: + - {first: Shih-Ting, last: Huang} + - {first: Shih-ting, last: Huang} + - {first: Shi-Ting, last: Huang} +shikhar-kumar-sarma-gu: + degree: Gauhati University + disable_name_matching: true + names: + - {first: Shikhar Kr., last: Sarma} + - {first: Shikhar, last: Sarma} + - {first: Shikhar, last: Sharma} + - {first: Shikhar Kr, last: Sarma} + - {first: Shikhar, last: Kumar Sarma} + - {first: Shikhar Kumar, last: Sarma} + orcid: 0000-0002-9495-1901 +shin-ichiro-kamei: + names: + - {first: Shin-ichiro, last: Kamei} + - {first: Shinichiro, last: Kamei} +shin-ya-amano: + names: + - {first: Shin-ya, last: Amano} + - {first: Sin-ya, last: Amano} +shinichi-ando: + names: + - {first: Shinichi, last: Ando} + - {first: Sinichi, last: Ando} + - {first: Shin-ichi, last: Ando} + - {first: Shin-Ichi, last: Ando} +shinichi-doi: + names: + - {first: Shinichi, last: Doi} + - {first: Shin’ichi, last: Doi} +shirley-dita: + names: + - {first: Shirley, last: Dita} + - {first: Shirley N., last: Dita} +shivashankar-subramanian: + names: + - {first: Shivashankar, last: Subramanian} + - {first: S., last: Shivashankar} +shixi-fan: + names: + - {first: Shixi, last: Fan} + - {first: ShiXi, last: Fan} +shoichi-matsunaga: + names: + - {first: Shoichi, last: Matsunaga} + - {first: Sho-ichi, last: Matsunaga} +shoichi-yokoyama: + names: + - {first: Shoichi, last: Yokoyama} + - {first: S., last: Yokoyama} +shou-de-lin: + names: + - {first: Shou-De, last: Lin} + - {first: Shou-de, last: Lin} +shozo-makino: + names: + - {first: Shozo, last: Makino} + - {first: S., last: Makino} +shrikanth-narayanan: + names: + - {first: Shrikanth, last: Narayanan} + - {first: Shri, last: Narayanan} + - {first: Shrikanth S., last: Narayanan} +shu-kai-hsieh: + names: + - {first: Shu-Kai, last: Hsieh} + - {first: Shu-kai, last: Hsieh} + - {first: ShuKai, last: Hsieh} +shu-nakazato: + names: + - {first: Shu, last: Nakazato} + - {first: S., last: Nakazato} +shu-yang-ubc: + comment: University of British Columbia + disable_name_matching: true + names: + - {first: Shu, last: Yang} + orcid: 0000-0002-8507-7191 +shu-yen-lin: + names: + - {first: Shu-Yen, last: Lin} + - {first: Shu-yen, last: Lin} +shuan-fan-huang: + names: + - {first: Shuan-fan, last: Huang} + - {first: Shuan-Fan, last: Huang} +shuanglong-li: + names: + - {first: Shuanglong, last: Li} + - {first: ShuangLong, last: Li} +shuichi-itahashi: + names: + - {first: Shuichi, last: Itahashi} + - {first: Shuich, last: Itahashi} +shun-der-chen: + names: + - {first: Shun-Der, last: Chen} + - {first: Shun-Der, last: Cheng} +shun-ya-fukunaga: + names: + - {first: Shun-ya, last: Fukunaga} + - {first: Shunya, last: Fukunaga} +shunsuke-uemura: + names: + - {first: Shunsuke, last: Uemura} + - {first: Syunsuke, last: Uemura} +shunya-iwasawa: + names: + - {first: Shun’ya, last: Iwasawa} + - {first: Shunya, last: Iwasawa} +shyam-sundar-agrawal: + names: + - {first: Shyam Sundar, last: Agrawal} + - {first: Shyam, last: Agrawal} +sia-kolkovska: + names: + - {first: Sia, last: Kolkovska} + - {first: Siya, last: Kolkovska} +siaw-fong-chung: + names: + - {first: Siaw-Fong, last: Chung} + - {first: Siaw Fong, last: Chung} +sida-i-wang: + names: + - {first: Sida I., last: Wang} + - {first: Sida, last: Wang} +siddharth-jain: + names: + - {first: Siddharth, last: Jain} + - {first: Siddhanth, last: Jain} +silvia-hansen-schirra: + names: + - {first: Silvia, last: Hansen-Schirra} + - {first: Silvia, last: Hansen} +silvia-moraes: + names: + - {first: Sílvia, last: Moraes} + - {first: Silvia, last: Moraes} +silvia-necsulescu: + names: + - {first: Silvia, last: Necşulescu} + - {first: Silvia, last: Necsulescu} +silvia-vazquez: + names: + - {first: Silvia, last: Vázquez} + - {first: Silvia Rodríguez, last: Vázquez} +silvio-cordeiro: + names: + - {first: Silvio, last: Cordeiro} + - {first: Silvio Ricardo, last: Cordeiro} +silviu-cucerzan: + names: + - {first: Silviu, last: Cucerzan} + - {first: Silviu-Petru, last: Cucerzan} +simeon-junker: + names: + - {first: Simeon, last: Junker} + - {first: Simeon, last: Schüz} +simon-benigeri: + names: + - {first: Simon, last: Benigeri} + - {first: Simon, last: Ben Igeri} +simon-corston-oliver: + names: + - {first: Simon, last: Corston-Oliver} + - {first: Simon H., last: Corston-Oliver} +simon-dobrisek: + names: + - {first: Simon, last: Dobrisek} + - {first: Simon, last: Dobrišek} +simon-suster: + names: + - {first: Simon, last: Suster} + - {first: Simon, last: Šuster} +simone-paolo-ponzetto: + names: + - {first: Simone Paolo, last: Ponzetto} + - {first: Simone P., last: Ponzetto} + - {first: Simone, last: Ponzetto} +simonetta-montemagni: + names: + - {first: Simonetta, last: Montemagni} + - {first: S., last: Montemagni} +sina-zarriess: + names: + - {first: Sina, last: Zarrieß} + - {first: Sina, last: Zarriess} +sisay-fissaha-adafre: + names: + - {first: Sisay, last: Fissaha Adafre} + - {first: Sisay, last: Fissaha} + - {first: Sisay Fissaha, last: Adafre} +sivaji-bandyopadhyay: + names: + - {first: Sivaji, last: Bandyopadhyay} + - {first: Sivaji, last: Bandopadhyay} + - {first: Sivaju, last: Bandyopadhyay} + - {first: Sivaji, last: B} +sjur-moshagen: + names: + - {first: Sjur, last: Moshagen} + - {first: Sjur Nørstebø, last: Moshagen} + - {first: Sjur N., last: Moshagen} +sobha-l: + names: + - {first: Sobha, last: L} + - {first: L., last: Sobha} +sobha-lalitha-devi: + names: + - {first: Sobha, last: Lalitha Devi} + - {first: Lalitha Devi, last: Sobha} + - {first: Sobha Lalitha, last: Devi} +sofia-gustafson-capkova: + names: + - {first: Sofia, last: Gustafson-Capková} + - {first: Sofia, last: Gustafson Capková} +sofia-stronbergsson: + names: + - {first: Sofia, last: Strönbergsson} + - {first: Sofia, last: Strömbergsson} +sofie-johansson-kokkinakis: + names: + - {first: Sofie Johansson, last: Kokkinakis} + - {first: Sofie, last: Johansson Kokkinakis} +solange-oliveira-rezende: + names: + - {first: Solange, last: Oliveira Rezende} + - {first: Solange, last: Rezende} +solomon-teferra-abate: + names: + - {first: Solomon Teferra, last: Abate} + - {first: Solomon, last: Teferra Abate} + - {first: Solomon, last: Teferra} +somayajulu-sripada: + names: + - {first: Somayajulu, last: Sripada} + - {first: Somayajulu G., last: Sripada} + - {first: Somayajula G., last: Sripada} + - {first: Somayajulu Gowri, last: Sripada} +somayeh-jafaritazehjani: + names: + - {first: Somayeh, last: Jafaritazehjani} + - {first: Somayeh, last: Jafaritazehjan} +song-chun-zhu: + names: + - {first: Song-chun, last: Zhu} + - {first: Song-Chun, last: Zhu} +sonia-frota: + names: + - {first: Sónia, last: Frota} + - {first: S., last: Frota} +sonia-vazquez: + names: + - {first: Sonia, last: Vázquez} + - {first: Sonia, last: Vazquez} + - {first: Sonia, last: Vázquez Pérez} +sonja-bosch: + names: + - {first: Sonja, last: Bosch} + - {first: Sonja E., last: Bosch} +sonja-niessen: + names: + - {first: Sonja, last: Nießen} + - {first: Sonja, last: Niessen} + - {first: S., last: Nießen} +sophia-y-m-lee: + names: + - {first: Sophia Y. M., last: Lee} + - {first: Sophia Y.M., last: Lee} +sophia-yat-mei-lee: + names: + - {first: Sophia Yat Mei, last: Lee} + - {first: Yat-Mei, last: Lee} +sophie-rosset: + names: + - {first: Sophie, last: Rosset} + - {first: S., last: Rosset} +sotaro-kita: + names: + - {first: Sotaro, last: Kita} + - {first: S., last: Kita} +soumil-mandal: + names: + - {first: Soumil, last: Mandal} + - {first: Soumik, last: Mandal} +soumya-sankar-ghosh: + names: + - {first: Soumya Sankar, last: Ghosh} + - {first: Soumya, last: Ghosh} +sowmya-s-sundaram: + names: + - {first: Sowmya S., last: Sundaram} + - {first: Sowmya S, last: Sundaram} +spela-vintar: + names: + - {first: Špela, last: Vintar} + - {first: Spela, last: Vintar} +spyros-raptis: + names: + - {first: Spyros, last: Raptis} + - {first: S., last: Raptis} +srini-narayanan: + names: + - {first: Srini, last: Narayanan} + - {first: Srinivas, last: Narayanan} +srinivas-bangalore: + names: + - {first: Srinivas, last: Bangalore} + - {first: B., last: Srinivas} + - {first: '', last: Srinivas} +srinivasan-janarthanam: + names: + - {first: Srinivasan, last: Janarthanam} + - {first: Srini, last: Janarthanam} +ssu-cheng-chen: + names: + - {first: Ssu-Cheng, last: Chen} + - {first: Su-Cheng, last: Chen} +stan-c-kwasny: + names: + - {first: Stan C., last: Kwasny} + - {first: Stan, last: Kwasny} +stan-szpakowicz: + names: + - {first: Stan, last: Szpakowicz} + - {first: Stanislaw, last: Szpakowicz} + - {first: Stanisław, last: Szpakowicz} +stanley-f-chen: + names: + - {first: Stanley F., last: Chen} + - {first: Stanley, last: Chen} +stanley-j-rosenschein: + names: + - {first: Stanley J., last: Rosenschein} + - {first: Stanley, last: Rosenschein} + - {first: Stan, last: Rosenschein} +stasa-vujicic-stankovic: + names: + - {first: Stasa, last: Vujicic-Stankovic} + - {first: Staša Vujičić, last: Stanković} + - {first: Staša, last: Vujičić Stanković} +stavroula-evita-fotinea: + names: + - {first: Stavroula-Evita, last: Fotinea} + - {first: S.-E., last: Fotinea} +stefan-benus: + names: + - {first: Štefan, last: Beňuš} + - {first: Stefan, last: Benus} + - {first: S̆tefan, last: Ben̆us̆} +stefan-daniel-dumitrescu: + names: + - {first: Stefan Daniel, last: Dumitrescu} + - {first: Ștefan Daniel, last: Dumitrescu} + - {first: Ștefan, last: Dumitrescu} +stefan-darmoni: + names: + - {first: Stéfan, last: Darmoni} + - {first: Stefan, last: Darmoni} +stefan-l-frank: + names: + - {first: Stefan L., last: Frank} + - {first: Stefan, last: Frank} +stefan-muller: + names: + - {first: Stefan, last: Müller} + - {first: Stefan, last: Muller} + - {first: Stefan, last: Mueller} +stefan-rued: + names: + - {first: Stefan, last: Rued} + - {first: Stefan, last: Rüd} +stefan-schulz: + names: + - {first: Stefan, last: Schulz} + - {first: Stefan, last: Schultz} +stefan-steidl: + names: + - {first: Stefan, last: Steidl} + - {first: S., last: Steidl} +stefan-wagner: + names: + - {first: Stefan, last: Wagner} + - {first: Stefan, last: Wager} +stefanie-shattuck-hufnagel: + names: + - {first: Stefanie, last: Shattuck-Hufnagel} + - {first: S. Shattuck, last: Hufnagel} +stefano-dei-rossi: + names: + - {first: Stefano Dei, last: Rossi} + - {first: Stefano, last: Dei Rossi} +stefano-zanobini: + names: + - {first: Stefano, last: Zanobini} + - {first: S., last: Zanobini} +stelios-piperidis: + names: + - {first: Stelios, last: Piperidis} + - {first: Stelios, last: Piperdis} + - {first: S., last: Piperidis} +stella-markantonatou: + names: + - {first: Stella, last: Markantonatou} + - {first: S., last: Markantonatou} +stephan-m-kerpedjiev: + names: + - {first: Stephan M., last: Kerpedjiev} + - {first: Stephan, last: Kerpedjiev} +stephan-vogel: + names: + - {first: Stephan, last: Vogel} + - {first: Stephen, last: Vogel} + - {first: S., last: Vogel} +stephane-ferrari: + names: + - {first: Stéphane, last: Ferrari} + - {first: Stephane, last: Ferrari} +stephane-meystre: + names: + - {first: Stephane, last: Meystre} + - {first: Stéphane, last: Meystre} +stephanie-lukin: + names: + - {first: Stephanie, last: Lukin} + - {first: Stephanie M., last: Lukin} +stephanie-s-everett: + names: + - {first: Stephanie S., last: Everett} + - {first: Stephanie, last: Everett} +stephanie-seneff: + names: + - {first: Stephanie, last: Seneff} + - {first: S., last: Seneff} +stephanie-strassel: + names: + - {first: Stephanie, last: Strassel} + - {first: Stephanie M., last: Strassel} +stephen-a-della-pietra: + names: + - {first: Stephen A., last: Della Pietra} + - {first: Stephen, last: Della Pietra} + - {first: Stephen, last: DellaPietra} + - {first: S., last: Della Pietra} +stephen-boxwell: + names: + - {first: Stephen, last: Boxwell} + - {first: Stephen A., last: Boxwell} +stephen-c-levinson: + comment: Max-Planck-Institute for Psycholinguistics + names: + - {first: Stephen C., last: Levinson} + - {first: St., last: Levinson} + similar: + - stephen-e-levinson +stephen-e-levinson: + comment: Bell Labs + names: + - {first: Stephen E., last: Levinson} + - {first: S. E., last: Levinson} + similar: + - stephen-c-levinson +stephen-j-green: + names: + - {first: Stephen J., last: Green} + - {first: Stephen, last: Green} + - {first: Stephen J, last: Green} +stephen-kunath: + names: + - {first: Stephen, last: Kunath} + - {first: Stephen A., last: Kunath} +stephen-l-gallant: + names: + - {first: Stephen L., last: Gallant} + - {first: Stephen, last: Gallant} +stephen-pulman: + names: + - {first: Stephen, last: Pulman} + - {first: Stephen G., last: Pulman} + - {first: S.G., last: Pulman} + - {first: S. G., last: Pulman} +stephen-soderland: + names: + - {first: Stephen, last: Soderland} + - {first: S., last: Soderland} +steve-austin: + names: + - {first: Steve, last: Austin} + - {first: S., last: Austin} +steve-j-young: + names: + - {first: Steve J., last: Young} + - {first: S.J., last: Young} +steve-whittaker: + names: + - {first: Steve, last: Whittaker} + - {first: S., last: Whittaker} +steve-young: + names: + - {first: Steve, last: Young} + - {first: Steven, last: Young} +steven-abney: + names: + - {first: Steven, last: Abney} + - {first: Steve, last: Abney} + - {first: Steven P., last: Abney} + - {first: S., last: Abney} +steven-feiner: + names: + - {first: Steven, last: Feiner} + - {first: Steven K., last: Feiner} +steven-h-weinberger: + names: + - {first: Steven H., last: Weinberger} + - {first: Steven, last: Weinberger} +steven-j-maiorano: + names: + - {first: Steven J., last: Maiorano} + - {first: Steve, last: Maiorano} + - {first: Steven, last: Maiorano} + - {first: Steve, last: Moiorano} +steven-jm-jones: + names: + - {first: Steven JM, last: Jones} + - {first: Steven, last: Jones} +steven-krauwer: + names: + - {first: Steven, last: Krauwer} + - {first: S., last: Krauwer} +steven-l-lytinen: + names: + - {first: Steven L., last: Lytinen} + - {first: Steven, last: Lytinen} +steven-moran: + names: + - {first: Steven, last: Moran} + - {first: Steve, last: Moran} +steven-roth: + names: + - {first: Steven, last: Roth} + - {first: Steven F., last: Roth} +steven-skiena: + names: + - {first: Steven, last: Skiena} + - {first: Steve, last: Skiena} +stuart-c-shapiro: + names: + - {first: Stuart C., last: Shapiro} + - {first: S.C., last: Shapiro} +stuart-m-shieber: + names: + - {first: Stuart M., last: Shieber} + - {first: Stuart, last: Shieber} +stylianos-bakamidis: + names: + - {first: Stylianos, last: Bakamidis} + - {first: S., last: Bakamidis} +su-youn-yoon: + names: + - {first: Su-Youn, last: Yoon} + - {first: Su-youn, last: Yoon} +subbarao-k-v: + names: + - {first: Subbarao K., last: V} + - {first: K.V., last: Subbarao} + - {first: Subbarao K, last: V.} +sudip-kumar-naskar: + names: + - {first: Sudip Kumar, last: Naskar} + - {first: Sudip, last: Kumar Naskar} + - {first: Sudip, last: Naskar} +sue-ellen-wright: + names: + - {first: Sue Ellen, last: Wright} + - {first: Sue, last: Wright} +sue-j-ker: + names: + - {first: Sue J., last: Ker} + - {first: Sur-Jin, last: Ker} + - {first: Su-Jin, last: Ker} + - {first: Sue-Jin, last: Ker} + - {first: Sue-jin, last: Ker} +suen-caesar-lun: + names: + - {first: Suen Caesar, last: Lun} + - {first: Caesar Suen, last: Lun} + - {first: Caesar, last: Lun} + - {first: S. Caesar, last: Lun} + - {first: C, last: Lun} +suguru-saito: + names: + - {first: Suguru, last: Saitô} + - {first: Suguru, last: Saito} +sujatha-das-gollapalli: + names: + - {first: Sujatha Das, last: Gollapalli} + - {first: Sujatha, last: Das Gollapalli} + - {first: Sujatha, last: Das} +sukhada: + names: + - {first: '', last: Sukhada} + - {first: Sukhada, last: Palkar} +sung-dong-kim: + names: + - {first: Sung Dong, last: Kim} + - {first: Sung-Dong, last: Kim} +sung-fung-tsai: + names: + - {first: Sung-Fung, last: Tsai} + - {first: Sung-Feng, last: Tsai} +sung-hyon-myaeng: + names: + - {first: Sung-Hyon, last: Myaeng} + - {first: Sung Hyon, last: Myaeng} + - {first: Sung H., last: Myaeng} + - {first: Sung-hyon, last: Myaeng} +sung-young-jung: + names: + - {first: Sung Young, last: Jung} + - {first: Sung-Young, last: Jung} +sunghwan-mac-kim: + names: + - {first: Sunghwan Mac, last: Kim} + - {first: Sunghwan, last: Kim} +sungjin-lee: + names: + - {first: Sungjin, last: Lee} + - {first: Sung-Jin, last: Lee} +sungrim-moon: + names: + - {first: Sungrim, last: Moon} + - {first: SungRim, last: Moon} +surya-ganesh: + names: + - {first: Surya, last: Ganesh} + - {first: Surya Ganesh, last: V} + - {first: Surya Ganesh, last: Veeravalli} +susan-armstrong: + names: + - {first: Susan, last: Armstrong} + - {first: Susan, last: Warwick-Armstrong} + - {first: Susan, last: Warwick} + - {first: S., last: Warwick-Armstrong} +susan-e-brennan: + names: + - {first: Susan E., last: Brennan} + - {first: Susan, last: Brennan} +susan-haller: + names: + - {first: Susan, last: Haller} + - {first: Susan M., last: Haller} + - {first: S.M., last: Haller} +susan-p-converse: + names: + - {first: Susan P., last: Converse} + - {first: Susan, last: Converse} +susan-w-mcroy: + names: + - {first: Susan W., last: McRoy} + - {first: Susan, last: McRoy} +susan-w-talbott: + names: + - {first: Susan W., last: Talbott} + - {first: Susan, last: Talbott} +susan-windisch-brown: + names: + - {first: Susan Windisch, last: Brown} + - {first: Susan, last: Windisch Brown} + - {first: Susan, last: Brown} + - {first: Susan W., last: Brown} +susana-early: + names: + - {first: Susana, last: Early} + - {first: S., last: Early} +susana-sotelo: + names: + - {first: Susana, last: Sotelo} + - {first: Susana Sotelo, last: Docio} + orcid: 0000-0002-0067-7957 +susann-luperfoy: + names: + - {first: Susann, last: LuperFoy} + - {first: Susann, last: Luperfoy} +suzan-uskudarli: + names: + - {first: Suzan, last: Uskudarli} + - {first: Suzan, last: Üsküdarlı} +suzana-ilic: + names: + - {first: Suzana, last: Ilic} + - {first: Suzana, last: Ilić} +suzanne-liebowitz-taylor: + names: + - {first: Suzanne Liebowitz, last: Taylor} + - {first: Suzanne, last: Liebowitz} +sv-ramanan: + names: + - {first: Sv, last: Ramanan} + - {first: SV, last: Ramanan} +sven-buechel: + names: + - {first: Sven, last: Buechel} + - {first: Sven, last: Büchel} +sylvain-galliano: + names: + - {first: Sylvain, last: Galliano} + - {first: S., last: Galliano} +sylvana-sofkova-hashemi: + names: + - {first: Sylvana, last: Sofkova Hashemi} + - {first: Sylvana, last: Sofkova} +szu-ting-yi: + names: + - {first: Szu-ting, last: Yi} + - {first: Szuting, last: Yi} +t-florian-jaeger: + names: + - {first: T. Florian, last: Jaeger} + - {first: Florian, last: Jaeger} +t-mark-ellison: + names: + - {first: T. Mark, last: Ellison} + - {first: T. M., last: Ellison} +t-t-mirnalinee: + names: + - {first: T. T., last: Mirnalinee} + - {first: Mirnalinee, last: T T} + - {first: T T, last: Mirnalinee} +t-v-geetha: + names: + - {first: T. V., last: Geetha} + - {first: Geetha, last: T V} + - {first: T V, last: Geetha} +tafseer-ahmed: + names: + - {first: Tafseer, last: Ahmed} + - {first: Tafseer, last: Ahmed Khan} +takahiro-ohno: + names: + - {first: Takahiro, last: Ohno} + - {first: Takahiro, last: Ono} +takahiro-wakao: + names: + - {first: Takahiro, last: Wakao} + - {first: T., last: Wakao} +takashi-onishi: + names: + - {first: Takashi, last: Onishi} + - {first: Takeshi, last: Onishi} + - {first: Takashi, last: Oonishi} +takehiko-yoshimi: + names: + - {first: Takehiko, last: Yoshimi} + - {first: T., last: Yoshimi} +taku-kudo: + names: + - {first: Taku, last: Kudo} + - {first: Taku, last: Kudoh} +takumi-goto: + degree: Nara Institute of Science and Technology + names: + - {first: Takumi, last: Goto} + - {first: Takumi, last: Gotou} + orcid: 0009-0006-8124-899X +tamara-berg: + names: + - {first: Tamara, last: Berg} + - {first: Tamara L., last: Berg} + - {first: Tamara L, last: Berg} +tamara-bobic: + names: + - {first: Tamara, last: Bobić} + - {first: Tamara, last: Bobic} +tamas-biro: + names: + - {first: Tamás, last: Bíró} + - {first: Tamás, last: Biró} +tamas-horvath: + names: + - {first: Tamás, last: Horváth} + - {first: T., last: Horvath} +tamas-varadi: + names: + - {first: Tamás, last: Váradi} + - {first: Tamas, last: Váradi} +tangqiu-li: + names: + - {first: Tangqiu, last: Li} + - {first: Tanqiu, last: Li} +tanja-samardzic: + names: + - {first: Tanja, last: Samardzic} + - {first: Tanja, last: Samardžić} +tanveer-a-faruquie: + names: + - {first: Tanveer A., last: Faruquie} + - {first: Tanveer, last: Faruquie} + - {first: Tanveer A, last: Faruquie} +tapas-nayak: + names: + - {first: Tapas, last: Nayak} + - {first: Tapas, last: Nayek} +tat-seng-chua: + names: + - {first: Tat-Seng, last: Chua} + - {first: Tat Seng, last: Chua} +tatsunori-b-hashimoto: + names: + - {first: Tatsunori B., last: Hashimoto} + - {first: Tatsunori, last: Hashimoto} +tatyana-ruzsics: + names: + - {first: Tatyana, last: Ruzsics} + - {first: Tatiana, last: Ruzsics} +ted-briscoe: + names: + - {first: Ted, last: Briscoe} + - {first: Edward, last: Briscoe} + - {first: E.J., last: Briscoe} +ted-e-dunning: + names: + - {first: Ted E., last: Dunning} + - {first: Ted, last: Dunning} +teresa-goncalves: + names: + - {first: Teresa, last: Gonçalves} + - {first: Teresa, last: Goncalves} +terry-patten: + names: + - {first: Terry, last: Patten} + - {first: T., last: Patten} +th-r-hofmann: + names: + - {first: Th. R., last: Hofmann} + - {first: T. R., last: Hofmann} +theo-desbordes: + names: + - {first: Théo, last: Desbordes} + - {first: Theo, last: Desbordes} +therese-firmin: + names: + - {first: Therese, last: Firmin} + - {first: Therese Firmin, last: Hand} +thi-minh-huyen-nguyen: + names: + - {first: Thi Minh Huyen, last: Nguyen} + - {first: Thi Minh Huyền, last: Nguyễn} + - {first: Thị Minh Huyền, last: Nguyễn} + - {first: Thi-Minh-Huyen, last: Nguyen} + - {first: T. M. Huyen, last: Nguyen} +thiago-castro-ferreira: + names: + - {first: Thiago, last: Castro Ferreira} + - {first: Thiago, last: Ferreira} +thiago-d-tadeu: + names: + - {first: Thiago D., last: Tadeu} + - {first: Thiago, last: Tadeu} +thierry-guillotin: + names: + - {first: Thierry, last: Guillotin} + - {first: T., last: Guillotin} +thierry-hamon: + names: + - {first: Thierry, last: Hamon} + - {first: T., last: Hamon} +thilo-gotz: + names: + - {first: Thilo, last: Gotz} + - {first: Thilo, last: Götz} +thomas-a-keenan: + names: + - {first: Thomas A., last: Keenan} + - {first: Thomas, last: Keenan} +thomas-ahlswede: + names: + - {first: Thomas, last: Ahlswede} + - {first: Thomas E., last: Ahlswede} +thomas-alexander-trost: + names: + - {first: Thomas Alexander, last: Trost} + - {first: Thomas, last: Trost} +thomas-c-rindflesch: + names: + - {first: Thomas C., last: Rindflesch} + - {first: Thomas, last: Rindflesch} +thomas-l-cornell: + names: + - {first: Thomas L., last: Cornell} + - {first: Thomas, last: Cornell} +thomas-l-griffiths: + names: + - {first: Thomas L., last: Griffiths} + - {first: Thomas, last: Griffiths} +thomas-landauer: + names: + - {first: Thomas, last: Landauer} + - {first: Thomas K, last: Landauer} +thomas-mueller: + names: + - {first: Thomas, last: Mueller} + - {first: Thomas, last: Müller} +thomas-p-ohara: + names: + - {first: Thomas P., last: O’Hara} + - {first: Thomas, last: O’Hara} +thomas-s-morton: + names: + - {first: Thomas S., last: Morton} + - {first: Thomas, last: Morton} +thomas-ulrich-christiansen: + names: + - {first: Thomas Ulrich, last: Christiansen} + - {first: Thomas, last: Christiansen} +thomas-wasow: + names: + - {first: Thomas, last: Wasow} + - {first: Tom, last: Wasow} +thoudam-doren-singh: + names: + - {first: Thoudam Doren, last: Singh} + - {first: Thoudam, last: Doren Singh} +thuy-vu: + names: + - {first: Thuy, last: Vu} + - {first: Thuy-Trang, last: Vu} +thuylinh-nguyen: + names: + - {first: ThuyLinh, last: Nguyen} + - {first: Thuy Linh, last: Nguyen} +tiago-timponi-torrent: + names: + - {first: Tiago Timponi, last: Torrent} + - {first: Tiago, last: Torrent} + - {first: Tiago T., last: Torrent} +tiberiu-boros: + names: + - {first: Tiberiu, last: Boroş} + - {first: Tiberiu, last: Boroș} + - {first: Tiberiu, last: Boros} +tibor-gyimothy: + names: + - {first: Tibor, last: Gyimóthy} + - {first: T., last: Gyimothy} +tiejun-zhao: + names: + - {first: Tiejun, last: Zhao} + - {first: TieJun, last: Zhao} + - {first: Tie-Jun, last: Zhao} + - {first: Tie-jun, last: Zhao} +tiit-roosmaa: + names: + - {first: Tiit, last: Roosmaa} + - {first: T., last: Roosmaa} +tim-anderson: + names: + - {first: Tim, last: Anderson} + - {first: Timothy, last: Anderson} +tim-finin: + names: + - {first: Tim, last: Finin} + - {first: Timothy W., last: Finin} +tim-okeefe: + names: + - {first: Tim, last: O’Keefe} + - {first: Timothy, last: O’Keefe} +tim-rocktaschel: + names: + - {first: Tim, last: Rocktäschel} + - {first: Tim, last: Rocktaschel} +tim-van-de-cruys: + names: + - {first: Tim, last: Van de Cruys} + - {first: Tim, last: Van De Cruys} +timo-jarvinen: + names: + - {first: Timo, last: Jarvinen} + - {first: Timo, last: Järvinen} +timothy-baldwin: + names: + - {first: Timothy, last: Baldwin} + - {first: Tim, last: Baldwin} +timothy-chklovski: + names: + - {first: Timothy, last: Chklovski} + - {first: Tim, last: Chklovski} +timothy-j-hazen: + names: + - {first: Timothy J., last: Hazen} + - {first: T. J., last: Hazen} +timothy-miller: + names: + - {first: Timothy, last: Miller} + - {first: Tim, last: Miller} +timothy-odonnell: + names: + - {first: Timothy, last: O’Donnell} + - {first: Timothy J., last: O’Donnell} + - {first: Tim, last: O’Donnell} +timothy-w-bickmore: + names: + - {first: Timothy W., last: Bickmore} + - {first: Timothy, last: Bickmore} +tina-kluwer: + names: + - {first: Tina, last: Klüwer} + - {first: Tina, last: Kluewer} +ting-hao-huang: + names: + - {first: Ting-Hao, last: Huang} + - {first: Ting-Hao ‘Kenneth’, last: Huang} + - {first: Ting-Hao Kenneth, last: Huang} +ting-hao-yang: + names: + - {first: Ting-hao, last: Yang} + - {first: Ting-Hao, last: Yang} +ting-hui-kao: + names: + - {first: Ting-hui, last: Kao} + - {first: Ting-Hui, last: Kao} +toan-q-nguyen: + names: + - {first: Toan Q., last: Nguyen} + - {first: Toan, last: Nguyen} +tohru-shimizu: + names: + - {first: Tohru, last: Shimizu} + - {first: Toru, last: Shimizu} +tom-b-y-lai: + names: + - {first: Tom B.Y., last: Lai} + - {first: Tom B. Y., last: Lai} + - {first: Tom B.Y, last: Lai} + - {first: T. B. Y., last: Lai} +tom-bong-yeung-lai: + names: + - {first: Tom Bong-yeung, last: Lai} + - {first: Bong-Yeung, last: Lai} +tom-mitchell: + names: + - {first: Tom, last: Mitchell} + - {first: Tom M., last: Mitchell} +tomas-holan: + names: + - {first: Tomáš, last: Holan} + - {first: Tomas, last: Holan} +tomas-mikolov: + names: + - {first: Tomáš, last: Mikolov} + - {first: Tomas, last: Mikolov} +tomasz-obrebski: + names: + - {first: Tomasz, last: Obrębski} + - {first: Tomasz, last: Obrebski} +tomaz-erjavec: + names: + - {first: Tomaž, last: Erjavec} + - {first: Tomaz, last: Erjavec} +tomek-strzalkowski: + names: + - {first: Tomek, last: Strzalkowski} + - {first: Tomek, last: Strzalkowskl} + - {first: T., last: Strzalkowski} +tommi-a-pirinen: + names: + - {first: Tommi A., last: Pirinen} + - {first: Tommi, last: Pirinen} + - {first: Tommi A, last: Pirinen} +tomoki-toda: + names: + - {first: Tomoki, last: Toda} + - {first: Tomiki, last: Toda} +tony-rose: + names: + - {first: Tony, last: Rose} + - {first: Tony G., last: Rose} + - {first: T.G., last: Rose} +tor-klingberg: + names: + - {first: Tor, last: Klingberg} + - {first: T., last: Klingberg} +torbjorn-lager: + names: + - {first: Torbjörn, last: Lager} + - {first: Torbjorn, last: Lager} + - {first: Torbjoern, last: Lager} +toru-hitaka: + names: + - {first: Toru, last: Hitaka} + - {first: Tooru, last: Hitaka} +toyoaki-nishida: + names: + - {first: Toyoaki, last: Nishida} + - {first: Toyo-aki, last: Nishida} +tracy-holloway-king: + names: + - {first: Tracy Holloway, last: King} + - {first: Tracy H., last: King} +trang-mai-xuan: + names: + - {first: Trang, last: Mai Xuan} + - {first: Trang Mai, last: Xuan} +trevor-cohen: + comment: University of Washington + names: + - {first: Trevor, last: Cohen} + similar: + - trevor-cohn +trevor-cohn: + comment: University of Melbourne + names: + - {first: Trevor, last: Cohn} + similar: + - trevor-cohen +tri-thanh-nguyen: + names: + - {first: Tri-Thanh, last: Nguyen} + - {first: Tri Thanh, last: Nguyen} +tristan-vanrullen: + names: + - {first: Tristan, last: Vanrullen} + - {first: Tristan, last: van Rullen} + - {first: Tristan, last: Van Rullen} +trung-bui: + names: + - {first: Trung, last: Bui} + - {first: Trung H., last: Bui} +tsong-yi-chen: + names: + - {first: Tsong-yi, last: Chen} + - {first: Tsong-Yi, last: Chen} +tsuyoshi-morimoto: + names: + - {first: Tsuyoshi, last: Morimoto} + - {first: Tsuyosi, last: Morimoto} +tu-bao-ho: + names: + - {first: Tu-Bao, last: Ho} + - {first: Tu Bao, last: Ho} +tu-vu: + names: + - {first: Tu, last: Vu} + - {first: Tu Thanh, last: Vu} +tuan-anh-le: + names: + - {first: Tuan Anh, last: Lê} + - {first: Tuan Anh, last: Le} + - {first: Tuấn Anh, last: Lê} +tuan-lai: + names: + - {first: Tuan, last: Lai} + - {first: Tuan Manh, last: Lai} +tuan-tran: + names: + - {first: Tuan, last: Tran} + - {first: Tuan Dung, last: Tran} +tung-hui-chiang: + names: + - {first: Tung-Hui, last: Chiang} + - {first: TungHui, last: Chiang} +tuoi-thi-phan: + names: + - {first: Tuoi Thi, last: Phan} + - {first: Tuoi, last: T. Phan} +u-rohini: + names: + - {first: U., last: Rohini} + - {first: Rohini, last: U} +udo-kruschwitz: + names: + - {first: Udo, last: Kruschwitz} + - {first: U., last: Kruschwitz} +ulrich-schafer: + names: + - {first: Ulrich, last: Schäfer} + - {first: Ulrich, last: Schafer} + - {first: Ulrich, last: Schaefer} +ulrike-kugler: + names: + - {first: Ulrike, last: Kugler} + - {first: U., last: Kugler} +ulrike-mosel: + names: + - {first: Ulrike, last: Mosel} + - {first: U., last: Mosel} +ulrike-pado: + names: + - {first: Ulrike, last: Pado} + - {first: Ulrike, last: Padó} +umit-deniz-turan: + names: + - {first: Umit Deniz, last: Turan} + - {first: Ümit Deniz, last: Turan} +un-gian-iunn: + names: + - {first: Un-Gian, last: Iunn} + - {first: Un-gian, last: Iun} + - {first: Ún-giân, last: Iû} +ute-ziegenhain: + names: + - {first: Ute, last: Ziegenhain} + - {first: U., last: Ziegenhain} +utpal-kumar-sikdar: + names: + - {first: Utpal Kumar, last: Sikdar} + - {first: Utpal, last: Sikdar} +uwe-reichel: + names: + - {first: Uwe, last: Reichel} + - {first: Uwe D., last: Reichel} +v-k-mittal: + names: + - {first: V. K., last: Mittal} + - {first: V.K., last: Mittal} +vaijayanthi-m-sarma: + names: + - {first: Vaijayanthi M., last: Sarma} + - {first: Vaijayanthi, last: Sarma} +valentin-i-spitkovsky: + names: + - {first: Valentin I., last: Spitkovsky} + - {first: Valentin, last: Spitkovsky} +valentina-bartalesi-lenzi: + names: + - {first: Valentina, last: Bartalesi Lenzi} + - {first: V., last: Bartalesi Lenzi} +valeria-de-paiva: + names: + - {first: Valeria, last: de Paiva} +valeria-delisandra-feltrim: + names: + - {first: Valéria Delisandra, last: Feltrim} + - {first: Valéria, last: Feltrim} + - {first: Valéria D., last: Feltrim} +valerie-bellynck: + names: + - {first: Valérie, last: Bellynck} + - {first: Valerie, last: Bellynck} +valerie-maffiolo: + names: + - {first: Valérie, last: Maffiolo} + - {first: V., last: Maffiolo} +valerie-mapelli: + names: + - {first: Valérie, last: Mapelli} + - {first: Valerie, last: Mapelli} +valtcho-valtchev: + names: + - {first: Valtcho, last: Valtchev} + - {first: V., last: Valtchev} +van-minh-nguyen: + names: + - {first: Van minh, last: Nguyen} + - {first: Van Minh, last: Nguyen} +vania-dimitrova: + names: + - {first: Vania, last: Dimitrova} + - {first: Vanya, last: Dimitrova} +vanja-m-karan: + names: + - {first: Vanja M., last: Karan} + - {first: Vanja Mladen, last: Karan} +varun-manjunatha: + names: + - {first: Varun, last: Manjunatha} + - {first: Varun, last: Manjunath} +vassilios-digalakis: + names: + - {first: Vassilios, last: Digalakis} + - {first: V., last: Digalakis} +vera-lucia-strube-de-lima: + names: + - {first: Vera Lucia Strube, last: de Lima} + - {first: Vera Lúcia Strube, last: de Lima} +verginica-barbu-mititelu: + names: + - {first: Verginica, last: Barbu Mititelu} + - {first: Verginica Barbu, last: Mititelu} +vern-walker: + names: + - {first: Vern, last: Walker} + - {first: Vern R., last: Walker} +veronika-lux: + names: + - {first: Veronika, last: Lux} + - {first: Veronika, last: Lux-Pogodalla} + - {first: Véronika, last: Lux-Pogodalla} +veronique-hoste: + names: + - {first: Veronique, last: Hoste} + - {first: Véronique, last: Hoste} +veronique-moriceau: + names: + - {first: Véronique, last: Moriceau} + - {first: Veronique, last: Moriceau} +vibhu-o-mittal: + names: + - {first: Vibhu O., last: Mittal} + - {first: Vibhu, last: Mittal} +vicent-alabau: + names: + - {first: Vicent, last: Alabau} + - {first: Vicente, last: Alabau} +victor-abrash: + names: + - {first: Victor, last: Abrash} + - {first: V., last: Abrash} +victor-j-diaz: + names: + - {first: Víctor J., last: Díaz} + - {first: Victor J., last: Díaz} +victor-m-sanchez-cartagena: + names: + - {first: Víctor M., last: Sánchez-Cartagena} + - {first: Victor M., last: Sánchez-Cartagena} +victor-o-k-li: + names: + - {first: Victor O.K., last: Li} + - {first: Victor O. K., last: Li} +victor-peinado: + names: + - {first: Víctor, last: Peinado} + - {first: Victor, last: Peinado} +victor-rodriguez-doncel: + names: + - {first: Victor, last: Rodriguez-Doncel} + - {first: Víctor, last: Rodríguez} + - {first: Victor, last: Rodríguez Doncel} +victor-sadler: + names: + - {first: Victor, last: Sadler} + - {first: V., last: Sadler} +victor-zue: + names: + - {first: Victor, last: Zue} + - {first: Victor W., last: Zue} + - {first: V., last: Zue} +victoria-fossum: + names: + - {first: Victoria, last: Fossum} + - {first: Victoria Li, last: Fossum} +victoria-l-rubin: + names: + - {first: Victoria L., last: Rubin} + - {first: Victoria, last: Rubin} +victoria-lin-cmu: + comment: CMU + names: + - {first: Victoria, last: Lin} +vidas-daudaravicius: + names: + - {first: Vidas, last: Daudaravicius} + - {first: Vidas, last: Daudaravičius} +viet-cuong-nguyen: + names: + - {first: Viet Cuong, last: Nguyen} + - {first: Nguyen Viet, last: Cuong} +viet-hong-tran: + names: + - {first: Viet Hong, last: Tran} + - {first: Viet-Hong, last: Tran} +vijay-sundar-ram: + names: + - {first: Vijay, last: Sundar Ram} + - {first: Vijay Sundar, last: Ram} + - {first: R. Vijay Sundar, last: Ram} + - {first: Vijay Sundar Ram, last: R} +vikas-ganjigunte-ashok: + names: + - {first: Vikas, last: Ganjigunte Ashok} + - {first: Vikas, last: Ashok} +vikash-khandelwal: + names: + - {first: Vikash, last: Khandelwal} + - {first: Vikas, last: Khandelwal} +vincent-j-della-pietra: + names: + - {first: Vincent J., last: Della Pietra} + - {first: Vincent, last: DellaPietra} + - {first: V., last: Della Pietra} +vineet-gupta: + names: + - {first: Vineet, last: Gupta} + - {first: V., last: Gupta} +vinh-van-nguyen: + names: + - {first: Vinh Van, last: Nguyen} + - {first: Vinh-Van, last: Nguyen} +vinicius-mourao-alves-de-souza: + names: + - {first: Vinícius Mourão Alves de, last: Souza} + - {first: Vinícius Mourão Alves, last: de Souza} +virendrakumar-bhavsar: + names: + - {first: Virendrakumar, last: Bhavsar} + - {first: Virendra, last: Bhavsar} +vishnu-dutt-sharma: + names: + - {first: Vishnu Dutt, last: Sharma} + - {first: Vishnu, last: Sharma} +vit-suchomel: + names: + - {first: Vit, last: Suchomel} + - {first: Vít, last: Suchomel} +vitor-carvalho: + names: + - {first: Vitor, last: Carvalho} + - {first: Vitor R., last: Carvalho} +vitor-de-araujo: + names: + - {first: Vitor, last: De Araujo} + - {first: Vítor, last: Araújo} +vittorio-di-tomaso: + names: + - {first: Vittorio, last: Di Tomaso} + - {first: V., last: Di Tomaso} +vivi-nastase: + names: + - {first: Vivi, last: Nastase} + - {first: Vivi, last: Năstase} +vivian-k-lee: + names: + - {first: Vivian K., last: Lee} + - {first: Vivian, last: Lee} +vladimir-kadlec: + names: + - {first: Vladimír, last: Kadlec} + - {first: Vladimir, last: Kadlec} +vladimir-petkevic: + names: + - {first: Vladimir, last: Petkevic} + - {first: Vladimír, last: Petkevič} +vladislav-kubon: + names: + - {first: Vladislav, last: Kubon} + - {first: Vladislav, last: Kuboň} + - {first: Vladlslav, last: Kubon} +vlado-keselj: + names: + - {first: Vlado, last: Keselj} + - {first: Vlado, last: Kešelj} +vojtech-kovar: + names: + - {first: Vojtěch, last: Kovář} + - {first: Vojtech, last: Kovář} +von-wun-soo: + names: + - {first: Von-Wun, last: Soo} + - {first: Von-wun, last: Soo} +voula-giouli: + names: + - {first: Voula, last: Giouli} + - {first: V., last: Giouli} +w-bruce-croft: + names: + - {first: W. Bruce, last: Croft} + - {first: Bruce, last: Croft} +w-john-hutchins: + names: + - {first: W. John, last: Hutchins} + - {first: John, last: Hutchins} +wai-kit-lo: + names: + - {first: Wai-Kit, last: Lo} + - {first: Wai Kit, last: Lo} +wai-lok-tam: + names: + - {first: Wai Lok, last: Tam} + - {first: Wailok, last: Tam} +walter-daelemans: + names: + - {first: Walter, last: Daelemans} + - {first: W., last: Daelemans} +walter-haeseryn: + names: + - {first: Walter, last: Haeseryn} + - {first: W., last: Haeseryn} +walter-kasper: + names: + - {first: Walter, last: Kasper} + - {first: W., last: Kasper} +walter-lasecki: + names: + - {first: Walter, last: Lasecki} + - {first: Walter S., last: Lasecki} +walther-von-hahn: + names: + - {first: Walther, last: von Hahn} + - {first: Walther, last: v. Hahn} +waqas-anwar: + names: + - {first: Waqas, last: Anwar} + - {first: Muhammad Waqas, last: Anwar} +warren-greiff: + names: + - {first: Warren, last: Greiff} + - {first: Warren R., last: Greiff} +wasi-ahmad: + names: + - {first: Wasi, last: Ahmad} + - {first: Wasi Uddin, last: Ahmad} +wassim-el-hajj: + names: + - {first: Wassim, last: El-Hajj} + - {first: Wassim, last: El Hajj} +wayne-ward: + names: + - {first: Wayne, last: Ward} + - {first: Wayne H., last: Ward} + - {first: W., last: Ward} +wayne-xin-zhao: + names: + - {first: Wayne Xin, last: Zhao} + - {first: Xin, last: Zhao} +wei-ai-umich: + disable_name_matching: true + names: + - {first: Wei, last: Ai} + orcid: 0000-0001-6271-9430 +wei-fan-hkust: + comment: HKUST + disable_name_matching: true + names: + - {first: Wei, last: Fan} + orcid: 0009-0008-1900-7081 +wei-liu-kcl: + comment: KCL + disable_name_matching: true + names: + - {first: Wei, last: Liu} + orcid: 0000-0003-0011-7797 +wei-lun-lu: + names: + - {first: Wei-lun, last: Lu} + - {first: Wei-Lwun, last: Lu} + - {first: Louis Wei-lun, last: Lu} +wei-ying-ma: + names: + - {first: Wei-Ying, last: Ma} + - {first: Wei-ying, last: Ma} +wei-yun-ma: + names: + - {first: Wei-Yun, last: Ma} + - {first: Wei Yun, last: Ma} +weigang-li: + names: + - {first: Weigang, last: Li} + - {first: Weikang, last: Li} +weina-zhao: + names: + - {first: Weina, last: Zhao} + - {first: Wei Na, last: Zhao} +weinan-zhang: + names: + - {first: Weinan, last: Zhang} + - {first: Wei-Nan, last: Zhang} +weiwei-sun-sd: + comment: Shandong University + names: + - {first: Weiwei, last: Sun} +weiyi-liu: + names: + - {first: Weiyi, last: Liu} + - {first: Weiyi, last: Lu} +wen-chi-hsien: + names: + - {first: Wen-Chi, last: Hsien} + - {first: Wen-Chi, last: Hsie} +wen-hsiang-tu: + names: + - {first: Wen-Hsiang, last: Tu} + - {first: Wen-hsiang, last: Tu} +wen-huei-cheng: + names: + - {first: Wen-Huei, last: Cheng} + - {first: Wen-Hui, last: Cheng} +wen-juan-hou: + names: + - {first: Wen-Juan, last: Hou} + - {first: Wen, last: Juan Hou} + - {first: Juan, last: Wen} +wen-lian-hsu: + names: + - {first: Wen-Lian, last: Hsu} + - {first: Wen-lian, last: Hsu} +wen-tau-yih: + names: + - {first: Wen-tau, last: Yih} + - {first: Scott Wen-tau, last: Yih} +wen-ting-wang: + names: + - {first: Wen Ting, last: Wang} + - {first: WenTing, last: Wang} +wen-wang: + names: + - {first: Wen, last: Wang} + - {first: W., last: Wang} +wendy-chapman: + names: + - {first: Wendy, last: Chapman} + - {first: Wendy W, last: Chapman} +wendy-lehnert: + names: + - {first: Wendy, last: Lehnert} + - {first: Wendy G., last: Lehnert} + - {first: W., last: Lehnert} +wenhan-chao: + names: + - {first: Wenhan, last: Chao} + - {first: WenHan, last: Chao} + - {first: Wen-Han, last: Chao} +wenyu-zhang-cornell: + comment: Cornell + disable_name_matching: true + names: + - {first: Wenyu, last: Zhang} + orcid: 0000-0002-3849-4320 +wenzheng-zhang-ru: + comment: Rutgers University + disable_name_matching: true + names: + - {first: Wenzheng, last: Zhang} + orcid: 0009-0009-2578-9224 +whitney-l-cade: + names: + - {first: Whitney L., last: Cade} + - {first: Whitney, last: Cade} +widad-mustafa-el-hadi: + names: + - {first: Widad Mustafa El, last: Hadi} + - {first: Widad Mustafa, last: El Hadi} + - {first: Widad, last: Mustafa El Hadi} + - {first: W., last: Mustafa El Hadi} +willem-robert-van-hage: + names: + - {first: Willem Robert, last: van Hage} + - {first: Willem, last: Van Hage} + - {first: Willem, last: van Hage} +william-a-baumgartner-jr: + names: + - {first: William A., last: 'Baumgartner, Jr.'} + - {first: William A., last: Baumgartner Jr.} + - {first: William A., last: Baumgartner} + - {first: William, last: Baumgartner} + - {first: William, last: Baumgartner Jr.} +william-a-gale: + names: + - {first: William A., last: Gale} + - {first: William, last: Gale} +william-a-martin: + names: + - {first: William A., last: Martin} + - {first: W. A., last: Martin} +william-a-woods: + names: + - {first: William A., last: Woods} + - {first: W. A., last: Woods} +william-b-dolan: + names: + - {first: William B., last: Dolan} + - {first: William, last: Dolan} + - {first: Bill, last: Dolan} +william-c-mann: + names: + - {first: William C., last: Mann} + - {first: William, last: Mann} +william-c-ogden: + names: + - {first: William C., last: Ogden} + - {first: William, last: Ogden} +william-cohen: + names: + - {first: William, last: Cohen} + - {first: William W., last: Cohen} +william-coster: + names: + - {first: William, last: Coster} + - {first: Will, last: Coster} +william-de-beaumont: + names: + - {first: William, last: de Beaumont} + - {first: Will, last: de Beaumont} +william-j-black: + names: + - {first: William J., last: Black} + - {first: William J, last: Black} + - {first: William, last: Black} + - {first: W.J., last: Black} +william-j-corvey: + names: + - {first: William J., last: Corvey} + - {first: William, last: Corvey} +william-j-teahan: + names: + - {first: William J., last: Teahan} + - {first: William J, last: Teahan} + - {first: W. J., last: Teahan} +william-lewis: + names: + - {first: William, last: Lewis} + - {first: William D., last: Lewis} +william-m-fisher: + names: + - {first: William M., last: Fisher} + - {first: William, last: Fisher} + - {first: W. M., last: Fisher} + - {first: W., last: Fisher} +william-morgan: + names: + - {first: William, last: Morgan} + - {first: William T., last: Morgan} +william-r-murray: + names: + - {first: William R., last: Murray} + - {first: William, last: Murray} +william-s-y-wang: + names: + - {first: William S-Y., last: Wang} + - {first: William S.-Y., last: Wang} +william-soto-martinez: + names: + - {first: William, last: Soto Martinez} + - {first: William, last: Soto} +wim-peters: + names: + - {first: Wim, last: Peters} + - {first: W., last: Peters} +winston-n-anderson: + names: + - {first: Winston N, last: Anderson} + - {first: Winston, last: Anderson} +witold-drozdzynski: + names: + - {first: Witold, last: Drożdżyński} + - {first: Witold, last: Drozdzynski} +wojciech-skalmowski: + names: + - {first: Wojciech, last: Skalmowski} + - {first: W., last: Skalmowski} +wolfgang-hoeppner: + names: + - {first: Wolfgang, last: Hoeppner} + - {first: W., last: Hoeppner} +wolfgang-klein: + names: + - {first: Wolfgang, last: Klein} + - {first: W., last: Klein} +wolfgang-wahlster: + names: + - {first: Wolfgang, last: Wahlster} + - {first: W., last: Wahlster} +won-ho-ryu: + names: + - {first: Won Ho, last: Ryu} + - {first: Won-Ho, last: Ryu} +woong-ki-lee: + names: + - {first: Woong Ki, last: Lee} + - {first: Woong-Ki, last: Lee} +xabier-arregi: + names: + - {first: Xabier, last: Arregi} + - {first: X, last: Arregi} + - {first: X., last: Arregi} +xabier-artola: + names: + - {first: Xabier, last: Artola} + - {first: X, last: Artola} + - {first: X., last: Artola} +xabier-saralegi: + names: + - {first: Xabier, last: Saralegi} + - {first: X., last: Saralegi} +xabier-zalbide: + names: + - {first: Xabier, last: Zalbide} + - {first: X., last: Zalbide} +xavier-briffault: + names: + - {first: Xavier, last: Briffault} + - {first: X., last: Briffault} +xavier-gomez-guinovart: + names: + - {first: Xavier, last: Gómez Guinovart} + - {first: Xavier, last: Gómez-Guinovart} +xi-victoria-lin: + comment: U of Washington, Meta + names: + - {first: Xi Victoria, last: Lin} +xia-wang: + names: + - {first: Xia, last: Wang} + - {first: X. S., last: Wang} +xiang-dai: + names: + - {first: Xiang, last: Dai} + - {first: Xiangying, last: Dai} +xiangfeng-wei: + names: + - {first: Xiangfeng, last: Wei} + - {first: XiangFeng, last: Wei} +xiangji-huang: + names: + - {first: Xiangji, last: Huang} + - {first: Jimmy Xiangji, last: Huang} +xiao-long-wang: + names: + - {first: Xiao-Long, last: Wang} + - {first: XiaoLong, last: Wang} + - {first: Xiao-long, last: Wang} +xiaojin-zhu: + names: + - {first: Xiaojin, last: Zhu} + - {first: Xiaojin Jerry, last: Zhu} +xiaojun-lin: + names: + - {first: Xiaojun, last: Lin} + - {first: Xiaojun, last: Li} +xiaolei-wang-fudan: + comment: Fudan + names: + - {first: Xiaolei, last: Wang} +xiaolei-wang-renmin: + comment: Renmin + names: + - {first: Xiaolei, last: Wang} +xiaoqiang-luo: + names: + - {first: Xiaoqiang, last: Luo} + - {first: X., last: Luo} +xin-luna-dong: + names: + - {first: Xin Luna, last: Dong} + - {first: Xin, last: Dong} +xin-xu-ucsd: + comment: UCSD + disable_name_matching: true + names: + - {first: Xin, last: Xu} + orcid: 0000-0001-5238-0955 +xin-ying-qiu: + names: + - {first: Xin Ying, last: Qiu} + - {first: Xinying, last: Qiu} +xinnian-mao: + names: + - {first: Xinnian, last: Mao} + - {first: Xin, last: Mao} +xinpeng-wang-lmu: + degree: Ludwig Maximilian University of Munich (LMU) + disable_name_matching: true + names: + - {first: Xinpeng, last: Wang} + orcid: 0009-0006-5213-1119 +xinyu-dai: + names: + - {first: Xinyu, last: Dai} + - {first: Xin-yu, last: Dai} + - {first: Xin-Yu, last: Dai} +xinyu-deng: + names: + - {first: Xinyu, last: Deng} + - {first: XinYu, last: Deng} +xiuzhen-jenny-zhang: + names: + - {first: Xiuzhen (Jenny), last: Zhang} + - {first: Xiuzhen, last: Zhang} +xixian-chen: + names: + - {first: Xixian, last: Chen} + - {first: XiXian, last: Chen} +xuan-jing-huang: + names: + - {first: Xuan-Jing, last: Huang} + - {first: Xuan-jing, last: Huang} + - {first: Xuanjing, last: Huang} +xuan-long-do: + names: + - {first: Xuan Long, last: Do} + - {first: Do Xuan, last: Long} +xuan-luong-vu: + names: + - {first: Xuan Luong, last: Vu} + - {first: Xuân Lương, last: Vũ} + - {first: Xuan-Luong, last: Vu} +xuan-nga-cao: + names: + - {first: Xuan-Nga, last: Cao} + - {first: Xuân-Nga, last: Cao} + - {first: Xuân-Nga Cao, last: Kam} +xuedong-huang: + names: + - {first: Xuedong, last: Huang} + - {first: X.D., last: Huang} + - {first: X., last: Huang} +xueqi-cheng: + names: + - {first: Xueqi, last: Cheng} + - {first: Xue-Qi, last: Cheng} +y-albert-park: + names: + - {first: Y. Albert, last: Park} + - {first: Albert, last: Park} +ya-li-cas: + degree: Chinese Academy of Sciences + disable_name_matching: true + names: + - {first: Ya, last: Li} + orcid: 0000-0002-6284-5039 +ya-ting-lin: + names: + - {first: Ya-Ting, last: Lin} + - {first: Ya-Ting, last: Li} +yaakov-hacohen-kerner: + names: + - {first: Yaakov, last: HaCohen-Kerner} + - {first: Yaakov, last: Hacohen-Kerner} +yael-cohen-sygal: + names: + - {first: Yael, last: Cohen-Sygal} + - {first: Yael, last: Sygal} +yael-netzer: + names: + - {first: Yael, last: Netzer} + - {first: Yael Dahan, last: Netzer} + - {first: Yael, last: Dahan} +yael-ravin: + names: + - {first: Yael, last: Ravin} + - {first: Y., last: Ravin} +yajuan-lu: + names: + - {first: Yajuan, last: Lü} + - {first: Yajuan, last: Lu} + - {first: Yajuan, last: Lv} +yan-zuo-zhou: + names: + - {first: Yan-Zuo, last: Zhou} + - {first: Yen-zuo, last: Zhou} +yang-janet-liu: + comment: Georgetown University; 刘洋 + names: + - {first: Yang Janet, last: Liu} + - {first: Yang, last: Liu} +yang-liu-3m: + comment: 3M Health Information Systems + names: + - {first: Yang, last: Liu} +yang-liu-blcu: + comment: Beijing Language and Culture University + names: + - {first: Yang, last: Liu} +yang-liu-dt: + comment: National University of Defense Technology + names: + - {first: Yang, last: Liu} +yang-liu-edinburgh: + comment: Edinburgh Ph.D., Microsoft + names: + - {first: Yang, last: Liu} +yang-liu-helsinki: + comment: University of Helsinki + names: + - {first: Yang, last: Liu} +yang-liu-hk: + comment: The Chinese University of Hong Kong (Shenzhen) + names: + - {first: Yang, last: Liu} +yang-liu-icsi: + comment: 刘扬; Ph.D Purdue; ICSI, Dallas, Facebook, Liulishuo, Amazon + names: + - {first: Yang, last: Liu} + - {first: Y., last: Liu} +yang-liu-ict: + comment: 刘洋; ICT, Tsinghua, Beijing Academy of Artificial Intelligence + names: + - {first: Yang, last: Liu} +yang-liu-microsoft: + comment: Microsoft Cognitive Services Research + names: + - {first: Yang, last: Liu} +yang-liu-pk: + comment: Peking University + names: + - {first: Yang, last: Liu} +yang-liu-ss: + comment: Samsung Research Center Beijing + names: + - {first: Yang, last: Liu} +yang-liu-tianjin: + comment: Tianjin University, China + names: + - {first: Yang, last: Liu} +yang-liu-umich: + comment: Univ. of Michigan, UC Santa Cruz + names: + - {first: Yang, last: Liu} +yang-liu-wl: + comment: Wilfrid Laurier University + names: + - {first: Yang, last: Liu} +yang-zhang-ustc: + comment: USTC + disable_name_matching: true + names: + - {first: Yang, last: Zhang} + orcid: 0000-0002-7863-5183 +yannick-marchand: + names: + - {first: Yannick, last: Marchand} + - {first: Y., last: Marchand} +yannick-mathieu: + names: + - {first: Yannick, last: Mathieu} + - {first: Yvette Yannick, last: Mathieu} + - {first: Yvette, last: Mathieu} +yao-ting-sung: + names: + - {first: Yao-Ting, last: Sung} + - {first: Yao-Ting, last: Hung} +yao-yao: + names: + - {first: Yao, last: Yao} +yao-yao-uwisc: + names: + - {first: Yao, last: Yao} +yao-zhong-zhang: + names: + - {first: Yao-Zhong, last: Zhang} + - {first: Yao Zhong, last: Zhang} + - {first: Yao-zhong, last: Zhang} + - {first: Y., last: Zhang} +yaosheng-yang: + names: + - {first: Yaosheng, last: Yang} + - {first: YaoSheng, last: Yang} +yaser-al-onaizan: + names: + - {first: Yaser, last: Al-Onaizan} + - {first: Yaser, last: Al-onaizan} +yau-tarng-juang: + names: + - {first: Yau-Tarng, last: Juang} + - {first: Yau-Tang, last: Juang} +yen-lu-chow: + names: + - {first: Yen-Lu, last: Chow} + - {first: Yen-lu, last: Chow} +yeon-su-lee: + names: + - {first: Yeon Su, last: Lee} + - {first: Yeon-Su, last: Lee} +yerai-doval: + names: + - {first: Yerai, last: Doval} + - {first: Yerai, last: Doval Mosquera} +yi-cheng-pan: + names: + - {first: Yi-Cheng, last: Pan} + - {first: Yi-cheng, last: Pan} +yi-jing-zhao: + names: + - {first: Yi-jing, last: Zhao} + - {first: Yi-Jing, last: Hao} +yi-rong-chen: + names: + - {first: Yi-Rong, last: Chen} + - {first: YiRong, last: Chen} + - {first: Yi-Rung, last: Chen} +yichun-chen: + names: + - {first: YiChun, last: Chen} + - {first: Yi-Chun, last: Chen} +yifan-peng-cmu: + comment: cmu + names: + - {first: Yifan, last: Peng} +yih-ru-wang: + names: + - {first: Yih-Ru, last: Wang} + - {first: Yih-ru, last: Wang} +ying-chieh-tu: + names: + - {first: Ying-Chieh, last: Tu} + - {first: Ying-chieh, last: Tu} +ying-mei-guo: + names: + - {first: Ying-Mei, last: Guo} + - {first: YingMei, last: Guo} +ying-zhang: + names: + - {first: Ying, last: Zhang} + - {first: Joy Ying, last: Zhang} +yingju-xia: + names: + - {first: Yingju, last: Xia} + - {first: Ying-Ju, last: Xia} + - {first: YingJu, last: Xia} +yixuan-tang-hkust: + comment: HKUST + disable_name_matching: true + names: + - {first: Yixuan, last: Tang} + orcid: 0009-0006-2405-2026 +yiyang-du-cmu: + comment: CMU + disable_name_matching: true + names: + - {first: Yiyang, last: Du} + orcid: 0009-0007-1949-9736 +yoan-gutierrez: + names: + - {first: Yoan, last: Gutiérrez} + - {first: Yoan, last: Gutiérrez Vázquez} +yoichi-yamashita: + names: + - {first: Yoichi, last: Yamashita} + - {first: Y., last: Yamashita} +yonael-gorfu: + names: + - {first: Yonael, last: Gorfu} + - {first: Y., last: Gorfu} +yong-hun-lee: + names: + - {first: Yong-Hun, last: Lee} + - {first: Yong-hun, last: Lee} +yongcheng-wang: + names: + - {first: YongCheng, last: Wang} + - {first: Yong-Cheng, last: Wang} + - {first: Yong Cheng, last: Wang} +yonglin-teng: + names: + - {first: Yonglin, last: Teng} + - {first: Yong-lin, last: Teng} +yongqi-li-hk: + comment: The Hong Kong Polytechnic University + names: + - {first: Yongqi, last: Li} +yongqi-li-wuhan: + comment: Wuhan University + names: + - {first: Yongqi, last: Li} +yoong-keok-lee: + names: + - {first: Yoong Keok, last: Lee} + - {first: Yoong, last: Keok Lee} +yorick-wilks: + names: + - {first: Yorick, last: Wilks} + - {first: Y., last: Wilks} +yoshi-suhara: + names: + - {first: Yoshi, last: Suhara} + - {first: Yoshihiko, last: Suhara} +yoshihide-kato: + names: + - {first: Yoshihide, last: Kato} + - {first: Yoshihide, last: Sato} +yoshihiko-hayashi: + names: + - {first: Yoshihiko, last: Hayashi} + - {first: Y., last: Hayashi} +yoshihiro-tomiyama: + names: + - {first: Yoshihiro, last: Tomiyama} + - {first: Y., last: Tomiyama} +yoshihiro-ueda: + names: + - {first: Yoshihiro, last: Ueda} + - {first: Y., last: Ueda} +you-shan-chung: + names: + - {first: You-Shan, last: Chung} + - {first: You-shan, last: Chung} +young-chol-song: + names: + - {first: Young Chol, last: Song} + - {first: Young C., last: Song} +young-gil-kim: + names: + - {first: Young-Gil, last: Kim} + - {first: Young-Kil, last: Kim} + - {first: Young Kil, last: Kim} + - {first: Young-Kill, last: Kim} + - {first: YoungKil, last: Kim} +younggyun-hahm: + names: + - {first: Younggyun, last: Hahm} + - {first: YoungGyun, last: Hahm} +yu-da-lai: + names: + - {first: Yu-da, last: Lai} + - {first: Yu-Da, last: Lai} +yu-hang-mao: + names: + - {first: Yu Hang, last: Mao} + - {first: Yu-Hang, last: Mao} + - {first: Yuhang, last: Mao} +yu-ling-una-hsu: + names: + - {first: Yu-Ling Una, last: Hsu} + - {first: Yu-Ling, last: Hsu} +yu-wei-chang: + names: + - {first: Yu-wei, last: Chang} + - {first: Yu-Wei, last: Chang} +yuang-chin-chiang: + names: + - {first: Yuang-Chin, last: Chiang} + - {first: Yuang-chin, last: Chiang} +yuanzhu-peter-chen: + names: + - {first: Yuanzhu Peter, last: Chen} + - {first: Peter, last: Chen} +yucel-saygin: + names: + - {first: Yucel, last: Saygin} + - {first: Yücel, last: Saygın} +yue-li-ecnu: + degree: East China Normal University + disable_name_matching: true + names: + - {first: Yue, last: Li} + orcid: 0009-0005-5509-2103 +yuen-hsien-tseng: + names: + - {first: Yuen-Hsien, last: Tseng} + - {first: Yuan-Hsien, last: Tseng} +yufang-sun: + names: + - {first: Yufang, last: Sun} + - {first: Yu-fang, last: Sun} +yuguang-duan: + names: + - {first: Yuguang, last: Duan} + - {first: Yu, last: Duan} +yuhao-wang-renmin: + comment: Renmin + disable_name_matching: true + names: + - {first: Yuhao, last: Wang} + orcid: 0009-0001-5760-9285 +yuji-matsumoto: + names: + - {first: Yuji, last: Matsumoto} + - {first: Yūji, last: Matsumoto} +yuka-takei: + names: + - {first: Yuka, last: Takei} + - {first: Yuya, last: Takei} +yuka-tateisi: + names: + - {first: Yuka, last: Tateisi} + - {first: Yuka, last: Tateishi} +yukihiro-itoh: + names: + - {first: Yukihiro, last: Itoh} + - {first: Yukihiro, last: Ito} +yukiko-i-nakano: + names: + - {first: Yukiko I., last: Nakano} + - {first: Yukiko, last: Nakano} +yun-cheng-ju: + names: + - {first: Yun-Cheng, last: Ju} + - {first: Yun Cheng, last: Ju} +yun-qian-qu: + names: + - {first: Yun-Qian, last: Qu} + - {first: Yunqian, last: Qu} +yung-taek-kim: + names: + - {first: Yung Taek, last: Kim} + - {first: Yung-Taek, last: Kim} +yuqing-guo: + names: + - {first: Yuqing, last: Guo} + - {first: Yuqing, last: Gao} +yurii-kuratov: + names: + - {first: Yurii, last: Kuratov} + - {first: Yuri, last: Kuratov} +zachary-c-lipton: + names: + - {first: Zachary C., last: Lipton} + - {first: Zachary, last: Lipton} +zaharin-yusoff: + names: + - {first: Zaharin, last: Yusoff} + - {first: Y., last: Zaharin} +zahurul-islam: + names: + - {first: Zahurul, last: Islam} + - {first: Zahrul, last: Islam} +zarah-weiss: + names: + - {first: Zarah, last: Weiss} + - {first: Zarah, last: Weiß} +zdenek-zabokrtsky: + names: + - {first: Zdeněk, last: Žabokrtský} + - {first: Zdenek, last: Zabokrtsky} + - {first: Zdenĕk, last: Žabokrtský} + - {first: Zdenek, last: Žabokrtsky} +zdenka-uresova: + names: + - {first: Zdenka, last: Uresova} + - {first: Zdeňka, last: Urešová} +zdravko-kacic: + names: + - {first: Zdravko, last: Kačič} + - {first: Zdravko, last: Kacic} +ze-yu-zheng: + names: + - {first: Ze-yu, last: Zheng} + - {first: Zeyu, last: Zheng} +zeerak-talat: + names: + - {first: Zeerak, last: Talat} + - {first: Zeerak, last: Waseem} +zeljko-agic: + names: + - {first: Željko, last: Agić} + - {first: Zeljko, last: Agic} +zeynep-orhan: + names: + - {first: Zeynep, last: Orhan} + - {first: Orhan, last: Zeynep} +zhao-ming-gao: + names: + - {first: Zhao Ming, last: Gao} + - {first: Zhao-Ming, last: Gao} + - {first: Zhao-ming, last: Gao} +zhaoyan-ming: + names: + - {first: Zhaoyan, last: Ming} + - {first: Zhao-Yan, last: Ming} +zheng-ping-jiang: + names: + - {first: Zheng Ping, last: Jiang} + - {first: Zhengping, last: Jiang} +zheng-yu-niu: + names: + - {first: Zheng-Yu, last: Niu} + - {first: Zheng Yu, last: Niu} + - {first: Zhengyu, last: Niu} +zheng-yuan-cambridge: + comment: Cambridge + disable_name_matching: true + names: + - {first: Zheng, last: Yuan} + orcid: 0000-0003-2406-1708 +zhengxian-gong: + names: + - {first: Zhengxian, last: Gong} + - {first: ZhengXian, last: Gong} +zhengyan-shi: + degree: University College London + names: + - {first: Zhengyan, last: Shi} + - {first: Zhengxiang, last: Shi} + orcid: 0000-0003-3074-3035 +zhi-hong-deng: + names: + - {first: Zhi-Hong, last: Deng} + - {first: Zhihong, last: Deng} +zhi-min-zhou: + names: + - {first: Zhi Min, last: Zhou} + - {first: Zhi-Min, last: Zhou} +zhicheng-guo-tsinghua: + comment: Tsinghua + names: + - {first: Zhicheng, last: Guo} +zhicheng-guo-xidian: + comment: xidian + names: + - {first: Zhicheng, last: Guo} +zhihan-zhang-smu: + degree: Singapore Management University + disable_name_matching: true + names: + - {first: Zhihan, last: Zhang} + orcid: 0009-0009-5813-9172 +zhihao-wang-xu: + degree: Xiamen University + disable_name_matching: true + names: + - {first: Zhihao, last: Wang} + orcid: 0009-0008-7497-6467 +zhihao-zhang-soochow: + comment: Soochow + disable_name_matching: true + names: + - {first: Zhihao, last: Zhang} + orcid: 0000-0001-9283-101X +zhiming-xu: + names: + - {first: Zhiming, last: Xu} + - {first: Zhi-Ming, last: Xu} +zhiyu-chen-lehigh: + comment: Lehigh University + disable_name_matching: true + names: + - {first: Zhiyu, last: Chen} + orcid: 0000-0002-3096-7912 +zihao-li-helsinki: + comment: Helsinki + disable_name_matching: true + names: + - {first: Zihao, last: Li} + orcid: 0009-0008-9329-5341 +ziortza-polin: + names: + - {first: Ziortza, last: Polin} + - {first: Z., last: Polin} +zoltan-alexin: + names: + - {first: Zoltán, last: Alexin} + - {first: Z., last: Alexin} +zoya-m-shalyapina: + names: + - {first: Zoya M., last: Shalyapina} + - {first: Zoyn M., last: Shalyapina} + - {first: Z.M., last: Shalyapina} + - {first: Z. M., last: Shalyapina} +zuzana-fraterova: + names: + - {first: Zuzana, last: Fraterova} + - {first: Zuzana, last: Fráterová} diff --git a/hugo/content/info/verification.md b/hugo/content/info/verification.md new file mode 100644 index 0000000000..b934c2c152 --- /dev/null +++ b/hugo/content/info/verification.md @@ -0,0 +1,11 @@ +--- +Title: Verified authors +linktitle: Verification +subtitle: How the ACL Anthology verifies authors +date: "2025-09-19" +--- +The ACL Anthology distinguishes between verified and unverified people. + +A _verified_ person is one for whom we have an explicit entry in our names database. +This can happen in two ways: manual verification, or automatically via the provision +of an ORCID iD. diff --git a/hugo/content/people/_content.gotmpl b/hugo/content/people/_content.gotmpl index 8b3bc09fe8..d3eb6da03e 100644 --- a/hugo/content/people/_content.gotmpl +++ b/hugo/content/people/_content.gotmpl @@ -2,7 +2,7 @@ {{ $page := dict "kind" "page" "path" $person_id - "slug" $person_id + "slug" (index (split $person_id "/") -1) "params" (dict "name" $person_id "lastname" $person.last) "title" $person.full }} diff --git a/hugo/layouts/_default/baseof.html b/hugo/layouts/_default/baseof.html index 61777eea19..bf0a9eb7c6 100644 --- a/hugo/layouts/_default/baseof.html +++ b/hugo/layouts/_default/baseof.html @@ -22,7 +22,7 @@ {{ $sass_options := (dict "includePaths" (slice "assets/css" "assets/css/vendor/bootstrap/scss")) }} {{ $style := resources.Get "css/main.scss" | toCSS $sass_options | minify | fingerprint }} - + {{ block "meta" . }}{{ end }} diff --git a/hugo/layouts/people/single.html b/hugo/layouts/people/single.html index a4348e8996..5a5e7c10db 100644 --- a/hugo/layouts/people/single.html +++ b/hugo/layouts/people/single.html @@ -4,6 +4,15 @@

{{ $person.first }} {{ $person.last }} + {{ with $person.orcid }} + + + + {{ else }} + + + + {{ end }}

{{ with $person.comment }}

{{.}}

diff --git a/hugo/static/.htaccess b/hugo/static/.htaccess index 6f95b2e073..ed7455ab09 100644 --- a/hugo/static/.htaccess +++ b/hugo/static/.htaccess @@ -81,6 +81,10 @@ RewriteRule ^thumb/(.*)$ anthology-files/thumb/$1 [L,NC] # since the pattern-match can't match source side. RewriteRule ^people/[a-z]/([\-a-z0-9]+)/?$ people/$1/ [R=301,L,NC] +# If the requested author page does not exist, soft-redirect [303 See Other] to the unverified/ URL +RewriteCond %{REQUEST_FILENAME} !-d +RewriteRule ^people/([^/]+)/?$ people/unverified/$1/ [L,R=303] + # Videos ## match old-style URLs, e.g., /N13-1001.mp4 -> anthology-files/videos/N/N13-1001.mp4 ## also supports videos split into pieces, e.g., /N13-4001.1.mp4 @@ -101,4 +105,4 @@ RewriteRule \.copyright\.pdf$ - [R=404,L] Options +ExecCGI AddHandler cgi-script .cgi RewriteRule ^(\d{4}\.[a-zA-Z\d]+-[a-zA-Z\d]+\.[a-zA-Z\d]+?)(?:v\d+)?\.(bib|xml|endf)$ /ANTHOLOGYDIR/cgi-bin/extract_citation.cgi?anthology_id=$1&format=$2 [L,NC] -RewriteRule ^([A-Za-z]\d{2}\-\d{4})(?:v\d+)?\.(bib|xml|endf)$ /ANTHOLOGYDIR/cgi-bin/extract_citation.cgi?anthology_id=$1&format=$2 [L,NC] \ No newline at end of file +RewriteRule ^([A-Za-z]\d{2}\-\d{4})(?:v\d+)?\.(bib|xml|endf)$ /ANTHOLOGYDIR/cgi-bin/extract_citation.cgi?anthology_id=$1&format=$2 [L,NC] diff --git a/hugo/static/images/orcid_16x16.gif.webp b/hugo/static/images/orcid_16x16.gif.webp new file mode 100644 index 0000000000..281082d57f Binary files /dev/null and b/hugo/static/images/orcid_16x16.gif.webp differ diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md index ff2aadaa04..8bcb077605 100644 --- a/python/CHANGELOG.md +++ b/python/CHANGELOG.md @@ -1,5 +1,34 @@ # Changelog +## [Unreleased] + +This release implements the new [name resolution and author ID logic](https://github.com/acl-org/acl-anthology/wiki/Author-Page-Plan), and is therefore fundamentally incompatible with ACL Anthology data before the switch to this new system. + +### Added + +- NameSpecification now provides an `orcid` field. +- Person: + - Now provides `orcid`, `degree`, `disable_name_matching`, and `similar_ids` fields that correspond to the respective fields in the new `people.yaml`. + - Changing `id`, `orcid`, `names`, or using `add_name()` or `remove_names()` will now automatically update the PersonIndex. + - Added `update_id()` that updates a person's ID on all of their connected papers. + - Added `make_explicit()` that makes all necessary changes to change an implicit ("unverified/") to an explicit Person. +- PersonIndex: + - Now also indexes Person objects by ORCID, and provides `by_orcid` and `get_by_orcid()`. + - Now also keeps a mapping of name slugs to (verified) person IDs, via `slugs_to_verified_ids` (mostly for internal use). + - Added `ingest_namespec()` to implement the [matching logic on ingestion](https://github.com/acl-org/acl-anthology/wiki/Author-Page-Plan#ingestion) of new volumes. + - Added `create_person()` to instantiate a new Person and add it to the index. + +### Changed + +- Several breaking changes to PersonIndex for the new author ID system: + - Loading the index now expects a `people.yaml` file instead of `name_variants.yaml`. + - Renamed `get_or_create_person()` to `resolve_namespec()` and refactored it to reflect the [new name resolution logic](https://github.com/acl-org/acl-anthology/wiki/Author-Page-Plan#proposed-name-resolution-logic). + - Renamed `name_to_ids` to `by_name`, in line with the new `by_orcid` field. + - Changed the type of exceptions that can be raised; `AmbiguousNameError` was replaced by `NameSpecResolutionError` and `PersonDefinitionError`. + - Changed the previously experimental `save()` function to serialize the `people.yaml` file. +- Person now stores names as tuples of `(Name, NameLink)`, the latter of which indicates if the name was explicitly defined in `people.yaml` or inferred by the name resolution logic (e.g. via slug matching). As a consequence, `Person.names` can no longer be modified in-place; use `Person.add_name()`, `Person.remove_name()`, or the setter of `Person.names`. +- Setting a canonical name for a Person changed from `.set_canonical_name()` to `Person.canonical_name = ...` + ## [0.5.3] — 2025-06-22 This release adds more functionality for ingesting new proceedings and modifying existing data. diff --git a/python/acl_anthology/collections/collection.py b/python/acl_anthology/collections/collection.py index cd868a3ae7..29615e78f7 100644 --- a/python/acl_anthology/collections/collection.py +++ b/python/acl_anthology/collections/collection.py @@ -193,8 +193,10 @@ def create_volume( ) volume.is_data_loaded = True - # For convenience, if editors were given, we add them to the index here + # If editors were given, we fill in their ID & add them to the index if volume.editors: + for namespec in volume.editors: + self.root.people.ingest_namespec(namespec) self.root.people._add_to_index(volume.editors, volume.full_id_tuple) self.data[id] = volume @@ -302,7 +304,7 @@ def save(self, path: Optional[StrPath] = None, minimal_diff: bool = True) -> Non minimal_diff: If True (default), will compare against an existing XML file in `self.path` to minimize the difference, i.e., to prevent noise from changes in the XML that make no semantic difference. See [`utils.xml.ensure_minimal_diff`][acl_anthology.utils.xml.ensure_minimal_diff] for details. """ if path is None: - path = self.path + path = self.path # pragma: no cover collection = etree.Element("collection", {"id": self.id}) for volume in self.volumes(): collection.append(volume.to_xml(with_papers=True)) diff --git a/python/acl_anthology/collections/paper.py b/python/acl_anthology/collections/paper.py index b48807fae9..b95e04896a 100644 --- a/python/acl_anthology/collections/paper.py +++ b/python/acl_anthology/collections/paper.py @@ -252,7 +252,7 @@ class Paper: type: The paper's type, currently used to mark frontmatter and backmatter. """ - id: str = field(converter=int_to_str) + id: str = field(converter=int_to_str) # validator defined below parent: Volume = field(repr=False, eq=False) bibkey: str = field( on_setattr=attrs.setters.pipe(attrs.setters.validate, _update_bibkey_index), diff --git a/python/acl_anthology/collections/volume.py b/python/acl_anthology/collections/volume.py index ee6dba7aeb..07d3857d5b 100644 --- a/python/acl_anthology/collections/volume.py +++ b/python/acl_anthology/collections/volume.py @@ -74,7 +74,7 @@ class Volume(SlottedDict[Paper]): shorttitle: A shortened form of the title. (Aliased to `shortbooktitle` for initialization.) """ - id: str = field(converter=int_to_str) + id: str = field(converter=int_to_str) # validator defined below parent: Collection = field(repr=False, eq=False) type: VolumeType = field(repr=False, converter=VolumeType) title: MarkupText = field(alias="booktitle") @@ -276,10 +276,14 @@ def create_paper( # Necessary because on_setattr is not called during initialization: paper.bibkey = bibkey # triggers bibkey generating (if necessary) & indexing - # For convenience, if authors/editors were given, we add them to the index here + # If authors/editors were given, we fill in their ID & add them to the index if paper.authors: + for namespec in paper.authors: + self.root.people.ingest_namespec(namespec) self.root.people._add_to_index(paper.authors, paper.full_id_tuple) if paper.editors: + for namespec in paper.editors: + self.root.people.ingest_namespec(namespec) self.root.people._add_to_index(paper.editors, paper.full_id_tuple) self.data[id] = paper diff --git a/python/acl_anthology/exceptions.py b/python/acl_anthology/exceptions.py index e091a51f9f..008b2d1e33 100644 --- a/python/acl_anthology/exceptions.py +++ b/python/acl_anthology/exceptions.py @@ -18,7 +18,7 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from .people import Name, NameSpecification + from .people import NameSpecification from .utils.ids import AnthologyIDTuple if sys.version_info >= (3, 11): @@ -40,19 +40,6 @@ def add_note(self, note: str) -> None: self.__notes__.append(note) -class AmbiguousNameError(AnthologyException): - """Raised when an ambiguous name would need an explicit and unique ID, but does not have one. - - Attributes: - name (Name): The name that raised the error. - """ - - def __init__(self, name: Name, message: str) -> None: - super().__init__(message) - self.name = name - self.add_note("Did you forget to add an explicit/unique ID to this name?") - - class AnthologyDuplicateIDError(AnthologyException, ValueError): """Raised when trying to set an ID or create an item with an ID that already exists. @@ -91,10 +78,10 @@ def __init__(self, parent: AnthologyIDTuple, tag: str, message: str) -> None: self.tag = tag -class NameIDUndefinedError(AnthologyException): - """Raised when an author ID was requested that is not defined. +class NameSpecResolutionError(AnthologyException): + """Raised when a NameSpecification cannot be resolved to a person. - This can happen when an `` or `` was used with an ID which was not defined in `name_variants.yaml`, or when trying to look up a NameSpecification that does not correspond to any Person in the PersonIndex. + This should never happen with a NameSpecification from the loaded Anthology data, but might happen if a NameSpecification is manually created. Attributes: name_spec (NameSpecification): The name specification that raised the error. @@ -105,6 +92,18 @@ def __init__(self, name_spec: NameSpecification, message: str) -> None: self.name_spec = name_spec +class PersonDefinitionError(NameSpecResolutionError): + """Raised when a NameSpecification defines an ID, but either the ID or one of its fields is not compatible with the definition in `people.yaml`. + + This can happen when an `` or `` is used with an ID which was not defined in `people.yaml`; when the name used together with this ID was not listed among the possible names in `people.yaml`; or when the ORCID used together with this ID does not match the ORCID defined in `people.yaml`. + + Attributes: + name_spec (NameSpecification): The name specification that raised the error. + """ + + pass + + class SchemaMismatchWarning(UserWarning): """Raised when the data directory contains a different XML schema as this library. @@ -118,4 +117,4 @@ def __init__(self) -> None: super().__init__( "Data directory contains a different schema.rnc as this library; " "you might need to update the data or the acl-anthology library." - ) + ) # pragma: no cover diff --git a/python/acl_anthology/people/__init__.py b/python/acl_anthology/people/__init__.py index 49063fa6de..99053f9a78 100644 --- a/python/acl_anthology/people/__init__.py +++ b/python/acl_anthology/people/__init__.py @@ -13,7 +13,14 @@ # limitations under the License. from .name import Name, NameSpecification, ConvertableIntoName -from .person import Person +from .person import Person, NameLink from .index import PersonIndex -__all__ = ["ConvertableIntoName", "Name", "NameSpecification", "Person", "PersonIndex"] +__all__ = [ + "ConvertableIntoName", + "Name", + "NameLink", + "NameSpecification", + "Person", + "PersonIndex", +] diff --git a/python/acl_anthology/people/index.py b/python/acl_anthology/people/index.py index 9a3df2423a..980909d464 100644 --- a/python/acl_anthology/people/index.py +++ b/python/acl_anthology/people/index.py @@ -14,7 +14,7 @@ from __future__ import annotations -from attrs import define, field, asdict +from attrs import define, field from collections.abc import Iterable from collections import Counter, defaultdict import itertools as it @@ -22,7 +22,7 @@ from rich.progress import track from scipy.cluster.hierarchy import DisjointSet # type: ignore import sys -from typing import cast, Any, TYPE_CHECKING +from typing import cast, Any, Optional, TYPE_CHECKING import yaml try: @@ -31,10 +31,16 @@ from yaml import Loader, Dumper # type: ignore from ..containers import SlottedDict -from ..exceptions import AnthologyException, AmbiguousNameError, NameIDUndefinedError -from ..utils.ids import AnthologyIDTuple +from ..exceptions import ( + AnthologyException, + AnthologyInvalidIDError, + NameSpecResolutionError, + PersonDefinitionError, +) +from ..utils.ids import AnthologyIDTuple, is_verified_person_id from ..utils.logging import get_logger -from . import Person, Name, NameSpecification +from . import Person, Name, NameLink, NameSpecification +from .name import _YAMLName if TYPE_CHECKING: from _typeshed import StrPath @@ -42,7 +48,7 @@ from ..collections import Paper, Volume log = get_logger() -VARIANTS_FILE = "yaml/name_variants.yaml" +PEOPLE_INDEX_FILE = "yaml/people.yaml" @define @@ -62,19 +68,55 @@ class PersonIndex(SlottedDict[Person]): Attributes: parent: The parent Anthology instance to which this index belongs. verbose: If False, will not show progress bar when building the index from scratch. - name_to_ids: A mapping of [Name][acl_anthology.people.name.Name] instances to person IDs. + path: The path to `people.yaml`. + by_orcid: A mapping of ORCIDs (as strings) to person IDs. + by_name: A mapping of [Name][acl_anthology.people.name.Name] instances to lists of person IDs. + slugs_to_verified_ids: A mapping of strings (representing slugified names) to lists of person IDs. similar: A [disjoint-set structure][scipy.cluster.hierarchy.DisjointSet] of persons with similar names. is_data_loaded: A flag indicating whether the index has been constructed. """ parent: Anthology = field(repr=False, eq=False) verbose: bool = field(default=True) - name_to_ids: dict[Name, list[str]] = field( + path: Path = field(init=False) + _by_orcid: dict[str, str] = field(init=False, repr=False, default={}) + _by_name: dict[Name, list[str]] = field( init=False, repr=False, factory=lambda: defaultdict(list) ) - similar: DisjointSet = field(init=False, repr=False, factory=DisjointSet) + _slugs_to_verified_ids: dict[str, set[str]] = field( + init=False, repr=False, factory=lambda: defaultdict(list) + ) + _similar: DisjointSet = field(init=False, repr=False, factory=DisjointSet) is_data_loaded: bool = field(init=False, repr=True, default=False) + @path.default + def _path(self) -> Path: + return self.parent.datadir / Path(PEOPLE_INDEX_FILE) + + @property + def by_orcid(self) -> dict[str, str]: + if not self.is_data_loaded: + self.load() + return self._by_orcid + + @property + def by_name(self) -> dict[Name, list[str]]: + if not self.is_data_loaded: + self.load() + return self._by_name + + @property + def similar(self) -> DisjointSet: + if not self.is_data_loaded: + self.load() + return self._similar + + @property + def slugs_to_verified_ids(self) -> dict[str, set[str]]: + if not self.is_data_loaded: + self.load() + return self._slugs_to_verified_ids + def get_by_name(self, name: Name) -> list[Person]: """Access persons by their name. @@ -86,12 +128,12 @@ def get_by_name(self, name: Name) -> list[Person]: """ if not self.is_data_loaded: self.load() - return [self.data[pid] for pid in self.name_to_ids[name]] + return [self.data[pid] for pid in self._by_name[name]] def get_by_namespec(self, name_spec: NameSpecification) -> Person: """Access persons by their name specification. - See [get_or_create_person()][acl_anthology.people.index.PersonIndex.get_or_create_person] for exceptions that can be raised by this function. + See [resolve_namespec()][acl_anthology.people.index.PersonIndex.resolve_namespec] for exceptions that can be raised by this function. Parameters: name_spec: A name specification. @@ -101,7 +143,22 @@ def get_by_namespec(self, name_spec: NameSpecification) -> Person: """ if not self.is_data_loaded: self.load() - return self.get_or_create_person(name_spec, create=False) + return self.resolve_namespec(name_spec) + + def get_by_orcid(self, orcid: str) -> Person | None: + """Access persons by their ORCID. + + Parameters: + orcid: A string representing an ORCID. + + Returns: + The person with that ORCID, if it exists, otherwise None. + """ + if not self.is_data_loaded: + self.load() + if orcid in self._by_orcid: + return self.data[self._by_orcid[orcid]] + return None def find_coauthors( self, person: str | Person, include_volumes: bool = True @@ -143,13 +200,9 @@ def find_coauthors_counter( and not cast("Volume", item).has_frontmatter ): continue - coauthors.update( - self.get_or_create_person(ns, create=False).id for ns in item.editors - ) + coauthors.update(self.resolve_namespec(ns).id for ns in item.editors) if hasattr(item, "authors"): - coauthors.update( - self.get_or_create_person(ns, create=False).id for ns in item.authors - ) + coauthors.update(self.resolve_namespec(ns).id for ns in item.authors) del coauthors[person.id] return coauthors @@ -164,8 +217,10 @@ def load(self) -> None: def reset(self) -> None: """Resets the index.""" self.data = {} - self.name_to_ids = defaultdict(list) - self.similar = DisjointSet() + self._by_orcid = {} + self._by_name = defaultdict(list) + self._slugs_to_verified_ids = defaultdict(set) + self._similar = DisjointSet() self.is_data_loaded = False def build(self, show_progress: bool = False) -> None: @@ -175,8 +230,7 @@ def build(self, show_progress: bool = False) -> None: Exceptions raised during the index creation are sent to the logger, and only a generic exception is raised at the end. """ self.reset() - # Load variant list, so IDs defined there are added first - self._load_variant_list() + self._load_people_index() # Go through every single volume/paper and add authors/editors iterator = track( self.parent.collections.values(), @@ -190,7 +244,7 @@ def build(self, show_progress: bool = False) -> None: context: Paper | Volume = volume try: for name_spec in volume.editors: - person = self.get_or_create_person(name_spec) + person = self.resolve_namespec(name_spec, allow_creation=True) person.item_ids.append(volume.full_id_tuple) for paper in volume.papers(): context = paper @@ -202,9 +256,9 @@ def build(self, show_progress: bool = False) -> None: else paper.get_editors() ) for name_spec in name_specs: - person = self.get_or_create_person(name_spec) + person = self.resolve_namespec(name_spec, allow_creation=True) person.item_ids.append(paper.full_id_tuple) - except Exception as exc: + except Exception as exc: # pragma: no cover note = f"Raised in {context.__class__.__name__} {context.full_id}; {name_spec}" # If this is merged into a single if-statement (with "or"), # the type checker complains ¯\_(ツ)_/¯ @@ -217,107 +271,312 @@ def build(self, show_progress: bool = False) -> None: if raised_exception: raise Exception( "An exception was raised while building PersonIndex; check the logger for details." - ) + ) # pragma: no cover self.is_data_loaded = True + def _load_people_index(self) -> None: + """Load and parse the `people.yaml` file. + + Raises: + AnthologyInvalidIDError: If `people.yaml` contains a malformed person ID; or if a person is listed without any names. + """ + merge_list: list[tuple[str, str]] = [] + + with open(self.path, "r", encoding="utf-8") as f: + data = yaml.load(f, Loader=Loader) + + for pid, entry in data.items(): + if not is_verified_person_id(pid): + raise AnthologyInvalidIDError( + pid, f"Invalid person ID in people.yaml: {pid}" + ) # pragma: no cover + self.add_person( + Person( + id=pid, + parent=self.parent, + names=[Name.from_dict(n) for n in entry.pop("names")], + orcid=entry.pop("orcid", None), + comment=entry.pop("comment", None), + degree=entry.pop("degree", None), + similar_ids=entry.get("similar", []), + disable_name_matching=entry.pop("disable_name_matching", False), + is_explicit=True, + ) + ) + for similar_id in entry.pop("similar", []): + merge_list.append((pid, similar_id)) + + # Check for unprocessed keys to catch errors + if entry: + log.warning( + f"people.yaml: entry '{pid}' has unknown keys: {entry.keys()}" + ) # pragma: no cover + + # Process IDs with similar names + for pid_set in self._slugs_to_verified_ids.values(): + pid_list = list(pid_set) + for pid in pid_list[1:]: + self._similar.merge(pid_list[0], pid) + for a, b in merge_list: + self._similar.merge(a, b) + def add_person(self, person: Person) -> None: """Add a new person to the index. Parameters: person: The person to add, which should not exist in the index yet. + + Raises: + AnthologyInvalidIDError: If a person with the same ID or ORCID already exists in the index. """ if (pid := person.id) in self.data: - raise KeyError(f"A Person with ID '{pid}' already exists in the index") + raise AnthologyInvalidIDError( + pid, f"A Person with ID '{pid}' already exists in the index" + ) self.data[pid] = person - self.similar.add(pid) + self._similar.add(pid) + if person.orcid is not None: + if person.orcid in self._by_orcid: + raise ValueError( + f"ORCID '{person.orcid}' already assigned to person '{self._by_orcid[person.orcid]}'" + ) + self._by_orcid[person.orcid] = pid + for name in person.names: + self._by_name[name].append(pid) + if is_verified_person_id(pid): + self._slugs_to_verified_ids[name.slugify()].add(pid) + + def create_person( + self, + id: str, + names: list[Name], + **kwargs: Any, + ) -> Person: + """Create a new explicit person and add it to the index. + + Parameters: + id: The ID of the new person. + names: A list of names for the new person; must contain at least one. + **kwargs: Any valid list or optional attribute of [Person][acl_anthology.people.person.Person]. + + Returns: + The created [Person][acl_anthology.people.person.Person] object. + + Raises: + AnthologyInvalidIDError: If a person with the given ID already exists, or the ID is not a well-formed verified-person ID. + ValueError: If the list of names is empty. + """ + if not self.is_data_loaded: + self.load() + if id in self.data: + raise AnthologyInvalidIDError( + id, f"A Person with ID '{id}' already exists in the index" + ) + if not is_verified_person_id(id): + raise AnthologyInvalidIDError(id, f"Not a valid verified-person ID: {id}") + if not names: + raise ValueError("List of names cannot be empty") + + kwargs["parent"] = self.parent + kwargs["is_explicit"] = True + + person = Person(id=id, names=names, **kwargs) + self.add_person(person) + return person + + def _update_id(self, old_id: str, new_id: str) -> None: + """Update a person ID in the index. + + Will change all indices to remove the old ID and replace it with the new one. Will be called automatically from Person; do not call manually. + + Parameters: + old_id: A person ID that already exists in the index. + new_id: The new person ID it should be changed to, which mustn't exist in the index. + """ + if not self.is_data_loaded: + return + person = self.data.pop(old_id) + self.data[new_id] = person + # Note: cannot remove from DisjointSet + self._similar.add(new_id) + self._similar.merge(old_id, new_id) + if person.orcid is not None: + self._by_orcid[person.orcid] = new_id for name in person.names: - self.name_to_ids[name].append(pid) + self._remove_name(old_id, name) + self._add_name(new_id, name) + + def _update_orcid(self, pid: str, old: Optional[str], new: Optional[str]) -> None: + """Update a person's ORCID in the index. + + Will be called automatically from Person; do not call manually. + """ + if not self.is_data_loaded: + return + if old is not None and old in self._by_orcid: + del self._by_orcid[old] + if new is not None: + self._by_orcid[new] = pid + + def _add_name(self, pid: str, name: Name) -> None: + """Add a name for a person to the index. + + Will be called automatically from Person; do not call manually. + """ + if not self.is_data_loaded: + return + self._by_name[name].append(pid) + if is_verified_person_id(pid): + self._slugs_to_verified_ids[name.slugify()].add(pid) + + def _remove_name(self, pid: str, name: Name) -> None: + """Remove a name for a person from the index. + + Will be called automatically from Person; do not call manually. + """ + if not self.is_data_loaded: + return + try: + self._by_name[name].remove(pid) + if is_verified_person_id(pid): + self._slugs_to_verified_ids[name.slugify()].remove(pid) + except KeyError: + pass + + def ingest_namespec(self, name_spec: NameSpecification) -> NameSpecification: + """Update a name specification for ingestion, potentially filling in the ID field. - def get_or_create_person( - self, name_spec: NameSpecification, create: bool = True + If the name specification contains an ORCID but doesn't have an ID yet, this will find the person with this ORCID and fill in their ID; if it doesn't exist yet, it will create a new person with a "verified" ID and fill in the new, generated ID. The supplied name specification will be modified in-place, but also returned. + + Parameters: + name_spec: The name specification on the paper, volume, etc. + + Returns: + The name specification as it should be used for the new ingestion material. + """ + if name_spec.orcid is None or name_spec.id is not None: + return name_spec + + if (person := self.get_by_orcid(name_spec.orcid)) is not None: + name_spec.id = person.id + # Make sure the name used here is listed for this person + person.add_name(name_spec.name) + else: + # Need to create a new person; generate name slug for the ID + pid = name_spec.name.slugify() + if pid in self.data: + # ID is already in use; add last four digits of ORCID to disambiguate + pid = f"{pid}-{name_spec.orcid[-4:]}" + + self.add_person( + Person( + id=pid, + parent=self.parent, + names=[name_spec.name] + name_spec.variants, + orcid=name_spec.orcid, + is_explicit=True, + ) + ) + name_spec.id = pid + + return name_spec + + def resolve_namespec( + self, name_spec: NameSpecification, allow_creation: bool = False ) -> Person: - """Get the person represented by a name specification, or create a new one if needed. + """Resolve a name specification to a person, potentially creating a new unverified person instance. Parameters: name_spec: The name specification on the paper, volume, etc. - create: If False, will not create a new Person object, but instead raise `NameIDUndefinedError` if no person matching `name_spec` exists. Defaults to True. + allow_creation: If True, will instantiate a new Person object with an unverified ID if no person matching `name_spec` exists. Defaults to False. Returns: - The person represented by `name_spec`. This will try to use the `id` attribute if it is set, look up the name in the index otherwise, or try to find a matching person by way of an ID clash. If all of these fail, it will create a new person and return that. + The person represented by `name_spec`. If `name_spec.id` is set, this will determine the person to resolve to. Otherwise, the slugified name will be used to find a matching person; an explicitly-defined (verified) person can be returned if exactly one such person exists and does not have `disable_name_matching` set. In all other cases, it will resolve to an unverified person. Raises: - AmbiguousNameError: If there are multiple known IDs for the given name, but there is no explicit `id` attribute. - NameIDUndefinedError: If there is an explicit `id` attribute, but the ID has not been defined. + NameSpecResolutionError: If `name_spec` cannot be resolved to a Person and `allow_creation` is False. + PersonDefinitionError: If `name_spec.id` is set, but either the ID or the name used with the ID has not been defined in `people.yaml`. (Inherits from NameSpecResolutionError) """ name = name_spec.name if (pid := name_spec.id) is not None: - # Explicit ID given; should already exist from name_variants.yaml - person = self.data.get(pid) - if person is None or not person.is_explicit: - exc1 = NameIDUndefinedError( - name_spec, f"Name '{name}' used with ID '{pid}' that doesn't exist" + # Explicit ID given – should be explicitly defined in people.yaml + if pid not in self.data or not (person := self.data[pid]).is_explicit: + raise PersonDefinitionError( + name_spec, f"ID '{pid}' wasn't defined in people.yaml" ) - exc1.add_note("Did you forget to define the ID in name_variants.yaml?") - raise exc1 - person.add_name(name) - elif pid_list := self.name_to_ids[name]: - # Name already exists in the index, but has no explicit ID - if len(pid_list) > 1: - exc2 = AmbiguousNameError( - name, - f"Name '{name.as_first_last()}' is ambiguous, but was used without an ID", + if not person.has_name(name): + raise PersonDefinitionError( + name_spec, + f"ID '{pid}' was used with name '{name}' that wasn't defined in people.yaml", + ) + if name_spec.orcid is not None and name_spec.orcid != person.orcid: + raise PersonDefinitionError( + name_spec, + f"ID '{pid}' was used with ORCID '{name_spec.orcid}', but people.yaml has '{person.orcid}'", ) - exc2.add_note(f"Known IDs are: {', '.join(pid_list)}") - raise exc2 - pid = pid_list[0] - person = self.data[pid] else: - # Name not in the index and has no explicit ID - pid = self.generate_id(name) - try: - # If the auto-generated ID already exists, we assume it's the same person - person = self.data[pid] - # If the name scores higher than the current canonical one, we - # also assume we should set this as the canonical one - if (not person.is_explicit) and ( - name.score() > person.canonical_name.score() - ): - person.set_canonical_name(name) - else: - person.add_name(name) - self.name_to_ids[name].append(pid) - except KeyError: - if create: - # If the auto-generated ID doesn't exist yet, then and only - # then do we create a new person - person = Person(id=pid, parent=self.parent, names=[name]) + # No explicit ID given + if name_spec.orcid is not None: + exc1 = NameSpecResolutionError( + name_spec, + "NameSpecification defines an ORCID without an ID", + ) + exc1.add_note( + "To specify an ORCID on a paper, the person needs to have an entry in `people.yaml` and be used with an explicit ID." + ) + raise exc1 + + # Generate slug for name matching + slug = name.slugify() + + # Check if the slugified name matches any verified IDs + matching_ids = list(self._slugs_to_verified_ids.get(slug, [])) + if ( + len(matching_ids) == 1 + and not (person := self.data[matching_ids[0]]).disable_name_matching + ): + # Slug unambiguously maps to person and name matching not disabled + pid = person.id + if not person.has_name(name): + person.add_name(name, inferred=True) + self._by_name[name].append(pid) + + else: + # Resolve to unverified ID + pid = f"unverified/{slug}" + + if pid in self.data: + # Unverified ID already exists; assume it's the same person + person = self.data[pid] + if not person.has_name(name): + # If the name scores higher than the current canonical + # one, we also assume we should set this as the + # canonical one + if name.score() > person.canonical_name.score(): + person._set_canonical_name(name, inferred=True) + else: + person.add_name(name, inferred=True) + self._by_name[name].append(pid) + elif allow_creation: + # Unverified ID doesn't exist yet; create it + person = Person( + id=pid, parent=self.parent, names=[(name, NameLink.INFERRED)] + ) self.add_person(person) else: - raise NameIDUndefinedError( + raise NameSpecResolutionError( name_spec, - f"Name '{name}' generated ID '{pid}' that doesn't exist", + f"NameSpecification resolved to ID '{pid}' which doesn't exist", ) + # Make sure that name variants specified here are registered for name in name_spec.variants: - person.add_name(name) - if name not in self.name_to_ids: - self.name_to_ids[name].append(pid) + if not person.has_name(name): + person.add_name(name, inferred=True) + if name not in self._by_name: + self._by_name[name].append(pid) return person - @staticmethod - def generate_id(name: Name) -> str: - """Generates and returns an ID from the given name. - - Warning: - This **intentionally doesn't guarantee uniqueness** of the generated ID. - If two names generate identical IDs with this method, we assume they - refer to the same person. This happens e.g. when there are missing - accents in one version, or when we have an inconsistent first/last split - for multiword names. These cases have in practice always referred to - the same person. - """ - return name.slugify() - def _add_to_index( self, namespecs: Iterable[NameSpecification], item_id: AnthologyIDTuple ) -> None: @@ -329,89 +588,36 @@ def _add_to_index( return for namespec in namespecs: - person = self.get_or_create_person(namespec) + person = self.resolve_namespec(namespec, allow_creation=True) person.item_ids.append(item_id) - def _load_variant_list(self) -> None: - """Loads and parses the `name_variant.yaml` file. - - Raises: - AmbiguousNameError: If there are ambiguous "canonical" names without explicit, unique IDs for each one. - """ - filename = self.parent.datadir / Path(VARIANTS_FILE) - merge_list: list[tuple[str, str]] = [] - with open(filename, "r", encoding="utf-8") as f: - variant_list = yaml.load(f, Loader=Loader) - for entry in variant_list: - # Every entry must have a "canonical" name - canonical = Name.from_dict(entry["canonical"]) - # If it doesn't define an ID, we have to create one - if (pid := entry.get("id")) is None: - pid = self.generate_id(canonical) - if pid in self.data: - raise AmbiguousNameError( - canonical, - ( - f"While parsing {filename}: " - f"name '{canonical.as_first_last()}' is ambiguous, but the " - f"automatically generated ID '{pid}' already exists." - ), - ) - # Parse all the variant names, and make sure canonical stays at index 0 - names = [canonical] + [ - Name.from_dict(var) for var in entry.get("variants", []) - ] - # Now we can create a new person from this entry... - person = Person( - id=pid, - parent=self.parent, - names=names, - comment=entry.get("comment", None), - is_explicit=True, - ) - # ...and add it to the index - self.add_person(person) - for similar_id in entry.get("similar", []): - merge_list.append((pid, similar_id)) - - # Process IDs with similar names - for name, pid_list in self.name_to_ids.items(): - for pid in pid_list[1:]: - self.similar.merge(pid_list[0], pid) - for a, b in merge_list: - self.similar.merge(a, b) - - def save(self, path: StrPath) -> None: - """Save the entire index. - - CURRENTLY UNTESTED; DO NOT USE. + def save(self, path: Optional[StrPath] = None) -> None: + """Save the `people.yaml` file. Arguments: - path: The filename to save to. + path: The filename to save to. If None, defaults to the parent Anthology's `people.yaml` file. """ - data = [] + if path is None: + path = self.path # pragma: no cover + + data = {} for person in self.values(): + if not person.is_explicit: + continue + attrib: dict[str, Any] = { - "id": person.id, - "canonical": asdict( - person.canonical_name, - filter=lambda a, v: not (a.name == "script" and v is None), - ), + "names": [ + _YAMLName(name) + for (name, link_type) in person._names + if link_type == NameLink.EXPLICIT + ], + "comment": person.comment, + "degree": person.degree, + "disable_name_matching": person.disable_name_matching, + "orcid": person.orcid, + "similar": person.similar_ids, } - if person.item_ids: - attrib["items"] = person.item_ids - if len(person.names) > 1: - attrib["variants"] = [ - asdict( - name, filter=lambda a, v: not (a.name == "script" and v is None) - ) - for name in person.names[1:] - ] - similar = self.similar.subset(person.id) - if len(similar) > 1: - attrib["similar"] = [id_ for id_ in similar if id_ != person.id] - if person.comment is not None: - attrib["comment"] = person.comment - data.append(attrib) + data[person.id] = {k: v for k, v in attrib.items() if v} + with open(path, "w", encoding="utf-8") as f: - yaml.dump(data, f, Dumper=Dumper) + yaml.dump(data, f, allow_unicode=True, Dumper=Dumper) diff --git a/python/acl_anthology/people/name.py b/python/acl_anthology/people/name.py index 289787dbf0..b82acedc74 100644 --- a/python/acl_anthology/people/name.py +++ b/python/acl_anthology/people/name.py @@ -21,6 +21,12 @@ import re from slugify import slugify from typing import Any, Optional, cast, TypeAlias +import yaml + +try: + from yaml import CDumper as Dumper +except ImportError: # pragma: no cover + from yaml import Dumper # type: ignore from ..utils.latex import latex_encode @@ -114,17 +120,13 @@ def score(self) -> float: score += 0.5 return score + @cache def slugify(self) -> str: """ Returns: A [slugified string](https://github.com/un33k/python-slugify#how-to-use) of the full name. """ - if not (name := self.as_first_last()): - # Only necessary because of - slug = "none" - else: - slug = slugify(name) - return slug + return slugify(self.as_first_last()) @classmethod def from_dict(cls, name: dict[str, str]) -> Name: @@ -250,8 +252,9 @@ class NameSpecification: Attributes: name: The person's name. - id: Unique ID for the person that this name refers to. Defaults to `None`. - affiliation: Professional affiliation. Defaults to `None`. + id: Unique ID for the person that this name refers to. + orcid: An ORCID that was supplied together with this name. + affiliation: Professional affiliation. variants: Variant spellings of this name in different scripts. Note: @@ -263,6 +266,7 @@ class NameSpecification: name: Name = field(converter=_Name_from) id: Optional[str] = field(default=None, validator=v.optional(v.instance_of(str))) + orcid: Optional[str] = field(default=None, validator=v.optional(v.instance_of(str))) affiliation: Optional[str] = field( default=None, validator=v.optional(v.instance_of(str)) ) @@ -321,6 +325,7 @@ def from_xml(cls, person: etree._Element) -> NameSpecification: return cls( Name(first, cast(str, last)), id=person.get("id"), + orcid=person.get("orcid"), affiliation=affiliation, variants=variants, ) @@ -336,6 +341,8 @@ def to_xml(self, tag: str = "author") -> etree._Element: elem = etree.Element(tag) if self.id is not None: elem.set("id", self.id) + if self.orcid is not None: + elem.set("orcid", self.orcid) elem.extend( ( E.first(self.first) if self.first else E.first(), @@ -347,3 +354,21 @@ def to_xml(self, tag: str = "author") -> etree._Element: for variant in self.variants: elem.append(variant.to_xml()) return elem + + +class _YAMLName(yaml.YAMLObject): + """YAMLObject representing names. + + This exists to serialize names in "flow" style (i.e. one-liner `{first: ..., last: ...}`) without having to force flow style on the entire YAML document. + """ + + yaml_dumper = Dumper + yaml_tag = "tag:yaml.org,2002:map" # serialize like a dictionary + yaml_flow_style = True # force flow style + + def __init__(self, name: Name) -> None: + if name.first is not None: + self.first = name.first + self.last = name.last + if name.script is not None: + self.script = name.script diff --git a/python/acl_anthology/people/person.py b/python/acl_anthology/people/person.py index be0652b8e9..0b01bb3919 100644 --- a/python/acl_anthology/people/person.py +++ b/python/acl_anthology/people/person.py @@ -14,41 +14,99 @@ from __future__ import annotations +import attrs from attrs import define, field -from typing import Iterator, Optional, TYPE_CHECKING +from enum import Enum +import itertools as it +from typing import Any, Iterator, Optional, Sequence, TYPE_CHECKING +from ..exceptions import AnthologyException, AnthologyInvalidIDError from ..utils.attrs import auto_validate_types -from ..utils.ids import AnthologyIDTuple, build_id_from_tuple +from ..utils.ids import ( + AnthologyIDTuple, + build_id_from_tuple, + is_valid_orcid, + is_verified_person_id, +) from . import Name if TYPE_CHECKING: + from . import NameSpecification from ..anthology import Anthology from ..collections import Paper, Volume +class NameLink(Enum): + """How a Name was connected to a Person.""" + + EXPLICIT = "explicit" + """Name is explicitly listed in `people.yaml` file.""" + + INFERRED = "inferred" + """Name was connected to this Person via slug matching heuristic.""" + + +def _name_list_converter( + name_list: Sequence[Name | tuple[Name, NameLink]], +) -> list[tuple[Name, NameLink]]: + return [ + (item, NameLink.EXPLICIT) if isinstance(item, Name) else item + for item in name_list + ] + + +def _update_person_index(person: Person, attr: attrs.Attribute[Any], value: str) -> str: + """Update the [PersonIndex][acl_anthology.people.index.PersonIndex]. + + Intended to be called from `on_setattr` of an [attrs.field][]. + """ + index = person.parent.people + if attr.name == "id": + index._update_id(person.id, value) + elif attr.name == "orcid": + index._update_orcid(person.id, person.orcid, value) + return value + + @define(field_transformer=auto_validate_types) class Person: """A natural person. Info: - All information about persons is currently derived from [name specifications][acl_anthology.people.name.NameSpecification] on volumes and papers, and not stored explicitly. This means that Person objects **cannot be used to make changes** to Anthology data; change the information on papers instead. + The connection between persons and Anthology items is derived from [name specifications][acl_anthology.people.name.NameSpecification] on volumes and papers, and not stored explicitly. This means that Person objects **cannot be used to make changes to paper metadata**, e.g. which person a paper is associated with or under which name; change the information on papers instead. + + Person objects **can** be used to make changes to metadata that appears in `people.yaml`, such as ORCID, comment, degree, and alternative names for this person. Attributes: id: A unique ID for this person. parent: The parent Anthology instance to which this person belongs. - names: A list of names under which this person has published. item_ids: A list of volume and/or paper IDs this person has authored or edited. - comment: A comment for disambiguation purposes; can be stored in `name_variants.yaml`. - is_explicit: True if this person has names explicitly defined in `name_variants.yaml`. Note this does _not_ necessarily mean an explicit ID was defined for the person there. + orcid: The person's ORCID. + comment: A comment for disambiguation purposes. + degree: The person's institution of highest degree, for disambiguation purposes. + similar_ids: A list of person IDs with names that should be considered similar to this one. Do **not** use this to _find_ people with similar names; that should be done via [`PersonIndex.similar`][acl_anthology.people.index.PersonIndex]. This attribute can be used to explicitly add more "similar IDs" that are not automatically derived via similar names. + disable_name_matching: If True, no items should be assigned to this person unless they explicitly specify this person's ID. + is_explicit: If True, this person's ID is explicitly defined in `people.yaml`. You probably want to use [`make_explicit()`][acl_anthology.people.person.Person.make_explicit] rather than change this attribute. """ - id: str = field() + id: str = field( + on_setattr=attrs.setters.pipe(attrs.setters.validate, _update_person_index) + ) parent: Anthology = field(repr=False, eq=False) - names: list[Name] = field(factory=list) + _names: list[tuple[Name, NameLink]] = field( + factory=list, converter=_name_list_converter + ) item_ids: list[AnthologyIDTuple] = field( factory=list, repr=lambda x: f"" ) + orcid: Optional[str] = field( + default=None, + on_setattr=attrs.setters.pipe(attrs.setters.validate, _update_person_index), + ) # validator defined below comment: Optional[str] = field(default=None) - is_explicit: Optional[bool] = field(default=False) # TODO: why can this be None? + degree: Optional[str] = field(default=None) + similar_ids: list[str] = field(factory=list) + disable_name_matching: Optional[bool] = field(default=False, converter=bool) + is_explicit: Optional[bool] = field(default=False, converter=bool) def __eq__(self, other: object) -> bool: if not isinstance(other, Person): @@ -58,6 +116,23 @@ def __eq__(self, other: object) -> bool: def __hash__(self) -> int: return hash(self.id) + @orcid.validator + def _check_orcid(self, _: Any, value: Optional[str]) -> None: + if value is not None and not is_valid_orcid(value): + raise ValueError("ORCID is not valid (wrong format or checksum)") + + @property + def names(self) -> list[Name]: + return [name for (name, _) in self._names] + + @names.setter + def names(self, values: list[Name]) -> None: + for name, _ in self._names: + self.parent.people._remove_name(self.id, name) + for name in values: + self.parent.people._add_name(self.id, name) + self._names = _name_list_converter(values) + @property def canonical_name(self) -> Name: """ @@ -67,22 +142,60 @@ def canonical_name(self) -> Name: try: # By convention, the first entry of `self.names` is treated as the # canonical entry - return self.names[0] - except KeyError: + return self._names[0][0] + except IndexError: raise ValueError(f"No names defined for person '{self.id}'") @canonical_name.setter def canonical_name(self, name: Name) -> None: - self.set_canonical_name(name) + self._set_canonical_name(name) - def add_name(self, name: Name) -> None: + def _set_canonical_name(self, name: Name, inferred: bool = False) -> None: + """Set the canonical name for this person. + + Outside of the library, use Person.canonical_name = ... + + Parameters: + name: Name that should be treated as canonical for this person. + inferred: Marks the canonical name as inferred (used inside the name slug matching algorithm). + """ + link_type = NameLink.INFERRED if inferred else NameLink.EXPLICIT + if not self.has_name(name): + self._names.insert(0, (name, link_type)) + else: + self._names = [(name, link_type)] + [x for x in self._names if x[0] != name] + + def add_name(self, name: Name, inferred: bool = False) -> None: """Add a name for this person. Parameters: name: Name that can refer to this person. + inferred: If True, will be marked as `NameLinkingType.INFERRED`, which will e.g. cause this name to not be written to `people.yaml`. Used when building the [`PersonIndex`][acl_anthology.people.index.PersonIndex] from the XML data; you probably don't want to set this manually. Defaults to False. """ - if name not in self.names: - self.names.append(name) + link_type = NameLink.INFERRED if inferred else NameLink.EXPLICIT + if not self.has_name(name): + self._names.append((name, link_type)) + self.parent.people._add_name(self.id, name) + elif (name, link_type) not in self._names: + # ensure that name is re-inserted at same position + idx = self.names.index(name) + del self._names[idx] + self._names.insert(idx, (name, link_type)) + + def remove_name(self, name: Name) -> None: + """Remove an explicit name for this person. + + Warning: + If the name is still used on a paper or volume with the ID of this person, this may result in an Exception during index building. Names that were implicitly linked to this person cannot be removed this way, as the name would simply reappear on next load. + + Parameters: + name: Name that should be removed from this person. + + Raises: + ValueError: If this name was not explicitly linked to this person. + """ + self._names.remove((name, NameLink.EXPLICIT)) + self.parent.people._remove_name(self.id, name) def has_name(self, name: Name) -> bool: """ @@ -92,19 +205,64 @@ def has_name(self, name: Name) -> bool: Returns: True if the given name can refer to this person. """ - return name in self.names + return any(existing_name == name for (existing_name, _) in self._names) - def set_canonical_name(self, name: Name) -> None: - """Set the canonical name for this person. + def make_explicit(self, new_id: str) -> None: + """Turn this person that was implicitly created into an explicitly-represented one. + + This will result in this person having an explicit entry in `people.yaml` with all names that are currently associated with this person. It will also add their new explicit ID to all papers and volumes currently associated with this person. Parameters: - name: Name that should be treated as canonical for this person. + new_id: The new ID for this person, which must match [`RE_VERIFIED_PERSON_ID`][acl_anthology.utils.ids.RE_VERIFIED_PERSON_ID]. + + Raises: + AnthologyException: If `self.explicit` is already True. + ValueError: If the supplied ID is not valid, or if it already exists in the PersonIndex. """ - try: - self.names.pop(self.names.index(name)) - except ValueError: - pass - self.names.insert(0, name) + if self.is_explicit: + raise AnthologyException("Person is already explicit") + + self.is_explicit = True + self.update_id(new_id) + self._names = [(name, NameLink.EXPLICIT) for name, _ in self._names] + + def update_id(self, new_id: str) -> None: + """Update the ID of this person, including on all of their associated papers. + + In contrast to simply changing the `id` attribute, this function will go through all associated papers and update the ID attribute there. + + Parameters: + new_id: The new ID for this person, which must match [`RE_VERIFIED_PERSON_ID`][acl_anthology.utils.ids.RE_VERIFIED_PERSON_ID]. + + Raises: + AnthologyException: If `self.is_explicit` is False. + AnthologyInvalidIDError: If the supplied ID is not valid, or if it already exists in the PersonIndex. + """ + if not self.is_explicit: + exc = AnthologyException("Can only update ID for explicit person") + exc.add_note("Did you want to use make_explicit() instead?") + raise exc + if not is_verified_person_id(new_id): + raise AnthologyInvalidIDError( + new_id, f"Not a valid verified-person ID: {new_id}" + ) + + old_id = self.id + + def namespec_refers_to_self(namespec: NameSpecification) -> bool: + if is_verified_person_id(old_id): + return namespec.id == old_id + return namespec.id is None and self.has_name(namespec.name) + + self.id = new_id # will update PersonIndex + for paper in self.papers(): + for namespec in it.chain(paper.authors, paper.editors): + if namespec_refers_to_self(namespec): + namespec.id = new_id + for volume in self.volumes(): + for namespec in volume.editors: + if namespec_refers_to_self(namespec): + namespec.id = new_id def papers(self) -> Iterator[Paper]: """Returns an iterator over all papers associated with this person. @@ -119,7 +277,7 @@ def papers(self) -> Iterator[Paper]: if paper is None: raise ValueError( f"Person {self.id} lists associated paper {build_id_from_tuple(anthology_id)}, which doesn't exist" - ) + ) # pragma: no cover yield paper def volumes(self) -> Iterator[Volume]: @@ -131,5 +289,5 @@ def volumes(self) -> Iterator[Volume]: if volume is None: raise ValueError( f"Person {self.id} lists associated volume {build_id_from_tuple(anthology_id)}, which doesn't exist" - ) + ) # pragma: no cover yield volume diff --git a/python/acl_anthology/text/markuptext.py b/python/acl_anthology/text/markuptext.py index 18f18fbd2d..6d3aca6087 100644 --- a/python/acl_anthology/text/markuptext.py +++ b/python/acl_anthology/text/markuptext.py @@ -241,7 +241,8 @@ def to_xml(self, tag: str = "span") -> etree._Element: """ if isinstance(self._content, str): element = etree.Element(tag) - element.text = self._content + if self._content: + element.text = self._content else: element = deepcopy(self._content) element.tag = tag diff --git a/python/acl_anthology/utils/ids.py b/python/acl_anthology/utils/ids.py index 28812f3e10..1cc5386981 100644 --- a/python/acl_anthology/utils/ids.py +++ b/python/acl_anthology/utils/ids.py @@ -1,4 +1,4 @@ -# Copyright 2023-2024 Marcel Bollmann +# Copyright 2023-2025 Marcel Bollmann # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ """Functions for manipulating Anthology IDs.""" +import functools import re from typing import Optional @@ -30,6 +31,12 @@ RE_ITEM_ID = re.compile(r"[a-z0-9]+") """A regular expression matching any valid volume or paper ID.""" +RE_VERIFIED_PERSON_ID = re.compile(r"[a-z][\-a-z0-9]+") +"""A regular expression matching any valid verified person ID.""" + +RE_ORCID = re.compile(r"[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]") +"""A regular expression matching any string that looks like an ORCID.""" + def build_id( collection_id: str, volume_id: Optional[str] = None, paper_id: Optional[str] = None @@ -200,6 +207,34 @@ def is_valid_item_id(id_: str) -> bool: return RE_ITEM_ID.fullmatch(id_) is not None +def is_valid_orcid(orcid: str) -> bool: + """Validate that a string looks like an ORCID and has the correct checksum. + + Returns: + True if the ORCID validates, False otherwise. + """ + if RE_ORCID.fullmatch(orcid) is None: + return False + # + total = functools.reduce( + lambda x, y: (x + int(y)) * 2, orcid[:-1].replace("-", ""), 0 + ) + checksum = (12 - (total % 11)) % 11 + return orcid[-1] == str(checksum) if checksum < 10 else orcid[-1] == "X" + + +def is_verified_person_id(id_: str) -> bool: + """Validate that a string is formatted like a verified person ID. + + Returns: + True if this ID can refer to a verified person. + + Warning: + Does not perform any kind of input validation. + """ + return RE_VERIFIED_PERSON_ID.fullmatch(id_) is not None + + def infer_year(anthology_id: AnthologyID) -> str: """Infer the year from an Anthology ID. diff --git a/python/acl_anthology/utils/logging.py b/python/acl_anthology/utils/logging.py index a6e5a95044..964640d6ac 100644 --- a/python/acl_anthology/utils/logging.py +++ b/python/acl_anthology/utils/logging.py @@ -15,8 +15,9 @@ """Functions for convenient logging.""" import logging +from rich.console import Console from rich.logging import RichHandler -from typing import cast +from typing import cast, Optional from ..config import config @@ -46,7 +47,9 @@ def emit(self, record: logging.LogRecord) -> None: self.highest = record.levelno -def setup_rich_logging(**kwargs: object) -> SeverityTracker: +def setup_rich_logging( + console: Optional[Console] = None, **kwargs: object +) -> SeverityTracker: """Set up a logger that uses rich markup and severity tracking. This function is intended to be called in a script. It calls [logging.basicConfig][] and is therefore not executed by default, as applications may wish to setup their loggers differently. @@ -57,6 +60,8 @@ def setup_rich_logging(**kwargs: object) -> SeverityTracker: Returns: The severity tracker, so that it can be used to check the highest emitted log level. """ + if console is None: + console = Console(stderr=True) log_config: dict[str, object] = dict( level="NOTSET", format="%(message)s", @@ -65,7 +70,9 @@ def setup_rich_logging(**kwargs: object) -> SeverityTracker: ) log_config.update(kwargs) tracker = SeverityTracker() - cast(list[logging.Handler], log_config["handlers"]).extend([RichHandler(), tracker]) + cast(list[logging.Handler], log_config["handlers"]).extend( + [RichHandler(console=console), tracker] + ) logging.basicConfig(**log_config) # type: ignore logging.captureWarnings(True) return tracker diff --git a/python/docs/guide/modifying-data.md b/python/docs/guide/modifying-data.md index b085684803..ab1c21db7d 100644 --- a/python/docs/guide/modifying-data.md +++ b/python/docs/guide/modifying-data.md @@ -16,7 +16,9 @@ are some rules of thumb when making modifications to the data: whenever possible, rather than instantiating them directly. 2. You can modify objects by simply modifying their attributes, as long as the object in question has an explicit representation in the Anthology data. - - This includes collections, volumes, papers, events, but not e.g. persons. + - This includes collections, volumes, papers, events. + - It also includes persons where `Person.is_explicit == True`, as those have + an explicit representation in `people.yaml`. 3. Saving data is always non-destructive and will avoid introducing unnecessary changes (e.g. no needless reordering of XML tags). {==This is currently only true & tested for XML files, not for the YAML files.==} @@ -39,6 +41,10 @@ just fetch the paper and set its `doi` attribute: >>> paper.doi = '10.18653/v1/2022.acl-long.99' ``` +!!! tip "Rule of thumb" + + For all `collections` objects, setting attributes should either raise a `TypeError`, or "do the right thing." However, be careful when modifying _list_ attributes in-place, as no input validation is performed in that case. + ### Simple attributes Attributes generally perform **input validation**. For example, since a paper's @@ -65,9 +71,6 @@ date of ingestion is stored as a string, but the following will also work: '2025-01-08' ``` -**As a general rule, setting attributes of `collections` objects should either -raise a `TypeError`, or "do the right thing."** - ### List attributes List attributes can be modified the same way as other attributes; for example, @@ -80,26 +83,24 @@ to the author list: >>> paper.authors.append(spec) ``` -To change an existing author's name, you just need to remember that names are -immutable: +To change an existing author's name, you just need to remember that **names are +immutable**: ```pycon >>> paper.authors[0].name.first = "Marc Marcel" # will NOT work >>> paper.authors[0].name = Name("Bollmann, Marc Marcel") # works ``` -!!! danger - - Input validation or conversion cannot be done when modifying mutable - attributes such as lists (only when _setting_ them). That means you won't - get an (immediate) error if you e.g. append the wrong type of object to a - list attribute. +There is **no input validation or conversion** when modifying mutable attributes +such as lists (only when _setting_ them). That means you won't get an +immediate error if you e.g. append the wrong type of object to a list +attribute! ### Things to keep in mind #### Citation keys If a paper's title or author list has changed, you might want to recreate its -citation key (or 'bibkey'). This can be done by simply calling +citation key (or 'bibkey'). This can be done by calling [`Paper.refresh_bibkey()`][acl_anthology.collections.paper.Paper.refresh_bibkey]. If the auto-generated bibkey is identical to the current one, the bibkey will not change. @@ -123,45 +124,156 @@ the new data. ## Modifying people -{==TODO==} +A person can be _explicit_ (has an entry in `people.yaml`) or _inferred_ (was instantiated from a name specification without an ID). To make modifications to persons, it is important to remember that: + +1. Only an _explicit_ person's attributes can be meaningfully modified. + +2. Changing which person a paper/volume is assigned to should be done by modifying the name specification on the paper/volume, not by changing anything on the Person object. + +??? info "A note on terminology" + + Within the library, the term **explicit** refers to a person that has an entry in `people.yaml`, whereas **inferred** refers to a person that was instantiated automatically while loading the XML data files (and has no entry in `people.yaml`). + + Currently, all inferred persons have IDs beginning with `unverified/`, while IDs of explicit persons _must not_ begin with `unverified/`. + + In practice, this means that "inferred" persons are currently equivalent to "unverified" persons, but the library intentionally uses terminology that is agnostic to the semantics of the ID. If the semantics of whom we consider "(un)verified" change, the terminology in the library needn't change, as it only refers to the technical aspect of where the ID came from (`people.yaml` vs. implicit instantiation). + +### Creating a new person + +Manually creating a new person (that will get saved to `people.yaml` and can +have an ORCID and other metadata) can be done in two ways: + +1. By calling [`PersonIndex.create_person()`][acl_anthology.people.index.PersonIndex.create_person]. The returned Person is _not_ linked to any papers/volumes, but you can set their ID afterwards on name specifications. + +2. By calling [`make_explicit()`][acl_anthology.people.person.Person.make_explicit] on a currently _inferred_ person. This will not only add this person to the database, but also **set their ID on all papers/volumes** currently associated with them. + +### Example: Merging two persons + +**Situation:** An author has published under multiple names, and therefore two separate persons get instantiated for them (let's call them `p1` and `p2`). We want to merge them into a single person. + +1. If neither `p1` nor `p2` are _explicit_: Call [`p1.make_explicit()`][acl_anthology.people.person.Person.make_explicit]. This will create an entry in `people.yaml` with all current names of `p1` and add the new ID to all papers and volumes currently inferred to belong to `p1`. + +2. Iterate through `p2.papers()` and `p2.volumes()` {==(TODO: a function to iterate through all items, no matter the type)==} and add `p1`'s new ID to the name specifications that are currently resolved to `p2`. {==TODO: It's currently a bit tricky to find the _name specification_ referring to a person; should add a function for this.==} + +3. Save both the PersonIndex and the changed collections. {==TODO: The library current cannot track which collections have actually changed, so there is no "save all" function yet.==} + +### Example: Disambiguating a person + +**Situation:** A person `p1` is currently associated with papers/volumes that actually belong to different people, who just happened to publish under the same name. We want to create a new person instance for the other author with the same name. + +1. Call [`anthology.people.create_person()`][acl_anthology.people.index.PersonIndex.create_person] for all persons who do not have an explicit ID yet, giving all the names that can refer to this person. Also supply the ORCID when calling this function, if it is known. + +2. For each person, iterate through the papers that actually belong to them and update the name specification that currently resolves to `p1` by setting the explicit ID of the correct newly-created person. {==TODO: Same as above: It's currently a bit tricky to find the _name specification_ referring to a person; should add a function for this.==} ## Ingesting new proceedings -{==TODO==} +Proceedings can be ingested almost entirely via functionality from this library; +in particular, no data files (XML or YAML) need to be saved manually. _(The +only functionality that is currently not part of this library is the fixed-caser +for paper titles, which is described below.)_ ### New collections, volumes, and papers Creating new objects from `acl_anthology.collections` should be done with -`create_` functions from their respective parent objects. Here is a minimal -example to create a new paper in an entirely new collection: +`create_` functions from their respective parent objects. -```python -collection = anthology.create_collection("2049.acl") -volume = collection.create_volume( - id="long", - title=MarkupText.from_string("Proceedings of the ..."), -) -paper = volume.create_paper( - title=MarkupText.from_string("GPT-5000 is all you need") -) -``` - -All attributes that can be set on these objects can also be supplied as keyword -parameters to the `create_` functions; alternatively, they can be set on the -object after it has been created. - -Some required attributes don't _need_ to be supplied on these functions: +All attributes that can be set on these objects (Volumes, Papers, etc.) can also +be supplied as keyword parameters to the `create_` functions. Some required +attributes don't _need_ to be supplied here: +- A Paper's `id` will be set to the next-highest numeric ID that doesn't already + exist in the volume, starting at `"1"`. +- A Paper's `bibkey` will be automatically generated if not explicitly set. - A Volume's `year` attribute will be derived from the collection ID (e.g., `"2049"` in a collection with ID `"2049.acl"`). - A Volume's `type` will default to [PROCEEDINGS][acl_anthology.collections.types.VolumeType]. -- A Paper's `id` will be set to the next-highest numeric ID that doesn't already - exist in the volume, starting at `"1"`. -- A Paper's `bibkey` will be automatically generated if not explicitly set. - (But if you didn't supply an `authors` list when creating the paper, you will - want to call `refresh_bibkey()` on the Paper after setting the authors.) + +However, it is **strongly recommended to supply the author/editor list** when +calling a `create_` function, as this will resolve person IDs and create correct +bibkeys automatically. + +!!! example + + Here is an example for how to create a new paper in an entirely new collection: + + ```python + collection = anthology.create_collection("2049.acl") + volume = collection.create_volume( + id="long", + title=MarkupText.from_latex_maybe("Proceedings of the ..."), + venue_ids=["acl"], + ) + paper = volume.create_paper( + title=MarkupText.from_latex_maybe("GPT$^{\\infty}$ is all you need"), + authors=[NameSpecification(first="John", last="Doe")], + ) + ``` + + When all volumes and papers have been added, the XML file is written by calling: + + ```python + collection.save() + ``` + +??? info "If you don't supply an author list here..." + + If you don't supply `authors` or `editors` when calling a `create_` function, or you need to modify those afterwards for some reason, you will need to perform these steps manually (which are otherwise handled by the `create_` function): + + - Call `anthology.people.ingest_namespec()` on each NameSpecification. + - Call `refresh_bibkey()` on the Paper. + +### Specifying titles and abstracts + +Paper titles and abstracts always need to be **supplied as [MarkupText][acl_anthology.text.markuptext.MarkupText]**. Simple strings can be instantiated with [`MarkupText.from_string()`][acl_anthology.text.markuptext.MarkupText.from_string]. For titles and abstracts containing LaTeX commands, [`MarkupText.from_latex()`][acl_anthology.text.markuptext.MarkupText.from_latex] can be used. In practice, however, it may be unknown if text actually contains LaTeX markup. In that case, using [`MarkupText.from_latex_maybe()`][acl_anthology.text.markuptext.MarkupText.from_latex_maybe] may be preferable, which will e.g. prevent percentage signs `%` from being interpreted as starting a LaTeX comment, and apply a heuristic to decide if a tilde `~` should be interpreted as a literal character or as a LaTeX non-breaking space. {==TODO: We might want to make `as_latex_maybe()` the default instantiator for MarkupText, which would greatly simplify the instantiation of this in what is probably the most common use case.==} + +Paper titles should also have our **fixed-casing algorithm** applied to protect certain characters e.g. by wrapping them in braces in BibTeX entries. **The fixed-caser is currently not part of this Python library.** There are two options for running the fixed-casing on a new ingestion: + +1. _Outside the ingestion script:_ Run [`bin/fixedcase/protect.py`](https://github.com/acl-org/acl-anthology/blob/master/bin/fixedcase/protect.py) on the new XML files produced by the ingestion script. + +2. _Within the ingestion script:_ Convert titles to XML, run `fixedcase.protect()`, then set the title again from the modified XML element: + + ```python + import fixedcase + + xml_title = paper.title.to_xml("title") + fixedcase.protect(xml_title) + paper.title = MarkupText.from_xml(xml_title) + ``` + + +### Specifying authors + +Authors need to be specified by creating [name +specifications](accessing-authors.md#name-specifications), for example: + +```python +NameSpecification(Name("Marcel", "Bollmann"), orcid="0000-0003-2598-8150") +``` + +If an ORCID is supplied, the NameSpecification also needs to have an explicit ID +referring to an entry in `people.yaml`. **The library can add an ID +automatically** as long as you supply the author/editor list to the `create_` +function, so there is typically **no need to call `create_person()`** during +ingestion! + +!!! example + + If you create a paper in the following way... + + ```python + paper = volume.create_paper( + title=MarkupText.from_string("The past and future of the ACL Anthology"), + authors=[NameSpecification(Name("Marcel", "Bollmann"), orcid="0000-0003-2598-8150")], + ) + ``` + + ...the name specification will automatically be updated with an ID referring to this person in one of two ways: + + - If a person with this ORCID already exists in `people.yaml`, their ID will be filled in. + - If a person with this ORCID does not exist in `people.yaml`, a new entry with this ORCID will be added to `people.yaml` with an auto-generated person ID. The ID is a slug of the person's name; if necessary to avoid an ID clash, the last four digits of their ORCID will be appended. + ### New events @@ -187,18 +299,11 @@ the gory details), it's best to ensure that: `event.add_colocated(volume)`. -### Parsing markup +### Connecting to venues and SIGs -MarkupText can be instantiated from strings representing LaTeX via -[`MarkupText.from_latex()`][acl_anthology.text.markuptext.MarkupText.from_latex]. -This can be useful for titles and abstracts if they contain LaTeX commands, but -in practice, it may be unknown if they actually do. In that case, using -[`MarkupText.from_latex_maybe()`][acl_anthology.text.markuptext.MarkupText.from_latex_maybe] -may be preferable, which will e.g. prevent percentage signs `%` from being -interpreted as starting a LaTeX comment, and apply a heuristic to decide if a -tilde `~` should be interpreted as a literal character or as a LaTeX -non-breaking space. +Volumes can be connected to venues by modifying the volume's `venue_ids` list. {==TODO: adding new venues==} +{==TODO: connecting to SIGs; we may want to refactor how SIGs are represented before introducing this functionality.==} ## Saving changes @@ -213,4 +318,7 @@ non-breaking space. non-destructive through [integration tests on the entire Anthology data](https://github.com/acl-org/acl-anthology/blob/master/python/tests/anthology_integration_test.py). -{==TODO: changes to YAML files, `Anthology.save_all()`, etc. ==} +- **Changes to the person database (`people.yaml`)** can be saved by calling + [`PersonIndex.save()`][acl_anthology.people.index.PersonIndex.save]. + +{==TODO: changes to other YAML files, `Anthology.save_all()`, etc. ==} diff --git a/python/mkdocs.yml b/python/mkdocs.yml index 0539c2f47a..7525e533bb 100644 --- a/python/mkdocs.yml +++ b/python/mkdocs.yml @@ -9,6 +9,7 @@ markdown_extensions: - footnotes - pymdownx.betterem - pymdownx.critic + - pymdownx.details - pymdownx.smartsymbols - pymdownx.superfences: custom_fences: diff --git a/python/tests/anthology_integration_test.py b/python/tests/anthology_integration_test.py index a69f4bf1d0..eba941ca60 100644 --- a/python/tests/anthology_integration_test.py +++ b/python/tests/anthology_integration_test.py @@ -69,6 +69,22 @@ def test_full_anthology_should_validate_schema(full_anthology): collection.validate_schema() +@pytest.mark.integration +def test_full_anthology_roundtrip_people_yaml(full_anthology, tmp_path): + full_anthology.people.build() + yaml_in = full_anthology.people.path + yaml_out = tmp_path / "people.yaml" + full_anthology.people.save(yaml_out) + assert yaml_out.is_file() + with ( + open(yaml_in, "r", encoding="utf-8") as f, + open(yaml_out, "r", encoding="utf-8") as g, + ): + expected = f.read() + out = g.read() + assert out == expected + + @pytest.mark.integration @pytest.mark.parametrize("minimal_diff", (True, False)) def test_full_anthology_roundtrip_xml( diff --git a/python/tests/anthology_test.py b/python/tests/anthology_test.py index 275ea83a01..094019a652 100644 --- a/python/tests/anthology_test.py +++ b/python/tests/anthology_test.py @@ -142,10 +142,10 @@ def test_get_event(anthology): def test_get_person(anthology): - person = anthology.get_person("yang-liu-edinburgh") + person = anthology.get_person("yang-liu-microsoft") assert person is not None assert person.canonical_name == Name("Yang", "Liu") - assert person.comment == "Edinburgh" + assert person.comment == "Microsoft Cognitive Services Research" def test_find_people(anthology): diff --git a/python/tests/collections/paper_test.py b/python/tests/collections/paper_test.py index d28a0e1f14..805001c7d6 100644 --- a/python/tests/collections/paper_test.py +++ b/python/tests/collections/paper_test.py @@ -17,6 +17,7 @@ import pytest from acl_anthology.collections import CollectionIndex from acl_anthology.collections.types import PaperType, VolumeType +from acl_anthology.exceptions import AnthologyXMLError from acl_anthology.files import AttachmentReference, PDFReference from acl_anthology.people import NameSpecification from acl_anthology.text import MarkupText @@ -35,6 +36,7 @@ class VolumeStub: title = MarkupText.from_string("Generic volume") editors = [] + full_id_tuple = ("2099", "stub", None) @pytest.fixture @@ -168,7 +170,7 @@ def test_paper_remove_author(anthology): paper = anthology.get_paper("2022.acl-demo.2") ns = paper.authors[-1] person = anthology.resolve(ns) - assert person.id == "iryna-gurevych" + assert person.id == "unverified/iryna-gurevych" assert paper.full_id_tuple in person.item_ids # Removing last author from paper @@ -205,6 +207,7 @@ def test_paper_add_author(anthology): Strings from neurons to language TimFernando 1–10 + 2022.naloma-1.1 fernando-2022-strings
@@ -271,6 +274,18 @@ def test_paper_roundtrip_xml(xml): assert etree.tostring(out, encoding="unicode") == xml +def test_paper_from_xml_invalid_tag(): + xml = """ + Briefly Noted + JohnDoe + J89-1009 + nn-1989-briefly + +""" + with pytest.raises(AnthologyXMLError): + Paper.from_xml(VolumeStub(), etree.fromstring(xml)) + + test_cases_paper_to_bibtex = ( ( "2022.acl-long.268", diff --git a/python/tests/collections/volume_test.py b/python/tests/collections/volume_test.py index 275cc3abb5..f57d7d65c0 100644 --- a/python/tests/collections/volume_test.py +++ b/python/tests/collections/volume_test.py @@ -463,7 +463,7 @@ def test_volume_remove_editor(anthology): volume = anthology.get_volume("2022.acl-long") ns = volume.editors[1] person = anthology.resolve(ns) - assert person.id == "preslav-nakov" + assert person.id == "unverified/preslav-nakov" assert volume.full_id_tuple in person.item_ids # Removing editor from volume diff --git a/python/tests/conftest.py b/python/tests/conftest.py index afbd96201b..2b8dd7d261 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -16,6 +16,7 @@ import itertools as it import pytest import reprlib +from unittest.mock import Mock pytest.register_assert_rewrite("acl_anthology.utils.xml") @@ -24,6 +25,7 @@ class AnthologyStub: datadir = None + people = Mock() @pytest.fixture diff --git a/python/tests/data/anthology/xml/1989.cl.xml b/python/tests/data/anthology/xml/1989.cl.xml index a7523884a8..723e126dcf 100644 --- a/python/tests/data/anthology/xml/1989.cl.xml +++ b/python/tests/data/anthology/xml/1989.cl.xml @@ -1,3 +1,2 @@ - - + diff --git a/python/tests/data/anthology/xml/2022.acl.xml b/python/tests/data/anthology/xml/2022.acl.xml index 3e9ee6f0b5..46d3d6c1e7 100644 --- a/python/tests/data/anthology/xml/2022.acl.xml +++ b/python/tests/data/anthology/xml/2022.acl.xml @@ -4363,7 +4363,7 @@ in the Case of Unambiguous Gender ZoeyLiu CrystalRichardson RichardHatcher - EmilyPrud’hommeaux + EmilyPrud’hommeaux 3933-3944 Languages are classified as low-resource when they lack the quantity of data necessary for training statistical and machine learning tools and models. Causes of resource scarcity vary but can include poor access to technology for developing these resources, a relatively small population of speakers, or a lack of urgency for collecting such resources in bilingual populations where the second language is high-resource. As a result, the languages described as low-resource in the literature are as different as Finnish on the one hand, with millions of speakers using it in every imaginable domain, and Seneca, with only a small-handful of fluent speakers using the language primarily in a restricted domain. While issues stemming from the lack of resources necessary to train models unite this disparate group of languages, many other issues cut across the divide between widely-spoken low-resource languages and endangered languages. In this position paper, we discuss the unique technological, cultural, practical, and ethical challenges that researchers and indigenous speech community members face when working together to develop language technology to support endangered language documentation and revitalization. We report the perspectives of language teachers, Master Speakers and elders from indigenous communities, as well as the point of view of academics. We describe an ongoing fruitful collaboration and make recommendations for future partnerships between academic researchers and language community stakeholders. 2022.acl-long.272 diff --git a/python/tests/data/anthology/yaml/name_variants.yaml b/python/tests/data/anthology/yaml/name_variants.yaml deleted file mode 100644 index a3a2b9ff80..0000000000 --- a/python/tests/data/anthology/yaml/name_variants.yaml +++ /dev/null @@ -1,37 +0,0 @@ -- canonical: {first: Pranav, last: A} - comment: UC Santa Cruz - id: pranav-a - similar: [pranav-anand] -- canonical: {first: Pranav, last: Anand} - comment: Dayta AI - id: pranav-anand - similar: [pranav-a] -- canonical: {first: Yang, last: Liu} - comment: Edinburgh - id: yang-liu-edinburgh -- canonical: {first: Yang, last: Liu} - comment: 刘扬; Ph.D Purdue; ICSI, Dallas, Facebook, Liulishuo, Amazon - id: yang-liu-icsi -- canonical: {first: Yang, last: Liu} - comment: 刘洋; ICT, Tsinghua, Beijing Academy of Artificial Intelligence - id: yang-liu-ict -- canonical: {first: Yang, last: Liu} - comment: Microsoft Cognitive Services Research - id: yang-liu-microsoft -- canonical: {first: José M., last: Guirao} - variants: - - {first: José María, last: Guirao} -- canonical: {first: Steven, last: Krauwer} - id: steven-krauwer -- canonical: {first: Susan, last: Armstrong} - variants: - - {first: Susan, last: Warwick-Armstrong} - - {first: Susan, last: Warwick} -- canonical: {first: Emily, last: Prud’hommeaux} - variants: - - {first: Emily T., last: Prud’hommeaux} - - {first: Emily, last: Prud'hommeaux} -- canonical: {first: Srinivas, last: Bangalore} - variants: - - {first: B., last: Srinivas} - - {first: '', last: Srinivas} diff --git a/python/tests/data/anthology/yaml/people.yaml b/python/tests/data/anthology/yaml/people.yaml new file mode 100644 index 0000000000..f14543583c --- /dev/null +++ b/python/tests/data/anthology/yaml/people.yaml @@ -0,0 +1,36 @@ +emily-prudhommeaux: + names: + - {first: Emily, last: Prud’hommeaux} + - {first: Emily T., last: Prud’hommeaux} + - {first: Emily, last: Prud'hommeaux} +marcel-bollmann: + names: + - {first: Marcel, last: Bollmann} + orcid: 0000-0003-2598-8150 +pranav-a: + comment: UC Santa Cruz + names: + - {first: Pranav, last: A} + similar: + - pranav-anand +pranav-anand: + comment: Dayta AI + disable_name_matching: true + names: + - {first: Pranav, last: Anand} +steven-krauwer: + names: + - {first: Steven, last: Krauwer} + - {first: S., last: Krauwer} +yang-liu-icsi: + comment: 刘扬; Ph.D Purdue; ICSI, Dallas, Facebook, Liulishuo, Amazon + names: + - {first: Yang, last: Liu} +yang-liu-ict: + comment: 刘洋; ICT, Tsinghua, Beijing Academy of Artificial Intelligence + names: + - {first: Yang, last: Liu} +yang-liu-microsoft: + comment: Microsoft Cognitive Services Research + names: + - {first: Yang, last: Liu} diff --git a/python/tests/people/name_test.py b/python/tests/people/name_test.py index a12ee5200a..131b7d79ba 100644 --- a/python/tests/people/name_test.py +++ b/python/tests/people/name_test.py @@ -141,6 +141,12 @@ def test_name_spec_to_xml_onlylast(): assert etree.tostring(element, encoding="unicode") == xml +def test_name_spec_to_xml_with_id_and_orcid(): + xml = 'Mausam' + element = NameSpecification.from_xml(etree.fromstring(xml)).to_xml("editor") + assert etree.tostring(element, encoding="unicode") == xml + + def test_name_variant_from_xml(): xml = """ diff --git a/python/tests/people/person_test.py b/python/tests/people/person_test.py index 1c0a81c67d..ec1c376da0 100644 --- a/python/tests/people/person_test.py +++ b/python/tests/people/person_test.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from acl_anthology.people import Name, Person +import pytest +from acl_anthology.exceptions import AnthologyException, AnthologyInvalidIDError +from acl_anthology.people import Name, NameLink, Person def test_person_names(anthology_stub): @@ -26,22 +28,22 @@ def test_person_names(anthology_stub): assert not person.has_name(n3) -def test_person_canonical_names(anthology_stub): +def test_person_canonical_name(anthology_stub): n1 = Name("Yang", "Liu") n2 = Name("Y.", "Liu") person = Person("yang-liu", anthology_stub, [n1, n2]) assert person.canonical_name == n1 - person.set_canonical_name(n2) + person.canonical_name = n2 assert person.canonical_name == n2 assert len(person.names) == 2 -def test_person_add_names(anthology_stub): +def test_person_add_name(anthology_stub): n1 = Name("Yang", "Liu") n2 = Name("Y.", "Liu") person = Person("yang-liu", anthology_stub, [n1]) assert person.canonical_name == n1 - person.set_canonical_name(n2) + person.canonical_name = n2 assert person.canonical_name == n2 assert len(person.names) == 2 n3 = Name("Yang X.", "Liu") @@ -50,14 +52,131 @@ def test_person_add_names(anthology_stub): assert len(person.names) == 3 -def test_person_papers(anthology): - person = anthology.get_person("nicoletta-calzolari") +def test_person_remove_name(anthology_stub): + n1 = Name("Yang", "Liu") + n2 = Name("Y.", "Liu") + person = Person("yang-liu", anthology_stub, [n1, n2]) + assert person.has_name(n2) + person.remove_name(n2) + assert not person.has_name(n2) + assert len(person.names) == 1 + + +def test_person_names_explicit_vs_inferred(anthology_stub): + n1 = Name("Yang", "Liu") + n2 = Name("Y.", "Liu") + person = Person("yang-liu", anthology_stub, [n1]) + assert (n1, NameLink.EXPLICIT) in person._names + person.canonical_name = n2 + assert (n2, NameLink.EXPLICIT) in person._names + n3 = Name("Yang X.", "Liu") + person.add_name(n3, inferred=True) + assert (n3, NameLink.INFERRED) in person._names + + +def test_person_add_name_explicit_vs_inferred(anthology_stub): + n1 = Name("Yang", "Liu") + n2 = Name("Y.", "Liu") + n3 = Name("Yang X.", "Liu") + person = Person("yang-liu", anthology_stub, [n1]) + person.add_name(n2, inferred=True) + person.add_name(n3, inferred=False) + assert person._names[1] == (n2, NameLink.INFERRED) + assert person._names[2] == (n3, NameLink.EXPLICIT) + # Calling add_name() with an existing name, but different "inferred" flag, + # should overwrite the NameLink value but keep the name in the same position + person.add_name(n2, inferred=False) + assert person._names[1] == (n2, NameLink.EXPLICIT) + person.add_name(n3, inferred=True) + assert person._names[2] == (n3, NameLink.INFERRED) + + +def test_person_no_name(anthology_stub): + person = Person("yang-liu", anthology_stub, []) + assert len(person.names) == 0 + with pytest.raises(ValueError): + person.canonical_name + name = Name("Yang", "Liu") + person.canonical_name = name + assert len(person.names) == 1 + assert person.canonical_name == name + + +def test_person_set_canonical_name(anthology_stub): + person = Person("rene-muller", anthology_stub, [Name("Rene", "Muller")]) + assert len(person.names) == 1 + name = Name("René", "Müller") + person.canonical_name = name + assert len(person.names) == 2 + assert person.canonical_name == name + + +def test_person_orcid(anthology_stub): + person = Person( + "marcel-bollmann", + anthology_stub, + [Name("Marcel", "Bollmann")], + orcid="0000-0002-1297-6794", + ) + assert person.orcid == "0000-0002-1297-6794" + person.orcid = "0000-0003-2598-8150" + assert person.orcid == "0000-0003-2598-8150" + with pytest.raises(ValueError): + person.orcid = "https://orcid.org/0000-0003-2598-8150" + with pytest.raises(ValueError): + person.orcid = "0000-0003-2598-815X" + + +def test_person_papers_unverified(anthology): + person = anthology.get_person("unverified/nicoletta-calzolari") assert person.canonical_name == Name("Nicoletta", "Calzolari") assert len(person.item_ids) == 3 assert len(list(person.papers())) == 2 assert len(list(person.volumes())) == 1 +def test_person_papers_verified(anthology): + person = anthology.get_person("yang-liu-ict") + assert person.canonical_name == Name("Yang", "Liu") + assert len(person.item_ids) == 2 + assert len(list(person.papers())) == 2 + + +def test_person_update_id(anthology): + person = anthology.get_person("marcel-bollmann") + person.update_id("marcel-bollmann-rub") + assert anthology.get_person("marcel-bollmann") is None + assert anthology.get_person("marcel-bollmann-rub") is person + person.update_id("marcel-bollmann") + assert anthology.get_person("marcel-bollmann") is person + assert anthology.get_person("marcel-bollmann-rub") is None + + +def test_person_update_id_should_update_connected_papers(anthology): + person = anthology.get_person("yang-liu-ict") + person.update_id("yang-liu-new") + namespec = anthology.get(person.item_ids[0]).authors[-1] + assert namespec.name == Name("Yang", "Liu") + assert namespec.id == "yang-liu-new" + + +def test_person_cannot_update_id_when_inferred(anthology): + person = anthology.get_person("unverified/nicoletta-calzolari") + assert not person.is_explicit + with pytest.raises(AnthologyException): + person.update_id("nicoletta-calzolari") + + +def test_person_cannot_update_id_with_invalid_id(anthology): + person = anthology.get_person("marcel-bollmann") + with pytest.raises(AnthologyInvalidIDError): + person.update_id("Marcel-Bollmann") + with pytest.raises(AnthologyInvalidIDError): + person.update_id("42-marcel-bollmann") + with pytest.raises(AnthologyInvalidIDError): + person.update_id("marcel bollmann") + + def test_person_with_name_variants(anthology): # Name variants should be recorded as names of that person person = anthology.get_person("yang-liu-ict") @@ -68,10 +187,23 @@ def test_person_with_name_variants(anthology): def test_person_is_explicit(anthology): person = anthology.get_person("yang-liu-ict") assert person.is_explicit - person = anthology.get_person("nicoletta-calzolari") + person = anthology.get_person("unverified/nicoletta-calzolari") assert not person.is_explicit - person = anthology.get_person("srinivas-bangalore") + + +def test_person_make_explicit(anthology): + person = anthology.get_person("unverified/nicoletta-calzolari") + assert not person.is_explicit + person.make_explicit("nicoletta-calzolari") + assert person.is_explicit + assert person.id == "nicoletta-calzolari" + + +def test_person_make_explicit_should_raise_when_explicit(anthology): + person = anthology.get_person("marcel-bollmann") assert person.is_explicit + with pytest.raises(AnthologyException): + person.make_explicit("marcel-bollmann") def test_person_equality(anthology_stub): @@ -82,4 +214,5 @@ def test_person_equality(anthology_stub): assert person1 == person2 assert person1 != person3 assert person2 != person3 + assert person2 != "yang-liu" # comparison with non-Person object is always False assert hash(person1) == hash(person2) diff --git a/python/tests/people/personindex_test.py b/python/tests/people/personindex_test.py index 24963c2e53..87b9731f16 100644 --- a/python/tests/people/personindex_test.py +++ b/python/tests/people/personindex_test.py @@ -1,4 +1,4 @@ -# Copyright 2023-2024 Marcel Bollmann +# Copyright 2023-2025 Marcel Bollmann # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,195 +13,124 @@ # limitations under the License. import pytest -from acl_anthology.exceptions import AmbiguousNameError, NameIDUndefinedError -from acl_anthology.people import Name, NameSpecification, Person, PersonIndex +from acl_anthology.exceptions import ( + AnthologyInvalidIDError, + NameSpecResolutionError, + PersonDefinitionError, +) +from acl_anthology.people import Name, NameLink, NameSpecification, Person, PersonIndex @pytest.fixture -def index(anthology_stub): +def index_stub(anthology_stub): return PersonIndex(anthology_stub) @pytest.fixture -def index_with_full_anthology(anthology): - return PersonIndex(anthology) +def index(anthology): + return anthology.people -def test_load_variant_list(index): - index._load_variant_list() +def test_load_people_index(index_stub): + index = index_stub + index.reset() + index._load_people_index() index.is_data_loaded = True for pid in ( - "pranav-a", - "pranav-anand", - "yang-liu-edinburgh", + "emily-prudhommeaux", + "steven-krauwer", "yang-liu-icsi", "yang-liu-ict", "yang-liu-microsoft", - "steven-krauwer", ): assert pid in index -def test_load_variant_list_correct_variants(index): - index._load_variant_list() +def test_load_people_index_registers_names(index_stub): + index = index_stub + index.reset() + index._load_people_index() index.is_data_loaded = True - n1 = Name("Susan", "Armstrong") - n2 = Name("Susan", "Warwick") - assert n1 in index.name_to_ids - assert n2 in index.name_to_ids - pid = index.name_to_ids[n1] - assert pid == index.name_to_ids[n2] + n1 = Name("Steven", "Krauwer") + n2 = Name("S.", "Krauwer") + assert n1 in index.by_name + assert n2 in index.by_name + pid = index.by_name[n1] + assert pid == index.by_name[n2] assert pid[0] in index -def test_load_variant_list_correct_ids(index): - # If no explicit ID is defined, the ID should be based on the canonical - # name in the variants list - index._load_variant_list() - index.is_data_loaded = True - n1 = Name("Susan", "Warwick-Armstrong") - pid = index.name_to_ids[n1] - assert pid == ["susan-armstrong"] - - -def test_load_variant_find_people_single_name(index): - # People with a single name should correctly be found - index._load_variant_list() - index.is_data_loaded = True - n1 = Name(None, "Srinivas") - pid = index.name_to_ids[n1] - assert pid == ["srinivas-bangalore"] - - -def test_add_person(index): +def test_add_person(index_stub): + index = index_stub + index.reset() p1 = Person("yang-liu", index.parent, [Name("Yang", "Liu")]) index.add_person(p1) index.is_data_loaded = True # to prevent it attempting to build itself assert "yang-liu" in index - assert Name("Yang", "Liu") in index.name_to_ids - assert index.name_to_ids[Name("Yang", "Liu")] == ["yang-liu"] + assert Name("Yang", "Liu") in index.by_name + assert index.by_name[Name("Yang", "Liu")] == ["yang-liu"] assert index.get_by_name(Name("Yang", "Liu"))[0] is p1 assert index.get_by_namespec(NameSpecification(Name("Yang", "Liu"))) is p1 assert index.get("yang-liu") is p1 - with pytest.raises(KeyError): + with pytest.raises(ValueError): index.add_person(Person("yang-liu", index.parent)) -def test_get_or_create_person_with_id(index): +def test_similar_names_defined_in_people_index(index_stub): + index = index_stub + index.reset() + index._load_people_index() index.is_data_loaded = True - ns1 = NameSpecification(Name("Yang", "Liu"), id="yang-liu-icsi") - ns2 = NameSpecification(Name("Y.", "Liu"), id="yang-liu-icsi") - with pytest.raises(NameIDUndefinedError): - index.get_or_create_person(ns1) - index._load_variant_list() - person1 = index.get_or_create_person(ns1) - assert person1.id == "yang-liu-icsi" - person2 = index.get_or_create_person(ns2) - assert person1 is person2 - assert person1 is index["yang-liu-icsi"] - assert person1.has_name(Name("Yang", "Liu")) - assert person1.has_name(Name("Y.", "Liu")) - - -def test_get_or_create_person_new_person(index): - index.is_data_loaded = True - ns1 = NameSpecification(Name("Yang", "Liu")) - ns2 = NameSpecification(Name("Yang", "Liu"), affiliation="University of Edinburgh") - person1 = index.get_or_create_person(ns1) - assert person1.has_name(Name("Yang", "Liu")) - person2 = index.get_or_create_person(ns2) - assert person1 is person2 - assert person1 is index[person1.id] - - -def test_get_or_create_person_new_person_disallowed(index): - ns1 = NameSpecification(Name("Yang", "Liu")) - with pytest.raises(NameIDUndefinedError): - index.get_or_create_person(ns1, create=False) - - -def test_get_or_create_person_with_ambiguous_name(index): - index._load_variant_list() - ns1 = NameSpecification(Name("Yang", "Liu")) - ns2 = NameSpecification(Name("Yang", "Liu"), id="yang-liu-icsi") - with pytest.raises(AmbiguousNameError): - index.get_or_create_person(ns1) - person = index.get_or_create_person(ns2) - assert person.id == "yang-liu-icsi" - - -def test_get_or_create_person_with_name_merging(index): - ns1 = NameSpecification(Name("John", "Neumann")) - ns2 = NameSpecification(Name("Jöhn", "Néumänn")) - person1 = index.get_or_create_person(ns1) - person2 = index.get_or_create_person(ns2) - assert person1 is person2 - assert person2.has_name(ns1.name) - assert person2.has_name(ns2.name) - assert person2.canonical_name == ns2.name - - -def test_get_or_create_person_with_explicit_canonical_name(index): - index._load_variant_list() - # This name is defined as canonical in the variants list - ns1 = NameSpecification(Name("Emily", "Prud’hommeaux")) - # This one is not, but scores higher according to our heuristics - ns2 = NameSpecification(Name("Emily", "Prud’Hommeaux")) - assert ( - ns2.name.score() > ns1.name.score() - ), "This test assumes that `ns2` will score higher than `ns1`." - person1 = index.get_or_create_person(ns1) - person2 = index.get_or_create_person(ns2) - assert person1 is person2 - assert person2.has_name(ns1.name) - assert person2.has_name(ns2.name) - # Canonical name should still be the one defined in variants list - assert person2.canonical_name == ns1.name - - -def test_similar_names_defined_in_variant_list(index): - index._load_variant_list() similar = index.similar.subset("pranav-a") assert similar == {"pranav-a", "pranav-anand"} -def test_similar_names_through_same_canonical_name(index): - index._load_variant_list() +def test_similar_names_through_same_canonical_name(index_stub): + index = index_stub + index.reset() + index._load_people_index() + index.is_data_loaded = True similar = index.similar.subset("yang-liu-ict") assert similar == { - "yang-liu-edinburgh", "yang-liu-icsi", "yang-liu-ict", "yang-liu-microsoft", } -def test_build_personindex(index_with_full_anthology): - index = index_with_full_anthology +def test_build_personindex(index): assert not index.is_data_loaded index.build(show_progress=False) assert index.is_data_loaded assert "yang-liu-microsoft" in index - assert Name("Nicoletta", "Calzolari") in index.name_to_ids + assert Name("Nicoletta", "Calzolari") in index.by_name + assert "0000-0003-2598-8150" in index.by_orcid -def test_build_personindex_automatically(index_with_full_anthology): - index = index_with_full_anthology +def test_build_personindex_automatically(index): assert not index.is_data_loaded persons = index.get_by_name(Name("Nicoletta", "Calzolari")) assert index.is_data_loaded assert len(persons) == 1 -def test_canonical_name_is_never_a_variant(index_with_full_anthology): - index = index_with_full_anthology +@pytest.mark.parametrize( + "name", ("by_orcid", "by_name", "similar", "slugs_to_verified_ids") +) +def test_build_personindex_automatically_on_property_access(index, name): + assert not index.is_data_loaded + _ = getattr(index, name) + assert index.is_data_loaded + + +def test_canonical_name_never_has_script(index): for person in index.values(): assert person.canonical_name.script is None -def test_get_person_coauthors(index_with_full_anthology): - index = index_with_full_anthology - person = index.get_by_name(Name("Kathleen", "Dahlgren"))[0] +def test_get_person_coauthors(index): + index.load() + person = index.by_name[Name("Kathleen", "Dahlgren")][0] coauthors = index.find_coauthors(person) assert len(coauthors) == 1 assert coauthors[0].canonical_name == Name("Joyce", "McDowell") @@ -214,36 +143,524 @@ def test_get_person_coauthors(index_with_full_anthology): assert len(coauthors) == 2 -def test_get_person_coauthors_counter(index_with_full_anthology): - index = index_with_full_anthology - person = index.get_by_name(Name("Kathleen", "Dahlgren"))[0] - coauthors = index.find_coauthors_counter(person) +def test_get_person_coauthors_counter(index): + coauthors = index.find_coauthors_counter("unverified/kathleen-dahlgren") assert len(coauthors) == 1 - assert coauthors["joyce-mcdowell"] == 1 + assert coauthors["unverified/joyce-mcdowell"] == 1 person = index.get_by_name(Name("Preslav", "Nakov"))[0] coauthors = index.find_coauthors_counter(person) assert len(coauthors) == 2 - assert coauthors["joyce-mcdowell"] == 0 - assert coauthors["aline-villavicencio"] == 2 + assert coauthors["unverified/joyce-mcdowell"] == 0 + assert coauthors["unverified/aline-villavicencio"] == 2 -def test_get_by_namespec(index_with_full_anthology): - index = index_with_full_anthology +def test_get_by_namespec(index): ns1 = NameSpecification(Name("Yang", "Liu")) ns2 = NameSpecification(Name("Yang", "Liu"), id="yang-liu-microsoft") - # In contrast to test_get_or_create_person_new_person_disallowed, this - # should behave differently because it makes sure the index is built first - with pytest.raises(AmbiguousNameError): + with pytest.raises(NameSpecResolutionError): index.get_by_namespec(ns1) person = index.get_by_namespec(ns2) assert person.id == "yang-liu-microsoft" assert person.canonical_name == Name("Yang", "Liu") -def test_get_by_name_variants(index_with_full_anthology): +def test_get_by_name_variants(index): # It should be possible to find a person by a name variant - index = index_with_full_anthology persons = index.get_by_name(Name("洋", "刘")) assert len(persons) == 1 assert persons[0].id == "yang-liu-ict" + + +def test_get_by_orcid(index): + person = index.get_by_orcid("0000-0003-2598-8150") + assert person is not None + assert person.id == "marcel-bollmann" + assert index.get_by_orcid("0000-0000-0000-0000") is None + + +def test_change_orcid(index): + person = index.get_by_orcid("0000-0003-2598-8150") + assert person is not None + assert person.id == "marcel-bollmann" + person.orcid = "0000-0002-2909-0906" + assert index.get_by_orcid("0000-0003-2598-8150") is None + assert index.get_by_orcid("0000-0002-2909-0906") is person + + +def test_create_person(index): + person = index.create_person( + id="matt-post", + names=[Name("Matt", "Post")], + orcid="0000-0002-1297-6794", + ) + assert person.id in index + assert person.id == "matt-post" + assert person.orcid == "0000-0002-1297-6794" + assert person.is_explicit + + +def test_create_person_should_fail_on_duplicate_orcid(index): + with pytest.raises(ValueError): + index.create_person( + id="marcel-bollmann-twin", + names=[Name("Marcel", "Bollmann")], + orcid="0000-0003-2598-8150", # already assigned to "marcel-bollmann" + ) + + +def test_create_person_should_fail_on_duplicate_id(index): + with pytest.raises(AnthologyInvalidIDError): + index.create_person( + id="marcel-bollmann", # already exists + names=[Name("Marcel", "Bollmann")], + ) + + +def test_create_person_should_fail_on_unverified_id(index): + with pytest.raises(AnthologyInvalidIDError): + index.create_person( + id="unverified/john-doe", # cannot create this manually + names=[Name("John", "Doe")], + ) + + +def test_create_person_should_fail_on_empty_names(index): + with pytest.raises(ValueError): + index.create_person( + id="john-doe-new", + names=[], # cannot be empty + ) + + +############################################################################## +### Tests for changing Person attributes that should update the index +############################################################################## + + +def test_person_id_change_should_update_index(anthology): + index = anthology.people + person = index["marcel-bollmann"] + person.id = "marcel-bollmann-rub" + assert "marcel-bollmann" not in index + assert "marcel-bollmann-rub" in index + assert index.by_orcid["0000-0003-2598-8150"] == "marcel-bollmann-rub" + assert index.by_name[Name("Marcel", "Bollmann")] == ["marcel-bollmann-rub"] + + +def test_person_orcid_change_should_update_index(anthology): + index = anthology.people + person = index["yang-liu-ict"] + orcid = "0000-0003-4154-7507" + assert orcid not in index.by_orcid + person.orcid = orcid + assert orcid in index.by_orcid + assert index.by_orcid[orcid] == "yang-liu-ict" + + +def test_person_add_name_should_update_index(anthology): + index = anthology.people + person = index["marcel-bollmann"] + name = Name("Marc Marcel", "Bollmann") + assert not index.by_name[name] + person.add_name(name) + assert index.by_name[name] == ["marcel-bollmann"] + assert index.slugs_to_verified_ids[name.slugify()] == set(["marcel-bollmann"]) + + +def test_person_remove_name_should_update_index(anthology): + index = anthology.people + person = index["steven-krauwer"] + name = Name("S.", "Krauwer") + assert index.by_name[name] == ["steven-krauwer"] + person.remove_name(name) + assert not index.by_name[name] + assert not index.slugs_to_verified_ids[name.slugify()] + + +def test_person_setting_names_should_update_index(anthology): + index = anthology.people + person = index["steven-krauwer"] + names = [Name("Steven", "Krauwer"), Name("Steven J.", "Krauwer")] + person.names = names + # previously existing name + assert index.by_name[names[0]] == ["steven-krauwer"] + # added name + assert index.by_name[names[1]] == ["steven-krauwer"] + # removed name + assert not index.by_name[Name("S.", "Krauwer")] + + +############################################################################## +### Tests for name resolution logic +############################################################################## + +# Format: (Name, NameSpecification attributes, expected ID or Exception) +test_cases_resolve_namespec = ( + #### "No match" cases + ( # Name does not exist in people.yaml + {"first": "Matthew", "last": "Stevens"}, + {}, + "unverified/matthew-stevens", + ), + ( # Person with explicit ID does not exist in people.yaml + {"first": "Matthew", "last": "Stevens"}, + {"id": "matthew-stevens"}, + PersonDefinitionError, + ), + #### "One match" cases + ( # Name exists in people.yaml, unambiguous + {"first": "Steven", "last": "Krauwer"}, + {}, + "steven-krauwer", + ), + ( # Name exists in people.yaml, unambiguous, but not as canonical name + {"first": "Emily T.", "last": "Prud’hommeaux"}, + {}, + "emily-prudhommeaux", + ), + ( # Person unambiguous, but has `disable_name_matching: true` + {"first": "Pranav", "last": "Anand"}, + {}, + "unverified/pranav-anand", + ), + ( # `disable_name_matching: true` doesn't affect NameSpecs with explicit ID + {"first": "Pranav", "last": "Anand"}, + {"id": "pranav-anand"}, + "pranav-anand", + ), + ( # Name exists in people.yaml with an ORCID, unambiguous + {"first": "Marcel", "last": "Bollmann"}, + {}, + "marcel-bollmann", + ), + ( # ... with explicit ID + {"first": "Marcel", "last": "Bollmann"}, + {"id": "marcel-bollmann"}, + "marcel-bollmann", + ), + ( # ... with explicit ID & ORCID + {"first": "Marcel", "last": "Bollmann"}, + {"id": "marcel-bollmann", "orcid": "0000-0003-2598-8150"}, + "marcel-bollmann", + ), + ( # ... with explicit ID & ORCID, but ORCID doesn't match + {"first": "Marcel", "last": "Bollmann"}, + {"id": "marcel-bollmann", "orcid": "0000-0002-7491-7669"}, + PersonDefinitionError, + ), + ( # ... with explicit ID & ORCID, but name isn't listed in people.yaml + {"first": "Marc Marcel", "last": "Bollmann"}, + {"id": "marcel-bollmann", "orcid": "0000-0003-2598-8150"}, + PersonDefinitionError, + ), + ( # Name matches an existing, unambiguous name via slugification + {"first": "Stèven", "last": "Kräuwer"}, + {}, + "steven-krauwer", + ), + ( # ... even when it's not the canonical name + {"first": "Emily T.", "last": "Prüd’hommeaux"}, + {}, + "emily-prudhommeaux", + ), + ( # ... even with different first/last split + {"first": "Emily", "last": "T. Prud’hommeaux"}, + {}, + "emily-prudhommeaux", + ), + #### "2+ matches" cases + ( # Name exists in people.yaml for several people + {"first": "Yang", "last": "Liu"}, + {}, + "unverified/yang-liu", + ), + ( # ... will resolve to known person with explicit ID + {"first": "Yang", "last": "Liu"}, + {"id": "yang-liu-icsi"}, + "yang-liu-icsi", + ), + ( # ... affiliation is NOT used in any way for name resolution + {"first": "Yang", "last": "Liu"}, + {"affiliation": "Microsoft Cognitive Services Research"}, + "unverified/yang-liu", + ), + #### Malformed name specifications + ( # Person with explicit ORCID, but no explicit ID (always disallowed) + {"first": "Matthew", "last": "Stevens"}, + {"orcid": "0000-0002-7491-7669"}, + NameSpecResolutionError, + ), + ( # ... even if the person exists (ID is still required) + {"first": "Marcel", "last": "Bollmann"}, + {"orcid": "0000-0003-2598-8150"}, + NameSpecResolutionError, + ), +) + + +@pytest.mark.parametrize( + "name_dict, namespec_params, expected_result", + test_cases_resolve_namespec, +) +def test_resolve_namespec(name_dict, namespec_params, expected_result, index): + index.reset() + index._load_people_index() + name = Name.from_dict(name_dict) + namespec = NameSpecification(name, **namespec_params) + + if isinstance(expected_result, str): + person = index.resolve_namespec(namespec, allow_creation=True) + assert person.has_name(name) + assert person.id == expected_result + elif isinstance(expected_result, type): + with pytest.raises(expected_result): + index.resolve_namespec(namespec, allow_creation=True) + else: + raise ValueError( + f"Test cannot take expected result of type {type(expected_result)}" + ) + + +def test_resolve_namespec_disallow_creation(index): + index.reset() + index._load_people_index() + # If we would map to an unverified ID but allow_creation is False, should raise + with pytest.raises(NameSpecResolutionError): + index.resolve_namespec( + NameSpecification(Name("Matthew", "Stevens")), allow_creation=False + ) + + +def test_resolve_namespec_name_scoring_for_unverified_ids(index_stub): + # Person does not exist, will create an unverified ID + person1 = index_stub.resolve_namespec( + NameSpecification(Name("Rene", "Muller")), allow_creation=True + ) + assert person1.id == "unverified/rene-muller" + assert person1.canonical_name == Name("Rene", "Muller") + # Name resolves to the same person as above + person2 = index_stub.resolve_namespec( + NameSpecification(Name("René", "Müller")), allow_creation=True + ) + assert person2.id == "unverified/rene-muller" + assert person2 is person1 + # ... and also updates their canonical name, as it scores higher! + assert person2.canonical_name == Name("René", "Müller") + + +test_cases_namelink = ( + # Names that are explicitly defined in people.yaml should always have + # NameLink.EXPLICIT after resolve_namespec() + ( + {"first": "Steven", "last": "Krauwer"}, + NameLink.EXPLICIT, + ), + ( + {"first": "S.", "last": "Krauwer"}, + NameLink.EXPLICIT, + ), + ( + {"first": "Marcel", "last": "Bollmann"}, + NameLink.EXPLICIT, + ), + # Names that are matched via slugification should always have + # NameLink.INFERRED after resolve_namespec() + ( + {"first": "Stèven", "last": "Kräuwer"}, + NameLink.INFERRED, + ), + ( + {"first": "Emily T.", "last": "Prüd’hommeaux"}, + NameLink.INFERRED, + ), + ( + {"first": "Emily", "last": "T. Prud’hommeaux"}, + NameLink.INFERRED, + ), +) + + +@pytest.mark.parametrize("name_dict, expected_namelink", test_cases_namelink) +def test_check_namelink_after_resolve_namespec(name_dict, expected_namelink, index): + index.reset() + index._load_people_index() + name = Name.from_dict(name_dict) + namespec = NameSpecification(name) + person = index.resolve_namespec(namespec, allow_creation=True) + + assert ( + name, + expected_namelink, + ) in person._names # maybe provide a function for this? + + +############################################################################## +### Tests for ingestion logic +############################################################################## + +# Format: (Name, NameSpecification attributes, expected ID) +test_cases_ingest_namespec = ( + ( # No ORCID in the ingestion material + {"first": "Matthew", "last": "Stevens"}, + {}, + None, + ), + #### ORCID in the ingestion material, matches a person in our `people.yaml` + ( + {"first": "Marcel", "last": "Bollmann"}, + {"orcid": "0000-0003-2598-8150"}, + "marcel-bollmann", + ), + ( # ... even if the name wasn't recorded yet in `people.yaml` + {"first": "Marc Marcel", "last": "Bollmann"}, + {"orcid": "0000-0003-2598-8150"}, + "marcel-bollmann", + ), + #### ORCID in the ingestion material, no match in our `people.yaml` + ( # Person should be created + {"first": "Matt", "last": "Post"}, + {"orcid": "0000-0002-1297-6794"}, + "matt-post", + ), + ( # It shouldn't matter if other persons with the same name exist, only ORCID matters + {"first": "Yang", "last": "Liu"}, + {"orcid": "0000-0003-4154-7507"}, + "yang-liu", # this ID is actually not defined in people.yaml! + ), + ( # When generated ID is already taken, append the last four digits of ORCID + {"first": "Marcel", "last": "Bollmann"}, + {"orcid": "0000-0003-3750-1098"}, + "marcel-bollmann-1098", + ), + #### Edge cases + ( # If function is already called with an ID for some reason, nothing happens + {"first": "Marcel", "last": "Bollmann"}, + {"id": "marcel-bollmann"}, + "marcel-bollmann", + ), +) + + +@pytest.mark.parametrize( + "name_dict, namespec_params, expected_result", + test_cases_ingest_namespec, +) +def test_ingest_namespec(name_dict, namespec_params, expected_result, index): + index.reset() + index._load_people_index() + name = Name.from_dict(name_dict) + namespec = NameSpecification(name, **namespec_params) + index.ingest_namespec(namespec) + + assert namespec.id == expected_result + if namespec.id is not None: + # Should also exist in (or have been added to) index + assert namespec.id in index + # ... with the name given here + assert index[namespec.id].has_name(name) + + +def test_ingest_namespec_returns_namespec(index): + ns1 = NameSpecification(Name("Matt", "Post"), orcid="0000-0002-1297-6794") + ns2 = index.ingest_namespec(ns1) + assert ns1 is ns2 + + +############################################################################## +### Tests for saving people.yaml +############################################################################## + + +def test_people_yaml_roundtrip(index, tmp_path): + index.load() + yaml_in = index.path + yaml_out = tmp_path / "people.yaml" + index.save(yaml_out) + assert yaml_out.is_file() + with ( + open(yaml_in, "r", encoding="utf-8") as f, + open(yaml_out, "r", encoding="utf-8") as g, + ): + expected = f.read() + out = g.read() + assert out == expected + + +def test_add_fields_to_people_yaml(index, tmp_path): + index.load() + yaml_out = tmp_path / "people.add_fields.yaml" + + # Modifications + person = index["marcel-bollmann"] + person.add_name(Name("Marc Marcel", "Bollmann")) + person.degree = "Ruhr-Universität Bochum" + + # Test that modifications are saved to people.yaml + index.save(yaml_out) + assert yaml_out.is_file() + with open(yaml_out, "r", encoding="utf-8") as f: + out = f.read() + + assert ( + """ +marcel-bollmann: + degree: Ruhr-Universität Bochum + names: + - {first: Marcel, last: Bollmann} + - {first: Marc Marcel, last: Bollmann} + orcid: 0000-0003-2598-8150""" + in out + ) + + +def test_add_person_to_people_yaml_via_make_explicit(index, tmp_path): + index.load() + yaml_out = tmp_path / "people.make_explicit.yaml" + + # Modifications + person = index["unverified/preslav-nakov"] + person.make_explicit("preslav-nakov") + person.orcid = "0000-0002-3600-1510" + + # Test that modifications are saved to people.yaml + index.save(yaml_out) + assert yaml_out.is_file() + with open(yaml_out, "r", encoding="utf-8") as f: + out = f.read() + + assert ( + """ +preslav-nakov: + names: + - {first: Preslav, last: Nakov} + orcid: 0000-0002-3600-1510""" + in out + ) + + +def test_add_person_to_people_yaml_via_create_person(index, tmp_path): + index.load() + yaml_out = tmp_path / "people.create_person.yaml" + + # Modifications + index.create_person( + id="preslav-nakov", + names=[Name("Preslav", "Nakov")], + orcid="0000-0002-3600-1510", + ) + + # Test that modifications are saved to people.yaml + index.save(yaml_out) + assert yaml_out.is_file() + with open(yaml_out, "r", encoding="utf-8") as f: + out = f.read() + + assert ( + """ +preslav-nakov: + names: + - {first: Preslav, last: Nakov} + orcid: 0000-0002-3600-1510""" + in out + ) diff --git a/python/tests/utils/ids_test.py b/python/tests/utils/ids_test.py index 7ea051a565..4f195805d0 100644 --- a/python/tests/utils/ids_test.py +++ b/python/tests/utils/ids_test.py @@ -100,3 +100,14 @@ def test_is_valid_item_id(): assert not ids.is_valid_item_id("main.42") assert not ids.is_valid_item_id("acl ") assert not ids.is_valid_item_id("") + + +def test_is_valid_orcid(): + assert ids.is_valid_orcid("0000-0002-1825-0097") + assert ids.is_valid_orcid("0000-0001-5109-3700") + assert ids.is_valid_orcid("0000-0002-1694-233X") + assert not ids.is_valid_orcid("0000-0002-1825-009X") + assert not ids.is_valid_orcid("0000-0001-5109-3701") + assert not ids.is_valid_orcid("0000-0002-1694-2339") + assert not ids.is_valid_orcid("000000021694233X") + assert not ids.is_valid_orcid("0002-1694-233X")